From 0c5716b886f2a251f506ffbf3799a3f9199b2592 Mon Sep 17 00:00:00 2001
From: "A. Wilcox" <AWilcox@Wilcox-Tech.com>
Date: Thu, 12 Jan 2023 05:29:55 +0000
Subject: [PATCH 01/47] system/musl: Fix wchar_t issue

---
 system/musl/APKBUILD | 4 +-
 system/musl/signed-wchar_t-fixes.patch | 77 ++++++++++++++++++++++++++
 2 files changed, 80 insertions(+), 1 deletion(-)
 create mode 100644 system/musl/signed-wchar_t-fixes.patch

diff --git a/system/musl/APKBUILD b/system/musl/APKBUILD
index ab12e01961..9816d009e2 100644
--- a/system/musl/APKBUILD
+++ b/system/musl/APKBUILD
@@ -1,7 +1,7 @@
 # Maintainer: A. Wilcox <awilfox@adelielinux.org>
 pkgname=musl
 pkgver=1.2.3
-pkgrel=0
+pkgrel=1
 pkgdesc="System library (libc) implementation"
 url="https://www.musl-libc.org/"
 arch="all"
@@ -27,6 +27,7 @@ source="https://musl.libc.org/releases/musl-${pkgver}.tar.gz
 handle-aux-at_base.patch
 fgetspent_r.patch
 realpath.patch
+ signed-wchar_t-fixes.patch
 
 ldconfig
 getent.c
@@ -125,6 +126,7 @@ f7b05d8c5f804ba3ad6998b3de5fa4d9dfceac4aca63dd67298c2d5f27cdd28a91eba74f6e428c25
 1f4e9aea5a546015c75f77aa0dec10d56fc14831ccc15cf71ff27fc15ac5230ffeadb382ebe1c87c1ea07a462620e16ed01cd36252d997d1a9c2af11cb5c9ff3 handle-aux-at_base.patch
 ded41235148930f8cf781538f7d63ecb0c65ea4e8ce792565f3649ee2523592a76b2a166785f0b145fc79f5852fd1fb1729a7a09110b3b8f85cba3912e790807 fgetspent_r.patch
 d5ec3f1a86f2194e0af83c2391508811b939d0f8f2fd2ac5ac7f03774f8a250ce42399110d2ae04d32b864ee292863fed683a029b64598dbbcb21d9811a825d0 realpath.patch
+3770af3bc961e5d5b8c152c428cd20dc54e026b23b31d764fbc2e71ee38140d160db2267755f23800bc8586fd4b51554b1caebb2415bef82fd0f4a6dd8bf640d signed-wchar_t-fixes.patch
 cb71d29a87f334c75ecbc911becde7be825ab30d8f39fa6d64cb53812a7c9abaf91d9804c72540e5be3ddd3c84cfe7fd9632274309005cb8bcdf9a9b09b4b923 ldconfig
 378d70e65bcc65bb4e1415354cecfa54b0c1146dfb24474b69e418cdbf7ad730472cd09f6f103e1c99ba6c324c9560bccdf287f5889bbc3ef0bdf0e08da47413 getent.c
 9d42d66fb1facce2b85dad919be5be819ee290bd26ca2db00982b2f8e055a0196290a008711cbe2b18ec9eee8d2270e3b3a4692c5a1b807013baa5c2b70a2bbf iconv.c"
diff --git a/system/musl/signed-wchar_t-fixes.patch b/system/musl/signed-wchar_t-fixes.patch
new file mode 100644
index 0000000000..0d5309315b
--- /dev/null
+++ b/system/musl/signed-wchar_t-fixes.patch
@@ -0,0 +1,77 @@
+From 99b84a793669c69acc705a61d339441b50bd09a8 Mon Sep 17 00:00:00 2001
+From: Gabriel Ravier <gabravier@gmail.com>
+Date: Wed, 4 Jan 2023 16:07:19 +0100
+Subject: [PATCH] fix return value of wcs{,n}cmp for near-limits signed wchar_t values
+
+The standard states that:
+
+ > Unless explicitly stated otherwise, the functions described in
+ this subclause order two wide characters the same way as two
+ integers of the underlying integer type designated by `wchar_t`.
+ > [...]
+ > The `wcscmp` function returns an integer greater than, equal to,
+ or less than zero, accordingly as the wide string pointed to by s1
+ is greater than, equal to, or less than the wide string pointed to
+ by s2.
+ > The `wcsncmp` function returns an integer greater than, equal to,
+ or less than zero, accordingly as the possibly null-terminated
+ array pointed to by s1 is greater than, equal to, or less than the
+ possibly null-terminated array pointed to by s2
+ - N3047 (latest C draft as of the time of writing)
+
+Yet a simple test program such as this:
+
+ #include <wchar.h>
+ #include <stdio.h>
+
+ int main()
+ {
+ wchar_t str1[2] = { WCHAR_MAX, L'\0' };
+ wchar_t str2[2] = { WCHAR_MIN, L'\0' };
+
+ printf("%d\n", wcscmp(str1, str2));
+ printf("%d\n", wcsncmp(str1, str2, 1));
+ }
+
+Will fail to run correctly according to this specification on musl (on
+targets that have signed wchar_t), as it will print -1 instead of
+1 (it should print 1, since WCHAR_MAX > WCHAR_MIN).
+
+This appears to be due to the fact that musl uses a simple subtraction
+to implement wcscmp and wcsncmp, which may result in an overflow.
+
+This patch fixes this by replacing the subtraction with a little bit
+of code that orders the characters correctly, returning -1 if the
+character from the first string is smaller than the one from the
+second, 0 if they are equal and 1 if the character from the first
+string is larger than the one from the second
+---
+ src/string/wcscmp.c | 2 +-
+ src/string/wcsncmp.c | 2 +-
+ 2 files changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/src/string/wcscmp.c b/src/string/wcscmp.c
+index 26eeee70..286ec3ea 100644
+--- a/src/string/wcscmp.c
++++ b/src/string/wcscmp.c
+@@ -3,5 +3,5 @@
+ int wcscmp(const wchar_t *l, const wchar_t *r)
+ {
+ for (; *l==*r && *l && *r; l++, r++);
+- return *l - *r;
++ return *l < *r ? -1 : *l > *r;
+ }
+diff --git a/src/string/wcsncmp.c b/src/string/wcsncmp.c
+index 4ab32a92..2b3558bf 100644
+--- a/src/string/wcsncmp.c
++++ b/src/string/wcsncmp.c
+@@ -3,5 +3,5 @@
+ int wcsncmp(const wchar_t *l, const wchar_t *r, size_t n)
+ {
+ for (; n && *l==*r && *l && *r; n--, l++, r++);
+- return n ? *l - *r : 0;
++ return n ? (*l < *r ? -1 : *l > *r) : 0;
+ }
+-- 
+2.38.1
+
-- 
GitLab

From c1b4d8935778ad4c0c75e0c50b5845acac2e14d0 Mon Sep 17 00:00:00 2001
From: "A. Wilcox" <AWilcox@Wilcox-Tech.com>
Date: Thu, 30 Mar 2023 01:58:49 -0500
Subject: [PATCH 02/47] system/asciidoctor: Update to 2.0.18

---
 system/asciidoctor/APKBUILD | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/system/asciidoctor/APKBUILD b/system/asciidoctor/APKBUILD
index 3b26fbab03..ef87e93142 100644
--- a/system/asciidoctor/APKBUILD
+++ b/system/asciidoctor/APKBUILD
@@ -1,7 +1,7 @@
 # Contributor: Seung Soo Mun <hamletmun@gmail.com>
 # Maintainer: A. Wilcox <awilfox@adelielinux.org>
 pkgname=asciidoctor
-pkgver=2.0.17
+pkgver=2.0.18
 pkgrel=0
 pkgdesc="An implementation of AsciiDoc in Ruby"
 url="https://rubygems.org/gems/$pkgname"
@@ -34,4 +34,4 @@ package() {
 done
 }
 
-sha512sums="41ebc833c7b8bded5704034c7fcf7d2f26643a24de24eda57db1a0ac94976e775bf0f093f46faaa99c5007c61af325aa5b02e5321159a88daac3397800adbd03 asciidoctor-2.0.17.gem"
+sha512sums="522c0a587a95c7a5c5618f88832c11a547448d12b883ee9a7d9f6e8509b44e2c5c027cf23335790cbc5cd74dc297010141ac535c0ba45622e49eac8d70fe6127 asciidoctor-2.0.18.gem"
-- 
GitLab

From 2aaf0aeac0a2402f2ae5f073ff4a86191781060f Mon Sep 17 00:00:00 2001
From: "A. Wilcox" <AWilcox@Wilcox-Tech.com>
Date: Thu, 30 Mar 2023 02:03:09 -0500
Subject: [PATCH 03/47] system/bc: Update to 6.5.0, update URL

Also download direct from upstream instead of deprecated GH mirror.
---
 system/bc/APKBUILD | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/system/bc/APKBUILD b/system/bc/APKBUILD
index 96940ff994..64117b1a95 100644
--- a/system/bc/APKBUILD
+++ b/system/bc/APKBUILD
@@ -1,16 +1,16 @@
 # Contributor: A. Wilcox <awilfox@adelielinux.org>
 # Maintainer: A. Wilcox <awilfox@adelielinux.org>
 pkgname=bc
-pkgver=6.2.1
+pkgver=6.5.0
 pkgrel=0
 pkgdesc="An arbitrary precision numeric processing language (calculator)"
-url="https://git.yzena.com/gavin/bc"
+url="https://git.gavinhoward.com/gavin/bc"
 arch="all"
 license="BSD-2-Clause"
 depends=""
 makedepends=""
 subpackages="$pkgname-doc"
-source="https://github.com/gavinhoward/bc/releases/download/$pkgver/bc-$pkgver.tar.xz"
+source="https://git.gavinhoward.com/gavin/bc/releases/download/$pkgver/$pkgname-$pkgver.tar.xz"
 
 build() {
 PREFIX="/usr" DESTDIR="$pkgdir" ./configure.sh -g -G -O3
@@ -25,4 +25,4 @@ package() {
 make install
 }
 
-sha512sums="6c0fcd1e5860d2e344c7245cd2ae2672b19ec184cb2723b2d811559edd47bb0b0874b0b5372a9c0f44cce6a7e9069a1b6eb99019a8d19a5b0d056b23ac44f0e3 bc-6.2.1.tar.xz"
+sha512sums="1cb03038d828a0b10734c29931777add8b22f194c507b8ff538ec1aa52a2a97a4ac2733d72cdb0710fdc6937807074e929f7918c56aaf5be8fbe908ea7c5a401 bc-6.5.0.tar.xz"
-- 
GitLab

From 5ea02efd8e9576f89274c90fe9e49839b18ccac1 Mon Sep 17 00:00:00 2001
From: "A. Wilcox" <AWilcox@Wilcox-Tech.com>
Date: Thu, 30 Mar 2023 02:05:33 -0500
Subject: [PATCH 04/47] system/byacc: Update to 20230219

---
 system/byacc/APKBUILD | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/system/byacc/APKBUILD b/system/byacc/APKBUILD
index 8d70a256a8..e9440d1708 100644
--- a/system/byacc/APKBUILD
+++ b/system/byacc/APKBUILD
@@ -1,7 +1,7 @@
 # Contributor: Nathan Angelacos <nangel@alpinelinux.org>
 # Maintainer: A. Wilcox <awilfox@adelielinux.org>
 pkgname=byacc
-pkgver=20220128
+pkgver=20230219
 pkgrel=0
 pkgdesc="The Berkeley Yacc general-purpose parser generator"
 url="https://invisible-island.net/byacc/byacc.html"
@@ -30,4 +30,4 @@ package() {
 make DESTDIR="$pkgdir" install
 }
 
-sha512sums="e8ae4c56f5be4cc0ef1d281c43f02c6296fdc40f630269f2a61af511f270ae059ad185b9718190b8133018f7b74b7ca6f84ced5d63a359960b52ea2a3ef562ea byacc-20220128.tgz"
+sha512sums="55fb9abc82afffb68a2167eb3f6211b1c2c91dda4f3b6004c01c46b2022bbec4ed8f793cfb86094eaa66decbd40d9e6f06af5b0da9d72f30ca0676aa38eb2d5e byacc-20230219.tgz"
-- 
GitLab

From 7c97598cf01499e2c2082b3f61a9ad060b536277 Mon Sep 17 00:00:00 2001
From: "A. Wilcox" <AWilcox@Wilcox-Tech.com>
Date: Fri, 31 Mar 2023 00:15:08 -0500
Subject: [PATCH 05/47] system/curl: Update to 8.0.1 [CVE]

---
 system/curl/APKBUILD | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/system/curl/APKBUILD b/system/curl/APKBUILD
index e153dca2b6..3a9b02f888 100644
--- a/system/curl/APKBUILD
+++ b/system/curl/APKBUILD
@@ -3,8 +3,8 @@
 # Contributor: Łukasz Jendrysik <scadu@yandex.com>
 # Maintainer: Zach van Rijn <me@zv.io>
 pkgname=curl
-pkgver=7.87.0
-pkgrel=1
+pkgver=8.0.1
+pkgrel=0
 pkgdesc="A URL retrival utility and library"
 url="https://curl.haxx.se"
 arch="all"
@@ -17,6 +17,15 @@ source="https://curl.haxx.se/download/$pkgname-$pkgver.tar.xz"
 subpackages="$pkgname-dbg $pkgname-doc $pkgname-dev libcurl"
 
 # secfixes:
+# 8.0.1-r0:
+# - CVE-2023-27538
+# - CVE-2023-27536
+# - CVE-2023-27535
+# - CVE-2023-27534
+# - CVE-2023-27533
+# - CVE-2023-23916
+# - CVE-2023-23915
+# - CVE-2023-23914
 # 7.79.1-r0:
 # - CVE-2021-22947
 # - CVE-2021-22946
@@ -99,7 +108,6 @@ build() {
 --enable-ipv6 \
 --enable-unix-sockets \
 --with-libssh2 \
- --without-libidn \
 --without-libidn2 \
 --disable-ldap \
 --with-pic \
@@ -124,4 +132,4 @@ libcurl() {
 mv "$pkgdir"/usr/lib "$subpkgdir"/usr
 }
 
-sha512sums="aa125991592667280dce3788aabe81487cf8c55b0afc59d675cc30b76055bb7114f5380b4a0e3b6461a8f81bf9812fa26d493a85f7e01d84263d484a0d699ee7 curl-7.87.0.tar.xz"
+sha512sums="3bb777982659ed697ae90f113ff7b65d6ce8ba9fe6a8984cfd6769d2f051a72ba953c911abe234c204ec2cc5a35d68b4d033037fad7fba31bb92a52543f8d13d curl-8.0.1.tar.xz"
-- 
GitLab

From 3a2b5420c965d86ea5861036adde8bdcdb8fb471 Mon Sep 17 00:00:00 2001
From: "A. Wilcox" <AWilcox@Wilcox-Tech.com>
Date: Fri, 31 Mar 2023 00:30:13 -0500
Subject: [PATCH 06/47] system/coreutils: Update to 9.2

Fixes: #787
---
 system/coreutils/APKBUILD | 12 ++++----
 system/coreutils/csplit-vm-test.patch | 30 +++++++++++++++++++
 .../coreutils/test-df-symlink-bindmount.patch | 6 ++--
 system/coreutils/test-tee-avoid-eintr.patch | 15 ++++++++++
 .../coreutils/tests-cp-proc-short-read.patch | 15 ----------
 5 files changed, 55 insertions(+), 23 deletions(-)
 create mode 100644 system/coreutils/csplit-vm-test.patch
 create mode 100644 system/coreutils/test-tee-avoid-eintr.patch
 delete mode 100644 system/coreutils/tests-cp-proc-short-read.patch

diff --git a/system/coreutils/APKBUILD b/system/coreutils/APKBUILD
index 40d6c9fafb..6a4c7957e4 100644
--- a/system/coreutils/APKBUILD
+++ b/system/coreutils/APKBUILD
@@ -2,7 +2,7 @@
 # Contributor: Michael Mason <ms13sp@gmail.com>
 # Maintainer: A. Wilcox <awilfox@adelielinux.org>
 pkgname=coreutils
-pkgver=9.1
+pkgver=9.2
 pkgrel=0
 pkgdesc="Basic file, shell, and text manipulation utilities"
 url="https://www.gnu.org/software/coreutils/"
@@ -15,13 +15,14 @@ subpackages="$pkgname-doc"
 [ "${CBUILD}" = "${CHOST}" ] && subpackages="$subpackages $pkgname-lang"
 install="$pkgname.post-deinstall"
 source="https://ftp.gnu.org/gnu/coreutils/$pkgname-$pkgver.tar.xz
+ csplit-vm-test.patch
 disable-csplit-io-err-test.patch
 disable-mbrtowc-test.patch
 gnulib-tests-dont-require-gpg-passphrase.patch
 gnulib-test-fixes.patch
 localename-test-fix.patch
 test-df-symlink-bindmount.patch
- tests-cp-proc-short-read.patch
+ test-tee-avoid-eintr.patch
 "
 [ "${CBUILD}" != "${CHOST}" ] && source="$source
 051_all_coreutils-mangen.patch
 "
@@ -69,14 +70,15 @@ package() {
 done
 }
 
-sha512sums="a6ee2c549140b189e8c1b35e119d4289ec27244ec0ed9da0ac55202f365a7e33778b1dc7c4e64d1669599ff81a8297fe4f5adbcc8a3a2f75c919a43cd4b9bdfa coreutils-9.1.tar.xz
+sha512sums="7e3108fefba4ef995cc73c64ac5f4e09827a44649a97ddd624eb61d67ce82da5ed6dc8c0f79d3e269f5cdb7d43877a61ef5b93194dd905bec432a7e31f9f479c coreutils-9.2.tar.xz
+5f5eab0ae69b26d0906b890a7681b9b5bbfefdd87f3caf84e307d6764ec4a23ec764c18df3404d5ceda9b90b4b5fef150aac7a6d709afa0be6f90a25793fd62c csplit-vm-test.patch
 bd8b5cecba59eba30458c8afa895877d39a987b989bc06b70cd6836e0ef684677aaadcb4949b58a713779fe1df1e15e3903e9be14e306671b86b69018b75de8b disable-csplit-io-err-test.patch
 595be7f580f8906467624959c7eddbd6d7007571d92668b6a1ea5543b73764035b44b02ab5f352c67ec6562a368f220af445edd0a0965fb9826bccfd25ddbdba disable-mbrtowc-test.patch
 fd97fccd661befc558b3afb3e32c82dd2cef511a05e6129d49540599297c1b59ab1f109e63a12f585a2348c26c28fb98330c348829d1fe61cf8149d0dd1c989c gnulib-tests-dont-require-gpg-passphrase.patch
 eaba7ad1c5b43d25dc96baaf6f01be5976f9f64c26ea55e1c78d6a3f12825f2f0e998aae7f2ad6d9b2637a3d11586ffe21b87fbbd3c1bb6c9898c5963cb2a32c gnulib-test-fixes.patch
 b1509e5678a05f24d6e764c047546e5e34a7fbd5edb59c7debedb144a0096d8ac247c7e2722a5f68c90751e5280bec743c9a6ed3e1433c1916294d68d7bca109 localename-test-fix.patch
-43bb4cb8a330dc785ff8f09685e4fb2879df49b6944e2f64f9fa34a36740f392b115b3af57d481703690b9ee6c6f48ffb385b35cd128b1b40955f69dbd68bb3d test-df-symlink-bindmount.patch
-7dfc5d37cc22c06c88a027102482b33f46a962b6f71d9adb80225d8f68d0be0760894e4480c3f80018a4f725bb7d0779987a6d28bceb7e141e01dbad78f84c2d tests-cp-proc-short-read.patch"
+15a91b343b6c4d6ef31b322cd2787893eacccb81a7e0202aeac17c3aa18130e6feb5ddb32f39187b4cf2a5d6a9a536a0a6f992e4da90d954a72f158d0df6e4ba test-df-symlink-bindmount.patch
+43bf6a1934a22478c93f4680180f1d7f157c9a5f7320bd8efac3f6e850db08bb57d32f4658eca34ee711e61538ef1932bd6abf730b4d2371e1a128c6d148939f test-tee-avoid-eintr.patch"
 [ "${CBUILD}" != "${CHOST}" ] && sha512sums="
 bae804f057252c25452ac178c545dc2c4b4775cbfbdcfd4775edd1a4ed6507882bfac39e2b11ad01b74230ca48d761bf62f11bde5bcbc35a6e5a61cbe4a46e36 051_all_coreutils-mangen.patch
 "
diff --git a/system/coreutils/csplit-vm-test.patch b/system/coreutils/csplit-vm-test.patch
new file mode 100644
index 0000000000..da2e1ad153
--- /dev/null
+++ b/system/coreutils/csplit-vm-test.patch
@@ -0,0 +1,30 @@
+From aa99b99f0cc03fac8a7db00b6aec8887756a499c Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?P=C3=A1draig=20Brady?= <P@draigBrady.com>
+Date: Thu, 30 Mar 2023 14:28:03 +0100
+Subject: [PATCH] tests: adjust csplit VM limit
+
+* tests/misc/csplit-heap.sh: More memory is required to avoid
+a false failure on some systems. Noticed with musl libc
+with bash as the shell. This is confirmed to still easily
+trigger with the original memory leak being tested.
+Addresses https://bugs.gnu.org/62542
+---
+ tests/misc/csplit-heap.sh | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/tests/misc/csplit-heap.sh b/tests/misc/csplit-heap.sh
+index 2ba3c4500..36b286b93 100755
+--- a/tests/misc/csplit-heap.sh
++++ b/tests/misc/csplit-heap.sh
+@@ -25,7 +25,7 @@ vm=$(get_min_ulimit_v_ csplit -z f %n%1) \
+ || skip_ "this shell lacks ulimit support"
+
+ (
+-ulimit -v $(($vm + 1000)) \
++ulimit -v $(($vm + 4000)) \
+ && { yes | head -n2500000; echo n; } | csplit -z - %n%1
+ ) || fail=1
+
+-- 
+2.26.2
+
diff --git a/system/coreutils/test-df-symlink-bindmount.patch b/system/coreutils/test-df-symlink-bindmount.patch
index de5cdfb022..cd44c90668 100644
--- a/system/coreutils/test-df-symlink-bindmount.patch
+++ b/system/coreutils/test-df-symlink-bindmount.patch
@@ -6,6 +6,6 @@
 # source and target. This excludes for example BTRFS sub-volumes.
-if test "$(df --output=source | grep -F "$file_system" | wc -l)" = 1; then
+if test "$(df --all --output=source | grep -F "$file_system" | wc -l)" = 1; then
- df --out=source,target '.' > out || fail=1
- compare exp out || fail=1
- fi
+ # Restrict to systems with a single file system root (and have findmnt(1))
+ if test "$(findmnt -nro FSROOT | uniq | wc -l)" = 1; then
+ df --out=source,target '.' > out || fail=1
diff --git a/system/coreutils/test-tee-avoid-eintr.patch b/system/coreutils/test-tee-avoid-eintr.patch
new file mode 100644
index 0000000000..c513486f73
--- /dev/null
+++ b/system/coreutils/test-tee-avoid-eintr.patch
@@ -0,0 +1,15 @@
+Upstream report: https://debbugs.gnu.org/cgi/bugreport.cgi?bug=62542
+
+diff --git a/tests/misc/tee.sh b/tests/misc/tee.sh
+index 0b97a9ea3..444cb688a 100755
+--- a/tests/misc/tee.sh
++++ b/tests/misc/tee.sh
+@@ -99,7 +99,7 @@ dd count=20 bs=100K if=/dev/zero status=none |
+ dd count=0 oflag=nonblock status=none
+ tee || { cleanup_; touch tee.fail; }
+ } >fifo
+-test -f tee.fail && fail=1
++test -f tee.fail && fail=1 || cleanup_
+
+ # Ensure tee honors --output-error modes
+ read_fifo() { timeout 10 dd count=1 if=fifo of=/dev/null status=none & }
diff --git a/system/coreutils/tests-cp-proc-short-read.patch b/system/coreutils/tests-cp-proc-short-read.patch
deleted file mode 100644
index 33f0417d8d..0000000000
--- a/system/coreutils/tests-cp-proc-short-read.patch
+++ /dev/null
@@ -1,15 +0,0 @@
-diff --git a/tests/cp/proc-short-read.sh b/tests/cp/proc-short-read.sh
-index bedb08e06..89dba8d3d 100755
---- a/tests/cp/proc-short-read.sh
-+++ b/tests/cp/proc-short-read.sh
-@@ -28,8 +28,8 @@ cp $proc_large 1 || fail=1
- cat $proc_large > 2 || fail=1
- 
- # adjust varying parts
---sed '/MHz/d; /bogomips/d;' 1 > proc.cp || framework_failure_
---sed '/MHz/d; /bogomips/d;' 2 > proc.cat || framework_failure_
--+sed '/MHz/d; /[Bb][Oo][Gg][Oo][Mm][Ii][Pp][Ss]/d;' 1 > proc.cp || framework_failure_
--+sed '/MHz/d; /[Bb][Oo][Gg][Oo][Mm][Ii][Pp][Ss]/d;' 2 > proc.cat || framework_failure_
- 
- compare proc.cp proc.cat || fail=1
- 
-- 
GitLab

From f441488dd1040792617e6231bb38238c0f9057fe Mon Sep 17 00:00:00 2001
From: "A. Wilcox" <AWilcox@Wilcox-Tech.com>
Date: Fri, 31 Mar 2023 00:50:18 -0500
Subject: [PATCH 07/47] system/diffutils: Update to 3.9

---
 system/diffutils/APKBUILD | 10 ++++------
 .../gnulib-tests-dont-require-gpg-passphrase.patch | 2 +-
 system/diffutils/musl-ppc32.patch | 12 ------------
 3 files changed, 5 insertions(+), 19 deletions(-)
 delete mode 100644 system/diffutils/musl-ppc32.patch

diff --git a/system/diffutils/APKBUILD b/system/diffutils/APKBUILD
index 0264d56edd..f7fd123c44 100644
--- a/system/diffutils/APKBUILD
+++ b/system/diffutils/APKBUILD
@@ -1,7 +1,7 @@
 # Maintainer: Zach van Rijn <me@zv.io>
 pkgname=diffutils
-pkgver=3.8
-pkgrel=1
+pkgver=3.9
+pkgrel=0
 pkgdesc="Utility programs used for creating patch files"
 url="https://www.gnu.org/software/diffutils/"
 arch="all"
@@ -12,7 +12,6 @@ subpackages="$pkgname-doc $pkgname-lang"
 source="https://ftp.gnu.org/pub/gnu/$pkgname/$pkgname-$pkgver.tar.xz
 disable-mbrtowc-test.patch
 gnulib-tests-dont-require-gpg-passphrase.patch
- musl-ppc32.patch
 "
 
 build() {
@@ -36,7 +35,6 @@ package() {
 rmdir -p "$pkgdir"/usr/lib 2>/dev/null || true
 }
 
-sha512sums="279441270987e70d5ecfaf84b6285a4866929c43ec877e50f154a788858d548a8a316f2fc26ad62f7348c8d289cb29a09d06dfadce1806e3d8b4ea88c8b1aa7c diffutils-3.8.tar.xz
+sha512sums="d43280cb1cb2615a8867d971467eb9a3fa037fe9a411028068036f733dab42b10d42767093cea4de71e62b2659a3ec73bd7d1a8f251befd49587e32802682d0f diffutils-3.9.tar.xz
 079d04f48fa5f5fd57eca8ba9ddefcdbb9e42a26dba7ca9b0a6f3f391112aa5ce34f0a32a931432f0ef0cc3345cc97de8310660cd05df9b30c38609440a729ee disable-mbrtowc-test.patch
-d45ab939e70b8b6e80d8950a8436be81e34e1067741196229edc7fbe33db92aa14532cf7490379741e779ae2cfc6010bec22dda2b2a046a3bc9eb68b94a0ff11 gnulib-tests-dont-require-gpg-passphrase.patch
-249b62b0d3b166507103860122460d8c042c12bbf4c7fe883e2d0e2b90bd9f2ba387f444908aa874c9a06db72aea3e7dccefa0033c663088a0be015aaa077a9c musl-ppc32.patch"
+bf78ccb22f9bcc7bb69414e19075233369c4373210a3f686b459ba37a0167ed130bd03b2d18440e04b773e5131a19006482a0ed8a1e2344ed4e869778c843f95 gnulib-tests-dont-require-gpg-passphrase.patch"
diff --git a/system/diffutils/gnulib-tests-dont-require-gpg-passphrase.patch b/system/diffutils/gnulib-tests-dont-require-gpg-passphrase.patch
index 1eb15c5e08..6cde110726 100644
--- a/system/diffutils/gnulib-tests-dont-require-gpg-passphrase.patch
+++ b/system/diffutils/gnulib-tests-dont-require-gpg-passphrase.patch
@@ -1,6 +1,6 @@
 --- diffutils-3.6/gnulib-tests/test-vc-list-files-git.sh.old 2018-01-03 18:36:29.000000000 -0600
 +++ diffutils-3.6/gnulib-tests/test-vc-list-files-git.sh 2018-06-18 21:57:08.305562148 -0500
-@@ -32,6 +32,7 @@
+@@ -39,6 +39,7 @@
 touch d/a b c &&
 git config user.email "you@example.com" &&
 git config user.name "Your Name" &&
diff --git a/system/diffutils/musl-ppc32.patch b/system/diffutils/musl-ppc32.patch
deleted file mode 100644
index c1cd3dacee..0000000000
--- a/system/diffutils/musl-ppc32.patch
+++ /dev/null
@@ -1,12 +0,0 @@
-diff -ur a/lib/sigsegv.c b/lib/sigsegv.c
---- a/lib/sigsegv.c 2022-01-08 21:35:58.620000000 +0000
-+++ b/lib/sigsegv.c 2022-01-08 21:36:11.380000000 +0000
-@@ -222,7 +222,7 @@
- # if 0
- # define SIGSEGV_FAULT_STACKPOINTER ((ucontext_t *) ucp)->uc_mcontext.regs->gpr[1]
- # else
--# define SIGSEGV_FAULT_STACKPOINTER ((ucontext_t *) ucp)->uc_mcontext.uc_regs->gregs[1]
-+# define SIGSEGV_FAULT_STACKPOINTER ((ucontext_t *) ucp)->uc_mcontext.gregs[1]
- # endif
- # endif
- 
-- 
GitLab

From 1e22a0733e5969aed017e75ee44c7a4ba93a980d Mon Sep 17 00:00:00 2001
From: "A. Wilcox" <AWilcox@Wilcox-Tech.com>
Date: Fri, 31 Mar 2023 00:50:40 -0500
Subject: [PATCH 08/47] system/dash: Update to 0.5.12

---
 system/dash/APKBUILD | 11 +++--
 system/dash/posix-dashes.patch | 87 +++++++++++++++++++++++++++++++++
 system/dash/ulimit-dash-r.patch | 33 +++++++++++++
 3 files changed, 128 insertions(+), 3 deletions(-)
 create mode 100644 system/dash/posix-dashes.patch
 create mode 100644 system/dash/ulimit-dash-r.patch

diff --git a/system/dash/APKBUILD b/system/dash/APKBUILD
index 4733a97634..06785c7f2a 100644
--- a/system/dash/APKBUILD
+++ b/system/dash/APKBUILD
@@ -1,7 +1,7 @@
 # Contributor: Natanael Copa <ncopa@alpinelinux.org>
 # Maintainer: Síle Ekaterin Liszka <sheila@vulpine.house>
 pkgname=dash
-pkgver=0.5.11.5
+pkgver=0.5.12
 pkgrel=0
 pkgdesc="Small and fast POSIX-compliant shell"
 url="http://gondor.apana.org.au/~herbert/dash/"
@@ -10,7 +10,10 @@ license="GPL-2.0+"
 depends=""
 makedepends=""
 subpackages="$pkgname-binsh::noarch $pkgname-doc"
-source="http://gondor.apana.org.au/~herbert/$pkgname/files/$pkgname-$pkgver.tar.gz"
+source="http://gondor.apana.org.au/~herbert/$pkgname/files/$pkgname-$pkgver.tar.gz
+ ulimit-dash-r.patch
+ posix-dashes.patch
+ "
 
 build() {
 ./configure \
@@ -41,4 +44,6 @@ binsh() {
 ln -s /bin/dash "$subpkgdir"/bin/sh
 }
 
-sha512sums="5387e213820eeb44d812bb4697543023fd4662b51a9ffd52a702810fed8b28d23fbe35a7f371e6686107de9f81902eff109458964b4622f4c5412d60190a66bf dash-0.5.11.5.tar.gz"
+sha512sums="13bd262be0089260cbd13530a9cf34690c0abeb2f1920eb5e61be7951b716f9f335b86279d425dbfae56cbd49231a8fdffdff70601a5177da3d543be6fc5eb17 dash-0.5.12.tar.gz
+84e5bf95c5824d1929d1c10935d0197715277aa29e2481dee03302731c06e97afb953ef6e4f6099a7abfd88fc0a52e3cf38e3d30d497dc3040ed4dc5d9802506 ulimit-dash-r.patch
+b0dd2742f58fb17725624aaad25bcef10bf54c1c4ec8237e1d5e45552aceecbc4bcf491f1f7b7b7a2dea4168939b07f7671d706e43d6e148d171cf48b389aa0c posix-dashes.patch"
diff --git a/system/dash/posix-dashes.patch b/system/dash/posix-dashes.patch
new file mode 100644
index 0000000000..b852ec20c7
--- /dev/null
+++ b/system/dash/posix-dashes.patch
@@ -0,0 +1,87 @@
+From 54485578e01017534dae30731f7682abadb38a09 Mon Sep 17 00:00:00 2001
+From: наб <nabijaczleweli@nabijaczleweli.xyz>
+Date: Wed, 4 Jan 2023 12:33:45 +0100
+Subject: builtin: Ignore first -- in getopts per POSIX
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Issue 7, XCU, getopts, OPTIONS reads "None.",
+and getopts isn't a special built-in listed in sexion 2.14 ‒
+this means that XCU, 1. Introduction, 1.4 Utility Description Defaults,
+OPTIONS, Default Behavior applies:
+ Default Behavior: When this section is listed as "None.", it means
+ that the implementation need not support any options. Standard
+ utilities that do not accept options, but that do accept operands,
+ shall recognize "--" as a first argument to be discarded.
+
+Test with: getopts -- d: a
+Correct output is no output, exit 1
+Wrong output errors out with d: being an invalid argument name
+
+Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
+---
+ src/options.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+diff --git a/src/options.c b/src/options.c
+index 3158498..2d4bd3b 100644
+--- a/src/options.c
++++ b/src/options.c
+@@ -409,6 +409,9 @@ getoptscmd(int argc, char **argv)
+ {
+ char **optbase;
+
++ nextopt(nullstr);
++ argc -= argptr - argv - 1;
++ argv = argptr - 1;
+ if (argc < 3)
+ sh_error("Usage: getopts optstring var [arg...]");
+ else if (argc == 3) {
+-- 
+cgit
+
+From ba57b84b305dd16f9d3e0d798835a7e9e15454ae Mon Sep 17 00:00:00 2001
+From: наб <nabijaczleweli@nabijaczleweli.xyz>
+Date: Wed, 4 Jan 2023 12:35:13 +0100
+Subject: builtin: Ignore first -- in type for consistency
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+This appears to be the only remaining built-in that doesn't use
+nextopt() to parse its arguments (and isn't forbidden from doing so) ‒
+users expect to be able to do this, and it's nice to be consistent here.
+
+Test with: type -- ls --
+Correct output lists ls=/bin/ls, then --=ENOENT
+Wrong output lists --=ENOENT, ls=/bin/ls, --=ENOENT
+
+Fixes: https://bugs.debian.org/870317
+Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
+---
+ src/exec.c | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+diff --git a/src/exec.c b/src/exec.c
+index d7a1f53..83cba94 100644
+--- a/src/exec.c
++++ b/src/exec.c
+@@ -766,11 +766,11 @@ unsetfunc(const char *name)
+ int
+ typecmd(int argc, char **argv)
+ {
+- int i;
+ int err = 0;
+
+- for (i = 1; i < argc; i++) {
+- err |= describe_command(out1, argv[i], NULL, 1);
++ nextopt(nullstr);
++ while (*argptr) {
++ err |= describe_command(out1, *argptr++, NULL, 1);
+ }
+ return err;
+ }
+-- 
+cgit
+
diff --git a/system/dash/ulimit-dash-r.patch b/system/dash/ulimit-dash-r.patch
new file mode 100644
index 0000000000..48111f16d6
--- /dev/null
+++ b/system/dash/ulimit-dash-r.patch
@@ -0,0 +1,33 @@
+From 4bdefd16c6ea4b5b7c2b4dc2fccf5226401e13b7 Mon Sep 17 00:00:00 2001
+From: Vincent Lefevre <vincent@vinc17.net>
+Date: Fri, 16 Dec 2022 18:20:19 +0100
+Subject: builtin: Actually accept ulimit -r
+
+The original commit that added it supposes this works, but it only adds
+it to the ulimit -a listing and the manual, but doesn't allow it as an
+option.
+
+Fixes: 46abc8c6d8a5 ("[BUILTIN] Add support for ulimit -r")
+Link: https://bugs.debian.org/975326
+Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
+---
+ src/miscbltin.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+diff --git a/src/miscbltin.c b/src/miscbltin.c
+index 5ccbbcb..e553f9e 100644
+--- a/src/miscbltin.c
++++ b/src/miscbltin.c
+@@ -440,6 +440,9 @@ ulimitcmd(int argc, char **argv)
+ #endif
+ #ifdef RLIMIT_LOCKS
+ "w"
++#endif
++#ifdef RLIMIT_RTPRIO
++ "r"
+ #endif
+ )) != '\0')
+ switch (optc) {
+-- 
+cgit
+
-- 
GitLab

From dac81897bae9d5295b1e3b247232ec2455da0dcb Mon Sep 17 00:00:00 2001
From: "A. Wilcox" <AWilcox@Wilcox-Tech.com>
Date: Fri, 31 Mar 2023 01:02:56 -0500
Subject: [PATCH 09/47] system/e2fsprogs: Update to 1.47.0 [CVE]

---
 system/e2fsprogs/APKBUILD | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/system/e2fsprogs/APKBUILD b/system/e2fsprogs/APKBUILD
index 3e665afda6..1313cde8b8 100644
--- a/system/e2fsprogs/APKBUILD
+++ b/system/e2fsprogs/APKBUILD
@@ -1,7 +1,7 @@
 # Contributor: Valery Kartel <valery.kartel@gmail.com>
 # Maintainer: A. Wilcox <awilfox@adelielinux.org>
 pkgname=e2fsprogs
-pkgver=1.46.5
+pkgver=1.47.0
 pkgrel=0
 pkgdesc="Ext2/3/4 filesystem utilities"
 url="http://e2fsprogs.sourceforge.net"
@@ -17,6 +17,8 @@ source="https://www.kernel.org/pub/linux/kernel/people/tytso/$pkgname/v$pkgver/$pkgname-$pkgver.tar.xz
 "
 
 # secfixes:
+# 1.47.0-r0:
+# - CVE-2022-1304
 # 1.45.3-r1:
 # - CVE-2019-5094
 
@@ -59,6 +61,6 @@ libcom_err() {
 mv "$pkgdir"/lib/libcom_err* "$subpkgdir"/lib/
 }
 
-sha512sums="53282e1c524f62a95012b1aceef296d494f5238c82c9b08b29fbe6a803dbf7ccfdcd9124eb2f11fe2ff9de26387c78751a92c3217ca414f6db6242098a72d3fa e2fsprogs-1.46.5.tar.xz
+sha512sums="0e6d64c565b455becb84166b6a5c7090724bac5cfe69098657a31bf0481b4e2cace3de1363121b7d84820fbae85b7c83ac5f2a2b02bb36280f0e3ae83a934cec e2fsprogs-1.47.0.tar.xz
 34ca45c64a132bb4b507cd4ffb763c6d1b7979eccfed20f63417e514871b47639d32f2a3ecff090713c21a0f02ac503d5093960c80401d64081c592d01af279d header-fix.patch
 a3cf5ce222fce3d2655d4cac3cbead1f5eb0f7da5d10f2d9ed3a7f50b5970b74e559af47d50802270c853451e850177772fab3d7c495aa12fbd0aa85d4c34cf2 time64.patch"
-- 
GitLab

From 51635c486701bf7f0e00d0d90866cbc09dd65436 Mon Sep 17 00:00:00 2001
From: "A. Wilcox" <AWilcox@Wilcox-Tech.com>
Date: Fri, 31 Mar 2023 01:04:46 -0500
Subject: [PATCH 10/47] system/ed: Update to 1.19

---
 system/ed/APKBUILD | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/system/ed/APKBUILD b/system/ed/APKBUILD
index b22e1a6a80..6fb692b129 100644
--- a/system/ed/APKBUILD
+++ b/system/ed/APKBUILD
@@ -1,6 +1,6 @@
 # Maintainer: Zach van Rijn <me@zv.io>
 pkgname=ed
-pkgver=1.18
+pkgver=1.19
 pkgrel=0
 pkgdesc="GNU version of standard text editor"
 url="https://www.gnu.org/software/ed/"
@@ -30,4 +30,4 @@ package() {
 make DESTDIR="$pkgdir" install
 }
 
-sha512sums="e5dac94697d63fb90cc17e3e653fa56309e9652cc25b2e98a0e21f1ed41e38c48bc33b5fc746275a59e702d1644d3af88f3d82598b482f7309f4e68aab783286 ed-1.18.tar.lz"
+sha512sums="594d5cf895931783110ee1956078f6a77aee022fb67730cbc6f1d30521c120b97820a5eb349278575f85c4c1e17fea0e16a3bc22592edae53c6fd27941ee3d7e ed-1.19.tar.lz"
-- 
GitLab

From 2637cad6efa027d88c6ca5588affc55c07071fad Mon Sep 17 00:00:00 2001
From: "A. Wilcox" <AWilcox@Wilcox-Tech.com>
Date: Fri, 31 Mar 2023 01:40:01 -0500
Subject: [PATCH 11/47] system/git: Update to 2.40.0

---
 system/git/APKBUILD | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/system/git/APKBUILD b/system/git/APKBUILD
index c6f54fccd4..cd84b9352c 100644
--- a/system/git/APKBUILD
+++ b/system/git/APKBUILD
@@ -1,7 +1,7 @@
 # Contributor: Łukasz Jendrysik <scadu@yandex.com>
 # Maintainer: A. Wilcox <awilfox@adelielinux.org>
 pkgname=git
-pkgver=2.39.2
+pkgver=2.40.0
 pkgrel=0
 pkgdesc="Distributed version control system"
 url="https://www.git-scm.com/"
@@ -167,7 +167,7 @@ subtree() {
 make install prefix=/usr DESTDIR="$subpkgdir"
 }
 
-sha512sums="fdca70bee19401c5c7a6d2f3d70bd80b6ba99f6a9f97947de31d4366ee3a78a18d5298abb25727ec8ef67131bca673e48dff2a5a050b6e032884ab04066b20cb git-2.39.2.tar.xz
+sha512sums="a2720f8f9a0258c0bb5e23badcfd68a147682e45a5d039a42c47128296c508109d5039029db89311a35db97a9008585e84ed11b400846502c9be913d67f0fd90 git-2.40.0.tar.xz
 4bcc8367478601c856e0977d46fc4842f62daf300093a576704ad27ccd9fae975f95d3fbfcb00e9fa7254b1db64cd074f49a94fb5cf0abd8d72d7edc9ab8798c dont-test-other-encodings.patch
 89528cdd14c51fd568aa61cf6c5eae08ea0844e59f9af9292da5fc6c268261f4166017d002d494400945e248df6b844e2f9f9cd2d9345d516983f5a110e4c42a git-daemon.initd
 fbf1f425206a76e2a8f82342537ed939ff7e623d644c086ca2ced5f69b36734695f9f80ebda1728f75a94d6cd2fcb71bf845b64239368caab418e4d368c141ec git-daemon.confd
-- 
GitLab

From 5d9ffc28fc6a0d99338cac0b020e3eac486bccfc Mon Sep 17 00:00:00 2001
From: "A. Wilcox" <AWilcox@Wilcox-Tech.com>
Date: Fri, 31 Mar 2023 01:43:34 -0500
Subject: [PATCH 12/47] system/grep: Update to 3.10

---
 system/grep/APKBUILD | 12 +++++-------
 .../gnulib-tests-dont-require-gpg-passphrase.patch | 2 +-
 system/grep/musl-ppc32.patch | 12 ------------
 3 files changed, 6 insertions(+), 20 deletions(-)
 delete mode 100644 system/grep/musl-ppc32.patch

diff --git a/system/grep/APKBUILD b/system/grep/APKBUILD
index dd6ef54a6f..67b6356b23 100644
--- a/system/grep/APKBUILD
+++ b/system/grep/APKBUILD
@@ -2,8 +2,8 @@
 # Contributor: Natanael Copa <ncopa@alpinelinux.org>
 # Maintainer: A. Wilcox <awilfox@adelielinux.org>
 pkgname=grep
-pkgver=3.7
-pkgrel=1
+pkgver=3.10
+pkgrel=0
 pkgdesc="Pattern matching utilities"
 url="https://www.gnu.org/software/grep/grep.html"
 arch="all"
@@ -15,7 +15,6 @@ subpackages="$pkgname-doc $pkgname-lang"
 source="https://mirrors.kernel.org/gnu/$pkgname/$pkgname-$pkgver.tar.xz
 gnulib-tests-dont-require-gpg-passphrase.patch
 fix-tests.patch
- musl-ppc32.patch
 "
 
 build() {
@@ -41,7 +40,6 @@ package() {
 rmdir -p "$pkgdir"/usr/lib 2>/dev/null || true
 }
 
-sha512sums="e9e45dcd40af8367f819f2b93c5e1b4e98a251a9aa251841fa67a875380fae52cfa27c68c6dbdd6a4dde1b1017ee0f6b9833ef6dd6e419d32d71b6df5e972b82 grep-3.7.tar.xz
-fd97fccd661befc558b3afb3e32c82dd2cef511a05e6129d49540599297c1b59ab1f109e63a12f585a2348c26c28fb98330c348829d1fe61cf8149d0dd1c989c gnulib-tests-dont-require-gpg-passphrase.patch
-9ba6b01c0c74933299afb469dadd2ea0c7e24befa34c691671a576063e32a1f0c735541e5e2bb0073d8afd814790909f7f895827aa8a2fbacdfcae380a7bcb11 fix-tests.patch
-203765b90acf7fffaf8da9906e8d772ff81e074bf6585e35c713df107e9537c0a915013a327b3f770cc06c3a85f5ef55d6be32bbfdf6ce91a1aa1d4ac47e8ef3 musl-ppc32.patch"
+sha512sums="865e8f3fd7afc68f1a52f5e1e1ee05fb9c6d6182201efb0dbdf6075347b0b1d2bf0784537a8f8dd4fb050d523f7a1d2fb5b9c3e3245087d0e6cc12d6e9d3961b grep-3.10.tar.xz
+7e4bc1da5de16a036e00fef6d9387b701bbe447d21d77cc3fc28a73e0364c972dec7cdcd70a176ef339b221fad92e7feccbb1e20f3f7b114a3585b8551770de5 gnulib-tests-dont-require-gpg-passphrase.patch
+9ba6b01c0c74933299afb469dadd2ea0c7e24befa34c691671a576063e32a1f0c735541e5e2bb0073d8afd814790909f7f895827aa8a2fbacdfcae380a7bcb11 fix-tests.patch"
diff --git a/system/grep/gnulib-tests-dont-require-gpg-passphrase.patch b/system/grep/gnulib-tests-dont-require-gpg-passphrase.patch
index b649931873..a1ec3b54f1 100644
--- a/system/grep/gnulib-tests-dont-require-gpg-passphrase.patch
+++ b/system/grep/gnulib-tests-dont-require-gpg-passphrase.patch
@@ -1,6 +1,6 @@
 --- grep-3.1/gnulib-tests/test-vc-list-files-git.sh.old 2018-01-03 18:36:29.000000000 -0600
 +++ grep-3.1/gnulib-tests/test-vc-list-files-git.sh 2018-06-18 21:57:08.305562148 -0500
-@@ -32,6 +32,7 @@
+@@ -39,6 +39,7 @@
 touch d/a b c &&
 git config user.email "you@example.com" &&
 git config user.name "Your Name" &&
diff --git a/system/grep/musl-ppc32.patch b/system/grep/musl-ppc32.patch
deleted file mode 100644
index 7f41494084..0000000000
--- a/system/grep/musl-ppc32.patch
+++ /dev/null
@@ -1,12 +0,0 @@
-diff -ur a/lib/sigsegv.c b/lib/sigsegv.c
---- a/lib/sigsegv.c 2022-05-07 13:35:30.000000000 +0000
-+++ b/lib/sigsegv.c 2022-05-07 13:38:33.030000000 +0000
-@@ -222,7 +222,7 @@
- # if 0
- # define SIGSEGV_FAULT_STACKPOINTER ((ucontext_t *) ucp)->uc_mcontext.regs->gpr[1]
- # else
--# define SIGSEGV_FAULT_STACKPOINTER ((ucontext_t *) ucp)->uc_mcontext.uc_regs->gregs[1]
-+# define SIGSEGV_FAULT_STACKPOINTER ((ucontext_t *) ucp)->uc_mcontext.gregs[1]
- # endif
- # endif
- 
-- 
GitLab

From 0eaa7a4038aee79b01fea5f9847daca1217c05c3 Mon Sep 17 00:00:00 2001
From: "A. Wilcox" <AWilcox@Wilcox-Tech.com>
Date: Fri, 31 Mar 2023 01:46:56 -0500
Subject: [PATCH 13/47] system/help2man: Update to 1.49.3

---
 system/help2man/APKBUILD | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/system/help2man/APKBUILD b/system/help2man/APKBUILD
index 5570fe0a43..7c81cd3f12 100644
--- a/system/help2man/APKBUILD
+++ b/system/help2man/APKBUILD
@@ -1,6 +1,6 @@
 # Maintainer: A. Wilcox <awilfox@adelielinux.org>
 pkgname=help2man
-pkgver=1.49.2
+pkgver=1.49.3
 pkgrel=0
 pkgdesc="Create simple man pages from --help output"
 url="https://www.gnu.org/software/help2man"
@@ -28,4 +28,4 @@ package() {
 make DESTDIR="$pkgdir" install
 }
 
-sha512sums="cb8f9f923263d7160a27a7924ae559aba93d7258167888eb9e0e3e97a2014297b8d739b2bb7869acbf586354d099bd91d85f8208b901bce5ba0c5ad4b6abd6d5 help2man-1.49.2.tar.xz"
+sha512sums="5f19fad1e38b0572b63ed8bfd146b717fc22dff3a26641e8b5c8431df13da9574679d9a3407da62790db8a29286694ea1bfd751cba168f475302ad31cce845cf help2man-1.49.3.tar.xz"
-- 
GitLab

From 1575ead08d70bb8978ee88fd38e7d977544fbd48 Mon Sep 17 00:00:00 2001
From: "A. Wilcox" <AWilcox@Wilcox-Tech.com>
Date: Fri, 31 Mar 2023 01:49:20 -0500
Subject: [PATCH 14/47] system/kmod: Update to 30

---
 system/kmod/APKBUILD | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/system/kmod/APKBUILD b/system/kmod/APKBUILD
index e384286df4..171f279ce4 100644
--- a/system/kmod/APKBUILD
+++ b/system/kmod/APKBUILD
@@ -1,8 +1,8 @@
 # Contributor: Natanael Copa <ncopa@alpinelinux.org>
 # Maintainer: A. Wilcox <awilfox@adelielinux.org>
 pkgname=kmod
-pkgver=29
-pkgrel=1
+pkgver=30
+pkgrel=0
 pkgdesc="Linux kernel module management utilities"
 url="https://git.kernel.org/?p=utils/kernel/kmod/kmod.git;a=summary"
 arch="all"
@@ -60,6 +60,6 @@ bashcomp() {
 }
 
 
-sha512sums="557cdcaec75e5a1ceea2d10862c944e9a65ef54f6ee9da6dc98ce4582418fdc9958aab2e14a84807db61daf36ec4fcdc23a36376c39d5dc31d1823ca7cd47998 kmod-29.tar.xz
+sha512sums="e2cd34e600a72e44710760dfda9364b790b8352a99eafbd43e683e4a06f37e6b5c0b5d14e7c28070e30fc5fc6ceddedf7b97f3b6c2c5c2d91204fefd630b9a3e kmod-30.tar.xz
 f2ea3527bfba182c5e15557c224a5bba8c7e2ea3d21cf604e6eb2277226dcf983c3aeb9ac44a322c7f2b4942b35135da999d63a5b946b829d3f3b09c050a0f17 strndupa.patch
 7f0f89fe98167f71b7924f341ba701d001158225463c2f5e39f706a0224b1e952ee901b441f28cb15787563e83bb6e54eb752bf8669ae7c0ffbb11984542f354 kmod-static-nodes.initd"
-- 
GitLab

From 81f648ea47de70e9870bda654ee3f1df725b62db Mon Sep 17 00:00:00 2001
From: "A. Wilcox" <AWilcox@Wilcox-Tech.com>
Date: Fri, 31 Mar 2023 01:52:02 -0500
Subject: [PATCH 15/47] system/lddtree: Update to 1.27

---
 system/lddtree/APKBUILD | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/system/lddtree/APKBUILD b/system/lddtree/APKBUILD
index fd852f0b03..7f709a49d8 100644
--- a/system/lddtree/APKBUILD
+++ b/system/lddtree/APKBUILD
@@ -1,12 +1,12 @@
 # Contributor: Natanael Copa <ncopa@alpinelinux.org>
 # Maintainer:
 pkgname=lddtree
-pkgver=1.26
-pkgrel=2
+pkgver=1.27
+pkgrel=0
 pkgdesc="List dynamic dependencies as a tree"
 url="https://github.com/ncopa/lddtree"
 arch="noarch"
-options="!check"
+options="!check" # Requires atf.
 license="GPL-2.0-only"
 depends="scanelf cmd:which"
 makedepends=""
@@ -18,4 +18,4 @@ package() {
 install -Dm755 lddtree.sh "$pkgdir"/usr/bin/lddtree
 }
 
-sha512sums="9c244cf47627e6cfb396f4187e7c35b438bcb8e3978e43a91ad5a5d034233eaaffd1319e98b6a26b8bbcb570ede3eeb809a14720b50771587a96dde5f6516340 lddtree-1.26.tar.gz"
+sha512sums="aa27c8556a8370a662dc08a5a1fe80ce1dbcab28478e61bf52997fe93108c1ce96f6bbe5427326c5f215abc58b3ba45a8aae4e5196bda1746ef9c95c99dd5ee2 lddtree-1.27.tar.gz"
-- 
GitLab

From 29f253f0fa6ef36b14ab09ab825b9784de06c225 Mon Sep 17 00:00:00 2001
From: "A. Wilcox" <AWilcox@Wilcox-Tech.com>
Date: Fri, 31 Mar 2023 01:56:55 -0500
Subject: [PATCH 16/47] system/libarchive: Update to 3.6.2 [CVE]

---
 system/libarchive/APKBUILD | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/system/libarchive/APKBUILD b/system/libarchive/APKBUILD
index e76303dd5f..5c37f8a8dc 100644
--- a/system/libarchive/APKBUILD
+++ b/system/libarchive/APKBUILD
@@ -1,10 +1,10 @@
 # Contributor: Sergey Lukin <sergej.lukin@gmail.com>
 # Maintainer: A. Wilcox <awilfox@adelielinux.org>
 pkgname=libarchive
-pkgver=3.6.1
-pkgrel=1
+pkgver=3.6.2
+pkgrel=0
 pkgdesc="Multi-format archive and compression library"
-url="https://libarchive.org/"
+url="https://www.libarchive.org/"
 arch="all"
 license="BSD-2-Clause AND BSD-3-Clause AND Public-Domain"
 depends=""
@@ -17,6 +17,8 @@ source="https://github.com/libarchive/libarchive/releases/download/v$pkgver/$pkg
 "
 
 # secfixes:
+# 3.6.2-r0:
+# - CVE-2022-36227
 # 3.6.1-r0:
 # - CVE-2022-26280
 # 3.4.2-r0:
@@ -58,6 +60,6 @@ tools() {
 ln -s bsdcpio "$subpkgdir"/usr/bin/cpio
 }
 
-sha512sums="58f7ac0c52116f73326a07dec10ff232be33b318862078785dc39f1fb2f8773b5194eabfa14764bb51ce6a5a1aa8820526e7f4c76087a6f4fcbe7789a22275b4 libarchive-3.6.1.tar.gz
+sha512sums="24e476465054a29a2d48adf3c197a171b5361fa5038729a1f14d578c6701424de4e5dd6a2b20a6b697969ab43bdd8afc1585f8de0465c266f455d7eaa19e5048 libarchive-3.6.2.tar.gz
 27cf2aaa3e70e3a2a9944fac0c96c411e669c7e1a48daad1423bff68eef0f49153e5ef9d22dc9591a65353119d7fe203a28258ab82278aeb86b46fe691bcfb6a disable-locale-tests.patch
 56a2b13b6cd7b127c04ac50ebf49994ec91ff2467d5d0f32b8428bd4df82b167459d4630dee5d1b5119f70b04ea6fe4411d44b768e0f313fa302c9e5fe51b7cd disable-unaligned-access-arm32-xxhash.patch"
-- 
GitLab

From 57fcbe5a86ecd7f69794520d2b2aecadb4df3417 Mon Sep 17 00:00:00 2001
From: "A. Wilcox" <AWilcox@Wilcox-Tech.com>
Date: Fri, 31 Mar 2023 01:59:02 -0500
Subject: [PATCH 17/47] system/libcap: Update to 2.68

---
 system/libcap/APKBUILD | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/system/libcap/APKBUILD b/system/libcap/APKBUILD
index 19eddc33c3..0a6a688295 100644
--- a/system/libcap/APKBUILD
+++ b/system/libcap/APKBUILD
@@ -1,7 +1,7 @@
 # Contributor: Natanael Copa <ncopa@alpinelinux.org>
 # Maintainer: Zach van Rijn <me@zv.io>
 pkgname=libcap
-pkgver=2.64
+pkgver=2.68
 pkgrel=0
 pkgdesc="POSIX 1003.1e capabilities"
 arch="all"
@@ -41,4 +41,4 @@ package() {
 DESTDIR="$pkgdir" install
 }
 
-sha512sums="3c5cf478cef249585ee1a0dfd75c6b41b0daf4e1ecb59dce894eac5523841aa79ca499be4161f73193dd8e7363edcd51063f3e281930cee939ebd50983eecbaf libcap-2.64.tar.xz"
+sha512sums="ede3e1356aef22e18a46dc8ff0727500ab023bea698cf2bb822abb06625e272940afea52ad6457d0cd8cf1c7f435f1b568baf0a6bf0a08ae96fbf6d7502f9de2 libcap-2.68.tar.xz"
-- 
GitLab

From ca543e8ec92cd508a6d596054474a180c462842d Mon Sep 17 00:00:00 2001
From: "A. Wilcox" <AWilcox@Wilcox-Tech.com>
Date: Fri, 31 Mar 2023 02:14:00 -0500
Subject: [PATCH 18/47] system/libffi: Update to 3.4.4

---
 system/libffi/APKBUILD | 9 ++--
 system/libffi/gnu-linux-define.patch | 15 ------
 system/libffi/long-double-size.patch | 44 ----------------
 system/libffi/powerpc-fixes.patch | 79 ----------------------------
 4 files changed, 3 insertions(+), 144 deletions(-)
 delete mode 100644 system/libffi/gnu-linux-define.patch
 delete mode 100644 system/libffi/long-double-size.patch
 delete mode 100644 system/libffi/powerpc-fixes.patch

diff --git a/system/libffi/APKBUILD b/system/libffi/APKBUILD
index 659ec19842..7b9e3b07c1 100644
--- a/system/libffi/APKBUILD
+++ b/system/libffi/APKBUILD
@@ -1,6 +1,6 @@
 # Maintainer: A. Wilcox <awilfox@adelielinux.org>
 pkgname=libffi
-pkgver=3.4.2
+pkgver=3.4.4
 pkgrel=0
 pkgdesc="A portable foreign function interface library"
 url="https://sourceware.org/libffi"
@@ -10,9 +10,7 @@ depends=""
 makedepends="texinfo"
 checkdepends="dejagnu"
 subpackages="$pkgname-dev $pkgname-doc"
-source="https://github.com/$pkgname/$pkgname/releases/download/v$pkgver/$pkgname-$pkgver.tar.gz
- long-double-size.patch
- "
+source="https://github.com/$pkgname/$pkgname/releases/download/v$pkgver/$pkgname-$pkgver.tar.gz"
 
 build () {
 ./configure \
@@ -32,5 +30,4 @@ package() {
 install -m644 LICENSE ""$pkgdir"/usr/share/licenses/$pkgname/"
 }
 
-sha512sums="31bad35251bf5c0adb998c88ff065085ca6105cf22071b9bd4b5d5d69db4fadf16cadeec9baca944c4bb97b619b035bb8279de8794b922531fddeb0779eb7fb1 libffi-3.4.2.tar.gz
-ef6ee13ac5ad6a5c7b60235d64e0c4bd57ccea320c7498327a0602cbe9398dadeef2c10f2ff958f106ae033412d1e174735a6c5ab0ee68982275274cc4906fde long-double-size.patch"
+sha512sums="88680aeb0fa0dc0319e5cd2ba45b4b5a340bc9b4bcf20b1e0613b39cd898f177a3863aa94034d8e23a7f6f44d858a53dcd36d1bb8dee13b751ef814224061889 libffi-3.4.4.tar.gz"
diff --git a/system/libffi/gnu-linux-define.patch b/system/libffi/gnu-linux-define.patch
deleted file mode 100644
index 8dcae738ba..0000000000
--- a/system/libffi/gnu-linux-define.patch
+++ /dev/null
@@ -1,15 +0,0 @@
-http://bugs.alpinelinux.org/issues/4275
-
-diff --git a/closures.c.orig b/closures.c
-index 721ff00..22a699c 100644
---- a/src/closures.c.orig
-+++ b/src/closures.c
-@@ -34,7 +34,7 @@
- #include <ffi_common.h>
- 
- #if !FFI_MMAP_EXEC_WRIT && !FFI_EXEC_TRAMPOLINE_TABLE
--# if __gnu_linux__ && !defined(__ANDROID__)
-+# if __linux__ && !defined(__ANDROID__)
- /* This macro indicates it may be forbidden to map anonymous memory
- with both write and execute permission. Code compiled when this
- option is defined will attempt to map such pages once, but if it
diff --git a/system/libffi/long-double-size.patch b/system/libffi/long-double-size.patch
deleted file mode 100644
index 8f04496eb5..0000000000
--- a/system/libffi/long-double-size.patch
+++ /dev/null
@@ -1,44 +0,0 @@
---- libffi-3.4.2/testsuite/libffi.closures/huge_struct.c.old 2021-06-27 10:17:08.000000000 -0500
-+++ libffi-3.4.2/testsuite/libffi.closures/huge_struct.c 2022-05-05 03:22:39.842515657 -0500
-@@ -6,7 +6,7 @@
- */
- 
- /* { dg-do run { xfail strongarm*-*-* xscale*-*-* } } */
--/* { dg-options -mlong-double-128 { target powerpc64*-*-linux* } } */
-+/* { dg-options -mlong-double-128 { target powerpc64*-*-linux-gnu* } } */
- /* { dg-options -Wformat=0 { target moxie*-*-elf or1k-*-* } } */
- 
- #include <inttypes.h>
---- libffi-3.4.2/testsuite/libffi.closures/cls_longdouble.c.old 2021-06-27 10:17:08.000000000 -0500
-+++ libffi-3.4.2/testsuite/libffi.closures/cls_longdouble.c 2022-05-05 03:22:48.131612271 -0500
-@@ -7,7 +7,7 @@
- /* This test is known to PASS on armv7l-unknown-linux-gnueabihf, so I have
- remove the xfail for arm*-*-* below, until we know more. */
- /* { dg-do run { xfail strongarm*-*-* xscale*-*-* } } */
--/* { dg-options -mlong-double-128 { target powerpc64*-*-linux* } } */
-+/* { dg-options -mlong-double-128 { target powerpc64*-*-linux-gnu* } } */
- 
- #include "ffitest.h"
- 
---- libffi-3.4.2/testsuite/libffi.closures/cls_align_longdouble_split.c.old 2021-06-27 10:17:08.000000000 -0500
-+++ libffi-3.4.2/testsuite/libffi.closures/cls_align_longdouble_split.c 2022-05-05 03:22:55.740782984 -0500
-@@ -5,7 +5,7 @@
- Originator: <hos@tamanegi.org> 20031203 */
- 
- /* { dg-do run { xfail strongarm*-*-* xscale*-*-* } } */
--/* { dg-options -mlong-double-128 { target powerpc64*-*-linux* } } */
-+/* { dg-options -mlong-double-128 { target powerpc64*-*-linux-gnu* } } */
- 
- #include "ffitest.h"
- 
---- libffi-3.4.2/testsuite/libffi.closures/cls_align_longdouble_split2.c.old 2021-06-27 10:17:08.000000000 -0500
-+++ libffi-3.4.2/testsuite/libffi.closures/cls_align_longdouble_split2.c 2022-05-05 03:23:00.930217413 -0500
-@@ -6,7 +6,7 @@
- */
- 
- /* { dg-do run { xfail strongarm*-*-* } } */
--/* { dg-options -mlong-double-128 { target powerpc64*-*-linux* } } */
-+/* { dg-options -mlong-double-128 { target powerpc64*-*-linux-gnu* } } */
- 
- #include "ffitest.h"
- 
diff --git a/system/libffi/powerpc-fixes.patch b/system/libffi/powerpc-fixes.patch
deleted file mode 100644
index e62ae32d17..0000000000
--- a/system/libffi/powerpc-fixes.patch
+++ /dev/null
@@ -1,79 +0,0 @@
-From bf6946074b948540e4147154041ea244bafb38c4 Mon Sep 17 00:00:00 2001
-From: Samuel Holland <samuel@sholland.org>
-Date: Sat, 13 Oct 2018 01:14:03 +0000
-Subject: [PATCH] powerpc: Fix alignment after float structs
-
----
- src/powerpc/ffi_linux64.c | 8 +++-----
- 1 file changed, 3 insertions(+), 5 deletions(-)
-
-diff --git a/src/powerpc/ffi_linux64.c b/src/powerpc/ffi_linux64.c
-index 2534ecf3..197a270d 100644
---- a/src/powerpc/ffi_linux64.c
-+++ b/src/powerpc/ffi_linux64.c
-@@ -580,11 +580,9 @@ ffi_prep_args64 (extended_cif *ecif, unsigned long *const stack)
- fparg_count++;
- }
- while (--elnum != 0);
-- if ((next_arg.p & 3) != 0)
-- {
-- if (++next_arg.f == gpr_end.f)
-- next_arg.f = rest.f;
-- }
-+ if ((next_arg.p & 7) != 0)
-+ if (++next_arg.f == gpr_end.f)
-+ next_arg.f = rest.f;
- }
- else
- do
-From 49a1bbadfa0b5ad5c373271c8ba7a5d8911a85d9 Mon Sep 17 00:00:00 2001
-From: Samuel Holland <samuel@sholland.org>
-Date: Sat, 13 Oct 2018 01:14:20 +0000
-Subject: [PATCH] powerpc: Don't pad rvalues copied from FP regs
-
----
- src/powerpc/ffi.c | 5 +++--
- 1 file changed, 3 insertions(+), 2 deletions(-)
-
-diff --git a/src/powerpc/ffi.c b/src/powerpc/ffi.c
-index 7eb543e4..94a11700 100644
---- a/src/powerpc/ffi.c
-+++ b/src/powerpc/ffi.c
-@@ -121,8 +121,9 @@ ffi_call_int (ffi_cif *cif,
- # endif
- /* The SYSV ABI returns a structure of up to 8 bytes in size
- left-padded in r3/r4, and the ELFv2 ABI similarly returns a
-- structure of up to 8 bytes in size left-padded in r3. */
-+ structure of up to 8 bytes in size left-padded in r3. But
-+ note that a structure of a single float is not paddded. */
-- if (rsize <= 8)
-+ if (rsize <= 8 && (cif->flags & FLAG_RETURNS_FP) == 0)
- memcpy (rvalue, (char *) smst_buffer + 8 - rsize, rsize);
- else
- #endif
-From b0c598d5d6b653a3ea87a2d04afb6b35441e5f7e Mon Sep 17 00:00:00 2001
-From: Samuel Holland <samuel@sholland.org>
-Date: Sat, 13 Oct 2018 01:14:58 +0000
-Subject: [PATCH] powerpc: Add missing check in struct alignment
-
----
- src/powerpc/ffi_linux64.c | 6 +++++-
- 1 file changed, 5 insertions(+), 1 deletion(-)
-
-diff --git a/src/powerpc/ffi_linux64.c b/src/powerpc/ffi_linux64.c
-index 197a270d..d755c712 100644
---- a/src/powerpc/ffi_linux64.c
-+++ b/src/powerpc/ffi_linux64.c
-@@ -536,7 +536,11 @@ ffi_prep_args64 (extended_cif *ecif, unsigned long *const stack)
- if (align > 16)
- align = 16;
- if (align > 1)
-- next_arg.p = ALIGN (next_arg.p, align);
-+ {
-+ next_arg.p = ALIGN (next_arg.p, align);
-+ if (next_arg.ul == gpr_end.ul)
-+ next_arg.ul = rest.ul;
-+ }
- }
- #if _CALL_ELF == 2
- elt = discover_homogeneous_aggregate (*ptr, &elnum);
-- 
GitLab

From 66f1e5efc43603c27003f0e42e01052c25717a59 Mon Sep 17 00:00:00 2001
From: "A. Wilcox" <AWilcox@Wilcox-Tech.com>
Date: Fri, 31 Mar 2023 02:41:10 -0500
Subject: [PATCH 19/47] system/libgpg-error: Update to 1.46

---
 system/libgpg-error/APKBUILD | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/system/libgpg-error/APKBUILD b/system/libgpg-error/APKBUILD
index 41bfa0efbd..dec8c16131 100644
--- a/system/libgpg-error/APKBUILD
+++ b/system/libgpg-error/APKBUILD
@@ -1,7 +1,7 @@
 # Contributor: Sören Tempel <soeren+alpine@soeren-tempel.net>
 # Maintainer: A. Wilcox <awilfox@adelielinux.org>
 pkgname=libgpg-error
-pkgver=1.45
+pkgver=1.46
 pkgrel=0
 pkgdesc="GnuPG runtime library"
 url="https://www.gnupg.org"
@@ -36,4 +36,4 @@ lisp() {
 mv "$pkgdir"/usr/share "$subpkgdir"/usr/share/
 }
 
-sha512sums="882f2dd617e89137d7a9d61b60488dac32321dd4fdb699e9687b6bd9380c056c027da502837f4482289c0fe00e7de01210e804428f05a0843ae2ca23fdcc6457 libgpg-error-1.45.tar.bz2"
+sha512sums="b06223bb2b0f67d3db5d0d9ab116361a0eda175d4667352b5c0941408d37f2b0ba8e507297e480ccebb88cbba9d0a133820b896914b07d264fb3edaac7b8c99d libgpg-error-1.46.tar.bz2"
-- 
GitLab

From 02536ba58d71cb9513a4346e963a2c58744c1fe1 Mon Sep 17 00:00:00 2001
From: "A. Wilcox" <AWilcox@Wilcox-Tech.com>
Date: Fri, 31 Mar 2023 02:44:48 -0500
Subject: [PATCH 20/47] system/libidn: Update to 1.40

---
 system/libidn/APKBUILD | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/system/libidn/APKBUILD b/system/libidn/APKBUILD
index 81a947fc85..2cf6e7a1a7 100644
--- a/system/libidn/APKBUILD
+++ b/system/libidn/APKBUILD
@@ -1,7 +1,7 @@
 # Contributor: Michael Mason <ms13sp@gmail.com>
 # Maintainer:
 pkgname=libidn
-pkgver=1.38
+pkgver=1.40
 pkgrel=0
 pkgdesc="Library for internationalized domain names"
 url="https://www.gnu.org/software/libidn/"
@@ -39,4 +39,4 @@ package() {
 make DESTDIR="$pkgdir" install
 }
 
-sha512sums="5e59b2263fde44d1463b47b516347b17a4e3e3696ebba66ab5fe464d567e2ec81f769fa7cf72ed51cfb501e32221813bb375373713a47e2f599fc6122850e419 libidn-1.38.tar.gz"
+sha512sums="6588454c0a6153b76090057c0f3b97ef6cd78b3d7c84dd27cb9537556b7f6d2b4048485c3b82e33e9fb3c9b8d308c0899676ea92f92cf201a6454bd9af781f96 libidn-1.40.tar.gz"
-- 
GitLab

From f5b22acd450be4c0c2ea2c14d31ec285f6a5931a Mon Sep 17 00:00:00 2001
From: "A. Wilcox" <AWilcox@Wilcox-Tech.com>
Date: Fri, 31 Mar 2023 02:47:01 -0500
Subject: [PATCH 21/47] system/libpipeline: Update to 1.5.7

---
 system/libpipeline/APKBUILD | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/system/libpipeline/APKBUILD b/system/libpipeline/APKBUILD
index 34a95362c0..a772cc14e3 100644
--- a/system/libpipeline/APKBUILD
+++ b/system/libpipeline/APKBUILD
@@ -1,10 +1,10 @@
 # Contributor: A. Wilcox <awilfox@adelielinux.org>
 # Maintainer: A. Wilcox <awilfox@adelielinux.org>
 pkgname=libpipeline
-pkgver=1.5.6
+pkgver=1.5.7
 pkgrel=0
 pkgdesc="C pipeline manipulation library"
-url="http://libpipeline.nongnu.org/"
+url="https://libpipeline.nongnu.org/"
 arch="all"
 license="GPL-3.0+"
 depends=""
@@ -32,4 +32,4 @@ package() {
 make DESTDIR="$pkgdir" install
 }
 
-sha512sums="35a627cf2d736df4e7e6b238eddb61e3fbd00e90b56b16135b4e4f5c6b54fbdb35a496705528c8308e14273341dfe0ed14f78c0791474584c8dc2df99bfdc570 libpipeline-1.5.6.tar.gz"
+sha512sums="bb3be954f5d826cef805f85d65759fb197c31adf80d92360c7d0caa486e0d4877510681390ca01c028b6e805f968f14e8bf4b3eca02e429529031787e7f14d84 libpipeline-1.5.7.tar.gz"
-- 
GitLab

From c1d1ba44065dc9804a6207c462763e00d90c4602 Mon Sep 17 00:00:00 2001
From: "A. Wilcox" <AWilcox@Wilcox-Tech.com>
Date: Fri, 31 Mar 2023 02:51:44 -0500
Subject: [PATCH 22/47] system/libuv: Update to 1.44.2

---
 system/libuv/APKBUILD | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/system/libuv/APKBUILD b/system/libuv/APKBUILD
index 599ede9a20..7a5ac1b4dd 100644
--- a/system/libuv/APKBUILD
+++ b/system/libuv/APKBUILD
@@ -2,7 +2,7 @@
 # Conttributor: Sören Tempel <soeren+alpine@soeren-tempel.net>
 # Maintainer: A. Wilcox <awilfox@adelielinux.org>
 pkgname=libuv
-pkgver=1.44.1
+pkgver=1.44.2
 pkgrel=0
 pkgdesc="Cross-platform asychronous I/O"
 url="https://libuv.org"
@@ -41,5 +41,5 @@ package() {
 "$pkgdir"/usr/share/licenses/$pkgname/LICENSE
 }
 
-sha512sums="b4f8944e2c79e3a6a31ded6cccbe4c0eeada50db6bc8a448d7015642795012a4b80ffeef7ca455bb093c59a8950d0e1430566c3c2fa87b73f82699098162d834 libuv-v1.44.1.tar.gz
+sha512sums="91197ff9303112567bbb915bbb88058050e2ad1c048815a3b57c054635d5dc7df458b956089d785475290132236cb0edcfae830f5d749de29a9a3213eeaf0b20 libuv-v1.44.2.tar.gz
 e7b2242345af697dd175a5fc29b7eb001e5b0f43743a86d9e3fc7307cf3b3bb98e46c2993e84332ecb2934a30f942a78f4ddd19efed6871d85fcf8e2bba15643 fix-test-tty-size-assumption.patch"
-- 
GitLab

From f6444b1fad87c378840aa18bdfb417174c92d467 Mon Sep 17 00:00:00 2001
From: "A. Wilcox" <AWilcox@Wilcox-Tech.com>
Date: Fri, 31 Mar 2023 03:27:59 -0500
Subject: [PATCH 23/47] system/libxml2: Update to 2.10.3

---
 system/libxml2/APKBUILD | 17 +++++++----------
 1 file changed, 7 insertions(+), 10 deletions(-)

diff --git a/system/libxml2/APKBUILD b/system/libxml2/APKBUILD
index 57845daa87..237d7f179c 100644
--- a/system/libxml2/APKBUILD
+++ b/system/libxml2/APKBUILD
@@ -1,7 +1,7 @@
 # Contributor: Carlo Landmeter <clandmeter@gmail.com>
 # Maintainer: A. Wilcox <awilfox@adelielinux.org>
 pkgname=libxml2
-pkgver=2.9.13
+pkgver=2.10.3
 pkgrel=0
 pkgdesc="XML parsing library"
 url="https://gitlab.gnome.org/GNOME/libxml2/-/wikis/home"
@@ -12,13 +12,16 @@ depends=""
 depends_dev="zlib-dev icu-dev"
 checkdepends="perl libarchive"
 makedepends="$depends_dev python3-dev"
-subpackages="$pkgname-doc $pkgname-dev py-libxml2:py"
+subpackages="$pkgname-doc $pkgname-dev py3-libxml2:py"
 provides="$pkgname-utils=$pkgver-r$pkgrel"
-source="https://download.gnome.org/sources/libxml2/2.9/libxml2-$pkgver.tar.xz
+source="https://download.gnome.org/sources/libxml2/${pkgver%.*}/libxml2-$pkgver.tar.xz
 python-segfault-fix.patch
 "
 
 # secfixes:
+# 2.10.0-r0:
+# - CVE-2022-29824
+# - CVE-2022-2309
 # 2.9.4-r1:
 # - CVE-2016-5131
 # 2.9.4-r2:
@@ -55,17 +58,11 @@ package() {
 make -j1 DESTDIR="$pkgdir" install
 }
 
-dev() {
- default_dev
- mv "$pkgdir"/usr/lib/*.sh "$subpkgdir"/usr/lib/
-}
-
 py() {
 pkgdesc="$pkgname Python bindings"
- provides="py3-libxml2=$pkgver-r$pkgrel"
 install -d "$subpkgdir"/usr/lib
 mv "$pkgdir"/usr/lib/python3* "$subpkgdir"/usr/lib/
 }
 
-sha512sums="fc51980cb9222bd3b5242f73d28b55fa15a80e68e52e1c45274f1eda11500ed385853209edb3b2a1f06b9de0be304c159a9bd898c7d84b0899eacb00723d98b5 libxml2-2.9.13.tar.xz
+sha512sums="33bb87ae9a45c475c3de09477e5d94840d8f687f893ef7839408bc7267e57611c4f2b863ed8ec819a4b5f1ebd6a122db9f6054c73bceed427d37f3e67f62620c libxml2-2.10.3.tar.xz
 384b3d2031cd8f77528190bbb7652faa9ccb22bc604bcf4927e59046d38830dac38010828fe1568b6514976f725981a6d3ac1aa595d31477a36db2afe491452c python-segfault-fix.patch"
-- 
GitLab

From 8e733f3b3806123948f21db5290338345203f681 Mon Sep 17 00:00:00 2001
From: "A. Wilcox" <AWilcox@Wilcox-Tech.com>
Date: Fri, 31 Mar 2023 03:38:43 -0500
Subject: [PATCH 24/47] system/libxslt: Update to 1.1.37

---
 system/libxslt/APKBUILD | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/system/libxslt/APKBUILD b/system/libxslt/APKBUILD
index 59f93a3897..1db907710b 100644
--- a/system/libxslt/APKBUILD
+++ b/system/libxslt/APKBUILD
@@ -1,13 +1,14 @@
 # Contributor: Francesco Colista <fcolista@alpinelinux.org>
 # Maintainer: A. Wilcox <awilfox@adelielinux.org>
 pkgname=libxslt
-pkgver=1.1.35
+pkgver=1.1.37
 pkgrel=0
 pkgdesc="XML stylesheet transformation library"
 url="https://gitlab.gnome.org/GNOME/libxslt/-/wikis/home"
 arch="all"
 license="MIT"
 depends=""
+checkdepends="py3-libxml2"
 makedepends="libxml2-dev libgcrypt-dev libgpg-error-dev python3-dev"
 subpackages="$pkgname-doc $pkgname-dev"
 source="https://download.gnome.org/sources/libxslt/${pkgver%.*}/libxslt-$pkgver.tar.xz"
@@ -39,4 +40,4 @@ package() {
 make DESTDIR="$pkgdir" install
 }
 
-sha512sums="9dd4a699235f50ae9b75b25137e387471635b4b2da0a4e4380879cd49f1513470fcfbfd775269b066eac513a1ffa6860c77ec42747168e2348248f09f60c8c96 libxslt-1.1.35.tar.xz"
+sha512sums="a4e477d2bb918b7d01945e2c7491c3a4aae799dc1602bbd13de55c8a5052e210a20bc45115347eae44473c8b1d03dbc5e4a2aa18c2218f1fdfd376d87cd501ca libxslt-1.1.37.tar.xz"
-- 
GitLab

From e920ba77694c56bf4bf23c9c6c8227a4e1b241e1 Mon Sep 17 00:00:00 2001
From: "A. Wilcox" <AWilcox@Wilcox-Tech.com>
Date: Fri, 31 Mar 2023 03:42:30 -0500
Subject: [PATCH 25/47] system/lz4: Update to 1.9.4

Multi-job `make check` is now explicitly supported upstream, so we can
remove -j1.
Wilcox <awilfox@adelielinux.org> pkgname=mawk -pkgver=1.3.4.20200120 -pkgrel=1 +pkgver=1.3.4.20230322 +pkgrel=0 pkgdesc="Pattern scanning and text processing language" url="https://invisible-island.net/mawk/mawk.html" arch="all" @@ -37,4 +37,4 @@ package() { ln -s ../../bin/awk "$pkgdir"/usr/bin/awk } -sha512sums="14d9a6642ce931bf6457d248fc2d6da4f0ea7541976ca282ea708b26df048f86fdf92c27f72d497501ccd43a244d1d1a606f1a2f266a7558306fea35dcc3041b mawk-1.3.4-20200120.tgz" +sha512sums="475f64acfbde686caf65ae3f227b75e1c2578011333467612d44c0e4658a49fa6c9c75ad3338c7ffd33e24d68b3d8156a0f1ad1189b60833597016060e6695c4 mawk-1.3.4-20230322.tgz" -- GitLab From 354a921e08e4e710c645f95f999cc9ce1ce96272 Mon Sep 17 00:00:00 2001 From: "A. Wilcox" <AWilcox@Wilcox-Tech.com> Date: Sat, 1 Apr 2023 21:19:34 -0500 Subject: [PATCH 27/47] system/nghttp2: Update to 1.52.0 --- system/nghttp2/APKBUILD | 5 ++--- user/nghttp2-utils/APKBUILD | 5 ++--- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/system/nghttp2/APKBUILD b/system/nghttp2/APKBUILD index 830a4231f7..bb3f4073ee 100644 --- a/system/nghttp2/APKBUILD +++ b/system/nghttp2/APKBUILD @@ -1,7 +1,7 @@ # Contributor: Natanael Copa <ncopa@alpinelinux.org> # Maintainer: Síle Ekaterin Liszka <sheila@adelielinux.org> pkgname=nghttp2 -pkgver=1.47.0 +pkgver=1.52.0 pkgrel=0 pkgdesc="Experimental HTTP/2 client, server and proxy" url="https://nghttp2.org/" @@ -29,7 +29,6 @@ build() { --disable-static \ --without-neverbleed \ --without-jemalloc \ - --disable-python-bindings \ --enable-lib-only make } @@ -42,4 +41,4 @@ package() { make DESTDIR="$pkgdir" install } -sha512sums="ad6266a15789fec966db6be8ac0b9ee6cca257a3bb91fdd34a58acf0e472643a571941b5974d16c98f6ac5bfa6a03c4b70a6dff222fb0cd50909178b7e94ce48 nghttp2-1.47.0.tar.xz" +sha512sums="3af1ce13270f7afc8652bd3de71200d9632204617fe04d2be7156d60eeb1a5cc415573677791a399ae03577e8e3256939b1b05d27dbd98dee504d09ec5325d56 nghttp2-1.52.0.tar.xz" diff --git a/user/nghttp2-utils/APKBUILD b/user/nghttp2-utils/APKBUILD index 519f85aecd..c81deed550 100644 --- a/user/nghttp2-utils/APKBUILD +++ b/user/nghttp2-utils/APKBUILD @@ -1,7 +1,7 @@ # Contributor: Natanael Copa <ncopa@alpinelinux.org> # Maintainer: Síle Ekaterin Liszka <sheila@adelielinux.org> pkgname=nghttp2-tools -pkgver=1.47.0 +pkgver=1.52.0 pkgrel=0 pkgdesc="Experimental HTTP/2 client, server and proxy" url="https://nghttp2.org/" @@ -30,7 +30,6 @@ build() { --disable-static \ --without-neverbleed \ --without-jemalloc \ - --disable-python-bindings \ --enable-app \ --disable-shared make @@ -52,4 +51,4 @@ package() { rm -rf "$pkgdir"/usr/share/nghttp2 } -sha512sums="ad6266a15789fec966db6be8ac0b9ee6cca257a3bb91fdd34a58acf0e472643a571941b5974d16c98f6ac5bfa6a03c4b70a6dff222fb0cd50909178b7e94ce48 nghttp2-1.47.0.tar.xz" +sha512sums="3af1ce13270f7afc8652bd3de71200d9632204617fe04d2be7156d60eeb1a5cc415573677791a399ae03577e8e3256939b1b05d27dbd98dee504d09ec5325d56 nghttp2-1.52.0.tar.xz" -- GitLab From 9ca9198327f29cf5712cb446c1902d8277de38e4 Mon Sep 17 00:00:00 2001 From: "A. Wilcox" <AWilcox@Wilcox-Tech.com> Date: Sat, 1 Apr 2023 21:22:11 -0500 Subject: [PATCH 28/47] system/nspr: Update to 4.35 --- system/nspr/APKBUILD | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/system/nspr/APKBUILD b/system/nspr/APKBUILD index 92d0060554..033c5a6d6e 100644 --- a/system/nspr/APKBUILD +++ b/system/nspr/APKBUILD @@ -1,6 +1,6 @@ # Maintainer: A.
Wilcox <awilfox@adelielinux.org> pkgname=nspr -pkgver=4.34 +pkgver=4.35 pkgrel=0 pkgdesc="Netscape Portable Runtime" url="https://firefox-source-docs.mozilla.org/nspr/index.html" @@ -56,5 +56,5 @@ package() { "$pkgdir"/usr/include/nspr/md } -sha512sums="4cfac886c14cf7df4c4b79fa1c3bc92e1b14260c9c3018fa2562060d62fecb4e66c0b4e8f7edf4f4823def784a919d99dde88a89674f0cd8a644310b0569ead4 nspr-4.34.tar.gz +sha512sums="502815833116e25f79ddf71d1526484908aa92fbc55f8a892729cb404a4daafcc0470a89854cd080d2d20299fdb7d9662507c5362c7ae661cbacf308ac56ef7f nspr-4.35.tar.gz 1f694fc151f6578080449e3aa999c520486bbe117b8237150966ec43092db4156e81412ac889045e0c0c3bf65d459af5bdc1cf19c9fa3dab120405a60732f15a stacksize.patch" -- GitLab From d2b04b86aec557884a76bf7ab68b0ac6c53ea10c Mon Sep 17 00:00:00 2001 From: "A. Wilcox" <AWilcox@Wilcox-Tech.com> Date: Sat, 1 Apr 2023 21:29:59 -0500 Subject: [PATCH 29/47] system/nss: Update to 3.89 --- system/nss/APKBUILD | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/system/nss/APKBUILD b/system/nss/APKBUILD index b887325e7e..04a3f12bb6 100644 --- a/system/nss/APKBUILD +++ b/system/nss/APKBUILD @@ -1,7 +1,7 @@ # Contributor: Łukasz Jendrysik <scadu@yandex.com> # Maintainer: A. Wilcox <awilfox@adelielinux.org> pkgname=nss -pkgver=3.82 +pkgver=3.89 _ver=$(printf '%s' "$pkgver" | tr . _) pkgrel=0 pkgdesc="Mozilla Network Security Services" @@ -157,7 +157,7 @@ tools() { mv "$pkgdir"/usr/bin "$subpkgdir"/usr/ } -sha512sums="6e0f28c3f776178ab2d97c6e2436aa10d72c9c2668aea1a6695ccf49e8c3c4cd2d266168508bcb456c655f2e692dceb44eae53c80d50076d7156db3deac70057 nss-3.82.tar.gz +sha512sums="1db06d4575f2c16d2a0629007981211e714f99c014c0a6256dd33d0caf8c809ba8d5be204d018f9d1cc99b9fcd055ac1fb99b399486ed43c9cf3f55f2747de82 nss-3.89.tar.gz 75dbd648a461940647ff373389cc73bc8ec609139cd46c91bcce866af02be6bcbb0524eb3dfb721fbd5b0bc68c20081ed6f7debf6b24317f2a7ba823e8d3c531 nss.pc.in 0f2efa8563b11da68669d281b4459289a56f5a3a906eb60382126f3adcfe47420cdcedc6ab57727a3afeeffa2bbb4c750b43bef8b5f343a75c968411dfa30e09 nss-util.pc.in 09c69d4cc39ec9deebc88696a80d0f15eb2d8c94d9daa234a2adfec941b63805eb4ce7f2e1943857b938bddcaee1beac246a0ec627b71563d9f846e6119a4a15 nss-softokn.pc.in -- GitLab From 2254ad6178669b86df6749025f96b9184f524e80 Mon Sep 17 00:00:00 2001 From: "A. Wilcox" <AWilcox@Wilcox-Tech.com> Date: Sat, 1 Apr 2023 21:49:56 -0500 Subject: [PATCH 30/47] system/openssl: Update for CVE --- system/openssl/APKBUILD | 6 +++- system/openssl/CVE-2023-0465.patch | 51 ++++++++++++++++++++++++++++ 2 files changed, 56 insertions(+), 1 deletion(-) create mode 100644 system/openssl/CVE-2023-0465.patch diff --git a/system/openssl/APKBUILD b/system/openssl/APKBUILD index cf3ac587e1..851c4f7aea 100644 --- a/system/openssl/APKBUILD +++ b/system/openssl/APKBUILD @@ -1,7 +1,7 @@ # Maintainer: A.
Wilcox <awilfox@adelielinux.org> pkgname=openssl pkgver=1.1.1t -pkgrel=0 +pkgrel=1 pkgdesc="Toolkit for SSL and TLS" url="https://www.openssl.org/" arch="all" @@ -12,6 +12,7 @@ makedepends_build="perl" subpackages="$pkgname-dbg $pkgname-dev $pkgname-doc libcrypto1.1:libcrypto libssl1.1:libssl" source="https://www.openssl.org/source/${pkgname}-${pkgver}.tar.gz + CVE-2023-0465.patch ppc-auxv.patch ppc64.patch " @@ -60,6 +61,8 @@ source="https://www.openssl.org/source/${pkgname}-${pkgver}.tar.gz # - CVE-2019-1551 # 1.1.1g-r0: # - CVE-2020-1967 +# 1.1.1t-r1: +# - CVE-2023-0465 build() { # openssl will prepend crosscompile always core CC et al @@ -130,5 +133,6 @@ libssl() { } sha512sums="628676c9c3bc1cf46083d64f61943079f97f0eefd0264042e40a85dbbd988f271bfe01cd1135d22cc3f67a298f1d078041f8f2e97b0da0d93fe172da573da18c openssl-1.1.1t.tar.gz +c86d1a74387f3e0ff085e2785bd834b529fdc6b397fa8f559d413b9fa4e35848523c58ce94e00e75b17f55af28f58f0c347973a739a5d15465e205391fc59b26 CVE-2023-0465.patch 7fd3158c6eb3451f10e4bfd78f85c3e7aef84716eb38e00503d5cfc8e414b7bdf02e0671d0299a96a453dd2e38249dcf1281136b27b6df372f3ea08fbf78329b ppc-auxv.patch e040f23770d52b988578f7ff84d77563340f37c026db7643db8e4ef18e795e27d10cb42cb8656da4d9c57a28283a2828729d70f940edc950c3422a54fea55509 ppc64.patch" diff --git a/system/openssl/CVE-2023-0465.patch b/system/openssl/CVE-2023-0465.patch new file mode 100644 index 0000000000..a270624d33 --- /dev/null +++ b/system/openssl/CVE-2023-0465.patch @@ -0,0 +1,51 @@ +From b013765abfa80036dc779dd0e50602c57bb3bf95 Mon Sep 17 00:00:00 2001 +From: Matt Caswell <matt@openssl.org> +Date: Tue, 7 Mar 2023 16:52:55 +0000 +Subject: [PATCH] Ensure that EXFLAG_INVALID_POLICY is checked even in leaf + certs + +Even though we check the leaf cert to confirm it is valid, we +later ignored the invalid flag and did not notice that the leaf +cert was bad. + +Fixes: CVE-2023-0465 + +Reviewed-by: Hugo Landau <hlandau@openssl.org> +Reviewed-by: Tomas Mraz <tomas@openssl.org> +(Merged from https://github.com/openssl/openssl/pull/20588) +--- + crypto/x509/x509_vfy.c | 11 +++++++++-- + 1 file changed, 9 insertions(+), 2 deletions(-) + +diff --git a/crypto/x509/x509_vfy.c b/crypto/x509/x509_vfy.c +index 925fbb54125..1dfe4f9f31a 100644 +--- a/crypto/x509/x509_vfy.c ++++ b/crypto/x509/x509_vfy.c +@@ -1649,18 +1649,25 @@ static int check_policy(X509_STORE_CTX *ctx) + } + /* Invalid or inconsistent extensions */ + if (ret == X509_PCY_TREE_INVALID) { +- int i; ++ int i, cbcalled = 0; + + /* Locate certificates with bad extensions and notify callback. */ +- for (i = 1; i < sk_X509_num(ctx->chain); i++) { ++ for (i = 0; i < sk_X509_num(ctx->chain); i++) { + X509 *x = sk_X509_value(ctx->chain, i); + + if (!(x->ex_flags & EXFLAG_INVALID_POLICY)) + continue; ++ cbcalled = 1; + if (!verify_cb_cert(ctx, x, i, + X509_V_ERR_INVALID_POLICY_EXTENSION)) + return 0; + } ++ if (!cbcalled) { ++ /* Should not be able to get here */ ++ X509err(X509_F_CHECK_POLICY, ERR_R_INTERNAL_ERROR); ++ return 0; ++ } ++ /* The callback ignored the error so we return success */ + return 1; + } + if (ret == X509_PCY_TREE_FAILURE) { -- GitLab From af317ffc5e1698511df211d0043e46db80695dec Mon Sep 17 00:00:00 2001 From: "A. 
Wilcox" <AWilcox@Wilcox-Tech.com> Date: Sat, 1 Apr 2023 21:53:40 -0500 Subject: [PATCH 31/47] system/pcre2: Update to 10.42 --- system/pcre2/APKBUILD | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/system/pcre2/APKBUILD b/system/pcre2/APKBUILD index 5a7392b2c2..4e4aa309d7 100644 --- a/system/pcre2/APKBUILD +++ b/system/pcre2/APKBUILD @@ -1,7 +1,7 @@ # Contributor: Jakub Jirutka <jakub@jirutka.cz> # Maintainer: Zach van Rijn <me@zv.io> pkgname=pcre2 -pkgver=10.40 +pkgver=10.42 pkgrel=0 pkgdesc="Perl-compatible regular expression library" url="https://pcre.org" @@ -69,4 +69,4 @@ tools() { mv "$pkgdir"/usr/bin "$subpkgdir"/usr/ } -sha512sums="679c6f540571850adec880934812e4f26f08ad858c776f10d1ed68ed3c0d4f91f6e1b53d781b53340af43a22c521e585cfc908f3659013c630a320e4fb246dc2 pcre2-10.40.tar.gz" +sha512sums="a3db6c5c620775838819be616652e73ce00f5ef5c1f49f559ff3efb51a119d02f01254c5901c1f7d0c47c0ddfcf4313e38d6ca32c35381b8f87f36896d10e6f7 pcre2-10.42.tar.gz" -- GitLab From a1e062a2db46fc15e94b447f6172e653a515b28a Mon Sep 17 00:00:00 2001 From: "A. Wilcox" <AWilcox@Wilcox-Tech.com> Date: Sat, 1 Apr 2023 21:56:18 -0500 Subject: [PATCH 32/47] system/pkgconf: Update to 1.9.4 [CVE] --- system/pkgconf/APKBUILD | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/system/pkgconf/APKBUILD b/system/pkgconf/APKBUILD index cd35918af5..e67f275656 100644 --- a/system/pkgconf/APKBUILD +++ b/system/pkgconf/APKBUILD @@ -1,6 +1,6 @@ # Maintainer: A. Wilcox <awilfox@adelielinux.org> pkgname=pkgconf -pkgver=1.8.0 +pkgver=1.9.4 pkgrel=0 pkgdesc="Toolkit for maintaining development package metadata" url="http://pkgconf.org/" @@ -47,4 +47,4 @@ dev() { mv "$subpkgdir"/usr/share/aclocal/pkg.m4 "$pkgdir"/usr/share/aclocal/ } -sha512sums="58204006408ad5ce91222ed3c93c2e0b61c04fa83c0a8ad337b747b583744578dbebd4ad5ccbc577689637caa1c5dc246b7795ac46e39c6666b1aa78199b7c28 pkgconf-1.8.0.tar.xz" +sha512sums="079436244f3942161f91c961c96d382a85082079c9843fec5ddd7fb245ba7500a9f7a201b5ef2c70a7a079fe1aacf3a52b73de5402a6f061df87bcdcf0a90755 pkgconf-1.9.4.tar.xz" -- GitLab From fc0727aa86b46392a36ef98a40a0a2ca43791b97 Mon Sep 17 00:00:00 2001 From: "A. 
Wilcox" <AWilcox@Wilcox-Tech.com> Date: Sat, 1 Apr 2023 22:03:42 -0500 Subject: [PATCH 33/47] system/po4a: Update to 0.69 --- system/po4a/APKBUILD | 6 +++--- system/po4a/disable-stats.patch | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/system/po4a/APKBUILD b/system/po4a/APKBUILD index aeebb22243..f5cdb0755d 100644 --- a/system/po4a/APKBUILD +++ b/system/po4a/APKBUILD @@ -1,7 +1,7 @@ # Contributor: Christian Kampka <christian@kampka.net> # Maintainer: Zach van Rijn <me@zv.io> pkgname=po4a -pkgver=0.66 +pkgver=0.69 pkgrel=0 pkgdesc="Tools for helping translation of documentation" url="https://po4a.org" @@ -35,5 +35,5 @@ package() { find ${pkgdir} -name .packlist -o -name perllocal.pod -delete } -sha512sums="f72e1267cbd6ced01ecc362c327b1634c47dff15398b48644548d28e01ba91108d5626e6d4abcfcb360697e4c0affa3228c6993653ce8f257acbbb3d7ae20fc4 po4a-0.66.tar.gz -4877c3be93b9f6da56db75feb3c72433a14aef465401d0f39db62a1914cc69f062c03a6001f96e5fd726eadd63d0b6094835e30cfe4ce865945af4195d9ee07f disable-stats.patch" +sha512sums="9cb5eec547ab18d1c3ebdda212b909fc4f5489a74641ba2d7e0a3a1d060f245d23667c16e687c678c5ccc3809c9315d20673266dcc3764172a899caa397238e3 po4a-0.69.tar.gz +be457a023383c60864bd155b13d8952f8ae523b709a464af2419695a3fb64c1ee6b4176b23811241fa9bed87c2d0c44dbf8c19178046b052b49ea191d03ccc5a disable-stats.patch" diff --git a/system/po4a/disable-stats.patch b/system/po4a/disable-stats.patch index 765a385201..692888460b 100644 --- a/system/po4a/disable-stats.patch +++ b/system/po4a/disable-stats.patch @@ -13,10 +13,10 @@ This is because stdout/stderr is empty. @@ -15,7 +15,8 @@ $self->depends_on('docs'); $self->depends_on('distmeta'); # regenerate META.yml - $self->depends_on('man'); -- $self->depends_on('postats'); + $self->depends_on('man') unless ($^O eq 'MSWin32'); +- $self->depends_on('postats') unless ($^O eq 'MSWin32'); + # We don't need to show stats at the end. -+ # $self->depends_on('postats'); ++ # $self->depends_on('postats') unless $(^O eq 'MSWin32'); } sub make_files_writable { -- GitLab From 0cc5e3092ce2bc3b67ec1c25f0d5f0be244ea6fc Mon Sep 17 00:00:00 2001 From: "A. Wilcox" <AWilcox@Wilcox-Tech.com> Date: Sat, 1 Apr 2023 22:06:18 -0500 Subject: [PATCH 34/47] system/popt: Update to 1.19 --- system/popt/APKBUILD | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/system/popt/APKBUILD b/system/popt/APKBUILD index 80e18f44dc..0eda8692d6 100644 --- a/system/popt/APKBUILD +++ b/system/popt/APKBUILD @@ -1,14 +1,16 @@ # Contributor: Natanael Copa <ncopa@alpinelinux.org> # Maintainer: A. 
Wilcox <awilfox@adelielinux.org> pkgname=popt -pkgver=1.18 +pkgver=1.19 pkgrel=0 pkgdesc="Command-line option parser" url="https://github.com/rpm-software-management/popt" arch="all" -license="X11" +license="MIT" +depends="" +makedepends="" subpackages="$pkgname-doc $pkgname-dev $pkgname-lang" -source="http://ftp.rpm.org/$pkgname/releases/$pkgname-${pkgver%.*}.x/$pkgname-$pkgver.tar.gz" +source="https://ftp.osuosl.org/pub/rpm/$pkgname/releases/$pkgname-${pkgver%.*}.x/$pkgname-$pkgver.tar.gz" build() { ./configure \ @@ -32,4 +34,4 @@ package() { mv "$pkgdir"/lib/pkgconfig "$pkgdir"/usr/lib } -sha512sums="86422e8762adda3d02d46c20ac74ffe389d4f991d552b6fea729f007345b6426cbeb71160284e2deaa2ce44ce754a9e6cf6ccbd64bff9bc2253df40cdc2f79a5 popt-1.18.tar.gz" +sha512sums="5d1b6a15337e4cd5991817c1957f97fc4ed98659870017c08f26f754e34add31d639d55ee77ca31f29bb631c0b53368c1893bd96cf76422d257f7997a11f6466 popt-1.19.tar.gz" -- GitLab From 59f50eb4aea3e8c0ba74414ad1a1f09795d80e03 Mon Sep 17 00:00:00 2001 From: "A. Wilcox" <AWilcox@Wilcox-Tech.com> Date: Sat, 1 Apr 2023 22:09:48 -0500 Subject: [PATCH 35/47] system/psmisc: Update to 23.6 --- system/psmisc/APKBUILD | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/system/psmisc/APKBUILD b/system/psmisc/APKBUILD index 4e0cb4c443..b809690303 100644 --- a/system/psmisc/APKBUILD +++ b/system/psmisc/APKBUILD @@ -1,7 +1,7 @@ # Contributor: A. Wilcox <awilfox@adelielinux.org> # Maintainer: Zach van Rijn <me@zv.io> pkgname=psmisc -pkgver=23.4 +pkgver=23.6 pkgrel=0 pkgdesc="Miscellaneous utilities that use the proc filesystem" url="https://gitlab.com/psmisc/psmisc" @@ -49,5 +49,5 @@ package() { make DESTDIR="$pkgdir" install } -sha512sums="3632cb12d2de604974229f9e4707cde4db467fafff326e76e510251fe262fa4f1b1edd2d46443733996a17af6a57daf46ae98a6088a829f4ae52222da5e9963a psmisc-23.4.tar.bz2 +sha512sums="17ee04c2ce8bd5107b583069853dbf296ecbbf5b3bfb395d02e35691212de453e8b8cae15666a61a3041487cc0e4d1a6e7fbe105afc3a0114bd5b19682efa17a psmisc-23.6.tar.bz2 a910611896368a088503f50a04a1c2af00d57ee20f3613e81c79cd89574805a505dff43e356ed833a464e3b59d7c1e11fd52cf0bbf32fcfece4dbd2380f23b71 fix-peekfd-on-ppc.patch" -- GitLab From 3829532d512ff2a90cf3951c15b0f4adc4660c0a Mon Sep 17 00:00:00 2001 From: "A. Wilcox" <AWilcox@Wilcox-Tech.com> Date: Sat, 1 Apr 2023 22:14:13 -0500 Subject: [PATCH 36/47] system/python3: Update to 3.11.2 --- system/python3/APKBUILD | 10 ++- system/python3/CVE-2022-45061.patch | 95 -------------------------- system/python3/fix-xattrs-glibc.patch | 4 +- system/python3/musl-find_library.patch | 4 +- 4 files changed, 8 insertions(+), 105 deletions(-) delete mode 100644 system/python3/CVE-2022-45061.patch diff --git a/system/python3/APKBUILD b/system/python3/APKBUILD index bf4fc4cc71..fcd6bd2efa 100644 --- a/system/python3/APKBUILD +++ b/system/python3/APKBUILD @@ -1,7 +1,7 @@ # Contributor: Síle Ekaterin Liszka <sheila@vulpine.house> # Maintainer: A.
Wilcox <awilfox@adelielinux.org> pkgname=python3 -pkgver=3.11.0 +pkgver=3.11.2 _basever="${pkgver%.*}" pkgrel=0 pkgdesc="A high-level scripting language" @@ -40,7 +40,6 @@ source="https://www.python.org/ftp/python/$pkgver/Python-$pkgver.tar.xz musl-find_library.patch musl-has-login_tty.patch fix-xattrs-glibc.patch - CVE-2022-45061.patch " builddir="$srcdir/Python-$pkgver" @@ -191,8 +190,7 @@ tests() { "$subpkgdir"/usr/lib/python$_basever/ } -sha512sums="314eef88ae0d68760f34d7a32f238fd2ecb27c50963baa7357c42ad8159026ec50229a0b31d83c39710a472904a06422afc082f9658a90a1dc83ccb74c08039d Python-3.11.0.tar.xz -ab8eaa2858d5109049b1f9f553198d40e0ef8d78211ad6455f7b491af525bffb16738fed60fc84e960c4889568d25753b9e4a1494834fea48291b33f07000ec2 musl-find_library.patch +sha512sums="5684ec7eae2dce26facc54d448ccdb6901bbfa1cab03abbe8fd34e4268a2b701daa13df15903349492447035be78380d473389e8703b4e910a65b088d2462e8b Python-3.11.2.tar.xz +df1c7096a7744c94312ee6cacdd54345e384bcdf2a17148163f5f4c70f0cfa80301efbcbb2398306401ec53106e5c6922ba582a7df226e718cedb53396cc4786 musl-find_library.patch 75c60afecba2e57f11d58c20aadc611ebbb5c68e05b14415c5cf2f7aa75e103986764ca22f76e6a58b2c08e2ff3acffdbf6d85d2c8c4589743a0b949a4c90687 musl-has-login_tty.patch -4b4696d139e53aad184b72461478821335aadedc4811ec9e96cdea9a4f7ef19ebf0aac8c6afae6345f33c79fbd3ae2c63021de36044a2803d0dc8894fa291cf5 fix-xattrs-glibc.patch -039982b5f35d5aa412596dba81b0666fdf979e6c120aefa3ae29333fbaa56f6f6ad69db513dcd93e06a66522405058be2e39e56350816abcb9febd8f5778036f CVE-2022-45061.patch" +6e587012c59e276e2e309ee348059d76707543fdda48ed53ca374cc9d9ca3499ecdf644cee59ec23b18f101e8750cc973413ebd59c7f242f40aeefb69e75a374 fix-xattrs-glibc.patch" diff --git a/system/python3/CVE-2022-45061.patch b/system/python3/CVE-2022-45061.patch deleted file mode 100644 index 0b6faa7cbf..0000000000 --- a/system/python3/CVE-2022-45061.patch +++ /dev/null @@ -1,95 +0,0 @@ -From b8f89940de09a51fdbd8fe4705d3d1d7f1bb0c6a Mon Sep 17 00:00:00 2001 -From: "Miss Islington (bot)" - <31488909+miss-islington@users.noreply.github.com> -Date: Mon, 7 Nov 2022 18:57:10 -0800 -Subject: [PATCH] [3.11] gh-98433: Fix quadratic time idna decoding. (GH-99092) - (GH-99222) - -There was an unnecessary quadratic loop in idna decoding. This restores -the behavior to linear. - -(cherry picked from commit d315722564927c7202dd6e111dc79eaf14240b0d) - -(cherry picked from commit a6f6c3a3d6f2b580f2d87885c9b8a9350ad7bf15) - -Co-authored-by: Miss Islington (bot) <31488909+miss-islington@users.noreply.github.com> -Co-authored-by: Gregory P. Smith <greg@krypto.org> ---- - Lib/encodings/idna.py | 32 +++++++++---------- - Lib/test/test_codecs.py | 6 ++++ - ...2-11-04-09-29-36.gh-issue-98433.l76c5G.rst | 6 ++++ - 3 files changed, 27 insertions(+), 17 deletions(-) - create mode 100644 Misc/NEWS.d/next/Security/2022-11-04-09-29-36.gh-issue-98433.l76c5G.rst - -diff --git a/Lib/encodings/idna.py b/Lib/encodings/idna.py -index ea4058512fe3..bf98f513366b 100644 ---- a/Lib/encodings/idna.py -+++ b/Lib/encodings/idna.py -@@ -39,23 +39,21 @@ def nameprep(label): - - # Check bidi - RandAL = [stringprep.in_table_d1(x) for x in label] -- for c in RandAL: -- if c: -- # There is a RandAL char in the string. Must perform further -- # tests: -- # 1) The characters in section 5.8 MUST be prohibited. -- # This is table C.8, which was already checked -- # 2) If a string contains any RandALCat character, the string -- # MUST NOT contain any LCat character. 
-- if any(stringprep.in_table_d2(x) for x in label): -- raise UnicodeError("Violation of BIDI requirement 2") -- -- # 3) If a string contains any RandALCat character, a -- # RandALCat character MUST be the first character of the -- # string, and a RandALCat character MUST be the last -- # character of the string. -- if not RandAL[0] or not RandAL[-1]: -- raise UnicodeError("Violation of BIDI requirement 3") -+ if any(RandAL): -+ # There is a RandAL char in the string. Must perform further -+ # tests: -+ # 1) The characters in section 5.8 MUST be prohibited. -+ # This is table C.8, which was already checked -+ # 2) If a string contains any RandALCat character, the string -+ # MUST NOT contain any LCat character. -+ if any(stringprep.in_table_d2(x) for x in label): -+ raise UnicodeError("Violation of BIDI requirement 2") -+ # 3) If a string contains any RandALCat character, a -+ # RandALCat character MUST be the first character of the -+ # string, and a RandALCat character MUST be the last -+ # character of the string. -+ if not RandAL[0] or not RandAL[-1]: -+ raise UnicodeError("Violation of BIDI requirement 3") - - return label - -diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py -index 8edd5ac0633e..240756726133 100644 ---- a/Lib/test/test_codecs.py -+++ b/Lib/test/test_codecs.py -@@ -1535,6 +1535,12 @@ def test_builtin_encode(self): - self.assertEqual("pyth\xf6n.org".encode("idna"), b"xn--pythn-mua.org") - self.assertEqual("pyth\xf6n.org.".encode("idna"), b"xn--pythn-mua.org.") - -+ def test_builtin_decode_length_limit(self): -+ with self.assertRaisesRegex(UnicodeError, "too long"): -+ (b"xn--016c"+b"a"*1100).decode("idna") -+ with self.assertRaisesRegex(UnicodeError, "too long"): -+ (b"xn--016c"+b"a"*70).decode("idna") -+ - def test_stream(self): - r = codecs.getreader("idna")(io.BytesIO(b"abc")) - r.read(3) -diff --git a/Misc/NEWS.d/next/Security/2022-11-04-09-29-36.gh-issue-98433.l76c5G.rst b/Misc/NEWS.d/next/Security/2022-11-04-09-29-36.gh-issue-98433.l76c5G.rst -new file mode 100644 -index 000000000000..5185fac2e29d ---- /dev/null -+++ b/Misc/NEWS.d/next/Security/2022-11-04-09-29-36.gh-issue-98433.l76c5G.rst -@@ -0,0 +1,6 @@ -+The IDNA codec decoder used on DNS hostnames by :mod:`socket` or :mod:`asyncio` -+related name resolution functions no longer involves a quadratic algorithm. -+This prevents a potential CPU denial of service if an out-of-spec excessive -+length hostname involving bidirectional characters were decoded. Some protocols -+such as :mod:`urllib` http ``3xx`` redirects potentially allow for an attacker -+to supply such a name. 
diff --git a/system/python3/fix-xattrs-glibc.patch b/system/python3/fix-xattrs-glibc.patch index d784281083..da0c6e3cb3 100644 --- a/system/python3/fix-xattrs-glibc.patch +++ b/system/python3/fix-xattrs-glibc.patch @@ -1,14 +1,14 @@ diff -ur a/Modules/posixmodule.c b/Modules/posixmodule.c --- a/Modules/posixmodule.c 2022-01-11 11:45:55.120000000 +0000 +++ b/Modules/posixmodule.c 2022-01-11 11:47:28.010000000 +0000 -@@ -247,8 +247,9 @@ +@@ -272,8 +272,9 @@ # undef HAVE_SCHED_SETAFFINITY #endif -#if defined(HAVE_SYS_XATTR_H) && defined(__GLIBC__) && !defined(__FreeBSD_kernel__) && !defined(__GNU__) +#if defined(HAVE_SYS_XATTR_H) && defined(__linux__) && !defined(__FreeBSD_kernel__) && !defined(__GNU__) # define USE_XATTRS -+#include <linux/limits.h> ++# include <linux/limits.h> #endif #ifdef USE_XATTRS diff --git a/system/python3/musl-find_library.patch b/system/python3/musl-find_library.patch index 7899abb736..6181ede0a7 100644 --- a/system/python3/musl-find_library.patch +++ b/system/python3/musl-find_library.patch @@ -1,13 +1,13 @@ diff -ru Python-2.7.12.orig/Lib/ctypes/util.py Python-2.7.12/Lib/ctypes/util.py --- Python-2.7.12.orig/Lib/ctypes/util.py 2016-06-26 00:49:30.000000000 +0300 +++ Python-2.7.12/Lib/ctypes/util.py 2016-11-03 16:05:46.954665040 +0200 -@@ -204,6 +204,41 @@ +@@ -265,6 +265,41 @@ def find_library(name, is64 = False): return _get_soname(_findLib_crle(name, is64) or _findLib_gcc(name)) + elif True: + -+ # Patched for Alpine Linux / musl - search manually system paths ++ # Patched for musl to search manually system paths + def _is_elf(filepath): + try: + with open(filepath, 'rb') as fh: -- GitLab From 5d4bb161ef016f6e00a50e1dd0a0d8d090845b45 Mon Sep 17 00:00:00 2001 From: "A. Wilcox" <AWilcox@Wilcox-Tech.com> Date: Sat, 1 Apr 2023 22:17:46 -0500 Subject: [PATCH 37/47] system/rhash: Update URL, update to 1.4.3 --- system/rhash/APKBUILD | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/system/rhash/APKBUILD b/system/rhash/APKBUILD index 5438b35cda..c587565094 100644 --- a/system/rhash/APKBUILD +++ b/system/rhash/APKBUILD @@ -1,10 +1,10 @@ # Contributor: Przemyslaw Pawelczyk <przemoc@zoho.com> # Maintainer: A. Wilcox <awilfox@adelielinux.org> pkgname=rhash -pkgver=1.4.2 +pkgver=1.4.3 pkgrel=0 pkgdesc="Utility for calculation and verification of hash sums and magnet links" -url="http://rhash.sourceforge.net/" +url="https://rhash.sourceforge.net/" arch="all" license="MIT" depends="" @@ -31,4 +31,4 @@ package() { make -j1 DESTDIR="$pkgdir" install install-gmo install-lib-headers install-lib-so-link install-man } -sha512sums="41df57e8b3f32c93d8e6f2ac668b32aaa23eb2eaf90a83f109e61e511404a5036ea88bcf2854e19c1ade0f61960e0d9edf01f3d82e1c645fed36579e9d7a6a25 rhash-1.4.2.tar.gz" +sha512sums="d87ffcde28d8f25cf775c279fed457e52d24523ed9b695629dae694b3c22372247d18f6032f8ce13a0b70fa2953be408982e46659daaa7c4ab227ae89eaed9c7 rhash-1.4.3.tar.gz" -- GitLab From 1683764b9386e8e898fc2e6d28333c3cea235bd3 Mon Sep 17 00:00:00 2001 From: "A. 
Wilcox" <AWilcox@Wilcox-Tech.com> Date: Sat, 1 Apr 2023 22:25:22 -0500 Subject: [PATCH 38/47] system/ruby: Update to 3.1.4 --- system/ruby/APKBUILD | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/system/ruby/APKBUILD b/system/ruby/APKBUILD index de9c267174..db2e2a3c7b 100644 --- a/system/ruby/APKBUILD +++ b/system/ruby/APKBUILD @@ -42,7 +42,7 @@ # - CVE-2020-8130 # pkgname=ruby -pkgver=3.1.2 +pkgver=3.1.4 _abiver="${pkgver%.*}.0" pkgrel=0 pkgdesc="An object-oriented language for quick and easy programming" @@ -275,7 +275,7 @@ _mvgem() { done } -sha512sums="4a74e9efc6ea4b3eff4fec7534eb1fff4794d021531defc2e9937e53c6668db8ecdc0fff2bc23d5e6602d0df344a2caa85b31c5414309541e3d5313ec82b6e21 ruby-3.1.2.tar.xz +sha512sums="a627bb629a10750b8b2081ad451a41faea0fc85d95aa1e267e3d2a0f56a35bb58195d4a8d13bbdbd82f4197a96dae22b1cee1dfc83861ec33a67ece07aef5633 ruby-3.1.4.tar.xz a142199140fa711a64717429e9069fd2082319abaf4b129f561db374b3bc16e2a90cc4c849b5d28334505d1c71fed242aef3c44d983da3513d239dcb778673a5 rubygems-avoid-platform-specific-gems.patch 814fe6359505b70d8ff680adf22f20a74b4dbd3fecc9a63a6c2456ee9824257815929917b6df5394ed069a6869511b8c6dce5b95b4acbbb7867c1f3a975a0150 test_insns-lower-recursion-depth.patch 3ffc034c01110ee5531265333ca5ee8d61d08131843fe3004c5b34c88c9c1b32cb4ed89574f393177c8bd526e9c15da61ab344f93adf07b9148c561ee19e2eb5 fix-get_main_stack.patch -- GitLab From f1b3071936ba0eb90f57db55c237babff7b0c6e3 Mon Sep 17 00:00:00 2001 From: "A. Wilcox" <AWilcox@Wilcox-Tech.com> Date: Sat, 1 Apr 2023 22:35:58 -0500 Subject: [PATCH 39/47] system/shadow: Update to 4.13 Dots in usernames made it upstream. --- system/shadow/APKBUILD | 8 +++----- system/shadow/dots-in-usernames.patch | 11 ----------- 2 files changed, 3 insertions(+), 16 deletions(-) delete mode 100644 system/shadow/dots-in-usernames.patch diff --git a/system/shadow/APKBUILD b/system/shadow/APKBUILD index 12ec92b6f5..17418afc7b 100644 --- a/system/shadow/APKBUILD +++ b/system/shadow/APKBUILD @@ -2,7 +2,7 @@ # Contributor: Jakub Jirutka <jakub@jirutka.cz> # Maintainer: A. 
Wilcox <awilfox@adelielinux.org> pkgname=shadow -pkgver=4.11.1 +pkgver=4.13 pkgrel=0 pkgdesc="Login and password management utilities" url="https://github.com/shadow-maint/shadow" @@ -13,9 +13,8 @@ depends="" makedepends_build="docbook-xml docbook-xsl itstool libxslt" makedepends_host="linux-pam-dev utmps-dev" subpackages="$pkgname-doc $pkgname-dbg $pkgname-lang $pkgname-uidmap" -source="https://github.com/shadow-maint/shadow/releases/download/v$pkgver/shadow-$pkgver.tar.xz +source="https://github.com/shadow-maint/shadow/releases/download/$pkgver/shadow-$pkgver.tar.xz login.pamd - dots-in-usernames.patch useradd-defaults.patch pam-useradd.patch " @@ -98,8 +97,7 @@ uidmap() { touch "$subpkgdir"/etc/subuid "$subpkgdir"/etc/subgid } -sha512sums="12fbe4d6ac929ad3c21525ed0f1026b5b678ccec9762f2ec7e611d9c180934def506325f2835fb750dd30af035b592f827ff151cd6e4c805aaaf8e01425c279f shadow-4.11.1.tar.xz +sha512sums="2949a728c3312bef13d23138d6b79caf402781b1cb179e33b5be546c1790971ec20778d0e9cd3dbe09691d928ffcbe88e60da42fab58c69a90d5ebe5e3e2ab8e shadow-4.13.tar.xz 46a6f83f3698e101b58b8682852da749619412f75dfa85cecad03d0847f6c3dc452d984510db7094220e4570a0565b83b0556e16198ad894a3ec84b3e513d58d login.pamd -6492e8d07a595cbefedeb9009d39eef48195bc2e8caeb523827b83f243d872c912df95a9479d6fc274ff105d2902ebc5c74bdec04f82e7c0d1340d0ae8607f73 dots-in-usernames.patch fb6b19b05a58f5200d4ad39c7393735ef8e0db853c50ba2ad29efb7bdd871bc8659f6845c9cc0660ce12372dbc78b7df9cfd8a06f6913036d905e5372f2ed99f useradd-defaults.patch 0b4587e263cb6be12fa5ae6bc3b3fc4d3696dae355bc67d085dc58c52ff96edb4d163b95db2092b8c2f3310839430cac03c7af356641b42e24ee4aa6410f5cf1 pam-useradd.patch" diff --git a/system/shadow/dots-in-usernames.patch b/system/shadow/dots-in-usernames.patch deleted file mode 100644 index 17ddab151f..0000000000 --- a/system/shadow/dots-in-usernames.patch +++ /dev/null @@ -1,11 +0,0 @@ ---- shadow-4.1.3/libmisc/chkname.c -+++ shadow-4.1.3/libmisc/chkname.c -@@ -45,6 +45,7 @@ - ( ('0' <= *name) && ('9' >= *name) ) || - ('_' == *name) || - ('-' == *name) || -+ ('.' == *name) || - ( ('$' == *name) && ('\0' == *(name + 1)) ) - )) { - return false; - -- GitLab From 7ad68e54dd709208b507811df7310dc4435b18cd Mon Sep 17 00:00:00 2001 From: "A. Wilcox" <AWilcox@Wilcox-Tech.com> Date: Sat, 1 Apr 2023 22:48:55 -0500 Subject: [PATCH 40/47] system/tcl: Update to 8.6.13 --- system/tcl/APKBUILD | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/system/tcl/APKBUILD b/system/tcl/APKBUILD index 9a22461a72..7b4097e9b8 100644 --- a/system/tcl/APKBUILD +++ b/system/tcl/APKBUILD @@ -1,10 +1,10 @@ # Contributor: Sören Tempel <soeren+alpine@soeren-tempel.net> # Maintainer: Zach van Rijn <me@zv.io> pkgname=tcl -pkgver=8.6.12 +pkgver=8.6.13 pkgrel=0 pkgdesc="The Tcl scripting language" -url="http://tcl.sourceforge.net/" +url="https://tcl.sourceforge.net/" arch="all" license="TCL" depends="" @@ -72,5 +72,5 @@ dev() { done } -sha512sums="15def824484309fff6831b436e33d91ab1c6b095178f427d1f58b9a04e5e676b18dfdf1d225c6ab9ec15dc233358c40789edc7daf91c5908a1837e9f337feb60 tcl8.6.12-src.tar.gz +sha512sums="b597f6b62fd71457e96445401a4f8aa662c2678de8a52127c60b0abddebf1fd4452ba5364420902a15b263c0118fc91167550fd1ad2d24fa4ab2204e372c027e tcl8.6.13-src.tar.gz cd52cbe80fd2be227c9018dbe38fa0888302293402e7a57f2e231c195b7c1902f1b58bc87d19c9d123915ca757e871abf084c0ef23b1a7f187bc03ba93904cc2 tcl-stat64.patch" -- GitLab From dd93879d4e771e3c2d1264563a125182d005d9cd Mon Sep 17 00:00:00 2001 From: "A. 
Wilcox" <AWilcox@Wilcox-Tech.com> Date: Sat, 1 Apr 2023 23:00:37 -0500 Subject: [PATCH 41/47] system/texinfo: Update to 7.0.3 --- system/texinfo/APKBUILD | 11 ++++------- .../disable-layout_formatting_plaintext-test.patch | 11 ----------- 2 files changed, 4 insertions(+), 18 deletions(-) delete mode 100644 system/texinfo/disable-layout_formatting_plaintext-test.patch diff --git a/system/texinfo/APKBUILD b/system/texinfo/APKBUILD index da8723f615..e32cf71d66 100644 --- a/system/texinfo/APKBUILD +++ b/system/texinfo/APKBUILD @@ -1,7 +1,7 @@ # Maintainer: A. Wilcox <awilfox@adelielinux.org> pkgname=texinfo -pkgver=6.8 -pkgrel=1 +pkgver=7.0.3 +pkgrel=0 pkgdesc="Utilities to manage on-line documentation" url="https://www.gnu.org/software/texinfo/" arch="all" @@ -9,9 +9,7 @@ license="GPL-3.0+" depends="perl" makedepends="ncurses-dev perl-dev" subpackages="$pkgname-doc $pkgname-lang" -source="https://ftp.gnu.org/gnu/$pkgname/$pkgname-$pkgver.tar.xz - disable-layout_formatting_plaintext-test.patch - " +source="https://ftp.gnu.org/gnu/$pkgname/$pkgname-$pkgver.tar.xz" build() { ./configure \ @@ -32,5 +30,4 @@ package() { gzip "$pkgdir"/usr/share/info/* } -sha512sums="0ff9290b14e4d83e32b889cfa24e6d065f98b2a764daf6b92c6c895fddbb35258398da6257c113220d5a4d886f7b54b09c4b117ca5eacfee6797f9bffde0f909 texinfo-6.8.tar.xz -d35b98e57c8a16041dd64c73bfe63775637003c0cb4bbf28e5e4757d3cddf8197c2cae84a83e71d8784b3e43573f683e822f9d8d8c3ad49fce8ad9d1adc53143 disable-layout_formatting_plaintext-test.patch" +sha512sums="7d14f7458f2b7d0ee0b740e00a5fc2a9d61d33811aa5905d649875ec518dcb4f01be46fb0c46748f7dfe36950597a852f1473ab0648d5add225bc8f35528a8ff texinfo-7.0.3.tar.xz" diff --git a/system/texinfo/disable-layout_formatting_plaintext-test.patch b/system/texinfo/disable-layout_formatting_plaintext-test.patch deleted file mode 100644 index 7840cac131..0000000000 --- a/system/texinfo/disable-layout_formatting_plaintext-test.patch +++ /dev/null @@ -1,11 +0,0 @@ -diff -ur a/tp/tests/Makefile.in b/tp/tests/Makefile.in ---- a/tp/tests/Makefile.in 2022-04-23 15:20:38.150000000 +0000 -+++ b/tp/tests/Makefile.in 2022-04-23 15:20:58.450000000 +0000 -@@ -1618,7 +1618,6 @@ - test_scripts/layout_formatting_weird_quotes.sh \ - test_scripts/layout_formatting_html.sh \ - test_scripts/layout_formatting_html_nodes.sh \ -- test_scripts/layout_formatting_plaintext.sh \ - test_scripts/layout_formatting_mathjax.sh \ - test_scripts/layout_formatting_html32.sh \ - test_scripts/layout_formatting_regions.sh \ -- GitLab From e1f278718c4907f332e90fb8228b45731d726ee5 Mon Sep 17 00:00:00 2001 From: "A. Wilcox" <AWilcox@Wilcox-Tech.com> Date: Sat, 1 Apr 2023 23:16:08 -0500 Subject: [PATCH 42/47] system/tzdata: Update to 2023c --- system/tzdata/APKBUILD | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/system/tzdata/APKBUILD b/system/tzdata/APKBUILD index 7916dd980a..de3eb0a51c 100644 --- a/system/tzdata/APKBUILD +++ b/system/tzdata/APKBUILD @@ -2,7 +2,7 @@ # Contributor: Natanael Copa <ncopa@alpinelinux.org> # Maintainer: A. 
Wilcox <awilfox@adelielinux.org> pkgname=tzdata -pkgver=2022a +pkgver=2023c pkgrel=0 pkgdesc="Time zone data" url="https://www.iana.org/time-zones" @@ -47,5 +47,5 @@ package() { rm -f "$pkgdir"/usr/share/zoneinfo/localtime } -sha512sums="3f047a6f414ae3df4a3d6bb9b39a1790833d191ae48e6320ab9438cd326dc455475186a02c44e4cde96b48101ab000880919b1e0e8092aed7254443ed2c831ed tzcode2022a.tar.gz -542e4559beac8fd8c4af7d08d816fd12cfe7ffcb6f20bba4ff1c20eba717749ef96e5cf599b2fe03b5b8469c0467f8cb1c893008160da281055a123dd9e810d9 tzdata2022a.tar.gz" +sha512sums="fa18bae9c0e7c061bc9d5f5f2eb9967f0e4ddb2baafdee9887fa30cd0c60f4aa6f21eacffb17df0d59d26ff54d08c5dcefa98159309eba497e86443624913a82 tzcode2023c.tar.gz +608bd286ebcbd0004cfdc1da183273f08aff61f90c8867661154453d77a05d421e4c46ad6d066a1fe2e87d5c82ec0f1c0224667a3b35f3180a3eb7f6ff84cbf5 tzdata2023c.tar.gz" -- GitLab From a3cde59dd68168f1ff68b5cd28b7267ebe7e9611 Mon Sep 17 00:00:00 2001 From: "A. Wilcox" <AWilcox@Wilcox-Tech.com> Date: Sat, 1 Apr 2023 23:22:53 -0500 Subject: [PATCH 43/47] system/util-linux: Update to 2.38.1 --- system/util-linux/APKBUILD | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/system/util-linux/APKBUILD b/system/util-linux/APKBUILD index fb90e6f02e..2cda4fc68e 100644 --- a/system/util-linux/APKBUILD +++ b/system/util-linux/APKBUILD @@ -2,7 +2,7 @@ # Contributor: Leonardo Arena <rnalrd@alpinelinux.org> # Maintainer: A. Wilcox <awilfox@adelielinux.org> pkgname=util-linux -pkgver=2.38 +pkgver=2.38.1 case $pkgver in *.*.*) _v=${pkgver%.*};; @@ -98,5 +98,5 @@ libmount() { mv "$pkgdir"/lib/libmount.so.* "$subpkgdir"/lib/ } -sha512sums="d0f7888f457592067938e216695871ce6475a45d83a092cc3fd72b8cf8fca145ca5f3a99122f1744ef60b4f773055cf4e178dc6c59cd30837172aee0b5597e8c util-linux-2.38.tar.xz +sha512sums="07f11147f67dfc6c8bc766dfc83266054e6ede776feada0566b447d13276b6882ee85c6fe53e8d94a17c03332106fc0549deca3cf5f2e92dda554e9bc0551957 util-linux-2.38.1.tar.xz 876bb9041eca1b2cca1e9aac898f282db576f7860aba690a95c0ac629d7c5b2cdeccba504dda87ff55c2a10b67165985ce16ca41a0694a267507e1e0cafd46d9 ttydefaults.h" -- GitLab From 1ceea34d30867baa8db6278ac76cc076729605b6 Mon Sep 17 00:00:00 2001 From: "A. 
Wilcox" <AWilcox@Wilcox-Tech.com> Date: Sat, 1 Apr 2023 23:42:45 -0500 Subject: [PATCH 44/47] system/xz: Update to 5.4.2 --- system/xz/APKBUILD | 12 +-- .../CVE-2022-1271-xzgrep-ZDI-CAN-16587.patch | 94 ------------------- system/xz/dont-use-libdir-for-pkgconfig.patch | 8 +- 3 files changed, 8 insertions(+), 106 deletions(-) delete mode 100644 system/xz/CVE-2022-1271-xzgrep-ZDI-CAN-16587.patch diff --git a/system/xz/APKBUILD b/system/xz/APKBUILD index 7b8861fca5..0b52b0f85b 100644 --- a/system/xz/APKBUILD +++ b/system/xz/APKBUILD @@ -1,8 +1,8 @@ # Contributor: Sören Tempel <soeren+alpine@soeren-tempel.net> # Maintainer: Dan Theisen <djt@hxx.in> pkgname=xz -pkgver=5.2.5 -pkgrel=1 +pkgver=5.4.2 +pkgrel=0 pkgdesc="Library and command line tools for XZ and LZMA compressed files" url="https://tukaani.org/xz/" arch="all" @@ -12,7 +12,6 @@ makedepends="" subpackages="$pkgname-doc $pkgname-dev $pkgname-lang $pkgname-libs" source="https://tukaani.org/xz/xz-$pkgver.tar.gz dont-use-libdir-for-pkgconfig.patch - CVE-2022-1271-xzgrep-ZDI-CAN-16587.patch " build() { @@ -30,8 +29,6 @@ build() { sed 's|^hardcode_libdir_flag_spec=.*|hardcode_libdir_flag_spec=""|g' \ -i libtool - sed 's|^runpath_var=LD_RUN_PATH|runpath_var=DIE_RPATH_DIE|g' \ - -i libtool make } @@ -46,6 +43,5 @@ package() { "$pkgdir"/usr/share/licenses/$pkgname } -sha512sums="7443674247deda2935220fbc4dfc7665e5bb5a260be8ad858c8bd7d7b9f0f868f04ea45e62eb17c0a5e6a2de7c7500ad2d201e2d668c48ca29bd9eea5a73a3ce xz-5.2.5.tar.gz -9310ae2568dd6ac474e3cb9895e1339ca2dbe8834f856edbb7d2264c0019bde4bbd94aa1edd34e5c8d0aed1f35a1877b0e053ed08a270835ea81e59c7be5edb3 dont-use-libdir-for-pkgconfig.patch -52b16268e333399444f433a11ccf3a9b020a6914ed23fc8e082128fec596011d7c6863d47414d4c0f245d20ebed4b3a50b422599b4b88d66f6c6eb2e74b9a939 CVE-2022-1271-xzgrep-ZDI-CAN-16587.patch" +sha512sums="149f980338bea3d66de1ff5994b2b236ae1773135eda68b62b009df0c9dcdf5467f8cb2c06da95a71b6556d60bd3d21f475feced34d5dfdb80ee95416a2f9737 xz-5.4.2.tar.gz +54bbe1f8aae954d2550941f69a509e210d0f6bee2393494dcf445a14d14046953c125177b4cc9fa79ec55b81379dfe4ae0187f106abd2f3cc4331782a5c0b4fd dont-use-libdir-for-pkgconfig.patch" diff --git a/system/xz/CVE-2022-1271-xzgrep-ZDI-CAN-16587.patch b/system/xz/CVE-2022-1271-xzgrep-ZDI-CAN-16587.patch deleted file mode 100644 index 406ded5903..0000000000 --- a/system/xz/CVE-2022-1271-xzgrep-ZDI-CAN-16587.patch +++ /dev/null @@ -1,94 +0,0 @@ -From 69d1b3fc29677af8ade8dc15dba83f0589cb63d6 Mon Sep 17 00:00:00 2001 -From: Lasse Collin <lasse.collin@tukaani.org> -Date: Tue, 29 Mar 2022 19:19:12 +0300 -Subject: [PATCH] xzgrep: Fix escaping of malicious filenames (ZDI-CAN-16587). - -Malicious filenames can make xzgrep to write to arbitrary files -or (with a GNU sed extension) lead to arbitrary code execution. - -xzgrep from XZ Utils versions up to and including 5.2.5 are -affected. 5.3.1alpha and 5.3.2alpha are affected as well. -This patch works for all of them. - -This bug was inherited from gzip's zgrep. gzip 1.12 includes -a fix for zgrep. - -The issue with the old sed script is that with multiple newlines, -the N-command will read the second line of input, then the -s-commands will be skipped because it's not the end of the -file yet, then a new sed cycle starts and the pattern space -is printed and emptied. So only the last line or two get escaped. - -One way to fix this would be to read all lines into the pattern -space first. However, the included fix is even simpler: All lines -except the last line get a backslash appended at the end. 
To ensure -that shell command substitution doesn't eat a possible trailing -newline, a colon is appended to the filename before escaping. -The colon is later used to separate the filename from the grep -output so it is fine to add it here instead of a few lines later. - -The old code also wasn't POSIX compliant as it used \n in the -replacement section of the s-command. Using \<newline> is the -POSIX compatible method. - -LC_ALL=C was added to the two critical sed commands. POSIX sed -manual recommends it when using sed to manipulate pathnames -because in other locales invalid multibyte sequences might -cause issues with some sed implementations. In case of GNU sed, -these particular sed scripts wouldn't have such problems but some -other scripts could have, see: - - info '(sed)Locale Considerations' - -This vulnerability was discovered by: -cleemy desu wayo working with Trend Micro Zero Day Initiative - -Thanks to Jim Meyering and Paul Eggert discussing the different -ways to fix this and for coordinating the patch release schedule -with gzip. ---- - src/scripts/xzgrep.in | 20 ++++++++++++-------- - 1 file changed, 12 insertions(+), 8 deletions(-) - -diff --git a/src/scripts/xzgrep.in b/src/scripts/xzgrep.in -index b180936..e5186ba 100644 ---- a/src/scripts/xzgrep.in -+++ b/src/scripts/xzgrep.in -@@ -180,22 +180,26 @@ for i; do - { test $# -eq 1 || test $no_filename -eq 1; }; then - eval "$grep" - else -+ # Append a colon so that the last character will never be a newline -+ # which would otherwise get lost in shell command substitution. -+ i="$i:" -+ -+ # Escape & \ | and newlines only if such characters are present -+ # (speed optimization). - case $i in - (*' - '* | *'&'* | *'\'* | *'|'*) -- i=$(printf '%s\n' "$i" | -- sed ' -- $!N -- $s/[&\|]/\\&/g -- $s/\n/\\n/g -- ');; -+ i=$(printf '%s\n' "$i" | LC_ALL=C sed 's/[&\|]/\\&/g; $!s/$/\\/');; - esac -- sed_script="s|^|$i:|" -+ -+ # $i already ends with a colon so don't add it here. -+ sed_script="s|^|$i|" - - # Fail if grep or sed fails. - r=$( - exec 4>&1 -- (eval "$grep" 4>&-; echo $? >&4) 3>&- | sed "$sed_script" >&3 4>&- -+ (eval "$grep" 4>&-; echo $? >&4) 3>&- | -+ LC_ALL=C sed "$sed_script" >&3 4>&- - ) || r=2 - exit $r - fi >&3 5>&- --- -2.35.1 - diff --git a/system/xz/dont-use-libdir-for-pkgconfig.patch b/system/xz/dont-use-libdir-for-pkgconfig.patch index dbf4d59276..c6b1c480c1 100644 --- a/system/xz/dont-use-libdir-for-pkgconfig.patch +++ b/system/xz/dont-use-libdir-for-pkgconfig.patch @@ -1,6 +1,6 @@ --- xz-5.2.4/src/liblzma/Makefile.am.old 2018-04-29 15:16:04.000000000 +0000 +++ xz-5.2.4/src/liblzma/Makefile.am 2018-07-19 02:18:19.010000000 +0000 -@@ -97,7 +97,7 @@ +@@ -101,7 +101,7 @@ ## pkg-config @@ -11,10 +11,10 @@ --- xz-5.2.4/src/liblzma/Makefile.in.old 2018-04-29 16:01:26.000000000 +0000 +++ xz-5.2.4/src/liblzma/Makefile.in 2018-07-19 02:18:33.600000000 +0000 -@@ -752,7 +752,7 @@ +@@ -879,7 +879,7 @@ - liblzma_la_LDFLAGS = -no-undefined -version-info 7:4:2 $(am__append_1) \ - $(am__append_42) + liblzma_la_LDFLAGS = -no-undefined -version-info 9:2:4 $(am__append_1) \ + $(am__append_2) $(am__append_48) -pkgconfigdir = $(libdir)/pkgconfig +pkgconfigdir = /usr/lib/pkgconfig pkgconfig_DATA = liblzma.pc -- GitLab From 6023efd9683dd9141a75652bdb2e920053211e82 Mon Sep 17 00:00:00 2001 From: "A. 
Wilcox" <AWilcox@Wilcox-Tech.com> Date: Sat, 1 Apr 2023 23:45:45 -0500 Subject: [PATCH 45/47] system/zstd: Update to 1.5.4 --- system/zstd/APKBUILD | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/system/zstd/APKBUILD b/system/zstd/APKBUILD index 93fb1ab1a8..6ed1b2fd37 100644 --- a/system/zstd/APKBUILD +++ b/system/zstd/APKBUILD @@ -1,7 +1,7 @@ # Contributor: A. Wilcox <awilfox@adelielinux.org> # Maintainer: A. Wilcox <awilfox@adelielinux.org> pkgname=zstd -pkgver=1.5.2 +pkgver=1.5.4 pkgrel=0 pkgdesc="Fast real-time compression algorithm" url="https://facebook.github.io/zstd/" @@ -29,4 +29,4 @@ package() { make PREFIX="/usr" DESTDIR="$pkgdir" install } -sha512sums="96dbd2eb6623e3564a0fd36489b61bc3cb27758a584fdc9f064f3985d2e8b5605d7022890d00a6d15464d3cd0707d7e75d8cf6210323782d0af406b90a6d6784 zstd-1.5.2.tar.gz" +sha512sums="2896a6dd6b60cc251720356babcbab6018c874eb2149121b26e28041496fc355a9cb5fd1b39c91558fcfbafb789b3d721264a0f9b5734f893d5f3cdf97016394 zstd-1.5.4.tar.gz" -- GitLab From 8d507b61d2549117fc4ad2e6ff35d35e4062eac4 Mon Sep 17 00:00:00 2001 From: "A. Wilcox" <AWilcox@Wilcox-Tech.com> Date: Sun, 2 Apr 2023 17:07:03 -0500 Subject: [PATCH 46/47] system/easy-kernel: Update to 5.15.98-mc4 Also reconfigure all eight architectures for oldconfig. --- ...5.15.76.patch => 0100-linux-5.15.98.patch} | 217921 ++++++++++++--- system/easy-kernel/0500-print-fw-info.patch | 9 +- system/easy-kernel/1000-version.patch | 4 +- system/easy-kernel/APKBUILD | 28 +- system/easy-kernel/config-aarch64 | 15 +- system/easy-kernel/config-armv7 | 1851 +- system/easy-kernel/config-m68k | 110 +- system/easy-kernel/config-pmmx | 134 +- system/easy-kernel/config-ppc | 8 +- system/easy-kernel/config-ppc64 | 120 +- system/easy-kernel/config-sparc64 | 2115 +- system/easy-kernel/config-x86_64 | 145 +- 12 files changed, 175830 insertions(+), 46630 deletions(-) rename system/easy-kernel/{0100-linux-5.15.76.patch => 0100-linux-5.15.98.patch} (77%) diff --git a/system/easy-kernel/0100-linux-5.15.76.patch b/system/easy-kernel/0100-linux-5.15.98.patch similarity index 77% rename from system/easy-kernel/0100-linux-5.15.76.patch rename to system/easy-kernel/0100-linux-5.15.98.patch index 0680486dfe..afb515c077 100644 --- a/system/easy-kernel/0100-linux-5.15.76.patch +++ b/system/easy-kernel/0100-linux-5.15.98.patch @@ -45,6 +45,20 @@ index 308a6756d3bf3..491ead8044888 100644 KernelVersion: 4.2 Contact: linux-iio@vger.kernel.org Description: +diff --git a/Documentation/ABI/testing/sysfs-bus-spi-devices-spi-nor b/Documentation/ABI/testing/sysfs-bus-spi-devices-spi-nor +index d76cd3946434d..e9ef69aef20b1 100644 +--- a/Documentation/ABI/testing/sysfs-bus-spi-devices-spi-nor ++++ b/Documentation/ABI/testing/sysfs-bus-spi-devices-spi-nor +@@ -5,6 +5,9 @@ Contact: linux-mtd@lists.infradead.org + Description: (RO) The JEDEC ID of the SPI NOR flash as reported by the + flash device. + ++ The attribute is not present if the flash doesn't support ++ the "Read JEDEC ID" command (9Fh). This is the case for ++ non-JEDEC compliant flashes. 
+ + What: /sys/bus/spi/devices/.../spi-nor/manufacturer + Date: April 2021 diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu index b46ef147616ab..a7362b1096c4d 100644 --- a/Documentation/ABI/testing/sysfs-devices-system-cpu @@ -92,6 +106,30 @@ index f627e705e663b..48d41b6696270 100644 ====== ===================== ================================= What: /sys/fs/f2fs/<disk>/ckpt_thread_ioprio +diff --git a/Documentation/ABI/testing/sysfs-kernel-oops_count b/Documentation/ABI/testing/sysfs-kernel-oops_count +new file mode 100644 +index 0000000000000..156cca9dbc960 +--- /dev/null ++++ b/Documentation/ABI/testing/sysfs-kernel-oops_count +@@ -0,0 +1,6 @@ ++What: /sys/kernel/oops_count ++Date: November 2022 ++KernelVersion: 6.2.0 ++Contact: Linux Kernel Hardening List <linux-hardening@vger.kernel.org> ++Description: ++ Shows how many times the system has Oopsed since last boot. +diff --git a/Documentation/ABI/testing/sysfs-kernel-warn_count b/Documentation/ABI/testing/sysfs-kernel-warn_count +new file mode 100644 +index 0000000000000..90a029813717d +--- /dev/null ++++ b/Documentation/ABI/testing/sysfs-kernel-warn_count +@@ -0,0 +1,6 @@ ++What: /sys/kernel/warn_count ++Date: November 2022 ++KernelVersion: 6.2.0 ++Contact: Linux Kernel Hardening List <linux-hardening@vger.kernel.org> ++Description: ++ Shows how many times the system has Warned since last boot. diff --git a/Documentation/accounting/psi.rst b/Documentation/accounting/psi.rst index f2b3439edcc2c..5e40b3f437f90 100644 --- a/Documentation/accounting/psi.rst @@ -195,15 +233,114 @@ index 922c23bb4372a..c07dc0ee860e7 100644 163 char +diff --git a/Documentation/admin-guide/hw-vuln/cross-thread-rsb.rst b/Documentation/admin-guide/hw-vuln/cross-thread-rsb.rst +new file mode 100644 +index 0000000000000..ec6e9f5bcf9e8 +--- /dev/null ++++ b/Documentation/admin-guide/hw-vuln/cross-thread-rsb.rst +@@ -0,0 +1,92 @@ ++ ++.. SPDX-License-Identifier: GPL-2.0 ++ ++Cross-Thread Return Address Predictions ++======================================= ++ ++Certain AMD and Hygon processors are subject to a cross-thread return address ++predictions vulnerability. When running in SMT mode and one sibling thread ++transitions out of C0 state, the other sibling thread could use return target ++predictions from the sibling thread that transitioned out of C0. ++ ++The Spectre v2 mitigations protect the Linux kernel, as it fills the return ++address prediction entries with safe targets when context switching to the idle ++thread. However, KVM does allow a VMM to prevent exiting guest mode when ++transitioning out of C0. This could result in a guest-controlled return target ++being consumed by the sibling thread. ++ ++Affected processors ++------------------- ++ ++The following CPUs are vulnerable: ++ ++ - AMD Family 17h processors ++ - Hygon Family 18h processors ++ ++Related CVEs ++------------ ++ ++The following CVE entry is related to this issue: ++ ++ ============== ======================================= ++ CVE-2022-27672 Cross-Thread Return Address Predictions ++ ============== ======================================= ++ ++Problem ++------- ++ ++Affected SMT-capable processors support 1T and 2T modes of execution when SMT ++is enabled. In 2T mode, both threads in a core are executing code. For the ++processor core to enter 1T mode, it is required that one of the threads ++requests to transition out of the C0 state. 
This can be communicated with the ++HLT instruction or with an MWAIT instruction that requests non-C0. ++When the thread re-enters the C0 state, the processor transitions back ++to 2T mode, assuming the other thread is also still in C0 state. ++ ++In affected processors, the return address predictor (RAP) is partitioned ++depending on the SMT mode. For instance, in 2T mode each thread uses a private ++16-entry RAP, but in 1T mode, the active thread uses a 32-entry RAP. Upon ++transition between 1T/2T mode, the RAP contents are not modified but the RAP ++pointers (which control the next return target to use for predictions) may ++change. This behavior may result in return targets from one SMT thread being ++used by RET predictions in the sibling thread following a 1T/2T switch. In ++particular, a RET instruction executed immediately after a transition to 1T may ++use a return target from the thread that just became idle. In theory, this ++could lead to information disclosure if the return targets used do not come ++from trustworthy code. ++ ++Attack scenarios ++---------------- ++ ++An attack can be mounted on affected processors by performing a series of CALL ++instructions with targeted return locations and then transitioning out of C0 ++state. ++ ++Mitigation mechanism ++-------------------- ++ ++Before entering idle state, the kernel context switches to the idle thread. The ++context switch fills the RAP entries (referred to as the RSB in Linux) with safe ++targets by performing a sequence of CALL instructions. ++ ++Prevent a guest VM from directly putting the processor into an idle state by ++intercepting HLT and MWAIT instructions. ++ ++Both mitigations are required to fully address this issue. ++ ++Mitigation control on the kernel command line ++--------------------------------------------- ++ ++Use existing Spectre v2 mitigations that will fill the RSB on context switch. ++ ++Mitigation control for KVM - module parameter ++--------------------------------------------- ++ ++By default, the KVM hypervisor mitigates this issue by intercepting guest ++attempts to transition out of C0. A VMM can use the KVM_CAP_X86_DISABLE_EXITS ++capability to override those interceptions, but since this is not common, the ++mitigation that covers this path is not enabled by default. ++ ++The mitigation for the KVM_CAP_X86_DISABLE_EXITS capability can be turned on ++using the boolean module parameter mitigate_smt_rsb, e.g.: ++ kvm.mitigate_smt_rsb=1 diff --git a/Documentation/admin-guide/hw-vuln/index.rst b/Documentation/admin-guide/hw-vuln/index.rst -index 8cbc711cda935..4df436e7c4177 100644 +index 8cbc711cda935..e0614760a99e7 100644 --- a/Documentation/admin-guide/hw-vuln/index.rst +++ b/Documentation/admin-guide/hw-vuln/index.rst -@@ -17,3 +17,4 @@ are configurable at compile, boot or run time. +@@ -17,3 +17,5 @@ are configurable at compile, boot or run time. 
special-register-buffer-data-sampling.rst core-scheduling.rst l1d_flush.rst + processor_mmio_stale_data.rst ++ cross-thread-rsb.rst diff --git a/Documentation/admin-guide/hw-vuln/processor_mmio_stale_data.rst b/Documentation/admin-guide/hw-vuln/processor_mmio_stale_data.rst new file mode 100644 index 0000000000000..c98fd11907cc8 @@ -812,10 +949,26 @@ index aec2cd2aaea73..19754beb5a4e6 100644 In addition to the architecture-level kernel command line options affecting CPU idle time management, there are parameters affecting individual ``CPUIdle`` diff --git a/Documentation/admin-guide/sysctl/kernel.rst b/Documentation/admin-guide/sysctl/kernel.rst -index 426162009ce99..609b891754081 100644 +index 426162009ce99..48b91c485c993 100644 --- a/Documentation/admin-guide/sysctl/kernel.rst +++ b/Documentation/admin-guide/sysctl/kernel.rst -@@ -795,6 +795,7 @@ bit 1 print system memory info +@@ -671,6 +671,15 @@ This is the default behavior. + an oops event is detected. + + ++oops_limit ++========== ++ ++Number of kernel oopses after which the kernel should panic when ++``panic_on_oops`` is not set. Setting this to 0 disables checking ++the count. Setting this to 1 has the same effect as setting ++``panic_on_oops=1``. The default value is 10000. ++ ++ + osrelease, ostype & version + =========================== + +@@ -795,6 +804,7 @@ bit 1 print system memory info bit 2 print timer info bit 3 print locks info if ``CONFIG_LOCKDEP`` is on bit 4 print ftrace buffer @@ -823,7 +976,7 @@ index 426162009ce99..609b891754081 100644 ===== ============================================ So for example to print tasks and memory info on panic, user can:: -@@ -1013,28 +1014,22 @@ This is a directory, with the following entries: +@@ -1013,28 +1023,22 @@ This is a directory, with the following entries: * ``boot_id``: a UUID generated the first time this is retrieved, and unvarying after that; @@ -860,7 +1013,7 @@ index 426162009ce99..609b891754081 100644 randomize_va_space -@@ -1099,7 +1094,7 @@ task_delayacct +@@ -1099,7 +1103,7 @@ task_delayacct =============== Enables/disables task delay accounting (see @@ -869,6 +1022,23 @@ index 426162009ce99..609b891754081 100644 a small amount of overhead in the scheduler but is useful for debugging and performance tuning. It is required by some tools such as iotop. +@@ -1490,6 +1494,16 @@ entry will default to 2 instead of 0. + 2 Unprivileged calls to ``bpf()`` are disabled + = ============================================================= + ++ ++warn_limit ++========== ++ ++Number of kernel warnings after which the kernel should panic when ++``panic_on_warn`` is not set. Setting this to 0 disables checking ++the warning count. Setting this to 1 has the same effect as setting ++``panic_on_warn=1``. The default value is 0. 
++ ++ + watchdog + ======== + diff --git a/Documentation/admin-guide/sysctl/net.rst b/Documentation/admin-guide/sysctl/net.rst index 4150f74c521a8..5310f398794c1 100644 --- a/Documentation/admin-guide/sysctl/net.rst @@ -1220,6 +1390,67 @@ index 851cb07812173..047fd69e03770 100644 /* CVBS VDAC output port */ port@0 { +diff --git a/Documentation/devicetree/bindings/display/msm/dsi-controller-main.yaml b/Documentation/devicetree/bindings/display/msm/dsi-controller-main.yaml +index 35426fde86106..283a12cd3e144 100644 +--- a/Documentation/devicetree/bindings/display/msm/dsi-controller-main.yaml ++++ b/Documentation/devicetree/bindings/display/msm/dsi-controller-main.yaml +@@ -31,7 +31,7 @@ properties: + - description: Display byte clock + - description: Display byte interface clock + - description: Display pixel clock +- - description: Display escape clock ++ - description: Display core clock + - description: Display AHB clock + - description: Display AXI clock + +@@ -135,8 +135,6 @@ required: + - phy-names + - assigned-clocks + - assigned-clock-parents +- - power-domains +- - operating-points-v2 + - ports + + additionalProperties: false +diff --git a/Documentation/devicetree/bindings/display/msm/dsi-phy-10nm.yaml b/Documentation/devicetree/bindings/display/msm/dsi-phy-10nm.yaml +index 4399715953e1a..4dd5eed50506a 100644 +--- a/Documentation/devicetree/bindings/display/msm/dsi-phy-10nm.yaml ++++ b/Documentation/devicetree/bindings/display/msm/dsi-phy-10nm.yaml +@@ -39,7 +39,6 @@ required: + - compatible + - reg + - reg-names +- - vdds-supply + + unevaluatedProperties: false + +diff --git a/Documentation/devicetree/bindings/display/msm/dsi-phy-14nm.yaml b/Documentation/devicetree/bindings/display/msm/dsi-phy-14nm.yaml +index 064df50e21a5c..23355ac67d3d1 100644 +--- a/Documentation/devicetree/bindings/display/msm/dsi-phy-14nm.yaml ++++ b/Documentation/devicetree/bindings/display/msm/dsi-phy-14nm.yaml +@@ -37,7 +37,6 @@ required: + - compatible + - reg + - reg-names +- - vcca-supply + + unevaluatedProperties: false + +diff --git a/Documentation/devicetree/bindings/display/msm/dsi-phy-28nm.yaml b/Documentation/devicetree/bindings/display/msm/dsi-phy-28nm.yaml +index 69eecaa64b187..ddb0ac4c29d44 100644 +--- a/Documentation/devicetree/bindings/display/msm/dsi-phy-28nm.yaml ++++ b/Documentation/devicetree/bindings/display/msm/dsi-phy-28nm.yaml +@@ -34,6 +34,10 @@ properties: + vddio-supply: + description: Phandle to vdd-io regulator device node. + ++ qcom,dsi-phy-regulator-ldo-mode: ++ type: boolean ++ description: Indicates if the LDO mode PHY regulator is wanted. ++ + required: + - compatible + - reg diff --git a/Documentation/devicetree/bindings/display/sitronix,st7735r.yaml b/Documentation/devicetree/bindings/display/sitronix,st7735r.yaml index 0cebaaefda032..419c3b2ac5a6f 100644 --- a/Documentation/devicetree/bindings/display/sitronix,st7735r.yaml @@ -1596,6 +1827,400 @@ index c115c95ee584e..5b8db76b6cdd7 100644 - rockchip,px30-gmac - rockchip,rk3128-gmac - rockchip,rk3228-gmac +diff --git a/Documentation/devicetree/bindings/pci/fsl,imx6q-pcie.yaml b/Documentation/devicetree/bindings/pci/fsl,imx6q-pcie.yaml +index acea1cd444fd5..9b0548264a397 100644 +--- a/Documentation/devicetree/bindings/pci/fsl,imx6q-pcie.yaml ++++ b/Documentation/devicetree/bindings/pci/fsl,imx6q-pcie.yaml +@@ -14,9 +14,6 @@ description: |+ + This PCIe host controller is based on the Synopsys DesignWare PCIe IP + and thus inherits all the common properties defined in snps,dw-pcie.yaml. 
+ +-allOf: +- - $ref: /schemas/pci/snps,dw-pcie.yaml# +- + properties: + compatible: + enum: +@@ -59,7 +56,7 @@ properties: + - const: pcie + - const: pcie_bus + - const: pcie_phy +- - const: pcie_inbound_axi for imx6sx-pcie, pcie_aux for imx8mq-pcie ++ - enum: [ pcie_inbound_axi, pcie_aux ] + + num-lanes: + const: 1 +@@ -166,6 +163,47 @@ required: + - clocks + - clock-names + ++allOf: ++ - $ref: /schemas/pci/snps,dw-pcie.yaml# ++ - if: ++ properties: ++ compatible: ++ contains: ++ const: fsl,imx6sx-pcie ++ then: ++ properties: ++ clock-names: ++ items: ++ - {} ++ - {} ++ - {} ++ - const: pcie_inbound_axi ++ - if: ++ properties: ++ compatible: ++ contains: ++ const: fsl,imx8mq-pcie ++ then: ++ properties: ++ clock-names: ++ items: ++ - {} ++ - {} ++ - {} ++ - const: pcie_aux ++ - if: ++ properties: ++ compatible: ++ not: ++ contains: ++ enum: ++ - fsl,imx6sx-pcie ++ - fsl,imx8mq-pcie ++ then: ++ properties: ++ clock-names: ++ maxItems: 3 ++ + unevaluatedProperties: false + + examples: +diff --git a/Documentation/devicetree/bindings/pci/toshiba,visconti-pcie.yaml b/Documentation/devicetree/bindings/pci/toshiba,visconti-pcie.yaml +index 30b6396d83c83..aea0e2bcdd778 100644 +--- a/Documentation/devicetree/bindings/pci/toshiba,visconti-pcie.yaml ++++ b/Documentation/devicetree/bindings/pci/toshiba,visconti-pcie.yaml +@@ -36,7 +36,7 @@ properties: + - const: mpu + + interrupts: +- maxItems: 1 ++ maxItems: 2 + + clocks: + items: +@@ -94,8 +94,9 @@ examples: + #interrupt-cells = <1>; + ranges = <0x81000000 0 0x40000000 0 0x40000000 0 0x00010000>, + <0x82000000 0 0x50000000 0 0x50000000 0 0x20000000>; +- interrupts = <GIC_SPI 215 IRQ_TYPE_LEVEL_HIGH>; +- interrupt-names = "intr"; ++ interrupts = <GIC_SPI 211 IRQ_TYPE_LEVEL_HIGH>, ++ <GIC_SPI 215 IRQ_TYPE_LEVEL_HIGH>; ++ interrupt-names = "msi", "intr"; + interrupt-map-mask = <0 0 0 7>; + interrupt-map = + <0 0 0 1 &gic GIC_SPI 215 IRQ_TYPE_LEVEL_HIGH +diff --git a/Documentation/devicetree/bindings/phy/amlogic,g12a-usb2-phy.yaml b/Documentation/devicetree/bindings/phy/amlogic,g12a-usb2-phy.yaml +new file mode 100644 +index 0000000000000..ff86c87309a41 +--- /dev/null ++++ b/Documentation/devicetree/bindings/phy/amlogic,g12a-usb2-phy.yaml +@@ -0,0 +1,78 @@ ++# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) ++# Copyright 2019 BayLibre, SAS ++%YAML 1.2 ++--- ++$id: "http://devicetree.org/schemas/phy/amlogic,g12a-usb2-phy.yaml#" ++$schema: "http://devicetree.org/meta-schemas/core.yaml#" ++ ++title: Amlogic G12A USB2 PHY ++ ++maintainers: ++ - Neil Armstrong <narmstrong@baylibre.com> ++ ++properties: ++ compatible: ++ enum: ++ - amlogic,g12a-usb2-phy ++ - amlogic,a1-usb2-phy ++ ++ reg: ++ maxItems: 1 ++ ++ clocks: ++ maxItems: 1 ++ ++ clock-names: ++ items: ++ - const: xtal ++ ++ resets: ++ maxItems: 1 ++ ++ reset-names: ++ items: ++ - const: phy ++ ++ "#phy-cells": ++ const: 0 ++ ++ phy-supply: ++ description: ++ Phandle to a regulator that provides power to the PHY. This ++ regulator will be managed during the PHY power on/off sequence. 
++ ++required: ++ - compatible ++ - reg ++ - clocks ++ - clock-names ++ - resets ++ - reset-names ++ - "#phy-cells" ++ ++if: ++ properties: ++ compatible: ++ enum: ++ - amlogic,meson-a1-usb-ctrl ++ ++then: ++ properties: ++ power-domains: ++ maxItems: 1 ++ required: ++ - power-domains ++ ++additionalProperties: false ++ ++examples: ++ - | ++ phy@36000 { ++ compatible = "amlogic,g12a-usb2-phy"; ++ reg = <0x36000 0x2000>; ++ clocks = <&xtal>; ++ clock-names = "xtal"; ++ resets = <&phy_reset>; ++ reset-names = "phy"; ++ #phy-cells = <0>; ++ }; +diff --git a/Documentation/devicetree/bindings/phy/amlogic,g12a-usb3-pcie-phy.yaml b/Documentation/devicetree/bindings/phy/amlogic,g12a-usb3-pcie-phy.yaml +new file mode 100644 +index 0000000000000..84738644e3989 +--- /dev/null ++++ b/Documentation/devicetree/bindings/phy/amlogic,g12a-usb3-pcie-phy.yaml +@@ -0,0 +1,59 @@ ++# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) ++# Copyright 2019 BayLibre, SAS ++%YAML 1.2 ++--- ++$id: "http://devicetree.org/schemas/phy/amlogic,g12a-usb3-pcie-phy.yaml#" ++$schema: "http://devicetree.org/meta-schemas/core.yaml#" ++ ++title: Amlogic G12A USB3 + PCIE Combo PHY ++ ++maintainers: ++ - Neil Armstrong <narmstrong@baylibre.com> ++ ++properties: ++ compatible: ++ enum: ++ - amlogic,g12a-usb3-pcie-phy ++ ++ reg: ++ maxItems: 1 ++ ++ clocks: ++ maxItems: 1 ++ ++ clock-names: ++ items: ++ - const: ref_clk ++ ++ resets: ++ maxItems: 1 ++ ++ reset-names: ++ items: ++ - const: phy ++ ++ "#phy-cells": ++ const: 1 ++ ++required: ++ - compatible ++ - reg ++ - clocks ++ - clock-names ++ - resets ++ - reset-names ++ - "#phy-cells" ++ ++additionalProperties: false ++ ++examples: ++ - | ++ phy@46000 { ++ compatible = "amlogic,g12a-usb3-pcie-phy"; ++ reg = <0x46000 0x2000>; ++ clocks = <&ref_clk>; ++ clock-names = "ref_clk"; ++ resets = <&phy_reset>; ++ reset-names = "phy"; ++ #phy-cells = <1>; ++ }; +diff --git a/Documentation/devicetree/bindings/phy/amlogic,meson-g12a-usb2-phy.yaml b/Documentation/devicetree/bindings/phy/amlogic,meson-g12a-usb2-phy.yaml +deleted file mode 100644 +index 399ebde454095..0000000000000 +--- a/Documentation/devicetree/bindings/phy/amlogic,meson-g12a-usb2-phy.yaml ++++ /dev/null +@@ -1,78 +0,0 @@ +-# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) +-# Copyright 2019 BayLibre, SAS +-%YAML 1.2 +---- +-$id: "http://devicetree.org/schemas/phy/amlogic,meson-g12a-usb2-phy.yaml#" +-$schema: "http://devicetree.org/meta-schemas/core.yaml#" +- +-title: Amlogic G12A USB2 PHY +- +-maintainers: +- - Neil Armstrong <narmstrong@baylibre.com> +- +-properties: +- compatible: +- enum: +- - amlogic,meson-g12a-usb2-phy +- - amlogic,meson-a1-usb2-phy +- +- reg: +- maxItems: 1 +- +- clocks: +- maxItems: 1 +- +- clock-names: +- items: +- - const: xtal +- +- resets: +- maxItems: 1 +- +- reset-names: +- items: +- - const: phy +- +- "#phy-cells": +- const: 0 +- +- phy-supply: +- description: +- Phandle to a regulator that provides power to the PHY. This +- regulator will be managed during the PHY power on/off sequence. 
+- +-required: +- - compatible +- - reg +- - clocks +- - clock-names +- - resets +- - reset-names +- - "#phy-cells" +- +-if: +- properties: +- compatible: +- enum: +- - amlogic,meson-a1-usb-ctrl +- +-then: +- properties: +- power-domains: +- maxItems: 1 +- required: +- - power-domains +- +-additionalProperties: false +- +-examples: +- - | +- phy@36000 { +- compatible = "amlogic,meson-g12a-usb2-phy"; +- reg = <0x36000 0x2000>; +- clocks = <&xtal>; +- clock-names = "xtal"; +- resets = <&phy_reset>; +- reset-names = "phy"; +- #phy-cells = <0>; +- }; +diff --git a/Documentation/devicetree/bindings/phy/amlogic,meson-g12a-usb3-pcie-phy.yaml b/Documentation/devicetree/bindings/phy/amlogic,meson-g12a-usb3-pcie-phy.yaml +deleted file mode 100644 +index 453c083cf44cb..0000000000000 +--- a/Documentation/devicetree/bindings/phy/amlogic,meson-g12a-usb3-pcie-phy.yaml ++++ /dev/null +@@ -1,59 +0,0 @@ +-# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) +-# Copyright 2019 BayLibre, SAS +-%YAML 1.2 +---- +-$id: "http://devicetree.org/schemas/phy/amlogic,meson-g12a-usb3-pcie-phy.yaml#" +-$schema: "http://devicetree.org/meta-schemas/core.yaml#" +- +-title: Amlogic G12A USB3 + PCIE Combo PHY +- +-maintainers: +- - Neil Armstrong <narmstrong@baylibre.com> +- +-properties: +- compatible: +- enum: +- - amlogic,meson-g12a-usb3-pcie-phy +- +- reg: +- maxItems: 1 +- +- clocks: +- maxItems: 1 +- +- clock-names: +- items: +- - const: ref_clk +- +- resets: +- maxItems: 1 +- +- reset-names: +- items: +- - const: phy +- +- "#phy-cells": +- const: 1 +- +-required: +- - compatible +- - reg +- - clocks +- - clock-names +- - resets +- - reset-names +- - "#phy-cells" +- +-additionalProperties: false +- +-examples: +- - | +- phy@46000 { +- compatible = "amlogic,meson-g12a-usb3-pcie-phy"; +- reg = <0x46000 0x2000>; +- clocks = <&ref_clk>; +- clock-names = "ref_clk"; +- resets = <&phy_reset>; +- reset-names = "phy"; +- #phy-cells = <1>; +- }; diff --git a/Documentation/devicetree/bindings/pinctrl/aspeed,ast2600-pinctrl.yaml b/Documentation/devicetree/bindings/pinctrl/aspeed,ast2600-pinctrl.yaml index ad2866c997383..fcd82df3aebbd 100644 --- a/Documentation/devicetree/bindings/pinctrl/aspeed,ast2600-pinctrl.yaml @@ -1781,6 +2406,19 @@ index cc3fe5ed7421e..1b0062e3c1a4b 100644 then: required: - qcom,smd-channels +diff --git a/Documentation/devicetree/bindings/sound/qcom,wcd9335.txt b/Documentation/devicetree/bindings/sound/qcom,wcd9335.txt +index 5d6ea66a863fe..1f75feec3dec6 100644 +--- a/Documentation/devicetree/bindings/sound/qcom,wcd9335.txt ++++ b/Documentation/devicetree/bindings/sound/qcom,wcd9335.txt +@@ -109,7 +109,7 @@ audio-codec@1{ + reg = <1 0>; + interrupts = <&msmgpio 54 IRQ_TYPE_LEVEL_HIGH>; + interrupt-names = "intr2" +- reset-gpios = <&msmgpio 64 0>; ++ reset-gpios = <&msmgpio 64 GPIO_ACTIVE_LOW>; + slim-ifc-dev = <&wc9335_ifd>; + clock-names = "mclk", "native"; + clocks = <&rpmcc RPM_SMD_DIV_CLK1>, diff --git a/Documentation/devicetree/bindings/spi/nvidia,tegra210-quad.yaml b/Documentation/devicetree/bindings/spi/nvidia,tegra210-quad.yaml index 35a8045b2c70d..53627c6e2ae32 100644 --- a/Documentation/devicetree/bindings/spi/nvidia,tegra210-quad.yaml @@ -2019,6 +2657,63 @@ index b81794e0cfbb9..06ac89adaafba 100644 Intel Stratix10 SoC Service Layer --------------------------------- Some features of the Intel Stratix10 SoC require a level of privilege +diff --git a/Documentation/driver-api/spi.rst b/Documentation/driver-api/spi.rst +index f64cb666498aa..f28887045049d 100644 +--- a/Documentation/driver-api/spi.rst 
++++ b/Documentation/driver-api/spi.rst +@@ -25,8 +25,8 @@ hardware, which may be as simple as a set of GPIO pins or as complex as + a pair of FIFOs connected to dual DMA engines on the other side of the + SPI shift register (maximizing throughput). Such drivers bridge between + whatever bus they sit on (often the platform bus) and SPI, and expose +-the SPI side of their device as a :c:type:`struct spi_master +-<spi_master>`. SPI devices are children of that master, ++the SPI side of their device as a :c:type:`struct spi_controller ++<spi_controller>`. SPI devices are children of that master, + represented as a :c:type:`struct spi_device <spi_device>` and + manufactured from :c:type:`struct spi_board_info + <spi_board_info>` descriptors which are usually provided by +diff --git a/Documentation/fault-injection/fault-injection.rst b/Documentation/fault-injection/fault-injection.rst +index 4a25c5eb6f072..8c47847755a68 100644 +--- a/Documentation/fault-injection/fault-injection.rst ++++ b/Documentation/fault-injection/fault-injection.rst +@@ -83,9 +83,7 @@ configuration of fault-injection capabilities. + - /sys/kernel/debug/fail*/times: + + specifies how many times failures may happen at most. A value of -1 +- means "no limit". Note, though, that this file only accepts unsigned +- values. So, if you want to specify -1, you better use 'printf' instead +- of 'echo', e.g.: $ printf %#x -1 > times ++ means "no limit". + + - /sys/kernel/debug/fail*/space: + +@@ -277,7 +275,7 @@ Application Examples + echo Y > /sys/kernel/debug/$FAILTYPE/task-filter + echo 10 > /sys/kernel/debug/$FAILTYPE/probability + echo 100 > /sys/kernel/debug/$FAILTYPE/interval +- printf %#x -1 > /sys/kernel/debug/$FAILTYPE/times ++ echo -1 > /sys/kernel/debug/$FAILTYPE/times + echo 0 > /sys/kernel/debug/$FAILTYPE/space + echo 2 > /sys/kernel/debug/$FAILTYPE/verbose + echo 1 > /sys/kernel/debug/$FAILTYPE/ignore-gfp-wait +@@ -331,7 +329,7 @@ Application Examples + echo N > /sys/kernel/debug/$FAILTYPE/task-filter + echo 10 > /sys/kernel/debug/$FAILTYPE/probability + echo 100 > /sys/kernel/debug/$FAILTYPE/interval +- printf %#x -1 > /sys/kernel/debug/$FAILTYPE/times ++ echo -1 > /sys/kernel/debug/$FAILTYPE/times + echo 0 > /sys/kernel/debug/$FAILTYPE/space + echo 2 > /sys/kernel/debug/$FAILTYPE/verbose + echo 1 > /sys/kernel/debug/$FAILTYPE/ignore-gfp-wait +@@ -362,7 +360,7 @@ Application Examples + echo N > /sys/kernel/debug/$FAILTYPE/task-filter + echo 100 > /sys/kernel/debug/$FAILTYPE/probability + echo 0 > /sys/kernel/debug/$FAILTYPE/interval +- printf %#x -1 > /sys/kernel/debug/$FAILTYPE/times ++ echo -1 > /sys/kernel/debug/$FAILTYPE/times + echo 0 > /sys/kernel/debug/$FAILTYPE/space + echo 1 > /sys/kernel/debug/$FAILTYPE/verbose + diff --git a/Documentation/filesystems/ext4/attributes.rst b/Documentation/filesystems/ext4/attributes.rst index 54386a010a8d7..871d2da7a0a91 100644 --- a/Documentation/filesystems/ext4/attributes.rst @@ -2446,7 +3141,7 @@ index 2afccc63856ee..1cfbf1add2fc9 100644 bit 1: enable rescheduling of new connections when it is safe. 
That is, whenever expire_nodest_conn and for TCP sockets, when diff --git a/Documentation/process/code-of-conduct-interpretation.rst b/Documentation/process/code-of-conduct-interpretation.rst -index e899f14a4ba24..4f8a06b00f608 100644 +index e899f14a4ba24..43da2cc2e3b9b 100644 --- a/Documentation/process/code-of-conduct-interpretation.rst +++ b/Documentation/process/code-of-conduct-interpretation.rst @@ -51,7 +51,7 @@ the Technical Advisory Board (TAB) or other maintainers if you're @@ -2454,10 +3149,48 @@ index e899f14a4ba24..4f8a06b00f608 100644 considered a violation report unless you want it to be. If you are uncertain about approaching the TAB or any other maintainers, please -reach out to our conflict mediator, Mishi Choudhary <mishi@linux.com>. -+reach out to our conflict mediator, Joanna Lee <joanna.lee@gesmer.com>. ++reach out to our conflict mediator, Joanna Lee <jlee@linuxfoundation.org>. In the end, "be kind to each other" is really what the end goal is for everybody. We know everyone is human and we all fail at times, but the +diff --git a/Documentation/process/deprecated.rst b/Documentation/process/deprecated.rst +index 8ced754a5a0f6..f3484f60eae59 100644 +--- a/Documentation/process/deprecated.rst ++++ b/Documentation/process/deprecated.rst +@@ -70,6 +70,9 @@ Instead, the 2-factor form of the allocator should be used:: + + foo = kmalloc_array(count, size, GFP_KERNEL); + ++Specifically, kmalloc() can be replaced with kmalloc_array(), and ++kzalloc() can be replaced with kcalloc(). ++ + If no 2-factor form is available, the saturate-on-overflow helpers should + be used:: + +@@ -90,9 +93,20 @@ Instead, use the helper:: + array usage and switch to a `flexible array member + <#zero-length-and-one-element-arrays>`_ instead. + +-See array_size(), array3_size(), and struct_size(), +-for more details as well as the related check_add_overflow() and +-check_mul_overflow() family of functions. ++For other calculations, please compose the use of the size_mul(), ++size_add(), and size_sub() helpers. For example, in the case of:: ++ ++ foo = krealloc(current_size + chunk_size * (count - 3), GFP_KERNEL); ++ ++Instead, use the helpers:: ++ ++ foo = krealloc(size_add(current_size, ++ size_mul(chunk_size, ++ size_sub(count, 3))), GFP_KERNEL); ++ ++For more details, also see array3_size() and flex_array_size(), ++as well as the related check_mul_overflow(), check_add_overflow(), ++check_sub_overflow(), and check_shl_overflow() family of functions. 
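As a complement to the size_*() composition shown above, a minimal sketch of an
explicit check with check_mul_overflow() from <linux/overflow.h> ('count',
'size' and 'buf' are hypothetical names, not part of the documented example):

	size_t bytes;

	/*
	 * Stores count * size in 'bytes' and returns true if the
	 * multiplication overflowed, in which case 'bytes' must not
	 * be used for the allocation.
	 */
	if (check_mul_overflow(count, size, &bytes))
		return -EOVERFLOW;
	buf = kmalloc(bytes, GFP_KERNEL);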
+ + simple_strtol(), simple_strtoll(), simple_strtoul(), simple_strtoull() + ---------------------------------------------------------------------- diff --git a/Documentation/process/stable-kernel-rules.rst b/Documentation/process/stable-kernel-rules.rst index 003c865e9c212..fbcb48bc2a903 100644 --- a/Documentation/process/stable-kernel-rules.rst @@ -2539,6 +3272,30 @@ index 0ea967d345838..9b52f50a68542 100644 ALC680 ====== +diff --git a/Documentation/sphinx/load_config.py b/Documentation/sphinx/load_config.py +index eeb394b39e2cc..8b416bfd75ac1 100644 +--- a/Documentation/sphinx/load_config.py ++++ b/Documentation/sphinx/load_config.py +@@ -3,7 +3,7 @@ + + import os + import sys +-from sphinx.util.pycompat import execfile_ ++from sphinx.util.osutil import fs_encoding + + # ------------------------------------------------------------------------------ + def loadConfig(namespace): +@@ -48,7 +48,9 @@ def loadConfig(namespace): + sys.stdout.write("load additional sphinx-config: %s\n" % config_file) + config = namespace.copy() + config['__file__'] = config_file +- execfile_(config_file, config) ++ with open(config_file, 'rb') as f: ++ code = compile(f.read(), fs_encoding, 'exec') ++ exec(code, config) + del config['__file__'] + namespace.update(config) + else: diff --git a/Documentation/sphinx/requirements.txt b/Documentation/sphinx/requirements.txt index 9a35f50798a65..2c573541ab712 100644 --- a/Documentation/sphinx/requirements.txt @@ -2614,6 +3371,19 @@ index 8ddb9b09451c8..c47f381d0c002 100644 5.3 Clearing filters -------------------- +diff --git a/Documentation/trace/histogram.rst b/Documentation/trace/histogram.rst +index 533415644c54d..a78350a8fed43 100644 +--- a/Documentation/trace/histogram.rst ++++ b/Documentation/trace/histogram.rst +@@ -39,7 +39,7 @@ Documentation written by Tom Zanussi + will use the event's kernel stacktrace as the key. The keywords + 'keys' or 'key' can be used to specify keys, and the keywords + 'values', 'vals', or 'val' can be used to specify values. Compound +- keys consisting of up to two fields can be specified by the 'keys' ++ keys consisting of up to three fields can be specified by the 'keys' + keyword. Hashing a compound key produces a unique entry in the + table for each unique combination of component keys, and can be + useful for providing more fine-grained summaries of event data. diff --git a/Documentation/tty/device_drivers/oxsemi-tornado.rst b/Documentation/tty/device_drivers/oxsemi-tornado.rst new file mode 100644 index 0000000000000..0180d8bb08818 @@ -2787,8 +3557,104 @@ index 976d34445a246..f1421cf1a1b31 100644 * - __u64 - ``flags`` - See :ref:`Decode Parameters Flags <hevc_decode_params_flags>` +diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst +index a6729c8cf0634..b550f43214c79 100644 +--- a/Documentation/virt/kvm/api.rst ++++ b/Documentation/virt/kvm/api.rst +@@ -7265,3 +7265,63 @@ The argument to KVM_ENABLE_CAP is also a bitmask, and must be a subset + of the result of KVM_CHECK_EXTENSION. KVM will forward to userspace + the hypercalls whose corresponding bit is in the argument, and return + ENOSYS for the others. ++ ++9. Known KVM API problems ++========================= ++ ++In some cases, KVM's API has some inconsistencies or common pitfalls ++that userspace need to be aware of. This section details some of ++these issues. ++ ++Most of them are architecture specific, so the section is split by ++architecture. ++ ++9.1. 
x86 ++-------- ++ ++``KVM_GET_SUPPORTED_CPUID`` issues ++^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ++ ++In general, ``KVM_GET_SUPPORTED_CPUID`` is designed so that it is possible ++to take its result and pass it directly to ``KVM_SET_CPUID2``. This section ++documents some cases in which that requires some care. ++ ++Local APIC features ++~~~~~~~~~~~~~~~~~~~ ++ ++CPU[EAX=1]:ECX[21] (X2APIC) is reported by ``KVM_GET_SUPPORTED_CPUID``, ++but it can only be enabled if ``KVM_CREATE_IRQCHIP`` or ++``KVM_ENABLE_CAP(KVM_CAP_IRQCHIP_SPLIT)`` are used to enable in-kernel emulation of ++the local APIC. ++ ++The same is true for the ``KVM_FEATURE_PV_UNHALT`` paravirtualized feature. ++ ++CPU[EAX=1]:ECX[24] (TSC_DEADLINE) is not reported by ``KVM_GET_SUPPORTED_CPUID``. ++It can be enabled if ``KVM_CAP_TSC_DEADLINE_TIMER`` is present and the kernel ++has enabled in-kernel emulation of the local APIC. ++ ++CPU topology ++~~~~~~~~~~~~ ++ ++Several CPUID values include topology information for the host CPU: ++0x0b and 0x1f for Intel systems, 0x8000001e for AMD systems. Different ++versions of KVM return different values for this information and userspace ++should not rely on it. Currently they return all zeroes. ++ ++If userspace wishes to set up a guest topology, it should be careful that ++the values of these three leaves differ for each CPU. In particular, ++the APIC ID is found in EDX for all subleaves of 0x0b and 0x1f, and in EAX ++for 0x8000001e; the latter also encodes the core id and node id in bits ++7:0 of EBX and ECX respectively. ++ ++Obsolete ioctls and capabilities ++^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ++ ++KVM_CAP_DISABLE_QUIRKS does not let userspace know which quirks are actually ++available. Use ``KVM_CHECK_EXTENSION(KVM_CAP_DISABLE_QUIRKS2)`` instead if ++available. ++ ++Ordering of KVM_GET_*/KVM_SET_* ioctls ++^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ++ ++TBD +diff --git a/Documentation/virt/kvm/devices/vm.rst b/Documentation/virt/kvm/devices/vm.rst +index 0aa5b1cfd700c..60acc39e0e937 100644 +--- a/Documentation/virt/kvm/devices/vm.rst ++++ b/Documentation/virt/kvm/devices/vm.rst +@@ -215,6 +215,7 @@ KVM_S390_VM_TOD_EXT). + :Parameters: address of a buffer in user space to store the data (u8) to + :Returns: -EFAULT if the given address is not accessible from kernel space; + -EINVAL if setting the TOD clock extension to != 0 is not supported ++ -EOPNOTSUPP for a PV guest (TOD managed by the ultravisor) + + 3.2. ATTRIBUTE: KVM_S390_VM_TOD_LOW + ----------------------------------- +@@ -224,6 +225,7 @@ the POP (u64). + + :Parameters: address of a buffer in user space to store the data (u64) to + :Returns: -EFAULT if the given address is not accessible from kernel space ++ -EOPNOTSUPP for a PV guest (TOD managed by the ultravisor) + + 3.3. ATTRIBUTE: KVM_S390_VM_TOD_EXT + ----------------------------------- +@@ -237,6 +239,7 @@ it, it is stored as 0 and not allowed to be set to a value != 0. + (kvm_s390_vm_tod_clock) to + :Returns: -EFAULT if the given address is not accessible from kernel space; + -EINVAL if setting the TOD clock extension to != 0 is not supported ++ -EOPNOTSUPP for a PV guest (TOD managed by the ultravisor) + + 4. 
GROUP: KVM_S390_VM_CRYPTO + ============================ diff --git a/MAINTAINERS b/MAINTAINERS -index 3b79fd441dde8..edc32575828b5 100644 +index 3b79fd441dde8..d0884a5d49b99 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -434,6 +434,7 @@ ACPI VIOT DRIVER @@ -2807,7 +3673,24 @@ index 3b79fd441dde8..edc32575828b5 100644 S: Maintained T: git git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu.git F: drivers/iommu/amd/ -@@ -5602,6 +5604,7 @@ M: Christoph Hellwig <hch@lst.de> +@@ -3112,7 +3114,7 @@ F: drivers/net/ieee802154/atusb.h + AUDIT SUBSYSTEM + M: Paul Moore <paul@paul-moore.com> + M: Eric Paris <eparis@redhat.com> +-L: linux-audit@redhat.com (moderated for non-subscribers) ++L: audit@vger.kernel.org + S: Supported + W: https://github.com/linux-audit + T: git git://git.kernel.org/pub/scm/linux/kernel/git/pcmoore/audit.git +@@ -3405,6 +3407,7 @@ F: net/sched/act_bpf.c + F: net/sched/cls_bpf.c + F: samples/bpf/ + F: scripts/bpf_doc.py ++F: scripts/pahole-version.sh + F: tools/bpf/ + F: tools/lib/bpf/ + F: tools/testing/selftests/bpf/ +@@ -5602,6 +5605,7 @@ M: Christoph Hellwig <hch@lst.de> M: Marek Szyprowski <m.szyprowski@samsung.com> R: Robin Murphy <robin.murphy@arm.com> L: iommu@lists.linux-foundation.org @@ -2815,7 +3698,7 @@ index 3b79fd441dde8..edc32575828b5 100644 S: Supported W: http://git.infradead.org/users/hch/dma-mapping.git T: git git://git.infradead.org/users/hch/dma-mapping.git -@@ -5614,6 +5617,7 @@ F: kernel/dma/ +@@ -5614,6 +5618,7 @@ F: kernel/dma/ DMA MAPPING BENCHMARK M: Barry Song <song.bao.hua@hisilicon.com> L: iommu@lists.linux-foundation.org @@ -2823,7 +3706,7 @@ index 3b79fd441dde8..edc32575828b5 100644 F: kernel/dma/map_benchmark.c F: tools/testing/selftests/dma/ -@@ -7024,7 +7028,6 @@ F: drivers/net/mdio/fwnode_mdio.c +@@ -7024,7 +7029,6 @@ F: drivers/net/mdio/fwnode_mdio.c F: drivers/net/mdio/of_mdio.c F: drivers/net/pcs/ F: drivers/net/phy/ @@ -2831,7 +3714,7 @@ index 3b79fd441dde8..edc32575828b5 100644 F: include/dt-bindings/net/qca-ar803x.h F: include/linux/*mdio*.h F: include/linux/mdio/*.h -@@ -7036,6 +7039,7 @@ F: include/linux/platform_data/mdio-gpio.h +@@ -7036,6 +7040,7 @@ F: include/linux/platform_data/mdio-gpio.h F: include/trace/events/mdio.h F: include/uapi/linux/mdio.h F: include/uapi/linux/mii.h @@ -2839,7 +3722,7 @@ index 3b79fd441dde8..edc32575828b5 100644 EXFAT FILE SYSTEM M: Namjae Jeon <linkinjeon@kernel.org> -@@ -7115,6 +7119,7 @@ F: drivers/gpu/drm/exynos/exynos_dp* +@@ -7115,6 +7120,7 @@ F: drivers/gpu/drm/exynos/exynos_dp* EXYNOS SYSMMU (IOMMU) driver M: Marek Szyprowski <m.szyprowski@samsung.com> L: iommu@lists.linux-foundation.org @@ -2847,7 +3730,26 @@ index 3b79fd441dde8..edc32575828b5 100644 S: Maintained F: drivers/iommu/exynos-iommu.c -@@ -7947,9 +7952,10 @@ F: drivers/media/usb/go7007/ +@@ -7239,9 +7245,6 @@ F: include/linux/fs.h + F: include/linux/fs_types.h + F: include/uapi/linux/fs.h + F: include/uapi/linux/openat2.h +-X: fs/io-wq.c +-X: fs/io-wq.h +-X: fs/io_uring.c + + FINTEK F75375S HARDWARE MONITOR AND FAN CONTROLLER DRIVER + M: Riku Voipio <riku.voipio@iki.fi> +@@ -7744,7 +7747,7 @@ F: Documentation/locking/*futex* + F: include/asm-generic/futex.h + F: include/linux/futex.h + F: include/uapi/linux/futex.h +-F: kernel/futex.c ++F: kernel/futex/* + F: tools/perf/bench/futex* + F: tools/testing/selftests/futex/ + +@@ -7947,9 +7950,10 @@ F: drivers/media/usb/go7007/ GOODIX TOUCHSCREEN M: Bastien Nocera <hadess@hadess.net> @@ -2859,7 +3761,7 @@ index 3b79fd441dde8..edc32575828b5 100644 GOOGLE ETHERNET DRIVERS M: Jeroen 
de Borst <jeroendb@google.com> -@@ -9457,6 +9463,7 @@ INTEL IOMMU (VT-d) +@@ -9457,6 +9461,7 @@ INTEL IOMMU (VT-d) M: David Woodhouse <dwmw2@infradead.org> M: Lu Baolu <baolu.lu@linux.intel.com> L: iommu@lists.linux-foundation.org @@ -2867,7 +3769,7 @@ index 3b79fd441dde8..edc32575828b5 100644 S: Supported T: git git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu.git F: drivers/iommu/intel/ -@@ -9793,6 +9800,7 @@ IOMMU DRIVERS +@@ -9793,6 +9798,7 @@ IOMMU DRIVERS M: Joerg Roedel <joro@8bytes.org> M: Will Deacon <will@kernel.org> L: iommu@lists.linux-foundation.org @@ -2875,7 +3777,18 @@ index 3b79fd441dde8..edc32575828b5 100644 S: Maintained T: git git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu.git F: Documentation/devicetree/bindings/iommu/ -@@ -11795,6 +11803,7 @@ F: drivers/i2c/busses/i2c-mt65xx.c +@@ -9810,9 +9816,7 @@ L: io-uring@vger.kernel.org + S: Maintained + T: git git://git.kernel.dk/linux-block + T: git git://git.kernel.dk/liburing +-F: fs/io-wq.c +-F: fs/io-wq.h +-F: fs/io_uring.c ++F: io_uring/ + F: include/linux/io_uring.h + F: include/uapi/linux/io_uring.h + F: tools/io_uring/ +@@ -11795,6 +11799,7 @@ F: drivers/i2c/busses/i2c-mt65xx.c MEDIATEK IOMMU DRIVER M: Yong Wu <yong.wu@mediatek.com> L: iommu@lists.linux-foundation.org @@ -2883,7 +3796,7 @@ index 3b79fd441dde8..edc32575828b5 100644 L: linux-mediatek@lists.infradead.org (moderated for non-subscribers) S: Supported F: Documentation/devicetree/bindings/iommu/mediatek* -@@ -15554,6 +15563,7 @@ F: drivers/i2c/busses/i2c-qcom-cci.c +@@ -15554,6 +15559,7 @@ F: drivers/i2c/busses/i2c-qcom-cci.c QUALCOMM IOMMU M: Rob Clark <robdclark@gmail.com> L: iommu@lists.linux-foundation.org @@ -2891,7 +3804,7 @@ index 3b79fd441dde8..edc32575828b5 100644 L: linux-arm-msm@vger.kernel.org S: Maintained F: drivers/iommu/arm/arm-smmu/qcom_iommu.c -@@ -15720,6 +15730,8 @@ F: arch/mips/generic/board-ranchu.c +@@ -15720,6 +15726,8 @@ F: arch/mips/generic/board-ranchu.c RANDOM NUMBER DRIVER M: "Theodore Ts'o" <tytso@mit.edu> @@ -2900,7 +3813,7 @@ index 3b79fd441dde8..edc32575828b5 100644 S: Maintained F: drivers/char/random.c -@@ -17980,6 +17992,7 @@ F: arch/x86/boot/video* +@@ -17980,6 +17988,7 @@ F: arch/x86/boot/video* SWIOTLB SUBSYSTEM M: Christoph Hellwig <hch@infradead.org> L: iommu@lists.linux-foundation.org @@ -2908,7 +3821,7 @@ index 3b79fd441dde8..edc32575828b5 100644 S: Supported W: http://git.infradead.org/users/hch/dma-mapping.git T: git git://git.infradead.org/users/hch/dma-mapping.git -@@ -20560,12 +20573,14 @@ M: Juergen Gross <jgross@suse.com> +@@ -20560,12 +20569,14 @@ M: Juergen Gross <jgross@suse.com> M: Stefano Stabellini <sstabellini@kernel.org> L: xen-devel@lists.xenproject.org (moderated for non-subscribers) L: iommu@lists.linux-foundation.org @@ -2924,7 +3837,7 @@ index 3b79fd441dde8..edc32575828b5 100644 M: linux-xfs@vger.kernel.org L: linux-xfs@vger.kernel.org diff --git a/Makefile b/Makefile -index ed6e7ec60eff6..e7293e7a7ee98 100644 +index ed6e7ec60eff6..b17ce4c2e8f28 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ @@ -2932,11 +3845,19 @@ index ed6e7ec60eff6..e7293e7a7ee98 100644 VERSION = 5 PATCHLEVEL = 15 -SUBLEVEL = 0 -+SUBLEVEL = 76 ++SUBLEVEL = 98 EXTRAVERSION = NAME = Trick or Treat -@@ -480,6 +480,8 @@ LZ4 = lz4c +@@ -430,6 +430,7 @@ else + HOSTCC = gcc + HOSTCXX = g++ + endif ++HOSTPKG_CONFIG = pkg-config + + export KBUILD_USERCFLAGS := -Wall -Wmissing-prototypes -Wstrict-prototypes \ + -O2 -fomit-frame-pointer -std=gnu89 +@@ -480,6 +481,8 @@ LZ4 = lz4c XZ = xz ZSTD = zstd @@ -2945,7 
+3866,16 @@ index ed6e7ec60eff6..e7293e7a7ee98 100644 CHECKFLAGS := -D__linux__ -Dlinux -D__STDC__ -Dunix -D__unix__ \ -Wbitwise -Wno-return-void -Wno-unknown-attribute $(CF) NOSTDINC_FLAGS := -@@ -534,6 +536,7 @@ export KBUILD_CFLAGS CFLAGS_KERNEL CFLAGS_MODULE +@@ -523,7 +526,7 @@ KBUILD_LDFLAGS_MODULE := + KBUILD_LDFLAGS := + CLANG_FLAGS := + +-export ARCH SRCARCH CONFIG_SHELL BASH HOSTCC KBUILD_HOSTCFLAGS CROSS_COMPILE LD CC ++export ARCH SRCARCH CONFIG_SHELL BASH HOSTCC KBUILD_HOSTCFLAGS CROSS_COMPILE LD CC HOSTPKG_CONFIG + export CPP AR NM STRIP OBJCOPY OBJDUMP READELF PAHOLE RESOLVE_BTFIDS LEX YACC AWK INSTALLKERNEL + export PERL PYTHON3 CHECK CHECKFLAGS MAKE UTS_MACHINE HOSTCXX + export KGZIP KBZIP2 KLZOP LZMA LZ4 XZ ZSTD +@@ -534,6 +537,7 @@ export KBUILD_CFLAGS CFLAGS_KERNEL CFLAGS_MODULE export KBUILD_AFLAGS AFLAGS_KERNEL AFLAGS_MODULE export KBUILD_AFLAGS_MODULE KBUILD_CFLAGS_MODULE KBUILD_LDFLAGS_MODULE export KBUILD_AFLAGS_KERNEL KBUILD_CFLAGS_KERNEL @@ -2953,7 +3883,7 @@ index ed6e7ec60eff6..e7293e7a7ee98 100644 # Files to ignore in find ... statements -@@ -687,12 +690,19 @@ endif +@@ -687,12 +691,19 @@ endif ifdef CONFIG_CC_IS_GCC RETPOLINE_CFLAGS := $(call cc-option,-mindirect-branch=thunk-extern -mindirect-branch-register) @@ -2973,7 +3903,7 @@ index ed6e7ec60eff6..e7293e7a7ee98 100644 export RETPOLINE_CFLAGS export RETPOLINE_VDSO_CFLAGS -@@ -811,6 +821,9 @@ endif +@@ -811,6 +822,9 @@ endif KBUILD_CFLAGS += $(call cc-disable-warning, unused-but-set-variable) KBUILD_CFLAGS += $(call cc-disable-warning, unused-const-variable) @@ -2983,7 +3913,7 @@ index ed6e7ec60eff6..e7293e7a7ee98 100644 ifdef CONFIG_FRAME_POINTER KBUILD_CFLAGS += -fno-omit-frame-pointer -fno-optimize-sibling-calls else -@@ -831,12 +844,12 @@ endif +@@ -831,12 +845,12 @@ endif # Initialize all stack variables with a zero value. ifdef CONFIG_INIT_STACK_ALL_ZERO @@ -2999,7 +3929,7 @@ index ed6e7ec60eff6..e7293e7a7ee98 100644 # While VLAs have been removed, GCC produces unreachable stack probes # for the randomize_kstack_offset feature. Disable it for all compilers. 
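For context on the -ftrivial-auto-var-init flags selected in the hunk above, a
sketch of the behavior they buy (function and variable names are hypothetical):

	/*
	 * Without the flag, 'tmp' holds stale stack contents here; with
	 * -ftrivial-auto-var-init=zero the compiler zeroes it at function
	 * entry, so this buggy read becomes deterministic instead of a
	 * potential information leak.
	 */
	int read_uninitialized(void)
	{
		int tmp;
		return tmp;
	}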
-@@ -857,7 +870,9 @@ else +@@ -857,7 +871,9 @@ else DEBUG_CFLAGS += -g endif @@ -3010,7 +3940,7 @@ index ed6e7ec60eff6..e7293e7a7ee98 100644 KBUILD_AFLAGS += -Wa,-gdwarf-2 endif -@@ -1008,6 +1023,21 @@ ifdef CONFIG_CC_IS_GCC +@@ -1008,6 +1024,21 @@ ifdef CONFIG_CC_IS_GCC KBUILD_CFLAGS += -Wno-maybe-uninitialized endif @@ -3032,7 +3962,7 @@ index ed6e7ec60eff6..e7293e7a7ee98 100644 # disable invalid "can't wrap" optimizations for signed / pointers KBUILD_CFLAGS += -fno-strict-overflow -@@ -1053,6 +1083,11 @@ KBUILD_CFLAGS += $(KCFLAGS) +@@ -1053,6 +1084,11 @@ KBUILD_CFLAGS += $(KCFLAGS) KBUILD_LDFLAGS_MODULE += --build-id=sha1 LDFLAGS_vmlinux += --build-id=sha1 @@ -3044,7 +3974,18 @@ index ed6e7ec60eff6..e7293e7a7ee98 100644 ifeq ($(CONFIG_STRIP_ASM_SYMS),y) LDFLAGS_vmlinux += $(call ld-option, -X,) endif -@@ -1125,13 +1160,11 @@ vmlinux-alldirs := $(sort $(vmlinux-dirs) Documentation \ +@@ -1115,7 +1151,9 @@ export MODORDER := $(extmod_prefix)modules.order + export MODULES_NSDEPS := $(extmod_prefix)modules.nsdeps + + ifeq ($(KBUILD_EXTMOD),) +-core-y += kernel/ certs/ mm/ fs/ ipc/ security/ crypto/ block/ ++core-y += kernel/ certs/ mm/ fs/ ipc/ security/ crypto/ ++core-$(CONFIG_BLOCK) += block/ ++core-$(CONFIG_IO_URING) += io_uring/ + + vmlinux-dirs := $(patsubst %/,%,$(filter %/, \ + $(core-y) $(core-m) $(drivers-y) $(drivers-m) \ +@@ -1125,13 +1163,11 @@ vmlinux-alldirs := $(sort $(vmlinux-dirs) Documentation \ $(patsubst %/,%,$(filter %/, $(core-) \ $(drivers-) $(libs-)))) @@ -3060,7 +4001,7 @@ index ed6e7ec60eff6..e7293e7a7ee98 100644 # Externally visible symbols (used by link-vmlinux.sh) KBUILD_VMLINUX_OBJS := $(head-y) $(patsubst %/,%/built-in.a, $(core-y)) KBUILD_VMLINUX_OBJS += $(addsuffix built-in.a, $(filter %/, $(libs-y))) -@@ -1160,7 +1193,7 @@ KBUILD_MODULES := 1 +@@ -1160,7 +1196,7 @@ KBUILD_MODULES := 1 autoksyms_recursive: descend modules.order $(Q)$(CONFIG_SHELL) $(srctree)/scripts/adjust_autoksyms.sh \ @@ -3069,7 +4010,7 @@ index ed6e7ec60eff6..e7293e7a7ee98 100644 endif autoksyms_h := $(if $(CONFIG_TRIM_UNUSED_KSYMS), include/generated/autoksyms.h) -@@ -1301,8 +1334,7 @@ hdr-inst := -f $(srctree)/scripts/Makefile.headersinst obj +@@ -1301,8 +1337,7 @@ hdr-inst := -f $(srctree)/scripts/Makefile.headersinst obj PHONY += headers headers: $(version_h) scripts_unifdef uapi-asm-generic archheaders archscripts @@ -3124,6 +4065,19 @@ index 18f48a6f2ff6d..8f3f5eecba28b 100644 #define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE_MOVABLE extern void copy_page(void * _to, void * _from); +diff --git a/arch/alpha/include/asm/thread_info.h b/arch/alpha/include/asm/thread_info.h +index 2592356e32154..0ce1eee0924b1 100644 +--- a/arch/alpha/include/asm/thread_info.h ++++ b/arch/alpha/include/asm/thread_info.h +@@ -77,7 +77,7 @@ register struct thread_info *__current_thread_info __asm__("$8"); + + /* Work to do on interrupt/exception return. */ + #define _TIF_WORK_MASK (_TIF_SIGPENDING | _TIF_NEED_RESCHED | \ +- _TIF_NOTIFY_RESUME) ++ _TIF_NOTIFY_RESUME | _TIF_NOTIFY_SIGNAL) + + /* Work to do on any return to userspace. 
*/ + #define _TIF_ALLWORK_MASK (_TIF_WORK_MASK \ diff --git a/arch/alpha/include/asm/timex.h b/arch/alpha/include/asm/timex.h index b565cc6f408e9..f89798da8a147 100644 --- a/arch/alpha/include/asm/timex.h @@ -3135,6 +4089,22 @@ index b565cc6f408e9..f89798da8a147 100644 +#define get_cycles get_cycles #endif +diff --git a/arch/alpha/kernel/entry.S b/arch/alpha/kernel/entry.S +index e227f3a29a43c..c41a5a9c3b9f2 100644 +--- a/arch/alpha/kernel/entry.S ++++ b/arch/alpha/kernel/entry.S +@@ -469,8 +469,10 @@ entSys: + #ifdef CONFIG_AUDITSYSCALL + lda $6, _TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT + and $3, $6, $3 +-#endif + bne $3, strace ++#else ++ blbs $3, strace /* check for SYSCALL_TRACE in disguise */ ++#endif + beq $4, 1f + ldq $27, 0($5) + 1: jsr $26, ($27), sys_ni_syscall diff --git a/arch/alpha/kernel/rtc.c b/arch/alpha/kernel/rtc.c index ce3077946e1d9..fb3025396ac96 100644 --- a/arch/alpha/kernel/rtc.c @@ -3166,6 +4136,76 @@ index 90635ef5dafac..6dc952b0df4a9 100644 return count; } +diff --git a/arch/alpha/kernel/traps.c b/arch/alpha/kernel/traps.c +index e805106409f76..f5ba12adde67c 100644 +--- a/arch/alpha/kernel/traps.c ++++ b/arch/alpha/kernel/traps.c +@@ -192,7 +192,7 @@ die_if_kernel(char * str, struct pt_regs *regs, long err, unsigned long *r9_15) + local_irq_enable(); + while (1); + } +- do_exit(SIGSEGV); ++ make_task_dead(SIGSEGV); + } + + #ifndef CONFIG_MATHEMU +@@ -577,7 +577,7 @@ do_entUna(void * va, unsigned long opcode, unsigned long reg, + + printk("Bad unaligned kernel access at %016lx: %p %lx %lu\n", + pc, va, opcode, reg); +- do_exit(SIGSEGV); ++ make_task_dead(SIGSEGV); + + got_exception: + /* Ok, we caught the exception, but we don't want it. Is there +@@ -632,7 +632,7 @@ got_exception: + local_irq_enable(); + while (1); + } +- do_exit(SIGSEGV); ++ make_task_dead(SIGSEGV); + } + + /* +diff --git a/arch/alpha/mm/fault.c b/arch/alpha/mm/fault.c +index eee5102c3d889..e9193d52222ea 100644 +--- a/arch/alpha/mm/fault.c ++++ b/arch/alpha/mm/fault.c +@@ -204,7 +204,7 @@ retry: + printk(KERN_ALERT "Unable to handle kernel paging request at " + "virtual address %016lx\n", address); + die_if_kernel("Oops", regs, cause, (unsigned long*)regs - 16); +- do_exit(SIGKILL); ++ make_task_dead(SIGKILL); + + /* We ran out of memory, or some other thing happened to us that + made us unable to handle the page fault gracefully. 
*/ +diff --git a/arch/arc/include/asm/io.h b/arch/arc/include/asm/io.h +index 8f777d6441a5d..80347382a3800 100644 +--- a/arch/arc/include/asm/io.h ++++ b/arch/arc/include/asm/io.h +@@ -32,7 +32,7 @@ static inline void ioport_unmap(void __iomem *addr) + { + } + +-extern void iounmap(const void __iomem *addr); ++extern void iounmap(const volatile void __iomem *addr); + + /* + * io{read,write}{16,32}be() macros +diff --git a/arch/arc/include/asm/pgtable-levels.h b/arch/arc/include/asm/pgtable-levels.h +index 8084ef2f64910..4e7a19cb8e528 100644 +--- a/arch/arc/include/asm/pgtable-levels.h ++++ b/arch/arc/include/asm/pgtable-levels.h +@@ -163,7 +163,7 @@ + #define pmd_page_vaddr(pmd) (pmd_val(pmd) & PAGE_MASK) + #define pmd_page(pmd) virt_to_page(pmd_page_vaddr(pmd)) + #define set_pmd(pmdp, pmd) (*(pmdp) = pmd) +-#define pmd_pgtable(pmd) ((pgtable_t) pmd_page_vaddr(pmd)) ++#define pmd_pgtable(pmd) ((pgtable_t) pmd_page(pmd)) + + /* + * 4th level paging: pte diff --git a/arch/arc/kernel/entry.S b/arch/arc/kernel/entry.S index dd77a0c8f740b..66ba549b520fc 100644 --- a/arch/arc/kernel/entry.S @@ -3200,6 +4240,19 @@ index 3793876f42d9b..5f7f5aab361f1 100644 return 0; } +diff --git a/arch/arc/mm/ioremap.c b/arch/arc/mm/ioremap.c +index 0ee75aca6e109..712c2311daefb 100644 +--- a/arch/arc/mm/ioremap.c ++++ b/arch/arc/mm/ioremap.c +@@ -94,7 +94,7 @@ void __iomem *ioremap_prot(phys_addr_t paddr, unsigned long size, + EXPORT_SYMBOL(ioremap_prot); + + +-void iounmap(const void __iomem *addr) ++void iounmap(const volatile void __iomem *addr) + { + /* weird double cast to handle phys_addr_t > 32 bits */ + if (arc_uncached_addr_space((phys_addr_t)(u32)addr)) diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index dcf2df6da98f0..a8ae17f5740d9 100644 --- a/arch/arm/Kconfig @@ -3405,6 +4458,46 @@ index 7e0934180724d..7a72fc636a7a7 100644 omap3-beagle-xm.dtb \ omap3-beagle-xm-ab.dtb \ omap3-cm-t3517.dtb \ +diff --git a/arch/arm/boot/dts/am335x-pcm-953.dtsi b/arch/arm/boot/dts/am335x-pcm-953.dtsi +index 124026fa0d095..f207499461b34 100644 +--- a/arch/arm/boot/dts/am335x-pcm-953.dtsi ++++ b/arch/arm/boot/dts/am335x-pcm-953.dtsi +@@ -12,22 +12,20 @@ + compatible = "phytec,am335x-pcm-953", "phytec,am335x-phycore-som", "ti,am33xx"; + + /* Power */ +- regulators { +- vcc3v3: fixedregulator@1 { +- compatible = "regulator-fixed"; +- regulator-name = "vcc3v3"; +- regulator-min-microvolt = <3300000>; +- regulator-max-microvolt = <3300000>; +- regulator-boot-on; +- }; ++ vcc3v3: fixedregulator1 { ++ compatible = "regulator-fixed"; ++ regulator-name = "vcc3v3"; ++ regulator-min-microvolt = <3300000>; ++ regulator-max-microvolt = <3300000>; ++ regulator-boot-on; ++ }; + +- vcc1v8: fixedregulator@2 { +- compatible = "regulator-fixed"; +- regulator-name = "vcc1v8"; +- regulator-min-microvolt = <1800000>; +- regulator-max-microvolt = <1800000>; +- regulator-boot-on; +- }; ++ vcc1v8: fixedregulator2 { ++ compatible = "regulator-fixed"; ++ regulator-name = "vcc1v8"; ++ regulator-min-microvolt = <1800000>; ++ regulator-max-microvolt = <1800000>; ++ regulator-boot-on; + }; + + /* User IO */ diff --git a/arch/arm/boot/dts/am33xx-l4.dtsi b/arch/arm/boot/dts/am33xx-l4.dtsi index c9629cb5ccd1e..9a750883b987b 100644 --- a/arch/arm/boot/dts/am33xx-l4.dtsi @@ -3559,11 +4652,91 @@ index c260aa1a85bdb..a1f029e9d1f3d 100644 &atl_tm { status = "disabled"; }; +diff --git a/arch/arm/boot/dts/armada-370.dtsi b/arch/arm/boot/dts/armada-370.dtsi +index 46e6d3ed8f35a..c042c416a94a3 100644 +--- a/arch/arm/boot/dts/armada-370.dtsi ++++ 
b/arch/arm/boot/dts/armada-370.dtsi +@@ -74,7 +74,7 @@ + + pcie2: pcie@2,0 { + device_type = "pci"; +- assigned-addresses = <0x82002800 0 0x80000 0 0x2000>; ++ assigned-addresses = <0x82001000 0 0x80000 0 0x2000>; + reg = <0x1000 0 0 0 0>; + #address-cells = <3>; + #size-cells = <2>; +diff --git a/arch/arm/boot/dts/armada-375.dtsi b/arch/arm/boot/dts/armada-375.dtsi +index 7f2f24a29e6c1..352a2f7ba3114 100644 +--- a/arch/arm/boot/dts/armada-375.dtsi ++++ b/arch/arm/boot/dts/armada-375.dtsi +@@ -582,7 +582,7 @@ + + pcie1: pcie@2,0 { + device_type = "pci"; +- assigned-addresses = <0x82000800 0 0x44000 0 0x2000>; ++ assigned-addresses = <0x82001000 0 0x44000 0 0x2000>; + reg = <0x1000 0 0 0 0>; + #address-cells = <3>; + #size-cells = <2>; +diff --git a/arch/arm/boot/dts/armada-380.dtsi b/arch/arm/boot/dts/armada-380.dtsi +index cff1269f3fbfd..7146cc8f082af 100644 +--- a/arch/arm/boot/dts/armada-380.dtsi ++++ b/arch/arm/boot/dts/armada-380.dtsi +@@ -79,7 +79,7 @@ + /* x1 port */ + pcie@2,0 { + device_type = "pci"; +- assigned-addresses = <0x82000800 0 0x40000 0 0x2000>; ++ assigned-addresses = <0x82001000 0 0x40000 0 0x2000>; + reg = <0x1000 0 0 0 0>; + #address-cells = <3>; + #size-cells = <2>; +@@ -98,7 +98,7 @@ + /* x1 port */ + pcie@3,0 { + device_type = "pci"; +- assigned-addresses = <0x82000800 0 0x44000 0 0x2000>; ++ assigned-addresses = <0x82001800 0 0x44000 0 0x2000>; + reg = <0x1800 0 0 0 0>; + #address-cells = <3>; + #size-cells = <2>; diff --git a/arch/arm/boot/dts/armada-385-turris-omnia.dts b/arch/arm/boot/dts/armada-385-turris-omnia.dts -index 5bd6a66d2c2b4..01b0dfd55d703 100644 +index 5bd6a66d2c2b4..e7649c795699c 100644 --- a/arch/arm/boot/dts/armada-385-turris-omnia.dts +++ b/arch/arm/boot/dts/armada-385-turris-omnia.dts -@@ -471,7 +471,7 @@ +@@ -23,6 +23,12 @@ + stdout-path = &uart0; + }; + ++ aliases { ++ ethernet0 = ð0; ++ ethernet1 = ð1; ++ ethernet2 = ð2; ++ }; ++ + memory { + device_type = "memory"; + reg = <0x00000000 0x40000000>; /* 1024 MB */ +@@ -450,7 +456,17 @@ + }; + }; + +- /* port 6 is connected to eth0 */ ++ ports@6 { ++ reg = <6>; ++ label = "cpu"; ++ ethernet = <ð0>; ++ phy-mode = "rgmii-id"; ++ ++ fixed-link { ++ speed = <1000>; ++ full-duplex; ++ }; ++ }; + }; + }; + }; +@@ -471,7 +487,7 @@ marvell,function = "spi0"; }; @@ -3572,7 +4745,7 @@ index 5bd6a66d2c2b4..01b0dfd55d703 100644 marvell,pins = "mpp26"; marvell,function = "spi0"; }; -@@ -506,7 +506,7 @@ +@@ -506,7 +522,7 @@ }; }; @@ -3581,6 +4754,37 @@ index 5bd6a66d2c2b4..01b0dfd55d703 100644 }; &uart0 { +diff --git a/arch/arm/boot/dts/armada-385.dtsi b/arch/arm/boot/dts/armada-385.dtsi +index f0022d10c7159..f081f7cb66e5f 100644 +--- a/arch/arm/boot/dts/armada-385.dtsi ++++ b/arch/arm/boot/dts/armada-385.dtsi +@@ -84,7 +84,7 @@ + /* x1 port */ + pcie2: pcie@2,0 { + device_type = "pci"; +- assigned-addresses = <0x82000800 0 0x40000 0 0x2000>; ++ assigned-addresses = <0x82001000 0 0x40000 0 0x2000>; + reg = <0x1000 0 0 0 0>; + #address-cells = <3>; + #size-cells = <2>; +@@ -103,7 +103,7 @@ + /* x1 port */ + pcie3: pcie@3,0 { + device_type = "pci"; +- assigned-addresses = <0x82000800 0 0x44000 0 0x2000>; ++ assigned-addresses = <0x82001800 0 0x44000 0 0x2000>; + reg = <0x1800 0 0 0 0>; + #address-cells = <3>; + #size-cells = <2>; +@@ -125,7 +125,7 @@ + */ + pcie4: pcie@4,0 { + device_type = "pci"; +- assigned-addresses = <0x82000800 0 0x48000 0 0x2000>; ++ assigned-addresses = <0x82002000 0 0x48000 0 0x2000>; + reg = <0x2000 0 0 0 0>; + #address-cells = <3>; + #size-cells = <2>; diff --git 
a/arch/arm/boot/dts/armada-38x.dtsi b/arch/arm/boot/dts/armada-38x.dtsi index 9b1a24cc5e91f..df3c8d1d8f641 100644 --- a/arch/arm/boot/dts/armada-38x.dtsi @@ -3603,6 +4807,153 @@ index 9b1a24cc5e91f..df3c8d1d8f641 100644 reg = <0x12100 0x100>; reg-shift = <2>; interrupts = <GIC_SPI 13 IRQ_TYPE_LEVEL_HIGH>; +diff --git a/arch/arm/boot/dts/armada-39x.dtsi b/arch/arm/boot/dts/armada-39x.dtsi +index e0b7c20998312..9525e7b7f4360 100644 +--- a/arch/arm/boot/dts/armada-39x.dtsi ++++ b/arch/arm/boot/dts/armada-39x.dtsi +@@ -453,7 +453,7 @@ + /* x1 port */ + pcie@2,0 { + device_type = "pci"; +- assigned-addresses = <0x82000800 0 0x40000 0 0x2000>; ++ assigned-addresses = <0x82001000 0 0x40000 0 0x2000>; + reg = <0x1000 0 0 0 0>; + #address-cells = <3>; + #size-cells = <2>; +@@ -472,7 +472,7 @@ + /* x1 port */ + pcie@3,0 { + device_type = "pci"; +- assigned-addresses = <0x82000800 0 0x44000 0 0x2000>; ++ assigned-addresses = <0x82001800 0 0x44000 0 0x2000>; + reg = <0x1800 0 0 0 0>; + #address-cells = <3>; + #size-cells = <2>; +@@ -494,7 +494,7 @@ + */ + pcie@4,0 { + device_type = "pci"; +- assigned-addresses = <0x82000800 0 0x48000 0 0x2000>; ++ assigned-addresses = <0x82002000 0 0x48000 0 0x2000>; + reg = <0x2000 0 0 0 0>; + #address-cells = <3>; + #size-cells = <2>; +diff --git a/arch/arm/boot/dts/armada-xp-mv78230.dtsi b/arch/arm/boot/dts/armada-xp-mv78230.dtsi +index 8558bf6bb54c6..d55fe162fc7f0 100644 +--- a/arch/arm/boot/dts/armada-xp-mv78230.dtsi ++++ b/arch/arm/boot/dts/armada-xp-mv78230.dtsi +@@ -97,7 +97,7 @@ + + pcie2: pcie@2,0 { + device_type = "pci"; +- assigned-addresses = <0x82000800 0 0x44000 0 0x2000>; ++ assigned-addresses = <0x82001000 0 0x44000 0 0x2000>; + reg = <0x1000 0 0 0 0>; + #address-cells = <3>; + #size-cells = <2>; +@@ -115,7 +115,7 @@ + + pcie3: pcie@3,0 { + device_type = "pci"; +- assigned-addresses = <0x82000800 0 0x48000 0 0x2000>; ++ assigned-addresses = <0x82001800 0 0x48000 0 0x2000>; + reg = <0x1800 0 0 0 0>; + #address-cells = <3>; + #size-cells = <2>; +@@ -133,7 +133,7 @@ + + pcie4: pcie@4,0 { + device_type = "pci"; +- assigned-addresses = <0x82000800 0 0x4c000 0 0x2000>; ++ assigned-addresses = <0x82002000 0 0x4c000 0 0x2000>; + reg = <0x2000 0 0 0 0>; + #address-cells = <3>; + #size-cells = <2>; +@@ -151,7 +151,7 @@ + + pcie5: pcie@5,0 { + device_type = "pci"; +- assigned-addresses = <0x82000800 0 0x80000 0 0x2000>; ++ assigned-addresses = <0x82002800 0 0x80000 0 0x2000>; + reg = <0x2800 0 0 0 0>; + #address-cells = <3>; + #size-cells = <2>; +diff --git a/arch/arm/boot/dts/armada-xp-mv78260.dtsi b/arch/arm/boot/dts/armada-xp-mv78260.dtsi +index 2d85fe8ac3272..fdcc818199401 100644 +--- a/arch/arm/boot/dts/armada-xp-mv78260.dtsi ++++ b/arch/arm/boot/dts/armada-xp-mv78260.dtsi +@@ -112,7 +112,7 @@ + + pcie2: pcie@2,0 { + device_type = "pci"; +- assigned-addresses = <0x82000800 0 0x44000 0 0x2000>; ++ assigned-addresses = <0x82001000 0 0x44000 0 0x2000>; + reg = <0x1000 0 0 0 0>; + #address-cells = <3>; + #size-cells = <2>; +@@ -130,7 +130,7 @@ + + pcie3: pcie@3,0 { + device_type = "pci"; +- assigned-addresses = <0x82000800 0 0x48000 0 0x2000>; ++ assigned-addresses = <0x82001800 0 0x48000 0 0x2000>; + reg = <0x1800 0 0 0 0>; + #address-cells = <3>; + #size-cells = <2>; +@@ -148,7 +148,7 @@ + + pcie4: pcie@4,0 { + device_type = "pci"; +- assigned-addresses = <0x82000800 0 0x4c000 0 0x2000>; ++ assigned-addresses = <0x82002000 0 0x4c000 0 0x2000>; + reg = <0x2000 0 0 0 0>; + #address-cells = <3>; + #size-cells = <2>; +@@ -166,7 +166,7 @@ + + pcie5: pcie@5,0 { + 
device_type = "pci"; +- assigned-addresses = <0x82000800 0 0x80000 0 0x2000>; ++ assigned-addresses = <0x82002800 0 0x80000 0 0x2000>; + reg = <0x2800 0 0 0 0>; + #address-cells = <3>; + #size-cells = <2>; +@@ -184,7 +184,7 @@ + + pcie6: pcie@6,0 { + device_type = "pci"; +- assigned-addresses = <0x82000800 0 0x84000 0 0x2000>; ++ assigned-addresses = <0x82003000 0 0x84000 0 0x2000>; + reg = <0x3000 0 0 0 0>; + #address-cells = <3>; + #size-cells = <2>; +@@ -202,7 +202,7 @@ + + pcie7: pcie@7,0 { + device_type = "pci"; +- assigned-addresses = <0x82000800 0 0x88000 0 0x2000>; ++ assigned-addresses = <0x82003800 0 0x88000 0 0x2000>; + reg = <0x3800 0 0 0 0>; + #address-cells = <3>; + #size-cells = <2>; +@@ -220,7 +220,7 @@ + + pcie8: pcie@8,0 { + device_type = "pci"; +- assigned-addresses = <0x82000800 0 0x8c000 0 0x2000>; ++ assigned-addresses = <0x82004000 0 0x8c000 0 0x2000>; + reg = <0x4000 0 0 0 0>; + #address-cells = <3>; + #size-cells = <2>; +@@ -238,7 +238,7 @@ + + pcie9: pcie@9,0 { + device_type = "pci"; +- assigned-addresses = <0x82000800 0 0x42000 0 0x2000>; ++ assigned-addresses = <0x82004800 0 0x42000 0 0x2000>; + reg = <0x4800 0 0 0 0>; + #address-cells = <3>; + #size-cells = <2>; diff --git a/arch/arm/boot/dts/aspeed-ast2500-evb.dts b/arch/arm/boot/dts/aspeed-ast2500-evb.dts index 1d24b394ea4c3..a497dd135491b 100644 --- a/arch/arm/boot/dts/aspeed-ast2500-evb.dts @@ -4087,11 +5438,47 @@ index 3ca97b47c69ce..7e5c598e7e68f 100644 #address-cells = <1>; #size-cells = <0>; +diff --git a/arch/arm/boot/dts/at91rm9200.dtsi b/arch/arm/boot/dts/at91rm9200.dtsi +index d1181ead18e5a..21344fbc89e5e 100644 +--- a/arch/arm/boot/dts/at91rm9200.dtsi ++++ b/arch/arm/boot/dts/at91rm9200.dtsi +@@ -660,7 +660,7 @@ + compatible = "atmel,at91rm9200-udc"; + reg = <0xfffb0000 0x4000>; + interrupts = <11 IRQ_TYPE_LEVEL_HIGH 2>; +- clocks = <&pmc PMC_TYPE_PERIPHERAL 11>, <&pmc PMC_TYPE_SYSTEM 2>; ++ clocks = <&pmc PMC_TYPE_PERIPHERAL 11>, <&pmc PMC_TYPE_SYSTEM 1>; + clock-names = "pclk", "hclk"; + status = "disabled"; + }; diff --git a/arch/arm/boot/dts/at91sam9g20ek_common.dtsi b/arch/arm/boot/dts/at91sam9g20ek_common.dtsi -index 87bb39060e8be..ca03685f0f086 100644 +index 87bb39060e8be..4783e657b4cb6 100644 --- a/arch/arm/boot/dts/at91sam9g20ek_common.dtsi +++ b/arch/arm/boot/dts/at91sam9g20ek_common.dtsi -@@ -219,6 +219,12 @@ +@@ -39,6 +39,13 @@ + + }; + ++ usb1 { ++ pinctrl_usb1_vbus_gpio: usb1_vbus_gpio { ++ atmel,pins = ++ <AT91_PIOC 5 AT91_PERIPH_GPIO AT91_PINCTRL_DEGLITCH>; /* PC5 GPIO */ ++ }; ++ }; ++ + mmc0_slot1 { + pinctrl_board_mmc0_slot1: mmc0_slot1-board { + atmel,pins = +@@ -84,6 +91,8 @@ + }; + + usb1: gadget@fffa4000 { ++ pinctrl-0 = <&pinctrl_usb1_vbus_gpio>; ++ pinctrl-names = "default"; + atmel,vbus-gpio = <&pioC 5 GPIO_ACTIVE_HIGH>; + status = "okay"; + }; +@@ -219,6 +228,12 @@ wm8731: wm8731@1b { compatible = "wm8731"; reg = <0x1b>; @@ -4912,6 +6299,19 @@ index 5017b7b259cbe..618c812eef73e 100644 device_type = "memory"; reg = <0x00000000 0x08000000>; }; +diff --git a/arch/arm/boot/dts/dove.dtsi b/arch/arm/boot/dts/dove.dtsi +index 89e0bdaf3a85f..726d353eda686 100644 +--- a/arch/arm/boot/dts/dove.dtsi ++++ b/arch/arm/boot/dts/dove.dtsi +@@ -129,7 +129,7 @@ + pcie1: pcie@2 { + device_type = "pci"; + status = "disabled"; +- assigned-addresses = <0x82002800 0 0x80000 0 0x2000>; ++ assigned-addresses = <0x82001000 0 0x80000 0 0x2000>; + reg = <0x1000 0 0 0 0>; + clocks = <&gate_clk 5>; + marvell,pcie-port = <1>; diff --git a/arch/arm/boot/dts/dra7-l4.dtsi b/arch/arm/boot/dts/dra7-l4.dtsi 
index 956a26d52a4c3..5733e3a4ea8e7 100644 --- a/arch/arm/boot/dts/dra7-l4.dtsi @@ -5194,6 +6594,19 @@ index 4f88e96d81ddb..d5c68d1ea707c 100644 }; }; }; +diff --git a/arch/arm/boot/dts/imx53-ppd.dts b/arch/arm/boot/dts/imx53-ppd.dts +index 37d0cffea99c5..70c4a4852256c 100644 +--- a/arch/arm/boot/dts/imx53-ppd.dts ++++ b/arch/arm/boot/dts/imx53-ppd.dts +@@ -488,7 +488,7 @@ + scl-gpios = <&gpio3 21 GPIO_ACTIVE_HIGH>; + status = "okay"; + +- i2c-switch@70 { ++ i2c-mux@70 { + compatible = "nxp,pca9547"; + #address-cells = <1>; + #size-cells = <0>; diff --git a/arch/arm/boot/dts/imx6dl-eckelmann-ci4x10.dts b/arch/arm/boot/dts/imx6dl-eckelmann-ci4x10.dts index b4a9523e325b4..864dc5018451f 100644 --- a/arch/arm/boot/dts/imx6dl-eckelmann-ci4x10.dts @@ -5329,6 +6742,21 @@ index 05ee283882290..cc18010023942 100644 compatible = "microchip,sst25vf016b"; spi-max-frequency = <20000000>; reg = <0>; +diff --git a/arch/arm/boot/dts/imx6q-prti6q.dts b/arch/arm/boot/dts/imx6q-prti6q.dts +index b4605edfd2ab8..d8fa83effd638 100644 +--- a/arch/arm/boot/dts/imx6q-prti6q.dts ++++ b/arch/arm/boot/dts/imx6q-prti6q.dts +@@ -364,8 +364,8 @@ + pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_wifi>; + interrupts-extended = <&gpio1 30 IRQ_TYPE_LEVEL_HIGH>; +- ref-clock-frequency = "38400000"; +- tcxo-clock-frequency = "19200000"; ++ ref-clock-frequency = <38400000>; ++ tcxo-clock-frequency = <19200000>; + }; + }; + diff --git a/arch/arm/boot/dts/imx6q-rex-pro.dts b/arch/arm/boot/dts/imx6q-rex-pro.dts index 1767e1a3cd53a..271f4b2d9b9f0 100644 --- a/arch/arm/boot/dts/imx6q-rex-pro.dts @@ -5460,6 +6888,44 @@ index 648f5fcb72e65..2c1d6f28e6950 100644 #address-cells = <1>; #size-cells = <1>; compatible = "sst,sst25vf040b", "jedec,spi-nor"; +diff --git a/arch/arm/boot/dts/imx6qdl-gw560x.dtsi b/arch/arm/boot/dts/imx6qdl-gw560x.dtsi +index 4bc4371e6bae5..4b81a975c979d 100644 +--- a/arch/arm/boot/dts/imx6qdl-gw560x.dtsi ++++ b/arch/arm/boot/dts/imx6qdl-gw560x.dtsi +@@ -632,7 +632,6 @@ + &uart1 { + pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_uart1>; +- uart-has-rtscts; + rts-gpios = <&gpio7 1 GPIO_ACTIVE_HIGH>; + status = "okay"; + }; +diff --git a/arch/arm/boot/dts/imx6qdl-gw5910.dtsi b/arch/arm/boot/dts/imx6qdl-gw5910.dtsi +index 68e5ab2e27e22..6bb4855d13ce5 100644 +--- a/arch/arm/boot/dts/imx6qdl-gw5910.dtsi ++++ b/arch/arm/boot/dts/imx6qdl-gw5910.dtsi +@@ -29,7 +29,7 @@ + + user-pb { + label = "user_pb"; +- gpios = <&gsc_gpio 0 GPIO_ACTIVE_LOW>; ++ gpios = <&gsc_gpio 2 GPIO_ACTIVE_LOW>; + linux,code = <BTN_0>; + }; + +diff --git a/arch/arm/boot/dts/imx6qdl-gw5913.dtsi b/arch/arm/boot/dts/imx6qdl-gw5913.dtsi +index 8e23cec7149e5..696427b487f01 100644 +--- a/arch/arm/boot/dts/imx6qdl-gw5913.dtsi ++++ b/arch/arm/boot/dts/imx6qdl-gw5913.dtsi +@@ -26,7 +26,7 @@ + + user-pb { + label = "user_pb"; +- gpios = <&gsc_gpio 0 GPIO_ACTIVE_LOW>; ++ gpios = <&gsc_gpio 2 GPIO_ACTIVE_LOW>; + linux,code = <BTN_0>; + }; + diff --git a/arch/arm/boot/dts/imx6qdl-kontron-samx6i.dtsi b/arch/arm/boot/dts/imx6qdl-kontron-samx6i.dtsi index b167b33bd108d..683f6e58ab230 100644 --- a/arch/arm/boot/dts/imx6qdl-kontron-samx6i.dtsi @@ -5867,6 +7333,19 @@ index 2a449a3c1ae27..09a83dbdf6510 100644 compatible = "mxicy,mx25v8035f", "jedec,spi-nor"; spi-max-frequency = <50000000>; reg = <0>; +diff --git a/arch/arm/boot/dts/imx6ul-pico-dwarf.dts b/arch/arm/boot/dts/imx6ul-pico-dwarf.dts +index 162dc259edc8c..5a74c7f68eb62 100644 +--- a/arch/arm/boot/dts/imx6ul-pico-dwarf.dts ++++ b/arch/arm/boot/dts/imx6ul-pico-dwarf.dts +@@ -32,7 +32,7 @@ + }; + + &i2c2 
{ +- clock_frequency = <100000>; ++ clock-frequency = <100000>; + pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_i2c2>; + status = "okay"; diff --git a/arch/arm/boot/dts/imx6ul.dtsi b/arch/arm/boot/dts/imx6ul.dtsi index afeec01f65228..eca8bf89ab88f 100644 --- a/arch/arm/boot/dts/imx6ul.dtsi @@ -6049,6 +7528,28 @@ index e0751e6ba3c0f..a31de900139d6 100644 clock-names = "mclk"; wlf,shared-lrclk; }; +diff --git a/arch/arm/boot/dts/imx7d-pico-dwarf.dts b/arch/arm/boot/dts/imx7d-pico-dwarf.dts +index 5162fe227d1ea..fdc10563f1473 100644 +--- a/arch/arm/boot/dts/imx7d-pico-dwarf.dts ++++ b/arch/arm/boot/dts/imx7d-pico-dwarf.dts +@@ -32,7 +32,7 @@ + }; + + &i2c1 { +- clock_frequency = <100000>; ++ clock-frequency = <100000>; + pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_i2c1>; + status = "okay"; +@@ -52,7 +52,7 @@ + }; + + &i2c4 { +- clock_frequency = <100000>; ++ clock-frequency = <100000>; + pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_i2c1>; + status = "okay"; diff --git a/arch/arm/boot/dts/imx7d-pico-hobbit.dts b/arch/arm/boot/dts/imx7d-pico-hobbit.dts index 7b2198a9372c6..d917dc4f2f227 100644 --- a/arch/arm/boot/dts/imx7d-pico-hobbit.dts @@ -6071,6 +7572,28 @@ index 7b2198a9372c6..d917dc4f2f227 100644 VDDA-supply = <®_2p5v>; VDDIO-supply = <®_vref_1v8>; }; +diff --git a/arch/arm/boot/dts/imx7d-pico-nymph.dts b/arch/arm/boot/dts/imx7d-pico-nymph.dts +index 104a85254adbb..5afb1674e0125 100644 +--- a/arch/arm/boot/dts/imx7d-pico-nymph.dts ++++ b/arch/arm/boot/dts/imx7d-pico-nymph.dts +@@ -43,7 +43,7 @@ + }; + + &i2c1 { +- clock_frequency = <100000>; ++ clock-frequency = <100000>; + pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_i2c1>; + status = "okay"; +@@ -64,7 +64,7 @@ + }; + + &i2c2 { +- clock_frequency = <100000>; ++ clock-frequency = <100000>; + pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_i2c2>; + status = "okay"; diff --git a/arch/arm/boot/dts/imx7d-pico-pi.dts b/arch/arm/boot/dts/imx7d-pico-pi.dts index 70bea95c06d83..f263e391e24cb 100644 --- a/arch/arm/boot/dts/imx7d-pico-pi.dts @@ -6557,6 +8080,116 @@ index f5f070a874823..764832ddfa78a 100644 reg = <0x98e00000 0x5C>; interrupts = <5 IRQ_TYPE_LEVEL_HIGH>; clocks = <&clk_apb>; +diff --git a/arch/arm/boot/dts/nuvoton-npcm730-gbs.dts b/arch/arm/boot/dts/nuvoton-npcm730-gbs.dts +index eb6eb21cb2a44..33c8d5b3d679a 100644 +--- a/arch/arm/boot/dts/nuvoton-npcm730-gbs.dts ++++ b/arch/arm/boot/dts/nuvoton-npcm730-gbs.dts +@@ -366,7 +366,7 @@ + spi-max-frequency = <20000000>; + spi-rx-bus-width = <2>; + label = "bmc"; +- partitions@80000000 { ++ partitions { + compatible = "fixed-partitions"; + #address-cells = <1>; + #size-cells = <1>; +diff --git a/arch/arm/boot/dts/nuvoton-npcm730-gsj.dts b/arch/arm/boot/dts/nuvoton-npcm730-gsj.dts +index d4ff49939a3d9..bbe18618f5c56 100644 +--- a/arch/arm/boot/dts/nuvoton-npcm730-gsj.dts ++++ b/arch/arm/boot/dts/nuvoton-npcm730-gsj.dts +@@ -142,7 +142,7 @@ + reg = <0>; + spi-rx-bus-width = <2>; + +- partitions@80000000 { ++ partitions { + compatible = "fixed-partitions"; + #address-cells = <1>; + #size-cells = <1>; +diff --git a/arch/arm/boot/dts/nuvoton-npcm730-kudo.dts b/arch/arm/boot/dts/nuvoton-npcm730-kudo.dts +index 82a104b2a65f1..8e3425cb8e8b9 100644 +--- a/arch/arm/boot/dts/nuvoton-npcm730-kudo.dts ++++ b/arch/arm/boot/dts/nuvoton-npcm730-kudo.dts +@@ -388,7 +388,7 @@ + spi-max-frequency = <5000000>; + spi-rx-bus-width = <2>; + label = "bmc"; +- partitions@80000000 { ++ partitions { + compatible = "fixed-partitions"; + #address-cells = <1>; + #size-cells = <1>; +@@ -422,7 
+422,7 @@ + reg = <1>; + spi-max-frequency = <5000000>; + spi-rx-bus-width = <2>; +- partitions@88000000 { ++ partitions { + compatible = "fixed-partitions"; + #address-cells = <1>; + #size-cells = <1>; +@@ -447,7 +447,7 @@ + reg = <0>; + spi-max-frequency = <5000000>; + spi-rx-bus-width = <2>; +- partitions@A0000000 { ++ partitions { + compatible = "fixed-partitions"; + #address-cells = <1>; + #size-cells = <1>; +diff --git a/arch/arm/boot/dts/nuvoton-npcm750-evb.dts b/arch/arm/boot/dts/nuvoton-npcm750-evb.dts +index 0334641f88292..cf274c926711a 100644 +--- a/arch/arm/boot/dts/nuvoton-npcm750-evb.dts ++++ b/arch/arm/boot/dts/nuvoton-npcm750-evb.dts +@@ -74,7 +74,7 @@ + spi-rx-bus-width = <2>; + reg = <0>; + spi-max-frequency = <5000000>; +- partitions@80000000 { ++ partitions { + compatible = "fixed-partitions"; + #address-cells = <1>; + #size-cells = <1>; +@@ -135,7 +135,7 @@ + spi-rx-bus-width = <2>; + reg = <0>; + spi-max-frequency = <5000000>; +- partitions@A0000000 { ++ partitions { + compatible = "fixed-partitions"; + #address-cells = <1>; + #size-cells = <1>; +diff --git a/arch/arm/boot/dts/nuvoton-npcm750-runbmc-olympus.dts b/arch/arm/boot/dts/nuvoton-npcm750-runbmc-olympus.dts +index 767e0ac0df7c5..7fe7efee28acb 100644 +--- a/arch/arm/boot/dts/nuvoton-npcm750-runbmc-olympus.dts ++++ b/arch/arm/boot/dts/nuvoton-npcm750-runbmc-olympus.dts +@@ -107,7 +107,7 @@ + reg = <0>; + spi-rx-bus-width = <2>; + +- partitions@80000000 { ++ partitions { + compatible = "fixed-partitions"; + #address-cells = <1>; + #size-cells = <1>; +@@ -146,7 +146,7 @@ + reg = <1>; + npcm,fiu-rx-bus-width = <2>; + +- partitions@88000000 { ++ partitions { + compatible = "fixed-partitions"; + #address-cells = <1>; + #size-cells = <1>; +@@ -173,7 +173,7 @@ + reg = <0>; + spi-rx-bus-width = <2>; + +- partitions@A0000000 { ++ partitions { + compatible = "fixed-partitions"; + #address-cells = <1>; + #size-cells = <1>; diff --git a/arch/arm/boot/dts/omap-gpmc-smsc9221.dtsi b/arch/arm/boot/dts/omap-gpmc-smsc9221.dtsi index 7f6aefd134514..e7534fe9c53cf 100644 --- a/arch/arm/boot/dts/omap-gpmc-smsc9221.dtsi @@ -6926,6 +8559,19 @@ index 90846a7655b49..dde4364892bf0 100644 compatible = "arm,arm11mp-gic"; interrupt-controller; #interrupt-cells = <3>; +diff --git a/arch/arm/boot/dts/qcom-apq8064.dtsi b/arch/arm/boot/dts/qcom-apq8064.dtsi +index d1c1c6aab2b87..0e830476fefd2 100644 +--- a/arch/arm/boot/dts/qcom-apq8064.dtsi ++++ b/arch/arm/boot/dts/qcom-apq8064.dtsi +@@ -1571,7 +1571,7 @@ + }; + + etb@1a01000 { +- compatible = "coresight-etb10", "arm,primecell"; ++ compatible = "arm,coresight-etb10", "arm,primecell"; + reg = <0x1a01000 0x1000>; + + clocks = <&rpmcc RPM_QDSS_CLK>; diff --git a/arch/arm/boot/dts/qcom-ipq4019.dtsi b/arch/arm/boot/dts/qcom-ipq4019.dtsi index ff1bdb10ad198..08bc5f46649dd 100644 --- a/arch/arm/boot/dts/qcom-ipq4019.dtsi @@ -7077,6 +8723,57 @@ index 1e6ce035f76a9..0e76d03087fe5 100644 "imem", "config"; +diff --git a/arch/arm/boot/dts/rk3036-evb.dts b/arch/arm/boot/dts/rk3036-evb.dts +index 2a7e6624efb93..94216f870b57c 100644 +--- a/arch/arm/boot/dts/rk3036-evb.dts ++++ b/arch/arm/boot/dts/rk3036-evb.dts +@@ -31,11 +31,10 @@ + &i2c1 { + status = "okay"; + +- hym8563: hym8563@51 { ++ hym8563: rtc@51 { + compatible = "haoyu,hym8563"; + reg = <0x51>; + #clock-cells = <0>; +- clock-frequency = <32768>; + clock-output-names = "xin32k"; + }; + }; +diff --git a/arch/arm/boot/dts/rk3188-radxarock.dts b/arch/arm/boot/dts/rk3188-radxarock.dts +index 36c0945f43b22..3718fac62841c 100644 +--- 
a/arch/arm/boot/dts/rk3188-radxarock.dts ++++ b/arch/arm/boot/dts/rk3188-radxarock.dts +@@ -71,7 +71,7 @@ + #sound-dai-cells = <0>; + }; + +- ir_recv: gpio-ir-receiver { ++ ir_recv: ir-receiver { + compatible = "gpio-ir-receiver"; + gpios = <&gpio0 RK_PB2 GPIO_ACTIVE_LOW>; + pinctrl-names = "default"; +diff --git a/arch/arm/boot/dts/rk3188.dtsi b/arch/arm/boot/dts/rk3188.dtsi +index 2c606494b78c4..e07b1d79c470a 100644 +--- a/arch/arm/boot/dts/rk3188.dtsi ++++ b/arch/arm/boot/dts/rk3188.dtsi +@@ -378,7 +378,7 @@ + rockchip,pins = <2 RK_PD3 1 &pcfg_pull_none>; + }; + +- lcdc1_rgb24: ldcd1-rgb24 { ++ lcdc1_rgb24: lcdc1-rgb24 { + rockchip,pins = <2 RK_PA0 1 &pcfg_pull_none>, + <2 RK_PA1 1 &pcfg_pull_none>, + <2 RK_PA2 1 &pcfg_pull_none>, +@@ -606,7 +606,6 @@ + + &global_timer { + interrupts = <GIC_PPI 11 (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_EDGE_RISING)>; +- status = "disabled"; + }; + + &local_timer { diff --git a/arch/arm/boot/dts/rk322x.dtsi b/arch/arm/boot/dts/rk322x.dtsi index 75af99c76d7ea..f31cf1df892b2 100644 --- a/arch/arm/boot/dts/rk322x.dtsi @@ -7092,8 +8789,84 @@ index 75af99c76d7ea..f31cf1df892b2 100644 pinctrl-names = "default"; pinctrl-0 = <&hdmii2c_xfer &hdmi_hpd &hdmi_cec>; resets = <&cru SRST_HDMI_P>; +diff --git a/arch/arm/boot/dts/rk3288-evb-act8846.dts b/arch/arm/boot/dts/rk3288-evb-act8846.dts +index be695b8c1f672..8a635c2431274 100644 +--- a/arch/arm/boot/dts/rk3288-evb-act8846.dts ++++ b/arch/arm/boot/dts/rk3288-evb-act8846.dts +@@ -54,7 +54,7 @@ + vin-supply = <&vcc_sys>; + }; + +- hym8563@51 { ++ rtc@51 { + compatible = "haoyu,hym8563"; + reg = <0x51>; + +diff --git a/arch/arm/boot/dts/rk3288-firefly.dtsi b/arch/arm/boot/dts/rk3288-firefly.dtsi +index 7fb582302b326..74ba7e21850a5 100644 +--- a/arch/arm/boot/dts/rk3288-firefly.dtsi ++++ b/arch/arm/boot/dts/rk3288-firefly.dtsi +@@ -233,11 +233,10 @@ + vin-supply = <&vcc_sys>; + }; + +- hym8563: hym8563@51 { ++ hym8563: rtc@51 { + compatible = "haoyu,hym8563"; + reg = <0x51>; + #clock-cells = <0>; +- clock-frequency = <32768>; + clock-output-names = "xin32k"; + interrupt-parent = <&gpio7>; + interrupts = <RK_PA4 IRQ_TYPE_EDGE_FALLING>; +diff --git a/arch/arm/boot/dts/rk3288-miqi.dts b/arch/arm/boot/dts/rk3288-miqi.dts +index 713f55e143c69..db1eb648e0e1a 100644 +--- a/arch/arm/boot/dts/rk3288-miqi.dts ++++ b/arch/arm/boot/dts/rk3288-miqi.dts +@@ -162,11 +162,10 @@ + vin-supply = <&vcc_sys>; + }; + +- hym8563: hym8563@51 { ++ hym8563: rtc@51 { + compatible = "haoyu,hym8563"; + reg = <0x51>; + #clock-cells = <0>; +- clock-frequency = <32768>; + clock-output-names = "xin32k"; + }; + +diff --git a/arch/arm/boot/dts/rk3288-rock2-square.dts b/arch/arm/boot/dts/rk3288-rock2-square.dts +index c4d1d142d8c68..bc44606ca05d8 100644 +--- a/arch/arm/boot/dts/rk3288-rock2-square.dts ++++ b/arch/arm/boot/dts/rk3288-rock2-square.dts +@@ -165,11 +165,10 @@ + }; + + &i2c0 { +- hym8563: hym8563@51 { ++ hym8563: rtc@51 { + compatible = "haoyu,hym8563"; + reg = <0x51>; + #clock-cells = <0>; +- clock-frequency = <32768>; + clock-output-names = "xin32k"; + interrupt-parent = <&gpio0>; + interrupts = <RK_PA4 IRQ_TYPE_EDGE_FALLING>; +diff --git a/arch/arm/boot/dts/rk3288-vmarc-som.dtsi b/arch/arm/boot/dts/rk3288-vmarc-som.dtsi +index 0ae2bd150e372..793951655b73b 100644 +--- a/arch/arm/boot/dts/rk3288-vmarc-som.dtsi ++++ b/arch/arm/boot/dts/rk3288-vmarc-som.dtsi +@@ -241,7 +241,6 @@ + interrupt-parent = <&gpio5>; + interrupts = <RK_PC3 IRQ_TYPE_LEVEL_LOW>; + #clock-cells = <0>; +- clock-frequency = <32768>; + clock-output-names = "hym8563"; + 
pinctrl-names = "default"; + pinctrl-0 = <&hym8563_int>; diff --git a/arch/arm/boot/dts/rk3288.dtsi b/arch/arm/boot/dts/rk3288.dtsi -index 4dcdcf17c9776..66ff5db53c5a9 100644 +index 4dcdcf17c9776..8670c948ca8da 100644 --- a/arch/arm/boot/dts/rk3288.dtsi +++ b/arch/arm/boot/dts/rk3288.dtsi @@ -971,7 +971,7 @@ @@ -7105,6 +8878,32 @@ index 4dcdcf17c9776..66ff5db53c5a9 100644 compatible = "rockchip,rk3288-crypto"; reg = <0x0 0xff8a0000 0x0 0x4000>; interrupts = <GIC_SPI 48 IRQ_TYPE_LEVEL_HIGH>; +@@ -1180,6 +1180,7 @@ + clock-names = "dp", "pclk"; + phys = <&edp_phy>; + phy-names = "dp"; ++ power-domains = <&power RK3288_PD_VIO>; + resets = <&cru SRST_EDP>; + reset-names = "dp"; + rockchip,grf = <&grf>; +diff --git a/arch/arm/boot/dts/rk3xxx.dtsi b/arch/arm/boot/dts/rk3xxx.dtsi +index 616a828e0c6e4..17e89d30de781 100644 +--- a/arch/arm/boot/dts/rk3xxx.dtsi ++++ b/arch/arm/boot/dts/rk3xxx.dtsi +@@ -76,6 +76,13 @@ + reg = <0x1013c200 0x20>; + interrupts = <GIC_PPI 11 (GIC_CPU_MASK_SIMPLE(2) | IRQ_TYPE_EDGE_RISING)>; + clocks = <&cru CORE_PERI>; ++ status = "disabled"; ++ /* The clock source and the sched_clock provided by the arm_global_timer ++ * on Rockchip rk3066a/rk3188 are quite unstable because their rates ++ * depend on the CPU frequency. ++ * Keep the arm_global_timer disabled in order to have the ++ * DW_APB_TIMER (rk3066a) or ROCKCHIP_TIMER (rk3188) selected by default. ++ */ + }; + + local_timer: local-timer@1013c600 { diff --git a/arch/arm/boot/dts/s5pv210-aries.dtsi b/arch/arm/boot/dts/s5pv210-aries.dtsi index 160f8cd9a68da..b6d55a782c208 100644 --- a/arch/arm/boot/dts/s5pv210-aries.dtsi @@ -7172,6 +8971,19 @@ index 353ba7b09a0c0..c5265f3ae31d6 100644 clock-names = "iis", "i2s_opclk0"; clocks = <&clocks CLK_I2S2>, <&clocks SCLK_AUDIO2>; pinctrl-names = "default"; +diff --git a/arch/arm/boot/dts/sam9x60.dtsi b/arch/arm/boot/dts/sam9x60.dtsi +index ec45ced3cde68..e1e0dec8cc1f2 100644 +--- a/arch/arm/boot/dts/sam9x60.dtsi ++++ b/arch/arm/boot/dts/sam9x60.dtsi +@@ -567,7 +567,7 @@ + mpddrc: mpddrc@ffffe800 { + compatible = "microchip,sam9x60-ddramc", "atmel,sama5d3-ddramc"; + reg = <0xffffe800 0x200>; +- clocks = <&pmc PMC_TYPE_SYSTEM 2>, <&pmc PMC_TYPE_CORE PMC_MCK>; ++ clocks = <&pmc PMC_TYPE_SYSTEM 2>, <&pmc PMC_TYPE_PERIPHERAL 49>; + clock-names = "ddrck", "mpddr"; + }; + diff --git a/arch/arm/boot/dts/sama5d2.dtsi b/arch/arm/boot/dts/sama5d2.dtsi index 801969c113d64..4c87c2aa8fc86 100644 --- a/arch/arm/boot/dts/sama5d2.dtsi @@ -7195,9 +9007,18 @@ index 801969c113d64..4c87c2aa8fc86 100644 }; diff --git a/arch/arm/boot/dts/sama7g5-pinfunc.h b/arch/arm/boot/dts/sama7g5-pinfunc.h -index 22fe9e522a97b..4eb30445d2057 100644 +index 22fe9e522a97b..6e87f0d4b8fce 100644 --- a/arch/arm/boot/dts/sama7g5-pinfunc.h +++ b/arch/arm/boot/dts/sama7g5-pinfunc.h +@@ -261,7 +261,7 @@ + #define PIN_PB2__FLEXCOM6_IO0 PINMUX_PIN(PIN_PB2, 2, 1) + #define PIN_PB2__ADTRG PINMUX_PIN(PIN_PB2, 3, 1) + #define PIN_PB2__A20 PINMUX_PIN(PIN_PB2, 4, 1) +-#define PIN_PB2__FLEXCOM11_IO0 PINMUX_PIN(PIN_PB2, 6, 3) ++#define PIN_PB2__FLEXCOM11_IO1 PINMUX_PIN(PIN_PB2, 6, 3) + #define PIN_PB3 35 + #define PIN_PB3__GPIO PINMUX_PIN(PIN_PB3, 0, 0) + #define PIN_PB3__RF1 PINMUX_PIN(PIN_PB3, 1, 1) @@ -765,7 +765,7 @@ #define PIN_PD20__PCK0 PINMUX_PIN(PIN_PD20, 1, 3) #define PIN_PD20__FLEXCOM2_IO3 PINMUX_PIN(PIN_PD20, 2, 2) @@ -7424,6 +9245,19 @@ index c87b881b2c8bb..9135533676879 100644 }; rtc@e0580000 { +diff --git a/arch/arm/boot/dts/spear600.dtsi b/arch/arm/boot/dts/spear600.dtsi +index fd41243a0b2c0..9d5a04a46b14e 100644 
+--- a/arch/arm/boot/dts/spear600.dtsi ++++ b/arch/arm/boot/dts/spear600.dtsi +@@ -47,7 +47,7 @@ + compatible = "arm,pl110", "arm,primecell"; + reg = <0xfc200000 0x1000>; + interrupt-parent = <&vic1>; +- interrupts = <12>; ++ interrupts = <13>; + status = "disabled"; + }; + diff --git a/arch/arm/boot/dts/ste-ux500-samsung-codina.dts b/arch/arm/boot/dts/ste-ux500-samsung-codina.dts index 952606e607ed6..ce62ba877da12 100644 --- a/arch/arm/boot/dts/ste-ux500-samsung-codina.dts @@ -7649,10 +9483,31 @@ index bd289bf5d2690..e0d4833187988 100644 resets = <&rcc USBH_R>; interrupts = <GIC_SPI 75 IRQ_TYPE_LEVEL_HIGH>; companion = <&usbh_ohci>; +diff --git a/arch/arm/boot/dts/stm32mp157a-dhcor-avenger96.dts b/arch/arm/boot/dts/stm32mp157a-dhcor-avenger96.dts +index 2e3c9fbb4eb36..275167f26fd9d 100644 +--- a/arch/arm/boot/dts/stm32mp157a-dhcor-avenger96.dts ++++ b/arch/arm/boot/dts/stm32mp157a-dhcor-avenger96.dts +@@ -13,7 +13,6 @@ + /dts-v1/; + + #include "stm32mp157.dtsi" +-#include "stm32mp15xc.dtsi" + #include "stm32mp15xx-dhcor-som.dtsi" + #include "stm32mp15xx-dhcor-avenger96.dtsi" + diff --git a/arch/arm/boot/dts/stm32mp15xx-dhcor-avenger96.dtsi b/arch/arm/boot/dts/stm32mp15xx-dhcor-avenger96.dtsi -index 6885948f3024e..8eb51d84b6988 100644 +index 6885948f3024e..d3375ad8c91fc 100644 --- a/arch/arm/boot/dts/stm32mp15xx-dhcor-avenger96.dtsi +++ b/arch/arm/boot/dts/stm32mp15xx-dhcor-avenger96.dtsi +@@ -100,7 +100,7 @@ + regulator-min-microvolt = <3300000>; + regulator-max-microvolt = <3300000>; + +- gpios = <&gpioz 3 GPIO_ACTIVE_HIGH>; ++ gpio = <&gpioz 3 GPIO_ACTIVE_HIGH>; + enable-active-high; + }; + }; @@ -141,6 +141,7 @@ compatible = "snps,dwmac-mdio"; reset-gpios = <&gpioz 2 GPIO_ACTIVE_LOW>; @@ -7955,6 +9810,32 @@ index e81e5937a60ae..03301ddb3403a 100644 pinctrl-names = "default"; pinctrl-0 = <&pinctrl_usb1>, <&pinctrl_usb3>; clock-names = "ref", "bus_early", "suspend"; +diff --git a/arch/arm/boot/dts/vf610-zii-dev-rev-b.dts b/arch/arm/boot/dts/vf610-zii-dev-rev-b.dts +index 043ddd70372f0..36d5299b2baa8 100644 +--- a/arch/arm/boot/dts/vf610-zii-dev-rev-b.dts ++++ b/arch/arm/boot/dts/vf610-zii-dev-rev-b.dts +@@ -343,7 +343,7 @@ + }; + + &i2c2 { +- tca9548@70 { ++ i2c-mux@70 { + compatible = "nxp,pca9548"; + pinctrl-0 = <&pinctrl_i2c_mux_reset>; + pinctrl-names = "default"; +diff --git a/arch/arm/boot/dts/vf610-zii-dev-rev-c.dts b/arch/arm/boot/dts/vf610-zii-dev-rev-c.dts +index de79dcfd32e62..ba2001f373158 100644 +--- a/arch/arm/boot/dts/vf610-zii-dev-rev-c.dts ++++ b/arch/arm/boot/dts/vf610-zii-dev-rev-c.dts +@@ -340,7 +340,7 @@ + }; + + &i2c2 { +- tca9548@70 { ++ i2c-mux@70 { + compatible = "nxp,pca9548"; + pinctrl-0 = <&pinctrl_i2c_mux_reset>; + pinctrl-names = "default"; diff --git a/arch/arm/configs/cm_x300_defconfig b/arch/arm/configs/cm_x300_defconfig index 502a9d870ca44..45769d0ddd4ef 100644 --- a/arch/arm/configs/cm_x300_defconfig @@ -8387,6 +10268,70 @@ index 92282558caf7c..2b8970d8e5a2f 100644 MT_ROM, MT_MEMORY_RWX_NONCACHED, MT_MEMORY_RW_DTCM, +diff --git a/arch/arm/include/asm/perf_event.h b/arch/arm/include/asm/perf_event.h +index fe87397c3d8c6..bdbc1e590891e 100644 +--- a/arch/arm/include/asm/perf_event.h ++++ b/arch/arm/include/asm/perf_event.h +@@ -17,7 +17,7 @@ extern unsigned long perf_misc_flags(struct pt_regs *regs); + + #define perf_arch_fetch_caller_regs(regs, __ip) { \ + (regs)->ARM_pc = (__ip); \ +- (regs)->ARM_fp = (unsigned long) __builtin_frame_address(0); \ ++ frame_pointer((regs)) = (unsigned long) __builtin_frame_address(0); \ + (regs)->ARM_sp = 
current_stack_pointer; \ + (regs)->ARM_cpsr = SVC_MODE; \ + } +diff --git a/arch/arm/include/asm/pgtable-nommu.h b/arch/arm/include/asm/pgtable-nommu.h +index d16aba48fa0a4..090011394477f 100644 +--- a/arch/arm/include/asm/pgtable-nommu.h ++++ b/arch/arm/include/asm/pgtable-nommu.h +@@ -44,12 +44,6 @@ + + typedef pte_t *pte_addr_t; + +-/* +- * ZERO_PAGE is a global shared page that is always zero: used +- * for zero-mapped memory areas etc.. +- */ +-#define ZERO_PAGE(vaddr) (virt_to_page(0)) +- + /* + * Mark the prot value as uncacheable and unbufferable. + */ +diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h +index cd1f84bb40aea..a25c4303fc0e6 100644 +--- a/arch/arm/include/asm/pgtable.h ++++ b/arch/arm/include/asm/pgtable.h +@@ -10,6 +10,15 @@ + #include <linux/const.h> + #include <asm/proc-fns.h> + ++#ifndef __ASSEMBLY__ ++/* ++ * ZERO_PAGE is a global shared page that is always zero: used ++ * for zero-mapped memory areas etc.. ++ */ ++extern struct page *empty_zero_page; ++#define ZERO_PAGE(vaddr) (empty_zero_page) ++#endif ++ + #ifndef CONFIG_MMU + + #include <asm-generic/pgtable-nopud.h> +@@ -156,13 +165,6 @@ extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, + #define __S111 __PAGE_SHARED_EXEC + + #ifndef __ASSEMBLY__ +-/* +- * ZERO_PAGE is a global shared page that is always zero: used +- * for zero-mapped memory areas etc.. +- */ +-extern struct page *empty_zero_page; +-#define ZERO_PAGE(vaddr) (empty_zero_page) +- + + extern pgd_t swapper_pg_dir[PTRS_PER_PGD]; + diff --git a/arch/arm/include/asm/processor.h b/arch/arm/include/asm/processor.h index 9e6b972863077..8aeff55aebfaf 100644 --- a/arch/arm/include/asm/processor.h @@ -8495,6 +10440,33 @@ index 0000000000000..85f9e538fb325 +int spectre_bhb_update_vectors(unsigned int method); + +#endif +diff --git a/arch/arm/include/asm/thread_info.h b/arch/arm/include/asm/thread_info.h +index 9a18da3e10cc3..b682189a2b5df 100644 +--- a/arch/arm/include/asm/thread_info.h ++++ b/arch/arm/include/asm/thread_info.h +@@ -129,15 +129,16 @@ extern int vfp_restore_user_hwstate(struct user_vfp *, + #define TIF_NEED_RESCHED 1 /* rescheduling necessary */ + #define TIF_NOTIFY_RESUME 2 /* callback before returning to user */ + #define TIF_UPROBE 3 /* breakpointed or singlestepping */ +-#define TIF_SYSCALL_TRACE 4 /* syscall trace active */ +-#define TIF_SYSCALL_AUDIT 5 /* syscall auditing active */ +-#define TIF_SYSCALL_TRACEPOINT 6 /* syscall tracepoint instrumentation */ +-#define TIF_SECCOMP 7 /* seccomp syscall filtering active */ +-#define TIF_NOTIFY_SIGNAL 8 /* signal notifications exist */ ++#define TIF_NOTIFY_SIGNAL 4 /* signal notifications exist */ + + #define TIF_USING_IWMMXT 17 + #define TIF_MEMDIE 18 /* is terminating due to OOM killer */ +-#define TIF_RESTORE_SIGMASK 20 ++#define TIF_RESTORE_SIGMASK 19 ++#define TIF_SYSCALL_TRACE 20 /* syscall trace active */ ++#define TIF_SYSCALL_AUDIT 21 /* syscall auditing active */ ++#define TIF_SYSCALL_TRACEPOINT 22 /* syscall tracepoint instrumentation */ ++#define TIF_SECCOMP 23 /* seccomp syscall filtering active */ ++ + + #define _TIF_SIGPENDING (1 << TIF_SIGPENDING) + #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) diff --git a/arch/arm/include/asm/timex.h b/arch/arm/include/asm/timex.h index 7c3b3671d6c25..6d1337c169cd3 100644 --- a/arch/arm/include/asm/timex.h @@ -9238,7 +11210,7 @@ index 6166ba38bf994..b74bfcf94fb1a 100644 (void *)address); res = -EFAULT; diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c -index 
195dff58bafc7..54abd8720ddef 100644 +index 195dff58bafc7..91e757bb054e6 100644 --- a/arch/arm/kernel/traps.c +++ b/arch/arm/kernel/traps.c @@ -30,6 +30,7 @@ @@ -9249,6 +11221,15 @@ index 195dff58bafc7..54abd8720ddef 100644 #include <asm/unistd.h> #include <asm/traps.h> #include <asm/ptrace.h> +@@ -333,7 +334,7 @@ static void oops_end(unsigned long flags, struct pt_regs *regs, int signr) + if (panic_on_oops) + panic("Fatal exception"); + if (signr) +- do_exit(signr); ++ make_task_dead(signr); + } + + /* @@ -574,7 +575,7 @@ do_cache_op(unsigned long start, unsigned long end, int flags) if (end < start || flags) return -EINVAL; @@ -9504,17 +11485,26 @@ index 8711d6824c1fa..c8cc993ca8ca1 100644 return 0; diff --git a/arch/arm/mach-at91/pm_suspend.S b/arch/arm/mach-at91/pm_suspend.S -index fdb4f63ecde4b..65cfcc19a936c 100644 +index fdb4f63ecde4b..2f0a370a13096 100644 --- a/arch/arm/mach-at91/pm_suspend.S +++ b/arch/arm/mach-at91/pm_suspend.S -@@ -172,9 +172,15 @@ sr_ena_2: - /* Put DDR PHY's DLL in bypass mode for non-backup modes. */ +@@ -169,12 +169,23 @@ sr_ena_2: + cmp tmp1, #UDDRC_STAT_SELFREF_TYPE_SW + bne sr_ena_2 + +- /* Put DDR PHY's DLL in bypass mode for non-backup modes. */ ++ /* Disable DX DLLs for non-backup modes. */ cmp r7, #AT91_PM_BACKUP beq sr_ena_3 - ldr tmp1, [r3, #DDR3PHY_PIR] - orr tmp1, tmp1, #DDR3PHY_PIR_DLLBYP - str tmp1, [r3, #DDR3PHY_PIR] + ++ /* Do not soft reset the AC DLL. */ ++ ldr tmp1, [r3, DDR3PHY_ACDLLCR] ++ bic tmp1, tmp1, DDR3PHY_ACDLLCR_DLLSRST ++ str tmp1, [r3, DDR3PHY_ACDLLCR] ++ + /* Disable DX DLLs. */ + ldr tmp1, [r3, #DDR3PHY_DX0DLLCR] + orr tmp1, tmp1, #DDR3PHY_DXDLLCR_DLLDIS @@ -9526,7 +11516,7 @@ index fdb4f63ecde4b..65cfcc19a936c 100644 sr_ena_3: /* Power down DDR PHY data receivers. */ -@@ -221,10 +227,14 @@ sr_ena_3: +@@ -221,10 +232,14 @@ sr_ena_3: bic tmp1, tmp1, #DDR3PHY_DSGCR_ODTPDD_ODT0 str tmp1, [r3, #DDR3PHY_DSGCR] @@ -9662,6 +11652,66 @@ index a56cc64deeb8f..9ce93e0b6cdc3 100644 /* set the secondary core boot from DDR */ remap_reg_value = readl_relaxed(ctrl_base + REG_SC_CTRL); +diff --git a/arch/arm/mach-imx/cpu-imx25.c b/arch/arm/mach-imx/cpu-imx25.c +index b2e1963f473de..2ee2d2813d577 100644 +--- a/arch/arm/mach-imx/cpu-imx25.c ++++ b/arch/arm/mach-imx/cpu-imx25.c +@@ -23,6 +23,7 @@ static int mx25_read_cpu_rev(void) + + np = of_find_compatible_node(NULL, NULL, "fsl,imx25-iim"); + iim_base = of_iomap(np, 0); ++ of_node_put(np); + BUG_ON(!iim_base); + rev = readl(iim_base + MXC_IIMSREV); + iounmap(iim_base); +diff --git a/arch/arm/mach-imx/cpu-imx27.c b/arch/arm/mach-imx/cpu-imx27.c +index bf70e13bbe9ee..1d28939083683 100644 +--- a/arch/arm/mach-imx/cpu-imx27.c ++++ b/arch/arm/mach-imx/cpu-imx27.c +@@ -28,6 +28,7 @@ static int mx27_read_cpu_rev(void) + + np = of_find_compatible_node(NULL, NULL, "fsl,imx27-ccm"); + ccm_base = of_iomap(np, 0); ++ of_node_put(np); + BUG_ON(!ccm_base); + /* + * now we have access to the IO registers. 
As we need +diff --git a/arch/arm/mach-imx/cpu-imx31.c b/arch/arm/mach-imx/cpu-imx31.c +index b9c24b851d1ab..35c544924e509 100644 +--- a/arch/arm/mach-imx/cpu-imx31.c ++++ b/arch/arm/mach-imx/cpu-imx31.c +@@ -39,6 +39,7 @@ static int mx31_read_cpu_rev(void) + + np = of_find_compatible_node(NULL, NULL, "fsl,imx31-iim"); + iim_base = of_iomap(np, 0); ++ of_node_put(np); + BUG_ON(!iim_base); + + /* read SREV register from IIM module */ +diff --git a/arch/arm/mach-imx/cpu-imx35.c b/arch/arm/mach-imx/cpu-imx35.c +index 80e7d8ab9f1bb..1fe75b39c2d99 100644 +--- a/arch/arm/mach-imx/cpu-imx35.c ++++ b/arch/arm/mach-imx/cpu-imx35.c +@@ -21,6 +21,7 @@ static int mx35_read_cpu_rev(void) + + np = of_find_compatible_node(NULL, NULL, "fsl,imx35-iim"); + iim_base = of_iomap(np, 0); ++ of_node_put(np); + BUG_ON(!iim_base); + + rev = imx_readl(iim_base + MXC_IIMSREV); +diff --git a/arch/arm/mach-imx/cpu-imx5.c b/arch/arm/mach-imx/cpu-imx5.c +index ad56263778f93..a67c89bf155dd 100644 +--- a/arch/arm/mach-imx/cpu-imx5.c ++++ b/arch/arm/mach-imx/cpu-imx5.c +@@ -28,6 +28,7 @@ static u32 imx5_read_srev_reg(const char *compat) + + np = of_find_compatible_node(NULL, NULL, compat); + iim_base = of_iomap(np, 0); ++ of_node_put(np); + WARN_ON(!iim_base); + + srev = readl(iim_base + IIM_SREV) & 0xff; diff --git a/arch/arm/mach-iop32x/include/mach/entry-macro.S b/arch/arm/mach-iop32x/include/mach/entry-macro.S index 8e6766d4621eb..341e5d9a6616d 100644 --- a/arch/arm/mach-iop32x/include/mach/entry-macro.S @@ -9872,6 +11922,36 @@ index 6794e2db1ad5f..ecc46c31004f6 100644 return 0; } +diff --git a/arch/arm/mach-mmp/time.c b/arch/arm/mach-mmp/time.c +index 41b2e8abc9e69..708816caf859c 100644 +--- a/arch/arm/mach-mmp/time.c ++++ b/arch/arm/mach-mmp/time.c +@@ -43,18 +43,21 @@ + static void __iomem *mmp_timer_base = TIMERS_VIRT_BASE; + + /* +- * FIXME: the timer needs some delay to stablize the counter capture ++ * Read the timer through the CVWR register. Delay is required after requesting ++ * a read. The CR register cannot be directly read due to metastability issues ++ * documented in the PXA168 software manual. 
+ */ + static inline uint32_t timer_read(void) + { +- int delay = 100; ++ uint32_t val; ++ int delay = 3; + + __raw_writel(1, mmp_timer_base + TMR_CVWR(1)); + + while (delay--) +- cpu_relax(); ++ val = __raw_readl(mmp_timer_base + TMR_CVWR(1)); + +- return __raw_readl(mmp_timer_base + TMR_CVWR(1)); ++ return val; + } + + static u64 notrace mmp_read_sched_clock(void) diff --git a/arch/arm/mach-mstar/Kconfig b/arch/arm/mach-mstar/Kconfig index cd300eeedc206..0bf4d312bcfd9 100644 --- a/arch/arm/mach-mstar/Kconfig @@ -9884,6 +11964,22 @@ index cd300eeedc206..0bf4d312bcfd9 100644 select MST_IRQ select MSTAR_MSC313_MPLL help +diff --git a/arch/arm/mach-mxs/mach-mxs.c b/arch/arm/mach-mxs/mach-mxs.c +index 25c9d184fa4c6..1c57ac4016493 100644 +--- a/arch/arm/mach-mxs/mach-mxs.c ++++ b/arch/arm/mach-mxs/mach-mxs.c +@@ -393,8 +393,10 @@ static void __init mxs_machine_init(void) + + root = of_find_node_by_path("/"); + ret = of_property_read_string(root, "model", &soc_dev_attr->machine); +- if (ret) ++ if (ret) { ++ kfree(soc_dev_attr); + return; ++ } + + soc_dev_attr->family = "Freescale MXS Family"; + soc_dev_attr->soc_id = mxs_get_soc_id(); diff --git a/arch/arm/mach-omap1/clock.c b/arch/arm/mach-omap1/clock.c index 9d4a0ab50a468..d63d5eb8d8fdf 100644 --- a/arch/arm/mach-omap1/clock.c @@ -10321,6 +12417,19 @@ index fb688003d156e..712da6a81b23f 100644 else walk_pte(st, pmd, addr, domain); +diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c +index efa4020250315..af5177801fb10 100644 +--- a/arch/arm/mm/fault.c ++++ b/arch/arm/mm/fault.c +@@ -125,7 +125,7 @@ __do_kernel_fault(struct mm_struct *mm, unsigned long addr, unsigned int fsr, + show_pte(KERN_ALERT, mm, addr); + die("Oops", regs, fsr); + bust_spinlocks(0); +- do_exit(SIGKILL); ++ make_task_dead(SIGKILL); + } + + /* diff --git a/arch/arm/mm/ioremap.c b/arch/arm/mm/ioremap.c index 80fb5a4a5c050..2660bdfcad4d0 100644 --- a/arch/arm/mm/ioremap.c @@ -10462,6 +12571,46 @@ index a4e0060051070..83a91e0ab8480 100644 create_mapping(&map); } +diff --git a/arch/arm/mm/nommu.c b/arch/arm/mm/nommu.c +index 2658f52903da6..80613674deb5b 100644 +--- a/arch/arm/mm/nommu.c ++++ b/arch/arm/mm/nommu.c +@@ -26,6 +26,13 @@ + + unsigned long vectors_base; + ++/* ++ * empty_zero_page is a special page that is used for ++ * zero-initialized data and COW. ++ */ ++struct page *empty_zero_page; ++EXPORT_SYMBOL(empty_zero_page); ++ + #ifdef CONFIG_ARM_MPU + struct mpu_rgn_info mpu_rgn_info; + #endif +@@ -148,9 +155,21 @@ void __init adjust_lowmem_bounds(void) + */ + void __init paging_init(const struct machine_desc *mdesc) + { ++ void *zero_page; ++ + early_trap_init((void *)vectors_base); + mpu_setup(); ++ ++ /* allocate the zero page. 
*/ ++ zero_page = (void *)memblock_alloc(PAGE_SIZE, PAGE_SIZE); ++ if (!zero_page) ++ panic("%s: Failed to allocate %lu bytes align=0x%lx\n", ++ __func__, PAGE_SIZE, PAGE_SIZE); ++ + bootmem_init(); ++ ++ empty_zero_page = virt_to_page(zero_page); ++ flush_dcache_page(empty_zero_page); + } + + /* diff --git a/arch/arm/mm/proc-v7-bugs.c b/arch/arm/mm/proc-v7-bugs.c index 114c05ab4dd91..8bc7a2d6d6c7f 100644 --- a/arch/arm/mm/proc-v7-bugs.c @@ -10750,6 +12899,20 @@ index 114c05ab4dd91..8bc7a2d6d6c7f 100644 + cpu_v7_spectre_v2_init(); + cpu_v7_spectre_bhb_init(); } +diff --git a/arch/arm/nwfpe/Makefile b/arch/arm/nwfpe/Makefile +index 303400fa2cdf7..2aec85ab1e8b9 100644 +--- a/arch/arm/nwfpe/Makefile ++++ b/arch/arm/nwfpe/Makefile +@@ -11,3 +11,9 @@ nwfpe-y += fpa11.o fpa11_cpdo.o fpa11_cpdt.o \ + entry.o + + nwfpe-$(CONFIG_FPE_NWFPE_XP) += extended_cpdo.o ++ ++# Try really hard to avoid generating calls to __aeabi_uldivmod() from ++# float64_rem() due to loop elision. ++ifdef CONFIG_CC_IS_CLANG ++CFLAGS_softfloat.o += -mllvm -replexitval=never ++endif diff --git a/arch/arm/probes/decode.h b/arch/arm/probes/decode.h index 9731735989921..facc889d05eee 100644 --- a/arch/arm/probes/decode.h @@ -10806,6 +12969,28 @@ index 14db56f49f0a3..6159010dac4a6 100644 obj-$(CONFIG_KPROBES) += core.o actions-common.o checkers-common.o obj-$(CONFIG_ARM_KPROBES_TEST) += test-kprobes.o test-kprobes-objs := test-core.o +diff --git a/arch/arm/probes/kprobes/core.c b/arch/arm/probes/kprobes/core.c +index 9d8634e2f12f7..9bcae72dda440 100644 +--- a/arch/arm/probes/kprobes/core.c ++++ b/arch/arm/probes/kprobes/core.c +@@ -11,6 +11,8 @@ + * Copyright (C) 2007 Marvell Ltd. + */ + ++#define pr_fmt(fmt) "kprobes: " fmt ++ + #include <linux/kernel.h> + #include <linux/kprobes.h> + #include <linux/module.h> +@@ -278,7 +280,7 @@ void __kprobes kprobe_handler(struct pt_regs *regs) + break; + case KPROBE_REENTER: + /* A nested probe was hit in FIQ, it is a BUG */ +- pr_warn("Unrecoverable kprobe detected.\n"); ++ pr_warn("Failed to recover from reentered kprobes.\n"); + dump_kprobe(p); + fallthrough; + default: diff --git a/arch/arm/xen/p2m.c b/arch/arm/xen/p2m.c index 84a1cea1f43b9..309648c17f486 100644 --- a/arch/arm/xen/p2m.c @@ -11120,8 +13305,30 @@ index d301ac0d406bf..3ec301bd08a91 100644 #address-cells = <1>; #size-cells = <0>; reg = <0xff8d2000 0x100>, +diff --git a/arch/arm64/boot/dts/amlogic/meson-axg.dtsi b/arch/arm64/boot/dts/amlogic/meson-axg.dtsi +index 3f5254eeb47b1..e2ab338adb3c1 100644 +--- a/arch/arm64/boot/dts/amlogic/meson-axg.dtsi ++++ b/arch/arm64/boot/dts/amlogic/meson-axg.dtsi +@@ -1885,7 +1885,7 @@ + sd_emmc_b: sd@5000 { + compatible = "amlogic,meson-axg-mmc"; + reg = <0x0 0x5000 0x0 0x800>; +- interrupts = <GIC_SPI 217 IRQ_TYPE_EDGE_RISING>; ++ interrupts = <GIC_SPI 217 IRQ_TYPE_LEVEL_HIGH>; + status = "disabled"; + clocks = <&clkc CLKID_SD_EMMC_B>, + <&clkc CLKID_SD_EMMC_B_CLK0>, +@@ -1897,7 +1897,7 @@ + sd_emmc_c: mmc@7000 { + compatible = "amlogic,meson-axg-mmc"; + reg = <0x0 0x7000 0x0 0x800>; +- interrupts = <GIC_SPI 218 IRQ_TYPE_EDGE_RISING>; ++ interrupts = <GIC_SPI 218 IRQ_TYPE_LEVEL_HIGH>; + status = "disabled"; + clocks = <&clkc CLKID_SD_EMMC_C>, + <&clkc CLKID_SD_EMMC_C_CLK0>, diff --git a/arch/arm64/boot/dts/amlogic/meson-g12-common.dtsi b/arch/arm64/boot/dts/amlogic/meson-g12-common.dtsi -index 00c6f53290d43..a3a1ea0f21340 100644 +index 00c6f53290d43..2526d6e3a3dcb 100644 --- a/arch/arm64/boot/dts/amlogic/meson-g12-common.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-g12-common.dtsi @@ 
-58,7 +58,7 @@ @@ -11146,6 +13353,33 @@ index 00c6f53290d43..a3a1ea0f21340 100644 linux,cma { compatible = "shared-dma-pool"; reusable; +@@ -2324,7 +2330,7 @@ + sd_emmc_a: sd@ffe03000 { + compatible = "amlogic,meson-axg-mmc"; + reg = <0x0 0xffe03000 0x0 0x800>; +- interrupts = <GIC_SPI 189 IRQ_TYPE_EDGE_RISING>; ++ interrupts = <GIC_SPI 189 IRQ_TYPE_LEVEL_HIGH>; + status = "disabled"; + clocks = <&clkc CLKID_SD_EMMC_A>, + <&clkc CLKID_SD_EMMC_A_CLK0>, +@@ -2336,7 +2342,7 @@ + sd_emmc_b: sd@ffe05000 { + compatible = "amlogic,meson-axg-mmc"; + reg = <0x0 0xffe05000 0x0 0x800>; +- interrupts = <GIC_SPI 190 IRQ_TYPE_EDGE_RISING>; ++ interrupts = <GIC_SPI 190 IRQ_TYPE_LEVEL_HIGH>; + status = "disabled"; + clocks = <&clkc CLKID_SD_EMMC_B>, + <&clkc CLKID_SD_EMMC_B_CLK0>, +@@ -2348,7 +2354,7 @@ + sd_emmc_c: mmc@ffe07000 { + compatible = "amlogic,meson-axg-mmc"; + reg = <0x0 0xffe07000 0x0 0x800>; +- interrupts = <GIC_SPI 191 IRQ_TYPE_EDGE_RISING>; ++ interrupts = <GIC_SPI 191 IRQ_TYPE_LEVEL_HIGH>; + status = "disabled"; + clocks = <&clkc CLKID_SD_EMMC_C>, + <&clkc CLKID_SD_EMMC_C_CLK0>, diff --git a/arch/arm64/boot/dts/amlogic/meson-g12a-sei510.dts b/arch/arm64/boot/dts/amlogic/meson-g12a-sei510.dts index 81269ccc24968..4fb31c2ba31c4 100644 --- a/arch/arm64/boot/dts/amlogic/meson-g12a-sei510.dts @@ -11410,7 +13644,7 @@ index feb0885047400..b40d2c1002c92 100644 pwms = <&pwm_AO_cd 1 1250 0>; pwm-dutycycle-range = <100 0>; diff --git a/arch/arm64/boot/dts/amlogic/meson-gx.dtsi b/arch/arm64/boot/dts/amlogic/meson-gx.dtsi -index 6b457b2c30a4b..aa14ea017a613 100644 +index 6b457b2c30a4b..ee623ead972e5 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gx.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-gx.dtsi @@ -49,6 +49,12 @@ @@ -11426,6 +13660,31 @@ index 6b457b2c30a4b..aa14ea017a613 100644 linux,cma { compatible = "shared-dma-pool"; reusable; +@@ -596,21 +602,21 @@ + sd_emmc_a: mmc@70000 { + compatible = "amlogic,meson-gx-mmc", "amlogic,meson-gxbb-mmc"; + reg = <0x0 0x70000 0x0 0x800>; +- interrupts = <GIC_SPI 216 IRQ_TYPE_EDGE_RISING>; ++ interrupts = <GIC_SPI 216 IRQ_TYPE_LEVEL_HIGH>; + status = "disabled"; + }; + + sd_emmc_b: mmc@72000 { + compatible = "amlogic,meson-gx-mmc", "amlogic,meson-gxbb-mmc"; + reg = <0x0 0x72000 0x0 0x800>; +- interrupts = <GIC_SPI 217 IRQ_TYPE_EDGE_RISING>; ++ interrupts = <GIC_SPI 217 IRQ_TYPE_LEVEL_HIGH>; + status = "disabled"; + }; + + sd_emmc_c: mmc@74000 { + compatible = "amlogic,meson-gx-mmc", "amlogic,meson-gxbb-mmc"; + reg = <0x0 0x74000 0x0 0x800>; +- interrupts = <GIC_SPI 218 IRQ_TYPE_EDGE_RISING>; ++ interrupts = <GIC_SPI 218 IRQ_TYPE_LEVEL_HIGH>; + status = "disabled"; + }; + }; diff --git a/arch/arm64/boot/dts/amlogic/meson-gxbb-wetek.dtsi b/arch/arm64/boot/dts/amlogic/meson-gxbb-wetek.dtsi index a350fee1264d7..a4d34398da358 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxbb-wetek.dtsi @@ -11590,7 +13849,7 @@ index 3d8b1f4f2001b..78bdbd2ccc9de 100644 opp-hz = /bits/ 64 <1000000000>; opp-microvolt = <770000>; diff --git a/arch/arm64/boot/dts/arm/juno-base.dtsi b/arch/arm64/boot/dts/arm/juno-base.dtsi -index 6288e104a0893..34e5549ea748a 100644 +index 6288e104a0893..a00b0f14c222f 100644 --- a/arch/arm64/boot/dts/arm/juno-base.dtsi +++ b/arch/arm64/boot/dts/arm/juno-base.dtsi @@ -26,7 +26,8 @@ @@ -11613,6 +13872,33 @@ index 6288e104a0893..34e5549ea748a 100644 <0x43000000 0x8 0x00000000 0x8 0x00000000 0x2 0x00000000>; #interrupt-cells = <1>; interrupt-map-mask = <0 0 0 7>; +@@ -597,12 +597,26 @@ + polling-delay = <1000>; + polling-delay-passive = <100>; + 
thermal-sensors = <&scpi_sensors0 0>; ++ trips { ++ pmic_crit0: trip0 { ++ temperature = <90000>; ++ hysteresis = <2000>; ++ type = "critical"; ++ }; ++ }; + }; + + soc { + polling-delay = <1000>; + polling-delay-passive = <100>; + thermal-sensors = <&scpi_sensors0 3>; ++ trips { ++ soc_crit0: trip0 { ++ temperature = <80000>; ++ hysteresis = <2000>; ++ type = "critical"; ++ }; ++ }; + }; + + big_cluster_thermal_zone: big-cluster { diff --git a/arch/arm64/boot/dts/broadcom/bcm4908/bcm4906.dtsi b/arch/arm64/boot/dts/broadcom/bcm4908/bcm4906.dtsi index 66023d5535247..d084c33d5ca82 100644 --- a/arch/arm64/boot/dts/broadcom/bcm4908/bcm4906.dtsi @@ -11725,6 +14011,19 @@ index 2cfeaf3b0a876..8c218689fef70 100644 compatible = "brcm,iproc-ahci", "generic-ahci"; reg = <0x663f2000 0x1000>; dma-coherent; +diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1012a-qds.dts b/arch/arm64/boot/dts/freescale/fsl-ls1012a-qds.dts +index e22c5e77fecdc..9615f3b9ee608 100644 +--- a/arch/arm64/boot/dts/freescale/fsl-ls1012a-qds.dts ++++ b/arch/arm64/boot/dts/freescale/fsl-ls1012a-qds.dts +@@ -110,7 +110,7 @@ + &i2c0 { + status = "okay"; + +- pca9547@77 { ++ i2c-mux@77 { + compatible = "nxp,pca9547"; + reg = <0x77>; + #address-cells = <1>; diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1012a-rdb.dts b/arch/arm64/boot/dts/freescale/fsl-ls1012a-rdb.dts index 79f155dedb2d0..e662677a6e28f 100644 --- a/arch/arm64/boot/dts/freescale/fsl-ls1012a-rdb.dts @@ -11769,6 +14068,19 @@ index bfd14b64567e4..2f92e62ecafe9 100644 &enetc_port1 { phy-handle = <&qds_phy1>; phy-connection-type = "rgmii-id"; +diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1043a-qds.dts b/arch/arm64/boot/dts/freescale/fsl-ls1043a-qds.dts +index fea167d222cfe..14856bc79b221 100644 +--- a/arch/arm64/boot/dts/freescale/fsl-ls1043a-qds.dts ++++ b/arch/arm64/boot/dts/freescale/fsl-ls1043a-qds.dts +@@ -70,7 +70,7 @@ + &i2c0 { + status = "okay"; + +- pca9547@77 { ++ i2c-mux@77 { + compatible = "nxp,pca9547"; + reg = <0x77>; + #address-cells = <1>; diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1043a.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls1043a.dtsi index 01b01e3204118..35d1939e690b0 100644 --- a/arch/arm64/boot/dts/freescale/fsl-ls1043a.dtsi @@ -11786,6 +14098,19 @@ index 01b01e3204118..35d1939e690b0 100644 status = "disabled"; }; +diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1046a-qds.dts b/arch/arm64/boot/dts/freescale/fsl-ls1046a-qds.dts +index eec62c63dafe2..9ee9928f71b49 100644 +--- a/arch/arm64/boot/dts/freescale/fsl-ls1046a-qds.dts ++++ b/arch/arm64/boot/dts/freescale/fsl-ls1046a-qds.dts +@@ -76,7 +76,7 @@ + &i2c0 { + status = "okay"; + +- pca9547@77 { ++ i2c-mux@77 { + compatible = "nxp,pca9547"; + reg = <0x77>; + #address-cells = <1>; diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1046a.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls1046a.dtsi index 687fea6d8afa4..4e7bd04d97984 100644 --- a/arch/arm64/boot/dts/freescale/fsl-ls1046a.dtsi @@ -11803,8 +14128,34 @@ index 687fea6d8afa4..4e7bd04d97984 100644 status = "disabled"; }; +diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1088a-qds.dts b/arch/arm64/boot/dts/freescale/fsl-ls1088a-qds.dts +index 41d8b15f25a54..aa52ff73ff9e0 100644 +--- a/arch/arm64/boot/dts/freescale/fsl-ls1088a-qds.dts ++++ b/arch/arm64/boot/dts/freescale/fsl-ls1088a-qds.dts +@@ -53,7 +53,7 @@ + &i2c0 { + status = "okay"; + +- i2c-switch@77 { ++ i2c-mux@77 { + compatible = "nxp,pca9547"; + reg = <0x77>; + #address-cells = <1>; +diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1088a-rdb.dts 
b/arch/arm64/boot/dts/freescale/fsl-ls1088a-rdb.dts +index 1bfbce69cc8b7..ee8e932628d17 100644 +--- a/arch/arm64/boot/dts/freescale/fsl-ls1088a-rdb.dts ++++ b/arch/arm64/boot/dts/freescale/fsl-ls1088a-rdb.dts +@@ -136,7 +136,7 @@ + &i2c0 { + status = "okay"; + +- i2c-switch@77 { ++ i2c-mux@77 { + compatible = "nxp,pca9547"; + reg = <0x77>; + #address-cells = <1>; diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1088a-ten64.dts b/arch/arm64/boot/dts/freescale/fsl-ls1088a-ten64.dts -index 3063851c2fb91..d3f03dcbb8c38 100644 +index 3063851c2fb91..a9c6682a3955e 100644 --- a/arch/arm64/boot/dts/freescale/fsl-ls1088a-ten64.dts +++ b/arch/arm64/boot/dts/freescale/fsl-ls1088a-ten64.dts @@ -38,7 +38,6 @@ @@ -11823,11 +14174,40 @@ index 3063851c2fb91..d3f03dcbb8c38 100644 linux,code = <KEY_WPS_BUTTON>; }; }; +@@ -247,7 +245,7 @@ + &i2c3 { + status = "okay"; + +- i2c-switch@70 { ++ i2c-mux@70 { + compatible = "nxp,pca9540"; + #address-cells = <1>; + #size-cells = <0>; diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1088a.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls1088a.dtsi -index f85e437f80b73..6050723172436 100644 +index f85e437f80b73..63441028622a6 100644 --- a/arch/arm64/boot/dts/freescale/fsl-ls1088a.dtsi +++ b/arch/arm64/boot/dts/freescale/fsl-ls1088a.dtsi -@@ -847,7 +847,7 @@ +@@ -758,6 +758,9 @@ + little-endian; + #address-cells = <1>; + #size-cells = <0>; ++ clock-frequency = <2500000>; ++ clocks = <&clockgen QORIQ_CLK_PLATFORM_PLL ++ QORIQ_CLK_PLL_DIV(1)>; + status = "disabled"; + }; + +@@ -767,6 +770,9 @@ + little-endian; + #address-cells = <1>; + #size-cells = <0>; ++ clock-frequency = <2500000>; ++ clocks = <&clockgen QORIQ_CLK_PLATFORM_PLL ++ QORIQ_CLK_PLL_DIV(1)>; + status = "disabled"; + }; + +@@ -847,7 +853,7 @@ }; cluster1_core0_watchdog: wdt@c000000 { @@ -11836,7 +14216,7 @@ index f85e437f80b73..6050723172436 100644 reg = <0x0 0xc000000 0x0 0x1000>; clocks = <&clockgen QORIQ_CLK_PLATFORM_PLL QORIQ_CLK_PLL_DIV(16)>, -@@ -857,7 +857,7 @@ +@@ -857,7 +863,7 @@ }; cluster1_core1_watchdog: wdt@c010000 { @@ -11845,7 +14225,7 @@ index f85e437f80b73..6050723172436 100644 reg = <0x0 0xc010000 0x0 0x1000>; clocks = <&clockgen QORIQ_CLK_PLATFORM_PLL QORIQ_CLK_PLL_DIV(16)>, -@@ -867,7 +867,7 @@ +@@ -867,7 +873,7 @@ }; cluster1_core2_watchdog: wdt@c020000 { @@ -11854,7 +14234,7 @@ index f85e437f80b73..6050723172436 100644 reg = <0x0 0xc020000 0x0 0x1000>; clocks = <&clockgen QORIQ_CLK_PLATFORM_PLL QORIQ_CLK_PLL_DIV(16)>, -@@ -877,7 +877,7 @@ +@@ -877,7 +883,7 @@ }; cluster1_core3_watchdog: wdt@c030000 { @@ -11863,7 +14243,7 @@ index f85e437f80b73..6050723172436 100644 reg = <0x0 0xc030000 0x0 0x1000>; clocks = <&clockgen QORIQ_CLK_PLATFORM_PLL QORIQ_CLK_PLL_DIV(16)>, -@@ -887,7 +887,7 @@ +@@ -887,7 +893,7 @@ }; cluster2_core0_watchdog: wdt@c100000 { @@ -11872,7 +14252,7 @@ index f85e437f80b73..6050723172436 100644 reg = <0x0 0xc100000 0x0 0x1000>; clocks = <&clockgen QORIQ_CLK_PLATFORM_PLL QORIQ_CLK_PLL_DIV(16)>, -@@ -897,7 +897,7 @@ +@@ -897,7 +903,7 @@ }; cluster2_core1_watchdog: wdt@c110000 { @@ -11881,7 +14261,7 @@ index f85e437f80b73..6050723172436 100644 reg = <0x0 0xc110000 0x0 0x1000>; clocks = <&clockgen QORIQ_CLK_PLATFORM_PLL QORIQ_CLK_PLL_DIV(16)>, -@@ -907,7 +907,7 @@ +@@ -907,7 +913,7 @@ }; cluster2_core2_watchdog: wdt@c120000 { @@ -11890,7 +14270,7 @@ index f85e437f80b73..6050723172436 100644 reg = <0x0 0xc120000 0x0 0x1000>; clocks = <&clockgen QORIQ_CLK_PLATFORM_PLL QORIQ_CLK_PLL_DIV(16)>, -@@ -917,7 +917,7 @@ +@@ -917,7 +923,7 @@ }; cluster2_core3_watchdog: wdt@c130000 { @@ 
-11899,8 +14279,34 @@ index f85e437f80b73..6050723172436 100644 reg = <0x0 0xc130000 0x0 0x1000>; clocks = <&clockgen QORIQ_CLK_PLATFORM_PLL QORIQ_CLK_PLL_DIV(16)>, +diff --git a/arch/arm64/boot/dts/freescale/fsl-ls208xa-qds.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls208xa-qds.dtsi +index 10d2fe0919651..8d96d18c3697a 100644 +--- a/arch/arm64/boot/dts/freescale/fsl-ls208xa-qds.dtsi ++++ b/arch/arm64/boot/dts/freescale/fsl-ls208xa-qds.dtsi +@@ -44,7 +44,7 @@ + + &i2c0 { + status = "okay"; +- pca9547@77 { ++ i2c-mux@77 { + compatible = "nxp,pca9547"; + reg = <0x77>; + #address-cells = <1>; +diff --git a/arch/arm64/boot/dts/freescale/fsl-ls208xa-rdb.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls208xa-rdb.dtsi +index 4b71c4fcb35f6..787e408da0024 100644 +--- a/arch/arm64/boot/dts/freescale/fsl-ls208xa-rdb.dtsi ++++ b/arch/arm64/boot/dts/freescale/fsl-ls208xa-rdb.dtsi +@@ -44,7 +44,7 @@ + + &i2c0 { + status = "okay"; +- pca9547@75 { ++ i2c-mux@75 { + compatible = "nxp,pca9547"; + reg = <0x75>; + #address-cells = <1>; diff --git a/arch/arm64/boot/dts/freescale/fsl-ls208xa.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls208xa.dtsi -index 801ba9612d361..1282b61da8a55 100644 +index 801ba9612d361..12e59777363fe 100644 --- a/arch/arm64/boot/dts/freescale/fsl-ls208xa.dtsi +++ b/arch/arm64/boot/dts/freescale/fsl-ls208xa.dtsi @@ -387,7 +387,7 @@ @@ -11975,8 +14381,41 @@ index 801ba9612d361..1282b61da8a55 100644 reg = <0x0 0xc310000 0x0 0x1000>; clocks = <&clockgen QORIQ_CLK_PLATFORM_PLL QORIQ_CLK_PLL_DIV(4)>, +@@ -525,6 +525,9 @@ + little-endian; + #address-cells = <1>; + #size-cells = <0>; ++ clock-frequency = <2500000>; ++ clocks = <&clockgen QORIQ_CLK_PLATFORM_PLL ++ QORIQ_CLK_PLL_DIV(2)>; + status = "disabled"; + }; + +@@ -534,6 +537,9 @@ + little-endian; + #address-cells = <1>; + #size-cells = <0>; ++ clock-frequency = <2500000>; ++ clocks = <&clockgen QORIQ_CLK_PLATFORM_PLL ++ QORIQ_CLK_PLL_DIV(2)>; + status = "disabled"; + }; + +diff --git a/arch/arm64/boot/dts/freescale/fsl-lx2160a-cex7.dtsi b/arch/arm64/boot/dts/freescale/fsl-lx2160a-cex7.dtsi +index afb455210bd07..d32a52ab00a42 100644 +--- a/arch/arm64/boot/dts/freescale/fsl-lx2160a-cex7.dtsi ++++ b/arch/arm64/boot/dts/freescale/fsl-lx2160a-cex7.dtsi +@@ -54,7 +54,7 @@ + &i2c0 { + status = "okay"; + +- i2c-switch@77 { ++ i2c-mux@77 { + compatible = "nxp,pca9547"; + #address-cells = <1>; + #size-cells = <0>; diff --git a/arch/arm64/boot/dts/freescale/fsl-lx2160a.dtsi b/arch/arm64/boot/dts/freescale/fsl-lx2160a.dtsi -index c4b1a59ba424b..51c4f61007cdb 100644 +index c4b1a59ba424b..1bc7f538f6905 100644 --- a/arch/arm64/boot/dts/freescale/fsl-lx2160a.dtsi +++ b/arch/arm64/boot/dts/freescale/fsl-lx2160a.dtsi @@ -719,7 +719,7 @@ @@ -11997,10 +14436,82 @@ index c4b1a59ba424b..51c4f61007cdb 100644 status = "disabled"; }; +@@ -1369,6 +1369,9 @@ + #address-cells = <1>; + #size-cells = <0>; + little-endian; ++ clock-frequency = <2500000>; ++ clocks = <&clockgen QORIQ_CLK_PLATFORM_PLL ++ QORIQ_CLK_PLL_DIV(2)>; + status = "disabled"; + }; + +@@ -1379,6 +1382,9 @@ + little-endian; + #address-cells = <1>; + #size-cells = <0>; ++ clock-frequency = <2500000>; ++ clocks = <&clockgen QORIQ_CLK_PLATFORM_PLL ++ QORIQ_CLK_PLL_DIV(2)>; + status = "disabled"; + }; + +diff --git a/arch/arm64/boot/dts/freescale/imx8-ss-conn.dtsi b/arch/arm64/boot/dts/freescale/imx8-ss-conn.dtsi +index a79f42a9618ec..639220dbff008 100644 +--- a/arch/arm64/boot/dts/freescale/imx8-ss-conn.dtsi ++++ b/arch/arm64/boot/dts/freescale/imx8-ss-conn.dtsi +@@ -38,9 +38,9 @@ conn_subsys: bus@5b000000 { + 
interrupts = <GIC_SPI 232 IRQ_TYPE_LEVEL_HIGH>; + reg = <0x5b010000 0x10000>; + clocks = <&sdhc0_lpcg IMX_LPCG_CLK_4>, +- <&sdhc0_lpcg IMX_LPCG_CLK_5>, +- <&sdhc0_lpcg IMX_LPCG_CLK_0>; +- clock-names = "ipg", "per", "ahb"; ++ <&sdhc0_lpcg IMX_LPCG_CLK_0>, ++ <&sdhc0_lpcg IMX_LPCG_CLK_5>; ++ clock-names = "ipg", "ahb", "per"; + power-domains = <&pd IMX_SC_R_SDHC_0>; + status = "disabled"; + }; +@@ -49,9 +49,9 @@ conn_subsys: bus@5b000000 { + interrupts = <GIC_SPI 233 IRQ_TYPE_LEVEL_HIGH>; + reg = <0x5b020000 0x10000>; + clocks = <&sdhc1_lpcg IMX_LPCG_CLK_4>, +- <&sdhc1_lpcg IMX_LPCG_CLK_5>, +- <&sdhc1_lpcg IMX_LPCG_CLK_0>; +- clock-names = "ipg", "per", "ahb"; ++ <&sdhc1_lpcg IMX_LPCG_CLK_0>, ++ <&sdhc1_lpcg IMX_LPCG_CLK_5>; ++ clock-names = "ipg", "ahb", "per"; + power-domains = <&pd IMX_SC_R_SDHC_1>; + fsl,tuning-start-tap = <20>; + fsl,tuning-step= <2>; +@@ -62,9 +62,9 @@ conn_subsys: bus@5b000000 { + interrupts = <GIC_SPI 234 IRQ_TYPE_LEVEL_HIGH>; + reg = <0x5b030000 0x10000>; + clocks = <&sdhc2_lpcg IMX_LPCG_CLK_4>, +- <&sdhc2_lpcg IMX_LPCG_CLK_5>, +- <&sdhc2_lpcg IMX_LPCG_CLK_0>; +- clock-names = "ipg", "per", "ahb"; ++ <&sdhc2_lpcg IMX_LPCG_CLK_0>, ++ <&sdhc2_lpcg IMX_LPCG_CLK_5>; ++ clock-names = "ipg", "ahb", "per"; + power-domains = <&pd IMX_SC_R_SDHC_2>; + status = "disabled"; + }; diff --git a/arch/arm64/boot/dts/freescale/imx8mm-beacon-baseboard.dtsi b/arch/arm64/boot/dts/freescale/imx8mm-beacon-baseboard.dtsi -index 6f5e63696ec0a..94e5fa8ca9572 100644 +index 6f5e63696ec0a..bb18354c10f08 100644 --- a/arch/arm64/boot/dts/freescale/imx8mm-beacon-baseboard.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mm-beacon-baseboard.dtsi +@@ -70,7 +70,7 @@ + &ecspi2 { + pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_espi2>; +- cs-gpios = <&gpio5 9 GPIO_ACTIVE_LOW>; ++ cs-gpios = <&gpio5 13 GPIO_ACTIVE_LOW>; + status = "okay"; + + eeprom@0 { @@ -166,6 +166,7 @@ pinctrl-0 = <&pinctrl_uart3>; assigned-clocks = <&clk IMX8MM_CLK_UART3>; @@ -12009,6 +14520,15 @@ index 6f5e63696ec0a..94e5fa8ca9572 100644 status = "okay"; }; +@@ -185,7 +186,7 @@ + MX8MM_IOMUXC_ECSPI2_SCLK_ECSPI2_SCLK 0x82 + MX8MM_IOMUXC_ECSPI2_MOSI_ECSPI2_MOSI 0x82 + MX8MM_IOMUXC_ECSPI2_MISO_ECSPI2_MISO 0x82 +- MX8MM_IOMUXC_ECSPI1_SS0_GPIO5_IO9 0x41 ++ MX8MM_IOMUXC_ECSPI2_SS0_GPIO5_IO13 0x41 + >; + }; + @@ -236,6 +237,8 @@ fsl,pins = < MX8MM_IOMUXC_ECSPI1_SCLK_UART3_DCE_RX 0x40 @@ -12033,6 +14553,32 @@ index e99e7644ff392..49d7470812eef 100644 reset-gpios = <&gpio4 27 GPIO_ACTIVE_LOW>; }; }; +diff --git a/arch/arm64/boot/dts/freescale/imx8mm-nitrogen-r2.dts b/arch/arm64/boot/dts/freescale/imx8mm-nitrogen-r2.dts +index 74c09891600f2..6357078185edd 100644 +--- a/arch/arm64/boot/dts/freescale/imx8mm-nitrogen-r2.dts ++++ b/arch/arm64/boot/dts/freescale/imx8mm-nitrogen-r2.dts +@@ -214,7 +214,7 @@ + pinctrl-0 = <&pinctrl_i2c3>; + status = "okay"; + +- i2cmux@70 { ++ i2c-mux@70 { + compatible = "nxp,pca9540"; + reg = <0x70>; + #address-cells = <1>; +diff --git a/arch/arm64/boot/dts/freescale/imx8mm-pinfunc.h b/arch/arm64/boot/dts/freescale/imx8mm-pinfunc.h +index a003e6af33533..56271abfb7e09 100644 +--- a/arch/arm64/boot/dts/freescale/imx8mm-pinfunc.h ++++ b/arch/arm64/boot/dts/freescale/imx8mm-pinfunc.h +@@ -601,7 +601,7 @@ + #define MX8MM_IOMUXC_UART1_RXD_GPIO5_IO22 0x234 0x49C 0x000 0x5 0x0 + #define MX8MM_IOMUXC_UART1_RXD_TPSMP_HDATA24 0x234 0x49C 0x000 0x7 0x0 + #define MX8MM_IOMUXC_UART1_TXD_UART1_DCE_TX 0x238 0x4A0 0x000 0x0 0x0 +-#define MX8MM_IOMUXC_UART1_TXD_UART1_DTE_RX 0x238 0x4A0 0x4F4 0x0 0x0 ++#define 
MX8MM_IOMUXC_UART1_TXD_UART1_DTE_RX 0x238 0x4A0 0x4F4 0x0 0x1 + #define MX8MM_IOMUXC_UART1_TXD_ECSPI3_MOSI 0x238 0x4A0 0x000 0x1 0x0 + #define MX8MM_IOMUXC_UART1_TXD_GPIO5_IO23 0x238 0x4A0 0x000 0x5 0x0 + #define MX8MM_IOMUXC_UART1_TXD_TPSMP_HDATA25 0x238 0x4A0 0x000 0x7 0x0 diff --git a/arch/arm64/boot/dts/freescale/imx8mm-var-som.dtsi b/arch/arm64/boot/dts/freescale/imx8mm-var-som.dtsi index 1dc9d187601c5..a0bd540f27d3d 100644 --- a/arch/arm64/boot/dts/freescale/imx8mm-var-som.dtsi @@ -12138,6 +14684,35 @@ index d2ffd62a3bd46..942fed2eed643 100644 MX8MM_IOMUXC_ECSPI2_SS0_GPIO5_IO13 0xd6 >; }; +diff --git a/arch/arm64/boot/dts/freescale/imx8mm-venice-gw7901.dts b/arch/arm64/boot/dts/freescale/imx8mm-venice-gw7901.dts +index bafd5c8ea4e28..f7e41e5c2c7bc 100644 +--- a/arch/arm64/boot/dts/freescale/imx8mm-venice-gw7901.dts ++++ b/arch/arm64/boot/dts/freescale/imx8mm-venice-gw7901.dts +@@ -675,6 +675,7 @@ + &usbotg2 { + dr_mode = "host"; + vbus-supply = <®_usb2_vbus>; ++ over-current-active-low; + status = "okay"; + }; + +diff --git a/arch/arm64/boot/dts/freescale/imx8mm.dtsi b/arch/arm64/boot/dts/freescale/imx8mm.dtsi +index 2f632e8ca3880..67e91fdfaf526 100644 +--- a/arch/arm64/boot/dts/freescale/imx8mm.dtsi ++++ b/arch/arm64/boot/dts/freescale/imx8mm.dtsi +@@ -1014,10 +1014,10 @@ + clocks = <&clk IMX8MM_CLK_NAND_USDHC_BUS_RAWNAND_CLK>; + }; + +- gpmi: nand-controller@33002000{ ++ gpmi: nand-controller@33002000 { + compatible = "fsl,imx8mm-gpmi-nand", "fsl,imx7d-gpmi-nand"; + #address-cells = <1>; +- #size-cells = <1>; ++ #size-cells = <0>; + reg = <0x33002000 0x2000>, <0x33004000 0x4000>; + reg-names = "gpmi-nand", "bch"; + interrupts = <GIC_SPI 14 IRQ_TYPE_LEVEL_HIGH>; diff --git a/arch/arm64/boot/dts/freescale/imx8mn-beacon-baseboard.dtsi b/arch/arm64/boot/dts/freescale/imx8mn-beacon-baseboard.dtsi index 376ca8ff72133..e69fd41b46d0e 100644 --- a/arch/arm64/boot/dts/freescale/imx8mn-beacon-baseboard.dtsi @@ -12196,7 +14771,7 @@ index b16c7caf34c11..87b5e23c766f7 100644 ti,debounce-rep = /bits/ 16 <1>; ti,settle-delay-usec = /bits/ 16 <150>; diff --git a/arch/arm64/boot/dts/freescale/imx8mn.dtsi b/arch/arm64/boot/dts/freescale/imx8mn.dtsi -index da6c942fb7f9d..6d6cbd4c83b8f 100644 +index da6c942fb7f9d..6dcead5bae620 100644 --- a/arch/arm64/boot/dts/freescale/imx8mn.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mn.dtsi @@ -263,7 +263,7 @@ @@ -12244,6 +14819,15 @@ index da6c942fb7f9d..6d6cbd4c83b8f 100644 reg = <0x300b0000 0x10000>; interrupts = <GIC_SPI 111 IRQ_TYPE_LEVEL_HIGH>; clocks = <&clk IMX8MN_CLK_SAI7_IPG>, +@@ -998,7 +998,7 @@ + gpmi: nand-controller@33002000 { + compatible = "fsl,imx8mn-gpmi-nand", "fsl,imx7d-gpmi-nand"; + #address-cells = <1>; +- #size-cells = <1>; ++ #size-cells = <0>; + reg = <0x33002000 0x2000>, <0x33004000 0x4000>; + reg-names = "gpmi-nand", "bch"; + interrupts = <GIC_SPI 14 IRQ_TYPE_LEVEL_HIGH>; diff --git a/arch/arm64/boot/dts/freescale/imx8mp-evk.dts b/arch/arm64/boot/dts/freescale/imx8mp-evk.dts index 7b99fad6e4d6e..5c9fb39dd99e5 100644 --- a/arch/arm64/boot/dts/freescale/imx8mp-evk.dts @@ -12465,6 +15049,90 @@ index 984a6b9ded8d7..6aa720bafe289 100644 >; }; }; +diff --git a/arch/arm64/boot/dts/freescale/imx8mp-phycore-som.dtsi b/arch/arm64/boot/dts/freescale/imx8mp-phycore-som.dtsi +index fc178eebf8aa4..8e189d8997941 100644 +--- a/arch/arm64/boot/dts/freescale/imx8mp-phycore-som.dtsi ++++ b/arch/arm64/boot/dts/freescale/imx8mp-phycore-som.dtsi +@@ -98,7 +98,6 @@ + + regulators { + buck1: BUCK1 { +- regulator-compatible = "BUCK1"; + regulator-min-microvolt 
= <600000>; + regulator-max-microvolt = <2187500>; + regulator-boot-on; +@@ -107,7 +106,6 @@ + }; + + buck2: BUCK2 { +- regulator-compatible = "BUCK2"; + regulator-min-microvolt = <600000>; + regulator-max-microvolt = <2187500>; + regulator-boot-on; +@@ -116,7 +114,6 @@ + }; + + buck4: BUCK4 { +- regulator-compatible = "BUCK4"; + regulator-min-microvolt = <600000>; + regulator-max-microvolt = <3400000>; + regulator-boot-on; +@@ -124,7 +121,6 @@ + }; + + buck5: BUCK5 { +- regulator-compatible = "BUCK5"; + regulator-min-microvolt = <600000>; + regulator-max-microvolt = <3400000>; + regulator-boot-on; +@@ -132,7 +128,6 @@ + }; + + buck6: BUCK6 { +- regulator-compatible = "BUCK6"; + regulator-min-microvolt = <600000>; + regulator-max-microvolt = <3400000>; + regulator-boot-on; +@@ -140,7 +135,6 @@ + }; + + ldo1: LDO1 { +- regulator-compatible = "LDO1"; + regulator-min-microvolt = <1600000>; + regulator-max-microvolt = <3300000>; + regulator-boot-on; +@@ -148,7 +142,6 @@ + }; + + ldo2: LDO2 { +- regulator-compatible = "LDO2"; + regulator-min-microvolt = <800000>; + regulator-max-microvolt = <1150000>; + regulator-boot-on; +@@ -156,7 +149,6 @@ + }; + + ldo3: LDO3 { +- regulator-compatible = "LDO3"; + regulator-min-microvolt = <800000>; + regulator-max-microvolt = <3300000>; + regulator-boot-on; +@@ -164,7 +156,6 @@ + }; + + ldo4: LDO4 { +- regulator-compatible = "LDO4"; + regulator-min-microvolt = <800000>; + regulator-max-microvolt = <3300000>; + regulator-boot-on; +@@ -172,7 +163,6 @@ + }; + + ldo5: LDO5 { +- regulator-compatible = "LDO5"; + regulator-min-microvolt = <1800000>; + regulator-max-microvolt = <3300000>; + }; diff --git a/arch/arm64/boot/dts/freescale/imx8mp.dtsi b/arch/arm64/boot/dts/freescale/imx8mp.dtsi index 9b07b26230a11..664177ed38d3e 100644 --- a/arch/arm64/boot/dts/freescale/imx8mp.dtsi @@ -12499,6 +15167,50 @@ index 460ef0d86540a..c86cd20d4e709 100644 maxim,over-heat-temp = <700>; maxim,over-volt = <4500>; maxim,rsns-microohm = <5000>; +diff --git a/arch/arm64/boot/dts/freescale/imx8mq-nitrogen.dts b/arch/arm64/boot/dts/freescale/imx8mq-nitrogen.dts +index f70fb32b96b0c..cf14ab5f7404c 100644 +--- a/arch/arm64/boot/dts/freescale/imx8mq-nitrogen.dts ++++ b/arch/arm64/boot/dts/freescale/imx8mq-nitrogen.dts +@@ -133,7 +133,7 @@ + pinctrl-0 = <&pinctrl_i2c1>; + status = "okay"; + +- i2cmux@70 { ++ i2c-mux@70 { + compatible = "nxp,pca9546"; + pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_i2c1_pca9546>; +@@ -216,7 +216,7 @@ + pinctrl-0 = <&pinctrl_i2c4>; + status = "okay"; + +- pca9546: i2cmux@70 { ++ pca9546: i2c-mux@70 { + compatible = "nxp,pca9546"; + reg = <0x70>; + #address-cells = <1>; +diff --git a/arch/arm64/boot/dts/freescale/imx8mq-thor96.dts b/arch/arm64/boot/dts/freescale/imx8mq-thor96.dts +index 5d5aa6537225f..6e6182709d220 100644 +--- a/arch/arm64/boot/dts/freescale/imx8mq-thor96.dts ++++ b/arch/arm64/boot/dts/freescale/imx8mq-thor96.dts +@@ -339,7 +339,7 @@ + bus-width = <4>; + non-removable; + no-sd; +- no-emmc; ++ no-mmc; + status = "okay"; + + brcmf: wifi@1 { +@@ -359,7 +359,7 @@ + cd-gpios = <&gpio2 12 GPIO_ACTIVE_LOW>; + bus-width = <4>; + no-sdio; +- no-emmc; ++ no-mmc; + disable-wp; + status = "okay"; + }; diff --git a/arch/arm64/boot/dts/freescale/imx8mq.dtsi b/arch/arm64/boot/dts/freescale/imx8mq.dtsi index 4066b16126552..fd38092bb247e 100644 --- a/arch/arm64/boot/dts/freescale/imx8mq.dtsi @@ -12551,6 +15263,19 @@ index aebbe2b84aa13..a143f38bc78bd 100644 #clock-cells = <2>; }; +diff --git a/arch/arm64/boot/dts/freescale/imx8qxp-mek.dts 
b/arch/arm64/boot/dts/freescale/imx8qxp-mek.dts +index 863232a47004c..4497763d57ccf 100644 +--- a/arch/arm64/boot/dts/freescale/imx8qxp-mek.dts ++++ b/arch/arm64/boot/dts/freescale/imx8qxp-mek.dts +@@ -61,7 +61,7 @@ + pinctrl-0 = <&pinctrl_lpi2c1 &pinctrl_ioexp_rst>; + status = "okay"; + +- i2c-switch@71 { ++ i2c-mux@71 { + compatible = "nxp,pca9646", "nxp,pca9546"; + #address-cells = <1>; + #size-cells = <0>; diff --git a/arch/arm64/boot/dts/hisilicon/hi3660.dtsi b/arch/arm64/boot/dts/hisilicon/hi3660.dtsi index 2d5c1a348716a..6eabec2602e23 100644 --- a/arch/arm64/boot/dts/hisilicon/hi3660.dtsi @@ -12641,7 +15366,7 @@ index c5eb3604dd5b7..119db6b541b7b 100644 &mdio { diff --git a/arch/arm64/boot/dts/marvell/armada-3720-turris-mox.dts b/arch/arm64/boot/dts/marvell/armada-3720-turris-mox.dts -index 04da07ae44208..1cee26479bfec 100644 +index 04da07ae44208..b276dd77df83c 100644 --- a/arch/arm64/boot/dts/marvell/armada-3720-turris-mox.dts +++ b/arch/arm64/boot/dts/marvell/armada-3720-turris-mox.dts @@ -18,6 +18,7 @@ @@ -12652,7 +15377,20 @@ index 04da07ae44208..1cee26479bfec 100644 ethernet1 = ð1; mmc0 = &sdhci0; mmc1 = &sdhci1; -@@ -138,7 +139,9 @@ +@@ -124,9 +125,12 @@ + /delete-property/ mrvl,i2c-fast-mode; + status = "okay"; + ++ /* MCP7940MT-I/MNY RTC */ + rtc@6f { + compatible = "microchip,mcp7940x"; + reg = <0x6f>; ++ interrupt-parent = <&gpiosb>; ++ interrupts = <5 0>; /* GPIO2_5 */ + }; + }; + +@@ -138,7 +142,9 @@ /* * U-Boot port for Turris Mox has a bug which always expects that "ranges" DT property * contains exactly 2 ranges with 3 (child) address cells, 2 (parent) address cells and @@ -12663,7 +15401,7 @@ index 04da07ae44208..1cee26479bfec 100644 * conditions are not met then U-Boot crashes during loading kernel DTB file. PCIe address * space is 128 MB long, so the best split between MEM and IO is to use fixed 16 MB window * for IO and the rest 112 MB (64+32+16) for MEM, despite that maximal IO size is just 64 kB. 
-@@ -147,6 +150,9 @@ +@@ -147,6 +153,9 @@ * https://source.denx.de/u-boot/u-boot/-/commit/cb2ddb291ee6fcbddd6d8f4ff49089dfe580f5d7 * https://source.denx.de/u-boot/u-boot/-/commit/c64ac3b3185aeb3846297ad7391fc6df8ecd73bf * https://source.denx.de/u-boot/u-boot/-/commit/4a82fca8e330157081fc132a591ebd99ba02ee33 @@ -12716,6 +15454,209 @@ index a2b7e5ec979d3..327b04134134f 100644 +&cp0_gpio2 { + status = "okay"; +}; +diff --git a/arch/arm64/boot/dts/mediatek/mt2712-evb.dts b/arch/arm64/boot/dts/mediatek/mt2712-evb.dts +index 7d369fdd3117f..9d20cabf4f699 100644 +--- a/arch/arm64/boot/dts/mediatek/mt2712-evb.dts ++++ b/arch/arm64/boot/dts/mediatek/mt2712-evb.dts +@@ -26,14 +26,14 @@ + stdout-path = "serial0:921600n8"; + }; + +- cpus_fixed_vproc0: fixedregulator@0 { ++ cpus_fixed_vproc0: regulator-vproc-buck0 { + compatible = "regulator-fixed"; + regulator-name = "vproc_buck0"; + regulator-min-microvolt = <1000000>; + regulator-max-microvolt = <1000000>; + }; + +- cpus_fixed_vproc1: fixedregulator@1 { ++ cpus_fixed_vproc1: regulator-vproc-buck1 { + compatible = "regulator-fixed"; + regulator-name = "vproc_buck1"; + regulator-min-microvolt = <1000000>; +@@ -50,7 +50,7 @@ + id-gpio = <&pio 14 GPIO_ACTIVE_HIGH>; + }; + +- usb_p0_vbus: regulator@2 { ++ usb_p0_vbus: regulator-usb-p0-vbus { + compatible = "regulator-fixed"; + regulator-name = "p0_vbus"; + regulator-min-microvolt = <5000000>; +@@ -59,7 +59,7 @@ + enable-active-high; + }; + +- usb_p1_vbus: regulator@3 { ++ usb_p1_vbus: regulator-usb-p1-vbus { + compatible = "regulator-fixed"; + regulator-name = "p1_vbus"; + regulator-min-microvolt = <5000000>; +@@ -68,7 +68,7 @@ + enable-active-high; + }; + +- usb_p2_vbus: regulator@4 { ++ usb_p2_vbus: regulator-usb-p2-vbus { + compatible = "regulator-fixed"; + regulator-name = "p2_vbus"; + regulator-min-microvolt = <5000000>; +@@ -77,7 +77,7 @@ + enable-active-high; + }; + +- usb_p3_vbus: regulator@5 { ++ usb_p3_vbus: regulator-usb-p3-vbus { + compatible = "regulator-fixed"; + regulator-name = "p3_vbus"; + regulator-min-microvolt = <5000000>; +diff --git a/arch/arm64/boot/dts/mediatek/mt2712e.dtsi b/arch/arm64/boot/dts/mediatek/mt2712e.dtsi +index a9cca9c146fdc..993a03d7fff14 100644 +--- a/arch/arm64/boot/dts/mediatek/mt2712e.dtsi ++++ b/arch/arm64/boot/dts/mediatek/mt2712e.dtsi +@@ -160,70 +160,70 @@ + #clock-cells = <0>; + }; + +- clk26m: oscillator@0 { ++ clk26m: oscillator-26m { + compatible = "fixed-clock"; + #clock-cells = <0>; + clock-frequency = <26000000>; + clock-output-names = "clk26m"; + }; + +- clk32k: oscillator@1 { ++ clk32k: oscillator-32k { + compatible = "fixed-clock"; + #clock-cells = <0>; + clock-frequency = <32768>; + clock-output-names = "clk32k"; + }; + +- clkfpc: oscillator@2 { ++ clkfpc: oscillator-50m { + compatible = "fixed-clock"; + #clock-cells = <0>; + clock-frequency = <50000000>; + clock-output-names = "clkfpc"; + }; + +- clkaud_ext_i_0: oscillator@3 { ++ clkaud_ext_i_0: oscillator-aud0 { + compatible = "fixed-clock"; + #clock-cells = <0>; + clock-frequency = <6500000>; + clock-output-names = "clkaud_ext_i_0"; + }; + +- clkaud_ext_i_1: oscillator@4 { ++ clkaud_ext_i_1: oscillator-aud1 { + compatible = "fixed-clock"; + #clock-cells = <0>; + clock-frequency = <196608000>; + clock-output-names = "clkaud_ext_i_1"; + }; + +- clkaud_ext_i_2: oscillator@5 { ++ clkaud_ext_i_2: oscillator-aud2 { + compatible = "fixed-clock"; + #clock-cells = <0>; + clock-frequency = <180633600>; + clock-output-names = "clkaud_ext_i_2"; + }; + +- clki2si0_mck_i: oscillator@6 { ++ clki2si0_mck_i: 
oscillator-i2s0 { + compatible = "fixed-clock"; + #clock-cells = <0>; + clock-frequency = <30000000>; + clock-output-names = "clki2si0_mck_i"; + }; + +- clki2si1_mck_i: oscillator@7 { ++ clki2si1_mck_i: oscillator-i2s1 { + compatible = "fixed-clock"; + #clock-cells = <0>; + clock-frequency = <30000000>; + clock-output-names = "clki2si1_mck_i"; + }; + +- clki2si2_mck_i: oscillator@8 { ++ clki2si2_mck_i: oscillator-i2s2 { + compatible = "fixed-clock"; + #clock-cells = <0>; + clock-frequency = <30000000>; + clock-output-names = "clki2si2_mck_i"; + }; + +- clktdmin_mclk_i: oscillator@9 { ++ clktdmin_mclk_i: oscillator-mclk { + compatible = "fixed-clock"; + #clock-cells = <0>; + clock-frequency = <30000000>; +@@ -266,7 +266,7 @@ + reg = <0 0x10005000 0 0x1000>; + }; + +- pio: pinctrl@10005000 { ++ pio: pinctrl@1000b000 { + compatible = "mediatek,mt2712-pinctrl"; + reg = <0 0x1000b000 0 0x1000>; + mediatek,pctl-regmap = <&syscfg_pctl_a>; +diff --git a/arch/arm64/boot/dts/mediatek/mt6779.dtsi b/arch/arm64/boot/dts/mediatek/mt6779.dtsi +index 9bdf5145966c5..dde9ce137b4f1 100644 +--- a/arch/arm64/boot/dts/mediatek/mt6779.dtsi ++++ b/arch/arm64/boot/dts/mediatek/mt6779.dtsi +@@ -88,14 +88,14 @@ + interrupts = <GIC_PPI 7 IRQ_TYPE_LEVEL_LOW 0>; + }; + +- clk26m: oscillator@0 { ++ clk26m: oscillator-26m { + compatible = "fixed-clock"; + #clock-cells = <0>; + clock-frequency = <26000000>; + clock-output-names = "clk26m"; + }; + +- clk32k: oscillator@1 { ++ clk32k: oscillator-32k { + compatible = "fixed-clock"; + #clock-cells = <0>; + clock-frequency = <32768>; +@@ -117,7 +117,7 @@ + compatible = "simple-bus"; + ranges; + +- gic: interrupt-controller@0c000000 { ++ gic: interrupt-controller@c000000 { + compatible = "arm,gic-v3"; + #interrupt-cells = <4>; + interrupt-parent = <&gic>; +@@ -138,7 +138,7 @@ + + }; + +- sysirq: intpol-controller@0c53a650 { ++ sysirq: intpol-controller@c53a650 { + compatible = "mediatek,mt6779-sysirq", + "mediatek,mt6577-sysirq"; + interrupt-controller; +diff --git a/arch/arm64/boot/dts/mediatek/mt6797.dtsi b/arch/arm64/boot/dts/mediatek/mt6797.dtsi +index 15616231022a2..c3677d77e0a45 100644 +--- a/arch/arm64/boot/dts/mediatek/mt6797.dtsi ++++ b/arch/arm64/boot/dts/mediatek/mt6797.dtsi +@@ -95,7 +95,7 @@ + }; + }; + +- clk26m: oscillator@0 { ++ clk26m: oscillator-26m { + compatible = "fixed-clock"; + #clock-cells = <0>; + clock-frequency = <26000000>; diff --git a/arch/arm64/boot/dts/mediatek/mt7622-bananapi-bpi-r64.dts b/arch/arm64/boot/dts/mediatek/mt7622-bananapi-bpi-r64.dts index 2f77dc40b9b82..6b99d903b4791 100644 --- a/arch/arm64/boot/dts/mediatek/mt7622-bananapi-bpi-r64.dts @@ -12729,6 +15670,19 @@ index 2f77dc40b9b82..6b99d903b4791 100644 }; }; +diff --git a/arch/arm64/boot/dts/mediatek/mt8183.dtsi b/arch/arm64/boot/dts/mediatek/mt8183.dtsi +index 409cf827970cf..f4e0bea8ddcb6 100644 +--- a/arch/arm64/boot/dts/mediatek/mt8183.dtsi ++++ b/arch/arm64/boot/dts/mediatek/mt8183.dtsi +@@ -1212,7 +1212,7 @@ + <GIC_SPI 278 IRQ_TYPE_LEVEL_LOW>; + interrupt-names = "job", "mmu", "gpu"; + +- clocks = <&topckgen CLK_TOP_MFGPLL_CK>; ++ clocks = <&mfgcfg CLK_MFG_BG3D>; + + power-domains = + <&spm MT8183_POWER_DOMAIN_MFG_CORE0>, diff --git a/arch/arm64/boot/dts/mediatek/mt8192.dtsi b/arch/arm64/boot/dts/mediatek/mt8192.dtsi index 9757138a8bbd8..d1e63527b3875 100644 --- a/arch/arm64/boot/dts/mediatek/mt8192.dtsi @@ -12852,6 +15806,37 @@ index 9757138a8bbd8..d1e63527b3875 100644 }; i2c3: i2c3@11cb0000 { +diff --git a/arch/arm64/boot/dts/mediatek/pumpkin-common.dtsi 
b/arch/arm64/boot/dts/mediatek/pumpkin-common.dtsi +index fcddec14738d8..54514d62398f2 100644 +--- a/arch/arm64/boot/dts/mediatek/pumpkin-common.dtsi ++++ b/arch/arm64/boot/dts/mediatek/pumpkin-common.dtsi +@@ -17,7 +17,7 @@ + }; + + firmware { +- optee: optee@4fd00000 { ++ optee: optee { + compatible = "linaro,optee-tz"; + method = "smc"; + }; +@@ -210,7 +210,7 @@ + }; + }; + +- i2c0_pins_a: i2c0@0 { ++ i2c0_pins_a: i2c0 { + pins1 { + pinmux = <MT8516_PIN_58_SDA0__FUNC_SDA0_0>, + <MT8516_PIN_59_SCL0__FUNC_SCL0_0>; +@@ -218,7 +218,7 @@ + }; + }; + +- i2c2_pins_a: i2c2@0 { ++ i2c2_pins_a: i2c2 { + pins1 { + pinmux = <MT8516_PIN_60_SDA2__FUNC_SDA2_0>, + <MT8516_PIN_61_SCL2__FUNC_SCL2_0>; diff --git a/arch/arm64/boot/dts/nvidia/tegra186.dtsi b/arch/arm64/boot/dts/nvidia/tegra186.dtsi index e94f8add1a400..5b0bc9aa1a426 100644 --- a/arch/arm64/boot/dts/nvidia/tegra186.dtsi @@ -12987,8 +15972,21 @@ index f0efb3a628040..2b47845722206 100644 #clock-cells = <1>; #reset-cells = <1>; #power-domain-cells = <1>; +diff --git a/arch/arm64/boot/dts/qcom/ipq6018-cp01-c1.dts b/arch/arm64/boot/dts/qcom/ipq6018-cp01-c1.dts +index 5aec183087128..5310259d03dc5 100644 +--- a/arch/arm64/boot/dts/qcom/ipq6018-cp01-c1.dts ++++ b/arch/arm64/boot/dts/qcom/ipq6018-cp01-c1.dts +@@ -37,6 +37,8 @@ + + &spi_0 { + cs-select = <0>; ++ pinctrl-0 = <&spi_0_pins>; ++ pinctrl-names = "default"; + status = "okay"; + + m25p80@0 { diff --git a/arch/arm64/boot/dts/qcom/ipq6018.dtsi b/arch/arm64/boot/dts/qcom/ipq6018.dtsi -index d2fe58e0eb7aa..ce4c2b4a5fc07 100644 +index d2fe58e0eb7aa..30ac0b2e8c896 100644 --- a/arch/arm64/boot/dts/qcom/ipq6018.dtsi +++ b/arch/arm64/boot/dts/qcom/ipq6018.dtsi @@ -200,7 +200,7 @@ @@ -13009,8 +16007,17 @@ index d2fe58e0eb7aa..ce4c2b4a5fc07 100644 interrupt-controller; #interrupt-cells = <2>; +@@ -401,7 +401,7 @@ + reset-names = "phy", + "common"; + +- pcie_phy0: lane@84200 { ++ pcie_phy0: phy@84200 { + reg = <0x0 0x84200 0x0 0x16c>, /* Serdes Tx */ + <0x0 0x84400 0x0 0x200>, /* Serdes Rx */ + <0x0 0x84800 0x0 0x4f4>; /* PCS: Lane0, COM, PCIE */ diff --git a/arch/arm64/boot/dts/qcom/ipq8074.dtsi b/arch/arm64/boot/dts/qcom/ipq8074.dtsi -index db333001df4d6..6b9ac05504905 100644 +index db333001df4d6..9d4019e0949a9 100644 --- a/arch/arm64/boot/dts/qcom/ipq8074.dtsi +++ b/arch/arm64/boot/dts/qcom/ipq8074.dtsi @@ -13,7 +13,7 @@ @@ -13022,6 +16029,24 @@ index db333001df4d6..6b9ac05504905 100644 #clock-cells = <0>; }; +@@ -106,7 +106,7 @@ + reset-names = "phy","common"; + status = "disabled"; + +- usb1_ssphy: lane@58200 { ++ usb1_ssphy: phy@58200 { + reg = <0x00058200 0x130>, /* Tx */ + <0x00058400 0x200>, /* Rx */ + <0x00058800 0x1f8>, /* PCS */ +@@ -149,7 +149,7 @@ + reset-names = "phy","common"; + status = "disabled"; + +- usb0_ssphy: lane@78200 { ++ usb0_ssphy: phy@78200 { + reg = <0x00078200 0x130>, /* Tx */ + <0x00078400 0x200>, /* Rx */ + <0x00078800 0x1f8>, /* PCS */ @@ -220,7 +220,7 @@ clock-names = "bam_clk"; #dma-cells = <1>; @@ -13041,7 +16066,7 @@ index db333001df4d6..6b9ac05504905 100644 reg = <0x079b0000 0x10000>; #address-cells = <1>; diff --git a/arch/arm64/boot/dts/qcom/msm8916.dtsi b/arch/arm64/boot/dts/qcom/msm8916.dtsi -index 3f85e34a8ce6f..19e201f52b167 100644 +index 3f85e34a8ce6f..b967dbfba3b84 100644 --- a/arch/arm64/boot/dts/qcom/msm8916.dtsi +++ b/arch/arm64/boot/dts/qcom/msm8916.dtsi @@ -19,8 +19,8 @@ @@ -13064,6 +16089,15 @@ index 3f85e34a8ce6f..19e201f52b167 100644 compatible = "qcom,rpm-msg-ram"; reg = <0x00060000 0x8000>; }; +@@ -1307,7 +1307,7 @@ + }; + + mpss: 
remoteproc@4080000 { +- compatible = "qcom,msm8916-mss-pil", "qcom,q6v5-pil"; ++ compatible = "qcom,msm8916-mss-pil"; + reg = <0x04080000 0x100>, + <0x04020000 0x040>; + @@ -1384,11 +1384,17 @@ lpass: audio-controller@7708000 { status = "disabled"; @@ -13108,10 +16142,127 @@ index 1ccca83292ac9..c7d191dc6d4ba 100644 /* S1, S2, S6 and S12 are managed by RPMPD */ diff --git a/arch/arm64/boot/dts/qcom/msm8992-xiaomi-libra.dts b/arch/arm64/boot/dts/qcom/msm8992-xiaomi-libra.dts -index 357d55496e750..a3d6340a0c55b 100644 +index 357d55496e750..d08659c606b9a 100644 --- a/arch/arm64/boot/dts/qcom/msm8992-xiaomi-libra.dts +++ b/arch/arm64/boot/dts/qcom/msm8992-xiaomi-libra.dts -@@ -142,7 +142,7 @@ +@@ -11,6 +11,12 @@ + #include <dt-bindings/gpio/gpio.h> + #include <dt-bindings/input/gpio-keys.h> + ++/delete-node/ &adsp_mem; ++/delete-node/ &audio_mem; ++/delete-node/ &mpss_mem; ++/delete-node/ &peripheral_region; ++/delete-node/ &rmtfs_mem; ++ + / { + model = "Xiaomi Mi 4C"; + compatible = "xiaomi,libra", "qcom,msm8992"; +@@ -60,24 +66,66 @@ + #size-cells = <2>; + ranges; + +- /* This is for getting crash logs using Android downstream kernels */ +- ramoops@dfc00000 { +- compatible = "ramoops"; +- reg = <0x0 0xdfc00000 0x0 0x40000>; +- console-size = <0x10000>; +- record-size = <0x10000>; +- ftrace-size = <0x10000>; +- pmsg-size = <0x20000>; ++ memory_hole: hole@6400000 { ++ reg = <0 0x06400000 0 0x600000>; ++ no-map; ++ }; ++ ++ memory_hole2: hole2@6c00000 { ++ reg = <0 0x06c00000 0 0x2400000>; ++ no-map; ++ }; ++ ++ mpss_mem: mpss@9000000 { ++ reg = <0 0x09000000 0 0x5a00000>; ++ no-map; ++ }; ++ ++ tzapp: tzapp@ea00000 { ++ reg = <0 0x0ea00000 0 0x1900000>; ++ no-map; + }; + +- modem_region: modem_region@9000000 { +- reg = <0x0 0x9000000 0x0 0x5a00000>; ++ mdm_rfsa_mem: mdm-rfsa@ca0b0000 { ++ reg = <0 0xca0b0000 0 0x10000>; + no-map; + }; + +- tzapp: modem_region@ea00000 { +- reg = <0x0 0xea00000 0x0 0x1900000>; ++ rmtfs_mem: rmtfs@ca100000 { ++ compatible = "qcom,rmtfs-mem"; ++ reg = <0 0xca100000 0 0x180000>; + no-map; ++ ++ qcom,client-id = <1>; ++ }; ++ ++ audio_mem: audio@cb400000 { ++ reg = <0 0xcb000000 0 0x400000>; ++ no-mem; ++ }; ++ ++ qseecom_mem: qseecom@cb400000 { ++ reg = <0 0xcb400000 0 0x1c00000>; ++ no-mem; ++ }; ++ ++ adsp_rfsa_mem: adsp-rfsa@cd000000 { ++ reg = <0 0xcd000000 0 0x10000>; ++ no-map; ++ }; ++ ++ sensor_rfsa_mem: sensor-rfsa@cd010000 { ++ reg = <0 0xcd010000 0 0x10000>; ++ no-map; ++ }; ++ ++ ramoops@dfc00000 { ++ compatible = "ramoops"; ++ reg = <0 0xdfc00000 0 0x40000>; ++ console-size = <0x10000>; ++ record-size = <0x10000>; ++ ftrace-size = <0x10000>; ++ pmsg-size = <0x20000>; + }; + }; + }; +@@ -120,9 +168,21 @@ + status = "okay"; + }; + +-&peripheral_region { +- reg = <0x0 0x7400000 0x0 0x1c00000>; +- no-map; ++&pm8994_spmi_regulators { ++ VDD_APC0: s8 { ++ regulator-min-microvolt = <680000>; ++ regulator-max-microvolt = <1180000>; ++ regulator-always-on; ++ regulator-boot-on; ++ }; ++ ++ /* APC1 is 3-phase, but quoting downstream, s11 is "the gang leader" */ ++ VDD_APC1: s11 { ++ regulator-min-microvolt = <700000>; ++ regulator-max-microvolt = <1225000>; ++ regulator-always-on; ++ regulator-boot-on; ++ }; + }; + + &rpm_requests { +@@ -142,7 +202,7 @@ vdd_l17_29-supply = <&vph_pwr>; vdd_l20_21-supply = <&vph_pwr>; vdd_l25-supply = <&pm8994_s5>; @@ -13120,6 +16271,21 @@ index 357d55496e750..a3d6340a0c55b 100644 /* S1, S2, S6 and S12 are managed by RPMPD */ +diff --git a/arch/arm64/boot/dts/qcom/msm8992.dtsi b/arch/arm64/boot/dts/qcom/msm8992.dtsi +index 
58fe58cc77036..765e1f1989b58 100644 +--- a/arch/arm64/boot/dts/qcom/msm8992.dtsi ++++ b/arch/arm64/boot/dts/qcom/msm8992.dtsi +@@ -14,10 +14,6 @@ + compatible = "qcom,rpmcc-msm8992"; + }; + +-&tcsr_mutex { +- compatible = "qcom,sfpb-mutex"; +-}; +- + &timer { + interrupts = <GIC_PPI 2 (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_LOW)>, + <GIC_PPI 3 (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_LOW)>, diff --git a/arch/arm64/boot/dts/qcom/msm8994.dtsi b/arch/arm64/boot/dts/qcom/msm8994.dtsi index 986fe60dec5fb..a8dc8163ee82d 100644 --- a/arch/arm64/boot/dts/qcom/msm8994.dtsi @@ -13181,10 +16347,309 @@ index 986fe60dec5fb..a8dc8163ee82d 100644 reg = <0xfc428000 0x4000>; }; diff --git a/arch/arm64/boot/dts/qcom/msm8996.dtsi b/arch/arm64/boot/dts/qcom/msm8996.dtsi -index 52df22ab3f6ae..6077c36019514 100644 +index 52df22ab3f6ae..40174220e8e28 100644 --- a/arch/arm64/boot/dts/qcom/msm8996.dtsi +++ b/arch/arm64/boot/dts/qcom/msm8996.dtsi -@@ -638,7 +638,7 @@ +@@ -142,82 +142,92 @@ + /* Nominal fmax for now */ + opp-307200000 { + opp-hz = /bits/ 64 <307200000>; +- opp-supported-hw = <0x77>; ++ opp-supported-hw = <0x7>; + clock-latency-ns = <200000>; + }; + opp-422400000 { + opp-hz = /bits/ 64 <422400000>; +- opp-supported-hw = <0x77>; ++ opp-supported-hw = <0x7>; + clock-latency-ns = <200000>; + }; + opp-480000000 { + opp-hz = /bits/ 64 <480000000>; +- opp-supported-hw = <0x77>; ++ opp-supported-hw = <0x7>; + clock-latency-ns = <200000>; + }; + opp-556800000 { + opp-hz = /bits/ 64 <556800000>; +- opp-supported-hw = <0x77>; ++ opp-supported-hw = <0x7>; + clock-latency-ns = <200000>; + }; + opp-652800000 { + opp-hz = /bits/ 64 <652800000>; +- opp-supported-hw = <0x77>; ++ opp-supported-hw = <0x7>; + clock-latency-ns = <200000>; + }; + opp-729600000 { + opp-hz = /bits/ 64 <729600000>; +- opp-supported-hw = <0x77>; ++ opp-supported-hw = <0x7>; + clock-latency-ns = <200000>; + }; + opp-844800000 { + opp-hz = /bits/ 64 <844800000>; +- opp-supported-hw = <0x77>; ++ opp-supported-hw = <0x7>; + clock-latency-ns = <200000>; + }; + opp-960000000 { + opp-hz = /bits/ 64 <960000000>; +- opp-supported-hw = <0x77>; ++ opp-supported-hw = <0x7>; + clock-latency-ns = <200000>; + }; + opp-1036800000 { + opp-hz = /bits/ 64 <1036800000>; +- opp-supported-hw = <0x77>; ++ opp-supported-hw = <0x7>; + clock-latency-ns = <200000>; + }; + opp-1113600000 { + opp-hz = /bits/ 64 <1113600000>; +- opp-supported-hw = <0x77>; ++ opp-supported-hw = <0x7>; + clock-latency-ns = <200000>; + }; + opp-1190400000 { + opp-hz = /bits/ 64 <1190400000>; +- opp-supported-hw = <0x77>; ++ opp-supported-hw = <0x7>; + clock-latency-ns = <200000>; + }; + opp-1228800000 { + opp-hz = /bits/ 64 <1228800000>; +- opp-supported-hw = <0x77>; ++ opp-supported-hw = <0x7>; + clock-latency-ns = <200000>; + }; + opp-1324800000 { + opp-hz = /bits/ 64 <1324800000>; +- opp-supported-hw = <0x77>; ++ opp-supported-hw = <0x5>; ++ clock-latency-ns = <200000>; ++ }; ++ opp-1363200000 { ++ opp-hz = /bits/ 64 <1363200000>; ++ opp-supported-hw = <0x2>; + clock-latency-ns = <200000>; + }; + opp-1401600000 { + opp-hz = /bits/ 64 <1401600000>; +- opp-supported-hw = <0x77>; ++ opp-supported-hw = <0x5>; + clock-latency-ns = <200000>; + }; + opp-1478400000 { + opp-hz = /bits/ 64 <1478400000>; +- opp-supported-hw = <0x77>; ++ opp-supported-hw = <0x1>; ++ clock-latency-ns = <200000>; ++ }; ++ opp-1497600000 { ++ opp-hz = /bits/ 64 <1497600000>; ++ opp-supported-hw = <0x04>; + clock-latency-ns = <200000>; + }; + opp-1593600000 { + opp-hz = /bits/ 64 <1593600000>; +- opp-supported-hw = 
<0x77>; ++ opp-supported-hw = <0x1>; + clock-latency-ns = <200000>; + }; + }; +@@ -230,127 +240,137 @@ + /* Nominal fmax for now */ + opp-307200000 { + opp-hz = /bits/ 64 <307200000>; +- opp-supported-hw = <0x77>; ++ opp-supported-hw = <0x7>; + clock-latency-ns = <200000>; + }; + opp-403200000 { + opp-hz = /bits/ 64 <403200000>; +- opp-supported-hw = <0x77>; ++ opp-supported-hw = <0x7>; + clock-latency-ns = <200000>; + }; + opp-480000000 { + opp-hz = /bits/ 64 <480000000>; +- opp-supported-hw = <0x77>; ++ opp-supported-hw = <0x7>; + clock-latency-ns = <200000>; + }; + opp-556800000 { + opp-hz = /bits/ 64 <556800000>; +- opp-supported-hw = <0x77>; ++ opp-supported-hw = <0x7>; + clock-latency-ns = <200000>; + }; + opp-652800000 { + opp-hz = /bits/ 64 <652800000>; +- opp-supported-hw = <0x77>; ++ opp-supported-hw = <0x7>; + clock-latency-ns = <200000>; + }; + opp-729600000 { + opp-hz = /bits/ 64 <729600000>; +- opp-supported-hw = <0x77>; ++ opp-supported-hw = <0x7>; + clock-latency-ns = <200000>; + }; + opp-806400000 { + opp-hz = /bits/ 64 <806400000>; +- opp-supported-hw = <0x77>; ++ opp-supported-hw = <0x7>; + clock-latency-ns = <200000>; + }; + opp-883200000 { + opp-hz = /bits/ 64 <883200000>; +- opp-supported-hw = <0x77>; ++ opp-supported-hw = <0x7>; + clock-latency-ns = <200000>; + }; + opp-940800000 { + opp-hz = /bits/ 64 <940800000>; +- opp-supported-hw = <0x77>; ++ opp-supported-hw = <0x7>; + clock-latency-ns = <200000>; + }; + opp-1036800000 { + opp-hz = /bits/ 64 <1036800000>; +- opp-supported-hw = <0x77>; ++ opp-supported-hw = <0x7>; + clock-latency-ns = <200000>; + }; + opp-1113600000 { + opp-hz = /bits/ 64 <1113600000>; +- opp-supported-hw = <0x77>; ++ opp-supported-hw = <0x7>; + clock-latency-ns = <200000>; + }; + opp-1190400000 { + opp-hz = /bits/ 64 <1190400000>; +- opp-supported-hw = <0x77>; ++ opp-supported-hw = <0x7>; + clock-latency-ns = <200000>; + }; + opp-1248000000 { + opp-hz = /bits/ 64 <1248000000>; +- opp-supported-hw = <0x77>; ++ opp-supported-hw = <0x7>; + clock-latency-ns = <200000>; + }; + opp-1324800000 { + opp-hz = /bits/ 64 <1324800000>; +- opp-supported-hw = <0x77>; ++ opp-supported-hw = <0x7>; + clock-latency-ns = <200000>; + }; + opp-1401600000 { + opp-hz = /bits/ 64 <1401600000>; +- opp-supported-hw = <0x77>; ++ opp-supported-hw = <0x7>; + clock-latency-ns = <200000>; + }; + opp-1478400000 { + opp-hz = /bits/ 64 <1478400000>; +- opp-supported-hw = <0x77>; ++ opp-supported-hw = <0x7>; + clock-latency-ns = <200000>; + }; + opp-1555200000 { + opp-hz = /bits/ 64 <1555200000>; +- opp-supported-hw = <0x77>; ++ opp-supported-hw = <0x7>; + clock-latency-ns = <200000>; + }; + opp-1632000000 { + opp-hz = /bits/ 64 <1632000000>; +- opp-supported-hw = <0x77>; ++ opp-supported-hw = <0x7>; + clock-latency-ns = <200000>; + }; + opp-1708800000 { + opp-hz = /bits/ 64 <1708800000>; +- opp-supported-hw = <0x77>; ++ opp-supported-hw = <0x7>; + clock-latency-ns = <200000>; + }; + opp-1785600000 { + opp-hz = /bits/ 64 <1785600000>; +- opp-supported-hw = <0x77>; ++ opp-supported-hw = <0x7>; ++ clock-latency-ns = <200000>; ++ }; ++ opp-1804800000 { ++ opp-hz = /bits/ 64 <1804800000>; ++ opp-supported-hw = <0x6>; + clock-latency-ns = <200000>; + }; + opp-1824000000 { + opp-hz = /bits/ 64 <1824000000>; +- opp-supported-hw = <0x77>; ++ opp-supported-hw = <0x1>; ++ clock-latency-ns = <200000>; ++ }; ++ opp-1900800000 { ++ opp-hz = /bits/ 64 <1900800000>; ++ opp-supported-hw = <0x4>; + clock-latency-ns = <200000>; + }; + opp-1920000000 { + opp-hz = /bits/ 64 <1920000000>; +- 
opp-supported-hw = <0x77>; ++ opp-supported-hw = <0x1>; + clock-latency-ns = <200000>; + }; + opp-1996800000 { + opp-hz = /bits/ 64 <1996800000>; +- opp-supported-hw = <0x77>; ++ opp-supported-hw = <0x1>; + clock-latency-ns = <200000>; + }; + opp-2073600000 { + opp-hz = /bits/ 64 <2073600000>; +- opp-supported-hw = <0x77>; ++ opp-supported-hw = <0x1>; + clock-latency-ns = <200000>; + }; + opp-2150400000 { + opp-hz = /bits/ 64 <2150400000>; +- opp-supported-hw = <0x77>; ++ opp-supported-hw = <0x1>; + clock-latency-ns = <200000>; + }; + }; +@@ -598,7 +618,7 @@ + reset-names = "phy", "common", "cfg"; + status = "disabled"; + +- pciephy_0: lane@35000 { ++ pciephy_0: phy@35000 { + reg = <0x00035000 0x130>, + <0x00035200 0x200>, + <0x00035400 0x1dc>; +@@ -611,7 +631,7 @@ + reset-names = "lane0"; + }; + +- pciephy_1: lane@36000 { ++ pciephy_1: phy@36000 { + reg = <0x00036000 0x130>, + <0x00036200 0x200>, + <0x00036400 0x1dc>; +@@ -624,7 +644,7 @@ + reset-names = "lane1"; + }; + +- pciephy_2: lane@37000 { ++ pciephy_2: phy@37000 { + reg = <0x00037000 0x130>, + <0x00037200 0x200>, + <0x00037400 0x1dc>; +@@ -638,7 +658,7 @@ }; }; @@ -13193,7 +16658,7 @@ index 52df22ab3f6ae..6077c36019514 100644 compatible = "qcom,rpm-msg-ram"; reg = <0x00068000 0x6000>; }; -@@ -965,9 +965,6 @@ +@@ -965,9 +985,6 @@ nvmem-cells = <&speedbin_efuse>; nvmem-cell-names = "speed_bin"; @@ -13203,8 +16668,321 @@ index 52df22ab3f6ae..6077c36019514 100644 operating-points-v2 = <&gpu_opp_table>; status = "disabled"; +@@ -978,17 +995,17 @@ + compatible ="operating-points-v2"; + + /* +- * 624Mhz and 560Mhz are only available on speed +- * bin (1 << 0). All the rest are available on +- * all bins of the hardware ++ * 624Mhz is only available on speed bins 0 and 3. ++ * 560Mhz is only available on speed bins 0, 2 and 3. ++ * All the rest are available on all bins of the hardware. + */ + opp-624000000 { + opp-hz = /bits/ 64 <624000000>; +- opp-supported-hw = <0x01>; ++ opp-supported-hw = <0x09>; + }; + opp-560000000 { + opp-hz = /bits/ 64 <560000000>; +- opp-supported-hw = <0x01>; ++ opp-supported-hw = <0x0d>; + }; + opp-510000000 { + opp-hz = /bits/ 64 <510000000>; +@@ -1746,7 +1763,7 @@ + reset-names = "ufsphy"; + status = "disabled"; + +- ufsphy_lane: lanes@627400 { ++ ufsphy_lane: phy@627400 { + reg = <0x627400 0x12c>, + <0x627600 0x200>, + <0x627c00 0x1b4>; +@@ -2601,7 +2618,7 @@ + reset-names = "phy", "common"; + status = "disabled"; + +- ssusb_phy_0: lane@7410200 { ++ ssusb_phy_0: phy@7410200 { + reg = <0x07410200 0x200>, + <0x07410400 0x130>, + <0x07410600 0x1a8>; +diff --git a/arch/arm64/boot/dts/qcom/msm8996pro.dtsi b/arch/arm64/boot/dts/qcom/msm8996pro.dtsi +new file mode 100644 +index 0000000000000..63e1b4ec7a360 +--- /dev/null ++++ b/arch/arm64/boot/dts/qcom/msm8996pro.dtsi +@@ -0,0 +1,266 @@ ++// SPDX-License-Identifier: BSD-3-Clause ++/* ++ * Copyright (c) 2022, Linaro Limited ++ */ ++ ++#include "msm8996.dtsi" ++ ++/ { ++ /delete-node/ opp-table-cluster0; ++ /delete-node/ opp-table-cluster1; ++ ++ /* ++ * On MSM8996 Pro the cpufreq driver shifts speed bins into the high ++ * nibble of supported hw, so speed bin 0 becomes 0x10, speed bin 1 ++ * becomes 0x20, speed 2 becomes 0x40. 
++ */ ++ ++ cluster0_opp: opp-table-cluster0 { ++ compatible = "operating-points-v2-kryo-cpu"; ++ nvmem-cells = <&speedbin_efuse>; ++ opp-shared; ++ ++ opp-307200000 { ++ opp-hz = /bits/ 64 <307200000>; ++ opp-supported-hw = <0x70>; ++ clock-latency-ns = <200000>; ++ }; ++ opp-384000000 { ++ opp-hz = /bits/ 64 <384000000>; ++ opp-supported-hw = <0x70>; ++ clock-latency-ns = <200000>; ++ }; ++ opp-460800000 { ++ opp-hz = /bits/ 64 <460800000>; ++ opp-supported-hw = <0x70>; ++ clock-latency-ns = <200000>; ++ }; ++ opp-537600000 { ++ opp-hz = /bits/ 64 <537600000>; ++ opp-supported-hw = <0x70>; ++ clock-latency-ns = <200000>; ++ }; ++ opp-614400000 { ++ opp-hz = /bits/ 64 <614400000>; ++ opp-supported-hw = <0x70>; ++ clock-latency-ns = <200000>; ++ }; ++ opp-691200000 { ++ opp-hz = /bits/ 64 <691200000>; ++ opp-supported-hw = <0x70>; ++ clock-latency-ns = <200000>; ++ }; ++ opp-768000000 { ++ opp-hz = /bits/ 64 <768000000>; ++ opp-supported-hw = <0x70>; ++ clock-latency-ns = <200000>; ++ }; ++ opp-844800000 { ++ opp-hz = /bits/ 64 <844800000>; ++ opp-supported-hw = <0x70>; ++ clock-latency-ns = <200000>; ++ }; ++ opp-902400000 { ++ opp-hz = /bits/ 64 <902400000>; ++ opp-supported-hw = <0x70>; ++ clock-latency-ns = <200000>; ++ }; ++ opp-979200000 { ++ opp-hz = /bits/ 64 <979200000>; ++ opp-supported-hw = <0x70>; ++ clock-latency-ns = <200000>; ++ }; ++ opp-1056000000 { ++ opp-hz = /bits/ 64 <1056000000>; ++ opp-supported-hw = <0x70>; ++ clock-latency-ns = <200000>; ++ }; ++ opp-1132800000 { ++ opp-hz = /bits/ 64 <1132800000>; ++ opp-supported-hw = <0x70>; ++ clock-latency-ns = <200000>; ++ }; ++ opp-1209600000 { ++ opp-hz = /bits/ 64 <1209600000>; ++ opp-supported-hw = <0x70>; ++ clock-latency-ns = <200000>; ++ }; ++ opp-1286400000 { ++ opp-hz = /bits/ 64 <1286400000>; ++ opp-supported-hw = <0x70>; ++ clock-latency-ns = <200000>; ++ }; ++ opp-1363200000 { ++ opp-hz = /bits/ 64 <1363200000>; ++ opp-supported-hw = <0x70>; ++ clock-latency-ns = <200000>; ++ }; ++ opp-1440000000 { ++ opp-hz = /bits/ 64 <1440000000>; ++ opp-supported-hw = <0x70>; ++ clock-latency-ns = <200000>; ++ }; ++ opp-1516800000 { ++ opp-hz = /bits/ 64 <1516800000>; ++ opp-supported-hw = <0x70>; ++ clock-latency-ns = <200000>; ++ }; ++ opp-1593600000 { ++ opp-hz = /bits/ 64 <1593600000>; ++ opp-supported-hw = <0x70>; ++ clock-latency-ns = <200000>; ++ }; ++ opp-1996800000 { ++ opp-hz = /bits/ 64 <1996800000>; ++ opp-supported-hw = <0x20>; ++ clock-latency-ns = <200000>; ++ }; ++ opp-2188800000 { ++ opp-hz = /bits/ 64 <2188800000>; ++ opp-supported-hw = <0x10>; ++ clock-latency-ns = <200000>; ++ }; ++ }; ++ ++ cluster1_opp: opp-table-cluster1 { ++ compatible = "operating-points-v2-kryo-cpu"; ++ nvmem-cells = <&speedbin_efuse>; ++ opp-shared; ++ ++ opp-307200000 { ++ opp-hz = /bits/ 64 <307200000>; ++ opp-supported-hw = <0x70>; ++ clock-latency-ns = <200000>; ++ }; ++ opp-384000000 { ++ opp-hz = /bits/ 64 <384000000>; ++ opp-supported-hw = <0x70>; ++ clock-latency-ns = <200000>; ++ }; ++ opp-460800000 { ++ opp-hz = /bits/ 64 <460800000>; ++ opp-supported-hw = <0x70>; ++ clock-latency-ns = <200000>; ++ }; ++ opp-537600000 { ++ opp-hz = /bits/ 64 <537600000>; ++ opp-supported-hw = <0x70>; ++ clock-latency-ns = <200000>; ++ }; ++ opp-614400000 { ++ opp-hz = /bits/ 64 <614400000>; ++ opp-supported-hw = <0x70>; ++ clock-latency-ns = <200000>; ++ }; ++ opp-691200000 { ++ opp-hz = /bits/ 64 <691200000>; ++ opp-supported-hw = <0x70>; ++ clock-latency-ns = <200000>; ++ }; ++ opp-748800000 { ++ opp-hz = /bits/ 64 <748800000>; ++ 
opp-supported-hw = <0x70>; ++ clock-latency-ns = <200000>; ++ }; ++ opp-825600000 { ++ opp-hz = /bits/ 64 <825600000>; ++ opp-supported-hw = <0x70>; ++ clock-latency-ns = <200000>; ++ }; ++ opp-902400000 { ++ opp-hz = /bits/ 64 <902400000>; ++ opp-supported-hw = <0x70>; ++ clock-latency-ns = <200000>; ++ }; ++ opp-979200000 { ++ opp-hz = /bits/ 64 <979200000>; ++ opp-supported-hw = <0x70>; ++ clock-latency-ns = <200000>; ++ }; ++ opp-1056000000 { ++ opp-hz = /bits/ 64 <1056000000>; ++ opp-supported-hw = <0x70>; ++ clock-latency-ns = <200000>; ++ }; ++ opp-1132800000 { ++ opp-hz = /bits/ 64 <1132800000>; ++ opp-supported-hw = <0x70>; ++ clock-latency-ns = <200000>; ++ }; ++ opp-1209600000 { ++ opp-hz = /bits/ 64 <1209600000>; ++ opp-supported-hw = <0x70>; ++ clock-latency-ns = <200000>; ++ }; ++ opp-1286400000 { ++ opp-hz = /bits/ 64 <1286400000>; ++ opp-supported-hw = <0x70>; ++ clock-latency-ns = <200000>; ++ }; ++ opp-1363200000 { ++ opp-hz = /bits/ 64 <1363200000>; ++ opp-supported-hw = <0x70>; ++ clock-latency-ns = <200000>; ++ }; ++ opp-1440000000 { ++ opp-hz = /bits/ 64 <1440000000>; ++ opp-supported-hw = <0x70>; ++ clock-latency-ns = <200000>; ++ }; ++ opp-1516800000 { ++ opp-hz = /bits/ 64 <1516800000>; ++ opp-supported-hw = <0x70>; ++ clock-latency-ns = <200000>; ++ }; ++ opp-1593600000 { ++ opp-hz = /bits/ 64 <1593600000>; ++ opp-supported-hw = <0x70>; ++ clock-latency-ns = <200000>; ++ }; ++ opp-1670400000 { ++ opp-hz = /bits/ 64 <1670400000>; ++ opp-supported-hw = <0x70>; ++ clock-latency-ns = <200000>; ++ }; ++ opp-1747200000 { ++ opp-hz = /bits/ 64 <1747200000>; ++ opp-supported-hw = <0x70>; ++ clock-latency-ns = <200000>; ++ }; ++ opp-1824000000 { ++ opp-hz = /bits/ 64 <1824000000>; ++ opp-supported-hw = <0x70>; ++ clock-latency-ns = <200000>; ++ }; ++ opp-1900800000 { ++ opp-hz = /bits/ 64 <1900800000>; ++ opp-supported-hw = <0x70>; ++ clock-latency-ns = <200000>; ++ }; ++ opp-1977600000 { ++ opp-hz = /bits/ 64 <1977600000>; ++ opp-supported-hw = <0x30>; ++ clock-latency-ns = <200000>; ++ }; ++ opp-2054400000 { ++ opp-hz = /bits/ 64 <2054400000>; ++ opp-supported-hw = <0x30>; ++ clock-latency-ns = <200000>; ++ }; ++ opp-2150400000 { ++ opp-hz = /bits/ 64 <2150400000>; ++ opp-supported-hw = <0x30>; ++ clock-latency-ns = <200000>; ++ }; ++ opp-2246400000 { ++ opp-hz = /bits/ 64 <2246400000>; ++ opp-supported-hw = <0x10>; ++ clock-latency-ns = <200000>; ++ }; ++ opp-2342400000 { ++ opp-hz = /bits/ 64 <2342400000>; ++ opp-supported-hw = <0x10>; ++ clock-latency-ns = <200000>; ++ }; ++ }; ++}; diff --git a/arch/arm64/boot/dts/qcom/msm8998.dtsi b/arch/arm64/boot/dts/qcom/msm8998.dtsi -index 34039b5c80175..228339f81c327 100644 +index 34039b5c80175..5350b911f4f6c 100644 --- a/arch/arm64/boot/dts/qcom/msm8998.dtsi +++ b/arch/arm64/boot/dts/qcom/msm8998.dtsi @@ -308,38 +308,42 @@ @@ -13267,6 +17045,46 @@ index 34039b5c80175..228339f81c327 100644 compatible = "qcom,rpm-msg-ram"; reg = <0x00778000 0x7000>; }; +@@ -990,7 +994,7 @@ + vdda-phy-supply = <&vreg_l1a_0p875>; + vdda-pll-supply = <&vreg_l2a_1p2>; + +- pciephy: lane@1c06800 { ++ pciephy: phy@1c06800 { + reg = <0x01c06200 0x128>, <0x01c06400 0x1fc>, <0x01c06800 0x20c>; + #phy-cells = <0>; + +@@ -1062,7 +1066,7 @@ + reset-names = "ufsphy"; + resets = <&ufshc 0>; + +- ufsphy_lanes: lanes@1da7400 { ++ ufsphy_lanes: phy@1da7400 { + reg = <0x01da7400 0x128>, + <0x01da7600 0x1fc>, + <0x01da7c00 0x1dc>, +@@ -1995,7 +1999,7 @@ + <&gcc GCC_USB3PHY_PHY_BCR>; + reset-names = "phy", "common"; + +- usb1_ssphy: lane@c010200 { ++ usb1_ssphy: 
phy@c010200 { + reg = <0xc010200 0x128>, + <0xc010400 0x200>, + <0xc010c00 0x20c>, +diff --git a/arch/arm64/boot/dts/qcom/pm660.dtsi b/arch/arm64/boot/dts/qcom/pm660.dtsi +index e847d7209afc6..affc736d154ad 100644 +--- a/arch/arm64/boot/dts/qcom/pm660.dtsi ++++ b/arch/arm64/boot/dts/qcom/pm660.dtsi +@@ -152,7 +152,7 @@ + qcom,pre-scaling = <1 3>; + }; + +- vcoin: vcoin@83 { ++ vcoin: vcoin@85 { + reg = <ADC5_VCOIN>; + qcom,decimation = <1024>; + qcom,pre-scaling = <1 3>; diff --git a/arch/arm64/boot/dts/qcom/pm8916.dtsi b/arch/arm64/boot/dts/qcom/pm8916.dtsi index f931cb0de231f..42180f1b5dbbb 100644 --- a/arch/arm64/boot/dts/qcom/pm8916.dtsi @@ -13336,6 +17154,58 @@ index 28d5b5528516b..0ce2d36ab257f 100644 compatible = "fixed-clock"; #clock-cells = <0>; clock-frequency = <40000000>; +diff --git a/arch/arm64/boot/dts/qcom/sa8155p-adp.dts b/arch/arm64/boot/dts/qcom/sa8155p-adp.dts +index 5ae2ddc65f7e4..56a789a5789e6 100644 +--- a/arch/arm64/boot/dts/qcom/sa8155p-adp.dts ++++ b/arch/arm64/boot/dts/qcom/sa8155p-adp.dts +@@ -43,7 +43,6 @@ + + regulator-always-on; + regulator-boot-on; +- regulator-allow-set-load; + + vin-supply = <&vreg_3p3>; + }; +@@ -114,6 +113,9 @@ + regulator-max-microvolt = <880000>; + regulator-initial-mode = <RPMH_REGULATOR_MODE_HPM>; + regulator-allow-set-load; ++ regulator-allowed-modes = ++ <RPMH_REGULATOR_MODE_LPM ++ RPMH_REGULATOR_MODE_HPM>; + }; + + vreg_l7a_1p8: ldo7 { +@@ -129,6 +131,9 @@ + regulator-max-microvolt = <2960000>; + regulator-initial-mode = <RPMH_REGULATOR_MODE_HPM>; + regulator-allow-set-load; ++ regulator-allowed-modes = ++ <RPMH_REGULATOR_MODE_LPM ++ RPMH_REGULATOR_MODE_HPM>; + }; + + vreg_l11a_0p8: ldo11 { +@@ -235,6 +240,9 @@ + regulator-max-microvolt = <1200000>; + regulator-initial-mode = <RPMH_REGULATOR_MODE_HPM>; + regulator-allow-set-load; ++ regulator-allowed-modes = ++ <RPMH_REGULATOR_MODE_LPM ++ RPMH_REGULATOR_MODE_HPM>; + }; + + vreg_l7c_1p8: ldo7 { +@@ -250,6 +258,9 @@ + regulator-max-microvolt = <1200000>; + regulator-initial-mode = <RPMH_REGULATOR_MODE_HPM>; + regulator-allow-set-load; ++ regulator-allowed-modes = ++ <RPMH_REGULATOR_MODE_LPM ++ RPMH_REGULATOR_MODE_HPM>; + }; + + vreg_l10c_3p3: ldo10 { diff --git a/arch/arm64/boot/dts/qcom/sc7180-trogdor-coachz.dtsi b/arch/arm64/boot/dts/qcom/sc7180-trogdor-coachz.dtsi index a758e4d226122..81098aa9687ba 100644 --- a/arch/arm64/boot/dts/qcom/sc7180-trogdor-coachz.dtsi @@ -13633,7 +17503,7 @@ index fd78f16181ddd..b795a9993cc1b 100644 }; diff --git a/arch/arm64/boot/dts/qcom/sdm630.dtsi b/arch/arm64/boot/dts/qcom/sdm630.dtsi -index 9c7f87e42fccd..952bb133914f4 100644 +index 9c7f87e42fccd..c2e1a0d9a2725 100644 --- a/arch/arm64/boot/dts/qcom/sdm630.dtsi +++ b/arch/arm64/boot/dts/qcom/sdm630.dtsi @@ -8,6 +8,7 @@ @@ -13653,6 +17523,15 @@ index 9c7f87e42fccd..952bb133914f4 100644 compatible = "qcom,rpm-msg-ram"; reg = <0x00778000 0x7000>; }; +@@ -767,7 +768,7 @@ + pins = "gpio17", "gpio18", "gpio19"; + function = "gpio"; + drive-strength = <2>; +- bias-no-pull; ++ bias-disable; + }; + }; + @@ -1041,11 +1042,13 @@ nvmem-cells = <&gpu_speed_bin>; nvmem-cell-names = "speed_bin"; @@ -13690,6 +17569,44 @@ index bba1c2bce2131..0afe9eee025e1 100644 drive-strength = <14>; }; }; +diff --git a/arch/arm64/boot/dts/qcom/sdm845-cheza.dtsi b/arch/arm64/boot/dts/qcom/sdm845-cheza.dtsi +index dfd1b42c07fd5..3566db1d7357e 100644 +--- a/arch/arm64/boot/dts/qcom/sdm845-cheza.dtsi ++++ b/arch/arm64/boot/dts/qcom/sdm845-cheza.dtsi +@@ -1299,7 +1299,7 @@ ap_ts_i2c: &i2c14 { + config { + pins = "gpio126"; + 
function = "gpio"; +- bias-no-pull; ++ bias-disable; + drive-strength = <2>; + output-low; + }; +@@ -1309,7 +1309,7 @@ ap_ts_i2c: &i2c14 { + config { + pins = "gpio126"; + function = "gpio"; +- bias-no-pull; ++ bias-disable; + drive-strength = <2>; + output-high; + }; +diff --git a/arch/arm64/boot/dts/qcom/sdm845-db845c.dts b/arch/arm64/boot/dts/qcom/sdm845-db845c.dts +index 2d5533dd4ec2d..146d3cd3f1b31 100644 +--- a/arch/arm64/boot/dts/qcom/sdm845-db845c.dts ++++ b/arch/arm64/boot/dts/qcom/sdm845-db845c.dts +@@ -1045,7 +1045,10 @@ + + /* PINCTRL - additions to nodes defined in sdm845.dtsi */ + &qup_spi2_default { +- drive-strength = <16>; ++ pinconf { ++ pins = "gpio27", "gpio28", "gpio29", "gpio30"; ++ drive-strength = <16>; ++ }; + }; + + &qup_uart3_default{ diff --git a/arch/arm64/boot/dts/qcom/sdm845-xiaomi-beryllium.dts b/arch/arm64/boot/dts/qcom/sdm845-xiaomi-beryllium.dts index c60c8c640e17f..736951fabb7a9 100644 --- a/arch/arm64/boot/dts/qcom/sdm845-xiaomi-beryllium.dts @@ -13704,9 +17621,36 @@ index c60c8c640e17f..736951fabb7a9 100644 vddneg-supply = <&ibb>; diff --git a/arch/arm64/boot/dts/qcom/sdm845.dtsi b/arch/arm64/boot/dts/qcom/sdm845.dtsi -index b3b9119261844..ea7a272d267a7 100644 +index b3b9119261844..ed293f635f145 100644 --- a/arch/arm64/boot/dts/qcom/sdm845.dtsi +++ b/arch/arm64/boot/dts/qcom/sdm845.dtsi +@@ -2064,7 +2064,7 @@ + + status = "disabled"; + +- pcie0_lane: lanes@1c06200 { ++ pcie0_lane: phy@1c06200 { + reg = <0 0x01c06200 0 0x128>, + <0 0x01c06400 0 0x1fc>, + <0 0x01c06800 0 0x218>, +@@ -2174,7 +2174,7 @@ + + status = "disabled"; + +- pcie1_lane: lanes@1c06200 { ++ pcie1_lane: phy@1c06200 { + reg = <0 0x01c0a800 0 0x800>, + <0 0x01c0a800 0 0x800>, + <0 0x01c0b800 0 0x400>; +@@ -2302,7 +2302,7 @@ + reset-names = "ufsphy"; + status = "disabled"; + +- ufs_mem_phy_lanes: lanes@1d87400 { ++ ufs_mem_phy_lanes: phy@1d87400 { + reg = <0 0x01d87400 0 0x108>, + <0 0x01d87600 0 0x1e0>, + <0 0x01d87c00 0 0x1dc>, @@ -2316,11 +2316,11 @@ compatible = "qcom,bam-v1.7.0"; reg = <0 0x01dc4000 0 0x24000>; @@ -13747,6 +17691,24 @@ index b3b9119261844..ea7a272d267a7 100644 #address-cells = <1>; #size-cells = <1>; +@@ -3699,7 +3699,7 @@ + <&gcc GCC_USB3_PHY_PRIM_BCR>; + reset-names = "phy", "common"; + +- usb_1_ssphy: lanes@88e9200 { ++ usb_1_ssphy: phy@88e9200 { + reg = <0 0x088e9200 0 0x128>, + <0 0x088e9400 0 0x200>, + <0 0x088e9c00 0 0x218>, +@@ -3732,7 +3732,7 @@ + <&gcc GCC_USB3_PHY_SEC_BCR>; + reset-names = "phy", "common"; + +- usb_2_ssphy: lane@88eb200 { ++ usb_2_ssphy: phy@88eb200 { + reg = <0 0x088eb200 0 0x128>, + <0 0x088eb400 0 0x1fc>, + <0 0x088eb800 0 0x218>, @@ -4147,7 +4147,7 @@ power-domains = <&dispcc MDSS_GDSC>; @@ -13757,10 +17719,23 @@ index b3b9119261844..ea7a272d267a7 100644 clock-names = "iface", "core"; diff --git a/arch/arm64/boot/dts/qcom/sdm850-lenovo-yoga-c630.dts b/arch/arm64/boot/dts/qcom/sdm850-lenovo-yoga-c630.dts -index 2ba23aa582a18..617a634ac9051 100644 +index 2ba23aa582a18..834fb463f99ec 100644 --- a/arch/arm64/boot/dts/qcom/sdm850-lenovo-yoga-c630.dts +++ b/arch/arm64/boot/dts/qcom/sdm850-lenovo-yoga-c630.dts -@@ -518,6 +518,10 @@ +@@ -475,8 +475,10 @@ + }; + + &qup_i2c12_default { +- drive-strength = <2>; +- bias-disable; ++ pinmux { ++ drive-strength = <2>; ++ bias-disable; ++ }; + }; + + &qup_uart6_default { +@@ -518,6 +520,10 @@ dai@1 { reg = <1>; }; @@ -13771,7 +17746,7 @@ index 2ba23aa582a18..617a634ac9051 100644 }; &sound { -@@ -530,6 +534,7 @@ +@@ -530,6 +536,7 @@ "SpkrLeft IN", "SPK1 OUT", "SpkrRight IN", "SPK2 OUT", "MM_DL1", 
"MultiMedia1 Playback", @@ -13779,7 +17754,7 @@ index 2ba23aa582a18..617a634ac9051 100644 "MultiMedia2 Capture", "MM_UL2"; mm1-dai-link { -@@ -546,6 +551,13 @@ +@@ -546,6 +553,13 @@ }; }; @@ -13793,7 +17768,7 @@ index 2ba23aa582a18..617a634ac9051 100644 slim-dai-link { link-name = "SLIM Playback"; cpu { -@@ -575,6 +587,21 @@ +@@ -575,6 +589,21 @@ sound-dai = <&wcd9340 1>; }; }; @@ -13874,7 +17849,7 @@ index 58b6b2742d3f9..47f8e5397ebba 100644 &usb3 { diff --git a/arch/arm64/boot/dts/qcom/sm6125.dtsi b/arch/arm64/boot/dts/qcom/sm6125.dtsi -index 2b37ce6a9f9c5..f89af5e351127 100644 +index 2b37ce6a9f9c5..dc3bddc54eb62 100644 --- a/arch/arm64/boot/dts/qcom/sm6125.dtsi +++ b/arch/arm64/boot/dts/qcom/sm6125.dtsi @@ -336,23 +336,43 @@ @@ -13933,6 +17908,15 @@ index 2b37ce6a9f9c5..f89af5e351127 100644 compatible = "qcom,rpm-msg-ram"; reg = <0x045f0000 0x7000>; }; +@@ -388,7 +408,7 @@ + sdhc_1: sdhci@4744000 { + compatible = "qcom,sm6125-sdhci", "qcom,sdhci-msm-v5"; + reg = <0x04744000 0x1000>, <0x04745000 0x1000>; +- reg-names = "hc", "core"; ++ reg-names = "hc", "cqhci"; + + interrupts = <GIC_SPI 348 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 352 IRQ_TYPE_LEVEL_HIGH>; @@ -417,8 +437,8 @@ <&xo_board>; clock-names = "iface", "core", "xo"; @@ -13944,10 +17928,71 @@ index 2b37ce6a9f9c5..f89af5e351127 100644 pinctrl-names = "default", "sleep"; bus-width = <4>; +diff --git a/arch/arm64/boot/dts/qcom/sm8150-sony-xperia-kumano.dtsi b/arch/arm64/boot/dts/qcom/sm8150-sony-xperia-kumano.dtsi +index 014fe3a315489..fb6e5a140c9f6 100644 +--- a/arch/arm64/boot/dts/qcom/sm8150-sony-xperia-kumano.dtsi ++++ b/arch/arm64/boot/dts/qcom/sm8150-sony-xperia-kumano.dtsi +@@ -348,6 +348,9 @@ + regulator-max-microvolt = <2960000>; + regulator-initial-mode = <RPMH_REGULATOR_MODE_HPM>; + regulator-allow-set-load; ++ regulator-allowed-modes = ++ <RPMH_REGULATOR_MODE_LPM ++ RPMH_REGULATOR_MODE_HPM>; + }; + + vreg_l7c_3p0: ldo7 { +@@ -367,6 +370,9 @@ + regulator-max-microvolt = <2960000>; + regulator-initial-mode = <RPMH_REGULATOR_MODE_HPM>; + regulator-allow-set-load; ++ regulator-allowed-modes = ++ <RPMH_REGULATOR_MODE_LPM ++ RPMH_REGULATOR_MODE_HPM>; + }; + + vreg_l10c_3p3: ldo10 { diff --git a/arch/arm64/boot/dts/qcom/sm8150.dtsi b/arch/arm64/boot/dts/qcom/sm8150.dtsi -index ef0232c2cf45b..f347f752d536d 100644 +index ef0232c2cf45b..292e40d6162dd 100644 --- a/arch/arm64/boot/dts/qcom/sm8150.dtsi +++ b/arch/arm64/boot/dts/qcom/sm8150.dtsi +@@ -1692,12 +1692,12 @@ + reset-names = "ufsphy"; + status = "disabled"; + +- ufs_mem_phy_lanes: lanes@1d87400 { +- reg = <0 0x01d87400 0 0x108>, +- <0 0x01d87600 0 0x1e0>, +- <0 0x01d87c00 0 0x1dc>, +- <0 0x01d87800 0 0x108>, +- <0 0x01d87a00 0 0x1e0>; ++ ufs_mem_phy_lanes: phy@1d87400 { ++ reg = <0 0x01d87400 0 0x16c>, ++ <0 0x01d87600 0 0x200>, ++ <0 0x01d87c00 0 0x200>, ++ <0 0x01d87800 0 0x16c>, ++ <0 0x01d87a00 0 0x200>; + #phy-cells = <0>; + }; + }; +@@ -3010,7 +3010,7 @@ + <&gcc GCC_USB3_PHY_PRIM_BCR>; + reset-names = "phy", "common"; + +- usb_1_ssphy: lanes@88e9200 { ++ usb_1_ssphy: phy@88e9200 { + reg = <0 0x088e9200 0 0x200>, + <0 0x088e9400 0 0x200>, + <0 0x088e9c00 0 0x218>, +@@ -3043,7 +3043,7 @@ + <&gcc GCC_USB3_PHY_SEC_BCR>; + reset-names = "phy", "common"; + +- usb_2_ssphy: lane@88eb200 { ++ usb_2_ssphy: phy@88eb200 { + reg = <0 0x088eb200 0 0x200>, + <0 0x088eb400 0 0x200>, + <0 0x088eb800 0 0x800>, @@ -3434,9 +3434,9 @@ qcom,tcs-offset = <0xd00>; qcom,drv-id = <2>; @@ -13961,8 +18006,41 @@ index ef0232c2cf45b..f347f752d536d 100644 rpmhcc: clock-controller { compatible = 
"qcom,sm8150-rpmh-clk"; +diff --git a/arch/arm64/boot/dts/qcom/sm8250-sony-xperia-edo.dtsi b/arch/arm64/boot/dts/qcom/sm8250-sony-xperia-edo.dtsi +index d63f7a9bc4e9a..effbd6a9c9891 100644 +--- a/arch/arm64/boot/dts/qcom/sm8250-sony-xperia-edo.dtsi ++++ b/arch/arm64/boot/dts/qcom/sm8250-sony-xperia-edo.dtsi +@@ -317,6 +317,9 @@ + regulator-max-microvolt = <2960000>; + regulator-initial-mode = <RPMH_REGULATOR_MODE_HPM>; + regulator-allow-set-load; ++ regulator-allowed-modes = ++ <RPMH_REGULATOR_MODE_LPM ++ RPMH_REGULATOR_MODE_HPM>; + }; + + vreg_l7c_2p85: ldo7 { +@@ -339,6 +342,9 @@ + regulator-max-microvolt = <2960000>; + regulator-initial-mode = <RPMH_REGULATOR_MODE_HPM>; + regulator-allow-set-load; ++ regulator-allowed-modes = ++ <RPMH_REGULATOR_MODE_LPM ++ RPMH_REGULATOR_MODE_HPM>; + }; + + vreg_l10c_3p3: ldo10 { +@@ -585,7 +591,7 @@ + pins = "gpio39"; + function = "gpio"; + drive-strength = <2>; +- bias-disabled; ++ bias-disable; + input-enable; + }; + diff --git a/arch/arm64/boot/dts/qcom/sm8250.dtsi b/arch/arm64/boot/dts/qcom/sm8250.dtsi -index d12e4cbfc8527..b710bca456489 100644 +index d12e4cbfc8527..4e3b772a8bded 100644 --- a/arch/arm64/boot/dts/qcom/sm8250.dtsi +++ b/arch/arm64/boot/dts/qcom/sm8250.dtsi @@ -1434,8 +1434,8 @@ @@ -13976,6 +18054,15 @@ index d12e4cbfc8527..b710bca456489 100644 pinctrl-names = "default"; pinctrl-0 = <&pcie0_default_state>; +@@ -1463,7 +1463,7 @@ + + status = "disabled"; + +- pcie0_lane: lanes@1c06200 { ++ pcie0_lane: phy@1c06200 { + reg = <0 0x1c06200 0 0x170>, /* tx */ + <0 0x1c06400 0 0x200>, /* rx */ + <0 0x1c06800 0 0x1f0>, /* pcs */ @@ -1472,6 +1472,8 @@ clock-names = "pipe0"; @@ -14005,6 +18092,15 @@ index d12e4cbfc8527..b710bca456489 100644 pinctrl-names = "default"; pinctrl-0 = <&pcie1_default_state>; +@@ -1567,7 +1569,7 @@ + + status = "disabled"; + +- pcie1_lane: lanes@1c0e200 { ++ pcie1_lane: phy@1c0e200 { + reg = <0 0x1c0e200 0 0x170>, /* tx0 */ + <0 0x1c0e400 0 0x200>, /* rx0 */ + <0 0x1c0ea00 0 0x1f0>, /* pcs */ @@ -1578,6 +1580,8 @@ clock-names = "pipe0"; @@ -14034,6 +18130,15 @@ index d12e4cbfc8527..b710bca456489 100644 pinctrl-names = "default"; pinctrl-0 = <&pcie2_default_state>; +@@ -1673,7 +1677,7 @@ + + status = "disabled"; + +- pcie2_lane: lanes@1c16200 { ++ pcie2_lane: phy@1c16200 { + reg = <0 0x1c16200 0 0x170>, /* tx0 */ + <0 0x1c16400 0 0x200>, /* rx0 */ + <0 0x1c16a00 0 0x1f0>, /* pcs */ @@ -1684,6 +1688,8 @@ clock-names = "pipe0"; @@ -14043,8 +18148,107 @@ index d12e4cbfc8527..b710bca456489 100644 clock-output-names = "pcie_2_pipe_clk"; }; }; +@@ -1750,12 +1756,12 @@ + reset-names = "ufsphy"; + status = "disabled"; + +- ufs_mem_phy_lanes: lanes@1d87400 { +- reg = <0 0x01d87400 0 0x108>, +- <0 0x01d87600 0 0x1e0>, +- <0 0x01d87c00 0 0x1dc>, +- <0 0x01d87800 0 0x108>, +- <0 0x01d87a00 0 0x1e0>; ++ ufs_mem_phy_lanes: phy@1d87400 { ++ reg = <0 0x01d87400 0 0x16c>, ++ <0 0x01d87600 0 0x200>, ++ <0 0x01d87c00 0 0x200>, ++ <0 0x01d87800 0 0x16c>, ++ <0 0x01d87a00 0 0x200>; + #phy-cells = <0>; + }; + }; +@@ -1927,7 +1933,7 @@ + pins = "gpio7"; + function = "dmic1_data"; + drive-strength = <2>; +- pull-down; ++ bias-pull-down; + input-enable; + }; + }; +@@ -2300,15 +2306,11 @@ + dp_phy: dp-phy@88ea200 { + reg = <0 0x088ea200 0 0x200>, + <0 0x088ea400 0 0x200>, +- <0 0x088eac00 0 0x400>, ++ <0 0x088eaa00 0 0x200>, + <0 0x088ea600 0 0x200>, +- <0 0x088ea800 0 0x200>, +- <0 0x088eaa00 0 0x100>; ++ <0 0x088ea800 0 0x200>; + #phy-cells = <0>; + #clock-cells = <1>; +- clocks = <&gcc GCC_USB3_PRIM_PHY_PIPE_CLK>; +- clock-names = "pipe0"; 
+- clock-output-names = "usb3_phy_pipe_clk_src"; + }; + }; + +@@ -2330,7 +2332,7 @@ + <&gcc GCC_USB3_PHY_SEC_BCR>; + reset-names = "phy", "common"; + +- usb_2_ssphy: lanes@88eb200 { ++ usb_2_ssphy: phy@88eb200 { + reg = <0 0x088eb200 0 0x200>, + <0 0x088eb400 0 0x200>, + <0 0x088eb800 0 0x800>; +diff --git a/arch/arm64/boot/dts/qcom/sm8350-hdk.dts b/arch/arm64/boot/dts/qcom/sm8350-hdk.dts +index 56093e260ddfd..9ea0d7233add0 100644 +--- a/arch/arm64/boot/dts/qcom/sm8350-hdk.dts ++++ b/arch/arm64/boot/dts/qcom/sm8350-hdk.dts +@@ -108,6 +108,9 @@ + regulator-max-microvolt = <888000>; + regulator-initial-mode = <RPMH_REGULATOR_MODE_HPM>; + regulator-allow-set-load; ++ regulator-allowed-modes = ++ <RPMH_REGULATOR_MODE_LPM ++ RPMH_REGULATOR_MODE_HPM>; + }; + + vreg_l6b_1p2: ldo6 { +@@ -116,6 +119,9 @@ + regulator-max-microvolt = <1208000>; + regulator-initial-mode = <RPMH_REGULATOR_MODE_HPM>; + regulator-allow-set-load; ++ regulator-allowed-modes = ++ <RPMH_REGULATOR_MODE_LPM ++ RPMH_REGULATOR_MODE_HPM>; + }; + + vreg_l7b_2p96: ldo7 { +@@ -124,6 +130,9 @@ + regulator-max-microvolt = <2504000>; + regulator-initial-mode = <RPMH_REGULATOR_MODE_HPM>; + regulator-allow-set-load; ++ regulator-allowed-modes = ++ <RPMH_REGULATOR_MODE_LPM ++ RPMH_REGULATOR_MODE_HPM>; + }; + + vreg_l9b_1p2: ldo9 { +@@ -132,6 +141,9 @@ + regulator-max-microvolt = <1200000>; + regulator-initial-mode = <RPMH_REGULATOR_MODE_HPM>; + regulator-allow-set-load; ++ regulator-allowed-modes = ++ <RPMH_REGULATOR_MODE_LPM ++ RPMH_REGULATOR_MODE_HPM>; + }; + }; + diff --git a/arch/arm64/boot/dts/qcom/sm8350.dtsi b/arch/arm64/boot/dts/qcom/sm8350.dtsi -index e91cd8a5e5356..c0a3ea47302f4 100644 +index e91cd8a5e5356..1ef16975d13a1 100644 --- a/arch/arm64/boot/dts/qcom/sm8350.dtsi +++ b/arch/arm64/boot/dts/qcom/sm8350.dtsi @@ -35,6 +35,24 @@ @@ -14141,6 +18345,25 @@ index e91cd8a5e5356..c0a3ea47302f4 100644 #address-cells = <2>; #size-cells = <2>; #clock-cells = <1>; +@@ -1081,12 +1123,12 @@ + reset-names = "ufsphy"; + status = "disabled"; + +- ufs_mem_phy_lanes: lanes@1d87400 { +- reg = <0 0x01d87400 0 0x108>, +- <0 0x01d87600 0 0x1e0>, +- <0 0x01d87c00 0 0x1dc>, +- <0 0x01d87800 0 0x108>, +- <0 0x01d87a00 0 0x1e0>; ++ ufs_mem_phy_lanes: phy@1d87400 { ++ reg = <0 0x01d87400 0 0x188>, ++ <0 0x01d87600 0 0x200>, ++ <0 0x01d87c00 0 0x200>, ++ <0 0x01d87800 0 0x188>, ++ <0 0x01d87a00 0 0x200>; + #phy-cells = <0>; + #clock-cells = <0>; + }; @@ -2185,7 +2227,7 @@ }; }; @@ -14548,9 +18771,18 @@ index 248ebb61aa790..5200d0bbd9e9c 100644 reg = <0x0 0xff240000 0x0 0x4000>; interrupts = <GIC_SPI 1 IRQ_TYPE_LEVEL_HIGH>, diff --git a/arch/arm64/boot/dts/rockchip/rk3308-roc-cc.dts b/arch/arm64/boot/dts/rockchip/rk3308-roc-cc.dts -index 665b2e69455dd..ea6820902ede0 100644 +index 665b2e69455dd..7ea48167747c6 100644 --- a/arch/arm64/boot/dts/rockchip/rk3308-roc-cc.dts +++ b/arch/arm64/boot/dts/rockchip/rk3308-roc-cc.dts +@@ -19,7 +19,7 @@ + stdout-path = "serial2:1500000n8"; + }; + +- ir_rx { ++ ir-receiver { + compatible = "gpio-ir-receiver"; + gpios = <&gpio0 RK_PC0 GPIO_ACTIVE_HIGH>; + pinctrl-names = "default"; @@ -97,7 +97,7 @@ regulator-max-microvolt = <3300000>; regulator-always-on; @@ -14560,6 +18792,26 @@ index 665b2e69455dd..ea6820902ede0 100644 }; vdd_core: vdd-core { +diff --git a/arch/arm64/boot/dts/rockchip/rk3328-roc-cc.dts b/arch/arm64/boot/dts/rockchip/rk3328-roc-cc.dts +index aa22a0c222655..5d5d9574088ca 100644 +--- a/arch/arm64/boot/dts/rockchip/rk3328-roc-cc.dts ++++ b/arch/arm64/boot/dts/rockchip/rk3328-roc-cc.dts +@@ -96,7 +96,6 @@ + 
linux,default-trigger = "heartbeat"; + gpios = <&rk805 1 GPIO_ACTIVE_LOW>; + default-state = "on"; +- mode = <0x23>; + }; + + user_led: led-1 { +@@ -104,7 +103,6 @@ + linux,default-trigger = "mmc1"; + gpios = <&rk805 0 GPIO_ACTIVE_LOW>; + default-state = "off"; +- mode = <0x05>; + }; + }; + }; diff --git a/arch/arm64/boot/dts/rockchip/rk3328.dtsi b/arch/arm64/boot/dts/rockchip/rk3328.dtsi index 8c821acb21ffb..3cbe83e6fb9a4 100644 --- a/arch/arm64/boot/dts/rockchip/rk3328.dtsi @@ -14732,9 +18984,18 @@ index 2b5f001ff4a61..9e5d07f5712e6 100644 cpu-supply = <&vdd_cpu_b>; }; diff --git a/arch/arm64/boot/dts/rockchip/rk3399-puma-haikou.dts b/arch/arm64/boot/dts/rockchip/rk3399-puma-haikou.dts -index 292bb7e80cf35..3ae5d727e3674 100644 +index 292bb7e80cf35..f07f4b8231f91 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-puma-haikou.dts +++ b/arch/arm64/boot/dts/rockchip/rk3399-puma-haikou.dts +@@ -207,7 +207,7 @@ + cap-sd-highspeed; + cd-gpios = <&gpio0 RK_PA7 GPIO_ACTIVE_LOW>; + disable-wp; +- max-frequency = <150000000>; ++ max-frequency = <40000000>; + pinctrl-names = "default"; + pinctrl-0 = <&sdmmc_clk &sdmmc_cmd &sdmmc_cd &sdmmc_bus4>; + vmmc-supply = <&vcc3v3_baseboard>; @@ -232,6 +232,7 @@ &usbdrd_dwc3_0 { @@ -14793,10 +19054,18 @@ index fb67db4619ea0..7b27079fd6116 100644 mmc-hs400-1_8v; mmc-hs400-enhanced-strobe; diff --git a/arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4.dtsi -index b28888ea9262e..100a769165ef9 100644 +index b28888ea9262e..a7ec81657503c 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4.dtsi -@@ -457,7 +457,7 @@ +@@ -446,7 +446,6 @@ + &i2s1 { + rockchip,playback-channels = <2>; + rockchip,capture-channels = <2>; +- status = "okay"; + }; + + &i2s2 { +@@ -457,7 +456,7 @@ status = "okay"; bt656-supply = <&vcc_3v0>; @@ -14940,7 +19209,7 @@ index e2b397c884018..8a76f4821b11b 100644 }; }; diff --git a/arch/arm64/boot/dts/ti/k3-am65-main.dtsi b/arch/arm64/boot/dts/ti/k3-am65-main.dtsi -index ba4e5d3e1ed7a..82be00069bcd5 100644 +index ba4e5d3e1ed7a..4f232f575ab2a 100644 --- a/arch/arm64/boot/dts/ti/k3-am65-main.dtsi +++ b/arch/arm64/boot/dts/ti/k3-am65-main.dtsi @@ -35,7 +35,10 @@ @@ -14955,6 +19224,14 @@ index ba4e5d3e1ed7a..82be00069bcd5 100644 /* * vcpumntirq: * virtual CPU interface maintenance interrupt +@@ -117,7 +120,6 @@ + dmas = <&main_udmap 0xc000>, <&main_udmap 0x4000>, + <&main_udmap 0x4001>; + dma-names = "tx", "rx1", "rx2"; +- dma-coherent; + + rng: rng@4e10000 { + compatible = "inside-secure,safexcel-eip76"; diff --git a/arch/arm64/boot/dts/ti/k3-am65.dtsi b/arch/arm64/boot/dts/ti/k3-am65.dtsi index a9fc1af03f27f..1607db9b32dd2 100644 --- a/arch/arm64/boot/dts/ti/k3-am65.dtsi @@ -15090,7 +19367,7 @@ index b7005b8031495..afe99f3920ccd 100644 <0x00 0x18000000 0x00 0x18000000 0x00 0x08000000>, /* PCIe1 DAT0 */ <0x41 0x00000000 0x41 0x00000000 0x01 0x00000000>, /* PCIe1 DAT1 */ diff --git a/arch/arm64/boot/dts/ti/k3-j721e-main.dtsi b/arch/arm64/boot/dts/ti/k3-j721e-main.dtsi -index cf3482376c1e6..6c81997ee28ad 100644 +index cf3482376c1e6..ad21bb1417aa6 100644 --- a/arch/arm64/boot/dts/ti/k3-j721e-main.dtsi +++ b/arch/arm64/boot/dts/ti/k3-j721e-main.dtsi @@ -42,7 +42,7 @@ @@ -15114,7 +19391,15 @@ index cf3482376c1e6..6c81997ee28ad 100644 /* vcpumntirq: virtual CPU interface maintenance interrupt */ interrupts = <GIC_PPI 9 IRQ_TYPE_LEVEL_HIGH>; -@@ -610,7 +613,7 @@ +@@ -333,7 +336,6 @@ + dmas = <&main_udmap 0xc000>, <&main_udmap 0x4000>, + <&main_udmap 
0x4001>; + dma-names = "tx", "rx1", "rx2"; +- dma-coherent; + + rng: rng@4e10000 { + compatible = "inside-secure,safexcel-eip76"; +@@ -610,7 +612,7 @@ clock-names = "fck"; #address-cells = <3>; #size-cells = <2>; @@ -15123,7 +19408,7 @@ index cf3482376c1e6..6c81997ee28ad 100644 vendor-id = <0x104c>; device-id = <0xb00d>; msi-map = <0x0 &gic_its 0x0 0x10000>; -@@ -636,7 +639,7 @@ +@@ -636,7 +638,7 @@ clocks = <&k3_clks 239 1>; clock-names = "fck"; max-functions = /bits/ 8 <6>; @@ -15132,7 +19417,7 @@ index cf3482376c1e6..6c81997ee28ad 100644 dma-coherent; }; -@@ -658,7 +661,7 @@ +@@ -658,7 +660,7 @@ clock-names = "fck"; #address-cells = <3>; #size-cells = <2>; @@ -15141,7 +19426,7 @@ index cf3482376c1e6..6c81997ee28ad 100644 vendor-id = <0x104c>; device-id = <0xb00d>; msi-map = <0x0 &gic_its 0x10000 0x10000>; -@@ -684,7 +687,7 @@ +@@ -684,7 +686,7 @@ clocks = <&k3_clks 240 1>; clock-names = "fck"; max-functions = /bits/ 8 <6>; @@ -15150,7 +19435,7 @@ index cf3482376c1e6..6c81997ee28ad 100644 dma-coherent; }; -@@ -706,7 +709,7 @@ +@@ -706,7 +708,7 @@ clock-names = "fck"; #address-cells = <3>; #size-cells = <2>; @@ -15159,7 +19444,7 @@ index cf3482376c1e6..6c81997ee28ad 100644 vendor-id = <0x104c>; device-id = <0xb00d>; msi-map = <0x0 &gic_its 0x20000 0x10000>; -@@ -732,7 +735,7 @@ +@@ -732,7 +734,7 @@ clocks = <&k3_clks 241 1>; clock-names = "fck"; max-functions = /bits/ 8 <6>; @@ -15168,7 +19453,7 @@ index cf3482376c1e6..6c81997ee28ad 100644 dma-coherent; }; -@@ -754,7 +757,7 @@ +@@ -754,7 +756,7 @@ clock-names = "fck"; #address-cells = <3>; #size-cells = <2>; @@ -15177,7 +19462,7 @@ index cf3482376c1e6..6c81997ee28ad 100644 vendor-id = <0x104c>; device-id = <0xb00d>; msi-map = <0x0 &gic_its 0x30000 0x10000>; -@@ -780,7 +783,7 @@ +@@ -780,7 +782,7 @@ clocks = <&k3_clks 242 1>; clock-names = "fck"; max-functions = /bits/ 8 <6>; @@ -15393,6 +19678,285 @@ index bfa58409a4d4d..448a575db8e8e 100644 +#endif /* CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY */ + .endm #endif /* __ASM_ASSEMBLER_H */ +diff --git a/arch/arm64/include/asm/atomic_ll_sc.h b/arch/arm64/include/asm/atomic_ll_sc.h +index 13869b76b58cd..abd302e521c06 100644 +--- a/arch/arm64/include/asm/atomic_ll_sc.h ++++ b/arch/arm64/include/asm/atomic_ll_sc.h +@@ -12,19 +12,6 @@ + + #include <linux/stringify.h> + +-#ifdef CONFIG_ARM64_LSE_ATOMICS +-#define __LL_SC_FALLBACK(asm_ops) \ +-" b 3f\n" \ +-" .subsection 1\n" \ +-"3:\n" \ +-asm_ops "\n" \ +-" b 4f\n" \ +-" .previous\n" \ +-"4:\n" +-#else +-#define __LL_SC_FALLBACK(asm_ops) asm_ops +-#endif +- + #ifndef CONFIG_CC_HAS_K_CONSTRAINT + #define K + #endif +@@ -43,12 +30,11 @@ __ll_sc_atomic_##op(int i, atomic_t *v) \ + int result; \ + \ + asm volatile("// atomic_" #op "\n" \ +- __LL_SC_FALLBACK( \ +-" prfm pstl1strm, %2\n" \ +-"1: ldxr %w0, %2\n" \ +-" " #asm_op " %w0, %w0, %w3\n" \ +-" stxr %w1, %w0, %2\n" \ +-" cbnz %w1, 1b\n") \ ++ " prfm pstl1strm, %2\n" \ ++ "1: ldxr %w0, %2\n" \ ++ " " #asm_op " %w0, %w0, %w3\n" \ ++ " stxr %w1, %w0, %2\n" \ ++ " cbnz %w1, 1b\n" \ + : "=&r" (result), "=&r" (tmp), "+Q" (v->counter) \ + : __stringify(constraint) "r" (i)); \ + } +@@ -61,13 +47,12 @@ __ll_sc_atomic_##op##_return##name(int i, atomic_t *v) \ + int result; \ + \ + asm volatile("// atomic_" #op "_return" #name "\n" \ +- __LL_SC_FALLBACK( \ +-" prfm pstl1strm, %2\n" \ +-"1: ld" #acq "xr %w0, %2\n" \ +-" " #asm_op " %w0, %w0, %w3\n" \ +-" st" #rel "xr %w1, %w0, %2\n" \ +-" cbnz %w1, 1b\n" \ +-" " #mb ) \ ++ " prfm pstl1strm, %2\n" \ ++ "1: ld" #acq "xr %w0, %2\n" \ ++ " " #asm_op " %w0, %w0, 
%w3\n" \ ++ " st" #rel "xr %w1, %w0, %2\n" \ ++ " cbnz %w1, 1b\n" \ ++ " " #mb \ + : "=&r" (result), "=&r" (tmp), "+Q" (v->counter) \ + : __stringify(constraint) "r" (i) \ + : cl); \ +@@ -83,13 +68,12 @@ __ll_sc_atomic_fetch_##op##name(int i, atomic_t *v) \ + int val, result; \ + \ + asm volatile("// atomic_fetch_" #op #name "\n" \ +- __LL_SC_FALLBACK( \ +-" prfm pstl1strm, %3\n" \ +-"1: ld" #acq "xr %w0, %3\n" \ +-" " #asm_op " %w1, %w0, %w4\n" \ +-" st" #rel "xr %w2, %w1, %3\n" \ +-" cbnz %w2, 1b\n" \ +-" " #mb ) \ ++ " prfm pstl1strm, %3\n" \ ++ "1: ld" #acq "xr %w0, %3\n" \ ++ " " #asm_op " %w1, %w0, %w4\n" \ ++ " st" #rel "xr %w2, %w1, %3\n" \ ++ " cbnz %w2, 1b\n" \ ++ " " #mb \ + : "=&r" (result), "=&r" (val), "=&r" (tmp), "+Q" (v->counter) \ + : __stringify(constraint) "r" (i) \ + : cl); \ +@@ -142,12 +126,11 @@ __ll_sc_atomic64_##op(s64 i, atomic64_t *v) \ + unsigned long tmp; \ + \ + asm volatile("// atomic64_" #op "\n" \ +- __LL_SC_FALLBACK( \ +-" prfm pstl1strm, %2\n" \ +-"1: ldxr %0, %2\n" \ +-" " #asm_op " %0, %0, %3\n" \ +-" stxr %w1, %0, %2\n" \ +-" cbnz %w1, 1b") \ ++ " prfm pstl1strm, %2\n" \ ++ "1: ldxr %0, %2\n" \ ++ " " #asm_op " %0, %0, %3\n" \ ++ " stxr %w1, %0, %2\n" \ ++ " cbnz %w1, 1b" \ + : "=&r" (result), "=&r" (tmp), "+Q" (v->counter) \ + : __stringify(constraint) "r" (i)); \ + } +@@ -160,13 +143,12 @@ __ll_sc_atomic64_##op##_return##name(s64 i, atomic64_t *v) \ + unsigned long tmp; \ + \ + asm volatile("// atomic64_" #op "_return" #name "\n" \ +- __LL_SC_FALLBACK( \ +-" prfm pstl1strm, %2\n" \ +-"1: ld" #acq "xr %0, %2\n" \ +-" " #asm_op " %0, %0, %3\n" \ +-" st" #rel "xr %w1, %0, %2\n" \ +-" cbnz %w1, 1b\n" \ +-" " #mb ) \ ++ " prfm pstl1strm, %2\n" \ ++ "1: ld" #acq "xr %0, %2\n" \ ++ " " #asm_op " %0, %0, %3\n" \ ++ " st" #rel "xr %w1, %0, %2\n" \ ++ " cbnz %w1, 1b\n" \ ++ " " #mb \ + : "=&r" (result), "=&r" (tmp), "+Q" (v->counter) \ + : __stringify(constraint) "r" (i) \ + : cl); \ +@@ -176,19 +158,18 @@ __ll_sc_atomic64_##op##_return##name(s64 i, atomic64_t *v) \ + + #define ATOMIC64_FETCH_OP(name, mb, acq, rel, cl, op, asm_op, constraint)\ + static inline long \ +-__ll_sc_atomic64_fetch_##op##name(s64 i, atomic64_t *v) \ ++__ll_sc_atomic64_fetch_##op##name(s64 i, atomic64_t *v) \ + { \ + s64 result, val; \ + unsigned long tmp; \ + \ + asm volatile("// atomic64_fetch_" #op #name "\n" \ +- __LL_SC_FALLBACK( \ +-" prfm pstl1strm, %3\n" \ +-"1: ld" #acq "xr %0, %3\n" \ +-" " #asm_op " %1, %0, %4\n" \ +-" st" #rel "xr %w2, %1, %3\n" \ +-" cbnz %w2, 1b\n" \ +-" " #mb ) \ ++ " prfm pstl1strm, %3\n" \ ++ "1: ld" #acq "xr %0, %3\n" \ ++ " " #asm_op " %1, %0, %4\n" \ ++ " st" #rel "xr %w2, %1, %3\n" \ ++ " cbnz %w2, 1b\n" \ ++ " " #mb \ + : "=&r" (result), "=&r" (val), "=&r" (tmp), "+Q" (v->counter) \ + : __stringify(constraint) "r" (i) \ + : cl); \ +@@ -240,15 +221,14 @@ __ll_sc_atomic64_dec_if_positive(atomic64_t *v) + unsigned long tmp; + + asm volatile("// atomic64_dec_if_positive\n" +- __LL_SC_FALLBACK( +-" prfm pstl1strm, %2\n" +-"1: ldxr %0, %2\n" +-" subs %0, %0, #1\n" +-" b.lt 2f\n" +-" stlxr %w1, %0, %2\n" +-" cbnz %w1, 1b\n" +-" dmb ish\n" +-"2:") ++ " prfm pstl1strm, %2\n" ++ "1: ldxr %0, %2\n" ++ " subs %0, %0, #1\n" ++ " b.lt 2f\n" ++ " stlxr %w1, %0, %2\n" ++ " cbnz %w1, 1b\n" ++ " dmb ish\n" ++ "2:" + : "=&r" (result), "=&r" (tmp), "+Q" (v->counter) + : + : "cc", "memory"); +@@ -274,7 +254,6 @@ __ll_sc__cmpxchg_case_##name##sz(volatile void *ptr, \ + old = (u##sz)old; \ + \ + asm volatile( \ +- __LL_SC_FALLBACK( \ + " prfm pstl1strm, %[v]\n" \ + "1: 
ld" #acq "xr" #sfx "\t%" #w "[oldval], %[v]\n" \ + " eor %" #w "[tmp], %" #w "[oldval], %" #w "[old]\n" \ +@@ -282,7 +261,7 @@ __ll_sc__cmpxchg_case_##name##sz(volatile void *ptr, \ + " st" #rel "xr" #sfx "\t%w[tmp], %" #w "[new], %[v]\n" \ + " cbnz %w[tmp], 1b\n" \ + " " #mb "\n" \ +- "2:") \ ++ "2:" \ + : [tmp] "=&r" (tmp), [oldval] "=&r" (oldval), \ + [v] "+Q" (*(u##sz *)ptr) \ + : [old] __stringify(constraint) "r" (old), [new] "r" (new) \ +@@ -326,7 +305,6 @@ __ll_sc__cmpxchg_double##name(unsigned long old1, \ + unsigned long tmp, ret; \ + \ + asm volatile("// __cmpxchg_double" #name "\n" \ +- __LL_SC_FALLBACK( \ + " prfm pstl1strm, %2\n" \ + "1: ldxp %0, %1, %2\n" \ + " eor %0, %0, %3\n" \ +@@ -336,8 +314,8 @@ __ll_sc__cmpxchg_double##name(unsigned long old1, \ + " st" #rel "xp %w0, %5, %6, %2\n" \ + " cbnz %w0, 1b\n" \ + " " #mb "\n" \ +- "2:") \ +- : "=&r" (tmp), "=&r" (ret), "+Q" (*(unsigned long *)ptr) \ ++ "2:" \ ++ : "=&r" (tmp), "=&r" (ret), "+Q" (*(__uint128_t *)ptr) \ + : "r" (old1), "r" (old2), "r" (new1), "r" (new2) \ + : cl); \ + \ +diff --git a/arch/arm64/include/asm/atomic_lse.h b/arch/arm64/include/asm/atomic_lse.h +index da3280f639cd7..28e96118c1e5a 100644 +--- a/arch/arm64/include/asm/atomic_lse.h ++++ b/arch/arm64/include/asm/atomic_lse.h +@@ -11,11 +11,11 @@ + #define __ASM_ATOMIC_LSE_H + + #define ATOMIC_OP(op, asm_op) \ +-static inline void __lse_atomic_##op(int i, atomic_t *v) \ ++static inline void __lse_atomic_##op(int i, atomic_t *v) \ + { \ + asm volatile( \ + __LSE_PREAMBLE \ +-" " #asm_op " %w[i], %[v]\n" \ ++ " " #asm_op " %w[i], %[v]\n" \ + : [i] "+r" (i), [v] "+Q" (v->counter) \ + : "r" (v)); \ + } +@@ -32,7 +32,7 @@ static inline int __lse_atomic_fetch_##op##name(int i, atomic_t *v) \ + { \ + asm volatile( \ + __LSE_PREAMBLE \ +-" " #asm_op #mb " %w[i], %w[i], %[v]" \ ++ " " #asm_op #mb " %w[i], %w[i], %[v]" \ + : [i] "+r" (i), [v] "+Q" (v->counter) \ + : "r" (v) \ + : cl); \ +@@ -130,7 +130,7 @@ static inline int __lse_atomic_sub_return##name(int i, atomic_t *v) \ + " add %w[i], %w[i], %w[tmp]" \ + : [i] "+&r" (i), [v] "+Q" (v->counter), [tmp] "=&r" (tmp) \ + : "r" (v) \ +- : cl); \ ++ : cl); \ + \ + return i; \ + } +@@ -168,7 +168,7 @@ static inline void __lse_atomic64_##op(s64 i, atomic64_t *v) \ + { \ + asm volatile( \ + __LSE_PREAMBLE \ +-" " #asm_op " %[i], %[v]\n" \ ++ " " #asm_op " %[i], %[v]\n" \ + : [i] "+r" (i), [v] "+Q" (v->counter) \ + : "r" (v)); \ + } +@@ -185,7 +185,7 @@ static inline long __lse_atomic64_fetch_##op##name(s64 i, atomic64_t *v)\ + { \ + asm volatile( \ + __LSE_PREAMBLE \ +-" " #asm_op #mb " %[i], %[i], %[v]" \ ++ " " #asm_op #mb " %[i], %[i], %[v]" \ + : [i] "+r" (i), [v] "+Q" (v->counter) \ + : "r" (v) \ + : cl); \ +@@ -272,7 +272,7 @@ static inline void __lse_atomic64_sub(s64 i, atomic64_t *v) + } + + #define ATOMIC64_OP_SUB_RETURN(name, mb, cl...) 
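
The atomic_ll_sc.h hunks above drop the out-of-line __LL_SC_FALLBACK trampoline and reindent the exclusive-load/store loops, but every routine keeps the same shape: load-exclusive, compute, store-exclusive, retry until the store lands. As a hedged, portable sketch (plain C11 atomics rather than the kernel's hand-written LDXR/STXR assembly), the same retry structure looks like this:

    #include <stdatomic.h>

    /* Portable analogue of the ll/sc loop: a weak compare-exchange may
     * fail spuriously, just like a store-exclusive, so it is retried
     * in a loop until it succeeds. */
    static int atomic_add_return_sketch(int i, _Atomic int *v)
    {
        int old = atomic_load_explicit(v, memory_order_relaxed);

        while (!atomic_compare_exchange_weak_explicit(
                        v, &old, old + i,
                        memory_order_acq_rel, memory_order_relaxed))
            ;    /* 'old' was reloaded on failure; just try again */

        return old + i;
    }
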
\ +-static inline long __lse_atomic64_sub_return##name(s64 i, atomic64_t *v) \ ++static inline long __lse_atomic64_sub_return##name(s64 i, atomic64_t *v)\ + { \ + unsigned long tmp; \ + \ +@@ -403,7 +403,7 @@ __lse__cmpxchg_double##name(unsigned long old1, \ + " eor %[old2], %[old2], %[oldval2]\n" \ + " orr %[old1], %[old1], %[old2]" \ + : [old1] "+&r" (x0), [old2] "+&r" (x1), \ +- [v] "+Q" (*(unsigned long *)ptr) \ ++ [v] "+Q" (*(__uint128_t *)ptr) \ + : [new1] "r" (x2), [new2] "r" (x3), [ptr] "r" (x4), \ + [oldval1] "r" (oldval1), [oldval2] "r" (oldval2) \ + : cl); \ diff --git a/arch/arm64/include/asm/cpu.h b/arch/arm64/include/asm/cpu.h index 0f6d16faa5402..a58e366f0b074 100644 --- a/arch/arm64/include/asm/cpu.h @@ -15446,10 +20010,27 @@ index ef6be92b1921a..a77b5f49b3a6c 100644 DECLARE_STATIC_KEY_FALSE(arm64_mismatched_32bit_el0); diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h -index 6231e1f0abe7e..39f5c1672f480 100644 +index 6231e1f0abe7e..9cf5d9551e991 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h -@@ -73,6 +73,15 @@ +@@ -41,7 +41,7 @@ + (((midr) & MIDR_IMPLEMENTOR_MASK) >> MIDR_IMPLEMENTOR_SHIFT) + + #define MIDR_CPU_MODEL(imp, partnum) \ +- (((imp) << MIDR_IMPLEMENTOR_SHIFT) | \ ++ ((_AT(u32, imp) << MIDR_IMPLEMENTOR_SHIFT) | \ + (0xf << MIDR_ARCHITECTURE_SHIFT) | \ + ((partnum) << MIDR_PARTNUM_SHIFT)) + +@@ -60,6 +60,7 @@ + #define ARM_CPU_IMP_FUJITSU 0x46 + #define ARM_CPU_IMP_HISI 0x48 + #define ARM_CPU_IMP_APPLE 0x61 ++#define ARM_CPU_IMP_AMPERE 0xC0 + + #define ARM_CPU_PART_AEM_V8 0xD0F + #define ARM_CPU_PART_FOUNDATION 0xD00 +@@ -73,6 +74,15 @@ #define ARM_CPU_PART_CORTEX_A76 0xD0B #define ARM_CPU_PART_NEOVERSE_N1 0xD0C #define ARM_CPU_PART_CORTEX_A77 0xD0D @@ -15465,7 +20046,16 @@ index 6231e1f0abe7e..39f5c1672f480 100644 #define APM_CPU_PART_POTENZA 0x000 -@@ -113,6 +122,15 @@ +@@ -103,6 +113,8 @@ + #define APPLE_CPU_PART_M1_ICESTORM 0x022 + #define APPLE_CPU_PART_M1_FIRESTORM 0x023 + ++#define AMPERE_CPU_PART_AMPERE1 0xAC3 ++ + #define MIDR_CORTEX_A53 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A53) + #define MIDR_CORTEX_A57 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A57) + #define MIDR_CORTEX_A72 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A72) +@@ -113,6 +125,15 @@ #define MIDR_CORTEX_A76 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A76) #define MIDR_NEOVERSE_N1 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_N1) #define MIDR_CORTEX_A77 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A77) @@ -15481,8 +20071,38 @@ index 6231e1f0abe7e..39f5c1672f480 100644 #define MIDR_THUNDERX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX) #define MIDR_THUNDERX_81XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_81XX) #define MIDR_THUNDERX_83XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_83XX) +@@ -133,6 +154,7 @@ + #define MIDR_HISI_TSV110 MIDR_CPU_MODEL(ARM_CPU_IMP_HISI, HISI_CPU_PART_TSV110) + #define MIDR_APPLE_M1_ICESTORM MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_ICESTORM) + #define MIDR_APPLE_M1_FIRESTORM MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_FIRESTORM) ++#define MIDR_AMPERE1 MIDR_CPU_MODEL(ARM_CPU_IMP_AMPERE, AMPERE_CPU_PART_AMPERE1) + + /* Fujitsu Erratum 010001 affects A64FX 1.0 and 1.1, (v0r0 and v1r0) */ + #define MIDR_FUJITSU_ERRATUM_010001 MIDR_FUJITSU_A64FX +diff --git a/arch/arm64/include/asm/debug-monitors.h b/arch/arm64/include/asm/debug-monitors.h +index 657c921fd784a..e1e10a24519b2 100644 
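
Buried in the reindentation above is one substantive fix: __ll_sc__cmpxchg_double() and its LSE counterpart previously declared their memory operand as "+Q" (*(unsigned long *)ptr), telling the compiler that only 8 bytes are read and written, while the LDXP/STXP pair (and CASP) actually touch 16. Retyping the operand as __uint128_t widens the constraint to the whole object, so the compiler can no longer cache or reorder accesses to the upper half around the asm. A hedged userspace sketch of a 16-byte compare-and-swap using compiler builtins instead of hand-written asm (may need -latomic on some toolchains):

    #include <stdbool.h>

    typedef unsigned __int128 u128;

    /* 16-byte CAS; LSE-capable arm64 compilers can inline this as CASP,
     * otherwise it becomes a libatomic call. */
    static bool cas16(u128 *ptr, u128 *expected, u128 desired)
    {
        return __atomic_compare_exchange_n(ptr, expected, desired, false,
                                           __ATOMIC_SEQ_CST,
                                           __ATOMIC_SEQ_CST);
    }

    int main(void)
    {
        u128 v = 1, want = 1;

        return cas16(&v, &want, 2) && v == 2 ? 0 : 1;   /* exits 0 */
    }
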
+--- a/arch/arm64/include/asm/debug-monitors.h ++++ b/arch/arm64/include/asm/debug-monitors.h +@@ -76,7 +76,7 @@ struct task_struct; + + struct step_hook { + struct list_head node; +- int (*fn)(struct pt_regs *regs, unsigned int esr); ++ int (*fn)(struct pt_regs *regs, unsigned long esr); + }; + + void register_user_step_hook(struct step_hook *hook); +@@ -87,7 +87,7 @@ void unregister_kernel_step_hook(struct step_hook *hook); + + struct break_hook { + struct list_head node; +- int (*fn)(struct pt_regs *regs, unsigned int esr); ++ int (*fn)(struct pt_regs *regs, unsigned long esr); + u16 imm; + u16 mask; /* These bits are ignored when comparing with imm */ + }; diff --git a/arch/arm64/include/asm/efi.h b/arch/arm64/include/asm/efi.h -index d3e1825337be3..ad55079abe476 100644 +index d3e1825337be3..c5d4551a1be71 100644 --- a/arch/arm64/include/asm/efi.h +++ b/arch/arm64/include/asm/efi.h @@ -14,7 +14,6 @@ @@ -15493,6 +20113,27 @@ index d3e1825337be3..ad55079abe476 100644 #else #define efi_init() #endif +@@ -26,6 +25,7 @@ int efi_set_mapping_permissions(struct mm_struct *mm, efi_memory_desc_t *md); + ({ \ + efi_virtmap_load(); \ + __efi_fpsimd_begin(); \ ++ spin_lock(&efi_rt_lock); \ + }) + + #define arch_efi_call_virt(p, f, args...) \ +@@ -37,10 +37,12 @@ int efi_set_mapping_permissions(struct mm_struct *mm, efi_memory_desc_t *md); + + #define arch_efi_call_virt_teardown() \ + ({ \ ++ spin_unlock(&efi_rt_lock); \ + __efi_fpsimd_end(); \ + efi_virtmap_unload(); \ + }) + ++extern spinlock_t efi_rt_lock; + efi_status_t __efi_rt_asm_wrapper(void *, const char *, ...); + + #define ARCH_EFI_IRQ_FLAGS_MASK (PSR_D_BIT | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT) diff --git a/arch/arm64/include/asm/el2_setup.h b/arch/arm64/include/asm/el2_setup.h index 3198acb2aad8c..7f3c87f7a0cec 100644 --- a/arch/arm64/include/asm/el2_setup.h @@ -15507,7 +20148,7 @@ index 3198acb2aad8c..7f3c87f7a0cec 100644 .Lskip_gicv3_\@: .endm diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h -index 29f97eb3dad41..8f59bbeba7a7e 100644 +index 29f97eb3dad41..9f91c8906edd9 100644 --- a/arch/arm64/include/asm/esr.h +++ b/arch/arm64/include/asm/esr.h @@ -68,6 +68,7 @@ @@ -15518,6 +20159,76 @@ index 29f97eb3dad41..8f59bbeba7a7e 100644 #define ESR_ELx_EC_MASK (UL(0x3F) << ESR_ELx_EC_SHIFT) #define ESR_ELx_EC(esr) (((esr) & ESR_ELx_EC_MASK) >> ESR_ELx_EC_SHIFT) +@@ -323,14 +324,14 @@ + #ifndef __ASSEMBLY__ + #include <asm/types.h> + +-static inline bool esr_is_data_abort(u32 esr) ++static inline bool esr_is_data_abort(unsigned long esr) + { +- const u32 ec = ESR_ELx_EC(esr); ++ const unsigned long ec = ESR_ELx_EC(esr); + + return ec == ESR_ELx_EC_DABT_LOW || ec == ESR_ELx_EC_DABT_CUR; + } + +-const char *esr_get_class_string(u32 esr); ++const char *esr_get_class_string(unsigned long esr); + #endif /* __ASSEMBLY */ + + #endif /* __ASM_ESR_H */ +diff --git a/arch/arm64/include/asm/exception.h b/arch/arm64/include/asm/exception.h +index 339477dca5513..0e6535aa78c2f 100644 +--- a/arch/arm64/include/asm/exception.h ++++ b/arch/arm64/include/asm/exception.h +@@ -19,9 +19,9 @@ + #define __exception_irq_entry __kprobes + #endif + +-static inline u32 disr_to_esr(u64 disr) ++static inline unsigned long disr_to_esr(u64 disr) + { +- unsigned int esr = ESR_ELx_EC_SERROR << ESR_ELx_EC_SHIFT; ++ unsigned long esr = ESR_ELx_EC_SERROR << ESR_ELx_EC_SHIFT; + + if ((disr & DISR_EL1_IDS) == 0) + esr |= (disr & DISR_EL1_ESR_MASK); +@@ -57,23 +57,23 @@ asmlinkage void call_on_irq_stack(struct pt_regs *regs, + void (*func)(struct pt_regs *)); + 
asmlinkage void asm_exit_to_user_mode(struct pt_regs *regs); + +-void do_mem_abort(unsigned long far, unsigned int esr, struct pt_regs *regs); ++void do_mem_abort(unsigned long far, unsigned long esr, struct pt_regs *regs); + void do_undefinstr(struct pt_regs *regs); + void do_bti(struct pt_regs *regs); +-void do_debug_exception(unsigned long addr_if_watchpoint, unsigned int esr, ++void do_debug_exception(unsigned long addr_if_watchpoint, unsigned long esr, + struct pt_regs *regs); +-void do_fpsimd_acc(unsigned int esr, struct pt_regs *regs); +-void do_sve_acc(unsigned int esr, struct pt_regs *regs); +-void do_fpsimd_exc(unsigned int esr, struct pt_regs *regs); +-void do_sysinstr(unsigned int esr, struct pt_regs *regs); +-void do_sp_pc_abort(unsigned long addr, unsigned int esr, struct pt_regs *regs); +-void bad_el0_sync(struct pt_regs *regs, int reason, unsigned int esr); +-void do_cp15instr(unsigned int esr, struct pt_regs *regs); ++void do_fpsimd_acc(unsigned long esr, struct pt_regs *regs); ++void do_sve_acc(unsigned long esr, struct pt_regs *regs); ++void do_fpsimd_exc(unsigned long esr, struct pt_regs *regs); ++void do_sysinstr(unsigned long esr, struct pt_regs *regs); ++void do_sp_pc_abort(unsigned long addr, unsigned long esr, struct pt_regs *regs); ++void bad_el0_sync(struct pt_regs *regs, int reason, unsigned long esr); ++void do_cp15instr(unsigned long esr, struct pt_regs *regs); + void do_el0_svc(struct pt_regs *regs); + void do_el0_svc_compat(struct pt_regs *regs); +-void do_ptrauth_fault(struct pt_regs *regs, unsigned int esr); +-void do_serror(struct pt_regs *regs, unsigned int esr); ++void do_ptrauth_fault(struct pt_regs *regs, unsigned long esr); ++void do_serror(struct pt_regs *regs, unsigned long esr); + void do_notify_resume(struct pt_regs *regs, unsigned long thread_flags); + +-void panic_bad_stack(struct pt_regs *regs, unsigned int esr, unsigned long far); ++void panic_bad_stack(struct pt_regs *regs, unsigned long esr, unsigned long far); + #endif /* __ASM_EXCEPTION_H */ diff --git a/arch/arm64/include/asm/extable.h b/arch/arm64/include/asm/extable.h index b15eb4a3e6b20..840a35ed92ec8 100644 --- a/arch/arm64/include/asm/extable.h @@ -15632,6 +20343,39 @@ index 327120c0089fe..f67a561e0935e 100644 #define CPTR_EL2_TAM (1 << 30) #define CPTR_EL2_TTA (1 << 20) #define CPTR_EL2_TFP (1 << CPTR_EL2_TFP_SHIFT) +diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h +index fd418955e31e6..64f8a90d33277 100644 +--- a/arch/arm64/include/asm/kvm_emulate.h ++++ b/arch/arm64/include/asm/kvm_emulate.h +@@ -366,8 +366,26 @@ static __always_inline int kvm_vcpu_sys_get_rt(struct kvm_vcpu *vcpu) + + static inline bool kvm_is_write_fault(struct kvm_vcpu *vcpu) + { +- if (kvm_vcpu_abt_iss1tw(vcpu)) +- return true; ++ if (kvm_vcpu_abt_iss1tw(vcpu)) { ++ /* ++ * Only a permission fault on a S1PTW should be ++ * considered as a write. Otherwise, page tables baked ++ * in a read-only memslot will result in an exception ++ * being delivered in the guest. ++ * ++ * The drawback is that we end-up faulting twice if the ++ * guest is using any of HW AF/DB: a translation fault ++ * to map the page containing the PT (read only at ++ * first), then a permission fault to allow the flags ++ * to be set. 
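
The header changes here widen every ESR value plumbed through the fault and debug paths from unsigned int to unsigned long. ESR_ELx is architecturally a 64-bit register, and newer CPUs define ISS2 bits above bit 31, so a 32-bit type silently truncates them; this is also why the format strings later in this series move from 0x%08x to 0x%016lx. A small hedged demonstration of the truncation, with a made-up ESR value:

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
        /* Hypothetical ESR with an ISS2 bit set above bit 31. */
        uint64_t esr = (1ULL << 37) | 0x96000045ULL;

        unsigned int narrow = esr;  /* old plumbing: bit 37 is lost   */
        uint64_t     wide   = esr;  /* new plumbing: 64-bit, lossless */

        printf("narrow: 0x%08x\n", narrow);            /* 0x96000045 */
        printf("wide:   0x%016llx\n",
               (unsigned long long)wide);      /* 0x0000002096000045 */
        return 0;
    }
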
++ */ ++ switch (kvm_vcpu_trap_get_fault_type(vcpu)) { ++ case ESR_ELx_FSC_PERM: ++ return true; ++ default: ++ return false; ++ } ++ } + + if (kvm_vcpu_trap_is_iabt(vcpu)) + return false; diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index f8be56d5342ba..1713630bf8f5a 100644 --- a/arch/arm64/include/asm/kvm_host.h @@ -15873,7 +20617,7 @@ index dfa76afa0ccff..ed57717cd0040 100644 #endif /* __ASM_PGTABLE_H */ diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h -index ee2bdc1b9f5bb..d9bf3d12a2b85 100644 +index ee2bdc1b9f5bb..7364530de0a77 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -204,8 +204,9 @@ void tls_preserve_current_state(void); @@ -15887,6 +20631,22 @@ index ee2bdc1b9f5bb..d9bf3d12a2b85 100644 regs->pc = pc; if (system_uses_irq_prio_masking()) +@@ -239,13 +240,13 @@ static inline void compat_start_thread(struct pt_regs *regs, unsigned long pc, + } + #endif + +-static inline bool is_ttbr0_addr(unsigned long addr) ++static __always_inline bool is_ttbr0_addr(unsigned long addr) + { + /* entry assembly clears tags for TTBR0 addrs */ + return addr < TASK_SIZE; + } + +-static inline bool is_ttbr1_addr(unsigned long addr) ++static __always_inline bool is_ttbr1_addr(unsigned long addr) + { + /* TTBR1 addresses may have a tag if KASAN_SW_TAGS is in use */ + return arch_kasan_reset_tag(addr) >= PAGE_OFFSET; @@ -335,12 +336,10 @@ long get_tagged_addr_ctrl(struct task_struct *task); * of header definitions for the use of task_stack_page. */ @@ -15964,6 +20724,19 @@ index f62ca39da6c5a..aa3d3607d5c8d 100644 +void spectre_bhb_enable_mitigation(const struct arm64_cpu_capabilities *__unused); #endif /* __ASSEMBLY__ */ #endif /* __ASM_SPECTRE_H */ +diff --git a/arch/arm64/include/asm/syscall_wrapper.h b/arch/arm64/include/asm/syscall_wrapper.h +index b383b4802a7bd..d30217c21eff7 100644 +--- a/arch/arm64/include/asm/syscall_wrapper.h ++++ b/arch/arm64/include/asm/syscall_wrapper.h +@@ -8,7 +8,7 @@ + #ifndef __ASM_SYSCALL_WRAPPER_H + #define __ASM_SYSCALL_WRAPPER_H + +-struct pt_regs; ++#include <asm/ptrace.h> + + #define SC_ARM64_REGS_TO_ARGS(x, ...) 
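
The kvm_is_write_fault() change above is subtle enough to restate: a stage-1 page-table walk now counts as a write only when it faulted on permissions, because hardware access-flag/dirty-bit updates write to the PTE; a translation fault on the walk must still be treated as a read so that guest page tables living in a read-only memslot keep working. A hedged standalone sketch of just that decision (simplified names, not the kernel's types):

    /* Simplified fault classes standing in for the ESR_ELx_FSC_* values. */
    enum fsc_type { FSC_FAULT, FSC_ACCESS, FSC_PERM };

    /* Does a stage-1 page-table-walk abort count as a write? */
    static int s1ptw_is_write(enum fsc_type fault)
    {
        switch (fault) {
        case FSC_PERM:
            return 1;  /* HW AF/DB update blocked by a read-only map */
        default:
            return 0;  /* map the PT page read-only first, fault again */
        }
    }

The cost named in the comment follows directly: a guest using hardware AF/DB faults twice, once to map the page-table page and once to gain write permission.
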
\ + __MAP(x,__SC_ARGS \ diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index b268082d67edd..f79f3720e4cbe 100644 --- a/arch/arm64/include/asm/sysreg.h @@ -16018,6 +20791,65 @@ index b268082d67edd..f79f3720e4cbe 100644 /* GCR_EL1 Definitions */ #define SYS_GCR_EL1_RRND (BIT(16)) #define SYS_GCR_EL1_EXCL_MASK 0xffffUL +diff --git a/arch/arm64/include/asm/system_misc.h b/arch/arm64/include/asm/system_misc.h +index 305a7157c6a6a..0eb7709422e29 100644 +--- a/arch/arm64/include/asm/system_misc.h ++++ b/arch/arm64/include/asm/system_misc.h +@@ -23,9 +23,9 @@ void die(const char *msg, struct pt_regs *regs, int err); + struct siginfo; + void arm64_notify_die(const char *str, struct pt_regs *regs, + int signo, int sicode, unsigned long far, +- int err); ++ unsigned long err); + +-void hook_debug_fault_code(int nr, int (*fn)(unsigned long, unsigned int, ++void hook_debug_fault_code(int nr, int (*fn)(unsigned long, unsigned long, + struct pt_regs *), + int sig, int code, const char *name); + +diff --git a/arch/arm64/include/asm/traps.h b/arch/arm64/include/asm/traps.h +index 54f32a0675dff..6e5826470bea6 100644 +--- a/arch/arm64/include/asm/traps.h ++++ b/arch/arm64/include/asm/traps.h +@@ -24,7 +24,7 @@ struct undef_hook { + + void register_undef_hook(struct undef_hook *hook); + void unregister_undef_hook(struct undef_hook *hook); +-void force_signal_inject(int signal, int code, unsigned long address, unsigned int err); ++void force_signal_inject(int signal, int code, unsigned long address, unsigned long err); + void arm64_notify_segfault(unsigned long addr); + void arm64_force_sig_fault(int signo, int code, unsigned long far, const char *str); + void arm64_force_sig_mceerr(int code, unsigned long far, short lsb, const char *str); +@@ -57,7 +57,7 @@ static inline int in_entry_text(unsigned long ptr) + * errors share the same encoding as an all-zeros encoding from a CPU that + * doesn't support RAS. + */ +-static inline bool arm64_is_ras_serror(u32 esr) ++static inline bool arm64_is_ras_serror(unsigned long esr) + { + WARN_ON(preemptible()); + +@@ -77,9 +77,9 @@ static inline bool arm64_is_ras_serror(u32 esr) + * We treat them as Uncontainable. + * Non-RAS SError's are reported as Uncontained/Uncategorized. + */ +-static inline u32 arm64_ras_serror_get_severity(u32 esr) ++static inline unsigned long arm64_ras_serror_get_severity(unsigned long esr) + { +- u32 aet = esr & ESR_ELx_AET; ++ unsigned long aet = esr & ESR_ELx_AET; + + if (!arm64_is_ras_serror(esr)) { + /* Not a RAS error, we can't interpret the ESR. 
*/ +@@ -98,6 +98,6 @@ static inline u32 arm64_ras_serror_get_severity(u32 esr) + return aet; + } + +-bool arm64_is_fatal_ras_serror(struct pt_regs *regs, unsigned int esr); +-void __noreturn arm64_serror_panic(struct pt_regs *regs, u32 esr); ++bool arm64_is_fatal_ras_serror(struct pt_regs *regs, unsigned long esr); ++void __noreturn arm64_serror_panic(struct pt_regs *regs, unsigned long esr); + #endif diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h index 190b494e22ab9..0fd6056ba412b 100644 --- a/arch/arm64/include/asm/uaccess.h @@ -16683,8 +21515,225 @@ index 87731fea5e418..591c18a889a56 100644 info->reg_id_aa64mmfr0 = read_cpuid(ID_AA64MMFR0_EL1); info->reg_id_aa64mmfr1 = read_cpuid(ID_AA64MMFR1_EL1); info->reg_id_aa64mmfr2 = read_cpuid(ID_AA64MMFR2_EL1); +diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c +index 4f3661eeb7ec6..bf9fe71589bca 100644 +--- a/arch/arm64/kernel/debug-monitors.c ++++ b/arch/arm64/kernel/debug-monitors.c +@@ -202,7 +202,7 @@ void unregister_kernel_step_hook(struct step_hook *hook) + * So we call all the registered handlers, until the right handler is + * found which returns zero. + */ +-static int call_step_hook(struct pt_regs *regs, unsigned int esr) ++static int call_step_hook(struct pt_regs *regs, unsigned long esr) + { + struct step_hook *hook; + struct list_head *list; +@@ -238,7 +238,7 @@ static void send_user_sigtrap(int si_code) + "User debug trap"); + } + +-static int single_step_handler(unsigned long unused, unsigned int esr, ++static int single_step_handler(unsigned long unused, unsigned long esr, + struct pt_regs *regs) + { + bool handler_found = false; +@@ -299,11 +299,11 @@ void unregister_kernel_break_hook(struct break_hook *hook) + unregister_debug_hook(&hook->node); + } + +-static int call_break_hook(struct pt_regs *regs, unsigned int esr) ++static int call_break_hook(struct pt_regs *regs, unsigned long esr) + { + struct break_hook *hook; + struct list_head *list; +- int (*fn)(struct pt_regs *regs, unsigned int esr) = NULL; ++ int (*fn)(struct pt_regs *regs, unsigned long esr) = NULL; + + list = user_mode(regs) ? &user_break_hook : &kernel_break_hook; + +@@ -312,7 +312,7 @@ static int call_break_hook(struct pt_regs *regs, unsigned int esr) + * entirely not preemptible, and we can use rcu list safely here. + */ + list_for_each_entry_rcu(hook, list, node) { +- unsigned int comment = esr & ESR_ELx_BRK64_ISS_COMMENT_MASK; ++ unsigned long comment = esr & ESR_ELx_BRK64_ISS_COMMENT_MASK; + + if ((comment & ~hook->mask) == hook->imm) + fn = hook->fn; +@@ -322,7 +322,7 @@ static int call_break_hook(struct pt_regs *regs, unsigned int esr) + } + NOKPROBE_SYMBOL(call_break_hook); + +-static int brk_handler(unsigned long unused, unsigned int esr, ++static int brk_handler(unsigned long unused, unsigned long esr, + struct pt_regs *regs) + { + if (call_break_hook(regs, esr) == DBG_HOOK_HANDLED) +diff --git a/arch/arm64/kernel/efi-rt-wrapper.S b/arch/arm64/kernel/efi-rt-wrapper.S +index 75691a2641c1c..2d3c4b02393e4 100644 +--- a/arch/arm64/kernel/efi-rt-wrapper.S ++++ b/arch/arm64/kernel/efi-rt-wrapper.S +@@ -4,6 +4,7 @@ + */ + + #include <linux/linkage.h> ++#include <asm/assembler.h> + + SYM_FUNC_START(__efi_rt_asm_wrapper) + stp x29, x30, [sp, #-32]! +@@ -16,6 +17,12 @@ SYM_FUNC_START(__efi_rt_asm_wrapper) + */ + stp x1, x18, [sp, #16] + ++ ldr_l x16, efi_rt_stack_top ++ mov sp, x16 ++#ifdef CONFIG_SHADOW_CALL_STACK ++ str x18, [sp, #-16]! 
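
The debug-monitors.c conversion above changes the callback signature that every break and step hook must now use. For anyone carrying an out-of-tree hook, a minimal hedged sketch against the new prototype (the BRK immediate and message are invented for illustration; only the APIs visible in this series are assumed):

    #include <linux/printk.h>
    #include <asm/debug-monitors.h>

    static int demo_brk_fn(struct pt_regs *regs, unsigned long esr)
    {
        pr_info("demo brk hit, esr=0x%016lx\n", esr);
        return DBG_HOOK_ERROR;          /* not ours, keep searching */
    }

    static struct break_hook demo_break_hook = {
        .fn  = demo_brk_fn,
        .imm = 0x42,                    /* hypothetical BRK immediate */
    };

    /* From init code: register_kernel_break_hook(&demo_break_hook); */
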
++#endif ++ + /* + * We are lucky enough that no EFI runtime services take more than + * 5 arguments, so all are passed in registers rather than via the +@@ -29,6 +36,7 @@ SYM_FUNC_START(__efi_rt_asm_wrapper) + mov x4, x6 + blr x8 + ++ mov sp, x29 + ldp x1, x2, [sp, #16] + cmp x2, x18 + ldp x29, x30, [sp], #32 +@@ -42,6 +50,10 @@ SYM_FUNC_START(__efi_rt_asm_wrapper) + * called with preemption disabled and a separate shadow stack is used + * for interrupts. + */ +- mov x18, x2 ++#ifdef CONFIG_SHADOW_CALL_STACK ++ ldr_l x18, efi_rt_stack_top ++ ldr x18, [x18, #-16] ++#endif ++ + b efi_handle_corrupted_x18 // tail call + SYM_FUNC_END(__efi_rt_asm_wrapper) +diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c +index e1be6c429810d..386bd81ca12bb 100644 +--- a/arch/arm64/kernel/efi.c ++++ b/arch/arm64/kernel/efi.c +@@ -12,6 +12,14 @@ + + #include <asm/efi.h> + ++static bool region_is_misaligned(const efi_memory_desc_t *md) ++{ ++ if (PAGE_SIZE == EFI_PAGE_SIZE) ++ return false; ++ return !PAGE_ALIGNED(md->phys_addr) || ++ !PAGE_ALIGNED(md->num_pages << EFI_PAGE_SHIFT); ++} ++ + /* + * Only regions of type EFI_RUNTIME_SERVICES_CODE need to be + * executable, everything else can be mapped with the XN bits +@@ -25,14 +33,22 @@ static __init pteval_t create_mapping_protection(efi_memory_desc_t *md) + if (type == EFI_MEMORY_MAPPED_IO) + return PROT_DEVICE_nGnRE; + +- if (WARN_ONCE(!PAGE_ALIGNED(md->phys_addr), +- "UEFI Runtime regions are not aligned to 64 KB -- buggy firmware?")) ++ if (region_is_misaligned(md)) { ++ static bool __initdata code_is_misaligned; ++ + /* +- * If the region is not aligned to the page size of the OS, we +- * can not use strict permissions, since that would also affect +- * the mapping attributes of the adjacent regions. ++ * Regions that are not aligned to the OS page size cannot be ++ * mapped with strict permissions, as those might interfere ++ * with the permissions that are needed by the adjacent ++ * region's mapping. However, if we haven't encountered any ++ * misaligned runtime code regions so far, we can safely use ++ * non-executable permissions for non-code regions. + */ +- return pgprot_val(PAGE_KERNEL_EXEC); ++ code_is_misaligned |= (type == EFI_RUNTIME_SERVICES_CODE); ++ ++ return code_is_misaligned ? pgprot_val(PAGE_KERNEL_EXEC) ++ : pgprot_val(PAGE_KERNEL); ++ } + + /* R-- */ + if ((attr & (EFI_MEMORY_XP | EFI_MEMORY_RO)) == +@@ -63,19 +79,16 @@ int __init efi_create_mapping(struct mm_struct *mm, efi_memory_desc_t *md) + bool page_mappings_only = (md->type == EFI_RUNTIME_SERVICES_CODE || + md->type == EFI_RUNTIME_SERVICES_DATA); + +- if (!PAGE_ALIGNED(md->phys_addr) || +- !PAGE_ALIGNED(md->num_pages << EFI_PAGE_SHIFT)) { +- /* +- * If the end address of this region is not aligned to page +- * size, the mapping is rounded up, and may end up sharing a +- * page frame with the next UEFI memory region. If we create +- * a block entry now, we may need to split it again when mapping +- * the next region, and support for that is going to be removed +- * from the MMU routines. So avoid block mappings altogether in +- * that case. +- */ ++ /* ++ * If this region is not aligned to the page size used by the OS, the ++ * mapping will be rounded outwards, and may end up sharing a page ++ * frame with an adjacent runtime memory region. 
Given that the page ++ * table descriptor covering the shared page will be rewritten when the ++ * adjacent region gets mapped, we must avoid block mappings here so we ++ * don't have to worry about splitting them when that happens. ++ */ ++ if (region_is_misaligned(md)) + page_mappings_only = true; +- } + + create_pgd_mapping(mm, md->phys_addr, md->virt_addr, + md->num_pages << EFI_PAGE_SHIFT, +@@ -102,6 +115,9 @@ int __init efi_set_mapping_permissions(struct mm_struct *mm, + BUG_ON(md->type != EFI_RUNTIME_SERVICES_CODE && + md->type != EFI_RUNTIME_SERVICES_DATA); + ++ if (region_is_misaligned(md)) ++ return 0; ++ + /* + * Calling apply_to_page_range() is only safe on regions that are + * guaranteed to be mapped down to pages. Since we are only called +@@ -128,3 +144,30 @@ asmlinkage efi_status_t efi_handle_corrupted_x18(efi_status_t s, const char *f) + pr_err_ratelimited(FW_BUG "register x18 corrupted by EFI %s\n", f); + return s; + } ++ ++DEFINE_SPINLOCK(efi_rt_lock); ++ ++asmlinkage u64 *efi_rt_stack_top __ro_after_init; ++ ++/* EFI requires 8 KiB of stack space for runtime services */ ++static_assert(THREAD_SIZE >= SZ_8K); ++ ++static int __init arm64_efi_rt_init(void) ++{ ++ void *p; ++ ++ if (!efi_enabled(EFI_RUNTIME_SERVICES)) ++ return 0; ++ ++ p = __vmalloc_node(THREAD_SIZE, THREAD_ALIGN, GFP_KERNEL, ++ NUMA_NO_NODE, &&l); ++l: if (!p) { ++ pr_warn("Failed to allocate EFI runtime stack\n"); ++ clear_bit(EFI_RUNTIME_SERVICES, &efi.flags); ++ return -ENOMEM; ++ } ++ ++ efi_rt_stack_top = p + THREAD_SIZE; ++ return 0; ++} ++core_initcall(arm64_efi_rt_init); diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c -index 32f9796c4ffe7..60225bc09b017 100644 +index 32f9796c4ffe7..fc91dad1579ab 100644 --- a/arch/arm64/kernel/entry-common.c +++ b/arch/arm64/kernel/entry-common.c @@ -72,7 +72,7 @@ static __always_inline void __exit_to_kernel_mode(struct pt_regs *regs) @@ -16723,6 +21772,41 @@ index 32f9796c4ffe7..60225bc09b017 100644 } rcu_nmi_exit(); +@@ -273,13 +273,13 @@ extern void (*handle_arch_irq)(struct pt_regs *); + extern void (*handle_arch_fiq)(struct pt_regs *); + + static void noinstr __panic_unhandled(struct pt_regs *regs, const char *vector, +- unsigned int esr) ++ unsigned long esr) + { + arm64_enter_nmi(regs); + + console_verbose(); + +- pr_crit("Unhandled %s exception on CPU%d, ESR 0x%08x -- %s\n", ++ pr_crit("Unhandled %s exception on CPU%d, ESR 0x%016lx -- %s\n", + vector, smp_processor_id(), esr, + esr_get_class_string(esr)); + +@@ -320,7 +320,8 @@ static void cortex_a76_erratum_1463225_svc_handler(void) + __this_cpu_write(__in_cortex_a76_erratum_1463225_wa, 0); + } + +-static bool cortex_a76_erratum_1463225_debug_handler(struct pt_regs *regs) ++static __always_inline bool ++cortex_a76_erratum_1463225_debug_handler(struct pt_regs *regs) + { + if (!__this_cpu_read(__in_cortex_a76_erratum_1463225_wa)) + return false; +@@ -795,7 +796,7 @@ UNHANDLED(el0t, 32, error) + #ifdef CONFIG_VMAP_STACK + asmlinkage void noinstr handle_bad_stack(struct pt_regs *regs) + { +- unsigned int esr = read_sysreg(esr_el1); ++ unsigned long esr = read_sysreg(esr_el1); + unsigned long far = read_sysreg(far_el1); + + arm64_enter_nmi(regs); diff --git a/arch/arm64/kernel/entry-ftrace.S b/arch/arm64/kernel/entry-ftrace.S index b3e4f9a088b1a..8cf970d219f5d 100644 --- a/arch/arm64/kernel/entry-ftrace.S @@ -17090,6 +22174,37 @@ index bc6d5a970a131..34e38eb00f056 100644 br x5 #endif SYM_CODE_END(__sdei_asm_handler) +diff --git a/arch/arm64/kernel/fpsimd.c 
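
To make the new EFI mapping policy concrete: UEFI describes memory in 4 KiB units, so on a 64 KiB-page kernel a runtime region can begin or end in the middle of an OS page; such regions are now mapped page-by-page, and only misaligned code regions force executable permissions onto their neighbours. A hedged, self-contained restatement of the alignment test (fixed page sizes stand in for the kernel's PAGE_SIZE and EFI_PAGE_SIZE macros):

    #include <stdbool.h>
    #include <stdint.h>

    #define OS_PAGE_SIZE   65536ULL  /* 64K pages: the interesting case */
    #define EFI_PAGE_SIZE_ 4096ULL   /* UEFI always uses 4K descriptors */

    static bool region_is_misaligned(uint64_t phys_addr, uint64_t num_pages)
    {
        uint64_t size = num_pages * EFI_PAGE_SIZE_;

        return (phys_addr % OS_PAGE_SIZE) != 0 ||
               (size % OS_PAGE_SIZE) != 0;
    }

    /* region_is_misaligned(0x1000, 16) -> true: starts mid 64K page. */
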
b/arch/arm64/kernel/fpsimd.c +index ff4962750b3d0..7a3fcf21b18a7 100644 +--- a/arch/arm64/kernel/fpsimd.c ++++ b/arch/arm64/kernel/fpsimd.c +@@ -930,7 +930,7 @@ void fpsimd_release_task(struct task_struct *dead_task) + * would have disabled the SVE access trap for userspace during + * ret_to_user, making an SVE access trap impossible in that case. + */ +-void do_sve_acc(unsigned int esr, struct pt_regs *regs) ++void do_sve_acc(unsigned long esr, struct pt_regs *regs) + { + /* Even if we chose not to use SVE, the hardware could still trap: */ + if (unlikely(!system_supports_sve()) || WARN_ON(is_compat_task())) { +@@ -972,7 +972,7 @@ void do_sve_acc(unsigned int esr, struct pt_regs *regs) + /* + * Trapped FP/ASIMD access. + */ +-void do_fpsimd_acc(unsigned int esr, struct pt_regs *regs) ++void do_fpsimd_acc(unsigned long esr, struct pt_regs *regs) + { + /* TODO: implement lazy context saving/restoring */ + WARN_ON(1); +@@ -981,7 +981,7 @@ void do_fpsimd_acc(unsigned int esr, struct pt_regs *regs) + /* + * Raise a SIGFPE for the current process. + */ +-void do_fpsimd_exc(unsigned int esr, struct pt_regs *regs) ++void do_fpsimd_exc(unsigned long esr, struct pt_regs *regs) + { + unsigned int si_code = FPE_FLTUNK; + diff --git a/arch/arm64/kernel/ftrace.c b/arch/arm64/kernel/ftrace.c index 7f467bd9db7a3..dba774f3b8d7c 100644 --- a/arch/arm64/kernel/ftrace.c @@ -17297,6 +22412,28 @@ index 17962452e31de..ab6566bf1c332 100644 /* * Create the identity mapping. +diff --git a/arch/arm64/kernel/hw_breakpoint.c b/arch/arm64/kernel/hw_breakpoint.c +index 712e97c03e54c..2a7f21314cde6 100644 +--- a/arch/arm64/kernel/hw_breakpoint.c ++++ b/arch/arm64/kernel/hw_breakpoint.c +@@ -617,7 +617,7 @@ NOKPROBE_SYMBOL(toggle_bp_registers); + /* + * Debug exception handlers. + */ +-static int breakpoint_handler(unsigned long unused, unsigned int esr, ++static int breakpoint_handler(unsigned long unused, unsigned long esr, + struct pt_regs *regs) + { + int i, step = 0, *kernel_step; +@@ -751,7 +751,7 @@ static int watchpoint_report(struct perf_event *wp, unsigned long addr, + return step; + } + +-static int watchpoint_handler(unsigned long addr, unsigned int esr, ++static int watchpoint_handler(unsigned long addr, unsigned long esr, + struct pt_regs *regs) + { + int i, step = 0, *kernel_step, access, closest_match = 0; diff --git a/arch/arm64/kernel/image-vars.h b/arch/arm64/kernel/image-vars.h index c96a9a0043bf4..e03e60f9482b4 100644 --- a/arch/arm64/kernel/image-vars.h @@ -17312,6 +22449,36 @@ index c96a9a0043bf4..e03e60f9482b4 100644 /* Global kernel state accessed by nVHE hyp code. 
*/ KVM_NVHE_ALIAS(kvm_vgic_global_state); +diff --git a/arch/arm64/kernel/kgdb.c b/arch/arm64/kernel/kgdb.c +index 2aede780fb80c..cda9c1e9864f7 100644 +--- a/arch/arm64/kernel/kgdb.c ++++ b/arch/arm64/kernel/kgdb.c +@@ -232,14 +232,14 @@ int kgdb_arch_handle_exception(int exception_vector, int signo, + return err; + } + +-static int kgdb_brk_fn(struct pt_regs *regs, unsigned int esr) ++static int kgdb_brk_fn(struct pt_regs *regs, unsigned long esr) + { + kgdb_handle_exception(1, SIGTRAP, 0, regs); + return DBG_HOOK_HANDLED; + } + NOKPROBE_SYMBOL(kgdb_brk_fn) + +-static int kgdb_compiled_brk_fn(struct pt_regs *regs, unsigned int esr) ++static int kgdb_compiled_brk_fn(struct pt_regs *regs, unsigned long esr) + { + compiled_break = 1; + kgdb_handle_exception(1, SIGTRAP, 0, regs); +@@ -248,7 +248,7 @@ static int kgdb_compiled_brk_fn(struct pt_regs *regs, unsigned int esr) + } + NOKPROBE_SYMBOL(kgdb_compiled_brk_fn); + +-static int kgdb_step_brk_fn(struct pt_regs *regs, unsigned int esr) ++static int kgdb_step_brk_fn(struct pt_regs *regs, unsigned long esr) + { + if (!kgdb_single_step) + return DBG_HOOK_ERROR; diff --git a/arch/arm64/kernel/machine_kexec_file.c b/arch/arm64/kernel/machine_kexec_file.c index 63634b4d72c15..59c648d518488 100644 --- a/arch/arm64/kernel/machine_kexec_file.c @@ -17347,10 +22514,29 @@ index b5ec010c481f3..309a27553c875 100644 return NULL; } diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c -index e5e801bc53122..dacca0684ea34 100644 +index e5e801bc53122..a3898bac5ae6f 100644 --- a/arch/arm64/kernel/mte.c +++ b/arch/arm64/kernel/mte.c -@@ -73,6 +73,9 @@ void mte_sync_tags(pte_t old_pte, pte_t pte) +@@ -53,7 +53,12 @@ static void mte_sync_page_tags(struct page *page, pte_t old_pte, + * the new page->flags are visible before the tags were updated. + */ + smp_wmb(); +- mte_clear_page_tags(page_address(page)); ++ /* ++ * Test PG_mte_tagged again in case it was racing with another ++ * set_pte_at(). ++ */ ++ if (!test_and_set_bit(PG_mte_tagged, &page->flags)) ++ mte_clear_page_tags(page_address(page)); + } + + void mte_sync_tags(pte_t old_pte, pte_t pte) +@@ -69,10 +74,13 @@ void mte_sync_tags(pte_t old_pte, pte_t pte) + + /* if PG_mte_tagged is set, tags have already been initialised */ + for (i = 0; i < nr_pages; i++, page++) { +- if (!test_and_set_bit(PG_mte_tagged, &page->flags)) ++ if (!test_bit(PG_mte_tagged, &page->flags)) mte_sync_page_tags(page, old_pte, check_swap, pte_is_tagged); } @@ -17360,7 +22546,7 @@ index e5e801bc53122..dacca0684ea34 100644 } int memcmp_pages(struct page *page1, struct page *page2) -@@ -210,6 +213,49 @@ void mte_thread_switch(struct task_struct *next) +@@ -210,6 +218,49 @@ void mte_thread_switch(struct task_struct *next) mte_check_tfsr_el1(); } @@ -17410,7 +22596,7 @@ index e5e801bc53122..dacca0684ea34 100644 void mte_suspend_enter(void) { if (!system_supports_mte()) -@@ -226,6 +272,14 @@ void mte_suspend_enter(void) +@@ -226,6 +277,14 @@ void mte_suspend_enter(void) mte_check_tfsr_el1(); } @@ -17581,6 +22767,69 @@ index 4a72c27273097..86d9f20131723 100644 misc |= PERF_RECORD_MISC_GUEST_USER; else misc |= PERF_RECORD_MISC_GUEST_KERNEL; +diff --git a/arch/arm64/kernel/probes/kprobes.c b/arch/arm64/kernel/probes/kprobes.c +index 6dbcc89f66627..2162b6fd7251d 100644 +--- a/arch/arm64/kernel/probes/kprobes.c ++++ b/arch/arm64/kernel/probes/kprobes.c +@@ -7,6 +7,9 @@ + * Copyright (C) 2013 Linaro Limited. 
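
The mte.c hunk above closes a race: two CPUs can run set_pte_at() on the same page concurrently, and with the old code both could observe PG_mte_tagged clear yet only one path would initialise the tags. The fix keeps a lockless test in the caller but re-tests with test_and_set_bit() in the slow path, so exactly one racer clears the tags. A hedged portable sketch of the pattern with C11 atomics:

    #include <stdatomic.h>
    #include <stdio.h>

    #define PG_TAGGED 0x1UL

    static _Atomic unsigned long page_flags;

    static void sync_tags(void)
    {
        /* Fast path: tags already initialised, nothing to do. */
        if (atomic_load(&page_flags) & PG_TAGGED)
            return;

        /* Slow path: re-test under an atomic RMW so that of two
         * racing callers, exactly one does the one-time work. */
        if (!(atomic_fetch_or(&page_flags, PG_TAGGED) & PG_TAGGED))
            puts("clearing tags exactly once");
    }

    int main(void) { sync_tags(); sync_tags(); return 0; }
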
+ * Author: Sandeepa Prabhu <sandeepa.prabhu@linaro.org> + */ ++ ++#define pr_fmt(fmt) "kprobes: " fmt ++ + #include <linux/extable.h> + #include <linux/kasan.h> + #include <linux/kernel.h> +@@ -218,7 +221,7 @@ static int __kprobes reenter_kprobe(struct kprobe *p, + break; + case KPROBE_HIT_SS: + case KPROBE_REENTER: +- pr_warn("Unrecoverable kprobe detected.\n"); ++ pr_warn("Failed to recover from reentered kprobes.\n"); + dump_kprobe(p); + BUG(); + break; +@@ -332,7 +335,7 @@ static void __kprobes kprobe_handler(struct pt_regs *regs) + } + + static int __kprobes +-kprobe_breakpoint_ss_handler(struct pt_regs *regs, unsigned int esr) ++kprobe_breakpoint_ss_handler(struct pt_regs *regs, unsigned long esr) + { + struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); + unsigned long addr = instruction_pointer(regs); +@@ -356,7 +359,7 @@ static struct break_hook kprobes_break_ss_hook = { + }; + + static int __kprobes +-kprobe_breakpoint_handler(struct pt_regs *regs, unsigned int esr) ++kprobe_breakpoint_handler(struct pt_regs *regs, unsigned long esr) + { + kprobe_handler(regs); + return DBG_HOOK_HANDLED; +diff --git a/arch/arm64/kernel/probes/uprobes.c b/arch/arm64/kernel/probes/uprobes.c +index 9be668f3f0341..d49aef2657cdf 100644 +--- a/arch/arm64/kernel/probes/uprobes.c ++++ b/arch/arm64/kernel/probes/uprobes.c +@@ -166,7 +166,7 @@ int arch_uprobe_exception_notify(struct notifier_block *self, + } + + static int uprobe_breakpoint_handler(struct pt_regs *regs, +- unsigned int esr) ++ unsigned long esr) + { + if (uprobe_pre_sstep_notifier(regs)) + return DBG_HOOK_HANDLED; +@@ -175,7 +175,7 @@ static int uprobe_breakpoint_handler(struct pt_regs *regs, + } + + static int uprobe_single_step_handler(struct pt_regs *regs, +- unsigned int esr) ++ unsigned long esr) + { + struct uprobe_task *utask = current->utask; + diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index 40adb8cdbf5af..23efabcb00b85 100644 --- a/arch/arm64/kernel/process.c @@ -17652,7 +22901,7 @@ index 40adb8cdbf5af..23efabcb00b85 100644 if (task_spec_ssb_noexec(current)) { arch_prctl_spec_ctrl_set(current, PR_SPEC_STORE_BYPASS, diff --git a/arch/arm64/kernel/proton-pack.c b/arch/arm64/kernel/proton-pack.c -index 902e4084c4775..40be3a7c2c531 100644 +index 902e4084c4775..428cfabd11c49 100644 --- a/arch/arm64/kernel/proton-pack.c +++ b/arch/arm64/kernel/proton-pack.c @@ -18,15 +18,18 @@ @@ -17765,7 +23014,7 @@ index 902e4084c4775..40be3a7c2c531 100644 { u32 insn; -@@ -770,3 +813,345 @@ int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which) +@@ -770,3 +813,351 @@ int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which) return -ENODEV; } } @@ -17824,6 +23073,10 @@ index 902e4084c4775..40be3a7c2c531 100644 + MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N1), + {}, + }; ++ static const struct midr_range spectre_bhb_k11_list[] = { ++ MIDR_ALL_VERSIONS(MIDR_AMPERE1), ++ {}, ++ }; + static const struct midr_range spectre_bhb_k8_list[] = { + MIDR_ALL_VERSIONS(MIDR_CORTEX_A72), + MIDR_ALL_VERSIONS(MIDR_CORTEX_A57), @@ -17834,6 +23087,8 @@ index 902e4084c4775..40be3a7c2c531 100644 + k = 32; + else if (is_midr_in_range_list(read_cpuid_id(), spectre_bhb_k24_list)) + k = 24; ++ else if (is_midr_in_range_list(read_cpuid_id(), spectre_bhb_k11_list)) ++ k = 11; + else if (is_midr_in_range_list(read_cpuid_id(), spectre_bhb_k8_list)) + k = 8; + @@ -18330,11 +23585,259 @@ index 4dd14a6620c17..877c68f472822 100644 int cpc_read_ffh(int cpu, struct cpc_reg *reg, u64 *val) diff --git a/arch/arm64/kernel/traps.c 
b/arch/arm64/kernel/traps.c -index b03e383d944ab..fe0cd0568813e 100644 +index b03e383d944ab..21e69a991bc83 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c -@@ -988,7 +988,7 @@ static struct break_hook bug_break_hook = { - static int reserved_fault_handler(struct pt_regs *regs, unsigned int esr) +@@ -235,7 +235,7 @@ void die(const char *str, struct pt_regs *regs, int err) + raw_spin_unlock_irqrestore(&die_lock, flags); + + if (ret != NOTIFY_STOP) +- do_exit(SIGSEGV); ++ make_task_dead(SIGSEGV); + } + + static void arm64_show_signal(int signo, const char *str) +@@ -243,7 +243,7 @@ static void arm64_show_signal(int signo, const char *str) + static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL, + DEFAULT_RATELIMIT_BURST); + struct task_struct *tsk = current; +- unsigned int esr = tsk->thread.fault_code; ++ unsigned long esr = tsk->thread.fault_code; + struct pt_regs *regs = task_pt_regs(tsk); + + /* Leave if the signal won't be shown */ +@@ -254,7 +254,7 @@ static void arm64_show_signal(int signo, const char *str) + + pr_info("%s[%d]: unhandled exception: ", tsk->comm, task_pid_nr(tsk)); + if (esr) +- pr_cont("%s, ESR 0x%08x, ", esr_get_class_string(esr), esr); ++ pr_cont("%s, ESR 0x%016lx, ", esr_get_class_string(esr), esr); + + pr_cont("%s", str); + print_vma_addr(KERN_CONT " in ", regs->pc); +@@ -288,7 +288,7 @@ void arm64_force_sig_ptrace_errno_trap(int errno, unsigned long far, + + void arm64_notify_die(const char *str, struct pt_regs *regs, + int signo, int sicode, unsigned long far, +- int err) ++ unsigned long err) + { + if (user_mode(regs)) { + WARN_ON(regs != current_pt_regs()); +@@ -440,7 +440,7 @@ exit: + return fn ? fn(regs, instr) : 1; + } + +-void force_signal_inject(int signal, int code, unsigned long address, unsigned int err) ++void force_signal_inject(int signal, int code, unsigned long address, unsigned long err) + { + const char *desc; + struct pt_regs *regs = current_pt_regs(); +@@ -507,7 +507,7 @@ void do_bti(struct pt_regs *regs) + } + NOKPROBE_SYMBOL(do_bti); + +-void do_ptrauth_fault(struct pt_regs *regs, unsigned int esr) ++void do_ptrauth_fault(struct pt_regs *regs, unsigned long esr) + { + /* + * Unexpected FPAC exception or pointer authentication failure in +@@ -538,7 +538,7 @@ NOKPROBE_SYMBOL(do_ptrauth_fault); + uaccess_ttbr0_disable(); \ + } + +-static void user_cache_maint_handler(unsigned int esr, struct pt_regs *regs) ++static void user_cache_maint_handler(unsigned long esr, struct pt_regs *regs) + { + unsigned long tagged_address, address; + int rt = ESR_ELx_SYS64_ISS_RT(esr); +@@ -578,7 +578,7 @@ static void user_cache_maint_handler(unsigned int esr, struct pt_regs *regs) + arm64_skip_faulting_instruction(regs, AARCH64_INSN_SIZE); + } + +-static void ctr_read_handler(unsigned int esr, struct pt_regs *regs) ++static void ctr_read_handler(unsigned long esr, struct pt_regs *regs) + { + int rt = ESR_ELx_SYS64_ISS_RT(esr); + unsigned long val = arm64_ftr_reg_user_value(&arm64_ftr_reg_ctrel0); +@@ -597,7 +597,7 @@ static void ctr_read_handler(unsigned int esr, struct pt_regs *regs) + arm64_skip_faulting_instruction(regs, AARCH64_INSN_SIZE); + } + +-static void cntvct_read_handler(unsigned int esr, struct pt_regs *regs) ++static void cntvct_read_handler(unsigned long esr, struct pt_regs *regs) + { + int rt = ESR_ELx_SYS64_ISS_RT(esr); + +@@ -605,7 +605,7 @@ static void cntvct_read_handler(unsigned int esr, struct pt_regs *regs) + arm64_skip_faulting_instruction(regs, AARCH64_INSN_SIZE); + } + +-static void 
cntfrq_read_handler(unsigned int esr, struct pt_regs *regs) ++static void cntfrq_read_handler(unsigned long esr, struct pt_regs *regs) + { + int rt = ESR_ELx_SYS64_ISS_RT(esr); + +@@ -613,7 +613,7 @@ static void cntfrq_read_handler(unsigned int esr, struct pt_regs *regs) + arm64_skip_faulting_instruction(regs, AARCH64_INSN_SIZE); + } + +-static void mrs_handler(unsigned int esr, struct pt_regs *regs) ++static void mrs_handler(unsigned long esr, struct pt_regs *regs) + { + u32 sysreg, rt; + +@@ -624,15 +624,15 @@ static void mrs_handler(unsigned int esr, struct pt_regs *regs) + force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc, 0); + } + +-static void wfi_handler(unsigned int esr, struct pt_regs *regs) ++static void wfi_handler(unsigned long esr, struct pt_regs *regs) + { + arm64_skip_faulting_instruction(regs, AARCH64_INSN_SIZE); + } + + struct sys64_hook { +- unsigned int esr_mask; +- unsigned int esr_val; +- void (*handler)(unsigned int esr, struct pt_regs *regs); ++ unsigned long esr_mask; ++ unsigned long esr_val; ++ void (*handler)(unsigned long esr, struct pt_regs *regs); + }; + + static const struct sys64_hook sys64_hooks[] = { +@@ -675,7 +675,7 @@ static const struct sys64_hook sys64_hooks[] = { + }; + + #ifdef CONFIG_COMPAT +-static bool cp15_cond_valid(unsigned int esr, struct pt_regs *regs) ++static bool cp15_cond_valid(unsigned long esr, struct pt_regs *regs) + { + int cond; + +@@ -695,7 +695,7 @@ static bool cp15_cond_valid(unsigned int esr, struct pt_regs *regs) + return aarch32_opcode_cond_checks[cond](regs->pstate); + } + +-static void compat_cntfrq_read_handler(unsigned int esr, struct pt_regs *regs) ++static void compat_cntfrq_read_handler(unsigned long esr, struct pt_regs *regs) + { + int reg = (esr & ESR_ELx_CP15_32_ISS_RT_MASK) >> ESR_ELx_CP15_32_ISS_RT_SHIFT; + +@@ -712,7 +712,7 @@ static const struct sys64_hook cp15_32_hooks[] = { + {}, + }; + +-static void compat_cntvct_read_handler(unsigned int esr, struct pt_regs *regs) ++static void compat_cntvct_read_handler(unsigned long esr, struct pt_regs *regs) + { + int rt = (esr & ESR_ELx_CP15_64_ISS_RT_MASK) >> ESR_ELx_CP15_64_ISS_RT_SHIFT; + int rt2 = (esr & ESR_ELx_CP15_64_ISS_RT2_MASK) >> ESR_ELx_CP15_64_ISS_RT2_SHIFT; +@@ -732,7 +732,7 @@ static const struct sys64_hook cp15_64_hooks[] = { + {}, + }; + +-void do_cp15instr(unsigned int esr, struct pt_regs *regs) ++void do_cp15instr(unsigned long esr, struct pt_regs *regs) + { + const struct sys64_hook *hook, *hook_base; + +@@ -773,7 +773,7 @@ void do_cp15instr(unsigned int esr, struct pt_regs *regs) + NOKPROBE_SYMBOL(do_cp15instr); + #endif + +-void do_sysinstr(unsigned int esr, struct pt_regs *regs) ++void do_sysinstr(unsigned long esr, struct pt_regs *regs) + { + const struct sys64_hook *hook; + +@@ -837,7 +837,7 @@ static const char *esr_class_str[] = { + [ESR_ELx_EC_BRK64] = "BRK (AArch64)", + }; + +-const char *esr_get_class_string(u32 esr) ++const char *esr_get_class_string(unsigned long esr) + { + return esr_class_str[ESR_ELx_EC(esr)]; + } +@@ -846,7 +846,7 @@ const char *esr_get_class_string(u32 esr) + * bad_el0_sync handles unexpected, but potentially recoverable synchronous + * exceptions taken from EL0. 
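
All of the trap handlers above are wired up through one small dispatch structure: struct sys64_hook carries an esr_mask/esr_val pair plus a handler, and do_sysinstr() walks the table until (esr & mask) == val. A hedged miniature of that dispatch, with standalone types rather than the kernel's:

    struct hook {
        unsigned long esr_mask;
        unsigned long esr_val;
        void (*handler)(unsigned long esr);
    };

    /* Walk a sentinel-terminated hook table; first match wins. */
    static int dispatch(const struct hook *h, unsigned long esr)
    {
        for (; h->handler; h++) {
            if ((esr & h->esr_mask) == h->esr_val) {
                h->handler(esr);
                return 1;
            }
        }
        return 0;   /* no hook claimed it: fall back, e.g. SIGILL */
    }
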
+ */ +-void bad_el0_sync(struct pt_regs *regs, int reason, unsigned int esr) ++void bad_el0_sync(struct pt_regs *regs, int reason, unsigned long esr) + { + unsigned long pc = instruction_pointer(regs); + +@@ -862,7 +862,7 @@ void bad_el0_sync(struct pt_regs *regs, int reason, unsigned int esr) + DEFINE_PER_CPU(unsigned long [OVERFLOW_STACK_SIZE/sizeof(long)], overflow_stack) + __aligned(16); + +-void panic_bad_stack(struct pt_regs *regs, unsigned int esr, unsigned long far) ++void panic_bad_stack(struct pt_regs *regs, unsigned long esr, unsigned long far) + { + unsigned long tsk_stk = (unsigned long)current->stack; + unsigned long irq_stk = (unsigned long)this_cpu_read(irq_stack_ptr); +@@ -871,7 +871,7 @@ void panic_bad_stack(struct pt_regs *regs, unsigned int esr, unsigned long far) + console_verbose(); + pr_emerg("Insufficient stack space to handle exception!"); + +- pr_emerg("ESR: 0x%08x -- %s\n", esr, esr_get_class_string(esr)); ++ pr_emerg("ESR: 0x%016lx -- %s\n", esr, esr_get_class_string(esr)); + pr_emerg("FAR: 0x%016lx\n", far); + + pr_emerg("Task stack: [0x%016lx..0x%016lx]\n", +@@ -892,11 +892,11 @@ void panic_bad_stack(struct pt_regs *regs, unsigned int esr, unsigned long far) + } + #endif + +-void __noreturn arm64_serror_panic(struct pt_regs *regs, u32 esr) ++void __noreturn arm64_serror_panic(struct pt_regs *regs, unsigned long esr) + { + console_verbose(); + +- pr_crit("SError Interrupt on CPU%d, code 0x%08x -- %s\n", ++ pr_crit("SError Interrupt on CPU%d, code 0x%016lx -- %s\n", + smp_processor_id(), esr, esr_get_class_string(esr)); + if (regs) + __show_regs(regs); +@@ -907,9 +907,9 @@ void __noreturn arm64_serror_panic(struct pt_regs *regs, u32 esr) + unreachable(); + } + +-bool arm64_is_fatal_ras_serror(struct pt_regs *regs, unsigned int esr) ++bool arm64_is_fatal_ras_serror(struct pt_regs *regs, unsigned long esr) + { +- u32 aet = arm64_ras_serror_get_severity(esr); ++ unsigned long aet = arm64_ras_serror_get_severity(esr); + + switch (aet) { + case ESR_ELx_AET_CE: /* corrected error */ +@@ -939,7 +939,7 @@ bool arm64_is_fatal_ras_serror(struct pt_regs *regs, unsigned int esr) + } + } + +-void do_serror(struct pt_regs *regs, unsigned int esr) ++void do_serror(struct pt_regs *regs, unsigned long esr) + { + /* non-RAS errors are not containable */ + if (!arm64_is_ras_serror(esr) || arm64_is_fatal_ras_serror(regs, esr)) +@@ -960,7 +960,7 @@ int is_valid_bugaddr(unsigned long addr) + return 1; + } + +-static int bug_handler(struct pt_regs *regs, unsigned int esr) ++static int bug_handler(struct pt_regs *regs, unsigned long esr) + { + switch (report_bug(regs->pc, regs)) { + case BUG_TRAP_TYPE_BUG: +@@ -985,10 +985,10 @@ static struct break_hook bug_break_hook = { + .imm = BUG_BRK_IMM, + }; + +-static int reserved_fault_handler(struct pt_regs *regs, unsigned int esr) ++static int reserved_fault_handler(struct pt_regs *regs, unsigned long esr) { pr_err("%s generated an invalid instruction at %pS!\n", - in_bpf_jit(regs) ? 
"BPF JIT" : "Kernel text patching", @@ -18342,6 +23845,29 @@ index b03e383d944ab..fe0cd0568813e 100644 (void *)instruction_pointer(regs)); /* We cannot handle this */ +@@ -1007,7 +1007,7 @@ static struct break_hook fault_break_hook = { + #define KASAN_ESR_SIZE_MASK 0x0f + #define KASAN_ESR_SIZE(esr) (1 << ((esr) & KASAN_ESR_SIZE_MASK)) + +-static int kasan_handler(struct pt_regs *regs, unsigned int esr) ++static int kasan_handler(struct pt_regs *regs, unsigned long esr) + { + bool recover = esr & KASAN_ESR_RECOVER; + bool write = esr & KASAN_ESR_WRITE; +@@ -1050,11 +1050,11 @@ static struct break_hook kasan_break_hook = { + * Initial handler for AArch64 BRK exceptions + * This handler only used until debug_traps_init(). + */ +-int __init early_brk64(unsigned long addr, unsigned int esr, ++int __init early_brk64(unsigned long addr, unsigned long esr, + struct pt_regs *regs) + { + #ifdef CONFIG_KASAN_SW_TAGS +- unsigned int comment = esr & ESR_ELx_BRK64_ISS_COMMENT_MASK; ++ unsigned long comment = esr & ESR_ELx_BRK64_ISS_COMMENT_MASK; + + if ((comment & ~KASAN_BRK_MASK) == KASAN_BRK_IMM) + return kasan_handler(regs, esr) != DBG_HOOK_HANDLED; diff --git a/arch/arm64/kernel/vdso/Makefile b/arch/arm64/kernel/vdso/Makefile index 945e6bb326e3e..b5d8f72e8b32e 100644 --- a/arch/arm64/kernel/vdso/Makefile @@ -18427,7 +23953,7 @@ index f6b1a88245db2..184abd7c4206e 100644 #endif #ifdef CONFIG_KVM diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c -index fe102cd2e5183..4cb265e153617 100644 +index fe102cd2e5183..3fe816c244cec 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -712,8 +712,7 @@ static bool vcpu_mode_is_bad_32bit(struct kvm_vcpu *vcpu) @@ -18530,54 +24056,66 @@ index fe102cd2e5183..4cb265e153617 100644 err = create_hyp_exec_mappings(__pa_symbol(__bp_harden_hyp_vecs), __BP_HARDEN_HYP_VECS_SZ, &base); if (err) -@@ -1971,9 +1983,25 @@ out_err: +@@ -1971,31 +1983,50 @@ out_err: return err; } -static void _kvm_host_prot_finalize(void *discard) +static void _kvm_host_prot_finalize(void *arg) -+{ + { +- WARN_ON(kvm_call_hyp_nvhe(__pkvm_prot_finalize)); + int *err = arg; + + if (WARN_ON(kvm_call_hyp_nvhe(__pkvm_prot_finalize))) + WRITE_ONCE(*err, -EINVAL); -+} -+ + } + +-static int finalize_hyp_mode(void) +static int pkvm_drop_host_privileges(void) { -- WARN_ON(kvm_call_hyp_nvhe(__pkvm_prot_finalize)); +- if (!is_protected_kvm_enabled()) +- return 0; +- +- /* +- * Exclude HYP BSS from kmemleak so that it doesn't get peeked +- * at, which would end badly once the section is inaccessible. +- * None of other sections should ever be introspected. +- */ +- kmemleak_free_part(__hyp_bss_start, __hyp_bss_end - __hyp_bss_start); + int ret = 0; -+ + + /* + * Flip the static key upfront as that may no longer be possible + * once the host stage 2 is installed. + */ + static_branch_enable(&kvm_protected_mode_initialized); +- on_each_cpu(_kvm_host_prot_finalize, NULL, 1); + +- return 0; + /* -+ * Flip the static key upfront as that may no longer be possible -+ * once the host stage 2 is installed. ++ * Fixup the boot mode so that we don't take spurious round ++ * trips via EL2 on cpu_resume. Flush to the PoC for a good ++ * measure, so that it can be observed by a CPU coming out of ++ * suspend with the MMU off. 
+ */ -+ static_branch_enable(&kvm_protected_mode_initialized); ++ __boot_cpu_mode[0] = __boot_cpu_mode[1] = BOOT_CPU_MODE_EL1; ++ dcache_clean_poc((unsigned long)__boot_cpu_mode, ++ (unsigned long)(__boot_cpu_mode + 2)); ++ + on_each_cpu(_kvm_host_prot_finalize, &ret, 1); + return ret; - } - - static int finalize_hyp_mode(void) -@@ -1982,20 +2010,12 @@ static int finalize_hyp_mode(void) - return 0; - - /* -- * Exclude HYP BSS from kmemleak so that it doesn't get peeked -- * at, which would end badly once the section is inaccessible. -- * None of other sections should ever be introspected. ++} ++ ++static int finalize_hyp_mode(void) ++{ ++ if (!is_protected_kvm_enabled()) ++ return 0; ++ ++ /* + * Exclude HYP sections from kmemleak so that they don't get peeked + * at, which would end badly once inaccessible. - */ - kmemleak_free_part(__hyp_bss_start, __hyp_bss_end - __hyp_bss_start); -- -- /* -- * Flip the static key upfront as that may no longer be possible -- * once the host stage 2 is installed. -- */ -- static_branch_enable(&kvm_protected_mode_initialized); -- on_each_cpu(_kvm_host_prot_finalize, NULL, 1); -- -- return 0; ++ */ ++ kmemleak_free_part(__hyp_bss_start, __hyp_bss_end - __hyp_bss_start); + kmemleak_free_part(__va(hyp_mem_base), hyp_mem_size); + return pkvm_drop_host_privileges(); } @@ -18616,10 +24154,18 @@ index 275a27368a04c..a5ab5215094ee 100644 switch (exception_index) { diff --git a/arch/arm64/kvm/hyp/exception.c b/arch/arm64/kvm/hyp/exception.c -index 0418399e0a201..c5d0097154020 100644 +index 0418399e0a201..aa06e28f2991f 100644 --- a/arch/arm64/kvm/hyp/exception.c +++ b/arch/arm64/kvm/hyp/exception.c -@@ -38,7 +38,10 @@ static inline void __vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg) +@@ -13,6 +13,7 @@ + #include <hyp/adjust_pc.h> + #include <linux/kvm_host.h> + #include <asm/kvm_emulate.h> ++#include <asm/kvm_mmu.h> + + #if !defined (__KVM_NVHE_HYPERVISOR__) && !defined (__KVM_VHE_HYPERVISOR__) + #error Hypervisor code only! +@@ -38,7 +39,10 @@ static inline void __vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg) static void __vcpu_write_spsr(struct kvm_vcpu *vcpu, u64 val) { @@ -18631,6 +24177,15 @@ index 0418399e0a201..c5d0097154020 100644 } static void __vcpu_write_spsr_abt(struct kvm_vcpu *vcpu, u64 val) +@@ -112,7 +116,7 @@ static void enter_exception64(struct kvm_vcpu *vcpu, unsigned long target_mode, + new |= (old & PSR_C_BIT); + new |= (old & PSR_V_BIT); + +- if (kvm_has_mte(vcpu->kvm)) ++ if (kvm_has_mte(kern_hyp_va(vcpu->kvm))) + new |= PSR_TCO_BIT; + + new |= (old & PSR_DIT_BIT); diff --git a/arch/arm64/kvm/hyp/hyp-entry.S b/arch/arm64/kvm/hyp/hyp-entry.S index 9aa9b73475c95..7839d075729b1 100644 --- a/arch/arm64/kvm/hyp/hyp-entry.S @@ -18716,6 +24271,21 @@ index de7e14c862e6c..7ecca8b078519 100644 if (cpus_have_final_cap(ARM64_HAS_RAS_EXTN)) ctxt_sys_reg(ctxt, DISR_EL1) = read_sysreg_s(SYS_VDISR_EL2); +diff --git a/arch/arm64/kvm/hyp/nvhe/Makefile b/arch/arm64/kvm/hyp/nvhe/Makefile +index 8d741f71377f4..964c2134ea1e5 100644 +--- a/arch/arm64/kvm/hyp/nvhe/Makefile ++++ b/arch/arm64/kvm/hyp/nvhe/Makefile +@@ -83,6 +83,10 @@ quiet_cmd_hypcopy = HYPCOPY $@ + # Remove ftrace, Shadow Call Stack, and CFI CFLAGS. + # This is equivalent to the 'notrace', '__noscs', and '__nocfi' annotations. + KBUILD_CFLAGS := $(filter-out $(CC_FLAGS_FTRACE) $(CC_FLAGS_SCS) $(CC_FLAGS_CFI), $(KBUILD_CFLAGS)) ++# Starting from 13.0.0 llvm emits SHT_REL section '.llvm.call-graph-profile' ++# when profile optimization is applied. 
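
Higher up, pkvm_drop_host_privileges() changed from fire-and-forget to collecting a result: each CPU's finalize call writes -EINVAL into a shared variable on failure, and the caller reads it back once on_each_cpu() returns. A hedged userspace sketch of the same fan-out-and-collect pattern, with pthreads standing in for cross-CPU calls:

    #include <pthread.h>
    #include <stdio.h>

    static _Atomic int err;     /* the kernel uses WRITE_ONCE() here */

    static void *finalize_one(void *arg)
    {
        long cpu = (long)arg;

        if (cpu == 2)           /* pretend CPU 2 fails to finalize */
            err = -22;          /* -EINVAL */
        return NULL;
    }

    int main(void)              /* build with -pthread */
    {
        pthread_t t[4];

        for (long i = 0; i < 4; i++)
            pthread_create(&t[i], NULL, finalize_one, (void *)i);
        for (int i = 0; i < 4; i++)
            pthread_join(t[i], NULL);

        printf("aggregate err = %d\n", err);    /* prints -22 */
        return 0;
    }
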
gen-hyprel does not support SHT_REL and ++# causes a build failure. Remove profile optimization flags. ++KBUILD_CFLAGS := $(filter-out -fprofile-sample-use=% -fprofile-use=%, $(KBUILD_CFLAGS)) + + # KVM nVHE code is run at a different exception code with a different map, so + # compiler instrumentation that inserts callbacks or checks into the code may diff --git a/arch/arm64/kvm/hyp/nvhe/host.S b/arch/arm64/kvm/hyp/nvhe/host.S index 4b652ffb591d4..d310d2b2c8b40 100644 --- a/arch/arm64/kvm/hyp/nvhe/host.S @@ -19087,6 +24657,101 @@ index fefcca2b14dc7..dcea440159855 100644 void vgic_mmio_write_spending(struct kvm_vcpu *vcpu, gpa_t addr, unsigned int len, unsigned long val); +diff --git a/arch/arm64/kvm/vgic/vgic-v3.c b/arch/arm64/kvm/vgic/vgic-v3.c +index 21a6207fb2eed..8eb70451323b6 100644 +--- a/arch/arm64/kvm/vgic/vgic-v3.c ++++ b/arch/arm64/kvm/vgic/vgic-v3.c +@@ -347,26 +347,23 @@ retry: + * The deactivation of the doorbell interrupt will trigger the + * unmapping of the associated vPE. + */ +-static void unmap_all_vpes(struct vgic_dist *dist) ++static void unmap_all_vpes(struct kvm *kvm) + { +- struct irq_desc *desc; ++ struct vgic_dist *dist = &kvm->arch.vgic; + int i; + +- for (i = 0; i < dist->its_vm.nr_vpes; i++) { +- desc = irq_to_desc(dist->its_vm.vpes[i]->irq); +- irq_domain_deactivate_irq(irq_desc_get_irq_data(desc)); +- } ++ for (i = 0; i < dist->its_vm.nr_vpes; i++) ++ free_irq(dist->its_vm.vpes[i]->irq, kvm_get_vcpu(kvm, i)); + } + +-static void map_all_vpes(struct vgic_dist *dist) ++static void map_all_vpes(struct kvm *kvm) + { +- struct irq_desc *desc; ++ struct vgic_dist *dist = &kvm->arch.vgic; + int i; + +- for (i = 0; i < dist->its_vm.nr_vpes; i++) { +- desc = irq_to_desc(dist->its_vm.vpes[i]->irq); +- irq_domain_activate_irq(irq_desc_get_irq_data(desc), false); +- } ++ for (i = 0; i < dist->its_vm.nr_vpes; i++) ++ WARN_ON(vgic_v4_request_vpe_irq(kvm_get_vcpu(kvm, i), ++ dist->its_vm.vpes[i]->irq)); + } + + /** +@@ -391,7 +388,7 @@ int vgic_v3_save_pending_tables(struct kvm *kvm) + * and enabling of the doorbells have already been done. 
+ */ + if (kvm_vgic_global_state.has_gicv4_1) { +- unmap_all_vpes(dist); ++ unmap_all_vpes(kvm); + vlpi_avail = true; + } + +@@ -441,7 +438,7 @@ int vgic_v3_save_pending_tables(struct kvm *kvm) + + out: + if (vlpi_avail) +- map_all_vpes(dist); ++ map_all_vpes(kvm); + + return ret; + } +diff --git a/arch/arm64/kvm/vgic/vgic-v4.c b/arch/arm64/kvm/vgic/vgic-v4.c +index c1845d8f5f7e7..f507e3fcffce3 100644 +--- a/arch/arm64/kvm/vgic/vgic-v4.c ++++ b/arch/arm64/kvm/vgic/vgic-v4.c +@@ -222,6 +222,11 @@ void vgic_v4_get_vlpi_state(struct vgic_irq *irq, bool *val) + *val = !!(*ptr & mask); + } + ++int vgic_v4_request_vpe_irq(struct kvm_vcpu *vcpu, int irq) ++{ ++ return request_irq(irq, vgic_v4_doorbell_handler, 0, "vcpu", vcpu); ++} ++ + /** + * vgic_v4_init - Initialize the GICv4 data structures + * @kvm: Pointer to the VM being initialized +@@ -282,8 +287,7 @@ int vgic_v4_init(struct kvm *kvm) + irq_flags &= ~IRQ_NOAUTOEN; + irq_set_status_flags(irq, irq_flags); + +- ret = request_irq(irq, vgic_v4_doorbell_handler, +- 0, "vcpu", vcpu); ++ ret = vgic_v4_request_vpe_irq(vcpu, irq); + if (ret) { + kvm_err("failed to allocate vcpu IRQ%d\n", irq); + /* +diff --git a/arch/arm64/kvm/vgic/vgic.h b/arch/arm64/kvm/vgic/vgic.h +index 14a9218641f57..36021c31a706a 100644 +--- a/arch/arm64/kvm/vgic/vgic.h ++++ b/arch/arm64/kvm/vgic/vgic.h +@@ -321,5 +321,6 @@ int vgic_v4_init(struct kvm *kvm); + void vgic_v4_teardown(struct kvm *kvm); + void vgic_v4_configure_vsgis(struct kvm *kvm); + void vgic_v4_get_vlpi_state(struct vgic_irq *irq, bool *val); ++int vgic_v4_request_vpe_irq(struct kvm_vcpu *vcpu, int irq); + + #endif diff --git a/arch/arm64/lib/clear_page.S b/arch/arm64/lib/clear_page.S index b84b179edba3a..1fd5d790ab800 100644 --- a/arch/arm64/lib/clear_page.S @@ -19199,6 +24864,306 @@ index aa0060178343a..60a8b6a8a42b5 100644 regs->pc = (unsigned long)&fixup->fixup + fixup->fixup; return 1; } +diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c +index 9ae24e3b72be1..97a93ee756a2e 100644 +--- a/arch/arm64/mm/fault.c ++++ b/arch/arm64/mm/fault.c +@@ -43,7 +43,7 @@ + #include <asm/traps.h> + + struct fault_info { +- int (*fn)(unsigned long far, unsigned int esr, ++ int (*fn)(unsigned long far, unsigned long esr, + struct pt_regs *regs); + int sig; + int code; +@@ -53,17 +53,17 @@ struct fault_info { + static const struct fault_info fault_info[]; + static struct fault_info debug_fault_info[]; + +-static inline const struct fault_info *esr_to_fault_info(unsigned int esr) ++static inline const struct fault_info *esr_to_fault_info(unsigned long esr) + { + return fault_info + (esr & ESR_ELx_FSC); + } + +-static inline const struct fault_info *esr_to_debug_fault_info(unsigned int esr) ++static inline const struct fault_info *esr_to_debug_fault_info(unsigned long esr) + { + return debug_fault_info + DBG_ESR_EVT(esr); + } + +-static void data_abort_decode(unsigned int esr) ++static void data_abort_decode(unsigned long esr) + { + pr_alert("Data abort info:\n"); + +@@ -85,11 +85,11 @@ static void data_abort_decode(unsigned int esr) + (esr & ESR_ELx_WNR) >> ESR_ELx_WNR_SHIFT); + } + +-static void mem_abort_decode(unsigned int esr) ++static void mem_abort_decode(unsigned long esr) + { + pr_alert("Mem abort info:\n"); + +- pr_alert(" ESR = 0x%08x\n", esr); ++ pr_alert(" ESR = 0x%016lx\n", esr); + pr_alert(" EC = 0x%02lx: %s, IL = %u bits\n", + ESR_ELx_EC(esr), esr_get_class_string(esr), + (esr & ESR_ELx_IL) ? 
32 : 16); +@@ -99,7 +99,7 @@ static void mem_abort_decode(unsigned int esr) + pr_alert(" EA = %lu, S1PTW = %lu\n", + (esr & ESR_ELx_EA) >> ESR_ELx_EA_SHIFT, + (esr & ESR_ELx_S1PTW) >> ESR_ELx_S1PTW_SHIFT); +- pr_alert(" FSC = 0x%02x: %s\n", (esr & ESR_ELx_FSC), ++ pr_alert(" FSC = 0x%02lx: %s\n", (esr & ESR_ELx_FSC), + esr_to_fault_info(esr)->name); + + if (esr_is_data_abort(esr)) +@@ -229,20 +229,20 @@ int ptep_set_access_flags(struct vm_area_struct *vma, + return 1; + } + +-static bool is_el1_instruction_abort(unsigned int esr) ++static bool is_el1_instruction_abort(unsigned long esr) + { + return ESR_ELx_EC(esr) == ESR_ELx_EC_IABT_CUR; + } + +-static bool is_el1_data_abort(unsigned int esr) ++static bool is_el1_data_abort(unsigned long esr) + { + return ESR_ELx_EC(esr) == ESR_ELx_EC_DABT_CUR; + } + +-static inline bool is_el1_permission_fault(unsigned long addr, unsigned int esr, ++static inline bool is_el1_permission_fault(unsigned long addr, unsigned long esr, + struct pt_regs *regs) + { +- unsigned int fsc_type = esr & ESR_ELx_FSC_TYPE; ++ unsigned long fsc_type = esr & ESR_ELx_FSC_TYPE; + + if (!is_el1_data_abort(esr) && !is_el1_instruction_abort(esr)) + return false; +@@ -258,7 +258,7 @@ static inline bool is_el1_permission_fault(unsigned long addr, unsigned int esr, + } + + static bool __kprobes is_spurious_el1_translation_fault(unsigned long addr, +- unsigned int esr, ++ unsigned long esr, + struct pt_regs *regs) + { + unsigned long flags; +@@ -290,7 +290,7 @@ static bool __kprobes is_spurious_el1_translation_fault(unsigned long addr, + } + + static void die_kernel_fault(const char *msg, unsigned long addr, +- unsigned int esr, struct pt_regs *regs) ++ unsigned long esr, struct pt_regs *regs) + { + bust_spinlocks(1); + +@@ -302,11 +302,11 @@ static void die_kernel_fault(const char *msg, unsigned long addr, + show_pte(addr); + die("Oops", regs, esr); + bust_spinlocks(0); +- do_exit(SIGKILL); ++ make_task_dead(SIGKILL); + } + + #ifdef CONFIG_KASAN_HW_TAGS +-static void report_tag_fault(unsigned long addr, unsigned int esr, ++static void report_tag_fault(unsigned long addr, unsigned long esr, + struct pt_regs *regs) + { + /* +@@ -318,11 +318,11 @@ static void report_tag_fault(unsigned long addr, unsigned int esr, + } + #else + /* Tag faults aren't enabled without CONFIG_KASAN_HW_TAGS. 
*/ +-static inline void report_tag_fault(unsigned long addr, unsigned int esr, ++static inline void report_tag_fault(unsigned long addr, unsigned long esr, + struct pt_regs *regs) { } + #endif + +-static void do_tag_recovery(unsigned long addr, unsigned int esr, ++static void do_tag_recovery(unsigned long addr, unsigned long esr, + struct pt_regs *regs) + { + +@@ -337,9 +337,9 @@ static void do_tag_recovery(unsigned long addr, unsigned int esr, + isb(); + } + +-static bool is_el1_mte_sync_tag_check_fault(unsigned int esr) ++static bool is_el1_mte_sync_tag_check_fault(unsigned long esr) + { +- unsigned int fsc = esr & ESR_ELx_FSC; ++ unsigned long fsc = esr & ESR_ELx_FSC; + + if (!is_el1_data_abort(esr)) + return false; +@@ -350,7 +350,12 @@ static bool is_el1_mte_sync_tag_check_fault(unsigned int esr) + return false; + } + +-static void __do_kernel_fault(unsigned long addr, unsigned int esr, ++static bool is_translation_fault(unsigned long esr) ++{ ++ return (esr & ESR_ELx_FSC_TYPE) == ESR_ELx_FSC_FAULT; ++} ++ ++static void __do_kernel_fault(unsigned long addr, unsigned long esr, + struct pt_regs *regs) + { + const char *msg; +@@ -382,7 +387,8 @@ static void __do_kernel_fault(unsigned long addr, unsigned int esr, + } else if (addr < PAGE_SIZE) { + msg = "NULL pointer dereference"; + } else { +- if (kfence_handle_page_fault(addr, esr & ESR_ELx_WNR, regs)) ++ if (is_translation_fault(esr) && ++ kfence_handle_page_fault(addr, esr & ESR_ELx_WNR, regs)) + return; + + msg = "paging request"; +@@ -391,7 +397,7 @@ static void __do_kernel_fault(unsigned long addr, unsigned int esr, + die_kernel_fault(msg, addr, esr, regs); + } + +-static void set_thread_esr(unsigned long address, unsigned int esr) ++static void set_thread_esr(unsigned long address, unsigned long esr) + { + current->thread.fault_address = address; + +@@ -439,7 +445,7 @@ static void set_thread_esr(unsigned long address, unsigned int esr) + * exception level). Fail safe by not providing an ESR + * context record at all. + */ +- WARN(1, "ESR 0x%x is not DABT or IABT from EL0\n", esr); ++ WARN(1, "ESR 0x%lx is not DABT or IABT from EL0\n", esr); + esr = 0; + break; + } +@@ -448,7 +454,7 @@ static void set_thread_esr(unsigned long address, unsigned int esr) + current->thread.fault_code = esr; + } + +-static void do_bad_area(unsigned long far, unsigned int esr, ++static void do_bad_area(unsigned long far, unsigned long esr, + struct pt_regs *regs) + { + unsigned long addr = untagged_addr(far); +@@ -499,7 +505,7 @@ static vm_fault_t __do_page_fault(struct mm_struct *mm, unsigned long addr, + return handle_mm_fault(vma, addr, mm_flags, regs); + } + +-static bool is_el0_instruction_abort(unsigned int esr) ++static bool is_el0_instruction_abort(unsigned long esr) + { + return ESR_ELx_EC(esr) == ESR_ELx_EC_IABT_LOW; + } +@@ -508,12 +514,12 @@ static bool is_el0_instruction_abort(unsigned int esr) + * Note: not valid for EL1 DC IVAC, but we never use that such that it + * should fault. EL0 cannot issue DC IVAC (undef). 
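
The unsigned int -> unsigned long churn through arch/arm64/mm/fault.c above is easy to misread as cosmetic. ESR_ELx is architecturally a 64-bit register, and later ARMv8 extensions define ISS2 bits above bit 31, so funnelling the value through a 32-bit parameter silently drops state; the matching %x -> %lx format-string changes follow from the same widening. A standalone illustration of the truncation; the bit-32 flag below merely stands in for an ISS2 bit and is not a claim about the exact layout:

    #include <stdio.h>
    #include <stdint.h>
    #include <inttypes.h>

    int main(void)
    {
            /* EC=0x25 (data abort), IL set, plus one bit above bit 31
             * standing in for an ISS2 field that 32 bits cannot hold. */
            uint64_t esr = (1ULL << 32) | 0x96000045ULL;
            unsigned int narrowed = (unsigned int)esr; /* old parameter type */

            printf("full ESR : 0x%016" PRIx64 "\n", esr);
            printf("narrowed : 0x%08x (bit 32 silently dropped)\n", narrowed);
            return 0;
    }
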
+ */ +-static bool is_write_abort(unsigned int esr) ++static bool is_write_abort(unsigned long esr) + { + return (esr & ESR_ELx_WNR) && !(esr & ESR_ELx_CM); + } + +-static int __kprobes do_page_fault(unsigned long far, unsigned int esr, ++static int __kprobes do_page_fault(unsigned long far, unsigned long esr, + struct pt_regs *regs) + { + const struct fault_info *inf; +@@ -671,7 +677,7 @@ no_context: + } + + static int __kprobes do_translation_fault(unsigned long far, +- unsigned int esr, ++ unsigned long esr, + struct pt_regs *regs) + { + unsigned long addr = untagged_addr(far); +@@ -683,19 +689,19 @@ static int __kprobes do_translation_fault(unsigned long far, + return 0; + } + +-static int do_alignment_fault(unsigned long far, unsigned int esr, ++static int do_alignment_fault(unsigned long far, unsigned long esr, + struct pt_regs *regs) + { + do_bad_area(far, esr, regs); + return 0; + } + +-static int do_bad(unsigned long far, unsigned int esr, struct pt_regs *regs) ++static int do_bad(unsigned long far, unsigned long esr, struct pt_regs *regs) + { + return 1; /* "fault" */ + } + +-static int do_sea(unsigned long far, unsigned int esr, struct pt_regs *regs) ++static int do_sea(unsigned long far, unsigned long esr, struct pt_regs *regs) + { + const struct fault_info *inf; + unsigned long siaddr; +@@ -725,7 +731,7 @@ static int do_sea(unsigned long far, unsigned int esr, struct pt_regs *regs) + return 0; + } + +-static int do_tag_check_fault(unsigned long far, unsigned int esr, ++static int do_tag_check_fault(unsigned long far, unsigned long esr, + struct pt_regs *regs) + { + /* +@@ -805,7 +811,7 @@ static const struct fault_info fault_info[] = { + { do_bad, SIGKILL, SI_KERNEL, "unknown 63" }, + }; + +-void do_mem_abort(unsigned long far, unsigned int esr, struct pt_regs *regs) ++void do_mem_abort(unsigned long far, unsigned long esr, struct pt_regs *regs) + { + const struct fault_info *inf = esr_to_fault_info(esr); + unsigned long addr = untagged_addr(far); +@@ -828,14 +834,14 @@ void do_mem_abort(unsigned long far, unsigned int esr, struct pt_regs *regs) + } + NOKPROBE_SYMBOL(do_mem_abort); + +-void do_sp_pc_abort(unsigned long addr, unsigned int esr, struct pt_regs *regs) ++void do_sp_pc_abort(unsigned long addr, unsigned long esr, struct pt_regs *regs) + { + arm64_notify_die("SP/PC alignment exception", regs, SIGBUS, BUS_ADRALN, + addr, esr); + } + NOKPROBE_SYMBOL(do_sp_pc_abort); + +-int __init early_brk64(unsigned long addr, unsigned int esr, ++int __init early_brk64(unsigned long addr, unsigned long esr, + struct pt_regs *regs); + + /* +@@ -855,7 +861,7 @@ static struct fault_info __refdata debug_fault_info[] = { + }; + + void __init hook_debug_fault_code(int nr, +- int (*fn)(unsigned long, unsigned int, struct pt_regs *), ++ int (*fn)(unsigned long, unsigned long, struct pt_regs *), + int sig, int code, const char *name) + { + BUG_ON(nr < 0 || nr >= ARRAY_SIZE(debug_fault_info)); +@@ -888,7 +894,7 @@ static void debug_exception_exit(struct pt_regs *regs) + } + NOKPROBE_SYMBOL(debug_exception_exit); + +-void do_debug_exception(unsigned long addr_if_watchpoint, unsigned int esr, ++void do_debug_exception(unsigned long addr_if_watchpoint, unsigned long esr, + struct pt_regs *regs) + { + const struct fault_info *inf = esr_to_debug_fault_info(esr); diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 37a81754d9b61..3b269c7567984 100644 --- a/arch/arm64/mm/init.c @@ -19487,6 +25452,24 @@ index cfd9deb347c38..6680689242df3 100644 return ret; } +diff --git 
a/arch/arm64/mm/mteswap.c b/arch/arm64/mm/mteswap.c +index 7c4ef56265ee1..fd6cabc6d033a 100644 +--- a/arch/arm64/mm/mteswap.c ++++ b/arch/arm64/mm/mteswap.c +@@ -62,7 +62,12 @@ bool mte_restore_tags(swp_entry_t entry, struct page *page) + * the new page->flags are visible before the tags were updated. + */ + smp_wmb(); +- mte_restore_page_tags(page_address(page), tags); ++ /* ++ * Test PG_mte_tagged again in case it was racing with another ++ * set_pte_at(). ++ */ ++ if (!test_and_set_bit(PG_mte_tagged, &page->flags)) ++ mte_restore_page_tags(page_address(page), tags); + + return true; + } diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index d35c90d2e47ad..1a9684b114745 100644 --- a/arch/arm64/mm/proc.S @@ -19675,6 +25658,19 @@ index 49305c2e6dfd3..6b1e70aee8cff 100644 WORKAROUND_CAVIUM_23154 WORKAROUND_CAVIUM_27456 WORKAROUND_CAVIUM_30115 +diff --git a/arch/csky/abiv1/alignment.c b/arch/csky/abiv1/alignment.c +index cb2a0d94a144d..2df115d0e2105 100644 +--- a/arch/csky/abiv1/alignment.c ++++ b/arch/csky/abiv1/alignment.c +@@ -294,7 +294,7 @@ bad_area: + __func__, opcode, rz, rx, imm, addr); + show_regs(regs); + bust_spinlocks(0); +- do_exit(SIGKILL); ++ make_task_dead(SIGKILL); + } + + force_sig_fault(SIGBUS, BUS_ADRALN, (void __user *)addr); diff --git a/arch/csky/include/asm/uaccess.h b/arch/csky/include/asm/uaccess.h index c40f06ee8d3ef..ac5a54f57d407 100644 --- a/arch/csky/include/asm/uaccess.h @@ -19737,10 +25733,18 @@ index ab55e98ee8f62..75e1f9df5f604 100644 return; } diff --git a/arch/csky/kernel/probes/kprobes.c b/arch/csky/kernel/probes/kprobes.c -index 8fffa34d4e1c5..584ed9f36290f 100644 +index 8fffa34d4e1c5..bd92ac376e157 100644 --- a/arch/csky/kernel/probes/kprobes.c +++ b/arch/csky/kernel/probes/kprobes.c -@@ -28,7 +28,7 @@ static int __kprobes patch_text_cb(void *priv) +@@ -1,5 +1,7 @@ + // SPDX-License-Identifier: GPL-2.0+ + ++#define pr_fmt(fmt) "kprobes: " fmt ++ + #include <linux/kprobes.h> + #include <linux/extable.h> + #include <linux/slab.h> +@@ -28,7 +30,7 @@ static int __kprobes patch_text_cb(void *priv) struct csky_insn_patch *param = priv; unsigned int addr = (unsigned int)param->addr; @@ -19749,6 +25753,19 @@ index 8fffa34d4e1c5..584ed9f36290f 100644 *(u16 *) addr = cpu_to_le16(param->opcode); dcache_wb_range(addr, addr + 2); atomic_inc(¶m->cpu_count); +@@ -77,10 +79,8 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p) + { + unsigned long probe_addr = (unsigned long)p->addr; + +- if (probe_addr & 0x1) { +- pr_warn("Address not aligned.\n"); +- return -EINVAL; +- } ++ if (probe_addr & 0x1) ++ return -EILSEQ; + + /* copy instruction */ + p->opcode = le32_to_cpu(*p->addr); @@ -124,6 +124,10 @@ void __kprobes arch_disarm_kprobe(struct kprobe *p) void __kprobes arch_remove_kprobe(struct kprobe *p) @@ -19760,6 +25777,15 @@ index 8fffa34d4e1c5..584ed9f36290f 100644 } static void __kprobes save_previous_kprobe(struct kprobe_ctlblk *kcb) +@@ -225,7 +229,7 @@ static int __kprobes reenter_kprobe(struct kprobe *p, + break; + case KPROBE_HIT_SS: + case KPROBE_REENTER: +- pr_warn("Unrecoverable kprobe detected.\n"); ++ pr_warn("Failed to recover from reentered kprobes.\n"); + dump_kprobe(p); + BUG(); + break; diff --git a/arch/csky/kernel/signal.c b/arch/csky/kernel/signal.c index c7b763d2f526e..8867ddf3e6c77 100644 --- a/arch/csky/kernel/signal.c @@ -19774,9 +25800,18 @@ index c7b763d2f526e..8867ddf3e6c77 100644 frame = get_sigframe(ksig, regs, sizeof(*frame)); diff --git a/arch/csky/kernel/traps.c b/arch/csky/kernel/traps.c -index e5fbf8653a215..2020af88b6361 
100644 +index e5fbf8653a215..6e426fba01193 100644 --- a/arch/csky/kernel/traps.c +++ b/arch/csky/kernel/traps.c +@@ -109,7 +109,7 @@ void die(struct pt_regs *regs, const char *str) + if (panic_on_oops) + panic("Fatal exception"); + if (ret != NOTIFY_STOP) +- do_exit(SIGSEGV); ++ make_task_dead(SIGSEGV); + } + + void do_trap(struct pt_regs *regs, int signo, int code, unsigned long addr) @@ -209,7 +209,7 @@ asmlinkage void do_trap_illinsn(struct pt_regs *regs) asmlinkage void do_trap_fpe(struct pt_regs *regs) @@ -19795,6 +25830,53 @@ index e5fbf8653a215..2020af88b6361 100644 if (user_mode(regs) && fpu_libc_helper(regs)) return; #endif +diff --git a/arch/csky/mm/fault.c b/arch/csky/mm/fault.c +index 466ad949818a6..7215a46b6b8eb 100644 +--- a/arch/csky/mm/fault.c ++++ b/arch/csky/mm/fault.c +@@ -67,7 +67,7 @@ static inline void no_context(struct pt_regs *regs, unsigned long addr) + pr_alert("Unable to handle kernel paging request at virtual " + "addr 0x%08lx, pc: 0x%08lx\n", addr, regs->pc); + die(regs, "Oops"); +- do_exit(SIGKILL); ++ make_task_dead(SIGKILL); + } + + static inline void mm_fault_error(struct pt_regs *regs, unsigned long addr, vm_fault_t fault) +diff --git a/arch/h8300/kernel/traps.c b/arch/h8300/kernel/traps.c +index bdbe988d8dbcf..a92c39e03802e 100644 +--- a/arch/h8300/kernel/traps.c ++++ b/arch/h8300/kernel/traps.c +@@ -17,6 +17,7 @@ + #include <linux/types.h> + #include <linux/sched.h> + #include <linux/sched/debug.h> ++#include <linux/sched/task.h> + #include <linux/mm_types.h> + #include <linux/kernel.h> + #include <linux/errno.h> +@@ -106,7 +107,7 @@ void die(const char *str, struct pt_regs *fp, unsigned long err) + dump(fp); + + spin_unlock_irq(&die_lock); +- do_exit(SIGSEGV); ++ make_task_dead(SIGSEGV); + } + + static int kstack_depth_to_print = 24; +diff --git a/arch/h8300/mm/fault.c b/arch/h8300/mm/fault.c +index d4bc9c16f2df9..b465441f490df 100644 +--- a/arch/h8300/mm/fault.c ++++ b/arch/h8300/mm/fault.c +@@ -51,7 +51,7 @@ asmlinkage int do_page_fault(struct pt_regs *regs, unsigned long address, + printk(" at virtual address %08lx\n", address); + if (!user_mode(regs)) + die("Oops", regs, error_code); +- do_exit(SIGKILL); ++ make_task_dead(SIGKILL); + + return 1; + } diff --git a/arch/hexagon/include/asm/timer-regs.h b/arch/hexagon/include/asm/timer-regs.h deleted file mode 100644 index ee6c61423a058..0000000000000 @@ -19914,6 +25996,19 @@ index feffe527ac929..febc95714d756 100644 return 0; } +diff --git a/arch/hexagon/kernel/traps.c b/arch/hexagon/kernel/traps.c +index edfc35dafeb19..1240f038cce02 100644 +--- a/arch/hexagon/kernel/traps.c ++++ b/arch/hexagon/kernel/traps.c +@@ -214,7 +214,7 @@ int die(const char *str, struct pt_regs *regs, long err) + panic("Fatal exception"); + + oops_exit(); +- do_exit(err); ++ make_task_dead(err); + return 0; + } + diff --git a/arch/hexagon/lib/io.c b/arch/hexagon/lib/io.c index d35d69d6588c4..55f75392857b0 100644 --- a/arch/hexagon/lib/io.c @@ -19947,6 +26042,19 @@ index d35d69d6588c4..55f75392857b0 100644 } +EXPORT_SYMBOL(__raw_writesl); +diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig +index 1e33666fa679b..b1f2b6ac9b1d5 100644 +--- a/arch/ia64/Kconfig ++++ b/arch/ia64/Kconfig +@@ -323,7 +323,7 @@ config ARCH_PROC_KCORE_TEXT + depends on PROC_KCORE + + config IA64_MCA_RECOVERY +- tristate "MCA recovery from errors other than TLB." ++ bool "MCA recovery from errors other than TLB." 
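
The do_exit() -> make_task_dead() substitutions repeated across the csky, h8300, hexagon and ia64 hunks above (and in the other architectures this patch touches) all encode one rule: a fatal trap taken in kernel context must not walk the regular task-exit path, which can sleep and may retake locks already held at the oops site. A sketch of the converted call-site shape, assuming make_task_dead() is declared in <linux/sched/task.h> as in this kernel series; it is not a copy of any single hunk:

    #include <linux/sched/task.h>   /* make_task_dead(); assumed location */
    #include <linux/ptrace.h>
    #include <linux/signal.h>

    /* Illustrative shape of a converted die() path. */
    void die(const char *str, struct pt_regs *regs, long err)
    {
            /* ... dump registers, run the die notifier chain, maybe panic ... */

            /*
             * Never do_exit() here: the ordinary exit path assumes a
             * healthy, schedulable task. make_task_dead() is the
             * dedicated oops exit that skips the unsafe cleanup.
             */
            make_task_dead(SIGSEGV);
    }
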
+ + config IA64_PALINFO + tristate "/proc/pal support" diff --git a/arch/ia64/Kconfig.debug b/arch/ia64/Kconfig.debug index 40ca23bd228d6..2ce008e2d1644 100644 --- a/arch/ia64/Kconfig.debug @@ -20025,6 +26133,45 @@ index 441ed04b10378..d4048518a1d7d 100644 return 1; return 0; +diff --git a/arch/ia64/kernel/mca_drv.c b/arch/ia64/kernel/mca_drv.c +index 5bfc79be4cefe..23c203639a968 100644 +--- a/arch/ia64/kernel/mca_drv.c ++++ b/arch/ia64/kernel/mca_drv.c +@@ -176,7 +176,7 @@ mca_handler_bh(unsigned long paddr, void *iip, unsigned long ipsr) + spin_unlock(&mca_bh_lock); + + /* This process is about to be killed itself */ +- do_exit(SIGKILL); ++ make_task_dead(SIGKILL); + } + + /** +diff --git a/arch/ia64/kernel/traps.c b/arch/ia64/kernel/traps.c +index e13cb905930fb..753642366e12e 100644 +--- a/arch/ia64/kernel/traps.c ++++ b/arch/ia64/kernel/traps.c +@@ -85,7 +85,7 @@ die (const char *str, struct pt_regs *regs, long err) + if (panic_on_oops) + panic("Fatal exception"); + +- do_exit(SIGSEGV); ++ make_task_dead(SIGSEGV); + return 0; + } + +diff --git a/arch/ia64/mm/fault.c b/arch/ia64/mm/fault.c +index 02de2e70c5874..4796cccbf74f3 100644 +--- a/arch/ia64/mm/fault.c ++++ b/arch/ia64/mm/fault.c +@@ -259,7 +259,7 @@ retry: + regs = NULL; + bust_spinlocks(0); + if (regs) +- do_exit(SIGKILL); ++ make_task_dead(SIGKILL); + return; + + out_of_memory: diff --git a/arch/ia64/mm/numa.c b/arch/ia64/mm/numa.c index d6579ec3ea324..4c7b1f50e3b7d 100644 --- a/arch/ia64/mm/numa.c @@ -20279,9 +26426,18 @@ index 340ffeea0a9dc..a97600b2af502 100644 void read_persistent_clock64(struct timespec64 *ts) { diff --git a/arch/m68k/kernel/traps.c b/arch/m68k/kernel/traps.c -index 9718ce94cc845..34d6458340b0f 100644 +index 9718ce94cc845..59fc63feb0dcc 100644 --- a/arch/m68k/kernel/traps.c +++ b/arch/m68k/kernel/traps.c +@@ -1131,7 +1131,7 @@ void die_if_kernel (char *str, struct pt_regs *fp, int nr) + pr_crit("%s: %08x\n", str, nr); + show_registers(fp); + add_taint(TAINT_DIE, LOCKDEP_NOW_UNRELIABLE); +- do_exit(SIGSEGV); ++ make_task_dead(SIGSEGV); + } + + asmlinkage void set_esp0(unsigned long ssp) @@ -1145,7 +1145,7 @@ asmlinkage void set_esp0(unsigned long ssp) */ asmlinkage void fpsp040_die(void) @@ -20291,6 +26447,19 @@ index 9718ce94cc845..34d6458340b0f 100644 } #ifdef CONFIG_M68KFPU_EMU +diff --git a/arch/m68k/mm/fault.c b/arch/m68k/mm/fault.c +index ef46e77e97a5b..fcb3a0d8421c5 100644 +--- a/arch/m68k/mm/fault.c ++++ b/arch/m68k/mm/fault.c +@@ -48,7 +48,7 @@ int send_fault_sig(struct pt_regs *regs) + pr_alert("Unable to handle kernel access"); + pr_cont(" at virtual address %p\n", addr); + die_if_kernel("Oops", regs, 0 /*error_code*/); +- do_exit(SIGKILL); ++ make_task_dead(SIGKILL); + } + + return 1; diff --git a/arch/microblaze/include/asm/uaccess.h b/arch/microblaze/include/asm/uaccess.h index d2a8ef9f89787..3fe96979d2c62 100644 --- a/arch/microblaze/include/asm/uaccess.h @@ -20361,6 +26530,23 @@ index d2a8ef9f89787..3fe96979d2c62 100644 __gu_err; \ }) +diff --git a/arch/microblaze/kernel/exceptions.c b/arch/microblaze/kernel/exceptions.c +index 908788497b287..fd153d5fab982 100644 +--- a/arch/microblaze/kernel/exceptions.c ++++ b/arch/microblaze/kernel/exceptions.c +@@ -44,10 +44,10 @@ void die(const char *str, struct pt_regs *fp, long err) + pr_warn("Oops: %s, sig: %ld\n", str, err); + show_regs(fp); + spin_unlock_irq(&die_lock); +- /* do_exit() should take care of panic'ing from an interrupt ++ /* make_task_dead() should take care of panic'ing from an interrupt + * context so we don't handle it here + */ 
+- do_exit(err); ++ make_task_dead(err); + } + + /* for user application debugging */ diff --git a/arch/mips/Kbuild.platforms b/arch/mips/Kbuild.platforms index 584081df89c28..6e3f36c841e5d 100644 --- a/arch/mips/Kbuild.platforms @@ -20459,10 +26645,19 @@ index 0a63721d0fbf3..5a33d6b48d779 100644 } extmem -= lowmem; diff --git a/arch/mips/bcm63xx/clk.c b/arch/mips/bcm63xx/clk.c -index 5a3e325275d0d..6e6756e8fa0a9 100644 +index 5a3e325275d0d..86a6e25908664 100644 --- a/arch/mips/bcm63xx/clk.c +++ b/arch/mips/bcm63xx/clk.c -@@ -381,6 +381,18 @@ void clk_disable(struct clk *clk) +@@ -361,6 +361,8 @@ static struct clk clk_periph = { + */ + int clk_enable(struct clk *clk) + { ++ if (!clk) ++ return 0; + mutex_lock(&clocks_mutex); + clk_enable_unlocked(clk); + mutex_unlock(&clocks_mutex); +@@ -381,6 +383,18 @@ void clk_disable(struct clk *clk) EXPORT_SYMBOL(clk_disable); @@ -20526,6 +26721,32 @@ index 9e34f433b9b58..efbbddaf0fde5 100644 reg = <0x22 0x6>; }; }; +diff --git a/arch/mips/cavium-octeon/executive/cvmx-helper-board.c b/arch/mips/cavium-octeon/executive/cvmx-helper-board.c +index 1daa0c6b6f4ea..572a053e30ed5 100644 +--- a/arch/mips/cavium-octeon/executive/cvmx-helper-board.c ++++ b/arch/mips/cavium-octeon/executive/cvmx-helper-board.c +@@ -211,7 +211,7 @@ union cvmx_helper_link_info __cvmx_helper_board_link_get(int ipd_port) + { + union cvmx_helper_link_info result; + +- WARN(!octeon_is_simulation(), ++ WARN_ONCE(!octeon_is_simulation(), + "Using deprecated link status - please update your DT"); + + /* Unless we fix it later, all links are defaulted to down */ +diff --git a/arch/mips/cavium-octeon/executive/cvmx-helper.c b/arch/mips/cavium-octeon/executive/cvmx-helper.c +index 6044ff4710022..a18ad2daf0052 100644 +--- a/arch/mips/cavium-octeon/executive/cvmx-helper.c ++++ b/arch/mips/cavium-octeon/executive/cvmx-helper.c +@@ -1100,7 +1100,7 @@ union cvmx_helper_link_info cvmx_helper_link_get(int ipd_port) + if (index == 0) + result = __cvmx_helper_rgmii_link_get(ipd_port); + else { +- WARN(1, "Using deprecated link status - please update your DT"); ++ WARN_ONCE(1, "Using deprecated link status - please update your DT"); + result.s.full_duplex = 1; + result.s.link_up = 1; + result.s.speed = 1000; diff --git a/arch/mips/cavium-octeon/octeon-irq.c b/arch/mips/cavium-octeon/octeon-irq.c index be5d4afcd30f9..353dfeee0a6d3 100644 --- a/arch/mips/cavium-octeon/octeon-irq.c @@ -20844,6 +27065,18 @@ index b463f2aa5a613..db497a8167da2 100644 ".previous\n" \ \ : [tmp_err] "=r" (error) \ +diff --git a/arch/mips/include/asm/fw/fw.h b/arch/mips/include/asm/fw/fw.h +index d0ef8b4892bbe..d0494ce4b3373 100644 +--- a/arch/mips/include/asm/fw/fw.h ++++ b/arch/mips/include/asm/fw/fw.h +@@ -26,6 +26,6 @@ extern char *fw_getcmdline(void); + extern void fw_meminit(void); + extern char *fw_getenv(char *name); + extern unsigned long fw_getenvl(char *name); +-extern void fw_init_early_console(char port); ++extern void fw_init_early_console(void); + + #endif /* __ASM_FW_H_ */ diff --git a/arch/mips/include/asm/local.h b/arch/mips/include/asm/local.h index ecda7295ddcd1..3fa6340903882 100644 --- a/arch/mips/include/asm/local.h @@ -20938,6 +27171,24 @@ index 13373c5144f89..efb41b3519747 100644 bnez t0, 2f nop 1: +diff --git a/arch/mips/include/asm/mach-ralink/mt7621.h b/arch/mips/include/asm/mach-ralink/mt7621.h +index 6bbf082dd149e..79d5bb0e06d63 100644 +--- a/arch/mips/include/asm/mach-ralink/mt7621.h ++++ b/arch/mips/include/asm/mach-ralink/mt7621.h +@@ -7,10 +7,12 @@ + #ifndef _MT7621_REGS_H_ + #define 
_MT7621_REGS_H_ + ++#define IOMEM(x) ((void __iomem *)(KSEG1ADDR(x))) ++ + #define MT7621_PALMBUS_BASE 0x1C000000 + #define MT7621_PALMBUS_SIZE 0x03FFFFFF + +-#define MT7621_SYSC_BASE 0x1E000000 ++#define MT7621_SYSC_BASE IOMEM(0x1E000000) + + #define SYSC_REG_CHIP_NAME0 0x00 + #define SYSC_REG_CHIP_NAME1 0x04 diff --git a/arch/mips/include/asm/mips-cm.h b/arch/mips/include/asm/mips-cm.h index aeae2effa123d..23c67c0871b17 100644 --- a/arch/mips/include/asm/mips-cm.h @@ -21509,6 +27760,63 @@ index 630fcb4cb30e7..7c861e6a89529 100644 } #else static inline void cpu_probe_loongson(struct cpuinfo_mips *c, unsigned int cpu) { } +diff --git a/arch/mips/kernel/jump_label.c b/arch/mips/kernel/jump_label.c +index 662c8db9f45ba..9f5b1247b4ba4 100644 +--- a/arch/mips/kernel/jump_label.c ++++ b/arch/mips/kernel/jump_label.c +@@ -56,7 +56,7 @@ void arch_jump_label_transform(struct jump_entry *e, + * The branch offset must fit in the instruction's 26 + * bit field. + */ +- WARN_ON((offset >= BIT(25)) || ++ WARN_ON((offset >= (long)BIT(25)) || + (offset < -(long)BIT(25))); + + insn.j_format.opcode = bc6_op; +diff --git a/arch/mips/kernel/kprobes.c b/arch/mips/kernel/kprobes.c +index 75bff0f773198..b0934a0d7aedd 100644 +--- a/arch/mips/kernel/kprobes.c ++++ b/arch/mips/kernel/kprobes.c +@@ -11,6 +11,8 @@ + * Copyright (C) IBM Corporation, 2002, 2004 + */ + ++#define pr_fmt(fmt) "kprobes: " fmt ++ + #include <linux/kprobes.h> + #include <linux/preempt.h> + #include <linux/uaccess.h> +@@ -80,8 +82,7 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p) + insn = p->addr[0]; + + if (insn_has_ll_or_sc(insn)) { +- pr_notice("Kprobes for ll and sc instructions are not" +- "supported\n"); ++ pr_notice("Kprobes for ll and sc instructions are not supported\n"); + ret = -EINVAL; + goto out; + } +@@ -219,7 +220,7 @@ static int evaluate_branch_instruction(struct kprobe *p, struct pt_regs *regs, + return 0; + + unaligned: +- pr_notice("%s: unaligned epc - sending SIGBUS.\n", current->comm); ++ pr_notice("Failed to emulate branch instruction because of unaligned epc - sending SIGBUS to %s.\n", current->comm); + force_sig(SIGBUS); + return -EFAULT; + +@@ -238,10 +239,8 @@ static void prepare_singlestep(struct kprobe *p, struct pt_regs *regs, + regs->cp0_epc = (unsigned long)p->addr; + else if (insn_has_delayslot(p->opcode)) { + ret = evaluate_branch_instruction(p, regs, kcb); +- if (ret < 0) { +- pr_notice("Kprobes: Error in evaluating branch\n"); ++ if (ret < 0) + return; +- } + } + regs->cp0_epc = (unsigned long)&p->ainsn.insn[0]; + } diff --git a/arch/mips/kernel/mips-cm.c b/arch/mips/kernel/mips-cm.c index 90f1c3df1f0e4..b4f7d950c8468 100644 --- a/arch/mips/kernel/mips-cm.c @@ -21825,17 +28133,20 @@ index b91e911064756..2e687c60bc4f1 100644 .endm diff --git a/arch/mips/kernel/relocate_kernel.S b/arch/mips/kernel/relocate_kernel.S -index f3c908abdbb80..cfde14b48fd8d 100644 +index f3c908abdbb80..f5b2ef979b437 100644 --- a/arch/mips/kernel/relocate_kernel.S +++ b/arch/mips/kernel/relocate_kernel.S -@@ -147,10 +147,10 @@ LEAF(kexec_smp_wait) +@@ -145,12 +145,11 @@ LEAF(kexec_smp_wait) + * kexec_args[0..3] are used to prepare register values. 
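
In the mips jump_label hunk above, the old expression offset >= BIT(25) mixes a signed long with BIT(25), which expands to 1UL << 25. The usual arithmetic conversions then perform the comparison as unsigned, so any negative (backward) branch offset converts to a huge value and trips the WARN_ON even when it is perfectly in range; the added (long) cast restores a signed comparison. A compilable demonstration:

    /* Why the (long) cast matters: a signed/unsigned comparison is
     * carried out as unsigned after the usual arithmetic conversions. */
    #include <stdio.h>

    #define BIT(n) (1UL << (n))

    int main(void)
    {
            long offset = -4096;    /* a valid negative branch offset */

            if (offset >= BIT(25))          /* offset converted to unsigned: true */
                    printf("unsigned compare: spuriously out of range\n");

            if (offset >= (long)BIT(25))    /* signed compare: false, as intended */
                    printf("never printed\n");
            else
                    printf("signed compare: in range\n");
            return 0;
    }

Note that the second operand of the original || already carried the cast, as -(long)BIT(25), which is why only the first comparison needed fixing.
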
+ */ - kexec_args: - EXPORT(kexec_args) +-kexec_args: +- EXPORT(kexec_args) -arg0: PTR 0x0 -arg1: PTR 0x0 -arg2: PTR 0x0 -arg3: PTR 0x0 ++EXPORT(kexec_args) +arg0: PTR_WD 0x0 +arg1: PTR_WD 0x0 +arg2: PTR_WD 0x0 @@ -21843,14 +28154,17 @@ index f3c908abdbb80..cfde14b48fd8d 100644 .size kexec_args,PTRSIZE*4 #ifdef CONFIG_SMP -@@ -161,10 +161,10 @@ arg3: PTR 0x0 +@@ -159,31 +158,27 @@ arg3: PTR 0x0 + * their registers a0-a3. secondary_kexec_args[0..3] are used + * to prepare register values. */ - secondary_kexec_args: - EXPORT(secondary_kexec_args) +-secondary_kexec_args: +- EXPORT(secondary_kexec_args) -s_arg0: PTR 0x0 -s_arg1: PTR 0x0 -s_arg2: PTR 0x0 -s_arg3: PTR 0x0 ++EXPORT(secondary_kexec_args) +s_arg0: PTR_WD 0x0 +s_arg1: PTR_WD 0x0 +s_arg2: PTR_WD 0x0 @@ -21858,25 +28172,29 @@ index f3c908abdbb80..cfde14b48fd8d 100644 .size secondary_kexec_args,PTRSIZE*4 kexec_flag: LONG 0x1 -@@ -173,17 +173,17 @@ kexec_flag: - kexec_start_address: - EXPORT(kexec_start_address) + #endif + +-kexec_start_address: +- EXPORT(kexec_start_address) - PTR 0x0 ++EXPORT(kexec_start_address) + PTR_WD 0x0 .size kexec_start_address, PTRSIZE - kexec_indirection_page: - EXPORT(kexec_indirection_page) +-kexec_indirection_page: +- EXPORT(kexec_indirection_page) - PTR 0 ++EXPORT(kexec_indirection_page) + PTR_WD 0 .size kexec_indirection_page, PTRSIZE relocate_new_kernel_end: - relocate_new_kernel_size: - EXPORT(relocate_new_kernel_size) +-relocate_new_kernel_size: +- EXPORT(relocate_new_kernel_size) - PTR relocate_new_kernel_end - relocate_new_kernel ++EXPORT(relocate_new_kernel_size) + PTR_WD relocate_new_kernel_end - relocate_new_kernel .size relocate_new_kernel_size, PTRSIZE diff --git a/arch/mips/kernel/scall32-o32.S b/arch/mips/kernel/scall32-o32.S @@ -22061,9 +28379,18 @@ index caa01457dce60..ed339d7979f3f 100644 return 0; diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c -index 6f07362de5cec..edd93430b954a 100644 +index 6f07362de5cec..afb2c955d99ef 100644 --- a/arch/mips/kernel/traps.c +++ b/arch/mips/kernel/traps.c +@@ -416,7 +416,7 @@ void __noreturn die(const char *str, struct pt_regs *regs) + if (regs && kexec_should_crash(current)) + crash_kexec(regs); + +- do_exit(sig); ++ make_task_dead(sig); + } + + extern struct exception_table_entry __start___dbe_table[]; @@ -2085,19 +2085,19 @@ static void *set_vi_srs_handler(int n, vi_handler_t addr, int srs) * If no shadow set is selected then use the default handler * that does normal register saving and standard interrupt exit @@ -22116,6 +28443,64 @@ index 3d0cf471f2fe1..b2cc2c2dd4bfc 100644 ret = io_remap_pfn_range(vma, base, gic_pfn, gic_size, pgprot_noncached(vma->vm_page_prot)); +diff --git a/arch/mips/kernel/vpe-cmp.c b/arch/mips/kernel/vpe-cmp.c +index e673603e11e5d..92140edb3ce3e 100644 +--- a/arch/mips/kernel/vpe-cmp.c ++++ b/arch/mips/kernel/vpe-cmp.c +@@ -75,7 +75,6 @@ ATTRIBUTE_GROUPS(vpe); + + static void vpe_device_release(struct device *cd) + { +- kfree(cd); + } + + static struct class vpe_class = { +@@ -157,6 +156,7 @@ out_dev: + device_del(&vpe_device); + + out_class: ++ put_device(&vpe_device); + class_unregister(&vpe_class); + + out_chrdev: +@@ -169,7 +169,7 @@ void __exit vpe_module_exit(void) + { + struct vpe *v, *n; + +- device_del(&vpe_device); ++ device_unregister(&vpe_device); + class_unregister(&vpe_class); + unregister_chrdev(major, VPE_MODULE_NAME); + +diff --git a/arch/mips/kernel/vpe-mt.c b/arch/mips/kernel/vpe-mt.c +index bad6b0891b2b5..84a82b551ec35 100644 +--- a/arch/mips/kernel/vpe-mt.c ++++ 
b/arch/mips/kernel/vpe-mt.c +@@ -313,7 +313,6 @@ ATTRIBUTE_GROUPS(vpe); + + static void vpe_device_release(struct device *cd) + { +- kfree(cd); + } + + static struct class vpe_class = { +@@ -497,6 +496,7 @@ out_dev: + device_del(&vpe_device); + + out_class: ++ put_device(&vpe_device); + class_unregister(&vpe_class); + + out_chrdev: +@@ -509,7 +509,7 @@ void __exit vpe_module_exit(void) + { + struct vpe *v, *n; + +- device_del(&vpe_device); ++ device_unregister(&vpe_device); + class_unregister(&vpe_class); + unregister_chrdev(major, VPE_MODULE_NAME); + diff --git a/arch/mips/lantiq/clk.c b/arch/mips/lantiq/clk.c index dd819e31fcbbf..2d5a0bcb0cec1 100644 --- a/arch/mips/lantiq/clk.c @@ -22508,6 +28893,36 @@ index e9de6da0ce51f..9dcfe9de55b0a 100644 return platform_add_devices(ls1c_platform_devices, ARRAY_SIZE(ls1c_platform_devices)); +diff --git a/arch/mips/loongson64/reset.c b/arch/mips/loongson64/reset.c +index 758d5d26aaaa2..e420800043b08 100644 +--- a/arch/mips/loongson64/reset.c ++++ b/arch/mips/loongson64/reset.c +@@ -16,6 +16,7 @@ + #include <asm/bootinfo.h> + #include <asm/idle.h> + #include <asm/reboot.h> ++#include <asm/bug.h> + + #include <loongson.h> + #include <boot_param.h> +@@ -159,8 +160,17 @@ static int __init mips_reboot_setup(void) + + #ifdef CONFIG_KEXEC + kexec_argv = kmalloc(KEXEC_ARGV_SIZE, GFP_KERNEL); ++ if (WARN_ON(!kexec_argv)) ++ return -ENOMEM; ++ + kdump_argv = kmalloc(KEXEC_ARGV_SIZE, GFP_KERNEL); ++ if (WARN_ON(!kdump_argv)) ++ return -ENOMEM; ++ + kexec_envp = kmalloc(KEXEC_ENVP_SIZE, GFP_KERNEL); ++ if (WARN_ON(!kexec_envp)) ++ return -ENOMEM; ++ + fw_arg1 = KEXEC_ARGV_ADDR; + memcpy(kexec_envp, (void *)fw_arg2, KEXEC_ENVP_SIZE); + diff --git a/arch/mips/loongson64/vbios_quirk.c b/arch/mips/loongson64/vbios_quirk.c index 9a29e94d3db1d..3115d4de982c5 100644 --- a/arch/mips/loongson64/vbios_quirk.c @@ -22637,6 +29052,70 @@ index 9adad24c2e65e..3471a089bc05f 100644 /* * We'll only be making use of the fact that we can rotate bits * into the fill if the CPU supports RIXI, so don't bother +diff --git a/arch/mips/pic32/pic32mzda/early_console.c b/arch/mips/pic32/pic32mzda/early_console.c +index 25372e62783b5..3cd1b408fa1cb 100644 +--- a/arch/mips/pic32/pic32mzda/early_console.c ++++ b/arch/mips/pic32/pic32mzda/early_console.c +@@ -27,7 +27,7 @@ + #define U_BRG(x) (UART_BASE(x) + 0x40) + + static void __iomem *uart_base; +-static char console_port = -1; ++static int console_port = -1; + + static int __init configure_uart_pins(int port) + { +@@ -47,7 +47,7 @@ static int __init configure_uart_pins(int port) + return 0; + } + +-static void __init configure_uart(char port, int baud) ++static void __init configure_uart(int port, int baud) + { + u32 pbclk; + +@@ -60,7 +60,7 @@ static void __init configure_uart(char port, int baud) + uart_base + PIC32_SET(U_STA(port))); + } + +-static void __init setup_early_console(char port, int baud) ++static void __init setup_early_console(int port, int baud) + { + if (configure_uart_pins(port)) + return; +@@ -130,16 +130,15 @@ _out: + return baud; + } + +-void __init fw_init_early_console(char port) ++void __init fw_init_early_console(void) + { + char *arch_cmdline = pic32_getcmdline(); +- int baud = -1; ++ int baud, port; + + uart_base = ioremap(PIC32_BASE_UART, 0xc00); + + baud = get_baud_from_cmdline(arch_cmdline); +- if (port == -1) +- port = get_port_from_cmdline(arch_cmdline); ++ port = get_port_from_cmdline(arch_cmdline); + + if (port == -1) + port = EARLY_CONSOLE_PORT; +diff --git a/arch/mips/pic32/pic32mzda/init.c 
b/arch/mips/pic32/pic32mzda/init.c +index 764f2d022fae4..429830afff54f 100644 +--- a/arch/mips/pic32/pic32mzda/init.c ++++ b/arch/mips/pic32/pic32mzda/init.c +@@ -47,7 +47,7 @@ void __init plat_mem_setup(void) + strlcpy(arcs_cmdline, boot_command_line, COMMAND_LINE_SIZE); + + #ifdef CONFIG_EARLY_PRINTK +- fw_init_early_console(-1); ++ fw_init_early_console(); + #endif + pic32_config_init(); + } diff --git a/arch/mips/ralink/ill_acc.c b/arch/mips/ralink/ill_acc.c index bdf53807d7c2b..bea857c9da8b7 100644 --- a/arch/mips/ralink/ill_acc.c @@ -22650,10 +29129,10 @@ index bdf53807d7c2b..bea857c9da8b7 100644 } diff --git a/arch/mips/ralink/mt7621.c b/arch/mips/ralink/mt7621.c -index bd71f5b142383..4c83786612193 100644 +index bd71f5b142383..0db23bcf2a970 100644 --- a/arch/mips/ralink/mt7621.c +++ b/arch/mips/ralink/mt7621.c -@@ -20,31 +20,41 @@ +@@ -20,31 +20,42 @@ #include "common.h" @@ -22661,6 +29140,7 @@ index bd71f5b142383..4c83786612193 100644 +#define MT7621_MEM_TEST_PATTERN 0xaa5555aa + +static u32 detect_magic __initdata; ++static struct ralink_soc_info *soc_info_ptr; phys_addr_t mips_cpc_default_phys_base(void) { @@ -22708,6 +29188,143 @@ index bd71f5b142383..4c83786612193 100644 } void __init ralink_of_remap(void) +@@ -56,41 +67,83 @@ void __init ralink_of_remap(void) + panic("Failed to remap core resources"); + } + +-static void soc_dev_init(struct ralink_soc_info *soc_info, u32 rev) ++static unsigned int __init mt7621_get_soc_name0(void) ++{ ++ return __raw_readl(MT7621_SYSC_BASE + SYSC_REG_CHIP_NAME0); ++} ++ ++static unsigned int __init mt7621_get_soc_name1(void) ++{ ++ return __raw_readl(MT7621_SYSC_BASE + SYSC_REG_CHIP_NAME1); ++} ++ ++static bool __init mt7621_soc_valid(void) ++{ ++ if (mt7621_get_soc_name0() == MT7621_CHIP_NAME0 && ++ mt7621_get_soc_name1() == MT7621_CHIP_NAME1) ++ return true; ++ else ++ return false; ++} ++ ++static const char __init *mt7621_get_soc_id(void) ++{ ++ if (mt7621_soc_valid()) ++ return "MT7621"; ++ else ++ return "invalid"; ++} ++ ++static unsigned int __init mt7621_get_soc_rev(void) ++{ ++ return __raw_readl(MT7621_SYSC_BASE + SYSC_REG_CHIP_REV); ++} ++ ++static unsigned int __init mt7621_get_soc_ver(void) ++{ ++ return (mt7621_get_soc_rev() >> CHIP_REV_VER_SHIFT) & CHIP_REV_VER_MASK; ++} ++ ++static unsigned int __init mt7621_get_soc_eco(void) ++{ ++ return (mt7621_get_soc_rev() & CHIP_REV_ECO_MASK); ++} ++ ++static const char __init *mt7621_get_soc_revision(void) ++{ ++ if (mt7621_get_soc_rev() == 1 && mt7621_get_soc_eco() == 1) ++ return "E2"; ++ else ++ return "E1"; ++} ++ ++static int __init mt7621_soc_dev_init(void) + { + struct soc_device *soc_dev; + struct soc_device_attribute *soc_dev_attr; + + soc_dev_attr = kzalloc(sizeof(*soc_dev_attr), GFP_KERNEL); + if (!soc_dev_attr) +- return; ++ return -ENOMEM; + + soc_dev_attr->soc_id = "mt7621"; + soc_dev_attr->family = "Ralink"; ++ soc_dev_attr->revision = mt7621_get_soc_revision(); + +- if (((rev >> CHIP_REV_VER_SHIFT) & CHIP_REV_VER_MASK) == 1 && +- (rev & CHIP_REV_ECO_MASK) == 1) +- soc_dev_attr->revision = "E2"; +- else +- soc_dev_attr->revision = "E1"; +- +- soc_dev_attr->data = soc_info; ++ soc_dev_attr->data = soc_info_ptr; + + soc_dev = soc_device_register(soc_dev_attr); + if (IS_ERR(soc_dev)) { + kfree(soc_dev_attr); +- return; ++ return PTR_ERR(soc_dev); + } ++ ++ return 0; + } ++device_initcall(mt7621_soc_dev_init); + + void __init prom_soc_init(struct ralink_soc_info *soc_info) + { +- void __iomem *sysc = (void __iomem *) KSEG1ADDR(MT7621_SYSC_BASE); +- unsigned char *name = NULL; 
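
The pic32 early-console change above (char -> int for console_port and the port plumbing) fixes the classic plain-char trap: whether plain char is signed is implementation-defined, and on an ABI where it is unsigned, the sentinel in static char console_port = -1 is stored as 255 and can never compare equal to the integer constant -1, so the port == -1 fallback logic never fires. A small demonstration, with the unsigned-char case forced explicitly so it reproduces on any host:

    #include <stdio.h>

    int main(void)
    {
            /* Stand-in for "static char console_port = -1;" on an ABI
             * where plain char is unsigned (forced here by the type). */
            unsigned char port_as_char = -1;        /* stores 255 */
            int port_as_int = -1;

            printf("char sentinel == -1? %s\n",
                   port_as_char == -1 ? "yes" : "no");  /* "no": 255 != -1 */
            printf("int sentinel  == -1? %s\n",
                   port_as_int == -1 ? "yes" : "no");   /* "yes" */
            return 0;
    }

On targets whose ABI makes plain char signed the old code happened to work, which is presumably why it survived this long.
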
+- u32 n0; +- u32 n1; +- u32 rev; +- + /* Early detection of CMP support */ + mips_cm_probe(); + mips_cpc_probe(); +@@ -113,27 +166,23 @@ void __init prom_soc_init(struct ralink_soc_info *soc_info) + __sync(); + } + +- n0 = __raw_readl(sysc + SYSC_REG_CHIP_NAME0); +- n1 = __raw_readl(sysc + SYSC_REG_CHIP_NAME1); +- +- if (n0 == MT7621_CHIP_NAME0 && n1 == MT7621_CHIP_NAME1) { +- name = "MT7621"; ++ if (mt7621_soc_valid()) + soc_info->compatible = "mediatek,mt7621-soc"; +- } else { +- panic("mt7621: unknown SoC, n0:%08x n1:%08x\n", n0, n1); +- } ++ else ++ panic("mt7621: unknown SoC, n0:%08x n1:%08x\n", ++ mt7621_get_soc_name0(), ++ mt7621_get_soc_name1()); + ralink_soc = MT762X_SOC_MT7621AT; +- rev = __raw_readl(sysc + SYSC_REG_CHIP_REV); + + snprintf(soc_info->sys_type, RAMIPS_SYS_TYPE_LEN, + "MediaTek %s ver:%u eco:%u", +- name, +- (rev >> CHIP_REV_VER_SHIFT) & CHIP_REV_VER_MASK, +- (rev & CHIP_REV_ECO_MASK)); ++ mt7621_get_soc_id(), ++ mt7621_get_soc_ver(), ++ mt7621_get_soc_eco()); + + soc_info->mem_detect = mt7621_memory_detect; + +- soc_dev_init(soc_info, rev); ++ soc_info_ptr = soc_info; + + if (!register_cps_smp_ops()) + return; diff --git a/arch/mips/rb532/devices.c b/arch/mips/rb532/devices.c index 04684990e28ef..b7f6f782d9a13 100644 --- a/arch/mips/rb532/devices.c @@ -22922,6 +29539,19 @@ index d4cbf069dc224..37a40981deb3b 100644 __pu_err; \ }) +diff --git a/arch/nds32/kernel/fpu.c b/arch/nds32/kernel/fpu.c +index 9edd7ed7d7bf8..701c09a668de4 100644 +--- a/arch/nds32/kernel/fpu.c ++++ b/arch/nds32/kernel/fpu.c +@@ -223,7 +223,7 @@ inline void handle_fpu_exception(struct pt_regs *regs) + } + } else if (fpcsr & FPCSR_mskRIT) { + if (!user_mode(regs)) +- do_exit(SIGILL); ++ make_task_dead(SIGILL); + si_signo = SIGILL; + } + diff --git a/arch/nds32/kernel/perf_event_cpu.c b/arch/nds32/kernel/perf_event_cpu.c index 0ce6f9f307e6a..f387919607813 100644 --- a/arch/nds32/kernel/perf_event_cpu.c @@ -22983,6 +29613,59 @@ index 0ce6f9f307e6a..f387919607813 100644 misc |= PERF_RECORD_MISC_GUEST_USER; else misc |= PERF_RECORD_MISC_GUEST_KERNEL; +diff --git a/arch/nds32/kernel/traps.c b/arch/nds32/kernel/traps.c +index f06421c645aff..b90030e8e546f 100644 +--- a/arch/nds32/kernel/traps.c ++++ b/arch/nds32/kernel/traps.c +@@ -141,7 +141,7 @@ void die(const char *str, struct pt_regs *regs, int err) + + bust_spinlocks(0); + spin_unlock_irq(&die_lock); +- do_exit(SIGSEGV); ++ make_task_dead(SIGSEGV); + } + + EXPORT_SYMBOL(die); +@@ -240,7 +240,7 @@ void unhandled_interruption(struct pt_regs *regs) + pr_emerg("unhandled_interruption\n"); + show_regs(regs); + if (!user_mode(regs)) +- do_exit(SIGKILL); ++ make_task_dead(SIGKILL); + force_sig(SIGKILL); + } + +@@ -251,7 +251,7 @@ void unhandled_exceptions(unsigned long entry, unsigned long addr, + addr, type); + show_regs(regs); + if (!user_mode(regs)) +- do_exit(SIGKILL); ++ make_task_dead(SIGKILL); + force_sig(SIGKILL); + } + +@@ -278,7 +278,7 @@ void do_revinsn(struct pt_regs *regs) + pr_emerg("Reserved Instruction\n"); + show_regs(regs); + if (!user_mode(regs)) +- do_exit(SIGILL); ++ make_task_dead(SIGILL); + force_sig(SIGILL); + } + +diff --git a/arch/nios2/boot/Makefile b/arch/nios2/boot/Makefile +index 37dfc7e584bce..0b704c1f379f5 100644 +--- a/arch/nios2/boot/Makefile ++++ b/arch/nios2/boot/Makefile +@@ -20,7 +20,7 @@ $(obj)/vmlinux.bin: vmlinux FORCE + $(obj)/vmlinux.gz: $(obj)/vmlinux.bin FORCE + $(call if_changed,gzip) + +-$(obj)/vmImage: $(obj)/vmlinux.gz ++$(obj)/vmImage: $(obj)/vmlinux.gz FORCE + $(call if_changed,uimage) + @$(kecho) 
'Kernel: $@ is ready' + diff --git a/arch/nios2/include/asm/entry.h b/arch/nios2/include/asm/entry.h index cf37f55efbc22..bafb7b2ca59fc 100644 --- a/arch/nios2/include/asm/entry.h @@ -23270,6 +29953,23 @@ index 6176d63023c1d..c2875a6dd5a4a 100644 + [0 ... __NR_syscalls-1] = sys_ni_syscall, #include <asm/unistd.h> }; +diff --git a/arch/nios2/kernel/traps.c b/arch/nios2/kernel/traps.c +index 596986a74a26d..85ac49d64cf73 100644 +--- a/arch/nios2/kernel/traps.c ++++ b/arch/nios2/kernel/traps.c +@@ -37,10 +37,10 @@ void die(const char *str, struct pt_regs *regs, long err) + show_regs(regs); + spin_unlock_irq(&die_lock); + /* +- * do_exit() should take care of panic'ing from an interrupt ++ * make_task_dead() should take care of panic'ing from an interrupt + * context so we don't handle it here + */ +- do_exit(err); ++ make_task_dead(err); + } + + void _exception(int signo, struct pt_regs *regs, int code, unsigned long addr) diff --git a/arch/openrisc/include/asm/io.h b/arch/openrisc/include/asm/io.h index c298061c70a7e..8aa3e78181e9a 100644 --- a/arch/openrisc/include/asm/io.h @@ -23393,6 +30093,19 @@ index 415e209732a3d..ba78766cf00b5 100644 } /* Instruction cache invalidate - performed on each cpu */ +diff --git a/arch/openrisc/kernel/traps.c b/arch/openrisc/kernel/traps.c +index aa1e709405acd..9df1d85bfe1d1 100644 +--- a/arch/openrisc/kernel/traps.c ++++ b/arch/openrisc/kernel/traps.c +@@ -212,7 +212,7 @@ void die(const char *str, struct pt_regs *regs, long err) + __asm__ __volatile__("l.nop 1"); + do {} while (1); + #endif +- do_exit(SIGSEGV); ++ make_task_dead(SIGSEGV); + } + + /* This is normally the 'Oops' routine */ diff --git a/arch/openrisc/mm/ioremap.c b/arch/openrisc/mm/ioremap.c index daae13a76743b..8ec0dafecf257 100644 --- a/arch/openrisc/mm/ioremap.c @@ -23523,6 +30236,29 @@ index fceb9cf02fb3a..71aa0921d6c72 100644 arch_spinlock_t *s = (arch_spinlock_t *)&lws_lock_start[index]; arch_spin_unlock(s); local_irq_restore(*flags); +diff --git a/arch/parisc/include/asm/hardware.h b/arch/parisc/include/asm/hardware.h +index 9d3d7737c58b1..a005ebc547793 100644 +--- a/arch/parisc/include/asm/hardware.h ++++ b/arch/parisc/include/asm/hardware.h +@@ -10,12 +10,12 @@ + #define SVERSION_ANY_ID PA_SVERSION_ANY_ID + + struct hp_hardware { +- unsigned short hw_type:5; /* HPHW_xxx */ +- unsigned short hversion; +- unsigned long sversion:28; +- unsigned short opt; +- const char name[80]; /* The hardware description */ +-}; ++ unsigned int hw_type:8; /* HPHW_xxx */ ++ unsigned int hversion:12; ++ unsigned int sversion:12; ++ unsigned char opt; ++ unsigned char name[59]; /* The hardware description */ ++} __packed; + + struct parisc_device; + diff --git a/arch/parisc/include/asm/pgtable.h b/arch/parisc/include/asm/pgtable.h index 7badd872f05ac..3e7cf882639fb 100644 --- a/arch/parisc/include/asm/pgtable.h @@ -23667,6 +30403,55 @@ index 8ecc1f0c0483d..d0e090a2c000d 100644 #endif #endif +diff --git a/arch/parisc/include/uapi/asm/mman.h b/arch/parisc/include/uapi/asm/mman.h +index 9e3c010c0f61e..5f1f3eea5aa5f 100644 +--- a/arch/parisc/include/uapi/asm/mman.h ++++ b/arch/parisc/include/uapi/asm/mman.h +@@ -49,31 +49,30 @@ + #define MADV_DONTFORK 10 /* don't inherit across fork */ + #define MADV_DOFORK 11 /* do inherit across fork */ + +-#define MADV_COLD 20 /* deactivate these pages */ +-#define MADV_PAGEOUT 21 /* reclaim these pages */ ++#define MADV_MERGEABLE 12 /* KSM may merge identical pages */ ++#define MADV_UNMERGEABLE 13 /* KSM may not merge identical pages */ + +-#define MADV_POPULATE_READ 22 /* 
populate (prefault) page tables readable */ +-#define MADV_POPULATE_WRITE 23 /* populate (prefault) page tables writable */ ++#define MADV_HUGEPAGE 14 /* Worth backing with hugepages */ ++#define MADV_NOHUGEPAGE 15 /* Not worth backing with hugepages */ + +-#define MADV_MERGEABLE 65 /* KSM may merge identical pages */ +-#define MADV_UNMERGEABLE 66 /* KSM may not merge identical pages */ ++#define MADV_DONTDUMP 16 /* Explicity exclude from the core dump, ++ overrides the coredump filter bits */ ++#define MADV_DODUMP 17 /* Clear the MADV_NODUMP flag */ + +-#define MADV_HUGEPAGE 67 /* Worth backing with hugepages */ +-#define MADV_NOHUGEPAGE 68 /* Not worth backing with hugepages */ ++#define MADV_WIPEONFORK 18 /* Zero memory on fork, child only */ ++#define MADV_KEEPONFORK 19 /* Undo MADV_WIPEONFORK */ + +-#define MADV_DONTDUMP 69 /* Explicity exclude from the core dump, +- overrides the coredump filter bits */ +-#define MADV_DODUMP 70 /* Clear the MADV_NODUMP flag */ ++#define MADV_COLD 20 /* deactivate these pages */ ++#define MADV_PAGEOUT 21 /* reclaim these pages */ + +-#define MADV_WIPEONFORK 71 /* Zero memory on fork, child only */ +-#define MADV_KEEPONFORK 72 /* Undo MADV_WIPEONFORK */ ++#define MADV_POPULATE_READ 22 /* populate (prefault) page tables readable */ ++#define MADV_POPULATE_WRITE 23 /* populate (prefault) page tables writable */ + + #define MADV_HWPOISON 100 /* poison a page for testing */ + #define MADV_SOFT_OFFLINE 101 /* soft offline page for testing */ + + /* compatibility flags */ + #define MAP_FILE 0 +-#define MAP_VARIABLE 0 + + #define PKEY_DISABLE_ACCESS 0x1 + #define PKEY_DISABLE_WRITE 0x2 diff --git a/arch/parisc/install.sh b/arch/parisc/install.sh index 056d588befdd6..70d3cffb02515 100644 --- a/arch/parisc/install.sh @@ -23706,7 +30491,7 @@ index 39e02227e2310..394e6e14e5c42 100644 /* We don't have pte special. As a result, we can be called with diff --git a/arch/parisc/kernel/drivers.c b/arch/parisc/kernel/drivers.c -index 776d624a7207b..d126e78e101ae 100644 +index 776d624a7207b..e7ee0c0c91d35 100644 --- a/arch/parisc/kernel/drivers.c +++ b/arch/parisc/kernel/drivers.c @@ -520,7 +520,6 @@ alloc_pa_dev(unsigned long hpa, struct hardware_path *mod_path) @@ -23732,6 +30517,36 @@ index 776d624a7207b..d126e78e101ae 100644 /* Silently fail things like mouse ports which are subsumed within * the keyboard controller +@@ -883,15 +882,13 @@ void __init walk_central_bus(void) + &root); + } + +-static void print_parisc_device(struct parisc_device *dev) ++static __init void print_parisc_device(struct parisc_device *dev) + { +- char hw_path[64]; +- static int count; ++ static int count __initdata; + +- print_pa_hwpath(dev, hw_path); +- pr_info("%d. %s at %pap [%s] { %d, 0x%x, 0x%.3x, 0x%.5x }", +- ++count, dev->name, &(dev->hpa.start), hw_path, dev->id.hw_type, +- dev->id.hversion_rev, dev->id.hversion, dev->id.sversion); ++ pr_info("%d. 
%s at %pap { type:%d, hv:%#x, sv:%#x, rev:%#x }", ++ ++count, dev->name, &(dev->hpa.start), dev->id.hw_type, ++ dev->id.hversion, dev->id.sversion, dev->id.hversion_rev); + + if (dev->num_addrs) { + int k; +@@ -1080,7 +1077,7 @@ static __init int qemu_print_iodc_data(struct device *lin_dev, void *data) + + + +-static int print_one_device(struct device * dev, void * data) ++static __init int print_one_device(struct device * dev, void * data) + { + struct parisc_device * pdev = to_parisc_device(dev); + diff --git a/arch/parisc/kernel/entry.S b/arch/parisc/kernel/entry.S index 9f939afe6b88c..437c8d31f3907 100644 --- a/arch/parisc/kernel/entry.S @@ -23747,6 +30562,36 @@ index 9f939afe6b88c..437c8d31f3907 100644 and,COND(=) %r19,%r2,%r0 b,n syscall_restore_rfi +diff --git a/arch/parisc/kernel/firmware.c b/arch/parisc/kernel/firmware.c +index 7034227dbdf32..8e5a906df9175 100644 +--- a/arch/parisc/kernel/firmware.c ++++ b/arch/parisc/kernel/firmware.c +@@ -1230,7 +1230,7 @@ static char __attribute__((aligned(64))) iodc_dbuf[4096]; + */ + int pdc_iodc_print(const unsigned char *str, unsigned count) + { +- unsigned int i; ++ unsigned int i, found = 0; + unsigned long flags; + + for (i = 0; i < count;) { +@@ -1239,6 +1239,7 @@ int pdc_iodc_print(const unsigned char *str, unsigned count) + iodc_dbuf[i+0] = '\r'; + iodc_dbuf[i+1] = '\n'; + i += 2; ++ found = 1; + goto print; + default: + iodc_dbuf[i] = str[i]; +@@ -1255,7 +1256,7 @@ print: + __pa(iodc_retbuf), 0, __pa(iodc_dbuf), i, 0); + spin_unlock_irqrestore(&pdc_lock, flags); + +- return i; ++ return i - found; + } + + #if !defined(BOOTLOADER) diff --git a/arch/parisc/kernel/head.S b/arch/parisc/kernel/head.S index aa93d775c34db..598d0938449da 100644 --- a/arch/parisc/kernel/head.S @@ -23888,6 +30733,52 @@ index 1b6129e7d776b..b861bbbc87178 100644 boot_cpu_data.pdc.sys_model_name, cpuinfo->dev ? cpuinfo->dev->name : "Unknown"); +diff --git a/arch/parisc/kernel/ptrace.c b/arch/parisc/kernel/ptrace.c +index 65de6c4c9354d..b9398e805978d 100644 +--- a/arch/parisc/kernel/ptrace.c ++++ b/arch/parisc/kernel/ptrace.c +@@ -127,6 +127,12 @@ long arch_ptrace(struct task_struct *child, long request, + unsigned long tmp; + long ret = -EIO; + ++ unsigned long user_regs_struct_size = sizeof(struct user_regs_struct); ++#ifdef CONFIG_64BIT ++ if (is_compat_task()) ++ user_regs_struct_size /= 2; ++#endif ++ + switch (request) { + + /* Read the word at location addr in the USER area. For ptraced +@@ -182,14 +188,14 @@ long arch_ptrace(struct task_struct *child, long request, + return copy_regset_to_user(child, + task_user_regset_view(current), + REGSET_GENERAL, +- 0, sizeof(struct user_regs_struct), ++ 0, user_regs_struct_size, + datap); + + case PTRACE_SETREGS: /* Set all gp regs in the child. */ + return copy_regset_from_user(child, + task_user_regset_view(current), + REGSET_GENERAL, +- 0, sizeof(struct user_regs_struct), ++ 0, user_regs_struct_size, + datap); + + case PTRACE_GETFPREGS: /* Get the child FPU state. 
*/ +@@ -303,6 +309,11 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request, + } + } + break; ++ case PTRACE_GETREGS: ++ case PTRACE_SETREGS: ++ case PTRACE_GETFPREGS: ++ case PTRACE_SETFPREGS: ++ return arch_ptrace(child, request, addr, data); + + default: + ret = compat_ptrace_request(child, request, addr, data); diff --git a/arch/parisc/kernel/setup.c b/arch/parisc/kernel/setup.c index cceb09855e03f..3fb86ee507dd5 100644 --- a/arch/parisc/kernel/setup.c @@ -23998,6 +30889,41 @@ index 1405b603b91b6..cf92ece20b757 100644 inline void smp_send_stop(void) { send_IPI_allbutself(IPI_CPU_STOP); } +diff --git a/arch/parisc/kernel/sys_parisc.c b/arch/parisc/kernel/sys_parisc.c +index 5f12537318ab2..31950882e272f 100644 +--- a/arch/parisc/kernel/sys_parisc.c ++++ b/arch/parisc/kernel/sys_parisc.c +@@ -463,3 +463,30 @@ asmlinkage long parisc_inotify_init1(int flags) + flags = FIX_O_NONBLOCK(flags); + return sys_inotify_init1(flags); + } ++ ++/* ++ * madvise() wrapper ++ * ++ * Up to kernel v6.1 parisc has different values than all other ++ * platforms for the MADV_xxx flags listed below. ++ * To keep binary compatibility with existing userspace programs ++ * translate the former values to the new values. ++ * ++ * XXX: Remove this wrapper in year 2025 (or later) ++ */ ++ ++asmlinkage notrace long parisc_madvise(unsigned long start, size_t len_in, int behavior) ++{ ++ switch (behavior) { ++ case 65: behavior = MADV_MERGEABLE; break; ++ case 66: behavior = MADV_UNMERGEABLE; break; ++ case 67: behavior = MADV_HUGEPAGE; break; ++ case 68: behavior = MADV_NOHUGEPAGE; break; ++ case 69: behavior = MADV_DONTDUMP; break; ++ case 70: behavior = MADV_DODUMP; break; ++ case 71: behavior = MADV_WIPEONFORK; break; ++ case 72: behavior = MADV_KEEPONFORK; break; ++ } ++ ++ return sys_madvise(start, len_in, behavior); ++} diff --git a/arch/parisc/kernel/syscall.S b/arch/parisc/kernel/syscall.S index 3f24a0af1e047..9842dcb2041e5 100644 --- a/arch/parisc/kernel/syscall.S @@ -24012,9 +30938,18 @@ index 3f24a0af1e047..9842dcb2041e5 100644 /* Clip LWS number to a 32-bit value for 32-bit processes */ depdi 0, 31, 32, %r20 diff --git a/arch/parisc/kernel/syscalls/syscall.tbl b/arch/parisc/kernel/syscalls/syscall.tbl -index bf751e0732b70..c23f4fa170051 100644 +index bf751e0732b70..50c759f11c25d 100644 --- a/arch/parisc/kernel/syscalls/syscall.tbl +++ b/arch/parisc/kernel/syscalls/syscall.tbl +@@ -131,7 +131,7 @@ + 116 common sysinfo sys_sysinfo compat_sys_sysinfo + 117 common shutdown sys_shutdown + 118 common fsync sys_fsync +-119 common madvise sys_madvise ++119 common madvise parisc_madvise + 120 common clone sys_clone_wrapper + 121 common setdomainname sys_setdomainname + 122 common sendfile sys_sendfile compat_sys_sendfile @@ -413,7 +413,7 @@ 412 32 utimensat_time64 sys_utimensat sys_utimensat 413 32 pselect6_time64 sys_pselect6 compat_sys_pselect6_time64 @@ -24064,9 +30999,18 @@ index 9fb1e794831b0..d8e59a1000ab7 100644 /* register at clocksource framework */ diff --git a/arch/parisc/kernel/traps.c b/arch/parisc/kernel/traps.c -index 747c328fb8862..6fe5a3e98edc2 100644 +index 747c328fb8862..70ace36879507 100644 --- a/arch/parisc/kernel/traps.c +++ b/arch/parisc/kernel/traps.c +@@ -268,7 +268,7 @@ void die_if_kernel(char *str, struct pt_regs *regs, long err) + panic("Fatal exception"); + + oops_exit(); +- do_exit(SIGSEGV); ++ make_task_dead(SIGSEGV); + } + + /* gdb uses break 4,8 */ @@ -661,6 +661,8 @@ void notrace handle_interruption(int code, struct pt_regs *regs) by hand. 
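
The parisc_madvise() wrapper above keeps old binaries working while the parisc MADV_* constants move onto the common Linux values. The same translation can be sketched from userspace for programs built against the historical parisc numbers; the legacy values 65..72 are taken from the hunk above, and the example assumes a glibc recent enough to define MADV_WIPEONFORK and friends in <sys/mman.h>:

    #include <stdio.h>
    #include <sys/mman.h>

    /* Map the historical parisc-only MADV_* values (65..72) onto the
     * common ones, mirroring the kernel wrapper sketched above. */
    static int fixup_legacy_parisc_madvise(int behavior)
    {
            switch (behavior) {
            case 65: return MADV_MERGEABLE;
            case 66: return MADV_UNMERGEABLE;
            case 67: return MADV_HUGEPAGE;
            case 68: return MADV_NOHUGEPAGE;
            case 69: return MADV_DONTDUMP;
            case 70: return MADV_DODUMP;
            case 71: return MADV_WIPEONFORK;
            case 72: return MADV_KEEPONFORK;
            default: return behavior;
            }
    }

    int main(void)
    {
            printf("legacy 67 -> %d (MADV_HUGEPAGE = %d)\n",
                   fixup_legacy_parisc_madvise(67), MADV_HUGEPAGE);
            return 0;
    }
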
Technically we need to emulate: fdc,fdce,pdc,"fic,4f",prober,probeir,probew, probeiw @@ -24788,6 +31732,106 @@ index a24722ccaebf1..c2f9aea78b29f 100644 / { model = "MPC8560ADS"; +diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-2.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-2.dtsi +new file mode 100644 +index 0000000000000..437dab3fc0176 +--- /dev/null ++++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-2.dtsi +@@ -0,0 +1,44 @@ ++// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0-or-later ++/* ++ * QorIQ FMan v3 10g port #2 device tree stub [ controller @ offset 0x400000 ] ++ * ++ * Copyright 2022 Sean Anderson <sean.anderson@seco.com> ++ * Copyright 2012 - 2015 Freescale Semiconductor Inc. ++ */ ++ ++fman@400000 { ++ fman0_rx_0x08: port@88000 { ++ cell-index = <0x8>; ++ compatible = "fsl,fman-v3-port-rx"; ++ reg = <0x88000 0x1000>; ++ fsl,fman-10g-port; ++ }; ++ ++ fman0_tx_0x28: port@a8000 { ++ cell-index = <0x28>; ++ compatible = "fsl,fman-v3-port-tx"; ++ reg = <0xa8000 0x1000>; ++ fsl,fman-10g-port; ++ }; ++ ++ ethernet@e0000 { ++ cell-index = <0>; ++ compatible = "fsl,fman-memac"; ++ reg = <0xe0000 0x1000>; ++ fsl,fman-ports = <&fman0_rx_0x08 &fman0_tx_0x28>; ++ ptp-timer = <&ptp_timer0>; ++ pcsphy-handle = <&pcsphy0>; ++ }; ++ ++ mdio@e1000 { ++ #address-cells = <1>; ++ #size-cells = <0>; ++ compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio"; ++ reg = <0xe1000 0x1000>; ++ fsl,erratum-a011043; /* must ignore read errors */ ++ ++ pcsphy0: ethernet-phy@0 { ++ reg = <0x0>; ++ }; ++ }; ++}; +diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-3.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-3.dtsi +new file mode 100644 +index 0000000000000..ad116b17850a8 +--- /dev/null ++++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-3.dtsi +@@ -0,0 +1,44 @@ ++// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0-or-later ++/* ++ * QorIQ FMan v3 10g port #3 device tree stub [ controller @ offset 0x400000 ] ++ * ++ * Copyright 2022 Sean Anderson <sean.anderson@seco.com> ++ * Copyright 2012 - 2015 Freescale Semiconductor Inc. 
++ */ ++ ++fman@400000 { ++ fman0_rx_0x09: port@89000 { ++ cell-index = <0x9>; ++ compatible = "fsl,fman-v3-port-rx"; ++ reg = <0x89000 0x1000>; ++ fsl,fman-10g-port; ++ }; ++ ++ fman0_tx_0x29: port@a9000 { ++ cell-index = <0x29>; ++ compatible = "fsl,fman-v3-port-tx"; ++ reg = <0xa9000 0x1000>; ++ fsl,fman-10g-port; ++ }; ++ ++ ethernet@e2000 { ++ cell-index = <1>; ++ compatible = "fsl,fman-memac"; ++ reg = <0xe2000 0x1000>; ++ fsl,fman-ports = <&fman0_rx_0x09 &fman0_tx_0x29>; ++ ptp-timer = <&ptp_timer0>; ++ pcsphy-handle = <&pcsphy1>; ++ }; ++ ++ mdio@e3000 { ++ #address-cells = <1>; ++ #size-cells = <0>; ++ compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio"; ++ reg = <0xe3000 0x1000>; ++ fsl,erratum-a011043; /* must ignore read errors */ ++ ++ pcsphy1: ethernet-phy@0 { ++ reg = <0x0>; ++ }; ++ }; ++}; diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3l-0.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3l-0.dtsi index c90702b04a530..48e5cd61599c6 100644 --- a/arch/powerpc/boot/dts/fsl/qoriq-fman3l-0.dtsi @@ -24903,6 +31947,41 @@ index 099a598c74c00..bfe1ed5be3374 100644 }; mdio0: mdio@fc000 { +diff --git a/arch/powerpc/boot/dts/fsl/t2081si-post.dtsi b/arch/powerpc/boot/dts/fsl/t2081si-post.dtsi +index ecbb447920bc6..27714dc2f04a5 100644 +--- a/arch/powerpc/boot/dts/fsl/t2081si-post.dtsi ++++ b/arch/powerpc/boot/dts/fsl/t2081si-post.dtsi +@@ -609,8 +609,8 @@ + /include/ "qoriq-bman1.dtsi" + + /include/ "qoriq-fman3-0.dtsi" +-/include/ "qoriq-fman3-0-1g-0.dtsi" +-/include/ "qoriq-fman3-0-1g-1.dtsi" ++/include/ "qoriq-fman3-0-10g-2.dtsi" ++/include/ "qoriq-fman3-0-10g-3.dtsi" + /include/ "qoriq-fman3-0-1g-2.dtsi" + /include/ "qoriq-fman3-0-1g-3.dtsi" + /include/ "qoriq-fman3-0-1g-4.dtsi" +@@ -659,3 +659,19 @@ + interrupts = <16 2 1 9>; + }; + }; ++ ++&fman0_rx_0x08 { ++ /delete-property/ fsl,fman-10g-port; ++}; ++ ++&fman0_tx_0x28 { ++ /delete-property/ fsl,fman-10g-port; ++}; ++ ++&fman0_rx_0x09 { ++ /delete-property/ fsl,fman-10g-port; ++}; ++ ++&fman0_tx_0x29 { ++ /delete-property/ fsl,fman-10g-port; ++}; diff --git a/arch/powerpc/boot/dts/lite5200.dts b/arch/powerpc/boot/dts/lite5200.dts index cb2782dd6132c..e7b194775d783 100644 --- a/arch/powerpc/boot/dts/lite5200.dts @@ -25283,6 +32362,19 @@ index 21cc571ea9c2d..5c98a950eca0d 100644 static inline void irq_soft_mask_regs_set_state(struct pt_regs *regs, unsigned long val) { } +diff --git a/arch/powerpc/include/asm/imc-pmu.h b/arch/powerpc/include/asm/imc-pmu.h +index 4f897993b7107..699a88584ae16 100644 +--- a/arch/powerpc/include/asm/imc-pmu.h ++++ b/arch/powerpc/include/asm/imc-pmu.h +@@ -137,7 +137,7 @@ struct imc_pmu { + * are inited. 
+ */ + struct imc_pmu_ref { +- struct mutex lock; ++ spinlock_t lock; + unsigned int id; + int refc; + }; diff --git a/arch/powerpc/include/asm/interrupt.h b/arch/powerpc/include/asm/interrupt.h index a1d238255f077..a07960066b5fa 100644 --- a/arch/powerpc/include/asm/interrupt.h @@ -25766,6 +32858,34 @@ index 9dc97d2f9d27e..a05b34cf5f408 100644 void rtas_call_unlocked(struct rtas_args *args, int token, int nargs, int nret, ...); extern void __noreturn rtas_restart(char *cmd); +diff --git a/arch/powerpc/include/asm/sections.h b/arch/powerpc/include/asm/sections.h +index 6e4af4492a144..e92d39c0cd1d9 100644 +--- a/arch/powerpc/include/asm/sections.h ++++ b/arch/powerpc/include/asm/sections.h +@@ -6,22 +6,10 @@ + #include <linux/elf.h> + #include <linux/uaccess.h> + +-#define arch_is_kernel_initmem_freed arch_is_kernel_initmem_freed +- + #include <asm-generic/sections.h> + +-extern bool init_mem_is_free; +- +-static inline int arch_is_kernel_initmem_freed(unsigned long addr) +-{ +- if (!init_mem_is_free) +- return 0; +- +- return addr >= (unsigned long)__init_begin && +- addr < (unsigned long)__init_end; +-} +- + extern char __head_end[]; ++extern char __srwx_boundary[]; + + #ifdef __powerpc64__ + diff --git a/arch/powerpc/include/asm/set_memory.h b/arch/powerpc/include/asm/set_memory.h index b040094f79202..7ebc807aa8cc8 100644 --- a/arch/powerpc/include/asm/set_memory.h @@ -26484,10 +33604,31 @@ index 13cad9297d822..3c097356366b8 100644 END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) lwz r8,TI_LOCAL_FLAGS(r2) /* set napping bit */ diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c -index de10a26972581..df048e331cbfe 100644 +index de10a26972581..8703df709cce8 100644 --- a/arch/powerpc/kernel/interrupt.c +++ b/arch/powerpc/kernel/interrupt.c -@@ -148,7 +148,7 @@ notrace long system_call_exception(long r3, long r4, long r5, +@@ -53,16 +53,18 @@ static inline bool exit_must_hard_disable(void) + */ + static notrace __always_inline bool prep_irq_for_enabled_exit(bool restartable) + { ++ bool must_hard_disable = (exit_must_hard_disable() || !restartable); ++ + /* This must be done with RI=1 because tracing may touch vmaps */ + trace_hardirqs_on(); + +- if (exit_must_hard_disable() || !restartable) ++ if (must_hard_disable) + __hard_EE_RI_disable(); + + #ifdef CONFIG_PPC64 + /* This pattern matches prep_irq_for_idle */ + if (unlikely(lazy_irq_pending_nocheck())) { +- if (exit_must_hard_disable() || !restartable) { ++ if (must_hard_disable) { + local_paca->irq_happened |= PACA_IRQ_HARD_DIS; + __hard_RI_enable(); + } +@@ -148,7 +150,7 @@ notrace long system_call_exception(long r3, long r4, long r5, */ if (IS_ENABLED(CONFIG_PPC_TRANSACTIONAL_MEM) && unlikely(MSR_TM_TRANSACTIONAL(regs->msr))) @@ -26496,7 +33637,7 @@ index de10a26972581..df048e331cbfe 100644 /* * If the system call was made with a transaction active, doom it and -@@ -266,7 +266,7 @@ static void check_return_regs_valid(struct pt_regs *regs) +@@ -266,7 +268,7 @@ static void check_return_regs_valid(struct pt_regs *regs) if (trap_is_scv(regs)) return; @@ -27246,7 +34387,7 @@ index 02d4719bf43a8..232e4549defe1 100644 p_st: .8byte _stext - 0b diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c -index ff80bbad22a58..7834ce3aa7f1b 100644 +index ff80bbad22a58..2dae702e7a5a7 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -42,13 +42,21 @@ @@ -27272,7 +34413,40 @@ index ff80bbad22a58..7834ce3aa7f1b 100644 enter_rtas(args); srr_regs_clobbered(); /* rtas uses SRRs, invalidate */ -@@ 
-836,59 +844,6 @@ void rtas_activate_firmware(void) +@@ -780,6 +788,7 @@ void __noreturn rtas_halt(void) + + /* Must be in the RMO region, so we place it here */ + static char rtas_os_term_buf[2048]; ++static s32 ibm_os_term_token = RTAS_UNKNOWN_SERVICE; + + void rtas_os_term(char *str) + { +@@ -791,16 +800,20 @@ void rtas_os_term(char *str) + * this property may terminate the partition which we want to avoid + * since it interferes with panic_timeout. + */ +- if (RTAS_UNKNOWN_SERVICE == rtas_token("ibm,os-term") || +- RTAS_UNKNOWN_SERVICE == rtas_token("ibm,extended-os-term")) ++ if (ibm_os_term_token == RTAS_UNKNOWN_SERVICE) + return; + + snprintf(rtas_os_term_buf, 2048, "OS panic: %s", str); + ++ /* ++ * Keep calling as long as RTAS returns a "try again" status, ++ * but don't use rtas_busy_delay(), which potentially ++ * schedules. ++ */ + do { +- status = rtas_call(rtas_token("ibm,os-term"), 1, 1, NULL, ++ status = rtas_call(ibm_os_term_token, 1, 1, NULL, + __pa(rtas_os_term_buf)); +- } while (rtas_busy_delay(status)); ++ } while (rtas_busy_delay_time(status)); + + if (status != 0) + printk(KERN_EMERG "ibm,os-term call failed %d\n", status); +@@ -836,59 +849,6 @@ void rtas_activate_firmware(void) pr_err("ibm,activate-firmware failed (%i)\n", fwrc); } @@ -27332,7 +34506,7 @@ index ff80bbad22a58..7834ce3aa7f1b 100644 /** * Find a specific pseries error log in an RTAS extended event log. * @log: RTAS error/event log -@@ -974,7 +929,7 @@ static struct rtas_filter rtas_filters[] __ro_after_init = { +@@ -974,7 +934,7 @@ static struct rtas_filter rtas_filters[] __ro_after_init = { { "get-time-of-day", -1, -1, -1, -1, -1 }, { "ibm,get-vpd", -1, 0, -1, 1, 2 }, { "ibm,lpar-perftools", -1, 2, 3, -1, -1 }, @@ -27341,7 +34515,7 @@ index ff80bbad22a58..7834ce3aa7f1b 100644 { "ibm,read-slot-reset-state", -1, -1, -1, -1, -1 }, { "ibm,scan-log-dump", -1, 0, 1, -1, -1 }, { "ibm,set-dynamic-indicator", -1, 2, -1, -1, -1 }, -@@ -1023,6 +978,15 @@ static bool block_rtas_call(int token, int nargs, +@@ -1023,6 +983,15 @@ static bool block_rtas_call(int token, int nargs, size = 1; end = base + size - 1; @@ -27357,7 +34531,21 @@ index ff80bbad22a58..7834ce3aa7f1b 100644 if (!in_rmo_buf(base, end)) goto err; } -@@ -1235,6 +1199,12 @@ int __init early_init_dt_scan_rtas(unsigned long node, +@@ -1203,6 +1172,13 @@ void __init rtas_initialize(void) + no_entry = of_property_read_u32(rtas.dev, "linux,rtas-entry", &entry); + rtas.entry = no_entry ? rtas.base : entry; + ++ /* ++ * Discover these now to avoid device tree lookups in the ++ * panic path. 
++ */ ++ if (of_property_read_bool(rtas.dev, "ibm,extended-os-term")) ++ ibm_os_term_token = rtas_token("ibm,os-term"); ++ + /* If RTAS was found, allocate the RMO buffer for it and look for + * the stop-self token if any + */ +@@ -1235,6 +1211,12 @@ int __init early_init_dt_scan_rtas(unsigned long node, entryp = of_get_flat_dt_prop(node, "linux,rtas-entry", NULL); sizep = of_get_flat_dt_prop(node, "rtas-size", NULL); @@ -27816,11 +35004,68 @@ index f9fd5f743eba3..d636fc755f608 100644 /* Restore callee's TOC */ ld r2, 24(r1) +diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c +index 11741703d26e0..a08bb7cefdc54 100644 +--- a/arch/powerpc/kernel/traps.c ++++ b/arch/powerpc/kernel/traps.c +@@ -245,7 +245,7 @@ static void oops_end(unsigned long flags, struct pt_regs *regs, + + if (panic_on_oops) + panic("Fatal exception"); +- do_exit(signr); ++ make_task_dead(signr); + } + NOKPROBE_SYMBOL(oops_end); + +@@ -792,9 +792,9 @@ int machine_check_generic(struct pt_regs *regs) + void die_mce(const char *str, struct pt_regs *regs, long err) + { + /* +- * The machine check wants to kill the interrupted context, but +- * do_exit() checks for in_interrupt() and panics in that case, so +- * exit the irq/nmi before calling die. ++ * The machine check wants to kill the interrupted context, ++ * but make_task_dead() checks for in_interrupt() and panics ++ * in that case, so exit the irq/nmi before calling die. + */ + if (in_nmi()) + nmi_exit(); diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S -index 40bdefe9caa73..1a63e37f336ab 100644 +index 40bdefe9caa73..a664d0c4344a9 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S -@@ -275,9 +275,7 @@ SECTIONS +@@ -32,6 +32,10 @@ + + #define STRICT_ALIGN_SIZE (1 << CONFIG_DATA_SHIFT) + ++#if STRICT_ALIGN_SIZE < PAGE_SIZE ++#error "CONFIG_DATA_SHIFT must be >= PAGE_SHIFT" ++#endif ++ + ENTRY(_stext) + + PHDRS { +@@ -204,12 +208,16 @@ SECTIONS + } + #endif + ++ /* ++ * Various code relies on __init_begin being at the strict RWX boundary. ++ */ ++ . = ALIGN(STRICT_ALIGN_SIZE); ++ __srwx_boundary = .; ++ __init_begin = .; ++ + /* + * Init sections discarded at runtime + */ +- . = ALIGN(STRICT_ALIGN_SIZE); +- __init_begin = .; +- . = ALIGN(PAGE_SIZE); + .init.text : AT(ADDR(.init.text) - LOAD_OFFSET) { + _sinittext = .; + INIT_TEXT +@@ -275,9 +283,7 @@ SECTIONS . 
= ALIGN(8); .dynsym : AT(ADDR(.dynsym) - LOAD_OFFSET) { @@ -28749,7 +35994,7 @@ index 39b84e7452e1b..aa3bb8da1cb9b 100644 #include <linux/uaccess.h> #include <asm/reg.h> diff --git a/arch/powerpc/mm/book3s32/mmu.c b/arch/powerpc/mm/book3s32/mmu.c -index 27061583a0107..bfca0afe91126 100644 +index 27061583a0107..692c336e4f55b 100644 --- a/arch/powerpc/mm/book3s32/mmu.c +++ b/arch/powerpc/mm/book3s32/mmu.c @@ -76,7 +76,7 @@ unsigned long p_block_mapped(phys_addr_t pa) @@ -28779,10 +36024,12 @@ index 27061583a0107..bfca0afe91126 100644 if (size < 128 << 10) break; -@@ -160,7 +160,10 @@ unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top) +@@ -159,8 +159,11 @@ static unsigned long __init __mmu_mapin_ram(unsigned long base, unsigned long to + unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top) { unsigned long done; - unsigned long border = (unsigned long)__init_begin - PAGE_OFFSET; +- unsigned long border = (unsigned long)__init_begin - PAGE_OFFSET; ++ unsigned long border = (unsigned long)__srwx_boundary - PAGE_OFFSET; + unsigned long size; + size = roundup_pow_of_two((unsigned long)_einittext - PAGE_OFFSET); @@ -28929,10 +36176,52 @@ index c145776d3ae5e..7bfd88c4b5470 100644 /* diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c -index ae20add7954a0..a339cb5de5dd1 100644 +index ae20add7954a0..feb24313e2e3c 100644 --- a/arch/powerpc/mm/book3s64/radix_pgtable.c +++ b/arch/powerpc/mm/book3s64/radix_pgtable.c -@@ -954,15 +954,6 @@ pmd_t radix__pmdp_collapse_flush(struct vm_area_struct *vma, unsigned long addre +@@ -232,6 +232,14 @@ void radix__mark_rodata_ro(void) + end = (unsigned long)__init_begin; + + radix__change_memory_range(start, end, _PAGE_WRITE); ++ ++ for (start = PAGE_OFFSET; start < (unsigned long)_stext; start += PAGE_SIZE) { ++ end = start + PAGE_SIZE; ++ if (overlaps_interrupt_vector_text(start, end)) ++ radix__change_memory_range(start, end, _PAGE_WRITE); ++ else ++ break; ++ } + } + + void radix__mark_initmem_nx(void) +@@ -260,8 +268,24 @@ print_mapping(unsigned long start, unsigned long end, unsigned long size, bool e + static unsigned long next_boundary(unsigned long addr, unsigned long end) + { + #ifdef CONFIG_STRICT_KERNEL_RWX +- if (addr < __pa_symbol(__init_begin)) +- return __pa_symbol(__init_begin); ++ unsigned long stext_phys; ++ ++ stext_phys = __pa_symbol(_stext); ++ ++ // Relocatable kernel running at non-zero real address ++ if (stext_phys != 0) { ++ // The end of interrupts code at zero is a rodata boundary ++ unsigned long end_intr = __pa_symbol(__end_interrupts) - stext_phys; ++ if (addr < end_intr) ++ return end_intr; ++ ++ // Start of relocated kernel text is a rodata boundary ++ if (addr < stext_phys) ++ return stext_phys; ++ } ++ ++ if (addr < __pa_symbol(__srwx_boundary)) ++ return __pa_symbol(__srwx_boundary); + #endif + return end; + } +@@ -954,15 +978,6 @@ pmd_t radix__pmdp_collapse_flush(struct vm_area_struct *vma, unsigned long addre pmd = *pmdp; pmd_clear(pmdp); @@ -28948,7 +36237,7 @@ index ae20add7954a0..a339cb5de5dd1 100644 radix__flush_tlb_collapsed_pmd(vma->vm_mm, address); return pmd; -@@ -1093,7 +1084,7 @@ int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot) +@@ -1093,7 +1108,7 @@ int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot) int pud_clear_huge(pud_t *pud) { @@ -28957,7 +36246,7 @@ index ae20add7954a0..a339cb5de5dd1 100644 pud_clear(pud); return 1; } -@@ -1140,7 +1131,7 @@ int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot) 
+@@ -1140,7 +1155,7 @@ int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot) int pmd_clear_huge(pmd_t *pmd) { @@ -29840,6 +37129,18 @@ index 2f46e31c76129..4f53d0b97539b 100644 obj32-$(CONFIG_PPC_PERF_CTRS) += mpc7450-pmu.o obj-$(CONFIG_PPC_POWERNV) += imc-pmu.o +diff --git a/arch/powerpc/perf/callchain.c b/arch/powerpc/perf/callchain.c +index 082f6d0308a47..8718289c051dd 100644 +--- a/arch/powerpc/perf/callchain.c ++++ b/arch/powerpc/perf/callchain.c +@@ -61,6 +61,7 @@ perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *re + next_sp = fp[0]; + + if (next_sp == sp + STACK_INT_FRAME_SIZE && ++ validate_sp(sp, current, STACK_INT_FRAME_SIZE) && + fp[STACK_FRAME_MARKER] == STACK_FRAME_REGS_MARKER) { + /* + * This looks like an interrupt frame for an diff --git a/arch/powerpc/perf/callchain.h b/arch/powerpc/perf/callchain.h index d6fa6e25234f4..19a8d051ddf10 100644 --- a/arch/powerpc/perf/callchain.h @@ -30023,11 +37324,555 @@ index 73e62e9b179bc..1078784b74c9b 100644 if (unlikely(!found) && !arch_irq_disabled_regs(regs)) printk_ratelimited(KERN_WARNING "Can't find PMC that caused IRQ\n"); +diff --git a/arch/powerpc/perf/hv-gpci-requests.h b/arch/powerpc/perf/hv-gpci-requests.h +index 8965b4463d433..5e86371a20c78 100644 +--- a/arch/powerpc/perf/hv-gpci-requests.h ++++ b/arch/powerpc/perf/hv-gpci-requests.h +@@ -79,6 +79,7 @@ REQUEST(__field(0, 8, partition_id) + ) + #include I(REQUEST_END) + ++#ifdef ENABLE_EVENTS_COUNTERINFO_V6 + /* + * Not available for counter_info_version >= 0x8, use + * run_instruction_cycles_by_partition(0x100) instead. +@@ -92,6 +93,7 @@ REQUEST(__field(0, 8, partition_id) + __count(0x10, 8, cycles) + ) + #include I(REQUEST_END) ++#endif + + #define REQUEST_NAME system_performance_capabilities + #define REQUEST_NUM 0x40 +@@ -103,6 +105,7 @@ REQUEST(__field(0, 1, perf_collect_privileged) + ) + #include I(REQUEST_END) + ++#ifdef ENABLE_EVENTS_COUNTERINFO_V6 + #define REQUEST_NAME processor_bus_utilization_abc_links + #define REQUEST_NUM 0x50 + #define REQUEST_IDX_KIND "hw_chip_id=?" +@@ -194,6 +197,7 @@ REQUEST(__field(0, 4, phys_processor_idx) + __count(0x28, 8, instructions_completed) + ) + #include I(REQUEST_END) ++#endif + + /* Processor_core_power_mode (0x95) skipped, no counters */ + /* Affinity_domain_information_by_virtual_processor (0xA0) skipped, +diff --git a/arch/powerpc/perf/hv-gpci.c b/arch/powerpc/perf/hv-gpci.c +index c756228a081fb..28b770bbc10b4 100644 +--- a/arch/powerpc/perf/hv-gpci.c ++++ b/arch/powerpc/perf/hv-gpci.c +@@ -72,7 +72,7 @@ static struct attribute_group format_group = { + + static struct attribute_group event_group = { + .name = "events", +- .attrs = hv_gpci_event_attrs, ++ /* .attrs is set in init */ + }; + + #define HV_CAPS_ATTR(_name, _format) \ +@@ -330,6 +330,7 @@ static int hv_gpci_init(void) + int r; + unsigned long hret; + struct hv_perf_caps caps; ++ struct hv_gpci_request_buffer *arg; + + hv_gpci_assert_offsets_correct(); + +@@ -353,6 +354,36 @@ static int hv_gpci_init(void) + /* sampling not supported */ + h_gpci_pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT; + ++ arg = (void *)get_cpu_var(hv_gpci_reqb); ++ memset(arg, 0, HGPCI_REQ_BUFFER_SIZE); ++ ++ /* ++ * hcall H_GET_PERF_COUNTER_INFO populates the output ++ * counter_info_version value based on the system hypervisor. ++ * Pass the counter request 0x10 corresponds to request type ++ * 'Dispatch_timebase_by_processor', to get the supported ++ * counter_info_version. 
++ */ ++ arg->params.counter_request = cpu_to_be32(0x10); ++ ++ r = plpar_hcall_norets(H_GET_PERF_COUNTER_INFO, ++ virt_to_phys(arg), HGPCI_REQ_BUFFER_SIZE); ++ if (r) { ++ pr_devel("hcall failed, can't get supported counter_info_version: 0x%x\n", r); ++ arg->params.counter_info_version_out = 0x8; ++ } ++ ++ /* ++ * Use counter_info_version_out value to assign ++ * required hv-gpci event list. ++ */ ++ if (arg->params.counter_info_version_out >= 0x8) ++ event_group.attrs = hv_gpci_event_attrs; ++ else ++ event_group.attrs = hv_gpci_event_attrs_v6; ++ ++ put_cpu_var(hv_gpci_reqb); ++ + r = perf_pmu_register(&h_gpci_pmu, h_gpci_pmu.name, -1); + if (r) + return r; +diff --git a/arch/powerpc/perf/hv-gpci.h b/arch/powerpc/perf/hv-gpci.h +index 4d108262bed79..c72020912dea5 100644 +--- a/arch/powerpc/perf/hv-gpci.h ++++ b/arch/powerpc/perf/hv-gpci.h +@@ -26,6 +26,7 @@ enum { + #define REQUEST_FILE "../hv-gpci-requests.h" + #define NAME_LOWER hv_gpci + #define NAME_UPPER HV_GPCI ++#define ENABLE_EVENTS_COUNTERINFO_V6 + #include "req-gen/perf.h" + #undef REQUEST_FILE + #undef NAME_LOWER diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c -index e106909ff9c37..e7583fbcc8fa1 100644 +index e106909ff9c37..b8a100b9736c7 100644 --- a/arch/powerpc/perf/imc-pmu.c +++ b/arch/powerpc/perf/imc-pmu.c -@@ -1457,7 +1457,11 @@ static int trace_imc_event_init(struct perf_event *event) +@@ -13,6 +13,7 @@ + #include <asm/cputhreads.h> + #include <asm/smp.h> + #include <linux/string.h> ++#include <linux/spinlock.h> + + /* Nest IMC data structures and variables */ + +@@ -49,7 +50,7 @@ static int trace_imc_mem_size; + * core and trace-imc + */ + static struct imc_pmu_ref imc_global_refc = { +- .lock = __MUTEX_INITIALIZER(imc_global_refc.lock), ++ .lock = __SPIN_LOCK_INITIALIZER(imc_global_refc.lock), + .id = 0, + .refc = 0, + }; +@@ -393,7 +394,7 @@ static int ppc_nest_imc_cpu_offline(unsigned int cpu) + get_hard_smp_processor_id(cpu)); + /* + * If this is the last cpu in this chip then, skip the reference +- * count mutex lock and make the reference count on this chip zero. ++ * count lock and make the reference count on this chip zero. + */ + ref = get_nest_pmu_ref(cpu); + if (!ref) +@@ -455,15 +456,15 @@ static void nest_imc_counters_release(struct perf_event *event) + /* + * See if we need to disable the nest PMU. + * If no events are currently in use, then we have to take a +- * mutex to ensure that we don't race with another task doing ++ * lock to ensure that we don't race with another task doing + * enable or disable the nest counters. + */ + ref = get_nest_pmu_ref(event->cpu); + if (!ref) + return; + +- /* Take the mutex lock for this node and then decrement the reference count */ +- mutex_lock(&ref->lock); ++ /* Take the lock for this node and then decrement the reference count */ ++ spin_lock(&ref->lock); + if (ref->refc == 0) { + /* + * The scenario where this is true is, when perf session is +@@ -475,7 +476,7 @@ static void nest_imc_counters_release(struct perf_event *event) + * an OPAL call to disable the engine in that node. 
+ * + */ +- mutex_unlock(&ref->lock); ++ spin_unlock(&ref->lock); + return; + } + ref->refc--; +@@ -483,7 +484,7 @@ static void nest_imc_counters_release(struct perf_event *event) + rc = opal_imc_counters_stop(OPAL_IMC_COUNTERS_NEST, + get_hard_smp_processor_id(event->cpu)); + if (rc) { +- mutex_unlock(&ref->lock); ++ spin_unlock(&ref->lock); + pr_err("nest-imc: Unable to stop the counters for core %d\n", node_id); + return; + } +@@ -491,7 +492,7 @@ static void nest_imc_counters_release(struct perf_event *event) + WARN(1, "nest-imc: Invalid event reference count\n"); + ref->refc = 0; + } +- mutex_unlock(&ref->lock); ++ spin_unlock(&ref->lock); + } + + static int nest_imc_event_init(struct perf_event *event) +@@ -550,26 +551,25 @@ static int nest_imc_event_init(struct perf_event *event) + + /* + * Get the imc_pmu_ref struct for this node. +- * Take the mutex lock and then increment the count of nest pmu events +- * inited. ++ * Take the lock and then increment the count of nest pmu events inited. + */ + ref = get_nest_pmu_ref(event->cpu); + if (!ref) + return -EINVAL; + +- mutex_lock(&ref->lock); ++ spin_lock(&ref->lock); + if (ref->refc == 0) { + rc = opal_imc_counters_start(OPAL_IMC_COUNTERS_NEST, + get_hard_smp_processor_id(event->cpu)); + if (rc) { +- mutex_unlock(&ref->lock); ++ spin_unlock(&ref->lock); + pr_err("nest-imc: Unable to start the counters for node %d\n", + node_id); + return rc; + } + } + ++ref->refc; +- mutex_unlock(&ref->lock); ++ spin_unlock(&ref->lock); + + event->destroy = nest_imc_counters_release; + return 0; +@@ -605,9 +605,8 @@ static int core_imc_mem_init(int cpu, int size) + return -ENOMEM; + mem_info->vbase = page_address(page); + +- /* Init the mutex */ + core_imc_refc[core_id].id = core_id; +- mutex_init(&core_imc_refc[core_id].lock); ++ spin_lock_init(&core_imc_refc[core_id].lock); + + rc = opal_imc_counters_init(OPAL_IMC_COUNTERS_CORE, + __pa((void *)mem_info->vbase), +@@ -696,9 +695,8 @@ static int ppc_core_imc_cpu_offline(unsigned int cpu) + perf_pmu_migrate_context(&core_imc_pmu->pmu, cpu, ncpu); + } else { + /* +- * If this is the last cpu in this core then, skip taking refernce +- * count mutex lock for this core and directly zero "refc" for +- * this core. ++ * If this is the last cpu in this core then skip taking reference ++ * count lock for this core and directly zero "refc" for this core. + */ + opal_imc_counters_stop(OPAL_IMC_COUNTERS_CORE, + get_hard_smp_processor_id(cpu)); +@@ -713,11 +711,11 @@ static int ppc_core_imc_cpu_offline(unsigned int cpu) + * last cpu in this core and core-imc event running + * in this cpu. + */ +- mutex_lock(&imc_global_refc.lock); ++ spin_lock(&imc_global_refc.lock); + if (imc_global_refc.id == IMC_DOMAIN_CORE) + imc_global_refc.refc--; + +- mutex_unlock(&imc_global_refc.lock); ++ spin_unlock(&imc_global_refc.lock); + } + return 0; + } +@@ -732,7 +730,7 @@ static int core_imc_pmu_cpumask_init(void) + + static void reset_global_refc(struct perf_event *event) + { +- mutex_lock(&imc_global_refc.lock); ++ spin_lock(&imc_global_refc.lock); + imc_global_refc.refc--; + + /* +@@ -744,7 +742,7 @@ static void reset_global_refc(struct perf_event *event) + imc_global_refc.refc = 0; + imc_global_refc.id = 0; + } +- mutex_unlock(&imc_global_refc.lock); ++ spin_unlock(&imc_global_refc.lock); + } + + static void core_imc_counters_release(struct perf_event *event) +@@ -757,17 +755,17 @@ static void core_imc_counters_release(struct perf_event *event) + /* + * See if we need to disable the IMC PMU. 
+ * If no events are currently in use, then we have to take a +- * mutex to ensure that we don't race with another task doing ++ * lock to ensure that we don't race with another task doing + * enable or disable the core counters. + */ + core_id = event->cpu / threads_per_core; + +- /* Take the mutex lock and decrement the refernce count for this core */ ++ /* Take the lock and decrement the refernce count for this core */ + ref = &core_imc_refc[core_id]; + if (!ref) + return; + +- mutex_lock(&ref->lock); ++ spin_lock(&ref->lock); + if (ref->refc == 0) { + /* + * The scenario where this is true is, when perf session is +@@ -779,7 +777,7 @@ static void core_imc_counters_release(struct perf_event *event) + * an OPAL call to disable the engine in that core. + * + */ +- mutex_unlock(&ref->lock); ++ spin_unlock(&ref->lock); + return; + } + ref->refc--; +@@ -787,7 +785,7 @@ static void core_imc_counters_release(struct perf_event *event) + rc = opal_imc_counters_stop(OPAL_IMC_COUNTERS_CORE, + get_hard_smp_processor_id(event->cpu)); + if (rc) { +- mutex_unlock(&ref->lock); ++ spin_unlock(&ref->lock); + pr_err("IMC: Unable to stop the counters for core %d\n", core_id); + return; + } +@@ -795,7 +793,7 @@ static void core_imc_counters_release(struct perf_event *event) + WARN(1, "core-imc: Invalid event reference count\n"); + ref->refc = 0; + } +- mutex_unlock(&ref->lock); ++ spin_unlock(&ref->lock); + + reset_global_refc(event); + } +@@ -833,7 +831,6 @@ static int core_imc_event_init(struct perf_event *event) + if ((!pcmi->vbase)) + return -ENODEV; + +- /* Get the core_imc mutex for this core */ + ref = &core_imc_refc[core_id]; + if (!ref) + return -EINVAL; +@@ -841,22 +838,22 @@ static int core_imc_event_init(struct perf_event *event) + /* + * Core pmu units are enabled only when it is used. + * See if this is triggered for the first time. +- * If yes, take the mutex lock and enable the core counters. ++ * If yes, take the lock and enable the core counters. + * If not, just increment the count in core_imc_refc struct. + */ +- mutex_lock(&ref->lock); ++ spin_lock(&ref->lock); + if (ref->refc == 0) { + rc = opal_imc_counters_start(OPAL_IMC_COUNTERS_CORE, + get_hard_smp_processor_id(event->cpu)); + if (rc) { +- mutex_unlock(&ref->lock); ++ spin_unlock(&ref->lock); + pr_err("core-imc: Unable to start the counters for core %d\n", + core_id); + return rc; + } + } + ++ref->refc; +- mutex_unlock(&ref->lock); ++ spin_unlock(&ref->lock); + + /* + * Since the system can run either in accumulation or trace-mode +@@ -867,7 +864,7 @@ static int core_imc_event_init(struct perf_event *event) + * to know whether any other trace/thread imc + * events are running. 
+ */ +- mutex_lock(&imc_global_refc.lock); ++ spin_lock(&imc_global_refc.lock); + if (imc_global_refc.id == 0 || imc_global_refc.id == IMC_DOMAIN_CORE) { + /* + * No other trace/thread imc events are running in +@@ -876,10 +873,10 @@ static int core_imc_event_init(struct perf_event *event) + imc_global_refc.id = IMC_DOMAIN_CORE; + imc_global_refc.refc++; + } else { +- mutex_unlock(&imc_global_refc.lock); ++ spin_unlock(&imc_global_refc.lock); + return -EBUSY; + } +- mutex_unlock(&imc_global_refc.lock); ++ spin_unlock(&imc_global_refc.lock); + + event->hw.event_base = (u64)pcmi->vbase + (config & IMC_EVENT_OFFSET_MASK); + event->destroy = core_imc_counters_release; +@@ -951,10 +948,10 @@ static int ppc_thread_imc_cpu_offline(unsigned int cpu) + mtspr(SPRN_LDBAR, (mfspr(SPRN_LDBAR) & (~(1UL << 63)))); + + /* Reduce the refc if thread-imc event running on this cpu */ +- mutex_lock(&imc_global_refc.lock); ++ spin_lock(&imc_global_refc.lock); + if (imc_global_refc.id == IMC_DOMAIN_THREAD) + imc_global_refc.refc--; +- mutex_unlock(&imc_global_refc.lock); ++ spin_unlock(&imc_global_refc.lock); + + return 0; + } +@@ -994,7 +991,7 @@ static int thread_imc_event_init(struct perf_event *event) + if (!target) + return -EINVAL; + +- mutex_lock(&imc_global_refc.lock); ++ spin_lock(&imc_global_refc.lock); + /* + * Check if any other trace/core imc events are running in the + * system, if not set the global id to thread-imc. +@@ -1003,10 +1000,10 @@ static int thread_imc_event_init(struct perf_event *event) + imc_global_refc.id = IMC_DOMAIN_THREAD; + imc_global_refc.refc++; + } else { +- mutex_unlock(&imc_global_refc.lock); ++ spin_unlock(&imc_global_refc.lock); + return -EBUSY; + } +- mutex_unlock(&imc_global_refc.lock); ++ spin_unlock(&imc_global_refc.lock); + + event->pmu->task_ctx_nr = perf_sw_context; + event->destroy = reset_global_refc; +@@ -1128,25 +1125,25 @@ static int thread_imc_event_add(struct perf_event *event, int flags) + /* + * imc pmus are enabled only when it is used. + * See if this is triggered for the first time. +- * If yes, take the mutex lock and enable the counters. ++ * If yes, take the lock and enable the counters. + * If not, just increment the count in ref count struct. 
+ */ + ref = &core_imc_refc[core_id]; + if (!ref) + return -EINVAL; + +- mutex_lock(&ref->lock); ++ spin_lock(&ref->lock); + if (ref->refc == 0) { + if (opal_imc_counters_start(OPAL_IMC_COUNTERS_CORE, + get_hard_smp_processor_id(smp_processor_id()))) { +- mutex_unlock(&ref->lock); ++ spin_unlock(&ref->lock); + pr_err("thread-imc: Unable to start the counter\ + for core %d\n", core_id); + return -EINVAL; + } + } + ++ref->refc; +- mutex_unlock(&ref->lock); ++ spin_unlock(&ref->lock); + return 0; + } + +@@ -1163,12 +1160,12 @@ static void thread_imc_event_del(struct perf_event *event, int flags) + return; + } + +- mutex_lock(&ref->lock); ++ spin_lock(&ref->lock); + ref->refc--; + if (ref->refc == 0) { + if (opal_imc_counters_stop(OPAL_IMC_COUNTERS_CORE, + get_hard_smp_processor_id(smp_processor_id()))) { +- mutex_unlock(&ref->lock); ++ spin_unlock(&ref->lock); + pr_err("thread-imc: Unable to stop the counters\ + for core %d\n", core_id); + return; +@@ -1176,7 +1173,7 @@ static void thread_imc_event_del(struct perf_event *event, int flags) + } else if (ref->refc < 0) { + ref->refc = 0; + } +- mutex_unlock(&ref->lock); ++ spin_unlock(&ref->lock); + + /* Set bit 0 of LDBAR to zero, to stop posting updates to memory */ + mtspr(SPRN_LDBAR, (mfspr(SPRN_LDBAR) & (~(1UL << 63)))); +@@ -1217,9 +1214,8 @@ static int trace_imc_mem_alloc(int cpu_id, int size) + } + } + +- /* Init the mutex, if not already */ + trace_imc_refc[core_id].id = core_id; +- mutex_init(&trace_imc_refc[core_id].lock); ++ spin_lock_init(&trace_imc_refc[core_id].lock); + + mtspr(SPRN_LDBAR, 0); + return 0; +@@ -1239,10 +1235,10 @@ static int ppc_trace_imc_cpu_offline(unsigned int cpu) + * Reduce the refc if any trace-imc event running + * on this cpu. + */ +- mutex_lock(&imc_global_refc.lock); ++ spin_lock(&imc_global_refc.lock); + if (imc_global_refc.id == IMC_DOMAIN_TRACE) + imc_global_refc.refc--; +- mutex_unlock(&imc_global_refc.lock); ++ spin_unlock(&imc_global_refc.lock); + + return 0; + } +@@ -1364,17 +1360,17 @@ static int trace_imc_event_add(struct perf_event *event, int flags) + } + + mtspr(SPRN_LDBAR, ldbar_value); +- mutex_lock(&ref->lock); ++ spin_lock(&ref->lock); + if (ref->refc == 0) { + if (opal_imc_counters_start(OPAL_IMC_COUNTERS_TRACE, + get_hard_smp_processor_id(smp_processor_id()))) { +- mutex_unlock(&ref->lock); ++ spin_unlock(&ref->lock); + pr_err("trace-imc: Unable to start the counters for core %d\n", core_id); + return -EINVAL; + } + } + ++ref->refc; +- mutex_unlock(&ref->lock); ++ spin_unlock(&ref->lock); + return 0; + } + +@@ -1407,19 +1403,19 @@ static void trace_imc_event_del(struct perf_event *event, int flags) + return; + } + +- mutex_lock(&ref->lock); ++ spin_lock(&ref->lock); + ref->refc--; + if (ref->refc == 0) { + if (opal_imc_counters_stop(OPAL_IMC_COUNTERS_TRACE, + get_hard_smp_processor_id(smp_processor_id()))) { +- mutex_unlock(&ref->lock); ++ spin_unlock(&ref->lock); + pr_err("trace-imc: Unable to stop the counters for core %d\n", core_id); + return; + } + } else if (ref->refc < 0) { + ref->refc = 0; + } +- mutex_unlock(&ref->lock); ++ spin_unlock(&ref->lock); + + trace_imc_event_stop(event, flags); + } +@@ -1441,7 +1437,7 @@ static int trace_imc_event_init(struct perf_event *event) + * no other thread is running any core/thread imc + * events + */ +- mutex_lock(&imc_global_refc.lock); ++ spin_lock(&imc_global_refc.lock); + if (imc_global_refc.id == 0 || imc_global_refc.id == IMC_DOMAIN_TRACE) { + /* + * No core/thread imc events are running in the +@@ -1450,14 +1446,18 @@ static int 
trace_imc_event_init(struct perf_event *event) + imc_global_refc.id = IMC_DOMAIN_TRACE; + imc_global_refc.refc++; + } else { +- mutex_unlock(&imc_global_refc.lock); ++ spin_unlock(&imc_global_refc.lock); + return -EBUSY; + } +- mutex_unlock(&imc_global_refc.lock); ++ spin_unlock(&imc_global_refc.lock); event->hw.idx = -1; @@ -30040,6 +37885,19 @@ index e106909ff9c37..e7583fbcc8fa1 100644 event->destroy = reset_global_refc; return 0; } +@@ -1522,10 +1522,10 @@ static int init_nest_pmu_ref(void) + i = 0; + for_each_node(nid) { + /* +- * Mutex lock to avoid races while tracking the number of ++ * Take the lock to avoid races while tracking the number of + * sessions using the chip's nest pmu units. + */ +- mutex_init(&nest_imc_refc[i].lock); ++ spin_lock_init(&nest_imc_refc[i].lock); + + /* + * Loop to init the "id" with the node_id. Variable "i" initialized to diff --git a/arch/powerpc/perf/isa207-common.c b/arch/powerpc/perf/isa207-common.c index f92bf5f6b74f1..027a2add780e8 100644 --- a/arch/powerpc/perf/isa207-common.c @@ -30240,6 +38098,37 @@ index ff3382140d7e6..cbdd074ee2a70 100644 }; static int power9_get_alternatives(u64 event, unsigned int flags, u64 alt[]) +diff --git a/arch/powerpc/perf/req-gen/perf.h b/arch/powerpc/perf/req-gen/perf.h +index fa9bc804e67af..6b2a59fefffa7 100644 +--- a/arch/powerpc/perf/req-gen/perf.h ++++ b/arch/powerpc/perf/req-gen/perf.h +@@ -139,6 +139,26 @@ PMU_EVENT_ATTR_STRING( \ + #define REQUEST_(r_name, r_value, r_idx_1, r_fields) \ + r_fields + ++/* Generate event list for platforms with counter_info_version 0x6 or below */ ++static __maybe_unused struct attribute *hv_gpci_event_attrs_v6[] = { ++#include REQUEST_FILE ++ NULL ++}; ++ ++/* ++ * Based on getPerfCountInfo v1.018 documentation, some of the hv-gpci ++ * events were deprecated for platform firmware that supports ++ * counter_info_version 0x8 or above. ++ * Those deprecated events are still part of platform firmware that ++ * support counter_info_version 0x6 and below. As per the getPerfCountInfo ++ * v1.018 documentation there is no counter_info_version 0x7. ++ * Undefining macro ENABLE_EVENTS_COUNTERINFO_V6, to disable the addition of ++ * deprecated events in "hv_gpci_event_attrs" attribute group, for platforms ++ * that supports counter_info_version 0x8 or above. 
++ */ ++#undef ENABLE_EVENTS_COUNTERINFO_V6 ++ ++/* Generate event list for platforms with counter_info_version 0x8 or above*/ + static __maybe_unused struct attribute *hv_gpci_event_attrs[] = { + #include REQUEST_FILE + NULL diff --git a/arch/powerpc/platforms/44x/fsp2.c b/arch/powerpc/platforms/44x/fsp2.c index b299e43f5ef94..823397c802def 100644 --- a/arch/powerpc/platforms/44x/fsp2.c @@ -30272,6 +38161,31 @@ index ae8b812c92029..2481e78c04234 100644 + return 1; } __setup("powersave=off", cpm_powersave_off); +diff --git a/arch/powerpc/platforms/52xx/mpc52xx_lpbfifo.c b/arch/powerpc/platforms/52xx/mpc52xx_lpbfifo.c +index b91ebebd9ff20..e0049b7df2125 100644 +--- a/arch/powerpc/platforms/52xx/mpc52xx_lpbfifo.c ++++ b/arch/powerpc/platforms/52xx/mpc52xx_lpbfifo.c +@@ -530,6 +530,7 @@ static int mpc52xx_lpbfifo_probe(struct platform_device *op) + err_bcom_rx_irq: + bcom_gen_bd_rx_release(lpbfifo.bcom_rx_task); + err_bcom_rx: ++ free_irq(lpbfifo.irq, &lpbfifo); + err_irq: + iounmap(lpbfifo.regs); + lpbfifo.regs = NULL; +diff --git a/arch/powerpc/platforms/83xx/mpc832x_rdb.c b/arch/powerpc/platforms/83xx/mpc832x_rdb.c +index b6133a237a709..6e18d07035680 100644 +--- a/arch/powerpc/platforms/83xx/mpc832x_rdb.c ++++ b/arch/powerpc/platforms/83xx/mpc832x_rdb.c +@@ -106,7 +106,7 @@ static int __init of_fsl_spi_probe(char *type, char *compatible, u32 sysclk, + + goto next; + unreg: +- platform_device_del(pdev); ++ platform_device_put(pdev); + err: + pr_err("%pOF: registration failed\n", np); + next: diff --git a/arch/powerpc/platforms/85xx/Makefile b/arch/powerpc/platforms/85xx/Makefile index 60e4e97a929db..260fbad7967b2 100644 --- a/arch/powerpc/platforms/85xx/Makefile @@ -31341,6 +39255,31 @@ index 8bb08e395de05..08d9d3d5a22b0 100644 int rx_fifo_size; int wcreds_max; int rsvd_txbuf_count; +diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c +index 09fafcf2d3a06..f51fd4ac3f0b6 100644 +--- a/arch/powerpc/platforms/pseries/eeh_pseries.c ++++ b/arch/powerpc/platforms/pseries/eeh_pseries.c +@@ -845,18 +845,8 @@ static int __init eeh_pseries_init(void) + return -EINVAL; + } + +- /* Initialize error log lock and size */ +- spin_lock_init(&slot_errbuf_lock); +- eeh_error_buf_size = rtas_token("rtas-error-log-max"); +- if (eeh_error_buf_size == RTAS_UNKNOWN_SERVICE) { +- pr_info("%s: unknown EEH error log size\n", +- __func__); +- eeh_error_buf_size = 1024; +- } else if (eeh_error_buf_size > RTAS_ERROR_LOG_MAX) { +- pr_info("%s: EEH error log size %d exceeds the maximal %d\n", +- __func__, eeh_error_buf_size, RTAS_ERROR_LOG_MAX); +- eeh_error_buf_size = RTAS_ERROR_LOG_MAX; +- } ++ /* Initialize error log size */ ++ eeh_error_buf_size = rtas_get_error_log_max(); + + /* Set EEH probe mode */ + eeh_add_flag(EEH_PROBE_MODE_DEVTREE | EEH_ENABLE_IO_FOR_LOG); diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index a52af8fbf5711..8322ca86d5acf 100644 --- a/arch/powerpc/platforms/pseries/iommu.c @@ -31768,7 +39707,7 @@ index c5d75c02ad8b5..7b69299c29123 100644 return; irq_set_default_host(xive_irq_domain); diff --git a/arch/powerpc/sysdev/xive/spapr.c b/arch/powerpc/sysdev/xive/spapr.c -index f143b6f111ac0..2bf78a30238b9 100644 +index f143b6f111ac0..43bd2579d942b 100644 --- a/arch/powerpc/sysdev/xive/spapr.c +++ b/arch/powerpc/sysdev/xive/spapr.c @@ -13,6 +13,7 @@ @@ -31806,7 +39745,15 @@ index f143b6f111ac0..2bf78a30238b9 100644 static int __xive_irq_bitmap_alloc(struct xive_irq_bitmap *xibm) { int irq; -@@ -653,6 
+665,9 @@ static int xive_spapr_debug_show(struct seq_file *m, void *private) +@@ -425,6 +437,7 @@ static int xive_spapr_populate_irq_data(u32 hw_irq, struct xive_irq_data *data) + + data->trig_mmio = ioremap(data->trig_page, 1u << data->esb_shift); + if (!data->trig_mmio) { ++ iounmap(data->eoi_mmio); + pr_err("Failed to map trigger page for irq 0x%x\n", hw_irq); + return -ENOMEM; + } +@@ -653,6 +666,9 @@ static int xive_spapr_debug_show(struct seq_file *m, void *private) struct xive_irq_bitmap *xibm; char *buf = kmalloc(PAGE_SIZE, GFP_KERNEL); @@ -31816,7 +39763,7 @@ index f143b6f111ac0..2bf78a30238b9 100644 list_for_each_entry(xibm, &xive_irq_bitmaps, list) { memset(buf, 0, PAGE_SIZE); bitmap_print_to_pagebuf(true, buf, xibm->bitmap, xibm->count); -@@ -701,6 +716,7 @@ static bool xive_get_max_prio(u8 *max_prio) +@@ -701,6 +717,7 @@ static bool xive_get_max_prio(u8 *max_prio) } reg = of_get_property(rootdn, "ibm,plat-res-int-priorities", &len); @@ -31824,7 +39771,7 @@ index f143b6f111ac0..2bf78a30238b9 100644 if (!reg) { pr_err("Failed to read 'ibm,plat-res-int-priorities' property\n"); return false; -@@ -800,7 +816,7 @@ bool __init xive_spapr_init(void) +@@ -800,7 +817,7 @@ bool __init xive_spapr_init(void) u32 val; u32 len; const __be32 *reg; @@ -31833,7 +39780,7 @@ index f143b6f111ac0..2bf78a30238b9 100644 if (xive_spapr_disabled()) return false; -@@ -816,32 +832,35 @@ bool __init xive_spapr_init(void) +@@ -816,32 +833,35 @@ bool __init xive_spapr_init(void) /* Resource 1 is the OS ring TIMA */ if (of_address_to_resource(np, 1, &r)) { pr_err("Failed to get thread mgmnt area resource\n"); @@ -31877,7 +39824,7 @@ index f143b6f111ac0..2bf78a30238b9 100644 /* Iterate the EQ sizes and pick one */ of_property_for_each_u32(np, "ibm,xive-eq-sizes", prop, reg, val) { -@@ -852,10 +871,19 @@ bool __init xive_spapr_init(void) +@@ -852,10 +872,19 @@ bool __init xive_spapr_init(void) /* Initialize XIVE core with our backend */ if (!xive_core_init(np, &xive_spapr_ops, tima, TM_QW1_OS, max_prio)) @@ -31921,10 +39868,31 @@ index 014e00e74d2b6..63792af004170 100755 - echo "WARNING: You need at least binutils >= 2.19 to build a CONFIG_RELOCATABLE kernel" -fi diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c -index dd8241c009e53..8b28ff9d98d16 100644 +index dd8241c009e53..3c085e1e5232e 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c -@@ -3264,8 +3264,7 @@ static void show_task(struct task_struct *volatile tsk) +@@ -1528,9 +1528,9 @@ bpt_cmds(void) + cmd = inchar(); + + switch (cmd) { +- static const char badaddr[] = "Only kernel addresses are permitted for breakpoints\n"; +- int mode; +- case 'd': /* bd - hardware data breakpoint */ ++ case 'd': { /* bd - hardware data breakpoint */ ++ static const char badaddr[] = "Only kernel addresses are permitted for breakpoints\n"; ++ int mode; + if (xmon_is_ro) { + printf(xmon_ro_msg); + break; +@@ -1563,6 +1563,7 @@ bpt_cmds(void) + + force_enable_xmon(); + break; ++ } + + case 'i': /* bi - hardware instr breakpoint */ + if (xmon_is_ro) { +@@ -3264,8 +3265,7 @@ static void show_task(struct task_struct *volatile tsk) * appropriate for calling from xmon. This could be moved * to a common, generic, routine used by both. */ @@ -32013,7 +39981,7 @@ index 30676ebb16ebd..46a534f047931 100644 This enables support for SiFive SoC platform hardware. 
diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile -index 0eb4568fbd290..dc77857ca27db 100644 +index 0eb4568fbd290..337a686f941b5 100644 --- a/arch/riscv/Makefile +++ b/arch/riscv/Makefile @@ -39,6 +39,7 @@ else @@ -32044,15 +40012,19 @@ index 0eb4568fbd290..dc77857ca27db 100644 KBUILD_CFLAGS += -march=$(subst fd,,$(riscv-march-y)) KBUILD_AFLAGS += -march=$(riscv-march-y) -@@ -69,6 +77,7 @@ ifeq ($(CONFIG_PERF_EVENTS),y) +@@ -68,7 +76,11 @@ ifeq ($(CONFIG_PERF_EVENTS),y) + KBUILD_CFLAGS += -fno-omit-frame-pointer endif ++# Avoid generating .eh_frame sections. ++KBUILD_CFLAGS += -fno-asynchronous-unwind-tables -fno-unwind-tables ++ KBUILD_CFLAGS_MODULE += $(call cc-option,-mno-relax) +KBUILD_AFLAGS_MODULE += $(call as-option,-Wa$(comma)-mno-relax) # GCC versions that support the "-mstrict-align" option default to allowing # unaligned accesses. While unaligned accesses are explicitly allowed in the -@@ -108,11 +117,13 @@ PHONY += vdso_install +@@ -108,11 +120,13 @@ PHONY += vdso_install vdso_install: $(Q)$(MAKE) $(build)=arch/riscv/kernel/vdso $@ @@ -32257,7 +40229,7 @@ index 7db8610534834..64c06c9b41dc8 100644 reg = <0x0 0x3000000 0x0 0x8000>; interrupt-parent = <&plic0>; diff --git a/arch/riscv/boot/dts/sifive/fu740-c000.dtsi b/arch/riscv/boot/dts/sifive/fu740-c000.dtsi -index abbb960f90a00..454079a69ab44 100644 +index abbb960f90a00..f72bb158a7ab3 100644 --- a/arch/riscv/boot/dts/sifive/fu740-c000.dtsi +++ b/arch/riscv/boot/dts/sifive/fu740-c000.dtsi @@ -134,6 +134,30 @@ @@ -32291,11 +40263,72 @@ index abbb960f90a00..454079a69ab44 100644 }; soc { #address-cells = <2>; +@@ -304,7 +328,7 @@ + bus-range = <0x0 0xff>; + ranges = <0x81000000 0x0 0x60080000 0x0 0x60080000 0x0 0x10000>, /* I/O */ + <0x82000000 0x0 0x60090000 0x0 0x60090000 0x0 0xff70000>, /* mem */ +- <0x82000000 0x0 0x70000000 0x0 0x70000000 0x0 0x1000000>, /* mem */ ++ <0x82000000 0x0 0x70000000 0x0 0x70000000 0x0 0x10000000>, /* mem */ + <0xc3000000 0x20 0x00000000 0x20 0x00000000 0x20 0x00000000>; /* mem prefetchable */ + num-lanes = <0x8>; + interrupts = <56>, <57>, <58>, <59>, <60>, <61>, <62>, <63>, <64>; diff --git a/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts b/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts -index 60846e88ae4b1..22f971e971614 100644 +index 60846e88ae4b1..2f4d677c9c4ff 100644 --- a/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts +++ b/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts -@@ -80,6 +80,7 @@ +@@ -3,6 +3,8 @@ + + #include "fu540-c000.dtsi" + #include <dt-bindings/gpio/gpio.h> ++#include <dt-bindings/leds/common.h> ++#include <dt-bindings/pwm/pwm.h> + + /* Clock frequency (in Hz) of the PCB crystal for rtcclk */ + #define RTCCLK_FREQ 1000000 +@@ -46,6 +48,42 @@ + compatible = "gpio-restart"; + gpios = <&gpio 10 GPIO_ACTIVE_LOW>; + }; ++ ++ led-controller { ++ compatible = "pwm-leds"; ++ ++ led-d1 { ++ pwms = <&pwm0 0 7812500 PWM_POLARITY_INVERTED>; ++ active-low; ++ color = <LED_COLOR_ID_GREEN>; ++ max-brightness = <255>; ++ label = "d1"; ++ }; ++ ++ led-d2 { ++ pwms = <&pwm0 1 7812500 PWM_POLARITY_INVERTED>; ++ active-low; ++ color = <LED_COLOR_ID_GREEN>; ++ max-brightness = <255>; ++ label = "d2"; ++ }; ++ ++ led-d3 { ++ pwms = <&pwm0 2 7812500 PWM_POLARITY_INVERTED>; ++ active-low; ++ color = <LED_COLOR_ID_GREEN>; ++ max-brightness = <255>; ++ label = "d3"; ++ }; ++ ++ led-d4 { ++ pwms = <&pwm0 3 7812500 PWM_POLARITY_INVERTED>; ++ active-low; ++ color = <LED_COLOR_ID_GREEN>; ++ max-brightness = <255>; ++ label = "d4"; ++ }; ++ }; + }; + + &uart0 { +@@ -80,6 +118,7 @@ 
spi-max-frequency = <20000000>; voltage-ranges = <3300 3300>; disable-wp; @@ -32422,11 +40455,27 @@ index 67406c3763890..0377ce0fcc726 100644 ".endif\n" #define __ALTERNATIVE_CFG(old_c, new_c, vendor_id, errata_id, enable) \ +diff --git a/arch/riscv/include/asm/asm.h b/arch/riscv/include/asm/asm.h +index 618d7c5af1a2d..e15a1c9f1cf88 100644 +--- a/arch/riscv/include/asm/asm.h ++++ b/arch/riscv/include/asm/asm.h +@@ -23,6 +23,7 @@ + #define REG_L __REG_SEL(ld, lw) + #define REG_S __REG_SEL(sd, sw) + #define REG_SC __REG_SEL(sc.d, sc.w) ++#define REG_AMOSWAP_AQ __REG_SEL(amoswap.d.aq, amoswap.w.aq) + #define REG_ASM __REG_SEL(.dword, .word) + #define SZREG __REG_SEL(8, 4) + #define LGREG __REG_SEL(3, 2) diff --git a/arch/riscv/include/asm/efi.h b/arch/riscv/include/asm/efi.h -index 49b398fe99f1b..cc4f6787f9371 100644 +index 49b398fe99f1b..1bb8662875dda 100644 --- a/arch/riscv/include/asm/efi.h +++ b/arch/riscv/include/asm/efi.h -@@ -13,7 +13,6 @@ +@@ -10,10 +10,10 @@ + #include <asm/mmu_context.h> + #include <asm/ptrace.h> + #include <asm/tlbflush.h> ++#include <asm/pgalloc.h> #ifdef CONFIG_EFI extern void efi_init(void); @@ -32434,6 +40483,33 @@ index 49b398fe99f1b..cc4f6787f9371 100644 #else #define efi_init() #endif +@@ -21,7 +21,10 @@ extern void efifb_setup_from_dmi(struct screen_info *si, const char *opt); + int efi_create_mapping(struct mm_struct *mm, efi_memory_desc_t *md); + int efi_set_mapping_permissions(struct mm_struct *mm, efi_memory_desc_t *md); + +-#define arch_efi_call_virt_setup() efi_virtmap_load() ++#define arch_efi_call_virt_setup() ({ \ ++ sync_kernel_mappings(efi_mm.pgd); \ ++ efi_virtmap_load(); \ ++ }) + #define arch_efi_call_virt_teardown() efi_virtmap_unload() + + #define arch_efi_call_virt(p, f, args...) p->f(args) +diff --git a/arch/riscv/include/asm/hugetlb.h b/arch/riscv/include/asm/hugetlb.h +index a5c2ca1d1cd8b..ec19d6afc8965 100644 +--- a/arch/riscv/include/asm/hugetlb.h ++++ b/arch/riscv/include/asm/hugetlb.h +@@ -5,4 +5,10 @@ + #include <asm-generic/hugetlb.h> + #include <asm/page.h> + ++static inline void arch_clear_hugepage_flags(struct page *page) ++{ ++ clear_bit(PG_dcache_clean, &page->flags); ++} ++#define arch_clear_hugepage_flags arch_clear_hugepage_flags ++ + #endif /* _ASM_RISCV_HUGETLB_H */ diff --git a/arch/riscv/include/asm/io.h b/arch/riscv/include/asm/io.h index 69605a4742706..92080a2279372 100644 --- a/arch/riscv/include/asm/io.h @@ -32492,6 +40568,19 @@ index d6c277992f76a..b53891964ae03 100644 } extern void arch_irq_work_raise(void); #endif /* _ASM_RISCV_IRQ_WORK_H */ +diff --git a/arch/riscv/include/asm/mmu.h b/arch/riscv/include/asm/mmu.h +index 0099dc1161683..5ff1f19fd45c2 100644 +--- a/arch/riscv/include/asm/mmu.h ++++ b/arch/riscv/include/asm/mmu.h +@@ -19,6 +19,8 @@ typedef struct { + #ifdef CONFIG_SMP + /* A local icache flush is needed before user execution can resume. */ + cpumask_t icache_stale_mask; ++ /* A local tlb flush is needed before user execution can resume. 
*/ ++ cpumask_t tlb_stale_mask; + #endif + } mm_context_t; + diff --git a/arch/riscv/include/asm/module.lds.h b/arch/riscv/include/asm/module.lds.h index 4254ff2ff0494..1075beae1ac64 100644 --- a/arch/riscv/include/asm/module.lds.h @@ -32508,6 +40597,48 @@ index 4254ff2ff0494..1075beae1ac64 100644 + .got.plt : { BYTE(0) } } #endif +diff --git a/arch/riscv/include/asm/pgalloc.h b/arch/riscv/include/asm/pgalloc.h +index 0af6933a7100d..98e0403324823 100644 +--- a/arch/riscv/include/asm/pgalloc.h ++++ b/arch/riscv/include/asm/pgalloc.h +@@ -38,6 +38,13 @@ static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd) + } + #endif /* __PAGETABLE_PMD_FOLDED */ + ++static inline void sync_kernel_mappings(pgd_t *pgd) ++{ ++ memcpy(pgd + USER_PTRS_PER_PGD, ++ init_mm.pgd + USER_PTRS_PER_PGD, ++ (PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t)); ++} ++ + static inline pgd_t *pgd_alloc(struct mm_struct *mm) + { + pgd_t *pgd; +@@ -46,9 +53,7 @@ static inline pgd_t *pgd_alloc(struct mm_struct *mm) + if (likely(pgd != NULL)) { + memset(pgd, 0, USER_PTRS_PER_PGD * sizeof(pgd_t)); + /* Copy kernel mappings */ +- memcpy(pgd + USER_PTRS_PER_PGD, +- init_mm.pgd + USER_PTRS_PER_PGD, +- (PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t)); ++ sync_kernel_mappings(pgd); + } + return pgd; + } +diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h +index 39b550310ec64..799c16e065253 100644 +--- a/arch/riscv/include/asm/pgtable.h ++++ b/arch/riscv/include/asm/pgtable.h +@@ -386,7 +386,7 @@ static inline void update_mmu_cache(struct vm_area_struct *vma, + * Relying on flush_tlb_fix_spurious_fault would suffice, but + * the extra traps reduce performance. So, eagerly SFENCE.VMA. + */ +- local_flush_tlb_page(address); ++ flush_tlb_page(vma, address); + } + + static inline void update_mmu_cache_pmd(struct vm_area_struct *vma, diff --git a/arch/riscv/include/asm/smp.h b/arch/riscv/include/asm/smp.h index a7d2811f35365..62d0e6e61da83 100644 --- a/arch/riscv/include/asm/smp.h @@ -32589,6 +40720,48 @@ index 507cae273bc62..d6a7428f6248d 100644 return get_cycles(); } #define random_get_entropy() random_get_entropy() +diff --git a/arch/riscv/include/asm/tlbflush.h b/arch/riscv/include/asm/tlbflush.h +index 801019381dea3..907b9efd39a87 100644 +--- a/arch/riscv/include/asm/tlbflush.h ++++ b/arch/riscv/include/asm/tlbflush.h +@@ -22,6 +22,24 @@ static inline void local_flush_tlb_page(unsigned long addr) + { + ALT_FLUSH_TLB_PAGE(__asm__ __volatile__ ("sfence.vma %0" : : "r" (addr) : "memory")); + } ++ ++static inline void local_flush_tlb_all_asid(unsigned long asid) ++{ ++ __asm__ __volatile__ ("sfence.vma x0, %0" ++ : ++ : "r" (asid) ++ : "memory"); ++} ++ ++static inline void local_flush_tlb_page_asid(unsigned long addr, ++ unsigned long asid) ++{ ++ __asm__ __volatile__ ("sfence.vma %0, %1" ++ : ++ : "r" (addr), "r" (asid) ++ : "memory"); ++} ++ + #else /* CONFIG_MMU */ + #define local_flush_tlb_all() do { } while (0) + #define local_flush_tlb_page(addr) do { } while (0) +diff --git a/arch/riscv/include/asm/uaccess.h b/arch/riscv/include/asm/uaccess.h +index f314ff44c48d1..d4d628af21a45 100644 +--- a/arch/riscv/include/asm/uaccess.h ++++ b/arch/riscv/include/asm/uaccess.h +@@ -216,7 +216,7 @@ do { \ + might_fault(); \ + access_ok(__p, sizeof(*__p)) ? 
\ + __get_user((x), __p) : \ +- ((x) = 0, -EFAULT); \ ++ ((x) = (__force __typeof__(x))0, -EFAULT); \ + }) + + #define __put_user_asm(insn, x, ptr, err) \ diff --git a/arch/riscv/include/asm/unistd.h b/arch/riscv/include/asm/unistd.h index 6c316093a1e59..977ee6181dabf 100644 --- a/arch/riscv/include/asm/unistd.h @@ -32673,7 +40846,7 @@ index 0241592982314..1aa540350abd3 100644 } if (md->attribute & EFI_MEMORY_XP) { diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S -index 98f502654edd3..7e52ad5d61adb 100644 +index 98f502654edd3..5ca2860cc06cd 100644 --- a/arch/riscv/kernel/entry.S +++ b/arch/riscv/kernel/entry.S @@ -108,7 +108,7 @@ _save_context: @@ -32716,6 +40889,26 @@ index 98f502654edd3..7e52ad5d61adb 100644 2: #endif REG_L a0, PT_STATUS(sp) +@@ -387,6 +387,19 @@ handle_syscall_trace_exit: + + #ifdef CONFIG_VMAP_STACK + handle_kernel_stack_overflow: ++ /* ++ * Takes the psuedo-spinlock for the shadow stack, in case multiple ++ * harts are concurrently overflowing their kernel stacks. We could ++ * store any value here, but since we're overflowing the kernel stack ++ * already we only have SP to use as a scratch register. So we just ++ * swap in the address of the spinlock, as that's definately non-zero. ++ * ++ * Pairs with a store_release in handle_bad_stack(). ++ */ ++1: la sp, spin_shadow_stack ++ REG_AMOSWAP_AQ sp, sp, (sp) ++ bnez sp, 1b ++ + la sp, shadow_stack + addi sp, sp, SHADOW_OVERFLOW_STACK_SIZE + diff --git a/arch/riscv/kernel/head.S b/arch/riscv/kernel/head.S index 52c5ff9804c55..4c3c7592b6fc8 100644 --- a/arch/riscv/kernel/head.S @@ -32776,10 +40969,19 @@ index a80b52a74f58c..059c5e216ae75 100644 .section ".rodata" diff --git a/arch/riscv/kernel/machine_kexec.c b/arch/riscv/kernel/machine_kexec.c -index e6eca271a4d60..ee79e6839b863 100644 +index e6eca271a4d60..db41c676e5a26 100644 --- a/arch/riscv/kernel/machine_kexec.c +++ b/arch/riscv/kernel/machine_kexec.c -@@ -65,7 +65,9 @@ machine_kexec_prepare(struct kimage *image) +@@ -15,6 +15,8 @@ + #include <linux/compiler.h> /* For unreachable() */ + #include <linux/cpu.h> /* For cpu_down() */ + #include <linux/reboot.h> ++#include <linux/interrupt.h> ++#include <linux/irq.h> + + /* + * kexec_image_info - Print received image details +@@ -65,7 +67,9 @@ machine_kexec_prepare(struct kimage *image) if (image->segment[i].memsz <= sizeof(fdt)) continue; @@ -32790,7 +40992,7 @@ index e6eca271a4d60..ee79e6839b863 100644 continue; if (fdt_check_header(&fdt)) -@@ -136,19 +138,37 @@ void machine_shutdown(void) +@@ -136,19 +140,70 @@ void machine_shutdown(void) #endif } @@ -32809,6 +41011,37 @@ index e6eca271a4d60..ee79e6839b863 100644 + smp_send_stop(); + cpus_stopped = 1; +} ++ ++static void machine_kexec_mask_interrupts(void) ++{ ++ unsigned int i; ++ struct irq_desc *desc; ++ ++ for_each_irq_desc(i, desc) { ++ struct irq_chip *chip; ++ int ret; ++ ++ chip = irq_desc_get_chip(desc); ++ if (!chip) ++ continue; ++ ++ /* ++ * First try to remove the active state. If this ++ * fails, try to EOI the interrupt. 
++ */ ++ ret = irq_set_irqchip_state(i, IRQCHIP_STATE_ACTIVE, false); ++ ++ if (ret && irqd_irq_inprogress(&desc->irq_data) && ++ chip->irq_eoi) ++ chip->irq_eoi(&desc->irq_data); ++ ++ if (chip->irq_mask) ++ chip->irq_mask(&desc->irq_data); ++ ++ if (chip->irq_disable && !irqd_irq_disabled(&desc->irq_data)) ++ chip->irq_disable(&desc->irq_data); ++ } ++} + /* * machine_crash_shutdown - Prepare to kexec after a kernel crash @@ -32829,10 +41062,12 @@ index e6eca271a4d60..ee79e6839b863 100644 + crash_save_cpu(regs, smp_processor_id()); - machine_shutdown(); ++ machine_kexec_mask_interrupts(); ++ pr_info("Starting crashdump kernel...\n"); } -@@ -169,7 +189,8 @@ machine_kexec(struct kimage *image) +@@ -169,7 +224,8 @@ machine_kexec(struct kimage *image) struct kimage_arch *internal = &image->arch; unsigned long jump_addr = (unsigned long) image->start; unsigned long first_ind_entry = (unsigned long) &image->head; @@ -32959,6 +41194,103 @@ index 0bb1854dce833..357f985041cb9 100644 pr_warn("RISC-V does not support perf in guest mode!"); return; } +diff --git a/arch/riscv/kernel/probes/kprobes.c b/arch/riscv/kernel/probes/kprobes.c +index 00088dc6da4b6..7548b1d62509c 100644 +--- a/arch/riscv/kernel/probes/kprobes.c ++++ b/arch/riscv/kernel/probes/kprobes.c +@@ -1,5 +1,7 @@ + // SPDX-License-Identifier: GPL-2.0+ + ++#define pr_fmt(fmt) "kprobes: " fmt ++ + #include <linux/kprobes.h> + #include <linux/extable.h> + #include <linux/slab.h> +@@ -46,18 +48,35 @@ static void __kprobes arch_simulate_insn(struct kprobe *p, struct pt_regs *regs) + post_kprobe_handler(p, kcb, regs); + } + +-int __kprobes arch_prepare_kprobe(struct kprobe *p) ++static bool __kprobes arch_check_kprobe(struct kprobe *p) + { +- unsigned long probe_addr = (unsigned long)p->addr; ++ unsigned long tmp = (unsigned long)p->addr - p->offset; ++ unsigned long addr = (unsigned long)p->addr; + +- if (probe_addr & 0x1) { +- pr_warn("Address not aligned.\n"); ++ while (tmp <= addr) { ++ if (tmp == addr) ++ return true; + +- return -EINVAL; ++ tmp += GET_INSN_LENGTH(*(u16 *)tmp); + } + ++ return false; ++} ++ ++int __kprobes arch_prepare_kprobe(struct kprobe *p) ++{ ++ u16 *insn = (u16 *)p->addr; ++ ++ if ((unsigned long)insn & 0x1) ++ return -EILSEQ; ++ ++ if (!arch_check_kprobe(p)) ++ return -EILSEQ; ++ + /* copy instruction */ +- p->opcode = *p->addr; ++ p->opcode = (kprobe_opcode_t)(*insn++); ++ if (GET_INSN_LENGTH(p->opcode) == 4) ++ p->opcode |= (kprobe_opcode_t)(*insn) << 16; + + /* decode instruction */ + switch (riscv_probe_decode_insn(p->addr, &p->ainsn.api)) { +@@ -191,7 +210,7 @@ static int __kprobes reenter_kprobe(struct kprobe *p, + break; + case KPROBE_HIT_SS: + case KPROBE_REENTER: +- pr_warn("Unrecoverable kprobe detected.\n"); ++ pr_warn("Failed to recover from reentered kprobes.\n"); + dump_kprobe(p); + BUG(); + break; +diff --git a/arch/riscv/kernel/probes/simulate-insn.c b/arch/riscv/kernel/probes/simulate-insn.c +index d73e96f6ed7c5..a20568bd1f1a8 100644 +--- a/arch/riscv/kernel/probes/simulate-insn.c ++++ b/arch/riscv/kernel/probes/simulate-insn.c +@@ -71,11 +71,11 @@ bool __kprobes simulate_jalr(u32 opcode, unsigned long addr, struct pt_regs *reg + u32 rd_index = (opcode >> 7) & 0x1f; + u32 rs1_index = (opcode >> 15) & 0x1f; + +- ret = rv_insn_reg_set_val(regs, rd_index, addr + 4); ++ ret = rv_insn_reg_get_val(regs, rs1_index, &base_addr); + if (!ret) + return ret; + +- ret = rv_insn_reg_get_val(regs, rs1_index, &base_addr); ++ ret = rv_insn_reg_set_val(regs, rd_index, addr + 4); + if (!ret) + return ret; + +diff 
--git a/arch/riscv/kernel/probes/simulate-insn.h b/arch/riscv/kernel/probes/simulate-insn.h +index cb6ff7dccb92e..de8474146a9b6 100644 +--- a/arch/riscv/kernel/probes/simulate-insn.h ++++ b/arch/riscv/kernel/probes/simulate-insn.h +@@ -31,9 +31,9 @@ __RISCV_INSN_FUNCS(fence, 0x7f, 0x0f); + } while (0) + + __RISCV_INSN_FUNCS(c_j, 0xe003, 0xa001); +-__RISCV_INSN_FUNCS(c_jr, 0xf007, 0x8002); ++__RISCV_INSN_FUNCS(c_jr, 0xf07f, 0x8002); + __RISCV_INSN_FUNCS(c_jal, 0xe003, 0x2001); +-__RISCV_INSN_FUNCS(c_jalr, 0xf007, 0x9002); ++__RISCV_INSN_FUNCS(c_jalr, 0xf07f, 0x9002); + __RISCV_INSN_FUNCS(c_beqz, 0xe003, 0xc001); + __RISCV_INSN_FUNCS(c_bnez, 0xe003, 0xe001); + __RISCV_INSN_FUNCS(c_ebreak, 0xffff, 0x9002); diff --git a/arch/riscv/kernel/probes/uprobes.c b/arch/riscv/kernel/probes/uprobes.c index 7a057b5f0adc7..c976a21cd4bd5 100644 --- a/arch/riscv/kernel/probes/uprobes.c @@ -32990,6 +41322,19 @@ index 7a057b5f0adc7..c976a21cd4bd5 100644 } bool arch_uretprobe_is_alive(struct return_instance *ret, enum rp_check ctx, +diff --git a/arch/riscv/kernel/process.c b/arch/riscv/kernel/process.c +index 03ac3aa611f59..bda3bc2947186 100644 +--- a/arch/riscv/kernel/process.c ++++ b/arch/riscv/kernel/process.c +@@ -124,6 +124,8 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, unsigned long arg, + { + struct pt_regs *childregs = task_pt_regs(p); + ++ memset(&p->thread.s, 0, sizeof(p->thread.s)); ++ + /* p->thread holds context to be restored by __switch_to() */ + if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) { + /* Kernel thread */ diff --git a/arch/riscv/kernel/reset.c b/arch/riscv/kernel/reset.c index ee5878d968cc1..9c842c41684ac 100644 --- a/arch/riscv/kernel/reset.c @@ -33023,7 +41368,7 @@ index ee5878d968cc1..9c842c41684ac 100644 + default_power_off(); } diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c -index b9620e5f00baf..7bdbf3f608a4f 100644 +index b9620e5f00baf..14b84d09354aa 100644 --- a/arch/riscv/kernel/setup.c +++ b/arch/riscv/kernel/setup.c @@ -59,6 +59,16 @@ atomic_t hart_lottery __section(".sdata") @@ -33074,6 +41419,30 @@ index b9620e5f00baf..7bdbf3f608a4f 100644 #ifdef CONFIG_CMDLINE_FORCE strscpy(boot_command_line, CONFIG_CMDLINE, COMMAND_LINE_SIZE); pr_info("Forcing kernel command line to: %s\n", boot_command_line); +@@ -281,6 +291,7 @@ void __init setup_arch(char **cmdline_p) + else + pr_err("No DTB found in kernel mappings\n"); + #endif ++ early_init_fdt_scan_reserved_mem(); + misc_mem_init(); + + init_resources(); +@@ -320,10 +331,11 @@ subsys_initcall(topology_init); + + void free_initmem(void) + { +- if (IS_ENABLED(CONFIG_STRICT_KERNEL_RWX)) +- set_kernel_memory(lm_alias(__init_begin), lm_alias(__init_end), +- IS_ENABLED(CONFIG_64BIT) ? 
+- set_memory_rw : set_memory_rw_nx); ++ if (IS_ENABLED(CONFIG_STRICT_KERNEL_RWX)) { ++ set_kernel_memory(lm_alias(__init_begin), lm_alias(__init_end), set_memory_rw_nx); ++ if (IS_ENABLED(CONFIG_64BIT)) ++ set_kernel_memory(__init_begin, __init_end, set_memory_nx); ++ } + + free_initmem_default(POISON_FREE_INITMEM); + } diff --git a/arch/riscv/kernel/signal.c b/arch/riscv/kernel/signal.c index c2d5ecbe55264..f8fb85dc94b7a 100644 --- a/arch/riscv/kernel/signal.c @@ -33132,10 +41501,10 @@ index bd82375db51a6..0f323e935dd89 100644 /* diff --git a/arch/riscv/kernel/stacktrace.c b/arch/riscv/kernel/stacktrace.c -index 315db3d0229bf..c2601150b91c8 100644 +index 315db3d0229bf..ee8ef91c8aaf4 100644 --- a/arch/riscv/kernel/stacktrace.c +++ b/arch/riscv/kernel/stacktrace.c -@@ -22,15 +22,16 @@ void notrace walk_stackframe(struct task_struct *task, struct pt_regs *regs, +@@ -22,15 +22,17 @@ void notrace walk_stackframe(struct task_struct *task, struct pt_regs *regs, bool (*fn)(void *, unsigned long), void *arg) { unsigned long fp, sp, pc; @@ -33152,18 +41521,28 @@ index 315db3d0229bf..c2601150b91c8 100644 + fp = (unsigned long)__builtin_frame_address(0); + sp = sp_in_global; + pc = (unsigned long)walk_stackframe; ++ level = -1; } else { /* task blocked in __switch_to */ fp = task->thread.s[0]; -@@ -42,7 +43,7 @@ void notrace walk_stackframe(struct task_struct *task, struct pt_regs *regs, +@@ -42,7 +44,7 @@ void notrace walk_stackframe(struct task_struct *task, struct pt_regs *regs, unsigned long low, high; struct stackframe *frame; - if (unlikely(!__kernel_text_address(pc) || !fn(arg, pc))) -+ if (unlikely(!__kernel_text_address(pc) || (level++ >= 1 && !fn(arg, pc)))) ++ if (unlikely(!__kernel_text_address(pc) || (level++ >= 0 && !fn(arg, pc)))) break; /* Validate frame pointer */ +@@ -59,7 +61,7 @@ void notrace walk_stackframe(struct task_struct *task, struct pt_regs *regs, + } else { + fp = frame->fp; + pc = ftrace_graph_ret_addr(current, NULL, frame->ra, +- (unsigned long *)(fp - 8)); ++ &frame->ra); + } + + } diff --git a/arch/riscv/kernel/sys_riscv.c b/arch/riscv/kernel/sys_riscv.c index 12f8a7fce78b1..bb402685057a2 100644 --- a/arch/riscv/kernel/sys_riscv.c @@ -33230,7 +41609,7 @@ index 0000000000000..99fe67377e5ed + +#endif /* __TRACE_IRQ_H */ diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c -index 0daaa3e4630d4..8c58aa5d2b369 100644 +index 0daaa3e4630d4..6084bd93d2f58 100644 --- a/arch/riscv/kernel/traps.c +++ b/arch/riscv/kernel/traps.c @@ -16,12 +16,14 @@ @@ -33259,6 +41638,95 @@ index 0daaa3e4630d4..8c58aa5d2b369 100644 bust_spinlocks(0); add_taint(TAINT_DIE, LOCKDEP_NOW_UNRELIABLE); spin_unlock_irq(&die_lock); +@@ -54,7 +59,7 @@ void die(struct pt_regs *regs, const char *str) + if (panic_on_oops) + panic("Fatal exception"); + if (ret != NOTIFY_STOP) +- do_exit(SIGSEGV); ++ make_task_dead(SIGSEGV); + } + + void do_trap(struct pt_regs *regs, int signo, int code, unsigned long addr) +@@ -206,18 +211,36 @@ static DEFINE_PER_CPU(unsigned long [OVERFLOW_STACK_SIZE/sizeof(long)], + * shadow stack, handled_ kernel_ stack_ overflow(in kernel/entry.S) is used + * to get per-cpu overflow stack(get_overflow_stack). 
+ */ +-long shadow_stack[SHADOW_OVERFLOW_STACK_SIZE/sizeof(long)]; ++long shadow_stack[SHADOW_OVERFLOW_STACK_SIZE/sizeof(long)] __aligned(16); + asmlinkage unsigned long get_overflow_stack(void) + { + return (unsigned long)this_cpu_ptr(overflow_stack) + + OVERFLOW_STACK_SIZE; + } + ++/* ++ * A pseudo spinlock to protect the shadow stack from being used by multiple ++ * harts concurrently. This isn't a real spinlock because the lock side must ++ * be taken without a valid stack and only a single register, it's only taken ++ * while in the process of panicing anyway so the performance and error ++ * checking a proper spinlock gives us doesn't matter. ++ */ ++unsigned long spin_shadow_stack; ++ + asmlinkage void handle_bad_stack(struct pt_regs *regs) + { + unsigned long tsk_stk = (unsigned long)current->stack; + unsigned long ovf_stk = (unsigned long)this_cpu_ptr(overflow_stack); + ++ /* ++ * We're done with the shadow stack by this point, as we're on the ++ * overflow stack. Tell any other concurrent overflowing harts that ++ * they can proceed with panicing by releasing the pseudo-spinlock. ++ * ++ * This pairs with an amoswap.aq in handle_kernel_stack_overflow. ++ */ ++ smp_store_release(&spin_shadow_stack, 0); ++ + console_verbose(); + + pr_emerg("Insufficient stack space to handle exception!\n"); +diff --git a/arch/riscv/kernel/vdso/Makefile b/arch/riscv/kernel/vdso/Makefile +index f2e065671e4d5..06e6b27f3bcc9 100644 +--- a/arch/riscv/kernel/vdso/Makefile ++++ b/arch/riscv/kernel/vdso/Makefile +@@ -17,6 +17,7 @@ vdso-syms += flush_icache + obj-vdso = $(patsubst %, %.o, $(vdso-syms)) note.o + + ccflags-y := -fno-stack-protector ++ccflags-y += -DDISABLE_BRANCH_PROFILING + + ifneq ($(c-gettimeofday-y),) + CFLAGS_vgettimeofday.o += -fPIC -include $(c-gettimeofday-y) +@@ -28,9 +29,12 @@ obj-vdso := $(addprefix $(obj)/, $(obj-vdso)) + + obj-y += vdso.o + CPPFLAGS_vdso.lds += -P -C -U$(ARCH) ++ifneq ($(filter vgettimeofday, $(vdso-syms)),) ++CPPFLAGS_vdso.lds += -DHAS_VGETTIMEOFDAY ++endif + + # Disable -pg to prevent insert call site +-CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) -Os ++CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) + + # Disable profiling and instrumentation for VDSO code + GCOV_PROFILE := n +diff --git a/arch/riscv/kernel/vdso/vdso.lds.S b/arch/riscv/kernel/vdso/vdso.lds.S +index e9111f700af08..3729cb28aac8d 100644 +--- a/arch/riscv/kernel/vdso/vdso.lds.S ++++ b/arch/riscv/kernel/vdso/vdso.lds.S +@@ -65,9 +65,11 @@ VERSION + LINUX_4.15 { + global: + __vdso_rt_sigreturn; ++#ifdef HAS_VGETTIMEOFDAY + __vdso_gettimeofday; + __vdso_clock_gettime; + __vdso_clock_getres; ++#endif + __vdso_getcpu; + __vdso_flush_icache; + local: *; diff --git a/arch/riscv/lib/memmove.S b/arch/riscv/lib/memmove.S index 07d1d2152ba5c..e0609e1f0864d 100644 --- a/arch/riscv/lib/memmove.S @@ -33695,10 +42163,55 @@ index 7ebaef10ea1b6..ac7a25298a04a 100644 endif obj-$(CONFIG_DEBUG_VIRTUAL) += physaddr.o +diff --git a/arch/riscv/mm/cacheflush.c b/arch/riscv/mm/cacheflush.c +index 89f81067e09ed..2ae1201cff886 100644 +--- a/arch/riscv/mm/cacheflush.c ++++ b/arch/riscv/mm/cacheflush.c +@@ -85,7 +85,9 @@ void flush_icache_pte(pte_t pte) + { + struct page *page = pte_page(pte); + +- if (!test_and_set_bit(PG_dcache_clean, &page->flags)) ++ if (!test_bit(PG_dcache_clean, &page->flags)) { + flush_icache_all(); ++ set_bit(PG_dcache_clean, &page->flags); ++ } + } + #endif /* CONFIG_MMU */ +diff --git a/arch/riscv/mm/context.c b/arch/riscv/mm/context.c +index ee3459cb6750b..cc4a47bda82a0 100644 +--- 
a/arch/riscv/mm/context.c ++++ b/arch/riscv/mm/context.c +@@ -196,6 +196,16 @@ switch_mm_fast: + + if (need_flush_tlb) + local_flush_tlb_all(); ++#ifdef CONFIG_SMP ++ else { ++ cpumask_t *mask = &mm->context.tlb_stale_mask; ++ ++ if (cpumask_test_cpu(cpu, mask)) { ++ cpumask_clear_cpu(cpu, mask); ++ local_flush_tlb_all_asid(cntx & asid_mask); ++ } ++ } ++#endif + } + + static void set_mm_noasid(struct mm_struct *mm) diff --git a/arch/riscv/mm/fault.c b/arch/riscv/mm/fault.c -index aa08dd2f8faec..7cfaf366463fb 100644 +index aa08dd2f8faec..676a3f28811fa 100644 --- a/arch/riscv/mm/fault.c +++ b/arch/riscv/mm/fault.c +@@ -31,7 +31,7 @@ static void die_kernel_fault(const char *msg, unsigned long addr, + + bust_spinlocks(0); + die(regs, "Oops"); +- do_exit(SIGKILL); ++ make_task_dead(SIGKILL); + } + + static inline void no_context(struct pt_regs *regs, unsigned long addr) @@ -188,7 +188,8 @@ static inline bool access_error(unsigned long cause, struct vm_area_struct *vma) } break; @@ -33710,7 +42223,7 @@ index aa08dd2f8faec..7cfaf366463fb 100644 } break; diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c -index c0cddf0fc22db..a37a08ceededd 100644 +index c0cddf0fc22db..830f53b141a0c 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -100,6 +100,10 @@ static void __init print_vm_layout(void) @@ -33736,7 +42249,7 @@ index c0cddf0fc22db..a37a08ceededd 100644 /* * memblock allocator is not aware of the fact that last 4K bytes of * the addressable memory can not be mapped because of IS_ERR_VALUE -@@ -218,8 +222,25 @@ static void __init setup_bootmem(void) +@@ -218,10 +222,26 @@ static void __init setup_bootmem(void) * early_init_fdt_reserve_self() since __pa() does * not work for DTB pointers that are fixmap addresses */ @@ -33762,9 +42275,11 @@ index c0cddf0fc22db..a37a08ceededd 100644 + memblock_reserve(dtb_early_pa, fdt_totalsize(dtb_early_va)); + } - early_init_fdt_scan_reserved_mem(); +- early_init_fdt_scan_reserved_mem(); dma_contiguous_reserve(dma32_phys_limit); -@@ -248,6 +269,7 @@ pgd_t early_pg_dir[PTRS_PER_PGD] __initdata __aligned(PAGE_SIZE); + if (IS_ENABLED(CONFIG_64BIT)) + hugetlb_cma_reserve(PUD_SHIFT - PAGE_SHIFT); +@@ -248,6 +268,7 @@ pgd_t early_pg_dir[PTRS_PER_PGD] __initdata __aligned(PAGE_SIZE); static pmd_t __maybe_unused early_dtb_pmd[PTRS_PER_PMD] __initdata __aligned(PAGE_SIZE); #ifdef CONFIG_XIP_KERNEL @@ -33772,7 +42287,7 @@ index c0cddf0fc22db..a37a08ceededd 100644 #define trampoline_pg_dir ((pgd_t *)XIP_FIXUP(trampoline_pg_dir)) #define fixmap_pte ((pte_t *)XIP_FIXUP(fixmap_pte)) #define early_pg_dir ((pgd_t *)XIP_FIXUP(early_pg_dir)) -@@ -451,6 +473,7 @@ static uintptr_t __init best_map_size(phys_addr_t base, phys_addr_t size) +@@ -451,6 +472,7 @@ static uintptr_t __init best_map_size(phys_addr_t base, phys_addr_t size) } #ifdef CONFIG_XIP_KERNEL @@ -33780,7 +42295,7 @@ index c0cddf0fc22db..a37a08ceededd 100644 /* called from head.S with MMU off */ asmlinkage void __init __copy_data(void) { -@@ -813,13 +836,22 @@ static void __init reserve_crashkernel(void) +@@ -813,13 +835,22 @@ static void __init reserve_crashkernel(void) /* * Current riscv boot protocol requires 2MB alignment for * RV64 and 4MB alignment for RV32 (hugepage size) @@ -33838,6 +42353,106 @@ index 5e49e4b4a4ccc..86c56616e5dea 100644 flush_tlb_kernel_range(start, end); +diff --git a/arch/riscv/mm/tlbflush.c b/arch/riscv/mm/tlbflush.c +index 64f8201237c24..efefc3986c48c 100644 +--- a/arch/riscv/mm/tlbflush.c ++++ b/arch/riscv/mm/tlbflush.c +@@ -5,23 +5,7 @@ + #include <linux/sched.h> + 
#include <asm/sbi.h> + #include <asm/mmu_context.h> +- +-static inline void local_flush_tlb_all_asid(unsigned long asid) +-{ +- __asm__ __volatile__ ("sfence.vma x0, %0" +- : +- : "r" (asid) +- : "memory"); +-} +- +-static inline void local_flush_tlb_page_asid(unsigned long addr, +- unsigned long asid) +-{ +- __asm__ __volatile__ ("sfence.vma %0, %1" +- : +- : "r" (addr), "r" (asid) +- : "memory"); +-} ++#include <asm/tlbflush.h> + + void flush_tlb_all(void) + { +@@ -31,6 +15,7 @@ void flush_tlb_all(void) + static void __sbi_tlb_flush_range(struct mm_struct *mm, unsigned long start, + unsigned long size, unsigned long stride) + { ++ struct cpumask *pmask = &mm->context.tlb_stale_mask; + struct cpumask *cmask = mm_cpumask(mm); + struct cpumask hmask; + unsigned int cpuid; +@@ -45,6 +30,15 @@ static void __sbi_tlb_flush_range(struct mm_struct *mm, unsigned long start, + if (static_branch_unlikely(&use_asid_allocator)) { + unsigned long asid = atomic_long_read(&mm->context.id); + ++ /* ++ * TLB will be immediately flushed on harts concurrently ++ * executing this MM context. TLB flush on other harts ++ * is deferred until this MM context migrates there. ++ */ ++ cpumask_setall(pmask); ++ cpumask_clear_cpu(cpuid, pmask); ++ cpumask_andnot(pmask, pmask, cmask); ++ + if (broadcast) { + riscv_cpuid_to_hartid_mask(cmask, &hmask); + sbi_remote_sfence_vma_asid(cpumask_bits(&hmask), +diff --git a/arch/riscv/net/bpf_jit_comp64.c b/arch/riscv/net/bpf_jit_comp64.c +index 3af4131c22c7a..2e3f1a626a3af 100644 +--- a/arch/riscv/net/bpf_jit_comp64.c ++++ b/arch/riscv/net/bpf_jit_comp64.c +@@ -120,6 +120,25 @@ static bool in_auipc_jalr_range(s64 val) + val < ((1L << 31) - (1L << 11)); + } + ++/* Emit fixed-length instructions for address */ ++static int emit_addr(u8 rd, u64 addr, bool extra_pass, struct rv_jit_context *ctx) ++{ ++ u64 ip = (u64)(ctx->insns + ctx->ninsns); ++ s64 off = addr - ip; ++ s64 upper = (off + (1 << 11)) >> 12; ++ s64 lower = off & 0xfff; ++ ++ if (extra_pass && !in_auipc_jalr_range(off)) { ++ pr_err("bpf-jit: target offset 0x%llx is out of range\n", off); ++ return -ERANGE; ++ } ++ ++ emit(rv_auipc(rd, upper), ctx); ++ emit(rv_addi(rd, rd, lower), ctx); ++ return 0; ++} ++ ++/* Emit variable-length instructions for 32-bit and 64-bit imm */ + static void emit_imm(u8 rd, s64 val, struct rv_jit_context *ctx) + { + /* Note that the immediate from the add is sign-extended, +@@ -887,7 +906,15 @@ out_be: + u64 imm64; + + imm64 = (u64)insn1.imm << 32 | (u32)imm; +- emit_imm(rd, imm64, ctx); ++ if (bpf_pseudo_func(insn)) { ++ /* fixed-length insns for extra jit pass */ ++ ret = emit_addr(rd, imm64, extra_pass, ctx); ++ if (ret) ++ return ret; ++ } else { ++ emit_imm(rd, imm64, ctx); ++ } ++ + return 1; + } + diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index b86de61b8caa2..e402fa964f235 100644 --- a/arch/s390/Kconfig @@ -33897,6 +42512,18 @@ index 450b351dfa8ef..c7b7a60f6405d 100644 endif ifdef CONFIG_EXPOLINE +diff --git a/arch/s390/boot/compressed/decompressor.c b/arch/s390/boot/compressed/decompressor.c +index e27c2140d6206..623f6775d01d7 100644 +--- a/arch/s390/boot/compressed/decompressor.c ++++ b/arch/s390/boot/compressed/decompressor.c +@@ -80,6 +80,6 @@ void *decompress_kernel(void) + void *output = (void *)decompress_offset; + + __decompress(_compressed_start, _compressed_end - _compressed_start, +- NULL, NULL, output, 0, NULL, error); ++ NULL, NULL, output, vmlinux.image_size, NULL, error); + return output; + } diff --git a/arch/s390/boot/compressed/decompressor.h 
b/arch/s390/boot/compressed/decompressor.h index a59f75c5b0490..f75cc31a77dd9 100644 --- a/arch/s390/boot/compressed/decompressor.h @@ -33909,6 +42536,30 @@ index a59f75c5b0490..f75cc31a77dd9 100644 }; /* Symbols defined by linker scripts */ +diff --git a/arch/s390/boot/compressed/vmlinux.lds.S b/arch/s390/boot/compressed/vmlinux.lds.S +index 918e05137d4c6..1686a852534fc 100644 +--- a/arch/s390/boot/compressed/vmlinux.lds.S ++++ b/arch/s390/boot/compressed/vmlinux.lds.S +@@ -93,8 +93,17 @@ SECTIONS + _compressed_start = .; + *(.vmlinux.bin.compressed) + _compressed_end = .; +- FILL(0xff); +- . = ALIGN(4096); ++ } ++ ++#define SB_TRAILER_SIZE 32 ++ /* Trailer needed for Secure Boot */ ++ . += SB_TRAILER_SIZE; /* make sure .sb.trailer does not overwrite the previous section */ ++ . = ALIGN(4096) - SB_TRAILER_SIZE; ++ .sb.trailer : { ++ QUAD(0) ++ QUAD(0) ++ QUAD(0) ++ QUAD(0x000000207a49504c) + } + _end = .; + diff --git a/arch/s390/boot/startup.c b/arch/s390/boot/startup.c index 6dc8d0a538640..1aa11a8f57dd8 100644 --- a/arch/s390/boot/startup.c @@ -34443,6 +43094,60 @@ index 1effac6a01520..1c4f585dd39b6 100644 int chsc_sstpi(void *page, void *result, size_t size); int chsc_stzi(void *page, void *result, size_t size); int chsc_sgib(u32 origin); +diff --git a/arch/s390/include/asm/cpu_mf.h b/arch/s390/include/asm/cpu_mf.h +index 0d90cbeb89b43..a0914bc6c9bdd 100644 +--- a/arch/s390/include/asm/cpu_mf.h ++++ b/arch/s390/include/asm/cpu_mf.h +@@ -128,19 +128,21 @@ struct hws_combined_entry { + struct hws_diag_entry diag; /* Diagnostic-sampling data entry */ + } __packed; + +-struct hws_trailer_entry { +- union { +- struct { +- unsigned int f:1; /* 0 - Block Full Indicator */ +- unsigned int a:1; /* 1 - Alert request control */ +- unsigned int t:1; /* 2 - Timestamp format */ +- unsigned int :29; /* 3 - 31: Reserved */ +- unsigned int bsdes:16; /* 32-47: size of basic SDE */ +- unsigned int dsdes:16; /* 48-63: size of diagnostic SDE */ +- }; +- unsigned long long flags; /* 0 - 63: All indicators */ ++union hws_trailer_header { ++ struct { ++ unsigned int f:1; /* 0 - Block Full Indicator */ ++ unsigned int a:1; /* 1 - Alert request control */ ++ unsigned int t:1; /* 2 - Timestamp format */ ++ unsigned int :29; /* 3 - 31: Reserved */ ++ unsigned int bsdes:16; /* 32-47: size of basic SDE */ ++ unsigned int dsdes:16; /* 48-63: size of diagnostic SDE */ ++ unsigned long long overflow; /* 64 - Overflow Count */ + }; +- unsigned long long overflow; /* 64 - sample Overflow count */ ++ __uint128_t val; ++}; ++ ++struct hws_trailer_entry { ++ union hws_trailer_header header; /* 0 - 15 Flags + Overflow Count */ + unsigned char timestamp[16]; /* 16 - 31 timestamp */ + unsigned long long reserved1; /* 32 -Reserved */ + unsigned long long reserved2; /* */ +@@ -287,14 +289,11 @@ static inline unsigned long sample_rate_to_freq(struct hws_qsi_info_block *qsi, + return USEC_PER_SEC * qsi->cpu_speed / rate; + } + +-#define SDB_TE_ALERT_REQ_MASK 0x4000000000000000UL +-#define SDB_TE_BUFFER_FULL_MASK 0x8000000000000000UL +- + /* Return TOD timestamp contained in an trailer entry */ + static inline unsigned long long trailer_timestamp(struct hws_trailer_entry *te) + { + /* TOD in STCKE format */ +- if (te->t) ++ if (te->header.t) + return *((unsigned long long *) &te->timestamp[1]); + + /* TOD in STCK format */ diff --git a/arch/s390/include/asm/ctl_reg.h b/arch/s390/include/asm/ctl_reg.h index 04dc65f8901dc..80b93c06a2bbe 100644 --- a/arch/s390/include/asm/ctl_reg.h @@ -34476,6 +43181,27 @@ index 
04dc65f8901dc..80b93c06a2bbe 100644 - #endif /* __ASSEMBLY__ */ #endif /* __ASM_CTL_REG_H */ +diff --git a/arch/s390/include/asm/debug.h b/arch/s390/include/asm/debug.h +index 19a55e1e3a0c5..5fc91a90657e7 100644 +--- a/arch/s390/include/asm/debug.h ++++ b/arch/s390/include/asm/debug.h +@@ -4,8 +4,8 @@ + * + * Copyright IBM Corp. 1999, 2020 + */ +-#ifndef DEBUG_H +-#define DEBUG_H ++#ifndef _ASM_S390_DEBUG_H ++#define _ASM_S390_DEBUG_H + + #include <linux/string.h> + #include <linux/spinlock.h> +@@ -487,4 +487,4 @@ void debug_register_static(debug_info_t *id, int pages_per_area, int nr_areas); + + #endif /* MODULE */ + +-#endif /* DEBUG_H */ ++#endif /* _ASM_S390_DEBUG_H */ diff --git a/arch/s390/include/asm/extable.h b/arch/s390/include/asm/extable.h index 16dc57dd90b30..8511f0e59290f 100644 --- a/arch/s390/include/asm/extable.h @@ -34496,6 +43222,20 @@ index 16dc57dd90b30..8511f0e59290f 100644 +#define swap_ex_entry_fixup swap_ex_entry_fixup #endif +diff --git a/arch/s390/include/asm/futex.h b/arch/s390/include/asm/futex.h +index c22debfcebf12..bf15767b729f9 100644 +--- a/arch/s390/include/asm/futex.h ++++ b/arch/s390/include/asm/futex.h +@@ -16,7 +16,8 @@ + "3: jl 1b\n" \ + " lhi %0,0\n" \ + "4: sacf 768\n" \ +- EX_TABLE(0b,4b) EX_TABLE(2b,4b) EX_TABLE(3b,4b) \ ++ EX_TABLE(0b,4b) EX_TABLE(1b,4b) \ ++ EX_TABLE(2b,4b) EX_TABLE(3b,4b) \ + : "=d" (ret), "=&d" (oldval), "=&d" (newval), \ + "=m" (*uaddr) \ + : "0" (-EFAULT), "d" (oparg), "a" (uaddr), \ diff --git a/arch/s390/include/asm/gmap.h b/arch/s390/include/asm/gmap.h index 40264f60b0da9..f4073106e1f39 100644 --- a/arch/s390/include/asm/gmap.h @@ -34594,6 +43334,19 @@ index e4dc64cc9c555..287bb88f76986 100644 struct zpci_iomap_entry { u32 fh; +diff --git a/arch/s390/include/asm/percpu.h b/arch/s390/include/asm/percpu.h +index cb5fc06904354..081837b391e35 100644 +--- a/arch/s390/include/asm/percpu.h ++++ b/arch/s390/include/asm/percpu.h +@@ -31,7 +31,7 @@ + pcp_op_T__ *ptr__; \ + preempt_disable_notrace(); \ + ptr__ = raw_cpu_ptr(&(pcp)); \ +- prev__ = *ptr__; \ ++ prev__ = READ_ONCE(*ptr__); \ + do { \ + old__ = prev__; \ + new__ = old__ op (val); \ diff --git a/arch/s390/include/asm/preempt.h b/arch/s390/include/asm/preempt.h index d9d5350cc3ec3..bf15da0fedbca 100644 --- a/arch/s390/include/asm/preempt.h @@ -34691,9 +43444,18 @@ index b57da93385888..9242d7ad71e79 100644 OFFSET(__LC_MCESAD, lowcore, mcesad); OFFSET(__LC_EXT_PARAMS2, lowcore, ext_params2); diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c -index d72a6df058d79..f17ad2daab079 100644 +index d72a6df058d79..8722bd07c6079 100644 --- a/arch/s390/kernel/crash_dump.c +++ b/arch/s390/kernel/crash_dump.c +@@ -44,7 +44,7 @@ struct save_area { + u64 fprs[16]; + u32 fpc; + u32 prefix; +- u64 todpreg; ++ u32 todpreg; + u64 timer; + u64 todcmp; + u64 vxrs_low[16]; @@ -132,28 +132,27 @@ static inline void *load_real_addr(void *addr) /* * Copy memory of the old, dumped system to a kernel space virtual address @@ -34815,6 +43577,17 @@ index d72a6df058d79..f17ad2daab079 100644 return NULL; memset(nt_name, 0, sizeof(nt_name)); if (copy_oldmem_kernel(¬e, addr, sizeof(note))) +diff --git a/arch/s390/kernel/dumpstack.c b/arch/s390/kernel/dumpstack.c +index db1bc00229caf..272ef8597e208 100644 +--- a/arch/s390/kernel/dumpstack.c ++++ b/arch/s390/kernel/dumpstack.c +@@ -224,5 +224,5 @@ void die(struct pt_regs *regs, const char *str) + if (panic_on_oops) + panic("Fatal exception: panic_on_oops"); + oops_exit(); +- do_exit(SIGSEGV); ++ make_task_dead(SIGSEGV); + } diff --git 
a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 4c9b967290ae0..d530eb4dc413f 100644 --- a/arch/s390/kernel/entry.S @@ -34923,6 +43696,28 @@ index 3a3145c4a3ba4..be5d432b902e0 100644 set_irq_regs(old_regs); irqentry_exit(regs, state); +diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c +index 52d056a5f89fc..952d44b0610b0 100644 +--- a/arch/s390/kernel/kprobes.c ++++ b/arch/s390/kernel/kprobes.c +@@ -7,6 +7,8 @@ + * s390 port, used ppc64 as template. Mike Grundy <grundym@us.ibm.com> + */ + ++#define pr_fmt(fmt) "kprobes: " fmt ++ + #include <linux/moduleloader.h> + #include <linux/kprobes.h> + #include <linux/ptrace.h> +@@ -259,7 +261,7 @@ static void kprobe_reenter_check(struct kprobe_ctlblk *kcb, struct kprobe *p) + * is a BUG. The code path resides in the .kprobes.text + * section and is executed with interrupts disabled. + */ +- pr_err("Invalid kprobe detected.\n"); ++ pr_err("Failed to recover from reentered kprobes.\n"); + dump_kprobe(p); + BUG(); + } diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c index 0505e55a62979..4b95684fbe46e 100644 --- a/arch/s390/kernel/machine_kexec.c @@ -34937,7 +43732,7 @@ index 0505e55a62979..4b95684fbe46e 100644 void machine_shutdown(void) diff --git a/arch/s390/kernel/machine_kexec_file.c b/arch/s390/kernel/machine_kexec_file.c -index f9e4baa64b675..3459362c54ac3 100644 +index f9e4baa64b675..c7fd818512890 100644 --- a/arch/s390/kernel/machine_kexec_file.c +++ b/arch/s390/kernel/machine_kexec_file.c @@ -12,6 +12,7 @@ @@ -34980,7 +43775,7 @@ index f9e4baa64b675..3459362c54ac3 100644 } #endif /* CONFIG_KEXEC_SIG */ -@@ -170,6 +179,7 @@ static int kexec_file_add_ipl_report(struct kimage *image, +@@ -170,13 +179,12 @@ static int kexec_file_add_ipl_report(struct kimage *image, struct kexec_buf buf; unsigned long addr; void *ptr, *end; @@ -34988,7 +43783,14 @@ index f9e4baa64b675..3459362c54ac3 100644 buf.image = image; -@@ -199,9 +209,13 @@ static int kexec_file_add_ipl_report(struct kimage *image, + data->memsz = ALIGN(data->memsz, PAGE_SIZE); + buf.mem = data->memsz; +- if (image->type == KEXEC_TYPE_CRASH) +- buf.mem += crashk_res.start; + + ptr = (void *)ipl_cert_list_addr; + end = ptr + ipl_cert_list_size; +@@ -199,9 +207,13 @@ static int kexec_file_add_ipl_report(struct kimage *image, ptr += len; } @@ -35002,18 +43804,21 @@ index f9e4baa64b675..3459362c54ac3 100644 data->memsz += buf.memsz; -@@ -209,7 +223,9 @@ static int kexec_file_add_ipl_report(struct kimage *image, +@@ -209,7 +221,12 @@ static int kexec_file_add_ipl_report(struct kimage *image, data->kernel_buf + offsetof(struct lowcore, ipl_parmblock_ptr); *lc_ipl_parmblock_ptr = (__u32)buf.mem; - return kexec_add_buffer(&buf); ++ if (image->type == KEXEC_TYPE_CRASH) ++ buf.mem += crashk_res.start; ++ + ret = kexec_add_buffer(&buf); +out: + return ret; } void *kexec_file_add_components(struct kimage *image, -@@ -269,6 +285,7 @@ int arch_kexec_apply_relocations_add(struct purgatory_info *pi, +@@ -269,6 +286,7 @@ int arch_kexec_apply_relocations_add(struct purgatory_info *pi, { Elf_Rela *relas; int i, r_type; @@ -35021,7 +43826,7 @@ index f9e4baa64b675..3459362c54ac3 100644 relas = (void *)pi->ehdr + relsec->sh_offset; -@@ -303,7 +320,15 @@ int arch_kexec_apply_relocations_add(struct purgatory_info *pi, +@@ -303,7 +321,15 @@ int arch_kexec_apply_relocations_add(struct purgatory_info *pi, addr = section->sh_addr + relas[i].r_offset; r_type = ELF64_R_TYPE(relas[i].r_info); @@ -35038,7 +43843,7 @@ index f9e4baa64b675..3459362c54ac3 100644 } 
return 0; } -@@ -321,3 +346,11 @@ int arch_kexec_kernel_image_probe(struct kimage *image, void *buf, +@@ -321,3 +347,11 @@ int arch_kexec_kernel_image_probe(struct kimage *image, void *buf, return kexec_image_probe_default(image, buf, buf_len); } @@ -35123,7 +43928,7 @@ index b01ba460b7cad..b032e556eeb71 100644 write(ip, insn, sizeof(insn)); info->plt_initialized = 1; diff --git a/arch/s390/kernel/nmi.c b/arch/s390/kernel/nmi.c -index 20f8e1868853f..383b4799b6dd3 100644 +index 20f8e1868853f..d4f071e73a0a6 100644 --- a/arch/s390/kernel/nmi.c +++ b/arch/s390/kernel/nmi.c @@ -62,7 +62,7 @@ static inline unsigned long nmi_get_mcesa_size(void) @@ -35135,6 +43940,15 @@ index 20f8e1868853f..383b4799b6dd3 100644 void __init nmi_alloc_boot_cpu(struct lowcore *lc) { +@@ -175,7 +175,7 @@ void __s390_handle_mcck(void) + "malfunction (code 0x%016lx).\n", mcck.mcck_code); + printk(KERN_EMERG "mcck: task: %s, pid: %d.\n", + current->comm, current->pid); +- do_exit(SIGSEGV); ++ make_task_dead(SIGSEGV); + } + } + @@ -273,7 +273,14 @@ static int notrace s390_validate_registers(union mci mci, int umode) /* Validate vector registers */ union ctlreg0 cr0; @@ -35286,6 +44100,247 @@ index 4a99154fe6514..d2a2a18b55808 100644 hwc->state |= PERF_HES_UPTODATE; } } +diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c +index db62def4ef28e..4e6fadaeaa1a6 100644 +--- a/arch/s390/kernel/perf_cpum_sf.c ++++ b/arch/s390/kernel/perf_cpum_sf.c +@@ -163,14 +163,15 @@ static void free_sampling_buffer(struct sf_buffer *sfb) + + static int alloc_sample_data_block(unsigned long *sdbt, gfp_t gfp_flags) + { +- unsigned long sdb, *trailer; ++ struct hws_trailer_entry *te; ++ unsigned long sdb; + + /* Allocate and initialize sample-data-block */ + sdb = get_zeroed_page(gfp_flags); + if (!sdb) + return -ENOMEM; +- trailer = trailer_entry_ptr(sdb); +- *trailer = SDB_TE_ALERT_REQ_MASK; ++ te = (struct hws_trailer_entry *)trailer_entry_ptr(sdb); ++ te->header.a = 1; + + /* Link SDB into the sample-data-block-table */ + *sdbt = sdb; +@@ -1206,7 +1207,7 @@ static void hw_collect_samples(struct perf_event *event, unsigned long *sdbt, + "%s: Found unknown" + " sampling data entry: te->f %i" + " basic.def %#4x (%p)\n", __func__, +- te->f, sample->def, sample); ++ te->header.f, sample->def, sample); + /* Sample slot is not yet written or other record. + * + * This condition can occur if the buffer was reused +@@ -1217,7 +1218,7 @@ static void hw_collect_samples(struct perf_event *event, unsigned long *sdbt, + * that are not full. Stop processing if the first + * invalid format was detected. 
+ */ +- if (!te->f) ++ if (!te->header.f) + break; + } + +@@ -1227,6 +1228,16 @@ static void hw_collect_samples(struct perf_event *event, unsigned long *sdbt, + } + } + ++static inline __uint128_t __cdsg(__uint128_t *ptr, __uint128_t old, __uint128_t new) ++{ ++ asm volatile( ++ " cdsg %[old],%[new],%[ptr]\n" ++ : [old] "+d" (old), [ptr] "+QS" (*ptr) ++ : [new] "d" (new) ++ : "memory", "cc"); ++ return old; ++} ++ + /* hw_perf_event_update() - Process sampling buffer + * @event: The perf event + * @flush_all: Flag to also flush partially filled sample-data-blocks +@@ -1243,10 +1254,11 @@ static void hw_collect_samples(struct perf_event *event, unsigned long *sdbt, + */ + static void hw_perf_event_update(struct perf_event *event, int flush_all) + { ++ unsigned long long event_overflow, sampl_overflow, num_sdb; ++ union hws_trailer_header old, prev, new; + struct hw_perf_event *hwc = &event->hw; + struct hws_trailer_entry *te; + unsigned long *sdbt; +- unsigned long long event_overflow, sampl_overflow, num_sdb, te_flags; + int done; + + /* +@@ -1266,25 +1278,25 @@ static void hw_perf_event_update(struct perf_event *event, int flush_all) + te = (struct hws_trailer_entry *) trailer_entry_ptr(*sdbt); + + /* Leave loop if no more work to do (block full indicator) */ +- if (!te->f) { ++ if (!te->header.f) { + done = 1; + if (!flush_all) + break; + } + + /* Check the sample overflow count */ +- if (te->overflow) ++ if (te->header.overflow) + /* Account sample overflows and, if a particular limit + * is reached, extend the sampling buffer. + * For details, see sfb_account_overflows(). + */ +- sampl_overflow += te->overflow; ++ sampl_overflow += te->header.overflow; + + /* Timestamps are valid for full sample-data-blocks only */ + debug_sprintf_event(sfdbg, 6, "%s: sdbt %#lx " + "overflow %llu timestamp %#llx\n", +- __func__, (unsigned long)sdbt, te->overflow, +- (te->f) ? trailer_timestamp(te) : 0ULL); ++ __func__, (unsigned long)sdbt, te->header.overflow, ++ (te->header.f) ? trailer_timestamp(te) : 0ULL); + + /* Collect all samples from a single sample-data-block and + * flag if an (perf) event overflow happened. 
If so, the PMU +@@ -1294,12 +1306,16 @@ static void hw_perf_event_update(struct perf_event *event, int flush_all) + num_sdb++; + + /* Reset trailer (using compare-double-and-swap) */ ++ /* READ_ONCE() 16 byte header */ ++ prev.val = __cdsg(&te->header.val, 0, 0); + do { +- te_flags = te->flags & ~SDB_TE_BUFFER_FULL_MASK; +- te_flags |= SDB_TE_ALERT_REQ_MASK; +- } while (!cmpxchg_double(&te->flags, &te->overflow, +- te->flags, te->overflow, +- te_flags, 0ULL)); ++ old.val = prev.val; ++ new.val = prev.val; ++ new.f = 0; ++ new.a = 1; ++ new.overflow = 0; ++ prev.val = __cdsg(&te->header.val, old.val, new.val); ++ } while (prev.val != old.val); + + /* Advance to next sample-data-block */ + sdbt++; +@@ -1384,7 +1400,7 @@ static void aux_output_end(struct perf_output_handle *handle) + range_scan = AUX_SDB_NUM_ALERT(aux); + for (i = 0, idx = aux->head; i < range_scan; i++, idx++) { + te = aux_sdb_trailer(aux, idx); +- if (!(te->flags & SDB_TE_BUFFER_FULL_MASK)) ++ if (!te->header.f) + break; + } + /* i is num of SDBs which are full */ +@@ -1392,7 +1408,7 @@ static void aux_output_end(struct perf_output_handle *handle) + + /* Remove alert indicators in the buffer */ + te = aux_sdb_trailer(aux, aux->alert_mark); +- te->flags &= ~SDB_TE_ALERT_REQ_MASK; ++ te->header.a = 0; + + debug_sprintf_event(sfdbg, 6, "%s: SDBs %ld range %ld head %ld\n", + __func__, i, range_scan, aux->head); +@@ -1437,9 +1453,9 @@ static int aux_output_begin(struct perf_output_handle *handle, + idx = aux->empty_mark + 1; + for (i = 0; i < range_scan; i++, idx++) { + te = aux_sdb_trailer(aux, idx); +- te->flags &= ~(SDB_TE_BUFFER_FULL_MASK | +- SDB_TE_ALERT_REQ_MASK); +- te->overflow = 0; ++ te->header.f = 0; ++ te->header.a = 0; ++ te->header.overflow = 0; + } + /* Save the position of empty SDBs */ + aux->empty_mark = aux->head + range - 1; +@@ -1448,7 +1464,7 @@ static int aux_output_begin(struct perf_output_handle *handle, + /* Set alert indicator */ + aux->alert_mark = aux->head + range/2 - 1; + te = aux_sdb_trailer(aux, aux->alert_mark); +- te->flags = te->flags | SDB_TE_ALERT_REQ_MASK; ++ te->header.a = 1; + + /* Reset hardware buffer head */ + head = AUX_SDB_INDEX(aux, aux->head); +@@ -1475,14 +1491,17 @@ static int aux_output_begin(struct perf_output_handle *handle, + static bool aux_set_alert(struct aux_buffer *aux, unsigned long alert_index, + unsigned long long *overflow) + { +- unsigned long long orig_overflow, orig_flags, new_flags; ++ union hws_trailer_header old, prev, new; + struct hws_trailer_entry *te; + + te = aux_sdb_trailer(aux, alert_index); ++ /* READ_ONCE() 16 byte header */ ++ prev.val = __cdsg(&te->header.val, 0, 0); + do { +- orig_flags = te->flags; +- *overflow = orig_overflow = te->overflow; +- if (orig_flags & SDB_TE_BUFFER_FULL_MASK) { ++ old.val = prev.val; ++ new.val = prev.val; ++ *overflow = old.overflow; ++ if (old.f) { + /* + * SDB is already set by hardware. 
+ * Abort and try to set somewhere +@@ -1490,10 +1509,10 @@ static bool aux_set_alert(struct aux_buffer *aux, unsigned long alert_index, + */ + return false; + } +- new_flags = orig_flags | SDB_TE_ALERT_REQ_MASK; +- } while (!cmpxchg_double(&te->flags, &te->overflow, +- orig_flags, orig_overflow, +- new_flags, 0ULL)); ++ new.a = 1; ++ new.overflow = 0; ++ prev.val = __cdsg(&te->header.val, old.val, new.val); ++ } while (prev.val != old.val); + return true; + } + +@@ -1522,8 +1541,9 @@ static bool aux_set_alert(struct aux_buffer *aux, unsigned long alert_index, + static bool aux_reset_buffer(struct aux_buffer *aux, unsigned long range, + unsigned long long *overflow) + { +- unsigned long long orig_overflow, orig_flags, new_flags; + unsigned long i, range_scan, idx, idx_old; ++ union hws_trailer_header old, prev, new; ++ unsigned long long orig_overflow; + struct hws_trailer_entry *te; + + debug_sprintf_event(sfdbg, 6, "%s: range %ld head %ld alert %ld " +@@ -1554,17 +1574,20 @@ static bool aux_reset_buffer(struct aux_buffer *aux, unsigned long range, + idx_old = idx = aux->empty_mark + 1; + for (i = 0; i < range_scan; i++, idx++) { + te = aux_sdb_trailer(aux, idx); ++ /* READ_ONCE() 16 byte header */ ++ prev.val = __cdsg(&te->header.val, 0, 0); + do { +- orig_flags = te->flags; +- orig_overflow = te->overflow; +- new_flags = orig_flags & ~SDB_TE_BUFFER_FULL_MASK; ++ old.val = prev.val; ++ new.val = prev.val; ++ orig_overflow = old.overflow; ++ new.f = 0; ++ new.overflow = 0; + if (idx == aux->alert_mark) +- new_flags |= SDB_TE_ALERT_REQ_MASK; ++ new.a = 1; + else +- new_flags &= ~SDB_TE_ALERT_REQ_MASK; +- } while (!cmpxchg_double(&te->flags, &te->overflow, +- orig_flags, orig_overflow, +- new_flags, 0ULL)); ++ new.a = 0; ++ prev.val = __cdsg(&te->header.val, old.val, new.val); ++ } while (prev.val != old.val); + *overflow += orig_overflow; + } + diff --git a/arch/s390/kernel/perf_event.c b/arch/s390/kernel/perf_event.c index ea7729bebaa07..a7f8db73984b0 100644 --- a/arch/s390/kernel/perf_event.c @@ -35691,10 +44746,18 @@ index 6568de2367010..0dea82b87e54b 100644 # Build rules diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S -index 63bdb9e1bfc13..b508ccad4856d 100644 +index 63bdb9e1bfc13..8ce1615c10467 100644 --- a/arch/s390/kernel/vmlinux.lds.S +++ b/arch/s390/kernel/vmlinux.lds.S -@@ -132,6 +132,7 @@ SECTIONS +@@ -80,6 +80,7 @@ SECTIONS + _end_amode31_refs = .; + } + ++ . = ALIGN(PAGE_SIZE); + _edata = .; /* End of data section */ + + /* will be freed after init */ +@@ -132,6 +133,7 @@ SECTIONS /* * Table with the patch locations to undo expolines */ @@ -35702,7 +44765,15 @@ index 63bdb9e1bfc13..b508ccad4856d 100644 .nospec_call_table : { __nospec_call_start = . ; *(.s390_indirect*) -@@ -212,6 +213,7 @@ SECTIONS +@@ -193,6 +195,7 @@ SECTIONS + + BSS_SECTION(PAGE_SIZE, 4 * PAGE_SIZE, PAGE_SIZE) + ++ . = ALIGN(PAGE_SIZE); + _end = . 
; + + /* +@@ -212,6 +215,7 @@ SECTIONS QUAD(__dynsym_start) /* dynsym_start */ QUAD(__rela_dyn_start) /* rela_dyn_start */ QUAD(__rela_dyn_end) /* rela_dyn_end */ @@ -35743,10 +44814,53 @@ index 2bd8f854f1b41..8ca301f49b305 100644 return handle_instruction(vcpu); } diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c -index 2245f4b8d3629..8ce03a5ca8634 100644 +index 2245f4b8d3629..ca7d09f098092 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c -@@ -2115,6 +2115,13 @@ int kvm_s390_is_stop_irq_pending(struct kvm_vcpu *vcpu) +@@ -81,8 +81,9 @@ static int sca_inject_ext_call(struct kvm_vcpu *vcpu, int src_id) + struct esca_block *sca = vcpu->kvm->arch.sca; + union esca_sigp_ctrl *sigp_ctrl = + &(sca->cpu[vcpu->vcpu_id].sigp_ctrl); +- union esca_sigp_ctrl new_val = {0}, old_val = *sigp_ctrl; ++ union esca_sigp_ctrl new_val = {0}, old_val; + ++ old_val = READ_ONCE(*sigp_ctrl); + new_val.scn = src_id; + new_val.c = 1; + old_val.c = 0; +@@ -93,8 +94,9 @@ static int sca_inject_ext_call(struct kvm_vcpu *vcpu, int src_id) + struct bsca_block *sca = vcpu->kvm->arch.sca; + union bsca_sigp_ctrl *sigp_ctrl = + &(sca->cpu[vcpu->vcpu_id].sigp_ctrl); +- union bsca_sigp_ctrl new_val = {0}, old_val = *sigp_ctrl; ++ union bsca_sigp_ctrl new_val = {0}, old_val; + ++ old_val = READ_ONCE(*sigp_ctrl); + new_val.scn = src_id; + new_val.c = 1; + old_val.c = 0; +@@ -124,16 +126,18 @@ static void sca_clear_ext_call(struct kvm_vcpu *vcpu) + struct esca_block *sca = vcpu->kvm->arch.sca; + union esca_sigp_ctrl *sigp_ctrl = + &(sca->cpu[vcpu->vcpu_id].sigp_ctrl); +- union esca_sigp_ctrl old = *sigp_ctrl; ++ union esca_sigp_ctrl old; + ++ old = READ_ONCE(*sigp_ctrl); + expect = old.value; + rc = cmpxchg(&sigp_ctrl->value, old.value, 0); + } else { + struct bsca_block *sca = vcpu->kvm->arch.sca; + union bsca_sigp_ctrl *sigp_ctrl = + &(sca->cpu[vcpu->vcpu_id].sigp_ctrl); +- union bsca_sigp_ctrl old = *sigp_ctrl; ++ union bsca_sigp_ctrl old; + ++ old = READ_ONCE(*sigp_ctrl); + expect = old.value; + rc = cmpxchg(&sigp_ctrl->value, old.value, 0); + } +@@ -2115,6 +2119,13 @@ int kvm_s390_is_stop_irq_pending(struct kvm_vcpu *vcpu) return test_bit(IRQ_PEND_SIGP_STOP, &li->pending_irqs); } @@ -35761,10 +44875,64 @@ index 2245f4b8d3629..8ce03a5ca8634 100644 { struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c -index 1c97493d21e10..b456aa196c04f 100644 +index 1c97493d21e10..c61533e1448a2 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c -@@ -3447,7 +3447,7 @@ bool kvm_arch_no_poll(struct kvm_vcpu *vcpu) +@@ -1117,6 +1117,8 @@ static int kvm_s390_vm_get_migration(struct kvm *kvm, + return 0; + } + ++static void __kvm_s390_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod); ++ + static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr) + { + struct kvm_s390_vm_tod_clock gtod; +@@ -1126,7 +1128,7 @@ static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr) + + if (!test_kvm_facility(kvm, 139) && gtod.epoch_idx) + return -EINVAL; +- kvm_s390_set_tod_clock(kvm, >od); ++ __kvm_s390_set_tod_clock(kvm, >od); + + VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx", + gtod.epoch_idx, gtod.tod); +@@ -1157,7 +1159,7 @@ static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr) + sizeof(gtod.tod))) + return -EFAULT; + +- kvm_s390_set_tod_clock(kvm, >od); ++ __kvm_s390_set_tod_clock(kvm, >od); + VM_EVENT(kvm, 3, "SET: TOD base: 
0x%llx", gtod.tod); + return 0; + } +@@ -1169,6 +1171,16 @@ static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr) + if (attr->flags) + return -EINVAL; + ++ mutex_lock(&kvm->lock); ++ /* ++ * For protected guests, the TOD is managed by the ultravisor, so trying ++ * to change it will never bring the expected results. ++ */ ++ if (kvm_s390_pv_is_protected(kvm)) { ++ ret = -EOPNOTSUPP; ++ goto out_unlock; ++ } ++ + switch (attr->attr) { + case KVM_S390_VM_TOD_EXT: + ret = kvm_s390_set_tod_ext(kvm, attr); +@@ -1183,6 +1195,9 @@ static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr) + ret = -ENXIO; + break; + } ++ ++out_unlock: ++ mutex_unlock(&kvm->lock); + return ret; + } + +@@ -3447,7 +3462,7 @@ bool kvm_arch_no_poll(struct kvm_vcpu *vcpu) { /* do not poll with more than halt_poll_max_steal percent of steal time */ if (S390_lowcore.avg_steal_timer * 100 / (TICK_USEC << 12) >= @@ -35773,7 +44941,7 @@ index 1c97493d21e10..b456aa196c04f 100644 vcpu->stat.halt_no_poll_steal++; return true; } -@@ -3913,14 +3913,12 @@ retry: +@@ -3913,14 +3928,12 @@ retry: return 0; } @@ -35789,32 +44957,23 @@ index 1c97493d21e10..b456aa196c04f 100644 preempt_disable(); store_tod_clock_ext(&clk); -@@ -3941,9 +3939,24 @@ void kvm_s390_set_tod_clock(struct kvm *kvm, +@@ -3941,7 +3954,15 @@ void kvm_s390_set_tod_clock(struct kvm *kvm, kvm_s390_vcpu_unblock_all(kvm); preempt_enable(); +} + -+void kvm_s390_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod) -+{ -+ mutex_lock(&kvm->lock); -+ __kvm_s390_set_tod_clock(kvm, gtod); - mutex_unlock(&kvm->lock); - } - +int kvm_s390_try_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod) +{ + if (!mutex_trylock(&kvm->lock)) + return 0; + __kvm_s390_set_tod_clock(kvm, gtod); -+ mutex_unlock(&kvm->lock); + mutex_unlock(&kvm->lock); + return 1; -+} -+ + } + /** - * kvm_arch_fault_in_page - fault-in guest page if necessary - * @vcpu: The corresponding virtual cpu -@@ -4642,10 +4655,15 @@ int kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu) +@@ -4642,10 +4663,15 @@ int kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu) } } @@ -35832,7 +44991,7 @@ index 1c97493d21e10..b456aa196c04f 100644 __disable_ibs_on_vcpu(vcpu); for (i = 0; i < online_vcpus; i++) { -@@ -4703,6 +4721,8 @@ static long kvm_s390_guest_sida_op(struct kvm_vcpu *vcpu, +@@ -4703,6 +4729,8 @@ static long kvm_s390_guest_sida_op(struct kvm_vcpu *vcpu, return -EINVAL; if (mop->size + mop->sida_offset > sida_size(vcpu->arch.sie_block)) return -E2BIG; @@ -35842,21 +45001,20 @@ index 1c97493d21e10..b456aa196c04f 100644 switch (mop->op) { case KVM_S390_MEMOP_SIDA_READ: diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h -index 52bc8fbaa60ac..f8803bf0ff170 100644 +index 52bc8fbaa60ac..a2fde6d69057b 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h -@@ -326,8 +326,8 @@ int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu); +@@ -326,8 +326,7 @@ int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu); int kvm_s390_handle_sigp_pei(struct kvm_vcpu *vcpu); /* implemented in kvm-s390.c */ -void kvm_s390_set_tod_clock(struct kvm *kvm, - const struct kvm_s390_vm_tod_clock *gtod); -+void kvm_s390_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod); +int kvm_s390_try_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod); long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable); int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long addr); int kvm_s390_vcpu_store_status(struct 
kvm_vcpu *vcpu, unsigned long addr); -@@ -418,6 +418,7 @@ void kvm_s390_destroy_adapters(struct kvm *kvm); +@@ -418,6 +417,7 @@ void kvm_s390_destroy_adapters(struct kvm *kvm); int kvm_s390_ext_call_pending(struct kvm_vcpu *vcpu); extern struct kvm_device_ops kvm_flic_ops; int kvm_s390_is_stop_irq_pending(struct kvm_vcpu *vcpu); @@ -36008,6 +45166,22 @@ index 683036c1c92a8..52800279686c0 100644 dest_vcpu = kvm_get_vcpu_by_id(vcpu->kvm, cpu_addr); BUG_ON(dest_vcpu == NULL); +diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c +index acda4b6fc8518..2c0704f5eb3c6 100644 +--- a/arch/s390/kvm/vsie.c ++++ b/arch/s390/kvm/vsie.c +@@ -538,8 +538,10 @@ static int shadow_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) + if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_CEI)) + scb_s->eca |= scb_o->eca & ECA_CEI; + /* Epoch Extension */ +- if (test_kvm_facility(vcpu->kvm, 139)) ++ if (test_kvm_facility(vcpu->kvm, 139)) { + scb_s->ecd |= scb_o->ecd & ECD_MEF; ++ scb_s->epdx = scb_o->epdx; ++ } + + /* etoken */ + if (test_kvm_facility(vcpu->kvm, 156)) diff --git a/arch/s390/lib/test_unwind.c b/arch/s390/lib/test_unwind.c index ecf327d743a03..c0635cf787e31 100644 --- a/arch/s390/lib/test_unwind.c @@ -36026,6 +45200,33 @@ index ecf327d743a03..c0635cf787e31 100644 "0: nopr %%r7\n" EX_TABLE(0b, 0b) :); +diff --git a/arch/s390/lib/uaccess.c b/arch/s390/lib/uaccess.c +index a596e69d3c474..0b012ce0921c1 100644 +--- a/arch/s390/lib/uaccess.c ++++ b/arch/s390/lib/uaccess.c +@@ -212,7 +212,7 @@ static inline unsigned long clear_user_mvcos(void __user *to, unsigned long size + asm volatile( + " llilh 0,%[spec]\n" + "0: .insn ss,0xc80000000000,0(%0,%1),0(%4),0\n" +- " jz 4f\n" ++ "6: jz 4f\n" + "1: algr %0,%2\n" + " slgr %1,%2\n" + " j 0b\n" +@@ -222,11 +222,11 @@ static inline unsigned long clear_user_mvcos(void __user *to, unsigned long size + " clgr %0,%3\n" /* copy crosses next page boundary? 
*/ + " jnh 5f\n" + "3: .insn ss,0xc80000000000,0(%3,%1),0(%4),0\n" +- " slgr %0,%3\n" ++ "7: slgr %0,%3\n" + " j 5f\n" + "4: slgr %0,%0\n" + "5:\n" +- EX_TABLE(0b,2b) EX_TABLE(3b,5b) ++ EX_TABLE(0b,2b) EX_TABLE(6b,2b) EX_TABLE(3b,5b) EX_TABLE(7b,5b) + : "+a" (size), "+a" (to), "+a" (tmp1), "=a" (tmp2) + : "a" (empty_zero_page), [spec] "K" (0x81UL) + : "cc", "memory", "0"); diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index 212632d57db9c..c930dff312df3 100644 --- a/arch/s390/mm/fault.c @@ -36485,6 +45686,41 @@ index 5b8d647523f96..6d57625b8ed99 100644 } void zpci_event_availability(void *data) +diff --git a/arch/s390/pci/pci_mmio.c b/arch/s390/pci/pci_mmio.c +index c5b35ea129cfa..b94163ee5632c 100644 +--- a/arch/s390/pci/pci_mmio.c ++++ b/arch/s390/pci/pci_mmio.c +@@ -63,7 +63,7 @@ static inline int __pcistg_mio_inuser( + asm volatile ( + " sacf 256\n" + "0: llgc %[tmp],0(%[src])\n" +- " sllg %[val],%[val],8\n" ++ "4: sllg %[val],%[val],8\n" + " aghi %[src],1\n" + " ogr %[val],%[tmp]\n" + " brctg %[cnt],0b\n" +@@ -71,7 +71,7 @@ static inline int __pcistg_mio_inuser( + "2: ipm %[cc]\n" + " srl %[cc],28\n" + "3: sacf 768\n" +- EX_TABLE(0b, 3b) EX_TABLE(1b, 3b) EX_TABLE(2b, 3b) ++ EX_TABLE(0b, 3b) EX_TABLE(4b, 3b) EX_TABLE(1b, 3b) EX_TABLE(2b, 3b) + : + [src] "+a" (src), [cnt] "+d" (cnt), + [val] "+d" (val), [tmp] "=d" (tmp), +@@ -214,10 +214,10 @@ static inline int __pcilg_mio_inuser( + "2: ahi %[shift],-8\n" + " srlg %[tmp],%[val],0(%[shift])\n" + "3: stc %[tmp],0(%[dst])\n" +- " aghi %[dst],1\n" ++ "5: aghi %[dst],1\n" + " brctg %[cnt],2b\n" + "4: sacf 768\n" +- EX_TABLE(0b, 4b) EX_TABLE(1b, 4b) EX_TABLE(3b, 4b) ++ EX_TABLE(0b, 4b) EX_TABLE(1b, 4b) EX_TABLE(3b, 4b) EX_TABLE(5b, 4b) + : + [ioaddr_len] "+&d" (ioaddr_len.pair), + [cc] "+d" (cc), [val] "=d" (val), diff --git a/arch/sh/Kconfig.debug b/arch/sh/Kconfig.debug index 958f790273ab9..10290e5c1f438 100644 --- a/arch/sh/Kconfig.debug @@ -36640,6 +45876,19 @@ index d606679a211e1..57efaf5b82ae0 100644 } pr_notice("Booting machvec: %s\n", get_system_type()); +diff --git a/arch/sh/kernel/traps.c b/arch/sh/kernel/traps.c +index e76b221570999..361b764700b74 100644 +--- a/arch/sh/kernel/traps.c ++++ b/arch/sh/kernel/traps.c +@@ -57,7 +57,7 @@ void die(const char *str, struct pt_regs *regs, long err) + if (panic_on_oops) + panic("Fatal exception"); + +- do_exit(SIGSEGV); ++ make_task_dead(SIGSEGV); + } + + void die_if_kernel(const char *str, struct pt_regs *regs, long err) diff --git a/arch/sh/math-emu/math.c b/arch/sh/math-emu/math.c index e8be0eca0444a..615ba932c398e 100644 --- a/arch/sh/math-emu/math.c @@ -36861,6 +46110,36 @@ index 2a78d2af12655..6eeb766987d1a 100644 +static_assert(offsetof(siginfo_t, si_perf_flags) == 0x24); static_assert(offsetof(siginfo_t, si_band) == 0x10); static_assert(offsetof(siginfo_t, si_fd) == 0x14); +diff --git a/arch/sparc/kernel/traps_32.c b/arch/sparc/kernel/traps_32.c +index 5630e5a395e0d..179aabfa712ea 100644 +--- a/arch/sparc/kernel/traps_32.c ++++ b/arch/sparc/kernel/traps_32.c +@@ -86,9 +86,7 @@ void __noreturn die_if_kernel(char *str, struct pt_regs *regs) + } + printk("Instruction DUMP:"); + instruction_dump ((unsigned long *) regs->pc); +- if(regs->psr & PSR_PS) +- do_exit(SIGKILL); +- do_exit(SIGSEGV); ++ make_task_dead((regs->psr & PSR_PS) ? 
SIGKILL : SIGSEGV); + } + + void do_hw_interrupt(struct pt_regs *regs, unsigned long type) +diff --git a/arch/sparc/kernel/traps_64.c b/arch/sparc/kernel/traps_64.c +index 6863025ed56d2..21077821f4272 100644 +--- a/arch/sparc/kernel/traps_64.c ++++ b/arch/sparc/kernel/traps_64.c +@@ -2559,9 +2559,7 @@ void __noreturn die_if_kernel(char *str, struct pt_regs *regs) + } + if (panic_on_oops) + panic("Fatal exception"); +- if (regs->tstate & TSTATE_PRIV) +- do_exit(SIGKILL); +- do_exit(SIGSEGV); ++ make_task_dead((regs->tstate & TSTATE_PRIV)? SIGKILL : SIGSEGV); + } + EXPORT_SYMBOL(die_if_kernel); + diff --git a/arch/sparc/kernel/windows.c b/arch/sparc/kernel/windows.c index 69a6ba6e92937..8f20862ccc83e 100644 --- a/arch/sparc/kernel/windows.c @@ -38196,6 +47475,28 @@ index b5aecb524a8aa..ffec8bb01ba8c 100644 $(obj)/setup.elf: $(src)/setup.ld $(SETUP_OBJS) FORCE $(call if_changed,ld) +diff --git a/arch/x86/boot/bioscall.S b/arch/x86/boot/bioscall.S +index 5521ea12f44e0..aa9b964575843 100644 +--- a/arch/x86/boot/bioscall.S ++++ b/arch/x86/boot/bioscall.S +@@ -32,7 +32,7 @@ intcall: + movw %dx, %si + movw %sp, %di + movw $11, %cx +- rep; movsd ++ rep; movsl + + /* Pop full state from the stack */ + popal +@@ -67,7 +67,7 @@ intcall: + jz 4f + movw %sp, %si + movw $11, %cx +- rep; movsd ++ rep; movsl + 4: addw $44, %sp + + /* Restore state and return */ diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile index 431bf7f846c3c..15c5ae62a0e94 100644 --- a/arch/x86/boot/compressed/Makefile @@ -40503,7 +49804,7 @@ index 0000000000000..bfb7bcb362bcf + +.popsection diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S -index ccb9d32768f31..6b44263d7efbc 100644 +index ccb9d32768f31..e309e71560389 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -268,19 +268,16 @@ @@ -40595,8 +49896,26 @@ index ccb9d32768f31..6b44263d7efbc 100644 SYM_FUNC_END(entry_INT80_32) .macro FIXUP_ESPFIX_STACK +@@ -1248,14 +1239,14 @@ SYM_CODE_START(asm_exc_nmi) + SYM_CODE_END(asm_exc_nmi) + + .pushsection .text, "ax" +-SYM_CODE_START(rewind_stack_do_exit) ++SYM_CODE_START(rewind_stack_and_make_dead) + /* Prevent any naive code from trying to unwind to our caller. */ + xorl %ebp, %ebp + + movl PER_CPU_VAR(cpu_current_top_of_stack), %esi + leal -TOP_OF_KERNEL_STACK_PADDING-PTREGS_SIZE(%esi), %esp + +- call do_exit ++ call make_task_dead + 1: jmp 1b +-SYM_CODE_END(rewind_stack_do_exit) ++SYM_CODE_END(rewind_stack_and_make_dead) + .popsection diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S -index e38a4cf795d96..a3af2a9159b1b 100644 +index e38a4cf795d96..9f1333a9ee41d 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -85,7 +85,7 @@ @@ -40958,6 +50277,24 @@ index e38a4cf795d96..a3af2a9159b1b 100644 /* Always restore stashed CR3 value (see paranoid_entry) */ RESTORE_CR3 scratch_reg=%r15 save_reg=%r14 +@@ -1429,7 +1487,7 @@ SYM_CODE_END(ignore_sysret) + #endif + + .pushsection .text, "ax" +-SYM_CODE_START(rewind_stack_do_exit) ++SYM_CODE_START(rewind_stack_and_make_dead) + UNWIND_HINT_FUNC + /* Prevent any naive code from trying to unwind to our caller. 
*/ + xorl %ebp, %ebp +@@ -1438,6 +1496,6 @@ SYM_CODE_START(rewind_stack_do_exit) + leaq -PTREGS_SIZE(%rax), %rsp + UNWIND_HINT_REGS + +- call do_exit +-SYM_CODE_END(rewind_stack_do_exit) ++ call make_task_dead ++SYM_CODE_END(rewind_stack_and_make_dead) + .popsection diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S index 0051cf5c792d1..4d637a965efbe 100644 --- a/arch/x86/entry/entry_64_compat.S @@ -41119,6 +50456,20 @@ index a2dddcc189f69..c277c63195ce8 100644 GCOV_PROFILE := n quiet_cmd_vdso_and_check = VDSO $@ +diff --git a/arch/x86/entry/vdso/vdso.lds.S b/arch/x86/entry/vdso/vdso.lds.S +index 4bf48462fca7a..e8c60ae7a7c83 100644 +--- a/arch/x86/entry/vdso/vdso.lds.S ++++ b/arch/x86/entry/vdso/vdso.lds.S +@@ -27,7 +27,9 @@ VERSION { + __vdso_time; + clock_getres; + __vdso_clock_getres; ++#ifdef CONFIG_X86_SGX + __vdso_sgx_enter_enclave; ++#endif + local: *; + }; + } diff --git a/arch/x86/entry/vdso/vdso32/system_call.S b/arch/x86/entry/vdso/vdso32/system_call.S index 6ddd7a937b3e3..d33c6513fd2cb 100644 --- a/arch/x86/entry/vdso/vdso32/system_call.S @@ -41196,6 +50547,19 @@ index 2e203f3a25a7b..ef2dd18272431 100644 .balign 4096, 0xcc +diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c +index 9687a8aef01c5..d93d098dea99d 100644 +--- a/arch/x86/events/amd/core.c ++++ b/arch/x86/events/amd/core.c +@@ -976,7 +976,7 @@ static int __init amd_core_pmu_init(void) + * numbered counter following it. + */ + for (i = 0; i < x86_pmu.num_counters - 1; i += 2) +- even_ctr_mask |= 1 << i; ++ even_ctr_mask |= BIT_ULL(i); + + pair_constraint = (struct event_constraint) + __EVENT_CONSTRAINT(0, even_ctr_mask, 0, diff --git a/arch/x86/events/amd/ibs.c b/arch/x86/events/amd/ibs.c index 9739019d4b67a..2704ec1e42a30 100644 --- a/arch/x86/events/amd/ibs.c @@ -41362,7 +50726,7 @@ index 6dfa8ddaa60f7..81d5e0a1f48cd 100644 else misc |= PERF_RECORD_MISC_GUEST_KERNEL; diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c -index 9a044438072ba..588b83cc730d3 100644 +index 9a044438072ba..b70e1522a27ac 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -243,7 +243,8 @@ static struct extra_reg intel_skl_extra_regs[] __read_mostly = { @@ -41457,7 +50821,15 @@ index 9a044438072ba..588b83cc730d3 100644 }; static __init void intel_clovertown_quirk(void) -@@ -5447,7 +5466,11 @@ static void intel_pmu_check_event_constraints(struct event_constraint *event_con +@@ -4694,6 +4713,7 @@ static const struct x86_cpu_desc isolation_ucodes[] = { + INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_X, 5, 0x00000000), + INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_X, 6, 0x00000000), + INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_X, 7, 0x00000000), ++ INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_X, 11, 0x00000000), + INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_L, 3, 0x0000007c), + INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE, 3, 0x0000007c), + INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE, 9, 0x0000004e), +@@ -5447,7 +5467,11 @@ static void intel_pmu_check_event_constraints(struct event_constraint *event_con /* Disabled fixed counters which are not in CPUID */ c->idxmsk64 &= intel_ctrl; @@ -41470,7 +50842,15 @@ index 9a044438072ba..588b83cc730d3 100644 c->idxmsk64 |= (1ULL << num_counters) - 1; } c->idxmsk64 &= -@@ -6181,6 +6204,19 @@ __init int intel_pmu_init(void) +@@ -6085,6 +6109,7 @@ __init int intel_pmu_init(void) + break; + + case INTEL_FAM6_SAPPHIRERAPIDS_X: ++ case INTEL_FAM6_EMERALDRAPIDS_X: + pmem = true; + x86_pmu.late_ack = true; + memcpy(hw_cache_event_ids, spr_hw_cache_event_ids, sizeof(hw_cache_event_ids)); +@@ 
-6181,6 +6206,19 @@ __init int intel_pmu_init(void) pmu->num_counters = x86_pmu.num_counters; pmu->num_counters_fixed = x86_pmu.num_counters_fixed; } @@ -41491,7 +50871,7 @@ index 9a044438072ba..588b83cc730d3 100644 pmu->unconstrained = (struct event_constraint) __EVENT_CONSTRAINT(0, (1ULL << pmu->num_counters) - 1, diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c -index 8647713276a73..266ac8263696a 100644 +index 8647713276a73..21a9cb48daf5d 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -236,6 +236,7 @@ static u64 load_latency_data(u64 status) @@ -41518,7 +50898,7 @@ index 8647713276a73..266ac8263696a 100644 } struct pebs_record_core { -@@ -923,7 +931,8 @@ struct event_constraint intel_skl_pebs_event_constraints[] = { +@@ -923,12 +931,18 @@ struct event_constraint intel_skl_pebs_event_constraints[] = { }; struct event_constraint intel_icl_pebs_event_constraints[] = { @@ -41528,7 +50908,19 @@ index 8647713276a73..266ac8263696a 100644 INTEL_FLAGS_UEVENT_CONSTRAINT(0x0400, 0x800000000ULL), /* SLOTS */ INTEL_PLD_CONSTRAINT(0x1cd, 0xff), /* MEM_TRANS_RETIRED.LOAD_LATENCY */ -@@ -943,7 +952,7 @@ struct event_constraint intel_icl_pebs_event_constraints[] = { +- INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x1d0, 0xf), /* MEM_INST_RETIRED.LOAD */ +- INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x2d0, 0xf), /* MEM_INST_RETIRED.STORE */ ++ INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x11d0, 0xf), /* MEM_INST_RETIRED.STLB_MISS_LOADS */ ++ INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x12d0, 0xf), /* MEM_INST_RETIRED.STLB_MISS_STORES */ ++ INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x21d0, 0xf), /* MEM_INST_RETIRED.LOCK_LOADS */ ++ INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x41d0, 0xf), /* MEM_INST_RETIRED.SPLIT_LOADS */ ++ INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x42d0, 0xf), /* MEM_INST_RETIRED.SPLIT_STORES */ ++ INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x81d0, 0xf), /* MEM_INST_RETIRED.ALL_LOADS */ ++ INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x82d0, 0xf), /* MEM_INST_RETIRED.ALL_STORES */ + + INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD_RANGE(0xd1, 0xd4, 0xf), /* MEM_LOAD_*_RETIRED.* */ + +@@ -943,14 +957,19 @@ struct event_constraint intel_icl_pebs_event_constraints[] = { }; struct event_constraint intel_spr_pebs_event_constraints[] = { @@ -41537,8 +50929,22 @@ index 8647713276a73..266ac8263696a 100644 INTEL_FLAGS_UEVENT_CONSTRAINT(0x0400, 0x800000000ULL), INTEL_FLAGS_EVENT_CONSTRAINT(0xc0, 0xfe), + INTEL_PLD_CONSTRAINT(0x1cd, 0xfe), + INTEL_PSD_CONSTRAINT(0x2cd, 0x1), +- INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x1d0, 0xf), +- INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x2d0, 0xf), ++ INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x11d0, 0xf), /* MEM_INST_RETIRED.STLB_MISS_LOADS */ ++ INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x12d0, 0xf), /* MEM_INST_RETIRED.STLB_MISS_STORES */ ++ INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x21d0, 0xf), /* MEM_INST_RETIRED.LOCK_LOADS */ ++ INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x41d0, 0xf), /* MEM_INST_RETIRED.SPLIT_LOADS */ ++ INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x42d0, 0xf), /* MEM_INST_RETIRED.SPLIT_STORES */ ++ INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x81d0, 0xf), /* MEM_INST_RETIRED.ALL_LOADS */ ++ INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x82d0, 0xf), /* MEM_INST_RETIRED.ALL_STORES */ + + INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD_RANGE(0xd1, 0xd4, 0xf), + diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c -index 9e6d6eaeb4cb6..6737213873911 100644 +index 9e6d6eaeb4cb6..b3f92255cbd2d 100644 --- a/arch/x86/events/intel/lbr.c +++ 
b/arch/x86/events/intel/lbr.c @@ -1114,6 +1114,14 @@ static int intel_pmu_setup_hw_lbr_filter(struct perf_event *event) @@ -41566,8 +50972,17 @@ index 9e6d6eaeb4cb6..6737213873911 100644 if (WARN_ON(xfeature_size(XFEATURE_LBR) != get_lbr_state_size())) return false; +@@ -1836,7 +1847,7 @@ void __init intel_pmu_arch_lbr_init(void) + return; + + clear_arch_lbr: +- clear_cpu_cap(&boot_cpu_data, X86_FEATURE_ARCH_LBR); ++ setup_clear_cpu_cap(X86_FEATURE_ARCH_LBR); + } + + /** diff --git a/arch/x86/events/intel/pt.c b/arch/x86/events/intel/pt.c -index 7f406c14715fd..9ac2054878049 100644 +index 7f406c14715fd..d0295240c78a8 100644 --- a/arch/x86/events/intel/pt.c +++ b/arch/x86/events/intel/pt.c @@ -13,6 +13,8 @@ @@ -41600,18 +51015,33 @@ index 7f406c14715fd..9ac2054878049 100644 perf_aux_output_flag(&pt->handle, PERF_AUX_FLAG_TRUNCATED); advance++; -@@ -1347,11 +1350,37 @@ static void pt_addr_filters_fini(struct perf_event *event) +@@ -1244,6 +1247,15 @@ static int pt_buffer_try_single(struct pt_buffer *buf, int nr_pages) + if (1 << order != nr_pages) + goto out; + ++ /* ++ * Some processors cannot always support single range for more than ++ * 4KB - refer errata TGL052, ADL037 and RPL017. Future processors might ++ * also be affected, so for now rather than trying to keep track of ++ * which ones, just disable it for all. ++ */ ++ if (nr_pages > 1) ++ goto out; ++ + buf->single = true; + buf->nr_pages = nr_pages; + ret = 0; +@@ -1347,10 +1359,36 @@ static void pt_addr_filters_fini(struct perf_event *event) event->hw.addr_filters = NULL; } -static inline bool valid_kernel_ip(unsigned long ip) +#ifdef CONFIG_X86_64 +static u64 canonical_address(u64 vaddr, u8 vaddr_bits) - { -- return virt_addr_valid(ip) && kernel_ip(ip); ++{ + return ((s64)vaddr << (64 - vaddr_bits)) >> (64 - vaddr_bits); - } - ++} ++ +static u64 is_canonical_address(u64 vaddr, u8 vaddr_bits) +{ + return canonical_address(vaddr, vaddr_bits) == vaddr; @@ -41627,20 +51057,20 @@ index 7f406c14715fd..9ac2054878049 100644 + +/* Clamp to a canonical address less-than-or-equal-to the address given */ +static u64 clamp_to_le_canonical_addr(u64 vaddr, u8 vaddr_bits) -+{ + { +- return virt_addr_valid(ip) && kernel_ip(ip); + return is_canonical_address(vaddr, vaddr_bits) ? 
+ vaddr : + BIT_ULL(vaddr_bits - 1) - 1; -+} + } +#else +#define clamp_to_ge_canonical_addr(x, y) (x) +#define clamp_to_le_canonical_addr(x, y) (x) +#endif -+ + static int pt_event_addr_filters_validate(struct list_head *filters) { - struct perf_addr_filter *filter; -@@ -1366,14 +1395,6 @@ static int pt_event_addr_filters_validate(struct list_head *filters) +@@ -1366,14 +1404,6 @@ static int pt_event_addr_filters_validate(struct list_head *filters) filter->action == PERF_ADDR_FILTER_ACTION_START) return -EOPNOTSUPP; @@ -41655,7 +51085,7 @@ index 7f406c14715fd..9ac2054878049 100644 if (++range > intel_pt_validate_hw_cap(PT_CAP_num_address_ranges)) return -EOPNOTSUPP; } -@@ -1397,9 +1418,26 @@ static void pt_event_addr_filters_sync(struct perf_event *event) +@@ -1397,9 +1427,26 @@ static void pt_event_addr_filters_sync(struct perf_event *event) if (filter->path.dentry && !fr[range].start) { msr_a = msr_b = 0; } else { @@ -41685,6 +51115,30 @@ index 7f406c14715fd..9ac2054878049 100644 } filters->filter[range].msr_a = msr_a; +diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c +index c72e368dd1641..7e16c590f2593 100644 +--- a/arch/x86/events/intel/uncore.c ++++ b/arch/x86/events/intel/uncore.c +@@ -1829,6 +1829,7 @@ static const struct x86_cpu_id intel_uncore_match[] __initconst = { + X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE, &adl_uncore_init), + X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L, &adl_uncore_init), + X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, &spr_uncore_init), ++ X86_MATCH_INTEL_FAM6_MODEL(EMERALDRAPIDS_X, &spr_uncore_init), + X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_D, &snr_uncore_init), + {}, + }; +diff --git a/arch/x86/events/intel/uncore.h b/arch/x86/events/intel/uncore.h +index b9687980aab6d..d6f7c6c1a930a 100644 +--- a/arch/x86/events/intel/uncore.h ++++ b/arch/x86/events/intel/uncore.h +@@ -2,6 +2,7 @@ + #include <linux/slab.h> + #include <linux/pci.h> + #include <asm/apicdef.h> ++#include <asm/intel-family.h> + #include <linux/io-64-nonatomic-lo-hi.h> + + #include <linux/perf_event.h> diff --git a/arch/x86/events/intel/uncore_discovery.h b/arch/x86/events/intel/uncore_discovery.h index 7280c8a3c8310..6d735611c281c 100644 --- a/arch/x86/events/intel/uncore_discovery.h @@ -41699,7 +51153,7 @@ index 7280c8a3c8310..6d735611c281c 100644 unit.table3 == -1ULL) diff --git a/arch/x86/events/intel/uncore_snb.c b/arch/x86/events/intel/uncore_snb.c -index 0f63706cdadfc..dc3ae55f79e08 100644 +index 0f63706cdadfc..912fb3821a6bb 100644 --- a/arch/x86/events/intel/uncore_snb.c +++ b/arch/x86/events/intel/uncore_snb.c @@ -788,6 +788,22 @@ int snb_pci2phy_map_init(int devid) @@ -41734,8 +51188,25 @@ index 0f63706cdadfc..dc3ae55f79e08 100644 }; static struct intel_uncore_type snb_uncore_imc = { +@@ -1407,6 +1423,7 @@ static void tgl_uncore_imc_freerunning_init_box(struct intel_uncore_box *box) + /* MCHBAR is disabled */ + if (!(mch_bar & BIT(0))) { + pr_warn("perf uncore: MCHBAR is disabled. 
Failed to map IMC free-running counters.\n"); ++ pci_dev_put(pdev); + return; + } + mch_bar &= ~BIT(0); +@@ -1420,6 +1437,8 @@ static void tgl_uncore_imc_freerunning_init_box(struct intel_uncore_box *box) + box->io_addr = ioremap(addr, type->mmio_map_size); + if (!box->io_addr) + pr_warn("perf uncore: Failed to ioremap for %s.\n", type->name); ++ ++ pci_dev_put(pdev); + } + + static struct intel_uncore_ops tgl_uncore_imc_freerunning_ops = { diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c -index 5ddc0f30db6fc..ed869443efb21 100644 +index 5ddc0f30db6fc..8f371f3cbbd24 100644 --- a/arch/x86/events/intel/uncore_snbep.c +++ b/arch/x86/events/intel/uncore_snbep.c @@ -452,7 +452,7 @@ @@ -41747,7 +51218,15 @@ index 5ddc0f30db6fc..ed869443efb21 100644 #define ICX_IMC_MEM_STRIDE 0x4 /* SPR */ -@@ -3608,6 +3608,9 @@ static int skx_cha_hw_config(struct intel_uncore_box *box, struct perf_event *ev +@@ -2891,6 +2891,7 @@ static bool hswep_has_limit_sbox(unsigned int device) + return false; + + pci_read_config_dword(dev, HSWEP_PCU_CAPID4_OFFET, &capid4); ++ pci_dev_put(dev); + if (!hswep_get_chop(capid4)) + return true; + +@@ -3608,6 +3609,9 @@ static int skx_cha_hw_config(struct intel_uncore_box *box, struct perf_event *ev struct hw_perf_event_extra *reg1 = &event->hw.extra_reg; struct extra_reg *er; int idx = 0; @@ -41757,7 +51236,7 @@ index 5ddc0f30db6fc..ed869443efb21 100644 for (er = skx_uncore_cha_extra_regs; er->msr; er++) { if (er->event != (event->hw.config & er->config_mask)) -@@ -3675,6 +3678,7 @@ static struct event_constraint skx_uncore_iio_constraints[] = { +@@ -3675,6 +3679,7 @@ static struct event_constraint skx_uncore_iio_constraints[] = { UNCORE_EVENT_CONSTRAINT(0xc0, 0xc), UNCORE_EVENT_CONSTRAINT(0xc5, 0xc), UNCORE_EVENT_CONSTRAINT(0xd4, 0xc), @@ -41765,7 +51244,47 @@ index 5ddc0f30db6fc..ed869443efb21 100644 EVENT_CONSTRAINT_END }; -@@ -4525,6 +4529,13 @@ static void snr_iio_cleanup_mapping(struct intel_uncore_type *type) +@@ -3799,6 +3804,21 @@ static const struct attribute_group *skx_iio_attr_update[] = { + NULL, + }; + ++static void pmu_clear_mapping_attr(const struct attribute_group **groups, ++ struct attribute_group *ag) ++{ ++ int i; ++ ++ for (i = 0; groups[i]; i++) { ++ if (groups[i] == ag) { ++ for (i++; groups[i]; i++) ++ groups[i - 1] = groups[i]; ++ groups[i - 1] = NULL; ++ break; ++ } ++ } ++} ++ + static int + pmu_iio_set_mapping(struct intel_uncore_type *type, struct attribute_group *ag) + { +@@ -3847,7 +3867,7 @@ clear_attrs: + clear_topology: + kfree(type->topology); + clear_attr_update: +- type->attr_update = NULL; ++ pmu_clear_mapping_attr(type->attr_update, ag); + return ret; + } + +@@ -4488,6 +4508,8 @@ static int sad_cfg_iio_topology(struct intel_uncore_type *type, u8 *sad_pmon_map + type->topology = NULL; + } + ++ pci_dev_put(dev); ++ + return ret; + } + +@@ -4525,6 +4547,13 @@ static void snr_iio_cleanup_mapping(struct intel_uncore_type *type) pmu_iio_cleanup_mapping(type, &snr_iio_mapping_group); } @@ -41779,7 +51298,7 @@ index 5ddc0f30db6fc..ed869443efb21 100644 static struct intel_uncore_type snr_uncore_iio = { .name = "iio", .num_counters = 4, -@@ -4536,6 +4547,7 @@ static struct intel_uncore_type snr_uncore_iio = { +@@ -4536,6 +4565,7 @@ static struct intel_uncore_type snr_uncore_iio = { .event_mask_ext = SNR_IIO_PMON_RAW_EVENT_MASK_EXT, .box_ctl = SNR_IIO_MSR_PMON_BOX_CTL, .msr_offset = SNR_IIO_MSR_OFFSET, @@ -41787,7 +51306,16 @@ index 5ddc0f30db6fc..ed869443efb21 100644 .ops = &ivbep_uncore_msr_ops, .format_group 
= &snr_uncore_iio_format_group, .attr_update = snr_iio_attr_update, -@@ -5076,8 +5088,10 @@ static struct event_constraint icx_uncore_iio_constraints[] = { +@@ -4845,6 +4875,8 @@ static int snr_uncore_mmio_map(struct intel_uncore_box *box, + + addr += box_ctl; + ++ pci_dev_put(pdev); ++ + box->io_addr = ioremap(addr, type->mmio_map_size); + if (!box->io_addr) { + pr_warn("perf uncore: Failed to ioremap for %s.\n", type->name); +@@ -5076,8 +5108,10 @@ static struct event_constraint icx_uncore_iio_constraints[] = { UNCORE_EVENT_CONSTRAINT(0x02, 0x3), UNCORE_EVENT_CONSTRAINT(0x03, 0x3), UNCORE_EVENT_CONSTRAINT(0x83, 0x3), @@ -41798,7 +51326,19 @@ index 5ddc0f30db6fc..ed869443efb21 100644 EVENT_CONSTRAINT_END }; -@@ -5463,12 +5477,12 @@ static struct intel_uncore_ops icx_uncore_mmio_ops = { +@@ -5125,6 +5159,11 @@ static int icx_iio_get_topology(struct intel_uncore_type *type) + + static int icx_iio_set_mapping(struct intel_uncore_type *type) + { ++ /* Detect ICX-D system. This case is not supported */ ++ if (boot_cpu_data.x86_model == INTEL_FAM6_ICELAKE_D) { ++ pmu_clear_mapping_attr(type->attr_update, &icx_iio_mapping_group); ++ return -EPERM; ++ } + return pmu_iio_set_mapping(type, &icx_iio_mapping_group); + } + +@@ -5463,12 +5502,12 @@ static struct intel_uncore_ops icx_uncore_mmio_ops = { static struct intel_uncore_type icx_uncore_imc = { .name = "imc", .num_counters = 4, @@ -41813,7 +51353,7 @@ index 5ddc0f30db6fc..ed869443efb21 100644 .perf_ctr = SNR_IMC_MMIO_PMON_CTR0, .event_ctl = SNR_IMC_MMIO_PMON_CTL0, .event_mask = SNBEP_PMON_RAW_EVENT_MASK, -@@ -5647,6 +5661,7 @@ static struct intel_uncore_type spr_uncore_chabox = { +@@ -5647,6 +5686,7 @@ static struct intel_uncore_type spr_uncore_chabox = { .event_mask = SPR_CHA_PMON_EVENT_MASK, .event_mask_ext = SPR_RAW_EVENT_MASK_EXT, .num_shared_regs = 1, @@ -41821,7 +51361,7 @@ index 5ddc0f30db6fc..ed869443efb21 100644 .ops = &spr_uncore_chabox_ops, .format_group = &spr_uncore_chabox_format_group, .attr_update = uncore_alias_groups, -@@ -5658,6 +5673,7 @@ static struct intel_uncore_type spr_uncore_iio = { +@@ -5658,6 +5698,7 @@ static struct intel_uncore_type spr_uncore_iio = { .event_mask_ext = SNR_IIO_PMON_RAW_EVENT_MASK_EXT, .format_group = &snr_uncore_iio_format_group, .attr_update = uncore_alias_groups, @@ -41829,7 +51369,7 @@ index 5ddc0f30db6fc..ed869443efb21 100644 }; static struct attribute *spr_uncore_raw_formats_attr[] = { -@@ -5686,9 +5702,16 @@ static struct intel_uncore_type spr_uncore_irp = { +@@ -5686,9 +5727,16 @@ static struct intel_uncore_type spr_uncore_irp = { }; @@ -41846,7 +51386,7 @@ index 5ddc0f30db6fc..ed869443efb21 100644 }; static struct intel_uncore_type spr_uncore_pcu = { -@@ -5765,6 +5788,7 @@ static struct intel_uncore_type spr_uncore_upi = { +@@ -5765,6 +5813,7 @@ static struct intel_uncore_type spr_uncore_upi = { static struct intel_uncore_type spr_uncore_m3upi = { SPR_UNCORE_PCI_COMMON_FORMAT(), .name = "m3upi", @@ -41854,8 +51394,20 @@ index 5ddc0f30db6fc..ed869443efb21 100644 }; static struct intel_uncore_type spr_uncore_mdf = { +diff --git a/arch/x86/events/msr.c b/arch/x86/events/msr.c +index 96c775abe31ff..d23b5523cdd3b 100644 +--- a/arch/x86/events/msr.c ++++ b/arch/x86/events/msr.c +@@ -69,6 +69,7 @@ static bool test_intel(int idx, void *data) + case INTEL_FAM6_BROADWELL_G: + case INTEL_FAM6_BROADWELL_X: + case INTEL_FAM6_SAPPHIRERAPIDS_X: ++ case INTEL_FAM6_EMERALDRAPIDS_X: + + case INTEL_FAM6_ATOM_SILVERMONT: + case INTEL_FAM6_ATOM_SILVERMONT_D: diff --git a/arch/x86/events/rapl.c 
b/arch/x86/events/rapl.c -index 85feafacc445d..77e3a47af5ad5 100644 +index 85feafacc445d..840ee43e3e464 100644 --- a/arch/x86/events/rapl.c +++ b/arch/x86/events/rapl.c @@ -536,11 +536,14 @@ static struct perf_msr intel_rapl_spr_msrs[] = { @@ -41876,11 +51428,89 @@ index 85feafacc445d..77e3a47af5ad5 100644 static int rapl_cpu_offline(unsigned int cpu) { struct rapl_pmu *pmu = cpu_to_rapl_pmu(cpu); +@@ -801,6 +804,8 @@ static const struct x86_cpu_id rapl_model_match[] __initconst = { + X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X, &model_hsx), + X86_MATCH_INTEL_FAM6_MODEL(COMETLAKE_L, &model_skl), + X86_MATCH_INTEL_FAM6_MODEL(COMETLAKE, &model_skl), ++ X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE_L, &model_skl), ++ X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE, &model_skl), + X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE, &model_skl), + X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L, &model_skl), + X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, &model_spr), diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c -index 708a2712a516d..b6d48ca5b0f17 100644 +index 708a2712a516d..95f98af74fdca 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c -@@ -139,7 +139,6 @@ void set_hv_tscchange_cb(void (*cb)(void)) +@@ -45,7 +45,7 @@ EXPORT_SYMBOL_GPL(hv_vp_assist_page); + static int hv_cpu_init(unsigned int cpu) + { + union hv_vp_assist_msr_contents msr = { 0 }; +- struct hv_vp_assist_page **hvp = &hv_vp_assist_page[smp_processor_id()]; ++ struct hv_vp_assist_page **hvp = &hv_vp_assist_page[cpu]; + int ret; + + ret = hv_common_cpu_init(cpu); +@@ -55,34 +55,32 @@ static int hv_cpu_init(unsigned int cpu) + if (!hv_vp_assist_page) + return 0; + +- if (!*hvp) { +- if (hv_root_partition) { +- /* +- * For root partition we get the hypervisor provided VP assist +- * page, instead of allocating a new page. +- */ +- rdmsrl(HV_X64_MSR_VP_ASSIST_PAGE, msr.as_uint64); +- *hvp = memremap(msr.pfn << +- HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT, +- PAGE_SIZE, MEMREMAP_WB); +- } else { +- /* +- * The VP assist page is an "overlay" page (see Hyper-V TLFS's +- * Section 5.2.1 "GPA Overlay Pages"). Here it must be zeroed +- * out to make sure we always write the EOI MSR in +- * hv_apic_eoi_write() *after* the EOI optimization is disabled +- * in hv_cpu_die(), otherwise a CPU may not be stopped in the +- * case of CPU offlining and the VM will hang. +- */ ++ if (hv_root_partition) { ++ /* ++ * For root partition we get the hypervisor provided VP assist ++ * page, instead of allocating a new page. ++ */ ++ rdmsrl(HV_X64_MSR_VP_ASSIST_PAGE, msr.as_uint64); ++ *hvp = memremap(msr.pfn << HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT, ++ PAGE_SIZE, MEMREMAP_WB); ++ } else { ++ /* ++ * The VP assist page is an "overlay" page (see Hyper-V TLFS's ++ * Section 5.2.1 "GPA Overlay Pages"). Here it must be zeroed ++ * out to make sure we always write the EOI MSR in ++ * hv_apic_eoi_write() *after* the EOI optimization is disabled ++ * in hv_cpu_die(), otherwise a CPU may not be stopped in the ++ * case of CPU offlining and the VM will hang. 
++ */ ++ if (!*hvp) + *hvp = __vmalloc(PAGE_SIZE, GFP_KERNEL | __GFP_ZERO); +- if (*hvp) +- msr.pfn = vmalloc_to_pfn(*hvp); +- } +- WARN_ON(!(*hvp)); +- if (*hvp) { +- msr.enable = 1; +- wrmsrl(HV_X64_MSR_VP_ASSIST_PAGE, msr.as_uint64); +- } ++ if (*hvp) ++ msr.pfn = vmalloc_to_pfn(*hvp); ++ ++ } ++ if (!WARN_ON(!(*hvp))) { ++ msr.enable = 1; ++ wrmsrl(HV_X64_MSR_VP_ASSIST_PAGE, msr.as_uint64); + } + + return 0; +@@ -139,7 +137,6 @@ void set_hv_tscchange_cb(void (*cb)(void)) struct hv_reenlightenment_control re_ctrl = { .vector = HYPERV_REENLIGHTENMENT_VECTOR, .enabled = 1, @@ -41888,7 +51518,7 @@ index 708a2712a516d..b6d48ca5b0f17 100644 }; struct hv_tsc_emulation_control emu_ctrl = {.enabled = 1}; -@@ -148,13 +147,20 @@ void set_hv_tscchange_cb(void (*cb)(void)) +@@ -148,13 +145,20 @@ void set_hv_tscchange_cb(void (*cb)(void)) return; } @@ -41909,7 +51539,7 @@ index 708a2712a516d..b6d48ca5b0f17 100644 } EXPORT_SYMBOL_GPL(set_hv_tscchange_cb); -@@ -342,20 +348,13 @@ static void __init hv_get_partition_id(void) +@@ -342,20 +346,13 @@ static void __init hv_get_partition_id(void) */ void __init hyperv_init(void) { @@ -41931,6 +51561,15 @@ index 708a2712a516d..b6d48ca5b0f17 100644 if (hv_common_init()) return; +@@ -472,8 +469,6 @@ void hyperv_cleanup(void) + { + union hv_x64_msr_hypercall_contents hypercall_msr; + +- unregister_syscore_ops(&hv_syscore_ops); +- + /* Reset our OS id */ + wrmsrl(HV_X64_MSR_GUEST_OS_ID, 0); + diff --git a/arch/x86/hyperv/mmu.c b/arch/x86/hyperv/mmu.c index bd13736d0c054..0ad2378fe6ad7 100644 --- a/arch/x86/hyperv/mmu.c @@ -42343,7 +51982,7 @@ index 16a51e7288d58..b0f206681fde3 100644 /* * There are 32 bits/features in each mask word. The high bits diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h -index d0ce5cfd3ac14..2b56bfef99172 100644 +index d0ce5cfd3ac14..e31c7e75d6b02 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -203,8 +203,8 @@ @@ -42357,7 +51996,7 @@ index d0ce5cfd3ac14..2b56bfef99172 100644 #define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */ #define X86_FEATURE_CDP_L2 ( 7*32+15) /* Code and Data Prioritization L2 */ #define X86_FEATURE_MSR_SPEC_CTRL ( 7*32+16) /* "" MSR SPEC_CTRL is implemented */ -@@ -294,6 +294,14 @@ +@@ -294,6 +294,17 @@ #define X86_FEATURE_PER_THREAD_MBA (11*32+ 7) /* "" Per-thread Memory Bandwidth Allocation */ #define X86_FEATURE_SGX1 (11*32+ 8) /* "" Basic SGX */ #define X86_FEATURE_SGX2 (11*32+ 9) /* "" SGX Enclave Dynamic Memory Management (EDMM) */ @@ -42369,10 +52008,13 @@ index d0ce5cfd3ac14..2b56bfef99172 100644 +#define X86_FEATURE_UNRET (11*32+15) /* "" AMD BTB untrain return */ +#define X86_FEATURE_USE_IBPB_FW (11*32+16) /* "" Use IBPB during runtime firmware calls */ +#define X86_FEATURE_RSB_VMEXIT_LITE (11*32+17) /* "" Fill RSB on VM exit when EIBRS is enabled */ ++ ++ ++#define X86_FEATURE_MSR_TSX_CTRL (11*32+20) /* "" MSR IA32_TSX_CTRL (Intel) implemented */ /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */ #define X86_FEATURE_AVX_VNNI (12*32+ 4) /* AVX VNNI instructions */ -@@ -313,6 +321,7 @@ +@@ -313,6 +324,7 @@ #define X86_FEATURE_AMD_SSBD (13*32+24) /* "" Speculative Store Bypass Disable */ #define X86_FEATURE_VIRT_SSBD (13*32+25) /* Virtualized Speculative Store Bypass Disable */ #define X86_FEATURE_AMD_SSB_NO (13*32+26) /* "" Speculative Store Bypass is fixed in hardware. 
*/
+#define X86_FEATURE_BTC_NO (13*32+29) /* "" Not vulnerable to Branch Type Confusion */
 
 /* Thermal and Power Management Leaf, CPUID level 0x00000006 (EAX), word 14 */
 #define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */
@@ -436,5 +448,10 @@
 #define X86_BUG_TAA X86_BUG(22) /* CPU is affected by TSX Async Abort(TAA) */
 #define X86_BUG_ITLB_MULTIHIT X86_BUG(23) /* CPU may incur MCE during certain page attribute changes */
 #define X86_BUG_SRBDS X86_BUG(24) /* CPU may leak RNG bits if not mitigated */
+#define X86_BUG_MMIO_STALE_DATA X86_BUG(25) /* CPU is affected by Processor MMIO Stale Data vulnerabilities */
+#define X86_BUG_MMIO_UNKNOWN X86_BUG(26) /* CPU is too old and its MMIO Stale Data status is unknown */
+#define X86_BUG_RETBLEED X86_BUG(27) /* CPU is affected by RETBleed */
+#define X86_BUG_EIBRS_PBRSB X86_BUG(28) /* EIBRS is vulnerable to Post Barrier RSB Predictions */
++#define X86_BUG_SMT_RSB X86_BUG(29) /* CPU is vulnerable to Cross-Thread Return Address Predictions */
 
 #endif /* _ASM_X86_CPUFEATURES_H */
diff --git a/arch/x86/include/asm/debugreg.h b/arch/x86/include/asm/debugreg.h
index cfdf307ddc012..9ed8343c9b3cb 100644
--- a/arch/x86/include/asm/debugreg.h
+++ b/arch/x86/include/asm/debugreg.h
@@ -39,7 +39,20 @@ static __always_inline unsigned long native_get_debugreg(int regno)
 asm("mov %%db6, %0" :"=r" (val));
 break;
 case 7:
- asm("mov %%db7, %0" :"=r" (val));
+ /*
+ * Apply __FORCE_ORDER to DR7 reads to forbid re-ordering them
+ * with other code.
+ *
+ * This is needed because a DR7 access can cause a #VC exception
+ * when running under SEV-ES. Taking a #VC exception is not a
+ * safe thing to do just anywhere in the entry code and
+ * re-ordering might place the access into an unsafe location.
+ *
+ * This happened in the NMI handler, where the DR7 read was
+ * re-ordered to happen before the call to sev_es_ist_enter(),
+ * causing stack recursion.
+ */
+ asm volatile("mov %%db7, %0" : "=r" (val) : __FORCE_ORDER);
 break;
 default:
 BUG();
@@ -66,7 +79,16 @@ static __always_inline void native_set_debugreg(int regno, unsigned long value)
 asm("mov %0, %%db6" ::"r" (value));
 break;
 case 7:
- asm("mov %0, %%db7" ::"r" (value));
+ /*
+ * Apply __FORCE_ORDER to DR7 writes to forbid re-ordering them
+ * with other code.
+ *
+ * While it didn't happen with a DR7 write (see the DR7 read
+ * comment above which explains where it happened), add the
+ * __FORCE_ORDER here too to avoid similar problems in the
+ * future.
++ */ ++ asm volatile("mov %0, %%db7" ::"r" (value), __FORCE_ORDER); + break; + default: + BUG(); diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h index 8f28fafa98b32..834a3b6d81e12 100644 --- a/arch/x86/include/asm/disabled-features.h @@ -42603,6 +52290,21 @@ index 5a18694a89b24..ce6fc4f8d1d11 100644 : \ : "D" (st), "m" (*st), "a" (lmask), "d" (hmask) \ : "memory") +diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h +index 109dfcc75299d..d91df71f60fb1 100644 +--- a/arch/x86/include/asm/fpu/xstate.h ++++ b/arch/x86/include/asm/fpu/xstate.h +@@ -136,8 +136,8 @@ extern void __init update_regset_xstate_info(unsigned int size, + + void *get_xsave_addr(struct xregs_state *xsave, int xfeature_nr); + int xfeature_size(int xfeature_nr); +-int copy_uabi_from_kernel_to_xstate(struct xregs_state *xsave, const void *kbuf); +-int copy_sigframe_from_user_to_xstate(struct xregs_state *xsave, const void __user *ubuf); ++int copy_uabi_from_kernel_to_xstate(struct xregs_state *xsave, const void *kbuf, u32 *pkru); ++int copy_sigframe_from_user_to_xstate(struct task_struct *tsk, const void __user *ubuf); + + void xsaves(struct xregs_state *xsave, u64 mask); + void xrstors(struct xregs_state *xsave, u64 mask); diff --git a/arch/x86/include/asm/futex.h b/arch/x86/include/asm/futex.h index f9c00110a69ad..99d345b686fa2 100644 --- a/arch/x86/include/asm/futex.h @@ -42703,6 +52405,40 @@ index 91d7182ad2d6e..3df123f437c96 100644 int insn_fetch_from_user(struct pt_regs *regs, unsigned char buf[MAX_INSN_SIZE]); int insn_fetch_from_user_inatomic(struct pt_regs *regs, +diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h +index 27158436f322d..b8e7ea9e71e20 100644 +--- a/arch/x86/include/asm/intel-family.h ++++ b/arch/x86/include/asm/intel-family.h +@@ -105,10 +105,17 @@ + + #define INTEL_FAM6_SAPPHIRERAPIDS_X 0x8F /* Golden Cove */ + ++#define INTEL_FAM6_EMERALDRAPIDS_X 0xCF ++ ++#define INTEL_FAM6_GRANITERAPIDS_X 0xAD ++#define INTEL_FAM6_GRANITERAPIDS_D 0xAE ++ + #define INTEL_FAM6_ALDERLAKE 0x97 /* Golden Cove / Gracemont */ + #define INTEL_FAM6_ALDERLAKE_L 0x9A /* Golden Cove / Gracemont */ + +-/* "Small Core" Processors (Atom) */ ++#define INTEL_FAM6_LUNARLAKE_M 0xBD ++ ++/* "Small Core" Processors (Atom/E-Core) */ + + #define INTEL_FAM6_ATOM_BONNELL 0x1C /* Diamondville, Pineview */ + #define INTEL_FAM6_ATOM_BONNELL_MID 0x26 /* Silverthorne, Lincroft */ +@@ -135,6 +142,10 @@ + #define INTEL_FAM6_ATOM_TREMONT 0x96 /* Elkhart Lake */ + #define INTEL_FAM6_ATOM_TREMONT_L 0x9C /* Jasper Lake */ + ++#define INTEL_FAM6_SIERRAFOREST_X 0xAF ++ ++#define INTEL_FAM6_GRANDRIDGE 0xB6 ++ + /* Xeon Phi */ + + #define INTEL_FAM6_XEON_PHI_KNL 0x57 /* Knights Landing */ diff --git a/arch/x86/include/asm/iommu.h b/arch/x86/include/asm/iommu.h index bf1ed2ddc74bd..7a983119bc403 100644 --- a/arch/x86/include/asm/iommu.h @@ -43073,7 +52809,7 @@ index b85147d75626e..d71c7e8b738d2 100644 #define arch_msi_msg_data x86_msi_data diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h -index a7c413432b33d..8f38265bc81dc 100644 +index a7c413432b33d..f069ab09c5fc1 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -51,6 +51,8 @@ @@ -43148,7 +52884,19 @@ index a7c413432b33d..8f38265bc81dc 100644 #define MSR_IA32_SYSENTER_CS 0x00000174 #define MSR_IA32_SYSENTER_ESP 0x00000175 -@@ -489,6 +528,9 @@ +@@ -456,6 +495,11 @@ + #define MSR_AMD64_CPUID_FN_1 0xc0011004 + #define 
MSR_AMD64_LS_CFG 0xc0011020 + #define MSR_AMD64_DC_CFG 0xc0011022 ++ ++#define MSR_AMD64_DE_CFG 0xc0011029 ++#define MSR_AMD64_DE_CFG_LFENCE_SERIALIZE_BIT 1 ++#define MSR_AMD64_DE_CFG_LFENCE_SERIALIZE BIT_ULL(MSR_AMD64_DE_CFG_LFENCE_SERIALIZE_BIT) ++ + #define MSR_AMD64_BU_CFG2 0xc001102a + #define MSR_AMD64_IBSFETCHCTL 0xc0011030 + #define MSR_AMD64_IBSFETCHLINAD 0xc0011031 +@@ -489,6 +533,9 @@ /* Fam 17h MSRs */ #define MSR_F17H_IRPERF 0xc00000e9 @@ -43158,6 +52906,16 @@ index a7c413432b33d..8f38265bc81dc 100644 /* Fam 16h MSRs */ #define MSR_F16H_L2I_PERF_CTL 0xc0010230 #define MSR_F16H_L2I_PERF_CTR 0xc0010231 +@@ -530,9 +577,6 @@ + #define FAM10H_MMIO_CONF_BASE_MASK 0xfffffffULL + #define FAM10H_MMIO_CONF_BASE_SHIFT 20 + #define MSR_FAM10H_NODE_ID 0xc001100c +-#define MSR_F10H_DECFG 0xc0011029 +-#define MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT 1 +-#define MSR_F10H_DECFG_LFENCE_SERIALIZE BIT_ULL(MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT) + + /* K8 MSRs */ + #define MSR_K8_TOP_MEM1 0xc001001a diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h index a3f87f1015d3d..d42e6c6b47b1e 100644 --- a/arch/x86/include/asm/msr.h @@ -43223,7 +52981,7 @@ index a3f87f1015d3d..d42e6c6b47b1e 100644 if (tracepoint_enabled(write_msr)) do_trace_write_msr(msr, ((u64)high << 32 | low), err); diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h -index ec2d5c8c66947..f5ce9a0ab2330 100644 +index ec2d5c8c66947..06c9f0eaa9ed7 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -5,11 +5,15 @@ @@ -43467,7 +53225,7 @@ index ec2d5c8c66947..f5ce9a0ab2330 100644 /* The Intel SPEC CTRL MSR base value cache */ extern u64 x86_spec_ctrl_base; +DECLARE_PER_CPU(u64, x86_spec_ctrl_current); -+extern void write_spec_ctrl_current(u64 val, bool force); ++extern void update_spec_ctrl_cond(u64 val); +extern u64 spec_ctrl_current(void); /* @@ -43820,6 +53578,19 @@ index 35bb35d28733e..54df06687d834 100644 } __attribute__((packed)); #define loaddebug(thread,register) \ +diff --git a/arch/x86/include/asm/syscall_wrapper.h b/arch/x86/include/asm/syscall_wrapper.h +index 6a2827d0681fc..e8ab7c1f1080a 100644 +--- a/arch/x86/include/asm/syscall_wrapper.h ++++ b/arch/x86/include/asm/syscall_wrapper.h +@@ -6,7 +6,7 @@ + #ifndef _ASM_X86_SYSCALL_WRAPPER_H + #define _ASM_X86_SYSCALL_WRAPPER_H + +-struct pt_regs; ++#include <asm/ptrace.h> + + extern long __x64_sys_ni_syscall(const struct pt_regs *regs); + extern long __ia32_sys_ni_syscall(const struct pt_regs *regs); diff --git a/arch/x86/include/asm/timex.h b/arch/x86/include/asm/timex.h index a4a8b1b16c0c1..956e4145311b1 100644 --- a/arch/x86/include/asm/timex.h @@ -44201,6 +53972,32 @@ index 14bcd59bcdee2..94ac7402c1ac2 100644 {} }; +diff --git a/arch/x86/kernel/acpi/cstate.c b/arch/x86/kernel/acpi/cstate.c +index 7de599eba7f04..7945eae5b315f 100644 +--- a/arch/x86/kernel/acpi/cstate.c ++++ b/arch/x86/kernel/acpi/cstate.c +@@ -79,6 +79,21 @@ void acpi_processor_power_init_bm_check(struct acpi_processor_flags *flags, + */ + flags->bm_control = 0; + } ++ if (c->x86_vendor == X86_VENDOR_AMD && c->x86 >= 0x17) { ++ /* ++ * For all AMD Zen or newer CPUs that support C3, caches ++ * should not be flushed by software while entering C3 ++ * type state. Set bm->check to 1 so that kernel doesn't ++ * need to execute cache flush operation. ++ */ ++ flags->bm_check = 1; ++ /* ++ * In current AMD C state implementation ARB_DIS is no longer ++ * used. 
So set bm_control to zero to indicate ARB_DIS is not ++ * required while entering C3 type state. ++ */ ++ flags->bm_control = 0; ++ } + } + EXPORT_SYMBOL(acpi_processor_power_init_bm_check); + diff --git a/arch/x86/kernel/acpi/wakeup_32.S b/arch/x86/kernel/acpi/wakeup_32.S index daf88f8143c5f..cf69081073b54 100644 --- a/arch/x86/kernel/acpi/wakeup_32.S @@ -44887,10 +54684,19 @@ index 0000000000000..03bb2f343ddb7 +} +EXPORT_SYMBOL_GPL(cc_platform_has); diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c -index 2131af9f2fa23..8b1bf1c14fc35 100644 +index 2131af9f2fa23..c30e32097fb11 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c -@@ -886,6 +886,28 @@ static void init_amd_bd(struct cpuinfo_x86 *c) +@@ -794,8 +794,6 @@ static void init_amd_gh(struct cpuinfo_x86 *c) + set_cpu_bug(c, X86_BUG_AMD_TLB_MMATCH); + } + +-#define MSR_AMD64_DE_CFG 0xC0011029 +- + static void init_amd_ln(struct cpuinfo_x86 *c) + { + /* +@@ -886,6 +884,28 @@ static void init_amd_bd(struct cpuinfo_x86 *c) clear_rdrand_cpuid_bit(c); } @@ -44919,7 +54725,7 @@ index 2131af9f2fa23..8b1bf1c14fc35 100644 static void init_amd_zn(struct cpuinfo_x86 *c) { set_cpu_cap(c, X86_FEATURE_ZEN); -@@ -894,12 +916,21 @@ static void init_amd_zn(struct cpuinfo_x86 *c) +@@ -894,12 +914,21 @@ static void init_amd_zn(struct cpuinfo_x86 *c) node_reclaim_distance = 32; #endif @@ -44947,7 +54753,7 @@ index 2131af9f2fa23..8b1bf1c14fc35 100644 } static void init_amd(struct cpuinfo_x86 *c) -@@ -931,7 +962,8 @@ static void init_amd(struct cpuinfo_x86 *c) +@@ -931,7 +960,8 @@ static void init_amd(struct cpuinfo_x86 *c) case 0x12: init_amd_ln(c); break; case 0x15: init_amd_bd(c); break; case 0x16: init_amd_jg(c); break; @@ -44957,7 +54763,18 @@ index 2131af9f2fa23..8b1bf1c14fc35 100644 case 0x19: init_amd_zn(c); break; } -@@ -989,6 +1021,8 @@ static void init_amd(struct cpuinfo_x86 *c) +@@ -958,8 +988,8 @@ static void init_amd(struct cpuinfo_x86 *c) + * msr_set_bit() uses the safe accessors, too, even if the MSR + * is not present. + */ +- msr_set_bit(MSR_F10H_DECFG, +- MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT); ++ msr_set_bit(MSR_AMD64_DE_CFG, ++ MSR_AMD64_DE_CFG_LFENCE_SERIALIZE_BIT); + + /* A serializing LFENCE stops RDTSC speculation */ + set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC); +@@ -989,6 +1019,8 @@ static void init_amd(struct cpuinfo_x86 *c) if (cpu_has(c, X86_FEATURE_IRPERF) && !cpu_has_amd_erratum(c, amd_erratum_1054)) msr_set_bit(MSR_K7_HWCR, MSR_K7_HWCR_IRPERF_EN_BIT); @@ -44967,7 +54784,7 @@ index 2131af9f2fa23..8b1bf1c14fc35 100644 #ifdef CONFIG_X86_32 diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c -index ecfca3bbcd968..7b15f7ef760d1 100644 +index ecfca3bbcd968..544e6c61e17d0 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -16,6 +16,7 @@ @@ -44978,7 +54795,7 @@ index ecfca3bbcd968..7b15f7ef760d1 100644 #include <asm/spec-ctrl.h> #include <asm/cmdline.h> -@@ -37,24 +38,52 @@ +@@ -37,24 +38,59 @@ static void __init spectre_v1_select_mitigation(void); static void __init spectre_v2_select_mitigation(void); @@ -45006,6 +54823,13 @@ index ecfca3bbcd968..7b15f7ef760d1 100644 + static DEFINE_MUTEX(spec_ctrl_mutex); ++/* Update SPEC_CTRL MSR and its cached copy unconditionally */ ++static void update_spec_ctrl(u64 val) ++{ ++ this_cpu_write(x86_spec_ctrl_current, val); ++ wrmsrl(MSR_IA32_SPEC_CTRL, val); ++} ++ /* - * The vendor and possibly platform specific bits which can be modified in - * x86_spec_ctrl_base. 
@@ -45013,7 +54837,7 @@ index ecfca3bbcd968..7b15f7ef760d1 100644 + * from x86_spec_ctrl_base due to STIBP/SSB in __speculation_ctrl_update(). */ -static u64 __ro_after_init x86_spec_ctrl_mask = SPEC_CTRL_IBRS; -+void write_spec_ctrl_current(u64 val, bool force) ++void update_spec_ctrl_cond(u64 val) +{ + if (this_cpu_read(x86_spec_ctrl_current) == val) + return; @@ -45024,7 +54848,7 @@ index ecfca3bbcd968..7b15f7ef760d1 100644 + * When KERNEL_IBRS this MSR is written on return-to-user, unless + * forced the update can be delayed until that time. + */ -+ if (force || !cpu_feature_enabled(X86_FEATURE_KERNEL_IBRS)) ++ if (!cpu_feature_enabled(X86_FEATURE_KERNEL_IBRS)) + wrmsrl(MSR_IA32_SPEC_CTRL, val); +} + @@ -45036,7 +54860,7 @@ index ecfca3bbcd968..7b15f7ef760d1 100644 /* * AMD specific MSR info for Speculative Store Bypass control. -@@ -84,6 +113,10 @@ EXPORT_SYMBOL_GPL(mds_idle_clear); +@@ -84,6 +120,10 @@ EXPORT_SYMBOL_GPL(mds_idle_clear); */ DEFINE_STATIC_KEY_FALSE(switch_mm_cond_l1d_flush); @@ -45047,7 +54871,7 @@ index ecfca3bbcd968..7b15f7ef760d1 100644 void __init check_bugs(void) { identify_boot_cpu(); -@@ -107,26 +140,27 @@ void __init check_bugs(void) +@@ -107,26 +147,27 @@ void __init check_bugs(void) if (boot_cpu_has(X86_FEATURE_MSR_SPEC_CTRL)) rdmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); @@ -45087,7 +54911,7 @@ index ecfca3bbcd968..7b15f7ef760d1 100644 arch_smt_update(); #ifdef CONFIG_X86_32 -@@ -161,31 +195,17 @@ void __init check_bugs(void) +@@ -161,31 +202,17 @@ void __init check_bugs(void) #endif } @@ -45124,7 +54948,7 @@ index ecfca3bbcd968..7b15f7ef760d1 100644 if (hostval != guestval) { msrval = setguest ? guestval : hostval; wrmsrl(MSR_IA32_SPEC_CTRL, msrval); -@@ -266,14 +286,6 @@ static void __init mds_select_mitigation(void) +@@ -266,14 +293,6 @@ static void __init mds_select_mitigation(void) } } @@ -45139,7 +54963,7 @@ index ecfca3bbcd968..7b15f7ef760d1 100644 static int __init mds_cmdline(char *str) { if (!boot_cpu_has_bug(X86_BUG_MDS)) -@@ -328,7 +340,7 @@ static void __init taa_select_mitigation(void) +@@ -328,7 +347,7 @@ static void __init taa_select_mitigation(void) /* TSX previously disabled by tsx=off */ if (!boot_cpu_has(X86_FEATURE_RTM)) { taa_mitigation = TAA_MITIGATION_TSX_DISABLED; @@ -45148,7 +54972,7 @@ index ecfca3bbcd968..7b15f7ef760d1 100644 } if (cpu_mitigations_off()) { -@@ -342,7 +354,7 @@ static void __init taa_select_mitigation(void) +@@ -342,7 +361,7 @@ static void __init taa_select_mitigation(void) */ if (taa_mitigation == TAA_MITIGATION_OFF && mds_mitigation == MDS_MITIGATION_OFF) @@ -45157,7 +54981,7 @@ index ecfca3bbcd968..7b15f7ef760d1 100644 if (boot_cpu_has(X86_FEATURE_MD_CLEAR)) taa_mitigation = TAA_MITIGATION_VERW; -@@ -374,18 +386,6 @@ static void __init taa_select_mitigation(void) +@@ -374,18 +393,6 @@ static void __init taa_select_mitigation(void) if (taa_nosmt || cpu_mitigations_auto_nosmt()) cpu_smt_disable(false); @@ -45176,7 +55000,7 @@ index ecfca3bbcd968..7b15f7ef760d1 100644 } static int __init tsx_async_abort_parse_cmdline(char *str) -@@ -409,6 +409,154 @@ static int __init tsx_async_abort_parse_cmdline(char *str) +@@ -409,6 +416,154 @@ static int __init tsx_async_abort_parse_cmdline(char *str) } early_param("tsx_async_abort", tsx_async_abort_parse_cmdline); @@ -45331,7 +55155,7 @@ index ecfca3bbcd968..7b15f7ef760d1 100644 #undef pr_fmt #define pr_fmt(fmt) "SRBDS: " fmt -@@ -470,11 +618,13 @@ static void __init srbds_select_mitigation(void) +@@ -470,11 +625,13 @@ static void __init srbds_select_mitigation(void) return; 
/* @@ -45348,7 +55172,7 @@ index ecfca3bbcd968..7b15f7ef760d1 100644 srbds_mitigation = SRBDS_MITIGATION_TSX_OFF; else if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) srbds_mitigation = SRBDS_MITIGATION_HYPERVISOR; -@@ -618,12 +768,180 @@ static int __init nospectre_v1_cmdline(char *str) +@@ -618,12 +775,180 @@ static int __init nospectre_v1_cmdline(char *str) } early_param("nospectre_v1", nospectre_v1_cmdline); @@ -45532,7 +55356,7 @@ index ecfca3bbcd968..7b15f7ef760d1 100644 static enum spectre_v2_user_mitigation spectre_v2_user_stibp __ro_after_init = SPECTRE_V2_USER_NONE; static enum spectre_v2_user_mitigation spectre_v2_user_ibpb __ro_after_init = -@@ -650,6 +968,33 @@ static inline const char *spectre_v2_module_string(void) +@@ -650,6 +975,33 @@ static inline const char *spectre_v2_module_string(void) static inline const char *spectre_v2_module_string(void) { return ""; } #endif @@ -45566,7 +55390,7 @@ index ecfca3bbcd968..7b15f7ef760d1 100644 static inline bool match_option(const char *arg, int arglen, const char *opt) { int len = strlen(opt); -@@ -664,7 +1009,11 @@ enum spectre_v2_mitigation_cmd { +@@ -664,7 +1016,11 @@ enum spectre_v2_mitigation_cmd { SPECTRE_V2_CMD_FORCE, SPECTRE_V2_CMD_RETPOLINE, SPECTRE_V2_CMD_RETPOLINE_GENERIC, @@ -45579,7 +55403,7 @@ index ecfca3bbcd968..7b15f7ef760d1 100644 }; enum spectre_v2_user_cmd { -@@ -705,13 +1054,15 @@ static void __init spec_v2_user_print_cond(const char *reason, bool secure) +@@ -705,13 +1061,15 @@ static void __init spec_v2_user_print_cond(const char *reason, bool secure) pr_info("spectre_v2_user=%s forced on command line.\n", reason); } @@ -45597,7 +55421,7 @@ index ecfca3bbcd968..7b15f7ef760d1 100644 case SPECTRE_V2_CMD_NONE: return SPECTRE_V2_USER_CMD_NONE; case SPECTRE_V2_CMD_FORCE: -@@ -737,8 +1088,16 @@ spectre_v2_parse_user_cmdline(enum spectre_v2_mitigation_cmd v2_cmd) +@@ -737,8 +1095,16 @@ spectre_v2_parse_user_cmdline(enum spectre_v2_mitigation_cmd v2_cmd) return SPECTRE_V2_USER_CMD_AUTO; } @@ -45615,7 +55439,7 @@ index ecfca3bbcd968..7b15f7ef760d1 100644 { enum spectre_v2_user_mitigation mode = SPECTRE_V2_USER_NONE; bool smt_possible = IS_ENABLED(CONFIG_SMP); -@@ -751,7 +1110,7 @@ spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd) +@@ -751,7 +1117,7 @@ spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd) cpu_smt_control == CPU_SMT_NOT_SUPPORTED) smt_possible = false; @@ -45624,7 +55448,7 @@ index ecfca3bbcd968..7b15f7ef760d1 100644 switch (cmd) { case SPECTRE_V2_USER_CMD_NONE: goto set_mode; -@@ -799,12 +1158,12 @@ spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd) +@@ -799,12 +1165,12 @@ spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd) } /* @@ -45640,7 +55464,7 @@ index ecfca3bbcd968..7b15f7ef760d1 100644 return; /* -@@ -816,6 +1175,14 @@ spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd) +@@ -816,6 +1182,14 @@ spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd) boot_cpu_has(X86_FEATURE_AMD_STIBP_ALWAYS_ON)) mode = SPECTRE_V2_USER_STRICT_PREFERRED; @@ -45655,7 +55479,7 @@ index ecfca3bbcd968..7b15f7ef760d1 100644 spectre_v2_user_stibp = mode; set_mode: -@@ -824,9 +1191,12 @@ set_mode: +@@ -824,9 +1198,12 @@ set_mode: static const char * const spectre_v2_strings[] = { [SPECTRE_V2_NONE] = "Vulnerable", @@ -45671,7 +55495,7 @@ index ecfca3bbcd968..7b15f7ef760d1 100644 }; static const struct { -@@ -837,9 +1207,14 @@ static const struct { +@@ -837,9 +1214,14 @@ static const struct { { "off", 
SPECTRE_V2_CMD_NONE, false }, { "on", SPECTRE_V2_CMD_FORCE, true }, { "retpoline", SPECTRE_V2_CMD_RETPOLINE, false }, @@ -45687,7 +55511,7 @@ index ecfca3bbcd968..7b15f7ef760d1 100644 }; static void __init spec_v2_print_cond(const char *reason, bool secure) -@@ -875,17 +1250,54 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void) +@@ -875,17 +1257,54 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void) } if ((cmd == SPECTRE_V2_CMD_RETPOLINE || @@ -45718,13 +55542,9 @@ index ecfca3bbcd968..7b15f7ef760d1 100644 + !boot_cpu_has(X86_FEATURE_LFENCE_RDTSC)) { + pr_err("%s selected, but CPU doesn't have a serializing LFENCE. Switching to AUTO select\n", + mitigation_options[i].option); - return SPECTRE_V2_CMD_AUTO; - } - -- if (cmd == SPECTRE_V2_CMD_RETPOLINE_AMD && -- boot_cpu_data.x86_vendor != X86_VENDOR_HYGON && -- boot_cpu_data.x86_vendor != X86_VENDOR_AMD) { -- pr_err("retpoline,amd selected but CPU is not AMD. Switching to AUTO select\n"); ++ return SPECTRE_V2_CMD_AUTO; ++ } ++ + if (cmd == SPECTRE_V2_CMD_IBRS && !IS_ENABLED(CONFIG_CPU_IBRS_ENTRY)) { + pr_err("%s selected but not compiled in. Switching to AUTO select\n", + mitigation_options[i].option); @@ -45740,16 +55560,20 @@ index ecfca3bbcd968..7b15f7ef760d1 100644 + if (cmd == SPECTRE_V2_CMD_IBRS && !boot_cpu_has(X86_FEATURE_IBRS)) { + pr_err("%s selected but CPU doesn't have IBRS. Switching to AUTO select\n", + mitigation_options[i].option); -+ return SPECTRE_V2_CMD_AUTO; -+ } -+ + return SPECTRE_V2_CMD_AUTO; + } + +- if (cmd == SPECTRE_V2_CMD_RETPOLINE_AMD && +- boot_cpu_data.x86_vendor != X86_VENDOR_HYGON && +- boot_cpu_data.x86_vendor != X86_VENDOR_AMD) { +- pr_err("retpoline,amd selected but CPU is not AMD. Switching to AUTO select\n"); + if (cmd == SPECTRE_V2_CMD_IBRS && boot_cpu_has(X86_FEATURE_XENPV)) { + pr_err("%s selected but running as XenPV guest. 
Switching to AUTO select\n", + mitigation_options[i].option); return SPECTRE_V2_CMD_AUTO; } -@@ -894,6 +1306,79 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void) +@@ -894,6 +1313,79 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void) return cmd; } @@ -45775,7 +55599,7 @@ index ecfca3bbcd968..7b15f7ef760d1 100644 + + if (ia32_cap & ARCH_CAP_RRSBA) { + x86_spec_ctrl_base |= SPEC_CTRL_RRSBA_DIS_S; -+ write_spec_ctrl_current(x86_spec_ctrl_base, true); ++ update_spec_ctrl(x86_spec_ctrl_base); + } +} + @@ -45829,7 +55653,7 @@ index ecfca3bbcd968..7b15f7ef760d1 100644 static void __init spectre_v2_select_mitigation(void) { enum spectre_v2_mitigation_cmd cmd = spectre_v2_parse_cmdline(); -@@ -914,86 +1399,172 @@ static void __init spectre_v2_select_mitigation(void) +@@ -914,86 +1406,172 @@ static void __init spectre_v2_select_mitigation(void) case SPECTRE_V2_CMD_FORCE: case SPECTRE_V2_CMD_AUTO: if (boot_cpu_has(X86_FEATURE_IBRS_ENHANCED)) { @@ -45840,7 +55664,9 @@ index ecfca3bbcd968..7b15f7ef760d1 100644 - goto specv2_set_mode; + mode = SPECTRE_V2_EIBRS; + break; -+ } + } +- if (IS_ENABLED(CONFIG_RETPOLINE)) +- goto retpoline_auto; + + if (IS_ENABLED(CONFIG_CPU_IBRS_ENTRY) && + boot_cpu_has_bug(X86_BUG_RETBLEED) && @@ -45849,9 +55675,7 @@ index ecfca3bbcd968..7b15f7ef760d1 100644 + boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) { + mode = SPECTRE_V2_IBRS; + break; - } -- if (IS_ENABLED(CONFIG_RETPOLINE)) -- goto retpoline_auto; ++ } + + mode = spectre_v2_select_retpoline(); break; @@ -45914,7 +55738,7 @@ index ecfca3bbcd968..7b15f7ef760d1 100644 + + if (spectre_v2_in_ibrs_mode(mode)) { + x86_spec_ctrl_base |= SPEC_CTRL_IBRS; -+ write_spec_ctrl_current(x86_spec_ctrl_base, true); ++ update_spec_ctrl(x86_spec_ctrl_base); + } + + switch (mode) { @@ -46041,11 +55865,11 @@ index ecfca3bbcd968..7b15f7ef760d1 100644 { - wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); + u64 val = spec_ctrl_current() | (x86_spec_ctrl_base & SPEC_CTRL_STIBP); -+ write_spec_ctrl_current(val, true); ++ update_spec_ctrl(val); } /* Update x86_spec_ctrl_base in case SMT state changed. */ -@@ -1028,6 +1599,8 @@ static void update_indir_branch_cond(void) +@@ -1028,6 +1606,8 @@ static void update_indir_branch_cond(void) /* Update the static key controlling the MDS CPU buffer clear in idle */ static void update_mds_branch_idle(void) { @@ -46054,7 +55878,7 @@ index ecfca3bbcd968..7b15f7ef760d1 100644 /* * Enable the idle clearing if SMT is active on CPUs which are * affected only by MSBDS and not any other MDS variant. -@@ -1039,19 +1612,26 @@ static void update_mds_branch_idle(void) +@@ -1039,19 +1619,26 @@ static void update_mds_branch_idle(void) if (!boot_cpu_has_bug(X86_BUG_MSBDS_ONLY)) return; @@ -46083,7 +55907,7 @@ index ecfca3bbcd968..7b15f7ef760d1 100644 switch (spectre_v2_user_stibp) { case SPECTRE_V2_USER_NONE: break; -@@ -1087,6 +1667,16 @@ void cpu_bugs_smt_update(void) +@@ -1087,6 +1674,16 @@ void cpu_bugs_smt_update(void) break; } @@ -46100,7 +55924,7 @@ index ecfca3bbcd968..7b15f7ef760d1 100644 mutex_unlock(&spec_ctrl_mutex); } -@@ -1190,16 +1780,6 @@ static enum ssb_mitigation __init __ssb_select_mitigation(void) +@@ -1190,16 +1787,6 @@ static enum ssb_mitigation __init __ssb_select_mitigation(void) break; } @@ -46117,25 +55941,34 @@ index ecfca3bbcd968..7b15f7ef760d1 100644 /* * We have three CPU feature flags that are in play here: * - X86_BUG_SPEC_STORE_BYPASS - CPU is susceptible. 
-@@ -1217,7 +1797,7 @@ static enum ssb_mitigation __init __ssb_select_mitigation(void) +@@ -1217,7 +1804,7 @@ static enum ssb_mitigation __init __ssb_select_mitigation(void) x86_amd_ssb_disable(); } else { x86_spec_ctrl_base |= SPEC_CTRL_SSBD; - wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); -+ write_spec_ctrl_current(x86_spec_ctrl_base, true); ++ update_spec_ctrl(x86_spec_ctrl_base); } } -@@ -1468,7 +2048,7 @@ int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which) +@@ -1364,6 +1951,8 @@ static int ib_prctl_set(struct task_struct *task, unsigned long ctrl) + if (ctrl == PR_SPEC_FORCE_DISABLE) + task_set_spec_ib_force_disable(task); + task_update_spec_tif(task); ++ if (task == current) ++ indirect_branch_prediction_barrier(); + break; + default: + return -ERANGE; +@@ -1468,7 +2057,7 @@ int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which) void x86_spec_ctrl_setup_ap(void) { if (boot_cpu_has(X86_FEATURE_MSR_SPEC_CTRL)) - wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); -+ write_spec_ctrl_current(x86_spec_ctrl_base, true); ++ update_spec_ctrl(x86_spec_ctrl_base); if (ssb_mode == SPEC_STORE_BYPASS_DISABLE) x86_amd_ssb_disable(); -@@ -1689,9 +2269,26 @@ static ssize_t tsx_async_abort_show_state(char *buf) +@@ -1689,9 +2278,26 @@ static ssize_t tsx_async_abort_show_state(char *buf) sched_smt_active() ? "vulnerable" : "disabled"); } @@ -46163,7 +55996,7 @@ index ecfca3bbcd968..7b15f7ef760d1 100644 return ""; switch (spectre_v2_user_stibp) { -@@ -1721,11 +2318,65 @@ static char *ibpb_state(void) +@@ -1721,11 +2327,65 @@ static char *ibpb_state(void) return ""; } @@ -46229,7 +56062,7 @@ index ecfca3bbcd968..7b15f7ef760d1 100644 static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr, char *buf, unsigned int bug) { -@@ -1746,12 +2397,7 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr +@@ -1746,12 +2406,7 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr return sprintf(buf, "%s\n", spectre_v1_strings[spectre_v1_mitigation]); case X86_BUG_SPECTRE_V2: @@ -46243,7 +56076,7 @@ index ecfca3bbcd968..7b15f7ef760d1 100644 case X86_BUG_SPEC_STORE_BYPASS: return sprintf(buf, "%s\n", ssb_strings[ssb_mode]); -@@ -1773,6 +2419,13 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr +@@ -1773,6 +2428,13 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr case X86_BUG_SRBDS: return srbds_show_state(buf); @@ -46257,7 +56090,7 @@ index ecfca3bbcd968..7b15f7ef760d1 100644 default: break; } -@@ -1824,4 +2477,17 @@ ssize_t cpu_show_srbds(struct device *dev, struct device_attribute *attr, char * +@@ -1824,4 +2486,17 @@ ssize_t cpu_show_srbds(struct device *dev, struct device_attribute *attr, char * { return cpu_show_common(dev, attr, buf, X86_BUG_SRBDS); } @@ -46276,7 +56109,7 @@ index ecfca3bbcd968..7b15f7ef760d1 100644 +} #endif diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c -index b3410f1ac2175..9c1df6222df92 100644 +index b3410f1ac2175..1698470dbea5f 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1027,6 +1027,8 @@ static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c) @@ -46313,7 +56146,7 @@ index b3410f1ac2175..9c1df6222df92 100644 /* * Technically, swapgs isn't serializing on AMD (despite it previously -@@ -1077,42 +1084,80 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = { +@@ -1077,42 +1084,82 @@ static const __initconst struct 
x86_cpu_id cpu_vuln_whitelist[] = { * good enough for our purposes. */ @@ -46367,6 +56200,8 @@ index b3410f1ac2175..9c1df6222df92 100644 +#define MMIO_SBDS BIT(2) +/* CPU is affected by RETbleed, speculating where you would not expect it */ +#define RETBLEED BIT(3) ++/* CPU is affected by SMT (cross-thread) return predictions */ ++#define SMT_RSB BIT(4) static const struct x86_cpu_id cpu_vuln_blacklist[] __initconst = { VULNBL_INTEL_STEPPINGS(IVYBRIDGE, X86_STEPPING_ANY, SRBDS), @@ -46402,12 +56237,12 @@ index b3410f1ac2175..9c1df6222df92 100644 + + VULNBL_AMD(0x15, RETBLEED), + VULNBL_AMD(0x16, RETBLEED), -+ VULNBL_AMD(0x17, RETBLEED), -+ VULNBL_HYGON(0x18, RETBLEED), ++ VULNBL_AMD(0x17, RETBLEED | SMT_RSB), ++ VULNBL_HYGON(0x18, RETBLEED | SMT_RSB), {} }; -@@ -1133,6 +1178,13 @@ u64 x86_read_arch_cap_msr(void) +@@ -1133,6 +1180,13 @@ u64 x86_read_arch_cap_msr(void) return ia32_cap; } @@ -46421,7 +56256,7 @@ index b3410f1ac2175..9c1df6222df92 100644 static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) { u64 ia32_cap = x86_read_arch_cap_msr(); -@@ -1186,12 +1238,43 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) +@@ -1186,12 +1240,46 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) /* * SRBDS affects CPUs which support RDRAND or RDSEED and are listed * in the vulnerability blacklist. @@ -46462,11 +56297,14 @@ index b3410f1ac2175..9c1df6222df92 100644 + !cpu_matches(cpu_vuln_whitelist, NO_EIBRS_PBRSB) && + !(ia32_cap & ARCH_CAP_PBRSB_NO)) + setup_force_cpu_bug(X86_BUG_EIBRS_PBRSB); ++ ++ if (cpu_matches(cpu_vuln_blacklist, SMT_RSB)) ++ setup_force_cpu_bug(X86_BUG_SMT_RSB); + if (cpu_matches(cpu_vuln_whitelist, NO_MELTDOWN)) return; -@@ -1396,9 +1479,8 @@ void __init early_cpu_init(void) +@@ -1396,9 +1484,8 @@ void __init early_cpu_init(void) early_identify_cpu(&boot_cpu_data); } @@ -46477,7 +56315,7 @@ index b3410f1ac2175..9c1df6222df92 100644 /* * Empirically, writing zero to a segment selector on AMD does * not clear the base, whereas writing zero to a segment -@@ -1419,10 +1501,43 @@ static void detect_null_seg_behavior(struct cpuinfo_x86 *c) +@@ -1419,10 +1506,43 @@ static void detect_null_seg_behavior(struct cpuinfo_x86 *c) wrmsrl(MSR_FS_BASE, 1); loadsegment(fs, 0); rdmsrl(MSR_FS_BASE, tmp); @@ -46524,7 +56362,7 @@ index b3410f1ac2175..9c1df6222df92 100644 } static void generic_identify(struct cpuinfo_x86 *c) -@@ -1458,8 +1573,6 @@ static void generic_identify(struct cpuinfo_x86 *c) +@@ -1458,8 +1578,6 @@ static void generic_identify(struct cpuinfo_x86 *c) get_model_name(c); /* Default name */ @@ -46533,7 +56371,7 @@ index b3410f1ac2175..9c1df6222df92 100644 /* * ESPFIX is a strange bug. All real CPUs have it. 
Paravirt * systems that run Linux at CPL > 0 may or may not have the -@@ -1684,6 +1797,8 @@ void identify_secondary_cpu(struct cpuinfo_x86 *c) +@@ -1684,6 +1802,8 @@ void identify_secondary_cpu(struct cpuinfo_x86 *c) validate_apic_and_package_id(c); x86_spec_ctrl_setup_ap(); update_srbds_msr(); @@ -46590,7 +56428,7 @@ index da696eb4821a0..e77032c5f85cc 100644 #undef pr_fmt #define pr_fmt(fmt) "x86/cpu: " fmt diff --git a/arch/x86/kernel/cpu/hygon.c b/arch/x86/kernel/cpu/hygon.c -index 6d50136f7ab98..21fd425088fe5 100644 +index 6d50136f7ab98..c393b8773ace6 100644 --- a/arch/x86/kernel/cpu/hygon.c +++ b/arch/x86/kernel/cpu/hygon.c @@ -302,6 +302,12 @@ static void init_hygon(struct cpuinfo_x86 *c) @@ -46606,6 +56444,17 @@ index 6d50136f7ab98..21fd425088fe5 100644 set_cpu_cap(c, X86_FEATURE_ZEN); set_cpu_cap(c, X86_FEATURE_CPB); +@@ -320,8 +326,8 @@ static void init_hygon(struct cpuinfo_x86 *c) + * msr_set_bit() uses the safe accessors, too, even if the MSR + * is not present. + */ +- msr_set_bit(MSR_F10H_DECFG, +- MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT); ++ msr_set_bit(MSR_AMD64_DE_CFG, ++ MSR_AMD64_DE_CFG_LFENCE_SERIALIZE_BIT); + + /* A serializing LFENCE stops RDTSC speculation */ + set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC); @@ -335,6 +341,8 @@ static void init_hygon(struct cpuinfo_x86 *c) /* Hygon CPUs don't reset SS attributes on SYSRET, Xen does. */ if (!cpu_has(c, X86_FEATURE_XENPV)) @@ -46680,7 +56529,7 @@ index 8321c43554a1d..ae7d4c85f4f43 100644 } diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c -index 08831acc1d036..a873577e49dcc 100644 +index 08831acc1d036..6469d3135d268 100644 --- a/arch/x86/kernel/cpu/mce/amd.c +++ b/arch/x86/kernel/cpu/mce/amd.c @@ -400,7 +400,7 @@ static void threshold_restart_bank(void *_tr) @@ -46692,7 +56541,81 @@ index 08831acc1d036..a873577e49dcc 100644 return; rdmsr(tr->b->address, lo, hi); -@@ -1470,10 +1470,23 @@ out_free: +@@ -526,7 +526,7 @@ static u32 get_block_address(u32 current_addr, u32 low, u32 high, + /* Fall back to method we used for older processors: */ + switch (block) { + case 0: +- addr = msr_ops.misc(bank); ++ addr = mca_msr_reg(bank, MCA_MISC); + break; + case 1: + offset = ((low & MASK_BLKPTR_LO) >> 21); +@@ -965,6 +965,24 @@ _log_error_bank(unsigned int bank, u32 msr_stat, u32 msr_addr, u64 misc) + return status & MCI_STATUS_DEFERRED; + } + ++static bool _log_error_deferred(unsigned int bank, u32 misc) ++{ ++ if (!_log_error_bank(bank, mca_msr_reg(bank, MCA_STATUS), ++ mca_msr_reg(bank, MCA_ADDR), misc)) ++ return false; ++ ++ /* ++ * Non-SMCA systems don't have MCA_DESTAT/MCA_DEADDR registers. ++ * Return true here to avoid accessing these registers. ++ */ ++ if (!mce_flags.smca) ++ return true; ++ ++ /* Clear MCA_DESTAT if the deferred error was logged from MCA_STATUS. */ ++ wrmsrl(MSR_AMD64_SMCA_MCx_DESTAT(bank), 0); ++ return true; ++} ++ + /* + * We have three scenarios for checking for Deferred errors: + * +@@ -976,20 +994,9 @@ _log_error_bank(unsigned int bank, u32 msr_stat, u32 msr_addr, u64 misc) + */ + static void log_error_deferred(unsigned int bank) + { +- bool defrd; +- +- defrd = _log_error_bank(bank, msr_ops.status(bank), +- msr_ops.addr(bank), 0); +- +- if (!mce_flags.smca) ++ if (_log_error_deferred(bank, 0)) + return; + +- /* Clear MCA_DESTAT if we logged the deferred error from MCA_STATUS. */ +- if (defrd) { +- wrmsrl(MSR_AMD64_SMCA_MCx_DESTAT(bank), 0); +- return; +- } +- + /* + * Only deferred errors are logged in MCA_DE{STAT,ADDR} so just check + * for a valid error. 
+@@ -1009,7 +1016,7 @@ static void amd_deferred_error_interrupt(void) + + static void log_error_thresholding(unsigned int bank, u64 misc) + { +- _log_error_bank(bank, msr_ops.status(bank), msr_ops.addr(bank), misc); ++ _log_error_deferred(bank, misc); + } + + static void log_and_reset_block(struct threshold_block *block) +@@ -1397,7 +1404,7 @@ static int threshold_create_bank(struct threshold_bank **bp, unsigned int cpu, + } + } + +- err = allocate_threshold_blocks(cpu, b, bank, 0, msr_ops.misc(bank)); ++ err = allocate_threshold_blocks(cpu, b, bank, 0, mca_msr_reg(bank, MCA_MISC)); + if (err) + goto out_kobj; + +@@ -1470,10 +1477,23 @@ out_free: kfree(bank); } @@ -46717,7 +56640,7 @@ index 08831acc1d036..a873577e49dcc 100644 if (!bp) return 0; -@@ -1484,13 +1497,7 @@ int mce_threshold_remove_device(unsigned int cpu) +@@ -1484,13 +1504,7 @@ int mce_threshold_remove_device(unsigned int cpu) */ this_cpu_write(threshold_banks, NULL); @@ -46732,7 +56655,7 @@ index 08831acc1d036..a873577e49dcc 100644 return 0; } -@@ -1527,15 +1534,14 @@ int mce_threshold_create_device(unsigned int cpu) +@@ -1527,15 +1541,14 @@ int mce_threshold_create_device(unsigned int cpu) if (!(this_cpu_read(bank_map) & (1 << bank))) continue; err = threshold_create_bank(bp, cpu, bank); @@ -46785,10 +56708,80 @@ index 0e3ae64d3b76b..b08b90cdc2a3e 100644 if (severity >= GHES_SEV_RECOVERABLE) m.status |= MCI_STATUS_UC; diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c -index 193204aee8801..773037e5fd761 100644 +index 193204aee8801..5ee82fd386ddb 100644 --- a/arch/x86/kernel/cpu/mce/core.c +++ b/arch/x86/kernel/cpu/mce/core.c -@@ -295,11 +295,17 @@ static void wait_for_panic(void) +@@ -176,53 +176,27 @@ void mce_unregister_decode_chain(struct notifier_block *nb) + } + EXPORT_SYMBOL_GPL(mce_unregister_decode_chain); + +-static inline u32 ctl_reg(int bank) ++u32 mca_msr_reg(int bank, enum mca_msr reg) + { +- return MSR_IA32_MCx_CTL(bank); +-} +- +-static inline u32 status_reg(int bank) +-{ +- return MSR_IA32_MCx_STATUS(bank); +-} +- +-static inline u32 addr_reg(int bank) +-{ +- return MSR_IA32_MCx_ADDR(bank); +-} +- +-static inline u32 misc_reg(int bank) +-{ +- return MSR_IA32_MCx_MISC(bank); +-} +- +-static inline u32 smca_ctl_reg(int bank) +-{ +- return MSR_AMD64_SMCA_MCx_CTL(bank); +-} +- +-static inline u32 smca_status_reg(int bank) +-{ +- return MSR_AMD64_SMCA_MCx_STATUS(bank); +-} ++ if (mce_flags.smca) { ++ switch (reg) { ++ case MCA_CTL: return MSR_AMD64_SMCA_MCx_CTL(bank); ++ case MCA_ADDR: return MSR_AMD64_SMCA_MCx_ADDR(bank); ++ case MCA_MISC: return MSR_AMD64_SMCA_MCx_MISC(bank); ++ case MCA_STATUS: return MSR_AMD64_SMCA_MCx_STATUS(bank); ++ } ++ } + +-static inline u32 smca_addr_reg(int bank) +-{ +- return MSR_AMD64_SMCA_MCx_ADDR(bank); +-} ++ switch (reg) { ++ case MCA_CTL: return MSR_IA32_MCx_CTL(bank); ++ case MCA_ADDR: return MSR_IA32_MCx_ADDR(bank); ++ case MCA_MISC: return MSR_IA32_MCx_MISC(bank); ++ case MCA_STATUS: return MSR_IA32_MCx_STATUS(bank); ++ } + +-static inline u32 smca_misc_reg(int bank) +-{ +- return MSR_AMD64_SMCA_MCx_MISC(bank); ++ return 0; + } + +-struct mca_msr_regs msr_ops = { +- .ctl = ctl_reg, +- .status = status_reg, +- .addr = addr_reg, +- .misc = misc_reg +-}; +- + static void __print_mce(struct mce *m) + { + pr_emerg(HW_ERR "CPU %d: Machine Check%s: %Lx Bank %d: %016Lx\n", +@@ -295,11 +269,17 @@ static void wait_for_panic(void) panic("Panicing machine check CPU died"); } @@ -46808,7 +56801,7 @@ index 193204aee8801..773037e5fd761 100644 if (!fake_panic) { /* 
-@@ -314,7 +320,7 @@ static void mce_panic(const char *msg, struct mce *final, char *exp) +@@ -314,7 +294,7 @@ static void mce_panic(const char *msg, struct mce *final, char *exp) } else { /* Don't log too much for fake panic */ if (atomic_inc_return(&mce_fake_panicked) > 1) @@ -46817,7 +56810,7 @@ index 193204aee8801..773037e5fd761 100644 } pending = mce_gen_pool_prepare_records(); /* First print corrected ones that are still unlogged */ -@@ -352,6 +358,9 @@ static void mce_panic(const char *msg, struct mce *final, char *exp) +@@ -352,6 +332,9 @@ static void mce_panic(const char *msg, struct mce *final, char *exp) panic(msg); } else pr_emerg(HW_ERR "Fake kernel panic: %s\n", msg); @@ -46827,7 +56820,21 @@ index 193204aee8801..773037e5fd761 100644 } /* Support code for software error injection */ -@@ -373,13 +382,16 @@ static int msr_to_offset(u32 msr) +@@ -362,24 +345,27 @@ static int msr_to_offset(u32 msr) + + if (msr == mca_cfg.rip_msr) + return offsetof(struct mce, ip); +- if (msr == msr_ops.status(bank)) ++ if (msr == mca_msr_reg(bank, MCA_STATUS)) + return offsetof(struct mce, status); +- if (msr == msr_ops.addr(bank)) ++ if (msr == mca_msr_reg(bank, MCA_ADDR)) + return offsetof(struct mce, addr); +- if (msr == msr_ops.misc(bank)) ++ if (msr == mca_msr_reg(bank, MCA_MISC)) + return offsetof(struct mce, misc); + if (msr == MSR_IA32_MCG_STATUS) + return offsetof(struct mce, mcgstatus); return -1; } @@ -46850,7 +56857,7 @@ index 193204aee8801..773037e5fd761 100644 show_stack_regs(regs); -@@ -387,8 +399,6 @@ __visible bool ex_handler_rdmsr_fault(const struct exception_table_entry *fixup, +@@ -387,8 +373,6 @@ __visible bool ex_handler_rdmsr_fault(const struct exception_table_entry *fixup, while (true) cpu_relax(); @@ -46859,7 +56866,7 @@ index 193204aee8801..773037e5fd761 100644 } /* MSR access wrappers used for error injection */ -@@ -420,32 +430,13 @@ static noinstr u64 mce_rdmsrl(u32 msr) +@@ -420,32 +404,13 @@ static noinstr u64 mce_rdmsrl(u32 msr) */ asm volatile("1: rdmsr\n" "2:\n" @@ -46893,7 +56900,7 @@ index 193204aee8801..773037e5fd761 100644 static noinstr void mce_wrmsrl(u32 msr, u64 v) { u32 low, high; -@@ -470,7 +461,7 @@ static noinstr void mce_wrmsrl(u32 msr, u64 v) +@@ -470,7 +435,7 @@ static noinstr void mce_wrmsrl(u32 msr, u64 v) /* See comment in mce_rdmsrl() */ asm volatile("1: wrmsr\n" "2:\n" @@ -46902,7 +56909,7 @@ index 193204aee8801..773037e5fd761 100644 : : "c" (msr), "a"(low), "d" (high) : "memory"); } -@@ -682,7 +673,7 @@ static struct notifier_block mce_default_nb = { +@@ -682,13 +647,13 @@ static struct notifier_block mce_default_nb = { /* * Read ADDR and MISC registers. */ @@ -46910,8 +56917,43 @@ index 193204aee8801..773037e5fd761 100644 +static noinstr void mce_read_aux(struct mce *m, int i) { if (m->status & MCI_STATUS_MISCV) - m->misc = mce_rdmsrl(msr_ops.misc(i)); -@@ -1072,10 +1063,13 @@ static int mce_start(int *no_way_out) +- m->misc = mce_rdmsrl(msr_ops.misc(i)); ++ m->misc = mce_rdmsrl(mca_msr_reg(i, MCA_MISC)); + + if (m->status & MCI_STATUS_ADDRV) { +- m->addr = mce_rdmsrl(msr_ops.addr(i)); ++ m->addr = mce_rdmsrl(mca_msr_reg(i, MCA_ADDR)); + + /* + * Mask the reported address by the reported granularity. 
+@@ -758,7 +723,7 @@ bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b) + m.bank = i; + + barrier(); +- m.status = mce_rdmsrl(msr_ops.status(i)); ++ m.status = mce_rdmsrl(mca_msr_reg(i, MCA_STATUS)); + + /* If this entry is not valid, ignore it */ + if (!(m.status & MCI_STATUS_VAL)) +@@ -826,7 +791,7 @@ clear_it: + /* + * Clear state for this bank. + */ +- mce_wrmsrl(msr_ops.status(i), 0); ++ mce_wrmsrl(mca_msr_reg(i, MCA_STATUS), 0); + } + + /* +@@ -851,7 +816,7 @@ static int mce_no_way_out(struct mce *m, char **msg, unsigned long *validp, + int i; + + for (i = 0; i < this_cpu_read(mce_num_banks); i++) { +- m->status = mce_rdmsrl(msr_ops.status(i)); ++ m->status = mce_rdmsrl(mca_msr_reg(i, MCA_STATUS)); + if (!(m->status & MCI_STATUS_VAL)) + continue; + +@@ -1072,10 +1037,13 @@ static int mce_start(int *no_way_out) * Synchronize between CPUs after main scanning loop. * This invokes the bulk of the Monarch processing. */ @@ -46927,7 +56969,7 @@ index 193204aee8801..773037e5fd761 100644 if (!timeout) goto reset; -@@ -1119,7 +1113,8 @@ static int mce_end(int order) +@@ -1119,7 +1087,8 @@ static int mce_end(int order) /* * Don't reset anything. That's done by the Monarch. */ @@ -46937,7 +56979,7 @@ index 193204aee8801..773037e5fd761 100644 } /* -@@ -1135,6 +1130,10 @@ reset: +@@ -1135,6 +1104,10 @@ reset: * Let others run again. */ atomic_set(&mce_executing, 0); @@ -46948,7 +56990,25 @@ index 193204aee8801..773037e5fd761 100644 return ret; } -@@ -1280,10 +1279,12 @@ static void kill_me_maybe(struct callback_head *cb) +@@ -1144,7 +1117,7 @@ static void mce_clear_state(unsigned long *toclear) + + for (i = 0; i < this_cpu_read(mce_num_banks); i++) { + if (test_bit(i, toclear)) +- mce_wrmsrl(msr_ops.status(i), 0); ++ mce_wrmsrl(mca_msr_reg(i, MCA_STATUS), 0); + } + } + +@@ -1203,7 +1176,7 @@ static void __mc_scan_banks(struct mce *m, struct pt_regs *regs, struct mce *fin + m->addr = 0; + m->bank = i; + +- m->status = mce_rdmsrl(msr_ops.status(i)); ++ m->status = mce_rdmsrl(mca_msr_reg(i, MCA_STATUS)); + if (!(m->status & MCI_STATUS_VAL)) + continue; + +@@ -1280,10 +1253,12 @@ static void kill_me_maybe(struct callback_head *cb) /* * -EHWPOISON from memory_failure() means that it already sent SIGBUS @@ -46964,7 +57024,7 @@ index 193204aee8801..773037e5fd761 100644 return; if (p->mce_vaddr != (void __user *)-1l) { -@@ -1454,6 +1455,14 @@ noinstr void do_machine_check(struct pt_regs *regs) +@@ -1454,6 +1429,14 @@ noinstr void do_machine_check(struct pt_regs *regs) if (worst != MCE_AR_SEVERITY && !kill_current_task) goto out; @@ -46979,7 +57039,7 @@ index 193204aee8801..773037e5fd761 100644 /* Fault was in user mode and we need to take some action */ if ((m.cs & 3) == 3) { /* If this triggers there is no way to recover. Die hard. 
*/ -@@ -1479,6 +1488,9 @@ noinstr void do_machine_check(struct pt_regs *regs) +@@ -1479,6 +1462,9 @@ noinstr void do_machine_check(struct pt_regs *regs) if (m.kflags & MCE_IN_KERNEL_COPYIN) queue_task_work(&m, msg, kill_current_task); } @@ -46989,6 +57049,58 @@ index 193204aee8801..773037e5fd761 100644 out: mce_wrmsrl(MSR_IA32_MCG_STATUS, 0); } +@@ -1687,8 +1673,8 @@ static void __mcheck_cpu_init_clear_banks(void) + + if (!b->init) + continue; +- wrmsrl(msr_ops.ctl(i), b->ctl); +- wrmsrl(msr_ops.status(i), 0); ++ wrmsrl(mca_msr_reg(i, MCA_CTL), b->ctl); ++ wrmsrl(mca_msr_reg(i, MCA_STATUS), 0); + } + } + +@@ -1714,7 +1700,7 @@ static void __mcheck_cpu_check_banks(void) + if (!b->init) + continue; + +- rdmsrl(msr_ops.ctl(i), msrval); ++ rdmsrl(mca_msr_reg(i, MCA_CTL), msrval); + b->init = !!msrval; + } + } +@@ -1871,13 +1857,6 @@ static void __mcheck_cpu_init_early(struct cpuinfo_x86 *c) + mce_flags.succor = !!cpu_has(c, X86_FEATURE_SUCCOR); + mce_flags.smca = !!cpu_has(c, X86_FEATURE_SMCA); + mce_flags.amd_threshold = 1; +- +- if (mce_flags.smca) { +- msr_ops.ctl = smca_ctl_reg; +- msr_ops.status = smca_status_reg; +- msr_ops.addr = smca_addr_reg; +- msr_ops.misc = smca_misc_reg; +- } + } + } + +@@ -2253,7 +2232,7 @@ static void mce_disable_error_reporting(void) + struct mce_bank *b = &mce_banks[i]; + + if (b->init) +- wrmsrl(msr_ops.ctl(i), 0); ++ wrmsrl(mca_msr_reg(i, MCA_CTL), 0); + } + return; + } +@@ -2605,7 +2584,7 @@ static void mce_reenable_cpu(void) + struct mce_bank *b = &mce_banks[i]; + + if (b->init) +- wrmsrl(msr_ops.ctl(i), b->ctl); ++ wrmsrl(mca_msr_reg(i, MCA_CTL), b->ctl); + } + } + diff --git a/arch/x86/kernel/cpu/mce/inject.c b/arch/x86/kernel/cpu/mce/inject.c index 0bfc14041bbb4..b63b548497c14 100644 --- a/arch/x86/kernel/cpu/mce/inject.c @@ -47031,9 +57143,30 @@ index acfd5d9f93c68..baafbb37be678 100644 ((m->status & 0xa0000000ffffffff) == 0x80000000000f0005)) return true; diff --git a/arch/x86/kernel/cpu/mce/internal.h b/arch/x86/kernel/cpu/mce/internal.h -index 88dcc79cfb07d..80dc94313bcfc 100644 +index 88dcc79cfb07d..760b57814760a 100644 --- a/arch/x86/kernel/cpu/mce/internal.h +++ b/arch/x86/kernel/cpu/mce/internal.h +@@ -168,14 +168,14 @@ struct mce_vendor_flags { + + extern struct mce_vendor_flags mce_flags; + +-struct mca_msr_regs { +- u32 (*ctl) (int bank); +- u32 (*status) (int bank); +- u32 (*addr) (int bank); +- u32 (*misc) (int bank); ++enum mca_msr { ++ MCA_CTL, ++ MCA_STATUS, ++ MCA_ADDR, ++ MCA_MISC, + }; + +-extern struct mca_msr_regs msr_ops; ++u32 mca_msr_reg(int bank, enum mca_msr reg); + + /* Decide whether to add MCE record to MCE event pool or filter it out. */ + extern bool filter_mce(struct mce *m); @@ -186,14 +186,4 @@ extern bool amd_filter_mce(struct mce *m); static inline bool amd_filter_mce(struct mce *m) { return false; }; #endif @@ -47167,6 +57300,32 @@ index efb69be41ab18..150ebfb8c12ed 100644 }; static int mc_cpu_starting(unsigned int cpu) +diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c +index 7e8e07bddd5fe..1ba590e6ef7bb 100644 +--- a/arch/x86/kernel/cpu/microcode/intel.c ++++ b/arch/x86/kernel/cpu/microcode/intel.c +@@ -659,7 +659,6 @@ void load_ucode_intel_ap(void) + else + iup = &intel_ucode_patch; + +-reget: + if (!*iup) { + patch = __load_ucode_intel(&uci); + if (!patch) +@@ -670,12 +669,7 @@ reget: + + uci.mc = *iup; + +- if (apply_microcode_early(&uci, true)) { +- /* Mixed-silicon system? 
Try to refetch the proper patch: */ +- *iup = NULL; +- +- goto reget; +- } ++ apply_microcode_early(&uci, true); + } + + static struct microcode_intel *find_patch(struct ucode_cpu_info *uci) diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index e095c28d27ae8..ba0efc30fac52 100644 --- a/arch/x86/kernel/cpu/mshyperv.c @@ -47327,6 +57486,37 @@ index db813f819ad6c..4d8398986f784 100644 local_irq_enable(); out_hit: perf_event_release_kernel(hit_event); +diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c +index b57b3db9a6a78..4f5d79e658cd3 100644 +--- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c ++++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c +@@ -580,8 +580,10 @@ static int __rdtgroup_move_task(struct task_struct *tsk, + /* + * Ensure the task's closid and rmid are written before determining if + * the task is current that will decide if it will be interrupted. ++ * This pairs with the full barrier between the rq->curr update and ++ * resctrl_sched_in() during context switch. + */ +- barrier(); ++ smp_mb(); + + /* + * By now, the task's closid and rmid are set. If the task is current +@@ -2363,6 +2365,14 @@ static void rdt_move_group_tasks(struct rdtgroup *from, struct rdtgroup *to, + WRITE_ONCE(t->closid, to->closid); + WRITE_ONCE(t->rmid, to->mon.rmid); + ++ /* ++ * Order the closid/rmid stores above before the loads ++ * in task_curr(). This pairs with the full barrier ++ * between the rq->curr update and resctrl_sched_in() ++ * during context switch. ++ */ ++ smp_mb(); ++ + /* + * If the task is on a CPU, set the CPU in the mask. + * The detection is inaccurate as tasks might move or diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c index 21d1f062895a8..06bfef1c4175e 100644 --- a/arch/x86/kernel/cpu/scattered.c @@ -47340,7 +57530,7 @@ index 21d1f062895a8..06bfef1c4175e 100644 { X86_FEATURE_CQM_OCCUP_LLC, CPUID_EDX, 0, 0x0000000f, 1 }, { X86_FEATURE_CQM_MBM_TOTAL, CPUID_EDX, 1, 0x0000000f, 1 }, diff --git a/arch/x86/kernel/cpu/sgx/encl.c b/arch/x86/kernel/cpu/sgx/encl.c -index 001808e3901cc..19876ebfb5044 100644 +index 001808e3901cc..fa5777af8da1a 100644 --- a/arch/x86/kernel/cpu/sgx/encl.c +++ b/arch/x86/kernel/cpu/sgx/encl.c @@ -12,6 +12,116 @@ @@ -47534,16 +57724,46 @@ index 001808e3901cc..19876ebfb5044 100644 return ret; } -@@ -410,6 +552,8 @@ void sgx_encl_release(struct kref *ref) +@@ -391,11 +533,15 @@ const struct vm_operations_struct sgx_vm_ops = { + void sgx_encl_release(struct kref *ref) + { + struct sgx_encl *encl = container_of(ref, struct sgx_encl, refcount); ++ unsigned long max_page_index = PFN_DOWN(encl->base + encl->size - 1); + struct sgx_va_page *va_page; + struct sgx_encl_page *entry; +- unsigned long index; ++ unsigned long count = 0; + +- xa_for_each(&encl->page_array, index, entry) { ++ XA_STATE(xas, &encl->page_array, PFN_DOWN(encl->base)); ++ ++ xas_lock(&xas); ++ xas_for_each(&xas, entry, max_page_index) { + if (entry->epc_page) { + /* + * The page and its radix tree entry cannot be freed +@@ -410,7 +556,20 @@ void sgx_encl_release(struct kref *ref) } kfree(entry); -+ /* Invoke scheduler to prevent soft lockups. */ -+ cond_resched(); ++ /* ++ * Invoke scheduler on every XA_CHECK_SCHED iteration ++ * to prevent soft lockups. 
++ */ ++ if (!(++count % XA_CHECK_SCHED)) { ++ xas_pause(&xas); ++ xas_unlock(&xas); ++ ++ cond_resched(); ++ ++ xas_lock(&xas); ++ } } ++ xas_unlock(&xas); xa_destroy(&encl->page_array); -@@ -574,10 +718,10 @@ static struct page *sgx_encl_get_backing_page(struct sgx_encl *encl, + +@@ -574,10 +733,10 @@ static struct page *sgx_encl_get_backing_page(struct sgx_encl *encl, * 0 on success, * -errno otherwise. */ @@ -47556,7 +57776,7 @@ index 001808e3901cc..19876ebfb5044 100644 struct page *contents; struct page *pcmd; -@@ -585,7 +729,7 @@ int sgx_encl_get_backing(struct sgx_encl *encl, unsigned long page_index, +@@ -585,7 +744,7 @@ int sgx_encl_get_backing(struct sgx_encl *encl, unsigned long page_index, if (IS_ERR(contents)) return PTR_ERR(contents); @@ -47565,7 +57785,7 @@ index 001808e3901cc..19876ebfb5044 100644 if (IS_ERR(pcmd)) { put_page(contents); return PTR_ERR(pcmd); -@@ -594,25 +738,118 @@ int sgx_encl_get_backing(struct sgx_encl *encl, unsigned long page_index, +@@ -594,25 +753,118 @@ int sgx_encl_get_backing(struct sgx_encl *encl, unsigned long page_index, backing->page_index = page_index; backing->contents = contents; backing->pcmd = pcmd; @@ -47660,7 +57880,7 @@ index 001808e3901cc..19876ebfb5044 100644 + + set_active_memcg(memcg); + mem_cgroup_put(encl_memcg); -+ + + return ret; +} + @@ -47684,7 +57904,7 @@ index 001808e3901cc..19876ebfb5044 100644 +{ + return sgx_encl_get_backing(encl, page_index, backing); +} - ++ +/** + * sgx_encl_put_backing() - Unpin the backing storage + * @backing: data for accessing backing storage for the page @@ -47716,6 +57936,57 @@ index fec43ca65065b..332ef3568267e 100644 int sgx_encl_test_and_clear_young(struct mm_struct *mm, struct sgx_encl_page *page); +diff --git a/arch/x86/kernel/cpu/sgx/ioctl.c b/arch/x86/kernel/cpu/sgx/ioctl.c +index 83df20e3e6333..217777c029eea 100644 +--- a/arch/x86/kernel/cpu/sgx/ioctl.c ++++ b/arch/x86/kernel/cpu/sgx/ioctl.c +@@ -372,6 +372,29 @@ err_out_free: + return ret; + } + ++/* ++ * Ensure user provided offset and length values are valid for ++ * an enclave. ++ */ ++static int sgx_validate_offset_length(struct sgx_encl *encl, ++ unsigned long offset, ++ unsigned long length) ++{ ++ if (!IS_ALIGNED(offset, PAGE_SIZE)) ++ return -EINVAL; ++ ++ if (!length || !IS_ALIGNED(length, PAGE_SIZE)) ++ return -EINVAL; ++ ++ if (offset + length < offset) ++ return -EINVAL; ++ ++ if (offset + length - PAGE_SIZE >= encl->size) ++ return -EINVAL; ++ ++ return 0; ++} ++ + /** + * sgx_ioc_enclave_add_pages() - The handler for %SGX_IOC_ENCLAVE_ADD_PAGES + * @encl: an enclave pointer +@@ -425,14 +448,10 @@ static long sgx_ioc_enclave_add_pages(struct sgx_encl *encl, void __user *arg) + if (copy_from_user(&add_arg, arg, sizeof(add_arg))) + return -EFAULT; + +- if (!IS_ALIGNED(add_arg.offset, PAGE_SIZE) || +- !IS_ALIGNED(add_arg.src, PAGE_SIZE)) +- return -EINVAL; +- +- if (!add_arg.length || add_arg.length & (PAGE_SIZE - 1)) ++ if (!IS_ALIGNED(add_arg.src, PAGE_SIZE)) + return -EINVAL; + +- if (add_arg.offset + add_arg.length - PAGE_SIZE >= encl->size) ++ if (sgx_validate_offset_length(encl, add_arg.offset, add_arg.length)) + return -EINVAL; + + if (copy_from_user(&secinfo, (void __user *)add_arg.secinfo, diff --git a/arch/x86/kernel/cpu/sgx/main.c b/arch/x86/kernel/cpu/sgx/main.c index 63d3de02bbccb..4ea48acf55faa 100644 --- a/arch/x86/kernel/cpu/sgx/main.c @@ -47942,7 +58213,7 @@ index 132a2de44d2fe..5e868b62a7c4e 100644 * Reinit the apicid, now that we have extended initial_apicid. 
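
sgx_validate_offset_length(), added above, is careful about unsigned wrap-around: with offset = 0xfffffffffffff000 and length = 0x2000, offset + length wraps to 0x1000 and would pass a naive range check, so the helper rejects an overflowing sum first. The same logic as a stand-alone sketch (hypothetical helper, PAGE_SIZE assumed to be 4 KiB):

    #include <stdbool.h>
    #include <stdint.h>

    #define PAGE_SIZE 4096ULL

    static bool encl_range_ok(uint64_t offset, uint64_t length,
                              uint64_t encl_size)
    {
        if (offset % PAGE_SIZE || !length || length % PAGE_SIZE)
            return false;          /* must be whole pages */
        if (offset + length < offset)
            return false;          /* sum wrapped around */
        /* the last page of the range must still lie inside the enclave */
        return offset + length - PAGE_SIZE < encl_size;
    }
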
*/ diff --git a/arch/x86/kernel/cpu/tsx.c b/arch/x86/kernel/cpu/tsx.c -index 9c7a5f0492929..ec7bbac3a9f29 100644 +index 9c7a5f0492929..8009c8346d8f8 100644 --- a/arch/x86/kernel/cpu/tsx.c +++ b/arch/x86/kernel/cpu/tsx.c @@ -19,7 +19,7 @@ @@ -47963,16 +58234,32 @@ index 9c7a5f0492929..ec7bbac3a9f29 100644 { u64 tsx; -@@ -58,7 +58,7 @@ void tsx_enable(void) +@@ -58,24 +58,6 @@ void tsx_enable(void) wrmsrl(MSR_IA32_TSX_CTRL, tsx); } -static bool __init tsx_ctrl_is_supported(void) -+static bool tsx_ctrl_is_supported(void) +-{ +- u64 ia32_cap = x86_read_arch_cap_msr(); +- +- /* +- * TSX is controlled via MSR_IA32_TSX_CTRL. However, support for this +- * MSR is enumerated by ARCH_CAP_TSX_MSR bit in MSR_IA32_ARCH_CAPABILITIES. +- * +- * TSX control (aka MSR_IA32_TSX_CTRL) is only available after a +- * microcode update on CPUs that have their MSR_IA32_ARCH_CAPABILITIES +- * bit MDS_NO=1. CPUs with MDS_NO=0 are not planned to get +- * MSR_IA32_TSX_CTRL support even after a microcode update. Thus, +- * tsx= cmdline requests will do nothing on CPUs without +- * MSR_IA32_TSX_CTRL support. +- */ +- return !!(ia32_cap & ARCH_CAP_TSX_CTRL_MSR); +-} +- + static enum tsx_ctrl_states x86_get_tsx_auto_mode(void) { - u64 ia32_cap = x86_read_arch_cap_msr(); - -@@ -84,7 +84,45 @@ static enum tsx_ctrl_states x86_get_tsx_auto_mode(void) + if (boot_cpu_has_bug(X86_BUG_TAA)) +@@ -84,7 +66,45 @@ static enum tsx_ctrl_states x86_get_tsx_auto_mode(void) return TSX_CTRL_ENABLE; } @@ -48019,11 +58306,11 @@ index 9c7a5f0492929..ec7bbac3a9f29 100644 { u64 msr; -@@ -97,6 +135,39 @@ void tsx_clear_cpuid(void) +@@ -97,6 +117,40 @@ void tsx_clear_cpuid(void) rdmsrl(MSR_TSX_FORCE_ABORT, msr); msr |= MSR_TFA_TSX_CPUID_CLEAR; wrmsrl(MSR_TSX_FORCE_ABORT, msr); -+ } else if (tsx_ctrl_is_supported()) { ++ } else if (cpu_feature_enabled(X86_FEATURE_MSR_TSX_CTRL)) { + rdmsrl(MSR_IA32_TSX_CTRL, msr); + msr |= TSX_CTRL_CPUID_CLEAR; + wrmsrl(MSR_IA32_TSX_CTRL, msr); @@ -48046,7 +58333,8 @@ index 9c7a5f0492929..ec7bbac3a9f29 100644 + u64 mcu_opt_ctrl; + + /* Check if RTM_ALLOW exists */ -+ if (!boot_cpu_has_bug(X86_BUG_TAA) || !tsx_ctrl_is_supported() || ++ if (!boot_cpu_has_bug(X86_BUG_TAA) || ++ !cpu_feature_enabled(X86_FEATURE_MSR_TSX_CTRL) || + !cpu_feature_enabled(X86_FEATURE_SRBDS_CTRL)) + return; + @@ -48059,7 +58347,7 @@ index 9c7a5f0492929..ec7bbac3a9f29 100644 } } -@@ -105,14 +176,14 @@ void __init tsx_init(void) +@@ -105,14 +159,14 @@ void __init tsx_init(void) char arg[5] = {}; int ret; @@ -48080,7 +58368,29 @@ index 9c7a5f0492929..ec7bbac3a9f29 100644 tsx_ctrl_state = TSX_CTRL_RTM_ALWAYS_ABORT; tsx_clear_cpuid(); setup_clear_cpu_cap(X86_FEATURE_RTM); -@@ -175,3 +246,16 @@ void __init tsx_init(void) +@@ -120,7 +174,20 @@ void __init tsx_init(void) + return; + } + +- if (!tsx_ctrl_is_supported()) { ++ /* ++ * TSX is controlled via MSR_IA32_TSX_CTRL. However, support for this ++ * MSR is enumerated by ARCH_CAP_TSX_MSR bit in MSR_IA32_ARCH_CAPABILITIES. ++ * ++ * TSX control (aka MSR_IA32_TSX_CTRL) is only available after a ++ * microcode update on CPUs that have their MSR_IA32_ARCH_CAPABILITIES ++ * bit MDS_NO=1. CPUs with MDS_NO=0 are not planned to get ++ * MSR_IA32_TSX_CTRL support even after a microcode update. Thus, ++ * tsx= cmdline requests will do nothing on CPUs without ++ * MSR_IA32_TSX_CTRL support. 
++ */ ++ if (x86_read_arch_cap_msr() & ARCH_CAP_TSX_CTRL_MSR) { ++ setup_force_cpu_cap(X86_FEATURE_MSR_TSX_CTRL); ++ } else { + tsx_ctrl_state = TSX_CTRL_NOT_SUPPORTED; + return; + } +@@ -175,3 +242,16 @@ void __init tsx_init(void) setup_force_cpu_cap(X86_FEATURE_HLE); } } @@ -48097,6 +58407,44 @@ index 9c7a5f0492929..ec7bbac3a9f29 100644 + /* See comment over that function for more details. */ + tsx_clear_cpuid(); +} +diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c +index e8326a8d1c5dc..03a454d427c3d 100644 +--- a/arch/x86/kernel/crash.c ++++ b/arch/x86/kernel/crash.c +@@ -401,10 +401,8 @@ int crash_load_segments(struct kimage *image) + kbuf.buf_align = ELF_CORE_HEADER_ALIGN; + kbuf.mem = KEXEC_BUF_MEM_UNKNOWN; + ret = kexec_add_buffer(&kbuf); +- if (ret) { +- vfree((void *)image->elf_headers); ++ if (ret) + return ret; +- } + image->elf_load_addr = kbuf.mem; + pr_debug("Loaded ELF headers at 0x%lx bufsz=0x%lx memsz=0x%lx\n", + image->elf_load_addr, kbuf.bufsz, kbuf.bufsz); +diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c +index ea4fe192189d5..53de044e56540 100644 +--- a/arch/x86/kernel/dumpstack.c ++++ b/arch/x86/kernel/dumpstack.c +@@ -351,7 +351,7 @@ unsigned long oops_begin(void) + } + NOKPROBE_SYMBOL(oops_begin); + +-void __noreturn rewind_stack_do_exit(int signr); ++void __noreturn rewind_stack_and_make_dead(int signr); + + void oops_end(unsigned long flags, struct pt_regs *regs, int signr) + { +@@ -386,7 +386,7 @@ void oops_end(unsigned long flags, struct pt_regs *regs, int signr) + * reuse the task stack and that existing poisons are invalid. + */ + kasan_unpoison_task_stack(current); +- rewind_stack_do_exit(signr); ++ rewind_stack_and_make_dead(signr); + } + NOKPROBE_SYMBOL(oops_end); + diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c index 5601b95944fae..6c5defd6569a3 100644 --- a/arch/x86/kernel/dumpstack_64.c @@ -48312,8 +58660,33 @@ index 7ada7bd03a327..759e1cef5e695 100644 } EXPORT_SYMBOL(irq_fpu_usable); +diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c +index 64e29927cc32f..c949424a11c19 100644 +--- a/arch/x86/kernel/fpu/init.c ++++ b/arch/x86/kernel/fpu/init.c +@@ -138,9 +138,6 @@ static void __init fpu__init_system_generic(void) + unsigned int fpu_kernel_xstate_size __ro_after_init; + EXPORT_SYMBOL_GPL(fpu_kernel_xstate_size); + +-/* Get alignment of the TYPE. */ +-#define TYPE_ALIGN(TYPE) offsetof(struct { char x; TYPE test; }, test) +- + /* + * Enforce that 'MEMBER' is the last field of 'TYPE'. + * +@@ -148,8 +145,8 @@ EXPORT_SYMBOL_GPL(fpu_kernel_xstate_size); + * because that's how C aligns structs. 
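
The removed TYPE_ALIGN() macro and the C11 _Alignof operator used below compute the same thing; the struct-hack spelling simply predates compiler support. A stand-alone comparison (the type name is only an example):

    #include <stddef.h>
    #include <stdio.h>

    #define TYPE_ALIGN(TYPE) offsetof(struct { char x; TYPE test; }, test)

    struct demo { char buf[512]; } __attribute__((aligned(16)));

    int main(void)
    {
        /* Both print 16: the padding inserted after 'char x' equals
         * the alignment requirement of TYPE. */
        printf("%zu %zu\n", TYPE_ALIGN(struct demo), _Alignof(struct demo));
        return 0;
    }
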
+ */ + #define CHECK_MEMBER_AT_END_OF(TYPE, MEMBER) \ +- BUILD_BUG_ON(sizeof(TYPE) != ALIGN(offsetofend(TYPE, MEMBER), \ +- TYPE_ALIGN(TYPE))) ++ BUILD_BUG_ON(sizeof(TYPE) != \ ++ ALIGN(offsetofend(TYPE, MEMBER), _Alignof(TYPE))) + + /* + * We append the 'struct fpu' to the task_struct: diff --git a/arch/x86/kernel/fpu/regset.c b/arch/x86/kernel/fpu/regset.c -index 66ed317ebc0d3..125cbbe10fefa 100644 +index 66ed317ebc0d3..bd243ae57680e 100644 --- a/arch/x86/kernel/fpu/regset.c +++ b/arch/x86/kernel/fpu/regset.c @@ -87,11 +87,9 @@ int xfpregs_set(struct task_struct *target, const struct user_regset *regset, @@ -48342,8 +58715,17 @@ index 66ed317ebc0d3..125cbbe10fefa 100644 /* Mark FP and SSE as in use when XSAVE is enabled */ if (use_xsave()) +@@ -164,7 +163,7 @@ int xstateregs_set(struct task_struct *target, const struct user_regset *regset, + } + + fpu_force_restore(fpu); +- ret = copy_uabi_from_kernel_to_xstate(&fpu->state.xsave, kbuf ?: tmpbuf); ++ ret = copy_uabi_from_kernel_to_xstate(&fpu->state.xsave, kbuf ?: tmpbuf, &target->thread.pkru); + + out: + vfree(tmpbuf); diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c -index 831b25c5e7058..7f71bd4dcd0d6 100644 +index 831b25c5e7058..7f76cb099e66a 100644 --- a/arch/x86/kernel/fpu/signal.c +++ b/arch/x86/kernel/fpu/signal.c @@ -205,7 +205,7 @@ retry: @@ -48368,8 +58750,94 @@ index 831b25c5e7058..7f71bd4dcd0d6 100644 } /* +@@ -371,7 +370,7 @@ static int __fpu_restore_sig(void __user *buf, void __user *buf_fx, + fpregs_unlock(); + + if (use_xsave() && !fx_only) { +- ret = copy_sigframe_from_user_to_xstate(&fpu->state.xsave, buf_fx); ++ ret = copy_sigframe_from_user_to_xstate(tsk, buf_fx); + if (ret) + return ret; + } else { +diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c +index c8def1b7f8fba..8bbf37c0bebe2 100644 +--- a/arch/x86/kernel/fpu/xstate.c ++++ b/arch/x86/kernel/fpu/xstate.c +@@ -1091,8 +1091,31 @@ static int copy_from_buffer(void *dst, unsigned int offset, unsigned int size, + } + + ++/** ++ * copy_uabi_to_xstate - Copy a UABI format buffer to the kernel xstate ++ * @fpstate: The fpstate buffer to copy to ++ * @kbuf: The UABI format buffer, if it comes from the kernel ++ * @ubuf: The UABI format buffer, if it comes from userspace ++ * @pkru: The location to write the PKRU value to ++ * ++ * Converts from the UABI format into the kernel internal hardware ++ * dependent format. ++ * ++ * This function ultimately has two different callers with distinct PKRU ++ * behavior. ++ * 1. When called from sigreturn the PKRU register will be restored from ++ * @fpstate via an XRSTOR. Correctly copying the UABI format buffer to ++ * @fpstate is sufficient to cover this case, but the caller will also ++ * pass a pointer to the thread_struct's pkru field in @pkru and updating ++ * it is harmless. ++ * 2. When called from ptrace the PKRU register will be restored from the ++ * thread_struct's pkru field. A pointer to that is passed in @pkru. ++ * The kernel will restore it manually, so the XRSTOR behavior that resets ++ * the PKRU register to the hardware init value (0) if the corresponding ++ * xfeatures bit is not set is emulated here. 
++ */ + static int copy_uabi_to_xstate(struct xregs_state *xsave, const void *kbuf, +- const void __user *ubuf) ++ const void __user *ubuf, u32 *pkru) + { + unsigned int offset, size; + struct xstate_header hdr; +@@ -1140,6 +1163,14 @@ static int copy_uabi_to_xstate(struct xregs_state *xsave, const void *kbuf, + } + } + ++ if (hdr.xfeatures & XFEATURE_MASK_PKRU) { ++ struct pkru_state *xpkru; ++ ++ xpkru = __raw_xsave_addr(xsave, XFEATURE_PKRU); ++ *pkru = xpkru->pkru; ++ } else ++ *pkru = 0; ++ + /* + * The state that came in from userspace was user-state only. + * Mask all the user states out of 'xfeatures': +@@ -1159,9 +1190,9 @@ static int copy_uabi_to_xstate(struct xregs_state *xsave, const void *kbuf, + * format and copy to the target thread. This is called from + * xstateregs_set(). + */ +-int copy_uabi_from_kernel_to_xstate(struct xregs_state *xsave, const void *kbuf) ++int copy_uabi_from_kernel_to_xstate(struct xregs_state *xsave, const void *kbuf, u32 *pkru) + { +- return copy_uabi_to_xstate(xsave, kbuf, NULL); ++ return copy_uabi_to_xstate(xsave, kbuf, NULL, pkru); + } + + /* +@@ -1169,10 +1200,10 @@ int copy_uabi_from_kernel_to_xstate(struct xregs_state *xsave, const void *kbuf) + * XSAVE[S] format and copy to the target thread. This is called from the + * sigreturn() and rt_sigreturn() system calls. + */ +-int copy_sigframe_from_user_to_xstate(struct xregs_state *xsave, ++int copy_sigframe_from_user_to_xstate(struct task_struct *tsk, + const void __user *ubuf) + { +- return copy_uabi_to_xstate(xsave, NULL, ubuf); ++ return copy_uabi_to_xstate(&tsk->thread.fpu.state.xsave, NULL, ubuf, &tsk->thread.pkru); + } + + static bool validate_xsaves_xrstors(u64 mask) diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c -index 1b3ce3b4a2a2f..b3c9ef01d6c09 100644 +index 1b3ce3b4a2a2f..4017da3a4c701 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -93,6 +93,7 @@ static int ftrace_verify_code(unsigned long ip, const char *old_code) @@ -48380,7 +58848,17 @@ index 1b3ce3b4a2a2f..b3c9ef01d6c09 100644 WARN_ON(1); return -EINVAL; } -@@ -308,7 +309,7 @@ union ftrace_op_code_union { +@@ -218,7 +219,9 @@ void ftrace_replace_code(int enable) + + ret = ftrace_verify_code(rec->ip, old); + if (ret) { ++ ftrace_expected = old; + ftrace_bug(ret, rec); ++ ftrace_expected = NULL; + return; + } + } +@@ -308,7 +311,7 @@ union ftrace_op_code_union { } __attribute__((packed)); }; @@ -48389,7 +58867,7 @@ index 1b3ce3b4a2a2f..b3c9ef01d6c09 100644 static unsigned long create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size) -@@ -321,12 +322,12 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size) +@@ -321,12 +324,12 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size) unsigned long offset; unsigned long npages; unsigned long size; @@ -48403,7 +58881,7 @@ index 1b3ce3b4a2a2f..b3c9ef01d6c09 100644 union ftrace_op_code_union op_ptr; int ret; -@@ -366,10 +367,10 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size) +@@ -366,10 +369,10 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size) ip = trampoline + size; /* The trampoline ends with ret(q) */ @@ -48603,6 +59081,18 @@ index 882213df37130..71f336425e58a 100644 if (hpet_rtc_flags & RTC_UIE && curr_time.tm_sec != hpet_prev_update_sec) { +diff --git a/arch/x86/kernel/i8259.c b/arch/x86/kernel/i8259.c +index 15aefa3f3e18e..f91e5e31aa4f0 100644 +--- a/arch/x86/kernel/i8259.c ++++ b/arch/x86/kernel/i8259.c +@@ -114,6 +114,7 @@ static void 
make_8259A_irq(unsigned int irq) + disable_irq_nosync(irq); + io_apic_irqs &= ~(1<<irq); + irq_set_chip_and_handler(irq, &i8259A_chip, handle_level_irq); ++ irq_set_status_flags(irq, IRQ_LEVEL); + enable_irq(irq); + lapic_assign_legacy_vector(irq, true); + } diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index e28f6a5d14f1b..766ffe3ba3137 100644 --- a/arch/x86/kernel/irq.c @@ -48631,6 +59121,22 @@ index 8ef35063964b1..b8db1022aa6ca 100644 + RET SYM_FUNC_END(native_save_fl) EXPORT_SYMBOL(native_save_fl) +diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c +index beb1bada1b0ab..c683666876f1c 100644 +--- a/arch/x86/kernel/irqinit.c ++++ b/arch/x86/kernel/irqinit.c +@@ -65,8 +65,10 @@ void __init init_ISA_irqs(void) + + legacy_pic->init(0); + +- for (i = 0; i < nr_legacy_irqs(); i++) ++ for (i = 0; i < nr_legacy_irqs(); i++) { + irq_set_chip_and_handler(i, chip, handle_level_irq); ++ irq_set_status_flags(i, IRQ_LEVEL); ++ } + } + + void __init init_IRQ(void) diff --git a/arch/x86/kernel/kdebugfs.c b/arch/x86/kernel/kdebugfs.c index 64b6da95af984..e2e89bebcbc32 100644 --- a/arch/x86/kernel/kdebugfs.c @@ -48697,10 +59203,37 @@ index 64b6da95af984..e2e89bebcbc32 100644 memunmap(data); no++; diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c -index b6e046e4b2895..6872f3834668d 100644 +index b6e046e4b2895..c4b618d0b16a0 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c -@@ -495,7 +495,7 @@ static void kprobe_emulate_jcc(struct kprobe *p, struct pt_regs *regs) +@@ -37,6 +37,7 @@ + #include <linux/extable.h> + #include <linux/kdebug.h> + #include <linux/kallsyms.h> ++#include <linux/kgdb.h> + #include <linux/ftrace.h> + #include <linux/kasan.h> + #include <linux/moduleloader.h> +@@ -289,12 +290,15 @@ static int can_probe(unsigned long paddr) + if (ret < 0) + return 0; + ++#ifdef CONFIG_KGDB + /* +- * Another debugging subsystem might insert this breakpoint. +- * In that case, we can't recover it. ++ * If there is a dynamically installed kgdb sw breakpoint, ++ * this function should not be probed. 
+ */ +- if (insn.opcode.bytes[0] == INT3_INSN_OPCODE) ++ if (insn.opcode.bytes[0] == INT3_INSN_OPCODE && ++ kgdb_has_hit_break(addr)) + return 0; ++#endif + addr += insn.length; + } + +@@ -495,7 +499,7 @@ static void kprobe_emulate_jcc(struct kprobe *p, struct pt_regs *regs) match = ((regs->flags & X86_EFLAGS_SF) >> X86_EFLAGS_SF_BIT) ^ ((regs->flags & X86_EFLAGS_OF) >> X86_EFLAGS_OF_BIT); if (p->ainsn.jcc.type >= 0xe) @@ -48709,7 +59242,7 @@ index b6e046e4b2895..6872f3834668d 100644 } __kprobe_emulate_jmp(p, regs, (match && !invert) || (!match && invert)); } -@@ -816,16 +816,20 @@ NOKPROBE_SYMBOL(arch_prepare_kretprobe); +@@ -816,16 +820,20 @@ NOKPROBE_SYMBOL(arch_prepare_kretprobe); static void kprobe_post_process(struct kprobe *cur, struct pt_regs *regs, struct kprobe_ctlblk *kcb) { @@ -48737,7 +59270,7 @@ index b6e046e4b2895..6872f3834668d 100644 } NOKPROBE_SYMBOL(kprobe_post_process); -@@ -1044,7 +1048,7 @@ asm( +@@ -1044,7 +1052,7 @@ asm( RESTORE_REGS_STRING " popfl\n" #endif @@ -48746,6 +59279,61 @@ index b6e046e4b2895..6872f3834668d 100644 ".size kretprobe_trampoline, .-kretprobe_trampoline\n" ); NOKPROBE_SYMBOL(kretprobe_trampoline); +diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c +index 71425ebba98a1..a9121073d9518 100644 +--- a/arch/x86/kernel/kprobes/opt.c ++++ b/arch/x86/kernel/kprobes/opt.c +@@ -15,6 +15,7 @@ + #include <linux/extable.h> + #include <linux/kdebug.h> + #include <linux/kallsyms.h> ++#include <linux/kgdb.h> + #include <linux/ftrace.h> + #include <linux/objtool.h> + #include <linux/pgtable.h> +@@ -272,19 +273,6 @@ static int insn_is_indirect_jump(struct insn *insn) + return ret; + } + +-static bool is_padding_int3(unsigned long addr, unsigned long eaddr) +-{ +- unsigned char ops; +- +- for (; addr < eaddr; addr++) { +- if (get_kernel_nofault(ops, (void *)addr) < 0 || +- ops != INT3_INSN_OPCODE) +- return false; +- } +- +- return true; +-} +- + /* Decode whole function to ensure any instructions don't jump into target */ + static int can_optimize(unsigned long paddr) + { +@@ -327,15 +315,15 @@ static int can_optimize(unsigned long paddr) + ret = insn_decode_kernel(&insn, (void *)recovered_insn); + if (ret < 0) + return 0; +- ++#ifdef CONFIG_KGDB + /* +- * In the case of detecting unknown breakpoint, this could be +- * a padding INT3 between functions. Let's check that all the +- * rest of the bytes are also INT3. ++ * If there is a dynamically installed kgdb sw breakpoint, ++ * this function should not be probed. + */ +- if (insn.opcode.bytes[0] == INT3_INSN_OPCODE) +- return is_padding_int3(addr, paddr - offset + size) ? 
1 : 0; +- ++ if (insn.opcode.bytes[0] == INT3_INSN_OPCODE && ++ kgdb_has_hit_break(addr)) ++ return 0; ++#endif + /* Recover address */ + insn.kaddr = (void *)addr; + insn.next_byte = (void *)(addr + insn.length); diff --git a/arch/x86/kernel/ksysfs.c b/arch/x86/kernel/ksysfs.c index d0a19121c6a4f..257892fcefa79 100644 --- a/arch/x86/kernel/ksysfs.c @@ -49165,7 +59753,7 @@ index 6b07faaa15798..23154d24b1173 100644 } device_initcall(register_e820_pmem); diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c -index 1d9463e3096b6..7073764535256 100644 +index 1d9463e3096b6..bc9b4b93cf9bc 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -132,6 +132,7 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, unsigned long arg, @@ -49181,7 +59769,7 @@ index 1d9463e3096b6..7073764535256 100644 if (updmsr) - wrmsrl(MSR_IA32_SPEC_CTRL, msr); -+ write_spec_ctrl_current(msr, false); ++ update_spec_ctrl_cond(msr); } static unsigned long speculation_ctrl_update_tif(struct task_struct *tsk) @@ -50082,7 +60670,7 @@ index 50a4515fe0ad1..9452dc9664b51 100644 unsigned int cpu, bool bootcpu) { diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c -index a1202536fc57c..3423aaea4ad85 100644 +index a1202536fc57c..8488966da5f19 100644 --- a/arch/x86/kernel/unwind_orc.c +++ b/arch/x86/kernel/unwind_orc.c @@ -93,22 +93,27 @@ static struct orc_entry *orc_find(unsigned long ip); @@ -50118,6 +60706,38 @@ index a1202536fc57c..3423aaea4ad85 100644 } #else static struct orc_entry *orc_ftrace_find(unsigned long ip) +@@ -695,7 +700,7 @@ void __unwind_start(struct unwind_state *state, struct task_struct *task, + /* Otherwise, skip ahead to the user-specified starting frame: */ + while (!unwind_done(state) && + (!on_stack(&state->stack_info, first_frame, sizeof(long)) || +- state->sp < (unsigned long)first_frame)) ++ state->sp <= (unsigned long)first_frame)) + unwind_next_frame(state); + + return; +diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c +index b63cf8f7745ee..6c07f6daaa227 100644 +--- a/arch/x86/kernel/uprobes.c ++++ b/arch/x86/kernel/uprobes.c +@@ -722,8 +722,9 @@ static int branch_setup_xol_ops(struct arch_uprobe *auprobe, struct insn *insn) + switch (opc1) { + case 0xeb: /* jmp 8 */ + case 0xe9: /* jmp 32 */ +- case 0x90: /* prefix* + nop; same as jmp with .offs = 0 */ + break; ++ case 0x90: /* prefix* + nop; same as jmp with .offs = 0 */ ++ goto setup; + + case 0xe8: /* call relative */ + branch_clear_offset(auprobe, insn); +@@ -753,6 +754,7 @@ static int branch_setup_xol_ops(struct arch_uprobe *auprobe, struct insn *insn) + return -ENOTSUPP; + } + ++setup: + auprobe->branch.opc1 = opc1; + auprobe->branch.ilen = insn->length; + auprobe->branch.offs = insn->immediate.value; diff --git a/arch/x86/kernel/verify_cpu.S b/arch/x86/kernel/verify_cpu.S index 641f0fe1e5b4a..1258a5872d128 100644 --- a/arch/x86/kernel/verify_cpu.S @@ -50198,7 +60818,7 @@ index efd9e9ea17f25..c1efcd194ad7b 100644 * struct alt_inst entries. 
From the header (alternative.h): * "Alternative instructions for different CPU types or capabilities" diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c -index 751aa85a30012..d85a0808a446e 100644 +index 751aa85a30012..528437e3e2f3f 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -232,6 +232,25 @@ u64 kvm_vcpu_reserved_gpa_bits_raw(struct kvm_vcpu *vcpu) @@ -50259,11 +60879,11 @@ index 751aa85a30012..d85a0808a446e 100644 kvfree(e2); - return r; - } -- + - kvfree(vcpu->arch.cpuid_entries); - vcpu->arch.cpuid_entries = e2; - vcpu->arch.cpuid_nent = cpuid->nent; - +- - kvm_update_cpuid_runtime(vcpu); - kvm_vcpu_after_set_cpuid(vcpu); - @@ -50292,7 +60912,34 @@ index 751aa85a30012..d85a0808a446e 100644 kvm_cpu_cap_mask(CPUID_7_ECX, F(AVX512VBMI) | F(LA57) | F(PKU) | 0 /*OSPKE*/ | F(RDPID) | -@@ -716,13 +718,16 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function) +@@ -565,16 +567,22 @@ struct kvm_cpuid_array { + int nent; + }; + ++static struct kvm_cpuid_entry2 *get_next_cpuid(struct kvm_cpuid_array *array) ++{ ++ if (array->nent >= array->maxnent) ++ return NULL; ++ ++ return &array->entries[array->nent++]; ++} ++ + static struct kvm_cpuid_entry2 *do_host_cpuid(struct kvm_cpuid_array *array, + u32 function, u32 index) + { +- struct kvm_cpuid_entry2 *entry; ++ struct kvm_cpuid_entry2 *entry = get_next_cpuid(array); + +- if (array->nent >= array->maxnent) ++ if (!entry) + return NULL; + +- entry = &array->entries[array->nent++]; +- + entry->function = function; + entry->index = index; + entry->flags = 0; +@@ -716,13 +724,16 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function) entry->edx = 0; } break; @@ -50311,6 +60958,78 @@ index 751aa85a30012..d85a0808a446e 100644 perf_get_x86_pmu_capability(&cap); /* +@@ -750,22 +761,13 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function) + entry->edx = edx.full; + break; + } +- /* +- * Per Intel's SDM, the 0x1f is a superset of 0xb, +- * thus they can be handled by common code. +- */ + case 0x1f: + case 0xb: + /* +- * Populate entries until the level type (ECX[15:8]) of the +- * previous entry is zero. Note, CPUID EAX.{0x1f,0xb}.0 is +- * the starting entry, filled by the primary do_host_cpuid(). ++ * No topology; a valid topology is indicated by the presence ++ * of subleaf 1. + */ +- for (i = 1; entry->ecx & 0xff00; ++i) { +- entry = do_host_cpuid(array, function, i); +- if (!entry) +- goto out; +- } ++ entry->eax = entry->ebx = entry->ecx = 0; + break; + case 0xd: + entry->eax &= supported_xcr0; +@@ -897,11 +899,13 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function) + entry->eax = min(entry->eax, 0x8000001f); + break; + case 0x80000001: ++ entry->ebx &= ~GENMASK(27, 16); + cpuid_entry_override(entry, CPUID_8000_0001_EDX); + cpuid_entry_override(entry, CPUID_8000_0001_ECX); + break; + case 0x80000006: +- /* L2 cache and TLB: pass through host info. */ ++ /* Drop reserved bits, pass host L2 cache and TLB info. 
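
The CPUID sanitizing below leans on GENMASK(h, l), which builds a mask with bits h through l set, so clearing a reserved field is a single and-not. Essentially (simplified from the kernel's definition):

    #include <stdio.h>

    #define BITS_PER_LONG 64
    #define GENMASK(h, l) \
            (((~0UL) << (l)) & (~0UL >> (BITS_PER_LONG - 1 - (h))))

    int main(void)
    {
        unsigned long edx = ~0UL;

        edx &= ~GENMASK(17, 16);    /* clear a 2-bit reserved field */

        printf("%#lx %#lx %#lx\n",
               GENMASK(17, 16), GENMASK(31, 12), edx);
        /* prints 0x30000 0xfffff000 0xfffffffffffcffff */
        return 0;
    }
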
*/ ++ entry->edx &= ~GENMASK(17, 16); + break; + case 0x80000007: /* Advanced power management */ + /* invariant TSC is CPUID.80000007H:EDX[8] */ +@@ -931,6 +935,7 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function) + g_phys_as = phys_as; + + entry->eax = g_phys_as | (virt_as << 8); ++ entry->ecx &= ~(GENMASK(31, 16) | GENMASK(11, 8)); + entry->edx = 0; + cpuid_entry_override(entry, CPUID_8000_0008_EBX); + break; +@@ -950,14 +955,21 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function) + entry->ecx = entry->edx = 0; + break; + case 0x8000001a: ++ entry->eax &= GENMASK(2, 0); ++ entry->ebx = entry->ecx = entry->edx = 0; ++ break; + case 0x8000001e: ++ /* Do not return host topology information. */ ++ entry->eax = entry->ebx = entry->ecx = 0; ++ entry->edx = 0; /* reserved */ + break; + case 0x8000001F: + if (!kvm_cpu_cap_has(X86_FEATURE_SEV)) { + entry->eax = entry->ebx = entry->ecx = entry->edx = 0; + } else { + cpuid_entry_override(entry, CPUID_8000_001F_EAX); +- ++ /* Clear NumVMPL since KVM does not support VMPL. */ ++ entry->ebx &= ~GENMASK(31, 12); + /* + * Enumerate '0' for "PA bits reduction", the adjusted + * MAXPHYADDR is enumerated directly (see 0x80000008). diff --git a/arch/x86/kvm/debugfs.c b/arch/x86/kvm/debugfs.c index 54a83a7445384..f33c804a922ac 100644 --- a/arch/x86/kvm/debugfs.c @@ -50326,7 +61045,7 @@ index 54a83a7445384..f33c804a922ac 100644 memset(log, 0, sizeof(log)); for (i = 0; i < KVM_NR_PAGE_SIZES; i++) { diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c -index 9a144ca8e1460..45d82a9501328 100644 +index 9a144ca8e1460..cb96e4354f317 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -187,9 +187,6 @@ @@ -50414,7 +61133,112 @@ index 9a144ca8e1460..45d82a9501328 100644 FOP_SETCC(seto) FOP_SETCC(setno) FOP_SETCC(setc) -@@ -1053,7 +1069,7 @@ static int em_bsr_c(struct x86_emulate_ctxt *ctxt) +@@ -779,8 +795,7 @@ static int linearize(struct x86_emulate_ctxt *ctxt, + ctxt->mode, linear); + } + +-static inline int assign_eip(struct x86_emulate_ctxt *ctxt, ulong dst, +- enum x86emul_mode mode) ++static inline int assign_eip(struct x86_emulate_ctxt *ctxt, ulong dst) + { + ulong linear; + int rc; +@@ -790,41 +805,71 @@ static inline int assign_eip(struct x86_emulate_ctxt *ctxt, ulong dst, + + if (ctxt->op_bytes != sizeof(unsigned long)) + addr.ea = dst & ((1UL << (ctxt->op_bytes << 3)) - 1); +- rc = __linearize(ctxt, addr, &max_size, 1, false, true, mode, &linear); ++ rc = __linearize(ctxt, addr, &max_size, 1, false, true, ctxt->mode, &linear); + if (rc == X86EMUL_CONTINUE) + ctxt->_eip = addr.ea; + return rc; + } + ++static inline int emulator_recalc_and_set_mode(struct x86_emulate_ctxt *ctxt) ++{ ++ u64 efer; ++ struct desc_struct cs; ++ u16 selector; ++ u32 base3; ++ ++ ctxt->ops->get_msr(ctxt, MSR_EFER, &efer); ++ ++ if (!(ctxt->ops->get_cr(ctxt, 0) & X86_CR0_PE)) { ++ /* Real mode. cpu must not have long mode active */ ++ if (efer & EFER_LMA) ++ return X86EMUL_UNHANDLEABLE; ++ ctxt->mode = X86EMUL_MODE_REAL; ++ return X86EMUL_CONTINUE; ++ } ++ ++ if (ctxt->eflags & X86_EFLAGS_VM) { ++ /* Protected/VM86 mode. 
cpu must not have long mode active */ ++ if (efer & EFER_LMA) ++ return X86EMUL_UNHANDLEABLE; ++ ctxt->mode = X86EMUL_MODE_VM86; ++ return X86EMUL_CONTINUE; ++ } ++ ++ if (!ctxt->ops->get_segment(ctxt, &selector, &cs, &base3, VCPU_SREG_CS)) ++ return X86EMUL_UNHANDLEABLE; ++ ++ if (efer & EFER_LMA) { ++ if (cs.l) { ++ /* Proper long mode */ ++ ctxt->mode = X86EMUL_MODE_PROT64; ++ } else if (cs.d) { ++ /* 32 bit compatibility mode*/ ++ ctxt->mode = X86EMUL_MODE_PROT32; ++ } else { ++ ctxt->mode = X86EMUL_MODE_PROT16; ++ } ++ } else { ++ /* Legacy 32 bit / 16 bit mode */ ++ ctxt->mode = cs.d ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16; ++ } ++ ++ return X86EMUL_CONTINUE; ++} ++ + static inline int assign_eip_near(struct x86_emulate_ctxt *ctxt, ulong dst) + { +- return assign_eip(ctxt, dst, ctxt->mode); ++ return assign_eip(ctxt, dst); + } + +-static int assign_eip_far(struct x86_emulate_ctxt *ctxt, ulong dst, +- const struct desc_struct *cs_desc) ++static int assign_eip_far(struct x86_emulate_ctxt *ctxt, ulong dst) + { +- enum x86emul_mode mode = ctxt->mode; +- int rc; ++ int rc = emulator_recalc_and_set_mode(ctxt); + +-#ifdef CONFIG_X86_64 +- if (ctxt->mode >= X86EMUL_MODE_PROT16) { +- if (cs_desc->l) { +- u64 efer = 0; ++ if (rc != X86EMUL_CONTINUE) ++ return rc; + +- ctxt->ops->get_msr(ctxt, MSR_EFER, &efer); +- if (efer & EFER_LMA) +- mode = X86EMUL_MODE_PROT64; +- } else +- mode = X86EMUL_MODE_PROT32; /* temporary value */ +- } +-#endif +- if (mode == X86EMUL_MODE_PROT16 || mode == X86EMUL_MODE_PROT32) +- mode = cs_desc->d ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16; +- rc = assign_eip(ctxt, dst, mode); +- if (rc == X86EMUL_CONTINUE) +- ctxt->mode = mode; +- return rc; ++ return assign_eip(ctxt, dst); + } + + static inline int jmp_rel(struct x86_emulate_ctxt *ctxt, int rel) +@@ -1053,7 +1098,7 @@ static int em_bsr_c(struct x86_emulate_ctxt *ctxt) static __always_inline u8 test_cc(unsigned int condition, unsigned long flags) { u8 rc; @@ -50423,7 +61247,7 @@ index 9a144ca8e1460..45d82a9501328 100644 flags = (flags & EFLAGS_MASK) | X86_EFLAGS_IF; asm("push %[flags]; popf; " CALL_NOSPEC -@@ -1614,11 +1630,6 @@ static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt, +@@ -1614,11 +1659,6 @@ static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt, goto exception; } @@ -50435,7 +61259,7 @@ index 9a144ca8e1460..45d82a9501328 100644 dpl = seg_desc.dpl; switch (seg) { -@@ -1658,12 +1669,6 @@ static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt, +@@ -1658,12 +1698,6 @@ static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt, case VCPU_SREG_TR: if (seg_desc.s || (seg_desc.type != 1 && seg_desc.type != 9)) goto exception; @@ -50448,7 +61272,7 @@ index 9a144ca8e1460..45d82a9501328 100644 break; case VCPU_SREG_LDTR: if (seg_desc.s || seg_desc.type != 2) -@@ -1682,6 +1687,11 @@ static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt, +@@ -1682,6 +1716,11 @@ static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt, break; } @@ -50460,7 +61284,7 @@ index 9a144ca8e1460..45d82a9501328 100644 if (seg_desc.s) { /* mark segment as accessed */ if (!(seg_desc.type & 1)) { -@@ -1696,8 +1706,17 @@ static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt, +@@ -1696,8 +1735,17 @@ static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt, if (ret != X86EMUL_CONTINUE) return ret; if (emul_is_noncanonical_address(get_desc_base(&seg_desc) | @@ -50480,7 +61304,7 @@ index 9a144ca8e1460..45d82a9501328 100644 } load: 
ctxt->ops->set_segment(ctxt, selector, &seg_desc, base3, seg); -@@ -1917,7 +1936,7 @@ static int em_pop_sreg(struct x86_emulate_ctxt *ctxt) +@@ -1917,7 +1965,7 @@ static int em_pop_sreg(struct x86_emulate_ctxt *ctxt) if (rc != X86EMUL_CONTINUE) return rc; @@ -50489,7 +61313,51 @@ index 9a144ca8e1460..45d82a9501328 100644 ctxt->interruptibility = KVM_X86_SHADOW_INT_MOV_SS; if (ctxt->op_bytes > 2) rsp_increment(ctxt, ctxt->op_bytes - 2); -@@ -3510,8 +3529,10 @@ static int em_rdpid(struct x86_emulate_ctxt *ctxt) +@@ -2134,7 +2182,7 @@ static int em_jmp_far(struct x86_emulate_ctxt *ctxt) + if (rc != X86EMUL_CONTINUE) + return rc; + +- rc = assign_eip_far(ctxt, ctxt->src.val, &new_desc); ++ rc = assign_eip_far(ctxt, ctxt->src.val); + /* Error handling is not implemented. */ + if (rc != X86EMUL_CONTINUE) + return X86EMUL_UNHANDLEABLE; +@@ -2215,7 +2263,7 @@ static int em_ret_far(struct x86_emulate_ctxt *ctxt) + &new_desc); + if (rc != X86EMUL_CONTINUE) + return rc; +- rc = assign_eip_far(ctxt, eip, &new_desc); ++ rc = assign_eip_far(ctxt, eip); + /* Error handling is not implemented. */ + if (rc != X86EMUL_CONTINUE) + return X86EMUL_UNHANDLEABLE; +@@ -2598,7 +2646,7 @@ static int em_rsm(struct x86_emulate_ctxt *ctxt) + * those side effects need to be explicitly handled for both success + * and shutdown. + */ +- return X86EMUL_CONTINUE; ++ return emulator_recalc_and_set_mode(ctxt); + + emulate_shutdown: + ctxt->ops->triple_fault(ctxt); +@@ -2842,6 +2890,7 @@ static int em_sysexit(struct x86_emulate_ctxt *ctxt) + ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS); + + ctxt->_eip = rdx; ++ ctxt->mode = usermode; + *reg_write(ctxt, VCPU_REGS_RSP) = rcx; + + return X86EMUL_CONTINUE; +@@ -3438,7 +3487,7 @@ static int em_call_far(struct x86_emulate_ctxt *ctxt) + if (rc != X86EMUL_CONTINUE) + return rc; + +- rc = assign_eip_far(ctxt, ctxt->src.val, &new_desc); ++ rc = assign_eip_far(ctxt, ctxt->src.val); + if (rc != X86EMUL_CONTINUE) + goto fail; + +@@ -3510,8 +3559,10 @@ static int em_rdpid(struct x86_emulate_ctxt *ctxt) { u64 tsc_aux = 0; @@ -50501,7 +61369,34 @@ index 9a144ca8e1460..45d82a9501328 100644 ctxt->dst.val = tsc_aux; return X86EMUL_CONTINUE; } -@@ -4101,6 +4122,9 @@ static int em_xsetbv(struct x86_emulate_ctxt *ctxt) +@@ -3578,11 +3629,25 @@ static int em_movbe(struct x86_emulate_ctxt *ctxt) + + static int em_cr_write(struct x86_emulate_ctxt *ctxt) + { +- if (ctxt->ops->set_cr(ctxt, ctxt->modrm_reg, ctxt->src.val)) ++ int cr_num = ctxt->modrm_reg; ++ int r; ++ ++ if (ctxt->ops->set_cr(ctxt, cr_num, ctxt->src.val)) + return emulate_gp(ctxt, 0); + + /* Disable writeback. */ + ctxt->dst.type = OP_NONE; ++ ++ if (cr_num == 0) { ++ /* ++ * CR0 write might have updated CR0.PE and/or CR0.PG ++ * which can affect the cpu's execution mode. 
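
emulator_recalc_and_set_mode(), introduced above, re-derives the execution mode in the architectural priority order; condensed:

    CR0.PE == 0                      ->  X86EMUL_MODE_REAL
    EFLAGS.VM == 1                   ->  X86EMUL_MODE_VM86
    EFER.LMA == 1 && CS.L == 1       ->  X86EMUL_MODE_PROT64
    EFER.LMA == 1 && CS.D == 1       ->  X86EMUL_MODE_PROT32 (compatibility)
    EFER.LMA == 1, CS.L == CS.D == 0 ->  X86EMUL_MODE_PROT16
    EFER.LMA == 0                    ->  CS.D ? PROT32 : PROT16 (legacy)

This is why the CR0-write and RSM paths in these hunks now recompute the mode instead of assuming it is unchanged.
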
++ */ ++ r = emulator_recalc_and_set_mode(ctxt); ++ if (r != X86EMUL_CONTINUE) ++ return r; ++ } ++ + return X86EMUL_CONTINUE; + } + +@@ -4101,6 +4166,9 @@ static int em_xsetbv(struct x86_emulate_ctxt *ctxt) { u32 eax, ecx, edx; @@ -50950,7 +61845,7 @@ index e9688a9f7b579..7bb165c232334 100644 static __always_inline u64 rsvd_bits(int s, int e) { diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c -index 0cc58901bf7a7..ba1749a770eb1 100644 +index 0cc58901bf7a7..4724289c8a7f8 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -1071,20 +1071,6 @@ static bool rmap_can_add(struct kvm_vcpu *vcpu) @@ -51039,7 +61934,15 @@ index 0cc58901bf7a7..ba1749a770eb1 100644 if (iterator->level == PT32E_ROOT_LEVEL) { /* -@@ -2718,7 +2709,6 @@ static int mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, +@@ -2366,6 +2357,7 @@ static bool __kvm_mmu_prepare_zap_page(struct kvm *kvm, + { + bool list_unstable; + ++ lockdep_assert_held_write(&kvm->mmu_lock); + trace_kvm_mmu_prepare_zap_page(sp); + ++kvm->stat.mmu_shadow_zapped; + *nr_zapped = mmu_zap_unsync_children(kvm, sp, invalid_list); +@@ -2718,7 +2710,6 @@ static int mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, bool host_writable) { int was_rmapped = 0; @@ -51047,7 +61950,7 @@ index 0cc58901bf7a7..ba1749a770eb1 100644 int set_spte_ret; int ret = RET_PF_FIXED; bool flush = false; -@@ -2778,9 +2768,7 @@ static int mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, +@@ -2778,9 +2769,7 @@ static int mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, if (!was_rmapped) { kvm_update_page_stats(vcpu->kvm, level, 1); @@ -51058,7 +61961,7 @@ index 0cc58901bf7a7..ba1749a770eb1 100644 } return ret; -@@ -3314,6 +3302,8 @@ static void mmu_free_root_page(struct kvm *kvm, hpa_t *root_hpa, +@@ -3314,6 +3303,8 @@ static void mmu_free_root_page(struct kvm *kvm, hpa_t *root_hpa, return; sp = to_shadow_page(*root_hpa & PT64_BASE_ADDR_MASK); @@ -51067,7 +61970,7 @@ index 0cc58901bf7a7..ba1749a770eb1 100644 if (is_tdp_mmu_page(sp)) kvm_tdp_mmu_put_root(kvm, sp, false); -@@ -3579,7 +3569,7 @@ set_root_pgd: +@@ -3579,7 +3570,7 @@ set_root_pgd: out_unlock: write_unlock(&vcpu->kvm->mmu_lock); @@ -51076,7 +61979,7 @@ index 0cc58901bf7a7..ba1749a770eb1 100644 } static int mmu_alloc_special_roots(struct kvm_vcpu *vcpu) -@@ -3889,12 +3879,23 @@ static void shadow_page_table_clear_flood(struct kvm_vcpu *vcpu, gva_t addr) +@@ -3889,12 +3880,23 @@ static void shadow_page_table_clear_flood(struct kvm_vcpu *vcpu, gva_t addr) walk_shadow_page_lockless_end(vcpu); } @@ -51101,7 +62004,7 @@ index 0cc58901bf7a7..ba1749a770eb1 100644 arch.gfn = gfn; arch.direct_map = vcpu->arch.mmu->direct_map; arch.cr3 = vcpu->arch.mmu->get_guest_pgd(vcpu); -@@ -3956,6 +3957,7 @@ static bool kvm_faultin_pfn(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn, +@@ -3956,6 +3958,7 @@ static bool kvm_faultin_pfn(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn, *pfn = __gfn_to_pfn_memslot(slot, gfn, false, NULL, write, writable, hva); @@ -51109,7 +62012,30 @@ index 0cc58901bf7a7..ba1749a770eb1 100644 out_retry: *r = RET_PF_RETRY; -@@ -4679,6 +4681,7 @@ static union kvm_mmu_extended_role kvm_calc_mmu_role_ext(struct kvm_vcpu *vcpu, +@@ -4005,16 +4008,17 @@ static int direct_page_fault(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code, + + if (!is_noslot_pfn(pfn) && mmu_notifier_retry_hva(vcpu->kvm, mmu_seq, hva)) + goto out_unlock; +- r = make_mmu_pages_available(vcpu); +- if (r) +- goto out_unlock; + +- if (is_tdp_mmu_fault) ++ if (is_tdp_mmu_fault) { + r = kvm_tdp_mmu_map(vcpu, gpa, error_code, 
map_writable, max_level, + pfn, prefault); +- else ++ } else { ++ r = make_mmu_pages_available(vcpu); ++ if (r) ++ goto out_unlock; + r = __direct_map(vcpu, gpa, error_code, map_writable, max_level, pfn, + prefault, is_tdp); ++ } + + out_unlock: + if (is_tdp_mmu_fault) +@@ -4679,6 +4683,7 @@ static union kvm_mmu_extended_role kvm_calc_mmu_role_ext(struct kvm_vcpu *vcpu, /* PKEY and LA57 are active iff long mode is active. */ ext.cr4_pke = ____is_efer_lma(regs) && ____is_cr4_pke(regs); ext.cr4_la57 = ____is_efer_lma(regs) && ____is_cr4_la57(regs); @@ -51117,7 +62043,7 @@ index 0cc58901bf7a7..ba1749a770eb1 100644 } ext.valid = 1; -@@ -4851,7 +4854,7 @@ void kvm_init_shadow_npt_mmu(struct kvm_vcpu *vcpu, unsigned long cr0, +@@ -4851,7 +4856,7 @@ void kvm_init_shadow_npt_mmu(struct kvm_vcpu *vcpu, unsigned long cr0, struct kvm_mmu *context = &vcpu->arch.guest_mmu; struct kvm_mmu_role_regs regs = { .cr0 = cr0, @@ -51126,7 +62052,7 @@ index 0cc58901bf7a7..ba1749a770eb1 100644 .efer = efer, }; union kvm_mmu_role new_role; -@@ -4915,7 +4918,7 @@ void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly, +@@ -4915,7 +4920,7 @@ void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly, context->direct_map = false; update_permission_bitmask(context, true); @@ -51135,7 +62061,7 @@ index 0cc58901bf7a7..ba1749a770eb1 100644 reset_rsvds_bits_mask_ept(vcpu, context, execonly); reset_ept_shadow_zero_bits_mask(vcpu, context, execonly); } -@@ -5368,7 +5371,7 @@ void kvm_mmu_invalidate_gva(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, +@@ -5368,7 +5373,7 @@ void kvm_mmu_invalidate_gva(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva) { @@ -51144,7 +62070,7 @@ index 0cc58901bf7a7..ba1749a770eb1 100644 ++vcpu->stat.invlpg; } EXPORT_SYMBOL_GPL(kvm_mmu_invlpg); -@@ -5381,14 +5384,16 @@ void kvm_mmu_invpcid_gva(struct kvm_vcpu *vcpu, gva_t gva, unsigned long pcid) +@@ -5381,14 +5386,16 @@ void kvm_mmu_invpcid_gva(struct kvm_vcpu *vcpu, gva_t gva, unsigned long pcid) uint i; if (pcid == kvm_get_active_pcid(vcpu)) { @@ -51163,7 +62089,7 @@ index 0cc58901bf7a7..ba1749a770eb1 100644 tlb_flush = true; } } -@@ -5473,8 +5478,8 @@ slot_handle_level(struct kvm *kvm, const struct kvm_memory_slot *memslot, +@@ -5473,8 +5480,8 @@ slot_handle_level(struct kvm *kvm, const struct kvm_memory_slot *memslot, } static __always_inline bool @@ -51174,7 +62100,7 @@ index 0cc58901bf7a7..ba1749a770eb1 100644 { return slot_handle_level(kvm, memslot, fn, PG_LEVEL_4K, PG_LEVEL_4K, flush_on_yield); -@@ -5575,6 +5580,7 @@ static void kvm_zap_obsolete_pages(struct kvm *kvm) +@@ -5575,6 +5582,7 @@ static void kvm_zap_obsolete_pages(struct kvm *kvm) { struct kvm_mmu_page *sp, *node; int nr_zapped, batch = 0; @@ -51182,7 +62108,7 @@ index 0cc58901bf7a7..ba1749a770eb1 100644 restart: list_for_each_entry_safe_reverse(sp, node, -@@ -5606,11 +5612,12 @@ restart: +@@ -5606,11 +5614,12 @@ restart: goto restart; } @@ -51199,7 +62125,7 @@ index 0cc58901bf7a7..ba1749a770eb1 100644 } /* -@@ -5758,13 +5765,11 @@ void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end) +@@ -5758,13 +5767,11 @@ void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end) for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) flush = kvm_tdp_mmu_zap_gfn_range(kvm, i, gfn_start, gfn_end, flush); @@ -51215,7 +62141,7 @@ index 0cc58901bf7a7..ba1749a770eb1 100644 kvm_dec_notifier_count(kvm, gfn_start, gfn_end); -@@ -5856,21 +5861,21 @@ restart: +@@ -5856,21 +5863,21 @@ restart: void 
kvm_mmu_zap_collapsible_sptes(struct kvm *kvm, const struct kvm_memory_slot *slot) { @@ -51244,7 +62170,7 @@ index 0cc58901bf7a7..ba1749a770eb1 100644 read_unlock(&kvm->mmu_lock); } } -@@ -5897,8 +5902,11 @@ void kvm_mmu_slot_leaf_clear_dirty(struct kvm *kvm, +@@ -5897,8 +5904,11 @@ void kvm_mmu_slot_leaf_clear_dirty(struct kvm *kvm, if (kvm_memslots_have_rmaps(kvm)) { write_lock(&kvm->mmu_lock); @@ -51258,7 +62184,7 @@ index 0cc58901bf7a7..ba1749a770eb1 100644 write_unlock(&kvm->mmu_lock); } -@@ -6091,12 +6099,24 @@ static int set_nx_huge_pages(const char *val, const struct kernel_param *kp) +@@ -6091,12 +6101,24 @@ static int set_nx_huge_pages(const char *val, const struct kernel_param *kp) return 0; } @@ -51286,7 +62212,7 @@ index 0cc58901bf7a7..ba1749a770eb1 100644 /* * MMU roles use union aliasing which is, generally speaking, an -@@ -6168,7 +6188,7 @@ void kvm_mmu_destroy(struct kvm_vcpu *vcpu) +@@ -6168,7 +6190,7 @@ void kvm_mmu_destroy(struct kvm_vcpu *vcpu) mmu_free_memory_caches(vcpu); } @@ -52016,7 +62942,7 @@ index 8052d92069e01..3d3f8dfb80457 100644 void svm_vcpu_blocking(struct kvm_vcpu *vcpu) diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c -index 510b833cbd399..78f1138753e65 100644 +index 510b833cbd399..e0b4f88b04b3e 100644 --- a/arch/x86/kvm/svm/nested.c +++ b/arch/x86/kvm/svm/nested.c @@ -275,7 +275,8 @@ static bool nested_vmcb_check_cr3_cr4(struct kvm_vcpu *vcpu, @@ -52039,10 +62965,23 @@ index 510b833cbd399..78f1138753e65 100644 rc = kvm_vcpu_map(vcpu, gpa_to_gfn(svm->nested.vmcb12_gpa), &map); if (rc) { if (rc == -EINVAL) -@@ -942,9 +940,9 @@ void svm_free_nested(struct vcpu_svm *svm) - /* - * Forcibly leave nested mode in order to be able to reset the VCPU later on. - */ +@@ -921,6 +919,9 @@ void svm_free_nested(struct vcpu_svm *svm) + if (!svm->nested.initialized) + return; + ++ if (WARN_ON_ONCE(svm->vmcb != svm->vmcb01.ptr)) ++ svm_switch_vmcb(svm, &svm->vmcb01); ++ + svm_vcpu_free_msrpm(svm->nested.msrpm); + svm->nested.msrpm = NULL; + +@@ -939,12 +940,9 @@ void svm_free_nested(struct vcpu_svm *svm) + svm->nested.initialized = false; + } + +-/* +- * Forcibly leave nested mode in order to be able to reset the VCPU later on. 
+- */ -void svm_leave_nested(struct vcpu_svm *svm) +void svm_leave_nested(struct kvm_vcpu *vcpu) { @@ -52464,7 +63403,7 @@ index 7e34d7163adab..eeedcb3d40e89 100644 return kvm_sev_es_string_io(&svm->vcpu, size, port, svm->ghcb_sa, count, in); } diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c -index 989685098b3ea..49bb3db2761a7 100644 +index 989685098b3ea..0611dac70c25c 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -281,7 +281,7 @@ int svm_set_efer(struct kvm_vcpu *vcpu, u64 efer) @@ -52489,7 +63428,20 @@ index 989685098b3ea..49bb3db2761a7 100644 set_exception_intercept(svm, GP_VECTOR); } } -@@ -390,6 +394,10 @@ static void svm_queue_exception(struct kvm_vcpu *vcpu) +@@ -313,12 +317,6 @@ int svm_set_efer(struct kvm_vcpu *vcpu, u64 efer) + return 0; + } + +-static int is_external_interrupt(u32 info) +-{ +- info &= SVM_EVTINJ_TYPE_MASK | SVM_EVTINJ_VALID; +- return info == (SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_INTR); +-} +- + static u32 svm_get_interrupt_shadow(struct kvm_vcpu *vcpu) + { + struct vcpu_svm *svm = to_svm(vcpu); +@@ -390,6 +388,10 @@ static void svm_queue_exception(struct kvm_vcpu *vcpu) */ (void)skip_emulated_instruction(vcpu); rip = kvm_rip_read(vcpu); @@ -52500,7 +63452,59 @@ index 989685098b3ea..49bb3db2761a7 100644 svm->int3_rip = rip + svm->vmcb->save.cs.base; svm->int3_injected = rip - old_rip; } -@@ -1176,9 +1184,10 @@ static void init_vmcb(struct kvm_vcpu *vcpu) +@@ -463,11 +465,24 @@ static int has_svm(void) + return 1; + } + ++void __svm_write_tsc_multiplier(u64 multiplier) ++{ ++ preempt_disable(); ++ ++ if (multiplier == __this_cpu_read(current_tsc_ratio)) ++ goto out; ++ ++ wrmsrl(MSR_AMD64_TSC_RATIO, multiplier); ++ __this_cpu_write(current_tsc_ratio, multiplier); ++out: ++ preempt_enable(); ++} ++ + static void svm_hardware_disable(void) + { + /* Make sure we clean up behind us */ + if (static_cpu_has(X86_FEATURE_TSCRATEMSR)) +- wrmsrl(MSR_AMD64_TSC_RATIO, TSC_RATIO_DEFAULT); ++ __svm_write_tsc_multiplier(TSC_RATIO_DEFAULT); + + cpu_svm_disable(); + +@@ -509,8 +524,11 @@ static int svm_hardware_enable(void) + wrmsrl(MSR_VM_HSAVE_PA, __sme_page_pa(sd->save_area)); + + if (static_cpu_has(X86_FEATURE_TSCRATEMSR)) { +- wrmsrl(MSR_AMD64_TSC_RATIO, TSC_RATIO_DEFAULT); +- __this_cpu_write(current_tsc_ratio, TSC_RATIO_DEFAULT); ++ /* ++ * Set the default value, even if we don't use TSC scaling ++ * to avoid having stale value in the msr ++ */ ++ __svm_write_tsc_multiplier(TSC_RATIO_DEFAULT); + } + + +@@ -1123,9 +1141,10 @@ static void svm_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset) + + static void svm_write_tsc_multiplier(struct kvm_vcpu *vcpu, u64 multiplier) + { +- wrmsrl(MSR_AMD64_TSC_RATIO, multiplier); ++ __svm_write_tsc_multiplier(multiplier); + } + ++ + /* Evaluate instruction intercepts that depend on guest CPUID features. */ + static void svm_recalc_instruction_intercepts(struct kvm_vcpu *vcpu, + struct vcpu_svm *svm) +@@ -1176,9 +1195,10 @@ static void init_vmcb(struct kvm_vcpu *vcpu) * Guest access to VMware backdoor ports could legitimately * trigger #GP because of TSS I/O permission bitmap. 
* We intercept those #GP and allow access to them anyway @@ -52513,7 +63517,31 @@ index 989685098b3ea..49bb3db2761a7 100644 set_exception_intercept(svm, GP_VECTOR); svm_set_intercept(svm, INTERCEPT_INTR); -@@ -1517,6 +1526,15 @@ static void svm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) +@@ -1418,6 +1438,7 @@ static void svm_free_vcpu(struct kvm_vcpu *vcpu) + */ + svm_clear_current_vmcb(svm->vmcb); + ++ svm_leave_nested(vcpu); + svm_free_nested(svm); + + sev_free_vcpu(vcpu); +@@ -1447,13 +1468,8 @@ static void svm_prepare_guest_switch(struct kvm_vcpu *vcpu) + vmsave(__sme_page_pa(sd->save_area)); + } + +- if (static_cpu_has(X86_FEATURE_TSCRATEMSR)) { +- u64 tsc_ratio = vcpu->arch.tsc_scaling_ratio; +- if (tsc_ratio != __this_cpu_read(current_tsc_ratio)) { +- __this_cpu_write(current_tsc_ratio, tsc_ratio); +- wrmsrl(MSR_AMD64_TSC_RATIO, tsc_ratio); +- } +- } ++ if (static_cpu_has(X86_FEATURE_TSCRATEMSR)) ++ __svm_write_tsc_multiplier(vcpu->arch.tsc_scaling_ratio); + + if (likely(tsc_aux_uret_slot >= 0)) + kvm_set_user_return_msr(tsc_aux_uret_slot, svm->tsc_aux, -1ull); +@@ -1517,6 +1533,15 @@ static void svm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) to_svm(vcpu)->vmcb->save.rflags = rflags; } @@ -52529,7 +63557,7 @@ index 989685098b3ea..49bb3db2761a7 100644 static void svm_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg) { switch (reg) { -@@ -1713,6 +1731,7 @@ void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) +@@ -1713,6 +1738,7 @@ void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) { struct vcpu_svm *svm = to_svm(vcpu); u64 hcr0 = cr0; @@ -52537,7 +63565,7 @@ index 989685098b3ea..49bb3db2761a7 100644 #ifdef CONFIG_X86_64 if (vcpu->arch.efer & EFER_LME && !vcpu->arch.guest_state_protected) { -@@ -1729,8 +1748,11 @@ void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) +@@ -1729,8 +1755,11 @@ void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) #endif vcpu->arch.cr0 = cr0; @@ -52550,7 +63578,7 @@ index 989685098b3ea..49bb3db2761a7 100644 /* * re-enable caching here because the QEMU bios -@@ -1774,8 +1796,12 @@ void svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) +@@ -1774,8 +1803,12 @@ void svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) svm_flush_tlb(vcpu); vcpu->arch.cr4 = cr4; @@ -52564,7 +63592,7 @@ index 989685098b3ea..49bb3db2761a7 100644 cr4 |= host_cr4_mce; to_svm(vcpu)->vmcb->save.cr4 = cr4; vmcb_mark_dirty(to_svm(vcpu)->vmcb, VMCB_CR); -@@ -2224,10 +2250,6 @@ static int gp_interception(struct kvm_vcpu *vcpu) +@@ -2224,10 +2257,6 @@ static int gp_interception(struct kvm_vcpu *vcpu) if (error_code) goto reinject; @@ -52575,7 +63603,7 @@ index 989685098b3ea..49bb3db2761a7 100644 /* Decode the instruction for usage later */ if (x86_decode_emulated_instruction(vcpu, 0, NULL, 0) != EMULATION_OK) goto reinject; -@@ -2245,8 +2267,13 @@ static int gp_interception(struct kvm_vcpu *vcpu) +@@ -2245,8 +2274,13 @@ static int gp_interception(struct kvm_vcpu *vcpu) if (!is_guest_mode(vcpu)) return kvm_emulate_instruction(vcpu, EMULTYPE_VMWARE_GP | EMULTYPE_NO_DECODE); @@ -52590,7 +63618,54 @@ index 989685098b3ea..49bb3db2761a7 100644 reinject: kvm_queue_exception_e(vcpu, GP_VECTOR, error_code); -@@ -3394,8 +3421,6 @@ static void svm_set_irq(struct kvm_vcpu *vcpu) +@@ -2639,9 +2673,9 @@ static int svm_get_msr_feature(struct kvm_msr_entry *msr) + msr->data = 0; + + switch (msr->index) { +- case MSR_F10H_DECFG: +- if (boot_cpu_has(X86_FEATURE_LFENCE_RDTSC)) +- msr->data |= MSR_F10H_DECFG_LFENCE_SERIALIZE; ++ case MSR_AMD64_DE_CFG: ++ 
if (cpu_feature_enabled(X86_FEATURE_LFENCE_RDTSC)) ++ msr->data |= MSR_AMD64_DE_CFG_LFENCE_SERIALIZE; + break; + case MSR_IA32_PERF_CAPABILITIES: + return 0; +@@ -2750,7 +2784,7 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) + msr_info->data = 0x1E; + } + break; +- case MSR_F10H_DECFG: ++ case MSR_AMD64_DE_CFG: + msr_info->data = svm->msr_decfg; + break; + default: +@@ -2950,7 +2984,7 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) + case MSR_VM_IGNNE: + vcpu_unimpl(vcpu, "unimplemented wrmsr: 0x%x data 0x%llx\n", ecx, data); + break; +- case MSR_F10H_DECFG: { ++ case MSR_AMD64_DE_CFG: { + struct kvm_msr_entry msr_entry; + + msr_entry.index = msr->index; +@@ -3332,15 +3366,6 @@ static int handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath) + return 0; + } + +- if (is_external_interrupt(svm->vmcb->control.exit_int_info) && +- exit_code != SVM_EXIT_EXCP_BASE + PF_VECTOR && +- exit_code != SVM_EXIT_NPF && exit_code != SVM_EXIT_TASK_SWITCH && +- exit_code != SVM_EXIT_INTR && exit_code != SVM_EXIT_NMI) +- printk(KERN_ERR "%s: unexpected exit_int_info 0x%x " +- "exit_code 0x%x\n", +- __func__, svm->vmcb->control.exit_int_info, +- exit_code); +- + if (exit_fastpath != EXIT_FASTPATH_NONE) + return 1; + +@@ -3394,8 +3419,6 @@ static void svm_set_irq(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); @@ -52599,7 +63674,7 @@ index 989685098b3ea..49bb3db2761a7 100644 trace_kvm_inj_virq(vcpu->arch.interrupt.nr); ++vcpu->stat.irq_injections; -@@ -3485,14 +3510,7 @@ bool svm_interrupt_blocked(struct kvm_vcpu *vcpu) +@@ -3485,14 +3508,7 @@ bool svm_interrupt_blocked(struct kvm_vcpu *vcpu) if (!gif_set(svm)) return true; @@ -52615,7 +63690,7 @@ index 989685098b3ea..49bb3db2761a7 100644 /* As long as interrupts are being delivered... */ if ((svm->nested.ctl.int_ctl & V_INTR_MASKING_MASK) ? !(svm->vmcb01.ptr->save.rflags & X86_EFLAGS_IF) -@@ -3503,7 +3521,7 @@ bool svm_interrupt_blocked(struct kvm_vcpu *vcpu) +@@ -3503,7 +3519,7 @@ bool svm_interrupt_blocked(struct kvm_vcpu *vcpu) if (nested_exit_on_intr(svm)) return false; } else { @@ -52624,7 +63699,7 @@ index 989685098b3ea..49bb3db2761a7 100644 return true; } -@@ -3666,6 +3684,18 @@ static void svm_complete_interrupts(struct kvm_vcpu *vcpu) +@@ -3666,6 +3682,18 @@ static void svm_complete_interrupts(struct kvm_vcpu *vcpu) vector = exitintinfo & SVM_EXITINTINFO_VEC_MASK; type = exitintinfo & SVM_EXITINTINFO_TYPE_MASK; @@ -52643,7 +63718,7 @@ index 989685098b3ea..49bb3db2761a7 100644 switch (type) { case SVM_EXITINTINFO_TYPE_NMI: vcpu->arch.nmi_injected = true; -@@ -3679,16 +3709,11 @@ static void svm_complete_interrupts(struct kvm_vcpu *vcpu) +@@ -3679,16 +3707,11 @@ static void svm_complete_interrupts(struct kvm_vcpu *vcpu) /* * In case of software exceptions, do not reinject the vector, @@ -52663,7 +63738,24 @@ index 989685098b3ea..49bb3db2761a7 100644 if (exitintinfo & SVM_EXITINTINFO_VALID_ERR) { u32 err = svm->vmcb->control.exit_int_info_err; kvm_requeue_exception_e(vcpu, vector, err); -@@ -4247,6 +4272,8 @@ out: +@@ -3717,8 +3740,14 @@ static void svm_cancel_injection(struct kvm_vcpu *vcpu) + + static fastpath_t svm_exit_handlers_fastpath(struct kvm_vcpu *vcpu) + { +- if (to_svm(vcpu)->vmcb->control.exit_code == SVM_EXIT_MSR && +- to_svm(vcpu)->vmcb->control.exit_info_1) ++ struct vmcb_control_area *control = &to_svm(vcpu)->vmcb->control; ++ ++ /* ++ * Note, the next RIP must be provided as SRCU isn't held, i.e. 
KVM ++ * can't read guest memory (dereference memslots) to decode the WRMSR. ++ */ ++ if (control->exit_code == SVM_EXIT_MSR && control->exit_info_1 && ++ nrips && control->next_rip) + return handle_fastpath_set_msr_irqoff(vcpu); + + return EXIT_FASTPATH_NONE; +@@ -4247,6 +4276,8 @@ out: static void svm_handle_exit_irqoff(struct kvm_vcpu *vcpu) { @@ -52672,7 +63764,7 @@ index 989685098b3ea..49bb3db2761a7 100644 } static void svm_sched_in(struct kvm_vcpu *vcpu, int cpu) -@@ -4376,10 +4403,17 @@ static int svm_leave_smm(struct kvm_vcpu *vcpu, const char *smstate) +@@ -4376,10 +4407,17 @@ static int svm_leave_smm(struct kvm_vcpu *vcpu, const char *smstate) * Enter the nested guest now */ @@ -52690,7 +63782,7 @@ index 989685098b3ea..49bb3db2761a7 100644 unmap_save: kvm_vcpu_unmap(vcpu, &map_save, true); unmap_map: -@@ -4405,8 +4439,13 @@ static bool svm_can_emulate_instruction(struct kvm_vcpu *vcpu, void *insn, int i +@@ -4405,8 +4443,13 @@ static bool svm_can_emulate_instruction(struct kvm_vcpu *vcpu, void *insn, int i bool smep, smap, is_user; unsigned long cr4; @@ -52705,7 +63797,7 @@ index 989685098b3ea..49bb3db2761a7 100644 */ if (sev_es_guest(vcpu->kvm)) return false; -@@ -4454,23 +4493,27 @@ static bool svm_can_emulate_instruction(struct kvm_vcpu *vcpu, void *insn, int i +@@ -4454,23 +4497,27 @@ static bool svm_can_emulate_instruction(struct kvm_vcpu *vcpu, void *insn, int i if (likely(!insn || insn_len)) return true; @@ -52744,7 +63836,7 @@ index 989685098b3ea..49bb3db2761a7 100644 } return false; -@@ -4562,6 +4605,7 @@ static struct kvm_x86_ops svm_x86_ops __initdata = { +@@ -4562,6 +4609,7 @@ static struct kvm_x86_ops svm_x86_ops __initdata = { .cache_reg = svm_cache_reg, .get_rflags = svm_get_rflags, .set_rflags = svm_set_rflags, @@ -52752,7 +63844,7 @@ index 989685098b3ea..49bb3db2761a7 100644 .tlb_flush_all = svm_flush_tlb, .tlb_flush_current = svm_flush_tlb, -@@ -4592,7 +4636,6 @@ static struct kvm_x86_ops svm_x86_ops __initdata = { +@@ -4592,7 +4640,6 @@ static struct kvm_x86_ops svm_x86_ops __initdata = { .load_eoi_exitmap = svm_load_eoi_exitmap, .hwapic_irr_update = svm_hwapic_irr_update, .hwapic_isr_update = svm_hwapic_isr_update, @@ -52760,7 +63852,7 @@ index 989685098b3ea..49bb3db2761a7 100644 .apicv_post_state_restore = avic_post_state_restore, .set_tss_addr = svm_set_tss_addr, -@@ -4635,6 +4678,7 @@ static struct kvm_x86_ops svm_x86_ops __initdata = { +@@ -4635,6 +4682,7 @@ static struct kvm_x86_ops svm_x86_ops __initdata = { .mem_enc_op = svm_mem_enc_op, .mem_enc_reg_region = svm_register_enc_region, .mem_enc_unreg_region = svm_unregister_enc_region, @@ -52769,7 +63861,7 @@ index 989685098b3ea..49bb3db2761a7 100644 .vm_copy_enc_context_from = svm_vm_copy_asid_from, diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h -index 5d30db599e10d..7004f356edf94 100644 +index 5d30db599e10d..1d9b1a9e4398f 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -22,6 +22,8 @@ @@ -52790,7 +63882,15 @@ index 5d30db599e10d..7004f356edf94 100644 void svm_free_nested(struct vcpu_svm *svm); int svm_allocate_nested(struct vcpu_svm *svm); int nested_svm_vmrun(struct kvm_vcpu *vcpu); -@@ -497,7 +499,7 @@ extern struct kvm_x86_nested_ops svm_nested_ops; +@@ -485,6 +487,7 @@ int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr, + int nested_svm_exit_special(struct vcpu_svm *svm); + void nested_load_control_from_vmcb12(struct vcpu_svm *svm, + struct vmcb_control_area *control); ++void __svm_write_tsc_multiplier(u64 multiplier); + void 
nested_sync_control_from_vmcb02(struct vcpu_svm *svm); + void nested_vmcb02_compute_g_pat(struct vcpu_svm *svm); + void svm_switch_vmcb(struct vcpu_svm *svm, struct kvm_vmcb_info *target_vmcb); +@@ -497,7 +500,7 @@ extern struct kvm_x86_nested_ops svm_nested_ops; #define AVIC_LOGICAL_ID_ENTRY_VALID_BIT 31 #define AVIC_LOGICAL_ID_ENTRY_VALID_MASK (1 << 31) @@ -52799,7 +63899,7 @@ index 5d30db599e10d..7004f356edf94 100644 #define AVIC_PHYSICAL_ID_ENTRY_BACKING_PAGE_MASK (0xFFFFFFFFFFULL << 12) #define AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK (1ULL << 62) #define AVIC_PHYSICAL_ID_ENTRY_VALID_MASK (1ULL << 63) -@@ -553,6 +555,8 @@ int svm_register_enc_region(struct kvm *kvm, +@@ -553,6 +556,8 @@ int svm_register_enc_region(struct kvm *kvm, int svm_unregister_enc_region(struct kvm *kvm, struct kvm_enc_region *range); int svm_vm_copy_asid_from(struct kvm *kvm, unsigned int source_fd); @@ -52874,6 +63974,44 @@ index 4fa17df123cd6..723f8534986c3 100644 3: cmpb $0, kvm_rebooting jne 2b +diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h +index 03ebe368333ef..c41506ed8c7dd 100644 +--- a/arch/x86/kvm/trace.h ++++ b/arch/x86/kvm/trace.h +@@ -355,25 +355,29 @@ TRACE_EVENT(kvm_inj_virq, + * Tracepoint for kvm interrupt injection: + */ + TRACE_EVENT(kvm_inj_exception, +- TP_PROTO(unsigned exception, bool has_error, unsigned error_code), +- TP_ARGS(exception, has_error, error_code), ++ TP_PROTO(unsigned exception, bool has_error, unsigned error_code, ++ bool reinjected), ++ TP_ARGS(exception, has_error, error_code, reinjected), + + TP_STRUCT__entry( + __field( u8, exception ) + __field( u8, has_error ) + __field( u32, error_code ) ++ __field( bool, reinjected ) + ), + + TP_fast_assign( + __entry->exception = exception; + __entry->has_error = has_error; + __entry->error_code = error_code; ++ __entry->reinjected = reinjected; + ), + +- TP_printk("%s (0x%x)", ++ TP_printk("%s (0x%x)%s", + __print_symbolic(__entry->exception, kvm_trace_sym_exc), + /* FIXME: don't print error_code if not present */ +- __entry->has_error ? __entry->error_code : 0) ++ __entry->has_error ? __entry->error_code : 0, ++ __entry->reinjected ? " [reinjected]" : "") + ); + + /* diff --git a/arch/x86/kvm/vmx/evmcs.c b/arch/x86/kvm/vmx/evmcs.c index ba6f99f584ac3..a7ed30d5647af 100644 --- a/arch/x86/kvm/vmx/evmcs.c @@ -52902,7 +64040,7 @@ index 152ab0aa82cf6..b43976e4b9636 100644 #define EVMCS1_UNSUPPORTED_VMFUNC (VMX_VMFUNC_EPTP_SWITCHING) diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c -index eedcebf580041..91b182fafb43d 100644 +index eedcebf580041..f3c136548af69 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -523,29 +523,6 @@ static int nested_vmx_check_tpr_shadow_controls(struct kvm_vcpu *vcpu, @@ -53503,7 +64641,7 @@ index eedcebf580041..91b182fafb43d 100644 /* * VCPU_EXREG_PDPTR will be clobbered in arch/x86/kvm/vmx/vmx.h between -@@ -4567,6 +4604,17 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 vm_exit_reason, +@@ -4567,8 +4604,30 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 vm_exit_reason, WARN_ON_ONCE(nested_early_check); } @@ -53520,8 +64658,21 @@ index eedcebf580041..91b182fafb43d 100644 + vmx_switch_vmcs(vcpu, &vmx->vmcs01); ++ /* ++ * If IBRS is advertised to the vCPU, KVM must flush the indirect ++ * branch predictors when transitioning from L2 to L1, as L1 expects ++ * hardware (KVM in this case) to provide separate predictor modes. ++ * Bare metal isolates VMX root (host) from VMX non-root (guest), but ++ * doesn't isolate different VMCSs, i.e. 
in this case, doesn't provide ++ * separate modes for L2 vs L1. ++ */ ++ if (guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL)) ++ indirect_branch_prediction_barrier(); ++ /* Update any VMCS fields that might have changed while L2 ran */ -@@ -4603,6 +4651,11 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 vm_exit_reason, + vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.host.nr); + vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.guest.nr); +@@ -4603,6 +4662,11 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 vm_exit_reason, kvm_make_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu); } @@ -53533,7 +64684,7 @@ index eedcebf580041..91b182fafb43d 100644 if ((vm_exit_reason != -1) && (enable_shadow_vmcs || evmptr_is_valid(vmx->nested.hv_evmcs_vmptr))) vmx->nested.need_vmcs12_to_shadow_sync = true; -@@ -4917,20 +4970,25 @@ static int handle_vmon(struct kvm_vcpu *vcpu) +@@ -4917,20 +4981,36 @@ static int handle_vmon(struct kvm_vcpu *vcpu) | FEAT_CTL_VMX_ENABLED_OUTSIDE_SMX; /* @@ -53544,31 +64695,79 @@ index eedcebf580041..91b182fafb43d 100644 - * have already been checked by hardware, prior to the VM-exit for - * VMXON. We do test guest cr4.VMXE because processor CR4 always has - * that bit set to 1 in non-root mode. -+ * Note, KVM cannot rely on hardware to perform the CR0/CR4 #UD checks -+ * that have higher priority than VM-Exit (see Intel SDM's pseudocode -+ * for VMXON), as KVM must load valid CR0/CR4 values into hardware while -+ * running the guest, i.e. KVM needs to check the _guest_ values. ++ * Manually check CR4.VMXE checks, KVM must force CR4.VMXE=1 to enter ++ * the guest and so cannot rely on hardware to perform the check, ++ * which has higher priority than VM-Exit (see Intel SDM's pseudocode ++ * for VMXON). + * -+ * Rely on hardware for the other two pre-VM-Exit checks, !VM86 and -+ * !COMPATIBILITY modes. KVM may run the guest in VM86 to emulate Real -+ * Mode, but KVM will never take the guest out of those modes. ++ * Rely on hardware for the other pre-VM-Exit checks, CR0.PE=1, !VM86 ++ * and !COMPATIBILITY modes. For an unrestricted guest, KVM doesn't ++ * force any of the relevant guest state. For a restricted guest, KVM ++ * does force CR0.PE=1, but only to also force VM86 in order to emulate ++ * Real Mode, and so there's no need to check CR0.PE manually. */ -- if (!kvm_read_cr4_bits(vcpu, X86_CR4_VMXE)) { -+ if (!nested_host_cr0_valid(vcpu, kvm_read_cr0(vcpu)) || -+ !nested_host_cr4_valid(vcpu, kvm_read_cr4(vcpu))) { + if (!kvm_read_cr4_bits(vcpu, X86_CR4_VMXE)) { kvm_queue_exception(vcpu, UD_VECTOR); return 1; } - /* CPL=0 must be checked manually. */ + /* -+ * CPL=0 and all other checks that are lower priority than VM-Exit must -+ * be checked manually. ++ * The CPL is checked for "not in VMX operation" and for "in VMX root", ++ * and has higher priority than the VM-Fail due to being post-VMXON, ++ * i.e. VMXON #GPs outside of VMX non-root if CPL!=0. In VMX non-root, ++ * VMXON causes VM-Exit and KVM unconditionally forwards VMXON VM-Exits ++ * from L2 to L1, i.e. there's no need to check for the vCPU being in ++ * VMX non-root. ++ * ++ * Forwarding the VM-Exit unconditionally, i.e. without performing the ++ * #UD checks (see above), is functionally ok because KVM doesn't allow ++ * L1 to run L2 without CR4.VMXE=0, and because KVM never modifies L2's ++ * CR0 or CR4, i.e. it's L2's responsibility to emulate #UDs that are ++ * missed by hardware due to shadowing CR0 and/or CR4. 
+ */ if (vmx_get_cpl(vcpu)) { kvm_inject_gp(vcpu, 0); return 1; -@@ -6697,6 +6755,9 @@ void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, u32 ept_caps) +@@ -4939,6 +5019,17 @@ static int handle_vmon(struct kvm_vcpu *vcpu) + if (vmx->nested.vmxon) + return nested_vmx_fail(vcpu, VMXERR_VMXON_IN_VMX_ROOT_OPERATION); + ++ /* ++ * Invalid CR0/CR4 generates #GP. These checks are performed if and ++ * only if the vCPU isn't already in VMX operation, i.e. effectively ++ * have lower priority than the VM-Fail above. ++ */ ++ if (!nested_host_cr0_valid(vcpu, kvm_read_cr0(vcpu)) || ++ !nested_host_cr4_valid(vcpu, kvm_read_cr4(vcpu))) { ++ kvm_inject_gp(vcpu, 0); ++ return 1; ++ } ++ + if ((vmx->msr_ia32_feature_control & VMXON_NEEDED_FEATURES) + != VMXON_NEEDED_FEATURES) { + kvm_inject_gp(vcpu, 0); +@@ -6218,9 +6309,6 @@ out: + return kvm_state.size; + } + +-/* +- * Forcibly leave nested mode in order to be able to reset the VCPU later on. +- */ + void vmx_leave_nested(struct kvm_vcpu *vcpu) + { + if (is_guest_mode(vcpu)) { +@@ -6589,7 +6677,8 @@ void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, u32 ept_caps) + SECONDARY_EXEC_ENABLE_INVPCID | + SECONDARY_EXEC_RDSEED_EXITING | + SECONDARY_EXEC_XSAVES | +- SECONDARY_EXEC_TSC_SCALING; ++ SECONDARY_EXEC_TSC_SCALING | ++ SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE; + + /* + * We can emulate "VMCS shadowing," even if the hardware +@@ -6697,6 +6786,9 @@ void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, u32 ept_caps) rdmsrl(MSR_IA32_VMX_CR0_FIXED1, msrs->cr0_fixed1); rdmsrl(MSR_IA32_VMX_CR4_FIXED1, msrs->cr4_fixed1); @@ -53578,7 +64777,7 @@ index eedcebf580041..91b182fafb43d 100644 msrs->vmcs_enum = nested_vmx_calc_vmcs_enum_msr(); } -@@ -6750,6 +6811,7 @@ __init int nested_vmx_hardware_setup(int (*exit_handlers[])(struct kvm_vcpu *)) +@@ -6750,6 +6842,7 @@ __init int nested_vmx_hardware_setup(int (*exit_handlers[])(struct kvm_vcpu *)) } struct kvm_x86_nested_ops vmx_nested_ops = { @@ -53884,6 +65083,22 @@ index 0000000000000..edc3f16cc1896 +#define VMX_RUN_SAVE_SPEC_CTRL (1 << 1) + +#endif /* __KVM_X86_VMX_RUN_FLAGS_H */ +diff --git a/arch/x86/kvm/vmx/sgx.c b/arch/x86/kvm/vmx/sgx.c +index 6693ebdc07701..b8cf9a59c145e 100644 +--- a/arch/x86/kvm/vmx/sgx.c ++++ b/arch/x86/kvm/vmx/sgx.c +@@ -188,8 +188,10 @@ static int __handle_encls_ecreate(struct kvm_vcpu *vcpu, + /* Enforce CPUID restriction on max enclave size. */ + max_size_log2 = (attributes & SGX_ATTR_MODE64BIT) ? 
sgx_12_0->edx >> 8 : + sgx_12_0->edx; +- if (size >= BIT_ULL(max_size_log2)) ++ if (size >= BIT_ULL(max_size_log2)) { + kvm_inject_gp(vcpu, 0); ++ return 1; ++ } + + /* + * sgx_virt_ecreate() returns: diff --git a/arch/x86/kvm/vmx/vmcs.h b/arch/x86/kvm/vmx/vmcs.h index 6e5de2e2b0da6..4de2a6e3b1900 100644 --- a/arch/x86/kvm/vmx/vmcs.h @@ -54167,7 +65382,7 @@ index 3a6461694fc25..982138bebb70f 100644 + RET SYM_FUNC_END(vmx_do_interrupt_nmi_irqoff) diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c -index 7d595effb66f0..ba457167ae8a2 100644 +index 7d595effb66f0..c849173b60c27 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -226,6 +226,9 @@ static const struct { @@ -54285,7 +65500,20 @@ index 7d595effb66f0..ba457167ae8a2 100644 } static void clear_atomic_switch_msr_special(struct vcpu_vmx *vmx, -@@ -1370,6 +1433,11 @@ void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) +@@ -1269,8 +1332,10 @@ void vmx_vcpu_load_vmcs(struct kvm_vcpu *vcpu, int cpu, + + /* + * No indirect branch prediction barrier needed when switching +- * the active VMCS within a guest, e.g. on nested VM-Enter. +- * The L1 VMM can protect itself with retpolines, IBPB or IBRS. ++ * the active VMCS within a vCPU, unless IBRS is advertised to ++ * the vCPU. To minimize the number of IBPBs executed, KVM ++ * performs IBPB on nested VM-Exit (a single nested transition ++ * may switch the active VMCS multiple times). + */ + if (!buddy || WARN_ON_ONCE(buddy->vmcs != prev)) + indirect_branch_prediction_barrier(); +@@ -1370,6 +1435,11 @@ void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) vmx->emulation_required = vmx_emulation_required(vcpu); } @@ -54297,7 +65525,7 @@ index 7d595effb66f0..ba457167ae8a2 100644 u32 vmx_get_interrupt_shadow(struct kvm_vcpu *vcpu) { u32 interruptibility = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO); -@@ -1608,7 +1676,17 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu) +@@ -1608,7 +1678,17 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu) kvm_deliver_exception_payload(vcpu); if (has_error_code) { @@ -54316,7 +65544,7 @@ index 7d595effb66f0..ba457167ae8a2 100644 intr_info |= INTR_INFO_DELIVER_CODE_MASK; } -@@ -2234,6 +2312,10 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) +@@ -2234,6 +2314,10 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) ret = kvm_set_msr_common(vcpu, msr_info); } @@ -54327,7 +65555,7 @@ index 7d595effb66f0..ba457167ae8a2 100644 return ret; } -@@ -2927,6 +3009,13 @@ static void vmx_flush_tlb_all(struct kvm_vcpu *vcpu) +@@ -2927,6 +3011,13 @@ static void vmx_flush_tlb_all(struct kvm_vcpu *vcpu) } } @@ -54341,7 +65569,7 @@ index 7d595effb66f0..ba457167ae8a2 100644 static void vmx_flush_tlb_current(struct kvm_vcpu *vcpu) { struct kvm_mmu *mmu = vcpu->arch.mmu; -@@ -2939,31 +3028,29 @@ static void vmx_flush_tlb_current(struct kvm_vcpu *vcpu) +@@ -2939,31 +3030,29 @@ static void vmx_flush_tlb_current(struct kvm_vcpu *vcpu) if (enable_ept) ept_sync_context(construct_eptp(vcpu, root_hpa, mmu->shadow_root_level)); @@ -54380,7 +65608,7 @@ index 7d595effb66f0..ba457167ae8a2 100644 } void vmx_ept_load_pdptrs(struct kvm_vcpu *vcpu) -@@ -3135,8 +3222,8 @@ static bool vmx_is_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) +@@ -3135,8 +3224,8 @@ static bool vmx_is_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) { /* * We operate under the default treatment of SMM, so VMX cannot be @@ -54391,7 +65619,35 @@ index 7d595effb66f0..ba457167ae8a2 100644 */ if ((cr4 & 
X86_CR4_VMXE) && is_smm(vcpu)) return false; -@@ -3695,46 +3782,6 @@ void free_vpid(int vpid) +@@ -3274,18 +3363,15 @@ static u32 vmx_segment_access_rights(struct kvm_segment *var) + { + u32 ar; + +- if (var->unusable || !var->present) +- ar = 1 << 16; +- else { +- ar = var->type & 15; +- ar |= (var->s & 1) << 4; +- ar |= (var->dpl & 3) << 5; +- ar |= (var->present & 1) << 7; +- ar |= (var->avl & 1) << 12; +- ar |= (var->l & 1) << 13; +- ar |= (var->db & 1) << 14; +- ar |= (var->g & 1) << 15; +- } ++ ar = var->type & 15; ++ ar |= (var->s & 1) << 4; ++ ar |= (var->dpl & 3) << 5; ++ ar |= (var->present & 1) << 7; ++ ar |= (var->avl & 1) << 12; ++ ar |= (var->l & 1) << 13; ++ ar |= (var->db & 1) << 14; ++ ar |= (var->g & 1) << 15; ++ ar |= (var->unusable || !var->present) << 16; + + return ar; + } +@@ -3695,46 +3781,6 @@ void free_vpid(int vpid) spin_unlock(&vmx_vpid_lock); } @@ -54438,7 +65694,7 @@ index 7d595effb66f0..ba457167ae8a2 100644 void vmx_disable_intercept_for_msr(struct kvm_vcpu *vcpu, u32 msr, int type) { struct vcpu_vmx *vmx = to_vmx(vcpu); -@@ -4012,8 +4059,7 @@ static int vmx_deliver_posted_interrupt(struct kvm_vcpu *vcpu, int vector) +@@ -4012,8 +4058,7 @@ static int vmx_deliver_posted_interrupt(struct kvm_vcpu *vcpu, int vector) if (pi_test_and_set_on(&vmx->pi_desc)) return 0; @@ -54448,7 +65704,7 @@ index 7d595effb66f0..ba457167ae8a2 100644 kvm_vcpu_kick(vcpu); return 0; -@@ -4140,6 +4186,11 @@ static void vmx_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu) +@@ -4140,6 +4185,11 @@ static void vmx_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); @@ -54460,7 +65716,7 @@ index 7d595effb66f0..ba457167ae8a2 100644 pin_controls_set(vmx, vmx_pin_based_exec_ctrl(vmx)); if (cpu_has_secondary_exec_ctrls()) { if (kvm_vcpu_apicv_active(vcpu)) -@@ -4487,6 +4538,8 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) +@@ -4487,6 +4537,8 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) kvm_make_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu); vpid_sync_context(vmx->vpid); @@ -54469,7 +65725,7 @@ index 7d595effb66f0..ba457167ae8a2 100644 } static void vmx_enable_irq_window(struct kvm_vcpu *vcpu) -@@ -4833,8 +4886,33 @@ static int handle_exception_nmi(struct kvm_vcpu *vcpu) +@@ -4833,8 +4885,33 @@ static int handle_exception_nmi(struct kvm_vcpu *vcpu) dr6 = vmx_get_exit_qual(vcpu); if (!(vcpu->guest_debug & (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))) { @@ -54503,7 +65759,7 @@ index 7d595effb66f0..ba457167ae8a2 100644 kvm_queue_exception_p(vcpu, DB_VECTOR, dr6); return 1; -@@ -5907,18 +5985,14 @@ static int __vmx_handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath) +@@ -5907,18 +5984,14 @@ static int __vmx_handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath) vmx_flush_pml_buffer(vcpu); /* @@ -54526,7 +65782,7 @@ index 7d595effb66f0..ba457167ae8a2 100644 if (is_guest_mode(vcpu)) { /* * PML is never enabled when running L2, bail immediately if a -@@ -5940,10 +6014,30 @@ static int __vmx_handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath) +@@ -5940,10 +6013,30 @@ static int __vmx_handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath) */ nested_mark_vmcs12_pages_dirty(vcpu); @@ -54557,7 +65813,7 @@ index 7d595effb66f0..ba457167ae8a2 100644 if (exit_reason.failed_vmentry) { dump_vmcs(vcpu); vcpu->run->exit_reason = KVM_EXIT_FAIL_ENTRY; -@@ -6288,9 +6382,9 @@ static int vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu) +@@ -6288,9 +6381,9 @@ static int vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu) { 
struct vcpu_vmx *vmx = to_vmx(vcpu); int max_irr; @@ -54569,7 +65825,7 @@ index 7d595effb66f0..ba457167ae8a2 100644 return -EIO; if (pi_test_on(&vmx->pi_desc)) { -@@ -6300,22 +6394,33 @@ static int vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu) +@@ -6300,22 +6393,33 @@ static int vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu) * But on x86 this is just a compiler barrier anyway. */ smp_mb__after_atomic(); @@ -54615,7 +65871,7 @@ index 7d595effb66f0..ba457167ae8a2 100644 return max_irr; } -@@ -6375,6 +6480,7 @@ static void handle_external_interrupt_irqoff(struct kvm_vcpu *vcpu) +@@ -6375,6 +6479,7 @@ static void handle_external_interrupt_irqoff(struct kvm_vcpu *vcpu) return; handle_interrupt_nmi_irqoff(vcpu, gate_offset(desc)); @@ -54623,7 +65879,7 @@ index 7d595effb66f0..ba457167ae8a2 100644 } static void vmx_handle_exit_irqoff(struct kvm_vcpu *vcpu) -@@ -6576,6 +6682,31 @@ void noinstr vmx_update_host_rsp(struct vcpu_vmx *vmx, unsigned long host_rsp) +@@ -6576,6 +6681,31 @@ void noinstr vmx_update_host_rsp(struct vcpu_vmx *vmx, unsigned long host_rsp) } } @@ -54655,7 +65911,7 @@ index 7d595effb66f0..ba457167ae8a2 100644 static fastpath_t vmx_exit_handlers_fastpath(struct kvm_vcpu *vcpu) { switch (to_vmx(vcpu)->exit_reason.basic) { -@@ -6589,7 +6720,8 @@ static fastpath_t vmx_exit_handlers_fastpath(struct kvm_vcpu *vcpu) +@@ -6589,7 +6719,8 @@ static fastpath_t vmx_exit_handlers_fastpath(struct kvm_vcpu *vcpu) } static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu, @@ -54665,7 +65921,7 @@ index 7d595effb66f0..ba457167ae8a2 100644 { kvm_guest_enter_irqoff(); -@@ -6598,15 +6730,22 @@ static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu, +@@ -6598,15 +6729,22 @@ static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu, vmx_l1d_flush(vcpu); else if (static_branch_unlikely(&mds_user_clear)) mds_clear_cpu_buffers(); @@ -54689,7 +65945,7 @@ index 7d595effb66f0..ba457167ae8a2 100644 kvm_guest_exit_irqoff(); } -@@ -6626,9 +6765,7 @@ static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu) +@@ -6626,9 +6764,7 @@ static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu) * consistency check VM-Exit due to invalid guest state and bail. */ if (unlikely(vmx->emulation_required)) { @@ -54700,7 +65956,7 @@ index 7d595effb66f0..ba457167ae8a2 100644 vmx->exit_reason.full = EXIT_REASON_INVALID_STATE; vmx->exit_reason.failed_vmentry = 1; -@@ -6703,27 +6840,7 @@ static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu) +@@ -6703,27 +6839,7 @@ static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu) x86_spec_ctrl_set_guest(vmx->spec_ctrl, 0); /* The actual VMENTER/EXIT is in the .noinstr.text section. 
*/ @@ -54729,7 +65985,7 @@ index 7d595effb66f0..ba457167ae8a2 100644 /* All fields are clean at this point */ if (static_branch_unlikely(&enable_evmcs)) { -@@ -7524,6 +7641,7 @@ static int vmx_leave_smm(struct kvm_vcpu *vcpu, const char *smstate) +@@ -7524,6 +7640,7 @@ static int vmx_leave_smm(struct kvm_vcpu *vcpu, const char *smstate) if (ret) return ret; @@ -54737,7 +65993,7 @@ index 7d595effb66f0..ba457167ae8a2 100644 vmx->nested.smm.guest_mode = false; } return 0; -@@ -7551,6 +7669,8 @@ static void vmx_migrate_timers(struct kvm_vcpu *vcpu) +@@ -7551,6 +7668,8 @@ static void vmx_migrate_timers(struct kvm_vcpu *vcpu) static void hardware_unsetup(void) { @@ -54746,7 +66002,7 @@ index 7d595effb66f0..ba457167ae8a2 100644 if (nested) nested_vmx_hardware_unsetup(); -@@ -7606,6 +7726,7 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = { +@@ -7606,6 +7725,7 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = { .cache_reg = vmx_cache_reg, .get_rflags = vmx_get_rflags, .set_rflags = vmx_set_rflags, @@ -54754,7 +66010,19 @@ index 7d595effb66f0..ba457167ae8a2 100644 .tlb_flush_all = vmx_flush_tlb_all, .tlb_flush_current = vmx_flush_tlb_current, -@@ -7809,10 +7930,10 @@ static __init int hardware_setup(void) +@@ -7781,6 +7901,11 @@ static __init int hardware_setup(void) + if (!cpu_has_virtual_nmis()) + enable_vnmi = 0; + ++#ifdef CONFIG_X86_SGX_KVM ++ if (!cpu_has_vmx_encls_vmexit()) ++ enable_sgx = false; ++#endif ++ + /* + * set_apic_access_page_addr() is used to reload apic access + * page upon invalidation. No need to do anything if not +@@ -7809,10 +7934,10 @@ static __init int hardware_setup(void) ple_window_shrink = 0; } @@ -54767,7 +66035,7 @@ index 7d595effb66f0..ba457167ae8a2 100644 if (cpu_has_vmx_tsc_scaling()) { kvm_has_tsc_control = true; -@@ -7879,8 +8000,6 @@ static __init int hardware_setup(void) +@@ -7879,8 +8004,6 @@ static __init int hardware_setup(void) vmx_x86_ops.request_immediate_exit = __kvm_request_immediate_exit; } @@ -54776,7 +66044,7 @@ index 7d595effb66f0..ba457167ae8a2 100644 kvm_mce_cap_supported |= MCG_LMCE_P; if (pt_mode != PT_MODE_SYSTEM && pt_mode != PT_MODE_HOST_GUEST) -@@ -7904,6 +8023,9 @@ static __init int hardware_setup(void) +@@ -7904,6 +8027,9 @@ static __init int hardware_setup(void) r = alloc_kvm_area(); if (r) nested_vmx_hardware_unsetup(); @@ -54786,7 +66054,7 @@ index 7d595effb66f0..ba457167ae8a2 100644 return r; } -@@ -7912,6 +8034,7 @@ static struct kvm_x86_init_ops vmx_init_ops __initdata = { +@@ -7912,6 +8038,7 @@ static struct kvm_x86_init_ops vmx_init_ops __initdata = { .disabled_by_bios = vmx_disabled_by_bios, .check_processor_compatibility = vmx_check_processor_compat, .hardware_setup = hardware_setup, @@ -54794,7 +66062,7 @@ index 7d595effb66f0..ba457167ae8a2 100644 .runtime_ops = &vmx_x86_ops, }; -@@ -8020,6 +8143,8 @@ static int __init vmx_init(void) +@@ -8020,6 +8147,8 @@ static int __init vmx_init(void) return r; } @@ -54934,10 +66202,21 @@ index 592217fd7d920..20f1213a93685 100644 { return vmcs_read16(GUEST_INTR_STATUS) & 0xff; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c -index bfe0de3008a60..8648799d48f8b 100644 +index bfe0de3008a60..0622256cd768f 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c -@@ -277,6 +277,8 @@ const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = { +@@ -184,6 +184,10 @@ module_param(force_emulation_prefix, bool, S_IRUGO); + int __read_mostly pi_inject_timer = -1; + module_param(pi_inject_timer, bint, S_IRUGO | S_IWUSR); + ++/* Enable/disable SMT_RSB bug mitigation */ ++bool __read_mostly 
mitigate_smt_rsb; ++module_param(mitigate_smt_rsb, bool, 0444); ++ + /* + * Restoring the host value for MSRs that are only consumed when running in + * usermode, e.g. SYSCALL MSRs and TSC_AUX, can be deferred until the CPU +@@ -277,6 +281,8 @@ const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = { STATS_DESC_COUNTER(VCPU, nested_run), STATS_DESC_COUNTER(VCPU, directed_yield_attempted), STATS_DESC_COUNTER(VCPU, directed_yield_successful), @@ -54946,7 +66225,45 @@ index bfe0de3008a60..8648799d48f8b 100644 STATS_DESC_ICOUNTER(VCPU, guest_mode) }; -@@ -848,6 +850,7 @@ int load_pdptrs(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, unsigned long cr3) +@@ -523,6 +529,7 @@ static int exception_class(int vector) + #define EXCPT_TRAP 1 + #define EXCPT_ABORT 2 + #define EXCPT_INTERRUPT 3 ++#define EXCPT_DB 4 + + static int exception_type(int vector) + { +@@ -533,8 +540,14 @@ static int exception_type(int vector) + + mask = 1 << vector; + +- /* #DB is trap, as instruction watchpoints are handled elsewhere */ +- if (mask & ((1 << DB_VECTOR) | (1 << BP_VECTOR) | (1 << OF_VECTOR))) ++ /* ++ * #DBs can be trap-like or fault-like, the caller must check other CPU ++ * state, e.g. DR6, to determine whether a #DB is a trap or fault. ++ */ ++ if (mask & (1 << DB_VECTOR)) ++ return EXCPT_DB; ++ ++ if (mask & ((1 << BP_VECTOR) | (1 << OF_VECTOR))) + return EXCPT_TRAP; + + if (mask & ((1 << DF_VECTOR) | (1 << MC_VECTOR))) +@@ -599,6 +612,12 @@ void kvm_deliver_exception_payload(struct kvm_vcpu *vcpu) + } + EXPORT_SYMBOL_GPL(kvm_deliver_exception_payload); + ++/* Forcibly leave the nested mode in cases like a vCPU reset */ ++static void kvm_leave_nested(struct kvm_vcpu *vcpu) ++{ ++ kvm_x86_ops.nested_ops->leave_nested(vcpu); ++} ++ + static void kvm_multiple_exception(struct kvm_vcpu *vcpu, + unsigned nr, bool has_error, u32 error_code, + bool has_payload, unsigned long payload, bool reinject) +@@ -848,6 +867,7 @@ int load_pdptrs(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, unsigned long cr3) memcpy(mmu->pdptrs, pdpte, sizeof(mmu->pdptrs)); kvm_register_mark_dirty(vcpu, VCPU_EXREG_PDPTR); @@ -54954,7 +66271,7 @@ index bfe0de3008a60..8648799d48f8b 100644 vcpu->arch.pdptrs_from_userspace = false; out: -@@ -1018,6 +1021,7 @@ static int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr) +@@ -1018,6 +1038,7 @@ static int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr) int kvm_emulate_xsetbv(struct kvm_vcpu *vcpu) { @@ -54962,7 +66279,7 @@ index bfe0de3008a60..8648799d48f8b 100644 if (static_call(kvm_x86_get_cpl)(vcpu) != 0 || __kvm_set_xcr(vcpu, kvm_rcx_read(vcpu), kvm_read_edx_eax(vcpu))) { kvm_inject_gp(vcpu, 0); -@@ -1028,7 +1032,7 @@ int kvm_emulate_xsetbv(struct kvm_vcpu *vcpu) +@@ -1028,7 +1049,7 @@ int kvm_emulate_xsetbv(struct kvm_vcpu *vcpu) } EXPORT_SYMBOL_GPL(kvm_emulate_xsetbv); @@ -54971,7 +66288,7 @@ index bfe0de3008a60..8648799d48f8b 100644 { if (cr4 & cr4_reserved_bits) return false; -@@ -1036,9 +1040,15 @@ bool kvm_is_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) +@@ -1036,9 +1057,15 @@ bool kvm_is_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) if (cr4 & vcpu->arch.cr4_guest_rsvd_bits) return false; @@ -54989,7 +66306,7 @@ index bfe0de3008a60..8648799d48f8b 100644 void kvm_post_set_cr4(struct kvm_vcpu *vcpu, unsigned long old_cr4, unsigned long cr4) { -@@ -1091,6 +1101,18 @@ static void kvm_invalidate_pcid(struct kvm_vcpu *vcpu, unsigned long pcid) +@@ -1091,6 +1118,18 @@ static void kvm_invalidate_pcid(struct kvm_vcpu *vcpu, unsigned long pcid) unsigned long roots_to_free = 0; 
int i; @@ -55008,7 +66325,7 @@ index bfe0de3008a60..8648799d48f8b 100644 /* * If neither the current CR3 nor any of the prev_roots use the given * PCID, then nothing needs to be done here because a resync will -@@ -1311,7 +1333,7 @@ static const u32 msrs_to_save_all[] = { +@@ -1311,27 +1350,17 @@ static const u32 msrs_to_save_all[] = { MSR_IA32_UMWAIT_CONTROL, MSR_ARCH_PERFMON_FIXED_CTR0, MSR_ARCH_PERFMON_FIXED_CTR1, @@ -55017,7 +66334,36 @@ index bfe0de3008a60..8648799d48f8b 100644 MSR_CORE_PERF_FIXED_CTR_CTRL, MSR_CORE_PERF_GLOBAL_STATUS, MSR_CORE_PERF_GLOBAL_CTRL, MSR_CORE_PERF_GLOBAL_OVF_CTRL, MSR_ARCH_PERFMON_PERFCTR0, MSR_ARCH_PERFMON_PERFCTR1, -@@ -1444,12 +1466,32 @@ static const u32 msr_based_features_all[] = { + MSR_ARCH_PERFMON_PERFCTR0 + 2, MSR_ARCH_PERFMON_PERFCTR0 + 3, + MSR_ARCH_PERFMON_PERFCTR0 + 4, MSR_ARCH_PERFMON_PERFCTR0 + 5, + MSR_ARCH_PERFMON_PERFCTR0 + 6, MSR_ARCH_PERFMON_PERFCTR0 + 7, +- MSR_ARCH_PERFMON_PERFCTR0 + 8, MSR_ARCH_PERFMON_PERFCTR0 + 9, +- MSR_ARCH_PERFMON_PERFCTR0 + 10, MSR_ARCH_PERFMON_PERFCTR0 + 11, +- MSR_ARCH_PERFMON_PERFCTR0 + 12, MSR_ARCH_PERFMON_PERFCTR0 + 13, +- MSR_ARCH_PERFMON_PERFCTR0 + 14, MSR_ARCH_PERFMON_PERFCTR0 + 15, +- MSR_ARCH_PERFMON_PERFCTR0 + 16, MSR_ARCH_PERFMON_PERFCTR0 + 17, + MSR_ARCH_PERFMON_EVENTSEL0, MSR_ARCH_PERFMON_EVENTSEL1, + MSR_ARCH_PERFMON_EVENTSEL0 + 2, MSR_ARCH_PERFMON_EVENTSEL0 + 3, + MSR_ARCH_PERFMON_EVENTSEL0 + 4, MSR_ARCH_PERFMON_EVENTSEL0 + 5, + MSR_ARCH_PERFMON_EVENTSEL0 + 6, MSR_ARCH_PERFMON_EVENTSEL0 + 7, +- MSR_ARCH_PERFMON_EVENTSEL0 + 8, MSR_ARCH_PERFMON_EVENTSEL0 + 9, +- MSR_ARCH_PERFMON_EVENTSEL0 + 10, MSR_ARCH_PERFMON_EVENTSEL0 + 11, +- MSR_ARCH_PERFMON_EVENTSEL0 + 12, MSR_ARCH_PERFMON_EVENTSEL0 + 13, +- MSR_ARCH_PERFMON_EVENTSEL0 + 14, MSR_ARCH_PERFMON_EVENTSEL0 + 15, +- MSR_ARCH_PERFMON_EVENTSEL0 + 16, MSR_ARCH_PERFMON_EVENTSEL0 + 17, + + MSR_K7_EVNTSEL0, MSR_K7_EVNTSEL1, MSR_K7_EVNTSEL2, MSR_K7_EVNTSEL3, + MSR_K7_PERFCTR0, MSR_K7_PERFCTR1, MSR_K7_PERFCTR2, MSR_K7_PERFCTR3, +@@ -1435,7 +1464,7 @@ static const u32 msr_based_features_all[] = { + MSR_IA32_VMX_EPT_VPID_CAP, + MSR_IA32_VMX_VMFUNC, + +- MSR_F10H_DECFG, ++ MSR_AMD64_DE_CFG, + MSR_IA32_UCODE_REV, + MSR_IA32_ARCH_CAPABILITIES, + MSR_IA32_PERF_CAPABILITIES, +@@ -1444,12 +1473,32 @@ static const u32 msr_based_features_all[] = { static u32 msr_based_features[ARRAY_SIZE(msr_based_features_all)]; static unsigned int num_msr_based_features; @@ -55051,7 +66397,7 @@ index bfe0de3008a60..8648799d48f8b 100644 /* * If nx_huge_pages is enabled, KVM's shadow paging will ensure that -@@ -1592,8 +1634,7 @@ static int set_efer(struct kvm_vcpu *vcpu, struct msr_data *msr_info) +@@ -1592,8 +1641,7 @@ static int set_efer(struct kvm_vcpu *vcpu, struct msr_data *msr_info) return r; } @@ -55061,7 +66407,7 @@ index bfe0de3008a60..8648799d48f8b 100644 kvm_mmu_reset_context(vcpu); return 0; -@@ -3079,17 +3120,20 @@ static int set_msr_mce(struct kvm_vcpu *vcpu, struct msr_data *msr_info) +@@ -3079,17 +3127,20 @@ static int set_msr_mce(struct kvm_vcpu *vcpu, struct msr_data *msr_info) /* only 0 or all 1s can be written to IA32_MCi_CTL * some Linux kernels though clear bit 10 in bank 4 to * workaround a BIOS/GART TBL issue on AMD K8s, ignore @@ -55086,7 +66432,7 @@ index bfe0de3008a60..8648799d48f8b 100644 } vcpu->arch.mce_banks[offset] = data; -@@ -3193,10 +3237,37 @@ static void kvm_vcpu_flush_tlb_guest(struct kvm_vcpu *vcpu) +@@ -3193,10 +3244,37 @@ static void kvm_vcpu_flush_tlb_guest(struct kvm_vcpu *vcpu) static_call(kvm_x86_tlb_flush_guest)(vcpu); } @@ -55126,7 
+66472,7 @@ index bfe0de3008a60..8648799d48f8b 100644 if (kvm_xen_msr_enabled(vcpu->kvm)) { kvm_xen_runstate_set_running(vcpu); -@@ -3206,47 +3277,85 @@ static void record_steal_time(struct kvm_vcpu *vcpu) +@@ -3206,47 +3284,85 @@ static void record_steal_time(struct kvm_vcpu *vcpu) if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED)) return; @@ -55139,7 +66485,7 @@ index bfe0de3008a60..8648799d48f8b 100644 - st = map.hva + - offset_in_page(vcpu->arch.st.msr_val & KVM_STEAL_VALID_BITS); + slots = kvm_memslots(vcpu->kvm); -+ + + if (unlikely(slots->generation != ghc->generation || + gpa != ghc->gpa || + kvm_is_error_hva(ghc->hva) || !ghc->memslot)) { @@ -55150,7 +66496,7 @@ index bfe0de3008a60..8648799d48f8b 100644 + kvm_is_error_hva(ghc->hva) || !ghc->memslot) + return; + } - ++ + st = (struct kvm_steal_time __user *)ghc->hva; /* * Doing a TLB flush here, on the guest's behalf, can avoid @@ -55216,11 +66562,11 @@ index bfe0de3008a60..8648799d48f8b 100644 + unsafe_put_user(steal, &st->steal, out); - smp_wmb(); +- +- st->version += 1; + version += 1; + unsafe_put_user(version, &st->version, out); -- st->version += 1; -- - kvm_unmap_gfn(vcpu, &map, &vcpu->arch.st.cache, true, false); + out: + user_access_end(); @@ -55229,7 +66575,7 @@ index bfe0de3008a60..8648799d48f8b 100644 } int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) -@@ -3282,7 +3391,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) +@@ -3282,7 +3398,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) if (!msr_info->host_initiated) return 1; @@ -55238,7 +66584,7 @@ index bfe0de3008a60..8648799d48f8b 100644 return 1; if (data & ~msr_ent.data) return 1; -@@ -3376,6 +3485,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) +@@ -3376,6 +3492,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) if (data & ~supported_xss) return 1; vcpu->arch.ia32_xss = data; @@ -55246,7 +66592,27 @@ index bfe0de3008a60..8648799d48f8b 100644 break; case MSR_SMI_COUNT: if (!msr_info->host_initiated) -@@ -4285,44 +4395,70 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) +@@ -4051,10 +4168,15 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) + r = KVM_CLOCK_TSC_STABLE; + break; + case KVM_CAP_X86_DISABLE_EXITS: +- r |= KVM_X86_DISABLE_EXITS_HLT | KVM_X86_DISABLE_EXITS_PAUSE | +- KVM_X86_DISABLE_EXITS_CSTATE; +- if(kvm_can_mwait_in_guest()) +- r |= KVM_X86_DISABLE_EXITS_MWAIT; ++ r = KVM_X86_DISABLE_EXITS_PAUSE; ++ ++ if (!mitigate_smt_rsb) { ++ r |= KVM_X86_DISABLE_EXITS_HLT | ++ KVM_X86_DISABLE_EXITS_CSTATE; ++ ++ if (kvm_can_mwait_in_guest()) ++ r |= KVM_X86_DISABLE_EXITS_MWAIT; ++ } + break; + case KVM_CAP_X86_SMM: + /* SMBASE is usually relocated above 1M on modern chipsets, +@@ -4285,44 +4407,70 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) static void kvm_steal_time_set_preempted(struct kvm_vcpu *vcpu) { @@ -55257,7 +66623,7 @@ index bfe0de3008a60..8648799d48f8b 100644 + struct kvm_memslots *slots; + static const u8 preempted = KVM_VCPU_PREEMPTED; + gpa_t gpa = vcpu->arch.st.msr_val & KVM_STEAL_VALID_BITS; -+ + + /* + * The vCPU can be marked preempted if and only if the VM-Exit was on + * an instruction boundary and will not trigger guest emulation of any @@ -55269,7 +66635,7 @@ index bfe0de3008a60..8648799d48f8b 100644 + vcpu->stat.preemption_other++; + return; + } - ++ + vcpu->stat.preemption_reported++; if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED)) return; @@ -55292,14 +66658,14 @@ index 
bfe0de3008a60..8648799d48f8b 100644 + gpa != ghc->gpa || + kvm_is_error_hva(ghc->hva) || !ghc->memslot)) + return; -+ + +- kvm_unmap_gfn(vcpu, &map, &vcpu->arch.st.cache, true, true); + st = (struct kvm_steal_time __user *)ghc->hva; + BUILD_BUG_ON(sizeof(st->preempted) != sizeof(preempted)); + + if (!copy_to_user_nofault(&st->preempted, &preempted, sizeof(preempted))) + vcpu->arch.st.preempted = KVM_VCPU_PREEMPTED; - -- kvm_unmap_gfn(vcpu, &map, &vcpu->arch.st.cache, true, true); ++ + mark_page_dirty_in_slot(vcpu->kvm, ghc->memslot, gpa_to_gfn(ghc->gpa)); } @@ -55337,7 +66703,7 @@ index bfe0de3008a60..8648799d48f8b 100644 static_call(kvm_x86_vcpu_put)(vcpu); vcpu->arch.last_host_tsc = rdtsc(); -@@ -4331,8 +4467,7 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) +@@ -4331,8 +4479,7 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s) { @@ -55347,19 +66713,218 @@ index bfe0de3008a60..8648799d48f8b 100644 return kvm_apic_get_state(vcpu, s); } -@@ -4642,8 +4777,10 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu, +@@ -4642,8 +4789,10 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu, vcpu->arch.apic->sipi_vector = events->sipi_vector; if (events->flags & KVM_VCPUEVENT_VALID_SMM) { - if (!!(vcpu->arch.hflags & HF_SMM_MASK) != events->smi.smm) + if (!!(vcpu->arch.hflags & HF_SMM_MASK) != events->smi.smm) { -+ kvm_x86_ops.nested_ops->leave_nested(vcpu); ++ kvm_leave_nested(vcpu); kvm_smm_changed(vcpu, events->smi.smm); + } vcpu->arch.smi_pending = events->smi.pending; -@@ -6803,15 +6940,8 @@ static int emulator_write_emulated(struct x86_emulate_ctxt *ctxt, +@@ -4672,12 +4821,11 @@ static void kvm_vcpu_ioctl_x86_get_debugregs(struct kvm_vcpu *vcpu, + { + unsigned long val; + ++ memset(dbgregs, 0, sizeof(*dbgregs)); + memcpy(dbgregs->db, vcpu->arch.db, sizeof(vcpu->arch.db)); + kvm_get_dr(vcpu, 6, &val); + dbgregs->dr6 = val; + dbgregs->dr7 = vcpu->arch.dr7; +- dbgregs->flags = 0; +- memset(&dbgregs->reserved, 0, sizeof(dbgregs->reserved)); + } + + static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu, +@@ -5606,15 +5754,26 @@ split_irqchip_unlock: + if (cap->args[0] & ~KVM_X86_DISABLE_VALID_EXITS) + break; + +- if ((cap->args[0] & KVM_X86_DISABLE_EXITS_MWAIT) && +- kvm_can_mwait_in_guest()) +- kvm->arch.mwait_in_guest = true; +- if (cap->args[0] & KVM_X86_DISABLE_EXITS_HLT) +- kvm->arch.hlt_in_guest = true; + if (cap->args[0] & KVM_X86_DISABLE_EXITS_PAUSE) + kvm->arch.pause_in_guest = true; +- if (cap->args[0] & KVM_X86_DISABLE_EXITS_CSTATE) +- kvm->arch.cstate_in_guest = true; ++ ++#define SMT_RSB_MSG "This processor is affected by the Cross-Thread Return Predictions vulnerability. " \ ++ "KVM_CAP_X86_DISABLE_EXITS should only be used with SMT disabled or trusted guests." 
++ ++ if (!mitigate_smt_rsb) { ++ if (boot_cpu_has_bug(X86_BUG_SMT_RSB) && cpu_smt_possible() && ++ (cap->args[0] & ~KVM_X86_DISABLE_EXITS_PAUSE)) ++ pr_warn_once(SMT_RSB_MSG); ++ ++ if ((cap->args[0] & KVM_X86_DISABLE_EXITS_MWAIT) && ++ kvm_can_mwait_in_guest()) ++ kvm->arch.mwait_in_guest = true; ++ if (cap->args[0] & KVM_X86_DISABLE_EXITS_HLT) ++ kvm->arch.hlt_in_guest = true; ++ if (cap->args[0] & KVM_X86_DISABLE_EXITS_CSTATE) ++ kvm->arch.cstate_in_guest = true; ++ } ++ + r = 0; + break; + case KVM_CAP_MSR_PLATFORM_INFO: +@@ -5626,6 +5785,11 @@ split_irqchip_unlock: + r = 0; + break; + case KVM_CAP_X86_USER_SPACE_MSR: ++ r = -EINVAL; ++ if (cap->args[0] & ~(KVM_MSR_EXIT_REASON_INVAL | ++ KVM_MSR_EXIT_REASON_UNKNOWN | ++ KVM_MSR_EXIT_REASON_FILTER)) ++ break; + kvm->arch.user_space_msr_mask = cap->args[0]; + r = 0; + break; +@@ -5746,23 +5910,22 @@ static int kvm_add_msr_filter(struct kvm_x86_msr_filter *msr_filter, + return 0; + } + +-static int kvm_vm_ioctl_set_msr_filter(struct kvm *kvm, void __user *argp) ++static int kvm_vm_ioctl_set_msr_filter(struct kvm *kvm, ++ struct kvm_msr_filter *filter) + { +- struct kvm_msr_filter __user *user_msr_filter = argp; + struct kvm_x86_msr_filter *new_filter, *old_filter; +- struct kvm_msr_filter filter; + bool default_allow; + bool empty = true; + int r = 0; + u32 i; + +- if (copy_from_user(&filter, user_msr_filter, sizeof(filter))) +- return -EFAULT; ++ if (filter->flags & ~KVM_MSR_FILTER_DEFAULT_DENY) ++ return -EINVAL; + +- for (i = 0; i < ARRAY_SIZE(filter.ranges); i++) +- empty &= !filter.ranges[i].nmsrs; ++ for (i = 0; i < ARRAY_SIZE(filter->ranges); i++) ++ empty &= !filter->ranges[i].nmsrs; + +- default_allow = !(filter.flags & KVM_MSR_FILTER_DEFAULT_DENY); ++ default_allow = !(filter->flags & KVM_MSR_FILTER_DEFAULT_DENY); + if (empty && !default_allow) + return -EINVAL; + +@@ -5770,8 +5933,8 @@ static int kvm_vm_ioctl_set_msr_filter(struct kvm *kvm, void __user *argp) + if (!new_filter) + return -ENOMEM; + +- for (i = 0; i < ARRAY_SIZE(filter.ranges); i++) { +- r = kvm_add_msr_filter(new_filter, &filter.ranges[i]); ++ for (i = 0; i < ARRAY_SIZE(filter->ranges); i++) { ++ r = kvm_add_msr_filter(new_filter, &filter->ranges[i]); + if (r) { + kvm_free_msr_filter(new_filter); + return r; +@@ -5794,6 +5957,62 @@ static int kvm_vm_ioctl_set_msr_filter(struct kvm *kvm, void __user *argp) + return 0; + } + ++#ifdef CONFIG_KVM_COMPAT ++/* for KVM_X86_SET_MSR_FILTER */ ++struct kvm_msr_filter_range_compat { ++ __u32 flags; ++ __u32 nmsrs; ++ __u32 base; ++ __u32 bitmap; ++}; ++ ++struct kvm_msr_filter_compat { ++ __u32 flags; ++ struct kvm_msr_filter_range_compat ranges[KVM_MSR_FILTER_MAX_RANGES]; ++}; ++ ++#define KVM_X86_SET_MSR_FILTER_COMPAT _IOW(KVMIO, 0xc6, struct kvm_msr_filter_compat) ++ ++long kvm_arch_vm_compat_ioctl(struct file *filp, unsigned int ioctl, ++ unsigned long arg) ++{ ++ void __user *argp = (void __user *)arg; ++ struct kvm *kvm = filp->private_data; ++ long r = -ENOTTY; ++ ++ switch (ioctl) { ++ case KVM_X86_SET_MSR_FILTER_COMPAT: { ++ struct kvm_msr_filter __user *user_msr_filter = argp; ++ struct kvm_msr_filter_compat filter_compat; ++ struct kvm_msr_filter filter; ++ int i; ++ ++ if (copy_from_user(&filter_compat, user_msr_filter, ++ sizeof(filter_compat))) ++ return -EFAULT; ++ ++ filter.flags = filter_compat.flags; ++ for (i = 0; i < ARRAY_SIZE(filter.ranges); i++) { ++ struct kvm_msr_filter_range_compat *cr; ++ ++ cr = &filter_compat.ranges[i]; ++ filter.ranges[i] = (struct kvm_msr_filter_range) { ++ .flags = 
cr->flags, ++ .nmsrs = cr->nmsrs, ++ .base = cr->base, ++ .bitmap = (__u8 *)(ulong)cr->bitmap, ++ }; ++ } ++ ++ r = kvm_vm_ioctl_set_msr_filter(kvm, &filter); ++ break; ++ } ++ } ++ ++ return r; ++} ++#endif ++ + #ifdef CONFIG_HAVE_KVM_PM_NOTIFIER + static int kvm_arch_suspend_notifier(struct kvm *kvm) + { +@@ -6168,9 +6387,16 @@ set_pit2_out: + case KVM_SET_PMU_EVENT_FILTER: + r = kvm_vm_ioctl_set_pmu_event_filter(kvm, argp); + break; +- case KVM_X86_SET_MSR_FILTER: +- r = kvm_vm_ioctl_set_msr_filter(kvm, argp); ++ case KVM_X86_SET_MSR_FILTER: { ++ struct kvm_msr_filter __user *user_msr_filter = argp; ++ struct kvm_msr_filter filter; ++ ++ if (copy_from_user(&filter, user_msr_filter, sizeof(filter))) ++ return -EFAULT; ++ ++ r = kvm_vm_ioctl_set_msr_filter(kvm, &filter); + break; ++ } + default: + r = -ENOTTY; + } +@@ -6238,12 +6464,12 @@ static void kvm_init_msr_list(void) + intel_pt_validate_hw_cap(PT_CAP_num_address_ranges) * 2) + continue; + break; +- case MSR_ARCH_PERFMON_PERFCTR0 ... MSR_ARCH_PERFMON_PERFCTR0 + 17: ++ case MSR_ARCH_PERFMON_PERFCTR0 ... MSR_ARCH_PERFMON_PERFCTR0 + 7: + if (msrs_to_save_all[i] - MSR_ARCH_PERFMON_PERFCTR0 >= + min(INTEL_PMC_MAX_GENERIC, x86_pmu.num_counters_gp)) + continue; + break; +- case MSR_ARCH_PERFMON_EVENTSEL0 ... MSR_ARCH_PERFMON_EVENTSEL0 + 17: ++ case MSR_ARCH_PERFMON_EVENTSEL0 ... MSR_ARCH_PERFMON_EVENTSEL0 + 7: + if (msrs_to_save_all[i] - MSR_ARCH_PERFMON_EVENTSEL0 >= + min(INTEL_PMC_MAX_GENERIC, x86_pmu.num_counters_gp)) + continue; +@@ -6803,15 +7029,8 @@ static int emulator_write_emulated(struct x86_emulate_ctxt *ctxt, exception, &write_emultor); } @@ -55377,7 +66942,7 @@ index bfe0de3008a60..8648799d48f8b 100644 static int emulator_cmpxchg_emulated(struct x86_emulate_ctxt *ctxt, unsigned long addr, -@@ -6820,12 +6950,11 @@ static int emulator_cmpxchg_emulated(struct x86_emulate_ctxt *ctxt, +@@ -6820,12 +7039,11 @@ static int emulator_cmpxchg_emulated(struct x86_emulate_ctxt *ctxt, unsigned int bytes, struct x86_exception *exception) { @@ -55392,7 +66957,7 @@ index bfe0de3008a60..8648799d48f8b 100644 /* guests cmpxchg8b have to be emulated atomically */ if (bytes > 8 || (bytes & (bytes - 1))) -@@ -6849,31 +6978,32 @@ static int emulator_cmpxchg_emulated(struct x86_emulate_ctxt *ctxt, +@@ -6849,31 +7067,32 @@ static int emulator_cmpxchg_emulated(struct x86_emulate_ctxt *ctxt, if (((gpa + bytes - 1) & page_line_mask) != (gpa & page_line_mask)) goto emul_write; @@ -55434,7 +66999,7 @@ index bfe0de3008a60..8648799d48f8b 100644 return X86EMUL_CMPXCHG_FAILED; kvm_page_track_write(vcpu, gpa, new, bytes); -@@ -6948,7 +7078,13 @@ static int emulator_pio_in(struct kvm_vcpu *vcpu, int size, +@@ -6948,7 +7167,13 @@ static int emulator_pio_in(struct kvm_vcpu *vcpu, int size, unsigned short port, void *val, unsigned int count) { if (vcpu->arch.pio.count) { @@ -55449,7 +67014,7 @@ index bfe0de3008a60..8648799d48f8b 100644 } else { int r = __emulator_pio_in(vcpu, size, port, count); if (!r) -@@ -6957,7 +7093,6 @@ static int emulator_pio_in(struct kvm_vcpu *vcpu, int size, +@@ -6957,7 +7182,6 @@ static int emulator_pio_in(struct kvm_vcpu *vcpu, int size, /* Results already available, fall through. 
*/ } @@ -55457,7 +67022,7 @@ index bfe0de3008a60..8648799d48f8b 100644 complete_emulator_pio_in(vcpu, val); return 1; } -@@ -7300,6 +7435,11 @@ static bool emulator_guest_has_fxsr(struct x86_emulate_ctxt *ctxt) +@@ -7300,6 +7524,11 @@ static bool emulator_guest_has_fxsr(struct x86_emulate_ctxt *ctxt) return guest_cpuid_has(emul_to_vcpu(ctxt), X86_FEATURE_FXSR); } @@ -55469,7 +67034,7 @@ index bfe0de3008a60..8648799d48f8b 100644 static ulong emulator_read_gpr(struct x86_emulate_ctxt *ctxt, unsigned reg) { return kvm_register_read_raw(emul_to_vcpu(ctxt), reg); -@@ -7382,6 +7522,7 @@ static const struct x86_emulate_ops emulate_ops = { +@@ -7382,6 +7611,7 @@ static const struct x86_emulate_ops emulate_ops = { .guest_has_long_mode = emulator_guest_has_long_mode, .guest_has_movbe = emulator_guest_has_movbe, .guest_has_fxsr = emulator_guest_has_fxsr, @@ -55477,7 +67042,7 @@ index bfe0de3008a60..8648799d48f8b 100644 .set_nmi_mask = emulator_set_nmi_mask, .get_hflags = emulator_get_hflags, .exiting_smm = emulator_exiting_smm, -@@ -7747,7 +7888,7 @@ int kvm_skip_emulated_instruction(struct kvm_vcpu *vcpu) +@@ -7747,7 +7977,7 @@ int kvm_skip_emulated_instruction(struct kvm_vcpu *vcpu) } EXPORT_SYMBOL_GPL(kvm_skip_emulated_instruction); @@ -55486,7 +67051,7 @@ index bfe0de3008a60..8648799d48f8b 100644 { if (unlikely(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) && (vcpu->arch.guest_debug_dr7 & DR7_BP_EN_MASK)) { -@@ -7816,25 +7957,23 @@ static bool is_vmware_backdoor_opcode(struct x86_emulate_ctxt *ctxt) +@@ -7816,25 +8046,23 @@ static bool is_vmware_backdoor_opcode(struct x86_emulate_ctxt *ctxt) } /* @@ -55521,7 +67086,7 @@ index bfe0de3008a60..8648799d48f8b 100644 r = x86_decode_insn(ctxt, insn, insn_len, emulation_type); trace_kvm_emulate_insn_start(vcpu); -@@ -7867,6 +8006,15 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, +@@ -7867,6 +8095,15 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, if (!(emulation_type & EMULTYPE_NO_DECODE)) { kvm_clear_exception_queue(vcpu); @@ -55537,7 +67102,18 @@ index bfe0de3008a60..8648799d48f8b 100644 r = x86_decode_emulated_instruction(vcpu, emulation_type, insn, insn_len); if (r != EMULATION_OK) { -@@ -7905,7 +8053,12 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, +@@ -7879,7 +8116,9 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, + write_fault_to_spt, + emulation_type)) + return 1; +- if (ctxt->have_exception) { ++ ++ if (ctxt->have_exception && ++ !(emulation_type & EMULTYPE_SKIP)) { + /* + * #UD should result in just EMULATION_FAILED, and trap-like + * exception should not be encountered during decode. +@@ -7905,7 +8144,12 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, * updating interruptibility state and injecting single-step #DBs. 
*/ if (emulation_type & EMULTYPE_SKIP) { @@ -55551,7 +67127,7 @@ index bfe0de3008a60..8648799d48f8b 100644 if (ctxt->eflags & X86_EFLAGS_RF) kvm_set_rflags(vcpu, ctxt->eflags & ~X86_EFLAGS_RF); return 1; -@@ -7969,6 +8122,9 @@ restart: +@@ -7969,6 +8213,9 @@ restart: writeback = false; r = 0; vcpu->arch.complete_userspace_io = complete_emulated_mmio; @@ -55561,7 +67137,20 @@ index bfe0de3008a60..8648799d48f8b 100644 } else if (r == EMULATION_RESTART) goto restart; else -@@ -8340,7 +8496,7 @@ static struct perf_guest_info_callbacks kvm_guest_cbs = { +@@ -7978,6 +8225,12 @@ restart: + unsigned long rflags = static_call(kvm_x86_get_rflags)(vcpu); + toggle_interruptibility(vcpu, ctxt->interruptibility); + vcpu->arch.emulate_regs_need_sync_to_vcpu = false; ++ ++ /* ++ * Note, EXCPT_DB is assumed to be fault-like as the emulator ++ * only supports code breakpoints and general detect #DB, both ++ * of which are fault-like. ++ */ + if (!ctxt->have_exception || + exception_type(ctxt->exception.vector) == EXCPT_TRAP) { + kvm_rip_write(vcpu, ctxt->eip); +@@ -8340,7 +8593,7 @@ static struct perf_guest_info_callbacks kvm_guest_cbs = { .is_in_guest = kvm_is_in_guest, .is_user_mode = kvm_is_user_mode, .get_guest_ip = kvm_get_guest_ip, @@ -55570,7 +67159,7 @@ index bfe0de3008a60..8648799d48f8b 100644 }; #ifdef CONFIG_X86_64 -@@ -8455,14 +8611,12 @@ int kvm_arch_init(void *opaque) +@@ -8455,14 +8708,12 @@ int kvm_arch_init(void *opaque) } kvm_nr_uret_msrs = 0; @@ -55586,7 +67175,7 @@ index bfe0de3008a60..8648799d48f8b 100644 if (boot_cpu_has(X86_FEATURE_XSAVE)) { host_xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK); supported_xcr0 = host_xcr0 & KVM_SUPPORTED_XCR0; -@@ -8496,7 +8650,6 @@ void kvm_arch_exit(void) +@@ -8496,7 +8747,6 @@ void kvm_arch_exit(void) clear_hv_tscchange_cb(); #endif kvm_lapic_exit(); @@ -55594,7 +67183,7 @@ index bfe0de3008a60..8648799d48f8b 100644 if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) cpufreq_unregister_notifier(&kvmclock_cpufreq_notifier_block, -@@ -8508,7 +8661,7 @@ void kvm_arch_exit(void) +@@ -8508,7 +8758,7 @@ void kvm_arch_exit(void) cancel_work_sync(&pvclock_gtod_work); #endif kvm_x86_ops.hardware_enable = NULL; @@ -55603,7 +67192,7 @@ index bfe0de3008a60..8648799d48f8b 100644 free_percpu(user_return_msrs); kmem_cache_destroy(x86_emulator_cache); kmem_cache_destroy(x86_fpu_cache); -@@ -8567,6 +8720,13 @@ static int kvm_pv_clock_pairing(struct kvm_vcpu *vcpu, gpa_t paddr, +@@ -8567,6 +8817,13 @@ static int kvm_pv_clock_pairing(struct kvm_vcpu *vcpu, gpa_t paddr, if (clock_type != KVM_CLOCK_PAIRING_WALLCLOCK) return -KVM_EOPNOTSUPP; @@ -55617,7 +67206,7 @@ index bfe0de3008a60..8648799d48f8b 100644 if (!kvm_get_walltime_and_clockread(&ts, &cycle)) return -KVM_EOPNOTSUPP; -@@ -8592,15 +8752,17 @@ static int kvm_pv_clock_pairing(struct kvm_vcpu *vcpu, gpa_t paddr, +@@ -8592,15 +8849,17 @@ static int kvm_pv_clock_pairing(struct kvm_vcpu *vcpu, gpa_t paddr, */ static void kvm_pv_kick_cpu_op(struct kvm *kvm, unsigned long flags, int apicid) { @@ -55643,7 +67232,7 @@ index bfe0de3008a60..8648799d48f8b 100644 kvm_irq_delivery_to_apic(kvm, NULL, &lapic_irq, NULL); } -@@ -8686,7 +8848,7 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu) +@@ -8686,7 +8945,7 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu) trace_kvm_hypercall(nr, a0, a1, a2, a3); @@ -55652,7 +67241,7 @@ index bfe0de3008a60..8648799d48f8b 100644 if (!op_64_bit) { nr &= 0xFFFFFFFF; a0 &= 0xFFFFFFFF; -@@ -8790,14 +8952,7 @@ static void post_kvm_run_save(struct kvm_vcpu *vcpu) +@@ -8790,14 +9049,7 @@ static void 
post_kvm_run_save(struct kvm_vcpu *vcpu) { struct kvm_run *kvm_run = vcpu->run; @@ -55668,7 +67257,54 @@ index bfe0de3008a60..8648799d48f8b 100644 kvm_run->cr8 = kvm_get_cr8(vcpu); kvm_run->apic_base = kvm_get_apic_base(vcpu); -@@ -9359,8 +9514,7 @@ static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu) +@@ -8855,6 +9107,11 @@ int kvm_check_nested_events(struct kvm_vcpu *vcpu) + + static void kvm_inject_exception(struct kvm_vcpu *vcpu) + { ++ trace_kvm_inj_exception(vcpu->arch.exception.nr, ++ vcpu->arch.exception.has_error_code, ++ vcpu->arch.exception.error_code, ++ vcpu->arch.exception.injected); ++ + if (vcpu->arch.exception.error_code && !is_protmode(vcpu)) + vcpu->arch.exception.error_code = false; + static_call(kvm_x86_queue_exception)(vcpu); +@@ -8912,13 +9169,16 @@ static int inject_pending_event(struct kvm_vcpu *vcpu, bool *req_immediate_exit) + + /* try to inject new event if pending */ + if (vcpu->arch.exception.pending) { +- trace_kvm_inj_exception(vcpu->arch.exception.nr, +- vcpu->arch.exception.has_error_code, +- vcpu->arch.exception.error_code); +- +- vcpu->arch.exception.pending = false; +- vcpu->arch.exception.injected = true; +- ++ /* ++ * Fault-class exceptions, except #DBs, set RF=1 in the RFLAGS ++ * value pushed on the stack. Trap-like exception and all #DBs ++ * leave RF as-is (KVM follows Intel's behavior in this regard; ++ * AMD states that code breakpoint #DBs excplitly clear RF=0). ++ * ++ * Note, most versions of Intel's SDM and AMD's APM incorrectly ++ * describe the behavior of General Detect #DBs, which are ++ * fault-like. They do _not_ set RF, a la code breakpoints. ++ */ + if (exception_type(vcpu->arch.exception.nr) == EXCPT_FAULT) + __kvm_set_rflags(vcpu, kvm_get_rflags(vcpu) | + X86_EFLAGS_RF); +@@ -8932,6 +9192,10 @@ static int inject_pending_event(struct kvm_vcpu *vcpu, bool *req_immediate_exit) + } + + kvm_inject_exception(vcpu); ++ ++ vcpu->arch.exception.pending = false; ++ vcpu->arch.exception.injected = true; ++ + can_inject = false; + } + +@@ -9359,8 +9623,7 @@ static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu) if (irqchip_split(vcpu->kvm)) kvm_scan_ioapic_routes(vcpu, vcpu->arch.ioapic_handled_vectors); else { @@ -55678,7 +67314,7 @@ index bfe0de3008a60..8648799d48f8b 100644 if (ioapic_in_kernel(vcpu->kvm)) kvm_ioapic_scan_entry(vcpu, vcpu->arch.ioapic_handled_vectors); } -@@ -9378,12 +9532,16 @@ static void vcpu_load_eoi_exitmap(struct kvm_vcpu *vcpu) +@@ -9378,12 +9641,16 @@ static void vcpu_load_eoi_exitmap(struct kvm_vcpu *vcpu) if (!kvm_apic_hw_enabled(vcpu->arch.apic)) return; @@ -55697,7 +67333,7 @@ index bfe0de3008a60..8648799d48f8b 100644 } void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm, -@@ -9400,6 +9558,11 @@ void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm, +@@ -9400,6 +9667,11 @@ void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm, kvm_make_all_cpus_request(kvm, KVM_REQ_APIC_PAGE_RELOAD); } @@ -55709,7 +67345,7 @@ index bfe0de3008a60..8648799d48f8b 100644 void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu) { if (!lapic_in_kernel(vcpu)) -@@ -9475,10 +9638,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) +@@ -9475,10 +9747,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) /* Flushing all ASIDs flushes the current ASID... 
*/ kvm_clear_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu); } @@ -55721,7 +67357,7 @@ index bfe0de3008a60..8648799d48f8b 100644 if (kvm_check_request(KVM_REQ_REPORT_TPR_ACCESS, vcpu)) { vcpu->run->exit_reason = KVM_EXIT_TPR_ACCESS; -@@ -9629,10 +9789,12 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) +@@ -9629,10 +9898,12 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) /* * This handles the case where a posted interrupt was @@ -55737,7 +67373,7 @@ index bfe0de3008a60..8648799d48f8b 100644 if (kvm_vcpu_exit_request(vcpu)) { vcpu->mode = OUTSIDE_GUEST_MODE; -@@ -9668,8 +9830,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) +@@ -9668,8 +9939,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) if (likely(exit_fastpath != EXIT_FASTPATH_REENTER_GUEST)) break; @@ -55748,7 +67384,7 @@ index bfe0de3008a60..8648799d48f8b 100644 if (unlikely(kvm_vcpu_exit_request(vcpu))) { exit_fastpath = EXIT_FASTPATH_EXIT_HANDLED; -@@ -9823,6 +9985,13 @@ static int vcpu_run(struct kvm_vcpu *vcpu) +@@ -9823,6 +10094,13 @@ static int vcpu_run(struct kvm_vcpu *vcpu) vcpu->arch.l1tf_flush_l1d = true; for (;;) { @@ -55762,7 +67398,7 @@ index bfe0de3008a60..8648799d48f8b 100644 if (kvm_vcpu_running(vcpu)) { r = vcpu_enter_guest(vcpu); } else { -@@ -10707,8 +10876,21 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) +@@ -10707,8 +10985,21 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) r = kvm_create_lapic(vcpu, lapic_timer_advance_ns); if (r < 0) goto fail_mmu_destroy; @@ -55785,7 +67421,7 @@ index bfe0de3008a60..8648799d48f8b 100644 } else static_branch_inc(&kvm_has_noapic_vcpu); -@@ -10817,11 +10999,8 @@ void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu) +@@ -10817,11 +11108,8 @@ void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu) void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) { @@ -55797,7 +67433,26 @@ index bfe0de3008a60..8648799d48f8b 100644 kvmclock_reset(vcpu); static_call(kvm_x86_vcpu_free)(vcpu); -@@ -10908,7 +11087,8 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) +@@ -10850,8 +11138,18 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) + unsigned long new_cr0; + u32 eax, dummy; + ++ /* ++ * SVM doesn't unconditionally VM-Exit on INIT and SHUTDOWN, thus it's ++ * possible to INIT the vCPU while L2 is active. Force the vCPU back ++ * into L1 as EFER.SVME is cleared on INIT (along with all other EFER ++ * bits), i.e. virtualization is disabled. 
++ */ ++ if (is_guest_mode(vcpu)) ++ kvm_leave_nested(vcpu); ++ + kvm_lapic_reset(vcpu, init_event); + ++ WARN_ON_ONCE(is_guest_mode(vcpu) || is_smm(vcpu)); + vcpu->arch.hflags = 0; + + vcpu->arch.smi_pending = 0; +@@ -10908,7 +11206,8 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) vcpu->arch.msr_misc_features_enables = 0; @@ -55807,7 +67462,7 @@ index bfe0de3008a60..8648799d48f8b 100644 } memset(vcpu->arch.regs, 0, sizeof(vcpu->arch.regs)); -@@ -10927,8 +11107,6 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) +@@ -10927,8 +11226,6 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) eax = 0x600; kvm_rdx_write(vcpu, eax); @@ -55816,7 +67471,7 @@ index bfe0de3008a60..8648799d48f8b 100644 static_call(kvm_x86_vcpu_reset)(vcpu, init_event); kvm_set_rflags(vcpu, X86_EFLAGS_FIXED); -@@ -11104,6 +11282,10 @@ int kvm_arch_hardware_setup(void *opaque) +@@ -11104,6 +11401,10 @@ int kvm_arch_hardware_setup(void *opaque) memcpy(&kvm_x86_ops, ops->runtime_ops, sizeof(kvm_x86_ops)); kvm_ops_static_call_update(); @@ -55827,7 +67482,7 @@ index bfe0de3008a60..8648799d48f8b 100644 if (!kvm_cpu_cap_has(X86_FEATURE_XSAVES)) supported_xss = 0; -@@ -11131,6 +11313,9 @@ int kvm_arch_hardware_setup(void *opaque) +@@ -11131,6 +11432,9 @@ int kvm_arch_hardware_setup(void *opaque) void kvm_arch_hardware_unsetup(void) { @@ -55837,7 +67492,7 @@ index bfe0de3008a60..8648799d48f8b 100644 static_call(kvm_x86_hardware_unsetup)(); } -@@ -11420,7 +11605,7 @@ static int memslot_rmap_alloc(struct kvm_memory_slot *slot, +@@ -11420,7 +11724,7 @@ static int memslot_rmap_alloc(struct kvm_memory_slot *slot, if (slot->arch.rmap[i]) continue; @@ -55846,7 +67501,7 @@ index bfe0de3008a60..8648799d48f8b 100644 if (!slot->arch.rmap[i]) { memslot_rmap_free(slot); return -ENOMEM; -@@ -11501,7 +11686,7 @@ static int kvm_alloc_memslot_metadata(struct kvm *kvm, +@@ -11501,7 +11805,7 @@ static int kvm_alloc_memslot_metadata(struct kvm *kvm, lpages = __kvm_mmu_slot_lpages(slot, npages, level); @@ -55855,7 +67510,7 @@ index bfe0de3008a60..8648799d48f8b 100644 if (!linfo) goto out_free; -@@ -12045,9 +12230,9 @@ void kvm_arch_end_assignment(struct kvm *kvm) +@@ -12045,9 +12349,9 @@ void kvm_arch_end_assignment(struct kvm *kvm) } EXPORT_SYMBOL_GPL(kvm_arch_end_assignment); @@ -55867,7 +67522,7 @@ index bfe0de3008a60..8648799d48f8b 100644 } EXPORT_SYMBOL_GPL(kvm_arch_has_assigned_device); -@@ -12509,3 +12694,19 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_vmgexit_enter); +@@ -12509,3 +12813,20 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_vmgexit_enter); EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_vmgexit_exit); EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_vmgexit_msr_protocol_enter); EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_vmgexit_msr_protocol_exit); @@ -55875,6 +67530,7 @@ index bfe0de3008a60..8648799d48f8b 100644 +static int __init kvm_x86_init(void) +{ + kvm_mmu_x86_module_init(); ++ mitigate_smt_rsb &= boot_cpu_has_bug(X86_BUG_SMT_RSB) && cpu_smt_possible(); + return 0; +} +module_init(kvm_x86_init); @@ -56984,14 +68640,16 @@ index c565def611e24..55e371cc69fd5 100644 #define get_next(t, insn) \ ({ if (unlikely(!validate_next(t, insn, 0))) goto err_out; __get_next(t, insn); }) diff --git a/arch/x86/lib/iomap_copy_64.S b/arch/x86/lib/iomap_copy_64.S -index cb5a1964506b1..a1f9416bf67a5 100644 +index cb5a1964506b1..6ff2f56cb0f71 100644 --- a/arch/x86/lib/iomap_copy_64.S +++ b/arch/x86/lib/iomap_copy_64.S -@@ -11,5 +11,5 @@ +@@ -10,6 +10,6 @@ + */ SYM_FUNC_START(__iowrite32_copy) movl %edx,%ecx - rep movsd +- rep movsd - ret ++ rep movsl + RET 
SYM_FUNC_END(__iowrite32_copy) diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S @@ -58036,10 +69694,27 @@ index 36098226a9573..200ad5ceeb43f 100644 /* * We want to avoid memset(PAGE_UNUSED) when populating the vmemmap of diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c -index 60ade7dd71bd9..7ce9b8dd87577 100644 +index 60ade7dd71bd9..5dfa40279f0fd 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c -@@ -614,6 +614,7 @@ static bool memremap_is_efi_data(resource_size_t phys_addr, +@@ -216,9 +216,15 @@ __ioremap_caller(resource_size_t phys_addr, unsigned long size, + * Mappings have to be page-aligned + */ + offset = phys_addr & ~PAGE_MASK; +- phys_addr &= PHYSICAL_PAGE_MASK; ++ phys_addr &= PAGE_MASK; + size = PAGE_ALIGN(last_addr+1) - phys_addr; + ++ /* ++ * Mask out any bits not part of the actual physical ++ * address, like memory encryption bits. ++ */ ++ phys_addr &= PHYSICAL_PAGE_MASK; ++ + retval = memtype_reserve(phys_addr, (u64)phys_addr + size, + pcm, &new_pcm); + if (retval) { +@@ -614,6 +620,7 @@ static bool memremap_is_efi_data(resource_size_t phys_addr, static bool memremap_is_setup_data(resource_size_t phys_addr, unsigned long size) { @@ -58047,7 +69722,7 @@ index 60ade7dd71bd9..7ce9b8dd87577 100644 struct setup_data *data; u64 paddr, paddr_next; -@@ -626,6 +627,10 @@ static bool memremap_is_setup_data(resource_size_t phys_addr, +@@ -626,6 +633,10 @@ static bool memremap_is_setup_data(resource_size_t phys_addr, data = memremap(paddr, sizeof(*data), MEMREMAP_WB | MEMREMAP_DEC); @@ -58058,7 +69733,7 @@ index 60ade7dd71bd9..7ce9b8dd87577 100644 paddr_next = data->next; len = data->len; -@@ -635,10 +640,21 @@ static bool memremap_is_setup_data(resource_size_t phys_addr, +@@ -635,10 +646,21 @@ static bool memremap_is_setup_data(resource_size_t phys_addr, return true; } @@ -58084,7 +69759,7 @@ index 60ade7dd71bd9..7ce9b8dd87577 100644 } memunmap(data); -@@ -659,22 +675,51 @@ static bool memremap_is_setup_data(resource_size_t phys_addr, +@@ -659,22 +681,51 @@ static bool memremap_is_setup_data(resource_size_t phys_addr, static bool __init early_memremap_is_setup_data(resource_size_t phys_addr, unsigned long size) { @@ -59083,7 +70758,7 @@ index 75f4faff84682..3a5abffe5660d 100644 .data diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c -index 6665f88020983..732cb075d7072 100644 +index 6665f88020983..f5133d620d4ef 100644 --- a/arch/x86/power/cpu.c +++ b/arch/x86/power/cpu.c @@ -25,6 +25,7 @@ @@ -59136,21 +70811,29 @@ index 6665f88020983..732cb075d7072 100644 msr_array[i].info.reg.q = 0; } saved_msrs->num = total_num; -@@ -500,10 +511,24 @@ static int pm_cpu_check(const struct x86_cpu_id *c) +@@ -500,10 +511,32 @@ static int pm_cpu_check(const struct x86_cpu_id *c) return ret; } +static void pm_save_spec_msr(void) +{ -+ u32 spec_msr_id[] = { -+ MSR_IA32_SPEC_CTRL, -+ MSR_IA32_TSX_CTRL, -+ MSR_TSX_FORCE_ABORT, -+ MSR_IA32_MCU_OPT_CTRL, -+ MSR_AMD64_LS_CFG, ++ struct msr_enumeration { ++ u32 msr_no; ++ u32 feature; ++ } msr_enum[] = { ++ { MSR_IA32_SPEC_CTRL, X86_FEATURE_MSR_SPEC_CTRL }, ++ { MSR_IA32_TSX_CTRL, X86_FEATURE_MSR_TSX_CTRL }, ++ { MSR_TSX_FORCE_ABORT, X86_FEATURE_TSX_FORCE_ABORT }, ++ { MSR_IA32_MCU_OPT_CTRL, X86_FEATURE_SRBDS_CTRL }, ++ { MSR_AMD64_LS_CFG, X86_FEATURE_LS_CFG_SSBD }, ++ { MSR_AMD64_DE_CFG, X86_FEATURE_LFENCE_RDTSC }, + }; ++ int i; + -+ msr_build_context(spec_msr_id, ARRAY_SIZE(spec_msr_id)); ++ for (i = 0; i < ARRAY_SIZE(msr_enum); i++) { ++ if (boot_cpu_has(msr_enum[i].feature)) ++ 
msr_build_context(&msr_enum[i].msr_no, 1); ++ } +} + static int pm_check_save_msr(void) @@ -59561,6 +71244,100 @@ index 8bfc103301077..1f80dd3a2dd4a 100644 if (ret != 0) setup_clear_cpu_cap(X86_FEATURE_SYSCALL32); } +diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c +index c1b2f764b29a2..cdec892b28e2e 100644 +--- a/arch/x86/xen/smp.c ++++ b/arch/x86/xen/smp.c +@@ -32,30 +32,30 @@ static irqreturn_t xen_reschedule_interrupt(int irq, void *dev_id) + + void xen_smp_intr_free(unsigned int cpu) + { ++ kfree(per_cpu(xen_resched_irq, cpu).name); ++ per_cpu(xen_resched_irq, cpu).name = NULL; + if (per_cpu(xen_resched_irq, cpu).irq >= 0) { + unbind_from_irqhandler(per_cpu(xen_resched_irq, cpu).irq, NULL); + per_cpu(xen_resched_irq, cpu).irq = -1; +- kfree(per_cpu(xen_resched_irq, cpu).name); +- per_cpu(xen_resched_irq, cpu).name = NULL; + } ++ kfree(per_cpu(xen_callfunc_irq, cpu).name); ++ per_cpu(xen_callfunc_irq, cpu).name = NULL; + if (per_cpu(xen_callfunc_irq, cpu).irq >= 0) { + unbind_from_irqhandler(per_cpu(xen_callfunc_irq, cpu).irq, NULL); + per_cpu(xen_callfunc_irq, cpu).irq = -1; +- kfree(per_cpu(xen_callfunc_irq, cpu).name); +- per_cpu(xen_callfunc_irq, cpu).name = NULL; + } ++ kfree(per_cpu(xen_debug_irq, cpu).name); ++ per_cpu(xen_debug_irq, cpu).name = NULL; + if (per_cpu(xen_debug_irq, cpu).irq >= 0) { + unbind_from_irqhandler(per_cpu(xen_debug_irq, cpu).irq, NULL); + per_cpu(xen_debug_irq, cpu).irq = -1; +- kfree(per_cpu(xen_debug_irq, cpu).name); +- per_cpu(xen_debug_irq, cpu).name = NULL; + } ++ kfree(per_cpu(xen_callfuncsingle_irq, cpu).name); ++ per_cpu(xen_callfuncsingle_irq, cpu).name = NULL; + if (per_cpu(xen_callfuncsingle_irq, cpu).irq >= 0) { + unbind_from_irqhandler(per_cpu(xen_callfuncsingle_irq, cpu).irq, + NULL); + per_cpu(xen_callfuncsingle_irq, cpu).irq = -1; +- kfree(per_cpu(xen_callfuncsingle_irq, cpu).name); +- per_cpu(xen_callfuncsingle_irq, cpu).name = NULL; + } + } + +@@ -65,6 +65,7 @@ int xen_smp_intr_init(unsigned int cpu) + char *resched_name, *callfunc_name, *debug_name; + + resched_name = kasprintf(GFP_KERNEL, "resched%d", cpu); ++ per_cpu(xen_resched_irq, cpu).name = resched_name; + rc = bind_ipi_to_irqhandler(XEN_RESCHEDULE_VECTOR, + cpu, + xen_reschedule_interrupt, +@@ -74,9 +75,9 @@ int xen_smp_intr_init(unsigned int cpu) + if (rc < 0) + goto fail; + per_cpu(xen_resched_irq, cpu).irq = rc; +- per_cpu(xen_resched_irq, cpu).name = resched_name; + + callfunc_name = kasprintf(GFP_KERNEL, "callfunc%d", cpu); ++ per_cpu(xen_callfunc_irq, cpu).name = callfunc_name; + rc = bind_ipi_to_irqhandler(XEN_CALL_FUNCTION_VECTOR, + cpu, + xen_call_function_interrupt, +@@ -86,10 +87,10 @@ int xen_smp_intr_init(unsigned int cpu) + if (rc < 0) + goto fail; + per_cpu(xen_callfunc_irq, cpu).irq = rc; +- per_cpu(xen_callfunc_irq, cpu).name = callfunc_name; + + if (!xen_fifo_events) { + debug_name = kasprintf(GFP_KERNEL, "debug%d", cpu); ++ per_cpu(xen_debug_irq, cpu).name = debug_name; + rc = bind_virq_to_irqhandler(VIRQ_DEBUG, cpu, + xen_debug_interrupt, + IRQF_PERCPU | IRQF_NOBALANCING, +@@ -97,10 +98,10 @@ int xen_smp_intr_init(unsigned int cpu) + if (rc < 0) + goto fail; + per_cpu(xen_debug_irq, cpu).irq = rc; +- per_cpu(xen_debug_irq, cpu).name = debug_name; + } + + callfunc_name = kasprintf(GFP_KERNEL, "callfuncsingle%d", cpu); ++ per_cpu(xen_callfuncsingle_irq, cpu).name = callfunc_name; + rc = bind_ipi_to_irqhandler(XEN_CALL_FUNCTION_SINGLE_VECTOR, + cpu, + xen_call_function_single_interrupt, +@@ -110,7 +111,6 @@ int xen_smp_intr_init(unsigned int cpu) + if (rc < 
0) + goto fail; + per_cpu(xen_callfuncsingle_irq, cpu).irq = rc; +- per_cpu(xen_callfuncsingle_irq, cpu).name = callfunc_name; + + return 0; + diff --git a/arch/x86/xen/smp_hvm.c b/arch/x86/xen/smp_hvm.c index 6ff3c887e0b99..b70afdff419ca 100644 --- a/arch/x86/xen/smp_hvm.c @@ -59579,18 +71356,61 @@ index 6ff3c887e0b99..b70afdff419ca 100644 * The alternative logic (which patches the unlock/lock) runs before * the smp bootup up code is activated. Hence we need to set this up diff --git a/arch/x86/xen/smp_pv.c b/arch/x86/xen/smp_pv.c -index 7ed56c6075b0c..cd5539fc5eb45 100644 +index 7ed56c6075b0c..b47b5111397a7 100644 --- a/arch/x86/xen/smp_pv.c +++ b/arch/x86/xen/smp_pv.c -@@ -129,7 +129,7 @@ int xen_smp_intr_init_pv(unsigned int cpu) +@@ -97,18 +97,18 @@ asmlinkage __visible void cpu_bringup_and_idle(void) + + void xen_smp_intr_free_pv(unsigned int cpu) + { ++ kfree(per_cpu(xen_irq_work, cpu).name); ++ per_cpu(xen_irq_work, cpu).name = NULL; + if (per_cpu(xen_irq_work, cpu).irq >= 0) { + unbind_from_irqhandler(per_cpu(xen_irq_work, cpu).irq, NULL); + per_cpu(xen_irq_work, cpu).irq = -1; +- kfree(per_cpu(xen_irq_work, cpu).name); +- per_cpu(xen_irq_work, cpu).name = NULL; + } + ++ kfree(per_cpu(xen_pmu_irq, cpu).name); ++ per_cpu(xen_pmu_irq, cpu).name = NULL; + if (per_cpu(xen_pmu_irq, cpu).irq >= 0) { + unbind_from_irqhandler(per_cpu(xen_pmu_irq, cpu).irq, NULL); + per_cpu(xen_pmu_irq, cpu).irq = -1; +- kfree(per_cpu(xen_pmu_irq, cpu).name); +- per_cpu(xen_pmu_irq, cpu).name = NULL; + } + } + +@@ -118,6 +118,7 @@ int xen_smp_intr_init_pv(unsigned int cpu) + char *callfunc_name, *pmu_name; + + callfunc_name = kasprintf(GFP_KERNEL, "irqwork%d", cpu); ++ per_cpu(xen_irq_work, cpu).name = callfunc_name; + rc = bind_ipi_to_irqhandler(XEN_IRQ_WORK_VECTOR, + cpu, + xen_irq_work_interrupt, +@@ -127,10 +128,10 @@ int xen_smp_intr_init_pv(unsigned int cpu) + if (rc < 0) + goto fail; per_cpu(xen_irq_work, cpu).irq = rc; - per_cpu(xen_irq_work, cpu).name = callfunc_name; +- per_cpu(xen_irq_work, cpu).name = callfunc_name; - if (is_xen_pmu(cpu)) { + if (is_xen_pmu) { pmu_name = kasprintf(GFP_KERNEL, "pmu%d", cpu); ++ per_cpu(xen_pmu_irq, cpu).name = pmu_name; rc = bind_virq_to_irqhandler(VIRQ_XENPMU, cpu, xen_pmu_irq_handler, + IRQF_PERCPU|IRQF_NOBALANCING, +@@ -138,7 +139,6 @@ int xen_smp_intr_init_pv(unsigned int cpu) + if (rc < 0) + goto fail; + per_cpu(xen_pmu_irq, cpu).irq = rc; +- per_cpu(xen_pmu_irq, cpu).name = pmu_name; + } + + return 0; @@ -148,28 +148,12 @@ int xen_smp_intr_init_pv(unsigned int cpu) return rc; } @@ -59640,6 +71460,44 @@ index 7ed56c6075b0c..cd5539fc5eb45 100644 + x86_init.mpparse.find_smp_config = x86_init_noop; + x86_init.mpparse.get_smp_config = _get_smp_config; } +diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c +index 043c73dfd2c98..5c6fc16e4b925 100644 +--- a/arch/x86/xen/spinlock.c ++++ b/arch/x86/xen/spinlock.c +@@ -75,6 +75,7 @@ void xen_init_lock_cpu(int cpu) + cpu, per_cpu(lock_kicker_irq, cpu)); + + name = kasprintf(GFP_KERNEL, "spinlock%d", cpu); ++ per_cpu(irq_name, cpu) = name; + irq = bind_ipi_to_irqhandler(XEN_SPIN_UNLOCK_VECTOR, + cpu, + dummy_handler, +@@ -85,7 +86,6 @@ void xen_init_lock_cpu(int cpu) + if (irq >= 0) { + disable_irq(irq); /* make sure it's never delivered */ + per_cpu(lock_kicker_irq, cpu) = irq; +- per_cpu(irq_name, cpu) = name; + } + + printk("cpu %d spinlock event irq %d\n", cpu, irq); +@@ -98,6 +98,8 @@ void xen_uninit_lock_cpu(int cpu) + if (!xen_pvspin) + return; + ++ kfree(per_cpu(irq_name, cpu)); ++ per_cpu(irq_name, 
cpu) = NULL; + /* + * When booting the kernel with 'mitigations=auto,nosmt', the secondary + * CPUs are not activated, and lock_kicker_irq is not initialized. +@@ -108,8 +110,6 @@ void xen_uninit_lock_cpu(int cpu) + + unbind_from_irqhandler(irq, NULL); + per_cpu(lock_kicker_irq, cpu) = -1; +- kfree(per_cpu(irq_name, cpu)); +- per_cpu(irq_name, cpu) = NULL; + } + + PV_CALLEE_SAVE_REGS_THUNK(xen_vcpu_stolen); diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c index d9c945ee11008..9ef0a5cca96ee 100644 --- a/arch/x86/xen/time.c @@ -60110,6 +71968,17 @@ index e8ceb15286081..16b8a6273772c 100644 if (!IS_ERR(clk)) { ccount_freq = clk_get_rate(clk); return; +diff --git a/arch/xtensa/kernel/traps.c b/arch/xtensa/kernel/traps.c +index 874b6efc6fb31..904086ad56827 100644 +--- a/arch/xtensa/kernel/traps.c ++++ b/arch/xtensa/kernel/traps.c +@@ -552,5 +552,5 @@ void die(const char * str, struct pt_regs * regs, long err) + if (panic_on_oops) + panic("Fatal exception"); + +- do_exit(err); ++ make_task_dead(err); + } diff --git a/arch/xtensa/mm/tlb.c b/arch/xtensa/mm/tlb.c index f436cf2efd8b7..27a477dae2322 100644 --- a/arch/xtensa/mm/tlb.c @@ -60308,6 +72177,19 @@ index 538e6748e85a7..c79c1d09ea863 100644 return 0; } arch_initcall(machine_setup); +diff --git a/block/Makefile b/block/Makefile +index 41aa1ba69c900..74df168729ecb 100644 +--- a/block/Makefile ++++ b/block/Makefile +@@ -3,7 +3,7 @@ + # Makefile for the kernel block layer + # + +-obj-$(CONFIG_BLOCK) := bdev.o fops.o bio.o elevator.o blk-core.o blk-sysfs.o \ ++obj-y := bdev.o fops.o bio.o elevator.o blk-core.o blk-sysfs.o \ + blk-flush.o blk-settings.o blk-ioc.o blk-map.o \ + blk-exec.o blk-merge.o blk-timeout.o \ + blk-lib.o blk-mq.o blk-mq-tag.o blk-stat.o \ diff --git a/block/bdev.c b/block/bdev.c index 485a258b0ab37..18abafb135e0b 100644 --- a/block/bdev.c @@ -60373,7 +72255,7 @@ index 485a258b0ab37..18abafb135e0b 100644 spin_lock(&blockdev_superblock->s_inode_list_lock); diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c -index 85b8e1c3a762d..e2e765a54fe95 100644 +index 85b8e1c3a762d..53e275e377a73 100644 --- a/block/bfq-cgroup.c +++ b/block/bfq-cgroup.c @@ -555,6 +555,7 @@ static void bfq_pd_init(struct blkg_policy_data *pd) @@ -60415,7 +72297,7 @@ index 85b8e1c3a762d..e2e765a54fe95 100644 /* * Update chain of bfq_groups as we might be handling a leaf group * which, along with some of its relatives, has not been hooked yet -@@ -621,8 +605,24 @@ struct bfq_group *bfq_find_set_group(struct bfq_data *bfqd, +@@ -621,8 +605,28 @@ struct bfq_group *bfq_find_set_group(struct bfq_data *bfqd, bfq_group_set_parent(curr_bfqg, parent); } } @@ -60428,6 +72310,10 @@ index 85b8e1c3a762d..e2e765a54fe95 100644 + struct bfq_group *bfqg; + + while (blkg) { ++ if (!blkg->online) { ++ blkg = blkg->parent; ++ continue; ++ } + bfqg = blkg_to_bfqg(blkg); + if (bfqg->online) { + bio_associate_blkg_from_css(bio, &blkg->blkcg->css); @@ -60441,7 +72327,7 @@ index 85b8e1c3a762d..e2e765a54fe95 100644 } /** -@@ -644,6 +644,12 @@ void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq, +@@ -644,6 +648,12 @@ void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq, { struct bfq_entity *entity = &bfqq->entity; @@ -60454,7 +72340,7 @@ index 85b8e1c3a762d..e2e765a54fe95 100644 /* * Get extra reference to prevent bfqq from being freed in * next possible expire or deactivate. 
-@@ -698,25 +704,15 @@ void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq, +@@ -698,38 +708,58 @@ void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq, * Move bic to blkcg, assuming that bfqd->lock is held; which makes * sure that the reference to cgroup is valid across the call (see * comments in bfq_bic_update_cgroup on this issue) @@ -60470,9 +72356,11 @@ index 85b8e1c3a762d..e2e765a54fe95 100644 + struct bfq_io_cq *bic, + struct bfq_group *bfqg) { - struct bfq_queue *async_bfqq = bic_to_bfqq(bic, 0); - struct bfq_queue *sync_bfqq = bic_to_bfqq(bic, 1); +- struct bfq_queue *async_bfqq = bic_to_bfqq(bic, 0); +- struct bfq_queue *sync_bfqq = bic_to_bfqq(bic, 1); - struct bfq_group *bfqg; ++ struct bfq_queue *async_bfqq = bic_to_bfqq(bic, false); ++ struct bfq_queue *sync_bfqq = bic_to_bfqq(bic, true); struct bfq_entity *entity; - bfqg = bfq_find_set_group(bfqd, blkcg); @@ -60483,7 +72371,11 @@ index 85b8e1c3a762d..e2e765a54fe95 100644 if (async_bfqq) { entity = &async_bfqq->entity; -@@ -727,9 +723,39 @@ static struct bfq_group *__bfq_bic_change_cgroup(struct bfq_data *bfqd, + if (entity->sched_data != &bfqg->sched_data) { +- bic_set_bfqq(bic, NULL, 0); ++ bic_set_bfqq(bic, NULL, false); + bfq_release_process_ref(bfqd, async_bfqq); + } } if (sync_bfqq) { @@ -60519,14 +72411,14 @@ index 85b8e1c3a762d..e2e765a54fe95 100644 + * request from the old cgroup. + */ + bfq_put_cooperator(sync_bfqq); ++ bic_set_bfqq(bic, NULL, true); + bfq_release_process_ref(bfqd, sync_bfqq); -+ bic_set_bfqq(bic, NULL, 1); + } + } } return bfqg; -@@ -738,20 +764,24 @@ static struct bfq_group *__bfq_bic_change_cgroup(struct bfq_data *bfqd, +@@ -738,20 +768,24 @@ static struct bfq_group *__bfq_bic_change_cgroup(struct bfq_data *bfqd, void bfq_bic_update_cgroup(struct bfq_io_cq *bic, struct bio *bio) { struct bfq_data *bfqd = bic_to_bfqd(bic); @@ -60556,7 +72448,7 @@ index 85b8e1c3a762d..e2e765a54fe95 100644 /* * Update blkg_path for bfq_log_* functions. 
We cache this * path, and update it here, for the following -@@ -804,8 +834,6 @@ void bfq_bic_update_cgroup(struct bfq_io_cq *bic, struct bio *bio) +@@ -804,8 +838,6 @@ void bfq_bic_update_cgroup(struct bfq_io_cq *bic, struct bio *bio) */ blkg_path(bfqg_to_blkg(bfqg), bfqg->blkg_path, sizeof(bfqg->blkg_path)); bic->blkcg_serial_nr = serial_nr; @@ -60565,7 +72457,7 @@ index 85b8e1c3a762d..e2e765a54fe95 100644 } /** -@@ -933,6 +961,7 @@ static void bfq_pd_offline(struct blkg_policy_data *pd) +@@ -933,6 +965,7 @@ static void bfq_pd_offline(struct blkg_policy_data *pd) put_async_queues: bfq_put_async_queues(bfqd, bfqg); @@ -60573,7 +72465,7 @@ index 85b8e1c3a762d..e2e765a54fe95 100644 spin_unlock_irqrestore(&bfqd->lock, flags); /* -@@ -1422,7 +1451,7 @@ void bfq_end_wr_async(struct bfq_data *bfqd) +@@ -1422,7 +1455,7 @@ void bfq_end_wr_async(struct bfq_data *bfqd) bfq_end_wr_async_queues(bfqd, bfqd->root_group); } @@ -60583,10 +72475,32 @@ index 85b8e1c3a762d..e2e765a54fe95 100644 return bfqd->root_group; } diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c -index 480e1a1348596..4b862f18f4b2e 100644 +index 480e1a1348596..f54554906451e 100644 --- a/block/bfq-iosched.c +++ b/block/bfq-iosched.c -@@ -2022,9 +2022,7 @@ static void bfq_check_waker(struct bfq_data *bfqd, struct bfq_queue *bfqq, +@@ -386,6 +386,12 @@ static void bfq_put_stable_ref(struct bfq_queue *bfqq); + + void bic_set_bfqq(struct bfq_io_cq *bic, struct bfq_queue *bfqq, bool is_sync) + { ++ struct bfq_queue *old_bfqq = bic->bfqq[is_sync]; ++ ++ /* Clear bic pointer if bfqq is detached from this bic */ ++ if (old_bfqq && old_bfqq->bic == bic) ++ old_bfqq->bic = NULL; ++ + /* + * If bfqq != NULL, then a non-stable queue merge between + * bic->bfqq and bfqq is happening here. This causes troubles +@@ -461,6 +467,8 @@ static struct bfq_io_cq *bfq_bic_lookup(struct bfq_data *bfqd, + */ + void bfq_schedule_dispatch(struct bfq_data *bfqd) + { ++ lockdep_assert_held(&bfqd->lock); ++ + if (bfqd->queued != 0) { + bfq_log(bfqd, "schedule dispatch"); + blk_mq_run_hw_queues(bfqd->queue, true); +@@ -2022,9 +2030,7 @@ static void bfq_check_waker(struct bfq_data *bfqd, struct bfq_queue *bfqq, if (!bfqd->last_completed_rq_bfqq || bfqd->last_completed_rq_bfqq == bfqq || bfq_bfqq_has_short_ttime(bfqq) || @@ -60597,7 +72511,7 @@ index 480e1a1348596..4b862f18f4b2e 100644 return; if (bfqd->last_completed_rq_bfqq != -@@ -2084,7 +2082,7 @@ static void bfq_add_request(struct request *rq) +@@ -2084,7 +2090,7 @@ static void bfq_add_request(struct request *rq) bfqq->queued[rq_is_sync(rq)]++; bfqd->queued++; @@ -60606,7 +72520,7 @@ index 480e1a1348596..4b862f18f4b2e 100644 bfq_check_waker(bfqd, bfqq, now_ns); /* -@@ -2337,10 +2335,17 @@ static bool bfq_bio_merge(struct request_queue *q, struct bio *bio, +@@ -2337,10 +2343,17 @@ static bool bfq_bio_merge(struct request_queue *q, struct bio *bio, spin_lock_irq(&bfqd->lock); @@ -60626,7 +72540,7 @@ index 480e1a1348596..4b862f18f4b2e 100644 bfqd->bio_bic = bic; ret = blk_mq_sched_try_merge(q, bio, nr_segs, &free); -@@ -2370,8 +2375,6 @@ static int bfq_request_merge(struct request_queue *q, struct request **req, +@@ -2370,8 +2383,6 @@ static int bfq_request_merge(struct request_queue *q, struct request **req, return ELEVATOR_NO_MERGE; } @@ -60635,7 +72549,7 @@ index 480e1a1348596..4b862f18f4b2e 100644 static void bfq_request_merged(struct request_queue *q, struct request *req, enum elv_merge type) { -@@ -2380,7 +2383,7 @@ static void bfq_request_merged(struct request_queue *q, struct request *req, +@@ -2380,7 
+2391,7 @@ static void bfq_request_merged(struct request_queue *q, struct request *req, blk_rq_pos(req) < blk_rq_pos(container_of(rb_prev(&req->rb_node), struct request, rb_node))) { @@ -60644,7 +72558,7 @@ index 480e1a1348596..4b862f18f4b2e 100644 struct bfq_data *bfqd; struct request *prev, *next_rq; -@@ -2432,8 +2435,8 @@ static void bfq_request_merged(struct request_queue *q, struct request *req, +@@ -2432,8 +2443,8 @@ static void bfq_request_merged(struct request_queue *q, struct request *req, static void bfq_requests_merged(struct request_queue *q, struct request *rq, struct request *next) { @@ -60655,7 +72569,7 @@ index 480e1a1348596..4b862f18f4b2e 100644 if (!bfqq) goto remove; -@@ -2638,6 +2641,14 @@ bfq_setup_merge(struct bfq_queue *bfqq, struct bfq_queue *new_bfqq) +@@ -2638,6 +2649,14 @@ bfq_setup_merge(struct bfq_queue *bfqq, struct bfq_queue *new_bfqq) if (process_refs == 0 || new_process_refs == 0) return NULL; @@ -60670,7 +72584,7 @@ index 480e1a1348596..4b862f18f4b2e 100644 bfq_log_bfqq(bfqq->bfqd, bfqq, "scheduling merge with queue %d", new_bfqq->pid); -@@ -2662,6 +2673,15 @@ bfq_setup_merge(struct bfq_queue *bfqq, struct bfq_queue *new_bfqq) +@@ -2662,6 +2681,15 @@ bfq_setup_merge(struct bfq_queue *bfqq, struct bfq_queue *new_bfqq) * are likely to increase the throughput. */ bfqq->new_bfqq = new_bfqq; @@ -60686,7 +72600,7 @@ index 480e1a1348596..4b862f18f4b2e 100644 new_bfqq->ref += process_refs; return new_bfqq; } -@@ -2724,6 +2744,10 @@ bfq_setup_cooperator(struct bfq_data *bfqd, struct bfq_queue *bfqq, +@@ -2724,6 +2752,10 @@ bfq_setup_cooperator(struct bfq_data *bfqd, struct bfq_queue *bfqq, { struct bfq_queue *in_service_bfqq, *new_bfqq; @@ -60697,7 +72611,7 @@ index 480e1a1348596..4b862f18f4b2e 100644 /* * Check delayed stable merge for rotational or non-queueing * devs. 
For this branch to be executed, bfqq must not be -@@ -2762,9 +2786,12 @@ bfq_setup_cooperator(struct bfq_data *bfqd, struct bfq_queue *bfqq, +@@ -2762,9 +2794,12 @@ bfq_setup_cooperator(struct bfq_data *bfqd, struct bfq_queue *bfqq, struct bfq_queue *new_bfqq = bfq_setup_merge(bfqq, stable_merge_bfqq); @@ -60713,7 +72627,7 @@ index 480e1a1348596..4b862f18f4b2e 100644 return new_bfqq; } else return NULL; -@@ -2825,9 +2852,6 @@ bfq_setup_cooperator(struct bfq_data *bfqd, struct bfq_queue *bfqq, +@@ -2825,9 +2860,6 @@ bfq_setup_cooperator(struct bfq_data *bfqd, struct bfq_queue *bfqq, if (bfq_too_late_for_merging(bfqq)) return NULL; @@ -60723,7 +72637,16 @@ index 480e1a1348596..4b862f18f4b2e 100644 if (!io_struct || unlikely(bfqq == &bfqd->oom_bfqq)) return NULL; -@@ -5061,7 +5085,7 @@ static struct request *bfq_dispatch_request(struct blk_mq_hw_ctx *hctx) +@@ -3014,7 +3046,7 @@ bfq_merge_bfqqs(struct bfq_data *bfqd, struct bfq_io_cq *bic, + /* + * Merge queues (that is, let bic redirect its requests to new_bfqq) + */ +- bic_set_bfqq(bic, new_bfqq, 1); ++ bic_set_bfqq(bic, new_bfqq, true); + bfq_mark_bfqq_coop(new_bfqq); + /* + * new_bfqq now belongs to at least two bics (it is a shared queue): +@@ -5061,7 +5093,7 @@ static struct request *bfq_dispatch_request(struct blk_mq_hw_ctx *hctx) struct bfq_data *bfqd = hctx->queue->elevator->elevator_data; struct request *rq; struct bfq_queue *in_serv_queue; @@ -60732,7 +72655,7 @@ index 480e1a1348596..4b862f18f4b2e 100644 spin_lock_irq(&bfqd->lock); -@@ -5069,14 +5093,15 @@ static struct request *bfq_dispatch_request(struct blk_mq_hw_ctx *hctx) +@@ -5069,14 +5101,15 @@ static struct request *bfq_dispatch_request(struct blk_mq_hw_ctx *hctx) waiting_rq = in_serv_queue && bfq_bfqq_wait_request(in_serv_queue); rq = __bfq_dispatch_request(hctx); @@ -60754,7 +72677,7 @@ index 480e1a1348596..4b862f18f4b2e 100644 return rq; } -@@ -5173,7 +5198,7 @@ static void bfq_put_stable_ref(struct bfq_queue *bfqq) +@@ -5173,7 +5206,7 @@ static void bfq_put_stable_ref(struct bfq_queue *bfqq) bfq_put_queue(bfqq); } @@ -60763,7 +72686,32 @@ index 480e1a1348596..4b862f18f4b2e 100644 { struct bfq_queue *__bfqq, *next; -@@ -5579,14 +5604,7 @@ static struct bfq_queue *bfq_get_queue(struct bfq_data *bfqd, +@@ -5218,9 +5251,8 @@ static void bfq_exit_icq_bfqq(struct bfq_io_cq *bic, bool is_sync) + unsigned long flags; + + spin_lock_irqsave(&bfqd->lock, flags); +- bfqq->bic = NULL; +- bfq_exit_bfqq(bfqd, bfqq); + bic_set_bfqq(bic, NULL, is_sync); ++ bfq_exit_bfqq(bfqd, bfqq); + spin_unlock_irqrestore(&bfqd->lock, flags); + } + } +@@ -5327,9 +5359,11 @@ static void bfq_check_ioprio_change(struct bfq_io_cq *bic, struct bio *bio) + + bfqq = bic_to_bfqq(bic, false); + if (bfqq) { +- bfq_release_process_ref(bfqd, bfqq); +- bfqq = bfq_get_queue(bfqd, bio, BLK_RW_ASYNC, bic, true); ++ struct bfq_queue *old_bfqq = bfqq; ++ ++ bfqq = bfq_get_queue(bfqd, bio, false, bic, true); + bic_set_bfqq(bic, bfqq, false); ++ bfq_release_process_ref(bfqd, old_bfqq); + } + + bfqq = bic_to_bfqq(bic, true); +@@ -5579,14 +5613,7 @@ static struct bfq_queue *bfq_get_queue(struct bfq_data *bfqd, struct bfq_queue *bfqq; struct bfq_group *bfqg; @@ -60779,7 +72727,7 @@ index 480e1a1348596..4b862f18f4b2e 100644 if (!is_sync) { async_bfqq = bfq_async_queue_prio(bfqd, bfqg, ioprio_class, ioprio); -@@ -5632,8 +5650,6 @@ out: +@@ -5632,8 +5659,6 @@ out: if (bfqq != &bfqd->oom_bfqq && is_sync && !respawn) bfqq = bfq_do_or_sched_stable_merge(bfqd, bfqq, bic); @@ -60788,7 +72736,7 @@ index 480e1a1348596..4b862f18f4b2e 
100644 return bfqq; } -@@ -5964,6 +5980,8 @@ static inline void bfq_update_insert_stats(struct request_queue *q, +@@ -5964,6 +5989,8 @@ static inline void bfq_update_insert_stats(struct request_queue *q, unsigned int cmd_flags) {} #endif /* CONFIG_BFQ_CGROUP_DEBUG */ @@ -60797,7 +72745,7 @@ index 480e1a1348596..4b862f18f4b2e 100644 static void bfq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq, bool at_head) { -@@ -5979,60 +5997,16 @@ static void bfq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq, +@@ -5979,60 +6006,16 @@ static void bfq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq, bfqg_stats_update_legacy_io(q, rq); #endif spin_lock_irq(&bfqd->lock); @@ -60860,7 +72808,7 @@ index 480e1a1348596..4b862f18f4b2e 100644 if (at_head) list_add(&rq->queuelist, &bfqd->dispatch); else -@@ -6059,7 +6033,6 @@ static void bfq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq, +@@ -6059,7 +6042,6 @@ static void bfq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq, * merge). */ cmd_flags = rq->cmd_flags; @@ -60868,7 +72816,7 @@ index 480e1a1348596..4b862f18f4b2e 100644 spin_unlock_irq(&bfqd->lock); bfq_update_insert_stats(q, bfqq, idle_timer_disabled, -@@ -6453,6 +6426,7 @@ static void bfq_finish_requeue_request(struct request *rq) +@@ -6453,6 +6435,7 @@ static void bfq_finish_requeue_request(struct request *rq) bfq_completed_request(bfqq, bfqd); } bfq_finish_requeue_request_body(bfqq); @@ -60876,7 +72824,37 @@ index 480e1a1348596..4b862f18f4b2e 100644 spin_unlock_irqrestore(&bfqd->lock, flags); /* -@@ -6674,6 +6648,7 @@ static struct bfq_queue *bfq_init_rq(struct request *rq) +@@ -6494,7 +6477,7 @@ bfq_split_bfqq(struct bfq_io_cq *bic, struct bfq_queue *bfqq) + return bfqq; + } + +- bic_set_bfqq(bic, NULL, 1); ++ bic_set_bfqq(bic, NULL, true); + + bfq_put_cooperator(bfqq); + +@@ -6654,6 +6637,12 @@ static struct bfq_queue *bfq_init_rq(struct request *rq) + bfqq = bfq_get_bfqq_handle_split(bfqd, bic, bio, + true, is_sync, + NULL); ++ if (unlikely(bfqq == &bfqd->oom_bfqq)) ++ bfqq_already_existing = true; ++ } else ++ bfqq_already_existing = true; ++ ++ if (!bfqq_already_existing) { + bfqq->waker_bfqq = old_bfqq->waker_bfqq; + bfqq->tentative_waker_bfqq = NULL; + +@@ -6667,13 +6656,13 @@ static struct bfq_queue *bfq_init_rq(struct request *rq) + if (bfqq->waker_bfqq) + hlist_add_head(&bfqq->woken_list_node, + &bfqq->waker_bfqq->woken_list); +- } else +- bfqq_already_existing = true; ++ } + } + } bfqq->allocated++; bfqq->ref++; @@ -60884,7 +72862,17 @@ index 480e1a1348596..4b862f18f4b2e 100644 bfq_log_bfqq(bfqd, bfqq, "get_request %p: bfqq %p, %d", rq, bfqq, bfqq->ref); -@@ -6920,6 +6895,8 @@ static void bfq_exit_queue(struct elevator_queue *e) +@@ -6770,8 +6759,8 @@ bfq_idle_slice_timer_body(struct bfq_data *bfqd, struct bfq_queue *bfqq) + bfq_bfqq_expire(bfqd, bfqq, true, reason); + + schedule_dispatch: +- spin_unlock_irqrestore(&bfqd->lock, flags); + bfq_schedule_dispatch(bfqd); ++ spin_unlock_irqrestore(&bfqd->lock, flags); + } + + /* +@@ -6920,6 +6909,8 @@ static void bfq_exit_queue(struct elevator_queue *e) spin_unlock_irq(&bfqd->lock); #endif @@ -61175,7 +73163,7 @@ index a6fb6a0b42955..ba9120d4fe499 100644 size <<= 9; diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c -index 9a1c5839dd469..ce5858dadca55 100644 +index 9a1c5839dd469..3ee4c1217b636 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -633,6 +633,14 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol, @@ -61255,7 +73243,18 @@ index 
9a1c5839dd469..ce5858dadca55 100644 } } -@@ -1875,12 +1886,8 @@ EXPORT_SYMBOL_GPL(bio_associate_blkg); +@@ -1349,6 +1360,10 @@ retry: + list_for_each_entry_reverse(blkg, &q->blkg_list, q_node) + pol->pd_init_fn(blkg->pd[pol->plid]); + ++ if (pol->pd_online_fn) ++ list_for_each_entry_reverse(blkg, &q->blkg_list, q_node) ++ pol->pd_online_fn(blkg->pd[pol->plid]); ++ + __set_bit(pol->plid, q->blkcg_pols); + ret = 0; + +@@ -1875,12 +1890,8 @@ EXPORT_SYMBOL_GPL(bio_associate_blkg); */ void bio_clone_blkg_association(struct bio *dst, struct bio *src) { @@ -61271,7 +73270,7 @@ index 9a1c5839dd469..ce5858dadca55 100644 EXPORT_SYMBOL_GPL(bio_clone_blkg_association); diff --git a/block/blk-core.c b/block/blk-core.c -index 4d8f5fe915887..13e1fca1e923f 100644 +index 4d8f5fe915887..ed6271dcc1b16 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -49,6 +49,7 @@ @@ -61342,7 +73341,24 @@ index 4d8f5fe915887..13e1fca1e923f 100644 } /* -@@ -887,10 +887,8 @@ static noinline_for_stack bool submit_bio_checks(struct bio *bio) +@@ -698,14 +698,10 @@ static inline bool should_fail_request(struct block_device *part, + static inline bool bio_check_ro(struct bio *bio) + { + if (op_is_write(bio_op(bio)) && bdev_read_only(bio->bi_bdev)) { +- char b[BDEVNAME_SIZE]; +- + if (op_is_flush(bio->bi_opf) && !bio_sectors(bio)) + return false; +- +- WARN_ONCE(1, +- "Trying to write to read-only block-device %s (partno %d)\n", +- bio_devname(bio, b), bio->bi_bdev->bd_partno); ++ pr_warn("Trying to write to read-only block-device %pg\n", ++ bio->bi_bdev); + /* Older lvm-tools actually trigger this */ + return false; + } +@@ -887,10 +883,8 @@ static noinline_for_stack bool submit_bio_checks(struct bio *bio) if (unlikely(!current->io_context)) create_task_io_context(current, GFP_ATOMIC, q->node); @@ -61354,7 +73370,7 @@ index 4d8f5fe915887..13e1fca1e923f 100644 blk_cgroup_bio_start(bio); blkcg_bio_issue_init(bio); -@@ -1293,20 +1291,32 @@ void blk_account_io_start(struct request *rq) +@@ -1293,20 +1287,32 @@ void blk_account_io_start(struct request *rq) } static unsigned long __part_start_io_acct(struct block_device *part, @@ -61391,7 +73407,7 @@ index 4d8f5fe915887..13e1fca1e923f 100644 /** * bio_start_io_acct - start I/O accounting for bio based drivers -@@ -1316,14 +1326,15 @@ static unsigned long __part_start_io_acct(struct block_device *part, +@@ -1316,14 +1322,15 @@ static unsigned long __part_start_io_acct(struct block_device *part, */ unsigned long bio_start_io_acct(struct bio *bio) { @@ -61764,7 +73780,7 @@ index 4526adde01564..c7f71d83eff18 100644 goto cleanup; diff --git a/block/blk-merge.c b/block/blk-merge.c -index 7a5c81c02c800..bbe66a9010bf1 100644 +index 7a5c81c02c800..d1435b6572977 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -7,6 +7,7 @@ @@ -61775,7 +73791,39 @@ index 7a5c81c02c800..bbe66a9010bf1 100644 #include <trace/events/block.h> -@@ -561,6 +562,9 @@ static inline unsigned int blk_rq_get_max_segments(struct request *rq) +@@ -278,6 +279,16 @@ static struct bio *blk_bio_segment_split(struct request_queue *q, + *segs = nsegs; + return NULL; + split: ++ /* ++ * We can't sanely support splitting for a REQ_NOWAIT bio. End it ++ * with EAGAIN if splitting is required and return an error pointer. 
++ */ ++ if (bio->bi_opf & REQ_NOWAIT) { ++ bio->bi_status = BLK_STS_AGAIN; ++ bio_endio(bio); ++ return ERR_PTR(-EAGAIN); ++ } ++ + *segs = nsegs; + + /* +@@ -337,11 +348,13 @@ void __blk_queue_split(struct bio **bio, unsigned int *nr_segs) + break; + } + split = blk_bio_segment_split(q, *bio, &q->bio_split, nr_segs); ++ if (IS_ERR(split)) ++ *bio = split = NULL; + break; + } + + if (split) { +- /* there isn't chance to merge the splitted bio */ ++ /* there isn't chance to merge the split bio */ + split->bi_opf |= REQ_NOMERGE; + + bio_chain(split, *bio); +@@ -561,6 +574,9 @@ static inline unsigned int blk_rq_get_max_segments(struct request *rq) static inline int ll_new_hw_segment(struct request *req, struct bio *bio, unsigned int nr_phys_segs) { @@ -61785,7 +73833,7 @@ index 7a5c81c02c800..bbe66a9010bf1 100644 if (blk_integrity_merge_bio(req->q, req, bio) == false) goto no_merge; -@@ -657,6 +661,9 @@ static int ll_merge_requests_fn(struct request_queue *q, struct request *req, +@@ -657,6 +673,9 @@ static int ll_merge_requests_fn(struct request_queue *q, struct request *req, if (total_phys_segments > blk_rq_get_max_segments(req)) return 0; @@ -61795,7 +73843,7 @@ index 7a5c81c02c800..bbe66a9010bf1 100644 if (blk_integrity_merge_rq(q, req, next) == false) return 0; -@@ -863,6 +870,10 @@ bool blk_rq_merge_ok(struct request *rq, struct bio *bio) +@@ -863,6 +882,10 @@ bool blk_rq_merge_ok(struct request *rq, struct bio *bio) if (rq->rq_disk != bio->bi_bdev->bd_disk) return false; @@ -61844,8 +73892,39 @@ index 0f006cabfd914..35770e33c817a 100644 return ret; } +diff --git a/block/blk-mq-sysfs.c b/block/blk-mq-sysfs.c +index 253c857cba47c..7074ce8d2d03f 100644 +--- a/block/blk-mq-sysfs.c ++++ b/block/blk-mq-sysfs.c +@@ -187,7 +187,7 @@ static int blk_mq_register_hctx(struct blk_mq_hw_ctx *hctx) + { + struct request_queue *q = hctx->queue; + struct blk_mq_ctx *ctx; +- int i, ret; ++ int i, j, ret; + + if (!hctx->nr_ctx) + return 0; +@@ -199,9 +199,16 @@ static int blk_mq_register_hctx(struct blk_mq_hw_ctx *hctx) + hctx_for_each_ctx(hctx, ctx, i) { + ret = kobject_add(&ctx->kobj, &hctx->kobj, "cpu%u", ctx->cpu); + if (ret) +- break; ++ goto out; + } + ++ return 0; ++out: ++ hctx_for_each_ctx(hctx, ctx, j) { ++ if (j < i) ++ kobject_del(&ctx->kobj); ++ } ++ kobject_del(&hctx->kobj); + return ret; + } + diff --git a/block/blk-mq.c b/block/blk-mq.c -index 652a31fc3bb38..1a28ba9017edb 100644 +index 652a31fc3bb38..9f53b4caf9772 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -476,6 +476,8 @@ struct request *blk_mq_alloc_request_hctx(struct request_queue *q, @@ -61929,7 +74008,16 @@ index 652a31fc3bb38..1a28ba9017edb 100644 return BLK_MAX_REQUEST_COUNT; } -@@ -4019,6 +4020,19 @@ unsigned int blk_mq_rq_cpu(struct request *rq) +@@ -2192,6 +2193,8 @@ blk_qc_t blk_mq_submit_bio(struct bio *bio) + + blk_queue_bounce(q, &bio); + __blk_queue_split(&bio, &nr_segs); ++ if (!bio) ++ goto queue_exit; + + if (!bio_integrity_prep(bio)) + goto queue_exit; +@@ -4019,6 +4022,19 @@ unsigned int blk_mq_rq_cpu(struct request *rq) } EXPORT_SYMBOL(blk_mq_rq_cpu); @@ -62299,7 +74387,7 @@ index ff45d8388f487..1b5e57f6115f3 100644 if (err) { diff --git a/block/genhd.c b/block/genhd.c -index ab12ae6e636e8..74e19d67ceab5 100644 +index ab12ae6e636e8..a1d9e785dcf70 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -19,6 +19,7 @@ @@ -62371,7 +74459,18 @@ index ab12ae6e636e8..74e19d67ceab5 100644 ret = bd_register_pending_holders(disk); if (ret < 0) -@@ -530,8 +538,6 @@ out_del_block_link: +@@ -519,8 +527,10 @@ 
out_unregister_bdi: + bdi_unregister(disk->bdi); + out_unregister_queue: + blk_unregister_queue(disk); ++ rq_qos_exit(disk->queue); + out_put_slave_dir: + kobject_put(disk->slave_dir); ++ disk->slave_dir = NULL; + out_put_holder_dir: + kobject_put(disk->part0->bd_holder_dir); + out_del_integrity: +@@ -530,8 +540,6 @@ out_del_block_link: sysfs_remove_link(block_depr, dev_name(ddev)); out_device_del: device_del(ddev); @@ -62380,7 +74479,7 @@ index ab12ae6e636e8..74e19d67ceab5 100644 out_free_ext_minor: if (disk->major == BLOCK_EXT_MAJOR) blk_free_ext_minor(disk->first_minor); -@@ -539,6 +545,20 @@ out_free_ext_minor: +@@ -539,6 +547,20 @@ out_free_ext_minor: } EXPORT_SYMBOL(device_add_disk); @@ -62401,7 +74500,15 @@ index ab12ae6e636e8..74e19d67ceab5 100644 /** * del_gendisk - remove the gendisk * @disk: the struct gendisk to remove -@@ -1082,6 +1102,8 @@ static void disk_release(struct device *dev) +@@ -603,6 +625,7 @@ void del_gendisk(struct gendisk *disk) + + kobject_put(disk->part0->bd_holder_dir); + kobject_put(disk->slave_dir); ++ disk->slave_dir = NULL; + + part_stat_set_all(disk->part0, 0); + disk->part0->bd_stamp = 0; +@@ -1082,6 +1105,8 @@ static void disk_release(struct device *dev) might_sleep(); WARN_ON_ONCE(disk_live(disk)); @@ -62542,10 +74649,152 @@ index 0e4ff245f2bf2..6c830154856fc 100644 case IOPRIO_WHO_USER: uid = make_kuid(current_user_ns(), who); diff --git a/block/mq-deadline.c b/block/mq-deadline.c -index 7f3c3932b723e..cd2342d297048 100644 +index 7f3c3932b723e..aaef5088a3baf 100644 --- a/block/mq-deadline.c +++ b/block/mq-deadline.c -@@ -811,7 +811,7 @@ SHOW_JIFFIES(deadline_read_expire_show, dd->fifo_expire[DD_READ]); +@@ -153,6 +153,20 @@ static u8 dd_rq_ioclass(struct request *rq) + return IOPRIO_PRIO_CLASS(req_get_ioprio(rq)); + } + ++/* ++ * get the request before `rq' in sector-sorted order ++ */ ++static inline struct request * ++deadline_earlier_request(struct request *rq) ++{ ++ struct rb_node *node = rb_prev(&rq->rb_node); ++ ++ if (node) ++ return rb_entry_rq(node); ++ ++ return NULL; ++} ++ + /* + * get the request after `rq' in sector-sorted order + */ +@@ -288,6 +302,39 @@ static inline int deadline_check_fifo(struct dd_per_prio *per_prio, + return 0; + } + ++/* ++ * Check if rq has a sequential request preceding it. ++ */ ++static bool deadline_is_seq_write(struct deadline_data *dd, struct request *rq) ++{ ++ struct request *prev = deadline_earlier_request(rq); ++ ++ if (!prev) ++ return false; ++ ++ return blk_rq_pos(prev) + blk_rq_sectors(prev) == blk_rq_pos(rq); ++} ++ ++/* ++ * Skip all write requests that are sequential from @rq, even if we cross ++ * a zone boundary. ++ */ ++static struct request *deadline_skip_seq_writes(struct deadline_data *dd, ++ struct request *rq) ++{ ++ sector_t pos = blk_rq_pos(rq); ++ sector_t skipped_sectors = 0; ++ ++ while (rq) { ++ if (blk_rq_pos(rq) != pos + skipped_sectors) ++ break; ++ skipped_sectors += blk_rq_sectors(rq); ++ rq = deadline_latter_request(rq); ++ } ++ ++ return rq; ++} ++ + /* + * For the specified data direction, return the next request to + * dispatch using arrival ordered lists. +@@ -308,11 +355,16 @@ deadline_fifo_request(struct deadline_data *dd, struct dd_per_prio *per_prio, + + /* + * Look for a write request that can be dispatched, that is one with +- * an unlocked target zone. ++ * an unlocked target zone. 
For some HDDs, breaking a sequential ++ * write stream can lead to lower throughput, so make sure to preserve ++ * sequential write streams, even if that stream crosses into the next ++ * zones and these zones are unlocked. + */ + spin_lock_irqsave(&dd->zone_lock, flags); + list_for_each_entry(rq, &per_prio->fifo_list[DD_WRITE], queuelist) { +- if (blk_req_can_dispatch_to_zone(rq)) ++ if (blk_req_can_dispatch_to_zone(rq) && ++ (blk_queue_nonrot(rq->q) || ++ !deadline_is_seq_write(dd, rq))) + goto out; + } + rq = NULL; +@@ -342,13 +394,19 @@ deadline_next_request(struct deadline_data *dd, struct dd_per_prio *per_prio, + + /* + * Look for a write request that can be dispatched, that is one with +- * an unlocked target zone. ++ * an unlocked target zone. For some HDDs, breaking a sequential ++ * write stream can lead to lower throughput, so make sure to preserve ++ * sequential write streams, even if that stream crosses into the next ++ * zones and these zones are unlocked. + */ + spin_lock_irqsave(&dd->zone_lock, flags); + while (rq) { + if (blk_req_can_dispatch_to_zone(rq)) + break; +- rq = deadline_latter_request(rq); ++ if (blk_queue_nonrot(rq->q)) ++ rq = deadline_latter_request(rq); ++ else ++ rq = deadline_skip_seq_writes(dd, rq); + } + spin_unlock_irqrestore(&dd->zone_lock, flags); + +@@ -733,6 +791,18 @@ static void dd_prepare_request(struct request *rq) + rq->elv.priv[0] = NULL; + } + ++static bool dd_has_write_work(struct blk_mq_hw_ctx *hctx) ++{ ++ struct deadline_data *dd = hctx->queue->elevator->elevator_data; ++ enum dd_prio p; ++ ++ for (p = 0; p <= DD_PRIO_MAX; p++) ++ if (!list_empty_careful(&dd->per_prio[p].fifo_list[DD_WRITE])) ++ return true; ++ ++ return false; ++} ++ + /* + * Callback from inside blk_mq_free_request(). + * +@@ -755,7 +825,6 @@ static void dd_finish_request(struct request *rq) + struct deadline_data *dd = q->elevator->elevator_data; + const u8 ioprio_class = dd_rq_ioclass(rq); + const enum dd_prio prio = ioprio_class_to_prio[ioprio_class]; +- struct dd_per_prio *per_prio = &dd->per_prio[prio]; + + /* + * The block layer core may call dd_finish_request() without having +@@ -771,9 +840,10 @@ static void dd_finish_request(struct request *rq) + + spin_lock_irqsave(&dd->zone_lock, flags); + blk_req_zone_write_unlock(rq); +- if (!list_empty(&per_prio->fifo_list[DD_WRITE])) +- blk_mq_sched_mark_restart_hctx(rq->mq_hctx); + spin_unlock_irqrestore(&dd->zone_lock, flags); ++ ++ if (dd_has_write_work(rq->mq_hctx)) ++ blk_mq_sched_mark_restart_hctx(rq->mq_hctx); + } + } + +@@ -811,7 +881,7 @@ SHOW_JIFFIES(deadline_read_expire_show, dd->fifo_expire[DD_READ]); SHOW_JIFFIES(deadline_write_expire_show, dd->fifo_expire[DD_WRITE]); SHOW_INT(deadline_writes_starved_show, dd->writes_starved); SHOW_INT(deadline_front_merges_show, dd->front_merges); @@ -62554,7 +74803,7 @@ index 7f3c3932b723e..cd2342d297048 100644 SHOW_INT(deadline_fifo_batch_show, dd->fifo_batch); #undef SHOW_INT #undef SHOW_JIFFIES -@@ -840,7 +840,7 @@ STORE_JIFFIES(deadline_read_expire_store, &dd->fifo_expire[DD_READ], 0, INT_MAX) +@@ -840,7 +910,7 @@ STORE_JIFFIES(deadline_read_expire_store, &dd->fifo_expire[DD_READ], 0, INT_MAX) STORE_JIFFIES(deadline_write_expire_store, &dd->fifo_expire[DD_WRITE], 0, INT_MAX); STORE_INT(deadline_writes_starved_store, &dd->writes_starved, INT_MIN, INT_MAX); STORE_INT(deadline_front_merges_store, &dd->front_merges, 0, 1); @@ -62575,6 +74824,72 @@ index 7bea19dd9458f..b9e9af84f5188 100644 #include <linux/slab.h> #include <linux/ctype.h> #include <linux/genhd.h> +diff --git 
a/block/sed-opal.c b/block/sed-opal.c +index daafadbb88cae..0ac5a4f3f2261 100644 +--- a/block/sed-opal.c ++++ b/block/sed-opal.c +@@ -88,8 +88,8 @@ struct opal_dev { + u64 lowest_lba; + + size_t pos; +- u8 cmd[IO_BUFFER_LENGTH]; +- u8 resp[IO_BUFFER_LENGTH]; ++ u8 *cmd; ++ u8 *resp; + + struct parsed_resp parsed; + size_t prev_d_len; +@@ -2134,6 +2134,8 @@ void free_opal_dev(struct opal_dev *dev) + return; + + clean_opal_dev(dev); ++ kfree(dev->resp); ++ kfree(dev->cmd); + kfree(dev); + } + EXPORT_SYMBOL(free_opal_dev); +@@ -2146,17 +2148,39 @@ struct opal_dev *init_opal_dev(void *data, sec_send_recv *send_recv) + if (!dev) + return NULL; + ++ /* ++ * Presumably DMA-able buffers must be cache-aligned. Kmalloc makes ++ * sure the allocated buffer is DMA-safe in that regard. ++ */ ++ dev->cmd = kmalloc(IO_BUFFER_LENGTH, GFP_KERNEL); ++ if (!dev->cmd) ++ goto err_free_dev; ++ ++ dev->resp = kmalloc(IO_BUFFER_LENGTH, GFP_KERNEL); ++ if (!dev->resp) ++ goto err_free_cmd; ++ + INIT_LIST_HEAD(&dev->unlk_lst); + mutex_init(&dev->dev_lock); + dev->data = data; + dev->send_recv = send_recv; + if (check_opal_support(dev) != 0) { + pr_debug("Opal is not supported on this device\n"); +- kfree(dev); +- return NULL; ++ goto err_free_resp; + } + + return dev; ++ ++err_free_resp: ++ kfree(dev->resp); ++ ++err_free_cmd: ++ kfree(dev->cmd); ++ ++err_free_dev: ++ kfree(dev); ++ ++ return NULL; + } + EXPORT_SYMBOL(init_opal_dev); + diff --git a/certs/blacklist_hashes.c b/certs/blacklist_hashes.c index 344892337be07..d5961aa3d3380 100644 --- a/certs/blacklist_hashes.c @@ -63030,7 +75345,7 @@ index 72fe480f9bd67..0000000000000 -MODULE_ALIAS_CRYPTO("blake2s-256-generic"); -MODULE_LICENSE("GPL v2"); diff --git a/crypto/cryptd.c b/crypto/cryptd.c -index a1bea0f4baa88..668095eca0faf 100644 +index a1bea0f4baa88..ca3a40fc7da91 100644 --- a/crypto/cryptd.c +++ b/crypto/cryptd.c @@ -39,6 +39,10 @@ struct cryptd_cpu_queue { @@ -63044,7 +75359,21 @@ index a1bea0f4baa88..668095eca0faf 100644 struct cryptd_cpu_queue __percpu *cpu_queue; }; -@@ -125,28 +129,28 @@ static void cryptd_fini_queue(struct cryptd_queue *queue) +@@ -64,11 +68,12 @@ struct aead_instance_ctx { + + struct cryptd_skcipher_ctx { + refcount_t refcnt; +- struct crypto_sync_skcipher *child; ++ struct crypto_skcipher *child; + }; + + struct cryptd_skcipher_request_ctx { + crypto_completion_t complete; ++ struct skcipher_request req; + }; + + struct cryptd_hash_ctx { +@@ -125,28 +130,28 @@ static void cryptd_fini_queue(struct cryptd_queue *queue) static int cryptd_enqueue_request(struct cryptd_queue *queue, struct crypto_async_request *request) { @@ -63080,7 +75409,7 @@ index a1bea0f4baa88..668095eca0faf 100644 return err; } -@@ -162,15 +166,10 @@ static void cryptd_queue_worker(struct work_struct *work) +@@ -162,15 +167,10 @@ static void cryptd_queue_worker(struct work_struct *work) cpu_queue = container_of(work, struct cryptd_cpu_queue, work); /* * Only handle one request at a time to avoid hogging crypto workqueue. 
@@ -63096,6 +75425,91 @@ index a1bea0f4baa88..668095eca0faf 100644 local_bh_enable(); if (!req) +@@ -228,13 +228,13 @@ static int cryptd_skcipher_setkey(struct crypto_skcipher *parent, + const u8 *key, unsigned int keylen) + { + struct cryptd_skcipher_ctx *ctx = crypto_skcipher_ctx(parent); +- struct crypto_sync_skcipher *child = ctx->child; ++ struct crypto_skcipher *child = ctx->child; + +- crypto_sync_skcipher_clear_flags(child, CRYPTO_TFM_REQ_MASK); +- crypto_sync_skcipher_set_flags(child, +- crypto_skcipher_get_flags(parent) & +- CRYPTO_TFM_REQ_MASK); +- return crypto_sync_skcipher_setkey(child, key, keylen); ++ crypto_skcipher_clear_flags(child, CRYPTO_TFM_REQ_MASK); ++ crypto_skcipher_set_flags(child, ++ crypto_skcipher_get_flags(parent) & ++ CRYPTO_TFM_REQ_MASK); ++ return crypto_skcipher_setkey(child, key, keylen); + } + + static void cryptd_skcipher_complete(struct skcipher_request *req, int err) +@@ -259,13 +259,13 @@ static void cryptd_skcipher_encrypt(struct crypto_async_request *base, + struct cryptd_skcipher_request_ctx *rctx = skcipher_request_ctx(req); + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct cryptd_skcipher_ctx *ctx = crypto_skcipher_ctx(tfm); +- struct crypto_sync_skcipher *child = ctx->child; +- SYNC_SKCIPHER_REQUEST_ON_STACK(subreq, child); ++ struct skcipher_request *subreq = &rctx->req; ++ struct crypto_skcipher *child = ctx->child; + + if (unlikely(err == -EINPROGRESS)) + goto out; + +- skcipher_request_set_sync_tfm(subreq, child); ++ skcipher_request_set_tfm(subreq, child); + skcipher_request_set_callback(subreq, CRYPTO_TFM_REQ_MAY_SLEEP, + NULL, NULL); + skcipher_request_set_crypt(subreq, req->src, req->dst, req->cryptlen, +@@ -287,13 +287,13 @@ static void cryptd_skcipher_decrypt(struct crypto_async_request *base, + struct cryptd_skcipher_request_ctx *rctx = skcipher_request_ctx(req); + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct cryptd_skcipher_ctx *ctx = crypto_skcipher_ctx(tfm); +- struct crypto_sync_skcipher *child = ctx->child; +- SYNC_SKCIPHER_REQUEST_ON_STACK(subreq, child); ++ struct skcipher_request *subreq = &rctx->req; ++ struct crypto_skcipher *child = ctx->child; + + if (unlikely(err == -EINPROGRESS)) + goto out; + +- skcipher_request_set_sync_tfm(subreq, child); ++ skcipher_request_set_tfm(subreq, child); + skcipher_request_set_callback(subreq, CRYPTO_TFM_REQ_MAY_SLEEP, + NULL, NULL); + skcipher_request_set_crypt(subreq, req->src, req->dst, req->cryptlen, +@@ -344,9 +344,10 @@ static int cryptd_skcipher_init_tfm(struct crypto_skcipher *tfm) + if (IS_ERR(cipher)) + return PTR_ERR(cipher); + +- ctx->child = (struct crypto_sync_skcipher *)cipher; ++ ctx->child = cipher; + crypto_skcipher_set_reqsize( +- tfm, sizeof(struct cryptd_skcipher_request_ctx)); ++ tfm, sizeof(struct cryptd_skcipher_request_ctx) + ++ crypto_skcipher_reqsize(cipher)); + return 0; + } + +@@ -354,7 +355,7 @@ static void cryptd_skcipher_exit_tfm(struct crypto_skcipher *tfm) + { + struct cryptd_skcipher_ctx *ctx = crypto_skcipher_ctx(tfm); + +- crypto_free_sync_skcipher(ctx->child); ++ crypto_free_skcipher(ctx->child); + } + + static void cryptd_skcipher_free(struct skcipher_instance *inst) +@@ -932,7 +933,7 @@ struct crypto_skcipher *cryptd_skcipher_child(struct cryptd_skcipher *tfm) + { + struct cryptd_skcipher_ctx *ctx = crypto_skcipher_ctx(&tfm->base); + +- return &ctx->child->base; ++ return ctx->child; + } + EXPORT_SYMBOL_GPL(cryptd_skcipher_child); + diff --git a/crypto/drbg.c b/crypto/drbg.c index 
ea85d4a0fe9e9..761104e93d44a 100644 --- a/crypto/drbg.c @@ -63625,10 +76039,26 @@ index 8ac3e73e8ea65..9d804831c8b3f 100644 hash_name = crypto_attr_alg_name(tb[2]); if (IS_ERR(hash_name)) { diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c -index 82b0400985a51..3362897bf61b9 100644 +index 82b0400985a51..4ada7e7493904 100644 --- a/crypto/tcrypt.c +++ b/crypto/tcrypt.c -@@ -1333,7 +1333,7 @@ static void test_mb_skcipher_speed(const char *algo, int enc, int secs, +@@ -1295,15 +1295,6 @@ static void test_mb_skcipher_speed(const char *algo, int enc, int secs, + goto out_free_tfm; + } + +- +- for (i = 0; i < num_mb; ++i) +- if (testmgr_alloc_buf(data[i].xbuf)) { +- while (i--) +- testmgr_free_buf(data[i].xbuf); +- goto out_free_tfm; +- } +- +- + for (i = 0; i < num_mb; ++i) { + data[i].req = skcipher_request_alloc(tfm, GFP_KERNEL); + if (!data[i].req) { +@@ -1333,7 +1324,7 @@ static void test_mb_skcipher_speed(const char *algo, int enc, int secs, if (bs > XBUFSIZE * PAGE_SIZE) { pr_err("template (%u) too big for buffer (%lu)\n", @@ -63637,7 +76067,7 @@ index 82b0400985a51..3362897bf61b9 100644 goto out; } -@@ -1386,8 +1386,7 @@ static void test_mb_skcipher_speed(const char *algo, int enc, int secs, +@@ -1386,8 +1377,7 @@ static void test_mb_skcipher_speed(const char *algo, int enc, int secs, memset(cur->xbuf[p], 0xff, k); skcipher_request_set_crypt(cur->req, cur->sg, @@ -63647,7 +76077,7 @@ index 82b0400985a51..3362897bf61b9 100644 } if (secs) { -@@ -1864,10 +1863,6 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb) +@@ -1864,10 +1854,6 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb) ret += tcrypt_test("rmd160"); break; @@ -63658,7 +76088,7 @@ index 82b0400985a51..3362897bf61b9 100644 case 42: ret += tcrypt_test("blake2b-512"); break; -@@ -2435,10 +2430,6 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb) +@@ -2435,10 +2421,6 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb) test_hash_speed("rmd160", sec, generic_hash_speed_template); if (mode > 300 && mode < 400) break; fallthrough; @@ -63669,7 +76099,7 @@ index 82b0400985a51..3362897bf61b9 100644 case 317: test_hash_speed("blake2b-512", sec, generic_hash_speed_template); if (mode > 300 && mode < 400) break; -@@ -2547,10 +2538,6 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb) +@@ -2547,10 +2529,6 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb) test_ahash_speed("rmd160", sec, generic_hash_speed_template); if (mode > 400 && mode < 500) break; fallthrough; @@ -63964,6 +76394,19 @@ index be5d40ae14882..a110338c860c7 100644 obj-$(CONFIG_VDPA) += vdpa/ obj-$(CONFIG_XEN) += xen/ +diff --git a/drivers/accessibility/speakup/main.c b/drivers/accessibility/speakup/main.c +index d726537fa16ce..7b2016534162c 100644 +--- a/drivers/accessibility/speakup/main.c ++++ b/drivers/accessibility/speakup/main.c +@@ -1778,7 +1778,7 @@ static void speakup_con_update(struct vc_data *vc) + { + unsigned long flags; + +- if (!speakup_console[vc->vc_num] || spk_parked) ++ if (!speakup_console[vc->vc_num] || spk_parked || !synth) + return; + if (!spin_trylock_irqsave(&speakup_info.spinlock, flags)) + /* Speakup output, discard */ diff --git a/drivers/accessibility/speakup/speakup_dectlk.c b/drivers/accessibility/speakup/speakup_dectlk.c index 580ec796816bc..78ca4987e619e 100644 --- a/drivers/accessibility/speakup/speakup_dectlk.c @@ -63977,7 +76420,7 @@ index 580ec796816bc..78ca4987e619e 100644 { VOL, .u.n = 
{"[:dv g5 %d] ", 86, 60, 86, 0, 0, NULL } }, { PUNCT, .u.n = {"[:pu %c] ", 0, 0, 2, 0, 0, "nsa" } }, diff --git a/drivers/accessibility/speakup/spk_ttyio.c b/drivers/accessibility/speakup/spk_ttyio.c -index 0d1f397cd8961..08cf8a17754bb 100644 +index 0d1f397cd8961..07373b3debd1e 100644 --- a/drivers/accessibility/speakup/spk_ttyio.c +++ b/drivers/accessibility/speakup/spk_ttyio.c @@ -88,7 +88,7 @@ static int spk_ttyio_receive_buf2(struct tty_struct *tty, @@ -63998,6 +76441,16 @@ index 0d1f397cd8961..08cf8a17754bb 100644 return rv; } +@@ -354,6 +354,9 @@ void spk_ttyio_release(struct spk_synth *in_synth) + { + struct tty_struct *tty = in_synth->dev; + ++ if (tty == NULL) ++ return; ++ + tty_lock(tty); + + if (tty->ops->close) diff --git a/drivers/acpi/ac.c b/drivers/acpi/ac.c index b0cb662233f1a..81aff651a0d49 100644 --- a/drivers/acpi/ac.c @@ -64252,6 +76705,41 @@ index d41b810e367c4..4366d36ef1198 100644 /***************************************************************************** * +diff --git a/drivers/acpi/acpica/dsmethod.c b/drivers/acpi/acpica/dsmethod.c +index 8e011e59b9b48..ee1832ba39a24 100644 +--- a/drivers/acpi/acpica/dsmethod.c ++++ b/drivers/acpi/acpica/dsmethod.c +@@ -517,7 +517,7 @@ acpi_ds_call_control_method(struct acpi_thread_state *thread, + info = ACPI_ALLOCATE_ZEROED(sizeof(struct acpi_evaluate_info)); + if (!info) { + status = AE_NO_MEMORY; +- goto cleanup; ++ goto pop_walk_state; + } + + info->parameters = &this_walk_state->operands[0]; +@@ -529,7 +529,7 @@ acpi_ds_call_control_method(struct acpi_thread_state *thread, + + ACPI_FREE(info); + if (ACPI_FAILURE(status)) { +- goto cleanup; ++ goto pop_walk_state; + } + + next_walk_state->method_nesting_depth = +@@ -575,6 +575,12 @@ acpi_ds_call_control_method(struct acpi_thread_state *thread, + + return_ACPI_STATUS(status); + ++pop_walk_state: ++ ++ /* On error, pop the walk state to be deleted from thread */ ++ ++ acpi_ds_pop_walk_state(thread); ++ + cleanup: + + /* On error, we must terminate the method properly */ diff --git a/drivers/acpi/acpica/exfield.c b/drivers/acpi/acpica/exfield.c index 06f3c9df1e22d..8618500f23b39 100644 --- a/drivers/acpi/acpica/exfield.c @@ -64408,6 +76896,24 @@ index 915c2433463d7..e7c30ce06e189 100644 } /* Null child means "get first node" */ +diff --git a/drivers/acpi/acpica/utcopy.c b/drivers/acpi/acpica/utcopy.c +index d9877153f4001..fdd503bb69c47 100644 +--- a/drivers/acpi/acpica/utcopy.c ++++ b/drivers/acpi/acpica/utcopy.c +@@ -916,13 +916,6 @@ acpi_ut_copy_ipackage_to_ipackage(union acpi_operand_object *source_obj, + status = acpi_ut_walk_package_tree(source_obj, dest_obj, + acpi_ut_copy_ielement_to_ielement, + walk_state); +- if (ACPI_FAILURE(status)) { +- +- /* On failure, delete the destination package object */ +- +- acpi_ut_remove_reference(dest_obj); +- } +- + return_ACPI_STATUS(status); + } + diff --git a/drivers/acpi/acpica/utdelete.c b/drivers/acpi/acpica/utdelete.c index e5ba9795ec696..8d7736d2d2699 100644 --- a/drivers/acpi/acpica/utdelete.c @@ -64528,9 +77034,18 @@ index 242f3c2d55330..698d67cee0527 100644 __setup("erst_disable", setup_erst_disable); diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c -index 0c8330ed1ffd5..d490670f8d55a 100644 +index 0c8330ed1ffd5..8678e162181f4 100644 --- a/drivers/acpi/apei/ghes.c +++ b/drivers/acpi/apei/ghes.c +@@ -163,7 +163,7 @@ static void ghes_unmap(void __iomem *vaddr, enum fixed_addresses fixmap_idx) + clear_fixmap(fixmap_idx); + } + +-int ghes_estatus_pool_init(int num_ghes) ++int ghes_estatus_pool_init(unsigned int 
num_ghes) + { + unsigned long addr, len; + int rc; @@ -985,7 +985,7 @@ static void ghes_proc_in_irq(struct irq_work *irq_work) ghes_estatus_cache_add(generic, estatus); } @@ -65215,6 +77730,79 @@ index d91b560e88674..54b2be94d23dc 100644 bool busy_polling; unsigned int polling_guard; }; +diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c +index 7dd80acf92c78..2575d6c51f898 100644 +--- a/drivers/acpi/nfit/core.c ++++ b/drivers/acpi/nfit/core.c +@@ -3676,8 +3676,8 @@ void acpi_nfit_shutdown(void *data) + + mutex_lock(&acpi_desc->init_mutex); + set_bit(ARS_CANCEL, &acpi_desc->scrub_flags); +- cancel_delayed_work_sync(&acpi_desc->dwork); + mutex_unlock(&acpi_desc->init_mutex); ++ cancel_delayed_work_sync(&acpi_desc->dwork); + + /* + * Bounce the nvdimm bus lock to make sure any in-flight +diff --git a/drivers/acpi/numa/hmat.c b/drivers/acpi/numa/hmat.c +index c3d783aca196f..b42653707fdcd 100644 +--- a/drivers/acpi/numa/hmat.c ++++ b/drivers/acpi/numa/hmat.c +@@ -563,17 +563,26 @@ static int initiator_cmp(void *priv, const struct list_head *a, + { + struct memory_initiator *ia; + struct memory_initiator *ib; +- unsigned long *p_nodes = priv; + + ia = list_entry(a, struct memory_initiator, node); + ib = list_entry(b, struct memory_initiator, node); + +- set_bit(ia->processor_pxm, p_nodes); +- set_bit(ib->processor_pxm, p_nodes); +- + return ia->processor_pxm - ib->processor_pxm; + } + ++static int initiators_to_nodemask(unsigned long *p_nodes) ++{ ++ struct memory_initiator *initiator; ++ ++ if (list_empty(&initiators)) ++ return -ENXIO; ++ ++ list_for_each_entry(initiator, &initiators, node) ++ set_bit(initiator->processor_pxm, p_nodes); ++ ++ return 0; ++} ++ + static void hmat_register_target_initiators(struct memory_target *target) + { + static DECLARE_BITMAP(p_nodes, MAX_NUMNODES); +@@ -610,7 +619,10 @@ static void hmat_register_target_initiators(struct memory_target *target) + * initiators. + */ + bitmap_zero(p_nodes, MAX_NUMNODES); +- list_sort(p_nodes, &initiators, initiator_cmp); ++ list_sort(NULL, &initiators, initiator_cmp); ++ if (initiators_to_nodemask(p_nodes) < 0) ++ return; ++ + if (!access0done) { + for (i = WRITE_LATENCY; i <= READ_BANDWIDTH; i++) { + loc = localities_types[i]; +@@ -644,8 +656,9 @@ static void hmat_register_target_initiators(struct memory_target *target) + + /* Access 1 ignores Generic Initiators */ + bitmap_zero(p_nodes, MAX_NUMNODES); +- list_sort(p_nodes, &initiators, initiator_cmp); +- best = 0; ++ if (initiators_to_nodemask(p_nodes) < 0) ++ return; ++ + for (i = WRITE_LATENCY; i <= READ_BANDWIDTH; i++) { + loc = localities_types[i]; + if (!loc) diff --git a/drivers/acpi/pci_mcfg.c b/drivers/acpi/pci_mcfg.c index 53cab975f612c..63b98eae5e75e 100644 --- a/drivers/acpi/pci_mcfg.c @@ -65460,11 +78048,70 @@ index f0ed4414edb1f..c95eedd58f5bf 100644 } out: +diff --git a/drivers/acpi/prmt.c b/drivers/acpi/prmt.c +index 89c22bc550570..09c0af8a46f0a 100644 +--- a/drivers/acpi/prmt.c ++++ b/drivers/acpi/prmt.c +@@ -219,6 +219,11 @@ static acpi_status acpi_platformrt_space_handler(u32 function, + efi_status_t status; + struct prm_context_buffer context; + ++ if (!efi_enabled(EFI_RUNTIME_SERVICES)) { ++ pr_err_ratelimited("PRM: EFI runtime services no longer available\n"); ++ return AE_NO_HANDLER; ++ } ++ + /* + * The returned acpi_status will always be AE_OK. Error values will be + * saved in the first byte of the PRM message buffer to be used by ASL. 
+@@ -308,6 +313,11 @@ void __init init_prmt(void) + + pr_info("PRM: found %u modules\n", mc); + ++ if (!efi_enabled(EFI_RUNTIME_SERVICES)) { ++ pr_err("PRM: EFI runtime services unavailable\n"); ++ return; ++ } ++ + status = acpi_install_address_space_handler(ACPI_ROOT_OBJECT, + ACPI_ADR_SPACE_PLATFORM_RT, + &acpi_platformrt_space_handler, diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c -index f37fba9e5ba0b..dc880dad2ade5 100644 +index f37fba9e5ba0b..e9116db1e3527 100644 --- a/drivers/acpi/processor_idle.c +++ b/drivers/acpi/processor_idle.c -@@ -604,7 +604,7 @@ static DEFINE_RAW_SPINLOCK(c3_lock); +@@ -531,10 +531,27 @@ static void wait_for_freeze(void) + /* No delay is needed if we are in guest */ + if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) + return; ++ /* ++ * Modern (>=Nehalem) Intel systems use ACPI via intel_idle, ++ * not this code. Assume that any Intel systems using this ++ * are ancient and may need the dummy wait. This also assumes ++ * that the motivating chipset issue was Intel-only. ++ */ ++ if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) ++ return; + #endif +- /* Dummy wait op - must do something useless after P_LVL2 read +- because chipsets cannot guarantee that STPCLK# signal +- gets asserted in time to freeze execution properly. */ ++ /* ++ * Dummy wait op - must do something useless after P_LVL2 read ++ * because chipsets cannot guarantee that STPCLK# signal gets ++ * asserted in time to freeze execution properly ++ * ++ * This workaround has been in place since the original ACPI ++ * implementation was merged, circa 2002. ++ * ++ * If a profile is pointing to this instruction, please first ++ * consider moving your system to a more modern idle ++ * mechanism. ++ */ + inl(acpi_gbl_FADT.xpm_timer_block.address); + } + +@@ -604,7 +621,7 @@ static DEFINE_RAW_SPINLOCK(c3_lock); * @cx: Target state context * @index: index of target state */ @@ -65473,7 +78120,7 @@ index f37fba9e5ba0b..dc880dad2ade5 100644 struct acpi_processor *pr, struct acpi_processor_cx *cx, int index) -@@ -661,7 +661,7 @@ static int acpi_idle_enter_bm(struct cpuidle_driver *drv, +@@ -661,7 +678,7 @@ static int acpi_idle_enter_bm(struct cpuidle_driver *drv, return index; } @@ -65482,7 +78129,7 @@ index f37fba9e5ba0b..dc880dad2ade5 100644 struct cpuidle_driver *drv, int index) { struct acpi_processor_cx *cx = per_cpu(acpi_cstate[index], dev->cpu); -@@ -690,7 +690,7 @@ static int acpi_idle_enter(struct cpuidle_device *dev, +@@ -690,7 +707,7 @@ static int acpi_idle_enter(struct cpuidle_device *dev, return index; } @@ -65491,7 +78138,7 @@ index f37fba9e5ba0b..dc880dad2ade5 100644 struct cpuidle_driver *drv, int index) { struct acpi_processor_cx *cx = per_cpu(acpi_cstate[index], dev->cpu); -@@ -789,9 +789,11 @@ static int acpi_processor_setup_cstates(struct acpi_processor *pr) +@@ -789,9 +806,11 @@ static int acpi_processor_setup_cstates(struct acpi_processor *pr) state->enter = acpi_idle_enter; state->flags = 0; @@ -65505,7 +78152,7 @@ index f37fba9e5ba0b..dc880dad2ade5 100644 } /* * Halt-induced C1 is not good for ->enter_s2idle, because it -@@ -1075,6 +1077,11 @@ static int flatten_lpi_states(struct acpi_processor *pr, +@@ -1075,6 +1094,11 @@ static int flatten_lpi_states(struct acpi_processor *pr, return 0; } @@ -65517,7 +78164,7 @@ index f37fba9e5ba0b..dc880dad2ade5 100644 static int acpi_processor_get_lpi_info(struct acpi_processor *pr) { int ret, i; -@@ -1083,6 +1090,11 @@ static int acpi_processor_get_lpi_info(struct acpi_processor *pr) +@@ -1083,6 +1107,11 @@ static int 
acpi_processor_get_lpi_info(struct acpi_processor *pr) struct acpi_device *d = NULL; struct acpi_lpi_states_array info[2], *tmp, *prev, *curr; @@ -65529,7 +78176,7 @@ index f37fba9e5ba0b..dc880dad2ade5 100644 if (!osc_pc_lpi_support_confirmed) return -EOPNOTSUPP; -@@ -1134,11 +1146,6 @@ static int acpi_processor_get_lpi_info(struct acpi_processor *pr) +@@ -1134,11 +1163,6 @@ static int acpi_processor_get_lpi_info(struct acpi_processor *pr) return 0; } @@ -65644,7 +78291,7 @@ index e312ebaed8db4..488915328646e 100644 return NULL; diff --git a/drivers/acpi/resource.c b/drivers/acpi/resource.c -index ee78a210c6068..19358a641610d 100644 +index ee78a210c6068..33921949bd8fd 100644 --- a/drivers/acpi/resource.c +++ b/drivers/acpi/resource.c @@ -16,6 +16,7 @@ @@ -65655,7 +78302,7 @@ index ee78a210c6068..19358a641610d 100644 #ifdef CONFIG_X86 #define valid_IRQ(i) (((i) != 0) && ((i) != 2)) -@@ -380,9 +381,68 @@ unsigned int acpi_dev_get_irq_type(int triggering, int polarity) +@@ -380,9 +381,120 @@ unsigned int acpi_dev_get_irq_type(int triggering, int polarity) } EXPORT_SYMBOL_GPL(acpi_dev_get_irq_type); @@ -65677,16 +78324,68 @@ index ee78a210c6068..19358a641610d 100644 + { } +}; + ++static const struct dmi_system_id asus_laptop[] = { ++ { ++ .ident = "Asus Vivobook K3402ZA", ++ .matches = { ++ DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), ++ DMI_MATCH(DMI_BOARD_NAME, "K3402ZA"), ++ }, ++ }, ++ { ++ .ident = "Asus Vivobook K3502ZA", ++ .matches = { ++ DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), ++ DMI_MATCH(DMI_BOARD_NAME, "K3502ZA"), ++ }, ++ }, ++ { } ++}; ++ ++static const struct dmi_system_id lenovo_laptop[] = { ++ { ++ .ident = "LENOVO IdeaPad Flex 5 14ALC7", ++ .matches = { ++ DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), ++ DMI_MATCH(DMI_PRODUCT_NAME, "82R9"), ++ }, ++ }, ++ { ++ .ident = "LENOVO IdeaPad Flex 5 16ALC7", ++ .matches = { ++ DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), ++ DMI_MATCH(DMI_PRODUCT_NAME, "82RA"), ++ }, ++ }, ++ { } ++}; ++ ++static const struct dmi_system_id schenker_gm_rg[] = { ++ { ++ .ident = "XMG CORE 15 (M22)", ++ .matches = { ++ DMI_MATCH(DMI_SYS_VENDOR, "SchenkerTechnologiesGmbH"), ++ DMI_MATCH(DMI_BOARD_NAME, "GMxRGxx"), ++ }, ++ }, ++ { } ++}; ++ +struct irq_override_cmp { + const struct dmi_system_id *system; + unsigned char irq; + unsigned char triggering; + unsigned char polarity; + unsigned char shareable; ++ bool override; +}; + -+static const struct irq_override_cmp skip_override_table[] = { -+ { medion_laptop, 1, ACPI_LEVEL_SENSITIVE, ACPI_ACTIVE_LOW, 0 }, ++static const struct irq_override_cmp override_table[] = { ++ { medion_laptop, 1, ACPI_LEVEL_SENSITIVE, ACPI_ACTIVE_LOW, 0, false }, ++ { asus_laptop, 1, ACPI_LEVEL_SENSITIVE, ACPI_ACTIVE_LOW, 0, false }, ++ { lenovo_laptop, 6, ACPI_LEVEL_SENSITIVE, ACPI_ACTIVE_LOW, 0, true }, ++ { lenovo_laptop, 10, ACPI_LEVEL_SENSITIVE, ACPI_ACTIVE_LOW, 0, true }, ++ { schenker_gm_rg, 1, ACPI_EDGE_SENSITIVE, ACPI_ACTIVE_LOW, 1, true }, +}; + +static bool acpi_dev_irq_override(u32 gsi, u8 triggering, u8 polarity, @@ -65694,6 +78393,17 @@ index ee78a210c6068..19358a641610d 100644 +{ + int i; + ++ for (i = 0; i < ARRAY_SIZE(override_table); i++) { ++ const struct irq_override_cmp *entry = &override_table[i]; ++ ++ if (dmi_check_system(entry->system) && ++ entry->irq == gsi && ++ entry->triggering == triggering && ++ entry->polarity == polarity && ++ entry->shareable == shareable) ++ return entry->override; ++ } ++ +#ifdef CONFIG_X86 + /* + * IRQ override isn't needed on modern AMD Zen systems and @@ -65704,17 
+78414,6 @@ index ee78a210c6068..19358a641610d 100644 + return false; +#endif + -+ for (i = 0; i < ARRAY_SIZE(skip_override_table); i++) { -+ const struct irq_override_cmp *entry = &skip_override_table[i]; -+ -+ if (dmi_check_system(entry->system) && -+ entry->irq == gsi && -+ entry->triggering == triggering && -+ entry->polarity == polarity && -+ entry->shareable == shareable) -+ return false; -+ } -+ + return true; +} + @@ -65725,7 +78424,7 @@ index ee78a210c6068..19358a641610d 100644 { int irq, p, t; -@@ -401,7 +461,9 @@ static void acpi_dev_get_irqresource(struct resource *res, u32 gsi, +@@ -401,7 +513,9 @@ static void acpi_dev_get_irqresource(struct resource *res, u32 gsi, * using extended IRQ descriptors we take the IRQ configuration * from _CRS directly. */ @@ -65737,10 +78436,18 @@ index ee78a210c6068..19358a641610d 100644 u8 pol = p ? ACPI_ACTIVE_LOW : ACPI_ACTIVE_HIGH; diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c -index 5b54c80b9d32a..6e9cd41c5f9b1 100644 +index 5b54c80b9d32a..ae74720888dbf 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c -@@ -1690,6 +1690,7 @@ static bool acpi_device_enumeration_by_parent(struct acpi_device *device) +@@ -793,6 +793,7 @@ static bool acpi_info_matches_ids(struct acpi_device_info *info, + static const char * const acpi_ignore_dep_ids[] = { + "PNP0D80", /* Windows-compatible System Power Management Controller */ + "INT33BD", /* Intel Baytrail Mailbox Device */ ++ "LATT2021", /* Lattice FW Update Client Driver */ + NULL + }; + +@@ -1690,6 +1691,7 @@ static bool acpi_device_enumeration_by_parent(struct acpi_device *device) { struct list_head resource_list; bool is_serial_bus_slave = false; @@ -65748,7 +78455,7 @@ index 5b54c80b9d32a..6e9cd41c5f9b1 100644 /* * These devices have multiple I2cSerialBus resources and an i2c-client * must be instantiated for each, each with its own i2c_device_id. -@@ -1698,11 +1699,18 @@ static bool acpi_device_enumeration_by_parent(struct acpi_device *device) +@@ -1698,11 +1700,18 @@ static bool acpi_device_enumeration_by_parent(struct acpi_device *device) * drivers/platform/x86/i2c-multi-instantiate.c driver, which knows * which i2c_device_id to use for each resource. */ @@ -65768,7 +78475,7 @@ index 5b54c80b9d32a..6e9cd41c5f9b1 100644 {} }; -@@ -1716,8 +1724,7 @@ static bool acpi_device_enumeration_by_parent(struct acpi_device *device) +@@ -1716,8 +1725,7 @@ static bool acpi_device_enumeration_by_parent(struct acpi_device *device) fwnode_property_present(&device->fwnode, "baud"))) return true; @@ -66091,10 +78798,48 @@ index d2256326c73ae..647f11cf165d7 100644 epid); } diff --git a/drivers/acpi/x86/s2idle.c b/drivers/acpi/x86/s2idle.c -index 1c48358b43ba3..e0185e841b2a3 100644 +index 1c48358b43ba3..4a11a38764321 100644 --- a/drivers/acpi/x86/s2idle.c +++ b/drivers/acpi/x86/s2idle.c -@@ -424,15 +424,11 @@ static int lps0_device_attach(struct acpi_device *adev, +@@ -86,6 +86,8 @@ struct lpi_device_constraint_amd { + int min_dstate; + }; + ++static LIST_HEAD(lps0_s2idle_devops_head); ++ + static struct lpi_constraints *lpi_constraints_table; + static int lpi_constraints_table_size; + static int rev_id; +@@ -378,16 +380,13 @@ static int lps0_device_attach(struct acpi_device *adev, + * AMDI0006: + * - should use rev_id 0x0 + * - function mask = 0x3: Should use Microsoft method +- * AMDI0007: +- * - Should use rev_id 0x2 +- * - Should only use AMD method + */ + const char *hid = acpi_device_hid(adev); +- rev_id = strcmp(hid, "AMDI0007") ? 
0 : 2; ++ rev_id = 0; + lps0_dsm_func_mask = validate_dsm(adev->handle, + ACPI_LPS0_DSM_UUID_AMD, rev_id, &lps0_dsm_guid); + lps0_dsm_func_mask_microsoft = validate_dsm(adev->handle, +- ACPI_LPS0_DSM_UUID_MICROSOFT, 0, ++ ACPI_LPS0_DSM_UUID_MICROSOFT, rev_id, + &lps0_dsm_guid_microsoft); + if (lps0_dsm_func_mask > 0x3 && (!strcmp(hid, "AMD0004") || + !strcmp(hid, "AMD0005") || +@@ -395,9 +394,6 @@ static int lps0_device_attach(struct acpi_device *adev, + lps0_dsm_func_mask = (lps0_dsm_func_mask << 1) | 0x1; + acpi_handle_debug(adev->handle, "_DSM UUID %s: Adjusted function mask: 0x%x\n", + ACPI_LPS0_DSM_UUID_AMD, lps0_dsm_func_mask); +- } else if (lps0_dsm_func_mask_microsoft > 0 && !strcmp(hid, "AMDI0007")) { +- lps0_dsm_func_mask_microsoft = -EINVAL; +- acpi_handle_debug(adev->handle, "_DSM Using AMD method\n"); + } + } else { + rev_id = 1; +@@ -424,15 +420,11 @@ static int lps0_device_attach(struct acpi_device *adev, mem_sleep_current = PM_SUSPEND_TO_IDLE; /* @@ -66114,8 +78859,73 @@ index 1c48358b43ba3..e0185e841b2a3 100644 return 0; } +@@ -444,6 +436,8 @@ static struct acpi_scan_handler lps0_handler = { + + int acpi_s2idle_prepare_late(void) + { ++ struct acpi_s2idle_dev_ops *handler; ++ + if (!lps0_device_handle || sleep_no_lps0) + return 0; + +@@ -474,14 +468,26 @@ int acpi_s2idle_prepare_late(void) + acpi_sleep_run_lps0_dsm(ACPI_LPS0_MS_ENTRY, + lps0_dsm_func_mask_microsoft, lps0_dsm_guid_microsoft); + } ++ ++ list_for_each_entry(handler, &lps0_s2idle_devops_head, list_node) { ++ if (handler->prepare) ++ handler->prepare(); ++ } ++ + return 0; + } + + void acpi_s2idle_restore_early(void) + { ++ struct acpi_s2idle_dev_ops *handler; ++ + if (!lps0_device_handle || sleep_no_lps0) + return; + ++ list_for_each_entry(handler, &lps0_s2idle_devops_head, list_node) ++ if (handler->restore) ++ handler->restore(); ++ + /* Modern standby exit */ + if (lps0_dsm_func_mask_microsoft > 0) + acpi_sleep_run_lps0_dsm(ACPI_LPS0_MS_EXIT, +@@ -524,4 +530,28 @@ void acpi_s2idle_setup(void) + s2idle_set_ops(&acpi_s2idle_ops_lps0); + } + ++int acpi_register_lps0_dev(struct acpi_s2idle_dev_ops *arg) ++{ ++ if (!lps0_device_handle || sleep_no_lps0) ++ return -ENODEV; ++ ++ lock_system_sleep(); ++ list_add(&arg->list_node, &lps0_s2idle_devops_head); ++ unlock_system_sleep(); ++ ++ return 0; ++} ++EXPORT_SYMBOL_GPL(acpi_register_lps0_dev); ++ ++void acpi_unregister_lps0_dev(struct acpi_s2idle_dev_ops *arg) ++{ ++ if (!lps0_device_handle || sleep_no_lps0) ++ return; ++ ++ lock_system_sleep(); ++ list_del(&arg->list_node); ++ unlock_system_sleep(); ++} ++EXPORT_SYMBOL_GPL(acpi_unregister_lps0_dev); ++ + #endif /* CONFIG_SUSPEND */ diff --git a/drivers/acpi/x86/utils.c b/drivers/acpi/x86/utils.c -index f22f23933063b..3a3f09b6cbfc9 100644 +index f22f23933063b..222b951ff56ae 100644 --- a/drivers/acpi/x86/utils.c +++ b/drivers/acpi/x86/utils.c @@ -22,58 +22,71 @@ @@ -66311,7 +79121,7 @@ index f22f23933063b..3a3f09b6cbfc9 100644 ret = true; break; } -@@ -156,7 +198,24 @@ static const struct x86_cpu_id storage_d3_cpu_ids[] = { +@@ -156,7 +198,30 @@ static const struct x86_cpu_id storage_d3_cpu_ids[] = { {} }; @@ -66327,6 +79137,12 @@ index f22f23933063b..3a3f09b6cbfc9 100644 + DMI_MATCH(DMI_PRODUCT_NAME, "Inspiron 14 7425 2-in-1"), + } + }, ++ { ++ .matches = { ++ DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), ++ DMI_MATCH(DMI_PRODUCT_NAME, "Inspiron 16 5625"), ++ } ++ }, + {} +}; + @@ -66352,7 +79168,7 @@ index 962041148482c..6c0f7f4f7d1de 100644 if (ret) goto err_out; diff --git a/drivers/android/binder.c 
b/drivers/android/binder.c -index 9edacc8b97688..00c6c03ff8222 100644 +index 9edacc8b97688..c8d33c5dbe295 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -170,8 +170,32 @@ static inline void binder_stats_created(enum binder_stat_types type) @@ -66527,7 +79343,303 @@ index 9edacc8b97688..00c6c03ff8222 100644 if (ret < 0) { ret = -EPERM; goto err_security; -@@ -2270,8 +2318,8 @@ static int binder_translate_fd_array(struct binder_fd_array_object *fda, +@@ -2221,16 +2269,266 @@ err_fd_not_accepted: + return ret; + } + +-static int binder_translate_fd_array(struct binder_fd_array_object *fda, ++/** ++ * struct binder_ptr_fixup - data to be fixed-up in target buffer ++ * @offset offset in target buffer to fixup ++ * @skip_size bytes to skip in copy (fixup will be written later) ++ * @fixup_data data to write at fixup offset ++ * @node list node ++ * ++ * This is used for the pointer fixup list (pf) which is created and consumed ++ * during binder_transaction() and is only accessed locally. No ++ * locking is necessary. ++ * ++ * The list is ordered by @offset. ++ */ ++struct binder_ptr_fixup { ++ binder_size_t offset; ++ size_t skip_size; ++ binder_uintptr_t fixup_data; ++ struct list_head node; ++}; ++ ++/** ++ * struct binder_sg_copy - scatter-gather data to be copied ++ * @offset offset in target buffer ++ * @sender_uaddr user address in source buffer ++ * @length bytes to copy ++ * @node list node ++ * ++ * This is used for the sg copy list (sgc) which is created and consumed ++ * during binder_transaction() and is only accessed locally. No ++ * locking is necessary. ++ * ++ * The list is ordered by @offset. ++ */ ++struct binder_sg_copy { ++ binder_size_t offset; ++ const void __user *sender_uaddr; ++ size_t length; ++ struct list_head node; ++}; ++ ++/** ++ * binder_do_deferred_txn_copies() - copy and fixup scatter-gather data ++ * @alloc: binder_alloc associated with @buffer ++ * @buffer: binder buffer in target process ++ * @sgc_head: list_head of scatter-gather copy list ++ * @pf_head: list_head of pointer fixup list ++ * ++ * Processes all elements of @sgc_head, applying fixups from @pf_head ++ * and copying the scatter-gather data from the source process' user ++ * buffer to the target's buffer. It is expected that the list creation ++ * and processing all occurs during binder_transaction() so these lists ++ * are only accessed in local context. ++ * ++ * Return: 0=success, else -errno ++ */ ++static int binder_do_deferred_txn_copies(struct binder_alloc *alloc, ++ struct binder_buffer *buffer, ++ struct list_head *sgc_head, ++ struct list_head *pf_head) ++{ ++ int ret = 0; ++ struct binder_sg_copy *sgc, *tmpsgc; ++ struct binder_ptr_fixup *tmppf; ++ struct binder_ptr_fixup *pf = ++ list_first_entry_or_null(pf_head, struct binder_ptr_fixup, ++ node); ++ ++ list_for_each_entry_safe(sgc, tmpsgc, sgc_head, node) { ++ size_t bytes_copied = 0; ++ ++ while (bytes_copied < sgc->length) { ++ size_t copy_size; ++ size_t bytes_left = sgc->length - bytes_copied; ++ size_t offset = sgc->offset + bytes_copied; ++ ++ /* ++ * We copy up to the fixup (pointed to by pf) ++ */ ++ copy_size = pf ? min(bytes_left, (size_t)pf->offset - offset) ++ : bytes_left; ++ if (!ret && copy_size) ++ ret = binder_alloc_copy_user_to_buffer( ++ alloc, buffer, ++ offset, ++ sgc->sender_uaddr + bytes_copied, ++ copy_size); ++ bytes_copied += copy_size; ++ if (copy_size != bytes_left) { ++ BUG_ON(!pf); ++ /* we stopped at a fixup offset */ ++ if (pf->skip_size) { ++ /* ++ * we are just skipping. 
This is for ++ * BINDER_TYPE_FDA where the translated ++ * fds will be fixed up when we get ++ * to target context. ++ */ ++ bytes_copied += pf->skip_size; ++ } else { ++ /* apply the fixup indicated by pf */ ++ if (!ret) ++ ret = binder_alloc_copy_to_buffer( ++ alloc, buffer, ++ pf->offset, ++ &pf->fixup_data, ++ sizeof(pf->fixup_data)); ++ bytes_copied += sizeof(pf->fixup_data); ++ } ++ list_del(&pf->node); ++ kfree(pf); ++ pf = list_first_entry_or_null(pf_head, ++ struct binder_ptr_fixup, node); ++ } ++ } ++ list_del(&sgc->node); ++ kfree(sgc); ++ } ++ list_for_each_entry_safe(pf, tmppf, pf_head, node) { ++ BUG_ON(pf->skip_size == 0); ++ list_del(&pf->node); ++ kfree(pf); ++ } ++ BUG_ON(!list_empty(sgc_head)); ++ ++ return ret > 0 ? -EINVAL : ret; ++} ++ ++/** ++ * binder_cleanup_deferred_txn_lists() - free specified lists ++ * @sgc_head: list_head of scatter-gather copy list ++ * @pf_head: list_head of pointer fixup list ++ * ++ * Called to clean up @sgc_head and @pf_head if there is an ++ * error. ++ */ ++static void binder_cleanup_deferred_txn_lists(struct list_head *sgc_head, ++ struct list_head *pf_head) ++{ ++ struct binder_sg_copy *sgc, *tmpsgc; ++ struct binder_ptr_fixup *pf, *tmppf; ++ ++ list_for_each_entry_safe(sgc, tmpsgc, sgc_head, node) { ++ list_del(&sgc->node); ++ kfree(sgc); ++ } ++ list_for_each_entry_safe(pf, tmppf, pf_head, node) { ++ list_del(&pf->node); ++ kfree(pf); ++ } ++} ++ ++/** ++ * binder_defer_copy() - queue a scatter-gather buffer for copy ++ * @sgc_head: list_head of scatter-gather copy list ++ * @offset: binder buffer offset in target process ++ * @sender_uaddr: user address in source process ++ * @length: bytes to copy ++ * ++ * Specify a scatter-gather block to be copied. The actual copy must ++ * be deferred until all the needed fixups are identified and queued. ++ * Then the copy and fixups are done together so un-translated values ++ * from the source are never visible in the target buffer. ++ * ++ * We are guaranteed that repeated calls to this function will have ++ * monotonically increasing @offset values so the list will naturally ++ * be ordered. ++ * ++ * Return: 0=success, else -errno ++ */ ++static int binder_defer_copy(struct list_head *sgc_head, binder_size_t offset, ++ const void __user *sender_uaddr, size_t length) ++{ ++ struct binder_sg_copy *bc = kzalloc(sizeof(*bc), GFP_KERNEL); ++ ++ if (!bc) ++ return -ENOMEM; ++ ++ bc->offset = offset; ++ bc->sender_uaddr = sender_uaddr; ++ bc->length = length; ++ INIT_LIST_HEAD(&bc->node); ++ ++ /* ++ * We are guaranteed that the deferred copies are in-order ++ * so just add to the tail. ++ */ ++ list_add_tail(&bc->node, sgc_head); ++ ++ return 0; ++} ++ ++/** ++ * binder_add_fixup() - queue a fixup to be applied to sg copy ++ * @pf_head: list_head of binder ptr fixup list ++ * @offset: binder buffer offset in target process ++ * @fixup: bytes to be copied for fixup ++ * @skip_size: bytes to skip when copying (fixup will be applied later) ++ * ++ * Add the specified fixup to a list ordered by @offset. When copying ++ * the scatter-gather buffers, the fixup will be copied instead of ++ * data from the source buffer. For BINDER_TYPE_FDA fixups, the fixup ++ * will be applied later (in target process context), so we just skip ++ * the bytes specified by @skip_size. If @skip_size is 0, we copy the ++ * value in @fixup. ++ * ++ * This function is called *mostly* in @offset order, but there are ++ * exceptions. 
Since out-of-order inserts are relatively uncommon, ++ * we insert the new element by searching backward from the tail of ++ * the list. ++ * ++ * Return: 0=success, else -errno ++ */ ++static int binder_add_fixup(struct list_head *pf_head, binder_size_t offset, ++ binder_uintptr_t fixup, size_t skip_size) ++{ ++ struct binder_ptr_fixup *pf = kzalloc(sizeof(*pf), GFP_KERNEL); ++ struct binder_ptr_fixup *tmppf; ++ ++ if (!pf) ++ return -ENOMEM; ++ ++ pf->offset = offset; ++ pf->fixup_data = fixup; ++ pf->skip_size = skip_size; ++ INIT_LIST_HEAD(&pf->node); ++ ++ /* Fixups are *mostly* added in-order, but there are some ++ * exceptions. Look backwards through list for insertion point. ++ */ ++ list_for_each_entry_reverse(tmppf, pf_head, node) { ++ if (tmppf->offset < pf->offset) { ++ list_add(&pf->node, &tmppf->node); ++ return 0; ++ } ++ } ++ /* ++ * if we get here, then the new offset is the lowest so ++ * insert at the head ++ */ ++ list_add(&pf->node, pf_head); ++ return 0; ++} ++ ++static int binder_translate_fd_array(struct list_head *pf_head, ++ struct binder_fd_array_object *fda, ++ const void __user *sender_ubuffer, + struct binder_buffer_object *parent, ++ struct binder_buffer_object *sender_uparent, + struct binder_transaction *t, + struct binder_thread *thread, + struct binder_transaction *in_reply_to) + { + binder_size_t fdi, fd_buf_size; + binder_size_t fda_offset; ++ const void __user *sender_ufda_base; + struct binder_proc *proc = thread->proc; +- struct binder_proc *target_proc = t->to_proc; ++ int ret; ++ ++ if (fda->num_fds == 0) ++ return 0; + + fd_buf_size = sizeof(u32) * fda->num_fds; + if (fda->num_fds >= SIZE_MAX / sizeof(u32)) { +@@ -2254,29 +2552,36 @@ static int binder_translate_fd_array(struct binder_fd_array_object *fda, + */ + fda_offset = (parent->buffer - (uintptr_t)t->buffer->user_data) + + fda->parent_offset; +- if (!IS_ALIGNED((unsigned long)fda_offset, sizeof(u32))) { ++ sender_ufda_base = (void __user *)(uintptr_t)sender_uparent->buffer + ++ fda->parent_offset; ++ ++ if (!IS_ALIGNED((unsigned long)fda_offset, sizeof(u32)) || ++ !IS_ALIGNED((unsigned long)sender_ufda_base, sizeof(u32))) { + binder_user_error("%d:%d parent offset not aligned correctly.\n", + proc->pid, thread->pid); + return -EINVAL; + } ++ ret = binder_add_fixup(pf_head, fda_offset, 0, fda->num_fds * sizeof(u32)); ++ if (ret) ++ return ret; ++ + for (fdi = 0; fdi < fda->num_fds; fdi++) { + u32 fd; +- int ret; + binder_size_t offset = fda_offset + fdi * sizeof(fd); ++ binder_size_t sender_uoffset = fdi * sizeof(fd); + +- ret = binder_alloc_copy_from_buffer(&target_proc->alloc, +- &fd, t->buffer, +- offset, sizeof(fd)); ++ ret = copy_from_user(&fd, sender_ufda_base + sender_uoffset, sizeof(fd)); if (!ret) ret = binder_translate_fd(fd, offset, t, thread, in_reply_to); @@ -66538,7 +79650,30 @@ index 9edacc8b97688..00c6c03ff8222 100644 } return 0; } -@@ -2456,6 +2504,7 @@ static void binder_transaction(struct binder_proc *proc, + +-static int binder_fixup_parent(struct binder_transaction *t, ++static int binder_fixup_parent(struct list_head *pf_head, ++ struct binder_transaction *t, + struct binder_thread *thread, + struct binder_buffer_object *bp, + binder_size_t off_start_offset, +@@ -2322,14 +2627,7 @@ static int binder_fixup_parent(struct binder_transaction *t, + } + buffer_offset = bp->parent_offset + + (uintptr_t)parent->buffer - (uintptr_t)b->user_data; +- if (binder_alloc_copy_to_buffer(&target_proc->alloc, b, buffer_offset, +- &bp->buffer, sizeof(bp->buffer))) { +- 
binder_user_error("%d:%d got transaction with invalid parent offset\n", +- proc->pid, thread->pid); +- return -EINVAL; +- } +- +- return 0; ++ return binder_add_fixup(pf_head, buffer_offset, bp->buffer, 0); + } + + /** +@@ -2456,6 +2754,7 @@ static void binder_transaction(struct binder_proc *proc, binder_size_t off_start_offset, off_end_offset; binder_size_t off_min; binder_size_t sg_buf_offset, sg_buf_end_offset; @@ -66546,16 +79681,20 @@ index 9edacc8b97688..00c6c03ff8222 100644 struct binder_proc *target_proc = NULL; struct binder_thread *target_thread = NULL; struct binder_node *target_node = NULL; -@@ -2470,6 +2519,8 @@ static void binder_transaction(struct binder_proc *proc, +@@ -2470,6 +2769,12 @@ static void binder_transaction(struct binder_proc *proc, int t_debug_id = atomic_inc_return(&binder_last_id); char *secctx = NULL; u32 secctx_sz = 0; ++ struct list_head sgc_head; ++ struct list_head pf_head; + const void __user *user_buffer = (const void __user *) + (uintptr_t)tr->data.ptr.buffer; ++ INIT_LIST_HEAD(&sgc_head); ++ INIT_LIST_HEAD(&pf_head); e = binder_transaction_log_add(&binder_transaction_log); e->debug_id = t_debug_id; -@@ -2595,8 +2646,8 @@ static void binder_transaction(struct binder_proc *proc, +@@ -2595,8 +2900,8 @@ static void binder_transaction(struct binder_proc *proc, return_error_line = __LINE__; goto err_invalid_target_handle; } @@ -66566,7 +79705,7 @@ index 9edacc8b97688..00c6c03ff8222 100644 return_error = BR_FAILED_REPLY; return_error_param = -EPERM; return_error_line = __LINE__; -@@ -2722,16 +2773,7 @@ static void binder_transaction(struct binder_proc *proc, +@@ -2722,16 +3027,7 @@ static void binder_transaction(struct binder_proc *proc, u32 secid; size_t added_size; @@ -66584,7 +79723,7 @@ index 9edacc8b97688..00c6c03ff8222 100644 ret = security_secid_to_secctx(secid, &secctx, &secctx_sz); if (ret) { return_error = BR_FAILED_REPLY; -@@ -2790,19 +2832,6 @@ static void binder_transaction(struct binder_proc *proc, +@@ -2790,19 +3086,6 @@ static void binder_transaction(struct binder_proc *proc, t->buffer->clear_on_free = !!(t->flags & TF_CLEAR_BUF); trace_binder_transaction_alloc_buf(t->buffer); @@ -66604,7 +79743,7 @@ index 9edacc8b97688..00c6c03ff8222 100644 if (binder_alloc_copy_user_to_buffer( &target_proc->alloc, t->buffer, -@@ -2847,6 +2876,7 @@ static void binder_transaction(struct binder_proc *proc, +@@ -2847,6 +3130,7 @@ static void binder_transaction(struct binder_proc *proc, size_t object_size; struct binder_object object; binder_size_t object_offset; @@ -66612,7 +79751,7 @@ index 9edacc8b97688..00c6c03ff8222 100644 if (binder_alloc_copy_from_buffer(&target_proc->alloc, &object_offset, -@@ -2858,8 +2888,27 @@ static void binder_transaction(struct binder_proc *proc, +@@ -2858,8 +3142,27 @@ static void binder_transaction(struct binder_proc *proc, return_error_line = __LINE__; goto err_bad_offset; } @@ -66642,7 +79781,7 @@ index 9edacc8b97688..00c6c03ff8222 100644 if (object_size == 0 || object_offset < off_min) { binder_user_error("%d:%d got transaction with invalid offset (%lld, min %lld max %lld) or object.\n", proc->pid, thread->pid, -@@ -2871,6 +2920,11 @@ static void binder_transaction(struct binder_proc *proc, +@@ -2871,6 +3174,11 @@ static void binder_transaction(struct binder_proc *proc, return_error_line = __LINE__; goto err_bad_offset; } @@ -66654,11 +79793,43 @@ index 9edacc8b97688..00c6c03ff8222 100644 hdr = &object.hdr; off_min = object_offset + object_size; -@@ -2966,9 +3020,14 @@ static void binder_transaction(struct binder_proc *proc, 
+@@ -2933,6 +3241,8 @@ static void binder_transaction(struct binder_proc *proc, + case BINDER_TYPE_FDA: { + struct binder_object ptr_object; + binder_size_t parent_offset; ++ struct binder_object user_object; ++ size_t user_parent_size; + struct binder_fd_array_object *fda = + to_binder_fd_array_object(hdr); + size_t num_valid = (buffer_offset - off_start_offset) / +@@ -2964,11 +3274,35 @@ static void binder_transaction(struct binder_proc *proc, + return_error_line = __LINE__; + goto err_bad_parent; } - ret = binder_translate_fd_array(fda, parent, t, thread, - in_reply_to); +- ret = binder_translate_fd_array(fda, parent, t, thread, +- in_reply_to); - if (ret < 0) { ++ /* ++ * We need to read the user version of the parent ++ * object to get the original user offset ++ */ ++ user_parent_size = ++ binder_get_object(proc, user_buffer, t->buffer, ++ parent_offset, &user_object); ++ if (user_parent_size != sizeof(user_object.bbo)) { ++ binder_user_error("%d:%d invalid ptr object size: %zd vs %zd\n", ++ proc->pid, thread->pid, ++ user_parent_size, ++ sizeof(user_object.bbo)); ++ return_error = BR_FAILED_REPLY; ++ return_error_param = -EINVAL; ++ return_error_line = __LINE__; ++ goto err_bad_parent; ++ } ++ ret = binder_translate_fd_array(&pf_head, fda, ++ user_buffer, parent, ++ &user_object.bbo, t, ++ thread, in_reply_to); + if (!ret) + ret = binder_alloc_copy_to_buffer(&target_proc->alloc, + t->buffer, @@ -66671,7 +79842,43 @@ index 9edacc8b97688..00c6c03ff8222 100644 return_error_line = __LINE__; goto err_translate_failed; } -@@ -3038,6 +3097,19 @@ static void binder_transaction(struct binder_proc *proc, +@@ -2990,19 +3324,14 @@ static void binder_transaction(struct binder_proc *proc, + return_error_line = __LINE__; + goto err_bad_offset; + } +- if (binder_alloc_copy_user_to_buffer( +- &target_proc->alloc, +- t->buffer, +- sg_buf_offset, +- (const void __user *) +- (uintptr_t)bp->buffer, +- bp->length)) { +- binder_user_error("%d:%d got transaction with invalid offsets ptr\n", +- proc->pid, thread->pid); +- return_error_param = -EFAULT; ++ ret = binder_defer_copy(&sgc_head, sg_buf_offset, ++ (const void __user *)(uintptr_t)bp->buffer, ++ bp->length); ++ if (ret) { + return_error = BR_FAILED_REPLY; ++ return_error_param = ret; + return_error_line = __LINE__; +- goto err_copy_data_failed; ++ goto err_translate_failed; + } + /* Fixup buffer pointer to target proc address space */ + bp->buffer = (uintptr_t) +@@ -3011,7 +3340,8 @@ static void binder_transaction(struct binder_proc *proc, + + num_valid = (buffer_offset - off_start_offset) / + sizeof(binder_size_t); +- ret = binder_fixup_parent(t, thread, bp, ++ ret = binder_fixup_parent(&pf_head, t, ++ thread, bp, + off_start_offset, + num_valid, + last_fixup_obj_off, +@@ -3038,6 +3368,30 @@ static void binder_transaction(struct binder_proc *proc, goto err_bad_object_type; } } @@ -66687,11 +79894,30 @@ index 9edacc8b97688..00c6c03ff8222 100644 + return_error_param = -EFAULT; + return_error_line = __LINE__; + goto err_copy_data_failed; ++ } ++ ++ ret = binder_do_deferred_txn_copies(&target_proc->alloc, t->buffer, ++ &sgc_head, &pf_head); ++ if (ret) { ++ binder_user_error("%d:%d got transaction with invalid offsets ptr\n", ++ proc->pid, thread->pid); ++ return_error = BR_FAILED_REPLY; ++ return_error_param = ret; ++ return_error_line = __LINE__; ++ goto err_copy_data_failed; + } if (t->buffer->oneway_spam_suspect) tcomplete->type = BINDER_WORK_TRANSACTION_ONEWAY_SPAM_SUSPECT; else -@@ -3185,6 +3257,7 @@ err_invalid_target_handle: +@@ -3111,6 +3465,7 
@@ err_bad_object_type: + err_bad_offset: + err_bad_parent: + err_copy_data_failed: ++ binder_cleanup_deferred_txn_lists(&sgc_head, &pf_head); + binder_free_txn_fixups(t); + trace_binder_transaction_failed_buffer_release(t->buffer); + binder_transaction_buffer_release(target_proc, NULL, t->buffer, +@@ -3185,6 +3540,7 @@ err_invalid_target_handle: * binder_free_buf() - free the specified buffer * @proc: binder proc that owns buffer * @buffer: buffer to be freed @@ -66699,7 +79925,7 @@ index 9edacc8b97688..00c6c03ff8222 100644 * * If buffer for an async transaction, enqueue the next async * transaction from the node. -@@ -3194,7 +3267,7 @@ err_invalid_target_handle: +@@ -3194,7 +3550,7 @@ err_invalid_target_handle: static void binder_free_buf(struct binder_proc *proc, struct binder_thread *thread, @@ -66708,7 +79934,7 @@ index 9edacc8b97688..00c6c03ff8222 100644 { binder_inner_proc_lock(proc); if (buffer->transaction) { -@@ -3222,7 +3295,7 @@ binder_free_buf(struct binder_proc *proc, +@@ -3222,7 +3578,7 @@ binder_free_buf(struct binder_proc *proc, binder_node_inner_unlock(buf_node); } trace_binder_transaction_buffer_release(buffer); @@ -66717,7 +79943,7 @@ index 9edacc8b97688..00c6c03ff8222 100644 binder_alloc_free_buf(&proc->alloc, buffer); } -@@ -3424,7 +3497,7 @@ static int binder_thread_write(struct binder_proc *proc, +@@ -3424,7 +3780,7 @@ static int binder_thread_write(struct binder_proc *proc, proc->pid, thread->pid, (u64)data_ptr, buffer->debug_id, buffer->transaction ? "active" : "finished"); @@ -66726,7 +79952,7 @@ index 9edacc8b97688..00c6c03ff8222 100644 break; } -@@ -4117,7 +4190,7 @@ retry: +@@ -4117,7 +4473,7 @@ retry: buffer->transaction = NULL; binder_cleanup_transaction(t, "fd fixups failed", BR_FAILED_REPLY); @@ -66735,7 +79961,7 @@ index 9edacc8b97688..00c6c03ff8222 100644 binder_debug(BINDER_DEBUG_FAILED_TRANSACTION, "%d:%d %stransaction %d fd fixups failed %d/%d, line %d\n", proc->pid, thread->pid, -@@ -4353,6 +4426,7 @@ static void binder_free_proc(struct binder_proc *proc) +@@ -4353,6 +4709,7 @@ static void binder_free_proc(struct binder_proc *proc) } binder_alloc_deferred_release(&proc->alloc); put_task_struct(proc->tsk); @@ -66743,7 +79969,7 @@ index 9edacc8b97688..00c6c03ff8222 100644 binder_stats_deleted(BINDER_STAT_PROC); kfree(proc); } -@@ -4430,23 +4504,20 @@ static int binder_thread_release(struct binder_proc *proc, +@@ -4430,23 +4787,20 @@ static int binder_thread_release(struct binder_proc *proc, __release(&t->lock); /* @@ -66776,7 +80002,7 @@ index 9edacc8b97688..00c6c03ff8222 100644 */ if (thread->looper & BINDER_LOOPER_STATE_POLL) synchronize_rcu(); -@@ -4564,7 +4635,7 @@ static int binder_ioctl_set_ctx_mgr(struct file *filp, +@@ -4564,7 +4918,7 @@ static int binder_ioctl_set_ctx_mgr(struct file *filp, ret = -EBUSY; goto out; } @@ -66785,7 +80011,7 @@ index 9edacc8b97688..00c6c03ff8222 100644 if (ret < 0) goto out; if (uid_valid(context->binder_context_mgr_uid)) { -@@ -5055,6 +5126,7 @@ static int binder_open(struct inode *nodp, struct file *filp) +@@ -5055,6 +5409,7 @@ static int binder_open(struct inode *nodp, struct file *filp) spin_lock_init(&proc->outer_lock); get_task_struct(current->group_leader); proc->tsk = current->group_leader; @@ -66793,7 +80019,7 @@ index 9edacc8b97688..00c6c03ff8222 100644 INIT_LIST_HEAD(&proc->todo); init_waitqueue_head(&proc->freeze_wait); proc->default_priority = task_nice(current); -@@ -5765,8 +5837,7 @@ static void print_binder_proc_stats(struct seq_file *m, +@@ -5765,8 +6120,7 @@ static void 
print_binder_proc_stats(struct seq_file *m, print_binder_stats(m, " ", &proc->stats); } @@ -66803,7 +80029,7 @@ index 9edacc8b97688..00c6c03ff8222 100644 { struct binder_proc *proc; struct binder_node *node; -@@ -5805,7 +5876,7 @@ int binder_state_show(struct seq_file *m, void *unused) +@@ -5805,7 +6159,7 @@ int binder_state_show(struct seq_file *m, void *unused) return 0; } @@ -66812,7 +80038,7 @@ index 9edacc8b97688..00c6c03ff8222 100644 { struct binder_proc *proc; -@@ -5821,7 +5892,7 @@ int binder_stats_show(struct seq_file *m, void *unused) +@@ -5821,7 +6175,7 @@ int binder_stats_show(struct seq_file *m, void *unused) return 0; } @@ -66821,7 +80047,7 @@ index 9edacc8b97688..00c6c03ff8222 100644 { struct binder_proc *proc; -@@ -5877,7 +5948,7 @@ static void print_binder_transaction_log_entry(struct seq_file *m, +@@ -5877,7 +6231,7 @@ static void print_binder_transaction_log_entry(struct seq_file *m, "\n" : " (incomplete)\n"); } @@ -66830,7 +80056,7 @@ index 9edacc8b97688..00c6c03ff8222 100644 { struct binder_transaction_log *log = m->private; unsigned int log_cur = atomic_read(&log->cur); -@@ -5909,6 +5980,45 @@ const struct file_operations binder_fops = { +@@ -5909,6 +6263,45 @@ const struct file_operations binder_fops = { .release = binder_release, }; @@ -66876,7 +80102,7 @@ index 9edacc8b97688..00c6c03ff8222 100644 static int __init init_binder_device(const char *name) { int ret; -@@ -5954,36 +6064,18 @@ static int __init binder_init(void) +@@ -5954,36 +6347,18 @@ static int __init binder_init(void) atomic_set(&binder_transaction_log_failed.cur, ~0U); binder_debugfs_dir_entry_root = debugfs_create_dir("binder", NULL); @@ -66924,7 +80150,7 @@ index 9edacc8b97688..00c6c03ff8222 100644 if (!IS_ENABLED(CONFIG_ANDROID_BINDERFS) && diff --git a/drivers/android/binder_alloc.c b/drivers/android/binder_alloc.c -index 340515f54498c..8ed450125c924 100644 +index 340515f54498c..6acfb896b2e5c 100644 --- a/drivers/android/binder_alloc.c +++ b/drivers/android/binder_alloc.c @@ -213,7 +213,7 @@ static int binder_update_page_range(struct binder_alloc *alloc, int allocate, @@ -67001,7 +80227,20 @@ index 340515f54498c..8ed450125c924 100644 binder_alloc_debug(BINDER_DEBUG_BUFFER_ALLOC_ASYNC, "%d: binder_free_buf size %zd async free %zd\n", -@@ -788,7 +787,6 @@ int binder_alloc_mmap_handler(struct binder_alloc *alloc, +@@ -754,6 +753,12 @@ int binder_alloc_mmap_handler(struct binder_alloc *alloc, + const char *failure_string; + struct binder_buffer *buffer; + ++ if (unlikely(vma->vm_mm != alloc->vma_vm_mm)) { ++ ret = -EINVAL; ++ failure_string = "invalid vma->vm_mm"; ++ goto err_invalid_mm; ++ } ++ + mutex_lock(&binder_alloc_mmap_lock); + if (alloc->buffer_size) { + ret = -EBUSY; +@@ -788,7 +793,6 @@ int binder_alloc_mmap_handler(struct binder_alloc *alloc, binder_insert_free_buffer(alloc, buffer); alloc->free_async_space = alloc->buffer_size / 2; binder_alloc_set_vma(alloc, vma); @@ -67009,7 +80248,15 @@ index 340515f54498c..8ed450125c924 100644 return 0; -@@ -817,7 +815,8 @@ void binder_alloc_deferred_release(struct binder_alloc *alloc) +@@ -801,6 +805,7 @@ err_alloc_pages_failed: + alloc->buffer_size = 0; + err_already_mapped: + mutex_unlock(&binder_alloc_mmap_lock); ++err_invalid_mm: + binder_alloc_debug(BINDER_DEBUG_USER_ERROR, + "%s: %d %lx-%lx %s failed %d\n", __func__, + alloc->pid, vma->vm_start, vma->vm_end, +@@ -817,7 +822,8 @@ void binder_alloc_deferred_release(struct binder_alloc *alloc) buffers = 0; mutex_lock(&alloc->mutex); @@ -67019,7 +80266,7 @@ index 340515f54498c..8ed450125c924 100644 
while ((n = rb_first(&alloc->allocated_buffers))) { buffer = rb_entry(n, struct binder_buffer, rb_node); -@@ -924,17 +923,25 @@ void binder_alloc_print_pages(struct seq_file *m, +@@ -924,17 +930,25 @@ void binder_alloc_print_pages(struct seq_file *m, * Make sure the binder_alloc is fully initialized, otherwise we might * read inconsistent state. */ @@ -67038,7 +80285,7 @@ index 340515f54498c..8ed450125c924 100644 + if (binder_alloc_get_vma(alloc) == NULL) { + mmap_read_unlock(alloc->vma_vm_mm); + goto uninitialized; -+ } + } + + mmap_read_unlock(alloc->vma_vm_mm); + for (i = 0; i < alloc->buffer_size / PAGE_SIZE; i++) { @@ -67049,13 +80296,13 @@ index 340515f54498c..8ed450125c924 100644 + active++; + else + lru++; - } ++ } + +uninitialized: mutex_unlock(&alloc->mutex); seq_printf(m, " pages: %d:%d:%d\n", active, lru, free); seq_printf(m, " pages high watermark: %zu\n", alloc->pages_high); -@@ -1079,6 +1086,8 @@ static struct shrinker binder_shrinker = { +@@ -1079,6 +1093,8 @@ static struct shrinker binder_shrinker = { void binder_alloc_init(struct binder_alloc *alloc) { alloc->pid = current->group_leader->pid; @@ -67237,11 +80484,32 @@ index e3605cdd43357..6d717ed76766e 100644 } proc_log_dir = binderfs_create_dir(binder_logs_root_dir, "proc"); +diff --git a/drivers/ata/acard-ahci.c b/drivers/ata/acard-ahci.c +index 2a04e8abd3977..26e0eb537b4f5 100644 +--- a/drivers/ata/acard-ahci.c ++++ b/drivers/ata/acard-ahci.c +@@ -267,7 +267,7 @@ static bool acard_ahci_qc_fill_rtf(struct ata_queued_cmd *qc) + if (qc->tf.protocol == ATA_PROT_PIO && qc->dma_dir == DMA_FROM_DEVICE && + !(qc->flags & ATA_QCFLAG_FAILED)) { + ata_tf_from_fis(rx_fis + RX_FIS_PIO_SETUP, &qc->result_tf); +- qc->result_tf.command = (rx_fis + RX_FIS_PIO_SETUP)[15]; ++ qc->result_tf.status = (rx_fis + RX_FIS_PIO_SETUP)[15]; + } else + ata_tf_from_fis(rx_fis + RX_FIS_D2H_REG, &qc->result_tf); + diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c -index 186cbf90c8ead..812731e80f8e0 100644 +index 186cbf90c8ead..149ee16fd0225 100644 --- a/drivers/ata/ahci.c +++ b/drivers/ata/ahci.c -@@ -442,6 +442,7 @@ static const struct pci_device_id ahci_pci_tbl[] = { +@@ -83,6 +83,7 @@ enum board_ids { + static int ahci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent); + static void ahci_remove_one(struct pci_dev *dev); + static void ahci_shutdown_one(struct pci_dev *dev); ++static void ahci_intel_pcs_quirk(struct pci_dev *pdev, struct ahci_host_priv *hpriv); + static int ahci_vt8251_hardreset(struct ata_link *link, unsigned int *class, + unsigned long deadline); + static int ahci_avn_hardreset(struct ata_link *link, unsigned int *class, +@@ -442,6 +443,7 @@ static const struct pci_device_id ahci_pci_tbl[] = { /* AMD */ { PCI_VDEVICE(AMD, 0x7800), board_ahci }, /* AMD Hudson-2 */ { PCI_VDEVICE(AMD, 0x7900), board_ahci }, /* AMD CZ */ @@ -67249,6 +80517,90 @@ index 186cbf90c8ead..812731e80f8e0 100644 /* AMD is using RAID class only for ahci controllers */ { PCI_VENDOR_ID_AMD, PCI_ANY_ID, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_STORAGE_RAID << 8, 0xffffff, board_ahci }, +@@ -667,6 +669,25 @@ static void ahci_pci_save_initial_config(struct pci_dev *pdev, + ahci_save_initial_config(&pdev->dev, hpriv); + } + ++static int ahci_pci_reset_controller(struct ata_host *host) ++{ ++ struct pci_dev *pdev = to_pci_dev(host->dev); ++ struct ahci_host_priv *hpriv = host->private_data; ++ int rc; ++ ++ rc = ahci_reset_controller(host); ++ if (rc) ++ return rc; ++ ++ /* ++ * If platform firmware failed to enable ports, try to enable ++ * them here. 
++ */ ++ ahci_intel_pcs_quirk(pdev, hpriv); ++ ++ return 0; ++} ++ + static void ahci_pci_init_controller(struct ata_host *host) + { + struct ahci_host_priv *hpriv = host->private_data; +@@ -734,7 +755,7 @@ static int ahci_p5wdh_hardreset(struct ata_link *link, unsigned int *class, + + /* clear D2H reception area to properly wait for D2H FIS */ + ata_tf_init(link->device, &tf); +- tf.command = ATA_BUSY; ++ tf.status = ATA_BUSY; + ata_tf_to_fis(&tf, 0, 0, d2h_fis); + + rc = sata_link_hardreset(link, sata_ehc_deb_timing(&link->eh_context), +@@ -805,7 +826,7 @@ static int ahci_avn_hardreset(struct ata_link *link, unsigned int *class, + + /* clear D2H reception area to properly wait for D2H FIS */ + ata_tf_init(link->device, &tf); +- tf.command = ATA_BUSY; ++ tf.status = ATA_BUSY; + ata_tf_to_fis(&tf, 0, 0, d2h_fis); + + rc = sata_link_hardreset(link, timing, deadline, &online, +@@ -868,7 +889,7 @@ static int ahci_pci_device_runtime_resume(struct device *dev) + struct ata_host *host = pci_get_drvdata(pdev); + int rc; + +- rc = ahci_reset_controller(host); ++ rc = ahci_pci_reset_controller(host); + if (rc) + return rc; + ahci_pci_init_controller(host); +@@ -903,7 +924,7 @@ static int ahci_pci_device_resume(struct device *dev) + ahci_mcp89_apple_enable(pdev); + + if (pdev->dev.power.power_state.event == PM_EVENT_SUSPEND) { +- rc = ahci_reset_controller(host); ++ rc = ahci_pci_reset_controller(host); + if (rc) + return rc; + +@@ -1788,12 +1809,6 @@ static int ahci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) + /* save initial config */ + ahci_pci_save_initial_config(pdev, hpriv); + +- /* +- * If platform firmware failed to enable ports, try to enable +- * them here. +- */ +- ahci_intel_pcs_quirk(pdev, hpriv); +- + /* prepare host */ + if (hpriv->cap & HOST_CAP_NCQ) { + pi.flags |= ATA_FLAG_NCQ; +@@ -1903,7 +1918,7 @@ static int ahci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) + if (rc) + return rc; + +- rc = ahci_reset_controller(host); ++ rc = ahci_pci_reset_controller(host); + if (rc) + return rc; + diff --git a/drivers/ata/ahci.h b/drivers/ata/ahci.h index 2e89499bd9c3d..60ae707a88cc0 100644 --- a/drivers/ata/ahci.h @@ -67272,10 +80624,54 @@ index 388baf528fa81..189f75d537414 100644 MODULE_LICENSE("GPL"); -MODULE_ALIAS("ahci:imx"); +MODULE_ALIAS("platform:" DRV_NAME); +diff --git a/drivers/ata/ahci_qoriq.c b/drivers/ata/ahci_qoriq.c +index 5b46fc9aeb4a0..e5ac3d1c214c0 100644 +--- a/drivers/ata/ahci_qoriq.c ++++ b/drivers/ata/ahci_qoriq.c +@@ -125,7 +125,7 @@ static int ahci_qoriq_hardreset(struct ata_link *link, unsigned int *class, + + /* clear D2H reception area to properly wait for D2H FIS */ + ata_tf_init(link->device, &tf); +- tf.command = ATA_BUSY; ++ tf.status = ATA_BUSY; + ata_tf_to_fis(&tf, 0, 0, d2h_fis); + + rc = sata_link_hardreset(link, timing, deadline, &online, +diff --git a/drivers/ata/ahci_xgene.c b/drivers/ata/ahci_xgene.c +index dffc432b9d54a..292099410cf68 100644 +--- a/drivers/ata/ahci_xgene.c ++++ b/drivers/ata/ahci_xgene.c +@@ -365,7 +365,7 @@ static int xgene_ahci_do_hardreset(struct ata_link *link, + do { + /* clear D2H reception area to properly wait for D2H FIS */ + ata_tf_init(link->device, &tf); +- tf.command = ATA_BUSY; ++ tf.status = ATA_BUSY; + ata_tf_to_fis(&tf, 0, 0, d2h_fis); + rc = sata_link_hardreset(link, timing, deadline, online, + ahci_check_ready); diff --git a/drivers/ata/libahci.c b/drivers/ata/libahci.c -index 5b3fa2cbe7223..395772fa39432 100644 +index 5b3fa2cbe7223..192115a45dd78 100644 --- 
a/drivers/ata/libahci.c +++ b/drivers/ata/libahci.c +@@ -1552,7 +1552,7 @@ int ahci_do_hardreset(struct ata_link *link, unsigned int *class, + + /* clear D2H reception area to properly wait for D2H FIS */ + ata_tf_init(link->device, &tf); +- tf.command = ATA_BUSY; ++ tf.status = ATA_BUSY; + ata_tf_to_fis(&tf, 0, 0, d2h_fis); + + rc = sata_link_hardreset(link, timing, deadline, online, +@@ -2038,7 +2038,7 @@ static bool ahci_qc_fill_rtf(struct ata_queued_cmd *qc) + if (qc->tf.protocol == ATA_PROT_PIO && qc->dma_dir == DMA_FROM_DEVICE && + !(qc->flags & ATA_QCFLAG_FAILED)) { + ata_tf_from_fis(rx_fis + RX_FIS_PIO_SETUP, &qc->result_tf); +- qc->result_tf.command = (rx_fis + RX_FIS_PIO_SETUP)[15]; ++ qc->result_tf.status = (rx_fis + RX_FIS_PIO_SETUP)[15]; + } else + ata_tf_from_fis(rx_fis + RX_FIS_D2H_REG, &qc->result_tf); + @@ -2305,6 +2305,18 @@ int ahci_port_resume(struct ata_port *ap) EXPORT_SYMBOL_GPL(ahci_port_resume); @@ -67336,10 +80732,159 @@ index 0910441321f72..64d6da0a53035 100644 hpriv->nports = 1; hpriv->phys = devm_kcalloc(dev, hpriv->nports, sizeof(*hpriv->phys), GFP_KERNEL); +diff --git a/drivers/ata/libata-acpi.c b/drivers/ata/libata-acpi.c +index 7a7d6642edcc5..d15f3e908ea4a 100644 +--- a/drivers/ata/libata-acpi.c ++++ b/drivers/ata/libata-acpi.c +@@ -554,13 +554,13 @@ static void ata_acpi_gtf_to_tf(struct ata_device *dev, + + tf->flags |= ATA_TFLAG_ISADDR | ATA_TFLAG_DEVICE; + tf->protocol = ATA_PROT_NODATA; +- tf->feature = gtf->tf[0]; /* 0x1f1 */ ++ tf->error = gtf->tf[0]; /* 0x1f1 */ + tf->nsect = gtf->tf[1]; /* 0x1f2 */ + tf->lbal = gtf->tf[2]; /* 0x1f3 */ + tf->lbam = gtf->tf[3]; /* 0x1f4 */ + tf->lbah = gtf->tf[4]; /* 0x1f5 */ + tf->device = gtf->tf[5]; /* 0x1f6 */ +- tf->command = gtf->tf[6]; /* 0x1f7 */ ++ tf->status = gtf->tf[6]; /* 0x1f7 */ + } + + static int ata_acpi_filter_tf(struct ata_device *dev, +@@ -650,9 +650,7 @@ static int ata_acpi_run_tf(struct ata_device *dev, + struct ata_taskfile *pptf = NULL; + struct ata_taskfile tf, ptf, rtf; + unsigned int err_mask; +- const char *level; + const char *descr; +- char msg[60]; + int rc; + + if ((gtf->tf[0] == 0) && (gtf->tf[1] == 0) && (gtf->tf[2] == 0) +@@ -666,6 +664,10 @@ static int ata_acpi_run_tf(struct ata_device *dev, + pptf = &ptf; + } + ++ descr = ata_get_cmd_descript(tf.command); ++ if (!descr) ++ descr = "unknown"; ++ + if (!ata_acpi_filter_tf(dev, &tf, pptf)) { + rtf = tf; + err_mask = ata_exec_internal(dev, &rtf, NULL, +@@ -673,40 +675,42 @@ static int ata_acpi_run_tf(struct ata_device *dev, + + switch (err_mask) { + case 0: +- level = KERN_DEBUG; +- snprintf(msg, sizeof(msg), "succeeded"); ++ ata_dev_dbg(dev, ++ "ACPI cmd %02x/%02x:%02x:%02x:%02x:%02x:%02x" ++ "(%s) succeeded\n", ++ tf.command, tf.feature, tf.nsect, tf.lbal, ++ tf.lbam, tf.lbah, tf.device, descr); + rc = 1; + break; + + case AC_ERR_DEV: +- level = KERN_INFO; +- snprintf(msg, sizeof(msg), +- "rejected by device (Stat=0x%02x Err=0x%02x)", +- rtf.command, rtf.feature); ++ ata_dev_info(dev, ++ "ACPI cmd %02x/%02x:%02x:%02x:%02x:%02x:%02x" ++ "(%s) rejected by device (Stat=0x%02x Err=0x%02x)", ++ tf.command, tf.feature, tf.nsect, tf.lbal, ++ tf.lbam, tf.lbah, tf.device, descr, ++ rtf.status, rtf.error); + rc = 0; + break; + + default: +- level = KERN_ERR; +- snprintf(msg, sizeof(msg), +- "failed (Emask=0x%x Stat=0x%02x Err=0x%02x)", +- err_mask, rtf.command, rtf.feature); ++ ata_dev_err(dev, ++ "ACPI cmd %02x/%02x:%02x:%02x:%02x:%02x:%02x" ++ "(%s) failed (Emask=0x%x Stat=0x%02x Err=0x%02x)", ++ tf.command, tf.feature, tf.nsect, tf.lbal, ++ 
tf.lbam, tf.lbah, tf.device, descr, ++ err_mask, rtf.status, rtf.error); + rc = -EIO; + break; + } + } else { +- level = KERN_INFO; +- snprintf(msg, sizeof(msg), "filtered out"); ++ ata_dev_info(dev, ++ "ACPI cmd %02x/%02x:%02x:%02x:%02x:%02x:%02x" ++ "(%s) filtered out\n", ++ tf.command, tf.feature, tf.nsect, tf.lbal, ++ tf.lbam, tf.lbah, tf.device, descr); + rc = 0; + } +- descr = ata_get_cmd_descript(tf.command); +- +- ata_dev_printk(dev, level, +- "ACPI cmd %02x/%02x:%02x:%02x:%02x:%02x:%02x (%s) %s\n", +- tf.command, tf.feature, tf.nsect, tf.lbal, +- tf.lbam, tf.lbah, tf.device, +- (descr ? descr : "unknown"), msg); +- + return rc; + } + diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c -index eed65311b5d1d..4d308e3163c39 100644 +index eed65311b5d1d..025260b80a94c 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c +@@ -1185,7 +1185,7 @@ static int ata_read_native_max_address(struct ata_device *dev, u64 *max_sectors) + ata_dev_warn(dev, + "failed to read native max address (err_mask=0x%x)\n", + err_mask); +- if (err_mask == AC_ERR_DEV && (tf.feature & ATA_ABORTED)) ++ if (err_mask == AC_ERR_DEV && (tf.error & ATA_ABORTED)) + return -EACCES; + return -EIO; + } +@@ -1249,7 +1249,7 @@ static int ata_set_max_sectors(struct ata_device *dev, u64 new_sectors) + "failed to set max address (err_mask=0x%x)\n", + err_mask); + if (err_mask == AC_ERR_DEV && +- (tf.feature & (ATA_ABORTED | ATA_IDNF))) ++ (tf.error & (ATA_ABORTED | ATA_IDNF))) + return -EACCES; + return -EIO; + } +@@ -1616,7 +1616,7 @@ unsigned ata_exec_internal_sg(struct ata_device *dev, + + /* perform minimal error analysis */ + if (qc->flags & ATA_QCFLAG_FAILED) { +- if (qc->result_tf.command & (ATA_ERR | ATA_DF)) ++ if (qc->result_tf.status & (ATA_ERR | ATA_DF)) + qc->err_mask |= AC_ERR_DEV; + + if (!qc->err_mask) +@@ -1625,7 +1625,7 @@ unsigned ata_exec_internal_sg(struct ata_device *dev, + if (qc->err_mask & ~AC_ERR_OTHER) + qc->err_mask &= ~AC_ERR_OTHER; + } else if (qc->tf.command == ATA_CMD_REQ_SENSE_DATA) { +- qc->result_tf.command |= ATA_SENSE; ++ qc->result_tf.status |= ATA_SENSE; + } + + /* finish up */ +@@ -1848,7 +1848,7 @@ retry: + return 0; + } + +- if ((err_mask == AC_ERR_DEV) && (tf.feature & ATA_ABORTED)) { ++ if ((err_mask == AC_ERR_DEV) && (tf.error & ATA_ABORTED)) { + /* Device or controller might have reported + * the wrong device class. Give a shot at the + * other IDENTIFY if the current one is @@ -2007,7 +2007,7 @@ unsigned int ata_read_log_page(struct ata_device *dev, u8 log, retry: @@ -67381,6 +80926,15 @@ index eed65311b5d1d..4d308e3163c39 100644 return; err_mask = ata_read_log_page(dev, +@@ -3071,7 +3076,7 @@ int sata_down_spd_limit(struct ata_link *link, u32 spd_limit) + */ + if (spd > 1) + mask &= (1 << (spd - 1)) - 1; +- else ++ else if (link->sata_spd) + return -EINVAL; + + /* were we already at the bottom? 
*/ @@ -3851,6 +3856,8 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = { { "VRFDFC22048UCHC-TE*", NULL, ATA_HORKAGE_NODMA }, /* Odd clown on sil3726/4726 PMPs */ @@ -67419,6 +80973,15 @@ index eed65311b5d1d..4d308e3163c39 100644 /* * As defined, the DRAT (Deterministic Read After Trim) and RZAT +@@ -4356,7 +4371,7 @@ static unsigned int ata_dev_init_params(struct ata_device *dev, + /* A clean abort indicates an original or just out of spec drive + and we should continue as we issue the setup based on the + drive reported working geometry */ +- if (err_mask == AC_ERR_DEV && (tf.feature & ATA_ABORTED)) ++ if (err_mask == AC_ERR_DEV && (tf.error & ATA_ABORTED)) + err_mask = 0; + + DPRINTK("EXIT, err_mask=%x\n", err_mask); @@ -5489,7 +5504,7 @@ struct ata_host *ata_host_alloc_pinfo(struct device *dev, const struct ata_port_info * const * ppi, int n_ports) @@ -67437,8 +81000,76 @@ index eed65311b5d1d..4d308e3163c39 100644 struct ata_port *ap = host->ports[i]; if (ppi[j]) +@@ -6482,67 +6497,6 @@ const struct ata_port_info ata_dummy_port_info = { + }; + EXPORT_SYMBOL_GPL(ata_dummy_port_info); + +-/* +- * Utility print functions +- */ +-void ata_port_printk(const struct ata_port *ap, const char *level, +- const char *fmt, ...) +-{ +- struct va_format vaf; +- va_list args; +- +- va_start(args, fmt); +- +- vaf.fmt = fmt; +- vaf.va = &args; +- +- printk("%sata%u: %pV", level, ap->print_id, &vaf); +- +- va_end(args); +-} +-EXPORT_SYMBOL(ata_port_printk); +- +-void ata_link_printk(const struct ata_link *link, const char *level, +- const char *fmt, ...) +-{ +- struct va_format vaf; +- va_list args; +- +- va_start(args, fmt); +- +- vaf.fmt = fmt; +- vaf.va = &args; +- +- if (sata_pmp_attached(link->ap) || link->ap->slave_link) +- printk("%sata%u.%02u: %pV", +- level, link->ap->print_id, link->pmp, &vaf); +- else +- printk("%sata%u: %pV", +- level, link->ap->print_id, &vaf); +- +- va_end(args); +-} +-EXPORT_SYMBOL(ata_link_printk); +- +-void ata_dev_printk(const struct ata_device *dev, const char *level, +- const char *fmt, ...) 
+-{ +- struct va_format vaf; +- va_list args; +- +- va_start(args, fmt); +- +- vaf.fmt = fmt; +- vaf.va = &args; +- +- printk("%sata%u.%02u: %pV", +- level, dev->link->ap->print_id, dev->link->pmp + dev->devno, +- &vaf); +- +- va_end(args); +-} +-EXPORT_SYMBOL(ata_dev_printk); +- + void ata_print_version(const struct device *dev, const char *version) + { + dev_printk(KERN_DEBUG, dev, "version %s\n", version); diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c -index bf9c4b6c5c3d4..7aea631edb274 100644 +index bf9c4b6c5c3d4..8350abc172908 100644 --- a/drivers/ata/libata-eh.c +++ b/drivers/ata/libata-eh.c @@ -93,6 +93,12 @@ static const unsigned long ata_eh_identify_timeouts[] = { @@ -67463,6 +81094,48 @@ index bf9c4b6c5c3d4..7aea631edb274 100644 { .commands = CMDS(ATA_CMD_READ_NATIVE_MAX, ATA_CMD_READ_NATIVE_MAX_EXT), .timeouts = ata_eh_other_timeouts, }, { .commands = CMDS(ATA_CMD_SET_MAX, ATA_CMD_SET_MAX_EXT), +@@ -1378,7 +1386,7 @@ unsigned int atapi_eh_tur(struct ata_device *dev, u8 *r_sense_key) + + err_mask = ata_exec_internal(dev, &tf, cdb, DMA_NONE, NULL, 0, 0); + if (err_mask == AC_ERR_DEV) +- *r_sense_key = tf.feature >> 4; ++ *r_sense_key = tf.error >> 4; + return err_mask; + } + +@@ -1423,12 +1431,12 @@ static void ata_eh_request_sense(struct ata_queued_cmd *qc, + + err_mask = ata_exec_internal(dev, &tf, NULL, DMA_NONE, NULL, 0, 0); + /* Ignore err_mask; ATA_ERR might be set */ +- if (tf.command & ATA_SENSE) { ++ if (tf.status & ATA_SENSE) { + ata_scsi_set_sense(dev, cmd, tf.lbah, tf.lbam, tf.lbal); + qc->flags |= ATA_QCFLAG_SENSE_VALID; + } else { + ata_dev_warn(dev, "request sense failed stat %02x emask %x\n", +- tf.command, err_mask); ++ tf.status, err_mask); + } + } + +@@ -1553,7 +1561,7 @@ static unsigned int ata_eh_analyze_tf(struct ata_queued_cmd *qc, + const struct ata_taskfile *tf) + { + unsigned int tmp, action = 0; +- u8 stat = tf->command, err = tf->feature; ++ u8 stat = tf->status, err = tf->error; + + if ((stat & (ATA_BUSY | ATA_DRQ | ATA_DRDY)) != ATA_DRDY) { + qc->err_mask |= AC_ERR_HSM; +@@ -1590,7 +1598,7 @@ static unsigned int ata_eh_analyze_tf(struct ata_queued_cmd *qc, + if (!(qc->ap->pflags & ATA_PFLAG_FROZEN)) { + tmp = atapi_eh_request_sense(qc->dev, + qc->scsicmd->sense_buffer, +- qc->result_tf.feature >> 4); ++ qc->result_tf.error >> 4); + if (!tmp) + qc->flags |= ATA_QCFLAG_SENSE_VALID; + else @@ -2122,6 +2130,7 @@ const char *ata_get_cmd_descript(u8 command) { ATA_CMD_WRITE_QUEUED_FUA_EXT, "WRITE DMA QUEUED FUA EXT" }, { ATA_CMD_FPDMA_READ, "READ FPDMA QUEUED" }, @@ -67471,10 +81144,188 @@ index bf9c4b6c5c3d4..7aea631edb274 100644 { ATA_CMD_FPDMA_SEND, "SEND FPDMA QUEUED" }, { ATA_CMD_FPDMA_RECV, "RECEIVE FPDMA QUEUED" }, { ATA_CMD_PIO_READ, "READ SECTOR(S)" }, +@@ -2363,7 +2372,7 @@ static void ata_eh_link_report(struct ata_link *link) + cmd->hob_feature, cmd->hob_nsect, + cmd->hob_lbal, cmd->hob_lbam, cmd->hob_lbah, + cmd->device, qc->tag, data_buf, cdb_buf, +- res->command, res->feature, res->nsect, ++ res->status, res->error, res->nsect, + res->lbal, res->lbam, res->lbah, + res->hob_feature, res->hob_nsect, + res->hob_lbal, res->hob_lbam, res->hob_lbah, +@@ -2371,28 +2380,28 @@ static void ata_eh_link_report(struct ata_link *link) + qc->err_mask & AC_ERR_NCQ ? 
" <F>" : ""); + + #ifdef CONFIG_ATA_VERBOSE_ERROR +- if (res->command & (ATA_BUSY | ATA_DRDY | ATA_DF | ATA_DRQ | +- ATA_SENSE | ATA_ERR)) { +- if (res->command & ATA_BUSY) ++ if (res->status & (ATA_BUSY | ATA_DRDY | ATA_DF | ATA_DRQ | ++ ATA_SENSE | ATA_ERR)) { ++ if (res->status & ATA_BUSY) + ata_dev_err(qc->dev, "status: { Busy }\n"); + else + ata_dev_err(qc->dev, "status: { %s%s%s%s%s}\n", +- res->command & ATA_DRDY ? "DRDY " : "", +- res->command & ATA_DF ? "DF " : "", +- res->command & ATA_DRQ ? "DRQ " : "", +- res->command & ATA_SENSE ? "SENSE " : "", +- res->command & ATA_ERR ? "ERR " : ""); ++ res->status & ATA_DRDY ? "DRDY " : "", ++ res->status & ATA_DF ? "DF " : "", ++ res->status & ATA_DRQ ? "DRQ " : "", ++ res->status & ATA_SENSE ? "SENSE " : "", ++ res->status & ATA_ERR ? "ERR " : ""); + } + + if (cmd->command != ATA_CMD_PACKET && +- (res->feature & (ATA_ICRC | ATA_UNC | ATA_AMNF | +- ATA_IDNF | ATA_ABORTED))) ++ (res->error & (ATA_ICRC | ATA_UNC | ATA_AMNF | ATA_IDNF | ++ ATA_ABORTED))) + ata_dev_err(qc->dev, "error: { %s%s%s%s%s}\n", +- res->feature & ATA_ICRC ? "ICRC " : "", +- res->feature & ATA_UNC ? "UNC " : "", +- res->feature & ATA_AMNF ? "AMNF " : "", +- res->feature & ATA_IDNF ? "IDNF " : "", +- res->feature & ATA_ABORTED ? "ABRT " : ""); ++ res->error & ATA_ICRC ? "ICRC " : "", ++ res->error & ATA_UNC ? "UNC " : "", ++ res->error & ATA_AMNF ? "AMNF " : "", ++ res->error & ATA_IDNF ? "IDNF " : "", ++ res->error & ATA_ABORTED ? "ABRT " : ""); + #endif + } + } +diff --git a/drivers/ata/libata-sata.c b/drivers/ata/libata-sata.c +index 8f3ff830ab0c6..b5aa525d87603 100644 +--- a/drivers/ata/libata-sata.c ++++ b/drivers/ata/libata-sata.c +@@ -191,8 +191,8 @@ EXPORT_SYMBOL_GPL(ata_tf_to_fis); + + void ata_tf_from_fis(const u8 *fis, struct ata_taskfile *tf) + { +- tf->command = fis[2]; /* status */ +- tf->feature = fis[3]; /* error */ ++ tf->status = fis[2]; ++ tf->error = fis[3]; + + tf->lbal = fis[4]; + tf->lbam = fis[5]; +@@ -1402,8 +1402,8 @@ static int ata_eh_read_log_10h(struct ata_device *dev, + + *tag = buf[0] & 0x1f; + +- tf->command = buf[2]; +- tf->feature = buf[3]; ++ tf->status = buf[2]; ++ tf->error = buf[3]; + tf->lbal = buf[4]; + tf->lbam = buf[5]; + tf->lbah = buf[6]; +@@ -1413,7 +1413,8 @@ static int ata_eh_read_log_10h(struct ata_device *dev, + tf->hob_lbah = buf[10]; + tf->nsect = buf[12]; + tf->hob_nsect = buf[13]; +- if (dev->class == ATA_DEV_ZAC && ata_id_has_ncq_autosense(dev->id)) ++ if (dev->class == ATA_DEV_ZAC && ata_id_has_ncq_autosense(dev->id) && ++ (tf->status & ATA_SENSE)) + tf->auxiliary = buf[14] << 16 | buf[15] << 8 | buf[16]; + + return 0; +@@ -1477,8 +1478,12 @@ void ata_eh_analyze_ncq_error(struct ata_link *link) + memcpy(&qc->result_tf, &tf, sizeof(tf)); + qc->result_tf.flags = ATA_TFLAG_ISADDR | ATA_TFLAG_LBA | ATA_TFLAG_LBA48; + qc->err_mask |= AC_ERR_DEV | AC_ERR_NCQ; +- if (dev->class == ATA_DEV_ZAC && +- ((qc->result_tf.command & ATA_SENSE) || qc->result_tf.auxiliary)) { ++ ++ /* ++ * If the device supports NCQ autosense, ata_eh_read_log_10h() will have ++ * stored the sense data in qc->result_tf.auxiliary. 
++ */ ++ if (qc->result_tf.auxiliary) { + char sense_key, asc, ascq; + + sense_key = (qc->result_tf.auxiliary >> 16) & 0xff; diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c -index 1fb4611f7eeb9..10303611d17b9 100644 +index 1fb4611f7eeb9..4d8129640d60e 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c +@@ -671,7 +671,7 @@ static void ata_qc_set_pc_nbytes(struct ata_queued_cmd *qc) + */ + static void ata_dump_status(unsigned id, struct ata_taskfile *tf) + { +- u8 stat = tf->command, err = tf->feature; ++ u8 stat = tf->status, err = tf->error; + + pr_warn("ata%u: status=0x%02x { ", id, stat); + if (stat & ATA_BUSY) { +@@ -867,8 +867,8 @@ static void ata_gen_passthru_sense(struct ata_queued_cmd *qc) + * onto sense key, asc & ascq. + */ + if (qc->err_mask || +- tf->command & (ATA_BUSY | ATA_DF | ATA_ERR | ATA_DRQ)) { +- ata_to_sense_error(qc->ap->print_id, tf->command, tf->feature, ++ tf->status & (ATA_BUSY | ATA_DF | ATA_ERR | ATA_DRQ)) { ++ ata_to_sense_error(qc->ap->print_id, tf->status, tf->error, + &sense_key, &asc, &ascq, verbose); + ata_scsi_set_sense(qc->dev, cmd, sense_key, asc, ascq); + } else { +@@ -897,13 +897,13 @@ static void ata_gen_passthru_sense(struct ata_queued_cmd *qc) + * Copy registers into sense buffer. + */ + desc[2] = 0x00; +- desc[3] = tf->feature; /* == error reg */ ++ desc[3] = tf->error; + desc[5] = tf->nsect; + desc[7] = tf->lbal; + desc[9] = tf->lbam; + desc[11] = tf->lbah; + desc[12] = tf->device; +- desc[13] = tf->command; /* == status reg */ ++ desc[13] = tf->status; + + /* + * Fill in Extend bit, and the high order bytes +@@ -918,8 +918,8 @@ static void ata_gen_passthru_sense(struct ata_queued_cmd *qc) + } + } else { + /* Fixed sense format */ +- desc[0] = tf->feature; +- desc[1] = tf->command; /* status */ ++ desc[0] = tf->error; ++ desc[1] = tf->status; + desc[2] = tf->device; + desc[3] = tf->nsect; + desc[7] = 0; +@@ -968,14 +968,14 @@ static void ata_gen_ata_sense(struct ata_queued_cmd *qc) + * onto sense key, asc & ascq. 
+ */ + if (qc->err_mask || +- tf->command & (ATA_BUSY | ATA_DF | ATA_ERR | ATA_DRQ)) { +- ata_to_sense_error(qc->ap->print_id, tf->command, tf->feature, ++ tf->status & (ATA_BUSY | ATA_DF | ATA_ERR | ATA_DRQ)) { ++ ata_to_sense_error(qc->ap->print_id, tf->status, tf->error, + &sense_key, &asc, &ascq, verbose); + ata_scsi_set_sense(dev, cmd, sense_key, asc, ascq); + } else { + /* Could not decode error */ + ata_dev_warn(dev, "could not decode error status 0x%x err_mask 0x%x\n", +- tf->command, qc->err_mask); ++ tf->status, qc->err_mask); + ata_scsi_set_sense(dev, cmd, ABORTED_COMMAND, 0, 0); + return; + } +@@ -2490,7 +2490,7 @@ static void atapi_request_sense(struct ata_queued_cmd *qc) + + /* fill these in, for the case where they are -not- overwritten */ + cmd->sense_buffer[0] = 0x70; +- cmd->sense_buffer[2] = qc->tf.feature >> 4; ++ cmd->sense_buffer[2] = qc->tf.error >> 4; + + ata_qc_reinit(qc); + @@ -2826,8 +2826,19 @@ static unsigned int ata_scsi_pass_thru(struct ata_queued_cmd *qc) goto invalid_fld; } @@ -67497,8 +81348,132 @@ index 1fb4611f7eeb9..10303611d17b9 100644 /* enable LBA */ tf->flags |= ATA_TFLAG_LBA; +@@ -3248,6 +3259,7 @@ static unsigned int ata_scsiop_maint_in(struct ata_scsi_args *args, u8 *rbuf) + case REPORT_LUNS: + case REQUEST_SENSE: + case SYNCHRONIZE_CACHE: ++ case SYNCHRONIZE_CACHE_16: + case REZERO_UNIT: + case SEEK_6: + case SEEK_10: +@@ -3914,6 +3926,7 @@ static inline ata_xlat_func_t ata_get_xlat_func(struct ata_device *dev, u8 cmd) + return ata_scsi_write_same_xlat; + + case SYNCHRONIZE_CACHE: ++ case SYNCHRONIZE_CACHE_16: + if (ata_try_flush_cache(dev)) + return ata_scsi_flush_xlat; + break; +@@ -3975,44 +3988,51 @@ void ata_scsi_dump_cdb(struct ata_port *ap, struct scsi_cmnd *cmd) + + int __ata_scsi_queuecmd(struct scsi_cmnd *scmd, struct ata_device *dev) + { ++ struct ata_port *ap = dev->link->ap; + u8 scsi_op = scmd->cmnd[0]; + ata_xlat_func_t xlat_func; +- int rc = 0; ++ ++ /* ++ * scsi_queue_rq() will defer commands if scsi_host_in_recovery(). ++ * However, this check is done without holding the ap->lock (a libata ++ * specific lock), so we can have received an error irq since then, ++ * therefore we must check if EH is pending, while holding ap->lock. 
++ */ ++ if (ap->pflags & (ATA_PFLAG_EH_PENDING | ATA_PFLAG_EH_IN_PROGRESS)) ++ return SCSI_MLQUEUE_DEVICE_BUSY; ++ ++ if (unlikely(!scmd->cmd_len)) ++ goto bad_cdb_len; + + if (dev->class == ATA_DEV_ATA || dev->class == ATA_DEV_ZAC) { +- if (unlikely(!scmd->cmd_len || scmd->cmd_len > dev->cdb_len)) ++ if (unlikely(scmd->cmd_len > dev->cdb_len)) + goto bad_cdb_len; + + xlat_func = ata_get_xlat_func(dev, scsi_op); +- } else { +- if (unlikely(!scmd->cmd_len)) +- goto bad_cdb_len; ++ } else if (likely((scsi_op != ATA_16) || !atapi_passthru16)) { ++ /* relay SCSI command to ATAPI device */ ++ int len = COMMAND_SIZE(scsi_op); + +- xlat_func = NULL; +- if (likely((scsi_op != ATA_16) || !atapi_passthru16)) { +- /* relay SCSI command to ATAPI device */ +- int len = COMMAND_SIZE(scsi_op); +- if (unlikely(len > scmd->cmd_len || +- len > dev->cdb_len || +- scmd->cmd_len > ATAPI_CDB_LEN)) +- goto bad_cdb_len; ++ if (unlikely(len > scmd->cmd_len || ++ len > dev->cdb_len || ++ scmd->cmd_len > ATAPI_CDB_LEN)) ++ goto bad_cdb_len; + +- xlat_func = atapi_xlat; +- } else { +- /* ATA_16 passthru, treat as an ATA command */ +- if (unlikely(scmd->cmd_len > 16)) +- goto bad_cdb_len; ++ xlat_func = atapi_xlat; ++ } else { ++ /* ATA_16 passthru, treat as an ATA command */ ++ if (unlikely(scmd->cmd_len > 16)) ++ goto bad_cdb_len; + +- xlat_func = ata_get_xlat_func(dev, scsi_op); +- } ++ xlat_func = ata_get_xlat_func(dev, scsi_op); + } + + if (xlat_func) +- rc = ata_scsi_translate(dev, scmd, xlat_func); +- else +- ata_scsi_simulate(dev, scmd); ++ return ata_scsi_translate(dev, scmd, xlat_func); + +- return rc; ++ ata_scsi_simulate(dev, scmd); ++ ++ return 0; + + bad_cdb_len: + DPRINTK("bad CDB len=%u, scsi_op=0x%02x, max=%u\n", +@@ -4159,6 +4179,7 @@ void ata_scsi_simulate(struct ata_device *dev, struct scsi_cmnd *cmd) + * turning this into a no-op. 
+ */ + case SYNCHRONIZE_CACHE: ++ case SYNCHRONIZE_CACHE_16: + fallthrough; + + /* no-op's, complete with success */ +diff --git a/drivers/ata/libata-sff.c b/drivers/ata/libata-sff.c +index b71ea4a680b01..8409e53b7b7a0 100644 +--- a/drivers/ata/libata-sff.c ++++ b/drivers/ata/libata-sff.c +@@ -457,8 +457,8 @@ void ata_sff_tf_read(struct ata_port *ap, struct ata_taskfile *tf) + { + struct ata_ioports *ioaddr = &ap->ioaddr; + +- tf->command = ata_sff_check_status(ap); +- tf->feature = ioread8(ioaddr->error_addr); ++ tf->status = ata_sff_check_status(ap); ++ tf->error = ioread8(ioaddr->error_addr); + tf->nsect = ioread8(ioaddr->nsect_addr); + tf->lbal = ioread8(ioaddr->lbal_addr); + tf->lbam = ioread8(ioaddr->lbam_addr); +@@ -1837,7 +1837,7 @@ unsigned int ata_sff_dev_classify(struct ata_device *dev, int present, + memset(&tf, 0, sizeof(tf)); + + ap->ops->sff_tf_read(ap, &tf); +- err = tf.feature; ++ err = tf.error; + if (r_err) + *r_err = err; + diff --git a/drivers/ata/libata-transport.c b/drivers/ata/libata-transport.c -index 34bb4608bdc67..93d6920cd86cd 100644 +index 34bb4608bdc67..60f22e1a4943f 100644 --- a/drivers/ata/libata-transport.c +++ b/drivers/ata/libata-transport.c @@ -196,7 +196,7 @@ static struct { @@ -67510,6 +81485,80 @@ index 34bb4608bdc67..93d6920cd86cd 100644 /* * ATA Port attributes +@@ -301,7 +301,9 @@ int ata_tport_add(struct device *parent, + pm_runtime_enable(dev); + pm_runtime_forbid(dev); + +- transport_add_device(dev); ++ error = transport_add_device(dev); ++ if (error) ++ goto tport_transport_add_err; + transport_configure_device(dev); + + error = ata_tlink_add(&ap->link); +@@ -312,12 +314,12 @@ int ata_tport_add(struct device *parent, + + tport_link_err: + transport_remove_device(dev); ++ tport_transport_add_err: + device_del(dev); + + tport_err: + transport_destroy_device(dev); + put_device(dev); +- ata_host_put(ap->host); + return error; + } + +@@ -426,7 +428,9 @@ int ata_tlink_add(struct ata_link *link) + goto tlink_err; + } + +- transport_add_device(dev); ++ error = transport_add_device(dev); ++ if (error) ++ goto tlink_transport_err; + transport_configure_device(dev); + + ata_for_each_dev(ata_dev, link, ALL) { +@@ -441,6 +445,7 @@ int ata_tlink_add(struct ata_link *link) + ata_tdev_delete(ata_dev); + } + transport_remove_device(dev); ++ tlink_transport_err: + device_del(dev); + tlink_err: + transport_destroy_device(dev); +@@ -678,7 +683,13 @@ static int ata_tdev_add(struct ata_device *ata_dev) + return error; + } + +- transport_add_device(dev); ++ error = transport_add_device(dev); ++ if (error) { ++ device_del(dev); ++ ata_tdev_free(ata_dev); ++ return error; ++ } ++ + transport_configure_device(dev); + return 0; + } +diff --git a/drivers/ata/pata_ep93xx.c b/drivers/ata/pata_ep93xx.c +index 46208ececbb6a..3fc26026014e2 100644 +--- a/drivers/ata/pata_ep93xx.c ++++ b/drivers/ata/pata_ep93xx.c +@@ -416,8 +416,8 @@ static void ep93xx_pata_tf_read(struct ata_port *ap, struct ata_taskfile *tf) + { + struct ep93xx_pata_data *drv_data = ap->host->private_data; + +- tf->command = ep93xx_pata_check_status(ap); +- tf->feature = ep93xx_pata_read_reg(drv_data, IDECTRL_ADDR_FEATURE); ++ tf->status = ep93xx_pata_check_status(ap); ++ tf->error = ep93xx_pata_read_reg(drv_data, IDECTRL_ADDR_FEATURE); + tf->nsect = ep93xx_pata_read_reg(drv_data, IDECTRL_ADDR_NSECT); + tf->lbal = ep93xx_pata_read_reg(drv_data, IDECTRL_ADDR_LBAL); + tf->lbam = ep93xx_pata_read_reg(drv_data, IDECTRL_ADDR_LBAM); diff --git a/drivers/ata/pata_hpt37x.c b/drivers/ata/pata_hpt37x.c index 
f242157bc81bb..9d371859e81ed 100644 --- a/drivers/ata/pata_hpt37x.c @@ -67552,6 +81601,47 @@ index f242157bc81bb..9d371859e81ed 100644 total += sr & 0x1FF; udelay(15); } +diff --git a/drivers/ata/pata_ixp4xx_cf.c b/drivers/ata/pata_ixp4xx_cf.c +index 99c63087c8ae9..17b557c91e1c7 100644 +--- a/drivers/ata/pata_ixp4xx_cf.c ++++ b/drivers/ata/pata_ixp4xx_cf.c +@@ -114,7 +114,7 @@ static void ixp4xx_set_piomode(struct ata_port *ap, struct ata_device *adev) + { + struct ixp4xx_pata *ixpp = ap->host->private_data; + +- ata_dev_printk(adev, KERN_INFO, "configured for PIO%d 8bit\n", ++ ata_dev_info(adev, "configured for PIO%d 8bit\n", + adev->pio_mode - XFER_PIO_0); + ixp4xx_set_8bit_timing(ixpp, adev->pio_mode); + } +@@ -132,8 +132,8 @@ static unsigned int ixp4xx_mmio_data_xfer(struct ata_queued_cmd *qc, + struct ixp4xx_pata *ixpp = ap->host->private_data; + unsigned long flags; + +- ata_dev_printk(adev, KERN_DEBUG, "%s %d bytes\n", (rw == READ) ? "READ" : "WRITE", +- buflen); ++ ata_dev_dbg(adev, "%s %d bytes\n", (rw == READ) ? "READ" : "WRITE", ++ buflen); + spin_lock_irqsave(ap->lock, flags); + + /* set the expansion bus in 16bit mode and restore +diff --git a/drivers/ata/pata_legacy.c b/drivers/ata/pata_legacy.c +index 0a8bf09a5c19e..03c580625c2cc 100644 +--- a/drivers/ata/pata_legacy.c ++++ b/drivers/ata/pata_legacy.c +@@ -315,9 +315,10 @@ static void pdc20230_set_piomode(struct ata_port *ap, struct ata_device *adev) + outb(inb(0x1F4) & 0x07, 0x1F4); + + rt = inb(0x1F3); +- rt &= 0x07 << (3 * adev->devno); ++ rt &= ~(0x07 << (3 * !adev->devno)); + if (pio) +- rt |= (1 + 3 * pio) << (3 * adev->devno); ++ rt |= (1 + 3 * pio) << (3 * !adev->devno); ++ outb(rt, 0x1F3); + + udelay(100); + outb(inb(0x1F2) | 0x01, 0x1F2); diff --git a/drivers/ata/pata_marvell.c b/drivers/ata/pata_marvell.c index 361597d14c569..d45a75bfc0169 100644 --- a/drivers/ata/pata_marvell.c @@ -67565,10 +81655,43 @@ index 361597d14c569..d45a75bfc0169 100644 if (ioread8(ap->ioaddr.bmdma_addr + 1) & 1) return ATA_CBL_PATA40; return ATA_CBL_PATA80; +diff --git a/drivers/ata/pata_ns87415.c b/drivers/ata/pata_ns87415.c +index f4949e704356e..9dd6bffefb485 100644 +--- a/drivers/ata/pata_ns87415.c ++++ b/drivers/ata/pata_ns87415.c +@@ -264,8 +264,8 @@ void ns87560_tf_read(struct ata_port *ap, struct ata_taskfile *tf) + { + struct ata_ioports *ioaddr = &ap->ioaddr; + +- tf->command = ns87560_check_status(ap); +- tf->feature = ioread8(ioaddr->error_addr); ++ tf->status = ns87560_check_status(ap); ++ tf->error = ioread8(ioaddr->error_addr); + tf->nsect = ioread8(ioaddr->nsect_addr); + tf->lbal = ioread8(ioaddr->lbal_addr); + tf->lbam = ioread8(ioaddr->lbam_addr); diff --git a/drivers/ata/pata_octeon_cf.c b/drivers/ata/pata_octeon_cf.c -index b5a3f710d76de..4cc8a1027888a 100644 +index b5a3f710d76de..6c9f2efcedc11 100644 --- a/drivers/ata/pata_octeon_cf.c +++ b/drivers/ata/pata_octeon_cf.c +@@ -386,7 +386,7 @@ static void octeon_cf_tf_read16(struct ata_port *ap, struct ata_taskfile *tf) + void __iomem *base = ap->ioaddr.data_addr; + + blob = __raw_readw(base + 0xc); +- tf->feature = blob >> 8; ++ tf->error = blob >> 8; + + blob = __raw_readw(base + 2); + tf->nsect = blob & 0xff; +@@ -398,7 +398,7 @@ static void octeon_cf_tf_read16(struct ata_port *ap, struct ata_taskfile *tf) + + blob = __raw_readw(base + 6); + tf->device = blob & 0xff; +- tf->command = blob >> 8; ++ tf->status = blob >> 8; + + if (tf->flags & ATA_TFLAG_LBA48) { + if (likely(ap->ioaddr.ctl_addr)) { @@ -888,12 +888,14 @@ static int octeon_cf_probe(struct platform_device 
*pdev) int i; res_dma = platform_get_resource(dma_dev, IORESOURCE_MEM, 0); @@ -67592,6 +81715,19 @@ index b5a3f710d76de..4cc8a1027888a 100644 } of_node_put(dma_node); } +diff --git a/drivers/ata/pata_samsung_cf.c b/drivers/ata/pata_samsung_cf.c +index 3da0e8e302861..149d771c61d67 100644 +--- a/drivers/ata/pata_samsung_cf.c ++++ b/drivers/ata/pata_samsung_cf.c +@@ -213,7 +213,7 @@ static void pata_s3c_tf_read(struct ata_port *ap, struct ata_taskfile *tf) + { + struct ata_ioports *ioaddr = &ap->ioaddr; + +- tf->feature = ata_inb(ap->host, ioaddr->error_addr); ++ tf->error = ata_inb(ap->host, ioaddr->error_addr); + tf->nsect = ata_inb(ap->host, ioaddr->nsect_addr); + tf->lbal = ata_inb(ap->host, ioaddr->lbal_addr); + tf->lbam = ata_inb(ap->host, ioaddr->lbam_addr); diff --git a/drivers/ata/sata_dwc_460ex.c b/drivers/ata/sata_dwc_460ex.c index 338c2e50f7591..29e2b0dfba309 100644 --- a/drivers/ata/sata_dwc_460ex.c @@ -67661,6 +81797,137 @@ index e5838b23c9e0a..3b31a4f596d86 100644 return 0; } +diff --git a/drivers/ata/sata_highbank.c b/drivers/ata/sata_highbank.c +index 8440203e835ed..f9bb3be4b939e 100644 +--- a/drivers/ata/sata_highbank.c ++++ b/drivers/ata/sata_highbank.c +@@ -400,7 +400,7 @@ static int ahci_highbank_hardreset(struct ata_link *link, unsigned int *class, + + /* clear D2H reception area to properly wait for D2H FIS */ + ata_tf_init(link->device, &tf); +- tf.command = ATA_BUSY; ++ tf.status = ATA_BUSY; + ata_tf_to_fis(&tf, 0, 0, d2h_fis); + + do { +diff --git a/drivers/ata/sata_inic162x.c b/drivers/ata/sata_inic162x.c +index e517bd8822a5f..659f1a903298f 100644 +--- a/drivers/ata/sata_inic162x.c ++++ b/drivers/ata/sata_inic162x.c +@@ -559,13 +559,13 @@ static void inic_tf_read(struct ata_port *ap, struct ata_taskfile *tf) + { + void __iomem *port_base = inic_port_base(ap); + +- tf->feature = readb(port_base + PORT_TF_FEATURE); ++ tf->error = readb(port_base + PORT_TF_FEATURE); + tf->nsect = readb(port_base + PORT_TF_NSECT); + tf->lbal = readb(port_base + PORT_TF_LBAL); + tf->lbam = readb(port_base + PORT_TF_LBAM); + tf->lbah = readb(port_base + PORT_TF_LBAH); + tf->device = readb(port_base + PORT_TF_DEVICE); +- tf->command = readb(port_base + PORT_TF_COMMAND); ++ tf->status = readb(port_base + PORT_TF_COMMAND); + } + + static bool inic_qc_fill_rtf(struct ata_queued_cmd *qc) +@@ -582,11 +582,11 @@ static bool inic_qc_fill_rtf(struct ata_queued_cmd *qc) + */ + inic_tf_read(qc->ap, &tf); + +- if (!(tf.command & ATA_ERR)) ++ if (!(tf.status & ATA_ERR)) + return false; + +- rtf->command = tf.command; +- rtf->feature = tf.feature; ++ rtf->status = tf.status; ++ rtf->error = tf.error; + return true; + } + +diff --git a/drivers/ata/sata_rcar.c b/drivers/ata/sata_rcar.c +index 44b0ed8f6bb8a..9759e24f718fc 100644 +--- a/drivers/ata/sata_rcar.c ++++ b/drivers/ata/sata_rcar.c +@@ -417,8 +417,8 @@ static void sata_rcar_tf_read(struct ata_port *ap, struct ata_taskfile *tf) + { + struct ata_ioports *ioaddr = &ap->ioaddr; + +- tf->command = sata_rcar_check_status(ap); +- tf->feature = ioread32(ioaddr->error_addr); ++ tf->status = sata_rcar_check_status(ap); ++ tf->error = ioread32(ioaddr->error_addr); + tf->nsect = ioread32(ioaddr->nsect_addr); + tf->lbal = ioread32(ioaddr->lbal_addr); + tf->lbam = ioread32(ioaddr->lbam_addr); +diff --git a/drivers/ata/sata_svw.c b/drivers/ata/sata_svw.c +index f8552559db7f5..2e3418a82b445 100644 +--- a/drivers/ata/sata_svw.c ++++ b/drivers/ata/sata_svw.c +@@ -194,24 +194,24 @@ static void k2_sata_tf_load(struct ata_port *ap, const struct ata_taskfile *tf) + 
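/*
 * Illustrative aside, not part of the patch: the ATA hunks in this
 * region all apply one mechanical rename.  struct ata_taskfile used to
 * reuse its outbound 'command' and 'feature' fields to hold the Status
 * and Error registers read back from the device; this backport gives
 * the results their own 'status' and 'error' members, so every
 * ->tf_read() implementation changes along the lines of:
 *
 *     tf->status = ata_sff_check_status(ap);      // was: tf->command
 *     tf->error  = ioread8(ioaddr->error_addr);   // was: tf->feature
 *
 * The k2_sata_tf_read() hunk below is one more instance of the same
 * pattern.
 */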
static void k2_sata_tf_read(struct ata_port *ap, struct ata_taskfile *tf) + { + struct ata_ioports *ioaddr = &ap->ioaddr; +- u16 nsect, lbal, lbam, lbah, feature; ++ u16 nsect, lbal, lbam, lbah, error; + +- tf->command = k2_stat_check_status(ap); ++ tf->status = k2_stat_check_status(ap); + tf->device = readw(ioaddr->device_addr); +- feature = readw(ioaddr->error_addr); ++ error = readw(ioaddr->error_addr); + nsect = readw(ioaddr->nsect_addr); + lbal = readw(ioaddr->lbal_addr); + lbam = readw(ioaddr->lbam_addr); + lbah = readw(ioaddr->lbah_addr); + +- tf->feature = feature; ++ tf->error = error; + tf->nsect = nsect; + tf->lbal = lbal; + tf->lbam = lbam; + tf->lbah = lbah; + + if (tf->flags & ATA_TFLAG_LBA48) { +- tf->hob_feature = feature >> 8; ++ tf->hob_feature = error >> 8; + tf->hob_nsect = nsect >> 8; + tf->hob_lbal = lbal >> 8; + tf->hob_lbam = lbam >> 8; +diff --git a/drivers/ata/sata_vsc.c b/drivers/ata/sata_vsc.c +index 8fa952cb9f7f4..87e4ed66b3064 100644 +--- a/drivers/ata/sata_vsc.c ++++ b/drivers/ata/sata_vsc.c +@@ -183,24 +183,24 @@ static void vsc_sata_tf_load(struct ata_port *ap, const struct ata_taskfile *tf) + static void vsc_sata_tf_read(struct ata_port *ap, struct ata_taskfile *tf) + { + struct ata_ioports *ioaddr = &ap->ioaddr; +- u16 nsect, lbal, lbam, lbah, feature; ++ u16 nsect, lbal, lbam, lbah, error; + +- tf->command = ata_sff_check_status(ap); ++ tf->status = ata_sff_check_status(ap); + tf->device = readw(ioaddr->device_addr); +- feature = readw(ioaddr->error_addr); ++ error = readw(ioaddr->error_addr); + nsect = readw(ioaddr->nsect_addr); + lbal = readw(ioaddr->lbal_addr); + lbam = readw(ioaddr->lbam_addr); + lbah = readw(ioaddr->lbah_addr); + +- tf->feature = feature; ++ tf->error = error; + tf->nsect = nsect; + tf->lbal = lbal; + tf->lbam = lbam; + tf->lbah = lbah; + + if (tf->flags & ATA_TFLAG_LBA48) { +- tf->hob_feature = feature >> 8; ++ tf->hob_feature = error >> 8; + tf->hob_nsect = nsect >> 8; + tf->hob_lbal = lbal >> 8; + tf->hob_lbam = lbam >> 8; diff --git a/drivers/atm/eni.c b/drivers/atm/eni.c index 422753d52244b..a31ffe16e626f 100644 --- a/drivers/atm/eni.c @@ -67975,6 +82242,22 @@ index bdc98c5713d5e..d171535fc18f5 100644 out_unregister: kobject_put(&priv->kobj); /* drv->p is freed in driver_release() */ +diff --git a/drivers/base/class.c b/drivers/base/class.c +index 7476f393df977..0e44a68e90a02 100644 +--- a/drivers/base/class.c ++++ b/drivers/base/class.c +@@ -192,6 +192,11 @@ int __class_register(struct class *cls, struct lock_class_key *key) + } + error = class_add_groups(class_get(cls), cls->class_groups); + class_put(cls); ++ if (error) { ++ kobject_del(&cp->subsys.kobj); ++ kfree_const(cp->subsys.kobj.name); ++ kfree(cp); ++ } + return error; + } + EXPORT_SYMBOL_GPL(__class_register); diff --git a/drivers/base/component.c b/drivers/base/component.c index 5e79299f6c3ff..870485cbbb87c 100644 --- a/drivers/base/component.c @@ -68073,7 +82356,7 @@ index 5fc258073bc75..55405ebf23abf 100644 }; diff --git a/drivers/base/dd.c b/drivers/base/dd.c -index 68ea1f949daa9..63cc011188109 100644 +index 68ea1f949daa9..060348125635b 100644 --- a/drivers/base/dd.c +++ b/drivers/base/dd.c @@ -257,7 +257,6 @@ DEFINE_SHOW_ATTRIBUTE(deferred_devs); @@ -68175,7 +82458,7 @@ index 68ea1f949daa9..63cc011188109 100644 int ret; /* -@@ -1112,6 +1120,11 @@ static int __driver_attach(struct device *dev, void *data) +@@ -1112,9 +1120,18 @@ static int __driver_attach(struct device *dev, void *data) dev_dbg(dev, "Device match requests probe deferral\n"); dev->can_match = 
true; driver_deferred_probe_add(dev); @@ -68186,8 +82469,16 @@ index 68ea1f949daa9..63cc011188109 100644 + return 0; } else if (ret < 0) { dev_dbg(dev, "Bus failed to match device: %d\n", ret); - return ret; -@@ -1130,9 +1143,11 @@ static int __driver_attach(struct device *dev, void *data) +- return ret; ++ /* ++ * Driver could not match with device, but may match with ++ * another device on the bus. ++ */ ++ return 0; + } /* ret > 0 means positive match */ + + if (driver_allows_async_probing(drv)) { +@@ -1130,9 +1147,11 @@ static int __driver_attach(struct device *dev, void *data) if (!dev->driver) { get_device(dev); dev->p->async_driver = drv; @@ -68200,7 +82491,7 @@ index 68ea1f949daa9..63cc011188109 100644 return 0; } -@@ -1208,6 +1223,8 @@ static void __device_release_driver(struct device *dev, struct device *parent) +@@ -1208,6 +1227,8 @@ static void __device_release_driver(struct device *dev, struct device *parent) devres_release_all(dev); arch_teardown_dma_ops(dev); @@ -68398,7 +82689,7 @@ index c56d34f8158f7..5366d1b5359c8 100644 node_remove_accesses(node); node_remove_caches(node); diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c -index 5db704f02e712..7f3d21e6fdfb3 100644 +index 5db704f02e712..94fe30c187ad8 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -219,6 +219,9 @@ static void genpd_debug_remove(struct generic_pm_domain *genpd) @@ -68430,6 +82721,17 @@ index 5db704f02e712..7f3d21e6fdfb3 100644 cancel_work_sync(&genpd->power_off_work); if (genpd_is_cpu_domain(genpd)) free_cpumask_var(genpd->cpus); +@@ -2885,6 +2889,10 @@ static int genpd_iterate_idle_states(struct device_node *dn, + np = it.node; + if (!of_match_node(idle_state_match, np)) + continue; ++ ++ if (!of_device_is_available(np)) ++ continue; ++ + if (states) { + ret = genpd_parse_state(&states[i], np); + if (ret) { diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index cbea78e79f3df..8c4819fe73d4c 100644 --- a/drivers/base/power/main.c @@ -68702,7 +83004,7 @@ index cbea78e79f3df..8c4819fe73d4c 100644 bool dev_pm_skip_suspend(struct device *dev) diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c -index ec94049442b99..3179c9265471b 100644 +index ec94049442b99..c1142a7a4fe65 100644 --- a/drivers/base/power/runtime.c +++ b/drivers/base/power/runtime.c @@ -305,16 +305,34 @@ static int rpm_get_suppliers(struct device *dev) @@ -68744,7 +83046,26 @@ index ec94049442b99..3179c9265471b 100644 if (try_to_suspend) pm_request_idle(link->supplier); } -@@ -1770,9 +1788,8 @@ void pm_runtime_drop_link(struct device_link *link) +@@ -466,7 +484,17 @@ static int rpm_idle(struct device *dev, int rpmflags) + + dev->power.idle_notification = true; + +- retval = __rpm_callback(callback, dev); ++ if (dev->power.irq_safe) ++ spin_unlock(&dev->power.lock); ++ else ++ spin_unlock_irq(&dev->power.lock); ++ ++ retval = callback(dev); ++ ++ if (dev->power.irq_safe) ++ spin_lock(&dev->power.lock); ++ else ++ spin_lock_irq(&dev->power.lock); + + dev->power.idle_notification = false; + wake_up_all(&dev->power.wait_queue); +@@ -1770,9 +1798,8 @@ void pm_runtime_drop_link(struct device_link *link) return; pm_runtime_drop_link_count(link->consumer); @@ -68840,7 +83161,7 @@ index 99bda0da23a82..8666590201c9a 100644 /** diff --git a/drivers/base/property.c b/drivers/base/property.c -index 453918eb7390c..735a23db1b5e9 100644 +index 453918eb7390c..17a648d643566 100644 --- a/drivers/base/property.c +++ b/drivers/base/property.c @@ -48,12 +48,14 @@ bool 
fwnode_property_present(const struct fwnode_handle *fwnode, @@ -69009,27 +83330,46 @@ index 453918eb7390c..735a23db1b5e9 100644 if (!fwnode_has_op(fwnode, device_is_available)) return true; -@@ -1045,14 +1067,14 @@ fwnode_graph_get_next_endpoint(const struct fwnode_handle *fwnode, - parent = fwnode_graph_get_port_parent(prev); - else +@@ -1033,25 +1055,31 @@ struct fwnode_handle * + fwnode_graph_get_next_endpoint(const struct fwnode_handle *fwnode, + struct fwnode_handle *prev) + { ++ struct fwnode_handle *ep, *port_parent = NULL; + const struct fwnode_handle *parent; +- struct fwnode_handle *ep; + + /* + * If this function is in a loop and the previous iteration returned + * an endpoint from fwnode->secondary, then we need to use the secondary + * as parent rather than @fwnode. + */ +- if (prev) +- parent = fwnode_graph_get_port_parent(prev); +- else ++ if (prev) { ++ port_parent = fwnode_graph_get_port_parent(prev); ++ parent = port_parent; ++ } else { parent = fwnode; ++ } + if (IS_ERR_OR_NULL(parent)) + return NULL; ep = fwnode_call_ptr_op(parent, graph_get_next_endpoint, prev); + if (ep) -+ return ep; ++ goto out_put_port_parent; - if (IS_ERR_OR_NULL(ep) && - !IS_ERR_OR_NULL(parent) && !IS_ERR_OR_NULL(parent->secondary)) - ep = fwnode_graph_get_next_endpoint(parent->secondary, NULL); -- -- return ep; -+ return fwnode_graph_get_next_endpoint(parent->secondary, NULL); ++ ep = fwnode_graph_get_next_endpoint(parent->secondary, NULL); + ++out_put_port_parent: ++ fwnode_handle_put(port_parent); + return ep; } EXPORT_SYMBOL_GPL(fwnode_graph_get_next_endpoint); - -@@ -1269,8 +1291,10 @@ fwnode_graph_devcon_match(struct fwnode_handle *fwnode, const char *con_id, +@@ -1269,8 +1297,10 @@ fwnode_graph_devcon_match(struct fwnode_handle *fwnode, const char *con_id, fwnode_graph_for_each_endpoint(fwnode, ep) { node = fwnode_graph_get_remote_port_parent(ep); @@ -69156,6 +83496,19 @@ index c46f6a8e14d23..3ba1232ce8451 100644 nargs_prop, sizeof(u32), &nargs_prop_val, 1); if (error) +diff --git a/drivers/base/test/test_async_driver_probe.c b/drivers/base/test/test_async_driver_probe.c +index 3bb7beb127a96..c157a912d6739 100644 +--- a/drivers/base/test/test_async_driver_probe.c ++++ b/drivers/base/test/test_async_driver_probe.c +@@ -146,7 +146,7 @@ static int __init test_async_probe_init(void) + calltime = ktime_get(); + for_each_online_cpu(cpu) { + nid = cpu_to_node(cpu); +- pdev = &sync_dev[sync_id]; ++ pdev = &async_dev[async_id]; + + *pdev = test_platform_device_register_node("test_async_driver", + async_id, diff --git a/drivers/base/topology.c b/drivers/base/topology.c index 43c0940643f5d..5df6d861bc21b 100644 --- a/drivers/base/topology.c @@ -69645,7 +83998,7 @@ index 5d9181382ce19..0a5766a2f1618 100644 struct drbd_peer_device *, struct peer_device_info *, diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c -index 55234a558e98b..d59af26d77032 100644 +index 55234a558e98b..eaf20a3324018 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -171,7 +171,7 @@ void tl_release(struct drbd_connection *connection, unsigned int barrier_nr, @@ -69670,7 +84023,42 @@ index 55234a558e98b..d59af26d77032 100644 list_for_each_entry_safe_from(req, r, &connection->transfer_log, tl_requests) { if (req->epoch != expect_epoch) break; -@@ -2737,6 +2740,7 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig +@@ -2241,7 +2244,8 @@ void drbd_destroy_device(struct kref *kref) + kref_put(&peer_device->connection->kref, 
drbd_destroy_connection); + kfree(peer_device); + } +- memset(device, 0xfd, sizeof(*device)); ++ if (device->submit.wq) ++ destroy_workqueue(device->submit.wq); + kfree(device); + kref_put(&resource->kref, drbd_destroy_resource); + } +@@ -2333,7 +2337,6 @@ void drbd_destroy_resource(struct kref *kref) + idr_destroy(&resource->devices); + free_cpumask_var(resource->cpu_mask); + kfree(resource->name); +- memset(resource, 0xf2, sizeof(*resource)); + kfree(resource); + } + +@@ -2674,7 +2677,6 @@ void drbd_destroy_connection(struct kref *kref) + drbd_free_socket(&connection->data); + kfree(connection->int_dig_in); + kfree(connection->int_dig_vv); +- memset(connection, 0xfc, sizeof(*connection)); + kfree(connection); + kref_put(&resource->kref, drbd_destroy_resource); + } +@@ -2696,7 +2698,7 @@ static int init_submitter(struct drbd_device *device) + enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsigned int minor) + { + struct drbd_resource *resource = adm_ctx->resource; +- struct drbd_connection *connection; ++ struct drbd_connection *connection, *n; + struct drbd_device *device; + struct drbd_peer_device *peer_device, *tmp_peer_device; + struct gendisk *disk; +@@ -2737,6 +2739,7 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig sprintf(disk->disk_name, "drbd%d", minor); disk->private_data = device; @@ -69678,7 +84066,7 @@ index 55234a558e98b..d59af26d77032 100644 blk_queue_write_cache(disk->queue, true, true); /* Setting the max_hw_sectors to an odd value of 8kibyte here This triggers a max_bio_size message upon first attach or connect */ -@@ -2791,10 +2795,12 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig +@@ -2791,10 +2794,12 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig if (init_submitter(device)) { err = ERR_NOMEM; @@ -69689,20 +84077,25 @@ index 55234a558e98b..d59af26d77032 100644 - add_disk(disk); + err = add_disk(disk); + if (err) -+ goto out_idr_remove_from_resource; ++ goto out_destroy_workqueue; /* inherit the connection state */ device->state.conn = first_connection(resource)->cstate; -@@ -2808,8 +2814,6 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig +@@ -2808,10 +2813,10 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig drbd_debugfs_device_add(device); return NO_ERROR; -out_idr_remove_vol: - idr_remove(&connection->peer_devices, vnr); ++out_destroy_workqueue: ++ destroy_workqueue(device->submit.wq); out_idr_remove_from_resource: - for_each_connection(connection, resource) { +- for_each_connection(connection, resource) { ++ for_each_connection_safe(connection, n, resource) { peer_device = idr_remove(&connection->peer_devices, vnr); -@@ -3603,9 +3607,8 @@ const char *cmdname(enum drbd_packet cmd) + if (peer_device) + kref_put(&connection->kref, drbd_destroy_connection); +@@ -3603,9 +3608,8 @@ const char *cmdname(enum drbd_packet cmd) * when we want to support more than * one PRO_VERSION */ static const char *cmdnames[] = { @@ -69713,7 +84106,7 @@ index 55234a558e98b..d59af26d77032 100644 [P_DATA_REPLY] = "DataReply", [P_RS_DATA_REPLY] = "RSDataReply", [P_BARRIER] = "Barrier", -@@ -3616,7 +3619,6 @@ const char *cmdname(enum drbd_packet cmd) +@@ -3616,7 +3620,6 @@ const char *cmdname(enum drbd_packet cmd) [P_DATA_REQUEST] = "DataRequest", [P_RS_DATA_REQUEST] = "RSDataRequest", [P_SYNC_PARAM] = "SyncParam", @@ -69721,7 +84114,7 @@ index 55234a558e98b..d59af26d77032 100644 
[P_PROTOCOL] = "ReportProtocol", [P_UUIDS] = "ReportUUIDs", [P_SIZES] = "ReportSizes", -@@ -3624,6 +3626,7 @@ const char *cmdname(enum drbd_packet cmd) +@@ -3624,6 +3627,7 @@ const char *cmdname(enum drbd_packet cmd) [P_SYNC_UUID] = "ReportSyncUUID", [P_AUTH_CHALLENGE] = "AuthChallenge", [P_AUTH_RESPONSE] = "AuthResponse", @@ -69729,7 +84122,7 @@ index 55234a558e98b..d59af26d77032 100644 [P_PING] = "Ping", [P_PING_ACK] = "PingAck", [P_RECV_ACK] = "RecvAck", -@@ -3634,23 +3637,25 @@ const char *cmdname(enum drbd_packet cmd) +@@ -3634,23 +3638,25 @@ const char *cmdname(enum drbd_packet cmd) [P_NEG_DREPLY] = "NegDReply", [P_NEG_RS_DREPLY] = "NegRSDReply", [P_BARRIER_ACK] = "BarrierAck", @@ -69928,7 +84321,7 @@ index 44ccf8b4f4b29..69184cf17b6ad 100644 int drbd_adm_get_initial_state(struct sk_buff *skb, struct netlink_callback *cb) diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c -index 5ca233644d705..47e0d105b462e 100644 +index 5ca233644d705..4281dc847bc22 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -180,7 +180,8 @@ void start_new_tl_epoch(struct drbd_connection *connection) @@ -69941,6 +84334,15 @@ index 5ca233644d705..47e0d105b462e 100644 bio_endio(m->bio); dec_ap_bio(device); } +@@ -1601,6 +1602,8 @@ blk_qc_t drbd_submit_bio(struct bio *bio) + struct drbd_device *device = bio->bi_bdev->bd_disk->private_data; + + blk_queue_split(&bio); ++ if (!bio) ++ return BLK_QC_T_NONE; + + /* + * what we "blindly" assume: diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index b8a27818ab3f8..4ee11aef6672b 100644 --- a/drivers/block/drbd/drbd_state.c @@ -70040,7 +84442,7 @@ index ba80f612d6abb..d5b0479bc9a66 100644 struct drbd_peer_device_state_change *, enum drbd_notification_type type); diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c -index fef79ea52e3ed..db0b3e8982fe5 100644 +index fef79ea52e3ed..4dc25a123d946 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -184,6 +184,7 @@ static int print_unex = 1; @@ -70227,7 +84629,19 @@ index fef79ea52e3ed..db0b3e8982fe5 100644 static bool floppy_available(int drive) { -@@ -4693,6 +4716,8 @@ static int __init do_floppy_init(void) +@@ -4564,8 +4587,10 @@ static int __init do_floppy_init(void) + goto out_put_disk; + + err = floppy_alloc_disk(drive, 0); +- if (err) ++ if (err) { ++ blk_mq_free_tag_set(&tag_sets[drive]); + goto out_put_disk; ++ } + + timer_setup(&motor_off_timer[drive], motor_off_callback, 0); + } +@@ -4693,6 +4718,8 @@ static int __init do_floppy_init(void) if (err) goto out_remove_drives; @@ -70236,7 +84650,7 @@ index fef79ea52e3ed..db0b3e8982fe5 100644 device_add_disk(&floppy_device[drive].dev, disks[drive][0], NULL); } -@@ -4703,7 +4728,8 @@ out_remove_drives: +@@ -4703,7 +4730,8 @@ out_remove_drives: while (drive--) { if (floppy_available(drive)) { del_gendisk(disks[drive][0]); @@ -70246,7 +84660,7 @@ index fef79ea52e3ed..db0b3e8982fe5 100644 } } out_release_dma: -@@ -4946,7 +4972,8 @@ static void __exit floppy_module_exit(void) +@@ -4946,7 +4974,8 @@ static void __exit floppy_module_exit(void) if (disks[drive][i]) del_gendisk(disks[drive][i]); } @@ -70257,7 +84671,7 @@ index fef79ea52e3ed..db0b3e8982fe5 100644 for (i = 0; i < ARRAY_SIZE(floppy_type); i++) { if (disks[drive][i]) diff --git a/drivers/block/loop.c b/drivers/block/loop.c -index 7bf4686af774e..79e485949b60d 100644 +index 7bf4686af774e..68a0c0fe64dd8 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -79,6 +79,7 @@ @@ -70372,7 +84786,25 @@ index 
7bf4686af774e..79e485949b60d 100644 if (err) return err; -@@ -2442,7 +2440,7 @@ static int loop_control_remove(int idx) +@@ -2093,7 +2091,16 @@ static const struct block_device_operations lo_fops = { + /* + * And now the modules code and kernel interface. + */ +-static int max_loop; ++ ++/* ++ * If max_loop is specified, create that many devices upfront. ++ * This also becomes a hard limit. If max_loop is not specified, ++ * create CONFIG_BLK_DEV_LOOP_MIN_COUNT loop devices at module ++ * init time. Loop devices can be requested on-demand with the ++ * /dev/loop-control interface, or be instantiated by accessing ++ * a 'dead' device node. ++ */ ++static int max_loop = CONFIG_BLK_DEV_LOOP_MIN_COUNT; + module_param(max_loop, int, 0444); + MODULE_PARM_DESC(max_loop, "Maximum number of loop devices"); + module_param(max_part, int, 0444); +@@ -2442,7 +2449,7 @@ static int loop_control_remove(int idx) int ret; if (idx < 0) { @@ -70381,6 +84813,44 @@ index 7bf4686af774e..79e485949b60d 100644 return -EINVAL; } +@@ -2538,7 +2545,7 @@ MODULE_ALIAS("devname:loop-control"); + + static int __init loop_init(void) + { +- int i, nr; ++ int i; + int err; + + part_shift = 0; +@@ -2566,19 +2573,6 @@ static int __init loop_init(void) + goto err_out; + } + +- /* +- * If max_loop is specified, create that many devices upfront. +- * This also becomes a hard limit. If max_loop is not specified, +- * create CONFIG_BLK_DEV_LOOP_MIN_COUNT loop devices at module +- * init time. Loop devices can be requested on-demand with the +- * /dev/loop-control interface, or be instantiated by accessing +- * a 'dead' device node. +- */ +- if (max_loop) +- nr = max_loop; +- else +- nr = CONFIG_BLK_DEV_LOOP_MIN_COUNT; +- + err = misc_register(&loop_misc); + if (err < 0) + goto err_out; +@@ -2590,7 +2584,7 @@ static int __init loop_init(void) + } + + /* pre-create number of devices given by config or max_loop */ +- for (i = 0; i < nr; i++) ++ for (i = 0; i < max_loop; i++) + loop_add(i); + + printk(KERN_INFO "loop: module loaded\n"); diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c index 901855717cb53..ba61e72741eab 100644 --- a/drivers/block/mtip32xx/mtip32xx.c @@ -70408,7 +84878,7 @@ index 26798da661bd4..bcaabf038947c 100644 bio_for_each_segment(bvec, bio, iter) { diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c -index 1183f7872b713..ec2b5dd2ce4ad 100644 +index 1183f7872b713..c1ef1df42eb66 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -122,10 +122,10 @@ struct nbd_device { @@ -70609,23 +85079,21 @@ index 1183f7872b713..ec2b5dd2ce4ad 100644 /* * Tell the block layer that we are not a rotational device */ -@@ -1749,20 +1758,22 @@ static struct nbd_device *nbd_dev_add(int index, unsigned int refs) +@@ -1747,22 +1756,14 @@ static struct nbd_device *nbd_dev_add(int index, unsigned int refs) + refcount_set(&nbd->refs, 0); + INIT_LIST_HEAD(&nbd->list); disk->major = NBD_MAJOR; - - /* Too big first_minor can cause duplicate creation of +- +- /* Too big first_minor can cause duplicate creation of - * sysfs files/links, since first_minor will be truncated to - * byte in __device_add_disk(). -+ * sysfs files/links, since index << part_shift might overflow, or -+ * MKDEV() expect that the max bits of first_minor is 20. 
- */ +- */ disk->first_minor = index << part_shift; - if (disk->first_minor > 0xff) { -+ if (disk->first_minor < index || disk->first_minor > MINORMASK) { - err = -EINVAL; +- err = -EINVAL; - goto out_free_idr; -+ goto out_free_work; - } - +- } +- disk->minors = 1 << part_shift; disk->fops = &nbd_fops; disk->private_data = nbd; @@ -70637,7 +85105,7 @@ index 1183f7872b713..ec2b5dd2ce4ad 100644 /* * Now publish the device. -@@ -1771,6 +1782,10 @@ static struct nbd_device *nbd_dev_add(int index, unsigned int refs) +@@ -1771,6 +1772,10 @@ static struct nbd_device *nbd_dev_add(int index, unsigned int refs) nbd_total_devices++; return nbd; @@ -70648,7 +85116,28 @@ index 1183f7872b713..ec2b5dd2ce4ad 100644 out_free_idr: mutex_lock(&nbd_index_mutex); idr_remove(&nbd_index_idr, index); -@@ -1907,13 +1922,14 @@ again: +@@ -1856,8 +1861,19 @@ static int nbd_genl_connect(struct sk_buff *skb, struct genl_info *info) + if (!netlink_capable(skb, CAP_SYS_ADMIN)) + return -EPERM; + +- if (info->attrs[NBD_ATTR_INDEX]) ++ if (info->attrs[NBD_ATTR_INDEX]) { + index = nla_get_u32(info->attrs[NBD_ATTR_INDEX]); ++ ++ /* ++ * Too big first_minor can cause duplicate creation of ++ * sysfs files/links, since index << part_shift might overflow, or ++ * MKDEV() expect that the max bits of first_minor is 20. ++ */ ++ if (index < 0 || index > MINORMASK >> part_shift) { ++ printk(KERN_ERR "nbd: illegal input index %d\n", index); ++ return -EINVAL; ++ } ++ } + if (!info->attrs[NBD_ATTR_SOCKETS]) { + printk(KERN_ERR "nbd: must specify at least one socket\n"); + return -EINVAL; +@@ -1907,13 +1923,14 @@ again: nbd_put(nbd); return -EINVAL; } @@ -70666,7 +85155,7 @@ index 1183f7872b713..ec2b5dd2ce4ad 100644 refcount_set(&nbd->config_refs, 1); set_bit(NBD_RT_BOUND, &config->runtime_flags); -@@ -2023,14 +2039,12 @@ static void nbd_disconnect_and_put(struct nbd_device *nbd) +@@ -2023,14 +2040,12 @@ static void nbd_disconnect_and_put(struct nbd_device *nbd) mutex_lock(&nbd->config_lock); nbd_disconnect(nbd); sock_shutdown(nbd); @@ -70684,7 +85173,7 @@ index 1183f7872b713..ec2b5dd2ce4ad 100644 nbd_clear_que(nbd); nbd->task_setup = NULL; mutex_unlock(&nbd->config_lock); -@@ -2135,7 +2149,7 @@ static int nbd_genl_reconfigure(struct sk_buff *skb, struct genl_info *info) +@@ -2135,7 +2150,7 @@ static int nbd_genl_reconfigure(struct sk_buff *skb, struct genl_info *info) mutex_lock(&nbd->config_lock); config = nbd->config; if (!test_bit(NBD_RT_BOUND, &config->runtime_flags) || @@ -70693,7 +85182,7 @@ index 1183f7872b713..ec2b5dd2ce4ad 100644 dev_err(nbd_to_dev(nbd), "not configured, cannot reconfigure\n"); ret = -EINVAL; -@@ -2473,6 +2487,12 @@ static void __exit nbd_cleanup(void) +@@ -2473,6 +2488,12 @@ static void __exit nbd_cleanup(void) struct nbd_device *nbd; LIST_HEAD(del_list); @@ -70706,7 +85195,7 @@ index 1183f7872b713..ec2b5dd2ce4ad 100644 nbd_dbg_close(); mutex_lock(&nbd_index_mutex); -@@ -2482,6 +2502,9 @@ static void __exit nbd_cleanup(void) +@@ -2482,6 +2503,9 @@ static void __exit nbd_cleanup(void) while (!list_empty(&del_list)) { nbd = list_first_entry(&del_list, struct nbd_device, list); list_del_init(&nbd->list); @@ -70716,7 +85205,7 @@ index 1183f7872b713..ec2b5dd2ce4ad 100644 if (refcount_read(&nbd->refs) != 1) printk(KERN_ERR "nbd: possibly leaking a device\n"); nbd_put(nbd); -@@ -2491,7 +2514,6 @@ static void __exit nbd_cleanup(void) +@@ -2491,7 +2515,6 @@ static void __exit nbd_cleanup(void) destroy_workqueue(nbd_del_wq); idr_destroy(&nbd_index_idr); @@ -70762,6 +85251,32 @@ index 187d779c8ca08..4c8b4101516c3 
100644 out_cleanup_zone: null_free_zoned_dev(dev); out_cleanup_disk: +diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c +index 0f26b2510a756..ca2ab977ef8ef 100644 +--- a/drivers/block/pktcdvd.c ++++ b/drivers/block/pktcdvd.c +@@ -2407,6 +2407,8 @@ static blk_qc_t pkt_submit_bio(struct bio *bio) + struct bio *split; + + blk_queue_split(&bio); ++ if (!bio) ++ return BLK_QC_T_NONE; + + pd = bio->bi_bdev->bd_disk->queue->queuedata; + if (!pd) { +diff --git a/drivers/block/ps3vram.c b/drivers/block/ps3vram.c +index c7b19e128b03c..c79aa4d8ccf73 100644 +--- a/drivers/block/ps3vram.c ++++ b/drivers/block/ps3vram.c +@@ -587,6 +587,8 @@ static blk_qc_t ps3vram_submit_bio(struct bio *bio) + dev_dbg(&dev->core, "%s\n", __func__); + + blk_queue_split(&bio); ++ if (!bio) ++ return BLK_QC_T_NONE; + + spin_lock_irq(&priv->lock); + busy = !bio_list_empty(&priv->list); diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index e65c9d706f6fb..c4a52f33604dc 100644 --- a/drivers/block/rbd.c @@ -70828,6 +85343,19 @@ index aafecfe970558..1896cde8135e4 100644 /* first remove sysfs itself to avoid deadlock */ sysfs_remove_file_self(&sess_dev->kobj, &attr->attr); rnbd_srv_destroy_dev_session_sysfs(sess_dev); +diff --git a/drivers/block/rsxx/dev.c b/drivers/block/rsxx/dev.c +index 1cc40b0ea7619..6b253d99bc48d 100644 +--- a/drivers/block/rsxx/dev.c ++++ b/drivers/block/rsxx/dev.c +@@ -127,6 +127,8 @@ static blk_qc_t rsxx_submit_bio(struct bio *bio) + blk_status_t st = BLK_STS_IOERR; + + blk_queue_split(&bio); ++ if (!bio) ++ return BLK_QC_T_NONE; + + might_sleep(); + diff --git a/drivers/block/swim.c b/drivers/block/swim.c index 7ccc8d2a41bc6..3911d0833e1b9 100644 --- a/drivers/block/swim.c @@ -71841,7 +86369,7 @@ index e4182acee488c..a18f289d73466 100644 { 0x6106, "BCM4359C0" }, /* 003.001.006 */ { 0x4106, "BCM4335A0" }, /* 002.001.006 */ diff --git a/drivers/bluetooth/btintel.c b/drivers/bluetooth/btintel.c -index f1705b46fc889..de3d851d85e7b 100644 +index f1705b46fc889..d707aa63e9441 100644 --- a/drivers/bluetooth/btintel.c +++ b/drivers/bluetooth/btintel.c @@ -2193,8 +2193,15 @@ static int btintel_setup_combined(struct hci_dev *hdev) @@ -71934,10 +86462,12 @@ index f1705b46fc889..de3d851d85e7b 100644 * * Also, it is not easy to convert TLV based version from the * legacy version format. 
-@@ -2344,6 +2354,19 @@ static int btintel_setup_combined(struct hci_dev *hdev) +@@ -2343,7 +2353,20 @@ static int btintel_setup_combined(struct hci_dev *hdev) + */ err = btintel_read_version(hdev, &ver); if (err) - return err; +- return err; ++ break; + + /* Apply the device specific HCI quirks + * @@ -71954,7 +86484,17 @@ index f1705b46fc889..de3d851d85e7b 100644 err = btintel_bootloader_setup(hdev, &ver); break; case 0x17: -@@ -2399,9 +2422,10 @@ static int btintel_shutdown_combined(struct hci_dev *hdev) +@@ -2371,7 +2394,8 @@ static int btintel_setup_combined(struct hci_dev *hdev) + default: + bt_dev_err(hdev, "Unsupported Intel hw variant (%u)", + INTEL_HW_VARIANT(ver_tlv.cnvi_bt)); +- return -EINVAL; ++ err = -EINVAL; ++ break; + } + + exit_error: +@@ -2399,9 +2423,10 @@ static int btintel_shutdown_combined(struct hci_dev *hdev) /* Some platforms have an issue with BT LED when the interface is * down or BT radio is turned off, which takes 5 seconds to BT LED @@ -72074,7 +86614,7 @@ index e9d91d7c0db48..9ba22b13b4fa0 100644 /* Parse and handle the return WMT event */ diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c -index 60d2fce59a71d..64d72ea0c3108 100644 +index 60d2fce59a71d..9c32263f872b9 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -59,7 +59,9 @@ static struct usb_driver btusb_driver; @@ -72214,7 +86754,36 @@ index 60d2fce59a71d..64d72ea0c3108 100644 { USB_DEVICE(0x04c5, 0x161f), .driver_info = BTUSB_REALTEK | BTUSB_WIDEBAND_SPEECH }, { USB_DEVICE(0x0b05, 0x18ef), .driver_info = BTUSB_REALTEK | -@@ -2217,6 +2280,7 @@ static void btusb_mtk_wmt_recv(struct urb *urb) +@@ -672,13 +735,13 @@ static inline void btusb_free_frags(struct btusb_data *data) + + spin_lock_irqsave(&data->rxlock, flags); + +- kfree_skb(data->evt_skb); ++ dev_kfree_skb_irq(data->evt_skb); + data->evt_skb = NULL; + +- kfree_skb(data->acl_skb); ++ dev_kfree_skb_irq(data->acl_skb); + data->acl_skb = NULL; + +- kfree_skb(data->sco_skb); ++ dev_kfree_skb_irq(data->sco_skb); + data->sco_skb = NULL; + + spin_unlock_irqrestore(&data->rxlock, flags); +@@ -1838,6 +1901,11 @@ static int btusb_setup_csr(struct hci_dev *hdev) + + rp = (struct hci_rp_read_local_version *)skb->data; + ++ bt_dev_info(hdev, "CSR: Setting up dongle with HCI ver=%u rev=%04x; LMP ver=%u subver=%04x; manufacturer=%u", ++ le16_to_cpu(rp->hci_ver), le16_to_cpu(rp->hci_rev), ++ le16_to_cpu(rp->lmp_ver), le16_to_cpu(rp->lmp_subver), ++ le16_to_cpu(rp->manufacturer)); ++ + /* Detect a wide host of Chinese controllers that aren't CSR. 
+ * + * Known fake bcdDevices: 0x0100, 0x0134, 0x1915, 0x2520, 0x7558, 0x8891 +@@ -2217,6 +2285,7 @@ static void btusb_mtk_wmt_recv(struct urb *urb) skb = bt_skb_alloc(HCI_WMT_MAX_EVENT_SIZE, GFP_ATOMIC); if (!skb) { hdev->stat.err_rx++; @@ -72222,7 +86791,7 @@ index 60d2fce59a71d..64d72ea0c3108 100644 return; } -@@ -2237,6 +2301,7 @@ static void btusb_mtk_wmt_recv(struct urb *urb) +@@ -2237,6 +2306,7 @@ static void btusb_mtk_wmt_recv(struct urb *urb) data->evt_skb = skb_clone(skb, GFP_ATOMIC); if (!data->evt_skb) { kfree_skb(skb); @@ -72230,7 +86799,7 @@ index 60d2fce59a71d..64d72ea0c3108 100644 return; } } -@@ -2245,6 +2310,7 @@ static void btusb_mtk_wmt_recv(struct urb *urb) +@@ -2245,6 +2315,7 @@ static void btusb_mtk_wmt_recv(struct urb *urb) if (err < 0) { kfree_skb(data->evt_skb); data->evt_skb = NULL; @@ -72238,7 +86807,7 @@ index 60d2fce59a71d..64d72ea0c3108 100644 return; } -@@ -2255,6 +2321,7 @@ static void btusb_mtk_wmt_recv(struct urb *urb) +@@ -2255,6 +2326,7 @@ static void btusb_mtk_wmt_recv(struct urb *urb) wake_up_bit(&data->flags, BTUSB_TX_WAIT_VND_EVT); } @@ -72246,7 +86815,7 @@ index 60d2fce59a71d..64d72ea0c3108 100644 return; } else if (urb->status == -ENOENT) { /* Avoid suspend failed when usb_kill_urb */ -@@ -2275,6 +2342,7 @@ static void btusb_mtk_wmt_recv(struct urb *urb) +@@ -2275,6 +2347,7 @@ static void btusb_mtk_wmt_recv(struct urb *urb) usb_anchor_urb(urb, &data->ctrl_anchor); err = usb_submit_urb(urb, GFP_ATOMIC); if (err < 0) { @@ -72254,7 +86823,7 @@ index 60d2fce59a71d..64d72ea0c3108 100644 /* -EPERM: urb is being killed; * -ENODEV: device got disconnected */ -@@ -2367,15 +2435,29 @@ static int btusb_mtk_hci_wmt_sync(struct hci_dev *hdev, +@@ -2367,15 +2440,29 @@ static int btusb_mtk_hci_wmt_sync(struct hci_dev *hdev, set_bit(BTUSB_TX_WAIT_VND_EVT, &data->flags); @@ -72284,7 +86853,7 @@ index 60d2fce59a71d..64d72ea0c3108 100644 if (err < 0) goto err_free_wc; -@@ -2515,6 +2597,7 @@ static int btusb_mtk_setup_firmware_79xx(struct hci_dev *hdev, const char *fwnam +@@ -2515,6 +2602,7 @@ static int btusb_mtk_setup_firmware_79xx(struct hci_dev *hdev, const char *fwnam } else { bt_dev_err(hdev, "Failed wmt patch dwnld status (%d)", status); @@ -72292,7 +86861,7 @@ index 60d2fce59a71d..64d72ea0c3108 100644 goto err_release_fw; } } -@@ -2804,11 +2887,16 @@ static int btusb_mtk_setup(struct hci_dev *hdev) +@@ -2804,11 +2892,16 @@ static int btusb_mtk_setup(struct hci_dev *hdev) case 0x7668: fwname = FIRMWARE_MT7668; break; @@ -72309,7 +86878,7 @@ index 60d2fce59a71d..64d72ea0c3108 100644 /* It's Device EndPoint Reset Option Register */ btusb_mtk_uhw_reg_write(data, MTK_EP_RST_OPT, MTK_EP_RST_IN_OUT_OPT); -@@ -2828,6 +2916,7 @@ static int btusb_mtk_setup(struct hci_dev *hdev) +@@ -2828,6 +2921,7 @@ static int btusb_mtk_setup(struct hci_dev *hdev) } hci_set_msft_opcode(hdev, 0xFD30); @@ -72317,7 +86886,7 @@ index 60d2fce59a71d..64d72ea0c3108 100644 goto done; default: bt_dev_err(hdev, "Unsupported hardware variant (%08x)", -@@ -3806,8 +3895,14 @@ static int btusb_probe(struct usb_interface *intf, +@@ -3806,8 +3900,14 @@ static int btusb_probe(struct usb_interface *intf, hdev->send = btusb_send_frame_intel; hdev->cmd_timeout = btusb_intel_cmd_timeout; @@ -72361,10 +86930,32 @@ index ef54afa293574..cf622e4596055 100644 { }, }; MODULE_DEVICE_TABLE(of, bcm_bluetooth_of_match); +diff --git a/drivers/bluetooth/hci_bcsp.c b/drivers/bluetooth/hci_bcsp.c +index cf4a560958173..8055f63603f45 100644 +--- a/drivers/bluetooth/hci_bcsp.c ++++ b/drivers/bluetooth/hci_bcsp.c +@@ 
-378,7 +378,7 @@ static void bcsp_pkt_cull(struct bcsp_struct *bcsp) + i++; + + __skb_unlink(skb, &bcsp->unack); +- kfree_skb(skb); ++ dev_kfree_skb_irq(skb); + } + + if (skb_queue_empty(&bcsp->unack)) diff --git a/drivers/bluetooth/hci_h5.c b/drivers/bluetooth/hci_h5.c -index 0c0dedece59c5..e0ea9d25bb393 100644 +index 0c0dedece59c5..1363b21c81b73 100644 --- a/drivers/bluetooth/hci_h5.c +++ b/drivers/bluetooth/hci_h5.c +@@ -313,7 +313,7 @@ static void h5_pkt_cull(struct h5 *h5) + break; + + __skb_unlink(skb, &h5->unack); +- kfree_skb(skb); ++ dev_kfree_skb_irq(skb); + } + + if (skb_queue_empty(&h5->unack)) @@ -587,9 +587,11 @@ static int h5_recv(struct hci_uart *hu, const void *data, int count) count -= processed; } @@ -72485,8 +87076,21 @@ index 5ed2cfa7da1d9..2d960a5e36793 100644 /* Flush any pending characters in the driver */ tty_driver_flush_buffer(tty); +diff --git a/drivers/bluetooth/hci_ll.c b/drivers/bluetooth/hci_ll.c +index eb1e736efeebb..e4e5b26e2c33b 100644 +--- a/drivers/bluetooth/hci_ll.c ++++ b/drivers/bluetooth/hci_ll.c +@@ -345,7 +345,7 @@ static int ll_enqueue(struct hci_uart *hu, struct sk_buff *skb) + default: + BT_ERR("illegal hcill state: %ld (losing packet)", + ll->hcill_state); +- kfree_skb(skb); ++ dev_kfree_skb_irq(skb); + break; + } + diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c -index 53deea2eb7b4d..ed91af4319b5b 100644 +index 53deea2eb7b4d..e45777b3f5dac 100644 --- a/drivers/bluetooth/hci_qca.c +++ b/drivers/bluetooth/hci_qca.c @@ -696,9 +696,9 @@ static int qca_close(struct hci_uart *hu) @@ -72501,6 +87105,15 @@ index 53deea2eb7b4d..ed91af4319b5b 100644 qca->hu = NULL; kfree_skb(qca->rx_skb); +@@ -912,7 +912,7 @@ static int qca_enqueue(struct hci_uart *hu, struct sk_buff *skb) + default: + BT_ERR("Illegal tx state: %d (losing packet)", + qca->tx_ibs_state); +- kfree_skb(skb); ++ dev_kfree_skb_irq(skb); + break; + } + @@ -1927,6 +1927,9 @@ static int qca_power_off(struct hci_dev *hdev) hu->hdev->hw_error = NULL; hu->hdev->cmd_timeout = NULL; @@ -72537,6 +87150,24 @@ index 53deea2eb7b4d..ed91af4319b5b 100644 dev_warn(&serdev->dev, "failed to acquire enable gpio\n"); power_ctrl_enabled = false; } +@@ -2153,10 +2156,17 @@ static void qca_serdev_shutdown(struct device *dev) + int timeout = msecs_to_jiffies(CMD_TRANS_TIMEOUT_MS); + struct serdev_device *serdev = to_serdev_device(dev); + struct qca_serdev *qcadev = serdev_device_get_drvdata(serdev); ++ struct hci_uart *hu = &qcadev->serdev_hu; ++ struct hci_dev *hdev = hu->hdev; ++ struct qca_data *qca = hu->priv; + const u8 ibs_wake_cmd[] = { 0xFD }; + const u8 edl_reset_soc_cmd[] = { 0x01, 0x00, 0xFC, 0x01, 0x05 }; + + if (qcadev->btsoc_type == QCA_QCA6390) { ++ if (test_bit(QCA_BT_OFF, &qca->flags) || ++ !test_bit(HCI_RUNNING, &hdev->flags)) ++ return; ++ + serdev_device_write_flush(serdev); + ret = serdev_device_write_buf(serdev, ibs_wake_cmd, + sizeof(ibs_wake_cmd)); diff --git a/drivers/bluetooth/hci_serdev.c b/drivers/bluetooth/hci_serdev.c index 3b00d82d36cf7..649d112eea787 100644 --- a/drivers/bluetooth/hci_serdev.c @@ -72593,7 +87224,7 @@ index 8ab26dec5f6e8..8469f9876dd26 100644 BT_ERR("Can't register HCI device"); hci_free_dev(hdev); diff --git a/drivers/bluetooth/virtio_bt.c b/drivers/bluetooth/virtio_bt.c -index 57908ce4fae85..076e4942a3f0e 100644 +index 57908ce4fae85..612f10456849f 100644 --- a/drivers/bluetooth/virtio_bt.c +++ b/drivers/bluetooth/virtio_bt.c @@ -202,6 +202,9 @@ static void virtbt_rx_handle(struct virtio_bluetooth *vbt, struct sk_buff *skb) @@ -72606,6 
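
The virtio_bt hunk just below swaps a bare "skb->len = len" for skb_put(). The difference is real: skb_put() advances skb->tail together with skb->len and traps on buffer overrun, so downstream code that trims, pulls, or appends sees a consistent buffer. A sketch of the receive path, assuming kernel context:

	struct sk_buff *skb = alloc_skb(size, GFP_KERNEL);

	if (!skb)
		return;

	/* the device has written 'len' bytes into skb->data */
	skb_put(skb, len);		/* moves tail and len together */
	virtbt_rx_handle(vbt, skb);
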
+87237,15 @@ index 57908ce4fae85..076e4942a3f0e 100644 } } +@@ -216,7 +219,7 @@ static void virtbt_rx_work(struct work_struct *work) + if (!skb) + return; + +- skb->len = len; ++ skb_put(skb, len); + virtbt_rx_handle(vbt, skb); + + if (virtbt_add_inbuf(vbt) < 0) diff --git a/drivers/bus/fsl-mc/fsl-mc-bus.c b/drivers/bus/fsl-mc/fsl-mc-bus.c index 8fd4a356a86ec..74593a1722fe0 100644 --- a/drivers/bus/fsl-mc/fsl-mc-bus.c @@ -72675,6 +87315,41 @@ index 378f5d62a9912..e7eaa8784fee0 100644 fail: hisi_lpc_acpi_remove(hostdev); return ret; +diff --git a/drivers/bus/intel-ixp4xx-eb.c b/drivers/bus/intel-ixp4xx-eb.c +index a4388440aca7a..91db001eb69a6 100644 +--- a/drivers/bus/intel-ixp4xx-eb.c ++++ b/drivers/bus/intel-ixp4xx-eb.c +@@ -49,7 +49,7 @@ + #define IXP4XX_EXP_SIZE_SHIFT 10 + #define IXP4XX_EXP_CNFG_0 BIT(9) /* Always zero */ + #define IXP43X_EXP_SYNC_INTEL BIT(8) /* Only on IXP43x */ +-#define IXP43X_EXP_EXP_CHIP BIT(7) /* Only on IXP43x */ ++#define IXP43X_EXP_EXP_CHIP BIT(7) /* Only on IXP43x, dangerous to touch on IXP42x */ + #define IXP4XX_EXP_BYTE_RD16 BIT(6) + #define IXP4XX_EXP_HRDY_POL BIT(5) /* Only on IXP42x */ + #define IXP4XX_EXP_MUX_EN BIT(4) +@@ -57,8 +57,6 @@ + #define IXP4XX_EXP_WORD BIT(2) /* Always zero */ + #define IXP4XX_EXP_WR_EN BIT(1) + #define IXP4XX_EXP_BYTE_EN BIT(0) +-#define IXP42X_RESERVED (BIT(30)|IXP4XX_EXP_CNFG_0|BIT(8)|BIT(7)|IXP4XX_EXP_WORD) +-#define IXP43X_RESERVED (BIT(30)|IXP4XX_EXP_CNFG_0|BIT(5)|IXP4XX_EXP_WORD) + + #define IXP4XX_EXP_CNFG0 0x20 + #define IXP4XX_EXP_CNFG0_MEM_MAP BIT(31) +@@ -252,10 +250,9 @@ static void ixp4xx_exp_setup_chipselect(struct ixp4xx_eb *eb, + cs_cfg |= val << IXP4XX_EXP_CYC_TYPE_SHIFT; + } + +- if (eb->is_42x) +- cs_cfg &= ~IXP42X_RESERVED; + if (eb->is_43x) { +- cs_cfg &= ~IXP43X_RESERVED; ++ /* Should always be zero */ ++ cs_cfg &= ~IXP4XX_EXP_WORD; + /* + * This bit for Intel strata flash is currently unused, but let's + * report it if we find one. 
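
The MHI hunks that follow are largely hardening against a misbehaving device: the main.c change refuses to chase an event-ring read pointer the device wrote back outside the ring before using it to index anything. A sketch of that bounds check (field names as in struct mhi_ring; the surrounding handler is elided):

	dma_addr_t ptr = er_ctxt->rp;

	if (ptr < ring->iommu_base ||
	    ptr >= ring->iommu_base + ring->len) {
		dev_err(&mhi_cntrl->mhi_dev->dev,
			"Event ring rp points outside of the event ring\n");
		return IRQ_HANDLED;	/* drop it rather than walk bad memory */
	}
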
diff --git a/drivers/bus/mhi/core/debugfs.c b/drivers/bus/mhi/core/debugfs.c index 858d7516410bb..d818586c229d2 100644 --- a/drivers/bus/mhi/core/debugfs.c @@ -73147,7 +87822,7 @@ index b15c5bc37dd4f..9a94b8d66f575 100644 dev_err(&mhi_cntrl->mhi_dev->dev, "Event ring rp points outside of the event ring\n"); diff --git a/drivers/bus/mhi/core/pm.c b/drivers/bus/mhi/core/pm.c -index fb99e3727155b..1020268a075a5 100644 +index fb99e3727155b..1a87b9c6c2f89 100644 --- a/drivers/bus/mhi/core/pm.c +++ b/drivers/bus/mhi/core/pm.c @@ -218,7 +218,7 @@ int mhi_ready_state_transition(struct mhi_controller *mhi_cntrl) @@ -73159,7 +87834,17 @@ index fb99e3727155b..1020268a075a5 100644 /* Update all cores */ smp_wmb(); -@@ -420,7 +420,7 @@ static int mhi_pm_mission_mode_transition(struct mhi_controller *mhi_cntrl) +@@ -297,7 +297,8 @@ int mhi_pm_m0_transition(struct mhi_controller *mhi_cntrl) + read_lock_irq(&mhi_chan->lock); + + /* Only ring DB if ring is not empty */ +- if (tre_ring->base && tre_ring->wp != tre_ring->rp) ++ if (tre_ring->base && tre_ring->wp != tre_ring->rp && ++ mhi_chan->ch_state == MHI_CH_STATE_ENABLED) + mhi_ring_chan_db(mhi_cntrl, mhi_chan); + read_unlock_irq(&mhi_chan->lock); + } +@@ -420,7 +421,7 @@ static int mhi_pm_mission_mode_transition(struct mhi_controller *mhi_cntrl) continue; ring->wp = ring->base + ring->len - ring->el_size; @@ -73168,7 +87853,7 @@ index fb99e3727155b..1020268a075a5 100644 /* Update to all cores */ smp_wmb(); -@@ -881,7 +881,7 @@ int mhi_pm_suspend(struct mhi_controller *mhi_cntrl) +@@ -881,7 +882,7 @@ int mhi_pm_suspend(struct mhi_controller *mhi_cntrl) } EXPORT_SYMBOL_GPL(mhi_pm_suspend); @@ -73177,7 +87862,7 @@ index fb99e3727155b..1020268a075a5 100644 { struct mhi_chan *itr, *tmp; struct device *dev = &mhi_cntrl->mhi_dev->dev; -@@ -898,8 +898,12 @@ int mhi_pm_resume(struct mhi_controller *mhi_cntrl) +@@ -898,8 +899,12 @@ int mhi_pm_resume(struct mhi_controller *mhi_cntrl) if (MHI_PM_IN_ERROR_STATE(mhi_cntrl->pm_state)) return -EIO; @@ -73192,7 +87877,7 @@ index fb99e3727155b..1020268a075a5 100644 /* Notify clients about exiting LPM */ list_for_each_entry_safe(itr, tmp, &mhi_cntrl->lpm_chans, node) { -@@ -940,8 +944,19 @@ int mhi_pm_resume(struct mhi_controller *mhi_cntrl) +@@ -940,8 +945,19 @@ int mhi_pm_resume(struct mhi_controller *mhi_cntrl) return 0; } @@ -73212,7 +87897,7 @@ index fb99e3727155b..1020268a075a5 100644 int __mhi_device_get_sync(struct mhi_controller *mhi_cntrl) { int ret; -@@ -1038,7 +1053,7 @@ int mhi_async_power_up(struct mhi_controller *mhi_cntrl) +@@ -1038,7 +1054,7 @@ int mhi_async_power_up(struct mhi_controller *mhi_cntrl) enum mhi_ee_type current_ee; enum dev_st_transition next_state; struct device *dev = &mhi_cntrl->mhi_dev->dev; @@ -73221,7 +87906,7 @@ index fb99e3727155b..1020268a075a5 100644 int ret; dev_info(dev, "Requested to power ON\n"); -@@ -1055,10 +1070,6 @@ int mhi_async_power_up(struct mhi_controller *mhi_cntrl) +@@ -1055,10 +1071,6 @@ int mhi_async_power_up(struct mhi_controller *mhi_cntrl) mutex_lock(&mhi_cntrl->pm_mutex); mhi_cntrl->pm_state = MHI_PM_DISABLE; @@ -73232,7 +87917,7 @@ index fb99e3727155b..1020268a075a5 100644 /* Setup BHI INTVEC */ write_lock_irq(&mhi_cntrl->pm_lock); mhi_write_reg(mhi_cntrl, mhi_cntrl->bhi, BHI_INTVEC, 0); -@@ -1072,7 +1083,7 @@ int mhi_async_power_up(struct mhi_controller *mhi_cntrl) +@@ -1072,7 +1084,7 @@ int mhi_async_power_up(struct mhi_controller *mhi_cntrl) dev_err(dev, "%s is not a valid EE for power on\n", TO_MHI_EXEC_STR(current_ee)); ret = -EIO; @@ -73241,7 +87926,7 
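
The pm.c hunks just above split resume into a common worker taking a "force" flag: mhi_pm_resume() keeps the strict we-must-be-in-M3 check, while the new mhi_pm_resume_force() lets a caller that knows the state is stale push through anyway. The wrapper shape, roughly as the hunk adds it:

	static int __mhi_pm_resume(struct mhi_controller *mhi_cntrl, bool force)
	{
		if (mhi_get_mhi_state(mhi_cntrl) != MHI_STATE_M3) {
			dev_warn(&mhi_cntrl->mhi_dev->dev,
				 "Resuming from non M3 state\n");
			if (!force)
				return -EINVAL;
		}
		/* ... the original resume sequence continues here ... */
		return 0;
	}

	int mhi_pm_resume(struct mhi_controller *mhi_cntrl)
	{
		return __mhi_pm_resume(mhi_cntrl, false);
	}

	int mhi_pm_resume_force(struct mhi_controller *mhi_cntrl)
	{
		return __mhi_pm_resume(mhi_cntrl, true);
	}
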
@@ index fb99e3727155b..1020268a075a5 100644 } state = mhi_get_mhi_state(mhi_cntrl); -@@ -1081,20 +1092,12 @@ int mhi_async_power_up(struct mhi_controller *mhi_cntrl) +@@ -1081,20 +1093,12 @@ int mhi_async_power_up(struct mhi_controller *mhi_cntrl) if (state == MHI_STATE_SYS_ERR) { mhi_set_mhi_state(mhi_cntrl, MHI_STATE_RESET); @@ -73267,7 +87952,7 @@ index fb99e3727155b..1020268a075a5 100644 } /* -@@ -1104,6 +1107,10 @@ int mhi_async_power_up(struct mhi_controller *mhi_cntrl) +@@ -1104,6 +1108,10 @@ int mhi_async_power_up(struct mhi_controller *mhi_cntrl) mhi_write_reg(mhi_cntrl, mhi_cntrl->bhi, BHI_INTVEC, 0); } @@ -73278,7 +87963,7 @@ index fb99e3727155b..1020268a075a5 100644 /* Transition to next state */ next_state = MHI_IN_PBL(current_ee) ? DEV_ST_TRANSITION_PBL : DEV_ST_TRANSITION_READY; -@@ -1116,10 +1123,7 @@ int mhi_async_power_up(struct mhi_controller *mhi_cntrl) +@@ -1116,10 +1124,7 @@ int mhi_async_power_up(struct mhi_controller *mhi_cntrl) return 0; @@ -73445,7 +88130,7 @@ index 626dedd110cbc..fca0d0669aa97 100644 return res.start; } diff --git a/drivers/bus/sunxi-rsb.c b/drivers/bus/sunxi-rsb.c -index 6f225dddc74f4..60b082fe2ed02 100644 +index 6f225dddc74f4..fac8627b04e34 100644 --- a/drivers/bus/sunxi-rsb.c +++ b/drivers/bus/sunxi-rsb.c @@ -227,6 +227,8 @@ static struct sunxi_rsb_device *sunxi_rsb_device_create(struct sunxi_rsb *rsb, @@ -73457,7 +88142,68 @@ index 6f225dddc74f4..60b082fe2ed02 100644 err_device_add: put_device(&rdev->dev); -@@ -687,11 +689,11 @@ err_clk_disable: +@@ -269,6 +271,9 @@ EXPORT_SYMBOL_GPL(sunxi_rsb_driver_register); + /* common code that starts a transfer */ + static int _sunxi_rsb_run_xfer(struct sunxi_rsb *rsb) + { ++ u32 int_mask, status; ++ bool timeout; ++ + if (readl(rsb->regs + RSB_CTRL) & RSB_CTRL_START_TRANS) { + dev_dbg(rsb->dev, "RSB transfer still in progress\n"); + return -EBUSY; +@@ -276,13 +281,23 @@ static int _sunxi_rsb_run_xfer(struct sunxi_rsb *rsb) + + reinit_completion(&rsb->complete); + +- writel(RSB_INTS_LOAD_BSY | RSB_INTS_TRANS_ERR | RSB_INTS_TRANS_OVER, +- rsb->regs + RSB_INTE); ++ int_mask = RSB_INTS_LOAD_BSY | RSB_INTS_TRANS_ERR | RSB_INTS_TRANS_OVER; ++ writel(int_mask, rsb->regs + RSB_INTE); + writel(RSB_CTRL_START_TRANS | RSB_CTRL_GLOBAL_INT_ENB, + rsb->regs + RSB_CTRL); + +- if (!wait_for_completion_io_timeout(&rsb->complete, +- msecs_to_jiffies(100))) { ++ if (irqs_disabled()) { ++ timeout = readl_poll_timeout_atomic(rsb->regs + RSB_INTS, ++ status, (status & int_mask), ++ 10, 100000); ++ writel(status, rsb->regs + RSB_INTS); ++ } else { ++ timeout = !wait_for_completion_io_timeout(&rsb->complete, ++ msecs_to_jiffies(100)); ++ status = rsb->status; ++ } ++ ++ if (timeout) { + dev_dbg(rsb->dev, "RSB timeout\n"); + + /* abort the transfer */ +@@ -294,18 +309,18 @@ static int _sunxi_rsb_run_xfer(struct sunxi_rsb *rsb) + return -ETIMEDOUT; + } + +- if (rsb->status & RSB_INTS_LOAD_BSY) { ++ if (status & RSB_INTS_LOAD_BSY) { + dev_dbg(rsb->dev, "RSB busy\n"); + return -EBUSY; + } + +- if (rsb->status & RSB_INTS_TRANS_ERR) { +- if (rsb->status & RSB_INTS_TRANS_ERR_ACK) { ++ if (status & RSB_INTS_TRANS_ERR) { ++ if (status & RSB_INTS_TRANS_ERR_ACK) { + dev_dbg(rsb->dev, "RSB slave nack\n"); + return -EINVAL; + } + +- if (rsb->status & RSB_INTS_TRANS_ERR_DATA) { ++ if (status & RSB_INTS_TRANS_ERR_DATA) { + dev_dbg(rsb->dev, "RSB transfer data error\n"); + return -EIO; + } +@@ -687,11 +702,11 @@ err_clk_disable: static void sunxi_rsb_hw_exit(struct sunxi_rsb *rsb) { @@ -73473,6 +88219,44 @@ index 
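
The sunxi-rsb hunks above make _sunxi_rsb_run_xfer() usable with interrupts disabled (PMIC writes late in poweroff): when irqs_disabled(), it busy-polls the interrupt-status register with readl_poll_timeout_atomic() and acks the bits by hand instead of sleeping on the completion. The two paths, as the hunk arranges them:

	if (irqs_disabled()) {
		timeout = readl_poll_timeout_atomic(rsb->regs + RSB_INTS,
						    status, (status & int_mask),
						    10, 100000);
		writel(status, rsb->regs + RSB_INTS);	/* ack by hand */
	} else {
		timeout = !wait_for_completion_io_timeout(&rsb->complete,
							  msecs_to_jiffies(100));
		status = rsb->status;	/* captured by the IRQ handler */
	}
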
6f225dddc74f4..60b082fe2ed02 100644 } static int __maybe_unused sunxi_rsb_runtime_suspend(struct device *dev) +@@ -814,14 +829,6 @@ static int sunxi_rsb_remove(struct platform_device *pdev) + return 0; + } + +-static void sunxi_rsb_shutdown(struct platform_device *pdev) +-{ +- struct sunxi_rsb *rsb = platform_get_drvdata(pdev); +- +- pm_runtime_disable(&pdev->dev); +- sunxi_rsb_hw_exit(rsb); +-} +- + static const struct dev_pm_ops sunxi_rsb_dev_pm_ops = { + SET_RUNTIME_PM_OPS(sunxi_rsb_runtime_suspend, + sunxi_rsb_runtime_resume, NULL) +@@ -837,7 +844,6 @@ MODULE_DEVICE_TABLE(of, sunxi_rsb_of_match_table); + static struct platform_driver sunxi_rsb_driver = { + .probe = sunxi_rsb_probe, + .remove = sunxi_rsb_remove, +- .shutdown = sunxi_rsb_shutdown, + .driver = { + .name = RSB_CTRL_NAME, + .of_match_table = sunxi_rsb_of_match_table, +@@ -855,7 +861,13 @@ static int __init sunxi_rsb_init(void) + return ret; + } + +- return platform_driver_register(&sunxi_rsb_driver); ++ ret = platform_driver_register(&sunxi_rsb_driver); ++ if (ret) { ++ bus_unregister(&sunxi_rsb_bus); ++ return ret; ++ } ++ ++ return 0; + } + module_init(sunxi_rsb_init); + diff --git a/drivers/bus/ti-sysc.c b/drivers/bus/ti-sysc.c index 6a8b7fb5be58d..e93912e56f28c 100644 --- a/drivers/bus/ti-sysc.c @@ -73916,6 +88700,53 @@ index 239eca4d68055..650c7d9180802 100644 default HW_RANDOM help This driver provides kernel-side support for the Random Number +diff --git a/drivers/char/hw_random/amd-rng.c b/drivers/char/hw_random/amd-rng.c +index c22d4184bb612..0555e3838bce1 100644 +--- a/drivers/char/hw_random/amd-rng.c ++++ b/drivers/char/hw_random/amd-rng.c +@@ -143,15 +143,19 @@ static int __init amd_rng_mod_init(void) + found: + err = pci_read_config_dword(pdev, 0x58, &pmbase); + if (err) +- return err; ++ goto put_dev; + + pmbase &= 0x0000FF00; +- if (pmbase == 0) +- return -EIO; ++ if (pmbase == 0) { ++ err = -EIO; ++ goto put_dev; ++ } + + priv = kzalloc(sizeof(*priv), GFP_KERNEL); +- if (!priv) +- return -ENOMEM; ++ if (!priv) { ++ err = -ENOMEM; ++ goto put_dev; ++ } + + if (!request_region(pmbase + PMBASE_OFFSET, PMBASE_SIZE, DRV_NAME)) { + dev_err(&pdev->dev, DRV_NAME " region 0x%x already in use!\n", +@@ -185,6 +189,8 @@ err_iomap: + release_region(pmbase + PMBASE_OFFSET, PMBASE_SIZE); + out: + kfree(priv); ++put_dev: ++ pci_dev_put(pdev); + return err; + } + +@@ -200,6 +206,8 @@ static void __exit amd_rng_mod_exit(void) + + release_region(priv->pmbase + PMBASE_OFFSET, PMBASE_SIZE); + ++ pci_dev_put(priv->pcidev); ++ + kfree(priv); + } + diff --git a/drivers/char/hw_random/arm_smccc_trng.c b/drivers/char/hw_random/arm_smccc_trng.c index b24ac39a903b3..e34c3ea692b6c 100644 --- a/drivers/char/hw_random/arm_smccc_trng.c @@ -74244,6 +89075,91 @@ index a3db27916256d..cfb085de876b7 100644 #include <linux/kernel.h> #include <linux/kthread.h> #include <linux/sched/signal.h> +diff --git a/drivers/char/hw_random/geode-rng.c b/drivers/char/hw_random/geode-rng.c +index 138ce434f86b2..12fbe80918319 100644 +--- a/drivers/char/hw_random/geode-rng.c ++++ b/drivers/char/hw_random/geode-rng.c +@@ -51,6 +51,10 @@ static const struct pci_device_id pci_tbl[] = { + }; + MODULE_DEVICE_TABLE(pci, pci_tbl); + ++struct amd_geode_priv { ++ struct pci_dev *pcidev; ++ void __iomem *membase; ++}; + + static int geode_rng_data_read(struct hwrng *rng, u32 *data) + { +@@ -90,6 +94,7 @@ static int __init geode_rng_init(void) + const struct pci_device_id *ent; + void __iomem *mem; + unsigned long rng_base; ++ struct amd_geode_priv *priv; + + 
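
Both hw_random fixes here (amd-rng above, geode-rng continuing below) plug the same reference leak: for_each_pci_dev() holds a reference on the device it stops at, so every exit path after the loop, including module unload, must pci_dev_put() it. The idiom in miniature (do_setup() is hypothetical):

	struct pci_dev *pdev = NULL;
	int err;

	for_each_pci_dev(pdev) {
		if (pci_match_id(pci_tbl, pdev))
			break;		/* loop exits holding a ref */
	}
	if (!pdev)
		return -ENODEV;		/* list exhausted: no ref held */

	err = do_setup(pdev);
	if (err)
		pci_dev_put(pdev);	/* drop the ref on failure */
	return err;

On success the reference is kept for the life of the driver and dropped in the exit path, which is exactly what both drivers now stash a pcidev pointer for.
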
for_each_pci_dev(pdev) { + ent = pci_match_id(pci_tbl, pdev); +@@ -97,17 +102,26 @@ static int __init geode_rng_init(void) + goto found; + } + /* Device not found. */ +- goto out; ++ return err; + + found: ++ priv = kzalloc(sizeof(*priv), GFP_KERNEL); ++ if (!priv) { ++ err = -ENOMEM; ++ goto put_dev; ++ } ++ + rng_base = pci_resource_start(pdev, 0); + if (rng_base == 0) +- goto out; ++ goto free_priv; + err = -ENOMEM; + mem = ioremap(rng_base, 0x58); + if (!mem) +- goto out; +- geode_rng.priv = (unsigned long)mem; ++ goto free_priv; ++ ++ geode_rng.priv = (unsigned long)priv; ++ priv->membase = mem; ++ priv->pcidev = pdev; + + pr_info("AMD Geode RNG detected\n"); + err = hwrng_register(&geode_rng); +@@ -116,20 +130,26 @@ found: + err); + goto err_unmap; + } +-out: + return err; + + err_unmap: + iounmap(mem); +- goto out; ++free_priv: ++ kfree(priv); ++put_dev: ++ pci_dev_put(pdev); ++ return err; + } + + static void __exit geode_rng_exit(void) + { +- void __iomem *mem = (void __iomem *)geode_rng.priv; ++ struct amd_geode_priv *priv; + ++ priv = (struct amd_geode_priv *)geode_rng.priv; + hwrng_unregister(&geode_rng); +- iounmap(mem); ++ iounmap(priv->membase); ++ pci_dev_put(priv->pcidev); ++ kfree(priv); + } + + module_init(geode_rng_init); diff --git a/drivers/char/hw_random/imx-rngc.c b/drivers/char/hw_random/imx-rngc.c index b05d676ca814c..2964efeb71c33 100644 --- a/drivers/char/hw_random/imx-rngc.c @@ -74331,7 +89247,7 @@ index e0d77fa048fb6..f06e4f95114f9 100644 return -EIO; diff --git a/drivers/char/ipmi/ipmi_msghandler.c b/drivers/char/ipmi/ipmi_msghandler.c -index e96cb5c4f97a3..2badf36d4816c 100644 +index e96cb5c4f97a3..15c211c5d6f4e 100644 --- a/drivers/char/ipmi/ipmi_msghandler.c +++ b/drivers/char/ipmi/ipmi_msghandler.c @@ -11,8 +11,8 @@ @@ -74363,7 +89279,26 @@ index e96cb5c4f97a3..2badf36d4816c 100644 } static void _ipmi_destroy_user(struct ipmi_user *user) -@@ -2930,7 +2932,7 @@ cleanup_bmc_device(struct kref *ref) +@@ -1271,6 +1273,7 @@ static void _ipmi_destroy_user(struct ipmi_user *user) + unsigned long flags; + struct cmd_rcvr *rcvr; + struct cmd_rcvr *rcvrs = NULL; ++ struct module *owner; + + if (!acquire_ipmi_user(user, &i)) { + /* +@@ -1332,8 +1335,9 @@ static void _ipmi_destroy_user(struct ipmi_user *user) + kfree(rcvr); + } + ++ owner = intf->owner; + kref_put(&intf->refcount, intf_free); +- module_put(intf->owner); ++ module_put(owner); + } + + int ipmi_destroy_user(struct ipmi_user *user) +@@ -2930,7 +2934,7 @@ cleanup_bmc_device(struct kref *ref) * with removing the device attributes while reading a device * attribute. */ @@ -74372,7 +89307,26 @@ index e96cb5c4f97a3..2badf36d4816c 100644 } /* -@@ -4789,7 +4791,9 @@ static atomic_t recv_msg_inuse_count = ATOMIC_INIT(0); +@@ -3525,12 +3529,16 @@ static void deliver_smi_err_response(struct ipmi_smi *intf, + struct ipmi_smi_msg *msg, + unsigned char err) + { ++ int rv; + msg->rsp[0] = msg->data[0] | 4; + msg->rsp[1] = msg->data[1]; + msg->rsp[2] = err; + msg->rsp_size = 3; +- /* It's an error, so it will never requeue, no need to check return. */ +- handle_one_recv_msg(intf, msg); ++ ++ /* This will never requeue, but it may ask us to free the message. 
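
The comment being completed here is about message ownership: a zero return from handle_one_recv_msg() means the message will not be requeued and now belongs to the caller, which deliver_smi_err_response() used to ignore, leaking the ipmi_smi_msg on every synthesized error response. The fixed call site, as the hunk writes it:

	rv = handle_one_recv_msg(intf, msg);
	if (rv == 0)
		ipmi_free_smi_msg(msg);	/* 0: handled, caller frees */
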
*/ ++ rv = handle_one_recv_msg(intf, msg); ++ if (rv == 0) ++ ipmi_free_smi_msg(msg); + } + + static void cleanup_smi_msgs(struct ipmi_smi *intf) +@@ -4789,7 +4797,9 @@ static atomic_t recv_msg_inuse_count = ATOMIC_INIT(0); static void free_smi_msg(struct ipmi_smi_msg *msg) { atomic_dec(&smi_msg_inuse_count); @@ -74383,7 +89337,7 @@ index e96cb5c4f97a3..2badf36d4816c 100644 } struct ipmi_smi_msg *ipmi_alloc_smi_msg(void) -@@ -4808,7 +4812,9 @@ EXPORT_SYMBOL(ipmi_alloc_smi_msg); +@@ -4808,7 +4818,9 @@ EXPORT_SYMBOL(ipmi_alloc_smi_msg); static void free_recv_msg(struct ipmi_recv_msg *msg) { atomic_dec(&recv_msg_inuse_count); @@ -74394,7 +89348,7 @@ index e96cb5c4f97a3..2badf36d4816c 100644 } static struct ipmi_recv_msg *ipmi_alloc_recv_msg(void) -@@ -4826,7 +4832,7 @@ static struct ipmi_recv_msg *ipmi_alloc_recv_msg(void) +@@ -4826,7 +4838,7 @@ static struct ipmi_recv_msg *ipmi_alloc_recv_msg(void) void ipmi_free_recv_msg(struct ipmi_recv_msg *msg) { @@ -74403,7 +89357,7 @@ index e96cb5c4f97a3..2badf36d4816c 100644 kref_put(&msg->user->refcount, free_user); msg->done(msg); } -@@ -5142,7 +5148,16 @@ static int ipmi_init_msghandler(void) +@@ -5142,7 +5154,16 @@ static int ipmi_init_msghandler(void) if (initialized) goto out; @@ -74421,7 +89375,7 @@ index e96cb5c4f97a3..2badf36d4816c 100644 timer_setup(&ipmi_timer, ipmi_timeout, 0); mod_timer(&ipmi_timer, jiffies + IPMI_TIMEOUT_JIFFIES); -@@ -5151,6 +5166,9 @@ static int ipmi_init_msghandler(void) +@@ -5151,6 +5172,9 @@ static int ipmi_init_msghandler(void) initialized = true; @@ -74431,7 +89385,7 @@ index e96cb5c4f97a3..2badf36d4816c 100644 out: mutex_unlock(&ipmi_interfaces_mutex); return rv; -@@ -5174,6 +5192,8 @@ static void __exit cleanup_ipmi(void) +@@ -5174,6 +5198,8 @@ static void __exit cleanup_ipmi(void) int count; if (initialized) { @@ -74440,6 +89394,53 @@ index e96cb5c4f97a3..2badf36d4816c 100644 atomic_notifier_chain_unregister(&panic_notifier_list, &panic_block); +diff --git a/drivers/char/ipmi/ipmi_si_intf.c b/drivers/char/ipmi/ipmi_si_intf.c +index 6f3272b58ced3..17255e705cb06 100644 +--- a/drivers/char/ipmi/ipmi_si_intf.c ++++ b/drivers/char/ipmi/ipmi_si_intf.c +@@ -2152,6 +2152,20 @@ skip_fallback_noirq: + } + module_init(init_ipmi_si); + ++static void wait_msg_processed(struct smi_info *smi_info) ++{ ++ unsigned long jiffies_now; ++ long time_diff; ++ ++ while (smi_info->curr_msg || (smi_info->si_state != SI_NORMAL)) { ++ jiffies_now = jiffies; ++ time_diff = (((long)jiffies_now - (long)smi_info->last_timeout_jiffies) ++ * SI_USEC_PER_JIFFY); ++ smi_event_handler(smi_info, time_diff); ++ schedule_timeout_uninterruptible(1); ++ } ++} ++ + static void shutdown_smi(void *send_info) + { + struct smi_info *smi_info = send_info; +@@ -2186,16 +2200,13 @@ static void shutdown_smi(void *send_info) + * in the BMC. Note that timers and CPU interrupts are off, + * so no need for locks. 
+ */ +- while (smi_info->curr_msg || (smi_info->si_state != SI_NORMAL)) { +- poll(smi_info); +- schedule_timeout_uninterruptible(1); +- } ++ wait_msg_processed(smi_info); ++ + if (smi_info->handlers) + disable_si_irq(smi_info); +- while (smi_info->curr_msg || (smi_info->si_state != SI_NORMAL)) { +- poll(smi_info); +- schedule_timeout_uninterruptible(1); +- } ++ ++ wait_msg_processed(smi_info); ++ + if (smi_info->handlers) + smi_info->handlers->cleanup(smi_info->si_sm); + diff --git a/drivers/char/ipmi/ipmi_ssif.c b/drivers/char/ipmi/ipmi_ssif.c index 20d5af92966d4..f366e8e3eee3d 100644 --- a/drivers/char/ipmi/ipmi_ssif.c @@ -74595,6 +89596,45 @@ index e4ff3b50de7f3..883b4a3410122 100644 pr_warn("heartbeat send failure: %d\n", rv); return rv; } +diff --git a/drivers/char/ipmi/kcs_bmc_aspeed.c b/drivers/char/ipmi/kcs_bmc_aspeed.c +index 92a37b33494cb..f23c146bb740c 100644 +--- a/drivers/char/ipmi/kcs_bmc_aspeed.c ++++ b/drivers/char/ipmi/kcs_bmc_aspeed.c +@@ -404,13 +404,31 @@ static void aspeed_kcs_check_obe(struct timer_list *timer) + static void aspeed_kcs_irq_mask_update(struct kcs_bmc_device *kcs_bmc, u8 mask, u8 state) + { + struct aspeed_kcs_bmc *priv = to_aspeed_kcs_bmc(kcs_bmc); ++ int rc; ++ u8 str; + + /* We don't have an OBE IRQ, emulate it */ + if (mask & KCS_BMC_EVENT_TYPE_OBE) { +- if (KCS_BMC_EVENT_TYPE_OBE & state) +- mod_timer(&priv->obe.timer, jiffies + OBE_POLL_PERIOD); +- else ++ if (KCS_BMC_EVENT_TYPE_OBE & state) { ++ /* ++ * Given we don't have an OBE IRQ, delay by polling briefly to see if we can ++ * observe such an event before returning to the caller. This is not ++ * incorrect because OBF may have already become clear before enabling the ++ * IRQ if we had one, under which circumstance no event will be propagated ++ * anyway. ++ * ++ * The onus is on the client to perform a race-free check that it hasn't ++ * missed the event. ++ */ ++ rc = read_poll_timeout_atomic(aspeed_kcs_inb, str, ++ !(str & KCS_BMC_STR_OBF), 1, 100, false, ++ &priv->kcs_bmc, priv->kcs_bmc.ioreg.str); ++ /* Time for the slow path? 
*/ ++ if (rc == -ETIMEDOUT) ++ mod_timer(&priv->obe.timer, jiffies + OBE_POLL_PERIOD); ++ } else { + del_timer(&priv->obe.timer); ++ } + } + + if (mask & KCS_BMC_EVENT_TYPE_IBF) { diff --git a/drivers/char/ipmi/kcs_bmc_serio.c b/drivers/char/ipmi/kcs_bmc_serio.c index 7948cabde50b4..7e2067628a6ce 100644 --- a/drivers/char/ipmi/kcs_bmc_serio.c @@ -78010,6 +93050,51 @@ index 605969ed0f965..8642326de6e1c 100644 -} -EXPORT_SYMBOL_GPL(add_bootloader_randomness); +#endif /* CONFIG_SYSCTL */ +diff --git a/drivers/char/tpm/eventlog/acpi.c b/drivers/char/tpm/eventlog/acpi.c +index 1b18ce5ebab1e..0913d3eb8d518 100644 +--- a/drivers/char/tpm/eventlog/acpi.c ++++ b/drivers/char/tpm/eventlog/acpi.c +@@ -90,16 +90,21 @@ int tpm_read_log_acpi(struct tpm_chip *chip) + return -ENODEV; + + if (tbl->header.length < +- sizeof(*tbl) + sizeof(struct acpi_tpm2_phy)) ++ sizeof(*tbl) + sizeof(struct acpi_tpm2_phy)) { ++ acpi_put_table((struct acpi_table_header *)tbl); + return -ENODEV; ++ } + + tpm2_phy = (void *)tbl + sizeof(*tbl); + len = tpm2_phy->log_area_minimum_length; + + start = tpm2_phy->log_area_start_address; +- if (!start || !len) ++ if (!start || !len) { ++ acpi_put_table((struct acpi_table_header *)tbl); + return -ENODEV; ++ } + ++ acpi_put_table((struct acpi_table_header *)tbl); + format = EFI_TCG2_EVENT_LOG_FORMAT_TCG_2; + } else { + /* Find TCPA entry in RSDT (ACPI_LOGICAL_ADDRESSING) */ +@@ -120,8 +125,10 @@ int tpm_read_log_acpi(struct tpm_chip *chip) + break; + } + ++ acpi_put_table((struct acpi_table_header *)buff); + format = EFI_TCG2_EVENT_LOG_FORMAT_TCG_1_2; + } ++ + if (!len) { + dev_warn(&chip->dev, "%s: TCPA log area empty\n", __func__); + return -EIO; +@@ -156,5 +163,4 @@ err: + kfree(log->bios_event_log); + log->bios_event_log = NULL; + return ret; +- + } diff --git a/drivers/char/tpm/tpm-chip.c b/drivers/char/tpm/tpm-chip.c index ddaeceb7e1091..65d800ecc9964 100644 --- a/drivers/char/tpm/tpm-chip.c @@ -78169,6 +93254,27 @@ index c08cbb306636b..dc4c0a0a51290 100644 priv->response_length = ret; mod_timer(&priv->user_read_timer, jiffies + (120 * HZ)); } +diff --git a/drivers/char/tpm/tpm-interface.c b/drivers/char/tpm/tpm-interface.c +index 1621ce8187052..d69905233aff2 100644 +--- a/drivers/char/tpm/tpm-interface.c ++++ b/drivers/char/tpm/tpm-interface.c +@@ -401,13 +401,14 @@ int tpm_pm_suspend(struct device *dev) + !pm_suspend_via_firmware()) + goto suspended; + +- if (!tpm_chip_start(chip)) { ++ rc = tpm_try_get_ops(chip); ++ if (!rc) { + if (chip->flags & TPM_CHIP_FLAG_TPM2) + tpm2_shutdown(chip, TPM2_SU_STATE); + else + rc = tpm1_pm_suspend(chip, tpm_suspend_pcr); + +- tpm_chip_stop(chip); ++ tpm_put_ops(chip); + } + + suspended: diff --git a/drivers/char/tpm/tpm.h b/drivers/char/tpm/tpm.h index 283f78211c3a7..2163c6ee0d364 100644 --- a/drivers/char/tpm/tpm.h @@ -78304,6 +93410,97 @@ index 784b8b3cb903f..ffb35f0154c16 100644 + + return rc; +} +diff --git a/drivers/char/tpm/tpm_crb.c b/drivers/char/tpm/tpm_crb.c +index 18606651d1aa4..16fc481d60950 100644 +--- a/drivers/char/tpm/tpm_crb.c ++++ b/drivers/char/tpm/tpm_crb.c +@@ -252,7 +252,7 @@ static int __crb_relinquish_locality(struct device *dev, + iowrite32(CRB_LOC_CTRL_RELINQUISH, &priv->regs_h->loc_ctrl); + if (!crb_wait_for_reg_32(&priv->regs_h->loc_state, mask, value, + TPM2_TIMEOUT_C)) { +- dev_warn(dev, "TPM_LOC_STATE_x.requestAccess timed out\n"); ++ dev_warn(dev, "TPM_LOC_STATE_x.Relinquish timed out\n"); + return -ETIME; + } + +@@ -676,12 +676,16 @@ static int crb_acpi_add(struct acpi_device *device) + + /* Should the FIFO 
driver handle this? */ + sm = buf->start_method; +- if (sm == ACPI_TPM2_MEMORY_MAPPED) +- return -ENODEV; ++ if (sm == ACPI_TPM2_MEMORY_MAPPED) { ++ rc = -ENODEV; ++ goto out; ++ } + + priv = devm_kzalloc(dev, sizeof(struct crb_priv), GFP_KERNEL); +- if (!priv) +- return -ENOMEM; ++ if (!priv) { ++ rc = -ENOMEM; ++ goto out; ++ } + + if (sm == ACPI_TPM2_COMMAND_BUFFER_WITH_ARM_SMC) { + if (buf->header.length < (sizeof(*buf) + sizeof(*crb_smc))) { +@@ -689,7 +693,8 @@ static int crb_acpi_add(struct acpi_device *device) + FW_BUG "TPM2 ACPI table has wrong size %u for start method type %d\n", + buf->header.length, + ACPI_TPM2_COMMAND_BUFFER_WITH_ARM_SMC); +- return -EINVAL; ++ rc = -EINVAL; ++ goto out; + } + crb_smc = ACPI_ADD_PTR(struct tpm2_crb_smc, buf, sizeof(*buf)); + priv->smc_func_id = crb_smc->smc_func_id; +@@ -700,17 +705,23 @@ static int crb_acpi_add(struct acpi_device *device) + + rc = crb_map_io(device, priv, buf); + if (rc) +- return rc; ++ goto out; + + chip = tpmm_chip_alloc(dev, &tpm_crb); +- if (IS_ERR(chip)) +- return PTR_ERR(chip); ++ if (IS_ERR(chip)) { ++ rc = PTR_ERR(chip); ++ goto out; ++ } + + dev_set_drvdata(&chip->dev, priv); + chip->acpi_dev_handle = device->handle; + chip->flags = TPM_CHIP_FLAG_TPM2; + +- return tpm_chip_register(chip); ++ rc = tpm_chip_register(chip); ++ ++out: ++ acpi_put_table((struct acpi_table_header *)buf); ++ return rc; + } + + static int crb_acpi_remove(struct acpi_device *device) +diff --git a/drivers/char/tpm/tpm_ftpm_tee.c b/drivers/char/tpm/tpm_ftpm_tee.c +index 6e3235565a4d8..d9daaafdd295c 100644 +--- a/drivers/char/tpm/tpm_ftpm_tee.c ++++ b/drivers/char/tpm/tpm_ftpm_tee.c +@@ -397,7 +397,13 @@ static int __init ftpm_mod_init(void) + if (rc) + return rc; + +- return driver_register(&ftpm_tee_driver.driver); ++ rc = driver_register(&ftpm_tee_driver.driver); ++ if (rc) { ++ platform_driver_unregister(&ftpm_tee_plat_driver); ++ return rc; ++ } ++ ++ return 0; + } + + static void __exit ftpm_mod_exit(void) diff --git a/drivers/char/tpm/tpm_ibmvtpm.c b/drivers/char/tpm/tpm_ibmvtpm.c index 3af4c07a9342f..d3989b257f422 100644 --- a/drivers/char/tpm/tpm_ibmvtpm.c @@ -78316,6 +93513,41 @@ index 3af4c07a9342f..d3989b257f422 100644 dev_err(dev, "CRQ response timed out\n"); goto init_irq_cleanup; } +diff --git a/drivers/char/tpm/tpm_tis.c b/drivers/char/tpm/tpm_tis.c +index d3f2e5364c275..e53164c828085 100644 +--- a/drivers/char/tpm/tpm_tis.c ++++ b/drivers/char/tpm/tpm_tis.c +@@ -125,6 +125,7 @@ static int check_acpi_tpm2(struct device *dev) + const struct acpi_device_id *aid = acpi_match_device(tpm_acpi_tbl, dev); + struct acpi_table_tpm2 *tbl; + acpi_status st; ++ int ret = 0; + + if (!aid || aid->driver_data != DEVICE_IS_TPM2) + return 0; +@@ -132,8 +133,7 @@ static int check_acpi_tpm2(struct device *dev) + /* If the ACPI TPM2 signature is matched then a global ACPI_SIG_TPM2 + * table is mandatory + */ +- st = +- acpi_get_table(ACPI_SIG_TPM2, 1, (struct acpi_table_header **)&tbl); ++ st = acpi_get_table(ACPI_SIG_TPM2, 1, (struct acpi_table_header **)&tbl); + if (ACPI_FAILURE(st) || tbl->header.length < sizeof(*tbl)) { + dev_err(dev, FW_BUG "failed to get TPM2 ACPI table\n"); + return -EINVAL; +@@ -141,9 +141,10 @@ static int check_acpi_tpm2(struct device *dev) + + /* The tpm2_crb driver handles this device */ + if (tbl->start_method != ACPI_TPM2_MEMORY_MAPPED) +- return -ENODEV; ++ ret = -ENODEV; + +- return 0; ++ acpi_put_table((struct acpi_table_header *)tbl); ++ return ret; + } + #else + static int check_acpi_tpm2(struct device *dev) diff 
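
The TPM hunks above (event-log ACPI parsing, tpm_crb's probe, and tpm_tis's check_acpi_tpm2()) all fix the same leak: acpi_get_table() takes a reference on the mapped table, and the old code returned without ever dropping it. The balanced form, roughly as check_acpi_tpm2() now reads:

	struct acpi_table_tpm2 *tbl;
	acpi_status st;
	int ret = 0;

	st = acpi_get_table(ACPI_SIG_TPM2, 1,
			    (struct acpi_table_header **)&tbl);
	if (ACPI_FAILURE(st) || tbl->header.length < sizeof(*tbl))
		return -EINVAL;

	if (tbl->start_method != ACPI_TPM2_MEMORY_MAPPED)
		ret = -ENODEV;		/* tpm_crb owns this device */

	acpi_put_table((struct acpi_table_header *)tbl);
	return ret;
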
--git a/drivers/char/tpm/tpm_tis_core.c b/drivers/char/tpm/tpm_tis_core.c index 69579efb247b3..dc56b976d8162 100644 --- a/drivers/char/tpm/tpm_tis_core.c @@ -78534,6 +93766,19 @@ index 790890978424a..5144ada2c7e1a 100644 }; static struct clk_div_table i2s_div_table[] = { +diff --git a/drivers/clk/at91/at91rm9200.c b/drivers/clk/at91/at91rm9200.c +index 428a6f4b9ebc5..8d36e615cd9dd 100644 +--- a/drivers/clk/at91/at91rm9200.c ++++ b/drivers/clk/at91/at91rm9200.c +@@ -40,7 +40,7 @@ static const struct clk_pll_characteristics rm9200_pll_characteristics = { + }; + + static const struct sck at91rm9200_systemck[] = { +- { .n = "udpck", .p = "usbck", .id = 2 }, ++ { .n = "udpck", .p = "usbck", .id = 1 }, + { .n = "uhpck", .p = "usbck", .id = 4 }, + { .n = "pck0", .p = "prog0", .id = 8 }, + { .n = "pck1", .p = "prog1", .id = 9 }, diff --git a/drivers/clk/at91/clk-generated.c b/drivers/clk/at91/clk-generated.c index b656d25a97678..fe772baeb15ff 100644 --- a/drivers/clk/at91/clk-generated.c @@ -79300,6 +94545,128 @@ index a2c6486ef1708..f8417ee2961aa 100644 }; struct clps711x_clk { +diff --git a/drivers/clk/clk-devres.c b/drivers/clk/clk-devres.c +index f9d5b73343417..4fb4fd4b06bda 100644 +--- a/drivers/clk/clk-devres.c ++++ b/drivers/clk/clk-devres.c +@@ -4,42 +4,101 @@ + #include <linux/export.h> + #include <linux/gfp.h> + ++struct devm_clk_state { ++ struct clk *clk; ++ void (*exit)(struct clk *clk); ++}; ++ + static void devm_clk_release(struct device *dev, void *res) + { +- clk_put(*(struct clk **)res); ++ struct devm_clk_state *state = res; ++ ++ if (state->exit) ++ state->exit(state->clk); ++ ++ clk_put(state->clk); + } + +-struct clk *devm_clk_get(struct device *dev, const char *id) ++static struct clk *__devm_clk_get(struct device *dev, const char *id, ++ struct clk *(*get)(struct device *dev, const char *id), ++ int (*init)(struct clk *clk), ++ void (*exit)(struct clk *clk)) + { +- struct clk **ptr, *clk; ++ struct devm_clk_state *state; ++ struct clk *clk; ++ int ret; + +- ptr = devres_alloc(devm_clk_release, sizeof(*ptr), GFP_KERNEL); +- if (!ptr) ++ state = devres_alloc(devm_clk_release, sizeof(*state), GFP_KERNEL); ++ if (!state) + return ERR_PTR(-ENOMEM); + +- clk = clk_get(dev, id); +- if (!IS_ERR(clk)) { +- *ptr = clk; +- devres_add(dev, ptr); +- } else { +- devres_free(ptr); ++ clk = get(dev, id); ++ if (IS_ERR(clk)) { ++ ret = PTR_ERR(clk); ++ goto err_clk_get; + } + ++ if (init) { ++ ret = init(clk); ++ if (ret) ++ goto err_clk_init; ++ } ++ ++ state->clk = clk; ++ state->exit = exit; ++ ++ devres_add(dev, state); ++ + return clk; ++ ++err_clk_init: ++ ++ clk_put(clk); ++err_clk_get: ++ ++ devres_free(state); ++ return ERR_PTR(ret); ++} ++ ++struct clk *devm_clk_get(struct device *dev, const char *id) ++{ ++ return __devm_clk_get(dev, id, clk_get, NULL, NULL); + } + EXPORT_SYMBOL(devm_clk_get); + +-struct clk *devm_clk_get_optional(struct device *dev, const char *id) ++struct clk *devm_clk_get_prepared(struct device *dev, const char *id) + { +- struct clk *clk = devm_clk_get(dev, id); ++ return __devm_clk_get(dev, id, clk_get, clk_prepare, clk_unprepare); ++} ++EXPORT_SYMBOL_GPL(devm_clk_get_prepared); + +- if (clk == ERR_PTR(-ENOENT)) +- return NULL; ++struct clk *devm_clk_get_enabled(struct device *dev, const char *id) ++{ ++ return __devm_clk_get(dev, id, clk_get, ++ clk_prepare_enable, clk_disable_unprepare); ++} ++EXPORT_SYMBOL_GPL(devm_clk_get_enabled); + +- return clk; ++struct clk *devm_clk_get_optional(struct device *dev, const char *id) ++{ ++ return __devm_clk_get(dev, 
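
The clk-devres rework interrupted here folds every devm_clk_get variant into one __devm_clk_get() helper taking optional init/exit callbacks; that is what makes the new devm_clk_get_enabled() and friends one-liners, with devres undoing both the enable and the get on unbind. Typical consumer usage (driver and clock names hypothetical):

	static int foo_probe(struct platform_device *pdev)
	{
		struct clk *clk;

		/* clk_get + clk_prepare_enable in one step; the matching
		 * disable, unprepare and put happen automatically on unbind */
		clk = devm_clk_get_enabled(&pdev->dev, "bus");
		if (IS_ERR(clk))
			return PTR_ERR(clk);

		return 0;
	}
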
id, clk_get_optional, NULL, NULL); + } + EXPORT_SYMBOL(devm_clk_get_optional); + ++struct clk *devm_clk_get_optional_prepared(struct device *dev, const char *id) ++{ ++ return __devm_clk_get(dev, id, clk_get_optional, ++ clk_prepare, clk_unprepare); ++} ++EXPORT_SYMBOL_GPL(devm_clk_get_optional_prepared); ++ ++struct clk *devm_clk_get_optional_enabled(struct device *dev, const char *id) ++{ ++ return __devm_clk_get(dev, id, clk_get_optional, ++ clk_prepare_enable, clk_disable_unprepare); ++} ++EXPORT_SYMBOL_GPL(devm_clk_get_optional_enabled); ++ + struct clk_bulk_devres { + struct clk_bulk_data *clks; + int num_clks; diff --git a/drivers/clk/clk-oxnas.c b/drivers/clk/clk-oxnas.c index 78d5ea669fea7..2fe36f579ac5e 100644 --- a/drivers/clk/clk-oxnas.c @@ -79673,9 +95040,86 @@ index c4e0f1c07192f..3f6fd7ef2a68f 100644 hws[IMX7D_USB_CTRL_CLK] = imx_clk_hw_gate4("usb_ctrl_clk", "ahb_root_clk", base + 0x4680, 0); hws[IMX7D_USB_PHY1_CLK] = imx_clk_hw_gate4("usb_phy1_clk", "pll_usb1_main_clk", base + 0x46a0, 0); diff --git a/drivers/clk/imx/clk-imx8mn.c b/drivers/clk/imx/clk-imx8mn.c -index c55577604e16a..021355a247081 100644 +index c55577604e16a..52903146fdbaf 100644 --- a/drivers/clk/imx/clk-imx8mn.c +++ b/drivers/clk/imx/clk-imx8mn.c +@@ -30,7 +30,7 @@ static const char * const audio_pll2_bypass_sels[] = {"audio_pll2", "audio_pll2_ + static const char * const video_pll1_bypass_sels[] = {"video_pll1", "video_pll1_ref_sel", }; + static const char * const dram_pll_bypass_sels[] = {"dram_pll", "dram_pll_ref_sel", }; + static const char * const gpu_pll_bypass_sels[] = {"gpu_pll", "gpu_pll_ref_sel", }; +-static const char * const vpu_pll_bypass_sels[] = {"vpu_pll", "vpu_pll_ref_sel", }; ++static const char * const m7_alt_pll_bypass_sels[] = {"m7_alt_pll", "m7_alt_pll_ref_sel", }; + static const char * const arm_pll_bypass_sels[] = {"arm_pll", "arm_pll_ref_sel", }; + static const char * const sys_pll3_bypass_sels[] = {"sys_pll3", "sys_pll3_ref_sel", }; + +@@ -40,7 +40,7 @@ static const char * const imx8mn_a53_sels[] = {"osc_24m", "arm_pll_out", "sys_pl + + static const char * const imx8mn_a53_core_sels[] = {"arm_a53_div", "arm_pll_out", }; + +-static const char * const imx8mn_m7_sels[] = {"osc_24m", "sys_pll2_200m", "sys_pll2_250m", "vpu_pll_out", ++static const char * const imx8mn_m7_sels[] = {"osc_24m", "sys_pll2_200m", "sys_pll2_250m", "m7_alt_pll_out", + "sys_pll1_800m", "audio_pll1_out", "video_pll1_out", "sys_pll3_out", }; + + static const char * const imx8mn_gpu_core_sels[] = {"osc_24m", "gpu_pll_out", "sys_pll1_800m", +@@ -108,27 +108,27 @@ static const char * const imx8mn_disp_pixel_sels[] = {"osc_24m", "video_pll1_out + "sys_pll3_out", "clk_ext4", }; + + static const char * const imx8mn_sai2_sels[] = {"osc_24m", "audio_pll1_out", "audio_pll2_out", +- "video_pll1_out", "sys_pll1_133m", "osc_hdmi", +- "clk_ext3", "clk_ext4", }; ++ "video_pll1_out", "sys_pll1_133m", "dummy", ++ "clk_ext2", "clk_ext3", }; + + static const char * const imx8mn_sai3_sels[] = {"osc_24m", "audio_pll1_out", "audio_pll2_out", +- "video_pll1_out", "sys_pll1_133m", "osc_hdmi", ++ "video_pll1_out", "sys_pll1_133m", "dummy", + "clk_ext3", "clk_ext4", }; + + static const char * const imx8mn_sai5_sels[] = {"osc_24m", "audio_pll1_out", "audio_pll2_out", +- "video_pll1_out", "sys_pll1_133m", "osc_hdmi", ++ "video_pll1_out", "sys_pll1_133m", "dummy", + "clk_ext2", "clk_ext3", }; + + static const char * const imx8mn_sai6_sels[] = {"osc_24m", "audio_pll1_out", "audio_pll2_out", +- "video_pll1_out", "sys_pll1_133m", "osc_hdmi", ++ 
"video_pll1_out", "sys_pll1_133m", "dummy", + "clk_ext3", "clk_ext4", }; + + static const char * const imx8mn_sai7_sels[] = {"osc_24m", "audio_pll1_out", "audio_pll2_out", +- "video_pll1_out", "sys_pll1_133m", "osc_hdmi", ++ "video_pll1_out", "sys_pll1_133m", "dummy", + "clk_ext3", "clk_ext4", }; + + static const char * const imx8mn_spdif1_sels[] = {"osc_24m", "audio_pll1_out", "audio_pll2_out", +- "video_pll1_out", "sys_pll1_133m", "osc_hdmi", ++ "video_pll1_out", "sys_pll1_133m", "dummy", + "clk_ext2", "clk_ext3", }; + + static const char * const imx8mn_enet_ref_sels[] = {"osc_24m", "sys_pll2_125m", "sys_pll2_50m", +@@ -140,8 +140,8 @@ static const char * const imx8mn_enet_timer_sels[] = {"osc_24m", "sys_pll2_100m" + "clk_ext4", "video_pll1_out", }; + + static const char * const imx8mn_enet_phy_sels[] = {"osc_24m", "sys_pll2_50m", "sys_pll2_125m", +- "sys_pll2_200m", "sys_pll2_500m", "video_pll1_out", +- "audio_pll2_out", }; ++ "sys_pll2_200m", "sys_pll2_500m", "audio_pll1_out", ++ "video_pll_out", "audio_pll2_out", }; + + static const char * const imx8mn_nand_sels[] = {"osc_24m", "sys_pll2_500m", "audio_pll1_out", + "sys_pll1_400m", "audio_pll2_out", "sys_pll3_out", +@@ -228,10 +228,10 @@ static const char * const imx8mn_pwm4_sels[] = {"osc_24m", "sys_pll2_100m", "sys + "sys_pll1_80m", "video_pll1_out", }; + + static const char * const imx8mn_wdog_sels[] = {"osc_24m", "sys_pll1_133m", "sys_pll1_160m", +- "vpu_pll_out", "sys_pll2_125m", "sys_pll3_out", ++ "m7_alt_pll_out", "sys_pll2_125m", "sys_pll3_out", + "sys_pll1_80m", "sys_pll2_166m", }; + +-static const char * const imx8mn_wrclk_sels[] = {"osc_24m", "sys_pll1_40m", "vpu_pll_out", ++static const char * const imx8mn_wrclk_sels[] = {"osc_24m", "sys_pll1_40m", "m7_alt_pll_out", + "sys_pll3_out", "sys_pll2_200m", "sys_pll1_266m", + "sys_pll2_500m", "sys_pll1_100m", }; + @@ -277,9 +277,9 @@ static const char * const imx8mn_pdm_sels[] = {"osc_24m", "sys_pll2_100m", "audi static const char * const imx8mn_dram_core_sels[] = {"dram_pll_out", "dram_alt_root", }; @@ -79689,6 +95133,42 @@ index c55577604e16a..021355a247081 100644 static const char * const imx8mn_clko2_sels[] = {"osc_24m", "sys_pll2_200m", "sys_pll1_400m", "sys_pll2_166m", "sys_pll3_out", "audio_pll1_out", "video_pll1_out", "osc_32k", }; +@@ -328,7 +328,7 @@ static int imx8mn_clocks_probe(struct platform_device *pdev) + hws[IMX8MN_VIDEO_PLL1_REF_SEL] = imx_clk_hw_mux("video_pll1_ref_sel", base + 0x28, 0, 2, pll_ref_sels, ARRAY_SIZE(pll_ref_sels)); + hws[IMX8MN_DRAM_PLL_REF_SEL] = imx_clk_hw_mux("dram_pll_ref_sel", base + 0x50, 0, 2, pll_ref_sels, ARRAY_SIZE(pll_ref_sels)); + hws[IMX8MN_GPU_PLL_REF_SEL] = imx_clk_hw_mux("gpu_pll_ref_sel", base + 0x64, 0, 2, pll_ref_sels, ARRAY_SIZE(pll_ref_sels)); +- hws[IMX8MN_VPU_PLL_REF_SEL] = imx_clk_hw_mux("vpu_pll_ref_sel", base + 0x74, 0, 2, pll_ref_sels, ARRAY_SIZE(pll_ref_sels)); ++ hws[IMX8MN_M7_ALT_PLL_REF_SEL] = imx_clk_hw_mux("m7_alt_pll_ref_sel", base + 0x74, 0, 2, pll_ref_sels, ARRAY_SIZE(pll_ref_sels)); + hws[IMX8MN_ARM_PLL_REF_SEL] = imx_clk_hw_mux("arm_pll_ref_sel", base + 0x84, 0, 2, pll_ref_sels, ARRAY_SIZE(pll_ref_sels)); + hws[IMX8MN_SYS_PLL3_REF_SEL] = imx_clk_hw_mux("sys_pll3_ref_sel", base + 0x114, 0, 2, pll_ref_sels, ARRAY_SIZE(pll_ref_sels)); + +@@ -337,7 +337,7 @@ static int imx8mn_clocks_probe(struct platform_device *pdev) + hws[IMX8MN_VIDEO_PLL1] = imx_clk_hw_pll14xx("video_pll1", "video_pll1_ref_sel", base + 0x28, &imx_1443x_pll); + hws[IMX8MN_DRAM_PLL] = imx_clk_hw_pll14xx("dram_pll", "dram_pll_ref_sel", base + 
0x50, &imx_1443x_dram_pll); + hws[IMX8MN_GPU_PLL] = imx_clk_hw_pll14xx("gpu_pll", "gpu_pll_ref_sel", base + 0x64, &imx_1416x_pll); +- hws[IMX8MN_VPU_PLL] = imx_clk_hw_pll14xx("vpu_pll", "vpu_pll_ref_sel", base + 0x74, &imx_1416x_pll); ++ hws[IMX8MN_M7_ALT_PLL] = imx_clk_hw_pll14xx("m7_alt_pll", "m7_alt_pll_ref_sel", base + 0x74, &imx_1416x_pll); + hws[IMX8MN_ARM_PLL] = imx_clk_hw_pll14xx("arm_pll", "arm_pll_ref_sel", base + 0x84, &imx_1416x_pll); + hws[IMX8MN_SYS_PLL1] = imx_clk_hw_fixed("sys_pll1", 800000000); + hws[IMX8MN_SYS_PLL2] = imx_clk_hw_fixed("sys_pll2", 1000000000); +@@ -349,7 +349,7 @@ static int imx8mn_clocks_probe(struct platform_device *pdev) + hws[IMX8MN_VIDEO_PLL1_BYPASS] = imx_clk_hw_mux_flags("video_pll1_bypass", base + 0x28, 16, 1, video_pll1_bypass_sels, ARRAY_SIZE(video_pll1_bypass_sels), CLK_SET_RATE_PARENT); + hws[IMX8MN_DRAM_PLL_BYPASS] = imx_clk_hw_mux_flags("dram_pll_bypass", base + 0x50, 16, 1, dram_pll_bypass_sels, ARRAY_SIZE(dram_pll_bypass_sels), CLK_SET_RATE_PARENT); + hws[IMX8MN_GPU_PLL_BYPASS] = imx_clk_hw_mux_flags("gpu_pll_bypass", base + 0x64, 28, 1, gpu_pll_bypass_sels, ARRAY_SIZE(gpu_pll_bypass_sels), CLK_SET_RATE_PARENT); +- hws[IMX8MN_VPU_PLL_BYPASS] = imx_clk_hw_mux_flags("vpu_pll_bypass", base + 0x74, 28, 1, vpu_pll_bypass_sels, ARRAY_SIZE(vpu_pll_bypass_sels), CLK_SET_RATE_PARENT); ++ hws[IMX8MN_M7_ALT_PLL_BYPASS] = imx_clk_hw_mux_flags("m7_alt_pll_bypass", base + 0x74, 28, 1, m7_alt_pll_bypass_sels, ARRAY_SIZE(m7_alt_pll_bypass_sels), CLK_SET_RATE_PARENT); + hws[IMX8MN_ARM_PLL_BYPASS] = imx_clk_hw_mux_flags("arm_pll_bypass", base + 0x84, 28, 1, arm_pll_bypass_sels, ARRAY_SIZE(arm_pll_bypass_sels), CLK_SET_RATE_PARENT); + hws[IMX8MN_SYS_PLL3_BYPASS] = imx_clk_hw_mux_flags("sys_pll3_bypass", base + 0x114, 28, 1, sys_pll3_bypass_sels, ARRAY_SIZE(sys_pll3_bypass_sels), CLK_SET_RATE_PARENT); + +@@ -359,7 +359,7 @@ static int imx8mn_clocks_probe(struct platform_device *pdev) + hws[IMX8MN_VIDEO_PLL1_OUT] = imx_clk_hw_gate("video_pll1_out", "video_pll1_bypass", base + 0x28, 13); + hws[IMX8MN_DRAM_PLL_OUT] = imx_clk_hw_gate("dram_pll_out", "dram_pll_bypass", base + 0x50, 13); + hws[IMX8MN_GPU_PLL_OUT] = imx_clk_hw_gate("gpu_pll_out", "gpu_pll_bypass", base + 0x64, 11); +- hws[IMX8MN_VPU_PLL_OUT] = imx_clk_hw_gate("vpu_pll_out", "vpu_pll_bypass", base + 0x74, 11); ++ hws[IMX8MN_M7_ALT_PLL_OUT] = imx_clk_hw_gate("m7_alt_pll_out", "m7_alt_pll_bypass", base + 0x74, 11); + hws[IMX8MN_ARM_PLL_OUT] = imx_clk_hw_gate("arm_pll_out", "arm_pll_bypass", base + 0x84, 11); + hws[IMX8MN_SYS_PLL3_OUT] = imx_clk_hw_gate("sys_pll3_out", "sys_pll3_bypass", base + 0x114, 11); + diff --git a/drivers/clk/imx/clk-imx8mp.c b/drivers/clk/imx/clk-imx8mp.c index 12837304545d5..b173c30093946 100644 --- a/drivers/clk/imx/clk-imx8mp.c @@ -79794,6 +95274,46 @@ index 5154b0cf8ad6c..66ff141da0a42 100644 }, [JZ4725B_CLK_SPI] = { +diff --git a/drivers/clk/ingenic/jz4760-cgu.c b/drivers/clk/ingenic/jz4760-cgu.c +index 14483797a4dbf..11906242e1d3d 100644 +--- a/drivers/clk/ingenic/jz4760-cgu.c ++++ b/drivers/clk/ingenic/jz4760-cgu.c +@@ -58,7 +58,7 @@ jz4760_cgu_calc_m_n_od(const struct ingenic_cgu_pll_info *pll_info, + unsigned long rate, unsigned long parent_rate, + unsigned int *pm, unsigned int *pn, unsigned int *pod) + { +- unsigned int m, n, od, m_max = (1 << pll_info->m_bits) - 2; ++ unsigned int m, n, od, m_max = (1 << pll_info->m_bits) - 1; + + /* The frequency after the N divider must be between 1 and 50 MHz. 
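
The jz4760 hunk around this comment replaces the old od/m search loop with a direct computation. The constraints are easier to see with numbers; one worked case, assuming a 12 MHz EXT clock and an 888 MHz target, with f_out = parent * M / (N * OD) (values illustrative):

	unsigned int parent = 12, rate = 888;	/* MHz */
	unsigned int n = 2;			/* 12 / 2 = 6 MHz, inside 1..50 */
	unsigned int m = rate * n / parent;	/* 888 * 2 / 12 = 148 */
	unsigned int od = m & 1;		/* even, so OD can stay 1 */

	m <<= od;
	/* check: 12 * 148 / (2 * 1) = 888 MHz, and M fits in m_bits */
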
*/ + n = parent_rate / (1 * MHZ); +@@ -66,19 +66,17 @@ jz4760_cgu_calc_m_n_od(const struct ingenic_cgu_pll_info *pll_info, + /* The N divider must be >= 2. */ + n = clamp_val(n, 2, 1 << pll_info->n_bits); + +- for (;; n >>= 1) { +- od = (unsigned int)-1; ++ rate /= MHZ; ++ parent_rate /= MHZ; + +- do { +- m = (rate / MHZ) * (1 << ++od) * n / (parent_rate / MHZ); +- } while ((m > m_max || m & 1) && (od < 4)); +- +- if (od < 4 && m >= 4 && m <= m_max) +- break; ++ for (m = m_max; m >= m_max && n >= 2; n--) { ++ m = rate * n / parent_rate; ++ od = m & 1; ++ m <<= od; + } + + *pm = m; +- *pn = n; ++ *pn = n + 1; + *pod = 1 << od; + } + diff --git a/drivers/clk/ingenic/tcu.c b/drivers/clk/ingenic/tcu.c index 77acfbeb48300..11fc395618365 100644 --- a/drivers/clk/ingenic/tcu.c @@ -80207,7 +95727,7 @@ index eaedcceb766f9..5e44ceb730ad1 100644 regmap_write(regmap, PLL_CAL_L_VAL(pll), TRION_PLL_CAL_VAL); clk_alpha_pll_write_config(regmap, PLL_ALPHA_VAL(pll), config->alpha); diff --git a/drivers/clk/qcom/clk-krait.c b/drivers/clk/qcom/clk-krait.c -index 59f1af415b580..90046428693c2 100644 +index 59f1af415b580..e74fc81a14d00 100644 --- a/drivers/clk/qcom/clk-krait.c +++ b/drivers/clk/qcom/clk-krait.c @@ -32,11 +32,16 @@ static void __krait_mux_set_sel(struct krait_mux_clk *mux, int sel) @@ -80228,6 +95748,15 @@ index 59f1af415b580..90046428693c2 100644 } static int krait_mux_set_parent(struct clk_hw *hw, u8 index) +@@ -93,6 +98,8 @@ static int krait_div2_set_rate(struct clk_hw *hw, unsigned long rate, + + if (d->lpl) + mask = mask << (d->shift + LPL_SHIFT) | mask << d->shift; ++ else ++ mask <<= d->shift; + + spin_lock_irqsave(&krait_clock_reg_lock, flags); + val = krait_get_l2_indirect_reg(d->offset); diff --git a/drivers/clk/qcom/clk-rcg2.c b/drivers/clk/qcom/clk-rcg2.c index e1b1b426fae4b..c3823cc32edc6 100644 --- a/drivers/clk/qcom/clk-rcg2.c @@ -80856,7 +96385,7 @@ index 3c3a7ff045621..9b1674b28d45d 100644 [GCC_AGGRE2_USB3_AXI_CLK] = &gcc_aggre2_usb3_axi_clk.clkr, [GCC_QSPI_AHB_CLK] = &gcc_qspi_ahb_clk.clkr, diff --git a/drivers/clk/qcom/gcc-sc7280.c b/drivers/clk/qcom/gcc-sc7280.c -index 6cefcdc869905..ce7c5ba2b9b7a 100644 +index 6cefcdc869905..d10efbf260b7a 100644 --- a/drivers/clk/qcom/gcc-sc7280.c +++ b/drivers/clk/qcom/gcc-sc7280.c @@ -2998,7 +2998,7 @@ static struct clk_branch gcc_cfg_noc_lpass_clk = { @@ -80868,6 +96397,14 @@ index 6cefcdc869905..ce7c5ba2b9b7a 100644 }, }, }; +@@ -3571,6 +3571,7 @@ static int gcc_sc7280_probe(struct platform_device *pdev) + regmap_update_bits(regmap, 0x28004, BIT(0), BIT(0)); + regmap_update_bits(regmap, 0x28014, BIT(0), BIT(0)); + regmap_update_bits(regmap, 0x71004, BIT(0), BIT(0)); ++ regmap_update_bits(regmap, 0x7100C, BIT(13), BIT(13)); + + ret = qcom_cc_register_rcg_dfs(regmap, gcc_dfs_clocks, + ARRAY_SIZE(gcc_dfs_clocks)); diff --git a/drivers/clk/qcom/gcc-sm6115.c b/drivers/clk/qcom/gcc-sm6115.c index 68fe9f6f0d2f3..e24a977c25806 100644 --- a/drivers/clk/qcom/gcc-sm6115.c @@ -81056,6 +96593,28 @@ index 543cfab7561f9..431b55bb0d2f7 100644 }, }; +diff --git a/drivers/clk/qcom/gcc-sm8250.c b/drivers/clk/qcom/gcc-sm8250.c +index 9755ef4888c19..a0ba37656b07b 100644 +--- a/drivers/clk/qcom/gcc-sm8250.c ++++ b/drivers/clk/qcom/gcc-sm8250.c +@@ -3267,7 +3267,7 @@ static struct gdsc usb30_prim_gdsc = { + .pd = { + .name = "usb30_prim_gdsc", + }, +- .pwrsts = PWRSTS_OFF_ON, ++ .pwrsts = PWRSTS_RET_ON, + }; + + static struct gdsc usb30_sec_gdsc = { +@@ -3275,7 +3275,7 @@ static struct gdsc usb30_sec_gdsc = { + .pd = { + .name = "usb30_sec_gdsc", + }, +- 
.pwrsts = PWRSTS_OFF_ON, ++ .pwrsts = PWRSTS_RET_ON, + }; + + static struct gdsc hlos1_vote_mmnoc_mmu_tbu_hf0_gdsc = { diff --git a/drivers/clk/qcom/gdsc.c b/drivers/clk/qcom/gdsc.c index 4ece326ea233e..cf23cfd7e4674 100644 --- a/drivers/clk/qcom/gdsc.c @@ -81138,8 +96697,99 @@ index 5bb396b344d16..762f1b5e1ec51 100644 const u8 pwrsts; /* Powerdomain allowable state bitfields */ #define PWRSTS_OFF BIT(0) +diff --git a/drivers/clk/qcom/gpucc-sc7280.c b/drivers/clk/qcom/gpucc-sc7280.c +index 9a832f2bcf491..1490cd45a654a 100644 +--- a/drivers/clk/qcom/gpucc-sc7280.c ++++ b/drivers/clk/qcom/gpucc-sc7280.c +@@ -463,6 +463,7 @@ static int gpu_cc_sc7280_probe(struct platform_device *pdev) + */ + regmap_update_bits(regmap, 0x1170, BIT(0), BIT(0)); + regmap_update_bits(regmap, 0x1098, BIT(0), BIT(0)); ++ regmap_update_bits(regmap, 0x1098, BIT(13), BIT(13)); + + return qcom_cc_really_probe(pdev, &gpu_cc_sc7280_desc, regmap); + } +diff --git a/drivers/clk/qcom/lpasscorecc-sc7180.c b/drivers/clk/qcom/lpasscorecc-sc7180.c +index ac09b7b840aba..a5731994cbed1 100644 +--- a/drivers/clk/qcom/lpasscorecc-sc7180.c ++++ b/drivers/clk/qcom/lpasscorecc-sc7180.c +@@ -356,7 +356,7 @@ static const struct qcom_cc_desc lpass_audio_hm_sc7180_desc = { + .num_gdscs = ARRAY_SIZE(lpass_audio_hm_sc7180_gdscs), + }; + +-static int lpass_create_pm_clks(struct platform_device *pdev) ++static int lpass_setup_runtime_pm(struct platform_device *pdev) + { + int ret; + +@@ -375,7 +375,7 @@ static int lpass_create_pm_clks(struct platform_device *pdev) + if (ret < 0) + dev_err(&pdev->dev, "failed to acquire iface clock\n"); + +- return ret; ++ return pm_runtime_resume_and_get(&pdev->dev); + } + + static int lpass_core_cc_sc7180_probe(struct platform_device *pdev) +@@ -384,7 +384,7 @@ static int lpass_core_cc_sc7180_probe(struct platform_device *pdev) + struct regmap *regmap; + int ret; + +- ret = lpass_create_pm_clks(pdev); ++ ret = lpass_setup_runtime_pm(pdev); + if (ret) + return ret; + +@@ -392,12 +392,14 @@ static int lpass_core_cc_sc7180_probe(struct platform_device *pdev) + desc = &lpass_audio_hm_sc7180_desc; + ret = qcom_cc_probe_by_index(pdev, 1, desc); + if (ret) +- return ret; ++ goto exit; + + lpass_core_cc_sc7180_regmap_config.name = "lpass_core_cc"; + regmap = qcom_cc_map(pdev, &lpass_core_cc_sc7180_desc); +- if (IS_ERR(regmap)) +- return PTR_ERR(regmap); ++ if (IS_ERR(regmap)) { ++ ret = PTR_ERR(regmap); ++ goto exit; ++ } + + /* + * Keep the CLK always-ON +@@ -415,6 +417,7 @@ static int lpass_core_cc_sc7180_probe(struct platform_device *pdev) + ret = qcom_cc_really_probe(pdev, &lpass_core_cc_sc7180_desc, regmap); + + pm_runtime_mark_last_busy(&pdev->dev); ++exit: + pm_runtime_put_autosuspend(&pdev->dev); + + return ret; +@@ -425,14 +428,19 @@ static int lpass_hm_core_probe(struct platform_device *pdev) + const struct qcom_cc_desc *desc; + int ret; + +- ret = lpass_create_pm_clks(pdev); ++ ret = lpass_setup_runtime_pm(pdev); + if (ret) + return ret; + + lpass_core_cc_sc7180_regmap_config.name = "lpass_hm_core"; + desc = &lpass_core_hm_sc7180_desc; + +- return qcom_cc_probe_by_index(pdev, 0, desc); ++ ret = qcom_cc_probe_by_index(pdev, 0, desc); ++ ++ pm_runtime_mark_last_busy(&pdev->dev); ++ pm_runtime_put_autosuspend(&pdev->dev); ++ ++ return ret; + } + + static const struct of_device_id lpass_hm_sc7180_match_table[] = { diff --git a/drivers/clk/renesas/r9a06g032-clocks.c b/drivers/clk/renesas/r9a06g032-clocks.c -index c99942f0e4d4c..abc0891fd96db 100644 +index c99942f0e4d4c..3e43ae8480ddf 100644 --- 
a/drivers/clk/renesas/r9a06g032-clocks.c +++ b/drivers/clk/renesas/r9a06g032-clocks.c @@ -286,8 +286,8 @@ static const struct r9a06g032_clkdesc r9a06g032_clocks[] = { @@ -81164,6 +96814,23 @@ index c99942f0e4d4c..abc0891fd96db 100644 .dual.group = 1, }, D_UGATE(CLK_UART0, "clk_uart0", UART_GROUP_012, 0, 0, 0x1b2, 0x1b3, 0x1b4, 0x1b5), +@@ -386,7 +386,7 @@ static int r9a06g032_attach_dev(struct generic_pm_domain *pd, + int error; + int index; + +- while (!of_parse_phandle_with_args(np, "clocks", "#clock-cells", i, ++ while (!of_parse_phandle_with_args(np, "clocks", "#clock-cells", i++, + &clkspec)) { + if (clkspec.np != pd->dev.of_node) + continue; +@@ -399,7 +399,6 @@ static int r9a06g032_attach_dev(struct generic_pm_domain *pd, + if (error) + return error; + } +- i++; + } + + return 0; diff --git a/drivers/clk/renesas/r9a07g044-cpg.c b/drivers/clk/renesas/r9a07g044-cpg.c index 1490446985e2e..61609eddf7d04 100644 --- a/drivers/clk/renesas/r9a07g044-cpg.c @@ -81219,6 +96886,18 @@ index 761922ea5db76..1c92e73cd2b8c 100644 } static int __init rzg2l_cpg_probe(struct platform_device *pdev) +diff --git a/drivers/clk/rockchip/clk-pll.c b/drivers/clk/rockchip/clk-pll.c +index f7827b3b7fc1c..6e5e502be44a6 100644 +--- a/drivers/clk/rockchip/clk-pll.c ++++ b/drivers/clk/rockchip/clk-pll.c +@@ -981,6 +981,7 @@ struct clk *rockchip_clk_register_pll(struct rockchip_clk_provider *ctx, + return mux_clk; + + err_pll: ++ kfree(pll->rate_table); + clk_unregister(mux_clk); + mux_clk = pll_clk; + err_mux: diff --git a/drivers/clk/rockchip/clk-rk3568.c b/drivers/clk/rockchip/clk-rk3568.c index 75ca855e720df..6e5440841d1ee 100644 --- a/drivers/clk/rockchip/clk-rk3568.c @@ -81261,6 +96940,42 @@ index b7be7e11b0dfe..bb8a844309bf5 100644 clk_fractional_divider_general_approximation(hw, rate, parent_rate, m, n); } +diff --git a/drivers/clk/samsung/clk-pll.c b/drivers/clk/samsung/clk-pll.c +index 5873a9354b507..4909e940f0ab6 100644 +--- a/drivers/clk/samsung/clk-pll.c ++++ b/drivers/clk/samsung/clk-pll.c +@@ -1385,6 +1385,7 @@ static void __init _samsung_clk_register_pll(struct samsung_clk_provider *ctx, + if (ret) { + pr_err("%s: failed to register pll clock %s : %d\n", + __func__, pll_clk->name, ret); ++ kfree(pll->rate_table); + kfree(pll); + return; + } +diff --git a/drivers/clk/socfpga/clk-gate.c b/drivers/clk/socfpga/clk-gate.c +index 1ec9678d8cd32..ee2a2d284113c 100644 +--- a/drivers/clk/socfpga/clk-gate.c ++++ b/drivers/clk/socfpga/clk-gate.c +@@ -188,8 +188,10 @@ void __init socfpga_gate_init(struct device_node *node) + return; + + ops = kmemdup(&gateclk_ops, sizeof(gateclk_ops), GFP_KERNEL); +- if (WARN_ON(!ops)) ++ if (WARN_ON(!ops)) { ++ kfree(socfpga_clk); + return; ++ } + + rc = of_property_read_u32_array(node, "clk-gate", clk_gate, 2); + if (rc) +@@ -243,6 +245,7 @@ void __init socfpga_gate_init(struct device_node *node) + + err = clk_hw_register(NULL, hw_clk); + if (err) { ++ kfree(ops); + kfree(socfpga_clk); + return; + } diff --git a/drivers/clk/sprd/common.c b/drivers/clk/sprd/common.c index d620bbbcdfc88..ce81e4087a8fc 100644 --- a/drivers/clk/sprd/common.c @@ -81288,6 +97003,23 @@ index d620bbbcdfc88..ce81e4087a8fc 100644 if (IS_ERR(regmap)) { dev_err(dev, "failed to get regmap from its parent.\n"); return PTR_ERR(regmap); +diff --git a/drivers/clk/st/clkgen-fsyn.c b/drivers/clk/st/clkgen-fsyn.c +index 164285d6be978..ba18e58f0aae3 100644 +--- a/drivers/clk/st/clkgen-fsyn.c ++++ b/drivers/clk/st/clkgen-fsyn.c +@@ -1008,9 +1008,10 @@ static void __init st_of_quadfs_setup(struct device_node *np, + + 
clk = st_clk_register_quadfs_pll(pll_name, clk_parent_name, datac->data, + reg, lock); +- if (IS_ERR(clk)) ++ if (IS_ERR(clk)) { ++ kfree(lock); + goto err_exit; +- else ++ } else + pr_debug("%s: parent %s rate %u\n", + __clk_get_name(clk), + __clk_get_name(clk_get_parent(clk)), diff --git a/drivers/clk/sunxi-ng/ccu-sun4i-a10.c b/drivers/clk/sunxi-ng/ccu-sun4i-a10.c index f32366d9336e7..bd9a8782fec3d 100644 --- a/drivers/clk/sunxi-ng/ccu-sun4i-a10.c @@ -81932,6 +97664,688 @@ index 5319cd3804801..3bc55ab75314b 100644 init.parent_names = NULL; init.num_parents = 0; +diff --git a/drivers/clk/x86/Kconfig b/drivers/clk/x86/Kconfig +index 69642e15fcc1f..ced99e082e3dd 100644 +--- a/drivers/clk/x86/Kconfig ++++ b/drivers/clk/x86/Kconfig +@@ -1,8 +1,9 @@ + # SPDX-License-Identifier: GPL-2.0-only + config CLK_LGM_CGU + depends on OF && HAS_IOMEM && (X86 || COMPILE_TEST) ++ select MFD_SYSCON + select OF_EARLY_FLATTREE + bool "Clock driver for Lightning Mountain(LGM) platform" + help +- Clock Generation Unit(CGU) driver for Intel Lightning Mountain(LGM) +- network processor SoC. ++ Clock Generation Unit(CGU) driver for MaxLinear's x86 based ++ Lightning Mountain(LGM) network processor SoC. +diff --git a/drivers/clk/x86/clk-cgu-pll.c b/drivers/clk/x86/clk-cgu-pll.c +index 3179557b5f784..409dbf55f4cae 100644 +--- a/drivers/clk/x86/clk-cgu-pll.c ++++ b/drivers/clk/x86/clk-cgu-pll.c +@@ -1,8 +1,9 @@ + // SPDX-License-Identifier: GPL-2.0 + /* ++ * Copyright (C) 2020-2022 MaxLinear, Inc. + * Copyright (C) 2020 Intel Corporation. +- * Zhu YiXin <yixin.zhu@intel.com> +- * Rahul Tanwar <rahul.tanwar@intel.com> ++ * Zhu Yixin <yzhu@maxlinear.com> ++ * Rahul Tanwar <rtanwar@maxlinear.com> + */ + + #include <linux/clk-provider.h> +@@ -40,13 +41,10 @@ static unsigned long lgm_pll_recalc_rate(struct clk_hw *hw, unsigned long prate) + { + struct lgm_clk_pll *pll = to_lgm_clk_pll(hw); + unsigned int div, mult, frac; +- unsigned long flags; + +- spin_lock_irqsave(&pll->lock, flags); + mult = lgm_get_clk_val(pll->membase, PLL_REF_DIV(pll->reg), 0, 12); + div = lgm_get_clk_val(pll->membase, PLL_REF_DIV(pll->reg), 18, 6); + frac = lgm_get_clk_val(pll->membase, pll->reg, 2, 24); +- spin_unlock_irqrestore(&pll->lock, flags); + + if (pll->type == TYPE_LJPLL) + div *= 4; +@@ -57,12 +55,9 @@ static unsigned long lgm_pll_recalc_rate(struct clk_hw *hw, unsigned long prate) + static int lgm_pll_is_enabled(struct clk_hw *hw) + { + struct lgm_clk_pll *pll = to_lgm_clk_pll(hw); +- unsigned long flags; + unsigned int ret; + +- spin_lock_irqsave(&pll->lock, flags); + ret = lgm_get_clk_val(pll->membase, pll->reg, 0, 1); +- spin_unlock_irqrestore(&pll->lock, flags); + + return ret; + } +@@ -70,15 +65,13 @@ static int lgm_pll_is_enabled(struct clk_hw *hw) + static int lgm_pll_enable(struct clk_hw *hw) + { + struct lgm_clk_pll *pll = to_lgm_clk_pll(hw); +- unsigned long flags; + u32 val; + int ret; + +- spin_lock_irqsave(&pll->lock, flags); + lgm_set_clk_val(pll->membase, pll->reg, 0, 1, 1); +- ret = readl_poll_timeout_atomic(pll->membase + pll->reg, +- val, (val & 0x1), 1, 100); +- spin_unlock_irqrestore(&pll->lock, flags); ++ ret = regmap_read_poll_timeout_atomic(pll->membase, pll->reg, ++ val, (val & 0x1), 1, 100); ++ + + return ret; + } +@@ -86,11 +79,8 @@ static int lgm_pll_enable(struct clk_hw *hw) + static void lgm_pll_disable(struct clk_hw *hw) + { + struct lgm_clk_pll *pll = to_lgm_clk_pll(hw); +- unsigned long flags; + +- spin_lock_irqsave(&pll->lock, flags); + lgm_set_clk_val(pll->membase, pll->reg, 0, 1, 0); +- 
spin_unlock_irqrestore(&pll->lock, flags); + } + + static const struct clk_ops lgm_pll_ops = { +@@ -121,7 +111,6 @@ lgm_clk_register_pll(struct lgm_clk_provider *ctx, + return ERR_PTR(-ENOMEM); + + pll->membase = ctx->membase; +- pll->lock = ctx->lock; + pll->reg = list->reg; + pll->flags = list->flags; + pll->type = list->type; +diff --git a/drivers/clk/x86/clk-cgu.c b/drivers/clk/x86/clk-cgu.c +index 33de600e0c38e..89b53f280aee0 100644 +--- a/drivers/clk/x86/clk-cgu.c ++++ b/drivers/clk/x86/clk-cgu.c +@@ -1,8 +1,9 @@ + // SPDX-License-Identifier: GPL-2.0 + /* ++ * Copyright (C) 2020-2022 MaxLinear, Inc. + * Copyright (C) 2020 Intel Corporation. +- * Zhu YiXin <yixin.zhu@intel.com> +- * Rahul Tanwar <rahul.tanwar@intel.com> ++ * Zhu Yixin <yzhu@maxlinear.com> ++ * Rahul Tanwar <rtanwar@maxlinear.com> + */ + #include <linux/clk-provider.h> + #include <linux/device.h> +@@ -24,14 +25,10 @@ + static struct clk_hw *lgm_clk_register_fixed(struct lgm_clk_provider *ctx, + const struct lgm_clk_branch *list) + { +- unsigned long flags; + +- if (list->div_flags & CLOCK_FLAG_VAL_INIT) { +- spin_lock_irqsave(&ctx->lock, flags); ++ if (list->div_flags & CLOCK_FLAG_VAL_INIT) + lgm_set_clk_val(ctx->membase, list->div_off, list->div_shift, + list->div_width, list->div_val); +- spin_unlock_irqrestore(&ctx->lock, flags); +- } + + return clk_hw_register_fixed_rate(NULL, list->name, + list->parent_data[0].name, +@@ -41,33 +38,27 @@ static struct clk_hw *lgm_clk_register_fixed(struct lgm_clk_provider *ctx, + static u8 lgm_clk_mux_get_parent(struct clk_hw *hw) + { + struct lgm_clk_mux *mux = to_lgm_clk_mux(hw); +- unsigned long flags; + u32 val; + +- spin_lock_irqsave(&mux->lock, flags); + if (mux->flags & MUX_CLK_SW) + val = mux->reg; + else + val = lgm_get_clk_val(mux->membase, mux->reg, mux->shift, + mux->width); +- spin_unlock_irqrestore(&mux->lock, flags); + return clk_mux_val_to_index(hw, NULL, mux->flags, val); + } + + static int lgm_clk_mux_set_parent(struct clk_hw *hw, u8 index) + { + struct lgm_clk_mux *mux = to_lgm_clk_mux(hw); +- unsigned long flags; + u32 val; + + val = clk_mux_index_to_val(NULL, mux->flags, index); +- spin_lock_irqsave(&mux->lock, flags); + if (mux->flags & MUX_CLK_SW) + mux->reg = val; + else + lgm_set_clk_val(mux->membase, mux->reg, mux->shift, + mux->width, val); +- spin_unlock_irqrestore(&mux->lock, flags); + + return 0; + } +@@ -90,7 +81,7 @@ static struct clk_hw * + lgm_clk_register_mux(struct lgm_clk_provider *ctx, + const struct lgm_clk_branch *list) + { +- unsigned long flags, cflags = list->mux_flags; ++ unsigned long cflags = list->mux_flags; + struct device *dev = ctx->dev; + u8 shift = list->mux_shift; + u8 width = list->mux_width; +@@ -111,7 +102,6 @@ lgm_clk_register_mux(struct lgm_clk_provider *ctx, + init.num_parents = list->num_parents; + + mux->membase = ctx->membase; +- mux->lock = ctx->lock; + mux->reg = reg; + mux->shift = shift; + mux->width = width; +@@ -123,11 +113,8 @@ lgm_clk_register_mux(struct lgm_clk_provider *ctx, + if (ret) + return ERR_PTR(ret); + +- if (cflags & CLOCK_FLAG_VAL_INIT) { +- spin_lock_irqsave(&mux->lock, flags); ++ if (cflags & CLOCK_FLAG_VAL_INIT) + lgm_set_clk_val(mux->membase, reg, shift, width, list->mux_val); +- spin_unlock_irqrestore(&mux->lock, flags); +- } + + return hw; + } +@@ -136,13 +123,10 @@ static unsigned long + lgm_clk_divider_recalc_rate(struct clk_hw *hw, unsigned long parent_rate) + { + struct lgm_clk_divider *divider = to_lgm_clk_divider(hw); +- unsigned long flags; + unsigned int val; + +- 
spin_lock_irqsave(&divider->lock, flags);
+ val = lgm_get_clk_val(divider->membase, divider->reg,
+ divider->shift, divider->width);
+- spin_unlock_irqrestore(&divider->lock, flags);
+
+ return divider_recalc_rate(hw, parent_rate, val, divider->table,
+ divider->flags, divider->width);
+@@ -163,7 +147,6 @@
+ lgm_clk_divider_set_rate(struct clk_hw *hw, unsigned long rate,
+ unsigned long prate)
+ {
+ struct lgm_clk_divider *divider = to_lgm_clk_divider(hw);
+- unsigned long flags;
+ int value;
+
+ value = divider_get_val(rate, prate, divider->table,
+@@ -171,10 +154,8 @@
+ if (value < 0)
+ return value;
+
+- spin_lock_irqsave(&divider->lock, flags);
+ lgm_set_clk_val(divider->membase, divider->reg,
+ divider->shift, divider->width, value);
+- spin_unlock_irqrestore(&divider->lock, flags);
+
+ return 0;
+ }
+@@ -182,12 +163,10 @@
+ static int lgm_clk_divider_enable_disable(struct clk_hw *hw, int enable)
+ {
+ struct lgm_clk_divider *div = to_lgm_clk_divider(hw);
+- unsigned long flags;
+
+- spin_lock_irqsave(&div->lock, flags);
+- lgm_set_clk_val(div->membase, div->reg, div->shift_gate,
+- div->width_gate, enable);
+- spin_unlock_irqrestore(&div->lock, flags);
++ if (div->flags != DIV_CLK_NO_MASK)
++ lgm_set_clk_val(div->membase, div->reg, div->shift_gate,
++ div->width_gate, enable);
+ return 0;
+ }
+
+@@ -213,7 +192,7 @@
+ static struct clk_hw *
+ lgm_clk_register_divider(struct lgm_clk_provider *ctx,
+ const struct lgm_clk_branch *list)
+ {
+- unsigned long flags, cflags = list->div_flags;
++ unsigned long cflags = list->div_flags;
+ struct device *dev = ctx->dev;
+ struct lgm_clk_divider *div;
+ struct clk_init_data init = {};
+@@ -236,7 +215,6 @@
+ init.num_parents = 1;
+
+ div->membase = ctx->membase;
+- div->lock = ctx->lock;
+ div->reg = reg;
+ div->shift = shift;
+ div->width = width;
+@@ -251,11 +229,8 @@
+ if (ret)
+ return ERR_PTR(ret);
+
+- if (cflags & CLOCK_FLAG_VAL_INIT) {
+- spin_lock_irqsave(&div->lock, flags);
++ if (cflags & CLOCK_FLAG_VAL_INIT)
+ lgm_set_clk_val(div->membase, reg, shift, width, list->div_val);
+- spin_unlock_irqrestore(&div->lock, flags);
+- }
+
+ return hw;
+ }
+@@ -264,7 +239,6 @@
+ static struct clk_hw *
+ lgm_clk_register_fixed_factor(struct lgm_clk_provider *ctx,
+ const struct lgm_clk_branch *list)
+ {
+- unsigned long flags;
+ struct clk_hw *hw;
+
+ hw = clk_hw_register_fixed_factor(ctx->dev, list->name,
+@@ -273,12 +247,9 @@
+ if (IS_ERR(hw))
+ return ERR_CAST(hw);
+
+- if (list->div_flags & CLOCK_FLAG_VAL_INIT) {
+- spin_lock_irqsave(&ctx->lock, flags);
++ if (list->div_flags & CLOCK_FLAG_VAL_INIT)
+ lgm_set_clk_val(ctx->membase, list->div_off, list->div_shift,
+ list->div_width, list->div_val);
+- spin_unlock_irqrestore(&ctx->lock, flags);
+- }
+
+ return hw;
+ }
+@@ -286,13 +257,10 @@
+ static int lgm_clk_gate_enable(struct clk_hw *hw)
+ {
+ struct lgm_clk_gate *gate = to_lgm_clk_gate(hw);
+- unsigned long flags;
+ unsigned int reg;
+
+- spin_lock_irqsave(&gate->lock, flags);
+ reg = GATE_HW_REG_EN(gate->reg);
+ lgm_set_clk_val(gate->membase, reg, gate->shift, 1, 1);
+- spin_unlock_irqrestore(&gate->lock, flags);
+
+ return 0;
+ }
+@@ -300,25 +268,19 @@
+ static void 
lgm_clk_gate_disable(struct clk_hw *hw) + { + struct lgm_clk_gate *gate = to_lgm_clk_gate(hw); +- unsigned long flags; + unsigned int reg; + +- spin_lock_irqsave(&gate->lock, flags); + reg = GATE_HW_REG_DIS(gate->reg); + lgm_set_clk_val(gate->membase, reg, gate->shift, 1, 1); +- spin_unlock_irqrestore(&gate->lock, flags); + } + + static int lgm_clk_gate_is_enabled(struct clk_hw *hw) + { + struct lgm_clk_gate *gate = to_lgm_clk_gate(hw); + unsigned int reg, ret; +- unsigned long flags; + +- spin_lock_irqsave(&gate->lock, flags); + reg = GATE_HW_REG_STAT(gate->reg); + ret = lgm_get_clk_val(gate->membase, reg, gate->shift, 1); +- spin_unlock_irqrestore(&gate->lock, flags); + + return ret; + } +@@ -333,7 +295,7 @@ static struct clk_hw * + lgm_clk_register_gate(struct lgm_clk_provider *ctx, + const struct lgm_clk_branch *list) + { +- unsigned long flags, cflags = list->gate_flags; ++ unsigned long cflags = list->gate_flags; + const char *pname = list->parent_data[0].name; + struct device *dev = ctx->dev; + u8 shift = list->gate_shift; +@@ -354,7 +316,6 @@ lgm_clk_register_gate(struct lgm_clk_provider *ctx, + init.num_parents = pname ? 1 : 0; + + gate->membase = ctx->membase; +- gate->lock = ctx->lock; + gate->reg = reg; + gate->shift = shift; + gate->flags = cflags; +@@ -366,9 +327,7 @@ lgm_clk_register_gate(struct lgm_clk_provider *ctx, + return ERR_PTR(ret); + + if (cflags & CLOCK_FLAG_VAL_INIT) { +- spin_lock_irqsave(&gate->lock, flags); + lgm_set_clk_val(gate->membase, reg, shift, 1, list->gate_val); +- spin_unlock_irqrestore(&gate->lock, flags); + } + + return hw; +@@ -396,8 +355,22 @@ int lgm_clk_register_branches(struct lgm_clk_provider *ctx, + hw = lgm_clk_register_fixed_factor(ctx, list); + break; + case CLK_TYPE_GATE: +- hw = lgm_clk_register_gate(ctx, list); ++ if (list->gate_flags & GATE_CLK_HW) { ++ hw = lgm_clk_register_gate(ctx, list); ++ } else { ++ /* ++ * GATE_CLKs can be controlled either from ++ * CGU clk driver i.e. this driver or directly ++ * from power management driver/daemon. It is ++ * dependent on the power policy/profile requirements ++ * of the end product. To override control of gate ++ * clks from this driver, provide NULL for this index ++ * of gate clk provider. 
++ */ ++ hw = NULL; ++ } + break; ++ + default: + dev_err(ctx->dev, "invalid clk type\n"); + return -EINVAL; +@@ -443,24 +416,18 @@ lgm_clk_ddiv_recalc_rate(struct clk_hw *hw, unsigned long parent_rate) + static int lgm_clk_ddiv_enable(struct clk_hw *hw) + { + struct lgm_clk_ddiv *ddiv = to_lgm_clk_ddiv(hw); +- unsigned long flags; + +- spin_lock_irqsave(&ddiv->lock, flags); + lgm_set_clk_val(ddiv->membase, ddiv->reg, ddiv->shift_gate, + ddiv->width_gate, 1); +- spin_unlock_irqrestore(&ddiv->lock, flags); + return 0; + } + + static void lgm_clk_ddiv_disable(struct clk_hw *hw) + { + struct lgm_clk_ddiv *ddiv = to_lgm_clk_ddiv(hw); +- unsigned long flags; + +- spin_lock_irqsave(&ddiv->lock, flags); + lgm_set_clk_val(ddiv->membase, ddiv->reg, ddiv->shift_gate, + ddiv->width_gate, 0); +- spin_unlock_irqrestore(&ddiv->lock, flags); + } + + static int +@@ -497,32 +464,25 @@ lgm_clk_ddiv_set_rate(struct clk_hw *hw, unsigned long rate, + { + struct lgm_clk_ddiv *ddiv = to_lgm_clk_ddiv(hw); + u32 div, ddiv1, ddiv2; +- unsigned long flags; + + div = DIV_ROUND_CLOSEST_ULL((u64)prate, rate); + +- spin_lock_irqsave(&ddiv->lock, flags); + if (lgm_get_clk_val(ddiv->membase, ddiv->reg, ddiv->shift2, 1)) { + div = DIV_ROUND_CLOSEST_ULL((u64)div, 5); + div = div * 2; + } + +- if (div <= 0) { +- spin_unlock_irqrestore(&ddiv->lock, flags); ++ if (div <= 0) + return -EINVAL; +- } + +- if (lgm_clk_get_ddiv_val(div, &ddiv1, &ddiv2)) { +- spin_unlock_irqrestore(&ddiv->lock, flags); ++ if (lgm_clk_get_ddiv_val(div, &ddiv1, &ddiv2)) + return -EINVAL; +- } + + lgm_set_clk_val(ddiv->membase, ddiv->reg, ddiv->shift0, ddiv->width0, + ddiv1 - 1); + + lgm_set_clk_val(ddiv->membase, ddiv->reg, ddiv->shift1, ddiv->width1, + ddiv2 - 1); +- spin_unlock_irqrestore(&ddiv->lock, flags); + + return 0; + } +@@ -533,18 +493,15 @@ lgm_clk_ddiv_round_rate(struct clk_hw *hw, unsigned long rate, + { + struct lgm_clk_ddiv *ddiv = to_lgm_clk_ddiv(hw); + u32 div, ddiv1, ddiv2; +- unsigned long flags; + u64 rate64; + + div = DIV_ROUND_CLOSEST_ULL((u64)*prate, rate); + + /* if predivide bit is enabled, modify div by factor of 2.5 */ +- spin_lock_irqsave(&ddiv->lock, flags); + if (lgm_get_clk_val(ddiv->membase, ddiv->reg, ddiv->shift2, 1)) { + div = div * 2; + div = DIV_ROUND_CLOSEST_ULL((u64)div, 5); + } +- spin_unlock_irqrestore(&ddiv->lock, flags); + + if (div <= 0) + return *prate; +@@ -558,12 +515,10 @@ lgm_clk_ddiv_round_rate(struct clk_hw *hw, unsigned long rate, + do_div(rate64, ddiv2); + + /* if predivide bit is enabled, modify rounded rate by factor of 2.5 */ +- spin_lock_irqsave(&ddiv->lock, flags); + if (lgm_get_clk_val(ddiv->membase, ddiv->reg, ddiv->shift2, 1)) { + rate64 = rate64 * 2; + rate64 = DIV_ROUND_CLOSEST_ULL(rate64, 5); + } +- spin_unlock_irqrestore(&ddiv->lock, flags); + + return rate64; + } +@@ -600,7 +555,6 @@ int lgm_clk_register_ddiv(struct lgm_clk_provider *ctx, + init.num_parents = 1; + + ddiv->membase = ctx->membase; +- ddiv->lock = ctx->lock; + ddiv->reg = list->reg; + ddiv->shift0 = list->shift0; + ddiv->width0 = list->width0; +diff --git a/drivers/clk/x86/clk-cgu.h b/drivers/clk/x86/clk-cgu.h +index 4e22bfb223128..bcaf8aec94e5d 100644 +--- a/drivers/clk/x86/clk-cgu.h ++++ b/drivers/clk/x86/clk-cgu.h +@@ -1,28 +1,28 @@ + /* SPDX-License-Identifier: GPL-2.0 */ + /* +- * Copyright(c) 2020 Intel Corporation. +- * Zhu YiXin <yixin.zhu@intel.com> +- * Rahul Tanwar <rahul.tanwar@intel.com> ++ * Copyright (C) 2020-2022 MaxLinear, Inc. ++ * Copyright (C) 2020 Intel Corporation. 
++ * Zhu Yixin <yzhu@maxlinear.com> ++ * Rahul Tanwar <rtanwar@maxlinear.com> + */ + + #ifndef __CLK_CGU_H + #define __CLK_CGU_H + +-#include <linux/io.h> ++#include <linux/regmap.h> + + struct lgm_clk_mux { + struct clk_hw hw; +- void __iomem *membase; ++ struct regmap *membase; + unsigned int reg; + u8 shift; + u8 width; + unsigned long flags; +- spinlock_t lock; + }; + + struct lgm_clk_divider { + struct clk_hw hw; +- void __iomem *membase; ++ struct regmap *membase; + unsigned int reg; + u8 shift; + u8 width; +@@ -30,12 +30,11 @@ struct lgm_clk_divider { + u8 width_gate; + unsigned long flags; + const struct clk_div_table *table; +- spinlock_t lock; + }; + + struct lgm_clk_ddiv { + struct clk_hw hw; +- void __iomem *membase; ++ struct regmap *membase; + unsigned int reg; + u8 shift0; + u8 width0; +@@ -48,16 +47,14 @@ struct lgm_clk_ddiv { + unsigned int mult; + unsigned int div; + unsigned long flags; +- spinlock_t lock; + }; + + struct lgm_clk_gate { + struct clk_hw hw; +- void __iomem *membase; ++ struct regmap *membase; + unsigned int reg; + u8 shift; + unsigned long flags; +- spinlock_t lock; + }; + + enum lgm_clk_type { +@@ -77,11 +74,10 @@ enum lgm_clk_type { + * @clk_data: array of hw clocks and clk number. + */ + struct lgm_clk_provider { +- void __iomem *membase; ++ struct regmap *membase; + struct device_node *np; + struct device *dev; + struct clk_hw_onecell_data clk_data; +- spinlock_t lock; + }; + + enum pll_type { +@@ -92,11 +88,10 @@ enum pll_type { + + struct lgm_clk_pll { + struct clk_hw hw; +- void __iomem *membase; ++ struct regmap *membase; + unsigned int reg; + unsigned long flags; + enum pll_type type; +- spinlock_t lock; + }; + + /** +@@ -202,6 +197,8 @@ struct lgm_clk_branch { + /* clock flags definition */ + #define CLOCK_FLAG_VAL_INIT BIT(16) + #define MUX_CLK_SW BIT(17) ++#define GATE_CLK_HW BIT(18) ++#define DIV_CLK_NO_MASK BIT(19) + + #define LGM_MUX(_id, _name, _pdata, _f, _reg, \ + _shift, _width, _cf, _v) \ +@@ -300,29 +297,32 @@ struct lgm_clk_branch { + .div = _d, \ + } + +-static inline void lgm_set_clk_val(void __iomem *membase, u32 reg, ++static inline void lgm_set_clk_val(struct regmap *membase, u32 reg, + u8 shift, u8 width, u32 set_val) + { + u32 mask = (GENMASK(width - 1, 0) << shift); +- u32 regval; + +- regval = readl(membase + reg); +- regval = (regval & ~mask) | ((set_val << shift) & mask); +- writel(regval, membase + reg); ++ regmap_update_bits(membase, reg, mask, set_val << shift); + } + +-static inline u32 lgm_get_clk_val(void __iomem *membase, u32 reg, ++static inline u32 lgm_get_clk_val(struct regmap *membase, u32 reg, + u8 shift, u8 width) + { + u32 mask = (GENMASK(width - 1, 0) << shift); + u32 val; + +- val = readl(membase + reg); ++ if (regmap_read(membase, reg, &val)) { ++ WARN_ONCE(1, "Failed to read clk reg: 0x%x\n", reg); ++ return 0; ++ } ++ + val = (val & mask) >> shift; + + return val; + } + ++ ++ + int lgm_clk_register_branches(struct lgm_clk_provider *ctx, + const struct lgm_clk_branch *list, + unsigned int nr_clk); +diff --git a/drivers/clk/x86/clk-lgm.c b/drivers/clk/x86/clk-lgm.c +index 020f4e83a5ccb..f69455dd1c980 100644 +--- a/drivers/clk/x86/clk-lgm.c ++++ b/drivers/clk/x86/clk-lgm.c +@@ -1,10 +1,12 @@ + // SPDX-License-Identifier: GPL-2.0 + /* ++ * Copyright (C) 2020-2022 MaxLinear, Inc. + * Copyright (C) 2020 Intel Corporation. 
+- * Zhu YiXin <yixin.zhu@intel.com> +- * Rahul Tanwar <rahul.tanwar@intel.com> ++ * Zhu Yixin <yzhu@maxlinear.com> ++ * Rahul Tanwar <rtanwar@maxlinear.com> + */ + #include <linux/clk-provider.h> ++#include <linux/mfd/syscon.h> + #include <linux/of.h> + #include <linux/platform_device.h> + #include <dt-bindings/clock/intel,lgm-clk.h> +@@ -253,8 +255,8 @@ static const struct lgm_clk_branch lgm_branch_clks[] = { + LGM_FIXED(LGM_CLK_SLIC, "slic", NULL, 0, CGU_IF_CLK1, + 8, 2, CLOCK_FLAG_VAL_INIT, 8192000, 2), + LGM_FIXED(LGM_CLK_DOCSIS, "v_docsis", NULL, 0, 0, 0, 0, 0, 16000000, 0), +- LGM_DIV(LGM_CLK_DCL, "dcl", "v_ifclk", 0, CGU_PCMCR, +- 25, 3, 0, 0, 0, 0, dcl_div), ++ LGM_DIV(LGM_CLK_DCL, "dcl", "v_ifclk", CLK_SET_RATE_PARENT, CGU_PCMCR, ++ 25, 3, 0, 0, DIV_CLK_NO_MASK, 0, dcl_div), + LGM_MUX(LGM_CLK_PCM, "pcm", pcm_p, 0, CGU_C55_PCMCR, + 0, 1, CLK_MUX_ROUND_CLOSEST, 0), + LGM_FIXED_FACTOR(LGM_CLK_DDR_PHY, "ddr_phy", "ddr", +@@ -433,13 +435,15 @@ static int lgm_cgu_probe(struct platform_device *pdev) + + ctx->clk_data.num = CLK_NR_CLKS; + +- ctx->membase = devm_platform_ioremap_resource(pdev, 0); +- if (IS_ERR(ctx->membase)) ++ ctx->membase = syscon_node_to_regmap(np); ++ if (IS_ERR(ctx->membase)) { ++ dev_err(dev, "Failed to get clk CGU iomem\n"); + return PTR_ERR(ctx->membase); ++ } ++ + + ctx->np = np; + ctx->dev = dev; +- spin_lock_init(&ctx->lock); + + ret = lgm_clk_register_plls(ctx, lgm_pll_clks, + ARRAY_SIZE(lgm_pll_clks)); diff --git a/drivers/clk/zynqmp/clkc.c b/drivers/clk/zynqmp/clkc.c index eb25303eefed4..2c9da6623b84e 100644 --- a/drivers/clk/zynqmp/clkc.c @@ -82149,6 +98563,168 @@ index ff188ab68496e..bb47610bbd1c4 100644 hv_setup_sched_clock(read_hv_sched_clock_msr); } -EXPORT_SYMBOL_GPL(hv_init_clocksource); +diff --git a/drivers/clocksource/sh_cmt.c b/drivers/clocksource/sh_cmt.c +index dd0956ad969c1..d35548aa026fb 100644 +--- a/drivers/clocksource/sh_cmt.c ++++ b/drivers/clocksource/sh_cmt.c +@@ -13,6 +13,7 @@ + #include <linux/init.h> + #include <linux/interrupt.h> + #include <linux/io.h> ++#include <linux/iopoll.h> + #include <linux/ioport.h> + #include <linux/irq.h> + #include <linux/module.h> +@@ -116,6 +117,7 @@ struct sh_cmt_device { + void __iomem *mapbase; + struct clk *clk; + unsigned long rate; ++ unsigned int reg_delay; + + raw_spinlock_t lock; /* Protect the shared start/stop register */ + +@@ -247,10 +249,17 @@ static inline u32 sh_cmt_read_cmstr(struct sh_cmt_channel *ch) + + static inline void sh_cmt_write_cmstr(struct sh_cmt_channel *ch, u32 value) + { +- if (ch->iostart) +- ch->cmt->info->write_control(ch->iostart, 0, value); +- else +- ch->cmt->info->write_control(ch->cmt->mapbase, 0, value); ++ u32 old_value = sh_cmt_read_cmstr(ch); ++ ++ if (value != old_value) { ++ if (ch->iostart) { ++ ch->cmt->info->write_control(ch->iostart, 0, value); ++ udelay(ch->cmt->reg_delay); ++ } else { ++ ch->cmt->info->write_control(ch->cmt->mapbase, 0, value); ++ udelay(ch->cmt->reg_delay); ++ } ++ } + } + + static inline u32 sh_cmt_read_cmcsr(struct sh_cmt_channel *ch) +@@ -260,7 +269,12 @@ static inline u32 sh_cmt_read_cmcsr(struct sh_cmt_channel *ch) + + static inline void sh_cmt_write_cmcsr(struct sh_cmt_channel *ch, u32 value) + { +- ch->cmt->info->write_control(ch->ioctrl, CMCSR, value); ++ u32 old_value = sh_cmt_read_cmcsr(ch); ++ ++ if (value != old_value) { ++ ch->cmt->info->write_control(ch->ioctrl, CMCSR, value); ++ udelay(ch->cmt->reg_delay); ++ } + } + + static inline u32 sh_cmt_read_cmcnt(struct sh_cmt_channel *ch) +@@ -268,14 +282,33 @@ static inline 
u32 sh_cmt_read_cmcnt(struct sh_cmt_channel *ch) + return ch->cmt->info->read_count(ch->ioctrl, CMCNT); + } + +-static inline void sh_cmt_write_cmcnt(struct sh_cmt_channel *ch, u32 value) ++static inline int sh_cmt_write_cmcnt(struct sh_cmt_channel *ch, u32 value) + { ++ /* Tests showed that we need to wait 3 clocks here */ ++ unsigned int cmcnt_delay = DIV_ROUND_UP(3 * ch->cmt->reg_delay, 2); ++ u32 reg; ++ ++ if (ch->cmt->info->model > SH_CMT_16BIT) { ++ int ret = read_poll_timeout_atomic(sh_cmt_read_cmcsr, reg, ++ !(reg & SH_CMT32_CMCSR_WRFLG), ++ 1, cmcnt_delay, false, ch); ++ if (ret < 0) ++ return ret; ++ } ++ + ch->cmt->info->write_count(ch->ioctrl, CMCNT, value); ++ udelay(cmcnt_delay); ++ return 0; + } + + static inline void sh_cmt_write_cmcor(struct sh_cmt_channel *ch, u32 value) + { +- ch->cmt->info->write_count(ch->ioctrl, CMCOR, value); ++ u32 old_value = ch->cmt->info->read_count(ch->ioctrl, CMCOR); ++ ++ if (value != old_value) { ++ ch->cmt->info->write_count(ch->ioctrl, CMCOR, value); ++ udelay(ch->cmt->reg_delay); ++ } + } + + static u32 sh_cmt_get_counter(struct sh_cmt_channel *ch, u32 *has_wrapped) +@@ -319,7 +352,7 @@ static void sh_cmt_start_stop_ch(struct sh_cmt_channel *ch, int start) + + static int sh_cmt_enable(struct sh_cmt_channel *ch) + { +- int k, ret; ++ int ret; + + dev_pm_syscore_device(&ch->cmt->pdev->dev, true); + +@@ -347,26 +380,9 @@ static int sh_cmt_enable(struct sh_cmt_channel *ch) + } + + sh_cmt_write_cmcor(ch, 0xffffffff); +- sh_cmt_write_cmcnt(ch, 0); +- +- /* +- * According to the sh73a0 user's manual, as CMCNT can be operated +- * only by the RCLK (Pseudo 32 kHz), there's one restriction on +- * modifying CMCNT register; two RCLK cycles are necessary before +- * this register is either read or any modification of the value +- * it holds is reflected in the LSI's actual operation. +- * +- * While at it, we're supposed to clear out the CMCNT as of this +- * moment, so make sure it's processed properly here. This will +- * take RCLKx2 at maximum. +- */ +- for (k = 0; k < 100; k++) { +- if (!sh_cmt_read_cmcnt(ch)) +- break; +- udelay(1); +- } ++ ret = sh_cmt_write_cmcnt(ch, 0); + +- if (sh_cmt_read_cmcnt(ch)) { ++ if (ret || sh_cmt_read_cmcnt(ch)) { + dev_err(&ch->cmt->pdev->dev, "ch%u: cannot clear CMCNT\n", + ch->index); + ret = -ETIMEDOUT; +@@ -987,8 +1003,8 @@ MODULE_DEVICE_TABLE(of, sh_cmt_of_table); + + static int sh_cmt_setup(struct sh_cmt_device *cmt, struct platform_device *pdev) + { +- unsigned int mask; +- unsigned int i; ++ unsigned int mask, i; ++ unsigned long rate; + int ret; + + cmt->pdev = pdev; +@@ -1024,10 +1040,16 @@ static int sh_cmt_setup(struct sh_cmt_device *cmt, struct platform_device *pdev) + if (ret < 0) + goto err_clk_unprepare; + +- if (cmt->info->width == 16) +- cmt->rate = clk_get_rate(cmt->clk) / 512; +- else +- cmt->rate = clk_get_rate(cmt->clk) / 8; ++ rate = clk_get_rate(cmt->clk); ++ if (!rate) { ++ ret = -EINVAL; ++ goto err_clk_disable; ++ } ++ ++ /* We shall wait 2 input clks after register writes */ ++ if (cmt->info->model >= SH_CMT_48BIT) ++ cmt->reg_delay = DIV_ROUND_UP(2UL * USEC_PER_SEC, rate); ++ cmt->rate = rate / (cmt->info->width == 16 ? 512 : 8); + + /* Map the memory resource(s). 
*/ + ret = sh_cmt_map_memory(cmt); diff --git a/drivers/clocksource/timer-ixp4xx.c b/drivers/clocksource/timer-ixp4xx.c index cbb184953510b..b8e92991c4719 100644 --- a/drivers/clocksource/timer-ixp4xx.c @@ -82204,19 +98780,6 @@ index 56c0cc32d0ac6..d514b44e67dd1 100644 ret = -EINVAL; goto err_iomap; } -diff --git a/drivers/clocksource/timer-riscv.c b/drivers/clocksource/timer-riscv.c -index c51c5ed15aa75..0e7748df4be30 100644 ---- a/drivers/clocksource/timer-riscv.c -+++ b/drivers/clocksource/timer-riscv.c -@@ -32,7 +32,7 @@ static int riscv_clock_next_event(unsigned long delta, - static unsigned int riscv_clock_event_irq; - static DEFINE_PER_CPU(struct clock_event_device, riscv_clock_event) = { - .name = "riscv_timer_clockevent", -- .features = CLOCK_EVT_FEAT_ONESHOT, -+ .features = CLOCK_EVT_FEAT_ONESHOT | CLOCK_EVT_FEAT_C3STOP, - .rating = 100, - .set_next_event = riscv_clock_next_event, - }; diff --git a/drivers/clocksource/timer-sp804.c b/drivers/clocksource/timer-sp804.c index 401d592e85f5a..e6a87f4af2b50 100644 --- a/drivers/clocksource/timer-sp804.c @@ -82246,7 +98809,7 @@ index 401d592e85f5a..e6a87f4af2b50 100644 if (IS_ERR(clk1)) clk1 = NULL; diff --git a/drivers/clocksource/timer-ti-dm-systimer.c b/drivers/clocksource/timer-ti-dm-systimer.c -index b6f97960d8ee0..2737407ff0698 100644 +index b6f97960d8ee0..632523c1232f6 100644 --- a/drivers/clocksource/timer-ti-dm-systimer.c +++ b/drivers/clocksource/timer-ti-dm-systimer.c @@ -241,8 +241,7 @@ static void __init dmtimer_systimer_assign_alwon(void) @@ -82259,7 +98822,19 @@ index b6f97960d8ee0..2737407ff0698 100644 quirk_unreliable_oscillator = true; counter_32k = -ENODEV; } -@@ -695,9 +694,9 @@ static int __init dmtimer_percpu_quirk_init(struct device_node *np, u32 pa) +@@ -346,8 +345,10 @@ static int __init dmtimer_systimer_init_clock(struct dmtimer_systimer *t, + return error; + + r = clk_get_rate(clock); +- if (!r) ++ if (!r) { ++ clk_disable_unprepare(clock); + return -ENODEV; ++ } + + if (is_ick) + t->ick = clock; +@@ -695,9 +696,9 @@ static int __init dmtimer_percpu_quirk_init(struct device_node *np, u32 pa) return 0; } @@ -82271,6 +98846,19 @@ index b6f97960d8ee0..2737407ff0698 100644 return dmtimer_percpu_timer_init(np, 1); return 0; +diff --git a/drivers/comedi/drivers/adv_pci1760.c b/drivers/comedi/drivers/adv_pci1760.c +index 6de8ab97d346c..d6934b6c436d1 100644 +--- a/drivers/comedi/drivers/adv_pci1760.c ++++ b/drivers/comedi/drivers/adv_pci1760.c +@@ -59,7 +59,7 @@ + #define PCI1760_CMD_CLR_IMB2 0x00 /* Clears IMB2 */ + #define PCI1760_CMD_SET_DO 0x01 /* Set output state */ + #define PCI1760_CMD_GET_DO 0x02 /* Read output status */ +-#define PCI1760_CMD_GET_STATUS 0x03 /* Read current status */ ++#define PCI1760_CMD_GET_STATUS 0x07 /* Read current status */ + #define PCI1760_CMD_GET_FW_VER 0x0e /* Read firmware version */ + #define PCI1760_CMD_GET_HW_VER 0x0f /* Read hardware version */ + #define PCI1760_CMD_SET_PWM_HI(x) (0x10 + (x) * 2) /* Set "hi" period */ diff --git a/drivers/comedi/drivers/dt9812.c b/drivers/comedi/drivers/dt9812.c index 634f57730c1e0..704b04d2980d3 100644 --- a/drivers/comedi/drivers/dt9812.c @@ -82558,6 +99146,95 @@ index 9f920819cd742..9a1d146b7ebb2 100644 devpriv->usb_tx_buf = kzalloc(size, GFP_KERNEL); if (!devpriv->usb_tx_buf) return -ENOMEM; +diff --git a/drivers/counter/microchip-tcb-capture.c b/drivers/counter/microchip-tcb-capture.c +index 1aa70b9c48330..22563dcded751 100644 +--- a/drivers/counter/microchip-tcb-capture.c ++++ b/drivers/counter/microchip-tcb-capture.c +@@ -29,7 +29,6 @@ 
struct mchp_tc_data { + int qdec_mode; + int num_channels; + int channel[2]; +- bool trig_inverted; + }; + + enum mchp_tc_count_function { +@@ -166,7 +165,7 @@ static int mchp_tc_count_signal_read(struct counter_device *counter, + + regmap_read(priv->regmap, ATMEL_TC_REG(priv->channel[0], SR), &sr); + +- if (priv->trig_inverted) ++ if (signal->id == 1) + sigstatus = (sr & ATMEL_TC_MTIOB); + else + sigstatus = (sr & ATMEL_TC_MTIOA); +@@ -184,6 +183,17 @@ static int mchp_tc_count_action_get(struct counter_device *counter, + struct mchp_tc_data *const priv = counter->priv; + u32 cmr; + ++ if (priv->qdec_mode) { ++ *action = COUNTER_SYNAPSE_ACTION_BOTH_EDGES; ++ return 0; ++ } ++ ++ /* Only TIOA signal is evaluated in non-QDEC mode */ ++ if (synapse->signal->id != 0) { ++ *action = COUNTER_SYNAPSE_ACTION_NONE; ++ return 0; ++ } ++ + regmap_read(priv->regmap, ATMEL_TC_REG(priv->channel[0], CMR), &cmr); + + switch (cmr & ATMEL_TC_ETRGEDG) { +@@ -212,8 +222,8 @@ static int mchp_tc_count_action_set(struct counter_device *counter, + struct mchp_tc_data *const priv = counter->priv; + u32 edge = ATMEL_TC_ETRGEDG_NONE; + +- /* QDEC mode is rising edge only */ +- if (priv->qdec_mode) ++ /* QDEC mode is rising edge only; only TIOA handled in non-QDEC mode */ ++ if (priv->qdec_mode || synapse->signal->id != 0) + return -EINVAL; + + switch (action) { +diff --git a/drivers/counter/stm32-lptimer-cnt.c b/drivers/counter/stm32-lptimer-cnt.c +index 13656957c45f1..fa7f86cf0ea32 100644 +--- a/drivers/counter/stm32-lptimer-cnt.c ++++ b/drivers/counter/stm32-lptimer-cnt.c +@@ -69,7 +69,7 @@ static int stm32_lptim_set_enable_state(struct stm32_lptim_cnt *priv, + + /* ensure CMP & ARR registers are properly written */ + ret = regmap_read_poll_timeout(priv->regmap, STM32_LPTIM_ISR, val, +- (val & STM32_LPTIM_CMPOK_ARROK), ++ (val & STM32_LPTIM_CMPOK_ARROK) == STM32_LPTIM_CMPOK_ARROK, + 100, 1000); + if (ret) + return ret; +diff --git a/drivers/cpufreq/amd_freq_sensitivity.c b/drivers/cpufreq/amd_freq_sensitivity.c +index d0b10baf039ab..151771129c7ba 100644 +--- a/drivers/cpufreq/amd_freq_sensitivity.c ++++ b/drivers/cpufreq/amd_freq_sensitivity.c +@@ -124,6 +124,8 @@ static int __init amd_freq_sensitivity_init(void) + if (!pcidev) { + if (!boot_cpu_has(X86_FEATURE_PROC_FEEDBACK)) + return -ENODEV; ++ } else { ++ pci_dev_put(pcidev); + } + + if (rdmsrl_safe(MSR_AMD64_FREQ_SENSITIVITY_ACTUAL, &val)) +diff --git a/drivers/cpufreq/armada-37xx-cpufreq.c b/drivers/cpufreq/armada-37xx-cpufreq.c +index c10fc33b29b18..b74289a95a171 100644 +--- a/drivers/cpufreq/armada-37xx-cpufreq.c ++++ b/drivers/cpufreq/armada-37xx-cpufreq.c +@@ -445,7 +445,7 @@ static int __init armada37xx_cpufreq_driver_init(void) + return -ENODEV; + } + +- clk = clk_get(cpu_dev, 0); ++ clk = clk_get(cpu_dev, NULL); + if (IS_ERR(clk)) { + dev_err(cpu_dev, "Cannot get clock for CPU0\n"); + return PTR_ERR(clk); diff --git a/drivers/cpufreq/cppc_cpufreq.c b/drivers/cpufreq/cppc_cpufreq.c index d4c27022b9c9b..e0ff09d66c96b 100644 --- a/drivers/cpufreq/cppc_cpufreq.c @@ -82644,8 +99321,28 @@ index d4c27022b9c9b..e0ff09d66c96b 100644 } static int cppc_cpufreq_set_target(struct cpufreq_policy *policy, +diff --git a/drivers/cpufreq/cpufreq-dt-platdev.c b/drivers/cpufreq/cpufreq-dt-platdev.c +index ca1d103ec4492..e1b5975c7daa1 100644 +--- a/drivers/cpufreq/cpufreq-dt-platdev.c ++++ b/drivers/cpufreq/cpufreq-dt-platdev.c +@@ -133,6 +133,7 @@ static const struct of_device_id blocklist[] __initconst = { + { .compatible = "nvidia,tegra30", }, + { .compatible = 
"nvidia,tegra124", }, + { .compatible = "nvidia,tegra210", }, ++ { .compatible = "nvidia,tegra234", }, + + { .compatible = "qcom,apq8096", }, + { .compatible = "qcom,msm8996", }, +@@ -143,6 +144,7 @@ static const struct of_device_id blocklist[] __initconst = { + { .compatible = "qcom,sc8180x", }, + { .compatible = "qcom,sdm845", }, + { .compatible = "qcom,sm6350", }, ++ { .compatible = "qcom,sm6375", }, + { .compatible = "qcom,sm8150", }, + { .compatible = "qcom,sm8250", }, + { .compatible = "qcom,sm8350", }, diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c -index 5782b15a8caad..799431d287ee8 100644 +index 5782b15a8caad..b998b50839534 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -28,6 +28,7 @@ @@ -82678,6 +99375,22 @@ index 5782b15a8caad..799431d287ee8 100644 if (unlikely(!dev)) return; +@@ -1212,6 +1212,7 @@ static struct cpufreq_policy *cpufreq_policy_alloc(unsigned int cpu) + if (!zalloc_cpumask_var(&policy->real_cpus, GFP_KERNEL)) + goto err_free_rcpumask; + ++ init_completion(&policy->kobj_unregister); + ret = kobject_init_and_add(&policy->kobj, &ktype_cpufreq, + cpufreq_global_kobject, "policy%u", cpu); + if (ret) { +@@ -1250,7 +1251,6 @@ static struct cpufreq_policy *cpufreq_policy_alloc(unsigned int cpu) + init_rwsem(&policy->rwsem); + spin_lock_init(&policy->transition_lock); + init_waitqueue_head(&policy->transition_wait); +- init_completion(&policy->kobj_unregister); + INIT_WORK(&policy->update, handle_update); + + policy->cpu = cpu; @@ -1391,7 +1391,7 @@ static int cpufreq_online(unsigned int cpu) if (new_policy) { for_each_cpu(j, policy->related_cpus) { @@ -82747,11 +99460,114 @@ index 5782b15a8caad..799431d287ee8 100644 trace_cpu_frequency_limits(policy); policy->cached_target_freq = UINT_MAX; +diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c +index 63f7c219062b9..55c80319d2684 100644 +--- a/drivers/cpufreq/cpufreq_governor.c ++++ b/drivers/cpufreq/cpufreq_governor.c +@@ -388,6 +388,15 @@ static void free_policy_dbs_info(struct policy_dbs_info *policy_dbs, + gov->free(policy_dbs); + } + ++static void cpufreq_dbs_data_release(struct kobject *kobj) ++{ ++ struct dbs_data *dbs_data = to_dbs_data(to_gov_attr_set(kobj)); ++ struct dbs_governor *gov = dbs_data->gov; ++ ++ gov->exit(dbs_data); ++ kfree(dbs_data); ++} ++ + int cpufreq_dbs_governor_init(struct cpufreq_policy *policy) + { + struct dbs_governor *gov = dbs_governor_of(policy); +@@ -425,6 +434,7 @@ int cpufreq_dbs_governor_init(struct cpufreq_policy *policy) + goto free_policy_dbs_info; + } + ++ dbs_data->gov = gov; + gov_attr_set_init(&dbs_data->attr_set, &policy_dbs->list); + + ret = gov->init(dbs_data); +@@ -447,6 +457,7 @@ int cpufreq_dbs_governor_init(struct cpufreq_policy *policy) + policy->governor_data = policy_dbs; + + gov->kobj_type.sysfs_ops = &governor_sysfs_ops; ++ gov->kobj_type.release = cpufreq_dbs_data_release; + ret = kobject_init_and_add(&dbs_data->attr_set.kobj, &gov->kobj_type, + get_governor_parent_kobj(policy), + "%s", gov->gov.name); +@@ -488,13 +499,8 @@ void cpufreq_dbs_governor_exit(struct cpufreq_policy *policy) + + policy->governor_data = NULL; + +- if (!count) { +- if (!have_governor_per_policy()) +- gov->gdbs_data = NULL; +- +- gov->exit(dbs_data); +- kfree(dbs_data); +- } ++ if (!count && !have_governor_per_policy()) ++ gov->gdbs_data = NULL; + + free_policy_dbs_info(policy_dbs, gov); + +diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h +index bab8e61403771..a6de26318abb8 
100644 +--- a/drivers/cpufreq/cpufreq_governor.h ++++ b/drivers/cpufreq/cpufreq_governor.h +@@ -37,6 +37,7 @@ enum {OD_NORMAL_SAMPLE, OD_SUB_SAMPLE}; + /* Governor demand based switching data (per-policy or global). */ + struct dbs_data { + struct gov_attr_set attr_set; ++ struct dbs_governor *gov; + void *tuners; + unsigned int ignore_nice_load; + unsigned int sampling_rate; +diff --git a/drivers/cpufreq/cpufreq_governor_attr_set.c b/drivers/cpufreq/cpufreq_governor_attr_set.c +index a6f365b9cc1ad..771770ea0ed0b 100644 +--- a/drivers/cpufreq/cpufreq_governor_attr_set.c ++++ b/drivers/cpufreq/cpufreq_governor_attr_set.c +@@ -8,11 +8,6 @@ + + #include "cpufreq_governor.h" + +-static inline struct gov_attr_set *to_gov_attr_set(struct kobject *kobj) +-{ +- return container_of(kobj, struct gov_attr_set, kobj); +-} +- + static inline struct governor_attr *to_gov_attr(struct attribute *attr) + { + return container_of(attr, struct governor_attr, attr); diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c -index 8c176b7dae415..c57229c108a73 100644 +index 8c176b7dae415..eee74a2fe3174 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c -@@ -335,6 +335,8 @@ static void intel_pstste_sched_itmt_work_fn(struct work_struct *work) +@@ -27,6 +27,7 @@ + #include <linux/pm_qos.h> + #include <trace/events/power.h> + ++#include <asm/cpu.h> + #include <asm/div64.h> + #include <asm/msr.h> + #include <asm/cpu_device_id.h> +@@ -277,10 +278,10 @@ static struct cpudata **all_cpu_data; + * structure is used to store those callbacks. + */ + struct pstate_funcs { +- int (*get_max)(void); +- int (*get_max_physical)(void); +- int (*get_min)(void); +- int (*get_turbo)(void); ++ int (*get_max)(int cpu); ++ int (*get_max_physical)(int cpu); ++ int (*get_min)(int cpu); ++ int (*get_turbo)(int cpu); + int (*get_scaling)(void); + int (*get_cpu_scaling)(int cpu); + int (*get_aperf_mperf_shift)(void); +@@ -335,6 +336,8 @@ static void intel_pstste_sched_itmt_work_fn(struct work_struct *work) static DECLARE_WORK(sched_itmt_work, intel_pstste_sched_itmt_work_fn); @@ -82760,7 +99576,7 @@ index 8c176b7dae415..c57229c108a73 100644 static void intel_pstate_set_itmt_prio(int cpu) { struct cppc_perf_caps cppc_perf; -@@ -345,6 +347,14 @@ static void intel_pstate_set_itmt_prio(int cpu) +@@ -345,6 +348,14 @@ static void intel_pstate_set_itmt_prio(int cpu) if (ret) return; @@ -82775,17 +99591,62 @@ index 8c176b7dae415..c57229c108a73 100644 /* * The priorities can be set regardless of whether or not * sched_set_itmt_support(true) has been called and it is valid to -@@ -537,7 +547,8 @@ static void intel_pstate_hybrid_hwp_adjust(struct cpudata *cpu) - * scaling factor is too high, so recompute it to make the HWP_CAP - * highest performance correspond to the maximum turbo frequency. 
- */ +@@ -385,16 +396,6 @@ static int intel_pstate_get_cppc_guaranteed(int cpu) + + return cppc_perf.nominal_perf; + } +- +-static u32 intel_pstate_cppc_nominal(int cpu) +-{ +- u64 nominal_perf; +- +- if (cppc_get_nominal_perf(cpu, &nominal_perf)) +- return 0; +- +- return nominal_perf; +-} + #else /* CONFIG_ACPI_CPPC_LIB */ + static inline void intel_pstate_set_itmt_prio(int cpu) + { +@@ -518,34 +519,18 @@ static void intel_pstate_hybrid_hwp_adjust(struct cpudata *cpu) + { + int perf_ctl_max_phys = cpu->pstate.max_pstate_physical; + int perf_ctl_scaling = cpu->pstate.perf_ctl_scaling; +- int perf_ctl_turbo = pstate_funcs.get_turbo(); +- int turbo_freq = perf_ctl_turbo * perf_ctl_scaling; ++ int perf_ctl_turbo = pstate_funcs.get_turbo(cpu->cpu); + int scaling = cpu->pstate.scaling; + + pr_debug("CPU%d: perf_ctl_max_phys = %d\n", cpu->cpu, perf_ctl_max_phys); +- pr_debug("CPU%d: perf_ctl_max = %d\n", cpu->cpu, pstate_funcs.get_max()); + pr_debug("CPU%d: perf_ctl_turbo = %d\n", cpu->cpu, perf_ctl_turbo); + pr_debug("CPU%d: perf_ctl_scaling = %d\n", cpu->cpu, perf_ctl_scaling); + pr_debug("CPU%d: HWP_CAP guaranteed = %d\n", cpu->cpu, cpu->pstate.max_pstate); + pr_debug("CPU%d: HWP_CAP highest = %d\n", cpu->cpu, cpu->pstate.turbo_pstate); + pr_debug("CPU%d: HWP-to-frequency scaling factor: %d\n", cpu->cpu, scaling); + +- /* +- * If the product of the HWP performance scaling factor and the HWP_CAP +- * highest performance is greater than the maximum turbo frequency +- * corresponding to the pstate_funcs.get_turbo() return value, the +- * scaling factor is too high, so recompute it to make the HWP_CAP +- * highest performance correspond to the maximum turbo frequency. +- */ - if (turbo_freq < cpu->pstate.turbo_pstate * scaling) { -+ cpu->pstate.turbo_freq = cpu->pstate.turbo_pstate * scaling; -+ if (turbo_freq < cpu->pstate.turbo_freq) { - cpu->pstate.turbo_freq = turbo_freq; - scaling = DIV_ROUND_UP(turbo_freq, cpu->pstate.turbo_pstate); - cpu->pstate.scaling = scaling; -@@ -998,9 +1009,22 @@ static void intel_pstate_hwp_offline(struct cpudata *cpu) +- cpu->pstate.turbo_freq = turbo_freq; +- scaling = DIV_ROUND_UP(turbo_freq, cpu->pstate.turbo_pstate); +- cpu->pstate.scaling = scaling; +- +- pr_debug("CPU%d: refined HWP-to-frequency scaling factor: %d\n", +- cpu->cpu, scaling); +- } +- ++ cpu->pstate.turbo_freq = rounddown(cpu->pstate.turbo_pstate * scaling, ++ perf_ctl_scaling); + cpu->pstate.max_freq = rounddown(cpu->pstate.max_pstate * scaling, + perf_ctl_scaling); + +@@ -998,9 +983,22 @@ static void intel_pstate_hwp_offline(struct cpudata *cpu) */ value &= ~GENMASK_ULL(31, 24); value |= HWP_ENERGY_PERF_PREFERENCE(cpu->epp_cached); @@ -82809,7 +99670,216 @@ index 8c176b7dae415..c57229c108a73 100644 value &= ~GENMASK_ULL(31, 0); min_perf = HWP_LOWEST_PERF(READ_ONCE(cpu->hwp_cap_cached)); -@@ -2233,6 +2257,7 @@ static const struct x86_cpu_id intel_pstate_cpu_ids[] = { +@@ -1557,7 +1555,7 @@ static void intel_pstate_hwp_enable(struct cpudata *cpudata) + cpudata->epp_default = intel_pstate_get_epp(cpudata, 0); + } + +-static int atom_get_min_pstate(void) ++static int atom_get_min_pstate(int not_used) + { + u64 value; + +@@ -1565,7 +1563,7 @@ static int atom_get_min_pstate(void) + return (value >> 8) & 0x7F; + } + +-static int atom_get_max_pstate(void) ++static int atom_get_max_pstate(int not_used) + { + u64 value; + +@@ -1573,7 +1571,7 @@ static int atom_get_max_pstate(void) + return (value >> 16) & 0x7F; + } + +-static int atom_get_turbo_pstate(void) ++static int atom_get_turbo_pstate(int 
not_used) + { + u64 value; + +@@ -1651,23 +1649,23 @@ static void atom_get_vid(struct cpudata *cpudata) + cpudata->vid.turbo = value & 0x7f; + } + +-static int core_get_min_pstate(void) ++static int core_get_min_pstate(int cpu) + { + u64 value; + +- rdmsrl(MSR_PLATFORM_INFO, value); ++ rdmsrl_on_cpu(cpu, MSR_PLATFORM_INFO, &value); + return (value >> 40) & 0xFF; + } + +-static int core_get_max_pstate_physical(void) ++static int core_get_max_pstate_physical(int cpu) + { + u64 value; + +- rdmsrl(MSR_PLATFORM_INFO, value); ++ rdmsrl_on_cpu(cpu, MSR_PLATFORM_INFO, &value); + return (value >> 8) & 0xFF; + } + +-static int core_get_tdp_ratio(u64 plat_info) ++static int core_get_tdp_ratio(int cpu, u64 plat_info) + { + /* Check how many TDP levels present */ + if (plat_info & 0x600000000) { +@@ -1677,13 +1675,13 @@ static int core_get_tdp_ratio(u64 plat_info) + int err; + + /* Get the TDP level (0, 1, 2) to get ratios */ +- err = rdmsrl_safe(MSR_CONFIG_TDP_CONTROL, &tdp_ctrl); ++ err = rdmsrl_safe_on_cpu(cpu, MSR_CONFIG_TDP_CONTROL, &tdp_ctrl); + if (err) + return err; + + /* TDP MSR are continuous starting at 0x648 */ + tdp_msr = MSR_CONFIG_TDP_NOMINAL + (tdp_ctrl & 0x03); +- err = rdmsrl_safe(tdp_msr, &tdp_ratio); ++ err = rdmsrl_safe_on_cpu(cpu, tdp_msr, &tdp_ratio); + if (err) + return err; + +@@ -1700,7 +1698,7 @@ static int core_get_tdp_ratio(u64 plat_info) + return -ENXIO; + } + +-static int core_get_max_pstate(void) ++static int core_get_max_pstate(int cpu) + { + u64 tar; + u64 plat_info; +@@ -1708,10 +1706,10 @@ static int core_get_max_pstate(void) + int tdp_ratio; + int err; + +- rdmsrl(MSR_PLATFORM_INFO, plat_info); ++ rdmsrl_on_cpu(cpu, MSR_PLATFORM_INFO, &plat_info); + max_pstate = (plat_info >> 8) & 0xFF; + +- tdp_ratio = core_get_tdp_ratio(plat_info); ++ tdp_ratio = core_get_tdp_ratio(cpu, plat_info); + if (tdp_ratio <= 0) + return max_pstate; + +@@ -1720,7 +1718,7 @@ static int core_get_max_pstate(void) + return tdp_ratio; + } + +- err = rdmsrl_safe(MSR_TURBO_ACTIVATION_RATIO, &tar); ++ err = rdmsrl_safe_on_cpu(cpu, MSR_TURBO_ACTIVATION_RATIO, &tar); + if (!err) { + int tar_levels; + +@@ -1735,13 +1733,13 @@ static int core_get_max_pstate(void) + return max_pstate; + } + +-static int core_get_turbo_pstate(void) ++static int core_get_turbo_pstate(int cpu) + { + u64 value; + int nont, ret; + +- rdmsrl(MSR_TURBO_RATIO_LIMIT, value); +- nont = core_get_max_pstate(); ++ rdmsrl_on_cpu(cpu, MSR_TURBO_RATIO_LIMIT, &value); ++ nont = core_get_max_pstate(cpu); + ret = (value) & 255; + if (ret <= nont) + ret = nont; +@@ -1769,50 +1767,37 @@ static int knl_get_aperf_mperf_shift(void) + return 10; + } + +-static int knl_get_turbo_pstate(void) ++static int knl_get_turbo_pstate(int cpu) + { + u64 value; + int nont, ret; + +- rdmsrl(MSR_TURBO_RATIO_LIMIT, value); +- nont = core_get_max_pstate(); ++ rdmsrl_on_cpu(cpu, MSR_TURBO_RATIO_LIMIT, &value); ++ nont = core_get_max_pstate(cpu); + ret = (((value) >> 8) & 0xFF); + if (ret <= nont) + ret = nont; + return ret; + } + +-#ifdef CONFIG_ACPI_CPPC_LIB +-static u32 hybrid_ref_perf; +- +-static int hybrid_get_cpu_scaling(int cpu) ++static void hybrid_get_type(void *data) + { +- return DIV_ROUND_UP(core_get_scaling() * hybrid_ref_perf, +- intel_pstate_cppc_nominal(cpu)); ++ u8 *cpu_type = data; ++ ++ *cpu_type = get_this_hybrid_cpu_type(); + } + +-static void intel_pstate_cppc_set_cpu_scaling(void) ++static int hybrid_get_cpu_scaling(int cpu) + { +- u32 min_nominal_perf = U32_MAX; +- int cpu; ++ u8 cpu_type = 0; + +- for_each_present_cpu(cpu) { +- u32 
nominal_perf = intel_pstate_cppc_nominal(cpu); ++ smp_call_function_single(cpu, hybrid_get_type, &cpu_type, 1); ++ /* P-cores have a smaller perf level-to-freqency scaling factor. */ ++ if (cpu_type == 0x40) ++ return 78741; + +- if (nominal_perf && nominal_perf < min_nominal_perf) +- min_nominal_perf = nominal_perf; +- } +- +- if (min_nominal_perf < U32_MAX) { +- hybrid_ref_perf = min_nominal_perf; +- pstate_funcs.get_cpu_scaling = hybrid_get_cpu_scaling; +- } ++ return core_get_scaling(); + } +-#else +-static inline void intel_pstate_cppc_set_cpu_scaling(void) +-{ +-} +-#endif /* CONFIG_ACPI_CPPC_LIB */ + + static void intel_pstate_set_pstate(struct cpudata *cpu, int pstate) + { +@@ -1842,10 +1827,10 @@ static void intel_pstate_max_within_limits(struct cpudata *cpu) + + static void intel_pstate_get_cpu_pstates(struct cpudata *cpu) + { +- int perf_ctl_max_phys = pstate_funcs.get_max_physical(); ++ int perf_ctl_max_phys = pstate_funcs.get_max_physical(cpu->cpu); + int perf_ctl_scaling = pstate_funcs.get_scaling(); + +- cpu->pstate.min_pstate = pstate_funcs.get_min(); ++ cpu->pstate.min_pstate = pstate_funcs.get_min(cpu->cpu); + cpu->pstate.max_pstate_physical = perf_ctl_max_phys; + cpu->pstate.perf_ctl_scaling = perf_ctl_scaling; + +@@ -1861,8 +1846,8 @@ static void intel_pstate_get_cpu_pstates(struct cpudata *cpu) + } + } else { + cpu->pstate.scaling = perf_ctl_scaling; +- cpu->pstate.max_pstate = pstate_funcs.get_max(); +- cpu->pstate.turbo_pstate = pstate_funcs.get_turbo(); ++ cpu->pstate.max_pstate = pstate_funcs.get_max(cpu->cpu); ++ cpu->pstate.turbo_pstate = pstate_funcs.get_turbo(cpu->cpu); + } + + if (cpu->pstate.scaling == perf_ctl_scaling) { +@@ -2233,6 +2218,7 @@ static const struct x86_cpu_id intel_pstate_cpu_ids[] = { X86_MATCH(SKYLAKE_X, core_funcs), X86_MATCH(COMETLAKE, core_funcs), X86_MATCH(ICELAKE_X, core_funcs), @@ -82817,7 +99887,7 @@ index 8c176b7dae415..c57229c108a73 100644 {} }; MODULE_DEVICE_TABLE(x86cpu, intel_pstate_cpu_ids); -@@ -2241,6 +2266,7 @@ static const struct x86_cpu_id intel_pstate_cpu_oob_ids[] __initconst = { +@@ -2241,6 +2227,7 @@ static const struct x86_cpu_id intel_pstate_cpu_oob_ids[] __initconst = { X86_MATCH(BROADWELL_D, core_funcs), X86_MATCH(BROADWELL_X, core_funcs), X86_MATCH(SKYLAKE_X, core_funcs), @@ -82825,7 +99895,7 @@ index 8c176b7dae415..c57229c108a73 100644 {} }; -@@ -2902,6 +2928,27 @@ static int intel_cpufreq_cpu_exit(struct cpufreq_policy *policy) +@@ -2902,6 +2889,27 @@ static int intel_cpufreq_cpu_exit(struct cpufreq_policy *policy) return intel_pstate_cpu_exit(policy); } @@ -82853,7 +99923,7 @@ index 8c176b7dae415..c57229c108a73 100644 static struct cpufreq_driver intel_cpufreq = { .flags = CPUFREQ_CONST_LOOPS, .verify = intel_cpufreq_verify_policy, -@@ -2911,7 +2958,7 @@ static struct cpufreq_driver intel_cpufreq = { +@@ -2911,7 +2919,7 @@ static struct cpufreq_driver intel_cpufreq = { .exit = intel_cpufreq_cpu_exit, .offline = intel_cpufreq_cpu_offline, .online = intel_pstate_cpu_online, @@ -82862,6 +99932,28 @@ index 8c176b7dae415..c57229c108a73 100644 .resume = intel_pstate_resume, .update_limits = intel_pstate_update_limits, .name = "intel_cpufreq", +@@ -3016,9 +3024,9 @@ static unsigned int force_load __initdata; + + static int __init intel_pstate_msrs_not_valid(void) + { +- if (!pstate_funcs.get_max() || +- !pstate_funcs.get_min() || +- !pstate_funcs.get_turbo()) ++ if (!pstate_funcs.get_max(0) || ++ !pstate_funcs.get_min(0) || ++ !pstate_funcs.get_turbo(0)) + return -ENODEV; + + return 0; +@@ -3234,7 +3242,7 @@ static int 
__init intel_pstate_init(void) + default_driver = &intel_pstate; + + if (boot_cpu_has(X86_FEATURE_HYBRID_CPU)) +- intel_pstate_cppc_set_cpu_scaling(); ++ pstate_funcs.get_cpu_scaling = hybrid_get_cpu_scaling; + + goto hwp_cpu_matched; + } diff --git a/drivers/cpufreq/mediatek-cpufreq.c b/drivers/cpufreq/mediatek-cpufreq.c index 866163883b48d..bfe240c726e34 100644 --- a/drivers/cpufreq/mediatek-cpufreq.c @@ -82927,7 +100019,7 @@ index 4f20c6a9108df..8e41fe9ee870d 100644 * on the bus frequencies */ diff --git a/drivers/cpufreq/qcom-cpufreq-hw.c b/drivers/cpufreq/qcom-cpufreq-hw.c -index a2be0df7e1747..bb2f59fd0de43 100644 +index a2be0df7e1747..bbcba2c38e853 100644 --- a/drivers/cpufreq/qcom-cpufreq-hw.c +++ b/drivers/cpufreq/qcom-cpufreq-hw.c @@ -24,12 +24,16 @@ @@ -82947,7 +100039,15 @@ index a2be0df7e1747..bb2f59fd0de43 100644 u32 reg_current_vote; u32 reg_perf_state; u8 lut_row_size; -@@ -266,28 +270,31 @@ static void qcom_get_related_cpus(int index, struct cpumask *m) +@@ -173,6 +177,7 @@ static int qcom_cpufreq_hw_read_lut(struct device *cpu_dev, + } + } else if (ret != -ENODEV) { + dev_err(cpu_dev, "Invalid opp table in device tree\n"); ++ kfree(table); + return ret; + } else { + policy->fast_switch_possible = true; +@@ -266,28 +271,31 @@ static void qcom_get_related_cpus(int index, struct cpumask *m) } } @@ -82986,7 +100086,7 @@ index a2be0df7e1747..bb2f59fd0de43 100644 opp = dev_pm_opp_find_freq_floor(dev, &freq_hz); if (IS_ERR(opp) && PTR_ERR(opp) == -ERANGE) -@@ -304,7 +311,8 @@ static void qcom_lmh_dcvs_notify(struct qcom_cpufreq_data *data) +@@ -304,7 +312,8 @@ static void qcom_lmh_dcvs_notify(struct qcom_cpufreq_data *data) if (capacity > max_capacity) capacity = max_capacity; @@ -82996,7 +100096,7 @@ index a2be0df7e1747..bb2f59fd0de43 100644 /* * In the unlikely case policy is unregistered do not enable -@@ -342,9 +350,13 @@ static irqreturn_t qcom_lmh_dcvs_handle_irq(int irq, void *data) +@@ -342,9 +351,13 @@ static irqreturn_t qcom_lmh_dcvs_handle_irq(int irq, void *data) /* Disable interrupt and enable polling */ disable_irq_nosync(c_data->throttle_irq); @@ -83012,7 +100112,7 @@ index a2be0df7e1747..bb2f59fd0de43 100644 } static const struct qcom_cpufreq_soc_data qcom_soc_data = { -@@ -358,8 +370,10 @@ static const struct qcom_cpufreq_soc_data qcom_soc_data = { +@@ -358,8 +371,10 @@ static const struct qcom_cpufreq_soc_data qcom_soc_data = { static const struct qcom_cpufreq_soc_data epss_soc_data = { .reg_enable = 0x0, @@ -83174,6 +100274,18 @@ index b51b5df084500..540105ca0781f 100644 err = cpuhp_setup_state_nocalls(CPUHP_AP_CPU_PM_STARTING, "cpuidle/psci:online", psci_idle_cpuhp_up, +diff --git a/drivers/cpuidle/dt_idle_states.c b/drivers/cpuidle/dt_idle_states.c +index 252f2a9686a62..448bc796b0b40 100644 +--- a/drivers/cpuidle/dt_idle_states.c ++++ b/drivers/cpuidle/dt_idle_states.c +@@ -223,6 +223,6 @@ int dt_init_idle_driver(struct cpuidle_driver *drv, + * also be 0 on platforms with missing DT idle states or legacy DT + * configuration predating the DT idle states bindings. 
+ */ +- return i; ++ return state_idx - start_idx; + } + EXPORT_SYMBOL_GPL(dt_init_idle_driver); diff --git a/drivers/cpuidle/sysfs.c b/drivers/cpuidle/sysfs.c index 53ec9585ccd44..469e18547d06c 100644 --- a/drivers/cpuidle/sysfs.c @@ -83214,6 +100326,23 @@ index 53ec9585ccd44..469e18547d06c 100644 kobject_uevent(&kdev->kobj, KOBJ_ADD); return 0; +diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig +index 51690e73153ad..b46343b5c26b4 100644 +--- a/drivers/crypto/Kconfig ++++ b/drivers/crypto/Kconfig +@@ -772,7 +772,12 @@ config CRYPTO_DEV_IMGTEC_HASH + config CRYPTO_DEV_ROCKCHIP + tristate "Rockchip's Cryptographic Engine driver" + depends on OF && ARCH_ROCKCHIP ++ depends on PM ++ select CRYPTO_ECB ++ select CRYPTO_CBC ++ select CRYPTO_DES + select CRYPTO_AES ++ select CRYPTO_ENGINE + select CRYPTO_LIB_DES + select CRYPTO_MD5 + select CRYPTO_SHA1 diff --git a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-cipher.c b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-cipher.c index 54ae8d16e4931..35e3cadccac2b 100644 --- a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-cipher.c @@ -83258,7 +100387,7 @@ index 88194718a806c..859b7522faaac 100644 return 0; } diff --git a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-cipher.c b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-cipher.c -index 9ef1c85c4aaa5..3c46ad8c3a1c5 100644 +index 9ef1c85c4aaa5..005eefecfdf59 100644 --- a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-cipher.c +++ b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-cipher.c @@ -11,6 +11,7 @@ @@ -83285,7 +100414,7 @@ index 9ef1c85c4aaa5..3c46ad8c3a1c5 100644 + unsigned int ivsize = crypto_skcipher_ivsize(tfm); + struct sun8i_ss_flow *sf = &ss->flows[rctx->flow]; + int i = 0; -+ u32 a; ++ dma_addr_t a; + int err; + + rctx->ivlen = ivsize; @@ -83657,6 +100786,31 @@ index c6865cbd334b2..e79514fce731f 100644 return 0; } +diff --git a/drivers/crypto/amlogic/amlogic-gxl-core.c b/drivers/crypto/amlogic/amlogic-gxl-core.c +index 6e7ae896717cd..937187027ad57 100644 +--- a/drivers/crypto/amlogic/amlogic-gxl-core.c ++++ b/drivers/crypto/amlogic/amlogic-gxl-core.c +@@ -237,7 +237,6 @@ static int meson_crypto_probe(struct platform_device *pdev) + return err; + } + +- mc->irqs = devm_kcalloc(mc->dev, MAXFLOW, sizeof(int), GFP_KERNEL); + for (i = 0; i < MAXFLOW; i++) { + mc->irqs[i] = platform_get_irq(pdev, i); + if (mc->irqs[i] < 0) +diff --git a/drivers/crypto/amlogic/amlogic-gxl.h b/drivers/crypto/amlogic/amlogic-gxl.h +index dc0f142324a3c..8c0746a1d6d43 100644 +--- a/drivers/crypto/amlogic/amlogic-gxl.h ++++ b/drivers/crypto/amlogic/amlogic-gxl.h +@@ -95,7 +95,7 @@ struct meson_dev { + struct device *dev; + struct meson_flow *chanlist; + atomic_t flow; +- int *irqs; ++ int irqs[MAXFLOW]; + #ifdef CONFIG_CRYPTO_DEV_AMLOGIC_GXL_DEBUG + struct dentry *dbgfs_dir; + #endif diff --git a/drivers/crypto/atmel-aes.c b/drivers/crypto/atmel-aes.c index 9391ccc03382d..fe05584031914 100644 --- a/drivers/crypto/atmel-aes.c @@ -83890,6 +101044,18 @@ index 8c32d0eb8fcf2..6872ac3440010 100644 mcode->is_ae = is_ae; mcode->core_mask = 0ULL; +diff --git a/drivers/crypto/cavium/nitrox/nitrox_mbx.c b/drivers/crypto/cavium/nitrox/nitrox_mbx.c +index 2e9c0d2143632..199fcec9b8d0b 100644 +--- a/drivers/crypto/cavium/nitrox/nitrox_mbx.c ++++ b/drivers/crypto/cavium/nitrox/nitrox_mbx.c +@@ -191,6 +191,7 @@ int nitrox_mbox_init(struct nitrox_device *ndev) + ndev->iov.pf2vf_wq = alloc_workqueue("nitrox_pf2vf", 0, 0); + if (!ndev->iov.pf2vf_wq) { + kfree(ndev->iov.vfdev); ++ ndev->iov.vfdev = NULL; + return -ENOMEM; + } + /* enable pf2vf 
mailbox interrupts */ diff --git a/drivers/crypto/ccp/ccp-dmaengine.c b/drivers/crypto/ccp/ccp-dmaengine.c index d718db224be42..9f753cb4f5f18 100644 --- a/drivers/crypto/ccp/ccp-dmaengine.c @@ -84043,6 +101209,35 @@ index 2ecb0e1f65d8d..900727b5edda5 100644 if (rc) { dev_err(sev->dev, "SEV: failed to INIT error %#x\n", error); return; +diff --git a/drivers/crypto/ccp/sp-pci.c b/drivers/crypto/ccp/sp-pci.c +index 88c672ad27e44..9470a9a19f29d 100644 +--- a/drivers/crypto/ccp/sp-pci.c ++++ b/drivers/crypto/ccp/sp-pci.c +@@ -320,6 +320,15 @@ static const struct psp_vdata pspv3 = { + .inten_reg = 0x10690, + .intsts_reg = 0x10694, + }; ++ ++static const struct psp_vdata pspv4 = { ++ .sev = &sevv2, ++ .tee = &teev1, ++ .feature_reg = 0x109fc, ++ .inten_reg = 0x10690, ++ .intsts_reg = 0x10694, ++}; ++ + #endif + + static const struct sp_dev_vdata dev_vdata[] = { +@@ -365,7 +374,7 @@ static const struct sp_dev_vdata dev_vdata[] = { + { /* 5 */ + .bar = 2, + #ifdef CONFIG_CRYPTO_DEV_SP_PSP +- .psp_vdata = &pspv2, ++ .psp_vdata = &pspv4, + #endif + }, + }; diff --git a/drivers/crypto/ccree/cc_buffer_mgr.c b/drivers/crypto/ccree/cc_buffer_mgr.c index a5e041d9d2cf1..6140e49273226 100644 --- a/drivers/crypto/ccree/cc_buffer_mgr.c @@ -84163,8 +101358,21 @@ index 78833491f534d..309da6334a0a0 100644 } struct tdes_keys { +diff --git a/drivers/crypto/ccree/cc_debugfs.c b/drivers/crypto/ccree/cc_debugfs.c +index 7083767602fcf..8f008f024f8f1 100644 +--- a/drivers/crypto/ccree/cc_debugfs.c ++++ b/drivers/crypto/ccree/cc_debugfs.c +@@ -55,7 +55,7 @@ void __init cc_debugfs_global_init(void) + cc_debugfs_dir = debugfs_create_dir("ccree", NULL); + } + +-void __exit cc_debugfs_global_fini(void) ++void cc_debugfs_global_fini(void) + { + debugfs_remove(cc_debugfs_dir); + } diff --git a/drivers/crypto/ccree/cc_driver.c b/drivers/crypto/ccree/cc_driver.c -index e599ac6dc162a..790fa9058a36d 100644 +index e599ac6dc162a..41f0a404bdf9e 100644 --- a/drivers/crypto/ccree/cc_driver.c +++ b/drivers/crypto/ccree/cc_driver.c @@ -103,7 +103,8 @@ MODULE_DEVICE_TABLE(of, arm_ccree_dev_of_match); @@ -84177,6 +101385,25 @@ index e599ac6dc162a..790fa9058a36d 100644 /* compute CC_AXIM_CACHE_PARAMS */ cache_params = cc_ioread(drvdata, CC_REG(AXIM_CACHE_PARAMS)); +@@ -655,9 +656,17 @@ static struct platform_driver ccree_driver = { + + static int __init ccree_init(void) + { ++ int rc; ++ + cc_debugfs_global_init(); + +- return platform_driver_register(&ccree_driver); ++ rc = platform_driver_register(&ccree_driver); ++ if (rc) { ++ cc_debugfs_global_fini(); ++ return rc; ++ } ++ ++ return 0; + } + module_init(ccree_init); + diff --git a/drivers/crypto/gemini/sl3516-ce-cipher.c b/drivers/crypto/gemini/sl3516-ce-cipher.c index c1c2b1d866639..f2be0a7d7f7ac 100644 --- a/drivers/crypto/gemini/sl3516-ce-cipher.c @@ -84213,8 +101440,33 @@ index a032c192ef1d6..4062251fd1b68 100644 } else if (memcmp(ptr, p, ctx->key_sz) > 0) { hpre_curve25519_src_modulo_p(ptr); } +diff --git a/drivers/crypto/hisilicon/hpre/hpre_main.c b/drivers/crypto/hisilicon/hpre/hpre_main.c +index 65a641396c07f..edc61e4105f30 100644 +--- a/drivers/crypto/hisilicon/hpre/hpre_main.c ++++ b/drivers/crypto/hisilicon/hpre/hpre_main.c +@@ -1143,18 +1143,12 @@ err_with_qm_init: + static void hpre_remove(struct pci_dev *pdev) + { + struct hisi_qm *qm = pci_get_drvdata(pdev); +- int ret; + + hisi_qm_pm_uninit(qm); + hisi_qm_wait_task_finish(qm, &hpre_devices); + hisi_qm_alg_unregister(qm, &hpre_devices); +- if (qm->fun_type == QM_HW_PF && qm->vfs_num) { +- ret = 
hisi_qm_sriov_disable(pdev, true); +- if (ret) { +- pci_err(pdev, "Disable SRIOV fail!\n"); +- return; +- } +- } ++ if (qm->fun_type == QM_HW_PF && qm->vfs_num) ++ hisi_qm_sriov_disable(pdev, true); + + hpre_debugfs_exit(qm); + hisi_qm_stop(qm, QM_NORMAL); diff --git a/drivers/crypto/hisilicon/qm.c b/drivers/crypto/hisilicon/qm.c -index 369562d34d66a..b8900a5dbf6e1 100644 +index 369562d34d66a..fd89918abd191 100644 --- a/drivers/crypto/hisilicon/qm.c +++ b/drivers/crypto/hisilicon/qm.c @@ -1888,8 +1888,10 @@ static ssize_t qm_cmd_write(struct file *filp, const char __user *buffer, @@ -84239,7 +101491,28 @@ index 369562d34d66a..b8900a5dbf6e1 100644 /* reset mailbox qos val */ qm->mb_qos = 0; -@@ -5986,7 +5988,7 @@ int hisi_qm_resume(struct device *dev) +@@ -5725,8 +5727,8 @@ static int hisi_qm_memory_init(struct hisi_qm *qm) + GFP_ATOMIC); + dev_dbg(dev, "allocate qm dma buf size=%zx)\n", qm->qdma.size); + if (!qm->qdma.va) { +- ret = -ENOMEM; +- goto err_alloc_qdma; ++ ret = -ENOMEM; ++ goto err_destroy_idr; + } + + QM_INIT_BUF(qm, eqe, QM_EQ_DEPTH); +@@ -5742,7 +5744,8 @@ static int hisi_qm_memory_init(struct hisi_qm *qm) + + err_alloc_qp_array: + dma_free_coherent(dev, qm->qdma.size, qm->qdma.va, qm->qdma.dma); +-err_alloc_qdma: ++err_destroy_idr: ++ idr_destroy(&qm->qp_idr); + kfree(qm->factor); + + return ret; +@@ -5986,7 +5989,7 @@ int hisi_qm_resume(struct device *dev) if (ret) pci_err(pdev, "failed to start qm(%d)\n", ret); @@ -84248,6 +101521,36 @@ index 369562d34d66a..b8900a5dbf6e1 100644 } EXPORT_SYMBOL_GPL(hisi_qm_resume); +diff --git a/drivers/crypto/hisilicon/qm.h b/drivers/crypto/hisilicon/qm.h +index 3068093229a50..bbb35de994eb7 100644 +--- a/drivers/crypto/hisilicon/qm.h ++++ b/drivers/crypto/hisilicon/qm.h +@@ -318,14 +318,14 @@ struct hisi_qp { + static inline int q_num_set(const char *val, const struct kernel_param *kp, + unsigned int device) + { +- struct pci_dev *pdev = pci_get_device(PCI_VENDOR_ID_HUAWEI, +- device, NULL); ++ struct pci_dev *pdev; + u32 n, q_num; + int ret; + + if (!val) + return -EINVAL; + ++ pdev = pci_get_device(PCI_VENDOR_ID_HUAWEI, device, NULL); + if (!pdev) { + q_num = min_t(u32, QM_QNUM_V1, QM_QNUM_V2); + pr_info("No device found currently, suppose queue number is %u\n", +@@ -335,6 +335,8 @@ static inline int q_num_set(const char *val, const struct kernel_param *kp, + q_num = QM_QNUM_V1; + else + q_num = QM_QNUM_V2; ++ ++ pci_dev_put(pdev); + } + + ret = kstrtou32(val, 10, &n); diff --git a/drivers/crypto/hisilicon/sec/sec_algs.c b/drivers/crypto/hisilicon/sec/sec_algs.c index 0a3c8f019b025..490e1542305e1 100644 --- a/drivers/crypto/hisilicon/sec/sec_algs.c @@ -84521,6 +101824,29 @@ index 9520a4113c81e..a91e6e0e9c693 100644 }; static u16 sgl_sge_nr = HZIP_SGL_SGE_NR; +diff --git a/drivers/crypto/img-hash.c b/drivers/crypto/img-hash.c +index aa4c7b2af3e2e..34b41cbcfa8de 100644 +--- a/drivers/crypto/img-hash.c ++++ b/drivers/crypto/img-hash.c +@@ -358,12 +358,16 @@ static int img_hash_dma_init(struct img_hash_dev *hdev) + static void img_hash_dma_task(unsigned long d) + { + struct img_hash_dev *hdev = (struct img_hash_dev *)d; +- struct img_hash_request_ctx *ctx = ahash_request_ctx(hdev->req); ++ struct img_hash_request_ctx *ctx; + u8 *addr; + size_t nbytes, bleft, wsend, len, tbc; + struct scatterlist tsg; + +- if (!hdev->req || !ctx->sg) ++ if (!hdev->req) ++ return; ++ ++ ctx = ahash_request_ctx(hdev->req); ++ if (!ctx->sg) + return; + + addr = sg_virt(ctx->sg); diff --git a/drivers/crypto/inside-secure/safexcel.c 
b/drivers/crypto/inside-secure/safexcel.c index 9ff885d50edfc..389a7b51f1f38 100644 --- a/drivers/crypto/inside-secure/safexcel.c @@ -84719,6 +102045,58 @@ index d19e5ffb5104b..d6f9e2fe863d7 100644 src_buf = sg_virt(src); len = sg_dma_len(src); tlen += len; +diff --git a/drivers/crypto/n2_core.c b/drivers/crypto/n2_core.c +index 3b0bf6fea491a..b4db560105a9e 100644 +--- a/drivers/crypto/n2_core.c ++++ b/drivers/crypto/n2_core.c +@@ -1229,6 +1229,7 @@ struct n2_hash_tmpl { + const u8 *hash_init; + u8 hw_op_hashsz; + u8 digest_size; ++ u8 statesize; + u8 block_size; + u8 auth_type; + u8 hmac_type; +@@ -1260,6 +1261,7 @@ static const struct n2_hash_tmpl hash_tmpls[] = { + .hmac_type = AUTH_TYPE_HMAC_MD5, + .hw_op_hashsz = MD5_DIGEST_SIZE, + .digest_size = MD5_DIGEST_SIZE, ++ .statesize = sizeof(struct md5_state), + .block_size = MD5_HMAC_BLOCK_SIZE }, + { .name = "sha1", + .hash_zero = sha1_zero_message_hash, +@@ -1268,6 +1270,7 @@ static const struct n2_hash_tmpl hash_tmpls[] = { + .hmac_type = AUTH_TYPE_HMAC_SHA1, + .hw_op_hashsz = SHA1_DIGEST_SIZE, + .digest_size = SHA1_DIGEST_SIZE, ++ .statesize = sizeof(struct sha1_state), + .block_size = SHA1_BLOCK_SIZE }, + { .name = "sha256", + .hash_zero = sha256_zero_message_hash, +@@ -1276,6 +1279,7 @@ static const struct n2_hash_tmpl hash_tmpls[] = { + .hmac_type = AUTH_TYPE_HMAC_SHA256, + .hw_op_hashsz = SHA256_DIGEST_SIZE, + .digest_size = SHA256_DIGEST_SIZE, ++ .statesize = sizeof(struct sha256_state), + .block_size = SHA256_BLOCK_SIZE }, + { .name = "sha224", + .hash_zero = sha224_zero_message_hash, +@@ -1284,6 +1288,7 @@ static const struct n2_hash_tmpl hash_tmpls[] = { + .hmac_type = AUTH_TYPE_RESERVED, + .hw_op_hashsz = SHA256_DIGEST_SIZE, + .digest_size = SHA224_DIGEST_SIZE, ++ .statesize = sizeof(struct sha256_state), + .block_size = SHA224_BLOCK_SIZE }, + }; + #define NUM_HASH_TMPLS ARRAY_SIZE(hash_tmpls) +@@ -1424,6 +1429,7 @@ static int __n2_register_one_ahash(const struct n2_hash_tmpl *tmpl) + + halg = &ahash->halg; + halg->digestsize = tmpl->digest_size; ++ halg->statesize = tmpl->statesize; + + base = &halg->base; + snprintf(base->cra_name, CRYPTO_MAX_ALG_NAME, "%s", tmpl->name); diff --git a/drivers/crypto/nx/nx-common-powernv.c b/drivers/crypto/nx/nx-common-powernv.c index 32a036ada5d0a..f418817c0f43e 100644 --- a/drivers/crypto/nx/nx-common-powernv.c @@ -84745,6 +102123,19 @@ index 9b968ac4ee7b6..a196bb8b17010 100644 return 0; } #endif +diff --git a/drivers/crypto/omap-sham.c b/drivers/crypto/omap-sham.c +index f6bf53c00b614..4ec6949a7ca9e 100644 +--- a/drivers/crypto/omap-sham.c ++++ b/drivers/crypto/omap-sham.c +@@ -2114,7 +2114,7 @@ static int omap_sham_probe(struct platform_device *pdev) + + pm_runtime_enable(dev); + +- err = pm_runtime_get_sync(dev); ++ err = pm_runtime_resume_and_get(dev); + if (err < 0) { + dev_err(dev, "failed to get sync: %d\n", err); + goto err_pm; diff --git a/drivers/crypto/qat/qat_4xxx/adf_4xxx_hw_data.c b/drivers/crypto/qat/qat_4xxx/adf_4xxx_hw_data.c index 33d8e50dcbdac..88c0ded411f15 100644 --- a/drivers/crypto/qat/qat_4xxx/adf_4xxx_hw_data.c @@ -86350,11 +103741,1306 @@ index 99ba8d51d1020..031b5f701a0a3 100644 } static int qcom_rng_seed(struct crypto_rng *tfm, const u8 *seed, +diff --git a/drivers/crypto/rockchip/rk3288_crypto.c b/drivers/crypto/rockchip/rk3288_crypto.c +index 35d73061d1569..14a0aef18ab13 100644 +--- a/drivers/crypto/rockchip/rk3288_crypto.c ++++ b/drivers/crypto/rockchip/rk3288_crypto.c +@@ -65,186 +65,24 @@ static void rk_crypto_disable_clk(struct rk_crypto_info *dev) + 
clk_disable_unprepare(dev->sclk); + } + +-static int check_alignment(struct scatterlist *sg_src, +- struct scatterlist *sg_dst, +- int align_mask) +-{ +- int in, out, align; +- +- in = IS_ALIGNED((uint32_t)sg_src->offset, 4) && +- IS_ALIGNED((uint32_t)sg_src->length, align_mask); +- if (!sg_dst) +- return in; +- out = IS_ALIGNED((uint32_t)sg_dst->offset, 4) && +- IS_ALIGNED((uint32_t)sg_dst->length, align_mask); +- align = in && out; +- +- return (align && (sg_src->length == sg_dst->length)); +-} +- +-static int rk_load_data(struct rk_crypto_info *dev, +- struct scatterlist *sg_src, +- struct scatterlist *sg_dst) +-{ +- unsigned int count; +- +- dev->aligned = dev->aligned ? +- check_alignment(sg_src, sg_dst, dev->align_size) : +- dev->aligned; +- if (dev->aligned) { +- count = min(dev->left_bytes, sg_src->length); +- dev->left_bytes -= count; +- +- if (!dma_map_sg(dev->dev, sg_src, 1, DMA_TO_DEVICE)) { +- dev_err(dev->dev, "[%s:%d] dma_map_sg(src) error\n", +- __func__, __LINE__); +- return -EINVAL; +- } +- dev->addr_in = sg_dma_address(sg_src); +- +- if (sg_dst) { +- if (!dma_map_sg(dev->dev, sg_dst, 1, DMA_FROM_DEVICE)) { +- dev_err(dev->dev, +- "[%s:%d] dma_map_sg(dst) error\n", +- __func__, __LINE__); +- dma_unmap_sg(dev->dev, sg_src, 1, +- DMA_TO_DEVICE); +- return -EINVAL; +- } +- dev->addr_out = sg_dma_address(sg_dst); +- } +- } else { +- count = (dev->left_bytes > PAGE_SIZE) ? +- PAGE_SIZE : dev->left_bytes; +- +- if (!sg_pcopy_to_buffer(dev->first, dev->src_nents, +- dev->addr_vir, count, +- dev->total - dev->left_bytes)) { +- dev_err(dev->dev, "[%s:%d] pcopy err\n", +- __func__, __LINE__); +- return -EINVAL; +- } +- dev->left_bytes -= count; +- sg_init_one(&dev->sg_tmp, dev->addr_vir, count); +- if (!dma_map_sg(dev->dev, &dev->sg_tmp, 1, DMA_TO_DEVICE)) { +- dev_err(dev->dev, "[%s:%d] dma_map_sg(sg_tmp) error\n", +- __func__, __LINE__); +- return -ENOMEM; +- } +- dev->addr_in = sg_dma_address(&dev->sg_tmp); +- +- if (sg_dst) { +- if (!dma_map_sg(dev->dev, &dev->sg_tmp, 1, +- DMA_FROM_DEVICE)) { +- dev_err(dev->dev, +- "[%s:%d] dma_map_sg(sg_tmp) error\n", +- __func__, __LINE__); +- dma_unmap_sg(dev->dev, &dev->sg_tmp, 1, +- DMA_TO_DEVICE); +- return -ENOMEM; +- } +- dev->addr_out = sg_dma_address(&dev->sg_tmp); +- } +- } +- dev->count = count; +- return 0; +-} +- +-static void rk_unload_data(struct rk_crypto_info *dev) +-{ +- struct scatterlist *sg_in, *sg_out; +- +- sg_in = dev->aligned ? dev->sg_src : &dev->sg_tmp; +- dma_unmap_sg(dev->dev, sg_in, 1, DMA_TO_DEVICE); +- +- if (dev->sg_dst) { +- sg_out = dev->aligned ? 
dev->sg_dst : &dev->sg_tmp; +- dma_unmap_sg(dev->dev, sg_out, 1, DMA_FROM_DEVICE); +- } +-} +- + static irqreturn_t rk_crypto_irq_handle(int irq, void *dev_id) + { + struct rk_crypto_info *dev = platform_get_drvdata(dev_id); + u32 interrupt_status; + +- spin_lock(&dev->lock); + interrupt_status = CRYPTO_READ(dev, RK_CRYPTO_INTSTS); + CRYPTO_WRITE(dev, RK_CRYPTO_INTSTS, interrupt_status); + ++ dev->status = 1; + if (interrupt_status & 0x0a) { + dev_warn(dev->dev, "DMA Error\n"); +- dev->err = -EFAULT; ++ dev->status = 0; + } +- tasklet_schedule(&dev->done_task); ++ complete(&dev->complete); + +- spin_unlock(&dev->lock); + return IRQ_HANDLED; + } + +-static int rk_crypto_enqueue(struct rk_crypto_info *dev, +- struct crypto_async_request *async_req) +-{ +- unsigned long flags; +- int ret; +- +- spin_lock_irqsave(&dev->lock, flags); +- ret = crypto_enqueue_request(&dev->queue, async_req); +- if (dev->busy) { +- spin_unlock_irqrestore(&dev->lock, flags); +- return ret; +- } +- dev->busy = true; +- spin_unlock_irqrestore(&dev->lock, flags); +- tasklet_schedule(&dev->queue_task); +- +- return ret; +-} +- +-static void rk_crypto_queue_task_cb(unsigned long data) +-{ +- struct rk_crypto_info *dev = (struct rk_crypto_info *)data; +- struct crypto_async_request *async_req, *backlog; +- unsigned long flags; +- int err = 0; +- +- dev->err = 0; +- spin_lock_irqsave(&dev->lock, flags); +- backlog = crypto_get_backlog(&dev->queue); +- async_req = crypto_dequeue_request(&dev->queue); +- +- if (!async_req) { +- dev->busy = false; +- spin_unlock_irqrestore(&dev->lock, flags); +- return; +- } +- spin_unlock_irqrestore(&dev->lock, flags); +- +- if (backlog) { +- backlog->complete(backlog, -EINPROGRESS); +- backlog = NULL; +- } +- +- dev->async_req = async_req; +- err = dev->start(dev); +- if (err) +- dev->complete(dev->async_req, err); +-} +- +-static void rk_crypto_done_task_cb(unsigned long data) +-{ +- struct rk_crypto_info *dev = (struct rk_crypto_info *)data; +- +- if (dev->err) { +- dev->complete(dev->async_req, dev->err); +- return; +- } +- +- dev->err = dev->update(dev); +- if (dev->err) +- dev->complete(dev->async_req, dev->err); +-} +- + static struct rk_crypto_tmp *rk_cipher_algs[] = { + &rk_ecb_aes_alg, + &rk_cbc_aes_alg, +@@ -337,8 +175,6 @@ static int rk_crypto_probe(struct platform_device *pdev) + if (err) + goto err_crypto; + +- spin_lock_init(&crypto_info->lock); +- + crypto_info->reg = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(crypto_info->reg)) { + err = PTR_ERR(crypto_info->reg); +@@ -389,18 +225,11 @@ static int rk_crypto_probe(struct platform_device *pdev) + crypto_info->dev = &pdev->dev; + platform_set_drvdata(pdev, crypto_info); + +- tasklet_init(&crypto_info->queue_task, +- rk_crypto_queue_task_cb, (unsigned long)crypto_info); +- tasklet_init(&crypto_info->done_task, +- rk_crypto_done_task_cb, (unsigned long)crypto_info); +- crypto_init_queue(&crypto_info->queue, 50); ++ crypto_info->engine = crypto_engine_alloc_init(&pdev->dev, true); ++ crypto_engine_start(crypto_info->engine); ++ init_completion(&crypto_info->complete); + +- crypto_info->enable_clk = rk_crypto_enable_clk; +- crypto_info->disable_clk = rk_crypto_disable_clk; +- crypto_info->load_data = rk_load_data; +- crypto_info->unload_data = rk_unload_data; +- crypto_info->enqueue = rk_crypto_enqueue; +- crypto_info->busy = false; ++ rk_crypto_enable_clk(crypto_info); + + err = rk_crypto_register(crypto_info); + if (err) { +@@ -412,9 +241,9 @@ static int rk_crypto_probe(struct platform_device *pdev) + return 0; + + 
err_register_alg: +- tasklet_kill(&crypto_info->queue_task); +- tasklet_kill(&crypto_info->done_task); ++ crypto_engine_exit(crypto_info->engine); + err_crypto: ++ dev_err(dev, "Crypto Accelerator not successfully registered\n"); + return err; + } + +@@ -423,8 +252,8 @@ static int rk_crypto_remove(struct platform_device *pdev) + struct rk_crypto_info *crypto_tmp = platform_get_drvdata(pdev); + + rk_crypto_unregister(); +- tasklet_kill(&crypto_tmp->done_task); +- tasklet_kill(&crypto_tmp->queue_task); ++ rk_crypto_disable_clk(crypto_tmp); ++ crypto_engine_exit(crypto_tmp->engine); + return 0; + } + +diff --git a/drivers/crypto/rockchip/rk3288_crypto.h b/drivers/crypto/rockchip/rk3288_crypto.h +index 97278c2574ff9..045e811b4af84 100644 +--- a/drivers/crypto/rockchip/rk3288_crypto.h ++++ b/drivers/crypto/rockchip/rk3288_crypto.h +@@ -5,9 +5,11 @@ + #include <crypto/aes.h> + #include <crypto/internal/des.h> + #include <crypto/algapi.h> ++#include <linux/dma-mapping.h> + #include <linux/interrupt.h> + #include <linux/delay.h> + #include <linux/scatterlist.h> ++#include <crypto/engine.h> + #include <crypto/internal/hash.h> + #include <crypto/internal/skcipher.h> + +@@ -193,45 +195,15 @@ struct rk_crypto_info { + struct reset_control *rst; + void __iomem *reg; + int irq; +- struct crypto_queue queue; +- struct tasklet_struct queue_task; +- struct tasklet_struct done_task; +- struct crypto_async_request *async_req; +- int err; +- /* device lock */ +- spinlock_t lock; +- +- /* the public variable */ +- struct scatterlist *sg_src; +- struct scatterlist *sg_dst; +- struct scatterlist sg_tmp; +- struct scatterlist *first; +- unsigned int left_bytes; +- void *addr_vir; +- int aligned; +- int align_size; +- size_t src_nents; +- size_t dst_nents; +- unsigned int total; +- unsigned int count; +- dma_addr_t addr_in; +- dma_addr_t addr_out; +- bool busy; +- int (*start)(struct rk_crypto_info *dev); +- int (*update)(struct rk_crypto_info *dev); +- void (*complete)(struct crypto_async_request *base, int err); +- int (*enable_clk)(struct rk_crypto_info *dev); +- void (*disable_clk)(struct rk_crypto_info *dev); +- int (*load_data)(struct rk_crypto_info *dev, +- struct scatterlist *sg_src, +- struct scatterlist *sg_dst); +- void (*unload_data)(struct rk_crypto_info *dev); +- int (*enqueue)(struct rk_crypto_info *dev, +- struct crypto_async_request *async_req); ++ ++ struct crypto_engine *engine; ++ struct completion complete; ++ int status; + }; + + /* the private variable of hash */ + struct rk_ahash_ctx { ++ struct crypto_engine_ctx enginectx; + struct rk_crypto_info *dev; + /* for fallback */ + struct crypto_ahash *fallback_tfm; +@@ -241,14 +213,23 @@ struct rk_ahash_ctx { + struct rk_ahash_rctx { + struct ahash_request fallback_req; + u32 mode; ++ int nrsg; + }; + + /* the private variable of cipher */ + struct rk_cipher_ctx { ++ struct crypto_engine_ctx enginectx; + struct rk_crypto_info *dev; + unsigned int keylen; +- u32 mode; ++ u8 key[AES_MAX_KEY_SIZE]; + u8 iv[AES_BLOCK_SIZE]; ++ struct crypto_skcipher *fallback_tfm; ++}; ++ ++struct rk_cipher_rctx { ++ u8 backup_iv[AES_BLOCK_SIZE]; ++ u32 mode; ++ struct skcipher_request fallback_req; // keep at the end + }; + + enum alg_type { +diff --git a/drivers/crypto/rockchip/rk3288_crypto_ahash.c b/drivers/crypto/rockchip/rk3288_crypto_ahash.c +index ed03058497bc2..edd40e16a3f0a 100644 +--- a/drivers/crypto/rockchip/rk3288_crypto_ahash.c ++++ b/drivers/crypto/rockchip/rk3288_crypto_ahash.c +@@ -9,6 +9,7 @@ + * Some ideas are from marvell/cesa.c and s5p-sss.c 
driver. + */ + #include <linux/device.h> ++#include <asm/unaligned.h> + #include "rk3288_crypto.h" + + /* +@@ -16,6 +17,40 @@ + * so we put the fixed hash out when met zero message. + */ + ++static bool rk_ahash_need_fallback(struct ahash_request *req) ++{ ++ struct scatterlist *sg; ++ ++ sg = req->src; ++ while (sg) { ++ if (!IS_ALIGNED(sg->offset, sizeof(u32))) { ++ return true; ++ } ++ if (sg->length % 4) { ++ return true; ++ } ++ sg = sg_next(sg); ++ } ++ return false; ++} ++ ++static int rk_ahash_digest_fb(struct ahash_request *areq) ++{ ++ struct rk_ahash_rctx *rctx = ahash_request_ctx(areq); ++ struct crypto_ahash *tfm = crypto_ahash_reqtfm(areq); ++ struct rk_ahash_ctx *tfmctx = crypto_ahash_ctx(tfm); ++ ++ ahash_request_set_tfm(&rctx->fallback_req, tfmctx->fallback_tfm); ++ rctx->fallback_req.base.flags = areq->base.flags & ++ CRYPTO_TFM_REQ_MAY_SLEEP; ++ ++ rctx->fallback_req.nbytes = areq->nbytes; ++ rctx->fallback_req.src = areq->src; ++ rctx->fallback_req.result = areq->result; ++ ++ return crypto_ahash_digest(&rctx->fallback_req); ++} ++ + static int zero_message_process(struct ahash_request *req) + { + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); +@@ -38,16 +73,12 @@ static int zero_message_process(struct ahash_request *req) + return 0; + } + +-static void rk_ahash_crypto_complete(struct crypto_async_request *base, int err) +-{ +- if (base->complete) +- base->complete(base, err); +-} +- +-static void rk_ahash_reg_init(struct rk_crypto_info *dev) ++static void rk_ahash_reg_init(struct ahash_request *req) + { +- struct ahash_request *req = ahash_request_cast(dev->async_req); + struct rk_ahash_rctx *rctx = ahash_request_ctx(req); ++ struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); ++ struct rk_ahash_ctx *tctx = crypto_ahash_ctx(tfm); ++ struct rk_crypto_info *dev = tctx->dev; + int reg_status; + + reg_status = CRYPTO_READ(dev, RK_CRYPTO_CTRL) | +@@ -74,7 +105,7 @@ static void rk_ahash_reg_init(struct rk_crypto_info *dev) + RK_CRYPTO_BYTESWAP_BRFIFO | + RK_CRYPTO_BYTESWAP_BTFIFO); + +- CRYPTO_WRITE(dev, RK_CRYPTO_HASH_MSG_LEN, dev->total); ++ CRYPTO_WRITE(dev, RK_CRYPTO_HASH_MSG_LEN, req->nbytes); + } + + static int rk_ahash_init(struct ahash_request *req) +@@ -167,48 +198,64 @@ static int rk_ahash_digest(struct ahash_request *req) + struct rk_ahash_ctx *tctx = crypto_tfm_ctx(req->base.tfm); + struct rk_crypto_info *dev = tctx->dev; + ++ if (rk_ahash_need_fallback(req)) ++ return rk_ahash_digest_fb(req); ++ + if (!req->nbytes) + return zero_message_process(req); +- else +- return dev->enqueue(dev, &req->base); ++ ++ return crypto_transfer_hash_request_to_engine(dev->engine, req); + } + +-static void crypto_ahash_dma_start(struct rk_crypto_info *dev) ++static void crypto_ahash_dma_start(struct rk_crypto_info *dev, struct scatterlist *sg) + { +- CRYPTO_WRITE(dev, RK_CRYPTO_HRDMAS, dev->addr_in); +- CRYPTO_WRITE(dev, RK_CRYPTO_HRDMAL, (dev->count + 3) / 4); ++ CRYPTO_WRITE(dev, RK_CRYPTO_HRDMAS, sg_dma_address(sg)); ++ CRYPTO_WRITE(dev, RK_CRYPTO_HRDMAL, sg_dma_len(sg) / 4); + CRYPTO_WRITE(dev, RK_CRYPTO_CTRL, RK_CRYPTO_HASH_START | + (RK_CRYPTO_HASH_START << 16)); + } + +-static int rk_ahash_set_data_start(struct rk_crypto_info *dev) ++static int rk_hash_prepare(struct crypto_engine *engine, void *breq) ++{ ++ struct ahash_request *areq = container_of(breq, struct ahash_request, base); ++ struct crypto_ahash *tfm = crypto_ahash_reqtfm(areq); ++ struct rk_ahash_rctx *rctx = ahash_request_ctx(areq); ++ struct rk_ahash_ctx *tctx = crypto_ahash_ctx(tfm); ++ int ret; ++ ++ 
ret = dma_map_sg(tctx->dev->dev, areq->src, sg_nents(areq->src), DMA_TO_DEVICE); ++ if (ret <= 0) ++ return -EINVAL; ++ ++ rctx->nrsg = ret; ++ ++ return 0; ++} ++ ++static int rk_hash_unprepare(struct crypto_engine *engine, void *breq) + { +- int err; ++ struct ahash_request *areq = container_of(breq, struct ahash_request, base); ++ struct crypto_ahash *tfm = crypto_ahash_reqtfm(areq); ++ struct rk_ahash_rctx *rctx = ahash_request_ctx(areq); ++ struct rk_ahash_ctx *tctx = crypto_ahash_ctx(tfm); + +- err = dev->load_data(dev, dev->sg_src, NULL); +- if (!err) +- crypto_ahash_dma_start(dev); +- return err; ++ dma_unmap_sg(tctx->dev->dev, areq->src, rctx->nrsg, DMA_TO_DEVICE); ++ return 0; + } + +-static int rk_ahash_start(struct rk_crypto_info *dev) ++static int rk_hash_run(struct crypto_engine *engine, void *breq) + { +- struct ahash_request *req = ahash_request_cast(dev->async_req); +- struct crypto_ahash *tfm; +- struct rk_ahash_rctx *rctx; +- +- dev->total = req->nbytes; +- dev->left_bytes = req->nbytes; +- dev->aligned = 0; +- dev->align_size = 4; +- dev->sg_dst = NULL; +- dev->sg_src = req->src; +- dev->first = req->src; +- dev->src_nents = sg_nents(req->src); +- rctx = ahash_request_ctx(req); ++ struct ahash_request *areq = container_of(breq, struct ahash_request, base); ++ struct crypto_ahash *tfm = crypto_ahash_reqtfm(areq); ++ struct rk_ahash_rctx *rctx = ahash_request_ctx(areq); ++ struct rk_ahash_ctx *tctx = crypto_ahash_ctx(tfm); ++ struct scatterlist *sg = areq->src; ++ int err = 0; ++ int i; ++ u32 v; ++ + rctx->mode = 0; + +- tfm = crypto_ahash_reqtfm(req); + switch (crypto_ahash_digestsize(tfm)) { + case SHA1_DIGEST_SIZE: + rctx->mode = RK_CRYPTO_HASH_SHA1; +@@ -220,32 +267,26 @@ static int rk_ahash_start(struct rk_crypto_info *dev) + rctx->mode = RK_CRYPTO_HASH_MD5; + break; + default: +- return -EINVAL; ++ err = -EINVAL; ++ goto theend; + } + +- rk_ahash_reg_init(dev); +- return rk_ahash_set_data_start(dev); +-} +- +-static int rk_ahash_crypto_rx(struct rk_crypto_info *dev) +-{ +- int err = 0; +- struct ahash_request *req = ahash_request_cast(dev->async_req); +- struct crypto_ahash *tfm; +- +- dev->unload_data(dev); +- if (dev->left_bytes) { +- if (dev->aligned) { +- if (sg_is_last(dev->sg_src)) { +- dev_warn(dev->dev, "[%s:%d], Lack of data\n", +- __func__, __LINE__); +- err = -ENOMEM; +- goto out_rx; +- } +- dev->sg_src = sg_next(dev->sg_src); ++ rk_ahash_reg_init(areq); ++ ++ while (sg) { ++ reinit_completion(&tctx->dev->complete); ++ tctx->dev->status = 0; ++ crypto_ahash_dma_start(tctx->dev, sg); ++ wait_for_completion_interruptible_timeout(&tctx->dev->complete, ++ msecs_to_jiffies(2000)); ++ if (!tctx->dev->status) { ++ dev_err(tctx->dev->dev, "DMA timeout\n"); ++ err = -EFAULT; ++ goto theend; + } +- err = rk_ahash_set_data_start(dev); +- } else { ++ sg = sg_next(sg); ++ } ++ + /* + * it will take some time to process date after last dma + * transmission. +@@ -256,18 +297,20 @@ static int rk_ahash_crypto_rx(struct rk_crypto_info *dev) + * efficiency, and make it response quickly when dma + * complete. 
+ */ +- while (!CRYPTO_READ(dev, RK_CRYPTO_HASH_STS)) +- udelay(10); +- +- tfm = crypto_ahash_reqtfm(req); +- memcpy_fromio(req->result, dev->reg + RK_CRYPTO_HASH_DOUT_0, +- crypto_ahash_digestsize(tfm)); +- dev->complete(dev->async_req, 0); +- tasklet_schedule(&dev->queue_task); ++ while (!CRYPTO_READ(tctx->dev, RK_CRYPTO_HASH_STS)) ++ udelay(10); ++ ++ for (i = 0; i < crypto_ahash_digestsize(tfm) / 4; i++) { ++ v = readl(tctx->dev->reg + RK_CRYPTO_HASH_DOUT_0 + i * 4); ++ put_unaligned_le32(v, areq->result + i * 4); + } + +-out_rx: +- return err; ++theend: ++ local_bh_disable(); ++ crypto_finalize_hash_request(engine, breq, err); ++ local_bh_enable(); ++ ++ return 0; + } + + static int rk_cra_hash_init(struct crypto_tfm *tfm) +@@ -281,14 +324,6 @@ static int rk_cra_hash_init(struct crypto_tfm *tfm) + algt = container_of(alg, struct rk_crypto_tmp, alg.hash); + + tctx->dev = algt->dev; +- tctx->dev->addr_vir = (void *)__get_free_page(GFP_KERNEL); +- if (!tctx->dev->addr_vir) { +- dev_err(tctx->dev->dev, "failed to kmalloc for addr_vir\n"); +- return -ENOMEM; +- } +- tctx->dev->start = rk_ahash_start; +- tctx->dev->update = rk_ahash_crypto_rx; +- tctx->dev->complete = rk_ahash_crypto_complete; + + /* for fallback */ + tctx->fallback_tfm = crypto_alloc_ahash(alg_name, 0, +@@ -297,19 +332,23 @@ static int rk_cra_hash_init(struct crypto_tfm *tfm) + dev_err(tctx->dev->dev, "Could not load fallback driver.\n"); + return PTR_ERR(tctx->fallback_tfm); + } ++ + crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm), + sizeof(struct rk_ahash_rctx) + + crypto_ahash_reqsize(tctx->fallback_tfm)); + +- return tctx->dev->enable_clk(tctx->dev); ++ tctx->enginectx.op.do_one_request = rk_hash_run; ++ tctx->enginectx.op.prepare_request = rk_hash_prepare; ++ tctx->enginectx.op.unprepare_request = rk_hash_unprepare; ++ ++ return 0; + } + + static void rk_cra_hash_exit(struct crypto_tfm *tfm) + { + struct rk_ahash_ctx *tctx = crypto_tfm_ctx(tfm); + +- free_page((unsigned long)tctx->dev->addr_vir); +- return tctx->dev->disable_clk(tctx->dev); ++ crypto_free_ahash(tctx->fallback_tfm); + } + + struct rk_crypto_tmp rk_ahash_sha1 = { diff --git a/drivers/crypto/rockchip/rk3288_crypto_skcipher.c b/drivers/crypto/rockchip/rk3288_crypto_skcipher.c -index 1cece1a7d3f00..5bbf0d2722e11 100644 +index 1cece1a7d3f00..67a7e05d5ae31 100644 --- a/drivers/crypto/rockchip/rk3288_crypto_skcipher.c +++ b/drivers/crypto/rockchip/rk3288_crypto_skcipher.c -@@ -506,7 +506,6 @@ struct rk_crypto_tmp rk_ecb_des3_ede_alg = { +@@ -9,23 +9,77 @@ + * Some ideas are from marvell-cesa.c and s5p-sss.c driver. 
+ */ + #include <linux/device.h> ++#include <crypto/scatterwalk.h> + #include "rk3288_crypto.h" + + #define RK_CRYPTO_DEC BIT(0) + +-static void rk_crypto_complete(struct crypto_async_request *base, int err) ++static int rk_cipher_need_fallback(struct skcipher_request *req) + { +- if (base->complete) +- base->complete(base, err); ++ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); ++ unsigned int bs = crypto_skcipher_blocksize(tfm); ++ struct scatterlist *sgs, *sgd; ++ unsigned int stodo, dtodo, len; ++ ++ if (!req->cryptlen) ++ return true; ++ ++ len = req->cryptlen; ++ sgs = req->src; ++ sgd = req->dst; ++ while (sgs && sgd) { ++ if (!IS_ALIGNED(sgs->offset, sizeof(u32))) { ++ return true; ++ } ++ if (!IS_ALIGNED(sgd->offset, sizeof(u32))) { ++ return true; ++ } ++ stodo = min(len, sgs->length); ++ if (stodo % bs) { ++ return true; ++ } ++ dtodo = min(len, sgd->length); ++ if (dtodo % bs) { ++ return true; ++ } ++ if (stodo != dtodo) { ++ return true; ++ } ++ len -= stodo; ++ sgs = sg_next(sgs); ++ sgd = sg_next(sgd); ++ } ++ return false; ++} ++ ++static int rk_cipher_fallback(struct skcipher_request *areq) ++{ ++ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(areq); ++ struct rk_cipher_ctx *op = crypto_skcipher_ctx(tfm); ++ struct rk_cipher_rctx *rctx = skcipher_request_ctx(areq); ++ int err; ++ ++ skcipher_request_set_tfm(&rctx->fallback_req, op->fallback_tfm); ++ skcipher_request_set_callback(&rctx->fallback_req, areq->base.flags, ++ areq->base.complete, areq->base.data); ++ skcipher_request_set_crypt(&rctx->fallback_req, areq->src, areq->dst, ++ areq->cryptlen, areq->iv); ++ if (rctx->mode & RK_CRYPTO_DEC) ++ err = crypto_skcipher_decrypt(&rctx->fallback_req); ++ else ++ err = crypto_skcipher_encrypt(&rctx->fallback_req); ++ return err; + } + + static int rk_handle_req(struct rk_crypto_info *dev, + struct skcipher_request *req) + { +- if (!IS_ALIGNED(req->cryptlen, dev->align_size)) +- return -EINVAL; +- else +- return dev->enqueue(dev, &req->base); ++ struct crypto_engine *engine = dev->engine; ++ ++ if (rk_cipher_need_fallback(req)) ++ return rk_cipher_fallback(req); ++ ++ return crypto_transfer_skcipher_request_to_engine(engine, req); + } + + static int rk_aes_setkey(struct crypto_skcipher *cipher, +@@ -38,8 +92,9 @@ static int rk_aes_setkey(struct crypto_skcipher *cipher, + keylen != AES_KEYSIZE_256) + return -EINVAL; + ctx->keylen = keylen; +- memcpy_toio(ctx->dev->reg + RK_CRYPTO_AES_KEY_0, key, keylen); +- return 0; ++ memcpy(ctx->key, key, keylen); ++ ++ return crypto_skcipher_setkey(ctx->fallback_tfm, key, keylen); + } + + static int rk_des_setkey(struct crypto_skcipher *cipher, +@@ -53,8 +108,9 @@ static int rk_des_setkey(struct crypto_skcipher *cipher, + return err; + + ctx->keylen = keylen; +- memcpy_toio(ctx->dev->reg + RK_CRYPTO_TDES_KEY1_0, key, keylen); +- return 0; ++ memcpy(ctx->key, key, keylen); ++ ++ return crypto_skcipher_setkey(ctx->fallback_tfm, key, keylen); + } + + static int rk_tdes_setkey(struct crypto_skcipher *cipher, +@@ -68,17 +124,19 @@ static int rk_tdes_setkey(struct crypto_skcipher *cipher, + return err; + + ctx->keylen = keylen; +- memcpy_toio(ctx->dev->reg + RK_CRYPTO_TDES_KEY1_0, key, keylen); +- return 0; ++ memcpy(ctx->key, key, keylen); ++ ++ return crypto_skcipher_setkey(ctx->fallback_tfm, key, keylen); + } + + static int rk_aes_ecb_encrypt(struct skcipher_request *req) + { + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm); ++ struct rk_cipher_rctx *rctx = 
skcipher_request_ctx(req); + struct rk_crypto_info *dev = ctx->dev; + +- ctx->mode = RK_CRYPTO_AES_ECB_MODE; ++ rctx->mode = RK_CRYPTO_AES_ECB_MODE; + return rk_handle_req(dev, req); + } + +@@ -86,9 +144,10 @@ static int rk_aes_ecb_decrypt(struct skcipher_request *req) + { + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm); ++ struct rk_cipher_rctx *rctx = skcipher_request_ctx(req); + struct rk_crypto_info *dev = ctx->dev; + +- ctx->mode = RK_CRYPTO_AES_ECB_MODE | RK_CRYPTO_DEC; ++ rctx->mode = RK_CRYPTO_AES_ECB_MODE | RK_CRYPTO_DEC; + return rk_handle_req(dev, req); + } + +@@ -96,9 +155,10 @@ static int rk_aes_cbc_encrypt(struct skcipher_request *req) + { + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm); ++ struct rk_cipher_rctx *rctx = skcipher_request_ctx(req); + struct rk_crypto_info *dev = ctx->dev; + +- ctx->mode = RK_CRYPTO_AES_CBC_MODE; ++ rctx->mode = RK_CRYPTO_AES_CBC_MODE; + return rk_handle_req(dev, req); + } + +@@ -106,9 +166,10 @@ static int rk_aes_cbc_decrypt(struct skcipher_request *req) + { + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm); ++ struct rk_cipher_rctx *rctx = skcipher_request_ctx(req); + struct rk_crypto_info *dev = ctx->dev; + +- ctx->mode = RK_CRYPTO_AES_CBC_MODE | RK_CRYPTO_DEC; ++ rctx->mode = RK_CRYPTO_AES_CBC_MODE | RK_CRYPTO_DEC; + return rk_handle_req(dev, req); + } + +@@ -116,9 +177,10 @@ static int rk_des_ecb_encrypt(struct skcipher_request *req) + { + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm); ++ struct rk_cipher_rctx *rctx = skcipher_request_ctx(req); + struct rk_crypto_info *dev = ctx->dev; + +- ctx->mode = 0; ++ rctx->mode = 0; + return rk_handle_req(dev, req); + } + +@@ -126,9 +188,10 @@ static int rk_des_ecb_decrypt(struct skcipher_request *req) + { + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm); ++ struct rk_cipher_rctx *rctx = skcipher_request_ctx(req); + struct rk_crypto_info *dev = ctx->dev; + +- ctx->mode = RK_CRYPTO_DEC; ++ rctx->mode = RK_CRYPTO_DEC; + return rk_handle_req(dev, req); + } + +@@ -136,9 +199,10 @@ static int rk_des_cbc_encrypt(struct skcipher_request *req) + { + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm); ++ struct rk_cipher_rctx *rctx = skcipher_request_ctx(req); + struct rk_crypto_info *dev = ctx->dev; + +- ctx->mode = RK_CRYPTO_TDES_CHAINMODE_CBC; ++ rctx->mode = RK_CRYPTO_TDES_CHAINMODE_CBC; + return rk_handle_req(dev, req); + } + +@@ -146,9 +210,10 @@ static int rk_des_cbc_decrypt(struct skcipher_request *req) + { + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm); ++ struct rk_cipher_rctx *rctx = skcipher_request_ctx(req); + struct rk_crypto_info *dev = ctx->dev; + +- ctx->mode = RK_CRYPTO_TDES_CHAINMODE_CBC | RK_CRYPTO_DEC; ++ rctx->mode = RK_CRYPTO_TDES_CHAINMODE_CBC | RK_CRYPTO_DEC; + return rk_handle_req(dev, req); + } + +@@ -156,9 +221,10 @@ static int rk_des3_ede_ecb_encrypt(struct skcipher_request *req) + { + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm); ++ struct rk_cipher_rctx *rctx = skcipher_request_ctx(req); + struct rk_crypto_info *dev = ctx->dev; + +- ctx->mode = 
RK_CRYPTO_TDES_SELECT; ++ rctx->mode = RK_CRYPTO_TDES_SELECT; + return rk_handle_req(dev, req); + } + +@@ -166,9 +232,10 @@ static int rk_des3_ede_ecb_decrypt(struct skcipher_request *req) + { + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm); ++ struct rk_cipher_rctx *rctx = skcipher_request_ctx(req); + struct rk_crypto_info *dev = ctx->dev; + +- ctx->mode = RK_CRYPTO_TDES_SELECT | RK_CRYPTO_DEC; ++ rctx->mode = RK_CRYPTO_TDES_SELECT | RK_CRYPTO_DEC; + return rk_handle_req(dev, req); + } + +@@ -176,9 +243,10 @@ static int rk_des3_ede_cbc_encrypt(struct skcipher_request *req) + { + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm); ++ struct rk_cipher_rctx *rctx = skcipher_request_ctx(req); + struct rk_crypto_info *dev = ctx->dev; + +- ctx->mode = RK_CRYPTO_TDES_SELECT | RK_CRYPTO_TDES_CHAINMODE_CBC; ++ rctx->mode = RK_CRYPTO_TDES_SELECT | RK_CRYPTO_TDES_CHAINMODE_CBC; + return rk_handle_req(dev, req); + } + +@@ -186,43 +254,42 @@ static int rk_des3_ede_cbc_decrypt(struct skcipher_request *req) + { + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm); ++ struct rk_cipher_rctx *rctx = skcipher_request_ctx(req); + struct rk_crypto_info *dev = ctx->dev; + +- ctx->mode = RK_CRYPTO_TDES_SELECT | RK_CRYPTO_TDES_CHAINMODE_CBC | ++ rctx->mode = RK_CRYPTO_TDES_SELECT | RK_CRYPTO_TDES_CHAINMODE_CBC | + RK_CRYPTO_DEC; + return rk_handle_req(dev, req); + } + +-static void rk_ablk_hw_init(struct rk_crypto_info *dev) ++static void rk_ablk_hw_init(struct rk_crypto_info *dev, struct skcipher_request *req) + { +- struct skcipher_request *req = +- skcipher_request_cast(dev->async_req); + struct crypto_skcipher *cipher = crypto_skcipher_reqtfm(req); + struct crypto_tfm *tfm = crypto_skcipher_tfm(cipher); ++ struct rk_cipher_rctx *rctx = skcipher_request_ctx(req); + struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(cipher); +- u32 ivsize, block, conf_reg = 0; ++ u32 block, conf_reg = 0; + + block = crypto_tfm_alg_blocksize(tfm); +- ivsize = crypto_skcipher_ivsize(cipher); + + if (block == DES_BLOCK_SIZE) { +- ctx->mode |= RK_CRYPTO_TDES_FIFO_MODE | ++ rctx->mode |= RK_CRYPTO_TDES_FIFO_MODE | + RK_CRYPTO_TDES_BYTESWAP_KEY | + RK_CRYPTO_TDES_BYTESWAP_IV; +- CRYPTO_WRITE(dev, RK_CRYPTO_TDES_CTRL, ctx->mode); +- memcpy_toio(dev->reg + RK_CRYPTO_TDES_IV_0, req->iv, ivsize); ++ CRYPTO_WRITE(dev, RK_CRYPTO_TDES_CTRL, rctx->mode); ++ memcpy_toio(ctx->dev->reg + RK_CRYPTO_TDES_KEY1_0, ctx->key, ctx->keylen); + conf_reg = RK_CRYPTO_DESSEL; + } else { +- ctx->mode |= RK_CRYPTO_AES_FIFO_MODE | ++ rctx->mode |= RK_CRYPTO_AES_FIFO_MODE | + RK_CRYPTO_AES_KEY_CHANGE | + RK_CRYPTO_AES_BYTESWAP_KEY | + RK_CRYPTO_AES_BYTESWAP_IV; + if (ctx->keylen == AES_KEYSIZE_192) +- ctx->mode |= RK_CRYPTO_AES_192BIT_key; ++ rctx->mode |= RK_CRYPTO_AES_192BIT_key; + else if (ctx->keylen == AES_KEYSIZE_256) +- ctx->mode |= RK_CRYPTO_AES_256BIT_key; +- CRYPTO_WRITE(dev, RK_CRYPTO_AES_CTRL, ctx->mode); +- memcpy_toio(dev->reg + RK_CRYPTO_AES_IV_0, req->iv, ivsize); ++ rctx->mode |= RK_CRYPTO_AES_256BIT_key; ++ CRYPTO_WRITE(dev, RK_CRYPTO_AES_CTRL, rctx->mode); ++ memcpy_toio(ctx->dev->reg + RK_CRYPTO_AES_KEY_0, ctx->key, ctx->keylen); + } + conf_reg |= RK_CRYPTO_BYTESWAP_BTFIFO | + RK_CRYPTO_BYTESWAP_BRFIFO; +@@ -231,146 +298,138 @@ static void rk_ablk_hw_init(struct rk_crypto_info *dev) + RK_CRYPTO_BCDMA_ERR_ENA | RK_CRYPTO_BCDMA_DONE_ENA); + } + +-static 
void crypto_dma_start(struct rk_crypto_info *dev) ++static void crypto_dma_start(struct rk_crypto_info *dev, ++ struct scatterlist *sgs, ++ struct scatterlist *sgd, unsigned int todo) + { +- CRYPTO_WRITE(dev, RK_CRYPTO_BRDMAS, dev->addr_in); +- CRYPTO_WRITE(dev, RK_CRYPTO_BRDMAL, dev->count / 4); +- CRYPTO_WRITE(dev, RK_CRYPTO_BTDMAS, dev->addr_out); ++ CRYPTO_WRITE(dev, RK_CRYPTO_BRDMAS, sg_dma_address(sgs)); ++ CRYPTO_WRITE(dev, RK_CRYPTO_BRDMAL, todo); ++ CRYPTO_WRITE(dev, RK_CRYPTO_BTDMAS, sg_dma_address(sgd)); + CRYPTO_WRITE(dev, RK_CRYPTO_CTRL, RK_CRYPTO_BLOCK_START | + _SBF(RK_CRYPTO_BLOCK_START, 16)); + } + +-static int rk_set_data_start(struct rk_crypto_info *dev) ++static int rk_cipher_run(struct crypto_engine *engine, void *async_req) + { +- int err; +- struct skcipher_request *req = +- skcipher_request_cast(dev->async_req); +- struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); ++ struct skcipher_request *areq = container_of(async_req, struct skcipher_request, base); ++ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(areq); + struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm); +- u32 ivsize = crypto_skcipher_ivsize(tfm); +- u8 *src_last_blk = page_address(sg_page(dev->sg_src)) + +- dev->sg_src->offset + dev->sg_src->length - ivsize; +- +- /* Store the iv that need to be updated in chain mode. +- * And update the IV buffer to contain the next IV for decryption mode. +- */ +- if (ctx->mode & RK_CRYPTO_DEC) { +- memcpy(ctx->iv, src_last_blk, ivsize); +- sg_pcopy_to_buffer(dev->first, dev->src_nents, req->iv, +- ivsize, dev->total - ivsize); +- } +- +- err = dev->load_data(dev, dev->sg_src, dev->sg_dst); +- if (!err) +- crypto_dma_start(dev); +- return err; +-} +- +-static int rk_ablk_start(struct rk_crypto_info *dev) +-{ +- struct skcipher_request *req = +- skcipher_request_cast(dev->async_req); +- unsigned long flags; ++ struct rk_cipher_rctx *rctx = skcipher_request_ctx(areq); ++ struct scatterlist *sgs, *sgd; + int err = 0; ++ int ivsize = crypto_skcipher_ivsize(tfm); ++ int offset; ++ u8 iv[AES_BLOCK_SIZE]; ++ u8 biv[AES_BLOCK_SIZE]; ++ u8 *ivtouse = areq->iv; ++ unsigned int len = areq->cryptlen; ++ unsigned int todo; ++ ++ ivsize = crypto_skcipher_ivsize(tfm); ++ if (areq->iv && crypto_skcipher_ivsize(tfm) > 0) { ++ if (rctx->mode & RK_CRYPTO_DEC) { ++ offset = areq->cryptlen - ivsize; ++ scatterwalk_map_and_copy(rctx->backup_iv, areq->src, ++ offset, ivsize, 0); ++ } ++ } + +- dev->left_bytes = req->cryptlen; +- dev->total = req->cryptlen; +- dev->sg_src = req->src; +- dev->first = req->src; +- dev->src_nents = sg_nents(req->src); +- dev->sg_dst = req->dst; +- dev->dst_nents = sg_nents(req->dst); +- dev->aligned = 1; +- +- spin_lock_irqsave(&dev->lock, flags); +- rk_ablk_hw_init(dev); +- err = rk_set_data_start(dev); +- spin_unlock_irqrestore(&dev->lock, flags); +- return err; +-} +- +-static void rk_iv_copyback(struct rk_crypto_info *dev) +-{ +- struct skcipher_request *req = +- skcipher_request_cast(dev->async_req); +- struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); +- struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm); +- u32 ivsize = crypto_skcipher_ivsize(tfm); ++ sgs = areq->src; ++ sgd = areq->dst; + +- /* Update the IV buffer to contain the next IV for encryption mode. 
*/ +- if (!(ctx->mode & RK_CRYPTO_DEC)) { +- if (dev->aligned) { +- memcpy(req->iv, sg_virt(dev->sg_dst) + +- dev->sg_dst->length - ivsize, ivsize); ++ while (sgs && sgd && len) { ++ if (!sgs->length) { ++ sgs = sg_next(sgs); ++ sgd = sg_next(sgd); ++ continue; ++ } ++ if (rctx->mode & RK_CRYPTO_DEC) { ++ /* we backup last block of source to be used as IV at next step */ ++ offset = sgs->length - ivsize; ++ scatterwalk_map_and_copy(biv, sgs, offset, ivsize, 0); ++ } ++ if (sgs == sgd) { ++ err = dma_map_sg(ctx->dev->dev, sgs, 1, DMA_BIDIRECTIONAL); ++ if (err <= 0) { ++ err = -EINVAL; ++ goto theend_iv; ++ } + } else { +- memcpy(req->iv, dev->addr_vir + +- dev->count - ivsize, ivsize); ++ err = dma_map_sg(ctx->dev->dev, sgs, 1, DMA_TO_DEVICE); ++ if (err <= 0) { ++ err = -EINVAL; ++ goto theend_iv; ++ } ++ err = dma_map_sg(ctx->dev->dev, sgd, 1, DMA_FROM_DEVICE); ++ if (err <= 0) { ++ err = -EINVAL; ++ goto theend_sgs; ++ } ++ } ++ err = 0; ++ rk_ablk_hw_init(ctx->dev, areq); ++ if (ivsize) { ++ if (ivsize == DES_BLOCK_SIZE) ++ memcpy_toio(ctx->dev->reg + RK_CRYPTO_TDES_IV_0, ivtouse, ivsize); ++ else ++ memcpy_toio(ctx->dev->reg + RK_CRYPTO_AES_IV_0, ivtouse, ivsize); ++ } ++ reinit_completion(&ctx->dev->complete); ++ ctx->dev->status = 0; ++ ++ todo = min(sg_dma_len(sgs), len); ++ len -= todo; ++ crypto_dma_start(ctx->dev, sgs, sgd, todo / 4); ++ wait_for_completion_interruptible_timeout(&ctx->dev->complete, ++ msecs_to_jiffies(2000)); ++ if (!ctx->dev->status) { ++ dev_err(ctx->dev->dev, "DMA timeout\n"); ++ err = -EFAULT; ++ goto theend; + } ++ if (sgs == sgd) { ++ dma_unmap_sg(ctx->dev->dev, sgs, 1, DMA_BIDIRECTIONAL); ++ } else { ++ dma_unmap_sg(ctx->dev->dev, sgs, 1, DMA_TO_DEVICE); ++ dma_unmap_sg(ctx->dev->dev, sgd, 1, DMA_FROM_DEVICE); ++ } ++ if (rctx->mode & RK_CRYPTO_DEC) { ++ memcpy(iv, biv, ivsize); ++ ivtouse = iv; ++ } else { ++ offset = sgd->length - ivsize; ++ scatterwalk_map_and_copy(iv, sgd, offset, ivsize, 0); ++ ivtouse = iv; ++ } ++ sgs = sg_next(sgs); ++ sgd = sg_next(sgd); + } +-} +- +-static void rk_update_iv(struct rk_crypto_info *dev) +-{ +- struct skcipher_request *req = +- skcipher_request_cast(dev->async_req); +- struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); +- struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm); +- u32 ivsize = crypto_skcipher_ivsize(tfm); +- u8 *new_iv = NULL; + +- if (ctx->mode & RK_CRYPTO_DEC) { +- new_iv = ctx->iv; +- } else { +- new_iv = page_address(sg_page(dev->sg_dst)) + +- dev->sg_dst->offset + dev->sg_dst->length - ivsize; ++ if (areq->iv && ivsize > 0) { ++ offset = areq->cryptlen - ivsize; ++ if (rctx->mode & RK_CRYPTO_DEC) { ++ memcpy(areq->iv, rctx->backup_iv, ivsize); ++ memzero_explicit(rctx->backup_iv, ivsize); ++ } else { ++ scatterwalk_map_and_copy(areq->iv, areq->dst, offset, ++ ivsize, 0); ++ } + } + +- if (ivsize == DES_BLOCK_SIZE) +- memcpy_toio(dev->reg + RK_CRYPTO_TDES_IV_0, new_iv, ivsize); +- else if (ivsize == AES_BLOCK_SIZE) +- memcpy_toio(dev->reg + RK_CRYPTO_AES_IV_0, new_iv, ivsize); +-} ++theend: ++ local_bh_disable(); ++ crypto_finalize_skcipher_request(engine, areq, err); ++ local_bh_enable(); ++ return 0; + +-/* return: +- * true some err was occurred +- * fault no err, continue +- */ +-static int rk_ablk_rx(struct rk_crypto_info *dev) +-{ +- int err = 0; +- struct skcipher_request *req = +- skcipher_request_cast(dev->async_req); +- +- dev->unload_data(dev); +- if (!dev->aligned) { +- if (!sg_pcopy_from_buffer(req->dst, dev->dst_nents, +- dev->addr_vir, dev->count, +- dev->total - 
dev->left_bytes - +- dev->count)) { +- err = -EINVAL; +- goto out_rx; +- } +- } +- if (dev->left_bytes) { +- rk_update_iv(dev); +- if (dev->aligned) { +- if (sg_is_last(dev->sg_src)) { +- dev_err(dev->dev, "[%s:%d] Lack of data\n", +- __func__, __LINE__); +- err = -ENOMEM; +- goto out_rx; +- } +- dev->sg_src = sg_next(dev->sg_src); +- dev->sg_dst = sg_next(dev->sg_dst); +- } +- err = rk_set_data_start(dev); ++theend_sgs: ++ if (sgs == sgd) { ++ dma_unmap_sg(ctx->dev->dev, sgs, 1, DMA_BIDIRECTIONAL); + } else { +- rk_iv_copyback(dev); +- /* here show the calculation is over without any err */ +- dev->complete(dev->async_req, 0); +- tasklet_schedule(&dev->queue_task); ++ dma_unmap_sg(ctx->dev->dev, sgs, 1, DMA_TO_DEVICE); ++ dma_unmap_sg(ctx->dev->dev, sgd, 1, DMA_FROM_DEVICE); + } +-out_rx: ++theend_iv: + return err; + } + +@@ -378,26 +437,34 @@ static int rk_ablk_init_tfm(struct crypto_skcipher *tfm) + { + struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_alg *alg = crypto_skcipher_alg(tfm); ++ const char *name = crypto_tfm_alg_name(&tfm->base); + struct rk_crypto_tmp *algt; + + algt = container_of(alg, struct rk_crypto_tmp, alg.skcipher); + + ctx->dev = algt->dev; +- ctx->dev->align_size = crypto_tfm_alg_alignmask(crypto_skcipher_tfm(tfm)) + 1; +- ctx->dev->start = rk_ablk_start; +- ctx->dev->update = rk_ablk_rx; +- ctx->dev->complete = rk_crypto_complete; +- ctx->dev->addr_vir = (char *)__get_free_page(GFP_KERNEL); + +- return ctx->dev->addr_vir ? ctx->dev->enable_clk(ctx->dev) : -ENOMEM; ++ ctx->fallback_tfm = crypto_alloc_skcipher(name, 0, CRYPTO_ALG_NEED_FALLBACK); ++ if (IS_ERR(ctx->fallback_tfm)) { ++ dev_err(ctx->dev->dev, "ERROR: Cannot allocate fallback for %s %ld\n", ++ name, PTR_ERR(ctx->fallback_tfm)); ++ return PTR_ERR(ctx->fallback_tfm); ++ } ++ ++ tfm->reqsize = sizeof(struct rk_cipher_rctx) + ++ crypto_skcipher_reqsize(ctx->fallback_tfm); ++ ++ ctx->enginectx.op.do_one_request = rk_cipher_run; ++ ++ return 0; + } + + static void rk_ablk_exit_tfm(struct crypto_skcipher *tfm) + { + struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm); + +- free_page((unsigned long)ctx->dev->addr_vir); +- ctx->dev->disable_clk(ctx->dev); ++ memzero_explicit(ctx->key, ctx->keylen); ++ crypto_free_skcipher(ctx->fallback_tfm); + } + + struct rk_crypto_tmp rk_ecb_aes_alg = { +@@ -406,7 +473,7 @@ struct rk_crypto_tmp rk_ecb_aes_alg = { + .base.cra_name = "ecb(aes)", + .base.cra_driver_name = "ecb-aes-rk", + .base.cra_priority = 300, +- .base.cra_flags = CRYPTO_ALG_ASYNC, ++ .base.cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK, + .base.cra_blocksize = AES_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct rk_cipher_ctx), + .base.cra_alignmask = 0x0f, +@@ -428,7 +495,7 @@ struct rk_crypto_tmp rk_cbc_aes_alg = { + .base.cra_name = "cbc(aes)", + .base.cra_driver_name = "cbc-aes-rk", + .base.cra_priority = 300, +- .base.cra_flags = CRYPTO_ALG_ASYNC, ++ .base.cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK, + .base.cra_blocksize = AES_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct rk_cipher_ctx), + .base.cra_alignmask = 0x0f, +@@ -451,7 +518,7 @@ struct rk_crypto_tmp rk_ecb_des_alg = { + .base.cra_name = "ecb(des)", + .base.cra_driver_name = "ecb-des-rk", + .base.cra_priority = 300, +- .base.cra_flags = CRYPTO_ALG_ASYNC, ++ .base.cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK, + .base.cra_blocksize = DES_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct rk_cipher_ctx), + .base.cra_alignmask = 0x07, +@@ -473,7 +540,7 @@ struct rk_crypto_tmp rk_cbc_des_alg = { + 
.base.cra_name = "cbc(des)", + .base.cra_driver_name = "cbc-des-rk", + .base.cra_priority = 300, +- .base.cra_flags = CRYPTO_ALG_ASYNC, ++ .base.cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK, + .base.cra_blocksize = DES_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct rk_cipher_ctx), + .base.cra_alignmask = 0x07, +@@ -496,7 +563,7 @@ struct rk_crypto_tmp rk_ecb_des3_ede_alg = { + .base.cra_name = "ecb(des3_ede)", + .base.cra_driver_name = "ecb-des3-ede-rk", + .base.cra_priority = 300, +- .base.cra_flags = CRYPTO_ALG_ASYNC, ++ .base.cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK, + .base.cra_blocksize = DES_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct rk_cipher_ctx), + .base.cra_alignmask = 0x07, +@@ -506,7 +573,6 @@ struct rk_crypto_tmp rk_ecb_des3_ede_alg = { .exit = rk_ablk_exit_tfm, .min_keysize = DES3_EDE_KEY_SIZE, .max_keysize = DES3_EDE_KEY_SIZE, @@ -86362,6 +105048,15 @@ index 1cece1a7d3f00..5bbf0d2722e11 100644 .setkey = rk_tdes_setkey, .encrypt = rk_des3_ede_ecb_encrypt, .decrypt = rk_des3_ede_ecb_decrypt, +@@ -519,7 +585,7 @@ struct rk_crypto_tmp rk_cbc_des3_ede_alg = { + .base.cra_name = "cbc(des3_ede)", + .base.cra_driver_name = "cbc-des3-ede-rk", + .base.cra_priority = 300, +- .base.cra_flags = CRYPTO_ALG_ASYNC, ++ .base.cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK, + .base.cra_blocksize = DES_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct rk_cipher_ctx), + .base.cra_alignmask = 0x07, diff --git a/drivers/crypto/s5p-sss.c b/drivers/crypto/s5p-sss.c index 55aa3a71169b0..7717e9e5977bb 100644 --- a/drivers/crypto/s5p-sss.c @@ -88099,6 +106794,30 @@ index fc89e91beea7c..7610e4a9ac4e2 100644 kmem_cache_destroy(dax_cache); } +diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c +index 85faa7a5c7d12..a473b640c40ae 100644 +--- a/drivers/devfreq/devfreq.c ++++ b/drivers/devfreq/devfreq.c +@@ -775,8 +775,7 @@ static void remove_sysfs_files(struct devfreq *devfreq, + * @dev: the device to add devfreq feature. + * @profile: device-specific profile to run devfreq. + * @governor_name: name of the policy to choose frequency. +- * @data: private data for the governor. The devfreq framework does not +- * touch this value. ++ * @data: devfreq driver pass to governors, governor should not change it. + */ + struct devfreq *devfreq_add_device(struct device *dev, + struct devfreq_dev_profile *profile, +@@ -1003,8 +1002,7 @@ static void devm_devfreq_dev_release(struct device *dev, void *res) + * @dev: the device to add devfreq feature. + * @profile: device-specific profile to run devfreq. + * @governor_name: name of the policy to choose frequency. +- * @data: private data for the governor. The devfreq framework does not +- * touch this value. ++ * @data: devfreq driver pass to governors, governor should not change it. 
+ * + * This function manages automatically the memory of devfreq device using device + * resource management and simplify the free operation for memory of devfreq diff --git a/drivers/devfreq/event/exynos-ppmu.c b/drivers/devfreq/event/exynos-ppmu.c index 17ed980d90998..d6da9c3e31067 100644 --- a/drivers/devfreq/event/exynos-ppmu.c @@ -88125,6 +106844,57 @@ index 17ed980d90998..d6da9c3e31067 100644 j = 0; for_each_child_of_node(events_np, node) { +diff --git a/drivers/devfreq/governor_userspace.c b/drivers/devfreq/governor_userspace.c +index ab9db7adb3ade..d69672ccacc49 100644 +--- a/drivers/devfreq/governor_userspace.c ++++ b/drivers/devfreq/governor_userspace.c +@@ -21,7 +21,7 @@ struct userspace_data { + + static int devfreq_userspace_func(struct devfreq *df, unsigned long *freq) + { +- struct userspace_data *data = df->data; ++ struct userspace_data *data = df->governor_data; + + if (data->valid) + *freq = data->user_frequency; +@@ -40,7 +40,7 @@ static ssize_t set_freq_store(struct device *dev, struct device_attribute *attr, + int err = 0; + + mutex_lock(&devfreq->lock); +- data = devfreq->data; ++ data = devfreq->governor_data; + + sscanf(buf, "%lu", &wanted); + data->user_frequency = wanted; +@@ -60,7 +60,7 @@ static ssize_t set_freq_show(struct device *dev, + int err = 0; + + mutex_lock(&devfreq->lock); +- data = devfreq->data; ++ data = devfreq->governor_data; + + if (data->valid) + err = sprintf(buf, "%lu\n", data->user_frequency); +@@ -91,7 +91,7 @@ static int userspace_init(struct devfreq *devfreq) + goto out; + } + data->valid = false; +- devfreq->data = data; ++ devfreq->governor_data = data; + + err = sysfs_create_group(&devfreq->dev.kobj, &dev_attr_group); + out: +@@ -107,8 +107,8 @@ static void userspace_exit(struct devfreq *devfreq) + if (devfreq->dev.kobj.sd) + sysfs_remove_group(&devfreq->dev.kobj, &dev_attr_group); + +- kfree(devfreq->data); +- devfreq->data = NULL; ++ kfree(devfreq->governor_data); ++ devfreq->governor_data = NULL; + } + + static int devfreq_userspace_handler(struct devfreq *devfreq, diff --git a/drivers/devfreq/rk3399_dmc.c b/drivers/devfreq/rk3399_dmc.c index 293857ebfd75d..538e8dc74f40a 100644 --- a/drivers/devfreq/rk3399_dmc.c @@ -88138,6 +106908,39 @@ index 293857ebfd75d..538e8dc74f40a 100644 /* * Before remove the opp table we need to unregister the opp notifier. */ +diff --git a/drivers/dio/dio.c b/drivers/dio/dio.c +index 4c06c93c93d32..c7f7134adc21d 100644 +--- a/drivers/dio/dio.c ++++ b/drivers/dio/dio.c +@@ -110,6 +110,12 @@ static char dio_no_name[] = { 0 }; + + #endif /* CONFIG_DIO_CONSTANTS */ + ++static void dio_dev_release(struct device *dev) ++{ ++ struct dio_dev *ddev = container_of(dev, typeof(struct dio_dev), dev); ++ kfree(ddev); ++} ++ + int __init dio_find(int deviceid) + { + /* Called to find a DIO device before the full bus scan has run. 
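The dio_dev_release() hunk just above and the dio_init() hunk that follows apply the usual driver-model contract: a structure that embeds a struct device must be freed from its release() callback, and once registration has been attempted the error path must drop the reference with put_device() rather than kfree(). A minimal sketch of that pattern, assuming hypothetical my_dev/my_dev_add names that do not appear in the patch:

    /* Sketch of the release pattern; my_dev, my_dev_add and "mydev0"
     * are illustrative names, not part of the dio driver.
     */
    #include <linux/device.h>
    #include <linux/slab.h>

    struct my_dev {
            struct device dev;
            int scode;
    };

    static void my_dev_release(struct device *dev)
    {
            /* Called only when the last reference is dropped. */
            kfree(container_of(dev, struct my_dev, dev));
    }

    static int my_dev_add(struct bus_type *bus)
    {
            struct my_dev *mdev = kzalloc(sizeof(*mdev), GFP_KERNEL);
            int error;

            if (!mdev)
                    return -ENOMEM;

            mdev->dev.bus = bus;
            mdev->dev.release = my_dev_release; /* as dio_dev_release above */
            dev_set_name(&mdev->dev, "mydev0");

            error = device_register(&mdev->dev);
            if (error) {
                    /* Documented contract of device_register(): drop the
                     * reference on failure, never kfree() directly.
                     */
                    put_device(&mdev->dev);
                    return error;
            }
            return 0;
    }

Freeing in the release() callback rather than at unregister time is what makes put_device() safe on the failure path: if the core still holds a transient reference, the memory stays alive until that last reference is dropped.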
+@@ -224,6 +230,7 @@ static int __init dio_init(void) + dev->bus = &dio_bus; + dev->dev.parent = &dio_bus.dev; + dev->dev.bus = &dio_bus_type; ++ dev->dev.release = dio_dev_release; + dev->scode = scode; + dev->resource.start = pa; + dev->resource.end = pa + DIO_SIZE(scode, va); +@@ -251,6 +258,7 @@ static int __init dio_init(void) + if (error) { + pr_err("DIO: Error registering device %s\n", + dev->name); ++ put_device(&dev->dev); + continue; + } + error = dio_create_sysfs_dev_files(dev); diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c index 63d32261b63ff..968c3df2810e6 100644 --- a/drivers/dma-buf/dma-buf.c @@ -88443,7 +107246,7 @@ index d3fbd950be944..3e07f961e2f3d 100644 static void dma_fence_array_release(struct dma_fence *fence) diff --git a/drivers/dma-buf/dma-heap.c b/drivers/dma-buf/dma-heap.c -index 56bf5ad01ad54..8f5848aa144fe 100644 +index 56bf5ad01ad54..59d158873f4cb 100644 --- a/drivers/dma-buf/dma-heap.c +++ b/drivers/dma-buf/dma-heap.c @@ -14,6 +14,7 @@ @@ -88462,6 +107265,54 @@ index 56bf5ad01ad54..8f5848aa144fe 100644 /* Get the kernel ioctl cmd that matches */ kcmd = dma_heap_ioctl_cmds[nr]; +@@ -231,18 +233,6 @@ struct dma_heap *dma_heap_add(const struct dma_heap_export_info *exp_info) + return ERR_PTR(-EINVAL); + } + +- /* check the name is unique */ +- mutex_lock(&heap_list_lock); +- list_for_each_entry(h, &heap_list, list) { +- if (!strcmp(h->name, exp_info->name)) { +- mutex_unlock(&heap_list_lock); +- pr_err("dma_heap: Already registered heap named %s\n", +- exp_info->name); +- return ERR_PTR(-EINVAL); +- } +- } +- mutex_unlock(&heap_list_lock); +- + heap = kzalloc(sizeof(*heap), GFP_KERNEL); + if (!heap) + return ERR_PTR(-ENOMEM); +@@ -281,13 +271,27 @@ struct dma_heap *dma_heap_add(const struct dma_heap_export_info *exp_info) + err_ret = ERR_CAST(dev_ret); + goto err2; + } +- /* Add heap to the list */ ++ + mutex_lock(&heap_list_lock); ++ /* check the name is unique */ ++ list_for_each_entry(h, &heap_list, list) { ++ if (!strcmp(h->name, exp_info->name)) { ++ mutex_unlock(&heap_list_lock); ++ pr_err("dma_heap: Already registered heap named %s\n", ++ exp_info->name); ++ err_ret = ERR_PTR(-EINVAL); ++ goto err3; ++ } ++ } ++ ++ /* Add heap to the list */ + list_add(&heap->list, &heap_list); + mutex_unlock(&heap_list_lock); + + return heap; + ++err3: ++ device_destroy(dma_heap_class, heap->heap_devt); + err2: + cdev_del(&heap->heap_cdev); + err1: diff --git a/drivers/dma-buf/heaps/cma_heap.c b/drivers/dma-buf/heaps/cma_heap.c index 0c05b79870f96..83f02bd51dda6 100644 --- a/drivers/dma-buf/heaps/cma_heap.c @@ -88582,6 +107433,322 @@ index c57a609db75be..bf11d32205f38 100644 } static void __exit udmabuf_dev_exit(void) +diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c +index 30ae36124b1db..4583a8b5e5bd8 100644 +--- a/drivers/dma/at_hdmac.c ++++ b/drivers/dma/at_hdmac.c +@@ -256,6 +256,8 @@ static void atc_dostart(struct at_dma_chan *atchan, struct at_desc *first) + ATC_SPIP_BOUNDARY(first->boundary)); + channel_writel(atchan, DPIP, ATC_DPIP_HOLE(first->dst_hole) | + ATC_DPIP_BOUNDARY(first->boundary)); ++ /* Don't allow CPU to reorder channel enable. 
*/ ++ wmb(); + dma_writel(atdma, CHER, atchan->mask); + + vdbg_dump_regs(atchan); +@@ -316,7 +318,8 @@ static int atc_get_bytes_left(struct dma_chan *chan, dma_cookie_t cookie) + struct at_desc *desc_first = atc_first_active(atchan); + struct at_desc *desc; + int ret; +- u32 ctrla, dscr, trials; ++ u32 ctrla, dscr; ++ unsigned int i; + + /* + * If the cookie doesn't match to the currently running transfer then +@@ -386,7 +389,7 @@ static int atc_get_bytes_left(struct dma_chan *chan, dma_cookie_t cookie) + dscr = channel_readl(atchan, DSCR); + rmb(); /* ensure DSCR is read before CTRLA */ + ctrla = channel_readl(atchan, CTRLA); +- for (trials = 0; trials < ATC_MAX_DSCR_TRIALS; ++trials) { ++ for (i = 0; i < ATC_MAX_DSCR_TRIALS; ++i) { + u32 new_dscr; + + rmb(); /* ensure DSCR is read after CTRLA */ +@@ -412,7 +415,7 @@ static int atc_get_bytes_left(struct dma_chan *chan, dma_cookie_t cookie) + rmb(); /* ensure DSCR is read before CTRLA */ + ctrla = channel_readl(atchan, CTRLA); + } +- if (unlikely(trials >= ATC_MAX_DSCR_TRIALS)) ++ if (unlikely(i == ATC_MAX_DSCR_TRIALS)) + return -ETIMEDOUT; + + /* for the first descriptor we can be more accurate */ +@@ -462,18 +465,6 @@ atc_chain_complete(struct at_dma_chan *atchan, struct at_desc *desc) + if (!atc_chan_is_cyclic(atchan)) + dma_cookie_complete(txd); + +- /* If the transfer was a memset, free our temporary buffer */ +- if (desc->memset_buffer) { +- dma_pool_free(atdma->memset_pool, desc->memset_vaddr, +- desc->memset_paddr); +- desc->memset_buffer = false; +- } +- +- /* move children to free_list */ +- list_splice_init(&desc->tx_list, &atchan->free_list); +- /* move myself to free_list */ +- list_move(&desc->desc_node, &atchan->free_list); +- + spin_unlock_irqrestore(&atchan->lock, flags); + + dma_descriptor_unmap(txd); +@@ -483,42 +474,20 @@ atc_chain_complete(struct at_dma_chan *atchan, struct at_desc *desc) + dmaengine_desc_get_callback_invoke(txd, NULL); + + dma_run_dependencies(txd); +-} +- +-/** +- * atc_complete_all - finish work for all transactions +- * @atchan: channel to complete transactions for +- * +- * Eventually submit queued descriptors if any +- * +- * Assume channel is idle while calling this function +- * Called with atchan->lock held and bh disabled +- */ +-static void atc_complete_all(struct at_dma_chan *atchan) +-{ +- struct at_desc *desc, *_desc; +- LIST_HEAD(list); +- unsigned long flags; +- +- dev_vdbg(chan2dev(&atchan->chan_common), "complete all\n"); + + spin_lock_irqsave(&atchan->lock, flags); +- +- /* +- * Submit queued descriptors ASAP, i.e. before we go through +- * the completed ones. 
+- */ +- if (!list_empty(&atchan->queue)) +- atc_dostart(atchan, atc_first_queued(atchan)); +- /* empty active_list now it is completed */ +- list_splice_init(&atchan->active_list, &list); +- /* empty queue list by moving descriptors (if any) to active_list */ +- list_splice_init(&atchan->queue, &atchan->active_list); +- ++ /* move children to free_list */ ++ list_splice_init(&desc->tx_list, &atchan->free_list); ++ /* add myself to free_list */ ++ list_add(&desc->desc_node, &atchan->free_list); + spin_unlock_irqrestore(&atchan->lock, flags); + +- list_for_each_entry_safe(desc, _desc, &list, desc_node) +- atc_chain_complete(atchan, desc); ++ /* If the transfer was a memset, free our temporary buffer */ ++ if (desc->memset_buffer) { ++ dma_pool_free(atdma->memset_pool, desc->memset_vaddr, ++ desc->memset_paddr); ++ desc->memset_buffer = false; ++ } + } + + /** +@@ -527,26 +496,28 @@ static void atc_complete_all(struct at_dma_chan *atchan) + */ + static void atc_advance_work(struct at_dma_chan *atchan) + { ++ struct at_desc *desc; + unsigned long flags; +- int ret; + + dev_vdbg(chan2dev(&atchan->chan_common), "advance_work\n"); + + spin_lock_irqsave(&atchan->lock, flags); +- ret = atc_chan_is_enabled(atchan); +- spin_unlock_irqrestore(&atchan->lock, flags); +- if (ret) +- return; +- +- if (list_empty(&atchan->active_list) || +- list_is_singular(&atchan->active_list)) +- return atc_complete_all(atchan); ++ if (atc_chan_is_enabled(atchan) || list_empty(&atchan->active_list)) ++ return spin_unlock_irqrestore(&atchan->lock, flags); + +- atc_chain_complete(atchan, atc_first_active(atchan)); ++ desc = atc_first_active(atchan); ++ /* Remove the transfer node from the active list. */ ++ list_del_init(&desc->desc_node); ++ spin_unlock_irqrestore(&atchan->lock, flags); ++ atc_chain_complete(atchan, desc); + + /* advance work */ + spin_lock_irqsave(&atchan->lock, flags); +- atc_dostart(atchan, atc_first_active(atchan)); ++ if (!list_empty(&atchan->active_list)) { ++ desc = atc_first_queued(atchan); ++ list_move_tail(&desc->desc_node, &atchan->active_list); ++ atc_dostart(atchan, desc); ++ } + spin_unlock_irqrestore(&atchan->lock, flags); + } + +@@ -558,6 +529,7 @@ static void atc_advance_work(struct at_dma_chan *atchan) + static void atc_handle_error(struct at_dma_chan *atchan) + { + struct at_desc *bad_desc; ++ struct at_desc *desc; + struct at_desc *child; + unsigned long flags; + +@@ -570,13 +542,12 @@ static void atc_handle_error(struct at_dma_chan *atchan) + bad_desc = atc_first_active(atchan); + list_del_init(&bad_desc->desc_node); + +- /* As we are stopped, take advantage to push queued descriptors +- * in active_list */ +- list_splice_init(&atchan->queue, atchan->active_list.prev); +- + /* Try to restart the controller */ +- if (!list_empty(&atchan->active_list)) +- atc_dostart(atchan, atc_first_active(atchan)); ++ if (!list_empty(&atchan->active_list)) { ++ desc = atc_first_queued(atchan); ++ list_move_tail(&desc->desc_node, &atchan->active_list); ++ atc_dostart(atchan, desc); ++ } + + /* + * KERN_CRITICAL may seem harsh, but since this only happens +@@ -691,19 +662,11 @@ static dma_cookie_t atc_tx_submit(struct dma_async_tx_descriptor *tx) + spin_lock_irqsave(&atchan->lock, flags); + cookie = dma_cookie_assign(tx); + +- if (list_empty(&atchan->active_list)) { +- dev_vdbg(chan2dev(tx->chan), "tx_submit: started %u\n", +- desc->txd.cookie); +- atc_dostart(atchan, desc); +- list_add_tail(&desc->desc_node, &atchan->active_list); +- } else { +- dev_vdbg(chan2dev(tx->chan), "tx_submit: queued %u\n", 
+- desc->txd.cookie); +- list_add_tail(&desc->desc_node, &atchan->queue); +- } +- ++ list_add_tail(&desc->desc_node, &atchan->queue); + spin_unlock_irqrestore(&atchan->lock, flags); + ++ dev_vdbg(chan2dev(tx->chan), "tx_submit: queued %u\n", ++ desc->txd.cookie); + return cookie; + } + +@@ -1437,11 +1400,8 @@ static int atc_terminate_all(struct dma_chan *chan) + struct at_dma_chan *atchan = to_at_dma_chan(chan); + struct at_dma *atdma = to_at_dma(chan->device); + int chan_id = atchan->chan_common.chan_id; +- struct at_desc *desc, *_desc; + unsigned long flags; + +- LIST_HEAD(list); +- + dev_vdbg(chan2dev(chan), "%s\n", __func__); + + /* +@@ -1460,19 +1420,15 @@ static int atc_terminate_all(struct dma_chan *chan) + cpu_relax(); + + /* active_list entries will end up before queued entries */ +- list_splice_init(&atchan->queue, &list); +- list_splice_init(&atchan->active_list, &list); +- +- spin_unlock_irqrestore(&atchan->lock, flags); +- +- /* Flush all pending and queued descriptors */ +- list_for_each_entry_safe(desc, _desc, &list, desc_node) +- atc_chain_complete(atchan, desc); ++ list_splice_tail_init(&atchan->queue, &atchan->free_list); ++ list_splice_tail_init(&atchan->active_list, &atchan->free_list); + + clear_bit(ATC_IS_PAUSED, &atchan->status); + /* if channel dedicated to cyclic operations, free it */ + clear_bit(ATC_IS_CYCLIC, &atchan->status); + ++ spin_unlock_irqrestore(&atchan->lock, flags); ++ + return 0; + } + +@@ -1527,20 +1483,26 @@ atc_tx_status(struct dma_chan *chan, + } + + /** +- * atc_issue_pending - try to finish work ++ * atc_issue_pending - takes the first transaction descriptor in the pending ++ * queue and starts the transfer. + * @chan: target DMA channel + */ + static void atc_issue_pending(struct dma_chan *chan) + { +- struct at_dma_chan *atchan = to_at_dma_chan(chan); ++ struct at_dma_chan *atchan = to_at_dma_chan(chan); ++ struct at_desc *desc; ++ unsigned long flags; + + dev_vdbg(chan2dev(chan), "issue_pending\n"); + +- /* Not needed for cyclic transfers */ +- if (atc_chan_is_cyclic(atchan)) +- return; ++ spin_lock_irqsave(&atchan->lock, flags); ++ if (atc_chan_is_enabled(atchan) || list_empty(&atchan->queue)) ++ return spin_unlock_irqrestore(&atchan->lock, flags); + +- atc_advance_work(atchan); ++ desc = atc_first_queued(atchan); ++ list_move_tail(&desc->desc_node, &atchan->active_list); ++ atc_dostart(atchan, desc); ++ spin_unlock_irqrestore(&atchan->lock, flags); + } + + /** +@@ -1958,7 +1920,11 @@ static int __init at_dma_probe(struct platform_device *pdev) + dma_has_cap(DMA_SLAVE, atdma->dma_common.cap_mask) ? 
"slave " : "", + plat_dat->nr_channels); + +- dma_async_device_register(&atdma->dma_common); ++ err = dma_async_device_register(&atdma->dma_common); ++ if (err) { ++ dev_err(&pdev->dev, "Unable to register: %d.\n", err); ++ goto err_dma_async_device_register; ++ } + + /* + * Do not return an error if the dmac node is not present in order to +@@ -1978,6 +1944,7 @@ static int __init at_dma_probe(struct platform_device *pdev) + + err_of_dma_controller_register: + dma_async_device_unregister(&atdma->dma_common); ++err_dma_async_device_register: + dma_pool_destroy(atdma->memset_pool); + err_memset_pool_create: + dma_pool_destroy(atdma->dma_desc_pool); +diff --git a/drivers/dma/at_hdmac_regs.h b/drivers/dma/at_hdmac_regs.h +index 4d1ebc040031c..d4d382d746078 100644 +--- a/drivers/dma/at_hdmac_regs.h ++++ b/drivers/dma/at_hdmac_regs.h +@@ -186,13 +186,13 @@ + /* LLI == Linked List Item; aka DMA buffer descriptor */ + struct at_lli { + /* values that are not changed by hardware */ +- dma_addr_t saddr; +- dma_addr_t daddr; ++ u32 saddr; ++ u32 daddr; + /* value that may get written back: */ +- u32 ctrla; ++ u32 ctrla; + /* more values that are not changed by hardware */ +- u32 ctrlb; +- dma_addr_t dscr; /* chain to next lli */ ++ u32 ctrlb; ++ u32 dscr; /* chain to next lli */ + }; + + /** diff --git a/drivers/dma/at_xdmac.c b/drivers/dma/at_xdmac.c index ab78e0f6afd70..c5638afe94368 100644 --- a/drivers/dma/at_xdmac.c @@ -88934,6 +108101,34 @@ index 906ddba6a6f5d..8a24a5cbc2633 100644 /* Configure some stuff */ bcom_set_task_pragma(tsk->tasknum, BCOM_GEN_TX_BD_PRAGMA); +diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c +index af3ee288bc117..4ec7bb58c195f 100644 +--- a/drivers/dma/dmaengine.c ++++ b/drivers/dma/dmaengine.c +@@ -451,7 +451,8 @@ static int dma_chan_get(struct dma_chan *chan) + /* The channel is already in use, update client count */ + if (chan->client_count) { + __module_get(owner); +- goto out; ++ chan->client_count++; ++ return 0; + } + + if (!try_module_get(owner)) +@@ -470,11 +471,11 @@ static int dma_chan_get(struct dma_chan *chan) + goto err_out; + } + ++ chan->client_count++; ++ + if (!dma_has_cap(DMA_PRIVATE, chan->device->cap_mask)) + balance_ref_count(chan); + +-out: +- chan->client_count++; + return 0; + + err_out: diff --git a/drivers/dma/dmaengine.h b/drivers/dma/dmaengine.h index 1bfbd64b13717..53f16d3f00294 100644 --- a/drivers/dma/dmaengine.h @@ -88948,7 +108143,7 @@ index 1bfbd64b13717..53f16d3f00294 100644 struct dma_chan *dma_get_slave_channel(struct dma_chan *chan); diff --git a/drivers/dma/dw-axi-dmac/dw-axi-dmac-platform.c b/drivers/dma/dw-axi-dmac/dw-axi-dmac-platform.c -index 35993ab921547..48de8d2b32f2c 100644 +index 35993ab921547..41654b2f6c600 100644 --- a/drivers/dma/dw-axi-dmac/dw-axi-dmac-platform.c +++ b/drivers/dma/dw-axi-dmac/dw-axi-dmac-platform.c @@ -944,6 +944,11 @@ static int dw_axi_dma_chan_slave_config(struct dma_chan *dchan, @@ -88963,7 +108158,27 @@ index 35993ab921547..48de8d2b32f2c 100644 dev_err(dchan2dev(&chan->vc.chan), "SAR: 0x%llx DAR: 0x%llx LLP: 0x%llx BTS 0x%x CTL: 0x%x:%08x", le64_to_cpu(desc->lli->sar), -@@ -1011,6 +1016,11 @@ static void axi_chan_block_xfer_complete(struct axi_dma_chan *chan) +@@ -975,6 +980,11 @@ static noinline void axi_chan_handle_err(struct axi_dma_chan *chan, u32 status) + + /* The bad descriptor currently is in the head of vc list */ + vd = vchan_next_desc(&chan->vc); ++ if (!vd) { ++ dev_err(chan2dev(chan), "BUG: %s, IRQ with no descriptors\n", ++ axi_chan_name(chan)); ++ goto out; ++ } + /* 
Remove the completed descriptor from issued list */ + list_del(&vd->node); + +@@ -989,6 +999,7 @@ static noinline void axi_chan_handle_err(struct axi_dma_chan *chan, u32 status) + /* Try to restart the controller */ + axi_chan_start_first_queued(chan); + ++out: + spin_unlock_irqrestore(&chan->vc.lock, flags); + } + +@@ -1011,6 +1022,11 @@ static void axi_chan_block_xfer_complete(struct axi_dma_chan *chan) /* The completed descriptor currently is in the head of vc list */ vd = vchan_next_desc(&chan->vc); @@ -88975,7 +108190,7 @@ index 35993ab921547..48de8d2b32f2c 100644 if (chan->cyclic) { desc = vd_to_axi_desc(vd); -@@ -1040,6 +1050,7 @@ static void axi_chan_block_xfer_complete(struct axi_dma_chan *chan) +@@ -1040,6 +1056,7 @@ static void axi_chan_block_xfer_complete(struct axi_dma_chan *chan) axi_chan_start_first_queued(chan); } @@ -89132,7 +108347,7 @@ index b9b2b4a4124ee..033df43db0cec 100644 void idxd_cdev_remove(void) diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c -index 83a5ff2ecf2a0..11d3f2aede711 100644 +index 83a5ff2ecf2a0..37b07c679c0ee 100644 --- a/drivers/dma/idxd/device.c +++ b/drivers/dma/idxd/device.c @@ -394,8 +394,6 @@ static void idxd_wq_disable_cleanup(struct idxd_wq *wq) @@ -89261,6 +108476,16 @@ index 83a5ff2ecf2a0..11d3f2aede711 100644 for (i = 0; i < idxd->max_groups; i++) { struct idxd_group *group = idxd->groups[i]; +@@ -1236,8 +1248,7 @@ int __drv_enable_wq(struct idxd_wq *wq) + return 0; + + err_map_portal: +- rc = idxd_wq_disable(wq, false); +- if (rc < 0) ++ if (idxd_wq_disable(wq, false)) + dev_dbg(dev, "wq %s disable failed\n", dev_name(wq_confdev(wq))); + err: + return rc; diff --git a/drivers/dma/idxd/dma.c b/drivers/dma/idxd/dma.c index e0f056c1d1f56..29af898f3c242 100644 --- a/drivers/dma/idxd/dma.c @@ -89699,7 +108924,7 @@ index 2ddc31e64db03..da31e73d24d4c 100644 res = platform_get_resource(pdev, IORESOURCE_MEM, 0); imxdma->base = devm_ioremap_resource(&pdev->dev, res); diff --git a/drivers/dma/imx-sdma.c b/drivers/dma/imx-sdma.c -index cacc725ca5459..5215a5e39f3c3 100644 +index cacc725ca5459..292f4c9a963dd 100644 --- a/drivers/dma/imx-sdma.c +++ b/drivers/dma/imx-sdma.c @@ -198,12 +198,12 @@ struct sdma_script_start_addrs { @@ -89728,7 +108953,21 @@ index cacc725ca5459..5215a5e39f3c3 100644 /* End of v3 array */ s32 mcu_2_zqspi_addr; /* End of v4 array */ -@@ -1780,17 +1780,17 @@ static void sdma_add_scripts(struct sdma_engine *sdma, +@@ -1428,10 +1428,12 @@ static struct sdma_desc *sdma_transfer_init(struct sdma_channel *sdmac, + sdma_config_ownership(sdmac, false, true, false); + + if (sdma_load_context(sdmac)) +- goto err_desc_out; ++ goto err_bd_out; + + return desc; + ++err_bd_out: ++ sdma_free_bd(desc); + err_desc_out: + kfree(desc); + err_out: +@@ -1780,17 +1782,17 @@ static void sdma_add_scripts(struct sdma_engine *sdma, saddr_arr[i] = addr_arr[i]; /* @@ -89756,7 +108995,7 @@ index cacc725ca5459..5215a5e39f3c3 100644 } static void sdma_load_firmware(const struct firmware *fw, void *context) -@@ -1869,7 +1869,7 @@ static int sdma_event_remap(struct sdma_engine *sdma) +@@ -1869,7 +1871,7 @@ static int sdma_event_remap(struct sdma_engine *sdma) u32 reg, val, shift, num_map, i; int ret = 0; @@ -89765,7 +109004,7 @@ index cacc725ca5459..5215a5e39f3c3 100644 goto out; event_remap = of_find_property(np, propname, NULL); -@@ -1917,7 +1917,7 @@ static int sdma_event_remap(struct sdma_engine *sdma) +@@ -1917,7 +1919,7 @@ static int sdma_event_remap(struct sdma_engine *sdma) } out: @@ -89774,7 +109013,7 @@ index 
cacc725ca5459..5215a5e39f3c3 100644 of_node_put(gpr_np); return ret; -@@ -2264,7 +2264,7 @@ MODULE_DESCRIPTION("i.MX SDMA driver"); +@@ -2264,7 +2266,7 @@ MODULE_DESCRIPTION("i.MX SDMA driver"); #if IS_ENABLED(CONFIG_SOC_IMX6Q) MODULE_FIRMWARE("imx/sdma/sdma-imx6q.bin"); #endif @@ -89815,9 +109054,18 @@ index 37ff4ec7db76f..e2070df6cad28 100644 static void ioat_reboot_chan(struct ioatdma_chan *ioat_chan) diff --git a/drivers/dma/lgm/lgm-dma.c b/drivers/dma/lgm/lgm-dma.c -index efe8bd3a0e2aa..9b9184f964be3 100644 +index efe8bd3a0e2aa..1709d159af7e0 100644 --- a/drivers/dma/lgm/lgm-dma.c +++ b/drivers/dma/lgm/lgm-dma.c +@@ -914,7 +914,7 @@ static void ldma_dev_init(struct ldma_dev *d) + } + } + +-static int ldma_cfg_init(struct ldma_dev *d) ++static int ldma_parse_dt(struct ldma_dev *d) + { + struct fwnode_handle *fwnode = dev_fwnode(d->dev); + struct ldma_port *p; @@ -1593,11 +1593,12 @@ static int intel_ldma_probe(struct platform_device *pdev) d->core_clk = devm_clk_get_optional(dev, NULL); if (IS_ERR(d->core_clk)) @@ -89832,6 +109080,28 @@ index efe8bd3a0e2aa..9b9184f964be3 100644 reset_control_deassert(d->rst); ret = devm_add_action_or_reset(dev, ldma_clk_disable, d); +@@ -1660,10 +1661,6 @@ static int intel_ldma_probe(struct platform_device *pdev) + p->ldev = d; + } + +- ret = ldma_cfg_init(d); +- if (ret) +- return ret; +- + dma_dev->dev = &pdev->dev; + + ch_mask = (unsigned long)d->channels_mask; +@@ -1674,6 +1671,10 @@ static int intel_ldma_probe(struct platform_device *pdev) + ldma_dma_init_v3X(j, d); + } + ++ ret = ldma_parse_dt(d); ++ if (ret) ++ return ret; ++ + dma_dev->device_alloc_chan_resources = ldma_alloc_chan_resources; + dma_dev->device_free_chan_resources = ldma_free_chan_resources; + dma_dev->device_terminate_all = ldma_terminate_all; diff --git a/drivers/dma/mediatek/mtk-uart-apdma.c b/drivers/dma/mediatek/mtk-uart-apdma.c index 375e7e647df6b..a1517ef1f4a01 100644 --- a/drivers/dma/mediatek/mtk-uart-apdma.c @@ -89884,6 +109154,18 @@ index 89f1814ff27a0..26d11885c50ec 100644 - if (cfg->slave_id) - chan->drcmr = cfg->slave_id; + return 0; + } +diff --git a/drivers/dma/mv_xor_v2.c b/drivers/dma/mv_xor_v2.c +index 9b0d463f89bbd..4800c596433ad 100644 +--- a/drivers/dma/mv_xor_v2.c ++++ b/drivers/dma/mv_xor_v2.c +@@ -899,6 +899,7 @@ static int mv_xor_v2_remove(struct platform_device *pdev) + tasklet_kill(&xor_dev->irq_tasklet); + + clk_disable_unprepare(xor_dev->clk); ++ clk_disable_unprepare(xor_dev->reg_clk); + return 0; } diff --git a/drivers/dma/mxs-dma.c b/drivers/dma/mxs-dma.c @@ -89935,10 +109217,43 @@ index 110de8a600588..4ef68ddff75bc 100644 if (!add_desc(&pool, &lock, GFP_ATOMIC, 1)) diff --git a/drivers/dma/ptdma/ptdma-dev.c b/drivers/dma/ptdma/ptdma-dev.c -index 8a6bf291a73fe..daafea5bc35d9 100644 +index 8a6bf291a73fe..bca4063b0dce4 100644 --- a/drivers/dma/ptdma/ptdma-dev.c +++ b/drivers/dma/ptdma/ptdma-dev.c -@@ -207,7 +207,7 @@ int pt_core_init(struct pt_device *pt) +@@ -71,12 +71,13 @@ static int pt_core_execute_cmd(struct ptdma_desc *desc, struct pt_cmd_queue *cmd + bool soc = FIELD_GET(DWORD0_SOC, desc->dw0); + u8 *q_desc = (u8 *)&cmd_q->qbase[cmd_q->qidx]; + u32 tail; ++ unsigned long flags; + + if (soc) { + desc->dw0 |= FIELD_PREP(DWORD0_IOC, desc->dw0); + desc->dw0 &= ~DWORD0_SOC; + } +- mutex_lock(&cmd_q->q_mutex); ++ spin_lock_irqsave(&cmd_q->q_lock, flags); + + /* Copy 32-byte command descriptor to hw queue. 
*/ + memcpy(q_desc, desc, 32); +@@ -91,7 +92,7 @@ static int pt_core_execute_cmd(struct ptdma_desc *desc, struct pt_cmd_queue *cmd + + /* Turn the queue back on using our cached control register */ + pt_start_queue(cmd_q); +- mutex_unlock(&cmd_q->q_mutex); ++ spin_unlock_irqrestore(&cmd_q->q_lock, flags); + + return 0; + } +@@ -197,7 +198,7 @@ int pt_core_init(struct pt_device *pt) + + cmd_q->pt = pt; + cmd_q->dma_pool = dma_pool; +- mutex_init(&cmd_q->q_mutex); ++ spin_lock_init(&cmd_q->q_lock); + + /* Page alignment satisfies our needs for N <= 128 */ + cmd_q->qsize = Q_SIZE(Q_DESC_SIZE); +@@ -207,7 +208,7 @@ int pt_core_init(struct pt_device *pt) if (!cmd_q->qbase) { dev_err(dev, "unable to allocate command queue\n"); ret = -ENOMEM; @@ -89947,7 +109262,7 @@ index 8a6bf291a73fe..daafea5bc35d9 100644 } cmd_q->qidx = 0; -@@ -229,8 +229,10 @@ int pt_core_init(struct pt_device *pt) +@@ -229,8 +230,10 @@ int pt_core_init(struct pt_device *pt) /* Request an irq */ ret = request_irq(pt->pt_irq, pt_core_irq_handler, 0, dev_name(pt->dev), pt); @@ -89960,7 +109275,7 @@ index 8a6bf291a73fe..daafea5bc35d9 100644 /* Update the device registers with queue information. */ cmd_q->qcontrol &= ~CMD_Q_SIZE; -@@ -250,21 +252,20 @@ int pt_core_init(struct pt_device *pt) +@@ -250,21 +253,20 @@ int pt_core_init(struct pt_device *pt) /* Register the DMA engine support */ ret = pt_dmaengine_register(pt); if (ret) @@ -89986,8 +109301,21 @@ index 8a6bf291a73fe..daafea5bc35d9 100644 dma_pool_destroy(pt->cmd_q.dma_pool); return ret; +diff --git a/drivers/dma/ptdma/ptdma.h b/drivers/dma/ptdma/ptdma.h +index afbf192c92305..0f0b400a864e4 100644 +--- a/drivers/dma/ptdma/ptdma.h ++++ b/drivers/dma/ptdma/ptdma.h +@@ -196,7 +196,7 @@ struct pt_cmd_queue { + struct ptdma_desc *qbase; + + /* Aligned queue start address (per requirement) */ +- struct mutex q_mutex ____cacheline_aligned; ++ spinlock_t q_lock ____cacheline_aligned; + unsigned int qidx; + + unsigned int qsize; diff --git a/drivers/dma/pxa_dma.c b/drivers/dma/pxa_dma.c -index 4a2a796e348c1..aa6e552249ab9 100644 +index 4a2a796e348c1..e613ace79ea83 100644 --- a/drivers/dma/pxa_dma.c +++ b/drivers/dma/pxa_dma.c @@ -910,13 +910,6 @@ static void pxad_get_config(struct pxad_chan *chan, @@ -90004,6 +109332,23 @@ index 4a2a796e348c1..aa6e552249ab9 100644 } static struct dma_async_tx_descriptor * +@@ -1255,14 +1248,14 @@ static int pxad_init_phys(struct platform_device *op, + return -ENOMEM; + + for (i = 0; i < nb_phy_chans; i++) +- if (platform_get_irq(op, i) > 0) ++ if (platform_get_irq_optional(op, i) > 0) + nr_irq++; + + for (i = 0; i < nb_phy_chans; i++) { + phy = &pdev->phys[i]; + phy->base = pdev->base; + phy->idx = i; +- irq = platform_get_irq(op, i); ++ irq = platform_get_irq_optional(op, i); + if ((nr_irq > 1) && (irq > 0)) + ret = devm_request_irq(&op->dev, irq, + pxad_chan_handler, diff --git a/drivers/dma/qcom/bam_dma.c b/drivers/dma/qcom/bam_dma.c index c8a77b428b528..ca8c862c9747e 100644 --- a/drivers/dma/qcom/bam_dma.c @@ -90388,9 +109733,18 @@ index 18cbd1e43c2e8..21a7bdc88970a 100644 chan = &dmadev->chan[id]; if (!chan) { diff --git a/drivers/dma/tegra210-adma.c b/drivers/dma/tegra210-adma.c -index b1115a6d1935c..d1dff3a29db59 100644 +index b1115a6d1935c..f4f722eacee2b 100644 --- a/drivers/dma/tegra210-adma.c +++ b/drivers/dma/tegra210-adma.c +@@ -224,7 +224,7 @@ static int tegra_adma_init(struct tegra_adma *tdma) + int ret; + + /* Clear any interrupts */ +- tdma_write(tdma, tdma->cdata->global_int_clear, 0x1); ++ tdma_write(tdma, 
tdma->cdata->ch_base_offset + tdma->cdata->global_int_clear, 0x1); + + /* Assert soft reset */ + tdma_write(tdma, ADMA_GLOBAL_SOFT_RESET, 0x1); @@ -867,7 +867,7 @@ static int tegra_adma_probe(struct platform_device *pdev) pm_runtime_enable(&pdev->dev); @@ -90436,6 +109790,34 @@ index 71d24fc07c003..f744ddbbbad7f 100644 return ERR_PTR(-ENOMEM); } set_bit(map->xbar_out, xbar->dma_inuse); +diff --git a/drivers/dma/ti/k3-udma-glue.c b/drivers/dma/ti/k3-udma-glue.c +index 4fdd9f06b7235..4f1aeb81e9c7f 100644 +--- a/drivers/dma/ti/k3-udma-glue.c ++++ b/drivers/dma/ti/k3-udma-glue.c +@@ -299,6 +299,7 @@ struct k3_udma_glue_tx_channel *k3_udma_glue_request_tx_chn(struct device *dev, + ret = device_register(&tx_chn->common.chan_dev); + if (ret) { + dev_err(dev, "Channel Device registration failed %d\n", ret); ++ put_device(&tx_chn->common.chan_dev); + tx_chn->common.chan_dev.parent = NULL; + goto err; + } +@@ -917,6 +918,7 @@ k3_udma_glue_request_rx_chn_priv(struct device *dev, const char *name, + ret = device_register(&rx_chn->common.chan_dev); + if (ret) { + dev_err(dev, "Channel Device registration failed %d\n", ret); ++ put_device(&rx_chn->common.chan_dev); + rx_chn->common.chan_dev.parent = NULL; + goto err; + } +@@ -1048,6 +1050,7 @@ k3_udma_glue_request_remote_rx_chn(struct device *dev, const char *name, + ret = device_register(&rx_chn->common.chan_dev); + if (ret) { + dev_err(dev, "Channel Device registration failed %d\n", ret); ++ put_device(&rx_chn->common.chan_dev); + rx_chn->common.chan_dev.parent = NULL; + goto err; + } diff --git a/drivers/dma/ti/k3-udma-private.c b/drivers/dma/ti/k3-udma-private.c index aada84f40723c..3257b2f5157c3 100644 --- a/drivers/dma/ti/k3-udma-private.c @@ -90459,7 +109841,7 @@ index aada84f40723c..3257b2f5157c3 100644 if (!ud) { pr_debug("UDMA has not been probed\n"); diff --git a/drivers/dma/ti/k3-udma.c b/drivers/dma/ti/k3-udma.c -index a35858610780c..75f2a0006c734 100644 +index a35858610780c..d796e50dfe992 100644 --- a/drivers/dma/ti/k3-udma.c +++ b/drivers/dma/ti/k3-udma.c @@ -300,8 +300,6 @@ struct udma_chan { @@ -90471,7 +109853,7 @@ index a35858610780c..75f2a0006c734 100644 /* Channel configuration parameters */ struct udma_chan_config config; -@@ -757,6 +755,20 @@ static void udma_reset_rings(struct udma_chan *uc) +@@ -757,6 +755,21 @@ static void udma_reset_rings(struct udma_chan *uc) } } @@ -90480,11 +109862,12 @@ index a35858610780c..75f2a0006c734 100644 + if (uc->desc->dir == DMA_DEV_TO_MEM) { + udma_rchanrt_write(uc, UDMA_CHAN_RT_BCNT_REG, val); + udma_rchanrt_write(uc, UDMA_CHAN_RT_SBCNT_REG, val); -+ udma_rchanrt_write(uc, UDMA_CHAN_RT_PEER_BCNT_REG, val); ++ if (uc->config.ep_type != PSIL_EP_NATIVE) ++ udma_rchanrt_write(uc, UDMA_CHAN_RT_PEER_BCNT_REG, val); + } else { + udma_tchanrt_write(uc, UDMA_CHAN_RT_BCNT_REG, val); + udma_tchanrt_write(uc, UDMA_CHAN_RT_SBCNT_REG, val); -+ if (!uc->bchan) ++ if (!uc->bchan && uc->config.ep_type != PSIL_EP_NATIVE) + udma_tchanrt_write(uc, UDMA_CHAN_RT_PEER_BCNT_REG, val); + } +} @@ -90492,7 +109875,7 @@ index a35858610780c..75f2a0006c734 100644 static void udma_reset_counters(struct udma_chan *uc) { u32 val; -@@ -790,8 +802,6 @@ static void udma_reset_counters(struct udma_chan *uc) +@@ -790,8 +803,6 @@ static void udma_reset_counters(struct udma_chan *uc) val = udma_rchanrt_read(uc, UDMA_CHAN_RT_PEER_BCNT_REG); udma_rchanrt_write(uc, UDMA_CHAN_RT_PEER_BCNT_REG, val); } @@ -90501,7 +109884,7 @@ index a35858610780c..75f2a0006c734 100644 } static int udma_reset_chan(struct udma_chan *uc, bool hard) -@@ 
-1115,7 +1125,7 @@ static void udma_check_tx_completion(struct work_struct *work) +@@ -1115,7 +1126,7 @@ static void udma_check_tx_completion(struct work_struct *work) if (uc->desc) { struct udma_desc *d = uc->desc; @@ -90510,7 +109893,7 @@ index a35858610780c..75f2a0006c734 100644 udma_start(uc); vchan_cookie_complete(&d->vd); break; -@@ -1168,7 +1178,7 @@ static irqreturn_t udma_ring_irq_handler(int irq, void *data) +@@ -1168,7 +1179,7 @@ static irqreturn_t udma_ring_irq_handler(int irq, void *data) vchan_cyclic_callback(&d->vd); } else { if (udma_is_desc_really_done(uc, d)) { @@ -90519,7 +109902,7 @@ index a35858610780c..75f2a0006c734 100644 udma_start(uc); vchan_cookie_complete(&d->vd); } else { -@@ -1204,7 +1214,7 @@ static irqreturn_t udma_udma_irq_handler(int irq, void *data) +@@ -1204,7 +1215,7 @@ static irqreturn_t udma_udma_irq_handler(int irq, void *data) vchan_cyclic_callback(&d->vd); } else { /* TODO: figure out the real amount of data */ @@ -90528,7 +109911,7 @@ index a35858610780c..75f2a0006c734 100644 udma_start(uc); vchan_cookie_complete(&d->vd); } -@@ -1348,6 +1358,7 @@ static int bcdma_get_bchan(struct udma_chan *uc) +@@ -1348,6 +1359,7 @@ static int bcdma_get_bchan(struct udma_chan *uc) { struct udma_dev *ud = uc->ud; enum udma_tp_level tpl; @@ -90536,7 +109919,7 @@ index a35858610780c..75f2a0006c734 100644 if (uc->bchan) { dev_dbg(ud->dev, "chan%d: already have bchan%d allocated\n", -@@ -1365,8 +1376,11 @@ static int bcdma_get_bchan(struct udma_chan *uc) +@@ -1365,8 +1377,11 @@ static int bcdma_get_bchan(struct udma_chan *uc) tpl = ud->bchan_tpl.levels - 1; uc->bchan = __udma_reserve_bchan(ud, tpl, -1); @@ -90550,7 +109933,7 @@ index a35858610780c..75f2a0006c734 100644 uc->tchan = uc->bchan; -@@ -1376,6 +1390,7 @@ static int bcdma_get_bchan(struct udma_chan *uc) +@@ -1376,6 +1391,7 @@ static int bcdma_get_bchan(struct udma_chan *uc) static int udma_get_tchan(struct udma_chan *uc) { struct udma_dev *ud = uc->ud; @@ -90558,7 +109941,7 @@ index a35858610780c..75f2a0006c734 100644 if (uc->tchan) { dev_dbg(ud->dev, "chan%d: already have tchan%d allocated\n", -@@ -1390,8 +1405,11 @@ static int udma_get_tchan(struct udma_chan *uc) +@@ -1390,8 +1406,11 @@ static int udma_get_tchan(struct udma_chan *uc) */ uc->tchan = __udma_reserve_tchan(ud, uc->config.channel_tpl, uc->config.mapped_channel_id); @@ -90572,7 +109955,7 @@ index a35858610780c..75f2a0006c734 100644 if (ud->tflow_cnt) { int tflow_id; -@@ -1421,6 +1439,7 @@ static int udma_get_tchan(struct udma_chan *uc) +@@ -1421,6 +1440,7 @@ static int udma_get_tchan(struct udma_chan *uc) static int udma_get_rchan(struct udma_chan *uc) { struct udma_dev *ud = uc->ud; @@ -90580,7 +109963,7 @@ index a35858610780c..75f2a0006c734 100644 if (uc->rchan) { dev_dbg(ud->dev, "chan%d: already have rchan%d allocated\n", -@@ -1435,8 +1454,13 @@ static int udma_get_rchan(struct udma_chan *uc) +@@ -1435,8 +1455,13 @@ static int udma_get_rchan(struct udma_chan *uc) */ uc->rchan = __udma_reserve_rchan(ud, uc->config.channel_tpl, uc->config.mapped_channel_id); @@ -90595,7 +109978,7 @@ index a35858610780c..75f2a0006c734 100644 } static int udma_get_chan_pair(struct udma_chan *uc) -@@ -1490,6 +1514,7 @@ static int udma_get_chan_pair(struct udma_chan *uc) +@@ -1490,6 +1515,7 @@ static int udma_get_chan_pair(struct udma_chan *uc) static int udma_get_rflow(struct udma_chan *uc, int flow_id) { struct udma_dev *ud = uc->ud; @@ -90603,7 +109986,7 @@ index a35858610780c..75f2a0006c734 100644 if (!uc->rchan) { dev_err(ud->dev, "chan%d: does not have 
rchan??\n", uc->id); -@@ -1503,8 +1528,13 @@ static int udma_get_rflow(struct udma_chan *uc, int flow_id) +@@ -1503,8 +1529,13 @@ static int udma_get_rflow(struct udma_chan *uc, int flow_id) } uc->rflow = __udma_get_rflow(ud, flow_id); @@ -90618,7 +110001,7 @@ index a35858610780c..75f2a0006c734 100644 } static void bcdma_put_bchan(struct udma_chan *uc) -@@ -3791,7 +3821,6 @@ static enum dma_status udma_tx_status(struct dma_chan *chan, +@@ -3791,7 +3822,6 @@ static enum dma_status udma_tx_status(struct dma_chan *chan, bcnt = udma_tchanrt_read(uc, UDMA_CHAN_RT_BCNT_REG); } @@ -90643,7 +110026,7 @@ index d6b8a202474f4..290836b7e1be2 100644 enum dma_slave_buswidth buswidth; diff --git a/drivers/dma/xilinx/xilinx_dma.c b/drivers/dma/xilinx/xilinx_dma.c -index a4450bc954665..4273150b68dc4 100644 +index a4450bc954665..edc2bb8f0523c 100644 --- a/drivers/dma/xilinx/xilinx_dma.c +++ b/drivers/dma/xilinx/xilinx_dma.c @@ -3037,9 +3037,10 @@ static int xilinx_dma_probe(struct platform_device *pdev) @@ -90682,16 +110065,20 @@ index a4450bc954665..4273150b68dc4 100644 /* Initialize the DMA engine */ xdev->common.dev = &pdev->dev; -@@ -3134,7 +3139,7 @@ static int xilinx_dma_probe(struct platform_device *pdev) +@@ -3133,8 +3138,10 @@ static int xilinx_dma_probe(struct platform_device *pdev) + /* Initialize the channels */ for_each_child_of_node(node, child) { err = xilinx_dma_child_probe(xdev, child); - if (err < 0) +- if (err < 0) - goto disable_clks; ++ if (err < 0) { ++ of_node_put(child); + goto error; ++ } } if (xdev->dma_config->dmatype == XDMA_TYPE_VDMA) { -@@ -3169,12 +3174,12 @@ static int xilinx_dma_probe(struct platform_device *pdev) +@@ -3169,12 +3176,12 @@ static int xilinx_dma_probe(struct platform_device *pdev) return 0; @@ -90851,6 +110238,77 @@ index b8a7d9594afd4..1fa5ca57e9ec1 100644 irqs[idx] = irq; masks[idx] = dmc520_irq_configs[idx].mask; if (irq >= 0) { +diff --git a/drivers/edac/edac_device.c b/drivers/edac/edac_device.c +index 8c4d947fb8486..85c229985f905 100644 +--- a/drivers/edac/edac_device.c ++++ b/drivers/edac/edac_device.c +@@ -34,6 +34,9 @@ + static DEFINE_MUTEX(device_ctls_mutex); + static LIST_HEAD(edac_device_list); + ++/* Default workqueue processing interval on this instance, in msecs */ ++#define DEFAULT_POLL_INTERVAL 1000 ++ + #ifdef CONFIG_EDAC_DEBUG + static void edac_device_dump_device(struct edac_device_ctl_info *edac_dev) + { +@@ -366,7 +369,7 @@ static void edac_device_workq_function(struct work_struct *work_req) + * whole one second to save timers firing all over the period + * between integral seconds + */ +- if (edac_dev->poll_msec == 1000) ++ if (edac_dev->poll_msec == DEFAULT_POLL_INTERVAL) + edac_queue_work(&edac_dev->work, round_jiffies_relative(edac_dev->delay)); + else + edac_queue_work(&edac_dev->work, edac_dev->delay); +@@ -396,7 +399,7 @@ static void edac_device_workq_setup(struct edac_device_ctl_info *edac_dev, + * timers firing on sub-second basis, while they are happy + * to fire together on the 1 second exactly + */ +- if (edac_dev->poll_msec == 1000) ++ if (edac_dev->poll_msec == DEFAULT_POLL_INTERVAL) + edac_queue_work(&edac_dev->work, round_jiffies_relative(edac_dev->delay)); + else + edac_queue_work(&edac_dev->work, edac_dev->delay); +@@ -424,17 +427,16 @@ static void edac_device_workq_teardown(struct edac_device_ctl_info *edac_dev) + * Then restart the workq on the new delay + */ + void edac_device_reset_delay_period(struct edac_device_ctl_info *edac_dev, +- unsigned long value) ++ unsigned long msec) + { +- unsigned long jiffs = 
msecs_to_jiffies(value); +- +- if (value == 1000) +- jiffs = round_jiffies_relative(value); +- +- edac_dev->poll_msec = value; +- edac_dev->delay = jiffs; ++ edac_dev->poll_msec = msec; ++ edac_dev->delay = msecs_to_jiffies(msec); + +- edac_mod_work(&edac_dev->work, jiffs); ++ /* See comment in edac_device_workq_setup() above */ ++ if (edac_dev->poll_msec == DEFAULT_POLL_INTERVAL) ++ edac_mod_work(&edac_dev->work, round_jiffies_relative(edac_dev->delay)); ++ else ++ edac_mod_work(&edac_dev->work, edac_dev->delay); + } + + int edac_device_alloc_index(void) +@@ -473,11 +475,7 @@ int edac_device_add_device(struct edac_device_ctl_info *edac_dev) + /* This instance is NOW RUNNING */ + edac_dev->op_state = OP_RUNNING_POLL; + +- /* +- * enable workq processing on this instance, +- * default = 1000 msec +- */ +- edac_device_workq_setup(edac_dev, 1000); ++ edac_device_workq_setup(edac_dev, edac_dev->poll_msec ?: DEFAULT_POLL_INTERVAL); + } else { + edac_dev->op_state = OP_RUNNING_INTERRUPT; + } diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c index 2c5975674723a..a859ddd9d4a13 100644 --- a/drivers/edac/edac_mc.c @@ -90864,6 +110322,19 @@ index 2c5975674723a..a859ddd9d4a13 100644 if (r == 0) return (char *)ptr; +diff --git a/drivers/edac/edac_module.h b/drivers/edac/edac_module.h +index aa1f91688eb8e..841d238bc3f18 100644 +--- a/drivers/edac/edac_module.h ++++ b/drivers/edac/edac_module.h +@@ -56,7 +56,7 @@ bool edac_stop_work(struct delayed_work *work); + bool edac_mod_work(struct delayed_work *work, unsigned long delay); + + extern void edac_device_reset_delay_period(struct edac_device_ctl_info +- *edac_dev, unsigned long value); ++ *edac_dev, unsigned long msec); + extern void edac_mc_reset_delay_period(unsigned long value); + + extern void *edac_align_ptr(void **p, unsigned size, int n_elems); diff --git a/drivers/edac/ghes_edac.c b/drivers/edac/ghes_edac.c index 6d1ddecbf0da3..d0a9ccf640c4b 100644 --- a/drivers/edac/ghes_edac.c @@ -90886,11 +110357,49 @@ index 6d1ddecbf0da3..d0a9ccf640c4b 100644 } static void assign_dmi_dimm_info(struct dimm_info *dimm, struct memdev_dmi_entry *entry) +diff --git a/drivers/edac/highbank_mc_edac.c b/drivers/edac/highbank_mc_edac.c +index 61b76ec226af1..19fba258ae108 100644 +--- a/drivers/edac/highbank_mc_edac.c ++++ b/drivers/edac/highbank_mc_edac.c +@@ -174,8 +174,10 @@ static int highbank_mc_probe(struct platform_device *pdev) + drvdata = mci->pvt_info; + platform_set_drvdata(pdev, mci); + +- if (!devres_open_group(&pdev->dev, NULL, GFP_KERNEL)) +- return -ENOMEM; ++ if (!devres_open_group(&pdev->dev, NULL, GFP_KERNEL)) { ++ res = -ENOMEM; ++ goto free; ++ } + + r = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!r) { +@@ -243,6 +245,7 @@ err2: + edac_mc_del_mc(&pdev->dev); + err: + devres_release_group(&pdev->dev, NULL); ++free: + edac_mc_free(mci); + return res; + } diff --git a/drivers/edac/i10nm_base.c b/drivers/edac/i10nm_base.c -index 83345bfac246f..6cf50ee0b77c5 100644 +index 83345bfac246f..e0af60833d28c 100644 --- a/drivers/edac/i10nm_base.c +++ b/drivers/edac/i10nm_base.c -@@ -358,6 +358,9 @@ static int i10nm_get_hbm_munits(void) +@@ -198,11 +198,10 @@ static struct pci_dev *pci_get_dev_wrapper(int dom, unsigned int bus, + if (unlikely(pci_enable_device(pdev) < 0)) { + edac_dbg(2, "Failed to enable device %02x:%02x.%x\n", + bus, dev, fun); ++ pci_dev_put(pdev); + return NULL; + } + +- pci_dev_get(pdev); +- + return pdev; + } + +@@ -358,6 +357,9 @@ static int i10nm_get_hbm_munits(void) mbase = ioremap(base + off, 
I10NM_HBM_IMC_MMIO_SIZE); if (!mbase) { @@ -90900,7 +110409,7 @@ index 83345bfac246f..6cf50ee0b77c5 100644 i10nm_printk(KERN_ERR, "Failed to ioremap for hbm mc 0x%llx\n", base + off); return -ENOMEM; -@@ -368,6 +371,12 @@ static int i10nm_get_hbm_munits(void) +@@ -368,6 +370,12 @@ static int i10nm_get_hbm_munits(void) mcmtr = I10NM_GET_MCMTR(&d->imc[lmc], 0); if (!I10NM_IS_HBM_IMC(mcmtr)) { @@ -90913,6 +110422,36 @@ index 83345bfac246f..6cf50ee0b77c5 100644 i10nm_printk(KERN_ERR, "This isn't an hbm mc!\n"); return -ENODEV; } +diff --git a/drivers/edac/qcom_edac.c b/drivers/edac/qcom_edac.c +index 97a27e42dd610..c45519f59dc11 100644 +--- a/drivers/edac/qcom_edac.c ++++ b/drivers/edac/qcom_edac.c +@@ -252,7 +252,7 @@ clear: + static int + dump_syn_reg(struct edac_device_ctl_info *edev_ctl, int err_type, u32 bank) + { +- struct llcc_drv_data *drv = edev_ctl->pvt_info; ++ struct llcc_drv_data *drv = edev_ctl->dev->platform_data; + int ret; + + ret = dump_syn_reg_values(drv, bank, err_type); +@@ -289,7 +289,7 @@ static irqreturn_t + llcc_ecc_irq_handler(int irq, void *edev_ctl) + { + struct edac_device_ctl_info *edac_dev_ctl = edev_ctl; +- struct llcc_drv_data *drv = edac_dev_ctl->pvt_info; ++ struct llcc_drv_data *drv = edac_dev_ctl->dev->platform_data; + irqreturn_t irq_rc = IRQ_NONE; + u32 drp_error, trp_error, i; + int ret; +@@ -358,7 +358,6 @@ static int qcom_llcc_edac_probe(struct platform_device *pdev) + edev_ctl->dev_name = dev_name(dev); + edev_ctl->ctl_name = "llcc"; + edev_ctl->panic_on_ue = LLCC_ERP_PANIC_ON_UE; +- edev_ctl->pvt_info = llcc_driv_data; + + rc = edac_device_add_device(edev_ctl); + if (rc) diff --git a/drivers/edac/sb_edac.c b/drivers/edac/sb_edac.c index 4c626fcd4dcbb..1522d4aa2ca62 100644 --- a/drivers/edac/sb_edac.c @@ -90987,6 +110526,19 @@ index 2ccd1db5e98ff..7197f9fa02457 100644 goto out_err; } rc = devm_request_irq(&pdev->dev, irq, +diff --git a/drivers/extcon/Kconfig b/drivers/extcon/Kconfig +index c69d40ae5619a..7684b3afa6304 100644 +--- a/drivers/extcon/Kconfig ++++ b/drivers/extcon/Kconfig +@@ -180,7 +180,7 @@ config EXTCON_USBC_CROS_EC + + config EXTCON_USBC_TUSB320 + tristate "TI TUSB320 USB-C extcon support" +- depends on I2C ++ depends on I2C && TYPEC + select REGMAP_I2C + help + Say Y here to enable support for USB Type C cable detection extcon diff --git a/drivers/extcon/extcon-axp288.c b/drivers/extcon/extcon-axp288.c index fdb31954cf2b6..8073bc7d3e615 100644 --- a/drivers/extcon/extcon-axp288.c @@ -91031,6 +110583,498 @@ index 5b9a3cf8df268..2a7874108df87 100644 /* * Update current extcon state if for example OTG connection was there * before the probe +diff --git a/drivers/extcon/extcon-usbc-tusb320.c b/drivers/extcon/extcon-usbc-tusb320.c +index 805af73b41521..9dfa545427ca1 100644 +--- a/drivers/extcon/extcon-usbc-tusb320.c ++++ b/drivers/extcon/extcon-usbc-tusb320.c +@@ -1,11 +1,12 @@ + // SPDX-License-Identifier: GPL-2.0 +-/** ++/* + * drivers/extcon/extcon-tusb320.c - TUSB320 extcon driver + * + * Copyright (C) 2020 National Instruments Corporation + * Author: Michael Auchter <michael.auchter@ni.com> + */ + ++#include <linux/bitfield.h> + #include <linux/extcon-provider.h> + #include <linux/i2c.h> + #include <linux/init.h> +@@ -13,21 +14,70 @@ + #include <linux/kernel.h> + #include <linux/module.h> + #include <linux/regmap.h> ++#include <linux/usb/typec.h> ++ ++#define TUSB320_REG8 0x8 ++#define TUSB320_REG8_CURRENT_MODE_ADVERTISE GENMASK(7, 6) ++#define TUSB320_REG8_CURRENT_MODE_ADVERTISE_USB 0x0 ++#define 
TUSB320_REG8_CURRENT_MODE_ADVERTISE_15A 0x1 ++#define TUSB320_REG8_CURRENT_MODE_ADVERTISE_30A 0x2 ++#define TUSB320_REG8_CURRENT_MODE_DETECT GENMASK(5, 4) ++#define TUSB320_REG8_CURRENT_MODE_DETECT_DEF 0x0 ++#define TUSB320_REG8_CURRENT_MODE_DETECT_MED 0x1 ++#define TUSB320_REG8_CURRENT_MODE_DETECT_ACC 0x2 ++#define TUSB320_REG8_CURRENT_MODE_DETECT_HI 0x3 ++#define TUSB320_REG8_ACCESSORY_CONNECTED GENMASK(3, 2) ++#define TUSB320_REG8_ACCESSORY_CONNECTED_NONE 0x0 ++#define TUSB320_REG8_ACCESSORY_CONNECTED_AUDIO 0x4 ++#define TUSB320_REG8_ACCESSORY_CONNECTED_ACC 0x5 ++#define TUSB320_REG8_ACCESSORY_CONNECTED_DEBUG 0x6 ++#define TUSB320_REG8_ACTIVE_CABLE_DETECTION BIT(0) + + #define TUSB320_REG9 0x9 + #define TUSB320_REG9_ATTACHED_STATE_SHIFT 6 + #define TUSB320_REG9_ATTACHED_STATE_MASK 0x3 + #define TUSB320_REG9_CABLE_DIRECTION BIT(5) + #define TUSB320_REG9_INTERRUPT_STATUS BIT(4) +-#define TUSB320_ATTACHED_STATE_NONE 0x0 +-#define TUSB320_ATTACHED_STATE_DFP 0x1 +-#define TUSB320_ATTACHED_STATE_UFP 0x2 +-#define TUSB320_ATTACHED_STATE_ACC 0x3 ++ ++#define TUSB320_REGA 0xa ++#define TUSB320L_REGA_DISABLE_TERM BIT(0) ++#define TUSB320_REGA_I2C_SOFT_RESET BIT(3) ++#define TUSB320_REGA_MODE_SELECT_SHIFT 4 ++#define TUSB320_REGA_MODE_SELECT_MASK 0x3 ++ ++#define TUSB320L_REGA0_REVISION 0xa0 ++ ++enum tusb320_attached_state { ++ TUSB320_ATTACHED_STATE_NONE, ++ TUSB320_ATTACHED_STATE_DFP, ++ TUSB320_ATTACHED_STATE_UFP, ++ TUSB320_ATTACHED_STATE_ACC, ++}; ++ ++enum tusb320_mode { ++ TUSB320_MODE_PORT, ++ TUSB320_MODE_UFP, ++ TUSB320_MODE_DFP, ++ TUSB320_MODE_DRP, ++}; ++ ++struct tusb320_priv; ++ ++struct tusb320_ops { ++ int (*set_mode)(struct tusb320_priv *priv, enum tusb320_mode mode); ++ int (*get_revision)(struct tusb320_priv *priv, unsigned int *revision); ++}; + + struct tusb320_priv { + struct device *dev; + struct regmap *regmap; + struct extcon_dev *edev; ++ struct tusb320_ops *ops; ++ enum tusb320_attached_state state; ++ struct typec_port *port; ++ struct typec_capability cap; ++ enum typec_port_type port_type; ++ enum typec_pwr_opmode pwr_opmode; + }; + + static const char * const tusb_attached_states[] = { +@@ -62,19 +112,142 @@ static int tusb320_check_signature(struct tusb320_priv *priv) + return 0; + } + +-static irqreturn_t tusb320_irq_handler(int irq, void *dev_id) ++static int tusb320_set_mode(struct tusb320_priv *priv, enum tusb320_mode mode) + { +- struct tusb320_priv *priv = dev_id; +- int state, polarity; +- unsigned reg; ++ int ret; + +- if (regmap_read(priv->regmap, TUSB320_REG9, ®)) { +- dev_err(priv->dev, "error during i2c read!\n"); +- return IRQ_NONE; ++ /* Mode cannot be changed while cable is attached */ ++ if (priv->state != TUSB320_ATTACHED_STATE_NONE) ++ return -EBUSY; ++ ++ /* Write mode */ ++ ret = regmap_write_bits(priv->regmap, TUSB320_REGA, ++ TUSB320_REGA_MODE_SELECT_MASK << TUSB320_REGA_MODE_SELECT_SHIFT, ++ mode << TUSB320_REGA_MODE_SELECT_SHIFT); ++ if (ret) { ++ dev_err(priv->dev, "failed to write mode: %d\n", ret); ++ return ret; + } + +- if (!(reg & TUSB320_REG9_INTERRUPT_STATUS)) +- return IRQ_NONE; ++ return 0; ++} ++ ++static int tusb320l_set_mode(struct tusb320_priv *priv, enum tusb320_mode mode) ++{ ++ int ret; ++ ++ /* Disable CC state machine */ ++ ret = regmap_write_bits(priv->regmap, TUSB320_REGA, ++ TUSB320L_REGA_DISABLE_TERM, 1); ++ if (ret) { ++ dev_err(priv->dev, ++ "failed to disable CC state machine: %d\n", ret); ++ return ret; ++ } ++ ++ /* Write mode */ ++ ret = regmap_write_bits(priv->regmap, TUSB320_REGA, ++ 
TUSB320_REGA_MODE_SELECT_MASK << TUSB320_REGA_MODE_SELECT_SHIFT, ++ mode << TUSB320_REGA_MODE_SELECT_SHIFT); ++ if (ret) { ++ dev_err(priv->dev, "failed to write mode: %d\n", ret); ++ goto err; ++ } ++ ++ msleep(5); ++err: ++ /* Re-enable CC state machine */ ++ ret = regmap_write_bits(priv->regmap, TUSB320_REGA, ++ TUSB320L_REGA_DISABLE_TERM, 0); ++ if (ret) ++ dev_err(priv->dev, ++ "failed to re-enable CC state machine: %d\n", ret); ++ ++ return ret; ++} ++ ++static int tusb320_reset(struct tusb320_priv *priv) ++{ ++ int ret; ++ ++ /* Set mode to default (follow PORT pin) */ ++ ret = priv->ops->set_mode(priv, TUSB320_MODE_PORT); ++ if (ret && ret != -EBUSY) { ++ dev_err(priv->dev, ++ "failed to set mode to PORT: %d\n", ret); ++ return ret; ++ } ++ ++ /* Perform soft reset */ ++ ret = regmap_write_bits(priv->regmap, TUSB320_REGA, ++ TUSB320_REGA_I2C_SOFT_RESET, 1); ++ if (ret) { ++ dev_err(priv->dev, ++ "failed to write soft reset bit: %d\n", ret); ++ return ret; ++ } ++ ++ /* Wait for chip to go through reset */ ++ msleep(95); ++ ++ return 0; ++} ++ ++static int tusb320l_get_revision(struct tusb320_priv *priv, unsigned int *revision) ++{ ++ return regmap_read(priv->regmap, TUSB320L_REGA0_REVISION, revision); ++} ++ ++static struct tusb320_ops tusb320_ops = { ++ .set_mode = tusb320_set_mode, ++}; ++ ++static struct tusb320_ops tusb320l_ops = { ++ .set_mode = tusb320l_set_mode, ++ .get_revision = tusb320l_get_revision, ++}; ++ ++static int tusb320_set_adv_pwr_mode(struct tusb320_priv *priv) ++{ ++ u8 mode; ++ ++ if (priv->pwr_opmode == TYPEC_PWR_MODE_USB) ++ mode = TUSB320_REG8_CURRENT_MODE_ADVERTISE_USB; ++ else if (priv->pwr_opmode == TYPEC_PWR_MODE_1_5A) ++ mode = TUSB320_REG8_CURRENT_MODE_ADVERTISE_15A; ++ else if (priv->pwr_opmode == TYPEC_PWR_MODE_3_0A) ++ mode = TUSB320_REG8_CURRENT_MODE_ADVERTISE_30A; ++ else /* No other mode is supported. */ ++ return -EINVAL; ++ ++ return regmap_write_bits(priv->regmap, TUSB320_REG8, ++ TUSB320_REG8_CURRENT_MODE_ADVERTISE, ++ FIELD_PREP(TUSB320_REG8_CURRENT_MODE_ADVERTISE, ++ mode)); ++} ++ ++static int tusb320_port_type_set(struct typec_port *port, ++ enum typec_port_type type) ++{ ++ struct tusb320_priv *priv = typec_get_drvdata(port); ++ ++ if (type == TYPEC_PORT_SRC) ++ return priv->ops->set_mode(priv, TUSB320_MODE_DFP); ++ else if (type == TYPEC_PORT_SNK) ++ return priv->ops->set_mode(priv, TUSB320_MODE_UFP); ++ else if (type == TYPEC_PORT_DRP) ++ return priv->ops->set_mode(priv, TUSB320_MODE_DRP); ++ else ++ return priv->ops->set_mode(priv, TUSB320_MODE_PORT); ++} ++ ++static const struct typec_operations tusb320_typec_ops = { ++ .port_type_set = tusb320_port_type_set, ++}; ++ ++static void tusb320_extcon_irq_handler(struct tusb320_priv *priv, u8 reg) ++{ ++ int state, polarity; + + state = (reg >> TUSB320_REG9_ATTACHED_STATE_SHIFT) & + TUSB320_REG9_ATTACHED_STATE_MASK; +@@ -96,20 +269,171 @@ static irqreturn_t tusb320_irq_handler(int irq, void *dev_id) + extcon_sync(priv->edev, EXTCON_USB); + extcon_sync(priv->edev, EXTCON_USB_HOST); + ++ priv->state = state; ++} ++ ++static void tusb320_typec_irq_handler(struct tusb320_priv *priv, u8 reg9) ++{ ++ struct typec_port *port = priv->port; ++ struct device *dev = priv->dev; ++ u8 mode, role, state; ++ int ret, reg8; ++ bool ori; ++ ++ ori = reg9 & TUSB320_REG9_CABLE_DIRECTION; ++ typec_set_orientation(port, ori ? 
TYPEC_ORIENTATION_REVERSE : ++ TYPEC_ORIENTATION_NORMAL); ++ ++ state = (reg9 >> TUSB320_REG9_ATTACHED_STATE_SHIFT) & ++ TUSB320_REG9_ATTACHED_STATE_MASK; ++ if (state == TUSB320_ATTACHED_STATE_DFP) ++ role = TYPEC_SOURCE; ++ else ++ role = TYPEC_SINK; ++ ++ typec_set_vconn_role(port, role); ++ typec_set_pwr_role(port, role); ++ typec_set_data_role(port, role == TYPEC_SOURCE ? ++ TYPEC_HOST : TYPEC_DEVICE); ++ ++ ret = regmap_read(priv->regmap, TUSB320_REG8, ®8); ++ if (ret) { ++ dev_err(dev, "error during reg8 i2c read, ret=%d!\n", ret); ++ return; ++ } ++ ++ mode = FIELD_GET(TUSB320_REG8_CURRENT_MODE_DETECT, reg8); ++ if (mode == TUSB320_REG8_CURRENT_MODE_DETECT_DEF) ++ typec_set_pwr_opmode(port, TYPEC_PWR_MODE_USB); ++ else if (mode == TUSB320_REG8_CURRENT_MODE_DETECT_MED) ++ typec_set_pwr_opmode(port, TYPEC_PWR_MODE_1_5A); ++ else if (mode == TUSB320_REG8_CURRENT_MODE_DETECT_HI) ++ typec_set_pwr_opmode(port, TYPEC_PWR_MODE_3_0A); ++ else /* Charge through accessory */ ++ typec_set_pwr_opmode(port, TYPEC_PWR_MODE_USB); ++} ++ ++static irqreturn_t tusb320_state_update_handler(struct tusb320_priv *priv, ++ bool force_update) ++{ ++ unsigned int reg; ++ ++ if (regmap_read(priv->regmap, TUSB320_REG9, ®)) { ++ dev_err(priv->dev, "error during i2c read!\n"); ++ return IRQ_NONE; ++ } ++ ++ if (!force_update && !(reg & TUSB320_REG9_INTERRUPT_STATUS)) ++ return IRQ_NONE; ++ ++ tusb320_extcon_irq_handler(priv, reg); ++ ++ /* ++ * Type-C support is optional. Only call the Type-C handler if a ++ * port had been registered previously. ++ */ ++ if (priv->port) ++ tusb320_typec_irq_handler(priv, reg); ++ + regmap_write(priv->regmap, TUSB320_REG9, reg); + + return IRQ_HANDLED; + } + ++static irqreturn_t tusb320_irq_handler(int irq, void *dev_id) ++{ ++ struct tusb320_priv *priv = dev_id; ++ ++ return tusb320_state_update_handler(priv, false); ++} ++ + static const struct regmap_config tusb320_regmap_config = { + .reg_bits = 8, + .val_bits = 8, + }; + +-static int tusb320_extcon_probe(struct i2c_client *client, +- const struct i2c_device_id *id) ++static int tusb320_extcon_probe(struct tusb320_priv *priv) ++{ ++ int ret; ++ ++ priv->edev = devm_extcon_dev_allocate(priv->dev, tusb320_extcon_cable); ++ if (IS_ERR(priv->edev)) { ++ dev_err(priv->dev, "failed to allocate extcon device\n"); ++ return PTR_ERR(priv->edev); ++ } ++ ++ ret = devm_extcon_dev_register(priv->dev, priv->edev); ++ if (ret < 0) { ++ dev_err(priv->dev, "failed to register extcon device\n"); ++ return ret; ++ } ++ ++ extcon_set_property_capability(priv->edev, EXTCON_USB, ++ EXTCON_PROP_USB_TYPEC_POLARITY); ++ extcon_set_property_capability(priv->edev, EXTCON_USB_HOST, ++ EXTCON_PROP_USB_TYPEC_POLARITY); ++ ++ return 0; ++} ++ ++static int tusb320_typec_probe(struct i2c_client *client, ++ struct tusb320_priv *priv) ++{ ++ struct fwnode_handle *connector; ++ const char *cap_str; ++ int ret; ++ ++ /* The Type-C connector is optional, for backward compatibility. */ ++ connector = device_get_named_child_node(&client->dev, "connector"); ++ if (!connector) ++ return 0; ++ ++ /* Type-C connector found. 
*/ ++ ret = typec_get_fw_cap(&priv->cap, connector); ++ if (ret) ++ return ret; ++ ++ priv->port_type = priv->cap.type; ++ ++ /* This goes into register 0x8 field CURRENT_MODE_ADVERTISE */ ++ ret = fwnode_property_read_string(connector, "typec-power-opmode", &cap_str); ++ if (ret) ++ return ret; ++ ++ ret = typec_find_pwr_opmode(cap_str); ++ if (ret < 0) ++ return ret; ++ if (ret == TYPEC_PWR_MODE_PD) ++ return -EINVAL; ++ ++ priv->pwr_opmode = ret; ++ ++ /* Initialize the hardware with the devicetree settings. */ ++ ret = tusb320_set_adv_pwr_mode(priv); ++ if (ret) ++ return ret; ++ ++ priv->cap.revision = USB_TYPEC_REV_1_1; ++ priv->cap.accessory[0] = TYPEC_ACCESSORY_AUDIO; ++ priv->cap.accessory[1] = TYPEC_ACCESSORY_DEBUG; ++ priv->cap.orientation_aware = true; ++ priv->cap.driver_data = priv; ++ priv->cap.ops = &tusb320_typec_ops; ++ priv->cap.fwnode = connector; ++ ++ priv->port = typec_register_port(&client->dev, &priv->cap); ++ if (IS_ERR(priv->port)) ++ return PTR_ERR(priv->port); ++ ++ return 0; ++} ++ ++static int tusb320_probe(struct i2c_client *client, ++ const struct i2c_device_id *id) + { + struct tusb320_priv *priv; ++ const void *match_data; ++ unsigned int revision; + int ret; + + priv = devm_kzalloc(&client->dev, sizeof(*priv), GFP_KERNEL); +@@ -125,25 +449,42 @@ static int tusb320_extcon_probe(struct i2c_client *client, + if (ret) + return ret; + +- priv->edev = devm_extcon_dev_allocate(priv->dev, tusb320_extcon_cable); +- if (IS_ERR(priv->edev)) { +- dev_err(priv->dev, "failed to allocate extcon device\n"); +- return PTR_ERR(priv->edev); ++ match_data = device_get_match_data(&client->dev); ++ if (!match_data) ++ return -EINVAL; ++ ++ priv->ops = (struct tusb320_ops*)match_data; ++ ++ if (priv->ops->get_revision) { ++ ret = priv->ops->get_revision(priv, &revision); ++ if (ret) ++ dev_warn(priv->dev, ++ "failed to read revision register: %d\n", ret); ++ else ++ dev_info(priv->dev, "chip revision %d\n", revision); + } + +- ret = devm_extcon_dev_register(priv->dev, priv->edev); +- if (ret < 0) { +- dev_err(priv->dev, "failed to register extcon device\n"); ++ ret = tusb320_extcon_probe(priv); ++ if (ret) + return ret; +- } + +- extcon_set_property_capability(priv->edev, EXTCON_USB, +- EXTCON_PROP_USB_TYPEC_POLARITY); +- extcon_set_property_capability(priv->edev, EXTCON_USB_HOST, +- EXTCON_PROP_USB_TYPEC_POLARITY); ++ ret = tusb320_typec_probe(client, priv); ++ if (ret) ++ return ret; + + /* update initial state */ +- tusb320_irq_handler(client->irq, priv); ++ tusb320_state_update_handler(priv, true); ++ ++ /* Reset chip to its default state */ ++ ret = tusb320_reset(priv); ++ if (ret) ++ dev_warn(priv->dev, "failed to reset chip: %d\n", ret); ++ else ++ /* ++ * State and polarity might change after a reset, so update ++ * them again and make sure the interrupt status bit is cleared. 
++ */ ++ tusb320_state_update_handler(priv, true); + + ret = devm_request_threaded_irq(priv->dev, client->irq, NULL, + tusb320_irq_handler, +@@ -154,13 +495,14 @@ static int tusb320_extcon_probe(struct i2c_client *client, + } + + static const struct of_device_id tusb320_extcon_dt_match[] = { +- { .compatible = "ti,tusb320", }, ++ { .compatible = "ti,tusb320", .data = &tusb320_ops, }, ++ { .compatible = "ti,tusb320l", .data = &tusb320l_ops, }, + { } + }; + MODULE_DEVICE_TABLE(of, tusb320_extcon_dt_match); + + static struct i2c_driver tusb320_extcon_driver = { +- .probe = tusb320_extcon_probe, ++ .probe = tusb320_probe, + .driver = { + .name = "extcon-tusb320", + .of_match_table = tusb320_extcon_dt_match, diff --git a/drivers/extcon/extcon.c b/drivers/extcon/extcon.c index e7a9561a826d3..f305503ec27ed 100644 --- a/drivers/extcon/extcon.c @@ -91134,10 +111178,22 @@ index 54be88167c60b..f3b3953cac834 100644 /* Wait for all users, especially device workqueue jobs, to finish. */ fw_card_put(card); diff --git a/drivers/firewire/core-cdev.c b/drivers/firewire/core-cdev.c -index fb6c651214f32..b0cc3f1e9bb00 100644 +index fb6c651214f32..16ea847ade5fd 100644 --- a/drivers/firewire/core-cdev.c +++ b/drivers/firewire/core-cdev.c -@@ -1480,6 +1480,7 @@ static void outbound_phy_packet_callback(struct fw_packet *packet, +@@ -818,8 +818,10 @@ static int ioctl_send_response(struct client *client, union ioctl_arg *arg) + + r = container_of(resource, struct inbound_transaction_resource, + resource); +- if (is_fcp_request(r->request)) ++ if (is_fcp_request(r->request)) { ++ kfree(r->data); + goto out; ++ } + + if (a->length != fw_get_response_length(r->request)) { + ret = -EINVAL; +@@ -1480,6 +1482,7 @@ static void outbound_phy_packet_callback(struct fw_packet *packet, { struct outbound_phy_packet_event *e = container_of(packet, struct outbound_phy_packet_event, p); @@ -91145,7 +111201,7 @@ index fb6c651214f32..b0cc3f1e9bb00 100644 switch (status) { /* expected: */ -@@ -1496,9 +1497,10 @@ static void outbound_phy_packet_callback(struct fw_packet *packet, +@@ -1496,9 +1499,10 @@ static void outbound_phy_packet_callback(struct fw_packet *packet, } e->phy_packet.data[0] = packet->timestamp; @@ -91393,6 +111449,31 @@ index de416f9e79213..3fe172c03c247 100644 ph->xops->xfer_put(ph, t); +diff --git a/drivers/firmware/arm_scmi/bus.c b/drivers/firmware/arm_scmi/bus.c +index f6fe723ab869e..7c1c0951e562d 100644 +--- a/drivers/firmware/arm_scmi/bus.c ++++ b/drivers/firmware/arm_scmi/bus.c +@@ -216,9 +216,20 @@ void scmi_device_destroy(struct scmi_device *scmi_dev) + device_unregister(&scmi_dev->dev); + } + ++void scmi_device_link_add(struct device *consumer, struct device *supplier) ++{ ++ struct device_link *link; ++ ++ link = device_link_add(consumer, supplier, DL_FLAG_AUTOREMOVE_CONSUMER); ++ ++ WARN_ON(!link); ++} ++ + void scmi_set_handle(struct scmi_device *scmi_dev) + { + scmi_dev->handle = scmi_handle_get(&scmi_dev->dev); ++ if (scmi_dev->handle) ++ scmi_device_link_add(&scmi_dev->dev, scmi_dev->handle->dev); + } + + int scmi_protocol_register(const struct scmi_protocol *proto) diff --git a/drivers/firmware/arm_scmi/clock.c b/drivers/firmware/arm_scmi/clock.c index 35b56c8ba0c0e..e76194a60edf9 100644 --- a/drivers/firmware/arm_scmi/clock.c @@ -91422,8 +111503,20 @@ index 35b56c8ba0c0e..e76194a60edf9 100644 if (!clk->name[0]) return NULL; +diff --git a/drivers/firmware/arm_scmi/common.h b/drivers/firmware/arm_scmi/common.h +index dea1bfbe10527..b9f5829c0c4dd 100644 +--- a/drivers/firmware/arm_scmi/common.h ++++ 
b/drivers/firmware/arm_scmi/common.h +@@ -272,6 +272,7 @@ struct scmi_xfer_ops { + struct scmi_revision_info * + scmi_revision_area_get(const struct scmi_protocol_handle *ph); + int scmi_handle_put(const struct scmi_handle *handle); ++void scmi_device_link_add(struct device *consumer, struct device *supplier); + struct scmi_handle *scmi_handle_get(struct device *dev); + void scmi_set_handle(struct scmi_device *scmi_dev); + void scmi_setup_protocol_implemented(const struct scmi_protocol_handle *ph, diff --git a/drivers/firmware/arm_scmi/driver.c b/drivers/firmware/arm_scmi/driver.c -index b406b3f78f467..e815b8f987393 100644 +index b406b3f78f467..11842497b2261 100644 --- a/drivers/firmware/arm_scmi/driver.c +++ b/drivers/firmware/arm_scmi/driver.c @@ -652,7 +652,8 @@ static void scmi_handle_response(struct scmi_chan_info *cinfo, @@ -91436,7 +111529,98 @@ index b406b3f78f467..e815b8f987393 100644 return; } -@@ -2112,7 +2113,7 @@ static void __exit scmi_driver_exit(void) +@@ -782,6 +783,8 @@ static int do_xfer(const struct scmi_protocol_handle *ph, + xfer->hdr.protocol_id, xfer->hdr.seq, + xfer->hdr.poll_completion); + ++ /* Clear any stale status */ ++ xfer->hdr.status = SCMI_SUCCESS; + xfer->state = SCMI_XFER_SENT_OK; + /* + * Even though spinlocking is not needed here since no race is possible +@@ -1515,8 +1518,12 @@ scmi_txrx_setup(struct scmi_info *info, struct device *dev, int prot_id) + { + int ret = scmi_chan_setup(info, dev, prot_id, true); + +- if (!ret) /* Rx is optional, hence no error check */ +- scmi_chan_setup(info, dev, prot_id, false); ++ if (!ret) { ++ /* Rx is optional, report only memory errors */ ++ ret = scmi_chan_setup(info, dev, prot_id, false); ++ if (ret && ret != -ENOMEM) ++ ret = 0; ++ } + + return ret; + } +@@ -1726,10 +1733,16 @@ int scmi_protocol_device_request(const struct scmi_device_id *id_table) + sdev = scmi_get_protocol_device(child, info, + id_table->protocol_id, + id_table->name); +- /* Set handle if not already set: device existed */ +- if (sdev && !sdev->handle) +- sdev->handle = +- scmi_handle_get_from_info_unlocked(info); ++ if (sdev) { ++ /* Set handle if not already set: device existed */ ++ if (!sdev->handle) ++ sdev->handle = ++ scmi_handle_get_from_info_unlocked(info); ++ /* Relink consumer and suppliers */ ++ if (sdev->handle) ++ scmi_device_link_add(&sdev->dev, ++ sdev->handle->dev); ++ } + } else { + dev_err(info->dev, + "Failed. 
SCMI protocol %d not active.\n", +@@ -1915,20 +1928,17 @@ void scmi_free_channel(struct scmi_chan_info *cinfo, struct idr *idr, int id) + + static int scmi_remove(struct platform_device *pdev) + { +- int ret = 0, id; ++ int ret, id; + struct scmi_info *info = platform_get_drvdata(pdev); + struct device_node *child; + + mutex_lock(&scmi_list_mutex); + if (info->users) +- ret = -EBUSY; +- else +- list_del(&info->node); ++ dev_warn(&pdev->dev, ++ "Still active SCMI users will be forcibly unbound.\n"); ++ list_del(&info->node); + mutex_unlock(&scmi_list_mutex); + +- if (ret) +- return ret; +- + scmi_notification_exit(&info->handle); + + mutex_lock(&info->protocols_mtx); +@@ -1940,7 +1950,11 @@ static int scmi_remove(struct platform_device *pdev) + idr_destroy(&info->active_protocols); + + /* Safe to free channels since no more users */ +- return scmi_cleanup_txrx_channels(info); ++ ret = scmi_cleanup_txrx_channels(info); ++ if (ret) ++ dev_warn(&pdev->dev, "Failed to cleanup SCMI channels.\n"); ++ ++ return 0; + } + + static ssize_t protocol_version_show(struct device *dev, +@@ -2008,6 +2022,7 @@ MODULE_DEVICE_TABLE(of, scmi_of_match); + static struct platform_driver scmi_driver = { + .driver = { + .name = "arm-scmi", ++ .suppress_bind_attrs = true, + .of_match_table = scmi_of_match, + .dev_groups = versions_groups, + }, +@@ -2112,7 +2127,7 @@ static void __exit scmi_driver_exit(void) } module_exit(scmi_driver_exit); @@ -91653,8 +111837,38 @@ index 308471586381f..1ed66d13c06c4 100644 return si->sensors + sensor_id; } +diff --git a/drivers/firmware/arm_scmi/shmem.c b/drivers/firmware/arm_scmi/shmem.c +index 0e3eaea5d8526..56a1f61aa3ff2 100644 +--- a/drivers/firmware/arm_scmi/shmem.c ++++ b/drivers/firmware/arm_scmi/shmem.c +@@ -58,10 +58,11 @@ u32 shmem_read_header(struct scmi_shared_mem __iomem *shmem) + void shmem_fetch_response(struct scmi_shared_mem __iomem *shmem, + struct scmi_xfer *xfer) + { ++ size_t len = ioread32(&shmem->length); ++ + xfer->hdr.status = ioread32(shmem->msg_payload); + /* Skip the length of header and status in shmem area i.e 8 bytes */ +- xfer->rx.len = min_t(size_t, xfer->rx.len, +- ioread32(&shmem->length) - 8); ++ xfer->rx.len = min_t(size_t, xfer->rx.len, len > 8 ? len - 8 : 0); + + /* Take a copy to the rx buffer.. */ + memcpy_fromio(xfer->rx.buf, shmem->msg_payload + 4, xfer->rx.len); +@@ -70,8 +71,10 @@ void shmem_fetch_response(struct scmi_shared_mem __iomem *shmem, + void shmem_fetch_notification(struct scmi_shared_mem __iomem *shmem, + size_t max_len, struct scmi_xfer *xfer) + { ++ size_t len = ioread32(&shmem->length); ++ + /* Skip only the length of header in shmem area i.e 4 bytes */ +- xfer->rx.len = min_t(size_t, max_len, ioread32(&shmem->length) - 4); ++ xfer->rx.len = min_t(size_t, max_len, len > 4 ? len - 4 : 0); + + /* Take a copy to the rx buffer.. 
*/ + memcpy_fromio(xfer->rx.buf, shmem->msg_payload, xfer->rx.len); diff --git a/drivers/firmware/arm_scmi/virtio.c b/drivers/firmware/arm_scmi/virtio.c -index 11e8efb713751..87039c5c03fdb 100644 +index 11e8efb713751..0c351eeee7463 100644 --- a/drivers/firmware/arm_scmi/virtio.c +++ b/drivers/firmware/arm_scmi/virtio.c @@ -82,7 +82,8 @@ static bool scmi_vio_have_vq_rx(struct virtio_device *vdev) @@ -91686,6 +111900,29 @@ index 11e8efb713751..87039c5c03fdb 100644 } else { /* Here IRQs are assumed to be already disabled by the caller */ spin_lock(&vioch->lock); +@@ -247,19 +247,19 @@ static int virtio_chan_setup(struct scmi_chan_info *cinfo, struct device *dev, + for (i = 0; i < vioch->max_msg; i++) { + struct scmi_vio_msg *msg; + +- msg = devm_kzalloc(cinfo->dev, sizeof(*msg), GFP_KERNEL); ++ msg = devm_kzalloc(dev, sizeof(*msg), GFP_KERNEL); + if (!msg) + return -ENOMEM; + + if (tx) { +- msg->request = devm_kzalloc(cinfo->dev, ++ msg->request = devm_kzalloc(dev, + VIRTIO_SCMI_MAX_PDU_SIZE, + GFP_KERNEL); + if (!msg->request) + return -ENOMEM; + } + +- msg->input = devm_kzalloc(cinfo->dev, VIRTIO_SCMI_MAX_PDU_SIZE, ++ msg->input = devm_kzalloc(dev, VIRTIO_SCMI_MAX_PDU_SIZE, + GFP_KERNEL); + if (!msg->input) + return -ENOMEM; @@ -269,7 +269,7 @@ static int virtio_chan_setup(struct scmi_chan_info *cinfo, struct device *dev, list_add_tail(&msg->list, &vioch->free_list); spin_unlock_irqrestore(&vioch->lock, flags); @@ -91986,7 +112223,7 @@ index 0ef086e43090b..7e771c56c13c6 100644 if (record->reason == KMSG_DUMP_OOPS && try_module_get(THIS_MODULE)) if (!schedule_work(&efivar_work)) diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c -index 847f33ffc4aed..e3df82d5d37a8 100644 +index 847f33ffc4aed..332739f3eded5 100644 --- a/drivers/firmware/efi/efi.c +++ b/drivers/firmware/efi/efi.c @@ -209,7 +209,7 @@ static int __init efivar_ssdt_setup(char *str) @@ -91998,7 +112235,49 @@ index 847f33ffc4aed..e3df82d5d37a8 100644 } __setup("efivar_ssdt=", efivar_ssdt_setup); -@@ -719,6 +719,13 @@ void __init efi_systab_report_header(const efi_table_hdr_t *systab_hdr, +@@ -385,8 +385,8 @@ static int __init efisubsys_init(void) + efi_kobj = kobject_create_and_add("efi", firmware_kobj); + if (!efi_kobj) { + pr_err("efi: Firmware registration failed.\n"); +- destroy_workqueue(efi_rts_wq); +- return -ENOMEM; ++ error = -ENOMEM; ++ goto err_destroy_wq; + } + + if (efi_rt_services_supported(EFI_RT_SUPPORTED_GET_VARIABLE | +@@ -429,7 +429,10 @@ err_unregister: + generic_ops_unregister(); + err_put: + kobject_put(efi_kobj); +- destroy_workqueue(efi_rts_wq); ++err_destroy_wq: ++ if (efi_rts_wq) ++ destroy_workqueue(efi_rts_wq); ++ + return error; + } + +@@ -590,7 +593,7 @@ int __init efi_config_parse_tables(const efi_config_table_t *config_tables, + + seed = early_memremap(efi_rng_seed, sizeof(*seed)); + if (seed != NULL) { +- size = READ_ONCE(seed->size); ++ size = min_t(u32, seed->size, SZ_1K); // sanity check + early_memunmap(seed, sizeof(*seed)); + } else { + pr_err("Could not map UEFI random seed!\n"); +@@ -599,8 +602,8 @@ int __init efi_config_parse_tables(const efi_config_table_t *config_tables, + seed = early_memremap(efi_rng_seed, + sizeof(*seed) + size); + if (seed != NULL) { +- pr_notice("seeding entropy pool\n"); + add_bootloader_randomness(seed->bits, size); ++ memzero_explicit(seed->bits, size); + early_memunmap(seed, sizeof(*seed) + size); + } else { + pr_err("Could not map UEFI random seed!\n"); +@@ -719,6 +722,13 @@ void __init efi_systab_report_header(const efi_table_hdr_t *systab_hdr, 
systab_hdr->revision >> 16, systab_hdr->revision & 0xffff, vendor); @@ -92012,6 +112291,15 @@ index 847f33ffc4aed..e3df82d5d37a8 100644 } static __initdata char memory_type_name[][13] = { +@@ -940,6 +950,8 @@ int __ref efi_mem_reserve_persistent(phys_addr_t addr, u64 size) + /* first try to find a slot in an existing linked list entry */ + for (prsv = efi_memreserve_root->next; prsv; ) { + rsv = memremap(prsv, sizeof(*rsv), MEMREMAP_WB); ++ if (!rsv) ++ return -ENOMEM; + index = atomic_fetch_add_unless(&rsv->count, 1, rsv->size); + if (index < rsv->size) { + rsv->entry[index].base = addr; diff --git a/drivers/firmware/efi/libstub/Makefile b/drivers/firmware/efi/libstub/Makefile index d0537573501e9..2c67f71f23753 100644 --- a/drivers/firmware/efi/libstub/Makefile @@ -92047,6 +112335,19 @@ index 2363fee9211c9..9cc556013d085 100644 kernel_size = _edata - _text; kernel_memsize = kernel_size + (_end - _edata); +diff --git a/drivers/firmware/efi/libstub/efistub.h b/drivers/firmware/efi/libstub/efistub.h +index cde0a2ef507d9..fbffdd7290a31 100644 +--- a/drivers/firmware/efi/libstub/efistub.h ++++ b/drivers/firmware/efi/libstub/efistub.h +@@ -766,6 +766,8 @@ efi_status_t efi_get_random_bytes(unsigned long size, u8 *out); + efi_status_t efi_random_alloc(unsigned long size, unsigned long align, + unsigned long *addr, unsigned long random_seed); + ++efi_status_t efi_random_get_seed(void); ++ + efi_status_t check_platform_features(void); + + void *get_efi_config_table(efi_guid_t guid); diff --git a/drivers/firmware/efi/libstub/fdt.c b/drivers/firmware/efi/libstub/fdt.c index fe567be0f118b..804f542be3f28 100644 --- a/drivers/firmware/efi/libstub/fdt.c @@ -92066,6 +112367,90 @@ index fe567be0f118b..804f542be3f28 100644 status = update_fdt((void *)fdt_addr, fdt_size, (void *)*new_fdt_addr, MAX_FDT_SIZE, cmdline_ptr, initrd_addr, initrd_size); +diff --git a/drivers/firmware/efi/libstub/random.c b/drivers/firmware/efi/libstub/random.c +index 24aa375353724..f85d2c0668777 100644 +--- a/drivers/firmware/efi/libstub/random.c ++++ b/drivers/firmware/efi/libstub/random.c +@@ -67,22 +67,43 @@ efi_status_t efi_random_get_seed(void) + efi_guid_t rng_proto = EFI_RNG_PROTOCOL_GUID; + efi_guid_t rng_algo_raw = EFI_RNG_ALGORITHM_RAW; + efi_guid_t rng_table_guid = LINUX_EFI_RANDOM_SEED_TABLE_GUID; ++ struct linux_efi_random_seed *prev_seed, *seed = NULL; ++ int prev_seed_size = 0, seed_size = EFI_RANDOM_SEED_SIZE; + efi_rng_protocol_t *rng = NULL; +- struct linux_efi_random_seed *seed = NULL; + efi_status_t status; + + status = efi_bs_call(locate_protocol, &rng_proto, NULL, (void **)&rng); + if (status != EFI_SUCCESS) + return status; + +- status = efi_bs_call(allocate_pool, EFI_RUNTIME_SERVICES_DATA, +- sizeof(*seed) + EFI_RANDOM_SEED_SIZE, ++ /* ++ * Check whether a seed was provided by a prior boot stage. In that ++ * case, instead of overwriting it, let's create a new buffer that can ++ * hold both, and concatenate the existing and the new seeds. ++ * Note that we should read the seed size with caution, in case the ++ * table got corrupted in memory somehow. 
++ */ ++ prev_seed = get_efi_config_table(LINUX_EFI_RANDOM_SEED_TABLE_GUID); ++ if (prev_seed && prev_seed->size <= 512U) { ++ prev_seed_size = prev_seed->size; ++ seed_size += prev_seed_size; ++ } ++ ++ /* ++ * Use EFI_ACPI_RECLAIM_MEMORY here so that it is guaranteed that the ++ * allocation will survive a kexec reboot (although we refresh the seed ++ * beforehand) ++ */ ++ status = efi_bs_call(allocate_pool, EFI_ACPI_RECLAIM_MEMORY, ++ struct_size(seed, bits, seed_size), + (void **)&seed); +- if (status != EFI_SUCCESS) +- return status; ++ if (status != EFI_SUCCESS) { ++ efi_warn("Failed to allocate memory for RNG seed.\n"); ++ goto err_warn; ++ } + + status = efi_call_proto(rng, get_rng, &rng_algo_raw, +- EFI_RANDOM_SEED_SIZE, seed->bits); ++ EFI_RANDOM_SEED_SIZE, seed->bits); + + if (status == EFI_UNSUPPORTED) + /* +@@ -95,14 +116,28 @@ efi_status_t efi_random_get_seed(void) + if (status != EFI_SUCCESS) + goto err_freepool; + +- seed->size = EFI_RANDOM_SEED_SIZE; ++ seed->size = seed_size; ++ if (prev_seed_size) ++ memcpy(seed->bits + EFI_RANDOM_SEED_SIZE, prev_seed->bits, ++ prev_seed_size); ++ + status = efi_bs_call(install_configuration_table, &rng_table_guid, seed); + if (status != EFI_SUCCESS) + goto err_freepool; + ++ if (prev_seed_size) { ++ /* wipe and free the old seed if we managed to install the new one */ ++ memzero_explicit(prev_seed->bits, prev_seed_size); ++ efi_bs_call(free_pool, prev_seed); ++ } + return EFI_SUCCESS; + + err_freepool: ++ memzero_explicit(seed, struct_size(seed, bits, seed_size)); + efi_bs_call(free_pool, seed); ++ efi_warn("Failed to obtain seed from EFI_RNG_PROTOCOL\n"); ++err_warn: ++ if (prev_seed) ++ efi_warn("Retaining bootloader-supplied seed only"); + return status; + } diff --git a/drivers/firmware/efi/libstub/riscv-stub.c b/drivers/firmware/efi/libstub/riscv-stub.c index 380e4e2513994..9c460843442f5 100644 --- a/drivers/firmware/efi/libstub/riscv-stub.c @@ -92163,6 +112548,44 @@ index f14c4ff5839f9..72162645b553e 100644 efi_stub_entry(handle, sys_table_arg, boot_params); /* not reached */ +diff --git a/drivers/firmware/efi/memattr.c b/drivers/firmware/efi/memattr.c +index 0a9aba5f9ceff..f178b2984dfb2 100644 +--- a/drivers/firmware/efi/memattr.c ++++ b/drivers/firmware/efi/memattr.c +@@ -33,7 +33,7 @@ int __init efi_memattr_init(void) + return -ENOMEM; + } + +- if (tbl->version > 1) { ++ if (tbl->version > 2) { + pr_warn("Unexpected EFI Memory Attributes table version %d\n", + tbl->version); + goto unmap; +diff --git a/drivers/firmware/efi/runtime-wrappers.c b/drivers/firmware/efi/runtime-wrappers.c +index f3e54f6616f02..60075e0e4943a 100644 +--- a/drivers/firmware/efi/runtime-wrappers.c ++++ b/drivers/firmware/efi/runtime-wrappers.c +@@ -62,6 +62,7 @@ struct efi_runtime_work efi_rts_work; + \ + if (!efi_enabled(EFI_RUNTIME_SERVICES)) { \ + pr_warn_once("EFI Runtime Services are disabled!\n"); \ ++ efi_rts_work.status = EFI_DEVICE_ERROR; \ + goto exit; \ + } \ + \ +diff --git a/drivers/firmware/efi/tpm.c b/drivers/firmware/efi/tpm.c +index 8f665678e9e39..e8d69bd548f3f 100644 +--- a/drivers/firmware/efi/tpm.c ++++ b/drivers/firmware/efi/tpm.c +@@ -97,7 +97,7 @@ int __init efi_tpm_eventlog_init(void) + goto out_calc; + } + +- memblock_reserve((unsigned long)final_tbl, ++ memblock_reserve(efi.tpm_final_log, + tbl_size + sizeof(*final_tbl)); + efi_tpm_final_log_size = tbl_size; + diff --git a/drivers/firmware/efi/vars.c b/drivers/firmware/efi/vars.c index abdc8a6a39631..cae590bd08f27 100644 --- a/drivers/firmware/efi/vars.c @@ -92220,11 
+112643,121 @@ index 97968aece54f8..983e07dc022ed 100644 help This option enables the coreboot_table module, which provides other firmware modules access to the coreboot table. The coreboot table +diff --git a/drivers/firmware/google/coreboot_table.c b/drivers/firmware/google/coreboot_table.c +index c52bcaa9def60..f3694d3478019 100644 +--- a/drivers/firmware/google/coreboot_table.c ++++ b/drivers/firmware/google/coreboot_table.c +@@ -93,7 +93,12 @@ static int coreboot_table_populate(struct device *dev, void *ptr) + for (i = 0; i < header->table_entries; i++) { + entry = ptr_entry; + +- device = kzalloc(sizeof(struct device) + entry->size, GFP_KERNEL); ++ if (entry->size < sizeof(*entry)) { ++ dev_warn(dev, "coreboot table entry too small!\n"); ++ return -EINVAL; ++ } ++ ++ device = kzalloc(sizeof(device->dev) + entry->size, GFP_KERNEL); + if (!device) + return -ENOMEM; + +@@ -101,7 +106,7 @@ static int coreboot_table_populate(struct device *dev, void *ptr) + device->dev.parent = dev; + device->dev.bus = &coreboot_bus_type; + device->dev.release = coreboot_device_release; +- memcpy(&device->entry, ptr_entry, entry->size); ++ memcpy(device->raw, ptr_entry, entry->size); + + ret = device_register(&device->dev); + if (ret) { +@@ -149,12 +154,8 @@ static int coreboot_table_probe(struct platform_device *pdev) + if (!ptr) + return -ENOMEM; + +- ret = bus_register(&coreboot_bus_type); +- if (!ret) { +- ret = coreboot_table_populate(dev, ptr); +- if (ret) +- bus_unregister(&coreboot_bus_type); +- } ++ ret = coreboot_table_populate(dev, ptr); ++ + memunmap(ptr); + + return ret; +@@ -169,7 +170,6 @@ static int __cb_dev_unregister(struct device *dev, void *dummy) + static int coreboot_table_remove(struct platform_device *pdev) + { + bus_for_each_dev(&coreboot_bus_type, NULL, NULL, __cb_dev_unregister); +- bus_unregister(&coreboot_bus_type); + return 0; + } + +@@ -199,6 +199,32 @@ static struct platform_driver coreboot_table_driver = { + .of_match_table = of_match_ptr(coreboot_of_match), + }, + }; +-module_platform_driver(coreboot_table_driver); ++ ++static int __init coreboot_table_driver_init(void) ++{ ++ int ret; ++ ++ ret = bus_register(&coreboot_bus_type); ++ if (ret) ++ return ret; ++ ++ ret = platform_driver_register(&coreboot_table_driver); ++ if (ret) { ++ bus_unregister(&coreboot_bus_type); ++ return ret; ++ } ++ ++ return 0; ++} ++ ++static void __exit coreboot_table_driver_exit(void) ++{ ++ platform_driver_unregister(&coreboot_table_driver); ++ bus_unregister(&coreboot_bus_type); ++} ++ ++module_init(coreboot_table_driver_init); ++module_exit(coreboot_table_driver_exit); ++ + MODULE_AUTHOR("Google, Inc."); + MODULE_LICENSE("GPL"); +diff --git a/drivers/firmware/google/coreboot_table.h b/drivers/firmware/google/coreboot_table.h +index beb778674acdc..4a89277b99a39 100644 +--- a/drivers/firmware/google/coreboot_table.h ++++ b/drivers/firmware/google/coreboot_table.h +@@ -66,6 +66,7 @@ struct coreboot_device { + struct coreboot_table_entry entry; + struct lb_cbmem_ref cbmem_ref; + struct lb_framebuffer framebuffer; ++ DECLARE_FLEX_ARRAY(u8, raw); + }; + }; + diff --git a/drivers/firmware/google/gsmi.c b/drivers/firmware/google/gsmi.c -index adaa492c3d2df..4e2575dfeb908 100644 +index adaa492c3d2df..871bedf533a80 100644 --- a/drivers/firmware/google/gsmi.c +++ b/drivers/firmware/google/gsmi.c -@@ -681,6 +681,15 @@ static struct notifier_block gsmi_die_notifier = { +@@ -361,9 +361,10 @@ static efi_status_t gsmi_get_variable(efi_char16_t *name, + memcpy(data, gsmi_dev.data_buf->start, *data_size); 
+ + /* All variables are have the following attributes */ +- *attr = EFI_VARIABLE_NON_VOLATILE | +- EFI_VARIABLE_BOOTSERVICE_ACCESS | +- EFI_VARIABLE_RUNTIME_ACCESS; ++ if (attr) ++ *attr = EFI_VARIABLE_NON_VOLATILE | ++ EFI_VARIABLE_BOOTSERVICE_ACCESS | ++ EFI_VARIABLE_RUNTIME_ACCESS; + } + + spin_unlock_irqrestore(&gsmi_dev.lock, flags); +@@ -681,6 +682,15 @@ static struct notifier_block gsmi_die_notifier = { static int gsmi_panic_callback(struct notifier_block *nb, unsigned long reason, void *arg) { @@ -92340,6 +112873,18 @@ index 172c751a4f6c2..f08e056ed0ae4 100644 return err; } +diff --git a/drivers/firmware/raspberrypi.c b/drivers/firmware/raspberrypi.c +index 4b8978b254f9a..dba315f675bc7 100644 +--- a/drivers/firmware/raspberrypi.c ++++ b/drivers/firmware/raspberrypi.c +@@ -272,6 +272,7 @@ static int rpi_firmware_probe(struct platform_device *pdev) + int ret = PTR_ERR(fw->chan); + if (ret != -EPROBE_DEFER) + dev_err(dev, "Failed to get mbox channel: %d\n", ret); ++ kfree(fw); + return ret; + } + diff --git a/drivers/firmware/scpi_pm_domain.c b/drivers/firmware/scpi_pm_domain.c index 51201600d789b..800673910b511 100644 --- a/drivers/firmware/scpi_pm_domain.c @@ -92684,6 +113229,22 @@ index f86666cf2c6a8..c38143ef23c64 100644 if (IS_ERR(fds)) return PTR_ERR(fds); +diff --git a/drivers/fpga/stratix10-soc.c b/drivers/fpga/stratix10-soc.c +index 047fd7f237069..91212bab58717 100644 +--- a/drivers/fpga/stratix10-soc.c ++++ b/drivers/fpga/stratix10-soc.c +@@ -213,9 +213,9 @@ static int s10_ops_write_init(struct fpga_manager *mgr, + /* Allocate buffers from the service layer's pool. */ + for (i = 0; i < NUM_SVC_BUFS; i++) { + kbuf = stratix10_svc_allocate_memory(priv->chan, SVC_BUF_SIZE); +- if (!kbuf) { ++ if (IS_ERR(kbuf)) { + s10_free_buffers(mgr); +- ret = -ENOMEM; ++ ret = PTR_ERR(kbuf); + goto init_done; + } + diff --git a/drivers/fsi/fsi-core.c b/drivers/fsi/fsi-core.c index 59ddc9fd5bca4..92e6eebd1851e 100644 --- a/drivers/fsi/fsi-core.c @@ -92884,6 +113445,37 @@ index b223f0ef337b9..ecf738411fe22 100644 mutex_init(&occ->occ_lock); if (dev->of_node) { +diff --git a/drivers/fsi/fsi-sbefifo.c b/drivers/fsi/fsi-sbefifo.c +index 84cb965bfed5c..97045a8d94224 100644 +--- a/drivers/fsi/fsi-sbefifo.c ++++ b/drivers/fsi/fsi-sbefifo.c +@@ -640,7 +640,7 @@ static void sbefifo_collect_async_ffdc(struct sbefifo *sbefifo) + } + ffdc_iov.iov_base = ffdc; + ffdc_iov.iov_len = SBEFIFO_MAX_FFDC_SIZE; +- iov_iter_kvec(&ffdc_iter, WRITE, &ffdc_iov, 1, SBEFIFO_MAX_FFDC_SIZE); ++ iov_iter_kvec(&ffdc_iter, READ, &ffdc_iov, 1, SBEFIFO_MAX_FFDC_SIZE); + cmd[0] = cpu_to_be32(2); + cmd[1] = cpu_to_be32(SBEFIFO_CMD_GET_SBE_FFDC); + rc = sbefifo_do_command(sbefifo, cmd, 2, &ffdc_iter); +@@ -737,7 +737,7 @@ int sbefifo_submit(struct device *dev, const __be32 *command, size_t cmd_len, + rbytes = (*resp_len) * sizeof(__be32); + resp_iov.iov_base = response; + resp_iov.iov_len = rbytes; +- iov_iter_kvec(&resp_iter, WRITE, &resp_iov, 1, rbytes); ++ iov_iter_kvec(&resp_iter, READ, &resp_iov, 1, rbytes); + + /* Perform the command */ + mutex_lock(&sbefifo->lock); +@@ -817,7 +817,7 @@ static ssize_t sbefifo_user_read(struct file *file, char __user *buf, + /* Prepare iov iterator */ + resp_iov.iov_base = buf; + resp_iov.iov_len = len; +- iov_iter_init(&resp_iter, WRITE, &resp_iov, 1, len); ++ iov_iter_init(&resp_iter, READ, &resp_iov, 1, len); + + /* Perform the command */ + mutex_lock(&sbefifo->lock); diff --git a/drivers/fsi/fsi-scom.c b/drivers/fsi/fsi-scom.c index da1486bb6a144..bcb756dc98663 100644 --- 
a/drivers/fsi/fsi-scom.c @@ -93041,6 +113633,72 @@ index 34e35b64dcdc0..23047dc84ef1b 100644 } static void gpio_fwd_set_multiple_locked(struct gpio_chip *chip, +diff --git a/drivers/gpio/gpio-amd8111.c b/drivers/gpio/gpio-amd8111.c +index 14e6b3e64add5..6f3ded619c8b2 100644 +--- a/drivers/gpio/gpio-amd8111.c ++++ b/drivers/gpio/gpio-amd8111.c +@@ -226,7 +226,10 @@ found: + ioport_unmap(gp.pm); + goto out; + } ++ return 0; ++ + out: ++ pci_dev_put(pdev); + return err; + } + +@@ -234,6 +237,7 @@ static void __exit amd_gpio_exit(void) + { + gpiochip_remove(&gp.chip); + ioport_unmap(gp.pm); ++ pci_dev_put(gp.pdev); + } + + module_init(amd_gpio_init); +diff --git a/drivers/gpio/gpio-amdpt.c b/drivers/gpio/gpio-amdpt.c +index 44398992ae15f..dba4836a18f80 100644 +--- a/drivers/gpio/gpio-amdpt.c ++++ b/drivers/gpio/gpio-amdpt.c +@@ -35,19 +35,19 @@ static int pt_gpio_request(struct gpio_chip *gc, unsigned offset) + + dev_dbg(gc->parent, "pt_gpio_request offset=%x\n", offset); + +- spin_lock_irqsave(&gc->bgpio_lock, flags); ++ raw_spin_lock_irqsave(&gc->bgpio_lock, flags); + + using_pins = readl(pt_gpio->reg_base + PT_SYNC_REG); + if (using_pins & BIT(offset)) { + dev_warn(gc->parent, "PT GPIO pin %x reconfigured\n", + offset); +- spin_unlock_irqrestore(&gc->bgpio_lock, flags); ++ raw_spin_unlock_irqrestore(&gc->bgpio_lock, flags); + return -EINVAL; + } + + writel(using_pins | BIT(offset), pt_gpio->reg_base + PT_SYNC_REG); + +- spin_unlock_irqrestore(&gc->bgpio_lock, flags); ++ raw_spin_unlock_irqrestore(&gc->bgpio_lock, flags); + + return 0; + } +@@ -58,13 +58,13 @@ static void pt_gpio_free(struct gpio_chip *gc, unsigned offset) + unsigned long flags; + u32 using_pins; + +- spin_lock_irqsave(&gc->bgpio_lock, flags); ++ raw_spin_lock_irqsave(&gc->bgpio_lock, flags); + + using_pins = readl(pt_gpio->reg_base + PT_SYNC_REG); + using_pins &= ~BIT(offset); + writel(using_pins, pt_gpio->reg_base + PT_SYNC_REG); + +- spin_unlock_irqrestore(&gc->bgpio_lock, flags); ++ raw_spin_unlock_irqrestore(&gc->bgpio_lock, flags); + + dev_dbg(gc->parent, "pt_gpio_free offset=%x\n", offset); + } diff --git a/drivers/gpio/gpio-aspeed-sgpio.c b/drivers/gpio/gpio-aspeed-sgpio.c index 3d6ef37a7702a..454cefbeecf0e 100644 --- a/drivers/gpio/gpio-aspeed-sgpio.c @@ -93411,6 +114069,110 @@ index 3c8f20c57695f..318a7d95a1a8b 100644 gpio_id = of_match_node(aspeed_gpio_of_table, pdev->dev.of_node); if (!gpio_id) +diff --git a/drivers/gpio/gpio-brcmstb.c b/drivers/gpio/gpio-brcmstb.c +index 895a79936248d..c5d85e931f2a9 100644 +--- a/drivers/gpio/gpio-brcmstb.c ++++ b/drivers/gpio/gpio-brcmstb.c +@@ -92,9 +92,9 @@ brcmstb_gpio_get_active_irqs(struct brcmstb_gpio_bank *bank) + unsigned long status; + unsigned long flags; + +- spin_lock_irqsave(&bank->gc.bgpio_lock, flags); ++ raw_spin_lock_irqsave(&bank->gc.bgpio_lock, flags); + status = __brcmstb_gpio_get_active_irqs(bank); +- spin_unlock_irqrestore(&bank->gc.bgpio_lock, flags); ++ raw_spin_unlock_irqrestore(&bank->gc.bgpio_lock, flags); + + return status; + } +@@ -114,14 +114,14 @@ static void brcmstb_gpio_set_imask(struct brcmstb_gpio_bank *bank, + u32 imask; + unsigned long flags; + +- spin_lock_irqsave(&gc->bgpio_lock, flags); ++ raw_spin_lock_irqsave(&gc->bgpio_lock, flags); + imask = gc->read_reg(priv->reg_base + GIO_MASK(bank->id)); + if (enable) + imask |= mask; + else + imask &= ~mask; + gc->write_reg(priv->reg_base + GIO_MASK(bank->id), imask); +- spin_unlock_irqrestore(&gc->bgpio_lock, flags); ++ raw_spin_unlock_irqrestore(&gc->bgpio_lock, flags); + } + + static int 
brcmstb_gpio_to_irq(struct gpio_chip *gc, unsigned offset) +@@ -204,7 +204,7 @@ static int brcmstb_gpio_irq_set_type(struct irq_data *d, unsigned int type) + return -EINVAL; + } + +- spin_lock_irqsave(&bank->gc.bgpio_lock, flags); ++ raw_spin_lock_irqsave(&bank->gc.bgpio_lock, flags); + + iedge_config = bank->gc.read_reg(priv->reg_base + + GIO_EC(bank->id)) & ~mask; +@@ -220,7 +220,7 @@ static int brcmstb_gpio_irq_set_type(struct irq_data *d, unsigned int type) + bank->gc.write_reg(priv->reg_base + GIO_LEVEL(bank->id), + ilevel | level); + +- spin_unlock_irqrestore(&bank->gc.bgpio_lock, flags); ++ raw_spin_unlock_irqrestore(&bank->gc.bgpio_lock, flags); + return 0; + } + +diff --git a/drivers/gpio/gpio-cadence.c b/drivers/gpio/gpio-cadence.c +index 562f8f7e7d1fc..137aea49ba026 100644 +--- a/drivers/gpio/gpio-cadence.c ++++ b/drivers/gpio/gpio-cadence.c +@@ -41,12 +41,12 @@ static int cdns_gpio_request(struct gpio_chip *chip, unsigned int offset) + struct cdns_gpio_chip *cgpio = gpiochip_get_data(chip); + unsigned long flags; + +- spin_lock_irqsave(&chip->bgpio_lock, flags); ++ raw_spin_lock_irqsave(&chip->bgpio_lock, flags); + + iowrite32(ioread32(cgpio->regs + CDNS_GPIO_BYPASS_MODE) & ~BIT(offset), + cgpio->regs + CDNS_GPIO_BYPASS_MODE); + +- spin_unlock_irqrestore(&chip->bgpio_lock, flags); ++ raw_spin_unlock_irqrestore(&chip->bgpio_lock, flags); + return 0; + } + +@@ -55,13 +55,13 @@ static void cdns_gpio_free(struct gpio_chip *chip, unsigned int offset) + struct cdns_gpio_chip *cgpio = gpiochip_get_data(chip); + unsigned long flags; + +- spin_lock_irqsave(&chip->bgpio_lock, flags); ++ raw_spin_lock_irqsave(&chip->bgpio_lock, flags); + + iowrite32(ioread32(cgpio->regs + CDNS_GPIO_BYPASS_MODE) | + (BIT(offset) & cgpio->bypass_orig), + cgpio->regs + CDNS_GPIO_BYPASS_MODE); + +- spin_unlock_irqrestore(&chip->bgpio_lock, flags); ++ raw_spin_unlock_irqrestore(&chip->bgpio_lock, flags); + } + + static void cdns_gpio_irq_mask(struct irq_data *d) +@@ -90,7 +90,7 @@ static int cdns_gpio_irq_set_type(struct irq_data *d, unsigned int type) + u32 mask = BIT(d->hwirq); + int ret = 0; + +- spin_lock_irqsave(&chip->bgpio_lock, flags); ++ raw_spin_lock_irqsave(&chip->bgpio_lock, flags); + + int_value = ioread32(cgpio->regs + CDNS_GPIO_IRQ_VALUE) & ~mask; + int_type = ioread32(cgpio->regs + CDNS_GPIO_IRQ_TYPE) & ~mask; +@@ -115,7 +115,7 @@ static int cdns_gpio_irq_set_type(struct irq_data *d, unsigned int type) + iowrite32(int_type, cgpio->regs + CDNS_GPIO_IRQ_TYPE); + + err_irq_type: +- spin_unlock_irqrestore(&chip->bgpio_lock, flags); ++ raw_spin_unlock_irqrestore(&chip->bgpio_lock, flags); + return ret; + } + diff --git a/drivers/gpio/gpio-dln2.c b/drivers/gpio/gpio-dln2.c index 026903e3ef543..08b9e2cf4f2d6 100644 --- a/drivers/gpio/gpio-dln2.c @@ -93457,9 +114219,111 @@ index 026903e3ef543..08b9e2cf4f2d6 100644 girq->parent_handler = NULL; girq->num_parents = 0; diff --git a/drivers/gpio/gpio-dwapb.c b/drivers/gpio/gpio-dwapb.c -index f98fa33e16790..e981e7a46fc1c 100644 +index f98fa33e16790..a503f37001ebb 100644 --- a/drivers/gpio/gpio-dwapb.c +++ b/drivers/gpio/gpio-dwapb.c +@@ -242,9 +242,9 @@ static void dwapb_irq_ack(struct irq_data *d) + u32 val = BIT(irqd_to_hwirq(d)); + unsigned long flags; + +- spin_lock_irqsave(&gc->bgpio_lock, flags); ++ raw_spin_lock_irqsave(&gc->bgpio_lock, flags); + dwapb_write(gpio, GPIO_PORTA_EOI, val); +- spin_unlock_irqrestore(&gc->bgpio_lock, flags); ++ raw_spin_unlock_irqrestore(&gc->bgpio_lock, flags); + } + + static void dwapb_irq_mask(struct irq_data *d) +@@ 
-254,10 +254,10 @@ static void dwapb_irq_mask(struct irq_data *d) + unsigned long flags; + u32 val; + +- spin_lock_irqsave(&gc->bgpio_lock, flags); ++ raw_spin_lock_irqsave(&gc->bgpio_lock, flags); + val = dwapb_read(gpio, GPIO_INTMASK) | BIT(irqd_to_hwirq(d)); + dwapb_write(gpio, GPIO_INTMASK, val); +- spin_unlock_irqrestore(&gc->bgpio_lock, flags); ++ raw_spin_unlock_irqrestore(&gc->bgpio_lock, flags); + } + + static void dwapb_irq_unmask(struct irq_data *d) +@@ -267,10 +267,10 @@ static void dwapb_irq_unmask(struct irq_data *d) + unsigned long flags; + u32 val; + +- spin_lock_irqsave(&gc->bgpio_lock, flags); ++ raw_spin_lock_irqsave(&gc->bgpio_lock, flags); + val = dwapb_read(gpio, GPIO_INTMASK) & ~BIT(irqd_to_hwirq(d)); + dwapb_write(gpio, GPIO_INTMASK, val); +- spin_unlock_irqrestore(&gc->bgpio_lock, flags); ++ raw_spin_unlock_irqrestore(&gc->bgpio_lock, flags); + } + + static void dwapb_irq_enable(struct irq_data *d) +@@ -280,11 +280,11 @@ static void dwapb_irq_enable(struct irq_data *d) + unsigned long flags; + u32 val; + +- spin_lock_irqsave(&gc->bgpio_lock, flags); ++ raw_spin_lock_irqsave(&gc->bgpio_lock, flags); + val = dwapb_read(gpio, GPIO_INTEN); + val |= BIT(irqd_to_hwirq(d)); + dwapb_write(gpio, GPIO_INTEN, val); +- spin_unlock_irqrestore(&gc->bgpio_lock, flags); ++ raw_spin_unlock_irqrestore(&gc->bgpio_lock, flags); + } + + static void dwapb_irq_disable(struct irq_data *d) +@@ -294,11 +294,11 @@ static void dwapb_irq_disable(struct irq_data *d) + unsigned long flags; + u32 val; + +- spin_lock_irqsave(&gc->bgpio_lock, flags); ++ raw_spin_lock_irqsave(&gc->bgpio_lock, flags); + val = dwapb_read(gpio, GPIO_INTEN); + val &= ~BIT(irqd_to_hwirq(d)); + dwapb_write(gpio, GPIO_INTEN, val); +- spin_unlock_irqrestore(&gc->bgpio_lock, flags); ++ raw_spin_unlock_irqrestore(&gc->bgpio_lock, flags); + } + + static int dwapb_irq_set_type(struct irq_data *d, u32 type) +@@ -308,7 +308,7 @@ static int dwapb_irq_set_type(struct irq_data *d, u32 type) + irq_hw_number_t bit = irqd_to_hwirq(d); + unsigned long level, polarity, flags; + +- spin_lock_irqsave(&gc->bgpio_lock, flags); ++ raw_spin_lock_irqsave(&gc->bgpio_lock, flags); + level = dwapb_read(gpio, GPIO_INTTYPE_LEVEL); + polarity = dwapb_read(gpio, GPIO_INT_POLARITY); + +@@ -343,7 +343,7 @@ static int dwapb_irq_set_type(struct irq_data *d, u32 type) + dwapb_write(gpio, GPIO_INTTYPE_LEVEL, level); + if (type != IRQ_TYPE_EDGE_BOTH) + dwapb_write(gpio, GPIO_INT_POLARITY, polarity); +- spin_unlock_irqrestore(&gc->bgpio_lock, flags); ++ raw_spin_unlock_irqrestore(&gc->bgpio_lock, flags); + + return 0; + } +@@ -373,7 +373,7 @@ static int dwapb_gpio_set_debounce(struct gpio_chip *gc, + unsigned long flags, val_deb; + unsigned long mask = BIT(offset); + +- spin_lock_irqsave(&gc->bgpio_lock, flags); ++ raw_spin_lock_irqsave(&gc->bgpio_lock, flags); + + val_deb = dwapb_read(gpio, GPIO_PORTA_DEBOUNCE); + if (debounce) +@@ -382,7 +382,7 @@ static int dwapb_gpio_set_debounce(struct gpio_chip *gc, + val_deb &= ~mask; + dwapb_write(gpio, GPIO_PORTA_DEBOUNCE, val_deb); + +- spin_unlock_irqrestore(&gc->bgpio_lock, flags); ++ raw_spin_unlock_irqrestore(&gc->bgpio_lock, flags); + + return 0; + } @@ -653,10 +653,9 @@ static int dwapb_get_clks(struct dwapb_gpio *gpio) gpio->clks[1].id = "db"; err = devm_clk_bulk_get_optional(gpio->dev, DWAPB_NR_CLOCKS, @@ -93474,10 +114338,290 @@ index f98fa33e16790..e981e7a46fc1c 100644 err = clk_bulk_prepare_enable(DWAPB_NR_CLOCKS, gpio->clks); if (err) { +@@ -739,7 +738,7 @@ static int dwapb_gpio_suspend(struct device 
*dev) + unsigned long flags; + int i; + +- spin_lock_irqsave(&gc->bgpio_lock, flags); ++ raw_spin_lock_irqsave(&gc->bgpio_lock, flags); + for (i = 0; i < gpio->nr_ports; i++) { + unsigned int offset; + unsigned int idx = gpio->ports[i].idx; +@@ -766,7 +765,7 @@ static int dwapb_gpio_suspend(struct device *dev) + dwapb_write(gpio, GPIO_INTMASK, ~ctx->wake_en); + } + } +- spin_unlock_irqrestore(&gc->bgpio_lock, flags); ++ raw_spin_unlock_irqrestore(&gc->bgpio_lock, flags); + + clk_bulk_disable_unprepare(DWAPB_NR_CLOCKS, gpio->clks); + +@@ -786,7 +785,7 @@ static int dwapb_gpio_resume(struct device *dev) + return err; + } + +- spin_lock_irqsave(&gc->bgpio_lock, flags); ++ raw_spin_lock_irqsave(&gc->bgpio_lock, flags); + for (i = 0; i < gpio->nr_ports; i++) { + unsigned int offset; + unsigned int idx = gpio->ports[i].idx; +@@ -813,7 +812,7 @@ static int dwapb_gpio_resume(struct device *dev) + dwapb_write(gpio, GPIO_PORTA_EOI, 0xffffffff); + } + } +- spin_unlock_irqrestore(&gc->bgpio_lock, flags); ++ raw_spin_unlock_irqrestore(&gc->bgpio_lock, flags); + + return 0; + } +diff --git a/drivers/gpio/gpio-grgpio.c b/drivers/gpio/gpio-grgpio.c +index f954359c9544e..21204a5dca3d4 100644 +--- a/drivers/gpio/gpio-grgpio.c ++++ b/drivers/gpio/gpio-grgpio.c +@@ -145,7 +145,7 @@ static int grgpio_irq_set_type(struct irq_data *d, unsigned int type) + return -EINVAL; + } + +- spin_lock_irqsave(&priv->gc.bgpio_lock, flags); ++ raw_spin_lock_irqsave(&priv->gc.bgpio_lock, flags); + + ipol = priv->gc.read_reg(priv->regs + GRGPIO_IPOL) & ~mask; + iedge = priv->gc.read_reg(priv->regs + GRGPIO_IEDGE) & ~mask; +@@ -153,7 +153,7 @@ static int grgpio_irq_set_type(struct irq_data *d, unsigned int type) + priv->gc.write_reg(priv->regs + GRGPIO_IPOL, ipol | pol); + priv->gc.write_reg(priv->regs + GRGPIO_IEDGE, iedge | edge); + +- spin_unlock_irqrestore(&priv->gc.bgpio_lock, flags); ++ raw_spin_unlock_irqrestore(&priv->gc.bgpio_lock, flags); + + return 0; + } +@@ -164,11 +164,11 @@ static void grgpio_irq_mask(struct irq_data *d) + int offset = d->hwirq; + unsigned long flags; + +- spin_lock_irqsave(&priv->gc.bgpio_lock, flags); ++ raw_spin_lock_irqsave(&priv->gc.bgpio_lock, flags); + + grgpio_set_imask(priv, offset, 0); + +- spin_unlock_irqrestore(&priv->gc.bgpio_lock, flags); ++ raw_spin_unlock_irqrestore(&priv->gc.bgpio_lock, flags); + } + + static void grgpio_irq_unmask(struct irq_data *d) +@@ -177,11 +177,11 @@ static void grgpio_irq_unmask(struct irq_data *d) + int offset = d->hwirq; + unsigned long flags; + +- spin_lock_irqsave(&priv->gc.bgpio_lock, flags); ++ raw_spin_lock_irqsave(&priv->gc.bgpio_lock, flags); + + grgpio_set_imask(priv, offset, 1); + +- spin_unlock_irqrestore(&priv->gc.bgpio_lock, flags); ++ raw_spin_unlock_irqrestore(&priv->gc.bgpio_lock, flags); + } + + static struct irq_chip grgpio_irq_chip = { +@@ -199,7 +199,7 @@ static irqreturn_t grgpio_irq_handler(int irq, void *dev) + int i; + int match = 0; + +- spin_lock_irqsave(&priv->gc.bgpio_lock, flags); ++ raw_spin_lock_irqsave(&priv->gc.bgpio_lock, flags); + + /* + * For each gpio line, call its interrupt handler if it its underlying +@@ -215,7 +215,7 @@ static irqreturn_t grgpio_irq_handler(int irq, void *dev) + } + } + +- spin_unlock_irqrestore(&priv->gc.bgpio_lock, flags); ++ raw_spin_unlock_irqrestore(&priv->gc.bgpio_lock, flags); + + if (!match) + dev_warn(priv->dev, "No gpio line matched irq %d\n", irq); +@@ -247,13 +247,13 @@ static int grgpio_irq_map(struct irq_domain *d, unsigned int irq, + dev_dbg(priv->dev, "Mapping irq %d for gpio line 
%d\n", + irq, offset); + +- spin_lock_irqsave(&priv->gc.bgpio_lock, flags); ++ raw_spin_lock_irqsave(&priv->gc.bgpio_lock, flags); + + /* Request underlying irq if not already requested */ + lirq->irq = irq; + uirq = &priv->uirqs[lirq->index]; + if (uirq->refcnt == 0) { +- spin_unlock_irqrestore(&priv->gc.bgpio_lock, flags); ++ raw_spin_unlock_irqrestore(&priv->gc.bgpio_lock, flags); + ret = request_irq(uirq->uirq, grgpio_irq_handler, 0, + dev_name(priv->dev), priv); + if (ret) { +@@ -262,11 +262,11 @@ static int grgpio_irq_map(struct irq_domain *d, unsigned int irq, + uirq->uirq); + return ret; + } +- spin_lock_irqsave(&priv->gc.bgpio_lock, flags); ++ raw_spin_lock_irqsave(&priv->gc.bgpio_lock, flags); + } + uirq->refcnt++; + +- spin_unlock_irqrestore(&priv->gc.bgpio_lock, flags); ++ raw_spin_unlock_irqrestore(&priv->gc.bgpio_lock, flags); + + /* Setup irq */ + irq_set_chip_data(irq, priv); +@@ -290,7 +290,7 @@ static void grgpio_irq_unmap(struct irq_domain *d, unsigned int irq) + irq_set_chip_and_handler(irq, NULL, NULL); + irq_set_chip_data(irq, NULL); + +- spin_lock_irqsave(&priv->gc.bgpio_lock, flags); ++ raw_spin_lock_irqsave(&priv->gc.bgpio_lock, flags); + + /* Free underlying irq if last user unmapped */ + index = -1; +@@ -309,13 +309,13 @@ static void grgpio_irq_unmap(struct irq_domain *d, unsigned int irq) + uirq = &priv->uirqs[lirq->index]; + uirq->refcnt--; + if (uirq->refcnt == 0) { +- spin_unlock_irqrestore(&priv->gc.bgpio_lock, flags); ++ raw_spin_unlock_irqrestore(&priv->gc.bgpio_lock, flags); + free_irq(uirq->uirq, priv); + return; + } + } + +- spin_unlock_irqrestore(&priv->gc.bgpio_lock, flags); ++ raw_spin_unlock_irqrestore(&priv->gc.bgpio_lock, flags); + } + + static const struct irq_domain_ops grgpio_irq_domain_ops = { +diff --git a/drivers/gpio/gpio-hlwd.c b/drivers/gpio/gpio-hlwd.c +index 641719a96a1a9..4e13e937f8324 100644 +--- a/drivers/gpio/gpio-hlwd.c ++++ b/drivers/gpio/gpio-hlwd.c +@@ -65,7 +65,7 @@ static void hlwd_gpio_irqhandler(struct irq_desc *desc) + int hwirq; + u32 emulated_pending; + +- spin_lock_irqsave(&hlwd->gpioc.bgpio_lock, flags); ++ raw_spin_lock_irqsave(&hlwd->gpioc.bgpio_lock, flags); + pending = ioread32be(hlwd->regs + HW_GPIOB_INTFLAG); + pending &= ioread32be(hlwd->regs + HW_GPIOB_INTMASK); + +@@ -93,7 +93,7 @@ static void hlwd_gpio_irqhandler(struct irq_desc *desc) + /* Mark emulated interrupts as pending */ + pending |= rising | falling; + } +- spin_unlock_irqrestore(&hlwd->gpioc.bgpio_lock, flags); ++ raw_spin_unlock_irqrestore(&hlwd->gpioc.bgpio_lock, flags); + + chained_irq_enter(chip, desc); + +@@ -118,11 +118,11 @@ static void hlwd_gpio_irq_mask(struct irq_data *data) + unsigned long flags; + u32 mask; + +- spin_lock_irqsave(&hlwd->gpioc.bgpio_lock, flags); ++ raw_spin_lock_irqsave(&hlwd->gpioc.bgpio_lock, flags); + mask = ioread32be(hlwd->regs + HW_GPIOB_INTMASK); + mask &= ~BIT(data->hwirq); + iowrite32be(mask, hlwd->regs + HW_GPIOB_INTMASK); +- spin_unlock_irqrestore(&hlwd->gpioc.bgpio_lock, flags); ++ raw_spin_unlock_irqrestore(&hlwd->gpioc.bgpio_lock, flags); + } + + static void hlwd_gpio_irq_unmask(struct irq_data *data) +@@ -132,11 +132,11 @@ static void hlwd_gpio_irq_unmask(struct irq_data *data) + unsigned long flags; + u32 mask; + +- spin_lock_irqsave(&hlwd->gpioc.bgpio_lock, flags); ++ raw_spin_lock_irqsave(&hlwd->gpioc.bgpio_lock, flags); + mask = ioread32be(hlwd->regs + HW_GPIOB_INTMASK); + mask |= BIT(data->hwirq); + iowrite32be(mask, hlwd->regs + HW_GPIOB_INTMASK); +- spin_unlock_irqrestore(&hlwd->gpioc.bgpio_lock, 
flags); ++ raw_spin_unlock_irqrestore(&hlwd->gpioc.bgpio_lock, flags); + } + + static void hlwd_gpio_irq_enable(struct irq_data *data) +@@ -173,7 +173,7 @@ static int hlwd_gpio_irq_set_type(struct irq_data *data, unsigned int flow_type) + unsigned long flags; + u32 level; + +- spin_lock_irqsave(&hlwd->gpioc.bgpio_lock, flags); ++ raw_spin_lock_irqsave(&hlwd->gpioc.bgpio_lock, flags); + + hlwd->edge_emulation &= ~BIT(data->hwirq); + +@@ -194,11 +194,11 @@ static int hlwd_gpio_irq_set_type(struct irq_data *data, unsigned int flow_type) + hlwd_gpio_irq_setup_emulation(hlwd, data->hwirq, flow_type); + break; + default: +- spin_unlock_irqrestore(&hlwd->gpioc.bgpio_lock, flags); ++ raw_spin_unlock_irqrestore(&hlwd->gpioc.bgpio_lock, flags); + return -EINVAL; + } + +- spin_unlock_irqrestore(&hlwd->gpioc.bgpio_lock, flags); ++ raw_spin_unlock_irqrestore(&hlwd->gpioc.bgpio_lock, flags); + return 0; + } + diff --git a/drivers/gpio/gpio-idt3243x.c b/drivers/gpio/gpio-idt3243x.c -index 50003ad2e5898..52b8b72ded77f 100644 +index 50003ad2e5898..1cafdf46f8756 100644 --- a/drivers/gpio/gpio-idt3243x.c +++ b/drivers/gpio/gpio-idt3243x.c +@@ -57,7 +57,7 @@ static int idt_gpio_irq_set_type(struct irq_data *d, unsigned int flow_type) + if (sense == IRQ_TYPE_NONE || (sense & IRQ_TYPE_EDGE_BOTH)) + return -EINVAL; + +- spin_lock_irqsave(&gc->bgpio_lock, flags); ++ raw_spin_lock_irqsave(&gc->bgpio_lock, flags); + + ilevel = readl(ctrl->gpio + IDT_GPIO_ILEVEL); + if (sense & IRQ_TYPE_LEVEL_HIGH) +@@ -68,7 +68,7 @@ static int idt_gpio_irq_set_type(struct irq_data *d, unsigned int flow_type) + writel(ilevel, ctrl->gpio + IDT_GPIO_ILEVEL); + irq_set_handler_locked(d, handle_level_irq); + +- spin_unlock_irqrestore(&gc->bgpio_lock, flags); ++ raw_spin_unlock_irqrestore(&gc->bgpio_lock, flags); + return 0; + } + +@@ -86,12 +86,12 @@ static void idt_gpio_mask(struct irq_data *d) + struct idt_gpio_ctrl *ctrl = gpiochip_get_data(gc); + unsigned long flags; + +- spin_lock_irqsave(&gc->bgpio_lock, flags); ++ raw_spin_lock_irqsave(&gc->bgpio_lock, flags); + + ctrl->mask_cache |= BIT(d->hwirq); + writel(ctrl->mask_cache, ctrl->pic + IDT_PIC_IRQ_MASK); + +- spin_unlock_irqrestore(&gc->bgpio_lock, flags); ++ raw_spin_unlock_irqrestore(&gc->bgpio_lock, flags); + } + + static void idt_gpio_unmask(struct irq_data *d) +@@ -100,12 +100,12 @@ static void idt_gpio_unmask(struct irq_data *d) + struct idt_gpio_ctrl *ctrl = gpiochip_get_data(gc); + unsigned long flags; + +- spin_lock_irqsave(&gc->bgpio_lock, flags); ++ raw_spin_lock_irqsave(&gc->bgpio_lock, flags); + + ctrl->mask_cache &= ~BIT(d->hwirq); + writel(ctrl->mask_cache, ctrl->pic + IDT_PIC_IRQ_MASK); + +- spin_unlock_irqrestore(&gc->bgpio_lock, flags); ++ raw_spin_unlock_irqrestore(&gc->bgpio_lock, flags); + } + + static int idt_gpio_irq_init_hw(struct gpio_chip *gc) @@ -132,7 +132,7 @@ static int idt_gpio_probe(struct platform_device *pdev) struct device *dev = &pdev->dev; struct gpio_irq_chip *girq; @@ -93498,6 +114642,232 @@ index 50003ad2e5898..52b8b72ded77f 100644 girq = &ctrl->gc.irq; girq->chip = &idt_gpio_irqchip; +diff --git a/drivers/gpio/gpio-ixp4xx.c b/drivers/gpio/gpio-ixp4xx.c +index b3b050604e0be..6b184502fa3f8 100644 +--- a/drivers/gpio/gpio-ixp4xx.c ++++ b/drivers/gpio/gpio-ixp4xx.c +@@ -128,7 +128,7 @@ static int ixp4xx_gpio_irq_set_type(struct irq_data *d, unsigned int type) + int_reg = IXP4XX_REG_GPIT1; + } + +- spin_lock_irqsave(&g->gc.bgpio_lock, flags); ++ raw_spin_lock_irqsave(&g->gc.bgpio_lock, flags); + + /* Clear the style for the appropriate pin */ 
+ val = __raw_readl(g->base + int_reg); +@@ -147,7 +147,7 @@ static int ixp4xx_gpio_irq_set_type(struct irq_data *d, unsigned int type) + val |= BIT(d->hwirq); + __raw_writel(val, g->base + IXP4XX_REG_GPOE); + +- spin_unlock_irqrestore(&g->gc.bgpio_lock, flags); ++ raw_spin_unlock_irqrestore(&g->gc.bgpio_lock, flags); + + /* This parent only accept level high (asserted) */ + return irq_chip_set_type_parent(d, IRQ_TYPE_LEVEL_HIGH); +diff --git a/drivers/gpio/gpio-loongson1.c b/drivers/gpio/gpio-loongson1.c +index 1b1ee94eeab47..5d90b3bc5a256 100644 +--- a/drivers/gpio/gpio-loongson1.c ++++ b/drivers/gpio/gpio-loongson1.c +@@ -25,10 +25,10 @@ static int ls1x_gpio_request(struct gpio_chip *gc, unsigned int offset) + { + unsigned long flags; + +- spin_lock_irqsave(&gc->bgpio_lock, flags); ++ raw_spin_lock_irqsave(&gc->bgpio_lock, flags); + __raw_writel(__raw_readl(gpio_reg_base + GPIO_CFG) | BIT(offset), + gpio_reg_base + GPIO_CFG); +- spin_unlock_irqrestore(&gc->bgpio_lock, flags); ++ raw_spin_unlock_irqrestore(&gc->bgpio_lock, flags); + + return 0; + } +@@ -37,10 +37,10 @@ static void ls1x_gpio_free(struct gpio_chip *gc, unsigned int offset) + { + unsigned long flags; + +- spin_lock_irqsave(&gc->bgpio_lock, flags); ++ raw_spin_lock_irqsave(&gc->bgpio_lock, flags); + __raw_writel(__raw_readl(gpio_reg_base + GPIO_CFG) & ~BIT(offset), + gpio_reg_base + GPIO_CFG); +- spin_unlock_irqrestore(&gc->bgpio_lock, flags); ++ raw_spin_unlock_irqrestore(&gc->bgpio_lock, flags); + } + + static int ls1x_gpio_probe(struct platform_device *pdev) +diff --git a/drivers/gpio/gpio-menz127.c b/drivers/gpio/gpio-menz127.c +index 1e21c661d79d6..a035a9bcb57c6 100644 +--- a/drivers/gpio/gpio-menz127.c ++++ b/drivers/gpio/gpio-menz127.c +@@ -64,7 +64,7 @@ static int men_z127_debounce(struct gpio_chip *gc, unsigned gpio, + debounce /= 50; + } + +- spin_lock(&gc->bgpio_lock); ++ raw_spin_lock(&gc->bgpio_lock); + + db_en = readl(priv->reg_base + MEN_Z127_DBER); + +@@ -79,7 +79,7 @@ static int men_z127_debounce(struct gpio_chip *gc, unsigned gpio, + writel(db_en, priv->reg_base + MEN_Z127_DBER); + writel(db_cnt, priv->reg_base + GPIO_TO_DBCNT_REG(gpio)); + +- spin_unlock(&gc->bgpio_lock); ++ raw_spin_unlock(&gc->bgpio_lock); + + return 0; + } +@@ -91,7 +91,7 @@ static int men_z127_set_single_ended(struct gpio_chip *gc, + struct men_z127_gpio *priv = gpiochip_get_data(gc); + u32 od_en; + +- spin_lock(&gc->bgpio_lock); ++ raw_spin_lock(&gc->bgpio_lock); + od_en = readl(priv->reg_base + MEN_Z127_ODER); + + if (param == PIN_CONFIG_DRIVE_OPEN_DRAIN) +@@ -101,7 +101,7 @@ static int men_z127_set_single_ended(struct gpio_chip *gc, + od_en &= ~BIT(offset); + + writel(od_en, priv->reg_base + MEN_Z127_ODER); +- spin_unlock(&gc->bgpio_lock); ++ raw_spin_unlock(&gc->bgpio_lock); + + return 0; + } +diff --git a/drivers/gpio/gpio-mlxbf2.c b/drivers/gpio/gpio-mlxbf2.c +index 40a052bc67849..5a09070e5f78c 100644 +--- a/drivers/gpio/gpio-mlxbf2.c ++++ b/drivers/gpio/gpio-mlxbf2.c +@@ -120,7 +120,7 @@ static int mlxbf2_gpio_lock_acquire(struct mlxbf2_gpio_context *gs) + u32 arm_gpio_lock_val; + + mutex_lock(yu_arm_gpio_lock_param.lock); +- spin_lock(&gs->gc.bgpio_lock); ++ raw_spin_lock(&gs->gc.bgpio_lock); + + arm_gpio_lock_val = readl(yu_arm_gpio_lock_param.io); + +@@ -128,7 +128,7 @@ static int mlxbf2_gpio_lock_acquire(struct mlxbf2_gpio_context *gs) + * When lock active bit[31] is set, ModeX is write enabled + */ + if (YU_LOCK_ACTIVE_BIT(arm_gpio_lock_val)) { +- spin_unlock(&gs->gc.bgpio_lock); ++ raw_spin_unlock(&gs->gc.bgpio_lock); + 
mutex_unlock(yu_arm_gpio_lock_param.lock); + return -EINVAL; + } +@@ -146,7 +146,7 @@ static void mlxbf2_gpio_lock_release(struct mlxbf2_gpio_context *gs) + __releases(yu_arm_gpio_lock_param.lock) + { + writel(YU_ARM_GPIO_LOCK_RELEASE, yu_arm_gpio_lock_param.io); +- spin_unlock(&gs->gc.bgpio_lock); ++ raw_spin_unlock(&gs->gc.bgpio_lock); + mutex_unlock(yu_arm_gpio_lock_param.lock); + } + +diff --git a/drivers/gpio/gpio-mmio.c b/drivers/gpio/gpio-mmio.c +index c335a0309ba31..d9dff3dc92ae5 100644 +--- a/drivers/gpio/gpio-mmio.c ++++ b/drivers/gpio/gpio-mmio.c +@@ -220,7 +220,7 @@ static void bgpio_set(struct gpio_chip *gc, unsigned int gpio, int val) + unsigned long mask = bgpio_line2mask(gc, gpio); + unsigned long flags; + +- spin_lock_irqsave(&gc->bgpio_lock, flags); ++ raw_spin_lock_irqsave(&gc->bgpio_lock, flags); + + if (val) + gc->bgpio_data |= mask; +@@ -229,7 +229,7 @@ static void bgpio_set(struct gpio_chip *gc, unsigned int gpio, int val) + + gc->write_reg(gc->reg_dat, gc->bgpio_data); + +- spin_unlock_irqrestore(&gc->bgpio_lock, flags); ++ raw_spin_unlock_irqrestore(&gc->bgpio_lock, flags); + } + + static void bgpio_set_with_clear(struct gpio_chip *gc, unsigned int gpio, +@@ -248,7 +248,7 @@ static void bgpio_set_set(struct gpio_chip *gc, unsigned int gpio, int val) + unsigned long mask = bgpio_line2mask(gc, gpio); + unsigned long flags; + +- spin_lock_irqsave(&gc->bgpio_lock, flags); ++ raw_spin_lock_irqsave(&gc->bgpio_lock, flags); + + if (val) + gc->bgpio_data |= mask; +@@ -257,7 +257,7 @@ static void bgpio_set_set(struct gpio_chip *gc, unsigned int gpio, int val) + + gc->write_reg(gc->reg_set, gc->bgpio_data); + +- spin_unlock_irqrestore(&gc->bgpio_lock, flags); ++ raw_spin_unlock_irqrestore(&gc->bgpio_lock, flags); + } + + static void bgpio_multiple_get_masks(struct gpio_chip *gc, +@@ -286,7 +286,7 @@ static void bgpio_set_multiple_single_reg(struct gpio_chip *gc, + unsigned long flags; + unsigned long set_mask, clear_mask; + +- spin_lock_irqsave(&gc->bgpio_lock, flags); ++ raw_spin_lock_irqsave(&gc->bgpio_lock, flags); + + bgpio_multiple_get_masks(gc, mask, bits, &set_mask, &clear_mask); + +@@ -295,7 +295,7 @@ static void bgpio_set_multiple_single_reg(struct gpio_chip *gc, + + gc->write_reg(reg, gc->bgpio_data); + +- spin_unlock_irqrestore(&gc->bgpio_lock, flags); ++ raw_spin_unlock_irqrestore(&gc->bgpio_lock, flags); + } + + static void bgpio_set_multiple(struct gpio_chip *gc, unsigned long *mask, +@@ -347,7 +347,7 @@ static int bgpio_dir_in(struct gpio_chip *gc, unsigned int gpio) + { + unsigned long flags; + +- spin_lock_irqsave(&gc->bgpio_lock, flags); ++ raw_spin_lock_irqsave(&gc->bgpio_lock, flags); + + gc->bgpio_dir &= ~bgpio_line2mask(gc, gpio); + +@@ -356,7 +356,7 @@ static int bgpio_dir_in(struct gpio_chip *gc, unsigned int gpio) + if (gc->reg_dir_out) + gc->write_reg(gc->reg_dir_out, gc->bgpio_dir); + +- spin_unlock_irqrestore(&gc->bgpio_lock, flags); ++ raw_spin_unlock_irqrestore(&gc->bgpio_lock, flags); + + return 0; + } +@@ -387,7 +387,7 @@ static void bgpio_dir_out(struct gpio_chip *gc, unsigned int gpio, int val) + { + unsigned long flags; + +- spin_lock_irqsave(&gc->bgpio_lock, flags); ++ raw_spin_lock_irqsave(&gc->bgpio_lock, flags); + + gc->bgpio_dir |= bgpio_line2mask(gc, gpio); + +@@ -396,7 +396,7 @@ static void bgpio_dir_out(struct gpio_chip *gc, unsigned int gpio, int val) + if (gc->reg_dir_out) + gc->write_reg(gc->reg_dir_out, gc->bgpio_dir); + +- spin_unlock_irqrestore(&gc->bgpio_lock, flags); ++ raw_spin_unlock_irqrestore(&gc->bgpio_lock, flags); + } 
+ + static int bgpio_dir_out_dir_first(struct gpio_chip *gc, unsigned int gpio, +@@ -610,7 +610,7 @@ int bgpio_init(struct gpio_chip *gc, struct device *dev, + if (gc->bgpio_bits > BITS_PER_LONG) + return -EINVAL; + +- spin_lock_init(&gc->bgpio_lock); ++ raw_spin_lock_init(&gc->bgpio_lock); + gc->parent = dev; + gc->label = dev_name(dev); + gc->base = -1; diff --git a/drivers/gpio/gpio-mockup.c b/drivers/gpio/gpio-mockup.c index d26bff29157b5..0bded5853c41b 100644 --- a/drivers/gpio/gpio-mockup.c @@ -93637,6 +115007,71 @@ index 8f429d9f36616..a245bfd5a6173 100644 spin_lock_init(&mvpwm->lock); +diff --git a/drivers/gpio/gpio-mxc.c b/drivers/gpio/gpio-mxc.c +index c871602fc5ba9..853d9aa6b3b1f 100644 +--- a/drivers/gpio/gpio-mxc.c ++++ b/drivers/gpio/gpio-mxc.c +@@ -18,6 +18,7 @@ + #include <linux/module.h> + #include <linux/platform_device.h> + #include <linux/slab.h> ++#include <linux/spinlock.h> + #include <linux/syscore_ops.h> + #include <linux/gpio/driver.h> + #include <linux/of.h> +@@ -147,6 +148,7 @@ static int gpio_set_irq_type(struct irq_data *d, u32 type) + { + struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d); + struct mxc_gpio_port *port = gc->private; ++ unsigned long flags; + u32 bit, val; + u32 gpio_idx = d->hwirq; + int edge; +@@ -185,6 +187,8 @@ static int gpio_set_irq_type(struct irq_data *d, u32 type) + return -EINVAL; + } + ++ raw_spin_lock_irqsave(&port->gc.bgpio_lock, flags); ++ + if (GPIO_EDGE_SEL >= 0) { + val = readl(port->base + GPIO_EDGE_SEL); + if (edge == GPIO_INT_BOTH_EDGES) +@@ -204,15 +208,20 @@ static int gpio_set_irq_type(struct irq_data *d, u32 type) + + writel(1 << gpio_idx, port->base + GPIO_ISR); + +- return 0; ++ raw_spin_unlock_irqrestore(&port->gc.bgpio_lock, flags); ++ ++ return port->gc.direction_input(&port->gc, gpio_idx); + } + + static void mxc_flip_edge(struct mxc_gpio_port *port, u32 gpio) + { + void __iomem *reg = port->base; ++ unsigned long flags; + u32 bit, val; + int edge; + ++ raw_spin_lock_irqsave(&port->gc.bgpio_lock, flags); ++ + reg += GPIO_ICR1 + ((gpio & 0x10) >> 2); /* lower or upper register */ + bit = gpio & 0xf; + val = readl(reg); +@@ -227,9 +236,12 @@ static void mxc_flip_edge(struct mxc_gpio_port *port, u32 gpio) + } else { + pr_err("mxc: invalid configuration for GPIO %d: %x\n", + gpio, edge); +- return; ++ goto unlock; + } + writel(val | (edge << (bit << 1)), reg); ++ ++unlock: ++ raw_spin_unlock_irqrestore(&port->gc.bgpio_lock, flags); + } + + /* handle 32 interrupts in one status register */ diff --git a/drivers/gpio/gpio-pca953x.c b/drivers/gpio/gpio-pca953x.c index d2fe76f3f34fd..4860bf3b7e002 100644 --- a/drivers/gpio/gpio-pca953x.c @@ -93780,7 +115215,7 @@ index eeeb39bc171dc..bd75401b549d1 100644 chained_irq_exit(irq_chip, desc); diff --git a/drivers/gpio/gpio-rockchip.c b/drivers/gpio/gpio-rockchip.c -index ce63cbd14d69a..d32928c1efe0f 100644 +index ce63cbd14d69a..a197f698efebb 100644 --- a/drivers/gpio/gpio-rockchip.c +++ b/drivers/gpio/gpio-rockchip.c @@ -19,6 +19,8 @@ @@ -93888,7 +115323,15 @@ index ce63cbd14d69a..d32928c1efe0f 100644 } rockchip_gpio_writel(bank, level, bank->gpio_regs->int_type); -@@ -689,7 +699,7 @@ static int rockchip_gpio_probe(struct platform_device *pdev) +@@ -595,6 +605,7 @@ static int rockchip_gpiolib_register(struct rockchip_pin_bank *bank) + return -ENODATA; + + pctldev = of_pinctrl_get(pctlnp); ++ of_node_put(pctlnp); + if (!pctldev) + return -ENODEV; + +@@ -689,7 +700,7 @@ static int rockchip_gpio_probe(struct platform_device *pdev) struct device_node *pctlnp = 
of_get_parent(np); struct pinctrl_dev *pctldev = NULL; struct rockchip_pin_bank *bank = NULL; @@ -93897,7 +115340,7 @@ index ce63cbd14d69a..d32928c1efe0f 100644 static int gpio; int id, ret; -@@ -730,15 +740,22 @@ static int rockchip_gpio_probe(struct platform_device *pdev) +@@ -730,15 +741,22 @@ static int rockchip_gpio_probe(struct platform_device *pdev) return ret; } @@ -93928,10 +115371,68 @@ index ce63cbd14d69a..d32928c1efe0f 100644 } diff --git a/drivers/gpio/gpio-sifive.c b/drivers/gpio/gpio-sifive.c -index 403f9e833d6a3..7d82388b4ab7c 100644 +index 403f9e833d6a3..f41123de69c59 100644 --- a/drivers/gpio/gpio-sifive.c +++ b/drivers/gpio/gpio-sifive.c -@@ -223,7 +223,7 @@ static int sifive_gpio_probe(struct platform_device *pdev) +@@ -44,7 +44,7 @@ static void sifive_gpio_set_ie(struct sifive_gpio *chip, unsigned int offset) + unsigned long flags; + unsigned int trigger; + +- spin_lock_irqsave(&chip->gc.bgpio_lock, flags); ++ raw_spin_lock_irqsave(&chip->gc.bgpio_lock, flags); + trigger = (chip->irq_state & BIT(offset)) ? chip->trigger[offset] : 0; + regmap_update_bits(chip->regs, SIFIVE_GPIO_RISE_IE, BIT(offset), + (trigger & IRQ_TYPE_EDGE_RISING) ? BIT(offset) : 0); +@@ -54,7 +54,7 @@ static void sifive_gpio_set_ie(struct sifive_gpio *chip, unsigned int offset) + (trigger & IRQ_TYPE_LEVEL_HIGH) ? BIT(offset) : 0); + regmap_update_bits(chip->regs, SIFIVE_GPIO_LOW_IE, BIT(offset), + (trigger & IRQ_TYPE_LEVEL_LOW) ? BIT(offset) : 0); +- spin_unlock_irqrestore(&chip->gc.bgpio_lock, flags); ++ raw_spin_unlock_irqrestore(&chip->gc.bgpio_lock, flags); + } + + static int sifive_gpio_irq_set_type(struct irq_data *d, unsigned int trigger) +@@ -84,13 +84,13 @@ static void sifive_gpio_irq_enable(struct irq_data *d) + /* Switch to input */ + gc->direction_input(gc, offset); + +- spin_lock_irqsave(&gc->bgpio_lock, flags); ++ raw_spin_lock_irqsave(&gc->bgpio_lock, flags); + /* Clear any sticky pending interrupts */ + regmap_write(chip->regs, SIFIVE_GPIO_RISE_IP, bit); + regmap_write(chip->regs, SIFIVE_GPIO_FALL_IP, bit); + regmap_write(chip->regs, SIFIVE_GPIO_HIGH_IP, bit); + regmap_write(chip->regs, SIFIVE_GPIO_LOW_IP, bit); +- spin_unlock_irqrestore(&gc->bgpio_lock, flags); ++ raw_spin_unlock_irqrestore(&gc->bgpio_lock, flags); + + /* Enable interrupts */ + assign_bit(offset, &chip->irq_state, 1); +@@ -116,13 +116,13 @@ static void sifive_gpio_irq_eoi(struct irq_data *d) + u32 bit = BIT(offset); + unsigned long flags; + +- spin_lock_irqsave(&gc->bgpio_lock, flags); ++ raw_spin_lock_irqsave(&gc->bgpio_lock, flags); + /* Clear all pending interrupts */ + regmap_write(chip->regs, SIFIVE_GPIO_RISE_IP, bit); + regmap_write(chip->regs, SIFIVE_GPIO_FALL_IP, bit); + regmap_write(chip->regs, SIFIVE_GPIO_HIGH_IP, bit); + regmap_write(chip->regs, SIFIVE_GPIO_LOW_IP, bit); +- spin_unlock_irqrestore(&gc->bgpio_lock, flags); ++ raw_spin_unlock_irqrestore(&gc->bgpio_lock, flags); + + irq_chip_eoi_parent(d); + } +@@ -209,6 +209,7 @@ static int sifive_gpio_probe(struct platform_device *pdev) + return -ENODEV; + } + parent = irq_find_host(irq_parent); ++ of_node_put(irq_parent); + if (!parent) { + dev_err(dev, "no IRQ parent domain\n"); + return -ENODEV; +@@ -223,7 +224,7 @@ static int sifive_gpio_probe(struct platform_device *pdev) NULL, chip->base + SIFIVE_GPIO_OUTPUT_EN, chip->base + SIFIVE_GPIO_INPUT_EN, @@ -93940,6 +115441,27 @@ index 403f9e833d6a3..7d82388b4ab7c 100644 if (ret) { dev_err(dev, "unable to init generic GPIO\n"); return ret; +diff --git a/drivers/gpio/gpio-tb10x.c b/drivers/gpio/gpio-tb10x.c 
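[Editor's note] The rockchip and sifive hunks above also fix device-tree refcount leaks: of_get_parent(), the pinctrl lookup node, and of_irq_find_parent() all return a node with an elevated refcount that the caller must drop with of_node_put(). A hedged sketch of the idiom, with error handling trimmed and the foo_ name invented for illustration:

    #include <linux/of.h>
    #include <linux/of_irq.h>
    #include <linux/irqdomain.h>

    /* Illustrative only: balance the reference taken by the OF lookup
     * as soon as the node has served its purpose. */
    static int foo_find_irq_parent(struct device_node *np)
    {
            struct device_node *parent = of_irq_find_parent(np);
            struct irq_domain *domain;

            if (!parent)
                    return -ENODEV;
            domain = irq_find_host(parent);
            of_node_put(parent);    /* drop the reference on every path */
            return domain ? 0 : -ENODEV;
    }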
+index 718a508d3b2f8..de6afa3f97168 100644 +--- a/drivers/gpio/gpio-tb10x.c ++++ b/drivers/gpio/gpio-tb10x.c +@@ -62,14 +62,14 @@ static inline void tb10x_set_bits(struct tb10x_gpio *gpio, unsigned int offs, + u32 r; + unsigned long flags; + +- spin_lock_irqsave(&gpio->gc.bgpio_lock, flags); ++ raw_spin_lock_irqsave(&gpio->gc.bgpio_lock, flags); + + r = tb10x_reg_read(gpio, offs); + r = (r & ~mask) | (val & mask); + + tb10x_reg_write(gpio, offs, r); + +- spin_unlock_irqrestore(&gpio->gc.bgpio_lock, flags); ++ raw_spin_unlock_irqrestore(&gpio->gc.bgpio_lock, flags); + } + + static int tb10x_gpio_to_irq(struct gpio_chip *chip, unsigned offset) diff --git a/drivers/gpio/gpio-tegra186.c b/drivers/gpio/gpio-tegra186.c index c99858f40a27e..00762de3d4096 100644 --- a/drivers/gpio/gpio-tegra186.c @@ -94217,10 +115739,130 @@ index 47712b6903b51..53be0bdf2bc38 100644 return irq; } diff --git a/drivers/gpio/gpiolib-cdev.c b/drivers/gpio/gpiolib-cdev.c -index c7b5446d01fd2..937e7a8dd8a96 100644 +index c7b5446d01fd2..2a2e0691462bf 100644 --- a/drivers/gpio/gpiolib-cdev.c +++ b/drivers/gpio/gpiolib-cdev.c -@@ -330,7 +330,7 @@ static int linehandle_create(struct gpio_device *gdev, void __user *ip) +@@ -54,6 +54,50 @@ static_assert(IS_ALIGNED(sizeof(struct gpio_v2_line_values), 8)); + * interface to gpiolib GPIOs via ioctl()s. + */ + ++typedef __poll_t (*poll_fn)(struct file *, struct poll_table_struct *); ++typedef long (*ioctl_fn)(struct file *, unsigned int, unsigned long); ++typedef ssize_t (*read_fn)(struct file *, char __user *, ++ size_t count, loff_t *); ++ ++static __poll_t call_poll_locked(struct file *file, ++ struct poll_table_struct *wait, ++ struct gpio_device *gdev, poll_fn func) ++{ ++ __poll_t ret; ++ ++ down_read(&gdev->sem); ++ ret = func(file, wait); ++ up_read(&gdev->sem); ++ ++ return ret; ++} ++ ++static long call_ioctl_locked(struct file *file, unsigned int cmd, ++ unsigned long arg, struct gpio_device *gdev, ++ ioctl_fn func) ++{ ++ long ret; ++ ++ down_read(&gdev->sem); ++ ret = func(file, cmd, arg); ++ up_read(&gdev->sem); ++ ++ return ret; ++} ++ ++static ssize_t call_read_locked(struct file *file, char __user *buf, ++ size_t count, loff_t *f_ps, ++ struct gpio_device *gdev, read_fn func) ++{ ++ ssize_t ret; ++ ++ down_read(&gdev->sem); ++ ret = func(file, buf, count, f_ps); ++ up_read(&gdev->sem); ++ ++ return ret; ++} ++ + /* + * GPIO line handle management + */ +@@ -190,23 +234,25 @@ static long linehandle_set_config(struct linehandle_state *lh, + return 0; + } + +-static long linehandle_ioctl(struct file *file, unsigned int cmd, +- unsigned long arg) ++static long linehandle_ioctl_unlocked(struct file *file, unsigned int cmd, ++ unsigned long arg) + { + struct linehandle_state *lh = file->private_data; + void __user *ip = (void __user *)arg; + struct gpiohandle_data ghd; + DECLARE_BITMAP(vals, GPIOHANDLES_MAX); +- int i; ++ unsigned int i; ++ int ret; + +- if (cmd == GPIOHANDLE_GET_LINE_VALUES_IOCTL) { +- /* NOTE: It's ok to read values of output lines. 
*/ +- int ret = gpiod_get_array_value_complex(false, +- true, +- lh->num_descs, +- lh->descs, +- NULL, +- vals); ++ if (!lh->gdev->chip) ++ return -ENODEV; ++ ++ switch (cmd) { ++ case GPIOHANDLE_GET_LINE_VALUES_IOCTL: ++ /* NOTE: It's okay to read values of output lines */ ++ ret = gpiod_get_array_value_complex(false, true, ++ lh->num_descs, lh->descs, ++ NULL, vals); + if (ret) + return ret; + +@@ -218,7 +264,7 @@ static long linehandle_ioctl(struct file *file, unsigned int cmd, + return -EFAULT; + + return 0; +- } else if (cmd == GPIOHANDLE_SET_LINE_VALUES_IOCTL) { ++ case GPIOHANDLE_SET_LINE_VALUES_IOCTL: + /* + * All line descriptors were created at once with the same + * flags so just check if the first one is really output. +@@ -240,10 +286,20 @@ static long linehandle_ioctl(struct file *file, unsigned int cmd, + lh->descs, + NULL, + vals); +- } else if (cmd == GPIOHANDLE_SET_CONFIG_IOCTL) { ++ case GPIOHANDLE_SET_CONFIG_IOCTL: + return linehandle_set_config(lh, ip); ++ default: ++ return -EINVAL; + } +- return -EINVAL; ++} ++ ++static long linehandle_ioctl(struct file *file, unsigned int cmd, ++ unsigned long arg) ++{ ++ struct linehandle_state *lh = file->private_data; ++ ++ return call_ioctl_locked(file, cmd, arg, lh->gdev, ++ linehandle_ioctl_unlocked); + } + + #ifdef CONFIG_COMPAT +@@ -330,7 +386,7 @@ static int linehandle_create(struct gpio_device *gdev, void __user *ip) goto out_free_lh; } @@ -94229,7 +115871,112 @@ index c7b5446d01fd2..937e7a8dd8a96 100644 if (ret) goto out_free_lh; lh->descs[i] = desc; -@@ -1378,7 +1378,7 @@ static int linereq_create(struct gpio_device *gdev, void __user *ip) +@@ -1182,20 +1238,34 @@ static long linereq_set_config(struct linereq *lr, void __user *ip) + return ret; + } + +-static long linereq_ioctl(struct file *file, unsigned int cmd, +- unsigned long arg) ++static long linereq_ioctl_unlocked(struct file *file, unsigned int cmd, ++ unsigned long arg) + { + struct linereq *lr = file->private_data; + void __user *ip = (void __user *)arg; + +- if (cmd == GPIO_V2_LINE_GET_VALUES_IOCTL) ++ if (!lr->gdev->chip) ++ return -ENODEV; ++ ++ switch (cmd) { ++ case GPIO_V2_LINE_GET_VALUES_IOCTL: + return linereq_get_values(lr, ip); +- else if (cmd == GPIO_V2_LINE_SET_VALUES_IOCTL) ++ case GPIO_V2_LINE_SET_VALUES_IOCTL: + return linereq_set_values(lr, ip); +- else if (cmd == GPIO_V2_LINE_SET_CONFIG_IOCTL) ++ case GPIO_V2_LINE_SET_CONFIG_IOCTL: + return linereq_set_config(lr, ip); ++ default: ++ return -EINVAL; ++ } ++} + +- return -EINVAL; ++static long linereq_ioctl(struct file *file, unsigned int cmd, ++ unsigned long arg) ++{ ++ struct linereq *lr = file->private_data; ++ ++ return call_ioctl_locked(file, cmd, arg, lr->gdev, ++ linereq_ioctl_unlocked); + } + + #ifdef CONFIG_COMPAT +@@ -1206,12 +1276,15 @@ static long linereq_ioctl_compat(struct file *file, unsigned int cmd, + } + #endif + +-static __poll_t linereq_poll(struct file *file, +- struct poll_table_struct *wait) ++static __poll_t linereq_poll_unlocked(struct file *file, ++ struct poll_table_struct *wait) + { + struct linereq *lr = file->private_data; + __poll_t events = 0; + ++ if (!lr->gdev->chip) ++ return EPOLLHUP | EPOLLERR; ++ + poll_wait(file, &lr->wait, wait); + + if (!kfifo_is_empty_spinlocked_noirqsave(&lr->events, +@@ -1221,16 +1294,25 @@ static __poll_t linereq_poll(struct file *file, + return events; + } + +-static ssize_t linereq_read(struct file *file, +- char __user *buf, +- size_t count, +- loff_t *f_ps) ++static __poll_t linereq_poll(struct file *file, ++ struct 
poll_table_struct *wait) ++{ ++ struct linereq *lr = file->private_data; ++ ++ return call_poll_locked(file, wait, lr->gdev, linereq_poll_unlocked); ++} ++ ++static ssize_t linereq_read_unlocked(struct file *file, char __user *buf, ++ size_t count, loff_t *f_ps) + { + struct linereq *lr = file->private_data; + struct gpio_v2_line_event le; + ssize_t bytes_read = 0; + int ret; + ++ if (!lr->gdev->chip) ++ return -ENODEV; ++ + if (count < sizeof(le)) + return -EINVAL; + +@@ -1275,6 +1357,15 @@ static ssize_t linereq_read(struct file *file, + return bytes_read; + } + ++static ssize_t linereq_read(struct file *file, char __user *buf, ++ size_t count, loff_t *f_ps) ++{ ++ struct linereq *lr = file->private_data; ++ ++ return call_read_locked(file, buf, count, f_ps, lr->gdev, ++ linereq_read_unlocked); ++} ++ + static void linereq_free(struct linereq *lr) + { + unsigned int i; +@@ -1378,7 +1469,7 @@ static int linereq_create(struct gpio_device *gdev, void __user *ip) goto out_free_linereq; } @@ -94238,7 +115985,112 @@ index c7b5446d01fd2..937e7a8dd8a96 100644 if (ret) goto out_free_linereq; -@@ -1764,7 +1764,7 @@ static int lineevent_create(struct gpio_device *gdev, void __user *ip) +@@ -1490,12 +1581,15 @@ struct lineevent_state { + (GPIOEVENT_REQUEST_RISING_EDGE | \ + GPIOEVENT_REQUEST_FALLING_EDGE) + +-static __poll_t lineevent_poll(struct file *file, +- struct poll_table_struct *wait) ++static __poll_t lineevent_poll_unlocked(struct file *file, ++ struct poll_table_struct *wait) + { + struct lineevent_state *le = file->private_data; + __poll_t events = 0; + ++ if (!le->gdev->chip) ++ return EPOLLHUP | EPOLLERR; ++ + poll_wait(file, &le->wait, wait); + + if (!kfifo_is_empty_spinlocked_noirqsave(&le->events, &le->wait.lock)) +@@ -1504,15 +1598,21 @@ static __poll_t lineevent_poll(struct file *file, + return events; + } + ++static __poll_t lineevent_poll(struct file *file, ++ struct poll_table_struct *wait) ++{ ++ struct lineevent_state *le = file->private_data; ++ ++ return call_poll_locked(file, wait, le->gdev, lineevent_poll_unlocked); ++} ++ + struct compat_gpioeevent_data { + compat_u64 timestamp; + u32 id; + }; + +-static ssize_t lineevent_read(struct file *file, +- char __user *buf, +- size_t count, +- loff_t *f_ps) ++static ssize_t lineevent_read_unlocked(struct file *file, char __user *buf, ++ size_t count, loff_t *f_ps) + { + struct lineevent_state *le = file->private_data; + struct gpioevent_data ge; +@@ -1520,6 +1620,9 @@ static ssize_t lineevent_read(struct file *file, + ssize_t ge_size; + int ret; + ++ if (!le->gdev->chip) ++ return -ENODEV; ++ + /* + * When compatible system call is being used the struct gpioevent_data, + * in case of at least ia32, has different size due to the alignment +@@ -1577,6 +1680,15 @@ static ssize_t lineevent_read(struct file *file, + return bytes_read; + } + ++static ssize_t lineevent_read(struct file *file, char __user *buf, ++ size_t count, loff_t *f_ps) ++{ ++ struct lineevent_state *le = file->private_data; ++ ++ return call_read_locked(file, buf, count, f_ps, le->gdev, ++ lineevent_read_unlocked); ++} ++ + static void lineevent_free(struct lineevent_state *le) + { + if (le->irq) +@@ -1594,13 +1706,16 @@ static int lineevent_release(struct inode *inode, struct file *file) + return 0; + } + +-static long lineevent_ioctl(struct file *file, unsigned int cmd, +- unsigned long arg) ++static long lineevent_ioctl_unlocked(struct file *file, unsigned int cmd, ++ unsigned long arg) + { + struct lineevent_state *le = file->private_data; + void __user *ip = 
(void __user *)arg; + struct gpiohandle_data ghd; + ++ if (!le->gdev->chip) ++ return -ENODEV; ++ + /* + * We can get the value for an event line but not set it, + * because it is input by definition. +@@ -1623,6 +1738,15 @@ static long lineevent_ioctl(struct file *file, unsigned int cmd, + return -EINVAL; + } + ++static long lineevent_ioctl(struct file *file, unsigned int cmd, ++ unsigned long arg) ++{ ++ struct lineevent_state *le = file->private_data; ++ ++ return call_ioctl_locked(file, cmd, arg, le->gdev, ++ lineevent_ioctl_unlocked); ++} ++ + #ifdef CONFIG_COMPAT + static long lineevent_ioctl_compat(struct file *file, unsigned int cmd, + unsigned long arg) +@@ -1764,7 +1888,7 @@ static int lineevent_create(struct gpio_device *gdev, void __user *ip) } } @@ -94247,7 +116099,7 @@ index c7b5446d01fd2..937e7a8dd8a96 100644 if (ret) goto out_free_le; le->desc = desc; -@@ -1784,7 +1784,6 @@ static int lineevent_create(struct gpio_device *gdev, void __user *ip) +@@ -1784,7 +1908,6 @@ static int lineevent_create(struct gpio_device *gdev, void __user *ip) ret = -ENODEV; goto out_free_le; } @@ -94255,7 +116107,7 @@ index c7b5446d01fd2..937e7a8dd8a96 100644 if (eflags & GPIOEVENT_REQUEST_RISING_EDGE) irqflags |= test_bit(FLAG_ACTIVE_LOW, &desc->flags) ? -@@ -1798,7 +1797,7 @@ static int lineevent_create(struct gpio_device *gdev, void __user *ip) +@@ -1798,7 +1921,7 @@ static int lineevent_create(struct gpio_device *gdev, void __user *ip) init_waitqueue_head(&le->wait); /* Request a thread to read the events */ @@ -94264,7 +116116,7 @@ index c7b5446d01fd2..937e7a8dd8a96 100644 lineevent_irq_handler, lineevent_irq_thread, irqflags, -@@ -1807,6 +1806,8 @@ static int lineevent_create(struct gpio_device *gdev, void __user *ip) +@@ -1807,6 +1930,8 @@ static int lineevent_create(struct gpio_device *gdev, void __user *ip) if (ret) goto out_free_le; @@ -94273,6 +116125,154 @@ index c7b5446d01fd2..937e7a8dd8a96 100644 fd = get_unused_fd_flags(O_RDONLY | O_CLOEXEC); if (fd < 0) { ret = fd; +@@ -2113,28 +2238,30 @@ static long gpio_ioctl(struct file *file, unsigned int cmd, unsigned long arg) + return -ENODEV; + + /* Fill in the struct and pass to userspace */ +- if (cmd == GPIO_GET_CHIPINFO_IOCTL) { ++ switch (cmd) { ++ case GPIO_GET_CHIPINFO_IOCTL: + return chipinfo_get(cdev, ip); + #ifdef CONFIG_GPIO_CDEV_V1 +- } else if (cmd == GPIO_GET_LINEHANDLE_IOCTL) { ++ case GPIO_GET_LINEHANDLE_IOCTL: + return linehandle_create(gdev, ip); +- } else if (cmd == GPIO_GET_LINEEVENT_IOCTL) { ++ case GPIO_GET_LINEEVENT_IOCTL: + return lineevent_create(gdev, ip); +- } else if (cmd == GPIO_GET_LINEINFO_IOCTL || +- cmd == GPIO_GET_LINEINFO_WATCH_IOCTL) { +- return lineinfo_get_v1(cdev, ip, +- cmd == GPIO_GET_LINEINFO_WATCH_IOCTL); ++ case GPIO_GET_LINEINFO_IOCTL: ++ return lineinfo_get_v1(cdev, ip, false); ++ case GPIO_GET_LINEINFO_WATCH_IOCTL: ++ return lineinfo_get_v1(cdev, ip, true); + #endif /* CONFIG_GPIO_CDEV_V1 */ +- } else if (cmd == GPIO_V2_GET_LINEINFO_IOCTL || +- cmd == GPIO_V2_GET_LINEINFO_WATCH_IOCTL) { +- return lineinfo_get(cdev, ip, +- cmd == GPIO_V2_GET_LINEINFO_WATCH_IOCTL); +- } else if (cmd == GPIO_V2_GET_LINE_IOCTL) { ++ case GPIO_V2_GET_LINEINFO_IOCTL: ++ return lineinfo_get(cdev, ip, false); ++ case GPIO_V2_GET_LINEINFO_WATCH_IOCTL: ++ return lineinfo_get(cdev, ip, true); ++ case GPIO_V2_GET_LINE_IOCTL: + return linereq_create(gdev, ip); +- } else if (cmd == GPIO_GET_LINEINFO_UNWATCH_IOCTL) { ++ case GPIO_GET_LINEINFO_UNWATCH_IOCTL: + return lineinfo_unwatch(cdev, ip); ++ default: ++ return -EINVAL; 
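[Editor's note] The chardev refactor above follows one pattern throughout: every file operation (ioctl, poll, read) is split into an *_unlocked body and a thin wrapper that holds gdev->sem for reading and bails out with -ENODEV (or EPOLLHUP) when gdev->chip is NULL. gpiochip_remove() takes the same semaphore for writing, so a hot-unplug can no longer race user space into a NULL-pointer dereference. A minimal sketch of the idiom, assuming the internal struct gpio_device with its new sem member:

    #include <linux/rwsem.h>
    #include <linux/errno.h>

    /* Sketch of the "call locked" idiom: gdev->sem is held for reading
     * across the whole operation, so a writer (gpiochip_remove) cannot
     * clear gdev->chip mid-call. */
    static long foo_op_locked(struct gpio_device *gdev,
                              long (*op)(struct gpio_device *))
    {
            long ret;

            down_read(&gdev->sem);
            ret = gdev->chip ? op(gdev) : -ENODEV; /* chip hot-unplugged */
            up_read(&gdev->sem);
            return ret;
    }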
+ } +- return -EINVAL; + } + + #ifdef CONFIG_COMPAT +@@ -2176,12 +2303,15 @@ static int lineinfo_changed_notify(struct notifier_block *nb, + return NOTIFY_OK; + } + +-static __poll_t lineinfo_watch_poll(struct file *file, +- struct poll_table_struct *pollt) ++static __poll_t lineinfo_watch_poll_unlocked(struct file *file, ++ struct poll_table_struct *pollt) + { + struct gpio_chardev_data *cdev = file->private_data; + __poll_t events = 0; + ++ if (!cdev->gdev->chip) ++ return EPOLLHUP | EPOLLERR; ++ + poll_wait(file, &cdev->wait, pollt); + + if (!kfifo_is_empty_spinlocked_noirqsave(&cdev->events, +@@ -2191,8 +2321,17 @@ static __poll_t lineinfo_watch_poll(struct file *file, + return events; + } + +-static ssize_t lineinfo_watch_read(struct file *file, char __user *buf, +- size_t count, loff_t *off) ++static __poll_t lineinfo_watch_poll(struct file *file, ++ struct poll_table_struct *pollt) ++{ ++ struct gpio_chardev_data *cdev = file->private_data; ++ ++ return call_poll_locked(file, pollt, cdev->gdev, ++ lineinfo_watch_poll_unlocked); ++} ++ ++static ssize_t lineinfo_watch_read_unlocked(struct file *file, char __user *buf, ++ size_t count, loff_t *off) + { + struct gpio_chardev_data *cdev = file->private_data; + struct gpio_v2_line_info_changed event; +@@ -2200,6 +2339,9 @@ static ssize_t lineinfo_watch_read(struct file *file, char __user *buf, + int ret; + size_t event_size; + ++ if (!cdev->gdev->chip) ++ return -ENODEV; ++ + #ifndef CONFIG_GPIO_CDEV_V1 + event_size = sizeof(struct gpio_v2_line_info_changed); + if (count < event_size) +@@ -2267,6 +2409,15 @@ static ssize_t lineinfo_watch_read(struct file *file, char __user *buf, + return bytes_read; + } + ++static ssize_t lineinfo_watch_read(struct file *file, char __user *buf, ++ size_t count, loff_t *off) ++{ ++ struct gpio_chardev_data *cdev = file->private_data; ++ ++ return call_read_locked(file, buf, count, off, cdev->gdev, ++ lineinfo_watch_read_unlocked); ++} ++ + /** + * gpio_chrdev_open() - open the chardev for ioctl operations + * @inode: inode for this chardev +@@ -2280,13 +2431,17 @@ static int gpio_chrdev_open(struct inode *inode, struct file *file) + struct gpio_chardev_data *cdev; + int ret = -ENOMEM; + ++ down_read(&gdev->sem); ++ + /* Fail on open if the backing gpiochip is gone */ +- if (!gdev->chip) +- return -ENODEV; ++ if (!gdev->chip) { ++ ret = -ENODEV; ++ goto out_unlock; ++ } + + cdev = kzalloc(sizeof(*cdev), GFP_KERNEL); + if (!cdev) +- return -ENOMEM; ++ goto out_unlock; + + cdev->watched_lines = bitmap_zalloc(gdev->chip->ngpio, GFP_KERNEL); + if (!cdev->watched_lines) +@@ -2309,6 +2464,8 @@ static int gpio_chrdev_open(struct inode *inode, struct file *file) + if (ret) + goto out_unregister_notifier; + ++ up_read(&gdev->sem); ++ + return ret; + + out_unregister_notifier: +@@ -2318,6 +2475,8 @@ out_free_bitmap: + bitmap_free(cdev->watched_lines); + out_free_cdev: + kfree(cdev); ++out_unlock: ++ up_read(&gdev->sem); + return ret; + } + diff --git a/drivers/gpio/gpiolib-of.c b/drivers/gpio/gpiolib-of.c index 0ad288ab6262d..7a96eb626a08b 100644 --- a/drivers/gpio/gpiolib-of.c @@ -94336,10 +116336,173 @@ index 4098bc7f88b7e..44c1ad51b3fe9 100644 status = gpiod_set_transitory(desc, false); if (!status) { diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c -index d1b9b721218f2..320baed949ee8 100644 +index d1b9b721218f2..8c041a8dd9d8f 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c -@@ -1368,6 +1368,16 @@ static int gpiochip_to_irq(struct gpio_chip *gc, unsigned int offset) +@@ -189,9 +189,8 
@@ static int gpiochip_find_base(int ngpio) + /* found a free space? */ + if (gdev->base + gdev->ngpio <= base) + break; +- else +- /* nope, check the space right before the chip */ +- base = gdev->base - ngpio; ++ /* nope, check the space right before the chip */ ++ base = gdev->base - ngpio; + } + + if (gpio_is_valid(base)) { +@@ -525,12 +524,13 @@ static int gpiochip_setup_dev(struct gpio_device *gdev) + if (ret) + return ret; + ++ /* From this point, the .release() function cleans up gpio_device */ ++ gdev->dev.release = gpiodevice_release; ++ + ret = gpiochip_sysfs_register(gdev); + if (ret) + goto err_remove_device; + +- /* From this point, the .release() function cleans up gpio_device */ +- gdev->dev.release = gpiodevice_release; + dev_dbg(&gdev->dev, "registered GPIOs %d to %d on %s\n", gdev->base, + gdev->base + gdev->ngpio - 1, gdev->chip->label ? : "generic"); + +@@ -594,11 +594,12 @@ int gpiochip_add_data_with_key(struct gpio_chip *gc, void *data, + struct lock_class_key *request_key) + { + struct fwnode_handle *fwnode = gc->parent ? dev_fwnode(gc->parent) : NULL; +- unsigned long flags; +- int ret = 0; +- unsigned i; +- int base = gc->base; + struct gpio_device *gdev; ++ unsigned long flags; ++ unsigned int i; ++ u32 ngpios = 0; ++ int base = 0; ++ int ret = 0; + + /* + * First: allocate and populate the internal stat container, and +@@ -640,22 +641,43 @@ int gpiochip_add_data_with_key(struct gpio_chip *gc, void *data, + else + gdev->owner = THIS_MODULE; + +- gdev->descs = kcalloc(gc->ngpio, sizeof(gdev->descs[0]), GFP_KERNEL); +- if (!gdev->descs) { +- ret = -ENOMEM; +- goto err_free_dev_name; ++ /* ++ * Try the device properties if the driver didn't supply the number ++ * of GPIO lines. ++ */ ++ ngpios = gc->ngpio; ++ if (ngpios == 0) { ++ ret = device_property_read_u32(&gdev->dev, "ngpios", &ngpios); ++ if (ret == -ENODATA) ++ /* ++ * -ENODATA means that there is no property found and ++ * we want to issue the error message to the user. ++ * Besides that, we want to return different error code ++ * to state that supplied value is not valid. ++ */ ++ ngpios = 0; ++ else if (ret) ++ goto err_free_dev_name; ++ ++ gc->ngpio = ngpios; + } + + if (gc->ngpio == 0) { + chip_err(gc, "tried to insert a GPIO chip with zero lines\n"); + ret = -EINVAL; +- goto err_free_descs; ++ goto err_free_dev_name; + } + + if (gc->ngpio > FASTPATH_NGPIO) + chip_warn(gc, "line cnt %u is greater than fast path cnt %u\n", + gc->ngpio, FASTPATH_NGPIO); + ++ gdev->descs = kcalloc(gc->ngpio, sizeof(*gdev->descs), GFP_KERNEL); ++ if (!gdev->descs) { ++ ret = -ENOMEM; ++ goto err_free_dev_name; ++ } ++ + gdev->label = kstrdup_const(gc->label ?: "unknown", GFP_KERNEL); + if (!gdev->label) { + ret = -ENOMEM; +@@ -674,11 +696,13 @@ int gpiochip_add_data_with_key(struct gpio_chip *gc, void *data, + * it may be a pipe dream. It will not happen before we get rid + * of the sysfs interface anyways. 
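[Editor's note] The ngpios hunk above makes gpiochip_add_data_with_key() fall back to the firmware-provided "ngpios" device property when the driver leaves gc->ngpio at zero, treating -ENODATA (property absent) as "still zero", which the following check then rejects with -EINVAL. A hedged sketch of that fallback, with a hypothetical foo_ helper name:

    #include <linux/property.h>
    #include <linux/errno.h>

    /* Prefer the driver's value; otherwise consult the "ngpios" property.
     * A missing property is not an error here, it just leaves zero lines
     * for the caller to reject. */
    static int foo_resolve_ngpios(struct device *dev, u32 from_driver,
                                  u32 *out)
    {
            int ret;

            if (from_driver) {
                    *out = from_driver;
                    return 0;
            }
            ret = device_property_read_u32(dev, "ngpios", out);
            if (ret == -ENODATA) {  /* property absent */
                    *out = 0;
                    return 0;
            }
            return ret;             /* other errors are fatal */
    }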
+ */ ++ base = gc->base; + if (base < 0) { + base = gpiochip_find_base(gc->ngpio); + if (base < 0) { +- ret = base; + spin_unlock_irqrestore(&gpio_lock, flags); ++ ret = base; ++ base = 0; + goto err_free_label; + } + /* +@@ -703,6 +727,7 @@ int gpiochip_add_data_with_key(struct gpio_chip *gc, void *data, + spin_unlock_irqrestore(&gpio_lock, flags); + + BLOCKING_INIT_NOTIFIER_HEAD(&gdev->notifier); ++ init_rwsem(&gdev->sem); + + #ifdef CONFIG_PINCTRL + INIT_LIST_HEAD(&gdev->pin_ranges); +@@ -786,6 +811,11 @@ err_remove_of_chip: + err_free_gpiochip_mask: + gpiochip_remove_pin_ranges(gc); + gpiochip_free_valid_mask(gc); ++ if (gdev->dev.release) { ++ /* release() has been registered by gpiochip_setup_dev() */ ++ put_device(&gdev->dev); ++ goto err_print_message; ++ } + err_remove_from_list: + spin_lock_irqsave(&gpio_lock, flags); + list_del(&gdev->list); +@@ -799,13 +829,14 @@ err_free_dev_name: + err_free_ida: + ida_free(&gpio_ida, gdev->id); + err_free_gdev: ++ kfree(gdev); ++err_print_message: + /* failures here can mean systems won't boot... */ + if (ret != -EPROBE_DEFER) { + pr_err("%s: GPIOs %d..%d (%s) failed to register, %d\n", __func__, +- gdev->base, gdev->base + gdev->ngpio - 1, ++ base, base + (int)ngpios - 1, + gc->label ? : "generic", ret); + } +- kfree(gdev); + return ret; + } + EXPORT_SYMBOL_GPL(gpiochip_add_data_with_key); +@@ -835,6 +866,8 @@ void gpiochip_remove(struct gpio_chip *gc) + unsigned long flags; + unsigned int i; + ++ down_write(&gdev->sem); ++ + /* FIXME: should the legacy sysfs handling be moved to gpio_device? */ + gpiochip_sysfs_unregister(gdev); + gpiochip_free_hogs(gc); +@@ -869,6 +902,7 @@ void gpiochip_remove(struct gpio_chip *gc) + * gone. + */ + gcdev_unregister(gdev); ++ up_write(&gdev->sem); + put_device(&gdev->dev); + } + EXPORT_SYMBOL_GPL(gpiochip_remove); +@@ -1368,6 +1402,16 @@ static int gpiochip_to_irq(struct gpio_chip *gc, unsigned int offset) { struct irq_domain *domain = gc->irq.domain; @@ -94356,7 +116519,7 @@ index d1b9b721218f2..320baed949ee8 100644 if (!gpiochip_irqchip_irq_valid(gc, offset)) return -ENXIO; -@@ -1550,6 +1560,15 @@ static int gpiochip_add_irqchip(struct gpio_chip *gc, +@@ -1550,6 +1594,15 @@ static int gpiochip_add_irqchip(struct gpio_chip *gc, gpiochip_set_irq_hooks(gc); @@ -94372,7 +116535,7 @@ index d1b9b721218f2..320baed949ee8 100644 acpi_gpiochip_request_interrupts(gc); return 0; -@@ -2186,6 +2205,16 @@ static int gpio_set_bias(struct gpio_desc *desc) +@@ -2186,6 +2239,16 @@ static int gpio_set_bias(struct gpio_desc *desc) return gpio_set_config_with_argument_optional(desc, bias, arg); } @@ -94389,7 +116552,29 @@ index d1b9b721218f2..320baed949ee8 100644 int gpio_set_debounce_timeout(struct gpio_desc *desc, unsigned int debounce) { return gpio_set_config_with_argument_optional(desc, -@@ -3106,6 +3135,16 @@ int gpiod_to_irq(const struct gpio_desc *desc) +@@ -2350,8 +2413,7 @@ int gpiod_direction_output(struct gpio_desc *desc, int value) + ret = gpiod_direction_input(desc); + goto set_output_flag; + } +- } +- else if (test_bit(FLAG_OPEN_SOURCE, &desc->flags)) { ++ } else if (test_bit(FLAG_OPEN_SOURCE, &desc->flags)) { + ret = gpio_set_config(desc, PIN_CONFIG_DRIVE_OPEN_SOURCE); + if (!ret) + goto set_output_value; +@@ -2508,9 +2570,9 @@ static int gpiod_get_raw_value_commit(const struct gpio_desc *desc) + static int gpio_chip_get_multiple(struct gpio_chip *gc, + unsigned long *mask, unsigned long *bits) + { +- if (gc->get_multiple) { ++ if (gc->get_multiple) + return gc->get_multiple(gc, mask, bits); +- } else if 
(gc->get) { ++ if (gc->get) { + int i, value; + + for_each_set_bit(i, mask, gc->ngpio) { +@@ -3106,6 +3168,16 @@ int gpiod_to_irq(const struct gpio_desc *desc) return retirq; } @@ -94407,10 +116592,84 @@ index d1b9b721218f2..320baed949ee8 100644 } EXPORT_SYMBOL_GPL(gpiod_to_irq); diff --git a/drivers/gpio/gpiolib.h b/drivers/gpio/gpiolib.h -index 30bc3f80f83e6..c31f4626915de 100644 +index 30bc3f80f83e6..73b732a1d9c94 100644 --- a/drivers/gpio/gpiolib.h +++ b/drivers/gpio/gpiolib.h -@@ -135,6 +135,18 @@ struct gpio_desc { +@@ -15,6 +15,7 @@ + #include <linux/device.h> + #include <linux/module.h> + #include <linux/cdev.h> ++#include <linux/rwsem.h> + + #define GPIOCHIP_NAME "gpiochip" + +@@ -37,6 +38,12 @@ + * or name of the IP component in a System on Chip. + * @data: per-instance data assigned by the driver + * @list: links gpio_device:s together for traversal ++ * @notifier: used to notify subscribers about lines being requested, released ++ * or reconfigured ++ * @sem: protects the structure from a NULL-pointer dereference of @chip by ++ * user-space operations when the device gets unregistered during ++ * a hot-unplug event ++ * @pin_ranges: range of pins served by the GPIO driver + * + * This state container holds most of the runtime variable data + * for a GPIO device and can hold references and live on after the +@@ -57,6 +64,7 @@ struct gpio_device { + void *data; + struct list_head list; + struct blocking_notifier_head notifier; ++ struct rw_semaphore sem; + + #ifdef CONFIG_PINCTRL + /* +@@ -72,6 +80,20 @@ struct gpio_device { + /* gpio suffixes used for ACPI and device tree lookup */ + static __maybe_unused const char * const gpio_suffixes[] = { "gpios", "gpio" }; + ++/** ++ * struct gpio_array - Opaque descriptor for a structure of GPIO array attributes ++ * ++ * @desc: Array of pointers to the GPIO descriptors ++ * @size: Number of elements in desc ++ * @chip: Parent GPIO chip ++ * @get_mask: Get mask used in fastpath ++ * @set_mask: Set mask used in fastpath ++ * @invert_mask: Invert mask used in fastpath ++ * ++ * This structure is attached to struct gpiod_descs obtained from ++ * gpiod_get_array() and can be passed back to get/set array functions in order ++ * to activate fast processing path if applicable. ++ */ + struct gpio_array { + struct gpio_desc **desc; + unsigned int size; +@@ -96,6 +118,23 @@ int gpiod_set_array_value_complex(bool raw, bool can_sleep, + extern spinlock_t gpio_lock; + extern struct list_head gpio_devices; + ++ ++/** ++ * struct gpio_desc - Opaque descriptor for a GPIO ++ * ++ * @gdev: Pointer to the parent GPIO device ++ * @flags: Binary descriptor flags ++ * @label: Name of the consumer ++ * @name: Line name ++ * @hog: Pointer to the device node that hogs this line (if any) ++ * @debounce_period_us: Debounce period in microseconds ++ * ++ * These are obtained using gpiod_get() and are preferable to the old ++ * integer-based handles. ++ * ++ * Contrary to integers, a pointer to a &struct gpio_desc is guaranteed to be ++ * valid until the GPIO is released. 
++ */ + struct gpio_desc { + struct gpio_device *gdev; + unsigned long flags; +@@ -135,6 +174,18 @@ struct gpio_desc { int gpiod_request(struct gpio_desc *desc, const char *label); void gpiod_free(struct gpio_desc *desc); @@ -94596,10 +116855,38 @@ index 1d41c2c00623b..5690cb6d27fed 100644 all_hub = true; return amdgpu_gmc_flush_gpu_tlb_pasid(adev, pasid, flush_type, all_hub); +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c +index 46cd4ee6bafb7..f3743089a1c99 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c +@@ -44,5 +44,6 @@ const struct kfd2kgd_calls aldebaran_kfd2kgd = { + .get_atc_vmid_pasid_mapping_info = + kgd_gfx_v9_get_atc_vmid_pasid_mapping_info, + .set_vm_context_page_table_base = kgd_gfx_v9_set_vm_context_page_table_base, ++ .get_cu_occupancy = kgd_gfx_v9_get_cu_occupancy, + .program_trap_handler_settings = kgd_gfx_v9_program_trap_handler_settings + }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c -index 054c1a224defb..c904269b3e148 100644 +index 054c1a224defb..34303dd3ada96 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +@@ -476,13 +476,13 @@ kfd_mem_dmamap_userptr(struct kgd_mem *mem, + struct ttm_tt *ttm = bo->tbo.ttm; + int ret; + ++ if (WARN_ON(ttm->num_pages != src_ttm->num_pages)) ++ return -EINVAL; ++ + ttm->sg = kmalloc(sizeof(*ttm->sg), GFP_KERNEL); + if (unlikely(!ttm->sg)) + return -ENOMEM; + +- if (WARN_ON(ttm->num_pages != src_ttm->num_pages)) +- return -EINVAL; +- + /* Same sequence as in amdgpu_ttm_tt_pin_userptr */ + ret = sg_alloc_table_from_pages(ttm->sg, src_ttm->pages, + ttm->num_pages, 0, @@ -1318,16 +1318,10 @@ void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev, struct amdgpu_vm *vm) { @@ -94663,6 +116950,15 @@ index 054c1a224defb..c904269b3e148 100644 if (size) *size = amdgpu_bo_size(bo); +@@ -1918,7 +1910,7 @@ int amdgpu_amdkfd_gpuvm_import_dmabuf(struct kgd_dev *kgd, + + ret = drm_vma_node_allow(&obj->vma_node, drm_priv); + if (ret) { +- kfree(mem); ++ kfree(*mem); + return ret; + } + @@ -2397,12 +2389,15 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef) process_info->eviction_fence = new_fence; *ef = dma_fence_get(&new_fence->base); @@ -94682,6 +116978,18 @@ index 054c1a224defb..c904269b3e148 100644 /* Attach eviction fence to PD / PT BOs */ list_for_each_entry(peer_vm, &process_info->vm_list_head, vm_list_node) { +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c +index 27b19503773b9..71354f505b84b 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c +@@ -317,6 +317,7 @@ static bool amdgpu_atrm_get_bios(struct amdgpu_device *adev) + + if (!found) + return false; ++ pci_dev_put(pdev); + + adev->bios = kmalloc(size, GFP_KERNEL); + if (!adev->bios) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c index 15c45b2a39835..714178f1b6c6e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c @@ -94709,7 +117017,7 @@ index c905a4cfc173d..044b41f0bfd9c 100644 static inline struct amdgpu_bo_list_entry * diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c -index b9c11c2b2885a..4b1d62ebf8ddc 100644 +index 
b9c11c2b2885a..c777aff164b76 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c @@ -175,7 +175,7 @@ int amdgpu_connector_get_monitor_bpc(struct drm_connector *connector) @@ -94721,7 +117029,19 @@ index b9c11c2b2885a..4b1d62ebf8ddc 100644 (mode_clock * 5/4 <= max_tmds_clock)) bpc = 10; else -@@ -387,6 +387,9 @@ amdgpu_connector_lcd_native_mode(struct drm_encoder *encoder) +@@ -315,8 +315,10 @@ static void amdgpu_connector_get_edid(struct drm_connector *connector) + if (!amdgpu_connector->edid) { + /* some laptops provide a hardcoded edid in rom for LCDs */ + if (((connector->connector_type == DRM_MODE_CONNECTOR_LVDS) || +- (connector->connector_type == DRM_MODE_CONNECTOR_eDP))) ++ (connector->connector_type == DRM_MODE_CONNECTOR_eDP))) { + amdgpu_connector->edid = amdgpu_connector_get_hardcoded_edid(adev); ++ drm_connector_update_edid_property(connector, amdgpu_connector->edid); ++ } + } + } + +@@ -387,6 +389,9 @@ amdgpu_connector_lcd_native_mode(struct drm_encoder *encoder) native_mode->vdisplay != 0 && native_mode->clock != 0) { mode = drm_mode_duplicate(dev, native_mode); @@ -94731,7 +117051,7 @@ index b9c11c2b2885a..4b1d62ebf8ddc 100644 mode->type = DRM_MODE_TYPE_PREFERRED | DRM_MODE_TYPE_DRIVER; drm_mode_set_name(mode); -@@ -401,6 +404,9 @@ amdgpu_connector_lcd_native_mode(struct drm_encoder *encoder) +@@ -401,6 +406,9 @@ amdgpu_connector_lcd_native_mode(struct drm_encoder *encoder) * simpler. */ mode = drm_cvt_mode(dev, native_mode->hdisplay, native_mode->vdisplay, 60, true, false, false); @@ -94741,7 +117061,7 @@ index b9c11c2b2885a..4b1d62ebf8ddc 100644 mode->type = DRM_MODE_TYPE_PREFERRED | DRM_MODE_TYPE_DRIVER; DRM_DEBUG_KMS("Adding cvt approximation of native panel mode %s\n", mode->name); } -@@ -827,6 +833,7 @@ static int amdgpu_connector_vga_get_modes(struct drm_connector *connector) +@@ -827,6 +835,7 @@ static int amdgpu_connector_vga_get_modes(struct drm_connector *connector) amdgpu_connector_get_edid(connector); ret = amdgpu_connector_ddc_get_modes(connector); @@ -94749,7 +117069,7 @@ index b9c11c2b2885a..4b1d62ebf8ddc 100644 return ret; } -@@ -1664,10 +1671,12 @@ amdgpu_connector_add(struct amdgpu_device *adev, +@@ -1664,10 +1673,12 @@ amdgpu_connector_add(struct amdgpu_device *adev, adev->mode_info.dither_property, AMDGPU_FMT_DITHER_DISABLE); @@ -94763,7 +117083,7 @@ index b9c11c2b2885a..4b1d62ebf8ddc 100644 subpixel_order = SubPixelHorizontalRGB; connector->interlace_allowed = true; -@@ -1789,6 +1798,7 @@ amdgpu_connector_add(struct amdgpu_device *adev, +@@ -1789,6 +1800,7 @@ amdgpu_connector_add(struct amdgpu_device *adev, drm_object_attach_property(&amdgpu_connector->base.base, adev->mode_info.audio_property, AMDGPU_AUDIO_AUTO); @@ -94771,7 +117091,7 @@ index b9c11c2b2885a..4b1d62ebf8ddc 100644 } drm_object_attach_property(&amdgpu_connector->base.base, adev->mode_info.dither_property, -@@ -1842,6 +1852,7 @@ amdgpu_connector_add(struct amdgpu_device *adev, +@@ -1842,6 +1854,7 @@ amdgpu_connector_add(struct amdgpu_device *adev, drm_object_attach_property(&amdgpu_connector->base.base, adev->mode_info.audio_property, AMDGPU_AUDIO_AUTO); @@ -94779,7 +117099,7 @@ index b9c11c2b2885a..4b1d62ebf8ddc 100644 } drm_object_attach_property(&amdgpu_connector->base.base, adev->mode_info.dither_property, -@@ -1892,6 +1903,7 @@ amdgpu_connector_add(struct amdgpu_device *adev, +@@ -1892,6 +1905,7 @@ amdgpu_connector_add(struct amdgpu_device *adev, drm_object_attach_property(&amdgpu_connector->base.base, 
adev->mode_info.audio_property, AMDGPU_AUDIO_AUTO); @@ -94964,18 +117284,29 @@ index 141a8474e24f2..8b641f40fdf66 100644 void amdgpu_debugfs_gem_init(struct amdgpu_device *adev); -int amdgpu_debugfs_wait_dump(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c -index af9bdf16eefd4..ac4dabcde33f8 100644 +index af9bdf16eefd4..b5fe2c91f58c8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c -@@ -30,6 +30,7 @@ +@@ -30,7 +30,9 @@ #include <linux/module.h> #include <linux/console.h> #include <linux/slab.h> +#include <linux/pci.h> ++#include <drm/drm_aperture.h> #include <drm/drm_atomic_helper.h> #include <drm/drm_probe_helper.h> -@@ -1308,6 +1309,31 @@ bool amdgpu_device_need_post(struct amdgpu_device *adev) + #include <drm/amdgpu_drm.h> +@@ -88,6 +90,8 @@ MODULE_FIRMWARE("amdgpu/yellow_carp_gpu_info.bin"); + + #define AMDGPU_RESUME_MS 2000 + ++static const struct drm_driver amdgpu_kms_driver; ++ + const char *amdgpu_asic_name[] = { + "TAHITI", + "PITCAIRN", +@@ -1308,6 +1312,31 @@ bool amdgpu_device_need_post(struct amdgpu_device *adev) return true; } @@ -95007,7 +117338,7 @@ index af9bdf16eefd4..ac4dabcde33f8 100644 /* if we get transitioned to only one device, take VGA back */ /** * amdgpu_device_vga_set_decode - enable/disable vga decode -@@ -2069,6 +2095,8 @@ out: +@@ -2069,6 +2098,8 @@ out: */ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev) { @@ -95016,7 +117347,7 @@ index af9bdf16eefd4..ac4dabcde33f8 100644 int i, r; amdgpu_device_enable_virtual_display(adev); -@@ -2168,6 +2196,18 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev) +@@ -2168,6 +2199,18 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev) return -EINVAL; } @@ -95035,7 +117366,7 @@ index af9bdf16eefd4..ac4dabcde33f8 100644 amdgpu_amdkfd_device_probe(adev); adev->pm.pp_feature = amdgpu_pp_feature_mask; -@@ -2348,8 +2388,20 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev) +@@ -2348,8 +2391,20 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev) } adev->ip_blocks[i].status.sw = true; @@ -95058,7 +117389,7 @@ index af9bdf16eefd4..ac4dabcde33f8 100644 r = amdgpu_device_vram_scratch_init(adev); if (r) { DRM_ERROR("amdgpu_vram_scratch_init failed %d\n", r); -@@ -2394,6 +2446,10 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev) +@@ -2394,6 +2449,10 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev) if (r) goto init_failed; @@ -95069,7 +117400,7 @@ index af9bdf16eefd4..ac4dabcde33f8 100644 r = amdgpu_device_ip_hw_init_phase1(adev); if (r) goto init_failed; -@@ -2432,10 +2488,6 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev) +@@ -2432,10 +2491,6 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev) if (!adev->gmc.xgmi.pending_reset) amdgpu_amdkfd_device_init(adev); @@ -95080,7 +117411,7 @@ index af9bdf16eefd4..ac4dabcde33f8 100644 amdgpu_fru_get_product_info(adev); init_failed: -@@ -2745,6 +2797,11 @@ static int amdgpu_device_ip_fini_early(struct amdgpu_device *adev) +@@ -2745,6 +2800,11 @@ static int amdgpu_device_ip_fini_early(struct amdgpu_device *adev) adev->ip_blocks[i].status.hw = false; } @@ -95092,7 +117423,7 @@ index af9bdf16eefd4..ac4dabcde33f8 100644 return 0; } -@@ -2805,10 +2862,6 @@ static int amdgpu_device_ip_fini(struct amdgpu_device *adev) +@@ -2805,10 +2865,6 @@ static int amdgpu_device_ip_fini(struct amdgpu_device *adev) amdgpu_ras_fini(adev); @@ -95103,7 
+117434,7 @@ index af9bdf16eefd4..ac4dabcde33f8 100644 return 0; } -@@ -2992,8 +3045,8 @@ static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev) +@@ -2992,8 +3048,8 @@ static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev) int i, r; static enum amd_ip_block_type ip_order[] = { @@ -95113,7 +117444,7 @@ index af9bdf16eefd4..ac4dabcde33f8 100644 AMD_IP_BLOCK_TYPE_PSP, AMD_IP_BLOCK_TYPE_IH, }; -@@ -3084,7 +3137,8 @@ static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev) +@@ -3084,7 +3140,8 @@ static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev) continue; if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON || adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC || @@ -95123,7 +117454,23 @@ index af9bdf16eefd4..ac4dabcde33f8 100644 r = adev->ip_blocks[i].version->funcs->resume(adev); if (r) { -@@ -3531,6 +3585,9 @@ int amdgpu_device_init(struct amdgpu_device *adev, +@@ -3131,6 +3188,15 @@ static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev) + return r; + } + adev->ip_blocks[i].status.hw = true; ++ ++ if (adev->in_s0ix && adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) { ++ /* disable gfxoff for IP resume. The gfxoff will be re-enabled in ++ * amdgpu_device_resume() after IP resume. ++ */ ++ amdgpu_gfx_off_ctrl(adev, false); ++ DRM_DEBUG("will disable gfxoff for re-initializing other blocks\n"); ++ } ++ + } + + return 0; +@@ -3531,6 +3597,9 @@ int amdgpu_device_init(struct amdgpu_device *adev, adev->rmmio_size = pci_resource_len(adev->pdev, 2); } @@ -95133,7 +117480,19 @@ index af9bdf16eefd4..ac4dabcde33f8 100644 adev->rmmio = ioremap(adev->rmmio_base, adev->rmmio_size); if (adev->rmmio == NULL) { return -ENOMEM; -@@ -3850,7 +3907,7 @@ void amdgpu_device_fini_hw(struct amdgpu_device *adev) +@@ -3571,6 +3640,11 @@ int amdgpu_device_init(struct amdgpu_device *adev, + if (r) + return r; + ++ /* Get rid of things like offb */ ++ r = drm_aperture_remove_conflicting_pci_framebuffers(adev->pdev, &amdgpu_kms_driver); ++ if (r) ++ return r; ++ + /* doorbell bar mapping and doorbell index init*/ + amdgpu_device_doorbell_init(adev); + +@@ -3850,7 +3924,7 @@ void amdgpu_device_fini_hw(struct amdgpu_device *adev) /* disable all interrupts */ amdgpu_irq_disable_all(adev); if (adev->mode_info.mode_config_initialized){ @@ -95142,7 +117501,7 @@ index af9bdf16eefd4..ac4dabcde33f8 100644 drm_helper_force_disable_all(adev_to_drm(adev)); else drm_atomic_helper_shutdown(adev_to_drm(adev)); -@@ -3876,8 +3933,8 @@ void amdgpu_device_fini_hw(struct amdgpu_device *adev) +@@ -3876,8 +3950,8 @@ void amdgpu_device_fini_hw(struct amdgpu_device *adev) void amdgpu_device_fini_sw(struct amdgpu_device *adev) { @@ -95152,7 +117511,7 @@ index af9bdf16eefd4..ac4dabcde33f8 100644 release_firmware(adev->firmware.gpu_info_fw); adev->firmware.gpu_info_fw = NULL; adev->accel_working = false; -@@ -3909,6 +3966,25 @@ void amdgpu_device_fini_sw(struct amdgpu_device *adev) +@@ -3909,6 +3983,25 @@ void amdgpu_device_fini_sw(struct amdgpu_device *adev) } @@ -95178,7 +117537,7 @@ index af9bdf16eefd4..ac4dabcde33f8 100644 /* * Suspend & resume. 
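[Editor's note] The S0ix hunks above bracket GFXOFF by hand: it is disabled before the other IP blocks are re-programmed in amdgpu_device_ip_resume_phase2() and re-enabled at the end of amdgpu_device_resume(), so the GFX core cannot power down mid-resume. An illustrative bracketing sketch, assuming the amdgpu-internal types; foo_reprogram_ip_blocks is a hypothetical stand-in for the real phase-2 resume:

    /* Hypothetical stand-in for amdgpu_device_ip_resume_phase2(). */
    static int foo_reprogram_ip_blocks(struct amdgpu_device *adev)
    {
            return 0;
    }

    /* Keep the GFX core out of GFXOFF while other IP blocks are
     * re-programmed, then allow it again once resume has settled. */
    static int foo_resume_ips(struct amdgpu_device *adev)
    {
            int r;

            if (adev->in_s0ix)
                    amdgpu_gfx_off_ctrl(adev, false);  /* hold GFX on */

            r = foo_reprogram_ip_blocks(adev);

            if (adev->in_s0ix)
                    amdgpu_gfx_off_ctrl(adev, true);   /* allow GFXOFF */
            return r;
    }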
-@@ -3926,12 +4002,20 @@ void amdgpu_device_fini_sw(struct amdgpu_device *adev) +@@ -3926,12 +4019,20 @@ void amdgpu_device_fini_sw(struct amdgpu_device *adev) int amdgpu_device_suspend(struct drm_device *dev, bool fbcon) { struct amdgpu_device *adev = drm_to_adev(dev); @@ -95199,7 +117558,7 @@ index af9bdf16eefd4..ac4dabcde33f8 100644 if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DEV_D3)) DRM_WARN("smart shift update failed\n"); -@@ -3949,17 +4033,19 @@ int amdgpu_device_suspend(struct drm_device *dev, bool fbcon) +@@ -3949,17 +4050,19 @@ int amdgpu_device_suspend(struct drm_device *dev, bool fbcon) if (!adev->in_s0ix) amdgpu_amdkfd_suspend(adev, adev->in_runpm); @@ -95225,7 +117584,7 @@ index af9bdf16eefd4..ac4dabcde33f8 100644 return 0; } -@@ -3979,6 +4065,12 @@ int amdgpu_device_resume(struct drm_device *dev, bool fbcon) +@@ -3979,6 +4082,12 @@ int amdgpu_device_resume(struct drm_device *dev, bool fbcon) struct amdgpu_device *adev = drm_to_adev(dev); int r = 0; @@ -95238,7 +117597,7 @@ index af9bdf16eefd4..ac4dabcde33f8 100644 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF) return 0; -@@ -3993,6 +4085,13 @@ int amdgpu_device_resume(struct drm_device *dev, bool fbcon) +@@ -3993,6 +4102,13 @@ int amdgpu_device_resume(struct drm_device *dev, bool fbcon) } r = amdgpu_device_ip_resume(adev); @@ -95252,7 +117611,21 @@ index af9bdf16eefd4..ac4dabcde33f8 100644 if (r) { dev_err(adev->dev, "amdgpu_device_ip_resume failed (%d).\n", r); return r; -@@ -4466,10 +4565,6 @@ int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev, +@@ -4015,6 +4131,13 @@ int amdgpu_device_resume(struct drm_device *dev, bool fbcon) + /* Make sure IB tests flushed */ + flush_delayed_work(&adev->delayed_init_work); + ++ if (adev->in_s0ix) { ++ /* re-enable gfxoff after IP resume. This re-enables gfxoff after ++ * it was disabled for IP resume in amdgpu_device_ip_resume_phase2(). ++ */ ++ amdgpu_gfx_off_ctrl(adev, true); ++ DRM_DEBUG("will enable gfxoff for the mission mode\n"); ++ } + if (fbcon) + amdgpu_fbdev_set_suspend(adev, 0); + +@@ -4466,10 +4589,6 @@ int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev, if (reset_context->reset_req_dev == adev) job = reset_context->job; @@ -95263,7 +117636,32 @@ index af9bdf16eefd4..ac4dabcde33f8 100644 if (amdgpu_sriov_vf(adev)) { /* stop the data exchange thread */ amdgpu_virt_fini_data_exchange(adev); -@@ -5130,7 +5225,7 @@ skip_hw_reset: +@@ -4791,6 +4910,8 @@ static void amdgpu_device_resume_display_audio(struct amdgpu_device *adev) + pm_runtime_enable(&(p->dev)); + pm_runtime_resume(&(p->dev)); + } ++ ++ pci_dev_put(p); + } + + static int amdgpu_device_suspend_display_audio(struct amdgpu_device *adev) +@@ -4829,6 +4950,7 @@ static int amdgpu_device_suspend_display_audio(struct amdgpu_device *adev) + + if (expires < ktime_get_mono_fast_ns()) { + dev_warn(adev->dev, "failed to suspend display audio\n"); ++ pci_dev_put(p); + /* TODO: abort the succeeding gpu reset? 
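[Editor's note] The display-audio hunks above add pci_dev_put() on every exit path because pci_get_domain_bus_and_slot() returns a referenced device that was previously leaked, including on the timeout error path. A hedged sketch of the balanced lookup, with an invented foo_ helper and devfn chosen only for illustration:

    #include <linux/pci.h>
    #include <linux/pm_runtime.h>

    /* Every pci_get_*() lookup must be balanced with pci_dev_put(),
     * error paths included. */
    static int foo_poke_companion(int domain, unsigned int bus)
    {
            struct pci_dev *p;

            p = pci_get_domain_bus_and_slot(domain, bus, PCI_DEVFN(0, 1));
            if (!p)
                    return -ENODEV;

            pm_runtime_resume(&p->dev);
            pci_dev_put(p);         /* drop the lookup reference */
            return 0;
    }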
*/ + return -ETIMEDOUT; + } +@@ -4836,6 +4958,7 @@ static int amdgpu_device_suspend_display_audio(struct amdgpu_device *adev) + + pm_runtime_disable(&(p->dev)); + ++ pci_dev_put(p); + return 0; + } + +@@ -5130,7 +5253,7 @@ skip_hw_reset: drm_sched_start(&ring->sched, !tmp_adev->asic_reset_res); } @@ -95272,7 +117670,7 @@ index af9bdf16eefd4..ac4dabcde33f8 100644 drm_helper_resume_force_mode(adev_to_drm(tmp_adev)); } -@@ -5610,7 +5705,7 @@ void amdgpu_device_flush_hdp(struct amdgpu_device *adev, +@@ -5610,7 +5733,7 @@ void amdgpu_device_flush_hdp(struct amdgpu_device *adev, struct amdgpu_ring *ring) { #ifdef CONFIG_X86_64 @@ -95281,7 +117679,7 @@ index af9bdf16eefd4..ac4dabcde33f8 100644 return; #endif if (adev->gmc.xgmi.connected_to_cpu) -@@ -5626,7 +5721,7 @@ void amdgpu_device_invalidate_hdp(struct amdgpu_device *adev, +@@ -5626,7 +5749,7 @@ void amdgpu_device_invalidate_hdp(struct amdgpu_device *adev, struct amdgpu_ring *ring) { #ifdef CONFIG_X86_64 @@ -95422,10 +117820,18 @@ index ae6ab93c868b8..7444484a12bf8 100644 r = ttm_bo_validate(&bo->tbo, &placement, &ctx); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c -index f18240f873878..28dea2eb61c7f 100644 +index f18240f873878..4df888c7e2fff 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c -@@ -38,6 +38,7 @@ +@@ -23,7 +23,6 @@ + */ + + #include <drm/amdgpu_drm.h> +-#include <drm/drm_aperture.h> + #include <drm/drm_drv.h> + #include <drm/drm_gem.h> + #include <drm/drm_vblank.h> +@@ -38,6 +37,7 @@ #include <drm/drm_probe_helper.h> #include <linux/mmu_notifier.h> #include <linux/suspend.h> @@ -95433,7 +117839,7 @@ index f18240f873878..28dea2eb61c7f 100644 #include "amdgpu.h" #include "amdgpu_irq.h" -@@ -679,7 +680,7 @@ MODULE_PARM_DESC(sched_policy, +@@ -679,7 +679,7 @@ MODULE_PARM_DESC(sched_policy, * Maximum number of processes that HWS can schedule concurrently. The maximum is the * number of VMIDs assigned to the HWS, which is also the default. 
*/ @@ -95442,7 +117848,7 @@ index f18240f873878..28dea2eb61c7f 100644 module_param(hws_max_conc_proc, int, 0444); MODULE_PARM_DESC(hws_max_conc_proc, "Max # processes HWS can execute concurrently when sched_policy=0 (0 = no concurrency, #VMIDs for KFD = Maximum(default))"); -@@ -890,6 +891,717 @@ MODULE_PARM_DESC(smu_pptable_id, +@@ -890,6 +890,717 @@ MODULE_PARM_DESC(smu_pptable_id, "specify pptable id to be used (-1 = auto(default) value, 0 = use pptable from vbios, > 0 = soft pptable id)"); module_param_named(smu_pptable_id, amdgpu_smu_pptable_id, int, 0444); @@ -96160,7 +118566,22 @@ index f18240f873878..28dea2eb61c7f 100644 static const struct pci_device_id pciidlist[] = { #ifdef CONFIG_DRM_AMDGPU_SI {0x1002, 0x6780, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI}, -@@ -1237,6 +1949,7 @@ static const struct pci_device_id pciidlist[] = { +@@ -1224,10 +1935,10 @@ static const struct pci_device_id pciidlist[] = { + {0x1002, 0x73FF, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_DIMGREY_CAVEFISH}, + + /* Aldebaran */ +- {0x1002, 0x7408, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ALDEBARAN|AMD_EXP_HW_SUPPORT}, +- {0x1002, 0x740C, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ALDEBARAN|AMD_EXP_HW_SUPPORT}, +- {0x1002, 0x740F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ALDEBARAN|AMD_EXP_HW_SUPPORT}, +- {0x1002, 0x7410, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ALDEBARAN|AMD_EXP_HW_SUPPORT}, ++ {0x1002, 0x7408, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ALDEBARAN}, ++ {0x1002, 0x740C, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ALDEBARAN}, ++ {0x1002, 0x740F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ALDEBARAN}, ++ {0x1002, 0x7410, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ALDEBARAN}, + + /* CYAN_SKILLFISH */ + {0x1002, 0x13FE, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CYAN_SKILLFISH|AMD_IS_APU}, +@@ -1237,6 +1948,7 @@ static const struct pci_device_id pciidlist[] = { {0x1002, 0x7421, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_BEIGE_GOBY}, {0x1002, 0x7422, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_BEIGE_GOBY}, {0x1002, 0x7423, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_BEIGE_GOBY}, @@ -96168,7 +118589,7 @@ index f18240f873878..28dea2eb61c7f 100644 {0x1002, 0x743F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_BEIGE_GOBY}, {0, 0, 0} -@@ -1246,14 +1959,45 @@ MODULE_DEVICE_TABLE(pci, pciidlist); +@@ -1246,14 +1958,45 @@ MODULE_DEVICE_TABLE(pci, pciidlist); static const struct drm_driver amdgpu_kms_driver; @@ -96215,18 +118636,37 @@ index f18240f873878..28dea2eb61c7f 100644 if (amdgpu_virtual_display || amdgpu_device_asic_has_dc_support(flags & AMD_ASIC_MASK)) -@@ -1310,6 +2054,10 @@ static int amdgpu_pci_probe(struct pci_dev *pdev, +@@ -1264,6 +2007,15 @@ static int amdgpu_pci_probe(struct pci_dev *pdev, + "See modparam exp_hw_support\n"); + return -ENODEV; + } ++ /* differentiate between P10 and P11 asics with the same DID */ ++ if (pdev->device == 0x67FF && ++ (pdev->revision == 0xE3 || ++ pdev->revision == 0xE7 || ++ pdev->revision == 0xF3 || ++ pdev->revision == 0xF7)) { ++ flags &= ~AMD_ASIC_MASK; ++ flags |= CHIP_POLARIS10; ++ } + + /* Due to hardware bugs, S/G Display on raven requires a 1:1 IOMMU mapping, + * however, SME requires an indirect IOMMU mapping because the encryption +@@ -1310,10 +2062,9 @@ static int amdgpu_pci_probe(struct pci_dev *pdev, } #endif +- /* Get rid of things like offb */ +- ret = drm_aperture_remove_conflicting_pci_framebuffers(pdev, &amdgpu_kms_driver); +- if (ret) +- return ret; + base = pci_resource_start(pdev, 0); + size = pci_resource_len(pdev, 0); + is_fw_fb = amdgpu_is_fw_framebuffer(base, size); -+ - /* Get rid of things like offb */ - ret = 
drm_aperture_remove_conflicting_pci_framebuffers(pdev, &amdgpu_kms_driver); - if (ret) -@@ -1322,6 +2070,7 @@ static int amdgpu_pci_probe(struct pci_dev *pdev, + + adev = devm_drm_dev_alloc(&pdev->dev, &amdgpu_kms_driver, typeof(*adev), ddev); + if (IS_ERR(adev)) +@@ -1322,6 +2073,7 @@ static int amdgpu_pci_probe(struct pci_dev *pdev, adev->dev = &pdev->dev; adev->pdev = pdev; ddev = adev_to_drm(adev); @@ -96234,7 +118674,22 @@ index f18240f873878..28dea2eb61c7f 100644 if (!supports_atomic) ddev->driver_features &= ~DRIVER_ATOMIC; -@@ -1471,13 +2220,20 @@ static void amdgpu_drv_delayed_reset_work_handler(struct work_struct *work) +@@ -1332,12 +2084,12 @@ static int amdgpu_pci_probe(struct pci_dev *pdev, + + pci_set_drvdata(pdev, ddev); + +- ret = amdgpu_driver_load_kms(adev, ent->driver_data); ++ ret = amdgpu_driver_load_kms(adev, flags); + if (ret) + goto err_pci; + + retry_init: +- ret = drm_dev_register(ddev, ent->driver_data); ++ ret = drm_dev_register(ddev, flags); + if (ret == -EAGAIN && ++retry <= 3) { + DRM_INFO("retry init %d\n", retry); + /* Don't request EX mode too frequently which is attacking */ +@@ -1471,13 +2223,20 @@ static void amdgpu_drv_delayed_reset_work_handler(struct work_struct *work) static int amdgpu_pmops_prepare(struct device *dev) { struct drm_device *drm_dev = dev_get_drvdata(dev); @@ -96257,7 +118712,7 @@ index f18240f873878..28dea2eb61c7f 100644 return 0; } -@@ -1491,15 +2247,23 @@ static int amdgpu_pmops_suspend(struct device *dev) +@@ -1491,15 +2250,23 @@ static int amdgpu_pmops_suspend(struct device *dev) { struct drm_device *drm_dev = dev_get_drvdata(dev); struct amdgpu_device *adev = drm_to_adev(drm_dev); @@ -96286,7 +118741,7 @@ index f18240f873878..28dea2eb61c7f 100644 } static int amdgpu_pmops_resume(struct device *dev) -@@ -1511,6 +2275,8 @@ static int amdgpu_pmops_resume(struct device *dev) +@@ -1511,6 +2278,8 @@ static int amdgpu_pmops_resume(struct device *dev) r = amdgpu_device_resume(drm_dev, true); if (amdgpu_acpi_is_s0ix_active(adev)) adev->in_s0ix = false; @@ -96295,7 +118750,7 @@ index f18240f873878..28dea2eb61c7f 100644 return r; } -@@ -1575,12 +2341,27 @@ static int amdgpu_pmops_runtime_suspend(struct device *dev) +@@ -1575,12 +2344,27 @@ static int amdgpu_pmops_runtime_suspend(struct device *dev) if (amdgpu_device_supports_px(drm_dev)) drm_dev->switch_power_state = DRM_SWITCH_POWER_CHANGING; @@ -96323,7 +118778,7 @@ index f18240f873878..28dea2eb61c7f 100644 if (amdgpu_device_supports_px(drm_dev)) { /* Only need to handle PCI state in the driver for ATPX * PCI core handles it for _PR3. 
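[Editor's note] The runtime-PM hunks above distinguish three flavors: PX (ATPX, where the driver itself shepherds the PCI state), BOCO (_PR3, where the PCI core does it), and BACO (the device stays on the bus). A minimal sketch of the PX branch only, under the assumption that the caller has already quiesced the device; foo_ is an illustrative name:

    #include <linux/pci.h>

    /* ATPX/PX branch: the driver saves and powers down the PCI state
     * itself; for _PR3 the PCI core handles this instead, and for BACO
     * the device never leaves the bus. */
    static int foo_runtime_suspend_px(struct pci_dev *pdev)
    {
            pci_save_state(pdev);
            pci_disable_device(pdev);
            pci_ignore_hotplug(pdev);
            pci_set_power_state(pdev, PCI_D3cold);
            return 0;
    }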
-@@ -1634,8 +2415,11 @@ static int amdgpu_pmops_runtime_resume(struct device *dev) +@@ -1634,8 +2418,11 @@ static int amdgpu_pmops_runtime_resume(struct device *dev) amdgpu_device_baco_exit(drm_dev); } ret = amdgpu_device_resume(drm_dev, false); @@ -96336,7 +118791,7 @@ index f18240f873878..28dea2eb61c7f 100644 if (amdgpu_device_supports_px(drm_dev)) drm_dev->switch_power_state = DRM_SWITCH_POWER_ON; -@@ -1719,6 +2503,7 @@ static const struct dev_pm_ops amdgpu_pm_ops = { +@@ -1719,6 +2506,7 @@ static const struct dev_pm_ops amdgpu_pm_ops = { .prepare = amdgpu_pmops_prepare, .complete = amdgpu_pmops_complete, .suspend = amdgpu_pmops_suspend, @@ -96359,7 +118814,7 @@ index cd0acbea75da6..d58ab9deb0280 100644 drm_fb_helper_initial_config(&rfbdev->helper, bpp_sel); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c -index 8d682befe0d68..14499f0de32dc 100644 +index 8d682befe0d68..bbd6f7a123033 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c @@ -552,9 +552,6 @@ void amdgpu_fence_driver_hw_fini(struct amdgpu_device *adev) @@ -96372,7 +118827,22 @@ index 8d682befe0d68..14499f0de32dc 100644 /* You can't wait for HW to signal if it's gone */ if (!drm_dev_is_unplugged(&adev->ddev)) r = amdgpu_fence_wait_empty(ring); -@@ -614,11 +611,6 @@ void amdgpu_fence_driver_hw_init(struct amdgpu_device *adev) +@@ -582,7 +579,13 @@ void amdgpu_fence_driver_sw_fini(struct amdgpu_device *adev) + if (!ring || !ring->fence_drv.initialized) + continue; + +- if (!ring->no_scheduler) ++ /* ++ * Notice we check for sched.ops since there's some ++ * override on the meaning of sched.ready by amdgpu. ++ * The natural check would be sched.ready, which is ++ * set as drm_sched_init() finishes... 
++ */ ++ if (ring->sched.ops) + drm_sched_fini(&ring->sched); + + for (j = 0; j <= ring->fence_drv.num_fences_mask; ++j) +@@ -614,11 +617,6 @@ void amdgpu_fence_driver_hw_init(struct amdgpu_device *adev) if (!ring || !ring->fence_drv.initialized) continue; @@ -96385,7 +118855,7 @@ index 8d682befe0d68..14499f0de32dc 100644 if (ring->fence_drv.irq_src) amdgpu_irq_get(adev, ring->fence_drv.irq_src, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c -index d6aa032890ee8..a1e63ba4c54a5 100644 +index d6aa032890ee8..13ca51ff8bd0b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -61,7 +61,7 @@ static vm_fault_t amdgpu_gem_fault(struct vm_fault *vmf) @@ -96397,8 +118867,23 @@ index d6aa032890ee8..a1e63ba4c54a5 100644 drm_dev_exit(idx); } else { +@@ -419,11 +419,9 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data, + if (r) + goto release_object; + +- if (args->flags & AMDGPU_GEM_USERPTR_REGISTER) { +- r = amdgpu_mn_register(bo, args->addr); +- if (r) +- goto release_object; +- } ++ r = amdgpu_mn_register(bo, args->addr); ++ if (r) ++ goto release_object; + + if (args->flags & AMDGPU_GEM_USERPTR_VALIDATE) { + r = amdgpu_ttm_tt_get_user_pages(bo, bo->tbo.ttm->pages); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c -index 1916ec84dd71f..e7845df6cad22 100644 +index 1916ec84dd71f..5e32906f9819a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -266,7 +266,7 @@ static int amdgpu_gfx_kiq_acquire(struct amdgpu_device *adev, @@ -96410,6 +118895,24 @@ index 1916ec84dd71f..e7845df6cad22 100644 if (test_bit(queue_bit, adev->gfx.mec.queue_bitmap)) continue; +@@ -580,10 +580,14 @@ void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool enable) + if (adev->gfx.gfx_off_req_count == 0 && + !adev->gfx.gfx_off_state) { + /* If going to s2idle, no need to wait */ +- if (adev->in_s0ix) +- delay = GFX_OFF_NO_DELAY; +- schedule_delayed_work(&adev->gfx.gfx_off_delay_work, ++ if (adev->in_s0ix) { ++ if (!amdgpu_dpm_set_powergating_by_smu(adev, ++ AMD_IP_BLOCK_TYPE_GFX, true)) ++ adev->gfx.gfx_off_state = true; ++ } else { ++ schedule_delayed_work(&adev->gfx.gfx_off_delay_work, + delay); ++ } + } + } else { + if (adev->gfx.gfx_off_req_count == 0) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c index f3d62e196901a..0c7963dfacad1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c @@ -96432,10 +118935,28 @@ index f3d62e196901a..0c7963dfacad1 100644 /* Order reading of wptr vs. reading of IH ring data */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c -index 7e45640fbee02..6744427577b36 100644 +index 7e45640fbee02..43e30b9a2e024 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c -@@ -152,21 +152,10 @@ static void amdgpu_get_audio_func(struct amdgpu_device *adev) +@@ -43,6 +43,17 @@ + #include "amdgpu_display.h" + #include "amdgpu_ras.h" + ++static void amdgpu_runtime_pm_quirk(struct amdgpu_device *adev) ++{ ++ /* ++ * Add below quirk on several sienna_cichlid cards to disable ++ * runtime pm to fix EMI failures. 
++ */ ++ if (((adev->pdev->device == 0x73A1) && (adev->pdev->revision == 0x00)) || ++ ((adev->pdev->device == 0x73BF) && (adev->pdev->revision == 0xCF))) ++ adev->runpm = false; ++} ++ + void amdgpu_unregister_gpu_instance(struct amdgpu_device *adev) + { + struct amdgpu_gpu_instance *gpu_instance; +@@ -152,21 +163,10 @@ static void amdgpu_get_audio_func(struct amdgpu_device *adev) int amdgpu_driver_load_kms(struct amdgpu_device *adev, unsigned long flags) { struct drm_device *dev; @@ -96457,7 +118978,7 @@ index 7e45640fbee02..6744427577b36 100644 /* amdgpu_device_init should report only fatal error * like memory allocation failure or iomapping failure, * or memory manager initialization failure, it must -@@ -206,6 +195,12 @@ int amdgpu_driver_load_kms(struct amdgpu_device *adev, unsigned long flags) +@@ -206,6 +206,15 @@ int amdgpu_driver_load_kms(struct amdgpu_device *adev, unsigned long flags) adev->runpm = true; break; } @@ -96467,6 +118988,9 @@ index 7e45640fbee02..6744427577b36 100644 + */ + if (adev->is_fw_fb) + adev->runpm = false; ++ ++ amdgpu_runtime_pm_quirk(adev); ++ if (adev->runpm) dev_info(adev->dev, "Using BACO for runtime pm\n"); } @@ -96781,8 +119305,22 @@ index abd8469380e51..0ed0736d515aa 100644 &adev->firmware.fw_buf_mc, &adev->firmware.fw_buf_ptr); } +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c +index 008a308a4ecaf..0c10222707902 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c +@@ -149,6 +149,9 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev) + break; + case CHIP_VANGOGH: + fw_name = FIRMWARE_VANGOGH; ++ if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) && ++ (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)) ++ adev->vcn.indirect_sram = true; + break; + case CHIP_DIMGREY_CAVEFISH: + fw_name = FIRMWARE_DIMGREY_CAVEFISH; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c -index ca058fbcccd43..a0803425b4566 100644 +index ca058fbcccd43..b508126a9738f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c @@ -24,6 +24,7 @@ @@ -96857,7 +119395,7 @@ index ca058fbcccd43..a0803425b4566 100644 void amdgpu_detect_virtualization(struct amdgpu_device *adev) { uint32_t reg; -@@ -694,7 +701,8 @@ void amdgpu_detect_virtualization(struct amdgpu_device *adev) +@@ -694,10 +701,17 @@ void amdgpu_detect_virtualization(struct amdgpu_device *adev) adev->virt.caps |= AMDGPU_SRIOV_CAPS_ENABLE_IOV; if (!reg) { @@ -96867,11 +119405,57 @@ index ca058fbcccd43..a0803425b4566 100644 adev->virt.caps |= AMDGPU_PASSTHROUGH_MODE; } ++ if (amdgpu_sriov_vf(adev) && adev->asic_type == CHIP_SIENNA_CICHLID) ++ /* VF MMIO access (except mailbox range) from CPU ++ * will be blocked during sriov runtime ++ */ ++ adev->virt.caps |= AMDGPU_VF_MMIO_ACCESS_PROTECT; ++ + /* we have the ability to check now */ + if (amdgpu_sriov_vf(adev)) { + switch (adev->asic_type) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h -index 8d4c20bb71c59..9adfb8d63280a 100644 +index 8d4c20bb71c59..4af3610f4a827 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h -@@ -308,6 +308,7 @@ int amdgpu_virt_alloc_mm_table(struct amdgpu_device *adev); +@@ -31,6 +31,7 @@ + #define AMDGPU_SRIOV_CAPS_IS_VF (1 << 2) /* this GPU is a virtual function */ + #define AMDGPU_PASSTHROUGH_MODE (1 << 3) /* thw whole GPU is pass through for VM */ + #define 
AMDGPU_SRIOV_CAPS_RUNTIME (1 << 4) /* is out of full access mode */ ++#define AMDGPU_VF_MMIO_ACCESS_PROTECT (1 << 5) /* MMIO write access is not allowed in sriov runtime */ + + /* all asic after AI use this offset */ + #define mmRCC_IOV_FUNC_IDENTIFIER 0xDE5 +@@ -61,6 +62,8 @@ struct amdgpu_vf_error_buffer { + uint64_t data[AMDGPU_VF_ERROR_ENTRY_SIZE]; + }; + ++enum idh_request; ++ + /** + * struct amdgpu_virt_ops - amdgpu device virt operations + */ +@@ -70,7 +73,8 @@ struct amdgpu_virt_ops { + int (*req_init_data)(struct amdgpu_device *adev); + int (*reset_gpu)(struct amdgpu_device *adev); + int (*wait_reset)(struct amdgpu_device *adev); +- void (*trans_msg)(struct amdgpu_device *adev, u32 req, u32 data1, u32 data2, u32 data3); ++ void (*trans_msg)(struct amdgpu_device *adev, enum idh_request req, ++ u32 data1, u32 data2, u32 data3); + }; + + /* +@@ -278,6 +282,9 @@ struct amdgpu_video_codec_info; + #define amdgpu_passthrough(adev) \ + ((adev)->virt.caps & AMDGPU_PASSTHROUGH_MODE) + ++#define amdgpu_sriov_vf_mmio_access_protection(adev) \ ++((adev)->virt.caps & AMDGPU_VF_MMIO_ACCESS_PROTECT) ++ + static inline bool is_virtual_machine(void) + { + #ifdef CONFIG_X86 +@@ -308,6 +315,7 @@ int amdgpu_virt_alloc_mm_table(struct amdgpu_device *adev); void amdgpu_virt_free_mm_table(struct amdgpu_device *adev); void amdgpu_virt_release_ras_err_handler_data(struct amdgpu_device *adev); void amdgpu_virt_init_data_exchange(struct amdgpu_device *adev); @@ -96914,7 +119498,7 @@ index ce982afeff913..7d58bf410be05 100644 kfree(adev->mode_info.bios_hardcoded_edid); kfree(adev->amdgpu_vkms_output); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c -index 6b15cad78de9d..fd37bb39774c8 100644 +index 6b15cad78de9d..01710cd0d9727 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -768,11 +768,17 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm, @@ -96937,6 +119521,19 @@ index 6b15cad78de9d..fd37bb39774c8 100644 } /** +@@ -3218,7 +3224,11 @@ void amdgpu_vm_manager_init(struct amdgpu_device *adev) + */ + #ifdef CONFIG_X86_64 + if (amdgpu_vm_update_mode == -1) { +- if (amdgpu_gmc_vram_full_visible(&adev->gmc)) ++ /* For asic with VF MMIO access protection ++ * avoid using CPU for VM table updates ++ */ ++ if (amdgpu_gmc_vram_full_visible(&adev->gmc) && ++ !amdgpu_sriov_vf_mmio_access_protection(adev)) + adev->vm_manager.vm_update_mode = + AMDGPU_VM_USE_CPU_FOR_COMPUTE; + else diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c index 978ac927ac11d..ce0b9cb61f582 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c @@ -97582,7 +120179,7 @@ index 01efda4398e56..6e277236b44fb 100644 if (!(adev->flags & AMD_IS_APU) && diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c -index 8931000dcd418..9014f71d52ddf 100644 +index 8931000dcd418..f14f7bb3cf0c2 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c @@ -770,8 +770,8 @@ static void sdma_v4_0_ring_set_wptr(struct amdgpu_ring *ring) @@ -97596,24 +120193,70 @@ index 8931000dcd418..9014f71d52ddf 100644 ring->wptr_offs, lower_32_bits(ring->wptr << 2), upper_32_bits(ring->wptr << 2)); -@@ -2062,6 +2062,10 @@ static int sdma_v4_0_suspend(void *handle) +@@ -978,13 +978,13 @@ static void sdma_v4_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 se + + + /** +- * sdma_v4_0_gfx_stop - stop the gfx async dma engines ++ * 
sdma_v4_0_gfx_enable - enable the gfx async dma engines + * + * @adev: amdgpu_device pointer +- * +- * Stop the gfx async dma ring buffers (VEGA10). ++ * @enable: enable SDMA RB/IB ++ * control the gfx async dma ring buffers (VEGA10). + */ +-static void sdma_v4_0_gfx_stop(struct amdgpu_device *adev) ++static void sdma_v4_0_gfx_enable(struct amdgpu_device *adev, bool enable) + { + struct amdgpu_ring *sdma[AMDGPU_MAX_SDMA_INSTANCES]; + u32 rb_cntl, ib_cntl; +@@ -999,10 +999,10 @@ static void sdma_v4_0_gfx_stop(struct amdgpu_device *adev) + } + + rb_cntl = RREG32_SDMA(i, mmSDMA0_GFX_RB_CNTL); +- rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 0); ++ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, enable ? 1 : 0); + WREG32_SDMA(i, mmSDMA0_GFX_RB_CNTL, rb_cntl); + ib_cntl = RREG32_SDMA(i, mmSDMA0_GFX_IB_CNTL); +- ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 0); ++ ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, enable ? 1 : 0); + WREG32_SDMA(i, mmSDMA0_GFX_IB_CNTL, ib_cntl); + } + } +@@ -1129,7 +1129,7 @@ static void sdma_v4_0_enable(struct amdgpu_device *adev, bool enable) + int i; + + if (!enable) { +- sdma_v4_0_gfx_stop(adev); ++ sdma_v4_0_gfx_enable(adev, enable); + sdma_v4_0_rlc_stop(adev); + if (adev->sdma.has_page_queue) + sdma_v4_0_page_stop(adev); +@@ -2062,6 +2062,12 @@ static int sdma_v4_0_suspend(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + /* SMU saves SDMA state for us */ -+ if (adev->in_s0ix) ++ if (adev->in_s0ix) { ++ sdma_v4_0_gfx_enable(adev, false); + return 0; ++ } + return sdma_v4_0_hw_fini(adev); } -@@ -2069,6 +2073,10 @@ static int sdma_v4_0_resume(void *handle) +@@ -2069,6 +2075,14 @@ static int sdma_v4_0_resume(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + /* SMU restores SDMA state for us */ -+ if (adev->in_s0ix) ++ if (adev->in_s0ix) { ++ sdma_v4_0_enable(adev, true); ++ sdma_v4_0_gfx_enable(adev, true); ++ amdgpu_ttm_set_buffer_funcs_status(adev, true); + return 0; ++ } + return sdma_v4_0_hw_init(adev); } @@ -98628,6 +121271,113 @@ index 3eea4edee355d..b8bdd796cd911 100644 for (i = 0; (event_waiters) && (i < num_events) ; i++) { init_wait(&event_waiters[i].wait); +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c +index 4a16e3c257b92..131d98c600eed 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c +@@ -780,7 +780,7 @@ svm_migrate_to_vram(struct svm_range *prange, uint32_t best_loc, + static vm_fault_t svm_migrate_to_ram(struct vm_fault *vmf) + { + unsigned long addr = vmf->address; +- struct vm_area_struct *vma; ++ struct svm_range_bo *svm_bo; + enum svm_work_list_ops op; + struct svm_range *parent; + struct svm_range *prange; +@@ -788,24 +788,42 @@ static vm_fault_t svm_migrate_to_ram(struct vm_fault *vmf) + struct mm_struct *mm; + int r = 0; + +- vma = vmf->vma; +- mm = vma->vm_mm; ++ svm_bo = vmf->page->zone_device_data; ++ if (!svm_bo) { ++ pr_debug("failed get device page at addr 0x%lx\n", addr); ++ return VM_FAULT_SIGBUS; ++ } ++ if (!mmget_not_zero(svm_bo->eviction_fence->mm)) { ++ pr_debug("addr 0x%lx of process mm is detroyed\n", addr); ++ return VM_FAULT_SIGBUS; ++ } + +- p = kfd_lookup_process_by_mm(vma->vm_mm); ++ mm = svm_bo->eviction_fence->mm; ++ if (mm != vmf->vma->vm_mm) ++ pr_debug("addr 0x%lx is COW mapping in child process\n", addr); ++ ++ p = kfd_lookup_process_by_mm(mm); + if (!p) { + pr_debug("failed find process at 
fault address 0x%lx\n", addr); +- return VM_FAULT_SIGBUS; ++ r = VM_FAULT_SIGBUS; ++ goto out_mmput; + } +- addr >>= PAGE_SHIFT; ++ if (READ_ONCE(p->svms.faulting_task) == current) { ++ pr_debug("skipping ram migration\n"); ++ r = 0; ++ goto out_unref_process; ++ } ++ + pr_debug("CPU page fault svms 0x%p address 0x%lx\n", &p->svms, addr); ++ addr >>= PAGE_SHIFT; + + mutex_lock(&p->svms.lock); + + prange = svm_range_from_addr(&p->svms, addr, &parent); + if (!prange) { +- pr_debug("cannot find svm range at 0x%lx\n", addr); ++ pr_debug("failed get range svms 0x%p addr 0x%lx\n", &p->svms, addr); + r = -EFAULT; +- goto out; ++ goto out_unlock_svms; + } + + mutex_lock(&parent->migrate_mutex); +@@ -827,10 +845,10 @@ static vm_fault_t svm_migrate_to_ram(struct vm_fault *vmf) + goto out_unlock_prange; + } + +- r = svm_migrate_vram_to_ram(prange, mm); ++ r = svm_migrate_vram_to_ram(prange, vmf->vma->vm_mm); + if (r) +- pr_debug("failed %d migrate 0x%p [0x%lx 0x%lx] to ram\n", r, +- prange, prange->start, prange->last); ++ pr_debug("failed %d migrate svms 0x%p range 0x%p [0x%lx 0x%lx]\n", ++ r, prange->svms, prange, prange->start, prange->last); + + /* xnack on, update mapping on GPUs with ACCESS_IN_PLACE */ + if (p->xnack_enabled && parent == prange) +@@ -844,12 +862,13 @@ out_unlock_prange: + if (prange != parent) + mutex_unlock(&prange->migrate_mutex); + mutex_unlock(&parent->migrate_mutex); +-out: ++out_unlock_svms: + mutex_unlock(&p->svms.lock); +- kfd_unref_process(p); +- ++out_unref_process: + pr_debug("CPU fault svms 0x%p address 0x%lx done\n", &p->svms, addr); +- ++ kfd_unref_process(p); ++out_mmput: ++ mmput(mm); + return r ? VM_FAULT_SIGBUS : 0; + } + +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +index 6d8f9bb2d9057..47ec820cae72b 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +@@ -755,6 +755,7 @@ struct svm_range_list { + atomic_t evicted_ranges; + struct delayed_work restore_work; + DECLARE_BITMAP(bitmap_supported, MAX_GPU_INSTANCE); ++ struct task_struct *faulting_task; + }; + + /* Process data */ diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c index ed4bc5f844ce7..766b3660c8c86 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c @@ -98670,7 +121420,7 @@ index ed4bc5f844ce7..766b3660c8c86 100644 return 0; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c -index 9d0f65a90002d..74e6f613be020 100644 +index 9d0f65a90002d..22a70aaccf13c 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -936,7 +936,7 @@ svm_range_split(struct svm_range *prange, uint64_t start, uint64_t last, @@ -98712,7 +121462,19 @@ index 9d0f65a90002d..74e6f613be020 100644 r = ttm_eu_reserve_buffers(&ctx->ticket, &ctx->validate_list, ctx->intr, NULL); -@@ -1570,7 +1565,6 @@ retry_flush_work: +@@ -1494,9 +1489,11 @@ static int svm_range_validate_and_map(struct mm_struct *mm, + + next = min(vma->vm_end, end); + npages = (next - addr) >> PAGE_SHIFT; ++ WRITE_ONCE(p->svms.faulting_task, current); + r = amdgpu_hmm_range_get_pages(&prange->notifier, mm, NULL, + addr, npages, &hmm_range, + readonly, true, owner); ++ WRITE_ONCE(p->svms.faulting_task, NULL); + if (r) { + pr_debug("failed %d to get svm range pages\n", r); + goto unreserve_out; +@@ -1570,7 +1567,6 @@ retry_flush_work: static void svm_range_restore_work(struct work_struct *work) { struct delayed_work *dwork = 
to_delayed_work(work); @@ -98720,7 +121482,7 @@ index 9d0f65a90002d..74e6f613be020 100644 struct svm_range_list *svms; struct svm_range *prange; struct kfd_process *p; -@@ -1590,12 +1584,10 @@ static void svm_range_restore_work(struct work_struct *work) +@@ -1590,12 +1586,10 @@ static void svm_range_restore_work(struct work_struct *work) * the lifetime of this thread, kfd_process and mm will be valid. */ p = container_of(svms, struct kfd_process, svms); @@ -98733,7 +121495,7 @@ index 9d0f65a90002d..74e6f613be020 100644 svm_range_list_lock_and_flush_work(svms, mm); mutex_lock(&svms->lock); -@@ -1648,7 +1640,6 @@ static void svm_range_restore_work(struct work_struct *work) +@@ -1648,7 +1642,6 @@ static void svm_range_restore_work(struct work_struct *work) out_reschedule: mutex_unlock(&svms->lock); mmap_write_unlock(mm); @@ -98741,7 +121503,7 @@ index 9d0f65a90002d..74e6f613be020 100644 /* If validation failed, reschedule another attempt */ if (evicted_ranges) { -@@ -1764,49 +1755,54 @@ static struct svm_range *svm_range_clone(struct svm_range *old) +@@ -1764,49 +1757,54 @@ static struct svm_range *svm_range_clone(struct svm_range *old) } /** @@ -98825,7 +121587,7 @@ index 9d0f65a90002d..74e6f613be020 100644 node = interval_tree_iter_first(&svms->objects, start, last); while (node) { -@@ -1834,14 +1830,14 @@ svm_range_handle_overlap(struct svm_range_list *svms, struct svm_range *new, +@@ -1834,14 +1832,14 @@ svm_range_handle_overlap(struct svm_range_list *svms, struct svm_range *new, if (node->start < start) { pr_debug("change old range start\n"); @@ -98842,7 +121604,7 @@ index 9d0f65a90002d..74e6f613be020 100644 insert_list); if (r) goto out; -@@ -1853,7 +1849,7 @@ svm_range_handle_overlap(struct svm_range_list *svms, struct svm_range *new, +@@ -1853,7 +1851,7 @@ svm_range_handle_overlap(struct svm_range_list *svms, struct svm_range *new, prange = old; } @@ -98851,7 +121613,7 @@ index 9d0f65a90002d..74e6f613be020 100644 list_add(&prange->update_list, update_list); /* insert a new node if needed */ -@@ -1873,8 +1869,16 @@ svm_range_handle_overlap(struct svm_range_list *svms, struct svm_range *new, +@@ -1873,8 +1871,16 @@ svm_range_handle_overlap(struct svm_range_list *svms, struct svm_range *new, start = next_start; } @@ -98870,7 +121632,7 @@ index 9d0f65a90002d..74e6f613be020 100644 out: if (r) -@@ -2177,6 +2181,8 @@ svm_range_cpu_invalidate_pagetables(struct mmu_interval_notifier *mni, +@@ -2177,6 +2183,8 @@ svm_range_cpu_invalidate_pagetables(struct mmu_interval_notifier *mni, if (range->event == MMU_NOTIFY_RELEASE) return true; @@ -98879,7 +121641,7 @@ index 9d0f65a90002d..74e6f613be020 100644 start = mni->interval_tree.start; last = mni->interval_tree.last; -@@ -2203,6 +2209,7 @@ svm_range_cpu_invalidate_pagetables(struct mmu_interval_notifier *mni, +@@ -2203,6 +2211,7 @@ svm_range_cpu_invalidate_pagetables(struct mmu_interval_notifier *mni, } svm_range_unlock(prange); @@ -98887,7 +121649,7 @@ index 9d0f65a90002d..74e6f613be020 100644 return true; } -@@ -2702,59 +2709,6 @@ svm_range_is_valid(struct mm_struct *mm, uint64_t start, uint64_t size) +@@ -2702,59 +2711,6 @@ svm_range_is_valid(struct mm_struct *mm, uint64_t start, uint64_t size) return true; } @@ -98947,7 +121709,7 @@ index 9d0f65a90002d..74e6f613be020 100644 /** * svm_range_best_prefetch_location - decide the best prefetch location * @prange: svm range structure -@@ -2979,7 +2933,6 @@ static int +@@ -2979,7 +2935,6 @@ static int svm_range_set_attr(struct kfd_process *p, uint64_t start, uint64_t size, uint32_t nattr, struct 
kfd_ioctl_svm_attribute *attrs) { @@ -98955,7 +121717,7 @@ index 9d0f65a90002d..74e6f613be020 100644 struct mm_struct *mm = current->mm; struct list_head update_list; struct list_head insert_list; -@@ -2998,8 +2951,6 @@ svm_range_set_attr(struct kfd_process *p, uint64_t start, uint64_t size, +@@ -2998,8 +2953,6 @@ svm_range_set_attr(struct kfd_process *p, uint64_t start, uint64_t size, svms = &p->svms; @@ -98964,7 +121726,7 @@ index 9d0f65a90002d..74e6f613be020 100644 svm_range_list_lock_and_flush_work(svms, mm); if (!svm_range_is_valid(mm, start, size)) { -@@ -3075,8 +3026,6 @@ out_unlock_range: +@@ -3075,8 +3028,6 @@ out_unlock_range: mutex_unlock(&svms->lock); mmap_read_unlock(mm); out: @@ -98973,8 +121735,33 @@ index 9d0f65a90002d..74e6f613be020 100644 pr_debug("pasid 0x%x svms 0x%p [0x%llx 0x%llx] done, r=%d\n", p->pasid, &p->svms, start, start + size - 1, r); +diff --git a/drivers/gpu/drm/amd/display/Kconfig b/drivers/gpu/drm/amd/display/Kconfig +index 127667e549c19..f25a2c80afcfd 100644 +--- a/drivers/gpu/drm/amd/display/Kconfig ++++ b/drivers/gpu/drm/amd/display/Kconfig +@@ -5,6 +5,7 @@ menu "Display Engine Configuration" + config DRM_AMD_DC + bool "AMD DC - Enable new display engine" + default y ++ depends on BROKEN || !CC_IS_CLANG || X86_64 || SPARC64 || ARM64 + select SND_HDA_COMPONENT if SND_HDA_CORE + select DRM_AMD_DC_DCN if (X86 || PPC64) && !(KCOV_INSTRUMENT_ALL && KCOV_ENABLE_COMPARISONS) + help +@@ -12,6 +13,12 @@ config DRM_AMD_DC + support for AMDGPU. This adds required support for Vega and + Raven ASICs. + ++ calculate_bandwidth() is presently broken on all !(X86_64 || SPARC64 || ARM64) ++ architectures built with Clang (all released versions), whereby the stack ++ frame gets blown up to well over 5k. This would cause an immediate kernel ++ panic on most architectures. We'll revert this when the following bug report ++ has been resolved: https://github.com/llvm/llvm-project/issues/41896. 
++ + config DRM_AMD_DC_DCN + def_bool n + help diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c -index 1ea31dcc7a8b0..d9f99212e624b 100644 +index 1ea31dcc7a8b0..b4293b5a82526 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -70,6 +70,7 @@ @@ -99193,7 +121980,7 @@ index 1ea31dcc7a8b0..d9f99212e624b 100644 #if defined(CONFIG_DRM_AMD_DC_DCN) static void mmhub_read_system_context(struct amdgpu_device *adev, struct dc_phy_addr_space_config *pa_config) { -@@ -1083,6 +1239,157 @@ static void vblank_control_worker(struct work_struct *work) +@@ -1083,6 +1239,194 @@ static void vblank_control_worker(struct work_struct *work) } #endif @@ -99332,7 +122119,44 @@ index 1ea31dcc7a8b0..d9f99212e624b 100644 + DMI_MATCH(DMI_PRODUCT_NAME, "Precision 3460"), + }, + }, ++ { ++ .matches = { ++ DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), ++ DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex Tower Plus 7010"), ++ }, ++ }, ++ { ++ .matches = { ++ DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), ++ DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex Tower 7010"), ++ }, ++ }, ++ { ++ .matches = { ++ DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), ++ DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex SFF Plus 7010"), ++ }, ++ }, ++ { ++ .matches = { ++ DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), ++ DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex SFF 7010"), ++ }, ++ }, ++ { ++ .matches = { ++ DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), ++ DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex Micro Plus 7010"), ++ }, ++ }, ++ { ++ .matches = { ++ DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), ++ DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex Micro 7010"), ++ }, ++ }, + {} ++ /* TODO: refactor this from a fixed table to a dynamic option */ +}; + +static void retrieve_dmi_info(struct amdgpu_display_manager *dm) @@ -99351,7 +122175,7 @@ index 1ea31dcc7a8b0..d9f99212e624b 100644 static int amdgpu_dm_init(struct amdgpu_device *adev) { struct dc_init_data init_data; -@@ -1141,8 +1448,15 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) +@@ -1141,8 +1485,15 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) case CHIP_RAVEN: case CHIP_RENOIR: init_data.flags.gpu_vm_support = true; @@ -99368,7 +122192,7 @@ index 1ea31dcc7a8b0..d9f99212e624b 100644 break; case CHIP_VANGOGH: case CHIP_YELLOW_CARP: -@@ -1167,6 +1481,9 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) +@@ -1167,6 +1518,9 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) init_data.flags.power_down_display_on_boot = true; INIT_LIST_HEAD(&adev->dm.da_list); @@ -99378,7 +122202,7 @@ index 1ea31dcc7a8b0..d9f99212e624b 100644 /* Display Core create. */ adev->dm.dc = dc_create(&init_data); -@@ -1184,6 +1501,8 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) +@@ -1184,6 +1538,8 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) if (adev->asic_type != CHIP_CARRIZO && adev->asic_type != CHIP_STONEY) adev->dm.dc->debug.disable_stutter = amdgpu_pp_feature_mask & PP_STUTTER_MODE ? 
false : true; @@ -99387,7 +122211,7 @@ index 1ea31dcc7a8b0..d9f99212e624b 100644 if (amdgpu_dc_debug_mask & DC_DISABLE_STUTTER) adev->dm.dc->debug.disable_stutter = true; -@@ -1202,6 +1521,12 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) +@@ -1202,6 +1558,12 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) dc_hardware_init(adev->dm.dc); @@ -99400,7 +122224,7 @@ index 1ea31dcc7a8b0..d9f99212e624b 100644 #if defined(CONFIG_DRM_AMD_DC_DCN) if ((adev->flags & AMD_IS_APU) && (adev->asic_type >= CHIP_CARRIZO)) { struct dc_phy_addr_space_config pa_config; -@@ -1254,7 +1579,25 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) +@@ -1254,7 +1616,25 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) DRM_INFO("amdgpu: fail to allocate adev->dm.dmub_notify"); goto error; } @@ -99426,7 +122250,18 @@ index 1ea31dcc7a8b0..d9f99212e624b 100644 } if (amdgpu_dm_initialize_drm_device(adev)) { -@@ -1336,6 +1679,8 @@ static void amdgpu_dm_fini(struct amdgpu_device *adev) +@@ -1308,10 +1688,6 @@ static void amdgpu_dm_fini(struct amdgpu_device *adev) + } + #endif + +- for (i = 0; i < adev->dm.display_indexes_num; i++) { +- drm_encoder_cleanup(&adev->dm.mst_encoders[i].base); +- } +- + amdgpu_dm_destroy_drm_device(&adev->dm); + + #if defined(CONFIG_DRM_AMD_SECURE_DISPLAY) +@@ -1336,6 +1712,8 @@ static void amdgpu_dm_fini(struct amdgpu_device *adev) if (dc_enable_dmub_notifications(adev->dm.dc)) { kfree(adev->dm.dmub_notify); adev->dm.dmub_notify = NULL; @@ -99435,7 +122270,7 @@ index 1ea31dcc7a8b0..d9f99212e624b 100644 } if (adev->dm.dmub_bo) -@@ -1361,6 +1706,18 @@ static void amdgpu_dm_fini(struct amdgpu_device *adev) +@@ -1361,6 +1739,18 @@ static void amdgpu_dm_fini(struct amdgpu_device *adev) adev->dm.freesync_module = NULL; } @@ -99454,7 +122289,7 @@ index 1ea31dcc7a8b0..d9f99212e624b 100644 mutex_destroy(&adev->dm.audio_lock); mutex_destroy(&adev->dm.dc_lock); -@@ -1980,6 +2337,16 @@ context_alloc_fail: +@@ -1980,6 +2370,16 @@ context_alloc_fail: return res; } @@ -99471,7 +122306,7 @@ index 1ea31dcc7a8b0..d9f99212e624b 100644 static int dm_suspend(void *handle) { struct amdgpu_device *adev = handle; -@@ -2001,6 +2368,8 @@ static int dm_suspend(void *handle) +@@ -2001,6 +2401,8 @@ static int dm_suspend(void *handle) amdgpu_dm_irq_suspend(adev); @@ -99480,7 +122315,7 @@ index 1ea31dcc7a8b0..d9f99212e624b 100644 return ret; } -@@ -2011,6 +2380,8 @@ static int dm_suspend(void *handle) +@@ -2011,6 +2413,8 @@ static int dm_suspend(void *handle) amdgpu_dm_irq_suspend(adev); @@ -99489,7 +122324,7 @@ index 1ea31dcc7a8b0..d9f99212e624b 100644 dc_set_power_state(dm->dc, DC_ACPI_CM_POWER_STATE_D3); return 0; -@@ -2206,6 +2577,9 @@ static int dm_resume(void *handle) +@@ -2206,6 +2610,9 @@ static int dm_resume(void *handle) if (amdgpu_in_reset(adev)) { dc_state = dm->cached_dc_state; @@ -99499,7 +122334,7 @@ index 1ea31dcc7a8b0..d9f99212e624b 100644 r = dm_dmub_hw_init(adev); if (r) DRM_ERROR("DMUB interface failed to initialize: status=%d\n", r); -@@ -2217,8 +2591,8 @@ static int dm_resume(void *handle) +@@ -2217,8 +2624,8 @@ static int dm_resume(void *handle) for (i = 0; i < dc_state->stream_count; i++) { dc_state->streams[i]->mode_changed = true; @@ -99510,7 +122345,7 @@ index 1ea31dcc7a8b0..d9f99212e624b 100644 = 0xffffffff; } } -@@ -2253,10 +2627,12 @@ static int dm_resume(void *handle) +@@ -2253,10 +2660,12 @@ static int dm_resume(void *handle) /* TODO: Remove dc_state->dccg, use dc->dccg directly. 
*/ dc_resource_state_construct(dm->dc, dm_state->context); @@ -99526,7 +122361,7 @@ index 1ea31dcc7a8b0..d9f99212e624b 100644 /* power on hardware */ dc_set_power_state(dm->dc, DC_ACPI_CM_POWER_STATE_D0); -@@ -2282,7 +2658,8 @@ static int dm_resume(void *handle) +@@ -2282,7 +2691,8 @@ static int dm_resume(void *handle) * this is the case when traversing through already created * MST connectors, should be skipped */ @@ -99536,7 +122371,7 @@ index 1ea31dcc7a8b0..d9f99212e624b 100644 continue; mutex_lock(&aconnector->hpd_lock); -@@ -2402,7 +2779,7 @@ static struct drm_mode_config_helper_funcs amdgpu_dm_mode_config_helperfuncs = { +@@ -2402,7 +2812,7 @@ static struct drm_mode_config_helper_funcs amdgpu_dm_mode_config_helperfuncs = { static void update_connector_ext_caps(struct amdgpu_dm_connector *aconnector) { @@ -99545,7 +122380,7 @@ index 1ea31dcc7a8b0..d9f99212e624b 100644 struct amdgpu_dm_backlight_caps *caps; struct amdgpu_display_manager *dm; struct drm_connector *conn_base; -@@ -2432,7 +2809,7 @@ static void update_connector_ext_caps(struct amdgpu_dm_connector *aconnector) +@@ -2432,7 +2842,7 @@ static void update_connector_ext_caps(struct amdgpu_dm_connector *aconnector) caps = &dm->backlight_caps[i]; caps->ext_caps = &aconnector->dc_link->dpcd_sink_ext_caps; caps->aux_support = false; @@ -99554,7 +122389,7 @@ index 1ea31dcc7a8b0..d9f99212e624b 100644 min_cll = conn_base->hdr_sink_metadata.hdmi_type1.min_cll; if (caps->ext_caps->bits.oled == 1 /*|| -@@ -2460,8 +2837,8 @@ static void update_connector_ext_caps(struct amdgpu_dm_connector *aconnector) +@@ -2460,8 +2870,8 @@ static void update_connector_ext_caps(struct amdgpu_dm_connector *aconnector) * The results of the above expressions can be verified at * pre_computed_values. */ @@ -99565,7 +122400,22 @@ index 1ea31dcc7a8b0..d9f99212e624b 100644 max = (1 << q) * pre_computed_values[r]; // min luminance: maxLum * (CV/255)^2 / 100 -@@ -2615,9 +2992,8 @@ void amdgpu_dm_update_connector_after_detect( +@@ -2583,13 +2993,12 @@ void amdgpu_dm_update_connector_after_detect( + aconnector->edid = + (struct edid *)sink->dc_edid.raw_edid; + +- drm_connector_update_edid_property(connector, +- aconnector->edid); + if (aconnector->dc_link->aux_mode) + drm_dp_cec_set_edid(&aconnector->dm_dp_aux.aux, + aconnector->edid); + } + ++ drm_connector_update_edid_property(connector, aconnector->edid); + amdgpu_dm_update_freesync_caps(connector, aconnector->edid); + update_connector_ext_caps(aconnector); + } else { +@@ -2615,9 +3024,8 @@ void amdgpu_dm_update_connector_after_detect( dc_sink_release(sink); } @@ -99576,7 +122426,7 @@ index 1ea31dcc7a8b0..d9f99212e624b 100644 struct drm_connector *connector = &aconnector->base; struct drm_device *dev = connector->dev; enum dc_connection_type new_connection_type = dc_connection_none; -@@ -2676,7 +3052,15 @@ static void handle_hpd_irq(void *param) +@@ -2676,7 +3084,15 @@ static void handle_hpd_irq(void *param) } @@ -99593,7 +122443,7 @@ index 1ea31dcc7a8b0..d9f99212e624b 100644 { uint8_t esi[DP_PSR_ERROR_STATUS - DP_SINK_COUNT_ESI] = { 0 }; uint8_t dret; -@@ -2754,6 +3138,25 @@ static void dm_handle_hpd_rx_irq(struct amdgpu_dm_connector *aconnector) +@@ -2754,6 +3170,25 @@ static void dm_handle_hpd_rx_irq(struct amdgpu_dm_connector *aconnector) DRM_DEBUG_DRIVER("Loop exceeded max iterations\n"); } @@ -99619,7 +122469,7 @@ index 1ea31dcc7a8b0..d9f99212e624b 100644 static void handle_hpd_rx_irq(void *param) { struct amdgpu_dm_connector *aconnector = (struct amdgpu_dm_connector *)param; -@@ -2765,14 +3168,16 @@ static 
void handle_hpd_rx_irq(void *param) +@@ -2765,14 +3200,16 @@ static void handle_hpd_rx_irq(void *param) enum dc_connection_type new_connection_type = dc_connection_none; struct amdgpu_device *adev = drm_to_adev(dev); union hpd_irq_data hpd_irq_data; @@ -99638,7 +122488,7 @@ index 1ea31dcc7a8b0..d9f99212e624b 100644 /* * TODO:Temporary add mutex to protect hpd interrupt not have a gpio * conflict, after implement i2c helper, this mutex should be -@@ -2780,43 +3185,41 @@ static void handle_hpd_rx_irq(void *param) +@@ -2780,43 +3217,41 @@ static void handle_hpd_rx_irq(void *param) */ mutex_lock(&aconnector->hpd_lock); @@ -99712,7 +122562,7 @@ index 1ea31dcc7a8b0..d9f99212e624b 100644 out: if (result && !is_mst_root_connector) { -@@ -2901,6 +3304,10 @@ static void register_hpd_handlers(struct amdgpu_device *adev) +@@ -2901,6 +3336,10 @@ static void register_hpd_handlers(struct amdgpu_device *adev) amdgpu_dm_irq_register_interrupt(adev, &int_params, handle_hpd_rx_irq, (void *) aconnector); @@ -99723,7 +122573,7 @@ index 1ea31dcc7a8b0..d9f99212e624b 100644 } } } -@@ -3213,7 +3620,7 @@ static int dcn10_register_irq_handlers(struct amdgpu_device *adev) +@@ -3213,7 +3652,7 @@ static int dcn10_register_irq_handlers(struct amdgpu_device *adev) /* Use GRPH_PFLIP interrupt */ for (i = DCN_1_0__SRCID__HUBP0_FLIP_INTERRUPT; @@ -99732,7 +122582,7 @@ index 1ea31dcc7a8b0..d9f99212e624b 100644 i++) { r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_DCE, i, &adev->pageflip_irq); if (r) { -@@ -3508,7 +3915,7 @@ static u32 convert_brightness_to_user(const struct amdgpu_dm_backlight_caps *cap +@@ -3508,7 +3947,7 @@ static u32 convert_brightness_to_user(const struct amdgpu_dm_backlight_caps *cap max - min); } @@ -99741,7 +122591,7 @@ index 1ea31dcc7a8b0..d9f99212e624b 100644 int bl_idx, u32 user_brightness) { -@@ -3536,7 +3943,8 @@ static int amdgpu_dm_backlight_set_level(struct amdgpu_display_manager *dm, +@@ -3536,7 +3975,8 @@ static int amdgpu_dm_backlight_set_level(struct amdgpu_display_manager *dm, DRM_DEBUG("DM: Failed to update backlight on eDP[%d]\n", bl_idx); } @@ -99751,7 +122601,7 @@ index 1ea31dcc7a8b0..d9f99212e624b 100644 } static int amdgpu_dm_backlight_update_status(struct backlight_device *bd) -@@ -3839,8 +4247,17 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev) +@@ -3839,8 +4279,17 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev) } else if (dc_link_detect(link, DETECT_REASON_BOOT)) { amdgpu_dm_update_connector_after_detect(aconnector); register_backlight_device(dm, link); @@ -99769,7 +122619,25 @@ index 1ea31dcc7a8b0..d9f99212e624b 100644 } -@@ -5033,7 +5450,7 @@ fill_dc_plane_info_and_addr(struct amdgpu_device *adev, +@@ -3979,6 +4428,17 @@ DEVICE_ATTR_WO(s3_debug); + static int dm_early_init(void *handle) + { + struct amdgpu_device *adev = (struct amdgpu_device *)handle; ++ struct amdgpu_mode_info *mode_info = &adev->mode_info; ++ struct atom_context *ctx = mode_info->atom_context; ++ int index = GetIndexIntoMasterTable(DATA, Object_Header); ++ u16 data_offset; ++ ++ /* if there is no object header, skip DM */ ++ if (!amdgpu_atom_parse_data_header(ctx, index, NULL, NULL, NULL, &data_offset)) { ++ adev->harvest_ip_mask |= AMD_HARVEST_IP_DMU_MASK; ++ dev_info(adev->dev, "No object header, skipping DM\n"); ++ return -ENOENT; ++ } + + switch (adev->asic_type) { + #if defined(CONFIG_DRM_AMD_DC_SI) +@@ -5033,7 +5493,7 @@ fill_dc_plane_info_and_addr(struct amdgpu_device *adev, plane_info->visible = true; plane_info->stereo_format = 
PLANE_STEREO_FORMAT_NONE; @@ -99778,7 +122646,7 @@ index 1ea31dcc7a8b0..d9f99212e624b 100644 ret = fill_plane_color_attributes(plane_state, plane_info->format, &plane_info->color_space); -@@ -5100,7 +5517,7 @@ static int fill_dc_plane_attributes(struct amdgpu_device *adev, +@@ -5100,7 +5560,7 @@ static int fill_dc_plane_attributes(struct amdgpu_device *adev, dc_plane_state->global_alpha = plane_info.global_alpha; dc_plane_state->global_alpha_value = plane_info.global_alpha_value; dc_plane_state->dcc = plane_info.dcc; @@ -99787,7 +122655,25 @@ index 1ea31dcc7a8b0..d9f99212e624b 100644 dc_plane_state->flip_int_enabled = true; /* -@@ -5587,6 +6004,7 @@ static void update_dsc_caps(struct amdgpu_dm_connector *aconnector, +@@ -5402,8 +5862,6 @@ static void fill_stream_properties_from_drm_display_mode( + + timing_out->aspect_ratio = get_aspect_ratio(mode_in); + +- stream->output_color_space = get_output_color_space(timing_out); +- + stream->out_transfer_func->type = TF_TYPE_PREDEFINED; + stream->out_transfer_func->tf = TRANSFER_FUNCTION_SRGB; + if (stream->signal == SIGNAL_TYPE_HDMI_TYPE_A) { +@@ -5414,6 +5872,8 @@ static void fill_stream_properties_from_drm_display_mode( + adjust_colour_depth_from_display_info(timing_out, info); + } + } ++ ++ stream->output_color_space = get_output_color_space(timing_out); + } + + static void fill_audio_info(struct audio_info *audio_info, +@@ -5587,6 +6047,7 @@ static void update_dsc_caps(struct amdgpu_dm_connector *aconnector, struct dsc_dec_dpcd_caps *dsc_caps) { stream->timing.flags.DSC = 0; @@ -99795,7 +122681,7 @@ index 1ea31dcc7a8b0..d9f99212e624b 100644 if (aconnector->dc_link && sink->sink_signal == SIGNAL_TYPE_DISPLAY_PORT) { dc_dsc_parse_dsc_dpcd(aconnector->dc_link->ctx->dc, -@@ -7527,6 +7945,9 @@ static void amdgpu_dm_connector_add_common_modes(struct drm_encoder *encoder, +@@ -7527,6 +7988,9 @@ static void amdgpu_dm_connector_add_common_modes(struct drm_encoder *encoder, mode = amdgpu_dm_create_common_mode(encoder, common_modes[i].name, common_modes[i].w, common_modes[i].h); @@ -99805,7 +122691,7 @@ index 1ea31dcc7a8b0..d9f99212e624b 100644 drm_mode_probed_add(connector, mode); amdgpu_dm_connector->num_modes++; } -@@ -8447,15 +8868,15 @@ static void amdgpu_dm_handle_vrr_transition(struct dm_crtc_state *old_state, +@@ -8447,15 +8911,15 @@ static void amdgpu_dm_handle_vrr_transition(struct dm_crtc_state *old_state, * We also need vupdate irq for the actual core vblank handling * at end of vblank. */ @@ -99824,7 +122710,7 @@ index 1ea31dcc7a8b0..d9f99212e624b 100644 drm_crtc_vblank_put(new_state->base.crtc); DRM_DEBUG_DRIVER("%s: crtc=%u VRR on->off: Drop vblank ref\n", __func__, new_state->base.crtc->base.id); -@@ -9191,23 +9612,6 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state) +@@ -9191,23 +9655,6 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state) mutex_unlock(&dm->dc_lock); } @@ -99848,7 +122734,7 @@ index 1ea31dcc7a8b0..d9f99212e624b 100644 /** * Enable interrupts for CRTCs that are newly enabled or went through * a modeset. 
It was intentionally deferred until after the front end -@@ -9217,16 +9621,29 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state) +@@ -9217,16 +9664,29 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state) for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i) { struct amdgpu_crtc *acrtc = to_amdgpu_crtc(crtc); #ifdef CONFIG_DEBUG_FS @@ -99881,7 +122767,7 @@ index 1ea31dcc7a8b0..d9f99212e624b 100644 if (new_crtc_state->active && (!old_crtc_state->active || -@@ -9234,16 +9651,19 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state) +@@ -9234,16 +9694,19 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state) dc_stream_retain(dm_new_crtc_state->stream); acrtc->dm_irq_params.stream = dm_new_crtc_state->stream; manage_dm_interrupts(adev, acrtc, true); @@ -99904,7 +122790,7 @@ index 1ea31dcc7a8b0..d9f99212e624b 100644 #if defined(CONFIG_DRM_AMD_SECURE_DISPLAY) if (amdgpu_dm_crc_window_is_activated(crtc)) { spin_lock_irqsave(&adev_to_drm(adev)->event_lock, flags); -@@ -9255,14 +9675,12 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state) +@@ -9255,14 +9718,12 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state) spin_unlock_irqrestore(&adev_to_drm(adev)->event_lock, flags); } #endif @@ -99921,7 +122807,7 @@ index 1ea31dcc7a8b0..d9f99212e624b 100644 } for_each_new_crtc_in_state(state, crtc, new_crtc_state, j) -@@ -9286,7 +9704,7 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state) +@@ -9286,7 +9747,7 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state) /* restore the backlight level */ for (i = 0; i < dm->num_of_edps; i++) { if (dm->backlight_dev[i] && @@ -99930,7 +122816,21 @@ index 1ea31dcc7a8b0..d9f99212e624b 100644 amdgpu_dm_backlight_set_level(dm, i, dm->brightness[i]); } #endif -@@ -10196,10 +10614,13 @@ static int dm_check_crtc_cursor(struct drm_atomic_state *state, +@@ -9686,6 +10147,13 @@ static int dm_update_crtc_state(struct amdgpu_display_manager *dm, + if (!dm_old_crtc_state->stream) + goto skip_modeset; + ++ /* Unset freesync video if it was active before */ ++ if (dm_old_crtc_state->freesync_config.state == VRR_STATE_ACTIVE_FIXED) { ++ dm_new_crtc_state->freesync_config.state = VRR_STATE_INACTIVE; ++ dm_new_crtc_state->freesync_config.fixed_refresh_in_uhz = 0; ++ } ++ ++ /* Now check if we should set freesync video mode */ + if (amdgpu_freesync_vid_mode && dm_new_crtc_state->stream && + is_timing_unchanged_for_freesync(new_crtc_state, + old_crtc_state)) { +@@ -10196,10 +10664,13 @@ static int dm_check_crtc_cursor(struct drm_atomic_state *state, static int add_affected_mst_dsc_crtcs(struct drm_atomic_state *state, struct drm_crtc *crtc) { struct drm_connector *connector; @@ -99946,7 +122846,18 @@ index 1ea31dcc7a8b0..d9f99212e624b 100644 if (conn_state->crtc != crtc) continue; -@@ -10412,6 +10833,14 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev, +@@ -10332,8 +10803,8 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev, + goto fail; + } + +- if (dm_old_con_state->abm_level != +- dm_new_con_state->abm_level) ++ if (dm_old_con_state->abm_level != dm_new_con_state->abm_level || ++ dm_old_con_state->scaling != dm_new_con_state->scaling) + new_crtc_state->connectors_changed = true; + } + +@@ -10412,6 +10883,18 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev, } } @@ -99956,7 +122867,11 @@ index 1ea31dcc7a8b0..d9f99212e624b 100644 + * 
`dcn10_can_pipe_disable_cursor`). By now, all modified planes are in + * atomic state, so call drm helper to normalize zpos. + */ -+ drm_atomic_normalize_zpos(dev, state); ++ ret = drm_atomic_normalize_zpos(dev, state); ++ if (ret) { ++ drm_dbg(dev, "drm_atomic_normalize_zpos() failed\n"); ++ goto fail; ++ } + /* Remove exiting planes if they are modified */ for_each_oldnew_plane_in_state_reverse(state, plane, old_plane_state, new_plane_state, i) { @@ -100423,61 +123338,8 @@ index 8080bba5b7a76..6d694cea24201 100644 buf += 1; size -= 1; *pos += 1; -diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c -index 6fee12c91ef59..d793eec69d61e 100644 ---- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c -+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c -@@ -40,6 +40,39 @@ - - #include "dm_helpers.h" - -+struct monitor_patch_info { -+ unsigned int manufacturer_id; -+ unsigned int product_id; -+ void (*patch_func)(struct dc_edid_caps *edid_caps, unsigned int param); -+ unsigned int patch_param; -+}; -+static void set_max_dsc_bpp_limit(struct dc_edid_caps *edid_caps, unsigned int param); -+ -+static const struct monitor_patch_info monitor_patch_table[] = { -+{0x6D1E, 0x5BBF, set_max_dsc_bpp_limit, 15}, -+{0x6D1E, 0x5B9A, set_max_dsc_bpp_limit, 15}, -+}; -+ -+static void set_max_dsc_bpp_limit(struct dc_edid_caps *edid_caps, unsigned int param) -+{ -+ if (edid_caps) -+ edid_caps->panel_patch.max_dsc_target_bpp_limit = param; -+} -+ -+static int amdgpu_dm_patch_edid_caps(struct dc_edid_caps *edid_caps) -+{ -+ int i, ret = 0; -+ -+ for (i = 0; i < ARRAY_SIZE(monitor_patch_table); i++) -+ if ((edid_caps->manufacturer_id == monitor_patch_table[i].manufacturer_id) -+ && (edid_caps->product_id == monitor_patch_table[i].product_id)) { -+ monitor_patch_table[i].patch_func(edid_caps, monitor_patch_table[i].patch_param); -+ ret++; -+ } -+ -+ return ret; -+} -+ - /* dm_helpers_parse_edid_caps - * - * Parse edid caps -@@ -125,6 +158,8 @@ enum dc_edid_status dm_helpers_parse_edid_caps( - kfree(sads); - kfree(sadb); - -+ amdgpu_dm_patch_edid_caps(edid_caps); -+ - return result; - } - diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c -index 7af0d58c231b6..652cf108b3c2a 100644 +index 7af0d58c231b6..bc02e3e0d17d0 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c @@ -36,6 +36,8 @@ @@ -100560,11 +123422,36 @@ index 7af0d58c231b6..652cf108b3c2a 100644 if (!aconnector->dsc_aux) return false; +@@ -356,7 +385,6 @@ static const struct drm_connector_helper_funcs dm_dp_mst_connector_helper_funcs + static void amdgpu_dm_encoder_destroy(struct drm_encoder *encoder) + { + drm_encoder_cleanup(encoder); +- kfree(encoder); + } + + static const struct drm_encoder_funcs amdgpu_dm_encoder_funcs = { diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c -index 70a554f1e725a..7072fb2ec07fa 100644 +index 70a554f1e725a..278ff281a1bd5 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c -@@ -74,10 +74,8 @@ bool amdgpu_dm_link_setup_psr(struct dc_stream_state *stream) +@@ -36,10 +36,14 @@ void amdgpu_dm_set_psr_caps(struct dc_link *link) + { + uint8_t dpcd_data[EDP_PSR_RECEIVER_CAP_SIZE]; + +- if (!(link->connector_signal & 
SIGNAL_TYPE_EDP)) ++ if (!(link->connector_signal & SIGNAL_TYPE_EDP)) { ++ link->psr_settings.psr_feature_enabled = false; + return; +- if (link->type == dc_connection_none) ++ } ++ if (link->type == dc_connection_none) { ++ link->psr_settings.psr_feature_enabled = false; + return; ++ } + if (dm_helpers_dp_read_dpcd(NULL, link, DP_PSR_SUPPORT, + dpcd_data, sizeof(dpcd_data))) { + link->dpcd_caps.psr_caps.psr_version = dpcd_data[0]; +@@ -74,10 +78,8 @@ bool amdgpu_dm_link_setup_psr(struct dc_stream_state *stream) link = stream->link; @@ -100577,6 +123464,45 @@ index 70a554f1e725a..7072fb2ec07fa 100644 psr_config.psr_frame_capture_indication_req = 0; psr_config.psr_rfb_setup_time = 0x37; psr_config.psr_sdp_transmit_line_num_deadline = 0x20; +diff --git a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c +index 6dbde74c1e069..1d86fd5610c03 100644 +--- a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c ++++ b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c +@@ -352,6 +352,7 @@ static enum bp_result get_gpio_i2c_info( + uint32_t count = 0; + unsigned int table_index = 0; + bool find_valid = false; ++ struct atom_gpio_pin_assignment *pin; + + if (!info) + return BP_RESULT_BADINPUT; +@@ -379,20 +380,17 @@ static enum bp_result get_gpio_i2c_info( + - sizeof(struct atom_common_table_header)) + / sizeof(struct atom_gpio_pin_assignment); + ++ pin = (struct atom_gpio_pin_assignment *) header->gpio_pin; ++ + for (table_index = 0; table_index < count; table_index++) { +- if (((record->i2c_id & I2C_HW_CAP) == ( +- header->gpio_pin[table_index].gpio_id & +- I2C_HW_CAP)) && +- ((record->i2c_id & I2C_HW_ENGINE_ID_MASK) == +- (header->gpio_pin[table_index].gpio_id & +- I2C_HW_ENGINE_ID_MASK)) && +- ((record->i2c_id & I2C_HW_LANE_MUX) == +- (header->gpio_pin[table_index].gpio_id & +- I2C_HW_LANE_MUX))) { ++ if (((record->i2c_id & I2C_HW_CAP) == (pin->gpio_id & I2C_HW_CAP)) && ++ ((record->i2c_id & I2C_HW_ENGINE_ID_MASK) == (pin->gpio_id & I2C_HW_ENGINE_ID_MASK)) && ++ ((record->i2c_id & I2C_HW_LANE_MUX) == (pin->gpio_id & I2C_HW_LANE_MUX))) { + /* still valid */ + find_valid = true; + break; + } ++ pin = (struct atom_gpio_pin_assignment *)((uint8_t *)pin + sizeof(struct atom_gpio_pin_assignment)); + } + + /* If we don't find the entry that we are looking for then diff --git a/drivers/gpu/drm/amd/display/dc/calcs/bw_fixed.c b/drivers/gpu/drm/amd/display/dc/calcs/bw_fixed.c index 6ca288fb5fb9e..2d46bc527b218 100644 --- a/drivers/gpu/drm/amd/display/dc/calcs/bw_fixed.c @@ -100876,6 +123802,21 @@ index c798c65d42765..6c9378208127d 100644 } return; +diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c +index 9039fb134db59..f858ae68aa5f6 100644 +--- a/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c ++++ b/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c +@@ -92,8 +92,8 @@ static const struct out_csc_color_matrix_type output_csc_matrix[] = { + { 0xE00, 0xF349, 0xFEB7, 0x1000, 0x6CE, 0x16E3, + 0x24F, 0x200, 0xFCCB, 0xF535, 0xE00, 0x1000} }, + { COLOR_SPACE_YCBCR2020_TYPE, +- { 0x1000, 0xF149, 0xFEB7, 0x0000, 0x0868, 0x15B2, +- 0x01E6, 0x0000, 0xFB88, 0xF478, 0x1000, 0x0000} }, ++ { 0x1000, 0xF149, 0xFEB7, 0x1004, 0x0868, 0x15B2, ++ 0x01E6, 0x201, 0xFB88, 0xF478, 0x1000, 0x1004} }, + { COLOR_SPACE_YCBCR709_BLACK_TYPE, + { 0x0000, 0x0000, 0x0000, 0x1000, 0x0000, 0x0000, + 0x0000, 0x0200, 0x0000, 0x0000, 0x0000, 0x1000} }, diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c 
b/drivers/gpu/drm/amd/display/dc/core/dc_link.c index 1e44b13c1c7de..3c4205248efc2 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c @@ -101290,6 +124231,68 @@ index 62d595ded8668..46d7e75e4553e 100644 struct audio_output audio_output; build_audio_output(context, pipe_ctx, &audio_output); +diff --git a/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c b/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c +index c65e4d125c8e2..013fca9b9c68c 100644 +--- a/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c ++++ b/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c +@@ -361,7 +361,8 @@ static const struct dce_audio_registers audio_regs[] = { + audio_regs(2), + audio_regs(3), + audio_regs(4), +- audio_regs(5) ++ audio_regs(5), ++ audio_regs(6), + }; + + #define DCE120_AUD_COMMON_MASK_SH_LIST(mask_sh)\ +diff --git a/drivers/gpu/drm/amd/display/dc/dce60/dce60_resource.c b/drivers/gpu/drm/amd/display/dc/dce60/dce60_resource.c +index dcfa0a3efa00d..bf72d3f60d7f4 100644 +--- a/drivers/gpu/drm/amd/display/dc/dce60/dce60_resource.c ++++ b/drivers/gpu/drm/amd/display/dc/dce60/dce60_resource.c +@@ -1127,6 +1127,7 @@ struct resource_pool *dce60_create_resource_pool( + if (dce60_construct(num_virtual_links, dc, pool)) + return &pool->base; + ++ kfree(pool); + BREAK_TO_DEBUGGER(); + return NULL; + } +@@ -1324,6 +1325,7 @@ struct resource_pool *dce61_create_resource_pool( + if (dce61_construct(num_virtual_links, dc, pool)) + return &pool->base; + ++ kfree(pool); + BREAK_TO_DEBUGGER(); + return NULL; + } +@@ -1517,6 +1519,7 @@ struct resource_pool *dce64_create_resource_pool( + if (dce64_construct(num_virtual_links, dc, pool)) + return &pool->base; + ++ kfree(pool); + BREAK_TO_DEBUGGER(); + return NULL; + } +diff --git a/drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.c b/drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.c +index 725d92e40cd30..52d1f9746e8cb 100644 +--- a/drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.c ++++ b/drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.c +@@ -1138,6 +1138,7 @@ struct resource_pool *dce80_create_resource_pool( + if (dce80_construct(num_virtual_links, dc, pool)) + return &pool->base; + ++ kfree(pool); + BREAK_TO_DEBUGGER(); + return NULL; + } +@@ -1337,6 +1338,7 @@ struct resource_pool *dce81_create_resource_pool( + if (dce81_construct(num_virtual_links, dc, pool)) + return &pool->base; + ++ kfree(pool); + BREAK_TO_DEBUGGER(); + return NULL; + } diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.c index f4f423d0b8c3f..80595d7f060c3 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.c @@ -101303,10 +124306,52 @@ index f4f423d0b8c3f..80595d7f060c3 100644 void hubbub1_construct(struct hubbub *hubbub, diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c -index df8a7718a85fc..91ab4dbbe1a6d 100644 +index df8a7718a85fc..c655d03ef754d 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c -@@ -1052,9 +1052,13 @@ static bool dcn10_hw_wa_force_recovery(struct dc *dc) +@@ -804,6 +804,32 @@ static void false_optc_underflow_wa( + tg->funcs->clear_optc_underflow(tg); + } + ++static int calculate_vready_offset_for_group(struct pipe_ctx *pipe) ++{ ++ struct pipe_ctx *other_pipe; ++ int vready_offset = pipe->pipe_dlg_param.vready_offset; ++ ++ /* Always use the largest vready_offset of all connected pipes */ ++ for 
(other_pipe = pipe->bottom_pipe; other_pipe != NULL; other_pipe = other_pipe->bottom_pipe) { ++ if (other_pipe->pipe_dlg_param.vready_offset > vready_offset) ++ vready_offset = other_pipe->pipe_dlg_param.vready_offset; ++ } ++ for (other_pipe = pipe->top_pipe; other_pipe != NULL; other_pipe = other_pipe->top_pipe) { ++ if (other_pipe->pipe_dlg_param.vready_offset > vready_offset) ++ vready_offset = other_pipe->pipe_dlg_param.vready_offset; ++ } ++ for (other_pipe = pipe->next_odm_pipe; other_pipe != NULL; other_pipe = other_pipe->next_odm_pipe) { ++ if (other_pipe->pipe_dlg_param.vready_offset > vready_offset) ++ vready_offset = other_pipe->pipe_dlg_param.vready_offset; ++ } ++ for (other_pipe = pipe->prev_odm_pipe; other_pipe != NULL; other_pipe = other_pipe->prev_odm_pipe) { ++ if (other_pipe->pipe_dlg_param.vready_offset > vready_offset) ++ vready_offset = other_pipe->pipe_dlg_param.vready_offset; ++ } ++ ++ return vready_offset; ++} ++ + enum dc_status dcn10_enable_stream_timing( + struct pipe_ctx *pipe_ctx, + struct dc_state *context, +@@ -838,7 +864,7 @@ enum dc_status dcn10_enable_stream_timing( + pipe_ctx->stream_res.tg->funcs->program_timing( + pipe_ctx->stream_res.tg, + &stream->timing, +- pipe_ctx->pipe_dlg_param.vready_offset, ++ calculate_vready_offset_for_group(pipe_ctx), + pipe_ctx->pipe_dlg_param.vstartup_start, + pipe_ctx->pipe_dlg_param.vupdate_offset, + pipe_ctx->pipe_dlg_param.vupdate_width, +@@ -1052,9 +1078,13 @@ static bool dcn10_hw_wa_force_recovery(struct dc *dc) void dcn10_verify_allow_pstate_change_high(struct dc *dc) { @@ -101321,7 +124366,7 @@ index df8a7718a85fc..91ab4dbbe1a6d 100644 int i = 0; if (should_log_hw_state) -@@ -1063,8 +1067,8 @@ void dcn10_verify_allow_pstate_change_high(struct dc *dc) +@@ -1063,8 +1093,8 @@ void dcn10_verify_allow_pstate_change_high(struct dc *dc) TRACE_DC_PIPE_STATE(pipe_ctx, i, MAX_PIPES); BREAK_TO_DEBUGGER(); if (dcn10_hw_wa_force_recovery(dc)) { @@ -101332,7 +124377,7 @@ index df8a7718a85fc..91ab4dbbe1a6d 100644 BREAK_TO_DEBUGGER(); } } -@@ -1435,6 +1439,9 @@ void dcn10_init_hw(struct dc *dc) +@@ -1435,6 +1465,9 @@ void dcn10_init_hw(struct dc *dc) } } @@ -101342,7 +124387,7 @@ index df8a7718a85fc..91ab4dbbe1a6d 100644 /* If taking control over from VBIOS, we may want to optimize our first * mode set, so we need to skip powering down pipes until we know which * pipes we want to use. 
-@@ -1487,8 +1494,6 @@ void dcn10_init_hw(struct dc *dc) +@@ -1487,8 +1520,6 @@ void dcn10_init_hw(struct dc *dc) REG_UPDATE(DCFCLK_CNTL, DCFCLK_GATE_DIS, 0); } @@ -101351,7 +124396,7 @@ index df8a7718a85fc..91ab4dbbe1a6d 100644 if (dc->clk_mgr->funcs->notify_wm_ranges) dc->clk_mgr->funcs->notify_wm_ranges(dc->clk_mgr); -@@ -1522,7 +1527,7 @@ void dcn10_power_down_on_boot(struct dc *dc) +@@ -1522,7 +1553,7 @@ void dcn10_power_down_on_boot(struct dc *dc) for (i = 0; i < dc->link_count; i++) { struct dc_link *link = dc->links[i]; @@ -101360,7 +124405,7 @@ index df8a7718a85fc..91ab4dbbe1a6d 100644 link->link_enc->funcs->is_dig_enabled(link->link_enc) && dc->hwss.power_down) { dc->hwss.power_down(dc); -@@ -2455,14 +2460,18 @@ void dcn10_update_mpcc(struct dc *dc, struct pipe_ctx *pipe_ctx) +@@ -2455,14 +2486,18 @@ void dcn10_update_mpcc(struct dc *dc, struct pipe_ctx *pipe_ctx) struct mpc *mpc = dc->res_pool->mpc; struct mpc_tree *mpc_tree_params = &(pipe_ctx->stream_res.opp->mpc_tree_params); @@ -101384,7 +124429,16 @@ index df8a7718a85fc..91ab4dbbe1a6d 100644 if (pipe_ctx->plane_state->global_alpha) blnd_cfg.global_alpha = pipe_ctx->plane_state->global_alpha_value; else -@@ -3508,7 +3517,7 @@ void dcn10_calc_vupdate_position( +@@ -2767,7 +2802,7 @@ void dcn10_program_pipe( + + pipe_ctx->stream_res.tg->funcs->program_global_sync( + pipe_ctx->stream_res.tg, +- pipe_ctx->pipe_dlg_param.vready_offset, ++ calculate_vready_offset_for_group(pipe_ctx), + pipe_ctx->pipe_dlg_param.vstartup_start, + pipe_ctx->pipe_dlg_param.vupdate_offset, + pipe_ctx->pipe_dlg_param.vupdate_width); +@@ -3508,7 +3543,7 @@ void dcn10_calc_vupdate_position( { const struct dc_crtc_timing *dc_crtc_timing = &pipe_ctx->stream->timing; int vline_int_offset_from_vupdate = @@ -101393,7 +124447,7 @@ index df8a7718a85fc..91ab4dbbe1a6d 100644 int vupdate_offset_from_vsync = dc->hwss.get_vupdate_offset_from_vsync(pipe_ctx); int start_position; -@@ -3533,18 +3542,10 @@ void dcn10_calc_vupdate_position( +@@ -3533,18 +3568,10 @@ void dcn10_calc_vupdate_position( static void dcn10_cal_vline_position( struct dc *dc, struct pipe_ctx *pipe_ctx, @@ -101413,7 +124467,7 @@ index df8a7718a85fc..91ab4dbbe1a6d 100644 case START_V_UPDATE: dcn10_calc_vupdate_position( dc, -@@ -3553,7 +3554,9 @@ static void dcn10_cal_vline_position( +@@ -3553,7 +3580,9 @@ static void dcn10_cal_vline_position( end_line); break; case START_V_SYNC: @@ -101424,7 +124478,7 @@ index df8a7718a85fc..91ab4dbbe1a6d 100644 break; default: ASSERT(0); -@@ -3563,24 +3566,15 @@ static void dcn10_cal_vline_position( +@@ -3563,24 +3592,15 @@ static void dcn10_cal_vline_position( void dcn10_setup_periodic_interrupt( struct dc *dc, @@ -101439,10 +124493,10 @@ index df8a7718a85fc..91ab4dbbe1a6d 100644 - if (vline == VLINE0) { - uint32_t start_line = 0; - uint32_t end_line = 0; +- +- dcn10_cal_vline_position(dc, pipe_ctx, vline, &start_line, &end_line); + dcn10_cal_vline_position(dc, pipe_ctx, &start_line, &end_line); -- dcn10_cal_vline_position(dc, pipe_ctx, vline, &start_line, &end_line); -- - tg->funcs->setup_vertical_interrupt0(tg, start_line, end_line); - - } else if (vline == VLINE1) { @@ -101514,7 +124568,7 @@ index 37848f4577b18..92fee47278e5a 100644 OTG_CLOCK_GATE_DIS, 0, OTG_CLOCK_EN, 0); diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c -index a47ba1d45be92..0de1bbbabf9af 100644 +index a47ba1d45be92..bf2a8f53694b4 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c +++ 
b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c @@ -1513,6 +1513,7 @@ static void dcn20_update_dchubp_dpp( @@ -101525,7 +124579,66 @@ index a47ba1d45be92..0de1bbbabf9af 100644 || pipe_ctx->stream->update_flags.bits.gamut_remap || pipe_ctx->stream->update_flags.bits.out_csc) { /* dpp/cm gamut remap*/ -@@ -2297,14 +2298,18 @@ void dcn20_update_mpcc(struct dc *dc, struct pipe_ctx *pipe_ctx) +@@ -1563,6 +1564,31 @@ static void dcn20_update_dchubp_dpp( + hubp->funcs->set_blank(hubp, false); + } + ++static int calculate_vready_offset_for_group(struct pipe_ctx *pipe) ++{ ++ struct pipe_ctx *other_pipe; ++ int vready_offset = pipe->pipe_dlg_param.vready_offset; ++ ++ /* Always use the largest vready_offset of all connected pipes */ ++ for (other_pipe = pipe->bottom_pipe; other_pipe != NULL; other_pipe = other_pipe->bottom_pipe) { ++ if (other_pipe->pipe_dlg_param.vready_offset > vready_offset) ++ vready_offset = other_pipe->pipe_dlg_param.vready_offset; ++ } ++ for (other_pipe = pipe->top_pipe; other_pipe != NULL; other_pipe = other_pipe->top_pipe) { ++ if (other_pipe->pipe_dlg_param.vready_offset > vready_offset) ++ vready_offset = other_pipe->pipe_dlg_param.vready_offset; ++ } ++ for (other_pipe = pipe->next_odm_pipe; other_pipe != NULL; other_pipe = other_pipe->next_odm_pipe) { ++ if (other_pipe->pipe_dlg_param.vready_offset > vready_offset) ++ vready_offset = other_pipe->pipe_dlg_param.vready_offset; ++ } ++ for (other_pipe = pipe->prev_odm_pipe; other_pipe != NULL; other_pipe = other_pipe->prev_odm_pipe) { ++ if (other_pipe->pipe_dlg_param.vready_offset > vready_offset) ++ vready_offset = other_pipe->pipe_dlg_param.vready_offset; ++ } ++ ++ return vready_offset; ++} + + static void dcn20_program_pipe( + struct dc *dc, +@@ -1581,7 +1607,7 @@ static void dcn20_program_pipe( + + pipe_ctx->stream_res.tg->funcs->program_global_sync( + pipe_ctx->stream_res.tg, +- pipe_ctx->pipe_dlg_param.vready_offset, ++ calculate_vready_offset_for_group(pipe_ctx), + pipe_ctx->pipe_dlg_param.vstartup_start, + pipe_ctx->pipe_dlg_param.vupdate_offset, + pipe_ctx->pipe_dlg_param.vupdate_width); +@@ -1764,7 +1790,7 @@ void dcn20_post_unlock_program_front_end( + + for (j = 0; j < TIMEOUT_FOR_PIPE_ENABLE_MS*1000 + && hubp->funcs->hubp_is_flip_pending(hubp); j++) +- mdelay(1); ++ udelay(1); + } + } + +@@ -1874,7 +1900,7 @@ bool dcn20_update_bandwidth( + + pipe_ctx->stream_res.tg->funcs->program_global_sync( + pipe_ctx->stream_res.tg, +- pipe_ctx->pipe_dlg_param.vready_offset, ++ calculate_vready_offset_for_group(pipe_ctx), + pipe_ctx->pipe_dlg_param.vstartup_start, + pipe_ctx->pipe_dlg_param.vupdate_offset, + pipe_ctx->pipe_dlg_param.vupdate_width); +@@ -2297,14 +2323,18 @@ void dcn20_update_mpcc(struct dc *dc, struct pipe_ctx *pipe_ctx) struct mpc *mpc = dc->res_pool->mpc; struct mpc_tree *mpc_tree_params = &(pipe_ctx->stream_res.opp->mpc_tree_params); @@ -101942,7 +125055,7 @@ index 7d3ff5d444023..2292bb82026e2 100644 .disable_dcc = DCC_ENABLE, .vsr_support = true, diff --git a/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c b/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c -index dd38796ba30ad..7aad0340f7946 100644 +index dd38796ba30ad..67d83417ec337 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c @@ -193,7 +193,7 @@ static const struct dc_debug_options debug_defaults_drv = { @@ -101963,6 +125076,27 @@ index dd38796ba30ad..7aad0340f7946 100644 vpg_inst = eng_id; afmt_inst = eng_id; } else +@@ -1344,6 +1344,20 @@ 
void dcn303_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_param + dcn3_03_soc.clock_limits[i].phyclk_d18_mhz = dcn3_03_soc.clock_limits[0].phyclk_d18_mhz; + dcn3_03_soc.clock_limits[i].dscclk_mhz = dcn3_03_soc.clock_limits[0].dscclk_mhz; + } ++ ++ // WA: patch strobe modes to compensate for DCN303 BW issue ++ if (dcn3_03_soc.num_chans <= 4) { ++ for (i = 0; i < dcn3_03_soc.num_states; i++) { ++ if (dcn3_03_soc.clock_limits[i].dram_speed_mts > 1700) ++ break; ++ ++ if (dcn3_03_soc.clock_limits[i].dram_speed_mts >= 1500) { ++ dcn3_03_soc.clock_limits[i].dcfclk_mhz = 100; ++ dcn3_03_soc.clock_limits[i].fabricclk_mhz = 100; ++ } ++ } ++ } ++ + /* re-init DML with updated bb */ + dml_init_instance(&dc->dml, &dcn3_03_soc, &dcn3_03_ip, DML_PROJECT_DCN30); + if (dc->current_state) diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_dio_link_encoder.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_dio_link_encoder.c index b0892443fbd57..c7c27a605f159 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_dio_link_encoder.c @@ -102337,6 +125471,18 @@ index 90c73a1cb9861..208d2dc8b1d1a 100644 + hubbub31->debug_test_index_pstate = 0x6; } +diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hubp.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hubp.c +index 53b792b997b7e..127055044cf1a 100644 +--- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hubp.c ++++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hubp.c +@@ -79,6 +79,7 @@ static struct hubp_funcs dcn31_hubp_funcs = { + .hubp_init = hubp3_init, + .set_unbounded_requesting = hubp31_set_unbounded_requesting, + .hubp_soft_reset = hubp31_soft_reset, ++ .hubp_set_flip_int = hubp1_set_flip_int, + .hubp_in_blank = hubp1_in_blank, + }; + diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.c index 3afa1159a5f7d..b72d080b302a1 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.c @@ -106481,6 +129627,20 @@ index 7ec4331e67f26..a486769b66c6a 100644 typedef struct harvest_info_header { uint32_t signature; /* Table Signature */ uint32_t version; /* Table Version */ +diff --git a/drivers/gpu/drm/amd/include/kgd_pp_interface.h b/drivers/gpu/drm/amd/include/kgd_pp_interface.h +index bac15c466733d..6e27c8b16391f 100644 +--- a/drivers/gpu/drm/amd/include/kgd_pp_interface.h ++++ b/drivers/gpu/drm/amd/include/kgd_pp_interface.h +@@ -341,7 +341,8 @@ struct amd_pm_funcs { + int (*get_power_profile_mode)(void *handle, char *buf); + int (*set_power_profile_mode)(void *handle, long *input, uint32_t size); + int (*set_fine_grain_clk_vol)(void *handle, uint32_t type, long *input, uint32_t size); +- int (*odn_edit_dpm_table)(void *handle, uint32_t type, long *input, uint32_t size); ++ int (*odn_edit_dpm_table)(void *handle, enum PP_OD_DPM_TABLE_COMMAND type, ++ long *input, uint32_t size); + int (*set_mp1_state)(void *handle, enum pp_mp1_state mp1_state); + int (*smu_i2c_bus_access)(void *handle, bool acquire); + int (*gfx_state_change_set)(void *handle, uint32_t state); diff --git a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c index 03581d5b18360..a68496b3f9296 100644 --- a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c @@ -106580,6 +129740,35 @@ index 98f1b3d8c1d59..16e3f72d31b9f 100644 }; #define R600_SSTU_DFLT 0 +diff --git a/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c b/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c +index 321215003643b..0f5930e797bd5 100644 +--- a/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c ++++ 
b/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c +@@ -924,7 +924,8 @@ static int pp_set_fine_grain_clk_vol(void *handle, uint32_t type, long *input, u + return hwmgr->hwmgr_func->set_fine_grain_clk_vol(hwmgr, type, input, size); + } + +-static int pp_odn_edit_dpm_table(void *handle, uint32_t type, long *input, uint32_t size) ++static int pp_odn_edit_dpm_table(void *handle, enum PP_OD_DPM_TABLE_COMMAND type, ++ long *input, uint32_t size) + { + struct pp_hwmgr *hwmgr = handle; + +diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/pp_psm.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/pp_psm.c +index 67d7da0b6fed5..1d829402cd2e2 100644 +--- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/pp_psm.c ++++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/pp_psm.c +@@ -75,8 +75,10 @@ int psm_init_power_state_table(struct pp_hwmgr *hwmgr) + for (i = 0; i < table_entries; i++) { + result = hwmgr->hwmgr_func->get_pp_table_entry(hwmgr, i, state); + if (result) { ++ kfree(hwmgr->current_ps); + kfree(hwmgr->request_ps); + kfree(hwmgr->ps); ++ hwmgr->current_ps = NULL; + hwmgr->request_ps = NULL; + hwmgr->ps = NULL; + return -EINVAL; diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.c index 1de3ae77e03ed..cf74621f94a75 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.c @@ -106911,6 +130100,45 @@ index c152a61ddd2c9..e6336654c5655 100644 size += sysfs_emit_at(buf, size, "%s %16s %s %s %s %s\n",title[0], title[1], title[2], title[3], title[4], title[5]); +diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_thermal.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_thermal.c +index dad3e3741a4e8..190af79f3236f 100644 +--- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_thermal.c ++++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_thermal.c +@@ -67,22 +67,21 @@ int vega10_fan_ctrl_get_fan_speed_info(struct pp_hwmgr *hwmgr, + int vega10_fan_ctrl_get_fan_speed_pwm(struct pp_hwmgr *hwmgr, + uint32_t *speed) + { +- uint32_t current_rpm; +- uint32_t percent = 0; +- +- if (hwmgr->thermal_controller.fanInfo.bNoFan) +- return 0; ++ struct amdgpu_device *adev = hwmgr->adev; ++ uint32_t duty100, duty; ++ uint64_t tmp64; + +- if (vega10_get_current_rpm(hwmgr, ¤t_rpm)) +- return -1; ++ duty100 = REG_GET_FIELD(RREG32_SOC15(THM, 0, mmCG_FDO_CTRL1), ++ CG_FDO_CTRL1, FMAX_DUTY100); ++ duty = REG_GET_FIELD(RREG32_SOC15(THM, 0, mmCG_THERMAL_STATUS), ++ CG_THERMAL_STATUS, FDO_PWM_DUTY); + +- if (hwmgr->thermal_controller. +- advanceFanControlParameters.usMaxFanRPM != 0) +- percent = current_rpm * 255 / +- hwmgr->thermal_controller. +- advanceFanControlParameters.usMaxFanRPM; ++ if (!duty100) ++ return -EINVAL; + +- *speed = MIN(percent, 255); ++ tmp64 = (uint64_t)duty * 255; ++ do_div(tmp64, duty100); ++ *speed = MIN((uint32_t)tmp64, 255); + + return 0; + } diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega12_hwmgr.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega12_hwmgr.c index 8558718e15a8f..a2f4d6773d458 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega12_hwmgr.c @@ -106961,10 +130189,20 @@ index 8558718e15a8f..a2f4d6773d458 100644 (clocks.data[i].clocks_in_khz / 1000 == now) ? 
"*" : ""); break; diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_hwmgr.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_hwmgr.c -index 0cf39c1244b1c..85d55ab4e369f 100644 +index 0cf39c1244b1c..299b5c838bf70 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_hwmgr.c +++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_hwmgr.c -@@ -3238,6 +3238,8 @@ static int vega20_get_ppfeature_status(struct pp_hwmgr *hwmgr, char *buf) +@@ -2961,7 +2961,8 @@ static int vega20_odn_edit_dpm_table(struct pp_hwmgr *hwmgr, + data->od8_settings.od8_settings_array; + OverDriveTable_t *od_table = + &(data->smc_state_table.overdrive_table); +- int32_t input_index, input_clk, input_vol, i; ++ int32_t input_clk, input_vol, i; ++ uint32_t input_index; + int od8_id; + int ret; + +@@ -3238,6 +3239,8 @@ static int vega20_get_ppfeature_status(struct pp_hwmgr *hwmgr, char *buf) int ret = 0; int size = 0; @@ -106973,7 +130211,7 @@ index 0cf39c1244b1c..85d55ab4e369f 100644 ret = vega20_get_enabled_smc_features(hwmgr, &features_enabled); PP_ASSERT_WITH_CODE(!ret, "[EnableAllSmuFeatures] Failed to get enabled smc features!", -@@ -3372,13 +3374,13 @@ static int vega20_print_clock_levels(struct pp_hwmgr *hwmgr, +@@ -3372,13 +3375,13 @@ static int vega20_print_clock_levels(struct pp_hwmgr *hwmgr, return ret); if (vega20_get_sclks(hwmgr, &clocks)) { @@ -106989,7 +130227,7 @@ index 0cf39c1244b1c..85d55ab4e369f 100644 i, clocks.data[i].clocks_in_khz / 1000, (clocks.data[i].clocks_in_khz == now * 10) ? "*" : ""); break; -@@ -3390,13 +3392,13 @@ static int vega20_print_clock_levels(struct pp_hwmgr *hwmgr, +@@ -3390,13 +3393,13 @@ static int vega20_print_clock_levels(struct pp_hwmgr *hwmgr, return ret); if (vega20_get_memclocks(hwmgr, &clocks)) { @@ -107005,7 +130243,7 @@ index 0cf39c1244b1c..85d55ab4e369f 100644 i, clocks.data[i].clocks_in_khz / 1000, (clocks.data[i].clocks_in_khz == now * 10) ? "*" : ""); break; -@@ -3408,13 +3410,13 @@ static int vega20_print_clock_levels(struct pp_hwmgr *hwmgr, +@@ -3408,13 +3411,13 @@ static int vega20_print_clock_levels(struct pp_hwmgr *hwmgr, return ret); if (vega20_get_socclocks(hwmgr, &clocks)) { @@ -107021,7 +130259,7 @@ index 0cf39c1244b1c..85d55ab4e369f 100644 i, clocks.data[i].clocks_in_khz / 1000, (clocks.data[i].clocks_in_khz == now * 10) ? "*" : ""); break; -@@ -3426,7 +3428,7 @@ static int vega20_print_clock_levels(struct pp_hwmgr *hwmgr, +@@ -3426,7 +3429,7 @@ static int vega20_print_clock_levels(struct pp_hwmgr *hwmgr, return ret); for (i = 0; i < fclk_dpm_table->count; i++) @@ -107030,7 +130268,7 @@ index 0cf39c1244b1c..85d55ab4e369f 100644 i, fclk_dpm_table->dpm_levels[i].value, fclk_dpm_table->dpm_levels[i].value == (now / 100) ? "*" : ""); break; -@@ -3438,13 +3440,13 @@ static int vega20_print_clock_levels(struct pp_hwmgr *hwmgr, +@@ -3438,13 +3441,13 @@ static int vega20_print_clock_levels(struct pp_hwmgr *hwmgr, return ret); if (vega20_get_dcefclocks(hwmgr, &clocks)) { @@ -107046,7 +130284,7 @@ index 0cf39c1244b1c..85d55ab4e369f 100644 i, clocks.data[i].clocks_in_khz / 1000, (clocks.data[i].clocks_in_khz == now * 10) ? "*" : ""); break; -@@ -3458,7 +3460,7 @@ static int vega20_print_clock_levels(struct pp_hwmgr *hwmgr, +@@ -3458,7 +3461,7 @@ static int vega20_print_clock_levels(struct pp_hwmgr *hwmgr, gen_speed = pptable->PcieGenSpeed[i]; lane_width = pptable->PcieLaneCount[i]; @@ -107055,7 +130293,7 @@ index 0cf39c1244b1c..85d55ab4e369f 100644 (gen_speed == 0) ? "2.5GT/s," : (gen_speed == 1) ? "5.0GT/s," : (gen_speed == 2) ? 
"8.0GT/s," : -@@ -3479,18 +3481,18 @@ static int vega20_print_clock_levels(struct pp_hwmgr *hwmgr, +@@ -3479,18 +3482,18 @@ static int vega20_print_clock_levels(struct pp_hwmgr *hwmgr, case OD_SCLK: if (od8_settings[OD8_SETTING_GFXCLK_FMIN].feature_id && od8_settings[OD8_SETTING_GFXCLK_FMAX].feature_id) { @@ -107079,7 +130317,7 @@ index 0cf39c1244b1c..85d55ab4e369f 100644 od_table->UclkFmax); } -@@ -3503,14 +3505,14 @@ static int vega20_print_clock_levels(struct pp_hwmgr *hwmgr, +@@ -3503,14 +3506,14 @@ static int vega20_print_clock_levels(struct pp_hwmgr *hwmgr, od8_settings[OD8_SETTING_GFXCLK_VOLTAGE1].feature_id && od8_settings[OD8_SETTING_GFXCLK_VOLTAGE2].feature_id && od8_settings[OD8_SETTING_GFXCLK_VOLTAGE3].feature_id) { @@ -107098,7 +130336,7 @@ index 0cf39c1244b1c..85d55ab4e369f 100644 od_table->GfxclkFreq3, od_table->GfxclkVolt3 / VOLTAGE_SCALE); } -@@ -3518,17 +3520,17 @@ static int vega20_print_clock_levels(struct pp_hwmgr *hwmgr, +@@ -3518,17 +3521,17 @@ static int vega20_print_clock_levels(struct pp_hwmgr *hwmgr, break; case OD_RANGE: @@ -107119,7 +130357,7 @@ index 0cf39c1244b1c..85d55ab4e369f 100644 od8_settings[OD8_SETTING_UCLK_FMAX].min_value, od8_settings[OD8_SETTING_UCLK_FMAX].max_value); } -@@ -3539,22 +3541,22 @@ static int vega20_print_clock_levels(struct pp_hwmgr *hwmgr, +@@ -3539,22 +3542,22 @@ static int vega20_print_clock_levels(struct pp_hwmgr *hwmgr, od8_settings[OD8_SETTING_GFXCLK_VOLTAGE1].feature_id && od8_settings[OD8_SETTING_GFXCLK_VOLTAGE2].feature_id && od8_settings[OD8_SETTING_GFXCLK_VOLTAGE3].feature_id) { @@ -107148,7 +130386,7 @@ index 0cf39c1244b1c..85d55ab4e369f 100644 od8_settings[OD8_SETTING_GFXCLK_VOLTAGE3].min_value, od8_settings[OD8_SETTING_GFXCLK_VOLTAGE3].max_value); } -@@ -4003,6 +4005,8 @@ static int vega20_get_power_profile_mode(struct pp_hwmgr *hwmgr, char *buf) +@@ -4003,6 +4006,8 @@ static int vega20_get_power_profile_mode(struct pp_hwmgr *hwmgr, char *buf) if (!buf) return -EINVAL; @@ -107286,7 +130524,7 @@ index b1ad451af06bd..dfba0bc732073 100644 break; case SMU_PCIE: diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c -index ca57221e39629..c71d50e821682 100644 +index ca57221e39629..82a8c184526d1 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c @@ -338,7 +338,7 @@ sienna_cichlid_get_allowed_feature_mask(struct smu_context *smu, @@ -107298,7 +130536,7 @@ index ca57221e39629..c71d50e821682 100644 *(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_DS_LCLK_BIT); return 0; -@@ -358,6 +358,17 @@ static void sienna_cichlid_check_bxco_support(struct smu_context *smu) +@@ -358,6 +358,23 @@ static void sienna_cichlid_check_bxco_support(struct smu_context *smu) smu_baco->platform_support = (val & RCC_BIF_STRAP0__STRAP_PX_CAPABLE_MASK) ? 
true : false; @@ -107310,13 +130548,19 @@ index ca57221e39629..c71d50e821682 100644 + if (((adev->pdev->device == 0x73A1) && + (adev->pdev->revision == 0x00)) || + ((adev->pdev->device == 0x73BF) && -+ (adev->pdev->revision == 0xCF))) ++ (adev->pdev->revision == 0xCF)) || ++ ((adev->pdev->device == 0x7422) && ++ (adev->pdev->revision == 0x00)) || ++ ((adev->pdev->device == 0x73A3) && ++ (adev->pdev->revision == 0x00)) || ++ ((adev->pdev->device == 0x73E3) && ++ (adev->pdev->revision == 0x00))) + smu_baco->platform_support = false; + } } -@@ -418,6 +429,36 @@ static int sienna_cichlid_store_powerplay_table(struct smu_context *smu) +@@ -418,6 +435,36 @@ static int sienna_cichlid_store_powerplay_table(struct smu_context *smu) return 0; } @@ -107353,7 +130597,7 @@ index ca57221e39629..c71d50e821682 100644 static int sienna_cichlid_setup_pptable(struct smu_context *smu) { int ret = 0; -@@ -438,7 +479,7 @@ static int sienna_cichlid_setup_pptable(struct smu_context *smu) +@@ -438,7 +485,7 @@ static int sienna_cichlid_setup_pptable(struct smu_context *smu) if (ret) return ret; @@ -107362,7 +130606,7 @@ index ca57221e39629..c71d50e821682 100644 } static int sienna_cichlid_tables_init(struct smu_context *smu) -@@ -1278,21 +1319,37 @@ static int sienna_cichlid_populate_umd_state_clk(struct smu_context *smu) +@@ -1278,21 +1325,37 @@ static int sienna_cichlid_populate_umd_state_clk(struct smu_context *smu) &dpm_context->dpm_tables.soc_table; struct smu_umd_pstate_table *pstate_table = &smu->pstate_table; @@ -107405,7 +130649,7 @@ index ca57221e39629..c71d50e821682 100644 return 0; } -@@ -3728,14 +3785,14 @@ static ssize_t sienna_cichlid_get_gpu_metrics(struct smu_context *smu, +@@ -3728,14 +3791,14 @@ static ssize_t sienna_cichlid_get_gpu_metrics(struct smu_context *smu, static int sienna_cichlid_enable_mgpu_fan_boost(struct smu_context *smu) { @@ -107423,7 +130667,7 @@ index ca57221e39629..c71d50e821682 100644 return 0; return smu_cmn_send_smc_msg_with_param(smu, -@@ -3869,6 +3926,7 @@ static const struct pptable_funcs sienna_cichlid_ppt_funcs = { +@@ -3869,6 +3932,7 @@ static const struct pptable_funcs sienna_cichlid_ppt_funcs = { .dump_pptable = sienna_cichlid_dump_pptable, .init_microcode = smu_v11_0_init_microcode, .load_microcode = smu_v11_0_load_microcode, @@ -107451,7 +130695,7 @@ index 38cd0ece24f6b..42f705c7a36f8 100644 #endif diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c -index 87b055466a33f..614c3d0495141 100644 +index 87b055466a33f..83fa3d20a1d57 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c @@ -772,7 +772,7 @@ int smu_v11_0_set_allowed_mask(struct smu_context *smu) @@ -107472,6 +130716,17 @@ index 87b055466a33f..614c3d0495141 100644 /* * To prevent from possible overheat, some ASICs may have requirement * for minimum fan speed: +@@ -1593,6 +1595,10 @@ bool smu_v11_0_baco_is_support(struct smu_context *smu) + if (amdgpu_sriov_vf(smu->adev) || !smu_baco->platform_support) + return false; + ++ /* return true if ASIC is in BACO state already */ ++ if (smu_v11_0_baco_get_state(smu) == SMU_BACO_STATE_ENTER) ++ return true; ++ + /* Arcturus does not support this bit mask */ + if (smu_cmn_feature_is_supported(smu, SMU_FEATURE_BACO_BIT) && + !smu_cmn_feature_is_enabled(smu, SMU_FEATURE_BACO_BIT)) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c index f6ef0ce6e9e2c..a9dceef4a7011 100644 --- 
a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c @@ -108111,7 +131366,7 @@ index 431b6e12a81fe..68ec45abc1fbf 100644 select DRM_PANEL_BRIDGE help diff --git a/drivers/gpu/drm/bridge/adv7511/adv7511.h b/drivers/gpu/drm/bridge/adv7511/adv7511.h -index 05e3abb5a0c9a..aeeb09a27202e 100644 +index 05e3abb5a0c9a..fdd8e3d3232ec 100644 --- a/drivers/gpu/drm/bridge/adv7511/adv7511.h +++ b/drivers/gpu/drm/bridge/adv7511/adv7511.h @@ -169,6 +169,7 @@ @@ -108134,6 +131389,16 @@ index 05e3abb5a0c9a..aeeb09a27202e 100644 ADV7511_CEC_CTRL_POWER_DOWN); return 0; } +@@ -397,7 +395,8 @@ static inline int adv7511_cec_init(struct device *dev, struct adv7511 *adv7511) + + void adv7533_dsi_power_on(struct adv7511 *adv); + void adv7533_dsi_power_off(struct adv7511 *adv); +-void adv7533_mode_set(struct adv7511 *adv, const struct drm_display_mode *mode); ++enum drm_mode_status adv7533_mode_valid(struct adv7511 *adv, ++ const struct drm_display_mode *mode); + int adv7533_patch_registers(struct adv7511 *adv); + int adv7533_patch_cec_registers(struct adv7511 *adv); + int adv7533_attach_dsi(struct adv7511 *adv); diff --git a/drivers/gpu/drm/bridge/adv7511/adv7511_cec.c b/drivers/gpu/drm/bridge/adv7511/adv7511_cec.c index a20a45c0b353f..ddd1305b82b2c 100644 --- a/drivers/gpu/drm/bridge/adv7511/adv7511_cec.c @@ -108157,7 +131422,7 @@ index a20a45c0b353f..ddd1305b82b2c 100644 return ret == -EPROBE_DEFER ? ret : 0; } diff --git a/drivers/gpu/drm/bridge/adv7511/adv7511_drv.c b/drivers/gpu/drm/bridge/adv7511/adv7511_drv.c -index 76555ae64e9ce..3dc551d223d66 100644 +index 76555ae64e9ce..44762116aac97 100644 --- a/drivers/gpu/drm/bridge/adv7511/adv7511_drv.c +++ b/drivers/gpu/drm/bridge/adv7511/adv7511_drv.c @@ -351,11 +351,17 @@ static void __adv7511_power_on(struct adv7511 *adv7511) @@ -108211,7 +131476,53 @@ index 76555ae64e9ce..3dc551d223d66 100644 } adv7511->status = status; -@@ -1048,6 +1063,10 @@ static int adv7511_init_cec_regmap(struct adv7511 *adv) +@@ -682,7 +697,7 @@ adv7511_detect(struct adv7511 *adv7511, struct drm_connector *connector) + } + + static enum drm_mode_status adv7511_mode_valid(struct adv7511 *adv7511, +- struct drm_display_mode *mode) ++ const struct drm_display_mode *mode) + { + if (mode->clock > 165000) + return MODE_CLOCK_HIGH; +@@ -776,9 +791,6 @@ static void adv7511_mode_set(struct adv7511 *adv7511, + regmap_update_bits(adv7511->regmap, 0x17, + 0x60, (vsync_polarity << 6) | (hsync_polarity << 5)); + +- if (adv7511->type == ADV7533 || adv7511->type == ADV7535) +- adv7533_mode_set(adv7511, adj_mode); +- + drm_mode_copy(&adv7511->curr_mode, adj_mode); + + /* +@@ -898,6 +910,18 @@ static void adv7511_bridge_mode_set(struct drm_bridge *bridge, + adv7511_mode_set(adv, mode, adj_mode); + } + ++static enum drm_mode_status adv7511_bridge_mode_valid(struct drm_bridge *bridge, ++ const struct drm_display_info *info, ++ const struct drm_display_mode *mode) ++{ ++ struct adv7511 *adv = bridge_to_adv7511(bridge); ++ ++ if (adv->type == ADV7533 || adv->type == ADV7535) ++ return adv7533_mode_valid(adv, mode); ++ else ++ return adv7511_mode_valid(adv, mode); ++} ++ + static int adv7511_bridge_attach(struct drm_bridge *bridge, + enum drm_bridge_attach_flags flags) + { +@@ -948,6 +972,7 @@ static const struct drm_bridge_funcs adv7511_bridge_funcs = { + .enable = adv7511_bridge_enable, + .disable = adv7511_bridge_disable, + .mode_set = adv7511_bridge_mode_set, ++ .mode_valid = adv7511_bridge_mode_valid, + .attach = adv7511_bridge_attach, + .detect = adv7511_bridge_detect, + .get_edid = adv7511_bridge_get_edid, 
+@@ -1048,6 +1073,10 @@ static int adv7511_init_cec_regmap(struct adv7511 *adv) ADV7511_CEC_I2C_ADDR_DEFAULT); if (IS_ERR(adv->i2c_cec)) return PTR_ERR(adv->i2c_cec); @@ -108222,7 +131533,7 @@ index 76555ae64e9ce..3dc551d223d66 100644 i2c_set_clientdata(adv->i2c_cec, adv); adv->regmap_cec = devm_regmap_init_i2c(adv->i2c_cec, -@@ -1252,9 +1271,6 @@ static int adv7511_probe(struct i2c_client *i2c, const struct i2c_device_id *id) +@@ -1252,9 +1281,6 @@ static int adv7511_probe(struct i2c_client *i2c, const struct i2c_device_id *id) if (ret) goto err_i2c_unregister_packet; @@ -108232,7 +131543,7 @@ index 76555ae64e9ce..3dc551d223d66 100644 INIT_WORK(&adv7511->hpd_work, adv7511_hpd_work); if (i2c->irq) { -@@ -1291,6 +1307,7 @@ static int adv7511_probe(struct i2c_client *i2c, const struct i2c_device_id *id) +@@ -1291,6 +1317,7 @@ static int adv7511_probe(struct i2c_client *i2c, const struct i2c_device_id *id) return 0; err_unregister_cec: @@ -108240,7 +131551,7 @@ index 76555ae64e9ce..3dc551d223d66 100644 i2c_unregister_device(adv7511->i2c_cec); clk_disable_unprepare(adv7511->cec_clk); err_i2c_unregister_packet: -@@ -1309,8 +1326,6 @@ static int adv7511_remove(struct i2c_client *i2c) +@@ -1309,8 +1336,6 @@ static int adv7511_remove(struct i2c_client *i2c) if (adv7511->type == ADV7533 || adv7511->type == ADV7535) adv7533_detach_dsi(adv7511); @@ -108249,7 +131560,7 @@ index 76555ae64e9ce..3dc551d223d66 100644 adv7511_uninit_regulators(adv7511); -@@ -1319,6 +1334,8 @@ static int adv7511_remove(struct i2c_client *i2c) +@@ -1319,6 +1344,8 @@ static int adv7511_remove(struct i2c_client *i2c) adv7511_audio_exit(adv7511); cec_unregister_adapter(adv7511->cec_adap); @@ -108258,21 +131569,21 @@ index 76555ae64e9ce..3dc551d223d66 100644 i2c_unregister_device(adv7511->i2c_packet); i2c_unregister_device(adv7511->i2c_edid); -@@ -1362,10 +1379,21 @@ static struct i2c_driver adv7511_driver = { +@@ -1362,10 +1389,21 @@ static struct i2c_driver adv7511_driver = { static int __init adv7511_init(void) { - if (IS_ENABLED(CONFIG_DRM_MIPI_DSI)) - mipi_dsi_driver_register(&adv7533_dsi_driver); + int ret; - -- return i2c_add_driver(&adv7511_driver); ++ + if (IS_ENABLED(CONFIG_DRM_MIPI_DSI)) { + ret = mipi_dsi_driver_register(&adv7533_dsi_driver); + if (ret) + return ret; + } -+ + +- return i2c_add_driver(&adv7511_driver); + ret = i2c_add_driver(&adv7511_driver); + if (ret) { + if (IS_ENABLED(CONFIG_DRM_MIPI_DSI)) @@ -108283,6 +131594,50 @@ index 76555ae64e9ce..3dc551d223d66 100644 } module_init(adv7511_init); +diff --git a/drivers/gpu/drm/bridge/adv7511/adv7533.c b/drivers/gpu/drm/bridge/adv7511/adv7533.c +index 59d718bde8c41..7eda12f338a1d 100644 +--- a/drivers/gpu/drm/bridge/adv7511/adv7533.c ++++ b/drivers/gpu/drm/bridge/adv7511/adv7533.c +@@ -100,26 +100,27 @@ void adv7533_dsi_power_off(struct adv7511 *adv) + regmap_write(adv->regmap_cec, 0x27, 0x0b); + } + +-void adv7533_mode_set(struct adv7511 *adv, const struct drm_display_mode *mode) ++enum drm_mode_status adv7533_mode_valid(struct adv7511 *adv, ++ const struct drm_display_mode *mode) + { ++ int lanes; + struct mipi_dsi_device *dsi = adv->dsi; +- int lanes, ret; +- +- if (adv->num_dsi_lanes != 4) +- return; + + if (mode->clock > 80000) + lanes = 4; + else + lanes = 3; + +- if (lanes != dsi->lanes) { +- mipi_dsi_detach(dsi); +- dsi->lanes = lanes; +- ret = mipi_dsi_attach(dsi); +- if (ret) +- dev_err(&dsi->dev, "failed to change host lanes\n"); +- } ++ /* ++ * TODO: add support for dynamic switching of lanes ++ * by using the bridge pre_enable() op . 
Till then filter ++ * out the modes which shall need different number of lanes ++ * than what was configured in the device tree. ++ */ ++ if (lanes != dsi->lanes) ++ return MODE_BAD; ++ ++ return MODE_OK; + } + + int adv7533_patch_registers(struct adv7511 *adv) diff --git a/drivers/gpu/drm/bridge/analogix/analogix_dp_core.c b/drivers/gpu/drm/bridge/analogix/analogix_dp_core.c index b7d2e4449cfaa..f0305f833b6c0 100644 --- a/drivers/gpu/drm/bridge/analogix/analogix_dp_core.c @@ -108481,7 +131836,7 @@ index cab6c8b92efd4..6a4f20fccf841 100644 dev_warn(dp->dev, "Failed to apply PSR %d\n", ret); return ret; diff --git a/drivers/gpu/drm/bridge/analogix/anx7625.c b/drivers/gpu/drm/bridge/analogix/anx7625.c -index 14d73fb1dd15b..392a9c56e9a00 100644 +index 14d73fb1dd15b..f895ef1939fa0 100644 --- a/drivers/gpu/drm/bridge/analogix/anx7625.c +++ b/drivers/gpu/drm/bridge/analogix/anx7625.c @@ -720,7 +720,7 @@ static int edid_read(struct anx7625_data *ctx, @@ -108502,7 +131857,7 @@ index 14d73fb1dd15b..392a9c56e9a00 100644 } static int segments_edid_read(struct anx7625_data *ctx, -@@ -785,13 +785,14 @@ static int segments_edid_read(struct anx7625_data *ctx, +@@ -785,17 +785,18 @@ static int segments_edid_read(struct anx7625_data *ctx, if (cnt > EDID_TRY_CNT) return -EIO; @@ -108519,6 +131874,20 @@ index 14d73fb1dd15b..392a9c56e9a00 100644 int count, blocks_num; u8 pblock_buf[MAX_DPCD_BUFFER_SIZE]; u8 i, j; +- u8 g_edid_break = 0; ++ int g_edid_break = 0; + int ret; + struct device *dev = &ctx->client->dev; + +@@ -826,7 +827,7 @@ static int sp_tx_edid_read(struct anx7625_data *ctx, + g_edid_break = edid_read(ctx, offset, + pblock_buf); + +- if (g_edid_break) ++ if (g_edid_break < 0) + break; + + memcpy(&pedid_blocks_buf[offset], @@ -887,7 +888,11 @@ static int sp_tx_edid_read(struct anx7625_data *ctx, } @@ -109414,7 +132783,7 @@ index feb04f127b550..f50b47ac11a82 100644 } diff --git a/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c b/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c -index f08d0fded61f7..d3129a3e6ab76 100644 +index f08d0fded61f7..8bb403bc712a4 100644 --- a/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c +++ b/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c @@ -757,6 +757,14 @@ static void hdmi_enable_audio_clk(struct dw_hdmi *hdmi, bool enable) @@ -109444,6 +132813,26 @@ index f08d0fded61f7..d3129a3e6ab76 100644 *num_output_fmts = 1; output_fmts[0] = MEDIA_BUS_FMT_FIXED; +@@ -2585,6 +2594,9 @@ static u32 *dw_hdmi_bridge_atomic_get_output_bus_fmts(struct drm_bridge *bridge, + * if supported. 
In any case the default RGB888 format is added + */ + ++ /* Default 8bit RGB fallback */ ++ output_fmts[i++] = MEDIA_BUS_FMT_RGB888_1X24; ++ + if (max_bpc >= 16 && info->bpc == 16) { + if (info->color_formats & DRM_COLOR_FORMAT_YCRCB444) + output_fmts[i++] = MEDIA_BUS_FMT_YUV16_1X48; +@@ -2618,9 +2630,6 @@ static u32 *dw_hdmi_bridge_atomic_get_output_bus_fmts(struct drm_bridge *bridge, + if (info->color_formats & DRM_COLOR_FORMAT_YCRCB444) + output_fmts[i++] = MEDIA_BUS_FMT_YUV8_1X24; + +- /* Default 8bit RGB fallback */ +- output_fmts[i++] = MEDIA_BUS_FMT_RGB888_1X24; +- + *num_output_fmts = i; + + return output_fmts; @@ -2961,6 +2970,7 @@ static irqreturn_t dw_hdmi_irq(int irq, void *dev_id) { struct dw_hdmi *hdmi = dev_id; @@ -109629,7 +133018,7 @@ index a32f70bc68ea4..c901c0e1a3b04 100644 static int sn65dsi83_remove(struct i2c_client *client) diff --git a/drivers/gpu/drm/bridge/ti-sn65dsi86.c b/drivers/gpu/drm/bridge/ti-sn65dsi86.c -index 41d48a393e7f5..45a5f1e48f0ef 100644 +index 41d48a393e7f5..bbedce0eeddae 100644 --- a/drivers/gpu/drm/bridge/ti-sn65dsi86.c +++ b/drivers/gpu/drm/bridge/ti-sn65dsi86.c @@ -188,6 +188,7 @@ static const struct regmap_config ti_sn65dsi86_regmap_config = { @@ -109640,6 +133029,18 @@ index 41d48a393e7f5..45a5f1e48f0ef 100644 }; static void ti_sn65dsi86_write_u16(struct ti_sn65dsi86 *pdata, +@@ -919,9 +920,9 @@ static void ti_sn_bridge_set_video_timings(struct ti_sn65dsi86 *pdata) + &pdata->bridge.encoder->crtc->state->adjusted_mode; + u8 hsync_polarity = 0, vsync_polarity = 0; + +- if (mode->flags & DRM_MODE_FLAG_PHSYNC) ++ if (mode->flags & DRM_MODE_FLAG_NHSYNC) + hsync_polarity = CHA_HSYNC_POLARITY; +- if (mode->flags & DRM_MODE_FLAG_PVSYNC) ++ if (mode->flags & DRM_MODE_FLAG_NVSYNC) + vsync_polarity = CHA_VSYNC_POLARITY; + + ti_sn65dsi86_write_u16(pdata, SN_CHA_ACTIVE_LINE_LENGTH_LOW_REG, @@ -1472,6 +1473,7 @@ static inline void ti_sn_gpio_unregister(void) {} static void ti_sn65dsi86_runtime_disable(void *data) @@ -109855,10 +133256,20 @@ index 791379816837d..4f20137ef21d5 100644 connector->polled = DRM_CONNECTOR_POLL_CONNECT | DRM_CONNECTOR_POLL_DISCONNECT; diff --git a/drivers/gpu/drm/drm_connector.c b/drivers/gpu/drm/drm_connector.c -index 2ba257b1ae208..e9b7926d9b663 100644 +index 2ba257b1ae208..cfe163103cfd7 100644 --- a/drivers/gpu/drm/drm_connector.c +++ b/drivers/gpu/drm/drm_connector.c -@@ -2233,6 +2233,9 @@ EXPORT_SYMBOL(drm_connector_atomic_hdr_metadata_equal); +@@ -487,6 +487,9 @@ void drm_connector_cleanup(struct drm_connector *connector) + mutex_destroy(&connector->mutex); + + memset(connector, 0, sizeof(*connector)); ++ ++ if (dev->registered) ++ drm_sysfs_hotplug_event(dev); + } + EXPORT_SYMBOL(drm_connector_cleanup); + +@@ -2233,6 +2236,9 @@ EXPORT_SYMBOL(drm_connector_atomic_hdr_metadata_equal); void drm_connector_set_vrr_capable_property( struct drm_connector *connector, bool capable) { @@ -109897,6 +133308,105 @@ index 298ea7a495913..f7c03ad5a15a5 100644 static void of_dp_aux_depopulate_ep_devices_void(void *data) { +diff --git a/drivers/gpu/drm/drm_dp_dual_mode_helper.c b/drivers/gpu/drm/drm_dp_dual_mode_helper.c +index 9faf49354cabd..cb52a00ae1b11 100644 +--- a/drivers/gpu/drm/drm_dp_dual_mode_helper.c ++++ b/drivers/gpu/drm/drm_dp_dual_mode_helper.c +@@ -63,23 +63,45 @@ + ssize_t drm_dp_dual_mode_read(struct i2c_adapter *adapter, + u8 offset, void *buffer, size_t size) + { ++ u8 zero = 0; ++ char *tmpbuf = NULL; ++ /* ++ * As sub-addressing is not supported by all adaptors, ++ * always explicitly read from the start and discard ++ 
* any bytes that come before the requested offset. ++ * This way, no matter whether the adaptor supports it ++ * or not, we'll end up reading the proper data. ++ */ + struct i2c_msg msgs[] = { + { + .addr = DP_DUAL_MODE_SLAVE_ADDRESS, + .flags = 0, + .len = 1, +- .buf = &offset, ++ .buf = &zero, + }, + { + .addr = DP_DUAL_MODE_SLAVE_ADDRESS, + .flags = I2C_M_RD, +- .len = size, ++ .len = size + offset, + .buf = buffer, + }, + }; + int ret; + ++ if (offset) { ++ tmpbuf = kmalloc(size + offset, GFP_KERNEL); ++ if (!tmpbuf) ++ return -ENOMEM; ++ ++ msgs[1].buf = tmpbuf; ++ } ++ + ret = i2c_transfer(adapter, msgs, ARRAY_SIZE(msgs)); ++ if (tmpbuf) ++ memcpy(buffer, tmpbuf + offset, size); ++ ++ kfree(tmpbuf); ++ + if (ret < 0) + return ret; + if (ret != ARRAY_SIZE(msgs)) +@@ -208,18 +230,6 @@ enum drm_dp_dual_mode_type drm_dp_dual_mode_detect(const struct drm_device *dev, + if (ret) + return DRM_DP_DUAL_MODE_UNKNOWN; + +- /* +- * Sigh. Some (maybe all?) type 1 adaptors are broken and ack +- * the offset but ignore it, and instead they just always return +- * data from the start of the HDMI ID buffer. So for a broken +- * type 1 HDMI adaptor a single byte read will always give us +- * 0x44, and for a type 1 DVI adaptor it should give 0x00 +- * (assuming it implements any registers). Fortunately neither +- * of those values will match the type 2 signature of the +- * DP_DUAL_MODE_ADAPTOR_ID register so we can proceed with +- * the type 2 adaptor detection safely even in the presence +- * of broken type 1 adaptors. +- */ + ret = drm_dp_dual_mode_read(adapter, DP_DUAL_MODE_ADAPTOR_ID, + &adaptor_id, sizeof(adaptor_id)); + drm_dbg_kms(dev, "DP dual mode adaptor ID: %02x (err %zd)\n", adaptor_id, ret); +@@ -233,11 +243,10 @@ enum drm_dp_dual_mode_type drm_dp_dual_mode_detect(const struct drm_device *dev, + return DRM_DP_DUAL_MODE_TYPE2_DVI; + } + /* +- * If neither a proper type 1 ID nor a broken type 1 adaptor +- * as described above, assume type 1, but let the user know +- * that we may have misdetected the type. ++ * If not a proper type 1 ID, still assume type 1, but let ++ * the user know that we may have misdetected the type. + */ +- if (!is_type1_adaptor(adaptor_id) && adaptor_id != hdmi_id[0]) ++ if (!is_type1_adaptor(adaptor_id)) + drm_err(dev, "Unexpected DP dual mode adaptor ID %02x\n", adaptor_id); + + } +@@ -343,10 +352,8 @@ EXPORT_SYMBOL(drm_dp_dual_mode_get_tmds_output); + * @enable: enable (as opposed to disable) the TMDS output buffers + * + * Set the state of the TMDS output buffers in the adaptor. For +- * type2 this is set via the DP_DUAL_MODE_TMDS_OEN register. As +- * some type 1 adaptors have problems with registers (see comments +- * in drm_dp_dual_mode_detect()) we avoid touching the register, +- * making this function a no-op on type 1 adaptors. ++ * type2 this is set via the DP_DUAL_MODE_TMDS_OEN register. ++ * Type1 adaptors do not support any register writes. 
+ * + * Returns: + * 0 on success, negative error code on failure diff --git a/drivers/gpu/drm/drm_dp_helper.c b/drivers/gpu/drm/drm_dp_helper.c index 6d0f2c447f3b9..b8815e7f5832e 100644 --- a/drivers/gpu/drm/drm_dp_helper.c @@ -109977,7 +133487,7 @@ index 6d0f2c447f3b9..b8815e7f5832e 100644 ret = drm_edp_backlight_set_level(aux, bl, level); diff --git a/drivers/gpu/drm/drm_dp_mst_topology.c b/drivers/gpu/drm/drm_dp_mst_topology.c -index 86d13d6bc4631..9bf9430209b0f 100644 +index 86d13d6bc4631..0d915fe8b6e43 100644 --- a/drivers/gpu/drm/drm_dp_mst_topology.c +++ b/drivers/gpu/drm/drm_dp_mst_topology.c @@ -3860,9 +3860,7 @@ int drm_dp_mst_topology_mgr_resume(struct drm_dp_mst_topology_mgr *mgr, @@ -110034,8 +133544,17 @@ index 86d13d6bc4631..9bf9430209b0f 100644 seq_printf(m, "branch oui read failed\n"); goto out; } +@@ -5287,7 +5285,7 @@ int drm_dp_mst_add_affected_dsc_crtcs(struct drm_atomic_state *state, struct drm + mst_state = drm_atomic_get_mst_topology_state(state, mgr); + + if (IS_ERR(mst_state)) +- return -EINVAL; ++ return PTR_ERR(mst_state); + + list_for_each_entry(pos, &mst_state->vcpis, next) { + diff --git a/drivers/gpu/drm/drm_drv.c b/drivers/gpu/drm/drm_drv.c -index 7a5097467ba5c..b3a1636d1b984 100644 +index 7a5097467ba5c..6f1791613757b 100644 --- a/drivers/gpu/drm/drm_drv.c +++ b/drivers/gpu/drm/drm_drv.c @@ -581,6 +581,7 @@ static int drm_dev_init(struct drm_device *dev, @@ -110046,7 +133565,12 @@ index 7a5097467ba5c..b3a1636d1b984 100644 int ret; if (!drm_core_init_complete) { -@@ -617,13 +618,15 @@ static int drm_dev_init(struct drm_device *dev, +@@ -613,17 +614,19 @@ static int drm_dev_init(struct drm_device *dev, + mutex_init(&dev->clientlist_mutex); + mutex_init(&dev->master_mutex); + +- ret = drmm_add_action(dev, drm_dev_init_release, NULL); ++ ret = drmm_add_action_or_reset(dev, drm_dev_init_release, NULL); if (ret) return ret; @@ -110066,7 +133590,7 @@ index 7a5097467ba5c..b3a1636d1b984 100644 ret = drm_minor_alloc(dev, DRM_MINOR_RENDER); if (ret) diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c -index ea9a79bc95839..6ab048ba8021c 100644 +index ea9a79bc95839..d940c76419c5a 100644 --- a/drivers/gpu/drm/drm_edid.c +++ b/drivers/gpu/drm/drm_edid.c @@ -1994,9 +1994,6 @@ struct edid *drm_do_get_edid(struct drm_connector *connector, @@ -110099,7 +133623,17 @@ index ea9a79bc95839..6ab048ba8021c 100644 if (has_audio) { DRM_DEBUG_KMS("Monitor has basic audio support\n"); -@@ -5003,21 +5004,21 @@ static void drm_parse_hdmi_deep_color_info(struct drm_connector *connector, +@@ -4941,7 +4942,8 @@ static void drm_parse_hdmi_forum_vsdb(struct drm_connector *connector, + else if (hf_vsdb[11] & DRM_EDID_DSC_10BPC) + hdmi_dsc->bpc_supported = 10; + else +- hdmi_dsc->bpc_supported = 0; ++ /* Supports min 8 BPC if DSC 1.2 is supported*/ ++ hdmi_dsc->bpc_supported = 8; + + dsc_max_frl_rate = (hf_vsdb[12] & DRM_EDID_DSC_MAX_FRL_RATE_MASK) >> 4; + drm_get_max_frl_rate(dsc_max_frl_rate, &hdmi_dsc->max_lanes, +@@ -5003,21 +5005,21 @@ static void drm_parse_hdmi_deep_color_info(struct drm_connector *connector, if (hdmi[6] & DRM_EDID_HDMI_DC_30) { dc_bpc = 10; @@ -110124,7 +133658,7 @@ index ea9a79bc95839..6ab048ba8021c 100644 DRM_DEBUG("%s: HDMI sink does deep color 48.\n", connector->name); } -@@ -5032,16 +5033,9 @@ static void drm_parse_hdmi_deep_color_info(struct drm_connector *connector, +@@ -5032,16 +5034,9 @@ static void drm_parse_hdmi_deep_color_info(struct drm_connector *connector, connector->name, dc_bpc); info->bpc = dc_bpc; @@ -110142,7 +133676,7 @@ index 
ea9a79bc95839..6ab048ba8021c 100644 DRM_DEBUG("%s: HDMI sink does YCRCB444 in deep color.\n", connector->name); } -@@ -5205,6 +5199,7 @@ u32 drm_add_display_info(struct drm_connector *connector, const struct edid *edi +@@ -5205,6 +5200,7 @@ u32 drm_add_display_info(struct drm_connector *connector, const struct edid *edi if (!(edid->input & DRM_EDID_INPUT_DIGITAL)) return quirks; @@ -110150,7 +133684,7 @@ index ea9a79bc95839..6ab048ba8021c 100644 drm_parse_cea_ext(connector, edid); /* -@@ -5253,7 +5248,6 @@ u32 drm_add_display_info(struct drm_connector *connector, const struct edid *edi +@@ -5253,7 +5249,6 @@ u32 drm_add_display_info(struct drm_connector *connector, const struct edid *edi DRM_DEBUG("%s: Assigning EDID-1.4 digital sink color depth as %d bpc.\n", connector->name, info->bpc); @@ -110210,6 +133744,30 @@ index 8e7a124d6c5a3..ed589e7182bb4 100644 /* * Shamelessly leak the physical address to user-space. As +diff --git a/drivers/gpu/drm/drm_fourcc.c b/drivers/gpu/drm/drm_fourcc.c +index eda832f9200db..32ee023aed266 100644 +--- a/drivers/gpu/drm/drm_fourcc.c ++++ b/drivers/gpu/drm/drm_fourcc.c +@@ -260,12 +260,15 @@ const struct drm_format_info *__drm_format_info(u32 format) + .vsub = 2, .is_yuv = true }, + { .format = DRM_FORMAT_Q410, .depth = 0, + .num_planes = 3, .char_per_block = { 2, 2, 2 }, +- .block_w = { 1, 1, 1 }, .block_h = { 1, 1, 1 }, .hsub = 0, +- .vsub = 0, .is_yuv = true }, ++ .block_w = { 1, 1, 1 }, .block_h = { 1, 1, 1 }, .hsub = 1, ++ .vsub = 1, .is_yuv = true }, + { .format = DRM_FORMAT_Q401, .depth = 0, + .num_planes = 3, .char_per_block = { 2, 2, 2 }, +- .block_w = { 1, 1, 1 }, .block_h = { 1, 1, 1 }, .hsub = 0, +- .vsub = 0, .is_yuv = true }, ++ .block_w = { 1, 1, 1 }, .block_h = { 1, 1, 1 }, .hsub = 1, ++ .vsub = 1, .is_yuv = true }, ++ { .format = DRM_FORMAT_P030, .depth = 0, .num_planes = 2, ++ .char_per_block = { 4, 8, 0 }, .block_w = { 3, 3, 0 }, .block_h = { 1, 1, 0 }, ++ .hsub = 2, .vsub = 2, .is_yuv = true}, + }; + + unsigned int i; diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c index 09c8200458594..dbd19a34b517b 100644 --- a/drivers/gpu/drm/drm_gem.c @@ -110292,7 +133850,7 @@ index d53388199f34c..6533efa840204 100644 cma_obj = to_drm_gem_cma_obj(obj); diff --git a/drivers/gpu/drm/drm_gem_shmem_helper.c b/drivers/gpu/drm/drm_gem_shmem_helper.c -index a61946374c826..a30ffc07470cc 100644 +index a61946374c826..15c3849e995bd 100644 --- a/drivers/gpu/drm/drm_gem_shmem_helper.c +++ b/drivers/gpu/drm/drm_gem_shmem_helper.c @@ -22,17 +22,22 @@ @@ -110514,7 +134072,31 @@ index a61946374c826..a30ffc07470cc 100644 mutex_unlock(&shmem->pages_lock); return true; -@@ -581,19 +568,18 @@ static const struct vm_operations_struct drm_gem_shmem_vm_ops = { +@@ -554,12 +541,20 @@ static void drm_gem_shmem_vm_open(struct vm_area_struct *vma) + { + struct drm_gem_object *obj = vma->vm_private_data; + struct drm_gem_shmem_object *shmem = to_drm_gem_shmem_obj(obj); +- int ret; + + WARN_ON(shmem->base.import_attach); + +- ret = drm_gem_shmem_get_pages(shmem); +- WARN_ON_ONCE(ret != 0); ++ mutex_lock(&shmem->pages_lock); ++ ++ /* ++ * We should have already pinned the pages when the buffer was first ++ * mmap'd, vm_open() just grabs an additional reference for the new ++ * mm the vma is getting copied into (ie. on fork()). 
++ */ ++ if (!WARN_ON_ONCE(!shmem->pages_use_count)) ++ shmem->pages_use_count++; ++ ++ mutex_unlock(&shmem->pages_lock); + + drm_gem_vm_open(vma); + } +@@ -581,19 +576,18 @@ static const struct vm_operations_struct drm_gem_shmem_vm_ops = { /** * drm_gem_shmem_mmap - Memory-map a shmem GEM object @@ -110538,16 +134120,22 @@ index a61946374c826..a30ffc07470cc 100644 int ret; if (obj->import_attach) { -@@ -604,8 +590,6 @@ int drm_gem_shmem_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma) +@@ -604,13 +598,9 @@ int drm_gem_shmem_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma) return dma_buf_mmap(obj->dma_buf, vma, 0); } - shmem = to_drm_gem_shmem_obj(obj); - ret = drm_gem_shmem_get_pages(shmem); - if (ret) { - drm_gem_vm_close(vma); -@@ -624,17 +608,13 @@ EXPORT_SYMBOL_GPL(drm_gem_shmem_mmap); +- if (ret) { +- drm_gem_vm_close(vma); ++ if (ret) + return ret; +- } + + vma->vm_flags |= VM_MIXEDMAP | VM_DONTEXPAND; + vma->vm_page_prot = vm_get_page_prot(vma->vm_flags); +@@ -624,17 +614,13 @@ EXPORT_SYMBOL_GPL(drm_gem_shmem_mmap); /** * drm_gem_shmem_print_info() - Print &drm_gem_shmem_object info for debugfs @@ -110568,7 +134156,7 @@ index a61946374c826..a30ffc07470cc 100644 drm_printf_indent(p, indent, "pages_use_count=%u\n", shmem->pages_use_count); drm_printf_indent(p, indent, "vmap_use_count=%u\n", shmem->vmap_use_count); drm_printf_indent(p, indent, "vaddr=%p\n", shmem->vaddr); -@@ -644,12 +624,10 @@ EXPORT_SYMBOL(drm_gem_shmem_print_info); +@@ -644,12 +630,10 @@ EXPORT_SYMBOL(drm_gem_shmem_print_info); /** * drm_gem_shmem_get_sg_table - Provide a scatter/gather table of pinned * pages for a shmem GEM object @@ -110583,7 +134171,7 @@ index a61946374c826..a30ffc07470cc 100644 * * Drivers who need to acquire an scatter/gather table for objects need to call * drm_gem_shmem_get_pages_sgt() instead. -@@ -657,9 +635,9 @@ EXPORT_SYMBOL(drm_gem_shmem_print_info); +@@ -657,9 +641,9 @@ EXPORT_SYMBOL(drm_gem_shmem_print_info); * Returns: * A pointer to the scatter/gather table of pinned pages or NULL on failure. */ @@ -110595,7 +134183,7 @@ index a61946374c826..a30ffc07470cc 100644 WARN_ON(shmem->base.import_attach); -@@ -670,7 +648,7 @@ EXPORT_SYMBOL_GPL(drm_gem_shmem_get_sg_table); +@@ -670,7 +654,7 @@ EXPORT_SYMBOL_GPL(drm_gem_shmem_get_sg_table); /** * drm_gem_shmem_get_pages_sgt - Pin pages, dma map them, and return a * scatter/gather table for a shmem GEM object. @@ -110604,7 +134192,7 @@ index a61946374c826..a30ffc07470cc 100644 * * This function returns a scatter/gather table suitable for driver usage. If * the sg table doesn't exist, the pages are pinned, dma-mapped, and a sg -@@ -683,10 +661,10 @@ EXPORT_SYMBOL_GPL(drm_gem_shmem_get_sg_table); +@@ -683,10 +667,10 @@ EXPORT_SYMBOL_GPL(drm_gem_shmem_get_sg_table); * Returns: * A pointer to the scatter/gather table of pinned pages or errno on failure. 
*/ @@ -110617,7 +134205,7 @@ index a61946374c826..a30ffc07470cc 100644 struct sg_table *sgt; if (shmem->sgt) -@@ -698,7 +676,7 @@ struct sg_table *drm_gem_shmem_get_pages_sgt(struct drm_gem_object *obj) +@@ -698,7 +682,7 @@ struct sg_table *drm_gem_shmem_get_pages_sgt(struct drm_gem_object *obj) if (ret) return ERR_PTR(ret); @@ -110656,7 +134244,7 @@ index ecf3d2a54a98c..759c65bfd2845 100644 EXPORT_SYMBOL(drm_gem_ttm_vunmap); diff --git a/drivers/gpu/drm/drm_internal.h b/drivers/gpu/drm/drm_internal.h -index 17f3548c8ed25..d05e6a5b66873 100644 +index 17f3548c8ed25..f97a0875b9a12 100644 --- a/drivers/gpu/drm/drm_internal.h +++ b/drivers/gpu/drm/drm_internal.h @@ -74,8 +74,8 @@ int drm_prime_fd_to_handle_ioctl(struct drm_device *dev, void *data, @@ -110670,6 +134258,16 @@ index 17f3548c8ed25..d05e6a5b66873 100644 /* drm_drv.c */ struct drm_minor *drm_minor_acquire(unsigned int minor_id); +@@ -104,7 +104,8 @@ static inline void drm_vblank_flush_worker(struct drm_vblank_crtc *vblank) + + static inline void drm_vblank_destroy_worker(struct drm_vblank_crtc *vblank) + { +- kthread_destroy_worker(vblank->worker); ++ if (vblank->worker) ++ kthread_destroy_worker(vblank->worker); + } + + int drm_vblank_worker_init(struct drm_vblank_crtc *vblank); diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c index be4a52dc4d6fa..fb5e6f86dea20 100644 --- a/drivers/gpu/drm/drm_ioctl.c @@ -110734,7 +134332,7 @@ index 997b8827fed27..37c34146eea83 100644 */ int drm_of_find_panel_or_bridge(const struct device_node *np, diff --git a/drivers/gpu/drm/drm_panel_orientation_quirks.c b/drivers/gpu/drm/drm_panel_orientation_quirks.c -index e1b2ce4921ae7..083273736c837 100644 +index e1b2ce4921ae7..ce739ba45c551 100644 --- a/drivers/gpu/drm/drm_panel_orientation_quirks.c +++ b/drivers/gpu/drm/drm_panel_orientation_quirks.c @@ -109,6 +109,18 @@ static const struct drm_dmi_panel_orientation_data lcd1200x1920_rightside_up = { @@ -110756,10 +134354,16 @@ index e1b2ce4921ae7..083273736c837 100644 static const struct dmi_system_id orientation_data[] = { { /* Acer One 10 (S1003) */ .matches = { -@@ -116,6 +128,12 @@ static const struct dmi_system_id orientation_data[] = { +@@ -116,6 +128,18 @@ static const struct dmi_system_id orientation_data[] = { DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "One S1003"), }, .driver_data = (void *)&lcd800x1280_rightside_up, ++ }, { /* Acer Switch V 10 (SW5-017) */ ++ .matches = { ++ DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Acer"), ++ DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "SW5-017"), ++ }, ++ .driver_data = (void *)&lcd800x1280_rightside_up, + }, { /* Anbernic Win600 */ + .matches = { + DMI_EXACT_MATCH(DMI_BOARD_VENDOR, "Anbernic"), @@ -110769,7 +134373,7 @@ index e1b2ce4921ae7..083273736c837 100644 }, { /* Asus T100HA */ .matches = { DMI_EXACT_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), -@@ -154,6 +172,12 @@ static const struct dmi_system_id orientation_data[] = { +@@ -154,6 +178,12 @@ static const struct dmi_system_id orientation_data[] = { DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "MicroPC"), }, .driver_data = (void *)&lcd720x1280_rightside_up, @@ -110782,7 +134386,7 @@ index e1b2ce4921ae7..083273736c837 100644 }, { /* * GPD Pocket, note that the the DMI data is less generic then * it seems, devices with a board-vendor of "AMI Corporation" -@@ -205,6 +229,13 @@ static const struct dmi_system_id orientation_data[] = { +@@ -205,6 +235,13 @@ static const struct dmi_system_id orientation_data[] = { DMI_EXACT_MATCH(DMI_BOARD_NAME, "TW891"), }, .driver_data = (void *)&itworks_tw891, @@ -110796,7 +134400,7 @@ 
index e1b2ce4921ae7..083273736c837 100644 }, { /* * Lenovo Ideapad Miix 310 laptop, only some production batches * have a portrait screen, the resolution checks makes the quirk -@@ -223,13 +254,24 @@ static const struct dmi_system_id orientation_data[] = { +@@ -223,13 +260,30 @@ static const struct dmi_system_id orientation_data[] = { DMI_EXACT_MATCH(DMI_PRODUCT_VERSION, "Lenovo MIIX 320-10ICR"), }, .driver_data = (void *)&lcd800x1280_rightside_up, @@ -110814,6 +134418,12 @@ index e1b2ce4921ae7..083273736c837 100644 DMI_EXACT_MATCH(DMI_PRODUCT_VERSION, "Lenovo ideapad D330-10IGM"), }, .driver_data = (void *)&lcd1200x1920_rightside_up, ++ }, { /* Lenovo Ideapad D330-10IGL (HD) */ ++ .matches = { ++ DMI_EXACT_MATCH(DMI_SYS_VENDOR, "LENOVO"), ++ DMI_EXACT_MATCH(DMI_PRODUCT_VERSION, "Lenovo ideapad D330-10IGL"), ++ }, ++ .driver_data = (void *)&lcd800x1280_rightside_up, + }, { /* Lenovo Yoga Book X90F / X91F / X91L */ + .matches = { + /* Non exact match to match all versions */ @@ -110823,7 +134433,7 @@ index e1b2ce4921ae7..083273736c837 100644 }, { /* OneGX1 Pro */ .matches = { DMI_EXACT_MATCH(DMI_SYS_VENDOR, "SYSTEM_MANUFACTURER"), -@@ -237,6 +279,25 @@ static const struct dmi_system_id orientation_data[] = { +@@ -237,6 +291,25 @@ static const struct dmi_system_id orientation_data[] = { DMI_EXACT_MATCH(DMI_PRODUCT_VERSION, "Default string"), }, .driver_data = (void *)&onegx1_pro, @@ -111081,6 +134691,35 @@ index 486259e154aff..90488ab8c6d8e 100644 /* * Copy the command submission and bo array to kernel space in * one go, and do this outside of any locks. +diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c +index cc5b07f863463..e8ff70be449ac 100644 +--- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c ++++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c +@@ -416,6 +416,12 @@ static void etnaviv_hw_identify(struct etnaviv_gpu *gpu) + if (gpu->identity.model == chipModel_GC700) + gpu->identity.features &= ~chipFeatures_FAST_CLEAR; + ++ /* These models/revisions don't have the 2D pipe bit */ ++ if ((gpu->identity.model == chipModel_GC500 && ++ gpu->identity.revision <= 2) || ++ gpu->identity.model == chipModel_GC300) ++ gpu->identity.features |= chipFeatures_PIPE_2D; ++ + if ((gpu->identity.model == chipModel_GC500 && + gpu->identity.revision < 2) || + (gpu->identity.model == chipModel_GC300 && +@@ -449,8 +455,9 @@ static void etnaviv_hw_identify(struct etnaviv_gpu *gpu) + gpu_read(gpu, VIVS_HI_CHIP_MINOR_FEATURE_5); + } + +- /* GC600 idle register reports zero bits where modules aren't present */ +- if (gpu->identity.model == chipModel_GC600) ++ /* GC600/300 idle register reports zero bits where modules aren't present */ ++ if (gpu->identity.model == chipModel_GC600 || ++ gpu->identity.model == chipModel_GC300) + gpu->idle_mask = VIVS_HI_IDLE_STATE_TX | + VIVS_HI_IDLE_STATE_RA | + VIVS_HI_IDLE_STATE_SE | diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.h b/drivers/gpu/drm/etnaviv/etnaviv_gpu.h index 1c75c8ed5bcea..85eddd492774d 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.h @@ -111094,9 +134733,22 @@ index 1c75c8ed5bcea..85eddd492774d 100644 void __iomem *mmio; int irq; diff --git a/drivers/gpu/drm/etnaviv/etnaviv_mmu.c b/drivers/gpu/drm/etnaviv/etnaviv_mmu.c -index 9fb1a2aadbcb0..aabb997a74eb4 100644 +index 9fb1a2aadbcb0..2de806173b3aa 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_mmu.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_mmu.c +@@ -80,10 +80,10 @@ static int etnaviv_iommu_map(struct etnaviv_iommu_context *context, u32 iova, + return -EINVAL; + + 
for_each_sgtable_dma_sg(sgt, sg, i) { +- u32 pa = sg_dma_address(sg) - sg->offset; ++ phys_addr_t pa = sg_dma_address(sg) - sg->offset; + size_t bytes = sg_dma_len(sg) + sg->offset; + +- VERB("map[%d]: %08x %08x(%zx)", i, iova, pa, bytes); ++ VERB("map[%d]: %08x %pap(%zx)", i, iova, &pa, bytes); + + ret = etnaviv_context_map(context, da, pa, bytes, prot); + if (ret) @@ -286,6 +286,12 @@ void etnaviv_iommu_unmap_gem(struct etnaviv_iommu_context *context, mutex_lock(&context->lock); @@ -111175,6 +134827,22 @@ index 12571ac455404..12989a47eb66e 100644 } #endif +diff --git a/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_rgb.c b/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_rgb.c +index 4d4a715b429d1..2c2b92324a2e9 100644 +--- a/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_rgb.c ++++ b/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_rgb.c +@@ -60,8 +60,9 @@ static int fsl_dcu_drm_connector_get_modes(struct drm_connector *connector) + return drm_panel_get_modes(fsl_connector->panel, connector); + } + +-static int fsl_dcu_drm_connector_mode_valid(struct drm_connector *connector, +- struct drm_display_mode *mode) ++static enum drm_mode_status ++fsl_dcu_drm_connector_mode_valid(struct drm_connector *connector, ++ struct drm_display_mode *mode) + { + if (mode->hdisplay & 0xf) + return MODE_ERROR; diff --git a/drivers/gpu/drm/gma500/gma_display.c b/drivers/gpu/drm/gma500/gma_display.c index b03f7b8241f2b..7162f4c946afe 100644 --- a/drivers/gpu/drm/gma500/gma_display.c @@ -113238,7 +136906,7 @@ index 0000000000000..282020cb47d5b + +#endif /* __INTEL_BACKLIGHT_H__ */ diff --git a/drivers/gpu/drm/i915/display/intel_bios.c b/drivers/gpu/drm/i915/display/intel_bios.c -index fd71346aac7bc..32d5a556b7eac 100644 +index fd71346aac7bc..aa0b936075597 100644 --- a/drivers/gpu/drm/i915/display/intel_bios.c +++ b/drivers/gpu/drm/i915/display/intel_bios.c @@ -1692,6 +1692,39 @@ static u8 map_ddc_pin(struct drm_i915_private *i915, u8 vbt_pin) @@ -113281,7 +136949,30 @@ index fd71346aac7bc..32d5a556b7eac 100644 static enum port __dvo_port_to_port(int n_ports, int n_dvo, const int port_mapping[][3], u8 dvo_port) { -@@ -2622,35 +2655,17 @@ bool intel_bios_is_port_edp(struct drm_i915_private *i915, enum port port) +@@ -1787,6 +1820,22 @@ static enum port dvo_port_to_port(struct drm_i915_private *i915, + dvo_port); + } + ++static enum port ++dsi_dvo_port_to_port(struct drm_i915_private *i915, u8 dvo_port) ++{ ++ switch (dvo_port) { ++ case DVO_PORT_MIPIA: ++ return PORT_A; ++ case DVO_PORT_MIPIC: ++ if (DISPLAY_VER(i915) >= 11) ++ return PORT_B; ++ else ++ return PORT_C; ++ default: ++ return PORT_NONE; ++ } ++} ++ + static int parse_bdb_230_dp_max_link_rate(const int vbt_max_link_rate) + { + switch (vbt_max_link_rate) { +@@ -2622,35 +2671,17 @@ bool intel_bios_is_port_edp(struct drm_i915_private *i915, enum port port) return false; } @@ -113320,7 +137011,7 @@ index fd71346aac7bc..32d5a556b7eac 100644 child->aux_channel != 0) return true; -@@ -2660,10 +2675,36 @@ static bool child_dev_is_dp_dual_mode(const struct child_device_config *child, +@@ -2660,10 +2691,36 @@ static bool child_dev_is_dp_dual_mode(const struct child_device_config *child, bool intel_bios_is_port_dp_dual_mode(struct drm_i915_private *i915, enum port port) { @@ -113358,6 +137049,41 @@ index fd71346aac7bc..32d5a556b7eac 100644 return true; } +@@ -2692,19 +2749,16 @@ bool intel_bios_is_dsi_present(struct drm_i915_private *i915, + + dvo_port = child->dvo_port; + +- if (dvo_port == DVO_PORT_MIPIA || +- (dvo_port == DVO_PORT_MIPIB && DISPLAY_VER(i915) >= 11) || +- (dvo_port == DVO_PORT_MIPIC && 
DISPLAY_VER(i915) < 11)) { +- if (port) +- *port = dvo_port - DVO_PORT_MIPIA; +- return true; +- } else if (dvo_port == DVO_PORT_MIPIB || +- dvo_port == DVO_PORT_MIPIC || +- dvo_port == DVO_PORT_MIPID) { ++ if (dsi_dvo_port_to_port(i915, dvo_port) == PORT_NONE) { + drm_dbg_kms(&i915->drm, + "VBT has unsupported DSI port %c\n", + port_name(dvo_port - DVO_PORT_MIPIA)); ++ continue; + } ++ ++ if (port) ++ *port = dsi_dvo_port_to_port(i915, dvo_port); ++ return true; + } + + return false; +@@ -2789,7 +2843,7 @@ bool intel_bios_get_dsc_params(struct intel_encoder *encoder, + if (!(child->device_type & DEVICE_TYPE_MIPI_OUTPUT)) + continue; + +- if (child->dvo_port - DVO_PORT_MIPIA == encoder->port) { ++ if (dsi_dvo_port_to_port(i915, child->dvo_port) == encoder->port) { + if (!devdata->dsc) + return false; + diff --git a/drivers/gpu/drm/i915/display/intel_bw.c b/drivers/gpu/drm/i915/display/intel_bw.c index 4b94256d73197..ea48620f76d9c 100644 --- a/drivers/gpu/drm/i915/display/intel_bw.c @@ -113446,6 +137172,19 @@ index 46c6eecbd9175..0ceaed1c96562 100644 int min_cdclk; }; +diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.c b/drivers/gpu/drm/i915/display/intel_cdclk.c +index 34fa4130d5c4f..745ffa7572e85 100644 +--- a/drivers/gpu/drm/i915/display/intel_cdclk.c ++++ b/drivers/gpu/drm/i915/display/intel_cdclk.c +@@ -1269,7 +1269,7 @@ static const struct intel_cdclk_vals adlp_cdclk_table[] = { + { .refclk = 24000, .cdclk = 192000, .divider = 2, .ratio = 16 }, + { .refclk = 24000, .cdclk = 312000, .divider = 2, .ratio = 26 }, + { .refclk = 24000, .cdclk = 552000, .divider = 2, .ratio = 46 }, +- { .refclk = 24400, .cdclk = 648000, .divider = 2, .ratio = 54 }, ++ { .refclk = 24000, .cdclk = 648000, .divider = 2, .ratio = 54 }, + + { .refclk = 38400, .cdclk = 179200, .divider = 3, .ratio = 14 }, + { .refclk = 38400, .cdclk = 192000, .divider = 2, .ratio = 10 }, diff --git a/drivers/gpu/drm/i915/display/intel_connector.c b/drivers/gpu/drm/i915/display/intel_connector.c index 9bed1ccecea0d..4f49d782eca23 100644 --- a/drivers/gpu/drm/i915/display/intel_connector.c @@ -113901,7 +137640,7 @@ index b3c8e1c450efb..0e04d4dd1c132 100644 continue; } diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c -index 5cf152be44877..f87e4d510ea5e 100644 +index 5cf152be44877..64a15b636e8d4 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -29,6 +29,7 @@ @@ -114046,7 +137785,93 @@ index 5cf152be44877..f87e4d510ea5e 100644 /* Read the eDP DSC DPCD registers */ if (DISPLAY_VER(dev_priv) >= 10) -@@ -4240,12 +4228,7 @@ intel_dp_detect(struct drm_connector *connector, +@@ -3257,61 +3245,6 @@ static void intel_dp_phy_pattern_update(struct intel_dp *intel_dp, + } + } + +-static void +-intel_dp_autotest_phy_ddi_disable(struct intel_dp *intel_dp, +- const struct intel_crtc_state *crtc_state) +-{ +- struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp); +- struct drm_device *dev = dig_port->base.base.dev; +- struct drm_i915_private *dev_priv = to_i915(dev); +- struct intel_crtc *crtc = to_intel_crtc(dig_port->base.base.crtc); +- enum pipe pipe = crtc->pipe; +- u32 trans_ddi_func_ctl_value, trans_conf_value, dp_tp_ctl_value; +- +- trans_ddi_func_ctl_value = intel_de_read(dev_priv, +- TRANS_DDI_FUNC_CTL(pipe)); +- trans_conf_value = intel_de_read(dev_priv, PIPECONF(pipe)); +- dp_tp_ctl_value = intel_de_read(dev_priv, TGL_DP_TP_CTL(pipe)); +- +- trans_ddi_func_ctl_value &= ~(TRANS_DDI_FUNC_ENABLE | +- TGL_TRANS_DDI_PORT_MASK); +- 
trans_conf_value &= ~PIPECONF_ENABLE; +- dp_tp_ctl_value &= ~DP_TP_CTL_ENABLE; +- +- intel_de_write(dev_priv, PIPECONF(pipe), trans_conf_value); +- intel_de_write(dev_priv, TRANS_DDI_FUNC_CTL(pipe), +- trans_ddi_func_ctl_value); +- intel_de_write(dev_priv, TGL_DP_TP_CTL(pipe), dp_tp_ctl_value); +-} +- +-static void +-intel_dp_autotest_phy_ddi_enable(struct intel_dp *intel_dp, +- const struct intel_crtc_state *crtc_state) +-{ +- struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp); +- struct drm_device *dev = dig_port->base.base.dev; +- struct drm_i915_private *dev_priv = to_i915(dev); +- enum port port = dig_port->base.port; +- struct intel_crtc *crtc = to_intel_crtc(dig_port->base.base.crtc); +- enum pipe pipe = crtc->pipe; +- u32 trans_ddi_func_ctl_value, trans_conf_value, dp_tp_ctl_value; +- +- trans_ddi_func_ctl_value = intel_de_read(dev_priv, +- TRANS_DDI_FUNC_CTL(pipe)); +- trans_conf_value = intel_de_read(dev_priv, PIPECONF(pipe)); +- dp_tp_ctl_value = intel_de_read(dev_priv, TGL_DP_TP_CTL(pipe)); +- +- trans_ddi_func_ctl_value |= TRANS_DDI_FUNC_ENABLE | +- TGL_TRANS_DDI_SELECT_PORT(port); +- trans_conf_value |= PIPECONF_ENABLE; +- dp_tp_ctl_value |= DP_TP_CTL_ENABLE; +- +- intel_de_write(dev_priv, PIPECONF(pipe), trans_conf_value); +- intel_de_write(dev_priv, TGL_DP_TP_CTL(pipe), dp_tp_ctl_value); +- intel_de_write(dev_priv, TRANS_DDI_FUNC_CTL(pipe), +- trans_ddi_func_ctl_value); +-} +- + static void intel_dp_process_phy_request(struct intel_dp *intel_dp, + const struct intel_crtc_state *crtc_state) + { +@@ -3329,14 +3262,10 @@ static void intel_dp_process_phy_request(struct intel_dp *intel_dp, + intel_dp_get_adjust_train(intel_dp, crtc_state, DP_PHY_DPRX, + link_status); + +- intel_dp_autotest_phy_ddi_disable(intel_dp, crtc_state); +- + intel_dp_set_signal_levels(intel_dp, crtc_state, DP_PHY_DPRX); + + intel_dp_phy_pattern_update(intel_dp, crtc_state); + +- intel_dp_autotest_phy_ddi_enable(intel_dp, crtc_state); +- + drm_dp_dpcd_write(&intel_dp->aux, DP_TRAINING_LANE0_SET, + intel_dp->train_set, crtc_state->lane_count); + +@@ -3509,6 +3438,8 @@ intel_dp_handle_hdmi_link_status_change(struct intel_dp *intel_dp) + + drm_dp_pcon_hdmi_frl_link_error_count(&intel_dp->aux, &intel_dp->attached_connector->base); + ++ intel_dp->frl.is_trained = false; ++ + /* Restart FRL training or fall back to TMDS mode */ + intel_dp_check_frl_training(intel_dp); + } +@@ -4240,12 +4171,7 @@ intel_dp_detect(struct drm_connector *connector, * supports link training fallback params. 
*/ if (intel_dp->reset_link_params || intel_dp->is_mst) { @@ -114060,7 +137885,7 @@ index 5cf152be44877..f87e4d510ea5e 100644 intel_dp->reset_link_params = false; } -@@ -4617,7 +4600,7 @@ intel_dp_hpd_pulse(struct intel_digital_port *dig_port, bool long_hpd) +@@ -4617,7 +4543,7 @@ intel_dp_hpd_pulse(struct intel_digital_port *dig_port, bool long_hpd) struct intel_dp *intel_dp = &dig_port->dp; if (dig_port->base.type == INTEL_OUTPUT_EDP && @@ -114069,7 +137894,7 @@ index 5cf152be44877..f87e4d510ea5e 100644 /* * vdd off can generate a long/short pulse on eDP which * would require vdd on to handle it, and thus we -@@ -4716,432 +4699,6 @@ intel_dp_add_properties(struct intel_dp *intel_dp, struct drm_connector *connect +@@ -4716,432 +4642,6 @@ intel_dp_add_properties(struct intel_dp *intel_dp, struct drm_connector *connect drm_connector_attach_vrr_capable_property(connector); } @@ -114502,7 +138327,7 @@ index 5cf152be44877..f87e4d510ea5e 100644 static bool intel_edp_init_connector(struct intel_dp *intel_dp, struct intel_connector *intel_connector) { -@@ -5296,6 +4853,9 @@ intel_dp_init_connector(struct intel_digital_port *dig_port, +@@ -5296,6 +4796,9 @@ intel_dp_init_connector(struct intel_digital_port *dig_port, return false; intel_dp_set_source_rates(intel_dp); @@ -115487,7 +139312,7 @@ index 0000000000000..ffa175b4cf4f4 + +#endif /* __INTEL_DRRS_H__ */ diff --git a/drivers/gpu/drm/i915/display/intel_dsi_vbt.c b/drivers/gpu/drm/i915/display/intel_dsi_vbt.c -index c2a2cd1f84dc5..0a88088a11e8c 100644 +index c2a2cd1f84dc5..55dd02a01f1ac 100644 --- a/drivers/gpu/drm/i915/display/intel_dsi_vbt.c +++ b/drivers/gpu/drm/i915/display/intel_dsi_vbt.c @@ -121,9 +121,25 @@ struct i2c_adapter_lookup { @@ -115508,9 +139333,9 @@ index c2a2cd1f84dc5..0a88088a11e8c 100644 + return ffs(intel_dsi->ports) - 1; + + if (seq_port) { -+ if (intel_dsi->ports & PORT_B) ++ if (intel_dsi->ports & BIT(PORT_B)) + return PORT_B; -+ else if (intel_dsi->ports & PORT_C) ++ else if (intel_dsi->ports & BIT(PORT_C)) + return PORT_C; + } + @@ -117735,6 +141560,143 @@ index 8a52b7a167746..407b096f53921 100644 }; void intel_init_quirks(struct drm_i915_private *i915) +diff --git a/drivers/gpu/drm/i915/display/intel_sdvo.c b/drivers/gpu/drm/i915/display/intel_sdvo.c +index 6cb27599ea030..adb1693b15758 100644 +--- a/drivers/gpu/drm/i915/display/intel_sdvo.c ++++ b/drivers/gpu/drm/i915/display/intel_sdvo.c +@@ -2762,13 +2762,10 @@ intel_sdvo_dvi_init(struct intel_sdvo *intel_sdvo, int device) + if (!intel_sdvo_connector) + return false; + +- if (device == 0) { +- intel_sdvo->controlled_output |= SDVO_OUTPUT_TMDS0; ++ if (device == 0) + intel_sdvo_connector->output_flag = SDVO_OUTPUT_TMDS0; +- } else if (device == 1) { +- intel_sdvo->controlled_output |= SDVO_OUTPUT_TMDS1; ++ else if (device == 1) + intel_sdvo_connector->output_flag = SDVO_OUTPUT_TMDS1; +- } + + intel_connector = &intel_sdvo_connector->base; + connector = &intel_connector->base; +@@ -2823,7 +2820,6 @@ intel_sdvo_tv_init(struct intel_sdvo *intel_sdvo, int type) + encoder->encoder_type = DRM_MODE_ENCODER_TVDAC; + connector->connector_type = DRM_MODE_CONNECTOR_SVIDEO; + +- intel_sdvo->controlled_output |= type; + intel_sdvo_connector->output_flag = type; + + if (intel_sdvo_connector_init(intel_sdvo_connector, intel_sdvo) < 0) { +@@ -2864,13 +2860,10 @@ intel_sdvo_analog_init(struct intel_sdvo *intel_sdvo, int device) + encoder->encoder_type = DRM_MODE_ENCODER_DAC; + connector->connector_type = DRM_MODE_CONNECTOR_VGA; + +- if (device == 0) { +- 
intel_sdvo->controlled_output |= SDVO_OUTPUT_RGB0; ++ if (device == 0) + intel_sdvo_connector->output_flag = SDVO_OUTPUT_RGB0; +- } else if (device == 1) { +- intel_sdvo->controlled_output |= SDVO_OUTPUT_RGB1; ++ else if (device == 1) + intel_sdvo_connector->output_flag = SDVO_OUTPUT_RGB1; +- } + + if (intel_sdvo_connector_init(intel_sdvo_connector, intel_sdvo) < 0) { + kfree(intel_sdvo_connector); +@@ -2900,13 +2893,10 @@ intel_sdvo_lvds_init(struct intel_sdvo *intel_sdvo, int device) + encoder->encoder_type = DRM_MODE_ENCODER_LVDS; + connector->connector_type = DRM_MODE_CONNECTOR_LVDS; + +- if (device == 0) { +- intel_sdvo->controlled_output |= SDVO_OUTPUT_LVDS0; ++ if (device == 0) + intel_sdvo_connector->output_flag = SDVO_OUTPUT_LVDS0; +- } else if (device == 1) { +- intel_sdvo->controlled_output |= SDVO_OUTPUT_LVDS1; ++ else if (device == 1) + intel_sdvo_connector->output_flag = SDVO_OUTPUT_LVDS1; +- } + + if (intel_sdvo_connector_init(intel_sdvo_connector, intel_sdvo) < 0) { + kfree(intel_sdvo_connector); +@@ -2939,16 +2929,39 @@ err: + return false; + } + ++static u16 intel_sdvo_filter_output_flags(u16 flags) ++{ ++ flags &= SDVO_OUTPUT_MASK; ++ ++ /* SDVO requires XXX1 function may not exist unless it has XXX0 function.*/ ++ if (!(flags & SDVO_OUTPUT_TMDS0)) ++ flags &= ~SDVO_OUTPUT_TMDS1; ++ ++ if (!(flags & SDVO_OUTPUT_RGB0)) ++ flags &= ~SDVO_OUTPUT_RGB1; ++ ++ if (!(flags & SDVO_OUTPUT_LVDS0)) ++ flags &= ~SDVO_OUTPUT_LVDS1; ++ ++ return flags; ++} ++ + static bool + intel_sdvo_output_setup(struct intel_sdvo *intel_sdvo, u16 flags) + { +- /* SDVO requires XXX1 function may not exist unless it has XXX0 function.*/ ++ struct drm_i915_private *i915 = to_i915(intel_sdvo->base.base.dev); ++ ++ flags = intel_sdvo_filter_output_flags(flags); ++ ++ intel_sdvo->controlled_output = flags; ++ ++ intel_sdvo_select_ddc_bus(i915, intel_sdvo); + + if (flags & SDVO_OUTPUT_TMDS0) + if (!intel_sdvo_dvi_init(intel_sdvo, 0)) + return false; + +- if ((flags & SDVO_TMDS_MASK) == SDVO_TMDS_MASK) ++ if (flags & SDVO_OUTPUT_TMDS1) + if (!intel_sdvo_dvi_init(intel_sdvo, 1)) + return false; + +@@ -2969,7 +2982,7 @@ intel_sdvo_output_setup(struct intel_sdvo *intel_sdvo, u16 flags) + if (!intel_sdvo_analog_init(intel_sdvo, 0)) + return false; + +- if ((flags & SDVO_RGB_MASK) == SDVO_RGB_MASK) ++ if (flags & SDVO_OUTPUT_RGB1) + if (!intel_sdvo_analog_init(intel_sdvo, 1)) + return false; + +@@ -2977,14 +2990,13 @@ intel_sdvo_output_setup(struct intel_sdvo *intel_sdvo, u16 flags) + if (!intel_sdvo_lvds_init(intel_sdvo, 0)) + return false; + +- if ((flags & SDVO_LVDS_MASK) == SDVO_LVDS_MASK) ++ if (flags & SDVO_OUTPUT_LVDS1) + if (!intel_sdvo_lvds_init(intel_sdvo, 1)) + return false; + +- if ((flags & SDVO_OUTPUT_MASK) == 0) { ++ if (flags == 0) { + unsigned char bytes[2]; + +- intel_sdvo->controlled_output = 0; + memcpy(bytes, &intel_sdvo->caps.output_flags, 2); + DRM_DEBUG_KMS("%s: Unknown SDVO output type (0x%02x%02x)\n", + SDVO_NAME(intel_sdvo), +@@ -3396,8 +3408,6 @@ bool intel_sdvo_init(struct drm_i915_private *dev_priv, + */ + intel_sdvo->base.cloneable = 0; + +- intel_sdvo_select_ddc_bus(dev_priv, intel_sdvo); +- + /* Set the input timing to the screen. Assume always input 0. 
*/ + if (!intel_sdvo_set_target_input(intel_sdvo)) + goto err_output; diff --git a/drivers/gpu/drm/i915/display/intel_snps_phy.c b/drivers/gpu/drm/i915/display/intel_snps_phy.c index 18b52b64af955..536b319ffe5ba 100644 --- a/drivers/gpu/drm/i915/display/intel_snps_phy.c @@ -117765,6 +141727,19 @@ index 3ffece568ed98..0e885440be242 100644 if (val == 0xffffffff) { drm_dbg_kms(&i915->drm, "Port %s: PHY in TCCOLD, assuming not complete\n", +diff --git a/drivers/gpu/drm/i915/display/skl_universal_plane.c b/drivers/gpu/drm/i915/display/skl_universal_plane.c +index 724e7b04f3b63..b97b4b3b85e07 100644 +--- a/drivers/gpu/drm/i915/display/skl_universal_plane.c ++++ b/drivers/gpu/drm/i915/display/skl_universal_plane.c +@@ -1473,7 +1473,7 @@ static int skl_check_main_surface(struct intel_plane_state *plane_state) + u32 offset; + int ret; + +- if (w > max_width || w < min_width || h > max_height) { ++ if (w > max_width || w < min_width || h > max_height || h < 1) { + drm_dbg_kms(&dev_priv->drm, + "requested Y/RGB source size %dx%d outside limits (min: %dx1 max: %dx%d)\n", + w, h, min_width, max_width, max_height); diff --git a/drivers/gpu/drm/i915/display/vlv_dsi.c b/drivers/gpu/drm/i915/display/vlv_dsi.c index 0ee4ff341e25d..b27738df447d0 100644 --- a/drivers/gpu/drm/i915/display/vlv_dsi.c @@ -117829,6 +141804,26 @@ index 166bb46408a9b..60f6a731f1bf6 100644 mutex_unlock(&ctx->mutex); /* +diff --git a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c +index afa34111de02e..af74c9c37c9cc 100644 +--- a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c ++++ b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c +@@ -34,13 +34,13 @@ static struct sg_table *i915_gem_map_dma_buf(struct dma_buf_attachment *attachme + goto err; + } + +- ret = sg_alloc_table(st, obj->mm.pages->nents, GFP_KERNEL); ++ ret = sg_alloc_table(st, obj->mm.pages->orig_nents, GFP_KERNEL); + if (ret) + goto err_free; + + src = obj->mm.pages->sgl; + dst = st->sgl; +- for (i = 0; i < obj->mm.pages->nents; i++) { ++ for (i = 0; i < obj->mm.pages->orig_nents; i++) { + sg_set_page(dst, sg_page(src), src->length, 0); + dst = sg_next(dst); + src = sg_next(src); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index 1aa249908b645..0d480867fc0c2 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -117900,6 +141895,73 @@ index 1aa249908b645..0d480867fc0c2 100644 if (err) return err; } +diff --git a/drivers/gpu/drm/i915/gem/i915_gem_internal.c b/drivers/gpu/drm/i915/gem/i915_gem_internal.c +index e5ae9c06510cc..3c8de65bfb393 100644 +--- a/drivers/gpu/drm/i915/gem/i915_gem_internal.c ++++ b/drivers/gpu/drm/i915/gem/i915_gem_internal.c +@@ -143,24 +143,10 @@ static const struct drm_i915_gem_object_ops i915_gem_object_internal_ops = { + .put_pages = i915_gem_object_put_pages_internal, + }; + +-/** +- * i915_gem_object_create_internal: create an object with volatile pages +- * @i915: the i915 device +- * @size: the size in bytes of backing storage to allocate for the object +- * +- * Creates a new object that wraps some internal memory for private use. +- * This object is not backed by swappable storage, and as such its contents +- * are volatile and only valid whilst pinned. If the object is reaped by the +- * shrinker, its pages and data will be discarded. Equally, it is not a full +- * GEM object and so not valid for access from userspace. 
This makes it useful +- * for hardware interfaces like ringbuffers (which are pinned from the time +- * the request is written to the time the hardware stops accessing it), but +- * not for contexts (which need to be preserved when not active for later +- * reuse). Note that it is not cleared upon allocation. +- */ + struct drm_i915_gem_object * +-i915_gem_object_create_internal(struct drm_i915_private *i915, +- phys_addr_t size) ++__i915_gem_object_create_internal(struct drm_i915_private *i915, ++ const struct drm_i915_gem_object_ops *ops, ++ phys_addr_t size) + { + static struct lock_class_key lock_class; + struct drm_i915_gem_object *obj; +@@ -177,7 +163,7 @@ i915_gem_object_create_internal(struct drm_i915_private *i915, + return ERR_PTR(-ENOMEM); + + drm_gem_private_object_init(&i915->drm, &obj->base, size); +- i915_gem_object_init(obj, &i915_gem_object_internal_ops, &lock_class, 0); ++ i915_gem_object_init(obj, ops, &lock_class, 0); + obj->mem_flags |= I915_BO_FLAG_STRUCT_PAGE; + + /* +@@ -197,3 +183,25 @@ i915_gem_object_create_internal(struct drm_i915_private *i915, + + return obj; + } ++ ++/** ++ * i915_gem_object_create_internal: create an object with volatile pages ++ * @i915: the i915 device ++ * @size: the size in bytes of backing storage to allocate for the object ++ * ++ * Creates a new object that wraps some internal memory for private use. ++ * This object is not backed by swappable storage, and as such its contents ++ * are volatile and only valid whilst pinned. If the object is reaped by the ++ * shrinker, its pages and data will be discarded. Equally, it is not a full ++ * GEM object and so not valid for access from userspace. This makes it useful ++ * for hardware interfaces like ringbuffers (which are pinned from the time ++ * the request is written to the time the hardware stops accessing it), but ++ * not for contexts (which need to be preserved when not active for later ++ * reuse). Note that it is not cleared upon allocation. 
++ */ ++struct drm_i915_gem_object * ++i915_gem_object_create_internal(struct drm_i915_private *i915, ++ phys_addr_t size) ++{ ++ return __i915_gem_object_create_internal(i915, &i915_gem_object_internal_ops, size); ++} diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c index 5130e8ed95647..28e07040cf47a 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c @@ -117987,6 +142049,46 @@ index 8eb1c3a6fc9cd..9053cea3395a6 100644 return pages; } +diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c +index 11f072193f3b1..827f2f9dcda6a 100644 +--- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c ++++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c +@@ -533,7 +533,7 @@ static int shmem_object_init(struct intel_memory_region *mem, + mapping_set_gfp_mask(mapping, mask); + GEM_BUG_ON(!(mapping_gfp_mask(mapping) & __GFP_RECLAIM)); + +- i915_gem_object_init(obj, &i915_gem_shmem_ops, &lock_class, 0); ++ i915_gem_object_init(obj, &i915_gem_shmem_ops, &lock_class, flags); + obj->mem_flags |= I915_BO_FLAG_STRUCT_PAGE; + obj->write_domain = I915_GEM_DOMAIN_CPU; + obj->read_domains = I915_GEM_DOMAIN_CPU; +diff --git a/drivers/gpu/drm/i915/gem/i915_gem_tiling.c b/drivers/gpu/drm/i915/gem/i915_gem_tiling.c +index ef4d0f7dc1186..d4897ce0ad0c0 100644 +--- a/drivers/gpu/drm/i915/gem/i915_gem_tiling.c ++++ b/drivers/gpu/drm/i915/gem/i915_gem_tiling.c +@@ -294,10 +294,6 @@ i915_gem_object_set_tiling(struct drm_i915_gem_object *obj, + spin_unlock(&obj->vma.lock); + + obj->tiling_and_stride = tiling | stride; +- i915_gem_object_unlock(obj); +- +- /* Force the fence to be reacquired for GTT access */ +- i915_gem_object_release_mmap_gtt(obj); + + /* Try to preallocate memory required to save swizzling on put-pages */ + if (i915_gem_object_needs_bit17_swizzle(obj)) { +@@ -310,6 +306,11 @@ i915_gem_object_set_tiling(struct drm_i915_gem_object *obj, + obj->bit_17 = NULL; + } + ++ i915_gem_object_unlock(obj); ++ ++ /* Force the fence to be reacquired for GTT access */ ++ i915_gem_object_release_mmap_gtt(obj); ++ + return 0; + } + diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c index 6ea13159bffcc..4b823fbfe76a1 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c @@ -118005,6 +142107,334 @@ index 6ea13159bffcc..4b823fbfe76a1 100644 } ttm_bo_move_to_lru_tail(bo, bo->resource, NULL); +diff --git a/drivers/gpu/drm/i915/gt/gen6_ppgtt.c b/drivers/gpu/drm/i915/gt/gen6_ppgtt.c +index 1aee5e6b1b23f..b257666a26fc2 100644 +--- a/drivers/gpu/drm/i915/gt/gen6_ppgtt.c ++++ b/drivers/gpu/drm/i915/gt/gen6_ppgtt.c +@@ -244,6 +244,7 @@ err_scratch1: + i915_gem_object_put(vm->scratch[1]); + err_scratch0: + i915_gem_object_put(vm->scratch[0]); ++ vm->scratch[0] = NULL; + return ret; + } + +@@ -262,15 +263,13 @@ static void gen6_ppgtt_cleanup(struct i915_address_space *vm) + { + struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(i915_vm_to_ppgtt(vm)); + +- __i915_vma_put(ppgtt->vma); +- + gen6_ppgtt_free_pd(ppgtt); + free_scratch(vm); + +- mutex_destroy(&ppgtt->flush); +- mutex_destroy(&ppgtt->pin_mutex); ++ if (ppgtt->base.pd) ++ free_pd(&ppgtt->base.vm, ppgtt->base.pd); + +- free_pd(&ppgtt->base.vm, ppgtt->base.pd); ++ mutex_destroy(&ppgtt->flush); + } + + static int pd_vma_set_pages(struct i915_vma *vma) +@@ -331,37 +330,6 @@ static const struct i915_vma_ops pd_vma_ops = { + .unbind_vma = pd_vma_unbind, + }; + +-static struct i915_vma *pd_vma_create(struct gen6_ppgtt *ppgtt, int size) +-{ +- struct i915_ggtt *ggtt = 
ppgtt->base.vm.gt->ggtt; +- struct i915_vma *vma; +- +- GEM_BUG_ON(!IS_ALIGNED(size, I915_GTT_PAGE_SIZE)); +- GEM_BUG_ON(size > ggtt->vm.total); +- +- vma = i915_vma_alloc(); +- if (!vma) +- return ERR_PTR(-ENOMEM); +- +- i915_active_init(&vma->active, NULL, NULL, 0); +- +- kref_init(&vma->ref); +- mutex_init(&vma->pages_mutex); +- vma->vm = i915_vm_get(&ggtt->vm); +- vma->ops = &pd_vma_ops; +- vma->private = ppgtt; +- +- vma->size = size; +- vma->fence_size = size; +- atomic_set(&vma->flags, I915_VMA_GGTT); +- vma->ggtt_view.type = I915_GGTT_VIEW_ROTATED; /* prevent fencing */ +- +- INIT_LIST_HEAD(&vma->obj_link); +- INIT_LIST_HEAD(&vma->closed_link); +- +- return vma; +-} +- + int gen6_ppgtt_pin(struct i915_ppgtt *base, struct i915_gem_ww_ctx *ww) + { + struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(base); +@@ -378,24 +346,85 @@ int gen6_ppgtt_pin(struct i915_ppgtt *base, struct i915_gem_ww_ctx *ww) + if (atomic_add_unless(&ppgtt->pin_count, 1, 0)) + return 0; + +- if (mutex_lock_interruptible(&ppgtt->pin_mutex)) +- return -EINTR; ++ /* grab the ppgtt resv to pin the object */ ++ err = i915_vm_lock_objects(&ppgtt->base.vm, ww); ++ if (err) ++ return err; + + /* + * PPGTT PDEs reside in the GGTT and consists of 512 entries. The + * allocator works in address space sizes, so it's multiplied by page + * size. We allocate at the top of the GTT to avoid fragmentation. + */ +- err = 0; +- if (!atomic_read(&ppgtt->pin_count)) ++ if (!atomic_read(&ppgtt->pin_count)) { + err = i915_ggtt_pin(ppgtt->vma, ww, GEN6_PD_ALIGN, PIN_HIGH); ++ ++ GEM_BUG_ON(ppgtt->vma->fence); ++ clear_bit(I915_VMA_CAN_FENCE_BIT, __i915_vma_flags(ppgtt->vma)); ++ } + if (!err) + atomic_inc(&ppgtt->pin_count); +- mutex_unlock(&ppgtt->pin_mutex); + + return err; + } + ++static int pd_dummy_obj_get_pages(struct drm_i915_gem_object *obj) ++{ ++ obj->mm.pages = ZERO_SIZE_PTR; ++ return 0; ++} ++ ++static void pd_dummy_obj_put_pages(struct drm_i915_gem_object *obj, ++ struct sg_table *pages) ++{ ++} ++ ++static const struct drm_i915_gem_object_ops pd_dummy_obj_ops = { ++ .name = "pd_dummy_obj", ++ .get_pages = pd_dummy_obj_get_pages, ++ .put_pages = pd_dummy_obj_put_pages, ++}; ++ ++static struct i915_page_directory * ++gen6_alloc_top_pd(struct gen6_ppgtt *ppgtt) ++{ ++ struct i915_ggtt * const ggtt = ppgtt->base.vm.gt->ggtt; ++ struct i915_page_directory *pd; ++ int err; ++ ++ pd = __alloc_pd(I915_PDES); ++ if (unlikely(!pd)) ++ return ERR_PTR(-ENOMEM); ++ ++ pd->pt.base = __i915_gem_object_create_internal(ppgtt->base.vm.gt->i915, ++ &pd_dummy_obj_ops, ++ I915_PDES * SZ_4K); ++ if (IS_ERR(pd->pt.base)) { ++ err = PTR_ERR(pd->pt.base); ++ pd->pt.base = NULL; ++ goto err_pd; ++ } ++ ++ pd->pt.base->base.resv = i915_vm_resv_get(&ppgtt->base.vm); ++ pd->pt.base->shares_resv_from = &ppgtt->base.vm; ++ ++ ppgtt->vma = i915_vma_instance(pd->pt.base, &ggtt->vm, NULL); ++ if (IS_ERR(ppgtt->vma)) { ++ err = PTR_ERR(ppgtt->vma); ++ ppgtt->vma = NULL; ++ goto err_pd; ++ } ++ ++ /* The dummy object we create is special, override ops.. 
*/ ++ ppgtt->vma->ops = &pd_vma_ops; ++ ppgtt->vma->private = ppgtt; ++ return pd; ++ ++err_pd: ++ free_pd(&ppgtt->base.vm, pd); ++ return ERR_PTR(err); ++} ++ + void gen6_ppgtt_unpin(struct i915_ppgtt *base) + { + struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(base); +@@ -427,7 +456,6 @@ struct i915_ppgtt *gen6_ppgtt_create(struct intel_gt *gt) + return ERR_PTR(-ENOMEM); + + mutex_init(&ppgtt->flush); +- mutex_init(&ppgtt->pin_mutex); + + ppgtt_init(&ppgtt->base, gt); + ppgtt->base.vm.pd_shift = ilog2(SZ_4K * SZ_4K / sizeof(gen6_pte_t)); +@@ -442,30 +470,19 @@ struct i915_ppgtt *gen6_ppgtt_create(struct intel_gt *gt) + ppgtt->base.vm.alloc_pt_dma = alloc_pt_dma; + ppgtt->base.vm.pte_encode = ggtt->vm.pte_encode; + +- ppgtt->base.pd = __alloc_pd(I915_PDES); +- if (!ppgtt->base.pd) { +- err = -ENOMEM; +- goto err_free; +- } +- + err = gen6_ppgtt_init_scratch(ppgtt); + if (err) +- goto err_pd; ++ goto err_put; + +- ppgtt->vma = pd_vma_create(ppgtt, GEN6_PD_SIZE); +- if (IS_ERR(ppgtt->vma)) { +- err = PTR_ERR(ppgtt->vma); +- goto err_scratch; ++ ppgtt->base.pd = gen6_alloc_top_pd(ppgtt); ++ if (IS_ERR(ppgtt->base.pd)) { ++ err = PTR_ERR(ppgtt->base.pd); ++ goto err_put; + } + + return &ppgtt->base; + +-err_scratch: +- free_scratch(&ppgtt->base.vm); +-err_pd: +- free_pd(&ppgtt->base.vm, ppgtt->base.pd); +-err_free: +- mutex_destroy(&ppgtt->pin_mutex); +- kfree(ppgtt); ++err_put: ++ i915_vm_put(&ppgtt->base.vm); + return ERR_PTR(err); + } +diff --git a/drivers/gpu/drm/i915/gt/gen6_ppgtt.h b/drivers/gpu/drm/i915/gt/gen6_ppgtt.h +index 6a61a5c3a85a6..9b498ca76ac6b 100644 +--- a/drivers/gpu/drm/i915/gt/gen6_ppgtt.h ++++ b/drivers/gpu/drm/i915/gt/gen6_ppgtt.h +@@ -19,7 +19,6 @@ struct gen6_ppgtt { + u32 pp_dir; + + atomic_t pin_count; +- struct mutex pin_mutex; + + bool scan_for_unused_pt; + }; +diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c +index 6e0e52eeb87a6..0cf604c5a6c24 100644 +--- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c ++++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c +@@ -196,7 +196,10 @@ static void gen8_ppgtt_cleanup(struct i915_address_space *vm) + if (intel_vgpu_active(vm->i915)) + gen8_ppgtt_notify_vgt(ppgtt, false); + +- __gen8_ppgtt_cleanup(vm, ppgtt->pd, gen8_pd_top_count(vm), vm->top); ++ if (ppgtt->pd) ++ __gen8_ppgtt_cleanup(vm, ppgtt->pd, ++ gen8_pd_top_count(vm), vm->top); ++ + free_scratch(vm); + } + +@@ -656,8 +659,10 @@ static int gen8_init_scratch(struct i915_address_space *vm) + struct drm_i915_gem_object *obj; + + obj = vm->alloc_pt_dma(vm, I915_GTT_PAGE_SIZE_4K); +- if (IS_ERR(obj)) ++ if (IS_ERR(obj)) { ++ ret = PTR_ERR(obj); + goto free_scratch; ++ } + + ret = map_pt_dma(vm, obj); + if (ret) { +@@ -676,7 +681,8 @@ static int gen8_init_scratch(struct i915_address_space *vm) + free_scratch: + while (i--) + i915_gem_object_put(vm->scratch[i]); +- return -ENOMEM; ++ vm->scratch[0] = NULL; ++ return ret; + } + + static int gen8_preallocate_top_level_pdp(struct i915_ppgtt *ppgtt) +@@ -753,6 +759,7 @@ err_pd: + */ + struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt) + { ++ struct i915_page_directory *pd; + struct i915_ppgtt *ppgtt; + int err; + +@@ -779,44 +786,39 @@ struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt) + else + ppgtt->vm.alloc_pt_dma = alloc_pt_dma; + ++ ppgtt->vm.pte_encode = gen8_pte_encode; ++ ++ ppgtt->vm.bind_async_flags = I915_VMA_LOCAL_BIND; ++ ppgtt->vm.insert_entries = gen8_ppgtt_insert; ++ ppgtt->vm.insert_page = gen8_ppgtt_insert_entry; ++ ppgtt->vm.allocate_va_range = gen8_ppgtt_alloc; ++ 
ppgtt->vm.clear_range = gen8_ppgtt_clear; ++ ppgtt->vm.foreach = gen8_ppgtt_foreach; ++ ppgtt->vm.cleanup = gen8_ppgtt_cleanup; ++ + err = gen8_init_scratch(&ppgtt->vm); + if (err) +- goto err_free; ++ goto err_put; + +- ppgtt->pd = gen8_alloc_top_pd(&ppgtt->vm); +- if (IS_ERR(ppgtt->pd)) { +- err = PTR_ERR(ppgtt->pd); +- goto err_free_scratch; ++ pd = gen8_alloc_top_pd(&ppgtt->vm); ++ if (IS_ERR(pd)) { ++ err = PTR_ERR(pd); ++ goto err_put; + } ++ ppgtt->pd = pd; + + if (!i915_vm_is_4lvl(&ppgtt->vm)) { + err = gen8_preallocate_top_level_pdp(ppgtt); + if (err) +- goto err_free_pd; ++ goto err_put; + } + +- ppgtt->vm.bind_async_flags = I915_VMA_LOCAL_BIND; +- ppgtt->vm.insert_entries = gen8_ppgtt_insert; +- ppgtt->vm.insert_page = gen8_ppgtt_insert_entry; +- ppgtt->vm.allocate_va_range = gen8_ppgtt_alloc; +- ppgtt->vm.clear_range = gen8_ppgtt_clear; +- ppgtt->vm.foreach = gen8_ppgtt_foreach; +- +- ppgtt->vm.pte_encode = gen8_pte_encode; +- + if (intel_vgpu_active(gt->i915)) + gen8_ppgtt_notify_vgt(ppgtt, true); + +- ppgtt->vm.cleanup = gen8_ppgtt_cleanup; +- + return ppgtt; + +-err_free_pd: +- __gen8_ppgtt_cleanup(&ppgtt->vm, ppgtt->pd, +- gen8_pd_top_count(&ppgtt->vm), ppgtt->vm.top); +-err_free_scratch: +- free_scratch(&ppgtt->vm); +-err_free: +- kfree(ppgtt); ++err_put: ++ i915_vm_put(&ppgtt->vm); + return ERR_PTR(err); + } diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h index e54351a170e2c..a63631ea0ec47 100644 --- a/drivers/gpu/drm/i915/gt/intel_context_types.h @@ -118199,7 +142629,7 @@ index de5f9c86b9a44..773ff51218335 100644 static void enable_error_interrupt(struct intel_engine_cs *engine) diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c -index 62d40c9866427..ed8ad3b263959 100644 +index 62d40c9866427..952e7177409ba 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -29,6 +29,8 @@ void intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915) @@ -118211,7 +142641,23 @@ index 62d40c9866427..ed8ad3b263959 100644 INIT_LIST_HEAD(>->closed_vma); spin_lock_init(>->closed_lock); -@@ -895,3 +897,119 @@ void intel_gt_info_print(const struct intel_gt_info *info, +@@ -648,8 +650,13 @@ int intel_gt_wait_for_idle(struct intel_gt *gt, long timeout) + return -EINTR; + } + +- return timeout ? 
timeout : intel_uc_wait_for_idle(>->uc, +- remaining_timeout); ++ if (timeout) ++ return timeout; ++ ++ if (remaining_timeout < 0) ++ remaining_timeout = 0; ++ ++ return intel_uc_wait_for_idle(>->uc, remaining_timeout); + } + + int intel_gt_init(struct intel_gt *gt) +@@ -895,3 +902,123 @@ void intel_gt_info_print(const struct intel_gt_info *info, intel_sseu_dump(&info->sseu, p); } @@ -118300,6 +142746,10 @@ index 62d40c9866427..ed8ad3b263959 100644 + if (!i915_mmio_reg_offset(rb.reg)) + continue; + ++ if (GRAPHICS_VER(i915) == 12 && (engine->class == VIDEO_DECODE_CLASS || ++ engine->class == VIDEO_ENHANCEMENT_CLASS)) ++ rb.bit = _MASKED_BIT_ENABLE(rb.bit); ++ + intel_uncore_write_fw(uncore, rb.reg, rb.bit); + } + @@ -118342,6 +142792,19 @@ index 74e771871a9bd..c0169d6017c2d 100644 +void intel_gt_invalidate_tlbs(struct intel_gt *gt); + #endif /* __INTEL_GT_H__ */ +diff --git a/drivers/gpu/drm/i915/gt/intel_gt_requests.c b/drivers/gpu/drm/i915/gt/intel_gt_requests.c +index edb881d756309..1dfd01668c79c 100644 +--- a/drivers/gpu/drm/i915/gt/intel_gt_requests.c ++++ b/drivers/gpu/drm/i915/gt/intel_gt_requests.c +@@ -199,7 +199,7 @@ out_active: spin_lock(&timelines->lock); + if (remaining_timeout) + *remaining_timeout = timeout; + +- return active_count ? timeout : 0; ++ return active_count ? timeout ?: -ETIME : 0; + } + + static void retire_work_handler(struct work_struct *work) diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h index a81e21bf1bd1a..9fbcbcc6c35db 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_types.h @@ -118355,11 +142818,118 @@ index a81e21bf1bd1a..9fbcbcc6c35db 100644 struct intel_gt_timelines { spinlock_t lock; /* protects active_list */ struct list_head active_list; +diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.c b/drivers/gpu/drm/i915/gt/intel_gtt.c +index e137dd32b5b8b..2d3a979736cc1 100644 +--- a/drivers/gpu/drm/i915/gt/intel_gtt.c ++++ b/drivers/gpu/drm/i915/gt/intel_gtt.c +@@ -341,6 +341,9 @@ void free_scratch(struct i915_address_space *vm) + { + int i; + ++ if (!vm->scratch[0]) ++ return; ++ + for (i = 0; i <= vm->top; i++) + i915_gem_object_put(vm->scratch[i]); + } +diff --git a/drivers/gpu/drm/i915/gt/intel_migrate.c b/drivers/gpu/drm/i915/gt/intel_migrate.c +index 1dac21aa7e5c3..5b59a6effc207 100644 +--- a/drivers/gpu/drm/i915/gt/intel_migrate.c ++++ b/drivers/gpu/drm/i915/gt/intel_migrate.c +@@ -13,7 +13,6 @@ + + struct insert_pte_data { + u64 offset; +- bool is_lmem; + }; + + #define CHUNK_SZ SZ_8M /* ~1ms at 8GiB/s preemption delay */ +@@ -40,7 +39,7 @@ static void insert_pte(struct i915_address_space *vm, + struct insert_pte_data *d = data; + + vm->insert_page(vm, px_dma(pt), d->offset, I915_CACHE_NONE, +- d->is_lmem ? PTE_LM : 0); ++ i915_gem_object_is_lmem(pt->base) ? 
PTE_LM : 0); + d->offset += PAGE_SIZE; + } + +@@ -134,8 +133,7 @@ static struct i915_address_space *migrate_vm(struct intel_gt *gt) + goto err_vm; + + /* Now allow the GPU to rewrite the PTE via its own ppGTT */ +- d.is_lmem = i915_gem_object_is_lmem(vm->vm.scratch[0]); +- vm->vm.foreach(&vm->vm, base, base + sz, insert_pte, &d); ++ vm->vm.foreach(&vm->vm, base, d.offset - base, insert_pte, &d); + } + + return &vm->vm; +@@ -281,10 +279,10 @@ static int emit_pte(struct i915_request *rq, + GEM_BUG_ON(GRAPHICS_VER(rq->engine->i915) < 8); + + /* Compute the page directory offset for the target address range */ +- offset += (u64)rq->engine->instance << 32; + offset >>= 12; + offset *= sizeof(u64); + offset += 2 * CHUNK_SZ; ++ offset += (u64)rq->engine->instance << 32; + + cs = intel_ring_begin(rq, 6); + if (IS_ERR(cs)) diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c -index 91200c43951f7..18b0e57c58c1e 100644 +index 91200c43951f7..9dc244b70ce4b 100644 --- a/drivers/gpu/drm/i915/gt/intel_reset.c +++ b/drivers/gpu/drm/i915/gt/intel_reset.c -@@ -293,9 +293,9 @@ static int gen6_hw_domain_reset(struct intel_gt *gt, u32 hw_domain_mask) +@@ -271,6 +271,7 @@ out: + static int gen6_hw_domain_reset(struct intel_gt *gt, u32 hw_domain_mask) + { + struct intel_uncore *uncore = gt->uncore; ++ int loops = 2; + int err; + + /* +@@ -278,24 +279,45 @@ static int gen6_hw_domain_reset(struct intel_gt *gt, u32 hw_domain_mask) + * for fifo space for the write or forcewake the chip for + * the read + */ +- intel_uncore_write_fw(uncore, GEN6_GDRST, hw_domain_mask); ++ do { ++ intel_uncore_write_fw(uncore, GEN6_GDRST, hw_domain_mask); + +- /* Wait for the device to ack the reset requests */ +- err = __intel_wait_for_register_fw(uncore, +- GEN6_GDRST, hw_domain_mask, 0, +- 500, 0, +- NULL); ++ /* ++ * Wait for the device to ack the reset requests. ++ * ++ * On some platforms, e.g. Jasperlake, we see that the ++ * engine register state is not cleared until shortly after ++ * GDRST reports completion, causing a failure as we try ++ * to immediately resume while the internal state is still ++ * in flux. If we immediately repeat the reset, the second ++ * reset appears to serialise with the first, and since ++ * it is a no-op, the registers should retain their reset ++ * value. However, there is still a concern that upon ++ * leaving the second reset, the internal engine state ++ * is still in flux and not ready for resuming. ++ */ ++ err = __intel_wait_for_register_fw(uncore, GEN6_GDRST, ++ hw_domain_mask, 0, ++ 2000, 0, ++ NULL); ++ } while (err == 0 && --loops); + if (err) + GT_TRACE(gt, + "Wait for 0x%08x engines reset failed\n", + hw_domain_mask); + ++ /* ++ * As we have observed that the engine state is still volatile ++ * after GDRST is acked, impose a small delay to let everything settle. 
++ */ ++ udelay(50); ++ return err; } @@ -118372,7 +142942,7 @@ index 91200c43951f7..18b0e57c58c1e 100644 { static const u32 hw_engine_mask[] = { [RCS0] = GEN6_GRDOM_RENDER, -@@ -322,6 +322,20 @@ static int gen6_reset_engines(struct intel_gt *gt, +@@ -322,6 +344,20 @@ static int gen6_reset_engines(struct intel_gt *gt, return gen6_hw_domain_reset(gt, hw_mask); } @@ -118393,7 +142963,7 @@ index 91200c43951f7..18b0e57c58c1e 100644 static struct intel_engine_cs *find_sfc_paired_vecs_engine(struct intel_engine_cs *engine) { int vecs_id; -@@ -488,9 +502,9 @@ static void gen11_unlock_sfc(struct intel_engine_cs *engine) +@@ -488,9 +524,9 @@ static void gen11_unlock_sfc(struct intel_engine_cs *engine) rmw_clear_fw(uncore, sfc_lock.lock_reg, sfc_lock.lock_bit); } @@ -118406,7 +142976,7 @@ index 91200c43951f7..18b0e57c58c1e 100644 { static const u32 hw_engine_mask[] = { [RCS0] = GEN11_GRDOM_RENDER, -@@ -601,8 +615,11 @@ static int gen8_reset_engines(struct intel_gt *gt, +@@ -601,8 +637,11 @@ static int gen8_reset_engines(struct intel_gt *gt, struct intel_engine_cs *engine; const bool reset_non_ready = retry >= 1; intel_engine_mask_t tmp; @@ -118418,7 +142988,7 @@ index 91200c43951f7..18b0e57c58c1e 100644 for_each_engine_masked(engine, gt, engine_mask, tmp) { ret = gen8_engine_reset_prepare(engine); if (ret && !reset_non_ready) -@@ -623,15 +640,26 @@ static int gen8_reset_engines(struct intel_gt *gt, +@@ -623,15 +662,26 @@ static int gen8_reset_engines(struct intel_gt *gt, */ } @@ -118471,7 +143041,7 @@ index 2958e2fae3800..02e18e70c78ea 100644 static void reset_prepare(struct intel_engine_cs *engine) diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c -index aae609d7d85dd..6b5ab19a2ada9 100644 +index aae609d7d85dd..de93a1e988f29 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -621,13 +621,6 @@ static void gen12_ctx_workarounds_init(struct intel_engine_cs *engine, @@ -118488,6 +143058,52 @@ index aae609d7d85dd..6b5ab19a2ada9 100644 } static void dg1_ctx_workarounds_init(struct intel_engine_cs *engine, +@@ -1056,6 +1049,22 @@ icl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) + GAMT_CHKN_BIT_REG, + GAMT_CHKN_DISABLE_L3_COH_PIPE); + ++ /* ++ * Wa_1408615072:icl,ehl (vsunit) ++ * Wa_1407596294:icl,ehl (hsunit) ++ */ ++ wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE, ++ VSUNIT_CLKGATE_DIS | HSUNIT_CLKGATE_DIS); ++ ++ /* Wa_1407352427:icl,ehl */ ++ wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE2, ++ PSDUNIT_CLKGATE_DIS); ++ ++ /* Wa_1406680159:icl,ehl */ ++ wa_write_or(wal, ++ SUBSLICE_UNIT_LEVEL_CLKGATE, ++ GWUNIT_CLKGATE_DIS); ++ + /* Wa_1607087056:icl,ehl,jsl */ + if (IS_ICELAKE(i915) || + IS_JSL_EHL_GT_STEP(i915, STEP_A0, STEP_B0)) +@@ -1745,22 +1754,6 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) + wa_masked_en(wal, GEN9_CSFE_CHICKEN1_RCS, + GEN11_ENABLE_32_PLANE_MODE); + +- /* +- * Wa_1408615072:icl,ehl (vsunit) +- * Wa_1407596294:icl,ehl (hsunit) +- */ +- wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE, +- VSUNIT_CLKGATE_DIS | HSUNIT_CLKGATE_DIS); +- +- /* Wa_1407352427:icl,ehl */ +- wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE2, +- PSDUNIT_CLKGATE_DIS); +- +- /* Wa_1406680159:icl,ehl */ +- wa_write_or(wal, +- SUBSLICE_UNIT_LEVEL_CLKGATE, +- GWUNIT_CLKGATE_DIS); +- + /* + * Wa_1408767742:icl[a2..forever],ehl[all] + * Wa_1605460711:icl[a0..c0] diff --git a/drivers/gpu/drm/i915/gt/mock_engine.c b/drivers/gpu/drm/i915/gt/mock_engine.c 
index 2c1af030310c0..8b89215afe46b 100644 --- a/drivers/gpu/drm/i915/gt/mock_engine.c @@ -118553,7 +143169,7 @@ index 65a3e7fdb2b2c..95ff630157b9c 100644 }; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c -index 87d8dc8f51b96..6e09a1cca37b4 100644 +index 87d8dc8f51b96..97b5ba2fc834f 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -148,11 +148,12 @@ static inline void clr_context_registered(struct intel_context *ce) @@ -118899,6 +143515,34 @@ index 87d8dc8f51b96..6e09a1cca37b4 100644 capture_error_state(guc, ce); guc_context_replay(ce); } +@@ -2803,6 +2845,8 @@ void intel_guc_find_hung_context(struct intel_engine_cs *engine) + return; + + xa_for_each(&guc->context_lookup, index, ce) { ++ bool found; ++ + if (!intel_context_is_pinned(ce)) + continue; + +@@ -2814,10 +2858,18 @@ void intel_guc_find_hung_context(struct intel_engine_cs *engine) + continue; + } + ++ found = false; ++ spin_lock(&ce->guc_state.lock); + list_for_each_entry(rq, &ce->guc_active.requests, sched.link) { + if (i915_test_request_state(rq) != I915_REQUEST_ACTIVE) + continue; + ++ found = true; ++ break; ++ } ++ spin_unlock(&ce->guc_state.lock); ++ ++ if (found) { + intel_engine_set_hung_context(engine, ce); + + /* Can only cope with one hang at a time... */ diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc.c b/drivers/gpu/drm/i915/gt/uc/intel_huc.c index fc5387b410a2b..9ee22ac925409 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_huc.c @@ -119006,6 +143650,74 @@ index c4118b8082682..11971ee929f89 100644 return; } +diff --git a/drivers/gpu/drm/i915/gvt/debugfs.c b/drivers/gpu/drm/i915/gvt/debugfs.c +index 9f1c209d92511..e08ed0e9f1653 100644 +--- a/drivers/gpu/drm/i915/gvt/debugfs.c ++++ b/drivers/gpu/drm/i915/gvt/debugfs.c +@@ -175,8 +175,13 @@ void intel_gvt_debugfs_add_vgpu(struct intel_vgpu *vgpu) + */ + void intel_gvt_debugfs_remove_vgpu(struct intel_vgpu *vgpu) + { +- debugfs_remove_recursive(vgpu->debugfs); +- vgpu->debugfs = NULL; ++ struct intel_gvt *gvt = vgpu->gvt; ++ struct drm_minor *minor = gvt->gt->i915->drm.primary; ++ ++ if (minor->debugfs_root && gvt->debugfs_root) { ++ debugfs_remove_recursive(vgpu->debugfs); ++ vgpu->debugfs = NULL; ++ } + } + + /** +@@ -199,6 +204,10 @@ void intel_gvt_debugfs_init(struct intel_gvt *gvt) + */ + void intel_gvt_debugfs_clean(struct intel_gvt *gvt) + { +- debugfs_remove_recursive(gvt->debugfs_root); +- gvt->debugfs_root = NULL; ++ struct drm_minor *minor = gvt->gt->i915->drm.primary; ++ ++ if (minor->debugfs_root) { ++ debugfs_remove_recursive(gvt->debugfs_root); ++ gvt->debugfs_root = NULL; ++ } + } +diff --git a/drivers/gpu/drm/i915/gvt/gtt.c b/drivers/gpu/drm/i915/gvt/gtt.c +index e5c2fdfc20e33..7ea7abef6143f 100644 +--- a/drivers/gpu/drm/i915/gvt/gtt.c ++++ b/drivers/gpu/drm/i915/gvt/gtt.c +@@ -1195,10 +1195,8 @@ static int split_2MB_gtt_entry(struct intel_vgpu *vgpu, + for_each_shadow_entry(sub_spt, &sub_se, sub_index) { + ret = intel_gvt_hypervisor_dma_map_guest_page(vgpu, + start_gfn + sub_index, PAGE_SIZE, &dma_addr); +- if (ret) { +- ppgtt_invalidate_spt(spt); +- return ret; +- } ++ if (ret) ++ goto err; + sub_se.val64 = se->val64; + + /* Copy the PAT field from PDE. */ +@@ -1217,6 +1215,17 @@ static int split_2MB_gtt_entry(struct intel_vgpu *vgpu, + ops->set_pfn(se, sub_spt->shadow_page.mfn); + ppgtt_set_shadow_entry(spt, se, index); + return 0; ++err: ++ /* Cancel the existing addess mappings of DMA addr. 
*/ ++ for_each_present_shadow_entry(sub_spt, &sub_se, sub_index) { ++ gvt_vdbg_mm("invalidate 4K entry\n"); ++ ppgtt_invalidate_pte(sub_spt, &sub_se); ++ } ++ /* Release the new allocated spt. */ ++ trace_spt_change(sub_spt->vgpu->id, "release", sub_spt, ++ sub_spt->guest_page.gfn, sub_spt->shadow_page.type); ++ ppgtt_free_spt(sub_spt); ++ return ret; + } + + static int split_64KB_gtt_entry(struct intel_vgpu *vgpu, diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index cde0a477fb497..7ed7dba42c834 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c @@ -119019,11 +143731,66 @@ index cde0a477fb497..7ed7dba42c834 100644 return -EINVAL; } +diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c +index 1bb1be5c48c84..0291d42cfba8d 100644 +--- a/drivers/gpu/drm/i915/gvt/scheduler.c ++++ b/drivers/gpu/drm/i915/gvt/scheduler.c +@@ -694,6 +694,7 @@ intel_vgpu_shadow_mm_pin(struct intel_vgpu_workload *workload) + + if (workload->shadow_mm->type != INTEL_GVT_MM_PPGTT || + !workload->shadow_mm->ppgtt_mm.shadowed) { ++ intel_vgpu_unpin_mm(workload->shadow_mm); + gvt_vgpu_err("workload shadow ppgtt isn't ready\n"); + return -EINVAL; + } +diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c +index 59fb4c710c8ca..41094d51fc6fd 100644 +--- a/drivers/gpu/drm/i915/i915_drv.c ++++ b/drivers/gpu/drm/i915/i915_drv.c +@@ -986,12 +986,9 @@ static int i915_driver_open(struct drm_device *dev, struct drm_file *file) + */ + static void i915_driver_lastclose(struct drm_device *dev) + { +- struct drm_i915_private *i915 = to_i915(dev); +- + intel_fbdev_restore_mode(dev); + +- if (HAS_DISPLAY(i915)) +- vga_switcheroo_process_delayed_switch(); ++ vga_switcheroo_process_delayed_switch(); + } + + static void i915_driver_postclose(struct drm_device *dev, struct drm_file *file) +diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h +index 005b1cec70075..236cfee1cbf0a 100644 +--- a/drivers/gpu/drm/i915/i915_drv.h ++++ b/drivers/gpu/drm/i915/i915_drv.h +@@ -1905,6 +1905,10 @@ int i915_gem_evict_vm(struct i915_address_space *vm); + struct drm_i915_gem_object * + i915_gem_object_create_internal(struct drm_i915_private *dev_priv, + phys_addr_t size); ++struct drm_i915_gem_object * ++__i915_gem_object_create_internal(struct drm_i915_private *dev_priv, ++ const struct drm_i915_gem_object_ops *ops, ++ phys_addr_t size); + + /* i915_gem_tiling.c */ + static inline bool i915_gem_object_needs_bit17_swizzle(struct drm_i915_gem_object *obj) diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c -index 1bbd09ad52873..1ad7259fb1f0c 100644 +index 1bbd09ad52873..0bba1c5baca0b 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c -@@ -865,7 +865,7 @@ static const struct intel_device_info jsl_info = { +@@ -401,7 +401,8 @@ static const struct intel_device_info ilk_m_info = { + .has_coherent_ggtt = true, \ + .has_llc = 1, \ + .has_rc6 = 1, \ +- .has_rc6p = 1, \ ++ /* snb does support rc6p, but enabling it causes various issues */ \ ++ .has_rc6p = 0, \ + .has_rps = true, \ + .dma_mask_size = 40, \ + .ppgtt_type = INTEL_PPGTT_ALIASING, \ +@@ -865,7 +866,7 @@ static const struct intel_device_info jsl_info = { }, \ TGL_CURSOR_OFFSETS, \ .has_global_mocs = 1, \ @@ -119121,6 +143888,30 @@ index 9023d4ecf3b37..3c70aa5229e5a 100644 #define SKL_DMC_DC3_DC5_COUNT _MMIO(0x80030) #define SKL_DMC_DC5_DC6_COUNT _MMIO(0x8002C) #define BXT_DMC_DC3_DC5_COUNT _MMIO(0x80038) +diff --git 
a/drivers/gpu/drm/i915/i915_switcheroo.c b/drivers/gpu/drm/i915/i915_switcheroo.c +index de0e224b56ce3..f1ce9f591efaf 100644 +--- a/drivers/gpu/drm/i915/i915_switcheroo.c ++++ b/drivers/gpu/drm/i915/i915_switcheroo.c +@@ -18,6 +18,10 @@ static void i915_switcheroo_set_state(struct pci_dev *pdev, + dev_err(&pdev->dev, "DRM not initialized, aborting switch.\n"); + return; + } ++ if (!HAS_DISPLAY(i915)) { ++ dev_err(&pdev->dev, "Device state not initialized, aborting switch.\n"); ++ return; ++ } + + if (state == VGA_SWITCHEROO_ON) { + drm_info(&i915->drm, "switched on\n"); +@@ -43,7 +47,7 @@ static bool i915_switcheroo_can_switch(struct pci_dev *pdev) + * locking inversion with the driver load path. And the access here is + * completely racy anyway. So don't bother with locking for now. + */ +- return i915 && atomic_read(&i915->drm.open_count) == 0; ++ return i915 && HAS_DISPLAY(i915) && atomic_read(&i915->drm.open_count) == 0; + } + + static const struct vga_switcheroo_client_ops i915_switcheroo_ops = { diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c index cdf0e9c6fd73e..313c0000a814e 100644 --- a/drivers/gpu/drm/i915/i915_sysfs.c @@ -119684,6 +144475,20 @@ index 3c0b0a8b5250d..4c63209dcf530 100644 void intel_uncore_forcewake_flush(struct intel_uncore *uncore, enum forcewake_domains fw_domains); +diff --git a/drivers/gpu/drm/i915/selftests/intel_scheduler_helpers.c b/drivers/gpu/drm/i915/selftests/intel_scheduler_helpers.c +index 4b328346b48a2..83ffd175ca894 100644 +--- a/drivers/gpu/drm/i915/selftests/intel_scheduler_helpers.c ++++ b/drivers/gpu/drm/i915/selftests/intel_scheduler_helpers.c +@@ -16,8 +16,7 @@ + + int intel_selftest_modify_policy(struct intel_engine_cs *engine, + struct intel_selftest_saved_policy *saved, +- u32 modify_type) +- ++ enum selftest_scheduler_modify modify_type) + { + int err; + diff --git a/drivers/gpu/drm/imx/dcss/dcss-dev.c b/drivers/gpu/drm/imx/dcss/dcss-dev.c index c849533ca83e3..3f5750cc2673e 100644 --- a/drivers/gpu/drm/imx/dcss/dcss-dev.c @@ -119780,6 +144585,22 @@ index e5078d03020d9..fb0e951248f68 100644 } else if (!channel->panel) { /* fallback to display-timings node */ ret = of_get_drm_display_mode(child, +diff --git a/drivers/gpu/drm/imx/imx-tve.c b/drivers/gpu/drm/imx/imx-tve.c +index bc8c3f802a152..fbfb7adead0b3 100644 +--- a/drivers/gpu/drm/imx/imx-tve.c ++++ b/drivers/gpu/drm/imx/imx-tve.c +@@ -217,8 +217,9 @@ static int imx_tve_connector_get_modes(struct drm_connector *connector) + return ret; + } + +-static int imx_tve_connector_mode_valid(struct drm_connector *connector, +- struct drm_display_mode *mode) ++static enum drm_mode_status ++imx_tve_connector_mode_valid(struct drm_connector *connector, ++ struct drm_display_mode *mode) + { + struct imx_tve *tve = con_to_tve(connector); + unsigned long rate; diff --git a/drivers/gpu/drm/imx/ipuv3-crtc.c b/drivers/gpu/drm/imx/ipuv3-crtc.c index 9c8829f945b23..f7863d6dea804 100644 --- a/drivers/gpu/drm/imx/ipuv3-crtc.c @@ -119793,6 +144614,53 @@ index 9c8829f945b23..f7863d6dea804 100644 disable_partial = true; } +diff --git a/drivers/gpu/drm/imx/ipuv3-plane.c b/drivers/gpu/drm/imx/ipuv3-plane.c +index 846c1aae69c82..924a66f539511 100644 +--- a/drivers/gpu/drm/imx/ipuv3-plane.c ++++ b/drivers/gpu/drm/imx/ipuv3-plane.c +@@ -619,6 +619,11 @@ static void ipu_plane_atomic_update(struct drm_plane *plane, + break; + } + ++ if (ipu_plane->dp_flow == IPU_DP_FLOW_SYNC_BG) ++ width = ipu_src_rect_width(new_state); ++ else ++ width = drm_rect_width(&new_state->src) >> 16; ++ + 
eba = drm_plane_state_to_eba(new_state, 0); + + /* +@@ -627,8 +632,7 @@ static void ipu_plane_atomic_update(struct drm_plane *plane, + */ + if (ipu_state->use_pre) { + axi_id = ipu_chan_assign_axi_id(ipu_plane->dma); +- ipu_prg_channel_configure(ipu_plane->ipu_ch, axi_id, +- ipu_src_rect_width(new_state), ++ ipu_prg_channel_configure(ipu_plane->ipu_ch, axi_id, width, + drm_rect_height(&new_state->src) >> 16, + fb->pitches[0], fb->format->format, + fb->modifier, &eba); +@@ -683,9 +687,8 @@ static void ipu_plane_atomic_update(struct drm_plane *plane, + break; + } + +- ipu_dmfc_config_wait4eot(ipu_plane->dmfc, ALIGN(drm_rect_width(dst), 8)); ++ ipu_dmfc_config_wait4eot(ipu_plane->dmfc, width); + +- width = ipu_src_rect_width(new_state); + height = drm_rect_height(&new_state->src) >> 16; + info = drm_format_info(fb->format->format); + ipu_calculate_bursts(width, info->cpp[0], fb->pitches[0], +@@ -749,8 +752,7 @@ static void ipu_plane_atomic_update(struct drm_plane *plane, + ipu_cpmem_set_burstsize(ipu_plane->ipu_ch, 16); + + ipu_cpmem_zero(ipu_plane->alpha_ch); +- ipu_cpmem_set_resolution(ipu_plane->alpha_ch, +- ipu_src_rect_width(new_state), ++ ipu_cpmem_set_resolution(ipu_plane->alpha_ch, width, + drm_rect_height(&new_state->src) >> 16); + ipu_cpmem_set_format_passthrough(ipu_plane->alpha_ch, 8); + ipu_cpmem_set_high_priority(ipu_plane->alpha_ch); diff --git a/drivers/gpu/drm/imx/parallel-display.c b/drivers/gpu/drm/imx/parallel-display.c index a8aba0141ce71..63ba2ad846791 100644 --- a/drivers/gpu/drm/imx/parallel-display.c @@ -119824,6 +144692,23 @@ index a8aba0141ce71..63ba2ad846791 100644 bridge_state->output_bus_cfg.flags = bus_flags; bridge_state->input_bus_cfg.flags = bus_flags; imx_crtc_state->bus_flags = bus_flags; +diff --git a/drivers/gpu/drm/ingenic/ingenic-drm-drv.c b/drivers/gpu/drm/ingenic/ingenic-drm-drv.c +index a5df1c8d34cde..d9231b89d73e8 100644 +--- a/drivers/gpu/drm/ingenic/ingenic-drm-drv.c ++++ b/drivers/gpu/drm/ingenic/ingenic-drm-drv.c +@@ -1326,7 +1326,11 @@ static int ingenic_drm_init(void) + return err; + } + +- return platform_driver_register(&ingenic_drm_driver); ++ err = platform_driver_register(&ingenic_drm_driver); ++ if (IS_ENABLED(CONFIG_DRM_INGENIC_IPU) && err) ++ platform_driver_unregister(ingenic_ipu_driver_ptr); ++ ++ return err; + } + module_init(ingenic_drm_init); + diff --git a/drivers/gpu/drm/kmb/kmb_plane.c b/drivers/gpu/drm/kmb/kmb_plane.c index 00404ba4126dd..2735b8eb35376 100644 --- a/drivers/gpu/drm/kmb/kmb_plane.c @@ -120088,7 +144973,7 @@ index 75d7f45579e26..a6a6cb5f75af7 100644 } diff --git a/drivers/gpu/drm/mediatek/mtk_dpi.c b/drivers/gpu/drm/mediatek/mtk_dpi.c -index 4554e2de14309..41c783349321e 100644 +index 4554e2de14309..94c6bd3b00823 100644 --- a/drivers/gpu/drm/mediatek/mtk_dpi.c +++ b/drivers/gpu/drm/mediatek/mtk_dpi.c @@ -54,13 +54,7 @@ enum mtk_dpi_out_channel_swap { @@ -120136,16 +145021,43 @@ index 4554e2de14309..41c783349321e 100644 } static void mtk_dpi_dual_edge(struct mtk_dpi *dpi) -@@ -436,7 +417,6 @@ static int mtk_dpi_power_on(struct mtk_dpi *dpi) - if (dpi->pinctrl && dpi->pins_dpi) - pinctrl_select_state(dpi->pinctrl, dpi->pins_dpi); +@@ -406,9 +387,6 @@ static void mtk_dpi_power_off(struct mtk_dpi *dpi) + if (--dpi->refcount != 0) + return; + +- if (dpi->pinctrl && dpi->pins_gpio) +- pinctrl_select_state(dpi->pinctrl, dpi->pins_gpio); +- + mtk_dpi_disable(dpi); + clk_disable_unprepare(dpi->pixel_clk); + clk_disable_unprepare(dpi->engine_clk); +@@ -433,10 +411,6 @@ static int mtk_dpi_power_on(struct mtk_dpi *dpi) + goto 
err_pixel; + } +- if (dpi->pinctrl && dpi->pins_dpi) +- pinctrl_select_state(dpi->pinctrl, dpi->pins_dpi); +- - mtk_dpi_enable(dpi); return 0; err_pixel: -@@ -658,6 +638,7 @@ static void mtk_dpi_bridge_enable(struct drm_bridge *bridge) +@@ -650,14 +624,21 @@ static void mtk_dpi_bridge_disable(struct drm_bridge *bridge) + struct mtk_dpi *dpi = bridge_to_dpi(bridge); + mtk_dpi_power_off(dpi); ++ ++ if (dpi->pinctrl && dpi->pins_gpio) ++ pinctrl_select_state(dpi->pinctrl, dpi->pins_gpio); + } + + static void mtk_dpi_bridge_enable(struct drm_bridge *bridge) + { + struct mtk_dpi *dpi = bridge_to_dpi(bridge); + ++ if (dpi->pinctrl && dpi->pins_dpi) ++ pinctrl_select_state(dpi->pinctrl, dpi->pins_dpi); ++ mtk_dpi_power_on(dpi); mtk_dpi_set_display_mode(dpi, &dpi->mode); + mtk_dpi_enable(dpi); @@ -120693,10 +145605,24 @@ index 93b40c245f007..a6d28533f1b12 100644 static void mtk_dsi_unbind(struct device *dev, struct device *master, diff --git a/drivers/gpu/drm/mediatek/mtk_hdmi.c b/drivers/gpu/drm/mediatek/mtk_hdmi.c -index 5838c44cbf6f0..3196189429bcf 100644 +index 5838c44cbf6f0..7613b0fa2be6e 100644 --- a/drivers/gpu/drm/mediatek/mtk_hdmi.c +++ b/drivers/gpu/drm/mediatek/mtk_hdmi.c -@@ -1224,12 +1224,14 @@ static int mtk_hdmi_bridge_mode_valid(struct drm_bridge *bridge, +@@ -1203,9 +1203,10 @@ static enum drm_connector_status mtk_hdmi_detect(struct mtk_hdmi *hdmi) + return mtk_hdmi_update_plugged_status(hdmi); + } + +-static int mtk_hdmi_bridge_mode_valid(struct drm_bridge *bridge, +- const struct drm_display_info *info, +- const struct drm_display_mode *mode) ++static enum drm_mode_status ++mtk_hdmi_bridge_mode_valid(struct drm_bridge *bridge, ++ const struct drm_display_info *info, ++ const struct drm_display_mode *mode) + { + struct mtk_hdmi *hdmi = hdmi_ctx_from_bridge(bridge); + struct drm_bridge *next_bridge; +@@ -1224,12 +1225,14 @@ static int mtk_hdmi_bridge_mode_valid(struct drm_bridge *bridge, return MODE_BAD; } @@ -121968,7 +146894,7 @@ index 8640a8a8a4691..44aa526294439 100644 priv->viu.osd1_ctrl_stat2 = readl(priv->io_base + diff --git a/drivers/gpu/drm/meson/meson_viu.c b/drivers/gpu/drm/meson/meson_viu.c -index 259f3e6bec90a..d4b907889a21d 100644 +index 259f3e6bec90a..cd399b0b71814 100644 --- a/drivers/gpu/drm/meson/meson_viu.c +++ b/drivers/gpu/drm/meson/meson_viu.c @@ -94,7 +94,7 @@ static void meson_viu_set_g12a_osd1_matrix(struct meson_drm *priv, @@ -121980,7 +146906,25 @@ index 259f3e6bec90a..d4b907889a21d 100644 priv->io_base + _REG(VPP_WRAP_OSD1_MATRIX_COEF22)); writel(((m[18] & 0xfff) << 16) | (m[19] & 0xfff), -@@ -469,17 +469,17 @@ void meson_viu_init(struct meson_drm *priv) +@@ -436,15 +436,14 @@ void meson_viu_init(struct meson_drm *priv) + + /* Initialize OSD1 fifo control register */ + reg = VIU_OSD_DDR_PRIORITY_URGENT | +- VIU_OSD_HOLD_FIFO_LINES(31) | + VIU_OSD_FIFO_DEPTH_VAL(32) | /* fifo_depth_val: 32*8=256 */ + VIU_OSD_WORDS_PER_BURST(4) | /* 4 words in 1 burst */ + VIU_OSD_FIFO_LIMITS(2); /* fifo_lim: 2*16=32 */ + + if (meson_vpu_is_compatible(priv, VPU_COMPATIBLE_G12A)) +- reg |= VIU_OSD_BURST_LENGTH_32; ++ reg |= (VIU_OSD_BURST_LENGTH_32 | VIU_OSD_HOLD_FIFO_LINES(31)); + else +- reg |= VIU_OSD_BURST_LENGTH_64; ++ reg |= (VIU_OSD_BURST_LENGTH_64 | VIU_OSD_HOLD_FIFO_LINES(4)); + + writel_relaxed(reg, priv->io_base + _REG(VIU_OSD1_FIFO_CTRL_STAT)); + writel_relaxed(reg, priv->io_base + _REG(VIU_OSD2_FIFO_CTRL_STAT)); +@@ -469,17 +468,17 @@ void meson_viu_init(struct meson_drm *priv) priv->io_base + _REG(VD2_IF0_LUMA_FIFO_SIZE)); if (meson_vpu_is_compatible(priv, 
VPU_COMPATIBLE_G12A)) { @@ -122026,10 +146970,20 @@ index fd98e8bbc5500..2c7271f545dcc 100644 WREG_GFX(8, 0x0f); diff --git a/drivers/gpu/drm/mgag200/mgag200_pll.c b/drivers/gpu/drm/mgag200/mgag200_pll.c -index e9ae22b4f8138..52be08b744ade 100644 +index e9ae22b4f8138..87f9846b9b4ff 100644 --- a/drivers/gpu/drm/mgag200/mgag200_pll.c +++ b/drivers/gpu/drm/mgag200/mgag200_pll.c -@@ -404,9 +404,9 @@ mgag200_pixpll_update_g200wb(struct mgag200_pll *pixpll, const struct mgag200_pl +@@ -268,7 +268,8 @@ static void mgag200_pixpll_update_g200se_04(struct mgag200_pll *pixpll, + pixpllcp = pixpllc->p - 1; + pixpllcs = pixpllc->s; + +- xpixpllcm = pixpllcm | ((pixpllcn & BIT(8)) >> 1); ++ // For G200SE A, BIT(7) should be set unconditionally. ++ xpixpllcm = BIT(7) | pixpllcm; + xpixpllcn = pixpllcn; + xpixpllcp = (pixpllcs << 3) | pixpllcp; + +@@ -404,9 +405,9 @@ mgag200_pixpll_update_g200wb(struct mgag200_pll *pixpll, const struct mgag200_pl udelay(50); /* program pixel pll register */ @@ -122095,7 +147049,7 @@ index 8b73f70766a47..4347a104755a9 100644 } diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c -index 267a880811d65..c0dec5b919d43 100644 +index 267a880811d65..2d07c02c59f14 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c @@ -658,19 +658,23 @@ static void a6xx_set_cp_protect(struct msm_gpu *gpu) @@ -122185,7 +147139,44 @@ index 267a880811d65..c0dec5b919d43 100644 } static uint32_t a6xx_get_rptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring) -@@ -1866,6 +1872,7 @@ struct msm_gpu *a6xx_gpu_init(struct drm_device *dev) +@@ -1740,7 +1746,7 @@ static u32 fuse_to_supp_hw(struct device *dev, struct adreno_rev rev, u32 fuse) + + if (val == UINT_MAX) { + DRM_DEV_ERROR(dev, +- "missing support for speed-bin: %u. Some OPPs may not be supported by hardware", ++ "missing support for speed-bin: %u. Some OPPs may not be supported by hardware\n", + fuse); + return UINT_MAX; + } +@@ -1750,7 +1756,7 @@ static u32 fuse_to_supp_hw(struct device *dev, struct adreno_rev rev, u32 fuse) + + static int a6xx_set_supported_hw(struct device *dev, struct adreno_rev rev) + { +- u32 supp_hw = UINT_MAX; ++ u32 supp_hw; + u32 speedbin; + int ret; + +@@ -1762,15 +1768,13 @@ static int a6xx_set_supported_hw(struct device *dev, struct adreno_rev rev) + if (ret == -ENOENT) { + return 0; + } else if (ret) { +- DRM_DEV_ERROR(dev, +- "failed to read speed-bin (%d). Some OPPs may not be supported by hardware", +- ret); +- goto done; ++ dev_err_probe(dev, ret, ++ "failed to read speed-bin. 
Some OPPs may not be supported by hardware\n"); ++ return ret; + } + + supp_hw = fuse_to_supp_hw(dev, rev, speedbin); + +-done: + ret = devm_pm_opp_set_supported_hw(dev, &supp_hw, 1); + if (ret) + return ret; +@@ -1866,6 +1870,7 @@ struct msm_gpu *a6xx_gpu_init(struct drm_device *dev) BUG_ON(!node); ret = a6xx_gmu_init(a6xx_gpu, node); @@ -122226,6 +147217,34 @@ index 748665232d296..bba68776cb25d 100644 msm_gpu_cleanup(&adreno_gpu->base); } +diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.h b/drivers/gpu/drm/msm/adreno/adreno_gpu.h +index 225c277a6223e..588722e824f6f 100644 +--- a/drivers/gpu/drm/msm/adreno/adreno_gpu.h ++++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.h +@@ -29,11 +29,9 @@ enum { + ADRENO_FW_MAX, + }; + +-enum adreno_quirks { +- ADRENO_QUIRK_TWO_PASS_USE_WFI = 1, +- ADRENO_QUIRK_FAULT_DETECT_MASK = 2, +- ADRENO_QUIRK_LMLOADKILL_DISABLE = 3, +-}; ++#define ADRENO_QUIRK_TWO_PASS_USE_WFI BIT(0) ++#define ADRENO_QUIRK_FAULT_DETECT_MASK BIT(1) ++#define ADRENO_QUIRK_LMLOADKILL_DISABLE BIT(2) + + struct adreno_rev { + uint8_t core; +@@ -65,7 +63,7 @@ struct adreno_info { + const char *name; + const char *fw[ADRENO_FW_MAX]; + uint32_t gmem; +- enum adreno_quirks quirks; ++ u64 quirks; + struct msm_gpu *(*init)(struct drm_device *dev); + const char *zapfw; + u32 inactive_period; diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c index 768012243b440..2186fc947e5b5 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c @@ -122666,6 +147685,22 @@ index cdcaf470f1480..97ae68182f3ed 100644 return PTR_ERR(connector); } +diff --git a/drivers/gpu/drm/msm/disp/mdp4/mdp4_lvds_connector.c b/drivers/gpu/drm/msm/disp/mdp4/mdp4_lvds_connector.c +index 7288041dd86ad..7444b75c42157 100644 +--- a/drivers/gpu/drm/msm/disp/mdp4/mdp4_lvds_connector.c ++++ b/drivers/gpu/drm/msm/disp/mdp4/mdp4_lvds_connector.c +@@ -56,8 +56,9 @@ static int mdp4_lvds_connector_get_modes(struct drm_connector *connector) + return ret; + } + +-static int mdp4_lvds_connector_mode_valid(struct drm_connector *connector, +- struct drm_display_mode *mode) ++static enum drm_mode_status ++mdp4_lvds_connector_mode_valid(struct drm_connector *connector, ++ struct drm_display_mode *mode) + { + struct mdp4_lvds_connector *mdp4_lvds_connector = + to_mdp4_lvds_connector(connector); diff --git a/drivers/gpu/drm/msm/disp/mdp4/mdp4_plane.c b/drivers/gpu/drm/msm/disp/mdp4/mdp4_plane.c index 49bdabea8ed59..3e20f72d75efd 100644 --- a/drivers/gpu/drm/msm/disp/mdp4/mdp4_plane.c @@ -123030,7 +148065,7 @@ index cabe15190ec18..369e57f73a470 100644 va_start(va, fmt); diff --git a/drivers/gpu/drm/msm/dp/dp_aux.c b/drivers/gpu/drm/msm/dp/dp_aux.c -index eb40d8413bca9..6d36f63c33388 100644 +index eb40d8413bca9..7b8d4ba868eb7 100644 --- a/drivers/gpu/drm/msm/dp/dp_aux.c +++ b/drivers/gpu/drm/msm/dp/dp_aux.c @@ -33,6 +33,7 @@ struct dp_aux_private { @@ -123061,7 +148096,18 @@ index eb40d8413bca9..6d36f63c33388 100644 mutex_unlock(&aux->mutex); return ret; -@@ -431,8 +438,13 @@ void dp_aux_init(struct drm_dp_aux *dp_aux) +@@ -399,6 +406,10 @@ void dp_aux_isr(struct drm_dp_aux *dp_aux) + + isr = dp_catalog_aux_get_irq(aux->catalog); + ++ /* no interrupts pending, return immediately */ ++ if (!isr) ++ return; ++ + if (!aux->cmd_busy) + return; + +@@ -431,8 +442,13 @@ void dp_aux_init(struct drm_dp_aux *dp_aux) aux = container_of(dp_aux, struct dp_aux_private, dp_aux); @@ -123075,7 +148121,7 @@ index eb40d8413bca9..6d36f63c33388 100644 } void dp_aux_deinit(struct drm_dp_aux *dp_aux) -@@ -441,7 +453,12 @@ void 
dp_aux_deinit(struct drm_dp_aux *dp_aux) +@@ -441,7 +457,12 @@ void dp_aux_deinit(struct drm_dp_aux *dp_aux) aux = container_of(dp_aux, struct dp_aux_private, dp_aux); @@ -123394,7 +148440,7 @@ index 2363a2df9597b..dcc7af21a5f05 100644 + #endif /* _DP_CTRL_H_ */ diff --git a/drivers/gpu/drm/msm/dp/dp_display.c b/drivers/gpu/drm/msm/dp/dp_display.c -index a0392e4d8134c..b141ccb527b00 100644 +index a0392e4d8134c..15e38ad7aefb4 100644 --- a/drivers/gpu/drm/msm/dp/dp_display.c +++ b/drivers/gpu/drm/msm/dp/dp_display.c @@ -81,6 +81,7 @@ struct dp_display_private { @@ -123585,6 +148631,15 @@ index a0392e4d8134c..b141ccb527b00 100644 DRM_DEBUG_DP("hpd_state=%d\n", state); mutex_unlock(&dp->event_mutex); +@@ -827,7 +840,7 @@ static int dp_display_set_mode(struct msm_dp *dp_display, + + dp = container_of(dp_display, struct dp_display_private, dp_display); + +- dp->panel->dp_mode.drm_mode = mode->drm_mode; ++ drm_mode_copy(&dp->panel->dp_mode.drm_mode, &mode->drm_mode); + dp->panel->dp_mode.bpp = mode->bpp; + dp->panel->dp_mode.capabilities = mode->capabilities; + dp_panel_init_panel_info(dp->panel); @@ -852,7 +865,7 @@ static int dp_display_enable(struct dp_display_private *dp, u32 data) return 0; } @@ -123668,7 +148723,7 @@ index a0392e4d8134c..b141ccb527b00 100644 } static irqreturn_t dp_display_irq_handler(int irq, void *dev_id) -@@ -1194,10 +1224,9 @@ int dp_display_request_irq(struct msm_dp *dp_display) +@@ -1194,13 +1224,12 @@ int dp_display_request_irq(struct msm_dp *dp_display) dp = container_of(dp_display, struct dp_display_private, dp_display); dp->irq = irq_of_parse_and_map(dp->pdev->dev.of_node, 0); @@ -123681,7 +148736,11 @@ index a0392e4d8134c..b141ccb527b00 100644 + return -EINVAL; } - rc = devm_request_irq(&dp->pdev->dev, dp->irq, +- rc = devm_request_irq(&dp->pdev->dev, dp->irq, ++ rc = devm_request_irq(dp_display->drm_dev->dev, dp->irq, + dp_display_irq_handler, + IRQF_TRIGGER_HIGH, "dp_display_isr", dp); + if (rc < 0) { @@ -1236,8 +1265,11 @@ static int dp_display_probe(struct platform_device *pdev) return -EPROBE_DEFER; } @@ -123916,7 +148975,7 @@ index 2181b60e1d1d8..62b742e701d2c 100644 if (panel->aux_cfg_update_done) { diff --git a/drivers/gpu/drm/msm/dsi/dsi.c b/drivers/gpu/drm/msm/dsi/dsi.c -index 75ae3008b68f4..122fadcf7cc1e 100644 +index 75ae3008b68f4..fb8b21837c296 100644 --- a/drivers/gpu/drm/msm/dsi/dsi.c +++ b/drivers/gpu/drm/msm/dsi/dsi.c @@ -40,7 +40,12 @@ static int dsi_get_phy(struct msm_dsi *msm_dsi) @@ -123933,7 +148992,20 @@ index 75ae3008b68f4..122fadcf7cc1e 100644 DRM_DEV_ERROR(&pdev->dev, "%s: phy driver is not ready\n", __func__); return -EPROBE_DEFER; } -@@ -215,9 +220,13 @@ int msm_dsi_modeset_init(struct msm_dsi *msm_dsi, struct drm_device *dev, +@@ -207,6 +212,12 @@ int msm_dsi_modeset_init(struct msm_dsi *msm_dsi, struct drm_device *dev, + return -EINVAL; + + priv = dev->dev_private; ++ ++ if (priv->num_bridges == ARRAY_SIZE(priv->bridges)) { ++ DRM_DEV_ERROR(dev->dev, "too many bridges\n"); ++ return -ENOSPC; ++ } ++ + msm_dsi->dev = dev; + + ret = msm_dsi_host_modeset_init(msm_dsi->host, dev); +@@ -215,9 +226,13 @@ int msm_dsi_modeset_init(struct msm_dsi *msm_dsi, struct drm_device *dev, goto fail; } @@ -124424,7 +149496,7 @@ index cb297b08458e4..8cc1ef8199ac9 100644 glbl_rescode_bot_ctrl = 0x3c; } diff --git a/drivers/gpu/drm/msm/hdmi/hdmi.c b/drivers/gpu/drm/msm/hdmi/hdmi.c -index 737453b6e5966..23fb88b533247 100644 +index 737453b6e5966..e1a9b52d0a292 100644 --- a/drivers/gpu/drm/msm/hdmi/hdmi.c +++ b/drivers/gpu/drm/msm/hdmi/hdmi.c @@ -8,6 +8,8 @@ 
@@ -124445,20 +149517,22 @@ index 737453b6e5966..23fb88b533247 100644 /* Process DDC: */ msm_hdmi_i2c_irq(hdmi->i2c); -@@ -97,8 +99,13 @@ static int msm_hdmi_get_phy(struct hdmi *hdmi) +@@ -97,10 +99,15 @@ static int msm_hdmi_get_phy(struct hdmi *hdmi) of_node_put(phy_node); - if (!phy_pdev || !hdmi->phy) { + if (!phy_pdev) { -+ DRM_DEV_ERROR(&pdev->dev, "phy driver is not ready\n"); -+ return -EPROBE_DEFER; -+ } -+ if (!hdmi->phy) { DRM_DEV_ERROR(&pdev->dev, "phy driver is not ready\n"); -+ put_device(&phy_pdev->dev); return -EPROBE_DEFER; } ++ if (!hdmi->phy) { ++ DRM_DEV_ERROR(&pdev->dev, "phy driver is not ready\n"); ++ put_device(&phy_pdev->dev); ++ return -EPROBE_DEFER; ++ } + + hdmi->phy_dev = get_device(&phy_pdev->dev); @@ -137,6 +144,10 @@ static struct hdmi *msm_hdmi_init(struct platform_device *pdev) /* HDCP needs physical address of hdmi register */ @@ -124471,7 +149545,42 @@ index 737453b6e5966..23fb88b533247 100644 hdmi->mmio_phy_addr = res->start; hdmi->qfprom_mmio = msm_ioremap(pdev, -@@ -297,7 +308,7 @@ int msm_hdmi_modeset_init(struct hdmi *hdmi, +@@ -236,7 +247,21 @@ static struct hdmi *msm_hdmi_init(struct platform_device *pdev) + hdmi->pwr_clks[i] = clk; + } + +- pm_runtime_enable(&pdev->dev); ++ hdmi->hpd_gpiod = devm_gpiod_get_optional(&pdev->dev, "hpd", GPIOD_IN); ++ /* This will catch e.g. -EPROBE_DEFER */ ++ if (IS_ERR(hdmi->hpd_gpiod)) { ++ ret = PTR_ERR(hdmi->hpd_gpiod); ++ DRM_DEV_ERROR(&pdev->dev, "failed to get hpd gpio: (%d)\n", ret); ++ goto fail; ++ } ++ ++ if (!hdmi->hpd_gpiod) ++ DBG("failed to get HPD gpio"); ++ ++ if (hdmi->hpd_gpiod) ++ gpiod_set_consumer_name(hdmi->hpd_gpiod, "HDMI_HPD"); ++ ++ devm_pm_runtime_enable(&pdev->dev); + + hdmi->workq = alloc_ordered_workqueue("msm_hdmi", 0); + +@@ -284,6 +309,11 @@ int msm_hdmi_modeset_init(struct hdmi *hdmi, + struct platform_device *pdev = hdmi->pdev; + int ret; + ++ if (priv->num_bridges == ARRAY_SIZE(priv->bridges)) { ++ DRM_DEV_ERROR(dev->dev, "too many bridges\n"); ++ return -ENOSPC; ++ } ++ + hdmi->dev = dev; + hdmi->encoder = encoder; + +@@ -297,7 +327,7 @@ int msm_hdmi_modeset_init(struct hdmi *hdmi, goto fail; } @@ -124480,7 +149589,7 @@ index 737453b6e5966..23fb88b533247 100644 if (IS_ERR(hdmi->connector)) { ret = PTR_ERR(hdmi->connector); DRM_DEV_ERROR(dev->dev, "failed to create HDMI connector: %d\n", ret); -@@ -305,10 +316,12 @@ int msm_hdmi_modeset_init(struct hdmi *hdmi, +@@ -305,15 +335,17 @@ int msm_hdmi_modeset_init(struct hdmi *hdmi, goto fail; } @@ -124496,7 +149605,14 @@ index 737453b6e5966..23fb88b533247 100644 goto fail; } -@@ -321,7 +334,9 @@ int msm_hdmi_modeset_init(struct hdmi *hdmi, +- ret = devm_request_irq(&pdev->dev, hdmi->irq, +- msm_hdmi_irq, IRQF_TRIGGER_HIGH | IRQF_ONESHOT, ++ ret = devm_request_irq(dev->dev, hdmi->irq, ++ msm_hdmi_irq, IRQF_TRIGGER_HIGH, + "hdmi_isr", hdmi); + if (ret < 0) { + DRM_DEV_ERROR(dev->dev, "failed to request IRQ%u: %d\n", +@@ -321,7 +353,9 @@ int msm_hdmi_modeset_init(struct hdmi *hdmi, goto fail; } @@ -124507,25 +149623,128 @@ index 737453b6e5966..23fb88b533247 100644 if (ret < 0) { DRM_DEV_ERROR(&hdmi->pdev->dev, "failed to enable HPD: %d\n", ret); goto fail; +@@ -409,20 +443,6 @@ static struct hdmi_platform_config hdmi_tx_8996_config = { + .hpd_freq = hpd_clk_freq_8x74, + }; + +-static const struct { +- const char *name; +- const bool output; +- const int value; +- const char *label; +-} msm_hdmi_gpio_pdata[] = { +- { "qcom,hdmi-tx-ddc-clk", true, 1, "HDMI_DDC_CLK" }, +- { "qcom,hdmi-tx-ddc-data", true, 1, "HDMI_DDC_DATA" }, +- { 
"qcom,hdmi-tx-hpd", false, 1, "HDMI_HPD" }, +- { "qcom,hdmi-tx-mux-en", true, 1, "HDMI_MUX_EN" }, +- { "qcom,hdmi-tx-mux-sel", true, 0, "HDMI_MUX_SEL" }, +- { "qcom,hdmi-tx-mux-lpm", true, 1, "HDMI_MUX_LPM" }, +-}; +- + /* + * HDMI audio codec callbacks + */ +@@ -535,7 +555,7 @@ static int msm_hdmi_bind(struct device *dev, struct device *master, void *data) + struct hdmi_platform_config *hdmi_cfg; + struct hdmi *hdmi; + struct device_node *of_node = dev->of_node; +- int i, err; ++ int err; + + hdmi_cfg = (struct hdmi_platform_config *) + of_device_get_match_data(dev); +@@ -547,42 +567,6 @@ static int msm_hdmi_bind(struct device *dev, struct device *master, void *data) + hdmi_cfg->mmio_name = "core_physical"; + hdmi_cfg->qfprom_mmio_name = "qfprom_physical"; + +- for (i = 0; i < HDMI_MAX_NUM_GPIO; i++) { +- const char *name = msm_hdmi_gpio_pdata[i].name; +- struct gpio_desc *gpiod; +- +- /* +- * We are fetching the GPIO lines "as is" since the connector +- * code is enabling and disabling the lines. Until that point +- * the power-on default value will be kept. +- */ +- gpiod = devm_gpiod_get_optional(dev, name, GPIOD_ASIS); +- /* This will catch e.g. -PROBE_DEFER */ +- if (IS_ERR(gpiod)) +- return PTR_ERR(gpiod); +- if (!gpiod) { +- /* Try a second time, stripping down the name */ +- char name3[32]; +- +- /* +- * Try again after stripping out the "qcom,hdmi-tx" +- * prefix. This is mainly to match "hpd-gpios" used +- * in the upstream bindings. +- */ +- if (sscanf(name, "qcom,hdmi-tx-%s", name3)) +- gpiod = devm_gpiod_get_optional(dev, name3, GPIOD_ASIS); +- if (IS_ERR(gpiod)) +- return PTR_ERR(gpiod); +- if (!gpiod) +- DBG("failed to get gpio: %s", name); +- } +- hdmi_cfg->gpios[i].gpiod = gpiod; +- if (gpiod) +- gpiod_set_consumer_name(gpiod, msm_hdmi_gpio_pdata[i].label); +- hdmi_cfg->gpios[i].output = msm_hdmi_gpio_pdata[i].output; +- hdmi_cfg->gpios[i].value = msm_hdmi_gpio_pdata[i].value; +- } +- + dev->platform_data = hdmi_cfg; + + hdmi = msm_hdmi_init(to_platform_device(dev)); diff --git a/drivers/gpu/drm/msm/hdmi/hdmi.h b/drivers/gpu/drm/msm/hdmi/hdmi.h -index d0b84f0abee17..8d2706bec3b99 100644 +index d0b84f0abee17..20f554312b17c 100644 --- a/drivers/gpu/drm/msm/hdmi/hdmi.h +++ b/drivers/gpu/drm/msm/hdmi/hdmi.h -@@ -114,6 +114,13 @@ struct hdmi_platform_config { - struct hdmi_gpio_data gpios[HDMI_MAX_NUM_GPIO]; - }; +@@ -19,17 +19,9 @@ + #include "msm_drv.h" + #include "hdmi.xml.h" + +-#define HDMI_MAX_NUM_GPIO 6 +- + struct hdmi_phy; + struct hdmi_platform_config; + +-struct hdmi_gpio_data { +- struct gpio_desc *gpiod; +- bool output; +- int value; +-}; +- + struct hdmi_audio { + bool enabled; + struct hdmi_audio_infoframe infoframe; +@@ -61,6 +53,8 @@ struct hdmi { + struct clk **hpd_clks; + struct clk **pwr_clks; ++ struct gpio_desc *hpd_gpiod; ++ + struct hdmi_phy *phy; + struct device *phy_dev; + +@@ -109,10 +103,14 @@ struct hdmi_platform_config { + /* clks that need to be on for screen pwr (ie pixel clk): */ + const char **pwr_clk_names; + int pwr_clk_cnt; ++}; + +- /* gpio's: */ +- struct hdmi_gpio_data gpios[HDMI_MAX_NUM_GPIO]; +struct hdmi_bridge { + struct drm_bridge base; + struct hdmi *hdmi; + struct work_struct hpd_work; -+}; + }; +#define to_hdmi_bridge(x) container_of(x, struct hdmi_bridge, base) -+ + void msm_hdmi_set_mode(struct hdmi *hdmi, bool power_on); - static inline void hdmi_write(struct hdmi *hdmi, u32 reg, u32 data) -@@ -230,13 +237,11 @@ void msm_hdmi_audio_set_sample_rate(struct hdmi *hdmi, int rate); +@@ -230,13 +228,11 @@ void 
msm_hdmi_audio_set_sample_rate(struct hdmi *hdmi, int rate); struct drm_bridge *msm_hdmi_bridge_init(struct hdmi *hdmi); void msm_hdmi_bridge_destroy(struct drm_bridge *bridge); @@ -125126,10 +150345,10 @@ index 58707a1f3878f..0000000000000 -} diff --git a/drivers/gpu/drm/msm/hdmi/hdmi_hpd.c b/drivers/gpu/drm/msm/hdmi/hdmi_hpd.c new file mode 100644 -index 0000000000000..c3a236bb952ca +index 0000000000000..52ebe562ca9be --- /dev/null +++ b/drivers/gpu/drm/msm/hdmi/hdmi_hpd.c -@@ -0,0 +1,323 @@ +@@ -0,0 +1,269 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2013 Red Hat @@ -125192,48 +150411,6 @@ index 0000000000000..c3a236bb952ca + } +} + -+static int gpio_config(struct hdmi *hdmi, bool on) -+{ -+ const struct hdmi_platform_config *config = hdmi->config; -+ int i; -+ -+ if (on) { -+ for (i = 0; i < HDMI_MAX_NUM_GPIO; i++) { -+ struct hdmi_gpio_data gpio = config->gpios[i]; -+ -+ if (gpio.gpiod) { -+ if (gpio.output) { -+ gpiod_direction_output(gpio.gpiod, -+ gpio.value); -+ } else { -+ gpiod_direction_input(gpio.gpiod); -+ gpiod_set_value_cansleep(gpio.gpiod, -+ gpio.value); -+ } -+ } -+ } -+ -+ DBG("gpio on"); -+ } else { -+ for (i = 0; i < HDMI_MAX_NUM_GPIO; i++) { -+ struct hdmi_gpio_data gpio = config->gpios[i]; -+ -+ if (!gpio.gpiod) -+ continue; -+ -+ if (gpio.output) { -+ int value = gpio.value ? 0 : 1; -+ -+ gpiod_set_value_cansleep(gpio.gpiod, value); -+ } -+ } -+ -+ DBG("gpio off"); -+ } -+ -+ return 0; -+} -+ +static void enable_hpd_clocks(struct hdmi *hdmi, bool enable) +{ + const struct hdmi_platform_config *config = hdmi->config; @@ -125289,11 +150466,8 @@ index 0000000000000..c3a236bb952ca + goto fail; + } + -+ ret = gpio_config(hdmi, true); -+ if (ret) { -+ DRM_DEV_ERROR(dev, "failed to configure GPIOs: %d\n", ret); -+ goto fail; -+ } ++ if (hdmi->hpd_gpiod) ++ gpiod_set_value_cansleep(hdmi->hpd_gpiod, 1); + + pm_runtime_get_sync(dev); + enable_hpd_clocks(hdmi, true); @@ -125342,10 +150516,6 @@ index 0000000000000..c3a236bb952ca + enable_hpd_clocks(hdmi, false); + pm_runtime_put_autosuspend(dev); + -+ ret = gpio_config(hdmi, false); -+ if (ret) -+ dev_warn(dev, "failed to unconfigure GPIOs: %d\n", ret); -+ + ret = pinctrl_pm_select_sleep_state(dev); + if (ret) + dev_warn(dev, "pinctrl state chg failed: %d\n", ret); @@ -125407,10 +150577,7 @@ index 0000000000000..c3a236bb952ca +#define HPD_GPIO_INDEX 2 +static enum drm_connector_status detect_gpio(struct hdmi *hdmi) +{ -+ const struct hdmi_platform_config *config = hdmi->config; -+ struct hdmi_gpio_data hpd_gpio = config->gpios[HPD_GPIO_INDEX]; -+ -+ return gpiod_get_value(hpd_gpio.gpiod) ? ++ return gpiod_get_value(hdmi->hpd_gpiod) ? + connector_status_connected : + connector_status_disconnected; +} @@ -125420,8 +150587,6 @@ index 0000000000000..c3a236bb952ca +{ + struct hdmi_bridge *hdmi_bridge = to_hdmi_bridge(bridge); + struct hdmi *hdmi = hdmi_bridge->hdmi; -+ const struct hdmi_platform_config *config = hdmi->config; -+ struct hdmi_gpio_data hpd_gpio = config->gpios[HPD_GPIO_INDEX]; + enum drm_connector_status stat_gpio, stat_reg; + int retry = 20; + @@ -125429,7 +150594,7 @@ index 0000000000000..c3a236bb952ca + * some platforms may not have hpd gpio. Rely only on the status + * provided by REG_HDMI_HPD_INT_STATUS in this case. 
+ */ -+ if (!hpd_gpio.gpiod) ++ if (!hdmi->hpd_gpiod) + return detect_reg(hdmi); + + do { @@ -126830,6 +151995,44 @@ index 57199be082fd3..c2b5cc5f97eda 100644 } else { cstate = &pstate->base; } +diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/tu102.c b/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/tu102.c +index 634f64f88fc8b..81a1ad2c88a7e 100644 +--- a/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/tu102.c ++++ b/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/tu102.c +@@ -65,10 +65,33 @@ tu102_devinit_pll_set(struct nvkm_devinit *init, u32 type, u32 freq) + return ret; + } + ++static int ++tu102_devinit_wait(struct nvkm_device *device) ++{ ++ unsigned timeout = 50 + 2000; ++ ++ do { ++ if (nvkm_rd32(device, 0x118128) & 0x00000001) { ++ if ((nvkm_rd32(device, 0x118234) & 0x000000ff) == 0xff) ++ return 0; ++ } ++ ++ usleep_range(1000, 2000); ++ } while (timeout--); ++ ++ return -ETIMEDOUT; ++} ++ + int + tu102_devinit_post(struct nvkm_devinit *base, bool post) + { + struct nv50_devinit *init = nv50_devinit(base); ++ int ret; ++ ++ ret = tu102_devinit_wait(init->base.subdev.device); ++ if (ret) ++ return ret; ++ + gm200_devinit_preos(init, post); + return 0; + } diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/base.c index 24382875fb4f3..455e95a89259f 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/base.c @@ -127316,7 +152519,7 @@ index b937e24dac8e0..25829a0a8e801 100644 static int sharp_nt_panel_remove(struct mipi_dsi_device *dsi) diff --git a/drivers/gpu/drm/panel/panel-simple.c b/drivers/gpu/drm/panel/panel-simple.c -index 9b6c4e6c38a1b..1a9685eb80026 100644 +index 9b6c4e6c38a1b..fb785f5a106ac 100644 --- a/drivers/gpu/drm/panel/panel-simple.c +++ b/drivers/gpu/drm/panel/panel-simple.c @@ -721,6 +721,7 @@ static int panel_simple_probe(struct device *dev, const struct panel_desc *desc, @@ -127362,6 +152565,57 @@ index 9b6c4e6c38a1b..1a9685eb80026 100644 .connector_type = DRM_MODE_CONNECTOR_LVDS, }; +@@ -3088,6 +3090,7 @@ static const struct display_timing logictechno_lt161010_2nh_timing = { + static const struct panel_desc logictechno_lt161010_2nh = { + .timings = &logictechno_lt161010_2nh_timing, + .num_timings = 1, ++ .bpc = 6, + .size = { + .width = 154, + .height = 86, +@@ -3117,6 +3120,7 @@ static const struct display_timing logictechno_lt170410_2whc_timing = { + static const struct panel_desc logictechno_lt170410_2whc = { + .timings = &logictechno_lt170410_2whc_timing, + .num_timings = 1, ++ .bpc = 8, + .size = { + .width = 217, + .height = 136, +diff --git a/drivers/gpu/drm/panel/panel-sitronix-st7701.c b/drivers/gpu/drm/panel/panel-sitronix-st7701.c +index 320a2a8fd4592..098955526b687 100644 +--- a/drivers/gpu/drm/panel/panel-sitronix-st7701.c ++++ b/drivers/gpu/drm/panel/panel-sitronix-st7701.c +@@ -384,7 +384,15 @@ static int st7701_dsi_probe(struct mipi_dsi_device *dsi) + st7701->dsi = dsi; + st7701->desc = desc; + +- return mipi_dsi_attach(dsi); ++ ret = mipi_dsi_attach(dsi); ++ if (ret) ++ goto err_attach; ++ ++ return 0; ++ ++err_attach: ++ drm_panel_remove(&st7701->panel); ++ return ret; + } + + static int st7701_dsi_remove(struct mipi_dsi_device *dsi) +diff --git a/drivers/gpu/drm/panfrost/Kconfig b/drivers/gpu/drm/panfrost/Kconfig +index 86cdc0ce79e65..77f4d32e52045 100644 +--- a/drivers/gpu/drm/panfrost/Kconfig ++++ b/drivers/gpu/drm/panfrost/Kconfig +@@ -3,7 +3,8 @@ + config DRM_PANFROST + tristate "Panfrost (DRM support for ARM Mali Midgard/Bifrost GPUs)" + depends on DRM +- depends on ARM || ARM64 || 
(COMPILE_TEST && !GENERIC_ATOMIC64) ++ depends on ARM || ARM64 || COMPILE_TEST ++ depends on !GENERIC_ATOMIC64 # for IOMMU_IO_PGTABLE_LPAE + depends on MMU + select DRM_SCHED + select IOMMU_SUPPORT diff --git a/drivers/gpu/drm/panfrost/panfrost_devfreq.c b/drivers/gpu/drm/panfrost/panfrost_devfreq.c index 194af7f607a6e..be36dd060a2b4 100644 --- a/drivers/gpu/drm/panfrost/panfrost_devfreq.c @@ -127385,10 +152639,57 @@ index 194af7f607a6e..be36dd060a2b4 100644 /* diff --git a/drivers/gpu/drm/panfrost/panfrost_drv.c b/drivers/gpu/drm/panfrost/panfrost_drv.c -index 1ffaef5ec5ff5..e48e357ea4f18 100644 +index 1ffaef5ec5ff5..4c271244092b4 100644 --- a/drivers/gpu/drm/panfrost/panfrost_drv.c +++ b/drivers/gpu/drm/panfrost/panfrost_drv.c -@@ -418,12 +418,12 @@ static int panfrost_ioctl_madvise(struct drm_device *dev, void *data, +@@ -82,6 +82,7 @@ static int panfrost_ioctl_create_bo(struct drm_device *dev, void *data, + struct panfrost_gem_object *bo; + struct drm_panfrost_create_bo *args = data; + struct panfrost_gem_mapping *mapping; ++ int ret; + + if (!args->size || args->pad || + (args->flags & ~(PANFROST_BO_NOEXEC | PANFROST_BO_HEAP))) +@@ -92,21 +93,29 @@ static int panfrost_ioctl_create_bo(struct drm_device *dev, void *data, + !(args->flags & PANFROST_BO_NOEXEC)) + return -EINVAL; + +- bo = panfrost_gem_create_with_handle(file, dev, args->size, args->flags, +- &args->handle); ++ bo = panfrost_gem_create(dev, args->size, args->flags); + if (IS_ERR(bo)) + return PTR_ERR(bo); + ++ ret = drm_gem_handle_create(file, &bo->base.base, &args->handle); ++ if (ret) ++ goto out; ++ + mapping = panfrost_gem_mapping_get(bo, priv); +- if (!mapping) { +- drm_gem_object_put(&bo->base.base); +- return -EINVAL; ++ if (mapping) { ++ args->offset = mapping->mmnode.start << PAGE_SHIFT; ++ panfrost_gem_mapping_put(mapping); ++ } else { ++ /* This can only happen if the handle from ++ * drm_gem_handle_create() has already been guessed and freed ++ * by user space ++ */ ++ ret = -EINVAL; + } + +- args->offset = mapping->mmnode.start << PAGE_SHIFT; +- panfrost_gem_mapping_put(mapping); +- +- return 0; ++out: ++ drm_gem_object_put(&bo->base.base); ++ return ret; + } + + /** +@@ -418,12 +427,12 @@ static int panfrost_ioctl_madvise(struct drm_device *dev, void *data, } } @@ -127405,7 +152706,7 @@ index 1ffaef5ec5ff5..e48e357ea4f18 100644 list_del_init(&bo->base.madv_list); } diff --git a/drivers/gpu/drm/panfrost/panfrost_gem.c b/drivers/gpu/drm/panfrost/panfrost_gem.c -index 23377481f4e31..6d9bdb9180cb7 100644 +index 23377481f4e31..55e3a68ed28a4 100644 --- a/drivers/gpu/drm/panfrost/panfrost_gem.c +++ b/drivers/gpu/drm/panfrost/panfrost_gem.c @@ -49,7 +49,7 @@ static void panfrost_gem_free_object(struct drm_gem_object *obj) @@ -127451,6 +152752,53 @@ index 23377481f4e31..6d9bdb9180cb7 100644 }; /** +@@ -232,12 +234,8 @@ struct drm_gem_object *panfrost_gem_create_object(struct drm_device *dev, size_t + } + + struct panfrost_gem_object * +-panfrost_gem_create_with_handle(struct drm_file *file_priv, +- struct drm_device *dev, size_t size, +- u32 flags, +- uint32_t *handle) ++panfrost_gem_create(struct drm_device *dev, size_t size, u32 flags) + { +- int ret; + struct drm_gem_shmem_object *shmem; + struct panfrost_gem_object *bo; + +@@ -253,16 +251,6 @@ panfrost_gem_create_with_handle(struct drm_file *file_priv, + bo->noexec = !!(flags & PANFROST_BO_NOEXEC); + bo->is_heap = !!(flags & PANFROST_BO_HEAP); + +- /* +- * Allocate an id of idr table where the obj is registered +- * and handle has the id what user can see. 
+- */ +- ret = drm_gem_handle_create(file_priv, &shmem->base, handle); +- /* drop reference from allocate - handle holds it now. */ +- drm_gem_object_put(&shmem->base); +- if (ret) +- return ERR_PTR(ret); +- + return bo; + } + +diff --git a/drivers/gpu/drm/panfrost/panfrost_gem.h b/drivers/gpu/drm/panfrost/panfrost_gem.h +index 8088d5fd8480e..ad2877eeeccdf 100644 +--- a/drivers/gpu/drm/panfrost/panfrost_gem.h ++++ b/drivers/gpu/drm/panfrost/panfrost_gem.h +@@ -69,10 +69,7 @@ panfrost_gem_prime_import_sg_table(struct drm_device *dev, + struct sg_table *sgt); + + struct panfrost_gem_object * +-panfrost_gem_create_with_handle(struct drm_file *file_priv, +- struct drm_device *dev, size_t size, +- u32 flags, +- uint32_t *handle); ++panfrost_gem_create(struct drm_device *dev, size_t size, u32 flags); + + int panfrost_gem_open(struct drm_gem_object *obj, struct drm_file *file_priv); + void panfrost_gem_close(struct drm_gem_object *obj, diff --git a/drivers/gpu/drm/panfrost/panfrost_gem_shrinker.c b/drivers/gpu/drm/panfrost/panfrost_gem_shrinker.c index 1b9f68d8e9aa6..b0142341e2235 100644 --- a/drivers/gpu/drm/panfrost/panfrost_gem_shrinker.c @@ -127626,6 +152974,73 @@ index 769f666335ac4..672d2239293e0 100644 break; default: break; +diff --git a/drivers/gpu/drm/radeon/radeon_bios.c b/drivers/gpu/drm/radeon/radeon_bios.c +index 33121655d50bb..63bdc9f6fc243 100644 +--- a/drivers/gpu/drm/radeon/radeon_bios.c ++++ b/drivers/gpu/drm/radeon/radeon_bios.c +@@ -227,6 +227,7 @@ static bool radeon_atrm_get_bios(struct radeon_device *rdev) + + if (!found) + return false; ++ pci_dev_put(pdev); + + rdev->bios = kmalloc(size, GFP_KERNEL); + if (!rdev->bios) { +@@ -612,13 +613,14 @@ static bool radeon_acpi_vfct_bios(struct radeon_device *rdev) + acpi_size tbl_size; + UEFI_ACPI_VFCT *vfct; + unsigned offset; ++ bool r = false; + + if (!ACPI_SUCCESS(acpi_get_table("VFCT", 1, &hdr))) + return false; + tbl_size = hdr->length; + if (tbl_size < sizeof(UEFI_ACPI_VFCT)) { + DRM_ERROR("ACPI VFCT table present but broken (too short #1)\n"); +- return false; ++ goto out; + } + + vfct = (UEFI_ACPI_VFCT *)hdr; +@@ -631,13 +633,13 @@ static bool radeon_acpi_vfct_bios(struct radeon_device *rdev) + offset += sizeof(VFCT_IMAGE_HEADER); + if (offset > tbl_size) { + DRM_ERROR("ACPI VFCT image header truncated\n"); +- return false; ++ goto out; + } + + offset += vhdr->ImageLength; + if (offset > tbl_size) { + DRM_ERROR("ACPI VFCT image truncated\n"); +- return false; ++ goto out; + } + + if (vhdr->ImageLength && +@@ -649,15 +651,18 @@ static bool radeon_acpi_vfct_bios(struct radeon_device *rdev) + rdev->bios = kmemdup(&vbios->VbiosContent, + vhdr->ImageLength, + GFP_KERNEL); ++ if (rdev->bios) ++ r = true; + +- if (!rdev->bios) +- return false; +- return true; ++ goto out; + } + } + + DRM_ERROR("ACPI VFCT table present but broken (too short #2)\n"); +- return false; ++ ++out: ++ acpi_put_table(hdr); ++ return r; + } + #else + static inline bool radeon_acpi_vfct_bios(struct radeon_device *rdev) diff --git a/drivers/gpu/drm/radeon/radeon_connectors.c b/drivers/gpu/drm/radeon/radeon_connectors.c index 607ad5620bd99..d157bb9072e86 100644 --- a/drivers/gpu/drm/radeon/radeon_connectors.c @@ -127845,7 +153260,7 @@ index ade2327a10e2c..512581698a1e0 100644 static int rockchip_dp_remove(struct platform_device *pdev) diff --git a/drivers/gpu/drm/rockchip/cdn-dp-core.c b/drivers/gpu/drm/rockchip/cdn-dp-core.c -index 13c6b857158fc..6b5d0722afa6c 100644 +index 13c6b857158fc..20e63cadec8c7 100644 --- a/drivers/gpu/drm/rockchip/cdn-dp-core.c +++ 
b/drivers/gpu/drm/rockchip/cdn-dp-core.c @@ -277,8 +277,9 @@ static int cdn_dp_connector_get_modes(struct drm_connector *connector) @@ -127860,8 +153275,17 @@ index 13c6b857158fc..6b5d0722afa6c 100644 { struct cdn_dp_device *dp = connector_to_dp(connector); struct drm_display_info *display_info = &dp->connector.display_info; +@@ -564,7 +565,7 @@ static void cdn_dp_encoder_mode_set(struct drm_encoder *encoder, + video->v_sync_polarity = !!(mode->flags & DRM_MODE_FLAG_NVSYNC); + video->h_sync_polarity = !!(mode->flags & DRM_MODE_FLAG_NHSYNC); + +- memcpy(&dp->mode, adjusted, sizeof(*mode)); ++ drm_mode_copy(&dp->mode, adjusted); + } + + static bool cdn_dp_check_link_status(struct cdn_dp_device *dp) diff --git a/drivers/gpu/drm/rockchip/dw-mipi-dsi-rockchip.c b/drivers/gpu/drm/rockchip/dw-mipi-dsi-rockchip.c -index a2262bee5aa47..59c3d8ef6bf9a 100644 +index a2262bee5aa47..1129f98fe7f94 100644 --- a/drivers/gpu/drm/rockchip/dw-mipi-dsi-rockchip.c +++ b/drivers/gpu/drm/rockchip/dw-mipi-dsi-rockchip.c @@ -268,6 +268,8 @@ struct dw_mipi_dsi_rockchip { @@ -127976,7 +153400,43 @@ index a2262bee5aa47..59c3d8ef6bf9a 100644 } static const struct component_ops dw_mipi_dsi_rockchip_ops = { -@@ -1276,6 +1285,36 @@ static const struct phy_ops dw_mipi_dsi_dphy_ops = { +@@ -1018,23 +1027,31 @@ static int dw_mipi_dsi_rockchip_host_attach(void *priv_data, + if (ret) { + DRM_DEV_ERROR(dsi->dev, "Failed to register component: %d\n", + ret); +- return ret; ++ goto out; + } + + second = dw_mipi_dsi_rockchip_find_second(dsi); +- if (IS_ERR(second)) +- return PTR_ERR(second); ++ if (IS_ERR(second)) { ++ ret = PTR_ERR(second); ++ goto out; ++ } + if (second) { + ret = component_add(second, &dw_mipi_dsi_rockchip_ops); + if (ret) { + DRM_DEV_ERROR(second, + "Failed to register component: %d\n", + ret); +- return ret; ++ goto out; + } + } + + return 0; ++ ++out: ++ mutex_lock(&dsi->usage_mutex); ++ dsi->usage_mode = DW_DSI_USAGE_IDLE; ++ mutex_unlock(&dsi->usage_mutex); ++ return ret; + } + + static int dw_mipi_dsi_rockchip_host_detach(void *priv_data, +@@ -1276,6 +1293,36 @@ static const struct phy_ops dw_mipi_dsi_dphy_ops = { .exit = dw_mipi_dsi_dphy_exit, }; @@ -128013,7 +153473,7 @@ index a2262bee5aa47..59c3d8ef6bf9a 100644 static int dw_mipi_dsi_rockchip_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; -@@ -1397,14 +1436,10 @@ static int dw_mipi_dsi_rockchip_probe(struct platform_device *pdev) +@@ -1397,14 +1444,10 @@ static int dw_mipi_dsi_rockchip_probe(struct platform_device *pdev) if (ret != -EPROBE_DEFER) DRM_DEV_ERROR(dev, "Failed to probe dw_mipi_dsi: %d\n", ret); @@ -128029,12 +153489,18 @@ index a2262bee5aa47..59c3d8ef6bf9a 100644 } static int dw_mipi_dsi_rockchip_remove(struct platform_device *pdev) -@@ -1593,6 +1628,7 @@ struct platform_driver dw_mipi_dsi_rockchip_driver = { +@@ -1593,6 +1636,13 @@ struct platform_driver dw_mipi_dsi_rockchip_driver = { .remove = dw_mipi_dsi_rockchip_remove, .driver = { .of_match_table = dw_mipi_dsi_rockchip_dt_ids, + .pm = &dw_mipi_dsi_rockchip_pm_ops, .name = "dw-mipi-dsi-rockchip", ++ /* ++ * For dual-DSI display, one DSI pokes at the other DSI's ++ * drvdata in dw_mipi_dsi_rockchip_find_second(). This is not ++ * safe for asynchronous probe. 
++ */ ++ .probe_type = PROBE_FORCE_SYNCHRONOUS, }, }; diff --git a/drivers/gpu/drm/rockchip/dw_hdmi-rockchip.c b/drivers/gpu/drm/rockchip/dw_hdmi-rockchip.c @@ -128069,6 +153535,32 @@ index 830bdd5e9b7ce..8677c82716784 100644 drm_encoder_helper_add(encoder, &dw_hdmi_rockchip_encoder_helper_funcs); drm_simple_encoder_init(drm, encoder, DRM_MODE_ENCODER_TMDS); +diff --git a/drivers/gpu/drm/rockchip/inno_hdmi.c b/drivers/gpu/drm/rockchip/inno_hdmi.c +index 7afdc54eb3ec1..78120da5e63aa 100644 +--- a/drivers/gpu/drm/rockchip/inno_hdmi.c ++++ b/drivers/gpu/drm/rockchip/inno_hdmi.c +@@ -488,7 +488,7 @@ static void inno_hdmi_encoder_mode_set(struct drm_encoder *encoder, + inno_hdmi_setup(hdmi, adj_mode); + + /* Store the display mode for plugin/DPMS poweron events */ +- memcpy(&hdmi->previous_mode, adj_mode, sizeof(hdmi->previous_mode)); ++ drm_mode_copy(&hdmi->previous_mode, adj_mode); + } + + static void inno_hdmi_encoder_enable(struct drm_encoder *encoder) +diff --git a/drivers/gpu/drm/rockchip/rk3066_hdmi.c b/drivers/gpu/drm/rockchip/rk3066_hdmi.c +index 1c546c3a89984..17e7c40a9e7b9 100644 +--- a/drivers/gpu/drm/rockchip/rk3066_hdmi.c ++++ b/drivers/gpu/drm/rockchip/rk3066_hdmi.c +@@ -383,7 +383,7 @@ rk3066_hdmi_encoder_mode_set(struct drm_encoder *encoder, + struct rk3066_hdmi *hdmi = to_rk3066_hdmi(encoder); + + /* Store the display mode for plugin/DPMS poweron events. */ +- memcpy(&hdmi->previous_mode, adj_mode, sizeof(hdmi->previous_mode)); ++ drm_mode_copy(&hdmi->previous_mode, adj_mode); + } + + static void rk3066_hdmi_encoder_enable(struct drm_encoder *encoder) diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_vop.c b/drivers/gpu/drm/rockchip/rockchip_drm_vop.c index a25b98b7f5bd7..d5b74ea06a451 100644 --- a/drivers/gpu/drm/rockchip/rockchip_drm_vop.c @@ -128095,6 +153587,42 @@ index a25b98b7f5bd7..d5b74ea06a451 100644 res = platform_get_resource(pdev, IORESOURCE_MEM, 1); if (res) { +diff --git a/drivers/gpu/drm/rockchip/rockchip_lvds.c b/drivers/gpu/drm/rockchip/rockchip_lvds.c +index 551653940e396..2550429df49fe 100644 +--- a/drivers/gpu/drm/rockchip/rockchip_lvds.c ++++ b/drivers/gpu/drm/rockchip/rockchip_lvds.c +@@ -145,7 +145,7 @@ static int rk3288_lvds_poweron(struct rockchip_lvds *lvds) + DRM_DEV_ERROR(lvds->dev, "failed to enable lvds pclk %d\n", ret); + return ret; + } +- ret = pm_runtime_get_sync(lvds->dev); ++ ret = pm_runtime_resume_and_get(lvds->dev); + if (ret < 0) { + DRM_DEV_ERROR(lvds->dev, "failed to get pm runtime: %d\n", ret); + clk_disable(lvds->pclk); +@@ -329,16 +329,20 @@ static int px30_lvds_poweron(struct rockchip_lvds *lvds) + { + int ret; + +- ret = pm_runtime_get_sync(lvds->dev); ++ ret = pm_runtime_resume_and_get(lvds->dev); + if (ret < 0) { + DRM_DEV_ERROR(lvds->dev, "failed to get pm runtime: %d\n", ret); + return ret; + } + + /* Enable LVDS mode */ +- return regmap_update_bits(lvds->grf, PX30_LVDS_GRF_PD_VO_CON1, ++ ret = regmap_update_bits(lvds->grf, PX30_LVDS_GRF_PD_VO_CON1, + PX30_LVDS_MODE_EN(1) | PX30_LVDS_P2S_EN(1), + PX30_LVDS_MODE_EN(1) | PX30_LVDS_P2S_EN(1)); ++ if (ret) ++ pm_runtime_put(lvds->dev); ++ ++ return ret; + } + + static void px30_lvds_poweroff(struct rockchip_lvds *lvds) diff --git a/drivers/gpu/drm/rockchip/rockchip_vop_reg.c b/drivers/gpu/drm/rockchip/rockchip_vop_reg.c index ca7cc82125cbc..8c873fcd0e99f 100644 --- a/drivers/gpu/drm/rockchip/rockchip_vop_reg.c @@ -128146,6 +153674,81 @@ index 6b4759ed6bfd4..c491429f1a029 100644 drm_dp_encode_sideband_req(in, txmsg); ret = drm_dp_decode_sideband_req(txmsg, out); +diff --git 
a/drivers/gpu/drm/sti/sti_dvo.c b/drivers/gpu/drm/sti/sti_dvo.c +index b6ee8a82e656c..577c477b5f467 100644 +--- a/drivers/gpu/drm/sti/sti_dvo.c ++++ b/drivers/gpu/drm/sti/sti_dvo.c +@@ -288,7 +288,7 @@ static void sti_dvo_set_mode(struct drm_bridge *bridge, + + DRM_DEBUG_DRIVER("\n"); + +- memcpy(&dvo->mode, mode, sizeof(struct drm_display_mode)); ++ drm_mode_copy(&dvo->mode, mode); + + /* According to the path used (main or aux), the dvo clocks should + * have a different parent clock. */ +@@ -346,8 +346,9 @@ static int sti_dvo_connector_get_modes(struct drm_connector *connector) + + #define CLK_TOLERANCE_HZ 50 + +-static int sti_dvo_connector_mode_valid(struct drm_connector *connector, +- struct drm_display_mode *mode) ++static enum drm_mode_status ++sti_dvo_connector_mode_valid(struct drm_connector *connector, ++ struct drm_display_mode *mode) + { + int target = mode->clock * 1000; + int target_min = target - CLK_TOLERANCE_HZ; +diff --git a/drivers/gpu/drm/sti/sti_hda.c b/drivers/gpu/drm/sti/sti_hda.c +index 03f3377f918c0..aa54a6400ab84 100644 +--- a/drivers/gpu/drm/sti/sti_hda.c ++++ b/drivers/gpu/drm/sti/sti_hda.c +@@ -523,7 +523,7 @@ static void sti_hda_set_mode(struct drm_bridge *bridge, + + DRM_DEBUG_DRIVER("\n"); + +- memcpy(&hda->mode, mode, sizeof(struct drm_display_mode)); ++ drm_mode_copy(&hda->mode, mode); + + if (!hda_get_mode_idx(hda->mode, &mode_idx)) { + DRM_ERROR("Undefined mode\n"); +@@ -600,8 +600,9 @@ static int sti_hda_connector_get_modes(struct drm_connector *connector) + + #define CLK_TOLERANCE_HZ 50 + +-static int sti_hda_connector_mode_valid(struct drm_connector *connector, +- struct drm_display_mode *mode) ++static enum drm_mode_status ++sti_hda_connector_mode_valid(struct drm_connector *connector, ++ struct drm_display_mode *mode) + { + int target = mode->clock * 1000; + int target_min = target - CLK_TOLERANCE_HZ; +diff --git a/drivers/gpu/drm/sti/sti_hdmi.c b/drivers/gpu/drm/sti/sti_hdmi.c +index f3ace11209dd7..36bea1551ef84 100644 +--- a/drivers/gpu/drm/sti/sti_hdmi.c ++++ b/drivers/gpu/drm/sti/sti_hdmi.c +@@ -940,7 +940,7 @@ static void sti_hdmi_set_mode(struct drm_bridge *bridge, + DRM_DEBUG_DRIVER("\n"); + + /* Copy the drm display mode in the connector local structure */ +- memcpy(&hdmi->mode, mode, sizeof(struct drm_display_mode)); ++ drm_mode_copy(&hdmi->mode, mode); + + /* Update clock framerate according to the selected mode */ + ret = clk_set_rate(hdmi->clk_pix, mode->clock * 1000); +@@ -1003,8 +1003,9 @@ fail: + + #define CLK_TOLERANCE_HZ 50 + +-static int sti_hdmi_connector_mode_valid(struct drm_connector *connector, +- struct drm_display_mode *mode) ++static enum drm_mode_status ++sti_hdmi_connector_mode_valid(struct drm_connector *connector, ++ struct drm_display_mode *mode) + { + int target = mode->clock * 1000; + int target_min = target - CLK_TOLERANCE_HZ; diff --git a/drivers/gpu/drm/stm/ltdc.c b/drivers/gpu/drm/stm/ltdc.c index 195de30eb90c7..9d235b60b4286 100644 --- a/drivers/gpu/drm/stm/ltdc.c @@ -128351,8 +153954,24 @@ index 145833a9d82d4..5b3fbee186713 100644 /* format 20 is packed YVU444 10-bit */ /* format 21 is packed YUV444 10-bit */ +diff --git a/drivers/gpu/drm/tegra/dc.c b/drivers/gpu/drm/tegra/dc.c +index a29d64f875635..abb409b08bc64 100644 +--- a/drivers/gpu/drm/tegra/dc.c ++++ b/drivers/gpu/drm/tegra/dc.c +@@ -3022,8 +3022,10 @@ static int tegra_dc_probe(struct platform_device *pdev) + usleep_range(2000, 4000); + + err = reset_control_assert(dc->rst); +- if (err < 0) ++ if (err < 0) { ++ clk_disable_unprepare(dc->clk); + return 
err; ++ } + + usleep_range(2000, 4000); + diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c -index 8d37d6b00562a..611cd8dad46ed 100644 +index 8d37d6b00562a..4f5affdc60807 100644 --- a/drivers/gpu/drm/tegra/drm.c +++ b/drivers/gpu/drm/tegra/drm.c @@ -21,6 +21,10 @@ @@ -128384,6 +154003,17 @@ index 8d37d6b00562a..611cd8dad46ed 100644 /* * If the host1x client is already attached to an IOMMU domain that is * not the shared IOMMU domain, don't try to attach it to a different +@@ -1068,6 +1083,10 @@ static bool host1x_drm_wants_iommu(struct host1x_device *dev) + struct host1x *host1x = dev_get_drvdata(dev->dev.parent); + struct iommu_domain *domain; + ++ /* Our IOMMU usage policy doesn't currently play well with GART */ ++ if (of_machine_is_compatible("nvidia,tegra20")) ++ return false; ++ + /* + * If the Tegra DRM clients are backed by an IOMMU, push buffers are + * likely to be allocated beyond the 32-bit boundary if sufficient diff --git a/drivers/gpu/drm/tegra/dsi.c b/drivers/gpu/drm/tegra/dsi.c index f46d377f0c304..de1333dc0d867 100644 --- a/drivers/gpu/drm/tegra/dsi.c @@ -129165,7 +154795,7 @@ index 18f5009ce90e3..3b8576f193214 100644 crtc_funcs, NULL); drm_crtc_helper_add(crtc, crtc_helper_funcs); diff --git a/drivers/gpu/drm/vc4/vc4_drv.c b/drivers/gpu/drm/vc4/vc4_drv.c -index f6c16c5aee683..d216a1fd057c1 100644 +index f6c16c5aee683..099df15e1a61c 100644 --- a/drivers/gpu/drm/vc4/vc4_drv.c +++ b/drivers/gpu/drm/vc4/vc4_drv.c @@ -214,6 +214,15 @@ static void vc4_match_add_drivers(struct device *dev, @@ -129201,7 +154831,21 @@ index f6c16c5aee683..d216a1fd057c1 100644 vc4 = devm_drm_dev_alloc(dev, &vc4_drm_driver, struct vc4_dev, base); if (IS_ERR(vc4)) return PTR_ERR(vc4); -@@ -378,6 +397,7 @@ module_init(vc4_drm_register); +@@ -364,7 +383,12 @@ static int __init vc4_drm_register(void) + if (ret) + return ret; + +- return platform_driver_register(&vc4_platform_driver); ++ ret = platform_driver_register(&vc4_platform_driver); ++ if (ret) ++ platform_unregister_drivers(component_drivers, ++ ARRAY_SIZE(component_drivers)); ++ ++ return ret; + } + + static void __exit vc4_drm_unregister(void) +@@ -378,6 +402,7 @@ module_init(vc4_drm_register); module_exit(vc4_drm_unregister); MODULE_ALIAS("platform:vc4-drm"); @@ -129641,7 +155285,7 @@ index a185027911ce5..b7b2c76770dc6 100644 } diff --git a/drivers/gpu/drm/vc4/vc4_hdmi.c b/drivers/gpu/drm/vc4/vc4_hdmi.c -index ed8a4b7f8b6e2..9b3e642a08e1a 100644 +index ed8a4b7f8b6e2..665f772f9ffc4 100644 --- a/drivers/gpu/drm/vc4/vc4_hdmi.c +++ b/drivers/gpu/drm/vc4/vc4_hdmi.c @@ -38,6 +38,7 @@ @@ -129925,7 +155569,17 @@ index ed8a4b7f8b6e2..9b3e642a08e1a 100644 static irqreturn_t vc4_hdmi_hpd_irq_thread(int irq, void *priv) { struct vc4_hdmi *vc4_hdmi = priv; -@@ -2098,6 +2122,27 @@ static int vc5_hdmi_init_resources(struct vc4_hdmi *vc4_hdmi) +@@ -1830,7 +1854,8 @@ static int vc4_hdmi_cec_init(struct vc4_hdmi *vc4_hdmi) + } + + vc4_hdmi->cec_adap = cec_allocate_adapter(&vc4_hdmi_cec_adap_ops, +- vc4_hdmi, "vc4", ++ vc4_hdmi, ++ vc4_hdmi->variant->card_name, + CEC_CAP_DEFAULTS | + CEC_CAP_CONNECTOR_INFO, 1); + ret = PTR_ERR_OR_ZERO(vc4_hdmi->cec_adap); +@@ -2098,6 +2123,27 @@ static int vc5_hdmi_init_resources(struct vc4_hdmi *vc4_hdmi) return 0; } @@ -129953,7 +155607,7 @@ index ed8a4b7f8b6e2..9b3e642a08e1a 100644 static int vc4_hdmi_bind(struct device *dev, struct device *master, void *data) { const struct vc4_hdmi_variant *variant = of_device_get_match_data(dev); -@@ -2161,6 +2206,29 @@ static int vc4_hdmi_bind(struct device *dev, 
struct device *master, void *data) +@@ -2161,6 +2207,29 @@ static int vc4_hdmi_bind(struct device *dev, struct device *master, void *data) vc4_hdmi->disable_4kp60 = true; } @@ -129983,7 +155637,7 @@ index ed8a4b7f8b6e2..9b3e642a08e1a 100644 if (vc4_hdmi->variant->reset) vc4_hdmi->variant->reset(vc4_hdmi); -@@ -2172,8 +2240,6 @@ static int vc4_hdmi_bind(struct device *dev, struct device *master, void *data) +@@ -2172,8 +2241,6 @@ static int vc4_hdmi_bind(struct device *dev, struct device *master, void *data) clk_prepare_enable(vc4_hdmi->pixel_bvb_clock); } @@ -129992,7 +155646,7 @@ index ed8a4b7f8b6e2..9b3e642a08e1a 100644 drm_simple_encoder_init(drm, encoder, DRM_MODE_ENCODER_TMDS); drm_encoder_helper_add(encoder, &vc4_hdmi_encoder_helper_funcs); -@@ -2197,6 +2263,8 @@ static int vc4_hdmi_bind(struct device *dev, struct device *master, void *data) +@@ -2197,6 +2264,8 @@ static int vc4_hdmi_bind(struct device *dev, struct device *master, void *data) vc4_hdmi_debugfs_regs, vc4_hdmi); @@ -130001,7 +155655,7 @@ index ed8a4b7f8b6e2..9b3e642a08e1a 100644 return 0; err_free_cec: -@@ -2207,6 +2275,8 @@ err_destroy_conn: +@@ -2207,6 +2276,8 @@ err_destroy_conn: vc4_hdmi_connector_destroy(&vc4_hdmi->connector); err_destroy_encoder: drm_encoder_cleanup(encoder); @@ -130010,7 +155664,7 @@ index ed8a4b7f8b6e2..9b3e642a08e1a 100644 pm_runtime_disable(dev); err_put_ddc: put_device(&vc4_hdmi->ddc->dev); -@@ -2243,6 +2313,7 @@ static void vc4_hdmi_unbind(struct device *dev, struct device *master, +@@ -2243,6 +2314,7 @@ static void vc4_hdmi_unbind(struct device *dev, struct device *master, kfree(vc4_hdmi->hdmi_regset.regs); kfree(vc4_hdmi->hd_regset.regs); @@ -130018,7 +155672,7 @@ index ed8a4b7f8b6e2..9b3e642a08e1a 100644 vc4_hdmi_cec_exit(vc4_hdmi); vc4_hdmi_hotplug_exit(vc4_hdmi); vc4_hdmi_connector_destroy(&vc4_hdmi->connector); -@@ -2352,11 +2423,18 @@ static const struct of_device_id vc4_hdmi_dt_match[] = { +@@ -2352,11 +2424,18 @@ static const struct of_device_id vc4_hdmi_dt_match[] = { {} }; @@ -130222,9 +155876,31 @@ index c239045e05d6f..9d88bfb50c9b0 100644 HVS_WRITE(SCALER_DISPCTRL, dispctrl); diff --git a/drivers/gpu/drm/vc4/vc4_kms.c b/drivers/gpu/drm/vc4/vc4_kms.c -index f0b3e4cf5bceb..6030d4a821555 100644 +index f0b3e4cf5bceb..1bb8bcc45d71d 100644 --- a/drivers/gpu/drm/vc4/vc4_kms.c +++ b/drivers/gpu/drm/vc4/vc4_kms.c +@@ -193,8 +193,8 @@ vc4_hvs_get_new_global_state(struct drm_atomic_state *state) + struct drm_private_state *priv_state; + + priv_state = drm_atomic_get_new_private_obj_state(state, &vc4->hvs_channels); +- if (IS_ERR(priv_state)) +- return ERR_CAST(priv_state); ++ if (!priv_state) ++ return ERR_PTR(-EINVAL); + + return to_vc4_hvs_state(priv_state); + } +@@ -206,8 +206,8 @@ vc4_hvs_get_old_global_state(struct drm_atomic_state *state) + struct drm_private_state *priv_state; + + priv_state = drm_atomic_get_old_private_obj_state(state, &vc4->hvs_channels); +- if (IS_ERR(priv_state)) +- return ERR_CAST(priv_state); ++ if (!priv_state) ++ return ERR_PTR(-EINVAL); + + return to_vc4_hvs_state(priv_state); + } @@ -233,6 +233,7 @@ static void vc4_hvs_pv_muxing_commit(struct vc4_dev *vc4, unsigned int i; @@ -130541,10 +156217,30 @@ index 2de61b63ef91d..48d3c9955f0dd 100644 drm_gem_object_put(objs->objs[i]); virtio_gpu_array_free(objs); diff --git a/drivers/gpu/drm/virtio/virtgpu_ioctl.c b/drivers/gpu/drm/virtio/virtgpu_ioctl.c -index 5c1ad1596889b..15c3e63db396d 100644 +index 5c1ad1596889b..3c750ba6ba1fe 100644 --- a/drivers/gpu/drm/virtio/virtgpu_ioctl.c +++ 
b/drivers/gpu/drm/virtio/virtgpu_ioctl.c -@@ -512,8 +512,10 @@ static int virtio_gpu_get_caps_ioctl(struct drm_device *dev, +@@ -292,10 +292,18 @@ static int virtio_gpu_resource_create_ioctl(struct drm_device *dev, void *data, + drm_gem_object_release(obj); + return ret; + } +- drm_gem_object_put(obj); + + rc->res_handle = qobj->hw_res_handle; /* similiar to a VM address */ + rc->bo_handle = handle; ++ ++ /* ++ * The handle owns the reference now. But we must drop our ++ * remaining reference *after* we no longer need to dereference ++ * the obj. Otherwise userspace could guess the handle and ++ * race closing it from another thread. ++ */ ++ drm_gem_object_put(obj); ++ + return 0; + } + +@@ -512,8 +520,10 @@ static int virtio_gpu_get_caps_ioctl(struct drm_device *dev, spin_unlock(&vgdev->display_info_lock); /* not in cache - need to talk to hw */ @@ -130557,6 +156253,26 @@ index 5c1ad1596889b..15c3e63db396d 100644 virtio_gpu_notify(vgdev); copy_exit: +@@ -654,11 +664,18 @@ static int virtio_gpu_resource_create_blob_ioctl(struct drm_device *dev, + drm_gem_object_release(obj); + return ret; + } +- drm_gem_object_put(obj); + + rc_blob->res_handle = bo->hw_res_handle; + rc_blob->bo_handle = handle; + ++ /* ++ * The handle owns the reference now. But we must drop our ++ * remaining reference *after* we no longer need to dereference ++ * the obj. Otherwise userspace could guess the handle and ++ * race closing it from another thread. ++ */ ++ drm_gem_object_put(obj); ++ + return 0; + } + diff --git a/drivers/gpu/drm/virtio/virtgpu_object.c b/drivers/gpu/drm/virtio/virtgpu_object.c index f648b0e24447b..7e75fb0fc7bd0 100644 --- a/drivers/gpu/drm/virtio/virtgpu_object.c @@ -131395,10 +157111,20 @@ index c5191de365ca1..fe4732bf2c9d2 100644 } diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c -index 74fa419092138..171e90c4b9f3f 100644 +index 74fa419092138..01d5a01af2594 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c -@@ -916,6 +916,15 @@ static int vmw_kms_new_framebuffer_surface(struct vmw_private *dev_priv, +@@ -186,7 +186,8 @@ void vmw_kms_cursor_snoop(struct vmw_surface *srf, + if (cmd->dma.guest.ptr.offset % PAGE_SIZE || + box->x != 0 || box->y != 0 || box->z != 0 || + box->srcx != 0 || box->srcy != 0 || box->srcz != 0 || +- box->d != 1 || box_count != 1) { ++ box->d != 1 || box_count != 1 || ++ box->w > 64 || box->h > 64) { + /* TODO handle none page aligned offsets */ + /* TODO handle more dst & src != 0 */ + /* TODO handle more then one copy */ +@@ -916,6 +917,15 @@ static int vmw_kms_new_framebuffer_surface(struct vmw_private *dev_priv, * Sanity checks. */ @@ -131414,7 +157140,7 @@ index 74fa419092138..171e90c4b9f3f 100644 /* Surface must be marked as a scanout. 
*/ if (unlikely(!surface->metadata.scanout)) return -EINVAL; -@@ -1229,20 +1238,13 @@ static int vmw_kms_new_framebuffer_bo(struct vmw_private *dev_priv, +@@ -1229,20 +1239,13 @@ static int vmw_kms_new_framebuffer_bo(struct vmw_private *dev_priv, return -EINVAL; } @@ -131442,7 +157168,7 @@ index 74fa419092138..171e90c4b9f3f 100644 } vfbd = kzalloc(sizeof(*vfbd), GFP_KERNEL); -@@ -1336,7 +1338,6 @@ vmw_kms_new_framebuffer(struct vmw_private *dev_priv, +@@ -1336,7 +1339,6 @@ vmw_kms_new_framebuffer(struct vmw_private *dev_priv, ret = vmw_kms_new_framebuffer_surface(dev_priv, surface, &vfb, mode_cmd, is_bo_proxy); @@ -131450,7 +157176,7 @@ index 74fa419092138..171e90c4b9f3f 100644 /* * vmw_create_bo_proxy() adds a reference that is no longer * needed -@@ -1398,13 +1399,16 @@ static struct drm_framebuffer *vmw_kms_fb_create(struct drm_device *dev, +@@ -1398,13 +1400,16 @@ static struct drm_framebuffer *vmw_kms_fb_create(struct drm_device *dev, ret = vmw_user_lookup_handle(dev_priv, tfile, mode_cmd->handles[0], &surface, &bo); @@ -131469,7 +157195,7 @@ index 74fa419092138..171e90c4b9f3f 100644 dev_priv->texture_max_width, dev_priv->texture_max_height); goto err_out; -@@ -2516,7 +2520,7 @@ void vmw_kms_helper_validation_finish(struct vmw_private *dev_priv, +@@ -2516,7 +2521,7 @@ void vmw_kms_helper_validation_finish(struct vmw_private *dev_priv, if (file_priv) vmw_execbuf_copy_fence_user(dev_priv, vmw_fpriv(file_priv), ret, user_fence_rep, fence, @@ -131535,7 +157261,7 @@ index f9394207dd3cc..632e587519722 100644 }; diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_msg.c b/drivers/gpu/drm/vmwgfx/vmwgfx_msg.c -index e50fb82a30300..47eb3a50dd08e 100644 +index e50fb82a30300..8d2437fa6894b 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_msg.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_msg.c @@ -1076,6 +1076,7 @@ int vmw_mksstat_add_ioctl(struct drm_device *dev, void *data, @@ -131546,6 +157272,34 @@ index e50fb82a30300..47eb3a50dd08e 100644 return -EFAULT; } +@@ -1084,21 +1085,21 @@ int vmw_mksstat_add_ioctl(struct drm_device *dev, void *data, + reset_ppn_array(pdesc->strsPPNs, ARRAY_SIZE(pdesc->strsPPNs)); + + /* Pin mksGuestStat user pages and store those in the instance descriptor */ +- nr_pinned_stat = pin_user_pages(arg->stat, num_pages_stat, FOLL_LONGTERM, pages_stat, NULL); ++ nr_pinned_stat = pin_user_pages_fast(arg->stat, num_pages_stat, FOLL_LONGTERM, pages_stat); + if (num_pages_stat != nr_pinned_stat) + goto err_pin_stat; + + for (i = 0; i < num_pages_stat; ++i) + pdesc->statPPNs[i] = page_to_pfn(pages_stat[i]); + +- nr_pinned_info = pin_user_pages(arg->info, num_pages_info, FOLL_LONGTERM, pages_info, NULL); ++ nr_pinned_info = pin_user_pages_fast(arg->info, num_pages_info, FOLL_LONGTERM, pages_info); + if (num_pages_info != nr_pinned_info) + goto err_pin_info; + + for (i = 0; i < num_pages_info; ++i) + pdesc->infoPPNs[i] = page_to_pfn(pages_info[i]); + +- nr_pinned_strs = pin_user_pages(arg->strs, num_pages_strs, FOLL_LONGTERM, pages_strs, NULL); ++ nr_pinned_strs = pin_user_pages_fast(arg->strs, num_pages_strs, FOLL_LONGTERM, pages_strs); + if (num_pages_strs != nr_pinned_strs) + goto err_pin_strs; + +diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_msg_arm64.h b/drivers/gpu/drm/vmwgfx/vmwgfx_msg_arm64.h +old mode 100755 +new mode 100644 diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c b/drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c index e5a9a5cbd01a7..922317d1acc8a 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c @@ -131674,6 +157428,21 @@ index 8d1e869cc1964..34ab08369e043 100644 } /** +diff 
--git a/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c b/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c +index bd157fb21b450..605ff05d449fc 100644 +--- a/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c ++++ b/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c +@@ -953,6 +953,10 @@ int vmw_kms_sou_init_display(struct vmw_private *dev_priv) + struct drm_device *dev = &dev_priv->drm; + int i, ret; + ++ /* Screen objects won't work if GMR's aren't available */ ++ if (!dev_priv->has_gmr) ++ return -ENOSYS; ++ + if (!(dev_priv->capabilities & SVGA_CAP_SCREEN_OBJECT_2)) { + return -ENOSYS; + } diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c b/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c index d85310b2608dd..f5e90d0e2d0f8 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c @@ -132134,7 +157903,7 @@ index 6dab94adf25e5..6815b4db17c1b 100644 help Driver for the NVIDIA Tegra host1x hardware. diff --git a/drivers/gpu/host1x/dev.c b/drivers/gpu/host1x/dev.c -index fbb6447b8659e..fc9f54282f7d6 100644 +index fbb6447b8659e..c2a4bf2aae615 100644 --- a/drivers/gpu/host1x/dev.c +++ b/drivers/gpu/host1x/dev.c @@ -18,6 +18,10 @@ @@ -132148,7 +157917,18 @@ index fbb6447b8659e..fc9f54282f7d6 100644 #include "bus.h" #include "channel.h" #include "debug.h" -@@ -238,6 +242,17 @@ static struct iommu_domain *host1x_iommu_attach(struct host1x *host) +@@ -200,6 +204,10 @@ static void host1x_setup_sid_table(struct host1x *host) + + static bool host1x_wants_iommu(struct host1x *host1x) + { ++ /* Our IOMMU usage policy doesn't currently play well with GART */ ++ if (of_machine_is_compatible("nvidia,tegra20")) ++ return false; ++ + /* + * If we support addressing a maximum of 32 bits of physical memory + * and if the host1x firewall is enabled, there's no need to enable +@@ -238,6 +246,17 @@ static struct iommu_domain *host1x_iommu_attach(struct host1x *host) struct iommu_domain *domain = iommu_get_domain_for_dev(host->dev); int err; @@ -132166,7 +157946,7 @@ index fbb6447b8659e..fc9f54282f7d6 100644 /* * We may not always want to enable IOMMU support (for example if the * host1x firewall is already enabled and we don't support addressing -@@ -511,6 +526,7 @@ static int host1x_remove(struct platform_device *pdev) +@@ -511,6 +530,7 @@ static int host1x_remove(struct platform_device *pdev) host1x_syncpt_deinit(host); reset_control_assert(host->rst); clk_disable_unprepare(host->clk); @@ -132293,7 +158073,7 @@ index 3c33bf572d6d3..9235ab7161e3a 100644 Support for Samsung InfraRed remote control or keyboards. 
diff --git a/drivers/hid/amd-sfh-hid/amd_sfh_client.c b/drivers/hid/amd-sfh-hid/amd_sfh_client.c -index 840fd075c56f1..ab149b80f86c1 100644 +index 840fd075c56f1..911a23a9bcd1b 100644 --- a/drivers/hid/amd-sfh-hid/amd_sfh_client.c +++ b/drivers/hid/amd-sfh-hid/amd_sfh_client.c @@ -154,6 +154,8 @@ int amd_sfh_hid_client_init(struct amd_mp2_dev *privdata) @@ -132305,7 +158085,18 @@ index 840fd075c56f1..ab149b80f86c1 100644 INIT_DELAYED_WORK(&cl_data->work, amd_sfh_work); INIT_DELAYED_WORK(&cl_data->work_buffer, amd_sfh_work_buffer); -@@ -226,6 +228,17 @@ int amd_sfh_hid_client_init(struct amd_mp2_dev *privdata) +@@ -164,6 +166,10 @@ int amd_sfh_hid_client_init(struct amd_mp2_dev *privdata) + in_data->sensor_virt_addr[i] = dma_alloc_coherent(dev, sizeof(int) * 8, + &cl_data->sensor_dma_addr[i], + GFP_KERNEL); ++ if (!in_data->sensor_virt_addr[i]) { ++ rc = -ENOMEM; ++ goto cleanup; ++ } + cl_data->sensor_sts[i] = SENSOR_DISABLED; + cl_data->sensor_requested_cnt[i] = 0; + cl_data->cur_hid_dev = i; +@@ -226,6 +232,17 @@ int amd_sfh_hid_client_init(struct amd_mp2_dev *privdata) dev_dbg(dev, "sid 0x%x status 0x%x\n", cl_data->sensor_idx[i], cl_data->sensor_sts[i]); } @@ -132660,8 +158451,49 @@ index f3ecddc519ee8..b59c3dafa6a48 100644 return rdesc; } +diff --git a/drivers/hid/hid-betopff.c b/drivers/hid/hid-betopff.c +index 467d789f9bc2d..25ed7b9a917e4 100644 +--- a/drivers/hid/hid-betopff.c ++++ b/drivers/hid/hid-betopff.c +@@ -60,7 +60,6 @@ static int betopff_init(struct hid_device *hid) + struct list_head *report_list = + &hid->report_enum[HID_OUTPUT_REPORT].report_list; + struct input_dev *dev; +- int field_count = 0; + int error; + int i, j; + +@@ -86,19 +85,21 @@ static int betopff_init(struct hid_device *hid) + * ----------------------------------------- + * Do init them with default value. 
+ */ ++ if (report->maxfield < 4) { ++ hid_err(hid, "not enough fields in the report: %d\n", ++ report->maxfield); ++ return -ENODEV; ++ } + for (i = 0; i < report->maxfield; i++) { ++ if (report->field[i]->report_count < 1) { ++ hid_err(hid, "no values in the field\n"); ++ return -ENODEV; ++ } + for (j = 0; j < report->field[i]->report_count; j++) { + report->field[i]->value[j] = 0x00; +- field_count++; + } + } + +- if (field_count < 4) { +- hid_err(hid, "not enough fields in the report: %d\n", +- field_count); +- return -ENODEV; +- } +- + betopff = kzalloc(sizeof(*betopff), GFP_KERNEL); + if (!betopff) + return -ENOMEM; diff --git a/drivers/hid/hid-bigbenff.c b/drivers/hid/hid-bigbenff.c -index db6da21ade063..e8c5e3ac9fff1 100644 +index db6da21ade063..e8b16665860d6 100644 --- a/drivers/hid/hid-bigbenff.c +++ b/drivers/hid/hid-bigbenff.c @@ -191,7 +191,7 @@ static void bigben_worker(struct work_struct *work) @@ -132673,7 +158505,15 @@ index db6da21ade063..e8c5e3ac9fff1 100644 return; if (bigben->work_led) { -@@ -347,6 +347,12 @@ static int bigben_probe(struct hid_device *hid, +@@ -344,9 +344,20 @@ static int bigben_probe(struct hid_device *hid, + } + + report_list = &hid->report_enum[HID_OUTPUT_REPORT].report_list; ++ if (list_empty(report_list)) { ++ hid_err(hid, "no output report found\n"); ++ error = -ENODEV; ++ goto error_hw_stop; ++ } bigben->report = list_entry(report_list->next, struct hid_report, list); @@ -132700,6 +158540,48 @@ index ca556d39da2ae..f04d2aa23efe4 100644 hdev->quirks |= HID_QUIRK_INPUT_PER_APP; ret = hid_parse(hdev); if (ret) { +diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c +index dbed2524fd47b..f1ea883db5de1 100644 +--- a/drivers/hid/hid-core.c ++++ b/drivers/hid/hid-core.c +@@ -988,8 +988,8 @@ struct hid_report *hid_validate_values(struct hid_device *hid, + * Validating on id 0 means we should examine the first + * report in the list. 
+ */ +- report = list_entry( +- hid->report_enum[type].report_list.next, ++ report = list_first_entry_or_null( ++ &hid->report_enum[type].report_list, + struct hid_report, list); + } else { + report = hid->report_enum[type].report_id_hash[id]; +@@ -1197,6 +1197,7 @@ int hid_open_report(struct hid_device *device) + __u8 *end; + __u8 *next; + int ret; ++ int i; + static int (*dispatch_type[])(struct hid_parser *parser, + struct hid_item *item) = { + hid_parser_main, +@@ -1247,6 +1248,8 @@ int hid_open_report(struct hid_device *device) + goto err; + } + device->collection_size = HID_DEFAULT_NUM_COLLECTIONS; ++ for (i = 0; i < HID_DEFAULT_NUM_COLLECTIONS; i++) ++ device->collection[i].parent_idx = -1; + + ret = -EINVAL; + while ((next = fetch_item(start, end, &item)) != NULL) { +@@ -1310,6 +1313,9 @@ static s32 snto32(__u32 value, unsigned n) + if (!value || !n) + return 0; + ++ if (n > 32) ++ n = 32; ++ + switch (n) { + case 8: return ((__s8)value); + case 16: return ((__s16)value); diff --git a/drivers/hid/hid-corsair.c b/drivers/hid/hid-corsair.c index 902a60e249ed2..8c895c820b672 100644 --- a/drivers/hid/hid-corsair.c @@ -132786,6 +158668,54 @@ index 021049805bb71..8e4a5528e25df 100644 return ret; } +diff --git a/drivers/hid/hid-elecom.c b/drivers/hid/hid-elecom.c +index e59e9911fc370..4fa45ee77503b 100644 +--- a/drivers/hid/hid-elecom.c ++++ b/drivers/hid/hid-elecom.c +@@ -12,6 +12,7 @@ + * Copyright (c) 2017 Alex Manoussakis <amanou@gnu.org> + * Copyright (c) 2017 Tomasz Kramkowski <tk@the-tk.com> + * Copyright (c) 2020 YOSHIOKA Takuma <lo48576@hard-wi.red> ++ * Copyright (c) 2022 Takahiro Fujii <fujii@xaxxi.net> + */ + + /* +@@ -89,7 +90,7 @@ static __u8 *elecom_report_fixup(struct hid_device *hdev, __u8 *rdesc, + case USB_DEVICE_ID_ELECOM_M_DT1URBK: + case USB_DEVICE_ID_ELECOM_M_DT1DRBK: + case USB_DEVICE_ID_ELECOM_M_HT1URBK: +- case USB_DEVICE_ID_ELECOM_M_HT1DRBK: ++ case USB_DEVICE_ID_ELECOM_M_HT1DRBK_010D: + /* + * Report descriptor format: + * 12: button bit count +@@ -99,6 +100,16 @@ static __u8 *elecom_report_fixup(struct hid_device *hdev, __u8 *rdesc, + */ + mouse_button_fixup(hdev, rdesc, *rsize, 12, 30, 14, 20, 8); + break; ++ case USB_DEVICE_ID_ELECOM_M_HT1DRBK_011C: ++ /* ++ * Report descriptor format: ++ * 22: button bit count ++ * 30: padding bit count ++ * 24: button report size ++ * 16: button usage maximum ++ */ ++ mouse_button_fixup(hdev, rdesc, *rsize, 22, 30, 24, 16, 8); ++ break; + } + return rdesc; + } +@@ -112,7 +123,8 @@ static const struct hid_device_id elecom_devices[] = { + { HID_USB_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_M_DT1URBK) }, + { HID_USB_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_M_DT1DRBK) }, + { HID_USB_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_M_HT1URBK) }, +- { HID_USB_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_M_HT1DRBK) }, ++ { HID_USB_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_M_HT1DRBK_010D) }, ++ { HID_USB_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_M_HT1DRBK_011C) }, + { } + }; + MODULE_DEVICE_TABLE(hid, elecom_devices); diff --git a/drivers/hid/hid-elo.c b/drivers/hid/hid-elo.c index 383dfda8c12fc..2876cb6a7dcab 100644 --- a/drivers/hid/hid-elo.c @@ -132912,8 +158842,21 @@ index 195b735b001d0..7c907939bfae1 100644 }; module_hid_driver(holtek_mouse_driver); +diff --git a/drivers/hid/hid-hyperv.c b/drivers/hid/hid-hyperv.c +index 978ee2aab2d40..b7704dd6809dc 100644 +--- a/drivers/hid/hid-hyperv.c ++++ b/drivers/hid/hid-hyperv.c +@@ -498,7 +498,7 @@ static int mousevsc_probe(struct hv_device 
*device, + + ret = hid_add_device(hid_dev); + if (ret) +- goto probe_err1; ++ goto probe_err2; + + + ret = hid_parse(hid_dev); diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h -index 29564b370341e..cb2b48d6915ee 100644 +index 29564b370341e..b153ddc3319e8 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -394,8 +394,12 @@ @@ -132929,7 +158872,17 @@ index 29564b370341e..cb2b48d6915ee 100644 #define USB_VENDOR_ID_ELECOM 0x056e #define USB_DEVICE_ID_ELECOM_BM084 0x0061 -@@ -496,6 +500,7 @@ +@@ -406,7 +410,8 @@ + #define USB_DEVICE_ID_ELECOM_M_DT1URBK 0x00fe + #define USB_DEVICE_ID_ELECOM_M_DT1DRBK 0x00ff + #define USB_DEVICE_ID_ELECOM_M_HT1URBK 0x010c +-#define USB_DEVICE_ID_ELECOM_M_HT1DRBK 0x010d ++#define USB_DEVICE_ID_ELECOM_M_HT1DRBK_010D 0x010d ++#define USB_DEVICE_ID_ELECOM_M_HT1DRBK_011C 0x011c + + #define USB_VENDOR_ID_DREAM_CHEEKY 0x1d34 + #define USB_DEVICE_ID_DREAM_CHEEKY_WN 0x0004 +@@ -496,6 +501,7 @@ #define USB_DEVICE_ID_GOOGLE_MAGNEMITE 0x503d #define USB_DEVICE_ID_GOOGLE_MOONBALL 0x5044 #define USB_DEVICE_ID_GOOGLE_DON 0x5050 @@ -132937,7 +158890,7 @@ index 29564b370341e..cb2b48d6915ee 100644 #define USB_VENDOR_ID_GOTOP 0x08f2 #define USB_DEVICE_ID_SUPER_Q2 0x007f -@@ -750,6 +755,7 @@ +@@ -750,6 +756,7 @@ #define USB_DEVICE_ID_LENOVO_X1_COVER 0x6085 #define USB_DEVICE_ID_LENOVO_X1_TAB 0x60a3 #define USB_DEVICE_ID_LENOVO_X1_TAB3 0x60b5 @@ -132945,7 +158898,15 @@ index 29564b370341e..cb2b48d6915ee 100644 #define USB_DEVICE_ID_LENOVO_OPTICAL_USB_MOUSE_600E 0x600e #define USB_DEVICE_ID_LENOVO_PIXART_USB_MOUSE_608D 0x608d #define USB_DEVICE_ID_LENOVO_PIXART_USB_MOUSE_6019 0x6019 -@@ -881,6 +887,7 @@ +@@ -839,6 +846,7 @@ + #define USB_DEVICE_ID_MADCATZ_BEATPAD 0x4540 + #define USB_DEVICE_ID_MADCATZ_RAT5 0x1705 + #define USB_DEVICE_ID_MADCATZ_RAT9 0x1709 ++#define USB_DEVICE_ID_MADCATZ_MMO7 0x1713 + + #define USB_VENDOR_ID_MCC 0x09db + #define USB_DEVICE_ID_MCC_PMD1024LS 0x0076 +@@ -881,9 +889,11 @@ #define USB_DEVICE_ID_MS_TOUCH_COVER_2 0x07a7 #define USB_DEVICE_ID_MS_TYPE_COVER_2 0x07a9 #define USB_DEVICE_ID_MS_POWER_COVER 0x07da @@ -132953,7 +158914,38 @@ index 29564b370341e..cb2b48d6915ee 100644 #define USB_DEVICE_ID_MS_XBOX_ONE_S_CONTROLLER 0x02fd #define USB_DEVICE_ID_MS_PIXART_MOUSE 0x00cb #define USB_DEVICE_ID_8BITDO_SN30_PRO_PLUS 0x02e0 -@@ -1276,6 +1283,9 @@ ++#define USB_DEVICE_ID_MS_MOUSE_0783 0x0783 + + #define USB_VENDOR_ID_MOJO 0x8282 + #define USB_DEVICE_ID_RETRO_ADAPTER 0x3201 +@@ -956,7 +966,10 @@ + #define USB_DEVICE_ID_ORTEK_IHOME_IMAC_A210S 0x8003 + + #define USB_VENDOR_ID_PLANTRONICS 0x047f ++#define USB_DEVICE_ID_PLANTRONICS_BLACKWIRE_3210_SERIES 0xc055 + #define USB_DEVICE_ID_PLANTRONICS_BLACKWIRE_3220_SERIES 0xc056 ++#define USB_DEVICE_ID_PLANTRONICS_BLACKWIRE_3215_SERIES 0xc057 ++#define USB_DEVICE_ID_PLANTRONICS_BLACKWIRE_3225_SERIES 0xc058 + + #define USB_VENDOR_ID_PANASONIC 0x04da + #define USB_DEVICE_ID_PANABOARD_UBT780 0x1044 +@@ -1102,6 +1115,7 @@ + #define USB_DEVICE_ID_SONY_PS4_CONTROLLER_2 0x09cc + #define USB_DEVICE_ID_SONY_PS4_CONTROLLER_DONGLE 0x0ba0 + #define USB_DEVICE_ID_SONY_PS5_CONTROLLER 0x0ce6 ++#define USB_DEVICE_ID_SONY_PS5_CONTROLLER_2 0x0df2 + #define USB_DEVICE_ID_SONY_MOTION_CONTROLLER 0x03d5 + #define USB_DEVICE_ID_SONY_NAVIGATION_CONTROLLER 0x042f + #define USB_DEVICE_ID_SONY_BUZZ_CONTROLLER 0x0002 +@@ -1173,6 +1187,7 @@ + #define USB_DEVICE_ID_SYNAPTICS_DELL_K15A 0x6e21 + #define USB_DEVICE_ID_SYNAPTICS_ACER_ONE_S1002 0x73f4 + #define USB_DEVICE_ID_SYNAPTICS_ACER_ONE_S1003 0x73f5 ++#define 
USB_DEVICE_ID_SYNAPTICS_ACER_SWITCH5_017 0x73f6 + #define USB_DEVICE_ID_SYNAPTICS_ACER_SWITCH5 0x81a7 + + #define USB_VENDOR_ID_TEXAS_INSTRUMENTS 0x2047 +@@ -1276,6 +1291,9 @@ #define USB_DEVICE_ID_WEIDA_8752 0xC300 #define USB_DEVICE_ID_WEIDA_8755 0xC301 @@ -132963,7 +158955,15 @@ index 29564b370341e..cb2b48d6915ee 100644 #define USB_VENDOR_ID_WISEGROUP 0x0925 #define USB_DEVICE_ID_SMARTJOY_PLUS 0x0005 #define USB_DEVICE_ID_SUPER_JOY_BOX_3 0x8888 -@@ -1346,6 +1356,7 @@ +@@ -1326,6 +1344,7 @@ + + #define USB_VENDOR_ID_PRIMAX 0x0461 + #define USB_DEVICE_ID_PRIMAX_MOUSE_4D22 0x4d22 ++#define USB_DEVICE_ID_PRIMAX_MOUSE_4E2A 0x4e2a + #define USB_DEVICE_ID_PRIMAX_KEYBOARD 0x4e05 + #define USB_DEVICE_ID_PRIMAX_REZEL 0x4e72 + #define USB_DEVICE_ID_PRIMAX_PIXART_MOUSE_4D0F 0x4d0f +@@ -1346,6 +1365,7 @@ #define USB_VENDOR_ID_UGTIZER 0x2179 #define USB_DEVICE_ID_UGTIZER_TABLET_GP0610 0x0053 #define USB_DEVICE_ID_UGTIZER_TABLET_GT5040 0x0077 @@ -133045,6 +159045,22 @@ index 4b5ebeacd2836..f197aed6444a5 100644 if (usage->hat_min < usage->hat_max || usage->hat_dir) { int hat_dir = usage->hat_dir; if (!hat_dir) +diff --git a/drivers/hid/hid-ite.c b/drivers/hid/hid-ite.c +index 430fa4f52ed3b..75ebfcf318896 100644 +--- a/drivers/hid/hid-ite.c ++++ b/drivers/hid/hid-ite.c +@@ -121,6 +121,11 @@ static const struct hid_device_id ite_devices[] = { + USB_VENDOR_ID_SYNAPTICS, + USB_DEVICE_ID_SYNAPTICS_ACER_ONE_S1003), + .driver_data = QUIRK_TOUCHPAD_ON_OFF_REPORT }, ++ /* ITE8910 USB kbd ctlr, with Synaptics touchpad connected to it. */ ++ { HID_DEVICE(BUS_USB, HID_GROUP_GENERIC, ++ USB_VENDOR_ID_SYNAPTICS, ++ USB_DEVICE_ID_SYNAPTICS_ACER_SWITCH5_017), ++ .driver_data = QUIRK_TOUCHPAD_ON_OFF_REPORT }, + { } + }; + MODULE_DEVICE_TABLE(hid, ite_devices); diff --git a/drivers/hid/hid-led.c b/drivers/hid/hid-led.c index c2c66ceca1327..7d82f8d426bbc 100644 --- a/drivers/hid/hid-led.c @@ -133083,6 +159099,23 @@ index d40af911df635..fb3f7258009c2 100644 /* G29 only work with the 1st interface */ if ((hdev->product == USB_DEVICE_ID_LOGITECH_G29_WHEEL) && (iface_num != 0)) { +diff --git a/drivers/hid/hid-lg4ff.c b/drivers/hid/hid-lg4ff.c +index 5e6a0cef2a06d..e3fcf1353fb3b 100644 +--- a/drivers/hid/hid-lg4ff.c ++++ b/drivers/hid/hid-lg4ff.c +@@ -872,6 +872,12 @@ static ssize_t lg4ff_alternate_modes_store(struct device *dev, struct device_att + return -ENOMEM; + + i = strlen(lbuf); ++ ++ if (i == 0) { ++ kfree(lbuf); ++ return -EINVAL; ++ } ++ + if (lbuf[i-1] == '\n') { + if (i == 1) { + kfree(lbuf); diff --git a/drivers/hid/hid-logitech-dj.c b/drivers/hid/hid-logitech-dj.c index a0017b010c342..c358778e070bc 100644 --- a/drivers/hid/hid-logitech-dj.c @@ -133293,7 +159326,7 @@ index 686788ebf3e1e..c6b8da7160021 100644 .event = magicmouse_event, .input_mapping = magicmouse_input_mapping, diff --git a/drivers/hid/hid-mcp2221.c b/drivers/hid/hid-mcp2221.c -index 4211b9839209b..de52e9f7bb8cb 100644 +index 4211b9839209b..560eeec4035aa 100644 --- a/drivers/hid/hid-mcp2221.c +++ b/drivers/hid/hid-mcp2221.c @@ -385,6 +385,9 @@ static int mcp_smbus_write(struct mcp2221 *mcp, u16 addr, @@ -133306,8 +159339,39 @@ index 4211b9839209b..de52e9f7bb8cb 100644 memcpy(&mcp->txbuf[5], buf, len); data_len = len + 5; } +@@ -837,12 +840,19 @@ static int mcp2221_probe(struct hid_device *hdev, + return ret; + } + +- ret = hid_hw_start(hdev, HID_CONNECT_HIDRAW); ++ /* ++ * This driver uses the .raw_event callback and therefore does not need any ++ * HID_CONNECT_xxx flags. 
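++	 * A zero connect mask still runs the transport's ->start() hook; it
++	 * only skips the hidraw/hidinput registration that hid_connect()
++	 * would otherwise do.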
++ */ ++ ret = hid_hw_start(hdev, 0); + if (ret) { + hid_err(hdev, "can't start hardware\n"); + return ret; + } + ++ hid_info(hdev, "USB HID v%x.%02x Device [%s] on %s\n", hdev->version >> 8, ++ hdev->version & 0xff, hdev->name, hdev->phys); ++ + ret = hid_hw_open(hdev); + if (ret) { + hid_err(hdev, "can't open device\n"); +@@ -867,8 +877,7 @@ static int mcp2221_probe(struct hid_device *hdev, + mcp->adapter.retries = 1; + mcp->adapter.dev.parent = &hdev->dev; + snprintf(mcp->adapter.name, sizeof(mcp->adapter.name), +- "MCP2221 usb-i2c bridge on hidraw%d", +- ((struct hidraw *)hdev->hidraw)->minor); ++ "MCP2221 usb-i2c bridge"); + + ret = i2c_add_adapter(&mcp->adapter); + if (ret) { diff --git a/drivers/hid/hid-multitouch.c b/drivers/hid/hid-multitouch.c -index 3ea7cb1cda84c..08462ac72b897 100644 +index 3ea7cb1cda84c..6b86d368d5e74 100644 --- a/drivers/hid/hid-multitouch.c +++ b/drivers/hid/hid-multitouch.c @@ -193,6 +193,8 @@ static void mt_post_parse(struct mt_device *td, struct mt_application *app); @@ -133389,7 +159453,18 @@ index 3ea7cb1cda84c..08462ac72b897 100644 } static int mt_probe(struct hid_device *hdev, const struct hid_device_id *id) -@@ -2024,6 +2043,12 @@ static const struct hid_device_id mt_devices[] = { +@@ -1946,6 +1965,10 @@ static const struct hid_device_id mt_devices[] = { + HID_DEVICE(BUS_I2C, HID_GROUP_MULTITOUCH_WIN_8, + USB_VENDOR_ID_ELAN, 0x313a) }, + ++ { .driver_data = MT_CLS_WIN_8_FORCE_MULTI_INPUT, ++ HID_DEVICE(BUS_I2C, HID_GROUP_MULTITOUCH_WIN_8, ++ USB_VENDOR_ID_ELAN, 0x3148) }, ++ + /* Elitegroup panel */ + { .driver_data = MT_CLS_SERIAL, + MT_USB_DEVICE(USB_VENDOR_ID_ELITEGROUP, +@@ -2024,6 +2047,12 @@ static const struct hid_device_id mt_devices[] = { USB_VENDOR_ID_LENOVO, USB_DEVICE_ID_LENOVO_X1_TAB3) }, @@ -133402,7 +159477,7 @@ index 3ea7cb1cda84c..08462ac72b897 100644 /* MosArt panels */ { .driver_data = MT_CLS_CONFIDENCE_MINUS_ONE, MT_USB_DEVICE(USB_VENDOR_ID_ASUS, -@@ -2120,6 +2145,11 @@ static const struct hid_device_id mt_devices[] = { +@@ -2120,6 +2149,11 @@ static const struct hid_device_id mt_devices[] = { MT_USB_DEVICE(USB_VENDOR_ID_VTL, USB_DEVICE_ID_VTL_MULTITOUCH_FF3F) }, @@ -133414,7 +159489,7 @@ index 3ea7cb1cda84c..08462ac72b897 100644 /* Wistron panels */ { .driver_data = MT_CLS_NSMU, MT_USB_DEVICE(USB_VENDOR_ID_WISTRON, -@@ -2163,6 +2193,9 @@ static const struct hid_device_id mt_devices[] = { +@@ -2163,6 +2197,9 @@ static const struct hid_device_id mt_devices[] = { { .driver_data = MT_CLS_GOOGLE, HID_DEVICE(HID_BUS_ANY, HID_GROUP_ANY, USB_VENDOR_ID_GOOGLE, USB_DEVICE_ID_GOOGLE_TOUCH_ROSE) }, @@ -133424,6 +159499,112 @@ index 3ea7cb1cda84c..08462ac72b897 100644 /* Generic MT device */ { HID_DEVICE(HID_BUS_ANY, HID_GROUP_MULTITOUCH, HID_ANY_ID, HID_ANY_ID) }, +diff --git a/drivers/hid/hid-plantronics.c b/drivers/hid/hid-plantronics.c +index e81b7cec2d124..3d414ae194acb 100644 +--- a/drivers/hid/hid-plantronics.c ++++ b/drivers/hid/hid-plantronics.c +@@ -198,9 +198,18 @@ err: + } + + static const struct hid_device_id plantronics_devices[] = { ++ { HID_USB_DEVICE(USB_VENDOR_ID_PLANTRONICS, ++ USB_DEVICE_ID_PLANTRONICS_BLACKWIRE_3210_SERIES), ++ .driver_data = PLT_QUIRK_DOUBLE_VOLUME_KEYS }, + { HID_USB_DEVICE(USB_VENDOR_ID_PLANTRONICS, + USB_DEVICE_ID_PLANTRONICS_BLACKWIRE_3220_SERIES), + .driver_data = PLT_QUIRK_DOUBLE_VOLUME_KEYS }, ++ { HID_USB_DEVICE(USB_VENDOR_ID_PLANTRONICS, ++ USB_DEVICE_ID_PLANTRONICS_BLACKWIRE_3215_SERIES), ++ .driver_data = PLT_QUIRK_DOUBLE_VOLUME_KEYS }, ++ { HID_USB_DEVICE(USB_VENDOR_ID_PLANTRONICS, ++ 
USB_DEVICE_ID_PLANTRONICS_BLACKWIRE_3225_SERIES), ++ .driver_data = PLT_QUIRK_DOUBLE_VOLUME_KEYS }, + { HID_USB_DEVICE(USB_VENDOR_ID_PLANTRONICS, HID_ANY_ID) }, + { } + }; +diff --git a/drivers/hid/hid-playstation.c b/drivers/hid/hid-playstation.c +index ab7c82c2e8867..944e5e5ff1348 100644 +--- a/drivers/hid/hid-playstation.c ++++ b/drivers/hid/hid-playstation.c +@@ -626,6 +626,7 @@ static const struct attribute_group ps_device_attribute_group = { + + static int dualsense_get_calibration_data(struct dualsense *ds) + { ++ struct hid_device *hdev = ds->base.hdev; + short gyro_pitch_bias, gyro_pitch_plus, gyro_pitch_minus; + short gyro_yaw_bias, gyro_yaw_plus, gyro_yaw_minus; + short gyro_roll_bias, gyro_roll_plus, gyro_roll_minus; +@@ -636,6 +637,7 @@ static int dualsense_get_calibration_data(struct dualsense *ds) + int speed_2x; + int range_2g; + int ret = 0; ++ int i; + uint8_t *buf; + + buf = kzalloc(DS_FEATURE_REPORT_CALIBRATION_SIZE, GFP_KERNEL); +@@ -687,6 +689,21 @@ static int dualsense_get_calibration_data(struct dualsense *ds) + ds->gyro_calib_data[2].sens_numer = speed_2x*DS_GYRO_RES_PER_DEG_S; + ds->gyro_calib_data[2].sens_denom = gyro_roll_plus - gyro_roll_minus; + ++ /* ++ * Sanity check gyro calibration data. This is needed to prevent crashes ++ * during report handling of virtual, clone or broken devices not implementing ++ * calibration data properly. ++ */ ++ for (i = 0; i < ARRAY_SIZE(ds->gyro_calib_data); i++) { ++ if (ds->gyro_calib_data[i].sens_denom == 0) { ++ hid_warn(hdev, "Invalid gyro calibration data for axis (%d), disabling calibration.", ++ ds->gyro_calib_data[i].abs_code); ++ ds->gyro_calib_data[i].bias = 0; ++ ds->gyro_calib_data[i].sens_numer = DS_GYRO_RANGE; ++ ds->gyro_calib_data[i].sens_denom = S16_MAX; ++ } ++ } ++ + /* + * Set accelerometer calibration and normalization parameters. + * Data values will be normalized to 1/DS_ACC_RES_PER_G g. +@@ -709,6 +726,21 @@ static int dualsense_get_calibration_data(struct dualsense *ds) + ds->accel_calib_data[2].sens_numer = 2*DS_ACC_RES_PER_G; + ds->accel_calib_data[2].sens_denom = range_2g; + ++ /* ++ * Sanity check accelerometer calibration data. This is needed to prevent crashes ++ * during report handling of virtual, clone or broken devices not implementing calibration ++ * data properly. 
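++	 * A zero sens_denom would otherwise end up as the divisor when raw
++	 * samples are scaled, hence the S16_MAX fallback below.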
++ */ ++ for (i = 0; i < ARRAY_SIZE(ds->accel_calib_data); i++) { ++ if (ds->accel_calib_data[i].sens_denom == 0) { ++ hid_warn(hdev, "Invalid accelerometer calibration data for axis (%d), disabling calibration.", ++ ds->accel_calib_data[i].abs_code); ++ ds->accel_calib_data[i].bias = 0; ++ ds->accel_calib_data[i].sens_numer = DS_ACC_RANGE; ++ ds->accel_calib_data[i].sens_denom = S16_MAX; ++ } ++ } ++ + err_free: + kfree(buf); + return ret; +@@ -1282,7 +1314,8 @@ static int ps_probe(struct hid_device *hdev, const struct hid_device_id *id) + goto err_stop; + } + +- if (hdev->product == USB_DEVICE_ID_SONY_PS5_CONTROLLER) { ++ if (hdev->product == USB_DEVICE_ID_SONY_PS5_CONTROLLER || ++ hdev->product == USB_DEVICE_ID_SONY_PS5_CONTROLLER_2) { + dev = dualsense_create(hdev); + if (IS_ERR(dev)) { + hid_err(hdev, "Failed to create dualsense.\n"); +@@ -1320,6 +1353,8 @@ static void ps_remove(struct hid_device *hdev) + static const struct hid_device_id ps_devices[] = { + { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_SONY, USB_DEVICE_ID_SONY_PS5_CONTROLLER) }, + { HID_USB_DEVICE(USB_VENDOR_ID_SONY, USB_DEVICE_ID_SONY_PS5_CONTROLLER) }, ++ { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_SONY, USB_DEVICE_ID_SONY_PS5_CONTROLLER_2) }, ++ { HID_USB_DEVICE(USB_VENDOR_ID_SONY, USB_DEVICE_ID_SONY_PS5_CONTROLLER_2) }, + { } + }; + MODULE_DEVICE_TABLE(hid, ps_devices); diff --git a/drivers/hid/hid-prodikeys.c b/drivers/hid/hid-prodikeys.c index 2666af02d5c1a..e4e9471d0f1e9 100644 --- a/drivers/hid/hid-prodikeys.c @@ -133450,18 +159631,29 @@ index 2666af02d5c1a..e4e9471d0f1e9 100644 if (pk == NULL) { hid_err(hdev, "can't alloc descriptor\n"); diff --git a/drivers/hid/hid-quirks.c b/drivers/hid/hid-quirks.c -index 2e104682c22b9..544d1197aca48 100644 +index 2e104682c22b9..4a8c32148e58f 100644 --- a/drivers/hid/hid-quirks.c +++ b/drivers/hid/hid-quirks.c -@@ -124,6 +124,7 @@ static const struct hid_device_id hid_quirks[] = { +@@ -122,8 +122,10 @@ static const struct hid_device_id hid_quirks[] = { + { HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_MOUSE_C05A), HID_QUIRK_ALWAYS_POLL }, + { HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_MOUSE_C06A), HID_QUIRK_ALWAYS_POLL }, { HID_USB_DEVICE(USB_VENDOR_ID_MCS, USB_DEVICE_ID_MCS_GAMEPADBLOCK), HID_QUIRK_MULTI_INPUT }, ++ { HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_MOUSE_0783), HID_QUIRK_ALWAYS_POLL }, { HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_PIXART_MOUSE), HID_QUIRK_ALWAYS_POLL }, { HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_POWER_COVER), HID_QUIRK_NO_INIT_REPORTS }, + { HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_SURFACE3_COVER), HID_QUIRK_NO_INIT_REPORTS }, { HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_SURFACE_PRO_2), HID_QUIRK_NO_INIT_REPORTS }, { HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_TOUCH_COVER_2), HID_QUIRK_NO_INIT_REPORTS }, { HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_TYPE_COVER_2), HID_QUIRK_NO_INIT_REPORTS }, -@@ -186,6 +187,7 @@ static const struct hid_device_id hid_quirks[] = { +@@ -145,6 +147,7 @@ static const struct hid_device_id hid_quirks[] = { + { HID_USB_DEVICE(USB_VENDOR_ID_PIXART, USB_DEVICE_ID_PIXART_OPTICAL_TOUCH_SCREEN), HID_QUIRK_NO_INIT_REPORTS }, + { HID_USB_DEVICE(USB_VENDOR_ID_PIXART, USB_DEVICE_ID_PIXART_USB_OPTICAL_MOUSE), HID_QUIRK_ALWAYS_POLL }, + { HID_USB_DEVICE(USB_VENDOR_ID_PRIMAX, USB_DEVICE_ID_PRIMAX_MOUSE_4D22), HID_QUIRK_ALWAYS_POLL }, ++ { HID_USB_DEVICE(USB_VENDOR_ID_PRIMAX, USB_DEVICE_ID_PRIMAX_MOUSE_4E2A), 
HID_QUIRK_ALWAYS_POLL }, + { HID_USB_DEVICE(USB_VENDOR_ID_PRIMAX, USB_DEVICE_ID_PRIMAX_PIXART_MOUSE_4D0F), HID_QUIRK_ALWAYS_POLL }, + { HID_USB_DEVICE(USB_VENDOR_ID_PRIMAX, USB_DEVICE_ID_PRIMAX_PIXART_MOUSE_4D65), HID_QUIRK_ALWAYS_POLL }, + { HID_USB_DEVICE(USB_VENDOR_ID_PRIMAX, USB_DEVICE_ID_PRIMAX_PIXART_MOUSE_4E22), HID_QUIRK_ALWAYS_POLL }, +@@ -186,6 +189,7 @@ static const struct hid_device_id hid_quirks[] = { { HID_USB_DEVICE(USB_VENDOR_ID_TURBOX, USB_DEVICE_ID_TURBOX_KEYBOARD), HID_QUIRK_NOGET }, { HID_USB_DEVICE(USB_VENDOR_ID_UCLOGIC, USB_DEVICE_ID_UCLOGIC_TABLET_KNA5), HID_QUIRK_MULTI_INPUT }, { HID_USB_DEVICE(USB_VENDOR_ID_UCLOGIC, USB_DEVICE_ID_UCLOGIC_TABLET_TWA60), HID_QUIRK_MULTI_INPUT }, @@ -133469,6 +159661,24 @@ index 2e104682c22b9..544d1197aca48 100644 { HID_USB_DEVICE(USB_VENDOR_ID_WALTOP, USB_DEVICE_ID_WALTOP_MEDIA_TABLET_10_6_INCH), HID_QUIRK_MULTI_INPUT }, { HID_USB_DEVICE(USB_VENDOR_ID_WALTOP, USB_DEVICE_ID_WALTOP_MEDIA_TABLET_14_1_INCH), HID_QUIRK_MULTI_INPUT }, { HID_USB_DEVICE(USB_VENDOR_ID_WALTOP, USB_DEVICE_ID_WALTOP_SIRIUS_BATTERY_FREE_TABLET), HID_QUIRK_MULTI_INPUT }, +@@ -377,7 +381,8 @@ static const struct hid_device_id hid_have_special_driver[] = { + { HID_USB_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_M_DT1URBK) }, + { HID_USB_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_M_DT1DRBK) }, + { HID_USB_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_M_HT1URBK) }, +- { HID_USB_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_M_HT1DRBK) }, ++ { HID_USB_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_M_HT1DRBK_010D) }, ++ { HID_USB_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_M_HT1DRBK_011C) }, + #endif + #if IS_ENABLED(CONFIG_HID_ELO) + { HID_USB_DEVICE(USB_VENDOR_ID_ELO, 0x0009) }, +@@ -606,6 +611,7 @@ static const struct hid_device_id hid_have_special_driver[] = { + { HID_USB_DEVICE(USB_VENDOR_ID_SAITEK, USB_DEVICE_ID_SAITEK_MMO7) }, + { HID_USB_DEVICE(USB_VENDOR_ID_MADCATZ, USB_DEVICE_ID_MADCATZ_RAT5) }, + { HID_USB_DEVICE(USB_VENDOR_ID_MADCATZ, USB_DEVICE_ID_MADCATZ_RAT9) }, ++ { HID_USB_DEVICE(USB_VENDOR_ID_MADCATZ, USB_DEVICE_ID_MADCATZ_MMO7) }, + #endif + #if IS_ENABLED(CONFIG_HID_SAMSUNG) + { HID_USB_DEVICE(USB_VENDOR_ID_SAMSUNG, USB_DEVICE_ID_SAMSUNG_IR_REMOTE) }, diff --git a/drivers/hid/hid-roccat-arvo.c b/drivers/hid/hid-roccat-arvo.c index 4556d2a50f754..d94ee0539421e 100644 --- a/drivers/hid/hid-roccat-arvo.c @@ -133631,6 +159841,19 @@ index 26373b82fe812..6da80e442fdd1 100644 wake_up_interruptible(&device->wait); return 0; } +diff --git a/drivers/hid/hid-saitek.c b/drivers/hid/hid-saitek.c +index c7bf14c019605..b84e975977c42 100644 +--- a/drivers/hid/hid-saitek.c ++++ b/drivers/hid/hid-saitek.c +@@ -187,6 +187,8 @@ static const struct hid_device_id saitek_devices[] = { + .driver_data = SAITEK_RELEASE_MODE_RAT7 }, + { HID_USB_DEVICE(USB_VENDOR_ID_SAITEK, USB_DEVICE_ID_SAITEK_MMO7), + .driver_data = SAITEK_RELEASE_MODE_MMO7 }, ++ { HID_USB_DEVICE(USB_VENDOR_ID_MADCATZ, USB_DEVICE_ID_MADCATZ_MMO7), ++ .driver_data = SAITEK_RELEASE_MODE_MMO7 }, + { } + }; + diff --git a/drivers/hid/hid-samsung.c b/drivers/hid/hid-samsung.c index 2e1c31156eca0..cf5992e970940 100644 --- a/drivers/hid/hid-samsung.c @@ -133645,6 +159868,19 @@ index 2e1c31156eca0..cf5992e970940 100644 ret = hid_parse(hdev); if (ret) { hid_err(hdev, "parse failed\n"); +diff --git a/drivers/hid/hid-sensor-custom.c b/drivers/hid/hid-sensor-custom.c +index 32c2306e240d6..602465ad27458 100644 +--- a/drivers/hid/hid-sensor-custom.c ++++ b/drivers/hid/hid-sensor-custom.c +@@ 
-62,7 +62,7 @@ struct hid_sensor_sample { + u32 raw_len; + } __packed; + +-static struct attribute hid_custom_attrs[] = { ++static struct attribute hid_custom_attrs[HID_CUSTOM_TOTAL_ATTRS] = { + {.name = "name", .mode = S_IRUGO}, + {.name = "units", .mode = S_IRUGO}, + {.name = "unit-expo", .mode = S_IRUGO}, diff --git a/drivers/hid/hid-sony.c b/drivers/hid/hid-sony.c index d1b107d547f54..60ec2b29d54de 100644 --- a/drivers/hid/hid-sony.c @@ -133826,10 +160062,10 @@ index d70cd3d7f583b..ac3fd870673d2 100644 dev = devm_kzalloc(&hdev->dev, sizeof(*dev), GFP_KERNEL); diff --git a/drivers/hid/hid-uclogic-core.c b/drivers/hid/hid-uclogic-core.c -index 6a9865dd703c0..d8ab0139e5cda 100644 +index 6a9865dd703c0..785d81d61ba48 100644 --- a/drivers/hid/hid-uclogic-core.c +++ b/drivers/hid/hid-uclogic-core.c -@@ -164,6 +164,9 @@ static int uclogic_probe(struct hid_device *hdev, +@@ -164,11 +164,15 @@ static int uclogic_probe(struct hid_device *hdev, struct uclogic_drvdata *drvdata = NULL; bool params_initialized = false; @@ -133839,6 +160075,12 @@ index 6a9865dd703c0..d8ab0139e5cda 100644 /* * libinput requires the pad interface to be on a different node * than the pen, so use QUIRK_MULTI_INPUT for all tablets. + */ + hdev->quirks |= HID_QUIRK_MULTI_INPUT; ++ hdev->quirks |= HID_QUIRK_HIDINPUT_FORCE; + + /* Allocate and assign driver data */ + drvdata = devm_kzalloc(&hdev->dev, sizeof(*drvdata), GFP_KERNEL); diff --git a/drivers/hid/hid-uclogic-params.c b/drivers/hid/hid-uclogic-params.c index 3d67b748a3b95..3e70f969fb849 100644 --- a/drivers/hid/hid-uclogic-params.c @@ -134451,6 +160693,34 @@ index 405e0d5212cc8..df0a825694f52 100644 ++cl->send_msg_cnt_ipc; } +diff --git a/drivers/hid/intel-ish-hid/ishtp/dma-if.c b/drivers/hid/intel-ish-hid/ishtp/dma-if.c +index 40554c8daca07..00046cbfd4ed0 100644 +--- a/drivers/hid/intel-ish-hid/ishtp/dma-if.c ++++ b/drivers/hid/intel-ish-hid/ishtp/dma-if.c +@@ -104,6 +104,11 @@ void *ishtp_cl_get_dma_send_buf(struct ishtp_device *dev, + int required_slots = (size / DMA_SLOT_SIZE) + + 1 * (size % DMA_SLOT_SIZE != 0); + ++ if (!dev->ishtp_dma_tx_map) { ++ dev_err(dev->devc, "Fail to allocate Tx map\n"); ++ return NULL; ++ } ++ + spin_lock_irqsave(&dev->ishtp_dma_tx_lock, flags); + for (i = 0; i <= (dev->ishtp_dma_num_slots - required_slots); i++) { + free = 1; +@@ -150,6 +155,11 @@ void ishtp_cl_release_dma_acked_mem(struct ishtp_device *dev, + return; + } + ++ if (!dev->ishtp_dma_tx_map) { ++ dev_err(dev->devc, "Fail to allocate Tx map\n"); ++ return; ++ } ++ + i = (msg_addr - dev->ishtp_host_dma_tx_buf) / DMA_SLOT_SIZE; + spin_lock_irqsave(&dev->ishtp_dma_tx_lock, flags); + for (j = 0; j < acked_slots; j++) { diff --git a/drivers/hid/surface-hid/surface_hid.c b/drivers/hid/surface-hid/surface_hid.c index a3a70e4f3f6c9..d4aa8c81903ae 100644 --- a/drivers/hid/surface-hid/surface_hid.c @@ -134548,10 +160818,20 @@ index 8fe3efcb83271..fc06d8bb42e0f 100644 return 0; diff --git a/drivers/hid/wacom_sys.c b/drivers/hid/wacom_sys.c -index 93f49b766376e..7e67c41d97a44 100644 +index 93f49b766376e..2b6388da545e9 100644 --- a/drivers/hid/wacom_sys.c +++ b/drivers/hid/wacom_sys.c -@@ -726,7 +726,7 @@ static void wacom_retrieve_hid_descriptor(struct hid_device *hdev, +@@ -160,6 +160,9 @@ static int wacom_raw_event(struct hid_device *hdev, struct hid_report *report, + { + struct wacom *wacom = hid_get_drvdata(hdev); + ++ if (wacom->wacom_wac.features.type == BOOTLOADER) ++ return 0; ++ + if (size > WACOM_PKGLEN_MAX) + return 1; + +@@ -726,7 +729,7 @@ static void 
wacom_retrieve_hid_descriptor(struct hid_device *hdev, * Skip the query for this type and modify defaults based on * interface number. */ @@ -134560,7 +160840,7 @@ index 93f49b766376e..7e67c41d97a44 100644 if (intf->cur_altsetting->desc.bInterfaceNumber == 0) features->device_type = WACOM_DEVICETYPE_WL_MONITOR; else -@@ -2124,7 +2124,7 @@ static int wacom_register_inputs(struct wacom *wacom) +@@ -2124,7 +2127,7 @@ static int wacom_register_inputs(struct wacom *wacom) error = wacom_setup_pad_input_capabilities(pad_input_dev, wacom_wac); if (error) { @@ -134569,7 +160849,7 @@ index 93f49b766376e..7e67c41d97a44 100644 input_free_device(pad_input_dev); wacom_wac->pad_input = NULL; pad_input_dev = NULL; -@@ -2217,7 +2217,7 @@ static void wacom_update_name(struct wacom *wacom, const char *suffix) +@@ -2217,7 +2220,7 @@ static void wacom_update_name(struct wacom *wacom, const char *suffix) if ((features->type == HID_GENERIC) && !strcmp("Wacom HID", features->name)) { char *product_name = wacom->hdev->name; @@ -134578,7 +160858,7 @@ index 93f49b766376e..7e67c41d97a44 100644 struct usb_interface *intf = to_usb_interface(wacom->hdev->dev.parent); struct usb_device *dev = interface_to_usbdev(intf); product_name = dev->product; -@@ -2454,6 +2454,9 @@ static void wacom_wireless_work(struct work_struct *work) +@@ -2454,6 +2457,9 @@ static void wacom_wireless_work(struct work_struct *work) wacom_destroy_battery(wacom); @@ -134588,7 +160868,7 @@ index 93f49b766376e..7e67c41d97a44 100644 /* Stylus interface */ hdev1 = usb_get_intfdata(usbdev->config->interface[1]); wacom1 = hid_get_drvdata(hdev1); -@@ -2733,8 +2736,6 @@ static void wacom_mode_change_work(struct work_struct *work) +@@ -2733,8 +2739,6 @@ static void wacom_mode_change_work(struct work_struct *work) static int wacom_probe(struct hid_device *hdev, const struct hid_device_id *id) { @@ -134597,7 +160877,7 @@ index 93f49b766376e..7e67c41d97a44 100644 struct wacom *wacom; struct wacom_wac *wacom_wac; struct wacom_features *features; -@@ -2769,8 +2770,14 @@ static int wacom_probe(struct hid_device *hdev, +@@ -2769,8 +2773,14 @@ static int wacom_probe(struct hid_device *hdev, wacom_wac->hid_data.inputmode = -1; wacom_wac->mode_report = -1; @@ -134614,8 +160894,20 @@ index 93f49b766376e..7e67c41d97a44 100644 mutex_init(&wacom->lock); INIT_DELAYED_WORK(&wacom->init_work, wacom_init_work); INIT_WORK(&wacom->wireless_work, wacom_wireless_work); +@@ -2785,6 +2795,11 @@ static int wacom_probe(struct hid_device *hdev, + return error; + } + ++ if (features->type == BOOTLOADER) { ++ hid_warn(hdev, "Using device in hidraw-only mode"); ++ return hid_hw_start(hdev, HID_CONNECT_HIDRAW); ++ } ++ + error = wacom_parse_and_register(wacom, false); + if (error) + return error; diff --git a/drivers/hid/wacom_wac.c b/drivers/hid/wacom_wac.c -index 33a6908995b1b..c454231afec89 100644 +index 33a6908995b1b..546aaaaec016e 100644 --- a/drivers/hid/wacom_wac.c +++ b/drivers/hid/wacom_wac.c @@ -638,9 +638,26 @@ static int wacom_intuos_id_mangle(int tool_id) @@ -134840,11 +161132,37 @@ index 33a6908995b1b..c454231afec89 100644 wacom_wac_pad_event(hdev, field, usage, value); else if (WACOM_PEN_FIELD(field) && wacom->wacom_wac.pen_input) wacom_wac_pen_event(hdev, field, usage, value); +@@ -4756,6 +4813,9 @@ static const struct wacom_features wacom_features_0x3c8 = + static const struct wacom_features wacom_features_HID_ANY_ID = + { "Wacom HID", .type = HID_GENERIC, .oVid = HID_ANY_ID, .oPid = HID_ANY_ID }; + ++static const struct wacom_features wacom_features_0x94 = ++ { "Wacom 
Bootloader", .type = BOOTLOADER }; ++ + #define USB_DEVICE_WACOM(prod) \ + HID_DEVICE(BUS_USB, HID_GROUP_WACOM, USB_VENDOR_ID_WACOM, prod),\ + .driver_data = (kernel_ulong_t)&wacom_features_##prod +@@ -4829,6 +4889,7 @@ const struct hid_device_id wacom_ids[] = { + { USB_DEVICE_WACOM(0x84) }, + { USB_DEVICE_WACOM(0x90) }, + { USB_DEVICE_WACOM(0x93) }, ++ { USB_DEVICE_WACOM(0x94) }, + { USB_DEVICE_WACOM(0x97) }, + { USB_DEVICE_WACOM(0x9A) }, + { USB_DEVICE_WACOM(0x9F) }, diff --git a/drivers/hid/wacom_wac.h b/drivers/hid/wacom_wac.h -index 8b2d4e5b2303c..466b62cc16dc1 100644 +index 8b2d4e5b2303c..44e0763a0ede8 100644 --- a/drivers/hid/wacom_wac.h +++ b/drivers/hid/wacom_wac.h -@@ -301,6 +301,7 @@ struct hid_data { +@@ -242,6 +242,7 @@ enum { + MTTPC, + MTTPC_B, + HID_GENERIC, ++ BOOTLOADER, + MAX_TYPE + }; + +@@ -301,6 +302,7 @@ struct hid_data { bool barrelswitch; bool barrelswitch2; bool serialhi; @@ -134853,10 +161171,22 @@ index 8b2d4e5b2303c..466b62cc16dc1 100644 int y; int pressure; diff --git a/drivers/hsi/controllers/omap_ssi_core.c b/drivers/hsi/controllers/omap_ssi_core.c -index 44a3f5660c109..eb98201583185 100644 +index 44a3f5660c109..26f2c3c012978 100644 --- a/drivers/hsi/controllers/omap_ssi_core.c +++ b/drivers/hsi/controllers/omap_ssi_core.c -@@ -524,6 +524,7 @@ static int ssi_probe(struct platform_device *pd) +@@ -502,8 +502,10 @@ static int ssi_probe(struct platform_device *pd) + platform_set_drvdata(pd, ssi); + + err = ssi_add_controller(ssi, pd); +- if (err < 0) ++ if (err < 0) { ++ hsi_put_controller(ssi); + goto out1; ++ } + + pm_runtime_enable(&pd->dev); + +@@ -524,6 +526,7 @@ static int ssi_probe(struct platform_device *pd) if (!childpdev) { err = -ENODEV; dev_err(&pd->dev, "failed to create ssi controller port\n"); @@ -134864,6 +161194,32 @@ index 44a3f5660c109..eb98201583185 100644 goto out3; } } +@@ -535,9 +538,9 @@ out3: + device_for_each_child(&pd->dev, NULL, ssi_remove_ports); + out2: + ssi_remove_controller(ssi); ++ pm_runtime_disable(&pd->dev); + out1: + platform_set_drvdata(pd, NULL); +- pm_runtime_disable(&pd->dev); + + return err; + } +@@ -628,7 +631,13 @@ static int __init ssi_init(void) { + if (ret) + return ret; + +- return platform_driver_register(&ssi_port_pdriver); ++ ret = platform_driver_register(&ssi_port_pdriver); ++ if (ret) { ++ platform_driver_unregister(&ssi_pdriver); ++ return ret; ++ } ++ ++ return 0; + } + module_init(ssi_init); + diff --git a/drivers/hsi/controllers/omap_ssi_port.c b/drivers/hsi/controllers/omap_ssi_port.c index a0cb5be246e1c..b9495b720f1bd 100644 --- a/drivers/hsi/controllers/omap_ssi_port.c @@ -134943,7 +161299,7 @@ index f3761c73b0742..6b967bb386907 100644 spin_lock_irqsave(&rqstor->req_lock, flags); diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c -index 142308526ec6a..07003019263a2 100644 +index 142308526ec6a..d8dc5cc5e3a88 100644 --- a/drivers/hv/channel_mgmt.c +++ b/drivers/hv/channel_mgmt.c @@ -380,7 +380,7 @@ void vmbus_channel_map_relid(struct vmbus_channel *channel) @@ -134972,7 +161328,26 @@ index 142308526ec6a..07003019263a2 100644 vmbus_connection.channels[channel->offermsg.child_relid], channel); } -@@ -637,6 +637,7 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel) +@@ -531,13 +531,17 @@ static void vmbus_add_channel_work(struct work_struct *work) + * Add the new device to the bus. This will kick off device-driver + * binding which eventually invokes the device driver's AddDevice() + * method. 
++ * ++ * If vmbus_device_register() fails, the 'device_obj' is freed in ++ * vmbus_device_release() as called by device_unregister() in the ++ * error path of vmbus_device_register(). In the outside error ++ * path, there's no need to free it. + */ + ret = vmbus_device_register(newchannel->device_obj); + + if (ret != 0) { + pr_err("unable to add child device object (relid %d)\n", + newchannel->offermsg.child_relid); +- kfree(newchannel->device_obj); + goto err_deq_chan; + } + +@@ -637,6 +641,7 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel) */ if (newchannel->offermsg.offer.sub_channel_index == 0) { mutex_unlock(&vmbus_connection.channel_mutex); @@ -135108,10 +161483,30 @@ index 7f11ea07d698f..3248b48f37f61 100644 /* * Specify our alignment requirements as it relates diff --git a/drivers/hv/ring_buffer.c b/drivers/hv/ring_buffer.c -index 314015d9e912d..f4091143213b0 100644 +index 314015d9e912d..1475ea77351ef 100644 --- a/drivers/hv/ring_buffer.c +++ b/drivers/hv/ring_buffer.c -@@ -408,7 +408,16 @@ int hv_ringbuffer_read(struct vmbus_channel *channel, +@@ -249,6 +249,19 @@ void hv_ringbuffer_cleanup(struct hv_ring_buffer_info *ring_info) + ring_info->pkt_buffer_size = 0; + } + ++/* ++ * Check if the ring buffer spinlock is available to take or not; used on ++ * atomic contexts, like panic path (see the Hyper-V framebuffer driver). ++ */ ++ ++bool hv_ringbuffer_spinlock_busy(struct vmbus_channel *channel) ++{ ++ struct hv_ring_buffer_info *rinfo = &channel->outbound; ++ ++ return spin_is_locked(&rinfo->ring_lock); ++} ++EXPORT_SYMBOL_GPL(hv_ringbuffer_spinlock_busy); ++ + /* Write to the ring buffer. */ + int hv_ringbuffer_write(struct vmbus_channel *channel, + const struct kvec *kv_list, u32 kv_count, +@@ -408,7 +421,16 @@ int hv_ringbuffer_read(struct vmbus_channel *channel, static u32 hv_pkt_iter_avail(const struct hv_ring_buffer_info *rbi) { u32 priv_read_loc = rbi->priv_read_index; @@ -135130,7 +161525,7 @@ index 314015d9e912d..f4091143213b0 100644 if (write_loc >= priv_read_loc) return write_loc - priv_read_loc; diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c -index 392c1ac4f8193..ecfc299834e15 100644 +index 392c1ac4f8193..b906a3a7941c2 100644 --- a/drivers/hv/vmbus_drv.c +++ b/drivers/hv/vmbus_drv.c @@ -76,8 +76,8 @@ static int hyperv_panic_event(struct notifier_block *nb, unsigned long val, @@ -135207,7 +161602,15 @@ index 392c1ac4f8193..ecfc299834e15 100644 dev_err(device, "Unable to set up channel sysfs files\n"); return ret; } -@@ -2331,7 +2340,7 @@ int vmbus_allocate_mmio(struct resource **new, struct hv_device *device_obj, +@@ -2100,6 +2109,7 @@ int vmbus_device_register(struct hv_device *child_device_obj) + ret = device_register(&child_device_obj->device); + if (ret) { + pr_err("Unable to register child device\n"); ++ put_device(&child_device_obj->device); + return ret; + } + +@@ -2331,7 +2341,7 @@ int vmbus_allocate_mmio(struct resource **new, struct hv_device *device_obj, bool fb_overlap_ok) { struct resource *iter, *shadow; @@ -135216,7 +161619,7 @@ index 392c1ac4f8193..ecfc299834e15 100644 const char *dev_n = dev_name(&device_obj->device); int retval; -@@ -2366,6 +2375,14 @@ int vmbus_allocate_mmio(struct resource **new, struct hv_device *device_obj, +@@ -2366,6 +2376,14 @@ int vmbus_allocate_mmio(struct resource **new, struct hv_device *device_obj, range_max = iter->end; start = (range_min + align - 1) & ~(align - 1); for (; start + size - 1 <= range_max; start += align) { @@ -135231,7 +161634,7 @@ index 392c1ac4f8193..ecfc299834e15 
100644 shadow = __request_region(iter, start, size, NULL, IORESOURCE_BUSY); if (!shadow) -@@ -2773,10 +2790,15 @@ static void __exit vmbus_exit(void) +@@ -2773,10 +2791,15 @@ static void __exit vmbus_exit(void) if (ms_hyperv.misc_features & HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE) { kmsg_dump_unregister(&hv_kmsg_dumper); unregister_die_notifier(&hyperv_die_block); @@ -135250,10 +161653,18 @@ index 392c1ac4f8193..ecfc299834e15 100644 unregister_sysctl_table(hv_ctl_table_hdr); hv_ctl_table_hdr = NULL; diff --git a/drivers/hwmon/Kconfig b/drivers/hwmon/Kconfig -index c4578e8f34bb5..51f1caa10d113 100644 +index c4578e8f34bb5..17ba1d9ff0751 100644 --- a/drivers/hwmon/Kconfig +++ b/drivers/hwmon/Kconfig -@@ -944,7 +944,7 @@ config SENSORS_LTC4261 +@@ -776,6 +776,7 @@ config SENSORS_IT87 + config SENSORS_JC42 + tristate "JEDEC JC42.4 compliant memory module temperature sensors" + depends on I2C ++ select REGMAP_I2C + help + If you say yes here, you get support for JEDEC JC42.4 compliant + temperature sensors, which are used on many DDR3 memory modules for +@@ -944,7 +945,7 @@ config SENSORS_LTC4261 config SENSORS_LTQ_CPUTEMP bool "Lantiq cpu temperature sensor driver" @@ -135262,7 +161673,7 @@ index c4578e8f34bb5..51f1caa10d113 100644 help If you say yes here you get support for the temperature sensor inside your CPU. -@@ -1317,7 +1317,7 @@ config SENSORS_LM90 +@@ -1317,7 +1318,7 @@ config SENSORS_LM90 Maxim MAX6646, MAX6647, MAX6648, MAX6649, MAX6654, MAX6657, MAX6658, MAX6659, MAX6680, MAX6681, MAX6692, MAX6695, MAX6696, ON Semiconductor NCT1008, Winbond/Nuvoton W83L771W/G/AWG/ASG, @@ -135307,7 +161718,7 @@ index d519aca4a9d64..c67cd037a93fd 100644 val = clamp_val(val, 1, 65534); diff --git a/drivers/hwmon/coretemp.c b/drivers/hwmon/coretemp.c -index bb9211215a688..032129292957e 100644 +index bb9211215a688..42b84ebff0579 100644 --- a/drivers/hwmon/coretemp.c +++ b/drivers/hwmon/coretemp.c @@ -46,9 +46,6 @@ MODULE_PARM_DESC(tjmax, "TjMax value in degrees Celsius"); @@ -135329,7 +161740,22 @@ index bb9211215a688..032129292957e 100644 struct cpumask cpumask; struct temp_data *core_data[MAX_CORE_DATA]; struct device_attribute name_attr; -@@ -441,7 +440,7 @@ static struct temp_data *init_temp_data(unsigned int cpu, int pkg_flag) +@@ -243,10 +242,13 @@ static int adjust_tjmax(struct cpuinfo_x86 *c, u32 id, struct device *dev) + */ + if (host_bridge && host_bridge->vendor == PCI_VENDOR_ID_INTEL) { + for (i = 0; i < ARRAY_SIZE(tjmax_pci_table); i++) { +- if (host_bridge->device == tjmax_pci_table[i].device) ++ if (host_bridge->device == tjmax_pci_table[i].device) { ++ pci_dev_put(host_bridge); + return tjmax_pci_table[i].tjmax; ++ } + } + } ++ pci_dev_put(host_bridge); + + for (i = 0; i < ARRAY_SIZE(tjmax_table); i++) { + if (strstr(c->x86_model_id, tjmax_table[i].id)) +@@ -441,7 +443,7 @@ static struct temp_data *init_temp_data(unsigned int cpu, int pkg_flag) MSR_IA32_THERM_STATUS; tdata->is_pkg_data = pkg_flag; tdata->cpu = cpu; @@ -135338,7 +161764,7 @@ index bb9211215a688..032129292957e 100644 tdata->attr_size = MAX_CORE_ATTRS; mutex_init(&tdata->update_lock); return tdata; -@@ -454,7 +453,7 @@ static int create_core_data(struct platform_device *pdev, unsigned int cpu, +@@ -454,7 +456,7 @@ static int create_core_data(struct platform_device *pdev, unsigned int cpu, struct platform_data *pdata = platform_get_drvdata(pdev); struct cpuinfo_x86 *c = &cpu_data(cpu); u32 eax, edx; @@ -135347,7 +161773,7 @@ index bb9211215a688..032129292957e 100644 /* * Find attr number for sysfs: -@@ -462,14 +461,26 @@ static 
int create_core_data(struct platform_device *pdev, unsigned int cpu, +@@ -462,14 +464,26 @@ static int create_core_data(struct platform_device *pdev, unsigned int cpu, * The attr number is always core id + 2 * The Pkgtemp will always show up as temp1_*, if available */ @@ -135379,7 +161805,7 @@ index bb9211215a688..032129292957e 100644 /* Test if we can access the status register */ err = rdmsr_safe_on_cpu(cpu, tdata->status_reg, &eax, &edx); -@@ -505,6 +516,9 @@ static int create_core_data(struct platform_device *pdev, unsigned int cpu, +@@ -505,6 +519,9 @@ static int create_core_data(struct platform_device *pdev, unsigned int cpu, exit_free: pdata->core_data[attr_no] = NULL; kfree(tdata); @@ -135389,7 +161815,16 @@ index bb9211215a688..032129292957e 100644 return err; } -@@ -524,6 +538,9 @@ static void coretemp_remove_core(struct platform_data *pdata, int indx) +@@ -519,11 +536,18 @@ static void coretemp_remove_core(struct platform_data *pdata, int indx) + { + struct temp_data *tdata = pdata->core_data[indx]; + ++ /* if we errored on add then this is already gone */ ++ if (!tdata) ++ return; ++ + /* Remove the sysfs attributes */ + sysfs_remove_group(&pdata->hwmon_dev->kobj, &tdata->attr_group); kfree(pdata->core_data[indx]); pdata->core_data[indx] = NULL; @@ -135399,7 +161834,7 @@ index bb9211215a688..032129292957e 100644 } static int coretemp_probe(struct platform_device *pdev) -@@ -537,6 +554,7 @@ static int coretemp_probe(struct platform_device *pdev) +@@ -537,6 +561,7 @@ static int coretemp_probe(struct platform_device *pdev) return -ENOMEM; pdata->pkg_id = pdev->id; @@ -135407,7 +161842,7 @@ index bb9211215a688..032129292957e 100644 platform_set_drvdata(pdev, pdata); pdata->hwmon_dev = devm_hwmon_device_register_with_groups(dev, DRVNAME, -@@ -553,6 +571,7 @@ static int coretemp_remove(struct platform_device *pdev) +@@ -553,6 +578,7 @@ static int coretemp_remove(struct platform_device *pdev) if (pdata->core_data[i]) coretemp_remove_core(pdata, i); @@ -135415,7 +161850,7 @@ index bb9211215a688..032129292957e 100644 return 0; } -@@ -647,7 +666,7 @@ static int coretemp_cpu_offline(unsigned int cpu) +@@ -647,7 +673,7 @@ static int coretemp_cpu_offline(unsigned int cpu) struct platform_device *pdev = coretemp_get_pdev(cpu); struct platform_data *pd; struct temp_data *tdata; @@ -135424,7 +161859,7 @@ index bb9211215a688..032129292957e 100644 /* * Don't execute this on suspend as the device remove locks -@@ -660,12 +679,19 @@ static int coretemp_cpu_offline(unsigned int cpu) +@@ -660,12 +686,19 @@ static int coretemp_cpu_offline(unsigned int cpu) if (!pdev) return 0; @@ -135623,6 +162058,19 @@ index 8d3b1dae31df1..3ae961986fc31 100644 INIT_LIST_HEAD(&hwdev->tzdata); +diff --git a/drivers/hwmon/i5500_temp.c b/drivers/hwmon/i5500_temp.c +index 360f5aee13947..d4be03f43fb45 100644 +--- a/drivers/hwmon/i5500_temp.c ++++ b/drivers/hwmon/i5500_temp.c +@@ -108,7 +108,7 @@ static int i5500_temp_probe(struct pci_dev *pdev, + u32 tstimer; + s8 tsfsc; + +- err = pci_enable_device(pdev); ++ err = pcim_enable_device(pdev); + if (err) { + dev_err(&pdev->dev, "Failed to enable device\n"); + return err; diff --git a/drivers/hwmon/ibmaem.c b/drivers/hwmon/ibmaem.c index a4ec85207782d..2e6d6a5cffa16 100644 --- a/drivers/hwmon/ibmaem.c @@ -135667,6 +162115,429 @@ index a4ec85207782d..2e6d6a5cffa16 100644 dev_err: ida_simple_remove(&aem_ida, data->id); id_err: +diff --git a/drivers/hwmon/ibmpex.c b/drivers/hwmon/ibmpex.c +index b2ab83c9fd9a8..fe90f0536d76c 100644 +--- a/drivers/hwmon/ibmpex.c ++++ 
b/drivers/hwmon/ibmpex.c +@@ -502,6 +502,7 @@ static void ibmpex_register_bmc(int iface, struct device *dev) + return; + + out_register: ++ list_del(&data->list); + hwmon_device_unregister(data->hwmon_dev); + out_user: + ipmi_destroy_user(data->user); +diff --git a/drivers/hwmon/ina3221.c b/drivers/hwmon/ina3221.c +index 58d3828e2ec0c..14586b2fb17d1 100644 +--- a/drivers/hwmon/ina3221.c ++++ b/drivers/hwmon/ina3221.c +@@ -228,7 +228,7 @@ static int ina3221_read_value(struct ina3221_data *ina, unsigned int reg, + * Shunt Voltage Sum register has 14-bit value with 1-bit shift + * Other Shunt Voltage registers have 12 bits with 3-bit shift + */ +- if (reg == INA3221_SHUNT_SUM) ++ if (reg == INA3221_SHUNT_SUM || reg == INA3221_CRIT_SUM) + *val = sign_extend32(regval >> 1, 14); + else + *val = sign_extend32(regval >> 3, 12); +@@ -465,7 +465,7 @@ static int ina3221_write_curr(struct device *dev, u32 attr, + * SHUNT_SUM: (1 / 40uV) << 1 = 1 / 20uV + * SHUNT[1-3]: (1 / 40uV) << 3 = 1 / 5uV + */ +- if (reg == INA3221_SHUNT_SUM) ++ if (reg == INA3221_SHUNT_SUM || reg == INA3221_CRIT_SUM) + regval = DIV_ROUND_CLOSEST(voltage_uv, 20) & 0xfffe; + else + regval = DIV_ROUND_CLOSEST(voltage_uv, 5) & 0xfff8; +diff --git a/drivers/hwmon/jc42.c b/drivers/hwmon/jc42.c +index 4a03d010ec5a8..52f341d46029b 100644 +--- a/drivers/hwmon/jc42.c ++++ b/drivers/hwmon/jc42.c +@@ -19,6 +19,7 @@ + #include <linux/err.h> + #include <linux/mutex.h> + #include <linux/of.h> ++#include <linux/regmap.h> + + /* Addresses to scan */ + static const unsigned short normal_i2c[] = { +@@ -189,31 +190,14 @@ static struct jc42_chips jc42_chips[] = { + { STM_MANID, STTS3000_DEVID, STTS3000_DEVID_MASK }, + }; + +-enum temp_index { +- t_input = 0, +- t_crit, +- t_min, +- t_max, +- t_num_temp +-}; +- +-static const u8 temp_regs[t_num_temp] = { +- [t_input] = JC42_REG_TEMP, +- [t_crit] = JC42_REG_TEMP_CRITICAL, +- [t_min] = JC42_REG_TEMP_LOWER, +- [t_max] = JC42_REG_TEMP_UPPER, +-}; +- + /* Each client has this additional data */ + struct jc42_data { +- struct i2c_client *client; + struct mutex update_lock; /* protect register access */ ++ struct regmap *regmap; + bool extended; /* true if extended range supported */ + bool valid; +- unsigned long last_updated; /* In jiffies */ + u16 orig_config; /* original configuration */ + u16 config; /* current configuration */ +- u16 temp[t_num_temp];/* Temperatures */ + }; + + #define JC42_TEMP_MIN_EXTENDED (-40000) +@@ -238,85 +222,102 @@ static int jc42_temp_from_reg(s16 reg) + return reg * 125 / 2; + } + +-static struct jc42_data *jc42_update_device(struct device *dev) +-{ +- struct jc42_data *data = dev_get_drvdata(dev); +- struct i2c_client *client = data->client; +- struct jc42_data *ret = data; +- int i, val; +- +- mutex_lock(&data->update_lock); +- +- if (time_after(jiffies, data->last_updated + HZ) || !data->valid) { +- for (i = 0; i < t_num_temp; i++) { +- val = i2c_smbus_read_word_swapped(client, temp_regs[i]); +- if (val < 0) { +- ret = ERR_PTR(val); +- goto abort; +- } +- data->temp[i] = val; +- } +- data->last_updated = jiffies; +- data->valid = true; +- } +-abort: +- mutex_unlock(&data->update_lock); +- return ret; +-} +- + static int jc42_read(struct device *dev, enum hwmon_sensor_types type, + u32 attr, int channel, long *val) + { +- struct jc42_data *data = jc42_update_device(dev); +- int temp, hyst; ++ struct jc42_data *data = dev_get_drvdata(dev); ++ unsigned int regval; ++ int ret, temp, hyst; + +- if (IS_ERR(data)) +- return PTR_ERR(data); ++ mutex_lock(&data->update_lock); + + 
switch (attr) { + case hwmon_temp_input: +- *val = jc42_temp_from_reg(data->temp[t_input]); +- return 0; ++ ret = regmap_read(data->regmap, JC42_REG_TEMP, ®val); ++ if (ret) ++ break; ++ ++ *val = jc42_temp_from_reg(regval); ++ break; + case hwmon_temp_min: +- *val = jc42_temp_from_reg(data->temp[t_min]); +- return 0; ++ ret = regmap_read(data->regmap, JC42_REG_TEMP_LOWER, ®val); ++ if (ret) ++ break; ++ ++ *val = jc42_temp_from_reg(regval); ++ break; + case hwmon_temp_max: +- *val = jc42_temp_from_reg(data->temp[t_max]); +- return 0; ++ ret = regmap_read(data->regmap, JC42_REG_TEMP_UPPER, ®val); ++ if (ret) ++ break; ++ ++ *val = jc42_temp_from_reg(regval); ++ break; + case hwmon_temp_crit: +- *val = jc42_temp_from_reg(data->temp[t_crit]); +- return 0; ++ ret = regmap_read(data->regmap, JC42_REG_TEMP_CRITICAL, ++ ®val); ++ if (ret) ++ break; ++ ++ *val = jc42_temp_from_reg(regval); ++ break; + case hwmon_temp_max_hyst: +- temp = jc42_temp_from_reg(data->temp[t_max]); ++ ret = regmap_read(data->regmap, JC42_REG_TEMP_UPPER, ®val); ++ if (ret) ++ break; ++ ++ temp = jc42_temp_from_reg(regval); + hyst = jc42_hysteresis[(data->config & JC42_CFG_HYST_MASK) + >> JC42_CFG_HYST_SHIFT]; + *val = temp - hyst; +- return 0; ++ break; + case hwmon_temp_crit_hyst: +- temp = jc42_temp_from_reg(data->temp[t_crit]); ++ ret = regmap_read(data->regmap, JC42_REG_TEMP_CRITICAL, ++ ®val); ++ if (ret) ++ break; ++ ++ temp = jc42_temp_from_reg(regval); + hyst = jc42_hysteresis[(data->config & JC42_CFG_HYST_MASK) + >> JC42_CFG_HYST_SHIFT]; + *val = temp - hyst; +- return 0; ++ break; + case hwmon_temp_min_alarm: +- *val = (data->temp[t_input] >> JC42_ALARM_MIN_BIT) & 1; +- return 0; ++ ret = regmap_read(data->regmap, JC42_REG_TEMP, ®val); ++ if (ret) ++ break; ++ ++ *val = (regval >> JC42_ALARM_MIN_BIT) & 1; ++ break; + case hwmon_temp_max_alarm: +- *val = (data->temp[t_input] >> JC42_ALARM_MAX_BIT) & 1; +- return 0; ++ ret = regmap_read(data->regmap, JC42_REG_TEMP, ®val); ++ if (ret) ++ break; ++ ++ *val = (regval >> JC42_ALARM_MAX_BIT) & 1; ++ break; + case hwmon_temp_crit_alarm: +- *val = (data->temp[t_input] >> JC42_ALARM_CRIT_BIT) & 1; +- return 0; ++ ret = regmap_read(data->regmap, JC42_REG_TEMP, ®val); ++ if (ret) ++ break; ++ ++ *val = (regval >> JC42_ALARM_CRIT_BIT) & 1; ++ break; + default: +- return -EOPNOTSUPP; ++ ret = -EOPNOTSUPP; ++ break; + } ++ ++ mutex_unlock(&data->update_lock); ++ ++ return ret; + } + + static int jc42_write(struct device *dev, enum hwmon_sensor_types type, + u32 attr, int channel, long val) + { + struct jc42_data *data = dev_get_drvdata(dev); +- struct i2c_client *client = data->client; ++ unsigned int regval; + int diff, hyst; + int ret; + +@@ -324,21 +325,23 @@ static int jc42_write(struct device *dev, enum hwmon_sensor_types type, + + switch (attr) { + case hwmon_temp_min: +- data->temp[t_min] = jc42_temp_to_reg(val, data->extended); +- ret = i2c_smbus_write_word_swapped(client, temp_regs[t_min], +- data->temp[t_min]); ++ ret = regmap_write(data->regmap, JC42_REG_TEMP_LOWER, ++ jc42_temp_to_reg(val, data->extended)); + break; + case hwmon_temp_max: +- data->temp[t_max] = jc42_temp_to_reg(val, data->extended); +- ret = i2c_smbus_write_word_swapped(client, temp_regs[t_max], +- data->temp[t_max]); ++ ret = regmap_write(data->regmap, JC42_REG_TEMP_UPPER, ++ jc42_temp_to_reg(val, data->extended)); + break; + case hwmon_temp_crit: +- data->temp[t_crit] = jc42_temp_to_reg(val, data->extended); +- ret = i2c_smbus_write_word_swapped(client, temp_regs[t_crit], +- data->temp[t_crit]); 
++ ret = regmap_write(data->regmap, JC42_REG_TEMP_CRITICAL, ++ jc42_temp_to_reg(val, data->extended)); + break; + case hwmon_temp_crit_hyst: ++ ret = regmap_read(data->regmap, JC42_REG_TEMP_CRITICAL, ++ ®val); ++ if (ret) ++ break; ++ + /* + * JC42.4 compliant chips only support four hysteresis values. + * Pick best choice and go from there. +@@ -346,7 +349,7 @@ static int jc42_write(struct device *dev, enum hwmon_sensor_types type, + val = clamp_val(val, (data->extended ? JC42_TEMP_MIN_EXTENDED + : JC42_TEMP_MIN) - 6000, + JC42_TEMP_MAX); +- diff = jc42_temp_from_reg(data->temp[t_crit]) - val; ++ diff = jc42_temp_from_reg(regval) - val; + hyst = 0; + if (diff > 0) { + if (diff < 2250) +@@ -358,9 +361,8 @@ static int jc42_write(struct device *dev, enum hwmon_sensor_types type, + } + data->config = (data->config & ~JC42_CFG_HYST_MASK) | + (hyst << JC42_CFG_HYST_SHIFT); +- ret = i2c_smbus_write_word_swapped(data->client, +- JC42_REG_CONFIG, +- data->config); ++ ret = regmap_write(data->regmap, JC42_REG_CONFIG, ++ data->config); + break; + default: + ret = -EOPNOTSUPP; +@@ -458,51 +460,80 @@ static const struct hwmon_chip_info jc42_chip_info = { + .info = jc42_info, + }; + ++static bool jc42_readable_reg(struct device *dev, unsigned int reg) ++{ ++ return (reg >= JC42_REG_CAP && reg <= JC42_REG_DEVICEID) || ++ reg == JC42_REG_SMBUS; ++} ++ ++static bool jc42_writable_reg(struct device *dev, unsigned int reg) ++{ ++ return (reg >= JC42_REG_CONFIG && reg <= JC42_REG_TEMP_CRITICAL) || ++ reg == JC42_REG_SMBUS; ++} ++ ++static bool jc42_volatile_reg(struct device *dev, unsigned int reg) ++{ ++ return reg == JC42_REG_CONFIG || reg == JC42_REG_TEMP; ++} ++ ++static const struct regmap_config jc42_regmap_config = { ++ .reg_bits = 8, ++ .val_bits = 16, ++ .val_format_endian = REGMAP_ENDIAN_BIG, ++ .max_register = JC42_REG_SMBUS, ++ .writeable_reg = jc42_writable_reg, ++ .readable_reg = jc42_readable_reg, ++ .volatile_reg = jc42_volatile_reg, ++ .cache_type = REGCACHE_RBTREE, ++}; ++ + static int jc42_probe(struct i2c_client *client) + { + struct device *dev = &client->dev; + struct device *hwmon_dev; ++ unsigned int config, cap; + struct jc42_data *data; +- int config, cap; ++ int ret; + + data = devm_kzalloc(dev, sizeof(struct jc42_data), GFP_KERNEL); + if (!data) + return -ENOMEM; + +- data->client = client; ++ data->regmap = devm_regmap_init_i2c(client, &jc42_regmap_config); ++ if (IS_ERR(data->regmap)) ++ return PTR_ERR(data->regmap); ++ + i2c_set_clientdata(client, data); + mutex_init(&data->update_lock); + +- cap = i2c_smbus_read_word_swapped(client, JC42_REG_CAP); +- if (cap < 0) +- return cap; ++ ret = regmap_read(data->regmap, JC42_REG_CAP, &cap); ++ if (ret) ++ return ret; + + data->extended = !!(cap & JC42_CAP_RANGE); + + if (device_property_read_bool(dev, "smbus-timeout-disable")) { +- int smbus; +- + /* + * Not all chips support this register, but from a + * quick read of various datasheets no chip appears + * incompatible with the below attempt to disable + * the timeout. And the whole thing is opt-in... 
+ */ +- smbus = i2c_smbus_read_word_swapped(client, JC42_REG_SMBUS); +- if (smbus < 0) +- return smbus; +- i2c_smbus_write_word_swapped(client, JC42_REG_SMBUS, +- smbus | SMBUS_STMOUT); ++ ret = regmap_set_bits(data->regmap, JC42_REG_SMBUS, ++ SMBUS_STMOUT); ++ if (ret) ++ return ret; + } + +- config = i2c_smbus_read_word_swapped(client, JC42_REG_CONFIG); +- if (config < 0) +- return config; ++ ret = regmap_read(data->regmap, JC42_REG_CONFIG, &config); ++ if (ret) ++ return ret; + + data->orig_config = config; + if (config & JC42_CFG_SHUTDOWN) { + config &= ~JC42_CFG_SHUTDOWN; +- i2c_smbus_write_word_swapped(client, JC42_REG_CONFIG, config); ++ regmap_write(data->regmap, JC42_REG_CONFIG, config); + } + data->config = config; + +@@ -523,7 +554,7 @@ static int jc42_remove(struct i2c_client *client) + + config = (data->orig_config & ~JC42_CFG_HYST_MASK) + | (data->config & JC42_CFG_HYST_MASK); +- i2c_smbus_write_word_swapped(client, JC42_REG_CONFIG, config); ++ regmap_write(data->regmap, JC42_REG_CONFIG, config); + } + return 0; + } +@@ -535,8 +566,11 @@ static int jc42_suspend(struct device *dev) + struct jc42_data *data = dev_get_drvdata(dev); + + data->config |= JC42_CFG_SHUTDOWN; +- i2c_smbus_write_word_swapped(data->client, JC42_REG_CONFIG, +- data->config); ++ regmap_write(data->regmap, JC42_REG_CONFIG, data->config); ++ ++ regcache_cache_only(data->regmap, true); ++ regcache_mark_dirty(data->regmap); ++ + return 0; + } + +@@ -544,10 +578,13 @@ static int jc42_resume(struct device *dev) + { + struct jc42_data *data = dev_get_drvdata(dev); + ++ regcache_cache_only(data->regmap, false); ++ + data->config &= ~JC42_CFG_SHUTDOWN; +- i2c_smbus_write_word_swapped(data->client, JC42_REG_CONFIG, +- data->config); +- return 0; ++ regmap_write(data->regmap, JC42_REG_CONFIG, data->config); ++ ++ /* Restore cached register values to hardware */ ++ return regcache_sync(data->regmap); + } + + static const struct dev_pm_ops jc42_dev_pm_ops = { diff --git a/drivers/hwmon/lm90.c b/drivers/hwmon/lm90.c index 567b7c521f388..1c9493c708132 100644 --- a/drivers/hwmon/lm90.c @@ -136129,6 +163000,19 @@ index 567b7c521f388..1c9493c708132 100644 if (data->flags & LM90_HAVE_OFFSET) data->channel_config[1] |= HWMON_T_OFFSET; +diff --git a/drivers/hwmon/ltc2947-core.c b/drivers/hwmon/ltc2947-core.c +index 5423466de697a..e918490f3ff75 100644 +--- a/drivers/hwmon/ltc2947-core.c ++++ b/drivers/hwmon/ltc2947-core.c +@@ -396,7 +396,7 @@ static int ltc2947_read_temp(struct device *dev, const u32 attr, long *val, + return ret; + + /* in milidegrees celcius, temp is given by: */ +- *val = (__val * 204) + 550; ++ *val = (__val * 204) + 5500; + + return 0; + } diff --git a/drivers/hwmon/mr75203.c b/drivers/hwmon/mr75203.c index 868243dba1ee0..05da83841536f 100644 --- a/drivers/hwmon/mr75203.c @@ -136930,6 +163814,19 @@ index 8bd6435c13e82..2148fd543bb4b 100644 case PORT_RESISTANCE_RSN_SHORT: case PORT_RESISTANCE_RSN_OPEN: default: +diff --git a/drivers/hwspinlock/qcom_hwspinlock.c b/drivers/hwspinlock/qcom_hwspinlock.c +index 3647109666658..e499146648639 100644 +--- a/drivers/hwspinlock/qcom_hwspinlock.c ++++ b/drivers/hwspinlock/qcom_hwspinlock.c +@@ -105,7 +105,7 @@ static const struct regmap_config tcsr_mutex_config = { + .reg_bits = 32, + .reg_stride = 4, + .val_bits = 32, +- .max_register = 0x40000, ++ .max_register = 0x20000, + .fast_io = true, + }; + diff --git a/drivers/hwtracing/coresight/coresight-core.c b/drivers/hwtracing/coresight/coresight-core.c index 8a18c71df37a8..6471f4232a2e0 100644 --- 
a/drivers/hwtracing/coresight/coresight-core.c @@ -137058,15 +163955,42 @@ index 00de46565bc40..c60442970c2a4 100644 static struct notifier_block debug_notifier = { diff --git a/drivers/hwtracing/coresight/coresight-cti-core.c b/drivers/hwtracing/coresight/coresight-cti-core.c -index e2a3620cbf489..8988b2ed2ea6f 100644 +index e2a3620cbf489..dcd607a0c41a1 100644 --- a/drivers/hwtracing/coresight/coresight-cti-core.c +++ b/drivers/hwtracing/coresight/coresight-cti-core.c -@@ -175,7 +175,7 @@ static int cti_disable_hw(struct cti_drvdata *drvdata) +@@ -90,11 +90,9 @@ void cti_write_all_hw_regs(struct cti_drvdata *drvdata) + static int cti_enable_hw(struct cti_drvdata *drvdata) + { + struct cti_config *config = &drvdata->config; +- struct device *dev = &drvdata->csdev->dev; + unsigned long flags; + int rc = 0; + +- pm_runtime_get_sync(dev->parent); + spin_lock_irqsave(&drvdata->spinlock, flags); + + /* no need to do anything if enabled or unpowered*/ +@@ -119,7 +117,6 @@ cti_state_unchanged: + /* cannot enable due to error */ + cti_err_not_enabled: + spin_unlock_irqrestore(&drvdata->spinlock, flags); +- pm_runtime_put(dev->parent); + return rc; + } + +@@ -153,7 +150,6 @@ cti_hp_not_enabled: + static int cti_disable_hw(struct cti_drvdata *drvdata) + { + struct cti_config *config = &drvdata->config; +- struct device *dev = &drvdata->csdev->dev; + struct coresight_device *csdev = drvdata->csdev; + + spin_lock(&drvdata->spinlock); +@@ -175,7 +171,6 @@ static int cti_disable_hw(struct cti_drvdata *drvdata) coresight_disclaim_device_unlocked(csdev); CS_LOCK(drvdata->base); spin_unlock(&drvdata->spinlock); - pm_runtime_put(dev); -+ pm_runtime_put(dev->parent); return 0; /* not disabled this call */ @@ -137124,7 +164048,7 @@ index 43054568430f2..c30989e0675f5 100644 create_dev_exit_unlock: mutex_unlock(&cscfg_mutex); diff --git a/drivers/hwtracing/coresight/coresight-trbe.c b/drivers/hwtracing/coresight/coresight-trbe.c -index 1768684968797..7dddb85b90591 100644 +index 1768684968797..fac63d092c7be 100644 --- a/drivers/hwtracing/coresight/coresight-trbe.c +++ b/drivers/hwtracing/coresight/coresight-trbe.c @@ -366,7 +366,7 @@ static unsigned long __trbe_normal_offset(struct perf_output_handle *handle) @@ -137158,6 +164082,14 @@ index 1768684968797..7dddb85b90591 100644 if (cpumask_test_cpu(cpu, &drvdata->supported_cpus)) arm_trbe_register_coresight_cpu(drvdata, cpu); if (cpumask_test_cpu(cpu, &drvdata->supported_cpus)) +@@ -1024,6 +1030,7 @@ static int arm_trbe_probe_cpuhp(struct trbe_drvdata *drvdata) + + static void arm_trbe_remove_cpuhp(struct trbe_drvdata *drvdata) + { ++ cpuhp_state_remove_instance(drvdata->trbe_online, &drvdata->hotplug_node); + cpuhp_remove_multi_state(drvdata->trbe_online); + } + diff --git a/drivers/hwtracing/intel_th/msu-sink.c b/drivers/hwtracing/intel_th/msu-sink.c index 2c7f5116be126..891b28ea25fe6 100644 --- a/drivers/hwtracing/intel_th/msu-sink.c @@ -137563,10 +164495,40 @@ index 72df563477b1c..f8639a4457d23 100644 static int cbus_i2c_remove(struct platform_device *pdev) diff --git a/drivers/i2c/busses/i2c-designware-common.c b/drivers/i2c/busses/i2c-designware-common.c -index bf2a4920638ab..a1100e37626e2 100644 +index bf2a4920638ab..4af65f101dac4 100644 --- a/drivers/i2c/busses/i2c-designware-common.c +++ b/drivers/i2c/busses/i2c-designware-common.c -@@ -477,9 +477,6 @@ int i2c_dw_prepare_clk(struct dw_i2c_dev *dev, bool prepare) +@@ -351,7 +351,8 @@ u32 i2c_dw_scl_hcnt(u32 ic_clk, u32 tSYMBOL, u32 tf, int cond, int offset) + * + * If your hardware is free from tHD;STA 
issue, try this one. + */ +- return DIV_ROUND_CLOSEST(ic_clk * tSYMBOL, MICRO) - 8 + offset; ++ return DIV_ROUND_CLOSEST_ULL((u64)ic_clk * tSYMBOL, MICRO) - ++ 8 + offset; + else + /* + * Conditional expression: +@@ -367,7 +368,8 @@ u32 i2c_dw_scl_hcnt(u32 ic_clk, u32 tSYMBOL, u32 tf, int cond, int offset) + * The reason why we need to take into account "tf" here, + * is the same as described in i2c_dw_scl_lcnt(). + */ +- return DIV_ROUND_CLOSEST(ic_clk * (tSYMBOL + tf), MICRO) - 3 + offset; ++ return DIV_ROUND_CLOSEST_ULL((u64)ic_clk * (tSYMBOL + tf), MICRO) - ++ 3 + offset; + } + + u32 i2c_dw_scl_lcnt(u32 ic_clk, u32 tLOW, u32 tf, int offset) +@@ -383,7 +385,8 @@ u32 i2c_dw_scl_lcnt(u32 ic_clk, u32 tLOW, u32 tf, int offset) + * account the fall time of SCL signal (tf). Default tf value + * should be 0.3 us, for safety. + */ +- return DIV_ROUND_CLOSEST(ic_clk * (tLOW + tf), MICRO) - 1 + offset; ++ return DIV_ROUND_CLOSEST_ULL((u64)ic_clk * (tLOW + tf), MICRO) - ++ 1 + offset; + } + + int i2c_dw_set_sda_hold(struct dw_i2c_dev *dev) +@@ -477,9 +480,6 @@ int i2c_dw_prepare_clk(struct dw_i2c_dev *dev, bool prepare) { int ret; @@ -137632,7 +164594,7 @@ index 9b08bb5df38d2..e0559eff8928b 100644 dev->cmd_err |= DW_IC_ERR_TX_ABRT; dev->status = STATUS_IDLE; diff --git a/drivers/i2c/busses/i2c-designware-pcidrv.c b/drivers/i2c/busses/i2c-designware-pcidrv.c -index 0f409a4c2da0d..5b45941bcbddc 100644 +index 0f409a4c2da0d..de8dd3e3333ed 100644 --- a/drivers/i2c/busses/i2c-designware-pcidrv.c +++ b/drivers/i2c/busses/i2c-designware-pcidrv.c @@ -39,10 +39,10 @@ enum dw_pci_ctl_id_t { @@ -137650,6 +164612,15 @@ index 0f409a4c2da0d..5b45941bcbddc 100644 u32 sda_hold; }; +@@ -398,6 +398,8 @@ static const struct pci_device_id i2_designware_pci_ids[] = { + { PCI_VDEVICE(ATI, 0x73a4), navi_amd }, + { PCI_VDEVICE(ATI, 0x73e4), navi_amd }, + { PCI_VDEVICE(ATI, 0x73c4), navi_amd }, ++ { PCI_VDEVICE(ATI, 0x7444), navi_amd }, ++ { PCI_VDEVICE(ATI, 0x7464), navi_amd }, + { 0,} + }; + MODULE_DEVICE_TABLE(pci, i2_designware_pci_ids); diff --git a/drivers/i2c/busses/i2c-designware-platdrv.c b/drivers/i2c/busses/i2c-designware-platdrv.c index 21113665ddeac..718bebe4fb877 100644 --- a/drivers/i2c/busses/i2c-designware-platdrv.c @@ -137675,7 +164646,7 @@ index 21113665ddeac..718bebe4fb877 100644 dev->get_clk_rate_khz = i2c_dw_get_clk_rate_khz; diff --git a/drivers/i2c/busses/i2c-i801.c b/drivers/i2c/busses/i2c-i801.c -index 89ae78ef1a1cc..98e39a17fb830 100644 +index 89ae78ef1a1cc..74d343d1a36b8 100644 --- a/drivers/i2c/busses/i2c-i801.c +++ b/drivers/i2c/busses/i2c-i801.c @@ -763,6 +763,11 @@ static int i801_block_transaction(struct i801_priv *priv, union i2c_smbus_data * @@ -137707,7 +164678,15 @@ index 89ae78ef1a1cc..98e39a17fb830 100644 /* Experience has shown that the block buffer can only be used for SMBus (not I2C) block transactions, even though the datasheet doesn't mention this limitation. 
*/ -@@ -1493,7 +1488,6 @@ static struct platform_device * +@@ -1247,6 +1242,7 @@ static const struct { + */ + { "Latitude 5480", 0x29 }, + { "Vostro V131", 0x1d }, ++ { "Vostro 5568", 0x29 }, + }; + + static void register_dell_lis3lv02d_i2c_device(struct i801_priv *priv) +@@ -1493,7 +1489,6 @@ static struct platform_device * i801_add_tco_spt(struct i801_priv *priv, struct pci_dev *pci_dev, struct resource *tco_res) { @@ -137715,7 +164694,7 @@ index 89ae78ef1a1cc..98e39a17fb830 100644 struct resource *res; unsigned int devfn; u64 base64_addr; -@@ -1506,7 +1500,7 @@ i801_add_tco_spt(struct i801_priv *priv, struct pci_dev *pci_dev, +@@ -1506,7 +1501,7 @@ i801_add_tco_spt(struct i801_priv *priv, struct pci_dev *pci_dev, * enumerated by the PCI subsystem, so we need to unhide/hide it * to lookup the P2SB BAR. */ @@ -137724,7 +164703,7 @@ index 89ae78ef1a1cc..98e39a17fb830 100644 devfn = PCI_DEVFN(PCI_SLOT(pci_dev->devfn), 1); -@@ -1524,7 +1518,7 @@ i801_add_tco_spt(struct i801_priv *priv, struct pci_dev *pci_dev, +@@ -1524,7 +1519,7 @@ i801_add_tco_spt(struct i801_priv *priv, struct pci_dev *pci_dev, /* Hide the P2SB device, if it was hidden before */ if (hidden) pci_bus_write_config_byte(pci_dev->bus, devfn, 0xe1, hidden); @@ -137734,10 +164713,30 @@ index 89ae78ef1a1cc..98e39a17fb830 100644 res = &tco_res[1]; if (pci_dev->device == PCI_DEVICE_ID_INTEL_DNV_SMBUS) diff --git a/drivers/i2c/busses/i2c-imx.c b/drivers/i2c/busses/i2c-imx.c -index 3576b63a6c037..2e4d05040e50e 100644 +index 3576b63a6c037..5e8853d3f8da7 100644 --- a/drivers/i2c/busses/i2c-imx.c +++ b/drivers/i2c/busses/i2c-imx.c -@@ -1487,9 +1487,7 @@ static int i2c_imx_remove(struct platform_device *pdev) +@@ -1051,7 +1051,8 @@ static int i2c_imx_read(struct imx_i2c_struct *i2c_imx, struct i2c_msg *msgs, + int i, result; + unsigned int temp; + int block_data = msgs->flags & I2C_M_RECV_LEN; +- int use_dma = i2c_imx->dma && msgs->len >= DMA_THRESHOLD && !block_data; ++ int use_dma = i2c_imx->dma && msgs->flags & I2C_M_DMA_SAFE && ++ msgs->len >= DMA_THRESHOLD && !block_data; + + dev_dbg(&i2c_imx->adapter.dev, + "<%s> write slave address: addr=0x%x\n", +@@ -1217,7 +1218,8 @@ static int i2c_imx_xfer_common(struct i2c_adapter *adapter, + result = i2c_imx_read(i2c_imx, &msgs[i], is_lastmsg, atomic); + } else { + if (!atomic && +- i2c_imx->dma && msgs[i].len >= DMA_THRESHOLD) ++ i2c_imx->dma && msgs[i].len >= DMA_THRESHOLD && ++ msgs[i].flags & I2C_M_DMA_SAFE) + result = i2c_imx_dma_write(i2c_imx, &msgs[i]); + else + result = i2c_imx_write(i2c_imx, &msgs[i], atomic); +@@ -1487,9 +1489,7 @@ static int i2c_imx_remove(struct platform_device *pdev) struct imx_i2c_struct *i2c_imx = platform_get_drvdata(pdev); int irq, ret; @@ -137748,7 +164747,7 @@ index 3576b63a6c037..2e4d05040e50e 100644 /* remove adapter */ dev_dbg(&i2c_imx->adapter.dev, "adapter removed\n"); -@@ -1498,17 +1496,21 @@ static int i2c_imx_remove(struct platform_device *pdev) +@@ -1498,17 +1498,21 @@ static int i2c_imx_remove(struct platform_device *pdev) if (i2c_imx->dma) i2c_imx_dma_free(i2c_imx); @@ -137777,7 +164776,7 @@ index 3576b63a6c037..2e4d05040e50e 100644 pm_runtime_put_noidle(&pdev->dev); pm_runtime_disable(&pdev->dev); diff --git a/drivers/i2c/busses/i2c-ismt.c b/drivers/i2c/busses/i2c-ismt.c -index a6187cbec2c94..483428c5e30b9 100644 +index a6187cbec2c94..10cdd501d4c52 100644 --- a/drivers/i2c/busses/i2c-ismt.c +++ b/drivers/i2c/busses/i2c-ismt.c @@ -82,6 +82,7 @@ @@ -137807,7 +164806,17 @@ index a6187cbec2c94..483428c5e30b9 100644 /* Initialize common control bits 
*/ if (likely(pci_dev_msi_enabled(priv->pci_dev))) desc->control = ISMT_DESC_INT | ISMT_DESC_FAIR; -@@ -522,6 +528,9 @@ static int ismt_access(struct i2c_adapter *adap, u16 addr, +@@ -503,6 +509,9 @@ static int ismt_access(struct i2c_adapter *adap, u16 addr, + if (read_write == I2C_SMBUS_WRITE) { + /* Block Write */ + dev_dbg(dev, "I2C_SMBUS_BLOCK_DATA: WRITE\n"); ++ if (data->block[0] < 1 || data->block[0] > I2C_SMBUS_BLOCK_MAX) ++ return -EINVAL; ++ + dma_size = data->block[0] + 1; + dma_direction = DMA_TO_DEVICE; + desc->wr_len_cmd = dma_size; +@@ -522,6 +531,9 @@ static int ismt_access(struct i2c_adapter *adap, u16 addr, case I2C_SMBUS_BLOCK_PROC_CALL: dev_dbg(dev, "I2C_SMBUS_BLOCK_PROC_CALL\n"); @@ -137817,7 +164826,7 @@ index a6187cbec2c94..483428c5e30b9 100644 dma_size = I2C_SMBUS_BLOCK_MAX; desc->tgtaddr_rw = ISMT_DESC_ADDR_RW(addr, 1); desc->wr_len_cmd = data->block[0] + 1; -@@ -708,6 +717,8 @@ static void ismt_hw_init(struct ismt_priv *priv) +@@ -708,6 +720,8 @@ static void ismt_hw_init(struct ismt_priv *priv) /* initialize the Master Descriptor Base Address (MDBA) */ writeq(priv->io_rng_dma, priv->smba + ISMT_MSTR_MDBA); @@ -137826,7 +164835,7 @@ index a6187cbec2c94..483428c5e30b9 100644 /* initialize the Master Control Register (MCTRL) */ writel(ISMT_MCTRL_MEIE, priv->smba + ISMT_MSTR_MCTRL); -@@ -795,6 +806,12 @@ static int ismt_dev_init(struct ismt_priv *priv) +@@ -795,6 +809,12 @@ static int ismt_dev_init(struct ismt_priv *priv) priv->head = 0; init_completion(&priv->cmp); @@ -138269,8 +165278,152 @@ index 45fe4a7fe0c03..901f0fb04fee4 100644 return ret; } +diff --git a/drivers/i2c/busses/i2c-mv64xxx.c b/drivers/i2c/busses/i2c-mv64xxx.c +index 5c8e94b6cdb5a..103a05ecc3d6b 100644 +--- a/drivers/i2c/busses/i2c-mv64xxx.c ++++ b/drivers/i2c/busses/i2c-mv64xxx.c +@@ -150,6 +150,7 @@ struct mv64xxx_i2c_data { + /* Clk div is 2 to the power n, not 2 to the power n + 1 */ + bool clk_n_base_0; + struct i2c_bus_recovery_info rinfo; ++ bool atomic; + }; + + static struct mv64xxx_i2c_regs mv64xxx_i2c_regs_mv64xxx = { +@@ -179,7 +180,10 @@ mv64xxx_i2c_prepare_for_io(struct mv64xxx_i2c_data *drv_data, + u32 dir = 0; + + drv_data->cntl_bits = MV64XXX_I2C_REG_CONTROL_ACK | +- MV64XXX_I2C_REG_CONTROL_INTEN | MV64XXX_I2C_REG_CONTROL_TWSIEN; ++ MV64XXX_I2C_REG_CONTROL_TWSIEN; ++ ++ if (!drv_data->atomic) ++ drv_data->cntl_bits |= MV64XXX_I2C_REG_CONTROL_INTEN; + + if (msg->flags & I2C_M_RD) + dir = 1; +@@ -409,7 +413,8 @@ mv64xxx_i2c_do_action(struct mv64xxx_i2c_data *drv_data) + case MV64XXX_I2C_ACTION_RCV_DATA_STOP: + drv_data->msg->buf[drv_data->byte_posn++] = + readl(drv_data->reg_base + drv_data->reg_offsets.data); +- drv_data->cntl_bits &= ~MV64XXX_I2C_REG_CONTROL_INTEN; ++ if (!drv_data->atomic) ++ drv_data->cntl_bits &= ~MV64XXX_I2C_REG_CONTROL_INTEN; + writel(drv_data->cntl_bits | MV64XXX_I2C_REG_CONTROL_STOP, + drv_data->reg_base + drv_data->reg_offsets.control); + drv_data->block = 0; +@@ -427,7 +432,8 @@ mv64xxx_i2c_do_action(struct mv64xxx_i2c_data *drv_data) + drv_data->rc = -EIO; + fallthrough; + case MV64XXX_I2C_ACTION_SEND_STOP: +- drv_data->cntl_bits &= ~MV64XXX_I2C_REG_CONTROL_INTEN; ++ if (!drv_data->atomic) ++ drv_data->cntl_bits &= ~MV64XXX_I2C_REG_CONTROL_INTEN; + writel(drv_data->cntl_bits | MV64XXX_I2C_REG_CONTROL_STOP, + drv_data->reg_base + drv_data->reg_offsets.control); + drv_data->block = 0; +@@ -575,6 +581,17 @@ mv64xxx_i2c_wait_for_completion(struct mv64xxx_i2c_data *drv_data) + spin_unlock_irqrestore(&drv_data->lock, flags); + } + ++static void 
mv64xxx_i2c_wait_polling(struct mv64xxx_i2c_data *drv_data) ++{ ++ ktime_t timeout = ktime_add_ms(ktime_get(), drv_data->adapter.timeout); ++ ++ while (READ_ONCE(drv_data->block) && ++ ktime_compare(ktime_get(), timeout) < 0) { ++ udelay(5); ++ mv64xxx_i2c_intr(0, drv_data); ++ } ++} ++ + static int + mv64xxx_i2c_execute_msg(struct mv64xxx_i2c_data *drv_data, struct i2c_msg *msg, + int is_last) +@@ -590,7 +607,11 @@ mv64xxx_i2c_execute_msg(struct mv64xxx_i2c_data *drv_data, struct i2c_msg *msg, + mv64xxx_i2c_send_start(drv_data); + spin_unlock_irqrestore(&drv_data->lock, flags); + +- mv64xxx_i2c_wait_for_completion(drv_data); ++ if (!drv_data->atomic) ++ mv64xxx_i2c_wait_for_completion(drv_data); ++ else ++ mv64xxx_i2c_wait_polling(drv_data); ++ + return drv_data->rc; + } + +@@ -717,7 +738,7 @@ mv64xxx_i2c_functionality(struct i2c_adapter *adap) + } + + static int +-mv64xxx_i2c_xfer(struct i2c_adapter *adap, struct i2c_msg msgs[], int num) ++mv64xxx_i2c_xfer_core(struct i2c_adapter *adap, struct i2c_msg msgs[], int num) + { + struct mv64xxx_i2c_data *drv_data = i2c_get_adapdata(adap); + int rc, ret = num; +@@ -730,7 +751,7 @@ mv64xxx_i2c_xfer(struct i2c_adapter *adap, struct i2c_msg msgs[], int num) + drv_data->msgs = msgs; + drv_data->num_msgs = num; + +- if (mv64xxx_i2c_can_offload(drv_data)) ++ if (mv64xxx_i2c_can_offload(drv_data) && !drv_data->atomic) + rc = mv64xxx_i2c_offload_xfer(drv_data); + else + rc = mv64xxx_i2c_execute_msg(drv_data, &msgs[0], num == 1); +@@ -747,8 +768,27 @@ mv64xxx_i2c_xfer(struct i2c_adapter *adap, struct i2c_msg msgs[], int num) + return ret; + } + ++static int ++mv64xxx_i2c_xfer(struct i2c_adapter *adap, struct i2c_msg msgs[], int num) ++{ ++ struct mv64xxx_i2c_data *drv_data = i2c_get_adapdata(adap); ++ ++ drv_data->atomic = 0; ++ return mv64xxx_i2c_xfer_core(adap, msgs, num); ++} ++ ++static int mv64xxx_i2c_xfer_atomic(struct i2c_adapter *adap, ++ struct i2c_msg msgs[], int num) ++{ ++ struct mv64xxx_i2c_data *drv_data = i2c_get_adapdata(adap); ++ ++ drv_data->atomic = 1; ++ return mv64xxx_i2c_xfer_core(adap, msgs, num); ++} ++ + static const struct i2c_algorithm mv64xxx_i2c_algo = { + .master_xfer = mv64xxx_i2c_xfer, ++ .master_xfer_atomic = mv64xxx_i2c_xfer_atomic, + .functionality = mv64xxx_i2c_functionality, + }; + +@@ -1047,14 +1087,6 @@ mv64xxx_i2c_remove(struct platform_device *pd) + return 0; + } + +-static void +-mv64xxx_i2c_shutdown(struct platform_device *pd) +-{ +- pm_runtime_disable(&pd->dev); +- if (!pm_runtime_status_suspended(&pd->dev)) +- mv64xxx_i2c_runtime_suspend(&pd->dev); +-} +- + static const struct dev_pm_ops mv64xxx_i2c_pm_ops = { + SET_RUNTIME_PM_OPS(mv64xxx_i2c_runtime_suspend, + mv64xxx_i2c_runtime_resume, NULL) +@@ -1065,7 +1097,6 @@ static const struct dev_pm_ops mv64xxx_i2c_pm_ops = { + static struct platform_driver mv64xxx_i2c_driver = { + .probe = mv64xxx_i2c_probe, + .remove = mv64xxx_i2c_remove, +- .shutdown = mv64xxx_i2c_shutdown, + .driver = { + .name = MV64XXX_I2C_CTLR_NAME, + .pm = &mv64xxx_i2c_pm_ops, diff --git a/drivers/i2c/busses/i2c-mxs.c b/drivers/i2c/busses/i2c-mxs.c -index 864a3f1bd4e14..68f67d084c63a 100644 +index 864a3f1bd4e14..b353732f593b1 100644 --- a/drivers/i2c/busses/i2c-mxs.c +++ b/drivers/i2c/busses/i2c-mxs.c @@ -799,7 +799,7 @@ static int mxs_i2c_probe(struct platform_device *pdev) @@ -138282,8 +165435,19 @@ index 864a3f1bd4e14..68f67d084c63a 100644 i2c->regs = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(i2c->regs)) +@@ -826,8 +826,8 @@ static int mxs_i2c_probe(struct platform_device 
*pdev) + /* Setup the DMA */ + i2c->dmach = dma_request_chan(dev, "rx-tx"); + if (IS_ERR(i2c->dmach)) { +- dev_err(dev, "Failed to request dma\n"); +- return PTR_ERR(i2c->dmach); ++ return dev_err_probe(dev, PTR_ERR(i2c->dmach), ++ "Failed to request dma\n"); + } + + platform_set_drvdata(pdev, i2c); diff --git a/drivers/i2c/busses/i2c-npcm7xx.c b/drivers/i2c/busses/i2c-npcm7xx.c -index 2ad166355ec9b..31e3d2c9d6bc5 100644 +index 2ad166355ec9b..c1b6797372409 100644 --- a/drivers/i2c/busses/i2c-npcm7xx.c +++ b/drivers/i2c/busses/i2c-npcm7xx.c @@ -123,11 +123,11 @@ enum i2c_addr { @@ -138665,16 +165829,24 @@ index 2ad166355ec9b..31e3d2c9d6bc5 100644 adap->algo = &npcm_i2c_algo; adap->quirks = &npcm_i2c_quirks; adap->algo_data = bus; -@@ -2336,8 +2363,7 @@ static struct platform_driver npcm_i2c_bus_driver = { +@@ -2335,8 +2362,16 @@ static struct platform_driver npcm_i2c_bus_driver = { + static int __init npcm_i2c_init(void) { ++ int ret; ++ npcm_i2c_debugfs_dir = debugfs_create_dir("npcm_i2c", NULL); - platform_driver_register(&npcm_i2c_bus_driver); -- return 0; -+ return platform_driver_register(&npcm_i2c_bus_driver); ++ ++ ret = platform_driver_register(&npcm_i2c_bus_driver); ++ if (ret) { ++ debugfs_remove_recursive(npcm_i2c_debugfs_dir); ++ return ret; ++ } ++ + return 0; } module_init(npcm_i2c_init); - diff --git a/drivers/i2c/busses/i2c-pasemi.c b/drivers/i2c/busses/i2c-pasemi.c index 20f2772c0e79b..2c909522f0f38 100644 --- a/drivers/i2c/busses/i2c-pasemi.c @@ -138693,7 +165865,7 @@ index 20f2772c0e79b..2c909522f0f38 100644 return 0; diff --git a/drivers/i2c/busses/i2c-piix4.c b/drivers/i2c/busses/i2c-piix4.c -index 8c1b31ed0c429..39cb1b7bb8656 100644 +index 8c1b31ed0c429..809fbd014cd68 100644 --- a/drivers/i2c/busses/i2c-piix4.c +++ b/drivers/i2c/busses/i2c-piix4.c @@ -77,6 +77,7 @@ @@ -139013,6 +166185,49 @@ index 8c1b31ed0c429..39cb1b7bb8656 100644 adapdata->smba = smba; adapdata->sb800_main = sb800_main; adapdata->port = port << piix4_port_shift_sb800; +@@ -961,6 +1080,7 @@ static int piix4_probe(struct pci_dev *dev, const struct pci_device_id *id) + "", &piix4_main_adapters[0]); + if (retval < 0) + return retval; ++ piix4_adapter_count = 1; + } + + /* Check for auxiliary SMBus on some AMD chipsets */ +diff --git a/drivers/i2c/busses/i2c-pxa-pci.c b/drivers/i2c/busses/i2c-pxa-pci.c +index f614cade432bb..30e38bc8b6db8 100644 +--- a/drivers/i2c/busses/i2c-pxa-pci.c ++++ b/drivers/i2c/busses/i2c-pxa-pci.c +@@ -105,7 +105,7 @@ static int ce4100_i2c_probe(struct pci_dev *dev, + int i; + struct ce4100_devices *sds; + +- ret = pci_enable_device_mem(dev); ++ ret = pcim_enable_device(dev); + if (ret) + return ret; + +@@ -114,10 +114,8 @@ static int ce4100_i2c_probe(struct pci_dev *dev, + return -EINVAL; + } + sds = kzalloc(sizeof(*sds), GFP_KERNEL); +- if (!sds) { +- ret = -ENOMEM; +- goto err_mem; +- } ++ if (!sds) ++ return -ENOMEM; + + for (i = 0; i < ARRAY_SIZE(sds->pdev); i++) { + sds->pdev[i] = add_i2c_device(dev, i); +@@ -133,8 +131,6 @@ static int ce4100_i2c_probe(struct pci_dev *dev, + + err_dev_add: + kfree(sds); +-err_mem: +- pci_disable_device(dev); + return ret; + } + diff --git a/drivers/i2c/busses/i2c-qcom-cci.c b/drivers/i2c/busses/i2c-qcom-cci.c index c1de8eb66169f..2bdb86ab2ea81 100644 --- a/drivers/i2c/busses/i2c-qcom-cci.c @@ -139131,10 +166346,78 @@ index bff9913c37b8b..2c016f0299fce 100644 pm_runtime_disable(dev); return ret; diff --git a/drivers/i2c/busses/i2c-rk3x.c b/drivers/i2c/busses/i2c-rk3x.c -index 819ab4ee517e1..02ddb237f69af 100644 +index 
819ab4ee517e1..13c14eb175e94 100644 --- a/drivers/i2c/busses/i2c-rk3x.c +++ b/drivers/i2c/busses/i2c-rk3x.c -@@ -423,8 +423,8 @@ static void rk3x_i2c_handle_read(struct rk3x_i2c *i2c, unsigned int ipd) +@@ -80,7 +80,7 @@ enum { + #define DEFAULT_SCL_RATE (100 * 1000) /* Hz */ + + /** +- * struct i2c_spec_values: ++ * struct i2c_spec_values - I2C specification values for various modes + * @min_hold_start_ns: min hold time (repeated) START condition + * @min_low_ns: min LOW period of the SCL clock + * @min_high_ns: min HIGH period of the SCL cloc +@@ -136,7 +136,7 @@ static const struct i2c_spec_values fast_mode_plus_spec = { + }; + + /** +- * struct rk3x_i2c_calced_timings: ++ * struct rk3x_i2c_calced_timings - calculated V1 timings + * @div_low: Divider output for low + * @div_high: Divider output for high + * @tuning: Used to adjust setup/hold data time, +@@ -159,7 +159,7 @@ enum rk3x_i2c_state { + }; + + /** +- * struct rk3x_i2c_soc_data: ++ * struct rk3x_i2c_soc_data - SOC-specific data + * @grf_offset: offset inside the grf regmap for setting the i2c type + * @calc_timings: Callback function for i2c timing information calculated + */ +@@ -239,7 +239,8 @@ static inline void rk3x_i2c_clean_ipd(struct rk3x_i2c *i2c) + } + + /** +- * Generate a START condition, which triggers a REG_INT_START interrupt. ++ * rk3x_i2c_start - Generate a START condition, which triggers a REG_INT_START interrupt. ++ * @i2c: target controller data + */ + static void rk3x_i2c_start(struct rk3x_i2c *i2c) + { +@@ -258,8 +259,8 @@ static void rk3x_i2c_start(struct rk3x_i2c *i2c) + } + + /** +- * Generate a STOP condition, which triggers a REG_INT_STOP interrupt. +- * ++ * rk3x_i2c_stop - Generate a STOP condition, which triggers a REG_INT_STOP interrupt. ++ * @i2c: target controller data + * @error: Error code to return in rk3x_i2c_xfer + */ + static void rk3x_i2c_stop(struct rk3x_i2c *i2c, int error) +@@ -298,7 +299,8 @@ static void rk3x_i2c_stop(struct rk3x_i2c *i2c, int error) + } + + /** +- * Setup a read according to i2c->msg ++ * rk3x_i2c_prepare_read - Setup a read according to i2c->msg ++ * @i2c: target controller data + */ + static void rk3x_i2c_prepare_read(struct rk3x_i2c *i2c) + { +@@ -329,7 +331,8 @@ static void rk3x_i2c_prepare_read(struct rk3x_i2c *i2c) + } + + /** +- * Fill the transmit buffer with data from i2c->msg ++ * rk3x_i2c_fill_transmit_buf - Fill the transmit buffer with data from i2c->msg ++ * @i2c: target controller data + */ + static void rk3x_i2c_fill_transmit_buf(struct rk3x_i2c *i2c) + { +@@ -423,8 +426,8 @@ static void rk3x_i2c_handle_read(struct rk3x_i2c *i2c, unsigned int ipd) if (!(ipd & REG_INT_MBRF)) return; @@ -139145,6 +166428,72 @@ index 819ab4ee517e1..02ddb237f69af 100644 /* Can only handle a maximum of 32 bytes at a time */ if (len > 32) +@@ -532,11 +535,10 @@ out: + } + + /** +- * Get timing values of I2C specification +- * ++ * rk3x_i2c_get_spec - Get timing values of I2C specification + * @speed: Desired SCL frequency + * +- * Returns: Matched i2c spec values. ++ * Return: Matched i2c_spec_values. 
+ */ + static const struct i2c_spec_values *rk3x_i2c_get_spec(unsigned int speed) + { +@@ -549,13 +551,12 @@ static const struct i2c_spec_values *rk3x_i2c_get_spec(unsigned int speed) + } + + /** +- * Calculate divider values for desired SCL frequency +- * ++ * rk3x_i2c_v0_calc_timings - Calculate divider values for desired SCL frequency + * @clk_rate: I2C input clock rate + * @t: Known I2C timing information + * @t_calc: Caculated rk3x private timings that would be written into regs + * +- * Returns: 0 on success, -EINVAL if the goal SCL rate is too slow. In that case ++ * Return: %0 on success, -%EINVAL if the goal SCL rate is too slow. In that case + * a best-effort divider value is returned in divs. If the target rate is + * too high, we silently use the highest possible rate. + */ +@@ -710,13 +711,12 @@ static int rk3x_i2c_v0_calc_timings(unsigned long clk_rate, + } + + /** +- * Calculate timing values for desired SCL frequency +- * ++ * rk3x_i2c_v1_calc_timings - Calculate timing values for desired SCL frequency + * @clk_rate: I2C input clock rate + * @t: Known I2C timing information + * @t_calc: Caculated rk3x private timings that would be written into regs + * +- * Returns: 0 on success, -EINVAL if the goal SCL rate is too slow. In that case ++ * Return: %0 on success, -%EINVAL if the goal SCL rate is too slow. In that case + * a best-effort divider value is returned in divs. If the target rate is + * too high, we silently use the highest possible rate. + * The following formulas are v1's method to calculate timings. +@@ -960,14 +960,14 @@ static int rk3x_i2c_clk_notifier_cb(struct notifier_block *nb, unsigned long + } + + /** +- * Setup I2C registers for an I2C operation specified by msgs, num. +- * +- * Must be called with i2c->lock held. +- * ++ * rk3x_i2c_setup - Setup I2C registers for an I2C operation specified by msgs, num. ++ * @i2c: target controller data + * @msgs: I2C msgs to process + * @num: Number of msgs + * +- * returns: Number of I2C msgs processed or negative in case of error ++ * Must be called with i2c->lock held. 
++ * ++ * Return: Number of I2C msgs processed or negative in case of error + */ + static int rk3x_i2c_setup(struct rk3x_i2c *i2c, struct i2c_msg *msgs, int num) + { diff --git a/drivers/i2c/busses/i2c-stm32f7.c b/drivers/i2c/busses/i2c-stm32f7.c index b9b19a2a2ffa0..50d5ae81d2271 100644 --- a/drivers/i2c/busses/i2c-stm32f7.c @@ -139234,6 +166583,78 @@ index b9b19a2a2ffa0..50d5ae81d2271 100644 ret = -ETIMEDOUT; goto pm_free; } +diff --git a/drivers/i2c/busses/i2c-tegra.c b/drivers/i2c/busses/i2c-tegra.c +index c883044715f3b..444867cef682f 100644 +--- a/drivers/i2c/busses/i2c-tegra.c ++++ b/drivers/i2c/busses/i2c-tegra.c +@@ -283,6 +283,7 @@ struct tegra_i2c_dev { + struct dma_chan *tx_dma_chan; + struct dma_chan *rx_dma_chan; + unsigned int dma_buf_size; ++ struct device *dma_dev; + dma_addr_t dma_phys; + void *dma_buf; + +@@ -419,7 +420,7 @@ static int tegra_i2c_dma_submit(struct tegra_i2c_dev *i2c_dev, size_t len) + static void tegra_i2c_release_dma(struct tegra_i2c_dev *i2c_dev) + { + if (i2c_dev->dma_buf) { +- dma_free_coherent(i2c_dev->dev, i2c_dev->dma_buf_size, ++ dma_free_coherent(i2c_dev->dma_dev, i2c_dev->dma_buf_size, + i2c_dev->dma_buf, i2c_dev->dma_phys); + i2c_dev->dma_buf = NULL; + } +@@ -466,10 +467,13 @@ static int tegra_i2c_init_dma(struct tegra_i2c_dev *i2c_dev) + + i2c_dev->tx_dma_chan = chan; + ++ WARN_ON(i2c_dev->tx_dma_chan->device != i2c_dev->rx_dma_chan->device); ++ i2c_dev->dma_dev = chan->device->dev; ++ + i2c_dev->dma_buf_size = i2c_dev->hw->quirks->max_write_len + + I2C_PACKET_HEADER_SIZE; + +- dma_buf = dma_alloc_coherent(i2c_dev->dev, i2c_dev->dma_buf_size, ++ dma_buf = dma_alloc_coherent(i2c_dev->dma_dev, i2c_dev->dma_buf_size, + &dma_phys, GFP_KERNEL | __GFP_NOWARN); + if (!dma_buf) { + dev_err(i2c_dev->dev, "failed to allocate DMA buffer\n"); +@@ -1255,7 +1259,7 @@ static int tegra_i2c_xfer_msg(struct tegra_i2c_dev *i2c_dev, + + if (i2c_dev->dma_mode) { + if (i2c_dev->msg_read) { +- dma_sync_single_for_device(i2c_dev->dev, ++ dma_sync_single_for_device(i2c_dev->dma_dev, + i2c_dev->dma_phys, + xfer_size, DMA_FROM_DEVICE); + +@@ -1263,7 +1267,7 @@ static int tegra_i2c_xfer_msg(struct tegra_i2c_dev *i2c_dev, + if (err) + return err; + } else { +- dma_sync_single_for_cpu(i2c_dev->dev, ++ dma_sync_single_for_cpu(i2c_dev->dma_dev, + i2c_dev->dma_phys, + xfer_size, DMA_TO_DEVICE); + } +@@ -1276,7 +1280,7 @@ static int tegra_i2c_xfer_msg(struct tegra_i2c_dev *i2c_dev, + memcpy(i2c_dev->dma_buf + I2C_PACKET_HEADER_SIZE, + msg->buf, msg->len); + +- dma_sync_single_for_device(i2c_dev->dev, ++ dma_sync_single_for_device(i2c_dev->dma_dev, + i2c_dev->dma_phys, + xfer_size, DMA_TO_DEVICE); + +@@ -1327,7 +1331,7 @@ static int tegra_i2c_xfer_msg(struct tegra_i2c_dev *i2c_dev, + } + + if (i2c_dev->msg_read && i2c_dev->msg_err == I2C_ERR_NONE) { +- dma_sync_single_for_cpu(i2c_dev->dev, ++ dma_sync_single_for_cpu(i2c_dev->dma_dev, + i2c_dev->dma_phys, + xfer_size, DMA_FROM_DEVICE); + diff --git a/drivers/i2c/busses/i2c-thunderx-pcidrv.c b/drivers/i2c/busses/i2c-thunderx-pcidrv.c index 12c90aa0900e6..a77cd86fe75ed 100644 --- a/drivers/i2c/busses/i2c-thunderx-pcidrv.c @@ -139375,7 +166796,7 @@ index f10a603b13fb0..5cb21d7da05b6 100644 if (ret) return ret; diff --git a/drivers/i2c/busses/i2c-xiic.c b/drivers/i2c/busses/i2c-xiic.c -index bb93db98404ef..612343771ce25 100644 +index bb93db98404ef..34b8da949462a 100644 --- a/drivers/i2c/busses/i2c-xiic.c +++ b/drivers/i2c/busses/i2c-xiic.c @@ -756,7 +756,6 @@ static const struct i2c_adapter_quirks xiic_quirks = { @@ -139395,6 
+166816,14 @@ index bb93db98404ef..612343771ce25 100644 mutex_init(&i2c->lock); init_waitqueue_head(&i2c->wait); +@@ -933,6 +934,7 @@ static struct platform_driver xiic_i2c_driver = { + + module_platform_driver(xiic_i2c_driver); + ++MODULE_ALIAS("platform:" DRIVER_NAME); + MODULE_AUTHOR("info@mocean-labs.com"); + MODULE_DESCRIPTION("Xilinx I2C bus driver"); + MODULE_LICENSE("GPL v2"); diff --git a/drivers/i2c/busses/i2c-xlr.c b/drivers/i2c/busses/i2c-xlr.c index 126d1393e548b..9ce20652d4942 100644 --- a/drivers/i2c/busses/i2c-xlr.c @@ -139653,6 +167082,26 @@ index d3acd8d66c323..33024acaac02b 100644 i2c_mux_del_adapters(muxc); err_parent: i2c_put_adapter(parent); +diff --git a/drivers/i2c/muxes/i2c-mux-reg.c b/drivers/i2c/muxes/i2c-mux-reg.c +index 0e0679f65cf77..30a6de1694e07 100644 +--- a/drivers/i2c/muxes/i2c-mux-reg.c ++++ b/drivers/i2c/muxes/i2c-mux-reg.c +@@ -183,13 +183,12 @@ static int i2c_mux_reg_probe(struct platform_device *pdev) + if (!mux->data.reg) { + dev_info(&pdev->dev, + "Register not set, using platform resource\n"); +- res = platform_get_resource(pdev, IORESOURCE_MEM, 0); +- mux->data.reg_size = resource_size(res); +- mux->data.reg = devm_ioremap_resource(&pdev->dev, res); ++ mux->data.reg = devm_platform_get_and_ioremap_resource(pdev, 0, &res); + if (IS_ERR(mux->data.reg)) { + ret = PTR_ERR(mux->data.reg); + goto err_put_parent; + } ++ mux->data.reg_size = resource_size(res); + } + + if (mux->data.reg_size != 4 && mux->data.reg_size != 2 && diff --git a/drivers/i3c/master.c b/drivers/i3c/master.c index c3b4c677b4429..dfe18dcd008d4 100644 --- a/drivers/i3c/master.c @@ -139819,6 +167268,64 @@ index e6c543b5ee1dd..376e631e80d69 100644 if ((disabled_states_mask & BIT(drv->state_count)) || ((icpu->use_acpi || force_use_acpi) && intel_idle_off_by_default(mwait_hint) && +diff --git a/drivers/iio/accel/adis16201.c b/drivers/iio/accel/adis16201.c +index 7a434e2884d43..dfb8e2e5bdf58 100644 +--- a/drivers/iio/accel/adis16201.c ++++ b/drivers/iio/accel/adis16201.c +@@ -300,3 +300,4 @@ MODULE_AUTHOR("Barry Song <21cnbao@gmail.com>"); + MODULE_DESCRIPTION("Analog Devices ADIS16201 Dual-Axis Digital Inclinometer and Accelerometer"); + MODULE_LICENSE("GPL v2"); + MODULE_ALIAS("spi:adis16201"); ++MODULE_IMPORT_NS(IIO_ADISLIB); +diff --git a/drivers/iio/accel/adis16209.c b/drivers/iio/accel/adis16209.c +index ac08e866d6128..5a9c6e2296f1d 100644 +--- a/drivers/iio/accel/adis16209.c ++++ b/drivers/iio/accel/adis16209.c +@@ -310,3 +310,4 @@ MODULE_AUTHOR("Barry Song <21cnbao@gmail.com>"); + MODULE_DESCRIPTION("Analog Devices ADIS16209 Dual-Axis Digital Inclinometer and Accelerometer"); + MODULE_LICENSE("GPL v2"); + MODULE_ALIAS("spi:adis16209"); ++MODULE_IMPORT_NS(IIO_ADISLIB); +diff --git a/drivers/iio/accel/adxl372.c b/drivers/iio/accel/adxl372.c +index fc95924077176..4a358f8c27f36 100644 +--- a/drivers/iio/accel/adxl372.c ++++ b/drivers/iio/accel/adxl372.c +@@ -998,17 +998,30 @@ static ssize_t adxl372_get_fifo_watermark(struct device *dev, + return sprintf(buf, "%d\n", st->watermark); + } + +-static IIO_CONST_ATTR(hwfifo_watermark_min, "1"); +-static IIO_CONST_ATTR(hwfifo_watermark_max, +- __stringify(ADXL372_FIFO_SIZE)); ++static ssize_t hwfifo_watermark_min_show(struct device *dev, ++ struct device_attribute *attr, ++ char *buf) ++{ ++ return sysfs_emit(buf, "%s\n", "1"); ++} ++ ++static ssize_t hwfifo_watermark_max_show(struct device *dev, ++ struct device_attribute *attr, ++ char *buf) ++{ ++ return sysfs_emit(buf, "%s\n", __stringify(ADXL372_FIFO_SIZE)); ++} ++ ++static 
IIO_DEVICE_ATTR_RO(hwfifo_watermark_min, 0); ++static IIO_DEVICE_ATTR_RO(hwfifo_watermark_max, 0); + static IIO_DEVICE_ATTR(hwfifo_watermark, 0444, + adxl372_get_fifo_watermark, NULL, 0); + static IIO_DEVICE_ATTR(hwfifo_enabled, 0444, + adxl372_get_fifo_enabled, NULL, 0); + + static const struct attribute *adxl372_fifo_attributes[] = { +- &iio_const_attr_hwfifo_watermark_min.dev_attr.attr, +- &iio_const_attr_hwfifo_watermark_max.dev_attr.attr, ++ &iio_dev_attr_hwfifo_watermark_min.dev_attr.attr, ++ &iio_dev_attr_hwfifo_watermark_max.dev_attr.attr, + &iio_dev_attr_hwfifo_watermark.dev_attr.attr, + &iio_dev_attr_hwfifo_enabled.dev_attr.attr, + NULL, diff --git a/drivers/iio/accel/bma180.c b/drivers/iio/accel/bma180.c index 2edfcb4819b7d..3a1f47c7288ff 100644 --- a/drivers/iio/accel/bma180.c @@ -140000,6 +167507,18 @@ index b67572c3ef069..9cbe98c3ba9a2 100644 +extern const struct regmap_config fxls8962af_spi_regmap_conf; #endif /* _FXLS8962AF_H_ */ +diff --git a/drivers/iio/accel/hid-sensor-accel-3d.c b/drivers/iio/accel/hid-sensor-accel-3d.c +index a2def6f9380a3..5eac7ea199931 100644 +--- a/drivers/iio/accel/hid-sensor-accel-3d.c ++++ b/drivers/iio/accel/hid-sensor-accel-3d.c +@@ -280,6 +280,7 @@ static int accel_3d_capture_sample(struct hid_sensor_hub_device *hsdev, + hid_sensor_convert_timestamp( + &accel_state->common_attributes, + *(int64_t *)raw_data); ++ ret = 0; + break; + default: + break; diff --git a/drivers/iio/accel/kxcjk-1013.c b/drivers/iio/accel/kxcjk-1013.c index a51fdd3c9b5b5..594a383169c75 100644 --- a/drivers/iio/accel/kxcjk-1013.c @@ -140394,6 +167913,33 @@ index 069b561ee7689..b8cc94b7dd80a 100644 else return -EIO; +diff --git a/drivers/iio/adc/ad_sigma_delta.c b/drivers/iio/adc/ad_sigma_delta.c +index 1d652d9b2f5cd..bd5c49571d1ab 100644 +--- a/drivers/iio/adc/ad_sigma_delta.c ++++ b/drivers/iio/adc/ad_sigma_delta.c +@@ -280,10 +280,10 @@ int ad_sigma_delta_single_conversion(struct iio_dev *indio_dev, + unsigned int data_reg; + int ret = 0; + +- if (iio_buffer_enabled(indio_dev)) +- return -EBUSY; ++ ret = iio_device_claim_direct_mode(indio_dev); ++ if (ret) ++ return ret; + +- mutex_lock(&indio_dev->mlock); + ad_sigma_delta_set_channel(sigma_delta, chan->address); + + spi_bus_lock(sigma_delta->spi->master); +@@ -322,7 +322,7 @@ out: + ad_sigma_delta_set_mode(sigma_delta, AD_SD_MODE_IDLE); + sigma_delta->bus_locked = false; + spi_bus_unlock(sigma_delta->spi->master); +- mutex_unlock(&indio_dev->mlock); ++ iio_device_release_direct_mode(indio_dev); + + if (ret) + return ret; diff --git a/drivers/iio/adc/adi-axi-adc.c b/drivers/iio/adc/adi-axi-adc.c index a73e3c2d212fa..a9e655e69eaa2 100644 --- a/drivers/iio/adc/adi-axi-adc.c @@ -140519,6 +168065,22 @@ index ea5ca163d8796..403a29e4dc3e9 100644 vref_disable_resume: regulator_disable(st->vref); +diff --git a/drivers/iio/adc/at91_adc.c b/drivers/iio/adc/at91_adc.c +index 5a7d3a3a5fa82..d61b8ce643a80 100644 +--- a/drivers/iio/adc/at91_adc.c ++++ b/drivers/iio/adc/at91_adc.c +@@ -634,8 +634,10 @@ static struct iio_trigger *at91_adc_allocate_trigger(struct iio_dev *idev, + trig->ops = &at91_adc_trigger_ops; + + ret = iio_trigger_register(trig); +- if (ret) ++ if (ret) { ++ iio_trigger_free(trig); + return NULL; ++ } + + return trig; + } diff --git a/drivers/iio/adc/axp20x_adc.c b/drivers/iio/adc/axp20x_adc.c index 3e0c0233b4315..df99f1365c398 100644 --- a/drivers/iio/adc/axp20x_adc.c @@ -140575,6 +168137,22 @@ index 5f5e8b39e4d22..84dbe9e2f0eff 100644 {} }; +diff --git a/drivers/iio/adc/berlin2-adc.c 
b/drivers/iio/adc/berlin2-adc.c +index 8b04b95b7b7ae..fa2c87946e16f 100644 +--- a/drivers/iio/adc/berlin2-adc.c ++++ b/drivers/iio/adc/berlin2-adc.c +@@ -289,8 +289,10 @@ static int berlin2_adc_probe(struct platform_device *pdev) + int ret; + + indio_dev = devm_iio_device_alloc(&pdev->dev, sizeof(*priv)); +- if (!indio_dev) ++ if (!indio_dev) { ++ of_node_put(parent_np); + return -ENOMEM; ++ } + + priv = iio_priv(indio_dev); + platform_set_drvdata(pdev, indio_dev); diff --git a/drivers/iio/adc/dln2-adc.c b/drivers/iio/adc/dln2-adc.c index 16407664182ce..97d162a3cba4e 100644 --- a/drivers/iio/adc/dln2-adc.c @@ -140739,6 +168317,29 @@ index 42ea8bc7e7805..adc5ceaef8c93 100644 err: mcb_release_mem(mem); return -ENXIO; +diff --git a/drivers/iio/adc/mp2629_adc.c b/drivers/iio/adc/mp2629_adc.c +index aca084f1e78a5..79d8fd79b0549 100644 +--- a/drivers/iio/adc/mp2629_adc.c ++++ b/drivers/iio/adc/mp2629_adc.c +@@ -56,7 +56,8 @@ static struct iio_map mp2629_adc_maps[] = { + MP2629_MAP(SYSTEM_VOLT, "system-volt"), + MP2629_MAP(INPUT_VOLT, "input-volt"), + MP2629_MAP(BATT_CURRENT, "batt-current"), +- MP2629_MAP(INPUT_CURRENT, "input-current") ++ MP2629_MAP(INPUT_CURRENT, "input-current"), ++ { } + }; + + static int mp2629_read_raw(struct iio_dev *indio_dev, +@@ -73,7 +74,7 @@ static int mp2629_read_raw(struct iio_dev *indio_dev, + if (ret) + return ret; + +- if (chan->address == MP2629_INPUT_VOLT) ++ if (chan->channel == MP2629_INPUT_VOLT) + rval &= GENMASK(6, 0); + *val = rval; + return IIO_VAL_INT; diff --git a/drivers/iio/adc/rzg2l_adc.c b/drivers/iio/adc/rzg2l_adc.c index 32fbf57c362fa..2fa41b90bcfa9 100644 --- a/drivers/iio/adc/rzg2l_adc.c @@ -140909,6 +168510,18 @@ index 5088de835bb15..ef5b54ed96614 100644 if (status & regs->isr_ovr.mask) { /* +diff --git a/drivers/iio/adc/stm32-dfsdm-adc.c b/drivers/iio/adc/stm32-dfsdm-adc.c +index 1cfefb3b5e56c..6592221cbe21d 100644 +--- a/drivers/iio/adc/stm32-dfsdm-adc.c ++++ b/drivers/iio/adc/stm32-dfsdm-adc.c +@@ -1521,6 +1521,7 @@ static const struct of_device_id stm32_dfsdm_adc_match[] = { + }, + {} + }; ++MODULE_DEVICE_TABLE(of, stm32_dfsdm_adc_match); + + static int stm32_dfsdm_adc_probe(struct platform_device *pdev) + { diff --git a/drivers/iio/adc/stmpe-adc.c b/drivers/iio/adc/stmpe-adc.c index fba659bfdb40a..64305d9fa5602 100644 --- a/drivers/iio/adc/stmpe-adc.c @@ -141009,6 +168622,31 @@ index 16fc608db36a5..bd48b073e7200 100644 }, .probe = adc081c_probe, .id_table = adc081c_id, +diff --git a/drivers/iio/adc/ti-adc128s052.c b/drivers/iio/adc/ti-adc128s052.c +index 83c1ae07b3e9a..8618ae7bc0671 100644 +--- a/drivers/iio/adc/ti-adc128s052.c ++++ b/drivers/iio/adc/ti-adc128s052.c +@@ -193,13 +193,13 @@ static int adc128_remove(struct spi_device *spi) + } + + static const struct of_device_id adc128_of_match[] = { +- { .compatible = "ti,adc128s052", }, +- { .compatible = "ti,adc122s021", }, +- { .compatible = "ti,adc122s051", }, +- { .compatible = "ti,adc122s101", }, +- { .compatible = "ti,adc124s021", }, +- { .compatible = "ti,adc124s051", }, +- { .compatible = "ti,adc124s101", }, ++ { .compatible = "ti,adc128s052", .data = (void*)0L, }, ++ { .compatible = "ti,adc122s021", .data = (void*)1L, }, ++ { .compatible = "ti,adc122s051", .data = (void*)1L, }, ++ { .compatible = "ti,adc122s101", .data = (void*)1L, }, ++ { .compatible = "ti,adc124s021", .data = (void*)2L, }, ++ { .compatible = "ti,adc124s051", .data = (void*)2L, }, ++ { .compatible = "ti,adc124s101", .data = (void*)2L, }, + { /* sentinel */ }, + }; + MODULE_DEVICE_TABLE(of, adc128_of_match); diff 
--git a/drivers/iio/adc/ti-ads131e08.c b/drivers/iio/adc/ti-ads131e08.c index 0c2025a225750..80a09817c1194 100644 --- a/drivers/iio/adc/ti-ads131e08.c @@ -141083,10 +168721,29 @@ index 170950d5dd499..e8fc4d01f30b6 100644 priv->tx = devm_kzalloc(&priv->spi->dev, size, GFP_KERNEL); diff --git a/drivers/iio/adc/twl6030-gpadc.c b/drivers/iio/adc/twl6030-gpadc.c -index c6416ad795ca4..256177b15c511 100644 +index c6416ad795ca4..024bdc1ef77e6 100644 --- a/drivers/iio/adc/twl6030-gpadc.c +++ b/drivers/iio/adc/twl6030-gpadc.c -@@ -911,6 +911,8 @@ static int twl6030_gpadc_probe(struct platform_device *pdev) +@@ -57,6 +57,18 @@ + #define TWL6030_GPADCS BIT(1) + #define TWL6030_GPADCR BIT(0) + ++#define USB_VBUS_CTRL_SET 0x04 ++#define USB_ID_CTRL_SET 0x06 ++ ++#define TWL6030_MISC1 0xE4 ++#define VBUS_MEAS 0x01 ++#define ID_MEAS 0x01 ++ ++#define VAC_MEAS 0x04 ++#define VBAT_MEAS 0x02 ++#define BB_MEAS 0x01 ++ ++ + /** + * struct twl6030_chnl_calib - channel calibration + * @gain: slope coefficient for ideal curve +@@ -911,6 +923,8 @@ static int twl6030_gpadc_probe(struct platform_device *pdev) ret = devm_request_threaded_irq(dev, irq, NULL, twl6030_gpadc_irq_handler, IRQF_ONESHOT, "twl6030_gpadc", indio_dev); @@ -141095,6 +168752,33 @@ index c6416ad795ca4..256177b15c511 100644 ret = twl6030_gpadc_enable_irq(TWL6030_GPADC_RT_SW1_EOC_MASK); if (ret < 0) { +@@ -925,6 +939,26 @@ static int twl6030_gpadc_probe(struct platform_device *pdev) + return ret; + } + ++ ret = twl_i2c_write_u8(TWL_MODULE_USB, VBUS_MEAS, USB_VBUS_CTRL_SET); ++ if (ret < 0) { ++ dev_err(dev, "failed to wire up inputs\n"); ++ return ret; ++ } ++ ++ ret = twl_i2c_write_u8(TWL_MODULE_USB, ID_MEAS, USB_ID_CTRL_SET); ++ if (ret < 0) { ++ dev_err(dev, "failed to wire up inputs\n"); ++ return ret; ++ } ++ ++ ret = twl_i2c_write_u8(TWL6030_MODULE_ID0, ++ VBAT_MEAS | BB_MEAS | VAC_MEAS, ++ TWL6030_MISC1); ++ if (ret < 0) { ++ dev_err(dev, "failed to wire up inputs\n"); ++ return ret; ++ } ++ + indio_dev->name = DRIVER_NAME; + indio_dev->info = &twl6030_gpadc_iio_info; + indio_dev->modes = INDIO_DIRECT_MODE; diff --git a/drivers/iio/afe/iio-rescale.c b/drivers/iio/afe/iio-rescale.c index 774eb3044edd8..cc28713b0dc8b 100644 --- a/drivers/iio/afe/iio-rescale.c @@ -141585,6 +169269,24 @@ index c0b7ef9007354..c24f609c2ade6 100644 return ERR_PTR(ret); } +diff --git a/drivers/iio/gyro/adis16136.c b/drivers/iio/gyro/adis16136.c +index 36879f01e28ca..71295709f2b96 100644 +--- a/drivers/iio/gyro/adis16136.c ++++ b/drivers/iio/gyro/adis16136.c +@@ -591,3 +591,4 @@ module_spi_driver(adis16136_driver); + MODULE_AUTHOR("Lars-Peter Clausen <lars@metafoo.de>"); + MODULE_DESCRIPTION("Analog Devices ADIS16133/ADIS16135/ADIS16136 gyroscope driver"); + MODULE_LICENSE("GPL v2"); ++MODULE_IMPORT_NS(IIO_ADISLIB); +diff --git a/drivers/iio/gyro/adis16260.c b/drivers/iio/gyro/adis16260.c +index 66b6b7bd5e1bc..eaf57bd339edd 100644 +--- a/drivers/iio/gyro/adis16260.c ++++ b/drivers/iio/gyro/adis16260.c +@@ -433,3 +433,4 @@ module_spi_driver(adis16260_driver); + MODULE_AUTHOR("Barry Song <21cnbao@gmail.com>"); + MODULE_DESCRIPTION("Analog Devices ADIS16260/5 Digital Gyroscope Sensor"); + MODULE_LICENSE("GPL v2"); ++MODULE_IMPORT_NS(IIO_ADISLIB); diff --git a/drivers/iio/gyro/adxrs290.c b/drivers/iio/gyro/adxrs290.c index 3e0734ddafe36..600e9725da788 100644 --- a/drivers/iio/gyro/adxrs290.c @@ -141635,6 +169337,18 @@ index 17b939a367ad0..81a6d09788bd7 100644 err_buffer_cleanup: iio_triggered_buffer_cleanup(indio_dev); err_trigger_unregister: +diff --git 
a/drivers/iio/gyro/hid-sensor-gyro-3d.c b/drivers/iio/gyro/hid-sensor-gyro-3d.c +index 8f0ad022c7f1b..698c50da1f109 100644 +--- a/drivers/iio/gyro/hid-sensor-gyro-3d.c ++++ b/drivers/iio/gyro/hid-sensor-gyro-3d.c +@@ -231,6 +231,7 @@ static int gyro_3d_capture_sample(struct hid_sensor_hub_device *hsdev, + gyro_state->timestamp = + hid_sensor_convert_timestamp(&gyro_state->common_attributes, + *(s64 *)raw_data); ++ ret = 0; + break; + default: + break; diff --git a/drivers/iio/gyro/itg3200_buffer.c b/drivers/iio/gyro/itg3200_buffer.c index 04dd6a7969ea7..4cfa0d4395605 100644 --- a/drivers/iio/gyro/itg3200_buffer.c @@ -141696,6 +169410,94 @@ index 41d835493347c..9d8916871b4bf 100644 return 0; } +diff --git a/drivers/iio/health/afe4403.c b/drivers/iio/health/afe4403.c +index d4921385aaf7d..b5f959bba4229 100644 +--- a/drivers/iio/health/afe4403.c ++++ b/drivers/iio/health/afe4403.c +@@ -245,14 +245,14 @@ static int afe4403_read_raw(struct iio_dev *indio_dev, + int *val, int *val2, long mask) + { + struct afe4403_data *afe = iio_priv(indio_dev); +- unsigned int reg = afe4403_channel_values[chan->address]; +- unsigned int field = afe4403_channel_leds[chan->address]; ++ unsigned int reg, field; + int ret; + + switch (chan->type) { + case IIO_INTENSITY: + switch (mask) { + case IIO_CHAN_INFO_RAW: ++ reg = afe4403_channel_values[chan->address]; + ret = afe4403_read(afe, reg, val); + if (ret) + return ret; +@@ -262,6 +262,7 @@ static int afe4403_read_raw(struct iio_dev *indio_dev, + case IIO_CURRENT: + switch (mask) { + case IIO_CHAN_INFO_RAW: ++ field = afe4403_channel_leds[chan->address]; + ret = regmap_field_read(afe->fields[field], val); + if (ret) + return ret; +diff --git a/drivers/iio/health/afe4404.c b/drivers/iio/health/afe4404.c +index d8a27dfe074a3..70f0f6f6351cd 100644 +--- a/drivers/iio/health/afe4404.c ++++ b/drivers/iio/health/afe4404.c +@@ -250,20 +250,20 @@ static int afe4404_read_raw(struct iio_dev *indio_dev, + int *val, int *val2, long mask) + { + struct afe4404_data *afe = iio_priv(indio_dev); +- unsigned int value_reg = afe4404_channel_values[chan->address]; +- unsigned int led_field = afe4404_channel_leds[chan->address]; +- unsigned int offdac_field = afe4404_channel_offdacs[chan->address]; ++ unsigned int value_reg, led_field, offdac_field; + int ret; + + switch (chan->type) { + case IIO_INTENSITY: + switch (mask) { + case IIO_CHAN_INFO_RAW: ++ value_reg = afe4404_channel_values[chan->address]; + ret = regmap_read(afe->regmap, value_reg, val); + if (ret) + return ret; + return IIO_VAL_INT; + case IIO_CHAN_INFO_OFFSET: ++ offdac_field = afe4404_channel_offdacs[chan->address]; + ret = regmap_field_read(afe->fields[offdac_field], val); + if (ret) + return ret; +@@ -273,6 +273,7 @@ static int afe4404_read_raw(struct iio_dev *indio_dev, + case IIO_CURRENT: + switch (mask) { + case IIO_CHAN_INFO_RAW: ++ led_field = afe4404_channel_leds[chan->address]; + ret = regmap_field_read(afe->fields[led_field], val); + if (ret) + return ret; +@@ -295,19 +296,20 @@ static int afe4404_write_raw(struct iio_dev *indio_dev, + int val, int val2, long mask) + { + struct afe4404_data *afe = iio_priv(indio_dev); +- unsigned int led_field = afe4404_channel_leds[chan->address]; +- unsigned int offdac_field = afe4404_channel_offdacs[chan->address]; ++ unsigned int led_field, offdac_field; + + switch (chan->type) { + case IIO_INTENSITY: + switch (mask) { + case IIO_CHAN_INFO_OFFSET: ++ offdac_field = afe4404_channel_offdacs[chan->address]; + return regmap_field_write(afe->fields[offdac_field], val); + } + 
break; + case IIO_CURRENT: + switch (mask) { + case IIO_CHAN_INFO_RAW: ++ led_field = afe4404_channel_leds[chan->address]; + return regmap_field_write(afe->fields[led_field], val); + } + break; diff --git a/drivers/iio/humidity/hts221_buffer.c b/drivers/iio/humidity/hts221_buffer.c index f29692b9d2db0..66b32413cf5e2 100644 --- a/drivers/iio/humidity/hts221_buffer.c @@ -141715,29 +169517,321 @@ index f29692b9d2db0..66b32413cf5e2 100644 static int hts221_buffer_preenable(struct iio_dev *iio_dev) diff --git a/drivers/iio/imu/adis.c b/drivers/iio/imu/adis.c -index b9a06ca29beec..d4e692b187cda 100644 +index b9a06ca29beec..bc40240b29e26 100644 --- a/drivers/iio/imu/adis.c +++ b/drivers/iio/imu/adis.c -@@ -430,6 +430,8 @@ int __adis_initial_startup(struct adis *adis) +@@ -30,8 +30,8 @@ + * @value: The value to write to device (up to 4 bytes) + * @size: The size of the @value (in bytes) + */ +-int __adis_write_reg(struct adis *adis, unsigned int reg, +- unsigned int value, unsigned int size) ++int __adis_write_reg(struct adis *adis, unsigned int reg, unsigned int value, ++ unsigned int size) + { + unsigned int page = reg / ADIS_PAGE_SIZE; + int ret, i; +@@ -114,14 +114,14 @@ int __adis_write_reg(struct adis *adis, unsigned int reg, + ret = spi_sync(adis->spi, &msg); + if (ret) { + dev_err(&adis->spi->dev, "Failed to write register 0x%02X: %d\n", +- reg, ret); ++ reg, ret); + } else { + adis->current_page = page; + } + + return ret; + } +-EXPORT_SYMBOL_GPL(__adis_write_reg); ++EXPORT_SYMBOL_NS_GPL(__adis_write_reg, IIO_ADISLIB); + + /** + * __adis_read_reg() - read N bytes from register (unlocked version) +@@ -130,8 +130,8 @@ EXPORT_SYMBOL_GPL(__adis_write_reg); + * @val: The value read back from the device + * @size: The size of the @val buffer + */ +-int __adis_read_reg(struct adis *adis, unsigned int reg, +- unsigned int *val, unsigned int size) ++int __adis_read_reg(struct adis *adis, unsigned int reg, unsigned int *val, ++ unsigned int size) + { + unsigned int page = reg / ADIS_PAGE_SIZE; + struct spi_message msg; +@@ -201,12 +201,12 @@ int __adis_read_reg(struct adis *adis, unsigned int reg, + ret = spi_sync(adis->spi, &msg); + if (ret) { + dev_err(&adis->spi->dev, "Failed to read register 0x%02X: %d\n", +- reg, ret); ++ reg, ret); + return ret; +- } else { +- adis->current_page = page; + } + ++ adis->current_page = page; ++ + switch (size) { + case 4: + *val = get_unaligned_be32(adis->rx); +@@ -218,7 +218,7 @@ int __adis_read_reg(struct adis *adis, unsigned int reg, + + return ret; + } +-EXPORT_SYMBOL_GPL(__adis_read_reg); ++EXPORT_SYMBOL_NS_GPL(__adis_read_reg, IIO_ADISLIB); + /** + * __adis_update_bits_base() - ADIS Update bits function - Unlocked version + * @adis: The adis device +@@ -243,17 +243,17 @@ int __adis_update_bits_base(struct adis *adis, unsigned int reg, const u32 mask, + + return __adis_write_reg(adis, reg, __val, size); + } +-EXPORT_SYMBOL_GPL(__adis_update_bits_base); ++EXPORT_SYMBOL_NS_GPL(__adis_update_bits_base, IIO_ADISLIB); + + #ifdef CONFIG_DEBUG_FS + +-int adis_debugfs_reg_access(struct iio_dev *indio_dev, +- unsigned int reg, unsigned int writeval, unsigned int *readval) ++int adis_debugfs_reg_access(struct iio_dev *indio_dev, unsigned int reg, ++ unsigned int writeval, unsigned int *readval) + { + struct adis *adis = iio_device_get_drvdata(indio_dev); + + if (readval) { +- uint16_t val16; ++ u16 val16; + int ret; + + ret = adis_read_reg_16(adis, reg, &val16); +@@ -261,36 +261,41 @@ int adis_debugfs_reg_access(struct iio_dev *indio_dev, + *readval = val16; + + 
return ret; +- } else { +- return adis_write_reg_16(adis, reg, writeval); + } ++ ++ return adis_write_reg_16(adis, reg, writeval); + } +-EXPORT_SYMBOL(adis_debugfs_reg_access); ++EXPORT_SYMBOL_NS(adis_debugfs_reg_access, IIO_ADISLIB); + + #endif + + /** +- * adis_enable_irq() - Enable or disable data ready IRQ ++ * __adis_enable_irq() - Enable or disable data ready IRQ (unlocked) + * @adis: The adis device + * @enable: Whether to enable the IRQ + * + * Returns 0 on success, negative error code otherwise + */ +-int adis_enable_irq(struct adis *adis, bool enable) ++int __adis_enable_irq(struct adis *adis, bool enable) + { +- int ret = 0; +- uint16_t msc; ++ int ret; ++ u16 msc; + +- mutex_lock(&adis->state_lock); ++ if (adis->data->enable_irq) ++ return adis->data->enable_irq(adis, enable); ++ ++ if (adis->data->unmasked_drdy) { ++ if (enable) ++ enable_irq(adis->spi->irq); ++ else ++ disable_irq(adis->spi->irq); + +- if (adis->data->enable_irq) { +- ret = adis->data->enable_irq(adis, enable); +- goto out_unlock; ++ return 0; + } + + ret = __adis_read_reg_16(adis, adis->data->msc_ctrl_reg, &msc); + if (ret) +- goto out_unlock; ++ return ret; + + msc |= ADIS_MSC_CTRL_DATA_RDY_POL_HIGH; + msc &= ~ADIS_MSC_CTRL_DATA_RDY_DIO2; +@@ -299,13 +304,9 @@ int adis_enable_irq(struct adis *adis, bool enable) + else + msc &= ~ADIS_MSC_CTRL_DATA_RDY_EN; + +- ret = __adis_write_reg_16(adis, adis->data->msc_ctrl_reg, msc); +- +-out_unlock: +- mutex_unlock(&adis->state_lock); +- return ret; ++ return __adis_write_reg_16(adis, adis->data->msc_ctrl_reg, msc); + } +-EXPORT_SYMBOL(adis_enable_irq); ++EXPORT_SYMBOL_NS(__adis_enable_irq, IIO_ADISLIB); + + /** + * __adis_check_status() - Check the device for error conditions (unlocked) +@@ -315,7 +316,7 @@ EXPORT_SYMBOL(adis_enable_irq); + */ + int __adis_check_status(struct adis *adis) + { +- uint16_t status; ++ u16 status; + int ret; + int i; + +@@ -337,7 +338,7 @@ int __adis_check_status(struct adis *adis) + + return -EIO; + } +-EXPORT_SYMBOL_GPL(__adis_check_status); ++EXPORT_SYMBOL_NS_GPL(__adis_check_status, IIO_ADISLIB); + + /** + * __adis_reset() - Reset the device (unlocked version) +@@ -351,7 +352,7 @@ int __adis_reset(struct adis *adis) + const struct adis_timeout *timeouts = adis->data->timeouts; + + ret = __adis_write_reg_8(adis, adis->data->glob_cmd_reg, +- ADIS_GLOB_CMD_SW_RESET); ++ ADIS_GLOB_CMD_SW_RESET); + if (ret) { + dev_err(&adis->spi->dev, "Failed to reset device: %d\n", ret); + return ret; +@@ -361,7 +362,7 @@ int __adis_reset(struct adis *adis) + + return 0; + } +-EXPORT_SYMBOL_GPL(__adis_reset); ++EXPORT_SYMBOL_NS_GPL(__adis_reset, IIO_ADIS_LIB); + + static int adis_self_test(struct adis *adis) + { +@@ -407,7 +408,7 @@ int __adis_initial_startup(struct adis *adis) + { + const struct adis_timeout *timeouts = adis->data->timeouts; + struct gpio_desc *gpio; +- uint16_t prod_id; ++ u16 prod_id; + int ret; + + /* check if the device has rst pin low */ +@@ -416,7 +417,7 @@ int __adis_initial_startup(struct adis *adis) + return PTR_ERR(gpio); + + if (gpio) { +- msleep(10); ++ usleep_range(10, 12); + /* bring device out of reset */ + gpiod_set_value_cansleep(gpio, 0); + msleep(timeouts->reset_ms); +@@ -430,6 +431,14 @@ int __adis_initial_startup(struct adis *adis) if (ret) return ret; -+ adis_enable_irq(adis, false); ++ /* ++ * don't bother calling this if we can't unmask the IRQ as in this case ++ * the IRQ is most likely not yet requested and we will request it ++ * with 'IRQF_NO_AUTOEN' anyways. 
++ */ ++ if (!adis->data->unmasked_drdy) ++ __adis_enable_irq(adis, false); + if (!adis->data->prod_id_reg) return 0; -@@ -526,7 +528,7 @@ int adis_init(struct adis *adis, struct iio_dev *indio_dev, +@@ -444,7 +453,7 @@ int __adis_initial_startup(struct adis *adis) + + return 0; + } +-EXPORT_SYMBOL_GPL(__adis_initial_startup); ++EXPORT_SYMBOL_NS_GPL(__adis_initial_startup, IIO_ADISLIB); + + /** + * adis_single_conversion() - Performs a single sample conversion +@@ -462,7 +471,8 @@ EXPORT_SYMBOL_GPL(__adis_initial_startup); + * a error bit in the channels raw value set error_mask to 0. + */ + int adis_single_conversion(struct iio_dev *indio_dev, +- const struct iio_chan_spec *chan, unsigned int error_mask, int *val) ++ const struct iio_chan_spec *chan, ++ unsigned int error_mask, int *val) + { + struct adis *adis = iio_device_get_drvdata(indio_dev); + unsigned int uval; +@@ -471,7 +481,7 @@ int adis_single_conversion(struct iio_dev *indio_dev, + mutex_lock(&adis->state_lock); + + ret = __adis_read_reg(adis, chan->address, &uval, +- chan->scan_type.storagebits / 8); ++ chan->scan_type.storagebits / 8); + if (ret) + goto err_unlock; + +@@ -491,7 +501,7 @@ err_unlock: + mutex_unlock(&adis->state_lock); + return ret; + } +-EXPORT_SYMBOL_GPL(adis_single_conversion); ++EXPORT_SYMBOL_NS_GPL(adis_single_conversion, IIO_ADISLIB); + + /** + * adis_init() - Initialize adis device structure +@@ -506,7 +516,7 @@ EXPORT_SYMBOL_GPL(adis_single_conversion); + * called. + */ + int adis_init(struct adis *adis, struct iio_dev *indio_dev, +- struct spi_device *spi, const struct adis_data *data) ++ struct spi_device *spi, const struct adis_data *data) + { + if (!data || !data->timeouts) { + dev_err(&spi->dev, "No config data or timeouts not defined!\n"); +@@ -526,9 +536,9 @@ int adis_init(struct adis *adis, struct iio_dev *indio_dev, adis->current_page = 0; } - return adis_enable_irq(adis, false); + return 0; } - EXPORT_SYMBOL_GPL(adis_init); +-EXPORT_SYMBOL_GPL(adis_init); ++EXPORT_SYMBOL_NS_GPL(adis_init, IIO_ADISLIB); + MODULE_LICENSE("GPL"); + MODULE_AUTHOR("Lars-Peter Clausen <lars@metafoo.de>"); +diff --git a/drivers/iio/imu/adis16400.c b/drivers/iio/imu/adis16400.c +index b12917a7cb602..9bcd9a9261b92 100644 +--- a/drivers/iio/imu/adis16400.c ++++ b/drivers/iio/imu/adis16400.c +@@ -1230,3 +1230,4 @@ module_spi_driver(adis16400_driver); + MODULE_AUTHOR("Manuel Stahl <manuel.stahl@iis.fraunhofer.de>"); + MODULE_DESCRIPTION("Analog Devices ADIS16400/5 IMU SPI driver"); + MODULE_LICENSE("GPL v2"); ++MODULE_IMPORT_NS(IIO_ADISLIB); +diff --git a/drivers/iio/imu/adis16460.c b/drivers/iio/imu/adis16460.c +index a6f9fba3e03f4..40fc0e582a9fd 100644 +--- a/drivers/iio/imu/adis16460.c ++++ b/drivers/iio/imu/adis16460.c +@@ -444,3 +444,4 @@ module_spi_driver(adis16460_driver); + MODULE_AUTHOR("Dragos Bogdan <dragos.bogdan@analog.com>"); + MODULE_DESCRIPTION("Analog Devices ADIS16460 IMU driver"); + MODULE_LICENSE("GPL"); ++MODULE_IMPORT_NS(IIO_ADISLIB); +diff --git a/drivers/iio/imu/adis16475.c b/drivers/iio/imu/adis16475.c +index 287fff39a927a..9d28534db3b08 100644 +--- a/drivers/iio/imu/adis16475.c ++++ b/drivers/iio/imu/adis16475.c +@@ -1382,3 +1382,4 @@ module_spi_driver(adis16475_driver); + MODULE_AUTHOR("Nuno Sa <nuno.sa@analog.com>"); + MODULE_DESCRIPTION("Analog Devices ADIS16475 IMU driver"); + MODULE_LICENSE("GPL"); ++MODULE_IMPORT_NS(IIO_ADISLIB); diff --git a/drivers/iio/imu/adis16480.c b/drivers/iio/imu/adis16480.c -index ed129321a14da..f9b4540db1f43 100644 +index ed129321a14da..44bbe3d199073 100644 --- 
a/drivers/iio/imu/adis16480.c +++ b/drivers/iio/imu/adis16480.c @@ -1403,6 +1403,7 @@ static int adis16480_probe(struct spi_device *spi) @@ -141762,6 +169856,90 @@ index ed129321a14da..f9b4540db1f43 100644 if (ret) return ret; +@@ -1533,3 +1538,4 @@ module_spi_driver(adis16480_driver); + MODULE_AUTHOR("Lars-Peter Clausen <lars@metafoo.de>"); + MODULE_DESCRIPTION("Analog Devices ADIS16480 IMU driver"); + MODULE_LICENSE("GPL v2"); ++MODULE_IMPORT_NS(IIO_ADISLIB); +diff --git a/drivers/iio/imu/adis_buffer.c b/drivers/iio/imu/adis_buffer.c +index 351c303c8a8c0..928933027ae34 100644 +--- a/drivers/iio/imu/adis_buffer.c ++++ b/drivers/iio/imu/adis_buffer.c +@@ -20,7 +20,7 @@ + #include <linux/iio/imu/adis.h> + + static int adis_update_scan_mode_burst(struct iio_dev *indio_dev, +- const unsigned long *scan_mask) ++ const unsigned long *scan_mask) + { + struct adis *adis = iio_device_get_drvdata(indio_dev); + unsigned int burst_length, burst_max_length; +@@ -67,7 +67,7 @@ static int adis_update_scan_mode_burst(struct iio_dev *indio_dev, + } + + int adis_update_scan_mode(struct iio_dev *indio_dev, +- const unsigned long *scan_mask) ++ const unsigned long *scan_mask) + { + struct adis *adis = iio_device_get_drvdata(indio_dev); + const struct iio_chan_spec *chan; +@@ -124,7 +124,7 @@ int adis_update_scan_mode(struct iio_dev *indio_dev, + + return 0; + } +-EXPORT_SYMBOL_GPL(adis_update_scan_mode); ++EXPORT_SYMBOL_NS_GPL(adis_update_scan_mode, IIO_ADISLIB); + + static irqreturn_t adis_trigger_handler(int irq, void *p) + { +@@ -158,7 +158,7 @@ static irqreturn_t adis_trigger_handler(int irq, void *p) + } + + iio_push_to_buffers_with_timestamp(indio_dev, adis->buffer, +- pf->timestamp); ++ pf->timestamp); + + irq_done: + iio_trigger_notify_done(indio_dev->trig); +@@ -212,5 +212,5 @@ devm_adis_setup_buffer_and_trigger(struct adis *adis, struct iio_dev *indio_dev, + return devm_add_action_or_reset(&adis->spi->dev, adis_buffer_cleanup, + adis); + } +-EXPORT_SYMBOL_GPL(devm_adis_setup_buffer_and_trigger); ++EXPORT_SYMBOL_NS_GPL(devm_adis_setup_buffer_and_trigger, IIO_ADISLIB); + +diff --git a/drivers/iio/imu/adis_trigger.c b/drivers/iio/imu/adis_trigger.c +index 48eedc29b28a8..f890bf842db86 100644 +--- a/drivers/iio/imu/adis_trigger.c ++++ b/drivers/iio/imu/adis_trigger.c +@@ -15,8 +15,7 @@ + #include <linux/iio/trigger.h> + #include <linux/iio/imu/adis.h> + +-static int adis_data_rdy_trigger_set_state(struct iio_trigger *trig, +- bool state) ++static int adis_data_rdy_trigger_set_state(struct iio_trigger *trig, bool state) + { + struct adis *adis = iio_trigger_get_drvdata(trig); + +@@ -30,6 +29,10 @@ static const struct iio_trigger_ops adis_trigger_ops = { + static int adis_validate_irq_flag(struct adis *adis) + { + unsigned long direction = adis->irq_flag & IRQF_TRIGGER_MASK; ++ ++ /* We cannot mask the interrupt so ensure it's not enabled at request */ ++ if (adis->data->unmasked_drdy) ++ adis->irq_flag |= IRQF_NO_AUTOEN; + /* + * Typically this devices have data ready either on the rising edge or + * on the falling edge of the data ready pin. 
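
The EXPORT_SYMBOL_NS_GPL()/MODULE_IMPORT_NS(IIO_ADISLIB) churn across these files moves the ADIS library into a symbol namespace: the exports become invisible to modules that do not import the namespace, and modpost flags any consumer that forgets to. A minimal sketch of the pairing, with a hypothetical helper name:

    /* in the exporting (library) module */
    #include <linux/module.h>

    int adislib_example_helper(void)
    {
            return 0;
    }
    EXPORT_SYMBOL_NS_GPL(adislib_example_helper, IIO_ADISLIB);

    /* in each consuming driver, next to its other MODULE_*() tags */
    MODULE_IMPORT_NS(IIO_ADISLIB);
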
This checks enforces that +@@ -84,5 +87,5 @@ int devm_adis_probe_trigger(struct adis *adis, struct iio_dev *indio_dev) + + return devm_iio_trigger_register(&adis->spi->dev, adis->trig); + } +-EXPORT_SYMBOL_GPL(devm_adis_probe_trigger); ++EXPORT_SYMBOL_NS_GPL(devm_adis_probe_trigger, IIO_ADISLIB); + diff --git a/drivers/iio/imu/bmi160/bmi160_core.c b/drivers/iio/imu/bmi160/bmi160_core.c index 824b5124a5f55..01336105792ee 100644 --- a/drivers/iio/imu/bmi160/bmi160_core.c @@ -141818,6 +169996,222 @@ index 824b5124a5f55..01336105792ee 100644 } static int bmi160_data_rdy_trigger_set_state(struct iio_trigger *trig, +diff --git a/drivers/iio/imu/fxos8700_core.c b/drivers/iio/imu/fxos8700_core.c +index ab288186f36e4..04d3778fcc153 100644 +--- a/drivers/iio/imu/fxos8700_core.c ++++ b/drivers/iio/imu/fxos8700_core.c +@@ -10,6 +10,7 @@ + #include <linux/regmap.h> + #include <linux/acpi.h> + #include <linux/bitops.h> ++#include <linux/bitfield.h> + + #include <linux/iio/iio.h> + #include <linux/iio/sysfs.h> +@@ -144,9 +145,8 @@ + #define FXOS8700_NVM_DATA_BNK0 0xa7 + + /* Bit definitions for FXOS8700_CTRL_REG1 */ +-#define FXOS8700_CTRL_ODR_MSK 0x38 + #define FXOS8700_CTRL_ODR_MAX 0x00 +-#define FXOS8700_CTRL_ODR_MIN GENMASK(4, 3) ++#define FXOS8700_CTRL_ODR_MSK GENMASK(5, 3) + + /* Bit definitions for FXOS8700_M_CTRL_REG1 */ + #define FXOS8700_HMS_MASK GENMASK(1, 0) +@@ -320,7 +320,7 @@ static enum fxos8700_sensor fxos8700_to_sensor(enum iio_chan_type iio_type) + switch (iio_type) { + case IIO_ACCEL: + return FXOS8700_ACCEL; +- case IIO_ANGL_VEL: ++ case IIO_MAGN: + return FXOS8700_MAGN; + default: + return -EINVAL; +@@ -345,15 +345,35 @@ static int fxos8700_set_active_mode(struct fxos8700_data *data, + static int fxos8700_set_scale(struct fxos8700_data *data, + enum fxos8700_sensor t, int uscale) + { +- int i; ++ int i, ret, val; ++ bool active_mode; + static const int scale_num = ARRAY_SIZE(fxos8700_accel_scale); + struct device *dev = regmap_get_device(data->regmap); + + if (t == FXOS8700_MAGN) { +- dev_err(dev, "Magnetometer scale is locked at 1200uT\n"); ++ dev_err(dev, "Magnetometer scale is locked at 0.001Gs\n"); + return -EINVAL; + } + ++ /* ++ * When device is in active mode, it failed to set an ACCEL ++ * full-scale range(2g/4g/8g) in FXOS8700_XYZ_DATA_CFG. ++ * This is not align with the datasheet, but it is a fxos8700 ++ * chip behavier. Set the device in standby mode before setting ++ * an ACCEL full-scale range. 
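
A condensed sketch of the standby-then-restore sequence this comment describes, using the register names from this file but a hypothetical regmap handle (assumption: the caller holds any locking the driver needs):

    #include <linux/regmap.h>

    static int set_accel_range_in_standby(struct regmap *map, u8 range_bits)
    {
            unsigned int ctrl;
            int ret;

            ret = regmap_read(map, FXOS8700_CTRL_REG1, &ctrl);
            if (ret)
                    return ret;

            /* the full-scale bits only latch while ACTIVE is clear */
            ret = regmap_write(map, FXOS8700_CTRL_REG1,
                               ctrl & ~FXOS8700_ACTIVE);
            if (ret)
                    return ret;

            ret = regmap_write(map, FXOS8700_XYZ_DATA_CFG, range_bits);
            if (ret)
                    return ret;

            /* put the device back into its previous active/standby state */
            return regmap_write(map, FXOS8700_CTRL_REG1, ctrl);
    }
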
++ */ ++ ret = regmap_read(data->regmap, FXOS8700_CTRL_REG1, &val); ++ if (ret) ++ return ret; ++ ++ active_mode = val & FXOS8700_ACTIVE; ++ if (active_mode) { ++ ret = regmap_write(data->regmap, FXOS8700_CTRL_REG1, ++ val & ~FXOS8700_ACTIVE); ++ if (ret) ++ return ret; ++ } ++ + for (i = 0; i < scale_num; i++) + if (fxos8700_accel_scale[i].uscale == uscale) + break; +@@ -361,8 +381,12 @@ static int fxos8700_set_scale(struct fxos8700_data *data, + if (i == scale_num) + return -EINVAL; + +- return regmap_write(data->regmap, FXOS8700_XYZ_DATA_CFG, ++ ret = regmap_write(data->regmap, FXOS8700_XYZ_DATA_CFG, + fxos8700_accel_scale[i].bits); ++ if (ret) ++ return ret; ++ return regmap_write(data->regmap, FXOS8700_CTRL_REG1, ++ active_mode); + } + + static int fxos8700_get_scale(struct fxos8700_data *data, +@@ -372,7 +396,7 @@ static int fxos8700_get_scale(struct fxos8700_data *data, + static const int scale_num = ARRAY_SIZE(fxos8700_accel_scale); + + if (t == FXOS8700_MAGN) { +- *uscale = 1200; /* Magnetometer is locked at 1200uT */ ++ *uscale = 1000; /* Magnetometer is locked at 0.001Gs */ + return 0; + } + +@@ -394,22 +418,61 @@ static int fxos8700_get_data(struct fxos8700_data *data, int chan_type, + int axis, int *val) + { + u8 base, reg; ++ s16 tmp; + int ret; +- enum fxos8700_sensor type = fxos8700_to_sensor(chan_type); + +- base = type ? FXOS8700_OUT_X_MSB : FXOS8700_M_OUT_X_MSB; ++ /* ++ * Different register base addresses varies with channel types. ++ * This bug hasn't been noticed before because using an enum is ++ * really hard to read. Use an a switch statement to take over that. ++ */ ++ switch (chan_type) { ++ case IIO_ACCEL: ++ base = FXOS8700_OUT_X_MSB; ++ break; ++ case IIO_MAGN: ++ base = FXOS8700_M_OUT_X_MSB; ++ break; ++ default: ++ return -EINVAL; ++ } + + /* Block read 6 bytes of device output registers to avoid data loss */ + ret = regmap_bulk_read(data->regmap, base, data->buf, +- FXOS8700_DATA_BUF_SIZE); ++ sizeof(data->buf)); + if (ret) + return ret; + + /* Convert axis to buffer index */ + reg = axis - IIO_MOD_X; + ++ /* ++ * Convert to native endianness. The accel data and magn data ++ * are signed, so a forced type conversion is needed. ++ */ ++ tmp = be16_to_cpu(data->buf[reg]); ++ ++ /* ++ * ACCEL output data registers contain the X-axis, Y-axis, and Z-axis ++ * 14-bit left-justified sample data and MAGN output data registers ++ * contain the X-axis, Y-axis, and Z-axis 16-bit sample data. Apply ++ * a signed 2 bits right shift to the readback raw data from ACCEL ++ * output data register and keep that from MAGN sensor as the origin. ++ * Value should be extended to 32 bit. 
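
The shift-and-extend arithmetic described above is easy to sanity-check in isolation. A small stand-alone C sketch of what the driver does with a 14-bit, left-justified accelerometer word (sign_extend32() re-implemented here to match the kernel helper; assumes arithmetic right shift of negative values, as kernel code does):

    #include <stdint.h>
    #include <stdio.h>

    static int32_t sign_extend32(uint32_t value, int index)
    {
            uint8_t shift = 31 - index;

            return (int32_t)(value << shift) >> shift;
    }

    int main(void)
    {
            /* most negative 14-bit sample, left-justified by two bits */
            int16_t raw = (int16_t)0x8000;
            int16_t shifted = raw >> 2;    /* arithmetic shift keeps the sign */
            int32_t val = sign_extend32((uint16_t)shifted, 15);

            printf("%d\n", val);           /* prints -8192 */
            return 0;
    }
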
++ */ ++ switch (chan_type) { ++ case IIO_ACCEL: ++ tmp = tmp >> 2; ++ break; ++ case IIO_MAGN: ++ /* Nothing to do */ ++ break; ++ default: ++ return -EINVAL; ++ } ++ + /* Convert to native endianness */ +- *val = sign_extend32(be16_to_cpu(data->buf[reg]), 15); ++ *val = sign_extend32(tmp, 15); + + return 0; + } +@@ -445,10 +508,9 @@ static int fxos8700_set_odr(struct fxos8700_data *data, enum fxos8700_sensor t, + if (i >= odr_num) + return -EINVAL; + +- return regmap_update_bits(data->regmap, +- FXOS8700_CTRL_REG1, +- FXOS8700_CTRL_ODR_MSK + FXOS8700_ACTIVE, +- fxos8700_odr[i].bits << 3 | active_mode); ++ val &= ~FXOS8700_CTRL_ODR_MSK; ++ val |= FIELD_PREP(FXOS8700_CTRL_ODR_MSK, fxos8700_odr[i].bits) | FXOS8700_ACTIVE; ++ return regmap_write(data->regmap, FXOS8700_CTRL_REG1, val); + } + + static int fxos8700_get_odr(struct fxos8700_data *data, enum fxos8700_sensor t, +@@ -461,7 +523,7 @@ static int fxos8700_get_odr(struct fxos8700_data *data, enum fxos8700_sensor t, + if (ret) + return ret; + +- val &= FXOS8700_CTRL_ODR_MSK; ++ val = FIELD_GET(FXOS8700_CTRL_ODR_MSK, val); + + for (i = 0; i < odr_num; i++) + if (val == fxos8700_odr[i].bits) +@@ -526,7 +588,7 @@ static IIO_CONST_ATTR(in_accel_sampling_frequency_available, + static IIO_CONST_ATTR(in_magn_sampling_frequency_available, + "1.5625 6.25 12.5 50 100 200 400 800"); + static IIO_CONST_ATTR(in_accel_scale_available, "0.000244 0.000488 0.000976"); +-static IIO_CONST_ATTR(in_magn_scale_available, "0.000001200"); ++static IIO_CONST_ATTR(in_magn_scale_available, "0.001000"); + + static struct attribute *fxos8700_attrs[] = { + &iio_const_attr_in_accel_sampling_frequency_available.dev_attr.attr, +@@ -592,14 +654,19 @@ static int fxos8700_chip_init(struct fxos8700_data *data, bool use_spi) + if (ret) + return ret; + +- /* Max ODR (800Hz individual or 400Hz hybrid), active mode */ +- ret = regmap_write(data->regmap, FXOS8700_CTRL_REG1, +- FXOS8700_CTRL_ODR_MAX | FXOS8700_ACTIVE); ++ /* ++ * Set max full-scale range (+/-8G) for ACCEL sensor in chip ++ * initialization then activate the device. 
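
The FIELD_PREP()/FIELD_GET() conversions in this file replace open-coded shifts against a GENMASK()-defined field, which is the idiomatic bitfield style. A minimal sketch with a hypothetical register layout:

    #include <linux/bitfield.h>
    #include <linux/bits.h>

    #define CTRL_ODR_MSK    GENMASK(5, 3)
    #define CTRL_ACTIVE     BIT(0)

    static unsigned int pack_ctrl(unsigned int odr_sel)
    {
            /* FIELD_PREP() shifts odr_sel into bits 5:3 of the register */
            return FIELD_PREP(CTRL_ODR_MSK, odr_sel) | CTRL_ACTIVE;
    }

    static unsigned int unpack_odr(unsigned int ctrl)
    {
            /* FIELD_GET() masks and shifts the field back down */
            return FIELD_GET(CTRL_ODR_MSK, ctrl);
    }
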
++ */ ++ ret = regmap_write(data->regmap, FXOS8700_XYZ_DATA_CFG, MODE_8G); + if (ret) + return ret; + +- /* Set for max full-scale range (+/-8G) */ +- return regmap_write(data->regmap, FXOS8700_XYZ_DATA_CFG, MODE_8G); ++ /* Max ODR (800Hz individual or 400Hz hybrid), active mode */ ++ return regmap_update_bits(data->regmap, FXOS8700_CTRL_REG1, ++ FXOS8700_CTRL_ODR_MSK | FXOS8700_ACTIVE, ++ FIELD_PREP(FXOS8700_CTRL_ODR_MSK, FXOS8700_CTRL_ODR_MAX) | ++ FXOS8700_ACTIVE); + } + + static void fxos8700_chip_uninit(void *data) diff --git a/drivers/iio/imu/inv_icm42600/inv_icm42600.h b/drivers/iio/imu/inv_icm42600/inv_icm42600.h index c0f5059b13b31..995a9dc06521d 100644 --- a/drivers/iio/imu/inv_icm42600/inv_icm42600.h @@ -142158,6 +170552,46 @@ index 2dbb37e09b8cf..a7f5d432c95d9 100644 INIT_LIST_HEAD(&iio_dev_opaque->buffer_list); INIT_LIST_HEAD(&iio_dev_opaque->ioctl_handlers); +diff --git a/drivers/iio/industrialio-event.c b/drivers/iio/industrialio-event.c +index d0732eac0f0ac..07bf47a1a3567 100644 +--- a/drivers/iio/industrialio-event.c ++++ b/drivers/iio/industrialio-event.c +@@ -549,7 +549,7 @@ int iio_device_register_eventset(struct iio_dev *indio_dev) + + ret = iio_device_register_sysfs_group(indio_dev, &ev_int->group); + if (ret) +- goto error_free_setup_event_lines; ++ goto error_free_group_attrs; + + ev_int->ioctl_handler.ioctl = iio_event_ioctl; + iio_device_ioctl_handler_register(&iio_dev_opaque->indio_dev, +@@ -557,6 +557,8 @@ int iio_device_register_eventset(struct iio_dev *indio_dev) + + return 0; + ++error_free_group_attrs: ++ kfree(ev_int->group.attrs); + error_free_setup_event_lines: + iio_free_chan_devattr_list(&ev_int->dev_attr_list); + kfree(ev_int); +diff --git a/drivers/iio/industrialio-sw-trigger.c b/drivers/iio/industrialio-sw-trigger.c +index 9ae793a70b8bf..a7714d32a6418 100644 +--- a/drivers/iio/industrialio-sw-trigger.c ++++ b/drivers/iio/industrialio-sw-trigger.c +@@ -58,8 +58,12 @@ int iio_register_sw_trigger_type(struct iio_sw_trigger_type *t) + + t->group = configfs_register_default_group(iio_triggers_group, t->name, + &iio_trigger_type_group_type); +- if (IS_ERR(t->group)) ++ if (IS_ERR(t->group)) { ++ mutex_lock(&iio_trigger_types_lock); ++ list_del(&t->list); ++ mutex_unlock(&iio_trigger_types_lock); + ret = PTR_ERR(t->group); ++ } + + return ret; + } diff --git a/drivers/iio/industrialio-trigger.c b/drivers/iio/industrialio-trigger.c index b23caa2f2aa1f..f504ed351b3e2 100644 --- a/drivers/iio/industrialio-trigger.c @@ -142321,6 +170755,56 @@ index 391a3380a1d10..bf9ce01c854bb 100644 break; case IIO_VAL_INT_PLUS_MICRO: if (scale_val2 < 0) +diff --git a/drivers/iio/light/Kconfig b/drivers/iio/light/Kconfig +index a62c7b4b86784..b46eac71941c9 100644 +--- a/drivers/iio/light/Kconfig ++++ b/drivers/iio/light/Kconfig +@@ -294,6 +294,8 @@ config RPR0521 + tristate "ROHM RPR0521 ALS and proximity sensor driver" + depends on I2C + select REGMAP_I2C ++ select IIO_BUFFER ++ select IIO_TRIGGERED_BUFFER + help + Say Y here if you want to build support for ROHM's RPR0521 + ambient light and proximity sensor device. 
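
The two select lines added to the RPR0521 entry matter because the driver calls into the triggered-buffer core; without them a randconfig build can end up with undefined iio_triggered_buffer symbols at link time. A skeleton of the kind of call that creates the dependency (hypothetical names; the real driver's handler does more than this):

    #include <linux/interrupt.h>
    #include <linux/iio/iio.h>
    #include <linux/iio/triggered_buffer.h>
    #include <linux/iio/trigger_consumer.h>

    static irqreturn_t example_trigger_handler(int irq, void *p)
    {
            struct iio_poll_func *pf = p;

            /* a real handler would push samples to the buffer here */
            iio_trigger_notify_done(pf->indio_dev->trig);
            return IRQ_HANDLED;
    }

    static int example_setup_buffer(struct device *dev,
                                    struct iio_dev *indio_dev)
    {
            /* provided by IIO_TRIGGERED_BUFFER, hence the Kconfig select */
            return devm_iio_triggered_buffer_setup(dev, indio_dev,
                                                   iio_pollfunc_store_time,
                                                   example_trigger_handler,
                                                   NULL);
    }
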
+diff --git a/drivers/iio/light/apds9960.c b/drivers/iio/light/apds9960.c +index 4141c0fa7bc46..7c4353317337e 100644 +--- a/drivers/iio/light/apds9960.c ++++ b/drivers/iio/light/apds9960.c +@@ -54,9 +54,6 @@ + #define APDS9960_REG_CONTROL_PGAIN_MASK_SHIFT 2 + + #define APDS9960_REG_CONFIG_2 0x90 +-#define APDS9960_REG_CONFIG_2_GGAIN_MASK 0x60 +-#define APDS9960_REG_CONFIG_2_GGAIN_MASK_SHIFT 5 +- + #define APDS9960_REG_ID 0x92 + + #define APDS9960_REG_STATUS 0x93 +@@ -77,6 +74,9 @@ + #define APDS9960_REG_GCONF_1_GFIFO_THRES_MASK_SHIFT 6 + + #define APDS9960_REG_GCONF_2 0xa3 ++#define APDS9960_REG_GCONF_2_GGAIN_MASK 0x60 ++#define APDS9960_REG_GCONF_2_GGAIN_MASK_SHIFT 5 ++ + #define APDS9960_REG_GOFFSET_U 0xa4 + #define APDS9960_REG_GOFFSET_D 0xa5 + #define APDS9960_REG_GPULSE 0xa6 +@@ -396,9 +396,9 @@ static int apds9960_set_pxs_gain(struct apds9960_data *data, int val) + } + + ret = regmap_update_bits(data->regmap, +- APDS9960_REG_CONFIG_2, +- APDS9960_REG_CONFIG_2_GGAIN_MASK, +- idx << APDS9960_REG_CONFIG_2_GGAIN_MASK_SHIFT); ++ APDS9960_REG_GCONF_2, ++ APDS9960_REG_GCONF_2_GGAIN_MASK, ++ idx << APDS9960_REG_GCONF_2_GGAIN_MASK_SHIFT); + if (!ret) + data->pxs_gain = idx; + mutex_unlock(&data->lock); diff --git a/drivers/iio/light/cros_ec_light_prox.c b/drivers/iio/light/cros_ec_light_prox.c index de472f23d1cba..16b893bae3881 100644 --- a/drivers/iio/light/cros_ec_light_prox.c @@ -142395,6 +170879,19 @@ index 07e91846307c7..fc63856ed54de 100644 mutex_unlock(&data->lock); return IRQ_HANDLED; +diff --git a/drivers/iio/light/tsl2583.c b/drivers/iio/light/tsl2583.c +index 7e101d5f72eea..d696d19e2e8e9 100644 +--- a/drivers/iio/light/tsl2583.c ++++ b/drivers/iio/light/tsl2583.c +@@ -858,7 +858,7 @@ static int tsl2583_probe(struct i2c_client *clientp, + TSL2583_POWER_OFF_DELAY_MS); + pm_runtime_use_autosuspend(&clientp->dev); + +- ret = devm_iio_device_register(indio_dev->dev.parent, indio_dev); ++ ret = iio_device_register(indio_dev); + if (ret) { + dev_err(&clientp->dev, "%s: iio registration failed\n", + __func__); diff --git a/drivers/iio/magnetometer/ak8975.c b/drivers/iio/magnetometer/ak8975.c index 42b8a2680e3aa..1509fd0cbb50f 100644 --- a/drivers/iio/magnetometer/ak8975.c @@ -142846,6 +171343,290 @@ index 0730380ceb692..cf8b92fae1b3d 100644 return rc; rc = devm_iio_device_register(&client->dev, iio); +diff --git a/drivers/iio/pressure/ms5611.h b/drivers/iio/pressure/ms5611.h +index bc06271fa38bc..5e2d2d4d87b56 100644 +--- a/drivers/iio/pressure/ms5611.h ++++ b/drivers/iio/pressure/ms5611.h +@@ -25,13 +25,6 @@ enum { + MS5607, + }; + +-struct ms5611_chip_info { +- u16 prom[MS5611_PROM_WORDS_NB]; +- +- int (*temp_and_pressure_compensate)(struct ms5611_chip_info *chip_info, +- s32 *temp, s32 *pressure); +-}; +- + /* + * OverSampling Rate descriptor. 
+ * Warning: cmd MUST be kept aligned on a word boundary (see +@@ -50,12 +43,15 @@ struct ms5611_state { + const struct ms5611_osr *pressure_osr; + const struct ms5611_osr *temp_osr; + +- int (*reset)(struct device *dev); +- int (*read_prom_word)(struct device *dev, int index, u16 *word); +- int (*read_adc_temp_and_pressure)(struct device *dev, ++ u16 prom[MS5611_PROM_WORDS_NB]; ++ ++ int (*reset)(struct ms5611_state *st); ++ int (*read_prom_word)(struct ms5611_state *st, int index, u16 *word); ++ int (*read_adc_temp_and_pressure)(struct ms5611_state *st, + s32 *temp, s32 *pressure); + +- struct ms5611_chip_info *chip_info; ++ int (*compensate_temp_and_pressure)(struct ms5611_state *st, s32 *temp, ++ s32 *pressure); + struct regulator *vdd; + }; + +diff --git a/drivers/iio/pressure/ms5611_core.c b/drivers/iio/pressure/ms5611_core.c +index 214b0d25f5980..874a73b3ea9d6 100644 +--- a/drivers/iio/pressure/ms5611_core.c ++++ b/drivers/iio/pressure/ms5611_core.c +@@ -85,8 +85,7 @@ static int ms5611_read_prom(struct iio_dev *indio_dev) + struct ms5611_state *st = iio_priv(indio_dev); + + for (i = 0; i < MS5611_PROM_WORDS_NB; i++) { +- ret = st->read_prom_word(&indio_dev->dev, +- i, &st->chip_info->prom[i]); ++ ret = st->read_prom_word(st, i, &st->prom[i]); + if (ret < 0) { + dev_err(&indio_dev->dev, + "failed to read prom at %d\n", i); +@@ -94,7 +93,7 @@ static int ms5611_read_prom(struct iio_dev *indio_dev) + } + } + +- if (!ms5611_prom_is_valid(st->chip_info->prom, MS5611_PROM_WORDS_NB)) { ++ if (!ms5611_prom_is_valid(st->prom, MS5611_PROM_WORDS_NB)) { + dev_err(&indio_dev->dev, "PROM integrity check failed\n"); + return -ENODEV; + } +@@ -108,28 +107,27 @@ static int ms5611_read_temp_and_pressure(struct iio_dev *indio_dev, + int ret; + struct ms5611_state *st = iio_priv(indio_dev); + +- ret = st->read_adc_temp_and_pressure(&indio_dev->dev, temp, pressure); ++ ret = st->read_adc_temp_and_pressure(st, temp, pressure); + if (ret < 0) { + dev_err(&indio_dev->dev, + "failed to read temperature and pressure\n"); + return ret; + } + +- return st->chip_info->temp_and_pressure_compensate(st->chip_info, +- temp, pressure); ++ return st->compensate_temp_and_pressure(st, temp, pressure); + } + +-static int ms5611_temp_and_pressure_compensate(struct ms5611_chip_info *chip_info, ++static int ms5611_temp_and_pressure_compensate(struct ms5611_state *st, + s32 *temp, s32 *pressure) + { + s32 t = *temp, p = *pressure; + s64 off, sens, dt; + +- dt = t - (chip_info->prom[5] << 8); +- off = ((s64)chip_info->prom[2] << 16) + ((chip_info->prom[4] * dt) >> 7); +- sens = ((s64)chip_info->prom[1] << 15) + ((chip_info->prom[3] * dt) >> 8); ++ dt = t - (st->prom[5] << 8); ++ off = ((s64)st->prom[2] << 16) + ((st->prom[4] * dt) >> 7); ++ sens = ((s64)st->prom[1] << 15) + ((st->prom[3] * dt) >> 8); + +- t = 2000 + ((chip_info->prom[6] * dt) >> 23); ++ t = 2000 + ((st->prom[6] * dt) >> 23); + if (t < 2000) { + s64 off2, sens2, t2; + +@@ -155,17 +153,17 @@ static int ms5611_temp_and_pressure_compensate(struct ms5611_chip_info *chip_inf + return 0; + } + +-static int ms5607_temp_and_pressure_compensate(struct ms5611_chip_info *chip_info, ++static int ms5607_temp_and_pressure_compensate(struct ms5611_state *st, + s32 *temp, s32 *pressure) + { + s32 t = *temp, p = *pressure; + s64 off, sens, dt; + +- dt = t - (chip_info->prom[5] << 8); +- off = ((s64)chip_info->prom[2] << 17) + ((chip_info->prom[4] * dt) >> 6); +- sens = ((s64)chip_info->prom[1] << 16) + ((chip_info->prom[3] * dt) >> 7); ++ dt = t - (st->prom[5] << 8); ++ off = 
((s64)st->prom[2] << 17) + ((st->prom[4] * dt) >> 6); ++ sens = ((s64)st->prom[1] << 16) + ((st->prom[3] * dt) >> 7); + +- t = 2000 + ((chip_info->prom[6] * dt) >> 23); ++ t = 2000 + ((st->prom[6] * dt) >> 23); + if (t < 2000) { + s64 off2, sens2, t2, tmp; + +@@ -196,7 +194,7 @@ static int ms5611_reset(struct iio_dev *indio_dev) + int ret; + struct ms5611_state *st = iio_priv(indio_dev); + +- ret = st->reset(&indio_dev->dev); ++ ret = st->reset(st); + if (ret < 0) { + dev_err(&indio_dev->dev, "failed to reset device\n"); + return ret; +@@ -343,15 +341,6 @@ static int ms5611_write_raw(struct iio_dev *indio_dev, + + static const unsigned long ms5611_scan_masks[] = {0x3, 0}; + +-static struct ms5611_chip_info chip_info_tbl[] = { +- [MS5611] = { +- .temp_and_pressure_compensate = ms5611_temp_and_pressure_compensate, +- }, +- [MS5607] = { +- .temp_and_pressure_compensate = ms5607_temp_and_pressure_compensate, +- } +-}; +- + static const struct iio_chan_spec ms5611_channels[] = { + { + .type = IIO_PRESSURE, +@@ -434,7 +423,20 @@ int ms5611_probe(struct iio_dev *indio_dev, struct device *dev, + struct ms5611_state *st = iio_priv(indio_dev); + + mutex_init(&st->lock); +- st->chip_info = &chip_info_tbl[type]; ++ ++ switch (type) { ++ case MS5611: ++ st->compensate_temp_and_pressure = ++ ms5611_temp_and_pressure_compensate; ++ break; ++ case MS5607: ++ st->compensate_temp_and_pressure = ++ ms5607_temp_and_pressure_compensate; ++ break; ++ default: ++ return -EINVAL; ++ } ++ + st->temp_osr = + &ms5611_avail_temp_osr[ARRAY_SIZE(ms5611_avail_temp_osr) - 1]; + st->pressure_osr = +diff --git a/drivers/iio/pressure/ms5611_i2c.c b/drivers/iio/pressure/ms5611_i2c.c +index 7c04f730430c7..cccc40f7df0b9 100644 +--- a/drivers/iio/pressure/ms5611_i2c.c ++++ b/drivers/iio/pressure/ms5611_i2c.c +@@ -20,17 +20,15 @@ + + #include "ms5611.h" + +-static int ms5611_i2c_reset(struct device *dev) ++static int ms5611_i2c_reset(struct ms5611_state *st) + { +- struct ms5611_state *st = iio_priv(dev_to_iio_dev(dev)); +- + return i2c_smbus_write_byte(st->client, MS5611_RESET); + } + +-static int ms5611_i2c_read_prom_word(struct device *dev, int index, u16 *word) ++static int ms5611_i2c_read_prom_word(struct ms5611_state *st, int index, ++ u16 *word) + { + int ret; +- struct ms5611_state *st = iio_priv(dev_to_iio_dev(dev)); + + ret = i2c_smbus_read_word_swapped(st->client, + MS5611_READ_PROM_WORD + (index << 1)); +@@ -57,11 +55,10 @@ static int ms5611_i2c_read_adc(struct ms5611_state *st, s32 *val) + return 0; + } + +-static int ms5611_i2c_read_adc_temp_and_pressure(struct device *dev, ++static int ms5611_i2c_read_adc_temp_and_pressure(struct ms5611_state *st, + s32 *temp, s32 *pressure) + { + int ret; +- struct ms5611_state *st = iio_priv(dev_to_iio_dev(dev)); + const struct ms5611_osr *osr = st->temp_osr; + + ret = i2c_smbus_write_byte(st->client, osr->cmd); +diff --git a/drivers/iio/pressure/ms5611_spi.c b/drivers/iio/pressure/ms5611_spi.c +index 45d3a7d5be8e4..3039fe8aa2a2d 100644 +--- a/drivers/iio/pressure/ms5611_spi.c ++++ b/drivers/iio/pressure/ms5611_spi.c +@@ -15,18 +15,17 @@ + + #include "ms5611.h" + +-static int ms5611_spi_reset(struct device *dev) ++static int ms5611_spi_reset(struct ms5611_state *st) + { + u8 cmd = MS5611_RESET; +- struct ms5611_state *st = iio_priv(dev_to_iio_dev(dev)); + + return spi_write_then_read(st->client, &cmd, 1, NULL, 0); + } + +-static int ms5611_spi_read_prom_word(struct device *dev, int index, u16 *word) ++static int ms5611_spi_read_prom_word(struct ms5611_state *st, int index, ++ 
u16 *word) + { + int ret; +- struct ms5611_state *st = iio_priv(dev_to_iio_dev(dev)); + + ret = spi_w8r16be(st->client, MS5611_READ_PROM_WORD + (index << 1)); + if (ret < 0) +@@ -37,11 +36,10 @@ static int ms5611_spi_read_prom_word(struct device *dev, int index, u16 *word) + return 0; + } + +-static int ms5611_spi_read_adc(struct device *dev, s32 *val) ++static int ms5611_spi_read_adc(struct ms5611_state *st, s32 *val) + { + int ret; + u8 buf[3] = { MS5611_READ_ADC }; +- struct ms5611_state *st = iio_priv(dev_to_iio_dev(dev)); + + ret = spi_write_then_read(st->client, buf, 1, buf, 3); + if (ret < 0) +@@ -52,11 +50,10 @@ static int ms5611_spi_read_adc(struct device *dev, s32 *val) + return 0; + } + +-static int ms5611_spi_read_adc_temp_and_pressure(struct device *dev, ++static int ms5611_spi_read_adc_temp_and_pressure(struct ms5611_state *st, + s32 *temp, s32 *pressure) + { + int ret; +- struct ms5611_state *st = iio_priv(dev_to_iio_dev(dev)); + const struct ms5611_osr *osr = st->temp_osr; + + /* +@@ -68,7 +65,7 @@ static int ms5611_spi_read_adc_temp_and_pressure(struct device *dev, + return ret; + + usleep_range(osr->conv_usec, osr->conv_usec + (osr->conv_usec / 10UL)); +- ret = ms5611_spi_read_adc(dev, temp); ++ ret = ms5611_spi_read_adc(st, temp); + if (ret < 0) + return ret; + +@@ -78,7 +75,7 @@ static int ms5611_spi_read_adc_temp_and_pressure(struct device *dev, + return ret; + + usleep_range(osr->conv_usec, osr->conv_usec + (osr->conv_usec / 10UL)); +- return ms5611_spi_read_adc(dev, pressure); ++ return ms5611_spi_read_adc(st, pressure); + } + + static int ms5611_spi_probe(struct spi_device *spi) +@@ -94,7 +91,7 @@ static int ms5611_spi_probe(struct spi_device *spi) + spi_set_drvdata(spi, indio_dev); + + spi->mode = SPI_MODE_0; +- spi->max_speed_hz = 20000000; ++ spi->max_speed_hz = min(spi->max_speed_hz, 20000000U); + spi->bits_per_word = 8; + ret = spi_setup(spi); + if (ret < 0) diff --git a/drivers/iio/pressure/st_pressure_i2c.c b/drivers/iio/pressure/st_pressure_i2c.c index 52fa98f24478d..6215de677017e 100644 --- a/drivers/iio/pressure/st_pressure_i2c.c @@ -142916,8 +171697,71 @@ index cf38144b6f954..13a87d3e3544f 100644 return -ETIMEDOUT; vl53l0x_clear_irq(data); +diff --git a/drivers/iio/temperature/ltc2983.c b/drivers/iio/temperature/ltc2983.c +index 3b4a0e60e6059..b2ae2d2c7eefc 100644 +--- a/drivers/iio/temperature/ltc2983.c ++++ b/drivers/iio/temperature/ltc2983.c +@@ -205,6 +205,7 @@ struct ltc2983_data { + * Holds the converted temperature + */ + __be32 temp ____cacheline_aligned; ++ __be32 chan_val; + }; + + struct ltc2983_sensor { +@@ -309,19 +310,18 @@ static int __ltc2983_fault_handler(const struct ltc2983_data *st, + return 0; + } + +-static int __ltc2983_chan_assign_common(const struct ltc2983_data *st, ++static int __ltc2983_chan_assign_common(struct ltc2983_data *st, + const struct ltc2983_sensor *sensor, + u32 chan_val) + { + u32 reg = LTC2983_CHAN_START_ADDR(sensor->chan); +- __be32 __chan_val; + + chan_val |= LTC2983_CHAN_TYPE(sensor->type); + dev_dbg(&st->spi->dev, "Assign reg:0x%04X, val:0x%08X\n", reg, + chan_val); +- __chan_val = cpu_to_be32(chan_val); +- return regmap_bulk_write(st->regmap, reg, &__chan_val, +- sizeof(__chan_val)); ++ st->chan_val = cpu_to_be32(chan_val); ++ return regmap_bulk_write(st->regmap, reg, &st->chan_val, ++ sizeof(st->chan_val)); + } + + static int __ltc2983_chan_custom_sensor_assign(struct ltc2983_data *st, +@@ -1376,13 +1376,6 @@ static int ltc2983_setup(struct ltc2983_data *st, bool assign_iio) + return ret; + } + +- 
st->iio_chan = devm_kzalloc(&st->spi->dev, +- st->iio_channels * sizeof(*st->iio_chan), +- GFP_KERNEL); +- +- if (!st->iio_chan) +- return -ENOMEM; +- + ret = regmap_update_bits(st->regmap, LTC2983_GLOBAL_CONFIG_REG, + LTC2983_NOTCH_FREQ_MASK, + LTC2983_NOTCH_FREQ(st->filter_notch_freq)); +@@ -1494,6 +1487,12 @@ static int ltc2983_probe(struct spi_device *spi) + if (ret) + return ret; + ++ st->iio_chan = devm_kzalloc(&spi->dev, ++ st->iio_channels * sizeof(*st->iio_chan), ++ GFP_KERNEL); ++ if (!st->iio_chan) ++ return -ENOMEM; ++ + ret = ltc2983_setup(st, true); + if (ret) + return ret; diff --git a/drivers/iio/trigger/iio-trig-sysfs.c b/drivers/iio/trigger/iio-trig-sysfs.c -index e9adfff45b39b..bec9b94e088b9 100644 +index e9adfff45b39b..33986e9963a5a 100644 --- a/drivers/iio/trigger/iio-trig-sysfs.c +++ b/drivers/iio/trigger/iio-trig-sysfs.c @@ -195,6 +195,7 @@ static int iio_sysfs_trigger_remove(int id) @@ -142928,6 +171772,21 @@ index e9adfff45b39b..bec9b94e088b9 100644 iio_trigger_free(t->trig); list_del(&t->l); +@@ -207,9 +208,13 @@ static int iio_sysfs_trigger_remove(int id) + + static int __init iio_sysfs_trig_init(void) + { ++ int ret; + device_initialize(&iio_sysfs_trig_dev); + dev_set_name(&iio_sysfs_trig_dev, "iio_sysfs_trigger"); +- return device_add(&iio_sysfs_trig_dev); ++ ret = device_add(&iio_sysfs_trig_dev); ++ if (ret) ++ put_device(&iio_sysfs_trig_dev); ++ return ret; + } + module_init(iio_sysfs_trig_init); + diff --git a/drivers/iio/trigger/stm32-timer-trigger.c b/drivers/iio/trigger/stm32-timer-trigger.c index 33083877cd19d..4353b749ecef2 100644 --- a/drivers/iio/trigger/stm32-timer-trigger.c @@ -143029,7 +171888,7 @@ index c903b74f46a46..5c910f5c01b35 100644 spin_lock_irq(&cm_id_priv->lock); diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c -index 704ce595542c5..0da66dd40d6a8 100644 +index 704ce595542c5..fd192104fd8d3 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -67,8 +67,8 @@ static const char * const cma_events[] = { @@ -143069,6 +171928,15 @@ index 704ce595542c5..0da66dd40d6a8 100644 if (!memcmp(&gid, dgid, sizeof(gid))) { cma_dev = cur_dev; sgid = gid; +@@ -1427,7 +1433,7 @@ static bool validate_ipv4_net_dev(struct net_device *net_dev, + return false; + + memset(&fl4, 0, sizeof(fl4)); +- fl4.flowi4_iif = net_dev->ifindex; ++ fl4.flowi4_oif = net_dev->ifindex; + fl4.daddr = daddr; + fl4.saddr = saddr; + @@ -1712,8 +1718,8 @@ cma_ib_id_from_event(struct ib_cm_id *cm_id, } @@ -143184,7 +172052,7 @@ index 704ce595542c5..0da66dd40d6a8 100644 if (req.private_data_len) { diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c -index f4814bb7f082f..6ab46648af909 100644 +index f4814bb7f082f..ab2106a09f9c6 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -2461,7 +2461,8 @@ int ib_find_gid(struct ib_device *device, union ib_gid *gid, @@ -143197,8 +172065,114 @@ index f4814bb7f082f..6ab46648af909 100644 if (!memcmp(&tmp_gid, gid, sizeof *gid)) { *port_num = port; if (index) +@@ -2813,10 +2814,18 @@ static int __init ib_core_init(void) + + nldev_init(); + rdma_nl_register(RDMA_NL_LS, ibnl_ls_cb_table); +- roce_gid_mgmt_init(); ++ ret = roce_gid_mgmt_init(); ++ if (ret) { ++ pr_warn("Couldn't init RoCE GID management\n"); ++ goto err_parent; ++ } + + return 0; + ++err_parent: ++ rdma_nl_unregister(RDMA_NL_LS); ++ nldev_exit(); ++ unregister_pernet_device(&rdma_dev_net_ops); + err_compat: + unregister_blocking_lsm_notifier(&ibdev_lsm_nb); + err_sa: +@@ 
-2839,8 +2848,8 @@ err: + static void __exit ib_core_cleanup(void) + { + roce_gid_mgmt_cleanup(); +- nldev_exit(); + rdma_nl_unregister(RDMA_NL_LS); ++ nldev_exit(); + unregister_pernet_device(&rdma_dev_net_ops); + unregister_blocking_lsm_notifier(&ibdev_lsm_nb); + ib_sa_cleanup(); +diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c +index 1893aa613ad73..674344eb8e2f4 100644 +--- a/drivers/infiniband/core/mad.c ++++ b/drivers/infiniband/core/mad.c +@@ -59,9 +59,6 @@ static void create_mad_addr_info(struct ib_mad_send_wr_private *mad_send_wr, + struct ib_mad_qp_info *qp_info, + struct trace_event_raw_ib_mad_send_template *entry) + { +- u16 pkey; +- struct ib_device *dev = qp_info->port_priv->device; +- u32 pnum = qp_info->port_priv->port_num; + struct ib_ud_wr *wr = &mad_send_wr->send_wr; + struct rdma_ah_attr attr = {}; + +@@ -69,8 +66,6 @@ static void create_mad_addr_info(struct ib_mad_send_wr_private *mad_send_wr, + + /* These are common */ + entry->sl = attr.sl; +- ib_query_pkey(dev, pnum, wr->pkey_index, &pkey); +- entry->pkey = pkey; + entry->rqpn = wr->remote_qpn; + entry->rqkey = wr->remote_qkey; + entry->dlid = rdma_ah_get_dlid(&attr); +diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c +index e9b4b2cccaa0f..7ad3ba7d5a0a1 100644 +--- a/drivers/infiniband/core/nldev.c ++++ b/drivers/infiniband/core/nldev.c +@@ -511,7 +511,7 @@ static int fill_res_qp_entry(struct sk_buff *msg, bool has_cap_net_admin, + + /* In create_qp() port is not set yet */ + if (qp->port && nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, qp->port)) +- return -EINVAL; ++ return -EMSGSIZE; + + ret = nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, qp->qp_num); + if (ret) +@@ -550,7 +550,7 @@ static int fill_res_cm_id_entry(struct sk_buff *msg, bool has_cap_net_admin, + struct rdma_cm_id *cm_id = &id_priv->id; + + if (port && port != cm_id->port_num) +- return 0; ++ return -EAGAIN; + + if (cm_id->port_num && + nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, cm_id->port_num)) +@@ -892,6 +892,8 @@ static int fill_stat_counter_qps(struct sk_buff *msg, + int ret = 0; + + table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_QP); ++ if (!table_attr) ++ return -EMSGSIZE; + + rt = &counter->device->res[RDMA_RESTRACK_QP]; + xa_lock(&rt->xa); +@@ -2349,7 +2351,7 @@ void __init nldev_init(void) + rdma_nl_register(RDMA_NL_NLDEV, nldev_cb_table); + } + +-void __exit nldev_exit(void) ++void nldev_exit(void) + { + rdma_nl_unregister(RDMA_NL_NLDEV); + } +diff --git a/drivers/infiniband/core/restrack.c b/drivers/infiniband/core/restrack.c +index 1f935d9f61785..01a499a8b88db 100644 +--- a/drivers/infiniband/core/restrack.c ++++ b/drivers/infiniband/core/restrack.c +@@ -343,8 +343,6 @@ void rdma_restrack_del(struct rdma_restrack_entry *res) + rt = &dev->res[res->type]; + + old = xa_erase(&rt->xa, res->id); +- if (res->type == RDMA_RESTRACK_MR) +- return; + WARN_ON(old != res); + + out: diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c -index 6146c3c1cbe5c..8d709986b88c7 100644 +index 6146c3c1cbe5c..253ccaf343f69 100644 --- a/drivers/infiniband/core/sysfs.c +++ b/drivers/infiniband/core/sysfs.c @@ -757,7 +757,7 @@ static void ib_port_release(struct kobject *kobj) @@ -143219,6 +172193,56 @@ index 6146c3c1cbe5c..8d709986b88c7 100644 attr->gid_tbl_len + attr->pkey_tbl_len), GFP_KERNEL); if (!p) +@@ -1198,6 +1198,9 @@ static struct ib_port *setup_port(struct ib_core_device *coredev, int port_num, + p->port_num = port_num; + kobject_init(&p->kobj, &port_type); + ++ if 
(device->port_data && is_full_dev) ++ device->port_data[port_num].sysfs = p; ++ + cur_group = p->groups_list; + ret = alloc_port_table_group("gids", &p->groups[0], p->attrs_list, + attr->gid_tbl_len, show_port_gid); +@@ -1243,9 +1246,6 @@ static struct ib_port *setup_port(struct ib_core_device *coredev, int port_num, + } + + list_add_tail(&p->kobj.entry, &coredev->port_list); +- if (device->port_data && is_full_dev) +- device->port_data[port_num].sysfs = p; +- + return p; + + err_groups: +@@ -1253,6 +1253,8 @@ err_groups: + err_del: + kobject_del(&p->kobj); + err_put: ++ if (device->port_data && is_full_dev) ++ device->port_data[port_num].sysfs = NULL; + kobject_put(&p->kobj); + return ERR_PTR(ret); + } +@@ -1261,14 +1263,17 @@ static void destroy_port(struct ib_core_device *coredev, struct ib_port *port) + { + bool is_full_dev = &port->ibdev->coredev == coredev; + +- if (port->ibdev->port_data && +- port->ibdev->port_data[port->port_num].sysfs == port) +- port->ibdev->port_data[port->port_num].sysfs = NULL; + list_del(&port->kobj.entry); + if (is_full_dev) + sysfs_remove_groups(&port->kobj, port->ibdev->ops.port_groups); ++ + sysfs_remove_groups(&port->kobj, port->groups_list); + kobject_del(&port->kobj); ++ ++ if (port->ibdev->port_data && ++ port->ibdev->port_data[port->port_num].sysfs == port) ++ port->ibdev->port_data[port->port_num].sysfs = NULL; ++ + kobject_put(&port->kobj); + } + diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c index 2b72c4fa95506..9d6ac9dff39a2 100644 --- a/drivers/infiniband/core/ucma.c @@ -143402,7 +172426,7 @@ index 2f2c7646fce17..a02916a3a79ce 100644 data[i] = &uapi->notsupp_method; uapi->write_methods = data; diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c -index 89a2b21976d63..b721085bb5971 100644 +index 89a2b21976d63..f0c07e4ba4388 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -1232,6 +1232,9 @@ static struct ib_qp *create_qp(struct ib_device *dev, struct ib_pd *pd, @@ -143428,6 +172452,27 @@ index 89a2b21976d63..b721085bb5971 100644 rdma_restrack_new(&mr->res, RDMA_RESTRACK_MR); rdma_restrack_parent_name(&mr->res, &pd->res); +@@ -2959,15 +2965,18 @@ EXPORT_SYMBOL(__rdma_block_iter_start); + bool __rdma_block_iter_next(struct ib_block_iter *biter) + { + unsigned int block_offset; ++ unsigned int sg_delta; + + if (!biter->__sg_nents || !biter->__sg) + return false; + + biter->__dma_addr = sg_dma_address(biter->__sg) + biter->__sg_advance; + block_offset = biter->__dma_addr & (BIT_ULL(biter->__pg_bit) - 1); +- biter->__sg_advance += BIT_ULL(biter->__pg_bit) - block_offset; ++ sg_delta = BIT_ULL(biter->__pg_bit) - block_offset; + +- if (biter->__sg_advance >= sg_dma_len(biter->__sg)) { ++ if (sg_dma_len(biter->__sg) - biter->__sg_advance > sg_delta) { ++ biter->__sg_advance += sg_delta; ++ } else { + biter->__sg_advance = 0; + biter->__sg = sg_next(biter->__sg); + biter->__sg_nents--; diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index 408dfbcc47b5e..b7ec3a3926785 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -143527,6 +172572,44 @@ index d20b4ef2c853d..ffbd9a89981e7 100644 init_attr->cap.max_send_wr = qhp->attr.sq_num_entries; init_attr->cap.max_recv_wr = qhp->attr.rq_num_entries; init_attr->cap.max_send_sge = qhp->attr.sq_max_sges; +diff --git a/drivers/infiniband/hw/efa/efa_main.c b/drivers/infiniband/hw/efa/efa_main.c +index 417dea5f90cfe..d6d48db866814 100644 +--- 
a/drivers/infiniband/hw/efa/efa_main.c ++++ b/drivers/infiniband/hw/efa/efa_main.c +@@ -1,6 +1,6 @@ + // SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause + /* +- * Copyright 2018-2021 Amazon.com, Inc. or its affiliates. All rights reserved. ++ * Copyright 2018-2022 Amazon.com, Inc. or its affiliates. All rights reserved. + */ + + #include <linux/module.h> +@@ -14,10 +14,12 @@ + + #define PCI_DEV_ID_EFA0_VF 0xefa0 + #define PCI_DEV_ID_EFA1_VF 0xefa1 ++#define PCI_DEV_ID_EFA2_VF 0xefa2 + + static const struct pci_device_id efa_pci_tbl[] = { + { PCI_VDEVICE(AMAZON, PCI_DEV_ID_EFA0_VF) }, + { PCI_VDEVICE(AMAZON, PCI_DEV_ID_EFA1_VF) }, ++ { PCI_VDEVICE(AMAZON, PCI_DEV_ID_EFA2_VF) }, + { } + }; + +diff --git a/drivers/infiniband/hw/hfi1/affinity.c b/drivers/infiniband/hw/hfi1/affinity.c +index 98c813ba4304c..4c403d9e90cb3 100644 +--- a/drivers/infiniband/hw/hfi1/affinity.c ++++ b/drivers/infiniband/hw/hfi1/affinity.c +@@ -178,6 +178,8 @@ out: + for (node = 0; node < node_affinity.num_possible_nodes; node++) + hfi1_per_node_cntr[node] = 1; + ++ pci_dev_put(dev); ++ + return 0; + } + diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index 37273dc0c03ca..b0d587254fe66 100644 --- a/drivers/infiniband/hw/hfi1/chip.c @@ -143554,7 +172637,7 @@ index de411884386bf..385e6cff0d279 100644 needset = (rcd->ctxt == HFI1_CTRL_CTXT) ? 0 : 1; diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c -index 1783a6ea5427b..686d170a5947e 100644 +index 1783a6ea5427b..1c1172aeb36e9 100644 --- a/drivers/infiniband/hw/hfi1/file_ops.c +++ b/drivers/infiniband/hw/hfi1/file_ops.c @@ -265,6 +265,8 @@ static ssize_t hfi1_write_iter(struct kiocb *kiocb, struct iov_iter *from) @@ -143578,6 +172661,76 @@ index 1783a6ea5427b..686d170a5947e 100644 user_init(uctxt); +@@ -1314,12 +1318,15 @@ static int user_exp_rcv_setup(struct hfi1_filedata *fd, unsigned long arg, + addr = arg + offsetof(struct hfi1_tid_info, tidcnt); + if (copy_to_user((void __user *)addr, &tinfo.tidcnt, + sizeof(tinfo.tidcnt))) +- return -EFAULT; ++ ret = -EFAULT; + + addr = arg + offsetof(struct hfi1_tid_info, length); +- if (copy_to_user((void __user *)addr, &tinfo.length, ++ if (!ret && copy_to_user((void __user *)addr, &tinfo.length, + sizeof(tinfo.length))) + ret = -EFAULT; ++ ++ if (ret) ++ hfi1_user_exp_rcv_invalid(fd, &tinfo); + } + + return ret; +diff --git a/drivers/infiniband/hw/hfi1/firmware.c b/drivers/infiniband/hw/hfi1/firmware.c +index 31e63e245ea92..ddf3217893f86 100644 +--- a/drivers/infiniband/hw/hfi1/firmware.c ++++ b/drivers/infiniband/hw/hfi1/firmware.c +@@ -1744,6 +1744,7 @@ int parse_platform_config(struct hfi1_devdata *dd) + + if (!dd->platform_config.data) { + dd_dev_err(dd, "%s: Missing config file\n", __func__); ++ ret = -EINVAL; + goto bail; + } + ptr = (u32 *)dd->platform_config.data; +@@ -1752,6 +1753,7 @@ int parse_platform_config(struct hfi1_devdata *dd) + ptr++; + if (magic_num != PLATFORM_CONFIG_MAGIC_NUM) { + dd_dev_err(dd, "%s: Bad config file\n", __func__); ++ ret = -EINVAL; + goto bail; + } + +@@ -1775,6 +1777,7 @@ int parse_platform_config(struct hfi1_devdata *dd) + if (file_length > dd->platform_config.size) { + dd_dev_info(dd, "%s:File claims to be larger than read size\n", + __func__); ++ ret = -EINVAL; + goto bail; + } else if (file_length < dd->platform_config.size) { + dd_dev_info(dd, +@@ -1795,6 +1798,7 @@ int parse_platform_config(struct hfi1_devdata *dd) + dd_dev_err(dd, "%s: Failed validation at offset %ld\n", + __func__, (ptr - (u32 *) + 
dd->platform_config.data)); ++ ret = -EINVAL; + goto bail; + } + +@@ -1838,6 +1842,7 @@ int parse_platform_config(struct hfi1_devdata *dd) + __func__, table_type, + (ptr - (u32 *) + dd->platform_config.data)); ++ ret = -EINVAL; + goto bail; /* We don't trust this file now */ + } + pcfgcache->config_tables[table_type].table = ptr; +@@ -1857,6 +1862,7 @@ int parse_platform_config(struct hfi1_devdata *dd) + __func__, table_type, + (ptr - + (u32 *)dd->platform_config.data)); ++ ret = -EINVAL; + goto bail; /* We don't trust this file now */ + } + pcfgcache->config_tables[table_type].table_metadata = diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c index e3679d076eaaf..7facc04cc36c3 100644 --- a/drivers/infiniband/hw/hfi1/init.c @@ -143795,6 +172948,20 @@ index 876cc78a22cca..7333646021bb8 100644 kfree(handler); } +diff --git a/drivers/infiniband/hw/hfi1/pio.c b/drivers/infiniband/hw/hfi1/pio.c +index 3d42bd2b36bd4..51ae58c02b15c 100644 +--- a/drivers/infiniband/hw/hfi1/pio.c ++++ b/drivers/infiniband/hw/hfi1/pio.c +@@ -913,8 +913,7 @@ void sc_disable(struct send_context *sc) + spin_unlock(&sc->release_lock); + + write_seqlock(&sc->waitlock); +- if (!list_empty(&sc->piowait)) +- list_move(&sc->piowait, &wake_list); ++ list_splice_init(&sc->piowait, &wake_list); + write_sequnlock(&sc->waitlock); + while (!list_empty(&wake_list)) { + struct iowait *wait; diff --git a/drivers/infiniband/hw/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c index 2b6c24b7b5865..a95b654f52540 100644 --- a/drivers/infiniband/hw/hfi1/sdma.c @@ -143828,6 +172995,403 @@ index 2b6c24b7b5865..a95b654f52540 100644 kfree(dd->per_sdma); dd->per_sdma = NULL; +diff --git a/drivers/infiniband/hw/hfi1/user_exp_rcv.c b/drivers/infiniband/hw/hfi1/user_exp_rcv.c +index 0c86e9d354f8e..1d2020c30ef3b 100644 +--- a/drivers/infiniband/hw/hfi1/user_exp_rcv.c ++++ b/drivers/infiniband/hw/hfi1/user_exp_rcv.c +@@ -23,18 +23,25 @@ static void cacheless_tid_rb_remove(struct hfi1_filedata *fdata, + static bool tid_rb_invalidate(struct mmu_interval_notifier *mni, + const struct mmu_notifier_range *range, + unsigned long cur_seq); ++static bool tid_cover_invalidate(struct mmu_interval_notifier *mni, ++ const struct mmu_notifier_range *range, ++ unsigned long cur_seq); + static int program_rcvarray(struct hfi1_filedata *fd, struct tid_user_buf *, + struct tid_group *grp, + unsigned int start, u16 count, + u32 *tidlist, unsigned int *tididx, + unsigned int *pmapped); +-static int unprogram_rcvarray(struct hfi1_filedata *fd, u32 tidinfo, +- struct tid_group **grp); ++static int unprogram_rcvarray(struct hfi1_filedata *fd, u32 tidinfo); ++static void __clear_tid_node(struct hfi1_filedata *fd, ++ struct tid_rb_node *node); + static void clear_tid_node(struct hfi1_filedata *fd, struct tid_rb_node *node); + + static const struct mmu_interval_notifier_ops tid_mn_ops = { + .invalidate = tid_rb_invalidate, + }; ++static const struct mmu_interval_notifier_ops tid_cover_ops = { ++ .invalidate = tid_cover_invalidate, ++}; + + /* + * Initialize context and file private data needed for Expected +@@ -153,16 +160,11 @@ static void unpin_rcv_pages(struct hfi1_filedata *fd, + static int pin_rcv_pages(struct hfi1_filedata *fd, struct tid_user_buf *tidbuf) + { + int pinned; +- unsigned int npages; ++ unsigned int npages = tidbuf->npages; + unsigned long vaddr = tidbuf->vaddr; + struct page **pages = NULL; + struct hfi1_devdata *dd = fd->uctxt->dd; + +- /* Get the number of pages the user buffer spans */ +- npages = num_user_pages(vaddr, 
tidbuf->length); +- if (!npages) +- return -EINVAL; +- + if (npages > fd->uctxt->expected_count) { + dd_dev_err(dd, "Expected buffer too big\n"); + return -EINVAL; +@@ -189,7 +191,6 @@ static int pin_rcv_pages(struct hfi1_filedata *fd, struct tid_user_buf *tidbuf) + return pinned; + } + tidbuf->pages = pages; +- tidbuf->npages = npages; + fd->tid_n_pinned += pinned; + return pinned; + } +@@ -253,53 +254,66 @@ int hfi1_user_exp_rcv_setup(struct hfi1_filedata *fd, + tididx = 0, mapped, mapped_pages = 0; + u32 *tidlist = NULL; + struct tid_user_buf *tidbuf; ++ unsigned long mmu_seq = 0; + + if (!PAGE_ALIGNED(tinfo->vaddr)) + return -EINVAL; ++ if (tinfo->length == 0) ++ return -EINVAL; + + tidbuf = kzalloc(sizeof(*tidbuf), GFP_KERNEL); + if (!tidbuf) + return -ENOMEM; + ++ mutex_init(&tidbuf->cover_mutex); + tidbuf->vaddr = tinfo->vaddr; + tidbuf->length = tinfo->length; ++ tidbuf->npages = num_user_pages(tidbuf->vaddr, tidbuf->length); + tidbuf->psets = kcalloc(uctxt->expected_count, sizeof(*tidbuf->psets), + GFP_KERNEL); + if (!tidbuf->psets) { +- kfree(tidbuf); +- return -ENOMEM; ++ ret = -ENOMEM; ++ goto fail_release_mem; ++ } ++ ++ if (fd->use_mn) { ++ ret = mmu_interval_notifier_insert( ++ &tidbuf->notifier, current->mm, ++ tidbuf->vaddr, tidbuf->npages * PAGE_SIZE, ++ &tid_cover_ops); ++ if (ret) ++ goto fail_release_mem; ++ mmu_seq = mmu_interval_read_begin(&tidbuf->notifier); + } + + pinned = pin_rcv_pages(fd, tidbuf); + if (pinned <= 0) { +- kfree(tidbuf->psets); +- kfree(tidbuf); +- return pinned; ++ ret = (pinned < 0) ? pinned : -ENOSPC; ++ goto fail_unpin; + } + + /* Find sets of physically contiguous pages */ + tidbuf->n_psets = find_phys_blocks(tidbuf, pinned); + +- /* +- * We don't need to access this under a lock since tid_used is per +- * process and the same process cannot be in hfi1_user_exp_rcv_clear() +- * and hfi1_user_exp_rcv_setup() at the same time. +- */ ++ /* Reserve the number of expected tids to be used. */ + spin_lock(&fd->tid_lock); + if (fd->tid_used + tidbuf->n_psets > fd->tid_limit) + pageset_count = fd->tid_limit - fd->tid_used; + else + pageset_count = tidbuf->n_psets; ++ fd->tid_used += pageset_count; + spin_unlock(&fd->tid_lock); + +- if (!pageset_count) +- goto bail; ++ if (!pageset_count) { ++ ret = -ENOSPC; ++ goto fail_unreserve; ++ } + + ngroups = pageset_count / dd->rcv_entries.group_size; + tidlist = kcalloc(pageset_count, sizeof(*tidlist), GFP_KERNEL); + if (!tidlist) { + ret = -ENOMEM; +- goto nomem; ++ goto fail_unreserve; + } + + tididx = 0; +@@ -395,43 +409,78 @@ int hfi1_user_exp_rcv_setup(struct hfi1_filedata *fd, + } + unlock: + mutex_unlock(&uctxt->exp_mutex); +-nomem: + hfi1_cdbg(TID, "total mapped: tidpairs:%u pages:%u (%d)", tididx, + mapped_pages, ret); +- if (tididx) { +- spin_lock(&fd->tid_lock); +- fd->tid_used += tididx; +- spin_unlock(&fd->tid_lock); +- tinfo->tidcnt = tididx; +- tinfo->length = mapped_pages * PAGE_SIZE; +- +- if (copy_to_user(u64_to_user_ptr(tinfo->tidlist), +- tidlist, sizeof(tidlist[0]) * tididx)) { +- /* +- * On failure to copy to the user level, we need to undo +- * everything done so far so we don't leak resources. 
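
The rewrite below replaces this undo-everything path with a fail_* unwind ladder (fail_unprogram, fail_unreserve, fail_unpin, fail_release_mem): each label releases exactly what was acquired before the failing step, in reverse order. In schematic form, with placeholder acquire/release steps:

    /* placeholders standing in for the pin/reserve/program steps */
    static int acquire_a(void) { return 0; }
    static int acquire_b(void) { return 0; }
    static int acquire_c(void) { return 0; }
    static void release_b(void) { }
    static void release_a(void) { }

    static int setup_example(void)
    {
            int ret;

            ret = acquire_a();
            if (ret)
                    return ret;

            ret = acquire_b();
            if (ret)
                    goto fail_a;

            ret = acquire_c();
            if (ret)
                    goto fail_b;

            return 0;

    fail_b:
            release_b();
    fail_a:
            release_a();
            return ret;
    }
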
+- */ +- tinfo->tidlist = (unsigned long)&tidlist; +- hfi1_user_exp_rcv_clear(fd, tinfo); +- tinfo->tidlist = 0; +- ret = -EFAULT; +- goto bail; ++ ++ /* fail if nothing was programmed, set error if none provided */ ++ if (tididx == 0) { ++ if (ret >= 0) ++ ret = -ENOSPC; ++ goto fail_unreserve; ++ } ++ ++ /* adjust reserved tid_used to actual count */ ++ spin_lock(&fd->tid_lock); ++ fd->tid_used -= pageset_count - tididx; ++ spin_unlock(&fd->tid_lock); ++ ++ /* unpin all pages not covered by a TID */ ++ unpin_rcv_pages(fd, tidbuf, NULL, mapped_pages, pinned - mapped_pages, ++ false); ++ ++ if (fd->use_mn) { ++ /* check for an invalidate during setup */ ++ bool fail = false; ++ ++ mutex_lock(&tidbuf->cover_mutex); ++ fail = mmu_interval_read_retry(&tidbuf->notifier, mmu_seq); ++ mutex_unlock(&tidbuf->cover_mutex); ++ ++ if (fail) { ++ ret = -EBUSY; ++ goto fail_unprogram; + } + } + +- /* +- * If not everything was mapped (due to insufficient RcvArray entries, +- * for example), unpin all unmapped pages so we can pin them nex time. +- */ +- if (mapped_pages != pinned) +- unpin_rcv_pages(fd, tidbuf, NULL, mapped_pages, +- (pinned - mapped_pages), false); +-bail: ++ tinfo->tidcnt = tididx; ++ tinfo->length = mapped_pages * PAGE_SIZE; ++ ++ if (copy_to_user(u64_to_user_ptr(tinfo->tidlist), ++ tidlist, sizeof(tidlist[0]) * tididx)) { ++ ret = -EFAULT; ++ goto fail_unprogram; ++ } ++ ++ if (fd->use_mn) ++ mmu_interval_notifier_remove(&tidbuf->notifier); ++ kfree(tidbuf->pages); + kfree(tidbuf->psets); ++ kfree(tidbuf); + kfree(tidlist); ++ return 0; ++ ++fail_unprogram: ++ /* unprogram, unmap, and unpin all allocated TIDs */ ++ tinfo->tidlist = (unsigned long)tidlist; ++ hfi1_user_exp_rcv_clear(fd, tinfo); ++ tinfo->tidlist = 0; ++ pinned = 0; /* nothing left to unpin */ ++ pageset_count = 0; /* nothing left reserved */ ++fail_unreserve: ++ spin_lock(&fd->tid_lock); ++ fd->tid_used -= pageset_count; ++ spin_unlock(&fd->tid_lock); ++fail_unpin: ++ if (fd->use_mn) ++ mmu_interval_notifier_remove(&tidbuf->notifier); ++ if (pinned > 0) ++ unpin_rcv_pages(fd, tidbuf, NULL, 0, pinned, false); ++fail_release_mem: + kfree(tidbuf->pages); ++ kfree(tidbuf->psets); + kfree(tidbuf); +- return ret > 0 ? 0 : ret; ++ kfree(tidlist); ++ return ret; + } + + int hfi1_user_exp_rcv_clear(struct hfi1_filedata *fd, +@@ -452,7 +501,7 @@ int hfi1_user_exp_rcv_clear(struct hfi1_filedata *fd, + + mutex_lock(&uctxt->exp_mutex); + for (tididx = 0; tididx < tinfo->tidcnt; tididx++) { +- ret = unprogram_rcvarray(fd, tidinfo[tididx], NULL); ++ ret = unprogram_rcvarray(fd, tidinfo[tididx]); + if (ret) { + hfi1_cdbg(TID, "Failed to unprogram rcv array %d", + ret); +@@ -707,6 +756,7 @@ static int set_rcvarray_entry(struct hfi1_filedata *fd, + } + + node->fdata = fd; ++ mutex_init(&node->invalidate_mutex); + node->phys = page_to_phys(pages[0]); + node->npages = npages; + node->rcventry = rcventry; +@@ -722,11 +772,6 @@ static int set_rcvarray_entry(struct hfi1_filedata *fd, + &tid_mn_ops); + if (ret) + goto out_unmap; +- /* +- * FIXME: This is in the wrong order, the notifier should be +- * established before the pages are pinned by pin_rcv_pages. 
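
That ordering requirement is what the new tid_cover_ops notifier earlier in this patch satisfies: insert the notifier first, then pin, then use the sequence count to detect an invalidation that raced with the setup. The general mmu_interval_notifier protocol, condensed into a sketch (hypothetical state struct; error handling trimmed):

    #include <linux/errno.h>
    #include <linux/mmu_notifier.h>
    #include <linux/sched.h>

    struct cover_state {
            struct mmu_interval_notifier notifier;
    };

    static int pin_with_invalidate_check(struct cover_state *st,
                                         unsigned long start, unsigned long len,
                                         const struct mmu_interval_notifier_ops *ops)
    {
            unsigned long seq;
            int ret;

            /* register interest in the range before touching its pages */
            ret = mmu_interval_notifier_insert(&st->notifier, current->mm,
                                               start, len, ops);
            if (ret)
                    return ret;

            seq = mmu_interval_read_begin(&st->notifier);

            /* ... pin the pages and program the hardware here ... */

            /* abort if an invalidation raced with the setup */
            if (mmu_interval_read_retry(&st->notifier, seq)) {
                    mmu_interval_notifier_remove(&st->notifier);
                    return -EBUSY;
            }

            return 0;
    }
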
+- */ +- mmu_interval_read_begin(&node->notifier); + } + fd->entry_to_rb[node->rcventry - uctxt->expected_base] = node; + +@@ -746,8 +791,7 @@ out_unmap: + return -EFAULT; + } + +-static int unprogram_rcvarray(struct hfi1_filedata *fd, u32 tidinfo, +- struct tid_group **grp) ++static int unprogram_rcvarray(struct hfi1_filedata *fd, u32 tidinfo) + { + struct hfi1_ctxtdata *uctxt = fd->uctxt; + struct hfi1_devdata *dd = uctxt->dd; +@@ -770,9 +814,6 @@ static int unprogram_rcvarray(struct hfi1_filedata *fd, u32 tidinfo, + if (!node || node->rcventry != (uctxt->expected_base + rcventry)) + return -EBADF; + +- if (grp) +- *grp = node->grp; +- + if (fd->use_mn) + mmu_interval_notifier_remove(&node->notifier); + cacheless_tid_rb_remove(fd, node); +@@ -780,23 +821,34 @@ static int unprogram_rcvarray(struct hfi1_filedata *fd, u32 tidinfo, + return 0; + } + +-static void clear_tid_node(struct hfi1_filedata *fd, struct tid_rb_node *node) ++static void __clear_tid_node(struct hfi1_filedata *fd, struct tid_rb_node *node) + { + struct hfi1_ctxtdata *uctxt = fd->uctxt; + struct hfi1_devdata *dd = uctxt->dd; + ++ mutex_lock(&node->invalidate_mutex); ++ if (node->freed) ++ goto done; ++ node->freed = true; ++ + trace_hfi1_exp_tid_unreg(uctxt->ctxt, fd->subctxt, node->rcventry, + node->npages, + node->notifier.interval_tree.start, node->phys, + node->dma_addr); + +- /* +- * Make sure device has seen the write before we unpin the +- * pages. +- */ ++ /* Make sure device has seen the write before pages are unpinned */ + hfi1_put_tid(dd, node->rcventry, PT_INVALID_FLUSH, 0, 0); + + unpin_rcv_pages(fd, NULL, node, 0, node->npages, true); ++done: ++ mutex_unlock(&node->invalidate_mutex); ++} ++ ++static void clear_tid_node(struct hfi1_filedata *fd, struct tid_rb_node *node) ++{ ++ struct hfi1_ctxtdata *uctxt = fd->uctxt; ++ ++ __clear_tid_node(fd, node); + + node->grp->used--; + node->grp->map &= ~(1 << (node->rcventry - node->grp->base)); +@@ -855,10 +907,16 @@ static bool tid_rb_invalidate(struct mmu_interval_notifier *mni, + if (node->freed) + return true; + ++ /* take action only if unmapping */ ++ if (range->event != MMU_NOTIFY_UNMAP) ++ return true; ++ + trace_hfi1_exp_tid_inval(uctxt->ctxt, fdata->subctxt, + node->notifier.interval_tree.start, + node->rcventry, node->npages, node->dma_addr); +- node->freed = true; ++ ++ /* clear the hardware rcvarray entry */ ++ __clear_tid_node(fdata, node); + + spin_lock(&fdata->invalid_lock); + if (fdata->invalid_tid_idx < uctxt->expected_count) { +@@ -888,6 +946,23 @@ static bool tid_rb_invalidate(struct mmu_interval_notifier *mni, + return true; + } + ++static bool tid_cover_invalidate(struct mmu_interval_notifier *mni, ++ const struct mmu_notifier_range *range, ++ unsigned long cur_seq) ++{ ++ struct tid_user_buf *tidbuf = ++ container_of(mni, struct tid_user_buf, notifier); ++ ++ /* take action only if unmapping */ ++ if (range->event == MMU_NOTIFY_UNMAP) { ++ mutex_lock(&tidbuf->cover_mutex); ++ mmu_interval_set_seq(mni, cur_seq); ++ mutex_unlock(&tidbuf->cover_mutex); ++ } ++ ++ return true; ++} ++ + static void cacheless_tid_rb_remove(struct hfi1_filedata *fdata, + struct tid_rb_node *tnode) + { +diff --git a/drivers/infiniband/hw/hfi1/user_exp_rcv.h b/drivers/infiniband/hw/hfi1/user_exp_rcv.h +index 8c53e416bf843..f8ee997d0050e 100644 +--- a/drivers/infiniband/hw/hfi1/user_exp_rcv.h ++++ b/drivers/infiniband/hw/hfi1/user_exp_rcv.h +@@ -16,6 +16,8 @@ struct tid_pageset { + }; + + struct tid_user_buf { ++ struct mmu_interval_notifier notifier; ++ struct mutex 
cover_mutex; + unsigned long vaddr; + unsigned long length; + unsigned int npages; +@@ -27,6 +29,7 @@ struct tid_user_buf { + struct tid_rb_node { + struct mmu_interval_notifier notifier; + struct hfi1_filedata *fdata; ++ struct mutex invalidate_mutex; /* covers hw removal */ + unsigned long phys; + struct tid_group *grp; + u32 rcventry; diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c index 26bea51869bf0..ef8e0bdacb516 100644 --- a/drivers/infiniband/hw/hfi1/verbs.c @@ -143875,7 +173439,7 @@ index 9467c39e3d288..c94991356a2e8 100644 u32 max_wqes; u32 max_srq_wrs; diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c -index d5f3faa1627a4..1dbad159f3792 100644 +index d5f3faa1627a4..79d92b7899849 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -33,6 +33,7 @@ @@ -143886,7 +173450,297 @@ index d5f3faa1627a4..1dbad159f3792 100644 #include <linux/kernel.h> #include <linux/types.h> #include <net/addrconf.h> -@@ -1050,9 +1051,14 @@ static u32 hns_roce_v2_cmd_hw_resetting(struct hns_roce_dev *hr_dev, +@@ -81,7 +82,6 @@ static const u32 hns_roce_op_code[] = { + HR_OPC_MAP(ATOMIC_CMP_AND_SWP, ATOM_CMP_AND_SWAP), + HR_OPC_MAP(ATOMIC_FETCH_AND_ADD, ATOM_FETCH_AND_ADD), + HR_OPC_MAP(SEND_WITH_INV, SEND_WITH_INV), +- HR_OPC_MAP(LOCAL_INV, LOCAL_INV), + HR_OPC_MAP(MASKED_ATOMIC_CMP_AND_SWP, ATOM_MSK_CMP_AND_SWAP), + HR_OPC_MAP(MASKED_ATOMIC_FETCH_AND_ADD, ATOM_MSK_FETCH_AND_ADD), + HR_OPC_MAP(REG_MR, FAST_REG_PMR), +@@ -148,8 +148,15 @@ static void set_atomic_seg(const struct ib_send_wr *wr, + aseg->cmp_data = 0; + } + +- roce_set_field(rc_sq_wqe->byte_16, V2_RC_SEND_WQE_BYTE_16_SGE_NUM_M, +- V2_RC_SEND_WQE_BYTE_16_SGE_NUM_S, valid_num_sge); ++ hr_reg_write(rc_sq_wqe, RC_SEND_WQE_SGE_NUM, valid_num_sge); ++} ++ ++static unsigned int get_std_sge_num(struct hns_roce_qp *qp) ++{ ++ if (qp->ibqp.qp_type == IB_QPT_GSI || qp->ibqp.qp_type == IB_QPT_UD) ++ return 0; ++ ++ return HNS_ROCE_SGE_IN_WQE; + } + + static int fill_ext_sge_inl_data(struct hns_roce_qp *qp, +@@ -157,16 +164,16 @@ static int fill_ext_sge_inl_data(struct hns_roce_qp *qp, + unsigned int *sge_idx, u32 msg_len) + { + struct ib_device *ibdev = &(to_hr_dev(qp->ibqp.device))->ib_dev; +- unsigned int dseg_len = sizeof(struct hns_roce_v2_wqe_data_seg); +- unsigned int ext_sge_sz = qp->sq.max_gs * dseg_len; + unsigned int left_len_in_pg; + unsigned int idx = *sge_idx; ++ unsigned int std_sge_num; + unsigned int i = 0; + unsigned int len; + void *addr; + void *dseg; + +- if (msg_len > ext_sge_sz) { ++ std_sge_num = get_std_sge_num(qp); ++ if (msg_len > (qp->sq.max_gs - std_sge_num) * HNS_ROCE_SGE_SIZE) { + ibdev_err(ibdev, + "no enough extended sge space for inline data.\n"); + return -EINVAL; +@@ -186,7 +193,7 @@ static int fill_ext_sge_inl_data(struct hns_roce_qp *qp, + if (len <= left_len_in_pg) { + memcpy(dseg, addr, len); + +- idx += len / dseg_len; ++ idx += len / HNS_ROCE_SGE_SIZE; + + i++; + if (i >= wr->num_sge) +@@ -201,7 +208,7 @@ static int fill_ext_sge_inl_data(struct hns_roce_qp *qp, + + len -= left_len_in_pg; + addr += left_len_in_pg; +- idx += left_len_in_pg / dseg_len; ++ idx += left_len_in_pg / HNS_ROCE_SGE_SIZE; + dseg = hns_roce_get_extend_sge(qp, + idx & (qp->sge.sge_cnt - 1)); + left_len_in_pg = 1 << HNS_HW_PAGE_SHIFT; +@@ -270,8 +277,7 @@ static int set_rc_inl(struct hns_roce_qp *qp, const struct ib_send_wr *wr, + dseg += sizeof(struct hns_roce_v2_rc_send_wqe); + + if (msg_len <= 
HNS_ROCE_V2_MAX_RC_INL_INN_SZ) { +- roce_set_bit(rc_sq_wqe->byte_20, +- V2_RC_SEND_WQE_BYTE_20_INL_TYPE_S, 0); ++ hr_reg_clear(rc_sq_wqe, RC_SEND_WQE_INL_TYPE); + + for (i = 0; i < wr->num_sge; i++) { + memcpy(dseg, ((void *)wr->sg_list[i].addr), +@@ -279,17 +285,13 @@ static int set_rc_inl(struct hns_roce_qp *qp, const struct ib_send_wr *wr, + dseg += wr->sg_list[i].length; + } + } else { +- roce_set_bit(rc_sq_wqe->byte_20, +- V2_RC_SEND_WQE_BYTE_20_INL_TYPE_S, 1); ++ hr_reg_enable(rc_sq_wqe, RC_SEND_WQE_INL_TYPE); + + ret = fill_ext_sge_inl_data(qp, wr, &curr_idx, msg_len); + if (ret) + return ret; + +- roce_set_field(rc_sq_wqe->byte_16, +- V2_RC_SEND_WQE_BYTE_16_SGE_NUM_M, +- V2_RC_SEND_WQE_BYTE_16_SGE_NUM_S, +- curr_idx - *sge_idx); ++ hr_reg_write(rc_sq_wqe, RC_SEND_WQE_SGE_NUM, curr_idx - *sge_idx); + } + + *sge_idx = curr_idx; +@@ -308,12 +310,10 @@ static int set_rwqe_data_seg(struct ib_qp *ibqp, const struct ib_send_wr *wr, + int j = 0; + int i; + +- roce_set_field(rc_sq_wqe->byte_20, +- V2_RC_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_M, +- V2_RC_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_S, +- (*sge_ind) & (qp->sge.sge_cnt - 1)); ++ hr_reg_write(rc_sq_wqe, RC_SEND_WQE_MSG_START_SGE_IDX, ++ (*sge_ind) & (qp->sge.sge_cnt - 1)); + +- roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_INLINE_S, ++ hr_reg_write(rc_sq_wqe, RC_SEND_WQE_INLINE, + !!(wr->send_flags & IB_SEND_INLINE)); + if (wr->send_flags & IB_SEND_INLINE) + return set_rc_inl(qp, wr, rc_sq_wqe, sge_ind); +@@ -338,9 +338,7 @@ static int set_rwqe_data_seg(struct ib_qp *ibqp, const struct ib_send_wr *wr, + valid_num_sge - HNS_ROCE_SGE_IN_WQE); + } + +- roce_set_field(rc_sq_wqe->byte_16, +- V2_RC_SEND_WQE_BYTE_16_SGE_NUM_M, +- V2_RC_SEND_WQE_BYTE_16_SGE_NUM_S, valid_num_sge); ++ hr_reg_write(rc_sq_wqe, RC_SEND_WQE_SGE_NUM, valid_num_sge); + + return 0; + } +@@ -411,8 +409,7 @@ static int set_ud_opcode(struct hns_roce_v2_ud_send_wqe *ud_sq_wqe, + + ud_sq_wqe->immtdata = get_immtdata(wr); + +- roce_set_field(ud_sq_wqe->byte_4, V2_UD_SEND_WQE_BYTE_4_OPCODE_M, +- V2_UD_SEND_WQE_BYTE_4_OPCODE_S, to_hr_opcode(ib_op)); ++ hr_reg_write(ud_sq_wqe, UD_SEND_WQE_OPCODE, to_hr_opcode(ib_op)); + + return 0; + } +@@ -423,21 +420,15 @@ static int fill_ud_av(struct hns_roce_v2_ud_send_wqe *ud_sq_wqe, + struct ib_device *ib_dev = ah->ibah.device; + struct hns_roce_dev *hr_dev = to_hr_dev(ib_dev); + +- roce_set_field(ud_sq_wqe->byte_24, V2_UD_SEND_WQE_BYTE_24_UDPSPN_M, +- V2_UD_SEND_WQE_BYTE_24_UDPSPN_S, ah->av.udp_sport); +- +- roce_set_field(ud_sq_wqe->byte_36, V2_UD_SEND_WQE_BYTE_36_HOPLIMIT_M, +- V2_UD_SEND_WQE_BYTE_36_HOPLIMIT_S, ah->av.hop_limit); +- roce_set_field(ud_sq_wqe->byte_36, V2_UD_SEND_WQE_BYTE_36_TCLASS_M, +- V2_UD_SEND_WQE_BYTE_36_TCLASS_S, ah->av.tclass); +- roce_set_field(ud_sq_wqe->byte_40, V2_UD_SEND_WQE_BYTE_40_FLOW_LABEL_M, +- V2_UD_SEND_WQE_BYTE_40_FLOW_LABEL_S, ah->av.flowlabel); ++ hr_reg_write(ud_sq_wqe, UD_SEND_WQE_UDPSPN, ah->av.udp_sport); ++ hr_reg_write(ud_sq_wqe, UD_SEND_WQE_HOPLIMIT, ah->av.hop_limit); ++ hr_reg_write(ud_sq_wqe, UD_SEND_WQE_TCLASS, ah->av.tclass); ++ hr_reg_write(ud_sq_wqe, UD_SEND_WQE_FLOW_LABEL, ah->av.flowlabel); + + if (WARN_ON(ah->av.sl > MAX_SERVICE_LEVEL)) + return -EINVAL; + +- roce_set_field(ud_sq_wqe->byte_40, V2_UD_SEND_WQE_BYTE_40_SL_M, +- V2_UD_SEND_WQE_BYTE_40_SL_S, ah->av.sl); ++ hr_reg_write(ud_sq_wqe, UD_SEND_WQE_SL, ah->av.sl); + + ud_sq_wqe->sgid_index = ah->av.gid_index; + +@@ -447,10 +438,8 @@ static int fill_ud_av(struct hns_roce_v2_ud_send_wqe *ud_sq_wqe, + if 
(hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09) + return 0; + +- roce_set_bit(ud_sq_wqe->byte_40, V2_UD_SEND_WQE_BYTE_40_UD_VLAN_EN_S, +- ah->av.vlan_en); +- roce_set_field(ud_sq_wqe->byte_36, V2_UD_SEND_WQE_BYTE_36_VLAN_M, +- V2_UD_SEND_WQE_BYTE_36_VLAN_S, ah->av.vlan_id); ++ hr_reg_write(ud_sq_wqe, UD_SEND_WQE_VLAN_EN, ah->av.vlan_en); ++ hr_reg_write(ud_sq_wqe, UD_SEND_WQE_VLAN, ah->av.vlan_id); + + return 0; + } +@@ -475,27 +464,19 @@ static inline int set_ud_wqe(struct hns_roce_qp *qp, + + ud_sq_wqe->msg_len = cpu_to_le32(msg_len); + +- roce_set_bit(ud_sq_wqe->byte_4, V2_UD_SEND_WQE_BYTE_4_CQE_S, ++ hr_reg_write(ud_sq_wqe, UD_SEND_WQE_CQE, + !!(wr->send_flags & IB_SEND_SIGNALED)); +- +- roce_set_bit(ud_sq_wqe->byte_4, V2_UD_SEND_WQE_BYTE_4_SE_S, ++ hr_reg_write(ud_sq_wqe, UD_SEND_WQE_SE, + !!(wr->send_flags & IB_SEND_SOLICITED)); + +- roce_set_field(ud_sq_wqe->byte_16, V2_UD_SEND_WQE_BYTE_16_PD_M, +- V2_UD_SEND_WQE_BYTE_16_PD_S, to_hr_pd(qp->ibqp.pd)->pdn); +- +- roce_set_field(ud_sq_wqe->byte_16, V2_UD_SEND_WQE_BYTE_16_SGE_NUM_M, +- V2_UD_SEND_WQE_BYTE_16_SGE_NUM_S, valid_num_sge); +- +- roce_set_field(ud_sq_wqe->byte_20, +- V2_UD_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_M, +- V2_UD_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_S, +- curr_idx & (qp->sge.sge_cnt - 1)); ++ hr_reg_write(ud_sq_wqe, UD_SEND_WQE_PD, to_hr_pd(qp->ibqp.pd)->pdn); ++ hr_reg_write(ud_sq_wqe, UD_SEND_WQE_SGE_NUM, valid_num_sge); ++ hr_reg_write(ud_sq_wqe, UD_SEND_WQE_MSG_START_SGE_IDX, ++ curr_idx & (qp->sge.sge_cnt - 1)); + + ud_sq_wqe->qkey = cpu_to_le32(ud_wr(wr)->remote_qkey & 0x80000000 ? + qp->qkey : ud_wr(wr)->remote_qkey); +- roce_set_field(ud_sq_wqe->byte_32, V2_UD_SEND_WQE_BYTE_32_DQPN_M, +- V2_UD_SEND_WQE_BYTE_32_DQPN_S, ud_wr(wr)->remote_qpn); ++ hr_reg_write(ud_sq_wqe, UD_SEND_WQE_DQPN, ud_wr(wr)->remote_qpn); + + ret = fill_ud_av(ud_sq_wqe, ah); + if (ret) +@@ -515,8 +496,7 @@ static inline int set_ud_wqe(struct hns_roce_qp *qp, + dma_wmb(); + + *sge_idx = curr_idx; +- roce_set_bit(ud_sq_wqe->byte_4, V2_UD_SEND_WQE_BYTE_4_OWNER_S, +- owner_bit); ++ hr_reg_write(ud_sq_wqe, UD_SEND_WQE_OWNER, owner_bit); + + return 0; + } +@@ -551,9 +531,6 @@ static int set_rc_opcode(struct hns_roce_dev *hr_dev, + else + ret = -EOPNOTSUPP; + break; +- case IB_WR_LOCAL_INV: +- roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_SO_S, 1); +- fallthrough; + case IB_WR_SEND_WITH_INV: + rc_sq_wqe->inv_key = cpu_to_le32(wr->ex.invalidate_rkey); + break; +@@ -564,11 +541,11 @@ static int set_rc_opcode(struct hns_roce_dev *hr_dev, + if (unlikely(ret)) + return ret; + +- roce_set_field(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_OPCODE_M, +- V2_RC_SEND_WQE_BYTE_4_OPCODE_S, to_hr_opcode(ib_op)); ++ hr_reg_write(rc_sq_wqe, RC_SEND_WQE_OPCODE, to_hr_opcode(ib_op)); + + return ret; + } ++ + static inline int set_rc_wqe(struct hns_roce_qp *qp, + const struct ib_send_wr *wr, + void *wqe, unsigned int *sge_idx, +@@ -589,13 +566,13 @@ static inline int set_rc_wqe(struct hns_roce_qp *qp, + if (WARN_ON(ret)) + return ret; + +- roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_FENCE_S, ++ hr_reg_write(rc_sq_wqe, RC_SEND_WQE_FENCE, + (wr->send_flags & IB_SEND_FENCE) ? 1 : 0); + +- roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_SE_S, ++ hr_reg_write(rc_sq_wqe, RC_SEND_WQE_SE, + (wr->send_flags & IB_SEND_SOLICITED) ? 1 : 0); + +- roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_CQE_S, ++ hr_reg_write(rc_sq_wqe, RC_SEND_WQE_CQE, + (wr->send_flags & IB_SEND_SIGNALED) ? 
1 : 0); + + if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP || +@@ -615,8 +592,7 @@ static inline int set_rc_wqe(struct hns_roce_qp *qp, + dma_wmb(); + + *sge_idx = curr_idx; +- roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_OWNER_S, +- owner_bit); ++ hr_reg_write(rc_sq_wqe, RC_SEND_WQE_OWNER, owner_bit); + + return ret; + } +@@ -677,16 +653,15 @@ static void hns_roce_write512(struct hns_roce_dev *hr_dev, u64 *val, + static void write_dwqe(struct hns_roce_dev *hr_dev, struct hns_roce_qp *qp, + void *wqe) + { ++#define HNS_ROCE_SL_SHIFT 2 + struct hns_roce_v2_rc_send_wqe *rc_sq_wqe = wqe; + + /* All kinds of DirectWQE have the same header field layout */ +- roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_FLAG_S, 1); +- roce_set_field(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_DB_SL_L_M, +- V2_RC_SEND_WQE_BYTE_4_DB_SL_L_S, qp->sl); +- roce_set_field(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_DB_SL_H_M, +- V2_RC_SEND_WQE_BYTE_4_DB_SL_H_S, qp->sl >> 2); +- roce_set_field(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_WQE_INDEX_M, +- V2_RC_SEND_WQE_BYTE_4_WQE_INDEX_S, qp->sq.head); ++ hr_reg_enable(rc_sq_wqe, RC_SEND_WQE_FLAG); ++ hr_reg_write(rc_sq_wqe, RC_SEND_WQE_DB_SL_L, qp->sl); ++ hr_reg_write(rc_sq_wqe, RC_SEND_WQE_DB_SL_H, ++ qp->sl >> HNS_ROCE_SL_SHIFT); ++ hr_reg_write(rc_sq_wqe, RC_SEND_WQE_WQE_INDEX, qp->sq.head); + + hns_roce_write512(hr_dev, wqe, qp->sq.db_reg); + } +@@ -1050,9 +1025,14 @@ static u32 hns_roce_v2_cmd_hw_resetting(struct hns_roce_dev *hr_dev, unsigned long instance_stage, unsigned long reset_stage) { @@ -143901,7 +173755,7 @@ index d5f3faa1627a4..1dbad159f3792 100644 /* When hardware reset is detected, we should stop sending mailbox&cmq& * doorbell to hardware. If now in .init_instance() function, we should -@@ -1064,7 +1070,11 @@ static u32 hns_roce_v2_cmd_hw_resetting(struct hns_roce_dev *hr_dev, +@@ -1064,7 +1044,11 @@ static u32 hns_roce_v2_cmd_hw_resetting(struct hns_roce_dev *hr_dev, * again. 
*/ hr_dev->dis_db = true; @@ -143914,7 +173768,7 @@ index d5f3faa1627a4..1dbad159f3792 100644 hr_dev->is_reset = true; if (!hr_dev->is_reset || reset_stage == HNS_ROCE_STATE_RST_INIT || -@@ -1263,6 +1273,16 @@ static int hns_roce_cmq_csq_done(struct hns_roce_dev *hr_dev) +@@ -1263,6 +1247,40 @@ static int hns_roce_cmq_csq_done(struct hns_roce_dev *hr_dev) return tail == priv->cmq.csq.head; } @@ -143927,11 +173781,44 @@ index d5f3faa1627a4..1dbad159f3792 100644 + handle->rinfo.instance_state == HNS_ROCE_STATE_INIT) + hr_dev->cmd.state = HNS_ROCE_CMDQ_STATE_FATAL_ERR; +} ++ ++static int hns_roce_cmd_err_convert_errno(u16 desc_ret) ++{ ++ struct hns_roce_cmd_errcode errcode_table[] = { ++ {CMD_EXEC_SUCCESS, 0}, ++ {CMD_NO_AUTH, -EPERM}, ++ {CMD_NOT_EXIST, -EOPNOTSUPP}, ++ {CMD_CRQ_FULL, -EXFULL}, ++ {CMD_NEXT_ERR, -ENOSR}, ++ {CMD_NOT_EXEC, -ENOTBLK}, ++ {CMD_PARA_ERR, -EINVAL}, ++ {CMD_RESULT_ERR, -ERANGE}, ++ {CMD_TIMEOUT, -ETIME}, ++ {CMD_HILINK_ERR, -ENOLINK}, ++ {CMD_INFO_ILLEGAL, -ENXIO}, ++ {CMD_INVALID, -EBADR}, ++ }; ++ u16 i; ++ ++ for (i = 0; i < ARRAY_SIZE(errcode_table); i++) ++ if (desc_ret == errcode_table[i].return_status) ++ return errcode_table[i].errno; ++ return -EIO; ++} + static int __hns_roce_cmq_send(struct hns_roce_dev *hr_dev, struct hns_roce_cmq_desc *desc, int num) { -@@ -1316,6 +1336,8 @@ static int __hns_roce_cmq_send(struct hns_roce_dev *hr_dev, +@@ -1307,7 +1325,7 @@ static int __hns_roce_cmq_send(struct hns_roce_dev *hr_dev, + dev_err_ratelimited(hr_dev->dev, + "Cmdq IO error, opcode = %x, return = %x\n", + desc->opcode, desc_ret); +- ret = -EIO; ++ ret = hns_roce_cmd_err_convert_errno(desc_ret); + } + } else { + /* FW/HW reset or incorrect number of desc */ +@@ -1316,6 +1334,8 @@ static int __hns_roce_cmq_send(struct hns_roce_dev *hr_dev, csq->head, tail); csq->head = tail; @@ -143940,7 +173827,7 @@ index d5f3faa1627a4..1dbad159f3792 100644 ret = -EAGAIN; } -@@ -1330,6 +1352,9 @@ static int hns_roce_cmq_send(struct hns_roce_dev *hr_dev, +@@ -1330,6 +1350,9 @@ static int hns_roce_cmq_send(struct hns_roce_dev *hr_dev, bool busy; int ret; @@ -143950,7 +173837,7 @@ index d5f3faa1627a4..1dbad159f3792 100644 if (!v2_chk_mbox_is_avail(hr_dev, &busy)) return busy ? 
-EBUSY : 0; -@@ -1526,6 +1551,9 @@ static void hns_roce_function_clear(struct hns_roce_dev *hr_dev) +@@ -1526,6 +1549,9 @@ static void hns_roce_function_clear(struct hns_roce_dev *hr_dev) { int i; @@ -143960,7 +173847,7 @@ index d5f3faa1627a4..1dbad159f3792 100644 for (i = hr_dev->func_num - 1; i >= 0; i--) { __hns_roce_function_clear(hr_dev, i); if (i != 0) -@@ -1594,11 +1622,17 @@ static int hns_roce_config_global_param(struct hns_roce_dev *hr_dev) +@@ -1594,11 +1620,17 @@ static int hns_roce_config_global_param(struct hns_roce_dev *hr_dev) { struct hns_roce_cmq_desc desc; struct hns_roce_cmq_req *req = (struct hns_roce_cmq_req *)desc.data; @@ -143979,7 +173866,29 @@ index d5f3faa1627a4..1dbad159f3792 100644 hr_reg_write(req, CFG_GLOBAL_PARAM_UDP_PORT, ROCE_V2_UDP_DPORT); return hns_roce_cmq_send(hr_dev, &desc, 1); -@@ -1939,7 +1973,7 @@ static void set_default_caps(struct hns_roce_dev *hr_dev) +@@ -1749,17 +1781,16 @@ static int __hns_roce_set_vf_switch_param(struct hns_roce_dev *hr_dev, + swt = (struct hns_roce_vf_switch *)desc.data; + hns_roce_cmq_setup_basic_desc(&desc, HNS_SWITCH_PARAMETER_CFG, true); + swt->rocee_sel |= cpu_to_le32(HNS_ICL_SWITCH_CMD_ROCEE_SEL); +- roce_set_field(swt->fun_id, VF_SWITCH_DATA_FUN_ID_VF_ID_M, +- VF_SWITCH_DATA_FUN_ID_VF_ID_S, vf_id); ++ hr_reg_write(swt, VF_SWITCH_VF_ID, vf_id); + ret = hns_roce_cmq_send(hr_dev, &desc, 1); + if (ret) + return ret; + + desc.flag = cpu_to_le16(HNS_ROCE_CMD_FLAG_IN); + desc.flag &= cpu_to_le16(~HNS_ROCE_CMD_FLAG_WR); +- roce_set_bit(swt->cfg, VF_SWITCH_DATA_CFG_ALW_LPBK_S, 1); +- roce_set_bit(swt->cfg, VF_SWITCH_DATA_CFG_ALW_LCL_LPBK_S, 0); +- roce_set_bit(swt->cfg, VF_SWITCH_DATA_CFG_ALW_DST_OVRD_S, 1); ++ hr_reg_enable(swt, VF_SWITCH_ALW_LPBK); ++ hr_reg_clear(swt, VF_SWITCH_ALW_LCL_LPBK); ++ hr_reg_enable(swt, VF_SWITCH_ALW_DST_OVRD); + + return hns_roce_cmq_send(hr_dev, &desc, 1); + } +@@ -1939,7 +1970,7 @@ static void set_default_caps(struct hns_roce_dev *hr_dev) caps->num_mtpts = HNS_ROCE_V2_MAX_MTPT_NUM; caps->num_pds = HNS_ROCE_V2_MAX_PD_NUM; caps->num_qpc_timer = HNS_ROCE_V2_MAX_QPC_TIMER_NUM; @@ -143988,7 +173897,7 @@ index d5f3faa1627a4..1dbad159f3792 100644 caps->max_qp_init_rdma = HNS_ROCE_V2_MAX_QP_INIT_RDMA; caps->max_qp_dest_rdma = HNS_ROCE_V2_MAX_QP_DEST_RDMA; -@@ -2233,7 +2267,6 @@ static int hns_roce_query_pf_caps(struct hns_roce_dev *hr_dev) +@@ -2233,7 +2264,6 @@ static int hns_roce_query_pf_caps(struct hns_roce_dev *hr_dev) caps->max_rq_sg = roundup_pow_of_two(caps->max_rq_sg); caps->max_extend_sg = le32_to_cpu(resp_a->max_extend_sg); caps->num_qpc_timer = le16_to_cpu(resp_a->num_qpc_timer); @@ -143996,6 +173905,16 @@ index d5f3faa1627a4..1dbad159f3792 100644 caps->max_srq_sges = le16_to_cpu(resp_a->max_srq_sges); caps->max_srq_sges = roundup_pow_of_two(caps->max_srq_sges); caps->num_aeq_vectors = resp_a->num_aeq_vectors; +@@ -2365,6 +2395,9 @@ static int hns_roce_query_pf_caps(struct hns_roce_dev *hr_dev) + V2_QUERY_PF_CAPS_D_RQWQE_HOP_NUM_M, + V2_QUERY_PF_CAPS_D_RQWQE_HOP_NUM_S); + ++ if (!(caps->page_size_cap & PAGE_SIZE)) ++ caps->page_size_cap = HNS_ROCE_V2_PAGE_SIZE_SUPPORTED; ++ + return 0; + } + @@ -2802,6 +2835,9 @@ static int v2_wait_mbox_complete(struct hns_roce_dev *hr_dev, u32 timeout, mb_st = (struct hns_roce_mbox_status *)desc.data; end = msecs_to_jiffies(timeout) + jiffies; @@ -144006,7 +173925,243 @@ index d5f3faa1627a4..1dbad159f3792 100644 status = 0; hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_QUERY_MB_ST, true); -@@ -3328,7 +3364,7 @@ static void 
hns_roce_v2_write_cqc(struct hns_roce_dev *hr_dev, +@@ -2906,10 +2942,8 @@ static int config_sgid_table(struct hns_roce_dev *hr_dev, + + hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_CFG_SGID_TB, false); + +- roce_set_field(sgid_tb->table_idx_rsv, CFG_SGID_TB_TABLE_IDX_M, +- CFG_SGID_TB_TABLE_IDX_S, gid_index); +- roce_set_field(sgid_tb->vf_sgid_type_rsv, CFG_SGID_TB_VF_SGID_TYPE_M, +- CFG_SGID_TB_VF_SGID_TYPE_S, sgid_type); ++ hr_reg_write(sgid_tb, CFG_SGID_TB_TABLE_IDX, gid_index); ++ hr_reg_write(sgid_tb, CFG_SGID_TB_VF_SGID_TYPE, sgid_type); + + copy_gid(&sgid_tb->vf_sgid_l, gid); + +@@ -2944,19 +2978,14 @@ static int config_gmv_table(struct hns_roce_dev *hr_dev, + + copy_gid(&tb_a->vf_sgid_l, gid); + +- roce_set_field(tb_a->vf_sgid_type_vlan, CFG_GMV_TB_VF_SGID_TYPE_M, +- CFG_GMV_TB_VF_SGID_TYPE_S, sgid_type); +- roce_set_bit(tb_a->vf_sgid_type_vlan, CFG_GMV_TB_VF_VLAN_EN_S, +- vlan_id < VLAN_CFI_MASK); +- roce_set_field(tb_a->vf_sgid_type_vlan, CFG_GMV_TB_VF_VLAN_ID_M, +- CFG_GMV_TB_VF_VLAN_ID_S, vlan_id); ++ hr_reg_write(tb_a, GMV_TB_A_VF_SGID_TYPE, sgid_type); ++ hr_reg_write(tb_a, GMV_TB_A_VF_VLAN_EN, vlan_id < VLAN_CFI_MASK); ++ hr_reg_write(tb_a, GMV_TB_A_VF_VLAN_ID, vlan_id); + + tb_b->vf_smac_l = cpu_to_le32(*(u32 *)mac); +- roce_set_field(tb_b->vf_smac_h, CFG_GMV_TB_SMAC_H_M, +- CFG_GMV_TB_SMAC_H_S, *(u16 *)&mac[4]); + +- roce_set_field(tb_b->table_idx_rsv, CFG_GMV_TB_SGID_IDX_M, +- CFG_GMV_TB_SGID_IDX_S, gid_index); ++ hr_reg_write(tb_b, GMV_TB_B_SMAC_H, *(u16 *)&mac[4]); ++ hr_reg_write(tb_b, GMV_TB_B_SGID_IDX, gid_index); + + return hns_roce_cmq_send(hr_dev, desc, 2); + } +@@ -3005,10 +3034,8 @@ static int hns_roce_v2_set_mac(struct hns_roce_dev *hr_dev, u8 phy_port, + reg_smac_l = *(u32 *)(&addr[0]); + reg_smac_h = *(u16 *)(&addr[4]); + +- roce_set_field(smac_tb->tb_idx_rsv, CFG_SMAC_TB_IDX_M, +- CFG_SMAC_TB_IDX_S, phy_port); +- roce_set_field(smac_tb->vf_smac_h_rsv, CFG_SMAC_TB_VF_SMAC_H_M, +- CFG_SMAC_TB_VF_SMAC_H_S, reg_smac_h); ++ hr_reg_write(smac_tb, CFG_SMAC_TB_IDX, phy_port); ++ hr_reg_write(smac_tb, CFG_SMAC_TB_VF_SMAC_H, reg_smac_h); + smac_tb->vf_smac_l = cpu_to_le32(reg_smac_l); + + return hns_roce_cmq_send(hr_dev, &desc, 1); +@@ -3024,7 +3051,8 @@ static int set_mtpt_pbl(struct hns_roce_dev *hr_dev, + int i, count; + + count = hns_roce_mtr_find(hr_dev, &mr->pbl_mtr, 0, pages, +- ARRAY_SIZE(pages), &pbl_ba); ++ min_t(int, ARRAY_SIZE(pages), mr->npages), ++ &pbl_ba); + if (count < 1) { + ibdev_err(ibdev, "failed to find PBL mtr, count = %d.\n", + count); +@@ -3037,21 +3065,15 @@ static int set_mtpt_pbl(struct hns_roce_dev *hr_dev, + + mpt_entry->pbl_size = cpu_to_le32(mr->npages); + mpt_entry->pbl_ba_l = cpu_to_le32(pbl_ba >> 3); +- roce_set_field(mpt_entry->byte_48_mode_ba, +- V2_MPT_BYTE_48_PBL_BA_H_M, V2_MPT_BYTE_48_PBL_BA_H_S, +- upper_32_bits(pbl_ba >> 3)); ++ hr_reg_write(mpt_entry, MPT_PBL_BA_H, upper_32_bits(pbl_ba >> 3)); + + mpt_entry->pa0_l = cpu_to_le32(lower_32_bits(pages[0])); +- roce_set_field(mpt_entry->byte_56_pa0_h, V2_MPT_BYTE_56_PA0_H_M, +- V2_MPT_BYTE_56_PA0_H_S, upper_32_bits(pages[0])); ++ hr_reg_write(mpt_entry, MPT_PA0_H, upper_32_bits(pages[0])); + + mpt_entry->pa1_l = cpu_to_le32(lower_32_bits(pages[1])); +- roce_set_field(mpt_entry->byte_64_buf_pa1, V2_MPT_BYTE_64_PA1_H_M, +- V2_MPT_BYTE_64_PA1_H_S, upper_32_bits(pages[1])); +- roce_set_field(mpt_entry->byte_64_buf_pa1, +- V2_MPT_BYTE_64_PBL_BUF_PG_SZ_M, +- V2_MPT_BYTE_64_PBL_BUF_PG_SZ_S, +- to_hr_hw_page_shift(mr->pbl_mtr.hem_cfg.buf_pg_shift)); ++ hr_reg_write(mpt_entry, 
MPT_PA1_H, upper_32_bits(pages[1])); ++ hr_reg_write(mpt_entry, MPT_PBL_BUF_PG_SZ, ++ to_hr_hw_page_shift(mr->pbl_mtr.hem_cfg.buf_pg_shift)); + + return 0; + } +@@ -3068,7 +3090,6 @@ static int hns_roce_v2_write_mtpt(struct hns_roce_dev *hr_dev, + + hr_reg_write(mpt_entry, MPT_ST, V2_MPT_ST_VALID); + hr_reg_write(mpt_entry, MPT_PD, mr->pd); +- hr_reg_enable(mpt_entry, MPT_L_INV_EN); + + hr_reg_write_bool(mpt_entry, MPT_BIND_EN, + mr->access & IB_ACCESS_MW_BIND); +@@ -3113,24 +3134,19 @@ static int hns_roce_v2_rereg_write_mtpt(struct hns_roce_dev *hr_dev, + u32 mr_access_flags = mr->access; + int ret = 0; + +- roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_MPT_ST_M, +- V2_MPT_BYTE_4_MPT_ST_S, V2_MPT_ST_VALID); +- +- roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_PD_M, +- V2_MPT_BYTE_4_PD_S, mr->pd); ++ hr_reg_write(mpt_entry, MPT_ST, V2_MPT_ST_VALID); ++ hr_reg_write(mpt_entry, MPT_PD, mr->pd); + + if (flags & IB_MR_REREG_ACCESS) { +- roce_set_bit(mpt_entry->byte_8_mw_cnt_en, +- V2_MPT_BYTE_8_BIND_EN_S, ++ hr_reg_write(mpt_entry, MPT_BIND_EN, + (mr_access_flags & IB_ACCESS_MW_BIND ? 1 : 0)); +- roce_set_bit(mpt_entry->byte_8_mw_cnt_en, +- V2_MPT_BYTE_8_ATOMIC_EN_S, ++ hr_reg_write(mpt_entry, MPT_ATOMIC_EN, + mr_access_flags & IB_ACCESS_REMOTE_ATOMIC ? 1 : 0); +- roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_RR_EN_S, ++ hr_reg_write(mpt_entry, MPT_RR_EN, + mr_access_flags & IB_ACCESS_REMOTE_READ ? 1 : 0); +- roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_RW_EN_S, ++ hr_reg_write(mpt_entry, MPT_RW_EN, + mr_access_flags & IB_ACCESS_REMOTE_WRITE ? 1 : 0); +- roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_LW_EN_S, ++ hr_reg_write(mpt_entry, MPT_LW_EN, + mr_access_flags & IB_ACCESS_LOCAL_WRITE ? 1 : 0); + } + +@@ -3161,37 +3177,27 @@ static int hns_roce_v2_frmr_write_mtpt(struct hns_roce_dev *hr_dev, + return -ENOBUFS; + } + +- roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_MPT_ST_M, +- V2_MPT_BYTE_4_MPT_ST_S, V2_MPT_ST_FREE); +- roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_PBL_HOP_NUM_M, +- V2_MPT_BYTE_4_PBL_HOP_NUM_S, 1); +- roce_set_field(mpt_entry->byte_4_pd_hop_st, +- V2_MPT_BYTE_4_PBL_BA_PG_SZ_M, +- V2_MPT_BYTE_4_PBL_BA_PG_SZ_S, +- to_hr_hw_page_shift(mr->pbl_mtr.hem_cfg.ba_pg_shift)); +- roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_PD_M, +- V2_MPT_BYTE_4_PD_S, mr->pd); ++ hr_reg_write(mpt_entry, MPT_ST, V2_MPT_ST_FREE); ++ hr_reg_write(mpt_entry, MPT_PD, mr->pd); + +- roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_RA_EN_S, 1); +- roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_R_INV_EN_S, 1); +- roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_L_INV_EN_S, 1); ++ hr_reg_enable(mpt_entry, MPT_RA_EN); ++ hr_reg_enable(mpt_entry, MPT_R_INV_EN); + +- roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_FRE_S, 1); +- roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_PA_S, 0); +- roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_MR_MW_S, 0); +- roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_BPD_S, 1); ++ hr_reg_enable(mpt_entry, MPT_FRE); ++ hr_reg_clear(mpt_entry, MPT_MR_MW); ++ hr_reg_enable(mpt_entry, MPT_BPD); ++ hr_reg_clear(mpt_entry, MPT_PA); ++ ++ hr_reg_write(mpt_entry, MPT_PBL_HOP_NUM, 1); ++ hr_reg_write(mpt_entry, MPT_PBL_BA_PG_SZ, ++ to_hr_hw_page_shift(mr->pbl_mtr.hem_cfg.ba_pg_shift)); ++ hr_reg_write(mpt_entry, MPT_PBL_BUF_PG_SZ, ++ to_hr_hw_page_shift(mr->pbl_mtr.hem_cfg.buf_pg_shift)); + + mpt_entry->pbl_size = cpu_to_le32(mr->npages); + + 
mpt_entry->pbl_ba_l = cpu_to_le32(lower_32_bits(pbl_ba >> 3)); +- roce_set_field(mpt_entry->byte_48_mode_ba, V2_MPT_BYTE_48_PBL_BA_H_M, +- V2_MPT_BYTE_48_PBL_BA_H_S, +- upper_32_bits(pbl_ba >> 3)); +- +- roce_set_field(mpt_entry->byte_64_buf_pa1, +- V2_MPT_BYTE_64_PBL_BUF_PG_SZ_M, +- V2_MPT_BYTE_64_PBL_BUF_PG_SZ_S, +- to_hr_hw_page_shift(mr->pbl_mtr.hem_cfg.buf_pg_shift)); ++ hr_reg_write(mpt_entry, MPT_PBL_BA_H, upper_32_bits(pbl_ba >> 3)); + + return 0; + } +@@ -3203,36 +3209,28 @@ static int hns_roce_v2_mw_write_mtpt(void *mb_buf, struct hns_roce_mw *mw) + mpt_entry = mb_buf; + memset(mpt_entry, 0, sizeof(*mpt_entry)); + +- roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_MPT_ST_M, +- V2_MPT_BYTE_4_MPT_ST_S, V2_MPT_ST_FREE); +- roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_PD_M, +- V2_MPT_BYTE_4_PD_S, mw->pdn); +- roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_PBL_HOP_NUM_M, +- V2_MPT_BYTE_4_PBL_HOP_NUM_S, +- mw->pbl_hop_num == HNS_ROCE_HOP_NUM_0 ? 0 : +- mw->pbl_hop_num); +- roce_set_field(mpt_entry->byte_4_pd_hop_st, +- V2_MPT_BYTE_4_PBL_BA_PG_SZ_M, +- V2_MPT_BYTE_4_PBL_BA_PG_SZ_S, +- mw->pbl_ba_pg_sz + PG_SHIFT_OFFSET); +- +- roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_R_INV_EN_S, 1); +- roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_L_INV_EN_S, 1); +- roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_LW_EN_S, 1); +- +- roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_PA_S, 0); +- roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_MR_MW_S, 1); +- roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_BPD_S, 1); +- roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_BQP_S, +- mw->ibmw.type == IB_MW_TYPE_1 ? 0 : 1); ++ hr_reg_write(mpt_entry, MPT_ST, V2_MPT_ST_FREE); ++ hr_reg_write(mpt_entry, MPT_PD, mw->pdn); ++ ++ hr_reg_enable(mpt_entry, MPT_R_INV_EN); ++ hr_reg_enable(mpt_entry, MPT_LW_EN); + +- roce_set_field(mpt_entry->byte_64_buf_pa1, +- V2_MPT_BYTE_64_PBL_BUF_PG_SZ_M, +- V2_MPT_BYTE_64_PBL_BUF_PG_SZ_S, +- mw->pbl_buf_pg_sz + PG_SHIFT_OFFSET); ++ hr_reg_enable(mpt_entry, MPT_MR_MW); ++ hr_reg_enable(mpt_entry, MPT_BPD); ++ hr_reg_clear(mpt_entry, MPT_PA); ++ hr_reg_write(mpt_entry, MPT_BQP, ++ mw->ibmw.type == IB_MW_TYPE_1 ? 0 : 1); + + mpt_entry->lkey = cpu_to_le32(mw->rkey); + ++ hr_reg_write(mpt_entry, MPT_PBL_HOP_NUM, ++ mw->pbl_hop_num == HNS_ROCE_HOP_NUM_0 ? 
0 : ++ mw->pbl_hop_num); ++ hr_reg_write(mpt_entry, MPT_PBL_BA_PG_SZ, ++ mw->pbl_ba_pg_sz + PG_SHIFT_OFFSET); ++ hr_reg_write(mpt_entry, MPT_PBL_BUF_PG_SZ, ++ mw->pbl_buf_pg_sz + PG_SHIFT_OFFSET); ++ + return 0; + } + +@@ -3328,7 +3326,7 @@ static void hns_roce_v2_write_cqc(struct hns_roce_dev *hr_dev, memset(cq_context, 0, sizeof(*cq_context)); hr_reg_write(cq_context, CQC_CQ_ST, V2_CQ_STATE_VALID); @@ -144015,7 +174170,25 @@ index d5f3faa1627a4..1dbad159f3792 100644 hr_reg_write(cq_context, CQC_SHIFT, ilog2(hr_cq->cq_depth)); hr_reg_write(cq_context, CQC_CEQN, hr_cq->vector); hr_reg_write(cq_context, CQC_CQN, hr_cq->cqn); -@@ -4399,8 +4435,8 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp, +@@ -3571,7 +3569,6 @@ static const u32 wc_send_op_map[] = { + HR_WC_OP_MAP(RDMA_READ, RDMA_READ), + HR_WC_OP_MAP(RDMA_WRITE, RDMA_WRITE), + HR_WC_OP_MAP(RDMA_WRITE_WITH_IMM, RDMA_WRITE), +- HR_WC_OP_MAP(LOCAL_INV, LOCAL_INV), + HR_WC_OP_MAP(ATOM_CMP_AND_SWAP, COMP_SWAP), + HR_WC_OP_MAP(ATOM_FETCH_AND_ADD, FETCH_ADD), + HR_WC_OP_MAP(ATOM_MSK_CMP_AND_SWAP, MASKED_COMP_SWAP), +@@ -3621,9 +3618,6 @@ static void fill_send_wc(struct ib_wc *wc, struct hns_roce_v2_cqe *cqe) + case HNS_ROCE_V2_WQE_OP_RDMA_WRITE_WITH_IMM: + wc->wc_flags |= IB_WC_WITH_IMM; + break; +- case HNS_ROCE_V2_WQE_OP_LOCAL_INV: +- wc->wc_flags |= IB_WC_WITH_INVALIDATE; +- break; + case HNS_ROCE_V2_WQE_OP_ATOM_CMP_AND_SWAP: + case HNS_ROCE_V2_WQE_OP_ATOM_FETCH_AND_ADD: + case HNS_ROCE_V2_WQE_OP_ATOM_MSK_CMP_AND_SWAP: +@@ -4399,8 +4393,8 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp, mtu = ib_mtu_enum_to_int(ib_mtu); if (WARN_ON(mtu <= 0)) return -EINVAL; @@ -144026,7 +174199,7 @@ index d5f3faa1627a4..1dbad159f3792 100644 lp_pktn_ini = ilog2(MAX_LP_MSG_LEN / mtu); if (WARN_ON(lp_pktn_ini >= 0xF)) return -EINVAL; -@@ -4802,6 +4838,30 @@ static int hns_roce_v2_set_abs_fields(struct ib_qp *ibqp, +@@ -4802,6 +4796,30 @@ static int hns_roce_v2_set_abs_fields(struct ib_qp *ibqp, return ret; } @@ -144057,7 +174230,7 @@ index d5f3faa1627a4..1dbad159f3792 100644 static int hns_roce_v2_set_opt_fields(struct ib_qp *ibqp, const struct ib_qp_attr *attr, int attr_mask, -@@ -4811,6 +4871,7 @@ static int hns_roce_v2_set_opt_fields(struct ib_qp *ibqp, +@@ -4811,6 +4829,7 @@ static int hns_roce_v2_set_opt_fields(struct ib_qp *ibqp, struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device); struct hns_roce_qp *hr_qp = to_hr_qp(ibqp); int ret = 0; @@ -144065,7 +174238,7 @@ index d5f3faa1627a4..1dbad159f3792 100644 if (attr_mask & IB_QP_AV) { ret = hns_roce_v2_set_path(ibqp, attr, attr_mask, context, -@@ -4820,12 +4881,10 @@ static int hns_roce_v2_set_opt_fields(struct ib_qp *ibqp, +@@ -4820,12 +4839,10 @@ static int hns_roce_v2_set_opt_fields(struct ib_qp *ibqp, } if (attr_mask & IB_QP_TIMEOUT) { @@ -144081,7 +174254,7 @@ index d5f3faa1627a4..1dbad159f3792 100644 } } -@@ -4882,7 +4941,9 @@ static int hns_roce_v2_set_opt_fields(struct ib_qp *ibqp, +@@ -4882,7 +4899,9 @@ static int hns_roce_v2_set_opt_fields(struct ib_qp *ibqp, set_access_flags(hr_qp, context, qpc_mask, attr, attr_mask); if (attr_mask & IB_QP_MIN_RNR_TIMER) { @@ -144092,7 +174265,16 @@ index d5f3faa1627a4..1dbad159f3792 100644 hr_reg_clear(qpc_mask, QPC_MIN_RNR_TIME); } -@@ -5499,6 +5560,16 @@ static int hns_roce_v2_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period) +@@ -5138,6 +5157,8 @@ static int hns_roce_v2_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, + + rdma_ah_set_sl(&qp_attr->ah_attr, + hr_reg_read(&context, QPC_SL)); ++ 
rdma_ah_set_port_num(&qp_attr->ah_attr, hr_qp->port + 1); ++ rdma_ah_set_ah_flags(&qp_attr->ah_attr, IB_AH_GRH); + grh->flow_label = hr_reg_read(&context, QPC_FL); + grh->sgid_index = hr_reg_read(&context, QPC_GMV_IDX); + grh->hop_limit = hr_reg_read(&context, QPC_HOPLIMIT); +@@ -5499,6 +5520,16 @@ static int hns_roce_v2_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period) hr_reg_write(cq_context, CQC_CQ_MAX_CNT, cq_count); hr_reg_clear(cqc_mask, CQC_CQ_MAX_CNT); @@ -144109,7 +174291,7 @@ index d5f3faa1627a4..1dbad159f3792 100644 hr_reg_write(cq_context, CQC_CQ_PERIOD, cq_period); hr_reg_clear(cqc_mask, CQC_CQ_PERIOD); -@@ -5783,8 +5854,8 @@ static irqreturn_t hns_roce_v2_msix_interrupt_abn(int irq, void *dev_id) +@@ -5783,8 +5814,8 @@ static irqreturn_t hns_roce_v2_msix_interrupt_abn(int irq, void *dev_id) dev_err(dev, "AEQ overflow!\n"); @@ -144120,7 +174302,7 @@ index d5f3faa1627a4..1dbad159f3792 100644 /* Set reset level for reset_event() */ if (ops->set_default_reset_request) -@@ -5894,6 +5965,15 @@ static int config_eqc(struct hns_roce_dev *hr_dev, struct hns_roce_eq *eq, +@@ -5894,6 +5925,15 @@ static int config_eqc(struct hns_roce_dev *hr_dev, struct hns_roce_eq *eq, hr_reg_write(eqc, EQC_EQ_PROD_INDX, HNS_ROCE_EQ_INIT_PROD_IDX); hr_reg_write(eqc, EQC_EQ_MAX_CNT, eq->eq_max_cnt); @@ -144136,7 +174318,7 @@ index d5f3faa1627a4..1dbad159f3792 100644 hr_reg_write(eqc, EQC_EQ_PERIOD, eq->eq_period); hr_reg_write(eqc, EQC_EQE_REPORT_TIMER, HNS_ROCE_EQ_INIT_REPORT_TIMER); hr_reg_write(eqc, EQC_EQE_BA_L, bt_ba >> 3); -@@ -6397,10 +6477,8 @@ static int hns_roce_hw_v2_reset_notify_down(struct hnae3_handle *handle) +@@ -6397,10 +6437,8 @@ static int hns_roce_hw_v2_reset_notify_down(struct hnae3_handle *handle) if (!hr_dev) return 0; @@ -144148,7 +174330,7 @@ index d5f3faa1627a4..1dbad159f3792 100644 return 0; diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h -index 4d904d5e82be4..d3d5b5f57052c 100644 +index 4d904d5e82be4..67f5b6fcfa1b1 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h @@ -51,7 +51,7 @@ @@ -144169,7 +174351,255 @@ index 4d904d5e82be4..d3d5b5f57052c 100644 #define HNS_ROCE_V2_MAX_INNER_MTPT_NUM 2 #define HNS_ROCE_INVALID_LKEY 0x0 #define HNS_ROCE_INVALID_SGE_LENGTH 0x80000000 -@@ -1444,6 +1444,14 @@ struct hns_roce_dip { +@@ -184,7 +184,6 @@ enum { + HNS_ROCE_V2_WQE_OP_ATOM_MSK_CMP_AND_SWAP = 0x8, + HNS_ROCE_V2_WQE_OP_ATOM_MSK_FETCH_AND_ADD = 0x9, + HNS_ROCE_V2_WQE_OP_FAST_REG_PMR = 0xa, +- HNS_ROCE_V2_WQE_OP_LOCAL_INV = 0xb, + HNS_ROCE_V2_WQE_OP_BIND_MW = 0xc, + HNS_ROCE_V2_WQE_OP_MASK = 0x1f, + }; +@@ -277,6 +276,11 @@ enum hns_roce_cmd_return_status { + CMD_OTHER_ERR = 0xff + }; + ++struct hns_roce_cmd_errcode { ++ enum hns_roce_cmd_return_status return_status; ++ int errno; ++}; ++ + enum hns_roce_sgid_type { + GID_TYPE_FLAG_ROCE_V1 = 0, + GID_TYPE_FLAG_ROCE_V2_IPV4, +@@ -790,12 +794,15 @@ struct hns_roce_v2_mpt_entry { + #define MPT_LKEY MPT_FIELD_LOC(223, 192) + #define MPT_VA MPT_FIELD_LOC(287, 224) + #define MPT_PBL_SIZE MPT_FIELD_LOC(319, 288) +-#define MPT_PBL_BA MPT_FIELD_LOC(380, 320) ++#define MPT_PBL_BA_L MPT_FIELD_LOC(351, 320) ++#define MPT_PBL_BA_H MPT_FIELD_LOC(380, 352) + #define MPT_BLK_MODE MPT_FIELD_LOC(381, 381) + #define MPT_RSV0 MPT_FIELD_LOC(383, 382) +-#define MPT_PA0 MPT_FIELD_LOC(441, 384) ++#define MPT_PA0_L MPT_FIELD_LOC(415, 384) ++#define MPT_PA0_H MPT_FIELD_LOC(441, 416) + #define MPT_BOUND_VA MPT_FIELD_LOC(447, 442) +-#define MPT_PA1 
MPT_FIELD_LOC(505, 448) ++#define MPT_PA1_L MPT_FIELD_LOC(479, 448) ++#define MPT_PA1_H MPT_FIELD_LOC(505, 480) + #define MPT_PERSIST_EN MPT_FIELD_LOC(506, 506) + #define MPT_RSV2 MPT_FIELD_LOC(507, 507) + #define MPT_PBL_BUF_PG_SZ MPT_FIELD_LOC(511, 508) +@@ -901,48 +908,24 @@ struct hns_roce_v2_ud_send_wqe { + u8 dgid[GID_LEN_V2]; + }; + +-#define V2_UD_SEND_WQE_BYTE_4_OPCODE_S 0 +-#define V2_UD_SEND_WQE_BYTE_4_OPCODE_M GENMASK(4, 0) +- +-#define V2_UD_SEND_WQE_BYTE_4_OWNER_S 7 +- +-#define V2_UD_SEND_WQE_BYTE_4_CQE_S 8 +- +-#define V2_UD_SEND_WQE_BYTE_4_SE_S 11 +- +-#define V2_UD_SEND_WQE_BYTE_16_PD_S 0 +-#define V2_UD_SEND_WQE_BYTE_16_PD_M GENMASK(23, 0) +- +-#define V2_UD_SEND_WQE_BYTE_16_SGE_NUM_S 24 +-#define V2_UD_SEND_WQE_BYTE_16_SGE_NUM_M GENMASK(31, 24) +- +-#define V2_UD_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_S 0 +-#define V2_UD_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_M GENMASK(23, 0) +- +-#define V2_UD_SEND_WQE_BYTE_24_UDPSPN_S 16 +-#define V2_UD_SEND_WQE_BYTE_24_UDPSPN_M GENMASK(31, 16) +- +-#define V2_UD_SEND_WQE_BYTE_32_DQPN_S 0 +-#define V2_UD_SEND_WQE_BYTE_32_DQPN_M GENMASK(23, 0) +- +-#define V2_UD_SEND_WQE_BYTE_36_VLAN_S 0 +-#define V2_UD_SEND_WQE_BYTE_36_VLAN_M GENMASK(15, 0) +- +-#define V2_UD_SEND_WQE_BYTE_36_HOPLIMIT_S 16 +-#define V2_UD_SEND_WQE_BYTE_36_HOPLIMIT_M GENMASK(23, 16) +- +-#define V2_UD_SEND_WQE_BYTE_36_TCLASS_S 24 +-#define V2_UD_SEND_WQE_BYTE_36_TCLASS_M GENMASK(31, 24) +- +-#define V2_UD_SEND_WQE_BYTE_40_FLOW_LABEL_S 0 +-#define V2_UD_SEND_WQE_BYTE_40_FLOW_LABEL_M GENMASK(19, 0) +- +-#define V2_UD_SEND_WQE_BYTE_40_SL_S 20 +-#define V2_UD_SEND_WQE_BYTE_40_SL_M GENMASK(23, 20) +- +-#define V2_UD_SEND_WQE_BYTE_40_UD_VLAN_EN_S 30 +- +-#define V2_UD_SEND_WQE_BYTE_40_LBI_S 31 ++#define UD_SEND_WQE_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_v2_ud_send_wqe, h, l) ++ ++#define UD_SEND_WQE_OPCODE UD_SEND_WQE_FIELD_LOC(4, 0) ++#define UD_SEND_WQE_OWNER UD_SEND_WQE_FIELD_LOC(7, 7) ++#define UD_SEND_WQE_CQE UD_SEND_WQE_FIELD_LOC(8, 8) ++#define UD_SEND_WQE_SE UD_SEND_WQE_FIELD_LOC(11, 11) ++#define UD_SEND_WQE_PD UD_SEND_WQE_FIELD_LOC(119, 96) ++#define UD_SEND_WQE_SGE_NUM UD_SEND_WQE_FIELD_LOC(127, 120) ++#define UD_SEND_WQE_MSG_START_SGE_IDX UD_SEND_WQE_FIELD_LOC(151, 128) ++#define UD_SEND_WQE_UDPSPN UD_SEND_WQE_FIELD_LOC(191, 176) ++#define UD_SEND_WQE_DQPN UD_SEND_WQE_FIELD_LOC(247, 224) ++#define UD_SEND_WQE_VLAN UD_SEND_WQE_FIELD_LOC(271, 256) ++#define UD_SEND_WQE_HOPLIMIT UD_SEND_WQE_FIELD_LOC(279, 272) ++#define UD_SEND_WQE_TCLASS UD_SEND_WQE_FIELD_LOC(287, 280) ++#define UD_SEND_WQE_FLOW_LABEL UD_SEND_WQE_FIELD_LOC(307, 288) ++#define UD_SEND_WQE_SL UD_SEND_WQE_FIELD_LOC(311, 308) ++#define UD_SEND_WQE_VLAN_EN UD_SEND_WQE_FIELD_LOC(318, 318) ++#define UD_SEND_WQE_LBI UD_SEND_WQE_FIELD_LOC(319, 319) + + struct hns_roce_v2_rc_send_wqe { + __le32 byte_4; +@@ -957,42 +940,22 @@ struct hns_roce_v2_rc_send_wqe { + __le64 va; + }; + +-#define V2_RC_SEND_WQE_BYTE_4_OPCODE_S 0 +-#define V2_RC_SEND_WQE_BYTE_4_OPCODE_M GENMASK(4, 0) +- +-#define V2_RC_SEND_WQE_BYTE_4_DB_SL_L_S 5 +-#define V2_RC_SEND_WQE_BYTE_4_DB_SL_L_M GENMASK(6, 5) +- +-#define V2_RC_SEND_WQE_BYTE_4_DB_SL_H_S 13 +-#define V2_RC_SEND_WQE_BYTE_4_DB_SL_H_M GENMASK(14, 13) +- +-#define V2_RC_SEND_WQE_BYTE_4_WQE_INDEX_S 15 +-#define V2_RC_SEND_WQE_BYTE_4_WQE_INDEX_M GENMASK(30, 15) +- +-#define V2_RC_SEND_WQE_BYTE_4_OWNER_S 7 +- +-#define V2_RC_SEND_WQE_BYTE_4_CQE_S 8 +- +-#define V2_RC_SEND_WQE_BYTE_4_FENCE_S 9 +- +-#define V2_RC_SEND_WQE_BYTE_4_SO_S 10 +- +-#define V2_RC_SEND_WQE_BYTE_4_SE_S 11 +- +-#define 
V2_RC_SEND_WQE_BYTE_4_INLINE_S 12 +- +-#define V2_RC_SEND_WQE_BYTE_4_FLAG_S 31 +- +-#define V2_RC_SEND_WQE_BYTE_16_XRC_SRQN_S 0 +-#define V2_RC_SEND_WQE_BYTE_16_XRC_SRQN_M GENMASK(23, 0) +- +-#define V2_RC_SEND_WQE_BYTE_16_SGE_NUM_S 24 +-#define V2_RC_SEND_WQE_BYTE_16_SGE_NUM_M GENMASK(31, 24) +- +-#define V2_RC_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_S 0 +-#define V2_RC_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_M GENMASK(23, 0) +- +-#define V2_RC_SEND_WQE_BYTE_20_INL_TYPE_S 31 ++#define RC_SEND_WQE_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_v2_rc_send_wqe, h, l) ++ ++#define RC_SEND_WQE_OPCODE RC_SEND_WQE_FIELD_LOC(4, 0) ++#define RC_SEND_WQE_DB_SL_L RC_SEND_WQE_FIELD_LOC(6, 5) ++#define RC_SEND_WQE_DB_SL_H RC_SEND_WQE_FIELD_LOC(14, 13) ++#define RC_SEND_WQE_OWNER RC_SEND_WQE_FIELD_LOC(7, 7) ++#define RC_SEND_WQE_CQE RC_SEND_WQE_FIELD_LOC(8, 8) ++#define RC_SEND_WQE_FENCE RC_SEND_WQE_FIELD_LOC(9, 9) ++#define RC_SEND_WQE_SE RC_SEND_WQE_FIELD_LOC(11, 11) ++#define RC_SEND_WQE_INLINE RC_SEND_WQE_FIELD_LOC(12, 12) ++#define RC_SEND_WQE_WQE_INDEX RC_SEND_WQE_FIELD_LOC(30, 15) ++#define RC_SEND_WQE_FLAG RC_SEND_WQE_FIELD_LOC(31, 31) ++#define RC_SEND_WQE_XRC_SRQN RC_SEND_WQE_FIELD_LOC(119, 96) ++#define RC_SEND_WQE_SGE_NUM RC_SEND_WQE_FIELD_LOC(127, 120) ++#define RC_SEND_WQE_MSG_START_SGE_IDX RC_SEND_WQE_FIELD_LOC(151, 128) ++#define RC_SEND_WQE_INL_TYPE RC_SEND_WQE_FIELD_LOC(159, 159) + + struct hns_roce_wqe_frmr_seg { + __le32 pbl_size; +@@ -1114,12 +1077,12 @@ struct hns_roce_vf_switch { + __le32 resv3; + }; + +-#define VF_SWITCH_DATA_FUN_ID_VF_ID_S 3 +-#define VF_SWITCH_DATA_FUN_ID_VF_ID_M GENMASK(10, 3) ++#define VF_SWITCH_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_vf_switch, h, l) + +-#define VF_SWITCH_DATA_CFG_ALW_LPBK_S 1 +-#define VF_SWITCH_DATA_CFG_ALW_LCL_LPBK_S 2 +-#define VF_SWITCH_DATA_CFG_ALW_DST_OVRD_S 3 ++#define VF_SWITCH_VF_ID VF_SWITCH_FIELD_LOC(42, 35) ++#define VF_SWITCH_ALW_LPBK VF_SWITCH_FIELD_LOC(65, 65) ++#define VF_SWITCH_ALW_LCL_LPBK VF_SWITCH_FIELD_LOC(66, 66) ++#define VF_SWITCH_ALW_DST_OVRD VF_SWITCH_FIELD_LOC(67, 67) + + struct hns_roce_post_mbox { + __le32 in_param_l; +@@ -1182,11 +1145,10 @@ struct hns_roce_cfg_sgid_tb { + __le32 vf_sgid_type_rsv; + }; + +-#define CFG_SGID_TB_TABLE_IDX_S 0 +-#define CFG_SGID_TB_TABLE_IDX_M GENMASK(7, 0) ++#define SGID_TB_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_cfg_sgid_tb, h, l) + +-#define CFG_SGID_TB_VF_SGID_TYPE_S 0 +-#define CFG_SGID_TB_VF_SGID_TYPE_M GENMASK(1, 0) ++#define CFG_SGID_TB_TABLE_IDX SGID_TB_FIELD_LOC(7, 0) ++#define CFG_SGID_TB_VF_SGID_TYPE SGID_TB_FIELD_LOC(161, 160) + + struct hns_roce_cfg_smac_tb { + __le32 tb_idx_rsv; +@@ -1194,11 +1156,11 @@ struct hns_roce_cfg_smac_tb { + __le32 vf_smac_h_rsv; + __le32 rsv[3]; + }; +-#define CFG_SMAC_TB_IDX_S 0 +-#define CFG_SMAC_TB_IDX_M GENMASK(7, 0) + +-#define CFG_SMAC_TB_VF_SMAC_H_S 0 +-#define CFG_SMAC_TB_VF_SMAC_H_M GENMASK(15, 0) ++#define SMAC_TB_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_cfg_smac_tb, h, l) ++ ++#define CFG_SMAC_TB_IDX SMAC_TB_FIELD_LOC(7, 0) ++#define CFG_SMAC_TB_VF_SMAC_H SMAC_TB_FIELD_LOC(79, 64) + + struct hns_roce_cfg_gmv_tb_a { + __le32 vf_sgid_l; +@@ -1209,16 +1171,11 @@ struct hns_roce_cfg_gmv_tb_a { + __le32 resv; + }; + +-#define CFG_GMV_TB_SGID_IDX_S 0 +-#define CFG_GMV_TB_SGID_IDX_M GENMASK(7, 0) ++#define GMV_TB_A_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_cfg_gmv_tb_a, h, l) + +-#define CFG_GMV_TB_VF_SGID_TYPE_S 0 +-#define CFG_GMV_TB_VF_SGID_TYPE_M GENMASK(1, 0) +- +-#define CFG_GMV_TB_VF_VLAN_EN_S 2 +- +-#define CFG_GMV_TB_VF_VLAN_ID_S 16 
+-#define CFG_GMV_TB_VF_VLAN_ID_M GENMASK(27, 16) ++#define GMV_TB_A_VF_SGID_TYPE GMV_TB_A_FIELD_LOC(129, 128) ++#define GMV_TB_A_VF_VLAN_EN GMV_TB_A_FIELD_LOC(130, 130) ++#define GMV_TB_A_VF_VLAN_ID GMV_TB_A_FIELD_LOC(155, 144) + + struct hns_roce_cfg_gmv_tb_b { + __le32 vf_smac_l; +@@ -1227,8 +1184,10 @@ struct hns_roce_cfg_gmv_tb_b { + __le32 resv[3]; + }; + +-#define CFG_GMV_TB_SMAC_H_S 0 +-#define CFG_GMV_TB_SMAC_H_M GENMASK(15, 0) ++#define GMV_TB_B_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_cfg_gmv_tb_b, h, l) ++ ++#define GMV_TB_B_SMAC_H GMV_TB_B_FIELD_LOC(47, 32) ++#define GMV_TB_B_SGID_IDX GMV_TB_B_FIELD_LOC(71, 64) + + #define HNS_ROCE_QUERY_PF_CAPS_CMD_NUM 5 + struct hns_roce_query_pf_caps_a { +@@ -1444,6 +1403,14 @@ struct hns_roce_dip { struct list_head node; /* all dips are on a list */ }; @@ -144217,7 +174647,7 @@ index 5d39bd08582af..13c8195b5c3a6 100644 dev_err(dev, "Failed to init CQC timer memory, aborting.\n"); diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c -index 7089ac7802913..20360df25771c 100644 +index 7089ac7802913..a593c142cd6ba 100644 --- a/drivers/infiniband/hw/hns/hns_roce_mr.c +++ b/drivers/infiniband/hw/hns/hns_roce_mr.c @@ -272,7 +272,6 @@ struct ib_mr *hns_roce_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, @@ -144228,6 +174658,19 @@ index 7089ac7802913..20360df25771c 100644 return &mr->ibmr; +@@ -416,10 +415,10 @@ struct ib_mr *hns_roce_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, + + return &mr->ibmr; + +-err_key: +- free_mr_key(hr_dev, mr); + err_pbl: + free_mr_pbl(hr_dev, mr); ++err_key: ++ free_mr_key(hr_dev, mr); + err_free: + kfree(mr); + return ERR_PTR(ret); diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c index 9af4509894e68..5d50d2d1deca9 100644 --- a/drivers/infiniband/hw/hns/hns_roce_qp.c @@ -144260,7 +174703,7 @@ index 6eee9deadd122..e64ef6903fb4f 100644 } diff --git a/drivers/infiniband/hw/irdma/cm.c b/drivers/infiniband/hw/irdma/cm.c -index 6dea0a49d1718..b08c67bb264c9 100644 +index 6dea0a49d1718..a8ec3d8f6e465 100644 --- a/drivers/infiniband/hw/irdma/cm.c +++ b/drivers/infiniband/hw/irdma/cm.c @@ -1477,12 +1477,13 @@ irdma_find_listener(struct irdma_cm_core *cm_core, u32 *dst_addr, u16 dst_port, @@ -144282,7 +174725,17 @@ index 6dea0a49d1718..b08c67bb264c9 100644 refcount_inc(&listen_node->refcnt); spin_unlock_irqrestore(&cm_core->listen_list_lock, flags); -@@ -2305,10 +2306,8 @@ err: +@@ -1722,6 +1723,9 @@ irdma_add_mqh_4(struct irdma_device *iwdev, struct irdma_cm_info *cm_info, + continue; + + idev = in_dev_get(ip_dev); ++ if (!idev) ++ continue; ++ + in_dev_for_each_ifa_rtnl(ifa, idev) { + ibdev_dbg(&iwdev->ibdev, + "CM: Allocating child CM Listener forIP=%pI4, vlan_id=%d, MAC=%pM\n", +@@ -2305,10 +2309,8 @@ err: return NULL; } @@ -144294,7 +174747,7 @@ index 6dea0a49d1718..b08c67bb264c9 100644 struct irdma_cm_core *cm_core = cm_node->cm_core; struct irdma_qp *iwqp; struct irdma_cm_info nfo; -@@ -2356,7 +2355,6 @@ static void irdma_cm_node_free_cb(struct rcu_head *rcu_head) +@@ -2356,7 +2358,6 @@ static void irdma_cm_node_free_cb(struct rcu_head *rcu_head) } cm_core->cm_free_ah(cm_node); @@ -144302,7 +174755,7 @@ index 6dea0a49d1718..b08c67bb264c9 100644 } /** -@@ -2384,8 +2382,9 @@ void irdma_rem_ref_cm_node(struct irdma_cm_node *cm_node) +@@ -2384,8 +2385,9 @@ void irdma_rem_ref_cm_node(struct irdma_cm_node *cm_node) spin_unlock_irqrestore(&cm_core->ht_lock, flags); @@ -144314,7 +174767,7 @@ index 6dea0a49d1718..b08c67bb264c9 
100644 } /** -@@ -3244,15 +3243,10 @@ enum irdma_status_code irdma_setup_cm_core(struct irdma_device *iwdev, +@@ -3244,15 +3246,10 @@ enum irdma_status_code irdma_setup_cm_core(struct irdma_device *iwdev, */ void irdma_cleanup_cm_core(struct irdma_cm_core *cm_core) { @@ -144331,7 +174784,7 @@ index 6dea0a49d1718..b08c67bb264c9 100644 destroy_workqueue(cm_core->event_wq); cm_core->dev->ws_reset(&cm_core->iwdev->vsi); -@@ -3465,12 +3459,6 @@ static void irdma_cm_disconn_true(struct irdma_qp *iwqp) +@@ -3465,12 +3462,6 @@ static void irdma_cm_disconn_true(struct irdma_qp *iwqp) } cm_id = iwqp->cm_id; @@ -144344,7 +174797,7 @@ index 6dea0a49d1718..b08c67bb264c9 100644 original_hw_tcp_state = iwqp->hw_tcp_state; original_ibqp_state = iwqp->ibqp_state; last_ae = iwqp->last_aeq; -@@ -3492,11 +3480,11 @@ static void irdma_cm_disconn_true(struct irdma_qp *iwqp) +@@ -3492,11 +3483,11 @@ static void irdma_cm_disconn_true(struct irdma_qp *iwqp) disconn_status = -ECONNRESET; } @@ -144361,7 +174814,7 @@ index 6dea0a49d1718..b08c67bb264c9 100644 issue_close = 1; iwqp->cm_id = NULL; qp->term_flags = 0; -@@ -4234,10 +4222,6 @@ void irdma_cm_teardown_connections(struct irdma_device *iwdev, u32 *ipaddr, +@@ -4234,10 +4225,6 @@ void irdma_cm_teardown_connections(struct irdma_device *iwdev, u32 *ipaddr, struct irdma_cm_node *cm_node; struct list_head teardown_list; struct ib_qp_attr attr; @@ -144372,7 +174825,7 @@ index 6dea0a49d1718..b08c67bb264c9 100644 INIT_LIST_HEAD(&teardown_list); -@@ -4254,52 +4238,6 @@ void irdma_cm_teardown_connections(struct irdma_device *iwdev, u32 *ipaddr, +@@ -4254,52 +4241,6 @@ void irdma_cm_teardown_connections(struct irdma_device *iwdev, u32 *ipaddr, irdma_cm_disconn(cm_node->iwqp); irdma_rem_ref_cm_node(cm_node); } @@ -145009,7 +175462,7 @@ index ac91ea5296db9..db7d0a3000699 100644 + return polarity != ukcq->polarity; +} diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c -index 102dc9342f2a2..911902d2b93e4 100644 +index 102dc9342f2a2..c5971a840b876 100644 --- a/drivers/infiniband/hw/irdma/verbs.c +++ b/drivers/infiniband/hw/irdma/verbs.c @@ -29,22 +29,25 @@ static int irdma_query_device(struct ib_device *ibdev, @@ -145041,7 +175494,56 @@ index 102dc9342f2a2..911902d2b93e4 100644 props->max_ah = rf->max_ah; props->max_mcast_grp = rf->max_mcg; props->max_mcast_qp_attach = IRDMA_MAX_MGS_PER_CTX; -@@ -1617,13 +1620,13 @@ int irdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, +@@ -57,36 +60,6 @@ static int irdma_query_device(struct ib_device *ibdev, + return 0; + } + +-/** +- * irdma_get_eth_speed_and_width - Get IB port speed and width from netdev speed +- * @link_speed: netdev phy link speed +- * @active_speed: IB port speed +- * @active_width: IB port width +- */ +-static void irdma_get_eth_speed_and_width(u32 link_speed, u16 *active_speed, +- u8 *active_width) +-{ +- if (link_speed <= SPEED_1000) { +- *active_width = IB_WIDTH_1X; +- *active_speed = IB_SPEED_SDR; +- } else if (link_speed <= SPEED_10000) { +- *active_width = IB_WIDTH_1X; +- *active_speed = IB_SPEED_FDR10; +- } else if (link_speed <= SPEED_20000) { +- *active_width = IB_WIDTH_4X; +- *active_speed = IB_SPEED_DDR; +- } else if (link_speed <= SPEED_25000) { +- *active_width = IB_WIDTH_1X; +- *active_speed = IB_SPEED_EDR; +- } else if (link_speed <= SPEED_40000) { +- *active_width = IB_WIDTH_4X; +- *active_speed = IB_SPEED_FDR10; +- } else { +- *active_width = IB_WIDTH_4X; +- *active_speed = IB_SPEED_EDR; +- } +-} +- + /** + * irdma_query_port - get port 
attributes + * @ibdev: device pointer from stack +@@ -114,8 +87,9 @@ static int irdma_query_port(struct ib_device *ibdev, u32 port, + props->state = IB_PORT_DOWN; + props->phys_state = IB_PORT_PHYS_STATE_DISABLED; + } +- irdma_get_eth_speed_and_width(SPEED_100000, &props->active_speed, +- &props->active_width); ++ ++ ib_get_eth_speed(ibdev, port, &props->active_speed, ++ &props->active_width); + + if (rdma_protocol_roce(ibdev, 1)) { + props->gid_tbl_len = 32; +@@ -1617,13 +1591,13 @@ int irdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, if (issue_modify_qp && iwqp->ibqp_state > IB_QPS_RTS) { if (dont_wait) { @@ -145057,7 +175559,7 @@ index 102dc9342f2a2..911902d2b93e4 100644 } else { int close_timer_started; -@@ -1759,11 +1762,11 @@ static int irdma_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata) +@@ -1759,11 +1733,11 @@ static int irdma_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata) spin_unlock_irqrestore(&iwcq->lock, flags); irdma_cq_wq_destroy(iwdev->rf, cq); @@ -145070,7 +175572,7 @@ index 102dc9342f2a2..911902d2b93e4 100644 return 0; } -@@ -2506,7 +2509,7 @@ static int irdma_dealloc_mw(struct ib_mw *ibmw) +@@ -2506,7 +2480,7 @@ static int irdma_dealloc_mw(struct ib_mw *ibmw) cqp_info = &cqp_request->info; info = &cqp_info->in.u.dealloc_stag.info; memset(info, 0, sizeof(*info)); @@ -145079,7 +175581,7 @@ index 102dc9342f2a2..911902d2b93e4 100644 info->stag_idx = ibmw->rkey >> IRDMA_CQPSQ_STAG_IDX_S; info->mr = false; cqp_info->cqp_cmd = IRDMA_OP_DEALLOC_STAG; -@@ -2776,7 +2779,7 @@ static struct ib_mr *irdma_reg_user_mr(struct ib_pd *pd, u64 start, u64 len, +@@ -2776,7 +2750,7 @@ static struct ib_mr *irdma_reg_user_mr(struct ib_pd *pd, u64 start, u64 len, if (req.reg_type == IRDMA_MEMREG_TYPE_MEM) { iwmr->page_size = ib_umem_find_best_pgsz(region, @@ -145088,7 +175590,7 @@ index 102dc9342f2a2..911902d2b93e4 100644 virt); if (unlikely(!iwmr->page_size)) { kfree(iwmr); -@@ -3018,7 +3021,7 @@ static int irdma_dereg_mr(struct ib_mr *ib_mr, struct ib_udata *udata) +@@ -3018,7 +2992,7 @@ static int irdma_dereg_mr(struct ib_mr *ib_mr, struct ib_udata *udata) cqp_info = &cqp_request->info; info = &cqp_info->in.u.dealloc_stag.info; memset(info, 0, sizeof(*info)); @@ -145097,7 +175599,7 @@ index 102dc9342f2a2..911902d2b93e4 100644 info->stag_idx = ib_mr->rkey >> IRDMA_CQPSQ_STAG_IDX_S; info->mr = true; if (iwpbl->pbl_allocated) -@@ -3356,6 +3359,8 @@ static enum ib_wc_status irdma_flush_err_to_ib_wc_status(enum irdma_flush_opcode +@@ -3356,6 +3330,8 @@ static enum ib_wc_status irdma_flush_err_to_ib_wc_status(enum irdma_flush_opcode return IB_WC_RETRY_EXC_ERR; case FLUSH_MW_BIND_ERR: return IB_WC_MW_BIND_ERR; @@ -145106,7 +175608,7 @@ index 102dc9342f2a2..911902d2b93e4 100644 case FLUSH_FATAL_ERR: default: return IB_WC_FATAL_ERR; -@@ -3604,18 +3609,31 @@ static int irdma_req_notify_cq(struct ib_cq *ibcq, +@@ -3604,18 +3580,31 @@ static int irdma_req_notify_cq(struct ib_cq *ibcq, struct irdma_cq *iwcq; struct irdma_cq_uk *ukcq; unsigned long flags; @@ -145229,6 +175731,30 @@ index 8662f462e2a5f..3a1a4ac9dd33d 100644 } err = set_kernel_sq_size(dev, &init_attr->cap, qp_type, qp); +diff --git a/drivers/infiniband/hw/mlx5/counters.c b/drivers/infiniband/hw/mlx5/counters.c +index 224ba36f2946c..1a0ecf439c099 100644 +--- a/drivers/infiniband/hw/mlx5/counters.c ++++ b/drivers/infiniband/hw/mlx5/counters.c +@@ -249,7 +249,6 @@ static int mlx5_ib_get_hw_stats(struct ib_device *ibdev, + const struct mlx5_ib_counters *cnts = get_counters(dev, port_num - 1); + struct 
mlx5_core_dev *mdev; + int ret, num_counters; +- u32 mdev_port_num; + + if (!stats) + return -EINVAL; +@@ -270,8 +269,9 @@ static int mlx5_ib_get_hw_stats(struct ib_device *ibdev, + } + + if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) { +- mdev = mlx5_ib_get_native_port_mdev(dev, port_num, +- &mdev_port_num); ++ if (!port_num) ++ port_num = 1; ++ mdev = mlx5_ib_get_native_port_mdev(dev, port_num, NULL); + if (!mdev) { + /* If port is not affiliated yet, its in down state + * which doesn't have any counters yet, so it would be diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c index e95967aefe788..21beded40066d 100644 --- a/drivers/infiniband/hw/mlx5/devx.c @@ -145342,6 +175868,108 @@ index d0d98e584ebcc..fcf6447b4a4e0 100644 return mlx5_base_mkey(mmkey->key) == mlx5_base_mkey(key); return mmkey->key == key; } +diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c +index e5abbcfc1d574..55b05a3e31b8e 100644 +--- a/drivers/infiniband/hw/mlx5/qp.c ++++ b/drivers/infiniband/hw/mlx5/qp.c +@@ -4499,6 +4499,40 @@ static bool mlx5_ib_modify_qp_allowed(struct mlx5_ib_dev *dev, + return false; + } + ++static int validate_rd_atomic(struct mlx5_ib_dev *dev, struct ib_qp_attr *attr, ++ int attr_mask, enum ib_qp_type qp_type) ++{ ++ int log_max_ra_res; ++ int log_max_ra_req; ++ ++ if (qp_type == MLX5_IB_QPT_DCI) { ++ log_max_ra_res = 1 << MLX5_CAP_GEN(dev->mdev, ++ log_max_ra_res_dc); ++ log_max_ra_req = 1 << MLX5_CAP_GEN(dev->mdev, ++ log_max_ra_req_dc); ++ } else { ++ log_max_ra_res = 1 << MLX5_CAP_GEN(dev->mdev, ++ log_max_ra_res_qp); ++ log_max_ra_req = 1 << MLX5_CAP_GEN(dev->mdev, ++ log_max_ra_req_qp); ++ } ++ ++ if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC && ++ attr->max_rd_atomic > log_max_ra_res) { ++ mlx5_ib_dbg(dev, "invalid max_rd_atomic value %d\n", ++ attr->max_rd_atomic); ++ return false; ++ } ++ ++ if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC && ++ attr->max_dest_rd_atomic > log_max_ra_req) { ++ mlx5_ib_dbg(dev, "invalid max_dest_rd_atomic value %d\n", ++ attr->max_dest_rd_atomic); ++ return false; ++ } ++ return true; ++} ++ + int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, + int attr_mask, struct ib_udata *udata) + { +@@ -4586,21 +4620,8 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, + goto out; + } + +- if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC && +- attr->max_rd_atomic > +- (1 << MLX5_CAP_GEN(dev->mdev, log_max_ra_res_qp))) { +- mlx5_ib_dbg(dev, "invalid max_rd_atomic value %d\n", +- attr->max_rd_atomic); +- goto out; +- } +- +- if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC && +- attr->max_dest_rd_atomic > +- (1 << MLX5_CAP_GEN(dev->mdev, log_max_ra_req_qp))) { +- mlx5_ib_dbg(dev, "invalid max_dest_rd_atomic value %d\n", +- attr->max_dest_rd_atomic); ++ if (!validate_rd_atomic(dev, attr, attr_mask, qp_type)) + goto out; +- } + + if (cur_state == new_state && cur_state == IB_QPS_RESET) { + err = 0; +diff --git a/drivers/infiniband/hw/qedr/main.c b/drivers/infiniband/hw/qedr/main.c +index 755930be01b8e..6b59f97a182b0 100644 +--- a/drivers/infiniband/hw/qedr/main.c ++++ b/drivers/infiniband/hw/qedr/main.c +@@ -345,6 +345,10 @@ static int qedr_alloc_resources(struct qedr_dev *dev) + if (IS_IWARP(dev)) { + xa_init(&dev->qps); + dev->iwarp_wq = create_singlethread_workqueue("qedr_iwarpq"); ++ if (!dev->iwarp_wq) { ++ rc = -ENOMEM; ++ goto err1; ++ } + } + + /* Allocate Status blocks for CNQ */ +@@ -352,7 +356,7 @@ static int qedr_alloc_resources(struct qedr_dev *dev) + GFP_KERNEL); + if (!dev->sb_array) { + rc 
= -ENOMEM; +- goto err1; ++ goto err_destroy_wq; + } + + dev->cnq_array = kcalloc(dev->num_cnq, +@@ -403,6 +407,9 @@ err3: + kfree(dev->cnq_array); + err2: + kfree(dev->sb_array); ++err_destroy_wq: ++ if (IS_IWARP(dev)) ++ destroy_workqueue(dev->iwarp_wq); + err1: + kfree(dev->sgid_tbl); + return rc; diff --git a/drivers/infiniband/hw/qedr/qedr.h b/drivers/infiniband/hw/qedr/qedr.h index 8def88cfa3009..db9ef3e1eb97c 100644 --- a/drivers/infiniband/hw/qedr/qedr.h @@ -145473,6 +176101,32 @@ index ac11943a5ddb0..bf2f30d67949d 100644 } pkt->addrlimit = addrlimit; +diff --git a/drivers/infiniband/hw/usnic/usnic_uiom.c b/drivers/infiniband/hw/usnic/usnic_uiom.c +index 760b254ba42d6..48a57568cad69 100644 +--- a/drivers/infiniband/hw/usnic/usnic_uiom.c ++++ b/drivers/infiniband/hw/usnic/usnic_uiom.c +@@ -281,8 +281,8 @@ iter_chunk: + size = pa_end - pa_start + PAGE_SIZE; + usnic_dbg("va 0x%lx pa %pa size 0x%zx flags 0x%x", + va_start, &pa_start, size, flags); +- err = iommu_map(pd->domain, va_start, pa_start, +- size, flags); ++ err = iommu_map_atomic(pd->domain, va_start, ++ pa_start, size, flags); + if (err) { + usnic_err("Failed to map va 0x%lx pa %pa size 0x%zx with err %d\n", + va_start, &pa_start, size, err); +@@ -298,8 +298,8 @@ iter_chunk: + size = pa - pa_start + PAGE_SIZE; + usnic_dbg("va 0x%lx pa %pa size 0x%zx flags 0x%x\n", + va_start, &pa_start, size, flags); +- err = iommu_map(pd->domain, va_start, pa_start, +- size, flags); ++ err = iommu_map_atomic(pd->domain, va_start, ++ pa_start, size, flags); + if (err) { + usnic_err("Failed to map va 0x%lx pa %pa size 0x%zx with err %d\n", + va_start, &pa_start, size, err); diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index 3305f2744bfaa..3acab569fbb94 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c @@ -145891,7 +176545,7 @@ index 742e6ec93686c..8723898701063 100644 RXE_PORT_BAD_PKEY_CNTR = 0, RXE_PORT_QKEY_VIOL_CNTR = 0, diff --git a/drivers/infiniband/sw/rxe/rxe_qp.c b/drivers/infiniband/sw/rxe/rxe_qp.c -index 1ab6af7ddb254..57ebf4871608d 100644 +index 1ab6af7ddb254..d7a968356a9bb 100644 --- a/drivers/infiniband/sw/rxe/rxe_qp.c +++ b/drivers/infiniband/sw/rxe/rxe_qp.c @@ -195,6 +195,14 @@ static void rxe_qp_init_misc(struct rxe_dev *rxe, struct rxe_qp *qp, @@ -145979,13 +176633,21 @@ index 1ab6af7ddb254..57ebf4871608d 100644 if (qp->sq.queue) { __rxe_do_task(&qp->comp.task); __rxe_do_task(&qp->req.task); -@@ -834,8 +835,10 @@ static void rxe_qp_do_cleanup(struct work_struct *work) +@@ -829,13 +830,15 @@ static void rxe_qp_do_cleanup(struct work_struct *work) + qp->resp.mr = NULL; + } +- if (qp_type(qp) == IB_QPT_RC) +- sk_dst_reset(qp->sk->sk); +- free_rd_atomic_resources(qp); - kernel_sock_shutdown(qp->sk, SHUT_RDWR); - sock_release(qp->sk); + if (qp->sk) { ++ if (qp_type(qp) == IB_QPT_RC) ++ sk_dst_reset(qp->sk->sk); ++ + kernel_sock_shutdown(qp->sk, SHUT_RDWR); + sock_release(qp->sk); + } @@ -147037,6 +177699,52 @@ index 7a5ed86ffc9f9..69d639cab8985 100644 } return; +diff --git a/drivers/infiniband/sw/siw/siw_cq.c b/drivers/infiniband/sw/siw/siw_cq.c +index d68e37859e73b..403029de6b92d 100644 +--- a/drivers/infiniband/sw/siw/siw_cq.c ++++ b/drivers/infiniband/sw/siw/siw_cq.c +@@ -56,8 +56,6 @@ int siw_reap_cqe(struct siw_cq *cq, struct ib_wc *wc) + if (READ_ONCE(cqe->flags) & SIW_WQE_VALID) { + memset(wc, 0, sizeof(*wc)); + wc->wr_id = cqe->id; +- wc->status = map_cqe_status[cqe->status].ib; +- wc->opcode = map_wc_opcode[cqe->opcode]; + wc->byte_len = cqe->bytes; + + /* +@@ -71,10 +69,32 @@ int 
siw_reap_cqe(struct siw_cq *cq, struct ib_wc *wc) + wc->wc_flags = IB_WC_WITH_INVALIDATE; + } + wc->qp = cqe->base_qp; ++ wc->opcode = map_wc_opcode[cqe->opcode]; ++ wc->status = map_cqe_status[cqe->status].ib; + siw_dbg_cq(cq, + "idx %u, type %d, flags %2x, id 0x%pK\n", + cq->cq_get % cq->num_cqe, cqe->opcode, + cqe->flags, (void *)(uintptr_t)cqe->id); ++ } else { ++ /* ++ * A malicious user may set invalid opcode or ++ * status in the user mmapped CQE array. ++ * Sanity check and correct values in that case ++ * to avoid out-of-bounds access to global arrays ++ * for opcode and status mapping. ++ */ ++ u8 opcode = cqe->opcode; ++ u16 status = cqe->status; ++ ++ if (opcode >= SIW_NUM_OPCODES) { ++ opcode = 0; ++ status = SIW_WC_GENERAL_ERR; ++ } else if (status >= SIW_NUM_WC_STATUS) { ++ status = SIW_WC_GENERAL_ERR; ++ } ++ wc->opcode = map_wc_opcode[opcode]; ++ wc->status = map_cqe_status[status].ib; ++ + } + WRITE_ONCE(cqe->flags, 0); + cq->cq_get++; diff --git a/drivers/infiniband/sw/siw/siw_qp.c b/drivers/infiniband/sw/siw/siw_qp.c index 7e01f2438afc5..e6f634971228e 100644 --- a/drivers/infiniband/sw/siw/siw_qp.c @@ -147165,7 +177873,7 @@ index 60116f20653c7..fd721cc19682e 100644 spin_unlock_irqrestore(&qp->orq_lock, flags); diff --git a/drivers/infiniband/sw/siw/siw_qp_tx.c b/drivers/infiniband/sw/siw/siw_qp_tx.c -index 1f4e60257700e..7d47b521070b1 100644 +index 1f4e60257700e..05052b49107f2 100644 --- a/drivers/infiniband/sw/siw/siw_qp_tx.c +++ b/drivers/infiniband/sw/siw/siw_qp_tx.c @@ -29,7 +29,7 @@ static struct page *siw_get_pblpage(struct siw_mem *mem, u64 addr, int *idx) @@ -147173,7 +177881,7 @@ index 1f4e60257700e..7d47b521070b1 100644 if (paddr) - return virt_to_page(paddr); -+ return virt_to_page((void *)paddr); ++ return virt_to_page((void *)(uintptr_t)paddr); return NULL; } @@ -147205,7 +177913,7 @@ index 1f4e60257700e..7d47b521070b1 100644 } diff --git a/drivers/infiniband/sw/siw/siw_verbs.c b/drivers/infiniband/sw/siw/siw_verbs.c -index 1b36350601faa..ff33659acffa9 100644 +index 1b36350601faa..9c7fbda9e068a 100644 --- a/drivers/infiniband/sw/siw/siw_verbs.c +++ b/drivers/infiniband/sw/siw/siw_verbs.c @@ -311,7 +311,8 @@ int siw_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *attrs, @@ -147235,6 +177943,100 @@ index 1b36350601faa..ff33659acffa9 100644 return 0; } +@@ -670,13 +674,45 @@ static int siw_copy_inline_sgl(const struct ib_send_wr *core_wr, + static int siw_sq_flush_wr(struct siw_qp *qp, const struct ib_send_wr *wr, + const struct ib_send_wr **bad_wr) + { +- struct siw_sqe sqe = {}; + int rv = 0; + + while (wr) { +- sqe.id = wr->wr_id; +- sqe.opcode = wr->opcode; +- rv = siw_sqe_complete(qp, &sqe, 0, SIW_WC_WR_FLUSH_ERR); ++ struct siw_sqe sqe = {}; ++ ++ switch (wr->opcode) { ++ case IB_WR_RDMA_WRITE: ++ sqe.opcode = SIW_OP_WRITE; ++ break; ++ case IB_WR_RDMA_READ: ++ sqe.opcode = SIW_OP_READ; ++ break; ++ case IB_WR_RDMA_READ_WITH_INV: ++ sqe.opcode = SIW_OP_READ_LOCAL_INV; ++ break; ++ case IB_WR_SEND: ++ sqe.opcode = SIW_OP_SEND; ++ break; ++ case IB_WR_SEND_WITH_IMM: ++ sqe.opcode = SIW_OP_SEND_WITH_IMM; ++ break; ++ case IB_WR_SEND_WITH_INV: ++ sqe.opcode = SIW_OP_SEND_REMOTE_INV; ++ break; ++ case IB_WR_LOCAL_INV: ++ sqe.opcode = SIW_OP_INVAL_STAG; ++ break; ++ case IB_WR_REG_MR: ++ sqe.opcode = SIW_OP_REG_MR; ++ break; ++ default: ++ rv = -EINVAL; ++ break; ++ } ++ if (!rv) { ++ sqe.id = wr->wr_id; ++ rv = siw_sqe_complete(qp, &sqe, 0, ++ SIW_WC_WR_FLUSH_ERR); ++ } + if (rv) { + if (bad_wr) + *bad_wr = wr; +diff --git 
a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c +index 0aa8629fdf62e..1ea95f8009b82 100644 +--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c ++++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c +@@ -2197,6 +2197,14 @@ int ipoib_intf_init(struct ib_device *hca, u32 port, const char *name, + rn->attach_mcast = ipoib_mcast_attach; + rn->detach_mcast = ipoib_mcast_detach; + rn->hca = hca; ++ ++ rc = netif_set_real_num_tx_queues(dev, 1); ++ if (rc) ++ goto out; ++ ++ rc = netif_set_real_num_rx_queues(dev, 1); ++ if (rc) ++ goto out; + } + + priv->rn_ops = dev->netdev_ops; +diff --git a/drivers/infiniband/ulp/ipoib/ipoib_netlink.c b/drivers/infiniband/ulp/ipoib/ipoib_netlink.c +index 5b05cf3837da1..28e9b70844e44 100644 +--- a/drivers/infiniband/ulp/ipoib/ipoib_netlink.c ++++ b/drivers/infiniband/ulp/ipoib/ipoib_netlink.c +@@ -42,6 +42,11 @@ static const struct nla_policy ipoib_policy[IFLA_IPOIB_MAX + 1] = { + [IFLA_IPOIB_UMCAST] = { .type = NLA_U16 }, + }; + ++static unsigned int ipoib_get_max_num_queues(void) ++{ ++ return min_t(unsigned int, num_possible_cpus(), 128); ++} ++ + static int ipoib_fill_info(struct sk_buff *skb, const struct net_device *dev) + { + struct ipoib_dev_priv *priv = ipoib_priv(dev); +@@ -173,6 +178,8 @@ static struct rtnl_link_ops ipoib_link_ops __read_mostly = { + .changelink = ipoib_changelink, + .get_size = ipoib_get_size, + .fill_info = ipoib_fill_info, ++ .get_num_rx_queues = ipoib_get_max_num_queues, ++ .get_num_tx_queues = ipoib_get_max_num_queues, + }; + + struct rtnl_link_ops *ipoib_get_link_ops(void) diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c index 776e46ee95dad..ef2d165d15a8b 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.c @@ -147579,7 +178381,7 @@ index 4ee592ccf979b..dbf9a778c3bd7 100644 static struct attribute *rtrs_clt_attrs[] = { diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt.c b/drivers/infiniband/ulp/rtrs/rtrs-clt.c -index bc8824b4ee0d4..c644617725a88 100644 +index bc8824b4ee0d4..54eb6556c63db 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-clt.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-clt.c @@ -48,12 +48,12 @@ static struct class *rtrs_clt_dev_class; @@ -148192,7 +178994,7 @@ index bc8824b4ee0d4..c644617725a88 100644 void (*conf)(void *priv, int errno), struct rtrs_permit *permit, void *priv, const struct kvec *vec, size_t usr_len, -@@ -958,13 +959,13 @@ static void rtrs_clt_init_req(struct rtrs_clt_io_req *req, +@@ -958,15 +959,15 @@ static void rtrs_clt_init_req(struct rtrs_clt_io_req *req, req->sg_cnt = sg_cnt; req->priv = priv; req->dir = dir; @@ -148206,8 +179008,11 @@ index bc8824b4ee0d4..c644617725a88 100644 - req->mp_policy = sess->clt->mp_policy; + req->mp_policy = clt_path->clt->mp_policy; - iov_iter_kvec(&iter, READ, vec, 1, usr_len); +- iov_iter_kvec(&iter, READ, vec, 1, usr_len); ++ iov_iter_kvec(&iter, WRITE, vec, 1, usr_len); len = _copy_from_iter(req->iu->buf, usr_len, &iter); + WARN_ON(len != usr_len); + @@ -974,7 +975,7 @@ static void rtrs_clt_init_req(struct rtrs_clt_io_req *req, } @@ -152529,7 +183334,7 @@ index 859c79685daf3..c529b6d63c9a0 100644 int rtrs_srv_get_queue_depth(struct rtrs_srv *sess); diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c -index 71eda91e810cf..2f4991cea98c6 100644 +index 71eda91e810cf..a6117a7d0ab17 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -1955,7 +1955,8 @@ static void srp_process_rsp(struct srp_rdma_ch *ch, struct srp_rsp 
*rsp) @@ -152560,7 +183365,215 @@ index 71eda91e810cf..2f4991cea98c6 100644 tag = blk_mq_unique_tag(scsi_cmd_to_rq(scmnd)); ch_idx = blk_mq_unique_tag_to_hwq(tag); if (WARN_ON_ONCE(ch_idx >= target->ch_count)) -@@ -4038,9 +4037,11 @@ static void srp_remove_one(struct ib_device *device, void *client_data) +@@ -3398,7 +3397,8 @@ static int srp_parse_options(struct net *net, const char *buf, + break; + + case SRP_OPT_PKEY: +- if (match_hex(args, &token)) { ++ ret = match_hex(args, &token); ++ if (ret) { + pr_warn("bad P_Key parameter '%s'\n", p); + goto out; + } +@@ -3458,7 +3458,8 @@ static int srp_parse_options(struct net *net, const char *buf, + break; + + case SRP_OPT_MAX_SECT: +- if (match_int(args, &token)) { ++ ret = match_int(args, &token); ++ if (ret) { + pr_warn("bad max sect parameter '%s'\n", p); + goto out; + } +@@ -3466,8 +3467,15 @@ static int srp_parse_options(struct net *net, const char *buf, + break; + + case SRP_OPT_QUEUE_SIZE: +- if (match_int(args, &token) || token < 1) { ++ ret = match_int(args, &token); ++ if (ret) { ++ pr_warn("match_int() failed for queue_size parameter '%s', Error %d\n", ++ p, ret); ++ goto out; ++ } ++ if (token < 1) { + pr_warn("bad queue_size parameter '%s'\n", p); ++ ret = -EINVAL; + goto out; + } + target->scsi_host->can_queue = token; +@@ -3478,25 +3486,40 @@ static int srp_parse_options(struct net *net, const char *buf, + break; + + case SRP_OPT_MAX_CMD_PER_LUN: +- if (match_int(args, &token) || token < 1) { ++ ret = match_int(args, &token); ++ if (ret) { ++ pr_warn("match_int() failed for max cmd_per_lun parameter '%s', Error %d\n", ++ p, ret); ++ goto out; ++ } ++ if (token < 1) { + pr_warn("bad max cmd_per_lun parameter '%s'\n", + p); ++ ret = -EINVAL; + goto out; + } + target->scsi_host->cmd_per_lun = token; + break; + + case SRP_OPT_TARGET_CAN_QUEUE: +- if (match_int(args, &token) || token < 1) { ++ ret = match_int(args, &token); ++ if (ret) { ++ pr_warn("match_int() failed for max target_can_queue parameter '%s', Error %d\n", ++ p, ret); ++ goto out; ++ } ++ if (token < 1) { + pr_warn("bad max target_can_queue parameter '%s'\n", + p); ++ ret = -EINVAL; + goto out; + } + target->target_can_queue = token; + break; + + case SRP_OPT_IO_CLASS: +- if (match_hex(args, &token)) { ++ ret = match_hex(args, &token); ++ if (ret) { + pr_warn("bad IO class parameter '%s'\n", p); + goto out; + } +@@ -3505,6 +3528,7 @@ static int srp_parse_options(struct net *net, const char *buf, + pr_warn("unknown IO class parameter value %x specified (use %x or %x).\n", + token, SRP_REV10_IB_IO_CLASS, + SRP_REV16A_IB_IO_CLASS); ++ ret = -EINVAL; + goto out; + } + target->io_class = token; +@@ -3527,16 +3551,24 @@ static int srp_parse_options(struct net *net, const char *buf, + break; + + case SRP_OPT_CMD_SG_ENTRIES: +- if (match_int(args, &token) || token < 1 || token > 255) { ++ ret = match_int(args, &token); ++ if (ret) { ++ pr_warn("match_int() failed for max cmd_sg_entries parameter '%s', Error %d\n", ++ p, ret); ++ goto out; ++ } ++ if (token < 1 || token > 255) { + pr_warn("bad max cmd_sg_entries parameter '%s'\n", + p); ++ ret = -EINVAL; + goto out; + } + target->cmd_sg_cnt = token; + break; + + case SRP_OPT_ALLOW_EXT_SG: +- if (match_int(args, &token)) { ++ ret = match_int(args, &token); ++ if (ret) { + pr_warn("bad allow_ext_sg parameter '%s'\n", p); + goto out; + } +@@ -3544,43 +3576,77 @@ static int srp_parse_options(struct net *net, const char *buf, + break; + + case SRP_OPT_SG_TABLESIZE: +- if (match_int(args, &token) || token < 1 || +- token > 
SG_MAX_SEGMENTS) { ++ ret = match_int(args, &token); ++ if (ret) { ++ pr_warn("match_int() failed for max sg_tablesize parameter '%s', Error %d\n", ++ p, ret); ++ goto out; ++ } ++ if (token < 1 || token > SG_MAX_SEGMENTS) { + pr_warn("bad max sg_tablesize parameter '%s'\n", + p); ++ ret = -EINVAL; + goto out; + } + target->sg_tablesize = token; + break; + + case SRP_OPT_COMP_VECTOR: +- if (match_int(args, &token) || token < 0) { ++ ret = match_int(args, &token); ++ if (ret) { ++ pr_warn("match_int() failed for comp_vector parameter '%s', Error %d\n", ++ p, ret); ++ goto out; ++ } ++ if (token < 0) { + pr_warn("bad comp_vector parameter '%s'\n", p); ++ ret = -EINVAL; + goto out; + } + target->comp_vector = token; + break; + + case SRP_OPT_TL_RETRY_COUNT: +- if (match_int(args, &token) || token < 2 || token > 7) { ++ ret = match_int(args, &token); ++ if (ret) { ++ pr_warn("match_int() failed for tl_retry_count parameter '%s', Error %d\n", ++ p, ret); ++ goto out; ++ } ++ if (token < 2 || token > 7) { + pr_warn("bad tl_retry_count parameter '%s' (must be a number between 2 and 7)\n", + p); ++ ret = -EINVAL; + goto out; + } + target->tl_retry_count = token; + break; + + case SRP_OPT_MAX_IT_IU_SIZE: +- if (match_int(args, &token) || token < 0) { ++ ret = match_int(args, &token); ++ if (ret) { ++ pr_warn("match_int() failed for max it_iu_size parameter '%s', Error %d\n", ++ p, ret); ++ goto out; ++ } ++ if (token < 0) { + pr_warn("bad maximum initiator to target IU size '%s'\n", p); ++ ret = -EINVAL; + goto out; + } + target->max_it_iu_size = token; + break; + + case SRP_OPT_CH_COUNT: +- if (match_int(args, &token) || token < 1) { ++ ret = match_int(args, &token); ++ if (ret) { ++ pr_warn("match_int() failed for channel count parameter '%s', Error %d\n", ++ p, ret); ++ goto out; ++ } ++ if (token < 1) { + pr_warn("bad channel count %s\n", p); ++ ret = -EINVAL; + goto out; + } + target->ch_count = token; +@@ -3589,6 +3655,7 @@ static int srp_parse_options(struct net *net, const char *buf, + default: + pr_warn("unknown parameter or missing value '%s' in target creation request\n", + p); ++ ret = -EINVAL; + goto out; + } + } +@@ -4038,9 +4105,11 @@ static void srp_remove_one(struct ib_device *device, void *client_data) spin_unlock(&host->target_lock); /* @@ -152574,6 +183587,32 @@ index 71eda91e810cf..2f4991cea98c6 100644 flush_workqueue(srp_remove_wq); kfree(host); +diff --git a/drivers/infiniband/ulp/srp/ib_srp.h b/drivers/infiniband/ulp/srp/ib_srp.h +index abccddeea1e32..152242e8f733d 100644 +--- a/drivers/infiniband/ulp/srp/ib_srp.h ++++ b/drivers/infiniband/ulp/srp/ib_srp.h +@@ -62,9 +62,6 @@ enum { + SRP_DEFAULT_CMD_SQ_SIZE = SRP_DEFAULT_QUEUE_SIZE - SRP_RSP_SQ_SIZE - + SRP_TSK_MGMT_SQ_SIZE, + +- SRP_TAG_NO_REQ = ~0U, +- SRP_TAG_TSK_MGMT = 1U << 31, +- + SRP_MAX_PAGES_PER_MR = 512, + + SRP_MAX_ADD_CDB_LEN = 16, +@@ -79,6 +76,11 @@ enum { + sizeof(struct srp_imm_buf), + }; + ++enum { ++ SRP_TAG_NO_REQ = ~0U, ++ SRP_TAG_TSK_MGMT = BIT(31), ++}; ++ + enum srp_target_state { + SRP_TARGET_SCANNING, + SRP_TARGET_LIVE, diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index 3cadf12954172..7b69b0c9e48d9 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -152931,8 +183970,20 @@ index ccaeb24263854..ba246fabc6c17 100644 switch (type) { case EV_KEY: __set_bit(code, dev->keybit); +diff --git a/drivers/input/joystick/Kconfig b/drivers/input/joystick/Kconfig +index 3b23078bc7b5b..db4135bbd279a 100644 +--- a/drivers/input/joystick/Kconfig ++++ 
b/drivers/input/joystick/Kconfig +@@ -46,6 +46,7 @@ config JOYSTICK_A3D + config JOYSTICK_ADC + tristate "Simple joystick connected over ADC" + depends on IIO ++ select IIO_BUFFER + select IIO_BUFFER_CB + help + Say Y here if you have a simple joystick connected over ADC. diff --git a/drivers/input/joystick/iforce/iforce-main.c b/drivers/input/joystick/iforce/iforce-main.c -index b2a68bc9f0b4d..b86de1312512b 100644 +index b2a68bc9f0b4d..84b87526b7ba3 100644 --- a/drivers/input/joystick/iforce/iforce-main.c +++ b/drivers/input/joystick/iforce/iforce-main.c @@ -50,6 +50,7 @@ static struct iforce_device iforce_device[] = { @@ -152943,6 +183994,33 @@ index b2a68bc9f0b4d..b86de1312512b 100644 { 0x05ef, 0x8888, "AVB Top Shot Force Feedback Racing Wheel", btn_wheel, abs_wheel, ff_iforce }, //? { 0x061c, 0xc0a4, "ACT LABS Force RS", btn_wheel, abs_wheel, ff_iforce }, //? { 0x061c, 0xc084, "ACT LABS Force RS", btn_wheel, abs_wheel, ff_iforce }, +@@ -272,22 +273,22 @@ int iforce_init_device(struct device *parent, u16 bustype, + * Get device info. + */ + +- if (!iforce_get_id_packet(iforce, 'M', buf, &len) || len < 3) ++ if (!iforce_get_id_packet(iforce, 'M', buf, &len) && len >= 3) + input_dev->id.vendor = get_unaligned_le16(buf + 1); + else + dev_warn(&iforce->dev->dev, "Device does not respond to id packet M\n"); + +- if (!iforce_get_id_packet(iforce, 'P', buf, &len) || len < 3) ++ if (!iforce_get_id_packet(iforce, 'P', buf, &len) && len >= 3) + input_dev->id.product = get_unaligned_le16(buf + 1); + else + dev_warn(&iforce->dev->dev, "Device does not respond to id packet P\n"); + +- if (!iforce_get_id_packet(iforce, 'B', buf, &len) || len < 3) ++ if (!iforce_get_id_packet(iforce, 'B', buf, &len) && len >= 3) + iforce->device_memory.end = get_unaligned_le16(buf + 1); + else + dev_warn(&iforce->dev->dev, "Device does not respond to id packet B\n"); + +- if (!iforce_get_id_packet(iforce, 'N', buf, &len) || len < 2) ++ if (!iforce_get_id_packet(iforce, 'N', buf, &len) && len >= 2) + ff_effects = buf[1]; + else + dev_warn(&iforce->dev->dev, "Device does not respond to id packet N\n"); diff --git a/drivers/input/joystick/iforce/iforce-serio.c b/drivers/input/joystick/iforce/iforce-serio.c index f95a81b9fac72..2380546d79782 100644 --- a/drivers/input/joystick/iforce/iforce-serio.c @@ -153213,6 +184291,19 @@ index 65286762b02ab..ad8660be0127c 100644 #define SNVS_LPSR_REG 0x4C /* LP Status Register */ #define SNVS_LPCR_REG 0x38 /* LP Control Register */ #define SNVS_HPSR_REG 0x14 +diff --git a/drivers/input/misc/Kconfig b/drivers/input/misc/Kconfig +index dd5227cf86964..b5b8ddb536be4 100644 +--- a/drivers/input/misc/Kconfig ++++ b/drivers/input/misc/Kconfig +@@ -330,7 +330,7 @@ config INPUT_CPCAP_PWRBUTTON + + config INPUT_WISTRON_BTNS + tristate "x86 Wistron laptop button interface" +- depends on X86_32 ++ depends on X86_32 && !UML + select INPUT_SPARSEKMAP + select NEW_LEDS + select LEDS_CLASS diff --git a/drivers/input/misc/ariel-pwrbutton.c b/drivers/input/misc/ariel-pwrbutton.c index 17bbaac8b80c8..cdc80715b5fd6 100644 --- a/drivers/input/misc/ariel-pwrbutton.c @@ -153315,10 +184406,35 @@ index 3fb64dbda1a21..76873aa005b41 100644 MODULE_DESCRIPTION("RK805 PMIC Power Key driver"); MODULE_LICENSE("GPL"); diff --git a/drivers/input/misc/soc_button_array.c b/drivers/input/misc/soc_button_array.c -index cb6ec59a045d4..efffcf0ebd3b4 100644 +index cb6ec59a045d4..31c02c2019c1c 100644 --- a/drivers/input/misc/soc_button_array.c +++ b/drivers/input/misc/soc_button_array.c -@@ -85,13 +85,13 @@ static const 
struct dmi_system_id dmi_use_low_level_irq[] = { +@@ -18,6 +18,10 @@ + #include <linux/gpio.h> + #include <linux/platform_device.h> + ++static bool use_low_level_irq; ++module_param(use_low_level_irq, bool, 0444); ++MODULE_PARM_DESC(use_low_level_irq, "Use low-level triggered IRQ instead of edge triggered"); ++ + struct soc_button_info { + const char *name; + int acpi_index; +@@ -73,6 +77,13 @@ static const struct dmi_system_id dmi_use_low_level_irq[] = { + DMI_MATCH(DMI_PRODUCT_NAME, "Aspire SW5-012"), + }, + }, ++ { ++ /* Acer Switch V 10 SW5-017, same issue as Acer Switch 10 SW5-012. */ ++ .matches = { ++ DMI_MATCH(DMI_SYS_VENDOR, "Acer"), ++ DMI_MATCH(DMI_PRODUCT_NAME, "SW5-017"), ++ }, ++ }, + { + /* + * Acer One S1003. _LID method messes with power-button GPIO +@@ -85,13 +96,13 @@ static const struct dmi_system_id dmi_use_low_level_irq[] = { }, { /* @@ -153334,6 +184450,16 @@ index cb6ec59a045d4..efffcf0ebd3b4 100644 }, }, {} /* Terminating entry */ +@@ -164,7 +175,8 @@ soc_button_device_create(struct platform_device *pdev, + } + + /* See dmi_use_low_level_irq[] comment */ +- if (!autorepeat && dmi_check_system(dmi_use_low_level_irq)) { ++ if (!autorepeat && (use_low_level_irq || ++ dmi_check_system(dmi_use_low_level_irq))) { + irq_set_irq_type(irq, IRQ_TYPE_LEVEL_LOW); + gpio_keys[n_buttons].irq = irq; + gpio_keys[n_buttons].gpio = -ENOENT; diff --git a/drivers/input/misc/sparcspkr.c b/drivers/input/misc/sparcspkr.c index fe43e5557ed72..cdcb7737c46aa 100644 --- a/drivers/input/misc/sparcspkr.c @@ -153557,7 +184683,7 @@ index a9065c6ab5508..da2c67cb86422 100644 gscps2_reset(ps2port); diff --git a/drivers/input/serio/i8042-x86ia64io.h b/drivers/input/serio/i8042-x86ia64io.h -index a5a0035536462..4b0201cf71f5e 100644 +index a5a0035536462..239c777f8271c 100644 --- a/drivers/input/serio/i8042-x86ia64io.h +++ b/drivers/input/serio/i8042-x86ia64io.h @@ -67,605 +67,767 @@ static inline void i8042_write_command(int val) @@ -153627,22 +184753,22 @@ index a5a0035536462..4b0201cf71f5e 100644 + .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_NEVER) }, { -+ /* ASUS ZenBook UX425UA */ ++ /* ASUS ZenBook UX425UA/QA */ .matches = { DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), - DMI_MATCH(DMI_PRODUCT_NAME, "X750LN"), -+ DMI_MATCH(DMI_PRODUCT_NAME, "ZenBook UX425UA"), ++ DMI_MATCH(DMI_PRODUCT_NAME, "ZenBook UX425"), }, + .driver_data = (void *)(SERIO_QUIRK_PROBE_DEFER | SERIO_QUIRK_RESET_NEVER) }, { -+ /* ASUS ZenBook UM325UA */ ++ /* ASUS ZenBook UM325UA/QA */ .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "Compaq"), - DMI_MATCH(DMI_PRODUCT_NAME , "ProLiant"), - DMI_MATCH(DMI_PRODUCT_VERSION, "8500"), + DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), -+ DMI_MATCH(DMI_PRODUCT_NAME, "ZenBook UX325UA_UM325UA"), ++ DMI_MATCH(DMI_PRODUCT_NAME, "ZenBook UX325"), }, + .driver_data = (void *)(SERIO_QUIRK_PROBE_DEFER | SERIO_QUIRK_RESET_NEVER) }, @@ -154623,7 +185749,7 @@ index a5a0035536462..4b0201cf71f5e 100644 }, { /* Medion Akoya E1222 */ -@@ -680,306 +843,434 @@ static const struct dmi_system_id __initconst i8042_dmi_reset_table[] = { +@@ -680,306 +843,441 @@ static const struct dmi_system_id __initconst i8042_dmi_reset_table[] = { DMI_MATCH(DMI_SYS_VENDOR, "MEDION"), DMI_MATCH(DMI_PRODUCT_NAME, "E122X"), }, @@ -154637,19 +185763,16 @@ index a5a0035536462..4b0201cf71f5e 100644 - DMI_MATCH(DMI_PRODUCT_NAME, "N10"), + DMI_MATCH(DMI_BOARD_VENDOR, "MICRO-STAR INTERNATIONAL CO., LTD"), + DMI_MATCH(DMI_BOARD_NAME, "U-100"), - }, ++ }, + .driver_data = (void *)(SERIO_QUIRK_RESET_ALWAYS | 
SERIO_QUIRK_NOPNP) - }, - { -- /* Dell Vostro 1320 */ ++ }, ++ { + /* + * No data is coming from the touchscreen unless KBC + * is in legacy mode. + */ + /* Panasonic CF-29 */ - .matches = { -- DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), -- DMI_MATCH(DMI_PRODUCT_NAME, "Vostro 1320"), ++ .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Matsushita"), + DMI_MATCH(DMI_PRODUCT_NAME, "CF-29"), + }, @@ -154736,18 +185859,18 @@ index a5a0035536462..4b0201cf71f5e 100644 + .driver_data = (void *)(SERIO_QUIRK_NOLOOP) }, { -- /* Dell Vostro 1520 */ +- /* Dell Vostro 1320 */ + /* Sharp Actius MM20 */ .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), -- DMI_MATCH(DMI_PRODUCT_NAME, "Vostro 1520"), +- DMI_MATCH(DMI_PRODUCT_NAME, "Vostro 1320"), + DMI_MATCH(DMI_SYS_VENDOR, "SHARP"), + DMI_MATCH(DMI_PRODUCT_NAME, "PC-MM20 Series"), }, + .driver_data = (void *)(SERIO_QUIRK_NOMUX) }, { -- /* Dell Vostro 1720 */ +- /* Dell Vostro 1520 */ + /* + * Sony Vaio FZ-240E - + * reset and GET ID commands issued via KBD port are @@ -154755,59 +185878,69 @@ index a5a0035536462..4b0201cf71f5e 100644 + */ .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), -- DMI_MATCH(DMI_PRODUCT_NAME, "Vostro 1720"), +- DMI_MATCH(DMI_PRODUCT_NAME, "Vostro 1520"), + DMI_MATCH(DMI_SYS_VENDOR, "Sony Corporation"), + DMI_MATCH(DMI_PRODUCT_NAME, "VGN-FZ240E"), }, + .driver_data = (void *)(SERIO_QUIRK_NOMUX) }, { -- /* Lenovo Ideapad U455 */ +- /* Dell Vostro 1720 */ + /* + * Most (all?) VAIOs do not have external PS/2 ports nor + * they implement active multiplexing properly, and + * MUX discovery usually messes up keyboard/touchpad. + */ .matches = { -- DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), -- DMI_MATCH(DMI_PRODUCT_NAME, "20046"), +- DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), +- DMI_MATCH(DMI_PRODUCT_NAME, "Vostro 1720"), + DMI_MATCH(DMI_SYS_VENDOR, "Sony Corporation"), + DMI_MATCH(DMI_BOARD_NAME, "VAIO"), }, + .driver_data = (void *)(SERIO_QUIRK_NOMUX) }, { -- /* Lenovo ThinkPad L460 */ +- /* Lenovo Ideapad U455 */ + /* Sony Vaio FS-115b */ .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), -- DMI_MATCH(DMI_PRODUCT_VERSION, "ThinkPad L460"), +- DMI_MATCH(DMI_PRODUCT_NAME, "20046"), + DMI_MATCH(DMI_SYS_VENDOR, "Sony Corporation"), + DMI_MATCH(DMI_PRODUCT_NAME, "VGN-FS115B"), }, + .driver_data = (void *)(SERIO_QUIRK_NOMUX) }, { -- /* Clevo P650RS, 650RP6, Sager NP8152-S, and others */ +- /* Lenovo ThinkPad L460 */ + /* + * Sony Vaio VGN-CS series require MUX or the touch sensor + * buttons will disturb touchpad operation + */ .matches = { -- DMI_MATCH(DMI_SYS_VENDOR, "Notebook"), -- DMI_MATCH(DMI_PRODUCT_NAME, "P65xRP"), +- DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), +- DMI_MATCH(DMI_PRODUCT_VERSION, "ThinkPad L460"), + DMI_MATCH(DMI_SYS_VENDOR, "Sony Corporation"), + DMI_MATCH(DMI_PRODUCT_NAME, "VGN-CS"), }, + .driver_data = (void *)(SERIO_QUIRK_FORCEMUX) }, { +- /* Clevo P650RS, 650RP6, Sager NP8152-S, and others */ + .matches = { +- DMI_MATCH(DMI_SYS_VENDOR, "Notebook"), +- DMI_MATCH(DMI_PRODUCT_NAME, "P65xRP"), ++ DMI_MATCH(DMI_SYS_VENDOR, "TOSHIBA"), ++ DMI_MATCH(DMI_PRODUCT_NAME, "Satellite P10"), + }, ++ .driver_data = (void *)(SERIO_QUIRK_NOMUX) + }, + { - /* Lenovo ThinkPad Twist S230u */ .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), - DMI_MATCH(DMI_PRODUCT_NAME, "33474HU"), + DMI_MATCH(DMI_SYS_VENDOR, "TOSHIBA"), -+ DMI_MATCH(DMI_PRODUCT_NAME, "Satellite P10"), ++ DMI_MATCH(DMI_PRODUCT_NAME, "EQUIUM A110"), }, + .driver_data = (void *)(SERIO_QUIRK_NOMUX) }, @@ -154818,7 +185951,7 @@ index a5a0035536462..4b0201cf71f5e 100644 - 
DMI_MATCH(DMI_PRODUCT_NAME, "Proteus"), - DMI_MATCH(DMI_PRODUCT_VERSION, "EL07R4"), + DMI_MATCH(DMI_SYS_VENDOR, "TOSHIBA"), -+ DMI_MATCH(DMI_PRODUCT_NAME, "EQUIUM A110"), ++ DMI_MATCH(DMI_PRODUCT_NAME, "SATELLITE C850D"), }, + .driver_data = (void *)(SERIO_QUIRK_NOMUX) }, @@ -154827,16 +185960,6 @@ index a5a0035536462..4b0201cf71f5e 100644 - -#ifdef CONFIG_PNP -static const struct dmi_system_id __initconst i8042_dmi_nopnp_table[] = { - { -- /* Intel MBO Desktop D845PESV */ - .matches = { -- DMI_MATCH(DMI_BOARD_NAME, "D845PESV"), -- DMI_MATCH(DMI_BOARD_VENDOR, "Intel Corporation"), -+ DMI_MATCH(DMI_SYS_VENDOR, "TOSHIBA"), -+ DMI_MATCH(DMI_PRODUCT_NAME, "SATELLITE C850D"), - }, -+ .driver_data = (void *)(SERIO_QUIRK_NOMUX) - }, + /* + * A lot of modern Clevo barebones have touchpad and/or keyboard issues + * after suspend fixable with nomux + reset + noloop + nopnp. Luckily, @@ -154845,12 +185968,9 @@ index a5a0035536462..4b0201cf71f5e 100644 + * board_name changed. + */ { -- /* -- * Intel NUC D54250WYK - does not have i8042 controller but -- * declares PS/2 devices in DSDT. -- */ +- /* Intel MBO Desktop D845PESV */ .matches = { -- DMI_MATCH(DMI_BOARD_NAME, "D54250WYK"), +- DMI_MATCH(DMI_BOARD_NAME, "D845PESV"), - DMI_MATCH(DMI_BOARD_VENDOR, "Intel Corporation"), + DMI_MATCH(DMI_BOARD_VENDOR, "TUXEDO"), + DMI_MATCH(DMI_BOARD_NAME, "AURA1501"), @@ -154859,10 +185979,13 @@ index a5a0035536462..4b0201cf71f5e 100644 + SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP) }, { -- /* MSI Wind U-100 */ +- /* +- * Intel NUC D54250WYK - does not have i8042 controller but +- * declares PS/2 devices in DSDT. +- */ .matches = { -- DMI_MATCH(DMI_BOARD_NAME, "U-100"), -- DMI_MATCH(DMI_BOARD_VENDOR, "MICRO-STAR INTERNATIONAL CO., LTD"), +- DMI_MATCH(DMI_BOARD_NAME, "D54250WYK"), +- DMI_MATCH(DMI_BOARD_VENDOR, "Intel Corporation"), + DMI_MATCH(DMI_BOARD_VENDOR, "TUXEDO"), + DMI_MATCH(DMI_BOARD_NAME, "EDUBOOK1502"), }, @@ -154870,37 +185993,39 @@ index a5a0035536462..4b0201cf71f5e 100644 + SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP) }, { -- /* Acer Aspire 5 A515 */ +- /* MSI Wind U-100 */ + /* Mivvy M310 */ .matches = { -- DMI_MATCH(DMI_BOARD_NAME, "Grumpy_PK"), -- DMI_MATCH(DMI_BOARD_VENDOR, "PK"), +- DMI_MATCH(DMI_BOARD_NAME, "U-100"), +- DMI_MATCH(DMI_BOARD_VENDOR, "MICRO-STAR INTERNATIONAL CO., LTD"), + DMI_MATCH(DMI_SYS_VENDOR, "VIOOO"), + DMI_MATCH(DMI_PRODUCT_NAME, "N10"), }, + .driver_data = (void *)(SERIO_QUIRK_RESET_ALWAYS) }, -- { } --}; -- --static const struct dmi_system_id __initconst i8042_dmi_laptop_table[] = { + /* + * Some laptops need keyboard reset before probing for the trackpad to get + * it detected, initialised & finally work. 
+ */ { +- /* Acer Aspire 5 A515 */ + /* Schenker XMG C504 - Elantech touchpad */ .matches = { -- DMI_MATCH(DMI_CHASSIS_TYPE, "8"), /* Portable */ +- DMI_MATCH(DMI_BOARD_NAME, "Grumpy_PK"), +- DMI_MATCH(DMI_BOARD_VENDOR, "PK"), + DMI_MATCH(DMI_SYS_VENDOR, "XMG"), + DMI_MATCH(DMI_PRODUCT_NAME, "C504"), }, + .driver_data = (void *)(SERIO_QUIRK_KBDRESET) }, +- { } +-}; +- +-static const struct dmi_system_id __initconst i8042_dmi_laptop_table[] = { { + /* Blue FB5601 */ .matches = { -- DMI_MATCH(DMI_CHASSIS_TYPE, "9"), /* Laptop */ +- DMI_MATCH(DMI_CHASSIS_TYPE, "8"), /* Portable */ + DMI_MATCH(DMI_SYS_VENDOR, "blue"), + DMI_MATCH(DMI_PRODUCT_NAME, "FB5601"), + DMI_MATCH(DMI_PRODUCT_VERSION, "M606"), @@ -154919,7 +186044,7 @@ index a5a0035536462..4b0201cf71f5e 100644 + */ { .matches = { -- DMI_MATCH(DMI_CHASSIS_TYPE, "10"), /* Notebook */ +- DMI_MATCH(DMI_CHASSIS_TYPE, "9"), /* Laptop */ + DMI_MATCH(DMI_BOARD_NAME, "LAPQC71A"), }, + .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS | @@ -154927,10 +186052,18 @@ index a5a0035536462..4b0201cf71f5e 100644 }, { .matches = { -- DMI_MATCH(DMI_CHASSIS_TYPE, "14"), /* Sub-Notebook */ +- DMI_MATCH(DMI_CHASSIS_TYPE, "10"), /* Notebook */ + DMI_MATCH(DMI_BOARD_NAME, "LAPQC71B"), }, + .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS | ++ SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP) + }, + { + .matches = { +- DMI_MATCH(DMI_CHASSIS_TYPE, "14"), /* Sub-Notebook */ ++ DMI_MATCH(DMI_BOARD_NAME, "N140CU"), + }, ++ .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS | + SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP) }, - { } @@ -154943,7 +186076,7 @@ index a5a0035536462..4b0201cf71f5e 100644 .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), - DMI_MATCH(DMI_PRODUCT_NAME, "Vostro V13"), -+ DMI_MATCH(DMI_BOARD_NAME, "N140CU"), ++ DMI_MATCH(DMI_BOARD_NAME, "N141CU"), }, + .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS | + SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP) @@ -154953,7 +186086,7 @@ index a5a0035536462..4b0201cf71f5e 100644 .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"), - DMI_MATCH(DMI_PRODUCT_NAME, "HP Pavilion dv4 Notebook PC"), -+ DMI_MATCH(DMI_BOARD_NAME, "N141CU"), ++ DMI_MATCH(DMI_BOARD_NAME, "NH5xAx"), }, + .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS | + SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP) @@ -154964,17 +186097,6 @@ index a5a0035536462..4b0201cf71f5e 100644 .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU"), - DMI_MATCH(DMI_PRODUCT_NAME, "LIFEBOOK A544"), -+ DMI_MATCH(DMI_BOARD_NAME, "NH5xAx"), - }, -+ .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS | -+ SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP) - }, - { -- /* Fujitsu AH544 laptop */ -- /* https://bugzilla.kernel.org/show_bug.cgi?id=69731 */ - .matches = { -- DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU"), -- DMI_MATCH(DMI_PRODUCT_NAME, "LIFEBOOK AH544"), + DMI_MATCH(DMI_BOARD_NAME, "NL5xRU"), }, + .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS | @@ -154991,11 +186113,11 @@ index a5a0035536462..4b0201cf71f5e 100644 + * the two different dmi strings below. NS50MU is not a typo! 
+ */ { -- /* Fujitsu U574 laptop */ +- /* Fujitsu AH544 laptop */ - /* https://bugzilla.kernel.org/show_bug.cgi?id=69731 */ .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU"), -- DMI_MATCH(DMI_PRODUCT_NAME, "LIFEBOOK U574"), +- DMI_MATCH(DMI_PRODUCT_NAME, "LIFEBOOK AH544"), + DMI_MATCH(DMI_BOARD_NAME, "NS50MU"), }, + .driver_data = (void *)(SERIO_QUIRK_NOAUX | SERIO_QUIRK_NOMUX | @@ -155003,16 +186125,27 @@ index a5a0035536462..4b0201cf71f5e 100644 + SERIO_QUIRK_NOPNP) }, { -- /* Fujitsu UH554 laptop */ +- /* Fujitsu U574 laptop */ +- /* https://bugzilla.kernel.org/show_bug.cgi?id=69731 */ .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU"), -- DMI_MATCH(DMI_PRODUCT_NAME, "LIFEBOOK UH544"), +- DMI_MATCH(DMI_PRODUCT_NAME, "LIFEBOOK U574"), + DMI_MATCH(DMI_BOARD_NAME, "NS50_70MU"), }, + .driver_data = (void *)(SERIO_QUIRK_NOAUX | SERIO_QUIRK_NOMUX | + SERIO_QUIRK_RESET_ALWAYS | SERIO_QUIRK_NOLOOP | + SERIO_QUIRK_NOPNP) }, + { +- /* Fujitsu UH554 laptop */ + .matches = { +- DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU"), +- DMI_MATCH(DMI_PRODUCT_NAME, "LIFEBOOK UH544"), ++ DMI_MATCH(DMI_BOARD_NAME, "NJ50_70CU"), + }, ++ .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS | ++ SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP) + }, - { } -}; - @@ -155025,36 +186158,41 @@ index a5a0035536462..4b0201cf71f5e 100644 -static const struct dmi_system_id __initconst i8042_dmi_dritek_table[] = { { - /* Acer Aspire 5100 */ ++ /* ++ * This is only a partial board_name and might be followed by ++ * another letter or number. DMI_MATCH however does do partial ++ * matching. ++ */ .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "Acer"), - DMI_MATCH(DMI_PRODUCT_NAME, "Aspire 5100"), -+ DMI_MATCH(DMI_BOARD_NAME, "NJ50_70CU"), ++ DMI_MATCH(DMI_PRODUCT_NAME, "P65xH"), }, + .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS | + SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP) }, { - /* Acer Aspire 5610 */ -+ /* -+ * This is only a partial board_name and might be followed by -+ * another letter or number. DMI_MATCH however does do partial -+ * matching. -+ */ ++ /* Clevo P650RS, 650RP6, Sager NP8152-S, and others */ .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "Acer"), - DMI_MATCH(DMI_PRODUCT_NAME, "Aspire 5610"), -+ DMI_MATCH(DMI_PRODUCT_NAME, "P65xH"), ++ DMI_MATCH(DMI_PRODUCT_NAME, "P65xRP"), }, + .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS | + SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP) }, { - /* Acer Aspire 5630 */ -+ /* Clevo P650RS, 650RP6, Sager NP8152-S, and others */ ++ /* ++ * This is only a partial board_name and might be followed by ++ * another letter or number. DMI_MATCH however does do partial ++ * matching. 
++ */ .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "Acer"), - DMI_MATCH(DMI_PRODUCT_NAME, "Aspire 5630"), -+ DMI_MATCH(DMI_PRODUCT_NAME, "P65xRP"), ++ DMI_MATCH(DMI_PRODUCT_NAME, "P65_P67H"), }, + .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS | + SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP) @@ -155069,7 +186207,7 @@ index a5a0035536462..4b0201cf71f5e 100644 .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "Acer"), - DMI_MATCH(DMI_PRODUCT_NAME, "Aspire 5650"), -+ DMI_MATCH(DMI_PRODUCT_NAME, "P65_P67H"), ++ DMI_MATCH(DMI_PRODUCT_NAME, "P65_67RP"), }, + .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS | + SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP) @@ -155084,7 +186222,7 @@ index a5a0035536462..4b0201cf71f5e 100644 .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "Acer"), - DMI_MATCH(DMI_PRODUCT_NAME, "Aspire 5680"), -+ DMI_MATCH(DMI_PRODUCT_NAME, "P65_67RP"), ++ DMI_MATCH(DMI_PRODUCT_NAME, "P65_67RS"), }, + .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS | + SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP) @@ -155099,22 +186237,17 @@ index a5a0035536462..4b0201cf71f5e 100644 .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "Acer"), - DMI_MATCH(DMI_PRODUCT_NAME, "Aspire 5720"), -+ DMI_MATCH(DMI_PRODUCT_NAME, "P65_67RS"), ++ DMI_MATCH(DMI_PRODUCT_NAME, "P67xRP"), }, + .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS | + SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP) }, { - /* Acer Aspire 9110 */ -+ /* -+ * This is only a partial board_name and might be followed by -+ * another letter or number. DMI_MATCH however does do partial -+ * matching. -+ */ .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "Acer"), - DMI_MATCH(DMI_PRODUCT_NAME, "Aspire 9110"), -+ DMI_MATCH(DMI_PRODUCT_NAME, "P67xRP"), ++ DMI_MATCH(DMI_BOARD_NAME, "PB50_70DFx,DDx"), }, + .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS | + SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP) @@ -155124,7 +186257,7 @@ index a5a0035536462..4b0201cf71f5e 100644 .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "Acer"), - DMI_MATCH(DMI_PRODUCT_NAME, "TravelMate 660"), -+ DMI_MATCH(DMI_BOARD_NAME, "PB50_70DFx,DDx"), ++ DMI_MATCH(DMI_BOARD_NAME, "PCX0DX"), }, + .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS | + SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP) @@ -155204,7 +186337,7 @@ index a5a0035536462..4b0201cf71f5e 100644 #endif /* CONFIG_X86 */ -@@ -1135,11 +1426,6 @@ static int __init i8042_pnp_init(void) +@@ -1135,11 +1433,6 @@ static int __init i8042_pnp_init(void) bool pnp_data_busted = false; int err; @@ -155216,7 +186349,7 @@ index a5a0035536462..4b0201cf71f5e 100644 if (i8042_nopnp) { pr_info("PNP detection disabled\n"); return 0; -@@ -1243,6 +1529,59 @@ static inline int i8042_pnp_init(void) { return 0; } +@@ -1243,6 +1536,59 @@ static inline int i8042_pnp_init(void) { return 0; } static inline void i8042_pnp_exit(void) { } #endif /* CONFIG_PNP */ @@ -155276,7 +186409,7 @@ index a5a0035536462..4b0201cf71f5e 100644 static int __init i8042_platform_init(void) { int retval; -@@ -1265,42 +1604,17 @@ static int __init i8042_platform_init(void) +@@ -1265,42 +1611,17 @@ static int __init i8042_platform_init(void) i8042_kbd_irq = I8042_MAP_IRQ(1); i8042_aux_irq = I8042_MAP_IRQ(12); @@ -155326,7 +186459,7 @@ index a5a0035536462..4b0201cf71f5e 100644 * A20 was already enabled during early kernel init. 
But some buggy * BIOSes (in MSI Laptops) require A20 to be enabled using 8042 to diff --git a/drivers/input/serio/i8042.c b/drivers/input/serio/i8042.c -index 0b9f1d0a8f8b0..3fc0a89cc785c 100644 +index 0b9f1d0a8f8b0..f132d6dfc25eb 100644 --- a/drivers/input/serio/i8042.c +++ b/drivers/input/serio/i8042.c @@ -45,6 +45,10 @@ static bool i8042_unlock; @@ -155451,7 +186584,7 @@ index 0b9f1d0a8f8b0..3fc0a89cc785c 100644 { int error; -@@ -1535,7 +1539,7 @@ static int i8042_kbd_bind_notifier(struct notifier_block *nb, +@@ -1535,12 +1539,10 @@ static int i8042_kbd_bind_notifier(struct notifier_block *nb, return 0; } @@ -155460,7 +186593,28 @@ index 0b9f1d0a8f8b0..3fc0a89cc785c 100644 { int error; -@@ -1600,6 +1604,7 @@ static struct platform_driver i8042_driver = { +- i8042_platform_device = dev; +- + if (i8042_reset == I8042_RESET_ALWAYS) { + error = i8042_controller_selftest(); + if (error) +@@ -1578,7 +1580,6 @@ static int __init i8042_probe(struct platform_device *dev) + i8042_free_aux_ports(); /* in case KBD failed but AUX not */ + i8042_free_irqs(); + i8042_controller_reset(false); +- i8042_platform_device = NULL; + + return error; + } +@@ -1588,7 +1589,6 @@ static int i8042_remove(struct platform_device *dev) + i8042_unregister_ports(); + i8042_free_irqs(); + i8042_controller_reset(false); +- i8042_platform_device = NULL; + + return 0; + } +@@ -1600,6 +1600,7 @@ static struct platform_driver i8042_driver = { .pm = &i8042_pm_ops, #endif }, @@ -155468,7 +186622,7 @@ index 0b9f1d0a8f8b0..3fc0a89cc785c 100644 .remove = i8042_remove, .shutdown = i8042_shutdown, }; -@@ -1610,7 +1615,6 @@ static struct notifier_block i8042_kbd_bind_notifier_block = { +@@ -1610,7 +1611,6 @@ static struct notifier_block i8042_kbd_bind_notifier_block = { static int __init i8042_init(void) { @@ -155476,7 +186630,7 @@ index 0b9f1d0a8f8b0..3fc0a89cc785c 100644 int err; dbg_init(); -@@ -1626,17 +1630,29 @@ static int __init i8042_init(void) +@@ -1626,17 +1626,29 @@ static int __init i8042_init(void) /* Set this before creating the dev to allow i8042_command to work right away */ i8042_present = true; @@ -155547,7 +186701,7 @@ index 05de92c0293bc..eb66cd2689b7c 100644 data->object_table = (struct mxt_object *)(id_buf + MXT_OBJECT_START); diff --git a/drivers/input/touchscreen/elants_i2c.c b/drivers/input/touchscreen/elants_i2c.c -index 68f542bb809f4..b9e2219efbb8f 100644 +index 68f542bb809f4..6858a3e20a0cc 100644 --- a/drivers/input/touchscreen/elants_i2c.c +++ b/drivers/input/touchscreen/elants_i2c.c @@ -117,6 +117,19 @@ @@ -155617,6 +186771,48 @@ index 68f542bb809f4..b9e2219efbb8f 100644 /* Recovery mode detection! 
*/ if (force) { +@@ -1285,14 +1329,12 @@ static int elants_i2c_power_on(struct elants_data *ts) + if (IS_ERR_OR_NULL(ts->reset_gpio)) + return 0; + +- gpiod_set_value_cansleep(ts->reset_gpio, 1); +- + error = regulator_enable(ts->vcc33); + if (error) { + dev_err(&ts->client->dev, + "failed to enable vcc33 regulator: %d\n", + error); +- goto release_reset_gpio; ++ return error; + } + + error = regulator_enable(ts->vccio); +@@ -1301,7 +1343,7 @@ static int elants_i2c_power_on(struct elants_data *ts) + "failed to enable vccio regulator: %d\n", + error); + regulator_disable(ts->vcc33); +- goto release_reset_gpio; ++ return error; + } + + /* +@@ -1310,7 +1352,6 @@ static int elants_i2c_power_on(struct elants_data *ts) + */ + udelay(ELAN_POWERON_DELAY_USEC); + +-release_reset_gpio: + gpiod_set_value_cansleep(ts->reset_gpio, 0); + if (error) + return error; +@@ -1418,7 +1459,7 @@ static int elants_i2c_probe(struct i2c_client *client) + return error; + } + +- ts->reset_gpio = devm_gpiod_get(&client->dev, "reset", GPIOD_OUT_LOW); ++ ts->reset_gpio = devm_gpiod_get(&client->dev, "reset", GPIOD_OUT_HIGH); + if (IS_ERR(ts->reset_gpio)) { + error = PTR_ERR(ts->reset_gpio); + diff --git a/drivers/input/touchscreen/exc3000.c b/drivers/input/touchscreen/exc3000.c index cbe0dd4129121..4b7eee01c6aad 100644 --- a/drivers/input/touchscreen/exc3000.c @@ -155643,7 +186839,7 @@ index cbe0dd4129121..4b7eee01c6aad 100644 goto out_unlock; } diff --git a/drivers/input/touchscreen/goodix.c b/drivers/input/touchscreen/goodix.c -index 4f53d3c57e698..9a9deea511636 100644 +index 4f53d3c57e698..0b513fcd51d13 100644 --- a/drivers/input/touchscreen/goodix.c +++ b/drivers/input/touchscreen/goodix.c @@ -14,20 +14,15 @@ @@ -155941,7 +187137,32 @@ index 4f53d3c57e698..9a9deea511636 100644 if (IS_ERR(gpiod)) { error = PTR_ERR(gpiod); if (error != -EPROBE_DEFER) -@@ -1205,10 +1185,8 @@ reset: +@@ -1059,6 +1039,7 @@ static int goodix_configure_dev(struct goodix_ts_data *ts) + input_set_abs_params(ts->input_dev, ABS_MT_WIDTH_MAJOR, 0, 255, 0, 0); + input_set_abs_params(ts->input_dev, ABS_MT_TOUCH_MAJOR, 0, 255, 0, 0); + ++retry_read_config: + /* Read configuration and apply touchscreen parameters */ + goodix_read_config(ts); + +@@ -1066,6 +1047,16 @@ static int goodix_configure_dev(struct goodix_ts_data *ts) + touchscreen_parse_properties(ts->input_dev, true, &ts->prop); + + if (!ts->prop.max_x || !ts->prop.max_y || !ts->max_touch_num) { ++ if (!ts->reset_controller_at_probe && ++ ts->irq_pin_access_method != IRQ_PIN_ACCESS_NONE) { ++ dev_info(&ts->client->dev, "Config not set, resetting controller\n"); ++ /* Retry after a controller reset */ ++ ts->reset_controller_at_probe = true; ++ error = goodix_reset(ts); ++ if (error) ++ return error; ++ goto retry_read_config; ++ } + dev_err(&ts->client->dev, + "Invalid config (%d, %d, %d), using defaults\n", + ts->prop.max_x, ts->prop.max_y, ts->max_touch_num); +@@ -1205,10 +1196,8 @@ reset: if (ts->reset_controller_at_probe) { /* reset the controller */ error = goodix_reset(ts); @@ -155953,7 +187174,7 @@ index 4f53d3c57e698..9a9deea511636 100644 } error = goodix_i2c_test(client); -@@ -1350,10 +1328,8 @@ static int __maybe_unused goodix_resume(struct device *dev) +@@ -1350,10 +1339,8 @@ static int __maybe_unused goodix_resume(struct device *dev) if (error != 0 || config_ver != ts->config[0]) { error = goodix_reset(ts); @@ -155965,7 +187186,7 @@ index 4f53d3c57e698..9a9deea511636 100644 error = goodix_send_cfg(ts, ts->config, ts->chip->config_len); if (error) -@@ -1387,6 +1363,7 @@ 
MODULE_DEVICE_TABLE(acpi, goodix_acpi_match); +@@ -1387,6 +1374,7 @@ MODULE_DEVICE_TABLE(acpi, goodix_acpi_match); #ifdef CONFIG_OF static const struct of_device_id goodix_of_match[] = { { .compatible = "goodix,gt1151" }, @@ -156083,6 +187304,26 @@ index 2745bf1aee381..83f4be05e27b6 100644 dev_err(&client->dev, "Failed to get gpio: %d\n", error); return error; +diff --git a/drivers/input/touchscreen/raydium_i2c_ts.c b/drivers/input/touchscreen/raydium_i2c_ts.c +index 4d2d22a869773..bdb3e2c3ab797 100644 +--- a/drivers/input/touchscreen/raydium_i2c_ts.c ++++ b/drivers/input/touchscreen/raydium_i2c_ts.c +@@ -210,12 +210,14 @@ static int raydium_i2c_send(struct i2c_client *client, + + error = raydium_i2c_xfer(client, addr, xfer, ARRAY_SIZE(xfer)); + if (likely(!error)) +- return 0; ++ goto out; + + msleep(RM_RETRY_DELAY_MS); + } while (++tries < RM_MAX_RETRIES); + + dev_err(&client->dev, "%s failed: %d\n", __func__, error); ++out: ++ kfree(tx_buf); + return error; + } + diff --git a/drivers/input/touchscreen/st1232.c b/drivers/input/touchscreen/st1232.c index 6abae665ca71d..9d1dea6996a22 100644 --- a/drivers/input/touchscreen/st1232.c @@ -156546,7 +187787,7 @@ index 8dbe61e2b3c15..b6e0bf186cf54 100644 static inline struct amd_iommu *dev_to_amd_iommu(struct device *dev) diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c -index 2a822b229bd05..9a7742732d73f 100644 +index 2a822b229bd05..ce91a6d8532f3 100644 --- a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c @@ -21,6 +21,7 @@ @@ -156783,6 +188024,20 @@ index 2a822b229bd05..9a7742732d73f 100644 iommu_feature_enable(iommu, CONTROL_EVT_INT_EN); if (iommu->ppr_log != NULL) +@@ -3205,6 +3226,13 @@ static int __init parse_ivrs_acpihid(char *str) + return 1; + } + ++ /* ++ * Ignore leading zeroes after ':', so e.g., AMDI0095:00 ++ * will match AMDI0095:0 in the second strcmp in acpi_dev_hid_uid_match ++ */ ++ while (*uid == '0' && *(uid + 1)) ++ uid++; ++ + i = early_acpihid_map_size++; + memcpy(early_acpihid_map[i].hid, hid, strlen(hid)); + memcpy(early_acpihid_map[i].uid, uid, strlen(uid)); diff --git a/drivers/iommu/amd/io_pgtable.c b/drivers/iommu/amd/io_pgtable.c index 182c93a43efd8..1eddf557636d7 100644 --- a/drivers/iommu/amd/io_pgtable.c @@ -156877,10 +188132,18 @@ index 1722bb161841f..7154fb551ddc9 100644 if (err) return err; diff --git a/drivers/iommu/amd/iommu_v2.c b/drivers/iommu/amd/iommu_v2.c -index a9e568276c99f..a45c5536d2506 100644 +index a9e568276c99f..c96cf9b217197 100644 --- a/drivers/iommu/amd/iommu_v2.c +++ b/drivers/iommu/amd/iommu_v2.c -@@ -928,10 +928,8 @@ static int __init amd_iommu_v2_init(void) +@@ -588,6 +588,7 @@ out_drop_state: + put_device_state(dev_state); + + out: ++ pci_dev_put(pdev); + return ret; + } + +@@ -928,10 +929,8 @@ static int __init amd_iommu_v2_init(void) { int ret; @@ -156892,7 +188155,7 @@ index a9e568276c99f..a45c5536d2506 100644 /* * Load anyway to provide the symbols to other modules * which may use AMD IOMMUv2 optionally. 
-@@ -946,6 +944,8 @@ static int __init amd_iommu_v2_init(void) +@@ -946,6 +945,8 @@ static int __init amd_iommu_v2_init(void) amd_iommu_register_ppr_notifier(&ppr_nb); @@ -157522,8 +188785,21 @@ index 939ffa7689867..f96acc3525e8f 100644 } static int __maybe_unused exynos_sysmmu_suspend(struct device *dev) +diff --git a/drivers/iommu/fsl_pamu.c b/drivers/iommu/fsl_pamu.c +index fc38b1fba7cff..bb5d253188a18 100644 +--- a/drivers/iommu/fsl_pamu.c ++++ b/drivers/iommu/fsl_pamu.c +@@ -865,7 +865,7 @@ static int fsl_pamu_probe(struct platform_device *pdev) + ret = create_csd(ppaact_phys, mem_size, csd_port_id); + if (ret) { + dev_err(dev, "could not create coherence subdomain\n"); +- return ret; ++ goto error; + } + } + diff --git a/drivers/iommu/intel/dmar.c b/drivers/iommu/intel/dmar.c -index b7708b93f3fa1..f026bd269cb0b 100644 +index b7708b93f3fa1..bff2420fc3e14 100644 --- a/drivers/iommu/intel/dmar.c +++ b/drivers/iommu/intel/dmar.c @@ -385,7 +385,7 @@ static int dmar_pci_bus_notifier(struct notifier_block *nb, @@ -157544,8 +188820,16 @@ index b7708b93f3fa1..f026bd269cb0b 100644 node = NUMA_NO_NODE; drhd->iommu->node = node; return 0; +@@ -822,6 +822,7 @@ int __init dmar_dev_scope_init(void) + info = dmar_alloc_pci_notify_info(dev, + BUS_NOTIFY_ADD_DEVICE); + if (!info) { ++ pci_dev_put(dev); + return dmar_dev_scope_status; + } else { + dmar_pci_bus_add_dev(info); diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c -index d75f59ae28e6e..b0a975e0a8cb0 100644 +index d75f59ae28e6e..751ff91af0ff6 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -191,38 +191,6 @@ static phys_addr_t root_entry_uctp(struct root_entry *re) @@ -157694,7 +188978,22 @@ index d75f59ae28e6e..b0a975e0a8cb0 100644 entry = &root->lo; if (sm_supported(iommu)) { if (devfn >= 0x80) { -@@ -1222,13 +1243,11 @@ static struct page *dma_pte_clear_level(struct dmar_domain *domain, int level, +@@ -1027,11 +1048,9 @@ static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain, + + domain_flush_cache(domain, tmp_page, VTD_PAGE_SIZE); + pteval = ((uint64_t)virt_to_dma_pfn(tmp_page) << VTD_PAGE_SHIFT) | DMA_PTE_READ | DMA_PTE_WRITE; +- if (domain_use_first_level(domain)) { +- pteval |= DMA_FL_PTE_XD | DMA_FL_PTE_US; +- if (iommu_is_dma_domain(&domain->domain)) +- pteval |= DMA_FL_PTE_ACCESS; +- } ++ if (domain_use_first_level(domain)) ++ pteval |= DMA_FL_PTE_XD | DMA_FL_PTE_US | DMA_FL_PTE_ACCESS; ++ + if (cmpxchg64(&pte->val, 0ULL, pteval)) + /* Someone else set it while we were thinking; use theirs. 
*/ + free_pgtable_page(tmp_page); +@@ -1222,13 +1241,11 @@ static struct page *dma_pte_clear_level(struct dmar_domain *domain, int level, pte = &pte[pfn_level_offset(pfn, level)]; do { @@ -157709,7 +189008,7 @@ index d75f59ae28e6e..b0a975e0a8cb0 100644 /* If range covers entire pagetable, free it */ if (start_pfn <= level_pfn && last_pfn >= level_pfn + level_size(level) - 1) { -@@ -1249,7 +1268,7 @@ static struct page *dma_pte_clear_level(struct dmar_domain *domain, int level, +@@ -1249,7 +1266,7 @@ static struct page *dma_pte_clear_level(struct dmar_domain *domain, int level, freelist); } next: @@ -157718,7 +189017,7 @@ index d75f59ae28e6e..b0a975e0a8cb0 100644 } while (!first_pte_in_page(++pte) && pfn <= last_pfn); if (first_pte) -@@ -1635,7 +1654,8 @@ static void iommu_flush_iotlb_psi(struct intel_iommu *iommu, +@@ -1635,7 +1652,8 @@ static void iommu_flush_iotlb_psi(struct intel_iommu *iommu, unsigned long pfn, unsigned int pages, int ih, int map) { @@ -157728,7 +189027,7 @@ index d75f59ae28e6e..b0a975e0a8cb0 100644 uint64_t addr = (uint64_t)pfn << VTD_PAGE_SHIFT; u16 did = domain->iommu_did[iommu->seq_id]; -@@ -1647,10 +1667,30 @@ static void iommu_flush_iotlb_psi(struct intel_iommu *iommu, +@@ -1647,10 +1665,30 @@ static void iommu_flush_iotlb_psi(struct intel_iommu *iommu, if (domain_use_first_level(domain)) { domain_flush_piotlb(iommu, domain, addr, pages, ih); } else { @@ -157761,7 +189060,7 @@ index d75f59ae28e6e..b0a975e0a8cb0 100644 */ if (!cap_pgsel_inv(iommu->cap) || mask > cap_max_amask_val(iommu->cap)) -@@ -1854,6 +1894,11 @@ static void free_dmar_iommu(struct intel_iommu *iommu) +@@ -1854,6 +1892,11 @@ static void free_dmar_iommu(struct intel_iommu *iommu) iommu->domain_ids = NULL; } @@ -157773,7 +189072,7 @@ index d75f59ae28e6e..b0a975e0a8cb0 100644 g_iommus[iommu->seq_id] = NULL; /* free context mapping */ -@@ -2062,7 +2107,7 @@ static int domain_context_mapping_one(struct dmar_domain *domain, +@@ -2062,7 +2105,7 @@ static int domain_context_mapping_one(struct dmar_domain *domain, goto out_unlock; ret = 0; @@ -157782,7 +189081,7 @@ index d75f59ae28e6e..b0a975e0a8cb0 100644 goto out_unlock; /* -@@ -2074,7 +2119,7 @@ static int domain_context_mapping_one(struct dmar_domain *domain, +@@ -2074,7 +2117,7 @@ static int domain_context_mapping_one(struct dmar_domain *domain, * in-flight DMA will exist, and we don't need to worry anymore * hereafter. */ @@ -157791,7 +189090,7 @@ index d75f59ae28e6e..b0a975e0a8cb0 100644 u16 did_old = context_domain_id(context); if (did_old < cap_ndoms(iommu->cap)) { -@@ -2085,6 +2130,8 @@ static int domain_context_mapping_one(struct dmar_domain *domain, +@@ -2085,6 +2128,8 @@ static int domain_context_mapping_one(struct dmar_domain *domain, iommu->flush.flush_iotlb(iommu, did_old, 0, 0, DMA_TLB_DSI_FLUSH); } @@ -157800,7 +189099,7 @@ index d75f59ae28e6e..b0a975e0a8cb0 100644 } context_clear_entry(context); -@@ -2649,7 +2696,7 @@ static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu, +@@ -2649,7 +2694,7 @@ static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu, spin_unlock_irqrestore(&device_domain_lock, flags); /* PASID table is mandatory for a PCI device in scalable mode. 
*/ @@ -157809,7 +189108,7 @@ index d75f59ae28e6e..b0a975e0a8cb0 100644 ret = intel_pasid_alloc_table(dev); if (ret) { dev_err(dev, "PASID table allocation failed\n"); -@@ -2714,6 +2761,7 @@ static int __init si_domain_init(int hw) +@@ -2714,6 +2759,7 @@ static int __init si_domain_init(int hw) if (md_domain_init(si_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) { domain_exit(si_domain); @@ -157817,7 +189116,7 @@ index d75f59ae28e6e..b0a975e0a8cb0 100644 return -EFAULT; } -@@ -2979,32 +3027,14 @@ static int copy_context_table(struct intel_iommu *iommu, +@@ -2979,32 +3025,14 @@ static int copy_context_table(struct intel_iommu *iommu, /* Now copy the context entry */ memcpy(&ce, old_ce + idx, sizeof(ce)); @@ -157852,7 +189151,7 @@ index d75f59ae28e6e..b0a975e0a8cb0 100644 new_ce[idx] = ce; } -@@ -3031,8 +3061,8 @@ static int copy_translation_tables(struct intel_iommu *iommu) +@@ -3031,8 +3059,8 @@ static int copy_translation_tables(struct intel_iommu *iommu) bool new_ext, ext; rtaddr_reg = dmar_readq(iommu->reg + DMAR_RTADDR_REG); @@ -157863,7 +189162,7 @@ index d75f59ae28e6e..b0a975e0a8cb0 100644 /* * The RTT bit can only be changed when translation is disabled, -@@ -3043,6 +3073,10 @@ static int copy_translation_tables(struct intel_iommu *iommu) +@@ -3043,6 +3071,10 @@ static int copy_translation_tables(struct intel_iommu *iommu) if (new_ext != ext) return -EINVAL; @@ -157874,7 +189173,7 @@ index d75f59ae28e6e..b0a975e0a8cb0 100644 old_rt_phys = rtaddr_reg & VTD_PAGE_MASK; if (!old_rt_phys) return -EINVAL; -@@ -3364,6 +3398,10 @@ free_iommu: +@@ -3364,6 +3396,10 @@ free_iommu: disable_dmar_iommu(iommu); free_dmar_iommu(iommu); } @@ -157885,6 +189184,18 @@ index d75f59ae28e6e..b0a975e0a8cb0 100644 kfree(g_iommus); +@@ -4205,8 +4241,10 @@ static inline bool has_external_pci(void) + struct pci_dev *pdev = NULL; + + for_each_pci_dev(pdev) +- if (pdev->external_facing) ++ if (pdev->external_facing) { ++ pci_dev_put(pdev); + return true; ++ } + + return false; + } @@ -5705,7 +5743,7 @@ static void quirk_igfx_skip_te_disable(struct pci_dev *dev) ver = (dev->device >> 8) & 0xff; if (ver != 0x45 && ver != 0x46 && ver != 0x4c && @@ -157933,6 +189244,29 @@ index f912fe45bea2c..a673195978843 100644 out_free_bitmap: bitmap_free(bitmap); out_free_pages: +diff --git a/drivers/iommu/intel/pasid.c b/drivers/iommu/intel/pasid.c +index 07c390aed1fe9..0060bd089dc7f 100644 +--- a/drivers/iommu/intel/pasid.c ++++ b/drivers/iommu/intel/pasid.c +@@ -717,7 +717,7 @@ int intel_pasid_setup_second_level(struct intel_iommu *iommu, + * Since it is a second level only translation setup, we should + * set SRE bit as well (addresses are expected to be GPAs). + */ +- if (pasid != PASID_RID2PASID) ++ if (pasid != PASID_RID2PASID && ecap_srs(iommu->ecap)) + pasid_set_sre(pte); + pasid_set_present(pte); + pasid_flush_caches(iommu, pte, pasid, did); +@@ -756,7 +756,8 @@ int intel_pasid_setup_pass_through(struct intel_iommu *iommu, + * We should set SRE bit as well since the addresses are expected + * to be GPAs. 
+ */ +- pasid_set_sre(pte); ++ if (ecap_srs(iommu->ecap)) ++ pasid_set_sre(pte); + pasid_set_present(pte); + pasid_flush_caches(iommu, pte, pasid, did); + diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c index 0c228787704f3..3a9468b1d2c3c 100644 --- a/drivers/iommu/intel/svm.c @@ -158189,7 +189523,7 @@ index 3303d707bab4b..7f409e9eea4b7 100644 if (ops->probe_finalize) diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c -index 9e8bc802ac053..cae5a73ff518c 100644 +index 9e8bc802ac053..0835f32e040ad 100644 --- a/drivers/iommu/iova.c +++ b/drivers/iommu/iova.c @@ -83,8 +83,7 @@ static void free_iova_flush_queue(struct iova_domain *iovad) @@ -158216,6 +189550,24 @@ index 9e8bc802ac053..cae5a73ff518c 100644 cached_iova = to_iova(iovad->cached_node); if (free->pfn_lo >= cached_iova->pfn_lo) +@@ -252,7 +252,7 @@ static int __alloc_and_insert_iova_range(struct iova_domain *iovad, + + curr = __get_cached_rbnode(iovad, limit_pfn); + curr_iova = to_iova(curr); +- retry_pfn = curr_iova->pfn_hi + 1; ++ retry_pfn = curr_iova->pfn_hi; + + retry: + do { +@@ -266,7 +266,7 @@ retry: + if (high_pfn < size || new_pfn < low_pfn) { + if (low_pfn == iovad->start_pfn && retry_pfn < limit_pfn) { + high_pfn = limit_pfn; +- low_pfn = retry_pfn; ++ low_pfn = retry_pfn + 1; + curr = iova_find_limit(iovad, limit_pfn); + curr_iova = to_iova(curr); + goto retry; diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c index d38ff29a76e8f..96708cd2757f7 100644 --- a/drivers/iommu/ipmmu-vmsa.c @@ -158444,7 +189796,7 @@ index f81fa8862ed04..f413546ac6e57 100644 struct mtk_smi_larb_iommu larb_imu[MTK_LARB_NR_MAX]; }; diff --git a/drivers/iommu/mtk_iommu_v1.c b/drivers/iommu/mtk_iommu_v1.c -index be22fcf988cee..254530ad6c488 100644 +index be22fcf988cee..fe1c3123a7e77 100644 --- a/drivers/iommu/mtk_iommu_v1.c +++ b/drivers/iommu/mtk_iommu_v1.c @@ -80,6 +80,7 @@ @@ -158533,6 +189885,24 @@ index be22fcf988cee..254530ad6c488 100644 } data->larb_imu[i].dev = &plarbdev->dev; +@@ -612,7 +655,7 @@ static int mtk_iommu_probe(struct platform_device *pdev) + ret = iommu_device_sysfs_add(&data->iommu, &pdev->dev, NULL, + dev_name(&pdev->dev)); + if (ret) +- return ret; ++ goto out_clk_unprepare; + + ret = iommu_device_register(&data->iommu, &mtk_iommu_ops, dev); + if (ret) +@@ -635,6 +678,8 @@ out_dev_unreg: + iommu_device_unregister(&data->iommu); + out_sysfs_remove: + iommu_device_sysfs_remove(&data->iommu); ++out_clk_unprepare: ++ clk_disable_unprepare(data->bclk); + return ret; + } + diff --git a/drivers/iommu/omap-iommu-debug.c b/drivers/iommu/omap-iommu-debug.c index a99afb5d9011c..259f65291d909 100644 --- a/drivers/iommu/omap-iommu-debug.c @@ -158567,7 +189937,7 @@ index 91749654fd490..be60f6f3a265d 100644 arch_data = kcalloc(num_iommus + 1, sizeof(*arch_data), GFP_KERNEL); if (!arch_data) diff --git a/drivers/iommu/rockchip-iommu.c b/drivers/iommu/rockchip-iommu.c -index 5cb260820eda6..7f23ad61c094f 100644 +index 5cb260820eda6..823f1a7d8c6e2 100644 --- a/drivers/iommu/rockchip-iommu.c +++ b/drivers/iommu/rockchip-iommu.c @@ -200,8 +200,8 @@ static inline phys_addr_t rk_dte_pt_address(u32 dte) @@ -158581,6 +189951,100 @@ index 5cb260820eda6..7f23ad61c094f 100644 static inline phys_addr_t rk_dte_pt_address_v2(u32 dte) { +@@ -280,19 +280,17 @@ static u32 rk_mk_pte(phys_addr_t page, int prot) + * 11:9 - Page address bit 34:32 + * 8:4 - Page address bit 39:35 + * 3 - Security +- * 2 - Readable +- * 1 - Writable ++ * 2 - Writable ++ * 1 - Readable + * 0 - 1 if Page @ Page address is valid + */ 
+-#define RK_PTE_PAGE_READABLE_V2 BIT(2) +-#define RK_PTE_PAGE_WRITABLE_V2 BIT(1) + + static u32 rk_mk_pte_v2(phys_addr_t page, int prot) + { + u32 flags = 0; + +- flags |= (prot & IOMMU_READ) ? RK_PTE_PAGE_READABLE_V2 : 0; +- flags |= (prot & IOMMU_WRITE) ? RK_PTE_PAGE_WRITABLE_V2 : 0; ++ flags |= (prot & IOMMU_READ) ? RK_PTE_PAGE_READABLE : 0; ++ flags |= (prot & IOMMU_WRITE) ? RK_PTE_PAGE_WRITABLE : 0; + + return rk_mk_dte_v2(page) | flags; + } +diff --git a/drivers/iommu/sun50i-iommu.c b/drivers/iommu/sun50i-iommu.c +index 92997021e188a..ed35741955997 100644 +--- a/drivers/iommu/sun50i-iommu.c ++++ b/drivers/iommu/sun50i-iommu.c +@@ -27,6 +27,7 @@ + #include <linux/types.h> + + #define IOMMU_RESET_REG 0x010 ++#define IOMMU_RESET_RELEASE_ALL 0xffffffff + #define IOMMU_ENABLE_REG 0x020 + #define IOMMU_ENABLE_ENABLE BIT(0) + +@@ -270,7 +271,7 @@ static u32 sun50i_mk_pte(phys_addr_t page, int prot) + enum sun50i_iommu_aci aci; + u32 flags = 0; + +- if (prot & (IOMMU_READ | IOMMU_WRITE)) ++ if ((prot & (IOMMU_READ | IOMMU_WRITE)) == (IOMMU_READ | IOMMU_WRITE)) + aci = SUN50I_IOMMU_ACI_RD_WR; + else if (prot & IOMMU_READ) + aci = SUN50I_IOMMU_ACI_RD; +@@ -511,7 +512,7 @@ static u32 *sun50i_dte_get_page_table(struct sun50i_iommu_domain *sun50i_domain, + sun50i_iommu_free_page_table(iommu, drop_pt); + } + +- sun50i_table_flush(sun50i_domain, page_table, PT_SIZE); ++ sun50i_table_flush(sun50i_domain, page_table, NUM_PT_ENTRIES); + sun50i_table_flush(sun50i_domain, dte_addr, 1); + + return page_table; +@@ -601,7 +602,6 @@ static struct iommu_domain *sun50i_iommu_domain_alloc(unsigned type) + struct sun50i_iommu_domain *sun50i_domain; + + if (type != IOMMU_DOMAIN_DMA && +- type != IOMMU_DOMAIN_IDENTITY && + type != IOMMU_DOMAIN_UNMANAGED) + return NULL; + +@@ -869,8 +869,8 @@ static phys_addr_t sun50i_iommu_handle_perm_irq(struct sun50i_iommu *iommu) + + static irqreturn_t sun50i_iommu_irq(int irq, void *dev_id) + { ++ u32 status, l1_status, l2_status, resets; + struct sun50i_iommu *iommu = dev_id; +- u32 status; + + spin_lock(&iommu->iommu_lock); + +@@ -880,6 +880,9 @@ static irqreturn_t sun50i_iommu_irq(int irq, void *dev_id) + return IRQ_NONE; + } + ++ l1_status = iommu_read(iommu, IOMMU_L1PG_INT_REG); ++ l2_status = iommu_read(iommu, IOMMU_L2PG_INT_REG); ++ + if (status & IOMMU_INT_INVALID_L2PG) + sun50i_iommu_handle_pt_irq(iommu, + IOMMU_INT_ERR_ADDR_L2_REG, +@@ -893,8 +896,9 @@ static irqreturn_t sun50i_iommu_irq(int irq, void *dev_id) + + iommu_write(iommu, IOMMU_INT_CLR_REG, status); + +- iommu_write(iommu, IOMMU_RESET_REG, ~status); +- iommu_write(iommu, IOMMU_RESET_REG, status); ++ resets = (status | l1_status | l2_status) & IOMMU_INT_MASTER_MASK; ++ iommu_write(iommu, IOMMU_RESET_REG, ~resets); ++ iommu_write(iommu, IOMMU_RESET_REG, IOMMU_RESET_RELEASE_ALL); + + spin_unlock(&iommu->iommu_lock); + diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c index 0a281833f6117..abbdaeb4bf8f1 100644 --- a/drivers/iommu/tegra-smmu.c @@ -158735,6 +190199,19 @@ index e3483789f4df3..1bd0621c4ce2a 100644 else spurious_interrupt(); } +diff --git a/drivers/irqchip/irq-gic-pm.c b/drivers/irqchip/irq-gic-pm.c +index b60e1853593f4..3989d16f997b3 100644 +--- a/drivers/irqchip/irq-gic-pm.c ++++ b/drivers/irqchip/irq-gic-pm.c +@@ -102,7 +102,7 @@ static int gic_probe(struct platform_device *pdev) + + pm_runtime_enable(dev); + +- ret = pm_runtime_get_sync(dev); ++ ret = pm_runtime_resume_and_get(dev); + if (ret < 0) + goto rpm_disable; + diff --git a/drivers/irqchip/irq-gic-realview.c 
b/drivers/irqchip/irq-gic-realview.c index b4c1924f02554..38fab02ffe9d0 100644 --- a/drivers/irqchip/irq-gic-realview.c @@ -158748,7 +190225,7 @@ index b4c1924f02554..38fab02ffe9d0 100644 /* new irq mode with no DCC */ regmap_write(map, REALVIEW_SYS_LOCK_OFFSET, diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c -index eb0882d153666..fc1bfffc468f3 100644 +index eb0882d153666..59a5d06b2d3e4 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -742,7 +742,7 @@ static struct its_collection *its_build_invall_cmd(struct its_node *its, @@ -158760,6 +190237,15 @@ index eb0882d153666..fc1bfffc468f3 100644 } static struct its_vpe *its_build_vinvall_cmd(struct its_node *its, +@@ -1620,7 +1620,7 @@ static int its_select_cpu(struct irq_data *d, + + cpu = cpumask_pick_least_loaded(d, tmpmask); + } else { +- cpumask_and(tmpmask, irq_data_get_affinity_mask(d), cpu_online_mask); ++ cpumask_copy(tmpmask, aff_mask); + + /* If we cannot cross sockets, limit the search to that node */ + if ((its_dev->its->flags & ITS_FLAGS_WORKAROUND_CAVIUM_23144) && @@ -3007,18 +3007,12 @@ static int __init allocate_lpi_tables(void) return 0; } @@ -159378,6 +190864,18 @@ index e1f771c72fc4c..ad3e2c1b3c87b 100644 /* All interrupts target IRQ */ writel_relaxed(0, base + ICTLR_CPU_IEP_CLASS); +diff --git a/drivers/irqchip/irq-wpcm450-aic.c b/drivers/irqchip/irq-wpcm450-aic.c +index f3ac392d5bc87..36d0d0cf3fa25 100644 +--- a/drivers/irqchip/irq-wpcm450-aic.c ++++ b/drivers/irqchip/irq-wpcm450-aic.c +@@ -146,6 +146,7 @@ static int __init wpcm450_aic_of_init(struct device_node *node, + aic->regs = of_iomap(node, 0); + if (!aic->regs) { + pr_err("Failed to map WPCM450 AIC registers\n"); ++ kfree(aic); + return -ENOMEM; + } + diff --git a/drivers/irqchip/irq-xtensa-mx.c b/drivers/irqchip/irq-xtensa-mx.c index 27933338f7b36..8c581c985aa7d 100644 --- a/drivers/irqchip/irq-xtensa-mx.c @@ -159445,11 +190943,129 @@ index 173e6520e06ec..c0b457f26ec41 100644 } static void qcom_pdc_gic_disable(struct irq_data *d) +diff --git a/drivers/isdn/hardware/mISDN/hfcmulti.c b/drivers/isdn/hardware/mISDN/hfcmulti.c +index 4f7eaa17fb274..e840609c50eb7 100644 +--- a/drivers/isdn/hardware/mISDN/hfcmulti.c ++++ b/drivers/isdn/hardware/mISDN/hfcmulti.c +@@ -3217,6 +3217,7 @@ static int + hfcm_l1callback(struct dchannel *dch, u_int cmd) + { + struct hfc_multi *hc = dch->hw; ++ struct sk_buff_head free_queue; + u_long flags; + + switch (cmd) { +@@ -3245,6 +3246,7 @@ hfcm_l1callback(struct dchannel *dch, u_int cmd) + l1_event(dch->l1, HW_POWERUP_IND); + break; + case HW_DEACT_REQ: ++ __skb_queue_head_init(&free_queue); + /* start deactivation */ + spin_lock_irqsave(&hc->lock, flags); + if (hc->ctype == HFC_TYPE_E1) { +@@ -3264,20 +3266,21 @@ hfcm_l1callback(struct dchannel *dch, u_int cmd) + plxsd_checksync(hc, 0); + } + } +- skb_queue_purge(&dch->squeue); ++ skb_queue_splice_init(&dch->squeue, &free_queue); + if (dch->tx_skb) { +- dev_kfree_skb(dch->tx_skb); ++ __skb_queue_tail(&free_queue, dch->tx_skb); + dch->tx_skb = NULL; + } + dch->tx_idx = 0; + if (dch->rx_skb) { +- dev_kfree_skb(dch->rx_skb); ++ __skb_queue_tail(&free_queue, dch->rx_skb); + dch->rx_skb = NULL; + } + test_and_clear_bit(FLG_TX_BUSY, &dch->Flags); + if (test_and_clear_bit(FLG_BUSY_TIMER, &dch->Flags)) + del_timer(&dch->timer); + spin_unlock_irqrestore(&hc->lock, flags); ++ __skb_queue_purge(&free_queue); + break; + case HW_POWERUP_REQ: + spin_lock_irqsave(&hc->lock, flags); +@@ -3384,6 +3387,9 @@ handle_dmsg(struct 
mISDNchannel *ch, struct sk_buff *skb) + case PH_DEACTIVATE_REQ: + test_and_clear_bit(FLG_L2_ACTIVATED, &dch->Flags); + if (dch->dev.D.protocol != ISDN_P_TE_S0) { ++ struct sk_buff_head free_queue; ++ ++ __skb_queue_head_init(&free_queue); + spin_lock_irqsave(&hc->lock, flags); + if (debug & DEBUG_HFCMULTI_MSG) + printk(KERN_DEBUG +@@ -3405,14 +3411,14 @@ handle_dmsg(struct mISDNchannel *ch, struct sk_buff *skb) + /* deactivate */ + dch->state = 1; + } +- skb_queue_purge(&dch->squeue); ++ skb_queue_splice_init(&dch->squeue, &free_queue); + if (dch->tx_skb) { +- dev_kfree_skb(dch->tx_skb); ++ __skb_queue_tail(&free_queue, dch->tx_skb); + dch->tx_skb = NULL; + } + dch->tx_idx = 0; + if (dch->rx_skb) { +- dev_kfree_skb(dch->rx_skb); ++ __skb_queue_tail(&free_queue, dch->rx_skb); + dch->rx_skb = NULL; + } + test_and_clear_bit(FLG_TX_BUSY, &dch->Flags); +@@ -3424,6 +3430,7 @@ handle_dmsg(struct mISDNchannel *ch, struct sk_buff *skb) + #endif + ret = 0; + spin_unlock_irqrestore(&hc->lock, flags); ++ __skb_queue_purge(&free_queue); + } else + ret = l1_event(dch->l1, hh->prim); + break; diff --git a/drivers/isdn/hardware/mISDN/hfcpci.c b/drivers/isdn/hardware/mISDN/hfcpci.c -index bd087cca1c1d2..af17459c1a5c0 100644 +index bd087cca1c1d2..eba58b99cd29d 100644 --- a/drivers/isdn/hardware/mISDN/hfcpci.c +++ b/drivers/isdn/hardware/mISDN/hfcpci.c -@@ -2005,7 +2005,11 @@ setup_hw(struct hfc_pci *hc) +@@ -1617,16 +1617,19 @@ hfcpci_l2l1D(struct mISDNchannel *ch, struct sk_buff *skb) + test_and_clear_bit(FLG_L2_ACTIVATED, &dch->Flags); + spin_lock_irqsave(&hc->lock, flags); + if (hc->hw.protocol == ISDN_P_NT_S0) { ++ struct sk_buff_head free_queue; ++ ++ __skb_queue_head_init(&free_queue); + /* prepare deactivation */ + Write_hfc(hc, HFCPCI_STATES, 0x40); +- skb_queue_purge(&dch->squeue); ++ skb_queue_splice_init(&dch->squeue, &free_queue); + if (dch->tx_skb) { +- dev_kfree_skb(dch->tx_skb); ++ __skb_queue_tail(&free_queue, dch->tx_skb); + dch->tx_skb = NULL; + } + dch->tx_idx = 0; + if (dch->rx_skb) { +- dev_kfree_skb(dch->rx_skb); ++ __skb_queue_tail(&free_queue, dch->rx_skb); + dch->rx_skb = NULL; + } + test_and_clear_bit(FLG_TX_BUSY, &dch->Flags); +@@ -1639,10 +1642,12 @@ hfcpci_l2l1D(struct mISDNchannel *ch, struct sk_buff *skb) + hc->hw.mst_m &= ~HFCPCI_MASTER; + Write_hfc(hc, HFCPCI_MST_MODE, hc->hw.mst_m); + ret = 0; ++ spin_unlock_irqrestore(&hc->lock, flags); ++ __skb_queue_purge(&free_queue); + } else { + ret = l1_event(dch->l1, hh->prim); ++ spin_unlock_irqrestore(&hc->lock, flags); + } +- spin_unlock_irqrestore(&hc->lock, flags); + break; + } + if (!ret) +@@ -2005,7 +2010,11 @@ setup_hw(struct hfc_pci *hc) } /* Allocate memory for FIFOS */ /* the memory needs to be on a 32k boundary within the first 4G */ @@ -159462,11 +191078,98 @@ index bd087cca1c1d2..af17459c1a5c0 100644 buffer = dma_alloc_coherent(&hc->pdev->dev, 0x8000, &hc->hw.dmahandle, GFP_KERNEL); /* We silently assume the address is okay if nonzero */ +diff --git a/drivers/isdn/hardware/mISDN/hfcsusb.c b/drivers/isdn/hardware/mISDN/hfcsusb.c +index cd5642cef01fd..e8b37bd5e34a3 100644 +--- a/drivers/isdn/hardware/mISDN/hfcsusb.c ++++ b/drivers/isdn/hardware/mISDN/hfcsusb.c +@@ -326,20 +326,24 @@ hfcusb_l2l1D(struct mISDNchannel *ch, struct sk_buff *skb) + test_and_clear_bit(FLG_L2_ACTIVATED, &dch->Flags); + + if (hw->protocol == ISDN_P_NT_S0) { ++ struct sk_buff_head free_queue; ++ ++ __skb_queue_head_init(&free_queue); + hfcsusb_ph_command(hw, HFC_L1_DEACTIVATE_NT); + spin_lock_irqsave(&hw->lock, flags); +- 
skb_queue_purge(&dch->squeue); ++ skb_queue_splice_init(&dch->squeue, &free_queue); + if (dch->tx_skb) { +- dev_kfree_skb(dch->tx_skb); ++ __skb_queue_tail(&free_queue, dch->tx_skb); + dch->tx_skb = NULL; + } + dch->tx_idx = 0; + if (dch->rx_skb) { +- dev_kfree_skb(dch->rx_skb); ++ __skb_queue_tail(&free_queue, dch->rx_skb); + dch->rx_skb = NULL; + } + test_and_clear_bit(FLG_TX_BUSY, &dch->Flags); + spin_unlock_irqrestore(&hw->lock, flags); ++ __skb_queue_purge(&free_queue); + #ifdef FIXME + if (test_and_clear_bit(FLG_L1_BUSY, &dch->Flags)) + dchannel_sched_event(&hc->dch, D_CLEARBUSY); +@@ -1330,7 +1334,7 @@ tx_iso_complete(struct urb *urb) + printk("\n"); + } + +- dev_kfree_skb(tx_skb); ++ dev_consume_skb_irq(tx_skb); + tx_skb = NULL; + if (fifo->dch && get_next_dframe(fifo->dch)) + tx_skb = fifo->dch->tx_skb; +diff --git a/drivers/isdn/hardware/mISDN/netjet.c b/drivers/isdn/hardware/mISDN/netjet.c +index a52f275f82634..f8447135a9022 100644 +--- a/drivers/isdn/hardware/mISDN/netjet.c ++++ b/drivers/isdn/hardware/mISDN/netjet.c +@@ -956,7 +956,7 @@ nj_release(struct tiger_hw *card) + } + if (card->irq > 0) + free_irq(card->irq, card); +- if (card->isac.dch.dev.dev.class) ++ if (device_is_registered(&card->isac.dch.dev.dev)) + mISDN_unregister_device(&card->isac.dch.dev); + + for (i = 0; i < 2; i++) { diff --git a/drivers/isdn/mISDN/core.c b/drivers/isdn/mISDN/core.c -index 55891e4204460..a41b4b2645941 100644 +index 55891e4204460..90ee56d07a6e9 100644 --- a/drivers/isdn/mISDN/core.c +++ b/drivers/isdn/mISDN/core.c -@@ -381,7 +381,7 @@ mISDNInit(void) +@@ -222,7 +222,7 @@ mISDN_register_device(struct mISDNdevice *dev, + + err = get_free_devid(); + if (err < 0) +- goto error1; ++ return err; + dev->id = err; + + device_initialize(&dev->dev); +@@ -233,11 +233,12 @@ mISDN_register_device(struct mISDNdevice *dev, + if (debug & DEBUG_CORE) + printk(KERN_DEBUG "mISDN_register %s %d\n", + dev_name(&dev->dev), dev->id); ++ dev->dev.class = &mISDN_class; ++ + err = create_stack(dev); + if (err) + goto error1; + +- dev->dev.class = &mISDN_class; + dev->dev.platform_data = dev; + dev->dev.parent = parent; + dev_set_drvdata(&dev->dev, dev); +@@ -249,8 +250,8 @@ mISDN_register_device(struct mISDNdevice *dev, + + error3: + delete_stack(dev); +- return err; + error1: ++ put_device(&dev->dev); + return err; + + } +@@ -381,7 +382,7 @@ mISDNInit(void) err = mISDN_inittimer(&debug); if (err) goto error2; @@ -159475,7 +191178,7 @@ index 55891e4204460..a41b4b2645941 100644 if (err) goto error3; err = Isdnl2_Init(&debug); -@@ -395,7 +395,7 @@ mISDNInit(void) +@@ -395,7 +396,7 @@ mISDNInit(void) error5: Isdnl2_cleanup(); error4: @@ -159484,7 +191187,7 @@ index 55891e4204460..a41b4b2645941 100644 error3: mISDN_timer_cleanup(); error2: -@@ -408,7 +408,7 @@ static void mISDN_cleanup(void) +@@ -408,7 +409,7 @@ static void mISDN_cleanup(void) { misdn_sock_cleanup(); Isdnl2_cleanup(); @@ -159509,10 +191212,27 @@ index 23b44d3033279..42599f49c189d 100644 extern void Isdnl2_cleanup(void); diff --git a/drivers/isdn/mISDN/dsp_pipeline.c b/drivers/isdn/mISDN/dsp_pipeline.c -index e11ca6bbc7f41..c3b2c99b5cd5c 100644 +index e11ca6bbc7f41..cfbcd9e973c2e 100644 --- a/drivers/isdn/mISDN/dsp_pipeline.c +++ b/drivers/isdn/mISDN/dsp_pipeline.c -@@ -192,7 +192,7 @@ void dsp_pipeline_destroy(struct dsp_pipeline *pipeline) +@@ -77,6 +77,7 @@ int mISDN_dsp_element_register(struct mISDN_dsp_element *elem) + if (!entry) + return -ENOMEM; + ++ INIT_LIST_HEAD(&entry->list); + entry->elem = elem; + + entry->dev.class = elements_class; +@@ 
-107,7 +108,7 @@ err2: + device_unregister(&entry->dev); + return ret; + err1: +- kfree(entry); ++ put_device(&entry->dev); + return ret; + } + EXPORT_SYMBOL(mISDN_dsp_element_register); +@@ -192,7 +193,7 @@ void dsp_pipeline_destroy(struct dsp_pipeline *pipeline) int dsp_pipeline_build(struct dsp_pipeline *pipeline, const char *cfg) { int found = 0; @@ -159521,7 +191241,7 @@ index e11ca6bbc7f41..c3b2c99b5cd5c 100644 struct dsp_element_entry *entry, *n; struct dsp_pipeline_entry *pipeline_entry; struct mISDN_dsp_element *elem; -@@ -203,10 +203,10 @@ int dsp_pipeline_build(struct dsp_pipeline *pipeline, const char *cfg) +@@ -203,10 +204,10 @@ int dsp_pipeline_build(struct dsp_pipeline *pipeline, const char *cfg) if (!list_empty(&pipeline->list)) _dsp_pipeline_destroy(pipeline); @@ -159700,6 +191420,34 @@ index 73b3961890397..afb0942ccc293 100644 break; mutex_lock(&adb_handler_mutex); req->reply[0] = adb_handler[req->data[2]].original_address; +diff --git a/drivers/macintosh/macio-adb.c b/drivers/macintosh/macio-adb.c +index dc634c2932fd3..dd24655861401 100644 +--- a/drivers/macintosh/macio-adb.c ++++ b/drivers/macintosh/macio-adb.c +@@ -105,6 +105,10 @@ int macio_init(void) + return -ENXIO; + } + adb = ioremap(r.start, sizeof(struct adb_regs)); ++ if (!adb) { ++ of_node_put(adbs); ++ return -ENOMEM; ++ } + + out_8(&adb->ctrl.r, 0); + out_8(&adb->intr.r, 0); +diff --git a/drivers/macintosh/macio_asic.c b/drivers/macintosh/macio_asic.c +index c1fdf28960216..df69d648f6d0a 100644 +--- a/drivers/macintosh/macio_asic.c ++++ b/drivers/macintosh/macio_asic.c +@@ -423,7 +423,7 @@ static struct macio_dev * macio_add_one_device(struct macio_chip *chip, + if (of_device_register(&dev->ofdev) != 0) { + printk(KERN_DEBUG"macio: device registration error for %s!\n", + dev_name(&dev->ofdev.dev)); +- kfree(dev); ++ put_device(&dev->ofdev.dev); + return NULL; + } + diff --git a/drivers/macintosh/via-pmu.c b/drivers/macintosh/via-pmu.c index 4b98bc26a94b5..2109129ea1bbf 100644 --- a/drivers/macintosh/via-pmu.c @@ -159713,6 +191461,21 @@ index 4b98bc26a94b5..2109129ea1bbf 100644 via_pmu_event(PMU_EVT_POWER, !!(data[1]&8)); via_pmu_event(PMU_EVT_LID, data[1]&1); } +diff --git a/drivers/mailbox/arm_mhuv2.c b/drivers/mailbox/arm_mhuv2.c +index d997f8ebfa98c..3af15083a25af 100644 +--- a/drivers/mailbox/arm_mhuv2.c ++++ b/drivers/mailbox/arm_mhuv2.c +@@ -1061,8 +1061,8 @@ static int mhuv2_probe(struct amba_device *adev, const struct amba_id *id) + int ret = -EINVAL; + + reg = devm_of_iomap(dev, dev->of_node, 0, NULL); +- if (!reg) +- return -ENOMEM; ++ if (IS_ERR(reg)) ++ return PTR_ERR(reg); + + mhu = devm_kzalloc(dev, sizeof(*mhu), GFP_KERNEL); + if (!mhu) diff --git a/drivers/mailbox/bcm-flexrm-mailbox.c b/drivers/mailbox/bcm-flexrm-mailbox.c index 78073ad1f2f1f..b7e9fd53d47db 100644 --- a/drivers/mailbox/bcm-flexrm-mailbox.c @@ -159792,10 +191555,26 @@ index 0ce75c6b36b65..850d4004c50e0 100644 } diff --git a/drivers/mailbox/mailbox-mpfs.c b/drivers/mailbox/mailbox-mpfs.c -index 0d6e2231a2c75..cfacb3f320a64 100644 +index 0d6e2231a2c75..853901acaeec2 100644 --- a/drivers/mailbox/mailbox-mpfs.c +++ b/drivers/mailbox/mailbox-mpfs.c -@@ -62,6 +62,7 @@ struct mpfs_mbox { +@@ -2,7 +2,7 @@ + /* + * Microchip PolarFire SoC (MPFS) system controller/mailbox controller driver + * +- * Copyright (c) 2020 Microchip Corporation. All rights reserved. ++ * Copyright (c) 2020-2022 Microchip Corporation. All rights reserved. 
+ * + * Author: Conor Dooley <conor.dooley@microchip.com> + * +@@ -56,12 +56,13 @@ + #define SCB_STATUS_NOTIFY_MASK BIT(SCB_STATUS_NOTIFY) + + #define SCB_STATUS_POS (16) +-#define SCB_STATUS_MASK GENMASK_ULL(SCB_STATUS_POS + SCB_MASK_WIDTH, SCB_STATUS_POS) ++#define SCB_STATUS_MASK GENMASK(SCB_STATUS_POS + SCB_MASK_WIDTH - 1, SCB_STATUS_POS) + + struct mpfs_mbox { struct mbox_controller controller; struct device *dev; int irq; @@ -159846,7 +191625,43 @@ index 0d6e2231a2c75..cfacb3f320a64 100644 return 0; } -@@ -141,7 +140,7 @@ static void mpfs_mbox_rx_data(struct mbox_chan *chan) +@@ -131,17 +130,42 @@ static void mpfs_mbox_rx_data(struct mbox_chan *chan) + struct mpfs_mbox *mbox = (struct mpfs_mbox *)chan->con_priv; + struct mpfs_mss_response *response = mbox->response; + u16 num_words = ALIGN((response->resp_size), (4)) / 4U; +- u32 i; ++ u32 i, status; + + if (!response->resp_msg) { + dev_err(mbox->dev, "failed to assign memory for response %d\n", -ENOMEM); + return; + } + ++ /* ++ * The status is stored in bits 31:16 of the SERVICES_SR register. ++ * It is only valid when BUSY == 0. ++ * We should *never* get an interrupt while the controller is ++ * still in the busy state. If we do, something has gone badly ++ * wrong & the content of the mailbox would not be valid. ++ */ ++ if (mpfs_mbox_busy(mbox)) { ++ dev_err(mbox->dev, "got an interrupt but system controller is busy\n"); ++ response->resp_status = 0xDEAD; ++ return; ++ } ++ ++ status = readl_relaxed(mbox->ctrl_base + SERVICES_SR_OFFSET); ++ ++ /* ++ * If the status of the individual servers is non-zero, the service has ++ * failed. The contents of the mailbox at this point are not be valid, ++ * so don't bother reading them. Set the status so that the driver ++ * implementing the service can handle the result. 
++ */ ++ response->resp_status = (status & SCB_STATUS_MASK) >> SCB_STATUS_POS; ++ if (response->resp_status) ++ return; ++ if (!mpfs_mbox_busy(mbox)) { for (i = 0; i < num_words; i++) { response->resp_msg[i] = @@ -159855,7 +191670,7 @@ index 0d6e2231a2c75..cfacb3f320a64 100644 + mbox->resp_offset + i * 0x4); } } -@@ -200,14 +199,18 @@ static int mpfs_mbox_probe(struct platform_device *pdev) +@@ -200,14 +224,18 @@ static int mpfs_mbox_probe(struct platform_device *pdev) if (!mbox) return -ENOMEM; @@ -159877,7 +191692,7 @@ index 0d6e2231a2c75..cfacb3f320a64 100644 mbox->irq = platform_get_irq(pdev, 0); if (mbox->irq < 0) return mbox->irq; -@@ -232,7 +235,7 @@ static int mpfs_mbox_probe(struct platform_device *pdev) +@@ -232,7 +260,7 @@ static int mpfs_mbox_probe(struct platform_device *pdev) } static const struct of_device_id mpfs_mbox_of_match[] = { @@ -160005,6 +191820,57 @@ index acd0675da681e..78f7265039c66 100644 return 0; } +diff --git a/drivers/mailbox/zynqmp-ipi-mailbox.c b/drivers/mailbox/zynqmp-ipi-mailbox.c +index f44079d62b1a7..527204c6d5cd0 100644 +--- a/drivers/mailbox/zynqmp-ipi-mailbox.c ++++ b/drivers/mailbox/zynqmp-ipi-mailbox.c +@@ -493,6 +493,7 @@ static int zynqmp_ipi_mbox_probe(struct zynqmp_ipi_mbox *ipi_mbox, + ret = device_register(&ipi_mbox->dev); + if (ret) { + dev_err(dev, "Failed to register ipi mbox dev.\n"); ++ put_device(&ipi_mbox->dev); + return ret; + } + mdev = &ipi_mbox->dev; +@@ -619,7 +620,8 @@ static void zynqmp_ipi_free_mboxes(struct zynqmp_ipi_pdata *pdata) + ipi_mbox = &pdata->ipi_mboxes[i]; + if (ipi_mbox->dev.parent) { + mbox_controller_unregister(&ipi_mbox->mbox); +- device_unregister(&ipi_mbox->dev); ++ if (device_is_registered(&ipi_mbox->dev)) ++ device_unregister(&ipi_mbox->dev); + } + } + } +diff --git a/drivers/mcb/mcb-core.c b/drivers/mcb/mcb-core.c +index cf128b3471d78..0530db5482311 100644 +--- a/drivers/mcb/mcb-core.c ++++ b/drivers/mcb/mcb-core.c +@@ -71,8 +71,10 @@ static int mcb_probe(struct device *dev) + + get_device(dev); + ret = mdrv->probe(mdev, found_id); +- if (ret) ++ if (ret) { + module_put(carrier_mod); ++ put_device(dev); ++ } + + return ret; + } +diff --git a/drivers/mcb/mcb-parse.c b/drivers/mcb/mcb-parse.c +index 0266bfddfbe27..aa6938da0db85 100644 +--- a/drivers/mcb/mcb-parse.c ++++ b/drivers/mcb/mcb-parse.c +@@ -108,7 +108,7 @@ static int chameleon_parse_gdd(struct mcb_bus *bus, + return 0; + + err: +- mcb_free_dev(mdev); ++ put_device(&mdev->dev); + + return ret; + } diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c index 0595559de174a..98daa9d200f79 100644 --- a/drivers/md/bcache/btree.c @@ -160537,6 +192403,146 @@ index 02b2f9df73f69..31df716951f66 100644 /* * 14 (16384ths) is chosen here as something that each backing device * should be a reasonable fraction of the share, and not to blow up +diff --git a/drivers/md/dm-cache-metadata.c b/drivers/md/dm-cache-metadata.c +index 89a73204dbf47..0f6f74e3030f7 100644 +--- a/drivers/md/dm-cache-metadata.c ++++ b/drivers/md/dm-cache-metadata.c +@@ -551,11 +551,13 @@ static int __create_persistent_data_objects(struct dm_cache_metadata *cmd, + return r; + } + +-static void __destroy_persistent_data_objects(struct dm_cache_metadata *cmd) ++static void __destroy_persistent_data_objects(struct dm_cache_metadata *cmd, ++ bool destroy_bm) + { + dm_sm_destroy(cmd->metadata_sm); + dm_tm_destroy(cmd->tm); +- dm_block_manager_destroy(cmd->bm); ++ if (destroy_bm) ++ dm_block_manager_destroy(cmd->bm); + } + + typedef unsigned long (*flags_mutator)(unsigned long); +@@ 
-826,7 +828,7 @@ static struct dm_cache_metadata *lookup_or_open(struct block_device *bdev, + cmd2 = lookup(bdev); + if (cmd2) { + mutex_unlock(&table_lock); +- __destroy_persistent_data_objects(cmd); ++ __destroy_persistent_data_objects(cmd, true); + kfree(cmd); + return cmd2; + } +@@ -874,7 +876,7 @@ void dm_cache_metadata_close(struct dm_cache_metadata *cmd) + mutex_unlock(&table_lock); + + if (!cmd->fail_io) +- __destroy_persistent_data_objects(cmd); ++ __destroy_persistent_data_objects(cmd, true); + kfree(cmd); + } + } +@@ -1808,14 +1810,52 @@ int dm_cache_metadata_needs_check(struct dm_cache_metadata *cmd, bool *result) + + int dm_cache_metadata_abort(struct dm_cache_metadata *cmd) + { +- int r; ++ int r = -EINVAL; ++ struct dm_block_manager *old_bm = NULL, *new_bm = NULL; ++ ++ /* fail_io is double-checked with cmd->root_lock held below */ ++ if (unlikely(cmd->fail_io)) ++ return r; ++ ++ /* ++ * Replacement block manager (new_bm) is created and old_bm destroyed outside of ++ * cmd root_lock to avoid ABBA deadlock that would result (due to life-cycle of ++ * shrinker associated with the block manager's bufio client vs cmd root_lock). ++ * - must take shrinker_rwsem without holding cmd->root_lock ++ */ ++ new_bm = dm_block_manager_create(cmd->bdev, DM_CACHE_METADATA_BLOCK_SIZE << SECTOR_SHIFT, ++ CACHE_MAX_CONCURRENT_LOCKS); + + WRITE_LOCK(cmd); +- __destroy_persistent_data_objects(cmd); +- r = __create_persistent_data_objects(cmd, false); ++ if (cmd->fail_io) { ++ WRITE_UNLOCK(cmd); ++ goto out; ++ } ++ ++ __destroy_persistent_data_objects(cmd, false); ++ old_bm = cmd->bm; ++ if (IS_ERR(new_bm)) { ++ DMERR("could not create block manager during abort"); ++ cmd->bm = NULL; ++ r = PTR_ERR(new_bm); ++ goto out_unlock; ++ } ++ ++ cmd->bm = new_bm; ++ r = __open_or_format_metadata(cmd, false); ++ if (r) { ++ cmd->bm = NULL; ++ goto out_unlock; ++ } ++ new_bm = NULL; ++out_unlock: + if (r) + cmd->fail_io = true; + WRITE_UNLOCK(cmd); ++ dm_block_manager_destroy(old_bm); ++out: ++ if (new_bm && !IS_ERR(new_bm)) ++ dm_block_manager_destroy(new_bm); + + return r; + } +diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c +index bdd500447dea2..abfe7e37b76f4 100644 +--- a/drivers/md/dm-cache-target.c ++++ b/drivers/md/dm-cache-target.c +@@ -915,16 +915,16 @@ static void abort_transaction(struct cache *cache) + if (get_cache_mode(cache) >= CM_READ_ONLY) + return; + +- if (dm_cache_metadata_set_needs_check(cache->cmd)) { +- DMERR("%s: failed to set 'needs_check' flag in metadata", dev_name); +- set_cache_mode(cache, CM_FAIL); +- } +- + DMERR_LIMIT("%s: aborting current metadata transaction", dev_name); + if (dm_cache_metadata_abort(cache->cmd)) { + DMERR("%s: failed to abort metadata transaction", dev_name); + set_cache_mode(cache, CM_FAIL); + } ++ ++ if (dm_cache_metadata_set_needs_check(cache->cmd)) { ++ DMERR("%s: failed to set 'needs_check' flag in metadata", dev_name); ++ set_cache_mode(cache, CM_FAIL); ++ } + } + + static void metadata_operation_failed(struct cache *cache, const char *op, int r) +@@ -1895,6 +1895,7 @@ static void destroy(struct cache *cache) + if (cache->prison) + dm_bio_prison_destroy_v2(cache->prison); + ++ cancel_delayed_work_sync(&cache->waker); + if (cache->wq) + destroy_workqueue(cache->wq); + +diff --git a/drivers/md/dm-clone-target.c b/drivers/md/dm-clone-target.c +index edd22e4d65dff..ec4f2487ef10d 100644 +--- a/drivers/md/dm-clone-target.c ++++ b/drivers/md/dm-clone-target.c +@@ -1959,6 +1959,7 @@ static void clone_dtr(struct dm_target *ti) + + 
mempool_exit(&clone->hydration_pool); + dm_kcopyd_client_destroy(clone->kcopyd_client); ++ cancel_delayed_work_sync(&clone->waker); + destroy_workqueue(clone->wq); + hash_table_exit(clone); + dm_clone_metadata_close(clone->cmd); diff --git a/drivers/md/dm-core.h b/drivers/md/dm-core.h index 55dccdfbcb22e..5a7d270b32c01 100644 --- a/drivers/md/dm-core.h @@ -160618,10 +192624,27 @@ index 2a78f68741431..a56df45366059 100644 static int era_preresume(struct dm_target *ti) diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c -index dc03b70f6e65c..d5b8270869620 100644 +index dc03b70f6e65c..508e81bfef2c4 100644 --- a/drivers/md/dm-integrity.c +++ b/drivers/md/dm-integrity.c -@@ -2459,9 +2459,11 @@ static void do_journal_write(struct dm_integrity_c *ic, unsigned write_start, +@@ -259,6 +259,7 @@ struct dm_integrity_c { + + struct completion crypto_backoff; + ++ bool wrote_to_journal; + bool journal_uptodate; + bool just_formatted; + bool recalculate_flag; +@@ -2361,6 +2362,8 @@ static void integrity_commit(struct work_struct *w) + if (!commit_sections) + goto release_flush_bios; + ++ ic->wrote_to_journal = true; ++ + i = commit_start; + for (n = 0; n < commit_sections; n++) { + for (j = 0; j < ic->journal_section_entries; j++) { +@@ -2459,9 +2462,11 @@ static void do_journal_write(struct dm_integrity_c *ic, unsigned write_start, dm_integrity_io_error(ic, "invalid sector in journal", -EIO); sec &= ~(sector_t)(ic->sectors_per_block - 1); } @@ -160635,7 +192658,47 @@ index dc03b70f6e65c..d5b8270869620 100644 get_area_and_offset(ic, sec, &area, &offset); restore_last_bytes(ic, access_journal_data(ic, i, j), je); for (k = j + 1; k < ic->journal_section_entries; k++) { -@@ -4381,6 +4383,7 @@ try_smaller_buffer: +@@ -2573,10 +2578,6 @@ static void integrity_writer(struct work_struct *w) + + unsigned prev_free_sectors; + +- /* the following test is not needed, but it tests the replay code */ +- if (unlikely(dm_post_suspending(ic->ti)) && !ic->meta_dev) +- return; +- + spin_lock_irq(&ic->endio_wait.lock); + write_start = ic->committed_section; + write_sections = ic->n_committed_sections; +@@ -3083,10 +3084,17 @@ static void dm_integrity_postsuspend(struct dm_target *ti) + drain_workqueue(ic->commit_wq); + + if (ic->mode == 'J') { +- if (ic->meta_dev) +- queue_work(ic->writer_wq, &ic->writer_work); ++ queue_work(ic->writer_wq, &ic->writer_work); + drain_workqueue(ic->writer_wq); + dm_integrity_flush_buffers(ic, true); ++ if (ic->wrote_to_journal) { ++ init_journal(ic, ic->free_section, ++ ic->journal_sections - ic->free_section, ic->commit_seq); ++ if (ic->free_section) { ++ init_journal(ic, 0, ic->free_section, ++ next_commit_seq(ic->commit_seq)); ++ } ++ } + } + + if (ic->mode == 'B') { +@@ -3114,6 +3122,8 @@ static void dm_integrity_resume(struct dm_target *ti) + + DEBUG_print("resume\n"); + ++ ic->wrote_to_journal = false; ++ + if (ic->provided_data_sectors != old_provided_data_sectors) { + if (ic->provided_data_sectors > old_provided_data_sectors && + ic->mode == 'B' && +@@ -4381,6 +4391,7 @@ try_smaller_buffer: } if (ic->internal_hash) { @@ -160643,7 +192706,7 @@ index dc03b70f6e65c..d5b8270869620 100644 ic->recalc_wq = alloc_workqueue("dm-integrity-recalc", WQ_MEM_RECLAIM, 1); if (!ic->recalc_wq ) { ti->error = "Cannot allocate workqueue"; -@@ -4394,8 +4397,10 @@ try_smaller_buffer: +@@ -4394,8 +4405,10 @@ try_smaller_buffer: r = -ENOMEM; goto bad; } @@ -160656,7 +192719,7 @@ index dc03b70f6e65c..d5b8270869620 100644 if (!ic->recalc_tags) { ti->error = "Cannot allocate tags for 
recalculating"; r = -ENOMEM; -@@ -4473,8 +4478,6 @@ try_smaller_buffer: +@@ -4473,8 +4486,6 @@ try_smaller_buffer: } if (should_write_sb) { @@ -160665,8 +192728,17 @@ index dc03b70f6e65c..d5b8270869620 100644 init_journal(ic, 0, ic->journal_sections, 0); r = dm_integrity_failed(ic); if (unlikely(r)) { +@@ -4528,6 +4539,8 @@ static void dm_integrity_dtr(struct dm_target *ti) + BUG_ON(!RB_EMPTY_ROOT(&ic->in_progress)); + BUG_ON(!list_empty(&ic->wait_list)); + ++ if (ic->mode == 'B') ++ cancel_delayed_work_sync(&ic->bitmap_flush_work); + if (ic->metadata_wq) + destroy_workqueue(ic->metadata_wq); + if (ic->wait_wq) diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c -index 21fe8652b095b..901abd6dea419 100644 +index 21fe8652b095b..dcaca4aaac91a 100644 --- a/drivers/md/dm-ioctl.c +++ b/drivers/md/dm-ioctl.c @@ -18,6 +18,7 @@ @@ -160677,6 +192749,24 @@ index 21fe8652b095b..901abd6dea419 100644 #include <linux/uaccess.h> #include <linux/ima.h> +@@ -654,7 +655,7 @@ static void list_version_get_needed(struct target_type *tt, void *needed_param) + size_t *needed = needed_param; + + *needed += sizeof(struct dm_target_versions); +- *needed += strlen(tt->name); ++ *needed += strlen(tt->name) + 1; + *needed += ALIGN_MASK; + } + +@@ -719,7 +720,7 @@ static int __list_versions(struct dm_ioctl *param, size_t param_size, const char + iter_info.old_vers = NULL; + iter_info.vers = vers; + iter_info.flags = 0; +- iter_info.end = (char *)vers+len; ++ iter_info.end = (char *)vers + needed; + + /* + * Now loop through filling out the names & versions. @@ -1788,6 +1789,7 @@ static ioctl_fn lookup_ioctl(unsigned int cmd, int *ioctl_flags) if (unlikely(cmd >= ARRAY_SIZE(_ioctls))) return NULL; @@ -161043,10 +193133,111 @@ index 2ddfae678f320..09c81a1ec057d 100644 + #endif diff --git a/drivers/md/dm-thin-metadata.c b/drivers/md/dm-thin-metadata.c -index c88ed14d49e65..0ada99572b689 100644 +index c88ed14d49e65..3ce7017bf9d56 100644 --- a/drivers/md/dm-thin-metadata.c +++ b/drivers/md/dm-thin-metadata.c -@@ -2073,10 +2073,13 @@ int dm_pool_register_metadata_threshold(struct dm_pool_metadata *pmd, +@@ -724,6 +724,15 @@ static int __open_metadata(struct dm_pool_metadata *pmd) + goto bad_cleanup_data_sm; + } + ++ /* ++ * For pool metadata opening process, root setting is redundant ++ * because it will be set again in __begin_transaction(). But dm ++ * pool aborting process really needs to get last transaction's ++ * root to avoid accessing broken btree. 
++ */ ++ pmd->root = le64_to_cpu(disk_super->data_mapping_root); ++ pmd->details_root = le64_to_cpu(disk_super->device_details_root); ++ + __setup_btree_details(pmd); + dm_bm_unlock(sblock); + +@@ -776,13 +785,15 @@ static int __create_persistent_data_objects(struct dm_pool_metadata *pmd, bool f + return r; + } + +-static void __destroy_persistent_data_objects(struct dm_pool_metadata *pmd) ++static void __destroy_persistent_data_objects(struct dm_pool_metadata *pmd, ++ bool destroy_bm) + { + dm_sm_destroy(pmd->data_sm); + dm_sm_destroy(pmd->metadata_sm); + dm_tm_destroy(pmd->nb_tm); + dm_tm_destroy(pmd->tm); +- dm_block_manager_destroy(pmd->bm); ++ if (destroy_bm) ++ dm_block_manager_destroy(pmd->bm); + } + + static int __begin_transaction(struct dm_pool_metadata *pmd) +@@ -989,7 +1000,7 @@ int dm_pool_metadata_close(struct dm_pool_metadata *pmd) + } + pmd_write_unlock(pmd); + if (!pmd->fail_io) +- __destroy_persistent_data_objects(pmd); ++ __destroy_persistent_data_objects(pmd, true); + + kfree(pmd); + return 0; +@@ -1888,19 +1899,52 @@ static void __set_abort_with_changes_flags(struct dm_pool_metadata *pmd) + int dm_pool_abort_metadata(struct dm_pool_metadata *pmd) + { + int r = -EINVAL; ++ struct dm_block_manager *old_bm = NULL, *new_bm = NULL; ++ ++ /* fail_io is double-checked with pmd->root_lock held below */ ++ if (unlikely(pmd->fail_io)) ++ return r; ++ ++ /* ++ * Replacement block manager (new_bm) is created and old_bm destroyed outside of ++ * pmd root_lock to avoid ABBA deadlock that would result (due to life-cycle of ++ * shrinker associated with the block manager's bufio client vs pmd root_lock). ++ * - must take shrinker_rwsem without holding pmd->root_lock ++ */ ++ new_bm = dm_block_manager_create(pmd->bdev, THIN_METADATA_BLOCK_SIZE << SECTOR_SHIFT, ++ THIN_MAX_CONCURRENT_LOCKS); + + pmd_write_lock(pmd); +- if (pmd->fail_io) ++ if (pmd->fail_io) { ++ pmd_write_unlock(pmd); + goto out; ++ } + + __set_abort_with_changes_flags(pmd); +- __destroy_persistent_data_objects(pmd); +- r = __create_persistent_data_objects(pmd, false); ++ __destroy_persistent_data_objects(pmd, false); ++ old_bm = pmd->bm; ++ if (IS_ERR(new_bm)) { ++ DMERR("could not create block manager during abort"); ++ pmd->bm = NULL; ++ r = PTR_ERR(new_bm); ++ goto out_unlock; ++ } ++ ++ pmd->bm = new_bm; ++ r = __open_or_format_metadata(pmd, false); ++ if (r) { ++ pmd->bm = NULL; ++ goto out_unlock; ++ } ++ new_bm = NULL; ++out_unlock: + if (r) + pmd->fail_io = true; +- +-out: + pmd_write_unlock(pmd); ++ dm_block_manager_destroy(old_bm); ++out: ++ if (new_bm && !IS_ERR(new_bm)) ++ dm_block_manager_destroy(new_bm); + + return r; + } +@@ -2073,10 +2117,13 @@ int dm_pool_register_metadata_threshold(struct dm_pool_metadata *pmd, dm_sm_threshold_fn fn, void *context) { @@ -161063,10 +193254,19 @@ index c88ed14d49e65..0ada99572b689 100644 return r; diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c -index 4c67b77c23c1b..0a85e4cd607c6 100644 +index 4c67b77c23c1b..cce26f46ded52 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c -@@ -3401,8 +3401,10 @@ static int pool_ctr(struct dm_target *ti, unsigned argc, char **argv) +@@ -2907,6 +2907,8 @@ static void __pool_destroy(struct pool *pool) + dm_bio_prison_destroy(pool->prison); + dm_kcopyd_client_destroy(pool->copier); + ++ cancel_delayed_work_sync(&pool->waker); ++ cancel_delayed_work_sync(&pool->no_space_timeout); + if (pool->wq) + destroy_workqueue(pool->wq); + +@@ -3401,8 +3403,10 @@ static int pool_ctr(struct dm_target *ti, unsigned argc, char **argv) 
calc_metadata_threshold(pt), metadata_low_callback, pool); @@ -161078,6 +193278,39 @@ index 4c67b77c23c1b..0a85e4cd607c6 100644 dm_pool_register_pre_commit_callback(pool->pmd, metadata_pre_commit_callback, pool); +@@ -3564,20 +3568,28 @@ static int pool_preresume(struct dm_target *ti) + */ + r = bind_control_target(pool, ti); + if (r) +- return r; ++ goto out; + + r = maybe_resize_data_dev(ti, &need_commit1); + if (r) +- return r; ++ goto out; + + r = maybe_resize_metadata_dev(ti, &need_commit2); + if (r) +- return r; ++ goto out; + + if (need_commit1 || need_commit2) + (void) commit(pool); ++out: ++ /* ++ * When a thin-pool is PM_FAIL, it cannot be rebuilt if ++ * bio is in deferred list. Therefore need to return 0 ++ * to allow pool_resume() to flush IO. ++ */ ++ if (r && get_pool_mode(pool) == PM_FAIL) ++ r = 0; + +- return 0; ++ return r; + } + + static void pool_suspend_active_thins(struct pool *pool) diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c index 88288c8d6bc8c..426299ceb33d7 100644 --- a/drivers/md/dm-verity-target.c @@ -161492,9 +193725,33 @@ index 76d9da49fda75..9dd2c2da075d9 100644 if (!ti->type->iterate_devices) goto out; diff --git a/drivers/md/md-bitmap.c b/drivers/md/md-bitmap.c -index e29c6298ef5c9..8cc11b1987ec8 100644 +index e29c6298ef5c9..062142559caa3 100644 --- a/drivers/md/md-bitmap.c +++ b/drivers/md/md-bitmap.c +@@ -486,7 +486,7 @@ void md_bitmap_print_sb(struct bitmap *bitmap) + sb = kmap_atomic(bitmap->storage.sb_page); + pr_debug("%s: bitmap file superblock:\n", bmname(bitmap)); + pr_debug(" magic: %08x\n", le32_to_cpu(sb->magic)); +- pr_debug(" version: %d\n", le32_to_cpu(sb->version)); ++ pr_debug(" version: %u\n", le32_to_cpu(sb->version)); + pr_debug(" uuid: %08x.%08x.%08x.%08x\n", + le32_to_cpu(*(__le32 *)(sb->uuid+0)), + le32_to_cpu(*(__le32 *)(sb->uuid+4)), +@@ -497,11 +497,11 @@ void md_bitmap_print_sb(struct bitmap *bitmap) + pr_debug("events cleared: %llu\n", + (unsigned long long) le64_to_cpu(sb->events_cleared)); + pr_debug(" state: %08x\n", le32_to_cpu(sb->state)); +- pr_debug(" chunksize: %d B\n", le32_to_cpu(sb->chunksize)); +- pr_debug(" daemon sleep: %ds\n", le32_to_cpu(sb->daemon_sleep)); ++ pr_debug(" chunksize: %u B\n", le32_to_cpu(sb->chunksize)); ++ pr_debug(" daemon sleep: %us\n", le32_to_cpu(sb->daemon_sleep)); + pr_debug(" sync size: %llu KB\n", + (unsigned long long)le64_to_cpu(sb->sync_size)/2); +- pr_debug("max write behind: %d\n", le32_to_cpu(sb->write_behind)); ++ pr_debug("max write behind: %u\n", le32_to_cpu(sb->write_behind)); + kunmap_atomic(sb); + } + @@ -639,14 +639,6 @@ re_read: daemon_sleep = le32_to_cpu(sb->daemon_sleep) * HZ; write_behind = le32_to_cpu(sb->write_behind); @@ -161569,8 +193826,84 @@ index e29c6298ef5c9..8cc11b1987ec8 100644 md_bitmap_print_sb(bitmap); if (bitmap->cluster_slot < 0) md_cluster_stop(bitmap->mddev); +@@ -2104,7 +2106,8 @@ int md_bitmap_resize(struct bitmap *bitmap, sector_t blocks, + bytes = DIV_ROUND_UP(chunks, 8); + if (!bitmap->mddev->bitmap_info.external) + bytes += sizeof(bitmap_super_t); +- } while (bytes > (space << 9)); ++ } while (bytes > (space << 9) && (chunkshift + BITMAP_BLOCK_SHIFT) < ++ (BITS_PER_BYTE * sizeof(((bitmap_super_t *)0)->chunksize) - 1)); + } else + chunkshift = ffz(~chunksize) - BITMAP_BLOCK_SHIFT; + +@@ -2149,7 +2152,7 @@ int md_bitmap_resize(struct bitmap *bitmap, sector_t blocks, + bitmap->counts.missing_pages = pages; + bitmap->counts.chunkshift = chunkshift; + bitmap->counts.chunks = chunks; +- bitmap->mddev->bitmap_info.chunksize = 
1 << (chunkshift + ++ bitmap->mddev->bitmap_info.chunksize = 1UL << (chunkshift + + BITMAP_BLOCK_SHIFT); + + blocks = min(old_counts.chunks << old_counts.chunkshift, +@@ -2175,8 +2178,8 @@ int md_bitmap_resize(struct bitmap *bitmap, sector_t blocks, + bitmap->counts.missing_pages = old_counts.pages; + bitmap->counts.chunkshift = old_counts.chunkshift; + bitmap->counts.chunks = old_counts.chunks; +- bitmap->mddev->bitmap_info.chunksize = 1 << (old_counts.chunkshift + +- BITMAP_BLOCK_SHIFT); ++ bitmap->mddev->bitmap_info.chunksize = ++ 1UL << (old_counts.chunkshift + BITMAP_BLOCK_SHIFT); + blocks = old_counts.chunks << old_counts.chunkshift; + pr_warn("Could not pre-allocate in-memory bitmap for cluster raid\n"); + break; +@@ -2194,20 +2197,23 @@ int md_bitmap_resize(struct bitmap *bitmap, sector_t blocks, + + if (set) { + bmc_new = md_bitmap_get_counter(&bitmap->counts, block, &new_blocks, 1); +- if (*bmc_new == 0) { +- /* need to set on-disk bits too. */ +- sector_t end = block + new_blocks; +- sector_t start = block >> chunkshift; +- start <<= chunkshift; +- while (start < end) { +- md_bitmap_file_set_bit(bitmap, block); +- start += 1 << chunkshift; ++ if (bmc_new) { ++ if (*bmc_new == 0) { ++ /* need to set on-disk bits too. */ ++ sector_t end = block + new_blocks; ++ sector_t start = block >> chunkshift; ++ ++ start <<= chunkshift; ++ while (start < end) { ++ md_bitmap_file_set_bit(bitmap, block); ++ start += 1 << chunkshift; ++ } ++ *bmc_new = 2; ++ md_bitmap_count_page(&bitmap->counts, block, 1); ++ md_bitmap_set_pending(&bitmap->counts, block); + } +- *bmc_new = 2; +- md_bitmap_count_page(&bitmap->counts, block, 1); +- md_bitmap_set_pending(&bitmap->counts, block); ++ *bmc_new |= NEEDED_MASK; + } +- *bmc_new |= NEEDED_MASK; + if (new_blocks < old_blocks) + old_blocks = new_blocks; + } +@@ -2514,6 +2520,9 @@ chunksize_store(struct mddev *mddev, const char *buf, size_t len) + if (csize < 512 || + !is_power_of_2(csize)) + return -EINVAL; ++ if (BITS_PER_LONG > 32 && csize >= (1ULL << (BITS_PER_BYTE * ++ sizeof(((bitmap_super_t *)0)->chunksize)))) ++ return -EOVERFLOW; + mddev->bitmap_info.chunksize = csize; + return len; + } diff --git a/drivers/md/md.c b/drivers/md/md.c -index 6c0c3d0d905aa..04e1e294b4b1e 100644 +index 6c0c3d0d905aa..9e54b865f30da 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -51,6 +51,7 @@ @@ -161581,7 +193914,47 @@ index 6c0c3d0d905aa..04e1e294b4b1e 100644 #include <linux/module.h> #include <linux/reboot.h> #include <linux/file.h> -@@ -2193,6 +2194,7 @@ super_1_rdev_size_change(struct md_rdev *rdev, sector_t num_sectors) +@@ -457,6 +458,8 @@ static blk_qc_t md_submit_bio(struct bio *bio) + } + + blk_queue_split(&bio); ++ if (!bio) ++ return BLK_QC_T_NONE; + + if (mddev->ro == 1 && unlikely(rw == WRITE)) { + if (bio_sectors(bio) != 0) +@@ -525,13 +528,14 @@ static void md_end_flush(struct bio *bio) + struct md_rdev *rdev = bio->bi_private; + struct mddev *mddev = rdev->mddev; + ++ bio_put(bio); ++ + rdev_dec_pending(rdev, mddev); + + if (atomic_dec_and_test(&mddev->flush_pending)) { + /* The pre-request flush has finished */ + queue_work(md_wq, &mddev->flush_work); + } +- bio_put(bio); + } + + static void md_submit_flush_data(struct work_struct *ws); +@@ -934,10 +938,12 @@ static void super_written(struct bio *bio) + } else + clear_bit(LastDev, &rdev->flags); + ++ bio_put(bio); ++ ++ rdev_dec_pending(rdev, mddev); ++ + if (atomic_dec_and_test(&mddev->pending_writes)) + wake_up(&mddev->sb_wait); +- rdev_dec_pending(rdev, mddev); +- bio_put(bio); + } + + void 
md_super_write(struct mddev *mddev, struct md_rdev *rdev, +@@ -2193,6 +2199,7 @@ super_1_rdev_size_change(struct md_rdev *rdev, sector_t num_sectors) if (!num_sectors || num_sectors > max_sectors) num_sectors = max_sectors; @@ -161589,7 +193962,7 @@ index 6c0c3d0d905aa..04e1e294b4b1e 100644 } sb = page_address(rdev->sb_page); sb->data_size = cpu_to_le64(num_sectors); -@@ -2626,14 +2628,16 @@ static void sync_sbs(struct mddev *mddev, int nospares) +@@ -2626,14 +2633,16 @@ static void sync_sbs(struct mddev *mddev, int nospares) static bool does_sb_need_changing(struct mddev *mddev) { @@ -161609,7 +193982,7 @@ index 6c0c3d0d905aa..04e1e294b4b1e 100644 /* No good device found. */ if (!rdev) -@@ -2976,7 +2980,11 @@ state_store(struct md_rdev *rdev, const char *buf, size_t len) +@@ -2976,7 +2985,11 @@ state_store(struct md_rdev *rdev, const char *buf, size_t len) * -write_error - clears WriteErrorSeen * {,-}failfast - set/clear FailFast */ @@ -161621,7 +193994,7 @@ index 6c0c3d0d905aa..04e1e294b4b1e 100644 if (cmd_match(buf, "faulty") && rdev->mddev->pers) { md_error(rdev->mddev, rdev); if (test_bit(Faulty, &rdev->flags)) -@@ -2991,7 +2999,6 @@ state_store(struct md_rdev *rdev, const char *buf, size_t len) +@@ -2991,7 +3004,6 @@ state_store(struct md_rdev *rdev, const char *buf, size_t len) if (rdev->raid_disk >= 0) err = -EBUSY; else { @@ -161629,7 +194002,7 @@ index 6c0c3d0d905aa..04e1e294b4b1e 100644 err = 0; if (mddev_is_clustered(mddev)) err = md_cluster_ops->remove_disk(mddev, rdev); -@@ -3008,10 +3015,12 @@ state_store(struct md_rdev *rdev, const char *buf, size_t len) +@@ -3008,10 +3020,12 @@ state_store(struct md_rdev *rdev, const char *buf, size_t len) } else if (cmd_match(buf, "writemostly")) { set_bit(WriteMostly, &rdev->flags); mddev_create_serial_pool(rdev->mddev, rdev, false); @@ -161642,7 +194015,7 @@ index 6c0c3d0d905aa..04e1e294b4b1e 100644 err = 0; } else if (cmd_match(buf, "blocked")) { set_bit(Blocked, &rdev->flags); -@@ -3037,9 +3046,11 @@ state_store(struct md_rdev *rdev, const char *buf, size_t len) +@@ -3037,9 +3051,11 @@ state_store(struct md_rdev *rdev, const char *buf, size_t len) err = 0; } else if (cmd_match(buf, "failfast")) { set_bit(FailFast, &rdev->flags); @@ -161654,7 +194027,7 @@ index 6c0c3d0d905aa..04e1e294b4b1e 100644 err = 0; } else if (cmd_match(buf, "-insync") && rdev->raid_disk >= 0 && !test_bit(Journal, &rdev->flags)) { -@@ -3118,6 +3129,8 @@ state_store(struct md_rdev *rdev, const char *buf, size_t len) +@@ -3118,6 +3134,8 @@ state_store(struct md_rdev *rdev, const char *buf, size_t len) clear_bit(ExternalBbl, &rdev->flags); err = 0; } @@ -161663,7 +194036,7 @@ index 6c0c3d0d905aa..04e1e294b4b1e 100644 if (!err) sysfs_notify_dirent_safe(rdev->sysfs_state); return err ? err : len; -@@ -5575,8 +5588,6 @@ static void md_free(struct kobject *ko) +@@ -5575,8 +5593,6 @@ static void md_free(struct kobject *ko) bioset_exit(&mddev->bio_set); bioset_exit(&mddev->sync_set); @@ -161672,7 +194045,7 @@ index 6c0c3d0d905aa..04e1e294b4b1e 100644 kfree(mddev); } -@@ -5640,6 +5651,7 @@ static int md_alloc(dev_t dev, char *name) +@@ -5640,6 +5656,7 @@ static int md_alloc(dev_t dev, char *name) * removed (mddev_delayed_delete). 
*/ flush_workqueue(md_misc_wq); @@ -161680,7 +194053,7 @@ index 6c0c3d0d905aa..04e1e294b4b1e 100644 mutex_lock(&disks_mutex); mddev = mddev_alloc(dev); -@@ -5862,13 +5874,6 @@ int md_run(struct mddev *mddev) +@@ -5862,13 +5879,6 @@ int md_run(struct mddev *mddev) if (err) goto exit_bio_set; } @@ -161694,7 +194067,7 @@ index 6c0c3d0d905aa..04e1e294b4b1e 100644 spin_lock(&pers_lock); pers = find_pers(mddev->level, mddev->clevel); -@@ -6045,9 +6050,6 @@ bitmap_abort: +@@ -6045,9 +6055,6 @@ bitmap_abort: module_put(pers->owner); md_bitmap_destroy(mddev); abort: @@ -161704,7 +194077,7 @@ index 6c0c3d0d905aa..04e1e294b4b1e 100644 bioset_exit(&mddev->sync_set); exit_bio_set: bioset_exit(&mddev->bio_set); -@@ -6271,11 +6273,10 @@ void md_stop(struct mddev *mddev) +@@ -6271,11 +6278,10 @@ void md_stop(struct mddev *mddev) /* stop the array and free an attached data structures. * This is called from dm-raid */ @@ -161717,7 +194090,7 @@ index 6c0c3d0d905aa..04e1e294b4b1e 100644 } EXPORT_SYMBOL_GPL(md_stop); -@@ -7943,17 +7944,22 @@ EXPORT_SYMBOL(md_register_thread); +@@ -7943,17 +7949,22 @@ EXPORT_SYMBOL(md_register_thread); void md_unregister_thread(struct md_thread **threadp) { @@ -161745,7 +194118,7 @@ index 6c0c3d0d905aa..04e1e294b4b1e 100644 kthread_stop(thread->tsk); kfree(thread); } -@@ -8580,6 +8586,23 @@ void md_submit_discard_bio(struct mddev *mddev, struct md_rdev *rdev, +@@ -8580,6 +8591,23 @@ void md_submit_discard_bio(struct mddev *mddev, struct md_rdev *rdev, } EXPORT_SYMBOL_GPL(md_submit_discard_bio); @@ -161769,7 +194142,7 @@ index 6c0c3d0d905aa..04e1e294b4b1e 100644 static void md_end_io_acct(struct bio *bio) { struct md_io_acct *md_io_acct = bio->bi_private; -@@ -9446,6 +9469,7 @@ void md_reap_sync_thread(struct mddev *mddev) +@@ -9446,6 +9474,7 @@ void md_reap_sync_thread(struct mddev *mddev) wake_up(&resync_wait); /* flag recovery needed just to double check */ set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); @@ -161777,7 +194150,7 @@ index 6c0c3d0d905aa..04e1e294b4b1e 100644 sysfs_notify_dirent_safe(mddev->sysfs_action); md_new_event(mddev); if (mddev->event_work.func) -@@ -9754,16 +9778,18 @@ static int read_rdev(struct mddev *mddev, struct md_rdev *rdev) +@@ -9754,16 +9783,18 @@ static int read_rdev(struct mddev *mddev, struct md_rdev *rdev) void md_reload_sb(struct mddev *mddev, int nr) { @@ -161992,7 +194365,7 @@ index 62c8b6adac70e..c16fa65a3ffd3 100644 static void raid0_handle_discard(struct mddev *mddev, struct bio *bio) diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c -index 19598bd38939d..9fa4794936426 100644 +index 19598bd38939d..783763f6845f4 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -1502,6 +1502,7 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio, @@ -162003,6 +194376,14 @@ index 19598bd38939d..9fa4794936426 100644 (atomic_read(&bitmap->behind_writes) < mddev->bitmap_info.max_write_behind) && !waitqueue_active(&bitmap->behind_wait)) { +@@ -3140,6 +3141,7 @@ static int raid1_run(struct mddev *mddev) + * RAID1 needs at least one disk in active + */ + if (conf->raid_disks - mddev->degraded < 1) { ++ md_unregister_thread(&conf->thread); + ret = -EINVAL; + goto abort; + } diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index aa2636582841e..c4c1a3a7d7abc 100644 --- a/drivers/md/raid10.c @@ -162598,6 +194979,32 @@ index 8c613aa649c6f..0eb90cc0ffb0f 100644 } static int cec_pin_adap_transmit(struct cec_adapter *adap, u8 attempts, +diff --git a/drivers/media/cec/platform/cros-ec/cros-ec-cec.c 
b/drivers/media/cec/platform/cros-ec/cros-ec-cec.c +index 2d95e16cd2489..f66699d5dc66e 100644 +--- a/drivers/media/cec/platform/cros-ec/cros-ec-cec.c ++++ b/drivers/media/cec/platform/cros-ec/cros-ec-cec.c +@@ -44,6 +44,8 @@ static void handle_cec_message(struct cros_ec_cec *cros_ec_cec) + uint8_t *cec_message = cros_ec->event_data.data.cec_message; + unsigned int len = cros_ec->event_size; + ++ if (len > CEC_MAX_MSG_SIZE) ++ len = CEC_MAX_MSG_SIZE; + cros_ec_cec->rx_msg.len = len; + memcpy(cros_ec_cec->rx_msg.msg, cec_message, len); + +diff --git a/drivers/media/cec/platform/s5p/s5p_cec.c b/drivers/media/cec/platform/s5p/s5p_cec.c +index 028a09a7531ef..102f1af01000a 100644 +--- a/drivers/media/cec/platform/s5p/s5p_cec.c ++++ b/drivers/media/cec/platform/s5p/s5p_cec.c +@@ -115,6 +115,8 @@ static irqreturn_t s5p_cec_irq_handler(int irq, void *priv) + dev_dbg(cec->dev, "Buffer overrun (worker did not process previous message)\n"); + cec->rx = STATE_BUSY; + cec->msg.len = status >> 24; ++ if (cec->msg.len > CEC_MAX_MSG_SIZE) ++ cec->msg.len = CEC_MAX_MSG_SIZE; + cec->msg.rx_status = CEC_RX_STATUS_OK; + s5p_cec_get_rx_buf(cec, cec->msg.len, + cec->msg.msg); diff --git a/drivers/media/common/saa7146/saa7146_fops.c b/drivers/media/common/saa7146/saa7146_fops.c index baf5772c52a96..be32159777142 100644 --- a/drivers/media/common/saa7146/saa7146_fops.c @@ -162611,8 +195018,80 @@ index baf5772c52a96..be32159777142 100644 } saa7146_video_uops.init(dev,vv); +diff --git a/drivers/media/common/videobuf2/frame_vector.c b/drivers/media/common/videobuf2/frame_vector.c +index ce879f6f8f829..144027035892a 100644 +--- a/drivers/media/common/videobuf2/frame_vector.c ++++ b/drivers/media/common/videobuf2/frame_vector.c +@@ -35,10 +35,7 @@ + int get_vaddr_frames(unsigned long start, unsigned int nr_frames, + struct frame_vector *vec) + { +- struct mm_struct *mm = current->mm; +- struct vm_area_struct *vma; +- int ret = 0; +- int err; ++ int ret; + + if (nr_frames == 0) + return 0; +@@ -51,45 +48,17 @@ int get_vaddr_frames(unsigned long start, unsigned int nr_frames, + ret = pin_user_pages_fast(start, nr_frames, + FOLL_FORCE | FOLL_WRITE | FOLL_LONGTERM, + (struct page **)(vec->ptrs)); +- if (ret > 0) { +- vec->got_ref = true; +- vec->is_pfns = false; +- goto out_unlocked; +- } ++ vec->got_ref = true; ++ vec->is_pfns = false; ++ vec->nr_frames = ret; + +- mmap_read_lock(mm); +- vec->got_ref = false; +- vec->is_pfns = true; +- ret = 0; +- do { +- unsigned long *nums = frame_vector_pfns(vec); +- +- vma = vma_lookup(mm, start); +- if (!vma) +- break; +- +- while (ret < nr_frames && start + PAGE_SIZE <= vma->vm_end) { +- err = follow_pfn(vma, start, &nums[ret]); +- if (err) { +- if (ret == 0) +- ret = err; +- goto out; +- } +- start += PAGE_SIZE; +- ret++; +- } +- /* Bail out if VMA doesn't completely cover the tail page. */ +- if (start < vma->vm_end) +- break; +- } while (ret < nr_frames); +-out: +- mmap_read_unlock(mm); +-out_unlocked: +- if (!ret) +- ret = -EFAULT; +- if (ret > 0) +- vec->nr_frames = ret; +- return ret; ++ if (likely(ret > 0)) ++ return ret; ++ ++ /* This used to (racily) return non-refcounted pfns. Let people know */ ++ WARN_ONCE(1, "get_vaddr_frames() cannot follow VM_IO mapping"); ++ vec->nr_frames = 0; ++ return ret ? 
ret : -EFAULT; + } + EXPORT_SYMBOL(get_vaddr_frames); + diff --git a/drivers/media/common/videobuf2/videobuf2-core.c b/drivers/media/common/videobuf2/videobuf2-core.c -index 508ac295eb06e..033b0c83272fe 100644 +index 508ac295eb06e..30c8497f7c118 100644 --- a/drivers/media/common/videobuf2/videobuf2-core.c +++ b/drivers/media/common/videobuf2/videobuf2-core.c @@ -68,13 +68,13 @@ module_param(debug, int, 0644); @@ -162657,7 +195136,150 @@ index 508ac295eb06e..033b0c83272fe 100644 if (IS_ERR_OR_NULL(mem_priv)) { if (mem_priv) ret = PTR_ERR(mem_priv); -@@ -975,7 +976,7 @@ void *vb2_plane_vaddr(struct vb2_buffer *vb, unsigned int plane_no) +@@ -787,7 +788,13 @@ int vb2_core_reqbufs(struct vb2_queue *q, enum vb2_memory memory, + num_buffers = max_t(unsigned int, *count, q->min_buffers_needed); + num_buffers = min_t(unsigned int, num_buffers, VB2_MAX_FRAME); + memset(q->alloc_devs, 0, sizeof(q->alloc_devs)); ++ /* ++ * Set this now to ensure that drivers see the correct q->memory value ++ * in the queue_setup op. ++ */ ++ mutex_lock(&q->mmap_lock); + q->memory = memory; ++ mutex_unlock(&q->mmap_lock); + + /* + * Ask the driver how many buffers and planes per buffer it requires. +@@ -796,22 +803,27 @@ int vb2_core_reqbufs(struct vb2_queue *q, enum vb2_memory memory, + ret = call_qop(q, queue_setup, q, &num_buffers, &num_planes, + plane_sizes, q->alloc_devs); + if (ret) +- return ret; ++ goto error; + + /* Check that driver has set sane values */ +- if (WARN_ON(!num_planes)) +- return -EINVAL; ++ if (WARN_ON(!num_planes)) { ++ ret = -EINVAL; ++ goto error; ++ } + + for (i = 0; i < num_planes; i++) +- if (WARN_ON(!plane_sizes[i])) +- return -EINVAL; ++ if (WARN_ON(!plane_sizes[i])) { ++ ret = -EINVAL; ++ goto error; ++ } + + /* Finally, allocate buffers and video memory */ + allocated_buffers = + __vb2_queue_alloc(q, memory, num_buffers, num_planes, plane_sizes); + if (allocated_buffers == 0) { + dprintk(q, 1, "memory allocation failed\n"); +- return -ENOMEM; ++ ret = -ENOMEM; ++ goto error; + } + + /* +@@ -852,7 +864,8 @@ int vb2_core_reqbufs(struct vb2_queue *q, enum vb2_memory memory, + if (ret < 0) { + /* + * Note: __vb2_queue_free() will subtract 'allocated_buffers' +- * from q->num_buffers. ++ * from q->num_buffers and it will reset q->memory to ++ * VB2_MEMORY_UNKNOWN. + */ + __vb2_queue_free(q, allocated_buffers); + mutex_unlock(&q->mmap_lock); +@@ -868,6 +881,12 @@ int vb2_core_reqbufs(struct vb2_queue *q, enum vb2_memory memory, + q->waiting_for_buffers = !q->is_output; + + return 0; ++ ++error: ++ mutex_lock(&q->mmap_lock); ++ q->memory = VB2_MEMORY_UNKNOWN; ++ mutex_unlock(&q->mmap_lock); ++ return ret; + } + EXPORT_SYMBOL_GPL(vb2_core_reqbufs); + +@@ -878,6 +897,7 @@ int vb2_core_create_bufs(struct vb2_queue *q, enum vb2_memory memory, + { + unsigned int num_planes = 0, num_buffers, allocated_buffers; + unsigned plane_sizes[VB2_MAX_PLANES] = { }; ++ bool no_previous_buffers = !q->num_buffers; + int ret; + + if (q->num_buffers == VB2_MAX_FRAME) { +@@ -885,13 +905,19 @@ int vb2_core_create_bufs(struct vb2_queue *q, enum vb2_memory memory, + return -ENOBUFS; + } + +- if (!q->num_buffers) { ++ if (no_previous_buffers) { + if (q->waiting_in_dqbuf && *count) { + dprintk(q, 1, "another dup()ped fd is waiting for a buffer\n"); + return -EBUSY; + } + memset(q->alloc_devs, 0, sizeof(q->alloc_devs)); ++ /* ++ * Set this now to ensure that drivers see the correct q->memory ++ * value in the queue_setup op. 
++ */ ++ mutex_lock(&q->mmap_lock); + q->memory = memory; ++ mutex_unlock(&q->mmap_lock); + q->waiting_for_buffers = !q->is_output; + } else { + if (q->memory != memory) { +@@ -914,14 +940,15 @@ int vb2_core_create_bufs(struct vb2_queue *q, enum vb2_memory memory, + ret = call_qop(q, queue_setup, q, &num_buffers, + &num_planes, plane_sizes, q->alloc_devs); + if (ret) +- return ret; ++ goto error; + + /* Finally, allocate buffers and video memory */ + allocated_buffers = __vb2_queue_alloc(q, memory, num_buffers, + num_planes, plane_sizes); + if (allocated_buffers == 0) { + dprintk(q, 1, "memory allocation failed\n"); +- return -ENOMEM; ++ ret = -ENOMEM; ++ goto error; + } + + /* +@@ -952,7 +979,8 @@ int vb2_core_create_bufs(struct vb2_queue *q, enum vb2_memory memory, + if (ret < 0) { + /* + * Note: __vb2_queue_free() will subtract 'allocated_buffers' +- * from q->num_buffers. ++ * from q->num_buffers and it will reset q->memory to ++ * VB2_MEMORY_UNKNOWN. + */ + __vb2_queue_free(q, allocated_buffers); + mutex_unlock(&q->mmap_lock); +@@ -967,6 +995,14 @@ int vb2_core_create_bufs(struct vb2_queue *q, enum vb2_memory memory, + *count = allocated_buffers; + + return 0; ++ ++error: ++ if (no_previous_buffers) { ++ mutex_lock(&q->mmap_lock); ++ q->memory = VB2_MEMORY_UNKNOWN; ++ mutex_unlock(&q->mmap_lock); ++ } ++ return ret; + } + EXPORT_SYMBOL_GPL(vb2_core_create_bufs); + +@@ -975,7 +1011,7 @@ void *vb2_plane_vaddr(struct vb2_buffer *vb, unsigned int plane_no) if (plane_no >= vb->num_planes || !vb->planes[plane_no].mem_priv) return NULL; @@ -162666,7 +195288,7 @@ index 508ac295eb06e..033b0c83272fe 100644 } EXPORT_SYMBOL_GPL(vb2_plane_vaddr); -@@ -985,7 +986,7 @@ void *vb2_plane_cookie(struct vb2_buffer *vb, unsigned int plane_no) +@@ -985,7 +1021,7 @@ void *vb2_plane_cookie(struct vb2_buffer *vb, unsigned int plane_no) if (plane_no >= vb->num_planes || !vb->planes[plane_no].mem_priv) return NULL; @@ -162675,7 +195297,7 @@ index 508ac295eb06e..033b0c83272fe 100644 } EXPORT_SYMBOL_GPL(vb2_plane_cookie); -@@ -1125,10 +1126,11 @@ static int __prepare_userptr(struct vb2_buffer *vb) +@@ -1125,10 +1161,11 @@ static int __prepare_userptr(struct vb2_buffer *vb) vb->planes[plane].data_offset = 0; /* Acquire each plane's memory */ @@ -162691,7 +195313,7 @@ index 508ac295eb06e..033b0c83272fe 100644 if (IS_ERR(mem_priv)) { dprintk(q, 1, "failed acquiring userspace memory for plane %d\n", plane); -@@ -1249,9 +1251,11 @@ static int __prepare_dmabuf(struct vb2_buffer *vb) +@@ -1249,9 +1286,11 @@ static int __prepare_dmabuf(struct vb2_buffer *vb) vb->planes[plane].data_offset = 0; /* Acquire each plane's memory */ @@ -162706,7 +195328,30 @@ index 508ac295eb06e..033b0c83272fe 100644 if (IS_ERR(mem_priv)) { dprintk(q, 1, "failed to attach dmabuf\n"); ret = PTR_ERR(mem_priv); -@@ -2187,8 +2191,10 @@ int vb2_core_expbuf(struct vb2_queue *q, int *fd, unsigned int type, +@@ -2120,6 +2159,22 @@ static int __find_plane_by_offset(struct vb2_queue *q, unsigned long off, + struct vb2_buffer *vb; + unsigned int buffer, plane; + ++ /* ++ * Sanity checks to ensure the lock is held, MEMORY_MMAP is ++ * used and fileio isn't active. ++ */ ++ lockdep_assert_held(&q->mmap_lock); ++ ++ if (q->memory != VB2_MEMORY_MMAP) { ++ dprintk(q, 1, "queue is not currently set up for mmap\n"); ++ return -EINVAL; ++ } ++ ++ if (vb2_fileio_is_active(q)) { ++ dprintk(q, 1, "file io in progress\n"); ++ return -EBUSY; ++ } ++ + /* + * Go over all buffers and their planes, comparing the given offset + * with an offset assigned to each plane. 
If a match is found, +@@ -2187,8 +2242,10 @@ int vb2_core_expbuf(struct vb2_queue *q, int *fd, unsigned int type, vb_plane = &vb->planes[plane]; @@ -162719,6 +195364,67 @@ index 508ac295eb06e..033b0c83272fe 100644 if (IS_ERR_OR_NULL(dbuf)) { dprintk(q, 1, "failed to export buffer %d, plane %d\n", index, plane); +@@ -2219,11 +2276,6 @@ int vb2_mmap(struct vb2_queue *q, struct vm_area_struct *vma) + int ret; + unsigned long length; + +- if (q->memory != VB2_MEMORY_MMAP) { +- dprintk(q, 1, "queue is not currently set up for mmap\n"); +- return -EINVAL; +- } +- + /* + * Check memory area access mode. + */ +@@ -2245,14 +2297,9 @@ int vb2_mmap(struct vb2_queue *q, struct vm_area_struct *vma) + + mutex_lock(&q->mmap_lock); + +- if (vb2_fileio_is_active(q)) { +- dprintk(q, 1, "mmap: file io in progress\n"); +- ret = -EBUSY; +- goto unlock; +- } +- + /* +- * Find the plane corresponding to the offset passed by userspace. ++ * Find the plane corresponding to the offset passed by userspace. This ++ * will return an error if not MEMORY_MMAP or file I/O is in progress. + */ + ret = __find_plane_by_offset(q, off, &buffer, &plane); + if (ret) +@@ -2305,22 +2352,25 @@ unsigned long vb2_get_unmapped_area(struct vb2_queue *q, + void *vaddr; + int ret; + +- if (q->memory != VB2_MEMORY_MMAP) { +- dprintk(q, 1, "queue is not currently set up for mmap\n"); +- return -EINVAL; +- } ++ mutex_lock(&q->mmap_lock); + + /* +- * Find the plane corresponding to the offset passed by userspace. ++ * Find the plane corresponding to the offset passed by userspace. This ++ * will return an error if not MEMORY_MMAP or file I/O is in progress. + */ + ret = __find_plane_by_offset(q, off, &buffer, &plane); + if (ret) +- return ret; ++ goto unlock; + + vb = q->bufs[buffer]; + + vaddr = vb2_plane_vaddr(vb, plane); ++ mutex_unlock(&q->mmap_lock); + return vaddr ? 
(unsigned long)vaddr : -EINVAL; ++ ++unlock: ++ mutex_unlock(&q->mmap_lock); ++ return ret; + } + EXPORT_SYMBOL_GPL(vb2_get_unmapped_area); + #endif diff --git a/drivers/media/common/videobuf2/videobuf2-dma-contig.c b/drivers/media/common/videobuf2/videobuf2-dma-contig.c index a7f61ba854405..f8c65b0401054 100644 --- a/drivers/media/common/videobuf2/videobuf2-dma-contig.c @@ -163085,10 +195791,22 @@ index 83f95258ec8c6..ef36abd912dcc 100644 return buf; diff --git a/drivers/media/dvb-core/dmxdev.c b/drivers/media/dvb-core/dmxdev.c -index 5d5a48475a54f..01f288fa37e0e 100644 +index 5d5a48475a54f..8abf7f44d96bc 100644 --- a/drivers/media/dvb-core/dmxdev.c +++ b/drivers/media/dvb-core/dmxdev.c -@@ -1413,7 +1413,7 @@ static const struct dvb_device dvbdev_dvr = { +@@ -800,6 +800,11 @@ static int dvb_demux_open(struct inode *inode, struct file *file) + if (mutex_lock_interruptible(&dmxdev->mutex)) + return -ERESTARTSYS; + ++ if (dmxdev->exit) { ++ mutex_unlock(&dmxdev->mutex); ++ return -ENODEV; ++ } ++ + for (i = 0; i < dmxdev->filternum; i++) + if (dmxdev->filter[i].state == DMXDEV_STATE_FREE) + break; +@@ -1413,7 +1418,7 @@ static const struct dvb_device dvbdev_dvr = { }; int dvb_dmxdev_init(struct dmxdev *dmxdev, struct dvb_adapter *dvb_adapter) { @@ -163097,7 +195815,7 @@ index 5d5a48475a54f..01f288fa37e0e 100644 if (dmxdev->demux->open(dmxdev->demux) < 0) return -EUSERS; -@@ -1432,14 +1432,26 @@ int dvb_dmxdev_init(struct dmxdev *dmxdev, struct dvb_adapter *dvb_adapter) +@@ -1432,21 +1437,36 @@ int dvb_dmxdev_init(struct dmxdev *dmxdev, struct dvb_adapter *dvb_adapter) DMXDEV_STATE_FREE); } @@ -163126,6 +195844,65 @@ index 5d5a48475a54f..01f288fa37e0e 100644 } EXPORT_SYMBOL(dvb_dmxdev_init); + + void dvb_dmxdev_release(struct dmxdev *dmxdev) + { ++ mutex_lock(&dmxdev->mutex); + dmxdev->exit = 1; ++ mutex_unlock(&dmxdev->mutex); ++ + if (dmxdev->dvbdev->users > 1) { + wait_event(dmxdev->dvbdev->wait_queue, + dmxdev->dvbdev->users == 1); +diff --git a/drivers/media/dvb-core/dvb_ca_en50221.c b/drivers/media/dvb-core/dvb_ca_en50221.c +index 15a08d8c69ef8..c2d2792227f86 100644 +--- a/drivers/media/dvb-core/dvb_ca_en50221.c ++++ b/drivers/media/dvb-core/dvb_ca_en50221.c +@@ -157,7 +157,7 @@ static void dvb_ca_private_free(struct dvb_ca_private *ca) + { + unsigned int i; + +- dvb_free_device(ca->dvbdev); ++ dvb_device_put(ca->dvbdev); + for (i = 0; i < ca->slot_count; i++) + vfree(ca->slot_info[i].rx_buffer.data); + +diff --git a/drivers/media/dvb-core/dvb_frontend.c b/drivers/media/dvb-core/dvb_frontend.c +index 258637d762d64..09facc78d88aa 100644 +--- a/drivers/media/dvb-core/dvb_frontend.c ++++ b/drivers/media/dvb-core/dvb_frontend.c +@@ -136,7 +136,7 @@ static void __dvb_frontend_free(struct dvb_frontend *fe) + struct dvb_frontend_private *fepriv = fe->frontend_priv; + + if (fepriv) +- dvb_free_device(fepriv->dvbdev); ++ dvb_device_put(fepriv->dvbdev); + + dvb_frontend_invoke_release(fe, fe->ops.release); + +@@ -2985,6 +2985,7 @@ int dvb_register_frontend(struct dvb_adapter *dvb, + .name = fe->ops.info.name, + #endif + }; ++ int ret; + + dev_dbg(dvb->device, "%s:\n", __func__); + +@@ -3018,8 +3019,13 @@ int dvb_register_frontend(struct dvb_adapter *dvb, + "DVB: registering adapter %i frontend %i (%s)...\n", + fe->dvb->num, fe->id, fe->ops.info.name); + +- dvb_register_device(fe->dvb, &fepriv->dvbdev, &dvbdev_template, ++ ret = dvb_register_device(fe->dvb, &fepriv->dvbdev, &dvbdev_template, + fe, DVB_DEVICE_FRONTEND, 0); ++ if (ret) { ++ dvb_frontend_put(fe); ++ mutex_unlock(&frontend_mutex); ++ 
return ret; ++ } + + /* + * Initialize the cache to the proper values according with the diff --git a/drivers/media/dvb-core/dvb_vb2.c b/drivers/media/dvb-core/dvb_vb2.c index 6974f17315294..1331f2c2237e6 100644 --- a/drivers/media/dvb-core/dvb_vb2.c @@ -163157,6 +195934,114 @@ index 6974f17315294..1331f2c2237e6 100644 ret = vb2_core_qbuf(&ctx->vb_q, b->index, b, NULL); if (ret) { dprintk(1, "[%s] index=%d errno=%d\n", ctx->name, +diff --git a/drivers/media/dvb-core/dvbdev.c b/drivers/media/dvb-core/dvbdev.c +index 795d9bfaba5cf..6e2b7e97da176 100644 +--- a/drivers/media/dvb-core/dvbdev.c ++++ b/drivers/media/dvb-core/dvbdev.c +@@ -107,7 +107,7 @@ static int dvb_device_open(struct inode *inode, struct file *file) + new_fops = fops_get(dvbdev->fops); + if (!new_fops) + goto fail; +- file->private_data = dvbdev; ++ file->private_data = dvb_device_get(dvbdev); + replace_fops(file, new_fops); + if (file->f_op->open) + err = file->f_op->open(inode, file); +@@ -171,6 +171,9 @@ int dvb_generic_release(struct inode *inode, struct file *file) + } + + dvbdev->users++; ++ ++ dvb_device_put(dvbdev); ++ + return 0; + } + EXPORT_SYMBOL(dvb_generic_release); +@@ -342,6 +345,7 @@ static int dvb_create_media_entity(struct dvb_device *dvbdev, + GFP_KERNEL); + if (!dvbdev->pads) { + kfree(dvbdev->entity); ++ dvbdev->entity = NULL; + return -ENOMEM; + } + } +@@ -488,6 +492,7 @@ int dvb_register_device(struct dvb_adapter *adap, struct dvb_device **pdvbdev, + } + + memcpy(dvbdev, template, sizeof(struct dvb_device)); ++ kref_init(&dvbdev->ref); + dvbdev->type = type; + dvbdev->id = id; + dvbdev->adapter = adap; +@@ -518,7 +523,7 @@ int dvb_register_device(struct dvb_adapter *adap, struct dvb_device **pdvbdev, + #endif + + dvbdev->minor = minor; +- dvb_minors[minor] = dvbdev; ++ dvb_minors[minor] = dvb_device_get(dvbdev); + up_write(&minor_rwsem); + + ret = dvb_register_media_device(dvbdev, type, minor, demux_sink_pads); +@@ -563,6 +568,7 @@ void dvb_remove_device(struct dvb_device *dvbdev) + + down_write(&minor_rwsem); + dvb_minors[dvbdev->minor] = NULL; ++ dvb_device_put(dvbdev); + up_write(&minor_rwsem); + + dvb_media_device_free(dvbdev); +@@ -574,21 +580,34 @@ void dvb_remove_device(struct dvb_device *dvbdev) + EXPORT_SYMBOL(dvb_remove_device); + + +-void dvb_free_device(struct dvb_device *dvbdev) ++static void dvb_free_device(struct kref *ref) + { +- if (!dvbdev) +- return; ++ struct dvb_device *dvbdev = container_of(ref, struct dvb_device, ref); + + kfree (dvbdev->fops); + kfree (dvbdev); + } +-EXPORT_SYMBOL(dvb_free_device); ++ ++ ++struct dvb_device *dvb_device_get(struct dvb_device *dvbdev) ++{ ++ kref_get(&dvbdev->ref); ++ return dvbdev; ++} ++EXPORT_SYMBOL(dvb_device_get); ++ ++ ++void dvb_device_put(struct dvb_device *dvbdev) ++{ ++ if (dvbdev) ++ kref_put(&dvbdev->ref, dvb_free_device); ++} + + + void dvb_unregister_device(struct dvb_device *dvbdev) + { + dvb_remove_device(dvbdev); +- dvb_free_device(dvbdev); ++ dvb_device_put(dvbdev); + } + EXPORT_SYMBOL(dvb_unregister_device); + +diff --git a/drivers/media/dvb-frontends/bcm3510.c b/drivers/media/dvb-frontends/bcm3510.c +index da0ff7b44da41..68b92b4419cff 100644 +--- a/drivers/media/dvb-frontends/bcm3510.c ++++ b/drivers/media/dvb-frontends/bcm3510.c +@@ -649,6 +649,7 @@ static int bcm3510_download_firmware(struct dvb_frontend* fe) + deb_info("firmware chunk, addr: 0x%04x, len: 0x%04x, total length: 0x%04zx\n",addr,len,fw->size); + if ((ret = bcm3510_write_ram(st,addr,&b[i+4],len)) < 0) { + err("firmware download failed: %d\n",ret); ++ 
release_firmware(fw); + return ret; + } + i += 4 + len; diff --git a/drivers/media/dvb-frontends/dib8000.c b/drivers/media/dvb-frontends/dib8000.c index bb02354a48b81..d67f2dd997d06 100644 --- a/drivers/media/dvb-frontends/dib8000.c @@ -163173,6 +196058,19 @@ index bb02354a48b81..d67f2dd997d06 100644 dibx000_init_i2c_master(&state->i2c_master, DIB8000, state->i2c.adap, state->i2c.addr); +diff --git a/drivers/media/dvb-frontends/drxk_hard.c b/drivers/media/dvb-frontends/drxk_hard.c +index d7fc2595f15b8..efe92eef67db6 100644 +--- a/drivers/media/dvb-frontends/drxk_hard.c ++++ b/drivers/media/dvb-frontends/drxk_hard.c +@@ -6673,7 +6673,7 @@ static int drxk_read_snr(struct dvb_frontend *fe, u16 *snr) + static int drxk_read_ucblocks(struct dvb_frontend *fe, u32 *ucblocks) + { + struct drxk_state *state = fe->demodulator_priv; +- u16 err; ++ u16 err = 0; + + dprintk(1, "\n"); + diff --git a/drivers/media/dvb-frontends/mn88443x.c b/drivers/media/dvb-frontends/mn88443x.c index e4528784f8477..fff212c0bf3b5 100644 --- a/drivers/media/dvb-frontends/mn88443x.c @@ -163219,6 +196117,22 @@ index e4528784f8477..fff212c0bf3b5 100644 mn88443x_s_sleep(chip); mn88443x_t_sleep(chip); +diff --git a/drivers/media/dvb-frontends/stv0288.c b/drivers/media/dvb-frontends/stv0288.c +index 3d54a0ec86afd..3ae1f3a2f1420 100644 +--- a/drivers/media/dvb-frontends/stv0288.c ++++ b/drivers/media/dvb-frontends/stv0288.c +@@ -440,9 +440,8 @@ static int stv0288_set_frontend(struct dvb_frontend *fe) + struct stv0288_state *state = fe->demodulator_priv; + struct dtv_frontend_properties *c = &fe->dtv_property_cache; + +- char tm; +- unsigned char tda[3]; +- u8 reg, time_out = 0; ++ u8 tda[3], reg, time_out = 0; ++ s8 tm; + + dprintk("%s : FE_SET_FRONTEND\n", __func__); + diff --git a/drivers/media/firewire/firedtv-avc.c b/drivers/media/firewire/firedtv-avc.c index 2bf9467b917d1..71991f8638e6b 100644 --- a/drivers/media/firewire/firedtv-avc.c @@ -163281,6 +196195,49 @@ index 08feb3e8c1bf6..6157e73eef24e 100644 help Support for the Texas Instruments THS8200 video encoder. 
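The bcm3510 hunk above and the ad5820 hunk just below fix the same class of
bug: an error path that returns without releasing everything acquired so
far, or that unwinds in the wrong order. A minimal sketch of the intended
pattern follows; it is not part of this patch, and the example_*() helpers
are hypothetical stand-ins for the driver-specific steps:

    #include <linux/device.h>
    #include <linux/firmware.h>

    /* Illustrative stand-ins for the driver-specific steps. */
    static int example_load(struct device *dev, const struct firmware *fw);
    static int example_start(struct device *dev);
    static void example_unload(struct device *dev);

    static int example_probe(struct device *dev)
    {
            const struct firmware *fw;
            int ret;

            ret = request_firmware(&fw, "example.fw", dev);
            if (ret)
                    return ret;             /* nothing acquired yet */

            ret = example_load(dev, fw);
            if (ret)
                    goto err_release_fw;    /* undo request_firmware() only */

            ret = example_start(dev);
            if (ret)
                    goto err_unload;        /* unwind in reverse order */

            release_firmware(fw);
            return 0;

    err_unload:
            example_unload(dev);
    err_release_fw:
            release_firmware(fw);
            return ret;
    }

Each goto target releases exactly what was acquired before the
corresponding failure point, most recently acquired first.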
+diff --git a/drivers/media/i2c/ad5820.c b/drivers/media/i2c/ad5820.c +index 2958a46944614..07639ecc85aa8 100644 +--- a/drivers/media/i2c/ad5820.c ++++ b/drivers/media/i2c/ad5820.c +@@ -327,18 +327,18 @@ static int ad5820_probe(struct i2c_client *client, + + ret = media_entity_pads_init(&coil->subdev.entity, 0, NULL); + if (ret < 0) +- goto cleanup2; ++ goto clean_mutex; + + ret = v4l2_async_register_subdev(&coil->subdev); + if (ret < 0) +- goto cleanup; ++ goto clean_entity; + + return ret; + +-cleanup2: +- mutex_destroy(&coil->power_lock); +-cleanup: ++clean_entity: + media_entity_cleanup(&coil->subdev.entity); ++clean_mutex: ++ mutex_destroy(&coil->power_lock); + return ret; + } + +diff --git a/drivers/media/i2c/adv748x/adv748x-afe.c b/drivers/media/i2c/adv748x/adv748x-afe.c +index 02eabe10ab970..00095c7762c24 100644 +--- a/drivers/media/i2c/adv748x/adv748x-afe.c ++++ b/drivers/media/i2c/adv748x/adv748x-afe.c +@@ -521,6 +521,10 @@ int adv748x_afe_init(struct adv748x_afe *afe) + } + } + ++ adv748x_afe_s_input(afe, afe->input); ++ ++ adv_dbg(state, "AFE Default input set to %d\n", afe->input); ++ + /* Entity pads and sinks are 0-indexed to match the pads */ + for (i = ADV748X_AFE_SINK_AIN0; i <= ADV748X_AFE_SINK_AIN7; i++) + afe->pads[i].flags = MEDIA_PAD_FL_SINK; diff --git a/drivers/media/i2c/adv7511-v4l2.c b/drivers/media/i2c/adv7511-v4l2.c index 41f4e749a859c..2217004264e4b 100644 --- a/drivers/media/i2c/adv7511-v4l2.c @@ -164631,6 +197588,40 @@ index 73fc901ecf3db..bf0b9b0914cd5 100644 if (mxb_probe(dev)) { saa7146_vv_release(dev); return -1; +diff --git a/drivers/media/pci/saa7164/saa7164-core.c b/drivers/media/pci/saa7164/saa7164-core.c +index 7973ae42873a6..c10997e2271d2 100644 +--- a/drivers/media/pci/saa7164/saa7164-core.c ++++ b/drivers/media/pci/saa7164/saa7164-core.c +@@ -1259,7 +1259,7 @@ static int saa7164_initdev(struct pci_dev *pci_dev, + + if (saa7164_dev_setup(dev) < 0) { + err = -EINVAL; +- goto fail_free; ++ goto fail_dev; + } + + /* print pci info */ +@@ -1427,6 +1427,8 @@ fail_fw: + + fail_irq: + saa7164_dev_unregister(dev); ++fail_dev: ++ pci_disable_device(pci_dev); + fail_free: + v4l2_device_unregister(&dev->v4l2_dev); + kfree(dev); +diff --git a/drivers/media/pci/solo6x10/solo6x10-core.c b/drivers/media/pci/solo6x10/solo6x10-core.c +index 4a546eeefe38f..6d87fbb0ee04a 100644 +--- a/drivers/media/pci/solo6x10/solo6x10-core.c ++++ b/drivers/media/pci/solo6x10/solo6x10-core.c +@@ -420,6 +420,7 @@ static int solo_sysfs_init(struct solo_dev *solo_dev) + solo_dev->nr_chans); + + if (device_register(dev)) { ++ put_device(dev); + dev->parent = NULL; + return -ENOMEM; + } diff --git a/drivers/media/pci/tw686x/tw686x-core.c b/drivers/media/pci/tw686x/tw686x-core.c index 6676e069b515d..384d38754a4b1 100644 --- a/drivers/media/pci/tw686x/tw686x-core.c @@ -165125,6 +198116,47 @@ index f2785131ff569..a4defc30cf412 100644 static struct platform_driver microchip_xisc_driver = { .probe = microchip_xisc_probe, +diff --git a/drivers/media/platform/coda/coda-bit.c b/drivers/media/platform/coda/coda-bit.c +index c484c008ab027..582a6c581f3c3 100644 +--- a/drivers/media/platform/coda/coda-bit.c ++++ b/drivers/media/platform/coda/coda-bit.c +@@ -852,7 +852,7 @@ static void coda_setup_iram(struct coda_ctx *ctx) + /* Only H.264BP and H.263P3 are considered */ + iram_info->buf_dbk_y_use = coda_iram_alloc(iram_info, w64); + iram_info->buf_dbk_c_use = coda_iram_alloc(iram_info, w64); +- if (!iram_info->buf_dbk_c_use) ++ if (!iram_info->buf_dbk_y_use || !iram_info->buf_dbk_c_use) + goto out; 
+ iram_info->axi_sram_use |= dbk_bits; + +@@ -876,7 +876,7 @@ static void coda_setup_iram(struct coda_ctx *ctx) + + iram_info->buf_dbk_y_use = coda_iram_alloc(iram_info, w128); + iram_info->buf_dbk_c_use = coda_iram_alloc(iram_info, w128); +- if (!iram_info->buf_dbk_c_use) ++ if (!iram_info->buf_dbk_y_use || !iram_info->buf_dbk_c_use) + goto out; + iram_info->axi_sram_use |= dbk_bits; + +@@ -1082,10 +1082,16 @@ static int coda_start_encoding(struct coda_ctx *ctx) + } + + if (dst_fourcc == V4L2_PIX_FMT_JPEG) { +- if (!ctx->params.jpeg_qmat_tab[0]) ++ if (!ctx->params.jpeg_qmat_tab[0]) { + ctx->params.jpeg_qmat_tab[0] = kmalloc(64, GFP_KERNEL); +- if (!ctx->params.jpeg_qmat_tab[1]) ++ if (!ctx->params.jpeg_qmat_tab[0]) ++ return -ENOMEM; ++ } ++ if (!ctx->params.jpeg_qmat_tab[1]) { + ctx->params.jpeg_qmat_tab[1] = kmalloc(64, GFP_KERNEL); ++ if (!ctx->params.jpeg_qmat_tab[1]) ++ return -ENOMEM; ++ } + coda_set_jpeg_compression_quality(ctx, ctx->params.jpeg_quality); + } + diff --git a/drivers/media/platform/coda/coda-common.c b/drivers/media/platform/coda/coda-common.c index 0e312b0842d7f..b4b85a19f7d64 100644 --- a/drivers/media/platform/coda/coda-common.c @@ -165229,10 +198261,29 @@ index 0e312b0842d7f..b4b85a19f7d64 100644 (1 << V4L2_MPEG_VIDEO_H264_PROFILE_HIGH)), V4L2_MPEG_VIDEO_H264_PROFILE_HIGH); diff --git a/drivers/media/platform/coda/coda-jpeg.c b/drivers/media/platform/coda/coda-jpeg.c -index b11cfbe166dd3..a72f4655e5ad5 100644 +index b11cfbe166dd3..b7bf529f18f77 100644 --- a/drivers/media/platform/coda/coda-jpeg.c +++ b/drivers/media/platform/coda/coda-jpeg.c -@@ -1127,7 +1127,8 @@ static int coda9_jpeg_prepare_encode(struct coda_ctx *ctx) +@@ -1052,10 +1052,16 @@ static int coda9_jpeg_start_encoding(struct coda_ctx *ctx) + v4l2_err(&dev->v4l2_dev, "error loading Huffman tables\n"); + return ret; + } +- if (!ctx->params.jpeg_qmat_tab[0]) ++ if (!ctx->params.jpeg_qmat_tab[0]) { + ctx->params.jpeg_qmat_tab[0] = kmalloc(64, GFP_KERNEL); +- if (!ctx->params.jpeg_qmat_tab[1]) ++ if (!ctx->params.jpeg_qmat_tab[0]) ++ return -ENOMEM; ++ } ++ if (!ctx->params.jpeg_qmat_tab[1]) { + ctx->params.jpeg_qmat_tab[1] = kmalloc(64, GFP_KERNEL); ++ if (!ctx->params.jpeg_qmat_tab[1]) ++ return -ENOMEM; ++ } + coda_set_jpeg_compression_quality(ctx, ctx->params.jpeg_quality); + + return 0; +@@ -1127,7 +1133,8 @@ static int coda9_jpeg_prepare_encode(struct coda_ctx *ctx) coda_write(dev, 0, CODA9_REG_JPEG_GBU_BT_PTR); coda_write(dev, 0, CODA9_REG_JPEG_GBU_WD_PTR); coda_write(dev, 0, CODA9_REG_JPEG_GBU_BBSR); @@ -165242,7 +198293,7 @@ index b11cfbe166dd3..a72f4655e5ad5 100644 coda_write(dev, 0, CODA9_REG_JPEG_GBU_CTRL); coda_write(dev, 0, CODA9_REG_JPEG_GBU_FF_RPTR); coda_write(dev, 127, CODA9_REG_JPEG_GBU_BBER); -@@ -1257,6 +1258,23 @@ static void coda9_jpeg_finish_encode(struct coda_ctx *ctx) +@@ -1257,6 +1264,23 @@ static void coda9_jpeg_finish_encode(struct coda_ctx *ctx) coda_hw_reset(ctx); } @@ -165266,7 +198317,7 @@ index b11cfbe166dd3..a72f4655e5ad5 100644 static void coda9_jpeg_release(struct coda_ctx *ctx) { int i; -@@ -1276,6 +1294,7 @@ const struct coda_context_ops coda9_jpeg_encode_ops = { +@@ -1276,6 +1300,7 @@ const struct coda_context_ops coda9_jpeg_encode_ops = { .start_streaming = coda9_jpeg_start_encoding, .prepare_run = coda9_jpeg_prepare_encode, .finish_run = coda9_jpeg_finish_encode, @@ -165448,6 +198499,19 @@ index f1ce10828b8e5..8ffc01c606d0c 100644 return 0; } +diff --git a/drivers/media/platform/exynos4-is/fimc-core.c b/drivers/media/platform/exynos4-is/fimc-core.c +index 
bfdee771cef9d..4afe0b9b17730 100644 +--- a/drivers/media/platform/exynos4-is/fimc-core.c ++++ b/drivers/media/platform/exynos4-is/fimc-core.c +@@ -1174,7 +1174,7 @@ int __init fimc_register_driver(void) + return platform_driver_register(&fimc_driver); + } + +-void __exit fimc_unregister_driver(void) ++void fimc_unregister_driver(void) + { + platform_driver_unregister(&fimc_driver); + } diff --git a/drivers/media/platform/exynos4-is/fimc-is.c b/drivers/media/platform/exynos4-is/fimc-is.c index e55e411038f48..a7704ff069d6c 100644 --- a/drivers/media/platform/exynos4-is/fimc-is.c @@ -165500,22 +198564,63 @@ index edcb3a5e3cb90..2dd4ddbc748a1 100644 enum v4l2_buf_type type) { } +diff --git a/drivers/media/platform/exynos4-is/media-dev.c b/drivers/media/platform/exynos4-is/media-dev.c +index fa648721eaab9..b19d7c8ddc06b 100644 +--- a/drivers/media/platform/exynos4-is/media-dev.c ++++ b/drivers/media/platform/exynos4-is/media-dev.c +@@ -1380,9 +1380,7 @@ static int subdev_notifier_bound(struct v4l2_async_notifier *notifier, + + /* Find platform data for this sensor subdev */ + for (i = 0; i < ARRAY_SIZE(fmd->sensor); i++) +- if (fmd->sensor[i].asd && +- fmd->sensor[i].asd->match.fwnode == +- of_fwnode_handle(subdev->dev->of_node)) ++ if (fmd->sensor[i].asd == asd) + si = &fmd->sensor[i]; + + if (si == NULL) +@@ -1474,7 +1472,7 @@ static int fimc_md_probe(struct platform_device *pdev) + pinctrl = devm_pinctrl_get(dev); + if (IS_ERR(pinctrl)) { + ret = PTR_ERR(pinctrl); +- if (ret != EPROBE_DEFER) ++ if (ret != -EPROBE_DEFER) + dev_err(dev, "Failed to get pinctrl: %d\n", ret); + goto err_clk; + } +@@ -1586,7 +1584,11 @@ static int __init fimc_md_init(void) + if (ret) + return ret; + +- return platform_driver_register(&fimc_md_driver); ++ ret = platform_driver_register(&fimc_md_driver); ++ if (ret) ++ fimc_unregister_driver(); ++ ++ return ret; + } + + static void __exit fimc_md_exit(void) diff --git a/drivers/media/platform/imx-jpeg/mxc-jpeg-hw.c b/drivers/media/platform/imx-jpeg/mxc-jpeg-hw.c -index 29c604b1b1790..718b7b08f93e0 100644 +index 29c604b1b1790..8936d5ce886c2 100644 --- a/drivers/media/platform/imx-jpeg/mxc-jpeg-hw.c +++ b/drivers/media/platform/imx-jpeg/mxc-jpeg-hw.c -@@ -79,6 +79,11 @@ void mxc_jpeg_enable_irq(void __iomem *reg, int slot) - writel(0xFFFFFFFF, reg + MXC_SLOT_OFFSET(slot, SLOT_IRQ_EN)); - } +@@ -76,7 +76,14 @@ void print_wrapper_info(struct device *dev, void __iomem *reg) + void mxc_jpeg_enable_irq(void __iomem *reg, int slot) + { +- writel(0xFFFFFFFF, reg + MXC_SLOT_OFFSET(slot, SLOT_IRQ_EN)); ++ writel(0xFFFFFFFF, reg + MXC_SLOT_OFFSET(slot, SLOT_STATUS)); ++ writel(0xF0C, reg + MXC_SLOT_OFFSET(slot, SLOT_IRQ_EN)); ++} ++ +void mxc_jpeg_disable_irq(void __iomem *reg, int slot) +{ + writel(0x0, reg + MXC_SLOT_OFFSET(slot, SLOT_IRQ_EN)); -+} -+ ++ writel(0xFFFFFFFF, reg + MXC_SLOT_OFFSET(slot, SLOT_STATUS)); + } + void mxc_jpeg_sw_reset(void __iomem *reg) - { - /* diff --git a/drivers/media/platform/imx-jpeg/mxc-jpeg-hw.h b/drivers/media/platform/imx-jpeg/mxc-jpeg-hw.h index ae70d3a0dc243..bf4e1973a0661 100644 --- a/drivers/media/platform/imx-jpeg/mxc-jpeg-hw.h @@ -166881,6 +199986,27 @@ index 8594d275b41d1..02cb8005504a2 100644 return 0; } +diff --git a/drivers/media/platform/qcom/camss/camss-video.c b/drivers/media/platform/qcom/camss/camss-video.c +index f282275af626f..5173b79995ee7 100644 +--- a/drivers/media/platform/qcom/camss/camss-video.c ++++ b/drivers/media/platform/qcom/camss/camss-video.c +@@ -493,7 +493,7 @@ static int video_start_streaming(struct 
vb2_queue *q, unsigned int count) + + ret = media_pipeline_start(&vdev->entity, &video->pipe); + if (ret < 0) +- return ret; ++ goto flush_buffers; + + ret = video_check_format(video); + if (ret < 0) +@@ -522,6 +522,7 @@ static int video_start_streaming(struct vb2_queue *q, unsigned int count) + error: + media_pipeline_stop(&vdev->entity); + ++flush_buffers: + video->ops->flush_buffers(video, VB2_BUF_STATE_QUEUED); + + return ret; diff --git a/drivers/media/platform/qcom/venus/core.c b/drivers/media/platform/qcom/venus/core.c index 91b15842c5558..1f0181b6353c9 100644 --- a/drivers/media/platform/qcom/venus/core.c @@ -166948,7 +200074,7 @@ index 60f4b8e4b8d02..1bf5db7673ebf 100644 /* FOLLOWING PROPERTIES ARE NOT IMPLEMENTED IN CORE YET */ case HFI_PROPERTY_CONFIG_BUFFER_REQUIREMENTS: diff --git a/drivers/media/platform/qcom/venus/pm_helpers.c b/drivers/media/platform/qcom/venus/pm_helpers.c -index 3e2345eb47f7c..03fc82cb3fead 100644 +index 3e2345eb47f7c..055513a7301f1 100644 --- a/drivers/media/platform/qcom/venus/pm_helpers.c +++ b/drivers/media/platform/qcom/venus/pm_helpers.c @@ -163,14 +163,12 @@ static u32 load_per_type(struct venus_core *core, u32 session_type) @@ -167026,7 +200152,15 @@ index 3e2345eb47f7c..03fc82cb3fead 100644 else continue; -@@ -876,7 +875,7 @@ static int vcodec_domains_get(struct venus_core *core) +@@ -870,13 +869,13 @@ static int vcodec_domains_get(struct venus_core *core) + for (i = 0; i < res->vcodec_pmdomains_num; i++) { + pd = dev_pm_domain_attach_by_name(dev, + res->vcodec_pmdomains[i]); +- if (IS_ERR(pd)) +- return PTR_ERR(pd); ++ if (IS_ERR_OR_NULL(pd)) ++ return PTR_ERR(pd) ? : -ENODATA; + core->pmdomains[i] = pd; } skip_pmdomains: @@ -167322,6 +200456,26 @@ index 6759091b15e09..e3246344fb724 100644 rel_vdev: video_device_release(vfd); unreg_v4l2_dev: +diff --git a/drivers/media/platform/rockchip/rkisp1/rkisp1-capture.c b/drivers/media/platform/rockchip/rkisp1/rkisp1-capture.c +index 41988eb0ec0a5..0f980f68058c0 100644 +--- a/drivers/media/platform/rockchip/rkisp1/rkisp1-capture.c ++++ b/drivers/media/platform/rockchip/rkisp1/rkisp1-capture.c +@@ -1270,11 +1270,12 @@ static int rkisp1_capture_link_validate(struct media_link *link) + struct rkisp1_capture *cap = video_get_drvdata(vdev); + const struct rkisp1_capture_fmt_cfg *fmt = + rkisp1_find_fmt_cfg(cap, cap->pix.fmt.pixelformat); +- struct v4l2_subdev_format sd_fmt; ++ struct v4l2_subdev_format sd_fmt = { ++ .which = V4L2_SUBDEV_FORMAT_ACTIVE, ++ .pad = link->source->index, ++ }; + int ret; + +- sd_fmt.which = V4L2_SUBDEV_FORMAT_ACTIVE; +- sd_fmt.pad = link->source->index; + ret = v4l2_subdev_call(sd, pad, get_fmt, NULL, &sd_fmt); + if (ret) + return ret; diff --git a/drivers/media/platform/rockchip/rkisp1/rkisp1-dev.c b/drivers/media/platform/rockchip/rkisp1/rkisp1-dev.c index 7474150b94ed3..560f928c37520 100644 --- a/drivers/media/platform/rockchip/rkisp1/rkisp1-dev.c @@ -167335,8 +200489,89 @@ index 7474150b94ed3..560f928c37520 100644 debugfs_create_ulong("data_loss", 0444, debug->debugfs_dir, &debug->data_loss); debugfs_create_ulong("outform_size_err", 0444, debug->debugfs_dir, +diff --git a/drivers/media/platform/rockchip/rkisp1/rkisp1-params.c b/drivers/media/platform/rockchip/rkisp1/rkisp1-params.c +index 8fa5b0abf1f9c..e0e7d0b4ea047 100644 +--- a/drivers/media/platform/rockchip/rkisp1/rkisp1-params.c ++++ b/drivers/media/platform/rockchip/rkisp1/rkisp1-params.c +@@ -275,7 +275,7 @@ static void rkisp1_lsc_config(struct rkisp1_params *params, + RKISP1_CIF_ISP_LSC_XSIZE_01 + i * 4); + + /* program 
x grad tables */ +- data = RKISP1_CIF_ISP_LSC_SECT_SIZE(arg->x_grad_tbl[i * 2], ++ data = RKISP1_CIF_ISP_LSC_SECT_GRAD(arg->x_grad_tbl[i * 2], + arg->x_grad_tbl[i * 2 + 1]); + rkisp1_write(params->rkisp1, data, + RKISP1_CIF_ISP_LSC_XGRAD_01 + i * 4); +@@ -287,7 +287,7 @@ static void rkisp1_lsc_config(struct rkisp1_params *params, + RKISP1_CIF_ISP_LSC_YSIZE_01 + i * 4); + + /* program y grad tables */ +- data = RKISP1_CIF_ISP_LSC_SECT_SIZE(arg->y_grad_tbl[i * 2], ++ data = RKISP1_CIF_ISP_LSC_SECT_GRAD(arg->y_grad_tbl[i * 2], + arg->y_grad_tbl[i * 2 + 1]); + rkisp1_write(params->rkisp1, data, + RKISP1_CIF_ISP_LSC_YGRAD_01 + i * 4); +@@ -751,7 +751,7 @@ static void rkisp1_ie_enable(struct rkisp1_params *params, bool en) + } + } + +-static void rkisp1_csm_config(struct rkisp1_params *params, bool full_range) ++static void rkisp1_csm_config(struct rkisp1_params *params) + { + static const u16 full_range_coeff[] = { + 0x0026, 0x004b, 0x000f, +@@ -765,7 +765,7 @@ static void rkisp1_csm_config(struct rkisp1_params *params, bool full_range) + }; + unsigned int i; + +- if (full_range) { ++ if (params->quantization == V4L2_QUANTIZATION_FULL_RANGE) { + for (i = 0; i < ARRAY_SIZE(full_range_coeff); i++) + rkisp1_write(params->rkisp1, full_range_coeff[i], + RKISP1_CIF_ISP_CC_COEFF_0 + i * 4); +@@ -1235,11 +1235,7 @@ static void rkisp1_params_config_parameter(struct rkisp1_params *params) + rkisp1_param_set_bits(params, RKISP1_CIF_ISP_HIST_PROP, + rkisp1_hst_params_default_config.mode); + +- /* set the range */ +- if (params->quantization == V4L2_QUANTIZATION_FULL_RANGE) +- rkisp1_csm_config(params, true); +- else +- rkisp1_csm_config(params, false); ++ rkisp1_csm_config(params); + + spin_lock_irq(¶ms->config_lock); + +diff --git a/drivers/media/platform/rockchip/rkisp1/rkisp1-regs.h b/drivers/media/platform/rockchip/rkisp1/rkisp1-regs.h +index fa33080f51db5..f584ccfe0286f 100644 +--- a/drivers/media/platform/rockchip/rkisp1/rkisp1-regs.h ++++ b/drivers/media/platform/rockchip/rkisp1/rkisp1-regs.h +@@ -480,7 +480,7 @@ + (((v0) & 0xFFF) | (((v1) & 0xFFF) << 12)) + #define RKISP1_CIF_ISP_LSC_SECT_SIZE(v0, v1) \ + (((v0) & 0xFFF) | (((v1) & 0xFFF) << 16)) +-#define RKISP1_CIF_ISP_LSC_GRAD_SIZE(v0, v1) \ ++#define RKISP1_CIF_ISP_LSC_SECT_GRAD(v0, v1) \ + (((v0) & 0xFFF) | (((v1) & 0xFFF) << 16)) + + /* LSC: ISP_LSC_TABLE_SEL */ +diff --git a/drivers/media/platform/rockchip/rkisp1/rkisp1-resizer.c b/drivers/media/platform/rockchip/rkisp1/rkisp1-resizer.c +index 2070f4b067059..a166ede409675 100644 +--- a/drivers/media/platform/rockchip/rkisp1/rkisp1-resizer.c ++++ b/drivers/media/platform/rockchip/rkisp1/rkisp1-resizer.c +@@ -510,6 +510,10 @@ static int rkisp1_rsz_init_config(struct v4l2_subdev *sd, + sink_fmt->height = RKISP1_DEFAULT_HEIGHT; + sink_fmt->field = V4L2_FIELD_NONE; + sink_fmt->code = RKISP1_DEF_FMT; ++ sink_fmt->colorspace = V4L2_COLORSPACE_SRGB; ++ sink_fmt->xfer_func = V4L2_XFER_FUNC_SRGB; ++ sink_fmt->ycbcr_enc = V4L2_YCBCR_ENC_601; ++ sink_fmt->quantization = V4L2_QUANTIZATION_LIM_RANGE; + + sink_crop = v4l2_subdev_get_try_crop(sd, sd_state, + RKISP1_RSZ_PAD_SINK); diff --git a/drivers/media/platform/s5p-mfc/s5p_mfc.c b/drivers/media/platform/s5p-mfc/s5p_mfc.c -index eba2b9f040df0..4fc135d9f38bd 100644 +index eba2b9f040df0..4c511b026bd72 100644 --- a/drivers/media/platform/s5p-mfc/s5p_mfc.c +++ b/drivers/media/platform/s5p-mfc/s5p_mfc.c @@ -1283,11 +1283,15 @@ static int s5p_mfc_probe(struct platform_device *pdev) @@ -167373,6 +200608,161 @@ index eba2b9f040df0..4fc135d9f38bd 100644 
v4l2_device_unregister(&dev->v4l2_dev); s5p_mfc_unconfigure_dma_memory(dev); +@@ -1580,8 +1583,18 @@ static struct s5p_mfc_variant mfc_drvdata_v7 = { + .port_num = MFC_NUM_PORTS_V7, + .buf_size = &buf_size_v7, + .fw_name[0] = "s5p-mfc-v7.fw", +- .clk_names = {"mfc", "sclk_mfc"}, +- .num_clocks = 2, ++ .clk_names = {"mfc"}, ++ .num_clocks = 1, ++}; ++ ++static struct s5p_mfc_variant mfc_drvdata_v7_3250 = { ++ .version = MFC_VERSION_V7, ++ .version_bit = MFC_V7_BIT, ++ .port_num = MFC_NUM_PORTS_V7, ++ .buf_size = &buf_size_v7, ++ .fw_name[0] = "s5p-mfc-v7.fw", ++ .clk_names = {"mfc", "sclk_mfc"}, ++ .num_clocks = 2, + }; + + static struct s5p_mfc_buf_size_v6 mfc_buf_size_v8 = { +@@ -1651,6 +1664,9 @@ static const struct of_device_id exynos_mfc_match[] = { + }, { + .compatible = "samsung,mfc-v7", + .data = &mfc_drvdata_v7, ++ }, { ++ .compatible = "samsung,exynos3250-mfc", ++ .data = &mfc_drvdata_v7_3250, + }, { + .compatible = "samsung,mfc-v8", + .data = &mfc_drvdata_v8, +diff --git a/drivers/media/platform/s5p-mfc/s5p_mfc_ctrl.c b/drivers/media/platform/s5p-mfc/s5p_mfc_ctrl.c +index da138c314963a..58822ec5370e2 100644 +--- a/drivers/media/platform/s5p-mfc/s5p_mfc_ctrl.c ++++ b/drivers/media/platform/s5p-mfc/s5p_mfc_ctrl.c +@@ -468,8 +468,10 @@ void s5p_mfc_close_mfc_inst(struct s5p_mfc_dev *dev, struct s5p_mfc_ctx *ctx) + s5p_mfc_hw_call(dev->mfc_ops, try_run, dev); + /* Wait until instance is returned or timeout occurred */ + if (s5p_mfc_wait_for_done_ctx(ctx, +- S5P_MFC_R2H_CMD_CLOSE_INSTANCE_RET, 0)) ++ S5P_MFC_R2H_CMD_CLOSE_INSTANCE_RET, 0)){ ++ clear_work_bit_irqsave(ctx); + mfc_err("Err returning instance\n"); ++ } + + /* Free resources */ + s5p_mfc_hw_call(dev->mfc_ops, release_codec_buffers, ctx); +diff --git a/drivers/media/platform/s5p-mfc/s5p_mfc_enc.c b/drivers/media/platform/s5p-mfc/s5p_mfc_enc.c +index 1fad99edb0913..3da1775a65f19 100644 +--- a/drivers/media/platform/s5p-mfc/s5p_mfc_enc.c ++++ b/drivers/media/platform/s5p-mfc/s5p_mfc_enc.c +@@ -1218,6 +1218,7 @@ static int enc_post_frame_start(struct s5p_mfc_ctx *ctx) + unsigned long mb_y_addr, mb_c_addr; + int slice_type; + unsigned int strm_size; ++ bool src_ready; + + slice_type = s5p_mfc_hw_call(dev->mfc_ops, get_enc_slice_type, dev); + strm_size = s5p_mfc_hw_call(dev->mfc_ops, get_enc_strm_size, dev); +@@ -1257,7 +1258,8 @@ static int enc_post_frame_start(struct s5p_mfc_ctx *ctx) + } + } + } +- if ((ctx->src_queue_cnt > 0) && (ctx->state == MFCINST_RUNNING)) { ++ if (ctx->src_queue_cnt > 0 && (ctx->state == MFCINST_RUNNING || ++ ctx->state == MFCINST_FINISHING)) { + mb_entry = list_entry(ctx->src_queue.next, struct s5p_mfc_buf, + list); + if (mb_entry->flags & MFC_BUF_FLAG_USED) { +@@ -1288,7 +1290,13 @@ static int enc_post_frame_start(struct s5p_mfc_ctx *ctx) + vb2_set_plane_payload(&mb_entry->b->vb2_buf, 0, strm_size); + vb2_buffer_done(&mb_entry->b->vb2_buf, VB2_BUF_STATE_DONE); + } +- if ((ctx->src_queue_cnt == 0) || (ctx->dst_queue_cnt == 0)) ++ ++ src_ready = true; ++ if (ctx->state == MFCINST_RUNNING && ctx->src_queue_cnt == 0) ++ src_ready = false; ++ if (ctx->state == MFCINST_FINISHING && ctx->ref_queue_cnt == 0) ++ src_ready = false; ++ if (!src_ready || ctx->dst_queue_cnt == 0) + clear_work_bit(ctx); + + return 0; +diff --git a/drivers/media/platform/s5p-mfc/s5p_mfc_opr_v6.c b/drivers/media/platform/s5p-mfc/s5p_mfc_opr_v6.c +index a1453053e31ab..ef8169f6c428c 100644 +--- a/drivers/media/platform/s5p-mfc/s5p_mfc_opr_v6.c ++++ b/drivers/media/platform/s5p-mfc/s5p_mfc_opr_v6.c +@@ -1060,7 +1060,7 @@ static int 
s5p_mfc_set_enc_params_h264(struct s5p_mfc_ctx *ctx) + } + + /* aspect ratio VUI */ +- readl(mfc_regs->e_h264_options); ++ reg = readl(mfc_regs->e_h264_options); + reg &= ~(0x1 << 5); + reg |= ((p_h264->vui_sar & 0x1) << 5); + writel(reg, mfc_regs->e_h264_options); +@@ -1083,7 +1083,7 @@ static int s5p_mfc_set_enc_params_h264(struct s5p_mfc_ctx *ctx) + + /* intra picture period for H.264 open GOP */ + /* control */ +- readl(mfc_regs->e_h264_options); ++ reg = readl(mfc_regs->e_h264_options); + reg &= ~(0x1 << 4); + reg |= ((p_h264->open_gop & 0x1) << 4); + writel(reg, mfc_regs->e_h264_options); +@@ -1097,23 +1097,23 @@ static int s5p_mfc_set_enc_params_h264(struct s5p_mfc_ctx *ctx) + } + + /* 'WEIGHTED_BI_PREDICTION' for B is disable */ +- readl(mfc_regs->e_h264_options); ++ reg = readl(mfc_regs->e_h264_options); + reg &= ~(0x3 << 9); + writel(reg, mfc_regs->e_h264_options); + + /* 'CONSTRAINED_INTRA_PRED_ENABLE' is disable */ +- readl(mfc_regs->e_h264_options); ++ reg = readl(mfc_regs->e_h264_options); + reg &= ~(0x1 << 14); + writel(reg, mfc_regs->e_h264_options); + + /* ASO */ +- readl(mfc_regs->e_h264_options); ++ reg = readl(mfc_regs->e_h264_options); + reg &= ~(0x1 << 6); + reg |= ((p_h264->aso & 0x1) << 6); + writel(reg, mfc_regs->e_h264_options); + + /* hier qp enable */ +- readl(mfc_regs->e_h264_options); ++ reg = readl(mfc_regs->e_h264_options); + reg &= ~(0x1 << 8); + reg |= ((p_h264->open_gop & 0x1) << 8); + writel(reg, mfc_regs->e_h264_options); +@@ -1134,7 +1134,7 @@ static int s5p_mfc_set_enc_params_h264(struct s5p_mfc_ctx *ctx) + writel(reg, mfc_regs->e_h264_num_t_layer); + + /* frame packing SEI generation */ +- readl(mfc_regs->e_h264_options); ++ reg = readl(mfc_regs->e_h264_options); + reg &= ~(0x1 << 25); + reg |= ((p_h264->sei_frame_packing & 0x1) << 25); + writel(reg, mfc_regs->e_h264_options); +diff --git a/drivers/media/platform/sti/c8sectpfe/c8sectpfe-core.c b/drivers/media/platform/sti/c8sectpfe/c8sectpfe-core.c +index 338b205ae3a79..88d0188397e7b 100644 +--- a/drivers/media/platform/sti/c8sectpfe/c8sectpfe-core.c ++++ b/drivers/media/platform/sti/c8sectpfe/c8sectpfe-core.c +@@ -940,6 +940,7 @@ static int configure_channels(struct c8sectpfei *fei) + if (ret) { + dev_err(fei->dev, + "configure_memdma_and_inputblock failed\n"); ++ of_node_put(child); + goto err_unmap; + } + index++; diff --git a/drivers/media/platform/sti/delta/delta-v4l2.c b/drivers/media/platform/sti/delta/delta-v4l2.c index c887a31ebb540..420ad4d8df5d5 100644 --- a/drivers/media/platform/sti/delta/delta-v4l2.c @@ -167588,7 +200978,7 @@ index f491420d7b538..76d39e2e87706 100644 err_initial: return retval; diff --git a/drivers/media/radio/si470x/radio-si470x-usb.c b/drivers/media/radio/si470x/radio-si470x-usb.c -index fedff68d8c496..3f8634a465730 100644 +index fedff68d8c496..1365ae732b799 100644 --- a/drivers/media/radio/si470x/radio-si470x-usb.c +++ b/drivers/media/radio/si470x/radio-si470x-usb.c @@ -16,7 +16,7 @@ @@ -167600,6 +200990,18 @@ index fedff68d8c496..3f8634a465730 100644 #define DRIVER_DESC "USB radio driver for Si470x FM Radio Receivers" #define DRIVER_VERSION "1.0.10" +@@ -733,8 +733,10 @@ static int si470x_usb_driver_probe(struct usb_interface *intf, + + /* start radio */ + retval = si470x_start_usb(radio); +- if (retval < 0) ++ if (retval < 0 && !radio->int_in_running) + goto err_buf; ++ else if (retval < 0) /* in case of radio->int_in_running == 1 */ ++ goto err_all; + + /* set initial frequency */ + si470x_set_freq(radio, 87.5 * FREQ_MUL); /* available in all regions */ diff --git 
a/drivers/media/rc/gpio-ir-tx.c b/drivers/media/rc/gpio-ir-tx.c index c6cd2e6d8e654..a50701cfbbd7b 100644 --- a/drivers/media/rc/gpio-ir-tx.c @@ -167675,7 +201077,7 @@ index effaa5751d6c9..3e9988ee785f0 100644 do { rawir.duration = ir->buf_in[i] * 85; diff --git a/drivers/media/rc/imon.c b/drivers/media/rc/imon.c -index 2ca4e86c7b9f1..97355e3ebdfd4 100644 +index 2ca4e86c7b9f1..72e4bb0fb71ec 100644 --- a/drivers/media/rc/imon.c +++ b/drivers/media/rc/imon.c @@ -153,6 +153,24 @@ struct imon_context { @@ -167819,7 +201221,24 @@ index 2ca4e86c7b9f1..97355e3ebdfd4 100644 return retval; } -@@ -934,15 +948,12 @@ static ssize_t vfd_write(struct file *file, const char __user *buf, +@@ -632,15 +646,14 @@ static int send_packet(struct imon_context *ictx) + pr_err_ratelimited("error submitting urb(%d)\n", retval); + } else { + /* Wait for transmission to complete (or abort) */ +- mutex_unlock(&ictx->lock); + retval = wait_for_completion_interruptible( + &ictx->tx.finished); + if (retval) { + usb_kill_urb(ictx->tx_urb); + pr_err_ratelimited("task interrupted\n"); + } +- mutex_lock(&ictx->lock); + ++ ictx->tx.busy = false; + retval = ictx->tx.status; + if (retval) + pr_err_ratelimited("packet tx failed (%d)\n", retval); +@@ -934,17 +947,15 @@ static ssize_t vfd_write(struct file *file, const char __user *buf, int offset; int seq; int retval = 0; @@ -167835,8 +201254,12 @@ index 2ca4e86c7b9f1..97355e3ebdfd4 100644 return -ENODEV; - } - mutex_lock(&ictx->lock); +- mutex_lock(&ictx->lock); ++ if (mutex_lock_interruptible(&ictx->lock)) ++ return -ERESTARTSYS; + if (!ictx->dev_present_intf0) { + pr_err_ratelimited("no iMON device present\n"); @@ -1018,13 +1029,10 @@ static ssize_t lcd_write(struct file *file, const char __user *buf, size_t n_bytes, loff_t *pos) { @@ -168176,10 +201599,45 @@ index b91a1e845b972..506f52c1af101 100644 fail_regulator: kfree(dvb_spi); diff --git a/drivers/media/test-drivers/vidtv/vidtv_bridge.c b/drivers/media/test-drivers/vidtv/vidtv_bridge.c -index 75617709c8ce2..82620613d56b8 100644 +index 75617709c8ce2..dff7265a42ca2 100644 --- a/drivers/media/test-drivers/vidtv/vidtv_bridge.c +++ b/drivers/media/test-drivers/vidtv/vidtv_bridge.c -@@ -564,6 +564,10 @@ static int vidtv_bridge_remove(struct platform_device *pdev) +@@ -459,26 +459,20 @@ fail_dmx_conn: + for (j = j - 1; j >= 0; --j) + dvb->demux.dmx.remove_frontend(&dvb->demux.dmx, + &dvb->dmx_fe[j]); +-fail_dmx_dev: + dvb_dmxdev_release(&dvb->dmx_dev); +-fail_dmx: ++fail_dmx_dev: + dvb_dmx_release(&dvb->demux); ++fail_dmx: ++fail_demod_probe: ++ for (i = i - 1; i >= 0; --i) { ++ dvb_unregister_frontend(dvb->fe[i]); + fail_fe: +- for (j = i; j >= 0; --j) +- dvb_unregister_frontend(dvb->fe[j]); ++ dvb_module_release(dvb->i2c_client_tuner[i]); + fail_tuner_probe: +- for (j = i; j >= 0; --j) +- if (dvb->i2c_client_tuner[j]) +- dvb_module_release(dvb->i2c_client_tuner[j]); +- +-fail_demod_probe: +- for (j = i; j >= 0; --j) +- if (dvb->i2c_client_demod[j]) +- dvb_module_release(dvb->i2c_client_demod[j]); +- ++ dvb_module_release(dvb->i2c_client_demod[i]); ++ } + fail_adapter: + dvb_unregister_adapter(&dvb->adapter); +- + fail_i2c: + i2c_del_adapter(&dvb->i2c_adapter); + +@@ -564,6 +558,10 @@ static int vidtv_bridge_remove(struct platform_device *pdev) static void vidtv_bridge_dev_release(struct device *dev) { @@ -168232,6 +201690,166 @@ index d79b65854627c..4676083cee3b8 100644 } void vidtv_s302m_encoder_destroy(struct vidtv_encoder *e) +diff --git a/drivers/media/test-drivers/vimc/vimc-core.c 
b/drivers/media/test-drivers/vimc/vimc-core.c +index 4b0ae6f51d765..857529ce3638a 100644 +--- a/drivers/media/test-drivers/vimc/vimc-core.c ++++ b/drivers/media/test-drivers/vimc/vimc-core.c +@@ -357,7 +357,7 @@ static int __init vimc_init(void) + if (ret) { + dev_err(&vimc_pdev.dev, + "platform driver registration failed (err=%d)\n", ret); +- platform_driver_unregister(&vimc_pdrv); ++ platform_device_unregister(&vimc_pdev); + return ret; + } + +diff --git a/drivers/media/test-drivers/vivid/vivid-core.c b/drivers/media/test-drivers/vivid/vivid-core.c +index d2bd2653cf54d..065bdc33f0491 100644 +--- a/drivers/media/test-drivers/vivid/vivid-core.c ++++ b/drivers/media/test-drivers/vivid/vivid-core.c +@@ -330,6 +330,28 @@ static int vidioc_g_fbuf(struct file *file, void *fh, struct v4l2_framebuffer *a + return vivid_vid_out_g_fbuf(file, fh, a); + } + ++/* ++ * Only support the framebuffer of one of the vivid instances. ++ * Anything else is rejected. ++ */ ++bool vivid_validate_fb(const struct v4l2_framebuffer *a) ++{ ++ struct vivid_dev *dev; ++ int i; ++ ++ for (i = 0; i < n_devs; i++) { ++ dev = vivid_devs[i]; ++ if (!dev || !dev->video_pbase) ++ continue; ++ if ((unsigned long)a->base == dev->video_pbase && ++ a->fmt.width <= dev->display_width && ++ a->fmt.height <= dev->display_height && ++ a->fmt.bytesperline <= dev->display_byte_stride) ++ return true; ++ } ++ return false; ++} ++ + static int vidioc_s_fbuf(struct file *file, void *fh, const struct v4l2_framebuffer *a) + { + struct video_device *vdev = video_devdata(file); +@@ -910,8 +932,12 @@ static int vivid_detect_feature_set(struct vivid_dev *dev, int inst, + + /* how many inputs do we have and of what type? */ + dev->num_inputs = num_inputs[inst]; +- if (dev->num_inputs < 1) +- dev->num_inputs = 1; ++ if (node_type & 0x20007) { ++ if (dev->num_inputs < 1) ++ dev->num_inputs = 1; ++ } else { ++ dev->num_inputs = 0; ++ } + if (dev->num_inputs >= MAX_INPUTS) + dev->num_inputs = MAX_INPUTS; + for (i = 0; i < dev->num_inputs; i++) { +@@ -928,8 +954,12 @@ static int vivid_detect_feature_set(struct vivid_dev *dev, int inst, + + /* how many outputs do we have and of what type? 
*/ + dev->num_outputs = num_outputs[inst]; +- if (dev->num_outputs < 1) +- dev->num_outputs = 1; ++ if (node_type & 0x40300) { ++ if (dev->num_outputs < 1) ++ dev->num_outputs = 1; ++ } else { ++ dev->num_outputs = 0; ++ } + if (dev->num_outputs >= MAX_OUTPUTS) + dev->num_outputs = MAX_OUTPUTS; + for (i = 0; i < dev->num_outputs; i++) { +diff --git a/drivers/media/test-drivers/vivid/vivid-core.h b/drivers/media/test-drivers/vivid/vivid-core.h +index 1e3c4f5a9413f..7ceaf9bac2f05 100644 +--- a/drivers/media/test-drivers/vivid/vivid-core.h ++++ b/drivers/media/test-drivers/vivid/vivid-core.h +@@ -610,4 +610,6 @@ static inline bool vivid_is_hdmi_out(const struct vivid_dev *dev) + return dev->output_type[dev->output] == HDMI; + } + ++bool vivid_validate_fb(const struct v4l2_framebuffer *a); ++ + #endif +diff --git a/drivers/media/test-drivers/vivid/vivid-vid-cap.c b/drivers/media/test-drivers/vivid/vivid-vid-cap.c +index b9caa4b26209e..331a3f4286d2e 100644 +--- a/drivers/media/test-drivers/vivid/vivid-vid-cap.c ++++ b/drivers/media/test-drivers/vivid/vivid-vid-cap.c +@@ -452,6 +452,12 @@ void vivid_update_format_cap(struct vivid_dev *dev, bool keep_controls) + tpg_reset_source(&dev->tpg, dev->src_rect.width, dev->src_rect.height, dev->field_cap); + dev->crop_cap = dev->src_rect; + dev->crop_bounds_cap = dev->src_rect; ++ if (dev->bitmap_cap && ++ (dev->compose_cap.width != dev->crop_cap.width || ++ dev->compose_cap.height != dev->crop_cap.height)) { ++ vfree(dev->bitmap_cap); ++ dev->bitmap_cap = NULL; ++ } + dev->compose_cap = dev->crop_cap; + if (V4L2_FIELD_HAS_T_OR_B(dev->field_cap)) + dev->compose_cap.height /= 2; +@@ -909,6 +915,8 @@ int vivid_vid_cap_s_selection(struct file *file, void *fh, struct v4l2_selection + struct vivid_dev *dev = video_drvdata(file); + struct v4l2_rect *crop = &dev->crop_cap; + struct v4l2_rect *compose = &dev->compose_cap; ++ unsigned orig_compose_w = compose->width; ++ unsigned orig_compose_h = compose->height; + unsigned factor = V4L2_FIELD_HAS_T_OR_B(dev->field_cap) ? 2 : 1; + int ret; + +@@ -953,6 +961,7 @@ int vivid_vid_cap_s_selection(struct file *file, void *fh, struct v4l2_selection + if (dev->has_compose_cap) { + v4l2_rect_set_min_size(compose, &min_rect); + v4l2_rect_set_max_size(compose, &max_rect); ++ v4l2_rect_map_inside(compose, &fmt); + } + dev->fmt_cap_rect = fmt; + tpg_s_buf_height(&dev->tpg, fmt.height); +@@ -1025,17 +1034,17 @@ int vivid_vid_cap_s_selection(struct file *file, void *fh, struct v4l2_selection + s->r.height /= factor; + } + v4l2_rect_map_inside(&s->r, &dev->fmt_cap_rect); +- if (dev->bitmap_cap && (compose->width != s->r.width || +- compose->height != s->r.height)) { +- vfree(dev->bitmap_cap); +- dev->bitmap_cap = NULL; +- } + *compose = s->r; + break; + default: + return -EINVAL; + } + ++ if (dev->bitmap_cap && (compose->width != orig_compose_w || ++ compose->height != orig_compose_h)) { ++ vfree(dev->bitmap_cap); ++ dev->bitmap_cap = NULL; ++ } + tpg_s_crop_compose(&dev->tpg, crop, compose); + return 0; + } +@@ -1272,7 +1281,14 @@ int vivid_vid_cap_s_fbuf(struct file *file, void *fh, + return -EINVAL; + if (a->fmt.bytesperline < (a->fmt.width * fmt->bit_depth[0]) / 8) + return -EINVAL; +- if (a->fmt.height * a->fmt.bytesperline < a->fmt.sizeimage) ++ if (a->fmt.bytesperline > a->fmt.sizeimage / a->fmt.height) ++ return -EINVAL; ++ ++ /* ++ * Only support the framebuffer of one of the vivid instances. ++ * Anything else is rejected. 
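++ * vivid_validate_fb() compares the supplied base address and
++ * geometry against every registered vivid instance, so the value
++ * is vetted before it is ever handed to phys_to_virt().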
++ */ ++ if (!vivid_validate_fb(a)) + return -EINVAL; + + dev->fb_vbase_cap = phys_to_virt((unsigned long)a->base); diff --git a/drivers/media/tuners/msi001.c b/drivers/media/tuners/msi001.c index 78e6fd600d8ef..44247049a3190 100644 --- a/drivers/media/tuners/msi001.c @@ -168442,7 +202060,7 @@ index 7865fa0a82957..cd5861a30b6f8 100644 .frontend_attach = mxl111sf_frontend_attach_mercury_mh, .tuner_attach = mxl111sf_attach_tuner, diff --git a/drivers/media/usb/dvb-usb/az6027.c b/drivers/media/usb/dvb-usb/az6027.c -index 1c39b61cde29b..86788771175b7 100644 +index 1c39b61cde29b..32b4ee65c2802 100644 --- a/drivers/media/usb/dvb-usb/az6027.c +++ b/drivers/media/usb/dvb-usb/az6027.c @@ -391,6 +391,7 @@ static struct rc_map_table rc_map_az6027_table[] = { @@ -168453,6 +202071,17 @@ index 1c39b61cde29b..86788771175b7 100644 return 0; } +@@ -974,6 +975,10 @@ static int az6027_i2c_xfer(struct i2c_adapter *adap, struct i2c_msg msg[], int n + if (msg[i].addr == 0x99) { + req = 0xBE; + index = 0; ++ if (msg[i].len < 1) { ++ i = -EOPNOTSUPP; ++ break; ++ } + value = msg[i].buf[0] & 0x00ff; + length = 1; + az6027_usb_out_op(d, req, value, index, data, length); diff --git a/drivers/media/usb/dvb-usb/dib0700_core.c b/drivers/media/usb/dvb-usb/dib0700_core.c index 70219b3e85666..7ea8f68b0f458 100644 --- a/drivers/media/usb/dvb-usb/dib0700_core.c @@ -168479,6 +202108,28 @@ index 02b51d1a1b67c..aff60c10cb0b2 100644 if (!buf) return -ENOMEM; +diff --git a/drivers/media/usb/dvb-usb/dvb-usb-init.c b/drivers/media/usb/dvb-usb/dvb-usb-init.c +index 61439c8f33cab..58eea8ab54779 100644 +--- a/drivers/media/usb/dvb-usb/dvb-usb-init.c ++++ b/drivers/media/usb/dvb-usb/dvb-usb-init.c +@@ -81,7 +81,7 @@ static int dvb_usb_adapter_init(struct dvb_usb_device *d, short *adapter_nrs) + + ret = dvb_usb_adapter_stream_init(adap); + if (ret) +- return ret; ++ goto stream_init_err; + + ret = dvb_usb_adapter_dvb_init(adap, adapter_nrs); + if (ret) +@@ -114,6 +114,8 @@ frontend_init_err: + dvb_usb_adapter_dvb_exit(adap); + dvb_init_err: + dvb_usb_adapter_stream_exit(adap); ++stream_init_err: ++ kfree(adap->priv); + return ret; + } + diff --git a/drivers/media/usb/dvb-usb/dw2102.c b/drivers/media/usb/dvb-usb/dw2102.c index f0e686b05dc63..ca75ebdc10b37 100644 --- a/drivers/media/usb/dvb-usb/dw2102.c @@ -169656,7 +203307,7 @@ index 47aff3b197426..94037af1af2dc 100644 case VIDIOC_G_FMT32: case VIDIOC_S_FMT32: diff --git a/drivers/media/v4l2-core/v4l2-ctrls-core.c b/drivers/media/v4l2-core/v4l2-ctrls-core.c -index c4b5082849b66..45a76f40deeb3 100644 +index c4b5082849b66..3798a57bbbd43 100644 --- a/drivers/media/v4l2-core/v4l2-ctrls-core.c +++ b/drivers/media/v4l2-core/v4l2-ctrls-core.c @@ -113,6 +113,7 @@ static void std_init_compound(const struct v4l2_ctrl *ctrl, u32 idx, @@ -169683,6 +203334,55 @@ index c4b5082849b66..45a76f40deeb3 100644 } } +@@ -1446,7 +1456,7 @@ struct v4l2_ctrl *v4l2_ctrl_new_std_menu(struct v4l2_ctrl_handler *hdl, + else if (type == V4L2_CTRL_TYPE_INTEGER_MENU) + qmenu_int = v4l2_ctrl_get_int_menu(id, &qmenu_int_len); + +- if ((!qmenu && !qmenu_int) || (qmenu_int && max > qmenu_int_len)) { ++ if ((!qmenu && !qmenu_int) || (qmenu_int && max >= qmenu_int_len)) { + handler_set_err(hdl, -EINVAL); + return NULL; + } +diff --git a/drivers/media/v4l2-core/v4l2-dv-timings.c b/drivers/media/v4l2-core/v4l2-dv-timings.c +index af48705c704f8..942d0005c55e8 100644 +--- a/drivers/media/v4l2-core/v4l2-dv-timings.c ++++ b/drivers/media/v4l2-core/v4l2-dv-timings.c +@@ -145,6 +145,8 @@ bool v4l2_valid_dv_timings(const struct 
v4l2_dv_timings *t, + const struct v4l2_bt_timings *bt = &t->bt; + const struct v4l2_bt_timings_cap *cap = &dvcap->bt; + u32 caps = cap->capabilities; ++ const u32 max_vert = 10240; ++ u32 max_hor = 3 * bt->width; + + if (t->type != V4L2_DV_BT_656_1120) + return false; +@@ -161,6 +163,26 @@ bool v4l2_valid_dv_timings(const struct v4l2_dv_timings *t, + (bt->interlaced && !(caps & V4L2_DV_BT_CAP_INTERLACED)) || + (!bt->interlaced && !(caps & V4L2_DV_BT_CAP_PROGRESSIVE))) + return false; ++ ++ /* sanity checks for the blanking timings */ ++ if (!bt->interlaced && ++ (bt->il_vbackporch || bt->il_vsync || bt->il_vfrontporch)) ++ return false; ++ /* ++ * Some video receivers cannot properly separate the frontporch, ++ * backporch and sync values, and instead they only have the total ++ * blanking. That can be assigned to any of these three fields. ++ * So just check that none of these are way out of range. ++ */ ++ if (bt->hfrontporch > max_hor || ++ bt->hsync > max_hor || bt->hbackporch > max_hor) ++ return false; ++ if (bt->vfrontporch > max_vert || ++ bt->vsync > max_vert || bt->vbackporch > max_vert) ++ return false; ++ if (bt->interlaced && (bt->il_vfrontporch > max_vert || ++ bt->il_vsync > max_vert || bt->il_vbackporch > max_vert)) ++ return false; + return fnc == NULL || fnc(t, fnc_handle); + } + EXPORT_SYMBOL_GPL(v4l2_valid_dv_timings); diff --git a/drivers/media/v4l2-core/v4l2-ioctl.c b/drivers/media/v4l2-core/v4l2-ioctl.c index 05d5db3d85e58..7c596a85f34f5 100644 --- a/drivers/media/v4l2-core/v4l2-ioctl.c @@ -169983,6 +203683,63 @@ index e7f4bf5bc8dd7..8aeed39c415f2 100644 return EPOLLERR; spin_lock_irqsave(&src_q->done_lock, flags); +diff --git a/drivers/media/v4l2-core/videobuf-dma-contig.c b/drivers/media/v4l2-core/videobuf-dma-contig.c +index 52312ce2ba056..f2c4393595574 100644 +--- a/drivers/media/v4l2-core/videobuf-dma-contig.c ++++ b/drivers/media/v4l2-core/videobuf-dma-contig.c +@@ -36,12 +36,11 @@ struct videobuf_dma_contig_memory { + + static int __videobuf_dc_alloc(struct device *dev, + struct videobuf_dma_contig_memory *mem, +- unsigned long size, gfp_t flags) ++ unsigned long size) + { + mem->size = size; +- mem->vaddr = dma_alloc_coherent(dev, mem->size, +- &mem->dma_handle, flags); +- ++ mem->vaddr = dma_alloc_coherent(dev, mem->size, &mem->dma_handle, ++ GFP_KERNEL); + if (!mem->vaddr) { + dev_err(dev, "memory alloc size %ld failed\n", mem->size); + return -ENOMEM; +@@ -258,8 +257,7 @@ static int __videobuf_iolock(struct videobuf_queue *q, + return videobuf_dma_contig_user_get(mem, vb); + + /* allocate memory for the read() method */ +- if (__videobuf_dc_alloc(q->dev, mem, PAGE_ALIGN(vb->size), +- GFP_KERNEL)) ++ if (__videobuf_dc_alloc(q->dev, mem, PAGE_ALIGN(vb->size))) + return -ENOMEM; + break; + case V4L2_MEMORY_OVERLAY: +@@ -295,22 +293,18 @@ static int __videobuf_mmap_mapper(struct videobuf_queue *q, + BUG_ON(!mem); + MAGIC_CHECK(mem->magic, MAGIC_DC_MEM); + +- if (__videobuf_dc_alloc(q->dev, mem, PAGE_ALIGN(buf->bsize), +- GFP_KERNEL | __GFP_COMP)) ++ if (__videobuf_dc_alloc(q->dev, mem, PAGE_ALIGN(buf->bsize))) + goto error; + +- /* Try to remap memory */ +- vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); +- + /* the "vm_pgoff" is just used in v4l2 to find the + * corresponding buffer data structure which is allocated + * earlier and it does not mean the offset from the physical + * buffer start address as usual. So set it to 0 to pass +- * the sanity check in vm_iomap_memory(). ++ * the sanity check in dma_mmap_coherent(). 
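++ * dma_mmap_coherent() also derives the correct page protection
++ * itself, which is why the explicit pgprot_noncached() call is
++ * dropped along with vm_iomap_memory().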
+ */ + vma->vm_pgoff = 0; +- +- retval = vm_iomap_memory(vma, mem->dma_handle, mem->size); ++ retval = dma_mmap_coherent(q->dev, vma, mem->vaddr, mem->dma_handle, ++ mem->size); + if (retval) { + dev_err(q->dev, "mmap: remap failed with error %d. ", + retval); diff --git a/drivers/memory/atmel-ebi.c b/drivers/memory/atmel-ebi.c index c267283b01fda..e749dcb3ddea9 100644 --- a/drivers/memory/atmel-ebi.c @@ -170032,6 +203789,32 @@ index c267283b01fda..e749dcb3ddea9 100644 } static __maybe_unused int atmel_ebi_resume(struct device *dev) +diff --git a/drivers/memory/atmel-sdramc.c b/drivers/memory/atmel-sdramc.c +index 9c49d00c2a966..ea6e9e1eaf046 100644 +--- a/drivers/memory/atmel-sdramc.c ++++ b/drivers/memory/atmel-sdramc.c +@@ -47,19 +47,17 @@ static int atmel_ramc_probe(struct platform_device *pdev) + caps = of_device_get_match_data(&pdev->dev); + + if (caps->has_ddrck) { +- clk = devm_clk_get(&pdev->dev, "ddrck"); ++ clk = devm_clk_get_enabled(&pdev->dev, "ddrck"); + if (IS_ERR(clk)) + return PTR_ERR(clk); +- clk_prepare_enable(clk); + } + + if (caps->has_mpddr_clk) { +- clk = devm_clk_get(&pdev->dev, "mpddr"); ++ clk = devm_clk_get_enabled(&pdev->dev, "mpddr"); + if (IS_ERR(clk)) { + pr_err("AT91 RAMC: couldn't get mpddr clock\n"); + return PTR_ERR(clk); + } +- clk_prepare_enable(clk); + } + + return 0; diff --git a/drivers/memory/emif.c b/drivers/memory/emif.c index 762d0c0f0716f..ecc78d6f89ed2 100644 --- a/drivers/memory/emif.c @@ -170109,6 +203892,22 @@ index d062c2f8250f4..75a8c38df9394 100644 irq_dispose_mapping(fsl_ifc_ctrl_dev->irq); err: iounmap(fsl_ifc_ctrl_dev->gregs); +diff --git a/drivers/memory/mvebu-devbus.c b/drivers/memory/mvebu-devbus.c +index 8450638e86700..efc6c08db2b70 100644 +--- a/drivers/memory/mvebu-devbus.c ++++ b/drivers/memory/mvebu-devbus.c +@@ -280,10 +280,9 @@ static int mvebu_devbus_probe(struct platform_device *pdev) + if (IS_ERR(devbus->base)) + return PTR_ERR(devbus->base); + +- clk = devm_clk_get(&pdev->dev, NULL); ++ clk = devm_clk_get_enabled(&pdev->dev, NULL); + if (IS_ERR(clk)) + return PTR_ERR(clk); +- clk_prepare_enable(clk); + + /* + * Obtain clock period in picoseconds, diff --git a/drivers/memory/of_memory.c b/drivers/memory/of_memory.c index d9f5437d3bce0..1791614f324b7 100644 --- a/drivers/memory/of_memory.c @@ -170559,6 +204358,74 @@ index 9c8318923ed0b..c491cd549644f 100644 if (!dmc->counter) return -ENOMEM; +diff --git a/drivers/memory/tegra/tegra186.c b/drivers/memory/tegra/tegra186.c +index 3d153881abc16..4bed0e54fd456 100644 +--- a/drivers/memory/tegra/tegra186.c ++++ b/drivers/memory/tegra/tegra186.c +@@ -20,32 +20,6 @@ + #define MC_SID_STREAMID_SECURITY_WRITE_ACCESS_DISABLED BIT(16) + #define MC_SID_STREAMID_SECURITY_OVERRIDE BIT(8) + +-static void tegra186_mc_program_sid(struct tegra_mc *mc) +-{ +- unsigned int i; +- +- for (i = 0; i < mc->soc->num_clients; i++) { +- const struct tegra_mc_client *client = &mc->soc->clients[i]; +- u32 override, security; +- +- override = readl(mc->regs + client->regs.sid.override); +- security = readl(mc->regs + client->regs.sid.security); +- +- dev_dbg(mc->dev, "client %s: override: %x security: %x\n", +- client->name, override, security); +- +- dev_dbg(mc->dev, "setting SID %u for %s\n", client->sid, +- client->name); +- writel(client->sid, mc->regs + client->regs.sid.override); +- +- override = readl(mc->regs + client->regs.sid.override); +- security = readl(mc->regs + client->regs.sid.security); +- +- dev_dbg(mc->dev, "client %s: override: %x security: %x\n", +- client->name, override, security); 
+- } +-} +- + static int tegra186_mc_probe(struct tegra_mc *mc) + { + int err; +@@ -54,8 +28,6 @@ static int tegra186_mc_probe(struct tegra_mc *mc) + if (err < 0) + return err; + +- tegra186_mc_program_sid(mc); +- + return 0; + } + +@@ -64,13 +36,6 @@ static void tegra186_mc_remove(struct tegra_mc *mc) + of_platform_depopulate(mc->dev); + } + +-static int tegra186_mc_resume(struct tegra_mc *mc) +-{ +- tegra186_mc_program_sid(mc); +- +- return 0; +-} +- + #if IS_ENABLED(CONFIG_IOMMU_API) + static void tegra186_mc_client_sid_override(struct tegra_mc *mc, + const struct tegra_mc_client *client, +@@ -142,7 +107,6 @@ static int tegra186_mc_probe_device(struct tegra_mc *mc, struct device *dev) + const struct tegra_mc_ops tegra186_mc_ops = { + .probe = tegra186_mc_probe, + .remove = tegra186_mc_remove, +- .resume = tegra186_mc_resume, + .probe_device = tegra186_mc_probe_device, + }; + diff --git a/drivers/memory/tegra/tegra20-emc.c b/drivers/memory/tegra/tegra20-emc.c index c3462dbc8c22b..6fc90f2160e93 100644 --- a/drivers/memory/tegra/tegra20-emc.c @@ -170570,7 +204437,7 @@ index c3462dbc8c22b..6fc90f2160e93 100644 +MODULE_SOFTDEP("pre: governor_simpleondemand"); MODULE_LICENSE("GPL v2"); diff --git a/drivers/memstick/core/ms_block.c b/drivers/memstick/core/ms_block.c -index acf36676e388d..f854822f84d6c 100644 +index acf36676e388d..7619c30b4ee10 100644 --- a/drivers/memstick/core/ms_block.c +++ b/drivers/memstick/core/ms_block.c @@ -1341,17 +1341,17 @@ static int msb_ftl_initialize(struct msb_data *msb) @@ -170614,6 +204481,36 @@ index acf36676e388d..f854822f84d6c 100644 kfree(msb->lba_to_pba_table); kfree(msb->cache); msb->card = NULL; +@@ -2149,6 +2150,11 @@ static int msb_init_disk(struct memstick_dev *card) + + msb->usage_count = 1; + msb->io_queue = alloc_ordered_workqueue("ms_block", WQ_MEM_RECLAIM); ++ if (!msb->io_queue) { ++ rc = -ENOMEM; ++ goto out_cleanup_disk; ++ } ++ + INIT_WORK(&msb->io_work, msb_io_work); + sg_init_table(msb->prealloc_sg, MS_BLOCK_MAX_SEGS+1); + +@@ -2156,10 +2162,16 @@ static int msb_init_disk(struct memstick_dev *card) + set_disk_ro(msb->disk, 1); + + msb_start(card); +- device_add_disk(&card->dev, msb->disk, NULL); ++ rc = device_add_disk(&card->dev, msb->disk, NULL); ++ if (rc) ++ goto out_destroy_workqueue; + dbg("Disk added"); + return 0; + ++out_destroy_workqueue: ++ destroy_workqueue(msb->io_queue); ++out_cleanup_disk: ++ blk_cleanup_disk(msb->disk); + out_free_tag_set: + blk_mq_free_tag_set(&msb->tag_set); + out_release_id: diff --git a/drivers/memstick/host/jmb38x_ms.c b/drivers/memstick/host/jmb38x_ms.c index f9a93b0565e15..435d4c058b20e 100644 --- a/drivers/memstick/host/jmb38x_ms.c @@ -170652,7 +204549,7 @@ index e79a0218c492e..1d35d147552d4 100644 #ifdef CONFIG_PM_SLEEP diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig -index ca0edab91aeb6..d2f3452455389 100644 +index ca0edab91aeb6..5dd7ea0ebd46c 100644 --- a/drivers/mfd/Kconfig +++ b/drivers/mfd/Kconfig @@ -1194,6 +1194,7 @@ config MFD_SI476X_CORE @@ -170663,6 +204560,14 @@ index ca0edab91aeb6..d2f3452455389 100644 select REGMAP_I2C help This driver creates a single register map with the intention for it +@@ -1994,6 +1995,7 @@ config MFD_ROHM_BD957XMUF + depends on I2C=y + depends on OF + select REGMAP_I2C ++ select REGMAP_IRQ + select MFD_CORE + help + Select this option to get support for the ROHM BD9576MUF and diff --git a/drivers/mfd/altera-sysmgr.c b/drivers/mfd/altera-sysmgr.c index 20cb294c75122..5d3715a28b28e 100644 --- a/drivers/mfd/altera-sysmgr.c @@ -170975,6 +204880,109 @@ index 
c223d2c6a3635..998e8cc408a0e 100644 } static int lp8788_remove(struct i2c_client *cl) +diff --git a/drivers/mfd/lpc_ich.c b/drivers/mfd/lpc_ich.c +index f10e53187f67a..9ffab9aafd81b 100644 +--- a/drivers/mfd/lpc_ich.c ++++ b/drivers/mfd/lpc_ich.c +@@ -63,6 +63,8 @@ + #define SPIBASE_BYT 0x54 + #define SPIBASE_BYT_SZ 512 + #define SPIBASE_BYT_EN BIT(1) ++#define BYT_BCR 0xfc ++#define BYT_BCR_WPD BIT(0) + + #define SPIBASE_LPT 0x3800 + #define SPIBASE_LPT_SZ 512 +@@ -1084,12 +1086,57 @@ wdt_done: + return ret; + } + ++static bool lpc_ich_byt_set_writeable(void __iomem *base, void *data) ++{ ++ u32 val; ++ ++ val = readl(base + BYT_BCR); ++ if (!(val & BYT_BCR_WPD)) { ++ val |= BYT_BCR_WPD; ++ writel(val, base + BYT_BCR); ++ val = readl(base + BYT_BCR); ++ } ++ ++ return val & BYT_BCR_WPD; ++} ++ ++static bool lpc_ich_lpt_set_writeable(void __iomem *base, void *data) ++{ ++ struct pci_dev *pdev = data; ++ u32 bcr; ++ ++ pci_read_config_dword(pdev, BCR, &bcr); ++ if (!(bcr & BCR_WPD)) { ++ bcr |= BCR_WPD; ++ pci_write_config_dword(pdev, BCR, bcr); ++ pci_read_config_dword(pdev, BCR, &bcr); ++ } ++ ++ return bcr & BCR_WPD; ++} ++ ++static bool lpc_ich_bxt_set_writeable(void __iomem *base, void *data) ++{ ++ unsigned int spi = PCI_DEVFN(13, 2); ++ struct pci_bus *bus = data; ++ u32 bcr; ++ ++ pci_bus_read_config_dword(bus, spi, BCR, &bcr); ++ if (!(bcr & BCR_WPD)) { ++ bcr |= BCR_WPD; ++ pci_bus_write_config_dword(bus, spi, BCR, bcr); ++ pci_bus_read_config_dword(bus, spi, BCR, &bcr); ++ } ++ ++ return bcr & BCR_WPD; ++} ++ + static int lpc_ich_init_spi(struct pci_dev *dev) + { + struct lpc_ich_priv *priv = pci_get_drvdata(dev); + struct resource *res = &intel_spi_res[0]; + struct intel_spi_boardinfo *info; +- u32 spi_base, rcba, bcr; ++ u32 spi_base, rcba; + + info = devm_kzalloc(&dev->dev, sizeof(*info), GFP_KERNEL); + if (!info) +@@ -1103,6 +1150,8 @@ static int lpc_ich_init_spi(struct pci_dev *dev) + if (spi_base & SPIBASE_BYT_EN) { + res->start = spi_base & ~(SPIBASE_BYT_SZ - 1); + res->end = res->start + SPIBASE_BYT_SZ - 1; ++ ++ info->set_writeable = lpc_ich_byt_set_writeable; + } + break; + +@@ -1113,8 +1162,8 @@ static int lpc_ich_init_spi(struct pci_dev *dev) + res->start = spi_base + SPIBASE_LPT; + res->end = res->start + SPIBASE_LPT_SZ - 1; + +- pci_read_config_dword(dev, BCR, &bcr); +- info->writeable = !!(bcr & BCR_WPD); ++ info->set_writeable = lpc_ich_lpt_set_writeable; ++ info->data = dev; + } + break; + +@@ -1135,8 +1184,8 @@ static int lpc_ich_init_spi(struct pci_dev *dev) + res->start = spi_base & 0xfffffff0; + res->end = res->start + SPIBASE_APL_SZ - 1; + +- pci_bus_read_config_dword(bus, spi, BCR, &bcr); +- info->writeable = !!(bcr & BCR_WPD); ++ info->set_writeable = lpc_ich_bxt_set_writeable; ++ info->data = bus; + } + + pci_bus_write_config_byte(bus, p2sb, 0xe1, 0x1); diff --git a/drivers/mfd/max77620.c b/drivers/mfd/max77620.c index fec2096474ad1..a6661e07035ba 100644 --- a/drivers/mfd/max77620.c @@ -171053,6 +205061,216 @@ index 6fb206da27298..265464b5d7cc5 100644 }; module_spi_driver(cpcap_driver); +diff --git a/drivers/mfd/mt6360-core.c b/drivers/mfd/mt6360-core.c +index 6eaa6775b8885..d3b32eb798377 100644 +--- a/drivers/mfd/mt6360-core.c ++++ b/drivers/mfd/mt6360-core.c +@@ -402,7 +402,7 @@ static int mt6360_regmap_read(void *context, const void *reg, size_t reg_size, + struct mt6360_ddata *ddata = context; + u8 bank = *(u8 *)reg; + u8 reg_addr = *(u8 *)(reg + 1); +- struct i2c_client *i2c = ddata->i2c[bank]; ++ struct i2c_client *i2c; + bool crc_needed = false; + 
u8 *buf; + int buf_len = MT6360_ALLOC_READ_SIZE(val_size); +@@ -410,6 +410,11 @@ static int mt6360_regmap_read(void *context, const void *reg, size_t reg_size, + u8 crc; + int ret; + ++ if (bank >= MT6360_SLAVE_MAX) ++ return -EINVAL; ++ ++ i2c = ddata->i2c[bank]; ++ + if (bank == MT6360_SLAVE_PMIC || bank == MT6360_SLAVE_LDO) { + crc_needed = true; + ret = mt6360_xlate_pmicldo_addr(®_addr, val_size); +@@ -453,13 +458,18 @@ static int mt6360_regmap_write(void *context, const void *val, size_t val_size) + struct mt6360_ddata *ddata = context; + u8 bank = *(u8 *)val; + u8 reg_addr = *(u8 *)(val + 1); +- struct i2c_client *i2c = ddata->i2c[bank]; ++ struct i2c_client *i2c; + bool crc_needed = false; + u8 *buf; + int buf_len = MT6360_ALLOC_WRITE_SIZE(val_size); + int write_size = val_size - MT6360_REGMAP_REG_BYTE_SIZE; + int ret; + ++ if (bank >= MT6360_SLAVE_MAX) ++ return -EINVAL; ++ ++ i2c = ddata->i2c[bank]; ++ + if (bank == MT6360_SLAVE_PMIC || bank == MT6360_SLAVE_LDO) { + crc_needed = true; + ret = mt6360_xlate_pmicldo_addr(®_addr, val_size - MT6360_REGMAP_REG_BYTE_SIZE); +diff --git a/drivers/mfd/qcom-pm8008.c b/drivers/mfd/qcom-pm8008.c +index c472d7f8103c4..9f3c4a01b4c1c 100644 +--- a/drivers/mfd/qcom-pm8008.c ++++ b/drivers/mfd/qcom-pm8008.c +@@ -54,13 +54,6 @@ enum { + + #define PM8008_PERIPH_OFFSET(paddr) (paddr - PM8008_PERIPH_0_BASE) + +-struct pm8008_data { +- struct device *dev; +- struct regmap *regmap; +- int irq; +- struct regmap_irq_chip_data *irq_data; +-}; +- + static unsigned int p0_offs[] = {PM8008_PERIPH_OFFSET(PM8008_PERIPH_0_BASE)}; + static unsigned int p1_offs[] = {PM8008_PERIPH_OFFSET(PM8008_PERIPH_1_BASE)}; + static unsigned int p2_offs[] = {PM8008_PERIPH_OFFSET(PM8008_PERIPH_2_BASE)}; +@@ -150,7 +143,7 @@ static struct regmap_config qcom_mfd_regmap_cfg = { + .max_register = 0xFFFF, + }; + +-static int pm8008_init(struct pm8008_data *chip) ++static int pm8008_init(struct regmap *regmap) + { + int rc; + +@@ -160,34 +153,31 @@ static int pm8008_init(struct pm8008_data *chip) + * This is required to enable the writing of TYPE registers in + * regmap_irq_sync_unlock(). 
+ */ +- rc = regmap_write(chip->regmap, +- (PM8008_TEMP_ALARM_ADDR | INT_SET_TYPE_OFFSET), +- BIT(0)); ++ rc = regmap_write(regmap, (PM8008_TEMP_ALARM_ADDR | INT_SET_TYPE_OFFSET), BIT(0)); + if (rc) + return rc; + + /* Do the same for GPIO1 and GPIO2 peripherals */ +- rc = regmap_write(chip->regmap, +- (PM8008_GPIO1_ADDR | INT_SET_TYPE_OFFSET), BIT(0)); ++ rc = regmap_write(regmap, (PM8008_GPIO1_ADDR | INT_SET_TYPE_OFFSET), BIT(0)); + if (rc) + return rc; + +- rc = regmap_write(chip->regmap, +- (PM8008_GPIO2_ADDR | INT_SET_TYPE_OFFSET), BIT(0)); ++ rc = regmap_write(regmap, (PM8008_GPIO2_ADDR | INT_SET_TYPE_OFFSET), BIT(0)); + + return rc; + } + +-static int pm8008_probe_irq_peripherals(struct pm8008_data *chip, ++static int pm8008_probe_irq_peripherals(struct device *dev, ++ struct regmap *regmap, + int client_irq) + { + int rc, i; + struct regmap_irq_type *type; + struct regmap_irq_chip_data *irq_data; + +- rc = pm8008_init(chip); ++ rc = pm8008_init(regmap); + if (rc) { +- dev_err(chip->dev, "Init failed: %d\n", rc); ++ dev_err(dev, "Init failed: %d\n", rc); + return rc; + } + +@@ -207,10 +197,10 @@ static int pm8008_probe_irq_peripherals(struct pm8008_data *chip, + IRQ_TYPE_LEVEL_HIGH | IRQ_TYPE_LEVEL_LOW); + } + +- rc = devm_regmap_add_irq_chip(chip->dev, chip->regmap, client_irq, ++ rc = devm_regmap_add_irq_chip(dev, regmap, client_irq, + IRQF_SHARED, 0, &pm8008_irq_chip, &irq_data); + if (rc) { +- dev_err(chip->dev, "Failed to add IRQ chip: %d\n", rc); ++ dev_err(dev, "Failed to add IRQ chip: %d\n", rc); + return rc; + } + +@@ -220,26 +210,23 @@ static int pm8008_probe_irq_peripherals(struct pm8008_data *chip, + static int pm8008_probe(struct i2c_client *client) + { + int rc; +- struct pm8008_data *chip; +- +- chip = devm_kzalloc(&client->dev, sizeof(*chip), GFP_KERNEL); +- if (!chip) +- return -ENOMEM; ++ struct device *dev; ++ struct regmap *regmap; + +- chip->dev = &client->dev; +- chip->regmap = devm_regmap_init_i2c(client, &qcom_mfd_regmap_cfg); +- if (!chip->regmap) +- return -ENODEV; ++ dev = &client->dev; ++ regmap = devm_regmap_init_i2c(client, &qcom_mfd_regmap_cfg); ++ if (IS_ERR(regmap)) ++ return PTR_ERR(regmap); + +- i2c_set_clientdata(client, chip); ++ i2c_set_clientdata(client, regmap); + +- if (of_property_read_bool(chip->dev->of_node, "interrupt-controller")) { +- rc = pm8008_probe_irq_peripherals(chip, client->irq); ++ if (of_property_read_bool(dev->of_node, "interrupt-controller")) { ++ rc = pm8008_probe_irq_peripherals(dev, regmap, client->irq); + if (rc) +- dev_err(chip->dev, "Failed to probe irq periphs: %d\n", rc); ++ dev_err(dev, "Failed to probe irq periphs: %d\n", rc); + } + +- return devm_of_platform_populate(chip->dev); ++ return devm_of_platform_populate(dev); + } + + static const struct of_device_id pm8008_match[] = { +diff --git a/drivers/mfd/qcom_rpm.c b/drivers/mfd/qcom_rpm.c +index 71bc34b74bc9c..8fea0e511550a 100644 +--- a/drivers/mfd/qcom_rpm.c ++++ b/drivers/mfd/qcom_rpm.c +@@ -547,7 +547,7 @@ static int qcom_rpm_probe(struct platform_device *pdev) + init_completion(&rpm->ack); + + /* Enable message RAM clock */ +- rpm->ramclk = devm_clk_get(&pdev->dev, "ram"); ++ rpm->ramclk = devm_clk_get_enabled(&pdev->dev, "ram"); + if (IS_ERR(rpm->ramclk)) { + ret = PTR_ERR(rpm->ramclk); + if (ret == -EPROBE_DEFER) +@@ -558,7 +558,6 @@ static int qcom_rpm_probe(struct platform_device *pdev) + */ + rpm->ramclk = NULL; + } +- clk_prepare_enable(rpm->ramclk); /* Accepts NULL */ + + irq_ack = platform_get_irq_byname(pdev, "ack"); + if (irq_ack < 0) +@@ -673,22 
+672,11 @@ static int qcom_rpm_probe(struct platform_device *pdev) + if (ret) + dev_warn(&pdev->dev, "failed to mark wakeup irq as wakeup\n"); + +- return of_platform_populate(pdev->dev.of_node, NULL, NULL, &pdev->dev); +-} +- +-static int qcom_rpm_remove(struct platform_device *pdev) +-{ +- struct qcom_rpm *rpm = dev_get_drvdata(&pdev->dev); +- +- of_platform_depopulate(&pdev->dev); +- clk_disable_unprepare(rpm->ramclk); +- +- return 0; ++ return devm_of_platform_populate(&pdev->dev); + } + + static struct platform_driver qcom_rpm_driver = { + .probe = qcom_rpm_probe, +- .remove = qcom_rpm_remove, + .driver = { + .name = "qcom_rpm", + .of_match_table = qcom_rpm_of_match, diff --git a/drivers/mfd/sm501.c b/drivers/mfd/sm501.c index bc0a2c38653e5..3ac4508a6742a 100644 --- a/drivers/mfd/sm501.c @@ -171320,6 +205538,73 @@ index 59eda55d92a38..f150d8769f198 100644 } #ifdef CONFIG_PM +diff --git a/drivers/misc/cxl/guest.c b/drivers/misc/cxl/guest.c +index 186308f1f8eba..6334376826a92 100644 +--- a/drivers/misc/cxl/guest.c ++++ b/drivers/misc/cxl/guest.c +@@ -959,10 +959,10 @@ int cxl_guest_init_afu(struct cxl *adapter, int slice, struct device_node *afu_n + * if it returns an error! + */ + if ((rc = cxl_register_afu(afu))) +- goto err_put1; ++ goto err_put_dev; + + if ((rc = cxl_sysfs_afu_add(afu))) +- goto err_put1; ++ goto err_del_dev; + + /* + * pHyp doesn't expose the programming models supported by the +@@ -978,7 +978,7 @@ int cxl_guest_init_afu(struct cxl *adapter, int slice, struct device_node *afu_n + afu->modes_supported = CXL_MODE_DIRECTED; + + if ((rc = cxl_afu_select_best_mode(afu))) +- goto err_put2; ++ goto err_remove_sysfs; + + adapter->afu[afu->slice] = afu; + +@@ -998,10 +998,12 @@ int cxl_guest_init_afu(struct cxl *adapter, int slice, struct device_node *afu_n + + return 0; + +-err_put2: ++err_remove_sysfs: + cxl_sysfs_afu_remove(afu); +-err_put1: +- device_unregister(&afu->dev); ++err_del_dev: ++ device_del(&afu->dev); ++err_put_dev: ++ put_device(&afu->dev); + free = false; + guest_release_serr_irq(afu); + err2: +@@ -1135,18 +1137,20 @@ struct cxl *cxl_guest_init_adapter(struct device_node *np, struct platform_devic + * even if it returns an error! + */ + if ((rc = cxl_register_adapter(adapter))) +- goto err_put1; ++ goto err_put_dev; + + if ((rc = cxl_sysfs_adapter_add(adapter))) +- goto err_put1; ++ goto err_del_dev; + + /* release the context lock as the adapter is configured */ + cxl_adapter_context_unlock(adapter); + + return adapter; + +-err_put1: +- device_unregister(&adapter->dev); ++err_del_dev: ++ device_del(&adapter->dev); ++err_put_dev: ++ put_device(&adapter->dev); + free = false; + cxl_guest_remove_chardev(adapter); + err1: diff --git a/drivers/misc/cxl/irq.c b/drivers/misc/cxl/irq.c index 4cb829d5d873c..2e4dcfebf19af 100644 --- a/drivers/misc/cxl/irq.c @@ -171332,6 +205617,76 @@ index 4cb829d5d873c..2e4dcfebf19af 100644 afu_irq_name_free(ctx); return -ENOMEM; } +diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c +index 2ba899f5659ff..d183836d80e3f 100644 +--- a/drivers/misc/cxl/pci.c ++++ b/drivers/misc/cxl/pci.c +@@ -387,6 +387,7 @@ int cxl_calc_capp_routing(struct pci_dev *dev, u64 *chipid, + rc = get_phb_index(np, phb_index); + if (rc) { + pr_err("cxl: invalid phb index\n"); ++ of_node_put(np); + return rc; + } + +@@ -1164,10 +1165,10 @@ static int pci_init_afu(struct cxl *adapter, int slice, struct pci_dev *dev) + * if it returns an error! 
+ */ + if ((rc = cxl_register_afu(afu))) +- goto err_put1; ++ goto err_put_dev; + + if ((rc = cxl_sysfs_afu_add(afu))) +- goto err_put1; ++ goto err_del_dev; + + adapter->afu[afu->slice] = afu; + +@@ -1176,10 +1177,12 @@ static int pci_init_afu(struct cxl *adapter, int slice, struct pci_dev *dev) + + return 0; + +-err_put1: ++err_del_dev: ++ device_del(&afu->dev); ++err_put_dev: + pci_deconfigure_afu(afu); + cxl_debugfs_afu_remove(afu); +- device_unregister(&afu->dev); ++ put_device(&afu->dev); + return rc; + + err_free_native: +@@ -1667,23 +1670,25 @@ static struct cxl *cxl_pci_init_adapter(struct pci_dev *dev) + * even if it returns an error! + */ + if ((rc = cxl_register_adapter(adapter))) +- goto err_put1; ++ goto err_put_dev; + + if ((rc = cxl_sysfs_adapter_add(adapter))) +- goto err_put1; ++ goto err_del_dev; + + /* Release the context lock as adapter is configured */ + cxl_adapter_context_unlock(adapter); + + return adapter; + +-err_put1: ++err_del_dev: ++ device_del(&adapter->dev); ++err_put_dev: + /* This should mirror cxl_remove_adapter, except without the + * sysfs parts + */ + cxl_debugfs_adapter_remove(adapter); + cxl_deconfigure_adapter(adapter); +- device_unregister(&adapter->dev); ++ put_device(&adapter->dev); + return ERR_PTR(rc); + + err_release: diff --git a/drivers/misc/eeprom/at25.c b/drivers/misc/eeprom/at25.c index 632325474233a..403243859dce9 100644 --- a/drivers/misc/eeprom/at25.c @@ -171572,10 +205927,40 @@ index b0cff4b152da8..7f430742ce2b8 100644 /* Find position of colon in the buffer */ diff --git a/drivers/misc/fastrpc.c b/drivers/misc/fastrpc.c -index ad6ced4546556..cf5705776c4f6 100644 +index ad6ced4546556..55e42ccaef436 100644 --- a/drivers/misc/fastrpc.c +++ b/drivers/misc/fastrpc.c -@@ -719,16 +719,18 @@ static int fastrpc_get_meta_size(struct fastrpc_invoke_ctx *ctx) +@@ -247,6 +247,13 @@ static void fastrpc_free_map(struct kref *ref) + dma_buf_put(map->buf); + } + ++ if (map->fl) { ++ spin_lock(&map->fl->lock); ++ list_del(&map->node); ++ spin_unlock(&map->fl->lock); ++ map->fl = NULL; ++ } ++ + kfree(map); + } + +@@ -256,10 +263,12 @@ static void fastrpc_map_put(struct fastrpc_map *map) + kref_put(&map->refcount, fastrpc_free_map); + } + +-static void fastrpc_map_get(struct fastrpc_map *map) ++static int fastrpc_map_get(struct fastrpc_map *map) + { +- if (map) +- kref_get(&map->refcount); ++ if (!map) ++ return -ENOENT; ++ ++ return kref_get_unless_zero(&map->refcount) ? 
0 : -ENOENT; + } + + static int fastrpc_map_find(struct fastrpc_user *fl, int fd, +@@ -719,16 +728,18 @@ static int fastrpc_get_meta_size(struct fastrpc_invoke_ctx *ctx) static u64 fastrpc_get_payload_size(struct fastrpc_invoke_ctx *ctx, int metalen) { u64 size = 0; @@ -171598,7 +205983,33 @@ index ad6ced4546556..cf5705776c4f6 100644 } } -@@ -1284,7 +1286,14 @@ static int fastrpc_dmabuf_alloc(struct fastrpc_user *fl, char __user *argp) +@@ -1112,12 +1123,7 @@ err_invoke: + fl->init_mem = NULL; + fastrpc_buf_free(imem); + err_alloc: +- if (map) { +- spin_lock(&fl->lock); +- list_del(&map->node); +- spin_unlock(&fl->lock); +- fastrpc_map_put(map); +- } ++ fastrpc_map_put(map); + err: + kfree(args); + +@@ -1194,10 +1200,8 @@ static int fastrpc_device_release(struct inode *inode, struct file *file) + fastrpc_context_put(ctx); + } + +- list_for_each_entry_safe(map, m, &fl->maps, node) { +- list_del(&map->node); ++ list_for_each_entry_safe(map, m, &fl->maps, node) + fastrpc_map_put(map); +- } + + list_for_each_entry_safe(buf, b, &fl->mmaps, node) { + list_del(&buf->node); +@@ -1284,7 +1288,14 @@ static int fastrpc_dmabuf_alloc(struct fastrpc_user *fl, char __user *argp) } if (copy_to_user(argp, &bp, sizeof(bp))) { @@ -171614,7 +206025,7 @@ index ad6ced4546556..cf5705776c4f6 100644 return -EFAULT; } -@@ -1342,17 +1351,18 @@ static int fastrpc_req_munmap_impl(struct fastrpc_user *fl, +@@ -1342,17 +1353,18 @@ static int fastrpc_req_munmap_impl(struct fastrpc_user *fl, struct fastrpc_req_munmap *req) { struct fastrpc_invoke_args args[1] = { [0] = { 0 } }; @@ -171637,7 +206048,7 @@ index ad6ced4546556..cf5705776c4f6 100644 } spin_unlock(&fl->lock); -@@ -1540,7 +1550,12 @@ static int fastrpc_cb_probe(struct platform_device *pdev) +@@ -1540,7 +1552,12 @@ static int fastrpc_cb_probe(struct platform_device *pdev) of_property_read_u32(dev->of_node, "qcom,nsessions", &sessions); spin_lock_irqsave(&cctx->lock, flags); @@ -171651,7 +206062,7 @@ index ad6ced4546556..cf5705776c4f6 100644 sess->used = false; sess->valid = true; sess->dev = dev; -@@ -1553,13 +1568,12 @@ static int fastrpc_cb_probe(struct platform_device *pdev) +@@ -1553,13 +1570,12 @@ static int fastrpc_cb_probe(struct platform_device *pdev) struct fastrpc_session_ctx *dup_sess; for (i = 1; i < sessions; i++) { @@ -172061,20 +206472,22 @@ index be41843df75bc..cf2b8261da144 100644 return 0; } diff --git a/drivers/misc/mei/hw-me-regs.h b/drivers/misc/mei/hw-me-regs.h -index 67bb6a25fd0a0..15e8e2b322b1a 100644 +index 67bb6a25fd0a0..609519571545f 100644 --- a/drivers/misc/mei/hw-me-regs.h +++ b/drivers/misc/mei/hw-me-regs.h -@@ -107,6 +107,9 @@ +@@ -107,6 +107,11 @@ #define MEI_DEV_ID_ADP_S 0x7AE8 /* Alder Lake Point S */ #define MEI_DEV_ID_ADP_LP 0x7A60 /* Alder Lake Point LP */ #define MEI_DEV_ID_ADP_P 0x51E0 /* Alder Lake Point P */ +#define MEI_DEV_ID_ADP_N 0x54E0 /* Alder Lake Point N */ + +#define MEI_DEV_ID_RPL_S 0x7A68 /* Raptor Lake Point S */ ++ ++#define MEI_DEV_ID_MTL_M 0x7E70 /* Meteor Lake Point M */ /* * MEI HW Section -@@ -120,6 +123,7 @@ +@@ -120,6 +125,7 @@ #define PCI_CFG_HFS_2 0x48 #define PCI_CFG_HFS_3 0x60 # define PCI_CFG_HFS_3_FW_SKU_MSK 0x00000070 @@ -172206,21 +206619,78 @@ index a67f4f2d33a93..0706322154cbe 100644 reset_slots: /* reset the number of slots and header */ diff --git a/drivers/misc/mei/pci-me.c b/drivers/misc/mei/pci-me.c -index 3a45aaf002ac8..5324b65d0d29a 100644 +index 3a45aaf002ac8..f2765d6b8c043 100644 --- a/drivers/misc/mei/pci-me.c +++ b/drivers/misc/mei/pci-me.c -@@ -113,6 +113,9 @@ static const struct 
pci_device_id mei_me_pci_tbl[] = { +@@ -113,6 +113,11 @@ static const struct pci_device_id mei_me_pci_tbl[] = { {MEI_PCI_DEVICE(MEI_DEV_ID_ADP_S, MEI_ME_PCH15_CFG)}, {MEI_PCI_DEVICE(MEI_DEV_ID_ADP_LP, MEI_ME_PCH15_CFG)}, {MEI_PCI_DEVICE(MEI_DEV_ID_ADP_P, MEI_ME_PCH15_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_ADP_N, MEI_ME_PCH15_CFG)}, + + {MEI_PCI_DEVICE(MEI_DEV_ID_RPL_S, MEI_ME_PCH15_CFG)}, ++ ++ {MEI_PCI_DEVICE(MEI_DEV_ID_MTL_M, MEI_ME_PCH15_CFG)}, /* required last entry */ {0, } +diff --git a/drivers/misc/ocxl/config.c b/drivers/misc/ocxl/config.c +index a68738f382521..f1f669efe050d 100644 +--- a/drivers/misc/ocxl/config.c ++++ b/drivers/misc/ocxl/config.c +@@ -204,6 +204,18 @@ static int read_dvsec_vendor(struct pci_dev *dev) + return 0; + } + ++/** ++ * get_dvsec_vendor0() - Find a related PCI device (function 0) ++ * @dev: PCI device to match ++ * @dev0: The PCI device (function 0) found ++ * @out_pos: The position of PCI device (function 0) ++ * ++ * Returns 0 on success, negative on failure. ++ * ++ * NOTE: If it's successful, the reference of dev0 is increased, ++ * so after using it, the callers must call pci_dev_put() to give ++ * up the reference. ++ */ + static int get_dvsec_vendor0(struct pci_dev *dev, struct pci_dev **dev0, + int *out_pos) + { +@@ -213,10 +225,14 @@ static int get_dvsec_vendor0(struct pci_dev *dev, struct pci_dev **dev0, + dev = get_function_0(dev); + if (!dev) + return -1; ++ } else { ++ dev = pci_dev_get(dev); + } + pos = find_dvsec(dev, OCXL_DVSEC_VENDOR_ID); +- if (!pos) ++ if (!pos) { ++ pci_dev_put(dev); + return -1; ++ } + *dev0 = dev; + *out_pos = pos; + return 0; +@@ -233,6 +249,7 @@ int ocxl_config_get_reset_reload(struct pci_dev *dev, int *val) + + pci_read_config_dword(dev0, pos + OCXL_DVSEC_VENDOR_RESET_RELOAD, + &reset_reload); ++ pci_dev_put(dev0); + *val = !!(reset_reload & BIT(0)); + return 0; + } +@@ -254,6 +271,7 @@ int ocxl_config_set_reset_reload(struct pci_dev *dev, int val) + reset_reload &= ~BIT(0); + pci_write_config_dword(dev0, pos + OCXL_DVSEC_VENDOR_RESET_RELOAD, + reset_reload); ++ pci_dev_put(dev0); + return 0; + } + diff --git a/drivers/misc/ocxl/file.c b/drivers/misc/ocxl/file.c -index e70525eedaaeb..134806c2e67ec 100644 +index e70525eedaaeb..a199c7ce3f81d 100644 --- a/drivers/misc/ocxl/file.c +++ b/drivers/misc/ocxl/file.c @@ -259,6 +259,8 @@ static long afu_ioctl(struct file *file, unsigned int cmd, @@ -172232,7 +206702,21 @@ index e70525eedaaeb..134806c2e67ec 100644 break; case OCXL_IOCTL_GET_METADATA: -@@ -558,7 +560,9 @@ int ocxl_file_register_afu(struct ocxl_afu *afu) +@@ -541,8 +543,11 @@ int ocxl_file_register_afu(struct ocxl_afu *afu) + goto err_put; + + rc = device_register(&info->dev); +- if (rc) +- goto err_put; ++ if (rc) { ++ free_minor(info); ++ put_device(&info->dev); ++ return rc; ++ } + + rc = ocxl_sysfs_register_afu(info); + if (rc) +@@ -558,7 +563,9 @@ int ocxl_file_register_afu(struct ocxl_afu *afu) err_unregister: ocxl_sysfs_unregister_afu(info); // safe to call even if register failed @@ -172354,6 +206838,126 @@ index bb7aa63685388..b9e6400a574b0 100644 }; static void pvpanic_remove(void *param) +diff --git a/drivers/misc/sgi-gru/grufault.c b/drivers/misc/sgi-gru/grufault.c +index d7ef61e602ede..b836936e97471 100644 +--- a/drivers/misc/sgi-gru/grufault.c ++++ b/drivers/misc/sgi-gru/grufault.c +@@ -648,6 +648,7 @@ int gru_handle_user_call_os(unsigned long cb) + if ((cb & (GRU_HANDLE_STRIDE - 1)) || ucbnum >= GRU_NUM_CB) + return -EINVAL; + ++again: + gts = gru_find_lock_gts(cb); + if (!gts) + return -EINVAL; 
+@@ -656,7 +657,11 @@ int gru_handle_user_call_os(unsigned long cb) + if (ucbnum >= gts->ts_cbr_au_count * GRU_CBR_AU_SIZE) + goto exit; + +- gru_check_context_placement(gts); ++ if (gru_check_context_placement(gts)) { ++ gru_unlock_gts(gts); ++ gru_unload_context(gts, 1); ++ goto again; ++ } + + /* + * CCH may contain stale data if ts_force_cch_reload is set. +@@ -874,7 +879,11 @@ int gru_set_context_option(unsigned long arg) + } else { + gts->ts_user_blade_id = req.val1; + gts->ts_user_chiplet_id = req.val0; +- gru_check_context_placement(gts); ++ if (gru_check_context_placement(gts)) { ++ gru_unlock_gts(gts); ++ gru_unload_context(gts, 1); ++ return ret; ++ } + } + break; + case sco_gseg_owner: +diff --git a/drivers/misc/sgi-gru/grumain.c b/drivers/misc/sgi-gru/grumain.c +index 9afda47efbf2e..3a16eb8e03f73 100644 +--- a/drivers/misc/sgi-gru/grumain.c ++++ b/drivers/misc/sgi-gru/grumain.c +@@ -716,9 +716,10 @@ static int gru_check_chiplet_assignment(struct gru_state *gru, + * chiplet. Misassignment can occur if the process migrates to a different + * blade or if the user changes the selected blade/chiplet. + */ +-void gru_check_context_placement(struct gru_thread_state *gts) ++int gru_check_context_placement(struct gru_thread_state *gts) + { + struct gru_state *gru; ++ int ret = 0; + + /* + * If the current task is the context owner, verify that the +@@ -726,15 +727,23 @@ void gru_check_context_placement(struct gru_thread_state *gts) + * references. Pthread apps use non-owner references to the CBRs. + */ + gru = gts->ts_gru; ++ /* ++ * If gru or gts->ts_tgid_owner isn't initialized properly, return ++ * success to indicate that the caller does not need to unload the ++ * gru context.The caller is responsible for their inspection and ++ * reinitialization if needed. 
++ */ + if (!gru || gts->ts_tgid_owner != current->tgid) +- return; ++ return ret; + + if (!gru_check_chiplet_assignment(gru, gts)) { + STAT(check_context_unload); +- gru_unload_context(gts, 1); ++ ret = -EINVAL; + } else if (gru_retarget_intr(gts)) { + STAT(check_context_retarget_intr); + } ++ ++ return ret; + } + + +@@ -934,7 +943,12 @@ again: + mutex_lock(>s->ts_ctxlock); + preempt_disable(); + +- gru_check_context_placement(gts); ++ if (gru_check_context_placement(gts)) { ++ preempt_enable(); ++ mutex_unlock(>s->ts_ctxlock); ++ gru_unload_context(gts, 1); ++ return VM_FAULT_NOPAGE; ++ } + + if (!gts->ts_gru) { + STAT(load_user_context); +diff --git a/drivers/misc/sgi-gru/grutables.h b/drivers/misc/sgi-gru/grutables.h +index e4c067c61251b..5c9783150cdfa 100644 +--- a/drivers/misc/sgi-gru/grutables.h ++++ b/drivers/misc/sgi-gru/grutables.h +@@ -638,7 +638,7 @@ extern int gru_user_flush_tlb(unsigned long arg); + extern int gru_user_unload_context(unsigned long arg); + extern int gru_get_exception_detail(unsigned long arg); + extern int gru_set_context_option(unsigned long address); +-extern void gru_check_context_placement(struct gru_thread_state *gts); ++extern int gru_check_context_placement(struct gru_thread_state *gts); + extern int gru_cpu_fault_map_id(void); + extern struct vm_area_struct *gru_find_vma(unsigned long vaddr); + extern void gru_flush_all_tlb(struct gru_state *gru); +diff --git a/drivers/misc/tifm_7xx1.c b/drivers/misc/tifm_7xx1.c +index 228f2eb1d4762..2aebbfda104d8 100644 +--- a/drivers/misc/tifm_7xx1.c ++++ b/drivers/misc/tifm_7xx1.c +@@ -190,7 +190,7 @@ static void tifm_7xx1_switch_media(struct work_struct *work) + spin_unlock_irqrestore(&fm->lock, flags); + } + if (sock) +- tifm_free_device(&sock->dev); ++ put_device(&sock->dev); + } + spin_lock_irqsave(&fm->lock, flags); + } diff --git a/drivers/misc/uacce/uacce.c b/drivers/misc/uacce/uacce.c index 488eeb2811aeb..976d051071dc3 100644 --- a/drivers/misc/uacce/uacce.c @@ -172622,11 +207226,39 @@ index 488eeb2811aeb..976d051071dc3 100644 put_device(&uacce->dev); } EXPORT_SYMBOL_GPL(uacce_remove); +diff --git a/drivers/misc/vmw_vmci/vmci_queue_pair.c b/drivers/misc/vmw_vmci/vmci_queue_pair.c +index 94ebf7f3fd58a..fe67e39d68543 100644 +--- a/drivers/misc/vmw_vmci/vmci_queue_pair.c ++++ b/drivers/misc/vmw_vmci/vmci_queue_pair.c +@@ -854,6 +854,7 @@ static int qp_notify_peer_local(bool attach, struct vmci_handle handle) + u32 context_id = vmci_get_context_id(); + struct vmci_event_qp ev; + ++ memset(&ev, 0, sizeof(ev)); + ev.msg.hdr.dst = vmci_make_handle(context_id, VMCI_EVENT_HANDLER); + ev.msg.hdr.src = vmci_make_handle(VMCI_HYPERVISOR_CONTEXT_ID, + VMCI_CONTEXT_RESOURCE_ID); +@@ -1467,6 +1468,7 @@ static int qp_notify_peer(bool attach, + * kernel. + */ + ++ memset(&ev, 0, sizeof(ev)); + ev.msg.hdr.dst = vmci_make_handle(peer_id, VMCI_EVENT_HANDLER); + ev.msg.hdr.src = vmci_make_handle(VMCI_HYPERVISOR_CONTEXT_ID, + VMCI_CONTEXT_RESOURCE_ID); diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c -index 431af5e8be2f8..b2533be3a453f 100644 +index 431af5e8be2f8..ed034b93cb255 100644 --- a/drivers/mmc/core/block.c +++ b/drivers/mmc/core/block.c -@@ -175,7 +175,7 @@ static inline int mmc_blk_part_switch(struct mmc_card *card, +@@ -133,6 +133,7 @@ struct mmc_blk_data { + * track of the current selected device partition. 
+ */ + unsigned int part_curr; ++#define MMC_BLK_PART_INVALID UINT_MAX /* Unknown partition active */ + int area_type; + + /* debugfs files (only in main mmc_blk_data) */ +@@ -175,7 +176,7 @@ static inline int mmc_blk_part_switch(struct mmc_card *card, unsigned int part_type); static void mmc_blk_rw_rq_prep(struct mmc_queue_req *mqrq, struct mmc_card *card, @@ -172635,7 +207267,7 @@ index 431af5e8be2f8..b2533be3a453f 100644 struct mmc_queue *mq); static void mmc_blk_hsq_req_done(struct mmc_request *mrq); -@@ -609,11 +609,11 @@ static int __mmc_blk_ioctl_cmd(struct mmc_card *card, struct mmc_blk_data *md, +@@ -609,11 +610,11 @@ static int __mmc_blk_ioctl_cmd(struct mmc_card *card, struct mmc_blk_data *md, if (idata->rpmb || (cmd.flags & MMC_RSP_R1B) == MMC_RSP_R1B) { /* @@ -172651,7 +207283,63 @@ index 431af5e8be2f8..b2533be3a453f 100644 } return err; -@@ -1107,6 +1107,11 @@ static void mmc_blk_issue_discard_rq(struct mmc_queue *mq, struct request *req) +@@ -984,9 +985,16 @@ static unsigned int mmc_blk_data_timeout_ms(struct mmc_host *host, + return ms; + } + ++/* ++ * Attempts to reset the card and get back to the requested partition. ++ * Therefore any error here must result in cancelling the block layer ++ * request, it must not be reattempted without going through the mmc_blk ++ * partition sanity checks. ++ */ + static int mmc_blk_reset(struct mmc_blk_data *md, struct mmc_host *host, + int type) + { ++ struct mmc_blk_data *main_md = dev_get_drvdata(&host->card->dev); + int err; + + if (md->reset_done & type) +@@ -994,23 +1002,22 @@ static int mmc_blk_reset(struct mmc_blk_data *md, struct mmc_host *host, + + md->reset_done |= type; + err = mmc_hw_reset(host); ++ /* ++ * A successful reset will leave the card in the main partition, but ++ * upon failure it might not be, so set it to MMC_BLK_PART_INVALID ++ * in that case. ++ */ ++ main_md->part_curr = err ? MMC_BLK_PART_INVALID : main_md->part_type; ++ if (err) ++ return err; + /* Ensure we switch back to the correct partition */ +- if (err) { +- struct mmc_blk_data *main_md = +- dev_get_drvdata(&host->card->dev); +- int part_err; +- +- main_md->part_curr = main_md->part_type; +- part_err = mmc_blk_part_switch(host->card, md->part_type); +- if (part_err) { +- /* +- * We have failed to get back into the correct +- * partition, so we need to abort the whole request. +- */ +- return -ENODEV; +- } +- } +- return err; ++ if (mmc_blk_part_switch(host->card, md->part_type)) ++ /* ++ * We have failed to get back into the correct ++ * partition, so we need to abort the whole request. 
++ */ ++ return -ENODEV; ++ return 0; + } + + static inline void mmc_blk_reset_success(struct mmc_blk_data *md, int type) +@@ -1107,6 +1114,11 @@ static void mmc_blk_issue_discard_rq(struct mmc_queue *mq, struct request *req) nr = blk_rq_sectors(req); do { @@ -172663,7 +207351,7 @@ index 431af5e8be2f8..b2533be3a453f 100644 err = 0; if (card->quirks & MMC_QUIRK_INAND_CMD38) { err = mmc_switch(card, EXT_CSD_CMD_SET_NORMAL, -@@ -1117,7 +1122,7 @@ static void mmc_blk_issue_discard_rq(struct mmc_queue *mq, struct request *req) +@@ -1117,7 +1129,7 @@ static void mmc_blk_issue_discard_rq(struct mmc_queue *mq, struct request *req) card->ext_csd.generic_cmd6_time); } if (!err) @@ -172672,7 +207360,7 @@ index 431af5e8be2f8..b2533be3a453f 100644 } while (err == -EIO && !mmc_blk_reset(md, card->host, type)); if (err) status = BLK_STS_IOERR; -@@ -1285,7 +1290,7 @@ static void mmc_blk_eval_resp_error(struct mmc_blk_request *brq) +@@ -1285,7 +1297,7 @@ static void mmc_blk_eval_resp_error(struct mmc_blk_request *brq) } static void mmc_blk_data_prep(struct mmc_queue *mq, struct mmc_queue_req *mqrq, @@ -172681,7 +207369,7 @@ index 431af5e8be2f8..b2533be3a453f 100644 bool *do_data_tag_p) { struct mmc_blk_data *md = mq->blkdata; -@@ -1351,12 +1356,12 @@ static void mmc_blk_data_prep(struct mmc_queue *mq, struct mmc_queue_req *mqrq, +@@ -1351,12 +1363,12 @@ static void mmc_blk_data_prep(struct mmc_queue *mq, struct mmc_queue_req *mqrq, brq->data.blocks--; /* @@ -172697,7 +207385,7 @@ index 431af5e8be2f8..b2533be3a453f 100644 /* * Some controllers have HW issues while operating -@@ -1482,8 +1487,7 @@ void mmc_blk_cqe_recovery(struct mmc_queue *mq) +@@ -1482,8 +1494,7 @@ void mmc_blk_cqe_recovery(struct mmc_queue *mq) err = mmc_cqe_recovery(host); if (err) mmc_blk_reset(mq->blkdata, host, MMC_BLK_CQE_RECOVERY); @@ -172707,7 +207395,7 @@ index 431af5e8be2f8..b2533be3a453f 100644 pr_debug("%s: CQE recovery done\n", mmc_hostname(host)); } -@@ -1574,7 +1578,7 @@ static int mmc_blk_cqe_issue_rw_rq(struct mmc_queue *mq, struct request *req) +@@ -1574,7 +1585,7 @@ static int mmc_blk_cqe_issue_rw_rq(struct mmc_queue *mq, struct request *req) static void mmc_blk_rw_rq_prep(struct mmc_queue_req *mqrq, struct mmc_card *card, @@ -172716,7 +207404,7 @@ index 431af5e8be2f8..b2533be3a453f 100644 struct mmc_queue *mq) { u32 readcmd, writecmd; -@@ -1583,7 +1587,7 @@ static void mmc_blk_rw_rq_prep(struct mmc_queue_req *mqrq, +@@ -1583,7 +1594,7 @@ static void mmc_blk_rw_rq_prep(struct mmc_queue_req *mqrq, struct mmc_blk_data *md = mq->blkdata; bool do_rel_wr, do_data_tag; @@ -172725,7 +207413,7 @@ index 431af5e8be2f8..b2533be3a453f 100644 brq->mrq.cmd = &brq->cmd; -@@ -1674,7 +1678,7 @@ static int mmc_blk_fix_state(struct mmc_card *card, struct request *req) +@@ -1674,7 +1685,7 @@ static int mmc_blk_fix_state(struct mmc_card *card, struct request *req) #define MMC_READ_SINGLE_RETRIES 2 @@ -172734,7 +207422,7 @@ index 431af5e8be2f8..b2533be3a453f 100644 static void mmc_blk_read_single(struct mmc_queue *mq, struct request *req) { struct mmc_queue_req *mqrq = req_to_mmc_queue_req(req); -@@ -1682,31 +1686,32 @@ static void mmc_blk_read_single(struct mmc_queue *mq, struct request *req) +@@ -1682,31 +1693,32 @@ static void mmc_blk_read_single(struct mmc_queue *mq, struct request *req) struct mmc_card *card = mq->card; struct mmc_host *host = card->host; blk_status_t error = BLK_STS_OK; @@ -172751,12 +207439,12 @@ index 431af5e8be2f8..b2533be3a453f 100644 + mmc_blk_rw_rq_prep(mqrq, card, 1, mq); - mmc_wait_for_req(host, mrq); -+ 
mmc_wait_for_req(host, mrq); - +- - err = mmc_send_status(card, &status); - if (err) - goto error_exit; -- ++ mmc_wait_for_req(host, mrq); + - if (!mmc_host_is_spi(host) && - !mmc_ready_for_data(status)) { - err = mmc_blk_fix_state(card, req); @@ -172781,7 +207469,7 @@ index 431af5e8be2f8..b2533be3a453f 100644 if (mrq->cmd->error || mrq->data->error || -@@ -1716,13 +1721,13 @@ static void mmc_blk_read_single(struct mmc_queue *mq, struct request *req) +@@ -1716,13 +1728,13 @@ static void mmc_blk_read_single(struct mmc_queue *mq, struct request *req) else error = BLK_STS_OK; @@ -172797,7 +207485,19 @@ index 431af5e8be2f8..b2533be3a453f 100644 /* Let it try the remaining request again */ if (mqrq->retries > MMC_MAX_RETRIES - 1) mqrq->retries = MMC_MAX_RETRIES - 1; -@@ -1863,10 +1868,9 @@ static void mmc_blk_mq_rw_recovery(struct mmc_queue *mq, struct request *req) +@@ -1850,8 +1862,9 @@ static void mmc_blk_mq_rw_recovery(struct mmc_queue *mq, struct request *req) + return; + + /* Reset before last retry */ +- if (mqrq->retries + 1 == MMC_MAX_RETRIES) +- mmc_blk_reset(md, card->host, type); ++ if (mqrq->retries + 1 == MMC_MAX_RETRIES && ++ mmc_blk_reset(md, card->host, type)) ++ return; + + /* Command errors fail fast, so use all MMC_MAX_RETRIES */ + if (brq->sbc.error || brq->cmd.error) +@@ -1863,10 +1876,9 @@ static void mmc_blk_mq_rw_recovery(struct mmc_queue *mq, struct request *req) return; } @@ -172811,7 +207511,7 @@ index 431af5e8be2f8..b2533be3a453f 100644 mmc_blk_read_single(mq, req); return; } -@@ -1880,6 +1884,31 @@ static inline bool mmc_blk_rq_error(struct mmc_blk_request *brq) +@@ -1880,6 +1892,31 @@ static inline bool mmc_blk_rq_error(struct mmc_blk_request *brq) brq->data.error || brq->cmd.resp[0] & CMD_ERRORS; } @@ -172843,7 +207543,7 @@ index 431af5e8be2f8..b2533be3a453f 100644 static int mmc_blk_busy_cb(void *cb_data, bool *busy) { struct mmc_blk_busy_data *data = cb_data; -@@ -1903,9 +1932,16 @@ static int mmc_blk_card_busy(struct mmc_card *card, struct request *req) +@@ -1903,9 +1940,16 @@ static int mmc_blk_card_busy(struct mmc_card *card, struct request *req) struct mmc_blk_busy_data cb_data; int err; @@ -172861,7 +207561,7 @@ index 431af5e8be2f8..b2533be3a453f 100644 cb_data.card = card; cb_data.status = 0; err = __mmc_poll_for_busy(card, MMC_BLK_TIMEOUT_MS, &mmc_blk_busy_cb, -@@ -2344,6 +2380,8 @@ static struct mmc_blk_data *mmc_blk_alloc_req(struct mmc_card *card, +@@ -2344,6 +2388,8 @@ static struct mmc_blk_data *mmc_blk_alloc_req(struct mmc_card *card, struct mmc_blk_data *md; int devidx, ret; char cap_str[10]; @@ -172870,7 +207570,7 @@ index 431af5e8be2f8..b2533be3a453f 100644 devidx = ida_simple_get(&mmc_blk_ida, 0, max_devices, GFP_KERNEL); if (devidx < 0) { -@@ -2425,13 +2463,17 @@ static struct mmc_blk_data *mmc_blk_alloc_req(struct mmc_card *card, +@@ -2425,13 +2471,17 @@ static struct mmc_blk_data *mmc_blk_alloc_req(struct mmc_card *card, md->flags |= MMC_BLK_CMD23; } @@ -172914,10 +207614,55 @@ index 7bd392d55cfa5..5c6986131faff 100644 + #endif diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c -index 240c5af793dce..368f10405e132 100644 +index 240c5af793dce..07eda6cc6767b 100644 --- a/drivers/mmc/core/core.c +++ b/drivers/mmc/core/core.c -@@ -2264,7 +2264,7 @@ void mmc_start_host(struct mmc_host *host) +@@ -1132,7 +1132,13 @@ u32 mmc_select_voltage(struct mmc_host *host, u32 ocr) + mmc_power_cycle(host, ocr); + } else { + bit = fls(ocr) - 1; +- ocr &= 3 << bit; ++ /* ++ * The bit variable represents the highest voltage bit set in ++ * the OCR 
register. ++ * To keep a range of 2 values (e.g. 3.2V/3.3V and 3.3V/3.4V), ++ * we must shift the mask '3' with (bit - 1). ++ */ ++ ocr &= 3 << (bit - 1); + if (bit != host->ios.vdd) + dev_warn(mmc_dev(host), "exceeding card's volts\n"); + } +@@ -1476,6 +1482,11 @@ void mmc_init_erase(struct mmc_card *card) + card->pref_erase = 0; + } + ++static bool is_trim_arg(unsigned int arg) ++{ ++ return (arg & MMC_TRIM_OR_DISCARD_ARGS) && arg != MMC_DISCARD_ARG; ++} ++ + static unsigned int mmc_mmc_erase_timeout(struct mmc_card *card, + unsigned int arg, unsigned int qty) + { +@@ -1758,7 +1769,7 @@ int mmc_erase(struct mmc_card *card, unsigned int from, unsigned int nr, + !(card->ext_csd.sec_feature_support & EXT_CSD_SEC_ER_EN)) + return -EOPNOTSUPP; + +- if (mmc_card_mmc(card) && (arg & MMC_TRIM_ARGS) && ++ if (mmc_card_mmc(card) && is_trim_arg(arg) && + !(card->ext_csd.sec_feature_support & EXT_CSD_SEC_GB_CL_EN)) + return -EOPNOTSUPP; + +@@ -1788,7 +1799,7 @@ int mmc_erase(struct mmc_card *card, unsigned int from, unsigned int nr, + * identified by the card->eg_boundary flag. + */ + rem = card->erase_size - (from % card->erase_size); +- if ((arg & MMC_TRIM_ARGS) && (card->eg_boundary) && (nr > rem)) { ++ if ((arg & MMC_TRIM_OR_DISCARD_ARGS) && card->eg_boundary && nr > rem) { + err = mmc_do_erase(card, from, from + rem - 1, arg); + from += rem; + if ((err) || (to <= from)) +@@ -2264,7 +2275,7 @@ void mmc_start_host(struct mmc_host *host) _mmc_detect_change(host, 0, false); } @@ -172926,7 +207671,7 @@ index 240c5af793dce..368f10405e132 100644 { if (host->slot.cd_irq >= 0) { mmc_gpio_set_cd_wake(host, false); -@@ -2273,6 +2273,11 @@ void mmc_stop_host(struct mmc_host *host) +@@ -2273,6 +2284,11 @@ void mmc_stop_host(struct mmc_host *host) host->rescan_disable = 1; cancel_delayed_work_sync(&host->detect); @@ -173065,6 +207810,20 @@ index 29e58ffae3797..d805f84507198 100644 } err: if (err) { +diff --git a/drivers/mmc/core/mmc_test.c b/drivers/mmc/core/mmc_test.c +index 63524551a13a1..4052f828f75e7 100644 +--- a/drivers/mmc/core/mmc_test.c ++++ b/drivers/mmc/core/mmc_test.c +@@ -3181,7 +3181,8 @@ static int __mmc_test_register_dbgfs_file(struct mmc_card *card, + struct mmc_test_dbgfs_file *df; + + if (card->debugfs_root) +- debugfs_create_file(name, mode, card->debugfs_root, card, fops); ++ file = debugfs_create_file(name, mode, card->debugfs_root, ++ card, fops); + + df = kmalloc(sizeof(*df), GFP_KERNEL); + if (!df) { diff --git a/drivers/mmc/core/quirks.h b/drivers/mmc/core/quirks.h index d68e6e513a4f4..c8c0f50a2076d 100644 --- a/drivers/mmc/core/quirks.h @@ -173083,7 +207842,7 @@ index d68e6e513a4f4..c8c0f50a2076d 100644 }; diff --git a/drivers/mmc/core/sd.c b/drivers/mmc/core/sd.c -index 4646b7a03db6b..86a8a1f565839 100644 +index 4646b7a03db6b..592166e53dce8 100644 --- a/drivers/mmc/core/sd.c +++ b/drivers/mmc/core/sd.c @@ -66,7 +66,7 @@ static const unsigned int sd_au_size[] = { @@ -173129,7 +207888,39 @@ index 4646b7a03db6b..86a8a1f565839 100644 /* * For SPI, enable CRC as appropriate. 
* This CRC enable is located AFTER the reading of the -@@ -1473,26 +1475,15 @@ retry: +@@ -1250,7 +1252,7 @@ static int sd_read_ext_regs(struct mmc_card *card) + */ + err = sd_read_ext_reg(card, 0, 0, 0, 512, gen_info_buf); + if (err) { +- pr_warn("%s: error %d reading general info of SD ext reg\n", ++ pr_err("%s: error %d reading general info of SD ext reg\n", + mmc_hostname(card->host), err); + goto out; + } +@@ -1264,7 +1266,12 @@ static int sd_read_ext_regs(struct mmc_card *card) + /* Number of extensions to be find. */ + num_ext = gen_info_buf[4]; + +- /* We support revision 0, but limit it to 512 bytes for simplicity. */ ++ /* ++ * We only support revision 0 and limit it to 512 bytes for simplicity. ++ * No matter what, let's return zero to allow us to continue using the ++ * card, even if we can't support the features from the SD function ++ * extensions registers. ++ */ + if (rev != 0 || len > 512) { + pr_warn("%s: non-supported SD ext reg layout\n", + mmc_hostname(card->host)); +@@ -1279,7 +1286,7 @@ static int sd_read_ext_regs(struct mmc_card *card) + for (i = 0; i < num_ext; i++) { + err = sd_parse_ext_reg(card, gen_info_buf, &next_ext_addr); + if (err) { +- pr_warn("%s: error %d parsing SD ext reg\n", ++ pr_err("%s: error %d parsing SD ext reg\n", + mmc_hostname(card->host), err); + goto out; + } +@@ -1473,26 +1480,15 @@ retry: if (!v18_fixup_failed && !mmc_host_is_spi(host) && mmc_host_uhs(host) && mmc_sd_card_using_v18(card) && host->ios.signal_voltage != MMC_SIGNAL_VOLTAGE_180) { @@ -173164,7 +207955,7 @@ index 4646b7a03db6b..86a8a1f565839 100644 } /* Initialization sequence for UHS-I cards */ -@@ -1527,7 +1518,7 @@ retry: +@@ -1527,7 +1523,7 @@ retry: mmc_set_bus_width(host, MMC_BUS_WIDTH_4); } } @@ -173173,7 +207964,7 @@ index 4646b7a03db6b..86a8a1f565839 100644 if (!oldcard) { /* Read/parse the extension registers. */ err = sd_read_ext_regs(card); -@@ -1559,7 +1550,7 @@ retry: +@@ -1559,7 +1555,7 @@ retry: err = -EINVAL; goto free_card; } @@ -173182,7 +207973,7 @@ index 4646b7a03db6b..86a8a1f565839 100644 host->card = card; return 0; -@@ -1663,6 +1654,12 @@ static int sd_poweroff_notify(struct mmc_card *card) +@@ -1663,6 +1659,12 @@ static int sd_poweroff_notify(struct mmc_card *card) goto out; } @@ -173217,6 +208008,142 @@ index 68edf7a615be5..5447c47157aa5 100644 mmc_fixup_device(card, sdio_fixup_methods); if (card->type == MMC_TYPE_SD_COMBO) { +diff --git a/drivers/mmc/core/sdio_bus.c b/drivers/mmc/core/sdio_bus.c +index fda03b35c14a5..f6cdec00e97e7 100644 +--- a/drivers/mmc/core/sdio_bus.c ++++ b/drivers/mmc/core/sdio_bus.c +@@ -290,7 +290,14 @@ static void sdio_release_func(struct device *dev) + { + struct sdio_func *func = dev_to_sdio_func(dev); + +- sdio_free_func_cis(func); ++ if (!(func->card->quirks & MMC_QUIRK_NONSTD_SDIO)) ++ sdio_free_func_cis(func); ++ ++ /* ++ * We have now removed the link to the tuples in the ++ * card structure, so remove the reference. ++ */ ++ put_device(&func->card->dev); + + kfree(func->info); + kfree(func->tmpbuf); +@@ -322,6 +329,12 @@ struct sdio_func *sdio_alloc_func(struct mmc_card *card) + + device_initialize(&func->dev); + ++ /* ++ * We may link to tuples in the card structure, ++ * we need make sure we have a reference to it. 
++ */ ++ get_device(&func->card->dev); ++ + func->dev.parent = &card->dev; + func->dev.bus = &sdio_bus_type; + func->dev.release = sdio_release_func; +@@ -375,10 +388,9 @@ int sdio_add_func(struct sdio_func *func) + */ + void sdio_remove_func(struct sdio_func *func) + { +- if (!sdio_func_present(func)) +- return; ++ if (sdio_func_present(func)) ++ device_del(&func->dev); + +- device_del(&func->dev); + of_node_put(func->dev.of_node); + put_device(&func->dev); + } +diff --git a/drivers/mmc/core/sdio_cis.c b/drivers/mmc/core/sdio_cis.c +index a705ba6eff5bf..afaa6cab1adc3 100644 +--- a/drivers/mmc/core/sdio_cis.c ++++ b/drivers/mmc/core/sdio_cis.c +@@ -403,12 +403,6 @@ int sdio_read_func_cis(struct sdio_func *func) + if (ret) + return ret; + +- /* +- * Since we've linked to tuples in the card structure, +- * we must make sure we have a reference to it. +- */ +- get_device(&func->card->dev); +- + /* + * Vendor/device id is optional for function CIS, so + * copy it from the card structure as needed. +@@ -434,11 +428,5 @@ void sdio_free_func_cis(struct sdio_func *func) + } + + func->tuples = NULL; +- +- /* +- * We have now removed the link to the tuples in the +- * card structure, so remove the reference. +- */ +- put_device(&func->card->dev); + } + +diff --git a/drivers/mmc/host/Kconfig b/drivers/mmc/host/Kconfig +index ccc148cdb5eee..c167186085c86 100644 +--- a/drivers/mmc/host/Kconfig ++++ b/drivers/mmc/host/Kconfig +@@ -1069,9 +1069,10 @@ config MMC_SDHCI_OMAP + + config MMC_SDHCI_AM654 + tristate "Support for the SDHCI Controller in TI's AM654 SOCs" +- depends on MMC_SDHCI_PLTFM && OF && REGMAP_MMIO ++ depends on MMC_SDHCI_PLTFM && OF + select MMC_SDHCI_IO_ACCESSORS + select MMC_CQHCI ++ select REGMAP_MMIO + help + This selects the Secure Digital Host Controller Interface (SDHCI) + support present in TI's AM654 SOCs. 
The controller supports +diff --git a/drivers/mmc/host/alcor.c b/drivers/mmc/host/alcor.c +index bfb8efeb7eb80..d01df01d4b4d1 100644 +--- a/drivers/mmc/host/alcor.c ++++ b/drivers/mmc/host/alcor.c +@@ -1114,7 +1114,10 @@ static int alcor_pci_sdmmc_drv_probe(struct platform_device *pdev) + alcor_hw_init(host); + + dev_set_drvdata(&pdev->dev, host); +- mmc_add_host(mmc); ++ ret = mmc_add_host(mmc); ++ if (ret) ++ goto free_host; ++ + return 0; + + free_host: +diff --git a/drivers/mmc/host/atmel-mci.c b/drivers/mmc/host/atmel-mci.c +index 807177c953f3d..6f971a3e7e494 100644 +--- a/drivers/mmc/host/atmel-mci.c ++++ b/drivers/mmc/host/atmel-mci.c +@@ -2223,6 +2223,7 @@ static int atmci_init_slot(struct atmel_mci *host, + { + struct mmc_host *mmc; + struct atmel_mci_slot *slot; ++ int ret; + + mmc = mmc_alloc_host(sizeof(struct atmel_mci_slot), &host->pdev->dev); + if (!mmc) +@@ -2306,11 +2307,13 @@ static int atmci_init_slot(struct atmel_mci *host, + + host->slot[id] = slot; + mmc_regulator_get_supply(mmc); +- mmc_add_host(mmc); ++ ret = mmc_add_host(mmc); ++ if (ret) { ++ mmc_free_host(mmc); ++ return ret; ++ } + + if (gpio_is_valid(slot->detect_pin)) { +- int ret; +- + timer_setup(&slot->detect_timer, atmci_detect_change, 0); + + ret = request_irq(gpio_to_irq(slot->detect_pin), diff --git a/drivers/mmc/host/au1xmmc.c b/drivers/mmc/host/au1xmmc.c index 0acc237843f7c..f5f9cb7a2da5e 100644 --- a/drivers/mmc/host/au1xmmc.c @@ -173293,7 +208220,7 @@ index 380f9aa56eb26..1e8f1bb3cad7c 100644 continue; } diff --git a/drivers/mmc/host/jz4740_mmc.c b/drivers/mmc/host/jz4740_mmc.c -index 80a2c270d502e..3c59dec08c3bd 100644 +index 80a2c270d502e..8586447d4b4f2 100644 --- a/drivers/mmc/host/jz4740_mmc.c +++ b/drivers/mmc/host/jz4740_mmc.c @@ -235,6 +235,26 @@ static int jz4740_mmc_acquire_dma_channels(struct jz4740_mmc_host *host) @@ -173323,8 +208250,25 @@ index 80a2c270d502e..3c59dec08c3bd 100644 return 0; } +@@ -1018,6 +1038,16 @@ static int jz4740_mmc_probe(struct platform_device* pdev) + mmc->ops = &jz4740_mmc_ops; + if (!mmc->f_max) + mmc->f_max = JZ_MMC_CLK_RATE; ++ ++ /* ++ * There seems to be a problem with this driver on the JZ4760 and ++ * JZ4760B SoCs. There, when using the maximum rate supported (50 MHz), ++ * the communication fails with many SD cards. ++ * Until this bug is sorted out, limit the maximum rate to 24 MHz. 
++ */ ++ if (host->version == JZ_MMC_JZ4760 && mmc->f_max > JZ_MMC_CLK_RATE) ++ mmc->f_max = JZ_MMC_CLK_RATE; ++ + mmc->f_min = mmc->f_max / 128; + mmc->ocr_avail = MMC_VDD_32_33 | MMC_VDD_33_34; + diff --git a/drivers/mmc/host/meson-gx-mmc.c b/drivers/mmc/host/meson-gx-mmc.c -index 8f36536cb1b6d..9b2e2548bd18b 100644 +index 8f36536cb1b6d..753f9ea254d49 100644 --- a/drivers/mmc/host/meson-gx-mmc.c +++ b/drivers/mmc/host/meson-gx-mmc.c @@ -173,6 +173,8 @@ struct meson_host { @@ -173393,6 +208337,17 @@ index 8f36536cb1b6d..9b2e2548bd18b 100644 res = platform_get_resource(pdev, IORESOURCE_MEM, 0); host->regs = devm_ioremap_resource(&pdev->dev, res); +@@ -1288,7 +1291,9 @@ static int meson_mmc_probe(struct platform_device *pdev) + } + + mmc->ops = &meson_mmc_ops; +- mmc_add_host(mmc); ++ ret = mmc_add_host(mmc); ++ if (ret) ++ goto err_free_irq; + + return 0; + diff --git a/drivers/mmc/host/meson-mx-sdhc-mmc.c b/drivers/mmc/host/meson-mx-sdhc-mmc.c index 7cd9c0ec2fcfe..28aa78aa08f3f 100644 --- a/drivers/mmc/host/meson-mx-sdhc-mmc.c @@ -173475,9 +208430,45 @@ index a5e05ed0fda3e..9d35453e7371b 100644 return; } diff --git a/drivers/mmc/host/mmc_spi.c b/drivers/mmc/host/mmc_spi.c -index f4c8e1a61f537..b431cdd27353b 100644 +index f4c8e1a61f537..91fde4943defa 100644 --- a/drivers/mmc/host/mmc_spi.c +++ b/drivers/mmc/host/mmc_spi.c +@@ -1441,7 +1441,7 @@ static int mmc_spi_probe(struct spi_device *spi) + + status = mmc_add_host(mmc); + if (status != 0) +- goto fail_add_host; ++ goto fail_glue_init; + + /* + * Index 0 is card detect +@@ -1449,7 +1449,7 @@ static int mmc_spi_probe(struct spi_device *spi) + */ + status = mmc_gpiod_request_cd(mmc, NULL, 0, false, 1000); + if (status == -EPROBE_DEFER) +- goto fail_add_host; ++ goto fail_gpiod_request; + if (!status) { + /* + * The platform has a CD GPIO signal that may support +@@ -1464,7 +1464,7 @@ static int mmc_spi_probe(struct spi_device *spi) + /* Index 1 is write protect/read only */ + status = mmc_gpiod_request_ro(mmc, NULL, 1, 0); + if (status == -EPROBE_DEFER) +- goto fail_add_host; ++ goto fail_gpiod_request; + if (!status) + has_ro = true; + +@@ -1478,7 +1478,7 @@ static int mmc_spi_probe(struct spi_device *spi) + ? 
", cd polling" : ""); + return 0; + +-fail_add_host: ++fail_gpiod_request: + mmc_remove_host(mmc); + fail_glue_init: + mmc_spi_dma_free(host); @@ -1514,6 +1514,12 @@ static int mmc_spi_remove(struct spi_device *spi) return 0; } @@ -173499,6 +208490,21 @@ index f4c8e1a61f537..b431cdd27353b 100644 .probe = mmc_spi_probe, .remove = mmc_spi_remove, }; +diff --git a/drivers/mmc/host/mmci.c b/drivers/mmc/host/mmci.c +index 3765e2f4ad98a..2c4eda83ca181 100644 +--- a/drivers/mmc/host/mmci.c ++++ b/drivers/mmc/host/mmci.c +@@ -2254,7 +2254,9 @@ static int mmci_probe(struct amba_device *dev, + pm_runtime_set_autosuspend_delay(&dev->dev, 50); + pm_runtime_use_autosuspend(&dev->dev); + +- mmc_add_host(mmc); ++ ret = mmc_add_host(mmc); ++ if (ret) ++ goto clk_disable; + + pm_runtime_put(&dev->dev); + return 0; diff --git a/drivers/mmc/host/mmci_stm32_sdmmc.c b/drivers/mmc/host/mmci_stm32_sdmmc.c index fdaa11f92fe6f..4cceb9bab0361 100644 --- a/drivers/mmc/host/mmci_stm32_sdmmc.c @@ -173533,7 +208539,7 @@ index fdaa11f92fe6f..4cceb9bab0361 100644 sdmmc_dlyb_set_cfgr(dlyb, dlyb->unit, phase, false); diff --git a/drivers/mmc/host/moxart-mmc.c b/drivers/mmc/host/moxart-mmc.c -index 6c9d38132f74c..dfc3ffd5b1f8c 100644 +index 6c9d38132f74c..52ed30f2d9f4f 100644 --- a/drivers/mmc/host/moxart-mmc.c +++ b/drivers/mmc/host/moxart-mmc.c @@ -111,8 +111,8 @@ @@ -173634,7 +208640,17 @@ index 6c9d38132f74c..dfc3ffd5b1f8c 100644 writel(0, host->base + REG_INTERRUPT_MASK); -@@ -675,6 +672,11 @@ static int moxart_probe(struct platform_device *pdev) +@@ -668,13 +665,20 @@ static int moxart_probe(struct platform_device *pdev) + goto out; + + dev_set_drvdata(dev, mmc); +- mmc_add_host(mmc); ++ ret = mmc_add_host(mmc); ++ if (ret) ++ goto out; + + dev_dbg(dev, "IRQ=%d, FIFO is %d bytes\n", irq, host->fifo_width); + return 0; out: @@ -173646,7 +208662,7 @@ index 6c9d38132f74c..dfc3ffd5b1f8c 100644 if (mmc) mmc_free_host(mmc); return ret; -@@ -687,17 +689,17 @@ static int moxart_remove(struct platform_device *pdev) +@@ -687,17 +691,17 @@ static int moxart_remove(struct platform_device *pdev) dev_set_drvdata(&pdev->dev, NULL); @@ -173668,7 +208684,7 @@ index 6c9d38132f74c..dfc3ffd5b1f8c 100644 return 0; } diff --git a/drivers/mmc/host/mtk-sd.c b/drivers/mmc/host/mtk-sd.c -index b06b4dcb7c782..99d8881a7d6c2 100644 +index b06b4dcb7c782..9871c19d2b4e4 100644 --- a/drivers/mmc/host/mtk-sd.c +++ b/drivers/mmc/host/mtk-sd.c @@ -8,6 +8,7 @@ @@ -173858,7 +208874,23 @@ index b06b4dcb7c782..99d8881a7d6c2 100644 msdc_reset_hw(host); } } -@@ -2593,7 +2611,11 @@ static int msdc_drv_probe(struct platform_device *pdev) +@@ -2437,13 +2455,11 @@ static int msdc_of_clock_parse(struct platform_device *pdev, + if (IS_ERR(host->src_clk_cg)) + host->src_clk_cg = NULL; + +- host->sys_clk_cg = devm_clk_get_optional(&pdev->dev, "sys_cg"); ++ /* If present, always enable for this clock gate */ ++ host->sys_clk_cg = devm_clk_get_optional_enabled(&pdev->dev, "sys_cg"); + if (IS_ERR(host->sys_clk_cg)) + host->sys_clk_cg = NULL; + +- /* If present, always enable for this clock gate */ +- clk_prepare_enable(host->sys_clk_cg); +- + host->bulk_clks[0].id = "pclk_cg"; + host->bulk_clks[1].id = "axi_cg"; + host->bulk_clks[2].id = "ahb_cg"; +@@ -2593,7 +2609,11 @@ static int msdc_drv_probe(struct platform_device *pdev) spin_lock_init(&host->lock); platform_set_drvdata(pdev, mmc); @@ -173871,7 +208903,7 @@ index b06b4dcb7c782..99d8881a7d6c2 100644 msdc_init_hw(host); if (mmc->caps2 & MMC_CAP2_CQE) { -@@ -2752,8 +2774,12 @@ static int __maybe_unused 
msdc_runtime_resume(struct device *dev) +@@ -2752,8 +2772,12 @@ static int __maybe_unused msdc_runtime_resume(struct device *dev) { struct mmc_host *mmc = dev_get_drvdata(dev); struct msdc_host *host = mmc_priv(mmc); @@ -173885,7 +208917,7 @@ index b06b4dcb7c782..99d8881a7d6c2 100644 msdc_restore_reg(host); return 0; } -@@ -2762,11 +2788,14 @@ static int __maybe_unused msdc_suspend(struct device *dev) +@@ -2762,11 +2786,14 @@ static int __maybe_unused msdc_suspend(struct device *dev) { struct mmc_host *mmc = dev_get_drvdata(dev); int ret; @@ -173901,7 +208933,7 @@ index b06b4dcb7c782..99d8881a7d6c2 100644 return pm_runtime_force_suspend(dev); diff --git a/drivers/mmc/host/mxcmmc.c b/drivers/mmc/host/mxcmmc.c -index 2fe6fcdbb1b30..9bf95ba217fac 100644 +index 2fe6fcdbb1b30..97227ad717150 100644 --- a/drivers/mmc/host/mxcmmc.c +++ b/drivers/mmc/host/mxcmmc.c @@ -1025,7 +1025,7 @@ static int mxcmci_probe(struct platform_device *pdev) @@ -173913,6 +208945,17 @@ index 2fe6fcdbb1b30..9bf95ba217fac 100644 /* adjust max_segs after devtype detection */ if (!is_mpc512x_mmc(host)) +@@ -1143,7 +1143,9 @@ static int mxcmci_probe(struct platform_device *pdev) + + timer_setup(&host->watchdog, mxcmci_watchdog, 0); + +- mmc_add_host(mmc); ++ ret = mmc_add_host(mmc); ++ if (ret) ++ goto out_free_dma; + + return 0; + diff --git a/drivers/mmc/host/mxs-mmc.c b/drivers/mmc/host/mxs-mmc.c index 947581de78601..8c3655d3be961 100644 --- a/drivers/mmc/host/mxs-mmc.c @@ -173941,8 +208984,23 @@ index 947581de78601..8c3655d3be961 100644 } ssp->clk = devm_clk_get(&pdev->dev, NULL); +diff --git a/drivers/mmc/host/omap_hsmmc.c b/drivers/mmc/host/omap_hsmmc.c +index 2f8038d69f677..eb0bd46b7e81e 100644 +--- a/drivers/mmc/host/omap_hsmmc.c ++++ b/drivers/mmc/host/omap_hsmmc.c +@@ -1987,7 +1987,9 @@ static int omap_hsmmc_probe(struct platform_device *pdev) + if (!ret) + mmc->caps |= MMC_CAP_SDIO_IRQ; + +- mmc_add_host(mmc); ++ ret = mmc_add_host(mmc); ++ if (ret) ++ goto err_irq; + + if (mmc_pdata(host)->name != NULL) { + ret = device_create_file(&mmc->class_dev, &dev_attr_slot_name); diff --git a/drivers/mmc/host/pxamci.c b/drivers/mmc/host/pxamci.c -index 316393c694d7a..55868b6b86583 100644 +index 316393c694d7a..e25e9bb34eb39 100644 --- a/drivers/mmc/host/pxamci.c +++ b/drivers/mmc/host/pxamci.c @@ -648,7 +648,7 @@ static int pxamci_probe(struct platform_device *pdev) @@ -173963,8 +209021,22 @@ index 316393c694d7a..55868b6b86583 100644 mmc->caps = 0; host->cmdat = 0; +@@ -763,7 +763,12 @@ static int pxamci_probe(struct platform_device *pdev) + dev_warn(dev, "gpio_ro and get_ro() both defined\n"); + } + +- mmc_add_host(mmc); ++ ret = mmc_add_host(mmc); ++ if (ret) { ++ if (host->pdata && host->pdata->exit) ++ host->pdata->exit(dev, mmc); ++ goto out; ++ } + + return 0; + diff --git a/drivers/mmc/host/renesas_sdhi_core.c b/drivers/mmc/host/renesas_sdhi_core.c -index a4407f391f66a..387f2a4f693a0 100644 +index a4407f391f66a..12921fba4f52b 100644 --- a/drivers/mmc/host/renesas_sdhi_core.c +++ b/drivers/mmc/host/renesas_sdhi_core.c @@ -51,9 +51,6 @@ @@ -173990,6 +209062,15 @@ index a4407f391f66a..387f2a4f693a0 100644 /* Avoid bad TAP */ if (bad_taps & BIT(priv->tap_set)) { +@@ -523,7 +520,7 @@ static void renesas_sdhi_reset_hs400_mode(struct tmio_mmc_host *host, + SH_MOBILE_SDHI_SCC_TMPPORT2_HS400OSEL) & + sd_scc_read32(host, priv, SH_MOBILE_SDHI_SCC_TMPPORT2)); + +- if (priv->adjust_hs400_calib_table) ++ if (priv->quirks && (priv->quirks->hs400_calib_table || priv->quirks->hs400_bad_taps)) + 
renesas_sdhi_adjust_hs400_mode_disable(host); + + sd_ctrl_write16(host, CTL_SD_CARD_CLK_CTL, CLK_CTL_SCLKEN | @@ -550,23 +547,25 @@ static void renesas_sdhi_scc_reset(struct tmio_mmc_host *host, struct renesas_sd } @@ -174059,8 +209140,35 @@ index a4407f391f66a..387f2a4f693a0 100644 ver = sd_ctrl_read16(host, CTL_VERSION); /* GEN2_SDR104 is first known SDHI to use 32bit block count */ if (ver < SDHI_VER_GEN2_SDR104 && mmc_data->max_blk_count > U16_MAX) +@@ -1038,11 +1037,14 @@ int renesas_sdhi_probe(struct platform_device *pdev, + if (ver >= SDHI_VER_GEN3_SD) + host->get_timeout_cycles = renesas_sdhi_gen3_get_cycles; + ++ /* Check for SCC so we can reset it if needed */ ++ if (of_data && of_data->scc_offset && ver >= SDHI_VER_GEN2_SDR104) ++ priv->scc_ctl = host->ctl + of_data->scc_offset; ++ + /* Enable tuning iff we have an SCC and a supported mode */ +- if (of_data && of_data->scc_offset && +- (host->mmc->caps & MMC_CAP_UHS_SDR104 || +- host->mmc->caps2 & (MMC_CAP2_HS200_1_8V_SDR | +- MMC_CAP2_HS400_1_8V))) { ++ if (priv->scc_ctl && (host->mmc->caps & MMC_CAP_UHS_SDR104 || ++ host->mmc->caps2 & (MMC_CAP2_HS200_1_8V_SDR | ++ MMC_CAP2_HS400_1_8V))) { + const struct renesas_sdhi_scc *taps = of_data->taps; + bool use_4tap = priv->quirks && priv->quirks->hs400_4taps; + bool hit = false; +@@ -1062,7 +1064,6 @@ int renesas_sdhi_probe(struct platform_device *pdev, + if (!hit) + dev_warn(&host->pdev->dev, "Unknown clock rate for tuning\n"); + +- priv->scc_ctl = host->ctl + of_data->scc_offset; + host->check_retune = renesas_sdhi_check_scc_error; + host->ops.execute_tuning = renesas_sdhi_execute_tuning; + host->ops.prepare_hs400_tuning = renesas_sdhi_prepare_hs400_tuning; diff --git a/drivers/mmc/host/rtsx_pci_sdmmc.c b/drivers/mmc/host/rtsx_pci_sdmmc.c -index 58cfaffa3c2d8..e1580f78c6b2d 100644 +index 58cfaffa3c2d8..8098726dcc0bf 100644 --- a/drivers/mmc/host/rtsx_pci_sdmmc.c +++ b/drivers/mmc/host/rtsx_pci_sdmmc.c @@ -38,10 +38,7 @@ struct realtek_pci_sdmmc { @@ -174144,7 +209252,15 @@ index 58cfaffa3c2d8..e1580f78c6b2d 100644 return err; } -@@ -1482,10 +1490,11 @@ static int rtsx_pci_sdmmc_drv_probe(struct platform_device *pdev) +@@ -1466,6 +1474,7 @@ static int rtsx_pci_sdmmc_drv_probe(struct platform_device *pdev) + struct realtek_pci_sdmmc *host; + struct rtsx_pcr *pcr; + struct pcr_handle *handle = pdev->dev.platform_data; ++ int ret; + + if (!handle) + return -ENXIO; +@@ -1482,10 +1491,11 @@ static int rtsx_pci_sdmmc_drv_probe(struct platform_device *pdev) host = mmc_priv(mmc); host->pcr = pcr; @@ -174157,7 +209273,7 @@ index 58cfaffa3c2d8..e1580f78c6b2d 100644 INIT_WORK(&host->work, sd_request); platform_set_drvdata(pdev, host); pcr->slots[RTSX_SD_CARD].p_dev = pdev; -@@ -1495,12 +1504,12 @@ static int rtsx_pci_sdmmc_drv_probe(struct platform_device *pdev) +@@ -1495,14 +1505,20 @@ static int rtsx_pci_sdmmc_drv_probe(struct platform_device *pdev) realtek_init_host(host); @@ -174174,9 +209290,18 @@ index 58cfaffa3c2d8..e1580f78c6b2d 100644 + pm_runtime_mark_last_busy(&pdev->dev); + pm_runtime_use_autosuspend(&pdev->dev); - mmc_add_host(mmc); +- mmc_add_host(mmc); ++ ret = mmc_add_host(mmc); ++ if (ret) { ++ pm_runtime_dont_use_autosuspend(&pdev->dev); ++ pm_runtime_disable(&pdev->dev); ++ mmc_free_host(mmc); ++ return ret; ++ } -@@ -1521,11 +1530,6 @@ static int rtsx_pci_sdmmc_drv_remove(struct platform_device *pdev) + return 0; + } +@@ -1521,11 +1537,6 @@ static int rtsx_pci_sdmmc_drv_remove(struct platform_device *pdev) pcr->slots[RTSX_SD_CARD].card_event = NULL; mmc = host->mmc; @@ 
-174188,7 +209313,7 @@ index 58cfaffa3c2d8..e1580f78c6b2d 100644 cancel_work_sync(&host->work); mutex_lock(&host->host_mutex); -@@ -1548,6 +1552,9 @@ static int rtsx_pci_sdmmc_drv_remove(struct platform_device *pdev) +@@ -1548,6 +1559,9 @@ static int rtsx_pci_sdmmc_drv_remove(struct platform_device *pdev) flush_work(&host->work); @@ -174198,11 +209323,250 @@ index 58cfaffa3c2d8..e1580f78c6b2d 100644 mmc_free_host(mmc); dev_dbg(&(pdev->dev), +diff --git a/drivers/mmc/host/rtsx_usb_sdmmc.c b/drivers/mmc/host/rtsx_usb_sdmmc.c +index 5fe4528e296e6..1be3a355f10d5 100644 +--- a/drivers/mmc/host/rtsx_usb_sdmmc.c ++++ b/drivers/mmc/host/rtsx_usb_sdmmc.c +@@ -1332,6 +1332,7 @@ static int rtsx_usb_sdmmc_drv_probe(struct platform_device *pdev) + #ifdef RTSX_USB_USE_LEDS_CLASS + int err; + #endif ++ int ret; + + ucr = usb_get_intfdata(to_usb_interface(pdev->dev.parent)); + if (!ucr) +@@ -1368,7 +1369,15 @@ static int rtsx_usb_sdmmc_drv_probe(struct platform_device *pdev) + INIT_WORK(&host->led_work, rtsx_usb_update_led); + + #endif +- mmc_add_host(mmc); ++ ret = mmc_add_host(mmc); ++ if (ret) { ++#ifdef RTSX_USB_USE_LEDS_CLASS ++ led_classdev_unregister(&host->led); ++#endif ++ mmc_free_host(mmc); ++ pm_runtime_disable(&pdev->dev); ++ return ret; ++ } + + return 0; + } +diff --git a/drivers/mmc/host/sdhci-brcmstb.c b/drivers/mmc/host/sdhci-brcmstb.c +index f24623aac2dbe..4d42b1810acea 100644 +--- a/drivers/mmc/host/sdhci-brcmstb.c ++++ b/drivers/mmc/host/sdhci-brcmstb.c +@@ -12,28 +12,55 @@ + #include <linux/bitops.h> + #include <linux/delay.h> + ++#include "sdhci-cqhci.h" + #include "sdhci-pltfm.h" + #include "cqhci.h" + + #define SDHCI_VENDOR 0x78 + #define SDHCI_VENDOR_ENHANCED_STRB 0x1 ++#define SDHCI_VENDOR_GATE_SDCLK_EN 0x2 + +-#define BRCMSTB_PRIV_FLAGS_NO_64BIT BIT(0) +-#define BRCMSTB_PRIV_FLAGS_BROKEN_TIMEOUT BIT(1) ++#define BRCMSTB_MATCH_FLAGS_NO_64BIT BIT(0) ++#define BRCMSTB_MATCH_FLAGS_BROKEN_TIMEOUT BIT(1) ++#define BRCMSTB_MATCH_FLAGS_HAS_CLOCK_GATE BIT(2) ++ ++#define BRCMSTB_PRIV_FLAGS_HAS_CQE BIT(0) ++#define BRCMSTB_PRIV_FLAGS_GATE_CLOCK BIT(1) + + #define SDHCI_ARASAN_CQE_BASE_ADDR 0x200 + + struct sdhci_brcmstb_priv { + void __iomem *cfg_regs; +- bool has_cqe; ++ unsigned int flags; + }; + + struct brcmstb_match_priv { + void (*hs400es)(struct mmc_host *mmc, struct mmc_ios *ios); + struct sdhci_ops *ops; +- unsigned int flags; ++ const unsigned int flags; + }; + ++static inline void enable_clock_gating(struct sdhci_host *host) ++{ ++ u32 reg; ++ ++ reg = sdhci_readl(host, SDHCI_VENDOR); ++ reg |= SDHCI_VENDOR_GATE_SDCLK_EN; ++ sdhci_writel(host, reg, SDHCI_VENDOR); ++} ++ ++void brcmstb_reset(struct sdhci_host *host, u8 mask) ++{ ++ struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host); ++ struct sdhci_brcmstb_priv *priv = sdhci_pltfm_priv(pltfm_host); ++ ++ sdhci_and_cqhci_reset(host, mask); ++ ++ /* Reset will clear this, so re-enable it */ ++ if (priv->flags & BRCMSTB_PRIV_FLAGS_GATE_CLOCK) ++ enable_clock_gating(host); ++} ++ + static void sdhci_brcmstb_hs400es(struct mmc_host *mmc, struct mmc_ios *ios) + { + struct sdhci_host *host = mmc_priv(mmc); +@@ -129,22 +156,23 @@ static struct sdhci_ops sdhci_brcmstb_ops = { + static struct sdhci_ops sdhci_brcmstb_ops_7216 = { + .set_clock = sdhci_brcmstb_set_clock, + .set_bus_width = sdhci_set_bus_width, +- .reset = sdhci_reset, ++ .reset = brcmstb_reset, + .set_uhs_signaling = sdhci_brcmstb_set_uhs_signaling, + }; + + static struct brcmstb_match_priv match_priv_7425 = { +- .flags = BRCMSTB_PRIV_FLAGS_NO_64BIT | +- 
BRCMSTB_PRIV_FLAGS_BROKEN_TIMEOUT, ++ .flags = BRCMSTB_MATCH_FLAGS_NO_64BIT | ++ BRCMSTB_MATCH_FLAGS_BROKEN_TIMEOUT, + .ops = &sdhci_brcmstb_ops, + }; + + static struct brcmstb_match_priv match_priv_7445 = { +- .flags = BRCMSTB_PRIV_FLAGS_BROKEN_TIMEOUT, ++ .flags = BRCMSTB_MATCH_FLAGS_BROKEN_TIMEOUT, + .ops = &sdhci_brcmstb_ops, + }; + + static const struct brcmstb_match_priv match_priv_7216 = { ++ .flags = BRCMSTB_MATCH_FLAGS_HAS_CLOCK_GATE, + .hs400es = sdhci_brcmstb_hs400es, + .ops = &sdhci_brcmstb_ops_7216, + }; +@@ -176,7 +204,7 @@ static int sdhci_brcmstb_add_host(struct sdhci_host *host, + bool dma64; + int ret; + +- if (!priv->has_cqe) ++ if ((priv->flags & BRCMSTB_PRIV_FLAGS_HAS_CQE) == 0) + return sdhci_add_host(host); + + dev_dbg(mmc_dev(host->mmc), "CQE is enabled\n"); +@@ -225,7 +253,6 @@ static int sdhci_brcmstb_probe(struct platform_device *pdev) + struct sdhci_brcmstb_priv *priv; + struct sdhci_host *host; + struct resource *iomem; +- bool has_cqe = false; + struct clk *clk; + int res; + +@@ -244,10 +271,6 @@ static int sdhci_brcmstb_probe(struct platform_device *pdev) + return res; + + memset(&brcmstb_pdata, 0, sizeof(brcmstb_pdata)); +- if (device_property_read_bool(&pdev->dev, "supports-cqe")) { +- has_cqe = true; +- match_priv->ops->irq = sdhci_brcmstb_cqhci_irq; +- } + brcmstb_pdata.ops = match_priv->ops; + host = sdhci_pltfm_init(pdev, &brcmstb_pdata, + sizeof(struct sdhci_brcmstb_priv)); +@@ -258,7 +281,10 @@ static int sdhci_brcmstb_probe(struct platform_device *pdev) + + pltfm_host = sdhci_priv(host); + priv = sdhci_pltfm_priv(pltfm_host); +- priv->has_cqe = has_cqe; ++ if (device_property_read_bool(&pdev->dev, "supports-cqe")) { ++ priv->flags |= BRCMSTB_PRIV_FLAGS_HAS_CQE; ++ match_priv->ops->irq = sdhci_brcmstb_cqhci_irq; ++ } + + /* Map in the non-standard CFG registers */ + iomem = platform_get_resource(pdev, IORESOURCE_MEM, 1); +@@ -273,6 +299,14 @@ static int sdhci_brcmstb_probe(struct platform_device *pdev) + if (res) + goto err; + ++ /* ++ * Automatic clock gating does not work for SD cards that may ++ * voltage switch so only enable it for non-removable devices. ++ */ ++ if ((match_priv->flags & BRCMSTB_MATCH_FLAGS_HAS_CLOCK_GATE) && ++ (host->mmc->caps & MMC_CAP_NONREMOVABLE)) ++ priv->flags |= BRCMSTB_PRIV_FLAGS_GATE_CLOCK; ++ + /* + * If the chip has enhanced strobe and it's enabled, add + * callback +@@ -287,14 +321,14 @@ static int sdhci_brcmstb_probe(struct platform_device *pdev) + * properties through mmc_of_parse(). 
+ */ + host->caps = sdhci_readl(host, SDHCI_CAPABILITIES); +- if (match_priv->flags & BRCMSTB_PRIV_FLAGS_NO_64BIT) ++ if (match_priv->flags & BRCMSTB_MATCH_FLAGS_NO_64BIT) + host->caps &= ~SDHCI_CAN_64BIT; + host->caps1 = sdhci_readl(host, SDHCI_CAPABILITIES_1); + host->caps1 &= ~(SDHCI_SUPPORT_SDR50 | SDHCI_SUPPORT_SDR104 | + SDHCI_SUPPORT_DDR50); + host->quirks |= SDHCI_QUIRK_MISSING_CAPS; + +- if (match_priv->flags & BRCMSTB_PRIV_FLAGS_BROKEN_TIMEOUT) ++ if (match_priv->flags & BRCMSTB_MATCH_FLAGS_BROKEN_TIMEOUT) + host->quirks |= SDHCI_QUIRK_BROKEN_TIMEOUT_VAL; + + res = sdhci_brcmstb_add_host(host, priv); +diff --git a/drivers/mmc/host/sdhci-cqhci.h b/drivers/mmc/host/sdhci-cqhci.h +new file mode 100644 +index 0000000000000..cf8e7ba71bbd7 +--- /dev/null ++++ b/drivers/mmc/host/sdhci-cqhci.h +@@ -0,0 +1,24 @@ ++/* SPDX-License-Identifier: GPL-2.0-only */ ++/* ++ * Copyright 2022 The Chromium OS Authors ++ * ++ * Support that applies to the combination of SDHCI and CQHCI, while not ++ * expressing a dependency between the two modules. ++ */ ++ ++#ifndef __MMC_HOST_SDHCI_CQHCI_H__ ++#define __MMC_HOST_SDHCI_CQHCI_H__ ++ ++#include "cqhci.h" ++#include "sdhci.h" ++ ++static inline void sdhci_and_cqhci_reset(struct sdhci_host *host, u8 mask) ++{ ++ if ((host->mmc->caps2 & MMC_CAP2_CQE) && (mask & SDHCI_RESET_ALL) && ++ host->mmc->cqe_private) ++ cqhci_deactivate(host->mmc); ++ ++ sdhci_reset(host, mask); ++} ++ ++#endif /* __MMC_HOST_SDHCI_CQHCI_H__ */ diff --git a/drivers/mmc/host/sdhci-esdhc-imx.c b/drivers/mmc/host/sdhci-esdhc-imx.c -index e658f01742420..60f19369de845 100644 +index e658f01742420..794702e346574 100644 --- a/drivers/mmc/host/sdhci-esdhc-imx.c +++ b/drivers/mmc/host/sdhci-esdhc-imx.c -@@ -300,7 +300,6 @@ static struct esdhc_soc_data usdhc_imx8qxp_data = { +@@ -25,6 +25,7 @@ + #include <linux/of_device.h> + #include <linux/pinctrl/consumer.h> + #include <linux/pm_runtime.h> ++#include "sdhci-cqhci.h" + #include "sdhci-pltfm.h" + #include "sdhci-esdhc.h" + #include "cqhci.h" +@@ -106,6 +107,7 @@ + #define ESDHC_TUNING_START_TAP_DEFAULT 0x1 + #define ESDHC_TUNING_START_TAP_MASK 0x7f + #define ESDHC_TUNING_CMD_CRC_CHECK_DISABLE (1 << 7) ++#define ESDHC_TUNING_STEP_DEFAULT 0x1 + #define ESDHC_TUNING_STEP_MASK 0x00070000 + #define ESDHC_TUNING_STEP_SHIFT 16 + +@@ -300,7 +302,6 @@ static struct esdhc_soc_data usdhc_imx8qxp_data = { .flags = ESDHC_FLAG_USDHC | ESDHC_FLAG_STD_TUNING | ESDHC_FLAG_HAVE_CAP1 | ESDHC_FLAG_HS200 | ESDHC_FLAG_HS400 | ESDHC_FLAG_HS400_ES @@ -174210,7 +209574,7 @@ index e658f01742420..60f19369de845 100644 | ESDHC_FLAG_STATE_LOST_IN_LPMODE | ESDHC_FLAG_CLK_RATE_LOST_IN_PM_RUNTIME, }; -@@ -309,7 +308,6 @@ static struct esdhc_soc_data usdhc_imx8mm_data = { +@@ -309,7 +310,6 @@ static struct esdhc_soc_data usdhc_imx8mm_data = { .flags = ESDHC_FLAG_USDHC | ESDHC_FLAG_STD_TUNING | ESDHC_FLAG_HAVE_CAP1 | ESDHC_FLAG_HS200 | ESDHC_FLAG_HS400 | ESDHC_FLAG_HS400_ES @@ -174218,6 +209582,104 @@ index e658f01742420..60f19369de845 100644 | ESDHC_FLAG_STATE_LOST_IN_LPMODE, }; +@@ -1275,7 +1275,7 @@ static void esdhc_set_uhs_signaling(struct sdhci_host *host, unsigned timing) + + static void esdhc_reset(struct sdhci_host *host, u8 mask) + { +- sdhci_reset(host, mask); ++ sdhci_and_cqhci_reset(host, mask); + + sdhci_writel(host, host->ier, SDHCI_INT_ENABLE); + sdhci_writel(host, host->ier, SDHCI_SIGNAL_ENABLE); +@@ -1347,7 +1347,7 @@ static void sdhci_esdhc_imx_hwinit(struct sdhci_host *host) + struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host); + struct pltfm_imx_data 
*imx_data = sdhci_pltfm_priv(pltfm_host); + struct cqhci_host *cq_host = host->mmc->cqe_private; +- int tmp; ++ u32 tmp; + + if (esdhc_is_usdhc(imx_data)) { + /* +@@ -1400,17 +1400,24 @@ static void sdhci_esdhc_imx_hwinit(struct sdhci_host *host) + + if (imx_data->socdata->flags & ESDHC_FLAG_STD_TUNING) { + tmp = readl(host->ioaddr + ESDHC_TUNING_CTRL); +- tmp |= ESDHC_STD_TUNING_EN | +- ESDHC_TUNING_START_TAP_DEFAULT; +- if (imx_data->boarddata.tuning_start_tap) { +- tmp &= ~ESDHC_TUNING_START_TAP_MASK; ++ tmp |= ESDHC_STD_TUNING_EN; ++ ++ /* ++ * ROM code or bootloader may config the start tap ++ * and step, unmask them first. ++ */ ++ tmp &= ~(ESDHC_TUNING_START_TAP_MASK | ESDHC_TUNING_STEP_MASK); ++ if (imx_data->boarddata.tuning_start_tap) + tmp |= imx_data->boarddata.tuning_start_tap; +- } ++ else ++ tmp |= ESDHC_TUNING_START_TAP_DEFAULT; + + if (imx_data->boarddata.tuning_step) { +- tmp &= ~ESDHC_TUNING_STEP_MASK; + tmp |= imx_data->boarddata.tuning_step + << ESDHC_TUNING_STEP_SHIFT; ++ } else { ++ tmp |= ESDHC_TUNING_STEP_DEFAULT ++ << ESDHC_TUNING_STEP_SHIFT; + } + + /* Disable the CMD CRC check for tuning, if not, need to +@@ -1496,7 +1503,7 @@ static void esdhc_cqe_enable(struct mmc_host *mmc) + * system resume back. + */ + cqhci_writel(cq_host, 0, CQHCI_CTL); +- if (cqhci_readl(cq_host, CQHCI_CTL) && CQHCI_HALT) ++ if (cqhci_readl(cq_host, CQHCI_CTL) & CQHCI_HALT) + dev_err(mmc_dev(host->mmc), + "failed to exit halt state when enable CQE\n"); + +@@ -1645,6 +1652,10 @@ static int sdhci_esdhc_imx_probe(struct platform_device *pdev) + host->mmc_host_ops.execute_tuning = usdhc_execute_tuning; + } + ++ err = sdhci_esdhc_imx_probe_dt(pdev, host, imx_data); ++ if (err) ++ goto disable_ahb_clk; ++ + if (imx_data->socdata->flags & ESDHC_FLAG_MAN_TUNING) + sdhci_esdhc_ops.platform_execute_tuning = + esdhc_executing_tuning; +@@ -1652,13 +1663,15 @@ static int sdhci_esdhc_imx_probe(struct platform_device *pdev) + if (imx_data->socdata->flags & ESDHC_FLAG_ERR004536) + host->quirks |= SDHCI_QUIRK_BROKEN_ADMA; + +- if (imx_data->socdata->flags & ESDHC_FLAG_HS400) ++ if (host->mmc->caps & MMC_CAP_8_BIT_DATA && ++ imx_data->socdata->flags & ESDHC_FLAG_HS400) + host->mmc->caps2 |= MMC_CAP2_HS400; + + if (imx_data->socdata->flags & ESDHC_FLAG_BROKEN_AUTO_CMD23) + host->quirks2 |= SDHCI_QUIRK2_ACMD23_BROKEN; + +- if (imx_data->socdata->flags & ESDHC_FLAG_HS400_ES) { ++ if (host->mmc->caps & MMC_CAP_8_BIT_DATA && ++ imx_data->socdata->flags & ESDHC_FLAG_HS400_ES) { + host->mmc->caps2 |= MMC_CAP2_HS400_ES; + host->mmc_host_ops.hs400_enhanced_strobe = + esdhc_hs400_enhanced_strobe; +@@ -1680,10 +1693,6 @@ static int sdhci_esdhc_imx_probe(struct platform_device *pdev) + goto disable_ahb_clk; + } + +- err = sdhci_esdhc_imx_probe_dt(pdev, host, imx_data); +- if (err) +- goto disable_ahb_clk; +- + sdhci_esdhc_imx_hwinit(host); + + err = sdhci_add_host(host); diff --git a/drivers/mmc/host/sdhci-msm.c b/drivers/mmc/host/sdhci-msm.c index 50c71e0ba5e4e..83d38e44fc259 100644 --- a/drivers/mmc/host/sdhci-msm.c @@ -174293,6 +209755,27 @@ index 50c71e0ba5e4e..83d38e44fc259 100644 /* Setup SDCC bus voter clock. 
*/ msm_host->bus_clk = devm_clk_get(&pdev->dev, "bus"); if (!IS_ERR(msm_host->bus_clk)) { +diff --git a/drivers/mmc/host/sdhci-of-arasan.c b/drivers/mmc/host/sdhci-of-arasan.c +index 737e2bfdedc28..bede148db7326 100644 +--- a/drivers/mmc/host/sdhci-of-arasan.c ++++ b/drivers/mmc/host/sdhci-of-arasan.c +@@ -25,6 +25,7 @@ + #include <linux/firmware/xlnx-zynqmp.h> + + #include "cqhci.h" ++#include "sdhci-cqhci.h" + #include "sdhci-pltfm.h" + + #define SDHCI_ARASAN_VENDOR_REGISTER 0x78 +@@ -359,7 +360,7 @@ static void sdhci_arasan_reset(struct sdhci_host *host, u8 mask) + struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host); + struct sdhci_arasan_data *sdhci_arasan = sdhci_pltfm_priv(pltfm_host); + +- sdhci_reset(host, mask); ++ sdhci_and_cqhci_reset(host, mask); + + if (sdhci_arasan->quirks & SDHCI_ARASAN_QUIRK_FORCE_CDTEST) { + ctrl = sdhci_readb(host, SDHCI_HOST_CONTROL); diff --git a/drivers/mmc/host/sdhci-of-at91.c b/drivers/mmc/host/sdhci-of-at91.c index d1a1c548c515f..0452c312b65eb 100644 --- a/drivers/mmc/host/sdhci-of-at91.c @@ -174650,10 +210133,47 @@ index 8f4d1f003f656..fd188b6d88f49 100644 static int __maybe_unused sdhci_omap_suspend(struct device *dev) diff --git a/drivers/mmc/host/sdhci-pci-core.c b/drivers/mmc/host/sdhci-pci-core.c -index d0f2edfe296c8..c2b26ada104d6 100644 +index d0f2edfe296c8..8736e04fa73cc 100644 --- a/drivers/mmc/host/sdhci-pci-core.c +++ b/drivers/mmc/host/sdhci-pci-core.c -@@ -1951,6 +1951,7 @@ static const struct pci_device_id pci_ids[] = { +@@ -978,6 +978,12 @@ static bool glk_broken_cqhci(struct sdhci_pci_slot *slot) + dmi_match(DMI_SYS_VENDOR, "IRBIS")); + } + ++static bool jsl_broken_hs400es(struct sdhci_pci_slot *slot) ++{ ++ return slot->chip->pdev->device == PCI_DEVICE_ID_INTEL_JSL_EMMC && ++ dmi_match(DMI_BIOS_VENDOR, "ASUSTeK COMPUTER INC."); ++} ++ + static int glk_emmc_probe_slot(struct sdhci_pci_slot *slot) + { + int ret = byt_emmc_probe_slot(slot); +@@ -986,9 +992,11 @@ static int glk_emmc_probe_slot(struct sdhci_pci_slot *slot) + slot->host->mmc->caps2 |= MMC_CAP2_CQE; + + if (slot->chip->pdev->device != PCI_DEVICE_ID_INTEL_GLK_EMMC) { +- slot->host->mmc->caps2 |= MMC_CAP2_HS400_ES; +- slot->host->mmc_host_ops.hs400_enhanced_strobe = +- intel_hs400_enhanced_strobe; ++ if (!jsl_broken_hs400es(slot)) { ++ slot->host->mmc->caps2 |= MMC_CAP2_HS400_ES; ++ slot->host->mmc_host_ops.hs400_enhanced_strobe = ++ intel_hs400_enhanced_strobe; ++ } + slot->host->mmc->caps2 |= MMC_CAP2_CQE_DCMD; + } + +@@ -1810,6 +1818,8 @@ static int amd_probe(struct sdhci_pci_chip *chip) + } + } + ++ pci_dev_put(smbus_dev); ++ + if (gen == AMD_CHIPSET_BEFORE_ML || gen == AMD_CHIPSET_CZ) + chip->quirks2 |= SDHCI_QUIRK2_CLEAR_TRANSFERMODE_REG_BEFORE_CMD; + +@@ -1951,6 +1961,7 @@ static const struct pci_device_id pci_ids[] = { SDHCI_PCI_DEVICE(INTEL, JSL_SD, intel_byt_sd), SDHCI_PCI_DEVICE(INTEL, LKF_EMMC, intel_glk_emmc), SDHCI_PCI_DEVICE(INTEL, LKF_SD, intel_byt_sd), @@ -174698,10 +210218,18 @@ index 4fd99c1e82ba3..ad50f16658fe2 100644 value &= ~PCI_GLI_9755_DMACLK; pci_write_config_dword(pdev, PCI_GLI_9755_PECONF, value); diff --git a/drivers/mmc/host/sdhci-pci-o2micro.c b/drivers/mmc/host/sdhci-pci-o2micro.c -index 51d55a87aebef..059034e832c92 100644 +index 51d55a87aebef..78d0b9fcc42cb 100644 --- a/drivers/mmc/host/sdhci-pci-o2micro.c +++ b/drivers/mmc/host/sdhci-pci-o2micro.c -@@ -147,6 +147,8 @@ static int sdhci_o2_get_cd(struct mmc_host *mmc) +@@ -31,6 +31,7 @@ + #define O2_SD_CAPS 0xE0 + #define O2_SD_ADMA1 0xE2 + #define O2_SD_ADMA2 0xE7 ++#define 
O2_SD_MISC_CTRL2 0xF0 + #define O2_SD_INF_MOD 0xF1 + #define O2_SD_MISC_CTRL4 0xFC + #define O2_SD_MISC_CTRL 0x1C0 +@@ -147,6 +148,8 @@ static int sdhci_o2_get_cd(struct mmc_host *mmc) if (!(sdhci_readw(host, O2_PLL_DLL_WDT_CONTROL1) & O2_PLL_LOCK_STATUS)) sdhci_o2_enable_internal_clock(host); @@ -174710,6 +210238,19 @@ index 51d55a87aebef..059034e832c92 100644 return !!(sdhci_readl(host, SDHCI_PRESENT_STATE) & SDHCI_CARD_PRESENT); } +@@ -828,6 +831,12 @@ static int sdhci_pci_o2_probe(struct sdhci_pci_chip *chip) + /* Set Tuning Windows to 5 */ + pci_write_config_byte(chip->pdev, + O2_SD_TUNING_CTRL, 0x55); ++ //Adjust 1st and 2nd CD debounce time ++ pci_read_config_dword(chip->pdev, O2_SD_MISC_CTRL2, &scratch_32); ++ scratch_32 &= 0xFFE7FFFF; ++ scratch_32 |= 0x00180000; ++ pci_write_config_dword(chip->pdev, O2_SD_MISC_CTRL2, scratch_32); ++ pci_write_config_dword(chip->pdev, O2_SD_DETECT_SETTING, 1); + /* Lock WP */ + ret = pci_read_config_byte(chip->pdev, + O2_SD_LOCK_WP, &scratch); diff --git a/drivers/mmc/host/sdhci-pci.h b/drivers/mmc/host/sdhci-pci.h index 8f90c4163bb5c..dcd99d5057ee1 100644 --- a/drivers/mmc/host/sdhci-pci.h @@ -174723,10 +210264,33 @@ index 8f90c4163bb5c..dcd99d5057ee1 100644 #define PCI_DEVICE_ID_SYSKONNECT_8000 0x8000 #define PCI_DEVICE_ID_VIA_95D0 0x95d0 diff --git a/drivers/mmc/host/sdhci-sprd.c b/drivers/mmc/host/sdhci-sprd.c -index 11e375579cfb9..f5c519026b524 100644 +index 11e375579cfb9..256260339f692 100644 --- a/drivers/mmc/host/sdhci-sprd.c +++ b/drivers/mmc/host/sdhci-sprd.c -@@ -296,7 +296,7 @@ static unsigned int sdhci_sprd_get_max_clock(struct sdhci_host *host) +@@ -224,13 +224,15 @@ static inline void _sdhci_sprd_set_clock(struct sdhci_host *host, + div = ((div & 0x300) >> 2) | ((div & 0xFF) << 8); + sdhci_enable_clk(host, div); + +- /* enable auto gate sdhc_enable_auto_gate */ +- val = sdhci_readl(host, SDHCI_SPRD_REG_32_BUSY_POSI); +- mask = SDHCI_SPRD_BIT_OUTR_CLK_AUTO_EN | +- SDHCI_SPRD_BIT_INNR_CLK_AUTO_EN; +- if (mask != (val & mask)) { +- val |= mask; +- sdhci_writel(host, val, SDHCI_SPRD_REG_32_BUSY_POSI); ++ /* Enable CLK_AUTO when the clock is greater than 400K. 
*/ ++ if (clk > 400000) { ++ val = sdhci_readl(host, SDHCI_SPRD_REG_32_BUSY_POSI); ++ mask = SDHCI_SPRD_BIT_OUTR_CLK_AUTO_EN | ++ SDHCI_SPRD_BIT_INNR_CLK_AUTO_EN; ++ if (mask != (val & mask)) { ++ val |= mask; ++ sdhci_writel(host, val, SDHCI_SPRD_REG_32_BUSY_POSI); ++ } + } + } + +@@ -296,7 +298,7 @@ static unsigned int sdhci_sprd_get_max_clock(struct sdhci_host *host) static unsigned int sdhci_sprd_get_min_clock(struct sdhci_host *host) { @@ -174735,11 +210299,37 @@ index 11e375579cfb9..f5c519026b524 100644 } static void sdhci_sprd_set_uhs_signaling(struct sdhci_host *host, +@@ -457,7 +459,7 @@ static int sdhci_sprd_voltage_switch(struct mmc_host *mmc, struct mmc_ios *ios) + } + + if (IS_ERR(sprd_host->pinctrl)) +- return 0; ++ goto reset; + + switch (ios->signal_voltage) { + case MMC_SIGNAL_VOLTAGE_180: +@@ -485,6 +487,8 @@ static int sdhci_sprd_voltage_switch(struct mmc_host *mmc, struct mmc_ios *ios) + + /* Wait for 300 ~ 500 us for pin state stable */ + usleep_range(300, 500); ++ ++reset: + sdhci_reset(host, SDHCI_RESET_CMD | SDHCI_RESET_DATA); + + return 0; diff --git a/drivers/mmc/host/sdhci-tegra.c b/drivers/mmc/host/sdhci-tegra.c -index a5001875876b9..829a8bf7c77dd 100644 +index a5001875876b9..fff9fb8d6bacb 100644 --- a/drivers/mmc/host/sdhci-tegra.c +++ b/drivers/mmc/host/sdhci-tegra.c -@@ -356,23 +356,6 @@ static void tegra_sdhci_set_tap(struct sdhci_host *host, unsigned int tap) +@@ -24,6 +24,7 @@ + #include <linux/gpio/consumer.h> + #include <linux/ktime.h> + ++#include "sdhci-cqhci.h" + #include "sdhci-pltfm.h" + #include "cqhci.h" + +@@ -356,23 +357,6 @@ static void tegra_sdhci_set_tap(struct sdhci_host *host, unsigned int tap) } } @@ -174763,7 +210353,16 @@ index a5001875876b9..829a8bf7c77dd 100644 static void tegra_sdhci_reset(struct sdhci_host *host, u8 mask) { struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host); -@@ -779,7 +762,7 @@ static void tegra_sdhci_set_clock(struct sdhci_host *host, unsigned int clock) +@@ -380,7 +364,7 @@ static void tegra_sdhci_reset(struct sdhci_host *host, u8 mask) + const struct sdhci_tegra_soc_data *soc_data = tegra_host->soc_data; + u32 misc_ctrl, clk_ctrl, pad_ctrl; + +- sdhci_reset(host, mask); ++ sdhci_and_cqhci_reset(host, mask); + + if (!(mask & SDHCI_RESET_ALL)) + return; +@@ -779,7 +763,7 @@ static void tegra_sdhci_set_clock(struct sdhci_host *host, unsigned int clock) */ host_clk = tegra_host->ddr_signaling ? 
clock * 2 : clock; clk_set_rate(pltfm_host->clk, host_clk); @@ -174772,7 +210371,7 @@ index a5001875876b9..829a8bf7c77dd 100644 if (tegra_host->ddr_signaling) host->max_clk = host_clk; else -@@ -793,6 +776,32 @@ static void tegra_sdhci_set_clock(struct sdhci_host *host, unsigned int clock) +@@ -793,6 +777,32 @@ static void tegra_sdhci_set_clock(struct sdhci_host *host, unsigned int clock) } } @@ -174827,10 +210426,18 @@ index 666cee4c7f7c6..08e838400b526 100644 static unsigned int xenon_get_max_clock(struct sdhci_host *host) diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c -index 2d80a04e11d87..7728f26adb19f 100644 +index 2d80a04e11d87..cda145c2ebb68 100644 --- a/drivers/mmc/host/sdhci.c +++ b/drivers/mmc/host/sdhci.c -@@ -771,7 +771,19 @@ static void sdhci_adma_table_pre(struct sdhci_host *host, +@@ -338,6 +338,7 @@ static void sdhci_init(struct sdhci_host *host, int soft) + if (soft) { + /* force clock reconfiguration */ + host->clock = 0; ++ host->reinit_uhs = true; + mmc->ops->set_ios(mmc, &mmc->ios); + } + } +@@ -771,7 +772,19 @@ static void sdhci_adma_table_pre(struct sdhci_host *host, len -= offset; } @@ -174851,7 +210458,127 @@ index 2d80a04e11d87..7728f26adb19f 100644 /* tran, valid */ if (len) -@@ -3952,6 +3964,7 @@ struct sdhci_host *sdhci_alloc_host(struct device *dev, +@@ -2245,11 +2258,46 @@ void sdhci_set_uhs_signaling(struct sdhci_host *host, unsigned timing) + } + EXPORT_SYMBOL_GPL(sdhci_set_uhs_signaling); + ++static bool sdhci_timing_has_preset(unsigned char timing) ++{ ++ switch (timing) { ++ case MMC_TIMING_UHS_SDR12: ++ case MMC_TIMING_UHS_SDR25: ++ case MMC_TIMING_UHS_SDR50: ++ case MMC_TIMING_UHS_SDR104: ++ case MMC_TIMING_UHS_DDR50: ++ case MMC_TIMING_MMC_DDR52: ++ return true; ++ }; ++ return false; ++} ++ ++static bool sdhci_preset_needed(struct sdhci_host *host, unsigned char timing) ++{ ++ return !(host->quirks2 & SDHCI_QUIRK2_PRESET_VALUE_BROKEN) && ++ sdhci_timing_has_preset(timing); ++} ++ ++static bool sdhci_presetable_values_change(struct sdhci_host *host, struct mmc_ios *ios) ++{ ++ /* ++ * Preset Values are: Driver Strength, Clock Generator and SDCLK/RCLK ++ * Frequency. Check if preset values need to be enabled, or the Driver ++ * Strength needs updating. Note, clock changes are handled separately. ++ */ ++ return !host->preset_enabled && ++ (sdhci_preset_needed(host, ios->timing) || host->drv_type != ios->drv_type); ++} ++ + void sdhci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) + { + struct sdhci_host *host = mmc_priv(mmc); ++ bool reinit_uhs = host->reinit_uhs; ++ bool turning_on_clk = false; + u8 ctrl; + ++ host->reinit_uhs = false; ++ + if (ios->power_mode == MMC_POWER_UNDEFINED) + return; + +@@ -2275,6 +2323,8 @@ void sdhci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) + sdhci_enable_preset_value(host, false); + + if (!ios->clock || ios->clock != host->clock) { ++ turning_on_clk = ios->clock && !host->clock; ++ + host->ops->set_clock(host, ios->clock); + host->clock = ios->clock; + +@@ -2301,6 +2351,17 @@ void sdhci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) + + host->ops->set_bus_width(host, ios->bus_width); + ++ /* ++ * Special case to avoid multiple clock changes during voltage ++ * switching. 
++ */ ++ if (!reinit_uhs && ++ turning_on_clk && ++ host->timing == ios->timing && ++ host->version >= SDHCI_SPEC_300 && ++ !sdhci_presetable_values_change(host, ios)) ++ return; ++ + ctrl = sdhci_readb(host, SDHCI_HOST_CONTROL); + + if (!(host->quirks & SDHCI_QUIRK_NO_HISPD_BIT)) { +@@ -2344,6 +2405,7 @@ void sdhci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) + } + + sdhci_writew(host, ctrl_2, SDHCI_HOST_CONTROL2); ++ host->drv_type = ios->drv_type; + } else { + /* + * According to SDHC Spec v3.00, if the Preset Value +@@ -2371,19 +2433,14 @@ void sdhci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) + host->ops->set_uhs_signaling(host, ios->timing); + host->timing = ios->timing; + +- if (!(host->quirks2 & SDHCI_QUIRK2_PRESET_VALUE_BROKEN) && +- ((ios->timing == MMC_TIMING_UHS_SDR12) || +- (ios->timing == MMC_TIMING_UHS_SDR25) || +- (ios->timing == MMC_TIMING_UHS_SDR50) || +- (ios->timing == MMC_TIMING_UHS_SDR104) || +- (ios->timing == MMC_TIMING_UHS_DDR50) || +- (ios->timing == MMC_TIMING_MMC_DDR52))) { ++ if (sdhci_preset_needed(host, ios->timing)) { + u16 preset; + + sdhci_enable_preset_value(host, true); + preset = sdhci_get_preset_value(host); + ios->drv_type = FIELD_GET(SDHCI_PRESET_DRV_MASK, + preset); ++ host->drv_type = ios->drv_type; + } + + /* Re-enable SD Clock */ +@@ -3699,6 +3756,7 @@ int sdhci_resume_host(struct sdhci_host *host) + sdhci_init(host, 0); + host->pwr = 0; + host->clock = 0; ++ host->reinit_uhs = true; + mmc->ops->set_ios(mmc, &mmc->ios); + } else { + sdhci_init(host, (mmc->pm_flags & MMC_PM_KEEP_POWER)); +@@ -3761,6 +3819,7 @@ int sdhci_runtime_resume_host(struct sdhci_host *host, int soft_reset) + /* Force clock and power re-program */ + host->pwr = 0; + host->clock = 0; ++ host->reinit_uhs = true; + mmc->ops->start_signal_voltage_switch(mmc, &mmc->ios); + mmc->ops->set_ios(mmc, &mmc->ios); + +@@ -3952,6 +4011,7 @@ struct sdhci_host *sdhci_alloc_host(struct device *dev, * descriptor for each segment, plus 1 for a nop end descriptor. */ host->adma_table_cnt = SDHCI_MAX_SEGS * 2 + 1; @@ -174859,7 +210586,7 @@ index 2d80a04e11d87..7728f26adb19f 100644 host->max_timeout_count = 0xE; -@@ -4617,10 +4630,12 @@ int sdhci_setup_host(struct sdhci_host *host) +@@ -4617,10 +4677,12 @@ int sdhci_setup_host(struct sdhci_host *host) * be larger than 64 KiB though. 
*/ if (host->flags & SDHCI_USE_ADMA) { @@ -174875,7 +210602,7 @@ index 2d80a04e11d87..7728f26adb19f 100644 mmc->max_seg_size = mmc->max_req_size; } diff --git a/drivers/mmc/host/sdhci.h b/drivers/mmc/host/sdhci.h -index e8d04e42a5afd..6c689be3e48f6 100644 +index e8d04e42a5afd..6a5cc05576cd5 100644 --- a/drivers/mmc/host/sdhci.h +++ b/drivers/mmc/host/sdhci.h @@ -340,7 +340,8 @@ struct sdhci_adma2_64_desc { @@ -174888,7 +210615,16 @@ index e8d04e42a5afd..6c689be3e48f6 100644 */ #define SDHCI_MAX_SEGS 128 -@@ -543,6 +544,7 @@ struct sdhci_host { +@@ -522,6 +523,8 @@ struct sdhci_host { + + unsigned int clock; /* Current clock (MHz) */ + u8 pwr; /* Current voltage */ ++ u8 drv_type; /* Current UHS-I driver type */ ++ bool reinit_uhs; /* Force UHS-related re-initialization */ + + bool runtime_suspended; /* Host is runtime suspended */ + bool bus_on; /* Bus power prevents runtime suspend */ +@@ -543,6 +546,7 @@ struct sdhci_host { unsigned int blocks; /* remaining PIO blocks */ int sg_count; /* Mapped sg entries */ @@ -174897,10 +210633,18 @@ index e8d04e42a5afd..6c689be3e48f6 100644 void *adma_table; /* ADMA descriptor table */ void *align_buffer; /* Bounce buffer */ diff --git a/drivers/mmc/host/sdhci_am654.c b/drivers/mmc/host/sdhci_am654.c -index f654afbe8e83c..a3e62e212631f 100644 +index f654afbe8e83c..9661e010df891 100644 --- a/drivers/mmc/host/sdhci_am654.c +++ b/drivers/mmc/host/sdhci_am654.c -@@ -147,6 +147,9 @@ struct sdhci_am654_data { +@@ -15,6 +15,7 @@ + #include <linux/sys_soc.h> + + #include "cqhci.h" ++#include "sdhci-cqhci.h" + #include "sdhci-pltfm.h" + + /* CTL_CFG Registers */ +@@ -147,6 +148,9 @@ struct sdhci_am654_data { int drv_strength; int strb_sel; u32 flags; @@ -174910,7 +210654,7 @@ index f654afbe8e83c..a3e62e212631f 100644 }; struct sdhci_am654_driver_data { -@@ -369,6 +372,21 @@ static void sdhci_am654_write_b(struct sdhci_host *host, u8 val, int reg) +@@ -369,6 +373,21 @@ static void sdhci_am654_write_b(struct sdhci_host *host, u8 val, int reg) } } @@ -174920,7 +210664,7 @@ index f654afbe8e83c..a3e62e212631f 100644 + struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host); + struct sdhci_am654_data *sdhci_am654 = sdhci_pltfm_priv(pltfm_host); + -+ sdhci_reset(host, mask); ++ sdhci_and_cqhci_reset(host, mask); + + if (sdhci_am654->quirks & SDHCI_AM654_QUIRK_FORCE_CDTEST) { + ctrl = sdhci_readb(host, SDHCI_HOST_CONTROL); @@ -174932,7 +210676,25 @@ index f654afbe8e83c..a3e62e212631f 100644 static int sdhci_am654_execute_tuning(struct mmc_host *mmc, u32 opcode) { struct sdhci_host *host = mmc_priv(mmc); -@@ -500,7 +518,7 @@ static struct sdhci_ops sdhci_j721e_4bit_ops = { +@@ -446,7 +465,7 @@ static struct sdhci_ops sdhci_am654_ops = { + .set_clock = sdhci_am654_set_clock, + .write_b = sdhci_am654_write_b, + .irq = sdhci_am654_cqhci_irq, +- .reset = sdhci_reset, ++ .reset = sdhci_and_cqhci_reset, + }; + + static const struct sdhci_pltfm_data sdhci_am654_pdata = { +@@ -476,7 +495,7 @@ static struct sdhci_ops sdhci_j721e_8bit_ops = { + .set_clock = sdhci_am654_set_clock, + .write_b = sdhci_am654_write_b, + .irq = sdhci_am654_cqhci_irq, +- .reset = sdhci_reset, ++ .reset = sdhci_and_cqhci_reset, + }; + + static const struct sdhci_pltfm_data sdhci_j721e_8bit_pdata = { +@@ -500,7 +519,7 @@ static struct sdhci_ops sdhci_j721e_4bit_ops = { .set_clock = sdhci_j721e_4bit_set_clock, .write_b = sdhci_am654_write_b, .irq = sdhci_am654_cqhci_irq, @@ -174941,7 +210703,7 @@ index f654afbe8e83c..a3e62e212631f 100644 }; static const struct sdhci_pltfm_data sdhci_j721e_4bit_pdata = { 
-@@ -514,26 +532,6 @@ static const struct sdhci_am654_driver_data sdhci_j721e_4bit_drvdata = { +@@ -514,26 +533,6 @@ static const struct sdhci_am654_driver_data sdhci_j721e_4bit_drvdata = { .flags = IOMUX_PRESENT, }; @@ -174968,7 +210730,7 @@ index f654afbe8e83c..a3e62e212631f 100644 static const struct soc_device_attribute sdhci_am654_devices[] = { { .family = "AM65X", .revision = "SR1.0", -@@ -739,6 +737,9 @@ static int sdhci_am654_get_of_property(struct platform_device *pdev, +@@ -739,6 +738,9 @@ static int sdhci_am654_get_of_property(struct platform_device *pdev, device_property_read_u32(dev, "ti,clkbuf-sel", &sdhci_am654->clkbuf_sel); @@ -174978,7 +210740,7 @@ index f654afbe8e83c..a3e62e212631f 100644 sdhci_get_of_property(pdev); return 0; -@@ -759,11 +760,11 @@ static const struct of_device_id sdhci_am654_of_match[] = { +@@ -759,11 +761,11 @@ static const struct of_device_id sdhci_am654_of_match[] = { }, { .compatible = "ti,am64-sdhci-8bit", @@ -174992,8 +210754,22 @@ index f654afbe8e83c..a3e62e212631f 100644 }, { /* sentinel */ } }; +diff --git a/drivers/mmc/host/sdhci_f_sdh30.c b/drivers/mmc/host/sdhci_f_sdh30.c +index 3f5977979cf25..6c4f43e112826 100644 +--- a/drivers/mmc/host/sdhci_f_sdh30.c ++++ b/drivers/mmc/host/sdhci_f_sdh30.c +@@ -168,6 +168,9 @@ static int sdhci_f_sdh30_probe(struct platform_device *pdev) + if (reg & SDHCI_CAN_DO_8BIT) + priv->vendor_hs200 = F_SDH30_EMMC_HS200; + ++ if (!(reg & SDHCI_TIMEOUT_CLK_MASK)) ++ host->quirks |= SDHCI_QUIRK_DATA_TIMEOUT_USES_SDCLK; ++ + ret = sdhci_add_host(host); + if (ret) + goto err_add_host; diff --git a/drivers/mmc/host/sunxi-mmc.c b/drivers/mmc/host/sunxi-mmc.c -index 2702736a1c57d..ce6cb8be654ef 100644 +index 2702736a1c57d..032f2c03e8fb0 100644 --- a/drivers/mmc/host/sunxi-mmc.c +++ b/drivers/mmc/host/sunxi-mmc.c @@ -377,8 +377,9 @@ static void sunxi_mmc_init_idma_des(struct sunxi_mmc_host *host, @@ -175008,6 +210784,21 @@ index 2702736a1c57d..ce6cb8be654ef 100644 } pdes[0].config |= cpu_to_le32(SDXC_IDMAC_DES0_FD); +@@ -1482,9 +1483,11 @@ static int sunxi_mmc_remove(struct platform_device *pdev) + struct sunxi_mmc_host *host = mmc_priv(mmc); + + mmc_remove_host(mmc); +- pm_runtime_force_suspend(&pdev->dev); +- disable_irq(host->irq); +- sunxi_mmc_disable(host); ++ pm_runtime_disable(&pdev->dev); ++ if (!pm_runtime_status_suspended(&pdev->dev)) { ++ disable_irq(host->irq); ++ sunxi_mmc_disable(host); ++ } + dma_free_coherent(&pdev->dev, PAGE_SIZE, host->sg_cpu, host->sg_dma); + mmc_free_host(mmc); + diff --git a/drivers/mmc/host/tmio_mmc.c b/drivers/mmc/host/tmio_mmc.c index b55a29c53d9c3..53a2ad9a24b87 100644 --- a/drivers/mmc/host/tmio_mmc.c @@ -175158,8 +210949,122 @@ index e2affa52ef469..437048bb80273 100644 tmio_mmc_enable_dma(host, true); return 0; +diff --git a/drivers/mmc/host/toshsd.c b/drivers/mmc/host/toshsd.c +index 8d037c2071abc..497791ffada6d 100644 +--- a/drivers/mmc/host/toshsd.c ++++ b/drivers/mmc/host/toshsd.c +@@ -651,7 +651,9 @@ static int toshsd_probe(struct pci_dev *pdev, const struct pci_device_id *ent) + if (ret) + goto unmap; + +- mmc_add_host(mmc); ++ ret = mmc_add_host(mmc); ++ if (ret) ++ goto free_irq; + + base = pci_resource_start(pdev, 0); + dev_dbg(&pdev->dev, "MMIO %pa, IRQ %d\n", &base, pdev->irq); +@@ -660,6 +662,8 @@ static int toshsd_probe(struct pci_dev *pdev, const struct pci_device_id *ent) + + return 0; + ++free_irq: ++ free_irq(pdev->irq, host); + unmap: + pci_iounmap(pdev, host->ioaddr); + release: +diff --git a/drivers/mmc/host/via-sdmmc.c b/drivers/mmc/host/via-sdmmc.c +index 
88662a90ed960..a2b0d9461665b 100644 +--- a/drivers/mmc/host/via-sdmmc.c ++++ b/drivers/mmc/host/via-sdmmc.c +@@ -1151,7 +1151,9 @@ static int via_sd_probe(struct pci_dev *pcidev, + pcidev->subsystem_device == 0x3891) + sdhost->quirks = VIA_CRDR_QUIRK_300MS_PWRDELAY; + +- mmc_add_host(mmc); ++ ret = mmc_add_host(mmc); ++ if (ret) ++ goto unmap; + + return 0; + +diff --git a/drivers/mmc/host/vub300.c b/drivers/mmc/host/vub300.c +index 97beece62fec4..72f65f32abbc7 100644 +--- a/drivers/mmc/host/vub300.c ++++ b/drivers/mmc/host/vub300.c +@@ -2049,6 +2049,7 @@ static void vub300_enable_sdio_irq(struct mmc_host *mmc, int enable) + return; + kref_get(&vub300->kref); + if (enable) { ++ set_current_state(TASK_RUNNING); + mutex_lock(&vub300->irq_mutex); + if (vub300->irqs_queued) { + vub300->irqs_queued -= 1; +@@ -2064,6 +2065,7 @@ static void vub300_enable_sdio_irq(struct mmc_host *mmc, int enable) + vub300_queue_poll_work(vub300, 0); + } + mutex_unlock(&vub300->irq_mutex); ++ set_current_state(TASK_INTERRUPTIBLE); + } else { + vub300->irq_enabled = 0; + } +@@ -2299,14 +2301,14 @@ static int vub300_probe(struct usb_interface *interface, + 0x0000, 0x0000, &vub300->system_port_status, + sizeof(vub300->system_port_status), 1000); + if (retval < 0) { +- goto error4; ++ goto error5; + } else if (sizeof(vub300->system_port_status) == retval) { + vub300->card_present = + (0x0001 & vub300->system_port_status.port_flags) ? 1 : 0; + vub300->read_only = + (0x0010 & vub300->system_port_status.port_flags) ? 1 : 0; + } else { +- goto error4; ++ goto error5; + } + usb_set_intfdata(interface, vub300); + INIT_DELAYED_WORK(&vub300->pollwork, vub300_pollwork_thread); +@@ -2329,8 +2331,13 @@ static int vub300_probe(struct usb_interface *interface, + "USB vub300 remote SDIO host controller[%d]" + "connected with no SD/SDIO card inserted\n", + interface_to_InterfaceNumber(interface)); +- mmc_add_host(mmc); ++ retval = mmc_add_host(mmc); ++ if (retval) ++ goto error6; ++ + return 0; ++error6: ++ del_timer_sync(&vub300->inactivity_timer); + error5: + mmc_free_host(mmc); + /* +diff --git a/drivers/mmc/host/wbsd.c b/drivers/mmc/host/wbsd.c +index 67ecd342fe5f1..7c7ec8d10232b 100644 +--- a/drivers/mmc/host/wbsd.c ++++ b/drivers/mmc/host/wbsd.c +@@ -1698,7 +1698,17 @@ static int wbsd_init(struct device *dev, int base, int irq, int dma, + */ + wbsd_init_device(host); + +- mmc_add_host(mmc); ++ ret = mmc_add_host(mmc); ++ if (ret) { ++ if (!pnp) ++ wbsd_chip_poweroff(host); ++ ++ wbsd_release_resources(host); ++ wbsd_free_mmc(dev); ++ ++ mmc_free_host(mmc); ++ return ret; ++ } + + pr_info("%s: W83L51xD", mmc_hostname(mmc)); + if (host->chip_id != 0) diff --git a/drivers/mmc/host/wmt-sdmmc.c b/drivers/mmc/host/wmt-sdmmc.c -index cf10949fb0acc..8df722ec57edc 100644 +index cf10949fb0acc..3933195488575 100644 --- a/drivers/mmc/host/wmt-sdmmc.c +++ b/drivers/mmc/host/wmt-sdmmc.c @@ -849,7 +849,7 @@ static int wmt_mci_probe(struct platform_device *pdev) @@ -175171,8 +211076,20 @@ index cf10949fb0acc..8df722ec57edc 100644 } ret = clk_prepare_enable(priv->clk_sdmmc); -@@ -866,6 +866,9 @@ static int wmt_mci_probe(struct platform_device *pdev) +@@ -859,13 +859,20 @@ static int wmt_mci_probe(struct platform_device *pdev) + /* configure the controller to a known 'ready' state */ + wmt_reset_hardware(mmc); + +- mmc_add_host(mmc); ++ ret = mmc_add_host(mmc); ++ if (ret) ++ goto fail7; + + dev_info(&pdev->dev, "WMT SDHC Controller initialized\n"); + return 0; ++fail7: ++ clk_disable_unprepare(priv->clk_sdmmc); fail6: clk_put(priv->clk_sdmmc); 
+fail5_and_a_half: @@ -175700,6 +211617,19 @@ index ecb050ba95cdf..dc164c18f8429 100644 return error; } +diff --git a/drivers/mtd/lpddr/lpddr2_nvm.c b/drivers/mtd/lpddr/lpddr2_nvm.c +index 72f5c7b300790..add4386f99f00 100644 +--- a/drivers/mtd/lpddr/lpddr2_nvm.c ++++ b/drivers/mtd/lpddr/lpddr2_nvm.c +@@ -433,6 +433,8 @@ static int lpddr2_nvm_probe(struct platform_device *pdev) + + /* lpddr2_nvm address range */ + add_range = platform_get_resource(pdev, IORESOURCE_MEM, 0); ++ if (!add_range) ++ return -ENODEV; + + /* Populate map_info data structure */ + *map = (struct map_info) { diff --git a/drivers/mtd/maps/Kconfig b/drivers/mtd/maps/Kconfig index aaa164b977fe8..4945caa88345b 100644 --- a/drivers/mtd/maps/Kconfig @@ -175733,6 +211663,26 @@ index ad7cd9cfaee04..a1b8b7b25f88b 100644 if (IS_ERR(rmap)) return PTR_ERR(rmap); +diff --git a/drivers/mtd/maps/pxa2xx-flash.c b/drivers/mtd/maps/pxa2xx-flash.c +index 7d96758a8f04e..6e5e557559704 100644 +--- a/drivers/mtd/maps/pxa2xx-flash.c ++++ b/drivers/mtd/maps/pxa2xx-flash.c +@@ -66,6 +66,7 @@ static int pxa2xx_flash_probe(struct platform_device *pdev) + if (!info->map.virt) { + printk(KERN_WARNING "Failed to ioremap %s\n", + info->map.name); ++ kfree(info); + return -ENOMEM; + } + info->map.cached = ioremap_cache(info->map.phys, info->map.size); +@@ -87,6 +88,7 @@ static int pxa2xx_flash_probe(struct platform_device *pdev) + iounmap((void *)info->map.virt); + if (info->map.cached) + iounmap(info->map.cached); ++ kfree(info); + return -EIO; + } + info->mtd->dev.parent = &pdev->dev; diff --git a/drivers/mtd/mtdblock.c b/drivers/mtd/mtdblock.c index 03e3de3a5d79e..1e94e7d10b8be 100644 --- a/drivers/mtd/mtdblock.c @@ -175760,7 +211710,7 @@ index 03e3de3a5d79e..1e94e7d10b8be 100644 kfree(dev); } diff --git a/drivers/mtd/mtdcore.c b/drivers/mtd/mtdcore.c -index c8fd7f758938b..61f236e0378a6 100644 +index c8fd7f758938b..3abaac109e75e 100644 --- a/drivers/mtd/mtdcore.c +++ b/drivers/mtd/mtdcore.c @@ -546,6 +546,7 @@ static int mtd_nvmem_add(struct mtd_info *mtd) @@ -175771,7 +211721,19 @@ index c8fd7f758938b..61f236e0378a6 100644 config.no_of_node = !of_device_is_compatible(node, "nvmem-cells"); config.priv = mtd; -@@ -724,8 +725,6 @@ int del_mtd_device(struct mtd_info *mtd) +@@ -670,8 +671,10 @@ int add_mtd_device(struct mtd_info *mtd) + dev_set_drvdata(&mtd->dev, mtd); + of_node_get(mtd_get_of_node(mtd)); + error = device_register(&mtd->dev); +- if (error) ++ if (error) { ++ put_device(&mtd->dev); + goto fail_added; ++ } + + /* Add the nvmem provider */ + error = mtd_nvmem_add(mtd); +@@ -724,8 +727,6 @@ int del_mtd_device(struct mtd_info *mtd) mutex_lock(&mtd_table_mutex); @@ -175780,7 +211742,7 @@ index c8fd7f758938b..61f236e0378a6 100644 if (idr_find(&mtd_idr, mtd->index) != mtd) { ret = -ENODEV; goto out_error; -@@ -741,6 +740,8 @@ int del_mtd_device(struct mtd_info *mtd) +@@ -741,6 +742,8 @@ int del_mtd_device(struct mtd_info *mtd) mtd->index, mtd->name, mtd->usecount); ret = -EBUSY; } else { @@ -175789,7 +211751,7 @@ index c8fd7f758938b..61f236e0378a6 100644 /* Try to remove the NVMEM provider */ if (mtd->nvmem) nvmem_unregister(mtd->nvmem); -@@ -825,12 +826,12 @@ static struct nvmem_device *mtd_otp_nvmem_register(struct mtd_info *mtd, +@@ -825,12 +828,12 @@ static struct nvmem_device *mtd_otp_nvmem_register(struct mtd_info *mtd, /* OTP nvmem will be registered on the physical device */ config.dev = mtd->dev.parent; @@ -175804,7 +211766,7 @@ index c8fd7f758938b..61f236e0378a6 100644 config.reg_read = reg_read; config.size = size; config.of_node = 
np; -@@ -842,6 +843,7 @@ static struct nvmem_device *mtd_otp_nvmem_register(struct mtd_info *mtd, +@@ -842,6 +845,7 @@ static struct nvmem_device *mtd_otp_nvmem_register(struct mtd_info *mtd, nvmem = NULL; of_node_put(np); @@ -176584,7 +212546,7 @@ index 0e9d426fe4f2b..b18861bdcdc88 100644 static const struct jz_soc_info jz4725b_soc_info = { diff --git a/drivers/mtd/nand/raw/intel-nand-controller.c b/drivers/mtd/nand/raw/intel-nand-controller.c -index b9784f3da7a11..056835fd45622 100644 +index b9784f3da7a11..53071e791e179 100644 --- a/drivers/mtd/nand/raw/intel-nand-controller.c +++ b/drivers/mtd/nand/raw/intel-nand-controller.c @@ -16,6 +16,7 @@ @@ -176603,7 +212565,7 @@ index b9784f3da7a11..056835fd45622 100644 struct nand_chip *nand; struct mtd_info *mtd; struct resource *res; -@@ -604,19 +606,29 @@ static int ebu_nand_probe(struct platform_device *pdev) +@@ -604,29 +606,42 @@ static int ebu_nand_probe(struct platform_device *pdev) if (IS_ERR(ebu_host->hsnand)) return PTR_ERR(ebu_host->hsnand); @@ -176616,11 +212578,13 @@ index b9784f3da7a11..056835fd45622 100644 + ret = of_property_read_u32(chip_np, "reg", &cs); if (ret) { dev_err(dev, "failed to get chip select: %d\n", ret); - return ret; +- return ret; ++ goto err_of_node_put; } + if (cs >= MAX_CS) { + dev_err(dev, "got invalid chip select: %d\n", cs); -+ return -EINVAL; ++ ret = -EINVAL; ++ goto err_of_node_put; + } + ebu_host->cs_num = cs; @@ -176630,12 +212594,29 @@ index b9784f3da7a11..056835fd45622 100644 ebu_host->cs[cs].chipaddr = devm_ioremap_resource(dev, res); - ebu_host->cs[cs].nand_pa = res->start; if (IS_ERR(ebu_host->cs[cs].chipaddr)) - return PTR_ERR(ebu_host->cs[cs].chipaddr); +- return PTR_ERR(ebu_host->cs[cs].chipaddr); ++ goto err_of_node_put; + ebu_host->cs[cs].nand_pa = res->start; ebu_host->clk = devm_clk_get(dev, NULL); - if (IS_ERR(ebu_host->clk)) -@@ -655,7 +667,7 @@ static int ebu_nand_probe(struct platform_device *pdev) +- if (IS_ERR(ebu_host->clk)) +- return dev_err_probe(dev, PTR_ERR(ebu_host->clk), +- "failed to get clock\n"); ++ if (IS_ERR(ebu_host->clk)) { ++ ret = dev_err_probe(dev, PTR_ERR(ebu_host->clk), ++ "failed to get clock\n"); ++ goto err_of_node_put; ++ } + + ret = clk_prepare_enable(ebu_host->clk); + if (ret) { + dev_err(dev, "failed to enable clock: %d\n", ret); +- return ret; ++ goto err_of_node_put; + } + ebu_host->clk_rate = clk_get_rate(ebu_host->clk); + +@@ -655,7 +670,7 @@ static int ebu_nand_probe(struct platform_device *pdev) writel(ebu_host->cs[cs].addr_sel | EBU_ADDR_MASK(5) | EBU_ADDR_SEL_REGEN, ebu_host->ebu + EBU_ADDR_SEL(cs)); @@ -176644,7 +212625,16 @@ index b9784f3da7a11..056835fd45622 100644 mtd = nand_to_mtd(&ebu_host->chip); if (!mtd->name) { -@@ -711,7 +723,6 @@ static int ebu_nand_remove(struct platform_device *pdev) +@@ -691,6 +706,8 @@ err_cleanup_dma: + ebu_dma_cleanup(ebu_host); + err_disable_unprepare_clk: + clk_disable_unprepare(ebu_host->clk); ++err_of_node_put: ++ of_node_put(chip_np); + + return ret; + } +@@ -711,7 +728,6 @@ static int ebu_nand_remove(struct platform_device *pdev) } static const struct of_device_id ebu_nand_match[] = { @@ -176652,6 +212642,19 @@ index b9784f3da7a11..056835fd45622 100644 { .compatible = "intel,lgm-ebunand" }, {} }; +diff --git a/drivers/mtd/nand/raw/marvell_nand.c b/drivers/mtd/nand/raw/marvell_nand.c +index 2455a581fd70c..b248c5f657d56 100644 +--- a/drivers/mtd/nand/raw/marvell_nand.c ++++ b/drivers/mtd/nand/raw/marvell_nand.c +@@ -2672,7 +2672,7 @@ static int marvell_nand_chip_init(struct device *dev, struct marvell_nfc *nfc, 
+ chip->controller = &nfc->controller; + nand_set_flash_node(chip, np); + +- if (!of_property_read_bool(np, "marvell,nand-keep-config")) ++ if (of_property_read_bool(np, "marvell,nand-keep-config")) + chip->options |= NAND_KEEP_TIMINGS; + + mtd = nand_to_mtd(chip); diff --git a/drivers/mtd/nand/raw/meson_nand.c b/drivers/mtd/nand/raw/meson_nand.c index ac3be92872d06..b97adeee4cc14 100644 --- a/drivers/mtd/nand/raw/meson_nand.c @@ -177341,6 +213344,24 @@ index 1dd1c58980934..da77ab20296ea 100644 &write_cache_variants, &update_cache_variants), SPINAND_HAS_QE_BIT, +diff --git a/drivers/mtd/parsers/bcm47xxpart.c b/drivers/mtd/parsers/bcm47xxpart.c +index 6012a10f10c83..13daf9bffd081 100644 +--- a/drivers/mtd/parsers/bcm47xxpart.c ++++ b/drivers/mtd/parsers/bcm47xxpart.c +@@ -233,11 +233,11 @@ static int bcm47xxpart_parse(struct mtd_info *master, + } + + /* Read middle of the block */ +- err = mtd_read(master, offset + 0x8000, 0x4, &bytes_read, ++ err = mtd_read(master, offset + (blocksize / 2), 0x4, &bytes_read, + (uint8_t *)buf); + if (err && !mtd_is_bitflip(err)) { + pr_err("mtd_read error while parsing (offset: 0x%X): %d\n", +- offset, err); ++ offset + (blocksize / 2), err); + continue; + } + diff --git a/drivers/mtd/parsers/ofpart_bcm4908.c b/drivers/mtd/parsers/ofpart_bcm4908.c index 0eddef4c198ec..bb072a0940e48 100644 --- a/drivers/mtd/parsers/ofpart_bcm4908.c @@ -177490,8 +213511,201 @@ index 47fbf1d1e5573..516e502694780 100644 return 0; } +diff --git a/drivers/mtd/spi-nor/controllers/intel-spi-pci.c b/drivers/mtd/spi-nor/controllers/intel-spi-pci.c +index 1bc53b8bb88a9..508f7ca098eff 100644 +--- a/drivers/mtd/spi-nor/controllers/intel-spi-pci.c ++++ b/drivers/mtd/spi-nor/controllers/intel-spi-pci.c +@@ -16,12 +16,30 @@ + #define BCR 0xdc + #define BCR_WPD BIT(0) + ++static bool intel_spi_pci_set_writeable(void __iomem *base, void *data) ++{ ++ struct pci_dev *pdev = data; ++ u32 bcr; ++ ++ /* Try to make the chip read/write */ ++ pci_read_config_dword(pdev, BCR, &bcr); ++ if (!(bcr & BCR_WPD)) { ++ bcr |= BCR_WPD; ++ pci_write_config_dword(pdev, BCR, bcr); ++ pci_read_config_dword(pdev, BCR, &bcr); ++ } ++ ++ return bcr & BCR_WPD; ++} ++ + static const struct intel_spi_boardinfo bxt_info = { + .type = INTEL_SPI_BXT, ++ .set_writeable = intel_spi_pci_set_writeable, + }; + + static const struct intel_spi_boardinfo cnl_info = { + .type = INTEL_SPI_CNL, ++ .set_writeable = intel_spi_pci_set_writeable, + }; + + static int intel_spi_pci_probe(struct pci_dev *pdev, +@@ -29,7 +47,6 @@ static int intel_spi_pci_probe(struct pci_dev *pdev, + { + struct intel_spi_boardinfo *info; + struct intel_spi *ispi; +- u32 bcr; + int ret; + + ret = pcim_enable_device(pdev); +@@ -41,15 +58,7 @@ static int intel_spi_pci_probe(struct pci_dev *pdev, + if (!info) + return -ENOMEM; + +- /* Try to make the chip read/write */ +- pci_read_config_dword(pdev, BCR, &bcr); +- if (!(bcr & BCR_WPD)) { +- bcr |= BCR_WPD; +- pci_write_config_dword(pdev, BCR, bcr); +- pci_read_config_dword(pdev, BCR, &bcr); +- } +- info->writeable = !!(bcr & BCR_WPD); +- ++ info->data = pdev; + ispi = intel_spi_probe(&pdev->dev, &pdev->resource[0], info); + if (IS_ERR(ispi)) + return PTR_ERR(ispi); +diff --git a/drivers/mtd/spi-nor/controllers/intel-spi.c b/drivers/mtd/spi-nor/controllers/intel-spi.c +index a413892ff449f..6cb818feaf7f0 100644 +--- a/drivers/mtd/spi-nor/controllers/intel-spi.c ++++ b/drivers/mtd/spi-nor/controllers/intel-spi.c +@@ -52,17 +52,17 @@ + #define FRACC 0x50 + + #define FREG(n) (0x54 + ((n) * 4)) +-#define 
FREG_BASE_MASK 0x3fff ++#define FREG_BASE_MASK GENMASK(14, 0) + #define FREG_LIMIT_SHIFT 16 +-#define FREG_LIMIT_MASK (0x03fff << FREG_LIMIT_SHIFT) ++#define FREG_LIMIT_MASK GENMASK(30, 16) + + /* Offset is from @ispi->pregs */ + #define PR(n) ((n) * 4) + #define PR_WPE BIT(31) + #define PR_LIMIT_SHIFT 16 +-#define PR_LIMIT_MASK (0x3fff << PR_LIMIT_SHIFT) ++#define PR_LIMIT_MASK GENMASK(30, 16) + #define PR_RPE BIT(15) +-#define PR_BASE_MASK 0x3fff ++#define PR_BASE_MASK GENMASK(14, 0) + + /* Offsets are from @ispi->sregs */ + #define SSFSTS_CTL 0x00 +@@ -116,7 +116,7 @@ + #define ERASE_OPCODE_SHIFT 8 + #define ERASE_OPCODE_MASK (0xff << ERASE_OPCODE_SHIFT) + #define ERASE_64K_OPCODE_SHIFT 16 +-#define ERASE_64K_OPCODE_MASK (0xff << ERASE_OPCODE_SHIFT) ++#define ERASE_64K_OPCODE_MASK (0xff << ERASE_64K_OPCODE_SHIFT) + + #define INTEL_SPI_TIMEOUT 5000 /* ms */ + #define INTEL_SPI_FIFO_SZ 64 +@@ -131,7 +131,6 @@ + * @sregs: Start of software sequencer registers + * @nregions: Maximum number of regions + * @pr_num: Maximum number of protected range registers +- * @writeable: Is the chip writeable + * @locked: Is SPI setting locked + * @swseq_reg: Use SW sequencer in register reads/writes + * @swseq_erase: Use SW sequencer in erase operation +@@ -149,7 +148,6 @@ struct intel_spi { + void __iomem *sregs; + size_t nregions; + size_t pr_num; +- bool writeable; + bool locked; + bool swseq_reg; + bool swseq_erase; +@@ -304,6 +302,14 @@ static int intel_spi_wait_sw_busy(struct intel_spi *ispi) + INTEL_SPI_TIMEOUT * 1000); + } + ++static bool intel_spi_set_writeable(struct intel_spi *ispi) ++{ ++ if (!ispi->info->set_writeable) ++ return false; ++ ++ return ispi->info->set_writeable(ispi->base, ispi->info->data); ++} ++ + static int intel_spi_init(struct intel_spi *ispi) + { + u32 opmenu0, opmenu1, lvscc, uvscc, val; +@@ -316,19 +322,6 @@ static int intel_spi_init(struct intel_spi *ispi) + ispi->nregions = BYT_FREG_NUM; + ispi->pr_num = BYT_PR_NUM; + ispi->swseq_reg = true; +- +- if (writeable) { +- /* Disable write protection */ +- val = readl(ispi->base + BYT_BCR); +- if (!(val & BYT_BCR_WPD)) { +- val |= BYT_BCR_WPD; +- writel(val, ispi->base + BYT_BCR); +- val = readl(ispi->base + BYT_BCR); +- } +- +- ispi->writeable = !!(val & BYT_BCR_WPD); +- } +- + break; + + case INTEL_SPI_LPT: +@@ -358,6 +351,12 @@ static int intel_spi_init(struct intel_spi *ispi) + return -EINVAL; + } + ++ /* Try to disable write protection if user asked to do so */ ++ if (writeable && !intel_spi_set_writeable(ispi)) { ++ dev_warn(ispi->dev, "can't disable chip write protection\n"); ++ writeable = false; ++ } ++ + /* Disable #SMI generation from HW sequencer */ + val = readl(ispi->base + HSFSTS_CTL); + val &= ~HSFSTS_CTL_FSMIE; +@@ -884,9 +883,12 @@ static void intel_spi_fill_partition(struct intel_spi *ispi, + /* + * If any of the regions have protection bits set, make the + * whole partition read-only to be on the safe side. ++ * ++ * Also if the user did not ask the chip to be writeable ++ * mask the bit too. 
+ */ +- if (intel_spi_is_protected(ispi, base, limit)) +- ispi->writeable = false; ++ if (!writeable || intel_spi_is_protected(ispi, base, limit)) ++ part->mask_flags |= MTD_WRITEABLE; + + end = (limit << 12) + 4096; + if (end > part->size) +@@ -927,7 +929,6 @@ struct intel_spi *intel_spi_probe(struct device *dev, + + ispi->dev = dev; + ispi->info = info; +- ispi->writeable = info->writeable; + + ret = intel_spi_init(ispi); + if (ret) +@@ -945,10 +946,6 @@ struct intel_spi *intel_spi_probe(struct device *dev, + + intel_spi_fill_partition(ispi, &part); + +- /* Prevent writes if not explicitly enabled */ +- if (!ispi->writeable || !writeable) +- ispi->nor.mtd.flags &= ~MTD_WRITEABLE; +- + ret = mtd_device_register(&ispi->nor.mtd, &part, 1); + if (ret) + return ERR_PTR(ret); diff --git a/drivers/mtd/spi-nor/core.c b/drivers/mtd/spi-nor/core.c -index cc08bd707378f..eb5d7b3d18609 100644 +index cc08bd707378f..d5dcc74a625ed 100644 --- a/drivers/mtd/spi-nor/core.c +++ b/drivers/mtd/spi-nor/core.c @@ -177,7 +177,7 @@ static int spi_nor_controller_ops_write_reg(struct spi_nor *nor, u8 opcode, @@ -177537,7 +213751,26 @@ index cc08bd707378f..eb5d7b3d18609 100644 return spi_mem_exec_op(nor->spimem, &op); } else if (nor->controller_ops->erase) { -@@ -3139,7 +3148,6 @@ int spi_nor_scan(struct spi_nor *nor, const char *name, +@@ -1400,6 +1409,8 @@ spi_nor_find_best_erase_type(const struct spi_nor_erase_map *map, + continue; + + erase = &map->erase_type[i]; ++ if (!erase->size) ++ continue; + + /* Alignment is not mandatory for overlaid regions */ + if (region->offset & SNOR_OVERLAID_REGION && +@@ -2146,7 +2157,8 @@ static int spi_nor_spimem_check_readop(struct spi_nor *nor, + spi_nor_spimem_setup_op(nor, &op, read->proto); + + /* convert the dummy cycles to the number of bytes */ +- op.dummy.nbytes = (nor->read_dummy * op.dummy.buswidth) / 8; ++ op.dummy.nbytes = (read->num_mode_clocks + read->num_wait_states) * ++ op.dummy.buswidth / 8; + if (spi_nor_protocol_is_dtr(nor->read_proto)) + op.dummy.nbytes *= 2; + +@@ -3139,7 +3151,6 @@ int spi_nor_scan(struct spi_nor *nor, const char *name, mtd->writesize = nor->params->writesize; mtd->flags = MTD_CAP_NORFLASH; mtd->size = nor->params->size; @@ -177545,7 +213778,7 @@ index cc08bd707378f..eb5d7b3d18609 100644 mtd->_read = spi_nor_read; mtd->_suspend = spi_nor_suspend; mtd->_resume = spi_nor_resume; -@@ -3169,6 +3177,8 @@ int spi_nor_scan(struct spi_nor *nor, const char *name, +@@ -3169,6 +3180,8 @@ int spi_nor_scan(struct spi_nor *nor, const char *name, if (info->flags & SPI_NOR_NO_ERASE) mtd->flags |= MTD_NO_ERASE; @@ -177554,6 +213787,38 @@ index cc08bd707378f..eb5d7b3d18609 100644 mtd->dev.parent = dev; nor->page_size = nor->params->page_size; +diff --git a/drivers/mtd/spi-nor/sysfs.c b/drivers/mtd/spi-nor/sysfs.c +index 9aec9d8a98ada..4c3b351aef245 100644 +--- a/drivers/mtd/spi-nor/sysfs.c ++++ b/drivers/mtd/spi-nor/sysfs.c +@@ -67,6 +67,19 @@ static struct bin_attribute *spi_nor_sysfs_bin_entries[] = { + NULL + }; + ++static umode_t spi_nor_sysfs_is_visible(struct kobject *kobj, ++ struct attribute *attr, int n) ++{ ++ struct spi_device *spi = to_spi_device(kobj_to_dev(kobj)); ++ struct spi_mem *spimem = spi_get_drvdata(spi); ++ struct spi_nor *nor = spi_mem_get_drvdata(spimem); ++ ++ if (attr == &dev_attr_jedec_id.attr && !nor->info->id_len) ++ return 0; ++ ++ return 0444; ++} ++ + static umode_t spi_nor_sysfs_is_bin_visible(struct kobject *kobj, + struct bin_attribute *attr, int n) + { +@@ -82,6 +95,7 @@ static umode_t 
spi_nor_sysfs_is_bin_visible(struct kobject *kobj, + + static const struct attribute_group spi_nor_sysfs_group = { + .name = "spi-nor", ++ .is_visible = spi_nor_sysfs_is_visible, + .is_bin_visible = spi_nor_sysfs_is_bin_visible, + .attrs = spi_nor_sysfs_entries, + .bin_attrs = spi_nor_sysfs_bin_entries, diff --git a/drivers/mtd/spi-nor/xilinx.c b/drivers/mtd/spi-nor/xilinx.c index 1138bdbf41998..75dd13a390404 100644 --- a/drivers/mtd/spi-nor/xilinx.c @@ -177961,6 +214226,39 @@ index 3c8f665c15580..28dccbc0e8d8f 100644 priv->ci = ci; mm = &ci->misc_map; +diff --git a/drivers/net/arcnet/com20020_cs.c b/drivers/net/arcnet/com20020_cs.c +index b88a109b3b150..26ee263d8f3aa 100644 +--- a/drivers/net/arcnet/com20020_cs.c ++++ b/drivers/net/arcnet/com20020_cs.c +@@ -113,6 +113,7 @@ static int com20020_probe(struct pcmcia_device *p_dev) + struct com20020_dev *info; + struct net_device *dev; + struct arcnet_local *lp; ++ int ret = -ENOMEM; + + dev_dbg(&p_dev->dev, "com20020_attach()\n"); + +@@ -142,12 +143,18 @@ static int com20020_probe(struct pcmcia_device *p_dev) + info->dev = dev; + p_dev->priv = info; + +- return com20020_config(p_dev); ++ ret = com20020_config(p_dev); ++ if (ret) ++ goto fail_config; ++ ++ return 0; + ++fail_config: ++ free_arcdev(dev); + fail_alloc_dev: + kfree(info); + fail_alloc_info: +- return -ENOMEM; ++ return ret; + } /* com20020_attach */ + + static void com20020_detach(struct pcmcia_device *link) diff --git a/drivers/net/bareudp.c b/drivers/net/bareudp.c index 54e321a695ce9..98c915943f323 100644 --- a/drivers/net/bareudp.c @@ -178019,7 +214317,7 @@ index 54e321a695ce9..98c915943f323 100644 __be32 saddr; diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c -index 6006c2e8fa2bc..8ad095c19f271 100644 +index 6006c2e8fa2bc..ff6d4e74a186a 100644 --- a/drivers/net/bonding/bond_3ad.c +++ b/drivers/net/bonding/bond_3ad.c @@ -87,8 +87,9 @@ static const u8 null_mac_addr[ETH_ALEN + 2] __long_aligned = { @@ -178053,7 +214351,15 @@ index 6006c2e8fa2bc..8ad095c19f271 100644 } } break; -@@ -1779,6 +1780,7 @@ static void ad_agg_selection_logic(struct aggregator *agg, +@@ -1538,6 +1539,7 @@ static void ad_port_selection_logic(struct port *port, bool *update_slave_arr) + slave_err(bond->dev, port->slave->dev, + "Port %d did not find a suitable aggregator\n", + port->actor_port_number); ++ return; + } + } + /* if all aggregator's ports are READY_N == TRUE, set ready=TRUE +@@ -1779,6 +1781,7 @@ static void ad_agg_selection_logic(struct aggregator *agg, port = port->next_port_in_aggregator) { __enable_port(port); } @@ -178061,7 +214367,7 @@ index 6006c2e8fa2bc..8ad095c19f271 100644 } } -@@ -1994,7 +1996,7 @@ static void ad_marker_response_received(struct bond_marker *marker, +@@ -1994,7 +1997,7 @@ static void ad_marker_response_received(struct bond_marker *marker, */ void bond_3ad_initiate_agg_selection(struct bonding *bond, int timeout) { @@ -178070,7 +214376,7 @@ index 6006c2e8fa2bc..8ad095c19f271 100644 } /** -@@ -2006,30 +2008,24 @@ void bond_3ad_initiate_agg_selection(struct bonding *bond, int timeout) +@@ -2006,30 +2009,24 @@ void bond_3ad_initiate_agg_selection(struct bonding *bond, int timeout) */ void bond_3ad_initialize(struct bonding *bond, u16 tick_resolution) { @@ -178117,7 +214423,7 @@ index 6006c2e8fa2bc..8ad095c19f271 100644 } /** -@@ -2227,7 +2223,8 @@ void bond_3ad_unbind_slave(struct slave *slave) +@@ -2227,7 +2224,8 @@ void bond_3ad_unbind_slave(struct slave *slave) temp_aggregator->num_of_ports--; if (__agg_active_ports(temp_aggregator) == 0) { 
select_new_active_agg = temp_aggregator->is_active; @@ -178127,7 +214433,7 @@ index 6006c2e8fa2bc..8ad095c19f271 100644 if (select_new_active_agg) { slave_info(bond->dev, slave->dev, "Removing an active aggregator\n"); /* select new active aggregator */ -@@ -2277,6 +2274,28 @@ void bond_3ad_update_ad_actor_settings(struct bonding *bond) +@@ -2277,6 +2275,28 @@ void bond_3ad_update_ad_actor_settings(struct bonding *bond) spin_unlock_bh(&bond->mode_lock); } @@ -178156,7 +214462,7 @@ index 6006c2e8fa2bc..8ad095c19f271 100644 /** * bond_3ad_state_machine_handler - handle state machines timeout * @work: work context to fetch bonding struct to work on from -@@ -2312,9 +2331,7 @@ void bond_3ad_state_machine_handler(struct work_struct *work) +@@ -2312,9 +2332,7 @@ void bond_3ad_state_machine_handler(struct work_struct *work) if (!bond_has_slaves(bond)) goto re_arm; @@ -178240,8 +214546,21 @@ index 7d3752cbf761d..a6a70b872ac4a 100644 if (bond->alb_info.rlb_enabled) { bond->alb_info.rlb_rebalance = 1; /* If the updelay module parameter is smaller than the +diff --git a/drivers/net/bonding/bond_debugfs.c b/drivers/net/bonding/bond_debugfs.c +index 4f9b4a18c74cd..5940945266489 100644 +--- a/drivers/net/bonding/bond_debugfs.c ++++ b/drivers/net/bonding/bond_debugfs.c +@@ -76,7 +76,7 @@ void bond_debug_reregister(struct bonding *bond) + + d = debugfs_rename(bonding_debug_root, bond->debug_dir, + bonding_debug_root, bond->dev->name); +- if (d) { ++ if (!IS_ERR(d)) { + bond->debug_dir = d; + } else { + netdev_warn(bond->dev, "failed to reregister, so just unregister old one\n"); diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c -index 77dc79a7f5748..402dffc508efb 100644 +index 77dc79a7f5748..456298919d541 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -862,12 +862,8 @@ static void bond_hw_addr_flush(struct net_device *bond_dev, @@ -178314,16 +214633,16 @@ index 77dc79a7f5748..402dffc508efb 100644 - dev_mc_sync_multiple(slave_dev, bond_dev); - dev_uc_sync_multiple(slave_dev, bond_dev); - netif_addr_unlock_bh(bond_dev); -- -- if (BOND_MODE(bond) == BOND_MODE_8023AD) { -- /* add lacpdu mc addr to mc list */ -- u8 lacpdu_multicast[ETH_ALEN] = MULTICAST_LACPDU_ADDR; + if (bond_dev->flags & IFF_UP) { + netif_addr_lock_bh(bond_dev); + dev_mc_sync_multiple(slave_dev, bond_dev); + dev_uc_sync_multiple(slave_dev, bond_dev); + netif_addr_unlock_bh(bond_dev); +- if (BOND_MODE(bond) == BOND_MODE_8023AD) { +- /* add lacpdu mc addr to mc list */ +- u8 lacpdu_multicast[ETH_ALEN] = MULTICAST_LACPDU_ADDR; +- - dev_mc_add(slave_dev, lacpdu_multicast); + if (BOND_MODE(bond) == BOND_MODE_8023AD) + dev_mc_add(slave_dev, lacpdu_mcast_addr); @@ -178353,7 +214672,31 @@ index 77dc79a7f5748..402dffc508efb 100644 } slave_disable_netpoll(slave); -@@ -3475,9 +3472,11 @@ re_arm: +@@ -2502,12 +2499,21 @@ static int bond_slave_info_query(struct net_device *bond_dev, struct ifslave *in + /* called with rcu_read_lock() */ + static int bond_miimon_inspect(struct bonding *bond) + { ++ bool ignore_updelay = false; + int link_state, commit = 0; + struct list_head *iter; + struct slave *slave; +- bool ignore_updelay; + +- ignore_updelay = !rcu_dereference(bond->curr_active_slave); ++ if (BOND_MODE(bond) == BOND_MODE_ACTIVEBACKUP) { ++ ignore_updelay = !rcu_dereference(bond->curr_active_slave); ++ } else { ++ struct bond_up_slave *usable_slaves; ++ ++ usable_slaves = rcu_dereference(bond->usable_slaves); ++ ++ if (usable_slaves && usable_slaves->count == 0) ++ ignore_updelay = 
true; ++ } + + bond_for_each_slave_rcu(bond, slave, iter) { + bond_propose_link_state(slave, BOND_LINK_NOCHANGE); +@@ -3475,9 +3481,11 @@ re_arm: if (!rtnl_trylock()) return; @@ -178366,7 +214709,7 @@ index 77dc79a7f5748..402dffc508efb 100644 if (should_notify_rtnl) { bond_slave_state_notify(bond); bond_slave_link_notify(bond); -@@ -3818,14 +3817,19 @@ static bool bond_flow_dissect(struct bonding *bond, struct sk_buff *skb, const v +@@ -3818,14 +3826,19 @@ static bool bond_flow_dissect(struct bonding *bond, struct sk_buff *skb, const v return true; } @@ -178388,7 +214731,7 @@ index 77dc79a7f5748..402dffc508efb 100644 } /* Generate hash based on xmit policy. If @skb is given it is used to linearize -@@ -3855,7 +3859,7 @@ static u32 __bond_xmit_hash(struct bonding *bond, struct sk_buff *skb, const voi +@@ -3855,7 +3868,7 @@ static u32 __bond_xmit_hash(struct bonding *bond, struct sk_buff *skb, const voi memcpy(&hash, &flow.ports.ports, sizeof(hash)); } @@ -178397,7 +214740,7 @@ index 77dc79a7f5748..402dffc508efb 100644 } /** -@@ -3872,8 +3876,8 @@ u32 bond_xmit_hash(struct bonding *bond, struct sk_buff *skb) +@@ -3872,8 +3885,8 @@ u32 bond_xmit_hash(struct bonding *bond, struct sk_buff *skb) skb->l4_hash) return skb->hash; @@ -178408,7 +214751,7 @@ index 77dc79a7f5748..402dffc508efb 100644 skb_headlen(skb)); } -@@ -3926,6 +3930,12 @@ static int bond_open(struct net_device *bond_dev) +@@ -3926,6 +3939,12 @@ static int bond_open(struct net_device *bond_dev) struct list_head *iter; struct slave *slave; @@ -178421,7 +214764,7 @@ index 77dc79a7f5748..402dffc508efb 100644 /* reset slave->backup and slave->inactive */ if (bond_has_slaves(bond)) { bond_for_each_slave(bond, slave, iter) { -@@ -3963,6 +3973,9 @@ static int bond_open(struct net_device *bond_dev) +@@ -3963,6 +3982,9 @@ static int bond_open(struct net_device *bond_dev) /* register to receive LACPDUs */ bond->recv_probe = bond_3ad_lacpdu_recv; bond_3ad_initiate_agg_selection(bond, 1); @@ -178431,7 +214774,7 @@ index 77dc79a7f5748..402dffc508efb 100644 } if (bond_mode_can_use_xmit_hash(bond)) -@@ -3974,6 +3987,7 @@ static int bond_open(struct net_device *bond_dev) +@@ -3974,6 +3996,7 @@ static int bond_open(struct net_device *bond_dev) static int bond_close(struct net_device *bond_dev) { struct bonding *bond = netdev_priv(bond_dev); @@ -178439,7 +214782,7 @@ index 77dc79a7f5748..402dffc508efb 100644 bond_work_cancel_all(bond); bond->send_peer_notif = 0; -@@ -3981,6 +3995,19 @@ static int bond_close(struct net_device *bond_dev) +@@ -3981,6 +4004,19 @@ static int bond_close(struct net_device *bond_dev) bond_alb_deinitialize(bond); bond->recv_probe = NULL; @@ -178459,7 +214802,7 @@ index 77dc79a7f5748..402dffc508efb 100644 return 0; } -@@ -4843,25 +4870,39 @@ static netdev_tx_t bond_xmit_broadcast(struct sk_buff *skb, +@@ -4843,25 +4879,39 @@ static netdev_tx_t bond_xmit_broadcast(struct sk_buff *skb, struct bonding *bond = netdev_priv(bond_dev); struct slave *slave = NULL; struct list_head *iter; @@ -178472,10 +214815,10 @@ index 77dc79a7f5748..402dffc508efb 100644 - if (bond_slave_is_up(slave) && slave->link == BOND_LINK_UP) { - struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); + struct sk_buff *skb2; -+ + + if (!(bond_slave_is_up(slave) && slave->link == BOND_LINK_UP)) + continue; - ++ + if (bond_is_last_slave(bond, slave)) { + skb2 = skb; + skb_used = true; @@ -178507,7 +214850,7 @@ index 77dc79a7f5748..402dffc508efb 100644 } /*------------------------- Device initialization ---------------------------*/ -@@ -4999,7 +5040,7 @@ static u32 
bond_sk_hash_l34(struct sock *sk) +@@ -4999,7 +5049,7 @@ static u32 bond_sk_hash_l34(struct sock *sk) /* L4 */ memcpy(&hash, &flow.ports.ports, sizeof(hash)); /* L3 */ @@ -178516,7 +214859,7 @@ index 77dc79a7f5748..402dffc508efb 100644 } static struct net_device *__bond_sk_get_lower_dev(struct bonding *bond, -@@ -5872,15 +5913,6 @@ static int bond_init(struct net_device *bond_dev) +@@ -5872,15 +5922,6 @@ static int bond_init(struct net_device *bond_dev) if (!bond->wq) return -ENOMEM; @@ -178707,6 +215050,39 @@ index 52671d1ea17d5..e04d4e7cc8683 100644 u8 head = c_can_get_tx_head(tx_ring); /* Start transmission for all cached messages */ +diff --git a/drivers/net/can/cc770/cc770_isa.c b/drivers/net/can/cc770/cc770_isa.c +index 194c86e0f340f..8f6dccd5a5879 100644 +--- a/drivers/net/can/cc770/cc770_isa.c ++++ b/drivers/net/can/cc770/cc770_isa.c +@@ -264,22 +264,24 @@ static int cc770_isa_probe(struct platform_device *pdev) + if (err) { + dev_err(&pdev->dev, + "couldn't register device (err=%d)\n", err); +- goto exit_unmap; ++ goto exit_free; + } + + dev_info(&pdev->dev, "device registered (reg_base=0x%p, irq=%d)\n", + priv->reg_base, dev->irq); + return 0; + +- exit_unmap: ++exit_free: ++ free_cc770dev(dev); ++exit_unmap: + if (mem[idx]) + iounmap(base); +- exit_release: ++exit_release: + if (mem[idx]) + release_mem_region(mem[idx], iosize); + else + release_region(port[idx], iosize); +- exit: ++exit: + return err; + } + diff --git a/drivers/net/can/dev/bittiming.c b/drivers/net/can/dev/bittiming.c index f49170eadd547..b1b5a82f08299 100644 --- a/drivers/net/can/dev/bittiming.c @@ -179346,7 +215722,7 @@ index 74d9899fc904c..eb74cdf26b88c 100644 can->bec.txerr = bec.txerr; can->bec.rxerr = bec.rxerr; diff --git a/drivers/net/can/m_can/m_can.c b/drivers/net/can/m_can/m_can.c -index 2470c47b2e315..c4596fbe6d2f8 100644 +index 2470c47b2e315..e027229c1955b 100644 --- a/drivers/net/can/m_can/m_can.c +++ b/drivers/net/can/m_can/m_can.c @@ -204,16 +204,16 @@ enum m_can_reg { @@ -179435,7 +215811,26 @@ index 2470c47b2e315..c4596fbe6d2f8 100644 /* ack txe element */ m_can_write(cdev, M_CAN_TXEFA, FIELD_PREP(TXEFA_EFAI_MASK, -@@ -1361,7 +1367,9 @@ static void m_can_chip_config(struct net_device *dev) +@@ -1242,10 +1248,17 @@ static int m_can_set_bittiming(struct net_device *dev) + * - setup bittiming + * - configure timestamp generation + */ +-static void m_can_chip_config(struct net_device *dev) ++static int m_can_chip_config(struct net_device *dev) + { + struct m_can_classdev *cdev = netdev_priv(dev); + u32 cccr, test; ++ int err; ++ ++ err = m_can_init_ram(cdev); ++ if (err) { ++ dev_err(cdev->dev, "Message RAM configuration failed\n"); ++ return err; ++ } + + m_can_config_endisable(cdev, true); + +@@ -1361,24 +1374,33 @@ static void m_can_chip_config(struct net_device *dev) /* enable internal timestamp generation, with a prescalar of 16. 
The * prescalar is applied to the nominal bit timing */ @@ -179446,7 +215841,33 @@ index 2470c47b2e315..c4596fbe6d2f8 100644 m_can_config_endisable(cdev, false); -@@ -1494,20 +1502,32 @@ static int m_can_dev_setup(struct m_can_classdev *cdev) + if (cdev->ops->init) + cdev->ops->init(cdev); ++ ++ return 0; + } + +-static void m_can_start(struct net_device *dev) ++static int m_can_start(struct net_device *dev) + { + struct m_can_classdev *cdev = netdev_priv(dev); ++ int ret; + + /* basic m_can configuration */ +- m_can_chip_config(dev); ++ ret = m_can_chip_config(dev); ++ if (ret) ++ return ret; + + cdev->can.state = CAN_STATE_ERROR_ACTIVE; + + m_can_enable_all_interrupts(cdev); ++ ++ return 0; + } + + static int m_can_set_mode(struct net_device *dev, enum can_mode mode) +@@ -1494,20 +1516,32 @@ static int m_can_dev_setup(struct m_can_classdev *cdev) case 30: /* CAN_CTRLMODE_FD_NON_ISO is fixed with M_CAN IP v3.0.x */ can_set_static_ctrlmode(dev, CAN_CTRLMODE_FD_NON_ISO); @@ -179485,7 +215906,7 @@ index 2470c47b2e315..c4596fbe6d2f8 100644 cdev->can.ctrlmode_supported |= (m_can_niso_supported(cdev) ? -@@ -1622,8 +1642,6 @@ static netdev_tx_t m_can_tx_handler(struct m_can_classdev *cdev) +@@ -1622,8 +1656,6 @@ static netdev_tx_t m_can_tx_handler(struct m_can_classdev *cdev) if (err) goto out_fail; @@ -179494,7 +215915,7 @@ index 2470c47b2e315..c4596fbe6d2f8 100644 if (cdev->can.ctrlmode & CAN_CTRLMODE_FD) { cccr = m_can_read(cdev, M_CAN_CCCR); cccr &= ~CCCR_CMR_MASK; -@@ -1640,6 +1658,9 @@ static netdev_tx_t m_can_tx_handler(struct m_can_classdev *cdev) +@@ -1640,6 +1672,9 @@ static netdev_tx_t m_can_tx_handler(struct m_can_classdev *cdev) m_can_write(cdev, M_CAN_CCCR, cccr); } m_can_write(cdev, M_CAN_TXBTIE, 0x1); @@ -179504,6 +215925,42 @@ index 2470c47b2e315..c4596fbe6d2f8 100644 m_can_write(cdev, M_CAN_TXBAR, 0x1); /* End of xmit function for version 3.0.x */ } else { +@@ -1803,7 +1838,9 @@ static int m_can_open(struct net_device *dev) + } + + /* start the m_can controller */ +- m_can_start(dev); ++ err = m_can_start(dev); ++ if (err) ++ goto exit_irq_fail; + + can_led_event(dev, CAN_LED_EVENT_OPEN); + +@@ -1910,7 +1947,7 @@ int m_can_class_get_clocks(struct m_can_classdev *cdev) + cdev->hclk = devm_clk_get(cdev->dev, "hclk"); + cdev->cclk = devm_clk_get(cdev->dev, "cclk"); + +- if (IS_ERR(cdev->cclk)) { ++ if (IS_ERR(cdev->hclk) || IS_ERR(cdev->cclk)) { + dev_err(cdev->dev, "no clock found\n"); + ret = -ENODEV; + } +@@ -2061,9 +2098,13 @@ int m_can_class_resume(struct device *dev) + ret = m_can_clk_start(cdev); + if (ret) + return ret; ++ ret = m_can_start(ndev); ++ if (ret) { ++ m_can_clk_stop(cdev); ++ ++ return ret; ++ } + +- m_can_init_ram(cdev); +- m_can_start(ndev); + netif_device_attach(ndev); + netif_start_queue(ndev); + } diff --git a/drivers/net/can/m_can/m_can.h b/drivers/net/can/m_can/m_can.h index d18b515e6ccc7..2c5d409971686 100644 --- a/drivers/net/can/m_can/m_can.h @@ -179519,7 +215976,7 @@ index d18b515e6ccc7..2c5d409971686 100644 int version; diff --git a/drivers/net/can/m_can/m_can_pci.c b/drivers/net/can/m_can/m_can_pci.c -index 89cc3d41e952b..8f184a852a0a7 100644 +index 89cc3d41e952b..f2219aa2824b3 100644 --- a/drivers/net/can/m_can/m_can_pci.c +++ b/drivers/net/can/m_can/m_can_pci.c @@ -18,7 +18,7 @@ @@ -179561,6 +216018,102 @@ index 89cc3d41e952b..8f184a852a0a7 100644 return 0; } +@@ -110,7 +120,7 @@ static int m_can_pci_probe(struct pci_dev *pci, const struct pci_device_id *id) + + ret = pci_alloc_irq_vectors(pci, 1, 1, PCI_IRQ_ALL_TYPES); + if (ret < 0) +- return 
ret; ++ goto err_free_dev; + + mcan_class->dev = &pci->dev; + mcan_class->net->irq = pci_irq_vector(pci, 0); +@@ -122,7 +132,7 @@ static int m_can_pci_probe(struct pci_dev *pci, const struct pci_device_id *id) + + ret = m_can_class_register(mcan_class); + if (ret) +- goto err; ++ goto err_free_irq; + + /* Enable interrupt control at CAN wrapper IP */ + writel(0x1, base + CTL_CSR_INT_CTL_OFFSET); +@@ -134,8 +144,10 @@ static int m_can_pci_probe(struct pci_dev *pci, const struct pci_device_id *id) + + return 0; + +-err: ++err_free_irq: + pci_free_irq_vectors(pci); ++err_free_dev: ++ m_can_class_free_dev(mcan_class->net); + return ret; + } + +@@ -151,6 +163,7 @@ static void m_can_pci_remove(struct pci_dev *pci) + writel(0x0, priv->base + CTL_CSR_INT_CTL_OFFSET); + + m_can_class_unregister(mcan_class); ++ m_can_class_free_dev(mcan_class->net); + pci_free_irq_vectors(pci); + } + +diff --git a/drivers/net/can/m_can/m_can_platform.c b/drivers/net/can/m_can/m_can_platform.c +index eee47bad05920..de6d8e01bf2e8 100644 +--- a/drivers/net/can/m_can/m_can_platform.c ++++ b/drivers/net/can/m_can/m_can_platform.c +@@ -140,10 +140,6 @@ static int m_can_plat_probe(struct platform_device *pdev) + + platform_set_drvdata(pdev, mcan_class); + +- ret = m_can_init_ram(mcan_class); +- if (ret) +- goto probe_fail; +- + pm_runtime_enable(mcan_class->dev); + ret = m_can_class_register(mcan_class); + if (ret) +diff --git a/drivers/net/can/m_can/tcan4x5x-core.c b/drivers/net/can/m_can/tcan4x5x-core.c +index 04687b15b250e..c83b347be1cfd 100644 +--- a/drivers/net/can/m_can/tcan4x5x-core.c ++++ b/drivers/net/can/m_can/tcan4x5x-core.c +@@ -10,7 +10,7 @@ + #define TCAN4X5X_DEV_ID1 0x04 + #define TCAN4X5X_REV 0x08 + #define TCAN4X5X_STATUS 0x0C +-#define TCAN4X5X_ERROR_STATUS 0x10 ++#define TCAN4X5X_ERROR_STATUS_MASK 0x10 + #define TCAN4X5X_CONTROL 0x14 + + #define TCAN4X5X_CONFIG 0x800 +@@ -204,17 +204,7 @@ static int tcan4x5x_clear_interrupts(struct m_can_classdev *cdev) + if (ret) + return ret; + +- ret = tcan4x5x_write_tcan_reg(cdev, TCAN4X5X_MCAN_INT_REG, +- TCAN4X5X_ENABLE_MCAN_INT); +- if (ret) +- return ret; +- +- ret = tcan4x5x_write_tcan_reg(cdev, TCAN4X5X_INT_FLAGS, +- TCAN4X5X_CLEAR_ALL_INT); +- if (ret) +- return ret; +- +- return tcan4x5x_write_tcan_reg(cdev, TCAN4X5X_ERROR_STATUS, ++ return tcan4x5x_write_tcan_reg(cdev, TCAN4X5X_INT_FLAGS, + TCAN4X5X_CLEAR_ALL_INT); + } + +@@ -234,8 +224,8 @@ static int tcan4x5x_init(struct m_can_classdev *cdev) + if (ret) + return ret; + +- /* Zero out the MCAN buffers */ +- ret = m_can_init_ram(cdev); ++ ret = tcan4x5x_write_tcan_reg(cdev, TCAN4X5X_ERROR_STATUS_MASK, ++ TCAN4X5X_CLEAR_ALL_INT); + if (ret) + return ret; + diff --git a/drivers/net/can/m_can/tcan4x5x-regmap.c b/drivers/net/can/m_can/tcan4x5x-regmap.c index ca80dbaf7a3f5..26e212b8ca7a6 100644 --- a/drivers/net/can/m_can/tcan4x5x-regmap.c @@ -179574,6 +216127,38 @@ index ca80dbaf7a3f5..26e212b8ca7a6 100644 static int tcan4x5x_regmap_gather_write(void *context, const void *reg, size_t reg_len, +diff --git a/drivers/net/can/mscan/mpc5xxx_can.c b/drivers/net/can/mscan/mpc5xxx_can.c +index 35892c1efef02..7d868b6eb579b 100644 +--- a/drivers/net/can/mscan/mpc5xxx_can.c ++++ b/drivers/net/can/mscan/mpc5xxx_can.c +@@ -322,14 +322,14 @@ static int mpc5xxx_can_probe(struct platform_device *ofdev) + &mscan_clksrc); + if (!priv->can.clock.freq) { + dev_err(&ofdev->dev, "couldn't get MSCAN clock properties\n"); +- goto exit_free_mscan; ++ goto exit_put_clock; + } + + err = register_mscandev(dev, mscan_clksrc); + if (err) { + 
dev_err(&ofdev->dev, "registering %s failed (err=%d)\n", + DRV_NAME, err); +- goto exit_free_mscan; ++ goto exit_put_clock; + } + + dev_info(&ofdev->dev, "MSCAN at 0x%p, irq %d, clock %d Hz\n", +@@ -337,7 +337,9 @@ static int mpc5xxx_can_probe(struct platform_device *ofdev) + + return 0; + +-exit_free_mscan: ++exit_put_clock: ++ if (data->put_clock) ++ data->put_clock(ofdev); + free_candev(dev); + exit_dispose_irq: + irq_dispose_mapping(irq); diff --git a/drivers/net/can/pch_can.c b/drivers/net/can/pch_can.c index 92a54a5fd4c50..cd8d536c6fb20 100644 --- a/drivers/net/can/pch_can.c @@ -179650,10 +216235,54 @@ index 8999ec9455ec2..945b319de841c 100644 if (eifr & RCAR_CAN_EIFR_ORIF) { netdev_dbg(priv->ndev, "Receive overrun error interrupt\n"); diff --git a/drivers/net/can/rcar/rcar_canfd.c b/drivers/net/can/rcar/rcar_canfd.c -index ff9d0f5ae0dd2..2f44c567ebd73 100644 +index ff9d0f5ae0dd2..4e230e1456647 100644 --- a/drivers/net/can/rcar/rcar_canfd.c +++ b/drivers/net/can/rcar/rcar_canfd.c -@@ -1640,8 +1640,7 @@ static int rcar_canfd_channel_probe(struct rcar_canfd_global *gpriv, u32 ch, +@@ -1106,11 +1106,13 @@ static void rcar_canfd_handle_global_receive(struct rcar_canfd_global *gpriv, u3 + { + struct rcar_canfd_channel *priv = gpriv->ch[ch]; + u32 ridx = ch + RCANFD_RFFIFO_IDX; +- u32 sts; ++ u32 sts, cc; + + /* Handle Rx interrupts */ + sts = rcar_canfd_read(priv->base, RCANFD_RFSTS(ridx)); +- if (likely(sts & RCANFD_RFSTS_RFIF)) { ++ cc = rcar_canfd_read(priv->base, RCANFD_RFCC(ridx)); ++ if (likely(sts & RCANFD_RFSTS_RFIF && ++ cc & RCANFD_RFCC_RFIE)) { + if (napi_schedule_prep(&priv->napi)) { + /* Disable Rx FIFO interrupts */ + rcar_canfd_clear_bit(priv->base, +@@ -1195,11 +1197,9 @@ static void rcar_canfd_handle_channel_tx(struct rcar_canfd_global *gpriv, u32 ch + + static irqreturn_t rcar_canfd_channel_tx_interrupt(int irq, void *dev_id) + { +- struct rcar_canfd_global *gpriv = dev_id; +- u32 ch; ++ struct rcar_canfd_channel *priv = dev_id; + +- for_each_set_bit(ch, &gpriv->channels_mask, RCANFD_NUM_CHANNELS) +- rcar_canfd_handle_channel_tx(gpriv, ch); ++ rcar_canfd_handle_channel_tx(priv->gpriv, priv->channel); + + return IRQ_HANDLED; + } +@@ -1227,11 +1227,9 @@ static void rcar_canfd_handle_channel_err(struct rcar_canfd_global *gpriv, u32 c + + static irqreturn_t rcar_canfd_channel_err_interrupt(int irq, void *dev_id) + { +- struct rcar_canfd_global *gpriv = dev_id; +- u32 ch; ++ struct rcar_canfd_channel *priv = dev_id; + +- for_each_set_bit(ch, &gpriv->channels_mask, RCANFD_NUM_CHANNELS) +- rcar_canfd_handle_channel_err(gpriv, ch); ++ rcar_canfd_handle_channel_err(priv->gpriv, priv->channel); + + return IRQ_HANDLED; + } +@@ -1640,8 +1638,7 @@ static int rcar_canfd_channel_probe(struct rcar_canfd_global *gpriv, u32 ch, ndev = alloc_candev(sizeof(*priv), RCANFD_FIFO_DEPTH); if (!ndev) { dev_err(&pdev->dev, "alloc_candev() failed\n"); @@ -179663,7 +216292,38 @@ index ff9d0f5ae0dd2..2f44c567ebd73 100644 } priv = netdev_priv(ndev); -@@ -1721,22 +1720,22 @@ static int rcar_canfd_channel_probe(struct rcar_canfd_global *gpriv, u32 ch, +@@ -1650,6 +1647,7 @@ static int rcar_canfd_channel_probe(struct rcar_canfd_global *gpriv, u32 ch, + priv->ndev = ndev; + priv->base = gpriv->base; + priv->channel = ch; ++ priv->gpriv = gpriv; + priv->can.clock.freq = fcan_freq; + dev_info(&pdev->dev, "can_clk rate is %u\n", priv->can.clock.freq); + +@@ -1678,7 +1676,7 @@ static int rcar_canfd_channel_probe(struct rcar_canfd_global *gpriv, u32 ch, + } + err = devm_request_irq(&pdev->dev, err_irq, + 
rcar_canfd_channel_err_interrupt, 0, +- irq_name, gpriv); ++ irq_name, priv); + if (err) { + dev_err(&pdev->dev, "devm_request_irq CH Err(%d) failed, error %d\n", + err_irq, err); +@@ -1692,7 +1690,7 @@ static int rcar_canfd_channel_probe(struct rcar_canfd_global *gpriv, u32 ch, + } + err = devm_request_irq(&pdev->dev, tx_irq, + rcar_canfd_channel_tx_interrupt, 0, +- irq_name, gpriv); ++ irq_name, priv); + if (err) { + dev_err(&pdev->dev, "devm_request_irq Tx (%d) failed, error %d\n", + tx_irq, err); +@@ -1716,27 +1714,26 @@ static int rcar_canfd_channel_probe(struct rcar_canfd_global *gpriv, u32 ch, + + priv->can.do_set_mode = rcar_canfd_do_set_mode; + priv->can.do_get_berr_counter = rcar_canfd_get_berr_counter; +- priv->gpriv = gpriv; + SET_NETDEV_DEV(ndev, &pdev->dev); netif_napi_add(ndev, &priv->napi, rcar_canfd_rx_poll, RCANFD_NAPI_WEIGHT); @@ -179733,6 +216393,39 @@ index 3fad546467461..aae2677e24f99 100644 if (isrc & IRQ_BEI) { /* bus error interrupt */ priv->can.can_stats.bus_error++; +diff --git a/drivers/net/can/sja1000/sja1000_isa.c b/drivers/net/can/sja1000/sja1000_isa.c +index d513fac507185..db3e767d5320f 100644 +--- a/drivers/net/can/sja1000/sja1000_isa.c ++++ b/drivers/net/can/sja1000/sja1000_isa.c +@@ -202,22 +202,24 @@ static int sja1000_isa_probe(struct platform_device *pdev) + if (err) { + dev_err(&pdev->dev, "registering %s failed (err=%d)\n", + DRV_NAME, err); +- goto exit_unmap; ++ goto exit_free; + } + + dev_info(&pdev->dev, "%s device registered (reg_base=0x%p, irq=%d)\n", + DRV_NAME, priv->reg_base, dev->irq); + return 0; + +- exit_unmap: ++exit_free: ++ free_sja1000dev(dev); ++exit_unmap: + if (mem[idx]) + iounmap(base); +- exit_release: ++exit_release: + if (mem[idx]) + release_mem_region(mem[idx], iosize); + else + release_region(port[idx], iosize); +- exit: ++exit: + return err; + } + diff --git a/drivers/net/can/softing/softing_cs.c b/drivers/net/can/softing/softing_cs.c index 2e93ee7923739..e5c939b63fa65 100644 --- a/drivers/net/can/softing/softing_cs.c @@ -179799,7 +216492,7 @@ index 89d9c986a2297..b08b98e6ad1c9 100644 } diff --git a/drivers/net/can/spi/mcp251x.c b/drivers/net/can/spi/mcp251x.c -index 0579ab74f728a..baab3adc34bc6 100644 +index 0579ab74f728a..f02275f71e4d9 100644 --- a/drivers/net/can/spi/mcp251x.c +++ b/drivers/net/can/spi/mcp251x.c @@ -1074,9 +1074,6 @@ static irqreturn_t mcp251x_can_ist(int irq, void *dev_id) @@ -179841,6 +216534,22 @@ index 0579ab74f728a..baab3adc34bc6 100644 /* any error or tx interrupt we need to clear? 
*/ if (intf & (CANINTF_ERR | CANINTF_TX)) clear_intf |= intf & (CANINTF_ERR | CANINTF_TX); +@@ -1407,11 +1419,14 @@ static int mcp251x_can_probe(struct spi_device *spi) + + ret = mcp251x_gpio_setup(priv); + if (ret) +- goto error_probe; ++ goto out_unregister_candev; + + netdev_info(net, "MCP%x successfully initialized.\n", priv->model); + return 0; + ++out_unregister_candev: ++ unregister_candev(net); ++ + error_probe: + destroy_workqueue(priv->wq); + priv->wq = NULL; diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c index 673861ab665a4..3a0f022b15625 100644 --- a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c @@ -180009,8 +216718,32 @@ index 2b5302e724353..a1b7c1a451c0c 100644 atomic_dec(&dev->active_tx_urbs); +diff --git a/drivers/net/can/usb/esd_usb2.c b/drivers/net/can/usb/esd_usb2.c +index c6068a251fbed..9ed048cb07e6d 100644 +--- a/drivers/net/can/usb/esd_usb2.c ++++ b/drivers/net/can/usb/esd_usb2.c +@@ -227,6 +227,10 @@ static void esd_usb2_rx_event(struct esd_usb2_net_priv *priv, + u8 rxerr = msg->msg.rx.data[2]; + u8 txerr = msg->msg.rx.data[3]; + ++ netdev_dbg(priv->netdev, ++ "CAN_ERR_EV_EXT: dlc=%#02x state=%02x ecc=%02x rec=%02x tec=%02x\n", ++ msg->msg.rx.dlc, state, ecc, rxerr, txerr); ++ + skb = alloc_can_err_skb(priv->netdev, &cf); + if (skb == NULL) { + stats->rx_dropped++; +@@ -253,6 +257,8 @@ static void esd_usb2_rx_event(struct esd_usb2_net_priv *priv, + break; + default: + priv->can.state = CAN_STATE_ERROR_ACTIVE; ++ txerr = 0; ++ rxerr = 0; + break; + } + } else { diff --git a/drivers/net/can/usb/etas_es58x/es58x_core.c b/drivers/net/can/usb/etas_es58x/es58x_core.c -index 96a13c770e4a1..cd4e7f356e488 100644 +index 96a13c770e4a1..0e6faf962ebbc 100644 --- a/drivers/net/can/usb/etas_es58x/es58x_core.c +++ b/drivers/net/can/usb/etas_es58x/es58x_core.c @@ -664,7 +664,7 @@ int es58x_rx_err_msg(struct net_device *netdev, enum es58x_err error, @@ -180074,7 +216807,20 @@ index 96a13c770e4a1..cd4e7f356e488 100644 es58x_free_urbs(es58x_dev); return 0; -@@ -2223,7 +2223,6 @@ static struct es58x_device *es58x_init_es58x_dev(struct usb_interface *intf, +@@ -2098,8 +2098,11 @@ static int es58x_init_netdev(struct es58x_device *es58x_dev, int channel_idx) + netdev->flags |= IFF_ECHO; /* We support local echo */ + + ret = register_candev(netdev); +- if (ret) ++ if (ret) { ++ es58x_dev->netdev[channel_idx] = NULL; ++ free_candev(netdev); + return ret; ++ } + + netdev_queue_set_dql_min_limit(netdev_get_tx_queue(netdev, 0), + es58x_dev->param->dql_min_limit); +@@ -2223,7 +2226,6 @@ static struct es58x_device *es58x_init_es58x_dev(struct usb_interface *intf, init_usb_anchor(&es58x_dev->tx_urbs_idle); init_usb_anchor(&es58x_dev->tx_urbs_busy); atomic_set(&es58x_dev->tx_urbs_idle_cnt, 0); @@ -180293,7 +217039,7 @@ index 5e892bef46b00..5a43e542b302e 100644 dev->udev = interface_to_usbdev(intf); diff --git a/drivers/net/can/usb/kvaser_usb/kvaser_usb.h b/drivers/net/can/usb/kvaser_usb/kvaser_usb.h -index 390b6bde883c8..62958f04a2f20 100644 +index 390b6bde883c8..5699531f87873 100644 --- a/drivers/net/can/usb/kvaser_usb/kvaser_usb.h +++ b/drivers/net/can/usb/kvaser_usb/kvaser_usb.h @@ -35,9 +35,10 @@ @@ -180324,7 +217070,19 @@ index 390b6bde883c8..62958f04a2f20 100644 }; /* Context for an outstanding, not yet ACKed, transmission */ -@@ -84,7 +80,7 @@ struct kvaser_usb { +@@ -80,11 +76,19 @@ struct kvaser_usb_tx_urb_context { + int dlc; + }; + ++struct kvaser_usb_busparams { ++ __le32 bitrate; ++ u8 tseg1; ++ u8 tseg2; ++ u8 sjw; ++ u8 
nsamples; ++} __packed; ++ + struct kvaser_usb { struct usb_device *udev; struct usb_interface *intf; struct kvaser_usb_net_priv *nets[KVASER_USB_MAX_NET_DEVICES]; @@ -180333,7 +217091,65 @@ index 390b6bde883c8..62958f04a2f20 100644 const struct kvaser_usb_dev_cfg *cfg; struct usb_endpoint_descriptor *bulk_in, *bulk_out; -@@ -166,6 +162,12 @@ struct kvaser_usb_dev_ops { +@@ -108,13 +112,19 @@ struct kvaser_usb_net_priv { + struct can_priv can; + struct can_berr_counter bec; + ++ /* subdriver-specific data */ ++ void *sub_priv; ++ + struct kvaser_usb *dev; + struct net_device *netdev; + int channel; + +- struct completion start_comp, stop_comp, flush_comp; ++ struct completion start_comp, stop_comp, flush_comp, ++ get_busparams_comp; + struct usb_anchor tx_submitted; + ++ struct kvaser_usb_busparams busparams_nominal, busparams_data; ++ + spinlock_t tx_contexts_lock; /* lock for active_tx_contexts */ + int active_tx_contexts; + struct kvaser_usb_tx_urb_context tx_contexts[]; +@@ -124,11 +134,15 @@ struct kvaser_usb_net_priv { + * struct kvaser_usb_dev_ops - Device specific functions + * @dev_set_mode: used for can.do_set_mode + * @dev_set_bittiming: used for can.do_set_bittiming ++ * @dev_get_busparams: readback arbitration busparams + * @dev_set_data_bittiming: used for can.do_set_data_bittiming ++ * @dev_get_data_busparams: readback data busparams + * @dev_get_berr_counter: used for can.do_get_berr_counter + * + * @dev_setup_endpoints: setup USB in and out endpoints + * @dev_init_card: initialize card ++ * @dev_init_channel: initialize channel ++ * @dev_remove_channel: uninitialize channel + * @dev_get_software_info: get software info + * @dev_get_software_details: get software details + * @dev_get_card_info: get card info +@@ -144,12 +158,18 @@ struct kvaser_usb_net_priv { + */ + struct kvaser_usb_dev_ops { + int (*dev_set_mode)(struct net_device *netdev, enum can_mode mode); +- int (*dev_set_bittiming)(struct net_device *netdev); +- int (*dev_set_data_bittiming)(struct net_device *netdev); ++ int (*dev_set_bittiming)(const struct net_device *netdev, ++ const struct kvaser_usb_busparams *busparams); ++ int (*dev_get_busparams)(struct kvaser_usb_net_priv *priv); ++ int (*dev_set_data_bittiming)(const struct net_device *netdev, ++ const struct kvaser_usb_busparams *busparams); ++ int (*dev_get_data_busparams)(struct kvaser_usb_net_priv *priv); + int (*dev_get_berr_counter)(const struct net_device *netdev, + struct can_berr_counter *bec); + int (*dev_setup_endpoints)(struct kvaser_usb *dev); + int (*dev_init_card)(struct kvaser_usb *dev); ++ int (*dev_init_channel)(struct kvaser_usb_net_priv *priv); ++ void (*dev_remove_channel)(struct kvaser_usb_net_priv *priv); + int (*dev_get_software_info)(struct kvaser_usb *dev); + int (*dev_get_software_details)(struct kvaser_usb *dev); + int (*dev_get_card_info)(struct kvaser_usb *dev); +@@ -166,6 +186,12 @@ struct kvaser_usb_dev_ops { int *cmd_len, u16 transid); }; @@ -180346,7 +217162,7 @@ index 390b6bde883c8..62958f04a2f20 100644 struct kvaser_usb_dev_cfg { const struct can_clock clock; const unsigned int timestamp_freq; -@@ -176,6 +178,8 @@ struct kvaser_usb_dev_cfg { +@@ -176,6 +202,8 @@ struct kvaser_usb_dev_cfg { extern const struct kvaser_usb_dev_ops kvaser_usb_hydra_dev_ops; extern const struct kvaser_usb_dev_ops kvaser_usb_leaf_dev_ops; @@ -180355,7 +217171,7 @@ index 390b6bde883c8..62958f04a2f20 100644 int kvaser_usb_recv_cmd(const struct kvaser_usb *dev, void *cmd, int len, int *actual_len); -@@ -185,4 +189,7 @@ int 
kvaser_usb_send_cmd_async(struct kvaser_usb_net_priv *priv, void *cmd, +@@ -185,4 +213,7 @@ int kvaser_usb_send_cmd_async(struct kvaser_usb_net_priv *priv, void *cmd, int len); int kvaser_usb_can_rx_over_error(struct net_device *netdev); @@ -180364,7 +217180,7 @@ index 390b6bde883c8..62958f04a2f20 100644 + #endif /* KVASER_USB_H */ diff --git a/drivers/net/can/usb/kvaser_usb/kvaser_usb_core.c b/drivers/net/can/usb/kvaser_usb/kvaser_usb_core.c -index 0cc0fc866a2a9..bdcaccf8e2b28 100644 +index 0cc0fc866a2a9..09dbc51347d70 100644 --- a/drivers/net/can/usb/kvaser_usb/kvaser_usb_core.c +++ b/drivers/net/can/usb/kvaser_usb/kvaser_usb_core.c @@ -61,8 +61,6 @@ @@ -180629,7 +217445,7 @@ index 0cc0fc866a2a9..bdcaccf8e2b28 100644 resubmit_urb: usb_fill_bulk_urb(urb, dev->udev, -@@ -400,6 +436,7 @@ static int kvaser_usb_open(struct net_device *netdev) +@@ -400,21 +436,18 @@ static int kvaser_usb_open(struct net_device *netdev) { struct kvaser_usb_net_priv *priv = netdev_priv(netdev); struct kvaser_usb *dev = priv->dev; @@ -180637,10 +217453,13 @@ index 0cc0fc866a2a9..bdcaccf8e2b28 100644 int err; err = open_candev(netdev); -@@ -410,11 +447,11 @@ static int kvaser_usb_open(struct net_device *netdev) if (err) - goto error; + return err; +- err = kvaser_usb_setup_rx_urbs(dev); +- if (err) +- goto error; +- - err = dev->ops->dev_set_opt_mode(priv); + err = ops->dev_set_opt_mode(priv); if (err) @@ -180651,7 +217470,7 @@ index 0cc0fc866a2a9..bdcaccf8e2b28 100644 if (err) { netdev_warn(netdev, "Cannot start device, error %d\n", err); goto error; -@@ -443,7 +480,7 @@ static void kvaser_usb_reset_tx_urb_contexts(struct kvaser_usb_net_priv *priv) +@@ -443,7 +476,7 @@ static void kvaser_usb_reset_tx_urb_contexts(struct kvaser_usb_net_priv *priv) /* This method might sleep. Do not call it in the atomic context * of URB completions. 
*/ @@ -180660,7 +217479,7 @@ index 0cc0fc866a2a9..bdcaccf8e2b28 100644 { usb_kill_anchored_urbs(&priv->tx_submitted); kvaser_usb_reset_tx_urb_contexts(priv); -@@ -471,22 +508,23 @@ static int kvaser_usb_close(struct net_device *netdev) +@@ -471,22 +504,23 @@ static int kvaser_usb_close(struct net_device *netdev) { struct kvaser_usb_net_priv *priv = netdev_priv(netdev); struct kvaser_usb *dev = priv->dev; @@ -180688,7 +217507,101 @@ index 0cc0fc866a2a9..bdcaccf8e2b28 100644 if (err) netdev_warn(netdev, "Cannot stop device, error %d\n", err); -@@ -525,6 +563,7 @@ static netdev_tx_t kvaser_usb_start_xmit(struct sk_buff *skb, +@@ -499,6 +533,93 @@ static int kvaser_usb_close(struct net_device *netdev) + return 0; + } + ++static int kvaser_usb_set_bittiming(struct net_device *netdev) ++{ ++ struct kvaser_usb_net_priv *priv = netdev_priv(netdev); ++ struct kvaser_usb *dev = priv->dev; ++ const struct kvaser_usb_dev_ops *ops = dev->driver_info->ops; ++ struct can_bittiming *bt = &priv->can.bittiming; ++ ++ struct kvaser_usb_busparams busparams; ++ int tseg1 = bt->prop_seg + bt->phase_seg1; ++ int tseg2 = bt->phase_seg2; ++ int sjw = bt->sjw; ++ int err = -EOPNOTSUPP; ++ ++ busparams.bitrate = cpu_to_le32(bt->bitrate); ++ busparams.sjw = (u8)sjw; ++ busparams.tseg1 = (u8)tseg1; ++ busparams.tseg2 = (u8)tseg2; ++ if (priv->can.ctrlmode & CAN_CTRLMODE_3_SAMPLES) ++ busparams.nsamples = 3; ++ else ++ busparams.nsamples = 1; ++ ++ err = ops->dev_set_bittiming(netdev, &busparams); ++ if (err) ++ return err; ++ ++ err = kvaser_usb_setup_rx_urbs(priv->dev); ++ if (err) ++ return err; ++ ++ err = ops->dev_get_busparams(priv); ++ if (err) { ++ /* Treat EOPNOTSUPP as success */ ++ if (err == -EOPNOTSUPP) ++ err = 0; ++ return err; ++ } ++ ++ if (memcmp(&busparams, &priv->busparams_nominal, ++ sizeof(priv->busparams_nominal)) != 0) ++ err = -EINVAL; ++ ++ return err; ++} ++ ++static int kvaser_usb_set_data_bittiming(struct net_device *netdev) ++{ ++ struct kvaser_usb_net_priv *priv = netdev_priv(netdev); ++ struct kvaser_usb *dev = priv->dev; ++ const struct kvaser_usb_dev_ops *ops = dev->driver_info->ops; ++ struct can_bittiming *dbt = &priv->can.data_bittiming; ++ ++ struct kvaser_usb_busparams busparams; ++ int tseg1 = dbt->prop_seg + dbt->phase_seg1; ++ int tseg2 = dbt->phase_seg2; ++ int sjw = dbt->sjw; ++ int err; ++ ++ if (!ops->dev_set_data_bittiming || ++ !ops->dev_get_data_busparams) ++ return -EOPNOTSUPP; ++ ++ busparams.bitrate = cpu_to_le32(dbt->bitrate); ++ busparams.sjw = (u8)sjw; ++ busparams.tseg1 = (u8)tseg1; ++ busparams.tseg2 = (u8)tseg2; ++ busparams.nsamples = 1; ++ ++ err = ops->dev_set_data_bittiming(netdev, &busparams); ++ if (err) ++ return err; ++ ++ err = kvaser_usb_setup_rx_urbs(priv->dev); ++ if (err) ++ return err; ++ ++ err = ops->dev_get_data_busparams(priv); ++ if (err) ++ return err; ++ ++ if (memcmp(&busparams, &priv->busparams_data, ++ sizeof(priv->busparams_data)) != 0) ++ err = -EINVAL; ++ ++ return err; ++} ++ + static void kvaser_usb_write_bulk_callback(struct urb *urb) + { + struct kvaser_usb_tx_urb_context *context = urb->context; +@@ -525,6 +646,7 @@ static netdev_tx_t kvaser_usb_start_xmit(struct sk_buff *skb, { struct kvaser_usb_net_priv *priv = netdev_priv(netdev); struct kvaser_usb *dev = priv->dev; @@ -180696,7 +217609,7 @@ index 0cc0fc866a2a9..bdcaccf8e2b28 100644 struct net_device_stats *stats = &netdev->stats; struct kvaser_usb_tx_urb_context *context = NULL; struct urb *urb; -@@ -567,8 +606,8 @@ static netdev_tx_t kvaser_usb_start_xmit(struct sk_buff *skb, 
+@@ -567,8 +689,8 @@ static netdev_tx_t kvaser_usb_start_xmit(struct sk_buff *skb, goto freeurb; } @@ -180707,7 +217620,22 @@ index 0cc0fc866a2a9..bdcaccf8e2b28 100644 if (!buf) { stats->tx_dropped++; dev_kfree_skb(skb); -@@ -652,15 +691,16 @@ static void kvaser_usb_remove_interfaces(struct kvaser_usb *dev) +@@ -633,6 +755,7 @@ static const struct net_device_ops kvaser_usb_netdev_ops = { + + static void kvaser_usb_remove_interfaces(struct kvaser_usb *dev) + { ++ const struct kvaser_usb_dev_ops *ops = dev->driver_info->ops; + int i; + + for (i = 0; i < dev->nchannels; i++) { +@@ -648,19 +771,23 @@ static void kvaser_usb_remove_interfaces(struct kvaser_usb *dev) + if (!dev->nets[i]) + continue; + ++ if (ops->dev_remove_channel) ++ ops->dev_remove_channel(dev->nets[i]); ++ + free_candev(dev->nets[i]->netdev); } } @@ -180728,22 +217656,23 @@ index 0cc0fc866a2a9..bdcaccf8e2b28 100644 if (err) return err; } -@@ -677,6 +717,7 @@ static int kvaser_usb_init_one(struct kvaser_usb *dev, +@@ -677,6 +804,8 @@ static int kvaser_usb_init_one(struct kvaser_usb *dev, init_usb_anchor(&priv->tx_submitted); init_completion(&priv->start_comp); init_completion(&priv->stop_comp); + init_completion(&priv->flush_comp); ++ init_completion(&priv->get_busparams_comp); priv->can.ctrlmode_supported = 0; priv->dev = dev; -@@ -689,20 +730,19 @@ static int kvaser_usb_init_one(struct kvaser_usb *dev, +@@ -689,20 +818,19 @@ static int kvaser_usb_init_one(struct kvaser_usb *dev, priv->can.state = CAN_STATE_STOPPED; priv->can.clock.freq = dev->cfg->clock.freq; priv->can.bittiming_const = dev->cfg->bittiming_const; - priv->can.do_set_bittiming = dev->ops->dev_set_bittiming; - priv->can.do_set_mode = dev->ops->dev_set_mode; - if ((id->driver_info & KVASER_USB_HAS_TXRX_ERRORS) || -+ priv->can.do_set_bittiming = ops->dev_set_bittiming; ++ priv->can.do_set_bittiming = kvaser_usb_set_bittiming; + priv->can.do_set_mode = ops->dev_set_mode; + if ((driver_info->quirks & KVASER_USB_QUIRK_HAS_TXRX_ERRORS) || (priv->dev->card_data.capabilities & KVASER_USB_CAP_BERR_CAP)) @@ -180759,11 +217688,41 @@ index 0cc0fc866a2a9..bdcaccf8e2b28 100644 priv->can.data_bittiming_const = dev->cfg->data_bittiming_const; - priv->can.do_set_data_bittiming = - dev->ops->dev_set_data_bittiming; -+ priv->can.do_set_data_bittiming = ops->dev_set_data_bittiming; ++ priv->can.do_set_data_bittiming = kvaser_usb_set_data_bittiming; } netdev->flags |= IFF_ECHO; -@@ -733,29 +773,22 @@ static int kvaser_usb_probe(struct usb_interface *intf, +@@ -714,17 +842,26 @@ static int kvaser_usb_init_one(struct kvaser_usb *dev, + + dev->nets[channel] = priv; + ++ if (ops->dev_init_channel) { ++ err = ops->dev_init_channel(priv); ++ if (err) ++ goto err; ++ } ++ + err = register_candev(netdev); + if (err) { + dev_err(&dev->intf->dev, "Failed to register CAN device\n"); +- free_candev(netdev); +- dev->nets[channel] = NULL; +- return err; ++ goto err; + } + + netdev_dbg(netdev, "device registered\n"); + + return 0; ++ ++err: ++ free_candev(netdev); ++ dev->nets[channel] = NULL; ++ return err; + } + + static int kvaser_usb_probe(struct usb_interface *intf, +@@ -733,29 +870,22 @@ static int kvaser_usb_probe(struct usb_interface *intf, struct kvaser_usb *dev; int err; int i; @@ -180802,7 +217761,7 @@ index 0cc0fc866a2a9..bdcaccf8e2b28 100644 if (err) { dev_err(&intf->dev, "Cannot get usb endpoint(s)"); return err; -@@ -769,22 +802,22 @@ static int kvaser_usb_probe(struct usb_interface *intf, +@@ -769,22 +899,22 @@ static int kvaser_usb_probe(struct usb_interface *intf, 
dev->card_data.ctrlmode_supported = 0; dev->card_data.capabilities = 0; @@ -180829,7 +217788,7 @@ index 0cc0fc866a2a9..bdcaccf8e2b28 100644 if (err) { dev_err(&intf->dev, "Cannot get software details, error %d\n", err); -@@ -802,14 +835,14 @@ static int kvaser_usb_probe(struct usb_interface *intf, +@@ -802,14 +932,14 @@ static int kvaser_usb_probe(struct usb_interface *intf, dev_dbg(&intf->dev, "Max outstanding tx = %d URBs\n", dev->max_tx_urbs); @@ -180847,7 +217806,7 @@ index 0cc0fc866a2a9..bdcaccf8e2b28 100644 if (err) { dev_err(&intf->dev, "Cannot get capabilities, error %d\n", err); -@@ -819,7 +852,7 @@ static int kvaser_usb_probe(struct usb_interface *intf, +@@ -819,7 +949,7 @@ static int kvaser_usb_probe(struct usb_interface *intf, } for (i = 0; i < dev->nchannels; i++) { @@ -180857,10 +217816,92 @@ index 0cc0fc866a2a9..bdcaccf8e2b28 100644 kvaser_usb_remove_interfaces(dev); return err; diff --git a/drivers/net/can/usb/kvaser_usb/kvaser_usb_hydra.c b/drivers/net/can/usb/kvaser_usb/kvaser_usb_hydra.c -index dcee8dc828ecc..45eb7e462ce93 100644 +index dcee8dc828ecc..562105b8a6327 100644 --- a/drivers/net/can/usb/kvaser_usb/kvaser_usb_hydra.c +++ b/drivers/net/can/usb/kvaser_usb/kvaser_usb_hydra.c -@@ -373,7 +373,7 @@ static const struct can_bittiming_const kvaser_usb_hydra_kcan_bittiming_c = { +@@ -22,6 +22,7 @@ + #include <linux/spinlock.h> + #include <linux/string.h> + #include <linux/types.h> ++#include <linux/units.h> + #include <linux/usb.h> + + #include <linux/can.h> +@@ -44,6 +45,8 @@ static const struct kvaser_usb_dev_cfg kvaser_usb_hydra_dev_cfg_rt; + + /* Minihydra command IDs */ + #define CMD_SET_BUSPARAMS_REQ 16 ++#define CMD_GET_BUSPARAMS_REQ 17 ++#define CMD_GET_BUSPARAMS_RESP 18 + #define CMD_GET_CHIP_STATE_REQ 19 + #define CMD_CHIP_STATE_EVENT 20 + #define CMD_SET_DRIVERMODE_REQ 21 +@@ -195,21 +198,26 @@ struct kvaser_cmd_chip_state_event { + #define KVASER_USB_HYDRA_BUS_MODE_CANFD_ISO 0x01 + #define KVASER_USB_HYDRA_BUS_MODE_NONISO 0x02 + struct kvaser_cmd_set_busparams { +- __le32 bitrate; +- u8 tseg1; +- u8 tseg2; +- u8 sjw; +- u8 nsamples; ++ struct kvaser_usb_busparams busparams_nominal; + u8 reserved0[4]; +- __le32 bitrate_d; +- u8 tseg1_d; +- u8 tseg2_d; +- u8 sjw_d; +- u8 nsamples_d; ++ struct kvaser_usb_busparams busparams_data; + u8 canfd_mode; + u8 reserved1[7]; + } __packed; + ++/* Busparam type */ ++#define KVASER_USB_HYDRA_BUSPARAM_TYPE_CAN 0x00 ++#define KVASER_USB_HYDRA_BUSPARAM_TYPE_CANFD 0x01 ++struct kvaser_cmd_get_busparams_req { ++ u8 type; ++ u8 reserved[27]; ++} __packed; ++ ++struct kvaser_cmd_get_busparams_res { ++ struct kvaser_usb_busparams busparams; ++ u8 reserved[20]; ++} __packed; ++ + /* Ctrl modes */ + #define KVASER_USB_HYDRA_CTRLMODE_NORMAL 0x01 + #define KVASER_USB_HYDRA_CTRLMODE_LISTEN 0x02 +@@ -280,6 +288,8 @@ struct kvaser_cmd { + struct kvaser_cmd_error_event error_event; + + struct kvaser_cmd_set_busparams set_busparams_req; ++ struct kvaser_cmd_get_busparams_req get_busparams_req; ++ struct kvaser_cmd_get_busparams_res get_busparams_res; + + struct kvaser_cmd_chip_state_event chip_state_event; + +@@ -295,6 +305,7 @@ struct kvaser_cmd { + #define KVASER_USB_HYDRA_CF_FLAG_OVERRUN BIT(1) + #define KVASER_USB_HYDRA_CF_FLAG_REMOTE_FRAME BIT(4) + #define KVASER_USB_HYDRA_CF_FLAG_EXTENDED_ID BIT(5) ++#define KVASER_USB_HYDRA_CF_FLAG_TX_ACK BIT(6) + /* CAN frame flags. 
Used in ext_rx_can and ext_tx_can */ + #define KVASER_USB_HYDRA_CF_FLAG_OSM_NACK BIT(12) + #define KVASER_USB_HYDRA_CF_FLAG_ABL BIT(13) +@@ -361,6 +372,10 @@ struct kvaser_cmd_ext { + } __packed; + } __packed; + ++struct kvaser_usb_net_hydra_priv { ++ int pending_get_busparams_type; ++}; ++ + static const struct can_bittiming_const kvaser_usb_hydra_kcan_bittiming_c = { + .name = "kvaser_usb_kcan", + .tseg1_min = 1, +@@ -373,7 +388,7 @@ static const struct can_bittiming_const kvaser_usb_hydra_kcan_bittiming_c = { .brp_inc = 1, }; @@ -180869,7 +217910,121 @@ index dcee8dc828ecc..45eb7e462ce93 100644 .name = "kvaser_usb_flex", .tseg1_min = 4, .tseg1_max = 16, -@@ -916,8 +916,10 @@ static void kvaser_usb_hydra_update_state(struct kvaser_usb_net_priv *priv, +@@ -530,6 +545,7 @@ static int kvaser_usb_hydra_send_simple_cmd(struct kvaser_usb *dev, + u8 cmd_no, int channel) + { + struct kvaser_cmd *cmd; ++ size_t cmd_len; + int err; + + cmd = kcalloc(1, sizeof(struct kvaser_cmd), GFP_KERNEL); +@@ -537,6 +553,7 @@ static int kvaser_usb_hydra_send_simple_cmd(struct kvaser_usb *dev, + return -ENOMEM; + + cmd->header.cmd_no = cmd_no; ++ cmd_len = kvaser_usb_hydra_cmd_size(cmd); + if (channel < 0) { + kvaser_usb_hydra_set_cmd_dest_he + (cmd, KVASER_USB_HYDRA_HE_ADDRESS_ILLEGAL); +@@ -553,7 +570,7 @@ static int kvaser_usb_hydra_send_simple_cmd(struct kvaser_usb *dev, + kvaser_usb_hydra_set_cmd_transid + (cmd, kvaser_usb_hydra_get_next_transid(dev)); + +- err = kvaser_usb_send_cmd(dev, cmd, kvaser_usb_hydra_cmd_size(cmd)); ++ err = kvaser_usb_send_cmd(dev, cmd, cmd_len); + if (err) + goto end; + +@@ -569,6 +586,7 @@ kvaser_usb_hydra_send_simple_cmd_async(struct kvaser_usb_net_priv *priv, + { + struct kvaser_cmd *cmd; + struct kvaser_usb *dev = priv->dev; ++ size_t cmd_len; + int err; + + cmd = kcalloc(1, sizeof(struct kvaser_cmd), GFP_ATOMIC); +@@ -576,14 +594,14 @@ kvaser_usb_hydra_send_simple_cmd_async(struct kvaser_usb_net_priv *priv, + return -ENOMEM; + + cmd->header.cmd_no = cmd_no; ++ cmd_len = kvaser_usb_hydra_cmd_size(cmd); + + kvaser_usb_hydra_set_cmd_dest_he + (cmd, dev->card_data.hydra.channel_to_he[priv->channel]); + kvaser_usb_hydra_set_cmd_transid + (cmd, kvaser_usb_hydra_get_next_transid(dev)); + +- err = kvaser_usb_send_cmd_async(priv, cmd, +- kvaser_usb_hydra_cmd_size(cmd)); ++ err = kvaser_usb_send_cmd_async(priv, cmd, cmd_len); + if (err) + kfree(cmd); + +@@ -727,6 +745,7 @@ static int kvaser_usb_hydra_get_single_capability(struct kvaser_usb *dev, + { + struct kvaser_usb_dev_card_data *card_data = &dev->card_data; + struct kvaser_cmd *cmd; ++ size_t cmd_len; + u32 value = 0; + u32 mask = 0; + u16 cap_cmd_res; +@@ -738,13 +757,14 @@ static int kvaser_usb_hydra_get_single_capability(struct kvaser_usb *dev, + return -ENOMEM; + + cmd->header.cmd_no = CMD_GET_CAPABILITIES_REQ; ++ cmd_len = kvaser_usb_hydra_cmd_size(cmd); + cmd->cap_req.cap_cmd = cpu_to_le16(cap_cmd_req); + + kvaser_usb_hydra_set_cmd_dest_he(cmd, card_data->hydra.sysdbg_he); + kvaser_usb_hydra_set_cmd_transid + (cmd, kvaser_usb_hydra_get_next_transid(dev)); + +- err = kvaser_usb_send_cmd(dev, cmd, kvaser_usb_hydra_cmd_size(cmd)); ++ err = kvaser_usb_send_cmd(dev, cmd, cmd_len); + if (err) + goto end; + +@@ -838,6 +858,39 @@ static void kvaser_usb_hydra_flush_queue_reply(const struct kvaser_usb *dev, + complete(&priv->flush_comp); + } + ++static void kvaser_usb_hydra_get_busparams_reply(const struct kvaser_usb *dev, ++ const struct kvaser_cmd *cmd) ++{ ++ struct kvaser_usb_net_priv *priv; ++ struct kvaser_usb_net_hydra_priv 
*hydra; ++ ++ priv = kvaser_usb_hydra_net_priv_from_cmd(dev, cmd); ++ if (!priv) ++ return; ++ ++ hydra = priv->sub_priv; ++ if (!hydra) ++ return; ++ ++ switch (hydra->pending_get_busparams_type) { ++ case KVASER_USB_HYDRA_BUSPARAM_TYPE_CAN: ++ memcpy(&priv->busparams_nominal, &cmd->get_busparams_res.busparams, ++ sizeof(priv->busparams_nominal)); ++ break; ++ case KVASER_USB_HYDRA_BUSPARAM_TYPE_CANFD: ++ memcpy(&priv->busparams_data, &cmd->get_busparams_res.busparams, ++ sizeof(priv->busparams_nominal)); ++ break; ++ default: ++ dev_warn(&dev->intf->dev, "Unknown get_busparams_type %d\n", ++ hydra->pending_get_busparams_type); ++ break; ++ } ++ hydra->pending_get_busparams_type = -1; ++ ++ complete(&priv->get_busparams_comp); ++} ++ + static void + kvaser_usb_hydra_bus_status_to_can_state(const struct kvaser_usb_net_priv *priv, + u8 bus_status, +@@ -916,8 +969,10 @@ static void kvaser_usb_hydra_update_state(struct kvaser_usb_net_priv *priv, new_state < CAN_STATE_BUS_OFF) priv->can.can_stats.restarts++; @@ -180882,7 +218037,7 @@ index dcee8dc828ecc..45eb7e462ce93 100644 stats = &netdev->stats; stats->rx_packets++; -@@ -1071,8 +1073,10 @@ kvaser_usb_hydra_error_frame(struct kvaser_usb_net_priv *priv, +@@ -1071,8 +1126,10 @@ kvaser_usb_hydra_error_frame(struct kvaser_usb_net_priv *priv, shhwtstamps->hwtstamp = hwtstamp; cf->can_id |= CAN_ERR_BUSERROR; @@ -180895,7 +218050,261 @@ index dcee8dc828ecc..45eb7e462ce93 100644 stats->rx_packets++; stats->rx_bytes += cf->len; -@@ -1910,7 +1914,7 @@ static int kvaser_usb_hydra_flush_queue(struct kvaser_usb_net_priv *priv) +@@ -1121,6 +1178,7 @@ static void kvaser_usb_hydra_tx_acknowledge(const struct kvaser_usb *dev, + struct kvaser_usb_net_priv *priv; + unsigned long irq_flags; + bool one_shot_fail = false; ++ bool is_err_frame = false; + u16 transid = kvaser_usb_hydra_get_cmd_transid(cmd); + + priv = kvaser_usb_hydra_net_priv_from_cmd(dev, cmd); +@@ -1139,10 +1197,13 @@ static void kvaser_usb_hydra_tx_acknowledge(const struct kvaser_usb *dev, + kvaser_usb_hydra_one_shot_fail(priv, cmd_ext); + one_shot_fail = true; + } ++ ++ is_err_frame = flags & KVASER_USB_HYDRA_CF_FLAG_TX_ACK && ++ flags & KVASER_USB_HYDRA_CF_FLAG_ERROR_FRAME; + } + + context = &priv->tx_contexts[transid % dev->max_tx_urbs]; +- if (!one_shot_fail) { ++ if (!one_shot_fail && !is_err_frame) { + struct net_device_stats *stats = &priv->netdev->stats; + + stats->tx_packets++; +@@ -1316,6 +1377,10 @@ static void kvaser_usb_hydra_handle_cmd_std(const struct kvaser_usb *dev, + kvaser_usb_hydra_state_event(dev, cmd); + break; + ++ case CMD_GET_BUSPARAMS_RESP: ++ kvaser_usb_hydra_get_busparams_reply(dev, cmd); ++ break; ++ + case CMD_ERROR_EVENT: + kvaser_usb_hydra_error_event(dev, cmd); + break; +@@ -1516,15 +1581,61 @@ static int kvaser_usb_hydra_set_mode(struct net_device *netdev, + return err; + } + +-static int kvaser_usb_hydra_set_bittiming(struct net_device *netdev) ++static int kvaser_usb_hydra_get_busparams(struct kvaser_usb_net_priv *priv, ++ int busparams_type) ++{ ++ struct kvaser_usb *dev = priv->dev; ++ struct kvaser_usb_net_hydra_priv *hydra = priv->sub_priv; ++ struct kvaser_cmd *cmd; ++ size_t cmd_len; ++ int err; ++ ++ if (!hydra) ++ return -EINVAL; ++ ++ cmd = kcalloc(1, sizeof(struct kvaser_cmd), GFP_KERNEL); ++ if (!cmd) ++ return -ENOMEM; ++ ++ cmd->header.cmd_no = CMD_GET_BUSPARAMS_REQ; ++ cmd_len = kvaser_usb_hydra_cmd_size(cmd); ++ kvaser_usb_hydra_set_cmd_dest_he ++ (cmd, dev->card_data.hydra.channel_to_he[priv->channel]); ++ kvaser_usb_hydra_set_cmd_transid ++ 
(cmd, kvaser_usb_hydra_get_next_transid(dev)); ++ cmd->get_busparams_req.type = busparams_type; ++ hydra->pending_get_busparams_type = busparams_type; ++ ++ reinit_completion(&priv->get_busparams_comp); ++ ++ err = kvaser_usb_send_cmd(dev, cmd, cmd_len); ++ if (err) ++ return err; ++ ++ if (!wait_for_completion_timeout(&priv->get_busparams_comp, ++ msecs_to_jiffies(KVASER_USB_TIMEOUT))) ++ return -ETIMEDOUT; ++ ++ return err; ++} ++ ++static int kvaser_usb_hydra_get_nominal_busparams(struct kvaser_usb_net_priv *priv) ++{ ++ return kvaser_usb_hydra_get_busparams(priv, KVASER_USB_HYDRA_BUSPARAM_TYPE_CAN); ++} ++ ++static int kvaser_usb_hydra_get_data_busparams(struct kvaser_usb_net_priv *priv) ++{ ++ return kvaser_usb_hydra_get_busparams(priv, KVASER_USB_HYDRA_BUSPARAM_TYPE_CANFD); ++} ++ ++static int kvaser_usb_hydra_set_bittiming(const struct net_device *netdev, ++ const struct kvaser_usb_busparams *busparams) + { + struct kvaser_cmd *cmd; + struct kvaser_usb_net_priv *priv = netdev_priv(netdev); +- struct can_bittiming *bt = &priv->can.bittiming; + struct kvaser_usb *dev = priv->dev; +- int tseg1 = bt->prop_seg + bt->phase_seg1; +- int tseg2 = bt->phase_seg2; +- int sjw = bt->sjw; ++ size_t cmd_len; + int err; + + cmd = kcalloc(1, sizeof(struct kvaser_cmd), GFP_KERNEL); +@@ -1532,33 +1643,29 @@ static int kvaser_usb_hydra_set_bittiming(struct net_device *netdev) + return -ENOMEM; + + cmd->header.cmd_no = CMD_SET_BUSPARAMS_REQ; +- cmd->set_busparams_req.bitrate = cpu_to_le32(bt->bitrate); +- cmd->set_busparams_req.sjw = (u8)sjw; +- cmd->set_busparams_req.tseg1 = (u8)tseg1; +- cmd->set_busparams_req.tseg2 = (u8)tseg2; +- cmd->set_busparams_req.nsamples = 1; ++ cmd_len = kvaser_usb_hydra_cmd_size(cmd); ++ memcpy(&cmd->set_busparams_req.busparams_nominal, busparams, ++ sizeof(cmd->set_busparams_req.busparams_nominal)); + + kvaser_usb_hydra_set_cmd_dest_he + (cmd, dev->card_data.hydra.channel_to_he[priv->channel]); + kvaser_usb_hydra_set_cmd_transid + (cmd, kvaser_usb_hydra_get_next_transid(dev)); + +- err = kvaser_usb_send_cmd(dev, cmd, kvaser_usb_hydra_cmd_size(cmd)); ++ err = kvaser_usb_send_cmd(dev, cmd, cmd_len); + + kfree(cmd); + + return err; + } + +-static int kvaser_usb_hydra_set_data_bittiming(struct net_device *netdev) ++static int kvaser_usb_hydra_set_data_bittiming(const struct net_device *netdev, ++ const struct kvaser_usb_busparams *busparams) + { + struct kvaser_cmd *cmd; + struct kvaser_usb_net_priv *priv = netdev_priv(netdev); +- struct can_bittiming *dbt = &priv->can.data_bittiming; + struct kvaser_usb *dev = priv->dev; +- int tseg1 = dbt->prop_seg + dbt->phase_seg1; +- int tseg2 = dbt->phase_seg2; +- int sjw = dbt->sjw; ++ size_t cmd_len; + int err; + + cmd = kcalloc(1, sizeof(struct kvaser_cmd), GFP_KERNEL); +@@ -1566,11 +1673,9 @@ static int kvaser_usb_hydra_set_data_bittiming(struct net_device *netdev) + return -ENOMEM; + + cmd->header.cmd_no = CMD_SET_BUSPARAMS_FD_REQ; +- cmd->set_busparams_req.bitrate_d = cpu_to_le32(dbt->bitrate); +- cmd->set_busparams_req.sjw_d = (u8)sjw; +- cmd->set_busparams_req.tseg1_d = (u8)tseg1; +- cmd->set_busparams_req.tseg2_d = (u8)tseg2; +- cmd->set_busparams_req.nsamples_d = 1; ++ cmd_len = kvaser_usb_hydra_cmd_size(cmd); ++ memcpy(&cmd->set_busparams_req.busparams_data, busparams, ++ sizeof(cmd->set_busparams_req.busparams_data)); + + if (priv->can.ctrlmode & CAN_CTRLMODE_FD) { + if (priv->can.ctrlmode & CAN_CTRLMODE_FD_NON_ISO) +@@ -1586,7 +1691,7 @@ static int kvaser_usb_hydra_set_data_bittiming(struct net_device *netdev) + 
kvaser_usb_hydra_set_cmd_transid + (cmd, kvaser_usb_hydra_get_next_transid(dev)); + +- err = kvaser_usb_send_cmd(dev, cmd, kvaser_usb_hydra_cmd_size(cmd)); ++ err = kvaser_usb_send_cmd(dev, cmd, cmd_len); + + kfree(cmd); + +@@ -1677,6 +1782,19 @@ static int kvaser_usb_hydra_init_card(struct kvaser_usb *dev) + return 0; + } + ++static int kvaser_usb_hydra_init_channel(struct kvaser_usb_net_priv *priv) ++{ ++ struct kvaser_usb_net_hydra_priv *hydra; ++ ++ hydra = devm_kzalloc(&priv->dev->intf->dev, sizeof(*hydra), GFP_KERNEL); ++ if (!hydra) ++ return -ENOMEM; ++ ++ priv->sub_priv = hydra; ++ ++ return 0; ++} ++ + static int kvaser_usb_hydra_get_software_info(struct kvaser_usb *dev) + { + struct kvaser_cmd cmd; +@@ -1701,6 +1819,7 @@ static int kvaser_usb_hydra_get_software_info(struct kvaser_usb *dev) + static int kvaser_usb_hydra_get_software_details(struct kvaser_usb *dev) + { + struct kvaser_cmd *cmd; ++ size_t cmd_len; + int err; + u32 flags; + struct kvaser_usb_dev_card_data *card_data = &dev->card_data; +@@ -1710,6 +1829,7 @@ static int kvaser_usb_hydra_get_software_details(struct kvaser_usb *dev) + return -ENOMEM; + + cmd->header.cmd_no = CMD_GET_SOFTWARE_DETAILS_REQ; ++ cmd_len = kvaser_usb_hydra_cmd_size(cmd); + cmd->sw_detail_req.use_ext_cmd = 1; + kvaser_usb_hydra_set_cmd_dest_he + (cmd, KVASER_USB_HYDRA_HE_ADDRESS_ILLEGAL); +@@ -1717,7 +1837,7 @@ static int kvaser_usb_hydra_get_software_details(struct kvaser_usb *dev) + kvaser_usb_hydra_set_cmd_transid + (cmd, kvaser_usb_hydra_get_next_transid(dev)); + +- err = kvaser_usb_send_cmd(dev, cmd, kvaser_usb_hydra_cmd_size(cmd)); ++ err = kvaser_usb_send_cmd(dev, cmd, cmd_len); + if (err) + goto end; + +@@ -1835,6 +1955,7 @@ static int kvaser_usb_hydra_set_opt_mode(const struct kvaser_usb_net_priv *priv) + { + struct kvaser_usb *dev = priv->dev; + struct kvaser_cmd *cmd; ++ size_t cmd_len; + int err; + + if ((priv->can.ctrlmode & +@@ -1850,6 +1971,7 @@ static int kvaser_usb_hydra_set_opt_mode(const struct kvaser_usb_net_priv *priv) + return -ENOMEM; + + cmd->header.cmd_no = CMD_SET_DRIVERMODE_REQ; ++ cmd_len = kvaser_usb_hydra_cmd_size(cmd); + kvaser_usb_hydra_set_cmd_dest_he + (cmd, dev->card_data.hydra.channel_to_he[priv->channel]); + kvaser_usb_hydra_set_cmd_transid +@@ -1859,7 +1981,7 @@ static int kvaser_usb_hydra_set_opt_mode(const struct kvaser_usb_net_priv *priv) + else + cmd->set_ctrlmode.mode = KVASER_USB_HYDRA_CTRLMODE_NORMAL; + +- err = kvaser_usb_send_cmd(dev, cmd, kvaser_usb_hydra_cmd_size(cmd)); ++ err = kvaser_usb_send_cmd(dev, cmd, cmd_len); + kfree(cmd); + + return err; +@@ -1869,7 +1991,7 @@ static int kvaser_usb_hydra_start_chip(struct kvaser_usb_net_priv *priv) + { + int err; + +- init_completion(&priv->start_comp); ++ reinit_completion(&priv->start_comp); + + err = kvaser_usb_hydra_send_simple_cmd(priv->dev, CMD_START_CHIP_REQ, + priv->channel); +@@ -1887,7 +2009,7 @@ static int kvaser_usb_hydra_stop_chip(struct kvaser_usb_net_priv *priv) + { + int err; + +- init_completion(&priv->stop_comp); ++ reinit_completion(&priv->stop_comp); + + /* Make sure we do not report invalid BUS_OFF from CMD_CHIP_STATE_EVENT + * see comment in kvaser_usb_hydra_update_state() +@@ -1910,7 +2032,7 @@ static int kvaser_usb_hydra_flush_queue(struct kvaser_usb_net_priv *priv) { int err; @@ -180904,8 +218313,35 @@ index dcee8dc828ecc..45eb7e462ce93 100644 err = kvaser_usb_hydra_send_simple_cmd(priv->dev, CMD_FLUSH_QUEUE, priv->channel); -@@ -2052,7 +2056,7 @@ static const struct kvaser_usb_dev_cfg kvaser_usb_hydra_dev_cfg_flexc = { - .freq 
= 24000000, +@@ -2021,10 +2143,13 @@ kvaser_usb_hydra_frame_to_cmd(const struct kvaser_usb_net_priv *priv, + const struct kvaser_usb_dev_ops kvaser_usb_hydra_dev_ops = { + .dev_set_mode = kvaser_usb_hydra_set_mode, + .dev_set_bittiming = kvaser_usb_hydra_set_bittiming, ++ .dev_get_busparams = kvaser_usb_hydra_get_nominal_busparams, + .dev_set_data_bittiming = kvaser_usb_hydra_set_data_bittiming, ++ .dev_get_data_busparams = kvaser_usb_hydra_get_data_busparams, + .dev_get_berr_counter = kvaser_usb_hydra_get_berr_counter, + .dev_setup_endpoints = kvaser_usb_hydra_setup_endpoints, + .dev_init_card = kvaser_usb_hydra_init_card, ++ .dev_init_channel = kvaser_usb_hydra_init_channel, + .dev_get_software_info = kvaser_usb_hydra_get_software_info, + .dev_get_software_details = kvaser_usb_hydra_get_software_details, + .dev_get_card_info = kvaser_usb_hydra_get_card_info, +@@ -2040,7 +2165,7 @@ const struct kvaser_usb_dev_ops kvaser_usb_hydra_dev_ops = { + + static const struct kvaser_usb_dev_cfg kvaser_usb_hydra_dev_cfg_kcan = { + .clock = { +- .freq = 80000000, ++ .freq = 80 * MEGA /* Hz */, + }, + .timestamp_freq = 80, + .bittiming_const = &kvaser_usb_hydra_kcan_bittiming_c, +@@ -2049,15 +2174,15 @@ static const struct kvaser_usb_dev_cfg kvaser_usb_hydra_dev_cfg_kcan = { + + static const struct kvaser_usb_dev_cfg kvaser_usb_hydra_dev_cfg_flexc = { + .clock = { +- .freq = 24000000, ++ .freq = 24 * MEGA /* Hz */, }, .timestamp_freq = 1, - .bittiming_const = &kvaser_usb_hydra_flexc_bittiming_c, @@ -180913,11 +218349,27 @@ index dcee8dc828ecc..45eb7e462ce93 100644 }; static const struct kvaser_usb_dev_cfg kvaser_usb_hydra_dev_cfg_rt = { + .clock = { +- .freq = 80000000, ++ .freq = 80 * MEGA /* Hz */, + }, + .timestamp_freq = 24, + .bittiming_const = &kvaser_usb_hydra_rt_bittiming_c, diff --git a/drivers/net/can/usb/kvaser_usb/kvaser_usb_leaf.c b/drivers/net/can/usb/kvaser_usb/kvaser_usb_leaf.c -index 59ba7c7beec00..4312be05fc5b6 100644 +index 59ba7c7beec00..ad3103391c793 100644 --- a/drivers/net/can/usb/kvaser_usb/kvaser_usb_leaf.c +++ b/drivers/net/can/usb/kvaser_usb/kvaser_usb_leaf.c -@@ -28,10 +28,6 @@ +@@ -19,7 +19,9 @@ + #include <linux/spinlock.h> + #include <linux/string.h> + #include <linux/types.h> ++#include <linux/units.h> + #include <linux/usb.h> ++#include <linux/workqueue.h> + + #include <linux/can.h> + #include <linux/can/dev.h> +@@ -28,10 +30,6 @@ #include "kvaser_usb.h" @@ -180928,7 +218380,27 @@ index 59ba7c7beec00..4312be05fc5b6 100644 #define MAX_USBCAN_NET_DEVICES 2 /* Command header size */ -@@ -80,6 +76,12 @@ static const struct kvaser_usb_dev_cfg kvaser_usb_leaf_dev_cfg; +@@ -59,6 +57,9 @@ static const struct kvaser_usb_dev_cfg kvaser_usb_leaf_dev_cfg; + #define CMD_RX_EXT_MESSAGE 14 + #define CMD_TX_EXT_MESSAGE 15 + #define CMD_SET_BUS_PARAMS 16 ++#define CMD_GET_BUS_PARAMS 17 ++#define CMD_GET_BUS_PARAMS_REPLY 18 ++#define CMD_GET_CHIP_STATE 19 + #define CMD_CHIP_STATE_EVENT 20 + #define CMD_SET_CTRL_MODE 21 + #define CMD_RESET_CHIP 24 +@@ -73,13 +74,24 @@ static const struct kvaser_usb_dev_cfg kvaser_usb_leaf_dev_cfg; + #define CMD_GET_CARD_INFO_REPLY 35 + #define CMD_GET_SOFTWARE_INFO 38 + #define CMD_GET_SOFTWARE_INFO_REPLY 39 ++#define CMD_ERROR_EVENT 45 + #define CMD_FLUSH_QUEUE 48 + #define CMD_TX_ACKNOWLEDGE 50 + #define CMD_CAN_ERROR_EVENT 51 + #define CMD_FLUSH_QUEUE_REPLY 68 ++#define CMD_GET_CAPABILITIES_REQ 95 ++#define CMD_GET_CAPABILITIES_RESP 96 #define CMD_LEAF_LOG_MESSAGE 106 @@ -180937,11 +218409,13 @@ index 59ba7c7beec00..4312be05fc5b6 100644 +#define 
KVASER_USB_LEAF_SWOPTION_FREQ_16_MHZ_CLK 0 +#define KVASER_USB_LEAF_SWOPTION_FREQ_32_MHZ_CLK BIT(5) +#define KVASER_USB_LEAF_SWOPTION_FREQ_24_MHZ_CLK BIT(6) ++ ++#define KVASER_USB_LEAF_SWOPTION_EXT_CAP BIT(12) + /* error factors */ #define M16C_EF_ACKE BIT(0) #define M16C_EF_CRCE BIT(1) -@@ -98,16 +100,6 @@ static const struct kvaser_usb_dev_cfg kvaser_usb_leaf_dev_cfg; +@@ -98,16 +110,6 @@ static const struct kvaser_usb_dev_cfg kvaser_usb_leaf_dev_cfg; #define USBCAN_ERROR_STATE_RX_ERROR BIT(1) #define USBCAN_ERROR_STATE_BUSERROR BIT(2) @@ -180958,7 +218432,116 @@ index 59ba7c7beec00..4312be05fc5b6 100644 /* ctrl modes */ #define KVASER_CTRL_MODE_NORMAL 1 #define KVASER_CTRL_MODE_SILENT 2 -@@ -317,6 +309,38 @@ struct kvaser_cmd { +@@ -164,11 +166,7 @@ struct usbcan_cmd_softinfo { + struct kvaser_cmd_busparams { + u8 tid; + u8 channel; +- __le32 bitrate; +- u8 tseg1; +- u8 tseg2; +- u8 sjw; +- u8 no_samp; ++ struct kvaser_usb_busparams busparams; + } __packed; + + struct kvaser_cmd_tx_can { +@@ -237,7 +235,7 @@ struct kvaser_cmd_tx_acknowledge_header { + u8 tid; + } __packed; + +-struct leaf_cmd_error_event { ++struct leaf_cmd_can_error_event { + u8 tid; + u8 flags; + __le16 time[3]; +@@ -249,7 +247,7 @@ struct leaf_cmd_error_event { + u8 error_factor; + } __packed; + +-struct usbcan_cmd_error_event { ++struct usbcan_cmd_can_error_event { + u8 tid; + u8 padding; + u8 tx_errors_count_ch0; +@@ -261,6 +259,28 @@ struct usbcan_cmd_error_event { + __le16 time; + } __packed; + ++/* CMD_ERROR_EVENT error codes */ ++#define KVASER_USB_LEAF_ERROR_EVENT_TX_QUEUE_FULL 0x8 ++#define KVASER_USB_LEAF_ERROR_EVENT_PARAM 0x9 ++ ++struct leaf_cmd_error_event { ++ u8 tid; ++ u8 error_code; ++ __le16 timestamp[3]; ++ __le16 padding; ++ __le16 info1; ++ __le16 info2; ++} __packed; ++ ++struct usbcan_cmd_error_event { ++ u8 tid; ++ u8 error_code; ++ __le16 info1; ++ __le16 info2; ++ __le16 timestamp; ++ __le16 padding; ++} __packed; ++ + struct kvaser_cmd_ctrl_mode { + u8 tid; + u8 channel; +@@ -285,6 +305,28 @@ struct leaf_cmd_log_message { + u8 data[8]; + } __packed; + ++/* Sub commands for cap_req and cap_res */ ++#define KVASER_USB_LEAF_CAP_CMD_LISTEN_MODE 0x02 ++#define KVASER_USB_LEAF_CAP_CMD_ERR_REPORT 0x05 ++struct kvaser_cmd_cap_req { ++ __le16 padding0; ++ __le16 cap_cmd; ++ __le16 padding1; ++ __le16 channel; ++} __packed; ++ ++/* Status codes for cap_res */ ++#define KVASER_USB_LEAF_CAP_STAT_OK 0x00 ++#define KVASER_USB_LEAF_CAP_STAT_NOT_IMPL 0x01 ++#define KVASER_USB_LEAF_CAP_STAT_UNAVAIL 0x02 ++struct kvaser_cmd_cap_res { ++ __le16 padding; ++ __le16 cap_cmd; ++ __le16 status; ++ __le32 mask; ++ __le32 value; ++} __packed; ++ + struct kvaser_cmd { + u8 len; + u8 id; +@@ -300,14 +342,18 @@ struct kvaser_cmd { + struct leaf_cmd_softinfo softinfo; + struct leaf_cmd_rx_can rx_can; + struct leaf_cmd_chip_state_event chip_state_event; +- struct leaf_cmd_error_event error_event; ++ struct leaf_cmd_can_error_event can_error_event; + struct leaf_cmd_log_message log_message; ++ struct leaf_cmd_error_event error_event; ++ struct kvaser_cmd_cap_req cap_req; ++ struct kvaser_cmd_cap_res cap_res; + } __packed leaf; + + union { + struct usbcan_cmd_softinfo softinfo; + struct usbcan_cmd_rx_can rx_can; + struct usbcan_cmd_chip_state_event chip_state_event; ++ struct usbcan_cmd_can_error_event can_error_event; + struct usbcan_cmd_error_event error_event; + } __packed usbcan; + +@@ -317,6 +363,42 @@ struct kvaser_cmd { } u; } __packed; @@ -180975,7 +218558,10 @@ index 59ba7c7beec00..4312be05fc5b6 100644 + 
[CMD_RX_EXT_MESSAGE] = kvaser_fsize(u.leaf.rx_can), + [CMD_LEAF_LOG_MESSAGE] = kvaser_fsize(u.leaf.log_message), + [CMD_CHIP_STATE_EVENT] = kvaser_fsize(u.leaf.chip_state_event), -+ [CMD_CAN_ERROR_EVENT] = kvaser_fsize(u.leaf.error_event), ++ [CMD_CAN_ERROR_EVENT] = kvaser_fsize(u.leaf.can_error_event), ++ [CMD_GET_CAPABILITIES_RESP] = kvaser_fsize(u.leaf.cap_res), ++ [CMD_GET_BUS_PARAMS_REPLY] = kvaser_fsize(u.busparams), ++ [CMD_ERROR_EVENT] = kvaser_fsize(u.leaf.error_event), + /* ignored events: */ + [CMD_FLUSH_QUEUE_REPLY] = CMD_SIZE_ANY, +}; @@ -180989,7 +218575,8 @@ index 59ba7c7beec00..4312be05fc5b6 100644 + [CMD_RX_STD_MESSAGE] = kvaser_fsize(u.usbcan.rx_can), + [CMD_RX_EXT_MESSAGE] = kvaser_fsize(u.usbcan.rx_can), + [CMD_CHIP_STATE_EVENT] = kvaser_fsize(u.usbcan.chip_state_event), -+ [CMD_CAN_ERROR_EVENT] = kvaser_fsize(u.usbcan.error_event), ++ [CMD_CAN_ERROR_EVENT] = kvaser_fsize(u.usbcan.can_error_event), ++ [CMD_ERROR_EVENT] = kvaser_fsize(u.usbcan.error_event), + /* ignored events: */ + [CMD_USBCAN_CLOCK_OVERFLOW_EVENT] = CMD_SIZE_ANY, +}; @@ -180997,10 +218584,16 @@ index 59ba7c7beec00..4312be05fc5b6 100644 /* Summary of a kvaser error event, for a unified Leaf/Usbcan error * handling. Some discrepancies between the two families exist: * -@@ -340,6 +364,107 @@ struct kvaser_usb_err_summary { +@@ -340,6 +422,113 @@ struct kvaser_usb_err_summary { }; }; ++struct kvaser_usb_net_leaf_priv { ++ struct kvaser_usb_net_priv *net; ++ ++ struct delayed_work chip_state_req_work; ++}; ++ +static const struct can_bittiming_const kvaser_usb_leaf_m16c_bittiming_const = { + .name = "kvaser_usb_ucii", + .tseg1_min = 4, @@ -181027,7 +218620,7 @@ index 59ba7c7beec00..4312be05fc5b6 100644 + +static const struct kvaser_usb_dev_cfg kvaser_usb_leaf_usbcan_dev_cfg = { + .clock = { -+ .freq = 8000000, ++ .freq = 8 * MEGA /* Hz */, + }, + .timestamp_freq = 1, + .bittiming_const = &kvaser_usb_leaf_m16c_bittiming_const, @@ -181035,7 +218628,7 @@ index 59ba7c7beec00..4312be05fc5b6 100644 + +static const struct kvaser_usb_dev_cfg kvaser_usb_leaf_m32c_dev_cfg = { + .clock = { -+ .freq = 16000000, ++ .freq = 16 * MEGA /* Hz */, + }, + .timestamp_freq = 1, + .bittiming_const = &kvaser_usb_leaf_m32c_bittiming_const, @@ -181051,7 +218644,7 @@ index 59ba7c7beec00..4312be05fc5b6 100644 + +static const struct kvaser_usb_dev_cfg kvaser_usb_leaf_imx_dev_cfg_24mhz = { + .clock = { -+ .freq = 24000000, ++ .freq = 24 * MEGA /* Hz */, + }, + .timestamp_freq = 1, + .bittiming_const = &kvaser_usb_flexc_bittiming_const, @@ -181059,7 +218652,7 @@ index 59ba7c7beec00..4312be05fc5b6 100644 + +static const struct kvaser_usb_dev_cfg kvaser_usb_leaf_imx_dev_cfg_32mhz = { + .clock = { -+ .freq = 32000000, ++ .freq = 32 * MEGA /* Hz */, + }, + .timestamp_freq = 1, + .bittiming_const = &kvaser_usb_flexc_bittiming_const, @@ -181105,7 +218698,7 @@ index 59ba7c7beec00..4312be05fc5b6 100644 static void * kvaser_usb_leaf_frame_to_cmd(const struct kvaser_usb_net_priv *priv, const struct sk_buff *skb, int *frame_len, -@@ -359,7 +484,7 @@ kvaser_usb_leaf_frame_to_cmd(const struct kvaser_usb_net_priv *priv, +@@ -359,7 +548,7 @@ kvaser_usb_leaf_frame_to_cmd(const struct kvaser_usb_net_priv *priv, sizeof(struct kvaser_cmd_tx_can); cmd->u.tx_can.channel = priv->channel; @@ -181114,7 +218707,7 @@ index 59ba7c7beec00..4312be05fc5b6 100644 case KVASER_LEAF: cmd_tx_can_flags = &cmd->u.tx_can.leaf.flags; break; -@@ -447,6 +572,9 @@ static int kvaser_usb_leaf_wait_cmd(const struct kvaser_usb *dev, u8 id, +@@ -447,6 +636,9 @@ static int 
kvaser_usb_leaf_wait_cmd(const struct kvaser_usb *dev, u8 id, end: kfree(buf); @@ -181124,7 +218717,7 @@ index 59ba7c7beec00..4312be05fc5b6 100644 return err; } -@@ -471,6 +599,34 @@ static int kvaser_usb_leaf_send_simple_cmd(const struct kvaser_usb *dev, +@@ -471,6 +663,37 @@ static int kvaser_usb_leaf_send_simple_cmd(const struct kvaser_usb *dev, return rc; } @@ -181136,6 +218729,9 @@ index 59ba7c7beec00..4312be05fc5b6 100644 + dev->fw_version = le32_to_cpu(softinfo->fw_version); + dev->max_tx_urbs = le16_to_cpu(softinfo->max_outstanding_tx); + ++ if (sw_options & KVASER_USB_LEAF_SWOPTION_EXT_CAP) ++ dev->card_data.capabilities |= KVASER_USB_CAP_EXT_CAP; ++ + if (dev->driver_info->quirks & KVASER_USB_QUIRK_IGNORE_CLK_FREQ) { + /* Firmware expects bittiming parameters calculated for 16MHz + * clock, regardless of the actual clock @@ -181159,7 +218755,7 @@ index 59ba7c7beec00..4312be05fc5b6 100644 static int kvaser_usb_leaf_get_software_info_inner(struct kvaser_usb *dev) { struct kvaser_cmd cmd; -@@ -484,16 +640,15 @@ static int kvaser_usb_leaf_get_software_info_inner(struct kvaser_usb *dev) +@@ -484,16 +707,15 @@ static int kvaser_usb_leaf_get_software_info_inner(struct kvaser_usb *dev) if (err) return err; @@ -181179,7 +218775,7 @@ index 59ba7c7beec00..4312be05fc5b6 100644 break; } -@@ -532,7 +687,7 @@ static int kvaser_usb_leaf_get_card_info(struct kvaser_usb *dev) +@@ -532,13 +754,123 @@ static int kvaser_usb_leaf_get_card_info(struct kvaser_usb *dev) dev->nchannels = cmd.u.cardinfo.nchannels; if (dev->nchannels > KVASER_USB_MAX_NET_DEVICES || @@ -181188,7 +218784,183 @@ index 59ba7c7beec00..4312be05fc5b6 100644 dev->nchannels > MAX_USBCAN_NET_DEVICES)) return -EINVAL; -@@ -668,7 +823,7 @@ kvaser_usb_leaf_rx_error_update_can_state(struct kvaser_usb_net_priv *priv, + return 0; + } + ++static int kvaser_usb_leaf_get_single_capability(struct kvaser_usb *dev, ++ u16 cap_cmd_req, u16 *status) ++{ ++ struct kvaser_usb_dev_card_data *card_data = &dev->card_data; ++ struct kvaser_cmd *cmd; ++ u32 value = 0; ++ u32 mask = 0; ++ u16 cap_cmd_res; ++ int err; ++ int i; ++ ++ cmd = kzalloc(sizeof(*cmd), GFP_KERNEL); ++ if (!cmd) ++ return -ENOMEM; ++ ++ cmd->id = CMD_GET_CAPABILITIES_REQ; ++ cmd->u.leaf.cap_req.cap_cmd = cpu_to_le16(cap_cmd_req); ++ cmd->len = CMD_HEADER_LEN + sizeof(struct kvaser_cmd_cap_req); ++ ++ err = kvaser_usb_send_cmd(dev, cmd, cmd->len); ++ if (err) ++ goto end; ++ ++ err = kvaser_usb_leaf_wait_cmd(dev, CMD_GET_CAPABILITIES_RESP, cmd); ++ if (err) ++ goto end; ++ ++ *status = le16_to_cpu(cmd->u.leaf.cap_res.status); ++ ++ if (*status != KVASER_USB_LEAF_CAP_STAT_OK) ++ goto end; ++ ++ cap_cmd_res = le16_to_cpu(cmd->u.leaf.cap_res.cap_cmd); ++ switch (cap_cmd_res) { ++ case KVASER_USB_LEAF_CAP_CMD_LISTEN_MODE: ++ case KVASER_USB_LEAF_CAP_CMD_ERR_REPORT: ++ value = le32_to_cpu(cmd->u.leaf.cap_res.value); ++ mask = le32_to_cpu(cmd->u.leaf.cap_res.mask); ++ break; ++ default: ++ dev_warn(&dev->intf->dev, "Unknown capability command %u\n", ++ cap_cmd_res); ++ break; ++ } ++ ++ for (i = 0; i < dev->nchannels; i++) { ++ if (BIT(i) & (value & mask)) { ++ switch (cap_cmd_res) { ++ case KVASER_USB_LEAF_CAP_CMD_LISTEN_MODE: ++ card_data->ctrlmode_supported |= ++ CAN_CTRLMODE_LISTENONLY; ++ break; ++ case KVASER_USB_LEAF_CAP_CMD_ERR_REPORT: ++ card_data->capabilities |= ++ KVASER_USB_CAP_BERR_CAP; ++ break; ++ } ++ } ++ } ++ ++end: ++ kfree(cmd); ++ ++ return err; ++} ++ ++static int kvaser_usb_leaf_get_capabilities_leaf(struct kvaser_usb *dev) ++{ ++ int err; ++ u16 status; ++ ++ if 
(!(dev->card_data.capabilities & KVASER_USB_CAP_EXT_CAP)) { ++ dev_info(&dev->intf->dev, ++ "No extended capability support. Upgrade device firmware.\n"); ++ return 0; ++ } ++ ++ err = kvaser_usb_leaf_get_single_capability(dev, ++ KVASER_USB_LEAF_CAP_CMD_LISTEN_MODE, ++ &status); ++ if (err) ++ return err; ++ if (status) ++ dev_info(&dev->intf->dev, ++ "KVASER_USB_LEAF_CAP_CMD_LISTEN_MODE failed %u\n", ++ status); ++ ++ err = kvaser_usb_leaf_get_single_capability(dev, ++ KVASER_USB_LEAF_CAP_CMD_ERR_REPORT, ++ &status); ++ if (err) ++ return err; ++ if (status) ++ dev_info(&dev->intf->dev, ++ "KVASER_USB_LEAF_CAP_CMD_ERR_REPORT failed %u\n", ++ status); ++ ++ return 0; ++} ++ ++static int kvaser_usb_leaf_get_capabilities(struct kvaser_usb *dev) ++{ ++ int err = 0; ++ ++ if (dev->driver_info->family == KVASER_LEAF) ++ err = kvaser_usb_leaf_get_capabilities_leaf(dev); ++ ++ return err; ++} ++ + static void kvaser_usb_leaf_tx_acknowledge(const struct kvaser_usb *dev, + const struct kvaser_cmd *cmd) + { +@@ -567,7 +899,7 @@ static void kvaser_usb_leaf_tx_acknowledge(const struct kvaser_usb *dev, + context = &priv->tx_contexts[tid % dev->max_tx_urbs]; + + /* Sometimes the state change doesn't come after a bus-off event */ +- if (priv->can.restart_ms && priv->can.state >= CAN_STATE_BUS_OFF) { ++ if (priv->can.restart_ms && priv->can.state == CAN_STATE_BUS_OFF) { + struct sk_buff *skb; + struct can_frame *cf; + +@@ -623,6 +955,16 @@ static int kvaser_usb_leaf_simple_cmd_async(struct kvaser_usb_net_priv *priv, + return err; + } + ++static void kvaser_usb_leaf_chip_state_req_work(struct work_struct *work) ++{ ++ struct kvaser_usb_net_leaf_priv *leaf = ++ container_of(work, struct kvaser_usb_net_leaf_priv, ++ chip_state_req_work.work); ++ struct kvaser_usb_net_priv *priv = leaf->net; ++ ++ kvaser_usb_leaf_simple_cmd_async(priv, CMD_GET_CHIP_STATE); ++} ++ + static void + kvaser_usb_leaf_rx_error_update_can_state(struct kvaser_usb_net_priv *priv, + const struct kvaser_usb_err_summary *es, +@@ -641,20 +983,16 @@ kvaser_usb_leaf_rx_error_update_can_state(struct kvaser_usb_net_priv *priv, + new_state = CAN_STATE_BUS_OFF; + } else if (es->status & M16C_STATE_BUS_PASSIVE) { + new_state = CAN_STATE_ERROR_PASSIVE; +- } else if (es->status & M16C_STATE_BUS_ERROR) { ++ } else if ((es->status & M16C_STATE_BUS_ERROR) && ++ cur_state >= CAN_STATE_BUS_OFF) { + /* Guard against spurious error events after a busoff */ +- if (cur_state < CAN_STATE_BUS_OFF) { +- if (es->txerr >= 128 || es->rxerr >= 128) +- new_state = CAN_STATE_ERROR_PASSIVE; +- else if (es->txerr >= 96 || es->rxerr >= 96) +- new_state = CAN_STATE_ERROR_WARNING; +- else if (cur_state > CAN_STATE_ERROR_ACTIVE) +- new_state = CAN_STATE_ERROR_ACTIVE; +- } +- } +- +- if (!es->status) ++ } else if (es->txerr >= 128 || es->rxerr >= 128) { ++ new_state = CAN_STATE_ERROR_PASSIVE; ++ } else if (es->txerr >= 96 || es->rxerr >= 96) { ++ new_state = CAN_STATE_ERROR_WARNING; ++ } else { + new_state = CAN_STATE_ERROR_ACTIVE; ++ } + + if (new_state != cur_state) { + tx_state = (es->txerr >= es->rxerr) ? 
new_state : 0; +@@ -664,11 +1002,11 @@ kvaser_usb_leaf_rx_error_update_can_state(struct kvaser_usb_net_priv *priv, + } + + if (priv->can.restart_ms && +- cur_state >= CAN_STATE_BUS_OFF && ++ cur_state == CAN_STATE_BUS_OFF && new_state < CAN_STATE_BUS_OFF) priv->can.can_stats.restarts++; @@ -181197,7 +218969,52 @@ index 59ba7c7beec00..4312be05fc5b6 100644 case KVASER_LEAF: if (es->leaf.error_factor) { priv->can.can_stats.bus_error++; -@@ -747,7 +902,7 @@ static void kvaser_usb_leaf_rx_error(const struct kvaser_usb *dev, +@@ -698,6 +1036,7 @@ static void kvaser_usb_leaf_rx_error(const struct kvaser_usb *dev, + struct sk_buff *skb; + struct net_device_stats *stats; + struct kvaser_usb_net_priv *priv; ++ struct kvaser_usb_net_leaf_priv *leaf; + enum can_state old_state, new_state; + + if (es->channel >= dev->nchannels) { +@@ -707,8 +1046,13 @@ static void kvaser_usb_leaf_rx_error(const struct kvaser_usb *dev, + } + + priv = dev->nets[es->channel]; ++ leaf = priv->sub_priv; + stats = &priv->netdev->stats; + ++ /* Ignore e.g. state change to bus-off reported just after stopping */ ++ if (!netif_running(priv->netdev)) ++ return; ++ + /* Update all of the CAN interface's state and error counters before + * trying any memory allocation that can actually fail with -ENOMEM. + * +@@ -723,6 +1067,14 @@ static void kvaser_usb_leaf_rx_error(const struct kvaser_usb *dev, + kvaser_usb_leaf_rx_error_update_can_state(priv, es, &tmp_cf); + new_state = priv->can.state; + ++ /* If there are errors, request status updates periodically as we do ++ * not get automatic notifications of improved state. ++ */ ++ if (new_state < CAN_STATE_BUS_OFF && ++ (es->rxerr || es->txerr || new_state == CAN_STATE_ERROR_PASSIVE)) ++ schedule_delayed_work(&leaf->chip_state_req_work, ++ msecs_to_jiffies(500)); ++ + skb = alloc_can_err_skb(priv->netdev, &cf); + if (!skb) { + stats->rx_dropped++; +@@ -740,14 +1092,14 @@ static void kvaser_usb_leaf_rx_error(const struct kvaser_usb *dev, + } + + if (priv->can.restart_ms && +- old_state >= CAN_STATE_BUS_OFF && ++ old_state == CAN_STATE_BUS_OFF && + new_state < CAN_STATE_BUS_OFF) { + cf->can_id |= CAN_ERR_RESTARTED; + netif_carrier_on(priv->netdev); } } @@ -181206,7 +219023,7 @@ index 59ba7c7beec00..4312be05fc5b6 100644 case KVASER_LEAF: if (es->leaf.error_factor) { cf->can_id |= CAN_ERR_BUSERROR | CAN_ERR_PROT; -@@ -774,8 +929,10 @@ static void kvaser_usb_leaf_rx_error(const struct kvaser_usb *dev, +@@ -774,8 +1126,10 @@ static void kvaser_usb_leaf_rx_error(const struct kvaser_usb *dev, break; } @@ -181219,7 +219036,58 @@ index 59ba7c7beec00..4312be05fc5b6 100644 stats->rx_packets++; stats->rx_bytes += cf->len; -@@ -939,7 +1096,7 @@ static void kvaser_usb_leaf_rx_can_msg(const struct kvaser_usb *dev, +@@ -838,11 +1192,11 @@ static void kvaser_usb_leaf_usbcan_rx_error(const struct kvaser_usb *dev, + + case CMD_CAN_ERROR_EVENT: + es.channel = 0; +- es.status = cmd->u.usbcan.error_event.status_ch0; +- es.txerr = cmd->u.usbcan.error_event.tx_errors_count_ch0; +- es.rxerr = cmd->u.usbcan.error_event.rx_errors_count_ch0; ++ es.status = cmd->u.usbcan.can_error_event.status_ch0; ++ es.txerr = cmd->u.usbcan.can_error_event.tx_errors_count_ch0; ++ es.rxerr = cmd->u.usbcan.can_error_event.rx_errors_count_ch0; + es.usbcan.other_ch_status = +- cmd->u.usbcan.error_event.status_ch1; ++ cmd->u.usbcan.can_error_event.status_ch1; + kvaser_usb_leaf_usbcan_conditionally_rx_error(dev, &es); + + /* The USBCAN firmware supports up to 2 channels. 
+@@ -850,13 +1204,13 @@ static void kvaser_usb_leaf_usbcan_rx_error(const struct kvaser_usb *dev, + */ + if (dev->nchannels == MAX_USBCAN_NET_DEVICES) { + es.channel = 1; +- es.status = cmd->u.usbcan.error_event.status_ch1; ++ es.status = cmd->u.usbcan.can_error_event.status_ch1; + es.txerr = +- cmd->u.usbcan.error_event.tx_errors_count_ch1; ++ cmd->u.usbcan.can_error_event.tx_errors_count_ch1; + es.rxerr = +- cmd->u.usbcan.error_event.rx_errors_count_ch1; ++ cmd->u.usbcan.can_error_event.rx_errors_count_ch1; + es.usbcan.other_ch_status = +- cmd->u.usbcan.error_event.status_ch0; ++ cmd->u.usbcan.can_error_event.status_ch0; + kvaser_usb_leaf_usbcan_conditionally_rx_error(dev, &es); + } + break; +@@ -873,11 +1227,11 @@ static void kvaser_usb_leaf_leaf_rx_error(const struct kvaser_usb *dev, + + switch (cmd->id) { + case CMD_CAN_ERROR_EVENT: +- es.channel = cmd->u.leaf.error_event.channel; +- es.status = cmd->u.leaf.error_event.status; +- es.txerr = cmd->u.leaf.error_event.tx_errors_count; +- es.rxerr = cmd->u.leaf.error_event.rx_errors_count; +- es.leaf.error_factor = cmd->u.leaf.error_event.error_factor; ++ es.channel = cmd->u.leaf.can_error_event.channel; ++ es.status = cmd->u.leaf.can_error_event.status; ++ es.txerr = cmd->u.leaf.can_error_event.tx_errors_count; ++ es.rxerr = cmd->u.leaf.can_error_event.rx_errors_count; ++ es.leaf.error_factor = cmd->u.leaf.can_error_event.error_factor; + break; + case CMD_LEAF_LOG_MESSAGE: + es.channel = cmd->u.leaf.log_message.channel; +@@ -939,7 +1293,7 @@ static void kvaser_usb_leaf_rx_can_msg(const struct kvaser_usb *dev, stats = &priv->netdev->stats; if ((cmd->u.rx_can_header.flag & MSG_FLAG_ERROR_FRAME) && @@ -181228,7 +219096,7 @@ index 59ba7c7beec00..4312be05fc5b6 100644 cmd->id == CMD_LEAF_LOG_MESSAGE)) { kvaser_usb_leaf_leaf_rx_error(dev, cmd); return; -@@ -955,7 +1112,7 @@ static void kvaser_usb_leaf_rx_can_msg(const struct kvaser_usb *dev, +@@ -955,7 +1309,7 @@ static void kvaser_usb_leaf_rx_can_msg(const struct kvaser_usb *dev, return; } @@ -181237,7 +219105,7 @@ index 59ba7c7beec00..4312be05fc5b6 100644 case KVASER_LEAF: rx_data = cmd->u.leaf.rx_can.data; break; -@@ -970,7 +1127,7 @@ static void kvaser_usb_leaf_rx_can_msg(const struct kvaser_usb *dev, +@@ -970,7 +1324,7 @@ static void kvaser_usb_leaf_rx_can_msg(const struct kvaser_usb *dev, return; } @@ -181246,7 +219114,104 @@ index 59ba7c7beec00..4312be05fc5b6 100644 CMD_LEAF_LOG_MESSAGE) { cf->can_id = le32_to_cpu(cmd->u.leaf.log_message.id); if (cf->can_id & KVASER_EXTENDED_FRAME) -@@ -1052,6 +1209,9 @@ static void kvaser_usb_leaf_stop_chip_reply(const struct kvaser_usb *dev, +@@ -1009,6 +1363,74 @@ static void kvaser_usb_leaf_rx_can_msg(const struct kvaser_usb *dev, + netif_rx(skb); + } + ++static void kvaser_usb_leaf_error_event_parameter(const struct kvaser_usb *dev, ++ const struct kvaser_cmd *cmd) ++{ ++ u16 info1 = 0; ++ ++ switch (dev->driver_info->family) { ++ case KVASER_LEAF: ++ info1 = le16_to_cpu(cmd->u.leaf.error_event.info1); ++ break; ++ case KVASER_USBCAN: ++ info1 = le16_to_cpu(cmd->u.usbcan.error_event.info1); ++ break; ++ } ++ ++ /* info1 will contain the offending cmd_no */ ++ switch (info1) { ++ case CMD_SET_CTRL_MODE: ++ dev_warn(&dev->intf->dev, ++ "CMD_SET_CTRL_MODE error in parameter\n"); ++ break; ++ ++ case CMD_SET_BUS_PARAMS: ++ dev_warn(&dev->intf->dev, ++ "CMD_SET_BUS_PARAMS error in parameter\n"); ++ break; ++ ++ default: ++ dev_warn(&dev->intf->dev, ++ "Unhandled parameter error event cmd_no (%u)\n", ++ info1); ++ break; ++ } ++} ++ ++static void 
kvaser_usb_leaf_error_event(const struct kvaser_usb *dev, ++ const struct kvaser_cmd *cmd) ++{ ++ u8 error_code = 0; ++ ++ switch (dev->driver_info->family) { ++ case KVASER_LEAF: ++ error_code = cmd->u.leaf.error_event.error_code; ++ break; ++ case KVASER_USBCAN: ++ error_code = cmd->u.usbcan.error_event.error_code; ++ break; ++ } ++ ++ switch (error_code) { ++ case KVASER_USB_LEAF_ERROR_EVENT_TX_QUEUE_FULL: ++ /* Received additional CAN message, when firmware TX queue is ++ * already full. Something is wrong with the driver. ++ * This should never happen! ++ */ ++ dev_err(&dev->intf->dev, ++ "Received error event TX_QUEUE_FULL\n"); ++ break; ++ case KVASER_USB_LEAF_ERROR_EVENT_PARAM: ++ kvaser_usb_leaf_error_event_parameter(dev, cmd); ++ break; ++ ++ default: ++ dev_warn(&dev->intf->dev, ++ "Unhandled error event (%d)\n", error_code); ++ break; ++ } ++} ++ + static void kvaser_usb_leaf_start_chip_reply(const struct kvaser_usb *dev, + const struct kvaser_cmd *cmd) + { +@@ -1049,9 +1471,31 @@ static void kvaser_usb_leaf_stop_chip_reply(const struct kvaser_usb *dev, + complete(&priv->stop_comp); + } + ++static void kvaser_usb_leaf_get_busparams_reply(const struct kvaser_usb *dev, ++ const struct kvaser_cmd *cmd) ++{ ++ struct kvaser_usb_net_priv *priv; ++ u8 channel = cmd->u.busparams.channel; ++ ++ if (channel >= dev->nchannels) { ++ dev_err(&dev->intf->dev, ++ "Invalid channel number (%d)\n", channel); ++ return; ++ } ++ ++ priv = dev->nets[channel]; ++ memcpy(&priv->busparams_nominal, &cmd->u.busparams.busparams, ++ sizeof(priv->busparams_nominal)); ++ ++ complete(&priv->get_busparams_comp); ++} ++ static void kvaser_usb_leaf_handle_command(const struct kvaser_usb *dev, const struct kvaser_cmd *cmd) { @@ -181256,7 +219221,7 @@ index 59ba7c7beec00..4312be05fc5b6 100644 switch (cmd->id) { case CMD_START_CHIP_REPLY: kvaser_usb_leaf_start_chip_reply(dev, cmd); -@@ -1067,14 +1227,14 @@ static void kvaser_usb_leaf_handle_command(const struct kvaser_usb *dev, +@@ -1067,14 +1511,14 @@ static void kvaser_usb_leaf_handle_command(const struct kvaser_usb *dev, break; case CMD_LEAF_LOG_MESSAGE: @@ -181273,8 +219238,18 @@ index 59ba7c7beec00..4312be05fc5b6 100644 kvaser_usb_leaf_leaf_rx_error(dev, cmd); else kvaser_usb_leaf_usbcan_rx_error(dev, cmd); -@@ -1086,12 +1246,12 @@ static void kvaser_usb_leaf_handle_command(const struct kvaser_usb *dev, +@@ -1084,14 +1528,22 @@ static void kvaser_usb_leaf_handle_command(const struct kvaser_usb *dev, + kvaser_usb_leaf_tx_acknowledge(dev, cmd); + break; ++ case CMD_ERROR_EVENT: ++ kvaser_usb_leaf_error_event(dev, cmd); ++ break; ++ ++ case CMD_GET_BUS_PARAMS_REPLY: ++ kvaser_usb_leaf_get_busparams_reply(dev, cmd); ++ break; ++ /* Ignored commands */ case CMD_USBCAN_CLOCK_OVERFLOW_EVENT: - if (dev->card_data.leaf.family != KVASER_USBCAN) @@ -181288,7 +219263,30 @@ index 59ba7c7beec00..4312be05fc5b6 100644 goto warn; break; -@@ -1225,24 +1385,11 @@ static int kvaser_usb_leaf_init_card(struct kvaser_usb *dev) +@@ -1164,7 +1616,7 @@ static int kvaser_usb_leaf_start_chip(struct kvaser_usb_net_priv *priv) + { + int err; + +- init_completion(&priv->start_comp); ++ reinit_completion(&priv->start_comp); + + err = kvaser_usb_leaf_send_simple_cmd(priv->dev, CMD_START_CHIP, + priv->channel); +@@ -1180,9 +1632,12 @@ static int kvaser_usb_leaf_start_chip(struct kvaser_usb_net_priv *priv) + + static int kvaser_usb_leaf_stop_chip(struct kvaser_usb_net_priv *priv) + { ++ struct kvaser_usb_net_leaf_priv *leaf = priv->sub_priv; + int err; + +- init_completion(&priv->stop_comp); 
++ reinit_completion(&priv->stop_comp); ++ ++ cancel_delayed_work(&leaf->chip_state_req_work); + + err = kvaser_usb_leaf_send_simple_cmd(priv->dev, CMD_STOP_CHIP, + priv->channel); +@@ -1225,28 +1680,40 @@ static int kvaser_usb_leaf_init_card(struct kvaser_usb *dev) { struct kvaser_usb_dev_card_data *card_data = &dev->card_data; @@ -181309,11 +219307,87 @@ index 59ba7c7beec00..4312be05fc5b6 100644 - .brp_max = KVASER_USB_BRP_MAX, - .brp_inc = KVASER_USB_BRP_INC, -}; -- - static int kvaser_usb_leaf_set_bittiming(struct net_device *netdev) ++static int kvaser_usb_leaf_init_channel(struct kvaser_usb_net_priv *priv) ++{ ++ struct kvaser_usb_net_leaf_priv *leaf; ++ ++ leaf = devm_kzalloc(&priv->dev->intf->dev, sizeof(*leaf), GFP_KERNEL); ++ if (!leaf) ++ return -ENOMEM; ++ ++ leaf->net = priv; ++ INIT_DELAYED_WORK(&leaf->chip_state_req_work, ++ kvaser_usb_leaf_chip_state_req_work); ++ ++ priv->sub_priv = leaf; ++ ++ return 0; ++} ++ ++static void kvaser_usb_leaf_remove_channel(struct kvaser_usb_net_priv *priv) ++{ ++ struct kvaser_usb_net_leaf_priv *leaf = priv->sub_priv; ++ ++ if (leaf) ++ cancel_delayed_work_sync(&leaf->chip_state_req_work); ++} + +-static int kvaser_usb_leaf_set_bittiming(struct net_device *netdev) ++static int kvaser_usb_leaf_set_bittiming(const struct net_device *netdev, ++ const struct kvaser_usb_busparams *busparams) { struct kvaser_usb_net_priv *priv = netdev_priv(netdev); -@@ -1283,9 +1430,13 @@ static int kvaser_usb_leaf_set_mode(struct net_device *netdev, +- struct can_bittiming *bt = &priv->can.bittiming; + struct kvaser_usb *dev = priv->dev; + struct kvaser_cmd *cmd; + int rc; +@@ -1259,15 +1726,8 @@ static int kvaser_usb_leaf_set_bittiming(struct net_device *netdev) + cmd->len = CMD_HEADER_LEN + sizeof(struct kvaser_cmd_busparams); + cmd->u.busparams.channel = priv->channel; + cmd->u.busparams.tid = 0xff; +- cmd->u.busparams.bitrate = cpu_to_le32(bt->bitrate); +- cmd->u.busparams.sjw = bt->sjw; +- cmd->u.busparams.tseg1 = bt->prop_seg + bt->phase_seg1; +- cmd->u.busparams.tseg2 = bt->phase_seg2; +- +- if (priv->can.ctrlmode & CAN_CTRLMODE_3_SAMPLES) +- cmd->u.busparams.no_samp = 3; +- else +- cmd->u.busparams.no_samp = 1; ++ memcpy(&cmd->u.busparams.busparams, busparams, ++ sizeof(cmd->u.busparams.busparams)); + + rc = kvaser_usb_send_cmd(dev, cmd, cmd->len); + +@@ -1275,6 +1735,27 @@ static int kvaser_usb_leaf_set_bittiming(struct net_device *netdev) + return rc; + } + ++static int kvaser_usb_leaf_get_busparams(struct kvaser_usb_net_priv *priv) ++{ ++ int err; ++ ++ if (priv->dev->driver_info->family == KVASER_USBCAN) ++ return -EOPNOTSUPP; ++ ++ reinit_completion(&priv->get_busparams_comp); ++ ++ err = kvaser_usb_leaf_send_simple_cmd(priv->dev, CMD_GET_BUS_PARAMS, ++ priv->channel); ++ if (err) ++ return err; ++ ++ if (!wait_for_completion_timeout(&priv->get_busparams_comp, ++ msecs_to_jiffies(KVASER_USB_TIMEOUT))) ++ return -ETIMEDOUT; ++ ++ return 0; ++} ++ + static int kvaser_usb_leaf_set_mode(struct net_device *netdev, + enum can_mode mode) + { +@@ -1283,9 +1764,13 @@ static int kvaser_usb_leaf_set_mode(struct net_device *netdev, switch (mode) { case CAN_MODE_START: @@ -181327,7 +219401,27 @@ index 59ba7c7beec00..4312be05fc5b6 100644 break; default: return -EOPNOTSUPP; -@@ -1348,11 +1499,3 @@ const struct kvaser_usb_dev_ops kvaser_usb_leaf_dev_ops = { +@@ -1332,14 +1817,18 @@ static int kvaser_usb_leaf_setup_endpoints(struct kvaser_usb *dev) + const struct kvaser_usb_dev_ops kvaser_usb_leaf_dev_ops = { + .dev_set_mode = kvaser_usb_leaf_set_mode, + 
.dev_set_bittiming = kvaser_usb_leaf_set_bittiming, ++ .dev_get_busparams = kvaser_usb_leaf_get_busparams, + .dev_set_data_bittiming = NULL, ++ .dev_get_data_busparams = NULL, + .dev_get_berr_counter = kvaser_usb_leaf_get_berr_counter, + .dev_setup_endpoints = kvaser_usb_leaf_setup_endpoints, + .dev_init_card = kvaser_usb_leaf_init_card, ++ .dev_init_channel = kvaser_usb_leaf_init_channel, ++ .dev_remove_channel = kvaser_usb_leaf_remove_channel, + .dev_get_software_info = kvaser_usb_leaf_get_software_info, + .dev_get_software_details = NULL, + .dev_get_card_info = kvaser_usb_leaf_get_card_info, +- .dev_get_capabilities = NULL, ++ .dev_get_capabilities = kvaser_usb_leaf_get_capabilities, + .dev_set_opt_mode = kvaser_usb_leaf_set_opt_mode, + .dev_start_chip = kvaser_usb_leaf_start_chip, + .dev_stop_chip = kvaser_usb_leaf_stop_chip, +@@ -1348,11 +1837,3 @@ const struct kvaser_usb_dev_ops kvaser_usb_leaf_dev_ops = { .dev_read_bulk_callback = kvaser_usb_leaf_read_bulk_callback, .dev_frame_to_cmd = kvaser_usb_leaf_frame_to_cmd, }; @@ -181340,7 +219434,7 @@ index 59ba7c7beec00..4312be05fc5b6 100644 - .bittiming_const = &kvaser_usb_leaf_bittiming_const, -}; diff --git a/drivers/net/can/usb/mcba_usb.c b/drivers/net/can/usb/mcba_usb.c -index a1a154c08b7f7..023bd34d48e3c 100644 +index a1a154c08b7f7..e9ccdcce01cc3 100644 --- a/drivers/net/can/usb/mcba_usb.c +++ b/drivers/net/can/usb/mcba_usb.c @@ -33,10 +33,6 @@ @@ -181354,7 +219448,18 @@ index a1a154c08b7f7..023bd34d48e3c 100644 /* Microchip command id */ #define MBCA_CMD_RECEIVE_MESSAGE 0xE3 #define MBCA_CMD_I_AM_ALIVE_FROM_CAN 0xF5 -@@ -84,6 +80,8 @@ struct mcba_priv { +@@ -51,6 +47,10 @@ + #define MCBA_VER_REQ_USB 1 + #define MCBA_VER_REQ_CAN 2 + ++/* Drive the CAN_RES signal LOW "0" to activate R24 and R25 */ ++#define MCBA_VER_TERMINATION_ON 0 ++#define MCBA_VER_TERMINATION_OFF 1 ++ + #define MCBA_SIDL_EXID_MASK 0x8 + #define MCBA_DLC_MASK 0xf + #define MCBA_DLC_RTR_MASK 0x40 +@@ -84,6 +84,8 @@ struct mcba_priv { atomic_t free_ctx_cnt; void *rxbuf[MCBA_MAX_RX_URBS]; dma_addr_t rxbuf_dma[MCBA_MAX_RX_URBS]; @@ -181363,7 +219468,7 @@ index a1a154c08b7f7..023bd34d48e3c 100644 }; /* CAN frame */ -@@ -272,10 +270,8 @@ static netdev_tx_t mcba_usb_xmit(struct mcba_priv *priv, +@@ -272,10 +274,8 @@ static netdev_tx_t mcba_usb_xmit(struct mcba_priv *priv, memcpy(buf, usb_msg, MCBA_USB_TX_BUFF_SIZE); @@ -181376,7 +219481,7 @@ index a1a154c08b7f7..023bd34d48e3c 100644 urb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP; usb_anchor_urb(urb, &priv->tx_submitted); -@@ -368,7 +364,6 @@ static netdev_tx_t mcba_usb_start_xmit(struct sk_buff *skb, +@@ -368,7 +368,6 @@ static netdev_tx_t mcba_usb_start_xmit(struct sk_buff *skb, xmit_failed: can_free_echo_skb(priv->netdev, ctx->ndx, NULL); mcba_usb_free_ctx(ctx); @@ -181384,7 +219489,16 @@ index a1a154c08b7f7..023bd34d48e3c 100644 stats->tx_dropped++; return NETDEV_TX_OK; -@@ -611,7 +606,7 @@ static void mcba_usb_read_bulk_callback(struct urb *urb) +@@ -474,7 +473,7 @@ static void mcba_usb_process_ka_usb(struct mcba_priv *priv, + priv->usb_ka_first_pass = false; + } + +- if (msg->termination_state) ++ if (msg->termination_state == MCBA_VER_TERMINATION_ON) + priv->can.termination = MCBA_TERMINATION_ENABLED; + else + priv->can.termination = MCBA_TERMINATION_DISABLED; +@@ -611,7 +610,7 @@ static void mcba_usb_read_bulk_callback(struct urb *urb) resubmit_urb: usb_fill_bulk_urb(urb, priv->udev, @@ -181393,7 +219507,7 @@ index a1a154c08b7f7..023bd34d48e3c 100644 urb->transfer_buffer, MCBA_USB_RX_BUFF_SIZE, 
mcba_usb_read_bulk_callback, priv); -@@ -656,7 +651,7 @@ static int mcba_usb_start(struct mcba_priv *priv) +@@ -656,7 +655,7 @@ static int mcba_usb_start(struct mcba_priv *priv) urb->transfer_dma = buf_dma; usb_fill_bulk_urb(urb, priv->udev, @@ -181402,7 +219516,19 @@ index a1a154c08b7f7..023bd34d48e3c 100644 buf, MCBA_USB_RX_BUFF_SIZE, mcba_usb_read_bulk_callback, priv); urb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP; -@@ -810,6 +805,13 @@ static int mcba_usb_probe(struct usb_interface *intf, +@@ -794,9 +793,9 @@ static int mcba_set_termination(struct net_device *netdev, u16 term) + }; + + if (term == MCBA_TERMINATION_ENABLED) +- usb_msg.termination = 1; ++ usb_msg.termination = MCBA_VER_TERMINATION_ON; + else +- usb_msg.termination = 0; ++ usb_msg.termination = MCBA_VER_TERMINATION_OFF; + + mcba_usb_xmit_cmd(priv, (struct mcba_usb_msg *)&usb_msg); + +@@ -810,6 +809,13 @@ static int mcba_usb_probe(struct usb_interface *intf, struct mcba_priv *priv; int err; struct usb_device *usbdev = interface_to_usbdev(intf); @@ -181416,7 +219542,7 @@ index a1a154c08b7f7..023bd34d48e3c 100644 netdev = alloc_candev(sizeof(struct mcba_priv), MCBA_MAX_TX_URBS); if (!netdev) { -@@ -855,6 +857,9 @@ static int mcba_usb_probe(struct usb_interface *intf, +@@ -855,6 +861,9 @@ static int mcba_usb_probe(struct usb_interface *intf, goto cleanup_free_candev; } @@ -181724,6 +219850,59 @@ index a7e2fcf2df2c9..edbe5e7f1cb6b 100644 } static int bcm_sf2_cfp_rule_cmp(struct bcm_sf2_priv *priv, int port, +diff --git a/drivers/net/dsa/dsa_loop.c b/drivers/net/dsa/dsa_loop.c +index e638e3eea9112..e6e1054339b59 100644 +--- a/drivers/net/dsa/dsa_loop.c ++++ b/drivers/net/dsa/dsa_loop.c +@@ -376,6 +376,17 @@ static struct mdio_driver dsa_loop_drv = { + + #define NUM_FIXED_PHYS (DSA_LOOP_NUM_PORTS - 2) + ++static void dsa_loop_phydevs_unregister(void) ++{ ++ unsigned int i; ++ ++ for (i = 0; i < NUM_FIXED_PHYS; i++) ++ if (!IS_ERR(phydevs[i])) { ++ fixed_phy_unregister(phydevs[i]); ++ phy_device_free(phydevs[i]); ++ } ++} ++ + static int __init dsa_loop_init(void) + { + struct fixed_phy_status status = { +@@ -383,23 +394,23 @@ static int __init dsa_loop_init(void) + .speed = SPEED_100, + .duplex = DUPLEX_FULL, + }; +- unsigned int i; ++ unsigned int i, ret; + + for (i = 0; i < NUM_FIXED_PHYS; i++) + phydevs[i] = fixed_phy_register(PHY_POLL, &status, NULL); + +- return mdio_driver_register(&dsa_loop_drv); ++ ret = mdio_driver_register(&dsa_loop_drv); ++ if (ret) ++ dsa_loop_phydevs_unregister(); ++ ++ return ret; + } + module_init(dsa_loop_init); + + static void __exit dsa_loop_exit(void) + { +- unsigned int i; +- + mdio_driver_unregister(&dsa_loop_drv); +- for (i = 0; i < NUM_FIXED_PHYS; i++) +- if (!IS_ERR(phydevs[i])) +- fixed_phy_unregister(phydevs[i]); ++ dsa_loop_phydevs_unregister(); + } + module_exit(dsa_loop_exit); + diff --git a/drivers/net/dsa/hirschmann/hellcreek.c b/drivers/net/dsa/hirschmann/hellcreek.c index 354655f9ed003..950a54ec4b59b 100644 --- a/drivers/net/dsa/hirschmann/hellcreek.c @@ -181870,7 +220049,7 @@ index 2572c6087bb5a..b28baab6d56a1 100644 if (!leds) { dev_err(hellcreek->dev, "No LEDs specified in device tree!\n"); diff --git a/drivers/net/dsa/lan9303-core.c b/drivers/net/dsa/lan9303-core.c -index 89f920289ae21..0b6f29ee87b56 100644 +index 89f920289ae21..22547b10dfe50 100644 --- a/drivers/net/dsa/lan9303-core.c +++ b/drivers/net/dsa/lan9303-core.c @@ -10,6 +10,7 @@ @@ -181881,7 +220060,29 @@ index 89f920289ae21..0b6f29ee87b56 100644 #include <linux/etherdevice.h> #include "lan9303.h" -@@ -1083,21 
+1084,27 @@ static void lan9303_adjust_link(struct dsa_switch *ds, int port, +@@ -958,7 +959,7 @@ static const struct lan9303_mib_desc lan9303_mib[] = { + { .offset = LAN9303_MAC_TX_BRDCST_CNT_0, .name = "TxBroad", }, + { .offset = LAN9303_MAC_TX_PAUSE_CNT_0, .name = "TxPause", }, + { .offset = LAN9303_MAC_TX_MULCST_CNT_0, .name = "TxMulti", }, +- { .offset = LAN9303_MAC_RX_UNDSZE_CNT_0, .name = "TxUnderRun", }, ++ { .offset = LAN9303_MAC_RX_UNDSZE_CNT_0, .name = "RxShort", }, + { .offset = LAN9303_MAC_TX_64_CNT_0, .name = "Tx64Byte", }, + { .offset = LAN9303_MAC_TX_127_CNT_0, .name = "Tx128Byte", }, + { .offset = LAN9303_MAC_TX_255_CNT_0, .name = "Tx256Byte", }, +@@ -1002,9 +1003,11 @@ static void lan9303_get_ethtool_stats(struct dsa_switch *ds, int port, + ret = lan9303_read_switch_port( + chip, port, lan9303_mib[u].offset, ®); + +- if (ret) ++ if (ret) { + dev_warn(chip->dev, "Reading status port %d reg %u failed\n", + port, lan9303_mib[u].offset); ++ reg = 0; ++ } + data[u] = reg; + } + } +@@ -1083,21 +1086,27 @@ static void lan9303_adjust_link(struct dsa_switch *ds, int port, static int lan9303_port_enable(struct dsa_switch *ds, int port, struct phy_device *phy) { @@ -181911,7 +220112,7 @@ index 89f920289ae21..0b6f29ee87b56 100644 lan9303_disable_processing_port(chip, port); lan9303_phy_write(ds, chip->phy_addr_base + port, MII_BMCR, BMCR_PDOWN); } -@@ -1309,7 +1316,7 @@ static int lan9303_probe_reset_gpio(struct lan9303 *chip, +@@ -1309,7 +1318,7 @@ static int lan9303_probe_reset_gpio(struct lan9303 *chip, struct device_node *np) { chip->reset_gpio = devm_gpiod_get_optional(chip->dev, "reset", @@ -182019,9 +220220,22 @@ index 866767b70d65b..b0a7dee27ffc9 100644 .remove = ksz8795_spi_remove, .shutdown = ksz8795_spi_shutdown, diff --git a/drivers/net/dsa/microchip/ksz9477.c b/drivers/net/dsa/microchip/ksz9477.c -index 854e25f43fa70..379b38c5844f4 100644 +index 854e25f43fa70..bf788e17f408f 100644 --- a/drivers/net/dsa/microchip/ksz9477.c +++ b/drivers/net/dsa/microchip/ksz9477.c +@@ -675,10 +675,10 @@ static int ksz9477_port_fdb_del(struct dsa_switch *ds, int port, + ksz_read32(dev, REG_SW_ALU_VAL_D, &alu_table[3]); + + /* clear forwarding port */ +- alu_table[2] &= ~BIT(port); ++ alu_table[1] &= ~BIT(port); + + /* if there is no port to forward, clear table */ +- if ((alu_table[2] & ALU_V_PORT_MAP) == 0) { ++ if ((alu_table[1] & ALU_V_PORT_MAP) == 0) { + alu_table[0] = 0; + alu_table[1] = 0; + alu_table[2] = 0; @@ -759,6 +759,9 @@ static int ksz9477_port_fdb_dump(struct dsa_switch *ds, int port, goto exit; } @@ -182089,7 +220303,7 @@ index 7c2968a639eba..4c4e6990c0ae9 100644 "microchip,synclko-125"); } diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c -index 9890672a206d0..704ba461a6000 100644 +index 9890672a206d0..c1505de23957f 100644 --- a/drivers/net/dsa/mt7530.c +++ b/drivers/net/dsa/mt7530.c @@ -501,14 +501,19 @@ static bool mt7531_dual_sgmii_supported(struct mt7530_priv *priv) @@ -182123,7 +220337,41 @@ index 9890672a206d0..704ba461a6000 100644 } static void -@@ -2066,7 +2069,7 @@ mt7530_setup_mdio(struct mt7530_priv *priv) +@@ -1287,14 +1290,26 @@ mt7530_port_set_vlan_aware(struct dsa_switch *ds, int port) + if (!priv->ports[port].pvid) + mt7530_rmw(priv, MT7530_PVC_P(port), ACC_FRM_MASK, + MT7530_VLAN_ACC_TAGGED); +- } + +- /* Set the port as a user port which is to be able to recognize VID +- * from incoming packets before fetching entry within the VLAN table. 
+- */ +- mt7530_rmw(priv, MT7530_PVC_P(port), VLAN_ATTR_MASK | PVC_EG_TAG_MASK, +- VLAN_ATTR(MT7530_VLAN_USER) | +- PVC_EG_TAG(MT7530_VLAN_EG_DISABLED)); ++ /* Set the port as a user port which is to be able to recognize ++ * VID from incoming packets before fetching entry within the ++ * VLAN table. ++ */ ++ mt7530_rmw(priv, MT7530_PVC_P(port), ++ VLAN_ATTR_MASK | PVC_EG_TAG_MASK, ++ VLAN_ATTR(MT7530_VLAN_USER) | ++ PVC_EG_TAG(MT7530_VLAN_EG_DISABLED)); ++ } else { ++ /* Also set CPU ports to the "user" VLAN port attribute, to ++ * allow VLAN classification, but keep the EG_TAG attribute as ++ * "consistent" (i.o.w. don't change its value) for packets ++ * received by the switch from the CPU, so that tagged packets ++ * are forwarded to user ports as tagged, and untagged as ++ * untagged. ++ */ ++ mt7530_rmw(priv, MT7530_PVC_P(port), VLAN_ATTR_MASK, ++ VLAN_ATTR(MT7530_VLAN_USER)); ++ } + } + + static void +@@ -2066,7 +2081,7 @@ mt7530_setup_mdio(struct mt7530_priv *priv) if (priv->irq) mt7530_setup_mdio_irq(priv); @@ -182132,7 +220380,7 @@ index 9890672a206d0..704ba461a6000 100644 if (ret) { dev_err(dev, "failed to register MDIO bus: %d\n", ret); if (priv->irq) -@@ -2216,6 +2219,7 @@ mt7530_setup(struct dsa_switch *ds) +@@ -2216,6 +2231,7 @@ mt7530_setup(struct dsa_switch *ds) ret = of_get_phy_mode(mac_np, &interface); if (ret && ret != -ENODEV) { of_node_put(mac_np); @@ -182140,7 +220388,7 @@ index 9890672a206d0..704ba461a6000 100644 return ret; } id = of_mdio_parse_addr(ds->dev, phy_node); -@@ -2291,6 +2295,8 @@ mt7531_setup(struct dsa_switch *ds) +@@ -2291,6 +2307,8 @@ mt7531_setup(struct dsa_switch *ds) SYS_CTRL_PHY_RST | SYS_CTRL_SW_RST | SYS_CTRL_REG_RST); @@ -182149,7 +220397,7 @@ index 9890672a206d0..704ba461a6000 100644 if (mt7531_dual_sgmii_supported(priv)) { priv->p5_intf_sel = P5_INTF_SEL_GMAC5_SGMII; -@@ -2526,13 +2532,7 @@ static void mt7531_sgmii_validate(struct mt7530_priv *priv, int port, +@@ -2526,13 +2544,7 @@ static void mt7531_sgmii_validate(struct mt7530_priv *priv, int port, /* Port5 supports ethier RGMII or SGMII. * Port6 supports SGMII only. */ @@ -182164,7 +220412,7 @@ index 9890672a206d0..704ba461a6000 100644 phylink_set(supported, 2500baseX_Full); phylink_set(supported, 2500baseT_Full); } -@@ -2872,8 +2872,6 @@ mt7531_cpu_port_config(struct dsa_switch *ds, int port) +@@ -2872,8 +2884,6 @@ mt7531_cpu_port_config(struct dsa_switch *ds, int port) case 6: interface = PHY_INTERFACE_MODE_2500BASEX; @@ -182173,7 +220421,7 @@ index 9890672a206d0..704ba461a6000 100644 priv->p6_interface = interface; break; default: -@@ -2900,8 +2898,6 @@ static void +@@ -2900,8 +2910,6 @@ static void mt7530_mac_port_validate(struct dsa_switch *ds, int port, unsigned long *supported) { @@ -182182,7 +220430,7 @@ index 9890672a206d0..704ba461a6000 100644 } static void mt7531_mac_port_validate(struct dsa_switch *ds, int port, -@@ -2928,7 +2924,7 @@ mt753x_phylink_validate(struct dsa_switch *ds, int port, +@@ -2928,7 +2936,7 @@ mt753x_phylink_validate(struct dsa_switch *ds, int port, phylink_set_port_modes(mask); @@ -182191,7 +220439,7 @@ index 9890672a206d0..704ba461a6000 100644 !phy_interface_mode_is_8023z(state->interface)) { phylink_set(mask, 10baseT_Half); phylink_set(mask, 10baseT_Full); -@@ -2938,8 +2934,10 @@ mt753x_phylink_validate(struct dsa_switch *ds, int port, +@@ -2938,8 +2946,10 @@ mt753x_phylink_validate(struct dsa_switch *ds, int port, } /* This switch only supports 1G full-duplex. 
*/ @@ -182217,6 +220465,28 @@ index a4c6eb9a52d0d..83dca9179aa07 100644 /* Do not force flow control, disable Ingress and Egress * Header tagging, disable VLAN tunneling, and set the port * state to Forwarding. Additionally, if this is the CPU +diff --git a/drivers/net/dsa/mv88e6xxx/Kconfig b/drivers/net/dsa/mv88e6xxx/Kconfig +index 7a2445a34eb77..e3181d5471dfe 100644 +--- a/drivers/net/dsa/mv88e6xxx/Kconfig ++++ b/drivers/net/dsa/mv88e6xxx/Kconfig +@@ -2,7 +2,6 @@ + config NET_DSA_MV88E6XXX + tristate "Marvell 88E6xxx Ethernet switch fabric support" + depends on NET_DSA +- depends on PTP_1588_CLOCK_OPTIONAL + select IRQ_DOMAIN + select NET_DSA_TAG_EDSA + select NET_DSA_TAG_DSA +@@ -13,7 +12,8 @@ config NET_DSA_MV88E6XXX + config NET_DSA_MV88E6XXX_PTP + bool "PTP support for Marvell 88E6xxx" + default n +- depends on NET_DSA_MV88E6XXX && PTP_1588_CLOCK ++ depends on (NET_DSA_MV88E6XXX = y && PTP_1588_CLOCK = y) || \ ++ (NET_DSA_MV88E6XXX = m && PTP_1588_CLOCK) + help + Say Y to enable PTP hardware timestamping on Marvell 88E6xxx switch + chips that support it. diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index 8dadcae93c9b5..f9efd0c8bab8d 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c @@ -183070,22 +221340,34 @@ index a89093bc6c6ad..9e3b572ed999e 100644 if (smi->vlan4k_enabled) max = RTL8366RB_NUM_VIDS - 1; diff --git a/drivers/net/dsa/sja1105/sja1105_devlink.c b/drivers/net/dsa/sja1105/sja1105_devlink.c -index 0569ff066634d..10c6fea1227fa 100644 +index 0569ff066634d..bdbbff2a79095 100644 --- a/drivers/net/dsa/sja1105/sja1105_devlink.c +++ b/drivers/net/dsa/sja1105/sja1105_devlink.c -@@ -93,7 +93,7 @@ static int sja1105_setup_devlink_regions(struct dsa_switch *ds) +@@ -93,8 +93,10 @@ static int sja1105_setup_devlink_regions(struct dsa_switch *ds) region = dsa_devlink_region_create(ds, ops, 1, size); if (IS_ERR(region)) { - while (i-- >= 0) + while (--i >= 0) dsa_devlink_region_destroy(priv->regions[i]); ++ ++ kfree(priv->regions); return PTR_ERR(region); } + diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c -index 924c3f129992f..1a2a7536ff8aa 100644 +index 924c3f129992f..ef4d8d6c2bd7a 100644 --- a/drivers/net/dsa/sja1105/sja1105_main.c +++ b/drivers/net/dsa/sja1105/sja1105_main.c +@@ -1025,7 +1025,7 @@ static int sja1105_init_l2_policing(struct sja1105_private *priv) + + policing[bcast].sharindx = port; + /* Only SJA1110 has multicast policers */ +- if (mcast <= table->ops->max_entry_count) ++ if (mcast < table->ops->max_entry_count) + policing[mcast].sharindx = port; + } + @@ -3372,12 +3372,28 @@ static const struct of_device_id sja1105_dt_ids[] = { }; MODULE_DEVICE_TABLE(of, sja1105_dt_ids); @@ -183115,6 +221397,30 @@ index 924c3f129992f..1a2a7536ff8aa 100644 .probe = sja1105_probe, .remove = sja1105_remove, .shutdown = sja1105_shutdown, +diff --git a/drivers/net/dsa/sja1105/sja1105_mdio.c b/drivers/net/dsa/sja1105/sja1105_mdio.c +index 215dd17ca7906..4059fcc8c8326 100644 +--- a/drivers/net/dsa/sja1105/sja1105_mdio.c ++++ b/drivers/net/dsa/sja1105/sja1105_mdio.c +@@ -256,6 +256,9 @@ static int sja1105_base_tx_mdio_read(struct mii_bus *bus, int phy, int reg) + u32 tmp; + int rc; + ++ if (reg & MII_ADDR_C45) ++ return -EOPNOTSUPP; ++ + rc = sja1105_xfer_u32(priv, SPI_READ, regs->mdio_100base_tx + reg, + &tmp, NULL); + if (rc < 0) +@@ -272,6 +275,9 @@ static int sja1105_base_tx_mdio_write(struct mii_bus *bus, int phy, int reg, + const struct sja1105_regs *regs = priv->info->regs; + u32 tmp = val; + ++ if (reg & 
MII_ADDR_C45) ++ return -EOPNOTSUPP; ++ + return sja1105_xfer_u32(priv, SPI_WRITE, regs->mdio_100base_tx + reg, + &tmp, NULL); + } diff --git a/drivers/net/dsa/vitesse-vsc73xx-spi.c b/drivers/net/dsa/vitesse-vsc73xx-spi.c index 645398901e05e..922ae22fad66b 100644 --- a/drivers/net/dsa/vitesse-vsc73xx-spi.c @@ -183280,6 +221586,18 @@ index 412ae3e43ffb7..35ac6fe7529c5 100644 source "drivers/net/ethernet/smsc/Kconfig" source "drivers/net/ethernet/socionext/Kconfig" source "drivers/net/ethernet/stmicro/Kconfig" +diff --git a/drivers/net/ethernet/aeroflex/greth.c b/drivers/net/ethernet/aeroflex/greth.c +index c560ad06f0be3..a95bac4e14f6a 100644 +--- a/drivers/net/ethernet/aeroflex/greth.c ++++ b/drivers/net/ethernet/aeroflex/greth.c +@@ -258,6 +258,7 @@ static int greth_init_rings(struct greth_private *greth) + if (dma_mapping_error(greth->dev, dma_addr)) { + if (netif_msg_ifup(greth)) + dev_err(greth->dev, "Could not create initial DMA mapping\n"); ++ dev_kfree_skb(skb); + goto cleanup; + } + greth->rx_skbuff[i] = skb; diff --git a/drivers/net/ethernet/altera/altera_tse_main.c b/drivers/net/ethernet/altera/altera_tse_main.c index 1c00d719e5d76..b51f5b9577e0a 100644 --- a/drivers/net/ethernet/altera/altera_tse_main.c @@ -183334,11 +221652,174 @@ index 1c00d719e5d76..b51f5b9577e0a 100644 /* MAC address space */ ret = request_and_map(pdev, "control_port", &control_port, +diff --git a/drivers/net/ethernet/amazon/ena/ena_com.c b/drivers/net/ethernet/amazon/ena/ena_com.c +index ab413fc1f68e3..f0faad149a3bd 100644 +--- a/drivers/net/ethernet/amazon/ena/ena_com.c ++++ b/drivers/net/ethernet/amazon/ena/ena_com.c +@@ -2392,29 +2392,18 @@ int ena_com_fill_hash_function(struct ena_com_dev *ena_dev, + return -EOPNOTSUPP; + } + +- switch (func) { +- case ENA_ADMIN_TOEPLITZ: +- if (key) { +- if (key_len != sizeof(hash_key->key)) { +- netdev_err(ena_dev->net_device, +- "key len (%u) doesn't equal the supported size (%zu)\n", +- key_len, sizeof(hash_key->key)); +- return -EINVAL; +- } +- memcpy(hash_key->key, key, key_len); +- rss->hash_init_val = init_val; +- hash_key->key_parts = key_len / sizeof(hash_key->key[0]); ++ if ((func == ENA_ADMIN_TOEPLITZ) && key) { ++ if (key_len != sizeof(hash_key->key)) { ++ netdev_err(ena_dev->net_device, ++ "key len (%u) doesn't equal the supported size (%zu)\n", ++ key_len, sizeof(hash_key->key)); ++ return -EINVAL; + } +- break; +- case ENA_ADMIN_CRC32: +- rss->hash_init_val = init_val; +- break; +- default: +- netdev_err(ena_dev->net_device, "Invalid hash function (%d)\n", +- func); +- return -EINVAL; ++ memcpy(hash_key->key, key, key_len); ++ hash_key->key_parts = key_len / sizeof(hash_key->key[0]); + } + ++ rss->hash_init_val = init_val; + old_func = rss->hash_func; + rss->hash_func = func; + rc = ena_com_set_hash_function(ena_dev); +diff --git a/drivers/net/ethernet/amazon/ena/ena_ethtool.c b/drivers/net/ethernet/amazon/ena/ena_ethtool.c +index 13e745cf3781b..413082f10dc1c 100644 +--- a/drivers/net/ethernet/amazon/ena/ena_ethtool.c ++++ b/drivers/net/ethernet/amazon/ena/ena_ethtool.c +@@ -880,11 +880,7 @@ static int ena_set_tunable(struct net_device *netdev, + switch (tuna->id) { + case ETHTOOL_RX_COPYBREAK: + len = *(u32 *)data; +- if (len > adapter->netdev->mtu) { +- ret = -EINVAL; +- break; +- } +- adapter->rx_copybreak = len; ++ ret = ena_set_rx_copybreak(adapter, len); + break; + default: + ret = -EINVAL; diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c -index 0e43000614abd..8f08e0bae3004 100644 +index 
0e43000614abd..23c9750850e98 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c -@@ -1288,26 +1288,22 @@ static int handle_invalid_req_id(struct ena_ring *ring, u16 req_id, +@@ -378,9 +378,9 @@ static int ena_xdp_xmit(struct net_device *dev, int n, + + static int ena_xdp_execute(struct ena_ring *rx_ring, struct xdp_buff *xdp) + { ++ u32 verdict = ENA_XDP_PASS; + struct bpf_prog *xdp_prog; + struct ena_ring *xdp_ring; +- u32 verdict = XDP_PASS; + struct xdp_frame *xdpf; + u64 *xdp_stat; + +@@ -397,7 +397,7 @@ static int ena_xdp_execute(struct ena_ring *rx_ring, struct xdp_buff *xdp) + if (unlikely(!xdpf)) { + trace_xdp_exception(rx_ring->netdev, xdp_prog, verdict); + xdp_stat = &rx_ring->rx_stats.xdp_aborted; +- verdict = XDP_ABORTED; ++ verdict = ENA_XDP_DROP; + break; + } + +@@ -413,29 +413,35 @@ static int ena_xdp_execute(struct ena_ring *rx_ring, struct xdp_buff *xdp) + + spin_unlock(&xdp_ring->xdp_tx_lock); + xdp_stat = &rx_ring->rx_stats.xdp_tx; ++ verdict = ENA_XDP_TX; + break; + case XDP_REDIRECT: + if (likely(!xdp_do_redirect(rx_ring->netdev, xdp, xdp_prog))) { + xdp_stat = &rx_ring->rx_stats.xdp_redirect; ++ verdict = ENA_XDP_REDIRECT; + break; + } + trace_xdp_exception(rx_ring->netdev, xdp_prog, verdict); + xdp_stat = &rx_ring->rx_stats.xdp_aborted; +- verdict = XDP_ABORTED; ++ verdict = ENA_XDP_DROP; + break; + case XDP_ABORTED: + trace_xdp_exception(rx_ring->netdev, xdp_prog, verdict); + xdp_stat = &rx_ring->rx_stats.xdp_aborted; ++ verdict = ENA_XDP_DROP; + break; + case XDP_DROP: + xdp_stat = &rx_ring->rx_stats.xdp_drop; ++ verdict = ENA_XDP_DROP; + break; + case XDP_PASS: + xdp_stat = &rx_ring->rx_stats.xdp_pass; ++ verdict = ENA_XDP_PASS; + break; + default: + bpf_warn_invalid_xdp_action(verdict); + xdp_stat = &rx_ring->rx_stats.xdp_invalid; ++ verdict = ENA_XDP_DROP; + } + + ena_increase_stat(xdp_stat, 1, &rx_ring->syncp); +@@ -516,16 +522,18 @@ static void ena_xdp_exchange_program_rx_in_range(struct ena_adapter *adapter, + struct bpf_prog *prog, + int first, int count) + { ++ struct bpf_prog *old_bpf_prog; + struct ena_ring *rx_ring; + int i = 0; + + for (i = first; i < count; i++) { + rx_ring = &adapter->rx_ring[i]; +- xchg(&rx_ring->xdp_bpf_prog, prog); +- if (prog) { ++ old_bpf_prog = xchg(&rx_ring->xdp_bpf_prog, prog); ++ ++ if (!old_bpf_prog && prog) { + ena_xdp_register_rxq_info(rx_ring); + rx_ring->rx_headroom = XDP_PACKET_HEADROOM; +- } else { ++ } else if (old_bpf_prog && !prog) { + ena_xdp_unregister_rxq_info(rx_ring); + rx_ring->rx_headroom = NET_SKB_PAD; + } +@@ -676,6 +684,7 @@ static void ena_init_io_rings_common(struct ena_adapter *adapter, + ring->ena_dev = adapter->ena_dev; + ring->per_napi_packets = 0; + ring->cpu = 0; ++ ring->numa_node = 0; + ring->no_interrupt_event_cnt = 0; + u64_stats_init(&ring->syncp); + } +@@ -779,6 +788,7 @@ static int ena_setup_tx_resources(struct ena_adapter *adapter, int qid) + tx_ring->next_to_use = 0; + tx_ring->next_to_clean = 0; + tx_ring->cpu = ena_irq->cpu; ++ tx_ring->numa_node = node; + return 0; + + err_push_buf_intermediate_buf: +@@ -911,6 +921,7 @@ static int ena_setup_rx_resources(struct ena_adapter *adapter, + rx_ring->next_to_clean = 0; + rx_ring->next_to_use = 0; + rx_ring->cpu = ena_irq->cpu; ++ rx_ring->numa_node = node; + + return 0; + } +@@ -1288,26 +1299,22 @@ static int handle_invalid_req_id(struct ena_ring *ring, u16 req_id, static int validate_tx_req_id(struct ena_ring *tx_ring, u16 req_id) { @@ -183373,7 +221854,7 @@ index 
0e43000614abd..8f08e0bae3004 100644 return handle_invalid_req_id(xdp_ring, req_id, tx_info, true); } -@@ -1332,9 +1328,14 @@ static int ena_clean_tx_irq(struct ena_ring *tx_ring, u32 budget) +@@ -1332,9 +1339,14 @@ static int ena_clean_tx_irq(struct ena_ring *tx_ring, u32 budget) rc = ena_com_tx_comp_req_id_get(tx_ring->ena_com_io_cq, &req_id); @@ -183389,7 +221870,7 @@ index 0e43000614abd..8f08e0bae3004 100644 rc = validate_tx_req_id(tx_ring, req_id); if (rc) break; -@@ -1427,6 +1428,7 @@ static struct sk_buff *ena_rx_skb(struct ena_ring *rx_ring, +@@ -1427,6 +1439,7 @@ static struct sk_buff *ena_rx_skb(struct ena_ring *rx_ring, u16 *next_to_clean) { struct ena_rx_buffer *rx_info; @@ -183397,7 +221878,7 @@ index 0e43000614abd..8f08e0bae3004 100644 u16 len, req_id, buf = 0; struct sk_buff *skb; void *page_addr; -@@ -1439,8 +1441,14 @@ static struct sk_buff *ena_rx_skb(struct ena_ring *rx_ring, +@@ -1439,8 +1452,14 @@ static struct sk_buff *ena_rx_skb(struct ena_ring *rx_ring, rx_info = &rx_ring->rx_buffer_info[req_id]; if (unlikely(!rx_info->page)) { @@ -183414,7 +221895,111 @@ index 0e43000614abd..8f08e0bae3004 100644 return NULL; } -@@ -1896,9 +1904,14 @@ static int ena_clean_xdp_irq(struct ena_ring *xdp_ring, u32 budget) +@@ -1621,12 +1640,12 @@ static int ena_xdp_handle_buff(struct ena_ring *rx_ring, struct xdp_buff *xdp) + * we expect, then we simply drop it + */ + if (unlikely(rx_ring->ena_bufs[0].len > ENA_XDP_MAX_MTU)) +- return XDP_DROP; ++ return ENA_XDP_DROP; + + ret = ena_xdp_execute(rx_ring, xdp); + + /* The xdp program might expand the headers */ +- if (ret == XDP_PASS) { ++ if (ret == ENA_XDP_PASS) { + rx_info->page_offset = xdp->data - xdp->data_hard_start; + rx_ring->ena_bufs[0].len = xdp->data_end - xdp->data; + } +@@ -1665,7 +1684,7 @@ static int ena_clean_rx_irq(struct ena_ring *rx_ring, struct napi_struct *napi, + xdp_init_buff(&xdp, ENA_PAGE_SIZE, &rx_ring->xdp_rxq); + + do { +- xdp_verdict = XDP_PASS; ++ xdp_verdict = ENA_XDP_PASS; + skb = NULL; + ena_rx_ctx.ena_bufs = rx_ring->ena_bufs; + ena_rx_ctx.max_bufs = rx_ring->sgl_size; +@@ -1693,7 +1712,7 @@ static int ena_clean_rx_irq(struct ena_ring *rx_ring, struct napi_struct *napi, + xdp_verdict = ena_xdp_handle_buff(rx_ring, &xdp); + + /* allocate skb and fill it */ +- if (xdp_verdict == XDP_PASS) ++ if (xdp_verdict == ENA_XDP_PASS) + skb = ena_rx_skb(rx_ring, + rx_ring->ena_bufs, + ena_rx_ctx.descs, +@@ -1711,14 +1730,15 @@ static int ena_clean_rx_irq(struct ena_ring *rx_ring, struct napi_struct *napi, + /* Packets was passed for transmission, unmap it + * from RX side. 
+ */ +- if (xdp_verdict == XDP_TX || xdp_verdict == XDP_REDIRECT) { ++ if (xdp_verdict & ENA_XDP_FORWARDED) { + ena_unmap_rx_buff(rx_ring, + &rx_ring->rx_buffer_info[req_id]); + rx_ring->rx_buffer_info[req_id].page = NULL; + } + } +- if (xdp_verdict != XDP_PASS) { ++ if (xdp_verdict != ENA_XDP_PASS) { + xdp_flags |= xdp_verdict; ++ total_len += ena_rx_ctx.ena_bufs[0].len; + res_budget--; + continue; + } +@@ -1762,7 +1782,7 @@ static int ena_clean_rx_irq(struct ena_ring *rx_ring, struct napi_struct *napi, + ena_refill_rx_bufs(rx_ring, refill_required); + } + +- if (xdp_flags & XDP_REDIRECT) ++ if (xdp_flags & ENA_XDP_REDIRECT) + xdp_do_flush_map(); + + return work_done; +@@ -1819,8 +1839,9 @@ static void ena_adjust_adaptive_rx_intr_moderation(struct ena_napi *ena_napi) + static void ena_unmask_interrupt(struct ena_ring *tx_ring, + struct ena_ring *rx_ring) + { ++ u32 rx_interval = tx_ring->smoothed_interval; + struct ena_eth_io_intr_reg intr_reg; +- u32 rx_interval = 0; ++ + /* Rx ring can be NULL when for XDP tx queues which don't have an + * accompanying rx_ring pair. + */ +@@ -1858,20 +1879,27 @@ static void ena_update_ring_numa_node(struct ena_ring *tx_ring, + if (likely(tx_ring->cpu == cpu)) + goto out; + ++ tx_ring->cpu = cpu; ++ if (rx_ring) ++ rx_ring->cpu = cpu; ++ + numa_node = cpu_to_node(cpu); ++ ++ if (likely(tx_ring->numa_node == numa_node)) ++ goto out; ++ + put_cpu(); + + if (numa_node != NUMA_NO_NODE) { + ena_com_update_numa_node(tx_ring->ena_com_io_cq, numa_node); +- if (rx_ring) ++ tx_ring->numa_node = numa_node; ++ if (rx_ring) { ++ rx_ring->numa_node = numa_node; + ena_com_update_numa_node(rx_ring->ena_com_io_cq, + numa_node); ++ } + } + +- tx_ring->cpu = cpu; +- if (rx_ring) +- rx_ring->cpu = cpu; +- + return; + out: + put_cpu(); +@@ -1896,9 +1924,14 @@ static int ena_clean_xdp_irq(struct ena_ring *xdp_ring, u32 budget) rc = ena_com_tx_comp_req_id_get(xdp_ring->ena_com_io_cq, &req_id); @@ -183430,7 +222015,63 @@ index 0e43000614abd..8f08e0bae3004 100644 rc = validate_xdp_req_id(xdp_ring, req_id); if (rc) break; -@@ -4013,10 +4026,6 @@ static u32 ena_calc_max_io_queue_num(struct pci_dev *pdev, +@@ -1987,11 +2020,10 @@ static int ena_io_poll(struct napi_struct *napi, int budget) + if (ena_com_get_adaptive_moderation_enabled(rx_ring->ena_dev)) + ena_adjust_adaptive_rx_intr_moderation(ena_napi); + ++ ena_update_ring_numa_node(tx_ring, rx_ring); + ena_unmask_interrupt(tx_ring, rx_ring); + } + +- ena_update_ring_numa_node(tx_ring, rx_ring); +- + ret = rx_work_done; + } else { + ret = budget; +@@ -2378,7 +2410,7 @@ static int ena_create_io_tx_queue(struct ena_adapter *adapter, int qid) + ctx.mem_queue_type = ena_dev->tx_mem_queue_type; + ctx.msix_vector = msix_vector; + ctx.queue_size = tx_ring->ring_size; +- ctx.numa_node = cpu_to_node(tx_ring->cpu); ++ ctx.numa_node = tx_ring->numa_node; + + rc = ena_com_create_io_queue(ena_dev, &ctx); + if (rc) { +@@ -2446,7 +2478,7 @@ static int ena_create_io_rx_queue(struct ena_adapter *adapter, int qid) + ctx.mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST; + ctx.msix_vector = msix_vector; + ctx.queue_size = rx_ring->ring_size; +- ctx.numa_node = cpu_to_node(rx_ring->cpu); ++ ctx.numa_node = rx_ring->numa_node; + + rc = ena_com_create_io_queue(ena_dev, &ctx); + if (rc) { +@@ -2807,6 +2839,24 @@ int ena_update_queue_sizes(struct ena_adapter *adapter, + return dev_was_up ? 
ena_up(adapter) : 0; + } + ++int ena_set_rx_copybreak(struct ena_adapter *adapter, u32 rx_copybreak) ++{ ++ struct ena_ring *rx_ring; ++ int i; ++ ++ if (rx_copybreak > min_t(u16, adapter->netdev->mtu, ENA_PAGE_SIZE)) ++ return -EINVAL; ++ ++ adapter->rx_copybreak = rx_copybreak; ++ ++ for (i = 0; i < adapter->num_io_queues; i++) { ++ rx_ring = &adapter->rx_ring[i]; ++ rx_ring->rx_copybreak = rx_copybreak; ++ } ++ ++ return 0; ++} ++ + int ena_update_queue_count(struct ena_adapter *adapter, u32 new_channel_count) + { + struct ena_com_dev *ena_dev = adapter->ena_dev; +@@ -4013,10 +4063,6 @@ static u32 ena_calc_max_io_queue_num(struct pci_dev *pdev, max_num_io_queues = min_t(u32, max_num_io_queues, io_tx_cq_num); /* 1 IRQ for mgmnt and 1 IRQs for each IO direction */ max_num_io_queues = min_t(u32, max_num_io_queues, pci_msix_vec_count(pdev) - 1); @@ -183441,6 +222082,70 @@ index 0e43000614abd..8f08e0bae3004 100644 return max_num_io_queues; } +@@ -4574,13 +4620,19 @@ static struct pci_driver ena_pci_driver = { + + static int __init ena_init(void) + { ++ int ret; ++ + ena_wq = create_singlethread_workqueue(DRV_MODULE_NAME); + if (!ena_wq) { + pr_err("Failed to create workqueue\n"); + return -ENOMEM; + } + +- return pci_register_driver(&ena_pci_driver); ++ ret = pci_register_driver(&ena_pci_driver); ++ if (ret) ++ destroy_workqueue(ena_wq); ++ ++ return ret; + } + + static void __exit ena_cleanup(void) +diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.h b/drivers/net/ethernet/amazon/ena/ena_netdev.h +index 0c39fc2fa345c..bf2a39c91c00d 100644 +--- a/drivers/net/ethernet/amazon/ena/ena_netdev.h ++++ b/drivers/net/ethernet/amazon/ena/ena_netdev.h +@@ -273,9 +273,11 @@ struct ena_ring { + bool disable_meta_caching; + u16 no_interrupt_event_cnt; + +- /* cpu for TPH */ ++ /* cpu and NUMA for TPH */ + int cpu; +- /* number of tx/rx_buffer_info's entries */ ++ int numa_node; ++ ++ /* number of tx/rx_buffer_info's entries */ + int ring_size; + + enum ena_admin_placement_policy_type tx_mem_queue_type; +@@ -404,6 +406,8 @@ int ena_update_queue_sizes(struct ena_adapter *adapter, + + int ena_update_queue_count(struct ena_adapter *adapter, u32 new_channel_count); + ++int ena_set_rx_copybreak(struct ena_adapter *adapter, u32 rx_copybreak); ++ + int ena_get_sset_count(struct net_device *netdev, int sset); + + enum ena_xdp_errors_t { +@@ -412,6 +416,15 @@ enum ena_xdp_errors_t { + ENA_XDP_NO_ENOUGH_QUEUES, + }; + ++enum ENA_XDP_ACTIONS { ++ ENA_XDP_PASS = 0, ++ ENA_XDP_TX = BIT(0), ++ ENA_XDP_REDIRECT = BIT(1), ++ ENA_XDP_DROP = BIT(2) ++}; ++ ++#define ENA_XDP_FORWARDED (ENA_XDP_TX | ENA_XDP_REDIRECT) ++ + static inline bool ena_xdp_present(struct ena_adapter *adapter) + { + return !!adapter->xdp_bpf_prog; diff --git a/drivers/net/ethernet/amd/Kconfig b/drivers/net/ethernet/amd/Kconfig index 4786f0504691d..899c8a2a34b6b 100644 --- a/drivers/net/ethernet/amd/Kconfig @@ -183454,6 +222159,32 @@ index 4786f0504691d..899c8a2a34b6b 100644 depends on X86 || ARM64 || COMPILE_TEST depends on PTP_1588_CLOCK_OPTIONAL select BITREVERSE +diff --git a/drivers/net/ethernet/amd/atarilance.c b/drivers/net/ethernet/amd/atarilance.c +index 9d2f49fd945ed..a0971ed00453c 100644 +--- a/drivers/net/ethernet/amd/atarilance.c ++++ b/drivers/net/ethernet/amd/atarilance.c +@@ -821,7 +821,7 @@ lance_start_xmit(struct sk_buff *skb, struct net_device *dev) + lp->memcpy_f( PKTBUF_ADDR(head), (void *)skb->data, skb->len ); + head->flag = TMD1_OWN_CHIP | TMD1_ENP | TMD1_STP; + dev->stats.tx_bytes += skb->len; +- dev_kfree_skb( skb ); ++ 
dev_consume_skb_irq(skb); + lp->cur_tx++; + while( lp->cur_tx >= TX_RING_SIZE && lp->dirty_tx >= TX_RING_SIZE ) { + lp->cur_tx -= TX_RING_SIZE; +diff --git a/drivers/net/ethernet/amd/lance.c b/drivers/net/ethernet/amd/lance.c +index 945bf1d875072..6c2d72024e218 100644 +--- a/drivers/net/ethernet/amd/lance.c ++++ b/drivers/net/ethernet/amd/lance.c +@@ -999,7 +999,7 @@ static netdev_tx_t lance_start_xmit(struct sk_buff *skb, + skb_copy_from_linear_data(skb, &lp->tx_bounce_buffs[entry], skb->len); + lp->tx_ring[entry].base = + ((u32)isa_virt_to_bus((lp->tx_bounce_buffs + entry)) & 0xffffff) | 0x83000000; +- dev_kfree_skb(skb); ++ dev_consume_skb_irq(skb); + } else { + lp->tx_skbuff[entry] = skb; + lp->tx_ring[entry].base = ((u32)isa_virt_to_bus(skb->data) & 0xffffff) | 0x83000000; diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-common.h b/drivers/net/ethernet/amd/xgbe/xgbe-common.h index b2cd3bdba9f89..533b8519ec352 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-common.h @@ -183480,8 +222211,66 @@ index b2cd3bdba9f89..533b8519ec352 100644 /* Bit setting and getting macros * The get macro will extract the current bit field value from within * the variable +diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-dev.c b/drivers/net/ethernet/amd/xgbe/xgbe-dev.c +index d5fd49dd25f33..decc1c09a031b 100644 +--- a/drivers/net/ethernet/amd/xgbe/xgbe-dev.c ++++ b/drivers/net/ethernet/amd/xgbe/xgbe-dev.c +@@ -524,19 +524,28 @@ static void xgbe_disable_vxlan(struct xgbe_prv_data *pdata) + netif_dbg(pdata, drv, pdata->netdev, "VXLAN acceleration disabled\n"); + } + ++static unsigned int xgbe_get_fc_queue_count(struct xgbe_prv_data *pdata) ++{ ++ unsigned int max_q_count = XGMAC_MAX_FLOW_CONTROL_QUEUES; ++ ++ /* From MAC ver 30H the TFCR is per priority, instead of per queue */ ++ if (XGMAC_GET_BITS(pdata->hw_feat.version, MAC_VR, SNPSVER) >= 0x30) ++ return max_q_count; ++ else ++ return min_t(unsigned int, pdata->tx_q_count, max_q_count); ++} ++ + static int xgbe_disable_tx_flow_control(struct xgbe_prv_data *pdata) + { +- unsigned int max_q_count, q_count; + unsigned int reg, reg_val; +- unsigned int i; ++ unsigned int i, q_count; + + /* Clear MTL flow control */ + for (i = 0; i < pdata->rx_q_count; i++) + XGMAC_MTL_IOWRITE_BITS(pdata, i, MTL_Q_RQOMR, EHFC, 0); + + /* Clear MAC flow control */ +- max_q_count = XGMAC_MAX_FLOW_CONTROL_QUEUES; +- q_count = min_t(unsigned int, pdata->tx_q_count, max_q_count); ++ q_count = xgbe_get_fc_queue_count(pdata); + reg = MAC_Q0TFCR; + for (i = 0; i < q_count; i++) { + reg_val = XGMAC_IOREAD(pdata, reg); +@@ -553,9 +562,8 @@ static int xgbe_enable_tx_flow_control(struct xgbe_prv_data *pdata) + { + struct ieee_pfc *pfc = pdata->pfc; + struct ieee_ets *ets = pdata->ets; +- unsigned int max_q_count, q_count; + unsigned int reg, reg_val; +- unsigned int i; ++ unsigned int i, q_count; + + /* Set MTL flow control */ + for (i = 0; i < pdata->rx_q_count; i++) { +@@ -579,8 +587,7 @@ static int xgbe_enable_tx_flow_control(struct xgbe_prv_data *pdata) + } + + /* Set MAC flow control */ +- max_q_count = XGMAC_MAX_FLOW_CONTROL_QUEUES; +- q_count = min_t(unsigned int, pdata->tx_q_count, max_q_count); ++ q_count = xgbe_get_fc_queue_count(pdata); + reg = MAC_Q0TFCR; + for (i = 0; i < q_count; i++) { + reg_val = XGMAC_IOREAD(pdata, reg); diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c -index 17a585adfb49c..e6883d52d230c 100644 +index 17a585adfb49c..555db1871ec9f 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c +++ 
b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c @@ -721,7 +721,9 @@ static void xgbe_stop_timers(struct xgbe_prv_data *pdata) @@ -183494,7 +222283,17 @@ index 17a585adfb49c..e6883d52d230c 100644 } } -@@ -2555,6 +2557,14 @@ read_again: +@@ -1062,6 +1064,9 @@ static void xgbe_free_irqs(struct xgbe_prv_data *pdata) + + devm_free_irq(pdata->dev, pdata->dev_irq, pdata); + ++ tasklet_kill(&pdata->tasklet_dev); ++ tasklet_kill(&pdata->tasklet_ecc); ++ + if (pdata->vdata->ecc_support && (pdata->dev_irq != pdata->ecc_irq)) + devm_free_irq(pdata->dev, pdata->ecc_irq, pdata); + +@@ -2555,6 +2560,14 @@ read_again: buf2_len = xgbe_rx_buf2_len(rdata, packet, len); len += buf2_len; @@ -183509,7 +222308,7 @@ index 17a585adfb49c..e6883d52d230c 100644 if (!skb) { skb = xgbe_create_skb(pdata, napi, rdata, buf1_len); -@@ -2584,8 +2594,10 @@ skip_data: +@@ -2584,8 +2597,10 @@ skip_data: if (!last || context_next) goto read_again; @@ -183521,6 +222320,86 @@ index 17a585adfb49c..e6883d52d230c 100644 /* Be sure we don't exceed the configured MTU */ max_len = netdev->mtu + ETH_HLEN; +diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-i2c.c b/drivers/net/ethernet/amd/xgbe/xgbe-i2c.c +index 22d4fc547a0a3..a9ccc4258ee50 100644 +--- a/drivers/net/ethernet/amd/xgbe/xgbe-i2c.c ++++ b/drivers/net/ethernet/amd/xgbe/xgbe-i2c.c +@@ -447,8 +447,10 @@ static void xgbe_i2c_stop(struct xgbe_prv_data *pdata) + xgbe_i2c_disable(pdata); + xgbe_i2c_clear_all_interrupts(pdata); + +- if (pdata->dev_irq != pdata->i2c_irq) ++ if (pdata->dev_irq != pdata->i2c_irq) { + devm_free_irq(pdata->dev, pdata->i2c_irq, pdata); ++ tasklet_kill(&pdata->tasklet_i2c); ++ } + } + + static int xgbe_i2c_start(struct xgbe_prv_data *pdata) +diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c b/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c +index 4e97b48695220..43fdd111235a6 100644 +--- a/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c ++++ b/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c +@@ -496,6 +496,7 @@ static enum xgbe_an xgbe_an73_tx_training(struct xgbe_prv_data *pdata, + reg |= XGBE_KR_TRAINING_ENABLE; + reg |= XGBE_KR_TRAINING_START; + XMDIO_WRITE(pdata, MDIO_MMD_PMAPMD, MDIO_PMA_10GBR_PMD_CTRL, reg); ++ pdata->kr_start_time = jiffies; + + netif_dbg(pdata, link, pdata->netdev, + "KR training initiated\n"); +@@ -632,6 +633,8 @@ static enum xgbe_an xgbe_an73_incompat_link(struct xgbe_prv_data *pdata) + + xgbe_switch_mode(pdata); + ++ pdata->an_result = XGBE_AN_READY; ++ + xgbe_an_restart(pdata); + + return XGBE_AN_INCOMPAT_LINK; +@@ -1275,9 +1278,30 @@ static bool xgbe_phy_aneg_done(struct xgbe_prv_data *pdata) + static void xgbe_check_link_timeout(struct xgbe_prv_data *pdata) + { + unsigned long link_timeout; ++ unsigned long kr_time; ++ int wait; + + link_timeout = pdata->link_check + (XGBE_LINK_TIMEOUT * HZ); + if (time_after(jiffies, link_timeout)) { ++ if ((xgbe_cur_mode(pdata) == XGBE_MODE_KR) && ++ pdata->phy.autoneg == AUTONEG_ENABLE) { ++ /* AN restart should not happen while KR training is in progress. ++ * The while loop ensures no AN restart during KR training, ++ * waits up to 500ms and AN restart is triggered only if KR ++ * training is failed. 
++ */ ++ wait = XGBE_KR_TRAINING_WAIT_ITER; ++ while (wait--) { ++ kr_time = pdata->kr_start_time + ++ msecs_to_jiffies(XGBE_AN_MS_TIMEOUT); ++ if (time_after(jiffies, kr_time)) ++ break; ++ /* AN restart is not required, if AN result is COMPLETE */ ++ if (pdata->an_result == XGBE_AN_COMPLETE) ++ return; ++ usleep_range(10000, 11000); ++ } ++ } + netif_dbg(pdata, link, pdata->netdev, "AN link timeout\n"); + xgbe_phy_config_aneg(pdata); + } +@@ -1390,8 +1414,10 @@ static void xgbe_phy_stop(struct xgbe_prv_data *pdata) + /* Disable auto-negotiation */ + xgbe_an_disable_all(pdata); + +- if (pdata->dev_irq != pdata->an_irq) ++ if (pdata->dev_irq != pdata->an_irq) { + devm_free_irq(pdata->dev, pdata->an_irq, pdata); ++ tasklet_kill(&pdata->tasklet_an); ++ } + + pdata->phy_if.phy_impl.stop(pdata); + diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-pci.c b/drivers/net/ethernet/amd/xgbe/xgbe-pci.c index 90cb55eb54665..014513ce00a14 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-pci.c @@ -183536,9 +222415,99 @@ index 90cb55eb54665..014513ce00a14 100644 } diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c b/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c -index 18e48b3bc402b..213769054391c 100644 +index 18e48b3bc402b..97e32c0490f8a 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c +@@ -189,6 +189,7 @@ enum xgbe_sfp_cable { + XGBE_SFP_CABLE_UNKNOWN = 0, + XGBE_SFP_CABLE_ACTIVE, + XGBE_SFP_CABLE_PASSIVE, ++ XGBE_SFP_CABLE_FIBER, + }; + + enum xgbe_sfp_base { +@@ -236,9 +237,7 @@ enum xgbe_sfp_speed { + + #define XGBE_SFP_BASE_BR 12 + #define XGBE_SFP_BASE_BR_1GBE_MIN 0x0a +-#define XGBE_SFP_BASE_BR_1GBE_MAX 0x0d + #define XGBE_SFP_BASE_BR_10GBE_MIN 0x64 +-#define XGBE_SFP_BASE_BR_10GBE_MAX 0x68 + + #define XGBE_SFP_BASE_CU_CABLE_LEN 18 + +@@ -284,6 +283,8 @@ struct xgbe_sfp_eeprom { + #define XGBE_BEL_FUSE_VENDOR "BEL-FUSE " + #define XGBE_BEL_FUSE_PARTNO "1GBT-SFP06 " + ++#define XGBE_MOLEX_VENDOR "Molex Inc. 
" ++ + struct xgbe_sfp_ascii { + union { + char vendor[XGBE_SFP_BASE_VENDOR_NAME_LEN + 1]; +@@ -823,25 +824,22 @@ static void xgbe_phy_sfp_phy_settings(struct xgbe_prv_data *pdata) + static bool xgbe_phy_sfp_bit_rate(struct xgbe_sfp_eeprom *sfp_eeprom, + enum xgbe_sfp_speed sfp_speed) + { +- u8 *sfp_base, min, max; ++ u8 *sfp_base, min; + + sfp_base = sfp_eeprom->base; + + switch (sfp_speed) { + case XGBE_SFP_SPEED_1000: + min = XGBE_SFP_BASE_BR_1GBE_MIN; +- max = XGBE_SFP_BASE_BR_1GBE_MAX; + break; + case XGBE_SFP_SPEED_10000: + min = XGBE_SFP_BASE_BR_10GBE_MIN; +- max = XGBE_SFP_BASE_BR_10GBE_MAX; + break; + default: + return false; + } + +- return ((sfp_base[XGBE_SFP_BASE_BR] >= min) && +- (sfp_base[XGBE_SFP_BASE_BR] <= max)); ++ return sfp_base[XGBE_SFP_BASE_BR] >= min; + } + + static void xgbe_phy_free_phy_device(struct xgbe_prv_data *pdata) +@@ -1142,16 +1140,21 @@ static void xgbe_phy_sfp_parse_eeprom(struct xgbe_prv_data *pdata) + phy_data->sfp_tx_fault = xgbe_phy_check_sfp_tx_fault(phy_data); + phy_data->sfp_rx_los = xgbe_phy_check_sfp_rx_los(phy_data); + +- /* Assume ACTIVE cable unless told it is PASSIVE */ ++ /* Assume FIBER cable unless told otherwise */ + if (sfp_base[XGBE_SFP_BASE_CABLE] & XGBE_SFP_BASE_CABLE_PASSIVE) { + phy_data->sfp_cable = XGBE_SFP_CABLE_PASSIVE; + phy_data->sfp_cable_len = sfp_base[XGBE_SFP_BASE_CU_CABLE_LEN]; +- } else { ++ } else if (sfp_base[XGBE_SFP_BASE_CABLE] & XGBE_SFP_BASE_CABLE_ACTIVE) { + phy_data->sfp_cable = XGBE_SFP_CABLE_ACTIVE; ++ } else { ++ phy_data->sfp_cable = XGBE_SFP_CABLE_FIBER; + } + + /* Determine the type of SFP */ +- if (sfp_base[XGBE_SFP_BASE_10GBE_CC] & XGBE_SFP_BASE_10GBE_CC_SR) ++ if (phy_data->sfp_cable != XGBE_SFP_CABLE_FIBER && ++ xgbe_phy_sfp_bit_rate(sfp_eeprom, XGBE_SFP_SPEED_10000)) ++ phy_data->sfp_base = XGBE_SFP_BASE_10000_CR; ++ else if (sfp_base[XGBE_SFP_BASE_10GBE_CC] & XGBE_SFP_BASE_10GBE_CC_SR) + phy_data->sfp_base = XGBE_SFP_BASE_10000_SR; + else if (sfp_base[XGBE_SFP_BASE_10GBE_CC] & XGBE_SFP_BASE_10GBE_CC_LR) + phy_data->sfp_base = XGBE_SFP_BASE_10000_LR; +@@ -1167,9 +1170,6 @@ static void xgbe_phy_sfp_parse_eeprom(struct xgbe_prv_data *pdata) + phy_data->sfp_base = XGBE_SFP_BASE_1000_CX; + else if (sfp_base[XGBE_SFP_BASE_1GBE_CC] & XGBE_SFP_BASE_1GBE_CC_T) + phy_data->sfp_base = XGBE_SFP_BASE_1000_T; +- else if ((phy_data->sfp_cable == XGBE_SFP_CABLE_PASSIVE) && +- xgbe_phy_sfp_bit_rate(sfp_eeprom, XGBE_SFP_SPEED_10000)) +- phy_data->sfp_base = XGBE_SFP_BASE_10000_CR; + + switch (phy_data->sfp_base) { + case XGBE_SFP_BASE_1000_T: @@ -1977,12 +1977,26 @@ static void xgbe_phy_rx_reset(struct xgbe_prv_data *pdata) } } @@ -183586,8 +222555,28 @@ index 18e48b3bc402b..213769054391c 100644 } static void xgbe_phy_rrc(struct xgbe_prv_data *pdata) +diff --git a/drivers/net/ethernet/amd/xgbe/xgbe.h b/drivers/net/ethernet/amd/xgbe/xgbe.h +index 3305979a9f7c1..e0b8f3c4cc0b2 100644 +--- a/drivers/net/ethernet/amd/xgbe/xgbe.h ++++ b/drivers/net/ethernet/amd/xgbe/xgbe.h +@@ -289,6 +289,7 @@ + /* Auto-negotiation */ + #define XGBE_AN_MS_TIMEOUT 500 + #define XGBE_LINK_TIMEOUT 5 ++#define XGBE_KR_TRAINING_WAIT_ITER 50 + + #define XGBE_SGMII_AN_LINK_STATUS BIT(1) + #define XGBE_SGMII_AN_LINK_SPEED (BIT(2) | BIT(3)) +@@ -1253,6 +1254,7 @@ struct xgbe_prv_data { + unsigned int parallel_detect; + unsigned int fec_ability; + unsigned long an_start; ++ unsigned long kr_start_time; + enum xgbe_an_mode an_mode; + + /* I2C support */ diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c 
b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c -index 5f1fc6582d74a..78c7cbc372b05 100644 +index 5f1fc6582d74a..71151f675a498 100644 --- a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c @@ -696,6 +696,12 @@ static int xgene_enet_rx_frame(struct xgene_enet_desc_ring *rx_ring, @@ -183617,6 +222606,44 @@ index 5f1fc6582d74a..78c7cbc372b05 100644 slots = page_pool->slots - 1; head = page_pool->head; +@@ -1002,8 +1004,10 @@ static int xgene_enet_open(struct net_device *ndev) + + xgene_enet_napi_enable(pdata); + ret = xgene_enet_register_irq(ndev); +- if (ret) ++ if (ret) { ++ xgene_enet_napi_disable(pdata); + return ret; ++ } + + if (ndev->phydev) { + phy_start(ndev->phydev); +diff --git a/drivers/net/ethernet/apple/bmac.c b/drivers/net/ethernet/apple/bmac.c +index a989d2df59ad0..7a966361d83f7 100644 +--- a/drivers/net/ethernet/apple/bmac.c ++++ b/drivers/net/ethernet/apple/bmac.c +@@ -1511,7 +1511,7 @@ static void bmac_tx_timeout(struct timer_list *t) + i = bp->tx_empty; + ++dev->stats.tx_errors; + if (i != bp->tx_fill) { +- dev_kfree_skb(bp->tx_bufs[i]); ++ dev_kfree_skb_irq(bp->tx_bufs[i]); + bp->tx_bufs[i] = NULL; + if (++i >= N_TX_RING) i = 0; + bp->tx_empty = i; +diff --git a/drivers/net/ethernet/apple/mace.c b/drivers/net/ethernet/apple/mace.c +index 4b80e3a52a199..44037e9e197fa 100644 +--- a/drivers/net/ethernet/apple/mace.c ++++ b/drivers/net/ethernet/apple/mace.c +@@ -841,7 +841,7 @@ static void mace_tx_timeout(struct timer_list *t) + if (mp->tx_bad_runt) { + mp->tx_bad_runt = 0; + } else if (i != mp->tx_fill) { +- dev_kfree_skb(mp->tx_bufs[i]); ++ dev_kfree_skb_irq(mp->tx_bufs[i]); + if (++i >= N_TX_RING) + i = 0; + mp->tx_empty = i; diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_common.h b/drivers/net/ethernet/aquantia/atlantic/aq_common.h index 23b2d390fcdda..ace691d7cd759 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_common.h @@ -183667,6 +222694,36 @@ index 23b2d390fcdda..ace691d7cd759 100644 #define AQ_NIC_RATE_EEE_MSK (AQ_NIC_RATE_EEE_10G |\ AQ_NIC_RATE_EEE_5G |\ AQ_NIC_RATE_EEE_2G5 |\ +diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c b/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c +index a9ef0544e30f0..715859cb6560a 100644 +--- a/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c ++++ b/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c +@@ -13,6 +13,7 @@ + #include "aq_ptp.h" + #include "aq_filters.h" + #include "aq_macsec.h" ++#include "aq_main.h" + + #include <linux/ptp_clock_kernel.h> + +@@ -845,7 +846,7 @@ static int aq_set_ringparam(struct net_device *ndev, + + if (netif_running(ndev)) { + ndev_running = true; +- dev_close(ndev); ++ aq_ndev_close(ndev); + } + + cfg->rxds = max(ring->rx_pending, hw_caps->rxds_min); +@@ -861,7 +862,7 @@ static int aq_set_ringparam(struct net_device *ndev, + goto err_exit; + + if (ndev_running) +- err = dev_open(ndev, NULL); ++ err = aq_ndev_open(ndev); + + err_exit: + return err; diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_hw.h b/drivers/net/ethernet/aquantia/atlantic/aq_hw.h index bed481816ea31..7442850ca95f0 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_hw.h @@ -183680,11 +222737,227 @@ index bed481816ea31..7442850ca95f0 100644 u64 uprc; u64 mprc; u64 bprc; +diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_macsec.c b/drivers/net/ethernet/aquantia/atlantic/aq_macsec.c +index 4a6dfac857ca9..ee823a18294cd 100644 +--- a/drivers/net/ethernet/aquantia/atlantic/aq_macsec.c ++++ 
b/drivers/net/ethernet/aquantia/atlantic/aq_macsec.c +@@ -585,6 +585,7 @@ static int aq_update_txsa(struct aq_nic_s *nic, const unsigned int sc_idx, + + ret = aq_mss_set_egress_sakey_record(hw, &key_rec, sa_idx); + ++ memzero_explicit(&key_rec, sizeof(key_rec)); + return ret; + } + +@@ -932,6 +933,7 @@ static int aq_update_rxsa(struct aq_nic_s *nic, const unsigned int sc_idx, + + ret = aq_mss_set_ingress_sakey_record(hw, &sa_key_record, sa_idx); + ++ memzero_explicit(&sa_key_record, sizeof(sa_key_record)); + return ret; + } + +@@ -1451,26 +1453,57 @@ static void aq_check_txsa_expiration(struct aq_nic_s *nic) + egress_sa_threshold_expired); + } + ++#define AQ_LOCKED_MDO_DEF(mdo) \ ++static int aq_locked_mdo_##mdo(struct macsec_context *ctx) \ ++{ \ ++ struct aq_nic_s *nic = netdev_priv(ctx->netdev); \ ++ int ret; \ ++ mutex_lock(&nic->macsec_mutex); \ ++ ret = aq_mdo_##mdo(ctx); \ ++ mutex_unlock(&nic->macsec_mutex); \ ++ return ret; \ ++} ++ ++AQ_LOCKED_MDO_DEF(dev_open) ++AQ_LOCKED_MDO_DEF(dev_stop) ++AQ_LOCKED_MDO_DEF(add_secy) ++AQ_LOCKED_MDO_DEF(upd_secy) ++AQ_LOCKED_MDO_DEF(del_secy) ++AQ_LOCKED_MDO_DEF(add_rxsc) ++AQ_LOCKED_MDO_DEF(upd_rxsc) ++AQ_LOCKED_MDO_DEF(del_rxsc) ++AQ_LOCKED_MDO_DEF(add_rxsa) ++AQ_LOCKED_MDO_DEF(upd_rxsa) ++AQ_LOCKED_MDO_DEF(del_rxsa) ++AQ_LOCKED_MDO_DEF(add_txsa) ++AQ_LOCKED_MDO_DEF(upd_txsa) ++AQ_LOCKED_MDO_DEF(del_txsa) ++AQ_LOCKED_MDO_DEF(get_dev_stats) ++AQ_LOCKED_MDO_DEF(get_tx_sc_stats) ++AQ_LOCKED_MDO_DEF(get_tx_sa_stats) ++AQ_LOCKED_MDO_DEF(get_rx_sc_stats) ++AQ_LOCKED_MDO_DEF(get_rx_sa_stats) ++ + const struct macsec_ops aq_macsec_ops = { +- .mdo_dev_open = aq_mdo_dev_open, +- .mdo_dev_stop = aq_mdo_dev_stop, +- .mdo_add_secy = aq_mdo_add_secy, +- .mdo_upd_secy = aq_mdo_upd_secy, +- .mdo_del_secy = aq_mdo_del_secy, +- .mdo_add_rxsc = aq_mdo_add_rxsc, +- .mdo_upd_rxsc = aq_mdo_upd_rxsc, +- .mdo_del_rxsc = aq_mdo_del_rxsc, +- .mdo_add_rxsa = aq_mdo_add_rxsa, +- .mdo_upd_rxsa = aq_mdo_upd_rxsa, +- .mdo_del_rxsa = aq_mdo_del_rxsa, +- .mdo_add_txsa = aq_mdo_add_txsa, +- .mdo_upd_txsa = aq_mdo_upd_txsa, +- .mdo_del_txsa = aq_mdo_del_txsa, +- .mdo_get_dev_stats = aq_mdo_get_dev_stats, +- .mdo_get_tx_sc_stats = aq_mdo_get_tx_sc_stats, +- .mdo_get_tx_sa_stats = aq_mdo_get_tx_sa_stats, +- .mdo_get_rx_sc_stats = aq_mdo_get_rx_sc_stats, +- .mdo_get_rx_sa_stats = aq_mdo_get_rx_sa_stats, ++ .mdo_dev_open = aq_locked_mdo_dev_open, ++ .mdo_dev_stop = aq_locked_mdo_dev_stop, ++ .mdo_add_secy = aq_locked_mdo_add_secy, ++ .mdo_upd_secy = aq_locked_mdo_upd_secy, ++ .mdo_del_secy = aq_locked_mdo_del_secy, ++ .mdo_add_rxsc = aq_locked_mdo_add_rxsc, ++ .mdo_upd_rxsc = aq_locked_mdo_upd_rxsc, ++ .mdo_del_rxsc = aq_locked_mdo_del_rxsc, ++ .mdo_add_rxsa = aq_locked_mdo_add_rxsa, ++ .mdo_upd_rxsa = aq_locked_mdo_upd_rxsa, ++ .mdo_del_rxsa = aq_locked_mdo_del_rxsa, ++ .mdo_add_txsa = aq_locked_mdo_add_txsa, ++ .mdo_upd_txsa = aq_locked_mdo_upd_txsa, ++ .mdo_del_txsa = aq_locked_mdo_del_txsa, ++ .mdo_get_dev_stats = aq_locked_mdo_get_dev_stats, ++ .mdo_get_tx_sc_stats = aq_locked_mdo_get_tx_sc_stats, ++ .mdo_get_tx_sa_stats = aq_locked_mdo_get_tx_sa_stats, ++ .mdo_get_rx_sc_stats = aq_locked_mdo_get_rx_sc_stats, ++ .mdo_get_rx_sa_stats = aq_locked_mdo_get_rx_sa_stats, + }; + + int aq_macsec_init(struct aq_nic_s *nic) +@@ -1492,6 +1525,7 @@ int aq_macsec_init(struct aq_nic_s *nic) + + nic->ndev->features |= NETIF_F_HW_MACSEC; + nic->ndev->macsec_ops = &aq_macsec_ops; ++ mutex_init(&nic->macsec_mutex); + + return 0; + } +@@ -1515,7 +1549,7 @@ int aq_macsec_enable(struct aq_nic_s 
*nic) + if (!nic->macsec_cfg) + return 0; + +- rtnl_lock(); ++ mutex_lock(&nic->macsec_mutex); + + if (nic->aq_fw_ops->send_macsec_req) { + struct macsec_cfg_request cfg = { 0 }; +@@ -1564,7 +1598,7 @@ int aq_macsec_enable(struct aq_nic_s *nic) + ret = aq_apply_macsec_cfg(nic); + + unlock: +- rtnl_unlock(); ++ mutex_unlock(&nic->macsec_mutex); + return ret; + } + +@@ -1576,9 +1610,9 @@ void aq_macsec_work(struct aq_nic_s *nic) + if (!netif_carrier_ok(nic->ndev)) + return; + +- rtnl_lock(); ++ mutex_lock(&nic->macsec_mutex); + aq_check_txsa_expiration(nic); +- rtnl_unlock(); ++ mutex_unlock(&nic->macsec_mutex); + } + + int aq_macsec_rx_sa_cnt(struct aq_nic_s *nic) +@@ -1589,21 +1623,30 @@ int aq_macsec_rx_sa_cnt(struct aq_nic_s *nic) + if (!cfg) + return 0; + ++ mutex_lock(&nic->macsec_mutex); ++ + for (i = 0; i < AQ_MACSEC_MAX_SC; i++) { + if (!test_bit(i, &cfg->rxsc_idx_busy)) + continue; + cnt += hweight_long(cfg->aq_rxsc[i].rx_sa_idx_busy); + } + ++ mutex_unlock(&nic->macsec_mutex); + return cnt; + } + + int aq_macsec_tx_sc_cnt(struct aq_nic_s *nic) + { ++ int cnt; ++ + if (!nic->macsec_cfg) + return 0; + +- return hweight_long(nic->macsec_cfg->txsc_idx_busy); ++ mutex_lock(&nic->macsec_mutex); ++ cnt = hweight_long(nic->macsec_cfg->txsc_idx_busy); ++ mutex_unlock(&nic->macsec_mutex); ++ ++ return cnt; + } + + int aq_macsec_tx_sa_cnt(struct aq_nic_s *nic) +@@ -1614,12 +1657,15 @@ int aq_macsec_tx_sa_cnt(struct aq_nic_s *nic) + if (!cfg) + return 0; + ++ mutex_lock(&nic->macsec_mutex); ++ + for (i = 0; i < AQ_MACSEC_MAX_SC; i++) { + if (!test_bit(i, &cfg->txsc_idx_busy)) + continue; + cnt += hweight_long(cfg->aq_txsc[i].tx_sa_idx_busy); + } + ++ mutex_unlock(&nic->macsec_mutex); + return cnt; + } + +@@ -1691,6 +1737,8 @@ u64 *aq_macsec_get_stats(struct aq_nic_s *nic, u64 *data) + if (!cfg) + return data; + ++ mutex_lock(&nic->macsec_mutex); ++ + aq_macsec_update_stats(nic); + + common_stats = &cfg->stats; +@@ -1773,5 +1821,7 @@ u64 *aq_macsec_get_stats(struct aq_nic_s *nic, u64 *data) + + data += i; + ++ mutex_unlock(&nic->macsec_mutex); ++ + return data; + } diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_main.c b/drivers/net/ethernet/aquantia/atlantic/aq_main.c -index e22935ce95730..f069312463fb8 100644 +index e22935ce95730..45ed097bfe49a 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_main.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_main.c -@@ -89,11 +89,8 @@ static int aq_ndev_close(struct net_device *ndev) +@@ -53,7 +53,7 @@ struct net_device *aq_ndev_alloc(void) + return ndev; + } + +-static int aq_ndev_open(struct net_device *ndev) ++int aq_ndev_open(struct net_device *ndev) + { + struct aq_nic_s *aq_nic = netdev_priv(ndev); + int err = 0; +@@ -83,17 +83,14 @@ err_exit: + return err; + } + +-static int aq_ndev_close(struct net_device *ndev) ++int aq_ndev_close(struct net_device *ndev) + { + struct aq_nic_s *aq_nic = netdev_priv(ndev); int err = 0; err = aq_nic_stop(aq_nic); @@ -183696,6 +222969,18 @@ index e22935ce95730..f069312463fb8 100644 return err; } +diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_main.h b/drivers/net/ethernet/aquantia/atlantic/aq_main.h +index a5a624b9ce733..2a562ab7a5afd 100644 +--- a/drivers/net/ethernet/aquantia/atlantic/aq_main.h ++++ b/drivers/net/ethernet/aquantia/atlantic/aq_main.h +@@ -14,5 +14,7 @@ + + void aq_ndev_schedule_work(struct work_struct *work); + struct net_device *aq_ndev_alloc(void); ++int aq_ndev_open(struct net_device *ndev); ++int aq_ndev_close(struct net_device *ndev); + + #endif /* AQ_MAIN_H */ diff 
--git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c index 6c049864dac08..ea2e7cd8946da 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c @@ -183798,6 +223083,19 @@ index 6c049864dac08..ea2e7cd8946da 100644 aq_ptp_ring_stop(self); +diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.h b/drivers/net/ethernet/aquantia/atlantic/aq_nic.h +index 1a7148041e3dc..b7f7d6f66633f 100644 +--- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.h ++++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.h +@@ -154,6 +154,8 @@ struct aq_nic_s { + struct mutex fwreq_mutex; + #if IS_ENABLED(CONFIG_MACSEC) + struct aq_macsec_cfg *macsec_cfg; ++ /* mutex to protect data in macsec_cfg */ ++ struct mutex macsec_mutex; + #endif + /* PTP support */ + struct aq_ptp_s *aq_ptp; diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c b/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c index d4b1976ee69b9..8647125d60aef 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c @@ -184435,6 +223733,53 @@ index dd259c8f2f4f3..58d426dda3edb 100644 } int hw_atl2_utils_get_action_resolve_table_caps(struct aq_hw_s *self, +diff --git a/drivers/net/ethernet/aquantia/atlantic/macsec/macsec_api.c b/drivers/net/ethernet/aquantia/atlantic/macsec/macsec_api.c +index 36c7cf05630a1..4319249595207 100644 +--- a/drivers/net/ethernet/aquantia/atlantic/macsec/macsec_api.c ++++ b/drivers/net/ethernet/aquantia/atlantic/macsec/macsec_api.c +@@ -757,6 +757,7 @@ set_ingress_sakey_record(struct aq_hw_s *hw, + u16 table_index) + { + u16 packed_record[18]; ++ int ret; + + if (table_index >= NUMROWS_INGRESSSAKEYRECORD) + return -EINVAL; +@@ -789,9 +790,12 @@ set_ingress_sakey_record(struct aq_hw_s *hw, + + packed_record[16] = rec->key_len & 0x3; + +- return set_raw_ingress_record(hw, packed_record, 18, 2, +- ROWOFFSET_INGRESSSAKEYRECORD + +- table_index); ++ ret = set_raw_ingress_record(hw, packed_record, 18, 2, ++ ROWOFFSET_INGRESSSAKEYRECORD + ++ table_index); ++ ++ memzero_explicit(packed_record, sizeof(packed_record)); ++ return ret; + } + + int aq_mss_set_ingress_sakey_record(struct aq_hw_s *hw, +@@ -1739,14 +1743,14 @@ static int set_egress_sakey_record(struct aq_hw_s *hw, + ret = set_raw_egress_record(hw, packed_record, 8, 2, + ROWOFFSET_EGRESSSAKEYRECORD + table_index); + if (unlikely(ret)) +- return ret; ++ goto clear_key; + ret = set_raw_egress_record(hw, packed_record + 8, 8, 2, + ROWOFFSET_EGRESSSAKEYRECORD + table_index - + 32); +- if (unlikely(ret)) +- return ret; + +- return 0; ++clear_key: ++ memzero_explicit(packed_record, sizeof(packed_record)); ++ return ret; + } + + int aq_mss_set_egress_sakey_record(struct aq_hw_s *hw, diff --git a/drivers/net/ethernet/arc/Kconfig b/drivers/net/ethernet/arc/Kconfig index 92a79c4ffa2c7..0a67612af2281 100644 --- a/drivers/net/ethernet/arc/Kconfig @@ -184458,10 +223803,27 @@ index 92a79c4ffa2c7..0a67612af2281 100644 help Support for Rockchip RK3036/RK3066/RK3188 EMAC ethernet controllers. 
diff --git a/drivers/net/ethernet/atheros/ag71xx.c b/drivers/net/ethernet/atheros/ag71xx.c -index 02ae98aabf91c..416a5c99db5a2 100644 +index 02ae98aabf91c..7295244b78d07 100644 --- a/drivers/net/ethernet/atheros/ag71xx.c +++ b/drivers/net/ethernet/atheros/ag71xx.c -@@ -1915,15 +1915,12 @@ static int ag71xx_probe(struct platform_device *pdev) +@@ -1480,7 +1480,7 @@ static int ag71xx_open(struct net_device *ndev) + if (ret) { + netif_err(ag, link, ndev, "phylink_of_phy_connect filed with err: %i\n", + ret); +- goto err; ++ return ret; + } + + max_frame_len = ag71xx_max_frame_len(ndev->mtu); +@@ -1501,6 +1501,7 @@ static int ag71xx_open(struct net_device *ndev) + + err: + ag71xx_rings_cleanup(ag); ++ phylink_disconnect_phy(ag->phylink); + return ret; + } + +@@ -1915,15 +1916,12 @@ static int ag71xx_probe(struct platform_device *pdev) ag->mac_reset = devm_reset_control_get(&pdev->dev, "mac"); if (IS_ERR(ag->mac_reset)) { netif_err(ag, probe, ndev, "missing mac reset\n"); @@ -184480,7 +223842,7 @@ index 02ae98aabf91c..416a5c99db5a2 100644 ndev->irq = platform_get_irq(pdev, 0); err = devm_request_irq(&pdev->dev, ndev->irq, ag71xx_interrupt, -@@ -1931,7 +1928,7 @@ static int ag71xx_probe(struct platform_device *pdev) +@@ -1931,7 +1929,7 @@ static int ag71xx_probe(struct platform_device *pdev) if (err) { netif_err(ag, probe, ndev, "unable to request IRQ %d\n", ndev->irq); @@ -184489,7 +223851,7 @@ index 02ae98aabf91c..416a5c99db5a2 100644 } ndev->netdev_ops = &ag71xx_netdev_ops; -@@ -1959,10 +1956,8 @@ static int ag71xx_probe(struct platform_device *pdev) +@@ -1959,10 +1957,8 @@ static int ag71xx_probe(struct platform_device *pdev) ag->stop_desc = dmam_alloc_coherent(&pdev->dev, sizeof(struct ag71xx_desc), &ag->stop_desc_dma, GFP_KERNEL); @@ -184502,7 +223864,7 @@ index 02ae98aabf91c..416a5c99db5a2 100644 ag->stop_desc->data = 0; ag->stop_desc->ctrl = 0; -@@ -1977,7 +1972,7 @@ static int ag71xx_probe(struct platform_device *pdev) +@@ -1977,7 +1973,7 @@ static int ag71xx_probe(struct platform_device *pdev) err = of_get_phy_mode(np, &ag->phy_if_mode); if (err) { netif_err(ag, probe, ndev, "missing phy-mode property in DT\n"); @@ -184511,7 +223873,7 @@ index 02ae98aabf91c..416a5c99db5a2 100644 } netif_napi_add(ndev, &ag->napi, ag71xx_poll, AG71XX_NAPI_WEIGHT); -@@ -1985,7 +1980,7 @@ static int ag71xx_probe(struct platform_device *pdev) +@@ -1985,7 +1981,7 @@ static int ag71xx_probe(struct platform_device *pdev) err = clk_prepare_enable(ag->clk_eth); if (err) { netif_err(ag, probe, ndev, "Failed to enable eth clk.\n"); @@ -184520,7 +223882,7 @@ index 02ae98aabf91c..416a5c99db5a2 100644 } ag71xx_wr(ag, AG71XX_REG_MAC_CFG1, 0); -@@ -2021,8 +2016,6 @@ err_mdio_remove: +@@ -2021,8 +2017,6 @@ err_mdio_remove: ag71xx_mdio_remove(ag); err_put_clk: clk_disable_unprepare(ag->clk_eth); @@ -184590,6 +223952,18 @@ index 3b51b172b3172..5cbd815c737e7 100644 /* Zero out Tx-buffers */ memset(tpd_ring->desc, 0, sizeof(struct atl1c_tpd_desc) * +diff --git a/drivers/net/ethernet/broadcom/Kconfig b/drivers/net/ethernet/broadcom/Kconfig +index 56e0fb07aec7f..cd1706909044d 100644 +--- a/drivers/net/ethernet/broadcom/Kconfig ++++ b/drivers/net/ethernet/broadcom/Kconfig +@@ -71,6 +71,7 @@ config BCM63XX_ENET + config BCMGENET + tristate "Broadcom GENET internal MAC support" + depends on HAS_IOMEM ++ depends on PTP_1588_CLOCK_OPTIONAL || !ARCH_BCM2835 + select MII + select PHYLIB + select FIXED_PHY diff --git a/drivers/net/ethernet/broadcom/Makefile b/drivers/net/ethernet/broadcom/Makefile index 0ddfb5b5d53ca..2e6c5f258a1ff 
100644 --- a/drivers/net/ethernet/broadcom/Makefile @@ -184604,10 +223978,39 @@ index 0ddfb5b5d53ca..2e6c5f258a1ff 100644 +CFLAGS_tg3.o += -Wno-array-bounds +endif diff --git a/drivers/net/ethernet/broadcom/bcm4908_enet.c b/drivers/net/ethernet/broadcom/bcm4908_enet.c -index 02a569500234c..376f81796a293 100644 +index 02a569500234c..7e89664943ceb 100644 --- a/drivers/net/ethernet/broadcom/bcm4908_enet.c +++ b/drivers/net/ethernet/broadcom/bcm4908_enet.c -@@ -708,7 +708,9 @@ static int bcm4908_enet_probe(struct platform_device *pdev) +@@ -561,8 +561,6 @@ static int bcm4908_enet_start_xmit(struct sk_buff *skb, struct net_device *netde + + if (++ring->write_idx == ring->length - 1) + ring->write_idx = 0; +- enet->netdev->stats.tx_bytes += skb->len; +- enet->netdev->stats.tx_packets++; + + return NETDEV_TX_OK; + } +@@ -646,13 +644,17 @@ static int bcm4908_enet_poll_tx(struct napi_struct *napi, int weight) + + dma_unmap_single(dev, slot->dma_addr, slot->len, DMA_TO_DEVICE); + dev_kfree_skb(slot->skb); ++ ++ handled++; + bytes += slot->len; ++ + if (++tx_ring->read_idx == tx_ring->length) + tx_ring->read_idx = 0; +- +- handled++; + } + ++ enet->netdev->stats.tx_packets += handled; ++ enet->netdev->stats.tx_bytes += bytes; ++ + if (handled < weight) { + napi_complete_done(napi, handled); + bcm4908_enet_dma_ring_intrs_on(enet, tx_ring); +@@ -708,7 +710,9 @@ static int bcm4908_enet_probe(struct platform_device *pdev) enet->irq_tx = platform_get_irq_byname(pdev, "tx"); @@ -184619,7 +224022,7 @@ index 02a569500234c..376f81796a293 100644 err = bcm4908_enet_dma_alloc(enet); if (err) diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c -index 7fa1b695400d7..ae541a9d1eeed 100644 +index 7fa1b695400d7..4c7f828c69c6b 100644 --- a/drivers/net/ethernet/broadcom/bcmsysport.c +++ b/drivers/net/ethernet/broadcom/bcmsysport.c @@ -1309,11 +1309,11 @@ static netdev_tx_t bcm_sysport_xmit(struct sk_buff *skb, @@ -184646,7 +224049,17 @@ index 7fa1b695400d7..ae541a9d1eeed 100644 /* Check ring space and update SW control flow */ if (ring->desc_count == 0) -@@ -2013,6 +2015,7 @@ static int bcm_sysport_open(struct net_device *dev) +@@ -1989,6 +1991,9 @@ static int bcm_sysport_open(struct net_device *dev) + goto out_clk_disable; + } + ++ /* Indicate that the MAC is responsible for PHY PM */ ++ phydev->mac_managed_pm = true; ++ + /* Reset house keeping link status */ + priv->old_duplex = -1; + priv->old_link = -1; +@@ -2013,6 +2018,7 @@ static int bcm_sysport_open(struct net_device *dev) } /* Initialize both hardware and software ring */ @@ -184654,7 +224067,7 @@ index 7fa1b695400d7..ae541a9d1eeed 100644 for (i = 0; i < dev->num_tx_queues; i++) { ret = bcm_sysport_init_tx_ring(priv, i); if (ret) { -@@ -2582,8 +2585,10 @@ static int bcm_sysport_probe(struct platform_device *pdev) +@@ -2582,8 +2588,10 @@ static int bcm_sysport_probe(struct platform_device *pdev) device_set_wakeup_capable(&pdev->dev, 1); priv->wol_clk = devm_clk_get_optional(&pdev->dev, "sw_sysportwol"); @@ -184680,9 +224093,25 @@ index 984f76e74b43e..16b73bb9acc78 100644 /* Receive queue */ diff --git a/drivers/net/ethernet/broadcom/bgmac-bcma.c b/drivers/net/ethernet/broadcom/bgmac-bcma.c -index 9513cfb5ba58c..0ce28bc955a4a 100644 +index 9513cfb5ba58c..92453e68d381b 100644 --- a/drivers/net/ethernet/broadcom/bgmac-bcma.c +++ b/drivers/net/ethernet/broadcom/bgmac-bcma.c +@@ -228,12 +228,12 @@ static int bgmac_probe(struct bcma_device *core) + bgmac->feature_flags |= BGMAC_FEAT_CLKCTLST; + bgmac->feature_flags |= 
BGMAC_FEAT_FLW_CTRL1; + bgmac->feature_flags |= BGMAC_FEAT_SW_TYPE_PHY; +- if (ci->pkg == BCMA_PKG_ID_BCM47188 || +- ci->pkg == BCMA_PKG_ID_BCM47186) { ++ if ((ci->id == BCMA_CHIP_ID_BCM5357 && ci->pkg == BCMA_PKG_ID_BCM47186) || ++ (ci->id == BCMA_CHIP_ID_BCM53572 && ci->pkg == BCMA_PKG_ID_BCM47188)) { + bgmac->feature_flags |= BGMAC_FEAT_SW_TYPE_RGMII; + bgmac->feature_flags |= BGMAC_FEAT_IOST_ATTACHED; + } +- if (ci->pkg == BCMA_PKG_ID_BCM5358) ++ if (ci->id == BCMA_CHIP_ID_BCM5357 && ci->pkg == BCMA_PKG_ID_BCM5358) + bgmac->feature_flags |= BGMAC_FEAT_SW_TYPE_EPHYRMII; + break; + case BCMA_CHIP_ID_BCM53573: @@ -323,7 +323,6 @@ static void bgmac_remove(struct bcma_device *core) bcma_mdio_mii_unregister(bgmac->mii_bus); bgmac_enet_remove(bgmac); @@ -184735,7 +224164,7 @@ index df8ff839cc621..94eb3a42158e9 100644 bgmac->read = platform_bgmac_read; bgmac->write = platform_bgmac_write; diff --git a/drivers/net/ethernet/broadcom/bgmac.c b/drivers/net/ethernet/broadcom/bgmac.c -index fe4d99abd5487..6e8bc67260311 100644 +index fe4d99abd5487..fa2a43d465db7 100644 --- a/drivers/net/ethernet/broadcom/bgmac.c +++ b/drivers/net/ethernet/broadcom/bgmac.c @@ -189,8 +189,8 @@ static netdev_tx_t bgmac_dma_tx_add(struct bgmac *bgmac, @@ -184748,6 +224177,14 @@ index fe4d99abd5487..6e8bc67260311 100644 wmb(); +@@ -1568,7 +1568,6 @@ void bgmac_enet_remove(struct bgmac *bgmac) + phy_disconnect(bgmac->net_dev->phydev); + netif_napi_del(&bgmac->napi); + bgmac_dma_free(bgmac); +- free_netdev(bgmac->net_dev); + } + EXPORT_SYMBOL_GPL(bgmac_enet_remove); + diff --git a/drivers/net/ethernet/broadcom/bnx2.c b/drivers/net/ethernet/broadcom/bnx2.c index 8c83973adca57..9d70d908c0646 100644 --- a/drivers/net/ethernet/broadcom/bnx2.c @@ -185036,7 +224473,7 @@ index ae87296ae1ffa..553f3de939574 100644 /* Report UNLOAD_DONE to MCP */ diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c -index 6fbf735fca31c..5613957314501 100644 +index 6fbf735fca31c..a9f202bbada1b 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c @@ -758,9 +758,18 @@ static void bnx2x_vf_igu_reset(struct bnx2x *bp, struct bnx2x_virtf *vf) @@ -185060,6 +224497,31 @@ index 6fbf735fca31c..5613957314501 100644 /* clear vf errors*/ bnx2x_vf_semi_clear_err(bp, abs_vfid); +@@ -786,16 +795,20 @@ static void bnx2x_vf_enable_traffic(struct bnx2x *bp, struct bnx2x_virtf *vf) + + static u8 bnx2x_vf_is_pcie_pending(struct bnx2x *bp, u8 abs_vfid) + { +- struct pci_dev *dev; + struct bnx2x_virtf *vf = bnx2x_vf_by_abs_fid(bp, abs_vfid); ++ struct pci_dev *dev; ++ bool pending; + + if (!vf) + return false; + + dev = pci_get_domain_bus_and_slot(vf->domain, vf->bus, vf->devfn); +- if (dev) +- return bnx2x_is_pcie_pending(dev); +- return false; ++ if (!dev) ++ return false; ++ pending = bnx2x_is_pcie_pending(dev); ++ pci_dev_put(dev); ++ ++ return pending; + } + + int bnx2x_vf_flr_clnup_epilog(struct bnx2x *bp, u8 abs_vfid) diff --git a/drivers/net/ethernet/broadcom/bnxt/Makefile b/drivers/net/ethernet/broadcom/bnxt/Makefile index c6ef7ec2c1151..2bc2b707d6eee 100644 --- a/drivers/net/ethernet/broadcom/bnxt/Makefile @@ -185073,7 +224535,7 @@ index c6ef7ec2c1151..2bc2b707d6eee 100644 bnxt_en-$(CONFIG_BNXT_FLOWER_OFFLOAD) += bnxt_tc.o bnxt_en-$(CONFIG_DEBUG_FS) += bnxt_debugfs.o diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c -index 62f84cc91e4d1..a6ca7ba5276c4 100644 +index 
62f84cc91e4d1..f64df4d532896 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -709,7 +709,6 @@ static void bnxt_tx_int(struct bnxt *bp, struct bnxt_napi *bnapi, int nr_pkts) @@ -185187,7 +224649,24 @@ index 62f84cc91e4d1..a6ca7ba5276c4 100644 rc = bnxt_hwrm_set_coal(bp); if (rc) netdev_warn(bp->dev, "HWRM set coalescing failure rc: %x\n", -@@ -9789,7 +9806,8 @@ static int bnxt_hwrm_if_change(struct bnxt *bp, bool up) +@@ -9006,10 +9023,14 @@ int bnxt_reserve_rings(struct bnxt *bp, bool irq_re_init) + netdev_err(bp->dev, "ring reservation/IRQ init failure rc: %d\n", rc); + return rc; + } +- if (tcs && (bp->tx_nr_rings_per_tc * tcs != bp->tx_nr_rings)) { ++ if (tcs && (bp->tx_nr_rings_per_tc * tcs != ++ bp->tx_nr_rings - bp->tx_nr_rings_xdp)) { + netdev_err(bp->dev, "tx ring reservation failure\n"); + netdev_reset_tc(bp->dev); +- bp->tx_nr_rings_per_tc = bp->tx_nr_rings; ++ if (bp->tx_nr_rings_xdp) ++ bp->tx_nr_rings_per_tc = bp->tx_nr_rings_xdp; ++ else ++ bp->tx_nr_rings_per_tc = bp->tx_nr_rings; + return -ENOMEM; + } + return 0; +@@ -9789,7 +9810,8 @@ static int bnxt_hwrm_if_change(struct bnxt *bp, bool up) if (flags & FUNC_DRV_IF_CHANGE_RESP_FLAGS_RESC_CHANGE) resc_reinit = true; @@ -185197,7 +224676,7 @@ index 62f84cc91e4d1..a6ca7ba5276c4 100644 fw_reset = true; else if (bp->fw_health && !bp->fw_health->status_reliable) bnxt_try_map_fw_health_reg(bp); -@@ -10234,6 +10252,12 @@ static int __bnxt_open_nic(struct bnxt *bp, bool irq_re_init, bool link_re_init) +@@ -10234,6 +10256,12 @@ static int __bnxt_open_nic(struct bnxt *bp, bool irq_re_init, bool link_re_init) if (irq_re_init) udp_tunnel_nic_reset_ntf(bp->dev); @@ -185210,7 +224689,7 @@ index 62f84cc91e4d1..a6ca7ba5276c4 100644 set_bit(BNXT_STATE_OPEN, &bp->state); bnxt_enable_int(bp); /* Enable TX queues */ -@@ -10289,13 +10313,15 @@ int bnxt_half_open_nic(struct bnxt *bp) +@@ -10289,13 +10317,15 @@ int bnxt_half_open_nic(struct bnxt *bp) goto half_open_err; } @@ -185228,7 +224707,7 @@ index 62f84cc91e4d1..a6ca7ba5276c4 100644 netdev_err(bp->dev, "bnxt_init_nic err: %x\n", rc); goto half_open_err; } -@@ -10303,7 +10329,7 @@ int bnxt_half_open_nic(struct bnxt *bp) +@@ -10303,7 +10333,7 @@ int bnxt_half_open_nic(struct bnxt *bp) half_open_err: bnxt_free_skbs(bp); @@ -185237,7 +224716,7 @@ index 62f84cc91e4d1..a6ca7ba5276c4 100644 dev_close(bp->dev); return rc; } -@@ -10313,9 +10339,10 @@ half_open_err: +@@ -10313,9 +10343,10 @@ half_open_err: */ void bnxt_half_close_nic(struct bnxt *bp) { @@ -185250,7 +224729,7 @@ index 62f84cc91e4d1..a6ca7ba5276c4 100644 } static void bnxt_reenable_sriov(struct bnxt *bp) -@@ -10731,7 +10758,7 @@ static void bnxt_set_rx_mode(struct net_device *dev) +@@ -10731,7 +10762,7 @@ static void bnxt_set_rx_mode(struct net_device *dev) if (dev->flags & IFF_ALLMULTI) { mask |= CFA_L2_SET_RX_MASK_REQ_MASK_ALL_MCAST; vnic->mc_list_count = 0; @@ -185259,7 +224738,7 @@ index 62f84cc91e4d1..a6ca7ba5276c4 100644 mc_update = bnxt_mc_list_updated(bp, &mask); } -@@ -10799,9 +10826,10 @@ skip_uc: +@@ -10799,9 +10830,10 @@ skip_uc: !bnxt_promisc_ok(bp)) vnic->rx_mask &= ~CFA_L2_SET_RX_MASK_REQ_MASK_PROMISCUOUS; rc = bnxt_hwrm_cfa_l2_set_rx_mask(bp, 0); @@ -185271,7 +224750,7 @@ index 62f84cc91e4d1..a6ca7ba5276c4 100644 vnic->rx_mask |= CFA_L2_SET_RX_MASK_REQ_MASK_ALL_MCAST; vnic->mc_list_count = 0; rc = bnxt_hwrm_cfa_l2_set_rx_mask(bp, 0); -@@ -10858,7 +10886,7 @@ static bool bnxt_rfs_capable(struct bnxt *bp) +@@ -10858,7 +10890,7 @@ static bool bnxt_rfs_capable(struct 
bnxt *bp) if (bp->flags & BNXT_FLAG_CHIP_P5) return bnxt_rfs_supported(bp); @@ -185280,7 +224759,17 @@ index 62f84cc91e4d1..a6ca7ba5276c4 100644 return false; vnics = 1 + bp->rx_nr_rings; -@@ -13064,10 +13092,9 @@ static int bnxt_init_dflt_ring_mode(struct bnxt *bp) +@@ -12577,8 +12609,8 @@ static int bnxt_rx_flow_steer(struct net_device *dev, const struct sk_buff *skb, + rcu_read_lock(); + hlist_for_each_entry_rcu(fltr, head, hash) { + if (bnxt_fltr_match(fltr, new_fltr)) { ++ rc = fltr->sw_id; + rcu_read_unlock(); +- rc = 0; + goto err_free; + } + } +@@ -13064,10 +13096,9 @@ static int bnxt_init_dflt_ring_mode(struct bnxt *bp) goto init_dflt_ring_err; bp->tx_nr_rings_per_tc = bp->tx_nr_rings; @@ -185294,7 +224783,7 @@ index 62f84cc91e4d1..a6ca7ba5276c4 100644 init_dflt_ring_err: bnxt_ulp_irq_restart(bp, rc); return rc; -@@ -13370,7 +13397,9 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) +@@ -13370,7 +13401,9 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) } bnxt_inv_fw_health_reg(bp); @@ -185305,7 +224794,7 @@ index 62f84cc91e4d1..a6ca7ba5276c4 100644 rc = register_netdev(dev); if (rc) -@@ -13390,6 +13419,7 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) +@@ -13390,6 +13423,7 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) init_err_cleanup: bnxt_dl_unregister(bp); @@ -185313,6 +224802,24 @@ index 62f84cc91e4d1..a6ca7ba5276c4 100644 bnxt_shutdown_tc(bp); bnxt_clear_int_mode(bp); +@@ -13667,8 +13701,16 @@ static struct pci_driver bnxt_pci_driver = { + + static int __init bnxt_init(void) + { ++ int err; ++ + bnxt_debug_init(); +- return pci_register_driver(&bnxt_pci_driver); ++ err = pci_register_driver(&bnxt_pci_driver); ++ if (err) { ++ bnxt_debug_exit(); ++ return err; ++ } ++ ++ return 0; + } + + static void __exit bnxt_exit(void) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index 19fe6478e9b4b..e5874c829226e 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -185883,7 +225390,7 @@ index d889f240da2b2..406dc655a5fc9 100644 #define NVM_OFF_MSIX_VEC_PER_PF_MIN 114 #define NVM_OFF_IGNORE_ARI 164 diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c -index 7260910e75fb2..0f276ce2d1eb7 100644 +index 7260910e75fb2..586311a271f21 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c @@ -25,15 +25,13 @@ @@ -185903,6 +225410,15 @@ index 7260910e75fb2..0f276ce2d1eb7 100644 static u32 bnxt_get_msglevel(struct net_device *dev) { +@@ -134,7 +132,7 @@ static int bnxt_set_coalesce(struct net_device *dev, + } + + reset_coalesce: +- if (netif_running(dev)) { ++ if (test_bit(BNXT_STATE_OPEN, &bp->state)) { + if (update_stats) { + rc = bnxt_close_nic(bp, true, false); + if (!rc) @@ -1945,6 +1943,9 @@ static int bnxt_get_fecparam(struct net_device *dev, case PORT_PHY_QCFG_RESP_ACTIVE_FEC_FEC_RS272_IEEE_ACTIVE: fec->active_fec |= ETHTOOL_FEC_LLRS; @@ -186724,9 +226240,18 @@ index 89d16c587bb7d..dbd2ede53f946 100644 } diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c -index 5e0e0e70d8014..8aab07419263e 100644 +index 5e0e0e70d8014..50f86bebbc19d 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c +@@ -11176,7 +11176,7 @@ static void tg3_reset_task(struct work_struct *work) + rtnl_lock(); + tg3_full_lock(tp, 0); + 
+- if (!netif_running(tp->dev)) { ++ if (tp->pcierr_recovery || !netif_running(tp->dev)) { + tg3_flag_clear(tp, RESET_TASK_PENDING); + tg3_full_unlock(tp); + rtnl_unlock(); @@ -18078,16 +18078,20 @@ static void tg3_shutdown(struct pci_dev *pdev) struct net_device *dev = pci_get_drvdata(pdev); struct tg3 *tp = netdev_priv(dev); @@ -186750,8 +226275,28 @@ index 5e0e0e70d8014..8aab07419263e 100644 } /** +@@ -18107,6 +18111,9 @@ static pci_ers_result_t tg3_io_error_detected(struct pci_dev *pdev, + + netdev_info(netdev, "PCI I/O error detected\n"); + ++ /* Want to make sure that the reset task doesn't run */ ++ tg3_reset_task_cancel(tp); ++ + rtnl_lock(); + + /* Could be second call or maybe we don't have netdev yet */ +@@ -18123,9 +18130,6 @@ static pci_ers_result_t tg3_io_error_detected(struct pci_dev *pdev, + + tg3_timer_stop(tp); + +- /* Want to make sure that the reset task doesn't run */ +- tg3_reset_task_cancel(tp); +- + netif_device_detach(netdev); + + /* Clean up software state, even if MMIO is blocked */ diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c -index d13fb1d318215..3ca3f9d0fd9b5 100644 +index d13fb1d318215..906c5bbefaac9 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -35,6 +35,7 @@ @@ -186762,7 +226307,15 @@ index d13fb1d318215..3ca3f9d0fd9b5 100644 #include "macb.h" /* This structure is only used for MACB on SiFive FU540 devices */ -@@ -1155,6 +1156,36 @@ static void macb_tx_error_task(struct work_struct *work) +@@ -879,6 +880,7 @@ static int macb_mii_probe(struct net_device *dev) + + bp->phylink_config.dev = &dev->dev; + bp->phylink_config.type = PHYLINK_NETDEV; ++ bp->phylink_config.mac_managed_pm = true; + + if (bp->phy_interface == PHY_INTERFACE_MODE_SGMII) { + bp->phylink_config.poll_fixed_state = true; +@@ -1155,6 +1157,36 @@ static void macb_tx_error_task(struct work_struct *work) spin_unlock_irqrestore(&bp->lock, flags); } @@ -186799,7 +226352,7 @@ index d13fb1d318215..3ca3f9d0fd9b5 100644 static void macb_tx_interrupt(struct macb_queue *queue) { unsigned int tail; -@@ -1199,8 +1230,8 @@ static void macb_tx_interrupt(struct macb_queue *queue) +@@ -1199,8 +1231,8 @@ static void macb_tx_interrupt(struct macb_queue *queue) /* First, update TX stats if needed */ if (skb) { @@ -186810,7 +226363,7 @@ index d13fb1d318215..3ca3f9d0fd9b5 100644 gem_ptp_do_txstamp(queue, skb, desc) == 0) { /* skb now belongs to timestamp buffer * and will be removed later -@@ -1250,7 +1281,6 @@ static void gem_rx_refill(struct macb_queue *queue) +@@ -1250,7 +1282,6 @@ static void gem_rx_refill(struct macb_queue *queue) /* Make hw descriptor updates visible to CPU */ rmb(); @@ -186818,7 +226371,7 @@ index d13fb1d318215..3ca3f9d0fd9b5 100644 desc = macb_rx_desc(queue, entry); if (!queue->rx_skbuff[entry]) { -@@ -1289,6 +1319,7 @@ static void gem_rx_refill(struct macb_queue *queue) +@@ -1289,6 +1320,7 @@ static void gem_rx_refill(struct macb_queue *queue) dma_wmb(); desc->addr &= ~MACB_BIT(RX_USED); } @@ -186826,7 +226379,7 @@ index d13fb1d318215..3ca3f9d0fd9b5 100644 } /* Make descriptor updates visible to hardware */ -@@ -1606,7 +1637,14 @@ static int macb_poll(struct napi_struct *napi, int budget) +@@ -1606,7 +1638,14 @@ static int macb_poll(struct napi_struct *napi, int budget) if (work_done < budget) { napi_complete_done(napi, work_done); @@ -186842,7 +226395,7 @@ index d13fb1d318215..3ca3f9d0fd9b5 100644 status = macb_readl(bp, RSR); if (status) { if (bp->caps & 
MACB_CAPS_ISR_CLEAR_ON_WRITE) -@@ -1614,6 +1652,22 @@ static int macb_poll(struct napi_struct *napi, int budget) +@@ -1614,6 +1653,22 @@ static int macb_poll(struct napi_struct *napi, int budget) napi_reschedule(napi); } else { queue_writel(queue, IER, bp->rx_intr_mask); @@ -186865,7 +226418,7 @@ index d13fb1d318215..3ca3f9d0fd9b5 100644 } } -@@ -1666,6 +1720,7 @@ static void macb_tx_restart(struct macb_queue *queue) +@@ -1666,6 +1721,7 @@ static void macb_tx_restart(struct macb_queue *queue) unsigned int head = queue->tx_head; unsigned int tail = queue->tx_tail; struct macb *bp = queue->bp; @@ -186873,7 +226426,7 @@ index d13fb1d318215..3ca3f9d0fd9b5 100644 if (bp->caps & MACB_CAPS_ISR_CLEAR_ON_WRITE) queue_writel(queue, ISR, MACB_BIT(TXUBR)); -@@ -1673,6 +1728,13 @@ static void macb_tx_restart(struct macb_queue *queue) +@@ -1673,6 +1729,13 @@ static void macb_tx_restart(struct macb_queue *queue) if (head == tail) return; @@ -186887,7 +226440,7 @@ index d13fb1d318215..3ca3f9d0fd9b5 100644 macb_writel(bp, NCR, macb_readl(bp, NCR) | MACB_BIT(TSTART)); } -@@ -1999,7 +2061,8 @@ static unsigned int macb_tx_map(struct macb *bp, +@@ -1999,7 +2062,8 @@ static unsigned int macb_tx_map(struct macb *bp, ctrl |= MACB_BF(TX_LSO, lso_ctrl); ctrl |= MACB_BF(TX_TCP_SEQ_SRC, seq_ctrl); if ((bp->dev->features & NETIF_F_HW_CSUM) && @@ -186897,7 +226450,14 @@ index d13fb1d318215..3ca3f9d0fd9b5 100644 ctrl |= MACB_BIT(TX_NOCRC); } else /* Only set MSS/MFS on payload descriptors -@@ -2097,7 +2160,7 @@ static int macb_pad_and_fcs(struct sk_buff **skb, struct net_device *ndev) +@@ -2090,23 +2154,19 @@ static int macb_pad_and_fcs(struct sk_buff **skb, struct net_device *ndev) + bool cloned = skb_cloned(*skb) || skb_header_cloned(*skb) || + skb_is_nonlinear(*skb); + int padlen = ETH_ZLEN - (*skb)->len; +- int headroom = skb_headroom(*skb); + int tailroom = skb_tailroom(*skb); + struct sk_buff *nskb; + u32 fcs; if (!(ndev->features & NETIF_F_HW_CSUM) || !((*skb)->ip_summed != CHECKSUM_PARTIAL) || @@ -186906,7 +226466,28 @@ index d13fb1d318215..3ca3f9d0fd9b5 100644 return 0; if (padlen <= 0) { -@@ -4739,7 +4802,7 @@ static int macb_probe(struct platform_device *pdev) + /* FCS could be appeded to tailroom. */ + if (tailroom >= ETH_FCS_LEN) + goto add_fcs; +- /* FCS could be appeded by moving data to headroom. */ +- else if (!cloned && headroom + tailroom >= ETH_FCS_LEN) +- padlen = 0; + /* No room for FCS, need to reallocate skb. 
*/ + else + padlen = ETH_FCS_LEN; +@@ -2115,10 +2175,7 @@ static int macb_pad_and_fcs(struct sk_buff **skb, struct net_device *ndev) + padlen += ETH_FCS_LEN; + } + +- if (!cloned && headroom + tailroom >= padlen) { +- (*skb)->data = memmove((*skb)->head, (*skb)->data, (*skb)->len); +- skb_set_tail_pointer(*skb, (*skb)->len); +- } else { ++ if (cloned || tailroom < padlen) { + nskb = skb_copy_expand(*skb, 0, padlen, GFP_ATOMIC); + if (!nskb) + return -ENOMEM; +@@ -4739,7 +4796,7 @@ static int macb_probe(struct platform_device *pdev) #ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT if (GEM_BFEXT(DAW64, gem_readl(bp, DCFG6))) { @@ -186931,6 +226512,115 @@ index c2e1f163bb14f..c52ec1cc8a08c 100644 tx_bd_control = TSTAMP_ALL_FRAMES; break; default: +diff --git a/drivers/net/ethernet/cavium/liquidio/lio_main.c b/drivers/net/ethernet/cavium/liquidio/lio_main.c +index 2907e13b9df69..ae68821dd56d5 100644 +--- a/drivers/net/ethernet/cavium/liquidio/lio_main.c ++++ b/drivers/net/ethernet/cavium/liquidio/lio_main.c +@@ -1798,13 +1798,10 @@ static int liquidio_open(struct net_device *netdev) + + ifstate_set(lio, LIO_IFSTATE_RUNNING); + +- if (OCTEON_CN23XX_PF(oct)) { +- if (!oct->msix_on) +- if (setup_tx_poll_fn(netdev)) +- return -1; +- } else { +- if (setup_tx_poll_fn(netdev)) +- return -1; ++ if (!OCTEON_CN23XX_PF(oct) || !oct->msix_on) { ++ ret = setup_tx_poll_fn(netdev); ++ if (ret) ++ goto err_poll; + } + + netif_tx_start_all_queues(netdev); +@@ -1817,7 +1814,7 @@ static int liquidio_open(struct net_device *netdev) + /* tell Octeon to start forwarding packets to host */ + ret = send_rx_ctrl_cmd(lio, 1); + if (ret) +- return ret; ++ goto err_rx_ctrl; + + /* start periodical statistics fetch */ + INIT_DELAYED_WORK(&lio->stats_wk.work, lio_fetch_stats); +@@ -1828,6 +1825,27 @@ static int liquidio_open(struct net_device *netdev) + dev_info(&oct->pci_dev->dev, "%s interface is opened\n", + netdev->name); + ++ return 0; ++ ++err_rx_ctrl: ++ if (!OCTEON_CN23XX_PF(oct) || !oct->msix_on) ++ cleanup_tx_poll_fn(netdev); ++err_poll: ++ if (lio->ptp_clock) { ++ ptp_clock_unregister(lio->ptp_clock); ++ lio->ptp_clock = NULL; ++ } ++ ++ if (oct->props[lio->ifidx].napi_enabled == 1) { ++ list_for_each_entry_safe(napi, n, &netdev->napi_list, dev_list) ++ napi_disable(napi); ++ ++ oct->props[lio->ifidx].napi_enabled = 0; ++ ++ if (OCTEON_CN23XX_PF(oct)) ++ oct->droq[0]->ops.poll_mode = 0; ++ } ++ + return ret; + } + +diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c +index a27227aeae880..b43b97e15a6f0 100644 +--- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c ++++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c +@@ -2250,7 +2250,7 @@ static int nicvf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) + err = register_netdev(netdev); + if (err) { + dev_err(dev, "Failed to register netdevice\n"); +- goto err_unregister_interrupts; ++ goto err_destroy_workqueue; + } + + nic->msg_enable = debug; +@@ -2259,6 +2259,8 @@ static int nicvf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) + + return 0; + ++err_destroy_workqueue: ++ destroy_workqueue(nic->nicvf_rx_mode_wq); + err_unregister_interrupts: + nicvf_unregister_interrupts(nic); + err_free_netdev: +diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c +index c36fed9c3d737..daaffae1a89f5 100644 +--- a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c ++++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c +@@ -1435,8 +1435,10 @@ 
static acpi_status bgx_acpi_match_id(acpi_handle handle, u32 lvl, + return AE_OK; + } + +- if (strncmp(string.pointer, bgx_sel, 4)) ++ if (strncmp(string.pointer, bgx_sel, 4)) { ++ kfree(string.pointer); + return AE_OK; ++ } + + acpi_walk_namespace(ACPI_TYPE_DEVICE, handle, 1, + bgx_acpi_register_phy, NULL, bgx, NULL); +diff --git a/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c b/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c +index 38e47703f9abd..07568aa15873d 100644 +--- a/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c ++++ b/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c +@@ -1302,6 +1302,7 @@ static int cxgb_up(struct adapter *adap) + if (ret < 0) { + CH_ERR(adap, "failed to bind qsets, err %d\n", ret); + t3_intr_disable(adap); ++ quiesce_rx(adap); + free_irq_resources(adap); + err = ret; + goto out; diff --git a/drivers/net/ethernet/chelsio/cxgb3/t3_hw.c b/drivers/net/ethernet/chelsio/cxgb3/t3_hw.c index 7ff31d1026fb2..e0d34e64fc6cb 100644 --- a/drivers/net/ethernet/chelsio/cxgb3/t3_hw.c @@ -187094,6 +226784,19 @@ index 002fc62ea7262..63bc956d20376 100644 #define SFF_8472_COMP_ADDR 0x5e #define SFF_8472_COMP_LEN 0x1 #define SFF_REV_ADDR 0x1 +diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c b/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c +index 49b76fd47daa0..464c2b365721f 100644 +--- a/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c ++++ b/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c +@@ -858,7 +858,7 @@ static int cxgb4vf_open(struct net_device *dev) + */ + err = t4vf_update_port_info(pi); + if (err < 0) +- return err; ++ goto err_unwind; + + /* + * Note that this interface is up and start everything up ... diff --git a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c index bcad69c480740..ddfe9208529a5 100644 --- a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c @@ -187387,6 +227090,24 @@ index 85b99099c6b94..5babcf05bc2f1 100644 /* When not a module we can work around broken '486 PCI boards. 
*/ if (boot_cpu_data.x86 <= 4) { i |= 0x4800; +diff --git a/drivers/net/ethernet/dnet.c b/drivers/net/ethernet/dnet.c +index 6c51cf991dad5..14dc2e13bf038 100644 +--- a/drivers/net/ethernet/dnet.c ++++ b/drivers/net/ethernet/dnet.c +@@ -550,11 +550,11 @@ static netdev_tx_t dnet_start_xmit(struct sk_buff *skb, struct net_device *dev) + + skb_tx_timestamp(skb); + ++ spin_unlock_irqrestore(&bp->lock, flags); ++ + /* free the buffer */ + dev_kfree_skb(skb); + +- spin_unlock_irqrestore(&bp->lock, flags); +- + return NETDEV_TX_OK; + } + diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.c b/drivers/net/ethernet/emulex/benet/be_cmds.c index 649c5c429bd7c..1288b5e3d2201 100644 --- a/drivers/net/ethernet/emulex/benet/be_cmds.c @@ -187596,6 +227317,30 @@ index 25c91b3c5fd30..819266d463b07 100644 if (boot_cpu_data.x86 <= 4) np->crvalue = 0xa00; else +diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c +index 685d2d8a3b366..fe5fc2b3406f9 100644 +--- a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c ++++ b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c +@@ -2395,6 +2395,9 @@ static int dpaa_eth_poll(struct napi_struct *napi, int budget) + + cleaned = qman_p_poll_dqrr(np->p, budget); + ++ if (np->xdp_act & XDP_REDIRECT) ++ xdp_do_flush(); ++ + if (cleaned < budget) { + napi_complete_done(napi, cleaned); + qman_p_irqsource_add(np->p, QM_PIRQ_DQRI); +@@ -2402,9 +2405,6 @@ static int dpaa_eth_poll(struct napi_struct *napi, int budget) + qman_p_irqsource_add(np->p, QM_PIRQ_DQRI); + } + +- if (np->xdp_act & XDP_REDIRECT) +- xdp_do_flush(); +- + return cleaned; + } + diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_ethtool.c b/drivers/net/ethernet/freescale/dpaa/dpaa_ethtool.c index 763d2c7b5fb1a..5750f9a56393a 100644 --- a/drivers/net/ethernet/freescale/dpaa/dpaa_ethtool.c @@ -187619,7 +227364,7 @@ index 763d2c7b5fb1a..5750f9a56393a 100644 if (ptp_dev) ptp = platform_get_drvdata(ptp_dev); diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c -index 7065c71ed7b86..5899139aec97a 100644 +index 7065c71ed7b86..c48d410936517 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c @@ -1389,8 +1389,8 @@ static int dpaa2_eth_add_bufs(struct dpaa2_eth_priv *priv, @@ -187633,7 +227378,34 @@ index 7065c71ed7b86..5899139aec97a 100644 addr, priv->rx_buf_size, bpid); } -@@ -4329,7 +4329,7 @@ static int dpaa2_eth_probe(struct fsl_mc_device *dpni_dev) +@@ -1597,10 +1597,15 @@ static int dpaa2_eth_poll(struct napi_struct *napi, int budget) + if (rx_cleaned >= budget || + txconf_cleaned >= DPAA2_ETH_TXCONF_PER_NAPI) { + work_done = budget; ++ if (ch->xdp.res & XDP_REDIRECT) ++ xdp_do_flush(); + goto out; + } + } while (store_cleaned); + ++ if (ch->xdp.res & XDP_REDIRECT) ++ xdp_do_flush(); ++ + /* We didn't consume the entire budget, so finish napi and + * re-enable data availability notifications + */ +@@ -1625,9 +1630,7 @@ out: + txc_fq->dq_bytes = 0; + } + +- if (ch->xdp.res & XDP_REDIRECT) +- xdp_do_flush_map(); +- else if (rx_cleaned && ch->xdp.res & XDP_TX) ++ if (rx_cleaned && ch->xdp.res & XDP_TX) + dpaa2_eth_xdp_tx_flush(priv, ch, &priv->fq[flowid]); + + return work_done; +@@ -4329,7 +4332,7 @@ static int dpaa2_eth_probe(struct fsl_mc_device *dpni_dev) } INIT_WORK(&priv->tx_onestep_tstamp, dpaa2_eth_tx_onestep_tstamp); @@ -187642,7 +227414,7 @@ index 7065c71ed7b86..5899139aec97a 100644 skb_queue_head_init(&priv->tx_skbs); 
priv->rx_copybreak = DPAA2_ETH_DEFAULT_COPYBREAK; -@@ -4511,12 +4511,12 @@ static int dpaa2_eth_remove(struct fsl_mc_device *ls_dev) +@@ -4511,12 +4514,12 @@ static int dpaa2_eth_remove(struct fsl_mc_device *ls_dev) #ifdef CONFIG_DEBUG_FS dpaa2_dbg_remove(priv); #endif @@ -187657,7 +227429,7 @@ index 7065c71ed7b86..5899139aec97a 100644 dpaa2_eth_dl_port_del(priv); dpaa2_eth_dl_traps_unregister(priv); dpaa2_eth_dl_unregister(priv); -@@ -4538,10 +4538,12 @@ static int dpaa2_eth_remove(struct fsl_mc_device *ls_dev) +@@ -4538,10 +4541,12 @@ static int dpaa2_eth_remove(struct fsl_mc_device *ls_dev) fsl_mc_portal_free(priv->mc_io); @@ -187694,10 +227466,42 @@ index 32b5faa87bb8d..208a3459f2e29 100644 dprtc_close(mc_dev->mc_io, 0, mc_dev->mc_handle); err_free_mcp: diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch-flower.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch-flower.c -index d6eefbbf163fa..cacd454ac696c 100644 +index d6eefbbf163fa..c39b866e2582d 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch-flower.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch-flower.c -@@ -532,6 +532,7 @@ static int dpaa2_switch_flower_parse_mirror_key(struct flow_cls_offload *cls, +@@ -132,6 +132,7 @@ int dpaa2_switch_acl_entry_add(struct dpaa2_switch_filter_block *filter_block, + DMA_TO_DEVICE); + if (unlikely(dma_mapping_error(dev, acl_entry_cfg->key_iova))) { + dev_err(dev, "DMA mapping failed\n"); ++ kfree(cmd_buff); + return -EFAULT; + } + +@@ -142,6 +143,7 @@ int dpaa2_switch_acl_entry_add(struct dpaa2_switch_filter_block *filter_block, + DMA_TO_DEVICE); + if (err) { + dev_err(dev, "dpsw_acl_add_entry() failed %d\n", err); ++ kfree(cmd_buff); + return err; + } + +@@ -172,6 +174,7 @@ dpaa2_switch_acl_entry_remove(struct dpaa2_switch_filter_block *block, + DMA_TO_DEVICE); + if (unlikely(dma_mapping_error(dev, acl_entry_cfg->key_iova))) { + dev_err(dev, "DMA mapping failed\n"); ++ kfree(cmd_buff); + return -EFAULT; + } + +@@ -182,6 +185,7 @@ dpaa2_switch_acl_entry_remove(struct dpaa2_switch_filter_block *block, + DMA_TO_DEVICE); + if (err) { + dev_err(dev, "dpsw_acl_remove_entry() failed %d\n", err); ++ kfree(cmd_buff); + return err; + } + +@@ -532,6 +536,7 @@ static int dpaa2_switch_flower_parse_mirror_key(struct flow_cls_offload *cls, struct flow_rule *rule = flow_cls_offload_flow_rule(cls); struct flow_dissector *dissector = rule->match.dissector; struct netlink_ext_ack *extack = cls->common.extack; @@ -187705,7 +227509,7 @@ index d6eefbbf163fa..cacd454ac696c 100644 if (dissector->used_keys & ~(BIT(FLOW_DISSECTOR_KEY_BASIC) | -@@ -561,9 +562,10 @@ static int dpaa2_switch_flower_parse_mirror_key(struct flow_cls_offload *cls, +@@ -561,9 +566,10 @@ static int dpaa2_switch_flower_parse_mirror_key(struct flow_cls_offload *cls, } *vlan = (u16)match.key->vlan_id; @@ -187730,10 +227534,175 @@ index a139f2e9d59f0..e0e8dfd137930 100644 obj-$(CONFIG_FSL_ENETC_IERB) += fsl-enetc-ierb.o fsl-enetc-ierb-y := enetc_ierb.o diff --git a/drivers/net/ethernet/freescale/enetc/enetc.c b/drivers/net/ethernet/freescale/enetc/enetc.c -index 042327b9981fa..c0265a6f10c00 100644 +index 042327b9981fa..8b7c93447770c 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc.c +++ b/drivers/net/ethernet/freescale/enetc/enetc.c -@@ -2142,7 +2142,7 @@ int enetc_close(struct net_device *ndev) +@@ -1220,23 +1220,6 @@ static void enetc_xdp_drop(struct enetc_bdr *rx_ring, int rx_ring_first, + rx_ring->stats.xdp_drops++; + } + +-static void enetc_xdp_free(struct enetc_bdr *rx_ring, int rx_ring_first, +- int 
rx_ring_last) +-{ +- while (rx_ring_first != rx_ring_last) { +- struct enetc_rx_swbd *rx_swbd = &rx_ring->rx_swbd[rx_ring_first]; +- +- if (rx_swbd->page) { +- dma_unmap_page(rx_ring->dev, rx_swbd->dma, PAGE_SIZE, +- rx_swbd->dir); +- __free_page(rx_swbd->page); +- rx_swbd->page = NULL; +- } +- enetc_bdr_idx_inc(rx_ring, &rx_ring_first); +- } +- rx_ring->stats.xdp_redirect_failures++; +-} +- + static int enetc_clean_rx_ring_xdp(struct enetc_bdr *rx_ring, + struct napi_struct *napi, int work_limit, + struct bpf_prog *prog) +@@ -1258,8 +1241,8 @@ static int enetc_clean_rx_ring_xdp(struct enetc_bdr *rx_ring, + int orig_i, orig_cleaned_cnt; + struct xdp_buff xdp_buff; + struct sk_buff *skb; +- int tmp_orig_i, err; + u32 bd_status; ++ int err; + + rxbd = enetc_rxbd(rx_ring, i); + bd_status = le32_to_cpu(rxbd->r.lstatus); +@@ -1346,18 +1329,16 @@ static int enetc_clean_rx_ring_xdp(struct enetc_bdr *rx_ring, + break; + } + +- tmp_orig_i = orig_i; +- +- while (orig_i != i) { +- enetc_flip_rx_buff(rx_ring, +- &rx_ring->rx_swbd[orig_i]); +- enetc_bdr_idx_inc(rx_ring, &orig_i); +- } +- + err = xdp_do_redirect(rx_ring->ndev, &xdp_buff, prog); + if (unlikely(err)) { +- enetc_xdp_free(rx_ring, tmp_orig_i, i); ++ enetc_xdp_drop(rx_ring, orig_i, i); ++ rx_ring->stats.xdp_redirect_failures++; + } else { ++ while (orig_i != i) { ++ enetc_flip_rx_buff(rx_ring, ++ &rx_ring->rx_swbd[orig_i]); ++ enetc_bdr_idx_inc(rx_ring, &orig_i); ++ } + xdp_redirect_frm_cnt++; + rx_ring->stats.xdp_redirect++; + } +@@ -1768,7 +1749,7 @@ static void enetc_setup_txbdr(struct enetc_hw *hw, struct enetc_bdr *tx_ring) + /* enable Tx ints by setting pkt thr to 1 */ + enetc_txbdr_wr(hw, idx, ENETC_TBICR0, ENETC_TBICR0_ICEN | 0x1); + +- tbmr = ENETC_TBMR_EN; ++ tbmr = ENETC_TBMR_EN | ENETC_TBMR_SET_PRIO(tx_ring->prio); + if (tx_ring->ndev->features & NETIF_F_HW_VLAN_CTAG_TX) + tbmr |= ENETC_TBMR_VIH; + +@@ -1800,7 +1781,12 @@ static void enetc_setup_rxbdr(struct enetc_hw *hw, struct enetc_bdr *rx_ring) + else + enetc_rxbdr_wr(hw, idx, ENETC_RBBSR, ENETC_RXB_DMA_SIZE); + ++ /* Also prepare the consumer index in case page allocation never ++ * succeeds. In that case, hardware will never advance producer index ++ * to match consumer index, and will drop all frames. 
++ */ + enetc_rxbdr_wr(hw, idx, ENETC_RBPIR, 0); ++ enetc_rxbdr_wr(hw, idx, ENETC_RBCIR, 1); + + /* enable Rx ints by setting pkt thr to 1 */ + enetc_rxbdr_wr(hw, idx, ENETC_RBICR0, ENETC_RBICR0_ICEN | 0x1); +@@ -1826,13 +1812,14 @@ static void enetc_setup_rxbdr(struct enetc_hw *hw, struct enetc_bdr *rx_ring) + + static void enetc_setup_bdrs(struct enetc_ndev_priv *priv) + { ++ struct enetc_hw *hw = &priv->si->hw; + int i; + + for (i = 0; i < priv->num_tx_rings; i++) +- enetc_setup_txbdr(&priv->si->hw, priv->tx_ring[i]); ++ enetc_setup_txbdr(hw, priv->tx_ring[i]); + + for (i = 0; i < priv->num_rx_rings; i++) +- enetc_setup_rxbdr(&priv->si->hw, priv->rx_ring[i]); ++ enetc_setup_rxbdr(hw, priv->rx_ring[i]); + } + + static void enetc_clear_rxbdr(struct enetc_hw *hw, struct enetc_bdr *rx_ring) +@@ -1865,13 +1852,14 @@ static void enetc_clear_txbdr(struct enetc_hw *hw, struct enetc_bdr *tx_ring) + + static void enetc_clear_bdrs(struct enetc_ndev_priv *priv) + { ++ struct enetc_hw *hw = &priv->si->hw; + int i; + + for (i = 0; i < priv->num_tx_rings; i++) +- enetc_clear_txbdr(&priv->si->hw, priv->tx_ring[i]); ++ enetc_clear_txbdr(hw, priv->tx_ring[i]); + + for (i = 0; i < priv->num_rx_rings; i++) +- enetc_clear_rxbdr(&priv->si->hw, priv->rx_ring[i]); ++ enetc_clear_rxbdr(hw, priv->rx_ring[i]); + + udelay(1); + } +@@ -1879,13 +1867,13 @@ static void enetc_clear_bdrs(struct enetc_ndev_priv *priv) + static int enetc_setup_irqs(struct enetc_ndev_priv *priv) + { + struct pci_dev *pdev = priv->si->pdev; ++ struct enetc_hw *hw = &priv->si->hw; + int i, j, err; + + for (i = 0; i < priv->bdr_int_num; i++) { + int irq = pci_irq_vector(pdev, ENETC_BDR_INT_BASE_IDX + i); + struct enetc_int_vector *v = priv->int_vector[i]; + int entry = ENETC_BDR_INT_BASE_IDX + i; +- struct enetc_hw *hw = &priv->si->hw; + + snprintf(v->name, sizeof(v->name), "%s-rxtx%d", + priv->ndev->name, i); +@@ -1973,13 +1961,14 @@ static void enetc_setup_interrupts(struct enetc_ndev_priv *priv) + + static void enetc_clear_interrupts(struct enetc_ndev_priv *priv) + { ++ struct enetc_hw *hw = &priv->si->hw; + int i; + + for (i = 0; i < priv->num_tx_rings; i++) +- enetc_txbdr_wr(&priv->si->hw, i, ENETC_TBIER, 0); ++ enetc_txbdr_wr(hw, i, ENETC_TBIER, 0); + + for (i = 0; i < priv->num_rx_rings; i++) +- enetc_rxbdr_wr(&priv->si->hw, i, ENETC_RBIER, 0); ++ enetc_rxbdr_wr(hw, i, ENETC_RBIER, 0); + } + + static int enetc_phylink_connect(struct net_device *ndev) +@@ -2011,14 +2000,14 @@ static void enetc_tx_onestep_tstamp(struct work_struct *work) + + priv = container_of(work, struct enetc_ndev_priv, tx_onestep_tstamp); + +- netif_tx_lock(priv->ndev); ++ netif_tx_lock_bh(priv->ndev); + + clear_bit_unlock(ENETC_TX_ONESTEP_TSTAMP_IN_PROGRESS, &priv->flags); + skb = skb_dequeue(&priv->tx_skbs); + if (skb) + enetc_start_xmit(skb, priv->ndev); + +- netif_tx_unlock(priv->ndev); ++ netif_tx_unlock_bh(priv->ndev); + } + + static void enetc_tx_onestep_tstamp_init(struct enetc_ndev_priv *priv) +@@ -2142,10 +2131,11 @@ int enetc_close(struct net_device *ndev) return 0; } @@ -187742,7 +227711,31 @@ index 042327b9981fa..c0265a6f10c00 100644 { struct enetc_ndev_priv *priv = netdev_priv(ndev); struct tc_mqprio_qopt *mqprio = type_data; -@@ -2196,25 +2196,6 @@ static int enetc_setup_tc_mqprio(struct net_device *ndev, void *type_data) ++ struct enetc_hw *hw = &priv->si->hw; + struct enetc_bdr *tx_ring; + int num_stack_tx_queues; + u8 num_tc; +@@ -2162,7 +2152,8 @@ static int enetc_setup_tc_mqprio(struct net_device *ndev, void *type_data) + /* Reset all ring 
priorities to 0 */ + for (i = 0; i < priv->num_tx_rings; i++) { + tx_ring = priv->tx_ring[i]; +- enetc_set_bdr_prio(&priv->si->hw, tx_ring->index, 0); ++ tx_ring->prio = 0; ++ enetc_set_bdr_prio(hw, tx_ring->index, tx_ring->prio); + } + + return 0; +@@ -2181,7 +2172,8 @@ static int enetc_setup_tc_mqprio(struct net_device *ndev, void *type_data) + */ + for (i = 0; i < num_tc; i++) { + tx_ring = priv->tx_ring[i]; +- enetc_set_bdr_prio(&priv->si->hw, tx_ring->index, i); ++ tx_ring->prio = i; ++ enetc_set_bdr_prio(hw, tx_ring->index, tx_ring->prio); + } + + /* Reset the number of netdev queues based on the TC count */ +@@ -2196,25 +2188,6 @@ static int enetc_setup_tc_mqprio(struct net_device *ndev, void *type_data) return 0; } @@ -187768,7 +227761,7 @@ index 042327b9981fa..c0265a6f10c00 100644 static int enetc_setup_xdp_prog(struct net_device *dev, struct bpf_prog *prog, struct netlink_ext_ack *extack) { -@@ -2307,29 +2288,6 @@ static int enetc_set_rss(struct net_device *ndev, int en) +@@ -2307,52 +2280,29 @@ static int enetc_set_rss(struct net_device *ndev, int en) return 0; } @@ -187798,8 +227791,23 @@ index 042327b9981fa..c0265a6f10c00 100644 static void enetc_enable_rxvlan(struct net_device *ndev, bool en) { struct enetc_ndev_priv *priv = netdev_priv(ndev); -@@ -2348,11 +2306,9 @@ static void enetc_enable_txvlan(struct net_device *ndev, bool en) - enetc_bdr_enable_txvlan(&priv->si->hw, i, en); ++ struct enetc_hw *hw = &priv->si->hw; + int i; + + for (i = 0; i < priv->num_rx_rings; i++) +- enetc_bdr_enable_rxvlan(&priv->si->hw, i, en); ++ enetc_bdr_enable_rxvlan(hw, i, en); + } + + static void enetc_enable_txvlan(struct net_device *ndev, bool en) + { + struct enetc_ndev_priv *priv = netdev_priv(ndev); ++ struct enetc_hw *hw = &priv->si->hw; + int i; + + for (i = 0; i < priv->num_tx_rings; i++) +- enetc_bdr_enable_txvlan(&priv->si->hw, i, en); ++ enetc_bdr_enable_txvlan(hw, i, en); } -int enetc_set_features(struct net_device *ndev, @@ -187811,7 +227819,7 @@ index 042327b9981fa..c0265a6f10c00 100644 if (changed & NETIF_F_RXHASH) enetc_set_rss(ndev, !!(features & NETIF_F_RXHASH)); -@@ -2364,11 +2320,6 @@ int enetc_set_features(struct net_device *ndev, +@@ -2364,11 +2314,6 @@ int enetc_set_features(struct net_device *ndev, if (changed & NETIF_F_HW_VLAN_CTAG_TX) enetc_enable_txvlan(ndev, !!(features & NETIF_F_HW_VLAN_CTAG_TX)); @@ -187824,10 +227832,18 @@ index 042327b9981fa..c0265a6f10c00 100644 #ifdef CONFIG_FSL_ENETC_PTP_CLOCK diff --git a/drivers/net/ethernet/freescale/enetc/enetc.h b/drivers/net/ethernet/freescale/enetc/enetc.h -index 08b283347d9ce..f304cdb854ec4 100644 +index 08b283347d9ce..a3b936375c561 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc.h +++ b/drivers/net/ethernet/freescale/enetc/enetc.h -@@ -385,11 +385,9 @@ void enetc_start(struct net_device *ndev); +@@ -91,6 +91,7 @@ struct enetc_bdr { + void __iomem *rcir; + }; + u16 index; ++ u16 prio; + int bd_count; /* # of BDs */ + int next_to_use; + int next_to_clean; +@@ -385,11 +386,9 @@ void enetc_start(struct net_device *ndev); void enetc_stop(struct net_device *ndev); netdev_tx_t enetc_xmit(struct sk_buff *skb, struct net_device *ndev); struct net_device_stats *enetc_get_stats(struct net_device *ndev); @@ -187841,7 +227857,7 @@ index 08b283347d9ce..f304cdb854ec4 100644 int enetc_setup_bpf(struct net_device *dev, struct netdev_bpf *xdp); int enetc_xdp_xmit(struct net_device *ndev, int num_frames, struct xdp_frame **frames, u32 flags); -@@ -421,6 +419,7 @@ int enetc_setup_tc_block_cb(enum tc_setup_type type, void 
*type_data, +@@ -421,22 +420,24 @@ int enetc_setup_tc_block_cb(enum tc_setup_type type, void *type_data, int enetc_setup_tc_psfp(struct net_device *ndev, void *type_data); int enetc_psfp_init(struct enetc_ndev_priv *priv); int enetc_psfp_clean(struct enetc_ndev_priv *priv); @@ -187849,7 +227865,28 @@ index 08b283347d9ce..f304cdb854ec4 100644 static inline void enetc_get_max_cap(struct enetc_ndev_priv *priv) { -@@ -496,4 +495,9 @@ static inline int enetc_psfp_disable(struct enetc_ndev_priv *priv) ++ struct enetc_hw *hw = &priv->si->hw; + u32 reg; + +- reg = enetc_port_rd(&priv->si->hw, ENETC_PSIDCAPR); ++ reg = enetc_port_rd(hw, ENETC_PSIDCAPR); + priv->psfp_cap.max_streamid = reg & ENETC_PSIDCAPR_MSK; + /* Port stream filter capability */ +- reg = enetc_port_rd(&priv->si->hw, ENETC_PSFCAPR); ++ reg = enetc_port_rd(hw, ENETC_PSFCAPR); + priv->psfp_cap.max_psfp_filter = reg & ENETC_PSFCAPR_MSK; + /* Port stream gate capability */ +- reg = enetc_port_rd(&priv->si->hw, ENETC_PSGCAPR); ++ reg = enetc_port_rd(hw, ENETC_PSGCAPR); + priv->psfp_cap.max_psfp_gate = (reg & ENETC_PSGCAPR_SGIT_MSK); + priv->psfp_cap.max_psfp_gatelist = (reg & ENETC_PSGCAPR_GCL_MSK) >> 16; + /* Port flow meter capability */ +- reg = enetc_port_rd(&priv->si->hw, ENETC_PFMCAPR); ++ reg = enetc_port_rd(hw, ENETC_PFMCAPR); + priv->psfp_cap.max_psfp_meter = reg & ENETC_PFMCAPR_MSK; + } + +@@ -496,4 +497,9 @@ static inline int enetc_psfp_disable(struct enetc_ndev_priv *priv) { return 0; } @@ -187876,7 +227913,7 @@ index 910b9f722504a..d62c188c87480 100644 info->tx_types = (1 << HWTSTAMP_TX_OFF) | (1 << HWTSTAMP_TX_ON) | diff --git a/drivers/net/ethernet/freescale/enetc/enetc_pf.c b/drivers/net/ethernet/freescale/enetc/enetc_pf.c -index d522bd5c90b49..3615357cc60fb 100644 +index d522bd5c90b49..5efb079ef25fa 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_pf.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_pf.c @@ -708,6 +708,13 @@ static int enetc_pf_set_features(struct net_device *ndev, @@ -187932,19 +227969,59 @@ index d522bd5c90b49..3615357cc60fb 100644 .ndo_bpf = enetc_setup_bpf, .ndo_xdp_xmit = enetc_xdp_xmit, }; +@@ -772,9 +800,6 @@ static void enetc_pf_netdev_setup(struct enetc_si *si, struct net_device *ndev, + + ndev->priv_flags |= IFF_UNICAST_FLT; + +- if (si->hw_features & ENETC_SI_F_QBV) +- priv->active_offloads |= ENETC_F_QBV; +- + if (si->hw_features & ENETC_SI_F_PSFP && !enetc_psfp_enable(priv)) { + priv->active_offloads |= ENETC_F_QCI; + ndev->features |= NETIF_F_HW_TC; +@@ -1025,7 +1050,8 @@ static void enetc_pl_mac_link_up(struct phylink_config *config, + int idx; + + priv = netdev_priv(pf->si->ndev); +- if (priv->active_offloads & ENETC_F_QBV) ++ ++ if (pf->si->hw_features & ENETC_SI_F_QBV) + enetc_sched_speed_set(priv, speed); + + if (!phylink_autoneg_inband(mode) && diff --git a/drivers/net/ethernet/freescale/enetc/enetc_qos.c b/drivers/net/ethernet/freescale/enetc/enetc_qos.c -index 4577226d3c6ad..6b236e0fd806b 100644 +index 4577226d3c6ad..ba51fb381f0cb 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_qos.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_qos.c -@@ -45,6 +45,7 @@ void enetc_sched_speed_set(struct enetc_ndev_priv *priv, int speed) - | pspeed); +@@ -17,8 +17,9 @@ static u16 enetc_get_max_gcl_len(struct enetc_hw *hw) + + void enetc_sched_speed_set(struct enetc_ndev_priv *priv, int speed) + { ++ struct enetc_hw *hw = &priv->si->hw; + u32 old_speed = priv->speed; +- u32 pspeed; ++ u32 pspeed, tmp; + + if (speed == old_speed) + return; +@@ -39,36 +40,38 @@ void 
enetc_sched_speed_set(struct enetc_ndev_priv *priv, int speed) + } + + priv->speed = speed; +- enetc_port_wr(&priv->si->hw, ENETC_PMR, +- (enetc_port_rd(&priv->si->hw, ENETC_PMR) +- & (~ENETC_PMR_PSPEED_MASK)) +- | pspeed); ++ tmp = enetc_port_rd(hw, ENETC_PMR); ++ enetc_port_wr(hw, ENETC_PMR, (tmp & ~ENETC_PMR_PSPEED_MASK) | pspeed); } +#define ENETC_QOS_ALIGN 64 static int enetc_setup_taprio(struct net_device *ndev, struct tc_taprio_qopt_offload *admin_conf) { -@@ -52,10 +53,11 @@ static int enetc_setup_taprio(struct net_device *ndev, + struct enetc_ndev_priv *priv = netdev_priv(ndev); ++ struct enetc_hw *hw = &priv->si->hw; struct enetc_cbd cbd = {.cmd = 0}; struct tgs_gcl_conf *gcl_config; struct tgs_gcl_data *gcl_data; @@ -187957,7 +228034,26 @@ index 4577226d3c6ad..6b236e0fd806b 100644 u32 tge; int err; int i; -@@ -82,9 +84,16 @@ static int enetc_setup_taprio(struct net_device *ndev, + +- if (admin_conf->num_entries > enetc_get_max_gcl_len(&priv->si->hw)) ++ if (admin_conf->num_entries > enetc_get_max_gcl_len(hw)) + return -EINVAL; + gcl_len = admin_conf->num_entries; + +- tge = enetc_rd(&priv->si->hw, ENETC_QBV_PTGCR_OFFSET); ++ tge = enetc_rd(hw, ENETC_QBV_PTGCR_OFFSET); + if (!admin_conf->enable) { +- enetc_wr(&priv->si->hw, +- ENETC_QBV_PTGCR_OFFSET, +- tge & (~ENETC_QBV_TGE)); ++ enetc_wr(hw, ENETC_QBV_PTGCR_OFFSET, tge & ~ENETC_QBV_TGE); ++ ++ priv->active_offloads &= ~ENETC_F_QBV; ++ + return 0; + } + +@@ -82,9 +85,16 @@ static int enetc_setup_taprio(struct net_device *ndev, gcl_config = &cbd.gcl_conf; data_size = struct_size(gcl_data, entry, gcl_len); @@ -187976,7 +228072,7 @@ index 4577226d3c6ad..6b236e0fd806b 100644 gce = (struct gce *)(gcl_data + 1); -@@ -110,16 +119,8 @@ static int enetc_setup_taprio(struct net_device *ndev, +@@ -110,30 +120,22 @@ static int enetc_setup_taprio(struct net_device *ndev, cbd.length = cpu_to_le16(data_size); cbd.status_flags = 0; @@ -187995,18 +228091,168 @@ index 4577226d3c6ad..6b236e0fd806b 100644 cbd.cls = BDCR_CMD_PORT_GCL; cbd.status_flags = 0; -@@ -132,8 +133,8 @@ static int enetc_setup_taprio(struct net_device *ndev, - ENETC_QBV_PTGCR_OFFSET, - tge & (~ENETC_QBV_TGE)); +- enetc_wr(&priv->si->hw, ENETC_QBV_PTGCR_OFFSET, +- tge | ENETC_QBV_TGE); ++ enetc_wr(hw, ENETC_QBV_PTGCR_OFFSET, tge | ENETC_QBV_TGE); + + err = enetc_send_cmd(priv->si, &cbd); + if (err) +- enetc_wr(&priv->si->hw, +- ENETC_QBV_PTGCR_OFFSET, +- tge & (~ENETC_QBV_TGE)); ++ enetc_wr(hw, ENETC_QBV_PTGCR_OFFSET, tge & ~ENETC_QBV_TGE); - dma_unmap_single(&priv->si->pdev->dev, dma, data_size, DMA_TO_DEVICE); - kfree(gcl_data); + dma_free_coherent(&priv->si->pdev->dev, data_size + ENETC_QOS_ALIGN, + tmp, dma); ++ ++ if (!err) ++ priv->active_offloads |= ENETC_F_QBV; return err; } -@@ -463,8 +464,9 @@ static int enetc_streamid_hw_set(struct enetc_ndev_priv *priv, +@@ -142,6 +144,8 @@ int enetc_setup_tc_taprio(struct net_device *ndev, void *type_data) + { + struct tc_taprio_qopt_offload *taprio = type_data; + struct enetc_ndev_priv *priv = netdev_priv(ndev); ++ struct enetc_hw *hw = &priv->si->hw; ++ struct enetc_bdr *tx_ring; + int err; + int i; + +@@ -150,18 +154,20 @@ int enetc_setup_tc_taprio(struct net_device *ndev, void *type_data) + if (priv->tx_ring[i]->tsd_enable) + return -EBUSY; + +- for (i = 0; i < priv->num_tx_rings; i++) +- enetc_set_bdr_prio(&priv->si->hw, +- priv->tx_ring[i]->index, +- taprio->enable ? i : 0); ++ for (i = 0; i < priv->num_tx_rings; i++) { ++ tx_ring = priv->tx_ring[i]; ++ tx_ring->prio = taprio->enable ? 
i : 0; ++ enetc_set_bdr_prio(hw, tx_ring->index, tx_ring->prio); ++ } + + err = enetc_setup_taprio(ndev, taprio); +- +- if (err) +- for (i = 0; i < priv->num_tx_rings; i++) +- enetc_set_bdr_prio(&priv->si->hw, +- priv->tx_ring[i]->index, +- taprio->enable ? 0 : i); ++ if (err) { ++ for (i = 0; i < priv->num_tx_rings; i++) { ++ tx_ring = priv->tx_ring[i]; ++ tx_ring->prio = taprio->enable ? 0 : i; ++ enetc_set_bdr_prio(hw, tx_ring->index, tx_ring->prio); ++ } ++ } + + return err; + } +@@ -182,7 +188,7 @@ int enetc_setup_tc_cbs(struct net_device *ndev, void *type_data) + struct tc_cbs_qopt_offload *cbs = type_data; + u32 port_transmit_rate = priv->speed; + u8 tc_nums = netdev_get_num_tc(ndev); +- struct enetc_si *si = priv->si; ++ struct enetc_hw *hw = &priv->si->hw; + u32 hi_credit_bit, hi_credit_reg; + u32 max_interference_size; + u32 port_frame_max_size; +@@ -203,15 +209,15 @@ int enetc_setup_tc_cbs(struct net_device *ndev, void *type_data) + * lower than this TC have been disabled. + */ + if (tc == prio_top && +- enetc_get_cbs_enable(&si->hw, prio_next)) { ++ enetc_get_cbs_enable(hw, prio_next)) { + dev_err(&ndev->dev, + "Disable TC%d before disable TC%d\n", + prio_next, tc); + return -EINVAL; + } + +- enetc_port_wr(&si->hw, ENETC_PTCCBSR1(tc), 0); +- enetc_port_wr(&si->hw, ENETC_PTCCBSR0(tc), 0); ++ enetc_port_wr(hw, ENETC_PTCCBSR1(tc), 0); ++ enetc_port_wr(hw, ENETC_PTCCBSR0(tc), 0); + + return 0; + } +@@ -228,13 +234,13 @@ int enetc_setup_tc_cbs(struct net_device *ndev, void *type_data) + * higher than this TC have been enabled. + */ + if (tc == prio_next) { +- if (!enetc_get_cbs_enable(&si->hw, prio_top)) { ++ if (!enetc_get_cbs_enable(hw, prio_top)) { + dev_err(&ndev->dev, + "Enable TC%d first before enable TC%d\n", + prio_top, prio_next); + return -EINVAL; + } +- bw_sum += enetc_get_cbs_bw(&si->hw, prio_top); ++ bw_sum += enetc_get_cbs_bw(hw, prio_top); + } + + if (bw_sum + bw >= 100) { +@@ -243,7 +249,7 @@ int enetc_setup_tc_cbs(struct net_device *ndev, void *type_data) + return -EINVAL; + } + +- enetc_port_rd(&si->hw, ENETC_PTCMSDUR(tc)); ++ enetc_port_rd(hw, ENETC_PTCMSDUR(tc)); + + /* For top prio TC, the max_interfrence_size is maxSizedFrame. 
+ * +@@ -263,8 +269,8 @@ int enetc_setup_tc_cbs(struct net_device *ndev, void *type_data) + u32 m0, ma, r0, ra; + + m0 = port_frame_max_size * 8; +- ma = enetc_port_rd(&si->hw, ENETC_PTCMSDUR(prio_top)) * 8; +- ra = enetc_get_cbs_bw(&si->hw, prio_top) * ++ ma = enetc_port_rd(hw, ENETC_PTCMSDUR(prio_top)) * 8; ++ ra = enetc_get_cbs_bw(hw, prio_top) * + port_transmit_rate * 10000ULL; + r0 = port_transmit_rate * 1000000ULL; + max_interference_size = m0 + ma + +@@ -284,10 +290,10 @@ int enetc_setup_tc_cbs(struct net_device *ndev, void *type_data) + hi_credit_reg = (u32)div_u64((ENETC_CLK * 100ULL) * hi_credit_bit, + port_transmit_rate * 1000000ULL); + +- enetc_port_wr(&si->hw, ENETC_PTCCBSR1(tc), hi_credit_reg); ++ enetc_port_wr(hw, ENETC_PTCCBSR1(tc), hi_credit_reg); + + /* Set bw register and enable this traffic class */ +- enetc_port_wr(&si->hw, ENETC_PTCCBSR0(tc), bw | ENETC_CBSE); ++ enetc_port_wr(hw, ENETC_PTCCBSR0(tc), bw | ENETC_CBSE); + + return 0; + } +@@ -297,6 +303,7 @@ int enetc_setup_tc_txtime(struct net_device *ndev, void *type_data) + struct enetc_ndev_priv *priv = netdev_priv(ndev); + struct tc_etf_qopt_offload *qopt = type_data; + u8 tc_nums = netdev_get_num_tc(ndev); ++ struct enetc_hw *hw = &priv->si->hw; + int tc; + + if (!tc_nums) +@@ -312,12 +319,11 @@ int enetc_setup_tc_txtime(struct net_device *ndev, void *type_data) + return -EBUSY; + + /* TSD and Qbv are mutually exclusive in hardware */ +- if (enetc_rd(&priv->si->hw, ENETC_QBV_PTGCR_OFFSET) & ENETC_QBV_TGE) ++ if (enetc_rd(hw, ENETC_QBV_PTGCR_OFFSET) & ENETC_QBV_TGE) + return -EBUSY; + + priv->tx_ring[tc]->tsd_enable = qopt->enable; +- enetc_port_wr(&priv->si->hw, ENETC_PTCTSDR(tc), +- qopt->enable ? ENETC_TSDE : 0); ++ enetc_port_wr(hw, ENETC_PTCTSDR(tc), qopt->enable ? ENETC_TSDE : 0); + + return 0; + } +@@ -463,8 +469,9 @@ static int enetc_streamid_hw_set(struct enetc_ndev_priv *priv, struct enetc_cbd cbd = {.cmd = 0}; struct streamid_data *si_data; struct streamid_conf *si_conf; @@ -188017,7 +228263,7 @@ index 4577226d3c6ad..6b236e0fd806b 100644 int port; int err; -@@ -485,19 +487,20 @@ static int enetc_streamid_hw_set(struct enetc_ndev_priv *priv, +@@ -485,19 +492,20 @@ static int enetc_streamid_hw_set(struct enetc_ndev_priv *priv, cbd.status_flags = 0; data_size = sizeof(struct streamid_data); @@ -188048,7 +228294,7 @@ index 4577226d3c6ad..6b236e0fd806b 100644 eth_broadcast_addr(si_data->dmac); si_data->vid_vidm_tg = (ENETC_CBDR_SID_VID_MASK + ((0x3 << 14) | ENETC_CBDR_SID_VIDM)); -@@ -512,12 +515,10 @@ static int enetc_streamid_hw_set(struct enetc_ndev_priv *priv, +@@ -512,12 +520,10 @@ static int enetc_streamid_hw_set(struct enetc_ndev_priv *priv, err = enetc_send_cmd(priv->si, &cbd); if (err) @@ -188064,7 +228310,7 @@ index 4577226d3c6ad..6b236e0fd806b 100644 /* Enable the entry overwrite again incase space flushed by hardware */ memset(&cbd, 0, sizeof(cbd)); -@@ -539,8 +540,8 @@ static int enetc_streamid_hw_set(struct enetc_ndev_priv *priv, +@@ -539,8 +545,8 @@ static int enetc_streamid_hw_set(struct enetc_ndev_priv *priv, cbd.length = cpu_to_le16(data_size); @@ -188075,7 +228321,7 @@ index 4577226d3c6ad..6b236e0fd806b 100644 /* VIDM default to be 1. * VID Match. 
If set (b1) then the VID must match, otherwise -@@ -560,7 +561,9 @@ static int enetc_streamid_hw_set(struct enetc_ndev_priv *priv, +@@ -560,7 +566,9 @@ static int enetc_streamid_hw_set(struct enetc_ndev_priv *priv, } err = enetc_send_cmd(priv->si, &cbd); @@ -188086,7 +228332,7 @@ index 4577226d3c6ad..6b236e0fd806b 100644 return err; } -@@ -629,8 +632,9 @@ static int enetc_streamcounter_hw_get(struct enetc_ndev_priv *priv, +@@ -629,8 +637,9 @@ static int enetc_streamcounter_hw_get(struct enetc_ndev_priv *priv, { struct enetc_cbd cbd = { .cmd = 2 }; struct sfi_counter_data *data_buf; @@ -188097,7 +228343,7 @@ index 4577226d3c6ad..6b236e0fd806b 100644 int err; cbd.index = cpu_to_le16((u16)index); -@@ -639,19 +643,19 @@ static int enetc_streamcounter_hw_get(struct enetc_ndev_priv *priv, +@@ -639,19 +648,19 @@ static int enetc_streamcounter_hw_get(struct enetc_ndev_priv *priv, cbd.status_flags = 0; data_size = sizeof(struct sfi_counter_data); @@ -188128,7 +228374,7 @@ index 4577226d3c6ad..6b236e0fd806b 100644 cbd.length = cpu_to_le16(data_size); -@@ -680,7 +684,9 @@ static int enetc_streamcounter_hw_get(struct enetc_ndev_priv *priv, +@@ -680,7 +689,9 @@ static int enetc_streamcounter_hw_get(struct enetc_ndev_priv *priv, data_buf->flow_meter_dropl; exit: @@ -188139,7 +228385,7 @@ index 4577226d3c6ad..6b236e0fd806b 100644 return err; } -@@ -719,9 +725,10 @@ static int enetc_streamgate_hw_set(struct enetc_ndev_priv *priv, +@@ -719,9 +730,10 @@ static int enetc_streamgate_hw_set(struct enetc_ndev_priv *priv, struct sgcl_conf *sgcl_config; struct sgcl_data *sgcl_data; struct sgce *sgce; @@ -188151,7 +228397,7 @@ index 4577226d3c6ad..6b236e0fd806b 100644 u64 now; cbd.index = cpu_to_le16(sgi->index); -@@ -768,24 +775,20 @@ static int enetc_streamgate_hw_set(struct enetc_ndev_priv *priv, +@@ -768,24 +780,20 @@ static int enetc_streamgate_hw_set(struct enetc_ndev_priv *priv, sgcl_config->acl_len = (sgi->num_entries - 1) & 0x3; data_size = struct_size(sgcl_data, sgcl, sgi->num_entries); @@ -188187,7 +228433,7 @@ index 4577226d3c6ad..6b236e0fd806b 100644 sgce = &sgcl_data->sgcl[0]; -@@ -840,7 +843,8 @@ static int enetc_streamgate_hw_set(struct enetc_ndev_priv *priv, +@@ -840,7 +848,8 @@ static int enetc_streamgate_hw_set(struct enetc_ndev_priv *priv, err = enetc_send_cmd(priv->si, &cbd); exit: @@ -188197,7 +228443,7 @@ index 4577226d3c6ad..6b236e0fd806b 100644 return err; } -@@ -1525,6 +1529,29 @@ int enetc_setup_tc_block_cb(enum tc_setup_type type, void *type_data, +@@ -1525,6 +1534,29 @@ int enetc_setup_tc_block_cb(enum tc_setup_type type, void *type_data, } } @@ -188277,9 +228523,47 @@ index 7b4961daa2540..ed7301b691694 100644 #define FEC_ENET_TS_TIMER ((uint)0x00008000) diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c -index ec87b370bba1f..67eb9b671244b 100644 +index ec87b370bba1f..afb30d679a473 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c +@@ -72,7 +72,7 @@ + #include "fec.h" + + static void set_multicast_list(struct net_device *ndev); +-static void fec_enet_itr_coal_init(struct net_device *ndev); ++static void fec_enet_itr_coal_set(struct net_device *ndev); + + #define DRIVER_NAME "fec" + +@@ -656,7 +656,7 @@ fec_enet_txq_put_data_tso(struct fec_enet_priv_tx_q *txq, struct sk_buff *skb, + dev_kfree_skb_any(skb); + if (net_ratelimit()) + netdev_err(ndev, "Tx DMA memory map failed\n"); +- return NETDEV_TX_BUSY; ++ return NETDEV_TX_OK; + } + + bdp->cbd_datlen = cpu_to_fec16(size); +@@ -718,7 
+718,7 @@ fec_enet_txq_put_hdr_tso(struct fec_enet_priv_tx_q *txq, + dev_kfree_skb_any(skb); + if (net_ratelimit()) + netdev_err(ndev, "Tx DMA memory map failed\n"); +- return NETDEV_TX_BUSY; ++ return NETDEV_TX_OK; + } + } + +@@ -1163,8 +1163,8 @@ fec_restart(struct net_device *ndev) + writel(0, fep->hwp + FEC_IMASK); + + /* Init the interrupt coalescing */ +- fec_enet_itr_coal_init(ndev); +- ++ if (fep->quirks & FEC_QUIRK_HAS_COALESCE) ++ fec_enet_itr_coal_set(ndev); + } + + static void fec_enet_stop_mode(struct fec_enet_private *fep, bool enabled) @@ -1480,7 +1480,7 @@ fec_enet_rx_queue(struct net_device *ndev, int budget, u16 queue_id) break; pkt_received++; @@ -188289,7 +228573,106 @@ index ec87b370bba1f..67eb9b671244b 100644 /* Check for errors. */ status ^= BD_ENET_RX_LAST; -@@ -3726,7 +3726,7 @@ static int fec_enet_init_stop_mode(struct fec_enet_private *fep, +@@ -2336,6 +2336,31 @@ static u32 fec_enet_register_offset[] = { + IEEE_R_DROP, IEEE_R_FRAME_OK, IEEE_R_CRC, IEEE_R_ALIGN, IEEE_R_MACERR, + IEEE_R_FDXFC, IEEE_R_OCTETS_OK + }; ++/* for i.MX6ul */ ++static u32 fec_enet_register_offset_6ul[] = { ++ FEC_IEVENT, FEC_IMASK, FEC_R_DES_ACTIVE_0, FEC_X_DES_ACTIVE_0, ++ FEC_ECNTRL, FEC_MII_DATA, FEC_MII_SPEED, FEC_MIB_CTRLSTAT, FEC_R_CNTRL, ++ FEC_X_CNTRL, FEC_ADDR_LOW, FEC_ADDR_HIGH, FEC_OPD, FEC_TXIC0, FEC_RXIC0, ++ FEC_HASH_TABLE_HIGH, FEC_HASH_TABLE_LOW, FEC_GRP_HASH_TABLE_HIGH, ++ FEC_GRP_HASH_TABLE_LOW, FEC_X_WMRK, FEC_R_DES_START_0, ++ FEC_X_DES_START_0, FEC_R_BUFF_SIZE_0, FEC_R_FIFO_RSFL, FEC_R_FIFO_RSEM, ++ FEC_R_FIFO_RAEM, FEC_R_FIFO_RAFL, FEC_RACC, ++ RMON_T_DROP, RMON_T_PACKETS, RMON_T_BC_PKT, RMON_T_MC_PKT, ++ RMON_T_CRC_ALIGN, RMON_T_UNDERSIZE, RMON_T_OVERSIZE, RMON_T_FRAG, ++ RMON_T_JAB, RMON_T_COL, RMON_T_P64, RMON_T_P65TO127, RMON_T_P128TO255, ++ RMON_T_P256TO511, RMON_T_P512TO1023, RMON_T_P1024TO2047, ++ RMON_T_P_GTE2048, RMON_T_OCTETS, ++ IEEE_T_DROP, IEEE_T_FRAME_OK, IEEE_T_1COL, IEEE_T_MCOL, IEEE_T_DEF, ++ IEEE_T_LCOL, IEEE_T_EXCOL, IEEE_T_MACERR, IEEE_T_CSERR, IEEE_T_SQE, ++ IEEE_T_FDXFC, IEEE_T_OCTETS_OK, ++ RMON_R_PACKETS, RMON_R_BC_PKT, RMON_R_MC_PKT, RMON_R_CRC_ALIGN, ++ RMON_R_UNDERSIZE, RMON_R_OVERSIZE, RMON_R_FRAG, RMON_R_JAB, ++ RMON_R_RESVD_O, RMON_R_P64, RMON_R_P65TO127, RMON_R_P128TO255, ++ RMON_R_P256TO511, RMON_R_P512TO1023, RMON_R_P1024TO2047, ++ RMON_R_P_GTE2048, RMON_R_OCTETS, ++ IEEE_R_DROP, IEEE_R_FRAME_OK, IEEE_R_CRC, IEEE_R_ALIGN, IEEE_R_MACERR, ++ IEEE_R_FDXFC, IEEE_R_OCTETS_OK ++}; + #else + static __u32 fec_enet_register_version = 1; + static u32 fec_enet_register_offset[] = { +@@ -2360,7 +2385,24 @@ static void fec_enet_get_regs(struct net_device *ndev, + u32 *buf = (u32 *)regbuf; + u32 i, off; + int ret; ++#if defined(CONFIG_M523x) || defined(CONFIG_M527x) || defined(CONFIG_M528x) || \ ++ defined(CONFIG_M520x) || defined(CONFIG_M532x) || defined(CONFIG_ARM) || \ ++ defined(CONFIG_ARM64) || defined(CONFIG_COMPILE_TEST) ++ u32 *reg_list; ++ u32 reg_cnt; + ++ if (!of_machine_is_compatible("fsl,imx6ul")) { ++ reg_list = fec_enet_register_offset; ++ reg_cnt = ARRAY_SIZE(fec_enet_register_offset); ++ } else { ++ reg_list = fec_enet_register_offset_6ul; ++ reg_cnt = ARRAY_SIZE(fec_enet_register_offset_6ul); ++ } ++#else ++ /* coldfire */ ++ static u32 *reg_list = fec_enet_register_offset; ++ static const u32 reg_cnt = ARRAY_SIZE(fec_enet_register_offset); ++#endif + ret = pm_runtime_resume_and_get(dev); + if (ret < 0) + return; +@@ -2369,8 +2411,8 @@ static void fec_enet_get_regs(struct net_device *ndev, + + memset(buf, 0, regs->len); + +- for 
(i = 0; i < ARRAY_SIZE(fec_enet_register_offset); i++) { +- off = fec_enet_register_offset[i]; ++ for (i = 0; i < reg_cnt; i++) { ++ off = reg_list[i]; + + if ((off == FEC_R_BOUND || off == FEC_R_FSTART) && + !(fep->quirks & FEC_QUIRK_HAS_FRREG)) +@@ -2718,19 +2760,6 @@ static int fec_enet_set_coalesce(struct net_device *ndev, + return 0; + } + +-static void fec_enet_itr_coal_init(struct net_device *ndev) +-{ +- struct ethtool_coalesce ec; +- +- ec.rx_coalesce_usecs = FEC_ITR_ICTT_DEFAULT; +- ec.rx_max_coalesced_frames = FEC_ITR_ICFT_DEFAULT; +- +- ec.tx_coalesce_usecs = FEC_ITR_ICTT_DEFAULT; +- ec.tx_max_coalesced_frames = FEC_ITR_ICFT_DEFAULT; +- +- fec_enet_set_coalesce(ndev, &ec, NULL, NULL); +-} +- + static int fec_enet_get_tunable(struct net_device *netdev, + const struct ethtool_tunable *tuna, + void *data) +@@ -3484,6 +3513,10 @@ static int fec_enet_init(struct net_device *ndev) + fep->rx_align = 0x3; + fep->tx_align = 0x3; + #endif ++ fep->rx_pkts_itr = FEC_ITR_ICFT_DEFAULT; ++ fep->tx_pkts_itr = FEC_ITR_ICFT_DEFAULT; ++ fep->rx_time_itr = FEC_ITR_ICTT_DEFAULT; ++ fep->tx_time_itr = FEC_ITR_ICTT_DEFAULT; + + /* Check mask of the streaming and coherent API */ + ret = dma_set_mask_and_coherent(&fep->pdev->dev, DMA_BIT_MASK(32)); +@@ -3726,7 +3759,7 @@ static int fec_enet_init_stop_mode(struct fec_enet_private *fep, ARRAY_SIZE(out_val)); if (ret) { dev_dbg(&fep->pdev->dev, "no stop mode property\n"); @@ -188374,7 +228757,7 @@ index d9baac0dbc7d0..4c9d05c45c033 100644 of_node_put(port_node); free_port: diff --git a/drivers/net/ethernet/freescale/fman/mac.c b/drivers/net/ethernet/freescale/fman/mac.c -index d9fc5c456bf3e..39ae965cd4f64 100644 +index d9fc5c456bf3e..b0c756b65cc2e 100644 --- a/drivers/net/ethernet/freescale/fman/mac.c +++ b/drivers/net/ethernet/freescale/fman/mac.c @@ -94,14 +94,17 @@ static void mac_exception(void *handle, enum fman_mac_exceptions ex) @@ -188439,6 +228822,28 @@ index d9fc5c456bf3e..39ae965cd4f64 100644 if (priv->max_speed == SPEED_10000) params.phy_if = PHY_INTERFACE_MODE_XGMII; +@@ -871,12 +882,21 @@ _return: + return err; + } + ++static int mac_remove(struct platform_device *pdev) ++{ ++ struct mac_device *mac_dev = platform_get_drvdata(pdev); ++ ++ platform_device_unregister(mac_dev->priv->eth_dev); ++ return 0; ++} ++ + static struct platform_driver mac_driver = { + .driver = { + .name = KBUILD_MODNAME, + .of_match_table = mac_match, + }, + .probe = mac_probe, ++ .remove = mac_remove, + }; + + builtin_platform_driver(mac_driver); diff --git a/drivers/net/ethernet/freescale/fs_enet/mac-fec.c b/drivers/net/ethernet/freescale/fs_enet/mac-fec.c index 99fe2c210d0f6..61f4b6e50d29b 100644 --- a/drivers/net/ethernet/freescale/fs_enet/mac-fec.c @@ -189120,6 +229525,32 @@ index 05ddb6a75c38f..ec394d9916681 100644 } /* Completion types and expected behavior: +diff --git a/drivers/net/ethernet/hisilicon/hisi_femac.c b/drivers/net/ethernet/hisilicon/hisi_femac.c +index 22bf914f2dbd0..ea3e67cf5ffa1 100644 +--- a/drivers/net/ethernet/hisilicon/hisi_femac.c ++++ b/drivers/net/ethernet/hisilicon/hisi_femac.c +@@ -283,7 +283,7 @@ static int hisi_femac_rx(struct net_device *dev, int limit) + skb->protocol = eth_type_trans(skb, dev); + napi_gro_receive(&priv->napi, skb); + dev->stats.rx_packets++; +- dev->stats.rx_bytes += skb->len; ++ dev->stats.rx_bytes += len; + next: + pos = (pos + 1) % rxq->num; + if (rx_pkts_num >= limit) +diff --git a/drivers/net/ethernet/hisilicon/hix5hd2_gmac.c b/drivers/net/ethernet/hisilicon/hix5hd2_gmac.c +index c1aae0fca5e98..0a70fb979f0c3 
100644 +--- a/drivers/net/ethernet/hisilicon/hix5hd2_gmac.c ++++ b/drivers/net/ethernet/hisilicon/hix5hd2_gmac.c +@@ -550,7 +550,7 @@ static int hix5hd2_rx(struct net_device *dev, int limit) + skb->protocol = eth_type_trans(skb, dev); + napi_gro_receive(&priv->napi, skb); + dev->stats.rx_packets++; +- dev->stats.rx_bytes += skb->len; ++ dev->stats.rx_bytes += len; + next: + pos = dma_ring_incr(pos, RX_DESC_NUM); + } diff --git a/drivers/net/ethernet/hisilicon/hns/hnae.c b/drivers/net/ethernet/hisilicon/hns/hnae.c index 00fafc0f85121..430eccea8e5e9 100644 --- a/drivers/net/ethernet/hisilicon/hns/hnae.c @@ -189318,10 +229749,217 @@ index bd8801065e024..814f7491ca08d 100644 }; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c -index 4b886a13e0797..818a028703c65 100644 +index 4b886a13e0797..dc835f316d471 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c -@@ -2255,6 +2255,8 @@ out_err_tx_ok: +@@ -1005,9 +1005,7 @@ static bool hns3_can_use_tx_bounce(struct hns3_enet_ring *ring, + return false; + + if (ALIGN(len, dma_get_cache_alignment()) > space) { +- u64_stats_update_begin(&ring->syncp); +- ring->stats.tx_spare_full++; +- u64_stats_update_end(&ring->syncp); ++ hns3_ring_stats_update(ring, tx_spare_full); + return false; + } + +@@ -1024,9 +1022,7 @@ static bool hns3_can_use_tx_sgl(struct hns3_enet_ring *ring, + return false; + + if (space < HNS3_MAX_SGL_SIZE) { +- u64_stats_update_begin(&ring->syncp); +- ring->stats.tx_spare_full++; +- u64_stats_update_end(&ring->syncp); ++ hns3_ring_stats_update(ring, tx_spare_full); + return false; + } + +@@ -1554,9 +1550,7 @@ static int hns3_fill_skb_desc(struct hns3_enet_ring *ring, + + ret = hns3_handle_vtags(ring, skb); + if (unlikely(ret < 0)) { +- u64_stats_update_begin(&ring->syncp); +- ring->stats.tx_vlan_err++; +- u64_stats_update_end(&ring->syncp); ++ hns3_ring_stats_update(ring, tx_vlan_err); + return ret; + } else if (ret == HNS3_INNER_VLAN_TAG) { + inner_vtag = skb_vlan_tag_get(skb); +@@ -1591,9 +1585,7 @@ static int hns3_fill_skb_desc(struct hns3_enet_ring *ring, + + ret = hns3_get_l4_protocol(skb, &ol4_proto, &il4_proto); + if (unlikely(ret < 0)) { +- u64_stats_update_begin(&ring->syncp); +- ring->stats.tx_l4_proto_err++; +- u64_stats_update_end(&ring->syncp); ++ hns3_ring_stats_update(ring, tx_l4_proto_err); + return ret; + } + +@@ -1601,18 +1593,14 @@ static int hns3_fill_skb_desc(struct hns3_enet_ring *ring, + &type_cs_vlan_tso, + &ol_type_vlan_len_msec); + if (unlikely(ret < 0)) { +- u64_stats_update_begin(&ring->syncp); +- ring->stats.tx_l2l3l4_err++; +- u64_stats_update_end(&ring->syncp); ++ hns3_ring_stats_update(ring, tx_l2l3l4_err); + return ret; + } + + ret = hns3_set_tso(skb, &paylen_ol4cs, &mss_hw_csum, + &type_cs_vlan_tso, &desc_cb->send_bytes); + if (unlikely(ret < 0)) { +- u64_stats_update_begin(&ring->syncp); +- ring->stats.tx_tso_err++; +- u64_stats_update_end(&ring->syncp); ++ hns3_ring_stats_update(ring, tx_tso_err); + return ret; + } + } +@@ -1705,9 +1693,7 @@ static int hns3_map_and_fill_desc(struct hns3_enet_ring *ring, void *priv, + } + + if (unlikely(dma_mapping_error(dev, dma))) { +- u64_stats_update_begin(&ring->syncp); +- ring->stats.sw_err_cnt++; +- u64_stats_update_end(&ring->syncp); ++ hns3_ring_stats_update(ring, sw_err_cnt); + return -ENOMEM; + } + +@@ -1853,9 +1839,7 @@ static int hns3_skb_linearize(struct hns3_enet_ring *ring, + * recursion level of over HNS3_MAX_RECURSION_LEVEL. 
+ */ + if (bd_num == UINT_MAX) { +- u64_stats_update_begin(&ring->syncp); +- ring->stats.over_max_recursion++; +- u64_stats_update_end(&ring->syncp); ++ hns3_ring_stats_update(ring, over_max_recursion); + return -ENOMEM; + } + +@@ -1864,16 +1848,12 @@ static int hns3_skb_linearize(struct hns3_enet_ring *ring, + */ + if (skb->len > HNS3_MAX_TSO_SIZE || + (!skb_is_gso(skb) && skb->len > HNS3_MAX_NON_TSO_SIZE)) { +- u64_stats_update_begin(&ring->syncp); +- ring->stats.hw_limitation++; +- u64_stats_update_end(&ring->syncp); ++ hns3_ring_stats_update(ring, hw_limitation); + return -ENOMEM; + } + + if (__skb_linearize(skb)) { +- u64_stats_update_begin(&ring->syncp); +- ring->stats.sw_err_cnt++; +- u64_stats_update_end(&ring->syncp); ++ hns3_ring_stats_update(ring, sw_err_cnt); + return -ENOMEM; + } + +@@ -1903,9 +1883,7 @@ static int hns3_nic_maybe_stop_tx(struct hns3_enet_ring *ring, + + bd_num = hns3_tx_bd_count(skb->len); + +- u64_stats_update_begin(&ring->syncp); +- ring->stats.tx_copy++; +- u64_stats_update_end(&ring->syncp); ++ hns3_ring_stats_update(ring, tx_copy); + } + + out: +@@ -1925,9 +1903,7 @@ out: + return bd_num; + } + +- u64_stats_update_begin(&ring->syncp); +- ring->stats.tx_busy++; +- u64_stats_update_end(&ring->syncp); ++ hns3_ring_stats_update(ring, tx_busy); + + return -EBUSY; + } +@@ -2012,9 +1988,7 @@ static void hns3_tx_doorbell(struct hns3_enet_ring *ring, int num, + ring->pending_buf += num; + + if (!doorbell) { +- u64_stats_update_begin(&ring->syncp); +- ring->stats.tx_more++; +- u64_stats_update_end(&ring->syncp); ++ hns3_ring_stats_update(ring, tx_more); + return; + } + +@@ -2064,9 +2038,7 @@ static int hns3_handle_tx_bounce(struct hns3_enet_ring *ring, + ret = skb_copy_bits(skb, 0, buf, size); + if (unlikely(ret < 0)) { + hns3_tx_spare_rollback(ring, cb_len); +- u64_stats_update_begin(&ring->syncp); +- ring->stats.copy_bits_err++; +- u64_stats_update_end(&ring->syncp); ++ hns3_ring_stats_update(ring, copy_bits_err); + return ret; + } + +@@ -2089,9 +2061,8 @@ static int hns3_handle_tx_bounce(struct hns3_enet_ring *ring, + dma_sync_single_for_device(ring_to_dev(ring), dma, size, + DMA_TO_DEVICE); + +- u64_stats_update_begin(&ring->syncp); +- ring->stats.tx_bounce++; +- u64_stats_update_end(&ring->syncp); ++ hns3_ring_stats_update(ring, tx_bounce); ++ + return bd_num; + } + +@@ -2121,9 +2092,7 @@ static int hns3_handle_tx_sgl(struct hns3_enet_ring *ring, + nents = skb_to_sgvec(skb, sgt->sgl, 0, skb->len); + if (unlikely(nents < 0)) { + hns3_tx_spare_rollback(ring, cb_len); +- u64_stats_update_begin(&ring->syncp); +- ring->stats.skb2sgl_err++; +- u64_stats_update_end(&ring->syncp); ++ hns3_ring_stats_update(ring, skb2sgl_err); + return -ENOMEM; + } + +@@ -2132,9 +2101,7 @@ static int hns3_handle_tx_sgl(struct hns3_enet_ring *ring, + DMA_TO_DEVICE); + if (unlikely(!sgt->nents)) { + hns3_tx_spare_rollback(ring, cb_len); +- u64_stats_update_begin(&ring->syncp); +- ring->stats.map_sg_err++; +- u64_stats_update_end(&ring->syncp); ++ hns3_ring_stats_update(ring, map_sg_err); + return -ENOMEM; + } + +@@ -2146,10 +2113,7 @@ static int hns3_handle_tx_sgl(struct hns3_enet_ring *ring, + for (i = 0; i < sgt->nents; i++) + bd_num += hns3_fill_desc(ring, sg_dma_address(sgt->sgl + i), + sg_dma_len(sgt->sgl + i)); +- +- u64_stats_update_begin(&ring->syncp); +- ring->stats.tx_sgl++; +- u64_stats_update_end(&ring->syncp); ++ hns3_ring_stats_update(ring, tx_sgl); + + return bd_num; + } +@@ -2188,9 +2152,7 @@ netdev_tx_t hns3_nic_net_xmit(struct sk_buff *skb, struct net_device *netdev) + 
if (skb_put_padto(skb, HNS3_MIN_TX_LEN)) { + hns3_tx_doorbell(ring, 0, !netdev_xmit_more()); + +- u64_stats_update_begin(&ring->syncp); +- ring->stats.sw_err_cnt++; +- u64_stats_update_end(&ring->syncp); ++ hns3_ring_stats_update(ring, sw_err_cnt); + + return NETDEV_TX_OK; + } +@@ -2255,6 +2217,8 @@ out_err_tx_ok: static int hns3_nic_net_set_mac_address(struct net_device *netdev, void *p) { @@ -189330,7 +229968,7 @@ index 4b886a13e0797..818a028703c65 100644 struct hnae3_handle *h = hns3_get_handle(netdev); struct sockaddr *mac_addr = p; int ret; -@@ -2263,8 +2265,9 @@ static int hns3_nic_net_set_mac_address(struct net_device *netdev, void *p) +@@ -2263,8 +2227,9 @@ static int hns3_nic_net_set_mac_address(struct net_device *netdev, void *p) return -EADDRNOTAVAIL; if (ether_addr_equal(netdev->dev_addr, mac_addr->sa_data)) { @@ -189342,7 +229980,7 @@ index 4b886a13e0797..818a028703c65 100644 return 0; } -@@ -2273,8 +2276,10 @@ static int hns3_nic_net_set_mac_address(struct net_device *netdev, void *p) +@@ -2273,8 +2238,10 @@ static int hns3_nic_net_set_mac_address(struct net_device *netdev, void *p) */ if (!hns3_is_phys_func(h->pdev) && !is_zero_ether_addr(netdev->perm_addr)) { @@ -189355,7 +229993,7 @@ index 4b886a13e0797..818a028703c65 100644 return -EPERM; } -@@ -2836,14 +2841,16 @@ static int hns3_nic_set_vf_rate(struct net_device *ndev, int vf, +@@ -2836,14 +2803,16 @@ static int hns3_nic_set_vf_rate(struct net_device *ndev, int vf, static int hns3_nic_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac) { struct hnae3_handle *h = hns3_get_handle(netdev); @@ -189374,7 +230012,7 @@ index 4b886a13e0797..818a028703c65 100644 return -EINVAL; } -@@ -2947,6 +2954,21 @@ static int hns3_probe(struct pci_dev *pdev, const struct pci_device_id *ent) +@@ -2947,6 +2916,21 @@ static int hns3_probe(struct pci_dev *pdev, const struct pci_device_id *ent) return ret; } @@ -189396,7 +230034,7 @@ index 4b886a13e0797..818a028703c65 100644 /* hns3_remove - Device removal routine * @pdev: PCI device information struct */ -@@ -2985,7 +3007,10 @@ static int hns3_pci_sriov_configure(struct pci_dev *pdev, int num_vfs) +@@ -2985,7 +2969,10 @@ static int hns3_pci_sriov_configure(struct pci_dev *pdev, int num_vfs) else return num_vfs; } else if (!pci_vfs_assigned(pdev)) { @@ -189407,7 +230045,192 @@ index 4b886a13e0797..818a028703c65 100644 } else { dev_warn(&pdev->dev, "Unable to free VFs because some are assigned to VMs.\n"); -@@ -4927,6 +4952,7 @@ static void hns3_uninit_all_ring(struct hns3_nic_priv *priv) +@@ -3497,17 +3484,13 @@ static bool hns3_nic_alloc_rx_buffers(struct hns3_enet_ring *ring, + for (i = 0; i < cleand_count; i++) { + desc_cb = &ring->desc_cb[ring->next_to_use]; + if (desc_cb->reuse_flag) { +- u64_stats_update_begin(&ring->syncp); +- ring->stats.reuse_pg_cnt++; +- u64_stats_update_end(&ring->syncp); ++ hns3_ring_stats_update(ring, reuse_pg_cnt); + + hns3_reuse_buffer(ring, ring->next_to_use); + } else { + ret = hns3_alloc_and_map_buffer(ring, &res_cbs); + if (ret) { +- u64_stats_update_begin(&ring->syncp); +- ring->stats.sw_err_cnt++; +- u64_stats_update_end(&ring->syncp); ++ hns3_ring_stats_update(ring, sw_err_cnt); + + hns3_rl_err(ring_to_netdev(ring), + "alloc rx buffer failed: %d\n", +@@ -3519,9 +3502,7 @@ static bool hns3_nic_alloc_rx_buffers(struct hns3_enet_ring *ring, + } + hns3_replace_buffer(ring, ring->next_to_use, &res_cbs); + +- u64_stats_update_begin(&ring->syncp); +- ring->stats.non_reuse_pg++; +- u64_stats_update_end(&ring->syncp); ++ hns3_ring_stats_update(ring, 
non_reuse_pg); + } + + ring_ptr_move_fw(ring, next_to_use); +@@ -3536,6 +3517,34 @@ static bool hns3_can_reuse_page(struct hns3_desc_cb *cb) + return page_count(cb->priv) == cb->pagecnt_bias; + } + ++static int hns3_handle_rx_copybreak(struct sk_buff *skb, int i, ++ struct hns3_enet_ring *ring, ++ int pull_len, ++ struct hns3_desc_cb *desc_cb) ++{ ++ struct hns3_desc *desc = &ring->desc[ring->next_to_clean]; ++ u32 frag_offset = desc_cb->page_offset + pull_len; ++ int size = le16_to_cpu(desc->rx.size); ++ u32 frag_size = size - pull_len; ++ void *frag = napi_alloc_frag(frag_size); ++ ++ if (unlikely(!frag)) { ++ hns3_ring_stats_update(ring, frag_alloc_err); ++ ++ hns3_rl_err(ring_to_netdev(ring), ++ "failed to allocate rx frag\n"); ++ return -ENOMEM; ++ } ++ ++ desc_cb->reuse_flag = 1; ++ memcpy(frag, desc_cb->buf + frag_offset, frag_size); ++ skb_add_rx_frag(skb, i, virt_to_page(frag), ++ offset_in_page(frag), frag_size, frag_size); ++ ++ hns3_ring_stats_update(ring, frag_alloc); ++ return 0; ++} ++ + static void hns3_nic_reuse_page(struct sk_buff *skb, int i, + struct hns3_enet_ring *ring, int pull_len, + struct hns3_desc_cb *desc_cb) +@@ -3545,6 +3554,7 @@ static void hns3_nic_reuse_page(struct sk_buff *skb, int i, + int size = le16_to_cpu(desc->rx.size); + u32 truesize = hns3_buf_size(ring); + u32 frag_size = size - pull_len; ++ int ret = 0; + bool reused; + + if (ring->page_pool) { +@@ -3579,27 +3589,9 @@ static void hns3_nic_reuse_page(struct sk_buff *skb, int i, + desc_cb->page_offset = 0; + desc_cb->reuse_flag = 1; + } else if (frag_size <= ring->rx_copybreak) { +- void *frag = napi_alloc_frag(frag_size); +- +- if (unlikely(!frag)) { +- u64_stats_update_begin(&ring->syncp); +- ring->stats.frag_alloc_err++; +- u64_stats_update_end(&ring->syncp); +- +- hns3_rl_err(ring_to_netdev(ring), +- "failed to allocate rx frag\n"); +- goto out; +- } +- +- desc_cb->reuse_flag = 1; +- memcpy(frag, desc_cb->buf + frag_offset, frag_size); +- skb_add_rx_frag(skb, i, virt_to_page(frag), +- offset_in_page(frag), frag_size, frag_size); +- +- u64_stats_update_begin(&ring->syncp); +- ring->stats.frag_alloc++; +- u64_stats_update_end(&ring->syncp); +- return; ++ ret = hns3_handle_rx_copybreak(skb, i, ring, pull_len, desc_cb); ++ if (!ret) ++ return; + } + + out: +@@ -3675,20 +3667,16 @@ static int hns3_gro_complete(struct sk_buff *skb, u32 l234info) + return 0; + } + +-static bool hns3_checksum_complete(struct hns3_enet_ring *ring, ++static void hns3_checksum_complete(struct hns3_enet_ring *ring, + struct sk_buff *skb, u32 ptype, u16 csum) + { + if (ptype == HNS3_INVALID_PTYPE || + hns3_rx_ptype_tbl[ptype].ip_summed != CHECKSUM_COMPLETE) +- return false; ++ return; + +- u64_stats_update_begin(&ring->syncp); +- ring->stats.csum_complete++; +- u64_stats_update_end(&ring->syncp); ++ hns3_ring_stats_update(ring, csum_complete); + skb->ip_summed = CHECKSUM_COMPLETE; + skb->csum = csum_unfold((__force __sum16)csum); +- +- return true; + } + + static void hns3_rx_handle_csum(struct sk_buff *skb, u32 l234info, +@@ -3748,8 +3736,7 @@ static void hns3_rx_checksum(struct hns3_enet_ring *ring, struct sk_buff *skb, + ptype = hnae3_get_field(ol_info, HNS3_RXD_PTYPE_M, + HNS3_RXD_PTYPE_S); + +- if (hns3_checksum_complete(ring, skb, ptype, csum)) +- return; ++ hns3_checksum_complete(ring, skb, ptype, csum); + + /* check if hardware has done checksum */ + if (!(bd_base_info & BIT(HNS3_RXD_L3L4P_B))) +@@ -3758,9 +3745,8 @@ static void hns3_rx_checksum(struct hns3_enet_ring *ring, struct sk_buff *skb, + if 
(unlikely(l234info & (BIT(HNS3_RXD_L3E_B) | BIT(HNS3_RXD_L4E_B) |
+				 BIT(HNS3_RXD_OL3E_B) |
+				 BIT(HNS3_RXD_OL4E_B)))) {
+-		u64_stats_update_begin(&ring->syncp);
+-		ring->stats.l3l4_csum_err++;
+-		u64_stats_update_end(&ring->syncp);
++		skb->ip_summed = CHECKSUM_NONE;
++		hns3_ring_stats_update(ring, l3l4_csum_err);
+
+		return;
+	}
+@@ -3851,10 +3837,7 @@ static int hns3_alloc_skb(struct hns3_enet_ring *ring, unsigned int length,
+	skb = ring->skb;
+	if (unlikely(!skb)) {
+		hns3_rl_err(netdev, "alloc rx skb fail\n");
+-
+-		u64_stats_update_begin(&ring->syncp);
+-		ring->stats.sw_err_cnt++;
+-		u64_stats_update_end(&ring->syncp);
++		hns3_ring_stats_update(ring, sw_err_cnt);
+
+		return -ENOMEM;
+	}
+@@ -3885,9 +3868,7 @@ static int hns3_alloc_skb(struct hns3_enet_ring *ring, unsigned int length,
+	if (ring->page_pool)
+		skb_mark_for_recycle(skb);
+
+-	u64_stats_update_begin(&ring->syncp);
+-	ring->stats.seg_pkt_cnt++;
+-	u64_stats_update_end(&ring->syncp);
++	hns3_ring_stats_update(ring, seg_pkt_cnt);
+
+	ring->pull_len = eth_get_headlen(netdev, va, HNS3_RX_HEAD_SIZE);
+	__skb_put(skb, ring->pull_len);
+@@ -4079,9 +4060,7 @@ static int hns3_handle_bdinfo(struct hns3_enet_ring *ring, struct sk_buff *skb)
+	ret = hns3_set_gro_and_checksum(ring, skb, l234info,
+					bd_base_info, ol_info, csum);
+	if (unlikely(ret)) {
+-		u64_stats_update_begin(&ring->syncp);
+-		ring->stats.rx_err_cnt++;
+-		u64_stats_update_end(&ring->syncp);
++		hns3_ring_stats_update(ring, rx_err_cnt);
+		return ret;
+	}
+
+@@ -4927,6 +4906,7 @@ static void hns3_uninit_all_ring(struct hns3_nic_priv *priv)
 static int hns3_init_mac_addr(struct net_device *netdev)
 {
 	struct hns3_nic_priv *priv = netdev_priv(netdev);
@@ -189415,7 +230238,7 @@ index 4b886a13e0797..818a028703c65 100644
 	struct hnae3_handle *h = priv->ae_handle;
 	u8 mac_addr_temp[ETH_ALEN];
 	int ret = 0;
-@@ -4937,8 +4963,9 @@ static int hns3_init_mac_addr(struct net_device *netdev)
+@@ -4937,8 +4917,9 @@ static int hns3_init_mac_addr(struct net_device *netdev)
 	/* Check if the MAC address is valid, if not get a random one */
 	if (!is_valid_ether_addr(mac_addr_temp)) {
 		eth_hw_addr_random(netdev);
@@ -189427,7 +230250,7 @@ index 4b886a13e0797..818a028703c65 100644
 	} else if (!ether_addr_equal(netdev->dev_addr, mac_addr_temp)) {
 		ether_addr_copy(netdev->dev_addr, mac_addr_temp);
 		ether_addr_copy(netdev->perm_addr, mac_addr_temp);
-@@ -4990,8 +5017,10 @@ static void hns3_client_stop(struct hnae3_handle *handle)
+@@ -4990,8 +4971,10 @@ static void hns3_client_stop(struct hnae3_handle *handle)
 static void hns3_info_show(struct hns3_nic_priv *priv)
 {
 	struct hnae3_knic_private_info *kinfo = &priv->ae_handle->kinfo;
@@ -189439,7 +230262,7 @@ index 4b886a13e0797..818a028703c65 100644
 	dev_info(priv->dev, "Task queue pairs numbers: %u\n", kinfo->num_tqps);
 	dev_info(priv->dev, "RSS size: %u\n", kinfo->rss_size);
 	dev_info(priv->dev, "Allocated RSS size: %u\n", kinfo->req_rss_size);
-@@ -5063,6 +5092,13 @@ static void hns3_state_init(struct hnae3_handle *handle)
+@@ -5063,6 +5046,13 @@ static void hns3_state_init(struct hnae3_handle *handle)
 	set_bit(HNS3_NIC_STATE_RXD_ADV_LAYOUT_ENABLE, &priv->state);
 }
 
+static void hns3_state_uninit(struct hnae3_handle *handle)
+{
+	struct hns3_nic_priv *priv = handle->priv;
+
+	clear_bit(HNS3_NIC_STATE_INITED, &priv->state);
+}
+
 static int hns3_client_init(struct hnae3_handle *handle)
 {
 	struct pci_dev *pdev = handle->pdev;
-@@ -5180,7 +5216,9 @@ static int hns3_client_init(struct hnae3_handle *handle)
+@@ -5180,7 +5170,9 @@ static int hns3_client_init(struct hnae3_handle *handle)
 	return ret;
 
 out_reg_netdev_fail:
+	hns3_dbg_uninit(handle);
+
@@ -189463,7 +230286,35 @@ index 4b886a13e0797..818a028703c65 100644
 out_client_start:
 	hns3_free_rx_cpu_rmap(netdev);
 	hns3_nic_uninit_irq(priv);
+@@ -5280,9 +5272,7 @@ static int hns3_clear_rx_ring(struct hns3_enet_ring *ring)
+	if (!ring->desc_cb[ring->next_to_use].reuse_flag) {
+		ret = hns3_alloc_and_map_buffer(ring, &res_cbs);
+		if (ret) {
+-			u64_stats_update_begin(&ring->syncp);
+-			ring->stats.sw_err_cnt++;
+-			u64_stats_update_end(&ring->syncp);
++			hns3_ring_stats_update(ring, sw_err_cnt);
+			/* if alloc new buffer fail, exit directly
+			 * and reclear in up flow.
+			 */
+diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
+index f09a61d9c6264..91b656adaacb0 100644
+--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
++++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
+@@ -654,6 +654,13 @@ static inline bool hns3_nic_resetting(struct net_device *netdev)
+
+ #define hns3_buf_size(_ring) ((_ring)->buf_size)
+
++#define hns3_ring_stats_update(ring, cnt) do { \
++	typeof(ring) (tmp) = (ring); \
++	u64_stats_update_begin(&(tmp)->syncp); \
++	((tmp)->stats.cnt)++; \
++	u64_stats_update_end(&(tmp)->syncp); \
++} while (0) \
++
+ static inline unsigned int hns3_page_order(struct hns3_enet_ring *ring)
+ {
+ #if (PAGE_SIZE < 8192)
 diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
 index 5ebd96f6833d6..526fb56c84f24 100644
 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
@@ -189542,7 +230394,7 @@ index 91cb578f56b80..375ebf105a9aa 100644
 }
 diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
-index d891390d492f6..15d10775a7570 100644
+index d891390d492f6..f4d58fcdba272 100644
 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
 +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
 @@ -26,8 +26,6 @@
 	hdev->vector_status = devm_kcalloc(&pdev->dev, hdev->num_msi,
 					   sizeof(u16), GFP_KERNEL);
 	if (!hdev->vector_status) {
+@@ -3177,6 +3172,7 @@ static int hclge_update_tp_port_info(struct hclge_dev *hdev)
+	hdev->hw.mac.autoneg = cmd.base.autoneg;
+	hdev->hw.mac.speed = cmd.base.speed;
+	hdev->hw.mac.duplex = cmd.base.duplex;
++	linkmode_copy(hdev->hw.mac.advertising, cmd.link_modes.advertising);
+
+	return 0;
+ }
-@@ -3199,7 +3194,7 @@ static int hclge_tp_port_init(struct hclge_dev *hdev)
+@@ -3199,7 +3195,7 @@ static int hclge_tp_port_init(struct hclge_dev *hdev)
 static int hclge_update_port_info(struct hclge_dev *hdev)
 {
 	struct hclge_mac *mac = &hdev->hw.mac;
@@ -189600,7 +230460,7 @@ index d891390d492f6..15d10775a7570 100644
 	int ret;
 
 	/* get the port info from SFP cmd if not copper port */
-@@ -3210,10 +3205,13 @@ static int hclge_update_port_info(struct hclge_dev *hdev)
+@@ -3210,10 +3206,13 @@ static int hclge_update_port_info(struct hclge_dev *hdev)
 	if (!hdev->support_sfp_query)
 		return 0;
 
@@ -189616,7 +230476,7 @@ index d891390d492f6..15d10775a7570 100644
 
 	if (ret == -EOPNOTSUPP) {
 		hdev->support_sfp_query = false;
-@@ -3225,6 +3223,8 @@ static int hclge_update_port_info(struct hclge_dev *hdev)
+@@ -3225,6 +3224,8 @@ static int hclge_update_port_info(struct hclge_dev *hdev)
 	if (hdev->ae_dev->dev_version >= HNAE3_DEVICE_VERSION_V2) {
 		if (mac->speed_type == QUERY_ACTIVE_SPEED) {
 			hclge_update_port_capability(hdev, mac);
@@ -189625,7 +230485,7 @@ index d891390d492f6..15d10775a7570 100644
 			return 0;
 		}
 		return hclge_cfg_mac_speed_dup(hdev, mac->speed,
-@@
-3307,6 +3307,12 @@ static int hclge_set_vf_link_state(struct hnae3_handle *handle, int vf, +@@ -3307,6 +3308,12 @@ static int hclge_set_vf_link_state(struct hnae3_handle *handle, int vf, link_state_old = vport->vf_info.link_state; vport->vf_info.link_state = link_state; @@ -189638,7 +230498,7 @@ index d891390d492f6..15d10775a7570 100644 ret = hclge_push_vf_link_status(vport); if (ret) { vport->vf_info.link_state = link_state_old; -@@ -8575,6 +8581,7 @@ int hclge_update_mac_list(struct hclge_vport *vport, +@@ -8575,6 +8582,7 @@ int hclge_update_mac_list(struct hclge_vport *vport, enum HCLGE_MAC_ADDR_TYPE mac_type, const unsigned char *addr) { @@ -189646,7 +230506,7 @@ index d891390d492f6..15d10775a7570 100644 struct hclge_dev *hdev = vport->back; struct hclge_mac_node *mac_node; struct list_head *list; -@@ -8599,9 +8606,10 @@ int hclge_update_mac_list(struct hclge_vport *vport, +@@ -8599,9 +8607,10 @@ int hclge_update_mac_list(struct hclge_vport *vport, /* if this address is never added, unnecessary to delete */ if (state == HCLGE_MAC_TO_DEL) { spin_unlock_bh(&vport->mac_list_lock); @@ -189659,7 +230519,7 @@ index d891390d492f6..15d10775a7570 100644 return -ENOENT; } -@@ -8634,6 +8642,7 @@ static int hclge_add_uc_addr(struct hnae3_handle *handle, +@@ -8634,6 +8643,7 @@ static int hclge_add_uc_addr(struct hnae3_handle *handle, int hclge_add_uc_addr_common(struct hclge_vport *vport, const unsigned char *addr) { @@ -189667,7 +230527,7 @@ index d891390d492f6..15d10775a7570 100644 struct hclge_dev *hdev = vport->back; struct hclge_mac_vlan_tbl_entry_cmd req; struct hclge_desc desc; -@@ -8644,9 +8653,10 @@ int hclge_add_uc_addr_common(struct hclge_vport *vport, +@@ -8644,9 +8654,10 @@ int hclge_add_uc_addr_common(struct hclge_vport *vport, if (is_zero_ether_addr(addr) || is_broadcast_ether_addr(addr) || is_multicast_ether_addr(addr)) { @@ -189680,7 +230540,7 @@ index d891390d492f6..15d10775a7570 100644 is_broadcast_ether_addr(addr), is_multicast_ether_addr(addr)); return -EINVAL; -@@ -8703,6 +8713,7 @@ static int hclge_rm_uc_addr(struct hnae3_handle *handle, +@@ -8703,6 +8714,7 @@ static int hclge_rm_uc_addr(struct hnae3_handle *handle, int hclge_rm_uc_addr_common(struct hclge_vport *vport, const unsigned char *addr) { @@ -189688,7 +230548,7 @@ index d891390d492f6..15d10775a7570 100644 struct hclge_dev *hdev = vport->back; struct hclge_mac_vlan_tbl_entry_cmd req; int ret; -@@ -8711,8 +8722,9 @@ int hclge_rm_uc_addr_common(struct hclge_vport *vport, +@@ -8711,8 +8723,9 @@ int hclge_rm_uc_addr_common(struct hclge_vport *vport, if (is_zero_ether_addr(addr) || is_broadcast_ether_addr(addr) || is_multicast_ether_addr(addr)) { @@ -189700,7 +230560,7 @@ index d891390d492f6..15d10775a7570 100644 return -EINVAL; } -@@ -8720,12 +8732,11 @@ int hclge_rm_uc_addr_common(struct hclge_vport *vport, +@@ -8720,12 +8733,11 @@ int hclge_rm_uc_addr_common(struct hclge_vport *vport, hnae3_set_bit(req.entry_type, HCLGE_MAC_VLAN_BIT0_EN_B, 0); hclge_prepare_mac_addr(&req, addr, false); ret = hclge_remove_mac_vlan_tbl(vport, &req); @@ -189715,7 +230575,7 @@ index d891390d492f6..15d10775a7570 100644 } return ret; -@@ -8743,6 +8754,7 @@ static int hclge_add_mc_addr(struct hnae3_handle *handle, +@@ -8743,6 +8755,7 @@ static int hclge_add_mc_addr(struct hnae3_handle *handle, int hclge_add_mc_addr_common(struct hclge_vport *vport, const unsigned char *addr) { @@ -189723,7 +230583,7 @@ index d891390d492f6..15d10775a7570 100644 struct hclge_dev *hdev = vport->back; struct hclge_mac_vlan_tbl_entry_cmd req; struct hclge_desc 
desc[3]; -@@ -8750,9 +8762,10 @@ int hclge_add_mc_addr_common(struct hclge_vport *vport, +@@ -8750,9 +8763,10 @@ int hclge_add_mc_addr_common(struct hclge_vport *vport, /* mac addr check */ if (!is_multicast_ether_addr(addr)) { @@ -189736,7 +230596,7 @@ index d891390d492f6..15d10775a7570 100644 return -EINVAL; } memset(&req, 0, sizeof(req)); -@@ -8788,6 +8801,7 @@ static int hclge_rm_mc_addr(struct hnae3_handle *handle, +@@ -8788,6 +8802,7 @@ static int hclge_rm_mc_addr(struct hnae3_handle *handle, int hclge_rm_mc_addr_common(struct hclge_vport *vport, const unsigned char *addr) { @@ -189744,7 +230604,7 @@ index d891390d492f6..15d10775a7570 100644 struct hclge_dev *hdev = vport->back; struct hclge_mac_vlan_tbl_entry_cmd req; enum hclge_cmd_status status; -@@ -8795,9 +8809,10 @@ int hclge_rm_mc_addr_common(struct hclge_vport *vport, +@@ -8795,9 +8810,10 @@ int hclge_rm_mc_addr_common(struct hclge_vport *vport, /* mac addr check */ if (!is_multicast_ether_addr(addr)) { @@ -189757,7 +230617,7 @@ index d891390d492f6..15d10775a7570 100644 return -EINVAL; } -@@ -9263,16 +9278,18 @@ static int hclge_set_vf_mac(struct hnae3_handle *handle, int vf, +@@ -9263,16 +9279,18 @@ static int hclge_set_vf_mac(struct hnae3_handle *handle, int vf, u8 *mac_addr) { struct hclge_vport *vport = hclge_get_vport(handle); @@ -189778,7 +230638,7 @@ index d891390d492f6..15d10775a7570 100644 return 0; } -@@ -9284,15 +9301,20 @@ static int hclge_set_vf_mac(struct hnae3_handle *handle, int vf, +@@ -9284,15 +9302,20 @@ static int hclge_set_vf_mac(struct hnae3_handle *handle, int vf, ether_addr_copy(vport->vf_info.mac, mac_addr); @@ -189804,7 +230664,7 @@ index d891390d492f6..15d10775a7570 100644 return 0; } -@@ -9396,6 +9418,7 @@ static int hclge_set_mac_addr(struct hnae3_handle *handle, void *p, +@@ -9396,6 +9419,7 @@ static int hclge_set_mac_addr(struct hnae3_handle *handle, void *p, { const unsigned char *new_addr = (const unsigned char *)p; struct hclge_vport *vport = hclge_get_vport(handle); @@ -189812,7 +230672,7 @@ index d891390d492f6..15d10775a7570 100644 struct hclge_dev *hdev = vport->back; unsigned char *old_addr = NULL; int ret; -@@ -9404,9 +9427,10 @@ static int hclge_set_mac_addr(struct hnae3_handle *handle, void *p, +@@ -9404,9 +9428,10 @@ static int hclge_set_mac_addr(struct hnae3_handle *handle, void *p, if (is_zero_ether_addr(new_addr) || is_broadcast_ether_addr(new_addr) || is_multicast_ether_addr(new_addr)) { @@ -189825,7 +230685,7 @@ index d891390d492f6..15d10775a7570 100644 return -EINVAL; } -@@ -9424,9 +9448,10 @@ static int hclge_set_mac_addr(struct hnae3_handle *handle, void *p, +@@ -9424,9 +9449,10 @@ static int hclge_set_mac_addr(struct hnae3_handle *handle, void *p, spin_lock_bh(&vport->mac_list_lock); ret = hclge_update_mac_node_for_dev_addr(vport, old_addr, new_addr); if (ret) { @@ -189838,7 +230698,7 @@ index d891390d492f6..15d10775a7570 100644 spin_unlock_bh(&vport->mac_list_lock); if (!is_first) -@@ -10084,19 +10109,28 @@ static void hclge_add_vport_vlan_table(struct hclge_vport *vport, u16 vlan_id, +@@ -10084,19 +10110,28 @@ static void hclge_add_vport_vlan_table(struct hclge_vport *vport, u16 vlan_id, bool writen_to_tbl) { struct hclge_vport_vlan_cfg *vlan, *tmp; @@ -189870,7 +230730,7 @@ index d891390d492f6..15d10775a7570 100644 } static int hclge_add_vport_all_vlan_table(struct hclge_vport *vport) -@@ -10105,6 +10139,8 @@ static int hclge_add_vport_all_vlan_table(struct hclge_vport *vport) +@@ -10105,6 +10140,8 @@ static int hclge_add_vport_all_vlan_table(struct hclge_vport *vport) 
struct hclge_dev *hdev = vport->back; int ret; @@ -189879,7 +230739,7 @@ index d891390d492f6..15d10775a7570 100644 list_for_each_entry_safe(vlan, tmp, &vport->vlan_list, node) { if (!vlan->hd_tbl_status) { ret = hclge_set_vlan_filter_hw(hdev, htons(ETH_P_8021Q), -@@ -10114,12 +10150,16 @@ static int hclge_add_vport_all_vlan_table(struct hclge_vport *vport) +@@ -10114,12 +10151,16 @@ static int hclge_add_vport_all_vlan_table(struct hclge_vport *vport) dev_err(&hdev->pdev->dev, "restore vport vlan list failed, ret=%d\n", ret); @@ -189896,7 +230756,7 @@ index d891390d492f6..15d10775a7570 100644 return 0; } -@@ -10129,6 +10169,8 @@ static void hclge_rm_vport_vlan_table(struct hclge_vport *vport, u16 vlan_id, +@@ -10129,6 +10170,8 @@ static void hclge_rm_vport_vlan_table(struct hclge_vport *vport, u16 vlan_id, struct hclge_vport_vlan_cfg *vlan, *tmp; struct hclge_dev *hdev = vport->back; @@ -189905,7 +230765,7 @@ index d891390d492f6..15d10775a7570 100644 list_for_each_entry_safe(vlan, tmp, &vport->vlan_list, node) { if (vlan->vlan_id == vlan_id) { if (is_write_tbl && vlan->hd_tbl_status) -@@ -10143,6 +10185,8 @@ static void hclge_rm_vport_vlan_table(struct hclge_vport *vport, u16 vlan_id, +@@ -10143,6 +10186,8 @@ static void hclge_rm_vport_vlan_table(struct hclge_vport *vport, u16 vlan_id, break; } } @@ -189914,7 +230774,7 @@ index d891390d492f6..15d10775a7570 100644 } void hclge_rm_vport_all_vlan_table(struct hclge_vport *vport, bool is_del_list) -@@ -10150,6 +10194,8 @@ void hclge_rm_vport_all_vlan_table(struct hclge_vport *vport, bool is_del_list) +@@ -10150,6 +10195,8 @@ void hclge_rm_vport_all_vlan_table(struct hclge_vport *vport, bool is_del_list) struct hclge_vport_vlan_cfg *vlan, *tmp; struct hclge_dev *hdev = vport->back; @@ -189923,7 +230783,7 @@ index d891390d492f6..15d10775a7570 100644 list_for_each_entry_safe(vlan, tmp, &vport->vlan_list, node) { if (vlan->hd_tbl_status) hclge_set_vlan_filter_hw(hdev, -@@ -10165,6 +10211,7 @@ void hclge_rm_vport_all_vlan_table(struct hclge_vport *vport, bool is_del_list) +@@ -10165,6 +10212,7 @@ void hclge_rm_vport_all_vlan_table(struct hclge_vport *vport, bool is_del_list) } } clear_bit(vport->vport_id, hdev->vf_vlan_full); @@ -189931,7 +230791,7 @@ index d891390d492f6..15d10775a7570 100644 } void hclge_uninit_vport_vlan_table(struct hclge_dev *hdev) -@@ -10173,6 +10220,8 @@ void hclge_uninit_vport_vlan_table(struct hclge_dev *hdev) +@@ -10173,6 +10221,8 @@ void hclge_uninit_vport_vlan_table(struct hclge_dev *hdev) struct hclge_vport *vport; int i; @@ -189940,7 +230800,7 @@ index d891390d492f6..15d10775a7570 100644 for (i = 0; i < hdev->num_alloc_vport; i++) { vport = &hdev->vport[i]; list_for_each_entry_safe(vlan, tmp, &vport->vlan_list, node) { -@@ -10180,37 +10229,61 @@ void hclge_uninit_vport_vlan_table(struct hclge_dev *hdev) +@@ -10180,37 +10230,61 @@ void hclge_uninit_vport_vlan_table(struct hclge_dev *hdev) kfree(vlan); } } @@ -190021,7 +230881,7 @@ index d891390d492f6..15d10775a7570 100644 } /* For global reset and imp reset, hardware will clear the mac table, -@@ -10250,6 +10323,7 @@ static void hclge_restore_hw_table(struct hclge_dev *hdev) +@@ -10250,6 +10324,7 @@ static void hclge_restore_hw_table(struct hclge_dev *hdev) struct hnae3_handle *handle = &vport->nic; hclge_restore_mac_table_common(vport); @@ -190029,7 +230889,7 @@ index d891390d492f6..15d10775a7570 100644 hclge_restore_vport_vlan_table(vport); set_bit(HCLGE_STATE_FD_USER_DEF_CHANGED, &hdev->state); hclge_restore_fd_entries(handle); -@@ -10306,6 +10380,8 @@ static int 
hclge_update_vlan_filter_entries(struct hclge_vport *vport, +@@ -10306,6 +10381,8 @@ static int hclge_update_vlan_filter_entries(struct hclge_vport *vport, false); } @@ -190038,7 +230898,7 @@ index d891390d492f6..15d10775a7570 100644 /* force add VLAN 0 */ ret = hclge_set_vf_vlan_common(hdev, vport->vport_id, false, 0); if (ret) -@@ -10332,12 +10408,42 @@ static bool hclge_need_update_vlan_filter(const struct hclge_vlan_info *new_cfg, +@@ -10332,12 +10409,42 @@ static bool hclge_need_update_vlan_filter(const struct hclge_vlan_info *new_cfg, return false; } @@ -190082,7 +230942,7 @@ index d891390d492f6..15d10775a7570 100644 int ret; old_vlan_info = &vport->port_base_vlan_cfg.vlan_info; -@@ -10350,38 +10456,12 @@ int hclge_update_port_base_vlan_cfg(struct hclge_vport *vport, u16 state, +@@ -10350,38 +10457,12 @@ int hclge_update_port_base_vlan_cfg(struct hclge_vport *vport, u16 state, if (!hclge_need_update_vlan_filter(vlan_info, old_vlan_info)) goto out; @@ -190127,7 +230987,7 @@ index d891390d492f6..15d10775a7570 100644 if (ret) return ret; -@@ -10392,7 +10472,9 @@ out: +@@ -10392,7 +10473,9 @@ out: else nic->port_base_vlan_state = HNAE3_PORT_BASE_VLAN_ENABLE; @@ -190137,7 +230997,7 @@ index d891390d492f6..15d10775a7570 100644 hclge_set_vport_vlan_fltr_change(vport); return 0; -@@ -10460,14 +10542,17 @@ static int hclge_set_vf_vlan_filter(struct hnae3_handle *handle, int vfid, +@@ -10460,14 +10543,17 @@ static int hclge_set_vf_vlan_filter(struct hnae3_handle *handle, int vfid, return ret; } @@ -190159,7 +231019,7 @@ index d891390d492f6..15d10775a7570 100644 return 0; } -@@ -10525,11 +10610,11 @@ int hclge_set_vlan_filter(struct hnae3_handle *handle, __be16 proto, +@@ -10525,11 +10611,11 @@ int hclge_set_vlan_filter(struct hnae3_handle *handle, __be16 proto, } if (!ret) { @@ -190174,7 +231034,23 @@ index d891390d492f6..15d10775a7570 100644 } else if (is_kill) { /* when remove hw vlan filter failed, record the vlan id, * and try to remove it from hw later, to be consistence -@@ -12103,8 +12188,8 @@ static void hclge_uninit_ae_dev(struct hnae3_ae_dev *ae_dev) +@@ -11584,9 +11670,12 @@ static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev) + if (ret) + goto err_msi_irq_uninit; + +- if (hdev->hw.mac.media_type == HNAE3_MEDIA_TYPE_COPPER && +- !hnae3_dev_phy_imp_supported(hdev)) { +- ret = hclge_mac_mdio_config(hdev); ++ if (hdev->hw.mac.media_type == HNAE3_MEDIA_TYPE_COPPER) { ++ if (hnae3_dev_phy_imp_supported(hdev)) ++ ret = hclge_update_tp_port_info(hdev); ++ else ++ ret = hclge_mac_mdio_config(hdev); ++ + if (ret) + goto err_msi_irq_uninit; + } +@@ -12103,8 +12192,8 @@ static void hclge_uninit_ae_dev(struct hnae3_ae_dev *ae_dev) hclge_misc_irq_uninit(hdev); hclge_devlink_uninit(hdev); hclge_pci_uninit(hdev); @@ -190184,7 +231060,111 @@ index d891390d492f6..15d10775a7570 100644 ae_dev->priv = NULL; } -@@ -12917,6 +13002,55 @@ static int hclge_get_link_diagnosis_info(struct hnae3_handle *handle, +@@ -12736,60 +12825,71 @@ static int hclge_gro_en(struct hnae3_handle *handle, bool enable) + return ret; + } + +-static void hclge_sync_promisc_mode(struct hclge_dev *hdev) ++static int hclge_sync_vport_promisc_mode(struct hclge_vport *vport) + { +- struct hclge_vport *vport = &hdev->vport[0]; + struct hnae3_handle *handle = &vport->nic; ++ struct hclge_dev *hdev = vport->back; ++ bool uc_en = false; ++ bool mc_en = false; + u8 tmp_flags; ++ bool bc_en; + int ret; +- u16 i; + + if (vport->last_promisc_flags != vport->overflow_promisc_flags) { + set_bit(HCLGE_VPORT_STATE_PROMISC_CHANGE, 
&vport->state); + vport->last_promisc_flags = vport->overflow_promisc_flags; + } + +- if (test_bit(HCLGE_VPORT_STATE_PROMISC_CHANGE, &vport->state)) { ++ if (!test_and_clear_bit(HCLGE_VPORT_STATE_PROMISC_CHANGE, ++ &vport->state)) ++ return 0; ++ ++ /* for PF */ ++ if (!vport->vport_id) { + tmp_flags = handle->netdev_flags | vport->last_promisc_flags; + ret = hclge_set_promisc_mode(handle, tmp_flags & HNAE3_UPE, + tmp_flags & HNAE3_MPE); +- if (!ret) { +- clear_bit(HCLGE_VPORT_STATE_PROMISC_CHANGE, +- &vport->state); ++ if (!ret) + set_bit(HCLGE_VPORT_STATE_VLAN_FLTR_CHANGE, + &vport->state); +- } ++ else ++ set_bit(HCLGE_VPORT_STATE_PROMISC_CHANGE, ++ &vport->state); ++ return ret; + } + +- for (i = 1; i < hdev->num_alloc_vport; i++) { +- bool uc_en = false; +- bool mc_en = false; +- bool bc_en; ++ /* for VF */ ++ if (vport->vf_info.trusted) { ++ uc_en = vport->vf_info.request_uc_en > 0 || ++ vport->overflow_promisc_flags & HNAE3_OVERFLOW_UPE; ++ mc_en = vport->vf_info.request_mc_en > 0 || ++ vport->overflow_promisc_flags & HNAE3_OVERFLOW_MPE; ++ } ++ bc_en = vport->vf_info.request_bc_en > 0; + +- vport = &hdev->vport[i]; ++ ret = hclge_cmd_set_promisc_mode(hdev, vport->vport_id, uc_en, ++ mc_en, bc_en); ++ if (ret) { ++ set_bit(HCLGE_VPORT_STATE_PROMISC_CHANGE, &vport->state); ++ return ret; ++ } ++ hclge_set_vport_vlan_fltr_change(vport); + +- if (!test_and_clear_bit(HCLGE_VPORT_STATE_PROMISC_CHANGE, +- &vport->state)) +- continue; ++ return 0; ++} + +- if (vport->vf_info.trusted) { +- uc_en = vport->vf_info.request_uc_en > 0 || +- vport->overflow_promisc_flags & +- HNAE3_OVERFLOW_UPE; +- mc_en = vport->vf_info.request_mc_en > 0 || +- vport->overflow_promisc_flags & +- HNAE3_OVERFLOW_MPE; +- } +- bc_en = vport->vf_info.request_bc_en > 0; ++static void hclge_sync_promisc_mode(struct hclge_dev *hdev) ++{ ++ struct hclge_vport *vport; ++ int ret; ++ u16 i; + +- ret = hclge_cmd_set_promisc_mode(hdev, vport->vport_id, uc_en, +- mc_en, bc_en); +- if (ret) { +- set_bit(HCLGE_VPORT_STATE_PROMISC_CHANGE, +- &vport->state); ++ for (i = 0; i < hdev->num_alloc_vport; i++) { ++ vport = &hdev->vport[i]; ++ ++ ret = hclge_sync_vport_promisc_mode(vport); ++ if (ret) + return; +- } +- hclge_set_vport_vlan_fltr_change(vport); + } + } + +@@ -12917,6 +13017,55 @@ static int hclge_get_link_diagnosis_info(struct hnae3_handle *handle, return 0; } @@ -190240,7 +231220,7 @@ index d891390d492f6..15d10775a7570 100644 static const struct hnae3_ae_ops hclge_ops = { .init_ae_dev = hclge_init_ae_dev, .uninit_ae_dev = hclge_uninit_ae_dev, -@@ -13018,6 +13152,7 @@ static const struct hnae3_ae_ops hclge_ops = { +@@ -13018,6 +13167,7 @@ static const struct hnae3_ae_ops hclge_ops = { .get_rx_hwts = hclge_ptp_get_rx_hwts, .get_ts_info = hclge_ptp_get_ts_info, .get_link_diagnosis_info = hclge_get_link_diagnosis_info, @@ -190538,7 +231518,7 @@ index 2ee9b795f71dc..5df18cc3ee556 100644 int hclge_tm_get_pri_num(struct hclge_dev *hdev, u8 *pri_num); int hclge_tm_get_qset_map_pri(struct hclge_dev *hdev, u16 qset_id, u8 *priority, diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c -index cf00ad7bb881f..21678c12afa26 100644 +index cf00ad7bb881f..3c1ff33132213 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c @@ -703,9 +703,9 @@ static int hclgevf_set_rss_tc_mode(struct hclgevf_dev *hdev, u16 rss_size) @@ -190623,7 +231603,17 @@ index cf00ad7bb881f..21678c12afa26 100644 
clear_bit(HCLGEVF_STATE_NIC_REGISTERED, &hdev->state); client->ops->uninit_instance(&hdev->nic, 0); -@@ -3340,6 +3344,11 @@ static int hclgevf_reset_hdev(struct hclgevf_dev *hdev) +@@ -3254,7 +3258,8 @@ static int hclgevf_pci_reset(struct hclgevf_dev *hdev) + struct pci_dev *pdev = hdev->pdev; + int ret = 0; + +- if (hdev->reset_type == HNAE3_VF_FULL_RESET && ++ if ((hdev->reset_type == HNAE3_VF_FULL_RESET || ++ hdev->reset_type == HNAE3_FLR_RESET) && + test_bit(HCLGEVF_STATE_IRQ_INITED, &hdev->state)) { + hclgevf_misc_irq_uninit(hdev); + hclgevf_uninit_msi(hdev); +@@ -3340,6 +3345,11 @@ static int hclgevf_reset_hdev(struct hclgevf_dev *hdev) return ret; } @@ -190671,6 +231661,50 @@ index fdc66fae09601..c5ac6ecf36e10 100644 /* synchronous send */ if (need_resp) { +diff --git a/drivers/net/ethernet/huawei/hinic/hinic_debugfs.c b/drivers/net/ethernet/huawei/hinic/hinic_debugfs.c +index 19eb839177ec2..061952c6c21a4 100644 +--- a/drivers/net/ethernet/huawei/hinic/hinic_debugfs.c ++++ b/drivers/net/ethernet/huawei/hinic/hinic_debugfs.c +@@ -85,6 +85,7 @@ static int hinic_dbg_get_func_table(struct hinic_dev *nic_dev, int idx) + struct tag_sml_funcfg_tbl *funcfg_table_elem; + struct hinic_cmd_lt_rd *read_data; + u16 out_size = sizeof(*read_data); ++ int ret = ~0; + int err; + + read_data = kzalloc(sizeof(*read_data), GFP_KERNEL); +@@ -111,20 +112,25 @@ static int hinic_dbg_get_func_table(struct hinic_dev *nic_dev, int idx) + + switch (idx) { + case VALID: +- return funcfg_table_elem->dw0.bs.valid; ++ ret = funcfg_table_elem->dw0.bs.valid; ++ break; + case RX_MODE: +- return funcfg_table_elem->dw0.bs.nic_rx_mode; ++ ret = funcfg_table_elem->dw0.bs.nic_rx_mode; ++ break; + case MTU: +- return funcfg_table_elem->dw1.bs.mtu; ++ ret = funcfg_table_elem->dw1.bs.mtu; ++ break; + case RQ_DEPTH: +- return funcfg_table_elem->dw13.bs.cfg_rq_depth; ++ ret = funcfg_table_elem->dw13.bs.cfg_rq_depth; ++ break; + case QUEUE_NUM: +- return funcfg_table_elem->dw13.bs.cfg_q_num; ++ ret = funcfg_table_elem->dw13.bs.cfg_q_num; ++ break; + } + + kfree(read_data); + +- return ~0; ++ return ret; + } + + static ssize_t hinic_dbg_cmd_read(struct file *filp, char __user *buffer, size_t count, diff --git a/drivers/net/ethernet/huawei/hinic/hinic_dev.h b/drivers/net/ethernet/huawei/hinic/hinic_dev.h index fb3e89141a0d9..a4fbf44f944cd 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_dev.h @@ -190709,7 +231743,7 @@ index 06586173add77..998717f02136f 100644 return -ENOMEM; diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_cmdq.c b/drivers/net/ethernet/huawei/hinic/hinic_hw_cmdq.c -index 307a6d4af993d..a627237f694bb 100644 +index 307a6d4af993d..afa816cfcdf4a 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_hw_cmdq.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_cmdq.c @@ -796,11 +796,10 @@ static int init_cmdqs_ctxt(struct hinic_hwdev *hwdev, @@ -190745,8 +231779,17 @@ index 307a6d4af993d..a627237f694bb 100644 if (!cmdqs->saved_wqs) { err = -ENOMEM; goto err_saved_wqs; +@@ -931,7 +929,7 @@ int hinic_init_cmdqs(struct hinic_cmdqs *cmdqs, struct hinic_hwif *hwif, + + err_set_cmdq_depth: + hinic_ceq_unregister_cb(&func_to_io->ceqs, HINIC_CEQ_CMDQ); +- ++ free_cmdq(&cmdqs->cmdq[HINIC_CMDQ_SYNC]); + err_cmdq_ctxt: + hinic_wqs_cmdq_free(&cmdqs->cmdq_pages, cmdqs->saved_wqs, + HINIC_MAX_CMDQ_TYPES); diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c b/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c -index 56b6b04e209b3..ca76896d9f1c4 100644 +index 56b6b04e209b3..8b04d133b3c47 100644 --- 
a/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c @@ -162,7 +162,6 @@ static int init_msix(struct hinic_hwdev *hwdev) @@ -190768,6 +231811,15 @@ index 56b6b04e209b3..ca76896d9f1c4 100644 GFP_KERNEL); if (!hwdev->msix_entries) return -ENOMEM; +@@ -893,7 +892,7 @@ int hinic_set_interrupt_cfg(struct hinic_hwdev *hwdev, + if (err) + return -EINVAL; + +- interrupt_info->lli_credit_cnt = temp_info.lli_timer_cnt; ++ interrupt_info->lli_credit_cnt = temp_info.lli_credit_cnt; + interrupt_info->lli_timer_cnt = temp_info.lli_timer_cnt; + + err = hinic_msg_to_mgmt(&pfhwdev->pf_to_mgmt, HINIC_MOD_COMM, diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_eqs.c b/drivers/net/ethernet/huawei/hinic/hinic_hw_eqs.c index d3fc05a07fdb6..045c47786a041 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_hw_eqs.c @@ -190887,7 +231939,7 @@ index 7f0f1aa3cedd9..4daf6bf291ecb 100644 copy_wqe_to_shadow(wq, shadow_addr, num_wqebbs, *cons_idx); diff --git a/drivers/net/ethernet/huawei/hinic/hinic_main.c b/drivers/net/ethernet/huawei/hinic/hinic_main.c -index ae707e305684b..8c6ec7c258099 100644 +index ae707e305684b..92fba9a0c3718 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_main.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_main.c @@ -62,8 +62,6 @@ MODULE_PARM_DESC(rx_weight, "Number Rx packets for NAPI budget (default=64)"); @@ -191020,10 +232072,10 @@ index ae707e305684b..8c6ec7c258099 100644 if (nic_dev->flags & HINIC_INTF_UP) - update_nic_stats(nic_dev); -- -- up(&nic_dev->mgmt_lock); + gather_nic_stats(nic_dev, &nic_rx_stats, &nic_tx_stats); +- up(&nic_dev->mgmt_lock); +- - stats->rx_bytes = nic_rx_stats->bytes; - stats->rx_packets = nic_rx_stats->pkts; - stats->rx_errors = nic_rx_stats->errors; @@ -191067,6 +232119,23 @@ index ae707e305684b..8c6ec7c258099 100644 if (!nic_dev->vlan_bitmap) { err = -ENOMEM; goto err_vlan_bitmap; +@@ -1516,8 +1482,15 @@ static struct pci_driver hinic_driver = { + + static int __init hinic_module_init(void) + { ++ int ret; ++ + hinic_dbg_register_debugfs(HINIC_DRV_NAME); +- return pci_register_driver(&hinic_driver); ++ ++ ret = pci_register_driver(&hinic_driver); ++ if (ret) ++ hinic_dbg_unregister_debugfs(); ++ ++ return ret; + } + + static void __exit hinic_module_exit(void) diff --git a/drivers/net/ethernet/huawei/hinic/hinic_rx.c b/drivers/net/ethernet/huawei/hinic/hinic_rx.c index fed3b6bc0d763..d11ec69a2e17d 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_rx.c @@ -191091,6 +232160,18 @@ index fed3b6bc0d763..d11ec69a2e17d 100644 } /** +diff --git a/drivers/net/ethernet/huawei/hinic/hinic_sriov.c b/drivers/net/ethernet/huawei/hinic/hinic_sriov.c +index a78c398bf5b25..e81a7b28209b9 100644 +--- a/drivers/net/ethernet/huawei/hinic/hinic_sriov.c ++++ b/drivers/net/ethernet/huawei/hinic/hinic_sriov.c +@@ -1180,7 +1180,6 @@ int hinic_vf_func_init(struct hinic_hwdev *hwdev) + dev_err(&hwdev->hwif->pdev->dev, + "Failed to register VF, err: %d, status: 0x%x, out size: 0x%x\n", + err, register_info.status, out_size); +- hinic_unregister_vf_mbox_cb(hwdev, HINIC_MOD_L2NIC); + return -EIO; + } + } else { diff --git a/drivers/net/ethernet/huawei/hinic/hinic_tx.c b/drivers/net/ethernet/huawei/hinic/hinic_tx.c index c5bdb0d374efa..8d3ec6c729cc7 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_tx.c @@ -191157,8 +232238,20 @@ index 27937c5d79567..daec9ce04531b 100644 goto probe_failed; } +diff --git a/drivers/net/ethernet/ibm/ehea/ehea_main.c b/drivers/net/ethernet/ibm/ehea/ehea_main.c +index 
d5df131b183c7..6c534b92aeed0 100644 +--- a/drivers/net/ethernet/ibm/ehea/ehea_main.c ++++ b/drivers/net/ethernet/ibm/ehea/ehea_main.c +@@ -2898,6 +2898,7 @@ static struct device *ehea_register_port(struct ehea_port *port, + ret = of_device_register(&port->ofdev); + if (ret) { + pr_err("failed to register device. ret=%d\n", ret); ++ put_device(&port->ofdev.dev); + goto out; + } + diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c -index 6aa6ff89a7651..4a070724a8fb6 100644 +index 6aa6ff89a7651..8a92c6a6e764f 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -108,6 +108,7 @@ static int init_crq_queue(struct ibmvnic_adapter *adapter); @@ -191451,6 +232544,34 @@ index 6aa6ff89a7651..4a070724a8fb6 100644 set_current_state(TASK_UNINTERRUPTIBLE); schedule_timeout(60 * HZ); } +@@ -2499,19 +2621,19 @@ static void __ibmvnic_reset(struct work_struct *work) + rwi = get_next_rwi(adapter); + + /* +- * If there is another reset queued, free the previous rwi +- * and process the new reset even if previous reset failed +- * (the previous reset could have failed because of a fail +- * over for instance, so process the fail over). +- * + * If there are no resets queued and the previous reset failed, + * the adapter would be in an undefined state. So retry the + * previous reset as a hard reset. ++ * ++ * Else, free the previous rwi and, if there is another reset ++ * queued, process the new reset even if previous reset failed ++ * (the previous reset could have failed because of a fail ++ * over for instance, so process the fail over). + */ +- if (rwi) +- kfree(tmprwi); +- else if (rc) ++ if (!rwi && rc) + rwi = tmprwi; ++ else ++ kfree(tmprwi); + + if (rwi && (rwi->reset_reason == VNIC_RESET_FAILOVER || + rwi->reset_reason == VNIC_RESET_MOBILITY || rc)) @@ -2541,12 +2663,23 @@ static void __ibmvnic_delayed_reset(struct work_struct *work) __ibmvnic_reset(&adapter->ibmvnic_reset); } @@ -192273,10 +233394,23 @@ index 22df602323bc0..b01c439965ff9 100644 bool force_reset_recovery; diff --git a/drivers/net/ethernet/intel/e100.c b/drivers/net/ethernet/intel/e100.c -index 09ae1939e6db4..36d52246bdc66 100644 +index 09ae1939e6db4..8cd371437c99f 100644 --- a/drivers/net/ethernet/intel/e100.c +++ b/drivers/net/ethernet/intel/e100.c -@@ -3003,9 +3003,10 @@ static void __e100_shutdown(struct pci_dev *pdev, bool *enable_wake) +@@ -1742,11 +1742,8 @@ static int e100_xmit_prepare(struct nic *nic, struct cb *cb, + dma_addr = dma_map_single(&nic->pdev->dev, skb->data, skb->len, + DMA_TO_DEVICE); + /* If we can't map the skb, have the upper layer try later */ +- if (dma_mapping_error(&nic->pdev->dev, dma_addr)) { +- dev_kfree_skb_any(skb); +- skb = NULL; ++ if (dma_mapping_error(&nic->pdev->dev, dma_addr)) + return -ENOMEM; +- } + + /* + * Use the last 4 bytes of the SKB payload packet as the CRC, used for +@@ -3003,9 +3000,10 @@ static void __e100_shutdown(struct pci_dev *pdev, bool *enable_wake) struct net_device *netdev = pci_get_drvdata(pdev); struct nic *nic = netdev_priv(netdev); @@ -192288,7 +233422,7 @@ index 09ae1939e6db4..36d52246bdc66 100644 if ((nic->flags & wol_magic) | e100_asf(nic)) { /* enable reverse auto-negotiation */ -@@ -3022,7 +3023,7 @@ static void __e100_shutdown(struct pci_dev *pdev, bool *enable_wake) +@@ -3022,7 +3020,7 @@ static void __e100_shutdown(struct pci_dev *pdev, bool *enable_wake) *enable_wake = false; } @@ -192297,7 +233431,7 @@ index 09ae1939e6db4..36d52246bdc66 100644 } static int __e100_power_off(struct pci_dev 
*pdev, bool wake) -@@ -3042,8 +3043,6 @@ static int __maybe_unused e100_suspend(struct device *dev_d) +@@ -3042,8 +3040,6 @@ static int __maybe_unused e100_suspend(struct device *dev_d) __e100_shutdown(to_pci_dev(dev_d), &wake); @@ -192306,7 +233440,7 @@ index 09ae1939e6db4..36d52246bdc66 100644 return 0; } -@@ -3051,6 +3050,14 @@ static int __maybe_unused e100_resume(struct device *dev_d) +@@ -3051,6 +3047,14 @@ static int __maybe_unused e100_resume(struct device *dev_d) { struct net_device *netdev = dev_get_drvdata(dev_d); struct nic *nic = netdev_priv(netdev); @@ -192321,7 +233455,7 @@ index 09ae1939e6db4..36d52246bdc66 100644 /* disable reverse auto-negotiation */ if (nic->phy == phy_82552_v) { -@@ -3062,10 +3069,11 @@ static int __maybe_unused e100_resume(struct device *dev_d) +@@ -3062,10 +3066,11 @@ static int __maybe_unused e100_resume(struct device *dev_d) smartspeed & ~(E100_82552_REV_ANEG)); } @@ -192408,7 +233542,7 @@ index 5e4fc9b4e2adb..9466f65a6da77 100644 + .nvm_ops = &spt_nvm_ops, +}; diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c -index ebcb2a30add09..407bbb4cc236f 100644 +index ebcb2a30add09..7e41ce188cc6a 100644 --- a/drivers/net/ethernet/intel/e1000e/netdev.c +++ b/drivers/net/ethernet/intel/e1000e/netdev.c @@ -52,6 +52,7 @@ static const struct e1000_info *e1000_info_tbl[] = { @@ -192419,6 +233553,18 @@ index ebcb2a30add09..407bbb4cc236f 100644 }; struct e1000_reg_info { +@@ -5940,9 +5941,9 @@ static netdev_tx_t e1000_xmit_frame(struct sk_buff *skb, + e1000_tx_queue(tx_ring, tx_flags, count); + /* Make sure there is space in the ring for the next send. */ + e1000_maybe_stop_tx(tx_ring, +- (MAX_SKB_FRAGS * ++ ((MAX_SKB_FRAGS + 1) * + DIV_ROUND_UP(PAGE_SIZE, +- adapter->tx_fifo_limit) + 2)); ++ adapter->tx_fifo_limit) + 4)); + + if (!netdev_xmit_more() || + netif_xmit_stopped(netdev_get_tx_queue(netdev, 0))) { @@ -6346,7 +6347,8 @@ static void e1000e_s0ix_entry_flow(struct e1000_adapter *adapter) u32 mac_data; u16 phy_data; @@ -192482,6 +233628,34 @@ index ebcb2a30add09..407bbb4cc236f 100644 { 0, 0, 0, 0, 0, 0, 0 } /* terminate list */ }; +diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_main.c b/drivers/net/ethernet/intel/fm10k/fm10k_main.c +index 3362f26d7f999..1b273446621c5 100644 +--- a/drivers/net/ethernet/intel/fm10k/fm10k_main.c ++++ b/drivers/net/ethernet/intel/fm10k/fm10k_main.c +@@ -32,6 +32,8 @@ struct workqueue_struct *fm10k_workqueue; + **/ + static int __init fm10k_init_module(void) + { ++ int ret; ++ + pr_info("%s\n", fm10k_driver_string); + pr_info("%s\n", fm10k_copyright); + +@@ -43,7 +45,13 @@ static int __init fm10k_init_module(void) + + fm10k_dbg_init(); + +- return fm10k_register_pci_driver(); ++ ret = fm10k_register_pci_driver(); ++ if (ret) { ++ fm10k_dbg_exit(); ++ destroy_workqueue(fm10k_workqueue); ++ } ++ ++ return ret; + } + module_init(fm10k_init_module); + diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h index 39fb3d57c0574..0f19c237cb587 100644 --- a/drivers/net/ethernet/intel/i40e/i40e.h @@ -192649,7 +233823,7 @@ index 291e61ac3e448..9db5001297c7e 100644 dev_info(&pf->pdev->dev, "invalid VF id %d\n", vf_id); } diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c -index 513ba69743559..11a17ebfceef2 100644 +index 513ba69743559..813889604ff86 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c @@ -2081,9 +2081,6 @@ static int 
i40e_set_ringparam(struct net_device *netdev, @@ -192705,7 +233879,185 @@ index 513ba69743559..11a17ebfceef2 100644 } static void i40e_get_wol(struct net_device *netdev, -@@ -4376,7 +4382,7 @@ static int i40e_check_fdir_input_set(struct i40e_vsi *vsi, +@@ -3079,10 +3085,17 @@ static int i40e_get_rss_hash_opts(struct i40e_pf *pf, struct ethtool_rxnfc *cmd) + + if (cmd->flow_type == TCP_V4_FLOW || + cmd->flow_type == UDP_V4_FLOW) { +- if (i_set & I40E_L3_SRC_MASK) +- cmd->data |= RXH_IP_SRC; +- if (i_set & I40E_L3_DST_MASK) +- cmd->data |= RXH_IP_DST; ++ if (hw->mac.type == I40E_MAC_X722) { ++ if (i_set & I40E_X722_L3_SRC_MASK) ++ cmd->data |= RXH_IP_SRC; ++ if (i_set & I40E_X722_L3_DST_MASK) ++ cmd->data |= RXH_IP_DST; ++ } else { ++ if (i_set & I40E_L3_SRC_MASK) ++ cmd->data |= RXH_IP_SRC; ++ if (i_set & I40E_L3_DST_MASK) ++ cmd->data |= RXH_IP_DST; ++ } + } else if (cmd->flow_type == TCP_V6_FLOW || + cmd->flow_type == UDP_V6_FLOW) { + if (i_set & I40E_L3_V6_SRC_MASK) +@@ -3440,12 +3453,15 @@ static int i40e_get_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *cmd, + + /** + * i40e_get_rss_hash_bits - Read RSS Hash bits from register ++ * @hw: hw structure + * @nfc: pointer to user request + * @i_setc: bits currently set + * + * Returns value of bits to be set per user request + **/ +-static u64 i40e_get_rss_hash_bits(struct ethtool_rxnfc *nfc, u64 i_setc) ++static u64 i40e_get_rss_hash_bits(struct i40e_hw *hw, ++ struct ethtool_rxnfc *nfc, ++ u64 i_setc) + { + u64 i_set = i_setc; + u64 src_l3 = 0, dst_l3 = 0; +@@ -3464,8 +3480,13 @@ static u64 i40e_get_rss_hash_bits(struct ethtool_rxnfc *nfc, u64 i_setc) + dst_l3 = I40E_L3_V6_DST_MASK; + } else if (nfc->flow_type == TCP_V4_FLOW || + nfc->flow_type == UDP_V4_FLOW) { +- src_l3 = I40E_L3_SRC_MASK; +- dst_l3 = I40E_L3_DST_MASK; ++ if (hw->mac.type == I40E_MAC_X722) { ++ src_l3 = I40E_X722_L3_SRC_MASK; ++ dst_l3 = I40E_X722_L3_DST_MASK; ++ } else { ++ src_l3 = I40E_L3_SRC_MASK; ++ dst_l3 = I40E_L3_DST_MASK; ++ } + } else { + /* Any other flow type are not supported here */ + return i_set; +@@ -3483,6 +3504,7 @@ static u64 i40e_get_rss_hash_bits(struct ethtool_rxnfc *nfc, u64 i_setc) + return i_set; + } + ++#define FLOW_PCTYPES_SIZE 64 + /** + * i40e_set_rss_hash_opt - Enable/Disable flow types for RSS hash + * @pf: pointer to the physical function struct +@@ -3495,9 +3517,11 @@ static int i40e_set_rss_hash_opt(struct i40e_pf *pf, struct ethtool_rxnfc *nfc) + struct i40e_hw *hw = &pf->hw; + u64 hena = (u64)i40e_read_rx_ctl(hw, I40E_PFQF_HENA(0)) | + ((u64)i40e_read_rx_ctl(hw, I40E_PFQF_HENA(1)) << 32); +- u8 flow_pctype = 0; ++ DECLARE_BITMAP(flow_pctypes, FLOW_PCTYPES_SIZE); + u64 i_set, i_setc; + ++ bitmap_zero(flow_pctypes, FLOW_PCTYPES_SIZE); ++ + if (pf->flags & I40E_FLAG_MFP_ENABLED) { + dev_err(&pf->pdev->dev, + "Change of RSS hash input set is not supported when MFP mode is enabled\n"); +@@ -3513,36 +3537,35 @@ static int i40e_set_rss_hash_opt(struct i40e_pf *pf, struct ethtool_rxnfc *nfc) + + switch (nfc->flow_type) { + case TCP_V4_FLOW: +- flow_pctype = I40E_FILTER_PCTYPE_NONF_IPV4_TCP; ++ set_bit(I40E_FILTER_PCTYPE_NONF_IPV4_TCP, flow_pctypes); + if (pf->hw_features & I40E_HW_MULTIPLE_TCP_UDP_RSS_PCTYPE) +- hena |= +- BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV4_TCP_SYN_NO_ACK); ++ set_bit(I40E_FILTER_PCTYPE_NONF_IPV4_TCP_SYN_NO_ACK, ++ flow_pctypes); + break; + case TCP_V6_FLOW: +- flow_pctype = I40E_FILTER_PCTYPE_NONF_IPV6_TCP; +- if (pf->hw_features & I40E_HW_MULTIPLE_TCP_UDP_RSS_PCTYPE) +- hena |= +- 
BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV4_TCP_SYN_NO_ACK); ++ set_bit(I40E_FILTER_PCTYPE_NONF_IPV6_TCP, flow_pctypes); + if (pf->hw_features & I40E_HW_MULTIPLE_TCP_UDP_RSS_PCTYPE) +- hena |= +- BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV6_TCP_SYN_NO_ACK); ++ set_bit(I40E_FILTER_PCTYPE_NONF_IPV6_TCP_SYN_NO_ACK, ++ flow_pctypes); + break; + case UDP_V4_FLOW: +- flow_pctype = I40E_FILTER_PCTYPE_NONF_IPV4_UDP; +- if (pf->hw_features & I40E_HW_MULTIPLE_TCP_UDP_RSS_PCTYPE) +- hena |= +- BIT_ULL(I40E_FILTER_PCTYPE_NONF_UNICAST_IPV4_UDP) | +- BIT_ULL(I40E_FILTER_PCTYPE_NONF_MULTICAST_IPV4_UDP); +- ++ set_bit(I40E_FILTER_PCTYPE_NONF_IPV4_UDP, flow_pctypes); ++ if (pf->hw_features & I40E_HW_MULTIPLE_TCP_UDP_RSS_PCTYPE) { ++ set_bit(I40E_FILTER_PCTYPE_NONF_UNICAST_IPV4_UDP, ++ flow_pctypes); ++ set_bit(I40E_FILTER_PCTYPE_NONF_MULTICAST_IPV4_UDP, ++ flow_pctypes); ++ } + hena |= BIT_ULL(I40E_FILTER_PCTYPE_FRAG_IPV4); + break; + case UDP_V6_FLOW: +- flow_pctype = I40E_FILTER_PCTYPE_NONF_IPV6_UDP; +- if (pf->hw_features & I40E_HW_MULTIPLE_TCP_UDP_RSS_PCTYPE) +- hena |= +- BIT_ULL(I40E_FILTER_PCTYPE_NONF_UNICAST_IPV6_UDP) | +- BIT_ULL(I40E_FILTER_PCTYPE_NONF_MULTICAST_IPV6_UDP); +- ++ set_bit(I40E_FILTER_PCTYPE_NONF_IPV6_UDP, flow_pctypes); ++ if (pf->hw_features & I40E_HW_MULTIPLE_TCP_UDP_RSS_PCTYPE) { ++ set_bit(I40E_FILTER_PCTYPE_NONF_UNICAST_IPV6_UDP, ++ flow_pctypes); ++ set_bit(I40E_FILTER_PCTYPE_NONF_MULTICAST_IPV6_UDP, ++ flow_pctypes); ++ } + hena |= BIT_ULL(I40E_FILTER_PCTYPE_FRAG_IPV6); + break; + case AH_ESP_V4_FLOW: +@@ -3575,17 +3598,20 @@ static int i40e_set_rss_hash_opt(struct i40e_pf *pf, struct ethtool_rxnfc *nfc) + return -EINVAL; + } + +- if (flow_pctype) { +- i_setc = (u64)i40e_read_rx_ctl(hw, I40E_GLQF_HASH_INSET(0, +- flow_pctype)) | +- ((u64)i40e_read_rx_ctl(hw, I40E_GLQF_HASH_INSET(1, +- flow_pctype)) << 32); +- i_set = i40e_get_rss_hash_bits(nfc, i_setc); +- i40e_write_rx_ctl(hw, I40E_GLQF_HASH_INSET(0, flow_pctype), +- (u32)i_set); +- i40e_write_rx_ctl(hw, I40E_GLQF_HASH_INSET(1, flow_pctype), +- (u32)(i_set >> 32)); +- hena |= BIT_ULL(flow_pctype); ++ if (bitmap_weight(flow_pctypes, FLOW_PCTYPES_SIZE)) { ++ u8 flow_id; ++ ++ for_each_set_bit(flow_id, flow_pctypes, FLOW_PCTYPES_SIZE) { ++ i_setc = (u64)i40e_read_rx_ctl(hw, I40E_GLQF_HASH_INSET(0, flow_id)) | ++ ((u64)i40e_read_rx_ctl(hw, I40E_GLQF_HASH_INSET(1, flow_id)) << 32); ++ i_set = i40e_get_rss_hash_bits(&pf->hw, nfc, i_setc); ++ ++ i40e_write_rx_ctl(hw, I40E_GLQF_HASH_INSET(0, flow_id), ++ (u32)i_set); ++ i40e_write_rx_ctl(hw, I40E_GLQF_HASH_INSET(1, flow_id), ++ (u32)(i_set >> 32)); ++ hena |= BIT_ULL(flow_id); ++ } + } + + i40e_write_rx_ctl(hw, I40E_PFQF_HENA(0), (u32)hena); +@@ -4338,11 +4364,7 @@ static int i40e_check_fdir_input_set(struct i40e_vsi *vsi, + return -EOPNOTSUPP; + + /* First 4 bytes of L4 header */ +- if (usr_ip4_spec->l4_4_bytes == htonl(0xFFFFFFFF)) +- new_mask |= I40E_L4_SRC_MASK | I40E_L4_DST_MASK; +- else if (!usr_ip4_spec->l4_4_bytes) +- new_mask &= ~(I40E_L4_SRC_MASK | I40E_L4_DST_MASK); +- else ++ if (usr_ip4_spec->l4_4_bytes) + return -EOPNOTSUPP; + + /* Filtering on Type of Service is not supported. 
*/ +@@ -4376,16 +4398,12 @@ static int i40e_check_fdir_input_set(struct i40e_vsi *vsi, (struct in6_addr *)&ipv6_full_mask)) new_mask |= I40E_L3_V6_DST_MASK; else if (ipv6_addr_any((struct in6_addr *) @@ -192714,7 +234066,17 @@ index 513ba69743559..11a17ebfceef2 100644 new_mask &= ~I40E_L3_V6_DST_MASK; else return -EOPNOTSUPP; -@@ -4912,7 +4918,7 @@ static int i40e_set_channels(struct net_device *dev, + +- if (usr_ip6_spec->l4_4_bytes == htonl(0xFFFFFFFF)) +- new_mask |= I40E_L4_SRC_MASK | I40E_L4_DST_MASK; +- else if (!usr_ip6_spec->l4_4_bytes) +- new_mask &= ~(I40E_L4_SRC_MASK | I40E_L4_DST_MASK); +- else ++ if (usr_ip6_spec->l4_4_bytes) + return -EOPNOTSUPP; + + /* Filtering on Traffic class is not supported. */ +@@ -4912,7 +4930,7 @@ static int i40e_set_channels(struct net_device *dev, /* We do not support setting channels via ethtool when TCs are * configured through mqprio */ @@ -192724,7 +234086,7 @@ index 513ba69743559..11a17ebfceef2 100644 /* verify they are not requesting separate vectors */ diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c -index e04b540cedc85..ad6f6fe25057e 100644 +index e04b540cedc85..5ffcd3cc989f7 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -99,6 +99,24 @@ MODULE_LICENSE("GPL v2"); @@ -193036,6 +234398,15 @@ index e04b540cedc85..ad6f6fe25057e 100644 int ret = i40e_sync_vsi_filters(pf->vsi[v]); if (ret) { +@@ -2662,7 +2789,7 @@ static int i40e_change_mtu(struct net_device *netdev, int new_mtu) + struct i40e_pf *pf = vsi->back; + + if (i40e_enabled_xdp_vsi(vsi)) { +- int frame_size = new_mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN; ++ int frame_size = new_mtu + I40E_PACKET_HDR_PAD; + + if (frame_size > i40e_max_xdp_frame_size(vsi)) + return -EINVAL; @@ -3294,12 +3421,8 @@ static int i40e_configure_rx_ring(struct i40e_ring *ring) if (ring->vsi->type == I40E_VSI_MAIN) xdp_rxq_info_unreg_mem_model(&ring->xdp_rxq); @@ -193059,7 +234430,58 @@ index e04b540cedc85..ad6f6fe25057e 100644 ring->rx_buf_len = vsi->rx_buf_len; if (ring->vsi->type == I40E_VSI_MAIN) { ret = xdp_rxq_info_reg_mem_model(&ring->xdp_rxq, -@@ -5193,7 +5313,7 @@ static u8 i40e_pf_get_num_tc(struct i40e_pf *pf) +@@ -3428,6 +3548,24 @@ static int i40e_vsi_configure_tx(struct i40e_vsi *vsi) + return err; + } + ++/** ++ * i40e_calculate_vsi_rx_buf_len - Calculates buffer length ++ * ++ * @vsi: VSI to calculate rx_buf_len from ++ */ ++static u16 i40e_calculate_vsi_rx_buf_len(struct i40e_vsi *vsi) ++{ ++ if (!vsi->netdev || (vsi->back->flags & I40E_FLAG_LEGACY_RX)) ++ return I40E_RXBUFFER_2048; ++ ++#if (PAGE_SIZE < 8192) ++ if (!I40E_2K_TOO_SMALL_WITH_PADDING && vsi->netdev->mtu <= ETH_DATA_LEN) ++ return I40E_RXBUFFER_1536 - NET_IP_ALIGN; ++#endif ++ ++ return PAGE_SIZE < 8192 ? 
I40E_RXBUFFER_3072 : I40E_RXBUFFER_2048; ++} ++ + /** + * i40e_vsi_configure_rx - Configure the VSI for Rx + * @vsi: the VSI being configured +@@ -3439,20 +3577,14 @@ static int i40e_vsi_configure_rx(struct i40e_vsi *vsi) + int err = 0; + u16 i; + +- if (!vsi->netdev || (vsi->back->flags & I40E_FLAG_LEGACY_RX)) { +- vsi->max_frame = I40E_MAX_RXBUFFER; +- vsi->rx_buf_len = I40E_RXBUFFER_2048; ++ vsi->max_frame = I40E_MAX_RXBUFFER; ++ vsi->rx_buf_len = i40e_calculate_vsi_rx_buf_len(vsi); ++ + #if (PAGE_SIZE < 8192) +- } else if (!I40E_2K_TOO_SMALL_WITH_PADDING && +- (vsi->netdev->mtu <= ETH_DATA_LEN)) { ++ if (vsi->netdev && !I40E_2K_TOO_SMALL_WITH_PADDING && ++ vsi->netdev->mtu <= ETH_DATA_LEN) + vsi->max_frame = I40E_RXBUFFER_1536 - NET_IP_ALIGN; +- vsi->rx_buf_len = I40E_RXBUFFER_1536 - NET_IP_ALIGN; + #endif +- } else { +- vsi->max_frame = I40E_MAX_RXBUFFER; +- vsi->rx_buf_len = (PAGE_SIZE < 8192) ? I40E_RXBUFFER_3072 : +- I40E_RXBUFFER_2048; +- } + + /* set up individual rings */ + for (i = 0; i < vsi->num_queue_pairs && !err; i++) +@@ -5193,7 +5325,7 @@ static u8 i40e_pf_get_num_tc(struct i40e_pf *pf) u8 num_tc = 0; struct i40e_dcbx_config *dcbcfg = &hw->local_dcbx_config; @@ -193068,7 +234490,7 @@ index e04b540cedc85..ad6f6fe25057e 100644 return pf->vsi[pf->lan_vsi]->mqprio_qopt.qopt.num_tc; /* If neither MQPRIO nor DCB is enabled, then always use single TC */ -@@ -5225,7 +5345,7 @@ static u8 i40e_pf_get_num_tc(struct i40e_pf *pf) +@@ -5225,7 +5357,7 @@ static u8 i40e_pf_get_num_tc(struct i40e_pf *pf) **/ static u8 i40e_pf_get_tc_map(struct i40e_pf *pf) { @@ -193077,7 +234499,7 @@ index e04b540cedc85..ad6f6fe25057e 100644 return i40e_mqprio_get_enabled_tc(pf); /* If neither MQPRIO nor DCB is enabled for this PF then just return -@@ -5322,7 +5442,7 @@ static int i40e_vsi_configure_bw_alloc(struct i40e_vsi *vsi, u8 enabled_tc, +@@ -5322,7 +5454,7 @@ static int i40e_vsi_configure_bw_alloc(struct i40e_vsi *vsi, u8 enabled_tc, int i; /* There is no need to reset BW when mqprio mode is on. 
*/ @@ -193086,7 +234508,7 @@ index e04b540cedc85..ad6f6fe25057e 100644 return 0; if (!vsi->mqprio_qopt.qopt.hw && !(pf->flags & I40E_FLAG_DCB_ENABLED)) { ret = i40e_set_bw_limit(vsi, vsi->seid, 0); -@@ -5394,7 +5514,7 @@ static void i40e_vsi_config_netdev_tc(struct i40e_vsi *vsi, u8 enabled_tc) +@@ -5394,7 +5526,7 @@ static void i40e_vsi_config_netdev_tc(struct i40e_vsi *vsi, u8 enabled_tc) vsi->tc_config.tc_info[i].qoffset); } @@ -193095,7 +234517,7 @@ index e04b540cedc85..ad6f6fe25057e 100644 return; /* Assign UP2TC map for the VSI */ -@@ -5426,6 +5546,58 @@ static void i40e_vsi_update_queue_map(struct i40e_vsi *vsi, +@@ -5426,6 +5558,58 @@ static void i40e_vsi_update_queue_map(struct i40e_vsi *vsi, sizeof(vsi->info.tc_mapping)); } @@ -193154,7 +234576,7 @@ index e04b540cedc85..ad6f6fe25057e 100644 /** * i40e_vsi_config_tc - Configure VSI Tx Scheduler for given TC map * @vsi: VSI to be configured -@@ -5503,7 +5675,7 @@ static int i40e_vsi_config_tc(struct i40e_vsi *vsi, u8 enabled_tc) +@@ -5503,7 +5687,7 @@ static int i40e_vsi_config_tc(struct i40e_vsi *vsi, u8 enabled_tc) ctxt.vf_num = 0; ctxt.uplink_seid = vsi->uplink_seid; ctxt.info = vsi->info; @@ -193163,7 +234585,7 @@ index e04b540cedc85..ad6f6fe25057e 100644 ret = i40e_vsi_setup_queue_map_mqprio(vsi, &ctxt, enabled_tc); if (ret) goto out; -@@ -5587,6 +5759,26 @@ static int i40e_get_link_speed(struct i40e_vsi *vsi) +@@ -5587,6 +5771,26 @@ static int i40e_get_link_speed(struct i40e_vsi *vsi) } } @@ -193190,7 +234612,7 @@ index e04b540cedc85..ad6f6fe25057e 100644 /** * i40e_set_bw_limit - setup BW limit for Tx traffic based on max_tx_rate * @vsi: VSI to be configured -@@ -5609,10 +5801,10 @@ int i40e_set_bw_limit(struct i40e_vsi *vsi, u16 seid, u64 max_tx_rate) +@@ -5609,10 +5813,10 @@ int i40e_set_bw_limit(struct i40e_vsi *vsi, u16 seid, u64 max_tx_rate) max_tx_rate, seid); return -EINVAL; } @@ -193203,7 +234625,7 @@ index e04b540cedc85..ad6f6fe25057e 100644 } /* Tx rate credits are in values of 50Mbps, 0 is disabled */ -@@ -5716,24 +5908,6 @@ static void i40e_remove_queue_channels(struct i40e_vsi *vsi) +@@ -5716,24 +5920,6 @@ static void i40e_remove_queue_channels(struct i40e_vsi *vsi) INIT_LIST_HEAD(&vsi->ch_list); } @@ -193228,7 +234650,7 @@ index e04b540cedc85..ad6f6fe25057e 100644 /** * i40e_get_max_queues_for_channel * @vsi: ptr to VSI to which channels are associated with -@@ -6240,26 +6414,15 @@ int i40e_create_queue_channel(struct i40e_vsi *vsi, +@@ -6240,26 +6426,15 @@ int i40e_create_queue_channel(struct i40e_vsi *vsi, /* By default we are in VEPA mode, if this is the first VF/VMDq * VSI to be added switch to VEB mode. */ @@ -193262,7 +234684,7 @@ index e04b540cedc85..ad6f6fe25057e 100644 } /* now onwards for main VSI, number of queues will be value * of TC0's queue count -@@ -6367,6 +6530,9 @@ static int i40e_configure_queue_channels(struct i40e_vsi *vsi) +@@ -6367,6 +6542,9 @@ static int i40e_configure_queue_channels(struct i40e_vsi *vsi) vsi->tc_seid_map[i] = ch->seid; } } @@ -193272,7 +234694,7 @@ index e04b540cedc85..ad6f6fe25057e 100644 return ret; err_free: -@@ -7464,42 +7630,43 @@ static void i40e_free_macvlan_channels(struct i40e_vsi *vsi) +@@ -7464,42 +7642,43 @@ static void i40e_free_macvlan_channels(struct i40e_vsi *vsi) static int i40e_fwd_ring_up(struct i40e_vsi *vsi, struct net_device *vdev, struct i40e_fwd_adapter *fwd) { @@ -193329,7 +234751,7 @@ index e04b540cedc85..ad6f6fe25057e 100644 /* Guarantee all rings are updated before we update the * MAC address filter. 
*/ -@@ -7649,7 +7816,7 @@ static void *i40e_fwd_add(struct net_device *netdev, struct net_device *vdev) +@@ -7649,7 +7828,7 @@ static void *i40e_fwd_add(struct net_device *netdev, struct net_device *vdev) netdev_info(netdev, "Macvlans are not supported when DCB is enabled\n"); return ERR_PTR(-EINVAL); } @@ -193338,7 +234760,7 @@ index e04b540cedc85..ad6f6fe25057e 100644 netdev_info(netdev, "Macvlans are not supported when HW TC offload is on\n"); return ERR_PTR(-EINVAL); } -@@ -7902,7 +8069,7 @@ config_tc: +@@ -7902,7 +8081,7 @@ config_tc: /* Quiesce VSI queues */ i40e_quiesce_vsi(vsi); @@ -193347,7 +234769,7 @@ index e04b540cedc85..ad6f6fe25057e 100644 i40e_remove_queue_channels(vsi); /* Configure VSI for enabled TCs */ -@@ -7912,17 +8079,25 @@ config_tc: +@@ -7912,17 +8091,25 @@ config_tc: vsi->seid); need_reset = true; goto exit; @@ -193380,7 +234802,7 @@ index e04b540cedc85..ad6f6fe25057e 100644 ret = i40e_set_bw_limit(vsi, vsi->seid, max_tx_rate); if (!ret) { u64 credits = max_tx_rate; -@@ -8443,6 +8618,11 @@ static int i40e_configure_clsflower(struct i40e_vsi *vsi, +@@ -8443,6 +8630,11 @@ static int i40e_configure_clsflower(struct i40e_vsi *vsi, return -EOPNOTSUPP; } @@ -193392,7 +234814,7 @@ index e04b540cedc85..ad6f6fe25057e 100644 if (test_bit(__I40E_RESET_RECOVERY_PENDING, pf->state) || test_bit(__I40E_RESET_INTR_RECEIVED, pf->state)) return -EBUSY; -@@ -8482,9 +8662,8 @@ static int i40e_configure_clsflower(struct i40e_vsi *vsi, +@@ -8482,9 +8674,8 @@ static int i40e_configure_clsflower(struct i40e_vsi *vsi, err = i40e_add_del_cloud_filter(vsi, filter, true); if (err) { @@ -193404,7 +234826,7 @@ index e04b540cedc85..ad6f6fe25057e 100644 goto err; } -@@ -8668,6 +8847,27 @@ int i40e_open(struct net_device *netdev) +@@ -8668,6 +8859,27 @@ int i40e_open(struct net_device *netdev) return 0; } @@ -193432,7 +234854,7 @@ index e04b540cedc85..ad6f6fe25057e 100644 /** * i40e_vsi_open - * @vsi: the VSI to open -@@ -8704,13 +8904,7 @@ int i40e_vsi_open(struct i40e_vsi *vsi) +@@ -8704,13 +8916,7 @@ int i40e_vsi_open(struct i40e_vsi *vsi) goto err_setup_rx; /* Notify the stack of the actual queue counts. 
*/ @@ -193447,7 +234869,41 @@ index e04b540cedc85..ad6f6fe25057e 100644 if (err) goto err_set_queues; -@@ -10459,7 +10653,7 @@ static int i40e_reset(struct i40e_pf *pf) +@@ -10325,6 +10531,21 @@ static int i40e_rebuild_channels(struct i40e_vsi *vsi) + return 0; + } + ++/** ++ * i40e_clean_xps_state - clean xps state for every tx_ring ++ * @vsi: ptr to the VSI ++ **/ ++static void i40e_clean_xps_state(struct i40e_vsi *vsi) ++{ ++ int i; ++ ++ if (vsi->tx_rings) ++ for (i = 0; i < vsi->num_queue_pairs; i++) ++ if (vsi->tx_rings[i]) ++ clear_bit(__I40E_TX_XPS_INIT_DONE, ++ vsi->tx_rings[i]->state); ++} ++ + /** + * i40e_prep_for_reset - prep for the core to reset + * @pf: board private structure +@@ -10349,8 +10570,10 @@ static void i40e_prep_for_reset(struct i40e_pf *pf) + i40e_pf_quiesce_all_vsi(pf); + + for (v = 0; v < pf->num_alloc_vsi; v++) { +- if (pf->vsi[v]) ++ if (pf->vsi[v]) { ++ i40e_clean_xps_state(pf->vsi[v]); + pf->vsi[v]->seid = 0; ++ } + } + + i40e_shutdown_adminq(&pf->hw); +@@ -10459,7 +10682,7 @@ static int i40e_reset(struct i40e_pf *pf) **/ static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired) { @@ -193456,7 +234912,7 @@ index e04b540cedc85..ad6f6fe25057e 100644 struct i40e_vsi *vsi = pf->vsi[pf->lan_vsi]; struct i40e_hw *hw = &pf->hw; i40e_status ret; -@@ -10467,13 +10661,11 @@ static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired) +@@ -10467,13 +10690,11 @@ static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired) int v; if (test_bit(__I40E_EMP_RESET_INTR_RECEIVED, pf->state) && @@ -193472,7 +234928,7 @@ index e04b540cedc85..ad6f6fe25057e 100644 goto clear_recovery; dev_dbg(&pf->pdev->dev, "Rebuilding internal switch\n"); -@@ -10487,15 +10679,9 @@ static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired) +@@ -10487,15 +10708,9 @@ static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired) } i40e_get_oem_version(&pf->hw); @@ -193491,7 +234947,7 @@ index e04b540cedc85..ad6f6fe25057e 100644 } /* re-verify the eeprom if we just had an EMP reset */ -@@ -10506,13 +10692,12 @@ static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired) +@@ -10506,13 +10721,12 @@ static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired) * accordingly with regard to resources initialization * and deinitialization */ @@ -193507,7 +234963,7 @@ index e04b540cedc85..ad6f6fe25057e 100644 /* we're staying in recovery mode so we'll reinitialize * misc vector here */ -@@ -10562,7 +10747,7 @@ static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired) +@@ -10562,7 +10776,7 @@ static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired) * unless I40E_FLAG_TC_MQPRIO was enabled or DCB * is not supported with new link speed */ @@ -193516,7 +234972,7 @@ index e04b540cedc85..ad6f6fe25057e 100644 i40e_aq_set_dcb_parameters(hw, false, NULL); } else { if (I40E_IS_X710TL_DEVICE(hw->device_id) && -@@ -10657,10 +10842,10 @@ static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired) +@@ -10657,10 +10871,10 @@ static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired) } if (vsi->mqprio_qopt.max_rate[0]) { @@ -193529,7 +234985,7 @@ index e04b540cedc85..ad6f6fe25057e 100644 ret = i40e_set_bw_limit(vsi, vsi->seid, max_tx_rate); if (ret) goto end_unlock; -@@ -10766,6 +10951,9 @@ static void i40e_reset_and_rebuild(struct i40e_pf *pf, bool reinit, +@@ -10766,6 +10980,9 @@ static void i40e_reset_and_rebuild(struct 
i40e_pf *pf, bool reinit, bool lock_acquired) { int ret; @@ -193539,7 +234995,7 @@ index e04b540cedc85..ad6f6fe25057e 100644 /* Now we wait for GRST to settle out. * We don't have to delete the VEBs or VSIs from the hw switch * because the reset will make them disappear. -@@ -11705,7 +11893,6 @@ static int i40e_init_interrupt_scheme(struct i40e_pf *pf) +@@ -11705,7 +11922,6 @@ static int i40e_init_interrupt_scheme(struct i40e_pf *pf) return -ENOMEM; pf->irq_pile->num_entries = vectors; @@ -193547,7 +235003,7 @@ index e04b540cedc85..ad6f6fe25057e 100644 /* track first vector for misc interrupts, ignore return */ (void)i40e_get_lump(pf, pf->irq_pile, 1, I40E_PILE_VALID_BIT - 1); -@@ -12126,6 +12313,8 @@ int i40e_reconfig_rss_queues(struct i40e_pf *pf, int queue_count) +@@ -12126,6 +12342,8 @@ int i40e_reconfig_rss_queues(struct i40e_pf *pf, int queue_count) vsi->req_queue_pairs = queue_count; i40e_prep_for_reset(pf); @@ -193556,7 +235012,7 @@ index e04b540cedc85..ad6f6fe25057e 100644 pf->alloc_rss_size = new_rss_size; -@@ -12508,7 +12697,6 @@ static int i40e_sw_init(struct i40e_pf *pf) +@@ -12508,7 +12726,6 @@ static int i40e_sw_init(struct i40e_pf *pf) goto sw_init_done; } pf->qp_pile->num_entries = pf->hw.func_caps.num_tx_qp; @@ -193564,7 +235020,25 @@ index e04b540cedc85..ad6f6fe25057e 100644 pf->tx_timeout_recovery_level = 1; -@@ -12953,6 +13141,10 @@ static int i40e_xdp_setup(struct i40e_vsi *vsi, struct bpf_prog *prog, +@@ -12788,6 +13005,8 @@ static int i40e_ndo_bridge_setlink(struct net_device *dev, + } + + br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC); ++ if (!br_spec) ++ return -EINVAL; + + nla_for_each_nested(attr, br_spec, rem) { + __u16 mode; +@@ -12942,7 +13161,7 @@ static int i40e_xdp_setup(struct i40e_vsi *vsi, struct bpf_prog *prog, + int i; + + /* Don't allow frames that span over multiple buffers */ +- if (frame_size > vsi->rx_buf_len) { ++ if (frame_size > i40e_calculate_vsi_rx_buf_len(vsi)) { + NL_SET_ERR_MSG_MOD(extack, "MTU too large to enable XDP"); + return -EINVAL; + } +@@ -12953,6 +13172,10 @@ static int i40e_xdp_setup(struct i40e_vsi *vsi, struct bpf_prog *prog, if (need_reset) i40e_prep_for_reset(pf); @@ -193575,7 +235049,7 @@ index e04b540cedc85..ad6f6fe25057e 100644 old_prog = xchg(&vsi->xdp_prog, prog); if (need_reset) { -@@ -12962,6 +13154,14 @@ static int i40e_xdp_setup(struct i40e_vsi *vsi, struct bpf_prog *prog, +@@ -12962,6 +13185,14 @@ static int i40e_xdp_setup(struct i40e_vsi *vsi, struct bpf_prog *prog, i40e_reset_and_rebuild(pf, true, true); } @@ -193590,7 +235064,7 @@ index e04b540cedc85..ad6f6fe25057e 100644 for (i = 0; i < vsi->num_queue_pairs; i++) WRITE_ONCE(vsi->rx_rings[i]->xdp_prog, vsi->xdp_prog); -@@ -13194,6 +13394,7 @@ int i40e_queue_pair_disable(struct i40e_vsi *vsi, int queue_pair) +@@ -13194,6 +13425,7 @@ int i40e_queue_pair_disable(struct i40e_vsi *vsi, int queue_pair) i40e_queue_pair_disable_irq(vsi, queue_pair); err = i40e_queue_pair_toggle_rings(vsi, queue_pair, false /* off */); @@ -193598,7 +235072,7 @@ index e04b540cedc85..ad6f6fe25057e 100644 i40e_queue_pair_toggle_napi(vsi, queue_pair, false /* off */); i40e_queue_pair_clean_rings(vsi, queue_pair); i40e_queue_pair_reset_stats(vsi, queue_pair); -@@ -13771,7 +13972,7 @@ int i40e_vsi_release(struct i40e_vsi *vsi) +@@ -13771,7 +14003,7 @@ int i40e_vsi_release(struct i40e_vsi *vsi) dev_info(&pf->pdev->dev, "Can't remove PF VSI\n"); return -ENODEV; } @@ -193607,7 +235081,7 @@ index e04b540cedc85..ad6f6fe25057e 100644 uplink_seid = vsi->uplink_seid; if 
(vsi->type != I40E_VSI_SRIOV) { if (vsi->netdev_registered) { -@@ -14101,6 +14302,9 @@ struct i40e_vsi *i40e_vsi_setup(struct i40e_pf *pf, u8 type, +@@ -14101,6 +14333,9 @@ struct i40e_vsi *i40e_vsi_setup(struct i40e_pf *pf, u8 type, case I40E_VSI_MAIN: case I40E_VSI_VMDQ2: ret = i40e_config_netdev(vsi); @@ -193617,7 +235091,7 @@ index e04b540cedc85..ad6f6fe25057e 100644 if (ret) goto err_netdev; ret = register_netdev(vsi->netdev); -@@ -15403,8 +15607,8 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent) +@@ -15403,8 +15638,8 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (hw->aq.api_maj_ver == I40E_FW_API_VERSION_MAJOR && hw->aq.api_min_ver > I40E_FW_MINOR_VERSION(hw)) @@ -193628,7 +235102,7 @@ index e04b540cedc85..ad6f6fe25057e 100644 hw->aq.api_maj_ver, hw->aq.api_min_ver, I40E_FW_API_VERSION_MAJOR, -@@ -15840,8 +16044,13 @@ static void i40e_remove(struct pci_dev *pdev) +@@ -15840,8 +16075,13 @@ static void i40e_remove(struct pci_dev *pdev) i40e_write_rx_ctl(hw, I40E_PFQF_HENA(0), 0); i40e_write_rx_ctl(hw, I40E_PFQF_HENA(1), 0); @@ -193643,7 +235117,7 @@ index e04b540cedc85..ad6f6fe25057e 100644 if (pf->flags & I40E_FLAG_SRIOV_ENABLED) { set_bit(__I40E_VF_RESETS_DISABLED, pf->state); -@@ -16040,6 +16249,9 @@ static void i40e_pci_error_reset_done(struct pci_dev *pdev) +@@ -16040,6 +16280,9 @@ static void i40e_pci_error_reset_done(struct pci_dev *pdev) { struct i40e_pf *pf = pci_get_drvdata(pdev); @@ -193653,6 +235127,31 @@ index e04b540cedc85..ad6f6fe25057e 100644 i40e_reset_and_rebuild(pf, false, false); } +@@ -16282,6 +16525,8 @@ static struct pci_driver i40e_driver = { + **/ + static int __init i40e_init_module(void) + { ++ int err; ++ + pr_info("%s: %s\n", i40e_driver_name, i40e_driver_string); + pr_info("%s: %s\n", i40e_driver_name, i40e_copyright); + +@@ -16299,7 +16544,14 @@ static int __init i40e_init_module(void) + } + + i40e_dbg_init(); +- return pci_register_driver(&i40e_driver); ++ err = pci_register_driver(&i40e_driver); ++ if (err) { ++ destroy_workqueue(i40e_wq); ++ i40e_dbg_exit(); ++ return err; ++ } ++ ++ return 0; + } + module_init(i40e_init_module); + diff --git a/drivers/net/ethernet/intel/i40e/i40e_register.h b/drivers/net/ethernet/intel/i40e/i40e_register.h index 8d0588a27a053..7339003aa17cd 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_register.h @@ -193786,7 +235285,7 @@ index bfc2845c99d1c..f3b0b81517096 100644 /** * i40e_get_head - Retrieve head from head writeback diff --git a/drivers/net/ethernet/intel/i40e/i40e_type.h b/drivers/net/ethernet/intel/i40e/i40e_type.h -index 36a4ca1ffb1a9..7b3f30beb757a 100644 +index 36a4ca1ffb1a9..388c3d36d96a5 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_type.h +++ b/drivers/net/ethernet/intel/i40e/i40e_type.h @@ -1172,6 +1172,7 @@ struct i40e_eth_stats { @@ -193797,8 +235296,19 @@ index 36a4ca1ffb1a9..7b3f30beb757a 100644 }; /* Statistics collected per VEB per TC */ +@@ -1403,6 +1404,10 @@ struct i40e_lldp_variables { + #define I40E_PFQF_CTL_0_HASHLUTSIZE_512 0x00010000 + + /* INPUT SET MASK for RSS, flow director, and flexible payload */ ++#define I40E_X722_L3_SRC_SHIFT 49 ++#define I40E_X722_L3_SRC_MASK (0x3ULL << I40E_X722_L3_SRC_SHIFT) ++#define I40E_X722_L3_DST_SHIFT 41 ++#define I40E_X722_L3_DST_MASK (0x3ULL << I40E_X722_L3_DST_SHIFT) + #define I40E_L3_SRC_SHIFT 47 + #define I40E_L3_SRC_MASK (0x3ULL << I40E_L3_SRC_SHIFT) + #define I40E_L3_V6_SRC_SHIFT 43 diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c 
b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c -index 472f56b360b8c..c078fbaf19fd4 100644 +index 472f56b360b8c..7aedf20a10214 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c @@ -183,17 +183,18 @@ void i40e_vc_notify_vf_reset(struct i40e_vf *vf) @@ -193938,7 +235448,104 @@ index 472f56b360b8c..c078fbaf19fd4 100644 reg = rd32(hw, I40E_VPGEN_VFRTRIG(vf->vf_id)); reg |= I40E_VPGEN_VFRTRIG_VFSWR_MASK; wr32(hw, I40E_VPGEN_VFRTRIG(vf->vf_id), reg); -@@ -1893,25 +1942,6 @@ static int i40e_vc_send_msg_to_vf(struct i40e_vf *vf, u32 v_opcode, +@@ -1487,10 +1536,12 @@ bool i40e_reset_vf(struct i40e_vf *vf, bool flr) + if (test_bit(__I40E_VF_RESETS_DISABLED, pf->state)) + return true; + +- /* If the VFs have been disabled, this means something else is +- * resetting the VF, so we shouldn't continue. +- */ +- if (test_and_set_bit(__I40E_VF_DISABLE, pf->state)) ++ /* Bail out if VFs are disabled. */ ++ if (test_bit(__I40E_VF_DISABLE, pf->state)) ++ return true; ++ ++ /* If VF is being reset already we don't need to continue. */ ++ if (test_and_set_bit(I40E_VF_STATE_RESETTING, &vf->vf_states)) + return true; + + i40e_trigger_vf_reset(vf, flr); +@@ -1527,7 +1578,8 @@ bool i40e_reset_vf(struct i40e_vf *vf, bool flr) + i40e_cleanup_reset_vf(vf); + + i40e_flush(hw); +- clear_bit(__I40E_VF_DISABLE, pf->state); ++ usleep_range(20000, 40000); ++ clear_bit(I40E_VF_STATE_RESETTING, &vf->vf_states); + + return true; + } +@@ -1560,8 +1612,12 @@ bool i40e_reset_all_vfs(struct i40e_pf *pf, bool flr) + return false; + + /* Begin reset on all VFs at once */ +- for (v = 0; v < pf->num_alloc_vfs; v++) +- i40e_trigger_vf_reset(&pf->vf[v], flr); ++ for (v = 0; v < pf->num_alloc_vfs; v++) { ++ vf = &pf->vf[v]; ++ /* If VF is being reset no need to trigger reset again */ ++ if (!test_bit(I40E_VF_STATE_RESETTING, &vf->vf_states)) ++ i40e_trigger_vf_reset(&pf->vf[v], flr); ++ } + + /* HW requires some time to make sure it can flush the FIFO for a VF + * when it resets it. Poll the VPGEN_VFRSTAT register for each VF in +@@ -1577,9 +1633,11 @@ bool i40e_reset_all_vfs(struct i40e_pf *pf, bool flr) + */ + while (v < pf->num_alloc_vfs) { + vf = &pf->vf[v]; +- reg = rd32(hw, I40E_VPGEN_VFRSTAT(vf->vf_id)); +- if (!(reg & I40E_VPGEN_VFRSTAT_VFRD_MASK)) +- break; ++ if (!test_bit(I40E_VF_STATE_RESETTING, &vf->vf_states)) { ++ reg = rd32(hw, I40E_VPGEN_VFRSTAT(vf->vf_id)); ++ if (!(reg & I40E_VPGEN_VFRSTAT_VFRD_MASK)) ++ break; ++ } + + /* If the current VF has finished resetting, move on + * to the next VF in sequence. 
+@@ -1607,6 +1665,10 @@ bool i40e_reset_all_vfs(struct i40e_pf *pf, bool flr) + if (pf->vf[v].lan_vsi_idx == 0) + continue; + ++ /* If VF is reset in another thread just continue */ ++ if (test_bit(I40E_VF_STATE_RESETTING, &vf->vf_states)) ++ continue; ++ + i40e_vsi_stop_rings_no_wait(pf->vsi[pf->vf[v].lan_vsi_idx]); + } + +@@ -1618,6 +1680,10 @@ bool i40e_reset_all_vfs(struct i40e_pf *pf, bool flr) + if (pf->vf[v].lan_vsi_idx == 0) + continue; + ++ /* If VF is reset in another thread just continue */ ++ if (test_bit(I40E_VF_STATE_RESETTING, &vf->vf_states)) ++ continue; ++ + i40e_vsi_wait_queues_disabled(pf->vsi[pf->vf[v].lan_vsi_idx]); + } + +@@ -1627,10 +1693,16 @@ bool i40e_reset_all_vfs(struct i40e_pf *pf, bool flr) + mdelay(50); + + /* Finish the reset on each VF */ +- for (v = 0; v < pf->num_alloc_vfs; v++) ++ for (v = 0; v < pf->num_alloc_vfs; v++) { ++ /* If VF is reset in another thread just continue */ ++ if (test_bit(I40E_VF_STATE_RESETTING, &vf->vf_states)) ++ continue; ++ + i40e_cleanup_reset_vf(&pf->vf[v]); ++ } + + i40e_flush(hw); ++ usleep_range(20000, 40000); + clear_bit(__I40E_VF_DISABLE, pf->state); + + return true; +@@ -1893,25 +1965,6 @@ static int i40e_vc_send_msg_to_vf(struct i40e_vf *vf, u32 v_opcode, hw = &pf->hw; abs_vf_id = vf->vf_id + hw->func_caps.vf_base_id; @@ -193964,7 +235571,7 @@ index 472f56b360b8c..c078fbaf19fd4 100644 aq_ret = i40e_aq_send_msg_to_vf(hw, abs_vf_id, v_opcode, v_retval, msg, msglen, NULL); if (aq_ret) { -@@ -1939,6 +1969,32 @@ static int i40e_vc_send_resp_to_vf(struct i40e_vf *vf, +@@ -1939,6 +1992,32 @@ static int i40e_vc_send_resp_to_vf(struct i40e_vf *vf, return i40e_vc_send_msg_to_vf(vf, opcode, retval, NULL, 0); } @@ -193997,7 +235604,7 @@ index 472f56b360b8c..c078fbaf19fd4 100644 /** * i40e_vc_get_version_msg * @vf: pointer to the VF info -@@ -1982,6 +2038,25 @@ static void i40e_del_qch(struct i40e_vf *vf) +@@ -1982,6 +2061,25 @@ static void i40e_del_qch(struct i40e_vf *vf) } } @@ -194023,7 +235630,7 @@ index 472f56b360b8c..c078fbaf19fd4 100644 /** * i40e_vc_get_vf_resources_msg * @vf: pointer to the VF info -@@ -1999,7 +2074,7 @@ static int i40e_vc_get_vf_resources_msg(struct i40e_vf *vf, u8 *msg) +@@ -1999,7 +2097,7 @@ static int i40e_vc_get_vf_resources_msg(struct i40e_vf *vf, u8 *msg) size_t len = 0; int ret; @@ -194032,7 +235639,7 @@ index 472f56b360b8c..c078fbaf19fd4 100644 aq_ret = I40E_ERR_PARAM; goto err; } -@@ -2083,6 +2158,7 @@ static int i40e_vc_get_vf_resources_msg(struct i40e_vf *vf, u8 *msg) +@@ -2083,6 +2181,7 @@ static int i40e_vc_get_vf_resources_msg(struct i40e_vf *vf, u8 *msg) vfres->max_vectors = pf->hw.func_caps.num_msix_vectors_vf; vfres->rss_key_size = I40E_HKEY_ARRAY_SIZE; vfres->rss_lut_size = I40E_VF_HLUT_ARRAY_SIZE; @@ -194040,7 +235647,7 @@ index 472f56b360b8c..c078fbaf19fd4 100644 if (vf->lan_vsi_idx) { vfres->vsi_res[0].vsi_id = vf->lan_vsi_id; -@@ -2091,6 +2167,10 @@ static int i40e_vc_get_vf_resources_msg(struct i40e_vf *vf, u8 *msg) +@@ -2091,6 +2190,10 @@ static int i40e_vc_get_vf_resources_msg(struct i40e_vf *vf, u8 *msg) /* VFs only use TC 0 */ vfres->vsi_res[0].qset_handle = le16_to_cpu(vsi->info.qs_handle[0]); @@ -194051,7 +235658,7 @@ index 472f56b360b8c..c078fbaf19fd4 100644 ether_addr_copy(vfres->vsi_res[0].default_mac_addr, vf->default_lan_addr.addr); } -@@ -2105,20 +2185,6 @@ err: +@@ -2105,20 +2208,6 @@ err: return ret; } @@ -194072,7 +235679,7 @@ index 472f56b360b8c..c078fbaf19fd4 100644 /** * i40e_vc_config_promiscuous_mode_msg * @vf: pointer to the VF info -@@ -2136,7 +2202,7 @@ static 
int i40e_vc_config_promiscuous_mode_msg(struct i40e_vf *vf, u8 *msg) +@@ -2136,7 +2225,7 @@ static int i40e_vc_config_promiscuous_mode_msg(struct i40e_vf *vf, u8 *msg) bool allmulti = false; bool alluni = false; @@ -194081,7 +235688,7 @@ index 472f56b360b8c..c078fbaf19fd4 100644 aq_ret = I40E_ERR_PARAM; goto err_out; } -@@ -2217,13 +2283,14 @@ static int i40e_vc_config_queues_msg(struct i40e_vf *vf, u8 *msg) +@@ -2217,13 +2306,14 @@ static int i40e_vc_config_queues_msg(struct i40e_vf *vf, u8 *msg) struct virtchnl_vsi_queue_config_info *qci = (struct virtchnl_vsi_queue_config_info *)msg; struct virtchnl_queue_pair_info *qpi; @@ -194099,7 +235706,7 @@ index 472f56b360b8c..c078fbaf19fd4 100644 aq_ret = I40E_ERR_PARAM; goto error_param; } -@@ -2239,7 +2306,7 @@ static int i40e_vc_config_queues_msg(struct i40e_vf *vf, u8 *msg) +@@ -2239,7 +2329,7 @@ static int i40e_vc_config_queues_msg(struct i40e_vf *vf, u8 *msg) } if (vf->adq_enabled) { @@ -194108,7 +235715,7 @@ index 472f56b360b8c..c078fbaf19fd4 100644 num_qps_all += vf->ch[i].num_qps; if (num_qps_all != qci->num_queue_pairs) { aq_ret = I40E_ERR_PARAM; -@@ -2310,9 +2377,15 @@ static int i40e_vc_config_queues_msg(struct i40e_vf *vf, u8 *msg) +@@ -2310,9 +2400,15 @@ static int i40e_vc_config_queues_msg(struct i40e_vf *vf, u8 *msg) pf->vsi[vf->lan_vsi_idx]->num_queue_pairs = qci->num_queue_pairs; } else { @@ -194127,7 +235734,7 @@ index 472f56b360b8c..c078fbaf19fd4 100644 } error_param: -@@ -2366,7 +2439,7 @@ static int i40e_vc_config_irq_map_msg(struct i40e_vf *vf, u8 *msg) +@@ -2366,7 +2462,7 @@ static int i40e_vc_config_irq_map_msg(struct i40e_vf *vf, u8 *msg) i40e_status aq_ret = 0; int i; @@ -194136,7 +235743,7 @@ index 472f56b360b8c..c078fbaf19fd4 100644 aq_ret = I40E_ERR_PARAM; goto error_param; } -@@ -2538,7 +2611,7 @@ static int i40e_vc_disable_queues_msg(struct i40e_vf *vf, u8 *msg) +@@ -2538,7 +2634,7 @@ static int i40e_vc_disable_queues_msg(struct i40e_vf *vf, u8 *msg) struct i40e_pf *pf = vf->pf; i40e_status aq_ret = 0; @@ -194145,7 +235752,7 @@ index 472f56b360b8c..c078fbaf19fd4 100644 aq_ret = I40E_ERR_PARAM; goto error_param; } -@@ -2570,6 +2643,59 @@ error_param: +@@ -2570,6 +2666,59 @@ error_param: aq_ret); } @@ -194205,7 +235812,7 @@ index 472f56b360b8c..c078fbaf19fd4 100644 /** * i40e_vc_request_queues_msg * @vf: pointer to the VF info -@@ -2588,7 +2714,7 @@ static int i40e_vc_request_queues_msg(struct i40e_vf *vf, u8 *msg) +@@ -2588,7 +2737,7 @@ static int i40e_vc_request_queues_msg(struct i40e_vf *vf, u8 *msg) u8 cur_pairs = vf->num_queue_pairs; struct i40e_pf *pf = vf->pf; @@ -194214,7 +235821,7 @@ index 472f56b360b8c..c078fbaf19fd4 100644 return -EINVAL; if (req_pairs > I40E_MAX_VF_QUEUES) { -@@ -2604,11 +2730,16 @@ static int i40e_vc_request_queues_msg(struct i40e_vf *vf, u8 *msg) +@@ -2604,11 +2753,16 @@ static int i40e_vc_request_queues_msg(struct i40e_vf *vf, u8 *msg) req_pairs - cur_pairs, pf->queues_left); vfres->num_queue_pairs = pf->queues_left + cur_pairs; @@ -194233,7 +235840,7 @@ index 472f56b360b8c..c078fbaf19fd4 100644 return 0; } -@@ -2634,7 +2765,7 @@ static int i40e_vc_get_stats_msg(struct i40e_vf *vf, u8 *msg) +@@ -2634,7 +2788,7 @@ static int i40e_vc_get_stats_msg(struct i40e_vf *vf, u8 *msg) memset(&stats, 0, sizeof(struct i40e_eth_stats)); @@ -194242,7 +235849,7 @@ index 472f56b360b8c..c078fbaf19fd4 100644 aq_ret = I40E_ERR_PARAM; goto error_param; } -@@ -2751,7 +2882,7 @@ static int i40e_vc_add_mac_addr_msg(struct i40e_vf *vf, u8 *msg) +@@ -2751,7 +2905,7 @@ static int i40e_vc_add_mac_addr_msg(struct 
i40e_vf *vf, u8 *msg) i40e_status ret = 0; int i; @@ -194251,7 +235858,7 @@ index 472f56b360b8c..c078fbaf19fd4 100644 !i40e_vc_isvalid_vsi_id(vf, al->vsi_id)) { ret = I40E_ERR_PARAM; goto error_param; -@@ -2802,8 +2933,8 @@ static int i40e_vc_add_mac_addr_msg(struct i40e_vf *vf, u8 *msg) +@@ -2802,8 +2956,8 @@ static int i40e_vc_add_mac_addr_msg(struct i40e_vf *vf, u8 *msg) error_param: /* send the response to the VF */ @@ -194262,7 +235869,7 @@ index 472f56b360b8c..c078fbaf19fd4 100644 } /** -@@ -2823,7 +2954,7 @@ static int i40e_vc_del_mac_addr_msg(struct i40e_vf *vf, u8 *msg) +@@ -2823,7 +2977,7 @@ static int i40e_vc_del_mac_addr_msg(struct i40e_vf *vf, u8 *msg) i40e_status ret = 0; int i; @@ -194271,7 +235878,7 @@ index 472f56b360b8c..c078fbaf19fd4 100644 !i40e_vc_isvalid_vsi_id(vf, al->vsi_id)) { ret = I40E_ERR_PARAM; goto error_param; -@@ -2967,7 +3098,7 @@ static int i40e_vc_remove_vlan_msg(struct i40e_vf *vf, u8 *msg) +@@ -2967,7 +3121,7 @@ static int i40e_vc_remove_vlan_msg(struct i40e_vf *vf, u8 *msg) i40e_status aq_ret = 0; int i; @@ -194280,7 +235887,7 @@ index 472f56b360b8c..c078fbaf19fd4 100644 !i40e_vc_isvalid_vsi_id(vf, vfl->vsi_id)) { aq_ret = I40E_ERR_PARAM; goto error_param; -@@ -3087,9 +3218,9 @@ static int i40e_vc_config_rss_key(struct i40e_vf *vf, u8 *msg) +@@ -3087,9 +3241,9 @@ static int i40e_vc_config_rss_key(struct i40e_vf *vf, u8 *msg) struct i40e_vsi *vsi = NULL; i40e_status aq_ret = 0; @@ -194292,7 +235899,7 @@ index 472f56b360b8c..c078fbaf19fd4 100644 aq_ret = I40E_ERR_PARAM; goto err; } -@@ -3118,9 +3249,9 @@ static int i40e_vc_config_rss_lut(struct i40e_vf *vf, u8 *msg) +@@ -3118,9 +3272,9 @@ static int i40e_vc_config_rss_lut(struct i40e_vf *vf, u8 *msg) i40e_status aq_ret = 0; u16 i; @@ -194304,7 +235911,7 @@ index 472f56b360b8c..c078fbaf19fd4 100644 aq_ret = I40E_ERR_PARAM; goto err; } -@@ -3153,7 +3284,7 @@ static int i40e_vc_get_rss_hena(struct i40e_vf *vf, u8 *msg) +@@ -3153,7 +3307,7 @@ static int i40e_vc_get_rss_hena(struct i40e_vf *vf, u8 *msg) i40e_status aq_ret = 0; int len = 0; @@ -194313,7 +235920,7 @@ index 472f56b360b8c..c078fbaf19fd4 100644 aq_ret = I40E_ERR_PARAM; goto err; } -@@ -3189,7 +3320,7 @@ static int i40e_vc_set_rss_hena(struct i40e_vf *vf, u8 *msg) +@@ -3189,7 +3343,7 @@ static int i40e_vc_set_rss_hena(struct i40e_vf *vf, u8 *msg) struct i40e_hw *hw = &pf->hw; i40e_status aq_ret = 0; @@ -194322,7 +235929,7 @@ index 472f56b360b8c..c078fbaf19fd4 100644 aq_ret = I40E_ERR_PARAM; goto err; } -@@ -3214,7 +3345,7 @@ static int i40e_vc_enable_vlan_stripping(struct i40e_vf *vf, u8 *msg) +@@ -3214,7 +3368,7 @@ static int i40e_vc_enable_vlan_stripping(struct i40e_vf *vf, u8 *msg) i40e_status aq_ret = 0; struct i40e_vsi *vsi; @@ -194331,7 +235938,7 @@ index 472f56b360b8c..c078fbaf19fd4 100644 aq_ret = I40E_ERR_PARAM; goto err; } -@@ -3240,7 +3371,7 @@ static int i40e_vc_disable_vlan_stripping(struct i40e_vf *vf, u8 *msg) +@@ -3240,7 +3394,7 @@ static int i40e_vc_disable_vlan_stripping(struct i40e_vf *vf, u8 *msg) i40e_status aq_ret = 0; struct i40e_vsi *vsi; @@ -194340,7 +235947,7 @@ index 472f56b360b8c..c078fbaf19fd4 100644 aq_ret = I40E_ERR_PARAM; goto err; } -@@ -3467,7 +3598,7 @@ static int i40e_vc_del_cloud_filter(struct i40e_vf *vf, u8 *msg) +@@ -3467,7 +3621,7 @@ static int i40e_vc_del_cloud_filter(struct i40e_vf *vf, u8 *msg) i40e_status aq_ret = 0; int i, ret; @@ -194349,7 +235956,7 @@ index 472f56b360b8c..c078fbaf19fd4 100644 aq_ret = I40E_ERR_PARAM; goto err; } -@@ -3598,7 +3729,7 @@ static int i40e_vc_add_cloud_filter(struct i40e_vf 
*vf, u8 *msg) +@@ -3598,7 +3752,7 @@ static int i40e_vc_add_cloud_filter(struct i40e_vf *vf, u8 *msg) i40e_status aq_ret = 0; int i, ret; @@ -194358,7 +235965,7 @@ index 472f56b360b8c..c078fbaf19fd4 100644 aq_ret = I40E_ERR_PARAM; goto err_out; } -@@ -3707,7 +3838,7 @@ static int i40e_vc_add_qch_msg(struct i40e_vf *vf, u8 *msg) +@@ -3707,7 +3861,7 @@ static int i40e_vc_add_qch_msg(struct i40e_vf *vf, u8 *msg) i40e_status aq_ret = 0; u64 speed = 0; @@ -194367,7 +235974,7 @@ index 472f56b360b8c..c078fbaf19fd4 100644 aq_ret = I40E_ERR_PARAM; goto err; } -@@ -3796,15 +3927,9 @@ static int i40e_vc_add_qch_msg(struct i40e_vf *vf, u8 *msg) +@@ -3796,15 +3950,9 @@ static int i40e_vc_add_qch_msg(struct i40e_vf *vf, u8 *msg) /* set this flag only after making sure all inputs are sane */ vf->adq_enabled = true; @@ -194384,7 +235991,7 @@ index 472f56b360b8c..c078fbaf19fd4 100644 return I40E_SUCCESS; -@@ -3824,7 +3949,7 @@ static int i40e_vc_del_qch_msg(struct i40e_vf *vf, u8 *msg) +@@ -3824,7 +3972,7 @@ static int i40e_vc_del_qch_msg(struct i40e_vf *vf, u8 *msg) struct i40e_pf *pf = vf->pf; i40e_status aq_ret = 0; @@ -194393,7 +236000,7 @@ index 472f56b360b8c..c078fbaf19fd4 100644 aq_ret = I40E_ERR_PARAM; goto err; } -@@ -3844,8 +3969,7 @@ static int i40e_vc_del_qch_msg(struct i40e_vf *vf, u8 *msg) +@@ -3844,8 +3992,7 @@ static int i40e_vc_del_qch_msg(struct i40e_vf *vf, u8 *msg) } /* reset the VF in order to allocate resources */ @@ -194403,7 +236010,7 @@ index 472f56b360b8c..c078fbaf19fd4 100644 return I40E_SUCCESS; -@@ -3907,7 +4031,7 @@ int i40e_vc_process_vf_msg(struct i40e_pf *pf, s16 vf_id, u32 v_opcode, +@@ -3907,7 +4054,7 @@ int i40e_vc_process_vf_msg(struct i40e_pf *pf, s16 vf_id, u32 v_opcode, i40e_vc_notify_vf_link_state(vf); break; case VIRTCHNL_OP_RESET_VF: @@ -194412,7 +236019,7 @@ index 472f56b360b8c..c078fbaf19fd4 100644 ret = 0; break; case VIRTCHNL_OP_CONFIG_PROMISCUOUS_MODE: -@@ -4161,7 +4285,7 @@ int i40e_ndo_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac) +@@ -4161,7 +4308,7 @@ int i40e_ndo_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac) /* Force the VF interface down so it has to bring up with new MAC * address */ @@ -194421,7 +236028,7 @@ index 472f56b360b8c..c078fbaf19fd4 100644 dev_info(&pf->pdev->dev, "Bring down and up the VF interface to make this change effective.\n"); error_param: -@@ -4169,34 +4293,6 @@ error_param: +@@ -4169,34 +4316,6 @@ error_param: return ret; } @@ -194456,7 +236063,7 @@ index 472f56b360b8c..c078fbaf19fd4 100644 /** * i40e_ndo_set_vf_port_vlan * @netdev: network interface device structure -@@ -4253,19 +4349,9 @@ int i40e_ndo_set_vf_port_vlan(struct net_device *netdev, int vf_id, +@@ -4253,19 +4372,9 @@ int i40e_ndo_set_vf_port_vlan(struct net_device *netdev, int vf_id, /* duplicate request, so just return success */ goto error_pvid; @@ -194479,7 +236086,7 @@ index 472f56b360b8c..c078fbaf19fd4 100644 /* Locked once because multiple functions below iterate list */ spin_lock_bh(&vsi->mac_filter_hash_lock); -@@ -4641,7 +4727,7 @@ int i40e_ndo_set_vf_trust(struct net_device *netdev, int vf_id, bool setting) +@@ -4641,7 +4750,7 @@ int i40e_ndo_set_vf_trust(struct net_device *netdev, int vf_id, bool setting) goto out; vf->trusted = setting; @@ -194489,7 +236096,7 @@ index 472f56b360b8c..c078fbaf19fd4 100644 vf_id, setting ? 
"" : "un"); diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h -index 091e32c1bb46f..a554d0a0b09bd 100644 +index 091e32c1bb46f..358bbdb587951 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h @@ -10,14 +10,15 @@ @@ -194510,7 +236117,15 @@ index 091e32c1bb46f..a554d0a0b09bd 100644 /* Various queue ctrls */ enum i40e_queue_ctrl { I40E_QUEUE_CTRL_UNKNOWN = 0, -@@ -89,9 +90,6 @@ struct i40e_vf { +@@ -38,6 +39,7 @@ enum i40e_vf_states { + I40E_VF_STATE_MC_PROMISC, + I40E_VF_STATE_UC_PROMISC, + I40E_VF_STATE_PRE_ENABLE, ++ I40E_VF_STATE_RESETTING + }; + + /* VF capabilities */ +@@ -89,9 +91,6 @@ struct i40e_vf { u8 num_queue_pairs; /* num of qps assigned to VF vsis */ u8 num_req_queues; /* num of requested qps */ u64 num_mdd_events; /* num of mdd events detected */ @@ -195074,7 +236689,7 @@ index 5a359a0a20ecc..461f5237a2f88 100644 return 0; } diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c -index cada4e0e40b48..629ebdfa48b8f 100644 +index cada4e0e40b48..82c4f1190e41c 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_main.c +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c @@ -14,7 +14,7 @@ @@ -195358,9 +236973,7 @@ index cada4e0e40b48..629ebdfa48b8f 100644 + + if (adapter->state <= __IAVF_DOWN_PENDING) + return; - -- if (!(adapter->flags & IAVF_FLAG_PF_COMMS_FAILED) && -- adapter->state != __IAVF_RESETTING) { ++ + netif_carrier_off(netdev); + netif_tx_disable(netdev); + adapter->link_up = false; @@ -195371,7 +236984,9 @@ index cada4e0e40b48..629ebdfa48b8f 100644 + iavf_clear_cloud_filters(adapter); + iavf_clear_fdir_filters(adapter); + iavf_clear_adv_rss_conf(adapter); -+ + +- if (!(adapter->flags & IAVF_FLAG_PF_COMMS_FAILED) && +- adapter->state != __IAVF_RESETTING) { + if (!(adapter->flags & IAVF_FLAG_PF_COMMS_FAILED)) { /* cancel any current operation */ adapter->current_op = VIRTCHNL_OP_UNKNOWN; @@ -195397,7 +237012,26 @@ index cada4e0e40b48..629ebdfa48b8f 100644 adapter->aq_required |= IAVF_FLAG_AQ_DISABLE_QUEUES; } -@@ -1630,8 +1746,7 @@ static int iavf_process_aq_command(struct iavf_adapter *adapter) +@@ -1332,7 +1448,6 @@ static void iavf_fill_rss_lut(struct iavf_adapter *adapter) + static int iavf_init_rss(struct iavf_adapter *adapter) + { + struct iavf_hw *hw = &adapter->hw; +- int ret; + + if (!RSS_PF(adapter)) { + /* Enable PCTYPES for RSS, TCP/UDP with IPv4/IPv6 */ +@@ -1348,9 +1463,8 @@ static int iavf_init_rss(struct iavf_adapter *adapter) + + iavf_fill_rss_lut(adapter); + netdev_rss_key_fill((void *)adapter->rss_key, adapter->rss_key_size); +- ret = iavf_config_rss(adapter); + +- return ret; ++ return iavf_config_rss(adapter); + } + + /** +@@ -1630,8 +1744,7 @@ static int iavf_process_aq_command(struct iavf_adapter *adapter) iavf_set_promiscuous(adapter, FLAG_VF_MULTICAST_PROMISC); return 0; } @@ -195407,7 +237041,7 @@ index cada4e0e40b48..629ebdfa48b8f 100644 (adapter->aq_required & IAVF_FLAG_AQ_RELEASE_ALLMULTI)) { iavf_set_promiscuous(adapter, 0); return 0; -@@ -1679,6 +1794,11 @@ static int iavf_process_aq_command(struct iavf_adapter *adapter) +@@ -1679,6 +1792,11 @@ static int iavf_process_aq_command(struct iavf_adapter *adapter) iavf_del_adv_rss_cfg(adapter); return 0; } @@ -195419,7 +237053,7 @@ index cada4e0e40b48..629ebdfa48b8f 100644 return -EAGAIN; } -@@ -1688,9 +1808,9 @@ static int iavf_process_aq_command(struct iavf_adapter *adapter) +@@ -1688,9 +1806,9 @@ static int 
iavf_process_aq_command(struct iavf_adapter *adapter) * * Function process __IAVF_STARTUP driver state. * When success the state is changed to __IAVF_INIT_VERSION_CHECK @@ -195431,7 +237065,7 @@ index cada4e0e40b48..629ebdfa48b8f 100644 { struct pci_dev *pdev = adapter->pdev; struct iavf_hw *hw = &adapter->hw; -@@ -1729,9 +1849,10 @@ static int iavf_startup(struct iavf_adapter *adapter) +@@ -1729,9 +1847,10 @@ static int iavf_startup(struct iavf_adapter *adapter) iavf_shutdown_adminq(hw); goto err; } @@ -195444,7 +237078,7 @@ index cada4e0e40b48..629ebdfa48b8f 100644 } /** -@@ -1740,9 +1861,9 @@ err: +@@ -1740,9 +1859,9 @@ err: * * Function process __IAVF_INIT_VERSION_CHECK driver state. * When success the state is changed to __IAVF_INIT_GET_RESOURCES @@ -195456,7 +237090,7 @@ index cada4e0e40b48..629ebdfa48b8f 100644 { struct pci_dev *pdev = adapter->pdev; struct iavf_hw *hw = &adapter->hw; -@@ -1753,7 +1874,7 @@ static int iavf_init_version_check(struct iavf_adapter *adapter) +@@ -1753,7 +1872,7 @@ static int iavf_init_version_check(struct iavf_adapter *adapter) if (!iavf_asq_done(hw)) { dev_err(&pdev->dev, "Admin queue command never completed\n"); iavf_shutdown_adminq(hw); @@ -195465,7 +237099,7 @@ index cada4e0e40b48..629ebdfa48b8f 100644 goto err; } -@@ -1776,10 +1897,10 @@ static int iavf_init_version_check(struct iavf_adapter *adapter) +@@ -1776,10 +1895,10 @@ static int iavf_init_version_check(struct iavf_adapter *adapter) err); goto err; } @@ -195479,7 +237113,7 @@ index cada4e0e40b48..629ebdfa48b8f 100644 } /** -@@ -1789,9 +1910,9 @@ err: +@@ -1789,9 +1908,9 @@ err: * Function process __IAVF_INIT_GET_RESOURCES driver state and * finishes driver initialization procedure. * When success the state is changed to __IAVF_DOWN @@ -195491,7 +237125,7 @@ index cada4e0e40b48..629ebdfa48b8f 100644 { struct net_device *netdev = adapter->netdev; struct pci_dev *pdev = adapter->pdev; -@@ -1819,7 +1940,7 @@ static int iavf_init_get_resources(struct iavf_adapter *adapter) +@@ -1819,7 +1938,7 @@ static int iavf_init_get_resources(struct iavf_adapter *adapter) */ iavf_shutdown_adminq(hw); dev_err(&pdev->dev, "Unable to get VF config due to PF error condition, not retrying\n"); @@ -195500,7 +237134,7 @@ index cada4e0e40b48..629ebdfa48b8f 100644 } if (err) { dev_err(&pdev->dev, "Unable to get VF config (%d)\n", err); -@@ -1893,7 +2014,7 @@ static int iavf_init_get_resources(struct iavf_adapter *adapter) +@@ -1893,7 +2012,7 @@ static int iavf_init_get_resources(struct iavf_adapter *adapter) if (netdev->features & NETIF_F_GRO) dev_info(&pdev->dev, "GRO is enabled\n"); @@ -195509,7 +237143,7 @@ index cada4e0e40b48..629ebdfa48b8f 100644 set_bit(__IAVF_VSI_DOWN, adapter->vsi.state); rtnl_unlock(); -@@ -1911,7 +2032,7 @@ static int iavf_init_get_resources(struct iavf_adapter *adapter) +@@ -1911,7 +2030,7 @@ static int iavf_init_get_resources(struct iavf_adapter *adapter) else iavf_init_rss(adapter); @@ -195518,7 +237152,7 @@ index cada4e0e40b48..629ebdfa48b8f 100644 err_mem: iavf_free_rss(adapter); err_register: -@@ -1922,7 +2043,7 @@ err_alloc: +@@ -1922,7 +2041,7 @@ err_alloc: kfree(adapter->vf_res); adapter->vf_res = NULL; err: @@ -195527,7 +237161,7 @@ index cada4e0e40b48..629ebdfa48b8f 100644 } /** -@@ -1937,14 +2058,80 @@ static void iavf_watchdog_task(struct work_struct *work) +@@ -1937,14 +2056,80 @@ static void iavf_watchdog_task(struct work_struct *work) struct iavf_hw *hw = &adapter->hw; u32 reg_val; @@ -195610,7 +237244,7 @@ index cada4e0e40b48..629ebdfa48b8f 100644 reg_val = rd32(hw, 
IAVF_VFGEN_RSTAT) & IAVF_VFGEN_RSTAT_VFR_STATE_MASK; if (reg_val == VIRTCHNL_VFR_VFACTIVE || -@@ -1952,23 +2139,20 @@ static void iavf_watchdog_task(struct work_struct *work) +@@ -1952,23 +2137,20 @@ static void iavf_watchdog_task(struct work_struct *work) /* A chance for redemption! */ dev_err(&adapter->pdev->dev, "Hardware came out of reset. Attempting reinit.\n"); @@ -195639,7 +237273,7 @@ index cada4e0e40b48..629ebdfa48b8f 100644 case __IAVF_RESETTING: mutex_unlock(&adapter->crit_lock); queue_delayed_work(iavf_wq, &adapter->watchdog_task, HZ * 2); -@@ -1991,15 +2175,16 @@ static void iavf_watchdog_task(struct work_struct *work) +@@ -1991,15 +2173,16 @@ static void iavf_watchdog_task(struct work_struct *work) adapter->state == __IAVF_RUNNING) iavf_request_stats(adapter); } @@ -195659,7 +237293,7 @@ index cada4e0e40b48..629ebdfa48b8f 100644 reg_val = rd32(hw, IAVF_VF_ARQLEN1) & IAVF_VF_ARQLEN1_ARQENABLE_MASK; if (!reg_val) { adapter->flags |= IAVF_FLAG_RESET_PENDING; -@@ -2007,24 +2192,31 @@ static void iavf_watchdog_task(struct work_struct *work) +@@ -2007,24 +2190,31 @@ static void iavf_watchdog_task(struct work_struct *work) adapter->current_op = VIRTCHNL_OP_UNKNOWN; dev_err(&adapter->pdev->dev, "Hardware reset detected\n"); queue_work(iavf_wq, &adapter->reset_task); @@ -195697,7 +237331,7 @@ index cada4e0e40b48..629ebdfa48b8f 100644 static void iavf_disable_vf(struct iavf_adapter *adapter) { struct iavf_mac_filter *f, *ftmp; -@@ -2074,14 +2266,13 @@ static void iavf_disable_vf(struct iavf_adapter *adapter) +@@ -2074,14 +2264,12 @@ static void iavf_disable_vf(struct iavf_adapter *adapter) iavf_free_misc_irq(adapter); iavf_reset_interrupt_capability(adapter); @@ -195706,7 +237340,7 @@ index cada4e0e40b48..629ebdfa48b8f 100644 + iavf_free_queues(adapter); memset(adapter->vf_res, 0, IAVF_VIRTCHNL_VF_RESOURCE_SIZE); iavf_shutdown_adminq(&adapter->hw); - adapter->netdev->flags &= ~IFF_UP; +- adapter->netdev->flags &= ~IFF_UP; - mutex_unlock(&adapter->crit_lock); adapter->flags &= ~IAVF_FLAG_RESET_PENDING; - adapter->state = __IAVF_DOWN; @@ -195714,7 +237348,7 @@ index cada4e0e40b48..629ebdfa48b8f 100644 wake_up(&adapter->down_waitqueue); dev_info(&adapter->pdev->dev, "Reset task did not complete, VF disabled\n"); } -@@ -2103,22 +2294,26 @@ static void iavf_reset_task(struct work_struct *work) +@@ -2103,22 +2291,26 @@ static void iavf_reset_task(struct work_struct *work) struct net_device *netdev = adapter->netdev; struct iavf_hw *hw = &adapter->hw; struct iavf_mac_filter *f, *ftmp; @@ -195747,7 +237381,7 @@ index cada4e0e40b48..629ebdfa48b8f 100644 while (!mutex_trylock(&adapter->client_lock)) usleep_range(500, 1000); if (CLIENT_ENABLED(adapter)) { -@@ -2166,12 +2361,14 @@ static void iavf_reset_task(struct work_struct *work) +@@ -2166,12 +2358,19 @@ static void iavf_reset_task(struct work_struct *work) } pci_set_master(adapter->pdev); @@ -195759,10 +237393,15 @@ index cada4e0e40b48..629ebdfa48b8f 100644 iavf_disable_vf(adapter); mutex_unlock(&adapter->client_lock); + mutex_unlock(&adapter->crit_lock); ++ if (netif_running(netdev)) { ++ rtnl_lock(); ++ dev_close(netdev); ++ rtnl_unlock(); ++ } return; /* Do not attempt to reinit. It's dead, Jim. */ } -@@ -2180,18 +2377,16 @@ continue_reset: +@@ -2180,8 +2379,7 @@ continue_reset: * ndo_open() returning, so we can't assume it means all our open * tasks have finished, since we're not holding the rtnl_lock here. 
*/ @@ -195772,9 +237411,7 @@ index cada4e0e40b48..629ebdfa48b8f 100644 if (running) { netif_carrier_off(netdev); -- netif_tx_stop_all_queues(netdev); - adapter->link_up = false; - iavf_napi_disable_all(adapter); +@@ -2191,7 +2389,7 @@ continue_reset: } iavf_irq_disable(adapter); @@ -195783,7 +237420,7 @@ index cada4e0e40b48..629ebdfa48b8f 100644 adapter->flags &= ~IAVF_FLAG_RESET_PENDING; /* free the Tx/Rx rings and descriptors, might be better to just -@@ -2243,11 +2438,6 @@ continue_reset: +@@ -2243,11 +2441,6 @@ continue_reset: list_for_each_entry(f, &adapter->mac_filter_list, list) { f->add = true; } @@ -195795,7 +237432,7 @@ index cada4e0e40b48..629ebdfa48b8f 100644 spin_unlock_bh(&adapter->mac_vlan_list_lock); /* check if TCs are running and re-add all cloud filters */ -@@ -2261,7 +2451,6 @@ continue_reset: +@@ -2261,7 +2454,6 @@ continue_reset: spin_unlock_bh(&adapter->cloud_filter_list_lock); adapter->aq_required |= IAVF_FLAG_AQ_ADD_MAC_FILTER; @@ -195803,7 +237440,7 @@ index cada4e0e40b48..629ebdfa48b8f 100644 adapter->aq_required |= IAVF_FLAG_AQ_ADD_CLOUD_FILTER; iavf_misc_irq_enable(adapter); -@@ -2291,22 +2480,34 @@ continue_reset: +@@ -2291,22 +2483,44 @@ continue_reset: iavf_configure(adapter); @@ -195832,6 +237469,16 @@ index cada4e0e40b48..629ebdfa48b8f 100644 + mutex_unlock(&adapter->client_lock); mutex_unlock(&adapter->crit_lock); ++ ++ if (netif_running(netdev)) { ++ /* Close device to ensure that Tx queues will not be started ++ * during netif_device_attach() at the end of the reset task. ++ */ ++ rtnl_lock(); ++ dev_close(netdev); ++ rtnl_unlock(); ++ } ++ dev_err(&adapter->pdev->dev, "failed to allocate resources during reinit\n"); - iavf_close(netdev); +reset_finish: @@ -195841,7 +237488,7 @@ index cada4e0e40b48..629ebdfa48b8f 100644 } /** -@@ -2327,13 +2528,19 @@ static void iavf_adminq_task(struct work_struct *work) +@@ -2327,13 +2541,19 @@ static void iavf_adminq_task(struct work_struct *work) if (adapter->flags & IAVF_FLAG_PF_COMMS_FAILED) goto out; @@ -195863,7 +237510,7 @@ index cada4e0e40b48..629ebdfa48b8f 100644 do { ret = iavf_clean_arq_element(hw, &event, &pending); v_op = (enum virtchnl_ops)le32_to_cpu(event.desc.cookie_high); -@@ -2349,6 +2556,18 @@ static void iavf_adminq_task(struct work_struct *work) +@@ -2349,6 +2569,18 @@ static void iavf_adminq_task(struct work_struct *work) } while (pending); mutex_unlock(&adapter->crit_lock); @@ -195882,7 +237529,7 @@ index cada4e0e40b48..629ebdfa48b8f 100644 if ((adapter->flags & (IAVF_FLAG_RESET_PENDING | IAVF_FLAG_RESET_NEEDED)) || adapter->state == __IAVF_RESETTING) -@@ -2356,7 +2575,7 @@ static void iavf_adminq_task(struct work_struct *work) +@@ -2356,7 +2588,7 @@ static void iavf_adminq_task(struct work_struct *work) /* check for error indications */ val = rd32(hw, hw->aq.arq.len); @@ -195891,7 +237538,7 @@ index cada4e0e40b48..629ebdfa48b8f 100644 goto freedom; oldval = val; if (val & IAVF_VF_ARQLEN1_ARQVFE_MASK) { -@@ -2601,6 +2820,7 @@ static int iavf_validate_ch_config(struct iavf_adapter *adapter, +@@ -2601,6 +2833,7 @@ static int iavf_validate_ch_config(struct iavf_adapter *adapter, struct tc_mqprio_qopt_offload *mqprio_qopt) { u64 total_max_rate = 0; @@ -195899,7 +237546,7 @@ index cada4e0e40b48..629ebdfa48b8f 100644 int i, num_qps = 0; u64 tx_rate = 0; int ret = 0; -@@ -2615,17 +2835,40 @@ static int iavf_validate_ch_config(struct iavf_adapter *adapter, +@@ -2615,17 +2848,40 @@ static int iavf_validate_ch_config(struct iavf_adapter *adapter, return -EINVAL; if (mqprio_qopt->min_rate[i]) { 
dev_err(&adapter->pdev->dev, @@ -195943,7 +237590,7 @@ index cada4e0e40b48..629ebdfa48b8f 100644 ret = iavf_validate_tx_bandwidth(adapter, total_max_rate); return ret; -@@ -2684,6 +2927,7 @@ static int __iavf_setup_tc(struct net_device *netdev, void *type_data) +@@ -2684,6 +2940,7 @@ static int __iavf_setup_tc(struct net_device *netdev, void *type_data) netif_tx_disable(netdev); iavf_del_all_cloud_filters(adapter); adapter->aq_required = IAVF_FLAG_AQ_DISABLE_CHANNELS; @@ -195951,7 +237598,7 @@ index cada4e0e40b48..629ebdfa48b8f 100644 goto exit; } else { return -EINVAL; -@@ -2727,7 +2971,21 @@ static int __iavf_setup_tc(struct net_device *netdev, void *type_data) +@@ -2727,7 +2984,21 @@ static int __iavf_setup_tc(struct net_device *netdev, void *type_data) adapter->ch_config.ch_info[i].offset = 0; } } @@ -195973,7 +237620,7 @@ index cada4e0e40b48..629ebdfa48b8f 100644 netif_tx_stop_all_queues(netdev); netif_tx_disable(netdev); adapter->aq_required |= IAVF_FLAG_AQ_ENABLE_CHANNELS; -@@ -2744,6 +3002,12 @@ static int __iavf_setup_tc(struct net_device *netdev, void *type_data) +@@ -2744,6 +3015,12 @@ static int __iavf_setup_tc(struct net_device *netdev, void *type_data) } } exit: @@ -195986,7 +237633,7 @@ index cada4e0e40b48..629ebdfa48b8f 100644 return ret; } -@@ -3041,8 +3305,10 @@ static int iavf_configure_clsflower(struct iavf_adapter *adapter, +@@ -3041,8 +3318,10 @@ static int iavf_configure_clsflower(struct iavf_adapter *adapter, return -ENOMEM; while (!mutex_trylock(&adapter->crit_lock)) { @@ -195999,7 +237646,7 @@ index cada4e0e40b48..629ebdfa48b8f 100644 udelay(1); } -@@ -3053,11 +3319,11 @@ static int iavf_configure_clsflower(struct iavf_adapter *adapter, +@@ -3053,11 +3332,11 @@ static int iavf_configure_clsflower(struct iavf_adapter *adapter, /* start out with flow type and eth type IPv4 to begin with */ filter->f.flow_type = VIRTCHNL_TCP_V4_FLOW; err = iavf_parse_cls_flower(adapter, cls_flower, filter); @@ -196013,7 +237660,7 @@ index cada4e0e40b48..629ebdfa48b8f 100644 goto err; /* add filter to the list */ -@@ -3226,6 +3492,13 @@ static int iavf_open(struct net_device *netdev) +@@ -3226,6 +3505,13 @@ static int iavf_open(struct net_device *netdev) goto err_unlock; } @@ -196027,7 +237674,7 @@ index cada4e0e40b48..629ebdfa48b8f 100644 /* allocate transmit descriptors */ err = iavf_setup_all_tx_resources(adapter); if (err) -@@ -3247,6 +3520,9 @@ static int iavf_open(struct net_device *netdev) +@@ -3247,6 +3533,9 @@ static int iavf_open(struct net_device *netdev) spin_unlock_bh(&adapter->mac_vlan_list_lock); @@ -196037,7 +237684,7 @@ index cada4e0e40b48..629ebdfa48b8f 100644 iavf_configure(adapter); iavf_up_complete(adapter); -@@ -3284,20 +3560,45 @@ err_unlock: +@@ -3284,20 +3573,45 @@ err_unlock: static int iavf_close(struct net_device *netdev) { struct iavf_adapter *adapter = netdev_priv(netdev); @@ -196088,7 +237735,7 @@ index cada4e0e40b48..629ebdfa48b8f 100644 iavf_free_traffic_irqs(adapter); mutex_unlock(&adapter->crit_lock); -@@ -3318,6 +3619,10 @@ static int iavf_close(struct net_device *netdev) +@@ -3318,6 +3632,10 @@ static int iavf_close(struct net_device *netdev) msecs_to_jiffies(500)); if (!status) netdev_warn(netdev, "Device resources not yet released\n"); @@ -196099,7 +237746,7 @@ index cada4e0e40b48..629ebdfa48b8f 100644 return 0; } -@@ -3337,8 +3642,11 @@ static int iavf_change_mtu(struct net_device *netdev, int new_mtu) +@@ -3337,8 +3655,11 @@ static int iavf_change_mtu(struct net_device *netdev, int new_mtu) iavf_notify_client_l2_params(&adapter->vsi); 
adapter->flags |= IAVF_FLAG_SERVICE_CLIENT_REQUESTED; } @@ -196113,7 +237760,7 @@ index cada4e0e40b48..629ebdfa48b8f 100644 return 0; } -@@ -3354,11 +3662,16 @@ static int iavf_set_features(struct net_device *netdev, +@@ -3354,11 +3675,16 @@ static int iavf_set_features(struct net_device *netdev, { struct iavf_adapter *adapter = netdev_priv(netdev); @@ -196133,7 +237780,7 @@ index cada4e0e40b48..629ebdfa48b8f 100644 return -EINVAL; } else if ((netdev->features ^ features) & NETIF_F_HW_VLAN_CTAG_RX) { if (features & NETIF_F_HW_VLAN_CTAG_RX) -@@ -3442,7 +3755,8 @@ static netdev_features_t iavf_fix_features(struct net_device *netdev, +@@ -3442,7 +3768,8 @@ static netdev_features_t iavf_fix_features(struct net_device *netdev, { struct iavf_adapter *adapter = netdev_priv(netdev); @@ -196143,7 +237790,7 @@ index cada4e0e40b48..629ebdfa48b8f 100644 features &= ~(NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_CTAG_FILTER); -@@ -3630,72 +3944,14 @@ int iavf_process_config(struct iavf_adapter *adapter) +@@ -3630,72 +3957,14 @@ int iavf_process_config(struct iavf_adapter *adapter) return 0; } @@ -196218,7 +237865,7 @@ index cada4e0e40b48..629ebdfa48b8f 100644 netif_device_detach(netdev); -@@ -3705,7 +3961,7 @@ static void iavf_shutdown(struct pci_dev *pdev) +@@ -3705,7 +3974,7 @@ static void iavf_shutdown(struct pci_dev *pdev) if (iavf_lock_timeout(&adapter->crit_lock, 5000)) dev_warn(&adapter->pdev->dev, "failed to acquire crit_lock in %s\n", __FUNCTION__); /* Prevent the watchdog from running. */ @@ -196227,7 +237874,7 @@ index cada4e0e40b48..629ebdfa48b8f 100644 adapter->aq_required = 0; mutex_unlock(&adapter->crit_lock); -@@ -3778,7 +4034,7 @@ static int iavf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) +@@ -3778,7 +4047,7 @@ static int iavf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) hw->back = adapter; adapter->msg_enable = BIT(DEFAULT_DEBUG_LEVEL_SHIFT) - 1; @@ -196236,7 +237883,7 @@ index cada4e0e40b48..629ebdfa48b8f 100644 /* Call save state here because it relies on the adapter struct. 
*/ pci_save_state(pdev); -@@ -3803,7 +4059,6 @@ static int iavf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) +@@ -3803,7 +4072,6 @@ static int iavf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) */ mutex_init(&adapter->crit_lock); mutex_init(&adapter->client_lock); @@ -196244,7 +237891,7 @@ index cada4e0e40b48..629ebdfa48b8f 100644 mutex_init(&hw->aq.asq_mutex); mutex_init(&hw->aq.arq_mutex); -@@ -3822,8 +4077,7 @@ static int iavf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) +@@ -3822,8 +4090,7 @@ static int iavf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) INIT_WORK(&adapter->adminq_task, iavf_adminq_task); INIT_DELAYED_WORK(&adapter->watchdog_task, iavf_watchdog_task); INIT_DELAYED_WORK(&adapter->client_task, iavf_client_task); @@ -196254,7 +237901,7 @@ index cada4e0e40b48..629ebdfa48b8f 100644 msecs_to_jiffies(5 * (pdev->devfn & 0x07))); /* Setup the wait queue for indicating transition to down status */ -@@ -3880,10 +4134,11 @@ static int __maybe_unused iavf_suspend(struct device *dev_d) +@@ -3880,10 +4147,11 @@ static int __maybe_unused iavf_suspend(struct device *dev_d) static int __maybe_unused iavf_resume(struct device *dev_d) { struct pci_dev *pdev = to_pci_dev(dev_d); @@ -196268,7 +237915,7 @@ index cada4e0e40b48..629ebdfa48b8f 100644 pci_set_master(pdev); rtnl_lock(); -@@ -3902,7 +4157,7 @@ static int __maybe_unused iavf_resume(struct device *dev_d) +@@ -3902,7 +4170,7 @@ static int __maybe_unused iavf_resume(struct device *dev_d) queue_work(iavf_wq, &adapter->reset_task); @@ -196277,20 +237924,22 @@ index cada4e0e40b48..629ebdfa48b8f 100644 return err; } -@@ -3918,8 +4173,8 @@ static int __maybe_unused iavf_resume(struct device *dev_d) +@@ -3918,23 +4186,44 @@ static int __maybe_unused iavf_resume(struct device *dev_d) **/ static void iavf_remove(struct pci_dev *pdev) { - struct net_device *netdev = pci_get_drvdata(pdev); - struct iavf_adapter *adapter = netdev_priv(netdev); + struct iavf_adapter *adapter = iavf_pdev_to_adapter(pdev); -+ struct net_device *netdev = adapter->netdev; struct iavf_fdir_fltr *fdir, *fdirtmp; struct iavf_vlan_filter *vlf, *vlftmp; ++ struct iavf_cloud_filter *cf, *cftmp; struct iavf_adv_rss *rss, *rsstmp; -@@ -3927,14 +4182,37 @@ static void iavf_remove(struct pci_dev *pdev) - struct iavf_cloud_filter *cf, *cftmp; - struct iavf_hw *hw = &adapter->hw; + struct iavf_mac_filter *f, *ftmp; +- struct iavf_cloud_filter *cf, *cftmp; +- struct iavf_hw *hw = &adapter->hw; ++ struct net_device *netdev; ++ struct iavf_hw *hw; int err; - /* Indicate we are in remove and not to run reset_task */ - mutex_lock(&adapter->remove_lock); @@ -196298,14 +237947,12 @@ index cada4e0e40b48..629ebdfa48b8f 100644 - cancel_work_sync(&adapter->reset_task); - cancel_delayed_work_sync(&adapter->client_task); + -+ /* When reboot/shutdown is in progress no need to do anything -+ * as the adapter is already REMOVE state that was set during -+ * iavf_shutdown() callback. -+ */ -+ if (adapter->state == __IAVF_REMOVE) ++ netdev = adapter->netdev; ++ hw = &adapter->hw; ++ ++ if (test_and_set_bit(__IAVF_IN_REMOVE_TASK, &adapter->crit_section)) + return; + -+ set_bit(__IAVF_IN_REMOVE_TASK, &adapter->crit_section); + /* Wait until port initialization is complete. + * There are flows where register/unregister netdev may race. 
+ */ @@ -196332,7 +237979,7 @@ index cada4e0e40b48..629ebdfa48b8f 100644 } if (CLIENT_ALLOWED(adapter)) { err = iavf_lan_del_device(adapter); -@@ -3943,6 +4221,10 @@ static void iavf_remove(struct pci_dev *pdev) +@@ -3943,6 +4232,10 @@ static void iavf_remove(struct pci_dev *pdev) err); } @@ -196343,7 +237990,7 @@ index cada4e0e40b48..629ebdfa48b8f 100644 iavf_request_reset(adapter); msleep(50); /* If the FW isn't responding, kick it once, but only once. */ -@@ -3950,24 +4232,24 @@ static void iavf_remove(struct pci_dev *pdev) +@@ -3950,24 +4243,24 @@ static void iavf_remove(struct pci_dev *pdev) iavf_request_reset(adapter); msleep(50); } @@ -196376,7 +238023,7 @@ index cada4e0e40b48..629ebdfa48b8f 100644 iavf_free_rss(adapter); if (hw->aq.asq.count) -@@ -3979,8 +4261,6 @@ static void iavf_remove(struct pci_dev *pdev) +@@ -3979,8 +4272,6 @@ static void iavf_remove(struct pci_dev *pdev) mutex_destroy(&adapter->client_lock); mutex_unlock(&adapter->crit_lock); mutex_destroy(&adapter->crit_lock); @@ -196385,6 +238032,18 @@ index cada4e0e40b48..629ebdfa48b8f 100644 iounmap(hw->hw_addr); pci_release_regions(pdev); +@@ -4062,7 +4353,11 @@ static int __init iavf_init_module(void) + pr_err("%s: Failed to create workqueue\n", iavf_driver_name); + return -ENOMEM; + } ++ + ret = pci_register_driver(&iavf_driver); ++ if (ret) ++ destroy_workqueue(iavf_wq); ++ + return ret; + } + diff --git a/drivers/net/ethernet/intel/iavf/iavf_txrx.c b/drivers/net/ethernet/intel/iavf/iavf_txrx.c index 3525eab8e9f9a..e76e3df3e2d9e 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_txrx.c @@ -196594,7 +238253,7 @@ index 3c735968e1b85..c6eb0d0748ea9 100644 if (adapter->current_op && (v_opcode != adapter->current_op)) dev_warn(&adapter->pdev->dev, "Expected response %d from PF, received %d\n", diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h -index 3c4f08d20414e..89bca2ed895a0 100644 +index 3c4f08d20414e..a5bc804dc67ad 100644 --- a/drivers/net/ethernet/intel/ice/ice.h +++ b/drivers/net/ethernet/intel/ice/ice.h @@ -139,13 +139,10 @@ @@ -196683,6 +238342,15 @@ index 3c4f08d20414e..89bca2ed895a0 100644 } static inline void ice_set_ring_xdp(struct ice_ring *ring) +@@ -641,7 +641,7 @@ void ice_set_ethtool_ops(struct net_device *netdev); + void ice_set_ethtool_safe_mode_ops(struct net_device *netdev); + u16 ice_get_avail_txq_count(struct ice_pf *pf); + u16 ice_get_avail_rxq_count(struct ice_pf *pf); +-int ice_vsi_recfg_qs(struct ice_vsi *vsi, int new_rx, int new_tx); ++int ice_vsi_recfg_qs(struct ice_vsi *vsi, int new_rx, int new_tx, bool locked); + void ice_update_vsi_stats(struct ice_vsi *vsi); + void ice_update_pf_stats(struct ice_pf *pf); + int ice_up(struct ice_vsi *vsi); @@ -696,7 +696,7 @@ static inline void ice_set_rdma_cap(struct ice_pf *pf) if (pf->hw.func_caps.common_cap.rdma && pf->num_rdma_msix) { set_bit(ICE_FLAG_RDMA_ENA, pf->flags); @@ -196749,6 +238417,89 @@ index df5ad4de1f00e..3de6f16f985ab 100644 goto out; if (!(tlv.options & ICE_LINK_OVERRIDE_STRICT_MODE) && +diff --git a/drivers/net/ethernet/intel/ice/ice_dcb_lib.c b/drivers/net/ethernet/intel/ice/ice_dcb_lib.c +index 926cf748c5ecd..dd4195e964faf 100644 +--- a/drivers/net/ethernet/intel/ice/ice_dcb_lib.c ++++ b/drivers/net/ethernet/intel/ice/ice_dcb_lib.c +@@ -355,7 +355,7 @@ int ice_pf_dcb_cfg(struct ice_pf *pf, struct ice_dcbx_cfg *new_cfg, bool locked) + goto out; + } + +- ice_pf_dcb_recfg(pf); ++ ice_pf_dcb_recfg(pf, false); + + out: + ice_ena_vsi(pf_vsi, true); +@@ -644,12 +644,13 @@ static int 
ice_dcb_noncontig_cfg(struct ice_pf *pf) + /** + * ice_pf_dcb_recfg - Reconfigure all VEBs and VSIs + * @pf: pointer to the PF struct ++ * @locked: is adev device lock held + * + * Assumed caller has already disabled all VSIs before + * calling this function. Reconfiguring DCB based on + * local_dcbx_cfg. + */ +-void ice_pf_dcb_recfg(struct ice_pf *pf) ++void ice_pf_dcb_recfg(struct ice_pf *pf, bool locked) + { + struct ice_dcbx_cfg *dcbcfg = &pf->hw.port_info->qos_cfg.local_dcbx_cfg; + struct iidc_event *event; +@@ -688,14 +689,16 @@ void ice_pf_dcb_recfg(struct ice_pf *pf) + if (vsi->type == ICE_VSI_PF) + ice_dcbnl_set_all(vsi); + } +- /* Notify the AUX drivers that TC change is finished */ +- event = kzalloc(sizeof(*event), GFP_KERNEL); +- if (!event) +- return; ++ if (!locked) { ++ /* Notify the AUX drivers that TC change is finished */ ++ event = kzalloc(sizeof(*event), GFP_KERNEL); ++ if (!event) ++ return; + +- set_bit(IIDC_EVENT_AFTER_TC_CHANGE, event->type); +- ice_send_event_to_aux(pf, event); +- kfree(event); ++ set_bit(IIDC_EVENT_AFTER_TC_CHANGE, event->type); ++ ice_send_event_to_aux(pf, event); ++ kfree(event); ++ } + } + + /** +@@ -943,7 +946,7 @@ ice_dcb_process_lldp_set_mib_change(struct ice_pf *pf, + } + + /* changes in configuration update VSI */ +- ice_pf_dcb_recfg(pf); ++ ice_pf_dcb_recfg(pf, false); + + ice_ena_vsi(pf_vsi, true); + unlock_rtnl: +diff --git a/drivers/net/ethernet/intel/ice/ice_dcb_lib.h b/drivers/net/ethernet/intel/ice/ice_dcb_lib.h +index 261b6e2ed7bc2..33a609e92d253 100644 +--- a/drivers/net/ethernet/intel/ice/ice_dcb_lib.h ++++ b/drivers/net/ethernet/intel/ice/ice_dcb_lib.h +@@ -23,7 +23,7 @@ u8 ice_dcb_get_tc(struct ice_vsi *vsi, int queue_index); + int + ice_pf_dcb_cfg(struct ice_pf *pf, struct ice_dcbx_cfg *new_cfg, bool locked); + int ice_dcb_bwchk(struct ice_pf *pf, struct ice_dcbx_cfg *dcbcfg); +-void ice_pf_dcb_recfg(struct ice_pf *pf); ++void ice_pf_dcb_recfg(struct ice_pf *pf, bool locked); + void ice_vsi_cfg_dcb_rings(struct ice_vsi *vsi); + int ice_init_pf_dcb(struct ice_pf *pf, bool locked); + void ice_update_dcb_stats(struct ice_pf *pf); +@@ -113,7 +113,7 @@ ice_is_pfc_causing_hung_q(struct ice_pf __always_unused *pf, + return false; + } + +-static inline void ice_pf_dcb_recfg(struct ice_pf *pf) { } ++static inline void ice_pf_dcb_recfg(struct ice_pf *pf, bool locked) { } + static inline void ice_vsi_cfg_dcb_rings(struct ice_vsi *vsi) { } + static inline void ice_update_dcb_stats(struct ice_pf *pf) { } + static inline void diff --git a/drivers/net/ethernet/intel/ice/ice_devlink.c b/drivers/net/ethernet/intel/ice/ice_devlink.c index da7288bdc9a3f..2ec5d5cb72803 100644 --- a/drivers/net/ethernet/intel/ice/ice_devlink.c @@ -196914,7 +238665,7 @@ index e07e74426bde8..e30284ccbed4c 100644 void ice_devlink_init_regions(struct ice_pf *pf); void ice_devlink_destroy_regions(struct ice_pf *pf); diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c -index c451cf401e635..f10d9c377c744 100644 +index c451cf401e635..24001035910e0 100644 --- a/drivers/net/ethernet/intel/ice/ice_ethtool.c +++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c @@ -651,7 +651,8 @@ static int ice_lbtest_receive_frames(struct ice_ring *rx_ring) @@ -196997,7 +238748,55 @@ index c451cf401e635..f10d9c377c744 100644 err = ice_setup_tx_ring(&tx_rings[i]); if (err) { while (i--) -@@ -3466,15 +3505,9 @@ static int ice_set_wol(struct net_device *netdev, struct ethtool_wolinfo *wol) +@@ -3354,7 +3393,9 @@ static int 
ice_set_channels(struct net_device *dev, struct ethtool_channels *ch) + struct ice_vsi *vsi = np->vsi; + struct ice_pf *pf = vsi->back; + int new_rx = 0, new_tx = 0; ++ bool locked = false; + u32 curr_combined; ++ int ret = 0; + + /* do not support changing channels in Safe Mode */ + if (ice_is_safe_mode(pf)) { +@@ -3403,15 +3444,33 @@ static int ice_set_channels(struct net_device *dev, struct ethtool_channels *ch) + return -EINVAL; + } + +- ice_vsi_recfg_qs(vsi, new_rx, new_tx); ++ if (pf->adev) { ++ mutex_lock(&pf->adev_mutex); ++ device_lock(&pf->adev->dev); ++ locked = true; ++ if (pf->adev->dev.driver) { ++ netdev_err(dev, "Cannot change channels when RDMA is active\n"); ++ ret = -EBUSY; ++ goto adev_unlock; ++ } ++ } + +- if (!netif_is_rxfh_configured(dev)) +- return ice_vsi_set_dflt_rss_lut(vsi, new_rx); ++ ice_vsi_recfg_qs(vsi, new_rx, new_tx, locked); ++ ++ if (!netif_is_rxfh_configured(dev)) { ++ ret = ice_vsi_set_dflt_rss_lut(vsi, new_rx); ++ goto adev_unlock; ++ } + + /* Update rss_size due to change in Rx queues */ + vsi->rss_size = ice_get_valid_rss_size(&pf->hw, new_rx); + +- return 0; ++adev_unlock: ++ if (locked) { ++ device_unlock(&pf->adev->dev); ++ mutex_unlock(&pf->adev_mutex); ++ } ++ return ret; + } + + /** +@@ -3466,15 +3525,9 @@ static int ice_set_wol(struct net_device *netdev, struct ethtool_wolinfo *wol) return 0; } @@ -197013,7 +238812,7 @@ index c451cf401e635..f10d9c377c744 100644 * @rc: ring container that the ITR values will come from * * Query the device for ice_ring_container specific ITR values. This is -@@ -3484,13 +3517,12 @@ enum ice_container_type { +@@ -3484,13 +3537,12 @@ enum ice_container_type { * Returns 0 on success, negative otherwise. */ static int @@ -197029,7 +238828,7 @@ index c451cf401e635..f10d9c377c744 100644 case ICE_RX_CONTAINER: ec->use_adaptive_rx_coalesce = ITR_IS_DYNAMIC(rc); ec->rx_coalesce_usecs = rc->itr_setting; -@@ -3501,7 +3533,7 @@ ice_get_rc_coalesce(struct ethtool_coalesce *ec, enum ice_container_type c_type, +@@ -3501,7 +3553,7 @@ ice_get_rc_coalesce(struct ethtool_coalesce *ec, enum ice_container_type c_type, ec->tx_coalesce_usecs = rc->itr_setting; break; default: @@ -197038,7 +238837,7 @@ index c451cf401e635..f10d9c377c744 100644 return -EINVAL; } -@@ -3522,18 +3554,18 @@ static int +@@ -3522,18 +3574,18 @@ static int ice_get_q_coalesce(struct ice_vsi *vsi, struct ethtool_coalesce *ec, int q_num) { if (q_num < vsi->num_rxq && q_num < vsi->num_txq) { @@ -197061,7 +238860,7 @@ index c451cf401e635..f10d9c377c744 100644 &vsi->tx_rings[q_num]->q_vector->tx)) return -EINVAL; } else { -@@ -3585,7 +3617,6 @@ ice_get_per_q_coalesce(struct net_device *netdev, u32 q_num, +@@ -3585,7 +3637,6 @@ ice_get_per_q_coalesce(struct net_device *netdev, u32 q_num, /** * ice_set_rc_coalesce - set ITR values for specific ring container @@ -197069,7 +238868,7 @@ index c451cf401e635..f10d9c377c744 100644 * @ec: ethtool structure from user to update ITR settings * @rc: ring container that the ITR values will come from * @vsi: VSI associated to the ring container -@@ -3597,10 +3628,10 @@ ice_get_per_q_coalesce(struct net_device *netdev, u32 q_num, +@@ -3597,10 +3648,10 @@ ice_get_per_q_coalesce(struct net_device *netdev, u32 q_num, * Returns 0 on success, negative otherwise. 
*/ static int @@ -197082,7 +238881,7 @@ index c451cf401e635..f10d9c377c744 100644 u32 use_adaptive_coalesce, coalesce_usecs; struct ice_pf *pf = vsi->back; u16 itr_setting; -@@ -3608,7 +3639,7 @@ ice_set_rc_coalesce(enum ice_container_type c_type, struct ethtool_coalesce *ec, +@@ -3608,7 +3659,7 @@ ice_set_rc_coalesce(enum ice_container_type c_type, struct ethtool_coalesce *ec, if (!rc->ring) return -EINVAL; @@ -197091,7 +238890,7 @@ index c451cf401e635..f10d9c377c744 100644 case ICE_RX_CONTAINER: if (ec->rx_coalesce_usecs_high > ICE_MAX_INTRL || (ec->rx_coalesce_usecs_high && -@@ -3641,7 +3672,7 @@ ice_set_rc_coalesce(enum ice_container_type c_type, struct ethtool_coalesce *ec, +@@ -3641,7 +3692,7 @@ ice_set_rc_coalesce(enum ice_container_type c_type, struct ethtool_coalesce *ec, break; default: dev_dbg(ice_pf_to_dev(pf), "Invalid container type %d\n", @@ -197100,7 +238899,7 @@ index c451cf401e635..f10d9c377c744 100644 return -EINVAL; } -@@ -3690,22 +3721,22 @@ static int +@@ -3690,22 +3741,22 @@ static int ice_set_q_coalesce(struct ice_vsi *vsi, struct ethtool_coalesce *ec, int q_num) { if (q_num < vsi->num_rxq && q_num < vsi->num_txq) { @@ -197389,7 +239188,7 @@ index b718e196af2a4..4417238b0e64f 100644 vsi->q_vectors[i]->intrl = coalesce[0].intrl; diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c -index 06fa93e597fbc..ffbba5f6b7a5f 100644 +index 06fa93e597fbc..6f674cd117d3d 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -1679,7 +1679,9 @@ static void ice_handle_mdd_event(struct ice_pf *pf) @@ -197544,7 +239343,31 @@ index 06fa93e597fbc..ffbba5f6b7a5f 100644 pf->avail_txqs = NULL; return -ENOMEM; } -@@ -4170,11 +4219,11 @@ static int ice_register_netdev(struct ice_pf *pf) +@@ -3727,12 +3776,13 @@ bool ice_is_wol_supported(struct ice_hw *hw) + * @vsi: VSI being changed + * @new_rx: new number of Rx queues + * @new_tx: new number of Tx queues ++ * @locked: is adev device_lock held + * + * Only change the number of queues if new_tx, or new_rx is non-0. + * + * Returns 0 on success. 
+ */ +-int ice_vsi_recfg_qs(struct ice_vsi *vsi, int new_rx, int new_tx) ++int ice_vsi_recfg_qs(struct ice_vsi *vsi, int new_rx, int new_tx, bool locked) + { + struct ice_pf *pf = vsi->back; + int err = 0, timeout = 50; +@@ -3761,7 +3811,7 @@ int ice_vsi_recfg_qs(struct ice_vsi *vsi, int new_rx, int new_tx) + + ice_vsi_close(vsi); + ice_vsi_rebuild(vsi, false); +- ice_pf_dcb_recfg(pf); ++ ice_pf_dcb_recfg(pf, locked); + ice_vsi_open(vsi); + done: + clear_bit(ICE_CFG_BUSY, pf->state); +@@ -4170,11 +4220,11 @@ static int ice_register_netdev(struct ice_pf *pf) set_bit(ICE_VSI_NETDEV_REGISTERED, vsi->state); netif_carrier_off(vsi->netdev); netif_tx_stop_all_queues(vsi->netdev); @@ -197558,7 +239381,7 @@ index 06fa93e597fbc..ffbba5f6b7a5f 100644 return 0; err_devlink_create: -@@ -4600,9 +4649,6 @@ static void ice_remove(struct pci_dev *pdev) +@@ -4600,9 +4650,6 @@ static void ice_remove(struct pci_dev *pdev) struct ice_pf *pf = pci_get_drvdata(pdev); int i; @@ -197568,7 +239391,16 @@ index 06fa93e597fbc..ffbba5f6b7a5f 100644 for (i = 0; i < ICE_MAX_RESET_WAIT; i++) { if (!ice_is_reset_in_progress(pf->state)) break; -@@ -5433,10 +5479,12 @@ int ice_vsi_cfg(struct ice_vsi *vsi) +@@ -5083,7 +5130,7 @@ static int __init ice_module_init(void) + pr_info("%s\n", ice_driver_string); + pr_info("%s\n", ice_copyright); + +- ice_wq = alloc_workqueue("%s", WQ_MEM_RECLAIM, 0, KBUILD_MODNAME); ++ ice_wq = alloc_workqueue("%s", 0, 0, KBUILD_MODNAME); + if (!ice_wq) { + pr_err("Failed to create workqueue\n"); + return -ENOMEM; +@@ -5433,10 +5480,12 @@ int ice_vsi_cfg(struct ice_vsi *vsi) if (vsi->netdev) { ice_set_rx_mode(vsi->netdev); @@ -197584,7 +239416,7 @@ index 06fa93e597fbc..ffbba5f6b7a5f 100644 } ice_vsi_cfg_dcb_rings(vsi); -@@ -5608,6 +5656,10 @@ static int ice_up_complete(struct ice_vsi *vsi) +@@ -5608,6 +5657,10 @@ static int ice_up_complete(struct ice_vsi *vsi) netif_carrier_on(vsi->netdev); } @@ -197595,7 +239427,7 @@ index 06fa93e597fbc..ffbba5f6b7a5f 100644 ice_service_task_schedule(pf); return 0; -@@ -6511,7 +6563,6 @@ static int ice_change_mtu(struct net_device *netdev, int new_mtu) +@@ -6511,7 +6564,6 @@ static int ice_change_mtu(struct net_device *netdev, int new_mtu) struct ice_netdev_priv *np = netdev_priv(netdev); struct ice_vsi *vsi = np->vsi; struct ice_pf *pf = vsi->back; @@ -197603,7 +239435,7 @@ index 06fa93e597fbc..ffbba5f6b7a5f 100644 u8 count = 0; int err = 0; -@@ -6546,14 +6597,6 @@ static int ice_change_mtu(struct net_device *netdev, int new_mtu) +@@ -6546,14 +6598,6 @@ static int ice_change_mtu(struct net_device *netdev, int new_mtu) return -EBUSY; } @@ -197618,7 +239450,7 @@ index 06fa93e597fbc..ffbba5f6b7a5f 100644 netdev->mtu = (unsigned int)new_mtu; /* if VSI is up, bring it down and then back up */ -@@ -6561,21 +6604,18 @@ static int ice_change_mtu(struct net_device *netdev, int new_mtu) +@@ -6561,21 +6605,18 @@ static int ice_change_mtu(struct net_device *netdev, int new_mtu) err = ice_down(vsi); if (err) { netdev_err(netdev, "change MTU if_down err %d\n", err); @@ -197643,7 +239475,7 @@ index 06fa93e597fbc..ffbba5f6b7a5f 100644 return err; } -@@ -7190,6 +7230,7 @@ ice_features_check(struct sk_buff *skb, +@@ -7190,6 +7231,7 @@ ice_features_check(struct sk_buff *skb, struct net_device __always_unused *netdev, netdev_features_t features) { @@ -197651,7 +239483,7 @@ index 06fa93e597fbc..ffbba5f6b7a5f 100644 size_t len; /* No point in doing any of this if neither checksum nor GSO are -@@ -7202,24 +7243,32 @@ ice_features_check(struct sk_buff *skb, +@@ -7202,24 +7244,32 @@ 
ice_features_check(struct sk_buff *skb, /* We cannot support GSO if the MSS is going to be less than * 64 bytes. If it is then we need to drop support for GSO. */ @@ -198621,7 +240453,7 @@ index 2d3daf022651c..015b781441149 100644 /* flags controlling PTP/1588 function */ diff --git a/drivers/net/ethernet/intel/igb/igb_ethtool.c b/drivers/net/ethernet/intel/igb/igb_ethtool.c -index fb1029352c3e7..3cbb5a89b336f 100644 +index fb1029352c3e7..e99e6e44b525a 100644 --- a/drivers/net/ethernet/intel/igb/igb_ethtool.c +++ b/drivers/net/ethernet/intel/igb/igb_ethtool.c @@ -961,10 +961,6 @@ static int igb_set_ringparam(struct net_device *netdev, @@ -198635,11 +240467,35 @@ index fb1029352c3e7..3cbb5a89b336f 100644 temp_ring[i].count = new_rx_count; err = igb_setup_rx_resources(&temp_ring[i]); if (err) { +@@ -1413,6 +1409,8 @@ static int igb_intr_test(struct igb_adapter *adapter, u64 *data) + *data = 1; + return -1; + } ++ wr32(E1000_IVAR_MISC, E1000_IVAR_VALID << 8); ++ wr32(E1000_EIMS, BIT(0)); + } else if (adapter->flags & IGB_FLAG_HAS_MSI) { + shared_int = false; + if (request_irq(irq, diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c -index 751de06019a0e..f19e648307398 100644 +index 751de06019a0e..b246ff8b7c208 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c -@@ -3637,6 +3637,7 @@ static int igb_disable_sriov(struct pci_dev *pdev) +@@ -1204,8 +1204,12 @@ static int igb_alloc_q_vector(struct igb_adapter *adapter, + if (!q_vector) { + q_vector = kzalloc(size, GFP_KERNEL); + } else if (size > ksize(q_vector)) { +- kfree_rcu(q_vector, rcu); +- q_vector = kzalloc(size, GFP_KERNEL); ++ struct igb_q_vector *new_q_vector; ++ ++ new_q_vector = kzalloc(size, GFP_KERNEL); ++ if (new_q_vector) ++ kfree_rcu(q_vector, rcu); ++ q_vector = new_q_vector; + } else { + memset(q_vector, 0, size); + } +@@ -3637,6 +3641,7 @@ static int igb_disable_sriov(struct pci_dev *pdev) struct net_device *netdev = pci_get_drvdata(pdev); struct igb_adapter *adapter = netdev_priv(netdev); struct e1000_hw *hw = &adapter->hw; @@ -198647,7 +240503,7 @@ index 751de06019a0e..f19e648307398 100644 /* reclaim resources allocated to VFs */ if (adapter->vf_data) { -@@ -3649,12 +3650,13 @@ static int igb_disable_sriov(struct pci_dev *pdev) +@@ -3649,12 +3654,13 @@ static int igb_disable_sriov(struct pci_dev *pdev) pci_disable_sriov(pdev); msleep(500); } @@ -198662,7 +240518,7 @@ index 751de06019a0e..f19e648307398 100644 wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ); wrfl(); msleep(100); -@@ -3814,7 +3816,9 @@ static void igb_remove(struct pci_dev *pdev) +@@ -3814,7 +3820,9 @@ static void igb_remove(struct pci_dev *pdev) igb_release_hw_control(adapter); #ifdef CONFIG_PCI_IOV @@ -198672,7 +240528,7 @@ index 751de06019a0e..f19e648307398 100644 #endif unregister_netdev(netdev); -@@ -3974,6 +3978,9 @@ static int igb_sw_init(struct igb_adapter *adapter) +@@ -3974,6 +3982,9 @@ static int igb_sw_init(struct igb_adapter *adapter) spin_lock_init(&adapter->nfc_lock); spin_lock_init(&adapter->stats64_lock); @@ -198682,7 +240538,7 @@ index 751de06019a0e..f19e648307398 100644 #ifdef CONFIG_PCI_IOV switch (hw->mac.type) { case e1000_82576: -@@ -4345,7 +4352,18 @@ int igb_setup_rx_resources(struct igb_ring *rx_ring) +@@ -4345,7 +4356,18 @@ int igb_setup_rx_resources(struct igb_ring *rx_ring) { struct igb_adapter *adapter = netdev_priv(rx_ring->netdev); struct device *dev = rx_ring->dev; @@ -198702,7 +240558,7 @@ index 751de06019a0e..f19e648307398 100644 size 
= sizeof(struct igb_rx_buffer) * rx_ring->count; -@@ -4368,14 +4386,10 @@ int igb_setup_rx_resources(struct igb_ring *rx_ring) +@@ -4368,14 +4390,10 @@ int igb_setup_rx_resources(struct igb_ring *rx_ring) rx_ring->xdp_prog = adapter->xdp_prog; @@ -198718,7 +240574,7 @@ index 751de06019a0e..f19e648307398 100644 vfree(rx_ring->rx_buffer_info); rx_ring->rx_buffer_info = NULL; dev_err(dev, "Unable to allocate memory for the Rx descriptor ring\n"); -@@ -4812,8 +4826,11 @@ static void igb_clean_tx_ring(struct igb_ring *tx_ring) +@@ -4812,8 +4830,11 @@ static void igb_clean_tx_ring(struct igb_ring *tx_ring) while (i != tx_ring->next_to_use) { union e1000_adv_tx_desc *eop_desc, *tx_desc; @@ -198732,7 +240588,7 @@ index 751de06019a0e..f19e648307398 100644 /* unmap skb header data */ dma_unmap_single(tx_ring->dev, -@@ -5498,7 +5515,8 @@ static void igb_watchdog_task(struct work_struct *work) +@@ -5498,7 +5519,8 @@ static void igb_watchdog_task(struct work_struct *work) break; } @@ -198742,7 +240598,16 @@ index 751de06019a0e..f19e648307398 100644 goto no_wait; /* wait for Remote receiver status OK */ -@@ -7641,6 +7659,20 @@ static int igb_set_vf_mac_filter(struct igb_adapter *adapter, const int vf, +@@ -7392,7 +7414,7 @@ static void igb_vf_reset_msg(struct igb_adapter *adapter, u32 vf) + { + struct e1000_hw *hw = &adapter->hw; + unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses; +- u32 reg, msgbuf[3]; ++ u32 reg, msgbuf[3] = {}; + u8 *addr = (u8 *)(&msgbuf[1]); + + /* process all the same items cleared in a function level reset */ +@@ -7641,6 +7663,20 @@ static int igb_set_vf_mac_filter(struct igb_adapter *adapter, const int vf, struct vf_mac_filter *entry = NULL; int ret = 0; @@ -198763,7 +240628,7 @@ index 751de06019a0e..f19e648307398 100644 switch (info) { case E1000_VF_MAC_FILTER_CLR: /* remove all unicast MAC filters related to the current VF */ -@@ -7654,20 +7686,6 @@ static int igb_set_vf_mac_filter(struct igb_adapter *adapter, const int vf, +@@ -7654,20 +7690,6 @@ static int igb_set_vf_mac_filter(struct igb_adapter *adapter, const int vf, } break; case E1000_VF_MAC_FILTER_ADD: @@ -198784,7 +240649,7 @@ index 751de06019a0e..f19e648307398 100644 /* try to find empty slot in the list */ list_for_each(pos, &adapter->vf_macs.l) { entry = list_entry(pos, struct vf_mac_filter, l); -@@ -7835,8 +7853,10 @@ unlock: +@@ -7835,8 +7857,10 @@ unlock: static void igb_msg_task(struct igb_adapter *adapter) { struct e1000_hw *hw = &adapter->hw; @@ -198795,7 +240660,7 @@ index 751de06019a0e..f19e648307398 100644 for (vf = 0; vf < adapter->vfs_allocated_count; vf++) { /* process any reset requests */ if (!igb_check_for_rst(hw, vf)) -@@ -7850,6 +7870,7 @@ static void igb_msg_task(struct igb_adapter *adapter) +@@ -7850,6 +7874,7 @@ static void igb_msg_task(struct igb_adapter *adapter) if (!igb_check_for_ack(hw, vf)) igb_rcv_ack_from_vf(adapter, vf); } @@ -198803,7 +240668,7 @@ index 751de06019a0e..f19e648307398 100644 } /** -@@ -8019,7 +8040,7 @@ static int igb_poll(struct napi_struct *napi, int budget) +@@ -8019,7 +8044,7 @@ static int igb_poll(struct napi_struct *napi, int budget) if (likely(napi_complete_done(napi, work_done))) igb_ring_irq_enable(q_vector); @@ -198812,7 +240677,7 @@ index 751de06019a0e..f19e648307398 100644 } /** -@@ -9247,7 +9268,7 @@ static int __maybe_unused igb_suspend(struct device *dev) +@@ -9247,7 +9272,7 @@ static int __maybe_unused igb_suspend(struct device *dev) return __igb_shutdown(to_pci_dev(dev), NULL, 0); } @@ -198821,7 +240686,7 @@ index 751de06019a0e..f19e648307398 
100644 { struct pci_dev *pdev = to_pci_dev(dev); struct net_device *netdev = pci_get_drvdata(pdev); -@@ -9290,17 +9311,24 @@ static int __maybe_unused igb_resume(struct device *dev) +@@ -9290,17 +9315,24 @@ static int __maybe_unused igb_resume(struct device *dev) wr32(E1000_WUS, ~0); @@ -198848,7 +240713,7 @@ index 751de06019a0e..f19e648307398 100644 static int __maybe_unused igb_runtime_idle(struct device *dev) { struct net_device *netdev = dev_get_drvdata(dev); -@@ -9319,7 +9347,7 @@ static int __maybe_unused igb_runtime_suspend(struct device *dev) +@@ -9319,7 +9351,7 @@ static int __maybe_unused igb_runtime_suspend(struct device *dev) static int __maybe_unused igb_runtime_resume(struct device *dev) { @@ -198857,7 +240722,7 @@ index 751de06019a0e..f19e648307398 100644 } static void igb_shutdown(struct pci_dev *pdev) -@@ -9435,7 +9463,7 @@ static pci_ers_result_t igb_io_error_detected(struct pci_dev *pdev, +@@ -9435,7 +9467,7 @@ static pci_ers_result_t igb_io_error_detected(struct pci_dev *pdev, * @pdev: Pointer to PCI device * * Restart the card from scratch, as if from a cold-boot. Implementation @@ -198866,7 +240731,7 @@ index 751de06019a0e..f19e648307398 100644 **/ static pci_ers_result_t igb_io_slot_reset(struct pci_dev *pdev) { -@@ -9475,7 +9503,7 @@ static pci_ers_result_t igb_io_slot_reset(struct pci_dev *pdev) +@@ -9475,7 +9507,7 @@ static pci_ers_result_t igb_io_slot_reset(struct pci_dev *pdev) * * This callback is called when the error recovery driver tells us that * its OK to resume normal operation. Implementation resembles the @@ -198875,7 +240740,7 @@ index 751de06019a0e..f19e648307398 100644 */ static void igb_io_resume(struct pci_dev *pdev) { -@@ -9805,11 +9833,10 @@ static void igb_init_dmac(struct igb_adapter *adapter, u32 pba) +@@ -9805,11 +9837,10 @@ static void igb_init_dmac(struct igb_adapter *adapter, u32 pba) struct e1000_hw *hw = &adapter->hw; u32 dmac_thr; u16 hwm; @@ -198888,7 +240753,7 @@ index 751de06019a0e..f19e648307398 100644 /* force threshold to 0. 
*/ wr32(E1000_DMCTXTH, 0); -@@ -9842,7 +9869,6 @@ static void igb_init_dmac(struct igb_adapter *adapter, u32 pba) +@@ -9842,7 +9873,6 @@ static void igb_init_dmac(struct igb_adapter *adapter, u32 pba) /* Disable BMC-to-OS Watchdog Enable */ if (hw->mac.type != e1000_i354) reg &= ~E1000_DMACR_DC_BMC2OSW_EN; @@ -198896,7 +240761,7 @@ index 751de06019a0e..f19e648307398 100644 wr32(E1000_DMACR, reg); /* no lower threshold to disable -@@ -9859,12 +9885,12 @@ static void igb_init_dmac(struct igb_adapter *adapter, u32 pba) +@@ -9859,12 +9889,12 @@ static void igb_init_dmac(struct igb_adapter *adapter, u32 pba) */ wr32(E1000_DMCTXTH, (IGB_MIN_TXPBSIZE - (IGB_TX_BUF_4096 + adapter->max_frame_size)) >> 6); @@ -198925,6 +240790,50 @@ index d32e72d953c8d..d051918dfdff9 100644 kfree(adapter->tx_ring); kfree(adapter->rx_ring); err_sw_init: +diff --git a/drivers/net/ethernet/intel/igc/igc.h b/drivers/net/ethernet/intel/igc/igc.h +index 3e386c38d016c..66678cd72a6cd 100644 +--- a/drivers/net/ethernet/intel/igc/igc.h ++++ b/drivers/net/ethernet/intel/igc/igc.h +@@ -94,6 +94,8 @@ struct igc_ring { + u8 queue_index; /* logical index of the ring*/ + u8 reg_idx; /* physical index of the ring */ + bool launchtime_enable; /* true if LaunchTime is enabled */ ++ ktime_t last_tx_cycle; /* end of the cycle with a launchtime transmission */ ++ ktime_t last_ff_cycle; /* Last cycle with an active first flag */ + + u32 start_time; + u32 end_time; +@@ -182,6 +184,7 @@ struct igc_adapter { + + ktime_t base_time; + ktime_t cycle_time; ++ bool qbv_enable; + + /* OS defined structs */ + struct pci_dev *pdev; +diff --git a/drivers/net/ethernet/intel/igc/igc_defines.h b/drivers/net/ethernet/intel/igc/igc_defines.h +index a4bbee7487984..60d0ca69ceca9 100644 +--- a/drivers/net/ethernet/intel/igc/igc_defines.h ++++ b/drivers/net/ethernet/intel/igc/igc_defines.h +@@ -324,6 +324,8 @@ + #define IGC_ADVTXD_L4LEN_SHIFT 8 /* Adv ctxt L4LEN shift */ + #define IGC_ADVTXD_MSS_SHIFT 16 /* Adv ctxt MSS shift */ + ++#define IGC_ADVTXD_TSN_CNTX_FIRST 0x00000080 ++ + /* Transmit Control */ + #define IGC_TCTL_EN 0x00000002 /* enable Tx */ + #define IGC_TCTL_PSP 0x00000008 /* pad short packets */ +@@ -467,7 +469,9 @@ + #define IGC_TSAUXC_EN_TT0 BIT(0) /* Enable target time 0. */ + #define IGC_TSAUXC_EN_TT1 BIT(1) /* Enable target time 1. */ + #define IGC_TSAUXC_EN_CLK0 BIT(2) /* Enable Configurable Frequency Clock 0. */ ++#define IGC_TSAUXC_ST0 BIT(4) /* Start Clock 0 Toggle on Target Time 0. */ + #define IGC_TSAUXC_EN_CLK1 BIT(5) /* Enable Configurable Frequency Clock 1. */ ++#define IGC_TSAUXC_ST1 BIT(7) /* Start Clock 1 Toggle on Target Time 1. */ + #define IGC_TSAUXC_EN_TS0 BIT(8) /* Enable hardware timestamp 0. */ + #define IGC_TSAUXC_AUTT0 BIT(9) /* Auxiliary Timestamp Taken. */ + #define IGC_TSAUXC_EN_TS1 BIT(10) /* Enable hardware timestamp 0. 
*/ diff --git a/drivers/net/ethernet/intel/igc/igc_i225.c b/drivers/net/ethernet/intel/igc/igc_i225.c index b2ef9fde97b38..a0e2a404d5355 100644 --- a/drivers/net/ethernet/intel/igc/igc_i225.c @@ -198957,7 +240866,7 @@ index b2ef9fde97b38..a0e2a404d5355 100644 } } diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c -index 0e19b4d02e628..2a84f57ea68b4 100644 +index 0e19b4d02e628..bde3fea2c442e 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -504,6 +504,9 @@ int igc_setup_rx_resources(struct igc_ring *rx_ring) @@ -198970,7 +240879,267 @@ index 0e19b4d02e628..2a84f57ea68b4 100644 res = xdp_rxq_info_reg(&rx_ring->xdp_rxq, ndev, index, rx_ring->q_vector->napi.napi_id); if (res < 0) { -@@ -2434,21 +2437,24 @@ static int igc_clean_rx_irq(struct igc_q_vector *q_vector, const int budget) +@@ -996,25 +999,118 @@ static int igc_write_mc_addr_list(struct net_device *netdev) + return netdev_mc_count(netdev); + } + +-static __le32 igc_tx_launchtime(struct igc_adapter *adapter, ktime_t txtime) ++static __le32 igc_tx_launchtime(struct igc_ring *ring, ktime_t txtime, ++ bool *first_flag, bool *insert_empty) + { ++ struct igc_adapter *adapter = netdev_priv(ring->netdev); + ktime_t cycle_time = adapter->cycle_time; + ktime_t base_time = adapter->base_time; ++ ktime_t now = ktime_get_clocktai(); ++ ktime_t baset_est, end_of_cycle; + u32 launchtime; ++ s64 n; + +- /* FIXME: when using ETF together with taprio, we may have a +- * case where 'delta' is larger than the cycle_time, this may +- * cause problems if we don't read the current value of +- * IGC_BASET, as the value writen into the launchtime +- * descriptor field may be misinterpreted. ++ n = div64_s64(ktime_sub_ns(now, base_time), cycle_time); ++ ++ baset_est = ktime_add_ns(base_time, cycle_time * (n)); ++ end_of_cycle = ktime_add_ns(baset_est, cycle_time); ++ ++ if (ktime_compare(txtime, end_of_cycle) >= 0) { ++ if (baset_est != ring->last_ff_cycle) { ++ *first_flag = true; ++ ring->last_ff_cycle = baset_est; ++ ++ if (ktime_compare(txtime, ring->last_tx_cycle) > 0) ++ *insert_empty = true; ++ } ++ } ++ ++ /* Introducing a window at end of cycle on which packets ++ * potentially not honor launchtime. Window of 5us chosen ++ * considering software update the tail pointer and packets ++ * are dma'ed to packet buffer. 
+ */ +- div_s64_rem(ktime_sub_ns(txtime, base_time), cycle_time, &launchtime); ++ if ((ktime_sub_ns(end_of_cycle, now) < 5 * NSEC_PER_USEC)) ++ netdev_warn(ring->netdev, "Packet with txtime=%llu may not be honoured\n", ++ txtime); ++ ++ ring->last_tx_cycle = end_of_cycle; ++ ++ launchtime = ktime_sub_ns(txtime, baset_est); ++ if (launchtime > 0) ++ div_s64_rem(launchtime, cycle_time, &launchtime); ++ else ++ launchtime = 0; + + return cpu_to_le32(launchtime); + } + ++static int igc_init_empty_frame(struct igc_ring *ring, ++ struct igc_tx_buffer *buffer, ++ struct sk_buff *skb) ++{ ++ unsigned int size; ++ dma_addr_t dma; ++ ++ size = skb_headlen(skb); ++ ++ dma = dma_map_single(ring->dev, skb->data, size, DMA_TO_DEVICE); ++ if (dma_mapping_error(ring->dev, dma)) { ++ netdev_err_once(ring->netdev, "Failed to map DMA for TX\n"); ++ return -ENOMEM; ++ } ++ ++ buffer->skb = skb; ++ buffer->protocol = 0; ++ buffer->bytecount = skb->len; ++ buffer->gso_segs = 1; ++ buffer->time_stamp = jiffies; ++ dma_unmap_len_set(buffer, len, skb->len); ++ dma_unmap_addr_set(buffer, dma, dma); ++ ++ return 0; ++} ++ ++static int igc_init_tx_empty_descriptor(struct igc_ring *ring, ++ struct sk_buff *skb, ++ struct igc_tx_buffer *first) ++{ ++ union igc_adv_tx_desc *desc; ++ u32 cmd_type, olinfo_status; ++ int err; ++ ++ if (!igc_desc_unused(ring)) ++ return -EBUSY; ++ ++ err = igc_init_empty_frame(ring, first, skb); ++ if (err) ++ return err; ++ ++ cmd_type = IGC_ADVTXD_DTYP_DATA | IGC_ADVTXD_DCMD_DEXT | ++ IGC_ADVTXD_DCMD_IFCS | IGC_TXD_DCMD | ++ first->bytecount; ++ olinfo_status = first->bytecount << IGC_ADVTXD_PAYLEN_SHIFT; ++ ++ desc = IGC_TX_DESC(ring, ring->next_to_use); ++ desc->read.cmd_type_len = cpu_to_le32(cmd_type); ++ desc->read.olinfo_status = cpu_to_le32(olinfo_status); ++ desc->read.buffer_addr = cpu_to_le64(dma_unmap_addr(first, dma)); ++ ++ netdev_tx_sent_queue(txring_txq(ring), skb->len); ++ ++ first->next_to_watch = desc; ++ ++ ring->next_to_use++; ++ if (ring->next_to_use == ring->count) ++ ring->next_to_use = 0; ++ ++ return 0; ++} ++ ++#define IGC_EMPTY_FRAME_SIZE 60 ++ + static void igc_tx_ctxtdesc(struct igc_ring *tx_ring, +- struct igc_tx_buffer *first, ++ __le32 launch_time, bool first_flag, + u32 vlan_macip_lens, u32 type_tucmd, + u32 mss_l4len_idx) + { +@@ -1033,26 +1129,17 @@ static void igc_tx_ctxtdesc(struct igc_ring *tx_ring, + if (test_bit(IGC_RING_FLAG_TX_CTX_IDX, &tx_ring->flags)) + mss_l4len_idx |= tx_ring->reg_idx << 4; + ++ if (first_flag) ++ mss_l4len_idx |= IGC_ADVTXD_TSN_CNTX_FIRST; ++ + context_desc->vlan_macip_lens = cpu_to_le32(vlan_macip_lens); + context_desc->type_tucmd_mlhl = cpu_to_le32(type_tucmd); + context_desc->mss_l4len_idx = cpu_to_le32(mss_l4len_idx); +- +- /* We assume there is always a valid Tx time available. Invalid times +- * should have been handled by the upper layers. 
+- */ +- if (tx_ring->launchtime_enable) { +- struct igc_adapter *adapter = netdev_priv(tx_ring->netdev); +- ktime_t txtime = first->skb->tstamp; +- +- skb_txtime_consumed(first->skb); +- context_desc->launch_time = igc_tx_launchtime(adapter, +- txtime); +- } else { +- context_desc->launch_time = 0; +- } ++ context_desc->launch_time = launch_time; + } + +-static void igc_tx_csum(struct igc_ring *tx_ring, struct igc_tx_buffer *first) ++static void igc_tx_csum(struct igc_ring *tx_ring, struct igc_tx_buffer *first, ++ __le32 launch_time, bool first_flag) + { + struct sk_buff *skb = first->skb; + u32 vlan_macip_lens = 0; +@@ -1092,7 +1179,8 @@ no_csum: + vlan_macip_lens |= skb_network_offset(skb) << IGC_ADVTXD_MACLEN_SHIFT; + vlan_macip_lens |= first->tx_flags & IGC_TX_FLAGS_VLAN_MASK; + +- igc_tx_ctxtdesc(tx_ring, first, vlan_macip_lens, type_tucmd, 0); ++ igc_tx_ctxtdesc(tx_ring, launch_time, first_flag, ++ vlan_macip_lens, type_tucmd, 0); + } + + static int __igc_maybe_stop_tx(struct igc_ring *tx_ring, const u16 size) +@@ -1316,6 +1404,7 @@ dma_error: + + static int igc_tso(struct igc_ring *tx_ring, + struct igc_tx_buffer *first, ++ __le32 launch_time, bool first_flag, + u8 *hdr_len) + { + u32 vlan_macip_lens, type_tucmd, mss_l4len_idx; +@@ -1402,8 +1491,8 @@ static int igc_tso(struct igc_ring *tx_ring, + vlan_macip_lens |= (ip.hdr - skb->data) << IGC_ADVTXD_MACLEN_SHIFT; + vlan_macip_lens |= first->tx_flags & IGC_TX_FLAGS_VLAN_MASK; + +- igc_tx_ctxtdesc(tx_ring, first, vlan_macip_lens, +- type_tucmd, mss_l4len_idx); ++ igc_tx_ctxtdesc(tx_ring, launch_time, first_flag, ++ vlan_macip_lens, type_tucmd, mss_l4len_idx); + + return 1; + } +@@ -1411,11 +1500,14 @@ static int igc_tso(struct igc_ring *tx_ring, + static netdev_tx_t igc_xmit_frame_ring(struct sk_buff *skb, + struct igc_ring *tx_ring) + { ++ bool first_flag = false, insert_empty = false; + u16 count = TXD_USE_COUNT(skb_headlen(skb)); + __be16 protocol = vlan_get_protocol(skb); + struct igc_tx_buffer *first; ++ __le32 launch_time = 0; + u32 tx_flags = 0; + unsigned short f; ++ ktime_t txtime; + u8 hdr_len = 0; + int tso = 0; + +@@ -1429,11 +1521,40 @@ static netdev_tx_t igc_xmit_frame_ring(struct sk_buff *skb, + count += TXD_USE_COUNT(skb_frag_size( + &skb_shinfo(skb)->frags[f])); + +- if (igc_maybe_stop_tx(tx_ring, count + 3)) { ++ if (igc_maybe_stop_tx(tx_ring, count + 5)) { + /* this is a hard error */ + return NETDEV_TX_BUSY; + } + ++ if (!tx_ring->launchtime_enable) ++ goto done; ++ ++ txtime = skb->tstamp; ++ skb->tstamp = ktime_set(0, 0); ++ launch_time = igc_tx_launchtime(tx_ring, txtime, &first_flag, &insert_empty); ++ ++ if (insert_empty) { ++ struct igc_tx_buffer *empty_info; ++ struct sk_buff *empty; ++ void *data; ++ ++ empty_info = &tx_ring->tx_buffer_info[tx_ring->next_to_use]; ++ empty = alloc_skb(IGC_EMPTY_FRAME_SIZE, GFP_ATOMIC); ++ if (!empty) ++ goto done; ++ ++ data = skb_put(empty, IGC_EMPTY_FRAME_SIZE); ++ memset(data, 0, IGC_EMPTY_FRAME_SIZE); ++ ++ igc_tx_ctxtdesc(tx_ring, 0, false, 0, 0, 0); ++ ++ if (igc_init_tx_empty_descriptor(tx_ring, ++ empty, ++ empty_info) < 0) ++ dev_kfree_skb_any(empty); ++ } ++ ++done: + /* record the location of the first descriptor for this packet */ + first = &tx_ring->tx_buffer_info[tx_ring->next_to_use]; + first->type = IGC_TX_BUFFER_TYPE_SKB; +@@ -1470,11 +1591,11 @@ static netdev_tx_t igc_xmit_frame_ring(struct sk_buff *skb, + first->tx_flags = tx_flags; + first->protocol = protocol; + +- tso = igc_tso(tx_ring, first, &hdr_len); ++ tso = igc_tso(tx_ring, first, launch_time, 
first_flag, &hdr_len); + if (tso < 0) + goto out_drop; + else if (!tso) +- igc_tx_csum(tx_ring, first); ++ igc_tx_csum(tx_ring, first, launch_time, first_flag); + + igc_tx_map(tx_ring, first, hdr_len); + +@@ -2434,21 +2555,24 @@ static int igc_clean_rx_irq(struct igc_q_vector *q_vector, const int budget) static struct sk_buff *igc_construct_skb_zc(struct igc_ring *ring, struct xdp_buff *xdp) { @@ -199002,7 +241171,52 @@ index 0e19b4d02e628..2a84f57ea68b4 100644 return skb; } -@@ -5466,6 +5472,9 @@ static irqreturn_t igc_intr_msi(int irq, void *data) +@@ -2768,7 +2892,9 @@ static bool igc_clean_tx_irq(struct igc_q_vector *q_vector, int napi_budget) + if (tx_buffer->next_to_watch && + time_after(jiffies, tx_buffer->time_stamp + + (adapter->tx_timeout_factor * HZ)) && +- !(rd32(IGC_STATUS) & IGC_STATUS_TXOFF)) { ++ !(rd32(IGC_STATUS) & IGC_STATUS_TXOFF) && ++ (rd32(IGC_TDH(tx_ring->reg_idx)) != ++ readl(tx_ring->tail))) { + /* detected Tx unit hang */ + netdev_err(tx_ring->netdev, + "Detected Tx Unit Hang\n" +@@ -4895,6 +5021,24 @@ static int igc_change_mtu(struct net_device *netdev, int new_mtu) + return 0; + } + ++/** ++ * igc_tx_timeout - Respond to a Tx Hang ++ * @netdev: network interface device structure ++ * @txqueue: queue number that timed out ++ **/ ++static void igc_tx_timeout(struct net_device *netdev, ++ unsigned int __always_unused txqueue) ++{ ++ struct igc_adapter *adapter = netdev_priv(netdev); ++ struct igc_hw *hw = &adapter->hw; ++ ++ /* Do the reset outside of interrupt context */ ++ adapter->tx_timeout_count++; ++ schedule_work(&adapter->reset_task); ++ wr32(IGC_EICS, ++ (adapter->eims_enable_mask & ~adapter->eims_other)); ++} ++ + /** + * igc_get_stats64 - Get System Network Statistics + * @netdev: network interface device structure +@@ -5322,7 +5466,7 @@ static void igc_watchdog_task(struct work_struct *work) + case SPEED_100: + case SPEED_1000: + case SPEED_2500: +- adapter->tx_timeout_factor = 7; ++ adapter->tx_timeout_factor = 1; + break; + } + +@@ -5466,6 +5610,9 @@ static irqreturn_t igc_intr_msi(int irq, void *data) mod_timer(&adapter->watchdog_timer, jiffies + 1); } @@ -199012,7 +241226,7 @@ index 0e19b4d02e628..2a84f57ea68b4 100644 napi_schedule(&q_vector->napi); return IRQ_HANDLED; -@@ -5509,6 +5518,9 @@ static irqreturn_t igc_intr(int irq, void *data) +@@ -5509,6 +5656,9 @@ static irqreturn_t igc_intr(int irq, void *data) mod_timer(&adapter->watchdog_timer, jiffies + 1); } @@ -199022,7 +241236,125 @@ index 0e19b4d02e628..2a84f57ea68b4 100644 napi_schedule(&q_vector->napi); return IRQ_HANDLED; -@@ -6147,6 +6159,9 @@ u32 igc_rd32(struct igc_hw *hw, u32 reg) +@@ -5789,9 +5939,10 @@ static bool validate_schedule(struct igc_adapter *adapter, + return false; + + for (n = 0; n < qopt->num_entries; n++) { +- const struct tc_taprio_sched_entry *e; ++ const struct tc_taprio_sched_entry *e, *prev; + int i; + ++ prev = n ? &qopt->entries[n - 1] : NULL; + e = &qopt->entries[n]; + + /* i225 only supports "global" frame preemption +@@ -5804,7 +5955,12 @@ static bool validate_schedule(struct igc_adapter *adapter, + if (e->gate_mask & BIT(i)) + queue_uses[i]++; + +- if (queue_uses[i] > 1) ++ /* There are limitations: A single queue cannot be ++ * opened and closed multiple times per cycle unless the ++ * gate stays open. Check for it. 
++ */ ++ if (queue_uses[i] > 1 && ++ !(prev->gate_mask & BIT(i))) + return false; + } + } +@@ -5848,12 +6004,19 @@ static int igc_tsn_clear_schedule(struct igc_adapter *adapter) + static int igc_save_qbv_schedule(struct igc_adapter *adapter, + struct tc_taprio_qopt_offload *qopt) + { ++ bool queue_configured[IGC_MAX_TX_QUEUES] = { }; + u32 start_time = 0, end_time = 0; + size_t n; ++ int i; ++ ++ adapter->qbv_enable = qopt->enable; + + if (!qopt->enable) + return igc_tsn_clear_schedule(adapter); + ++ if (qopt->base_time < 0) ++ return -ERANGE; ++ + if (adapter->base_time) + return -EALREADY; + +@@ -5863,28 +6026,58 @@ static int igc_save_qbv_schedule(struct igc_adapter *adapter, + adapter->cycle_time = qopt->cycle_time; + adapter->base_time = qopt->base_time; + +- /* FIXME: be a little smarter about cases when the gate for a +- * queue stays open for more than one entry. +- */ + for (n = 0; n < qopt->num_entries; n++) { + struct tc_taprio_sched_entry *e = &qopt->entries[n]; +- int i; + + end_time += e->interval; + ++ /* If any of the conditions below are true, we need to manually ++ * control the end time of the cycle. ++ * 1. Qbv users can specify a cycle time that is not equal ++ * to the total GCL intervals. Hence, recalculation is ++ * necessary here to exclude the time interval that ++ * exceeds the cycle time. ++ * 2. According to IEEE Std. 802.1Q-2018 section 8.6.9.2, ++ * once the end of the list is reached, it will switch ++ * to the END_OF_CYCLE state and leave the gates in the ++ * same state until the next cycle is started. ++ */ ++ if (end_time > adapter->cycle_time || ++ n + 1 == qopt->num_entries) ++ end_time = adapter->cycle_time; ++ + for (i = 0; i < adapter->num_tx_queues; i++) { + struct igc_ring *ring = adapter->tx_ring[i]; + + if (!(e->gate_mask & BIT(i))) + continue; + +- ring->start_time = start_time; ++ /* Check whether a queue stays open for more than one ++ * entry. If so, keep the start and advance the end ++ * time. ++ */ ++ if (!queue_configured[i]) ++ ring->start_time = start_time; + ring->end_time = end_time; ++ ++ queue_configured[i] = true; + } + + start_time += e->interval; + } + ++ /* Check whether a queue gets configured. ++ * If not, set the start and end time to be end time. 
++ */ ++ for (i = 0; i < adapter->num_tx_queues; i++) { ++ if (!queue_configured[i]) { ++ struct igc_ring *ring = adapter->tx_ring[i]; ++ ++ ring->start_time = end_time; ++ ring->end_time = end_time; ++ } ++ } ++ + return 0; + } + +@@ -6091,6 +6284,7 @@ static const struct net_device_ops igc_netdev_ops = { + .ndo_set_rx_mode = igc_set_rx_mode, + .ndo_set_mac_address = igc_set_mac, + .ndo_change_mtu = igc_change_mtu, ++ .ndo_tx_timeout = igc_tx_timeout, + .ndo_get_stats64 = igc_get_stats64, + .ndo_fix_features = igc_fix_features, + .ndo_set_features = igc_set_features, +@@ -6147,6 +6341,9 @@ u32 igc_rd32(struct igc_hw *hw, u32 reg) u8 __iomem *hw_addr = READ_ONCE(hw->hw_addr); u32 value = 0; @@ -199073,10 +241405,80 @@ index 5cad31c3c7b09..6961f65d36b9a 100644 } else { ret_val = igc_read_xmdio_reg(hw, (u16)offset, dev_addr, diff --git a/drivers/net/ethernet/intel/igc/igc_ptp.c b/drivers/net/ethernet/intel/igc/igc_ptp.c -index 0f021909b430a..8e521f99b80ae 100644 +index 0f021909b430a..743c31659709b 100644 --- a/drivers/net/ethernet/intel/igc/igc_ptp.c +++ b/drivers/net/ethernet/intel/igc/igc_ptp.c -@@ -768,12 +768,25 @@ int igc_ptp_get_ts_config(struct net_device *netdev, struct ifreq *ifr) +@@ -323,7 +323,7 @@ static int igc_ptp_feature_enable_i225(struct ptp_clock_info *ptp, + ts = ns_to_timespec64(ns); + if (rq->perout.index == 1) { + if (use_freq) { +- tsauxc_mask = IGC_TSAUXC_EN_CLK1; ++ tsauxc_mask = IGC_TSAUXC_EN_CLK1 | IGC_TSAUXC_ST1; + tsim_mask = 0; + } else { + tsauxc_mask = IGC_TSAUXC_EN_TT1; +@@ -334,7 +334,7 @@ static int igc_ptp_feature_enable_i225(struct ptp_clock_info *ptp, + freqout = IGC_FREQOUT1; + } else { + if (use_freq) { +- tsauxc_mask = IGC_TSAUXC_EN_CLK0; ++ tsauxc_mask = IGC_TSAUXC_EN_CLK0 | IGC_TSAUXC_ST0; + tsim_mask = 0; + } else { + tsauxc_mask = IGC_TSAUXC_EN_TT0; +@@ -348,10 +348,12 @@ static int igc_ptp_feature_enable_i225(struct ptp_clock_info *ptp, + tsauxc = rd32(IGC_TSAUXC); + tsim = rd32(IGC_TSIM); + if (rq->perout.index == 1) { +- tsauxc &= ~(IGC_TSAUXC_EN_TT1 | IGC_TSAUXC_EN_CLK1); ++ tsauxc &= ~(IGC_TSAUXC_EN_TT1 | IGC_TSAUXC_EN_CLK1 | ++ IGC_TSAUXC_ST1); + tsim &= ~IGC_TSICR_TT1; + } else { +- tsauxc &= ~(IGC_TSAUXC_EN_TT0 | IGC_TSAUXC_EN_CLK0); ++ tsauxc &= ~(IGC_TSAUXC_EN_TT0 | IGC_TSAUXC_EN_CLK0 | ++ IGC_TSAUXC_ST0); + tsim &= ~IGC_TSICR_TT0; + } + if (on) { +@@ -416,10 +418,12 @@ static int igc_ptp_verify_pin(struct ptp_clock_info *ptp, unsigned int pin, + * + * We need to convert the system time value stored in the RX/TXSTMP registers + * into a hwtstamp which can be used by the upper level timestamping functions. ++ * ++ * Returns 0 on success. 
+ **/ +-static void igc_ptp_systim_to_hwtstamp(struct igc_adapter *adapter, +- struct skb_shared_hwtstamps *hwtstamps, +- u64 systim) ++static int igc_ptp_systim_to_hwtstamp(struct igc_adapter *adapter, ++ struct skb_shared_hwtstamps *hwtstamps, ++ u64 systim) + { + switch (adapter->hw.mac.type) { + case igc_i225: +@@ -429,8 +433,9 @@ static void igc_ptp_systim_to_hwtstamp(struct igc_adapter *adapter, + systim & 0xFFFFFFFF); + break; + default: +- break; ++ return -EINVAL; + } ++ return 0; + } + + /** +@@ -655,7 +660,8 @@ static void igc_ptp_tx_hwtstamp(struct igc_adapter *adapter) + + regval = rd32(IGC_TXSTMPL); + regval |= (u64)rd32(IGC_TXSTMPH) << 32; +- igc_ptp_systim_to_hwtstamp(adapter, &shhwtstamps, regval); ++ if (igc_ptp_systim_to_hwtstamp(adapter, &shhwtstamps, regval)) ++ return; + + switch (adapter->link_speed) { + case SPEED_10: +@@ -768,12 +774,25 @@ int igc_ptp_get_ts_config(struct net_device *netdev, struct ifreq *ifr) */ static bool igc_is_crosststamp_supported(struct igc_adapter *adapter) { @@ -199104,7 +241506,7 @@ index 0f021909b430a..8e521f99b80ae 100644 return convert_art_ns_to_tsc(tstamp); #else return (struct system_counterval_t) { }; -@@ -983,6 +996,17 @@ static void igc_ptp_time_restore(struct igc_adapter *adapter) +@@ -983,6 +1002,17 @@ static void igc_ptp_time_restore(struct igc_adapter *adapter) igc_ptp_write_i225(adapter, &ts); } @@ -199122,7 +241524,7 @@ index 0f021909b430a..8e521f99b80ae 100644 /** * igc_ptp_suspend - Disable PTP work items and prepare for suspend * @adapter: Board private structure -@@ -1000,8 +1024,10 @@ void igc_ptp_suspend(struct igc_adapter *adapter) +@@ -1000,8 +1030,10 @@ void igc_ptp_suspend(struct igc_adapter *adapter) adapter->ptp_tx_skb = NULL; clear_bit_unlock(__IGC_PTP_TX_IN_PROGRESS, &adapter->state); @@ -199155,11 +241557,51 @@ index e197a33d93a03..026c3b65fc37a 100644 +#define IGC_REMOVED(h) unlikely(!(h)) + #endif +diff --git a/drivers/net/ethernet/intel/igc/igc_tsn.c b/drivers/net/ethernet/intel/igc/igc_tsn.c +index 0fce22de2ab85..356c7455c5cee 100644 +--- a/drivers/net/ethernet/intel/igc/igc_tsn.c ++++ b/drivers/net/ethernet/intel/igc/igc_tsn.c +@@ -36,7 +36,7 @@ static unsigned int igc_tsn_new_flags(struct igc_adapter *adapter) + { + unsigned int new_flags = adapter->flags & ~IGC_FLAG_TSN_ANY_ENABLED; + +- if (adapter->base_time) ++ if (adapter->qbv_enable) + new_flags |= IGC_FLAG_TSN_QBV_ENABLED; + + if (is_any_launchtime(adapter)) +@@ -110,15 +110,8 @@ static int igc_tsn_enable_offload(struct igc_adapter *adapter) + wr32(IGC_STQT(i), ring->start_time); + wr32(IGC_ENDQT(i), ring->end_time); + +- if (adapter->base_time) { +- /* If we have a base_time we are in "taprio" +- * mode and we need to be strict about the +- * cycles: only transmit a packet if it can be +- * completed during that cycle. 
+- */ +- txqctl |= IGC_TXQCTL_STRICT_CYCLE | +- IGC_TXQCTL_STRICT_END; +- } ++ txqctl |= IGC_TXQCTL_STRICT_CYCLE | ++ IGC_TXQCTL_STRICT_END; + + if (ring->launchtime_enable) + txqctl |= IGC_TXQCTL_QUEUE_MODE_LAUNCHT; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h -index a604552fa634e..c375a5d54b40d 100644 +index a604552fa634e..737590a0d849e 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h -@@ -770,6 +770,7 @@ struct ixgbe_adapter { +@@ -67,6 +67,8 @@ + #define IXGBE_RXBUFFER_4K 4096 + #define IXGBE_MAX_RXBUFFER 16384 /* largest size for a single descriptor */ + ++#define IXGBE_PKT_HDR_PAD (ETH_HLEN + ETH_FCS_LEN + (VLAN_HLEN * 2)) ++ + /* Attempt to maximize the headroom available for incoming frames. We + * use a 2K buffer for receives and need 1536/1534 to store the data for + * the frame. This leaves us with 512 bytes of room. From that we need +@@ -770,6 +772,7 @@ struct ixgbe_adapter { #ifdef CONFIG_IXGBE_IPSEC struct ixgbe_ipsec *ipsec; #endif /* CONFIG_IXGBE_IPSEC */ @@ -199182,7 +241624,7 @@ index e596e1a9fc757..69d11ff7677d6 100644 goto err_out; } diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c -index 13c4782b920a7..8cb20af51ecd6 100644 +index 13c4782b920a7..6fb9c18297bc8 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -5526,6 +5526,10 @@ static int ixgbe_non_sfp_link_config(struct ixgbe_hw *hw) @@ -199206,6 +241648,95 @@ index 13c4782b920a7..8cb20af51ecd6 100644 #ifdef CONFIG_IXGBE_DCB ixgbe_init_dcb(adapter); #endif +@@ -6722,6 +6729,18 @@ static void ixgbe_free_all_rx_resources(struct ixgbe_adapter *adapter) + ixgbe_free_rx_resources(adapter->rx_ring[i]); + } + ++/** ++ * ixgbe_max_xdp_frame_size - returns the maximum allowed frame size for XDP ++ * @adapter: device handle, pointer to adapter ++ */ ++static int ixgbe_max_xdp_frame_size(struct ixgbe_adapter *adapter) ++{ ++ if (PAGE_SIZE >= 8192 || adapter->flags2 & IXGBE_FLAG2_RX_LEGACY) ++ return IXGBE_RXBUFFER_2K; ++ else ++ return IXGBE_RXBUFFER_3K; ++} ++ + /** + * ixgbe_change_mtu - Change the Maximum Transfer Unit + * @netdev: network interface device structure +@@ -6733,18 +6752,12 @@ static int ixgbe_change_mtu(struct net_device *netdev, int new_mtu) + { + struct ixgbe_adapter *adapter = netdev_priv(netdev); + +- if (adapter->xdp_prog) { +- int new_frame_size = new_mtu + ETH_HLEN + ETH_FCS_LEN + +- VLAN_HLEN; +- int i; +- +- for (i = 0; i < adapter->num_rx_queues; i++) { +- struct ixgbe_ring *ring = adapter->rx_ring[i]; ++ if (ixgbe_enabled_xdp_adapter(adapter)) { ++ int new_frame_size = new_mtu + IXGBE_PKT_HDR_PAD; + +- if (new_frame_size > ixgbe_rx_bufsz(ring)) { +- e_warn(probe, "Requested MTU size is not supported with XDP\n"); +- return -EINVAL; +- } ++ if (new_frame_size > ixgbe_max_xdp_frame_size(adapter)) { ++ e_warn(probe, "Requested MTU size is not supported with XDP\n"); ++ return -EINVAL; + } + } + +diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c +index 24aa97f993ca1..123dca9ce4683 100644 +--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c ++++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c +@@ -855,9 +855,11 @@ static struct pci_dev *ixgbe_get_first_secondary_devfn(unsigned int devfn) + rp_pdev = pci_get_domain_bus_and_slot(0, 0, devfn); + if (rp_pdev && rp_pdev->subordinate) { + bus = rp_pdev->subordinate->number; ++ 
pci_dev_put(rp_pdev); + return pci_get_domain_bus_and_slot(0, bus, 0); + } + ++ pci_dev_put(rp_pdev); + return NULL; + } + +@@ -874,6 +876,7 @@ static bool ixgbe_x550em_a_has_mii(struct ixgbe_hw *hw) + struct ixgbe_adapter *adapter = hw->back; + struct pci_dev *pdev = adapter->pdev; + struct pci_dev *func0_pdev; ++ bool has_mii = false; + + /* For the C3000 family of SoCs (x550em_a) the internal ixgbe devices + * are always downstream of root ports @ 0000:00:16.0 & 0000:00:17.0 +@@ -884,15 +887,16 @@ static bool ixgbe_x550em_a_has_mii(struct ixgbe_hw *hw) + func0_pdev = ixgbe_get_first_secondary_devfn(PCI_DEVFN(0x16, 0)); + if (func0_pdev) { + if (func0_pdev == pdev) +- return true; +- else +- return false; ++ has_mii = true; ++ goto out; + } + func0_pdev = ixgbe_get_first_secondary_devfn(PCI_DEVFN(0x17, 0)); + if (func0_pdev == pdev) +- return true; ++ has_mii = true; + +- return false; ++out: ++ pci_dev_put(func0_pdev); ++ return has_mii; + } + + /** diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c index 23ddfd79fc8b6..29be1d6eca436 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c @@ -199450,7 +241981,7 @@ index b1d22e4d5ec9c..b399b9c147172 100644 break; diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c -index c714e1ecd3089..7ef2e1241a76e 100644 +index c714e1ecd3089..0e7ff15af9687 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -1984,14 +1984,15 @@ static void ixgbevf_set_rx_buffer_len(struct ixgbevf_adapter *adapter, @@ -199475,6 +242006,42 @@ index c714e1ecd3089..7ef2e1241a76e 100644 } /** +@@ -4858,6 +4859,8 @@ static struct pci_driver ixgbevf_driver = { + **/ + static int __init ixgbevf_init_module(void) + { ++ int err; ++ + pr_info("%s\n", ixgbevf_driver_string); + pr_info("%s\n", ixgbevf_copyright); + ixgbevf_wq = create_singlethread_workqueue(ixgbevf_driver_name); +@@ -4866,7 +4869,13 @@ static int __init ixgbevf_init_module(void) + return -ENOMEM; + } + +- return pci_register_driver(&ixgbevf_driver); ++ err = pci_register_driver(&ixgbevf_driver); ++ if (err) { ++ destroy_workqueue(ixgbevf_wq); ++ return err; ++ } ++ ++ return 0; + } + + module_init(ixgbevf_init_module); +diff --git a/drivers/net/ethernet/lantiq_etop.c b/drivers/net/ethernet/lantiq_etop.c +index 62f8c52121822..057d655d17692 100644 +--- a/drivers/net/ethernet/lantiq_etop.c ++++ b/drivers/net/ethernet/lantiq_etop.c +@@ -466,7 +466,6 @@ ltq_etop_tx(struct sk_buff *skb, struct net_device *dev) + len = skb->len < ETH_ZLEN ? 
ETH_ZLEN : skb->len; + + if ((desc->ctl & (LTQ_DMA_OWN | LTQ_DMA_C)) || ch->skb[ch->dma.desc]) { +- dev_kfree_skb_any(skb); + netdev_err(dev, "tx ring full\n"); + netif_tx_stop_queue(txq); + return NETDEV_TX_BUSY; diff --git a/drivers/net/ethernet/lantiq_xrx200.c b/drivers/net/ethernet/lantiq_xrx200.c index fb78f17d734fe..b02f796b5422f 100644 --- a/drivers/net/ethernet/lantiq_xrx200.c @@ -199514,10 +242081,18 @@ index a9bdbf0dcfe1e..5bb1cc8a2ce13 100644 return 0; } diff --git a/drivers/net/ethernet/marvell/mv643xx_eth.c b/drivers/net/ethernet/marvell/mv643xx_eth.c -index 28d5ad296646a..90fd5588e20dd 100644 +index 28d5ad296646a..fc67e9d31f6da 100644 --- a/drivers/net/ethernet/marvell/mv643xx_eth.c +++ b/drivers/net/ethernet/marvell/mv643xx_eth.c -@@ -2700,6 +2700,16 @@ MODULE_DEVICE_TABLE(of, mv643xx_eth_shared_ids); +@@ -2477,6 +2477,7 @@ out_free: + for (i = 0; i < mp->rxq_count; i++) + rxq_deinit(mp->rxq + i); + out: ++ napi_disable(&mp->napi); + free_irq(dev->irq, dev); + + return err; +@@ -2700,6 +2701,16 @@ MODULE_DEVICE_TABLE(of, mv643xx_eth_shared_ids); static struct platform_device *port_platdev[3]; @@ -199534,7 +242109,7 @@ index 28d5ad296646a..90fd5588e20dd 100644 static int mv643xx_eth_shared_of_add_port(struct platform_device *pdev, struct device_node *pnp) { -@@ -2736,7 +2746,9 @@ static int mv643xx_eth_shared_of_add_port(struct platform_device *pdev, +@@ -2736,7 +2747,9 @@ static int mv643xx_eth_shared_of_add_port(struct platform_device *pdev, return -EINVAL; } @@ -199545,7 +242120,7 @@ index 28d5ad296646a..90fd5588e20dd 100644 mv643xx_eth_property(pnp, "tx-queue-size", ppd.tx_queue_size); mv643xx_eth_property(pnp, "tx-sram-addr", ppd.tx_sram_addr); -@@ -2800,21 +2812,13 @@ static int mv643xx_eth_shared_of_probe(struct platform_device *pdev) +@@ -2800,21 +2813,13 @@ static int mv643xx_eth_shared_of_probe(struct platform_device *pdev) ret = mv643xx_eth_shared_of_add_port(pdev, pnp); if (ret) { of_node_put(pnp); @@ -199587,6 +242162,19 @@ index 62a97c46fba05..ef878973b8597 100644 static struct platform_driver orion_mdio_driver = { .probe = orion_mdio_probe, +diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c +index 9d460a2706012..5c431a3697622 100644 +--- a/drivers/net/ethernet/marvell/mvneta.c ++++ b/drivers/net/ethernet/marvell/mvneta.c +@@ -4162,7 +4162,7 @@ static void mvneta_percpu_elect(struct mvneta_port *pp) + /* Use the cpu associated to the rxq when it is online, in all + * the other cases, use the cpu 0 which can't be offline. 
+ */ +- if (cpu_online(pp->rxq_def)) ++ if (pp->rxq_def < nr_cpu_ids && cpu_online(pp->rxq_def)) + elected_cpu = pp->rxq_def; + + max_cpu = num_present_cpus(); diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2.h b/drivers/net/ethernet/marvell/mvpp2/mvpp2.h index cf8acabb90ac1..72608a47d4e02 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2.h @@ -199630,7 +242218,7 @@ index 4a3baa7e01424..75e83ea2a926e 100644 mvpp2_root = debugfs_create_dir(MVPP2_DRIVER_NAME, NULL); diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c -index d5c92e43f89e6..ae586f8895fce 100644 +index d5c92e43f89e6..524913c28f3b6 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c @@ -1605,7 +1605,7 @@ static void mvpp22_gop_fca_set_periodic_timer(struct mvpp2_port *port) @@ -199812,7 +242400,30 @@ index d5c92e43f89e6..ae586f8895fce 100644 if (err == 0) phy_power_off(port->comphy); } -@@ -7453,7 +7457,7 @@ static int mvpp2_probe(struct platform_device *pdev) +@@ -7352,6 +7356,7 @@ static int mvpp2_get_sram(struct platform_device *pdev, + struct mvpp2 *priv) + { + struct resource *res; ++ void __iomem *base; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 2); + if (!res) { +@@ -7362,9 +7367,12 @@ static int mvpp2_get_sram(struct platform_device *pdev, + return 0; + } + +- priv->cm3_base = devm_ioremap_resource(&pdev->dev, res); ++ base = devm_ioremap_resource(&pdev->dev, res); ++ if (IS_ERR(base)) ++ return PTR_ERR(base); + +- return PTR_ERR_OR_ZERO(priv->cm3_base); ++ priv->cm3_base = base; ++ return 0; + } + + static int mvpp2_probe(struct platform_device *pdev) +@@ -7453,7 +7461,7 @@ static int mvpp2_probe(struct platform_device *pdev) shared = num_present_cpus() - priv->nthreads; if (shared > 0) @@ -199821,7 +242432,7 @@ index d5c92e43f89e6..ae586f8895fce 100644 min_t(int, shared, MVPP2_MAX_THREADS)); for (i = 0; i < MVPP2_MAX_THREADS; i++) { -@@ -7706,7 +7710,18 @@ static struct platform_driver mvpp2_driver = { +@@ -7706,7 +7714,18 @@ static struct platform_driver mvpp2_driver = { }, }; @@ -199854,9 +242465,21 @@ index 3f982ccf2c85f..639893d870550 100644 depends on PCI depends on PTP_1588_CLOCK_OPTIONAL diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c -index 34a089b71e554..6b335139abe7f 100644 +index 34a089b71e554..fd0a31bf94fea 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c +@@ -695,9 +695,9 @@ int cgx_lmac_rx_tx_enable(void *cgxd, int lmac_id, bool enable) + + cfg = cgx_read(cgx, lmac_id, CGXX_CMRX_CFG); + if (enable) +- cfg |= CMR_EN | DATA_PKT_RX_EN | DATA_PKT_TX_EN; ++ cfg |= DATA_PKT_RX_EN | DATA_PKT_TX_EN; + else +- cfg &= ~(CMR_EN | DATA_PKT_RX_EN | DATA_PKT_TX_EN); ++ cfg &= ~(DATA_PKT_RX_EN | DATA_PKT_TX_EN); + cgx_write(cgx, lmac_id, CGXX_CMRX_CFG, cfg); + return 0; + } @@ -838,9 +838,6 @@ void cgx_lmac_ptp_config(void *cgxd, int lmac_id, bool enable) if (!cgx) return; @@ -199891,6 +242514,18 @@ index 34a089b71e554..6b335139abe7f 100644 }; static int cgx_probe(struct pci_dev *pdev, const struct pci_device_id *id) +diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cgx.h b/drivers/net/ethernet/marvell/octeontx2/af/cgx.h +index ab1e4abdea38b..5714280a4252d 100644 +--- a/drivers/net/ethernet/marvell/octeontx2/af/cgx.h ++++ b/drivers/net/ethernet/marvell/octeontx2/af/cgx.h +@@ -30,7 +30,6 @@ + #define CMR_P2X_SEL_SHIFT 59ULL + #define 
CMR_P2X_SEL_NIX0 1ULL + #define CMR_P2X_SEL_NIX1 2ULL +-#define CMR_EN BIT_ULL(55) + #define DATA_PKT_TX_EN BIT_ULL(53) + #define DATA_PKT_RX_EN BIT_ULL(54) + #define CGX_LMAC_TYPE_SHIFT 40 diff --git a/drivers/net/ethernet/marvell/octeontx2/af/lmac_common.h b/drivers/net/ethernet/marvell/octeontx2/af/lmac_common.h index c38306b3384a7..b33e7d1d0851c 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/lmac_common.h @@ -201346,10 +243981,19 @@ index 1f90a7403392d..4895faa667b50 100644 /* Registers that can be accessed from PF/VF */ if ((offset & 0xFF000) == CPT_AF_LFX_CTL(0) || diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c -index 49d822a98adab..f001579569a2b 100644 +index 49d822a98adab..66d34699f160c 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c -@@ -1131,6 +1131,8 @@ static void print_nix_cn10k_sq_ctx(struct seq_file *m, +@@ -441,6 +441,8 @@ static int rvu_dbg_rvu_pf_cgx_map_display(struct seq_file *filp, void *unused) + sprintf(lmac, "LMAC%d", lmac_id); + seq_printf(filp, "%s\t0x%x\t\tNIX%d\t\t%s\t%s\n", + dev_name(&pdev->dev), pcifunc, blkid, cgx, lmac); ++ ++ pci_dev_put(pdev); + } + return 0; + } +@@ -1131,6 +1133,8 @@ static void print_nix_cn10k_sq_ctx(struct seq_file *m, seq_printf(m, "W3: head_offset\t\t\t%d\nW3: smenq_next_sqb_vld\t\t%d\n\n", sq_ctx->head_offset, sq_ctx->smenq_next_sqb_vld); @@ -201358,8 +244002,16 @@ index 49d822a98adab..f001579569a2b 100644 seq_printf(m, "W4: next_sqb \t\t\t%llx\n\n", sq_ctx->next_sqb); seq_printf(m, "W5: tail_sqb \t\t\t%llx\n\n", sq_ctx->tail_sqb); seq_printf(m, "W6: smenq_sqb \t\t\t%llx\n\n", sq_ctx->smenq_sqb); +@@ -2125,6 +2129,7 @@ static int cgx_print_dmac_flt(struct seq_file *s, int lmac_id) + } + } + ++ pci_dev_put(pdev); + return 0; + } + diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c -index 6970540dc4709..603361c94786a 100644 +index 6970540dc4709..09892703cfd46 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c @@ -28,6 +28,7 @@ static int nix_verify_bandprof(struct nix_cn10k_aq_enq_req *req, @@ -201563,6 +244215,15 @@ index 6970540dc4709..603361c94786a 100644 nix_ctx_free(rvu, pfvf); nix_free_all_bandprof(rvu, pcifunc); +@@ -4736,6 +4832,8 @@ static int nix_setup_ipolicers(struct rvu *rvu, + ipolicer->ref_count = devm_kcalloc(rvu->dev, + ipolicer->band_prof.max, + sizeof(u16), GFP_KERNEL); ++ if (!ipolicer->ref_count) ++ return -ENOMEM; + } + + /* Set policer timeunit to 2us ie (19 + 1) * 100 nsec = 2us */ diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c index 5efb4174e82df..d1249da7a18fb 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c @@ -201900,8 +244561,33 @@ index 51ddc7b81d0bd..750aaa1676878 100644 /* VF's MAC address is being changed via PF */ if (pf_set_vfs_mac) { ether_addr_copy(pfvf->default_mac, req->packet.dmac); +diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_sdp.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_sdp.c +index b04fb226f708a..ae50d56258ec6 100644 +--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_sdp.c ++++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_sdp.c +@@ -62,15 +62,18 @@ int rvu_sdp_init(struct rvu *rvu) + pfvf->sdp_info = devm_kzalloc(rvu->dev, + sizeof(struct sdp_node_info), + GFP_KERNEL); +- 
if (!pfvf->sdp_info) ++ if (!pfvf->sdp_info) { ++ pci_dev_put(pdev); + return -ENOMEM; ++ } + + dev_info(rvu->dev, "SDP PF number:%d\n", sdp_pf_num[i]); + +- put_device(&pdev->dev); + i++; + } + ++ pci_dev_put(pdev); ++ + return 0; + } + diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c -index 78df173e6df24..7cf24dd5c8782 100644 +index 78df173e6df24..2e225309de9ca 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c @@ -631,6 +631,12 @@ int otx2_txschq_config(struct otx2_nic *pfvf, int lvl) @@ -201935,7 +244621,62 @@ index 78df173e6df24..7cf24dd5c8782 100644 } else if (lvl == NIX_TXSCH_LVL_TL1) { /* Default config for TL1. * For VF this is always ignored. -@@ -1563,6 +1570,8 @@ void mbox_handler_nix_txsch_alloc(struct otx2_nic *pf, +@@ -1006,6 +1013,9 @@ int otx2_config_nix_queues(struct otx2_nic *pfvf) + return err; + } + ++ pfvf->cq_op_addr = (__force u64 *)otx2_get_regaddr(pfvf, ++ NIX_LF_CQ_OP_STATUS); ++ + /* Initialize work queue for receive buffer refill */ + pfvf->refill_wrk = devm_kcalloc(pfvf->dev, pfvf->qset.cq_cnt, + sizeof(struct refill_work), GFP_KERNEL); +@@ -1312,18 +1322,23 @@ int otx2_sq_aura_pool_init(struct otx2_nic *pfvf) + sq = &qset->sq[qidx]; + sq->sqb_count = 0; + sq->sqb_ptrs = kcalloc(num_sqbs, sizeof(*sq->sqb_ptrs), GFP_KERNEL); +- if (!sq->sqb_ptrs) +- return -ENOMEM; ++ if (!sq->sqb_ptrs) { ++ err = -ENOMEM; ++ goto err_mem; ++ } + + for (ptr = 0; ptr < num_sqbs; ptr++) { +- if (otx2_alloc_rbuf(pfvf, pool, &bufptr)) +- return -ENOMEM; ++ err = otx2_alloc_rbuf(pfvf, pool, &bufptr); ++ if (err) ++ goto err_mem; + pfvf->hw_ops->aura_freeptr(pfvf, pool_id, bufptr); + sq->sqb_ptrs[sq->sqb_count++] = (u64)bufptr; + } + } + +- return 0; ++err_mem: ++ return err ? 
-ENOMEM : 0; ++ + fail: + otx2_mbox_reset(&pfvf->mbox.mbox, 0); + otx2_aura_pool_free(pfvf); +@@ -1366,13 +1381,13 @@ int otx2_rq_aura_pool_init(struct otx2_nic *pfvf) + for (pool_id = 0; pool_id < hw->rqpool_cnt; pool_id++) { + pool = &pfvf->qset.pool[pool_id]; + for (ptr = 0; ptr < num_ptrs; ptr++) { +- if (otx2_alloc_rbuf(pfvf, pool, &bufptr)) ++ err = otx2_alloc_rbuf(pfvf, pool, &bufptr); ++ if (err) + return -ENOMEM; + pfvf->hw_ops->aura_freeptr(pfvf, pool_id, + bufptr + OTX2_HEAD_ROOM); + } + } +- + return 0; + fail: + otx2_mbox_reset(&pfvf->mbox.mbox, 0); +@@ -1563,6 +1578,8 @@ void mbox_handler_nix_txsch_alloc(struct otx2_nic *pf, for (schq = 0; schq < rsp->schq[lvl]; schq++) pf->hw.txschq_list[lvl][schq] = rsp->schq_list[lvl][schq]; @@ -201945,7 +244686,7 @@ index 78df173e6df24..7cf24dd5c8782 100644 EXPORT_SYMBOL(mbox_handler_nix_txsch_alloc); diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h -index a51ecd771d075..4ecd0ef05f3b4 100644 +index a51ecd771d075..e685628b92942 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h @@ -182,6 +182,7 @@ struct otx2_hw { @@ -201956,7 +244697,15 @@ index a51ecd771d075..4ecd0ef05f3b4 100644 u16 txschq_list[NIX_TXSCH_LVL_CNT][MAX_TXSCHQ_PER_FUNC]; u16 matchall_ipolicer; u32 dwrr_mtu; -@@ -591,6 +592,7 @@ static inline void __cn10k_aura_freeptr(struct otx2_nic *pfvf, u64 aura, +@@ -336,6 +337,7 @@ struct otx2_nic { + #define OTX2_FLAG_TC_MATCHALL_INGRESS_ENABLED BIT_ULL(13) + #define OTX2_FLAG_DMACFLTR_SUPPORT BIT_ULL(14) + u64 flags; ++ u64 *cq_op_addr; + + struct otx2_qset qset; + struct otx2_hw hw; +@@ -591,6 +593,7 @@ static inline void __cn10k_aura_freeptr(struct otx2_nic *pfvf, u64 aura, size++; tar_addr |= ((size - 1) & 0x7) << 4; } @@ -201964,11 +244713,30 @@ index a51ecd771d075..4ecd0ef05f3b4 100644 memcpy((u64 *)lmt_info->lmt_addr, ptrs, sizeof(u64) * num_ptrs); /* Perform LMTST flush */ cn10k_lmt_flush(val, tar_addr); +@@ -602,8 +605,10 @@ static inline void cn10k_aura_freeptr(void *dev, int aura, u64 buf) + u64 ptrs[2]; + + ptrs[1] = buf; ++ get_cpu(); + /* Free only one buffer at time during init and teardown */ + __cn10k_aura_freeptr(pfvf, aura, ptrs, 2); ++ put_cpu(); + } + + /* Alloc pointer from pool/aura */ diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c -index 53df7fff92c40..b1894d4045b8d 100644 +index 53df7fff92c40..ab291c2c30144 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c -@@ -386,7 +386,12 @@ static int otx2_forward_vf_mbox_msgs(struct otx2_nic *pf, +@@ -13,6 +13,7 @@ + #include <linux/if_vlan.h> + #include <linux/iommu.h> + #include <net/ip.h> ++#include <linux/bitfield.h> + + #include "otx2_reg.h" + #include "otx2_common.h" +@@ -386,7 +387,12 @@ static int otx2_forward_vf_mbox_msgs(struct otx2_nic *pf, dst_mdev->msg_size = mbox_hdr->msg_size; dst_mdev->num_msgs = num_msgs; err = otx2_sync_mbox_msg(dst_mbox); @@ -201982,7 +244750,165 @@ index 53df7fff92c40..b1894d4045b8d 100644 dev_warn(pf->dev, "AF not responding to VF%d messages\n", vf); /* restore PF mbase and exit */ -@@ -1493,6 +1498,44 @@ static void otx2_free_hw_resources(struct otx2_nic *pf) +@@ -1148,6 +1154,59 @@ int otx2_set_real_num_queues(struct net_device *netdev, + } + EXPORT_SYMBOL(otx2_set_real_num_queues); + ++static char 
*nix_sqoperr_e_str[NIX_SQOPERR_MAX] = { ++ "NIX_SQOPERR_OOR", ++ "NIX_SQOPERR_CTX_FAULT", ++ "NIX_SQOPERR_CTX_POISON", ++ "NIX_SQOPERR_DISABLED", ++ "NIX_SQOPERR_SIZE_ERR", ++ "NIX_SQOPERR_OFLOW", ++ "NIX_SQOPERR_SQB_NULL", ++ "NIX_SQOPERR_SQB_FAULT", ++ "NIX_SQOPERR_SQE_SZ_ZERO", ++}; ++ ++static char *nix_mnqerr_e_str[NIX_MNQERR_MAX] = { ++ "NIX_MNQERR_SQ_CTX_FAULT", ++ "NIX_MNQERR_SQ_CTX_POISON", ++ "NIX_MNQERR_SQB_FAULT", ++ "NIX_MNQERR_SQB_POISON", ++ "NIX_MNQERR_TOTAL_ERR", ++ "NIX_MNQERR_LSO_ERR", ++ "NIX_MNQERR_CQ_QUERY_ERR", ++ "NIX_MNQERR_MAX_SQE_SIZE_ERR", ++ "NIX_MNQERR_MAXLEN_ERR", ++ "NIX_MNQERR_SQE_SIZEM1_ZERO", ++}; ++ ++static char *nix_snd_status_e_str[NIX_SND_STATUS_MAX] = { ++ "NIX_SND_STATUS_GOOD", ++ "NIX_SND_STATUS_SQ_CTX_FAULT", ++ "NIX_SND_STATUS_SQ_CTX_POISON", ++ "NIX_SND_STATUS_SQB_FAULT", ++ "NIX_SND_STATUS_SQB_POISON", ++ "NIX_SND_STATUS_HDR_ERR", ++ "NIX_SND_STATUS_EXT_ERR", ++ "NIX_SND_STATUS_JUMP_FAULT", ++ "NIX_SND_STATUS_JUMP_POISON", ++ "NIX_SND_STATUS_CRC_ERR", ++ "NIX_SND_STATUS_IMM_ERR", ++ "NIX_SND_STATUS_SG_ERR", ++ "NIX_SND_STATUS_MEM_ERR", ++ "NIX_SND_STATUS_INVALID_SUBDC", ++ "NIX_SND_STATUS_SUBDC_ORDER_ERR", ++ "NIX_SND_STATUS_DATA_FAULT", ++ "NIX_SND_STATUS_DATA_POISON", ++ "NIX_SND_STATUS_NPC_DROP_ACTION", ++ "NIX_SND_STATUS_LOCK_VIOL", ++ "NIX_SND_STATUS_NPC_UCAST_CHAN_ERR", ++ "NIX_SND_STATUS_NPC_MCAST_CHAN_ERR", ++ "NIX_SND_STATUS_NPC_MCAST_ABORT", ++ "NIX_SND_STATUS_NPC_VTAG_PTR_ERR", ++ "NIX_SND_STATUS_NPC_VTAG_SIZE_ERR", ++ "NIX_SND_STATUS_SEND_STATS_ERR", ++}; ++ + static irqreturn_t otx2_q_intr_handler(int irq, void *data) + { + struct otx2_nic *pf = data; +@@ -1181,46 +1240,67 @@ static irqreturn_t otx2_q_intr_handler(int irq, void *data) + + /* SQ */ + for (qidx = 0; qidx < pf->hw.tx_queues; qidx++) { ++ u64 sq_op_err_dbg, mnq_err_dbg, snd_err_dbg; ++ u8 sq_op_err_code, mnq_err_code, snd_err_code; ++ ++ /* Below debug registers captures first errors corresponding to ++ * those registers. We don't have to check against SQ qid as ++ * these are fatal errors. 
++ */ ++ + ptr = otx2_get_regaddr(pf, NIX_LF_SQ_OP_INT); + val = otx2_atomic64_add((qidx << 44), ptr); + otx2_write64(pf, NIX_LF_SQ_OP_INT, (qidx << 44) | + (val & NIX_SQINT_BITS)); + +- if (!(val & (NIX_SQINT_BITS | BIT_ULL(42)))) +- continue; +- + if (val & BIT_ULL(42)) { + netdev_err(pf->netdev, "SQ%lld: error reading NIX_LF_SQ_OP_INT, NIX_LF_ERR_INT 0x%llx\n", + qidx, otx2_read64(pf, NIX_LF_ERR_INT)); +- } else { +- if (val & BIT_ULL(NIX_SQINT_LMT_ERR)) { +- netdev_err(pf->netdev, "SQ%lld: LMT store error NIX_LF_SQ_OP_ERR_DBG:0x%llx", +- qidx, +- otx2_read64(pf, +- NIX_LF_SQ_OP_ERR_DBG)); +- otx2_write64(pf, NIX_LF_SQ_OP_ERR_DBG, +- BIT_ULL(44)); +- } +- if (val & BIT_ULL(NIX_SQINT_MNQ_ERR)) { +- netdev_err(pf->netdev, "SQ%lld: Meta-descriptor enqueue error NIX_LF_MNQ_ERR_DGB:0x%llx\n", +- qidx, +- otx2_read64(pf, NIX_LF_MNQ_ERR_DBG)); +- otx2_write64(pf, NIX_LF_MNQ_ERR_DBG, +- BIT_ULL(44)); +- } +- if (val & BIT_ULL(NIX_SQINT_SEND_ERR)) { +- netdev_err(pf->netdev, "SQ%lld: Send error, NIX_LF_SEND_ERR_DBG 0x%llx", +- qidx, +- otx2_read64(pf, +- NIX_LF_SEND_ERR_DBG)); +- otx2_write64(pf, NIX_LF_SEND_ERR_DBG, +- BIT_ULL(44)); +- } +- if (val & BIT_ULL(NIX_SQINT_SQB_ALLOC_FAIL)) +- netdev_err(pf->netdev, "SQ%lld: SQB allocation failed", +- qidx); ++ goto done; ++ } ++ ++ sq_op_err_dbg = otx2_read64(pf, NIX_LF_SQ_OP_ERR_DBG); ++ if (!(sq_op_err_dbg & BIT(44))) ++ goto chk_mnq_err_dbg; ++ ++ sq_op_err_code = FIELD_GET(GENMASK(7, 0), sq_op_err_dbg); ++ netdev_err(pf->netdev, "SQ%lld: NIX_LF_SQ_OP_ERR_DBG(%llx) err=%s\n", ++ qidx, sq_op_err_dbg, nix_sqoperr_e_str[sq_op_err_code]); ++ ++ otx2_write64(pf, NIX_LF_SQ_OP_ERR_DBG, BIT_ULL(44)); ++ ++ if (sq_op_err_code == NIX_SQOPERR_SQB_NULL) ++ goto chk_mnq_err_dbg; ++ ++ /* Err is not NIX_SQOPERR_SQB_NULL, call aq function to read SQ structure. ++ * TODO: But we are in irq context. 
How to call mbox functions which does sleep ++ */ ++ ++chk_mnq_err_dbg: ++ mnq_err_dbg = otx2_read64(pf, NIX_LF_MNQ_ERR_DBG); ++ if (!(mnq_err_dbg & BIT(44))) ++ goto chk_snd_err_dbg; ++ ++ mnq_err_code = FIELD_GET(GENMASK(7, 0), mnq_err_dbg); ++ netdev_err(pf->netdev, "SQ%lld: NIX_LF_MNQ_ERR_DBG(%llx) err=%s\n", ++ qidx, mnq_err_dbg, nix_mnqerr_e_str[mnq_err_code]); ++ otx2_write64(pf, NIX_LF_MNQ_ERR_DBG, BIT_ULL(44)); ++ ++chk_snd_err_dbg: ++ snd_err_dbg = otx2_read64(pf, NIX_LF_SEND_ERR_DBG); ++ if (snd_err_dbg & BIT(44)) { ++ snd_err_code = FIELD_GET(GENMASK(7, 0), snd_err_dbg); ++ netdev_err(pf->netdev, "SQ%lld: NIX_LF_SND_ERR_DBG:0x%llx err=%s\n", ++ qidx, snd_err_dbg, nix_snd_status_e_str[snd_err_code]); ++ otx2_write64(pf, NIX_LF_SEND_ERR_DBG, BIT_ULL(44)); + } + ++done: ++ /* Print values and reset */ ++ if (val & BIT_ULL(NIX_SQINT_SQB_ALLOC_FAIL)) ++ netdev_err(pf->netdev, "SQ%lld: SQB allocation failed", ++ qidx); ++ + schedule_work(&pf->reset_task); + } + +@@ -1493,6 +1573,44 @@ static void otx2_free_hw_resources(struct otx2_nic *pf) mutex_unlock(&mbox->lock); } @@ -202027,7 +244953,7 @@ index 53df7fff92c40..b1894d4045b8d 100644 int otx2_open(struct net_device *netdev) { struct otx2_nic *pf = netdev_priv(netdev); -@@ -1646,6 +1689,8 @@ int otx2_open(struct net_device *netdev) +@@ -1646,6 +1764,8 @@ int otx2_open(struct net_device *netdev) if (err) goto err_tx_stop_queues; @@ -202036,7 +244962,7 @@ index 53df7fff92c40..b1894d4045b8d 100644 return 0; err_tx_stop_queues: -@@ -1791,43 +1836,11 @@ static void otx2_set_rx_mode(struct net_device *netdev) +@@ -1791,43 +1911,11 @@ static void otx2_set_rx_mode(struct net_device *netdev) queue_work(pf->otx2_wq, &pf->rx_mode_work); } @@ -202050,7 +244976,7 @@ index 53df7fff92c40..b1894d4045b8d 100644 - - if (!(netdev->flags & IFF_UP)) - return; -- + - if ((netdev->flags & IFF_PROMISC) || - (netdev_uc_count(netdev) > OTX2_MAX_UNICAST_FLOWS)) { - promisc = true; @@ -202059,7 +244985,7 @@ index 53df7fff92c40..b1894d4045b8d 100644 - /* Write unicast address to mcam entries or del from mcam */ - if (!promisc && netdev->priv_flags & IFF_UNICAST_FLT) - __dev_uc_sync(netdev, otx2_add_macfilter, otx2_del_macfilter); - +- - mutex_lock(&pf->mbox.lock); - req = otx2_mbox_alloc_msg_nix_set_rx_mode(&pf->mbox); - if (!req) { @@ -202082,7 +245008,7 @@ index 53df7fff92c40..b1894d4045b8d 100644 } static int otx2_set_features(struct net_device *netdev, -@@ -2358,7 +2371,7 @@ static int otx2_wq_init(struct otx2_nic *pf) +@@ -2358,7 +2446,7 @@ static int otx2_wq_init(struct otx2_nic *pf) if (!pf->otx2_wq) return -ENOMEM; @@ -202091,8 +245017,74 @@ index 53df7fff92c40..b1894d4045b8d 100644 INIT_WORK(&pf->reset_task, otx2_reset_task); return 0; } +diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_struct.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_struct.h +index 4bbd12ff26e64..e5f30fd778fc1 100644 +--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_struct.h ++++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_struct.h +@@ -274,4 +274,61 @@ enum nix_sqint_e { + BIT_ULL(NIX_SQINT_SEND_ERR) | \ + BIT_ULL(NIX_SQINT_SQB_ALLOC_FAIL)) + ++enum nix_sqoperr_e { ++ NIX_SQOPERR_OOR = 0, ++ NIX_SQOPERR_CTX_FAULT = 1, ++ NIX_SQOPERR_CTX_POISON = 2, ++ NIX_SQOPERR_DISABLED = 3, ++ NIX_SQOPERR_SIZE_ERR = 4, ++ NIX_SQOPERR_OFLOW = 5, ++ NIX_SQOPERR_SQB_NULL = 6, ++ NIX_SQOPERR_SQB_FAULT = 7, ++ NIX_SQOPERR_SQE_SZ_ZERO = 8, ++ NIX_SQOPERR_MAX, ++}; ++ ++enum nix_mnqerr_e { ++ NIX_MNQERR_SQ_CTX_FAULT = 0, ++ NIX_MNQERR_SQ_CTX_POISON = 1, ++ 
NIX_MNQERR_SQB_FAULT = 2, ++ NIX_MNQERR_SQB_POISON = 3, ++ NIX_MNQERR_TOTAL_ERR = 4, ++ NIX_MNQERR_LSO_ERR = 5, ++ NIX_MNQERR_CQ_QUERY_ERR = 6, ++ NIX_MNQERR_MAX_SQE_SIZE_ERR = 7, ++ NIX_MNQERR_MAXLEN_ERR = 8, ++ NIX_MNQERR_SQE_SIZEM1_ZERO = 9, ++ NIX_MNQERR_MAX, ++}; ++ ++enum nix_snd_status_e { ++ NIX_SND_STATUS_GOOD = 0x0, ++ NIX_SND_STATUS_SQ_CTX_FAULT = 0x1, ++ NIX_SND_STATUS_SQ_CTX_POISON = 0x2, ++ NIX_SND_STATUS_SQB_FAULT = 0x3, ++ NIX_SND_STATUS_SQB_POISON = 0x4, ++ NIX_SND_STATUS_HDR_ERR = 0x5, ++ NIX_SND_STATUS_EXT_ERR = 0x6, ++ NIX_SND_STATUS_JUMP_FAULT = 0x7, ++ NIX_SND_STATUS_JUMP_POISON = 0x8, ++ NIX_SND_STATUS_CRC_ERR = 0x9, ++ NIX_SND_STATUS_IMM_ERR = 0x10, ++ NIX_SND_STATUS_SG_ERR = 0x11, ++ NIX_SND_STATUS_MEM_ERR = 0x12, ++ NIX_SND_STATUS_INVALID_SUBDC = 0x13, ++ NIX_SND_STATUS_SUBDC_ORDER_ERR = 0x14, ++ NIX_SND_STATUS_DATA_FAULT = 0x15, ++ NIX_SND_STATUS_DATA_POISON = 0x16, ++ NIX_SND_STATUS_NPC_DROP_ACTION = 0x17, ++ NIX_SND_STATUS_LOCK_VIOL = 0x18, ++ NIX_SND_STATUS_NPC_UCAST_CHAN_ERR = 0x19, ++ NIX_SND_STATUS_NPC_MCAST_CHAN_ERR = 0x20, ++ NIX_SND_STATUS_NPC_MCAST_ABORT = 0x21, ++ NIX_SND_STATUS_NPC_VTAG_PTR_ERR = 0x22, ++ NIX_SND_STATUS_NPC_VTAG_SIZE_ERR = 0x23, ++ NIX_SND_STATUS_SEND_MEM_FAULT = 0x24, ++ NIX_SND_STATUS_SEND_STATS_ERR = 0x25, ++ NIX_SND_STATUS_MAX, ++}; ++ + #endif /* OTX2_STRUCT_H */ diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c -index 626961a41089d..75388a65f349e 100644 +index 626961a41089d..a42373e6f2593 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c @@ -28,6 +28,9 @@ @@ -202288,6 +245280,173 @@ index 626961a41089d..75388a65f349e 100644 } return otx2_tc_parse_actions(nic, &rule->action, req, f, node); +@@ -1050,7 +1090,12 @@ int otx2_init_tc(struct otx2_nic *nic) + return err; + + tc->flow_ht_params = tc_flow_ht_params; +- return rhashtable_init(&tc->flow_table, &tc->flow_ht_params); ++ err = rhashtable_init(&tc->flow_table, &tc->flow_ht_params); ++ if (err) { ++ kfree(tc->tc_entries_bitmap); ++ tc->tc_entries_bitmap = NULL; ++ } ++ return err; + } + + void otx2_shutdown_tc(struct otx2_nic *nic) +diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c +index f42b1d4e0c679..3f3ec8ffc4ddf 100644 +--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c ++++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c +@@ -18,6 +18,31 @@ + + #define CQE_ADDR(CQ, idx) ((CQ)->cqe_base + ((CQ)->cqe_size * (idx))) + ++static int otx2_nix_cq_op_status(struct otx2_nic *pfvf, ++ struct otx2_cq_queue *cq) ++{ ++ u64 incr = (u64)(cq->cq_idx) << 32; ++ u64 status; ++ ++ status = otx2_atomic64_fetch_add(incr, pfvf->cq_op_addr); ++ ++ if (unlikely(status & BIT_ULL(CQ_OP_STAT_OP_ERR) || ++ status & BIT_ULL(CQ_OP_STAT_CQ_ERR))) { ++ dev_err(pfvf->dev, "CQ stopped due to error"); ++ return -EINVAL; ++ } ++ ++ cq->cq_tail = status & 0xFFFFF; ++ cq->cq_head = (status >> 20) & 0xFFFFF; ++ if (cq->cq_tail < cq->cq_head) ++ cq->pend_cqe = (cq->cqe_cnt - cq->cq_head) + ++ cq->cq_tail; ++ else ++ cq->pend_cqe = cq->cq_tail - cq->cq_head; ++ ++ return 0; ++} ++ + static struct nix_cqe_hdr_s *otx2_get_next_cqe(struct otx2_cq_queue *cq) + { + struct nix_cqe_hdr_s *cqe_hdr; +@@ -318,7 +343,14 @@ static int otx2_rx_napi_handler(struct otx2_nic *pfvf, + struct nix_cqe_rx_s *cqe; + int processed_cqe = 0; + +- while (likely(processed_cqe < budget)) { ++ if 
(cq->pend_cqe >= budget) ++ goto process_cqe; ++ ++ if (otx2_nix_cq_op_status(pfvf, cq) || !cq->pend_cqe) ++ return 0; ++ ++process_cqe: ++ while (likely(processed_cqe < budget) && cq->pend_cqe) { + cqe = (struct nix_cqe_rx_s *)CQE_ADDR(cq, cq->cq_head); + if (cqe->hdr.cqe_type == NIX_XQE_TYPE_INVALID || + !cqe->sg.seg_addr) { +@@ -334,6 +366,7 @@ static int otx2_rx_napi_handler(struct otx2_nic *pfvf, + cqe->hdr.cqe_type = NIX_XQE_TYPE_INVALID; + cqe->sg.seg_addr = 0x00; + processed_cqe++; ++ cq->pend_cqe--; + } + + /* Free CQEs to HW */ +@@ -368,7 +401,14 @@ static int otx2_tx_napi_handler(struct otx2_nic *pfvf, + struct nix_cqe_tx_s *cqe; + int processed_cqe = 0; + +- while (likely(processed_cqe < budget)) { ++ if (cq->pend_cqe >= budget) ++ goto process_cqe; ++ ++ if (otx2_nix_cq_op_status(pfvf, cq) || !cq->pend_cqe) ++ return 0; ++ ++process_cqe: ++ while (likely(processed_cqe < budget) && cq->pend_cqe) { + cqe = (struct nix_cqe_tx_s *)otx2_get_next_cqe(cq); + if (unlikely(!cqe)) { + if (!processed_cqe) +@@ -380,6 +420,7 @@ static int otx2_tx_napi_handler(struct otx2_nic *pfvf, + + cqe->hdr.cqe_type = NIX_XQE_TYPE_INVALID; + processed_cqe++; ++ cq->pend_cqe--; + } + + /* Free CQEs to HW */ +@@ -936,10 +977,16 @@ void otx2_cleanup_rx_cqes(struct otx2_nic *pfvf, struct otx2_cq_queue *cq) + int processed_cqe = 0; + u64 iova, pa; + +- while ((cqe = (struct nix_cqe_rx_s *)otx2_get_next_cqe(cq))) { +- if (!cqe->sg.subdc) +- continue; ++ if (otx2_nix_cq_op_status(pfvf, cq) || !cq->pend_cqe) ++ return; ++ ++ while (cq->pend_cqe) { ++ cqe = (struct nix_cqe_rx_s *)otx2_get_next_cqe(cq); + processed_cqe++; ++ cq->pend_cqe--; ++ ++ if (!cqe) ++ continue; + if (cqe->sg.segs > 1) { + otx2_free_rcv_seg(pfvf, cqe, cq->cq_idx); + continue; +@@ -965,7 +1012,16 @@ void otx2_cleanup_tx_cqes(struct otx2_nic *pfvf, struct otx2_cq_queue *cq) + + sq = &pfvf->qset.sq[cq->cint_idx]; + +- while ((cqe = (struct nix_cqe_tx_s *)otx2_get_next_cqe(cq))) { ++ if (otx2_nix_cq_op_status(pfvf, cq) || !cq->pend_cqe) ++ return; ++ ++ while (cq->pend_cqe) { ++ cqe = (struct nix_cqe_tx_s *)otx2_get_next_cqe(cq); ++ processed_cqe++; ++ cq->pend_cqe--; ++ ++ if (!cqe) ++ continue; + sg = &sq->sg[cqe->comp.sqe_id]; + skb = (struct sk_buff *)sg->skb; + if (skb) { +@@ -973,7 +1029,6 @@ void otx2_cleanup_tx_cqes(struct otx2_nic *pfvf, struct otx2_cq_queue *cq) + dev_kfree_skb_any(skb); + sg->skb = (u64)NULL; + } +- processed_cqe++; + } + + /* Free CQEs to HW */ +diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.h +index 3ff1ad79c0011..6a97631ff2269 100644 +--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.h ++++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.h +@@ -56,6 +56,9 @@ + */ + #define CQ_QCOUNT_DEFAULT 1 + ++#define CQ_OP_STAT_OP_ERR 63 ++#define CQ_OP_STAT_CQ_ERR 46 ++ + struct queue_stats { + u64 bytes; + u64 pkts; +@@ -122,6 +125,8 @@ struct otx2_cq_queue { + u16 pool_ptrs; + u32 cqe_cnt; + u32 cq_head; ++ u32 cq_tail; ++ u32 pend_cqe; + void *cqe_base; + struct qmem *cqe; + struct otx2_pool *rbpool; diff --git a/drivers/net/ethernet/marvell/prestera/prestera_main.c b/drivers/net/ethernet/marvell/prestera/prestera_main.c index 44c670807fb3c..656c68cfd7ec6 100644 --- a/drivers/net/ethernet/marvell/prestera/prestera_main.c @@ -202407,6 +245566,31 @@ index a250d394da380..a8d7b889ebeee 100644 { } }; MODULE_DEVICE_TABLE(pci, prestera_pci_devices); +diff --git a/drivers/net/ethernet/marvell/prestera/prestera_rxtx.c 
b/drivers/net/ethernet/marvell/prestera/prestera_rxtx.c +index 73d2eba5262f0..a47aa624f7454 100644 +--- a/drivers/net/ethernet/marvell/prestera/prestera_rxtx.c ++++ b/drivers/net/ethernet/marvell/prestera/prestera_rxtx.c +@@ -776,6 +776,7 @@ tx_done: + int prestera_rxtx_switch_init(struct prestera_switch *sw) + { + struct prestera_rxtx *rxtx; ++ int err; + + rxtx = kzalloc(sizeof(*rxtx), GFP_KERNEL); + if (!rxtx) +@@ -783,7 +784,11 @@ int prestera_rxtx_switch_init(struct prestera_switch *sw) + + sw->rxtx = rxtx; + +- return prestera_sdma_switch_init(sw); ++ err = prestera_sdma_switch_init(sw); ++ if (err) ++ kfree(rxtx); ++ ++ return err; + } + + void prestera_rxtx_switch_fini(struct prestera_switch *sw) diff --git a/drivers/net/ethernet/marvell/prestera/prestera_switchdev.c b/drivers/net/ethernet/marvell/prestera/prestera_switchdev.c index 3ce6ccd0f5394..b4599fe4ca8da 100644 --- a/drivers/net/ethernet/marvell/prestera/prestera_switchdev.c @@ -202441,7 +245625,7 @@ index 3ce6ccd0f5394..b4599fe4ca8da 100644 return notifier_from_errno(err); diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c -index 398c23cec8151..8601ef26c2604 100644 +index 398c23cec8151..cc6a5b2f24e3e 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -91,46 +91,53 @@ static int mtk_mdio_busy_wait(struct mtk_eth *eth) @@ -202582,6 +245766,18 @@ index 398c23cec8151..8601ef26c2604 100644 /* only tcp dst ipv4 is meaningful, others are meaningless */ fsp->flow_type = TCP_V4_FLOW; fsp->h_u.tcp_ip4_spec.ip4dst = ntohl(mac->hwlro_ip[fsp->location]); +@@ -2300,8 +2327,10 @@ static int mtk_open(struct net_device *dev) + int err; + + err = mtk_start_dma(eth); +- if (err) ++ if (err) { ++ phylink_disconnect_phy(mac->phylink); + return err; ++ } + + if (eth->soc->offload_version && mtk_ppe_start(ð->ppe) == 0) + gdm_config = MTK_GDMA_TO_PPE; diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.h b/drivers/net/ethernet/mediatek/mtk_eth_soc.h index 5ef70dd8b49c6..f2d90639d7ed1 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h @@ -202689,8 +245885,22 @@ index 8af7f28273225..3bd3603873e32 100644 if (mlx4_en_alloc_resources(tmp)) { en_warn(priv, +diff --git a/drivers/net/ethernet/mellanox/mlx4/qp.c b/drivers/net/ethernet/mellanox/mlx4/qp.c +index b149e601f6737..48cfaa7eaf50c 100644 +--- a/drivers/net/ethernet/mellanox/mlx4/qp.c ++++ b/drivers/net/ethernet/mellanox/mlx4/qp.c +@@ -697,7 +697,8 @@ static int mlx4_create_zones(struct mlx4_dev *dev, + err = mlx4_bitmap_init(*bitmap + k, 1, + MLX4_QP_TABLE_RAW_ETH_SIZE - 1, 0, + 0); +- mlx4_bitmap_alloc_range(*bitmap + k, 1, 1, 0); ++ if (!err) ++ mlx4_bitmap_alloc_range(*bitmap + k, 1, 1, 0); + } + + if (err) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c -index db5dfff585c99..e06a6104e91fe 100644 +index db5dfff585c99..41c15a65fb459 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -130,11 +130,8 @@ static int cmd_alloc_index(struct mlx5_cmd *cmd) @@ -202794,6 +246004,33 @@ index db5dfff585c99..e06a6104e91fe 100644 if (alloc_ret < 0) { mlx5_core_err_rl(dev, "failed to allocate command entry\n"); if (ent->callback) { +@@ -985,6 +971,7 @@ static void cmd_work_handler(struct work_struct *work) + cmd_ent_get(ent); + set_bit(MLX5_CMD_ENT_STATE_PENDING_COMP, &ent->state); + ++ cmd_ent_get(ent); /* for the _real_ FW event on completion */ + /* Skip sending command 
to fw if internal error */ + if (mlx5_cmd_is_down(dev) || !opcode_allowed(&dev->cmd, ent->op)) { + u8 status = 0; +@@ -998,7 +985,6 @@ static void cmd_work_handler(struct work_struct *work) + return; + } + +- cmd_ent_get(ent); /* for the _real_ FW event on completion */ + /* ring doorbell after the descriptor is valid */ + mlx5_core_dbg(dev, "writing 0x%x to command doorbell\n", 1 << ent->idx); + wmb(); +@@ -1448,8 +1434,8 @@ static ssize_t outlen_write(struct file *filp, const char __user *buf, + return -EFAULT; + + err = sscanf(outlen_str, "%d", &outlen); +- if (err < 0) +- return err; ++ if (err != 1) ++ return -EINVAL; + + ptr = kzalloc(outlen, GFP_KERNEL); + if (!ptr) @@ -1594,8 +1580,6 @@ static void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool force vector = vec & 0xffffffff; for (i = 0; i < (1 << cmd->log_sz); i++) { @@ -202803,8 +246040,14 @@ index db5dfff585c99..e06a6104e91fe 100644 ent = cmd->ent_arr[i]; /* if we already completed the command, ignore it */ -@@ -1618,10 +1602,6 @@ static void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool force - dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) +@@ -1614,14 +1598,10 @@ static void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool force + cmd_ent_put(ent); /* timeout work was canceled */ + + if (!forced || /* Real FW completion */ +- pci_channel_offline(dev->pdev) || /* FW is inaccessible */ +- dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) ++ mlx5_cmd_is_down(dev) || /* No real FW completion is expected */ ++ !opcode_allowed(cmd, ent->op)) cmd_ent_put(ent); - if (ent->page_queue) @@ -202822,6 +246065,65 @@ index db5dfff585c99..e06a6104e91fe 100644 } } } +@@ -1720,12 +1699,17 @@ void mlx5_cmd_flush(struct mlx5_core_dev *dev) + struct mlx5_cmd *cmd = &dev->cmd; + int i; + +- for (i = 0; i < cmd->max_reg_cmds; i++) +- while (down_trylock(&cmd->sem)) ++ for (i = 0; i < cmd->max_reg_cmds; i++) { ++ while (down_trylock(&cmd->sem)) { + mlx5_cmd_trigger_completions(dev); ++ cond_resched(); ++ } ++ } + +- while (down_trylock(&cmd->pages_sem)) ++ while (down_trylock(&cmd->pages_sem)) { + mlx5_cmd_trigger_completions(dev); ++ cond_resched(); ++ } + + /* Unlock cmdif */ + up(&cmd->pages_sem); +@@ -1886,7 +1870,7 @@ void mlx5_cmd_init_async_ctx(struct mlx5_core_dev *dev, + ctx->dev = dev; + /* Starts at 1 to avoid doing wake_up if we are not cleaning up */ + atomic_set(&ctx->num_inflight, 1); +- init_waitqueue_head(&ctx->wait); ++ init_completion(&ctx->inflight_done); + } + EXPORT_SYMBOL(mlx5_cmd_init_async_ctx); + +@@ -1900,8 +1884,8 @@ EXPORT_SYMBOL(mlx5_cmd_init_async_ctx); + */ + void mlx5_cmd_cleanup_async_ctx(struct mlx5_async_ctx *ctx) + { +- atomic_dec(&ctx->num_inflight); +- wait_event(ctx->wait, atomic_read(&ctx->num_inflight) == 0); ++ if (!atomic_dec_and_test(&ctx->num_inflight)) ++ wait_for_completion(&ctx->inflight_done); + } + EXPORT_SYMBOL(mlx5_cmd_cleanup_async_ctx); + +@@ -1912,7 +1896,7 @@ static void mlx5_cmd_exec_cb_handler(int status, void *_work) + + work->user_callback(status, work); + if (atomic_dec_and_test(&ctx->num_inflight)) +- wake_up(&ctx->wait); ++ complete(&ctx->inflight_done); + } + + int mlx5_cmd_exec_cb(struct mlx5_async_ctx *ctx, void *in, int in_size, +@@ -1928,7 +1912,7 @@ int mlx5_cmd_exec_cb(struct mlx5_async_ctx *ctx, void *in, int in_size, + ret = cmd_exec(ctx->dev, in, in_size, out, out_size, + mlx5_cmd_exec_cb_handler, work, false); + if (ret && atomic_dec_and_test(&ctx->num_inflight)) +- wake_up(&ctx->wait); ++ complete(&ctx->inflight_done); + + 
return ret; + } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cq.c b/drivers/net/ethernet/mellanox/mlx5/core/cq.c index 02e77ffe5c3e4..5371ad0a12eb5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cq.c @@ -203035,10 +246337,27 @@ index dcf9f27ba2efd..7d56a927081d0 100644 ARRAY_SIZE(mlx5_devlink_params)); devlink_unregister(devlink); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c -index f9cf9fb315479..ea46152816f90 100644 +index f9cf9fb315479..1c72fc0b7b68a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c -@@ -675,6 +675,9 @@ static void mlx5_fw_tracer_handle_traces(struct work_struct *work) +@@ -64,6 +64,7 @@ static int mlx5_query_mtrc_caps(struct mlx5_fw_tracer *tracer) + MLX5_GET(mtrc_cap, out, num_string_trace); + tracer->str_db.num_string_db = MLX5_GET(mtrc_cap, out, num_string_db); + tracer->owner = !!MLX5_GET(mtrc_cap, out, trace_owner); ++ tracer->str_db.loaded = false; + + for (i = 0; i < tracer->str_db.num_string_db; i++) { + mtrc_cap_sp = MLX5_ADDR_OF(mtrc_cap, out, string_db_param[i]); +@@ -638,7 +639,7 @@ static void mlx5_tracer_handle_timestamp_trace(struct mlx5_fw_tracer *tracer, + trace_timestamp = (timestamp_event.timestamp & MASK_52_7) | + (str_frmt->timestamp & MASK_6_0); + else +- trace_timestamp = ((timestamp_event.timestamp & MASK_52_7) - 1) | ++ trace_timestamp = ((timestamp_event.timestamp - 1) & MASK_52_7) | + (str_frmt->timestamp & MASK_6_0); + + mlx5_tracer_print_trace(str_frmt, dev, trace_timestamp); +@@ -675,6 +676,9 @@ static void mlx5_fw_tracer_handle_traces(struct work_struct *work) if (!tracer->owner) return; @@ -203048,7 +246367,7 @@ index f9cf9fb315479..ea46152816f90 100644 block_count = tracer->buff.size / TRACER_BLOCK_SIZE_BYTE; start_offset = tracer->buff.consumer_index * TRACER_BLOCK_SIZE_BYTE; -@@ -732,6 +735,7 @@ static void mlx5_fw_tracer_handle_traces(struct work_struct *work) +@@ -732,6 +736,7 @@ static void mlx5_fw_tracer_handle_traces(struct work_struct *work) &tmp_trace_block[TRACES_PER_BLOCK - 1]); } @@ -203056,7 +246375,23 @@ index f9cf9fb315479..ea46152816f90 100644 mlx5_fw_tracer_arm(dev); } -@@ -1137,8 +1141,7 @@ static int fw_tracer_event(struct notifier_block *nb, unsigned long action, void +@@ -752,6 +757,7 @@ static int mlx5_fw_tracer_set_mtrc_conf(struct mlx5_fw_tracer *tracer) + if (err) + mlx5_core_warn(dev, "FWTracer: Failed to set tracer configurations %d\n", err); + ++ tracer->buff.consumer_index = 0; + return err; + } + +@@ -816,7 +822,6 @@ static void mlx5_fw_tracer_ownership_change(struct work_struct *work) + mlx5_core_dbg(tracer->dev, "FWTracer: ownership changed, current=(%d)\n", tracer->owner); + if (tracer->owner) { + tracer->owner = false; +- tracer->buff.consumer_index = 0; + return; + } + +@@ -1137,8 +1142,7 @@ static int fw_tracer_event(struct notifier_block *nb, unsigned long action, void queue_work(tracer->work_queue, &tracer->ownership_change_work); break; case MLX5_TRACER_SUBTYPE_TRACES_AVAILABLE: @@ -203147,7 +246482,7 @@ index ed4fb79b4db76..75b6060f7a9ae 100644 } while (err > 0); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h -index 03a7a4ce5cd5e..c22a38e5337b2 100644 +index 03a7a4ce5cd5e..c822c3ac0544b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -103,7 +103,7 @@ struct page_pool; @@ -203211,6 +246546,15 @@ index 
03a7a4ce5cd5e..c22a38e5337b2 100644 int mlx5e_open_xdpsq(struct mlx5e_channel *c, struct mlx5e_params *params, struct mlx5e_sq_param *param, struct xsk_buff_pool *xsk_pool, struct mlx5e_xdpsq *sq, bool is_redirect); +@@ -993,7 +1002,7 @@ void mlx5e_activate_priv_channels(struct mlx5e_priv *priv); + void mlx5e_deactivate_priv_channels(struct mlx5e_priv *priv); + int mlx5e_ptp_rx_manage_fs_ctx(struct mlx5e_priv *priv, void *ctx); + +-int mlx5e_modify_rq_state(struct mlx5e_rq *rq, int curr_state, int next_state); ++int mlx5e_flush_rq(struct mlx5e_rq *rq, int curr_state); + void mlx5e_activate_rq(struct mlx5e_rq *rq); + void mlx5e_deactivate_rq(struct mlx5e_rq *rq); + void mlx5e_activate_icosq(struct mlx5e_icosq *icosq); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h b/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h index a88a1a48229f6..d634c034a4199 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h @@ -203332,6 +246676,33 @@ index 673f1c82d3815..c9d5d8d93994d 100644 if (err) return err; } +diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.h b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.h +index a71a32e00ebb9..dc7c57e6de77a 100644 +--- a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.h ++++ b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.h +@@ -6,6 +6,7 @@ + + #include "en.h" + #include "en_stats.h" ++#include "en/txrx.h" + #include <linux/ptp_classify.h> + + #define MLX5E_PTP_CHANNEL_IX 0 +@@ -67,6 +68,14 @@ static inline bool mlx5e_use_ptpsq(struct sk_buff *skb) + fk.ports.dst == htons(PTP_EV_PORT)); + } + ++static inline bool mlx5e_ptpsq_fifo_has_room(struct mlx5e_txqsq *sq) ++{ ++ if (!sq->ptpsq) ++ return true; ++ ++ return mlx5e_skb_fifo_has_room(&sq->ptpsq->skb_fifo); ++} ++ + int mlx5e_ptp_open(struct mlx5e_priv *priv, struct mlx5e_params *params, + u8 lag_port, struct mlx5e_ptp **cp); + void mlx5e_ptp_close(struct mlx5e_ptp *c); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c b/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c index e8a8d78e3e4d5..965838893432d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c @@ -203411,10 +246782,66 @@ index 9c076aa20306a..b6f5c1bcdbcd4 100644 switch (event) { case NETDEV_CHANGELOWERSTATE: diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c -index c6d2f8c78db71..48dc121b2cb4c 100644 +index c6d2f8c78db71..291bd59639044 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c -@@ -491,7 +491,7 @@ void mlx5e_rep_bridge_init(struct mlx5e_priv *priv) +@@ -164,6 +164,36 @@ static int mlx5_esw_bridge_port_changeupper(struct notifier_block *nb, void *ptr + return err; + } + ++static int ++mlx5_esw_bridge_changeupper_validate_netdev(void *ptr) ++{ ++ struct net_device *dev = netdev_notifier_info_to_dev(ptr); ++ struct netdev_notifier_changeupper_info *info = ptr; ++ struct net_device *upper = info->upper_dev; ++ struct net_device *lower; ++ struct list_head *iter; ++ ++ if (!netif_is_bridge_master(upper) || !netif_is_lag_master(dev)) ++ return 0; ++ ++ netdev_for_each_lower_dev(dev, lower, iter) { ++ struct mlx5_core_dev *mdev; ++ struct mlx5e_priv *priv; ++ ++ if (!mlx5e_eswitch_rep(lower)) ++ continue; ++ ++ priv = netdev_priv(lower); ++ mdev = priv->mdev; ++ if (!mlx5_lag_is_active(mdev)) ++ return -EAGAIN; ++ if (!mlx5_lag_is_shared_fdb(mdev)) ++ return -EOPNOTSUPP; ++ } ++ ++ return 0; ++} ++ + static int 
mlx5_esw_bridge_switchdev_port_event(struct notifier_block *nb, + unsigned long event, void *ptr) + { +@@ -171,6 +201,7 @@ static int mlx5_esw_bridge_switchdev_port_event(struct notifier_block *nb, + + switch (event) { + case NETDEV_PRECHANGEUPPER: ++ err = mlx5_esw_bridge_changeupper_validate_netdev(ptr); + break; + + case NETDEV_CHANGEUPPER: +@@ -401,10 +432,6 @@ static int mlx5_esw_bridge_switchdev_event(struct notifier_block *nb, + + switch (event) { + case SWITCHDEV_FDB_ADD_TO_BRIDGE: +- /* only handle the event on native eswtich of representor */ +- if (!mlx5_esw_bridge_is_local(dev, rep, esw)) +- break; +- + fdb_info = container_of(info, + struct switchdev_notifier_fdb_info, + info); +@@ -491,7 +518,7 @@ void mlx5e_rep_bridge_init(struct mlx5e_priv *priv) } br_offloads->netdev_nb.notifier_call = mlx5_esw_bridge_switchdev_port_event; @@ -203423,7 +246850,7 @@ index c6d2f8c78db71..48dc121b2cb4c 100644 if (err) { esw_warn(mdev, "Failed to register bridge offloads netdevice notifier (err=%d)\n", err); -@@ -509,7 +509,9 @@ err_register_swdev_blk: +@@ -509,7 +536,9 @@ err_register_swdev_blk: err_register_swdev: destroy_workqueue(br_offloads->wq); err_alloc_wq: @@ -203433,7 +246860,7 @@ index c6d2f8c78db71..48dc121b2cb4c 100644 } void mlx5e_rep_bridge_cleanup(struct mlx5e_priv *priv) -@@ -524,7 +526,7 @@ void mlx5e_rep_bridge_cleanup(struct mlx5e_priv *priv) +@@ -524,7 +553,7 @@ void mlx5e_rep_bridge_cleanup(struct mlx5e_priv *priv) return; cancel_delayed_work_sync(&br_offloads->update_work); @@ -203457,7 +246884,7 @@ index de03684528bbf..8451940c16ab9 100644 } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c -index 0eb125316fe20..e329158fdc555 100644 +index 0eb125316fe20..899a9a73eef68 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c @@ -59,6 +59,7 @@ static void mlx5e_reset_icosq_cc_pc(struct mlx5e_icosq *icosq) @@ -203492,7 +246919,7 @@ index 0eb125316fe20..e329158fdc555 100644 err = mlx5e_wait_for_icosq_flush(icosq); if (err) goto out; -@@ -94,15 +104,28 @@ static int mlx5e_rx_reporter_err_icosq_cqe_recover(void *ctx) +@@ -94,35 +104,29 @@ static int mlx5e_rx_reporter_err_icosq_cqe_recover(void *ctx) goto out; mlx5e_reset_icosq_cc_pc(icosq); @@ -203507,22 +246934,65 @@ index 0eb125316fe20..e329158fdc555 100644 + mlx5e_activate_rq(rq); rq->stats->recover++; -+ +- return 0; +-out: +- clear_bit(MLX5E_SQ_STATE_RECOVERING, &icosq->state); +- return err; +-} +- +-static int mlx5e_rq_to_ready(struct mlx5e_rq *rq, int curr_state) +-{ +- struct net_device *dev = rq->netdev; +- int err; + +- err = mlx5e_modify_rq_state(rq, curr_state, MLX5_RQC_STATE_RST); +- if (err) { +- netdev_err(dev, "Failed to move rq 0x%x to reset\n", rq->rqn); +- return err; +- } +- err = mlx5e_modify_rq_state(rq, MLX5_RQC_STATE_RST, MLX5_RQC_STATE_RDY); +- if (err) { +- netdev_err(dev, "Failed to move rq 0x%x to ready\n", rq->rqn); +- return err; + if (xskrq) { + mlx5e_activate_rq(xskrq); + xskrq->stats->recover++; -+ } -+ + } + + mutex_unlock(&icosq->channel->icosq_recovery_lock); + return 0; - out: - clear_bit(MLX5E_SQ_STATE_RECOVERING, &icosq->state); ++out: ++ clear_bit(MLX5E_SQ_STATE_RECOVERING, &icosq->state); + mutex_unlock(&icosq->channel->icosq_recovery_lock); - return err; ++ return err; } -@@ -703,6 +726,16 @@ void mlx5e_reporter_icosq_cqe_err(struct mlx5e_icosq *icosq) + static int mlx5e_rx_reporter_err_rq_cqe_recover(void *ctx) +@@ -131,19 +135,14 
@@ static int mlx5e_rx_reporter_err_rq_cqe_recover(void *ctx) + int err; + + mlx5e_deactivate_rq(rq); +- mlx5e_free_rx_descs(rq); +- +- err = mlx5e_rq_to_ready(rq, MLX5_RQC_STATE_ERR); ++ err = mlx5e_flush_rq(rq, MLX5_RQC_STATE_ERR); ++ clear_bit(MLX5E_RQ_STATE_RECOVERING, &rq->state); + if (err) +- goto out; ++ return err; + +- clear_bit(MLX5E_RQ_STATE_RECOVERING, &rq->state); + mlx5e_activate_rq(rq); + rq->stats->recover++; + return 0; +-out: +- clear_bit(MLX5E_RQ_STATE_RECOVERING, &rq->state); +- return err; + } + + static int mlx5e_rx_reporter_timeout_recover(void *ctx) +@@ -703,6 +702,16 @@ void mlx5e_reporter_icosq_cqe_err(struct mlx5e_icosq *icosq) mlx5e_health_report(priv, priv->rx_reporter, err_str, &err_ctx); } @@ -204178,10 +247648,31 @@ index 4a13ef561587d..d90c6dc41c9f4 100644 attr.fl.fl6.saddr = tun_key->u.ipv6.src; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c -index 1c44c6c345f5d..700c463ea3679 100644 +index 1c44c6c345f5d..a8d7f07ee2ca0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c -@@ -221,8 +221,14 @@ static void mlx5e_take_tmp_flow(struct mlx5e_tc_flow *flow, +@@ -188,12 +188,19 @@ void mlx5e_tc_encap_flows_del(struct mlx5e_priv *priv, + int err; + + list_for_each_entry(flow, flow_list, tmp_list) { +- if (!mlx5e_is_offloaded_flow(flow) || flow_flag_test(flow, SLOW)) ++ if (!mlx5e_is_offloaded_flow(flow)) + continue; + attr = flow->attr; + esw_attr = attr->esw_attr; + spec = &attr->parse_attr->spec; + ++ /* Clear pkt_reformat before checking slow path flag. Because ++ * in next iteration, the same flow is already set slow path ++ * flag, but still need to clear the pkt_reformat. 
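/* Editor's sketch of the ordering hazard the comment above describes: the
 * loop must drop the stale pkt_reformat on *every* pass, and only then may
 * it skip flows already flagged SLOW, otherwise a flow marked SLOW on an
 * earlier iteration keeps a dangling pointer. All types and names here are
 * hypothetical simplifications, not the driver's structures:
 */
#include <stdbool.h>
#include <stddef.h>

struct flow {
	bool slow;		/* already moved to the slow path */
	void *pkt_reformat;	/* stale encap state; must go either way */
	struct flow *next;
};

static void invalidate_encap(struct flow *head)
{
	for (struct flow *f = head; f; f = f->next) {
		/* Reset per-entry state unconditionally first... */
		f->pkt_reformat = NULL;
		/* ...and test the skip flag only afterwards. */
		if (f->slow)
			continue;
		/* re-offload f to the slow path here */
	}
}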
++ */ ++ if (flow_flag_test(flow, SLOW)) ++ continue; ++ + /* update from encap rule to slow path rule */ + rule = mlx5e_tc_offload_to_slow_path(esw, flow, spec); + /* mark the flow's encap dest as non-valid */ +@@ -221,8 +228,14 @@ static void mlx5e_take_tmp_flow(struct mlx5e_tc_flow *flow, struct list_head *flow_list, int index) { @@ -204197,7 +247688,16 @@ index 1c44c6c345f5d..700c463ea3679 100644 wait_for_completion(&flow->init_done); flow->tmp_entry_index = index; -@@ -1538,6 +1544,8 @@ mlx5e_init_fib_work_ipv4(struct mlx5e_priv *priv, +@@ -1336,7 +1349,7 @@ static void mlx5e_reoffload_encap(struct mlx5e_priv *priv, + continue; + } + +- err = mlx5e_tc_add_flow_mod_hdr(priv, parse_attr, flow); ++ err = mlx5e_tc_add_flow_mod_hdr(priv, flow, attr); + if (err) { + mlx5_core_warn(priv->mdev, "Failed to update flow mod_hdr err=%d", + err); +@@ -1538,6 +1551,8 @@ mlx5e_init_fib_work_ipv4(struct mlx5e_priv *priv, struct net_device *fib_dev; fen_info = container_of(info, struct fib_entry_notifier_info, info); @@ -204259,6 +247759,19 @@ index 60952b33b5688..d2333310b56fe 100644 MLX5_SET(fte_match_set_misc2, misc2_c, outer_first_mpls_over_udp.mpls_label, match.mask->ls[0].mpls_label); +diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_vxlan.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_vxlan.c +index 4267f3a1059e7..78b1a6ddd9675 100644 +--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_vxlan.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_vxlan.c +@@ -88,6 +88,8 @@ static int mlx5e_gen_ip_tunnel_header_vxlan(char buf[], + struct udphdr *udp = (struct udphdr *)(buf); + struct vxlanhdr *vxh; + ++ if (tun_key->tun_flags & TUNNEL_VXLAN_OPT) ++ return -EOPNOTSUPP; + vxh = (struct vxlanhdr *)((char *)udp + sizeof(struct udphdr)); + *ip_proto = IPPROTO_UDP; + diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tir.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tir.c index de936dc4bc483..a1afb8585e37f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tir.c @@ -204320,6 +247833,23 @@ index e45149a78ed9d..857a84bcd53af 100644 void mlx5e_tir_builder_build_rss(struct mlx5e_tir_builder *builder, const struct mlx5e_rss_params_hash *rss_hash, const struct mlx5e_rss_params_traffic_type *rss_tt, +diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h +index 055c3bc237339..f5c872043bcbd 100644 +--- a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h ++++ b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h +@@ -73,6 +73,12 @@ netdev_tx_t mlx5e_xmit(struct sk_buff *skb, struct net_device *dev); + bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget); + void mlx5e_free_txqsq_descs(struct mlx5e_txqsq *sq); + ++static inline bool ++mlx5e_skb_fifo_has_room(struct mlx5e_skb_fifo *fifo) ++{ ++ return (*fifo->pc - *fifo->cc) < fifo->mask; ++} ++ + static inline bool + mlx5e_wqc_has_room_for(struct mlx5_wq_cyc *wq, u16 cc, u16 pc, u16 n) + { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/pool.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/pool.c index 7b562d2c8a196..279cd8f4e79f7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/pool.c @@ -204381,6 +247911,30 @@ index 538bc2419bd83..8526a5fbbf0bf 100644 + /* TX queue is disabled on close. 
*/ } +diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c +index 7cab08a2f7152..05882d1a4407c 100644 +--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c +@@ -113,7 +113,6 @@ static bool mlx5e_ipsec_update_esn_state(struct mlx5e_ipsec_sa_entry *sa_entry) + struct xfrm_replay_state_esn *replay_esn; + u32 seq_bottom = 0; + u8 overlap; +- u32 *esn; + + if (!(sa_entry->x->props.flags & XFRM_STATE_ESN)) { + sa_entry->esn_state.trigger = 0; +@@ -128,11 +127,9 @@ static bool mlx5e_ipsec_update_esn_state(struct mlx5e_ipsec_sa_entry *sa_entry) + + sa_entry->esn_state.esn = xfrm_replay_seqhi(sa_entry->x, + htonl(seq_bottom)); +- esn = &sa_entry->esn_state.esn; + + sa_entry->esn_state.trigger = 1; + if (unlikely(overlap && seq_bottom < MLX5E_IPSEC_ESN_SCOPE_MID)) { +- ++(*esn); + sa_entry->esn_state.overlap = 0; + return true; + } else if (unlikely(!overlap && diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c index fb5397324aa4f..b56fea142c246 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c @@ -204677,10 +248231,81 @@ index 03693fa74a704..d32b70c62c949 100644 return err; eth_rule->rss = rss; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c -index 41ef6eb70a585..c1c4f380803a1 100644 +index 41ef6eb70a585..f1dd966e2bdbf 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c -@@ -911,8 +911,6 @@ void mlx5e_deactivate_rq(struct mlx5e_rq *rq) +@@ -392,7 +392,8 @@ static int mlx5e_init_rxq_rq(struct mlx5e_channel *c, struct mlx5e_params *param + rq->icosq = &c->icosq; + rq->ix = c->ix; + rq->mdev = mdev; +- rq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu); ++ rq->hw_mtu = ++ MLX5E_SW2HW_MTU(params, params->sw_mtu) - ETH_FCS_LEN * !params->scatter_fcs_en; + rq->xdpsq = &c->rq_xdpsq; + rq->stats = &c->priv->channel_stats[c->ix].rq; + rq->ptp_cyc2time = mlx5_rq_ts_translator(mdev); +@@ -672,7 +673,7 @@ int mlx5e_create_rq(struct mlx5e_rq *rq, struct mlx5e_rq_param *param) + return err; + } + +-int mlx5e_modify_rq_state(struct mlx5e_rq *rq, int curr_state, int next_state) ++static int mlx5e_modify_rq_state(struct mlx5e_rq *rq, int curr_state, int next_state) + { + struct mlx5_core_dev *mdev = rq->mdev; + +@@ -701,33 +702,30 @@ int mlx5e_modify_rq_state(struct mlx5e_rq *rq, int curr_state, int next_state) + return err; + } + +-static int mlx5e_modify_rq_scatter_fcs(struct mlx5e_rq *rq, bool enable) ++static int mlx5e_rq_to_ready(struct mlx5e_rq *rq, int curr_state) + { +- struct mlx5_core_dev *mdev = rq->mdev; +- +- void *in; +- void *rqc; +- int inlen; ++ struct net_device *dev = rq->netdev; + int err; + +- inlen = MLX5_ST_SZ_BYTES(modify_rq_in); +- in = kvzalloc(inlen, GFP_KERNEL); +- if (!in) +- return -ENOMEM; +- +- rqc = MLX5_ADDR_OF(modify_rq_in, in, ctx); +- +- MLX5_SET(modify_rq_in, in, rq_state, MLX5_RQC_STATE_RDY); +- MLX5_SET64(modify_rq_in, in, modify_bitmask, +- MLX5_MODIFY_RQ_IN_MODIFY_BITMASK_SCATTER_FCS); +- MLX5_SET(rqc, rqc, scatter_fcs, enable); +- MLX5_SET(rqc, rqc, state, MLX5_RQC_STATE_RDY); ++ err = mlx5e_modify_rq_state(rq, curr_state, MLX5_RQC_STATE_RST); ++ if (err) { ++ netdev_err(dev, "Failed to move rq 0x%x to reset\n", rq->rqn); ++ return err; ++ } ++ err = mlx5e_modify_rq_state(rq, 
MLX5_RQC_STATE_RST, MLX5_RQC_STATE_RDY); ++ if (err) { ++ netdev_err(dev, "Failed to move rq 0x%x to ready\n", rq->rqn); ++ return err; ++ } + +- err = mlx5_core_modify_rq(mdev, rq->rqn, in); ++ return 0; ++} + +- kvfree(in); ++int mlx5e_flush_rq(struct mlx5e_rq *rq, int curr_state) ++{ ++ mlx5e_free_rx_descs(rq); + +- return err; ++ return mlx5e_rq_to_ready(rq, curr_state); + } + + static int mlx5e_modify_rq_vsd(struct mlx5e_rq *rq, bool vsd) +@@ -911,8 +909,6 @@ void mlx5e_deactivate_rq(struct mlx5e_rq *rq) void mlx5e_close_rq(struct mlx5e_rq *rq) { cancel_work_sync(&rq->dim.work); @@ -204689,7 +248314,16 @@ index 41ef6eb70a585..c1c4f380803a1 100644 cancel_work_sync(&rq->recover_work); mlx5e_destroy_rq(rq); mlx5e_free_rx_descs(rq); -@@ -1038,9 +1036,20 @@ static void mlx5e_icosq_err_cqe_work(struct work_struct *recover_work) +@@ -979,7 +975,7 @@ static int mlx5e_alloc_xdpsq(struct mlx5e_channel *c, + sq->channel = c; + sq->uar_map = mdev->mlx5e_res.hw_objs.bfreg.map; + sq->min_inline_mode = params->tx_min_inline_mode; +- sq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu); ++ sq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu) - ETH_FCS_LEN; + sq->xsk_pool = xsk_pool; + + sq->stats = sq->xsk_pool ? +@@ -1038,9 +1034,20 @@ static void mlx5e_icosq_err_cqe_work(struct work_struct *recover_work) mlx5e_reporter_icosq_cqe_err(sq); } @@ -204711,7 +248345,7 @@ index 41ef6eb70a585..c1c4f380803a1 100644 { void *sqc_wq = MLX5_ADDR_OF(sqc, param->sqc, wq); struct mlx5_core_dev *mdev = c->mdev; -@@ -1061,7 +1070,7 @@ static int mlx5e_alloc_icosq(struct mlx5e_channel *c, +@@ -1061,7 +1068,7 @@ static int mlx5e_alloc_icosq(struct mlx5e_channel *c, if (err) goto err_sq_wq_destroy; @@ -204720,7 +248354,7 @@ index 41ef6eb70a585..c1c4f380803a1 100644 return 0; -@@ -1399,13 +1408,14 @@ void mlx5e_tx_err_cqe_work(struct work_struct *recover_work) +@@ -1399,13 +1406,14 @@ void mlx5e_tx_err_cqe_work(struct work_struct *recover_work) mlx5e_reporter_tx_err_cqe(sq); } @@ -204738,7 +248372,7 @@ index 41ef6eb70a585..c1c4f380803a1 100644 if (err) return err; -@@ -1444,7 +1454,7 @@ void mlx5e_deactivate_icosq(struct mlx5e_icosq *icosq) +@@ -1444,7 +1452,7 @@ void mlx5e_deactivate_icosq(struct mlx5e_icosq *icosq) synchronize_net(); /* Sync with NAPI. 
*/ } @@ -204747,7 +248381,7 @@ index 41ef6eb70a585..c1c4f380803a1 100644 { struct mlx5e_channel *c = sq->channel; -@@ -1871,11 +1881,15 @@ static int mlx5e_open_queues(struct mlx5e_channel *c, +@@ -1871,11 +1879,15 @@ static int mlx5e_open_queues(struct mlx5e_channel *c, spin_lock_init(&c->async_icosq_lock); @@ -204765,7 +248399,7 @@ index 41ef6eb70a585..c1c4f380803a1 100644 if (err) goto err_close_async_icosq; -@@ -1943,9 +1957,12 @@ static void mlx5e_close_queues(struct mlx5e_channel *c) +@@ -1943,9 +1955,12 @@ static void mlx5e_close_queues(struct mlx5e_channel *c) mlx5e_close_xdpsq(&c->xdpsq); if (c->xdp) mlx5e_close_xdpsq(&c->rq_xdpsq); @@ -204778,7 +248412,7 @@ index 41ef6eb70a585..c1c4f380803a1 100644 mlx5e_close_icosq(&c->async_icosq); if (c->xdp) mlx5e_close_cq(&c->rq_xdpsq.cq); -@@ -2185,17 +2202,14 @@ void mlx5e_close_channels(struct mlx5e_channels *chs) +@@ -2185,17 +2200,14 @@ void mlx5e_close_channels(struct mlx5e_channels *chs) chs->num = 0; } @@ -204799,7 +248433,28 @@ index 41ef6eb70a585..c1c4f380803a1 100644 static int mlx5e_set_mtu(struct mlx5_core_dev *mdev, struct mlx5e_params *params, u16 mtu) -@@ -3270,16 +3284,25 @@ static int set_feature_lro(struct net_device *netdev, bool enable) +@@ -2836,20 +2848,6 @@ static void mlx5e_cleanup_nic_tx(struct mlx5e_priv *priv) + mlx5e_destroy_tises(priv); + } + +-static int mlx5e_modify_channels_scatter_fcs(struct mlx5e_channels *chs, bool enable) +-{ +- int err = 0; +- int i; +- +- for (i = 0; i < chs->num; i++) { +- err = mlx5e_modify_rq_scatter_fcs(&chs->c[i]->rq, enable); +- if (err) +- return err; +- } +- +- return 0; +-} +- + static int mlx5e_modify_channels_vsd(struct mlx5e_channels *chs, bool vsd) + { + int err; +@@ -3270,16 +3268,25 @@ static int set_feature_lro(struct net_device *netdev, bool enable) } new_params = *cur_params; @@ -204831,7 +248486,7 @@ index 41ef6eb70a585..c1c4f380803a1 100644 out: mutex_unlock(&priv->state_lock); return err; -@@ -3302,7 +3325,9 @@ static int set_feature_hw_tc(struct net_device *netdev, bool enable) +@@ -3302,7 +3309,9 @@ static int set_feature_hw_tc(struct net_device *netdev, bool enable) struct mlx5e_priv *priv = netdev_priv(netdev); #if IS_ENABLED(CONFIG_MLX5_CLS_ACT) @@ -204842,7 +248497,62 @@ index 41ef6eb70a585..c1c4f380803a1 100644 netdev_err(netdev, "Active offloaded tc filters, can't turn hw_tc_offload off\n"); return -EINVAL; -@@ -3427,12 +3452,11 @@ static int set_feature_arfs(struct net_device *netdev, bool enable) +@@ -3350,41 +3359,27 @@ static int mlx5e_set_rx_port_ts(struct mlx5_core_dev *mdev, bool enable) + return mlx5_set_ports_check(mdev, in, sizeof(in)); + } + ++static int mlx5e_set_rx_port_ts_wrap(struct mlx5e_priv *priv, void *ctx) ++{ ++ struct mlx5_core_dev *mdev = priv->mdev; ++ bool enable = *(bool *)ctx; ++ ++ return mlx5e_set_rx_port_ts(mdev, enable); ++} ++ + static int set_feature_rx_fcs(struct net_device *netdev, bool enable) + { + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5e_channels *chs = &priv->channels; +- struct mlx5_core_dev *mdev = priv->mdev; ++ struct mlx5e_params new_params; + int err; + + mutex_lock(&priv->state_lock); + +- if (enable) { +- err = mlx5e_set_rx_port_ts(mdev, false); +- if (err) +- goto out; +- +- chs->params.scatter_fcs_en = true; +- err = mlx5e_modify_channels_scatter_fcs(chs, true); +- if (err) { +- chs->params.scatter_fcs_en = false; +- mlx5e_set_rx_port_ts(mdev, true); +- } +- } else { +- chs->params.scatter_fcs_en = false; +- err = mlx5e_modify_channels_scatter_fcs(chs, false); +- if (err) { +- 
chs->params.scatter_fcs_en = true; +- goto out; +- } +- err = mlx5e_set_rx_port_ts(mdev, true); +- if (err) { +- mlx5_core_warn(mdev, "Failed to set RX port timestamp %d\n", err); +- err = 0; +- } +- } +- +-out: ++ new_params = chs->params; ++ new_params.scatter_fcs_en = enable; ++ err = mlx5e_safe_switch_params(priv, &new_params, mlx5e_set_rx_port_ts_wrap, ++ &new_params.scatter_fcs_en, true); + mutex_unlock(&priv->state_lock); + return err; + } +@@ -3427,12 +3422,11 @@ static int set_feature_arfs(struct net_device *netdev, bool enable) static int mlx5e_handle_feature(struct net_device *netdev, netdev_features_t *features, @@ -204857,7 +248567,7 @@ index 41ef6eb70a585..c1c4f380803a1 100644 int err; if (!(changes & feature)) -@@ -3440,22 +3464,22 @@ static int mlx5e_handle_feature(struct net_device *netdev, +@@ -3440,22 +3434,22 @@ static int mlx5e_handle_feature(struct net_device *netdev, err = feature_handler(netdev, enable); if (err) { @@ -204883,7 +248593,7 @@ index 41ef6eb70a585..c1c4f380803a1 100644 err |= MLX5E_HANDLE_FEATURE(NETIF_F_LRO, set_feature_lro); err |= MLX5E_HANDLE_FEATURE(NETIF_F_HW_VLAN_CTAG_FILTER, -@@ -3520,6 +3544,13 @@ static netdev_features_t mlx5e_fix_features(struct net_device *netdev, +@@ -3520,6 +3514,13 @@ static netdev_features_t mlx5e_fix_features(struct net_device *netdev, } } @@ -204897,7 +248607,7 @@ index 41ef6eb70a585..c1c4f380803a1 100644 if (MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS)) { features &= ~NETIF_F_RXHASH; if (netdev->features & NETIF_F_RXHASH) -@@ -3606,7 +3637,7 @@ int mlx5e_change_mtu(struct net_device *netdev, int new_mtu, +@@ -3606,7 +3607,7 @@ int mlx5e_change_mtu(struct net_device *netdev, int new_mtu, goto out; } @@ -204906,7 +248616,7 @@ index 41ef6eb70a585..c1c4f380803a1 100644 reset = false; if (params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) { -@@ -4063,8 +4094,8 @@ static int mlx5e_xdp_allowed(struct mlx5e_priv *priv, struct bpf_prog *prog) +@@ -4063,8 +4064,8 @@ static int mlx5e_xdp_allowed(struct mlx5e_priv *priv, struct bpf_prog *prog) struct net_device *netdev = priv->netdev; struct mlx5e_params new_params; @@ -204917,7 +248627,7 @@ index 41ef6eb70a585..c1c4f380803a1 100644 return -EINVAL; } -@@ -4152,6 +4183,11 @@ static int mlx5e_xdp_set(struct net_device *netdev, struct bpf_prog *prog) +@@ -4152,6 +4153,11 @@ static int mlx5e_xdp_set(struct net_device *netdev, struct bpf_prog *prog) unlock: mutex_unlock(&priv->state_lock); @@ -204929,7 +248639,7 @@ index 41ef6eb70a585..c1c4f380803a1 100644 return err; } -@@ -4316,14 +4352,7 @@ void mlx5e_build_nic_params(struct mlx5e_priv *priv, struct mlx5e_xsk *xsk, u16 +@@ -4316,14 +4322,7 @@ void mlx5e_build_nic_params(struct mlx5e_priv *priv, struct mlx5e_xsk *xsk, u16 /* RQ */ mlx5e_build_rq_params(mdev, params); @@ -204945,7 +248655,7 @@ index 41ef6eb70a585..c1c4f380803a1 100644 /* CQ moderation params */ rx_cq_period_mode = MLX5_CAP_GEN(mdev, cq_period_start_from_cqe) ? 
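/* Editor's note: set_feature_rx_fcs() above is rewritten to route the
 * scatter-FCS toggle through mlx5e_safe_switch_params() with a
 * pre-activation hook instead of hand-rolled enable/disable rollback.
 * A greatly simplified, hypothetical shape of that commit-or-keep-old
 * pattern (the real function also rebuilds channels; names below are
 * illustrative only):
 */
struct params {
	int scatter_fcs_en;
};

/* Apply @new_p atomically: run @preactivate first and keep the current
 * parameters on failure, so the device never sees a half-applied state. */
static int safe_switch_params(struct params *cur, const struct params *new_p,
			      int (*preactivate)(void *ctx), void *ctx)
{
	int err = preactivate ? preactivate(ctx) : 0;

	if (err)
		return err;	/* old params stay in effect */
	*cur = *new_p;		/* commit */
	return 0;
}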
-@@ -4462,15 +4491,22 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev) +@@ -4462,15 +4461,22 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev) } if (mlx5_vxlan_allowed(mdev->vxlan) || mlx5_geneve_tx_allowed(mdev)) { @@ -204974,7 +248684,7 @@ index 41ef6eb70a585..c1c4f380803a1 100644 } if (mlx5e_tunnel_proto_supported_tx(mdev, IPPROTO_IPIP)) { -@@ -4608,7 +4644,6 @@ static int mlx5e_init_nic_rx(struct mlx5e_priv *priv) +@@ -4608,7 +4614,6 @@ static int mlx5e_init_nic_rx(struct mlx5e_priv *priv) { struct mlx5_core_dev *mdev = priv->mdev; enum mlx5e_rx_res_features features; @@ -204982,7 +248692,7 @@ index 41ef6eb70a585..c1c4f380803a1 100644 int err; priv->rx_res = mlx5e_rx_res_alloc(); -@@ -4626,9 +4661,9 @@ static int mlx5e_init_nic_rx(struct mlx5e_priv *priv) +@@ -4626,9 +4631,9 @@ static int mlx5e_init_nic_rx(struct mlx5e_priv *priv) features = MLX5E_RX_RES_FEATURE_XSK | MLX5E_RX_RES_FEATURE_PTP; if (priv->channels.params.tunneled_offload_en) features |= MLX5E_RX_RES_FEATURE_INNER_FT; @@ -204994,7 +248704,7 @@ index 41ef6eb70a585..c1c4f380803a1 100644 priv->channels.params.num_channels); if (err) goto err_close_drop_rq; -@@ -4885,6 +4920,7 @@ mlx5e_create_netdev(struct mlx5_core_dev *mdev, const struct mlx5e_profile *prof +@@ -4885,6 +4890,7 @@ mlx5e_create_netdev(struct mlx5_core_dev *mdev, const struct mlx5e_profile *prof } netif_carrier_off(netdev); @@ -205119,7 +248829,7 @@ index e1dd17019030e..5a5c6eda29d28 100644 MLX5_SET(query_vnic_env_in, in, opcode, MLX5_CMD_OP_QUERY_VNIC_ENV); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c -index 129ff7e0d65cc..843c8435387f3 100644 +index 129ff7e0d65cc..9ea4281a55b81 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -248,7 +248,6 @@ get_ct_priv(struct mlx5e_priv *priv) @@ -205180,6 +248890,40 @@ index 129ff7e0d65cc..843c8435387f3 100644 } struct mlx5_flow_handle * +@@ -1353,10 +1342,10 @@ int mlx5e_tc_query_route_vport(struct net_device *out_dev, struct net_device *ro + } + + int mlx5e_tc_add_flow_mod_hdr(struct mlx5e_priv *priv, +- struct mlx5e_tc_flow_parse_attr *parse_attr, +- struct mlx5e_tc_flow *flow) ++ struct mlx5e_tc_flow *flow, ++ struct mlx5_flow_attr *attr) + { +- struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts = &parse_attr->mod_hdr_acts; ++ struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts = &attr->parse_attr->mod_hdr_acts; + struct mlx5_modify_hdr *mod_hdr; + + mod_hdr = mlx5_modify_header_alloc(priv->mdev, +@@ -1366,8 +1355,8 @@ int mlx5e_tc_add_flow_mod_hdr(struct mlx5e_priv *priv, + if (IS_ERR(mod_hdr)) + return PTR_ERR(mod_hdr); + +- WARN_ON(flow->attr->modify_hdr); +- flow->attr->modify_hdr = mod_hdr; ++ WARN_ON(attr->modify_hdr); ++ attr->modify_hdr = mod_hdr; + + return 0; + } +@@ -1468,7 +1457,7 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, + if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR && + !(attr->ct_attr.ct_action & TCA_CT_ACT_CLEAR)) { + if (vf_tun) { +- err = mlx5e_tc_add_flow_mod_hdr(priv, parse_attr, flow); ++ err = mlx5e_tc_add_flow_mod_hdr(priv, flow, attr); + if (err) + goto err_out; + } else { @@ -1544,6 +1533,7 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv, else mlx5e_tc_unoffload_fdb_rules(esw, flow, attr); @@ -205448,7 +249192,13 @@ index 129ff7e0d65cc..843c8435387f3 100644 return true; } -@@ -3457,7 +3601,9 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv, +@@ -3453,11 +3597,14 @@ static int parse_tc_nic_actions(struct 
mlx5e_priv *priv, + if (err) + return err; + +- action |= MLX5_FLOW_CONTEXT_ACTION_COUNT; ++ action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | ++ MLX5_FLOW_CONTEXT_ACTION_COUNT; attr->dest_chain = act->chain_index; break; case FLOW_ACTION_CT: @@ -205459,7 +249209,33 @@ index 129ff7e0d65cc..843c8435387f3 100644 if (err) return err; -@@ -4008,7 +4154,9 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, +@@ -3486,12 +3633,9 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv, + + attr->action = action; + +- if (attr->dest_chain) { +- if (attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) { +- NL_SET_ERR_MSG(extack, "Mirroring goto chain rules isn't supported"); +- return -EOPNOTSUPP; +- } +- attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; ++ if (attr->dest_chain && parse_attr->mirred_ifindex[0]) { ++ NL_SET_ERR_MSG(extack, "Mirroring goto chain rules isn't supported"); ++ return -EOPNOTSUPP; + } + + if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) +@@ -4000,7 +4144,8 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, + if (err) + return err; + +- action |= MLX5_FLOW_CONTEXT_ACTION_COUNT; ++ action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | ++ MLX5_FLOW_CONTEXT_ACTION_COUNT; + attr->dest_chain = act->chain_index; + break; + case FLOW_ACTION_CT: +@@ -4008,7 +4153,9 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, NL_SET_ERR_MSG_MOD(extack, "Sample action with connection tracking is not supported"); return -EOPNOTSUPP; } @@ -205470,21 +249246,48 @@ index 129ff7e0d65cc..843c8435387f3 100644 if (err) return err; -@@ -4090,13 +4238,6 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, - attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; - } +@@ -4070,30 +4217,17 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, + if (!actions_match_supported(priv, flow_action, parse_attr, flow, extack)) + return -EOPNOTSUPP; + +- if (attr->dest_chain) { +- if (decap) { +- /* It can be supported if we'll create a mapping for +- * the tunnel device only (without tunnel), and set +- * this tunnel id with this decap flow. +- * +- * On restore (miss), we'll just set this saved tunnel +- * device. +- */ +- +- NL_SET_ERR_MSG(extack, +- "Decap with goto isn't supported"); +- netdev_warn(priv->netdev, +- "Decap with goto isn't supported"); +- return -EOPNOTSUPP; +- } +- +- attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; +- } ++ if (attr->dest_chain && decap) { ++ /* It can be supported if we'll create a mapping for ++ * the tunnel device only (without tunnel), and set ++ * this tunnel id with this decap flow. ++ * ++ * On restore (miss), we'll just set this saved tunnel ++ * device. 
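/* Editor's sketch: the parse_tc_*_actions() hunks above fold the goto-chain
 * handling into the action flags directly and reject the combinations that
 * remain unsupported (goto with mirroring, goto with decap). One compact,
 * hypothetical way to express pairwise-incompatible action bits -- the flag
 * names are illustrative, not the mlx5 definitions:
 */
#include <stdint.h>

#define ACT_GOTO_CHAIN	(1u << 0)
#define ACT_DECAP	(1u << 1)
#define ACT_MIRRED	(1u << 2)

static int validate_actions(uint32_t act)
{
	/* goto-chain cannot be combined with decap or mirroring */
	if ((act & ACT_GOTO_CHAIN) && (act & (ACT_DECAP | ACT_MIRRED)))
		return -1;	/* -EOPNOTSUPP in kernel terms */
	return 0;
}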
++ */ - if (!(attr->action & - (MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_DROP))) { - NL_SET_ERR_MSG_MOD(extack, - "Rule must have at least one forward/drop action"); -- return -EOPNOTSUPP; -- } -- - if (esw_attr->split_count > 0 && !mlx5_esw_has_fwd_fdb(priv->mdev)) { - NL_SET_ERR_MSG_MOD(extack, - "current firmware doesn't support split rule for port mirroring"); -@@ -4222,6 +4363,7 @@ mlx5e_alloc_flow(struct mlx5e_priv *priv, int attr_size, ++ NL_SET_ERR_MSG(extack, "Decap with goto isn't supported"); ++ netdev_warn(priv->netdev, "Decap with goto isn't supported"); + return -EOPNOTSUPP; + } + +@@ -4222,6 +4356,7 @@ mlx5e_alloc_flow(struct mlx5e_priv *priv, int attr_size, INIT_LIST_HEAD(&flow->l3_to_l2_reformat); refcount_set(&flow->refcnt, 1); init_completion(&flow->init_done); @@ -205492,7 +249295,7 @@ index 129ff7e0d65cc..843c8435387f3 100644 *__flow = flow; *__parse_attr = parse_attr; -@@ -4875,6 +5017,33 @@ static int mlx5e_tc_nic_get_ft_size(struct mlx5_core_dev *dev) +@@ -4875,6 +5010,33 @@ static int mlx5e_tc_nic_get_ft_size(struct mlx5_core_dev *dev) return tc_tbl_size; } @@ -205526,7 +249329,7 @@ index 129ff7e0d65cc..843c8435387f3 100644 int mlx5e_tc_nic_init(struct mlx5e_priv *priv) { struct mlx5e_tc_table *tc = &priv->fs.tc; -@@ -4907,19 +5076,23 @@ int mlx5e_tc_nic_init(struct mlx5e_priv *priv) +@@ -4907,19 +5069,23 @@ int mlx5e_tc_nic_init(struct mlx5e_priv *priv) } tc->mapping = chains_mapping; @@ -205552,7 +249355,7 @@ index 129ff7e0d65cc..843c8435387f3 100644 } tc->post_act = mlx5e_tc_post_act_init(priv, tc->chains, MLX5_FLOW_NAMESPACE_KERNEL); -@@ -4942,6 +5115,8 @@ err_reg: +@@ -4942,6 +5108,8 @@ err_reg: mlx5_tc_ct_clean(tc->ct); mlx5e_tc_post_act_destroy(tc->post_act); mlx5_chains_destroy(tc->chains); @@ -205561,7 +249364,7 @@ index 129ff7e0d65cc..843c8435387f3 100644 err_chains: mapping_destroy(chains_mapping); err_mapping: -@@ -4982,6 +5157,7 @@ void mlx5e_tc_nic_cleanup(struct mlx5e_priv *priv) +@@ -4982,6 +5150,7 @@ void mlx5e_tc_nic_cleanup(struct mlx5e_priv *priv) mlx5e_tc_post_act_destroy(tc->post_act); mapping_destroy(tc->mapping); mlx5_chains_destroy(tc->chains); @@ -205569,7 +249372,7 @@ index 129ff7e0d65cc..843c8435387f3 100644 } int mlx5e_tc_esw_init(struct rhashtable *tc_ht) -@@ -5008,9 +5184,7 @@ int mlx5e_tc_esw_init(struct rhashtable *tc_ht) +@@ -5008,9 +5177,7 @@ int mlx5e_tc_esw_init(struct rhashtable *tc_ht) MLX5_FLOW_NAMESPACE_FDB, uplink_priv->post_act); @@ -205579,7 +249382,7 @@ index 129ff7e0d65cc..843c8435387f3 100644 mapping_id = mlx5_query_nic_system_image_guid(esw->dev); -@@ -5054,9 +5228,7 @@ err_ht_init: +@@ -5054,9 +5221,7 @@ err_ht_init: err_enc_opts_mapping: mapping_destroy(uplink_priv->tunnel_mapping); err_tun_mapping: @@ -205589,7 +249392,7 @@ index 129ff7e0d65cc..843c8435387f3 100644 mlx5_tc_ct_clean(uplink_priv->ct_priv); netdev_warn(priv->netdev, "Failed to initialize tc (eswitch), err: %d", err); -@@ -5076,9 +5248,7 @@ void mlx5e_tc_esw_cleanup(struct rhashtable *tc_ht) +@@ -5076,9 +5241,7 @@ void mlx5e_tc_esw_cleanup(struct rhashtable *tc_ht) mapping_destroy(uplink_priv->tunnel_enc_opts_mapping); mapping_destroy(uplink_priv->tunnel_mapping); @@ -205599,8 +249402,23 @@ index 129ff7e0d65cc..843c8435387f3 100644 mlx5_tc_ct_clean(uplink_priv->ct_priv); mlx5e_tc_post_act_destroy(uplink_priv->post_act); } +diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h +index 1a4cd882f0fba..f48af82781f88 100644 +--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h ++++ 
b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h +@@ -241,8 +241,8 @@ int mlx5e_tc_match_to_reg_set_and_get_id(struct mlx5_core_dev *mdev, + u32 data); + + int mlx5e_tc_add_flow_mod_hdr(struct mlx5e_priv *priv, +- struct mlx5e_tc_flow_parse_attr *parse_attr, +- struct mlx5e_tc_flow *flow); ++ struct mlx5e_tc_flow *flow, ++ struct mlx5_flow_attr *attr); + + int alloc_mod_hdr_actions(struct mlx5_core_dev *mdev, + int namespace, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c -index 188994d091c54..1544d4c2c6360 100644 +index 188994d091c54..e18fa5ae0fd84 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c @@ -38,6 +38,7 @@ @@ -205670,7 +249488,19 @@ index 188994d091c54..1544d4c2c6360 100644 static inline void mlx5e_txwqe_complete(struct mlx5e_txqsq *sq, struct sk_buff *skb, const struct mlx5e_tx_attr *attr, -@@ -537,6 +541,7 @@ mlx5e_sq_xmit_wqe(struct mlx5e_txqsq *sq, struct sk_buff *skb, +@@ -475,6 +479,11 @@ mlx5e_txwqe_complete(struct mlx5e_txqsq *sq, struct sk_buff *skb, + if (unlikely(sq->ptpsq)) { + mlx5e_skb_cb_hwtstamp_init(skb); + mlx5e_skb_fifo_push(&sq->ptpsq->skb_fifo, skb); ++ if (!netif_tx_queue_stopped(sq->txq) && ++ !mlx5e_skb_fifo_has_room(&sq->ptpsq->skb_fifo)) { ++ netif_tx_stop_queue(sq->txq); ++ sq->stats->stopped++; ++ } + skb_get(skb); + } + +@@ -537,6 +546,7 @@ mlx5e_sq_xmit_wqe(struct mlx5e_txqsq *sq, struct sk_buff *skb, err_drop: stats->dropped++; dev_kfree_skb_any(skb); @@ -205678,7 +249508,7 @@ index 188994d091c54..1544d4c2c6360 100644 } static bool mlx5e_tx_skb_supports_mpwqe(struct sk_buff *skb, struct mlx5e_tx_attr *attr) -@@ -638,6 +643,13 @@ mlx5e_sq_xmit_mpwqe(struct mlx5e_txqsq *sq, struct sk_buff *skb, +@@ -638,6 +648,13 @@ mlx5e_sq_xmit_mpwqe(struct mlx5e_txqsq *sq, struct sk_buff *skb, struct mlx5_wqe_ctrl_seg *cseg; struct mlx5e_xmit_data txd; @@ -205692,7 +249522,7 @@ index 188994d091c54..1544d4c2c6360 100644 if (!mlx5e_tx_mpwqe_session_is_active(sq)) { mlx5e_tx_mpwqe_session_start(sq, eseg); } else if (!mlx5e_tx_mpwqe_same_eseg(sq, eseg)) { -@@ -647,18 +659,9 @@ mlx5e_sq_xmit_mpwqe(struct mlx5e_txqsq *sq, struct sk_buff *skb, +@@ -647,18 +664,9 @@ mlx5e_sq_xmit_mpwqe(struct mlx5e_txqsq *sq, struct sk_buff *skb, sq->stats->xmit_more += xmit_more; @@ -205711,7 +249541,7 @@ index 188994d091c54..1544d4c2c6360 100644 mlx5e_tx_skb_update_hwts_flags(skb); if (unlikely(mlx5e_tx_mpwqe_is_full(&sq->mpwqe))) { -@@ -680,6 +683,7 @@ err_unmap: +@@ -680,6 +688,7 @@ err_unmap: mlx5e_dma_unmap_wqe_err(sq, 1); sq->stats->dropped++; dev_kfree_skb_any(skb); @@ -205719,17 +249549,144 @@ index 188994d091c54..1544d4c2c6360 100644 } void mlx5e_tx_mpwqe_ensure_complete(struct mlx5e_txqsq *sq) -@@ -1049,5 +1053,6 @@ void mlx5i_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb, +@@ -902,6 +911,7 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget) + + if (netif_tx_queue_stopped(sq->txq) && + mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc, sq->stop_room) && ++ mlx5e_ptpsq_fifo_has_room(sq) && + !test_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state)) { + netif_tx_wake_queue(sq->txq); + stats->wake++; +@@ -1049,5 +1059,6 @@ void mlx5i_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb, err_drop: stats->dropped++; dev_kfree_skb_any(skb); + mlx5e_tx_flush(sq); } #endif +diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_lgcy.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_lgcy.c +index 60a73990017c2..6b4c9ffad95b2 100644 
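/* Editor's illustration of the counter scheme behind the PTP skb-FIFO room
 * check used in the en_tx.c hunk above: producer and consumer counters only
 * ever increment, and unsigned wraparound keeps (pc - cc) equal to the fill
 * level; mask is size-1 for a power-of-two ring, so "full" reserves one
 * slot. A standalone model with hypothetical field layout:
 */
#include <stdbool.h>
#include <stdint.h>

struct skb_fifo {
	uint16_t pc;	/* producer counter, never reset */
	uint16_t cc;	/* consumer counter, never reset */
	uint16_t mask;	/* ring size - 1 (size is a power of two) */
};

static bool fifo_has_room(const struct skb_fifo *f)
{
	/* The cast keeps the subtraction correct across wraparound. */
	return (uint16_t)(f->pc - f->cc) < f->mask;
}
/* The TX path stops the netdev queue when this returns false and wakes it
 * from the completion handler once the consumer catches up. */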
+--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_lgcy.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_lgcy.c +@@ -67,6 +67,7 @@ static void esw_acl_egress_lgcy_groups_destroy(struct mlx5_vport *vport) + int esw_acl_egress_lgcy_setup(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) + { ++ bool vst_mode_steering = esw_vst_mode_is_steering(esw); + struct mlx5_flow_destination drop_ctr_dst = {}; + struct mlx5_flow_destination *dst = NULL; + struct mlx5_fc *drop_counter = NULL; +@@ -77,6 +78,7 @@ int esw_acl_egress_lgcy_setup(struct mlx5_eswitch *esw, + */ + int table_size = 2; + int dest_num = 0; ++ int actions_flag; + int err = 0; + + if (vport->egress.legacy.drop_counter) { +@@ -119,8 +121,11 @@ int esw_acl_egress_lgcy_setup(struct mlx5_eswitch *esw, + vport->vport, vport->info.vlan, vport->info.qos); + + /* Allowed vlan rule */ ++ actions_flag = MLX5_FLOW_CONTEXT_ACTION_ALLOW; ++ if (vst_mode_steering) ++ actions_flag |= MLX5_FLOW_CONTEXT_ACTION_VLAN_POP; + err = esw_egress_acl_vlan_create(esw, vport, NULL, vport->info.vlan, +- MLX5_FLOW_CONTEXT_ACTION_ALLOW); ++ actions_flag); + if (err) + goto out; + +diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_lgcy.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_lgcy.c +index b1a5199260f69..093ed86a0acd8 100644 +--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_lgcy.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_lgcy.c +@@ -139,11 +139,14 @@ static void esw_acl_ingress_lgcy_groups_destroy(struct mlx5_vport *vport) + int esw_acl_ingress_lgcy_setup(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) + { ++ bool vst_mode_steering = esw_vst_mode_is_steering(esw); + struct mlx5_flow_destination drop_ctr_dst = {}; + struct mlx5_flow_destination *dst = NULL; + struct mlx5_flow_act flow_act = {}; + struct mlx5_flow_spec *spec = NULL; + struct mlx5_fc *counter = NULL; ++ bool vst_check_cvlan = false; ++ bool vst_push_cvlan = false; + /* The ingress acl table contains 4 groups + * (2 active rules at the same time - + * 1 allow rule from one of the first 3 groups. 
+@@ -203,7 +206,26 @@ int esw_acl_ingress_lgcy_setup(struct mlx5_eswitch *esw, + goto out; + } + +- if (vport->info.vlan || vport->info.qos) ++ if ((vport->info.vlan || vport->info.qos)) { ++ if (vst_mode_steering) ++ vst_push_cvlan = true; ++ else if (!MLX5_CAP_ESW(esw->dev, vport_cvlan_insert_always)) ++ vst_check_cvlan = true; ++ } ++ ++ if (vst_check_cvlan || vport->info.spoofchk) ++ spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; ++ ++ /* Create ingress allow rule */ ++ flow_act.action = MLX5_FLOW_CONTEXT_ACTION_ALLOW; ++ if (vst_push_cvlan) { ++ flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH; ++ flow_act.vlan[0].prio = vport->info.qos; ++ flow_act.vlan[0].vid = vport->info.vlan; ++ flow_act.vlan[0].ethtype = ETH_P_8021Q; ++ } ++ ++ if (vst_check_cvlan) + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, + outer_headers.cvlan_tag); + +@@ -218,9 +240,6 @@ int esw_acl_ingress_lgcy_setup(struct mlx5_eswitch *esw, + ether_addr_copy(smac_v, vport->info.mac); + } + +- /* Create ingress allow rule */ +- spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; +- flow_act.action = MLX5_FLOW_CONTEXT_ACTION_ALLOW; + vport->ingress.allow_rule = mlx5_add_flow_rules(vport->ingress.acl, spec, + &flow_act, NULL, 0); + if (IS_ERR(vport->ingress.allow_rule)) { +@@ -232,6 +251,9 @@ int esw_acl_ingress_lgcy_setup(struct mlx5_eswitch *esw, + goto out; + } + ++ if (!vst_check_cvlan && !vport->info.spoofchk) ++ goto out; ++ + memset(&flow_act, 0, sizeof(flow_act)); + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP; + /* Attach drop flow counter */ +@@ -257,7 +279,8 @@ int esw_acl_ingress_lgcy_setup(struct mlx5_eswitch *esw, + return 0; + + out: +- esw_acl_ingress_lgcy_cleanup(esw, vport); ++ if (err) ++ esw_acl_ingress_lgcy_cleanup(esw, vport); + kvfree(spec); + return err; + } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c -index 7e221038df8d5..317d76b97c42a 100644 +index 7e221038df8d5..aec0f67cef005 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c +@@ -1270,7 +1270,7 @@ void mlx5_esw_bridge_fdb_update_used(struct net_device *dev, u16 vport_num, u16 + struct mlx5_esw_bridge *bridge; + + port = mlx5_esw_bridge_port_lookup(vport_num, esw_owner_vhca_id, br_offloads); +- if (!port || port->flags & MLX5_ESW_BRIDGE_PORT_FLAG_PEER) ++ if (!port) + return; + + bridge = port->bridge; @@ -1385,6 +1385,8 @@ struct mlx5_esw_bridge_offloads *mlx5_esw_bridge_init(struct mlx5_eswitch *esw) { struct mlx5_esw_bridge_offloads *br_offloads; @@ -205801,10 +249758,61 @@ index df277a6cddc0b..3a2575dc5355d 100644 if (vlan || qos) set_flags = SET_VLAN_STRIP | SET_VLAN_INSERT; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c -index c6cc67cb4f6ad..d377ddc70fc70 100644 +index c6cc67cb4f6ad..65c8f1f08472c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c -@@ -130,7 +130,7 @@ static u32 esw_qos_calculate_min_rate_divider(struct mlx5_eswitch *esw, +@@ -22,15 +22,13 @@ struct mlx5_esw_rate_group { + }; + + static int esw_qos_tsar_config(struct mlx5_core_dev *dev, u32 *sched_ctx, +- u32 parent_ix, u32 tsar_ix, +- u32 max_rate, u32 bw_share) ++ u32 tsar_ix, u32 max_rate, u32 bw_share) + { + u32 bitmask = 0; + + if (!MLX5_CAP_GEN(dev, qos) || !MLX5_CAP_QOS(dev, esw_scheduling)) + return -EOPNOTSUPP; + +- MLX5_SET(scheduling_context, 
sched_ctx, parent_element_id, parent_ix); + MLX5_SET(scheduling_context, sched_ctx, max_average_bw, max_rate); + MLX5_SET(scheduling_context, sched_ctx, bw_share, bw_share); + bitmask |= MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_MAX_AVERAGE_BW; +@@ -51,7 +49,7 @@ static int esw_qos_group_config(struct mlx5_eswitch *esw, struct mlx5_esw_rate_g + int err; + + err = esw_qos_tsar_config(dev, sched_ctx, +- esw->qos.root_tsar_ix, group->tsar_ix, ++ group->tsar_ix, + max_rate, bw_share); + if (err) + NL_SET_ERR_MSG_MOD(extack, "E-Switch modify group TSAR element failed"); +@@ -67,23 +65,13 @@ static int esw_qos_vport_config(struct mlx5_eswitch *esw, + struct netlink_ext_ack *extack) + { + u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {}; +- struct mlx5_esw_rate_group *group = vport->qos.group; + struct mlx5_core_dev *dev = esw->dev; +- u32 parent_tsar_ix; +- void *vport_elem; + int err; + + if (!vport->qos.enabled) + return -EIO; + +- parent_tsar_ix = group ? group->tsar_ix : esw->qos.root_tsar_ix; +- MLX5_SET(scheduling_context, sched_ctx, element_type, +- SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT); +- vport_elem = MLX5_ADDR_OF(scheduling_context, sched_ctx, +- element_attributes); +- MLX5_SET(vport_element, vport_elem, vport_number, vport->vport); +- +- err = esw_qos_tsar_config(dev, sched_ctx, parent_tsar_ix, vport->qos.esw_tsar_ix, ++ err = esw_qos_tsar_config(dev, sched_ctx, vport->qos.esw_tsar_ix, + max_rate, bw_share); + if (err) { + esw_warn(esw->dev, +@@ -130,7 +118,7 @@ static u32 esw_qos_calculate_min_rate_divider(struct mlx5_eswitch *esw, /* If vports min rate divider is 0 but their group has bw_share configured, then * need to set bw_share for vports to minimal value. */ @@ -205813,7 +249821,7 @@ index c6cc67cb4f6ad..d377ddc70fc70 100644 return 1; return 0; } -@@ -423,7 +423,7 @@ static int esw_qos_vport_update_group(struct mlx5_eswitch *esw, +@@ -423,7 +411,7 @@ static int esw_qos_vport_update_group(struct mlx5_eswitch *esw, return err; /* Recalculate bw share weights of old and new groups */ @@ -205823,10 +249831,52 @@ index c6cc67cb4f6ad..d377ddc70fc70 100644 esw_qos_normalize_vports_min_rate(esw, new_group, extack); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c -index ec136b4992045..51a8cecc4a7ce 100644 +index ec136b4992045..2b9278002354c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c -@@ -1305,12 +1305,17 @@ abort: +@@ -160,10 +160,17 @@ static int modify_esw_vport_cvlan(struct mlx5_core_dev *dev, u16 vport, + esw_vport_context.vport_cvlan_strip, 1); + + if (set_flags & SET_VLAN_INSERT) { +- /* insert only if no vlan in packet */ +- MLX5_SET(modify_esw_vport_context_in, in, +- esw_vport_context.vport_cvlan_insert, 1); +- ++ if (MLX5_CAP_ESW(dev, vport_cvlan_insert_always)) { ++ /* insert either if vlan exist in packet or not */ ++ MLX5_SET(modify_esw_vport_context_in, in, ++ esw_vport_context.vport_cvlan_insert, ++ MLX5_VPORT_CVLAN_INSERT_ALWAYS); ++ } else { ++ /* insert only if no vlan in packet */ ++ MLX5_SET(modify_esw_vport_context_in, in, ++ esw_vport_context.vport_cvlan_insert, ++ MLX5_VPORT_CVLAN_INSERT_WHEN_NO_CVLAN); ++ } + MLX5_SET(modify_esw_vport_context_in, in, + esw_vport_context.cvlan_pcp, qos); + MLX5_SET(modify_esw_vport_context_in, in, +@@ -773,6 +780,7 @@ static void esw_vport_cleanup_acl(struct mlx5_eswitch *esw, + + static int esw_vport_setup(struct mlx5_eswitch *esw, struct mlx5_vport *vport) + { ++ bool 
vst_mode_steering = esw_vst_mode_is_steering(esw); + u16 vport_num = vport->vport; + int flags; + int err; +@@ -802,8 +810,9 @@ static int esw_vport_setup(struct mlx5_eswitch *esw, struct mlx5_vport *vport) + + flags = (vport->info.vlan || vport->info.qos) ? + SET_VLAN_STRIP | SET_VLAN_INSERT : 0; +- modify_esw_vport_cvlan(esw->dev, vport_num, vport->info.vlan, +- vport->info.qos, flags); ++ if (esw->mode == MLX5_ESWITCH_OFFLOADS || !vst_mode_steering) ++ modify_esw_vport_cvlan(esw->dev, vport_num, vport->info.vlan, ++ vport->info.qos, flags); + + return 0; + } +@@ -1305,12 +1314,17 @@ abort: */ int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int num_vfs) { @@ -205845,7 +249895,7 @@ index ec136b4992045..51a8cecc4a7ce 100644 down_write(&esw->mode_lock); if (esw->mode == MLX5_ESWITCH_NONE) { ret = mlx5_eswitch_enable_locked(esw, MLX5_ESWITCH_LEGACY, num_vfs); -@@ -1324,7 +1329,10 @@ int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int num_vfs) +@@ -1324,7 +1338,10 @@ int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int num_vfs) esw->esw_funcs.num_vfs = num_vfs; } up_write(&esw->mode_lock); @@ -205857,7 +249907,7 @@ index ec136b4992045..51a8cecc4a7ce 100644 return ret; } -@@ -1572,6 +1580,11 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev) +@@ -1572,6 +1589,11 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev) esw->enabled_vports = 0; esw->mode = MLX5_ESWITCH_NONE; esw->offloads.inline_mode = MLX5_INLINE_MODE_NONE; @@ -205869,7 +249919,30 @@ index ec136b4992045..51a8cecc4a7ce 100644 dev->priv.eswitch = esw; BLOCKING_INIT_NOTIFIER_HEAD(&esw->n_head); -@@ -1934,7 +1947,7 @@ free_out: +@@ -1833,6 +1855,7 @@ int __mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw, + u16 vport, u16 vlan, u8 qos, u8 set_flags) + { + struct mlx5_vport *evport = mlx5_eswitch_get_vport(esw, vport); ++ bool vst_mode_steering = esw_vst_mode_is_steering(esw); + int err = 0; + + if (IS_ERR(evport)) +@@ -1840,9 +1863,11 @@ int __mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw, + if (vlan > 4095 || qos > 7) + return -EINVAL; + +- err = modify_esw_vport_cvlan(esw->dev, vport, vlan, qos, set_flags); +- if (err) +- return err; ++ if (esw->mode == MLX5_ESWITCH_OFFLOADS || !vst_mode_steering) { ++ err = modify_esw_vport_cvlan(esw->dev, vport, vlan, qos, set_flags); ++ if (err) ++ return err; ++ } + + evport->info.vlan = vlan; + evport->info.qos = qos; +@@ -1934,7 +1959,7 @@ free_out: return err; } @@ -205878,7 +249951,7 @@ index ec136b4992045..51a8cecc4a7ce 100644 { struct mlx5_eswitch *esw = dev->priv.eswitch; -@@ -1948,7 +1961,7 @@ mlx5_eswitch_get_encap_mode(const struct mlx5_core_dev *dev) +@@ -1948,7 +1973,7 @@ mlx5_eswitch_get_encap_mode(const struct mlx5_core_dev *dev) struct mlx5_eswitch *esw; esw = dev->priv.eswitch; @@ -205887,6 +249960,23 @@ index ec136b4992045..51a8cecc4a7ce 100644 DEVLINK_ESWITCH_ENCAP_MODE_NONE; } EXPORT_SYMBOL(mlx5_eswitch_get_encap_mode); +diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +index 2c7444101bb93..0e2c9e6fccb67 100644 +--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h ++++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +@@ -505,6 +505,12 @@ static inline bool mlx5_esw_qos_enabled(struct mlx5_eswitch *esw) + return esw->qos.enabled; + } + ++static inline bool esw_vst_mode_is_steering(struct mlx5_eswitch *esw) ++{ ++ return (MLX5_CAP_ESW_EGRESS_ACL(esw->dev, pop_vlan) && ++ MLX5_CAP_ESW_INGRESS_ACL(esw->dev, push_vlan)); ++} ++ + static inline bool mlx5_eswitch_vlan_actions_supported(struct 
mlx5_core_dev *dev, + u8 vlan_depth) + { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 0d461e38add37..3194cdcd2f630 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -205940,6 +250030,47 @@ index 0d461e38add37..3194cdcd2f630 100644 } static bool +diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c +index b459549058450..1b417b1d1cf8f 100644 +--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c +@@ -30,9 +30,9 @@ mlx5_eswitch_termtbl_hash(struct mlx5_flow_act *flow_act, + sizeof(dest->vport.num), hash); + hash = jhash((const void *)&dest->vport.vhca_id, + sizeof(dest->vport.num), hash); +- if (dest->vport.pkt_reformat) +- hash = jhash(dest->vport.pkt_reformat, +- sizeof(*dest->vport.pkt_reformat), ++ if (flow_act->pkt_reformat) ++ hash = jhash(flow_act->pkt_reformat, ++ sizeof(*flow_act->pkt_reformat), + hash); + return hash; + } +@@ -53,9 +53,11 @@ mlx5_eswitch_termtbl_cmp(struct mlx5_flow_act *flow_act1, + if (ret) + return ret; + +- return dest1->vport.pkt_reformat && dest2->vport.pkt_reformat ? +- memcmp(dest1->vport.pkt_reformat, dest2->vport.pkt_reformat, +- sizeof(*dest1->vport.pkt_reformat)) : 0; ++ if (flow_act1->pkt_reformat && flow_act2->pkt_reformat) ++ return memcmp(flow_act1->pkt_reformat, flow_act2->pkt_reformat, ++ sizeof(*flow_act1->pkt_reformat)); ++ ++ return !(flow_act1->pkt_reformat == flow_act2->pkt_reformat); + } + + static int +@@ -307,6 +309,8 @@ revert_changes: + for (curr_dest = 0; curr_dest < num_vport_dests; curr_dest++) { + struct mlx5_termtbl_handle *tt = attr->dests[curr_dest].termtbl; + ++ attr->dests[curr_dest].termtbl = NULL; ++ + /* search for the destination associated with the + * current term table + */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index fe501ba88bea9..cb3f9de3d00ba 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -206160,11 +250291,72 @@ index 106b50e42b464..8c2b249949b97 100644 mlx5_core_warn(dev, "PCI Sync FW Update Reset Aborted.\n"); } +diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c +index 037e18dd4be0e..1504856fafde4 100644 +--- a/drivers/net/ethernet/mellanox/mlx5/core/health.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c +@@ -614,6 +614,13 @@ static void mlx5_fw_fatal_reporter_err_work(struct work_struct *work) + priv = container_of(health, struct mlx5_priv, health); + dev = container_of(priv, struct mlx5_core_dev, priv); + ++ mutex_lock(&dev->intf_state_mutex); ++ if (test_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags)) { ++ mlx5_core_err(dev, "health works are not permitted at this stage\n"); ++ mutex_unlock(&dev->intf_state_mutex); ++ return; ++ } ++ mutex_unlock(&dev->intf_state_mutex); + enter_error_state(dev, false); + if (IS_ERR_OR_NULL(health->fw_fatal_reporter)) { + if (mlx5_health_try_recover(dev)) +diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c +index 0c8594c7df21d..908e5ee1a30fa 100644 +--- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c +@@ -172,16 +172,16 @@ static inline int 
mlx5_ptys_rate_enum_to_int(enum mlx5_ptys_rate rate) + } + } + +-static int mlx5i_get_speed_settings(u16 ib_link_width_oper, u16 ib_proto_oper) ++static u32 mlx5i_get_speed_settings(u16 ib_link_width_oper, u16 ib_proto_oper) + { + int rate, width; + + rate = mlx5_ptys_rate_enum_to_int(ib_proto_oper); + if (rate < 0) +- return -EINVAL; ++ return SPEED_UNKNOWN; + width = mlx5_ptys_width_enum_to_int(ib_link_width_oper); + if (width < 0) +- return -EINVAL; ++ return SPEED_UNKNOWN; + + return rate * width; + } +@@ -204,16 +204,13 @@ static int mlx5i_get_link_ksettings(struct net_device *netdev, + ethtool_link_ksettings_zero_link_mode(link_ksettings, advertising); + + speed = mlx5i_get_speed_settings(ib_link_width_oper, ib_proto_oper); +- if (speed < 0) +- return -EINVAL; ++ link_ksettings->base.speed = speed; ++ link_ksettings->base.duplex = speed == SPEED_UNKNOWN ? DUPLEX_UNKNOWN : DUPLEX_FULL; + +- link_ksettings->base.duplex = DUPLEX_FULL; + link_ksettings->base.port = PORT_OTHER; + + link_ksettings->base.autoneg = AUTONEG_DISABLE; + +- link_ksettings->base.speed = speed; +- + return 0; + } + diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c -index 269ebb53eda67..cfde0a45b8b8a 100644 +index 269ebb53eda67..10940b8dc83e2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c -@@ -67,7 +67,7 @@ static void mlx5i_build_nic_params(struct mlx5_core_dev *mdev, +@@ -67,9 +67,13 @@ static void mlx5i_build_nic_params(struct mlx5_core_dev *mdev, MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE : MLX5I_PARAMS_DEFAULT_LOG_RQ_SIZE; @@ -206172,8 +250364,14 @@ index 269ebb53eda67..cfde0a45b8b8a 100644 + params->packet_merge.type = MLX5E_PACKET_MERGE_NONE; params->hard_mtu = MLX5_IB_GRH_BYTES + MLX5_IPOIB_HARD_LEN; params->tunneled_offload_en = false; ++ ++ /* CQE compression is not supported for IPoIB */ ++ params->rx_cqe_compress_def = false; ++ MLX5E_SET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS, params->rx_cqe_compress_def); } -@@ -353,7 +353,6 @@ static void mlx5i_destroy_flow_steering(struct mlx5e_priv *priv) + + /* Called directly after IPoIB netdevice was created to initialize SW structs */ +@@ -353,7 +357,6 @@ static void mlx5i_destroy_flow_steering(struct mlx5e_priv *priv) static int mlx5i_init_rx(struct mlx5e_priv *priv) { struct mlx5_core_dev *mdev = priv->mdev; @@ -206181,7 +250379,7 @@ index 269ebb53eda67..cfde0a45b8b8a 100644 int err; priv->rx_res = mlx5e_rx_res_alloc(); -@@ -368,9 +367,9 @@ static int mlx5i_init_rx(struct mlx5e_priv *priv) +@@ -368,9 +371,9 @@ static int mlx5i_init_rx(struct mlx5e_priv *priv) goto err_destroy_q_counters; } @@ -206657,6 +250855,19 @@ index dea199e79beda..b3a7f18b9e303 100644 struct workqueue_struct *wq; }; +diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c +index 91e806c1aa211..8490c0cf80a8c 100644 +--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c +@@ -599,7 +599,7 @@ static int mlx5_ptp_verify(struct ptp_clock_info *ptp, unsigned int pin, + static const struct ptp_clock_info mlx5_ptp_clock_info = { + .owner = THIS_MODULE, + .name = "mlx5_ptp", +- .max_adj = 100000000, ++ .max_adj = 50000000, + .n_alarm = 0, + .n_ext_ts = 0, + .n_per_out = 0, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c index 
97e5845b4cfdd..df58cba37930a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c @@ -206681,8 +250892,39 @@ index 97e5845b4cfdd..df58cba37930a 100644 struct mlx5_fs_chains *chains = chain->chains; enum mlx5e_tc_attr_to_reg chain_to_reg; struct mlx5_modify_hdr *mod_hdr; +diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c +index 839a01da110f3..8ff16318e32dc 100644 +--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c +@@ -122,7 +122,7 @@ void mlx5_mpfs_cleanup(struct mlx5_core_dev *dev) + { + struct mlx5_mpfs *mpfs = dev->priv.mpfs; + +- if (!MLX5_ESWITCH_MANAGER(dev)) ++ if (!mpfs) + return; + + WARN_ON(!hlist_empty(mpfs->hash)); +@@ -137,7 +137,7 @@ int mlx5_mpfs_add_mac(struct mlx5_core_dev *dev, u8 *mac) + int err = 0; + u32 index; + +- if (!MLX5_ESWITCH_MANAGER(dev)) ++ if (!mpfs) + return 0; + + mutex_lock(&mpfs->lock); +@@ -185,7 +185,7 @@ int mlx5_mpfs_del_mac(struct mlx5_core_dev *dev, u8 *mac) + int err = 0; + u32 index; + +- if (!MLX5_ESWITCH_MANAGER(dev)) ++ if (!mpfs) + return 0; + + mutex_lock(&mpfs->lock); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c -index 79482824c64ff..740065e21181d 100644 +index 79482824c64ff..485a6a6220f6a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -97,6 +97,8 @@ enum { @@ -206714,7 +250956,16 @@ index 79482824c64ff..740065e21181d 100644 mlx5_core_warn(dev, "log_max_qp value in current profile is %d, changing it to HCA capability limit (%d)\n", prof->log_max_qp, MLX5_CAP_GEN_MAX(dev, log_max_qp)); -@@ -1423,7 +1427,9 @@ int mlx5_mdev_init(struct mlx5_core_dev *dev, int profile_idx) +@@ -924,6 +928,8 @@ err_rl_cleanup: + err_tables_cleanup: + mlx5_geneve_destroy(dev->geneve); + mlx5_vxlan_destroy(dev->vxlan); ++ mlx5_cleanup_clock(dev); ++ mlx5_cleanup_reserved_gids(dev); + mlx5_cq_debugfs_cleanup(dev); + mlx5_fw_reset_cleanup(dev); + err_events_cleanup: +@@ -1423,7 +1429,9 @@ int mlx5_mdev_init(struct mlx5_core_dev *dev, int profile_idx) memcpy(&dev->profile, &profile[profile_idx], sizeof(dev->profile)); INIT_LIST_HEAD(&priv->ctx_list); spin_lock_init(&priv->ctx_lock); @@ -206724,7 +250975,7 @@ index 79482824c64ff..740065e21181d 100644 mutex_init(&priv->bfregs.reg_head.lock); mutex_init(&priv->bfregs.wc_head.lock); -@@ -1470,6 +1476,7 @@ err_health_init: +@@ -1470,6 +1478,7 @@ err_health_init: mutex_destroy(&priv->bfregs.wc_head.lock); mutex_destroy(&priv->bfregs.reg_head.lock); mutex_destroy(&dev->intf_state_mutex); @@ -206732,7 +250983,7 @@ index 79482824c64ff..740065e21181d 100644 return err; } -@@ -1487,6 +1494,7 @@ void mlx5_mdev_uninit(struct mlx5_core_dev *dev) +@@ -1487,6 +1496,7 @@ void mlx5_mdev_uninit(struct mlx5_core_dev *dev) mutex_destroy(&priv->bfregs.wc_head.lock); mutex_destroy(&priv->bfregs.reg_head.lock); mutex_destroy(&dev->intf_state_mutex); @@ -206740,7 +250991,117 @@ index 79482824c64ff..740065e21181d 100644 } static int probe_one(struct pci_dev *pdev, const struct pci_device_id *id) -@@ -1756,10 +1764,12 @@ static const struct pci_device_id mlx5_core_pci_table[] = { +@@ -1568,12 +1578,28 @@ static void remove_one(struct pci_dev *pdev) + mlx5_devlink_free(devlink); + } + ++#define mlx5_pci_trace(dev, fmt, ...) ({ \ ++ struct mlx5_core_dev *__dev = (dev); \ ++ mlx5_core_info(__dev, "%s Device state = %d health sensors: %d pci_status: %d. 
" fmt, \ ++ __func__, __dev->state, mlx5_health_check_fatal_sensors(__dev), \ ++ __dev->pci_status, ##__VA_ARGS__); \ ++}) ++ ++static const char *result2str(enum pci_ers_result result) ++{ ++ return result == PCI_ERS_RESULT_NEED_RESET ? "need reset" : ++ result == PCI_ERS_RESULT_DISCONNECT ? "disconnect" : ++ result == PCI_ERS_RESULT_RECOVERED ? "recovered" : ++ "unknown"; ++} ++ + static pci_ers_result_t mlx5_pci_err_detected(struct pci_dev *pdev, + pci_channel_state_t state) + { + struct mlx5_core_dev *dev = pci_get_drvdata(pdev); ++ enum pci_ers_result res; + +- mlx5_core_info(dev, "%s was called\n", __func__); ++ mlx5_pci_trace(dev, "Enter, pci channel state = %d\n", state); + + mlx5_enter_error_state(dev, false); + mlx5_error_sw_reset(dev); +@@ -1581,8 +1607,12 @@ static pci_ers_result_t mlx5_pci_err_detected(struct pci_dev *pdev, + mlx5_drain_health_wq(dev); + mlx5_pci_disable_device(dev); + +- return state == pci_channel_io_perm_failure ? ++ res = state == pci_channel_io_perm_failure ? + PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_NEED_RESET; ++ ++ mlx5_core_info(dev, "%s Device state = %d pci_status: %d. Exit, result = %d, %s\n", ++ __func__, dev->state, dev->pci_status, res, result2str(res)); ++ return res; + } + + /* wait for the device to show vital signs by waiting +@@ -1616,28 +1646,36 @@ static int wait_vital(struct pci_dev *pdev) + + static pci_ers_result_t mlx5_pci_slot_reset(struct pci_dev *pdev) + { ++ enum pci_ers_result res = PCI_ERS_RESULT_DISCONNECT; + struct mlx5_core_dev *dev = pci_get_drvdata(pdev); + int err; + +- mlx5_core_info(dev, "%s was called\n", __func__); ++ mlx5_core_info(dev, "%s Device state = %d pci_status: %d. Enter\n", ++ __func__, dev->state, dev->pci_status); + + err = mlx5_pci_enable_device(dev); + if (err) { + mlx5_core_err(dev, "%s: mlx5_pci_enable_device failed with error code: %d\n", + __func__, err); +- return PCI_ERS_RESULT_DISCONNECT; ++ goto out; + } + + pci_set_master(pdev); + pci_restore_state(pdev); + pci_save_state(pdev); + +- if (wait_vital(pdev)) { +- mlx5_core_err(dev, "%s: wait_vital timed out\n", __func__); +- return PCI_ERS_RESULT_DISCONNECT; ++ err = wait_vital(pdev); ++ if (err) { ++ mlx5_core_err(dev, "%s: wait vital failed with error code: %d\n", ++ __func__, err); ++ goto out; + } + +- return PCI_ERS_RESULT_RECOVERED; ++ res = PCI_ERS_RESULT_RECOVERED; ++out: ++ mlx5_core_info(dev, "%s Device state = %d pci_status: %d. Exit, err = %d, result = %d, %s\n", ++ __func__, dev->state, dev->pci_status, err, res, result2str(res)); ++ return res; + } + + static void mlx5_pci_resume(struct pci_dev *pdev) +@@ -1645,14 +1683,16 @@ static void mlx5_pci_resume(struct pci_dev *pdev) + struct mlx5_core_dev *dev = pci_get_drvdata(pdev); + int err; + +- mlx5_core_info(dev, "%s was called\n", __func__); ++ mlx5_pci_trace(dev, "Enter, loading driver..\n"); + + err = mlx5_load_one(dev); +- if (err) +- mlx5_core_err(dev, "%s: mlx5_load_one failed with error code: %d\n", +- __func__, err); +- else +- mlx5_core_info(dev, "%s: device recovered\n", __func__); ++ ++ if (!err) ++ devlink_health_reporter_state_update(dev->priv.health.fw_fatal_reporter, ++ DEVLINK_HEALTH_REPORTER_STATE_HEALTHY); ++ ++ mlx5_pci_trace(dev, "Done, err = %d, device %s\n", err, ++ !err ? 
"recovered" : "Failed"); + } + + static const struct pci_error_handlers mlx5_err_handler = { +@@ -1756,10 +1796,12 @@ static const struct pci_device_id mlx5_core_pci_table[] = { { PCI_VDEVICE(MELLANOX, 0x101e), MLX5_PCI_DEV_IS_VF}, /* ConnectX Family mlx5Gen Virtual Function */ { PCI_VDEVICE(MELLANOX, 0x101f) }, /* ConnectX-6 LX */ { PCI_VDEVICE(MELLANOX, 0x1021) }, /* ConnectX-7 */ @@ -206753,7 +251114,7 @@ index 79482824c64ff..740065e21181d 100644 { 0, } }; -@@ -1773,12 +1783,13 @@ void mlx5_disable_device(struct mlx5_core_dev *dev) +@@ -1773,12 +1815,13 @@ void mlx5_disable_device(struct mlx5_core_dev *dev) int mlx5_recover_device(struct mlx5_core_dev *dev) { @@ -206772,6 +251133,62 @@ index 79482824c64ff..740065e21181d 100644 } static struct pci_driver mlx5_core_driver = { +@@ -1806,7 +1849,7 @@ static void mlx5_core_verify_params(void) + } + } + +-static int __init init(void) ++static int __init mlx5_init(void) + { + int err; + +@@ -1819,7 +1862,7 @@ static int __init init(void) + mlx5_fpga_ipsec_build_fs_cmds(); + mlx5_register_debugfs(); + +- err = pci_register_driver(&mlx5_core_driver); ++ err = mlx5e_init(); + if (err) + goto err_debug; + +@@ -1827,28 +1870,28 @@ static int __init init(void) + if (err) + goto err_sf; + +- err = mlx5e_init(); ++ err = pci_register_driver(&mlx5_core_driver); + if (err) +- goto err_en; ++ goto err_pci; + + return 0; + +-err_en: ++err_pci: + mlx5_sf_driver_unregister(); + err_sf: +- pci_unregister_driver(&mlx5_core_driver); ++ mlx5e_cleanup(); + err_debug: + mlx5_unregister_debugfs(); + return err; + } + +-static void __exit cleanup(void) ++static void __exit mlx5_cleanup(void) + { +- mlx5e_cleanup(); +- mlx5_sf_driver_unregister(); + pci_unregister_driver(&mlx5_core_driver); ++ mlx5_sf_driver_unregister(); ++ mlx5e_cleanup(); + mlx5_unregister_debugfs(); + } + +-module_init(init); +-module_exit(cleanup); ++module_init(mlx5_init); ++module_exit(mlx5_cleanup); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index 230eab7e3bc91..3f3ea8d268ce4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -207147,7 +251564,7 @@ index 66c24767e3b00..8ad8d73e17f06 100644 static int dr_icm_pool_sync_all_buddy_pools(struct mlx5dr_icm_pool *pool) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c -index b5409cc021d33..a19e8157c1002 100644 +index b5409cc021d33..0f99d3612f89d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c @@ -13,18 +13,6 @@ static bool dr_mask_is_dmac_set(struct mlx5dr_match_spec *spec) @@ -207197,6 +251614,84 @@ index b5409cc021d33..a19e8157c1002 100644 mlx5dr_ste_build_eth_l3_ipv6_src(ste_ctx, &sb[idx++], &mask, inner, rx); +@@ -721,7 +709,7 @@ static int dr_matcher_add_to_tbl(struct mlx5dr_matcher *matcher) + int ret; + + next_matcher = NULL; +- list_for_each_entry(tmp_matcher, &tbl->matcher_list, matcher_list) { ++ list_for_each_entry(tmp_matcher, &tbl->matcher_list, list_node) { + if (tmp_matcher->prio >= matcher->prio) { + next_matcher = tmp_matcher; + break; +@@ -731,11 +719,11 @@ static int dr_matcher_add_to_tbl(struct mlx5dr_matcher *matcher) + + prev_matcher = NULL; + if (next_matcher && !first) +- prev_matcher = list_prev_entry(next_matcher, matcher_list); ++ prev_matcher = list_prev_entry(next_matcher, list_node); + else if (!first) + prev_matcher = 
list_last_entry(&tbl->matcher_list, + struct mlx5dr_matcher, +- matcher_list); ++ list_node); + + if (dmn->type == MLX5DR_DOMAIN_TYPE_FDB || + dmn->type == MLX5DR_DOMAIN_TYPE_NIC_RX) { +@@ -756,12 +744,12 @@ static int dr_matcher_add_to_tbl(struct mlx5dr_matcher *matcher) + } + + if (prev_matcher) +- list_add(&matcher->matcher_list, &prev_matcher->matcher_list); ++ list_add(&matcher->list_node, &prev_matcher->list_node); + else if (next_matcher) +- list_add_tail(&matcher->matcher_list, +- &next_matcher->matcher_list); ++ list_add_tail(&matcher->list_node, ++ &next_matcher->list_node); + else +- list_add(&matcher->matcher_list, &tbl->matcher_list); ++ list_add(&matcher->list_node, &tbl->matcher_list); + + return 0; + } +@@ -934,7 +922,7 @@ mlx5dr_matcher_create(struct mlx5dr_table *tbl, + matcher->prio = priority; + matcher->match_criteria = match_criteria_enable; + refcount_set(&matcher->refcount, 1); +- INIT_LIST_HEAD(&matcher->matcher_list); ++ INIT_LIST_HEAD(&matcher->list_node); + + mlx5dr_domain_lock(tbl->dmn); + +@@ -997,15 +985,15 @@ static int dr_matcher_remove_from_tbl(struct mlx5dr_matcher *matcher) + struct mlx5dr_domain *dmn = tbl->dmn; + int ret = 0; + +- if (list_is_last(&matcher->matcher_list, &tbl->matcher_list)) ++ if (list_is_last(&matcher->list_node, &tbl->matcher_list)) + next_matcher = NULL; + else +- next_matcher = list_next_entry(matcher, matcher_list); ++ next_matcher = list_next_entry(matcher, list_node); + +- if (matcher->matcher_list.prev == &tbl->matcher_list) ++ if (matcher->list_node.prev == &tbl->matcher_list) + prev_matcher = NULL; + else +- prev_matcher = list_prev_entry(matcher, matcher_list); ++ prev_matcher = list_prev_entry(matcher, list_node); + + if (dmn->type == MLX5DR_DOMAIN_TYPE_FDB || + dmn->type == MLX5DR_DOMAIN_TYPE_NIC_RX) { +@@ -1025,7 +1013,7 @@ static int dr_matcher_remove_from_tbl(struct mlx5dr_matcher *matcher) + return ret; + } + +- list_del(&matcher->matcher_list); ++ list_del(&matcher->list_node); + + return 0; + } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c index 1cdfe4fccc7a9..01246a1ae7d13 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c @@ -207252,8 +251747,40 @@ index 1cdfe4fccc7a9..01246a1ae7d13 100644 return 0; } +diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_table.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_table.c +index 30ae3cda6d2e0..0c7b57bf01d0d 100644 +--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_table.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_table.c +@@ -9,7 +9,7 @@ int mlx5dr_table_set_miss_action(struct mlx5dr_table *tbl, + struct mlx5dr_matcher *last_matcher = NULL; + struct mlx5dr_htbl_connect_info info; + struct mlx5dr_ste_htbl *last_htbl; +- int ret; ++ int ret = -EOPNOTSUPP; + + if (action && action->action_type != DR_ACTION_TYP_FT) + return -EOPNOTSUPP; +@@ -19,7 +19,7 @@ int mlx5dr_table_set_miss_action(struct mlx5dr_table *tbl, + if (!list_empty(&tbl->matcher_list)) + last_matcher = list_last_entry(&tbl->matcher_list, + struct mlx5dr_matcher, +- matcher_list); ++ list_node); + + if (tbl->dmn->type == MLX5DR_DOMAIN_TYPE_NIC_RX || + tbl->dmn->type == MLX5DR_DOMAIN_TYPE_FDB) { +@@ -68,6 +68,9 @@ int mlx5dr_table_set_miss_action(struct mlx5dr_table *tbl, + } + } + ++ if (ret) ++ goto out; ++ + /* Release old action */ + if (tbl->miss_action) + refcount_dec(&tbl->miss_action->refcount); diff --git 
a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h -index b20e8aabb861b..bc206836af6ac 100644 +index b20e8aabb861b..9e2102f8bed1c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h @@ -740,6 +740,16 @@ struct mlx5dr_match_param { @@ -207273,6 +251800,15 @@ index b20e8aabb861b..bc206836af6ac 100644 struct mlx5dr_esw_caps { u64 drop_icm_address_rx; u64 drop_icm_address_tx; +@@ -881,7 +891,7 @@ struct mlx5dr_matcher { + struct mlx5dr_table *tbl; + struct mlx5dr_matcher_rx_tx rx; + struct mlx5dr_matcher_rx_tx tx; +- struct list_head matcher_list; ++ struct list_head list_node; + u32 prio; + struct mlx5dr_match_param mask; + u8 match_criteria; @@ -1384,7 +1394,8 @@ int mlx5dr_fw_create_md_tbl(struct mlx5dr_domain *dmn, bool reformat_req, u32 *tbl_id, @@ -207838,6 +252374,41 @@ index 2e8fcce50f9d1..c6f517c07bb9a 100644 return ks8851_probe_common(netdev, dev, msg_enable); } +diff --git a/drivers/net/ethernet/micrel/ksz884x.c b/drivers/net/ethernet/micrel/ksz884x.c +index a0ee155f9f516..f56bcd3e36d21 100644 +--- a/drivers/net/ethernet/micrel/ksz884x.c ++++ b/drivers/net/ethernet/micrel/ksz884x.c +@@ -6848,7 +6848,7 @@ static int pcidev_init(struct pci_dev *pdev, const struct pci_device_id *id) + char banner[sizeof(version)]; + struct ksz_switch *sw = NULL; + +- result = pci_enable_device(pdev); ++ result = pcim_enable_device(pdev); + if (result) + return result; + +diff --git a/drivers/net/ethernet/microchip/encx24j600-regmap.c b/drivers/net/ethernet/microchip/encx24j600-regmap.c +index 81a8ccca7e5e0..5693784eec5bc 100644 +--- a/drivers/net/ethernet/microchip/encx24j600-regmap.c ++++ b/drivers/net/ethernet/microchip/encx24j600-regmap.c +@@ -359,7 +359,7 @@ static int regmap_encx24j600_phy_reg_read(void *context, unsigned int reg, + goto err_out; + + usleep_range(26, 100); +- while ((ret = regmap_read(ctx->regmap, MISTAT, &mistat) != 0) && ++ while (((ret = regmap_read(ctx->regmap, MISTAT, &mistat)) == 0) && + (mistat & BUSY)) + cpu_relax(); + +@@ -397,7 +397,7 @@ static int regmap_encx24j600_phy_reg_write(void *context, unsigned int reg, + goto err_out; + + usleep_range(26, 100); +- while ((ret = regmap_read(ctx->regmap, MISTAT, &mistat) != 0) && ++ while (((ret = regmap_read(ctx->regmap, MISTAT, &mistat)) == 0) && + (mistat & BUSY)) + cpu_relax(); + diff --git a/drivers/net/ethernet/microchip/lan743x_main.c b/drivers/net/ethernet/microchip/lan743x_main.c index 4d5a5d6595b3b..d64ce65a3c174 100644 --- a/drivers/net/ethernet/microchip/lan743x_main.c @@ -207895,7 +252466,7 @@ index 7bdbb2d09a148..cc5e48e1bb4c3 100644 select PHY_SPARX5_SERDES select RESET_CONTROLLER diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_ethtool.c b/drivers/net/ethernet/microchip/sparx5/sparx5_ethtool.c -index 59783fc46a7b9..10b866e9f7266 100644 +index 59783fc46a7b9..060274caa4d0e 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_ethtool.c +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_ethtool.c @@ -1103,7 +1103,7 @@ void sparx5_get_stats64(struct net_device *ndev, @@ -207907,6 +252478,16 @@ index 59783fc46a7b9..10b866e9f7266 100644 stats->rx_dropped += portstats[spx5_stats_green_p0_rx_port_drop + idx]; stats->tx_dropped = portstats[spx5_stats_tx_local_drop]; +@@ -1219,6 +1219,9 @@ int sparx_stats_init(struct sparx5 *sparx5) + snprintf(queue_name, sizeof(queue_name), "%s-stats", + dev_name(sparx5->dev)); + sparx5->stats_queue = 
create_singlethread_workqueue(queue_name); ++ if (!sparx5->stats_queue) ++ return -ENOMEM; ++ + INIT_DELAYED_WORK(&sparx5->stats_work, sparx5_check_stats_work); + queue_delayed_work(sparx5->stats_queue, &sparx5->stats_work, + SPX5_STATS_CHECK_DELAY); diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_fdma.c b/drivers/net/ethernet/microchip/sparx5/sparx5_fdma.c index 7436f62fa1525..174ad95e746a3 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_fdma.c @@ -207920,6 +252501,81 @@ index 7436f62fa1525..174ad95e746a3 100644 db->cpu_addr = cpu_addr; list_add_tail(&db->list, &tx->db_list); } +diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_main.c b/drivers/net/ethernet/microchip/sparx5/sparx5_main.c +index 5030dfca38798..174d89ee63749 100644 +--- a/drivers/net/ethernet/microchip/sparx5/sparx5_main.c ++++ b/drivers/net/ethernet/microchip/sparx5/sparx5_main.c +@@ -629,6 +629,9 @@ static int sparx5_start(struct sparx5 *sparx5) + snprintf(queue_name, sizeof(queue_name), "%s-mact", + dev_name(sparx5->dev)); + sparx5->mact_queue = create_singlethread_workqueue(queue_name); ++ if (!sparx5->mact_queue) ++ return -ENOMEM; ++ + INIT_DELAYED_WORK(&sparx5->mact_work, sparx5_mact_pull_work); + queue_delayed_work(sparx5->mact_queue, &sparx5->mact_work, + SPX5_MACT_PULL_DELAY); +@@ -776,7 +779,7 @@ static int mchp_sparx5_probe(struct platform_device *pdev) + if (err) + goto cleanup_config; + +- if (!of_get_mac_address(np, sparx5->base_mac)) { ++ if (of_get_mac_address(np, sparx5->base_mac)) { + dev_info(sparx5->dev, "MAC addr was not set, use random MAC\n"); + eth_random_addr(sparx5->base_mac); + sparx5->base_mac[5] = 0; +@@ -826,6 +829,8 @@ static int mchp_sparx5_probe(struct platform_device *pdev) + + cleanup_ports: + sparx5_cleanup_ports(sparx5); ++ if (sparx5->mact_queue) ++ destroy_workqueue(sparx5->mact_queue); + cleanup_config: + kfree(configs); + cleanup_pnode: +@@ -849,6 +854,7 @@ static int mchp_sparx5_remove(struct platform_device *pdev) + sparx5_cleanup_ports(sparx5); + /* Unregister netdevs */ + sparx5_unregister_notifier_blocks(sparx5); ++ destroy_workqueue(sparx5->mact_queue); + + return 0; + } +diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_netdev.c b/drivers/net/ethernet/microchip/sparx5/sparx5_netdev.c +index cb68eaaac8811..5c7b21ce64edb 100644 +--- a/drivers/net/ethernet/microchip/sparx5/sparx5_netdev.c ++++ b/drivers/net/ethernet/microchip/sparx5/sparx5_netdev.c +@@ -83,7 +83,7 @@ static int sparx5_port_open(struct net_device *ndev) + err = phylink_of_phy_connect(port->phylink, port->of_node, 0); + if (err) { + netdev_err(ndev, "Could not attach to PHY\n"); +- return err; ++ goto err_connect; + } + + phylink_start(port->phylink); +@@ -95,10 +95,20 @@ static int sparx5_port_open(struct net_device *ndev) + err = sparx5_serdes_set(port->sparx5, port, &port->conf); + else + err = phy_power_on(port->serdes); +- if (err) ++ if (err) { + netdev_err(ndev, "%s failed\n", __func__); ++ goto out_power; ++ } + } + ++ return 0; ++ ++out_power: ++ phylink_stop(port->phylink); ++ phylink_disconnect_phy(port->phylink); ++err_connect: ++ sparx5_port_enable(port, false); ++ + return err; + } + diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_packet.c b/drivers/net/ethernet/microchip/sparx5/sparx5_packet.c index dc7e5ea6ec158..c460168131c26 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_packet.c @@ -207982,8 +252638,41 @@ index 4ce490a25f332..8e56ffa1c4f7a 100644 sparx5_vlan_port_apply(sparx5, port); return 0; +diff --git 
a/drivers/net/ethernet/microsoft/mana/gdma.h b/drivers/net/ethernet/microsoft/mana/gdma.h +index 41ecd156e95f5..f74f416a296f6 100644 +--- a/drivers/net/ethernet/microsoft/mana/gdma.h ++++ b/drivers/net/ethernet/microsoft/mana/gdma.h +@@ -324,9 +324,12 @@ struct gdma_queue_spec { + }; + }; + ++#define MANA_IRQ_NAME_SZ 32 ++ + struct gdma_irq_context { + void (*handler)(void *arg); + void *arg; ++ char name[MANA_IRQ_NAME_SZ]; + }; + + struct gdma_context { +@@ -488,7 +491,14 @@ enum { + + #define GDMA_DRV_CAP_FLAG_1_EQ_SHARING_MULTI_VPORT BIT(0) + +-#define GDMA_DRV_CAP_FLAGS1 GDMA_DRV_CAP_FLAG_1_EQ_SHARING_MULTI_VPORT ++/* Advertise to the NIC firmware: the NAPI work_done variable race is fixed, ++ * so the driver is able to reliably support features like busy_poll. ++ */ ++#define GDMA_DRV_CAP_FLAG_1_NAPI_WKDONE_FIX BIT(2) ++ ++#define GDMA_DRV_CAP_FLAGS1 \ ++ (GDMA_DRV_CAP_FLAG_1_EQ_SHARING_MULTI_VPORT | \ ++ GDMA_DRV_CAP_FLAG_1_NAPI_WKDONE_FIX) + + #define GDMA_DRV_CAP_FLAGS2 0 + diff --git a/drivers/net/ethernet/microsoft/mana/gdma_main.c b/drivers/net/ethernet/microsoft/mana/gdma_main.c -index cee75b561f59d..f577507f522b7 100644 +index cee75b561f59d..0fb42193643dc 100644 --- a/drivers/net/ethernet/microsoft/mana/gdma_main.c +++ b/drivers/net/ethernet/microsoft/mana/gdma_main.c @@ -368,6 +368,11 @@ static void mana_gd_process_eq_events(void *arg) @@ -208010,8 +252699,30 @@ index cee75b561f59d..f577507f522b7 100644 comp->wq_num = cqe->cqe_info.wq_num; comp->is_sq = cqe->cqe_info.is_sq; memcpy(comp->cqe_data, cqe->cqe_data, GDMA_COMP_DATA_SIZE); +@@ -1185,13 +1195,20 @@ static int mana_gd_setup_irqs(struct pci_dev *pdev) + gic->handler = NULL; + gic->arg = NULL; + ++ if (!i) ++ snprintf(gic->name, MANA_IRQ_NAME_SZ, "mana_hwc@pci:%s", ++ pci_name(pdev)); ++ else ++ snprintf(gic->name, MANA_IRQ_NAME_SZ, "mana_q%d@pci:%s", ++ i - 1, pci_name(pdev)); ++ + irq = pci_irq_vector(pdev, i); + if (irq < 0) { + err = irq; + goto free_irq; + } + +- err = request_irq(irq, mana_gd_intr, 0, "mana_intr", gic); ++ err = request_irq(irq, mana_gd_intr, 0, gic->name, gic); + if (err) + goto free_irq; + } diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c -index 030ae89f3a337..18dc64d7f412f 100644 +index 030ae89f3a337..4b8c239932178 100644 --- a/drivers/net/ethernet/microsoft/mana/mana_en.c +++ b/drivers/net/ethernet/microsoft/mana/mana_en.c @@ -980,8 +980,10 @@ static void mana_process_rx_cqe(struct mana_rxq *rxq, struct mana_cq *cq, @@ -208035,6 +252746,55 @@ index 030ae89f3a337..18dc64d7f412f 100644 mana_move_wq_tail(rxq->gdma_rq, rxbuf_oob->wqe_inf.wqe_size_in_bu); mana_post_pkt_rxq(rxq); +@@ -1068,10 +1071,11 @@ static void mana_poll_rx_cq(struct mana_cq *cq) + } + } + +-static void mana_cq_handler(void *context, struct gdma_queue *gdma_queue) ++static int mana_cq_handler(void *context, struct gdma_queue *gdma_queue) + { + struct mana_cq *cq = context; + u8 arm_bit; ++ int w; + + WARN_ON_ONCE(cq->gdma_cq != gdma_queue); + +@@ -1080,26 +1084,31 @@ static void mana_cq_handler(void *context, struct gdma_queue *gdma_queue) + else + mana_poll_tx_cq(cq); + +- if (cq->work_done < cq->budget && +- napi_complete_done(&cq->napi, cq->work_done)) { ++ w = cq->work_done; ++ ++ if (w < cq->budget && ++ napi_complete_done(&cq->napi, w)) { + arm_bit = SET_ARM_BIT; + } else { + arm_bit = 0; + } + + mana_gd_ring_cq(gdma_queue, arm_bit); ++ ++ return w; + } + + static int mana_poll(struct napi_struct *napi, int budget) + { + struct mana_cq *cq = container_of(napi, 
struct mana_cq, napi); ++ int w; + + cq->work_done = 0; + cq->budget = budget; + +- mana_cq_handler(cq, cq->gdma_cq); ++ w = mana_cq_handler(cq, cq->gdma_cq); + +- return min(cq->work_done, budget); ++ return min(w, budget); + } + + static void mana_schedule_napi(void *context, struct gdma_queue *gdma_queue) diff --git a/drivers/net/ethernet/moxa/moxart_ether.c b/drivers/net/ethernet/moxa/moxart_ether.c index 49def6934cad1..fa4c596e6ec6f 100644 --- a/drivers/net/ethernet/moxa/moxart_ether.c @@ -208339,7 +253099,7 @@ index a08e4f530c1c1..96b1e394a397f 100644 /* Allow manual injection via DEVCPU_QS registers, and byte swap these * registers endianness. diff --git a/drivers/net/ethernet/mscc/ocelot_flower.c b/drivers/net/ethernet/mscc/ocelot_flower.c -index 8b843d3c9189a..a3a5ad5dbb0e0 100644 +index 8b843d3c9189a..b7e7bd744a1b8 100644 --- a/drivers/net/ethernet/mscc/ocelot_flower.c +++ b/drivers/net/ethernet/mscc/ocelot_flower.c @@ -54,6 +54,12 @@ static int ocelot_chain_to_block(int chain, bool ingress) @@ -208385,7 +253145,26 @@ index 8b843d3c9189a..a3a5ad5dbb0e0 100644 return -EOPNOTSUPP; } if (filter->goto_target != -1) { -@@ -467,13 +482,6 @@ ocelot_flower_parse_key(struct ocelot *ocelot, int port, bool ingress, +@@ -458,6 +473,18 @@ ocelot_flower_parse_key(struct ocelot *ocelot, int port, bool ingress, + flow_rule_match_control(rule, &match); + } + ++ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_VLAN)) { ++ struct flow_match_vlan match; ++ ++ flow_rule_match_vlan(rule, &match); ++ filter->key_type = OCELOT_VCAP_KEY_ANY; ++ filter->vlan.vid.value = match.key->vlan_id; ++ filter->vlan.vid.mask = match.mask->vlan_id; ++ filter->vlan.pcp.value[0] = match.key->vlan_priority; ++ filter->vlan.pcp.mask[0] = match.mask->vlan_priority; ++ match_protocol = false; ++ } ++ + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ETH_ADDRS)) { + struct flow_match_eth_addrs match; + +@@ -467,13 +494,6 @@ ocelot_flower_parse_key(struct ocelot *ocelot, int port, bool ingress, return -EOPNOTSUPP; } @@ -208399,7 +253178,7 @@ index 8b843d3c9189a..a3a5ad5dbb0e0 100644 /* The hw support mac matches only for MAC_ETYPE key, * therefore if other matches(port, tcp flags, etc) are added * then just bail out -@@ -488,6 +496,14 @@ ocelot_flower_parse_key(struct ocelot *ocelot, int port, bool ingress, +@@ -488,6 +508,14 @@ ocelot_flower_parse_key(struct ocelot *ocelot, int port, bool ingress, return -EOPNOTSUPP; flow_rule_match_eth_addrs(rule, &match); @@ -208414,6 +253193,25 @@ index 8b843d3c9189a..a3a5ad5dbb0e0 100644 filter->key_type = OCELOT_VCAP_KEY_ETYPE; ether_addr_copy(filter->key.etype.dmac.value, match.key->dst); +@@ -589,18 +617,6 @@ ocelot_flower_parse_key(struct ocelot *ocelot, int port, bool ingress, + match_protocol = false; + } + +- if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_VLAN)) { +- struct flow_match_vlan match; +- +- flow_rule_match_vlan(rule, &match); +- filter->key_type = OCELOT_VCAP_KEY_ANY; +- filter->vlan.vid.value = match.key->vlan_id; +- filter->vlan.vid.mask = match.mask->vlan_id; +- filter->vlan.pcp.value[0] = match.key->vlan_priority; +- filter->vlan.pcp.mask[0] = match.mask->vlan_priority; +- match_protocol = false; +- } +- + finished_key_parsing: + if (match_protocol && proto != ETH_P_ALL) { + if (filter->block_id == VCAP_ES0) { diff --git a/drivers/net/ethernet/mscc/ocelot_net.c b/drivers/net/ethernet/mscc/ocelot_net.c index 2545727fd5b2f..c08c56e07b1d3 100644 --- a/drivers/net/ethernet/mscc/ocelot_net.c @@ -208488,7 +253286,7 @@ index 99d7376a70a74..732a4ef22518d 100644 } diff 
--git a/drivers/net/ethernet/myricom/myri10ge/myri10ge.c b/drivers/net/ethernet/myricom/myri10ge/myri10ge.c -index c1a75b08ced7e..052696ce50963 100644 +index c1a75b08ced7e..97c2604df019a 100644 --- a/drivers/net/ethernet/myricom/myri10ge/myri10ge.c +++ b/drivers/net/ethernet/myricom/myri10ge/myri10ge.c @@ -2900,11 +2900,9 @@ static netdev_tx_t myri10ge_sw_tso(struct sk_buff *skb, @@ -208505,6 +253303,14 @@ index c1a75b08ced7e..052696ce50963 100644 } goto drop; } +@@ -3925,6 +3923,7 @@ abort_with_slices: + myri10ge_free_slices(mgp); + + abort_with_firmware: ++ kfree(mgp->msix_vectors); + myri10ge_dummy_rdma(mgp, 0); + + abort_with_ioremap: diff --git a/drivers/net/ethernet/natsemi/xtsonic.c b/drivers/net/ethernet/natsemi/xtsonic.c index ca4686094701c..0a02d8bd0a3e5 100644 --- a/drivers/net/ethernet/natsemi/xtsonic.c @@ -208518,6 +253324,75 @@ index ca4686094701c..0a02d8bd0a3e5 100644 { unsigned int silicon_revision; struct sonic_local *lp = netdev_priv(dev); +diff --git a/drivers/net/ethernet/neterion/s2io.c b/drivers/net/ethernet/neterion/s2io.c +index 3b6b2e61139e6..65ccdbe665e5c 100644 +--- a/drivers/net/ethernet/neterion/s2io.c ++++ b/drivers/net/ethernet/neterion/s2io.c +@@ -2386,7 +2386,7 @@ static void free_tx_buffers(struct s2io_nic *nic) + skb = s2io_txdl_getskb(&mac_control->fifos[i], txdp, j); + if (skb) { + swstats->mem_freed += skb->truesize; +- dev_kfree_skb(skb); ++ dev_kfree_skb_irq(skb); + cnt++; + } + } +@@ -7125,9 +7125,8 @@ static int s2io_card_up(struct s2io_nic *sp) + if (ret) { + DBG_PRINT(ERR_DBG, "%s: Out of memory in Open\n", + dev->name); +- s2io_reset(sp); +- free_rx_buffers(sp); +- return -ENOMEM; ++ ret = -ENOMEM; ++ goto err_fill_buff; + } + DBG_PRINT(INFO_DBG, "Buf in ring:%d is %d:\n", i, + ring->rx_bufs_left); +@@ -7165,18 +7164,16 @@ static int s2io_card_up(struct s2io_nic *sp) + /* Enable Rx Traffic and interrupts on the NIC */ + if (start_nic(sp)) { + DBG_PRINT(ERR_DBG, "%s: Starting NIC failed\n", dev->name); +- s2io_reset(sp); +- free_rx_buffers(sp); +- return -ENODEV; ++ ret = -ENODEV; ++ goto err_out; + } + + /* Add interrupt service routine */ + if (s2io_add_isr(sp) != 0) { + if (sp->config.intr_type == MSI_X) + s2io_rem_isr(sp); +- s2io_reset(sp); +- free_rx_buffers(sp); +- return -ENODEV; ++ ret = -ENODEV; ++ goto err_out; + } + + timer_setup(&sp->alarm_timer, s2io_alarm_handle, 0); +@@ -7196,6 +7193,20 @@ static int s2io_card_up(struct s2io_nic *sp) + } + + return 0; ++ ++err_out: ++ if (config->napi) { ++ if (config->intr_type == MSI_X) { ++ for (i = 0; i < sp->config.rx_ring_num; i++) ++ napi_disable(&sp->mac_control.rings[i].napi); ++ } else { ++ napi_disable(&sp->napi); ++ } ++ } ++err_fill_buff: ++ s2io_reset(sp); ++ free_rx_buffers(sp); ++ return ret; + } + + /** diff --git a/drivers/net/ethernet/netronome/nfp/flower/action.c b/drivers/net/ethernet/netronome/nfp/flower/action.c index 2a432de11858d..df5a6a0bf1d5d 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/action.c @@ -208693,6 +253568,19 @@ index ab70179728f63..babd374333f34 100644 ida_idx = nfp_tunnel_get_ida_from_global_mac_idx(entry->index); ida_simple_remove(&priv->tun.mac_off_ids, ida_idx); } +diff --git a/drivers/net/ethernet/netronome/nfp/nfp_devlink.c b/drivers/net/ethernet/netronome/nfp/nfp_devlink.c +index bea978df77138..1647b6b180cc5 100644 +--- a/drivers/net/ethernet/netronome/nfp/nfp_devlink.c ++++ b/drivers/net/ethernet/netronome/nfp/nfp_devlink.c +@@ -363,7 +363,7 @@ int nfp_devlink_port_register(struct nfp_app *app, struct nfp_port *port) + return ret; + + 
attrs.split = eth_port.is_split; +- attrs.splittable = !attrs.split; ++ attrs.splittable = eth_port.port_lanes > 1 && !attrs.split; + attrs.lanes = eth_port.port_lanes; + attrs.flavour = DEVLINK_PORT_FLAVOUR_PHYSICAL; + attrs.phys.port_number = eth_port.label_port; diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net.h b/drivers/net/ethernet/netronome/nfp/nfp_net.h index df203738511bf..0b1865e9f0b59 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net.h @@ -208785,7 +253673,7 @@ index 5bfa22accf2c9..69ac205bbdbd0 100644 stats->tx_bytes += data[1]; stats->tx_errors += data[2]; diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c -index 0685ece1f155d..62546d197bfd2 100644 +index 0685ece1f155d..d295942968f33 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c @@ -286,8 +286,6 @@ nfp_net_get_link_ksettings(struct net_device *netdev, @@ -208837,16 +253725,38 @@ index 0685ece1f155d..62546d197bfd2 100644 data += NN_RVEC_PER_Q_STATS; -@@ -1219,6 +1219,8 @@ nfp_port_get_module_info(struct net_device *netdev, +@@ -1219,6 +1219,11 @@ nfp_port_get_module_info(struct net_device *netdev, u8 data; port = nfp_port_from_netdev(netdev); ++ if (!port) ++ return -EOPNOTSUPP; ++ + /* update port state to get latest interface */ + set_bit(NFP_PORT_CHANGED, &port->flags); eth_port = nfp_port_get_eth_port(port); if (!eth_port) return -EOPNOTSUPP; -@@ -1343,7 +1345,7 @@ static int nfp_net_set_coalesce(struct net_device *netdev, +@@ -1262,15 +1267,15 @@ nfp_port_get_module_info(struct net_device *netdev, + + if (data < 0x3) { + modinfo->type = ETH_MODULE_SFF_8436; +- modinfo->eeprom_len = ETH_MODULE_SFF_8436_LEN; ++ modinfo->eeprom_len = ETH_MODULE_SFF_8436_MAX_LEN; + } else { + modinfo->type = ETH_MODULE_SFF_8636; +- modinfo->eeprom_len = ETH_MODULE_SFF_8636_LEN; ++ modinfo->eeprom_len = ETH_MODULE_SFF_8636_MAX_LEN; + } + break; + case NFP_INTERFACE_QSFP28: + modinfo->type = ETH_MODULE_SFF_8636; +- modinfo->eeprom_len = ETH_MODULE_SFF_8636_LEN; ++ modinfo->eeprom_len = ETH_MODULE_SFF_8636_MAX_LEN; + break; + default: + netdev_err(netdev, "Unsupported module 0x%x detected\n", +@@ -1343,7 +1348,7 @@ static int nfp_net_set_coalesce(struct net_device *netdev, * ME timestamp ticks. There are 16 ME clock cycles for each timestamp * count. 
*/ @@ -208856,7 +253766,7 @@ index 0685ece1f155d..62546d197bfd2 100644 /* Each pair of (usecs, max_frames) fields specifies that interrupts * should be coalesced until diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cppcore.c b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cppcore.c -index d7ac0307797fd..34c0d2ddf9ef6 100644 +index d7ac0307797fd..a8286d0032d1e 100644 --- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cppcore.c +++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cppcore.c @@ -803,8 +803,10 @@ int nfp_cpp_area_cache_add(struct nfp_cpp *cpp, size_t size) @@ -208871,6 +253781,76 @@ index d7ac0307797fd..34c0d2ddf9ef6 100644 cache->id = 0; cache->addr = 0; +@@ -872,7 +874,6 @@ area_cache_get(struct nfp_cpp *cpp, u32 id, + } + + /* Adjust the start address to be cache size aligned */ +- cache->id = id; + cache->addr = addr & ~(u64)(cache->size - 1); + + /* Re-init to the new ID and address */ +@@ -892,6 +893,8 @@ area_cache_get(struct nfp_cpp *cpp, u32 id, + return NULL; + } + ++ cache->id = id; ++ + exit: + /* Adjust offset */ + *offset = addr - cache->addr; +diff --git a/drivers/net/ethernet/ni/nixge.c b/drivers/net/ethernet/ni/nixge.c +index 346145d3180eb..5d0cecf80b380 100644 +--- a/drivers/net/ethernet/ni/nixge.c ++++ b/drivers/net/ethernet/ni/nixge.c +@@ -249,25 +249,26 @@ static void nixge_hw_dma_bd_release(struct net_device *ndev) + struct sk_buff *skb; + int i; + +- for (i = 0; i < RX_BD_NUM; i++) { +- phys_addr = nixge_hw_dma_bd_get_addr(&priv->rx_bd_v[i], +- phys); +- +- dma_unmap_single(ndev->dev.parent, phys_addr, +- NIXGE_MAX_JUMBO_FRAME_SIZE, +- DMA_FROM_DEVICE); +- +- skb = (struct sk_buff *)(uintptr_t) +- nixge_hw_dma_bd_get_addr(&priv->rx_bd_v[i], +- sw_id_offset); +- dev_kfree_skb(skb); +- } ++ if (priv->rx_bd_v) { ++ for (i = 0; i < RX_BD_NUM; i++) { ++ phys_addr = nixge_hw_dma_bd_get_addr(&priv->rx_bd_v[i], ++ phys); ++ ++ dma_unmap_single(ndev->dev.parent, phys_addr, ++ NIXGE_MAX_JUMBO_FRAME_SIZE, ++ DMA_FROM_DEVICE); ++ ++ skb = (struct sk_buff *)(uintptr_t) ++ nixge_hw_dma_bd_get_addr(&priv->rx_bd_v[i], ++ sw_id_offset); ++ dev_kfree_skb(skb); ++ } + +- if (priv->rx_bd_v) + dma_free_coherent(ndev->dev.parent, + sizeof(*priv->rx_bd_v) * RX_BD_NUM, + priv->rx_bd_v, + priv->rx_bd_p); ++ } + + if (priv->tx_skb) + devm_kfree(ndev->dev.parent, priv->tx_skb); +@@ -899,6 +900,7 @@ static int nixge_open(struct net_device *ndev) + err_rx_irq: + free_irq(priv->tx_irq, ndev); + err_tx_irq: ++ napi_disable(&priv->napi); + phy_stop(phy); + phy_disconnect(phy); + tasklet_kill(&priv->dma_err_tasklet); diff --git a/drivers/net/ethernet/nxp/lpc_eth.c b/drivers/net/ethernet/nxp/lpc_eth.c index c910fa2f40a4b..919140522885d 100644 --- a/drivers/net/ethernet/nxp/lpc_eth.c @@ -208894,6 +253874,44 @@ index c910fa2f40a4b..919140522885d 100644 /* Reset and initialize */ __lpc_eth_reset(pldat); +diff --git a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c +index ec3e558f890ee..d555b4cc6049d 100644 +--- a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c ++++ b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c +@@ -1148,6 +1148,7 @@ static void pch_gbe_tx_queue(struct pch_gbe_adapter *adapter, + buffer_info->dma = 0; + buffer_info->time_stamp = 0; + tx_ring->next_to_use = ring_num; ++ dev_kfree_skb_any(skb); + return; + } + buffer_info->mapped = true; +@@ -2464,6 +2465,7 @@ static void pch_gbe_remove(struct pci_dev *pdev) + unregister_netdev(netdev); + + pch_gbe_phy_hw_reset(&adapter->hw); ++ 
pci_dev_put(adapter->ptp_pdev); + + free_netdev(netdev); + } +@@ -2539,7 +2541,7 @@ static int pch_gbe_probe(struct pci_dev *pdev, + /* setup the private structure */ + ret = pch_gbe_sw_init(adapter); + if (ret) +- goto err_free_netdev; ++ goto err_put_dev; + + /* Initialize PHY */ + ret = pch_gbe_init_phy(adapter); +@@ -2597,6 +2599,8 @@ static int pch_gbe_probe(struct pci_dev *pdev, + + err_free_adapter: + pch_gbe_phy_hw_reset(&adapter->hw); ++err_put_dev: ++ pci_dev_put(adapter->ptp_pdev); + err_free_netdev: + free_netdev(netdev); + return ret; diff --git a/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c b/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c index 7e296fa71b368..d324c292318b3 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c @@ -208992,10 +254010,46 @@ index 8311086fb1f49..922bb6c9e01d5 100644 #endif /* _IONIC_DEV_H_ */ diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c -index 7f3322ce044c7..886c997a3ad14 100644 +index 7f3322ce044c7..6fbd2a51d66ce 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c -@@ -1565,7 +1565,7 @@ static int ionic_set_nic_features(struct ionic_lif *lif, +@@ -268,6 +268,7 @@ static int ionic_qcq_enable(struct ionic_qcq *qcq) + .oper = IONIC_Q_ENABLE, + }, + }; ++ int ret; + + idev = &lif->ionic->idev; + dev = lif->ionic->dev; +@@ -275,16 +276,24 @@ static int ionic_qcq_enable(struct ionic_qcq *qcq) + dev_dbg(dev, "q_enable.index %d q_enable.qtype %d\n", + ctx.cmd.q_control.index, ctx.cmd.q_control.type); + ++ if (qcq->flags & IONIC_QCQ_F_INTR) ++ ionic_intr_clean(idev->intr_ctrl, qcq->intr.index); ++ ++ ret = ionic_adminq_post_wait(lif, &ctx); ++ if (ret) ++ return ret; ++ ++ if (qcq->napi.poll) ++ napi_enable(&qcq->napi); ++ + if (qcq->flags & IONIC_QCQ_F_INTR) { + irq_set_affinity_hint(qcq->intr.vector, + &qcq->intr.affinity_mask); +- napi_enable(&qcq->napi); +- ionic_intr_clean(idev->intr_ctrl, qcq->intr.index); + ionic_intr_mask(idev->intr_ctrl, qcq->intr.index, + IONIC_INTR_MASK_CLEAR); + } + +- return ionic_adminq_post_wait(lif, &ctx); ++ return 0; + } + + static int ionic_qcq_disable(struct ionic_qcq *qcq, bool send_to_hw) +@@ -1565,7 +1574,7 @@ static int ionic_set_nic_features(struct ionic_lif *lif, if ((old_hw_features ^ lif->hw_features) & IONIC_ETH_HW_RX_HASH) ionic_lif_rss_config(lif, lif->rss_types, NULL, NULL); @@ -209004,7 +254058,7 @@ index 7f3322ce044c7..886c997a3ad14 100644 !(vlan_flags & le64_to_cpu(ctx.comp.lif_setattr.features))) dev_info_once(lif->ionic->dev, "NIC is not supporting vlan offload, likely in SmartNIC mode\n"); -@@ -1692,8 +1692,67 @@ static int ionic_set_features(struct net_device *netdev, +@@ -1692,8 +1701,67 @@ static int ionic_set_features(struct net_device *netdev, return err; } @@ -209072,7 +254126,7 @@ index 7f3322ce044c7..886c997a3ad14 100644 struct sockaddr *addr = sa; u8 *mac; int err; -@@ -1702,6 +1761,14 @@ static int ionic_set_mac_address(struct net_device *netdev, void *sa) +@@ -1702,6 +1770,14 @@ static int ionic_set_mac_address(struct net_device *netdev, void *sa) if (ether_addr_equal(netdev->dev_addr, mac)) return 0; @@ -209087,7 +254141,7 @@ index 7f3322ce044c7..886c997a3ad14 100644 err = eth_prepare_mac_addr_change(netdev, addr); if (err) return err; -@@ -2813,11 +2880,15 @@ err_out: +@@ -2813,11 +2889,15 @@ err_out: * than the full array, but leave the qcq shells in place */ for (i = lif->nxqs; i < lif->ionic->ntxqs_per_lif; i++) { @@ -209107,7 +254161,7 
@@ index 7f3322ce044c7..886c997a3ad14 100644 } if (err) -@@ -2974,11 +3045,10 @@ static void ionic_lif_handle_fw_down(struct ionic_lif *lif) +@@ -2974,11 +3054,10 @@ static void ionic_lif_handle_fw_down(struct ionic_lif *lif) netif_device_detach(lif->netdev); @@ -209120,7 +254174,7 @@ index 7f3322ce044c7..886c997a3ad14 100644 } if (netif_running(lif->netdev)) { -@@ -2989,6 +3059,8 @@ static void ionic_lif_handle_fw_down(struct ionic_lif *lif) +@@ -2989,6 +3068,8 @@ static void ionic_lif_handle_fw_down(struct ionic_lif *lif) ionic_reset(ionic); ionic_qcqs_free(lif); @@ -209129,7 +254183,7 @@ index 7f3322ce044c7..886c997a3ad14 100644 dev_info(ionic->dev, "FW Down: LIFs stopped\n"); } -@@ -3012,9 +3084,15 @@ static void ionic_lif_handle_fw_up(struct ionic_lif *lif) +@@ -3012,9 +3093,15 @@ static void ionic_lif_handle_fw_up(struct ionic_lif *lif) err = ionic_port_init(ionic); if (err) goto err_out; @@ -209146,7 +254200,7 @@ index 7f3322ce044c7..886c997a3ad14 100644 err = ionic_lif_init(lif); if (err) -@@ -3035,6 +3113,8 @@ static void ionic_lif_handle_fw_up(struct ionic_lif *lif) +@@ -3035,6 +3122,8 @@ static void ionic_lif_handle_fw_up(struct ionic_lif *lif) goto err_txrx_free; } @@ -209155,7 +254209,7 @@ index 7f3322ce044c7..886c997a3ad14 100644 clear_bit(IONIC_LIF_F_FW_RESET, lif->state); ionic_link_status_check_request(lif, CAN_SLEEP); netif_device_attach(lif->netdev); -@@ -3051,6 +3131,8 @@ err_lifs_deinit: +@@ -3051,6 +3140,8 @@ err_lifs_deinit: ionic_lif_deinit(lif); err_qcqs_free: ionic_qcqs_free(lif); @@ -209164,7 +254218,7 @@ index 7f3322ce044c7..886c997a3ad14 100644 err_out: dev_err(ionic->dev, "FW Up: LIFs restart failed - err %d\n", err); } -@@ -3215,6 +3297,7 @@ static int ionic_station_set(struct ionic_lif *lif) +@@ -3215,6 +3306,7 @@ static int ionic_station_set(struct ionic_lif *lif) .attr = IONIC_LIF_ATTR_MAC, }, }; @@ -209172,7 +254226,7 @@ index 7f3322ce044c7..886c997a3ad14 100644 struct sockaddr addr; int err; -@@ -3223,8 +3306,23 @@ static int ionic_station_set(struct ionic_lif *lif) +@@ -3223,8 +3315,23 @@ static int ionic_station_set(struct ionic_lif *lif) return err; netdev_dbg(lif->netdev, "found initial MAC addr %pM\n", ctx.comp.lif_getattr.mac); @@ -209198,7 +254252,7 @@ index 7f3322ce044c7..886c997a3ad14 100644 if (!is_zero_ether_addr(netdev->dev_addr)) { /* If the netdev mac is non-zero and doesn't match the default -@@ -3232,12 +3330,11 @@ static int ionic_station_set(struct ionic_lif *lif) +@@ -3232,12 +3339,11 @@ static int ionic_station_set(struct ionic_lif *lif) * likely here again after a fw-upgrade reset. We need to be * sure the netdev mac is in our filter list. 
*/ @@ -209213,7 +254267,7 @@ index 7f3322ce044c7..886c997a3ad14 100644 addr.sa_family = AF_INET; err = eth_prepare_mac_addr_change(netdev, &addr); if (err) { -@@ -3283,7 +3380,7 @@ int ionic_lif_init(struct ionic_lif *lif) +@@ -3283,7 +3389,7 @@ int ionic_lif_init(struct ionic_lif *lif) return -EINVAL; } @@ -209223,7 +254277,7 @@ index 7f3322ce044c7..886c997a3ad14 100644 dev_err(dev, "Failed alloc doorbell id bitmap, aborting\n"); return -ENOMEM; diff --git a/drivers/net/ethernet/pensando/ionic/ionic_main.c b/drivers/net/ethernet/pensando/ionic/ionic_main.c -index 6f07bf509efed..9ede66842118f 100644 +index 6f07bf509efed..538c024afed52 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_main.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_main.c @@ -328,10 +328,10 @@ int ionic_adminq_post_wait(struct ionic_lif *lif, struct ionic_admin_ctx *ctx) @@ -209292,7 +254346,7 @@ index 6f07bf509efed..9ede66842118f 100644 dma_free_coherent(ionic->dev, idev->port_info_sz, idev->port_info, idev->port_info_pa); -@@ -576,9 +583,6 @@ int ionic_port_reset(struct ionic *ionic) +@@ -576,16 +583,19 @@ int ionic_port_reset(struct ionic *ionic) idev->port_info = NULL; idev->port_info_pa = 0; @@ -209302,6 +254356,107 @@ index 6f07bf509efed..9ede66842118f 100644 return err; } + static int __init ionic_init_module(void) + { ++ int ret; ++ + ionic_debugfs_create(); +- return ionic_bus_register_driver(); ++ ret = ionic_bus_register_driver(); ++ if (ret) ++ ionic_debugfs_destroy(); ++ ++ return ret; + } + + static void __exit ionic_cleanup_module(void) +diff --git a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c +index 37c39581b6599..376f97b4008bb 100644 +--- a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c ++++ b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c +@@ -353,16 +353,25 @@ void ionic_rx_fill(struct ionic_queue *q) + struct ionic_rxq_sg_desc *sg_desc; + struct ionic_rxq_sg_elem *sg_elem; + struct ionic_buf_info *buf_info; ++ unsigned int fill_threshold; + struct ionic_rxq_desc *desc; + unsigned int remain_len; + unsigned int frag_len; + unsigned int nfrags; ++ unsigned int n_fill; + unsigned int i, j; + unsigned int len; + ++ n_fill = ionic_q_space_avail(q); ++ ++ fill_threshold = min_t(unsigned int, IONIC_RX_FILL_THRESHOLD, ++ q->num_descs / IONIC_RX_FILL_DIV); ++ if (n_fill < fill_threshold) ++ return; ++ + len = netdev->mtu + ETH_HLEN + VLAN_HLEN; + +- for (i = ionic_q_space_avail(q); i; i--) { ++ for (i = n_fill; i; i--) { + nfrags = 0; + remain_len = len; + desc_info = &q->info[q->head_idx]; +@@ -518,7 +527,6 @@ int ionic_rx_napi(struct napi_struct *napi, int budget) + struct ionic_cq *cq = napi_to_cq(napi); + struct ionic_dev *idev; + struct ionic_lif *lif; +- u16 rx_fill_threshold; + u32 work_done = 0; + u32 flags = 0; + +@@ -528,10 +536,7 @@ int ionic_rx_napi(struct napi_struct *napi, int budget) + work_done = ionic_cq_service(cq, budget, + ionic_rx_service, NULL, NULL); + +- rx_fill_threshold = min_t(u16, IONIC_RX_FILL_THRESHOLD, +- cq->num_descs / IONIC_RX_FILL_DIV); +- if (work_done && ionic_q_space_avail(cq->bound_q) >= rx_fill_threshold) +- ionic_rx_fill(cq->bound_q); ++ ionic_rx_fill(cq->bound_q); + + if (work_done < budget && napi_complete_done(napi, work_done)) { + ionic_dim_update(qcq, IONIC_LIF_F_RX_DIM_INTR); +@@ -559,7 +564,6 @@ int ionic_txrx_napi(struct napi_struct *napi, int budget) + struct ionic_dev *idev; + struct ionic_lif *lif; + struct ionic_cq *txcq; +- u16 rx_fill_threshold; + u32 rx_work_done = 0; + u32 tx_work_done 
= 0; + u32 flags = 0; +@@ -574,10 +578,7 @@ int ionic_txrx_napi(struct napi_struct *napi, int budget) + rx_work_done = ionic_cq_service(rxcq, budget, + ionic_rx_service, NULL, NULL); + +- rx_fill_threshold = min_t(u16, IONIC_RX_FILL_THRESHOLD, +- rxcq->num_descs / IONIC_RX_FILL_DIV); +- if (rx_work_done && ionic_q_space_avail(rxcq->bound_q) >= rx_fill_threshold) +- ionic_rx_fill(rxcq->bound_q); ++ ionic_rx_fill(rxcq->bound_q); + + if (rx_work_done < budget && napi_complete_done(napi, rx_work_done)) { + ionic_dim_update(qcq, 0); +diff --git a/drivers/net/ethernet/qlogic/qed/qed_debug.c b/drivers/net/ethernet/qlogic/qed/qed_debug.c +index 6ab3e60d4928c..4b4077cf2d266 100644 +--- a/drivers/net/ethernet/qlogic/qed/qed_debug.c ++++ b/drivers/net/ethernet/qlogic/qed/qed_debug.c +@@ -1796,9 +1796,10 @@ static u32 qed_grc_dump_addr_range(struct qed_hwfn *p_hwfn, + u8 split_id) + { + struct dbg_tools_data *dev_data = &p_hwfn->dbg_info; +- u8 port_id = 0, pf_id = 0, vf_id = 0, fid = 0; ++ u8 port_id = 0, pf_id = 0, vf_id = 0; + bool read_using_dmae = false; + u32 thresh; ++ u16 fid; + + if (!dump) + return len; diff --git a/drivers/net/ethernet/qlogic/qed/qed_l2.c b/drivers/net/ethernet/qlogic/qed/qed_l2.c index dfaf10edfabfd..ba8c7a31cce1f 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_l2.c @@ -209614,7 +254769,7 @@ index a2e4dfb5cb44e..f99b085b56a54 100644 kfree(uc_macs); } diff --git a/drivers/net/ethernet/qlogic/qede/qede_fp.c b/drivers/net/ethernet/qlogic/qede/qede_fp.c -index 065e9004598ee..17f895250e041 100644 +index 065e9004598ee..d67d4e74b326d 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_fp.c +++ b/drivers/net/ethernet/qlogic/qede/qede_fp.c @@ -747,6 +747,9 @@ qede_build_skb(struct qede_rx_queue *rxq, @@ -209627,7 +254782,28 @@ index 065e9004598ee..17f895250e041 100644 skb_reserve(skb, pad); skb_put(skb, len); -@@ -1643,6 +1646,13 @@ netdev_tx_t qede_start_xmit(struct sk_buff *skb, struct net_device *ndev) +@@ -1436,6 +1439,10 @@ int qede_poll(struct napi_struct *napi, int budget) + rx_work_done = (likely(fp->type & QEDE_FASTPATH_RX) && + qede_has_rx_work(fp->rxq)) ? 
+ qede_rx_int(fp, budget) : 0; ++ ++ if (fp->xdp_xmit & QEDE_XDP_REDIRECT) ++ xdp_do_flush(); ++ + /* Handle case where we are called by netpoll with a budget of 0 */ + if (rx_work_done < budget || !budget) { + if (!qede_poll_is_more_work(fp)) { +@@ -1455,9 +1462,6 @@ int qede_poll(struct napi_struct *napi, int budget) + qede_update_tx_producer(fp->xdp_tx); + } + +- if (fp->xdp_xmit & QEDE_XDP_REDIRECT) +- xdp_do_flush_map(); +- + return rx_work_done; + } + +@@ -1643,6 +1647,13 @@ netdev_tx_t qede_start_xmit(struct sk_buff *skb, struct net_device *ndev) data_split = true; } } else { @@ -209671,10 +254847,18 @@ index 9837bdb89cd40..ee4c3bd28a934 100644 memset(&edev->stats, 0, sizeof(edev->stats)); memcpy(&edev->dev_info, &dev_info, sizeof(dev_info)); diff --git a/drivers/net/ethernet/qlogic/qla3xxx.c b/drivers/net/ethernet/qlogic/qla3xxx.c -index c00ad57575eab..40d14d80f6f1f 100644 +index c00ad57575eab..29837e533cee8 100644 --- a/drivers/net/ethernet/qlogic/qla3xxx.c +++ b/drivers/net/ethernet/qlogic/qla3xxx.c -@@ -3478,20 +3478,19 @@ static int ql_adapter_up(struct ql3_adapter *qdev) +@@ -2469,6 +2469,7 @@ static netdev_tx_t ql3xxx_send(struct sk_buff *skb, + skb_shinfo(skb)->nr_frags); + if (tx_cb->seg_count == -1) { + netdev_err(ndev, "%s: invalid segment count!\n", __func__); ++ dev_kfree_skb_any(skb); + return NETDEV_TX_OK; + } + +@@ -3478,20 +3479,19 @@ static int ql_adapter_up(struct ql3_adapter *qdev) spin_lock_irqsave(&qdev->hw_lock, hw_flags); @@ -209704,7 +254888,7 @@ index c00ad57575eab..40d14d80f6f1f 100644 spin_unlock_irqrestore(&qdev->hw_lock, hw_flags); set_bit(QL_ADAPTER_UP, &qdev->flags); -@@ -3613,7 +3612,8 @@ static void ql_reset_work(struct work_struct *work) +@@ -3613,7 +3613,8 @@ static void ql_reset_work(struct work_struct *work) qdev->mem_map_registers; unsigned long hw_flags; @@ -209715,7 +254899,7 @@ index c00ad57575eab..40d14d80f6f1f 100644 /* diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c -index d51bac7ba5afa..bd06076803295 100644 +index d51bac7ba5afa..2fd5c6fdb5003 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c @@ -1077,8 +1077,14 @@ static int qlcnic_83xx_add_rings(struct qlcnic_adapter *adapter) @@ -209735,11 +254919,58 @@ index d51bac7ba5afa..bd06076803295 100644 cmd.req.arg[1] = 0 | (num_sds << 8) | (context_id << 16); /* set up status rings, mbx 2-81 */ +@@ -2985,7 +2991,7 @@ static void qlcnic_83xx_recover_driver_lock(struct qlcnic_adapter *adapter) + QLCWRX(adapter->ahw, QLC_83XX_RECOVER_DRV_LOCK, val); + dev_info(&adapter->pdev->dev, + "%s: lock recovery initiated\n", __func__); +- msleep(QLC_83XX_DRV_LOCK_RECOVERY_DELAY); ++ mdelay(QLC_83XX_DRV_LOCK_RECOVERY_DELAY); + val = QLCRDX(adapter->ahw, QLC_83XX_RECOVER_DRV_LOCK); + id = ((val >> 2) & 0xF); + if (id == adapter->portnum) { +@@ -3021,7 +3027,7 @@ int qlcnic_83xx_lock_driver(struct qlcnic_adapter *adapter) + if (status) + break; + +- msleep(QLC_83XX_DRV_LOCK_WAIT_DELAY); ++ mdelay(QLC_83XX_DRV_LOCK_WAIT_DELAY); + i++; + + if (i == 1) +diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_init.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_init.c +index 27dffa299ca6f..7c3cf9ad4563c 100644 +--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_init.c ++++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_init.c +@@ -2505,7 +2505,13 @@ int qlcnic_83xx_init(struct qlcnic_adapter *adapter, int pci_using_dac) + goto disable_mbx_intr; + + 
qlcnic_83xx_clear_function_resources(adapter); +- qlcnic_dcb_enable(adapter->dcb); ++ ++ err = qlcnic_dcb_enable(adapter->dcb); ++ if (err) { ++ qlcnic_dcb_free(adapter->dcb); ++ goto disable_mbx_intr; ++ } ++ + qlcnic_83xx_initialize_nic(adapter, 1); + qlcnic_dcb_get_info(adapter->dcb); + diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_dcb.h b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_dcb.h -index 5d79ee4370bcd..7519773eaca6e 100644 +index 5d79ee4370bcd..22afa2be85fdb 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_dcb.h +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_dcb.h -@@ -51,7 +51,7 @@ static inline int qlcnic_dcb_get_hw_capability(struct qlcnic_dcb *dcb) +@@ -41,17 +41,12 @@ struct qlcnic_dcb { + unsigned long state; + }; + +-static inline void qlcnic_clear_dcb_ops(struct qlcnic_dcb *dcb) +-{ +- kfree(dcb); +-} +- + static inline int qlcnic_dcb_get_hw_capability(struct qlcnic_dcb *dcb) + { if (dcb && dcb->ops->get_hw_capability) return dcb->ops->get_hw_capability(dcb); @@ -209748,7 +254979,7 @@ index 5d79ee4370bcd..7519773eaca6e 100644 } static inline void qlcnic_dcb_free(struct qlcnic_dcb *dcb) -@@ -65,7 +65,7 @@ static inline int qlcnic_dcb_attach(struct qlcnic_dcb *dcb) +@@ -65,7 +60,7 @@ static inline int qlcnic_dcb_attach(struct qlcnic_dcb *dcb) if (dcb && dcb->ops->attach) return dcb->ops->attach(dcb); @@ -209757,7 +254988,7 @@ index 5d79ee4370bcd..7519773eaca6e 100644 } static inline int -@@ -74,7 +74,7 @@ qlcnic_dcb_query_hw_capability(struct qlcnic_dcb *dcb, char *buf) +@@ -74,7 +69,7 @@ qlcnic_dcb_query_hw_capability(struct qlcnic_dcb *dcb, char *buf) if (dcb && dcb->ops->query_hw_capability) return dcb->ops->query_hw_capability(dcb, buf); @@ -209766,7 +254997,7 @@ index 5d79ee4370bcd..7519773eaca6e 100644 } static inline void qlcnic_dcb_get_info(struct qlcnic_dcb *dcb) -@@ -89,7 +89,7 @@ qlcnic_dcb_query_cee_param(struct qlcnic_dcb *dcb, char *buf, u8 type) +@@ -89,7 +84,7 @@ qlcnic_dcb_query_cee_param(struct qlcnic_dcb *dcb, char *buf, u8 type) if (dcb && dcb->ops->query_cee_param) return dcb->ops->query_cee_param(dcb, buf, type); @@ -209775,7 +255006,7 @@ index 5d79ee4370bcd..7519773eaca6e 100644 } static inline int qlcnic_dcb_get_cee_cfg(struct qlcnic_dcb *dcb) -@@ -97,7 +97,7 @@ static inline int qlcnic_dcb_get_cee_cfg(struct qlcnic_dcb *dcb) +@@ -97,7 +92,7 @@ static inline int qlcnic_dcb_get_cee_cfg(struct qlcnic_dcb *dcb) if (dcb && dcb->ops->get_cee_cfg) return dcb->ops->get_cee_cfg(dcb); @@ -209784,6 +255015,37 @@ index 5d79ee4370bcd..7519773eaca6e 100644 } static inline void qlcnic_dcb_aen_handler(struct qlcnic_dcb *dcb, void *msg) +@@ -112,9 +107,8 @@ static inline void qlcnic_dcb_init_dcbnl_ops(struct qlcnic_dcb *dcb) + dcb->ops->init_dcbnl_ops(dcb); + } + +-static inline void qlcnic_dcb_enable(struct qlcnic_dcb *dcb) ++static inline int qlcnic_dcb_enable(struct qlcnic_dcb *dcb) + { +- if (dcb && qlcnic_dcb_attach(dcb)) +- qlcnic_clear_dcb_ops(dcb); ++ return dcb ? 
qlcnic_dcb_attach(dcb) : 0; + } + #endif +diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c +index 75960a29f80ea..cec07d5bbe67a 100644 +--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c ++++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c +@@ -2616,7 +2616,13 @@ qlcnic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) + "Device does not support MSI interrupts\n"); + + if (qlcnic_82xx_check(adapter)) { +- qlcnic_dcb_enable(adapter->dcb); ++ err = qlcnic_dcb_enable(adapter->dcb); ++ if (err) { ++ qlcnic_dcb_free(adapter->dcb); ++ dev_err(&pdev->dev, "Failed to enable DCB\n"); ++ goto err_out_free_hw; ++ } ++ + qlcnic_dcb_get_info(adapter->dcb); + err = qlcnic_setup_intr(adapter); + diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov.h b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov.h index 7160b42f51ddd..d0111cb3b40e1 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov.h @@ -209798,10 +255060,19 @@ index 7160b42f51ddd..d0111cb3b40e1 100644 void qlcnic_sriov_del_vlan_id(struct qlcnic_sriov *, struct qlcnic_vf_info *, u16); diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_common.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_common.c -index dd03be3fc82a9..42a44c97572ae 100644 +index dd03be3fc82a9..df9b84f6600fe 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_common.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_common.c -@@ -432,7 +432,7 @@ static int qlcnic_sriov_set_guest_vlan_mode(struct qlcnic_adapter *adapter, +@@ -221,6 +221,8 @@ int qlcnic_sriov_init(struct qlcnic_adapter *adapter, int num_vfs) + return 0; + + qlcnic_destroy_async_wq: ++ while (i--) ++ kfree(sriov->vf_info[i].vp); + destroy_workqueue(bc->bc_async_wq); + + qlcnic_destroy_trans_wq: +@@ -432,7 +434,7 @@ static int qlcnic_sriov_set_guest_vlan_mode(struct qlcnic_adapter *adapter, struct qlcnic_cmd_args *cmd) { struct qlcnic_sriov *sriov = adapter->ahw->sriov; @@ -209810,7 +255081,7 @@ index dd03be3fc82a9..42a44c97572ae 100644 u16 *vlans; if (sriov->allowed_vlans) -@@ -443,7 +443,9 @@ static int qlcnic_sriov_set_guest_vlan_mode(struct qlcnic_adapter *adapter, +@@ -443,7 +445,9 @@ static int qlcnic_sriov_set_guest_vlan_mode(struct qlcnic_adapter *adapter, dev_info(&adapter->pdev->dev, "Number of allowed Guest VLANs = %d\n", sriov->num_allowed_vlans); @@ -209821,7 +255092,7 @@ index dd03be3fc82a9..42a44c97572ae 100644 if (!sriov->any_vlan) return 0; -@@ -2154,7 +2156,7 @@ static int qlcnic_sriov_vf_resume(struct qlcnic_adapter *adapter) +@@ -2154,7 +2158,7 @@ static int qlcnic_sriov_vf_resume(struct qlcnic_adapter *adapter) return err; } @@ -209830,7 +255101,7 @@ index dd03be3fc82a9..42a44c97572ae 100644 { struct qlcnic_sriov *sriov = adapter->ahw->sriov; struct qlcnic_vf_info *vf; -@@ -2164,7 +2166,11 @@ void qlcnic_sriov_alloc_vlans(struct qlcnic_adapter *adapter) +@@ -2164,7 +2168,11 @@ void qlcnic_sriov_alloc_vlans(struct qlcnic_adapter *adapter) vf = &sriov->vf_info[i]; vf->sriov_vlans = kcalloc(sriov->num_allowed_vlans, sizeof(*vf->sriov_vlans), GFP_KERNEL); @@ -209857,10 +255128,94 @@ index 447720b93e5ab..e90fa97c0ae6c 100644 return err; +diff --git a/drivers/net/ethernet/rdc/r6040.c b/drivers/net/ethernet/rdc/r6040.c +index 01ef5efd7bc2a..5a8a6977ec9a7 100644 +--- a/drivers/net/ethernet/rdc/r6040.c ++++ b/drivers/net/ethernet/rdc/r6040.c +@@ -1159,10 +1159,12 @@ static int r6040_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) + err = register_netdev(dev); + if (err) 
{ + dev_err(&pdev->dev, "Failed to register net device\n"); +- goto err_out_mdio_unregister; ++ goto err_out_phy_disconnect; + } + return 0; + ++err_out_phy_disconnect: ++ phy_disconnect(dev->phydev); + err_out_mdio_unregister: + mdiobus_unregister(lp->mii_bus); + err_out_mdio: +@@ -1186,6 +1188,7 @@ static void r6040_remove_one(struct pci_dev *pdev) + struct r6040_private *lp = netdev_priv(dev); + + unregister_netdev(dev); ++ phy_disconnect(dev->phydev); + mdiobus_unregister(lp->mii_bus); + mdiobus_free(lp->mii_bus); + netif_napi_del(&lp->napi); diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c -index 2918947dd57c9..2af4c76bcf027 100644 +index 2918947dd57c9..264bb3ec44a59 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c +@@ -2251,28 +2251,6 @@ static int rtl_set_mac_address(struct net_device *dev, void *p) + return 0; + } + +-static void rtl_wol_enable_rx(struct rtl8169_private *tp) +-{ +- if (tp->mac_version >= RTL_GIGA_MAC_VER_25) +- RTL_W32(tp, RxConfig, RTL_R32(tp, RxConfig) | +- AcceptBroadcast | AcceptMulticast | AcceptMyPhys); +-} +- +-static void rtl_prepare_power_down(struct rtl8169_private *tp) +-{ +- if (tp->dash_type != RTL_DASH_NONE) +- return; +- +- if (tp->mac_version == RTL_GIGA_MAC_VER_32 || +- tp->mac_version == RTL_GIGA_MAC_VER_33) +- rtl_ephy_write(tp, 0x19, 0xff64); +- +- if (device_may_wakeup(tp_to_dev(tp))) { +- phy_speed_down(tp->phydev, false); +- rtl_wol_enable_rx(tp); +- } +-} +- + static void rtl_init_rxcfg(struct rtl8169_private *tp) + { + switch (tp->mac_version) { +@@ -2492,6 +2470,28 @@ static void rtl_enable_rxdvgate(struct rtl8169_private *tp) + rtl_wait_txrx_fifo_empty(tp); + } + ++static void rtl_wol_enable_rx(struct rtl8169_private *tp) ++{ ++ if (tp->mac_version >= RTL_GIGA_MAC_VER_25) ++ RTL_W32(tp, RxConfig, RTL_R32(tp, RxConfig) | ++ AcceptBroadcast | AcceptMulticast | AcceptMyPhys); ++} ++ ++static void rtl_prepare_power_down(struct rtl8169_private *tp) ++{ ++ if (tp->dash_type != RTL_DASH_NONE) ++ return; ++ ++ if (tp->mac_version == RTL_GIGA_MAC_VER_32 || ++ tp->mac_version == RTL_GIGA_MAC_VER_33) ++ rtl_ephy_write(tp, 0x19, 0xff64); ++ ++ if (device_may_wakeup(tp_to_dev(tp))) { ++ phy_speed_down(tp->phydev, false); ++ rtl_wol_enable_rx(tp); ++ } ++} ++ + static void rtl_set_tx_config_registers(struct rtl8169_private *tp) + { + u32 val = TX_DMA_BURST << TxDMAShift | @@ -4177,7 +4177,6 @@ static void rtl8169_tso_csum_v1(struct sk_buff *skb, u32 *opts) static bool rtl8169_tso_csum_v2(struct rtl8169_private *tp, struct sk_buff *skb, u32 *opts) @@ -209912,8 +255267,23 @@ index 2918947dd57c9..2af4c76bcf027 100644 rtl_chip_supports_csum_v2(tp)) features &= ~NETIF_F_CSUM_MASK; } +diff --git a/drivers/net/ethernet/renesas/ravb.h b/drivers/net/ethernet/renesas/ravb.h +index 47c5377e4f424..a475f54a6b63c 100644 +--- a/drivers/net/ethernet/renesas/ravb.h ++++ b/drivers/net/ethernet/renesas/ravb.h +@@ -1000,8 +1000,8 @@ struct ravb_hw_info { + unsigned internal_delay:1; /* AVB-DMAC has internal delays */ + unsigned tx_counters:1; /* E-MAC has TX counters */ + unsigned multi_irqs:1; /* AVB-DMAC and E-MAC has multiple irqs */ +- unsigned no_ptp_cfg_active:1; /* AVB-DMAC does not support gPTP active in config mode */ +- unsigned ptp_cfg_active:1; /* AVB-DMAC has gPTP support active in config mode */ ++ unsigned gptp:1; /* AVB-DMAC has gPTP support */ ++ unsigned ccc_gac:1; /* AVB-DMAC has gPTP support active in config mode */ + }; + + struct ravb_private { 
diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c -index 0f85f2d97b18d..12420239c8ca2 100644 +index 0f85f2d97b18d..c6fe1cda7b889 100644 --- a/drivers/net/ethernet/renesas/ravb_main.c +++ b/drivers/net/ethernet/renesas/ravb_main.c @@ -30,8 +30,7 @@ @@ -209926,6 +255296,23 @@ index 0f85f2d97b18d..12420239c8ca2 100644 #include "ravb.h" +@@ -793,14 +792,14 @@ static void ravb_error_interrupt(struct net_device *ndev) + ravb_write(ndev, ~(EIS_QFS | EIS_RESERVED), EIS); + if (eis & EIS_QFS) { + ris2 = ravb_read(ndev, RIS2); +- ravb_write(ndev, ~(RIS2_QFF0 | RIS2_RFFF | RIS2_RESERVED), ++ ravb_write(ndev, ~(RIS2_QFF0 | RIS2_QFF1 | RIS2_RFFF | RIS2_RESERVED), + RIS2); + + /* Receive Descriptor Empty int */ + if (ris2 & RIS2_QFF0) + priv->stats[RAVB_BE].rx_over_errors++; + +- /* Receive Descriptor Empty int */ ++ /* Receive Descriptor Empty int */ + if (ris2 & RIS2_QFF1) + priv->stats[RAVB_NC].rx_over_errors++; + @@ -1116,6 +1115,8 @@ static int ravb_phy_init(struct net_device *ndev) phy_remove_link_mode(phydev, ETHTOOL_LINK_MODE_1000baseT_Half_BIT); phy_remove_link_mode(phydev, ETHTOOL_LINK_MODE_100baseT_Half_BIT); @@ -209935,6 +255322,87 @@ index 0f85f2d97b18d..12420239c8ca2 100644 phy_attached_info(phydev); return 0; +@@ -1274,7 +1275,7 @@ static int ravb_set_ringparam(struct net_device *ndev, + if (netif_running(ndev)) { + netif_device_detach(ndev); + /* Stop PTP Clock driver */ +- if (info->no_ptp_cfg_active) ++ if (info->gptp) + ravb_ptp_stop(ndev); + /* Wait for DMA stopping */ + error = ravb_stop_dma(ndev); +@@ -1306,7 +1307,7 @@ static int ravb_set_ringparam(struct net_device *ndev, + ravb_emac_init(ndev); + + /* Initialise PTP Clock driver */ +- if (info->no_ptp_cfg_active) ++ if (info->gptp) + ravb_ptp_init(ndev, priv->pdev); + + netif_device_attach(ndev); +@@ -1446,7 +1447,7 @@ static int ravb_open(struct net_device *ndev) + ravb_emac_init(ndev); + + /* Initialise PTP Clock driver */ +- if (info->no_ptp_cfg_active) ++ if (info->gptp) + ravb_ptp_init(ndev, priv->pdev); + + netif_tx_start_all_queues(ndev); +@@ -1460,7 +1461,7 @@ static int ravb_open(struct net_device *ndev) + + out_ptp_stop: + /* Stop PTP Clock driver */ +- if (info->no_ptp_cfg_active) ++ if (info->gptp) + ravb_ptp_stop(ndev); + out_free_irq_nc_tx: + if (!info->multi_irqs) +@@ -1508,7 +1509,7 @@ static void ravb_tx_timeout_work(struct work_struct *work) + netif_tx_stop_all_queues(ndev); + + /* Stop PTP Clock driver */ +- if (info->no_ptp_cfg_active) ++ if (info->gptp) + ravb_ptp_stop(ndev); + + /* Wait for DMA stopping */ +@@ -1543,7 +1544,7 @@ static void ravb_tx_timeout_work(struct work_struct *work) + + out: + /* Initialise PTP Clock driver */ +- if (info->no_ptp_cfg_active) ++ if (info->gptp) + ravb_ptp_init(ndev, priv->pdev); + + netif_tx_start_all_queues(ndev); +@@ -1752,7 +1753,7 @@ static int ravb_close(struct net_device *ndev) + ravb_write(ndev, 0, TIC); + + /* Stop PTP Clock driver */ +- if (info->no_ptp_cfg_active) ++ if (info->gptp) + ravb_ptp_stop(ndev); + + /* Set the config mode to stop the AVB-DMAC's processes */ +@@ -2018,7 +2019,7 @@ static const struct ravb_hw_info ravb_gen3_hw_info = { + .internal_delay = 1, + .tx_counters = 1, + .multi_irqs = 1, +- .ptp_cfg_active = 1, ++ .ccc_gac = 1, + }; + + static const struct ravb_hw_info ravb_gen2_hw_info = { +@@ -2037,7 +2038,7 @@ static const struct ravb_hw_info ravb_gen2_hw_info = { + .stats_len = ARRAY_SIZE(ravb_gstrings_stats), + .max_rx_len = RX_BUF_SZ + RAVB_ALIGN - 1, + .aligned_tx = 1, +- .no_ptp_cfg_active 
= 1, ++ .gptp = 1, + }; + + static const struct of_device_id ravb_match_table[] = { @@ -2061,8 +2062,7 @@ static int ravb_set_gti(struct net_device *ndev) if (!rate) return -EINVAL; @@ -209945,6 +255413,83 @@ index 0f85f2d97b18d..12420239c8ca2 100644 if (inc < GTI_TIV_MIN || inc > GTI_TIV_MAX) { dev_err(dev, "gti.tiv increment 0x%llx is outside the range 0x%x - 0x%x\n", +@@ -2080,7 +2080,7 @@ static void ravb_set_config_mode(struct net_device *ndev) + struct ravb_private *priv = netdev_priv(ndev); + const struct ravb_hw_info *info = priv->info; + +- if (info->no_ptp_cfg_active) { ++ if (info->gptp) { + ravb_modify(ndev, CCC, CCC_OPC, CCC_OPC_CONFIG); + /* Set CSEL value */ + ravb_modify(ndev, CCC, CCC_CSEL, CCC_CSEL_HPB); +@@ -2301,7 +2301,7 @@ static int ravb_probe(struct platform_device *pdev) + INIT_LIST_HEAD(&priv->ts_skb_list); + + /* Initialise PTP Clock driver */ +- if (info->ptp_cfg_active) ++ if (info->ccc_gac) + ravb_ptp_init(ndev, pdev); + + /* Debug message level */ +@@ -2349,7 +2349,7 @@ out_dma_free: + priv->desc_bat_dma); + + /* Stop PTP Clock driver */ +- if (info->ptp_cfg_active) ++ if (info->ccc_gac) + ravb_ptp_stop(ndev); + out_disable_refclk: + clk_disable_unprepare(priv->refclk); +@@ -2369,7 +2369,7 @@ static int ravb_remove(struct platform_device *pdev) + const struct ravb_hw_info *info = priv->info; + + /* Stop PTP Clock driver */ +- if (info->ptp_cfg_active) ++ if (info->ccc_gac) + ravb_ptp_stop(ndev); + + clk_disable_unprepare(priv->refclk); +@@ -2378,11 +2378,11 @@ static int ravb_remove(struct platform_device *pdev) + priv->desc_bat_dma); + /* Set reset mode */ + ravb_write(ndev, CCC_OPC_RESET, CCC); +- pm_runtime_put_sync(&pdev->dev); + unregister_netdev(ndev); + netif_napi_del(&priv->napi[RAVB_NC]); + netif_napi_del(&priv->napi[RAVB_BE]); + ravb_mdio_release(priv); ++ pm_runtime_put_sync(&pdev->dev); + pm_runtime_disable(&pdev->dev); + reset_control_assert(priv->rstc); + free_netdev(ndev); +@@ -2446,6 +2446,9 @@ static int __maybe_unused ravb_suspend(struct device *dev) + else + ret = ravb_close(ndev); + ++ if (priv->info->ccc_gac) ++ ravb_ptp_stop(ndev); ++ + return ret; + } + +@@ -2482,6 +2485,9 @@ static int __maybe_unused ravb_resume(struct device *dev) + /* Restore descriptor base address table */ + ravb_write(ndev, priv->desc_bat_dma, DBAT); + ++ if (priv->info->ccc_gac) ++ ravb_ptp_init(ndev, priv->pdev); ++ + if (netif_running(ndev)) { + if (priv->wol_enabled) { + ret = ravb_wol_restore(ndev); +@@ -2491,6 +2497,7 @@ static int __maybe_unused ravb_resume(struct device *dev) + ret = ravb_open(ndev); + if (ret < 0) + return ret; ++ ravb_set_rx_mode(ndev); + netif_device_attach(ndev); + } + diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c index 1374faa229a27..4e190f5e32c3d 100644 --- a/drivers/net/ethernet/renesas/sh_eth.c @@ -210115,6 +255660,18 @@ index e7e2223aebbf5..c316a9eb5be38 100644 rc = efx_mtd_add(efx, &parts[0].common, n_parts, sizeof(*parts)); fail: if (rc) +diff --git a/drivers/net/ethernet/sfc/ef100_netdev.c b/drivers/net/ethernet/sfc/ef100_netdev.c +index 67fe44db6b612..63a44ee763be7 100644 +--- a/drivers/net/ethernet/sfc/ef100_netdev.c ++++ b/drivers/net/ethernet/sfc/ef100_netdev.c +@@ -200,6 +200,7 @@ static netdev_tx_t ef100_hard_start_xmit(struct sk_buff *skb, + skb->len, skb->data_len, channel->channel); + if (!efx->n_channels || !efx->n_tx_channels || !channel) { + netif_stop_queue(net_dev); ++ dev_kfree_skb_any(skb); + goto err; + } + diff --git a/drivers/net/ethernet/sfc/ef100_nic.c 
b/drivers/net/ethernet/sfc/ef100_nic.c index 518268ce20644..d35cafd422b1c 100644 --- a/drivers/net/ethernet/sfc/ef100_nic.c @@ -210160,6 +255717,23 @@ index 752d6406f07ed..f488461a23d1c 100644 efx_ef10_sriov_free_vf_vswitching(efx); efx->vf_count = 0; +diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c +index 43ef4f5290281..b6243f03e953d 100644 +--- a/drivers/net/ethernet/sfc/efx.c ++++ b/drivers/net/ethernet/sfc/efx.c +@@ -1005,8 +1005,11 @@ static int efx_pci_probe_post_io(struct efx_nic *efx) + /* Determine netdevice features */ + net_dev->features |= (efx->type->offload_features | NETIF_F_SG | + NETIF_F_TSO | NETIF_F_RXCSUM | NETIF_F_RXALL); +- if (efx->type->offload_features & (NETIF_F_IPV6_CSUM | NETIF_F_HW_CSUM)) ++ if (efx->type->offload_features & (NETIF_F_IPV6_CSUM | NETIF_F_HW_CSUM)) { + net_dev->features |= NETIF_F_TSO6; ++ if (efx_has_cap(efx, TX_TSO_V2_ENCAP)) ++ net_dev->hw_enc_features |= NETIF_F_TSO6; ++ } + /* Check whether device supports TSO */ + if (!efx->type->tso_versions || !efx->type->tso_versions(efx)) + net_dev->features &= ~NETIF_F_ALL_TSO; diff --git a/drivers/net/ethernet/sfc/efx_channels.c b/drivers/net/ethernet/sfc/efx_channels.c index 3dbea028b325c..450fcedb7042a 100644 --- a/drivers/net/ethernet/sfc/efx_channels.c @@ -210793,6 +256367,48 @@ index 199a973392806..63b99dd8ca51c 100644 } return 0; +diff --git a/drivers/net/ethernet/socionext/netsec.c b/drivers/net/ethernet/socionext/netsec.c +index 1f46af136aa8c..f0451911ab8f6 100644 +--- a/drivers/net/ethernet/socionext/netsec.c ++++ b/drivers/net/ethernet/socionext/netsec.c +@@ -1964,11 +1964,13 @@ static int netsec_register_mdio(struct netsec_priv *priv, u32 phy_addr) + ret = PTR_ERR(priv->phydev); + dev_err(priv->dev, "get_phy_device err(%d)\n", ret); + priv->phydev = NULL; ++ mdiobus_unregister(bus); + return -ENODEV; + } + + ret = phy_device_register(priv->phydev); + if (ret) { ++ phy_device_free(priv->phydev); + mdiobus_unregister(bus); + dev_err(priv->dev, + "phy_device_register err(%d)\n", ret); +diff --git a/drivers/net/ethernet/socionext/sni_ave.c b/drivers/net/ethernet/socionext/sni_ave.c +index ae31ed93aaf02..57dc9680ad50a 100644 +--- a/drivers/net/ethernet/socionext/sni_ave.c ++++ b/drivers/net/ethernet/socionext/sni_ave.c +@@ -1229,6 +1229,8 @@ static int ave_init(struct net_device *ndev) + + phy_support_asym_pause(phydev); + ++ phydev->mac_managed_pm = true; ++ + phy_attached_info(phydev); + + return 0; +@@ -1758,6 +1760,10 @@ static int ave_resume(struct device *dev) + + ave_global_reset(ndev); + ++ ret = phy_init_hw(ndev->phydev); ++ if (ret) ++ return ret; ++ + ave_ethtool_get_wol(ndev, &wol); + wol.wolopts = priv->wolopts; + __ave_ethtool_set_wol(ndev, &wol); diff --git a/drivers/net/ethernet/stmicro/stmmac/altr_tse_pcs.c b/drivers/net/ethernet/stmicro/stmmac/altr_tse_pcs.c index cd478d2cd871a..00f6d347eaf75 100644 --- a/drivers/net/ethernet/stmicro/stmmac/altr_tse_pcs.c @@ -210873,7 +256489,7 @@ index 9a6d819b84aea..378b4dd826bb5 100644 } diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c -index 8e8778cfbbadd..b32f1f5d841f4 100644 +index 8e8778cfbbadd..fb9ff4ce94530 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c @@ -454,6 +454,7 @@ static int intel_mgbe_common_data(struct pci_dev *pdev, @@ -210884,7 +256500,77 @@ index 8e8778cfbbadd..b32f1f5d841f4 100644 /* Multiplying factor to the clk_eee_i clock time * period to make it closer 
to 100 ns. This value -@@ -1072,13 +1073,11 @@ static int intel_eth_pci_probe(struct pci_dev *pdev, +@@ -592,7 +593,6 @@ static int ehl_common_data(struct pci_dev *pdev, + { + plat->rx_queues_to_use = 8; + plat->tx_queues_to_use = 8; +- plat->clk_ptp_rate = 200000000; + plat->use_phy_wol = 1; + + plat->safety_feat_cfg->tsoee = 1; +@@ -617,6 +617,8 @@ static int ehl_sgmii_data(struct pci_dev *pdev, + plat->serdes_powerup = intel_serdes_powerup; + plat->serdes_powerdown = intel_serdes_powerdown; + ++ plat->clk_ptp_rate = 204800000; ++ + return ehl_common_data(pdev, plat); + } + +@@ -630,6 +632,8 @@ static int ehl_rgmii_data(struct pci_dev *pdev, + plat->bus_id = 1; + plat->phy_interface = PHY_INTERFACE_MODE_RGMII; + ++ plat->clk_ptp_rate = 204800000; ++ + return ehl_common_data(pdev, plat); + } + +@@ -646,6 +650,8 @@ static int ehl_pse0_common_data(struct pci_dev *pdev, + plat->bus_id = 2; + plat->addr64 = 32; + ++ plat->clk_ptp_rate = 200000000; ++ + intel_mgbe_pse_crossts_adj(intel_priv, EHL_PSE_ART_MHZ); + + return ehl_common_data(pdev, plat); +@@ -685,6 +691,8 @@ static int ehl_pse1_common_data(struct pci_dev *pdev, + plat->bus_id = 3; + plat->addr64 = 32; + ++ plat->clk_ptp_rate = 200000000; ++ + intel_mgbe_pse_crossts_adj(intel_priv, EHL_PSE_ART_MHZ); + + return ehl_common_data(pdev, plat); +@@ -720,7 +728,8 @@ static int tgl_common_data(struct pci_dev *pdev, + { + plat->rx_queues_to_use = 6; + plat->tx_queues_to_use = 4; +- plat->clk_ptp_rate = 200000000; ++ plat->clk_ptp_rate = 204800000; ++ plat->speed_mode_2500 = intel_speed_mode_2500; + + plat->safety_feat_cfg->tsoee = 1; + plat->safety_feat_cfg->mrxpee = 0; +@@ -740,7 +749,6 @@ static int tgl_sgmii_phy0_data(struct pci_dev *pdev, + { + plat->bus_id = 1; + plat->phy_interface = PHY_INTERFACE_MODE_SGMII; +- plat->speed_mode_2500 = intel_speed_mode_2500; + plat->serdes_powerup = intel_serdes_powerup; + plat->serdes_powerdown = intel_serdes_powerdown; + return tgl_common_data(pdev, plat); +@@ -755,7 +763,6 @@ static int tgl_sgmii_phy1_data(struct pci_dev *pdev, + { + plat->bus_id = 2; + plat->phy_interface = PHY_INTERFACE_MODE_SGMII; +- plat->speed_mode_2500 = intel_speed_mode_2500; + plat->serdes_powerup = intel_serdes_powerup; + plat->serdes_powerdown = intel_serdes_powerdown; + return tgl_common_data(pdev, plat); +@@ -1072,13 +1079,11 @@ static int intel_eth_pci_probe(struct pci_dev *pdev, ret = stmmac_dvr_probe(&pdev->dev, plat, &res); if (ret) { @@ -210899,7 +256585,7 @@ index 8e8778cfbbadd..b32f1f5d841f4 100644 err_alloc_irq: clk_disable_unprepare(plat->stmmac_clk); clk_unregister_fixed_rate(plat->stmmac_clk); -@@ -1099,6 +1098,7 @@ static void intel_eth_pci_remove(struct pci_dev *pdev) +@@ -1099,6 +1104,7 @@ static void intel_eth_pci_remove(struct pci_dev *pdev) stmmac_dvr_remove(&pdev->dev); @@ -210907,8 +256593,148 @@ index 8e8778cfbbadd..b32f1f5d841f4 100644 clk_unregister_fixed_rate(priv->plat->stmmac_clk); pcim_iounmap_regions(pdev, BIT(0)); +diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c +index ecf759ee1c9f5..2ae59f94afe1d 100644 +--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c ++++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c +@@ -51,7 +51,6 @@ static int loongson_dwmac_probe(struct pci_dev *pdev, const struct pci_device_id + struct stmmac_resources res; + struct device_node *np; + int ret, i, phy_mode; +- bool mdio = false; + + np = dev_of_node(&pdev->dev); + +@@ -69,29 +68,31 @@ static int loongson_dwmac_probe(struct pci_dev 
*pdev, const struct pci_device_id + if (!plat) + return -ENOMEM; + ++ plat->mdio_node = of_get_child_by_name(np, "mdio"); + if (plat->mdio_node) { +- dev_err(&pdev->dev, "Found MDIO subnode\n"); +- mdio = true; +- } ++ dev_info(&pdev->dev, "Found MDIO subnode\n"); + +- if (mdio) { + plat->mdio_bus_data = devm_kzalloc(&pdev->dev, + sizeof(*plat->mdio_bus_data), + GFP_KERNEL); +- if (!plat->mdio_bus_data) +- return -ENOMEM; ++ if (!plat->mdio_bus_data) { ++ ret = -ENOMEM; ++ goto err_put_node; ++ } + plat->mdio_bus_data->needs_reset = true; + } + + plat->dma_cfg = devm_kzalloc(&pdev->dev, sizeof(*plat->dma_cfg), GFP_KERNEL); +- if (!plat->dma_cfg) +- return -ENOMEM; ++ if (!plat->dma_cfg) { ++ ret = -ENOMEM; ++ goto err_put_node; ++ } + + /* Enable pci device */ + ret = pci_enable_device(pdev); + if (ret) { + dev_err(&pdev->dev, "%s: ERROR: failed to enable device\n", __func__); +- return ret; ++ goto err_put_node; + } + + /* Get the base address of device */ +@@ -100,7 +101,7 @@ static int loongson_dwmac_probe(struct pci_dev *pdev, const struct pci_device_id + continue; + ret = pcim_iomap_regions(pdev, BIT(0), pci_name(pdev)); + if (ret) +- return ret; ++ goto err_disable_device; + break; + } + +@@ -111,7 +112,8 @@ static int loongson_dwmac_probe(struct pci_dev *pdev, const struct pci_device_id + phy_mode = device_get_phy_mode(&pdev->dev); + if (phy_mode < 0) { + dev_err(&pdev->dev, "phy_mode not found\n"); +- return phy_mode; ++ ret = phy_mode; ++ goto err_disable_device; + } + + plat->phy_interface = phy_mode; +@@ -128,6 +130,7 @@ static int loongson_dwmac_probe(struct pci_dev *pdev, const struct pci_device_id + if (res.irq < 0) { + dev_err(&pdev->dev, "IRQ macirq not found\n"); + ret = -ENODEV; ++ goto err_disable_msi; + } + + res.wol_irq = of_irq_get_byname(np, "eth_wake_irq"); +@@ -140,15 +143,31 @@ static int loongson_dwmac_probe(struct pci_dev *pdev, const struct pci_device_id + if (res.lpi_irq < 0) { + dev_err(&pdev->dev, "IRQ eth_lpi not found\n"); + ret = -ENODEV; ++ goto err_disable_msi; + } + +- return stmmac_dvr_probe(&pdev->dev, plat, &res); ++ ret = stmmac_dvr_probe(&pdev->dev, plat, &res); ++ if (ret) ++ goto err_disable_msi; ++ ++ return ret; ++ ++err_disable_msi: ++ pci_disable_msi(pdev); ++err_disable_device: ++ pci_disable_device(pdev); ++err_put_node: ++ of_node_put(plat->mdio_node); ++ return ret; + } + + static void loongson_dwmac_remove(struct pci_dev *pdev) + { ++ struct net_device *ndev = dev_get_drvdata(&pdev->dev); ++ struct stmmac_priv *priv = netdev_priv(ndev); + int i; + ++ of_node_put(priv->plat->mdio_node); + stmmac_dvr_remove(&pdev->dev); + + for (i = 0; i < PCI_STD_NUM_BARS; i++) { +@@ -158,6 +177,7 @@ static void loongson_dwmac_remove(struct pci_dev *pdev) + break; + } + ++ pci_disable_msi(pdev); + pci_disable_device(pdev); + } + +diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c +index c7a6588d9398b..e8b507f88fbce 100644 +--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c ++++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c +@@ -272,11 +272,9 @@ static int meson8b_devm_clk_prepare_enable(struct meson8b_dwmac *dwmac, + if (ret) + return ret; + +- devm_add_action_or_reset(dwmac->dev, +- (void(*)(void *))clk_disable_unprepare, +- dwmac->rgmii_tx_clk); +- +- return 0; ++ return devm_add_action_or_reset(dwmac->dev, ++ (void(*)(void *))clk_disable_unprepare, ++ clk); + } + + static int meson8b_init_rgmii_delays(struct meson8b_dwmac *dwmac) diff --git 
a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c -index 5c74b6279d690..6b1d9e8879f46 100644 +index 5c74b6279d690..d0c7f22a4e55a 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c @@ -113,8 +113,10 @@ static void rgmii_updatel(struct qcom_ethqos *ethqos, @@ -210923,7 +256749,7 @@ index 5c74b6279d690..6b1d9e8879f46 100644 dev_dbg(ðqos->pdev->dev, "Rgmii register dump\n"); dev_dbg(ðqos->pdev->dev, "RGMII_IO_MACRO_CONFIG: %x\n", rgmii_readl(ethqos, RGMII_IO_MACRO_CONFIG)); -@@ -499,6 +501,7 @@ static int qcom_ethqos_probe(struct platform_device *pdev) +@@ -499,16 +501,17 @@ static int qcom_ethqos_probe(struct platform_device *pdev) plat_dat->bsp_priv = ethqos; plat_dat->fix_mac_speed = ethqos_fix_mac_speed; @@ -210931,7 +256757,10 @@ index 5c74b6279d690..6b1d9e8879f46 100644 plat_dat->has_gmac4 = 1; plat_dat->pmt = 1; plat_dat->tso_en = of_property_read_bool(np, "snps,tso"); -@@ -507,8 +510,6 @@ static int qcom_ethqos_probe(struct platform_device *pdev) ++ if (of_device_is_compatible(np, "qcom,qcs404-ethqos")) ++ plat_dat->rx_clk_runs_in_lpi = 1; + + ret = stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res); if (ret) goto err_clk; @@ -211186,7 +257015,7 @@ index d046e33b8a297..c27441c08dd6f 100644 plat_dat->fix_mac_speed = visconti_eth_fix_mac_speed; diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c -index b217453689839..412abfabd28bc 100644 +index b217453689839..60638bf18f1fe 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c @@ -219,6 +219,9 @@ static void dwmac4_map_mtl_dma(struct mac_device_info *hw, u32 queue, u32 chan) @@ -211199,6 +257028,64 @@ index b217453689839..412abfabd28bc 100644 } else { value &= ~MTL_RXQ_DMA_QXMDMACH_MASK(queue); value |= MTL_RXQ_DMA_QXMDMACH(chan, queue); +@@ -742,6 +745,8 @@ static void dwmac4_flow_ctrl(struct mac_device_info *hw, unsigned int duplex, + if (fc & FLOW_RX) { + pr_debug("\tReceive Flow-Control ON\n"); + flow |= GMAC_RX_FLOW_CTRL_RFE; ++ } else { ++ pr_debug("\tReceive Flow-Control OFF\n"); + } + writel(flow, ioaddr + GMAC_RX_FLOW_CTRL); + +diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac5.c b/drivers/net/ethernet/stmicro/stmmac/dwmac5.c +index 9c2d40f853ed0..e95d35f1e5a0c 100644 +--- a/drivers/net/ethernet/stmicro/stmmac/dwmac5.c ++++ b/drivers/net/ethernet/stmicro/stmmac/dwmac5.c +@@ -186,11 +186,25 @@ static void dwmac5_handle_dma_err(struct net_device *ndev, + int dwmac5_safety_feat_config(void __iomem *ioaddr, unsigned int asp, + struct stmmac_safety_feature_cfg *safety_feat_cfg) + { ++ struct stmmac_safety_feature_cfg all_safety_feats = { ++ .tsoee = 1, ++ .mrxpee = 1, ++ .mestee = 1, ++ .mrxee = 1, ++ .mtxee = 1, ++ .epsi = 1, ++ .edpp = 1, ++ .prtyen = 1, ++ .tmouten = 1, ++ }; + u32 value; + + if (!asp) + return -EINVAL; + ++ if (!safety_feat_cfg) ++ safety_feat_cfg = &all_safety_feats; ++ + /* 1. 
Enable Safety Features */ + value = readl(ioaddr + MTL_ECC_CONTROL); + value |= MEEAO; /* MTL ECC Error Addr Status Override */ +@@ -527,9 +541,9 @@ int dwmac5_flex_pps_config(void __iomem *ioaddr, int index, + return 0; + } + +- val |= PPSCMDx(index, 0x2); + val |= TRGTMODSELx(index, 0x2); + val |= PPSEN0; ++ writel(val, ioaddr + MAC_PPS_CONTROL); + + writel(cfg->start.tv_sec, ioaddr + MAC_PPSx_TARGET_TIME_SEC(index)); + +@@ -554,6 +568,7 @@ int dwmac5_flex_pps_config(void __iomem *ioaddr, int index, + writel(period - 1, ioaddr + MAC_PPSx_WIDTH(index)); + + /* Finally, activate it */ ++ val |= PPSCMDx(index, 0x2); + writel(val, ioaddr + MAC_PPS_CONTROL); + return 0; + } diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac_dma.h b/drivers/net/ethernet/stmicro/stmmac/dwmac_dma.h index 1914ad698cab2..acd70b9a3173c 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac_dma.h @@ -211286,7 +257173,7 @@ index 43eead726886a..05b5371ca036b 100644 int stmmac_suspend(struct device *dev); int stmmac_dvr_remove(struct device *dev); diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c -index d89455803beda..dc31501fec8ff 100644 +index d89455803beda..9e8ae4384e4fb 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c @@ -21,10 +21,18 @@ @@ -211333,6 +257220,27 @@ index d89455803beda..dc31501fec8ff 100644 memcpy(®_space[ETHTOOL_DMA_OFFSET], ®_space[DMA_BUS_MODE / 4], NUM_DWMAC1000_DMA_REGS * 4); +@@ -533,16 +548,16 @@ static void stmmac_get_per_qstats(struct stmmac_priv *priv, u64 *data) + p = (char *)priv + offsetof(struct stmmac_priv, + xstats.txq_stats[q].tx_pkt_n); + for (stat = 0; stat < STMMAC_TXQ_STATS; stat++) { +- *data++ = (*(u64 *)p); +- p += sizeof(u64 *); ++ *data++ = (*(unsigned long *)p); ++ p += sizeof(unsigned long); + } + } + for (q = 0; q < rx_cnt; q++) { + p = (char *)priv + offsetof(struct stmmac_priv, + xstats.rxq_stats[q].rx_pkt_n); + for (stat = 0; stat < STMMAC_RXQ_STATS; stat++) { +- *data++ = (*(u64 *)p); +- p += sizeof(u64 *); ++ *data++ = (*(unsigned long *)p); ++ p += sizeof(unsigned long); + } + } + } @@ -785,14 +800,6 @@ static int stmmac_ethtool_op_set_eee(struct net_device *dev, netdev_warn(priv->dev, "Setting EEE tx-lpi is not supported\n"); @@ -211349,10 +257257,20 @@ index d89455803beda..dc31501fec8ff 100644 stmmac_disable_eee_mode(priv); diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c -index 074e2cdfb0fa6..d68ef72dcdde0 100644 +index 074e2cdfb0fa6..4538e4fd81898 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c -@@ -71,9 +71,9 @@ static int init_systime(void __iomem *ioaddr, u32 sec, u32 nsec) +@@ -47,7 +47,8 @@ static void config_sub_second_increment(void __iomem *ioaddr, + if (!(value & PTP_TCR_TSCTRLSSR)) + data = (data * 1000) / 465; + +- data &= PTP_SSIR_SSINC_MASK; ++ if (data > PTP_SSIR_SSINC_MAX) ++ data = PTP_SSIR_SSINC_MAX; + + reg_value = data; + if (gmac4) +@@ -71,9 +72,9 @@ static int init_systime(void __iomem *ioaddr, u32 sec, u32 nsec) writel(value, ioaddr + PTP_TCR); /* wait for present system time initialize to complete */ @@ -211364,7 +257282,7 @@ index 074e2cdfb0fa6..d68ef72dcdde0 100644 } static int config_addend(void __iomem *ioaddr, u32 addend) -@@ -145,15 +145,20 @@ static int adjust_systime(void __iomem *ioaddr, u32 sec, u32 nsec, +@@ -145,15 +146,20 @@ 
static int adjust_systime(void __iomem *ioaddr, u32 sec, u32 nsec, static void get_systime(void __iomem *ioaddr, u64 *systime) { @@ -211393,7 +257311,7 @@ index 074e2cdfb0fa6..d68ef72dcdde0 100644 static void get_ptptime(void __iomem *ptpaddr, u64 *ptp_time) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c -index 3d67d1fa36906..6f579f4989934 100644 +index 3d67d1fa36906..d56f65338ea66 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -50,6 +50,13 @@ @@ -211674,9 +257592,22 @@ index 3d67d1fa36906..6f579f4989934 100644 if (interface == PHY_INTERFACE_MODE_USXGMII) { switch (speed) { -@@ -1134,7 +1161,8 @@ static void stmmac_mac_link_up(struct phylink_config *config, - if (tx_pause && rx_pause) - stmmac_mac_flow_ctrl(priv, duplex); +@@ -1131,14 +1158,24 @@ static void stmmac_mac_link_up(struct phylink_config *config, + ctrl |= priv->hw->link.duplex; + + /* Flow Control operation */ +- if (tx_pause && rx_pause) +- stmmac_mac_flow_ctrl(priv, duplex); ++ if (rx_pause && tx_pause) ++ priv->flow_ctrl = FLOW_AUTO; ++ else if (rx_pause && !tx_pause) ++ priv->flow_ctrl = FLOW_RX; ++ else if (!rx_pause && tx_pause) ++ priv->flow_ctrl = FLOW_TX; ++ else ++ priv->flow_ctrl = FLOW_OFF; ++ ++ stmmac_mac_flow_ctrl(priv, duplex); - writel(ctrl, priv->ioaddr + MAC_CTRL_REG); + if (ctrl != old_ctrl) @@ -211684,7 +257615,25 @@ index 3d67d1fa36906..6f579f4989934 100644 stmmac_mac_set(priv, priv->ioaddr, true); if (phy && priv->dma_cap.eee) { -@@ -2232,6 +2260,23 @@ static void stmmac_stop_tx_dma(struct stmmac_priv *priv, u32 chan) +- priv->eee_active = phy_init_eee(phy, 1) >= 0; ++ priv->eee_active = ++ phy_init_eee(phy, !priv->plat->rx_clk_runs_in_lpi) >= 0; + priv->eee_enabled = stmmac_eee_init(priv); + priv->tx_lpi_enabled = priv->eee_enabled; + stmmac_set_eee_pls(priv, priv->hw, true); +@@ -1206,6 +1243,11 @@ static int stmmac_init_phy(struct net_device *dev) + int addr = priv->plat->phy_addr; + struct phy_device *phydev; + ++ if (addr < 0) { ++ netdev_err(priv->dev, "no phy found\n"); ++ return -ENODEV; ++ } ++ + phydev = mdiobus_get_phy(priv->mii, addr); + if (!phydev) { + netdev_err(priv->dev, "no phy at addr %d\n", addr); +@@ -2232,6 +2274,23 @@ static void stmmac_stop_tx_dma(struct stmmac_priv *priv, u32 chan) stmmac_stop_tx(priv, priv->ioaddr, chan); } @@ -211708,7 +257657,7 @@ index 3d67d1fa36906..6f579f4989934 100644 /** * stmmac_start_all_dma - start all RX and TX DMA channels * @priv: driver private structure -@@ -2602,8 +2647,8 @@ static int stmmac_tx_clean(struct stmmac_priv *priv, int budget, u32 queue) +@@ -2602,8 +2661,8 @@ static int stmmac_tx_clean(struct stmmac_priv *priv, int budget, u32 queue) if (priv->eee_enabled && !priv->tx_path_in_lpi_mode && priv->eee_sw_timer_en) { @@ -211719,7 +257668,7 @@ index 3d67d1fa36906..6f579f4989934 100644 } /* We still have pending packets, let's call for a new scheduling */ -@@ -2867,8 +2912,10 @@ static int stmmac_init_dma_engine(struct stmmac_priv *priv) +@@ -2867,8 +2926,10 @@ static int stmmac_init_dma_engine(struct stmmac_priv *priv) stmmac_axi(priv, priv->ioaddr, priv->plat->axi); /* DMA CSR Channel configuration */ @@ -211731,7 +257680,7 @@ index 3d67d1fa36906..6f579f4989934 100644 /* DMA RX Channel Configuration */ for (chan = 0; chan < rx_channels_count; chan++) { -@@ -3203,7 +3250,7 @@ static int stmmac_fpe_start_wq(struct stmmac_priv *priv) +@@ -3203,7 +3264,7 @@ static int stmmac_fpe_start_wq(struct 
stmmac_priv *priv) /** * stmmac_hw_setup - setup mac in a usable state. * @dev : pointer to the device structure. @@ -211740,7 +257689,7 @@ index 3d67d1fa36906..6f579f4989934 100644 * Description: * this is the main function to setup the HW in a usable state because the * dma engine is reset, the core registers are configured (e.g. AXI, -@@ -3213,7 +3260,7 @@ static int stmmac_fpe_start_wq(struct stmmac_priv *priv) +@@ -3213,7 +3274,7 @@ static int stmmac_fpe_start_wq(struct stmmac_priv *priv) * 0 on success and an appropriate (-)ve integer as defined in errno.h * file on failure. */ @@ -211749,7 +257698,7 @@ index 3d67d1fa36906..6f579f4989934 100644 { struct stmmac_priv *priv = netdev_priv(dev); u32 rx_cnt = priv->plat->rx_queues_to_use; -@@ -3270,18 +3317,22 @@ static int stmmac_hw_setup(struct net_device *dev, bool init_ptp) +@@ -3270,18 +3331,22 @@ static int stmmac_hw_setup(struct net_device *dev, bool init_ptp) stmmac_mmc_setup(priv); @@ -211780,7 +257729,7 @@ index 3d67d1fa36906..6f579f4989934 100644 priv->eee_tw_timer = STMMAC_DEFAULT_TWT_LS; /* Convert the timer from msec to usec */ -@@ -3635,7 +3686,7 @@ static int stmmac_request_irq(struct net_device *dev) +@@ -3635,7 +3700,7 @@ static int stmmac_request_irq(struct net_device *dev) * 0 on success and an appropriate (-)ve integer as defined in errno.h * file on failure. */ @@ -211789,7 +257738,7 @@ index 3d67d1fa36906..6f579f4989934 100644 { struct stmmac_priv *priv = netdev_priv(dev); int mode = priv->plat->phy_interface; -@@ -3706,6 +3757,15 @@ int stmmac_open(struct net_device *dev) +@@ -3706,6 +3771,15 @@ int stmmac_open(struct net_device *dev) goto init_error; } @@ -211805,7 +257754,7 @@ index 3d67d1fa36906..6f579f4989934 100644 ret = stmmac_hw_setup(dev, true); if (ret < 0) { netdev_err(priv->dev, "%s: Hw setup failed\n", __func__); -@@ -3724,6 +3784,7 @@ int stmmac_open(struct net_device *dev) +@@ -3724,6 +3798,7 @@ int stmmac_open(struct net_device *dev) stmmac_enable_all_queues(priv); netif_tx_start_all_queues(priv->dev); @@ -211813,7 +257762,7 @@ index 3d67d1fa36906..6f579f4989934 100644 return 0; -@@ -3759,11 +3820,13 @@ static void stmmac_fpe_stop_wq(struct stmmac_priv *priv) +@@ -3759,11 +3834,13 @@ static void stmmac_fpe_stop_wq(struct stmmac_priv *priv) * Description: * This is the stop entry point of the driver. 
*/ @@ -211828,7 +257777,7 @@ index 3d67d1fa36906..6f579f4989934 100644 if (device_may_wakeup(priv->device)) phylink_speed_down(priv->phylink, false); /* Stop and disconnect the PHY */ -@@ -3792,6 +3855,10 @@ int stmmac_release(struct net_device *dev) +@@ -3792,6 +3869,10 @@ int stmmac_release(struct net_device *dev) /* Disable the MAC Rx/Tx */ stmmac_mac_set(priv, priv->ioaddr, false); @@ -211839,7 +257788,7 @@ index 3d67d1fa36906..6f579f4989934 100644 netif_carrier_off(dev); stmmac_release_ptp(priv); -@@ -5499,8 +5566,6 @@ static int stmmac_set_features(struct net_device *netdev, +@@ -5499,8 +5580,6 @@ static int stmmac_set_features(struct net_device *netdev, netdev_features_t features) { struct stmmac_priv *priv = netdev_priv(netdev); @@ -211848,7 +257797,7 @@ index 3d67d1fa36906..6f579f4989934 100644 /* Keep the COE Type in case of csum is supporting */ if (features & NETIF_F_RXCSUM) -@@ -5512,10 +5577,13 @@ static int stmmac_set_features(struct net_device *netdev, +@@ -5512,10 +5591,13 @@ static int stmmac_set_features(struct net_device *netdev, */ stmmac_rx_ipc(priv, priv->hw); @@ -211865,7 +257814,7 @@ index 3d67d1fa36906..6f579f4989934 100644 return 0; } -@@ -6421,6 +6489,143 @@ void stmmac_enable_tx_queue(struct stmmac_priv *priv, u32 queue) +@@ -6421,6 +6503,146 @@ void stmmac_enable_tx_queue(struct stmmac_priv *priv, u32 queue) spin_unlock_irqrestore(&ch->lock, flags); } @@ -211874,6 +257823,9 @@ index 3d67d1fa36906..6f579f4989934 100644 + struct stmmac_priv *priv = netdev_priv(dev); + u32 chan; + ++ /* Ensure tx function is not running */ ++ netif_tx_disable(dev); ++ + /* Disable NAPI process */ + stmmac_disable_all_queues(priv); + @@ -212009,7 +257961,17 @@ index 3d67d1fa36906..6f579f4989934 100644 int stmmac_xsk_wakeup(struct net_device *dev, u32 queue, u32 flags) { struct stmmac_priv *priv = netdev_priv(dev); -@@ -6889,7 +7094,7 @@ int stmmac_dvr_probe(struct device *device, +@@ -6830,7 +7052,8 @@ int stmmac_dvr_probe(struct device *device, + priv->wq = create_singlethread_workqueue("stmmac_wq"); + if (!priv->wq) { + dev_err(priv->device, "failed to create workqueue\n"); +- return -ENOMEM; ++ ret = -ENOMEM; ++ goto error_wq_init; + } + + INIT_WORK(&priv->service_task, stmmac_service_task); +@@ -6889,7 +7112,7 @@ int stmmac_dvr_probe(struct device *device, dev_info(priv->device, "TSO feature enabled\n"); } @@ -212018,7 +257980,7 @@ index 3d67d1fa36906..6f579f4989934 100644 ndev->hw_features |= NETIF_F_GRO; priv->sph_cap = true; priv->sph = priv->sph_cap; -@@ -7032,18 +7237,13 @@ int stmmac_dvr_probe(struct device *device, +@@ -7032,18 +7255,13 @@ int stmmac_dvr_probe(struct device *device, goto error_netdev_register; } @@ -212040,7 +258002,7 @@ index 3d67d1fa36906..6f579f4989934 100644 /* Let pm_runtime_put() disable the clocks. * If CONFIG_PM is not enabled, the clocks will stay powered. 
*/ -@@ -7051,8 +7251,6 @@ int stmmac_dvr_probe(struct device *device, +@@ -7051,8 +7269,6 @@ int stmmac_dvr_probe(struct device *device, return ret; @@ -212049,7 +258011,15 @@ index 3d67d1fa36906..6f579f4989934 100644 error_netdev_register: phylink_destroy(priv->phylink); error_xpcs_setup: -@@ -7083,6 +7281,8 @@ int stmmac_dvr_remove(struct device *dev) +@@ -7064,6 +7280,7 @@ error_mdio_register: + stmmac_napi_del(ndev); + error_hw_init: + destroy_workqueue(priv->wq); ++error_wq_init: + bitmap_free(priv->af_xdp_zc_qps); + + return ret; +@@ -7083,6 +7300,8 @@ int stmmac_dvr_remove(struct device *dev) netdev_info(priv->dev, "%s: removing driver", __func__); @@ -212058,7 +258028,7 @@ index 3d67d1fa36906..6f579f4989934 100644 stmmac_stop_all_dma(priv); stmmac_mac_set(priv, priv->ioaddr, false); netif_carrier_off(ndev); -@@ -7101,8 +7301,6 @@ int stmmac_dvr_remove(struct device *dev) +@@ -7101,8 +7320,6 @@ int stmmac_dvr_remove(struct device *dev) if (priv->plat->stmmac_rst) reset_control_assert(priv->plat->stmmac_rst); reset_control_assert(priv->plat->stmmac_ahb_rst); @@ -212067,7 +258037,7 @@ index 3d67d1fa36906..6f579f4989934 100644 if (priv->hw->pcs != STMMAC_PCS_TBI && priv->hw->pcs != STMMAC_PCS_RTBI) stmmac_mdio_unregister(ndev); -@@ -7110,6 +7308,9 @@ int stmmac_dvr_remove(struct device *dev) +@@ -7110,6 +7327,9 @@ int stmmac_dvr_remove(struct device *dev) mutex_destroy(&priv->lock); bitmap_free(priv->af_xdp_zc_qps); @@ -212077,7 +258047,7 @@ index 3d67d1fa36906..6f579f4989934 100644 return 0; } EXPORT_SYMBOL_GPL(stmmac_dvr_remove); -@@ -7280,6 +7481,7 @@ int stmmac_resume(struct device *dev) +@@ -7280,6 +7500,7 @@ int stmmac_resume(struct device *dev) stmmac_restore_hw_vlan_rx_fltr(priv, ndev, priv->hw); stmmac_enable_all_queues(priv); @@ -212085,7 +258055,7 @@ index 3d67d1fa36906..6f579f4989934 100644 mutex_unlock(&priv->lock); rtnl_unlock(); -@@ -7296,7 +7498,7 @@ static int __init stmmac_cmdline_opt(char *str) +@@ -7296,7 +7517,7 @@ static int __init stmmac_cmdline_opt(char *str) char *opt; if (!str || !*str) @@ -212094,7 +258064,7 @@ index 3d67d1fa36906..6f579f4989934 100644 while ((opt = strsep(&str, ",")) != NULL) { if (!strncmp(opt, "debug:", 6)) { if (kstrtoint(opt + 6, 0, &debug)) -@@ -7327,11 +7529,11 @@ static int __init stmmac_cmdline_opt(char *str) +@@ -7327,11 +7548,11 @@ static int __init stmmac_cmdline_opt(char *str) goto err; } } @@ -212131,9 +258101,24 @@ index fcf17d8a0494b..644bb54f5f020 100644 static int __maybe_unused stmmac_pci_suspend(struct device *dev) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c -index 232ac98943cd0..9f5cac4000da6 100644 +index 232ac98943cd0..e12df9d99089f 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c +@@ -108,10 +108,10 @@ static struct stmmac_axi *stmmac_axi_setup(struct platform_device *pdev) + + axi->axi_lpi_en = of_property_read_bool(np, "snps,lpi_en"); + axi->axi_xit_frm = of_property_read_bool(np, "snps,xit_frm"); +- axi->axi_kbbe = of_property_read_bool(np, "snps,axi_kbbe"); +- axi->axi_fb = of_property_read_bool(np, "snps,axi_fb"); +- axi->axi_mb = of_property_read_bool(np, "snps,axi_mb"); +- axi->axi_rb = of_property_read_bool(np, "snps,axi_rb"); ++ axi->axi_kbbe = of_property_read_bool(np, "snps,kbbe"); ++ axi->axi_fb = of_property_read_bool(np, "snps,fb"); ++ axi->axi_mb = of_property_read_bool(np, "snps,mb"); ++ axi->axi_rb = of_property_read_bool(np, "snps,rb"); + + if 
(of_property_read_u32(np, "snps,wr_osr_lmt", &axi->axi_wr_osr_lmt)) + axi->axi_wr_osr_lmt = 1; @@ -431,8 +431,7 @@ stmmac_probe_config_dt(struct platform_device *pdev, u8 *mac) plat->phylink_node = np; @@ -212144,6 +258129,15 @@ index 232ac98943cd0..9f5cac4000da6 100644 plat->bus_id = of_alias_get_id(np, "ethernet"); if (plat->bus_id < 0) +@@ -559,7 +558,7 @@ stmmac_probe_config_dt(struct platform_device *pdev, u8 *mac) + dma_cfg->mixed_burst = of_property_read_bool(np, "snps,mixed-burst"); + + plat->force_thresh_dma_mode = of_property_read_bool(np, "snps,force_thresh_dma_mode"); +- if (plat->force_thresh_dma_mode) { ++ if (plat->force_thresh_dma_mode && plat->force_sf_dma_mode) { + plat->force_sf_dma_mode = 0; + dev_warn(&pdev->dev, + "force_sf_dma_mode is ignored if force_thresh_dma_mode is set.\n"); @@ -816,7 +815,13 @@ static int __maybe_unused stmmac_pltfr_noirq_resume(struct device *dev) if (ret) return ret; @@ -212160,7 +258154,7 @@ index 232ac98943cd0..9f5cac4000da6 100644 return 0; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c -index 580cc035536bd..ac8bc1c8614d3 100644 +index 580cc035536bd..487418ef9b4f8 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c @@ -102,7 +102,7 @@ static int stmmac_adjust_time(struct ptp_clock_info *ptp, s64 delta) @@ -212172,7 +258166,19 @@ index 580cc035536bd..ac8bc1c8614d3 100644 priv->plat->est->ctr[0]; time = stmmac_calc_tas_basetime(basetime, current_time_ns, -@@ -297,9 +297,6 @@ void stmmac_ptp_register(struct stmmac_priv *priv) +@@ -229,7 +229,10 @@ static int stmmac_enable(struct ptp_clock_info *ptp, + } + writel(acr_value, ptpaddr + PTP_ACR); + mutex_unlock(&priv->aux_ts_lock); +- ret = 0; ++ /* wait for auxts fifo clear to finish */ ++ ret = readl_poll_timeout(ptpaddr + PTP_ACR, acr_value, ++ !(acr_value & PTP_ACR_ATSFC), ++ 10, 10000); + break; + + default: +@@ -297,9 +300,6 @@ void stmmac_ptp_register(struct stmmac_priv *priv) { int i; @@ -212182,8 +258188,21 @@ index 580cc035536bd..ac8bc1c8614d3 100644 for (i = 0; i < priv->dma_cap.pps_out_num; i++) { if (i >= STMMAC_PPS_MAX) break; +diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.h b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.h +index 53172a4398101..bf619295d079f 100644 +--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.h ++++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.h +@@ -64,7 +64,7 @@ + #define PTP_TCR_TSENMACADDR BIT(18) + + /* SSIR defines */ +-#define PTP_SSIR_SSINC_MASK 0xff ++#define PTP_SSIR_SSINC_MAX 0xff + #define GMAC4_PTP_SSIR_SSINC_SHIFT 16 + + /* Auxiliary Control defines */ diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_selftests.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_selftests.c -index 0462dcc93e536..dd5c4ef92ef3c 100644 +index 0462dcc93e536..ea7200b7b6477 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_selftests.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_selftests.c @@ -1084,8 +1084,9 @@ static int stmmac_test_rxp(struct stmmac_priv *priv) @@ -212235,6 +258254,25 @@ index 0462dcc93e536..dd5c4ef92ef3c 100644 cleanup_actions: kfree(actions); cleanup_exts: +@@ -1655,12 +1654,16 @@ static int stmmac_test_arpoffload(struct stmmac_priv *priv) + } + + ret = stmmac_set_arp_offload(priv, priv->hw, true, ip_addr); +- if (ret) ++ if (ret) { ++ kfree_skb(skb); + goto cleanup; ++ } + + ret = dev_set_promiscuity(priv->dev, 1); +- if (ret) ++ if (ret) { ++ kfree_skb(skb); + goto cleanup; ++ } + + 
ret = dev_direct_xmit(skb, 0); + if (ret) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c index 8160087ee92f2..d0a2b289f4603 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c @@ -212482,10 +258520,35 @@ index affcf92cd3aa5..6cba4d2c44830 100644 This driver supports TI's DaVinci MDIO module. diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.c b/drivers/net/ethernet/ti/am65-cpsw-nuss.c -index 130346f74ee8a..901571c2626a1 100644 +index 130346f74ee8a..37b9a798dd624 100644 --- a/drivers/net/ethernet/ti/am65-cpsw-nuss.c +++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.c -@@ -1802,6 +1802,7 @@ static int am65_cpsw_init_cpts(struct am65_cpsw_common *common) +@@ -564,7 +564,15 @@ static int am65_cpsw_nuss_common_stop(struct am65_cpsw_common *common) + k3_udma_glue_disable_tx_chn(common->tx_chns[i].tx_chn); + } + ++ reinit_completion(&common->tdown_complete); + k3_udma_glue_tdown_rx_chn(common->rx_chns.rx_chn, true); ++ ++ if (common->pdata.quirks & AM64_CPSW_QUIRK_DMA_RX_TDOWN_IRQ) { ++ i = wait_for_completion_timeout(&common->tdown_complete, msecs_to_jiffies(1000)); ++ if (!i) ++ dev_err(common->dev, "rx teardown timeout\n"); ++ } ++ + napi_disable(&common->napi_rx); + + for (i = 0; i < AM65_CPSW_MAX_RX_FLOWS; i++) +@@ -786,6 +794,8 @@ static int am65_cpsw_nuss_rx_packets(struct am65_cpsw_common *common, + + if (cppi5_desc_is_tdcm(desc_dma)) { + dev_dbg(dev, "%s RX tdown flow: %u\n", __func__, flow_idx); ++ if (common->pdata.quirks & AM64_CPSW_QUIRK_DMA_RX_TDOWN_IRQ) ++ complete(&common->tdown_complete); + return 0; + } + +@@ -1802,6 +1812,7 @@ static int am65_cpsw_init_cpts(struct am65_cpsw_common *common) if (IS_ERR(cpts)) { int ret = PTR_ERR(cpts); @@ -212493,7 +258556,16 @@ index 130346f74ee8a..901571c2626a1 100644 if (ret == -EOPNOTSUPP) { dev_info(dev, "cpts disabled\n"); return 0; -@@ -2466,7 +2467,6 @@ static int am65_cpsw_nuss_register_devlink(struct am65_cpsw_common *common) +@@ -2053,7 +2064,7 @@ static void am65_cpsw_nuss_cleanup_ndev(struct am65_cpsw_common *common) + + for (i = 0; i < common->port_num; i++) { + port = &common->ports[i]; +- if (port->ndev) ++ if (port->ndev && port->ndev->reg_state == NETREG_REGISTERED) + unregister_netdev(port->ndev); + } + } +@@ -2466,7 +2477,6 @@ static int am65_cpsw_nuss_register_devlink(struct am65_cpsw_common *common) port->port_id, ret); goto dl_port_unreg; } @@ -212501,7 +258573,7 @@ index 130346f74ee8a..901571c2626a1 100644 } return ret; -@@ -2513,6 +2513,7 @@ static void am65_cpsw_unregister_devlink(struct am65_cpsw_common *common) +@@ -2513,6 +2523,7 @@ static void am65_cpsw_unregister_devlink(struct am65_cpsw_common *common) static int am65_cpsw_nuss_register_ndevs(struct am65_cpsw_common *common) { struct device *dev = common->dev; @@ -212509,7 +258581,7 @@ index 130346f74ee8a..901571c2626a1 100644 struct am65_cpsw_port *port; int ret = 0, i; -@@ -2529,6 +2530,10 @@ static int am65_cpsw_nuss_register_ndevs(struct am65_cpsw_common *common) +@@ -2529,6 +2540,10 @@ static int am65_cpsw_nuss_register_ndevs(struct am65_cpsw_common *common) return ret; } @@ -212520,7 +258592,7 @@ index 130346f74ee8a..901571c2626a1 100644 for (i = 0; i < common->port_num; i++) { port = &common->ports[i]; -@@ -2541,25 +2546,24 @@ static int am65_cpsw_nuss_register_ndevs(struct am65_cpsw_common *common) +@@ -2541,25 +2556,24 @@ static int am65_cpsw_nuss_register_ndevs(struct am65_cpsw_common *common) i, ret); goto err_cleanup_ndev; } @@ -212551,7 +258623,16 @@ index 
130346f74ee8a..901571c2626a1 100644 return ret; } -@@ -2668,9 +2672,9 @@ static int am65_cpsw_nuss_probe(struct platform_device *pdev) +@@ -2605,7 +2619,7 @@ static const struct am65_cpsw_pdata j721e_pdata = { + }; + + static const struct am65_cpsw_pdata am64x_cpswxg_pdata = { +- .quirks = 0, ++ .quirks = AM64_CPSW_QUIRK_DMA_RX_TDOWN_IRQ, + .ale_dev_id = "am64-cpswxg", + .fdqring_mode = K3_RINGACC_RING_MODE_RING, + }; +@@ -2668,9 +2682,9 @@ static int am65_cpsw_nuss_probe(struct platform_device *pdev) if (!node) return -ENOENT; common->port_num = of_get_child_count(node); @@ -212562,8 +258643,20 @@ index 130346f74ee8a..901571c2626a1 100644 common->rx_flow_id_base = -1; init_completion(&common->tdown_complete); +diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.h b/drivers/net/ethernet/ti/am65-cpsw-nuss.h +index 048ed10143c17..74569c8ed2eca 100644 +--- a/drivers/net/ethernet/ti/am65-cpsw-nuss.h ++++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.h +@@ -84,6 +84,7 @@ struct am65_cpsw_rx_chn { + }; + + #define AM65_CPSW_QUIRK_I2027_NO_TX_CSUM BIT(0) ++#define AM64_CPSW_QUIRK_DMA_RX_TDOWN_IRQ BIT(1) + + struct am65_cpsw_pdata { + u32 quirks; diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c -index 66f7ddd9b1f99..e226ecd95a2cc 100644 +index 66f7ddd9b1f99..ca587fe281507 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -349,7 +349,7 @@ static void cpsw_rx_handler(void *token, int len, int status) @@ -212593,6 +258686,15 @@ index 66f7ddd9b1f99..e226ecd95a2cc 100644 ret = cpdma_chan_submit_mapped(cpsw->rxv[ch].ch, new_page, dma, pkt_size, 0); if (ret < 0) { +@@ -856,6 +856,8 @@ static int cpsw_ndo_open(struct net_device *ndev) + + err_cleanup: + if (!cpsw->usage_count) { ++ napi_disable(&cpsw->napi_rx); ++ napi_disable(&cpsw->napi_tx); + cpdma_ctlr_stop(cpsw->dma); + cpsw_destroy_xdp_rxqs(cpsw); + } diff --git a/drivers/net/ethernet/ti/cpsw_ale.c b/drivers/net/ethernet/ti/cpsw_ale.c index 0c75e0576ee1f..1ef0aaef5c61c 100644 --- a/drivers/net/ethernet/ti/cpsw_ale.c @@ -213105,6 +259207,40 @@ index a4efd5e351584..995633e1ec5e0 100644 return 0; } #endif +diff --git a/drivers/net/ethernet/ti/netcp_core.c b/drivers/net/ethernet/ti/netcp_core.c +index eda2961c0fe2a..07bdeece1723d 100644 +--- a/drivers/net/ethernet/ti/netcp_core.c ++++ b/drivers/net/ethernet/ti/netcp_core.c +@@ -1262,7 +1262,7 @@ out: + } + + /* Submit the packet */ +-static int netcp_ndo_start_xmit(struct sk_buff *skb, struct net_device *ndev) ++static netdev_tx_t netcp_ndo_start_xmit(struct sk_buff *skb, struct net_device *ndev) + { + struct netcp_intf *netcp = netdev_priv(ndev); + struct netcp_stats *tx_stats = &netcp->stats; +diff --git a/drivers/net/ethernet/tundra/tsi108_eth.c b/drivers/net/ethernet/tundra/tsi108_eth.c +index cf0917b29e300..f175c098698d4 100644 +--- a/drivers/net/ethernet/tundra/tsi108_eth.c ++++ b/drivers/net/ethernet/tundra/tsi108_eth.c +@@ -1302,12 +1302,15 @@ static int tsi108_open(struct net_device *dev) + + data->rxring = dma_alloc_coherent(&data->pdev->dev, rxring_size, + &data->rxdma, GFP_KERNEL); +- if (!data->rxring) ++ if (!data->rxring) { ++ free_irq(data->irq_num, dev); + return -ENOMEM; ++ } + + data->txring = dma_alloc_coherent(&data->pdev->dev, txring_size, + &data->txdma, GFP_KERNEL); + if (!data->txring) { ++ free_irq(data->irq_num, dev); + dma_free_coherent(&data->pdev->dev, rxring_size, data->rxring, + data->rxdma); + return -ENOMEM; diff --git a/drivers/net/ethernet/xilinx/ll_temac_main.c b/drivers/net/ethernet/xilinx/ll_temac_main.c 
index 463094ced104a..2ab29efa6b6e4 100644 --- a/drivers/net/ethernet/xilinx/ll_temac_main.c @@ -213459,9 +259595,18 @@ index 871b5ec3183d6..fbbbcfe0e891e 100644 dev_err(&pdev->dev, "phy-handle required for 1000BaseX/SGMII\n"); ret = -EINVAL; diff --git a/drivers/net/ethernet/xilinx/xilinx_emaclite.c b/drivers/net/ethernet/xilinx/xilinx_emaclite.c -index b780aad3550aa..97c1d1ecba34c 100644 +index b780aad3550aa..b1971c4d5313e 100644 --- a/drivers/net/ethernet/xilinx/xilinx_emaclite.c +++ b/drivers/net/ethernet/xilinx/xilinx_emaclite.c +@@ -543,7 +543,7 @@ static void xemaclite_tx_timeout(struct net_device *dev, unsigned int txqueue) + xemaclite_enable_interrupts(lp); + + if (lp->deferred_skb) { +- dev_kfree_skb(lp->deferred_skb); ++ dev_kfree_skb_irq(lp->deferred_skb); + lp->deferred_skb = NULL; + dev->stats.tx_errors++; + } @@ -822,10 +822,10 @@ static int xemaclite_mdio_write(struct mii_bus *bus, int phy_id, int reg, static int xemaclite_mdio_setup(struct net_local *lp, struct device *dev) { @@ -213575,6 +259720,39 @@ index 39234852e01b0..20f6aa508003b 100644 return -ENXIO; ixp_clock.caps = ptp_ixp_caps; +diff --git a/drivers/net/fddi/defxx.c b/drivers/net/fddi/defxx.c +index 6d1e3f49a3d3d..ebf502290e5f3 100644 +--- a/drivers/net/fddi/defxx.c ++++ b/drivers/net/fddi/defxx.c +@@ -3831,10 +3831,24 @@ static int dfx_init(void) + int status; + + status = pci_register_driver(&dfx_pci_driver); +- if (!status) +- status = eisa_driver_register(&dfx_eisa_driver); +- if (!status) +- status = tc_register_driver(&dfx_tc_driver); ++ if (status) ++ goto err_pci_register; ++ ++ status = eisa_driver_register(&dfx_eisa_driver); ++ if (status) ++ goto err_eisa_register; ++ ++ status = tc_register_driver(&dfx_tc_driver); ++ if (status) ++ goto err_tc_register; ++ ++ return 0; ++ ++err_tc_register: ++ eisa_driver_unregister(&dfx_eisa_driver); ++err_eisa_register: ++ pci_unregister_driver(&dfx_pci_driver); ++err_pci_register: + return status; + } + diff --git a/drivers/net/fjes/fjes_main.c b/drivers/net/fjes/fjes_main.c index 185c8a3986816..1d1808afd5295 100644 --- a/drivers/net/fjes/fjes_main.c @@ -213690,6 +259868,32 @@ index 6192244b304ab..36a9fbb704029 100644 } /* Perform I/O control on an active 6pack channel. 
*/ +diff --git a/drivers/net/hamradio/baycom_epp.c b/drivers/net/hamradio/baycom_epp.c +index 6b6f28d5b8d5d..f9d03f7b9101e 100644 +--- a/drivers/net/hamradio/baycom_epp.c ++++ b/drivers/net/hamradio/baycom_epp.c +@@ -758,7 +758,7 @@ static void epp_bh(struct work_struct *work) + * ===================== network driver interface ========================= + */ + +-static int baycom_send_packet(struct sk_buff *skb, struct net_device *dev) ++static netdev_tx_t baycom_send_packet(struct sk_buff *skb, struct net_device *dev) + { + struct baycom_state *bc = netdev_priv(dev); + +diff --git a/drivers/net/hamradio/bpqether.c b/drivers/net/hamradio/bpqether.c +index d967b0748773d..027b04795421d 100644 +--- a/drivers/net/hamradio/bpqether.c ++++ b/drivers/net/hamradio/bpqether.c +@@ -534,7 +534,7 @@ static int bpq_device_event(struct notifier_block *this, + if (!net_eq(dev_net(dev), &init_net)) + return NOTIFY_DONE; + +- if (!dev_is_ethdev(dev)) ++ if (!dev_is_ethdev(dev) && !bpq_get_ax25_dev(dev)) + return NOTIFY_DONE; + + switch (event) { diff --git a/drivers/net/hamradio/mkiss.c b/drivers/net/hamradio/mkiss.c index 8666110bec555..763d435a9564c 100644 --- a/drivers/net/hamradio/mkiss.c @@ -213720,6 +259924,34 @@ index 8666110bec555..763d435a9564c 100644 free_netdev(ax->dev); } +diff --git a/drivers/net/hamradio/scc.c b/drivers/net/hamradio/scc.c +index e0bb131a33d76..39db3cae4dd1a 100644 +--- a/drivers/net/hamradio/scc.c ++++ b/drivers/net/hamradio/scc.c +@@ -301,12 +301,12 @@ static inline void scc_discard_buffers(struct scc_channel *scc) + spin_lock_irqsave(&scc->lock, flags); + if (scc->tx_buff != NULL) + { +- dev_kfree_skb(scc->tx_buff); ++ dev_kfree_skb_irq(scc->tx_buff); + scc->tx_buff = NULL; + } + + while (!skb_queue_empty(&scc->tx_queue)) +- dev_kfree_skb(skb_dequeue(&scc->tx_queue)); ++ dev_kfree_skb_irq(skb_dequeue(&scc->tx_queue)); + + spin_unlock_irqrestore(&scc->lock, flags); + } +@@ -1668,7 +1668,7 @@ static netdev_tx_t scc_net_tx(struct sk_buff *skb, struct net_device *dev) + if (skb_queue_len(&scc->tx_queue) > scc->dev->tx_queue_len) { + struct sk_buff *skb_del; + skb_del = skb_dequeue(&scc->tx_queue); +- dev_kfree_skb(skb_del); ++ dev_kfree_skb_irq(skb_del); + } + skb_queue_tail(&scc->tx_queue, skb); + netif_trans_update(dev); diff --git a/drivers/net/hamradio/yam.c b/drivers/net/hamradio/yam.c index 6ddacbdb224ba..528d57a435394 100644 --- a/drivers/net/hamradio/yam.c @@ -213935,9 +260167,18 @@ index 23ee0b14cbfa1..2f5e7b31032aa 100644 build[ret] = 0; dev_info(&usb_dev->dev, "Firmware: build %s\n", build); diff --git a/drivers/net/ieee802154/ca8210.c b/drivers/net/ieee802154/ca8210.c -index 3a2824f24caa8..96592a20c61ff 100644 +index 3a2824f24caa8..0362917fce7a9 100644 --- a/drivers/net/ieee802154/ca8210.c +++ b/drivers/net/ieee802154/ca8210.c +@@ -927,7 +927,7 @@ static int ca8210_spi_transfer( + + dev_dbg(&spi->dev, "%s called\n", __func__); + +- cas_ctl = kmalloc(sizeof(*cas_ctl), GFP_ATOMIC); ++ cas_ctl = kzalloc(sizeof(*cas_ctl), GFP_ATOMIC); + if (!cas_ctl) + return -ENOMEM; + @@ -1771,6 +1771,7 @@ static int ca8210_async_xmit_complete( status ); @@ -213958,7 +260199,7 @@ index 3a2824f24caa8..96592a20c61ff 100644 IEEE802154_HW_AFILT | IEEE802154_HW_OMIT_CKSUM | diff --git a/drivers/net/ieee802154/cc2520.c b/drivers/net/ieee802154/cc2520.c -index 89c046b204e0c..4517517215f2b 100644 +index 89c046b204e0c..a8369bfa4050b 100644 --- a/drivers/net/ieee802154/cc2520.c +++ b/drivers/net/ieee802154/cc2520.c @@ -504,6 +504,7 @@ cc2520_tx(struct ieee802154_hw *hw, struct sk_buff *skb) @@ 
-213969,6 +260210,15 @@ index 89c046b204e0c..4517517215f2b 100644 dev_err(&priv->spi->dev, "cc2520 tx underflow exception\n"); goto err_tx; } +@@ -969,7 +970,7 @@ static int cc2520_hw_init(struct cc2520_private *priv) + + if (timeout-- <= 0) { + dev_err(&priv->spi->dev, "oscillator start failed!\n"); +- return ret; ++ return -ETIMEDOUT; + } + udelay(1); + } while (!(status & CC2520_STATUS_XOSC32M_STABLE)); diff --git a/drivers/net/ieee802154/mac802154_hwsim.c b/drivers/net/ieee802154/mac802154_hwsim.c index 8caa61ec718f5..36f1c5aa98fc6 100644 --- a/drivers/net/ieee802154/mac802154_hwsim.c @@ -214335,6 +260585,54 @@ index 0a859d10312dc..0313cdc607de3 100644 u32 replenish_ready; atomic_t replenish_saved; atomic_t replenish_backlog; +diff --git a/drivers/net/ipa/ipa_interrupt.c b/drivers/net/ipa/ipa_interrupt.c +index b35170a93b0fa..0c9ff8c055a05 100644 +--- a/drivers/net/ipa/ipa_interrupt.c ++++ b/drivers/net/ipa/ipa_interrupt.c +@@ -122,6 +122,16 @@ out_power_put: + return IRQ_HANDLED; + } + ++void ipa_interrupt_irq_disable(struct ipa *ipa) ++{ ++ disable_irq(ipa->interrupt->irq); ++} ++ ++void ipa_interrupt_irq_enable(struct ipa *ipa) ++{ ++ enable_irq(ipa->interrupt->irq); ++} ++ + /* Common function used to enable/disable TX_SUSPEND for an endpoint */ + static void ipa_interrupt_suspend_control(struct ipa_interrupt *interrupt, + u32 endpoint_id, bool enable) +diff --git a/drivers/net/ipa/ipa_interrupt.h b/drivers/net/ipa/ipa_interrupt.h +index 231390cea52a2..16aa84ee0094f 100644 +--- a/drivers/net/ipa/ipa_interrupt.h ++++ b/drivers/net/ipa/ipa_interrupt.h +@@ -85,6 +85,22 @@ void ipa_interrupt_suspend_clear_all(struct ipa_interrupt *interrupt); + */ + void ipa_interrupt_simulate_suspend(struct ipa_interrupt *interrupt); + ++/** ++ * ipa_interrupt_irq_enable() - Enable IPA interrupts ++ * @ipa: IPA pointer ++ * ++ * This enables the IPA interrupt line ++ */ ++void ipa_interrupt_irq_enable(struct ipa *ipa); ++ ++/** ++ * ipa_interrupt_irq_disable() - Disable IPA interrupts ++ * @ipa: IPA pointer ++ * ++ * This disables the IPA interrupt line ++ */ ++void ipa_interrupt_irq_disable(struct ipa *ipa); ++ + /** + * ipa_interrupt_config() - Configure the IPA interrupt framework + * @ipa: IPA pointer diff --git a/drivers/net/ipa/ipa_main.c b/drivers/net/ipa/ipa_main.c index cdfa98a76e1f4..a448ec198bee1 100644 --- a/drivers/net/ipa/ipa_main.c @@ -214397,7 +260695,7 @@ index ad116bcc0580e..d0ab4d70c303b 100644 if (ret < 0) { dev_err(dev, "error %d getting power to handle crash\n", ret); diff --git a/drivers/net/ipa/ipa_power.c b/drivers/net/ipa/ipa_power.c -index b1c6c0fcb654f..f2989aac47a62 100644 +index b1c6c0fcb654f..07fb367cfc99d 100644 --- a/drivers/net/ipa/ipa_power.c +++ b/drivers/net/ipa/ipa_power.c @@ -11,6 +11,8 @@ @@ -214425,7 +260723,38 @@ index b1c6c0fcb654f..f2989aac47a62 100644 spinlock_t spinlock; /* used with STOPPED/STARTED power flags */ DECLARE_BITMAP(flags, IPA_POWER_FLAG_COUNT); u32 interconnect_count; -@@ -382,6 +386,47 @@ void ipa_power_modem_queue_active(struct ipa *ipa) +@@ -273,6 +277,17 @@ static int ipa_suspend(struct device *dev) + + __set_bit(IPA_POWER_FLAG_SYSTEM, ipa->power->flags); + ++ /* Increment the disable depth to ensure that the IRQ won't ++ * be re-enabled until the matching _enable call in ++ * ipa_resume(). We do this to ensure that the interrupt ++ * handler won't run whilst PM runtime is disabled. ++ * ++ * Note that disabling the IRQ is NOT the same as disabling ++ * irq wake. 
If wakeup is enabled for the IPA then the IRQ ++ * will still cause the system to wake up, see irq_set_irq_wake(). ++ */ ++ ipa_interrupt_irq_disable(ipa); ++ + return pm_runtime_force_suspend(dev); + } + +@@ -285,6 +300,12 @@ static int ipa_resume(struct device *dev) + + __clear_bit(IPA_POWER_FLAG_SYSTEM, ipa->power->flags); + ++ /* Now that PM runtime is enabled again it's safe ++ * to turn the IRQ back on and process any data ++ * that was received during suspend. ++ */ ++ ipa_interrupt_irq_enable(ipa); ++ + return ret; + } + +@@ -382,6 +403,47 @@ void ipa_power_modem_queue_active(struct ipa *ipa) clear_bit(IPA_POWER_FLAG_STARTED, ipa->power->flags); } @@ -214473,7 +260802,7 @@ index b1c6c0fcb654f..f2989aac47a62 100644 int ipa_power_setup(struct ipa *ipa) { int ret; -@@ -438,12 +483,18 @@ ipa_power_init(struct device *dev, const struct ipa_power_data *data) +@@ -438,12 +500,18 @@ ipa_power_init(struct device *dev, const struct ipa_power_data *data) if (ret) goto err_kfree; @@ -214492,7 +260821,7 @@ index b1c6c0fcb654f..f2989aac47a62 100644 err_kfree: kfree(power); err_clk_put: -@@ -460,6 +511,7 @@ void ipa_power_exit(struct ipa_power *power) +@@ -460,6 +528,7 @@ void ipa_power_exit(struct ipa_power *power) pm_runtime_disable(dev); pm_runtime_dont_use_autosuspend(dev); @@ -214899,8 +261228,21 @@ index 1cedb634f4f7b..f01078b2581ce 100644 { rtnl_link_unregister(&ipvtap_link_ops); unregister_netdevice_notifier(&ipvtap_notifier_block); +diff --git a/drivers/net/loopback.c b/drivers/net/loopback.c +index a1c77cc004165..498e5c8013efb 100644 +--- a/drivers/net/loopback.c ++++ b/drivers/net/loopback.c +@@ -208,7 +208,7 @@ static __net_init int loopback_net_init(struct net *net) + int err; + + err = -ENOMEM; +- dev = alloc_netdev(0, "lo", NET_NAME_UNKNOWN, loopback_setup); ++ dev = alloc_netdev(0, "lo", NET_NAME_PREDICTABLE, loopback_setup); + if (!dev) + goto out; + diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c -index 93dc48b9b4f24..71700f2792786 100644 +index 93dc48b9b4f24..10b3f4fb2612c 100644 --- a/drivers/net/macsec.c +++ b/drivers/net/macsec.c @@ -241,6 +241,7 @@ static struct macsec_cb *macsec_skb_cb(struct sk_buff *skb) @@ -214923,7 +261265,26 @@ index 93dc48b9b4f24..71700f2792786 100644 static void __macsec_pn_wrapped(struct macsec_secy *secy, struct macsec_tx_sa *tx_sa) { -@@ -1695,7 +1691,7 @@ static bool validate_add_rxsa(struct nlattr **attrs) +@@ -1390,7 +1386,8 @@ static struct macsec_rx_sc *del_rx_sc(struct macsec_secy *secy, sci_t sci) + return NULL; + } + +-static struct macsec_rx_sc *create_rx_sc(struct net_device *dev, sci_t sci) ++static struct macsec_rx_sc *create_rx_sc(struct net_device *dev, sci_t sci, ++ bool active) + { + struct macsec_rx_sc *rx_sc; + struct macsec_dev *macsec; +@@ -1414,7 +1411,7 @@ static struct macsec_rx_sc *create_rx_sc(struct net_device *dev, sci_t sci) + } + + rx_sc->sci = sci; +- rx_sc->active = true; ++ rx_sc->active = active; + refcount_set(&rx_sc->refcnt, 1); + + secy = &macsec_priv(dev)->secy; +@@ -1695,7 +1692,7 @@ static bool validate_add_rxsa(struct nlattr **attrs) return false; if (attrs[MACSEC_SA_ATTR_PN] && @@ -214932,7 +261293,7 @@ index 93dc48b9b4f24..71700f2792786 100644 return false; if (attrs[MACSEC_SA_ATTR_ACTIVE]) { -@@ -1751,7 +1747,8 @@ static int macsec_add_rxsa(struct sk_buff *skb, struct genl_info *info) +@@ -1751,7 +1748,8 @@ static int macsec_add_rxsa(struct sk_buff *skb, struct genl_info *info) } pn_len = secy->xpn ? 
MACSEC_XPN_PN_LEN : MACSEC_DEFAULT_PN_LEN; @@ -214942,7 +261303,7 @@ index 93dc48b9b4f24..71700f2792786 100644 pr_notice("macsec: nl: add_rxsa: bad pn length: %d != %d\n", nla_len(tb_sa[MACSEC_SA_ATTR_PN]), pn_len); rtnl_unlock(); -@@ -1767,7 +1764,7 @@ static int macsec_add_rxsa(struct sk_buff *skb, struct genl_info *info) +@@ -1767,7 +1765,7 @@ static int macsec_add_rxsa(struct sk_buff *skb, struct genl_info *info) if (nla_len(tb_sa[MACSEC_SA_ATTR_SALT]) != MACSEC_SALT_LEN) { pr_notice("macsec: nl: add_rxsa: bad salt length: %d != %d\n", nla_len(tb_sa[MACSEC_SA_ATTR_SALT]), @@ -214951,7 +261312,15 @@ index 93dc48b9b4f24..71700f2792786 100644 rtnl_unlock(); return -EINVAL; } -@@ -1840,7 +1837,7 @@ static int macsec_add_rxsa(struct sk_buff *skb, struct genl_info *info) +@@ -1822,6 +1820,7 @@ static int macsec_add_rxsa(struct sk_buff *skb, struct genl_info *info) + secy->key_len); + + err = macsec_offload(ops->mdo_add_rxsa, &ctx); ++ memzero_explicit(ctx.sa.key, secy->key_len); + if (err) + goto cleanup; + } +@@ -1840,7 +1839,7 @@ static int macsec_add_rxsa(struct sk_buff *skb, struct genl_info *info) return 0; cleanup: @@ -214960,7 +261329,47 @@ index 93dc48b9b4f24..71700f2792786 100644 rtnl_unlock(); return err; } -@@ -1937,7 +1934,7 @@ static bool validate_add_txsa(struct nlattr **attrs) +@@ -1866,7 +1865,7 @@ static int macsec_add_rxsc(struct sk_buff *skb, struct genl_info *info) + struct macsec_rx_sc *rx_sc; + struct nlattr *tb_rxsc[MACSEC_RXSC_ATTR_MAX + 1]; + struct macsec_secy *secy; +- bool was_active; ++ bool active = true; + int ret; + + if (!attrs[MACSEC_ATTR_IFINDEX]) +@@ -1888,16 +1887,15 @@ static int macsec_add_rxsc(struct sk_buff *skb, struct genl_info *info) + secy = &macsec_priv(dev)->secy; + sci = nla_get_sci(tb_rxsc[MACSEC_RXSC_ATTR_SCI]); + +- rx_sc = create_rx_sc(dev, sci); ++ if (tb_rxsc[MACSEC_RXSC_ATTR_ACTIVE]) ++ active = nla_get_u8(tb_rxsc[MACSEC_RXSC_ATTR_ACTIVE]); ++ ++ rx_sc = create_rx_sc(dev, sci, active); + if (IS_ERR(rx_sc)) { + rtnl_unlock(); + return PTR_ERR(rx_sc); + } + +- was_active = rx_sc->active; +- if (tb_rxsc[MACSEC_RXSC_ATTR_ACTIVE]) +- rx_sc->active = !!nla_get_u8(tb_rxsc[MACSEC_RXSC_ATTR_ACTIVE]); +- + if (macsec_is_offloaded(netdev_priv(dev))) { + const struct macsec_ops *ops; + struct macsec_context ctx; +@@ -1921,7 +1919,8 @@ static int macsec_add_rxsc(struct sk_buff *skb, struct genl_info *info) + return 0; + + cleanup: +- rx_sc->active = was_active; ++ del_rx_sc(secy, sci); ++ free_rx_sc(rx_sc); + rtnl_unlock(); + return ret; + } +@@ -1937,7 +1936,7 @@ static bool validate_add_txsa(struct nlattr **attrs) if (nla_get_u8(attrs[MACSEC_SA_ATTR_AN]) >= MACSEC_NUM_AN) return false; @@ -214969,7 +261378,7 @@ index 93dc48b9b4f24..71700f2792786 100644 return false; if (attrs[MACSEC_SA_ATTR_ACTIVE]) { -@@ -2009,7 +2006,7 @@ static int macsec_add_txsa(struct sk_buff *skb, struct genl_info *info) +@@ -2009,7 +2008,7 @@ static int macsec_add_txsa(struct sk_buff *skb, struct genl_info *info) if (nla_len(tb_sa[MACSEC_SA_ATTR_SALT]) != MACSEC_SALT_LEN) { pr_notice("macsec: nl: add_txsa: bad salt length: %d != %d\n", nla_len(tb_sa[MACSEC_SA_ATTR_SALT]), @@ -214978,7 +261387,15 @@ index 93dc48b9b4f24..71700f2792786 100644 rtnl_unlock(); return -EINVAL; } -@@ -2083,7 +2080,7 @@ static int macsec_add_txsa(struct sk_buff *skb, struct genl_info *info) +@@ -2064,6 +2063,7 @@ static int macsec_add_txsa(struct sk_buff *skb, struct genl_info *info) + secy->key_len); + + err = macsec_offload(ops->mdo_add_txsa, &ctx); ++ memzero_explicit(ctx.sa.key, secy->key_len); 
+ if (err) + goto cleanup; + } +@@ -2083,7 +2083,7 @@ static int macsec_add_txsa(struct sk_buff *skb, struct genl_info *info) cleanup: secy->operational = was_operational; @@ -214987,7 +261404,7 @@ index 93dc48b9b4f24..71700f2792786 100644 rtnl_unlock(); return err; } -@@ -2291,7 +2288,7 @@ static bool validate_upd_sa(struct nlattr **attrs) +@@ -2291,7 +2291,7 @@ static bool validate_upd_sa(struct nlattr **attrs) if (nla_get_u8(attrs[MACSEC_SA_ATTR_AN]) >= MACSEC_NUM_AN) return false; @@ -214996,7 +261413,137 @@ index 93dc48b9b4f24..71700f2792786 100644 return false; if (attrs[MACSEC_SA_ATTR_ACTIVE]) { -@@ -3614,8 +3611,7 @@ static int macsec_set_mac_address(struct net_device *dev, void *p) +@@ -2560,7 +2560,7 @@ static bool macsec_is_configured(struct macsec_dev *macsec) + struct macsec_tx_sc *tx_sc = &secy->tx_sc; + int i; + +- if (secy->n_rx_sc > 0) ++ if (secy->rx_sc) + return true; + + for (i = 0; i < MACSEC_NUM_AN; i++) +@@ -2580,7 +2580,7 @@ static int macsec_upd_offload(struct sk_buff *skb, struct genl_info *info) + const struct macsec_ops *ops; + struct macsec_context ctx; + struct macsec_dev *macsec; +- int ret; ++ int ret = 0; + + if (!attrs[MACSEC_ATTR_IFINDEX]) + return -EINVAL; +@@ -2593,28 +2593,36 @@ static int macsec_upd_offload(struct sk_buff *skb, struct genl_info *info) + macsec_genl_offload_policy, NULL)) + return -EINVAL; + ++ rtnl_lock(); ++ + dev = get_dev_from_nl(genl_info_net(info), attrs); +- if (IS_ERR(dev)) +- return PTR_ERR(dev); ++ if (IS_ERR(dev)) { ++ ret = PTR_ERR(dev); ++ goto out; ++ } + macsec = macsec_priv(dev); + +- if (!tb_offload[MACSEC_OFFLOAD_ATTR_TYPE]) +- return -EINVAL; ++ if (!tb_offload[MACSEC_OFFLOAD_ATTR_TYPE]) { ++ ret = -EINVAL; ++ goto out; ++ } + + offload = nla_get_u8(tb_offload[MACSEC_OFFLOAD_ATTR_TYPE]); + if (macsec->offload == offload) +- return 0; ++ goto out; + + /* Check if the offloading mode is supported by the underlying layers */ + if (offload != MACSEC_OFFLOAD_OFF && +- !macsec_check_offload(offload, macsec)) +- return -EOPNOTSUPP; ++ !macsec_check_offload(offload, macsec)) { ++ ret = -EOPNOTSUPP; ++ goto out; ++ } + + /* Check if the net device is busy. */ +- if (netif_running(dev)) +- return -EBUSY; +- +- rtnl_lock(); ++ if (netif_running(dev)) { ++ ret = -EBUSY; ++ goto out; ++ } + + prev_offload = macsec->offload; + macsec->offload = offload; +@@ -2644,17 +2652,12 @@ static int macsec_upd_offload(struct sk_buff *skb, struct genl_info *info) + if (ret) + goto rollback; + +- /* Force features update, since they are different for SW MACSec and +- * HW offloading cases. 
+- */ +- netdev_update_features(dev); +- + rtnl_unlock(); + return 0; + + rollback: + macsec->offload = prev_offload; +- ++out: + rtnl_unlock(); + return ret; + } +@@ -3416,16 +3419,9 @@ static netdev_tx_t macsec_start_xmit(struct sk_buff *skb, + return ret; + } + +-#define SW_MACSEC_FEATURES \ ++#define MACSEC_FEATURES \ + (NETIF_F_SG | NETIF_F_HIGHDMA | NETIF_F_FRAGLIST) + +-/* If h/w offloading is enabled, use real device features save for +- * VLAN_FEATURES - they require additional ops +- * HW_MACSEC - no reason to report it +- */ +-#define REAL_DEV_FEATURES(dev) \ +- ((dev)->features & ~(NETIF_F_VLAN_FEATURES | NETIF_F_HW_MACSEC)) +- + static int macsec_dev_init(struct net_device *dev) + { + struct macsec_dev *macsec = macsec_priv(dev); +@@ -3442,12 +3438,8 @@ static int macsec_dev_init(struct net_device *dev) + return err; + } + +- if (macsec_is_offloaded(macsec)) { +- dev->features = REAL_DEV_FEATURES(real_dev); +- } else { +- dev->features = real_dev->features & SW_MACSEC_FEATURES; +- dev->features |= NETIF_F_LLTX | NETIF_F_GSO_SOFTWARE; +- } ++ dev->features = real_dev->features & MACSEC_FEATURES; ++ dev->features |= NETIF_F_LLTX | NETIF_F_GSO_SOFTWARE; + + dev->needed_headroom = real_dev->needed_headroom + + MACSEC_NEEDED_HEADROOM; +@@ -3476,10 +3468,7 @@ static netdev_features_t macsec_fix_features(struct net_device *dev, + struct macsec_dev *macsec = macsec_priv(dev); + struct net_device *real_dev = macsec->real_dev; + +- if (macsec_is_offloaded(macsec)) +- return REAL_DEV_FEATURES(real_dev); +- +- features &= (real_dev->features & SW_MACSEC_FEATURES) | ++ features &= (real_dev->features & MACSEC_FEATURES) | + NETIF_F_GSO_SOFTWARE | NETIF_F_SOFT_FEATURES; + features |= NETIF_F_LLTX; + +@@ -3614,8 +3603,7 @@ static int macsec_set_mac_address(struct net_device *dev, void *p) dev_uc_del(real_dev, dev->dev_addr); out: @@ -215006,7 +261553,15 @@ index 93dc48b9b4f24..71700f2792786 100644 /* If h/w offloading is available, propagate to the device */ if (macsec_is_offloaded(macsec)) { -@@ -3738,9 +3734,6 @@ static int macsec_changelink_common(struct net_device *dev, +@@ -3695,6 +3683,7 @@ static const struct nla_policy macsec_rtnl_policy[IFLA_MACSEC_MAX + 1] = { + [IFLA_MACSEC_SCB] = { .type = NLA_U8 }, + [IFLA_MACSEC_REPLAY_PROTECT] = { .type = NLA_U8 }, + [IFLA_MACSEC_VALIDATION] = { .type = NLA_U8 }, ++ [IFLA_MACSEC_OFFLOAD] = { .type = NLA_U8 }, + }; + + static void macsec_free_netdev(struct net_device *dev) +@@ -3738,9 +3727,6 @@ static int macsec_changelink_common(struct net_device *dev, secy->operational = tx_sa && tx_sa->active; } @@ -215016,7 +261571,7 @@ index 93dc48b9b4f24..71700f2792786 100644 if (data[IFLA_MACSEC_ENCRYPT]) tx_sc->encrypt = !!nla_get_u8(data[IFLA_MACSEC_ENCRYPT]); -@@ -3786,6 +3779,16 @@ static int macsec_changelink_common(struct net_device *dev, +@@ -3786,6 +3772,16 @@ static int macsec_changelink_common(struct net_device *dev, } } @@ -215033,7 +261588,7 @@ index 93dc48b9b4f24..71700f2792786 100644 return 0; } -@@ -3815,7 +3818,7 @@ static int macsec_changelink(struct net_device *dev, struct nlattr *tb[], +@@ -3815,13 +3811,12 @@ static int macsec_changelink(struct net_device *dev, struct nlattr *tb[], ret = macsec_changelink_common(dev, data); if (ret) @@ -215042,7 +261597,13 @@ index 93dc48b9b4f24..71700f2792786 100644 /* If h/w offloading is available, propagate to the device */ if (macsec_is_offloaded(macsec)) { -@@ -3870,6 +3873,18 @@ static void macsec_common_dellink(struct net_device *dev, struct list_head *head + const struct macsec_ops *ops; + 
struct macsec_context ctx; +- int ret; + + ops = macsec_get_ops(netdev_priv(dev), &ctx); + if (!ops) { +@@ -3870,6 +3865,18 @@ static void macsec_common_dellink(struct net_device *dev, struct list_head *head struct macsec_dev *macsec = macsec_priv(dev); struct net_device *real_dev = macsec->real_dev; @@ -215061,7 +261622,7 @@ index 93dc48b9b4f24..71700f2792786 100644 unregister_netdevice_queue(dev, head); list_del_rcu(&macsec->secys); macsec_del_dev(macsec); -@@ -3884,18 +3899,6 @@ static void macsec_dellink(struct net_device *dev, struct list_head *head) +@@ -3884,18 +3891,6 @@ static void macsec_dellink(struct net_device *dev, struct list_head *head) struct net_device *real_dev = macsec->real_dev; struct macsec_rxh_data *rxd = macsec_data_rtnl(real_dev); @@ -215080,7 +261641,7 @@ index 93dc48b9b4f24..71700f2792786 100644 macsec_common_dellink(dev, head); if (list_empty(&rxd->secys)) { -@@ -3944,6 +3947,11 @@ static bool sci_exists(struct net_device *dev, sci_t sci) +@@ -3944,6 +3939,11 @@ static bool sci_exists(struct net_device *dev, sci_t sci) return false; } @@ -215092,7 +261653,7 @@ index 93dc48b9b4f24..71700f2792786 100644 static int macsec_add_dev(struct net_device *dev, sci_t sci, u8 icv_len) { struct macsec_dev *macsec = macsec_priv(dev); -@@ -4018,6 +4026,15 @@ static int macsec_newlink(struct net *net, struct net_device *dev, +@@ -4018,6 +4018,15 @@ static int macsec_newlink(struct net *net, struct net_device *dev, !macsec_check_offload(macsec->offload, macsec)) return -EOPNOTSUPP; @@ -215109,9 +261670,18 @@ index 93dc48b9b4f24..71700f2792786 100644 icv_len = nla_get_u8(data[IFLA_MACSEC_ICV_LEN]); mtu = real_dev->mtu - icv_len - macsec_extra_len(true); diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c -index 35f46ad040b0d..6363459ba1d05 100644 +index 35f46ad040b0d..3dd1528dde028 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c +@@ -141,7 +141,7 @@ static struct macvlan_source_entry *macvlan_hash_lookup_source( + u32 idx = macvlan_eth_hash(addr); + struct hlist_head *h = &vlan->port->vlan_source_hash[idx]; + +- hlist_for_each_entry_rcu(entry, h, hlist) { ++ hlist_for_each_entry_rcu(entry, h, hlist, lockdep_rtnl_is_held()) { + if (ether_addr_equal_64bits(entry->addr, addr) && + entry->vlan == vlan) + return entry; @@ -460,8 +460,10 @@ static rx_handler_result_t macvlan_handle_frame(struct sk_buff **pskb) return RX_HANDLER_CONSUMED; *pskb = skb; @@ -215145,6 +261715,36 @@ index 35f46ad040b0d..6363459ba1d05 100644 } else { /* Rehash and update the device filters */ if (macvlan_addr_busy(vlan->port, addr)) +@@ -1177,7 +1181,7 @@ void macvlan_common_setup(struct net_device *dev) + { + ether_setup(dev); + +- dev->min_mtu = 0; ++ /* ether_setup() has set dev->min_mtu to ETH_MIN_MTU. */ + dev->max_mtu = ETH_MAX_MTU; + dev->priv_flags &= ~IFF_TX_SKB_SHARING; + netif_keep_dst(dev); +@@ -1517,8 +1521,10 @@ destroy_macvlan_port: + /* the macvlan port may be freed by macvlan_uninit when fail to register. + * so we destroy the macvlan port only when it's valid. 
+ */ +- if (create && macvlan_port_get_rtnl(lowerdev)) ++ if (create && macvlan_port_get_rtnl(lowerdev)) { ++ macvlan_flush_sources(port, vlan); + macvlan_port_destroy(port->dev); ++ } + return err; + } + EXPORT_SYMBOL_GPL(macvlan_common_newlink); +@@ -1629,7 +1635,7 @@ static int macvlan_fill_info_macaddr(struct sk_buff *skb, + struct hlist_head *h = &vlan->port->vlan_source_hash[i]; + struct macvlan_source_entry *entry; + +- hlist_for_each_entry_rcu(entry, h, hlist) { ++ hlist_for_each_entry_rcu(entry, h, hlist, lockdep_rtnl_is_held()) { + if (entry->vlan != vlan) + continue; + if (nla_put(skb, IFLA_MACVLAN_MACADDR, ETH_ALEN, entry->addr)) diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c index 694e2f5dbbe59..39801c31e5071 100644 --- a/drivers/net/macvtap.c @@ -215168,7 +261768,7 @@ index 694e2f5dbbe59..39801c31e5071 100644 }; diff --git a/drivers/net/mdio/fwnode_mdio.c b/drivers/net/mdio/fwnode_mdio.c -index 1becb1a731f67..1c1584fca6327 100644 +index 1becb1a731f67..2c47efdae73b4 100644 --- a/drivers/net/mdio/fwnode_mdio.c +++ b/drivers/net/mdio/fwnode_mdio.c @@ -43,6 +43,11 @@ int fwnode_mdiobus_phy_device_register(struct mii_bus *mdio, @@ -215183,6 +261783,67 @@ index 1becb1a731f67..1c1584fca6327 100644 if (rc == -EPROBE_DEFER) return rc; +@@ -72,6 +77,7 @@ int fwnode_mdiobus_phy_device_register(struct mii_bus *mdio, + */ + rc = phy_device_register(phy); + if (rc) { ++ device_set_node(&phy->mdio.dev, NULL); + fwnode_handle_put(child); + return rc; + } +@@ -105,8 +111,8 @@ int fwnode_mdiobus_register_phy(struct mii_bus *bus, + else + phy = phy_device_create(bus, addr, phy_id, 0, NULL); + if (IS_ERR(phy)) { +- unregister_mii_timestamper(mii_ts); +- return PTR_ERR(phy); ++ rc = PTR_ERR(phy); ++ goto clean_mii_ts; + } + + if (is_acpi_node(child)) { +@@ -115,22 +121,19 @@ int fwnode_mdiobus_register_phy(struct mii_bus *bus, + /* Associate the fwnode with the device structure so it + * can be looked up later. + */ +- phy->mdio.dev.fwnode = child; ++ phy->mdio.dev.fwnode = fwnode_handle_get(child); + + /* All data is now stored in the phy struct, so register it */ + rc = phy_device_register(phy); + if (rc) { +- phy_device_free(phy); +- fwnode_handle_put(phy->mdio.dev.fwnode); +- return rc; ++ phy->mdio.dev.fwnode = NULL; ++ fwnode_handle_put(child); ++ goto clean_phy; + } + } else if (is_of_node(child)) { + rc = fwnode_mdiobus_phy_device_register(bus, phy, child, addr); +- if (rc) { +- unregister_mii_timestamper(mii_ts); +- phy_device_free(phy); +- return rc; +- } ++ if (rc) ++ goto clean_phy; + } + + /* phy->mii_ts may already be defined by the PHY driver. 
A +@@ -140,5 +143,12 @@ int fwnode_mdiobus_register_phy(struct mii_bus *bus, + if (mii_ts) + phy->mii_ts = mii_ts; + return 0; ++ ++clean_phy: ++ phy_device_free(phy); ++clean_mii_ts: ++ unregister_mii_timestamper(mii_ts); ++ ++ return rc; + } + EXPORT_SYMBOL(fwnode_mdiobus_register_phy); diff --git a/drivers/net/mdio/mdio-aspeed.c b/drivers/net/mdio/mdio-aspeed.c index cad820568f751..e2273588c75b6 100644 --- a/drivers/net/mdio/mdio-aspeed.c @@ -215263,11 +261924,75 @@ index 6dcbf987d61b5..8b444a8eb6b55 100644 } bus = md->mii_bus; +diff --git a/drivers/net/mdio/mdio-mux-meson-g12a.c b/drivers/net/mdio/mdio-mux-meson-g12a.c +index b8866bc3f2e8b..917c8a10eea02 100644 +--- a/drivers/net/mdio/mdio-mux-meson-g12a.c ++++ b/drivers/net/mdio/mdio-mux-meson-g12a.c +@@ -4,6 +4,7 @@ + */ + + #include <linux/bitfield.h> ++#include <linux/delay.h> + #include <linux/clk.h> + #include <linux/clk-provider.h> + #include <linux/device.h> +@@ -150,6 +151,7 @@ static const struct clk_ops g12a_ephy_pll_ops = { + + static int g12a_enable_internal_mdio(struct g12a_mdio_mux *priv) + { ++ u32 value; + int ret; + + /* Enable the phy clock */ +@@ -163,18 +165,25 @@ static int g12a_enable_internal_mdio(struct g12a_mdio_mux *priv) + + /* Initialize ephy control */ + writel(EPHY_G12A_ID, priv->regs + ETH_PHY_CNTL0); +- writel(FIELD_PREP(PHY_CNTL1_ST_MODE, 3) | +- FIELD_PREP(PHY_CNTL1_ST_PHYADD, EPHY_DFLT_ADD) | +- FIELD_PREP(PHY_CNTL1_MII_MODE, EPHY_MODE_RMII) | +- PHY_CNTL1_CLK_EN | +- PHY_CNTL1_CLKFREQ | +- PHY_CNTL1_PHY_ENB, +- priv->regs + ETH_PHY_CNTL1); ++ ++ /* Make sure we get a 0 -> 1 transition on the enable bit */ ++ value = FIELD_PREP(PHY_CNTL1_ST_MODE, 3) | ++ FIELD_PREP(PHY_CNTL1_ST_PHYADD, EPHY_DFLT_ADD) | ++ FIELD_PREP(PHY_CNTL1_MII_MODE, EPHY_MODE_RMII) | ++ PHY_CNTL1_CLK_EN | ++ PHY_CNTL1_CLKFREQ; ++ writel(value, priv->regs + ETH_PHY_CNTL1); + writel(PHY_CNTL2_USE_INTERNAL | + PHY_CNTL2_SMI_SRC_MAC | + PHY_CNTL2_RX_CLK_EPHY, + priv->regs + ETH_PHY_CNTL2); + ++ value |= PHY_CNTL1_PHY_ENB; ++ writel(value, priv->regs + ETH_PHY_CNTL1); ++ ++ /* The phy needs a bit of time to power up */ ++ mdelay(10); ++ + return 0; + } + diff --git a/drivers/net/mdio/of_mdio.c b/drivers/net/mdio/of_mdio.c -index 9e3c815a070f1..796e9c7857d09 100644 +index 9e3c815a070f1..510822d6d0d90 100644 --- a/drivers/net/mdio/of_mdio.c +++ b/drivers/net/mdio/of_mdio.c -@@ -231,6 +231,7 @@ int of_mdiobus_register(struct mii_bus *mdio, struct device_node *np) +@@ -68,8 +68,9 @@ static int of_mdiobus_register_device(struct mii_bus *mdio, + /* All data is now stored in the mdiodev struct; register it. 
*/ + rc = mdio_device_register(mdiodev); + if (rc) { ++ device_set_node(&mdiodev->dev, NULL); ++ fwnode_handle_put(fwnode); + mdio_device_free(mdiodev); +- of_node_put(child); + return rc; + } + +@@ -231,6 +232,7 @@ int of_mdiobus_register(struct mii_bus *mdio, struct device_node *np) return 0; unregister: @@ -215275,6 +262000,19 @@ index 9e3c815a070f1..796e9c7857d09 100644 mdiobus_unregister(mdio); return rc; } +diff --git a/drivers/net/mhi_net.c b/drivers/net/mhi_net.c +index aaa628f859fd4..f84554aa02af1 100644 +--- a/drivers/net/mhi_net.c ++++ b/drivers/net/mhi_net.c +@@ -343,6 +343,8 @@ static void mhi_net_dellink(struct mhi_device *mhi_dev, struct net_device *ndev) + + kfree_skb(mhi_netdev->skbagg_head); + ++ free_netdev(ndev); ++ + dev_set_drvdata(&mhi_dev->dev, NULL); + } + diff --git a/drivers/net/netdevsim/bpf.c b/drivers/net/netdevsim/bpf.c index 90aafb56f1409..50854265864d1 100644 --- a/drivers/net/netdevsim/bpf.c @@ -215311,6 +262049,43 @@ index 90aafb56f1409..50854265864d1 100644 } } +diff --git a/drivers/net/netdevsim/dev.c b/drivers/net/netdevsim/dev.c +index 54313bd577973..94490dfae6568 100644 +--- a/drivers/net/netdevsim/dev.c ++++ b/drivers/net/netdevsim/dev.c +@@ -229,8 +229,10 @@ static int nsim_dev_debugfs_init(struct nsim_dev *nsim_dev) + if (IS_ERR(nsim_dev->ddir)) + return PTR_ERR(nsim_dev->ddir); + nsim_dev->ports_ddir = debugfs_create_dir("ports", nsim_dev->ddir); +- if (IS_ERR(nsim_dev->ports_ddir)) +- return PTR_ERR(nsim_dev->ports_ddir); ++ if (IS_ERR(nsim_dev->ports_ddir)) { ++ err = PTR_ERR(nsim_dev->ports_ddir); ++ goto err_ddir; ++ } + debugfs_create_bool("fw_update_status", 0600, nsim_dev->ddir, + &nsim_dev->fw_update_status); + debugfs_create_u32("fw_update_overwrite_mask", 0600, nsim_dev->ddir, +@@ -267,7 +269,7 @@ static int nsim_dev_debugfs_init(struct nsim_dev *nsim_dev) + nsim_dev->nodes_ddir = debugfs_create_dir("rate_nodes", nsim_dev->ddir); + if (IS_ERR(nsim_dev->nodes_ddir)) { + err = PTR_ERR(nsim_dev->nodes_ddir); +- goto err_out; ++ goto err_ports_ddir; + } + debugfs_create_bool("fail_trap_drop_counter_get", 0600, + nsim_dev->ddir, +@@ -275,8 +277,9 @@ static int nsim_dev_debugfs_init(struct nsim_dev *nsim_dev) + nsim_udp_tunnels_debugfs_create(nsim_dev); + return 0; + +-err_out: ++err_ports_ddir: + debugfs_remove_recursive(nsim_dev->ports_ddir); ++err_ddir: + debugfs_remove_recursive(nsim_dev->ddir); + return err; + } diff --git a/drivers/net/netdevsim/ethtool.c b/drivers/net/netdevsim/ethtool.c index b03a0513eb7e7..2e7c1cc16cb93 100644 --- a/drivers/net/netdevsim/ethtool.c @@ -215441,6 +262216,44 @@ index 50572e0f1f529..84741715f6705 100644 } static int +diff --git a/drivers/net/ntb_netdev.c b/drivers/net/ntb_netdev.c +index a5bab614ff845..b701ee83e64a8 100644 +--- a/drivers/net/ntb_netdev.c ++++ b/drivers/net/ntb_netdev.c +@@ -137,7 +137,7 @@ static void ntb_netdev_rx_handler(struct ntb_transport_qp *qp, void *qp_data, + enqueue_again: + rc = ntb_transport_rx_enqueue(qp, skb, skb->data, ndev->mtu + ETH_HLEN); + if (rc) { +- dev_kfree_skb(skb); ++ dev_kfree_skb_any(skb); + ndev->stats.rx_errors++; + ndev->stats.rx_fifo_errors++; + } +@@ -192,7 +192,7 @@ static void ntb_netdev_tx_handler(struct ntb_transport_qp *qp, void *qp_data, + ndev->stats.tx_aborted_errors++; + } + +- dev_kfree_skb(skb); ++ dev_kfree_skb_any(skb); + + if (ntb_transport_tx_free_entry(dev->qp) >= tx_start) { + /* Make sure anybody stopping the queue after this sees the new +@@ -484,7 +484,14 @@ static int __init ntb_netdev_init_module(void) + rc = 
ntb_transport_register_client_dev(KBUILD_MODNAME); + if (rc) + return rc; +- return ntb_transport_register_client(&ntb_netdev_client); ++ ++ rc = ntb_transport_register_client(&ntb_netdev_client); ++ if (rc) { ++ ntb_transport_unregister_client_dev(KBUILD_MODNAME); ++ return rc; ++ } ++ ++ return 0; + } + module_init(ntb_netdev_init_module); + diff --git a/drivers/net/pcs/pcs-xpcs.c b/drivers/net/pcs/pcs-xpcs.c index 7de631f5356fc..fd4cbf8a55ad7 100644 --- a/drivers/net/pcs/pcs-xpcs.c @@ -215700,10 +262513,10 @@ index 83aea5c5cd03c..b330efb98209b 100644 .config_aneg = bcm54616s_config_aneg, .config_intr = bcm_phy_config_intr, diff --git a/drivers/net/phy/dp83822.c b/drivers/net/phy/dp83822.c -index 211b5476a6f51..0b511abb5422e 100644 +index 211b5476a6f51..f070aa97c77b4 100644 --- a/drivers/net/phy/dp83822.c +++ b/drivers/net/phy/dp83822.c -@@ -228,9 +228,7 @@ static int dp83822_config_intr(struct phy_device *phydev) +@@ -228,13 +228,12 @@ static int dp83822_config_intr(struct phy_device *phydev) if (misr_status < 0) return misr_status; @@ -215714,17 +262527,26 @@ index 211b5476a6f51..0b511abb5422e 100644 DP83822_ENERGY_DET_INT_EN | DP83822_LINK_QUAL_INT_EN); -@@ -255,8 +253,7 @@ static int dp83822_config_intr(struct phy_device *phydev) +- if (!dp83822->fx_enabled) ++ /* Private data pointer is NULL on DP83825/26 */ ++ if (!dp83822 || !dp83822->fx_enabled) + misr_status |= DP83822_ANEG_COMPLETE_INT_EN | + DP83822_DUP_MODE_CHANGE_INT_EN | + DP83822_SPEED_CHANGED_INT_EN; +@@ -254,9 +253,9 @@ static int dp83822_config_intr(struct phy_device *phydev) + DP83822_PAGE_RX_INT_EN | DP83822_EEE_ERROR_CHANGE_INT_EN); - if (!dp83822->fx_enabled) +- if (!dp83822->fx_enabled) - misr_status |= DP83822_MDI_XOVER_INT_EN | - DP83822_ANEG_ERR_INT_EN | ++ /* Private data pointer is NULL on DP83825/26 */ ++ if (!dp83822 || !dp83822->fx_enabled) + misr_status |= DP83822_ANEG_ERR_INT_EN | DP83822_WOL_PKT_INT_EN; err = phy_write(phydev, MII_DP83822_MISR2, misr_status); -@@ -274,7 +271,7 @@ static int dp83822_config_intr(struct phy_device *phydev) +@@ -274,7 +273,7 @@ static int dp83822_config_intr(struct phy_device *phydev) if (err < 0) return err; @@ -215803,7 +262625,7 @@ index 6bbc81ad295fb..783e30451e30d 100644 }; module_phy_driver(dp83867_driver); diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c -index 4fcfca4e17021..4d726ee03ce20 100644 +index 4fcfca4e17021..87975b843d276 100644 --- a/drivers/net/phy/marvell.c +++ b/drivers/net/phy/marvell.c @@ -189,6 +189,8 @@ @@ -215898,7 +262720,7 @@ index 4fcfca4e17021..4d726ee03ce20 100644 err = marvell_set_page(phydev, MII_MARVELL_FIBER_PAGE); if (err < 0) goto error; -@@ -1932,6 +1941,58 @@ static void marvell_get_stats(struct phy_device *phydev, +@@ -1932,6 +1941,60 @@ static void marvell_get_stats(struct phy_device *phydev, data[i] = marvell_get_stat(phydev, i); } @@ -215937,14 +262759,16 @@ index 4fcfca4e17021..4d726ee03ce20 100644 + if (err < 0) + return err; + -+ /* FIXME: Based on trial and error test, it seem 1G need to have -+ * delay between soft reset and loopback enablement. -+ */ -+ if (phydev->speed == SPEED_1000) -+ msleep(1000); ++ err = phy_modify(phydev, MII_BMCR, BMCR_LOOPBACK, ++ BMCR_LOOPBACK); + -+ return phy_modify(phydev, MII_BMCR, BMCR_LOOPBACK, -+ BMCR_LOOPBACK); ++ if (!err) { ++ /* It takes some time for PHY device to switch ++ * into/out-of loopback mode. 
++ */ ++ msleep(1000); ++ } ++ return err; + } else { + err = phy_modify(phydev, MII_BMCR, BMCR_LOOPBACK, 0); + if (err < 0) @@ -215957,7 +262781,7 @@ index 4fcfca4e17021..4d726ee03ce20 100644 static int marvell_vct5_wait_complete(struct phy_device *phydev) { int i; -@@ -3078,7 +3139,7 @@ static struct phy_driver marvell_drivers[] = { +@@ -3078,7 +3141,7 @@ static struct phy_driver marvell_drivers[] = { .get_sset_count = marvell_get_sset_count, .get_strings = marvell_get_strings, .get_stats = marvell_get_stats, @@ -215980,10 +262804,33 @@ index bd310e8d5e43d..df33637c5269a 100644 /* If the link settings are not resolved, mark the link down */ if (!(cssr1 & MV_PCS_CSSR1_RESOLVED)) { diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c -index 6865d9319197f..2c0216fe58def 100644 +index 6865d9319197f..5f89828fd9f17 100644 --- a/drivers/net/phy/mdio_bus.c +++ b/drivers/net/phy/mdio_bus.c -@@ -591,7 +591,7 @@ int __mdiobus_register(struct mii_bus *bus, struct module *owner) +@@ -108,7 +108,12 @@ EXPORT_SYMBOL(mdiobus_unregister_device); + + struct phy_device *mdiobus_get_phy(struct mii_bus *bus, int addr) + { +- struct mdio_device *mdiodev = bus->mdio_map[addr]; ++ struct mdio_device *mdiodev; ++ ++ if (addr < 0 || addr >= ARRAY_SIZE(bus->mdio_map)) ++ return NULL; ++ ++ mdiodev = bus->mdio_map[addr]; + + if (!mdiodev) + return NULL; +@@ -577,7 +582,7 @@ int __mdiobus_register(struct mii_bus *bus, struct module *owner) + } + + for (i = 0; i < PHY_MAX_ADDR; i++) { +- if ((bus->phy_mask & (1 << i)) == 0) { ++ if ((bus->phy_mask & BIT(i)) == 0) { + struct phy_device *phydev; + + phydev = mdiobus_scan(bus, i); +@@ -591,7 +596,7 @@ int __mdiobus_register(struct mii_bus *bus, struct module *owner) mdiobus_setup_mdiodev_from_board_info(bus, mdiobus_create_device); bus->state = MDIOBUS_REGISTERED; @@ -215992,7 +262839,7 @@ index 6865d9319197f..2c0216fe58def 100644 return 0; error: -@@ -1011,7 +1011,6 @@ int __init mdio_bus_init(void) +@@ -1011,7 +1016,6 @@ int __init mdio_bus_init(void) return ret; } @@ -216000,6 +262847,26 @@ index 6865d9319197f..2c0216fe58def 100644 #if IS_ENABLED(CONFIG_PHYLIB) void mdio_bus_exit(void) +diff --git a/drivers/net/phy/mdio_device.c b/drivers/net/phy/mdio_device.c +index 250742ffdfd91..044828d081d22 100644 +--- a/drivers/net/phy/mdio_device.c ++++ b/drivers/net/phy/mdio_device.c +@@ -21,6 +21,7 @@ + #include <linux/slab.h> + #include <linux/string.h> + #include <linux/unistd.h> ++#include <linux/property.h> + + void mdio_device_free(struct mdio_device *mdiodev) + { +@@ -30,6 +31,7 @@ EXPORT_SYMBOL(mdio_device_free); + + static void mdio_device_release(struct device *dev) + { ++ fwnode_handle_put(dev->fwnode); + kfree(to_mdio_device(dev)); + } + diff --git a/drivers/net/phy/mediatek-ge.c b/drivers/net/phy/mediatek-ge.c index b7a5ae20edd53..68ee434f9dea3 100644 --- a/drivers/net/phy/mediatek-ge.c @@ -216015,7 +262882,7 @@ index b7a5ae20edd53..68ee434f9dea3 100644 /* PHY link down power saving enable */ diff --git a/drivers/net/phy/meson-gxl.c b/drivers/net/phy/meson-gxl.c -index 7e7904fee1d97..c49062ad72c6c 100644 +index 7e7904fee1d97..a6015cd03bff8 100644 --- a/drivers/net/phy/meson-gxl.c +++ b/drivers/net/phy/meson-gxl.c @@ -30,8 +30,12 @@ @@ -216075,6 +262942,24 @@ index 7e7904fee1d97..c49062ad72c6c 100644 phy_trigger_machine(phydev); return IRQ_HANDLED; +@@ -258,6 +261,8 @@ static struct phy_driver meson_gxl_phy[] = { + .handle_interrupt = meson_gxl_handle_interrupt, + .suspend = genphy_suspend, + .resume = genphy_resume, ++ .read_mmd = 
genphy_read_mmd_unsupported, ++ .write_mmd = genphy_write_mmd_unsupported, + }, { + PHY_ID_MATCH_EXACT(0x01803301), + .name = "Meson G12A Internal PHY", +@@ -268,6 +273,8 @@ static struct phy_driver meson_gxl_phy[] = { + .handle_interrupt = meson_gxl_handle_interrupt, + .suspend = genphy_suspend, + .resume = genphy_resume, ++ .read_mmd = genphy_read_mmd_unsupported, ++ .write_mmd = genphy_write_mmd_unsupported, + }, + }; + diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c index 5c928f827173c..05a8985d71073 100644 --- a/drivers/net/phy/micrel.c @@ -216243,6 +263128,18 @@ index 5c928f827173c..05a8985d71073 100644 .resume = kszphy_resume, }, { .phy_id = PHY_ID_KSZ8873MLL, +diff --git a/drivers/net/phy/mscc/mscc_macsec.c b/drivers/net/phy/mscc/mscc_macsec.c +index b7b2521c73fb6..c00eef457b850 100644 +--- a/drivers/net/phy/mscc/mscc_macsec.c ++++ b/drivers/net/phy/mscc/mscc_macsec.c +@@ -632,6 +632,7 @@ static void vsc8584_macsec_free_flow(struct vsc8531_private *priv, + + list_del(&flow->list); + clear_bit(flow->index, bitmap); ++ memzero_explicit(flow->key, sizeof(flow->key)); + kfree(flow); + } + diff --git a/drivers/net/phy/mscc/mscc_main.c b/drivers/net/phy/mscc/mscc_main.c index 6e32da28e138f..f2e3a67198dd6 100644 --- a/drivers/net/phy/mscc/mscc_main.c @@ -216254,6 +263151,36 @@ index 6e32da28e138f..f2e3a67198dd6 100644 + +MODULE_FIRMWARE(MSCC_VSC8584_REVB_INT8051_FW); +MODULE_FIRMWARE(MSCC_VSC8574_REVB_INT8051_FW); +diff --git a/drivers/net/phy/mxl-gpy.c b/drivers/net/phy/mxl-gpy.c +index 5ce1bf03bbd71..f9c70476d7e8c 100644 +--- a/drivers/net/phy/mxl-gpy.c ++++ b/drivers/net/phy/mxl-gpy.c +@@ -96,6 +96,7 @@ static int gpy_config_init(struct phy_device *phydev) + + static int gpy_probe(struct phy_device *phydev) + { ++ int fw_version; + int ret; + + if (!phydev->is_c45) { +@@ -105,12 +106,12 @@ static int gpy_probe(struct phy_device *phydev) + } + + /* Show GPY PHY FW version in dmesg */ +- ret = phy_read(phydev, PHY_FWV); +- if (ret < 0) +- return ret; ++ fw_version = phy_read(phydev, PHY_FWV); ++ if (fw_version < 0) ++ return fw_version; + +- phydev_info(phydev, "Firmware Version: 0x%04X (%s)\n", ret, +- (ret & PHY_FWV_REL_MASK) ? "release" : "test"); ++ phydev_info(phydev, "Firmware Version: 0x%04X (%s)\n", fw_version, ++ (fw_version & PHY_FWV_REL_MASK) ? "release" : "test"); + + return 0; + } diff --git a/drivers/net/phy/phy-core.c b/drivers/net/phy/phy-core.c index 2870c33b8975d..271fc01f7f7fd 100644 --- a/drivers/net/phy/phy-core.c @@ -216335,10 +263262,18 @@ index a3bfb156c83d7..2fc851082e7b4 100644 /** diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c -index 4f9990b47a377..c5b92ffaffb94 100644 +index 4f9990b47a377..996842a1a9a35 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c -@@ -277,6 +277,15 @@ static __maybe_unused int mdio_bus_phy_suspend(struct device *dev) +@@ -215,6 +215,7 @@ static void phy_mdio_device_free(struct mdio_device *mdiodev) + + static void phy_device_release(struct device *dev) + { ++ fwnode_handle_put(dev->fwnode); + kfree(to_phy_device(dev)); + } + +@@ -277,6 +278,15 @@ static __maybe_unused int mdio_bus_phy_suspend(struct device *dev) if (phydev->mac_managed_pm) return 0; @@ -216354,7 +263289,7 @@ index 4f9990b47a377..c5b92ffaffb94 100644 /* We must stop the state machine manually, otherwise it stops out of * control, possibly with the phydev->lock held. 
Upon resume, netdev * may call phy routines that try to grab the same lock, and that may -@@ -306,6 +315,14 @@ static __maybe_unused int mdio_bus_phy_resume(struct device *dev) +@@ -306,6 +316,14 @@ static __maybe_unused int mdio_bus_phy_resume(struct device *dev) phydev->suspended_by_mdio_bus = 0; @@ -216369,7 +263304,7 @@ index 4f9990b47a377..c5b92ffaffb94 100644 ret = phy_init_hw(phydev); if (ret < 0) return ret; -@@ -314,6 +331,20 @@ static __maybe_unused int mdio_bus_phy_resume(struct device *dev) +@@ -314,6 +332,20 @@ static __maybe_unused int mdio_bus_phy_resume(struct device *dev) if (ret < 0) return ret; no_resume: @@ -216390,7 +263325,15 @@ index 4f9990b47a377..c5b92ffaffb94 100644 if (phydev->attached_dev && phydev->adjust_link) phy_start_machine(phydev); -@@ -1746,6 +1777,9 @@ void phy_detach(struct phy_device *phydev) +@@ -1487,6 +1519,7 @@ error: + + error_module_put: + module_put(d->driver->owner); ++ d->driver = NULL; + error_put_device: + put_device(d); + if (ndev_owner != bus->owner) +@@ -1746,6 +1779,9 @@ void phy_detach(struct phy_device *phydev) phy_driver_is_genphy_10g(phydev)) device_release_driver(&phydev->mdio.dev); @@ -216400,7 +263343,7 @@ index 4f9990b47a377..c5b92ffaffb94 100644 /* * The phydev might go away on the put_device() below, so avoid * a use-after-free bug by reading the underlying bus first. -@@ -1757,9 +1791,6 @@ void phy_detach(struct phy_device *phydev) +@@ -1757,9 +1793,6 @@ void phy_detach(struct phy_device *phydev) ndev_owner = dev->dev.parent->driver->owner; if (ndev_owner != bus->owner) module_put(bus->owner); @@ -216411,7 +263354,7 @@ index 4f9990b47a377..c5b92ffaffb94 100644 EXPORT_SYMBOL(phy_detach); diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c -index 0a0abe8e4be0b..7afcf6310d59f 100644 +index 0a0abe8e4be0b..422dc92ecac94 100644 --- a/drivers/net/phy/phylink.c +++ b/drivers/net/phy/phylink.c @@ -657,6 +657,7 @@ static void phylink_resolve(struct work_struct *w) @@ -216485,7 +263428,20 @@ index 0a0abe8e4be0b..7afcf6310d59f 100644 return 0; } -@@ -1333,7 +1360,10 @@ void phylink_suspend(struct phylink *pl, bool mac_wol) +@@ -1139,10 +1166,9 @@ int phylink_fwnode_phy_connect(struct phylink *pl, + + ret = phy_attach_direct(pl->netdev, phy_dev, flags, + pl->link_interface); +- if (ret) { +- phy_device_free(phy_dev); ++ phy_device_free(phy_dev); ++ if (ret) + return ret; +- } + + ret = phylink_bringup_phy(pl, phy_dev, pl->link_config.interface); + if (ret) +@@ -1333,7 +1359,10 @@ void phylink_suspend(struct phylink *pl, bool mac_wol) * but one would hope all packets have been sent. This * also means phylink_resolve() will do nothing. */ @@ -216497,7 +263453,7 @@ index 0a0abe8e4be0b..7afcf6310d59f 100644 /* We do not call mac_link_down() here as we want the * link to remain up to receive the WoL packets. 
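The phylink_fwnode_phy_connect() hunk added above is a reference-count fix: the phy lookup takes a reference that has to be dropped whether or not phy_attach_direct() succeeds, so the backport calls phy_device_free() unconditionally before testing the return value instead of only on the error path. A minimal user-space sketch of the same get/attach/put shape, using hypothetical get_ref()/put_ref() helpers rather than the real phylib API:

#include <stdio.h>

/* Toy refcounted object standing in for struct phy_device. */
struct object {
	int refcount;
};

static struct object *get_ref(struct object *obj)
{
	obj->refcount++;
	return obj;
}

static void put_ref(struct object *obj)
{
	if (--obj->refcount == 0)
		printf("object released\n");
}

/* attach() takes its own reference on success, so the caller's lookup
 * reference is dead weight in either outcome. */
static int attach(struct object *obj, int fail)
{
	if (fail)
		return -1;
	get_ref(obj);
	return 0;
}

static int connect_obj(struct object *obj, int fail)
{
	struct object *ref = get_ref(obj);	/* lookup reference */
	int err = attach(ref, fail);

	/* Drop the lookup reference first, then check for errors --
	 * freeing it only in the failure branch leaks one reference
	 * per successful attach. */
	put_ref(ref);
	return err;
}

int main(void)
{
	struct object obj = { .refcount = 1 };

	connect_obj(&obj, 0);	/* success: attach keeps one reference */
	connect_obj(&obj, 1);	/* failure: lookup reference still dropped */
	printf("refcount now %d\n", obj.refcount);	/* prints 2 */
	return 0;
}

Hoisting the cleanup above the branch works because both paths need exactly the same put; the earlier code differed only in forgetting it on success.
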
-@@ -1724,7 +1754,7 @@ int phylink_ethtool_set_pauseparam(struct phylink *pl, +@@ -1724,7 +1753,7 @@ int phylink_ethtool_set_pauseparam(struct phylink *pl, return -EOPNOTSUPP; if (!phylink_test(pl->supported, Asym_Pause) && @@ -216663,10 +263619,37 @@ index d8cac02a79b95..636b0907a5987 100644 /* Disable EDPD to wake up PHY */ int rc = phy_read(phydev, MII_LAN83C185_CTRL_STATUS); if (rc < 0) +diff --git a/drivers/net/phy/xilinx_gmii2rgmii.c b/drivers/net/phy/xilinx_gmii2rgmii.c +index 8dcb49ed1f3d9..7fd9fe6a602bc 100644 +--- a/drivers/net/phy/xilinx_gmii2rgmii.c ++++ b/drivers/net/phy/xilinx_gmii2rgmii.c +@@ -105,6 +105,7 @@ static int xgmiitorgmii_probe(struct mdio_device *mdiodev) + + if (!priv->phy_dev->drv) { + dev_info(dev, "Attached phy not ready\n"); ++ put_device(&priv->phy_dev->mdio.dev); + return -EPROBE_DEFER; + } + diff --git a/drivers/net/plip/plip.c b/drivers/net/plip/plip.c -index 82d6094017113..2a2cb9d453e8e 100644 +index 82d6094017113..b1776116f9f7d 100644 --- a/drivers/net/plip/plip.c +++ b/drivers/net/plip/plip.c +@@ -446,12 +446,12 @@ plip_bh_timeout_error(struct net_device *dev, struct net_local *nl, + } + rcv->state = PLIP_PK_DONE; + if (rcv->skb) { +- kfree_skb(rcv->skb); ++ dev_kfree_skb_irq(rcv->skb); + rcv->skb = NULL; + } + snd->state = PLIP_PK_DONE; + if (snd->skb) { +- dev_kfree_skb(snd->skb); ++ dev_consume_skb_irq(snd->skb); + snd->skb = NULL; + } + spin_unlock_irq(&nl->lock); @@ -1107,7 +1107,7 @@ plip_open(struct net_device *dev) /* Any address will do - we take the first. We already have the first two bytes filled with 0xfc, from @@ -216677,7 +263660,7 @@ index 82d6094017113..2a2cb9d453e8e 100644 memcpy(dev->dev_addr+2, &ifa->ifa_local, 4); } diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c -index fb52cd175b45d..829d6ada1704c 100644 +index fb52cd175b45d..c1f11d1df4cd6 100644 --- a/drivers/net/ppp/ppp_generic.c +++ b/drivers/net/ppp/ppp_generic.c @@ -69,6 +69,8 @@ @@ -216699,7 +263682,16 @@ index fb52cd175b45d..829d6ada1704c 100644 ret = -ENOMEM; skb = alloc_skb(count + pf->hdrlen, GFP_KERNEL); if (!skb) -@@ -1764,7 +1769,7 @@ ppp_send_frame(struct ppp *ppp, struct sk_buff *skb) +@@ -1737,6 +1742,8 @@ ppp_send_frame(struct ppp *ppp, struct sk_buff *skb) + int len; + unsigned char *cp; + ++ skb->dev = ppp->dev; ++ + if (proto < 0x8000) { + #ifdef CONFIG_PPP_FILTER + /* check if we should pass this packet */ +@@ -1764,7 +1771,7 @@ ppp_send_frame(struct ppp *ppp, struct sk_buff *skb) } ++ppp->stats64.tx_packets; @@ -216822,7 +263814,7 @@ index dd7917cab2b12..ab8f5097d3b00 100644 } diff --git a/drivers/net/thunderbolt.c b/drivers/net/thunderbolt.c -index 9a6a8353e1921..ae2211998ded3 100644 +index 9a6a8353e1921..3395dcb0b262b 100644 --- a/drivers/net/thunderbolt.c +++ b/drivers/net/thunderbolt.c @@ -612,18 +612,13 @@ static void tbnet_connected_work(struct work_struct *work) @@ -216872,8 +263864,43 @@ index 9a6a8353e1921..ae2211998ded3 100644 err_free_rx_buffers: tbnet_free_buffers(&net->rx_ring); err_stop_rings: +@@ -896,6 +902,7 @@ static int tbnet_open(struct net_device *dev) + tbnet_start_poll, net); + if (!ring) { + netdev_err(dev, "failed to allocate Rx ring\n"); ++ tb_xdomain_release_out_hopid(xd, hopid); + tb_ring_free(net->tx_ring.ring); + net->tx_ring.ring = NULL; + return -ENOMEM; +@@ -1371,12 +1378,21 @@ static int __init tbnet_init(void) + TBNET_MATCH_FRAGS_ID | TBNET_64K_FRAMES); + + ret = tb_register_property_dir("network", tbnet_dir); +- if (ret) { +- tb_property_free_dir(tbnet_dir); +- return ret; +- } ++ if (ret) ++ 
goto err_free_dir; + +- return tb_register_service_driver(&tbnet_driver); ++ ret = tb_register_service_driver(&tbnet_driver); ++ if (ret) ++ goto err_unregister; ++ ++ return 0; ++ ++err_unregister: ++ tb_unregister_property_dir("network", tbnet_dir); ++err_free_dir: ++ tb_property_free_dir(tbnet_dir); ++ ++ return ret; + } + module_init(tbnet_init); + diff --git a/drivers/net/tun.c b/drivers/net/tun.c -index fecc9a1d293ae..f92d6a12831fe 100644 +index fecc9a1d293ae..a1dda57c812dd 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -209,6 +209,9 @@ struct tun_struct { @@ -216931,7 +263958,25 @@ index fecc9a1d293ae..f92d6a12831fe 100644 synchronize_net(); tun_flow_delete_by_queue(tun, tun->numqueues + 1); -@@ -722,6 +737,7 @@ static void tun_detach_all(struct net_device *dev) +@@ -672,7 +687,6 @@ static void __tun_detach(struct tun_file *tfile, bool clean) + if (tun) + xdp_rxq_info_unreg(&tfile->xdp_rxq); + ptr_ring_cleanup(&tfile->tx_ring, tun_ptr_free); +- sock_put(&tfile->sk); + } + } + +@@ -688,6 +702,9 @@ static void tun_detach(struct tun_file *tfile, bool clean) + if (dev) + netdev_state_change(dev); + rtnl_unlock(); ++ ++ if (clean) ++ sock_put(&tfile->sk); + } + + static void tun_detach_all(struct net_device *dev) +@@ -722,6 +739,7 @@ static void tun_detach_all(struct net_device *dev) sock_put(&tfile->sk); } list_for_each_entry_safe(tfile, tmp, &tun->disabled, next) { @@ -216939,7 +263984,7 @@ index fecc9a1d293ae..f92d6a12831fe 100644 tun_enable_queue(tfile); tun_queue_purge(tfile); xdp_rxq_info_unreg(&tfile->xdp_rxq); -@@ -802,6 +818,7 @@ static int tun_attach(struct tun_struct *tun, struct file *file, +@@ -802,6 +820,7 @@ static int tun_attach(struct tun_struct *tun, struct file *file, if (tfile->detached) { tun_enable_queue(tfile); @@ -216947,7 +263992,7 @@ index fecc9a1d293ae..f92d6a12831fe 100644 } else { sock_hold(&tfile->sk); tun_napi_init(tun, tfile, napi, napi_frags); -@@ -953,6 +970,49 @@ static int check_filter(struct tap_filter *filter, const struct sk_buff *skb) +@@ -953,6 +972,49 @@ static int check_filter(struct tap_filter *filter, const struct sk_buff *skb) static const struct ethtool_ops tun_ethtool_ops; @@ -216997,7 +264042,7 @@ index fecc9a1d293ae..f92d6a12831fe 100644 /* Net device detach from fd. 
*/ static void tun_net_uninit(struct net_device *dev) { -@@ -1010,6 +1070,7 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev) +@@ -1010,6 +1072,7 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev) { struct tun_struct *tun = netdev_priv(dev); int txq = skb->queue_mapping; @@ -217005,7 +264050,7 @@ index fecc9a1d293ae..f92d6a12831fe 100644 struct tun_file *tfile; int len = skb->len; -@@ -1036,7 +1097,10 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev) +@@ -1036,7 +1099,10 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev) goto drop; len = run_ebpf_filter(tun, skb, len); @@ -217017,7 +264062,7 @@ index fecc9a1d293ae..f92d6a12831fe 100644 goto drop; if (unlikely(skb_orphan_frags_rx(skb, GFP_ATOMIC))) -@@ -1054,6 +1118,10 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev) +@@ -1054,6 +1120,10 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev) if (ptr_ring_produce(&tfile->tx_ring, skb)) goto drop; @@ -217028,7 +264073,7 @@ index fecc9a1d293ae..f92d6a12831fe 100644 /* Notify and wake up reader process */ if (tfile->flags & TUN_FASYNC) kill_fasync(&tfile->fasync, SIGIO, POLL_IN); -@@ -1164,6 +1232,7 @@ static int tun_net_change_carrier(struct net_device *dev, bool new_carrier) +@@ -1164,6 +1234,7 @@ static int tun_net_change_carrier(struct net_device *dev, bool new_carrier) } static const struct net_device_ops tun_netdev_ops = { @@ -217036,7 +264081,7 @@ index fecc9a1d293ae..f92d6a12831fe 100644 .ndo_uninit = tun_net_uninit, .ndo_open = tun_net_open, .ndo_stop = tun_net_close, -@@ -1247,6 +1316,7 @@ static int tun_xdp_tx(struct net_device *dev, struct xdp_buff *xdp) +@@ -1247,6 +1318,7 @@ static int tun_xdp_tx(struct net_device *dev, struct xdp_buff *xdp) } static const struct net_device_ops tap_netdev_ops = { @@ -217044,7 +264089,7 @@ index fecc9a1d293ae..f92d6a12831fe 100644 .ndo_uninit = tun_net_uninit, .ndo_open = tun_net_open, .ndo_stop = tun_net_close, -@@ -1287,7 +1357,7 @@ static void tun_flow_uninit(struct tun_struct *tun) +@@ -1287,7 +1359,7 @@ static void tun_flow_uninit(struct tun_struct *tun) #define MAX_MTU 65535 /* Initialize net device. 
*/ @@ -217053,7 +264098,48 @@ index fecc9a1d293ae..f92d6a12831fe 100644 { struct tun_struct *tun = netdev_priv(dev); -@@ -2201,11 +2271,6 @@ static void tun_free_netdev(struct net_device *dev) +@@ -1375,7 +1447,8 @@ static struct sk_buff *tun_napi_alloc_frags(struct tun_file *tfile, + int err; + int i; + +- if (it->nr_segs > MAX_SKB_FRAGS + 1) ++ if (it->nr_segs > MAX_SKB_FRAGS + 1 || ++ len > (ETH_MAX_MTU - NET_SKB_PAD - NET_IP_ALIGN)) + return ERR_PTR(-EMSGSIZE); + + local_bh_disable(); +@@ -1878,17 +1951,25 @@ drop: + skb_headlen(skb)); + + if (unlikely(headlen > skb_headlen(skb))) { ++ WARN_ON_ONCE(1); ++ err = -ENOMEM; + atomic_long_inc(&tun->dev->rx_dropped); ++napi_busy: + napi_free_frags(&tfile->napi); + rcu_read_unlock(); + mutex_unlock(&tfile->napi_mutex); +- WARN_ON(1); +- return -ENOMEM; ++ return err; + } + +- local_bh_disable(); +- napi_gro_frags(&tfile->napi); +- local_bh_enable(); ++ if (likely(napi_schedule_prep(&tfile->napi))) { ++ local_bh_disable(); ++ napi_gro_frags(&tfile->napi); ++ napi_complete(&tfile->napi); ++ local_bh_enable(); ++ } else { ++ err = -EBUSY; ++ goto napi_busy; ++ } + mutex_unlock(&tfile->napi_mutex); + } else if (tfile->napi_enabled) { + struct sk_buff_head *queue = &tfile->sk.sk_write_queue; +@@ -2201,11 +2282,6 @@ static void tun_free_netdev(struct net_device *dev) BUG_ON(!(list_empty(&tun->disabled))); free_percpu(dev->tstats); @@ -217065,7 +264151,7 @@ index fecc9a1d293ae..f92d6a12831fe 100644 tun_flow_uninit(tun); security_tun_dev_free_security(tun->security); __tun_set_ebpf(tun, &tun->steering_prog, NULL); -@@ -2438,7 +2503,8 @@ static int tun_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len) +@@ -2438,7 +2514,8 @@ static int tun_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len) if (!tun) return -EBADFD; @@ -217075,7 +264161,7 @@ index fecc9a1d293ae..f92d6a12831fe 100644 struct tun_page tpage; int n = ctl->num; int flush = 0; -@@ -2711,41 +2777,16 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr) +@@ -2711,41 +2788,16 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr) tun->rx_batched = 0; RCU_INIT_POINTER(tun->steering_prog, NULL); @@ -217101,12 +264187,12 @@ index fecc9a1d293ae..f92d6a12831fe 100644 - dev->vlan_features = dev->features & - ~(NETIF_F_HW_VLAN_CTAG_TX | - NETIF_F_HW_VLAN_STAG_TX); -- -- tun->flags = (tun->flags & ~TUN_FEATURES) | -- (ifr->ifr_flags & TUN_FEATURES); + tun->ifr = ifr; + tun->file = file; +- tun->flags = (tun->flags & ~TUN_FEATURES) | +- (ifr->ifr_flags & TUN_FEATURES); +- - INIT_LIST_HEAD(&tun->disabled); - err = tun_attach(tun, file, false, ifr->ifr_flags & IFF_NAPI, - ifr->ifr_flags & IFF_NAPI_FRAGS, false); @@ -217124,7 +264210,7 @@ index fecc9a1d293ae..f92d6a12831fe 100644 /* free_netdev() won't check refcnt, to avoid race * with dev_put() we need publish tun after registration. 
*/ -@@ -2762,24 +2803,6 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr) +@@ -2762,24 +2814,6 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr) strcpy(ifr->ifr_name, tun->dev->name); return 0; @@ -217489,7 +264575,7 @@ index f25448a088707..0a2c3860179e7 100644 static struct sk_buff * diff --git a/drivers/net/usb/cdc_ether.c b/drivers/net/usb/cdc_ether.c -index eb3817d70f2b8..c6b0de1b752f9 100644 +index eb3817d70f2b8..3497b5a286ea3 100644 --- a/drivers/net/usb/cdc_ether.c +++ b/drivers/net/usb/cdc_ether.c @@ -583,6 +583,11 @@ static const struct usb_device_id products[] = { @@ -217532,6 +264618,19 @@ index eb3817d70f2b8..c6b0de1b752f9 100644 /* ThinkPad USB-C Dock (based on Realtek RTL8153) */ { USB_DEVICE_AND_INTERFACE_INFO(LENOVO_VENDOR_ID, 0x3062, USB_CLASS_COMM, +@@ -981,6 +1000,12 @@ static const struct usb_device_id products[] = { + USB_CDC_SUBCLASS_ETHERNET, + USB_CDC_PROTO_NONE), + .driver_info = (unsigned long)&wwan_info, ++}, { ++ /* Cinterion PLS62-W modem by GEMALTO/THALES */ ++ USB_DEVICE_AND_INTERFACE_INFO(0x1e2d, 0x005b, USB_CLASS_COMM, ++ USB_CDC_SUBCLASS_ETHERNET, ++ USB_CDC_PROTO_NONE), ++ .driver_info = (unsigned long)&wwan_info, + }, { + /* Cinterion PLS83/PLS63 modem by GEMALTO/THALES */ + USB_DEVICE_AND_INTERFACE_INFO(0x1e2d, 0x0069, USB_CLASS_COMM, diff --git a/drivers/net/usb/cdc_mbim.c b/drivers/net/usb/cdc_mbim.c index 82bb5ed94c485..c0b8b4aa78f37 100644 --- a/drivers/net/usb/cdc_mbim.c @@ -217616,6 +264715,32 @@ index 06e2181e58108..d56e276e4d805 100644 ipheth_rcvbulk_callback, dev); dev->rx_urb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP; +diff --git a/drivers/net/usb/kalmia.c b/drivers/net/usb/kalmia.c +index fc5895f85cee2..a552bb1665b8a 100644 +--- a/drivers/net/usb/kalmia.c ++++ b/drivers/net/usb/kalmia.c +@@ -65,8 +65,8 @@ kalmia_send_init_packet(struct usbnet *dev, u8 *init_msg, u8 init_msg_len, + init_msg, init_msg_len, &act_len, KALMIA_USB_TIMEOUT); + if (status != 0) { + netdev_err(dev->net, +- "Error sending init packet. Status %i, length %i\n", +- status, act_len); ++ "Error sending init packet. Status %i\n", ++ status); + return status; + } + else if (act_len != init_msg_len) { +@@ -83,8 +83,8 @@ kalmia_send_init_packet(struct usbnet *dev, u8 *init_msg, u8 init_msg_len, + + if (status != 0) + netdev_err(dev->net, +- "Error receiving init result. Status %i, length %i\n", +- status, act_len); ++ "Error receiving init result. 
Status %i\n", ++ status); + else if (act_len != expected_len) + netdev_err(dev->net, "Unexpected init result length: %i\n", + act_len); diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c index 63cd72c5f580c..3e1a83a22fdd6 100644 --- a/drivers/net/usb/lan78xx.c @@ -217690,8 +264815,23 @@ index 6a92a3fef75e5..cd063f45785b7 100644 net->stats.rx_length_errors++; if (rx_status & 0x08) net->stats.rx_crc_errors++; +diff --git a/drivers/net/usb/plusb.c b/drivers/net/usb/plusb.c +index 17c9c63b8eebb..ce7862dac2b75 100644 +--- a/drivers/net/usb/plusb.c ++++ b/drivers/net/usb/plusb.c +@@ -57,9 +57,7 @@ + static inline int + pl_vendor_req(struct usbnet *dev, u8 req, u8 val, u8 index) + { +- return usbnet_read_cmd(dev, req, +- USB_DIR_IN | USB_TYPE_VENDOR | +- USB_RECIP_DEVICE, ++ return usbnet_write_cmd(dev, req, USB_TYPE_VENDOR | USB_RECIP_DEVICE, + val, index, NULL, 0); + } + diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c -index 33ada2c59952e..6bf5c75f519d9 100644 +index 33ada2c59952e..7b358b896a6d7 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -1085,6 +1085,7 @@ static const struct usb_device_id products[] = { @@ -217702,7 +264842,11 @@ index 33ada2c59952e..6bf5c75f519d9 100644 /* 3. Combined interface devices matching on interface number */ {QMI_FIXED_INTF(0x0408, 0xea42, 4)}, /* Yota / Megafon M100-1 */ -@@ -1355,6 +1356,7 @@ static const struct usb_device_id products[] = { +@@ -1352,9 +1353,11 @@ static const struct usb_device_id products[] = { + {QMI_FIXED_INTF(0x2357, 0x0201, 4)}, /* TP-LINK HSUPA Modem MA180 */ + {QMI_FIXED_INTF(0x2357, 0x9000, 4)}, /* TP-LINK MA260 */ + {QMI_QUIRK_SET_DTR(0x1bc7, 0x1031, 3)}, /* Telit LE910C1-EUX */ ++ {QMI_QUIRK_SET_DTR(0x1bc7, 0x103a, 0)}, /* Telit LE910C4-WWX */ {QMI_QUIRK_SET_DTR(0x1bc7, 0x1040, 2)}, /* Telit LE922A */ {QMI_QUIRK_SET_DTR(0x1bc7, 0x1050, 2)}, /* Telit FN980 */ {QMI_QUIRK_SET_DTR(0x1bc7, 0x1060, 2)}, /* Telit LN920 */ @@ -217710,7 +264854,7 @@ index 33ada2c59952e..6bf5c75f519d9 100644 {QMI_FIXED_INTF(0x1bc7, 0x1100, 3)}, /* Telit ME910 */ {QMI_FIXED_INTF(0x1bc7, 0x1101, 3)}, /* Telit ME910 dual modem */ {QMI_FIXED_INTF(0x1bc7, 0x1200, 5)}, /* Telit LE920 */ -@@ -1391,10 +1393,13 @@ static const struct usb_device_id products[] = { +@@ -1391,10 +1394,13 @@ static const struct usb_device_id products[] = { {QMI_FIXED_INTF(0x413c, 0x81b3, 8)}, /* Dell Wireless 5809e Gobi(TM) 4G LTE Mobile Broadband Card (rev3) */ {QMI_FIXED_INTF(0x413c, 0x81b6, 8)}, /* Dell Wireless 5811e */ {QMI_FIXED_INTF(0x413c, 0x81b6, 10)}, /* Dell Wireless 5811e */ @@ -217724,8 +264868,16 @@ index 33ada2c59952e..6bf5c75f519d9 100644 {QMI_FIXED_INTF(0x03f0, 0x4e1d, 8)}, /* HP lt4111 LTE/EV-DO/HSPA+ Gobi 4G Module */ {QMI_FIXED_INTF(0x03f0, 0x9d1d, 1)}, /* HP lt4120 Snapdragon X5 LTE */ {QMI_FIXED_INTF(0x22de, 0x9061, 3)}, /* WeTelecom WPD-600N */ +@@ -1407,6 +1413,7 @@ static const struct usb_device_id products[] = { + {QMI_FIXED_INTF(0x0489, 0xe0b4, 0)}, /* Foxconn T77W968 LTE */ + {QMI_FIXED_INTF(0x0489, 0xe0b5, 0)}, /* Foxconn T77W968 LTE with eSIM support*/ + {QMI_FIXED_INTF(0x2692, 0x9025, 4)}, /* Cellient MPL200 (rebranded Qualcomm 05c6:9025) */ ++ {QMI_QUIRK_SET_DTR(0x1546, 0x1342, 4)}, /* u-blox LARA-L6 */ + + /* 4. 
Gobi 1000 devices */ + {QMI_GOBI1K_DEVICE(0x05c6, 0x9212)}, /* Acer Gobi Modem Device */ diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c -index f329e39100a7d..109c288d8b47a 100644 +index f329e39100a7d..cf6941b1d2800 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -32,7 +32,7 @@ @@ -217989,8 +265141,11 @@ index f329e39100a7d..109c288d8b47a 100644 case DEVICE_ID_THINKPAD_THUNDERBOLT3_DOCK_GEN2: case DEVICE_ID_THINKPAD_USB_C_DOCK_GEN2: tp->lenovo_macpassthru = 1; -@@ -9763,6 +9811,7 @@ static const struct usb_device_id rtl8152_table[] = { +@@ -9761,8 +9809,10 @@ static const struct usb_device_id rtl8152_table[] = { + REALTEK_USB_DEVICE(VENDOR_ID_MICROSOFT, 0x07ab), + REALTEK_USB_DEVICE(VENDOR_ID_MICROSOFT, 0x07c6), REALTEK_USB_DEVICE(VENDOR_ID_MICROSOFT, 0x0927), ++ REALTEK_USB_DEVICE(VENDOR_ID_MICROSOFT, 0x0c5e), REALTEK_USB_DEVICE(VENDOR_ID_SAMSUNG, 0xa101), REALTEK_USB_DEVICE(VENDOR_ID_LENOVO, 0x304f), + REALTEK_USB_DEVICE(VENDOR_ID_LENOVO, 0x3054), @@ -217998,10 +265153,20 @@ index f329e39100a7d..109c288d8b47a 100644 REALTEK_USB_DEVICE(VENDOR_ID_LENOVO, 0x3069), REALTEK_USB_DEVICE(VENDOR_ID_LENOVO, 0x3082), diff --git a/drivers/net/usb/rndis_host.c b/drivers/net/usb/rndis_host.c -index 85a8b96e39a65..bedd36ab5cf01 100644 +index 85a8b96e39a65..e5f6614da5acc 100644 --- a/drivers/net/usb/rndis_host.c +++ b/drivers/net/usb/rndis_host.c -@@ -608,6 +608,11 @@ static const struct usb_device_id products [] = { +@@ -255,7 +255,8 @@ static int rndis_query(struct usbnet *dev, struct usb_interface *intf, + + off = le32_to_cpu(u.get_c->offset); + len = le32_to_cpu(u.get_c->len); +- if (unlikely((8 + off + len) > CONTROL_BUFFER_SIZE)) ++ if (unlikely((off > CONTROL_BUFFER_SIZE - 8) || ++ (len > CONTROL_BUFFER_SIZE - 8 - off))) + goto response_error; + + if (*reply_len != -1 && len != *reply_len) +@@ -608,6 +609,11 @@ static const struct usb_device_id products [] = { USB_DEVICE_AND_INTERFACE_INFO(0x1630, 0x0042, USB_CLASS_COMM, 2 /* ACM */, 0x0ff), .driver_info = (unsigned long) &rndis_poll_status_info, @@ -218249,7 +265414,7 @@ index 26b1bd8e845b4..7cf9206638c37 100644 .tx_fixup = smsc95xx_tx_fixup, .status = smsc95xx_status, diff --git a/drivers/net/usb/sr9700.c b/drivers/net/usb/sr9700.c -index 6516a37893e27..0c50f24671da3 100644 +index 6516a37893e27..1fac6ee273c4e 100644 --- a/drivers/net/usb/sr9700.c +++ b/drivers/net/usb/sr9700.c @@ -410,7 +410,7 @@ static int sr9700_rx_fixup(struct usbnet *dev, struct sk_buff *skb) @@ -218257,7 +265422,7 @@ index 6516a37893e27..0c50f24671da3 100644 len = (skb->data[1] | (skb->data[2] << 8)) - 4; - if (len > ETH_FRAME_LEN) -+ if (len > ETH_FRAME_LEN || len > skb->len) ++ if (len > ETH_FRAME_LEN || len > skb->len || len < 0) return 0; /* the last packet of current skb */ @@ -218404,7 +265569,7 @@ index 8e717a0b559b3..7984f2157d222 100644 .match_flags = USB_DEVICE_ID_MATCH_INT_INFO | USB_DEVICE_ID_MATCH_DEVICE, diff --git a/drivers/net/veth.c b/drivers/net/veth.c -index 50eb43e5bf459..64fa8e9c0a22b 100644 +index 50eb43e5bf459..41cb9179e8b79 100644 --- a/drivers/net/veth.c +++ b/drivers/net/veth.c @@ -272,9 +272,10 @@ static void __veth_xdp_flush(struct veth_rq *rq) @@ -218453,7 +265618,14 @@ index 50eb43e5bf459..64fa8e9c0a22b 100644 } done++; } -@@ -916,8 +920,10 @@ static int veth_poll(struct napi_struct *napi, int budget) +@@ -912,19 +916,22 @@ static int veth_poll(struct napi_struct *napi, int budget) + xdp_set_return_frame_no_direct(); + done = veth_xdp_rcv(rq, budget, &bq, &stats); + ++ if (stats.xdp_redirect > 0) ++ 
xdp_do_flush(); ++ + if (done < budget && napi_complete_done(napi, done)) { /* Write rx_notify_masked before reading ptr_ring */ smp_store_mb(rq->rx_notify_masked, false); if (unlikely(!__ptr_ring_empty(&rq->xdp_ring))) { @@ -218466,8 +265638,15 @@ index 50eb43e5bf459..64fa8e9c0a22b 100644 } } + if (stats.xdp_tx > 0) + veth_xdp_flush(rq, &bq); +- if (stats.xdp_redirect > 0) +- xdp_do_flush(); + xdp_clear_return_frame_no_direct(); + + return done; diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c -index 4ad25a8b0870c..48fb7bdc0f0b1 100644 +index 4ad25a8b0870c..66ca2ea19ba60 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -213,9 +213,15 @@ struct virtnet_info { @@ -218569,6 +265748,23 @@ index 4ad25a8b0870c..48fb7bdc0f0b1 100644 } u64_stats_update_begin(&rq->stats.syncp); +@@ -1535,13 +1580,13 @@ static int virtnet_poll(struct napi_struct *napi, int budget) + + received = virtnet_receive(rq, budget, &xdp_xmit); + ++ if (xdp_xmit & VIRTIO_XDP_REDIR) ++ xdp_do_flush(); ++ + /* Out of packets? */ + if (received < budget) + virtqueue_napi_complete(napi, rq->vq, received); + +- if (xdp_xmit & VIRTIO_XDP_REDIR) +- xdp_do_flush(); +- + if (xdp_xmit & VIRTIO_XDP_TX) { + sq = virtnet_xdp_get_sq(vi); + if (virtqueue_kick_prepare(sq->vq) && virtqueue_notify(sq->vq)) { @@ -1560,6 +1605,8 @@ static int virtnet_open(struct net_device *dev) struct virtnet_info *vi = netdev_priv(dev); int i, err; @@ -218578,7 +265774,20 @@ index 4ad25a8b0870c..48fb7bdc0f0b1 100644 for (i = 0; i < vi->max_queue_pairs; i++) { if (i < vi->curr_queue_pairs) /* Make sure we have some buffers: if oom use wq. */ -@@ -1940,6 +1987,8 @@ static int virtnet_close(struct net_device *dev) +@@ -1733,8 +1780,10 @@ static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev) + */ + if (sq->vq->num_free < 2+MAX_SKB_FRAGS) { + netif_stop_subqueue(dev, qnum); +- if (!use_napi && +- unlikely(!virtqueue_enable_cb_delayed(sq->vq))) { ++ if (use_napi) { ++ if (unlikely(!virtqueue_enable_cb_delayed(sq->vq))) ++ virtqueue_napi_schedule(&sq->napi, sq->vq); ++ } else if (unlikely(!virtqueue_enable_cb_delayed(sq->vq))) { + /* More just got used, free them then recheck. */ + free_old_xmit_skbs(sq, false); + if (sq->vq->num_free >= 2+MAX_SKB_FRAGS) { +@@ -1940,12 +1989,14 @@ static int virtnet_close(struct net_device *dev) struct virtnet_info *vi = netdev_priv(dev); int i; @@ -218587,7 +265796,14 @@ index 4ad25a8b0870c..48fb7bdc0f0b1 100644 /* Make sure refill_work doesn't re-enable napi! 
*/ cancel_delayed_work_sync(&vi->refill); -@@ -2413,7 +2462,6 @@ static const struct ethtool_ops virtnet_ethtool_ops = { + for (i = 0; i < vi->max_queue_pairs; i++) { +- xdp_rxq_info_unreg(&vi->rq[i].xdp_rxq); + napi_disable(&vi->rq[i].napi); ++ xdp_rxq_info_unreg(&vi->rq[i].xdp_rxq); + virtnet_napi_tx_disable(&vi->sq[i].napi); + } + +@@ -2413,7 +2464,6 @@ static const struct ethtool_ops virtnet_ethtool_ops = { static void virtnet_freeze_down(struct virtio_device *vdev) { struct virtnet_info *vi = vdev->priv; @@ -218595,7 +265811,7 @@ index 4ad25a8b0870c..48fb7bdc0f0b1 100644 /* Make sure no work handler is accessing the device */ flush_work(&vi->config_work); -@@ -2421,14 +2469,8 @@ static void virtnet_freeze_down(struct virtio_device *vdev) +@@ -2421,14 +2471,8 @@ static void virtnet_freeze_down(struct virtio_device *vdev) netif_tx_lock_bh(vi->dev); netif_device_detach(vi->dev); netif_tx_unlock_bh(vi->dev); @@ -218612,7 +265828,7 @@ index 4ad25a8b0870c..48fb7bdc0f0b1 100644 } static int init_vqs(struct virtnet_info *vi); -@@ -2436,7 +2478,7 @@ static int init_vqs(struct virtnet_info *vi); +@@ -2436,7 +2480,7 @@ static int init_vqs(struct virtnet_info *vi); static int virtnet_restore_up(struct virtio_device *vdev) { struct virtnet_info *vi = vdev->priv; @@ -218621,7 +265837,7 @@ index 4ad25a8b0870c..48fb7bdc0f0b1 100644 err = init_vqs(vi); if (err) -@@ -2444,16 +2486,12 @@ static int virtnet_restore_up(struct virtio_device *vdev) +@@ -2444,16 +2488,12 @@ static int virtnet_restore_up(struct virtio_device *vdev) virtio_device_ready(vdev); @@ -218643,7 +265859,7 @@ index 4ad25a8b0870c..48fb7bdc0f0b1 100644 } netif_tx_lock_bh(vi->dev); -@@ -3157,6 +3195,7 @@ static int virtnet_probe(struct virtio_device *vdev) +@@ -3157,6 +3197,7 @@ static int virtnet_probe(struct virtio_device *vdev) vdev->priv = vi; INIT_WORK(&vi->config_work, virtnet_config_changed_work); @@ -218651,7 +265867,7 @@ index 4ad25a8b0870c..48fb7bdc0f0b1 100644 /* If we can receive ANY GSO packets, we must allocate large ones. 
*/ if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO4) || -@@ -3236,14 +3275,20 @@ static int virtnet_probe(struct virtio_device *vdev) +@@ -3236,14 +3277,20 @@ static int virtnet_probe(struct virtio_device *vdev) } } @@ -218674,10 +265890,42 @@ index 4ad25a8b0870c..48fb7bdc0f0b1 100644 if (err) { pr_debug("virtio_net: registering cpu notifier failed\n"); diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c -index 8799854bacb29..bc3192cf48e3e 100644 +index 8799854bacb29..b88092a6bc851 100644 --- a/drivers/net/vmxnet3/vmxnet3_drv.c +++ b/drivers/net/vmxnet3/vmxnet3_drv.c -@@ -589,6 +589,7 @@ vmxnet3_rq_alloc_rx_buf(struct vmxnet3_rx_queue *rq, u32 ring_idx, +@@ -75,8 +75,14 @@ vmxnet3_enable_all_intrs(struct vmxnet3_adapter *adapter) + + for (i = 0; i < adapter->intr.num_intrs; i++) + vmxnet3_enable_intr(adapter, i); +- adapter->shared->devRead.intrConf.intrCtrl &= ++ if (!VMXNET3_VERSION_GE_6(adapter) || ++ !adapter->queuesExtEnabled) { ++ adapter->shared->devRead.intrConf.intrCtrl &= ++ cpu_to_le32(~VMXNET3_IC_DISABLE_ALL); ++ } else { ++ adapter->shared->devReadExt.intrConfExt.intrCtrl &= + cpu_to_le32(~VMXNET3_IC_DISABLE_ALL); ++ } + } + + +@@ -85,8 +91,14 @@ vmxnet3_disable_all_intrs(struct vmxnet3_adapter *adapter) + { + int i; + +- adapter->shared->devRead.intrConf.intrCtrl |= ++ if (!VMXNET3_VERSION_GE_6(adapter) || ++ !adapter->queuesExtEnabled) { ++ adapter->shared->devRead.intrConf.intrCtrl |= + cpu_to_le32(VMXNET3_IC_DISABLE_ALL); ++ } else { ++ adapter->shared->devReadExt.intrConfExt.intrCtrl |= ++ cpu_to_le32(VMXNET3_IC_DISABLE_ALL); ++ } + for (i = 0; i < adapter->intr.num_intrs; i++) + vmxnet3_disable_intr(adapter, i); + } +@@ -589,6 +601,7 @@ vmxnet3_rq_alloc_rx_buf(struct vmxnet3_rx_queue *rq, u32 ring_idx, if (dma_mapping_error(&adapter->pdev->dev, rbi->dma_addr)) { dev_kfree_skb_any(rbi->skb); @@ -218685,7 +265933,7 @@ index 8799854bacb29..bc3192cf48e3e 100644 rq->stats.rx_buf_alloc_failure++; break; } -@@ -613,6 +614,7 @@ vmxnet3_rq_alloc_rx_buf(struct vmxnet3_rx_queue *rq, u32 ring_idx, +@@ -613,6 +626,7 @@ vmxnet3_rq_alloc_rx_buf(struct vmxnet3_rx_queue *rq, u32 ring_idx, if (dma_mapping_error(&adapter->pdev->dev, rbi->dma_addr)) { put_page(rbi->page); @@ -218693,7 +265941,82 @@ index 8799854bacb29..bc3192cf48e3e 100644 rq->stats.rx_buf_alloc_failure++; break; } -@@ -1666,6 +1668,10 @@ vmxnet3_rq_cleanup(struct vmxnet3_rx_queue *rq, +@@ -1228,6 +1242,10 @@ vmxnet3_rx_csum(struct vmxnet3_adapter *adapter, + (le32_to_cpu(gdesc->dword[3]) & + VMXNET3_RCD_CSUM_OK) == VMXNET3_RCD_CSUM_OK) { + skb->ip_summed = CHECKSUM_UNNECESSARY; ++ if ((le32_to_cpu(gdesc->dword[0]) & ++ (1UL << VMXNET3_RCD_HDR_INNER_SHIFT))) { ++ skb->csum_level = 1; ++ } + WARN_ON_ONCE(!(gdesc->rcd.tcp || gdesc->rcd.udp) && + !(le32_to_cpu(gdesc->dword[0]) & + (1UL << VMXNET3_RCD_HDR_INNER_SHIFT))); +@@ -1237,6 +1255,10 @@ vmxnet3_rx_csum(struct vmxnet3_adapter *adapter, + } else if (gdesc->rcd.v6 && (le32_to_cpu(gdesc->dword[3]) & + (1 << VMXNET3_RCD_TUC_SHIFT))) { + skb->ip_summed = CHECKSUM_UNNECESSARY; ++ if ((le32_to_cpu(gdesc->dword[0]) & ++ (1UL << VMXNET3_RCD_HDR_INNER_SHIFT))) { ++ skb->csum_level = 1; ++ } + WARN_ON_ONCE(!(gdesc->rcd.tcp || gdesc->rcd.udp) && + !(le32_to_cpu(gdesc->dword[0]) & + (1UL << VMXNET3_RCD_HDR_INNER_SHIFT))); +@@ -1348,6 +1370,7 @@ vmxnet3_rq_rx_complete(struct vmxnet3_rx_queue *rq, + }; + u32 num_pkts = 0; + bool skip_page_frags = false; ++ bool encap_lro = false; + struct Vmxnet3_RxCompDesc *rcd; + struct vmxnet3_rx_ctx *ctx = 
&rq->rx_ctx; + u16 segCnt = 0, mss = 0; +@@ -1506,13 +1529,18 @@ vmxnet3_rq_rx_complete(struct vmxnet3_rx_queue *rq, + if (VMXNET3_VERSION_GE_2(adapter) && + rcd->type == VMXNET3_CDTYPE_RXCOMP_LRO) { + struct Vmxnet3_RxCompDescExt *rcdlro; ++ union Vmxnet3_GenericDesc *gdesc; ++ + rcdlro = (struct Vmxnet3_RxCompDescExt *)rcd; ++ gdesc = (union Vmxnet3_GenericDesc *)rcd; + + segCnt = rcdlro->segCnt; + WARN_ON_ONCE(segCnt == 0); + mss = rcdlro->mss; + if (unlikely(segCnt <= 1)) + segCnt = 0; ++ encap_lro = (le32_to_cpu(gdesc->dword[0]) & ++ (1UL << VMXNET3_RCD_HDR_INNER_SHIFT)); + } else { + segCnt = 0; + } +@@ -1580,7 +1608,7 @@ vmxnet3_rq_rx_complete(struct vmxnet3_rx_queue *rq, + vmxnet3_rx_csum(adapter, skb, + (union Vmxnet3_GenericDesc *)rcd); + skb->protocol = eth_type_trans(skb, adapter->netdev); +- if (!rcd->tcp || ++ if ((!rcd->tcp && !encap_lro) || + !(adapter->netdev->features & NETIF_F_LRO)) + goto not_lro; + +@@ -1589,7 +1617,7 @@ vmxnet3_rq_rx_complete(struct vmxnet3_rx_queue *rq, + SKB_GSO_TCPV4 : SKB_GSO_TCPV6; + skb_shinfo(skb)->gso_size = mss; + skb_shinfo(skb)->gso_segs = segCnt; +- } else if (segCnt != 0 || skb->len > mtu) { ++ } else if ((segCnt != 0 || skb->len > mtu) && !encap_lro) { + u32 hlen; + + hlen = vmxnet3_get_hdr_len(adapter, skb, +@@ -1618,6 +1646,7 @@ not_lro: + napi_gro_receive(&rq->napi, skb); + + ctx->skb = NULL; ++ encap_lro = false; + num_pkts++; + } + +@@ -1666,6 +1695,10 @@ vmxnet3_rq_cleanup(struct vmxnet3_rx_queue *rq, u32 i, ring_idx; struct Vmxnet3_RxDesc *rxd; @@ -218704,7 +266027,7 @@ index 8799854bacb29..bc3192cf48e3e 100644 for (ring_idx = 0; ring_idx < 2; ring_idx++) { for (i = 0; i < rq->rx_ring[ring_idx].size; i++) { #ifdef __BIG_ENDIAN_BITFIELD -@@ -3261,7 +3267,7 @@ vmxnet3_alloc_intr_resources(struct vmxnet3_adapter *adapter) +@@ -3261,7 +3294,7 @@ vmxnet3_alloc_intr_resources(struct vmxnet3_adapter *adapter) #ifdef CONFIG_PCI_MSI if (adapter->intr.type == VMXNET3_IT_MSIX) { @@ -218713,7 +266036,7 @@ index 8799854bacb29..bc3192cf48e3e 100644 nvec = adapter->share_intr == VMXNET3_INTR_TXSHARE ? 
1 : adapter->num_tx_queues; -@@ -3274,14 +3280,15 @@ vmxnet3_alloc_intr_resources(struct vmxnet3_adapter *adapter) +@@ -3274,14 +3307,15 @@ vmxnet3_alloc_intr_resources(struct vmxnet3_adapter *adapter) for (i = 0; i < nvec; i++) adapter->intr.msix_entries[i].entry = i; @@ -218732,7 +266055,7 @@ index 8799854bacb29..bc3192cf48e3e 100644 if (adapter->share_intr != VMXNET3_INTR_BUDDYSHARE || adapter->num_rx_queues != 1) { adapter->share_intr = VMXNET3_INTR_TXSHARE; -@@ -3291,14 +3298,14 @@ vmxnet3_alloc_intr_resources(struct vmxnet3_adapter *adapter) +@@ -3291,14 +3325,14 @@ vmxnet3_alloc_intr_resources(struct vmxnet3_adapter *adapter) } } @@ -218935,6 +266258,65 @@ index 141635a35c28a..129e270e9a7cd 100644 } rd->remote_ip = *ip; +diff --git a/drivers/net/wan/farsync.c b/drivers/net/wan/farsync.c +index 6a212c085435b..5b01642ca44e0 100644 +--- a/drivers/net/wan/farsync.c ++++ b/drivers/net/wan/farsync.c +@@ -2545,6 +2545,7 @@ fst_remove_one(struct pci_dev *pdev) + struct net_device *dev = port_to_dev(&card->ports[i]); + + unregister_hdlc_device(dev); ++ free_netdev(dev); + } + + fst_disable_intr(card); +@@ -2564,6 +2565,7 @@ fst_remove_one(struct pci_dev *pdev) + card->tx_dma_handle_card); + } + fst_card_array[card->card_no] = NULL; ++ kfree(card); + } + + static struct pci_driver fst_driver = { +diff --git a/drivers/net/wan/fsl_ucc_hdlc.c b/drivers/net/wan/fsl_ucc_hdlc.c +index cda1b4ce6b210..8305df1a3008a 100644 +--- a/drivers/net/wan/fsl_ucc_hdlc.c ++++ b/drivers/net/wan/fsl_ucc_hdlc.c +@@ -1241,9 +1241,11 @@ static int ucc_hdlc_probe(struct platform_device *pdev) + free_dev: + free_netdev(dev); + undo_uhdlc_init: +- iounmap(utdm->siram); ++ if (utdm) ++ iounmap(utdm->siram); + unmap_si_regs: +- iounmap(utdm->si_regs); ++ if (utdm) ++ iounmap(utdm->si_regs); + free_utdm: + if (uhdlc_priv->tsa) + kfree(utdm); +diff --git a/drivers/net/wan/lapbether.c b/drivers/net/wan/lapbether.c +index 89d31adc3809b..5037ef82be461 100644 +--- a/drivers/net/wan/lapbether.c ++++ b/drivers/net/wan/lapbether.c +@@ -325,6 +325,7 @@ static int lapbeth_open(struct net_device *dev) + + err = lapb_register(dev, &lapbeth_callbacks); + if (err != LAPB_OK) { ++ napi_disable(&lapbeth->napi); + pr_err("lapb_register error: %d\n", err); + return -ENODEV; + } +@@ -446,7 +447,7 @@ static int lapbeth_device_event(struct notifier_block *this, + if (dev_net(dev) != &init_net) + return NOTIFY_DONE; + +- if (!dev_is_ethdev(dev)) ++ if (!dev_is_ethdev(dev) && !lapbeth_get_x25_dev(dev)) + return NOTIFY_DONE; + + switch (event) { diff --git a/drivers/net/wireguard/allowedips.c b/drivers/net/wireguard/allowedips.c index b7197e80f2264..5bf7822c53f18 100644 --- a/drivers/net/wireguard/allowedips.c @@ -219482,7 +266864,7 @@ index 8c496b7471082..0414d7a6ce741 100644 } diff --git a/drivers/net/wireless/ath/ar5523/ar5523.c b/drivers/net/wireless/ath/ar5523/ar5523.c -index 49cc4b7ed5163..1baec4b412c8d 100644 +index 49cc4b7ed5163..efe38b2c1df73 100644 --- a/drivers/net/wireless/ath/ar5523/ar5523.c +++ b/drivers/net/wireless/ath/ar5523/ar5523.c @@ -153,6 +153,10 @@ static void ar5523_cmd_rx_cb(struct urb *urb) @@ -219496,6 +266878,26 @@ index 49cc4b7ed5163..1baec4b412c8d 100644 memcpy(cmd->odata, hdr + 1, sizeof(u32)); cmd->olen = sizeof(u32); cmd->res = 0; +@@ -237,6 +241,11 @@ static void ar5523_cmd_tx_cb(struct urb *urb) + } + } + ++static void ar5523_cancel_tx_cmd(struct ar5523 *ar) ++{ ++ usb_kill_urb(ar->tx_cmd.urb_tx); ++} ++ + static int ar5523_cmd(struct ar5523 *ar, u32 code, const void *idata, + int ilen, void *odata, int olen, 
int flags) + { +@@ -276,6 +285,7 @@ static int ar5523_cmd(struct ar5523 *ar, u32 code, const void *idata, + } + + if (!wait_for_completion_timeout(&cmd->done, 2 * HZ)) { ++ ar5523_cancel_tx_cmd(ar); + cmd->odata = NULL; + ar5523_err(ar, "timeout waiting for command %02x reply\n", + code); diff --git a/drivers/net/wireless/ath/ath10k/core.c b/drivers/net/wireless/ath/ath10k/core.c index 2f9be182fbfbb..58e86e662ab83 100644 --- a/drivers/net/wireless/ath/ath10k/core.c @@ -219950,6 +267352,41 @@ index c272b290fa73d..8208434d7d2b2 100644 } } spin_unlock_bh(&ar->data_lock); +diff --git a/drivers/net/wireless/ath/ath10k/pci.c b/drivers/net/wireless/ath/ath10k/pci.c +index 4d4e2f91e15cf..85a30c301dad7 100644 +--- a/drivers/net/wireless/ath/ath10k/pci.c ++++ b/drivers/net/wireless/ath/ath10k/pci.c +@@ -3793,18 +3793,22 @@ static struct pci_driver ath10k_pci_driver = { + + static int __init ath10k_pci_init(void) + { +- int ret; ++ int ret1, ret2; + +- ret = pci_register_driver(&ath10k_pci_driver); +- if (ret) ++ ret1 = pci_register_driver(&ath10k_pci_driver); ++ if (ret1) + printk(KERN_ERR "failed to register ath10k pci driver: %d\n", +- ret); ++ ret1); + +- ret = ath10k_ahb_init(); +- if (ret) +- printk(KERN_ERR "ahb init failed: %d\n", ret); ++ ret2 = ath10k_ahb_init(); ++ if (ret2) ++ printk(KERN_ERR "ahb init failed: %d\n", ret2); + +- return ret; ++ if (ret1 && ret2) ++ return ret1; ++ ++ /* registered to at least one bus */ ++ return 0; + } + module_init(ath10k_pci_init); + diff --git a/drivers/net/wireless/ath/ath10k/qmi.c b/drivers/net/wireless/ath/ath10k/qmi.c index 07e478f9a808c..80fcb917fe4e1 100644 --- a/drivers/net/wireless/ath/ath10k/qmi.c @@ -221203,8 +268640,21 @@ index b5e34d670715e..e4a65513a1bfd 100644 switch (event->type) { case ATH11K_QMI_EVENT_SERVER_ARRIVE: +diff --git a/drivers/net/wireless/ath/ath11k/qmi.h b/drivers/net/wireless/ath/ath11k/qmi.h +index 3d59303307032..25940b683ea45 100644 +--- a/drivers/net/wireless/ath/ath11k/qmi.h ++++ b/drivers/net/wireless/ath/ath11k/qmi.h +@@ -27,7 +27,7 @@ + #define ATH11K_QMI_WLANFW_MAX_NUM_MEM_SEG_V01 52 + #define ATH11K_QMI_CALDB_SIZE 0x480000 + #define ATH11K_QMI_BDF_EXT_STR_LENGTH 0x20 +-#define ATH11K_QMI_FW_MEM_REQ_SEGMENT_CNT 3 ++#define ATH11K_QMI_FW_MEM_REQ_SEGMENT_CNT 5 + + #define QMI_WLFW_REQUEST_MEM_IND_V01 0x0035 + #define QMI_WLFW_FW_MEM_READY_IND_V01 0x0037 diff --git a/drivers/net/wireless/ath/ath11k/reg.c b/drivers/net/wireless/ath/ath11k/reg.c -index e1a1df169034b..f793324ad0b73 100644 +index e1a1df169034b..562ecfd50742f 100644 --- a/drivers/net/wireless/ath/ath11k/reg.c +++ b/drivers/net/wireless/ath/ath11k/reg.c @@ -198,7 +198,7 @@ static void ath11k_copy_regd(struct ieee80211_regdomain *regd_orig, @@ -221237,7 +268687,20 @@ index e1a1df169034b..f793324ad0b73 100644 } if (!regd) { -@@ -458,6 +459,9 @@ ath11k_reg_adjust_bw(u16 start_freq, u16 end_freq, u16 max_bw) +@@ -246,11 +247,7 @@ int ath11k_regd_update(struct ath11k *ar, bool init) + goto err; + } + +- rtnl_lock(); +- wiphy_lock(ar->hw->wiphy); +- ret = regulatory_set_wiphy_regd_sync(ar->hw->wiphy, regd_copy); +- wiphy_unlock(ar->hw->wiphy); +- rtnl_unlock(); ++ ret = regulatory_set_wiphy_regd(ar->hw->wiphy, regd_copy); + + kfree(regd_copy); + +@@ -458,6 +455,9 @@ ath11k_reg_adjust_bw(u16 start_freq, u16 end_freq, u16 max_bw) { u16 bw; @@ -221247,7 +268710,7 @@ index e1a1df169034b..f793324ad0b73 100644 bw = end_freq - start_freq; bw = min_t(u16, bw, max_bw); -@@ -465,8 +469,10 @@ ath11k_reg_adjust_bw(u16 start_freq, u16 end_freq, u16 max_bw) +@@ -465,8 +465,10 
@@ ath11k_reg_adjust_bw(u16 start_freq, u16 end_freq, u16 max_bw) bw = 80; else if (bw >= 40 && bw < 80) bw = 40; @@ -221259,7 +268722,7 @@ index e1a1df169034b..f793324ad0b73 100644 return bw; } -@@ -490,73 +496,77 @@ ath11k_reg_update_weather_radar_band(struct ath11k_base *ab, +@@ -490,73 +492,77 @@ ath11k_reg_update_weather_radar_band(struct ath11k_base *ab, struct cur_reg_rule *reg_rule, u8 *rule_idx, u32 flags, u16 max_bw) { @@ -221383,7 +268846,7 @@ index e1a1df169034b..f793324ad0b73 100644 *rule_idx = i; } -@@ -683,7 +693,7 @@ void ath11k_regd_update_work(struct work_struct *work) +@@ -683,7 +689,7 @@ void ath11k_regd_update_work(struct work_struct *work) regd_update_work); int ret; @@ -221680,7 +269143,7 @@ index a171dbb29fbb6..ad949eb02f3d2 100644 #define AR_CH0_XTAL (AR_SREV_9300(ah) ? 0x16294 : \ ((AR_SREV_9462(ah) || AR_SREV_9565(ah)) ? 0x16298 : \ diff --git a/drivers/net/wireless/ath/ath9k/hif_usb.c b/drivers/net/wireless/ath/ath9k/hif_usb.c -index 860da13bfb6ac..f06eec99de688 100644 +index 860da13bfb6ac..f938ac1a4abd4 100644 --- a/drivers/net/wireless/ath/ath9k/hif_usb.c +++ b/drivers/net/wireless/ath/ath9k/hif_usb.c @@ -590,6 +590,13 @@ static void ath9k_hif_usb_rx_stream(struct hif_device_usb *hif_dev, @@ -221697,6 +269160,123 @@ index 860da13bfb6ac..f06eec99de688 100644 pad_len = 4 - (pkt_len & 0x3); if (pad_len == 4) pad_len = 0; +@@ -702,14 +709,13 @@ static void ath9k_hif_usb_reg_in_cb(struct urb *urb) + struct rx_buf *rx_buf = (struct rx_buf *)urb->context; + struct hif_device_usb *hif_dev = rx_buf->hif_dev; + struct sk_buff *skb = rx_buf->skb; +- struct sk_buff *nskb; + int ret; + + if (!skb) + return; + + if (!hif_dev) +- goto free; ++ goto free_skb; + + switch (urb->status) { + case 0: +@@ -718,7 +724,7 @@ static void ath9k_hif_usb_reg_in_cb(struct urb *urb) + case -ECONNRESET: + case -ENODEV: + case -ESHUTDOWN: +- goto free; ++ goto free_skb; + default: + skb_reset_tail_pointer(skb); + skb_trim(skb, 0); +@@ -729,25 +735,27 @@ static void ath9k_hif_usb_reg_in_cb(struct urb *urb) + if (likely(urb->actual_length != 0)) { + skb_put(skb, urb->actual_length); + +- /* Process the command first */ ++ /* ++ * Process the command first. ++ * skb is either freed here or passed to be ++ * managed to another callback function. 
++ */ + ath9k_htc_rx_msg(hif_dev->htc_handle, skb, + skb->len, USB_REG_IN_PIPE); + +- +- nskb = alloc_skb(MAX_REG_IN_BUF_SIZE, GFP_ATOMIC); +- if (!nskb) { ++ skb = alloc_skb(MAX_REG_IN_BUF_SIZE, GFP_ATOMIC); ++ if (!skb) { + dev_err(&hif_dev->udev->dev, + "ath9k_htc: REG_IN memory allocation failure\n"); +- urb->context = NULL; +- return; ++ goto free_rx_buf; + } + +- rx_buf->skb = nskb; ++ rx_buf->skb = skb; + + usb_fill_int_urb(urb, hif_dev->udev, + usb_rcvintpipe(hif_dev->udev, + USB_REG_IN_PIPE), +- nskb->data, MAX_REG_IN_BUF_SIZE, ++ skb->data, MAX_REG_IN_BUF_SIZE, + ath9k_hif_usb_reg_in_cb, rx_buf, 1); + } + +@@ -756,12 +764,13 @@ resubmit: + ret = usb_submit_urb(urb, GFP_ATOMIC); + if (ret) { + usb_unanchor_urb(urb); +- goto free; ++ goto free_skb; + } + + return; +-free: ++free_skb: + kfree_skb(skb); ++free_rx_buf: + kfree(rx_buf); + urb->context = NULL; + } +@@ -774,14 +783,10 @@ static void ath9k_hif_usb_dealloc_tx_urbs(struct hif_device_usb *hif_dev) + spin_lock_irqsave(&hif_dev->tx.tx_lock, flags); + list_for_each_entry_safe(tx_buf, tx_buf_tmp, + &hif_dev->tx.tx_buf, list) { +- usb_get_urb(tx_buf->urb); +- spin_unlock_irqrestore(&hif_dev->tx.tx_lock, flags); +- usb_kill_urb(tx_buf->urb); + list_del(&tx_buf->list); + usb_free_urb(tx_buf->urb); + kfree(tx_buf->buf); + kfree(tx_buf); +- spin_lock_irqsave(&hif_dev->tx.tx_lock, flags); + } + spin_unlock_irqrestore(&hif_dev->tx.tx_lock, flags); + +@@ -1323,10 +1328,24 @@ static int send_eject_command(struct usb_interface *interface) + static int ath9k_hif_usb_probe(struct usb_interface *interface, + const struct usb_device_id *id) + { ++ struct usb_endpoint_descriptor *bulk_in, *bulk_out, *int_in, *int_out; + struct usb_device *udev = interface_to_usbdev(interface); ++ struct usb_host_interface *alt; + struct hif_device_usb *hif_dev; + int ret = 0; + ++ /* Verify the expected endpoints are present */ ++ alt = interface->cur_altsetting; ++ if (usb_find_common_endpoints(alt, &bulk_in, &bulk_out, &int_in, &int_out) < 0 || ++ usb_endpoint_num(bulk_in) != USB_WLAN_RX_PIPE || ++ usb_endpoint_num(bulk_out) != USB_WLAN_TX_PIPE || ++ usb_endpoint_num(int_in) != USB_REG_IN_PIPE || ++ usb_endpoint_num(int_out) != USB_REG_OUT_PIPE) { ++ dev_err(&udev->dev, ++ "ath9k_htc: Device endpoint numbers are not the expected ones\n"); ++ return -ENODEV; ++ } ++ + if (id->driver_info == STORAGE_DEVICE) + return send_eject_command(interface); + diff --git a/drivers/net/wireless/ath/ath9k/htc.h b/drivers/net/wireless/ath/ath9k/htc.h index 0a1634238e673..e3d546ef71ddc 100644 --- a/drivers/net/wireless/ath/ath9k/htc.h @@ -223106,6 +270686,48 @@ index 06891b4f837b9..fdf78c10a05c2 100644 end = (channel + 5 < 14) ? 
channel + 5 : 13; for (i = start; i <= end; i++) { +diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c +index 9db12ffd2ff80..fc622e6b329a3 100644 +--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c ++++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c +@@ -90,6 +90,9 @@ + #define BRCMF_ASSOC_PARAMS_FIXED_SIZE \ + (sizeof(struct brcmf_assoc_params_le) - sizeof(u16)) + ++#define BRCMF_MAX_CHANSPEC_LIST \ ++ (BRCMF_DCMD_MEDLEN / sizeof(__le32) - 1) ++ + static bool check_vif_up(struct brcmf_cfg80211_vif *vif) + { + if (!test_bit(BRCMF_VIF_STATUS_READY, &vif->sme_state)) { +@@ -6557,6 +6560,13 @@ static int brcmf_construct_chaninfo(struct brcmf_cfg80211_info *cfg, + band->channels[i].flags = IEEE80211_CHAN_DISABLED; + + total = le32_to_cpu(list->count); ++ if (total > BRCMF_MAX_CHANSPEC_LIST) { ++ bphy_err(drvr, "Invalid count of channel Spec. (%u)\n", ++ total); ++ err = -EINVAL; ++ goto fail_pbuf; ++ } ++ + for (i = 0; i < total; i++) { + ch.chspec = (u16)le32_to_cpu(list->element[i]); + cfg->d11inf.decchspec(&ch); +@@ -6702,6 +6712,13 @@ static int brcmf_enable_bw40_2g(struct brcmf_cfg80211_info *cfg) + band = cfg_to_wiphy(cfg)->bands[NL80211_BAND_2GHZ]; + list = (struct brcmf_chanspec_list *)pbuf; + num_chan = le32_to_cpu(list->count); ++ if (num_chan > BRCMF_MAX_CHANSPEC_LIST) { ++ bphy_err(drvr, "Invalid count of channel Spec. (%u)\n", ++ num_chan); ++ kfree(pbuf); ++ return -EINVAL; ++ } ++ + for (i = 0; i < num_chan; i++) { + ch.chspec = (u16)le32_to_cpu(list->element[i]); + cfg->d11inf.decchspec(&ch); diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c index db5f8535fdb57..e5bae62245215 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c @@ -223149,7 +270771,7 @@ index 6d5188b78f2de..0af452dca7664 100644 /* Match for the GPDwin which unfortunately uses somewhat * generic dmi strings, which is why we test for 4 strings. 
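The brcmf_construct_chaninfo() and brcmf_enable_bw40_2g() hunks earlier in this brcmfmac diff add the same defensive check: the channel-spec count comes back from the firmware inside a fixed-size ioctl buffer, so it is validated against BRCMF_MAX_CHANSPEC_LIST (derived from BRCMF_DCMD_MEDLEN) before the loop that indexes list->element[]. A self-contained sketch of that bound-the-untrusted-count pattern, with made-up buffer sizes and type names rather than the brcmfmac ones:

#include <errno.h>
#include <stdint.h>
#include <stdio.h>

#define BUF_LEN		64	/* stands in for BRCMF_DCMD_MEDLEN */
#define MAX_ELEMENTS	(BUF_LEN / sizeof(uint32_t) - 1)

struct element_list {
	uint32_t count;		/* reported by the (untrusted) device */
	uint32_t element[];	/* at most MAX_ELEMENTS fit in the buffer */
};

static int walk_list(const struct element_list *list)
{
	uint32_t i, total = list->count;

	if (total > MAX_ELEMENTS) {
		fprintf(stderr, "invalid element count (%u)\n",
			(unsigned)total);
		return -EINVAL;	/* reject before reading past the buffer */
	}

	for (i = 0; i < total; i++)
		printf("element[%u] = %u\n", (unsigned)i,
		       (unsigned)list->element[i]);
	return 0;
}

int main(void)
{
	uint32_t buf[BUF_LEN / sizeof(uint32_t)] = { 0 };
	struct element_list *list = (struct element_list *)buf;

	list->count = 3;
	list->element[0] = 10;
	list->element[1] = 20;
	list->element[2] = 30;
	walk_list(list);

	list->count = 1000;	/* hostile count: rejected, never walked */
	return walk_list(list) ? 1 : 0;
}

Clamping to the buffer capacity rather than trusting the device turns a potential out-of-bounds read into a clean -EINVAL, which is all the kernel fix does as well.
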
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/firmware.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/firmware.c -index 0eb13e5df5177..dcbe55b56e437 100644 +index 0eb13e5df5177..c54d8722e755c 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/firmware.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/firmware.c @@ -207,6 +207,8 @@ static int brcmf_init_nvram_parser(struct nvram_parser *nvp, @@ -223181,8 +270803,35 @@ index 0eb13e5df5177..dcbe55b56e437 100644 if (alt_path) { ret = request_firmware_nowait(THIS_MODULE, true, alt_path, fwctx->dev, GFP_KERNEL, fwctx, +@@ -742,6 +746,11 @@ brcmf_fw_alloc_request(u32 chip, u32 chiprev, + u32 i, j; + char end = '\0'; + ++ if (chiprev >= BITS_PER_TYPE(u32)) { ++ brcmf_err("Invalid chip revision %u\n", chiprev); ++ return NULL; ++ } ++ + for (i = 0; i < table_size; i++) { + if (mapping_table[i].chipid == chip && + mapping_table[i].revmask & BIT(chiprev)) +diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fweh.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fweh.c +index bc3f4e4edcdf9..dac7eb77799bd 100644 +--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fweh.c ++++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fweh.c +@@ -228,6 +228,10 @@ static void brcmf_fweh_event_worker(struct work_struct *work) + brcmf_fweh_event_name(event->code), event->code, + event->emsg.ifidx, event->emsg.bsscfgidx, + event->emsg.addr); ++ if (event->emsg.bsscfgidx >= BRCMF_MAX_IFS) { ++ bphy_err(drvr, "invalid bsscfg index: %u\n", event->emsg.bsscfgidx); ++ goto event_free; ++ } + + /* convert event message */ + emsg_be = &event->emsg; diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c -index 8b149996fc000..3ff4997e1c97a 100644 +index 8b149996fc000..6d8a042170182 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c @@ -12,6 +12,7 @@ @@ -223255,7 +270904,27 @@ index 8b149996fc000..3ff4997e1c97a 100644 static void brcmf_pcie_copy_dev_tomem(struct brcmf_pciedev_info *devinfo, u32 mem_offset, void *dstaddr, u32 len) -@@ -1348,6 +1315,18 @@ static void brcmf_pcie_down(struct device *dev) +@@ -659,7 +626,7 @@ static int brcmf_pcie_exit_download_state(struct brcmf_pciedev_info *devinfo, + } + + if (!brcmf_chip_set_active(devinfo->ci, resetintr)) +- return -EINVAL; ++ return -EIO; + return 0; + } + +@@ -1151,6 +1118,10 @@ static int brcmf_pcie_init_ringbuffers(struct brcmf_pciedev_info *devinfo) + BRCMF_NROF_H2D_COMMON_MSGRINGS; + max_completionrings = BRCMF_NROF_D2H_COMMON_MSGRINGS; + } ++ if (max_flowrings > 512) { ++ brcmf_err(bus, "invalid max_flowrings(%d)\n", max_flowrings); ++ return -EIO; ++ } + + if (devinfo->dma_idx_sz != 0) { + bufsz = (max_submissionrings + max_completionrings) * +@@ -1348,6 +1319,18 @@ static void brcmf_pcie_down(struct device *dev) { } @@ -223274,7 +270943,7 @@ index 8b149996fc000..3ff4997e1c97a 100644 static int brcmf_pcie_tx(struct device *dev, struct sk_buff *skb) { -@@ -1456,6 +1435,7 @@ static int brcmf_pcie_reset(struct device *dev) +@@ -1456,6 +1439,7 @@ static int brcmf_pcie_reset(struct device *dev) } static const struct brcmf_bus_ops brcmf_pcie_bus_ops = { @@ -223282,7 +270951,7 @@ index 8b149996fc000..3ff4997e1c97a 100644 .txdata = brcmf_pcie_tx, .stop = brcmf_pcie_down, .txctl = brcmf_pcie_tx_ctlpkt, -@@ -1563,8 +1543,8 @@ static int brcmf_pcie_download_fw_nvram(struct brcmf_pciedev_info *devinfo, +@@ -1563,8 +1547,8 
@@ static int brcmf_pcie_download_fw_nvram(struct brcmf_pciedev_info *devinfo, return err; brcmf_dbg(PCIE, "Download FW %s\n", devinfo->fw_name); @@ -223293,7 +270962,7 @@ index 8b149996fc000..3ff4997e1c97a 100644 resetintr = get_unaligned_le32(fw->data); release_firmware(fw); -@@ -1578,7 +1558,7 @@ static int brcmf_pcie_download_fw_nvram(struct brcmf_pciedev_info *devinfo, +@@ -1578,7 +1562,7 @@ static int brcmf_pcie_download_fw_nvram(struct brcmf_pciedev_info *devinfo, brcmf_dbg(PCIE, "Download NVRAM %s\n", devinfo->nvram_name); address = devinfo->ci->rambase + devinfo->ci->ramsize - nvram_len; @@ -223302,7 +270971,7 @@ index 8b149996fc000..3ff4997e1c97a 100644 brcmf_fw_nvram_free(nvram); } else { brcmf_dbg(PCIE, "No matching NVRAM file found %s\n", -@@ -1777,6 +1757,8 @@ static void brcmf_pcie_setup(struct device *dev, int ret, +@@ -1777,6 +1761,8 @@ static void brcmf_pcie_setup(struct device *dev, int ret, ret = brcmf_chip_get_raminfo(devinfo->ci); if (ret) { brcmf_err(bus, "Failed to get RAM info\n"); @@ -223311,7 +270980,7 @@ index 8b149996fc000..3ff4997e1c97a 100644 goto fail; } -@@ -1826,9 +1808,6 @@ static void brcmf_pcie_setup(struct device *dev, int ret, +@@ -1826,9 +1812,6 @@ static void brcmf_pcie_setup(struct device *dev, int ret, init_waitqueue_head(&devinfo->mbdata_resp_wait); @@ -223353,7 +271022,7 @@ index fabfbb0b40b0c..d0a7465be586d 100644 sizeof(pfn_mac)); if (err) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c -index 8effeb7a7269b..f7961b22e0518 100644 +index 8effeb7a7269b..5006aa8317513 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c @@ -557,7 +557,7 @@ enum brcmf_sdio_frmtype { @@ -223373,6 +271042,61 @@ index 8effeb7a7269b..f7961b22e0518 100644 /* per-board firmware binaries */ MODULE_FIRMWARE(BRCMF_FW_DEFAULT_PATH "brcmfmac*-sdio.*.bin"); +@@ -3412,6 +3411,7 @@ static int brcmf_sdio_download_firmware(struct brcmf_sdio *bus, + /* Take arm out of reset */ + if (!brcmf_chip_set_active(bus->ci, rstvec)) { + brcmf_err("error getting out of ARM core reset\n"); ++ bcmerror = -EIO; + goto err; + } + +diff --git a/drivers/net/wireless/cisco/airo.c b/drivers/net/wireless/cisco/airo.c +index 65dd8cff1b011..fc19ecbc4c088 100644 +--- a/drivers/net/wireless/cisco/airo.c ++++ b/drivers/net/wireless/cisco/airo.c +@@ -5233,7 +5233,7 @@ static int get_wep_tx_idx(struct airo_info *ai) + return -1; + } + +-static int set_wep_key(struct airo_info *ai, u16 index, const char *key, ++static int set_wep_key(struct airo_info *ai, u16 index, const u8 *key, + u16 keylen, int perm, int lock) + { + static const unsigned char macaddr[ETH_ALEN] = { 0x01, 0, 0, 0, 0, 0 }; +@@ -5284,7 +5284,7 @@ static void proc_wepkey_on_close(struct inode *inode, struct file *file) + struct net_device *dev = PDE_DATA(inode); + struct airo_info *ai = dev->ml_priv; + int i, rc; +- char key[16]; ++ u8 key[16]; + u16 index = 0; + int j = 0; + +@@ -5312,12 +5312,22 @@ static void proc_wepkey_on_close(struct inode *inode, struct file *file) + } + + for (i = 0; i < 16*3 && data->wbuffer[i+j]; i++) { ++ int val; ++ ++ if (i % 3 == 2) ++ continue; ++ ++ val = hex_to_bin(data->wbuffer[i+j]); ++ if (val < 0) { ++ airo_print_err(ai->dev->name, "WebKey passed invalid key hex"); ++ return; ++ } + switch(i%3) { + case 0: +- key[i/3] = hex_to_bin(data->wbuffer[i+j])<<4; ++ key[i/3] = (u8)val << 4; + break; + case 1: +- key[i/3] |= hex_to_bin(data->wbuffer[i+j]); ++ 
key[i/3] |= (u8)val; + break; + } + } diff --git a/drivers/net/wireless/intel/ipw2x00/libipw_tx.c b/drivers/net/wireless/intel/ipw2x00/libipw_tx.c index 36d1e6b2568db..4aec1fce1ae29 100644 --- a/drivers/net/wireless/intel/ipw2x00/libipw_tx.c @@ -224290,7 +272014,7 @@ index e91f8e889df70..ab06dcda1462a 100644 te_data->vif = vif; spin_unlock_bh(&mvm->time_event_lock); diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c -index 0a13c2bda2eed..b5368cb57ca8c 100644 +index 0a13c2bda2eed..e354918c2480f 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c @@ -268,7 +268,6 @@ static u32 iwl_mvm_get_tx_rate(struct iwl_mvm *mvm, @@ -224311,6 +272035,33 @@ index 0a13c2bda2eed..b5368cb57ca8c 100644 rate_idx = info->control.rates[0].idx; } +@@ -1150,6 +1150,7 @@ int iwl_mvm_tx_skb_sta(struct iwl_mvm *mvm, struct sk_buff *skb, + struct sk_buff_head mpdus_skbs; + unsigned int payload_len; + int ret; ++ struct sk_buff *orig_skb = skb; + + if (WARN_ON_ONCE(!mvmsta)) + return -1; +@@ -1182,8 +1183,17 @@ int iwl_mvm_tx_skb_sta(struct iwl_mvm *mvm, struct sk_buff *skb, + + ret = iwl_mvm_tx_mpdu(mvm, skb, &info, sta); + if (ret) { ++ /* Free skbs created as part of TSO logic that have not yet been dequeued */ + __skb_queue_purge(&mpdus_skbs); +- return ret; ++ /* skb here is not necessarily same as skb that entered this method, ++ * so free it explicitly. ++ */ ++ if (skb == orig_skb) ++ ieee80211_free_txskb(mvm->hw, skb); ++ else ++ kfree_skb(skb); ++ /* there was error, but we consumed skb one way or another, so return 0 */ ++ return 0; + } + } + diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/utils.c b/drivers/net/wireless/intel/iwlwifi/mvm/utils.c index 4a3d2971a98b7..ec8a223f90e85 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/utils.c @@ -224443,7 +272194,7 @@ index ab0fe85658518..cdb57819684ae 100644 err_free_gpio_irq: gpio_free(p54spi_gpio_irq); diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c -index 0adae76eb8df1..b228567b2a732 100644 +index 0adae76eb8df1..c3c3b5aa87b0d 100644 --- a/drivers/net/wireless/mac80211_hwsim.c +++ b/drivers/net/wireless/mac80211_hwsim.c @@ -663,7 +663,7 @@ struct mac80211_hwsim_data { @@ -224455,7 +272206,26 @@ index 0adae76eb8df1..b228567b2a732 100644 struct sk_buff_head pending; /* packets pending */ /* * Only radios in the same group can communicate together (the -@@ -1339,8 +1339,7 @@ static void mac80211_hwsim_tx_frame_nl(struct ieee80211_hw *hw, +@@ -845,6 +845,7 @@ static void hwsim_send_nullfunc(struct mac80211_hwsim_data *data, u8 *mac, + struct hwsim_vif_priv *vp = (void *)vif->drv_priv; + struct sk_buff *skb; + struct ieee80211_hdr *hdr; ++ struct ieee80211_tx_info *cb; + + if (!vp->assoc) + return; +@@ -866,6 +867,10 @@ static void hwsim_send_nullfunc(struct mac80211_hwsim_data *data, u8 *mac, + memcpy(hdr->addr2, mac, ETH_ALEN); + memcpy(hdr->addr3, vp->bssid, ETH_ALEN); + ++ cb = IEEE80211_SKB_CB(skb); ++ cb->control.rates[0].count = 1; ++ cb->control.rates[1].idx = -1; ++ + rcu_read_lock(); + mac80211_hwsim_tx_frame(data->hw, skb, + rcu_dereference(vif->chanctx_conf)->def.chan); +@@ -1339,8 +1344,7 @@ static void mac80211_hwsim_tx_frame_nl(struct ieee80211_hw *hw, goto nla_put_failure; /* We create a cookie to identify this skb */ @@ -224465,7 +272235,7 @@ index 0adae76eb8df1..b228567b2a732 100644 info->rate_driver_data[0] = (void *)cookie; if (nla_put_u64_64bit(skb, HWSIM_ATTR_COOKIE, cookie, HWSIM_ATTR_PAD)) 
goto nla_put_failure; -@@ -2336,9 +2335,21 @@ static void hw_scan_work(struct work_struct *work) +@@ -2336,9 +2340,21 @@ static void hw_scan_work(struct work_struct *work) if (req->ie_len) skb_put_data(probe, req->ie, req->ie_len); @@ -224487,7 +272257,7 @@ index 0adae76eb8df1..b228567b2a732 100644 local_bh_enable(); } } -@@ -3570,6 +3581,7 @@ static int hwsim_tx_info_frame_received_nl(struct sk_buff *skb_2, +@@ -3570,6 +3586,7 @@ static int hwsim_tx_info_frame_received_nl(struct sk_buff *skb_2, const u8 *src; unsigned int hwsim_flags; int i; @@ -224495,7 +272265,7 @@ index 0adae76eb8df1..b228567b2a732 100644 bool found = false; if (!info->attrs[HWSIM_ATTR_ADDR_TRANSMITTER] || -@@ -3597,18 +3609,20 @@ static int hwsim_tx_info_frame_received_nl(struct sk_buff *skb_2, +@@ -3597,18 +3614,20 @@ static int hwsim_tx_info_frame_received_nl(struct sk_buff *skb_2, } /* look for the skb matching the cookie passed back from user */ @@ -224519,7 +272289,7 @@ index 0adae76eb8df1..b228567b2a732 100644 /* not found */ if (!found) -@@ -3641,6 +3655,10 @@ static int hwsim_tx_info_frame_received_nl(struct sk_buff *skb_2, +@@ -3641,6 +3660,10 @@ static int hwsim_tx_info_frame_received_nl(struct sk_buff *skb_2, } txi->flags |= IEEE80211_TX_STAT_ACK; } @@ -224530,7 +272300,7 @@ index 0adae76eb8df1..b228567b2a732 100644 ieee80211_tx_status_irqsafe(data2->hw, skb); return 0; out: -@@ -3731,6 +3749,8 @@ static int hwsim_cloned_frame_received_nl(struct sk_buff *skb_2, +@@ -3731,6 +3754,8 @@ static int hwsim_cloned_frame_received_nl(struct sk_buff *skb_2, rx_status.band = channel->band; rx_status.rate_idx = nla_get_u32(info->attrs[HWSIM_ATTR_RX_RATE]); @@ -224539,7 +272309,7 @@ index 0adae76eb8df1..b228567b2a732 100644 rx_status.signal = nla_get_u32(info->attrs[HWSIM_ATTR_SIGNAL]); hdr = (void *)skb->data; -@@ -4260,6 +4280,10 @@ static int hwsim_virtio_handle_cmd(struct sk_buff *skb) +@@ -4260,6 +4285,10 @@ static int hwsim_virtio_handle_cmd(struct sk_buff *skb) nlh = nlmsg_hdr(skb); gnlh = nlmsg_data(nlh); @@ -224550,7 +272320,7 @@ index 0adae76eb8df1..b228567b2a732 100644 err = genlmsg_parse(nlh, &hwsim_genl_family, tb, HWSIM_ATTR_MAX, hwsim_genl_policy, NULL); if (err) { -@@ -4302,7 +4326,8 @@ static void hwsim_virtio_rx_work(struct work_struct *work) +@@ -4302,7 +4331,8 @@ static void hwsim_virtio_rx_work(struct work_struct *work) spin_unlock_irqrestore(&hwsim_virtio_lock, flags); skb->data = skb->head; @@ -224798,6 +272568,18 @@ index c6ccce426b496..d5fb29400bad5 100644 return 0; err_alloc_buffers: +diff --git a/drivers/net/wireless/marvell/mwifiex/sdio.c b/drivers/net/wireless/marvell/mwifiex/sdio.c +index bde9e4bbfffe7..7fb6eef409285 100644 +--- a/drivers/net/wireless/marvell/mwifiex/sdio.c ++++ b/drivers/net/wireless/marvell/mwifiex/sdio.c +@@ -485,6 +485,7 @@ static struct memory_type_mapping mem_type_mapping_tbl[] = { + }; + + static const struct of_device_id mwifiex_sdio_of_match_table[] = { ++ { .compatible = "marvell,sd8787" }, + { .compatible = "marvell,sd8897" }, + { .compatible = "marvell,sd8997" }, + { } diff --git a/drivers/net/wireless/marvell/mwifiex/sta_event.c b/drivers/net/wireless/marvell/mwifiex/sta_event.c index 68c63268e2e6b..7d42c5d2dbf65 100644 --- a/drivers/net/wireless/marvell/mwifiex/sta_event.c @@ -224994,7 +272776,7 @@ index d03aedc3286bb..028519a739fd1 100644 ewma_signal_init(&wcid->rssi); diff --git a/drivers/net/wireless/mediatek/mt76/mt76.h b/drivers/net/wireless/mediatek/mt76/mt76.h -index 25c5ceef52577..d1f00706d41ec 100644 +index 25c5ceef52577..4e4af6e17b503 100644 --- 
a/drivers/net/wireless/mediatek/mt76/mt76.h +++ b/drivers/net/wireless/mediatek/mt76/mt76.h @@ -19,7 +19,7 @@ @@ -225030,6 +272812,17 @@ index 25c5ceef52577..d1f00706d41ec 100644 int mt76_queues_read(struct seq_file *s, void *data); void mt76_seq_puts_array(struct seq_file *file, const char *str, s8 *val, int len); +@@ -1015,8 +1021,9 @@ static inline bool mt76_is_skb_pktid(u8 pktid) + static inline u8 mt76_tx_power_nss_delta(u8 nss) + { + static const u8 nss_delta[4] = { 0, 6, 9, 12 }; ++ u8 idx = nss - 1; + +- return nss_delta[nss - 1]; ++ return (idx < ARRAY_SIZE(nss_delta)) ? nss_delta[idx] : 0; + } + + static inline bool mt76_testmode_enabled(struct mt76_phy *phy) diff --git a/drivers/net/wireless/mediatek/mt76/mt7603/mac.c b/drivers/net/wireless/mediatek/mt76/mt7603/mac.c index 3972c56136a20..65f1f2bb80835 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7603/mac.c @@ -225287,7 +273080,7 @@ index ff3f85e4087c9..9ba7963a89f65 100644 idr_destroy(&dev->mt76.token); } diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/main.c b/drivers/net/wireless/mediatek/mt76/mt7615/main.c -index dada43d6d879e..8f1338dae2114 100644 +index dada43d6d879e..96667b7d722d5 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7615/main.c +++ b/drivers/net/wireless/mediatek/mt76/mt7615/main.c @@ -135,8 +135,6 @@ static int get_omac_idx(enum nl80211_iftype type, u64 mask) @@ -225332,7 +273125,17 @@ index dada43d6d879e..8f1338dae2114 100644 } ret = mt7615_mcu_add_dev_info(phy, vif, true); -@@ -684,6 +682,9 @@ static void mt7615_sta_rate_tbl_update(struct ieee80211_hw *hw, +@@ -292,7 +290,8 @@ static void mt7615_init_dfs_state(struct mt7615_phy *phy) + if (hw->conf.flags & IEEE80211_CONF_OFFCHANNEL) + return; + +- if (!(chandef->chan->flags & IEEE80211_CHAN_RADAR)) ++ if (!(chandef->chan->flags & IEEE80211_CHAN_RADAR) && ++ !(mphy->chandef.chan->flags & IEEE80211_CHAN_RADAR)) + return; + + if (mphy->chandef.chan->center_freq == chandef->chan->center_freq && +@@ -684,6 +683,9 @@ static void mt7615_sta_rate_tbl_update(struct ieee80211_hw *hw, struct ieee80211_sta_rates *sta_rates = rcu_dereference(sta->rates); int i; @@ -225342,7 +273145,7 @@ index dada43d6d879e..8f1338dae2114 100644 spin_lock_bh(&dev->mt76.lock); for (i = 0; i < ARRAY_SIZE(msta->rates); i++) { msta->rates[i].idx = sta_rates->rate[i].idx; -@@ -1185,12 +1186,16 @@ static void mt7615_sta_set_decap_offload(struct ieee80211_hw *hw, +@@ -1185,12 +1187,16 @@ static void mt7615_sta_set_decap_offload(struct ieee80211_hw *hw, struct mt7615_dev *dev = mt7615_hw_dev(hw); struct mt7615_sta *msta = (struct mt7615_sta *)sta->drv_priv; @@ -225836,7 +273639,7 @@ index eb1885f4bd8eb..fee7741b5d421 100644 #define MT_TXP_MAX_BUF_NUM 6 diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/main.c b/drivers/net/wireless/mediatek/mt76/mt7915/main.c -index c25f8da590dd9..6aca470e24013 100644 +index c25f8da590dd9..7a4f277a16223 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7915/main.c +++ b/drivers/net/wireless/mediatek/mt76/mt7915/main.c @@ -243,7 +243,7 @@ static int mt7915_add_interface(struct ieee80211_hw *hw, @@ -225848,6 +273651,16 @@ index c25f8da590dd9..6aca470e24013 100644 } if (vif->type != NL80211_IFTYPE_AP && +@@ -302,7 +302,8 @@ static void mt7915_init_dfs_state(struct mt7915_phy *phy) + if (hw->conf.flags & IEEE80211_CONF_OFFCHANNEL) + return; + +- if (!(chandef->chan->flags & IEEE80211_CHAN_RADAR)) ++ if (!(chandef->chan->flags & IEEE80211_CHAN_RADAR) && ++ !(mphy->chandef.chan->flags & IEEE80211_CHAN_RADAR)) + return; + + if 
(mphy->chandef.chan->center_freq == chandef->chan->center_freq && diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c index 43960770a9af2..e9d854e3293e4 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c @@ -226338,7 +274151,7 @@ index a9ce10b988273..c059cb419efd8 100644 mt7921_mcu_exit(dev); diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/mac.c b/drivers/net/wireless/mediatek/mt76/mt7921/mac.c -index 7fe2e3a50428f..426e7a32bdc86 100644 +index 7fe2e3a50428f..6cf0c9b1b8b98 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7921/mac.c +++ b/drivers/net/wireless/mediatek/mt76/mt7921/mac.c @@ -180,12 +180,56 @@ mt7921_mac_decode_he_radiotap_ru(struct mt76_rx_status *status, @@ -226522,6 +274335,15 @@ index 7fe2e3a50428f..426e7a32bdc86 100644 if (!(frame_type & (IEEE80211_FTYPE_DATA >> 2))) return; +@@ -1417,7 +1476,7 @@ mt7921_mac_update_mib_stats(struct mt7921_phy *phy) + mib->rts_retries_cnt += mt76_get_field(dev, MT_MIB_MB_BSDR1(0), + MT_MIB_RTS_FAIL_COUNT_MASK); + +- for (i = 0, aggr1 = aggr0 + 4; i < 4; i++) { ++ for (i = 0, aggr1 = aggr0 + 8; i < 4; i++) { + u32 val, val2; + + val = mt76_rr(dev, MT_TX_AGG_CNT(0, i)); @@ -1493,6 +1552,14 @@ void mt7921_pm_power_save_work(struct work_struct *work) test_bit(MT76_HW_SCHED_SCANNING, &mphy->state)) goto out; @@ -227276,8 +275098,38 @@ index f0f7a913eaabf..dce6f6b5f0712 100644 spin_unlock_bh(&q->lock); +diff --git a/drivers/net/wireless/mediatek/mt76/usb.c b/drivers/net/wireless/mediatek/mt76/usb.c +index 1e9f60bb811ad..b47343e321b81 100644 +--- a/drivers/net/wireless/mediatek/mt76/usb.c ++++ b/drivers/net/wireless/mediatek/mt76/usb.c +@@ -814,6 +814,9 @@ static void mt76u_status_worker(struct mt76_worker *w) + struct mt76_queue *q; + int i; + ++ if (!test_bit(MT76_STATE_RUNNING, &dev->phy.state)) ++ return; ++ + for (i = 0; i < IEEE80211_NUM_ACS; i++) { + q = dev->phy.q_tx[i]; + if (!q) +@@ -833,11 +836,11 @@ static void mt76u_status_worker(struct mt76_worker *w) + wake_up(&dev->tx_wait); + + mt76_worker_schedule(&dev->tx_worker); +- +- if (dev->drv->tx_status_data && +- !test_and_set_bit(MT76_READING_STATS, &dev->phy.state)) +- queue_work(dev->wq, &dev->usb.stat_work); + } ++ ++ if (dev->drv->tx_status_data && ++ !test_and_set_bit(MT76_READING_STATS, &dev->phy.state)) ++ queue_work(dev->wq, &dev->usb.stat_work); + } + + static void mt76u_tx_status_data(struct work_struct *work) diff --git a/drivers/net/wireless/microchip/wilc1000/cfg80211.c b/drivers/net/wireless/microchip/wilc1000/cfg80211.c -index 96973ec7bd9ac..87c14969c75fa 100644 +index 96973ec7bd9ac..1688144d78475 100644 --- a/drivers/net/wireless/microchip/wilc1000/cfg80211.c +++ b/drivers/net/wireless/microchip/wilc1000/cfg80211.c @@ -129,8 +129,7 @@ static void cfg_scan_result(enum scan_event scan_event, @@ -227290,6 +275142,103 @@ index 96973ec7bd9ac..87c14969c75fa 100644 } else if (scan_event == SCAN_EVENT_DONE) { mutex_lock(&priv->scan_req_lock); +@@ -940,30 +939,52 @@ static inline void wilc_wfi_cfg_parse_ch_attr(u8 *buf, u32 len, u8 sta_ch) + return; + + while (index + sizeof(*e) <= len) { ++ u16 attr_size; ++ + e = (struct wilc_attr_entry *)&buf[index]; +- if (e->attr_type == IEEE80211_P2P_ATTR_CHANNEL_LIST) ++ attr_size = le16_to_cpu(e->attr_len); ++ ++ if (index + sizeof(*e) + attr_size > len) ++ return; ++ ++ if (e->attr_type == IEEE80211_P2P_ATTR_CHANNEL_LIST && ++ attr_size >= (sizeof(struct wilc_attr_ch_list) - sizeof(*e))) + ch_list_idx = index; +- else if (e->attr_type == 
IEEE80211_P2P_ATTR_OPER_CHANNEL) ++ else if (e->attr_type == IEEE80211_P2P_ATTR_OPER_CHANNEL && ++ attr_size == (sizeof(struct wilc_attr_oper_ch) - sizeof(*e))) + op_ch_idx = index; ++ + if (ch_list_idx && op_ch_idx) + break; +- index += le16_to_cpu(e->attr_len) + sizeof(*e); ++ ++ index += sizeof(*e) + attr_size; + } + + if (ch_list_idx) { +- u16 attr_size; +- struct wilc_ch_list_elem *e; +- int i; ++ unsigned int i; ++ u16 elem_size; + + ch_list = (struct wilc_attr_ch_list *)&buf[ch_list_idx]; +- attr_size = le16_to_cpu(ch_list->attr_len); +- for (i = 0; i < attr_size;) { ++ /* the number of bytes following the final 'elem' member */ ++ elem_size = le16_to_cpu(ch_list->attr_len) - ++ (sizeof(*ch_list) - sizeof(struct wilc_attr_entry)); ++ for (i = 0; i < elem_size;) { ++ struct wilc_ch_list_elem *e; ++ + e = (struct wilc_ch_list_elem *)(ch_list->elem + i); ++ ++ i += sizeof(*e); ++ if (i > elem_size) ++ break; ++ ++ i += e->no_of_channels; ++ if (i > elem_size) ++ break; ++ + if (e->op_class == WILC_WLAN_OPERATING_CLASS_2_4GHZ) { + memset(e->ch_list, sta_ch, e->no_of_channels); + break; + } +- i += e->no_of_channels; + } + } + +diff --git a/drivers/net/wireless/microchip/wilc1000/hif.c b/drivers/net/wireless/microchip/wilc1000/hif.c +index a133736a78215..3e5cc947b9b90 100644 +--- a/drivers/net/wireless/microchip/wilc1000/hif.c ++++ b/drivers/net/wireless/microchip/wilc1000/hif.c +@@ -467,14 +467,25 @@ void *wilc_parse_join_bss_param(struct cfg80211_bss *bss, + + rsn_ie = cfg80211_find_ie(WLAN_EID_RSN, ies->data, ies->len); + if (rsn_ie) { ++ int rsn_ie_len = sizeof(struct element) + rsn_ie[1]; + int offset = 8; + +- param->mode_802_11i = 2; +- param->rsn_found = true; + /* extract RSN capabilities */ +- offset += (rsn_ie[offset] * 4) + 2; +- offset += (rsn_ie[offset] * 4) + 2; +- memcpy(param->rsn_cap, &rsn_ie[offset], 2); ++ if (offset < rsn_ie_len) { ++ /* skip over pairwise suites */ ++ offset += (rsn_ie[offset] * 4) + 2; ++ ++ if (offset < rsn_ie_len) { ++ /* skip over authentication suites */ ++ offset += (rsn_ie[offset] * 4) + 2; ++ ++ if (offset + 1 < rsn_ie_len) { ++ param->mode_802_11i = 2; ++ param->rsn_found = true; ++ memcpy(param->rsn_cap, &rsn_ie[offset], 2); ++ } ++ } ++ } + } + + if (param->rsn_found) { diff --git a/drivers/net/wireless/microchip/wilc1000/mon.c b/drivers/net/wireless/microchip/wilc1000/mon.c index 6bd63934c2d84..b5a1b65c087ca 100644 --- a/drivers/net/wireless/microchip/wilc1000/mon.c @@ -227337,10 +275286,18 @@ index 86209b391a3d6..e6e23fc585ee8 100644 struct txq_handle txq[NQUEUES]; int txq_entries; diff --git a/drivers/net/wireless/microchip/wilc1000/sdio.c b/drivers/net/wireless/microchip/wilc1000/sdio.c -index 42e03a701ae16..6c0727fc4abd9 100644 +index 42e03a701ae16..cb4efbfd0811f 100644 --- a/drivers/net/wireless/microchip/wilc1000/sdio.c +++ b/drivers/net/wireless/microchip/wilc1000/sdio.c -@@ -27,6 +27,7 @@ struct wilc_sdio { +@@ -20,6 +20,7 @@ static const struct sdio_device_id wilc_sdio_ids[] = { + { SDIO_DEVICE(SDIO_VENDOR_ID_MICROCHIP_WILC, SDIO_DEVICE_ID_MICROCHIP_WILC1000) }, + { }, + }; ++MODULE_DEVICE_TABLE(sdio, wilc_sdio_ids); + + #define WILC_SDIO_BLOCK_SIZE 512 + +@@ -27,6 +28,7 @@ struct wilc_sdio { bool irq_gpio; u32 block_size; int has_thrpt_enh3; @@ -227348,7 +275305,7 @@ index 42e03a701ae16..6c0727fc4abd9 100644 }; struct sdio_cmd52 { -@@ -46,6 +47,7 @@ struct sdio_cmd53 { +@@ -46,6 +48,7 @@ struct sdio_cmd53 { u32 count: 9; u8 *buffer; u32 block_size; @@ -227356,7 +275313,7 @@ index 42e03a701ae16..6c0727fc4abd9 100644 }; static 
const struct wilc_hif_func wilc_hif_sdio; -@@ -90,6 +92,8 @@ static int wilc_sdio_cmd53(struct wilc *wilc, struct sdio_cmd53 *cmd) +@@ -90,6 +93,8 @@ static int wilc_sdio_cmd53(struct wilc *wilc, struct sdio_cmd53 *cmd) { struct sdio_func *func = container_of(wilc->dev, struct sdio_func, dev); int size, ret; @@ -227365,7 +275322,7 @@ index 42e03a701ae16..6c0727fc4abd9 100644 sdio_claim_host(func); -@@ -100,12 +104,23 @@ static int wilc_sdio_cmd53(struct wilc *wilc, struct sdio_cmd53 *cmd) +@@ -100,12 +105,23 @@ static int wilc_sdio_cmd53(struct wilc *wilc, struct sdio_cmd53 *cmd) else size = cmd->count; @@ -227393,7 +275350,7 @@ index 42e03a701ae16..6c0727fc4abd9 100644 } sdio_release_host(func); -@@ -127,6 +142,12 @@ static int wilc_sdio_probe(struct sdio_func *func, +@@ -127,6 +143,12 @@ static int wilc_sdio_probe(struct sdio_func *func, if (!sdio_priv) return -ENOMEM; @@ -227406,7 +275363,7 @@ index 42e03a701ae16..6c0727fc4abd9 100644 ret = wilc_cfg80211_init(&wilc, &func->dev, WILC_HIF_SDIO, &wilc_hif_sdio); if (ret) -@@ -160,6 +181,7 @@ dispose_irq: +@@ -160,6 +182,7 @@ dispose_irq: irq_dispose_mapping(wilc->dev_irq_num); wilc_netdev_cleanup(wilc); free: @@ -227414,7 +275371,7 @@ index 42e03a701ae16..6c0727fc4abd9 100644 kfree(sdio_priv); return ret; } -@@ -167,9 +189,12 @@ free: +@@ -167,9 +190,12 @@ free: static void wilc_sdio_remove(struct sdio_func *func) { struct wilc *wilc = sdio_get_drvdata(func); @@ -227427,7 +275384,7 @@ index 42e03a701ae16..6c0727fc4abd9 100644 } static int wilc_sdio_reset(struct wilc *wilc) -@@ -365,8 +390,9 @@ static int wilc_sdio_write_reg(struct wilc *wilc, u32 addr, u32 data) +@@ -365,8 +391,9 @@ static int wilc_sdio_write_reg(struct wilc *wilc, u32 addr, u32 data) cmd.address = WILC_SDIO_FBR_DATA_REG; cmd.block_mode = 0; cmd.increment = 1; @@ -227438,7 +275395,7 @@ index 42e03a701ae16..6c0727fc4abd9 100644 cmd.block_size = sdio_priv->block_size; ret = wilc_sdio_cmd53(wilc, &cmd); if (ret) -@@ -404,6 +430,7 @@ static int wilc_sdio_write(struct wilc *wilc, u32 addr, u8 *buf, u32 size) +@@ -404,6 +431,7 @@ static int wilc_sdio_write(struct wilc *wilc, u32 addr, u8 *buf, u32 size) nblk = size / block_size; nleft = size % block_size; @@ -227446,7 +275403,7 @@ index 42e03a701ae16..6c0727fc4abd9 100644 if (nblk > 0) { cmd.block_mode = 1; cmd.increment = 1; -@@ -482,8 +509,9 @@ static int wilc_sdio_read_reg(struct wilc *wilc, u32 addr, u32 *data) +@@ -482,8 +510,9 @@ static int wilc_sdio_read_reg(struct wilc *wilc, u32 addr, u32 *data) cmd.address = WILC_SDIO_FBR_DATA_REG; cmd.block_mode = 0; cmd.increment = 1; @@ -227457,7 +275414,7 @@ index 42e03a701ae16..6c0727fc4abd9 100644 cmd.block_size = sdio_priv->block_size; ret = wilc_sdio_cmd53(wilc, &cmd); -@@ -525,6 +553,7 @@ static int wilc_sdio_read(struct wilc *wilc, u32 addr, u8 *buf, u32 size) +@@ -525,6 +554,7 @@ static int wilc_sdio_read(struct wilc *wilc, u32 addr, u8 *buf, u32 size) nblk = size / block_size; nleft = size % block_size; @@ -227740,10 +275697,74 @@ index 585784258c665..4efab907a3ac6 100644 mutex_unlock(&priv->io_mutex); +diff --git a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu.h b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu.h +index 7ddce3c3f0c48..cd3ff9847ced3 100644 +--- a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu.h ++++ b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu.h +@@ -1190,7 +1190,7 @@ struct rtl8723bu_c2h { + u8 bw; + } __packed ra_report; + }; +-}; ++} __packed; + + struct rtl8xxxu_fileops; + diff --git a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c 
b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c -index 774341b0005a3..e74c885a04e50 100644 +index 774341b0005a3..3d3fa2b616a86 100644 --- a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c +++ b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c +@@ -1607,18 +1607,18 @@ static void rtl8xxxu_print_chipinfo(struct rtl8xxxu_priv *priv) + static int rtl8xxxu_identify_chip(struct rtl8xxxu_priv *priv) + { + struct device *dev = &priv->udev->dev; +- u32 val32, bonding; ++ u32 val32, bonding, sys_cfg; + u16 val16; + +- val32 = rtl8xxxu_read32(priv, REG_SYS_CFG); +- priv->chip_cut = (val32 & SYS_CFG_CHIP_VERSION_MASK) >> ++ sys_cfg = rtl8xxxu_read32(priv, REG_SYS_CFG); ++ priv->chip_cut = (sys_cfg & SYS_CFG_CHIP_VERSION_MASK) >> + SYS_CFG_CHIP_VERSION_SHIFT; +- if (val32 & SYS_CFG_TRP_VAUX_EN) { ++ if (sys_cfg & SYS_CFG_TRP_VAUX_EN) { + dev_info(dev, "Unsupported test chip\n"); + return -ENOTSUPP; + } + +- if (val32 & SYS_CFG_BT_FUNC) { ++ if (sys_cfg & SYS_CFG_BT_FUNC) { + if (priv->chip_cut >= 3) { + sprintf(priv->chip_name, "8723BU"); + priv->rtl_chip = RTL8723B; +@@ -1640,7 +1640,7 @@ static int rtl8xxxu_identify_chip(struct rtl8xxxu_priv *priv) + if (val32 & MULTI_GPS_FUNC_EN) + priv->has_gps = 1; + priv->is_multi_func = 1; +- } else if (val32 & SYS_CFG_TYPE_ID) { ++ } else if (sys_cfg & SYS_CFG_TYPE_ID) { + bonding = rtl8xxxu_read32(priv, REG_HPON_FSM); + bonding &= HPON_FSM_BONDING_MASK; + if (priv->fops->tx_desc_size == +@@ -1688,7 +1688,7 @@ static int rtl8xxxu_identify_chip(struct rtl8xxxu_priv *priv) + case RTL8188E: + case RTL8192E: + case RTL8723B: +- switch (val32 & SYS_CFG_VENDOR_EXT_MASK) { ++ switch (sys_cfg & SYS_CFG_VENDOR_EXT_MASK) { + case SYS_CFG_VENDOR_ID_TSMC: + sprintf(priv->chip_vendor, "TSMC"); + break; +@@ -1705,7 +1705,7 @@ static int rtl8xxxu_identify_chip(struct rtl8xxxu_priv *priv) + } + break; + default: +- if (val32 & SYS_CFG_VENDOR_ID) { ++ if (sys_cfg & SYS_CFG_VENDOR_ID) { + sprintf(priv->chip_vendor, "UMC"); + priv->vendor_umc = 1; + } else { @@ -1874,13 +1874,6 @@ static int rtl8xxxu_read_efuse(struct rtl8xxxu_priv *priv) /* We have 8 bits to indicate validity */ @@ -227805,7 +275826,30 @@ index 774341b0005a3..e74c885a04e50 100644 rtl8xxxu_gen2_h2c_cmd(priv, &h2c, sizeof(h2c.b_macid_cfg)); } -@@ -4508,6 +4507,53 @@ rtl8xxxu_wireless_mode(struct ieee80211_hw *hw, struct ieee80211_sta *sta) +@@ -4370,12 +4369,9 @@ void rtl8xxxu_gen1_report_connect(struct rtl8xxxu_priv *priv, + void rtl8xxxu_gen2_report_connect(struct rtl8xxxu_priv *priv, + u8 macid, bool connect) + { +-#ifdef RTL8XXXU_GEN2_REPORT_CONNECT + /* +- * Barry Day reports this causes issues with 8192eu and 8723bu +- * devices reconnecting. The reason for this is unclear, but +- * until it is better understood, leave the code in place but +- * disabled, so it is not lost. ++ * The firmware turns on the rate control when it knows it's ++ * connected to a network. 
+ */ + struct h2c_cmd h2c; + +@@ -4388,7 +4384,6 @@ void rtl8xxxu_gen2_report_connect(struct rtl8xxxu_priv *priv, + h2c.media_status_rpt.parm &= ~BIT(0); + + rtl8xxxu_gen2_h2c_cmd(priv, &h2c, sizeof(h2c.media_status_rpt)); +-#endif + } + + void rtl8xxxu_gen1_init_aggregation(struct rtl8xxxu_priv *priv) +@@ -4508,6 +4503,53 @@ rtl8xxxu_wireless_mode(struct ieee80211_hw *hw, struct ieee80211_sta *sta) return network_type; } @@ -227859,7 +275903,7 @@ index 774341b0005a3..e74c885a04e50 100644 static void rtl8xxxu_bss_info_changed(struct ieee80211_hw *hw, struct ieee80211_vif *vif, struct ieee80211_bss_conf *bss_conf, u32 changed) -@@ -4593,6 +4639,8 @@ rtl8xxxu_bss_info_changed(struct ieee80211_hw *hw, struct ieee80211_vif *vif, +@@ -4593,6 +4635,8 @@ rtl8xxxu_bss_info_changed(struct ieee80211_hw *hw, struct ieee80211_vif *vif, else val8 = 20; rtl8xxxu_write8(priv, REG_SLOT, val8); @@ -227868,7 +275912,7 @@ index 774341b0005a3..e74c885a04e50 100644 } if (changed & BSS_CHANGED_BSSID) { -@@ -4984,6 +5032,8 @@ static void rtl8xxxu_tx(struct ieee80211_hw *hw, +@@ -4984,6 +5028,8 @@ static void rtl8xxxu_tx(struct ieee80211_hw *hw, if (control && control->sta) sta = control->sta; @@ -227877,7 +275921,7 @@ index 774341b0005a3..e74c885a04e50 100644 tx_desc = skb_push(skb, tx_desc_size); memset(tx_desc, 0, tx_desc_size); -@@ -4996,7 +5046,6 @@ static void rtl8xxxu_tx(struct ieee80211_hw *hw, +@@ -4996,7 +5042,6 @@ static void rtl8xxxu_tx(struct ieee80211_hw *hw, is_broadcast_ether_addr(ieee80211_get_DA(hdr))) tx_desc->txdw0 |= TXDESC_BROADMULTICAST; @@ -227885,6 +275929,30 @@ index 774341b0005a3..e74c885a04e50 100644 tx_desc->txdw1 = cpu_to_le32(queue << TXDESC_QUEUE_SHIFT); if (tx_info->control.hw_key) { +@@ -5468,7 +5513,6 @@ static void rtl8xxxu_c2hcmd_callback(struct work_struct *work) + rarpt->txrate.flags = 0; + rate = c2h->ra_report.rate; + sgi = c2h->ra_report.sgi; +- bw = c2h->ra_report.bw; + + if (rate < DESC_RATE_MCS0) { + rarpt->txrate.legacy = +@@ -5485,8 +5529,13 @@ static void rtl8xxxu_c2hcmd_callback(struct work_struct *work) + RATE_INFO_FLAGS_SHORT_GI; + } + +- if (bw == RATE_INFO_BW_20) +- rarpt->txrate.bw |= RATE_INFO_BW_20; ++ if (skb->len >= offsetofend(typeof(*c2h), ra_report.bw)) { ++ if (c2h->ra_report.bw == RTL8XXXU_CHANNEL_WIDTH_40) ++ bw = RATE_INFO_BW_40; ++ else ++ bw = RATE_INFO_BW_20; ++ rarpt->txrate.bw = bw; ++ } + } + bit_rate = cfg80211_calculate_bitrate(&rarpt->txrate); + rarpt->bit_rate = bit_rate; diff --git a/drivers/net/wireless/realtek/rtlwifi/debug.c b/drivers/net/wireless/realtek/rtlwifi/debug.c index 901cdfe3723cf..0b1bc04cb6adb 100644 --- a/drivers/net/wireless/realtek/rtlwifi/debug.c @@ -228288,8 +276356,50 @@ index f3ad079967a68..bc87e3cb9cdce 100644 #define WLAN_MAC_INT_MIG_CFG 0x33330000 #define WLAN_SIFS_CFG (WLAN_SIFS_CCK_CONT_TX | \ +diff --git a/drivers/net/wireless/rndis_wlan.c b/drivers/net/wireless/rndis_wlan.c +index 63ce2443f1364..70841d131d724 100644 +--- a/drivers/net/wireless/rndis_wlan.c ++++ b/drivers/net/wireless/rndis_wlan.c +@@ -694,8 +694,8 @@ static int rndis_query_oid(struct usbnet *dev, u32 oid, void *data, int *len) + struct rndis_query *get; + struct rndis_query_c *get_c; + } u; +- int ret, buflen; +- int resplen, respoffs, copylen; ++ int ret; ++ size_t buflen, resplen, respoffs, copylen; + + buflen = *len + sizeof(*u.get); + if (buflen < CONTROL_BUFFER_SIZE) +@@ -730,22 +730,15 @@ static int rndis_query_oid(struct usbnet *dev, u32 oid, void *data, int *len) + + if (respoffs > buflen) { + /* Device returned data offset outside 
buffer, error. */ +- netdev_dbg(dev->net, "%s(%s): received invalid " +- "data offset: %d > %d\n", __func__, +- oid_to_string(oid), respoffs, buflen); ++ netdev_dbg(dev->net, ++ "%s(%s): received invalid data offset: %zu > %zu\n", ++ __func__, oid_to_string(oid), respoffs, buflen); + + ret = -EINVAL; + goto exit_unlock; + } + +- if ((resplen + respoffs) > buflen) { +- /* Device would have returned more data if buffer would +- * have been big enough. Copy just the bits that we got. +- */ +- copylen = buflen - respoffs; +- } else { +- copylen = resplen; +- } ++ copylen = min(resplen, buflen - respoffs); + + if (copylen > *len) + copylen = *len; diff --git a/drivers/net/wireless/rsi/rsi_91x_core.c b/drivers/net/wireless/rsi/rsi_91x_core.c -index a48e616e0fb91..6bfaab48b507d 100644 +index a48e616e0fb91..6b64a103f39f0 100644 --- a/drivers/net/wireless/rsi/rsi_91x_core.c +++ b/drivers/net/wireless/rsi/rsi_91x_core.c @@ -399,6 +399,8 @@ void rsi_core_xmit(struct rsi_common *common, struct sk_buff *skb) @@ -228301,11 +276411,39 @@ index a48e616e0fb91..6bfaab48b507d 100644 wh = (struct ieee80211_hdr *)&skb->data[0]; tx_params->sta_id = 0; +@@ -463,7 +465,9 @@ void rsi_core_xmit(struct rsi_common *common, struct sk_buff *skb) + tid, 0); + } + } +- if (skb->protocol == cpu_to_be16(ETH_P_PAE)) { ++ ++ if (IEEE80211_SKB_CB(skb)->control.flags & ++ IEEE80211_TX_CTRL_PORT_CTRL_PROTO) { + q_num = MGMT_SOFT_Q; + skb->priority = q_num; + } diff --git a/drivers/net/wireless/rsi/rsi_91x_hal.c b/drivers/net/wireless/rsi/rsi_91x_hal.c -index f4a26f16f00f4..dca81a4bbdd7f 100644 +index f4a26f16f00f4..30d2eccbcadd5 100644 --- a/drivers/net/wireless/rsi/rsi_91x_hal.c +++ b/drivers/net/wireless/rsi/rsi_91x_hal.c -@@ -203,7 +203,7 @@ int rsi_prepare_data_desc(struct rsi_common *common, struct sk_buff *skb) +@@ -162,12 +162,16 @@ int rsi_prepare_data_desc(struct rsi_common *common, struct sk_buff *skb) + u8 header_size; + u8 vap_id = 0; + u8 dword_align_bytes; ++ bool tx_eapol; + u16 seq_num; + + info = IEEE80211_SKB_CB(skb); + vif = info->control.vif; + tx_params = (struct skb_info *)info->driver_data; + ++ tx_eapol = IEEE80211_SKB_CB(skb)->control.flags & ++ IEEE80211_TX_CTRL_PORT_CTRL_PROTO; ++ + header_size = FRAME_DESC_SZ + sizeof(struct rsi_xtended_desc); + if (header_size > skb_headroom(skb)) { + rsi_dbg(ERR_ZONE, "%s: Unable to send pkt\n", __func__); +@@ -203,7 +207,7 @@ int rsi_prepare_data_desc(struct rsi_common *common, struct sk_buff *skb) wh->frame_control |= cpu_to_le16(RSI_SET_PS_ENABLE); if ((!(info->flags & IEEE80211_TX_INTFL_DONT_ENCRYPT)) && @@ -228314,7 +276452,7 @@ index f4a26f16f00f4..dca81a4bbdd7f 100644 if (rsi_is_cipher_wep(common)) ieee80211_size += 4; else -@@ -214,15 +214,17 @@ int rsi_prepare_data_desc(struct rsi_common *common, struct sk_buff *skb) +@@ -214,22 +218,24 @@ int rsi_prepare_data_desc(struct rsi_common *common, struct sk_buff *skb) RSI_WIFI_DATA_Q); data_desc->header_len = ieee80211_size; @@ -228335,6 +276473,14 @@ index f4a26f16f00f4..dca81a4bbdd7f 100644 /* Only MCS rates */ data_desc->rate_info |= cpu_to_le16(ENABLE_SHORTGI_RATE); + } + } + +- if (skb->protocol == cpu_to_be16(ETH_P_PAE)) { ++ if (tx_eapol) { + rsi_dbg(INFO_ZONE, "*** Tx EAPOL ***\n"); + + data_desc->frame_info = cpu_to_le16(RATE_INFO_ENABLE); diff --git a/drivers/net/wireless/rsi/rsi_91x_mac80211.c b/drivers/net/wireless/rsi/rsi_91x_mac80211.c index b66975f545675..e70c1c7fdf595 100644 --- a/drivers/net/wireless/rsi/rsi_91x_mac80211.c @@ -228761,6 +276907,65 @@ index 254d19b664123..961851748bc4c 100644 
struct rx_usb_ctrl_block { u8 *data; struct urb *rx_urb; +diff --git a/drivers/net/wwan/iosm/iosm_ipc_mux_codec.c b/drivers/net/wwan/iosm/iosm_ipc_mux_codec.c +index bdb2d32cdb6d7..e323fe1ae5380 100644 +--- a/drivers/net/wwan/iosm/iosm_ipc_mux_codec.c ++++ b/drivers/net/wwan/iosm/iosm_ipc_mux_codec.c +@@ -830,8 +830,7 @@ void ipc_mux_ul_encoded_process(struct iosm_mux *ipc_mux, struct sk_buff *skb) + ipc_mux->ul_data_pend_bytes); + + /* Reset the skb settings. */ +- skb->tail = 0; +- skb->len = 0; ++ skb_trim(skb, 0); + + /* Add the consumed ADB to the free list. */ + skb_queue_tail((&ipc_mux->ul_adb.free_list), skb); +diff --git a/drivers/net/wwan/iosm/iosm_ipc_pcie.c b/drivers/net/wwan/iosm/iosm_ipc_pcie.c +index 2fe88b8be3481..8b4222b137d14 100644 +--- a/drivers/net/wwan/iosm/iosm_ipc_pcie.c ++++ b/drivers/net/wwan/iosm/iosm_ipc_pcie.c +@@ -232,6 +232,7 @@ static void ipc_pcie_config_init(struct iosm_pcie *ipc_pcie) + */ + static enum ipc_pcie_sleep_state ipc_pcie_read_bios_cfg(struct device *dev) + { ++ enum ipc_pcie_sleep_state sleep_state = IPC_PCIE_D0L12; + union acpi_object *object; + acpi_handle handle_acpi; + +@@ -242,12 +243,16 @@ static enum ipc_pcie_sleep_state ipc_pcie_read_bios_cfg(struct device *dev) + } + + object = acpi_evaluate_dsm(handle_acpi, &wwan_acpi_guid, 0, 3, NULL); ++ if (!object) ++ goto default_ret; ++ ++ if (object->integer.value == 3) ++ sleep_state = IPC_PCIE_D3L2; + +- if (object && object->integer.value == 3) +- return IPC_PCIE_D3L2; ++ ACPI_FREE(object); + + default_ret: +- return IPC_PCIE_D0L12; ++ return sleep_state; + } + + static int ipc_pcie_probe(struct pci_dev *pci, +diff --git a/drivers/net/wwan/iosm/iosm_ipc_protocol.h b/drivers/net/wwan/iosm/iosm_ipc_protocol.h +index 9b3a6d86ece7a..289397c4ea6ce 100644 +--- a/drivers/net/wwan/iosm/iosm_ipc_protocol.h ++++ b/drivers/net/wwan/iosm/iosm_ipc_protocol.h +@@ -122,7 +122,7 @@ struct iosm_protocol { + struct iosm_imem *imem; + struct ipc_rsp *rsp_ring[IPC_MEM_MSG_ENTRIES]; + struct device *dev; +- phys_addr_t phy_ap_shm; ++ dma_addr_t phy_ap_shm; + u32 old_msg_tail; + }; + diff --git a/drivers/net/wwan/iosm/iosm_ipc_protocol_ops.c b/drivers/net/wwan/iosm/iosm_ipc_protocol_ops.c index c6b032f95d2e4..4627847c6daab 100644 --- a/drivers/net/wwan/iosm/iosm_ipc_protocol_ops.c @@ -228790,10 +276995,18 @@ index c6b032f95d2e4..4627847c6daab 100644 dev_err(ipc_protocol->dev, "invalid buf=%llx or skb=%p", (unsigned long long)p_td->buffer.address, skb->data); diff --git a/drivers/net/wwan/iosm/iosm_ipc_wwan.c b/drivers/net/wwan/iosm/iosm_ipc_wwan.c -index b571d9cedba49..92f064a8f8378 100644 +index b571d9cedba49..3449f877e19f0 100644 --- a/drivers/net/wwan/iosm/iosm_ipc_wwan.c +++ b/drivers/net/wwan/iosm/iosm_ipc_wwan.c -@@ -322,15 +322,16 @@ struct iosm_wwan *ipc_wwan_init(struct iosm_imem *ipc_imem, struct device *dev) +@@ -167,6 +167,7 @@ static void ipc_wwan_setup(struct net_device *iosm_dev) + iosm_dev->max_mtu = ETH_MAX_MTU; + + iosm_dev->flags = IFF_POINTOPOINT | IFF_NOARP; ++ iosm_dev->needs_free_netdev = true; + + iosm_dev->netdev_ops = &ipc_inm_ops; + } +@@ -322,15 +323,16 @@ struct iosm_wwan *ipc_wwan_init(struct iosm_imem *ipc_imem, struct device *dev) ipc_wwan->dev = dev; ipc_wwan->ipc_imem = ipc_imem; @@ -228813,7 +277026,7 @@ index b571d9cedba49..92f064a8f8378 100644 } diff --git a/drivers/net/wwan/mhi_wwan_mbim.c b/drivers/net/wwan/mhi_wwan_mbim.c -index 71bf9b4f769f5..6872782e8dd89 100644 +index 71bf9b4f769f5..ef70bb7c88ad6 100644 --- a/drivers/net/wwan/mhi_wwan_mbim.c +++ 
b/drivers/net/wwan/mhi_wwan_mbim.c @@ -385,13 +385,13 @@ static void mhi_net_rx_refill_work(struct work_struct *work) @@ -228832,6 +277045,14 @@ index 71bf9b4f769f5..6872782e8dd89 100644 if (unlikely(err)) { kfree_skb(skb); break; +@@ -582,6 +582,7 @@ static void mhi_mbim_setup(struct net_device *ndev) + ndev->min_mtu = ETH_MIN_MTU; + ndev->max_mtu = MHI_MAX_BUF_SZ - ndev->needed_headroom; + ndev->tx_queue_len = 1000; ++ ndev->needs_free_netdev = true; + } + + static const struct wwan_ops mhi_mbim_wwan_ops = { diff --git a/drivers/net/wwan/wwan_hwsim.c b/drivers/net/wwan/wwan_hwsim.c index 5b62cf3b3c422..a4230a7376dfd 100644 --- a/drivers/net/wwan/wwan_hwsim.c @@ -228846,10 +277067,27 @@ index 5b62cf3b3c422..a4230a7376dfd 100644 return ERR_PTR(err); } diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h -index 4a16d6e33c093..d9dea4829c86e 100644 +index 4a16d6e33c093..adfd21aa5b6ad 100644 --- a/drivers/net/xen-netback/common.h +++ b/drivers/net/xen-netback/common.h -@@ -203,6 +203,7 @@ struct xenvif_queue { /* Per-queue data for xenvif */ +@@ -48,7 +48,6 @@ + #include <linux/debugfs.h> + + typedef unsigned int pending_ring_idx_t; +-#define INVALID_PENDING_RING_IDX (~0U) + + struct pending_tx_info { + struct xen_netif_tx_request req; /* tx request */ +@@ -82,8 +81,6 @@ struct xenvif_rx_meta { + /* Discriminate from any valid pending_idx value. */ + #define INVALID_PENDING_IDX 0xFFFF + +-#define MAX_BUFFER_OFFSET XEN_PAGE_SIZE +- + #define MAX_PENDING_REQS XEN_NETIF_TX_RING_SIZE + + /* The maximum number of frags is derived from the size of a grant (same +@@ -203,6 +200,7 @@ struct xenvif_queue { /* Per-queue data for xenvif */ unsigned int rx_queue_max; unsigned int rx_queue_len; unsigned long last_rx_time; @@ -228857,8 +277095,466 @@ index 4a16d6e33c093..d9dea4829c86e 100644 bool stalled; struct xenvif_copy_state rx_copy; +@@ -366,11 +364,6 @@ void xenvif_free(struct xenvif *vif); + int xenvif_xenbus_init(void); + void xenvif_xenbus_fini(void); + +-int xenvif_schedulable(struct xenvif *vif); +- +-int xenvif_queue_stopped(struct xenvif_queue *queue); +-void xenvif_wake_queue(struct xenvif_queue *queue); +- + /* (Un)Map communication rings. 
*/ + void xenvif_unmap_frontend_data_rings(struct xenvif_queue *queue); + int xenvif_map_frontend_data_rings(struct xenvif_queue *queue, +@@ -393,8 +386,7 @@ int xenvif_dealloc_kthread(void *data); + irqreturn_t xenvif_ctrl_irq_fn(int irq, void *data); + + bool xenvif_have_rx_work(struct xenvif_queue *queue, bool test_kthread); +-void xenvif_rx_action(struct xenvif_queue *queue); +-void xenvif_rx_queue_tail(struct xenvif_queue *queue, struct sk_buff *skb); ++bool xenvif_rx_queue_tail(struct xenvif_queue *queue, struct sk_buff *skb); + + void xenvif_carrier_on(struct xenvif *vif); + +@@ -402,9 +394,6 @@ void xenvif_carrier_on(struct xenvif *vif); + void xenvif_zerocopy_callback(struct sk_buff *skb, struct ubuf_info *ubuf, + bool zerocopy_success); + +-/* Unmap a pending page and release it back to the guest */ +-void xenvif_idx_unmap(struct xenvif_queue *queue, u16 pending_idx); +- + static inline pending_ring_idx_t nr_pending_reqs(struct xenvif_queue *queue) + { + return MAX_PENDING_REQS - +diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c +index c58996c1e2309..e1a5610b1747e 100644 +--- a/drivers/net/xen-netback/interface.c ++++ b/drivers/net/xen-netback/interface.c +@@ -70,7 +70,7 @@ void xenvif_skb_zerocopy_complete(struct xenvif_queue *queue) + wake_up(&queue->dealloc_wq); + } + +-int xenvif_schedulable(struct xenvif *vif) ++static int xenvif_schedulable(struct xenvif *vif) + { + return netif_running(vif->dev) && + test_bit(VIF_STATUS_CONNECTED, &vif->status) && +@@ -178,20 +178,6 @@ irqreturn_t xenvif_interrupt(int irq, void *dev_id) + return IRQ_HANDLED; + } + +-int xenvif_queue_stopped(struct xenvif_queue *queue) +-{ +- struct net_device *dev = queue->vif->dev; +- unsigned int id = queue->id; +- return netif_tx_queue_stopped(netdev_get_tx_queue(dev, id)); +-} +- +-void xenvif_wake_queue(struct xenvif_queue *queue) +-{ +- struct net_device *dev = queue->vif->dev; +- unsigned int id = queue->id; +- netif_tx_wake_queue(netdev_get_tx_queue(dev, id)); +-} +- + static u16 xenvif_select_queue(struct net_device *dev, struct sk_buff *skb, + struct net_device *sb_dev) + { +@@ -269,14 +255,16 @@ xenvif_start_xmit(struct sk_buff *skb, struct net_device *dev) + if (vif->hash.alg == XEN_NETIF_CTRL_HASH_ALGORITHM_NONE) + skb_clear_hash(skb); + +- xenvif_rx_queue_tail(queue, skb); ++ if (!xenvif_rx_queue_tail(queue, skb)) ++ goto drop; ++ + xenvif_kick_thread(queue); + + return NETDEV_TX_OK; + + drop: + vif->dev->stats.tx_dropped++; +- dev_kfree_skb(skb); ++ dev_kfree_skb_any(skb); + return NETDEV_TX_OK; + } + +diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c +index 32d5bc4919d8c..26428db845bea 100644 +--- a/drivers/net/xen-netback/netback.c ++++ b/drivers/net/xen-netback/netback.c +@@ -112,6 +112,8 @@ static void make_tx_response(struct xenvif_queue *queue, + s8 st); + static void push_tx_responses(struct xenvif_queue *queue); + ++static void xenvif_idx_unmap(struct xenvif_queue *queue, u16 pending_idx); ++ + static inline int tx_work_todo(struct xenvif_queue *queue); + + static inline unsigned long idx_to_pfn(struct xenvif_queue *queue, +@@ -330,10 +332,13 @@ static int xenvif_count_requests(struct xenvif_queue *queue, + + + struct xenvif_tx_cb { +- u16 pending_idx; ++ u16 copy_pending_idx[XEN_NETBK_LEGACY_SLOTS_MAX + 1]; ++ u8 copy_count; + }; + + #define XENVIF_TX_CB(skb) ((struct xenvif_tx_cb *)(skb)->cb) ++#define copy_pending_idx(skb, i) (XENVIF_TX_CB(skb)->copy_pending_idx[i]) ++#define copy_count(skb) 
(XENVIF_TX_CB(skb)->copy_count) + + static inline void xenvif_tx_create_map_op(struct xenvif_queue *queue, + u16 pending_idx, +@@ -368,31 +373,93 @@ static inline struct sk_buff *xenvif_alloc_skb(unsigned int size) + return skb; + } + +-static struct gnttab_map_grant_ref *xenvif_get_requests(struct xenvif_queue *queue, +- struct sk_buff *skb, +- struct xen_netif_tx_request *txp, +- struct gnttab_map_grant_ref *gop, +- unsigned int frag_overflow, +- struct sk_buff *nskb) ++static void xenvif_get_requests(struct xenvif_queue *queue, ++ struct sk_buff *skb, ++ struct xen_netif_tx_request *first, ++ struct xen_netif_tx_request *txfrags, ++ unsigned *copy_ops, ++ unsigned *map_ops, ++ unsigned int frag_overflow, ++ struct sk_buff *nskb, ++ unsigned int extra_count, ++ unsigned int data_len) + { + struct skb_shared_info *shinfo = skb_shinfo(skb); + skb_frag_t *frags = shinfo->frags; +- u16 pending_idx = XENVIF_TX_CB(skb)->pending_idx; +- int start; ++ u16 pending_idx; + pending_ring_idx_t index; + unsigned int nr_slots; ++ struct gnttab_copy *cop = queue->tx_copy_ops + *copy_ops; ++ struct gnttab_map_grant_ref *gop = queue->tx_map_ops + *map_ops; ++ struct xen_netif_tx_request *txp = first; ++ ++ nr_slots = shinfo->nr_frags + 1; ++ ++ copy_count(skb) = 0; + +- nr_slots = shinfo->nr_frags; ++ /* Create copy ops for exactly data_len bytes into the skb head. */ ++ __skb_put(skb, data_len); ++ while (data_len > 0) { ++ int amount = data_len > txp->size ? txp->size : data_len; + +- /* Skip first skb fragment if it is on same page as header fragment. */ +- start = (frag_get_pending_idx(&shinfo->frags[0]) == pending_idx); ++ cop->source.u.ref = txp->gref; ++ cop->source.domid = queue->vif->domid; ++ cop->source.offset = txp->offset; + +- for (shinfo->nr_frags = start; shinfo->nr_frags < nr_slots; +- shinfo->nr_frags++, txp++, gop++) { ++ cop->dest.domid = DOMID_SELF; ++ cop->dest.offset = (offset_in_page(skb->data + ++ skb_headlen(skb) - ++ data_len)) & ~XEN_PAGE_MASK; ++ cop->dest.u.gmfn = virt_to_gfn(skb->data + skb_headlen(skb) ++ - data_len); ++ ++ cop->len = amount; ++ cop->flags = GNTCOPY_source_gref; ++ ++ index = pending_index(queue->pending_cons); ++ pending_idx = queue->pending_ring[index]; ++ callback_param(queue, pending_idx).ctx = NULL; ++ copy_pending_idx(skb, copy_count(skb)) = pending_idx; ++ copy_count(skb)++; ++ ++ cop++; ++ data_len -= amount; ++ ++ if (amount == txp->size) { ++ /* The copy op covered the full tx_request */ ++ ++ memcpy(&queue->pending_tx_info[pending_idx].req, ++ txp, sizeof(*txp)); ++ queue->pending_tx_info[pending_idx].extra_count = ++ (txp == first) ? extra_count : 0; ++ ++ if (txp == first) ++ txp = txfrags; ++ else ++ txp++; ++ queue->pending_cons++; ++ nr_slots--; ++ } else { ++ /* The copy op partially covered the tx_request. ++ * The remainder will be mapped. ++ */ ++ txp->offset += amount; ++ txp->size -= amount; ++ } ++ } ++ ++ for (shinfo->nr_frags = 0; shinfo->nr_frags < nr_slots; ++ shinfo->nr_frags++, gop++) { + index = pending_index(queue->pending_cons++); + pending_idx = queue->pending_ring[index]; +- xenvif_tx_create_map_op(queue, pending_idx, txp, 0, gop); ++ xenvif_tx_create_map_op(queue, pending_idx, txp, ++ txp == first ? 
extra_count : 0, gop); + frag_set_pending_idx(&frags[shinfo->nr_frags], pending_idx); ++ ++ if (txp == first) ++ txp = txfrags; ++ else ++ txp++; + } + + if (frag_overflow) { +@@ -413,7 +480,8 @@ static struct gnttab_map_grant_ref *xenvif_get_requests(struct xenvif_queue *que + skb_shinfo(skb)->frag_list = nskb; + } + +- return gop; ++ (*copy_ops) = cop - queue->tx_copy_ops; ++ (*map_ops) = gop - queue->tx_map_ops; + } + + static inline void xenvif_grant_handle_set(struct xenvif_queue *queue, +@@ -449,7 +517,7 @@ static int xenvif_tx_check_gop(struct xenvif_queue *queue, + struct gnttab_copy **gopp_copy) + { + struct gnttab_map_grant_ref *gop_map = *gopp_map; +- u16 pending_idx = XENVIF_TX_CB(skb)->pending_idx; ++ u16 pending_idx; + /* This always points to the shinfo of the skb being checked, which + * could be either the first or the one on the frag_list + */ +@@ -460,24 +528,37 @@ static int xenvif_tx_check_gop(struct xenvif_queue *queue, + struct skb_shared_info *first_shinfo = NULL; + int nr_frags = shinfo->nr_frags; + const bool sharedslot = nr_frags && +- frag_get_pending_idx(&shinfo->frags[0]) == pending_idx; +- int i, err; ++ frag_get_pending_idx(&shinfo->frags[0]) == ++ copy_pending_idx(skb, copy_count(skb) - 1); ++ int i, err = 0; + +- /* Check status of header. */ +- err = (*gopp_copy)->status; +- if (unlikely(err)) { +- if (net_ratelimit()) +- netdev_dbg(queue->vif->dev, +- "Grant copy of header failed! status: %d pending_idx: %u ref: %u\n", +- (*gopp_copy)->status, +- pending_idx, +- (*gopp_copy)->source.u.ref); +- /* The first frag might still have this slot mapped */ +- if (!sharedslot) +- xenvif_idx_release(queue, pending_idx, +- XEN_NETIF_RSP_ERROR); ++ for (i = 0; i < copy_count(skb); i++) { ++ int newerr; ++ ++ /* Check status of header. */ ++ pending_idx = copy_pending_idx(skb, i); ++ ++ newerr = (*gopp_copy)->status; ++ if (likely(!newerr)) { ++ /* The first frag might still have this slot mapped */ ++ if (i < copy_count(skb) - 1 || !sharedslot) ++ xenvif_idx_release(queue, pending_idx, ++ XEN_NETIF_RSP_OKAY); ++ } else { ++ err = newerr; ++ if (net_ratelimit()) ++ netdev_dbg(queue->vif->dev, ++ "Grant copy of header failed! status: %d pending_idx: %u ref: %u\n", ++ (*gopp_copy)->status, ++ pending_idx, ++ (*gopp_copy)->source.u.ref); ++ /* The first frag might still have this slot mapped */ ++ if (i < copy_count(skb) - 1 || !sharedslot) ++ xenvif_idx_release(queue, pending_idx, ++ XEN_NETIF_RSP_ERROR); ++ } ++ (*gopp_copy)++; + } +- (*gopp_copy)++; + + check_frags: + for (i = 0; i < nr_frags; i++, gop_map++) { +@@ -524,14 +605,6 @@ check_frags: + if (err) + continue; + +- /* First error: if the header haven't shared a slot with the +- * first frag, release it as well. +- */ +- if (!sharedslot) +- xenvif_idx_release(queue, +- XENVIF_TX_CB(skb)->pending_idx, +- XEN_NETIF_RSP_OKAY); +- + /* Invalidate preceding fragments of this skb. */ + for (j = 0; j < i; j++) { + pending_idx = frag_get_pending_idx(&shinfo->frags[j]); +@@ -801,7 +874,6 @@ static void xenvif_tx_build_gops(struct xenvif_queue *queue, + unsigned *copy_ops, + unsigned *map_ops) + { +- struct gnttab_map_grant_ref *gop = queue->tx_map_ops; + struct sk_buff *skb, *nskb; + int ret; + unsigned int frag_overflow; +@@ -883,8 +955,12 @@ static void xenvif_tx_build_gops(struct xenvif_queue *queue, + continue; + } + ++ data_len = (txreq.size > XEN_NETBACK_TX_COPY_LEN) ? 
++ XEN_NETBACK_TX_COPY_LEN : txreq.size; ++ + ret = xenvif_count_requests(queue, &txreq, extra_count, + txfrags, work_to_do); ++ + if (unlikely(ret < 0)) + break; + +@@ -910,9 +986,8 @@ static void xenvif_tx_build_gops(struct xenvif_queue *queue, + index = pending_index(queue->pending_cons); + pending_idx = queue->pending_ring[index]; + +- data_len = (txreq.size > XEN_NETBACK_TX_COPY_LEN && +- ret < XEN_NETBK_LEGACY_SLOTS_MAX) ? +- XEN_NETBACK_TX_COPY_LEN : txreq.size; ++ if (ret >= XEN_NETBK_LEGACY_SLOTS_MAX - 1 && data_len < txreq.size) ++ data_len = txreq.size; + + skb = xenvif_alloc_skb(data_len); + if (unlikely(skb == NULL)) { +@@ -923,8 +998,6 @@ static void xenvif_tx_build_gops(struct xenvif_queue *queue, + } + + skb_shinfo(skb)->nr_frags = ret; +- if (data_len < txreq.size) +- skb_shinfo(skb)->nr_frags++; + /* At this point shinfo->nr_frags is in fact the number of + * slots, which can be as large as XEN_NETBK_LEGACY_SLOTS_MAX. + */ +@@ -986,54 +1059,19 @@ static void xenvif_tx_build_gops(struct xenvif_queue *queue, + type); + } + +- XENVIF_TX_CB(skb)->pending_idx = pending_idx; +- +- __skb_put(skb, data_len); +- queue->tx_copy_ops[*copy_ops].source.u.ref = txreq.gref; +- queue->tx_copy_ops[*copy_ops].source.domid = queue->vif->domid; +- queue->tx_copy_ops[*copy_ops].source.offset = txreq.offset; +- +- queue->tx_copy_ops[*copy_ops].dest.u.gmfn = +- virt_to_gfn(skb->data); +- queue->tx_copy_ops[*copy_ops].dest.domid = DOMID_SELF; +- queue->tx_copy_ops[*copy_ops].dest.offset = +- offset_in_page(skb->data) & ~XEN_PAGE_MASK; +- +- queue->tx_copy_ops[*copy_ops].len = data_len; +- queue->tx_copy_ops[*copy_ops].flags = GNTCOPY_source_gref; +- +- (*copy_ops)++; +- +- if (data_len < txreq.size) { +- frag_set_pending_idx(&skb_shinfo(skb)->frags[0], +- pending_idx); +- xenvif_tx_create_map_op(queue, pending_idx, &txreq, +- extra_count, gop); +- gop++; +- } else { +- frag_set_pending_idx(&skb_shinfo(skb)->frags[0], +- INVALID_PENDING_IDX); +- memcpy(&queue->pending_tx_info[pending_idx].req, +- &txreq, sizeof(txreq)); +- queue->pending_tx_info[pending_idx].extra_count = +- extra_count; +- } +- +- queue->pending_cons++; +- +- gop = xenvif_get_requests(queue, skb, txfrags, gop, +- frag_overflow, nskb); ++ xenvif_get_requests(queue, skb, &txreq, txfrags, copy_ops, ++ map_ops, frag_overflow, nskb, extra_count, ++ data_len); + + __skb_queue_tail(&queue->tx_queue, skb); + + queue->tx.req_cons = idx; + +- if (((gop-queue->tx_map_ops) >= ARRAY_SIZE(queue->tx_map_ops)) || ++ if ((*map_ops >= ARRAY_SIZE(queue->tx_map_ops)) || + (*copy_ops >= ARRAY_SIZE(queue->tx_copy_ops))) + break; + } + +- (*map_ops) = gop - queue->tx_map_ops; + return; + } + +@@ -1112,9 +1150,8 @@ static int xenvif_tx_submit(struct xenvif_queue *queue) + while ((skb = __skb_dequeue(&queue->tx_queue)) != NULL) { + struct xen_netif_tx_request *txp; + u16 pending_idx; +- unsigned data_len; + +- pending_idx = XENVIF_TX_CB(skb)->pending_idx; ++ pending_idx = copy_pending_idx(skb, 0); + txp = &queue->pending_tx_info[pending_idx].req; + + /* Check the remap error code. */ +@@ -1133,18 +1170,6 @@ static int xenvif_tx_submit(struct xenvif_queue *queue) + continue; + } + +- data_len = skb->len; +- callback_param(queue, pending_idx).ctx = NULL; +- if (data_len < txp->size) { +- /* Append the packet payload as a fragment. */ +- txp->offset += data_len; +- txp->size -= data_len; +- } else { +- /* Schedule a response immediately. 
*/ +- xenvif_idx_release(queue, pending_idx, +- XEN_NETIF_RSP_OKAY); +- } +- + if (txp->flags & XEN_NETTXF_csum_blank) + skb->ip_summed = CHECKSUM_PARTIAL; + else if (txp->flags & XEN_NETTXF_data_validated) +@@ -1331,7 +1356,7 @@ static inline void xenvif_tx_dealloc_action(struct xenvif_queue *queue) + /* Called after netfront has transmitted */ + int xenvif_tx_action(struct xenvif_queue *queue, int budget) + { +- unsigned nr_mops, nr_cops = 0; ++ unsigned nr_mops = 0, nr_cops = 0; + int work_done, ret; + + if (unlikely(!tx_work_todo(queue))) +@@ -1418,7 +1443,7 @@ static void push_tx_responses(struct xenvif_queue *queue) + notify_remote_via_irq(queue->tx_irq); + } + +-void xenvif_idx_unmap(struct xenvif_queue *queue, u16 pending_idx) ++static void xenvif_idx_unmap(struct xenvif_queue *queue, u16 pending_idx) + { + int ret; + struct gnttab_unmap_grant_ref tx_unmap_op; diff --git a/drivers/net/xen-netback/rx.c b/drivers/net/xen-netback/rx.c -index accc991d153f7..a0335407be423 100644 +index accc991d153f7..0ba754ebc5baa 100644 --- a/drivers/net/xen-netback/rx.c +++ b/drivers/net/xen-netback/rx.c @@ -33,28 +33,36 @@ @@ -228915,7 +277611,15 @@ index accc991d153f7..a0335407be423 100644 do { prod = queue->rx.sring->req_prod; -@@ -80,13 +88,19 @@ void xenvif_rx_queue_tail(struct xenvif_queue *queue, struct sk_buff *skb) +@@ -74,22 +82,30 @@ static bool xenvif_rx_ring_slots_available(struct xenvif_queue *queue) + return false; + } + +-void xenvif_rx_queue_tail(struct xenvif_queue *queue, struct sk_buff *skb) ++bool xenvif_rx_queue_tail(struct xenvif_queue *queue, struct sk_buff *skb) + { + unsigned long flags; ++ bool ret = true; spin_lock_irqsave(&queue->rx_queue.lock, flags); @@ -228927,8 +277631,7 @@ index accc991d153f7..a0335407be423 100644 struct net_device *dev = queue->vif->dev; netif_tx_stop_queue(netdev_get_tx_queue(dev, queue->id)); -+ kfree_skb(skb); -+ queue->vif->dev->stats.rx_dropped++; ++ ret = false; + } else { + if (skb_queue_empty(&queue->rx_queue)) + xenvif_update_needed_slots(queue, skb); @@ -228939,7 +277642,12 @@ index accc991d153f7..a0335407be423 100644 } spin_unlock_irqrestore(&queue->rx_queue.lock, flags); -@@ -100,6 +114,8 @@ static struct sk_buff *xenvif_rx_dequeue(struct xenvif_queue *queue) ++ ++ return ret; + } + + static struct sk_buff *xenvif_rx_dequeue(struct xenvif_queue *queue) +@@ -100,6 +116,8 @@ static struct sk_buff *xenvif_rx_dequeue(struct xenvif_queue *queue) skb = __skb_dequeue(&queue->rx_queue); if (skb) { @@ -228948,7 +277656,7 @@ index accc991d153f7..a0335407be423 100644 queue->rx_queue_len -= skb->len; if (queue->rx_queue_len < queue->rx_queue_max) { struct netdev_queue *txq; -@@ -134,6 +150,7 @@ static void xenvif_rx_queue_drop_expired(struct xenvif_queue *queue) +@@ -134,6 +152,7 @@ static void xenvif_rx_queue_drop_expired(struct xenvif_queue *queue) break; xenvif_rx_dequeue(queue); kfree_skb(skb); @@ -228956,7 +277664,16 @@ index accc991d153f7..a0335407be423 100644 } } -@@ -478,6 +495,7 @@ void xenvif_rx_action(struct xenvif_queue *queue) +@@ -469,7 +488,7 @@ static void xenvif_rx_skb(struct xenvif_queue *queue) + + #define RX_BATCH_SIZE 64 + +-void xenvif_rx_action(struct xenvif_queue *queue) ++static void xenvif_rx_action(struct xenvif_queue *queue) + { + struct sk_buff_head completed_skbs; + unsigned int work_done = 0; +@@ -478,6 +497,7 @@ void xenvif_rx_action(struct xenvif_queue *queue) queue->rx_copy.completed = &completed_skbs; while (xenvif_rx_ring_slots_available(queue) && @@ -228964,7 +277681,7 @@ index accc991d153f7..a0335407be423 100644 
work_done < RX_BATCH_SIZE) { xenvif_rx_skb(queue); work_done++; -@@ -487,27 +505,31 @@ void xenvif_rx_action(struct xenvif_queue *queue) +@@ -487,27 +507,31 @@ void xenvif_rx_action(struct xenvif_queue *queue) xenvif_rx_copy_flush(queue); } @@ -229044,7 +277761,7 @@ index d24b7a7993aa0..e85b3c5d4acce 100644 kobject_uevent(&dev->dev.kobj, KOBJ_OFFLINE); backend_disconnect(be); diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c -index fc41ba95f81d0..074dceb1930b3 100644 +index fc41ba95f81d0..6e73d3a00eecd 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -66,6 +66,10 @@ module_param_named(max_queues, xennet_max_queues, uint, 0644); @@ -229392,7 +278109,9 @@ index fc41ba95f81d0..074dceb1930b3 100644 + *eoi = 0; + } else if (unlikely(work_queued < queue->rx_rsp_unconsumed)) { + const struct device *dev = &queue->info->netdev->dev; -+ + +- if (likely(netif_carrier_ok(dev) && +- RING_HAS_UNCONSUMED_RESPONSES(&queue->rx))) + spin_unlock_irqrestore(&queue->rx_cons_lock, flags); + dev_alert(dev, "RX producer index going backwards\n"); + dev_alert(dev, "Disabled for further use\n"); @@ -229400,9 +278119,7 @@ index fc41ba95f81d0..074dceb1930b3 100644 + return false; + } + spin_unlock_irqrestore(&queue->rx_cons_lock, flags); - -- if (likely(netif_carrier_ok(dev) && -- RING_HAS_UNCONSUMED_RESPONSES(&queue->rx))) ++ + if (likely(netif_carrier_ok(queue->info->netdev) && work_queued)) napi_schedule(&queue->napi); @@ -229440,7 +278157,20 @@ index fc41ba95f81d0..074dceb1930b3 100644 .ndo_open = xennet_open, .ndo_stop = xennet_close, .ndo_start_xmit = xennet_start_xmit, -@@ -1768,9 +1899,10 @@ static int setup_netfront_single(struct netfront_queue *queue) +@@ -1735,6 +1866,12 @@ static int netfront_resume(struct xenbus_device *dev) + netif_tx_unlock_bh(info->netdev); + + xennet_disconnect_backend(info); ++ ++ rtnl_lock(); ++ if (info->queues) ++ xennet_destroy_queues(info); ++ rtnl_unlock(); ++ + return 0; + } + +@@ -1768,9 +1905,10 @@ static int setup_netfront_single(struct netfront_queue *queue) if (err < 0) goto fail; @@ -229454,7 +278184,7 @@ index fc41ba95f81d0..074dceb1930b3 100644 if (err < 0) goto bind_fail; queue->rx_evtchn = queue->tx_evtchn; -@@ -1798,18 +1930,18 @@ static int setup_netfront_split(struct netfront_queue *queue) +@@ -1798,18 +1936,18 @@ static int setup_netfront_split(struct netfront_queue *queue) snprintf(queue->tx_irq_name, sizeof(queue->tx_irq_name), "%s-tx", queue->name); @@ -229479,7 +278209,7 @@ index fc41ba95f81d0..074dceb1930b3 100644 if (err < 0) goto bind_rx_fail; queue->rx_irq = err; -@@ -1833,7 +1965,7 @@ static int setup_netfront(struct xenbus_device *dev, +@@ -1833,7 +1971,7 @@ static int setup_netfront(struct xenbus_device *dev, struct netfront_queue *queue, unsigned int feature_split_evtchn) { struct xen_netif_tx_sring *txs; @@ -229488,7 +278218,7 @@ index fc41ba95f81d0..074dceb1930b3 100644 grant_ref_t gref; int err; -@@ -1853,21 +1985,21 @@ static int setup_netfront(struct xenbus_device *dev, +@@ -1853,21 +1991,21 @@ static int setup_netfront(struct xenbus_device *dev, err = xenbus_grant_ring(dev, txs, 1, &gref); if (err < 0) @@ -229513,7 +278243,7 @@ index fc41ba95f81d0..074dceb1930b3 100644 queue->rx_ring_ref = gref; if (feature_split_evtchn) -@@ -1880,22 +2012,28 @@ static int setup_netfront(struct xenbus_device *dev, +@@ -1880,22 +2018,28 @@ static int setup_netfront(struct xenbus_device *dev, err = setup_netfront_single(queue); if (err) @@ -229552,7 +278282,7 @@ index fc41ba95f81d0..074dceb1930b3 100644 return err; } 
-@@ -1911,6 +2049,7 @@ static int xennet_init_queue(struct netfront_queue *queue) +@@ -1911,6 +2055,7 @@ static int xennet_init_queue(struct netfront_queue *queue) spin_lock_init(&queue->tx_lock); spin_lock_init(&queue->rx_lock); @@ -229560,7 +278290,7 @@ index fc41ba95f81d0..074dceb1930b3 100644 timer_setup(&queue->rx_refill_timer, rx_refill_timeout, 0); -@@ -2040,22 +2179,6 @@ error: +@@ -2040,22 +2185,6 @@ error: return err; } @@ -229583,7 +278313,7 @@ index fc41ba95f81d0..074dceb1930b3 100644 static int xennet_create_page_pool(struct netfront_queue *queue) -@@ -2164,6 +2287,10 @@ static int talk_to_netback(struct xenbus_device *dev, +@@ -2164,6 +2293,10 @@ static int talk_to_netback(struct xenbus_device *dev, info->netdev->irq = 0; @@ -229594,7 +278324,7 @@ index fc41ba95f81d0..074dceb1930b3 100644 /* Check if backend supports multiple queues */ max_queues = xenbus_read_unsigned(info->xbdev->otherend, "multi-queue-max-queues", 1); -@@ -2330,6 +2457,9 @@ static int xennet_connect(struct net_device *dev) +@@ -2330,6 +2463,9 @@ static int xennet_connect(struct net_device *dev) return err; if (np->netback_has_xdp_headroom) pr_info("backend supports XDP headroom\n"); @@ -229604,11 +278334,53 @@ index fc41ba95f81d0..074dceb1930b3 100644 /* talk_to_netback() sets the correct number of queues */ num_queues = dev->real_num_tx_queues; +diff --git a/drivers/nfc/fdp/fdp.c b/drivers/nfc/fdp/fdp.c +index c6b3334f24c9e..f12f903a9dd13 100644 +--- a/drivers/nfc/fdp/fdp.c ++++ b/drivers/nfc/fdp/fdp.c +@@ -249,11 +249,19 @@ static int fdp_nci_close(struct nci_dev *ndev) + static int fdp_nci_send(struct nci_dev *ndev, struct sk_buff *skb) + { + struct fdp_nci_info *info = nci_get_drvdata(ndev); ++ int ret; + + if (atomic_dec_and_test(&info->data_pkt_counter)) + info->data_pkt_counter_cb(ndev); + +- return info->phy_ops->write(info->phy, skb); ++ ret = info->phy_ops->write(info->phy, skb); ++ if (ret < 0) { ++ kfree_skb(skb); ++ return ret; ++ } ++ ++ consume_skb(skb); ++ return 0; + } + + static int fdp_nci_request_firmware(struct nci_dev *ndev) diff --git a/drivers/nfc/nfcmrvl/i2c.c b/drivers/nfc/nfcmrvl/i2c.c -index ceef81d93ac99..01329b91d59d5 100644 +index ceef81d93ac99..a902720cd8493 100644 --- a/drivers/nfc/nfcmrvl/i2c.c +++ b/drivers/nfc/nfcmrvl/i2c.c -@@ -167,9 +167,9 @@ static int nfcmrvl_i2c_parse_dt(struct device_node *node, +@@ -132,10 +132,15 @@ static int nfcmrvl_i2c_nci_send(struct nfcmrvl_private *priv, + ret = -EREMOTEIO; + } else + ret = 0; ++ } ++ ++ if (ret) { + kfree_skb(skb); ++ return ret; + } + +- return ret; ++ consume_skb(skb); ++ return 0; + } + + static void nfcmrvl_i2c_nci_update_config(struct nfcmrvl_private *priv, +@@ -167,9 +172,9 @@ static int nfcmrvl_i2c_parse_dt(struct device_node *node, pdata->irq_polarity = IRQF_TRIGGER_RISING; ret = irq_of_parse_and_map(node, 0); @@ -229690,6 +278462,26 @@ index a99aedff795dc..ea73094530968 100644 } static int nfcmrvl_resume(struct usb_interface *intf) +diff --git a/drivers/nfc/nxp-nci/core.c b/drivers/nfc/nxp-nci/core.c +index 518e2afb43a8d..13c433eb694dc 100644 +--- a/drivers/nfc/nxp-nci/core.c ++++ b/drivers/nfc/nxp-nci/core.c +@@ -77,10 +77,13 @@ static int nxp_nci_send(struct nci_dev *ndev, struct sk_buff *skb) + return -EINVAL; + + r = info->phy_ops->write(info->phy_id, skb); +- if (r < 0) ++ if (r < 0) { + kfree_skb(skb); ++ return r; ++ } + +- return r; ++ consume_skb(skb); ++ return 0; + } + + static const struct nci_ops nxp_nci_ops = { diff --git a/drivers/nfc/nxp-nci/i2c.c b/drivers/nfc/nxp-nci/i2c.c index 
7e451c10985df..ae2ba08d8ac3f 100644 --- a/drivers/nfc/nxp-nci/i2c.c @@ -229721,10 +278513,28 @@ index 7e451c10985df..ae2ba08d8ac3f 100644 "Invalid frame payload length: %u (expected %u)\n", r, header.plen); diff --git a/drivers/nfc/pn533/pn533.c b/drivers/nfc/pn533/pn533.c -index 2f3f3fe9a0baa..6dc0af63440f4 100644 +index 2f3f3fe9a0baa..939d27652a4c9 100644 --- a/drivers/nfc/pn533/pn533.c +++ b/drivers/nfc/pn533/pn533.c -@@ -2218,7 +2218,7 @@ static int pn533_fill_fragment_skbs(struct pn533 *dev, struct sk_buff *skb) +@@ -1297,6 +1297,8 @@ static int pn533_poll_dep_complete(struct pn533 *dev, void *arg, + if (IS_ERR(resp)) + return PTR_ERR(resp); + ++ memset(&nfc_target, 0, sizeof(struct nfc_target)); ++ + rsp = (struct pn533_cmd_jump_dep_response *)resp->data; + + rc = rsp->status & PN533_CMD_RET_MASK; +@@ -1928,6 +1930,8 @@ static int pn533_in_dep_link_up_complete(struct pn533 *dev, void *arg, + + dev_dbg(dev->dev, "Creating new target\n"); + ++ memset(&nfc_target, 0, sizeof(struct nfc_target)); ++ + nfc_target.supported_protocols = NFC_PROTO_NFC_DEP_MASK; + nfc_target.nfcid1_len = 10; + memcpy(nfc_target.nfcid1, rsp->nfcid3t, nfc_target.nfcid1_len); +@@ -2218,7 +2222,7 @@ static int pn533_fill_fragment_skbs(struct pn533 *dev, struct sk_buff *skb) frag = pn533_alloc_skb(dev, frag_size); if (!frag) { skb_queue_purge(&dev->fragment_skb); @@ -229733,7 +278543,7 @@ index 2f3f3fe9a0baa..6dc0af63440f4 100644 } if (!dev->tgt_mode) { -@@ -2287,7 +2287,7 @@ static int pn533_transceive(struct nfc_dev *nfc_dev, +@@ -2287,7 +2291,7 @@ static int pn533_transceive(struct nfc_dev *nfc_dev, /* jumbo frame ? */ if (skb->len > PN533_CMD_DATAEXCH_DATA_MAXLEN) { rc = pn533_fill_fragment_skbs(dev, skb); @@ -229742,7 +278552,7 @@ index 2f3f3fe9a0baa..6dc0af63440f4 100644 goto error; skb = skb_dequeue(&dev->fragment_skb); -@@ -2355,7 +2355,7 @@ static int pn533_tm_send(struct nfc_dev *nfc_dev, struct sk_buff *skb) +@@ -2355,7 +2359,7 @@ static int pn533_tm_send(struct nfc_dev *nfc_dev, struct sk_buff *skb) /* let's split in multiple chunks if size's too big */ if (skb->len > PN533_CMD_DATAEXCH_DATA_MAXLEN) { rc = pn533_fill_fragment_skbs(dev, skb); @@ -229751,7 +278561,7 @@ index 2f3f3fe9a0baa..6dc0af63440f4 100644 goto error; /* get the first skb */ -@@ -2789,13 +2789,14 @@ void pn53x_common_clean(struct pn533 *priv) +@@ -2789,13 +2793,14 @@ void pn53x_common_clean(struct pn533 *priv) { struct pn533_cmd *cmd, *n; @@ -229780,6 +278590,92 @@ index 7bdaf82630706..7ad98973648cc 100644 kfree_skb(pn532->recv_skb); kfree(pn532); } +diff --git a/drivers/nfc/pn533/usb.c b/drivers/nfc/pn533/usb.c +index bd7f7478d1892..62ad26e4299d1 100644 +--- a/drivers/nfc/pn533/usb.c ++++ b/drivers/nfc/pn533/usb.c +@@ -153,10 +153,17 @@ static int pn533_usb_send_ack(struct pn533 *dev, gfp_t flags) + return usb_submit_urb(phy->ack_urb, flags); + } + ++struct pn533_out_arg { ++ struct pn533_usb_phy *phy; ++ struct completion done; ++}; ++ + static int pn533_usb_send_frame(struct pn533 *dev, + struct sk_buff *out) + { + struct pn533_usb_phy *phy = dev->phy; ++ struct pn533_out_arg arg; ++ void *cntx; + int rc; + + if (phy->priv == NULL) +@@ -168,10 +175,17 @@ static int pn533_usb_send_frame(struct pn533 *dev, + print_hex_dump_debug("PN533 TX: ", DUMP_PREFIX_NONE, 16, 1, + out->data, out->len, false); + ++ init_completion(&arg.done); ++ cntx = phy->out_urb->context; ++ phy->out_urb->context = &arg; ++ + rc = usb_submit_urb(phy->out_urb, GFP_KERNEL); + if (rc) + return rc; + ++ wait_for_completion(&arg.done); ++ phy->out_urb->context = 
cntx; ++ + if (dev->protocol_type == PN533_PROTO_REQ_RESP) { + /* request for response for sent packet directly */ + rc = pn533_submit_urb_for_response(phy, GFP_KERNEL); +@@ -408,7 +422,31 @@ static int pn533_acr122_poweron_rdr(struct pn533_usb_phy *phy) + return arg.rc; + } + +-static void pn533_send_complete(struct urb *urb) ++static void pn533_out_complete(struct urb *urb) ++{ ++ struct pn533_out_arg *arg = urb->context; ++ struct pn533_usb_phy *phy = arg->phy; ++ ++ switch (urb->status) { ++ case 0: ++ break; /* success */ ++ case -ECONNRESET: ++ case -ENOENT: ++ dev_dbg(&phy->udev->dev, ++ "The urb has been stopped (status %d)\n", ++ urb->status); ++ break; ++ case -ESHUTDOWN: ++ default: ++ nfc_err(&phy->udev->dev, ++ "Urb failure (status %d)\n", ++ urb->status); ++ } ++ ++ complete(&arg->done); ++} ++ ++static void pn533_ack_complete(struct urb *urb) + { + struct pn533_usb_phy *phy = urb->context; + +@@ -496,10 +534,10 @@ static int pn533_usb_probe(struct usb_interface *interface, + + usb_fill_bulk_urb(phy->out_urb, phy->udev, + usb_sndbulkpipe(phy->udev, out_endpoint), +- NULL, 0, pn533_send_complete, phy); ++ NULL, 0, pn533_out_complete, phy); + usb_fill_bulk_urb(phy->ack_urb, phy->udev, + usb_sndbulkpipe(phy->udev, out_endpoint), +- NULL, 0, pn533_send_complete, phy); ++ NULL, 0, pn533_ack_complete, phy); + + switch (id->driver_info) { + case PN533_DEVICE_STD: diff --git a/drivers/nfc/port100.c b/drivers/nfc/port100.c index 16ceb763594fc..90e30e2f15125 100644 --- a/drivers/nfc/port100.c @@ -229794,6 +278690,102 @@ index 16ceb763594fc..90e30e2f15125 100644 usb_free_urb(dev->out_urb); usb_put_dev(dev->udev); +diff --git a/drivers/nfc/s3fwrn5/core.c b/drivers/nfc/s3fwrn5/core.c +index 1c412007fabb6..0270e05b68dff 100644 +--- a/drivers/nfc/s3fwrn5/core.c ++++ b/drivers/nfc/s3fwrn5/core.c +@@ -110,11 +110,15 @@ static int s3fwrn5_nci_send(struct nci_dev *ndev, struct sk_buff *skb) + } + + ret = s3fwrn5_write(info, skb); +- if (ret < 0) ++ if (ret < 0) { + kfree_skb(skb); ++ mutex_unlock(&info->mutex); ++ return ret; ++ } + ++ consume_skb(skb); + mutex_unlock(&info->mutex); +- return ret; ++ return 0; + } + + static int s3fwrn5_nci_post_setup(struct nci_dev *ndev) +diff --git a/drivers/nfc/st-nci/se.c b/drivers/nfc/st-nci/se.c +index 5fd89f72969d9..04a2cea6d6b61 100644 +--- a/drivers/nfc/st-nci/se.c ++++ b/drivers/nfc/st-nci/se.c +@@ -312,6 +312,8 @@ static int st_nci_hci_connectivity_event_received(struct nci_dev *ndev, + int r = 0; + struct device *dev = &ndev->nfc_dev->dev; + struct nfc_evt_transaction *transaction; ++ u32 aid_len; ++ u8 params_len; + + pr_debug("connectivity gate event: %x\n", event); + +@@ -325,26 +327,47 @@ static int st_nci_hci_connectivity_event_received(struct nci_dev *ndev, + * Description Tag Length + * AID 81 5 to 16 + * PARAMETERS 82 0 to 255 ++ * ++ * The key differences are aid storage length is variably sized ++ * in the packet, but fixed in nfc_evt_transaction, and that ++ * the aid_len is u8 in the packet, but u32 in the structure, ++ * and the tags in the packet are not included in ++ * nfc_evt_transaction. 
++ * ++ * size(b): 1 1 5-16 1 1 0-255 ++ * offset: 0 1 2 aid_len + 2 aid_len + 3 aid_len + 4 ++ * mem name: aid_tag(M) aid_len aid params_tag(M) params_len params ++ * example: 0x81 5-16 X 0x82 0-255 X + */ +- if (skb->len < NFC_MIN_AID_LENGTH + 2 && +- skb->data[0] != NFC_EVT_TRANSACTION_AID_TAG) ++ if (skb->len < 2 || skb->data[0] != NFC_EVT_TRANSACTION_AID_TAG) + return -EPROTO; + +- transaction = devm_kzalloc(dev, skb->len - 2, GFP_KERNEL); +- if (!transaction) +- return -ENOMEM; ++ aid_len = skb->data[1]; + +- transaction->aid_len = skb->data[1]; +- memcpy(transaction->aid, &skb->data[2], transaction->aid_len); ++ if (skb->len < aid_len + 4 || ++ aid_len > sizeof(transaction->aid)) ++ return -EPROTO; + +- /* Check next byte is PARAMETERS tag (82) */ +- if (skb->data[transaction->aid_len + 2] != +- NFC_EVT_TRANSACTION_PARAMS_TAG) ++ params_len = skb->data[aid_len + 3]; ++ ++ /* Verify PARAMETERS tag is (82), and final check that there is ++ * enough space in the packet to read everything. ++ */ ++ if (skb->data[aid_len + 2] != NFC_EVT_TRANSACTION_PARAMS_TAG || ++ skb->len < aid_len + 4 + params_len) + return -EPROTO; + +- transaction->params_len = skb->data[transaction->aid_len + 3]; +- memcpy(transaction->params, skb->data + +- transaction->aid_len + 4, transaction->params_len); ++ transaction = devm_kzalloc(dev, sizeof(*transaction) + ++ params_len, GFP_KERNEL); ++ if (!transaction) ++ return -ENOMEM; ++ ++ transaction->aid_len = aid_len; ++ transaction->params_len = params_len; ++ ++ memcpy(transaction->aid, &skb->data[2], aid_len); ++ memcpy(transaction->params, &skb->data[aid_len + 4], ++ params_len); + + r = nfc_se_transaction(ndev->nfc_dev, host, transaction); + break; diff --git a/drivers/nfc/st21nfca/i2c.c b/drivers/nfc/st21nfca/i2c.c index 279d88128b2e4..d56bc24709b5c 100644 --- a/drivers/nfc/st21nfca/i2c.c @@ -230006,6 +278998,30 @@ index cb6ad916be911..ae6771cc9894a 100644 }; struct st21nfca_hci_info { +diff --git a/drivers/nfc/virtual_ncidev.c b/drivers/nfc/virtual_ncidev.c +index 221fa3bb8705e..6317e8505aaad 100644 +--- a/drivers/nfc/virtual_ncidev.c ++++ b/drivers/nfc/virtual_ncidev.c +@@ -54,16 +54,19 @@ static int virtual_nci_send(struct nci_dev *ndev, struct sk_buff *skb) + mutex_lock(&nci_mutex); + if (state != virtual_ncidev_enabled) { + mutex_unlock(&nci_mutex); ++ kfree_skb(skb); + return 0; + } + + if (send_buff) { + mutex_unlock(&nci_mutex); ++ kfree_skb(skb); + return -1; + } + send_buff = skb_copy(skb, GFP_KERNEL); + mutex_unlock(&nci_mutex); + wake_up_interruptible(&wq); ++ consume_skb(skb); + + return 0; + } diff --git a/drivers/ntb/hw/intel/ntb_hw_gen4.c b/drivers/ntb/hw/intel/ntb_hw_gen4.c index fede05151f698..4081fc538ff45 100644 --- a/drivers/ntb/hw/intel/ntb_hw_gen4.c @@ -230315,7 +279331,7 @@ index 4b80150e4afa7..b5aa55c614616 100644 if (rc) return rc; diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c -index f8dd664b2eda5..92fe67bd24570 100644 +index f8dd664b2eda5..06750f3d52745 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -131,7 +131,7 @@ static void nvme_set_queue_dying(struct nvme_ns *ns) @@ -230327,7 +279343,57 @@ index f8dd664b2eda5..92fe67bd24570 100644 blk_mq_unquiesce_queue(ns->queue); set_capacity_and_notify(ns->disk, 0); -@@ -1354,6 +1354,8 @@ static int nvme_process_ns_desc(struct nvme_ctrl *ctrl, struct nvme_ns_ids *ids, +@@ -1113,6 +1113,18 @@ static u32 nvme_known_admin_effects(u8 opcode) + return 0; + } + ++static u32 nvme_known_nvm_effects(u8 opcode) ++{ ++ switch (opcode) { ++ case 
nvme_cmd_write: ++ case nvme_cmd_write_zeroes: ++ case nvme_cmd_write_uncor: ++ return NVME_CMD_EFFECTS_LBCC; ++ default: ++ return 0; ++ } ++} ++ + u32 nvme_command_effects(struct nvme_ctrl *ctrl, struct nvme_ns *ns, u8 opcode) + { + u32 effects = 0; +@@ -1120,16 +1132,24 @@ u32 nvme_command_effects(struct nvme_ctrl *ctrl, struct nvme_ns *ns, u8 opcode) + if (ns) { + if (ns->head->effects) + effects = le32_to_cpu(ns->head->effects->iocs[opcode]); ++ if (ns->head->ids.csi == NVME_CSI_NVM) ++ effects |= nvme_known_nvm_effects(opcode); + if (effects & ~(NVME_CMD_EFFECTS_CSUPP | NVME_CMD_EFFECTS_LBCC)) + dev_warn_once(ctrl->device, +- "IO command:%02x has unhandled effects:%08x\n", ++ "IO command:%02x has unusual effects:%08x\n", + opcode, effects); +- return 0; +- } + +- if (ctrl->effects) +- effects = le32_to_cpu(ctrl->effects->acs[opcode]); +- effects |= nvme_known_admin_effects(opcode); ++ /* ++ * NVME_CMD_EFFECTS_CSE_MASK causes a freeze all I/O queues, ++ * which would deadlock when done on an I/O command. Note that ++ * We already warn about an unusual effect above. ++ */ ++ effects &= ~NVME_CMD_EFFECTS_CSE_MASK; ++ } else { ++ if (ctrl->effects) ++ effects = le32_to_cpu(ctrl->effects->acs[opcode]); ++ effects |= nvme_known_admin_effects(opcode); ++ } + + return effects; + } +@@ -1354,6 +1374,8 @@ static int nvme_process_ns_desc(struct nvme_ctrl *ctrl, struct nvme_ns_ids *ids, warn_str, cur->nidl); return -1; } @@ -230336,7 +279402,7 @@ index f8dd664b2eda5..92fe67bd24570 100644 memcpy(ids->eui64, data + sizeof(*cur), NVME_NIDT_EUI64_LEN); return NVME_NIDT_EUI64_LEN; case NVME_NIDT_NGUID: -@@ -1362,6 +1364,8 @@ static int nvme_process_ns_desc(struct nvme_ctrl *ctrl, struct nvme_ns_ids *ids, +@@ -1362,6 +1384,8 @@ static int nvme_process_ns_desc(struct nvme_ctrl *ctrl, struct nvme_ns_ids *ids, warn_str, cur->nidl); return -1; } @@ -230345,7 +279411,7 @@ index f8dd664b2eda5..92fe67bd24570 100644 memcpy(ids->nguid, data + sizeof(*cur), NVME_NIDT_NGUID_LEN); return NVME_NIDT_NGUID_LEN; case NVME_NIDT_UUID: -@@ -1370,6 +1374,8 @@ static int nvme_process_ns_desc(struct nvme_ctrl *ctrl, struct nvme_ns_ids *ids, +@@ -1370,6 +1394,8 @@ static int nvme_process_ns_desc(struct nvme_ctrl *ctrl, struct nvme_ns_ids *ids, warn_str, cur->nidl); return -1; } @@ -230354,7 +279420,7 @@ index f8dd664b2eda5..92fe67bd24570 100644 uuid_copy(&ids->uuid, data + sizeof(*cur)); return NVME_NIDT_UUID_LEN; case NVME_NIDT_CSI: -@@ -1466,12 +1472,18 @@ static int nvme_identify_ns(struct nvme_ctrl *ctrl, unsigned nsid, +@@ -1466,12 +1492,18 @@ static int nvme_identify_ns(struct nvme_ctrl *ctrl, unsigned nsid, if ((*id)->ncap == 0) /* namespace not allocated or attached */ goto out_free_id; @@ -230379,7 +279445,7 @@ index f8dd664b2eda5..92fe67bd24570 100644 return 0; -@@ -1674,13 +1686,6 @@ static void nvme_config_discard(struct gendisk *disk, struct nvme_ns *ns) +@@ -1674,13 +1706,6 @@ static void nvme_config_discard(struct gendisk *disk, struct nvme_ns *ns) blk_queue_max_write_zeroes_sectors(queue, UINT_MAX); } @@ -230393,7 +279459,7 @@ index f8dd664b2eda5..92fe67bd24570 100644 static bool nvme_ns_ids_equal(struct nvme_ns_ids *a, struct nvme_ns_ids *b) { return uuid_equal(&a->uuid, &b->uuid) && -@@ -1714,7 +1719,7 @@ static int nvme_setup_streams_ns(struct nvme_ctrl *ctrl, struct nvme_ns *ns, +@@ -1714,7 +1739,7 @@ static int nvme_setup_streams_ns(struct nvme_ctrl *ctrl, struct nvme_ns *ns, return 0; } @@ -230402,7 +279468,7 @@ index f8dd664b2eda5..92fe67bd24570 100644 { struct nvme_ctrl *ctrl = ns->ctrl; -@@ -1730,7 
+1735,8 @@ static int nvme_configure_metadata(struct nvme_ns *ns, struct nvme_id_ns *id) +@@ -1730,7 +1755,8 @@ static int nvme_configure_metadata(struct nvme_ns *ns, struct nvme_id_ns *id) ns->features &= ~(NVME_NS_METADATA_SUPPORTED | NVME_NS_EXT_LBAS); if (!ns->ms || !(ctrl->ops->flags & NVME_F_METADATA_SUPPORTED)) @@ -230412,7 +279478,7 @@ index f8dd664b2eda5..92fe67bd24570 100644 if (ctrl->ops->flags & NVME_F_FABRICS) { /* * The NVMe over Fabrics specification only supports metadata as -@@ -1738,10 +1744,21 @@ static int nvme_configure_metadata(struct nvme_ns *ns, struct nvme_id_ns *id) +@@ -1738,10 +1764,21 @@ static int nvme_configure_metadata(struct nvme_ns *ns, struct nvme_id_ns *id) * remap the separate metadata buffer from the block layer. */ if (WARN_ON_ONCE(!(id->flbas & NVME_NS_FLBAS_META_EXT))) @@ -230438,7 +279504,7 @@ index f8dd664b2eda5..92fe67bd24570 100644 } else { /* * For PCIe controllers, we can't easily remap the separate -@@ -1754,8 +1771,6 @@ static int nvme_configure_metadata(struct nvme_ns *ns, struct nvme_id_ns *id) +@@ -1754,8 +1791,6 @@ static int nvme_configure_metadata(struct nvme_ns *ns, struct nvme_id_ns *id) else ns->features |= NVME_NS_METADATA_SUPPORTED; } @@ -230447,7 +279513,7 @@ index f8dd664b2eda5..92fe67bd24570 100644 } static void nvme_set_queue_limits(struct nvme_ctrl *ctrl, -@@ -1772,7 +1787,7 @@ static void nvme_set_queue_limits(struct nvme_ctrl *ctrl, +@@ -1772,7 +1807,7 @@ static void nvme_set_queue_limits(struct nvme_ctrl *ctrl, blk_queue_max_segments(q, min_t(u32, max_segments, USHRT_MAX)); } blk_queue_virt_boundary(q, NVME_CTRL_PAGE_SIZE - 1); @@ -230456,7 +279522,7 @@ index f8dd664b2eda5..92fe67bd24570 100644 blk_queue_write_cache(q, vwc, vwc); } -@@ -1845,9 +1860,6 @@ static void nvme_update_disk_info(struct gendisk *disk, +@@ -1845,9 +1880,6 @@ static void nvme_update_disk_info(struct gendisk *disk, nvme_config_discard(disk, ns); blk_queue_max_write_zeroes_sectors(disk->queue, ns->ctrl->max_zeroes_sectors); @@ -230466,7 +279532,7 @@ index f8dd664b2eda5..92fe67bd24570 100644 } static inline bool nvme_first_scan(struct gendisk *disk) -@@ -1896,18 +1908,20 @@ static int nvme_update_ns_info(struct nvme_ns *ns, struct nvme_id_ns *id) +@@ -1896,18 +1928,20 @@ static int nvme_update_ns_info(struct nvme_ns *ns, struct nvme_id_ns *id) ns->lba_shift = id->lbaf[lbaf].ds; nvme_set_queue_limits(ns->ctrl, ns->queue); @@ -230492,7 +279558,7 @@ index f8dd664b2eda5..92fe67bd24570 100644 set_bit(NVME_NS_READY, &ns->flags); blk_mq_unfreeze_queue(ns->disk->queue); -@@ -1920,16 +1934,17 @@ static int nvme_update_ns_info(struct nvme_ns *ns, struct nvme_id_ns *id) +@@ -1920,16 +1954,17 @@ static int nvme_update_ns_info(struct nvme_ns *ns, struct nvme_id_ns *id) if (nvme_ns_head_multipath(ns->head)) { blk_mq_freeze_queue(ns->head->disk->queue); nvme_update_disk_info(ns->head->disk, ns, id); @@ -230513,7 +279579,7 @@ index f8dd664b2eda5..92fe67bd24570 100644 out: /* * If probing fails due an unsupported feature, hide the block device, -@@ -1937,6 +1952,7 @@ out: +@@ -1937,6 +1972,7 @@ out: */ if (ret == -ENODEV) { ns->disk->flags |= GENHD_FL_HIDDEN; @@ -230521,7 +279587,7 @@ index f8dd664b2eda5..92fe67bd24570 100644 ret = 0; } return ret; -@@ -2040,14 +2056,14 @@ static int nvme_pr_preempt(struct block_device *bdev, u64 old, u64 new, +@@ -2040,14 +2076,14 @@ static int nvme_pr_preempt(struct block_device *bdev, u64 old, u64 new, static int nvme_pr_clear(struct block_device *bdev, u64 key) { @@ -230539,7 +279605,7 @@ index f8dd664b2eda5..92fe67bd24570 100644 
return nvme_pr_command(bdev, cdw10, key, 0, nvme_cmd_resv_release); } -@@ -2095,6 +2111,7 @@ static int nvme_report_zones(struct gendisk *disk, sector_t sector, +@@ -2095,6 +2131,7 @@ static int nvme_report_zones(struct gendisk *disk, sector_t sector, static const struct block_device_operations nvme_bdev_ops = { .owner = THIS_MODULE, .ioctl = nvme_ioctl, @@ -230547,7 +279613,7 @@ index f8dd664b2eda5..92fe67bd24570 100644 .open = nvme_open, .release = nvme_release, .getgeo = nvme_getgeo, -@@ -2468,6 +2485,34 @@ static const struct nvme_core_quirk_entry core_quirks[] = { +@@ -2468,6 +2505,34 @@ static const struct nvme_core_quirk_entry core_quirks[] = { .vid = 0x14a4, .fr = "22301111", .quirks = NVME_QUIRK_SIMPLE_SUSPEND, @@ -230582,7 +279648,7 @@ index f8dd664b2eda5..92fe67bd24570 100644 } }; -@@ -2687,7 +2732,6 @@ static int nvme_init_subsystem(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id) +@@ -2687,7 +2752,6 @@ static int nvme_init_subsystem(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id) nvme_init_subnqn(subsys, ctrl, id); memcpy(subsys->serial, id->sn, sizeof(subsys->serial)); memcpy(subsys->model, id->mn, sizeof(subsys->model)); @@ -230590,16 +279656,41 @@ index f8dd664b2eda5..92fe67bd24570 100644 subsys->vendor_id = le16_to_cpu(id->vid); subsys->cmic = id->cmic; subsys->awupf = le16_to_cpu(id->awupf); -@@ -2894,6 +2938,8 @@ static int nvme_init_identify(struct nvme_ctrl *ctrl) +@@ -2830,7 +2894,7 @@ static int nvme_init_non_mdts_limits(struct nvme_ctrl *ctrl) + + id = kzalloc(sizeof(*id), GFP_KERNEL); + if (!id) +- return 0; ++ return -ENOMEM; + + c.identify.opcode = nvme_admin_identify; + c.identify.cns = NVME_ID_CNS_CS_CTRL; +@@ -2877,10 +2941,6 @@ static int nvme_init_identify(struct nvme_ctrl *ctrl) + if (!ctrl->identified) { + unsigned int i; + +- ret = nvme_init_subsystem(ctrl, id); +- if (ret) +- goto out_free; +- + /* + * Check for quirks. 
Quirk can depend on firmware version, + * so, in principle, the set of quirks present can change +@@ -2893,7 +2953,13 @@ static int nvme_init_identify(struct nvme_ctrl *ctrl) + if (quirk_matches(id, &core_quirks[i])) ctrl->quirks |= core_quirks[i].quirks; } ++ ++ ret = nvme_init_subsystem(ctrl, id); ++ if (ret) ++ goto out_free; } + memcpy(ctrl->subsys->firmware_rev, id->fr, + sizeof(ctrl->subsys->firmware_rev)); if (force_apst && (ctrl->quirks & NVME_QUIRK_NO_DEEPEST_PS)) { dev_warn(ctrl->device, "forcibly allowing all power states due to nvme_core.force_apst -- use at your own risk\n"); -@@ -3025,10 +3071,6 @@ int nvme_init_ctrl_finish(struct nvme_ctrl *ctrl) +@@ -3025,10 +3091,6 @@ int nvme_init_ctrl_finish(struct nvme_ctrl *ctrl) if (ret) return ret; @@ -230610,7 +279701,7 @@ index f8dd664b2eda5..92fe67bd24570 100644 ret = nvme_configure_apst(ctrl); if (ret < 0) return ret; -@@ -3046,8 +3088,12 @@ int nvme_init_ctrl_finish(struct nvme_ctrl *ctrl) +@@ -3046,8 +3108,12 @@ int nvme_init_ctrl_finish(struct nvme_ctrl *ctrl) return ret; if (!ctrl->identified && !nvme_discovery_ctrl(ctrl)) { @@ -230624,7 +279715,7 @@ index f8dd664b2eda5..92fe67bd24570 100644 return ret; } -@@ -3179,8 +3225,8 @@ static ssize_t uuid_show(struct device *dev, struct device_attribute *attr, +@@ -3179,8 +3245,8 @@ static ssize_t uuid_show(struct device *dev, struct device_attribute *attr, * we have no UUID set */ if (uuid_is_null(&ids->uuid)) { @@ -230635,7 +279726,7 @@ index f8dd664b2eda5..92fe67bd24570 100644 return sysfs_emit(buf, "%pU\n", ids->nguid); } return sysfs_emit(buf, "%pU\n", &ids->uuid); -@@ -3517,15 +3563,20 @@ static const struct attribute_group *nvme_dev_attr_groups[] = { +@@ -3517,15 +3583,20 @@ static const struct attribute_group *nvme_dev_attr_groups[] = { NULL, }; @@ -230660,7 +279751,7 @@ index f8dd664b2eda5..92fe67bd24570 100644 continue; if (!list_empty(&h->list) && nvme_tryget_ns_head(h)) return h; -@@ -3534,16 +3585,24 @@ static struct nvme_ns_head *nvme_find_ns_head(struct nvme_subsystem *subsys, +@@ -3534,16 +3605,24 @@ static struct nvme_ns_head *nvme_find_ns_head(struct nvme_subsystem *subsys, return NULL; } @@ -230689,7 +279780,7 @@ index f8dd664b2eda5..92fe67bd24570 100644 return -EINVAL; } -@@ -3616,7 +3675,7 @@ static int nvme_add_ns_cdev(struct nvme_ns *ns) +@@ -3616,7 +3695,7 @@ static int nvme_add_ns_cdev(struct nvme_ns *ns) } static struct nvme_ns_head *nvme_alloc_ns_head(struct nvme_ctrl *ctrl, @@ -230698,7 +279789,7 @@ index f8dd664b2eda5..92fe67bd24570 100644 { struct nvme_ns_head *head; size_t size = sizeof(*head); -@@ -3640,15 +3699,9 @@ static struct nvme_ns_head *nvme_alloc_ns_head(struct nvme_ctrl *ctrl, +@@ -3640,15 +3719,9 @@ static struct nvme_ns_head *nvme_alloc_ns_head(struct nvme_ctrl *ctrl, head->subsys = ctrl->subsys; head->ns_id = nsid; head->ids = *ids; @@ -230715,7 +279806,7 @@ index f8dd664b2eda5..92fe67bd24570 100644 if (head->ids.csi) { ret = nvme_get_effects_log(ctrl, head->ids.csi, &head->effects); if (ret) -@@ -3685,14 +3738,19 @@ static int nvme_init_ns_head(struct nvme_ns *ns, unsigned nsid, +@@ -3685,14 +3758,19 @@ static int nvme_init_ns_head(struct nvme_ns *ns, unsigned nsid, int ret = 0; mutex_lock(&ctrl->subsys->lock); @@ -230738,7 +279829,16 @@ index f8dd664b2eda5..92fe67bd24570 100644 } else { ret = -EINVAL; if (!is_shared || !head->shared) { -@@ -4076,11 +4134,26 @@ static void nvme_scan_work(struct work_struct *work) +@@ -3862,7 +3940,7 @@ static void nvme_ns_remove(struct nvme_ns *ns) + mutex_unlock(&ns->ctrl->subsys->lock); + + /* guarantee 
not available in head->list */ +- synchronize_rcu(); ++ synchronize_srcu(&ns->head->srcu); + + /* wait for concurrent submissions */ + if (nvme_mpath_clear_current_path(ns)) +@@ -4076,11 +4154,26 @@ static void nvme_scan_work(struct work_struct *work) { struct nvme_ctrl *ctrl = container_of(work, struct nvme_ctrl, scan_work); @@ -230765,7 +279865,7 @@ index f8dd664b2eda5..92fe67bd24570 100644 if (test_and_clear_bit(NVME_AER_NOTICE_NS_CHANGED, &ctrl->events)) { dev_info(ctrl->device, "rescanning namespaces.\n"); nvme_clear_changed_ns_log(ctrl); -@@ -4187,7 +4260,14 @@ static void nvme_async_event_work(struct work_struct *work) +@@ -4187,7 +4280,14 @@ static void nvme_async_event_work(struct work_struct *work) container_of(work, struct nvme_ctrl, async_event_work); nvme_aen_uevent(ctrl); @@ -230781,7 +279881,7 @@ index f8dd664b2eda5..92fe67bd24570 100644 } static bool nvme_ctrl_pp_status(struct nvme_ctrl *ctrl) -@@ -4319,6 +4399,8 @@ void nvme_stop_ctrl(struct nvme_ctrl *ctrl) +@@ -4319,6 +4419,8 @@ void nvme_stop_ctrl(struct nvme_ctrl *ctrl) nvme_stop_failfast_work(ctrl); flush_work(&ctrl->async_event_work); cancel_work_sync(&ctrl->fw_act_work); @@ -230790,7 +279890,7 @@ index f8dd664b2eda5..92fe67bd24570 100644 } EXPORT_SYMBOL_GPL(nvme_stop_ctrl); -@@ -4331,6 +4413,7 @@ void nvme_start_ctrl(struct nvme_ctrl *ctrl) +@@ -4331,6 +4433,7 @@ void nvme_start_ctrl(struct nvme_ctrl *ctrl) if (ctrl->queue_count > 1) { nvme_queue_scan(ctrl); nvme_start_queues(ctrl); @@ -230911,8 +280011,30 @@ index 0a586d7129201..9e6e56c20ec99 100644 kfree(data); } } +diff --git a/drivers/nvme/host/ioctl.c b/drivers/nvme/host/ioctl.c +index 22314962842da..7397fad4c96ff 100644 +--- a/drivers/nvme/host/ioctl.c ++++ b/drivers/nvme/host/ioctl.c +@@ -484,11 +484,17 @@ long nvme_dev_ioctl(struct file *file, unsigned int cmd, + case NVME_IOCTL_IO_CMD: + return nvme_dev_user_cmd(ctrl, argp); + case NVME_IOCTL_RESET: ++ if (!capable(CAP_SYS_ADMIN)) ++ return -EACCES; + dev_warn(ctrl->device, "resetting controller\n"); + return nvme_reset_ctrl_sync(ctrl); + case NVME_IOCTL_SUBSYS_RESET: ++ if (!capable(CAP_SYS_ADMIN)) ++ return -EACCES; + return nvme_reset_subsystem(ctrl); + case NVME_IOCTL_RESCAN: ++ if (!capable(CAP_SYS_ADMIN)) ++ return -EACCES; + nvme_queue_scan(ctrl); + return 0; + default: diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c -index fba06618c6c23..36b48e2ff642f 100644 +index fba06618c6c23..8d97b942de01f 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -138,13 +138,12 @@ void nvme_mpath_clear_ctrl_paths(struct nvme_ctrl *ctrl) @@ -230933,7 +280055,18 @@ index fba06618c6c23..36b48e2ff642f 100644 } void nvme_mpath_revalidate_paths(struct nvme_ns *ns) -@@ -160,6 +159,7 @@ void nvme_mpath_revalidate_paths(struct nvme_ns *ns) +@@ -152,14 +151,18 @@ void nvme_mpath_revalidate_paths(struct nvme_ns *ns) + struct nvme_ns_head *head = ns->head; + sector_t capacity = get_capacity(head->disk); + int node; ++ int srcu_idx; + ++ srcu_idx = srcu_read_lock(&head->srcu); + list_for_each_entry_rcu(ns, &head->list, siblings) { + if (capacity != get_capacity(ns->disk)) + clear_bit(NVME_NS_READY, &ns->flags); + } ++ srcu_read_unlock(&head->srcu, srcu_idx); for_each_node(node) rcu_assign_pointer(head->current_path[node], NULL); @@ -230941,7 +280074,16 @@ index fba06618c6c23..36b48e2ff642f 100644 } static bool nvme_path_is_disabled(struct nvme_ns *ns) -@@ -389,6 +389,7 @@ const struct block_device_operations nvme_ns_head_ops = { +@@ -326,6 +329,8 @@ static blk_qc_t 
nvme_ns_head_submit_bio(struct bio *bio) + * pool from the original queue to allocate the bvecs from. + */ + blk_queue_split(&bio); ++ if (!bio) ++ return BLK_QC_T_NONE; + + srcu_idx = srcu_read_lock(&head->srcu); + ns = nvme_find_path(head); +@@ -389,6 +394,7 @@ const struct block_device_operations nvme_ns_head_ops = { .open = nvme_ns_head_open, .release = nvme_ns_head_release, .ioctl = nvme_ns_head_ioctl, @@ -230949,7 +280091,7 @@ index fba06618c6c23..36b48e2ff642f 100644 .getgeo = nvme_getgeo, .report_zones = nvme_ns_head_report_zones, .pr_ops = &nvme_pr_ops, -@@ -463,10 +464,11 @@ int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl, struct nvme_ns_head *head) +@@ -463,10 +469,11 @@ int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl, struct nvme_ns_head *head) /* * Add a multipath node if the subsystems supports multiple controllers. @@ -230964,7 +280106,7 @@ index fba06618c6c23..36b48e2ff642f 100644 return 0; head->disk = blk_alloc_disk(ctrl->numa_node); -@@ -574,8 +576,17 @@ static void nvme_update_ns_ana_state(struct nvme_ana_group_desc *desc, +@@ -574,8 +581,17 @@ static void nvme_update_ns_ana_state(struct nvme_ana_group_desc *desc, ns->ana_grpid = le32_to_cpu(desc->grpid); ns->ana_state = desc->state; clear_bit(NVME_NS_ANA_PENDING, &ns->flags); @@ -230984,7 +280126,7 @@ index fba06618c6c23..36b48e2ff642f 100644 nvme_mpath_set_live(ns); } -@@ -662,6 +673,18 @@ static void nvme_ana_work(struct work_struct *work) +@@ -662,6 +678,18 @@ static void nvme_ana_work(struct work_struct *work) nvme_read_ana_log(ctrl); } @@ -231003,7 +280145,7 @@ index fba06618c6c23..36b48e2ff642f 100644 static void nvme_anatt_timeout(struct timer_list *t) { struct nvme_ctrl *ctrl = from_timer(ctrl, t, anatt_timer); -@@ -793,7 +816,7 @@ void nvme_mpath_remove_disk(struct nvme_ns_head *head) +@@ -793,7 +821,7 @@ void nvme_mpath_remove_disk(struct nvme_ns_head *head) { if (!head->disk) return; @@ -231013,7 +280155,7 @@ index fba06618c6c23..36b48e2ff642f 100644 kblockd_schedule_work(&head->requeue_work); flush_work(&head->requeue_work); diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h -index ed79a6c7e8043..75a7e7baa1fc6 100644 +index ed79a6c7e8043..39ca48babbe82 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -144,6 +144,11 @@ enum nvme_quirks { @@ -231036,7 +280178,40 @@ index ed79a6c7e8043..75a7e7baa1fc6 100644 int (*get_address)(struct nvme_ctrl *ctrl, char *buf, int size); }; -@@ -693,6 +699,25 @@ static inline bool nvme_check_ready(struct nvme_ctrl *ctrl, struct request *rq, +@@ -552,11 +558,23 @@ static inline void nvme_fault_inject_fini(struct nvme_fault_inject *fault_inj) + static inline void nvme_should_fail(struct request *req) {} + #endif + ++bool nvme_wait_reset(struct nvme_ctrl *ctrl); ++int nvme_try_sched_reset(struct nvme_ctrl *ctrl); ++ + static inline int nvme_reset_subsystem(struct nvme_ctrl *ctrl) + { ++ int ret; ++ + if (!ctrl->subsystem) + return -ENOTTY; +- return ctrl->ops->reg_write32(ctrl, NVME_REG_NSSR, 0x4E564D65); ++ if (!nvme_wait_reset(ctrl)) ++ return -EBUSY; ++ ++ ret = ctrl->ops->reg_write32(ctrl, NVME_REG_NSSR, 0x4E564D65); ++ if (ret) ++ return ret; ++ ++ return nvme_try_sched_reset(ctrl); + } + + /* +@@ -644,7 +662,6 @@ void nvme_cancel_tagset(struct nvme_ctrl *ctrl); + void nvme_cancel_admin_tagset(struct nvme_ctrl *ctrl); + bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl, + enum nvme_ctrl_state new_state); +-bool nvme_wait_reset(struct nvme_ctrl *ctrl); + int nvme_disable_ctrl(struct nvme_ctrl *ctrl); + int nvme_enable_ctrl(struct 
nvme_ctrl *ctrl); + int nvme_shutdown_ctrl(struct nvme_ctrl *ctrl); +@@ -693,6 +710,25 @@ static inline bool nvme_check_ready(struct nvme_ctrl *ctrl, struct request *rq, return true; return __nvme_check_ready(ctrl, rq, queue_live); } @@ -231062,7 +280237,15 @@ index ed79a6c7e8043..75a7e7baa1fc6 100644 int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, void *buf, unsigned bufflen); int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, -@@ -752,6 +777,7 @@ void nvme_mpath_add_disk(struct nvme_ns *ns, struct nvme_id_ns *id); +@@ -709,7 +745,6 @@ int nvme_set_queue_count(struct nvme_ctrl *ctrl, int *count); + void nvme_stop_keep_alive(struct nvme_ctrl *ctrl); + int nvme_reset_ctrl(struct nvme_ctrl *ctrl); + int nvme_reset_ctrl_sync(struct nvme_ctrl *ctrl); +-int nvme_try_sched_reset(struct nvme_ctrl *ctrl); + int nvme_delete_ctrl(struct nvme_ctrl *ctrl); + void nvme_queue_scan(struct nvme_ctrl *ctrl); + int nvme_get_log(struct nvme_ctrl *ctrl, u32 nsid, u8 log_page, u8 lsp, u8 csi, +@@ -752,6 +787,7 @@ void nvme_mpath_add_disk(struct nvme_ns *ns, struct nvme_id_ns *id); void nvme_mpath_remove_disk(struct nvme_ns_head *head); int nvme_mpath_init_identify(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id); void nvme_mpath_init_ctrl(struct nvme_ctrl *ctrl); @@ -231070,7 +280253,16 @@ index ed79a6c7e8043..75a7e7baa1fc6 100644 void nvme_mpath_uninit(struct nvme_ctrl *ctrl); void nvme_mpath_stop(struct nvme_ctrl *ctrl); bool nvme_mpath_clear_current_path(struct nvme_ns *ns); -@@ -826,6 +852,9 @@ static inline int nvme_mpath_init_identify(struct nvme_ctrl *ctrl, +@@ -763,7 +799,7 @@ static inline void nvme_trace_bio_complete(struct request *req) + { + struct nvme_ns *ns = req->q->queuedata; + +- if (req->cmd_flags & REQ_NVME_MPATH) ++ if ((req->cmd_flags & REQ_NVME_MPATH) && req->bio) + trace_block_bio_complete(ns->head->disk->queue, req->bio); + } + +@@ -826,6 +862,9 @@ static inline int nvme_mpath_init_identify(struct nvme_ctrl *ctrl, "Please enable CONFIG_NVME_MULTIPATH for full support of multi-port devices.\n"); return 0; } @@ -231081,10 +280273,131 @@ index ed79a6c7e8043..75a7e7baa1fc6 100644 { } diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c -index 149ecf73df384..e9f3701dda3fd 100644 +index 149ecf73df384..00552cd02d732 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c -@@ -1680,6 +1680,7 @@ static int nvme_alloc_admin_tags(struct nvme_dev *dev) +@@ -33,7 +33,7 @@ + #define SQ_SIZE(q) ((q)->q_depth << (q)->sqes) + #define CQ_SIZE(q) ((q)->q_depth * sizeof(struct nvme_completion)) + +-#define SGES_PER_PAGE (PAGE_SIZE / sizeof(struct nvme_sgl_desc)) ++#define SGES_PER_PAGE (NVME_CTRL_PAGE_SIZE / sizeof(struct nvme_sgl_desc)) + + /* + * These can be higher, but we need to ensure that any command doesn't +@@ -142,9 +142,9 @@ struct nvme_dev { + mempool_t *iod_mempool; + + /* shadow doorbell buffer support: */ +- u32 *dbbuf_dbs; ++ __le32 *dbbuf_dbs; + dma_addr_t dbbuf_dbs_dma_addr; +- u32 *dbbuf_eis; ++ __le32 *dbbuf_eis; + dma_addr_t dbbuf_eis_dma_addr; + + /* host memory buffer support: */ +@@ -208,10 +208,10 @@ struct nvme_queue { + #define NVMEQ_SQ_CMB 1 + #define NVMEQ_DELETE_ERROR 2 + #define NVMEQ_POLLED 3 +- u32 *dbbuf_sq_db; +- u32 *dbbuf_cq_db; +- u32 *dbbuf_sq_ei; +- u32 *dbbuf_cq_ei; ++ __le32 *dbbuf_sq_db; ++ __le32 *dbbuf_cq_db; ++ __le32 *dbbuf_sq_ei; ++ __le32 *dbbuf_cq_ei; + struct completion delete_done; + }; + +@@ -332,11 +332,11 @@ static inline int nvme_dbbuf_need_event(u16 event_idx, u16 new_idx, 
u16 old) + } + + /* Update dbbuf and return true if an MMIO is required */ +-static bool nvme_dbbuf_update_and_check_event(u16 value, u32 *dbbuf_db, +- volatile u32 *dbbuf_ei) ++static bool nvme_dbbuf_update_and_check_event(u16 value, __le32 *dbbuf_db, ++ volatile __le32 *dbbuf_ei) + { + if (dbbuf_db) { +- u16 old_value; ++ u16 old_value, event_idx; + + /* + * Ensure that the queue is written before updating +@@ -344,8 +344,8 @@ static bool nvme_dbbuf_update_and_check_event(u16 value, u32 *dbbuf_db, + */ + wmb(); + +- old_value = *dbbuf_db; +- *dbbuf_db = value; ++ old_value = le32_to_cpu(*dbbuf_db); ++ *dbbuf_db = cpu_to_le32(value); + + /* + * Ensure that the doorbell is updated before reading the event +@@ -355,7 +355,8 @@ static bool nvme_dbbuf_update_and_check_event(u16 value, u32 *dbbuf_db, + */ + mb(); + +- if (!nvme_dbbuf_need_event(*dbbuf_ei, value, old_value)) ++ event_idx = le32_to_cpu(*dbbuf_ei); ++ if (!nvme_dbbuf_need_event(event_idx, value, old_value)) + return false; + } + +@@ -369,9 +370,9 @@ static bool nvme_dbbuf_update_and_check_event(u16 value, u32 *dbbuf_db, + */ + static int nvme_pci_npages_prp(void) + { +- unsigned nprps = DIV_ROUND_UP(NVME_MAX_KB_SZ + NVME_CTRL_PAGE_SIZE, +- NVME_CTRL_PAGE_SIZE); +- return DIV_ROUND_UP(8 * nprps, PAGE_SIZE - 8); ++ unsigned max_bytes = (NVME_MAX_KB_SZ * 1024) + NVME_CTRL_PAGE_SIZE; ++ unsigned nprps = DIV_ROUND_UP(max_bytes, NVME_CTRL_PAGE_SIZE); ++ return DIV_ROUND_UP(8 * nprps, NVME_CTRL_PAGE_SIZE - 8); + } + + /* +@@ -381,7 +382,7 @@ static int nvme_pci_npages_prp(void) + static int nvme_pci_npages_sgl(void) + { + return DIV_ROUND_UP(NVME_MAX_SEGS * sizeof(struct nvme_sgl_desc), +- PAGE_SIZE); ++ NVME_CTRL_PAGE_SIZE); + } + + static size_t nvme_pci_iod_alloc_size(void) +@@ -731,7 +732,7 @@ static void nvme_pci_sgl_set_seg(struct nvme_sgl_desc *sge, + sge->length = cpu_to_le32(entries * sizeof(*sge)); + sge->type = NVME_SGL_FMT_LAST_SEG_DESC << 4; + } else { +- sge->length = cpu_to_le32(PAGE_SIZE); ++ sge->length = cpu_to_le32(NVME_CTRL_PAGE_SIZE); + sge->type = NVME_SGL_FMT_SEG_DESC << 4; + } + } +@@ -814,6 +815,8 @@ static blk_status_t nvme_setup_prp_simple(struct nvme_dev *dev, + cmnd->dptr.prp1 = cpu_to_le64(iod->first_dma); + if (bv->bv_len > first_prp_len) + cmnd->dptr.prp2 = cpu_to_le64(iod->first_dma + first_prp_len); ++ else ++ cmnd->dptr.prp2 = 0; + return BLK_STS_OK; + } + +@@ -1277,7 +1280,7 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved) + else + nvme_poll_irqdisable(nvmeq); + +- if (blk_mq_request_completed(req)) { ++ if (blk_mq_rq_state(req) != MQ_RQ_IN_FLIGHT) { + dev_warn(dev->ctrl.device, + "I/O %d QID %d timeout, completion polled\n", + req->tag, nvmeq->qid); +@@ -1680,6 +1683,7 @@ static int nvme_alloc_admin_tags(struct nvme_dev *dev) dev->ctrl.admin_q = blk_mq_init_queue(&dev->admin_tagset); if (IS_ERR(dev->ctrl.admin_q)) { blk_mq_free_tag_set(&dev->admin_tagset); @@ -231092,7 +280405,7 @@ index 149ecf73df384..e9f3701dda3fd 100644 return -ENOMEM; } if (!blk_get_queue(dev->ctrl.admin_q)) { -@@ -2731,6 +2732,8 @@ static void nvme_reset_work(struct work_struct *work) +@@ -2731,6 +2735,8 @@ static void nvme_reset_work(struct work_struct *work) if (result) goto out_unlock; @@ -231101,7 +280414,7 @@ index 149ecf73df384..e9f3701dda3fd 100644 /* * Limit the max command size to prevent iod->sg allocations going * over a single page. 
-@@ -2743,7 +2746,6 @@ static void nvme_reset_work(struct work_struct *work) +@@ -2743,7 +2749,6 @@ static void nvme_reset_work(struct work_struct *work) * Don't limit the IOMMU merged segment size. */ dma_set_max_seg_size(dev->dev, 0xffffffff); @@ -231109,7 +280422,7 @@ index 149ecf73df384..e9f3701dda3fd 100644 mutex_unlock(&dev->shutdown_lock); -@@ -3300,7 +3302,8 @@ static const struct pci_device_id nvme_id_table[] = { +@@ -3300,7 +3305,8 @@ static const struct pci_device_id nvme_id_table[] = { NVME_QUIRK_DEALLOCATE_ZEROES, }, { PCI_VDEVICE(INTEL, 0x0a54), /* Intel P4500/P4600 */ .driver_data = NVME_QUIRK_STRIPE_SIZE | @@ -231119,7 +280432,7 @@ index 149ecf73df384..e9f3701dda3fd 100644 { PCI_VDEVICE(INTEL, 0x0a55), /* Dell Express Flash P4600 */ .driver_data = NVME_QUIRK_STRIPE_SIZE | NVME_QUIRK_DEALLOCATE_ZEROES, }, -@@ -3313,7 +3316,10 @@ static const struct pci_device_id nvme_id_table[] = { +@@ -3313,7 +3319,10 @@ static const struct pci_device_id nvme_id_table[] = { .driver_data = NVME_QUIRK_IGNORE_DEV_SUBNQN, }, { PCI_VDEVICE(INTEL, 0x5845), /* Qemu emulated controller */ .driver_data = NVME_QUIRK_IDENTIFY_CNS | @@ -231131,7 +280444,7 @@ index 149ecf73df384..e9f3701dda3fd 100644 { PCI_DEVICE(0x126f, 0x2263), /* Silicon Motion unidentified */ .driver_data = NVME_QUIRK_NO_NS_DESC_LIST, }, { PCI_DEVICE(0x1bb1, 0x0100), /* Seagate Nytro Flash Storage */ -@@ -3332,12 +3338,16 @@ static const struct pci_device_id nvme_id_table[] = { +@@ -3332,15 +3341,23 @@ static const struct pci_device_id nvme_id_table[] = { NVME_QUIRK_DISABLE_WRITE_ZEROES| NVME_QUIRK_IGNORE_DEV_SUBNQN, }, { PCI_DEVICE(0x1987, 0x5016), /* Phison E16 */ @@ -231150,10 +280463,31 @@ index 149ecf73df384..e9f3701dda3fd 100644 { PCI_DEVICE(0x1cc1, 0x8201), /* ADATA SX8200PNP 512GB */ .driver_data = NVME_QUIRK_NO_DEEPEST_PS | NVME_QUIRK_IGNORE_DEV_SUBNQN, }, -@@ -3351,6 +3361,10 @@ static const struct pci_device_id nvme_id_table[] = { ++ { PCI_DEVICE(0x1344, 0x5407), /* Micron Technology Inc NVMe SSD */ ++ .driver_data = NVME_QUIRK_IGNORE_DEV_SUBNQN }, ++ { PCI_DEVICE(0x1344, 0x6001), /* Micron Nitro NVMe */ ++ .driver_data = NVME_QUIRK_BOGUS_NID, }, + { PCI_DEVICE(0x1c5c, 0x1504), /* SK Hynix PC400 */ + .driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, }, + { PCI_DEVICE(0x15b7, 0x2001), /* Sandisk Skyhawk */ +@@ -3351,6 +3368,24 @@ static const struct pci_device_id nvme_id_table[] = { .driver_data = NVME_QUIRK_NO_DEEPEST_PS, }, { PCI_DEVICE(0x2646, 0x2263), /* KINGSTON A2000 NVMe SSD */ .driver_data = NVME_QUIRK_NO_DEEPEST_PS, }, ++ { PCI_DEVICE(0x2646, 0x5018), /* KINGSTON OM8SFP4xxxxP OS21012 NVMe SSD */ ++ .driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, }, ++ { PCI_DEVICE(0x2646, 0x5016), /* KINGSTON OM3PGP4xxxxP OS21011 NVMe SSD */ ++ .driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, }, ++ { PCI_DEVICE(0x2646, 0x501A), /* KINGSTON OM8PGP4xxxxP OS21005 NVMe SSD */ ++ .driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, }, ++ { PCI_DEVICE(0x2646, 0x501B), /* KINGSTON OM8PGP4xxxxQ OS21005 NVMe SSD */ ++ .driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, }, ++ { PCI_DEVICE(0x2646, 0x501E), /* KINGSTON OM3PGP4xxxxQ OS21011 NVMe SSD */ ++ .driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, }, ++ { PCI_DEVICE(0x1f40, 0x5236), /* Netac Technologies Co. 
NV7000 NVMe SSD */ ++ .driver_data = NVME_QUIRK_BOGUS_NID, }, ++ { PCI_DEVICE(0x1e4B, 0x1001), /* MAXIO MAP1001 */ ++ .driver_data = NVME_QUIRK_BOGUS_NID, }, + { PCI_DEVICE(0x1e4B, 0x1002), /* MAXIO MAP1002 */ + .driver_data = NVME_QUIRK_BOGUS_NID, }, + { PCI_DEVICE(0x1e4B, 0x1202), /* MAXIO MAP1202 */ @@ -231161,7 +280495,7 @@ index 149ecf73df384..e9f3701dda3fd 100644 { PCI_DEVICE(PCI_VENDOR_ID_AMAZON, 0x0061), .driver_data = NVME_QUIRK_DMA_ADDRESS_BITS_48, }, { PCI_DEVICE(PCI_VENDOR_ID_AMAZON, 0x0065), -@@ -3371,7 +3385,6 @@ static const struct pci_device_id nvme_id_table[] = { +@@ -3371,7 +3406,6 @@ static const struct pci_device_id nvme_id_table[] = { NVME_QUIRK_128_BYTES_SQES | NVME_QUIRK_SHARED_TAGS | NVME_QUIRK_SKIP_CID_GEN }, @@ -231402,10 +280736,56 @@ index 35bac7a254227..aa8b0f86b2be1 100644 __entry->retries = nvme_req(req)->retries; __entry->flags = nvme_req(req)->flags; diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c -index aa6d84d8848e7..52bb262d267ac 100644 +index aa6d84d8848e7..bf78c58ed41d4 100644 --- a/drivers/nvme/target/admin-cmd.c +++ b/drivers/nvme/target/admin-cmd.c -@@ -978,7 +978,7 @@ void nvmet_execute_async_event(struct nvmet_req *req) +@@ -164,26 +164,29 @@ out: + + static void nvmet_get_cmd_effects_nvm(struct nvme_effects_log *log) + { +- log->acs[nvme_admin_get_log_page] = cpu_to_le32(1 << 0); +- log->acs[nvme_admin_identify] = cpu_to_le32(1 << 0); +- log->acs[nvme_admin_abort_cmd] = cpu_to_le32(1 << 0); +- log->acs[nvme_admin_set_features] = cpu_to_le32(1 << 0); +- log->acs[nvme_admin_get_features] = cpu_to_le32(1 << 0); +- log->acs[nvme_admin_async_event] = cpu_to_le32(1 << 0); +- log->acs[nvme_admin_keep_alive] = cpu_to_le32(1 << 0); +- +- log->iocs[nvme_cmd_read] = cpu_to_le32(1 << 0); +- log->iocs[nvme_cmd_write] = cpu_to_le32(1 << 0); +- log->iocs[nvme_cmd_flush] = cpu_to_le32(1 << 0); +- log->iocs[nvme_cmd_dsm] = cpu_to_le32(1 << 0); +- log->iocs[nvme_cmd_write_zeroes] = cpu_to_le32(1 << 0); ++ log->acs[nvme_admin_get_log_page] = ++ log->acs[nvme_admin_identify] = ++ log->acs[nvme_admin_abort_cmd] = ++ log->acs[nvme_admin_set_features] = ++ log->acs[nvme_admin_get_features] = ++ log->acs[nvme_admin_async_event] = ++ log->acs[nvme_admin_keep_alive] = ++ cpu_to_le32(NVME_CMD_EFFECTS_CSUPP); ++ ++ log->iocs[nvme_cmd_read] = ++ log->iocs[nvme_cmd_write] = ++ log->iocs[nvme_cmd_flush] = ++ log->iocs[nvme_cmd_dsm] = ++ log->iocs[nvme_cmd_write_zeroes] = ++ cpu_to_le32(NVME_CMD_EFFECTS_CSUPP); + } + + static void nvmet_get_cmd_effects_zns(struct nvme_effects_log *log) + { +- log->iocs[nvme_cmd_zone_append] = cpu_to_le32(1 << 0); +- log->iocs[nvme_cmd_zone_mgmt_send] = cpu_to_le32(1 << 0); +- log->iocs[nvme_cmd_zone_mgmt_recv] = cpu_to_le32(1 << 0); ++ log->iocs[nvme_cmd_zone_append] = ++ log->iocs[nvme_cmd_zone_mgmt_send] = ++ log->iocs[nvme_cmd_zone_mgmt_recv] = ++ cpu_to_le32(NVME_CMD_EFFECTS_CSUPP); + } + + static void nvmet_execute_get_log_cmd_effects_ns(struct nvmet_req *req) +@@ -978,7 +981,7 @@ void nvmet_execute_async_event(struct nvmet_req *req) ctrl->async_event_cmds[ctrl->nr_async_event_cmds++] = req; mutex_unlock(&ctrl->lock); @@ -231415,10 +280795,32 @@ index aa6d84d8848e7..52bb262d267ac 100644 void nvmet_execute_keep_alive(struct nvmet_req *req) diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c -index be5d82421e3a4..cea30e4f50533 100644 +index be5d82421e3a4..625038057a762 100644 --- a/drivers/nvme/target/configfs.c +++ b/drivers/nvme/target/configfs.c -@@ -1553,6 +1553,8 @@ static void 
nvmet_port_release(struct config_item *item) +@@ -1189,6 +1189,7 @@ static ssize_t nvmet_subsys_attr_model_store_locked(struct nvmet_subsys *subsys, + const char *page, size_t count) + { + int pos = 0, len; ++ char *val; + + if (subsys->subsys_discovered) { + pr_err("Can't set model number. %s is already assigned\n", +@@ -1211,9 +1212,11 @@ static ssize_t nvmet_subsys_attr_model_store_locked(struct nvmet_subsys *subsys, + return -EINVAL; + } + +- subsys->model_number = kmemdup_nul(page, len, GFP_KERNEL); +- if (!subsys->model_number) ++ val = kmemdup_nul(page, len, GFP_KERNEL); ++ if (!val) + return -ENOMEM; ++ kfree(subsys->model_number); ++ subsys->model_number = val; + return count; + } + +@@ -1553,6 +1556,8 @@ static void nvmet_port_release(struct config_item *item) { struct nvmet_port *port = to_nvmet_port(item); @@ -231428,10 +280830,16 @@ index be5d82421e3a4..cea30e4f50533 100644 kfree(port->ana_state); diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c -index b8425fa34300f..87a347248c38f 100644 +index b8425fa34300f..cfd0385511564 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c -@@ -20,6 +20,9 @@ struct workqueue_struct *zbd_wq; +@@ -15,11 +15,15 @@ + + #include "nvmet.h" + ++struct kmem_cache *nvmet_bvec_cache; + struct workqueue_struct *buffered_io_wq; + struct workqueue_struct *zbd_wq; static const struct nvmet_fabrics_ops *nvmet_transports[NVMF_TRTYPE_MAX]; static DEFINE_IDA(cntlid_ida); @@ -231441,7 +280849,7 @@ index b8425fa34300f..87a347248c38f 100644 /* * This read/write semaphore is used to synchronize access to configuration * information on a target system that will result in discovery log page -@@ -205,7 +208,7 @@ void nvmet_add_async_event(struct nvmet_ctrl *ctrl, u8 event_type, +@@ -205,7 +209,7 @@ void nvmet_add_async_event(struct nvmet_ctrl *ctrl, u8 event_type, list_add_tail(&aen->entry, &ctrl->async_events); mutex_unlock(&ctrl->lock); @@ -231450,7 +280858,7 @@ index b8425fa34300f..87a347248c38f 100644 } static void nvmet_add_to_changed_ns_log(struct nvmet_ctrl *ctrl, __le32 nsid) -@@ -385,7 +388,7 @@ static void nvmet_keep_alive_timer(struct work_struct *work) +@@ -385,7 +389,7 @@ static void nvmet_keep_alive_timer(struct work_struct *work) if (reset_tbkas) { pr_debug("ctrl %d reschedule traffic based keep-alive timer\n", ctrl->cntlid); @@ -231459,7 +280867,7 @@ index b8425fa34300f..87a347248c38f 100644 return; } -@@ -403,7 +406,7 @@ void nvmet_start_keep_alive_timer(struct nvmet_ctrl *ctrl) +@@ -403,7 +407,7 @@ void nvmet_start_keep_alive_timer(struct nvmet_ctrl *ctrl) pr_debug("ctrl %d start keep-alive timer for %d secs\n", ctrl->cntlid, ctrl->kato); @@ -231468,7 +280876,7 @@ index b8425fa34300f..87a347248c38f 100644 } void nvmet_stop_keep_alive_timer(struct nvmet_ctrl *ctrl) -@@ -733,6 +736,8 @@ static void nvmet_set_error(struct nvmet_req *req, u16 status) +@@ -733,6 +737,8 @@ static void nvmet_set_error(struct nvmet_req *req, u16 status) static void __nvmet_req_complete(struct nvmet_req *req, u16 status) { @@ -231477,7 +280885,7 @@ index b8425fa34300f..87a347248c38f 100644 if (!req->sq->sqhd_disabled) nvmet_update_sq_head(req); req->cqe->sq_id = cpu_to_le16(req->sq->qid); -@@ -743,9 +748,9 @@ static void __nvmet_req_complete(struct nvmet_req *req, u16 status) +@@ -743,9 +749,9 @@ static void __nvmet_req_complete(struct nvmet_req *req, u16 status) trace_nvmet_req_complete(req); @@ -231489,7 +280897,7 @@ index b8425fa34300f..87a347248c38f 100644 } void nvmet_req_complete(struct nvmet_req *req, u16 status) -@@ -1163,7 
+1168,7 @@ static void nvmet_start_ctrl(struct nvmet_ctrl *ctrl) +@@ -1163,7 +1169,7 @@ static void nvmet_start_ctrl(struct nvmet_ctrl *ctrl) * reset the keep alive timer when the controller is enabled. */ if (ctrl->kato) @@ -231498,7 +280906,7 @@ index b8425fa34300f..87a347248c38f 100644 } static void nvmet_clear_ctrl(struct nvmet_ctrl *ctrl) -@@ -1477,7 +1482,7 @@ void nvmet_ctrl_fatal_error(struct nvmet_ctrl *ctrl) +@@ -1477,7 +1483,7 @@ void nvmet_ctrl_fatal_error(struct nvmet_ctrl *ctrl) mutex_lock(&ctrl->lock); if (!(ctrl->csts & NVME_CSTS_CFS)) { ctrl->csts |= NVME_CSTS_CFS; @@ -231507,16 +280915,38 @@ index b8425fa34300f..87a347248c38f 100644 } mutex_unlock(&ctrl->lock); } -@@ -1617,9 +1622,15 @@ static int __init nvmet_init(void) - goto out_free_zbd_work_queue; - } +@@ -1602,24 +1608,32 @@ void nvmet_subsys_put(struct nvmet_subsys *subsys) + + static int __init nvmet_init(void) + { +- int error; ++ int error = -ENOMEM; + nvmet_ana_group_enabled[NVMET_DEFAULT_ANA_GRPID] = 1; + ++ nvmet_bvec_cache = kmem_cache_create("nvmet-bvec", ++ NVMET_MAX_MPOOL_BVEC * sizeof(struct bio_vec), 0, ++ SLAB_HWCACHE_ALIGN, NULL); ++ if (!nvmet_bvec_cache) ++ return -ENOMEM; ++ + zbd_wq = alloc_workqueue("nvmet-zbd-wq", WQ_MEM_RECLAIM, 0); + if (!zbd_wq) +- return -ENOMEM; ++ goto out_destroy_bvec_cache; + + buffered_io_wq = alloc_workqueue("nvmet-buffered-io-wq", + WQ_MEM_RECLAIM, 0); +- if (!buffered_io_wq) { +- error = -ENOMEM; ++ if (!buffered_io_wq) + goto out_free_zbd_work_queue; +- } ++ + nvmet_wq = alloc_workqueue("nvmet-wq", WQ_MEM_RECLAIM, 0); -+ if (!nvmet_wq) { -+ error = -ENOMEM; ++ if (!nvmet_wq) + goto out_free_buffered_work_queue; -+ } -+ + error = nvmet_init_discovery(); if (error) - goto out_free_work_queue; @@ -231524,7 +280954,7 @@ index b8425fa34300f..87a347248c38f 100644 error = nvmet_init_configfs(); if (error) -@@ -1628,7 +1639,9 @@ static int __init nvmet_init(void) +@@ -1628,10 +1642,14 @@ static int __init nvmet_init(void) out_exit_discovery: nvmet_exit_discovery(); @@ -231535,16 +280965,24 @@ index b8425fa34300f..87a347248c38f 100644 destroy_workqueue(buffered_io_wq); out_free_zbd_work_queue: destroy_workqueue(zbd_wq); -@@ -1640,6 +1653,7 @@ static void __exit nvmet_exit(void) ++out_destroy_bvec_cache: ++ kmem_cache_destroy(nvmet_bvec_cache); + return error; + } + +@@ -1640,8 +1658,10 @@ static void __exit nvmet_exit(void) nvmet_exit_configfs(); nvmet_exit_discovery(); ida_destroy(&cntlid_ida); + destroy_workqueue(nvmet_wq); destroy_workqueue(buffered_io_wq); destroy_workqueue(zbd_wq); ++ kmem_cache_destroy(nvmet_bvec_cache); + BUILD_BUG_ON(sizeof(struct nvmf_disc_rsp_page_entry) != 1024); + BUILD_BUG_ON(sizeof(struct nvmf_disc_rsp_page_hdr) != 1024); diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c -index 22b5108168a6a..c43bc5e1c7a28 100644 +index 22b5108168a6a..00a2a591f5c1f 100644 --- a/drivers/nvme/target/fc.c +++ b/drivers/nvme/target/fc.c @@ -1491,7 +1491,7 @@ __nvmet_fc_free_assocs(struct nvmet_fc_tgtport *tgtport) @@ -231574,7 +281012,19 @@ index 22b5108168a6a..c43bc5e1c7a28 100644 /* already deleting - release local reference */ nvmet_fc_tgt_a_put(assoc); return; -@@ -2060,7 +2060,7 @@ nvmet_fc_rcv_ls_req(struct nvmet_fc_target_port *target_port, +@@ -1685,8 +1685,10 @@ nvmet_fc_ls_create_association(struct nvmet_fc_tgtport *tgtport, + else { + queue = nvmet_fc_alloc_target_queue(iod->assoc, 0, + be16_to_cpu(rqst->assoc_cmd.sqsize)); +- if (!queue) ++ if (!queue) { + ret = VERR_QUEUE_ALLOC_FAIL; ++ nvmet_fc_tgt_a_put(iod->assoc); ++ } + } + } + +@@ 
-2060,7 +2062,7 @@ nvmet_fc_rcv_ls_req(struct nvmet_fc_target_port *target_port, iod->rqstdatalen = lsreqbuf_len; iod->hosthandle = hosthandle; @@ -231660,18 +281110,60 @@ index 54606f1872b4a..5c16372f3b533 100644 /* * as the io has already had the done callback made, diff --git a/drivers/nvme/target/io-cmd-file.c b/drivers/nvme/target/io-cmd-file.c -index 1dd1a0fe2e819..228871d48106b 100644 +index 1dd1a0fe2e819..eadba13b276de 100644 --- a/drivers/nvme/target/io-cmd-file.c +++ b/drivers/nvme/target/io-cmd-file.c -@@ -8,6 +8,7 @@ +@@ -8,9 +8,9 @@ #include <linux/uio.h> #include <linux/falloc.h> #include <linux/file.h> +#include <linux/fs.h> #include "nvmet.h" - #define NVMET_MAX_MPOOL_BVEC 16 -@@ -266,7 +267,8 @@ static void nvmet_file_execute_rw(struct nvmet_req *req) +-#define NVMET_MAX_MPOOL_BVEC 16 + #define NVMET_MIN_MPOOL_OBJ 16 + + int nvmet_file_ns_revalidate(struct nvmet_ns *ns) +@@ -32,8 +32,6 @@ void nvmet_file_ns_disable(struct nvmet_ns *ns) + flush_workqueue(buffered_io_wq); + mempool_destroy(ns->bvec_pool); + ns->bvec_pool = NULL; +- kmem_cache_destroy(ns->bvec_cache); +- ns->bvec_cache = NULL; + fput(ns->file); + ns->file = NULL; + } +@@ -67,16 +65,8 @@ int nvmet_file_ns_enable(struct nvmet_ns *ns) + ns->blksize_shift = min_t(u8, + file_inode(ns->file)->i_blkbits, 12); + +- ns->bvec_cache = kmem_cache_create("nvmet-bvec", +- NVMET_MAX_MPOOL_BVEC * sizeof(struct bio_vec), +- 0, SLAB_HWCACHE_ALIGN, NULL); +- if (!ns->bvec_cache) { +- ret = -ENOMEM; +- goto err; +- } +- + ns->bvec_pool = mempool_create(NVMET_MIN_MPOOL_OBJ, mempool_alloc_slab, +- mempool_free_slab, ns->bvec_cache); ++ mempool_free_slab, nvmet_bvec_cache); + + if (!ns->bvec_pool) { + ret = -ENOMEM; +@@ -85,9 +75,10 @@ int nvmet_file_ns_enable(struct nvmet_ns *ns) + + return ret; + err: ++ fput(ns->file); ++ ns->file = NULL; + ns->size = 0; + ns->blksize_shift = 0; +- nvmet_file_ns_disable(ns); + return ret; + } + +@@ -266,7 +257,8 @@ static void nvmet_file_execute_rw(struct nvmet_req *req) if (req->ns->buffered_io) { if (likely(!req->f.mpool_alloc) && @@ -231681,7 +281173,7 @@ index 1dd1a0fe2e819..228871d48106b 100644 return; nvmet_file_submit_buffered_io(req); } else -@@ -290,7 +292,7 @@ static void nvmet_file_execute_flush(struct nvmet_req *req) +@@ -290,7 +282,7 @@ static void nvmet_file_execute_flush(struct nvmet_req *req) if (!nvmet_check_transfer_len(req, 0)) return; INIT_WORK(&req->f.work, nvmet_file_flush_work); @@ -231690,7 +281182,7 @@ index 1dd1a0fe2e819..228871d48106b 100644 } static void nvmet_file_execute_discard(struct nvmet_req *req) -@@ -350,7 +352,7 @@ static void nvmet_file_execute_dsm(struct nvmet_req *req) +@@ -350,7 +342,7 @@ static void nvmet_file_execute_dsm(struct nvmet_req *req) if (!nvmet_check_data_len_lte(req, nvmet_dsm_len(req))) return; INIT_WORK(&req->f.work, nvmet_file_dsm_work); @@ -231699,7 +281191,7 @@ index 1dd1a0fe2e819..228871d48106b 100644 } static void nvmet_file_write_zeroes_work(struct work_struct *w) -@@ -380,7 +382,7 @@ static void nvmet_file_execute_write_zeroes(struct nvmet_req *req) +@@ -380,7 +372,7 @@ static void nvmet_file_execute_write_zeroes(struct nvmet_req *req) if (!nvmet_check_transfer_len(req, 0)) return; INIT_WORK(&req->f.work, nvmet_file_write_zeroes_work); @@ -231731,11 +281223,23 @@ index 0285ccc7541f6..2553f487c9f24 100644 static int nvme_loop_init_iod(struct nvme_loop_ctrl *ctrl, diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h -index 7143c7fa74641..dbeb0b8c11947 100644 +index 7143c7fa74641..fdb06a9d430d2 100644 --- 
a/drivers/nvme/target/nvmet.h +++ b/drivers/nvme/target/nvmet.h -@@ -365,6 +365,7 @@ struct nvmet_req { +@@ -77,7 +77,6 @@ struct nvmet_ns { + + struct completion disable_done; + mempool_t *bvec_pool; +- struct kmem_cache *bvec_cache; + + int use_p2pmem; + struct pci_dev *p2p_dev; +@@ -363,8 +362,11 @@ struct nvmet_req { + u64 error_slba; + }; ++#define NVMET_MAX_MPOOL_BVEC 16 ++extern struct kmem_cache *nvmet_bvec_cache; extern struct workqueue_struct *buffered_io_wq; extern struct workqueue_struct *zbd_wq; +extern struct workqueue_struct *nvmet_wq; @@ -231743,11 +281247,26 @@ index 7143c7fa74641..dbeb0b8c11947 100644 static inline void nvmet_set_result(struct nvmet_req *req, u32 result) { diff --git a/drivers/nvme/target/passthru.c b/drivers/nvme/target/passthru.c -index f0efb35379898..6220e1dd961ad 100644 +index f0efb35379898..9b5929754195b 100644 --- a/drivers/nvme/target/passthru.c +++ b/drivers/nvme/target/passthru.c -@@ -281,7 +281,7 @@ static void nvmet_passthru_execute_cmd(struct nvmet_req *req) - if (req->p.use_workqueue || effects) { +@@ -271,17 +271,16 @@ static void nvmet_passthru_execute_cmd(struct nvmet_req *req) + } + + /* +- * If there are effects for the command we are about to execute, or +- * an end_req function we need to use nvme_execute_passthru_rq() +- * synchronously in a work item seeing the end_req function and +- * nvme_passthru_end() can't be called in the request done callback +- * which is typically in interrupt context. ++ * If a command needs post-execution fixups, or there are any ++ * non-trivial effects, make sure to execute the command synchronously ++ * in a workqueue so that nvme_passthru_end gets called. + */ + effects = nvme_command_effects(ctrl, ns, req->cmd->common.opcode); +- if (req->p.use_workqueue || effects) { ++ if (req->p.use_workqueue || ++ (effects & ~(NVME_CMD_EFFECTS_CSUPP | NVME_CMD_EFFECTS_LBCC))) { INIT_WORK(&req->p.work, nvmet_passthru_execute_cmd_work); req->p.rq = rq; - schedule_work(&req->p.work); @@ -232036,7 +281555,7 @@ index 46bc30fe85d2b..1466698751c55 100644 done: status = nvmet_copy_to_sgl(req, 0, id_zns, sizeof(*id_zns)); diff --git a/drivers/nvmem/core.c b/drivers/nvmem/core.c -index 8976da38b375a..38bab84f3c8ae 100644 +index 8976da38b375a..47c1487dcf8cc 100644 --- a/drivers/nvmem/core.c +++ b/drivers/nvmem/core.c @@ -307,6 +307,8 @@ static umode_t nvmem_bin_attr_is_visible(struct kobject *kobj, @@ -232048,43 +281567,160 @@ index 8976da38b375a..38bab84f3c8ae 100644 return nvmem_bin_attr_get_umode(nvmem); } -@@ -766,7 +768,7 @@ struct nvmem_device *nvmem_register(const struct nvmem_config *config) +@@ -764,31 +766,32 @@ struct nvmem_device *nvmem_register(const struct nvmem_config *config) + return ERR_PTR(rval); + } - if (config->wp_gpio) - nvmem->wp_gpio = config->wp_gpio; +- if (config->wp_gpio) +- nvmem->wp_gpio = config->wp_gpio; - else -+ else if (!config->ignore_wp) ++ nvmem->id = rval; ++ ++ nvmem->dev.type = &nvmem_provider_type; ++ nvmem->dev.bus = &nvmem_bus_type; ++ nvmem->dev.parent = config->dev; ++ ++ device_initialize(&nvmem->dev); ++ ++ if (!config->ignore_wp) nvmem->wp_gpio = gpiod_get_optional(config->dev, "wp", GPIOD_OUT_HIGH); if (IS_ERR(nvmem->wp_gpio)) { -@@ -822,21 +824,18 @@ struct nvmem_device *nvmem_register(const struct nvmem_config *config) - nvmem->dev.groups = nvmem_dev_groups; - #endif +- ida_free(&nvmem_ida, nvmem->id); + rval = PTR_ERR(nvmem->wp_gpio); +- kfree(nvmem); +- return ERR_PTR(rval); ++ nvmem->wp_gpio = NULL; ++ goto err_put_device; + } + + kref_init(&nvmem->refcnt); + 
INIT_LIST_HEAD(&nvmem->cells); + +- nvmem->id = rval; + nvmem->owner = config->owner; + if (!nvmem->owner && config->dev->driver) + nvmem->owner = config->dev->driver->owner; + nvmem->stride = config->stride ?: 1; + nvmem->word_size = config->word_size ?: 1; + nvmem->size = config->size; +- nvmem->dev.type = &nvmem_provider_type; +- nvmem->dev.bus = &nvmem_bus_type; +- nvmem->dev.parent = config->dev; + nvmem->root_only = config->root_only; + nvmem->priv = config->priv; + nvmem->type = config->type; +@@ -803,18 +806,21 @@ struct nvmem_device *nvmem_register(const struct nvmem_config *config) + + switch (config->id) { + case NVMEM_DEVID_NONE: +- dev_set_name(&nvmem->dev, "%s", config->name); ++ rval = dev_set_name(&nvmem->dev, "%s", config->name); + break; + case NVMEM_DEVID_AUTO: +- dev_set_name(&nvmem->dev, "%s%d", config->name, nvmem->id); ++ rval = dev_set_name(&nvmem->dev, "%s%d", config->name, nvmem->id); + break; + default: +- dev_set_name(&nvmem->dev, "%s%d", ++ rval = dev_set_name(&nvmem->dev, "%s%d", + config->name ? : "nvmem", + config->name ? config->id : nvmem->id); + break; + } + ++ if (rval) ++ goto err_put_device; ++ + nvmem->read_only = device_property_present(config->dev, "read-only") || + config->read_only || !nvmem->reg_write; + +@@ -824,29 +830,20 @@ struct nvmem_device *nvmem_register(const struct nvmem_config *config) -- if (nvmem->nkeepout) { -- rval = nvmem_validate_keepouts(nvmem); + if (nvmem->nkeepout) { + rval = nvmem_validate_keepouts(nvmem); - if (rval) { - ida_free(&nvmem_ida, nvmem->id); - kfree(nvmem); - return ERR_PTR(rval); - } -- } ++ if (rval) ++ goto err_put_device; + } + +- dev_dbg(&nvmem->dev, "Registering nvmem device %s\n", config->name); - - dev_dbg(&nvmem->dev, "Registering nvmem device %s\n", config->name); +- rval = device_register(&nvmem->dev); +- if (rval) +- goto err_put_device; +- + if (config->compat) { + rval = nvmem_sysfs_setup_compat(nvmem, config); + if (rval) +- goto err_device_del; ++ goto err_put_device; + } + + if (config->cells) { + rval = nvmem_add_cells(nvmem, config->cells, config->ncells); + if (rval) +- goto err_teardown_compat; ++ goto err_remove_cells; + } - rval = device_register(&nvmem->dev); + rval = nvmem_add_cells_from_table(nvmem); +@@ -857,17 +854,20 @@ struct nvmem_device *nvmem_register(const struct nvmem_config *config) if (rval) - goto err_put_device; + goto err_remove_cells; -+ if (nvmem->nkeepout) { -+ rval = nvmem_validate_keepouts(nvmem); -+ if (rval) -+ goto err_device_del; -+ } ++ dev_dbg(&nvmem->dev, "Registering nvmem device %s\n", config->name); + - if (config->compat) { - rval = nvmem_sysfs_setup_compat(nvmem, config); - if (rval) ++ rval = device_add(&nvmem->dev); ++ if (rval) ++ goto err_remove_cells; ++ + blocking_notifier_call_chain(&nvmem_notifier, NVMEM_ADD, nvmem); + + return nvmem; + + err_remove_cells: + nvmem_device_remove_all_cells(nvmem); +-err_teardown_compat: + if (config->compat) + nvmem_sysfs_remove_compat(nvmem, config); +-err_device_del: +- device_del(&nvmem->dev); + err_put_device: + put_device(&nvmem->dev); + +diff --git a/drivers/nvmem/qcom-spmi-sdam.c b/drivers/nvmem/qcom-spmi-sdam.c +index 4fcb63507ecd1..8499892044b7b 100644 +--- a/drivers/nvmem/qcom-spmi-sdam.c ++++ b/drivers/nvmem/qcom-spmi-sdam.c +@@ -166,6 +166,7 @@ static const struct of_device_id sdam_match_table[] = { + { .compatible = "qcom,spmi-sdam" }, + {}, + }; ++MODULE_DEVICE_TABLE(of, sdam_match_table); + + static struct platform_driver sdam_driver = { + .driver = { +diff --git a/drivers/nvmem/rmem.c 
b/drivers/nvmem/rmem.c +index b11c3c974b3d6..80cb187f14817 100644 +--- a/drivers/nvmem/rmem.c ++++ b/drivers/nvmem/rmem.c +@@ -37,9 +37,9 @@ static int rmem_read(void *context, unsigned int offset, + * but as of Dec 2020 this isn't possible on arm64. + */ + addr = memremap(priv->mem->base, available, MEMREMAP_WB); +- if (IS_ERR(addr)) { ++ if (!addr) { + dev_err(priv->dev, "Failed to remap memory region\n"); +- return PTR_ERR(addr); ++ return -ENOMEM; + } + + count = memory_read_from_buffer(val, bytes, &off, addr, available); diff --git a/drivers/of/Kconfig b/drivers/of/Kconfig index 3dfeae8912dfc..80b5fd44ab1c7 100644 --- a/drivers/of/Kconfig @@ -232112,6 +281748,53 @@ index c13b982084a3a..e0360a44306e2 100644 obj-$(CONFIG_OF_UNITTEST) += unittest.o obj-$(CONFIG_OF_RESERVED_MEM) += of_reserved_mem.o obj-$(CONFIG_OF_RESOLVE) += resolver.o +diff --git a/drivers/of/address.c b/drivers/of/address.c +index 94f017d808c44..586fb94005e26 100644 +--- a/drivers/of/address.c ++++ b/drivers/of/address.c +@@ -963,8 +963,19 @@ int of_dma_get_range(struct device_node *np, const struct bus_dma_region **map) + } + + of_dma_range_parser_init(&parser, node); +- for_each_of_range(&parser, &range) ++ for_each_of_range(&parser, &range) { ++ if (range.cpu_addr == OF_BAD_ADDR) { ++ pr_err("translation of DMA address(%llx) to CPU address failed node(%pOF)\n", ++ range.bus_addr, node); ++ continue; ++ } + num_ranges++; ++ } ++ ++ if (!num_ranges) { ++ ret = -EINVAL; ++ goto out; ++ } + + r = kcalloc(num_ranges + 1, sizeof(*r), GFP_KERNEL); + if (!r) { +@@ -973,18 +984,16 @@ int of_dma_get_range(struct device_node *np, const struct bus_dma_region **map) + } + + /* +- * Record all info in the generic DMA ranges array for struct device. ++ * Record all info in the generic DMA ranges array for struct device, ++ * returning an error if we don't find any parsable ranges. + */ + *map = r; + of_dma_range_parser_init(&parser, node); + for_each_of_range(&parser, &range) { + pr_debug("dma_addr(%llx) cpu_addr(%llx) size(%llx)\n", + range.bus_addr, range.cpu_addr, range.size); +- if (range.cpu_addr == OF_BAD_ADDR) { +- pr_err("translation of DMA address(%llx) to CPU address failed node(%pOF)\n", +- range.bus_addr, node); ++ if (range.cpu_addr == OF_BAD_ADDR) + continue; +- } + r->cpu_start = range.cpu_addr; + r->dma_start = range.bus_addr; + r->size = range.size; diff --git a/drivers/of/base.c b/drivers/of/base.c index 0ac17256258d5..54719f8156ed1 100644 --- a/drivers/of/base.c @@ -232245,7 +281928,7 @@ index 4546572af24bb..338171c978cc1 100644 bool __init early_init_dt_scan(void *params) diff --git a/drivers/of/kexec.c b/drivers/of/kexec.c -index 761fd870d1db2..8f9dba11873cb 100644 +index 761fd870d1db2..52bb68fb22169 100644 --- a/drivers/of/kexec.c +++ b/drivers/of/kexec.c @@ -125,6 +125,7 @@ int ima_get_kexec_buffer(void **addr, size_t *size) @@ -232279,6 +281962,39 @@ index 761fd870d1db2..8f9dba11873cb 100644 *addr = __va(tmp_addr); *size = tmp_size; +@@ -267,7 +284,7 @@ void *of_kexec_alloc_and_setup_fdt(const struct kimage *image, + const char *cmdline, size_t extra_fdt_size) + { + void *fdt; +- int ret, chosen_node; ++ int ret, chosen_node, len; + const void *prop; + size_t fdt_size; + +@@ -310,19 +327,19 @@ void *of_kexec_alloc_and_setup_fdt(const struct kimage *image, + goto out; + + /* Did we boot using an initrd? 
*/ +- prop = fdt_getprop(fdt, chosen_node, "linux,initrd-start", NULL); ++ prop = fdt_getprop(fdt, chosen_node, "linux,initrd-start", &len); + if (prop) { + u64 tmp_start, tmp_end, tmp_size; + +- tmp_start = fdt64_to_cpu(*((const fdt64_t *) prop)); ++ tmp_start = of_read_number(prop, len / 4); + +- prop = fdt_getprop(fdt, chosen_node, "linux,initrd-end", NULL); ++ prop = fdt_getprop(fdt, chosen_node, "linux,initrd-end", &len); + if (!prop) { + ret = -EINVAL; + goto out; + } + +- tmp_end = fdt64_to_cpu(*((const fdt64_t *) prop)); ++ tmp_end = of_read_number(prop, len / 4); + + /* + * kexec reserves exact initrd size, while firmware may @@ -386,6 +403,15 @@ void *of_kexec_alloc_and_setup_fdt(const struct kimage *image, crashk_res.end - crashk_res.start + 1); if (ret) @@ -232446,8 +282162,24 @@ index dbac3a172a11e..0000000000000 - return of_get_mac_addr_nvmem(np, addr); -} -EXPORT_SYMBOL(of_get_mac_address); +diff --git a/drivers/of/of_reserved_mem.c b/drivers/of/of_reserved_mem.c +index 9da8835ba5a58..9e949ddcb1464 100644 +--- a/drivers/of/of_reserved_mem.c ++++ b/drivers/of/of_reserved_mem.c +@@ -47,9 +47,10 @@ static int __init early_init_dt_alloc_reserved_memory_arch(phys_addr_t size, + err = memblock_mark_nomap(base, size); + if (err) + memblock_free(base, size); +- kmemleak_ignore_phys(base); + } + ++ kmemleak_ignore_phys(base); ++ + return err; + } + diff --git a/drivers/of/overlay.c b/drivers/of/overlay.c -index d80160cf34bb7..d1187123c4fc4 100644 +index d80160cf34bb7..424682372417d 100644 --- a/drivers/of/overlay.c +++ b/drivers/of/overlay.c @@ -170,9 +170,7 @@ static int overlay_notify(struct overlay_changeset *ovcs, @@ -232461,6 +282193,40 @@ index d80160cf34bb7..d1187123c4fc4 100644 ret = notifier_to_errno(ret); pr_err("overlay changeset %s notifier error %d, target: %pOF\n", of_overlay_action_name[action], ret, nd.target); +@@ -549,7 +547,7 @@ static int find_dup_cset_node_entry(struct overlay_changeset *ovcs, + + fn_1 = kasprintf(GFP_KERNEL, "%pOF", ce_1->np); + fn_2 = kasprintf(GFP_KERNEL, "%pOF", ce_2->np); +- node_path_match = !strcmp(fn_1, fn_2); ++ node_path_match = !fn_1 || !fn_2 || !strcmp(fn_1, fn_2); + kfree(fn_1); + kfree(fn_2); + if (node_path_match) { +@@ -584,7 +582,7 @@ static int find_dup_cset_prop(struct overlay_changeset *ovcs, + + fn_1 = kasprintf(GFP_KERNEL, "%pOF", ce_1->np); + fn_2 = kasprintf(GFP_KERNEL, "%pOF", ce_2->np); +- node_path_match = !strcmp(fn_1, fn_2); ++ node_path_match = !fn_1 || !fn_2 || !strcmp(fn_1, fn_2); + kfree(fn_1); + kfree(fn_2); + if (node_path_match && +diff --git a/drivers/of/property.c b/drivers/of/property.c +index a3483484a5a2a..acf0d3110357c 100644 +--- a/drivers/of/property.c ++++ b/drivers/of/property.c +@@ -975,8 +975,10 @@ of_fwnode_get_reference_args(const struct fwnode_handle *fwnode, + nargs, index, &of_args); + if (ret < 0) + return ret; +- if (!args) ++ if (!args) { ++ of_node_put(of_args.np); + return 0; ++ } + + args->nargs = of_args.args_count; + args->fwnode = of_fwnode_handle(of_args.np); diff --git a/drivers/of/unittest.c b/drivers/of/unittest.c index 8c056972a6ddc..2bee1d992408f 100644 --- a/drivers/of/unittest.c @@ -232840,6 +282606,18 @@ index 86abad3fa2150..73cbd0bb1975a 100644 int global_irq[32]; }; +diff --git a/drivers/parisc/iosapic.c b/drivers/parisc/iosapic.c +index 8a3b0c3a1e92b..fd99735dca3e6 100644 +--- a/drivers/parisc/iosapic.c ++++ b/drivers/parisc/iosapic.c +@@ -875,6 +875,7 @@ int iosapic_serial_irq(struct parisc_device *dev) + + return vi->txn_irq; + } ++EXPORT_SYMBOL(iosapic_serial_irq); + 
#endif + + diff --git a/drivers/parisc/lasi.c b/drivers/parisc/lasi.c index 4e4fd12c2112e..6ef621adb63a8 100644 --- a/drivers/parisc/lasi.c @@ -232892,6 +282670,20 @@ index 732b516c7bf84..afc6e66ddc31c 100644 /* Read HW Rev First */ func_class = READ_REG32(addr + LBA_FCLASS); +diff --git a/drivers/parisc/led.c b/drivers/parisc/led.c +index cf91cb024be30..4c0551f89c449 100644 +--- a/drivers/parisc/led.c ++++ b/drivers/parisc/led.c +@@ -137,6 +137,9 @@ static int start_task(void) + + /* Create the work queue and queue the LED task */ + led_wq = create_singlethread_workqueue("led_wq"); ++ if (!led_wq) ++ return -ENOMEM; ++ + queue_delayed_work(led_wq, &led_task, 0); + + return 0; diff --git a/drivers/parisc/pdc_stable.c b/drivers/parisc/pdc_stable.c index e090978518f1a..4760f82def6ec 100644 --- a/drivers/parisc/pdc_stable.c @@ -232962,6 +282754,19 @@ index 5b6df15162354..73a2b01f8d9ca 100644 if (ret < 0) { kfree(wax); return ret; +diff --git a/drivers/parport/parport_pc.c b/drivers/parport/parport_pc.c +index eda4ded4d5e52..925be41eeebec 100644 +--- a/drivers/parport/parport_pc.c ++++ b/drivers/parport/parport_pc.c +@@ -468,7 +468,7 @@ static size_t parport_pc_fifo_write_block_pio(struct parport *port, + const unsigned char *bufp = buf; + size_t left = length; + unsigned long expire = jiffies + port->physport->cad->timeout; +- const int fifo = FIFO(port); ++ const unsigned long fifo = FIFO(port); + int poll_for = 8; /* 80 usecs */ + const struct parport_pc_private *priv = port->physport->private_data; + const int fifo_depth = priv->fifo_depth; diff --git a/drivers/pci/access.c b/drivers/pci/access.c index 46935695cfb90..8d0d1f61c650d 100644 --- a/drivers/pci/access.c @@ -233213,7 +283018,7 @@ index d1d9b8344ec9c..7cd4593ad12fa 100644 resource_list_for_each_entry(entry, &pp->bridge->windows) { if (resource_type(entry->res) != IORESOURCE_MEM) diff --git a/drivers/pci/controller/dwc/pcie-designware.c b/drivers/pci/controller/dwc/pcie-designware.c -index a945f0c0e73dc..e408ebf5bd738 100644 +index a945f0c0e73dc..00972a7bc9768 100644 --- a/drivers/pci/controller/dwc/pcie-designware.c +++ b/drivers/pci/controller/dwc/pcie-designware.c @@ -287,8 +287,8 @@ static void dw_pcie_prog_outbound_atu_unroll(struct dw_pcie *pci, u8 func_no, @@ -233312,6 +283117,15 @@ index a945f0c0e73dc..e408ebf5bd738 100644 pci->atu_base = pci->dbi_base + DEFAULT_DBI_ATU_OFFSET; } +@@ -715,7 +730,7 @@ void dw_pcie_setup(struct dw_pcie *pci) + if (pci->n_fts[1]) { + val = dw_pcie_readl_dbi(pci, PCIE_LINK_WIDTH_SPEED_CONTROL); + val &= ~PORT_LOGIC_N_FTS_MASK; +- val |= pci->n_fts[pci->link_gen - 1]; ++ val |= pci->n_fts[1]; + dw_pcie_writel_dbi(pci, PCIE_LINK_WIDTH_SPEED_CONTROL, val); + } + @@ -724,6 +739,13 @@ void dw_pcie_setup(struct dw_pcie *pci) val |= PORT_LINK_DLL_LINK_EN; dw_pcie_writel_dbi(pci, PCIE_PORT_LINK_CONTROL, val); @@ -236057,8 +285871,24 @@ index 5fb9ce6e536e0..d1a200b93b2bf 100644 /* * Region 0 is reserved for configuration space and shouldn't * be used elsewhere per TRM, so leave it out. 
+diff --git a/drivers/pci/controller/vmd.c b/drivers/pci/controller/vmd.c +index a5987e52700e3..8dce71142e10f 100644 +--- a/drivers/pci/controller/vmd.c ++++ b/drivers/pci/controller/vmd.c +@@ -900,6 +900,11 @@ static int vmd_resume(struct device *dev) + struct vmd_dev *vmd = pci_get_drvdata(pdev); + int err, i; + ++ if (vmd->irq_domain) ++ vmd_set_msi_remapping(vmd, true); ++ else ++ vmd_set_msi_remapping(vmd, false); ++ + for (i = 0; i < vmd->msix_count; i++) { + err = devm_request_irq(dev, pci_irq_vector(pdev, i), + vmd_irq, IRQF_NO_THREAD, diff --git a/drivers/pci/endpoint/functions/pci-epf-test.c b/drivers/pci/endpoint/functions/pci-epf-test.c -index 90d84d3bc868f..a5ed779b0a512 100644 +index 90d84d3bc868f..45535d4ae6445 100644 --- a/drivers/pci/endpoint/functions/pci-epf-test.c +++ b/drivers/pci/endpoint/functions/pci-epf-test.c @@ -285,7 +285,17 @@ static int pci_epf_test_copy(struct pci_epf_test *epf_test) @@ -236097,6 +285927,15 @@ index 90d84d3bc868f..a5ed779b0a512 100644 for (bar = 0; bar < PCI_STD_NUM_BARS; bar++) { epf_bar = &epf->bar[bar]; +@@ -874,7 +883,7 @@ static int pci_epf_test_bind(struct pci_epf *epf) + if (ret) + epf_test->dma_supported = false; + +- if (linkup_notifier) { ++ if (linkup_notifier || core_init_notifier) { + epf->nb.notifier_call = pci_epf_test_notifier; + pci_epc_register_notifier(epc, &epf->nb); + } else { diff --git a/drivers/pci/hotplug/pciehp.h b/drivers/pci/hotplug/pciehp.h index 69fd401691be6..e0a614acee059 100644 --- a/drivers/pci/hotplug/pciehp.h @@ -236288,6 +286127,19 @@ index 3024d7e85e6a7..60098a701e83a 100644 DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_QCOM, 0x0400, PCI_CLASS_BRIDGE_PCI, 8, quirk_cmd_compl); DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_QCOM, 0x0401, +diff --git a/drivers/pci/irq.c b/drivers/pci/irq.c +index 12ecd0aaa28d6..0050e8f6814ed 100644 +--- a/drivers/pci/irq.c ++++ b/drivers/pci/irq.c +@@ -44,6 +44,8 @@ int pci_request_irq(struct pci_dev *dev, unsigned int nr, irq_handler_t handler, + va_start(ap, fmt); + devname = kvasprintf(GFP_KERNEL, fmt, ap); + va_end(ap); ++ if (!devname) ++ return -ENOMEM; + + ret = request_threaded_irq(pci_irq_vector(dev, nr), handler, thread_fn, + irqflags, devname, dev_id); diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c index 4b4792940e869..cc4c2b8a5efd7 100644 --- a/drivers/pci/msi.c @@ -236791,8 +286643,44 @@ index fdaf86a888b73..c994ebec23603 100644 if (write_op) write_op(bridge, reg, old, new, mask); +diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c +index 7fb5cd17cc981..f2909ae93f2f8 100644 +--- a/drivers/pci/pci-sysfs.c ++++ b/drivers/pci/pci-sysfs.c +@@ -1179,11 +1179,9 @@ static int pci_create_attr(struct pci_dev *pdev, int num, int write_combine) + + sysfs_bin_attr_init(res_attr); + if (write_combine) { +- pdev->res_attr_wc[num] = res_attr; + sprintf(res_attr_name, "resource%d_wc", num); + res_attr->mmap = pci_mmap_resource_wc; + } else { +- pdev->res_attr[num] = res_attr; + sprintf(res_attr_name, "resource%d", num); + if (pci_resource_flags(pdev, num) & IORESOURCE_IO) { + res_attr->read = pci_read_resource_io; +@@ -1201,10 +1199,17 @@ static int pci_create_attr(struct pci_dev *pdev, int num, int write_combine) + res_attr->size = pci_resource_len(pdev, num); + res_attr->private = (void *)(unsigned long)num; + retval = sysfs_create_bin_file(&pdev->dev.kobj, res_attr); +- if (retval) ++ if (retval) { + kfree(res_attr); ++ return retval; ++ } ++ ++ if (write_combine) ++ pdev->res_attr_wc[num] = res_attr; ++ else ++ pdev->res_attr[num] = res_attr; + +- return retval; 
++ return 0; + } + + /** diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c -index ce2ab62b64cfa..2bfff2328cf87 100644 +index ce2ab62b64cfa..a0c6a9eeb7c6d 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -2889,6 +2889,18 @@ static const struct dmi_system_id bridge_d3_blacklist[] = { @@ -236862,6 +286750,15 @@ index ce2ab62b64cfa..2bfff2328cf87 100644 } EXPORT_SYMBOL_GPL(pci_dev_unlock); +@@ -6363,6 +6383,8 @@ bool pci_device_is_present(struct pci_dev *pdev) + { + u32 v; + ++ /* Check PF if pdev is a VF, since VF Vendor/Device IDs are 0xffff */ ++ pdev = pci_physfn(pdev); + if (pci_dev_is_disconnected(pdev)) + return false; + return pci_bus_read_dev_vendor_id(pdev->bus, pdev->devfn, &v, 0); diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c index 9784fdcf30061..ca9ac8c6a2021 100644 --- a/drivers/pci/pcie/aer.c @@ -237221,6 +287118,49 @@ index bc3cba5f8c5dc..400eb7f579dce 100644 return 0; } +diff --git a/drivers/perf/arm_dmc620_pmu.c b/drivers/perf/arm_dmc620_pmu.c +index 280a6ae3e27cf..54aa4658fb36e 100644 +--- a/drivers/perf/arm_dmc620_pmu.c ++++ b/drivers/perf/arm_dmc620_pmu.c +@@ -725,6 +725,8 @@ static struct platform_driver dmc620_pmu_driver = { + + static int __init dmc620_pmu_init(void) + { ++ int ret; ++ + cpuhp_state_num = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, + DMC620_DRVNAME, + NULL, +@@ -732,7 +734,11 @@ static int __init dmc620_pmu_init(void) + if (cpuhp_state_num < 0) + return cpuhp_state_num; + +- return platform_driver_register(&dmc620_pmu_driver); ++ ret = platform_driver_register(&dmc620_pmu_driver); ++ if (ret) ++ cpuhp_remove_multi_state(cpuhp_state_num); ++ ++ return ret; + } + + static void __exit dmc620_pmu_exit(void) +diff --git a/drivers/perf/arm_dsu_pmu.c b/drivers/perf/arm_dsu_pmu.c +index a36698a90d2f2..54b8ba032c787 100644 +--- a/drivers/perf/arm_dsu_pmu.c ++++ b/drivers/perf/arm_dsu_pmu.c +@@ -858,7 +858,11 @@ static int __init dsu_pmu_init(void) + if (ret < 0) + return ret; + dsu_pmu_cpuhp_state = ret; +- return platform_driver_register(&dsu_pmu_driver); ++ ret = platform_driver_register(&dsu_pmu_driver); ++ if (ret) ++ cpuhp_remove_multi_state(dsu_pmu_cpuhp_state); ++ ++ return ret; + } + + static void __exit dsu_pmu_exit(void) diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c index 295cc7952d0ed..57d20cf3da7a3 100644 --- a/drivers/perf/arm_pmu.c @@ -237262,6 +287202,32 @@ index 513de1f54e2d7..933b96e243b84 100644 return pmu_parse_percpu_irq(pmu, irq); } +diff --git a/drivers/perf/arm_smmuv3_pmu.c b/drivers/perf/arm_smmuv3_pmu.c +index 226348822ab39..5933ad151f869 100644 +--- a/drivers/perf/arm_smmuv3_pmu.c ++++ b/drivers/perf/arm_smmuv3_pmu.c +@@ -896,6 +896,8 @@ static struct platform_driver smmu_pmu_driver = { + + static int __init arm_smmu_pmu_init(void) + { ++ int ret; ++ + cpuhp_state_num = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, + "perf/arm/pmcg:online", + NULL, +@@ -903,7 +905,11 @@ static int __init arm_smmu_pmu_init(void) + if (cpuhp_state_num < 0) + return cpuhp_state_num; + +- return platform_driver_register(&smmu_pmu_driver); ++ ret = platform_driver_register(&smmu_pmu_driver); ++ if (ret) ++ cpuhp_remove_multi_state(cpuhp_state_num); ++ ++ return ret; + } + module_init(arm_smmu_pmu_init); + diff --git a/drivers/perf/arm_spe_pmu.c b/drivers/perf/arm_spe_pmu.c index d44bcc29d99c8..cd5945e17fdf7 100644 --- a/drivers/perf/arm_spe_pmu.c @@ -237615,7 +287581,7 @@ index a39f30fa2e991..1ccb5ddab865c 100644 void brcm_usb_dvr_init_7216(struct brcm_usb_init_params *params); void brcm_usb_dvr_init_7211b0(struct 
brcm_usb_init_params *params); diff --git a/drivers/phy/broadcom/phy-brcm-usb.c b/drivers/phy/broadcom/phy-brcm-usb.c -index 116fb23aebd99..2cb3779fcdf82 100644 +index 116fb23aebd99..c0c3ab9b2a153 100644 --- a/drivers/phy/broadcom/phy-brcm-usb.c +++ b/drivers/phy/broadcom/phy-brcm-usb.c @@ -18,6 +18,7 @@ @@ -237626,7 +287592,7 @@ index 116fb23aebd99..2cb3779fcdf82 100644 #include "phy-brcm-usb-init.h" -@@ -70,12 +71,35 @@ struct brcm_usb_phy_data { +@@ -70,17 +71,40 @@ struct brcm_usb_phy_data { int init_count; int wake_irq; struct brcm_usb_phy phys[BRCM_USB_PHY_ID_MAX]; @@ -237661,7 +287627,14 @@ index 116fb23aebd99..2cb3779fcdf82 100644 + static irqreturn_t brcm_usb_phy_wake_isr(int irq, void *dev_id) { - struct phy *gphy = dev_id; +- struct phy *gphy = dev_id; ++ struct device *dev = dev_id; + +- pm_wakeup_event(&gphy->dev, 0); ++ pm_wakeup_event(dev, 0); + + return IRQ_HANDLED; + } @@ -91,6 +115,9 @@ static int brcm_usb_phy_init(struct phy *gphy) struct brcm_usb_phy_data *priv = container_of(phy, struct brcm_usb_phy_data, phys[phy->id]); @@ -237707,6 +287680,15 @@ index 116fb23aebd99..2cb3779fcdf82 100644 }, { .compatible = "brcm,bcm7216-usb-phy", +@@ -412,7 +451,7 @@ static int brcm_usb_phy_dvr_init(struct platform_device *pdev, + if (priv->wake_irq >= 0) { + err = devm_request_irq(dev, priv->wake_irq, + brcm_usb_phy_wake_isr, 0, +- dev_name(dev), gphy); ++ dev_name(dev), dev); + if (err < 0) + return err; + device_set_wakeup_capable(dev, 1); @@ -488,6 +527,9 @@ static int brcm_usb_phy_probe(struct platform_device *pdev) if (err) return err; @@ -238127,6 +288109,29 @@ index 5172971f4c360..3cd4d51c247c3 100644 return error; } +diff --git a/drivers/phy/phy-can-transceiver.c b/drivers/phy/phy-can-transceiver.c +index c2cb93b4df71c..4525d3fd903a4 100644 +--- a/drivers/phy/phy-can-transceiver.c ++++ b/drivers/phy/phy-can-transceiver.c +@@ -87,6 +87,7 @@ static int can_transceiver_phy_probe(struct platform_device *pdev) + struct gpio_desc *standby_gpio; + struct gpio_desc *enable_gpio; + u32 max_bitrate = 0; ++ int err; + + can_transceiver_phy = devm_kzalloc(dev, sizeof(struct can_transceiver_phy), GFP_KERNEL); + if (!can_transceiver_phy) +@@ -102,8 +103,8 @@ static int can_transceiver_phy_probe(struct platform_device *pdev) + return PTR_ERR(phy); + } + +- device_property_read_u32(dev, "max-bitrate", &max_bitrate); +- if (!max_bitrate) ++ err = device_property_read_u32(dev, "max-bitrate", &max_bitrate); ++ if ((err != -EINVAL) && !max_bitrate) + dev_warn(dev, "Invalid value for transceiver max bitrate. 
Ignoring bitrate limit\n"); + phy->attrs.max_link_rate = max_bitrate; + diff --git a/drivers/phy/phy-core-mipi-dphy.c b/drivers/phy/phy-core-mipi-dphy.c index 288c9c67aa748..929e86d6558e0 100644 --- a/drivers/phy/phy-core-mipi-dphy.c @@ -238163,10 +288168,29 @@ index 288c9c67aa748..929e86d6558e0 100644 if (cfg->clk_prepare < 38000 || cfg->clk_prepare > 95000) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp.c b/drivers/phy/qualcomm/phy-qcom-qmp.c -index f14032170b1c1..ed69d455ac0ea 100644 +index f14032170b1c1..eef863108bfe2 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp.c -@@ -3632,7 +3632,7 @@ static const struct qmp_phy_cfg sc8180x_pciephy_cfg = { +@@ -2919,6 +2919,7 @@ struct qcom_qmp { + struct regulator_bulk_data *vregs; + + struct qmp_phy **phys; ++ struct qmp_phy *usb_phy; + + struct mutex phy_mutex; + int init_count; +@@ -3417,8 +3418,8 @@ static const struct qmp_phy_cfg sc7180_dpphy_cfg = { + + .clk_list = qmp_v3_phy_clk_l, + .num_clks = ARRAY_SIZE(qmp_v3_phy_clk_l), +- .reset_list = sc7180_usb3phy_reset_l, +- .num_resets = ARRAY_SIZE(sc7180_usb3phy_reset_l), ++ .reset_list = msm8996_usb3phy_reset_l, ++ .num_resets = ARRAY_SIZE(msm8996_usb3phy_reset_l), + .vreg_list = qmp_phy_vreg_l, + .num_vregs = ARRAY_SIZE(qmp_phy_vreg_l), + .regs = qmp_v3_usb3phy_regs_layout, +@@ -3632,7 +3633,7 @@ static const struct qmp_phy_cfg sc8180x_pciephy_cfg = { .nlanes = 1, .serdes_tbl = sc8180x_qmp_pcie_serdes_tbl, @@ -238175,7 +288199,47 @@ index f14032170b1c1..ed69d455ac0ea 100644 .tx_tbl = sc8180x_qmp_pcie_tx_tbl, .tx_tbl_num = ARRAY_SIZE(sc8180x_qmp_pcie_tx_tbl), .rx_tbl = sc8180x_qmp_pcie_rx_tbl, -@@ -4802,7 +4802,7 @@ static int qcom_qmp_phy_power_on(struct phy *phy) +@@ -3805,8 +3806,8 @@ static const struct qmp_phy_cfg sm8250_dpphy_cfg = { + .serdes_tbl_hbr3 = qmp_v4_dp_serdes_tbl_hbr3, + .serdes_tbl_hbr3_num = ARRAY_SIZE(qmp_v4_dp_serdes_tbl_hbr3), + +- .clk_list = qmp_v4_phy_clk_l, +- .num_clks = ARRAY_SIZE(qmp_v4_phy_clk_l), ++ .clk_list = qmp_v4_sm8250_usbphy_clk_l, ++ .num_clks = ARRAY_SIZE(qmp_v4_sm8250_usbphy_clk_l), + .reset_list = msm8996_usb3phy_reset_l, + .num_resets = ARRAY_SIZE(msm8996_usb3phy_reset_l), + .vreg_list = qmp_phy_vreg_l, +@@ -4554,7 +4555,7 @@ static int qcom_qmp_phy_com_init(struct qmp_phy *qphy) + struct qcom_qmp *qmp = qphy->qmp; + const struct qmp_phy_cfg *cfg = qphy->cfg; + void __iomem *serdes = qphy->serdes; +- void __iomem *pcs = qphy->pcs; ++ struct qmp_phy *usb_phy = qmp->usb_phy; + void __iomem *dp_com = qmp->dp_com; + int ret, i; + +@@ -4620,13 +4621,13 @@ static int qcom_qmp_phy_com_init(struct qmp_phy *qphy) + qphy_setbits(serdes, cfg->regs[QPHY_COM_POWER_DOWN_CONTROL], + SW_PWRDN); + } else { +- if (cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL]) +- qphy_setbits(pcs, +- cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL], +- cfg->pwrdn_ctrl); ++ if (usb_phy->cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL]) ++ qphy_setbits(usb_phy->pcs, ++ usb_phy->cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL], ++ usb_phy->cfg->pwrdn_ctrl); + else +- qphy_setbits(pcs, QPHY_POWER_DOWN_CONTROL, +- cfg->pwrdn_ctrl); ++ qphy_setbits(usb_phy->pcs, QPHY_POWER_DOWN_CONTROL, ++ usb_phy->cfg->pwrdn_ctrl); + } + + mutex_unlock(&qmp->phy_mutex); +@@ -4802,7 +4803,7 @@ static int qcom_qmp_phy_power_on(struct phy *phy) ret = reset_control_deassert(qmp->ufs_reset); if (ret) @@ -238184,7 +288248,25 @@ index f14032170b1c1..ed69d455ac0ea 100644 qcom_qmp_phy_configure(pcs_misc, cfg->regs, cfg->pcs_misc_tbl, cfg->pcs_misc_tbl_num); -@@ -5382,6 +5382,11 @@ static const struct phy_ops 
qcom_qmp_pcie_ufs_ops = { +@@ -4984,7 +4985,7 @@ static void qcom_qmp_phy_disable_autonomous_mode(struct qmp_phy *qphy) + static int __maybe_unused qcom_qmp_phy_runtime_suspend(struct device *dev) + { + struct qcom_qmp *qmp = dev_get_drvdata(dev); +- struct qmp_phy *qphy = qmp->phys[0]; ++ struct qmp_phy *qphy = qmp->usb_phy; + const struct qmp_phy_cfg *cfg = qphy->cfg; + + dev_vdbg(dev, "Suspending QMP phy, mode:%d\n", qphy->mode); +@@ -5009,7 +5010,7 @@ static int __maybe_unused qcom_qmp_phy_runtime_suspend(struct device *dev) + static int __maybe_unused qcom_qmp_phy_runtime_resume(struct device *dev) + { + struct qcom_qmp *qmp = dev_get_drvdata(dev); +- struct qmp_phy *qphy = qmp->phys[0]; ++ struct qmp_phy *qphy = qmp->usb_phy; + const struct qmp_phy_cfg *cfg = qphy->cfg; + int ret = 0; + +@@ -5382,6 +5383,26 @@ static const struct phy_ops qcom_qmp_pcie_ufs_ops = { .owner = THIS_MODULE, }; @@ -238192,11 +288274,106 @@ index f14032170b1c1..ed69d455ac0ea 100644 +{ + reset_control_put(data); +} ++ ++static void __iomem *qmp_usb_iomap(struct device *dev, struct device_node *np, ++ int index, bool exclusive) ++{ ++ struct resource res; ++ ++ if (!exclusive) { ++ if (of_address_to_resource(np, index, &res)) ++ return IOMEM_ERR_PTR(-EINVAL); ++ ++ return devm_ioremap(dev, res.start, resource_size(&res)); ++ } ++ ++ return devm_of_iomap(dev, np, index, NULL); ++} + static int qcom_qmp_phy_create(struct device *dev, struct device_node *np, int id, void __iomem *serdes, const struct qmp_phy_cfg *cfg) -@@ -5454,7 +5459,7 @@ int qcom_qmp_phy_create(struct device *dev, struct device_node *np, int id, +@@ -5391,8 +5412,18 @@ int qcom_qmp_phy_create(struct device *dev, struct device_node *np, int id, + struct qmp_phy *qphy; + const struct phy_ops *ops; + char prop_name[MAX_PROP_NAME]; ++ bool exclusive = true; + int ret; + ++ /* ++ * FIXME: These bindings should be fixed to not rely on overlapping ++ * mappings for PCS. ++ */ ++ if (of_device_is_compatible(dev->of_node, "qcom,sdx65-qmp-usb3-uni-phy")) ++ exclusive = false; ++ if (of_device_is_compatible(dev->of_node, "qcom,sm8350-qmp-usb3-uni-phy")) ++ exclusive = false; ++ + qphy = devm_kzalloc(dev, sizeof(*qphy), GFP_KERNEL); + if (!qphy) + return -ENOMEM; +@@ -5405,17 +5436,17 @@ int qcom_qmp_phy_create(struct device *dev, struct device_node *np, int id, + * For dual lane PHYs: tx2 -> 3, rx2 -> 4, pcs_misc (optional) -> 5 + * For single lane PHYs: pcs_misc (optional) -> 3. + */ +- qphy->tx = of_iomap(np, 0); +- if (!qphy->tx) +- return -ENOMEM; ++ qphy->tx = devm_of_iomap(dev, np, 0, NULL); ++ if (IS_ERR(qphy->tx)) ++ return PTR_ERR(qphy->tx); + +- qphy->rx = of_iomap(np, 1); +- if (!qphy->rx) +- return -ENOMEM; ++ qphy->rx = devm_of_iomap(dev, np, 1, NULL); ++ if (IS_ERR(qphy->rx)) ++ return PTR_ERR(qphy->rx); + +- qphy->pcs = of_iomap(np, 2); +- if (!qphy->pcs) +- return -ENOMEM; ++ qphy->pcs = qmp_usb_iomap(dev, np, 2, exclusive); ++ if (IS_ERR(qphy->pcs)) ++ return PTR_ERR(qphy->pcs); + + /* + * If this is a dual-lane PHY, then there should be registers for the +@@ -5424,9 +5455,9 @@ int qcom_qmp_phy_create(struct device *dev, struct device_node *np, int id, + * offset from the first lane. 
+ */ + if (cfg->is_dual_lane_phy) { +- qphy->tx2 = of_iomap(np, 3); +- qphy->rx2 = of_iomap(np, 4); +- if (!qphy->tx2 || !qphy->rx2) { ++ qphy->tx2 = devm_of_iomap(dev, np, 3, NULL); ++ qphy->rx2 = devm_of_iomap(dev, np, 4, NULL); ++ if (IS_ERR(qphy->tx2) || IS_ERR(qphy->rx2)) { + dev_warn(dev, + "Underspecified device tree, falling back to legacy register regions\n"); + +@@ -5436,15 +5467,17 @@ int qcom_qmp_phy_create(struct device *dev, struct device_node *np, int id, + qphy->rx2 = qphy->rx + QMP_PHY_LEGACY_LANE_STRIDE; + + } else { +- qphy->pcs_misc = of_iomap(np, 5); ++ qphy->pcs_misc = devm_of_iomap(dev, np, 5, NULL); + } + + } else { +- qphy->pcs_misc = of_iomap(np, 3); ++ qphy->pcs_misc = devm_of_iomap(dev, np, 3, NULL); + } + +- if (!qphy->pcs_misc) ++ if (IS_ERR(qphy->pcs_misc)) { + dev_vdbg(dev, "PHY pcs_misc-reg not used\n"); ++ qphy->pcs_misc = NULL; ++ } + + /* + * Get PHY's Pipe clock, if any. USB3 and PCIe are PIPE3 +@@ -5454,7 +5487,7 @@ int qcom_qmp_phy_create(struct device *dev, struct device_node *np, int id, * all phys that don't need this. */ snprintf(prop_name, sizeof(prop_name), "pipe%d", id); @@ -238205,7 +288382,7 @@ index f14032170b1c1..ed69d455ac0ea 100644 if (IS_ERR(qphy->pipe_clk)) { if (cfg->type == PHY_TYPE_PCIE || cfg->type == PHY_TYPE_USB3) { -@@ -5476,6 +5481,10 @@ int qcom_qmp_phy_create(struct device *dev, struct device_node *np, int id, +@@ -5476,6 +5509,10 @@ int qcom_qmp_phy_create(struct device *dev, struct device_node *np, int id, dev_err(dev, "failed to get lane%d reset\n", id); return PTR_ERR(qphy->lane_rst); } @@ -238216,6 +288393,47 @@ index f14032170b1c1..ed69d455ac0ea 100644 } if (cfg->type == PHY_TYPE_UFS || cfg->type == PHY_TYPE_PCIE) +@@ -5731,7 +5768,9 @@ static int qcom_qmp_phy_probe(struct platform_device *pdev) + return -ENOMEM; + + pm_runtime_set_active(dev); +- pm_runtime_enable(dev); ++ ret = devm_pm_runtime_enable(dev); ++ if (ret) ++ return ret; + /* + * Prevent runtime pm from being ON by default. Users can enable + * it using power/control in sysfs. +@@ -5756,6 +5795,9 @@ static int qcom_qmp_phy_probe(struct platform_device *pdev) + goto err_node_put; + } + ++ if (cfg->type != PHY_TYPE_DP) ++ qmp->usb_phy = qmp->phys[id]; ++ + /* + * Register the pipe clock provided by phy. + * See function description to see details of this pipe clock. 
+@@ -5778,16 +5820,16 @@ static int qcom_qmp_phy_probe(struct platform_device *pdev) + id++; + } + ++ if (!qmp->usb_phy) ++ return -EINVAL; ++ + phy_provider = devm_of_phy_provider_register(dev, of_phy_simple_xlate); + if (!IS_ERR(phy_provider)) + dev_info(dev, "Registered Qcom-QMP phy\n"); +- else +- pm_runtime_disable(dev); + + return PTR_ERR_OR_ZERO(phy_provider); + + err_node_put: +- pm_runtime_disable(dev); + of_node_put(child); + return ret; + } diff --git a/drivers/phy/qualcomm/phy-qcom-qusb2.c b/drivers/phy/qualcomm/phy-qcom-qusb2.c index 3c1d3b71c825b..f1d97fbd13318 100644 --- a/drivers/phy/qualcomm/phy-qcom-qusb2.c @@ -238289,6 +288507,20 @@ index 04d18d52f700d..d4741c2dbbb56 100644 ret = pinctrl_select_state(uphy->pctl, pins_default); if (ret) +diff --git a/drivers/phy/ralink/phy-mt7621-pci.c b/drivers/phy/ralink/phy-mt7621-pci.c +index 5e6530f545b5c..85888ab2d307a 100644 +--- a/drivers/phy/ralink/phy-mt7621-pci.c ++++ b/drivers/phy/ralink/phy-mt7621-pci.c +@@ -280,7 +280,8 @@ static struct phy *mt7621_pcie_phy_of_xlate(struct device *dev, + } + + static const struct soc_device_attribute mt7621_pci_quirks_match[] = { +- { .soc_id = "mt7621", .revision = "E2" } ++ { .soc_id = "mt7621", .revision = "E2" }, ++ { /* sentinel */ } + }; + + static const struct regmap_config mt7621_pci_phy_regmap_config = { diff --git a/drivers/phy/rockchip/phy-rockchip-inno-dsidphy.c b/drivers/phy/rockchip/phy-rockchip-inno-dsidphy.c index 347dc79a18c18..630e01b5c19b9 100644 --- a/drivers/phy/rockchip/phy-rockchip-inno-dsidphy.c @@ -238310,6 +288542,22 @@ index 347dc79a18c18..630e01b5c19b9 100644 /* * The value of counter for HS Tlpx Time +diff --git a/drivers/phy/rockchip/phy-rockchip-inno-usb2.c b/drivers/phy/rockchip/phy-rockchip-inno-usb2.c +index 4f569d9307b9e..c167b8c5cc860 100644 +--- a/drivers/phy/rockchip/phy-rockchip-inno-usb2.c ++++ b/drivers/phy/rockchip/phy-rockchip-inno-usb2.c +@@ -467,8 +467,10 @@ static int rockchip_usb2phy_power_on(struct phy *phy) + return ret; + + ret = property_enable(base, &rport->port_cfg->phy_sus, false); +- if (ret) ++ if (ret) { ++ clk_disable_unprepare(rphy->clk480m); + return ret; ++ } + + /* waiting for the utmi_clk to become stable */ + usleep_range(1500, 2000); diff --git a/drivers/phy/samsung/phy-exynos-pcie.c b/drivers/phy/samsung/phy-exynos-pcie.c index 578cfe07d07ab..53c9230c29078 100644 --- a/drivers/phy/samsung/phy-exynos-pcie.c @@ -238491,7 +288739,7 @@ index 6700645bcbe6b..3b5ffc16a6947 100644 uniphier_u3ssphy_testio_write(priv, val); uniphier_u3ssphy_testio_write(priv, val | TESTI_WR_EN); diff --git a/drivers/phy/st/phy-stm32-usbphyc.c b/drivers/phy/st/phy-stm32-usbphyc.c -index 937a14fa7448a..cd0747ab62677 100644 +index 937a14fa7448a..27f7e2292cf0b 100644 --- a/drivers/phy/st/phy-stm32-usbphyc.c +++ b/drivers/phy/st/phy-stm32-usbphyc.c @@ -225,7 +225,7 @@ static int stm32_usbphyc_pll_enable(struct stm32_usbphyc *usbphyc) @@ -238514,6 +288762,37 @@ index 937a14fa7448a..cd0747ab62677 100644 } static int stm32_usbphyc_phy_exit(struct phy *phy) +@@ -530,6 +532,8 @@ static int stm32_usbphyc_probe(struct platform_device *pdev) + ret = of_property_read_u32(child, "reg", &index); + if (ret || index > usbphyc->nphys) { + dev_err(&phy->dev, "invalid reg property: %d\n", ret); ++ if (!ret) ++ ret = -EINVAL; + goto put_child; + } + +diff --git a/drivers/phy/ti/Kconfig b/drivers/phy/ti/Kconfig +index 15a3bcf323086..b905902d57508 100644 +--- a/drivers/phy/ti/Kconfig ++++ b/drivers/phy/ti/Kconfig +@@ -23,7 +23,7 @@ config PHY_DM816X_USB + + config 
PHY_AM654_SERDES + tristate "TI AM654 SERDES support" +- depends on OF && ARCH_K3 || COMPILE_TEST ++ depends on OF && (ARCH_K3 || COMPILE_TEST) + depends on COMMON_CLK + select GENERIC_PHY + select MULTIPLEXER +@@ -35,7 +35,7 @@ config PHY_AM654_SERDES + + config PHY_J721E_WIZ + tristate "TI J721E WIZ (SERDES Wrapper) support" +- depends on OF && ARCH_K3 || COMPILE_TEST ++ depends on OF && (ARCH_K3 || COMPILE_TEST) + depends on HAS_IOMEM && OF_ADDRESS + depends on COMMON_CLK + select GENERIC_PHY diff --git a/drivers/phy/ti/phy-am654-serdes.c b/drivers/phy/ti/phy-am654-serdes.c index 2ff56ce77b307..21c0088f5ca9e 100644 --- a/drivers/phy/ti/phy-am654-serdes.c @@ -238641,9 +288920,18 @@ index a3fa03bcd9a30..54064714d73fb 100644 ASPEED_PINCTRL_GROUP(GPIT0), ASPEED_PINCTRL_GROUP(GPIT1), diff --git a/drivers/pinctrl/aspeed/pinctrl-aspeed.c b/drivers/pinctrl/aspeed/pinctrl-aspeed.c -index c94e24aadf922..83d47ff1cea8f 100644 +index c94e24aadf922..5a12fc7cf91fb 100644 --- a/drivers/pinctrl/aspeed/pinctrl-aspeed.c +++ b/drivers/pinctrl/aspeed/pinctrl-aspeed.c +@@ -122,7 +122,7 @@ static int aspeed_disable_sig(struct aspeed_pinmux_data *ctx, + int ret = 0; + + if (!exprs) +- return true; ++ return -EINVAL; + + while (*exprs && !ret) { + ret = aspeed_sig_expr_disable(ctx, *exprs); @@ -236,11 +236,11 @@ int aspeed_pinmux_set_mux(struct pinctrl_dev *pctldev, unsigned int function, const struct aspeed_sig_expr **funcs; const struct aspeed_sig_expr ***prios; @@ -238810,11 +289098,40 @@ index 5082102d7d0d9..ffe39336fcaca 100644 mutex_destroy(&pctldev->mutex); kfree(pctldev); +diff --git a/drivers/pinctrl/devicetree.c b/drivers/pinctrl/devicetree.c +index 3fb2387147189..eac55fee5281c 100644 +--- a/drivers/pinctrl/devicetree.c ++++ b/drivers/pinctrl/devicetree.c +@@ -220,6 +220,8 @@ int pinctrl_dt_to_map(struct pinctrl *p, struct pinctrl_dev *pctldev) + for (state = 0; ; state++) { + /* Retrieve the pinctrl-* property */ + propname = kasprintf(GFP_KERNEL, "pinctrl-%d", state); ++ if (!propname) ++ return -ENOMEM; + prop = of_find_property(np, propname, &size); + kfree(propname); + if (!prop) { diff --git a/drivers/pinctrl/intel/pinctrl-intel.c b/drivers/pinctrl/intel/pinctrl-intel.c -index 85750974d1825..48f55991ae8cc 100644 +index 85750974d1825..cc64eda155f57 100644 --- a/drivers/pinctrl/intel/pinctrl-intel.c +++ b/drivers/pinctrl/intel/pinctrl-intel.c -@@ -451,8 +451,8 @@ static void intel_gpio_set_gpio_mode(void __iomem *padcfg0) +@@ -436,9 +436,14 @@ static void __intel_gpio_set_direction(void __iomem *padcfg0, bool input) + writel(value, padcfg0); + } + ++static int __intel_gpio_get_gpio_mode(u32 value) ++{ ++ return (value & PADCFG0_PMODE_MASK) >> PADCFG0_PMODE_SHIFT; ++} ++ + static int intel_gpio_get_gpio_mode(void __iomem *padcfg0) + { +- return (readl(padcfg0) & PADCFG0_PMODE_MASK) >> PADCFG0_PMODE_SHIFT; ++ return __intel_gpio_get_gpio_mode(readl(padcfg0)); + } + + static void intel_gpio_set_gpio_mode(void __iomem *padcfg0) +@@ -451,8 +456,8 @@ static void intel_gpio_set_gpio_mode(void __iomem *padcfg0) value &= ~PADCFG0_PMODE_MASK; value |= PADCFG0_PMODE_GPIO; @@ -238825,7 +289142,7 @@ index 85750974d1825..48f55991ae8cc 100644 value |= PADCFG0_GPIOTXDIS; /* Disable SCI/SMI/NMI generation */ -@@ -497,9 +497,6 @@ static int intel_gpio_request_enable(struct pinctrl_dev *pctldev, +@@ -497,9 +502,6 @@ static int intel_gpio_request_enable(struct pinctrl_dev *pctldev, intel_gpio_set_gpio_mode(padcfg0); @@ -238835,7 +289152,7 @@ index 85750974d1825..48f55991ae8cc 100644 
raw_spin_unlock_irqrestore(&pctrl->lock, flags); return 0; -@@ -1115,9 +1112,6 @@ static int intel_gpio_irq_type(struct irq_data *d, unsigned int type) +@@ -1115,9 +1117,6 @@ static int intel_gpio_irq_type(struct irq_data *d, unsigned int type) intel_gpio_set_gpio_mode(reg); @@ -238845,7 +289162,7 @@ index 85750974d1825..48f55991ae8cc 100644 value = readl(reg); value &= ~(PADCFG0_RXEVCFG_MASK | PADCFG0_RXINV); -@@ -1216,6 +1210,39 @@ static irqreturn_t intel_gpio_irq(int irq, void *data) +@@ -1216,6 +1215,39 @@ static irqreturn_t intel_gpio_irq(int irq, void *data) return IRQ_RETVAL(ret); } @@ -238885,7 +289202,7 @@ index 85750974d1825..48f55991ae8cc 100644 static int intel_gpio_add_community_ranges(struct intel_pinctrl *pctrl, const struct intel_community *community) { -@@ -1320,6 +1347,7 @@ static int intel_gpio_probe(struct intel_pinctrl *pctrl, int irq) +@@ -1320,6 +1352,7 @@ static int intel_gpio_probe(struct intel_pinctrl *pctrl, int irq) girq->num_parents = 0; girq->default_type = IRQ_TYPE_NONE; girq->handler = handle_bad_irq; @@ -238893,7 +289210,7 @@ index 85750974d1825..48f55991ae8cc 100644 ret = devm_gpiochip_add_data(pctrl->dev, &pctrl->chip, pctrl); if (ret) { -@@ -1598,16 +1626,14 @@ EXPORT_SYMBOL_GPL(intel_pinctrl_probe_by_uid); +@@ -1598,16 +1631,14 @@ EXPORT_SYMBOL_GPL(intel_pinctrl_probe_by_uid); const struct intel_pinctrl_soc_data *intel_pinctrl_get_soc_data(struct platform_device *pdev) { @@ -238915,7 +289232,7 @@ index 85750974d1825..48f55991ae8cc 100644 for (i = 0; table[i]; i++) { if (!strcmp(adev->pnp.unique_id, table[i]->uid)) { data = table[i]; -@@ -1621,7 +1647,7 @@ const struct intel_pinctrl_soc_data *intel_pinctrl_get_soc_data(struct platform_ +@@ -1621,7 +1652,7 @@ const struct intel_pinctrl_soc_data *intel_pinctrl_get_soc_data(struct platform_ if (!id) return ERR_PTR(-ENODEV); @@ -238924,7 +289241,49 @@ index 85750974d1825..48f55991ae8cc 100644 data = table[pdev->id]; } -@@ -1695,26 +1721,6 @@ int intel_pinctrl_suspend_noirq(struct device *dev) +@@ -1630,9 +1661,16 @@ const struct intel_pinctrl_soc_data *intel_pinctrl_get_soc_data(struct platform_ + EXPORT_SYMBOL_GPL(intel_pinctrl_get_soc_data); + + #ifdef CONFIG_PM_SLEEP ++static bool __intel_gpio_is_direct_irq(u32 value) ++{ ++ return (value & PADCFG0_GPIROUTIOXAPIC) && (value & PADCFG0_GPIOTXDIS) && ++ (__intel_gpio_get_gpio_mode(value) == PADCFG0_PMODE_GPIO); ++} ++ + static bool intel_pinctrl_should_save(struct intel_pinctrl *pctrl, unsigned int pin) + { + const struct pin_desc *pd = pin_desc_get(pctrl->pctldev, pin); ++ u32 value; + + if (!pd || !intel_pad_usable(pctrl, pin)) + return false; +@@ -1647,6 +1685,24 @@ static bool intel_pinctrl_should_save(struct intel_pinctrl *pctrl, unsigned int + gpiochip_line_is_irq(&pctrl->chip, intel_pin_to_gpio(pctrl, pin))) + return true; + ++ /* ++ * The firmware on some systems may configure GPIO pins to be ++ * an interrupt source in so called "direct IRQ" mode. In such ++ * cases the GPIO controller driver has no idea if those pins ++ * are being used or not. At the same time, there is a known bug ++ * in the firmwares that don't restore the pin settings correctly ++ * after suspend, i.e. by an unknown reason the Rx value becomes ++ * inverted. ++ * ++ * Hence, let's save and restore the pins that are configured ++ * as GPIOs in the input mode with GPIROUTIOXAPIC bit set. ++ * ++ * See https://bugzilla.kernel.org/show_bug.cgi?id=214749. 
++ */ ++ value = readl(intel_get_padcfg(pctrl, pin, PADCFG0)); ++ if (__intel_gpio_is_direct_irq(value)) ++ return true; ++ + return false; + } + +@@ -1695,26 +1751,6 @@ int intel_pinctrl_suspend_noirq(struct device *dev) } EXPORT_SYMBOL_GPL(intel_pinctrl_suspend_noirq); @@ -238951,6 +289310,20 @@ index 85750974d1825..48f55991ae8cc 100644 static bool intel_gpio_update_reg(void __iomem *reg, u32 mask, u32 value) { u32 curr, updated; +@@ -1794,7 +1830,12 @@ int intel_pinctrl_resume_noirq(struct device *dev) + for (i = 0; i < pctrl->soc->npins; i++) { + const struct pinctrl_pin_desc *desc = &pctrl->soc->pins[i]; + +- if (!intel_pinctrl_should_save(pctrl, desc->number)) ++ if (!(intel_pinctrl_should_save(pctrl, desc->number) || ++ /* ++ * If the firmware mangled the register contents too much, ++ * check the saved value for the Direct IRQ mode. ++ */ ++ __intel_gpio_is_direct_irq(pads[i].padcfg0))) + continue; + + intel_restore_padcfg(pctrl, desc->number, PADCFG0, pads[i].padcfg0); diff --git a/drivers/pinctrl/intel/pinctrl-tigerlake.c b/drivers/pinctrl/intel/pinctrl-tigerlake.c index 0bcd19597e4ad..3ddaeffc04150 100644 --- a/drivers/pinctrl/intel/pinctrl-tigerlake.c @@ -238983,6 +289356,51 @@ index 7040a7a7bd5d1..8a1706c8bb6ec 100644 select PINCTRL_MTK_PARIS config PINCTRL_MT8365 +diff --git a/drivers/pinctrl/mediatek/mtk-eint.c b/drivers/pinctrl/mediatek/mtk-eint.c +index f7b54a5517641..c24583bffa99d 100644 +--- a/drivers/pinctrl/mediatek/mtk-eint.c ++++ b/drivers/pinctrl/mediatek/mtk-eint.c +@@ -287,12 +287,15 @@ static struct irq_chip mtk_eint_irq_chip = { + + static unsigned int mtk_eint_hw_init(struct mtk_eint *eint) + { +- void __iomem *reg = eint->base + eint->regs->dom_en; ++ void __iomem *dom_en = eint->base + eint->regs->dom_en; ++ void __iomem *mask_set = eint->base + eint->regs->mask_set; + unsigned int i; + + for (i = 0; i < eint->hw->ap_num; i += 32) { +- writel(0xffffffff, reg); +- reg += 4; ++ writel(0xffffffff, dom_en); ++ writel(0xffffffff, mask_set); ++ dom_en += 4; ++ mask_set += 4; + } + + return 0; +diff --git a/drivers/pinctrl/mediatek/pinctrl-mt8195.c b/drivers/pinctrl/mediatek/pinctrl-mt8195.c +index a7500e18bb1de..c32884fc7de79 100644 +--- a/drivers/pinctrl/mediatek/pinctrl-mt8195.c ++++ b/drivers/pinctrl/mediatek/pinctrl-mt8195.c +@@ -659,7 +659,7 @@ static const struct mtk_pin_field_calc mt8195_pin_drv_range[] = { + PIN_FIELD_BASE(10, 10, 4, 0x010, 0x10, 9, 3), + PIN_FIELD_BASE(11, 11, 4, 0x000, 0x10, 24, 3), + PIN_FIELD_BASE(12, 12, 4, 0x010, 0x10, 12, 3), +- PIN_FIELD_BASE(13, 13, 4, 0x010, 0x10, 27, 3), ++ PIN_FIELD_BASE(13, 13, 4, 0x000, 0x10, 27, 3), + PIN_FIELD_BASE(14, 14, 4, 0x010, 0x10, 15, 3), + PIN_FIELD_BASE(15, 15, 4, 0x010, 0x10, 0, 3), + PIN_FIELD_BASE(16, 16, 4, 0x010, 0x10, 18, 3), +@@ -708,7 +708,7 @@ static const struct mtk_pin_field_calc mt8195_pin_drv_range[] = { + PIN_FIELD_BASE(78, 78, 3, 0x000, 0x10, 15, 3), + PIN_FIELD_BASE(79, 79, 3, 0x000, 0x10, 18, 3), + PIN_FIELD_BASE(80, 80, 3, 0x000, 0x10, 21, 3), +- PIN_FIELD_BASE(81, 81, 3, 0x000, 0x10, 28, 3), ++ PIN_FIELD_BASE(81, 81, 3, 0x000, 0x10, 24, 3), + PIN_FIELD_BASE(82, 82, 3, 0x000, 0x10, 27, 3), + PIN_FIELD_BASE(83, 83, 3, 0x010, 0x10, 0, 3), + PIN_FIELD_BASE(84, 84, 3, 0x010, 0x10, 3, 3), diff --git a/drivers/pinctrl/mediatek/pinctrl-mt8365.c b/drivers/pinctrl/mediatek/pinctrl-mt8365.c index 79b1fee5a1eba..ddee0db72d264 100644 --- a/drivers/pinctrl/mediatek/pinctrl-mt8365.c @@ -239399,7 +289817,7 @@ index 39828e9c3120a..6dd930a839ecc 100644 if (version == PINCTRL_NMK_STN8815) { 
dev_info(&pdev->dev, diff --git a/drivers/pinctrl/nuvoton/pinctrl-npcm7xx.c b/drivers/pinctrl/nuvoton/pinctrl-npcm7xx.c -index 4d81908d6725d..41136f63014a4 100644 +index 4d81908d6725d..e4a0d16b58cc8 100644 --- a/drivers/pinctrl/nuvoton/pinctrl-npcm7xx.c +++ b/drivers/pinctrl/nuvoton/pinctrl-npcm7xx.c @@ -78,7 +78,6 @@ struct npcm7xx_gpio { @@ -239410,6 +289828,36 @@ index 4d81908d6725d..41136f63014a4 100644 struct irq_chip irq_chip; u32 pinctrl_id; int (*direction_input)(struct gpio_chip *chip, unsigned offset); +@@ -105,12 +104,12 @@ static void npcm_gpio_set(struct gpio_chip *gc, void __iomem *reg, + unsigned long flags; + unsigned long val; + +- spin_lock_irqsave(&gc->bgpio_lock, flags); ++ raw_spin_lock_irqsave(&gc->bgpio_lock, flags); + + val = ioread32(reg) | pinmask; + iowrite32(val, reg); + +- spin_unlock_irqrestore(&gc->bgpio_lock, flags); ++ raw_spin_unlock_irqrestore(&gc->bgpio_lock, flags); + } + + static void npcm_gpio_clr(struct gpio_chip *gc, void __iomem *reg, +@@ -119,12 +118,12 @@ static void npcm_gpio_clr(struct gpio_chip *gc, void __iomem *reg, + unsigned long flags; + unsigned long val; + +- spin_lock_irqsave(&gc->bgpio_lock, flags); ++ raw_spin_lock_irqsave(&gc->bgpio_lock, flags); + + val = ioread32(reg) & ~pinmask; + iowrite32(val, reg); + +- spin_unlock_irqrestore(&gc->bgpio_lock, flags); ++ raw_spin_unlock_irqrestore(&gc->bgpio_lock, flags); + } + + static void npcmgpio_dbg_show(struct seq_file *s, struct gpio_chip *chip) @@ -226,7 +225,7 @@ static void npcmgpio_irq_handler(struct irq_desc *desc) chained_irq_enter(chip, desc); sts = ioread32(bank->base + NPCM7XX_GP_N_EVST); @@ -239761,7 +290209,7 @@ index 4d81908d6725d..41136f63014a4 100644 } diff --git a/drivers/pinctrl/pinconf-generic.c b/drivers/pinctrl/pinconf-generic.c -index 22e8d4c4040e1..b1db28007986e 100644 +index 22e8d4c4040e1..e6fe1330eab9f 100644 --- a/drivers/pinctrl/pinconf-generic.c +++ b/drivers/pinctrl/pinconf-generic.c @@ -30,10 +30,10 @@ static const struct pin_config_item conf_items[] = { @@ -239778,8 +290226,20 @@ index 22e8d4c4040e1..b1db28007986e 100644 PCONFDUMP(PIN_CONFIG_DRIVE_OPEN_DRAIN, "output drive open drain", NULL, false), PCONFDUMP(PIN_CONFIG_DRIVE_OPEN_SOURCE, "output drive open source", NULL, false), PCONFDUMP(PIN_CONFIG_DRIVE_PUSH_PULL, "output drive push pull", NULL, false), +@@ -393,8 +393,10 @@ int pinconf_generic_dt_node_to_map(struct pinctrl_dev *pctldev, + for_each_available_child_of_node(np_config, np) { + ret = pinconf_generic_dt_subnode_to_map(pctldev, np, map, + &reserved_maps, num_maps, type); +- if (ret < 0) ++ if (ret < 0) { ++ of_node_put(np); + goto exit; ++ } + } + return 0; + diff --git a/drivers/pinctrl/pinctrl-amd.c b/drivers/pinctrl/pinctrl-amd.c -index bae9d429b813e..5c4acf2308d4f 100644 +index bae9d429b813e..52d1fe5ec3e74 100644 --- a/drivers/pinctrl/pinctrl-amd.c +++ b/drivers/pinctrl/pinctrl-amd.c @@ -598,14 +598,14 @@ static struct irq_chip amd_gpio_irqchip = { @@ -239799,22 +290259,24 @@ index bae9d429b813e..5c4acf2308d4f 100644 u32 regval; u64 status, mask; -@@ -627,6 +627,14 @@ static irqreturn_t amd_gpio_irq_handler(int irq, void *dev_id) +@@ -627,6 +627,16 @@ static irqreturn_t amd_gpio_irq_handler(int irq, void *dev_id) /* Each status bit covers four pins */ for (i = 0; i < 4; i++) { regval = readl(regs + i); -+ /* caused wake on resume context for shared IRQ */ -+ if (irq < 0 && (regval & BIT(WAKE_STS_OFF))) { ++ ++ if (regval & PIN_IRQ_PENDING) + dev_dbg(&gpio_dev->pdev->dev, -+ "Waking due to GPIO %d: 0x%x", ++ "GPIO %d is active: 0x%x", + irqnr 
+ i, regval); ++ ++ /* caused wake on resume context for shared IRQ */ ++ if (irq < 0 && (regval & BIT(WAKE_STS_OFF))) + return true; -+ } + if (!(regval & PIN_IRQ_PENDING) || !(regval & BIT(INTERRUPT_MASK_OFF))) continue; -@@ -650,9 +658,12 @@ static irqreturn_t amd_gpio_irq_handler(int irq, void *dev_id) +@@ -650,9 +660,12 @@ static irqreturn_t amd_gpio_irq_handler(int irq, void *dev_id) } writel(regval, regs + i); raw_spin_unlock_irqrestore(&gpio_dev->lock, flags); @@ -239828,7 +290290,7 @@ index bae9d429b813e..5c4acf2308d4f 100644 /* Signal EOI to the GPIO unit */ raw_spin_lock_irqsave(&gpio_dev->lock, flags); -@@ -664,6 +675,16 @@ static irqreturn_t amd_gpio_irq_handler(int irq, void *dev_id) +@@ -664,6 +677,16 @@ static irqreturn_t amd_gpio_irq_handler(int irq, void *dev_id) return ret; } @@ -239845,7 +290307,7 @@ index bae9d429b813e..5c4acf2308d4f 100644 static int amd_get_groups_count(struct pinctrl_dev *pctldev) { struct amd_gpio *gpio_dev = pinctrl_dev_get_drvdata(pctldev); -@@ -891,6 +912,7 @@ static int amd_gpio_suspend(struct device *dev) +@@ -891,6 +914,7 @@ static int amd_gpio_suspend(struct device *dev) { struct amd_gpio *gpio_dev = dev_get_drvdata(dev); struct pinctrl_desc *desc = gpio_dev->pctrl->desc; @@ -239853,7 +290315,7 @@ index bae9d429b813e..5c4acf2308d4f 100644 int i; for (i = 0; i < desc->npins; i++) { -@@ -899,7 +921,9 @@ static int amd_gpio_suspend(struct device *dev) +@@ -899,7 +923,9 @@ static int amd_gpio_suspend(struct device *dev) if (!amd_gpio_should_save(gpio_dev, pin)) continue; @@ -239864,7 +290326,7 @@ index bae9d429b813e..5c4acf2308d4f 100644 } return 0; -@@ -909,6 +933,7 @@ static int amd_gpio_resume(struct device *dev) +@@ -909,6 +935,7 @@ static int amd_gpio_resume(struct device *dev) { struct amd_gpio *gpio_dev = dev_get_drvdata(dev); struct pinctrl_desc *desc = gpio_dev->pctrl->desc; @@ -239872,7 +290334,7 @@ index bae9d429b813e..5c4acf2308d4f 100644 int i; for (i = 0; i < desc->npins; i++) { -@@ -917,7 +942,10 @@ static int amd_gpio_resume(struct device *dev) +@@ -917,7 +944,10 @@ static int amd_gpio_resume(struct device *dev) if (!amd_gpio_should_save(gpio_dev, pin)) continue; @@ -239884,7 +290346,7 @@ index bae9d429b813e..5c4acf2308d4f 100644 } return 0; -@@ -1033,6 +1061,7 @@ static int amd_gpio_probe(struct platform_device *pdev) +@@ -1033,6 +1063,7 @@ static int amd_gpio_probe(struct platform_device *pdev) goto out2; platform_set_drvdata(pdev, gpio_dev); @@ -239892,7 +290354,7 @@ index bae9d429b813e..5c4acf2308d4f 100644 dev_dbg(&pdev->dev, "amd gpio driver loaded\n"); return ret; -@@ -1050,6 +1079,7 @@ static int amd_gpio_remove(struct platform_device *pdev) +@@ -1050,6 +1081,7 @@ static int amd_gpio_remove(struct platform_device *pdev) gpio_dev = platform_get_drvdata(pdev); gpiochip_remove(&gpio_dev->gc); @@ -239926,7 +290388,7 @@ index fb713f9c53d0e..3f0143087cc77 100644 } diff --git a/drivers/pinctrl/pinctrl-ingenic.c b/drivers/pinctrl/pinctrl-ingenic.c -index 2712f51eb2381..fa6becca17889 100644 +index 2712f51eb2381..c973123e6de9e 100644 --- a/drivers/pinctrl/pinctrl-ingenic.c +++ b/drivers/pinctrl/pinctrl-ingenic.c @@ -119,6 +119,8 @@ struct ingenic_chip_info { @@ -239938,6 +290400,24 @@ index 2712f51eb2381..fa6becca17889 100644 }; struct ingenic_pinctrl { +@@ -641,7 +643,7 @@ static u8 jz4755_lcd_24bit_funcs[] = { 1, 1, 1, 1, 0, 0, }; + static const struct group_desc jz4755_groups[] = { + INGENIC_PIN_GROUP("uart0-data", jz4755_uart0_data, 0), + INGENIC_PIN_GROUP("uart0-hwflow", jz4755_uart0_hwflow, 0), +- 
INGENIC_PIN_GROUP("uart1-data", jz4755_uart1_data, 0), ++ INGENIC_PIN_GROUP("uart1-data", jz4755_uart1_data, 1), + INGENIC_PIN_GROUP("uart2-data", jz4755_uart2_data, 1), + INGENIC_PIN_GROUP("ssi-dt-b", jz4755_ssi_dt_b, 0), + INGENIC_PIN_GROUP("ssi-dt-f", jz4755_ssi_dt_f, 0), +@@ -695,7 +697,7 @@ static const char *jz4755_ssi_groups[] = { + "ssi-ce1-b", "ssi-ce1-f", + }; + static const char *jz4755_mmc0_groups[] = { "mmc0-1bit", "mmc0-4bit", }; +-static const char *jz4755_mmc1_groups[] = { "mmc0-1bit", "mmc0-4bit", }; ++static const char *jz4755_mmc1_groups[] = { "mmc1-1bit", "mmc1-4bit", }; + static const char *jz4755_i2c_groups[] = { "i2c-data", }; + static const char *jz4755_cim_groups[] = { "cim-data", }; + static const char *jz4755_lcd_groups[] = { @@ -2179,6 +2181,17 @@ static const struct function_desc x1000_functions[] = { { "mac", x1000_mac_groups, ARRAY_SIZE(x1000_mac_groups), }, }; @@ -240046,7 +290526,7 @@ index 2712f51eb2381..fa6becca17889 100644 jzpc->map = devm_regmap_init_mmio(dev, base, ®map_config); if (IS_ERR(jzpc->map)) { diff --git a/drivers/pinctrl/pinctrl-k210.c b/drivers/pinctrl/pinctrl-k210.c -index 49e32684dbb25..ecab6bf63dc6d 100644 +index 49e32684dbb25..ad4db99094a79 100644 --- a/drivers/pinctrl/pinctrl-k210.c +++ b/drivers/pinctrl/pinctrl-k210.c @@ -482,7 +482,7 @@ static int k210_pinconf_get_drive(unsigned int max_strength_ua) @@ -240067,6 +290547,18 @@ index 49e32684dbb25..ecab6bf63dc6d 100644 break; case PIN_CONFIG_DRIVE_STRENGTH: arg *= 1000; +@@ -862,8 +862,10 @@ static int k210_pinctrl_dt_node_to_map(struct pinctrl_dev *pctldev, + for_each_available_child_of_node(np_config, np) { + ret = k210_pinctrl_dt_subnode_to_map(pctldev, np, map, + &reserved_maps, num_maps); +- if (ret < 0) ++ if (ret < 0) { ++ of_node_put(np); + goto err; ++ } + } + return 0; + diff --git a/drivers/pinctrl/pinctrl-microchip-sgpio.c b/drivers/pinctrl/pinctrl-microchip-sgpio.c index 072bccdea2a5d..dfa374195694d 100644 --- a/drivers/pinctrl/pinctrl-microchip-sgpio.c @@ -240187,9 +290679,36 @@ index 8d271c6b0ca41..5de691c630b4f 100644 } diff --git a/drivers/pinctrl/pinctrl-rockchip.c b/drivers/pinctrl/pinctrl-rockchip.c -index 5ce260f152ce5..bae6cc83ea362 100644 +index 5ce260f152ce5..c33cbf7568db5 100644 --- a/drivers/pinctrl/pinctrl-rockchip.c +++ b/drivers/pinctrl/pinctrl-rockchip.c +@@ -285,6 +285,7 @@ static int rockchip_dt_node_to_map(struct pinctrl_dev *pctldev, + { + struct rockchip_pinctrl *info = pinctrl_dev_get_drvdata(pctldev); + const struct rockchip_pin_group *grp; ++ struct device *dev = info->dev; + struct pinctrl_map *new_map; + struct device_node *parent; + int map_num = 1; +@@ -296,8 +297,7 @@ static int rockchip_dt_node_to_map(struct pinctrl_dev *pctldev, + */ + grp = pinctrl_name_to_group(info, np->name); + if (!grp) { +- dev_err(info->dev, "unable to find group for node %pOFn\n", +- np); ++ dev_err(dev, "unable to find group for node %pOFn\n", np); + return -EINVAL; + } + +@@ -331,7 +331,7 @@ static int rockchip_dt_node_to_map(struct pinctrl_dev *pctldev, + new_map[i].data.configs.num_configs = grp->data[i].nconfigs; + } + +- dev_dbg(pctldev->dev, "maps: function %s group %s num %d\n", ++ dev_dbg(dev, "maps: function %s group %s num %d\n", + (*map)->data.mux.function, (*map)->data.mux.group, map_num); + + return 0; @@ -455,95 +455,110 @@ static struct rockchip_mux_recalced_data rk3128_mux_recalced_data[] = { static struct rockchip_mux_recalced_data rk3308_mux_recalced_data[] = { @@ -240316,7 +290835,766 @@ index 5ce260f152ce5..bae6cc83ea362 100644 }, }; -@@ -2057,11 
+2072,24 @@ static int rockchip_pmx_set(struct pinctrl_dev *pctldev, unsigned selector,
+@@ -593,14 +608,54 @@ static void rockchip_get_recalced_mux(struct rockchip_pin_bank *bank, int pin,
+ }
+
+ static struct rockchip_mux_route_data px30_mux_route_data[] = {
++ RK_MUXROUTE_SAME(2, RK_PB4, 1, 0x184, BIT(16 + 7)), /* cif-d0m0 */
++ RK_MUXROUTE_SAME(3, RK_PA1, 3, 0x184, BIT(16 + 7) | BIT(7)), /* cif-d0m1 */
++ RK_MUXROUTE_SAME(2, RK_PB6, 1, 0x184, BIT(16 + 7)), /* cif-d1m0 */
++ RK_MUXROUTE_SAME(3, RK_PA2, 3, 0x184, BIT(16 + 7) | BIT(7)), /* cif-d1m1 */
+ RK_MUXROUTE_SAME(2, RK_PA0, 1, 0x184, BIT(16 + 7)), /* cif-d2m0 */
+ RK_MUXROUTE_SAME(3, RK_PA3, 3, 0x184, BIT(16 + 7) | BIT(7)), /* cif-d2m1 */
++ RK_MUXROUTE_SAME(2, RK_PA1, 1, 0x184, BIT(16 + 7)), /* cif-d3m0 */
++ RK_MUXROUTE_SAME(3, RK_PA5, 3, 0x184, BIT(16 + 7) | BIT(7)), /* cif-d3m1 */
++ RK_MUXROUTE_SAME(2, RK_PA2, 1, 0x184, BIT(16 + 7)), /* cif-d4m0 */
++ RK_MUXROUTE_SAME(3, RK_PA7, 3, 0x184, BIT(16 + 7) | BIT(7)), /* cif-d4m1 */
++ RK_MUXROUTE_SAME(2, RK_PA3, 1, 0x184, BIT(16 + 7)), /* cif-d5m0 */
++ RK_MUXROUTE_SAME(3, RK_PB0, 3, 0x184, BIT(16 + 7) | BIT(7)), /* cif-d5m1 */
++ RK_MUXROUTE_SAME(2, RK_PA4, 1, 0x184, BIT(16 + 7)), /* cif-d6m0 */
++ RK_MUXROUTE_SAME(3, RK_PB1, 3, 0x184, BIT(16 + 7) | BIT(7)), /* cif-d6m1 */
++ RK_MUXROUTE_SAME(2, RK_PA5, 1, 0x184, BIT(16 + 7)), /* cif-d7m0 */
++ RK_MUXROUTE_SAME(3, RK_PB4, 3, 0x184, BIT(16 + 7) | BIT(7)), /* cif-d7m1 */
++ RK_MUXROUTE_SAME(2, RK_PA6, 1, 0x184, BIT(16 + 7)), /* cif-d8m0 */
++ RK_MUXROUTE_SAME(3, RK_PB6, 3, 0x184, BIT(16 + 7) | BIT(7)), /* cif-d8m1 */
++ RK_MUXROUTE_SAME(2, RK_PA7, 1, 0x184, BIT(16 + 7)), /* cif-d9m0 */
++ RK_MUXROUTE_SAME(3, RK_PB7, 3, 0x184, BIT(16 + 7) | BIT(7)), /* cif-d9m1 */
++ RK_MUXROUTE_SAME(2, RK_PB7, 1, 0x184, BIT(16 + 7)), /* cif-d10m0 */
++ RK_MUXROUTE_SAME(3, RK_PC6, 3, 0x184, BIT(16 + 7) | BIT(7)), /* cif-d10m1 */
++ RK_MUXROUTE_SAME(2, RK_PC0, 1, 0x184, BIT(16 + 7)), /* cif-d11m0 */
++ RK_MUXROUTE_SAME(3, RK_PC7, 3, 0x184, BIT(16 + 7) | BIT(7)), /* cif-d11m1 */
++ RK_MUXROUTE_SAME(2, RK_PB0, 1, 0x184, BIT(16 + 7)), /* cif-vsyncm0 */
++ RK_MUXROUTE_SAME(3, RK_PD1, 3, 0x184, BIT(16 + 7) | BIT(7)), /* cif-vsyncm1 */
++ RK_MUXROUTE_SAME(2, RK_PB1, 1, 0x184, BIT(16 + 7)), /* cif-hrefm0 */
++ RK_MUXROUTE_SAME(3, RK_PD2, 3, 0x184, BIT(16 + 7) | BIT(7)), /* cif-hrefm1 */
++ RK_MUXROUTE_SAME(2, RK_PB2, 1, 0x184, BIT(16 + 7)), /* cif-clkinm0 */
++ RK_MUXROUTE_SAME(3, RK_PD3, 3, 0x184, BIT(16 + 7) | BIT(7)), /* cif-clkinm1 */
++ RK_MUXROUTE_SAME(2, RK_PB3, 1, 0x184, BIT(16 + 7)), /* cif-clkoutm0 */
++ RK_MUXROUTE_SAME(3, RK_PD0, 3, 0x184, BIT(16 + 7) | BIT(7)), /* cif-clkoutm1 */
+ RK_MUXROUTE_SAME(3, RK_PC6, 2, 0x184, BIT(16 + 8)), /* pdm-m0 */
+ RK_MUXROUTE_SAME(2, RK_PC6, 1, 0x184, BIT(16 + 8) | BIT(8)), /* pdm-m1 */
++ RK_MUXROUTE_SAME(3, RK_PD3, 2, 0x184, BIT(16 + 8)), /* pdm-sdi0m0 */
++ RK_MUXROUTE_SAME(2, RK_PC5, 2, 0x184, BIT(16 + 8) | BIT(8)), /* pdm-sdi0m1 */
+ RK_MUXROUTE_SAME(1, RK_PD3, 2, 0x184, BIT(16 + 10)), /* uart2-rxm0 */
+ RK_MUXROUTE_SAME(2, RK_PB6, 2, 0x184, BIT(16 + 10) | BIT(10)), /* uart2-rxm1 */
++ RK_MUXROUTE_SAME(1, RK_PD2, 2, 0x184, BIT(16 + 10)), /* uart2-txm0 */
++ RK_MUXROUTE_SAME(2, RK_PB4, 2, 0x184, BIT(16 + 10) | BIT(10)), /* uart2-txm1 */
+ RK_MUXROUTE_SAME(0, RK_PC1, 2, 0x184, BIT(16 + 9)), /* uart3-rxm0 */
+ RK_MUXROUTE_SAME(1, RK_PB7, 2, 0x184, BIT(16 + 9) | BIT(9)), /* uart3-rxm1 */
++ RK_MUXROUTE_SAME(0, RK_PC0, 2, 0x184, BIT(16 + 9)), /* uart3-txm0 */
++ RK_MUXROUTE_SAME(1, RK_PB6, 2, 0x184, BIT(16 + 9) | BIT(9)), /* uart3-txm1 */
++ RK_MUXROUTE_SAME(0, RK_PC2, 2, 0x184, BIT(16 + 9)), /* uart3-ctsm0 */
++ RK_MUXROUTE_SAME(1, RK_PB4, 2, 0x184, BIT(16 + 9) | BIT(9)), /* uart3-ctsm1 */
++ RK_MUXROUTE_SAME(0, RK_PC3, 2, 0x184, BIT(16 + 9)), /* uart3-rtsm0 */
++ RK_MUXROUTE_SAME(1, RK_PB5, 2, 0x184, BIT(16 + 9) | BIT(9)), /* uart3-rtsm1 */
+ };
+
+ static struct rockchip_mux_route_data rk3128_mux_route_data[] = {
+@@ -703,19 +758,19 @@ static struct rockchip_mux_route_data rk3568_mux_route_data[] = {
+ RK_MUXROUTE_PMU(0, RK_PB5, 4, 0x0110, WRITE_MASK_VAL(3, 2, 1)), /* PWM1 IO mux M1 */
+ RK_MUXROUTE_PMU(0, RK_PC1, 1, 0x0110, WRITE_MASK_VAL(5, 4, 0)), /* PWM2 IO mux M0 */
+ RK_MUXROUTE_PMU(0, RK_PB6, 4, 0x0110, WRITE_MASK_VAL(5, 4, 1)), /* PWM2 IO mux M1 */
+- RK_MUXROUTE_PMU(0, RK_PB3, 2, 0x0300, WRITE_MASK_VAL(0, 0, 0)), /* CAN0 IO mux M0 */
++ RK_MUXROUTE_GRF(0, RK_PB3, 2, 0x0300, WRITE_MASK_VAL(0, 0, 0)), /* CAN0 IO mux M0 */
+ RK_MUXROUTE_GRF(2, RK_PA1, 4, 0x0300, WRITE_MASK_VAL(0, 0, 1)), /* CAN0 IO mux M1 */
+ RK_MUXROUTE_GRF(1, RK_PA1, 3, 0x0300, WRITE_MASK_VAL(2, 2, 0)), /* CAN1 IO mux M0 */
+ RK_MUXROUTE_GRF(4, RK_PC3, 3, 0x0300, WRITE_MASK_VAL(2, 2, 1)), /* CAN1 IO mux M1 */
+ RK_MUXROUTE_GRF(4, RK_PB5, 3, 0x0300, WRITE_MASK_VAL(4, 4, 0)), /* CAN2 IO mux M0 */
+ RK_MUXROUTE_GRF(2, RK_PB2, 4, 0x0300, WRITE_MASK_VAL(4, 4, 1)), /* CAN2 IO mux M1 */
+ RK_MUXROUTE_GRF(4, RK_PC4, 1, 0x0300, WRITE_MASK_VAL(6, 6, 0)), /* HPDIN IO mux M0 */
+- RK_MUXROUTE_PMU(0, RK_PC2, 2, 0x0300, WRITE_MASK_VAL(6, 6, 1)), /* HPDIN IO mux M1 */
++ RK_MUXROUTE_GRF(0, RK_PC2, 2, 0x0300, WRITE_MASK_VAL(6, 6, 1)), /* HPDIN IO mux M1 */
+ RK_MUXROUTE_GRF(3, RK_PB1, 3, 0x0300, WRITE_MASK_VAL(8, 8, 0)), /* GMAC1 IO mux M0 */
+ RK_MUXROUTE_GRF(4, RK_PA7, 3, 0x0300, WRITE_MASK_VAL(8, 8, 1)), /* GMAC1 IO mux M1 */
+ RK_MUXROUTE_GRF(4, RK_PD1, 1, 0x0300, WRITE_MASK_VAL(10, 10, 0)), /* HDMITX IO mux M0 */
+- RK_MUXROUTE_PMU(0, RK_PC7, 1, 0x0300, WRITE_MASK_VAL(10, 10, 1)), /* HDMITX IO mux M1 */
+- RK_MUXROUTE_PMU(0, RK_PB6, 1, 0x0300, WRITE_MASK_VAL(14, 14, 0)), /* I2C2 IO mux M0 */
++ RK_MUXROUTE_GRF(0, RK_PC7, 1, 0x0300, WRITE_MASK_VAL(10, 10, 1)), /* HDMITX IO mux M1 */
++ RK_MUXROUTE_GRF(0, RK_PB6, 1, 0x0300, WRITE_MASK_VAL(14, 14, 0)), /* I2C2 IO mux M0 */
+ RK_MUXROUTE_GRF(4, RK_PB4, 1, 0x0300, WRITE_MASK_VAL(14, 14, 1)), /* I2C2 IO mux M1 */
+ RK_MUXROUTE_GRF(1, RK_PA0, 1, 0x0304, WRITE_MASK_VAL(0, 0, 0)), /* I2C3 IO mux M0 */
+ RK_MUXROUTE_GRF(3, RK_PB6, 4, 0x0304, WRITE_MASK_VAL(0, 0, 1)), /* I2C3 IO mux M1 */
+@@ -741,7 +796,7 @@ static struct rockchip_mux_route_data rk3568_mux_route_data[] = {
+ RK_MUXROUTE_GRF(4, RK_PC3, 1, 0x0308, WRITE_MASK_VAL(12, 12, 1)), /* PWM15 IO mux M1 */
+ RK_MUXROUTE_GRF(3, RK_PD2, 3, 0x0308, WRITE_MASK_VAL(14, 14, 0)), /* SDMMC2 IO mux M0 */
+ RK_MUXROUTE_GRF(3, RK_PA5, 5, 0x0308, WRITE_MASK_VAL(14, 14, 1)), /* SDMMC2 IO mux M1 */
+- RK_MUXROUTE_PMU(0, RK_PB5, 2, 0x030c, WRITE_MASK_VAL(0, 0, 0)), /* SPI0 IO mux M0 */
++ RK_MUXROUTE_GRF(0, RK_PB5, 2, 0x030c, WRITE_MASK_VAL(0, 0, 0)), /* SPI0 IO mux M0 */
+ RK_MUXROUTE_GRF(2, RK_PD3, 3, 0x030c, WRITE_MASK_VAL(0, 0, 1)), /* SPI0 IO mux M1 */
+ RK_MUXROUTE_GRF(2, RK_PB5, 3, 0x030c, WRITE_MASK_VAL(2, 2, 0)), /* SPI1 IO mux M0 */
+ RK_MUXROUTE_GRF(3, RK_PC3, 3, 0x030c, WRITE_MASK_VAL(2, 2, 1)), /* SPI1 IO mux M1 */
+@@ -750,8 +805,8 @@ static struct rockchip_mux_route_data rk3568_mux_route_data[] = {
+ RK_MUXROUTE_GRF(4, RK_PB3, 4, 0x030c, WRITE_MASK_VAL(6, 6, 0)), /* SPI3 IO mux M0 */
+ RK_MUXROUTE_GRF(4, RK_PC2, 2, 0x030c, WRITE_MASK_VAL(6, 6, 1)), /* SPI3 IO mux M1 */
+ RK_MUXROUTE_GRF(2, RK_PB4, 2, 0x030c, WRITE_MASK_VAL(8, 8, 0)), /* UART1 IO mux M0 */
+- RK_MUXROUTE_PMU(0, RK_PD1, 1, 0x030c, WRITE_MASK_VAL(8, 8, 1)), /* UART1 IO mux M1 */
+- RK_MUXROUTE_PMU(0, RK_PD1, 1, 0x030c, WRITE_MASK_VAL(10, 10, 0)), /* UART2 IO mux M0 */
++ RK_MUXROUTE_GRF(3, RK_PD6, 4, 0x030c, WRITE_MASK_VAL(8, 8, 1)), /* UART1 IO mux M1 */
++ RK_MUXROUTE_GRF(0, RK_PD1, 1, 0x030c, WRITE_MASK_VAL(10, 10, 0)), /* UART2 IO mux M0 */
+ RK_MUXROUTE_GRF(1, RK_PD5, 2, 0x030c, WRITE_MASK_VAL(10, 10, 1)), /* UART2 IO mux M1 */
+ RK_MUXROUTE_GRF(1, RK_PA1, 2, 0x030c, WRITE_MASK_VAL(12, 12, 0)), /* UART3 IO mux M0 */
+ RK_MUXROUTE_GRF(3, RK_PB7, 4, 0x030c, WRITE_MASK_VAL(12, 12, 1)), /* UART3 IO mux M1 */
+@@ -781,13 +836,13 @@ static struct rockchip_mux_route_data rk3568_mux_route_data[] = {
+ RK_MUXROUTE_GRF(3, RK_PD6, 5, 0x0314, WRITE_MASK_VAL(1, 0, 1)), /* PDM IO mux M1 */
+ RK_MUXROUTE_GRF(4, RK_PA0, 4, 0x0314, WRITE_MASK_VAL(1, 0, 1)), /* PDM IO mux M1 */
+ RK_MUXROUTE_GRF(3, RK_PC4, 5, 0x0314, WRITE_MASK_VAL(1, 0, 2)), /* PDM IO mux M2 */
+- RK_MUXROUTE_PMU(0, RK_PA5, 3, 0x0314, WRITE_MASK_VAL(3, 2, 0)), /* PCIE20 IO mux M0 */
++ RK_MUXROUTE_GRF(0, RK_PA5, 3, 0x0314, WRITE_MASK_VAL(3, 2, 0)), /* PCIE20 IO mux M0 */
+ RK_MUXROUTE_GRF(2, RK_PD0, 4, 0x0314, WRITE_MASK_VAL(3, 2, 1)), /* PCIE20 IO mux M1 */
+ RK_MUXROUTE_GRF(1, RK_PB0, 4, 0x0314, WRITE_MASK_VAL(3, 2, 2)), /* PCIE20 IO mux M2 */
+- RK_MUXROUTE_PMU(0, RK_PA4, 3, 0x0314, WRITE_MASK_VAL(5, 4, 0)), /* PCIE30X1 IO mux M0 */
++ RK_MUXROUTE_GRF(0, RK_PA4, 3, 0x0314, WRITE_MASK_VAL(5, 4, 0)), /* PCIE30X1 IO mux M0 */
+ RK_MUXROUTE_GRF(2, RK_PD2, 4, 0x0314, WRITE_MASK_VAL(5, 4, 1)), /* PCIE30X1 IO mux M1 */
+ RK_MUXROUTE_GRF(1, RK_PA5, 4, 0x0314, WRITE_MASK_VAL(5, 4, 2)), /* PCIE30X1 IO mux M2 */
+- RK_MUXROUTE_PMU(0, RK_PA6, 2, 0x0314, WRITE_MASK_VAL(7, 6, 0)), /* PCIE30X2 IO mux M0 */
++ RK_MUXROUTE_GRF(0, RK_PA6, 2, 0x0314, WRITE_MASK_VAL(7, 6, 0)), /* PCIE30X2 IO mux M0 */
+ RK_MUXROUTE_GRF(2, RK_PD4, 4, 0x0314, WRITE_MASK_VAL(7, 6, 1)), /* PCIE30X2 IO mux M1 */
+ RK_MUXROUTE_GRF(4, RK_PC2, 4, 0x0314, WRITE_MASK_VAL(7, 6, 2)), /* PCIE30X2 IO mux M2 */
+ };
+@@ -872,20 +927,20 @@ static int rockchip_verify_mux(struct rockchip_pin_bank *bank,
+ int pin, int mux)
+ {
+ struct rockchip_pinctrl *info = bank->drvdata;
++ struct device *dev = info->dev;
+ int iomux_num = (pin / 8);
+
+ if (iomux_num > 3)
+ return -EINVAL;
+
+ if (bank->iomux[iomux_num].type & IOMUX_UNROUTED) {
+- dev_err(info->dev, "pin %d is unrouted\n", pin);
++ dev_err(dev, "pin %d is unrouted\n", pin);
+ return -EINVAL;
+ }
+
+ if (bank->iomux[iomux_num].type & IOMUX_GPIO_ONLY) {
+ if (mux != RK_FUNC_GPIO) {
+- dev_err(info->dev,
+- "pin %d only supports a gpio mux\n", pin);
++ dev_err(dev, "pin %d only supports a gpio mux\n", pin);
+ return -ENOTSUPP;
+ }
+ }
+@@ -909,6 +964,7 @@ static int rockchip_verify_mux(struct rockchip_pin_bank *bank,
+ static int rockchip_set_mux(struct rockchip_pin_bank *bank, int pin, int mux)
+ {
+ struct rockchip_pinctrl *info = bank->drvdata;
++ struct device *dev = info->dev;
+ int iomux_num = (pin / 8);
+ struct regmap *regmap;
+ int reg, ret, mask, mux_type;
+@@ -922,8 +978,7 @@ static int rockchip_set_mux(struct rockchip_pin_bank *bank, int pin, int mux)
+ if (bank->iomux[iomux_num].type & IOMUX_GPIO_ONLY)
+ return 0;
+
+- dev_dbg(info->dev, "setting mux of GPIO%d-%d to %d\n",
+- bank->bank_num, pin, mux);
++ dev_dbg(dev, "setting mux of GPIO%d-%d to %d\n", bank->bank_num, pin, mux);
+
+ regmap = (bank->iomux[iomux_num].type & IOMUX_SOURCE_PMU)
+ ? info->regmap_pmu : info->regmap_base;
+@@ -984,9 +1039,9 @@ static int rockchip_set_mux(struct rockchip_pin_bank *bank, int pin, int mux)
+ #define PX30_PULL_PINS_PER_REG 8
+ #define PX30_PULL_BANK_STRIDE 16
+
+-static void px30_calc_pull_reg_and_bit(struct rockchip_pin_bank *bank,
+- int pin_num, struct regmap **regmap,
+- int *reg, u8 *bit)
++static int px30_calc_pull_reg_and_bit(struct rockchip_pin_bank *bank,
++ int pin_num, struct regmap **regmap,
++ int *reg, u8 *bit)
+ {
+ struct rockchip_pinctrl *info = bank->drvdata;
+
+@@ -1006,6 +1061,8 @@ static void px30_calc_pull_reg_and_bit(struct rockchip_pin_bank *bank,
+ *reg += ((pin_num / PX30_PULL_PINS_PER_REG) * 4);
+ *bit = (pin_num % PX30_PULL_PINS_PER_REG);
+ *bit *= PX30_PULL_BITS_PER_PIN;
++
++ return 0;
+ }
+
+ #define PX30_DRV_PMU_OFFSET 0x20
+@@ -1014,9 +1071,9 @@ static void px30_calc_pull_reg_and_bit(struct rockchip_pin_bank *bank,
+ #define PX30_DRV_PINS_PER_REG 8
+ #define PX30_DRV_BANK_STRIDE 16
+
+-static void px30_calc_drv_reg_and_bit(struct rockchip_pin_bank *bank,
+- int pin_num, struct regmap **regmap,
+- int *reg, u8 *bit)
++static int px30_calc_drv_reg_and_bit(struct rockchip_pin_bank *bank,
++ int pin_num, struct regmap **regmap,
++ int *reg, u8 *bit)
+ {
+ struct rockchip_pinctrl *info = bank->drvdata;
+
+@@ -1036,6 +1093,8 @@ static void px30_calc_drv_reg_and_bit(struct rockchip_pin_bank *bank,
+ *reg += ((pin_num / PX30_DRV_PINS_PER_REG) * 4);
+ *bit = (pin_num % PX30_DRV_PINS_PER_REG);
+ *bit *= PX30_DRV_BITS_PER_PIN;
++
++ return 0;
+ }
+
+ #define PX30_SCHMITT_PMU_OFFSET 0x38
+@@ -1075,9 +1134,9 @@ static int px30_calc_schmitt_reg_and_bit(struct rockchip_pin_bank *bank,
+ #define RV1108_PULL_BITS_PER_PIN 2
+ #define RV1108_PULL_BANK_STRIDE 16
+
+-static void rv1108_calc_pull_reg_and_bit(struct rockchip_pin_bank *bank,
+- int pin_num, struct regmap **regmap,
+- int *reg, u8 *bit)
++static int rv1108_calc_pull_reg_and_bit(struct rockchip_pin_bank *bank,
++ int pin_num, struct regmap **regmap,
++ int *reg, u8 *bit)
+ {
+ struct rockchip_pinctrl *info = bank->drvdata;
+
+@@ -1096,6 +1155,8 @@ static void rv1108_calc_pull_reg_and_bit(struct rockchip_pin_bank *bank,
+ *reg += ((pin_num / RV1108_PULL_PINS_PER_REG) * 4);
+ *bit = (pin_num % RV1108_PULL_PINS_PER_REG);
+ *bit *= RV1108_PULL_BITS_PER_PIN;
++
++ return 0;
+ }
+
+ #define RV1108_DRV_PMU_OFFSET 0x20
+@@ -1104,9 +1165,9 @@ static void rv1108_calc_pull_reg_and_bit(struct rockchip_pin_bank *bank,
+ #define RV1108_DRV_PINS_PER_REG 8
+ #define RV1108_DRV_BANK_STRIDE 16
+
+-static void rv1108_calc_drv_reg_and_bit(struct rockchip_pin_bank *bank,
+- int pin_num, struct regmap **regmap,
+- int *reg, u8 *bit)
++static int rv1108_calc_drv_reg_and_bit(struct rockchip_pin_bank *bank,
++ int pin_num, struct regmap **regmap,
++ int *reg, u8 *bit)
+ {
+ struct rockchip_pinctrl *info = bank->drvdata;
+
+@@ -1126,6 +1187,8 @@ static void rv1108_calc_drv_reg_and_bit(struct rockchip_pin_bank *bank,
+ *reg += ((pin_num / RV1108_DRV_PINS_PER_REG) * 4);
+ *bit = pin_num % RV1108_DRV_PINS_PER_REG;
+ *bit *= RV1108_DRV_BITS_PER_PIN;
++
++ return 0;
+ }
+
+ #define RV1108_SCHMITT_PMU_OFFSET 0x30
+@@ -1182,9 +1245,9 @@ static int rk3308_calc_schmitt_reg_and_bit(struct rockchip_pin_bank *bank,
+ #define RK2928_PULL_PINS_PER_REG 16
+ #define RK2928_PULL_BANK_STRIDE 8
+
+-static void rk2928_calc_pull_reg_and_bit(struct rockchip_pin_bank *bank,
+- int pin_num, struct regmap **regmap,
+- int *reg, u8 *bit)
++static int rk2928_calc_pull_reg_and_bit(struct rockchip_pin_bank *bank,
++ int pin_num, struct regmap **regmap,
++ int *reg, u8 *bit)
+ {
+ struct rockchip_pinctrl *info = bank->drvdata;
+
+@@ -1194,13 +1257,15 @@ static void rk2928_calc_pull_reg_and_bit(struct rockchip_pin_bank *bank,
+ *reg += (pin_num / RK2928_PULL_PINS_PER_REG) * 4;
+
+ *bit = pin_num % RK2928_PULL_PINS_PER_REG;
++
++ return 0;
+ };
+
+ #define RK3128_PULL_OFFSET 0x118
+
+-static void rk3128_calc_pull_reg_and_bit(struct rockchip_pin_bank *bank,
+- int pin_num, struct regmap **regmap,
+- int *reg, u8 *bit)
++static int rk3128_calc_pull_reg_and_bit(struct rockchip_pin_bank *bank,
++ int pin_num, struct regmap **regmap,
++ int *reg, u8 *bit)
+ {
+ struct rockchip_pinctrl *info = bank->drvdata;
+
+@@ -1210,6 +1275,8 @@ static void rk3128_calc_pull_reg_and_bit(struct rockchip_pin_bank *bank,
+ *reg += ((pin_num / RK2928_PULL_PINS_PER_REG) * 4);
+
+ *bit = pin_num % RK2928_PULL_PINS_PER_REG;
++
++ return 0;
+ }
+
+ #define RK3188_PULL_OFFSET 0x164
+@@ -1218,9 +1285,9 @@ static void rk3128_calc_pull_reg_and_bit(struct rockchip_pin_bank *bank,
+ #define RK3188_PULL_BANK_STRIDE 16
+ #define RK3188_PULL_PMU_OFFSET 0x64
+
+-static void rk3188_calc_pull_reg_and_bit(struct rockchip_pin_bank *bank,
+- int pin_num, struct regmap **regmap,
+- int *reg, u8 *bit)
++static int rk3188_calc_pull_reg_and_bit(struct rockchip_pin_bank *bank,
++ int pin_num, struct regmap **regmap,
++ int *reg, u8 *bit)
+ {
+ struct rockchip_pinctrl *info = bank->drvdata;
+
+@@ -1250,12 +1317,14 @@ static void rk3188_calc_pull_reg_and_bit(struct rockchip_pin_bank *bank,
+ *bit = 7 - (pin_num % RK3188_PULL_PINS_PER_REG);
+ *bit *= RK3188_PULL_BITS_PER_PIN;
+ }
++
++ return 0;
+ }
+
+ #define RK3288_PULL_OFFSET 0x140
+-static void rk3288_calc_pull_reg_and_bit(struct rockchip_pin_bank *bank,
+- int pin_num, struct regmap **regmap,
+- int *reg, u8 *bit)
++static int rk3288_calc_pull_reg_and_bit(struct rockchip_pin_bank *bank,
++ int pin_num, struct regmap **regmap,
++ int *reg, u8 *bit)
+ {
+ struct rockchip_pinctrl *info = bank->drvdata;
+
+@@ -1279,6 +1348,8 @@ static void rk3288_calc_pull_reg_and_bit(struct rockchip_pin_bank *bank,
+ *bit = (pin_num % RK3188_PULL_PINS_PER_REG);
+ *bit *= RK3188_PULL_BITS_PER_PIN;
+ }
++
++ return 0;
+ }
+
+ #define RK3288_DRV_PMU_OFFSET 0x70
+@@ -1287,9 +1358,9 @@ static void rk3288_calc_pull_reg_and_bit(struct rockchip_pin_bank *bank,
+ #define RK3288_DRV_PINS_PER_REG 8
+ #define RK3288_DRV_BANK_STRIDE 16
+
+-static void rk3288_calc_drv_reg_and_bit(struct rockchip_pin_bank *bank,
+- int pin_num, struct regmap **regmap,
+- int *reg, u8 *bit)
++static int rk3288_calc_drv_reg_and_bit(struct rockchip_pin_bank *bank,
++ int pin_num, struct regmap **regmap,
++ int *reg, u8 *bit)
+ {
+ struct rockchip_pinctrl *info = bank->drvdata;
+
+@@ -1313,13 +1384,15 @@ static void rk3288_calc_drv_reg_and_bit(struct rockchip_pin_bank *bank,
+ *bit = (pin_num % RK3288_DRV_PINS_PER_REG);
+ *bit *= RK3288_DRV_BITS_PER_PIN;
+ }
++
++ return 0;
+ }
+
+ #define RK3228_PULL_OFFSET 0x100
+
+-static void rk3228_calc_pull_reg_and_bit(struct rockchip_pin_bank *bank,
+- int pin_num, struct regmap **regmap,
+- int *reg, u8 *bit)
++static int rk3228_calc_pull_reg_and_bit(struct rockchip_pin_bank *bank,
++ int pin_num, struct regmap **regmap,
++ int *reg, u8 *bit)
+ {
+ struct rockchip_pinctrl *info = bank->drvdata;
+
+@@ -1330,13 +1403,15 @@ static void rk3228_calc_pull_reg_and_bit(struct rockchip_pin_bank *bank,
+
+ *bit = (pin_num % RK3188_PULL_PINS_PER_REG);
+ *bit *= RK3188_PULL_BITS_PER_PIN;
++
++ return 0;
+ }
+
+ #define RK3228_DRV_GRF_OFFSET 0x200
+
+-static void rk3228_calc_drv_reg_and_bit(struct rockchip_pin_bank *bank,
+- int pin_num, struct regmap **regmap,
+- int *reg, u8 *bit)
++static int rk3228_calc_drv_reg_and_bit(struct rockchip_pin_bank *bank,
++ int pin_num, struct regmap **regmap,
++ int *reg, u8 *bit)
+ {
+ struct rockchip_pinctrl *info = bank->drvdata;
+
+@@ -1347,13 +1422,15 @@ static void rk3228_calc_drv_reg_and_bit(struct rockchip_pin_bank *bank,
+
+ *bit = (pin_num % RK3288_DRV_PINS_PER_REG);
+ *bit *= RK3288_DRV_BITS_PER_PIN;
++
++ return 0;
+ }
+
+ #define RK3308_PULL_OFFSET 0xa0
+
+-static void rk3308_calc_pull_reg_and_bit(struct rockchip_pin_bank *bank,
+- int pin_num, struct regmap **regmap,
+- int *reg, u8 *bit)
++static int rk3308_calc_pull_reg_and_bit(struct rockchip_pin_bank *bank,
++ int pin_num, struct regmap **regmap,
++ int *reg, u8 *bit)
+ {
+ struct rockchip_pinctrl *info = bank->drvdata;
+
+@@ -1364,13 +1441,15 @@ static void rk3308_calc_pull_reg_and_bit(struct rockchip_pin_bank *bank,
+
+ *bit = (pin_num % RK3188_PULL_PINS_PER_REG);
+ *bit *= RK3188_PULL_BITS_PER_PIN;
++
++ return 0;
+ }
+
+ #define RK3308_DRV_GRF_OFFSET 0x100
+
+-static void rk3308_calc_drv_reg_and_bit(struct rockchip_pin_bank *bank,
+- int pin_num, struct regmap **regmap,
+- int *reg, u8 *bit)
++static int rk3308_calc_drv_reg_and_bit(struct rockchip_pin_bank *bank,
++ int pin_num, struct regmap **regmap,
++ int *reg, u8 *bit)
+ {
+ struct rockchip_pinctrl *info = bank->drvdata;
+
+@@ -1381,14 +1460,16 @@ static void rk3308_calc_drv_reg_and_bit(struct rockchip_pin_bank *bank,
+
+ *bit = (pin_num % RK3288_DRV_PINS_PER_REG);
+ *bit *= RK3288_DRV_BITS_PER_PIN;
++
++ return 0;
+ }
+
+ #define RK3368_PULL_GRF_OFFSET 0x100
+ #define RK3368_PULL_PMU_OFFSET 0x10
+
+-static void rk3368_calc_pull_reg_and_bit(struct rockchip_pin_bank *bank,
+- int pin_num, struct regmap **regmap,
+- int *reg, u8 *bit)
++static int rk3368_calc_pull_reg_and_bit(struct rockchip_pin_bank *bank,
++ int pin_num, struct regmap **regmap,
++ int *reg, u8 *bit)
+ {
+ struct rockchip_pinctrl *info = bank->drvdata;
+
+@@ -1412,14 +1493,16 @@ static void rk3368_calc_pull_reg_and_bit(struct rockchip_pin_bank *bank,
+ *bit = (pin_num % RK3188_PULL_PINS_PER_REG);
+ *bit *= RK3188_PULL_BITS_PER_PIN;
+ }
++
++ return 0;
+ }
+
+ #define RK3368_DRV_PMU_OFFSET 0x20
+ #define RK3368_DRV_GRF_OFFSET 0x200
+
+-static void rk3368_calc_drv_reg_and_bit(struct rockchip_pin_bank *bank,
+- int pin_num, struct regmap **regmap,
+- int *reg, u8 *bit)
++static int rk3368_calc_drv_reg_and_bit(struct rockchip_pin_bank *bank,
++ int pin_num, struct regmap **regmap,
++ int *reg, u8 *bit)
+ {
+ struct rockchip_pinctrl *info = bank->drvdata;
+
+@@ -1443,15 +1526,17 @@ static void rk3368_calc_drv_reg_and_bit(struct rockchip_pin_bank *bank,
+ *bit = (pin_num % RK3288_DRV_PINS_PER_REG);
+ *bit *= RK3288_DRV_BITS_PER_PIN;
+ }
++
++ return 0;
+ }
+
+ #define RK3399_PULL_GRF_OFFSET 0xe040
+ #define RK3399_PULL_PMU_OFFSET 0x40
+ #define RK3399_DRV_3BITS_PER_PIN 3
+
+-static void rk3399_calc_pull_reg_and_bit(struct rockchip_pin_bank *bank,
+- int pin_num, struct regmap **regmap,
+- int *reg, u8 *bit)
++static int rk3399_calc_pull_reg_and_bit(struct rockchip_pin_bank *bank,
++ int pin_num, struct regmap **regmap,
++ int *reg, u8 *bit)
+ {
+ struct rockchip_pinctrl *info = bank->drvdata;
+
+@@ -1477,11 +1562,13 @@ static void rk3399_calc_pull_reg_and_bit(struct rockchip_pin_bank *bank,
+ *bit = (pin_num % RK3188_PULL_PINS_PER_REG);
+ *bit *= RK3188_PULL_BITS_PER_PIN;
+ }
++
++ return 0;
+ }
+
+-static void rk3399_calc_drv_reg_and_bit(struct rockchip_pin_bank *bank,
+- int pin_num, struct regmap **regmap,
+- int *reg, u8 *bit)
++static int rk3399_calc_drv_reg_and_bit(struct rockchip_pin_bank *bank,
++ int pin_num, struct regmap **regmap,
++ int *reg, u8 *bit)
+ {
+ struct rockchip_pinctrl *info = bank->drvdata;
+ int drv_num = (pin_num / 8);
+@@ -1498,6 +1585,8 @@ static void rk3399_calc_drv_reg_and_bit(struct rockchip_pin_bank *bank,
+ *bit = (pin_num % 8) * 3;
+ else
+ *bit = (pin_num % 8) * 2;
++
++ return 0;
+ }
+
+ #define RK3568_PULL_PMU_OFFSET 0x20
+@@ -1506,9 +1595,9 @@ static void rk3399_calc_drv_reg_and_bit(struct rockchip_pin_bank *bank,
+ #define RK3568_PULL_PINS_PER_REG 8
+ #define RK3568_PULL_BANK_STRIDE 0x10
+
+-static void rk3568_calc_pull_reg_and_bit(struct rockchip_pin_bank *bank,
+- int pin_num, struct regmap **regmap,
+- int *reg, u8 *bit)
++static int rk3568_calc_pull_reg_and_bit(struct rockchip_pin_bank *bank,
++ int pin_num, struct regmap **regmap,
++ int *reg, u8 *bit)
+ {
+ struct rockchip_pinctrl *info = bank->drvdata;
+
+@@ -1529,6 +1618,8 @@ static void rk3568_calc_pull_reg_and_bit(struct rockchip_pin_bank *bank,
+ *bit = (pin_num % RK3568_PULL_PINS_PER_REG);
+ *bit *= RK3568_PULL_BITS_PER_PIN;
+ }
++
++ return 0;
+ }
+
+ #define RK3568_DRV_PMU_OFFSET 0x70
+@@ -1537,9 +1628,9 @@ static void rk3568_calc_pull_reg_and_bit(struct rockchip_pin_bank *bank,
+ #define RK3568_DRV_PINS_PER_REG 2
+ #define RK3568_DRV_BANK_STRIDE 0x40
+
+-static void rk3568_calc_drv_reg_and_bit(struct rockchip_pin_bank *bank,
+- int pin_num, struct regmap **regmap,
+- int *reg, u8 *bit)
++static int rk3568_calc_drv_reg_and_bit(struct rockchip_pin_bank *bank,
++ int pin_num, struct regmap **regmap,
++ int *reg, u8 *bit)
+ {
+ struct rockchip_pinctrl *info = bank->drvdata;
+
+@@ -1560,6 +1651,8 @@ static void rk3568_calc_drv_reg_and_bit(struct rockchip_pin_bank *bank,
+ *bit = (pin_num % RK3568_DRV_PINS_PER_REG);
+ *bit *= RK3568_DRV_BITS_PER_PIN;
+ }
++
++ return 0;
+ }
+
+ static int rockchip_perpin_drv_list[DRV_TYPE_MAX][8] = {
+@@ -1575,13 +1668,16 @@ static int rockchip_get_drive_perpin(struct rockchip_pin_bank *bank,
+ {
+ struct rockchip_pinctrl *info = bank->drvdata;
+ struct rockchip_pin_ctrl *ctrl = info->ctrl;
++ struct device *dev = info->dev;
+ struct regmap *regmap;
+ int reg, ret;
+ u32 data, temp, rmask_bits;
+ u8 bit;
+ int drv_type = bank->drv[pin_num / 8].drv_type;
+
+- ctrl->drv_calc_reg(bank, pin_num, &regmap, &reg, &bit);
++ ret = ctrl->drv_calc_reg(bank, pin_num, &regmap, &reg, &bit);
++ if (ret)
++ return ret;
+
+ switch (drv_type) {
+ case DRV_TYPE_IO_1V8_3V0_AUTO:
+@@ -1620,7 +1716,7 @@ static int rockchip_get_drive_perpin(struct rockchip_pin_bank *bank,
+ bit -= 16;
+ break;
+ default:
+- dev_err(info->dev, "unsupported bit: %d for pinctrl drive type: %d\n",
++ dev_err(dev, "unsupported bit: %d for pinctrl drive type: %d\n",
+ bit, drv_type);
+ return -EINVAL;
+ }
+@@ -1632,8 +1728,7 @@ static int rockchip_get_drive_perpin(struct rockchip_pin_bank *bank,
+ rmask_bits = RK3288_DRV_BITS_PER_PIN;
+ break;
+ default:
+- dev_err(info->dev, "unsupported pinctrl drive type: %d\n",
+- drv_type);
++ dev_err(dev, "unsupported pinctrl drive type: %d\n", drv_type);
+ return -EINVAL;
+ }
+
+@@ -1652,16 +1747,19 @@ static int rockchip_set_drive_perpin(struct rockchip_pin_bank *bank,
+ {
+ struct rockchip_pinctrl *info = bank->drvdata;
+ struct rockchip_pin_ctrl *ctrl = info->ctrl;
++ struct device *dev = info->dev;
+ struct regmap *regmap;
+ int reg, ret, i;
+ u32 data, rmask, rmask_bits, temp;
+ u8 bit;
+ int drv_type = bank->drv[pin_num / 8].drv_type;
+
+- dev_dbg(info->dev, "setting drive of GPIO%d-%d to %d\n",
++ dev_dbg(dev, "setting drive of GPIO%d-%d to %d\n",
+ bank->bank_num, pin_num, strength);
+
+- ctrl->drv_calc_reg(bank, pin_num, &regmap, &reg, &bit);
++ ret = ctrl->drv_calc_reg(bank, pin_num, &regmap, &reg, &bit);
++ if (ret)
++ return ret;
+ if (ctrl->type == RK3568) {
+ rmask_bits = RK3568_DRV_BITS_PER_PIN;
+ ret = (1 << (strength + 1)) - 1;
+@@ -1680,8 +1778,7 @@ static int rockchip_set_drive_perpin(struct rockchip_pin_bank *bank,
+ }
+
+ if (ret < 0) {
+- dev_err(info->dev, "unsupported driver strength %d\n",
+- strength);
++ dev_err(dev, "unsupported driver strength %d\n", strength);
+ return ret;
+ }
+
+@@ -1720,7 +1817,7 @@ static int rockchip_set_drive_perpin(struct rockchip_pin_bank *bank,
+ bit -= 16;
+ break;
+ default:
+- dev_err(info->dev, "unsupported bit: %d for pinctrl drive type: %d\n",
++ dev_err(dev, "unsupported bit: %d for pinctrl drive type: %d\n",
+ bit, drv_type);
+ return -EINVAL;
+ }
+@@ -1731,8 +1828,7 @@ static int rockchip_set_drive_perpin(struct rockchip_pin_bank *bank,
+ rmask_bits = RK3288_DRV_BITS_PER_PIN;
+ break;
+ default:
+- dev_err(info->dev, "unsupported pinctrl drive type: %d\n",
+- drv_type);
++ dev_err(dev, "unsupported pinctrl drive type: %d\n", drv_type);
+ return -EINVAL;
+ }
+
+@@ -1766,6 +1862,7 @@ static int rockchip_get_pull(struct rockchip_pin_bank *bank, int pin_num)
+ {
+ struct rockchip_pinctrl *info = bank->drvdata;
+ struct rockchip_pin_ctrl *ctrl = info->ctrl;
++ struct device *dev = info->dev;
+ struct regmap *regmap;
+ int reg, ret, pull_type;
+ u8 bit;
+@@ -1775,7 +1872,9 @@ static int rockchip_get_pull(struct rockchip_pin_bank *bank, int pin_num)
+ if (ctrl->type == RK3066B)
+ return PIN_CONFIG_BIAS_DISABLE;
+
+- ctrl->pull_calc_reg(bank, pin_num, &regmap, &reg, &bit);
++ ret = ctrl->pull_calc_reg(bank, pin_num, &regmap, &reg, &bit);
++ if (ret)
++ return ret;
+
+ ret = regmap_read(regmap, reg, &data);
+ if (ret)
+@@ -1794,13 +1893,22 @@ static int rockchip_get_pull(struct rockchip_pin_bank *bank, int pin_num)
+ case RK3308:
+ case RK3368:
+ case RK3399:
++ case RK3568:
+ pull_type = bank->pull_type[pin_num / 8];
+ data >>= bit;
+ data &= (1 << RK3188_PULL_BITS_PER_PIN) - 1;
++ /*
++ * In the TRM, pull-up being 1 for everything except the GPIO0_D3-D6,
++ * where that pull up value becomes 3.
++ */
++ if (ctrl->type == RK3568 && bank->bank_num == 0 && pin_num >= 27 && pin_num <= 30) {
++ if (data == 3)
++ data = 1;
++ }
+
+ return rockchip_pull_list[pull_type][data];
+ default:
+- dev_err(info->dev, "unsupported pinctrl type\n");
++ dev_err(dev, "unsupported pinctrl type\n");
+ return -EINVAL;
+ };
+ }
+@@ -1810,19 +1918,21 @@ static int rockchip_set_pull(struct rockchip_pin_bank *bank,
+ {
+ struct rockchip_pinctrl *info = bank->drvdata;
+ struct rockchip_pin_ctrl *ctrl = info->ctrl;
++ struct device *dev = info->dev;
+ struct regmap *regmap;
+ int reg, ret, i, pull_type;
+ u8 bit;
+ u32 data, rmask;
+
+- dev_dbg(info->dev, "setting pull of GPIO%d-%d to %d\n",
+- bank->bank_num, pin_num, pull);
++ dev_dbg(dev, "setting pull of GPIO%d-%d to %d\n", bank->bank_num, pin_num, pull);
+
+ /* rk3066b does support any pulls */
+ if (ctrl->type == RK3066B)
+ return pull ? -EINVAL : 0;
+
+- ctrl->pull_calc_reg(bank, pin_num, &regmap, &reg, &bit);
++ ret = ctrl->pull_calc_reg(bank, pin_num, &regmap, &reg, &bit);
++ if (ret)
++ return ret;
+
+ switch (ctrl->type) {
+ case RK2928:
+@@ -1850,7 +1960,7 @@ static int rockchip_set_pull(struct rockchip_pin_bank *bank,
+ }
+ }
+ /*
+- * In the TRM, pull-up being 1 for everything except the GPIO0_D0-D6,
++ * In the TRM, pull-up being 1 for everything except the GPIO0_D3-D6,
+ * where that pull up value becomes 3.
+ */
+ if (ctrl->type == RK3568 && bank->bank_num == 0 && pin_num >= 27 && pin_num <= 30) {
+@@ -1859,8 +1969,7 @@ static int rockchip_set_pull(struct rockchip_pin_bank *bank,
+ }
+
+ if (ret < 0) {
+- dev_err(info->dev, "unsupported pull setting %d\n",
+- pull);
++ dev_err(dev, "unsupported pull setting %d\n", pull);
+ return ret;
+ }
+
+@@ -1872,7 +1981,7 @@ static int rockchip_set_pull(struct rockchip_pin_bank *bank,
+ ret = regmap_update_bits(regmap, reg, rmask, data);
+ break;
+ default:
+- dev_err(info->dev, "unsupported pinctrl type\n");
++ dev_err(dev, "unsupported pinctrl type\n");
+ return -EINVAL;
+ }
+
+@@ -1963,12 +2072,13 @@ static int rockchip_set_schmitt(struct rockchip_pin_bank *bank,
+ {
+ struct rockchip_pinctrl *info = bank->drvdata;
+ struct rockchip_pin_ctrl *ctrl = info->ctrl;
++ struct device *dev = info->dev;
+ struct regmap *regmap;
+ int reg, ret;
+ u8 bit;
+ u32 data, rmask;
+
+- dev_dbg(info->dev, "setting input schmitt of GPIO%d-%d to %d\n",
++ dev_dbg(dev, "setting input schmitt of GPIO%d-%d to %d\n",
+ bank->bank_num, pin_num, enable);
+
+ ret = ctrl->schmitt_calc_reg(bank, pin_num, &regmap, &reg, &bit);
+@@ -2028,10 +2138,11 @@ static int rockchip_pmx_set(struct pinctrl_dev *pctldev, unsigned selector,
+ struct rockchip_pinctrl *info = pinctrl_dev_get_drvdata(pctldev);
+ const unsigned int *pins = info->groups[group].pins;
+ const struct rockchip_pin_config *data = info->groups[group].data;
++ struct device *dev = info->dev;
+ struct rockchip_pin_bank *bank;
+ int cnt, ret = 0;
+
+- dev_dbg(info->dev, "enable function %s group %s\n",
++ dev_dbg(dev, "enable function %s group %s\n",
+ info->functions[selector].name, info->groups[group].name);
+
+ /*
+@@ -2057,11 +2168,24 @@ static int rockchip_pmx_set(struct pinctrl_dev *pctldev, unsigned selector,
 return 0;
 }
 
@@ -240341,7 +291619,7 @@ index 5ce260f152ce5..bae6cc83ea362 100644
 };
 
 /*
-@@ -2092,19 +2120,20 @@ static bool rockchip_pinconf_pull_valid(struct rockchip_pin_ctrl *ctrl,
 return false;
 }
 
@@ -240366,7 +291644,7 @@ index 5ce260f152ce5..bae6cc83ea362 100644
 return 0;
 }
 
-@@ -2125,6 +2154,25 @@ static int rockchip_pinconf_set(struct pinctrl_dev *pctldev, unsigned int pin,
 param = pinconf_to_config_param(configs[i]);
 arg = pinconf_to_config_argument(configs[i]);
 
@@ -240392,7 +291670,7 @@ index 5ce260f152ce5..bae6cc83ea362 100644
 switch (param) {
 case PIN_CONFIG_BIAS_DISABLE:
 rc = rockchip_set_pull(bank, pin - bank->pin_base,
-@@ -2153,27 +2201,21 @@ static int rockchip_pinconf_set(struct pinctrl_dev *pctldev, unsigned int pin,
 if (rc != RK_FUNC_GPIO)
 return -EINVAL;
 
@@ -240430,7 +291708,126 @@ index 5ce260f152ce5..bae6cc83ea362 100644
 case PIN_CONFIG_DRIVE_STRENGTH:
 /* rk3288 is the first with per-pin drive-strength */
 if (!info->ctrl->drv_calc_reg)
+@@ -2310,6 +2448,7 @@ static int rockchip_pinctrl_parse_groups(struct device_node *np,
+ struct rockchip_pinctrl *info,
+ u32 index)
+ {
++ struct device *dev = info->dev;
+ struct rockchip_pin_bank *bank;
+ int size;
+ const __be32 *list;
+@@ -2317,7 +2456,7 @@ static int rockchip_pinctrl_parse_groups(struct device_node *np,
+ int i, j;
+ int ret;
+
+- dev_dbg(info->dev, "group(%d): %pOFn\n", index, np);
++ dev_dbg(dev, "group(%d): %pOFn\n", index, np);
+
+ /* Initialise group */
+ grp->name = np->name;
+@@ -2330,18 +2469,14 @@ static int rockchip_pinctrl_parse_groups(struct device_node *np,
+ /* we do not check return since it's safe node passed down */
+ size /= sizeof(*list);
+ if (!size || size % 4) {
+- dev_err(info->dev, "wrong pins number or pins and configs should be by 4\n");
++ dev_err(dev, "wrong pins number or pins and configs should be by 4\n");
+ return -EINVAL;
+ }
+
+ grp->npins = size / 4;
+
+- grp->pins = devm_kcalloc(info->dev, grp->npins, sizeof(unsigned int),
+- GFP_KERNEL);
+- grp->data = devm_kcalloc(info->dev,
+- grp->npins,
+- sizeof(struct rockchip_pin_config),
+- GFP_KERNEL);
++ grp->pins = devm_kcalloc(dev, grp->npins, sizeof(*grp->pins), GFP_KERNEL);
++ grp->data = devm_kcalloc(dev, grp->npins, sizeof(*grp->data), GFP_KERNEL);
+ if (!grp->pins || !grp->data)
+ return -ENOMEM;
+
+@@ -2375,6 +2510,7 @@ static int rockchip_pinctrl_parse_functions(struct device_node *np,
+ struct rockchip_pinctrl *info,
+ u32 index)
+ {
++ struct device *dev = info->dev;
+ struct device_node *child;
+ struct rockchip_pmx_func *func;
+ struct rockchip_pin_group *grp;
+@@ -2382,7 +2518,7 @@ static int rockchip_pinctrl_parse_functions(struct device_node *np,
+ static u32 grp_index;
+ u32 i = 0;
+
+- dev_dbg(info->dev, "parse function(%d): %pOFn\n", index, np);
++ dev_dbg(dev, "parse function(%d): %pOFn\n", index, np);
+
+ func = &info->functions[index];
+
+@@ -2392,8 +2528,7 @@ static int rockchip_pinctrl_parse_functions(struct device_node *np,
+ if (func->ngroups <= 0)
+ return 0;
+
+- func->groups = devm_kcalloc(info->dev,
+- func->ngroups, sizeof(char *), GFP_KERNEL);
++ func->groups = devm_kcalloc(dev, func->ngroups, sizeof(*func->groups), GFP_KERNEL);
+ if (!func->groups)
+ return -ENOMEM;
+
+@@ -2421,20 +2556,14 @@ static int rockchip_pinctrl_parse_dt(struct platform_device *pdev,
+
+ rockchip_pinctrl_child_count(info, np);
+
+- dev_dbg(&pdev->dev, "nfunctions = %d\n", info->nfunctions);
+- dev_dbg(&pdev->dev, "ngroups = %d\n", info->ngroups);
++ dev_dbg(dev, "nfunctions = %d\n", info->nfunctions);
++ dev_dbg(dev, "ngroups = %d\n", info->ngroups);
+
+- info->functions = devm_kcalloc(dev,
+- info->nfunctions,
+- sizeof(struct rockchip_pmx_func),
+- GFP_KERNEL);
++ info->functions = devm_kcalloc(dev, info->nfunctions, sizeof(*info->functions), GFP_KERNEL);
+ if (!info->functions)
+ return -ENOMEM;
+
+- info->groups = devm_kcalloc(dev,
+- info->ngroups,
+- sizeof(struct rockchip_pin_group),
+- GFP_KERNEL);
++ info->groups = devm_kcalloc(dev, info->ngroups, sizeof(*info->groups), GFP_KERNEL);
+ if (!info->groups)
+ return -ENOMEM;
+
+@@ -2446,7 +2575,7 @@ static int rockchip_pinctrl_parse_dt(struct platform_device *pdev,
+
+ ret = rockchip_pinctrl_parse_functions(child, info, i++);
+ if (ret) {
+- dev_err(&pdev->dev, "failed to parse function\n");
++ dev_err(dev, "failed to parse function\n");
+ of_node_put(child);
+ return ret;
+ }
+@@ -2461,6 +2590,7 @@ static int rockchip_pinctrl_register(struct platform_device *pdev,
+ struct pinctrl_desc *ctrldesc = &info->pctl;
+ struct pinctrl_pin_desc *pindesc, *pdesc;
+ struct rockchip_pin_bank *pin_bank;
++ struct device *dev = &pdev->dev;
+ int pin, bank, ret;
+ int k;
+
+@@ -2470,9 +2600,7 @@ static int rockchip_pinctrl_register(struct platform_device *pdev,
+ ctrldesc->pmxops = &rockchip_pmx_ops;
+ ctrldesc->confops = &rockchip_pinconf_ops;
+
+- pindesc = devm_kcalloc(&pdev->dev,
+- info->ctrl->nr_pins, sizeof(*pindesc),
+- GFP_KERNEL);
++ pindesc = devm_kcalloc(dev, info->ctrl->nr_pins, sizeof(*pindesc), GFP_KERNEL);
+ if (!pindesc)
+ return -ENOMEM;
+
+@@ -2489,7 +2617,7 @@ static int rockchip_pinctrl_register(struct platform_device *pdev,
 pdesc++;
 }
 
@@ -240439,7 +291836,7 @@ index 5ce260f152ce5..bae6cc83ea362 100644
 mutex_init(&pin_bank->deferred_lock);
 }
 
+@@ -2497,9 +2625,9 @@ static int rockchip_pinctrl_register(struct platform_device *pdev,
+ if (ret)
+ return ret;
+
+- info->pctl_dev = devm_pinctrl_register(&pdev->dev, ctrldesc, info);
++ info->pctl_dev = devm_pinctrl_register(dev, ctrldesc, info);
+ if (IS_ERR(info->pctl_dev)) {
+- dev_err(&pdev->dev, "could not register pinctrl driver\n");
++ dev_err(dev, "could not register pinctrl driver\n");
+ return PTR_ERR(info->pctl_dev);
+ }
+
+@@ -2513,8 +2641,9 @@ static struct rockchip_pin_ctrl *rockchip_pinctrl_get_soc_data(
+ struct rockchip_pinctrl *d,
+ struct platform_device *pdev)
+ {
++ struct device *dev = &pdev->dev;
++ struct device_node *node = dev->of_node;
+ const struct of_device_id *match;
+- struct device_node *node = pdev->dev.of_node;
+ struct rockchip_pin_ctrl *ctrl;
+ struct rockchip_pin_bank *bank;
+ int grf_offs, pmu_offs, drv_grf_offs, drv_pmu_offs, i, j;
+@@ -2566,7 +2695,7 @@ static struct rockchip_pin_ctrl *rockchip_pinctrl_get_soc_data(
+ drv_pmu_offs : drv_grf_offs;
+ }
+
+- dev_dbg(d->dev, "bank %d, iomux %d has iom_offset 0x%x drv_offset 0x%x\n",
++ dev_dbg(dev, "bank %d, iomux %d has iom_offset 0x%x drv_offset 0x%x\n",
+ i, j, iom->offset, drv->offset);
+
+ /*
+@@ -2675,8 +2804,8 @@ static int rockchip_pinctrl_probe(struct platform_device *pdev)
+ {
+ struct rockchip_pinctrl *info;
+ struct device *dev = &pdev->dev;
++ struct device_node *np = dev->of_node, *node;
+ struct rockchip_pin_ctrl *ctrl;
+- struct device_node *np = pdev->dev.of_node, *node;
+ struct resource *res;
+ void __iomem *base;
+ int ret;
+@@ -2702,6 +2831,7 @@ static int rockchip_pinctrl_probe(struct platform_device *pdev)
 node = of_parse_phandle(np, "rockchip,grf", 0);
 if (node) {
 info->regmap_base = syscon_node_to_regmap(node);
+ of_node_put(node);
 if (IS_ERR(info->regmap_base))
 return PTR_ERR(info->regmap_base);
 } else {
+@@ -2712,8 +2842,8 @@ static int rockchip_pinctrl_probe(struct platform_device *pdev)
+
+ rockchip_regmap_config.max_register = resource_size(res) - 4;
+ rockchip_regmap_config.name = "rockchip,pinctrl";
+- info->regmap_base = devm_regmap_init_mmio(&pdev->dev, base,
+- &rockchip_regmap_config);
++ info->regmap_base =
++ devm_regmap_init_mmio(dev, base, &rockchip_regmap_config);
+
+ /* to check for the old dt-bindings */
+ info->reg_size = resource_size(res);
+@@ -2725,12 +2855,10 @@ static int rockchip_pinctrl_probe(struct platform_device *pdev)
+ if (IS_ERR(base))
+ return PTR_ERR(base);
+
+- rockchip_regmap_config.max_register =
+- resource_size(res) - 4;
++ rockchip_regmap_config.max_register = resource_size(res) - 4;
+ rockchip_regmap_config.name = "rockchip,pinctrl-pull";
+- info->regmap_pull = devm_regmap_init_mmio(&pdev->dev,
+- base,
+- &rockchip_regmap_config);
++ info->regmap_pull =
++ devm_regmap_init_mmio(dev, base, &rockchip_regmap_config);
+ }
+ }
+
+@@ -2738,6 +2866,7 @@ static int rockchip_pinctrl_probe(struct platform_device *pdev)
 node = of_parse_phandle(np, "rockchip,pmu", 0);
 if (node) {
 info->regmap_pmu = syscon_node_to_regmap(node);
+ of_node_put(node);
 if (IS_ERR(info->regmap_pmu))
 return PTR_ERR(info->regmap_pmu);
 }
+@@ -2748,9 +2877,9 @@ static int rockchip_pinctrl_probe(struct platform_device *pdev)
 
 platform_set_drvdata(pdev, info);
 
- ret = of_platform_populate(np, rockchip_bank_match, NULL, NULL);
+ ret = of_platform_populate(np, NULL, NULL, &pdev->dev);
 if (ret) {
- dev_err(&pdev->dev, "failed to register gpio device\n");
+ dev_err(dev, "failed to register gpio device\n");
 return ret;
 }
 
+@@ -2761,7 +2890,7 @@ static int rockchip_pinctrl_remove(struct platform_device *pdev)
 {
 struct rockchip_pinctrl *info = platform_get_drvdata(pdev);
 struct rockchip_pin_bank *bank;
- struct rockchip_pin_output_deferred *cfg;
+ struct rockchip_pin_deferred *cfg;
 int i;
 
 of_platform_depopulate(&pdev->dev);
+@@ -2770,9 +2899,9 @@ static int rockchip_pinctrl_remove(struct platform_device *pdev)
 bank = &info->ctrl->pin_banks[i];
 
 mutex_lock(&bank->deferred_lock);
- while (!list_empty(&bank->deferred_output)) {
- cfg = list_first_entry(&bank->deferred_output,
- struct rockchip_pin_output_deferred, head);
+ while (!list_empty(&bank->deferred_pins)) {
+ cfg = list_first_entry(&bank->deferred_pins,
+ struct rockchip_pin_deferred, head);
 list_del(&cfg->head);
 kfree(cfg);
 }
diff --git a/drivers/pinctrl/pinctrl-rockchip.h b/drivers/pinctrl/pinctrl-rockchip.h
index 91f10279d0844..59116e13758d0 100644
--- a/drivers/pinctrl/pinctrl-rockchip.h
+++ b/drivers/pinctrl/pinctrl-rockchip.h
@@ -171,7 +171,7 @@ struct rockchip_pin_bank {
 u32 toggle_edge_mode;
 struct rockchip_routing_table *route_table?
- struct list_head deferred_output;
+ struct list_head deferred_pins;
 struct mutex deferred_lock;
 };
 
@@ -230,10 +230,10 @@ struct rockchip_pin_ctrl {
 struct rockchip_mux_route_data *iomux_routes;
 u32 niomux_routes;
 
- void (*pull_calc_reg)(struct rockchip_pin_bank *bank,
+ int (*pull_calc_reg)(struct rockchip_pin_bank *bank,
 int pin_num, struct regmap **regmap,
 int *reg, u8 *bit);
- void (*drv_calc_reg)(struct rockchip_pin_bank *bank,
+ int (*drv_calc_reg)(struct rockchip_pin_bank *bank,
 int pin_num, struct regmap **regmap,
 int *reg, u8 *bit);
 int (*schmitt_calc_reg)(struct rockchip_pin_bank *bank,
@@ -247,9 +247,12 @@ struct rockchip_pin_config {
 unsigned int nconfigs;
 };
 
-enum pin_config_param param;
+struct rockchip_pin_deferred {
+ struct list_head head;
+ unsigned int pin;
+ enum pin_config_param param;
 u32 arg;
 };
 
diff --git a/drivers/pinctrl/pinctrl-single.c b/drivers/pinctrl/pinctrl-single.c
index 67bec7ea0f8b0..9ad8f70206142 100644
--- a/drivers/pinctrl/pinctrl-single.c
+++ b/drivers/pinctrl/pinctrl-single.c
@@ -372,6 +372,8 @@ static int pcs_set_mux(struct pinctrl_dev *pctldev, unsigned fselector,
 if (!pcs->fmask)
 return 0;
 function = pinmux_generic_get_function(pctldev, fselector);
+ if (!function)
+ return -EINVAL;
 func = function->data;
 if (!func)
 return -EINVAL;
@@ -727,7 +729,7 @@ static int pcs_allocate_pin_table(struct pcs_device *pcs)
 
 mux_bytes = pcs->width / BITS_PER_BYTE;
 
- if (pcs->bits_per_mux) {
+ if (pcs->bits_per_mux && pcs->fmask) {
 pcs->bits_per_pin = fls(pcs->fmask);
 nr_pins = (pcs->size * BITS_PER_BYTE) / pcs->bits_per_pin;
 } else {
diff --git a/drivers/pinctrl/qcom/Kconfig b/drivers/pinctrl/qcom/Kconfig
index 5ff4207df66e1..f1b5176a5085b 100644
--- a/drivers/pinctrl/qcom/Kconfig
+++ b/drivers/pinctrl/qcom/Kconfig
@@ -243450,9 +294954,26 @@ index 7e7cfc98657a4..9bb5cd2c98b8b 100644
 
 /* this part must be outside header guard */
diff --git a/drivers/platform/chrome/cros_ec_typec.c b/drivers/platform/chrome/cros_ec_typec.c
index 262a891eded34..b94abb8f7706a 100644
--- a/drivers/platform/chrome/cros_ec_typec.c
+++ b/drivers/platform/chrome/cros_ec_typec.c
@@ -156,12 +156,12 @@ static int cros_typec_get_switch_handles(struct cros_typec_port *port,
 return 0;
 
 role_sw_err:
- usb_role_switch_put(port->role_sw);
-ori_sw_err:
 typec_switch_put(port->ori_sw);
-mux_err:
+ port->ori_sw = NULL;
+ori_sw_err:
 typec_mux_put(port->mux);
-
+ port->mux = NULL;
+mux_err:
 return -ENODEV;
 }
 
@@ -691,7 +691,7 @@ static int cros_typec_register_altmodes(struct cros_typec_data *typec, int port_
 for (j = 0; j < sop_disc->svids[i].mode_count; j++) {
 memset(&desc, 0, sizeof(desc));
 platform_set_drvdata(pdev, typec);
 
 ret = cros_typec_get_cmd_version(typec);
diff --git a/drivers/platform/chrome/cros_usbpd_notify.c b/drivers/platform/chrome/cros_usbpd_notify.c
index 48a6617aa12f3..de76de6f50900 100644
--- a/drivers/platform/chrome/cros_usbpd_notify.c
+++ b/drivers/platform/chrome/cros_usbpd_notify.c
@@ -285,7 +285,11 @@ static int __init cros_usbpd_notify_init(void)
 return ret;
 
 #ifdef CONFIG_ACPI
- platform_driver_register(&cros_usbpd_notify_acpi_driver);
+ ret = platform_driver_register(&cros_usbpd_notify_acpi_driver);
+ if (ret) {
+ platform_driver_unregister(&cros_usbpd_notify_plat_driver);
+ return ret;
+ }
 #endif
 return 0;
 }
diff --git a/drivers/platform/mellanox/mlxbf-pmc.c b/drivers/platform/mellanox/mlxbf-pmc.c
index 04bc3b50aa7a4..c2c9b0d3244cb 100644
--- a/drivers/platform/mellanox/mlxbf-pmc.c
+++ b/drivers/platform/mellanox/mlxbf-pmc.c
@@ -358,7 +358,7 @@ static const struct mlxbf_pmc_events mlxbf_pmc_hnfnet_events[] = {
 { 0x32, "DDN_DIAG_W_INGRESS" },
 { 0x33, "DDN_DIAG_C_INGRESS" },
 { 0x34, "DDN_DIAG_CORE_SENT" },
- { 0x35, "NDN_DIAG_S_OUT_OF_CRED" },
+ { 0x35, "NDN_DIAG_N_OUT_OF_CRED" },
 { 0x36, "NDN_DIAG_S_OUT_OF_CRED" },
 { 0x37, "NDN_DIAG_E_OUT_OF_CRED" },
 { 0x38, "NDN_DIAG_W_OUT_OF_CRED" },
@@ -1374,8 +1374,8 @@ static int mlxbf_pmc_map_counters(struct device *dev)
 pmc->block[i].counters = info[2];
 pmc->block[i].type = info[3];
 int ec_cmd_bytes;
 
 mutex_lock(&ec_dbgfs_lock);
diff --git a/drivers/platform/surface/aggregator/controller.c b/drivers/platform/surface/aggregator/controller.c
index b8c377b3f9321..f23f7128cf2b4 100644
--- a/drivers/platform/surface/aggregator/controller.c
+++ b/drivers/platform/surface/aggregator/controller.c
@@ -1700,8 +1700,10 @@ int ssam_request_sync(struct ssam_controller *ctrl,
 return status;
 
 status = ssam_request_sync_init(rqst, spec->flags);
- if (status)
+ if (status) {
+ ssam_request_sync_free(rqst);
 return status;
+ }
 
 ssam_request_sync_set_resp(rqst, rsp);
 
diff --git a/drivers/platform/surface/aggregator/core.c b/drivers/platform/surface/aggregator/core.c
index c61bbeeec2dfd..54f86df77a37b 100644
--- a/drivers/platform/surface/aggregator/core.c
+++ b/drivers/platform/surface/aggregator/core.c
 
 static void __exit ssam_core_exit(void)
 {
diff --git a/drivers/platform/surface/aggregator/ssh_packet_layer.c b/drivers/platform/surface/aggregator/ssh_packet_layer.c
index 8a4451c1ffe57..a652c2763175e 100644
--- a/drivers/platform/surface/aggregator/ssh_packet_layer.c
+++ b/drivers/platform/surface/aggregator/ssh_packet_layer.c
@@ -1596,16 +1596,32 @@ static void ssh_ptl_timeout_reap(struct work_struct *work)
 ssh_ptl_tx_wakeup_packet(ptl);
 }
 
-static bool ssh_ptl_rx_retransmit_check(struct ssh_ptl *ptl, u8 seq)
+static bool ssh_ptl_rx_retransmit_check(struct ssh_ptl *ptl, const struct ssh_frame *frame)
 {
 int i;
 
+ /*
+ * Ignore unsequenced packets. On some devices (notably Surface Pro 9),
+ * unsequenced events will always be sent with SEQ=0x00. Attempting to
+ * detect retransmission would thus just block all events.
+ *
+ * While sequence numbers would also allow detection of retransmitted
+ * packets in unsequenced communication, they have only ever been used
+ * to cover edge-cases in sequenced transmission. In particular, the
+ * only instance of packets being retransmitted (that we are aware of)
+ * is due to an ACK timeout. As this does not happen in unsequenced
+ * communication, skip the retransmission check for those packets
+ * entirely.
+ */
+ if (frame->type == SSH_FRAME_TYPE_DATA_NSQ)
+ return false;
+
 /*
 * Check if SEQ has been seen recently (i.e. packet was
 * re-transmitted and we should ignore it).
 */
 for (i = 0; i < ARRAY_SIZE(ptl->rx.blocked.seqs); i++) {
- if (likely(ptl->rx.blocked.seqs[i] != seq))
+ if (likely(ptl->rx.blocked.seqs[i] != frame->seq))
 continue;
 
 ptl_dbg(ptl, "ptl: ignoring repeated data packet\n");
@@ -1613,7 +1629,7 @@ static bool ssh_ptl_rx_retransmit_check(struct ssh_ptl *ptl, u8 seq)
 }
 
 /* Update list of blocked sequence IDs. */
- ptl->rx.blocked.seqs[ptl->rx.blocked.offset] = seq;
+ ptl->rx.blocked.seqs[ptl->rx.blocked.offset] = frame->seq;
 ptl->rx.blocked.offset = (ptl->rx.blocked.offset + 1)
 % ARRAY_SIZE(ptl->rx.blocked.seqs);
 
@@ -1624,7 +1640,7 @@ static void ssh_ptl_rx_dataframe(struct ssh_ptl *ptl,
 const struct ssh_frame *frame,
 const struct ssam_span *payload)
 {
- if (ssh_ptl_rx_retransmit_check(ptl, frame->seq))
+ if (ssh_ptl_rx_retransmit_check(ptl, frame))
 return;
 
 ptl->ops.data_received(ptl, payload);
diff --git a/drivers/platform/surface/aggregator/ssh_request_layer.c b/drivers/platform/surface/aggregator/ssh_request_layer.c
index 790f7f0eee98b..7c0b637c91fc8 100644
--- a/drivers/platform/surface/aggregator/ssh_request_layer.c
+++ b/drivers/platform/surface/aggregator/ssh_request_layer.c
@@ -916,6 +916,20 @@ static void ssh_rtl_rx_command(struct ssh_ptl *p, const struct ssam_span *data)
 if (sshp_parse_command(dev, data, &command, &command_data))
 return;
 
+ /*
+ * Check if the message was intended for us. If not, drop it.
+ *
+ * Note: We will need to change this to handle debug messages. On newer
+ * generation devices, these seem to be sent to tid_out=0x03. We as
+ * host can still receive them as they can be forwarded via an override
+ * option on SAM, but doing so does not change tid_out=0x00.
+ */
+ if (command->tid_out != 0x00) {
+ rtl_warn(rtl, "rtl: dropping message not intended for us (tid = %#04x)\n",
+ command->tid_out);
+ return;
+ }
+
 if (ssh_rqid_is_event(get_unaligned_le16(&command->rqid)))
 ssh_rtl_rx_event(rtl, command, &command_data);
 else
diff --git a/drivers/platform/surface/surface3_power.c b/drivers/platform/surface/surface3_power.c
index 90c1568ea4e09..3cc004c68bdba 100644
--- a/drivers/platform/surface/surface3_power.c
+++ b/drivers/platform/surface/surface3_power.c
diff --git a/drivers/platform/surface/surface_aggregator_registry.c b/drivers/platform/surface/surface_aggregator_registry.c
index 4428c4330229a..5c0451c56ea83 100644
--- a/drivers/platform/surface/surface_aggregator_registry.c
+++ b/drivers/platform/surface/surface_aggregator_registry.c
 };
 MODULE_DEVICE_TABLE(acpi, ssam_platform_hub_match);
diff --git a/drivers/platform/x86/Kconfig b/drivers/platform/x86/Kconfig
index e21ea3d23e6f2..cd8146dbdd453 100644
--- a/drivers/platform/x86/Kconfig
+++ b/drivers/platform/x86/Kconfig
@@ -171,6 +171,7 @@ config ACER_WMI
 config AMD_PMC
 tristate "AMD SoC PMC driver"
 depends on ACPI && PCI
+ select SERIO
 help
 The driver provides support for AMD Power Management Controller
 primarily responsible for S2Idle transactions that are driven from
@@ -871,6 +872,8 @@ config PANASONIC_LAPTOP
 tristate "Panasonic Laptop Extras"
 depends on INPUT && ACPI
 depends on BACKLIGHT_CLASS_DEVICE
diff --git a/drivers/platform/x86/Makefile b/drivers/platform/x86/Makefile
index 69690e26bb6d4..2734a771d1f00 100644
--- a/drivers/platform/x86/Makefile
+++ b/drivers/platform/x86/Makefile
 # MSI
 obj-$(CONFIG_MSI_LAPTOP) += msi-laptop.o
diff --git a/drivers/platform/x86/acer-wmi.c b/drivers/platform/x86/acer-wmi.c
index 694b45ed06a21..82516796a53b0 100644
--- a/drivers/platform/x86/acer-wmi.c
+++ b/drivers/platform/x86/acer-wmi.c
@@ -99,6 +99,7 @@ static const struct key_entry acer_wmi_keymap[] __initconst = {
 {KE_IGNORE, 0x62, {KEY_BRIGHTNESSUP} },
 {KE_IGNORE, 0x63, {KEY_BRIGHTNESSDOWN} },
 {KE_KEY, 0x64, {KEY_SWITCHVIDEOMODE} },	/* Display Switch */
@@ -557,6 +564,15 @@ static const struct dmi_system_id acer_quirks[] __initconst = {
 },
 .driver_data = (void *)ACER_CAP_KBD_DOCK,
 },
+ {
+ .callback = set_force_caps,
+ .ident = "Acer Aspire Switch V 10 SW5-017",
+ .matches = {
+ DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Acer"),
+ DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "SW5-017"),
+ },
+ .driver_data = (void *)ACER_CAP_KBD_DOCK,
+ },
 {
 .callback = set_force_caps,
 .ident = "Acer One 10 (S1003)",
diff --git a/drivers/platform/x86/amd-pmc.c b/drivers/platform/x86/amd-pmc.c
index fc95620101e85..83fea28bbb4f7 100644
--- a/drivers/platform/x86/amd-pmc.c
+++ b/drivers/platform/x86/amd-pmc.c
@@ -20,6 +20,7 @@
 #include <linux/module.h>
 #include <linux/pci.h>
 #include <linux/platform_device.h>
+#include <linux/serio.h>
 #include <linux/suspend.h>
 #include <linux/seq_file.h>
 #include <linux/uaccess.h>
@@ -29,6 +30,10 @@
 #define AMD_PMC_REGISTER_RESPONSE	0x980
 #define AMD_PMC_REGISTER_ARGUMENT	0x9BC
 
+/* PMC Scratch Registers */
+#define AMD_PMC_SCRATCH_REG_CZN	0x94
+#define AMD_PMC_SCRATCH_REG_YC	0xD14
+
 /* Base address of SMU for mapping physical address to virtual address */
 #define AMD_PMC_SMU_INDEX_ADDRESS	0xB8
 #define AMD_PMC_SMU_INDEX_DATA	0xBC
@@ -70,7 +75,7 @@
 #define AMD_CPU_ID_CZN	AMD_CPU_ID_RN
 #define AMD_CPU_ID_YC	0x14B5
 
#define RESPONSE_REGISTER_LOOP_MAX 20000
 
 #define SOC_SUBSYSTEM_IP_MAX	12
@@ -110,6 +115,11 @@ struct amd_pmc_dev {
 u32 base_addr;
 u32 cpu_id;
 u32 active_ips;
+/* SMU version information */
+ u8 smu_program;
+ u8 major;
+ u8 minor;
+ u8 rev;
 struct device *dev;
 struct mutex lock; /* generic mutex lock */
 #if IS_ENABLED(CONFIG_DEBUG_FS)
@@ -147,6 +157,51 @@ struct smu_metrics {
 u64 timecondition_notmet_totaltime[SOC_SUBSYSTEM_IP_MAX];
 } __packed;
 
+static int amd_pmc_get_smu_version(struct amd_pmc_dev *dev)
+{
+ int rc;
+ u32 val;
+
+ rc = amd_pmc_send_cmd(dev, 0, &val, SMU_MSG_GETSMUVERSION, 1);
+ if (rc)
+ return rc;
+
+ dev->smu_program = (val >> 24) & GENMASK(7, 0);
+ dev->major = (val >> 16) & GENMASK(7, 0);
+ dev->minor = (val >> 8) & GENMASK(7, 0);
+ dev->rev = (val >> 0) & GENMASK(7, 0);
+
+ dev_dbg(dev->dev, "SMU program %u version is %u.%u.%u\n",
+ dev->smu_program, dev->major, dev->minor, dev->rev);
+
+ return 0;
+}
+
+static int amd_pmc_idlemask_read(struct amd_pmc_dev *pdev, struct device *dev,
+ struct seq_file *s)
+{
+ u32 val;
+
+ switch (pdev->cpu_id) {
+ case AMD_CPU_ID_CZN:
+ val = amd_pmc_reg_read(pdev, AMD_PMC_SCRATCH_REG_CZN);
+ break;
+ case AMD_CPU_ID_YC:
+ val = amd_pmc_reg_read(pdev, AMD_PMC_SCRATCH_REG_YC);
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ if (dev)
+ dev_dbg(pdev->dev, "SMU idlemask s0i3: 0x%x\n", val);
+
+ if (s)
+ seq_printf(s, "SMU idlemask : 0x%x\n", val);
+
+ return 0;
+}
+
 #ifdef CONFIG_DEBUG_FS
 static int smu_fw_info_show(struct seq_file *s, void *unused)
 {
@@ -201,6 +256,23 @@ static int s0ix_stats_show(struct seq_file *s, void *unused)
 }
 DEFINE_SHOW_ATTRIBUTE(s0ix_stats);
 
+static int amd_pmc_idlemask_show(struct seq_file *s, void *unused)
+{
+ struct amd_pmc_dev *dev = s->private;
+ int rc;
+
+ if (dev->major > 56 || (dev->major >= 55 && dev->minor >= 37)) {
+ rc = amd_pmc_idlemask_read(dev, NULL, s);
+ if (rc)
+ return rc;
+ } else {
+ seq_puts(s, "Unsupported SMU version for Idlemask\n");
+ }
+
+ return 0;
+}
+DEFINE_SHOW_ATTRIBUTE(amd_pmc_idlemask);
+
 static void amd_pmc_dbgfs_unregister(struct amd_pmc_dev *dev)
 {
 debugfs_remove_recursive(dev->dbgfs_dir);
@@ -213,6 +285,8 @@ static void amd_pmc_dbgfs_register(struct amd_pmc_dev *dev)
 &smu_fw_info_fops);
 debugfs_create_file("s0ix_stats", 0644, dev->dbgfs_dir, dev,
 &s0ix_stats_fops);
+ debugfs_create_file("amd_pmc_idlemask", 0644, dev->dbgfs_dir, dev,
+ &amd_pmc_idlemask_fops);
 }
 #else
 static inline void amd_pmc_dbgfs_register(struct amd_pmc_dev *dev)
@@ -339,16 +413,54 @@ static int amd_pmc_get_os_hint(struct amd_pmc_dev *dev)
 return -EINVAL;
 }
 
+static int amd_pmc_czn_wa_irq1(struct amd_pmc_dev *pdev)
+{
+ struct device *d;
+ int rc;
+
+ if (!pdev->major) {
+ rc = amd_pmc_get_smu_version(pdev);
+ if (rc)
+ return rc;
+ }
+
+ if (pdev->major > 64 || (pdev->major == 64 && pdev->minor > 65))
+ return 0;
+
+ d = bus_find_device_by_name(&serio_bus, NULL, "serio0");
+ if (!d)
+ return 0;
+ if (device_may_wakeup(d)) {
+ dev_info_once(d, "Disabling IRQ1 wakeup source to avoid platform firmware bug\n");
+ disable_irq_wake(1);
+ device_set_wakeup_enable(d, false);
+ }
+ put_device(d);
+
+ return 0;
+}
+
 static int __maybe_unused amd_pmc_suspend(struct device *dev)
 {
 struct amd_pmc_dev *pdev = dev_get_drvdata(dev);
 int rc;
 u8 msg;
 
+ if (pdev->cpu_id == AMD_CPU_ID_CZN) {
+ int rc = amd_pmc_czn_wa_irq1(pdev);
+
+ if (rc) {
+ dev_err(pdev->dev, "failed to adjust keyboard wakeup: %d\n", rc);
+ return rc;
+ }
+ }
+
 /* Reset and Start SMU logging - to monitor the s0i3 stats */
 amd_pmc_send_cmd(pdev, 0, NULL, SMU_MSG_LOG_RESET, 0);
 amd_pmc_send_cmd(pdev, 0, NULL, SMU_MSG_LOG_START, 0);
 
+ /* Dump the IdleMask before we send hint to SMU */
+ amd_pmc_idlemask_read(pdev, dev, NULL);
 msg = amd_pmc_get_os_hint(pdev);
 rc = amd_pmc_send_cmd(pdev, 1, NULL, msg, 0);
 if (rc)
@@ -371,11 +483,15 @@ static int __maybe_unused amd_pmc_resume(struct device *dev)
 if (rc)
 dev_err(pdev->dev, "resume failed\n");
 
+ /* Dump the IdleMask to see the blockers */
+ amd_pmc_idlemask_read(pdev, dev, NULL);
+
 return 0;
 }
 
 static const struct dev_pm_ops amd_pmc_pm_ops = {
- SET_SYSTEM_SLEEP_PM_OPS(amd_pmc_suspend, amd_pmc_resume)
+ .suspend_noirq = amd_pmc_suspend,
+ .resume_noirq = amd_pmc_resume,
 };
 
 static const struct pci_device_id pmc_pci_ids[] = {
@@ -457,6 +573,7 @@ static int amd_pmc_probe(struct platform_device *pdev)
 if (err)
 dev_err(dev->dev, "SMU debugging info not supported on this platform\n");
 
+ amd_pmc_get_smu_version(dev);
 platform_set_drvdata(pdev, dev);
 amd_pmc_dbgfs_register(dev);
 return 0;
diff --git a/drivers/platform/x86/apple-gmux.c b/drivers/platform/x86/apple-gmux.c
index 9aae45a452002..57553f9b4d1dc 100644
--- a/drivers/platform/x86/apple-gmux.c
+++ b/drivers/platform/x86/apple-gmux.c
 
 if (gmux_data->iolen < GMUX_MIN_IO_LEN) {
 pr_err("gmux I/O region too small (%lu < %u)\n",
diff --git a/drivers/platform/x86/asus-nb-wmi.c b/drivers/platform/x86/asus-nb-wmi.c
index a81dc4b191b77..4d7327b67a7db 100644
--- a/drivers/platform/x86/asus-nb-wmi.c
+++ b/drivers/platform/x86/asus-nb-wmi.c
@@ -521,6 +521,7 @@ static const struct key_entry asus_nb_wmi_keymap[] = {
 { KE_KEY, 0x30, { KEY_VOLUMEUP } },
 { KE_KEY, 0x31, { KEY_VOLUMEDOWN } },
 { KE_KEY, 0x32, { KEY_MUTE } },
+ { KE_KEY, 0x33, { KEY_SCREENLOCK } },
 { KE_KEY, 0x35, { KEY_SCREENLOCK } },
 { KE_KEY, 0x40, { KEY_PREVIOUSSONG } },
 { KE_KEY, 0x41, { KEY_NEXTSONG } },
diff --git a/drivers/platform/x86/asus-wmi.c b/drivers/platform/x86/asus-wmi.c
index e14fb5fa73240..f030ea97f1266 100644
--- a/drivers/platform/x86/asus-wmi.c
+++ b/drivers/platform/x86/asus-wmi.c
@@ -1511,6 +1511,8 @@ static void asus_wmi_set_xusb2pr(struct asus_wmi *asus)
 pci_write_config_dword(xhci_pdev, USB_INTEL_XUSB2PR,
 cpu_to_le32(ports_available));
 
+ pci_dev_put(xhci_pdev);
+
 pr_info("set USB_INTEL_XUSB2PR old: 0x%04x, new: 0x%04x\n",
 orig_ports_available, ports_available);
 }
diff --git a/drivers/platform/x86/dell/Kconfig b/drivers/platform/x86/dell/Kconfig
index 2fffa57e596e4..fe224a54f24c0 100644
--- a/drivers/platform/x86/dell/Kconfig
+++ b/drivers/platform/x86/dell/Kconfig
 
 depends on ACPI_WMI
 
 config DELL_WMI_LED
diff --git a/drivers/platform/x86/dell/dell-wmi-base.c b/drivers/platform/x86/dell/dell-wmi-base.c
index 089c125e18f70..b83d6fa6e39b3 100644
--- a/drivers/platform/x86/dell/dell-wmi-base.c
+++ b/drivers/platform/x86/dell/dell-wmi-base.c
@@ -260,6 +260,9 @@ static const struct key_entry dell_wmi_keymap_type_0010[] = {
 { KE_KEY, 0x57, { KEY_BRIGHTNESSDOWN } },
 { KE_KEY, 0x58, { KEY_BRIGHTNESSUP } },
 
+ /*Speaker Mute*/
+ { KE_KEY, 0x109, { KEY_MUTE} },
+
 /* Mic mute */
 { KE_KEY, 0x150, { KEY_MICMUTE } },
 
diff --git a/drivers/platform/x86/dell/dell-wmi-privacy.c b/drivers/platform/x86/dell/dell-wmi-privacy.c
index 074b7e68c227c..7b79e987ca088 100644
--- a/drivers/platform/x86/dell/dell-wmi-privacy.c
+++ b/drivers/platform/x86/dell/dell-wmi-privacy.c
@@ -61,7 +61,7 @@ static const struct key_entry dell_wmi_keymap_type_0012[] = {
 /* privacy mic mute */
 { KE_KEY, 0x0001, { KEY_MICMUTE } },
 /* privacy camera mute */
- { KE_SW, 0x0002, { SW_CAMERA_LENS_COVER } },
+ { KE_VSW, 0x0002, { SW_CAMERA_LENS_COVER } },
 { KE_END, 0},
 };
 
@@ -115,11 +115,15 @@ bool dell_privacy_process_event(int type, int code, int status)
 
 switch (code) {
 case DELL_PRIVACY_AUDIO_EVENT: /* Mic mute */
- case DELL_PRIVACY_CAMERA_EVENT: /* Camera mute */
 priv->last_status = status;
 sparse_keymap_report_entry(priv->input_dev, key, 1, true);
 ret = true;
 break;
+ case DELL_PRIVACY_CAMERA_EVENT: /* Camera mute */
+ priv->last_status = status;
+ sparse_keymap_report_entry(priv->input_dev, key, !(status & CAMERA_STATUS), false);
+ ret = true;
+ break;
 default:
 dev_dbg(&priv->wdev->dev, "unknown event type 0x%04x 0x%04x\n", type, code);
 }
@@ -295,7 +299,7 @@ static int dell_privacy_wmi_probe(struct wmi_device *wdev, const void *context)
 {
 struct privacy_wmi_data *priv;
 struct key_entry *keymap;
- int ret, i;
+ int ret, i, j;
 
 ret = wmi_has_guid(DELL_PRIVACY_GUID);
 if (!ret)
@@ -307,6 +311,11 @@ static int dell_privacy_wmi_probe(struct wmi_device *wdev, const void *context)
 
 dev_set_drvdata(&wdev->dev, priv);
 priv->wdev = wdev;
+
+ ret = get_current_status(priv->wdev);
+ if (ret)
+ return ret;
+
 /* create evdev passing interface */
 priv->input_dev = devm_input_allocate_device(&wdev->dev);
 if (!priv->input_dev)
@@ -321,9 +330,20 @@ static int dell_privacy_wmi_probe(struct wmi_device *wdev, const void *context)
 /* remap the keymap code with Dell privacy key type 0x12 as prefix
 * KEY_MICMUTE scancode will be reported as 0x120001
 */
- for (i = 0; i < ARRAY_SIZE(dell_wmi_keymap_type_0012); i++) {
- keymap[i] = dell_wmi_keymap_type_0012[i];
- keymap[i].code |= (0x0012 << 16);
+ for (i = 0, j = 0; i < ARRAY_SIZE(dell_wmi_keymap_type_0012); i++) {
+ /*
+ * Unlike keys where only presses matter, userspace may act
+ * on switches in both of their positions. Only register
+ * SW_CAMERA_LENS_COVER if it is actually there.
++ */ ++ if (dell_wmi_keymap_type_0012[i].type == KE_VSW && ++ dell_wmi_keymap_type_0012[i].sw.code == SW_CAMERA_LENS_COVER && ++ !(priv->features_present & BIT(DELL_PRIVACY_TYPE_CAMERA))) ++ continue; ++ ++ keymap[j] = dell_wmi_keymap_type_0012[i]; ++ keymap[j].code |= (0x0012 << 16); ++ j++; + } + ret = sparse_keymap_setup(priv->input_dev, keymap, NULL); + kfree(keymap); +@@ -334,11 +354,12 @@ static int dell_privacy_wmi_probe(struct wmi_device *wdev, const void *context) + priv->input_dev->name = "Dell Privacy Driver"; + priv->input_dev->id.bustype = BUS_HOST; + +- ret = input_register_device(priv->input_dev); +- if (ret) +- return ret; ++ /* Report initial camera-cover status */ ++ if (priv->features_present & BIT(DELL_PRIVACY_TYPE_CAMERA)) ++ input_report_switch(priv->input_dev, SW_CAMERA_LENS_COVER, ++ !(priv->last_status & CAMERA_STATUS)); + +- ret = get_current_status(priv->wdev); ++ ret = input_register_device(priv->input_dev); + if (ret) + return ret; + diff --git a/drivers/platform/x86/gigabyte-wmi.c b/drivers/platform/x86/gigabyte-wmi.c -index 658bab4b79648..ebd15c1d13ec5 100644 +index 658bab4b79648..0163e912fafec 100644 --- a/drivers/platform/x86/gigabyte-wmi.c +++ b/drivers/platform/x86/gigabyte-wmi.c -@@ -140,6 +140,7 @@ static u8 gigabyte_wmi_detect_sensor_usability(struct wmi_device *wdev) +@@ -140,6 +140,8 @@ static u8 gigabyte_wmi_detect_sensor_usability(struct wmi_device *wdev) }} static const struct dmi_system_id gigabyte_wmi_known_working_platforms[] = { + DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("B450M DS3H-CF"), ++ DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("B450M DS3H WIFI-CF"), DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("B450M S2H V2"), DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("B550 AORUS ELITE AX V2"), DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("B550 AORUS ELITE"), -@@ -153,6 +154,7 @@ static const struct dmi_system_id gigabyte_wmi_known_working_platforms[] = { +@@ -153,6 +155,7 @@ static const struct dmi_system_id gigabyte_wmi_known_working_platforms[] = { DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("X570 GAMING X"), DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("X570 I AORUS PRO WIFI"), DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("X570 UD"), @@ -243941,26 +295928,48 @@ index 658bab4b79648..ebd15c1d13ec5 100644 }; diff --git a/drivers/platform/x86/hp-wmi.c b/drivers/platform/x86/hp-wmi.c -index 027a1467d009f..1cd168e328109 100644 +index 027a1467d009f..1e390dcee561b 100644 --- a/drivers/platform/x86/hp-wmi.c +++ b/drivers/platform/x86/hp-wmi.c -@@ -63,6 +63,7 @@ enum hp_wmi_event_ids { +@@ -63,6 +63,8 @@ enum hp_wmi_event_ids { HPWMI_BACKLIT_KB_BRIGHTNESS = 0x0D, HPWMI_PEAKSHIFT_PERIOD = 0x0F, HPWMI_BATTERY_CHARGE_PERIOD = 0x10, + HPWMI_SANITIZATION_MODE = 0x17, ++ HPWMI_SMART_EXPERIENCE_APP = 0x21, }; struct bios_args { -@@ -638,6 +639,8 @@ static void hp_wmi_notify(u32 value, void *context) +@@ -638,6 +640,10 @@ static void hp_wmi_notify(u32 value, void *context) break; case HPWMI_BATTERY_CHARGE_PERIOD: break; + case HPWMI_SANITIZATION_MODE: ++ break; ++ case HPWMI_SMART_EXPERIENCE_APP: + break; default: pr_info("Unknown event_id - %d - 0x%x\n", event_id, event_data); break; +@@ -981,8 +987,16 @@ static int __init hp_wmi_bios_setup(struct platform_device *device) + wwan_rfkill = NULL; + rfkill2_count = 0; + +- if (hp_wmi_rfkill_setup(device)) +- hp_wmi_rfkill2_setup(device); ++ /* ++ * In pre-2009 BIOS, command 1Bh return 0x4 to indicate that ++ * BIOS no longer controls the power for the wireless ++ * devices. All features supported by this command will no ++ * longer be supported. 
++ */ ++ if (!hp_wmi_bios_2009_later()) { ++ if (hp_wmi_rfkill_setup(device)) ++ hp_wmi_rfkill2_setup(device); ++ } + + thermal_profile_setup(); + diff --git a/drivers/platform/x86/hp_accel.c b/drivers/platform/x86/hp_accel.c index cc53f725c0419..ef24f53753c6e 100644 --- a/drivers/platform/x86/hp_accel.c @@ -243978,7 +295987,7 @@ index cc53f725c0419..ef24f53753c6e 100644 } diff --git a/drivers/platform/x86/huawei-wmi.c b/drivers/platform/x86/huawei-wmi.c -index a2d846c4a7eef..eac3e6b4ea113 100644 +index a2d846c4a7eef..935562c870c3d 100644 --- a/drivers/platform/x86/huawei-wmi.c +++ b/drivers/platform/x86/huawei-wmi.c @@ -470,10 +470,17 @@ static DEVICE_ATTR_RW(charge_control_thresholds); @@ -244002,6 +296011,128 @@ index a2d846c4a7eef..eac3e6b4ea113 100644 } static int huawei_wmi_battery_remove(struct power_supply *battery) +@@ -753,6 +760,9 @@ static int huawei_wmi_input_setup(struct device *dev, + const char *guid, + struct input_dev **idev) + { ++ acpi_status status; ++ int err; ++ + *idev = devm_input_allocate_device(dev); + if (!*idev) + return -ENOMEM; +@@ -762,10 +772,19 @@ static int huawei_wmi_input_setup(struct device *dev, + (*idev)->id.bustype = BUS_HOST; + (*idev)->dev.parent = dev; + +- return sparse_keymap_setup(*idev, huawei_wmi_keymap, NULL) || +- input_register_device(*idev) || +- wmi_install_notify_handler(guid, huawei_wmi_input_notify, +- *idev); ++ err = sparse_keymap_setup(*idev, huawei_wmi_keymap, NULL); ++ if (err) ++ return err; ++ ++ err = input_register_device(*idev); ++ if (err) ++ return err; ++ ++ status = wmi_install_notify_handler(guid, huawei_wmi_input_notify, *idev); ++ if (ACPI_FAILURE(status)) ++ return -EIO; ++ ++ return 0; + } + + static void huawei_wmi_input_exit(struct device *dev, const char *guid) +diff --git a/drivers/platform/x86/ideapad-laptop.c b/drivers/platform/x86/ideapad-laptop.c +index e7a1299e37766..e75b09a144a32 100644 +--- a/drivers/platform/x86/ideapad-laptop.c ++++ b/drivers/platform/x86/ideapad-laptop.c +@@ -136,6 +136,7 @@ struct ideapad_private { + bool dytc : 1; + bool fan_mode : 1; + bool fn_lock : 1; ++ bool set_fn_lock_led : 1; + bool hw_rfkill_switch : 1; + bool kbd_bl : 1; + bool touchpad_ctrl_via_ec : 1; +@@ -1467,6 +1468,9 @@ static void ideapad_wmi_notify(u32 value, void *context) + ideapad_input_report(priv, value); + break; + case 208: ++ if (!priv->features.set_fn_lock_led) ++ break; ++ + if (!eval_hals(priv->adev->handle, &result)) { + bool state = test_bit(HALS_FNLOCK_STATE_BIT, &result); + +@@ -1480,6 +1484,24 @@ static void ideapad_wmi_notify(u32 value, void *context) + } + #endif + ++/* On some models we need to call exec_sals(SALS_FNLOCK_ON/OFF) to set the LED */ ++static const struct dmi_system_id set_fn_lock_led_list[] = { ++ { ++ /* https://bugzilla.kernel.org/show_bug.cgi?id=212671 */ ++ .matches = { ++ DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), ++ DMI_MATCH(DMI_PRODUCT_VERSION, "Lenovo Legion R7000P2020H"), ++ } ++ }, ++ { ++ .matches = { ++ DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), ++ DMI_MATCH(DMI_PRODUCT_VERSION, "Lenovo Legion 5 15ARH05"), ++ } ++ }, ++ {} ++}; ++ + /* + * Some ideapads have a hardware rfkill switch, but most do not have one. 
+ * Reading VPCCMD_R_RF always results in 0 on models without a hardware rfkill, +@@ -1499,15 +1521,39 @@ static const struct dmi_system_id hw_rfkill_list[] = { + {} + }; + ++static const struct dmi_system_id no_touchpad_switch_list[] = { ++ { ++ .ident = "Lenovo Yoga 3 Pro 1370", ++ .matches = { ++ DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), ++ DMI_MATCH(DMI_PRODUCT_VERSION, "Lenovo YOGA 3"), ++ }, ++ }, ++ { ++ .ident = "ZhaoYang K4e-IML", ++ .matches = { ++ DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), ++ DMI_MATCH(DMI_PRODUCT_VERSION, "ZhaoYang K4e-IML"), ++ }, ++ }, ++ {} ++}; ++ + static void ideapad_check_features(struct ideapad_private *priv) + { + acpi_handle handle = priv->adev->handle; + unsigned long val; + ++ priv->features.set_fn_lock_led = dmi_check_system(set_fn_lock_led_list); + priv->features.hw_rfkill_switch = dmi_check_system(hw_rfkill_list); + + /* Most ideapads with ELAN0634 touchpad don't use EC touchpad switch */ +- priv->features.touchpad_ctrl_via_ec = !acpi_dev_present("ELAN0634", NULL, -1); ++ if (acpi_dev_present("ELAN0634", NULL, -1)) ++ priv->features.touchpad_ctrl_via_ec = 0; ++ else if (dmi_check_system(no_touchpad_switch_list)) ++ priv->features.touchpad_ctrl_via_ec = 0; ++ else ++ priv->features.touchpad_ctrl_via_ec = 1; + + if (!read_ec_data(handle, VPCCMD_R_FAN, &val)) + priv->features.fan_mode = true; diff --git a/drivers/platform/x86/intel/Kconfig b/drivers/platform/x86/intel/Kconfig index 0b21468e1bd01..02e4481b384e4 100644 --- a/drivers/platform/x86/intel/Kconfig @@ -244033,10 +296164,20 @@ index 0b21468e1bd01..02e4481b384e4 100644 - -endif # X86_PLATFORM_DRIVERS_INTEL diff --git a/drivers/platform/x86/intel/hid.c b/drivers/platform/x86/intel/hid.c -index 08598942a6d78..d7d6782c40c20 100644 +index 08598942a6d78..4d1c78635114e 100644 --- a/drivers/platform/x86/intel/hid.c +++ b/drivers/platform/x86/intel/hid.c -@@ -99,6 +99,20 @@ static const struct dmi_system_id button_array_table[] = { +@@ -27,6 +27,9 @@ static const struct acpi_device_id intel_hid_ids[] = { + {"INTC1051", 0}, + {"INTC1054", 0}, + {"INTC1070", 0}, ++ {"INTC1076", 0}, ++ {"INTC1077", 0}, ++ {"INTC1078", 0}, + {"", 0}, + }; + MODULE_DEVICE_TABLE(acpi, intel_hid_ids); +@@ -99,6 +102,20 @@ static const struct dmi_system_id button_array_table[] = { DMI_MATCH(DMI_PRODUCT_FAMILY, "ThinkPad X1 Tablet Gen 2"), }, }, @@ -244057,7 +296198,7 @@ index 08598942a6d78..d7d6782c40c20 100644 { } }; -@@ -115,6 +129,12 @@ static const struct dmi_system_id dmi_vgbs_allow_list[] = { +@@ -115,6 +132,12 @@ static const struct dmi_system_id dmi_vgbs_allow_list[] = { DMI_MATCH(DMI_PRODUCT_NAME, "HP Spectre x360 Convertible 15-df0xxx"), }, }, @@ -244070,7 +296211,7 @@ index 08598942a6d78..d7d6782c40c20 100644 { } }; -@@ -231,7 +251,7 @@ static bool intel_hid_evaluate_method(acpi_handle handle, +@@ -231,7 +254,7 @@ static bool intel_hid_evaluate_method(acpi_handle handle, method_name = (char *)intel_hid_dsm_fn_to_method[fn_index]; @@ -244080,10 +296221,33 @@ index 08598942a6d78..d7d6782c40c20 100644 obj = acpi_evaluate_dsm_typed(handle, &intel_dsm_guid, diff --git a/drivers/platform/x86/intel/pmc/pltdrv.c b/drivers/platform/x86/intel/pmc/pltdrv.c -index 73797680b895c..15ca8afdd973d 100644 +index 73797680b895c..ddfba38c21044 100644 --- a/drivers/platform/x86/intel/pmc/pltdrv.c +++ b/drivers/platform/x86/intel/pmc/pltdrv.c -@@ -65,7 +65,7 @@ static int __init pmc_core_platform_init(void) +@@ -18,6 +18,8 @@ + #include <asm/cpu_device_id.h> + #include <asm/intel-family.h> + ++#include <xen/xen.h> ++ + static void 
intel_pmc_core_release(struct device *dev) + { + kfree(dev); +@@ -53,6 +55,13 @@ static int __init pmc_core_platform_init(void) + if (acpi_dev_present("INT33A1", NULL, -1)) + return -ENODEV; + ++ /* ++ * Skip forcefully attaching the device for VMs. Make an exception for ++ * Xen dom0, which does have full hardware access. ++ */ ++ if (cpu_feature_enabled(X86_FEATURE_HYPERVISOR) && !xen_initial_domain()) ++ return -ENODEV; ++ + if (!x86_match_cpu(intel_pmc_core_platform_ids)) + return -ENODEV; + +@@ -65,7 +74,7 @@ static int __init pmc_core_platform_init(void) retval = platform_device_register(pmc_core_device); if (retval) @@ -244092,6 +296256,69 @@ index 73797680b895c..15ca8afdd973d 100644 return retval; } +diff --git a/drivers/platform/x86/intel/pmt/class.c b/drivers/platform/x86/intel/pmt/class.c +index 659b1073033c2..586a5877422b5 100644 +--- a/drivers/platform/x86/intel/pmt/class.c ++++ b/drivers/platform/x86/intel/pmt/class.c +@@ -9,6 +9,7 @@ + */ + + #include <linux/kernel.h> ++#include <linux/io-64-nonatomic-lo-hi.h> + #include <linux/module.h> + #include <linux/mm.h> + #include <linux/pci.h> +@@ -18,6 +19,7 @@ + #define PMT_XA_START 0 + #define PMT_XA_MAX INT_MAX + #define PMT_XA_LIMIT XA_LIMIT(PMT_XA_START, PMT_XA_MAX) ++#define GUID_SPR_PUNIT 0x9956f43f + + /* + * Early implementations of PMT on client platforms have some +@@ -41,6 +43,29 @@ bool intel_pmt_is_early_client_hw(struct device *dev) + } + EXPORT_SYMBOL_GPL(intel_pmt_is_early_client_hw); + ++static inline int ++pmt_memcpy64_fromio(void *to, const u64 __iomem *from, size_t count) ++{ ++ int i, remain; ++ u64 *buf = to; ++ ++ if (!IS_ALIGNED((unsigned long)from, 8)) ++ return -EFAULT; ++ ++ for (i = 0; i < count/8; i++) ++ buf[i] = readq(&from[i]); ++ ++ /* Copy any remaining bytes */ ++ remain = count % 8; ++ if (remain) { ++ u64 tmp = readq(&from[i]); ++ ++ memcpy(&buf[i], &tmp, remain); ++ } ++ ++ return count; ++} ++ + /* + * sysfs + */ +@@ -62,7 +87,11 @@ intel_pmt_read(struct file *filp, struct kobject *kobj, + if (count > entry->size - off) + count = entry->size - off; + +- memcpy_fromio(buf, entry->base + off, count); ++ if (entry->guid == GUID_SPR_PUNIT) ++ /* PUNIT on SPR only supports aligned 64-bit read */ ++ count = pmt_memcpy64_fromio(buf, entry->base + off, count); ++ else ++ memcpy_fromio(buf, entry->base + off, count); + + return count; + } diff --git a/drivers/platform/x86/intel/speed_select_if/isst_if_common.c b/drivers/platform/x86/intel/speed_select_if/isst_if_common.c index c9a85eb2e8600..e8424e70d81d2 100644 --- a/drivers/platform/x86/intel/speed_select_if/isst_if_common.c @@ -244262,6 +296489,26 @@ index c9a85eb2e8600..e8424e70d81d2 100644 } EXPORT_SYMBOL_GPL(isst_if_cdev_unregister); +diff --git a/drivers/platform/x86/intel_scu_ipc.c b/drivers/platform/x86/intel_scu_ipc.c +index 7cc9089d1e14f..e7a3e34028178 100644 +--- a/drivers/platform/x86/intel_scu_ipc.c ++++ b/drivers/platform/x86/intel_scu_ipc.c +@@ -583,7 +583,6 @@ __intel_scu_ipc_register(struct device *parent, + scu->dev.parent = parent; + scu->dev.class = &intel_scu_ipc_class; + scu->dev.release = intel_scu_ipc_release; +- dev_set_name(&scu->dev, "intel_scu_ipc"); + + if (!request_mem_region(scu_data->mem.start, resource_size(&scu_data->mem), + "intel_scu_ipc")) { +@@ -612,6 +611,7 @@ __intel_scu_ipc_register(struct device *parent, + * After this point intel_scu_ipc_release() takes care of + * releasing the SCU IPC resources once refcount drops to zero. 
+ */ ++ dev_set_name(&scu->dev, "intel_scu_ipc"); + err = device_register(&scu->dev); + if (err) { + put_device(&scu->dev); diff --git a/drivers/platform/x86/msi-laptop.c b/drivers/platform/x86/msi-laptop.c index 24ffc8e2d2d1e..0e804b6c2d242 100644 --- a/drivers/platform/x86/msi-laptop.c @@ -244317,6 +296564,40 @@ index 24ffc8e2d2d1e..0e804b6c2d242 100644 input_unregister_device(msi_laptop_input_dev); cancel_delayed_work_sync(&msi_rfkill_dwork); cancel_work_sync(&msi_rfkill_work); +diff --git a/drivers/platform/x86/mxm-wmi.c b/drivers/platform/x86/mxm-wmi.c +index 9a19fbd2f7341..9a457956025a5 100644 +--- a/drivers/platform/x86/mxm-wmi.c ++++ b/drivers/platform/x86/mxm-wmi.c +@@ -35,13 +35,11 @@ int mxm_wmi_call_mxds(int adapter) + .xarg = 1, + }; + struct acpi_buffer input = { (acpi_size)sizeof(args), &args }; +- struct acpi_buffer output = { ACPI_ALLOCATE_BUFFER, NULL }; + acpi_status status; + + printk("calling mux switch %d\n", adapter); + +- status = wmi_evaluate_method(MXM_WMMX_GUID, 0x0, adapter, &input, +- &output); ++ status = wmi_evaluate_method(MXM_WMMX_GUID, 0x0, adapter, &input, NULL); + + if (ACPI_FAILURE(status)) + return status; +@@ -60,13 +58,11 @@ int mxm_wmi_call_mxmx(int adapter) + .xarg = 1, + }; + struct acpi_buffer input = { (acpi_size)sizeof(args), &args }; +- struct acpi_buffer output = { ACPI_ALLOCATE_BUFFER, NULL }; + acpi_status status; + + printk("calling mux switch %d\n", adapter); + +- status = wmi_evaluate_method(MXM_WMMX_GUID, 0x0, adapter, &input, +- &output); ++ status = wmi_evaluate_method(MXM_WMMX_GUID, 0x0, adapter, &input, NULL); + + if (ACPI_FAILURE(status)) + return status; diff --git a/drivers/platform/x86/panasonic-laptop.c b/drivers/platform/x86/panasonic-laptop.c index d4f444401496e..7ca49b3fc6c28 100644 --- a/drivers/platform/x86/panasonic-laptop.c @@ -244484,6 +296765,39 @@ index 7ee010aa740aa..404bdb4cbfae4 100644 samsung->kbd_led_wk = value; queue_work(samsung->led_workqueue, &samsung->kbd_led_work); +diff --git a/drivers/platform/x86/sony-laptop.c b/drivers/platform/x86/sony-laptop.c +index 7048133749221..336dee9485d4b 100644 +--- a/drivers/platform/x86/sony-laptop.c ++++ b/drivers/platform/x86/sony-laptop.c +@@ -1892,14 +1892,21 @@ static int sony_nc_kbd_backlight_setup(struct platform_device *pd, + break; + } + +- ret = sony_call_snc_handle(handle, probe_base, &result); +- if (ret) +- return ret; ++ /* ++ * Only probe if there is a separate probe_base, otherwise the probe call ++ * is equivalent to __sony_nc_kbd_backlight_mode_set(0), resulting in ++ * the keyboard backlight being turned off. 
++ */ ++ if (probe_base) { ++ ret = sony_call_snc_handle(handle, probe_base, &result); ++ if (ret) ++ return ret; + +- if ((handle == 0x0137 && !(result & 0x02)) || +- !(result & 0x01)) { +- dprintk("no backlight keyboard found\n"); +- return 0; ++ if ((handle == 0x0137 && !(result & 0x02)) || ++ !(result & 0x01)) { ++ dprintk("no backlight keyboard found\n"); ++ return 0; ++ } + } + + kbdbl_ctl = kzalloc(sizeof(*kbdbl_ctl), GFP_KERNEL); diff --git a/drivers/platform/x86/think-lmi.c b/drivers/platform/x86/think-lmi.c index 9472aae72df29..c4d9c45350f7c 100644 --- a/drivers/platform/x86/think-lmi.c @@ -244590,13 +296904,53 @@ index 50ff04c84650c..3dc055ce6e61b 100644 level = TP_EC_FAN_FULLSPEED; else if (sscanf(cmd, "level %d", &level) != 1) diff --git a/drivers/platform/x86/touchscreen_dmi.c b/drivers/platform/x86/touchscreen_dmi.c -index 033f797861d8a..c608078538a79 100644 +index 033f797861d8a..69ba2c5182610 100644 --- a/drivers/platform/x86/touchscreen_dmi.c +++ b/drivers/platform/x86/touchscreen_dmi.c -@@ -773,6 +773,21 @@ static const struct ts_dmi_data predia_basic_data = { +@@ -255,6 +255,23 @@ static const struct ts_dmi_data connect_tablet9_data = { + .properties = connect_tablet9_props, + }; + ++static const struct property_entry csl_panther_tab_hd_props[] = { ++ PROPERTY_ENTRY_U32("touchscreen-min-x", 1), ++ PROPERTY_ENTRY_U32("touchscreen-min-y", 20), ++ PROPERTY_ENTRY_U32("touchscreen-size-x", 1980), ++ PROPERTY_ENTRY_U32("touchscreen-size-y", 1526), ++ PROPERTY_ENTRY_BOOL("touchscreen-inverted-y"), ++ PROPERTY_ENTRY_BOOL("touchscreen-swapped-x-y"), ++ PROPERTY_ENTRY_STRING("firmware-name", "gsl1680-csl-panther-tab-hd.fw"), ++ PROPERTY_ENTRY_U32("silead,max-fingers", 10), ++ { } ++}; ++ ++static const struct ts_dmi_data csl_panther_tab_hd_data = { ++ .acpi_name = "MSSL1680:00", ++ .properties = csl_panther_tab_hd_props, ++}; ++ + static const struct property_entry cube_iwork8_air_props[] = { + PROPERTY_ENTRY_U32("touchscreen-min-x", 1), + PROPERTY_ENTRY_U32("touchscreen-min-y", 3), +@@ -773,6 +790,37 @@ static const struct ts_dmi_data predia_basic_data = { .properties = predia_basic_props, }; ++static const struct property_entry rca_cambio_w101_v2_props[] = { ++ PROPERTY_ENTRY_U32("touchscreen-min-x", 4), ++ PROPERTY_ENTRY_U32("touchscreen-min-y", 20), ++ PROPERTY_ENTRY_U32("touchscreen-size-x", 1644), ++ PROPERTY_ENTRY_U32("touchscreen-size-y", 874), ++ PROPERTY_ENTRY_BOOL("touchscreen-swapped-x-y"), ++ PROPERTY_ENTRY_STRING("firmware-name", "gsl1680-rca-cambio-w101-v2.fw"), ++ PROPERTY_ENTRY_U32("silead,max-fingers", 10), ++ { } ++}; ++ ++static const struct ts_dmi_data rca_cambio_w101_v2_data = { ++ .acpi_name = "MSSL1680:00", ++ .properties = rca_cambio_w101_v2_props, ++}; ++ +static const struct property_entry rwc_nanote_p8_props[] = { + PROPERTY_ENTRY_U32("touchscreen-min-y", 46), + PROPERTY_ENTRY_U32("touchscreen-size-x", 1728), @@ -244615,11 +296969,51 @@ index 033f797861d8a..c608078538a79 100644 static const struct property_entry schneider_sct101ctm_props[] = { PROPERTY_ENTRY_U32("touchscreen-size-x", 1715), PROPERTY_ENTRY_U32("touchscreen-size-y", 1140), -@@ -1379,6 +1394,15 @@ const struct dmi_system_id touchscreen_dmi_table[] = { +@@ -1025,6 +1073,15 @@ const struct dmi_system_id touchscreen_dmi_table[] = { + DMI_MATCH(DMI_BIOS_DATE, "05/07/2016"), + }, + }, ++ { ++ /* Chuwi Vi8 (CWI501) */ ++ .driver_data = (void *)&chuwi_vi8_data, ++ .matches = { ++ DMI_MATCH(DMI_SYS_VENDOR, "Insyde"), ++ DMI_MATCH(DMI_PRODUCT_NAME, "i86"), ++ DMI_MATCH(DMI_BIOS_VERSION, 
"CHUWI.W86JLBNR01"), ++ }, ++ }, + { + /* Chuwi Vi8 (CWI506) */ + .driver_data = (void *)&chuwi_vi8_data, +@@ -1069,6 +1126,14 @@ const struct dmi_system_id touchscreen_dmi_table[] = { + DMI_MATCH(DMI_PRODUCT_NAME, "Tablet 9"), + }, + }, ++ { ++ /* CSL Panther Tab HD */ ++ .driver_data = (void *)&csl_panther_tab_hd_data, ++ .matches = { ++ DMI_MATCH(DMI_SYS_VENDOR, "CSL Computer GmbH & Co. KG"), ++ DMI_MATCH(DMI_PRODUCT_NAME, "CSL Panther Tab HD"), ++ }, ++ }, + { + /* CUBE iwork8 Air */ + .driver_data = (void *)&cube_iwork8_air_data, +@@ -1379,6 +1444,24 @@ const struct dmi_system_id touchscreen_dmi_table[] = { DMI_EXACT_MATCH(DMI_BOARD_NAME, "0E57"), }, }, + { ++ /* RCA Cambio W101 v2 */ ++ /* https://github.com/onitake/gsl-firmware/discussions/193 */ ++ .driver_data = (void *)&rca_cambio_w101_v2_data, ++ .matches = { ++ DMI_MATCH(DMI_SYS_VENDOR, "RCA"), ++ DMI_MATCH(DMI_PRODUCT_NAME, "W101SA23T1"), ++ }, ++ }, ++ { + /* RWC NANOTE P8 */ + .driver_data = (void *)&rwc_nanote_p8_data, + .matches = { @@ -244772,6 +297166,27 @@ index a76313006bdc4..c4f917d45b51d 100644 input.count = 1; input.pointer = params; params[0].type = ACPI_TYPE_INTEGER; +diff --git a/drivers/pnp/core.c b/drivers/pnp/core.c +index 4df5aa6a309c3..6a60c5d83383b 100644 +--- a/drivers/pnp/core.c ++++ b/drivers/pnp/core.c +@@ -148,14 +148,14 @@ struct pnp_dev *pnp_alloc_dev(struct pnp_protocol *protocol, int id, + dev->dev.coherent_dma_mask = dev->dma_mask; + dev->dev.release = &pnp_release_device; + +- dev_set_name(&dev->dev, "%02x:%02x", dev->protocol->number, dev->number); +- + dev_id = pnp_add_id(dev, pnpid); + if (!dev_id) { + kfree(dev); + return NULL; + } + ++ dev_set_name(&dev->dev, "%02x:%02x", dev->protocol->number, dev->number); ++ + return dev; + } + diff --git a/drivers/power/reset/arm-versatile-reboot.c b/drivers/power/reset/arm-versatile-reboot.c index 08d0a07b58ef2..c7624d7611a7e 100644 --- a/drivers/power/reset/arm-versatile-reboot.c @@ -244868,6 +297283,26 @@ index ff4b26b1cecae..b809fa5abbbaf 100644 di->maintenance_timer.function = ab8500_chargalg_maintenance_timer_expired; +diff --git a/drivers/power/supply/ab8500_charger.c b/drivers/power/supply/ab8500_charger.c +index 15eadaf46f144..a4f766fc7c9d7 100644 +--- a/drivers/power/supply/ab8500_charger.c ++++ b/drivers/power/supply/ab8500_charger.c +@@ -3726,7 +3726,14 @@ static int __init ab8500_charger_init(void) + if (ret) + return ret; + +- return platform_driver_register(&ab8500_charger_driver); ++ ret = platform_driver_register(&ab8500_charger_driver); ++ if (ret) { ++ platform_unregister_drivers(ab8500_charger_component_drivers, ++ ARRAY_SIZE(ab8500_charger_component_drivers)); ++ return ret; ++ } ++ ++ return 0; + } + + static void __exit ab8500_charger_exit(void) diff --git a/drivers/power/supply/ab8500_fg.c b/drivers/power/supply/ab8500_fg.c index 05fe9724ba508..57799a8079d44 100644 --- a/drivers/power/supply/ab8500_fg.c @@ -245157,10 +297592,22 @@ index 25207fe2aa68e..bfa7a576523df 100644 if (!IS_ERR(charger->reg) && !IS_ERR_OR_NULL(charger->edev)) { diff --git a/drivers/power/supply/power_supply_core.c b/drivers/power/supply/power_supply_core.c -index 0c2132c7f5d40..a6e9afa5a1cff 100644 +index 0c2132c7f5d40..3f9c60c5b250b 100644 --- a/drivers/power/supply/power_supply_core.c +++ b/drivers/power/supply/power_supply_core.c -@@ -853,6 +853,10 @@ power_supply_find_ocv2cap_table(struct power_supply_battery_info *info, +@@ -696,6 +696,11 @@ int power_supply_get_battery_info(struct power_supply *psy, + int i, tab_len, size; + + propname = 
kasprintf(GFP_KERNEL, "ocv-capacity-table-%d", index); ++ if (!propname) { ++ power_supply_put_battery_info(psy, info); ++ err = -ENOMEM; ++ goto out_put_node; ++ } + list = of_get_property(battery_np, propname, &size); + if (!list || !size) { + dev_err(&psy->dev, "failed to get %s\n", propname); +@@ -853,6 +858,10 @@ power_supply_find_ocv2cap_table(struct power_supply_battery_info *info, return NULL; for (i = 0; i < POWER_SUPPLY_OCV_TEMP_MAX; i++) { @@ -245171,6 +297618,16 @@ index 0c2132c7f5d40..a6e9afa5a1cff 100644 temp_diff = abs(info->ocv_temp[i] - temp); if (temp_diff < best_temp_diff) { +@@ -1216,8 +1225,8 @@ create_triggers_failed: + register_cooler_failed: + psy_unregister_thermal(psy); + register_thermal_failed: +- device_del(dev); + wakeup_init_failed: ++ device_del(dev); + device_add_failed: + check_supplies_failed: + dev_set_name_failed: diff --git a/drivers/power/supply/rt5033_battery.c b/drivers/power/supply/rt5033_battery.c index 9ad0afe83d1b7..7a23c70f48791 100644 --- a/drivers/power/supply/rt5033_battery.c @@ -245373,6 +297830,25 @@ index e05cee457471b..908cfd45d2624 100644 wm8350_free_irq(wm8350, WM8350_IRQ_CHG_VBATT_LT_3P9, wm8350); wm8350_free_irq(wm8350, WM8350_IRQ_CHG_VBATT_LT_3P1, wm8350); wm8350_free_irq(wm8350, WM8350_IRQ_CHG_VBATT_LT_2P85, wm8350); +diff --git a/drivers/power/supply/z2_battery.c b/drivers/power/supply/z2_battery.c +index 7ed4e4bb26eca..fd33cdf9cf12c 100644 +--- a/drivers/power/supply/z2_battery.c ++++ b/drivers/power/supply/z2_battery.c +@@ -206,10 +206,12 @@ static int z2_batt_probe(struct i2c_client *client, + + charger->charge_gpiod = devm_gpiod_get_optional(&client->dev, + NULL, GPIOD_IN); +- if (IS_ERR(charger->charge_gpiod)) +- return dev_err_probe(&client->dev, ++ if (IS_ERR(charger->charge_gpiod)) { ++ ret = dev_err_probe(&client->dev, + PTR_ERR(charger->charge_gpiod), + "failed to get charge GPIO\n"); ++ goto err; ++ } + + if (charger->charge_gpiod) { + gpiod_set_consumer_name(charger->charge_gpiod, "BATT CHRG"); diff --git a/drivers/powercap/intel_rapl_common.c b/drivers/powercap/intel_rapl_common.c index 7c0099e7a6d72..9dfc053878fda 100644 --- a/drivers/powercap/intel_rapl_common.c @@ -245757,6 +298233,42 @@ index 8e461f3baa05a..43b5509dde513 100644 disable_pwmclk: clk_disable_unprepare(lpc18xx_pwm->pwm_clk); return ret; +diff --git a/drivers/pwm/pwm-mediatek.c b/drivers/pwm/pwm-mediatek.c +index 0d4dd80e9f07f..f8f9a74891293 100644 +--- a/drivers/pwm/pwm-mediatek.c ++++ b/drivers/pwm/pwm-mediatek.c +@@ -275,7 +275,7 @@ static const struct pwm_mediatek_of_data mt2712_pwm_data = { + static const struct pwm_mediatek_of_data mt7622_pwm_data = { + .num_pwms = 6, + .pwm45_fixup = false, +- .has_ck_26m_sel = false, ++ .has_ck_26m_sel = true, + }; + + static const struct pwm_mediatek_of_data mt7623_pwm_data = { +diff --git a/drivers/pwm/pwm-mtk-disp.c b/drivers/pwm/pwm-mtk-disp.c +index c605013e4114c..3fbb4bae93a4e 100644 +--- a/drivers/pwm/pwm-mtk-disp.c ++++ b/drivers/pwm/pwm-mtk-disp.c +@@ -178,7 +178,7 @@ static void mtk_disp_pwm_get_state(struct pwm_chip *chip, + { + struct mtk_disp_pwm *mdp = to_mtk_disp_pwm(chip); + u64 rate, period, high_width; +- u32 clk_div, con0, con1; ++ u32 clk_div, pwm_en, con0, con1; + int err; + + err = clk_prepare_enable(mdp->clk_main); +@@ -197,7 +197,8 @@ static void mtk_disp_pwm_get_state(struct pwm_chip *chip, + rate = clk_get_rate(mdp->clk_main); + con0 = readl(mdp->base + mdp->data->con0); + con1 = readl(mdp->base + mdp->data->con1); +- state->enabled = !!(con0 & BIT(0)); ++ pwm_en = readl(mdp->base + 
DISP_PWM_EN); ++ state->enabled = !!(pwm_en & mdp->data->enable_mask); + clk_div = FIELD_GET(PWM_CLKDIV_MASK, con0); + period = FIELD_GET(PWM_PERIOD_MASK, con1); + /* diff --git a/drivers/pwm/pwm-raspberrypi-poe.c b/drivers/pwm/pwm-raspberrypi-poe.c index 579a15240e0a8..c877de37734d9 100644 --- a/drivers/pwm/pwm-raspberrypi-poe.c @@ -245771,7 +298283,7 @@ index 579a15240e0a8..c877de37734d9 100644 int ret; diff --git a/drivers/pwm/pwm-sifive.c b/drivers/pwm/pwm-sifive.c -index 253c4a17d2553..58347fcd48125 100644 +index 253c4a17d2553..07e9fc58354f4 100644 --- a/drivers/pwm/pwm-sifive.c +++ b/drivers/pwm/pwm-sifive.c @@ -23,7 +23,7 @@ @@ -245812,7 +298324,20 @@ index 253c4a17d2553..58347fcd48125 100644 if (state->enabled != enabled) pwm_sifive_enable(chip, state->enabled); -@@ -233,6 +229,8 @@ static int pwm_sifive_probe(struct platform_device *pdev) +@@ -221,8 +217,11 @@ static int pwm_sifive_clock_notifier(struct notifier_block *nb, + struct pwm_sifive_ddata *ddata = + container_of(nb, struct pwm_sifive_ddata, notifier); + +- if (event == POST_RATE_CHANGE) ++ if (event == POST_RATE_CHANGE) { ++ mutex_lock(&ddata->lock); + pwm_sifive_update_clock(ddata, ndata->new_rate); ++ mutex_unlock(&ddata->lock); ++ } + + return NOTIFY_OK; + } +@@ -233,6 +232,8 @@ static int pwm_sifive_probe(struct platform_device *pdev) struct pwm_sifive_ddata *ddata; struct pwm_chip *chip; int ret; @@ -245821,7 +298346,7 @@ index 253c4a17d2553..58347fcd48125 100644 ddata = devm_kzalloc(dev, sizeof(*ddata), GFP_KERNEL); if (!ddata) -@@ -259,6 +257,33 @@ static int pwm_sifive_probe(struct platform_device *pdev) +@@ -259,6 +260,33 @@ static int pwm_sifive_probe(struct platform_device *pdev) return ret; } @@ -245855,7 +298380,7 @@ index 253c4a17d2553..58347fcd48125 100644 /* Watch for changes to underlying clock frequency */ ddata->notifier.notifier_call = pwm_sifive_clock_notifier; ret = clk_notifier_register(ddata->clk, &ddata->notifier); -@@ -281,7 +306,11 @@ static int pwm_sifive_probe(struct platform_device *pdev) +@@ -281,7 +309,11 @@ static int pwm_sifive_probe(struct platform_device *pdev) unregister_clk: clk_notifier_unregister(ddata->clk, &ddata->notifier); disable_clk: @@ -245868,7 +298393,7 @@ index 253c4a17d2553..58347fcd48125 100644 return ret; } -@@ -289,23 +318,19 @@ disable_clk: +@@ -289,23 +321,19 @@ disable_clk: static int pwm_sifive_remove(struct platform_device *dev) { struct pwm_sifive_ddata *ddata = platform_get_drvdata(dev); @@ -245898,6 +298423,110 @@ index 253c4a17d2553..58347fcd48125 100644 return 0; } +diff --git a/drivers/pwm/pwm-tegra.c b/drivers/pwm/pwm-tegra.c +index 11a10b575ace9..6a1ff9d42f795 100644 +--- a/drivers/pwm/pwm-tegra.c ++++ b/drivers/pwm/pwm-tegra.c +@@ -142,8 +142,8 @@ static int tegra_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm, + * source clock rate as required_clk_rate, PWM controller will + * be able to configure the requested period. 
+ */ +- required_clk_rate = +- (NSEC_PER_SEC / period_ns) << PWM_DUTY_WIDTH; ++ required_clk_rate = DIV_ROUND_UP_ULL((u64)NSEC_PER_SEC << PWM_DUTY_WIDTH, ++ period_ns); + + err = clk_set_rate(pc->clk, required_clk_rate); + if (err < 0) +diff --git a/drivers/rapidio/devices/rio_mport_cdev.c b/drivers/rapidio/devices/rio_mport_cdev.c +index 94331d999d273..5ac2dc1e2abd8 100644 +--- a/drivers/rapidio/devices/rio_mport_cdev.c ++++ b/drivers/rapidio/devices/rio_mport_cdev.c +@@ -1803,8 +1803,11 @@ static int rio_mport_add_riodev(struct mport_cdev_priv *priv, + rio_init_dbell_res(&rdev->riores[RIO_DOORBELL_RESOURCE], + 0, 0xffff); + err = rio_add_device(rdev); +- if (err) +- goto cleanup; ++ if (err) { ++ put_device(&rdev->dev); ++ return err; ++ } ++ + rio_dev_get(rdev); + + return 0; +@@ -1900,10 +1903,6 @@ static int mport_cdev_open(struct inode *inode, struct file *filp) + + priv->md = chdev; + +- mutex_lock(&chdev->file_mutex); +- list_add_tail(&priv->list, &chdev->file_list); +- mutex_unlock(&chdev->file_mutex); +- + INIT_LIST_HEAD(&priv->db_filters); + INIT_LIST_HEAD(&priv->pw_filters); + spin_lock_init(&priv->fifo_lock); +@@ -1912,6 +1911,7 @@ static int mport_cdev_open(struct inode *inode, struct file *filp) + sizeof(struct rio_event) * MPORT_EVENT_DEPTH, + GFP_KERNEL); + if (ret < 0) { ++ put_device(&chdev->dev); + dev_err(&chdev->dev, DRV_NAME ": kfifo_alloc failed\n"); + ret = -ENOMEM; + goto err_fifo; +@@ -1922,6 +1922,9 @@ static int mport_cdev_open(struct inode *inode, struct file *filp) + spin_lock_init(&priv->req_lock); + mutex_init(&priv->dma_lock); + #endif ++ mutex_lock(&chdev->file_mutex); ++ list_add_tail(&priv->list, &chdev->file_list); ++ mutex_unlock(&chdev->file_mutex); + + filp->private_data = priv; + goto out; +diff --git a/drivers/rapidio/rio-scan.c b/drivers/rapidio/rio-scan.c +index 19b0c33f4a62a..fdcf742b2adbc 100644 +--- a/drivers/rapidio/rio-scan.c ++++ b/drivers/rapidio/rio-scan.c +@@ -454,8 +454,12 @@ static struct rio_dev *rio_setup_device(struct rio_net *net, + 0, 0xffff); + + ret = rio_add_device(rdev); +- if (ret) +- goto cleanup; ++ if (ret) { ++ if (rswitch) ++ kfree(rswitch->route_table); ++ put_device(&rdev->dev); ++ return NULL; ++ } + + rio_dev_get(rdev); + +diff --git a/drivers/rapidio/rio.c b/drivers/rapidio/rio.c +index e74cf09eeff07..9544b8ee0c963 100644 +--- a/drivers/rapidio/rio.c ++++ b/drivers/rapidio/rio.c +@@ -2186,11 +2186,16 @@ int rio_register_mport(struct rio_mport *port) + atomic_set(&port->state, RIO_DEVICE_RUNNING); + + res = device_register(&port->dev); +- if (res) ++ if (res) { + dev_err(&port->dev, "RIO: mport%d registration failed ERR=%d\n", + port->id, res); +- else ++ mutex_lock(&rio_mport_list_lock); ++ list_del(&port->node); ++ mutex_unlock(&rio_mport_list_lock); ++ put_device(&port->dev); ++ } else { + dev_dbg(&port->dev, "RIO: registered mport%d\n", port->id); ++ } + + return res; + } diff --git a/drivers/regulator/atc260x-regulator.c b/drivers/regulator/atc260x-regulator.c index 05147d2c38428..485e58b264c04 100644 --- a/drivers/regulator/atc260x-regulator.c @@ -245911,10 +298540,59 @@ index 05147d2c38428..485e58b264c04 100644 } diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c -index ca6caba8a191a..aa4d78b024837 100644 +index ca6caba8a191a..3eae3aa5ad1d2 100644 --- a/drivers/regulator/core.c +++ b/drivers/regulator/core.c -@@ -2132,10 +2132,13 @@ struct regulator *_regulator_get(struct device *dev, const char *id, +@@ -962,7 +962,7 @@ static int drms_uA_update(struct regulator_dev *rdev) + /* get input 
voltage */ + input_uV = 0; + if (rdev->supply) +- input_uV = regulator_get_voltage(rdev->supply); ++ input_uV = regulator_get_voltage_rdev(rdev->supply->rdev); + if (input_uV <= 0) + input_uV = rdev->constraints->input_uV; + if (input_uV <= 0) { +@@ -1531,7 +1531,13 @@ static int set_machine_constraints(struct regulator_dev *rdev) + if (rdev->supply_name && !rdev->supply) + return -EPROBE_DEFER; + +- if (rdev->supply) { ++ /* If supplying regulator has already been enabled, ++ * it's not intended to have use_count increment ++ * when rdev is only boot-on. ++ */ ++ if (rdev->supply && ++ (rdev->constraints->always_on || ++ !regulator_is_enabled(rdev->supply))) { + ret = regulator_enable(rdev->supply); + if (ret < 0) { + _regulator_put(rdev->supply); +@@ -1577,6 +1583,7 @@ static int set_supply(struct regulator_dev *rdev, + + rdev->supply = create_regulator(supply_rdev, &rdev->dev, "SUPPLY"); + if (rdev->supply == NULL) { ++ module_put(supply_rdev->owner); + err = -ENOMEM; + return err; + } +@@ -1750,7 +1757,7 @@ static struct regulator *create_regulator(struct regulator_dev *rdev, + + regulator = kzalloc(sizeof(*regulator), GFP_KERNEL); + if (regulator == NULL) { +- kfree(supply_name); ++ kfree_const(supply_name); + return NULL; + } + +@@ -1880,6 +1887,7 @@ static struct regulator_dev *regulator_dev_lookup(struct device *dev, + node = of_get_regulator(dev, supply); + if (node) { + r = of_find_regulator_by_node(node); ++ of_node_put(node); + if (r) + return r; + +@@ -2132,10 +2140,13 @@ struct regulator *_regulator_get(struct device *dev, const char *id, rdev->exclusive = 1; ret = _regulator_is_enabled(rdev); @@ -245930,7 +298608,7 @@ index ca6caba8a191a..aa4d78b024837 100644 } link = device_link_add(dev, &rdev->dev, DL_FLAG_STATELESS); -@@ -2633,7 +2636,7 @@ static int _regulator_do_enable(struct regulator_dev *rdev) +@@ -2633,7 +2644,7 @@ static int _regulator_do_enable(struct regulator_dev *rdev) * expired, return -ETIMEDOUT. 
*/ if (rdev->desc->poll_enabled_time) { @@ -245939,7 +298617,7 @@ index ca6caba8a191a..aa4d78b024837 100644 while (time_remaining > 0) { _regulator_enable_delay(rdev->desc->poll_enabled_time); -@@ -2685,13 +2688,18 @@ static int _regulator_do_enable(struct regulator_dev *rdev) +@@ -2685,13 +2696,18 @@ static int _regulator_do_enable(struct regulator_dev *rdev) */ static int _regulator_handle_consumer_enable(struct regulator *regulator) { @@ -245960,7 +298638,45 @@ index ca6caba8a191a..aa4d78b024837 100644 return 0; } -@@ -6010,9 +6018,8 @@ core_initcall(regulator_init); +@@ -5063,6 +5079,7 @@ static void regulator_dev_release(struct device *dev) + { + struct regulator_dev *rdev = dev_get_drvdata(dev); + ++ debugfs_remove_recursive(rdev->debugfs); + kfree(rdev->constraints); + of_node_put(rdev->dev.of_node); + kfree(rdev); +@@ -5537,15 +5554,20 @@ unset_supplies: + regulator_remove_coupling(rdev); + mutex_unlock(®ulator_list_mutex); + wash: ++ regulator_put(rdev->supply); + kfree(rdev->coupling_desc.coupled_rdevs); + mutex_lock(®ulator_list_mutex); + regulator_ena_gpio_free(rdev); + mutex_unlock(®ulator_list_mutex); ++ put_device(&rdev->dev); ++ rdev = NULL; + clean: + if (dangling_of_gpiod) + gpiod_put(config->ena_gpiod); ++ if (rdev && rdev->dev.of_node) ++ of_node_put(rdev->dev.of_node); ++ kfree(rdev); + kfree(config); +- put_device(&rdev->dev); + rinse: + if (dangling_cfg_gpiod) + gpiod_put(cfg->ena_gpiod); +@@ -5574,7 +5596,6 @@ void regulator_unregister(struct regulator_dev *rdev) + + mutex_lock(®ulator_list_mutex); + +- debugfs_remove_recursive(rdev->debugfs); + WARN_ON(rdev->open_count); + regulator_remove_coupling(rdev); + unset_regulator_supplies(rdev); +@@ -6010,9 +6031,8 @@ core_initcall(regulator_init); static int regulator_late_cleanup(struct device *dev, void *data) { struct regulator_dev *rdev = dev_to_rdev(dev); @@ -245971,7 +298687,7 @@ index ca6caba8a191a..aa4d78b024837 100644 if (c && c->always_on) return 0; -@@ -6025,14 +6032,8 @@ static int regulator_late_cleanup(struct device *dev, void *data) +@@ -6025,14 +6045,8 @@ static int regulator_late_cleanup(struct device *dev, void *data) if (rdev->use_count) goto unlock; @@ -246013,6 +298729,35 @@ index e669250902580..3315994d7e311 100644 } /* Set these up for of_regulator_match call which may want .of_map_modes */ +diff --git a/drivers/regulator/da9211-regulator.c b/drivers/regulator/da9211-regulator.c +index e01b32d1fa17d..00828f5baa972 100644 +--- a/drivers/regulator/da9211-regulator.c ++++ b/drivers/regulator/da9211-regulator.c +@@ -498,6 +498,12 @@ static int da9211_i2c_probe(struct i2c_client *i2c) + + chip->chip_irq = i2c->irq; + ++ ret = da9211_regulator_init(chip); ++ if (ret < 0) { ++ dev_err(chip->dev, "Failed to initialize regulator: %d\n", ret); ++ return ret; ++ } ++ + if (chip->chip_irq != 0) { + ret = devm_request_threaded_irq(chip->dev, chip->chip_irq, NULL, + da9211_irq_handler, +@@ -512,11 +518,6 @@ static int da9211_i2c_probe(struct i2c_client *i2c) + dev_warn(chip->dev, "No IRQ configured\n"); + } + +- ret = da9211_regulator_init(chip); +- +- if (ret < 0) +- dev_err(chip->dev, "Failed to initialize regulator: %d\n", ret); +- + return ret; + } + diff --git a/drivers/regulator/of_regulator.c b/drivers/regulator/of_regulator.c index f54d4f176882a..e12b681c72e5e 100644 --- a/drivers/regulator/of_regulator.c @@ -246061,7 +298806,7 @@ index d60d7d1b7fa25..a9a0bd918d1e2 100644 ret = pfuze_parse_regulators_dt(pfuze_chip); if (ret) diff --git a/drivers/regulator/qcom-labibb-regulator.c 
b/drivers/regulator/qcom-labibb-regulator.c -index b3da0dc58782f..639b71eb41ffe 100644 +index b3da0dc58782f..bcf7140f3bc98 100644 --- a/drivers/regulator/qcom-labibb-regulator.c +++ b/drivers/regulator/qcom-labibb-regulator.c @@ -260,7 +260,7 @@ static irqreturn_t qcom_labibb_ocp_isr(int irq, void *chip) @@ -246073,6 +298818,27 @@ index b3da0dc58782f..639b71eb41ffe 100644 /* If we tried to recover for too many times it's not getting better */ if (vreg->ocp_irq_count > LABIBB_MAX_OCP_COUNT) +@@ -822,6 +822,7 @@ static int qcom_labibb_regulator_probe(struct platform_device *pdev) + if (irq == 0) + irq = -EINVAL; + ++ of_node_put(reg_node); + return dev_err_probe(vreg->dev, irq, + "Short-circuit irq not found.\n"); + } +diff --git a/drivers/regulator/qcom-rpmh-regulator.c b/drivers/regulator/qcom-rpmh-regulator.c +index 7f458d510483f..27efdbbd90d9a 100644 +--- a/drivers/regulator/qcom-rpmh-regulator.c ++++ b/drivers/regulator/qcom-rpmh-regulator.c +@@ -1108,7 +1108,7 @@ static const struct rpmh_vreg_init_data pm7325_vreg_data[] = { + static const struct rpmh_vreg_init_data pmr735a_vreg_data[] = { + RPMH_VREG("smps1", "smp%s1", &pmic5_ftsmps520, "vdd-s1"), + RPMH_VREG("smps2", "smp%s2", &pmic5_ftsmps520, "vdd-s2"), +- RPMH_VREG("smps3", "smp%s3", &pmic5_hfsmps510, "vdd-s3"), ++ RPMH_VREG("smps3", "smp%s3", &pmic5_hfsmps515, "vdd-s3"), + RPMH_VREG("ldo1", "ldo%s1", &pmic5_nldo, "vdd-l1-l2"), + RPMH_VREG("ldo2", "ldo%s2", &pmic5_nldo, "vdd-l1-l2"), + RPMH_VREG("ldo3", "ldo%s3", &pmic5_nldo, "vdd-l3"), diff --git a/drivers/regulator/qcom_rpm-regulator.c b/drivers/regulator/qcom_rpm-regulator.c index 7f9d66ac37ff8..3c41b71a1f529 100644 --- a/drivers/regulator/qcom_rpm-regulator.c @@ -246501,6 +299267,79 @@ index 1f02f60ad1366..41ae7ac27ff6a 100644 /* * Register a regulator for each valid regulator-DT-entry that we * can successfully reach via SCMI and has a valid associated voltage +diff --git a/drivers/regulator/slg51000-regulator.c b/drivers/regulator/slg51000-regulator.c +index 75a941fb3c2bd..1b2eee95ad3f9 100644 +--- a/drivers/regulator/slg51000-regulator.c ++++ b/drivers/regulator/slg51000-regulator.c +@@ -457,6 +457,8 @@ static int slg51000_i2c_probe(struct i2c_client *client) + chip->cs_gpiod = cs_gpiod; + } + ++ usleep_range(10000, 11000); ++ + i2c_set_clientdata(client, chip); + chip->chip_irq = client->irq; + chip->dev = dev; +diff --git a/drivers/regulator/twl6030-regulator.c b/drivers/regulator/twl6030-regulator.c +index 430265c404d65..f3856750944f4 100644 +--- a/drivers/regulator/twl6030-regulator.c ++++ b/drivers/regulator/twl6030-regulator.c +@@ -67,6 +67,7 @@ struct twlreg_info { + #define TWL6030_CFG_STATE_SLEEP 0x03 + #define TWL6030_CFG_STATE_GRP_SHIFT 5 + #define TWL6030_CFG_STATE_APP_SHIFT 2 ++#define TWL6030_CFG_STATE_MASK 0x03 + #define TWL6030_CFG_STATE_APP_MASK (0x03 << TWL6030_CFG_STATE_APP_SHIFT) + #define TWL6030_CFG_STATE_APP(v) (((v) & TWL6030_CFG_STATE_APP_MASK) >>\ + TWL6030_CFG_STATE_APP_SHIFT) +@@ -128,13 +129,14 @@ static int twl6030reg_is_enabled(struct regulator_dev *rdev) + if (grp < 0) + return grp; + grp &= P1_GRP_6030; ++ val = twlreg_read(info, TWL_MODULE_PM_RECEIVER, VREG_STATE); ++ val = TWL6030_CFG_STATE_APP(val); + } else { ++ val = twlreg_read(info, TWL_MODULE_PM_RECEIVER, VREG_STATE); ++ val &= TWL6030_CFG_STATE_MASK; + grp = 1; + } + +- val = twlreg_read(info, TWL_MODULE_PM_RECEIVER, VREG_STATE); +- val = TWL6030_CFG_STATE_APP(val); +- + return grp && (val == TWL6030_CFG_STATE_ON); + } + +@@ -187,7 +189,12 @@ static int twl6030reg_get_status(struct 
regulator_dev *rdev) + + val = twlreg_read(info, TWL_MODULE_PM_RECEIVER, VREG_STATE); + +- switch (TWL6030_CFG_STATE_APP(val)) { ++ if (info->features & TWL6032_SUBCLASS) ++ val &= TWL6030_CFG_STATE_MASK; ++ else ++ val = TWL6030_CFG_STATE_APP(val); ++ ++ switch (val) { + case TWL6030_CFG_STATE_ON: + return REGULATOR_STATUS_NORMAL; + +@@ -530,6 +537,7 @@ static const struct twlreg_info TWL6030_INFO_##label = { \ + #define TWL6032_ADJUSTABLE_LDO(label, offset) \ + static const struct twlreg_info TWL6032_INFO_##label = { \ + .base = offset, \ ++ .features = TWL6032_SUBCLASS, \ + .desc = { \ + .name = #label, \ + .id = TWL6032_REG_##label, \ +@@ -562,6 +570,7 @@ static const struct twlreg_info TWLFIXED_INFO_##label = { \ + #define TWL6032_ADJUSTABLE_SMPS(label, offset) \ + static const struct twlreg_info TWLSMPS_INFO_##label = { \ + .base = offset, \ ++ .features = TWL6032_SUBCLASS, \ + .desc = { \ + .name = #label, \ + .id = TWL6032_REG_##label, \ diff --git a/drivers/regulator/wm8994-regulator.c b/drivers/regulator/wm8994-regulator.c index cadea0344486f..40befdd9dfa92 100644 --- a/drivers/regulator/wm8994-regulator.c @@ -246812,7 +299651,7 @@ index 423b31dfa5741..ca1c7387776b5 100644 qproc->mpss_phys = qproc->mpss_reloc = r.start; qproc->mpss_size = resource_size(&r); diff --git a/drivers/remoteproc/qcom_q6v5_pas.c b/drivers/remoteproc/qcom_q6v5_pas.c -index 401b1ec907852..78d90d856e405 100644 +index 401b1ec907852..fbcbc00f2e645 100644 --- a/drivers/remoteproc/qcom_q6v5_pas.c +++ b/drivers/remoteproc/qcom_q6v5_pas.c @@ -87,6 +87,9 @@ static void adsp_minidump(struct rproc *rproc) @@ -246825,7 +299664,32 @@ index 401b1ec907852..78d90d856e405 100644 qcom_minidump(rproc, adsp->minidump_id); } -@@ -661,6 +664,7 @@ static const struct adsp_data sm8350_cdsp_resource = { +@@ -383,6 +386,7 @@ static int adsp_alloc_memory_region(struct qcom_adsp *adsp) + } + + ret = of_address_to_resource(node, 0, &r); ++ of_node_put(node); + if (ret) + return ret; + +@@ -495,6 +499,7 @@ detach_proxy_pds: + detach_active_pds: + adsp_pds_detach(adsp, adsp->active_pds, adsp->active_pd_count); + free_rproc: ++ device_init_wakeup(adsp->dev, false); + rproc_free(rproc); + + return ret; +@@ -510,6 +515,8 @@ static int adsp_remove(struct platform_device *pdev) + qcom_remove_sysmon_subdev(adsp->sysmon); + qcom_remove_smd_subdev(adsp->rproc, &adsp->smd_subdev); + qcom_remove_ssr_subdev(adsp->rproc, &adsp->ssr_subdev); ++ adsp_pds_detach(adsp, adsp->proxy_pds, adsp->proxy_pd_count); ++ device_init_wakeup(adsp->dev, false); + rproc_free(adsp->rproc); + + return 0; +@@ -661,6 +668,7 @@ static const struct adsp_data sm8350_cdsp_resource = { }, .proxy_pd_names = (char*[]){ "cx", @@ -246833,8 +299697,39 @@ index 401b1ec907852..78d90d856e405 100644 NULL }, .ssr_name = "cdsp", +diff --git a/drivers/remoteproc/qcom_q6v5_wcss.c b/drivers/remoteproc/qcom_q6v5_wcss.c +index 20d50ec7eff1b..cfd34ffcbb121 100644 +--- a/drivers/remoteproc/qcom_q6v5_wcss.c ++++ b/drivers/remoteproc/qcom_q6v5_wcss.c +@@ -351,7 +351,7 @@ static int q6v5_wcss_qcs404_power_on(struct q6v5_wcss *wcss) + if (ret) { + dev_err(wcss->dev, + "xo cbcr enabling timed out (rc:%d)\n", ret); +- return ret; ++ goto disable_xo_cbcr_clk; + } + + writel(0, wcss->reg_base + Q6SS_CGC_OVERRIDE); +@@ -417,6 +417,7 @@ disable_sleep_cbcr_clk: + val = readl(wcss->reg_base + Q6SS_SLEEP_CBCR); + val &= ~Q6SS_CLK_ENABLE; + writel(val, wcss->reg_base + Q6SS_SLEEP_CBCR); ++disable_xo_cbcr_clk: + val = readl(wcss->reg_base + Q6SS_XO_CBCR); + val &= ~Q6SS_CLK_ENABLE; + writel(val, 
wcss->reg_base + Q6SS_XO_CBCR); +@@ -827,6 +828,9 @@ static int q6v5_wcss_init_mmio(struct q6v5_wcss *wcss, + int ret; + + res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "qdsp6"); ++ if (!res) ++ return -EINVAL; ++ + wcss->reg_base = devm_ioremap(&pdev->dev, res->start, + resource_size(res)); + if (!wcss->reg_base) diff --git a/drivers/remoteproc/qcom_sysmon.c b/drivers/remoteproc/qcom_sysmon.c -index 9fca814928635..a9f04dd83ab68 100644 +index 9fca814928635..fbfaf2637a91a 100644 --- a/drivers/remoteproc/qcom_sysmon.c +++ b/drivers/remoteproc/qcom_sysmon.c @@ -41,6 +41,7 @@ struct qcom_sysmon { @@ -246882,6 +299777,25 @@ index 9fca814928635..a9f04dd83ab68 100644 mutex_init(&sysmon->lock); mutex_init(&sysmon->state_lock); +@@ -640,7 +650,9 @@ struct qcom_sysmon *qcom_add_sysmon_subdev(struct rproc *rproc, + if (sysmon->shutdown_irq != -ENODATA) { + dev_err(sysmon->dev, + "failed to retrieve shutdown-ack IRQ\n"); +- return ERR_PTR(sysmon->shutdown_irq); ++ ret = sysmon->shutdown_irq; ++ kfree(sysmon); ++ return ERR_PTR(ret); + } + } else { + ret = devm_request_threaded_irq(sysmon->dev, +@@ -651,6 +663,7 @@ struct qcom_sysmon *qcom_add_sysmon_subdev(struct rproc *rproc, + if (ret) { + dev_err(sysmon->dev, + "failed to acquire shutdown-ack IRQ\n"); ++ kfree(sysmon); + return ERR_PTR(ret); + } + } diff --git a/drivers/remoteproc/qcom_wcnss.c b/drivers/remoteproc/qcom_wcnss.c index ebadc6c08e116..97a0c0dc4c77a 100644 --- a/drivers/remoteproc/qcom_wcnss.c @@ -246925,7 +299839,7 @@ index ebadc6c08e116..97a0c0dc4c77a 100644 return ret; diff --git a/drivers/remoteproc/remoteproc_core.c b/drivers/remoteproc/remoteproc_core.c -index 502b6604b757b..775df165eb450 100644 +index 502b6604b757b..97e59f7461261 100644 --- a/drivers/remoteproc/remoteproc_core.c +++ b/drivers/remoteproc/remoteproc_core.c @@ -556,9 +556,6 @@ static int rproc_handle_vdev(struct rproc *rproc, void *ptr, @@ -246950,6 +299864,34 @@ index 502b6604b757b..775df165eb450 100644 /* Make device dma capable by inheriting from parent's capabilities */ set_dma_ops(&rvdev->dev, get_dma_ops(rproc->dev.parent)); +@@ -1953,12 +1955,18 @@ static void rproc_crash_handler_work(struct work_struct *work) + + mutex_lock(&rproc->lock); + +- if (rproc->state == RPROC_CRASHED || rproc->state == RPROC_OFFLINE) { ++ if (rproc->state == RPROC_CRASHED) { + /* handle only the first crash detected */ + mutex_unlock(&rproc->lock); + return; + } + ++ if (rproc->state == RPROC_OFFLINE) { ++ /* Don't recover if the remote processor was stopped */ ++ mutex_unlock(&rproc->lock); ++ goto out; ++ } ++ + rproc->state = RPROC_CRASHED; + dev_err(dev, "handling crash #%u in %s\n", ++rproc->crash_cnt, + rproc->name); +@@ -1968,6 +1976,7 @@ static void rproc_crash_handler_work(struct work_struct *work) + if (!rproc->recovery_disabled) + rproc_trigger_recovery(rproc); + ++out: + pm_relax(rproc->dev.parent); + } + diff --git a/drivers/remoteproc/remoteproc_coredump.c b/drivers/remoteproc/remoteproc_coredump.c index aee657cc08c6a..c892f433a323e 100644 --- a/drivers/remoteproc/remoteproc_coredump.c @@ -247061,6 +300003,99 @@ index e0704fd2b5336..a8dde46063602 100644 /* put pll and phy into reset state */ spin_lock_irqsave(&priv->lock, flags); +diff --git a/drivers/reset/reset-uniphier-glue.c b/drivers/reset/reset-uniphier-glue.c +index 027990b79f61b..7493e9618837e 100644 +--- a/drivers/reset/reset-uniphier-glue.c ++++ b/drivers/reset/reset-uniphier-glue.c +@@ -23,7 +23,7 @@ struct uniphier_glue_reset_soc_data { + + struct uniphier_glue_reset_priv { + struct 
clk_bulk_data clk[MAX_CLKS]; +- struct reset_control *rst[MAX_RSTS]; ++ struct reset_control_bulk_data rst[MAX_RSTS]; + struct reset_simple_data rdata; + const struct uniphier_glue_reset_soc_data *data; + }; +@@ -33,9 +33,7 @@ static int uniphier_glue_reset_probe(struct platform_device *pdev) + struct device *dev = &pdev->dev; + struct uniphier_glue_reset_priv *priv; + struct resource *res; +- resource_size_t size; +- const char *name; +- int i, ret, nr; ++ int i, ret; + + priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL); + if (!priv) +@@ -47,7 +45,6 @@ static int uniphier_glue_reset_probe(struct platform_device *pdev) + return -EINVAL; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); +- size = resource_size(res); + priv->rdata.membase = devm_ioremap_resource(dev, res); + if (IS_ERR(priv->rdata.membase)) + return PTR_ERR(priv->rdata.membase); +@@ -58,26 +55,24 @@ static int uniphier_glue_reset_probe(struct platform_device *pdev) + if (ret) + return ret; + +- for (i = 0; i < priv->data->nrsts; i++) { +- name = priv->data->reset_names[i]; +- priv->rst[i] = devm_reset_control_get_shared(dev, name); +- if (IS_ERR(priv->rst[i])) +- return PTR_ERR(priv->rst[i]); +- } ++ for (i = 0; i < priv->data->nrsts; i++) ++ priv->rst[i].id = priv->data->reset_names[i]; ++ ret = devm_reset_control_bulk_get_shared(dev, priv->data->nrsts, ++ priv->rst); ++ if (ret) ++ return ret; + + ret = clk_bulk_prepare_enable(priv->data->nclks, priv->clk); + if (ret) + return ret; + +- for (nr = 0; nr < priv->data->nrsts; nr++) { +- ret = reset_control_deassert(priv->rst[nr]); +- if (ret) +- goto out_rst_assert; +- } ++ ret = reset_control_bulk_deassert(priv->data->nrsts, priv->rst); ++ if (ret) ++ goto out_clk_disable; + + spin_lock_init(&priv->rdata.lock); + priv->rdata.rcdev.owner = THIS_MODULE; +- priv->rdata.rcdev.nr_resets = size * BITS_PER_BYTE; ++ priv->rdata.rcdev.nr_resets = resource_size(res) * BITS_PER_BYTE; + priv->rdata.rcdev.ops = &reset_simple_ops; + priv->rdata.rcdev.of_node = dev->of_node; + priv->rdata.active_low = true; +@@ -91,9 +86,9 @@ static int uniphier_glue_reset_probe(struct platform_device *pdev) + return 0; + + out_rst_assert: +- while (nr--) +- reset_control_assert(priv->rst[nr]); ++ reset_control_bulk_assert(priv->data->nrsts, priv->rst); + ++out_clk_disable: + clk_bulk_disable_unprepare(priv->data->nclks, priv->clk); + + return ret; +@@ -102,10 +97,8 @@ out_rst_assert: + static int uniphier_glue_reset_remove(struct platform_device *pdev) + { + struct uniphier_glue_reset_priv *priv = platform_get_drvdata(pdev); +- int i; + +- for (i = 0; i < priv->data->nrsts; i++) +- reset_control_assert(priv->rst[i]); ++ reset_control_bulk_assert(priv->data->nrsts, priv->rst); + + clk_bulk_disable_unprepare(priv->data->nclks, priv->clk); + diff --git a/drivers/rpmsg/mtk_rpmsg.c b/drivers/rpmsg/mtk_rpmsg.c index 96a17ec291401..2d8cb596ad691 100644 --- a/drivers/rpmsg/mtk_rpmsg.c @@ -247360,7 +300395,7 @@ index 9a2bd4947007c..3ee17c4d72987 100644 alarm.time = rtc_ktime_to_tm(timer->node.expires); alarm.enabled = 1; diff --git a/drivers/rtc/rtc-cmos.c b/drivers/rtc/rtc-cmos.c -index 4eb53412b8085..b90a603d6b12f 100644 +index 4eb53412b8085..00e2ca7374ecf 100644 --- a/drivers/rtc/rtc-cmos.c +++ b/drivers/rtc/rtc-cmos.c @@ -222,6 +222,8 @@ static inline void cmos_write_bank2(unsigned char val, unsigned char addr) @@ -247386,9 +300421,176 @@ index 4eb53412b8085..b90a603d6b12f 100644 return 0; } -@@ -457,7 +464,10 @@ static int cmos_set_alarm(struct device *dev, struct rtc_wkalrm *t) - min = 
t->time.tm_min; - sec = t->time.tm_sec; +@@ -242,10 +249,46 @@ static int cmos_set_time(struct device *dev, struct rtc_time *t) + return mc146818_set_time(t); + } + ++struct cmos_read_alarm_callback_param { ++ struct cmos_rtc *cmos; ++ struct rtc_time *time; ++ unsigned char rtc_control; ++}; ++ ++static void cmos_read_alarm_callback(unsigned char __always_unused seconds, ++ void *param_in) ++{ ++ struct cmos_read_alarm_callback_param *p = ++ (struct cmos_read_alarm_callback_param *)param_in; ++ struct rtc_time *time = p->time; ++ ++ time->tm_sec = CMOS_READ(RTC_SECONDS_ALARM); ++ time->tm_min = CMOS_READ(RTC_MINUTES_ALARM); ++ time->tm_hour = CMOS_READ(RTC_HOURS_ALARM); ++ ++ if (p->cmos->day_alrm) { ++ /* ignore upper bits on readback per ACPI spec */ ++ time->tm_mday = CMOS_READ(p->cmos->day_alrm) & 0x3f; ++ if (!time->tm_mday) ++ time->tm_mday = -1; ++ ++ if (p->cmos->mon_alrm) { ++ time->tm_mon = CMOS_READ(p->cmos->mon_alrm); ++ if (!time->tm_mon) ++ time->tm_mon = -1; ++ } ++ } ++ ++ p->rtc_control = CMOS_READ(RTC_CONTROL); ++} ++ + static int cmos_read_alarm(struct device *dev, struct rtc_wkalrm *t) + { + struct cmos_rtc *cmos = dev_get_drvdata(dev); +- unsigned char rtc_control; ++ struct cmos_read_alarm_callback_param p = { ++ .cmos = cmos, ++ .time = &t->time, ++ }; + + /* This not only a rtc_op, but also called directly */ + if (!is_valid_irq(cmos->irq)) +@@ -256,28 +299,18 @@ static int cmos_read_alarm(struct device *dev, struct rtc_wkalrm *t) + * the future. + */ + +- spin_lock_irq(&rtc_lock); +- t->time.tm_sec = CMOS_READ(RTC_SECONDS_ALARM); +- t->time.tm_min = CMOS_READ(RTC_MINUTES_ALARM); +- t->time.tm_hour = CMOS_READ(RTC_HOURS_ALARM); +- +- if (cmos->day_alrm) { +- /* ignore upper bits on readback per ACPI spec */ +- t->time.tm_mday = CMOS_READ(cmos->day_alrm) & 0x3f; +- if (!t->time.tm_mday) +- t->time.tm_mday = -1; +- +- if (cmos->mon_alrm) { +- t->time.tm_mon = CMOS_READ(cmos->mon_alrm); +- if (!t->time.tm_mon) +- t->time.tm_mon = -1; +- } +- } +- +- rtc_control = CMOS_READ(RTC_CONTROL); +- spin_unlock_irq(&rtc_lock); ++ /* Some Intel chipsets disconnect the alarm registers when the clock ++ * update is in progress - during this time reads return bogus values ++ * and writes may fail silently. See for example "7th Generation Intel® ++ * Processor Family I/O for U/Y Platforms [...] Datasheet", section ++ * 27.7.1 ++ * ++ * Use the mc146818_avoid_UIP() function to avoid this. 
++ */ ++ if (!mc146818_avoid_UIP(cmos_read_alarm_callback, &p)) ++ return -EIO; + +- if (!(rtc_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD) { ++ if (!(p.rtc_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD) { + if (((unsigned)t->time.tm_sec) < 0x60) + t->time.tm_sec = bcd2bin(t->time.tm_sec); + else +@@ -306,7 +339,7 @@ static int cmos_read_alarm(struct device *dev, struct rtc_wkalrm *t) + } + } + +- t->enabled = !!(rtc_control & RTC_AIE); ++ t->enabled = !!(p.rtc_control & RTC_AIE); + t->pending = 0; + + return 0; +@@ -437,10 +470,57 @@ static int cmos_validate_alarm(struct device *dev, struct rtc_wkalrm *t) + return 0; + } + ++struct cmos_set_alarm_callback_param { ++ struct cmos_rtc *cmos; ++ unsigned char mon, mday, hrs, min, sec; ++ struct rtc_wkalrm *t; ++}; ++ ++/* Note: this function may be executed by mc146818_avoid_UIP() more then ++ * once ++ */ ++static void cmos_set_alarm_callback(unsigned char __always_unused seconds, ++ void *param_in) ++{ ++ struct cmos_set_alarm_callback_param *p = ++ (struct cmos_set_alarm_callback_param *)param_in; ++ ++ /* next rtc irq must not be from previous alarm setting */ ++ cmos_irq_disable(p->cmos, RTC_AIE); ++ ++ /* update alarm */ ++ CMOS_WRITE(p->hrs, RTC_HOURS_ALARM); ++ CMOS_WRITE(p->min, RTC_MINUTES_ALARM); ++ CMOS_WRITE(p->sec, RTC_SECONDS_ALARM); ++ ++ /* the system may support an "enhanced" alarm */ ++ if (p->cmos->day_alrm) { ++ CMOS_WRITE(p->mday, p->cmos->day_alrm); ++ if (p->cmos->mon_alrm) ++ CMOS_WRITE(p->mon, p->cmos->mon_alrm); ++ } ++ ++ if (use_hpet_alarm()) { ++ /* ++ * FIXME the HPET alarm glue currently ignores day_alrm ++ * and mon_alrm ... ++ */ ++ hpet_set_alarm_time(p->t->time.tm_hour, p->t->time.tm_min, ++ p->t->time.tm_sec); ++ } ++ ++ if (p->t->enabled) ++ cmos_irq_enable(p->cmos, RTC_AIE); ++} ++ + static int cmos_set_alarm(struct device *dev, struct rtc_wkalrm *t) + { + struct cmos_rtc *cmos = dev_get_drvdata(dev); +- unsigned char mon, mday, hrs, min, sec, rtc_control; ++ struct cmos_set_alarm_callback_param p = { ++ .cmos = cmos, ++ .t = t ++ }; ++ unsigned char rtc_control; + int ret; + + /* This not only a rtc_op, but also called directly */ +@@ -451,52 +531,33 @@ static int cmos_set_alarm(struct device *dev, struct rtc_wkalrm *t) + if (ret < 0) + return ret; + +- mon = t->time.tm_mon + 1; +- mday = t->time.tm_mday; +- hrs = t->time.tm_hour; +- min = t->time.tm_min; +- sec = t->time.tm_sec; ++ p.mon = t->time.tm_mon + 1; ++ p.mday = t->time.tm_mday; ++ p.hrs = t->time.tm_hour; ++ p.min = t->time.tm_min; ++ p.sec = t->time.tm_sec; + spin_lock_irq(&rtc_lock); rtc_control = CMOS_READ(RTC_CONTROL); @@ -247396,8 +300598,263 @@ index 4eb53412b8085..b90a603d6b12f 100644 + if (!(rtc_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD) { /* Writing 0xff means "don't care" or "match all". */ - mon = (mon <= 12) ? bin2bcd(mon) : 0xff; -@@ -790,16 +800,14 @@ cmos_do_probe(struct device *dev, struct resource *ports, int rtc_irq) +- mon = (mon <= 12) ? bin2bcd(mon) : 0xff; +- mday = (mday >= 1 && mday <= 31) ? bin2bcd(mday) : 0xff; +- hrs = (hrs < 24) ? bin2bcd(hrs) : 0xff; +- min = (min < 60) ? bin2bcd(min) : 0xff; +- sec = (sec < 60) ? 
bin2bcd(sec) : 0xff; +- } +- +- spin_lock_irq(&rtc_lock); +- +- /* next rtc irq must not be from previous alarm setting */ +- cmos_irq_disable(cmos, RTC_AIE); +- +- /* update alarm */ +- CMOS_WRITE(hrs, RTC_HOURS_ALARM); +- CMOS_WRITE(min, RTC_MINUTES_ALARM); +- CMOS_WRITE(sec, RTC_SECONDS_ALARM); +- +- /* the system may support an "enhanced" alarm */ +- if (cmos->day_alrm) { +- CMOS_WRITE(mday, cmos->day_alrm); +- if (cmos->mon_alrm) +- CMOS_WRITE(mon, cmos->mon_alrm); ++ p.mon = (p.mon <= 12) ? bin2bcd(p.mon) : 0xff; ++ p.mday = (p.mday >= 1 && p.mday <= 31) ? bin2bcd(p.mday) : 0xff; ++ p.hrs = (p.hrs < 24) ? bin2bcd(p.hrs) : 0xff; ++ p.min = (p.min < 60) ? bin2bcd(p.min) : 0xff; ++ p.sec = (p.sec < 60) ? bin2bcd(p.sec) : 0xff; + } + +- if (use_hpet_alarm()) { +- /* +- * FIXME the HPET alarm glue currently ignores day_alrm +- * and mon_alrm ... +- */ +- hpet_set_alarm_time(t->time.tm_hour, t->time.tm_min, +- t->time.tm_sec); +- } +- +- if (t->enabled) +- cmos_irq_enable(cmos, RTC_AIE); +- +- spin_unlock_irq(&rtc_lock); ++ /* ++ * Some Intel chipsets disconnect the alarm registers when the clock ++ * update is in progress - during this time writes fail silently. ++ * ++ * Use mc146818_avoid_UIP() to avoid this. ++ */ ++ if (!mc146818_avoid_UIP(cmos_set_alarm_callback, &p)) ++ return -EIO; + + cmos->alarm_expires = rtc_tm_to_time64(&t->time); + +@@ -683,6 +744,168 @@ static irqreturn_t cmos_interrupt(int irq, void *p) + return IRQ_NONE; + } + ++#ifdef CONFIG_ACPI ++ ++#include <linux/acpi.h> ++ ++static u32 rtc_handler(void *context) ++{ ++ struct device *dev = context; ++ struct cmos_rtc *cmos = dev_get_drvdata(dev); ++ unsigned char rtc_control = 0; ++ unsigned char rtc_intr; ++ unsigned long flags; ++ ++ ++ /* ++ * Always update rtc irq when ACPI is used as RTC Alarm. ++ * Or else, ACPI SCI is enabled during suspend/resume only, ++ * update rtc irq in that case. ++ */ ++ if (cmos_use_acpi_alarm()) ++ cmos_interrupt(0, (void *)cmos->rtc); ++ else { ++ /* Fix me: can we use cmos_interrupt() here as well? */ ++ spin_lock_irqsave(&rtc_lock, flags); ++ if (cmos_rtc.suspend_ctrl) ++ rtc_control = CMOS_READ(RTC_CONTROL); ++ if (rtc_control & RTC_AIE) { ++ cmos_rtc.suspend_ctrl &= ~RTC_AIE; ++ CMOS_WRITE(rtc_control, RTC_CONTROL); ++ rtc_intr = CMOS_READ(RTC_INTR_FLAGS); ++ rtc_update_irq(cmos->rtc, 1, rtc_intr); ++ } ++ spin_unlock_irqrestore(&rtc_lock, flags); ++ } ++ ++ pm_wakeup_hard_event(dev); ++ acpi_clear_event(ACPI_EVENT_RTC); ++ acpi_disable_event(ACPI_EVENT_RTC, 0); ++ return ACPI_INTERRUPT_HANDLED; ++} ++ ++static void acpi_rtc_event_setup(struct device *dev) ++{ ++ if (acpi_disabled) ++ return; ++ ++ acpi_install_fixed_event_handler(ACPI_EVENT_RTC, rtc_handler, dev); ++ /* ++ * After the RTC handler is installed, the Fixed_RTC event should ++ * be disabled. Only when the RTC alarm is set will it be enabled. 
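++ *
++ * (Editorial note, not upstream text: the matching enable side is
++ * rtc_wake_on() below, which does acpi_clear_event(ACPI_EVENT_RTC)
++ * followed by acpi_enable_event(ACPI_EVENT_RTC, 0).)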
++ */ ++ acpi_clear_event(ACPI_EVENT_RTC); ++ acpi_disable_event(ACPI_EVENT_RTC, 0); ++} ++ ++static void acpi_rtc_event_cleanup(void) ++{ ++ if (acpi_disabled) ++ return; ++ ++ acpi_remove_fixed_event_handler(ACPI_EVENT_RTC, rtc_handler); ++} ++ ++static void rtc_wake_on(struct device *dev) ++{ ++ acpi_clear_event(ACPI_EVENT_RTC); ++ acpi_enable_event(ACPI_EVENT_RTC, 0); ++} ++ ++static void rtc_wake_off(struct device *dev) ++{ ++ acpi_disable_event(ACPI_EVENT_RTC, 0); ++} ++ ++#ifdef CONFIG_X86 ++/* Enable use_acpi_alarm mode for Intel platforms no earlier than 2015 */ ++static void use_acpi_alarm_quirks(void) ++{ ++ if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) ++ return; ++ ++ if (!is_hpet_enabled()) ++ return; ++ ++ if (dmi_get_bios_year() < 2015) ++ return; ++ ++ use_acpi_alarm = true; ++} ++#else ++static inline void use_acpi_alarm_quirks(void) { } ++#endif ++ ++static void acpi_cmos_wake_setup(struct device *dev) ++{ ++ if (acpi_disabled) ++ return; ++ ++ use_acpi_alarm_quirks(); ++ ++ cmos_rtc.wake_on = rtc_wake_on; ++ cmos_rtc.wake_off = rtc_wake_off; ++ ++ /* ACPI tables bug workaround. */ ++ if (acpi_gbl_FADT.month_alarm && !acpi_gbl_FADT.day_alarm) { ++ dev_dbg(dev, "bogus FADT month_alarm (%d)\n", ++ acpi_gbl_FADT.month_alarm); ++ acpi_gbl_FADT.month_alarm = 0; ++ } ++ ++ cmos_rtc.day_alrm = acpi_gbl_FADT.day_alarm; ++ cmos_rtc.mon_alrm = acpi_gbl_FADT.month_alarm; ++ cmos_rtc.century = acpi_gbl_FADT.century; ++ ++ if (acpi_gbl_FADT.flags & ACPI_FADT_S4_RTC_WAKE) ++ dev_info(dev, "RTC can wake from S4\n"); ++ ++ /* RTC always wakes from S1/S2/S3, and often S4/STD */ ++ device_init_wakeup(dev, 1); ++} ++ ++static void cmos_check_acpi_rtc_status(struct device *dev, ++ unsigned char *rtc_control) ++{ ++ struct cmos_rtc *cmos = dev_get_drvdata(dev); ++ acpi_event_status rtc_status; ++ acpi_status status; ++ ++ if (acpi_gbl_FADT.flags & ACPI_FADT_FIXED_RTC) ++ return; ++ ++ status = acpi_get_event_status(ACPI_EVENT_RTC, &rtc_status); ++ if (ACPI_FAILURE(status)) { ++ dev_err(dev, "Could not get RTC status\n"); ++ } else if (rtc_status & ACPI_EVENT_FLAG_SET) { ++ unsigned char mask; ++ *rtc_control &= ~RTC_AIE; ++ CMOS_WRITE(*rtc_control, RTC_CONTROL); ++ mask = CMOS_READ(RTC_INTR_FLAGS); ++ rtc_update_irq(cmos->rtc, 1, mask); ++ } ++} ++ ++#else /* !CONFIG_ACPI */ ++ ++static inline void acpi_rtc_event_setup(struct device *dev) ++{ ++} ++ ++static inline void acpi_rtc_event_cleanup(void) ++{ ++} ++ ++static inline void acpi_cmos_wake_setup(struct device *dev) ++{ ++} ++ ++static inline void cmos_check_acpi_rtc_status(struct device *dev, ++ unsigned char *rtc_control) ++{ ++} ++#endif /* CONFIG_ACPI */ ++ + #ifdef CONFIG_PNP + #define INITSECTION + +@@ -766,19 +989,27 @@ cmos_do_probe(struct device *dev, struct resource *ports, int rtc_irq) + if (info->address_space) + address_space = info->address_space; + +- if (info->rtc_day_alarm && info->rtc_day_alarm < 128) +- cmos_rtc.day_alrm = info->rtc_day_alarm; +- if (info->rtc_mon_alarm && info->rtc_mon_alarm < 128) +- cmos_rtc.mon_alrm = info->rtc_mon_alarm; +- if (info->rtc_century && info->rtc_century < 128) +- cmos_rtc.century = info->rtc_century; ++ cmos_rtc.day_alrm = info->rtc_day_alarm; ++ cmos_rtc.mon_alrm = info->rtc_mon_alarm; ++ cmos_rtc.century = info->rtc_century; + + if (info->wake_on && info->wake_off) { + cmos_rtc.wake_on = info->wake_on; + cmos_rtc.wake_off = info->wake_off; + } ++ } else { ++ acpi_cmos_wake_setup(dev); + } + ++ if (cmos_rtc.day_alrm >= 128) ++ cmos_rtc.day_alrm = 0; ++ ++ if (cmos_rtc.mon_alrm >= 
128) ++ cmos_rtc.mon_alrm = 0; ++ ++ if (cmos_rtc.century >= 128) ++ cmos_rtc.century = 0; ++ + cmos_rtc.dev = dev; + dev_set_drvdata(dev, &cmos_rtc); + +@@ -790,16 +1021,14 @@ cmos_do_probe(struct device *dev, struct resource *ports, int rtc_irq) rename_region(ports, dev_name(&cmos_rtc.rtc->dev)); @@ -247418,6 +300875,242 @@ index 4eb53412b8085..b90a603d6b12f 100644 if (!(flags & CMOS_RTC_FLAGS_NOFREQ)) { /* force periodic irq to CMOS reset default of 1024Hz; * +@@ -869,6 +1098,13 @@ cmos_do_probe(struct device *dev, struct resource *ports, int rtc_irq) + nvmem_cfg.size = address_space - NVRAM_OFFSET; + devm_rtc_nvmem_register(cmos_rtc.rtc, &nvmem_cfg); + ++ /* ++ * Everything has gone well so far, so by default register a handler for ++ * the ACPI RTC fixed event. ++ */ ++ if (!info) ++ acpi_rtc_event_setup(dev); ++ + dev_info(dev, "%s%s, %d bytes nvram%s\n", + !is_valid_irq(rtc_irq) ? "no alarms" : + cmos_rtc.mon_alrm ? "alarms up to one year" : +@@ -914,6 +1150,9 @@ static void cmos_do_remove(struct device *dev) + hpet_unregister_irq_handler(cmos_interrupt); + } + ++ if (!dev_get_platdata(dev)) ++ acpi_rtc_event_cleanup(); ++ + cmos->rtc = NULL; + + ports = cmos->iomem; +@@ -1063,9 +1302,6 @@ static void cmos_check_wkalrm(struct device *dev) + } + } + +-static void cmos_check_acpi_rtc_status(struct device *dev, +- unsigned char *rtc_control); +- + static int __maybe_unused cmos_resume(struct device *dev) + { + struct cmos_rtc *cmos = dev_get_drvdata(dev); +@@ -1132,174 +1368,16 @@ static SIMPLE_DEV_PM_OPS(cmos_pm_ops, cmos_suspend, cmos_resume); + * predate even PNPBIOS should set up platform_bus devices. + */ + +-#ifdef CONFIG_ACPI +- +-#include <linux/acpi.h> +- +-static u32 rtc_handler(void *context) +-{ +- struct device *dev = context; +- struct cmos_rtc *cmos = dev_get_drvdata(dev); +- unsigned char rtc_control = 0; +- unsigned char rtc_intr; +- unsigned long flags; +- +- +- /* +- * Always update rtc irq when ACPI is used as RTC Alarm. +- * Or else, ACPI SCI is enabled during suspend/resume only, +- * update rtc irq in that case. +- */ +- if (cmos_use_acpi_alarm()) +- cmos_interrupt(0, (void *)cmos->rtc); +- else { +- /* Fix me: can we use cmos_interrupt() here as well? */ +- spin_lock_irqsave(&rtc_lock, flags); +- if (cmos_rtc.suspend_ctrl) +- rtc_control = CMOS_READ(RTC_CONTROL); +- if (rtc_control & RTC_AIE) { +- cmos_rtc.suspend_ctrl &= ~RTC_AIE; +- CMOS_WRITE(rtc_control, RTC_CONTROL); +- rtc_intr = CMOS_READ(RTC_INTR_FLAGS); +- rtc_update_irq(cmos->rtc, 1, rtc_intr); +- } +- spin_unlock_irqrestore(&rtc_lock, flags); +- } +- +- pm_wakeup_hard_event(dev); +- acpi_clear_event(ACPI_EVENT_RTC); +- acpi_disable_event(ACPI_EVENT_RTC, 0); +- return ACPI_INTERRUPT_HANDLED; +-} +- +-static inline void rtc_wake_setup(struct device *dev) +-{ +- acpi_install_fixed_event_handler(ACPI_EVENT_RTC, rtc_handler, dev); +- /* +- * After the RTC handler is installed, the Fixed_RTC event should +- * be disabled. Only when the RTC alarm is set will it be enabled. 
+- */ +- acpi_clear_event(ACPI_EVENT_RTC); +- acpi_disable_event(ACPI_EVENT_RTC, 0); +-} +- +-static void rtc_wake_on(struct device *dev) +-{ +- acpi_clear_event(ACPI_EVENT_RTC); +- acpi_enable_event(ACPI_EVENT_RTC, 0); +-} +- +-static void rtc_wake_off(struct device *dev) +-{ +- acpi_disable_event(ACPI_EVENT_RTC, 0); +-} +- +-#ifdef CONFIG_X86 +-/* Enable use_acpi_alarm mode for Intel platforms no earlier than 2015 */ +-static void use_acpi_alarm_quirks(void) +-{ +- if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) +- return; +- +- if (!(acpi_gbl_FADT.flags & ACPI_FADT_LOW_POWER_S0)) +- return; +- +- if (!is_hpet_enabled()) +- return; +- +- if (dmi_get_bios_year() < 2015) +- return; +- +- use_acpi_alarm = true; +-} +-#else +-static inline void use_acpi_alarm_quirks(void) { } +-#endif +- +-/* Every ACPI platform has a mc146818 compatible "cmos rtc". Here we find +- * its device node and pass extra config data. This helps its driver use +- * capabilities that the now-obsolete mc146818 didn't have, and informs it +- * that this board's RTC is wakeup-capable (per ACPI spec). +- */ +-static struct cmos_rtc_board_info acpi_rtc_info; +- +-static void cmos_wake_setup(struct device *dev) +-{ +- if (acpi_disabled) +- return; +- +- use_acpi_alarm_quirks(); +- +- rtc_wake_setup(dev); +- acpi_rtc_info.wake_on = rtc_wake_on; +- acpi_rtc_info.wake_off = rtc_wake_off; +- +- /* workaround bug in some ACPI tables */ +- if (acpi_gbl_FADT.month_alarm && !acpi_gbl_FADT.day_alarm) { +- dev_dbg(dev, "bogus FADT month_alarm (%d)\n", +- acpi_gbl_FADT.month_alarm); +- acpi_gbl_FADT.month_alarm = 0; +- } +- +- acpi_rtc_info.rtc_day_alarm = acpi_gbl_FADT.day_alarm; +- acpi_rtc_info.rtc_mon_alarm = acpi_gbl_FADT.month_alarm; +- acpi_rtc_info.rtc_century = acpi_gbl_FADT.century; +- +- /* NOTE: S4_RTC_WAKE is NOT currently useful to Linux */ +- if (acpi_gbl_FADT.flags & ACPI_FADT_S4_RTC_WAKE) +- dev_info(dev, "RTC can wake from S4\n"); +- +- dev->platform_data = &acpi_rtc_info; +- +- /* RTC always wakes from S1/S2/S3, and often S4/STD */ +- device_init_wakeup(dev, 1); +-} +- +-static void cmos_check_acpi_rtc_status(struct device *dev, +- unsigned char *rtc_control) +-{ +- struct cmos_rtc *cmos = dev_get_drvdata(dev); +- acpi_event_status rtc_status; +- acpi_status status; +- +- if (acpi_gbl_FADT.flags & ACPI_FADT_FIXED_RTC) +- return; +- +- status = acpi_get_event_status(ACPI_EVENT_RTC, &rtc_status); +- if (ACPI_FAILURE(status)) { +- dev_err(dev, "Could not get RTC status\n"); +- } else if (rtc_status & ACPI_EVENT_FLAG_SET) { +- unsigned char mask; +- *rtc_control &= ~RTC_AIE; +- CMOS_WRITE(*rtc_control, RTC_CONTROL); +- mask = CMOS_READ(RTC_INTR_FLAGS); +- rtc_update_irq(cmos->rtc, 1, mask); +- } +-} +- +-#else +- +-static void cmos_wake_setup(struct device *dev) +-{ +-} +- +-static void cmos_check_acpi_rtc_status(struct device *dev, +- unsigned char *rtc_control) +-{ +-} +- +-#endif +- + #ifdef CONFIG_PNP + + #include <linux/pnp.h> + + static int cmos_pnp_probe(struct pnp_dev *pnp, const struct pnp_device_id *id) + { +- cmos_wake_setup(&pnp->dev); ++ int irq; + + if (pnp_port_start(pnp, 0) == 0x70 && !pnp_irq_valid(pnp, 0)) { +- unsigned int irq = 0; ++ irq = 0; + #ifdef CONFIG_X86 + /* Some machines contain a PNP entry for the RTC, but + * don't define the IRQ. 
It should always be safe to +@@ -1308,13 +1386,11 @@ static int cmos_pnp_probe(struct pnp_dev *pnp, const struct pnp_device_id *id) + if (nr_legacy_irqs()) + irq = RTC_IRQ; + #endif +- return cmos_do_probe(&pnp->dev, +- pnp_get_resource(pnp, IORESOURCE_IO, 0), irq); + } else { +- return cmos_do_probe(&pnp->dev, +- pnp_get_resource(pnp, IORESOURCE_IO, 0), +- pnp_irq(pnp, 0)); ++ irq = pnp_irq(pnp, 0); + } ++ ++ return cmos_do_probe(&pnp->dev, pnp_get_resource(pnp, IORESOURCE_IO, 0), irq); + } + + static void cmos_pnp_remove(struct pnp_dev *pnp) +@@ -1401,7 +1477,6 @@ static int __init cmos_platform_probe(struct platform_device *pdev) + int irq; + + cmos_of_init(pdev); +- cmos_wake_setup(&pdev->dev); + + if (RTC_IOMAPPED) + resource = platform_get_resource(pdev, IORESOURCE_IO, 0); diff --git a/drivers/rtc/rtc-ds1302.c b/drivers/rtc/rtc-ds1302.c index b3de6d2e680a4..2f83adef966eb 100644 --- a/drivers/rtc/rtc-ds1302.c @@ -247441,6 +301134,19 @@ index b3de6d2e680a4..2f83adef966eb 100644 }; module_spi_driver(ds1302_driver); +diff --git a/drivers/rtc/rtc-ds1347.c b/drivers/rtc/rtc-ds1347.c +index 157bf5209ac40..a40c1a52df659 100644 +--- a/drivers/rtc/rtc-ds1347.c ++++ b/drivers/rtc/rtc-ds1347.c +@@ -112,7 +112,7 @@ static int ds1347_set_time(struct device *dev, struct rtc_time *dt) + return err; + + century = (dt->tm_year / 100) + 19; +- err = regmap_write(map, DS1347_CENTURY_REG, century); ++ err = regmap_write(map, DS1347_CENTURY_REG, bin2bcd(century)); + if (err) + return err; + diff --git a/drivers/rtc/rtc-ds1390.c b/drivers/rtc/rtc-ds1390.c index 66fc8617d07ee..93ce72b9ae59e 100644 --- a/drivers/rtc/rtc-ds1390.c @@ -247534,15 +301240,85 @@ index ad3add5db4c82..25c6e7d9570f0 100644 static int ftrtc010_rtc_remove(struct platform_device *pdev) diff --git a/drivers/rtc/rtc-mc146818-lib.c b/drivers/rtc/rtc-mc146818-lib.c -index dcfaf09946ee3..f3f5a87fe376e 100644 +index dcfaf09946ee3..347655d24b5d3 100644 --- a/drivers/rtc/rtc-mc146818-lib.c +++ b/drivers/rtc/rtc-mc146818-lib.c -@@ -8,10 +8,36 @@ +@@ -8,10 +8,106 @@ #include <linux/acpi.h> #endif -unsigned int mc146818_get_time(struct rtc_time *time) +/* ++ * Execute a function while the UIP (Update-in-progress) bit of the RTC is ++ * unset. ++ * ++ * Warning: callback may be executed more then once. ++ */ ++bool mc146818_avoid_UIP(void (*callback)(unsigned char seconds, void *param), ++ void *param) ++{ ++ int i; ++ unsigned long flags; ++ unsigned char seconds; ++ ++ for (i = 0; i < 10; i++) { ++ spin_lock_irqsave(&rtc_lock, flags); ++ ++ /* ++ * Check whether there is an update in progress during which the ++ * readout is unspecified. The maximum update time is ~2ms. Poll ++ * every msec for completion. ++ * ++ * Store the second value before checking UIP so a long lasting ++ * NMI which happens to hit after the UIP check cannot make ++ * an update cycle invisible. ++ */ ++ seconds = CMOS_READ(RTC_SECONDS); ++ ++ if (CMOS_READ(RTC_FREQ_SELECT) & RTC_UIP) { ++ spin_unlock_irqrestore(&rtc_lock, flags); ++ mdelay(1); ++ continue; ++ } ++ ++ /* Revalidate the above readout */ ++ if (seconds != CMOS_READ(RTC_SECONDS)) { ++ spin_unlock_irqrestore(&rtc_lock, flags); ++ continue; ++ } ++ ++ if (callback) ++ callback(seconds, param); ++ ++ /* ++ * Check for the UIP bit again. If it is set now then ++ * the above values may contain garbage. 
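++ *
++ * (Editorial note, not upstream text: a hit here takes the same
++ * path as the first UIP check, i.e.
++ *   spin_unlock_irqrestore(&rtc_lock, flags);
++ *   mdelay(1);
++ *   continue;
++ * consuming one of the ten retries of the enclosing loop.)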
++ */ ++ if (CMOS_READ(RTC_FREQ_SELECT) & RTC_UIP) { ++ spin_unlock_irqrestore(&rtc_lock, flags); ++ mdelay(1); ++ continue; ++ } ++ ++ /* ++ * A NMI might have interrupted the above sequence so check ++ * whether the seconds value has changed which indicates that ++ * the NMI took longer than the UIP bit was set. Unlikely, but ++ * possible and there is also virt... ++ */ ++ if (seconds != CMOS_READ(RTC_SECONDS)) { ++ spin_unlock_irqrestore(&rtc_lock, flags); ++ continue; ++ } ++ spin_unlock_irqrestore(&rtc_lock, flags); ++ ++ return true; ++ } ++ return false; ++} ++EXPORT_SYMBOL_GPL(mc146818_avoid_UIP); ++ ++/* + * If the UIP (Update-in-progress) bit of the RTC is set for more then + * 10ms, the RTC is apparently broken or not present. + */ @@ -247575,7 +301351,7 @@ index dcfaf09946ee3..f3f5a87fe376e 100644 unsigned char century = 0; bool retry; -@@ -20,13 +46,13 @@ unsigned int mc146818_get_time(struct rtc_time *time) +@@ -20,13 +116,13 @@ unsigned int mc146818_get_time(struct rtc_time *time) #endif again: @@ -247595,7 +301371,7 @@ index dcfaf09946ee3..f3f5a87fe376e 100644 /* * Check whether there is an update in progress during which the -@@ -104,7 +130,7 @@ again: +@@ -104,7 +200,7 @@ again: time->tm_year += real_year - 72; #endif @@ -247604,7 +301380,7 @@ index dcfaf09946ee3..f3f5a87fe376e 100644 time->tm_year += (century - 19) * 100; /* -@@ -116,10 +142,21 @@ again: +@@ -116,10 +212,21 @@ again: time->tm_mon--; @@ -247627,7 +301403,7 @@ index dcfaf09946ee3..f3f5a87fe376e 100644 /* Set the current date and time in the real time clock. */ int mc146818_set_time(struct rtc_time *time) { -@@ -176,8 +213,10 @@ int mc146818_set_time(struct rtc_time *time) +@@ -176,8 +283,10 @@ int mc146818_set_time(struct rtc_time *time) if (yrs >= 100) yrs -= 100; @@ -247640,7 +301416,7 @@ index dcfaf09946ee3..f3f5a87fe376e 100644 sec = bin2bcd(sec); min = bin2bcd(min); hrs = bin2bcd(hrs); -@@ -191,7 +230,10 @@ int mc146818_set_time(struct rtc_time *time) +@@ -191,7 +300,10 @@ int mc146818_set_time(struct rtc_time *time) save_control = CMOS_READ(RTC_CONTROL); CMOS_WRITE((save_control|RTC_SET), RTC_CONTROL); save_freq_select = CMOS_READ(RTC_FREQ_SELECT); @@ -247689,6 +301465,22 @@ index 80dc479a6ff02..1d297af80f878 100644 rtc->addr_base = res->start; rtc->data = of_device_get_match_data(&pdev->dev); +diff --git a/drivers/rtc/rtc-mxc_v2.c b/drivers/rtc/rtc-mxc_v2.c +index 5e03834016294..f6d2ad91ff7a9 100644 +--- a/drivers/rtc/rtc-mxc_v2.c ++++ b/drivers/rtc/rtc-mxc_v2.c +@@ -336,8 +336,10 @@ static int mxc_rtc_probe(struct platform_device *pdev) + } + + pdata->rtc = devm_rtc_allocate_device(&pdev->dev); +- if (IS_ERR(pdata->rtc)) ++ if (IS_ERR(pdata->rtc)) { ++ clk_disable_unprepare(pdata->clk); + return PTR_ERR(pdata->rtc); ++ } + + pdata->rtc->ops = &mxc_rtc_ops; + pdata->rtc->range_max = U32_MAX; diff --git a/drivers/rtc/rtc-pcf2123.c b/drivers/rtc/rtc-pcf2123.c index 0f58cac81d8c0..7473e6c8a183b 100644 --- a/drivers/rtc/rtc-pcf2123.c @@ -247729,6 +301521,59 @@ index 56c58b055dfff..43f8011070952 100644 int ret; ret = regmap_read(pcf2127->regmap, PCF2127_REG_CTRL2, &ctrl2); +diff --git a/drivers/rtc/rtc-pcf85063.c b/drivers/rtc/rtc-pcf85063.c +index 14da4ab301044..bf2e370907b73 100644 +--- a/drivers/rtc/rtc-pcf85063.c ++++ b/drivers/rtc/rtc-pcf85063.c +@@ -167,10 +167,10 @@ static int pcf85063_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alrm) + if (ret) + return ret; + +- alrm->time.tm_sec = bcd2bin(buf[0]); +- alrm->time.tm_min = bcd2bin(buf[1]); +- alrm->time.tm_hour = bcd2bin(buf[2]); +- 
alrm->time.tm_mday = bcd2bin(buf[3]); ++ alrm->time.tm_sec = bcd2bin(buf[0] & 0x7f); ++ alrm->time.tm_min = bcd2bin(buf[1] & 0x7f); ++ alrm->time.tm_hour = bcd2bin(buf[2] & 0x3f); ++ alrm->time.tm_mday = bcd2bin(buf[3] & 0x3f); + + ret = regmap_read(pcf85063->regmap, PCF85063_REG_CTRL2, &val); + if (ret) +@@ -422,7 +422,7 @@ static int pcf85063_clkout_control(struct clk_hw *hw, bool enable) + unsigned int buf; + int ret; + +- ret = regmap_read(pcf85063->regmap, PCF85063_REG_OFFSET, &buf); ++ ret = regmap_read(pcf85063->regmap, PCF85063_REG_CTRL2, &buf); + if (ret < 0) + return ret; + buf &= PCF85063_REG_CLKO_F_MASK; +diff --git a/drivers/rtc/rtc-pic32.c b/drivers/rtc/rtc-pic32.c +index 7fb9145c43bd5..fa351ac201587 100644 +--- a/drivers/rtc/rtc-pic32.c ++++ b/drivers/rtc/rtc-pic32.c +@@ -324,16 +324,16 @@ static int pic32_rtc_probe(struct platform_device *pdev) + + spin_lock_init(&pdata->alarm_lock); + ++ pdata->rtc = devm_rtc_allocate_device(&pdev->dev); ++ if (IS_ERR(pdata->rtc)) ++ return PTR_ERR(pdata->rtc); ++ + clk_prepare_enable(pdata->clk); + + pic32_rtc_enable(pdata, 1); + + device_init_wakeup(&pdev->dev, 1); + +- pdata->rtc = devm_rtc_allocate_device(&pdev->dev); +- if (IS_ERR(pdata->rtc)) +- return PTR_ERR(pdata->rtc); +- + pdata->rtc->ops = &pic32_rtcops; + pdata->rtc->range_min = RTC_TIMESTAMP_BEGIN_2000; + pdata->rtc->range_max = RTC_TIMESTAMP_END_2099; diff --git a/drivers/rtc/rtc-pl031.c b/drivers/rtc/rtc-pl031.c index e38ee88483855..bad6a5d9c6839 100644 --- a/drivers/rtc/rtc-pl031.c @@ -247868,6 +301713,72 @@ index d38aaf08108c2..dc9221393080a 100644 ald[1] = bin2bcd(t->time.tm_hour); else ald[1] = (t->time.tm_hour >= 12 ? 0x20 : 0) +diff --git a/drivers/rtc/rtc-snvs.c b/drivers/rtc/rtc-snvs.c +index bd929b0e7d7de..d82acf1af1fae 100644 +--- a/drivers/rtc/rtc-snvs.c ++++ b/drivers/rtc/rtc-snvs.c +@@ -32,6 +32,14 @@ + #define SNVS_LPPGDR_INIT 0x41736166 + #define CNTR_TO_SECS_SH 15 + ++/* The maximum RTC clock cycles that are allowed to pass between two ++ * consecutive clock counter register reads. If the values are corrupted a ++ * bigger difference is expected. The RTC frequency is 32kHz. With 320 cycles ++ * we end at 10ms which should be enough for most cases. If it once takes ++ * longer than expected we do a retry. 
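++ *
++ * (Editorial note, not upstream text: 320 cycles at the stated
++ * 32 kHz is 320 / 32000 s = 10 ms; rtc_read_lp_counter() below
++ * retries whenever the read1 - read2 difference falls outside
++ * [0, MAX_RTC_READ_DIFF_CYCLES].)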
++ */ ++#define MAX_RTC_READ_DIFF_CYCLES 320 ++ + struct snvs_rtc_data { + struct rtc_device *rtc; + struct regmap *regmap; +@@ -56,6 +64,7 @@ static u64 rtc_read_lpsrt(struct snvs_rtc_data *data) + static u32 rtc_read_lp_counter(struct snvs_rtc_data *data) + { + u64 read1, read2; ++ s64 diff; + unsigned int timeout = 100; + + /* As expected, the registers might update between the read of the LSB +@@ -66,7 +75,8 @@ static u32 rtc_read_lp_counter(struct snvs_rtc_data *data) + do { + read2 = read1; + read1 = rtc_read_lpsrt(data); +- } while (read1 != read2 && --timeout); ++ diff = read1 - read2; ++ } while (((diff < 0) || (diff > MAX_RTC_READ_DIFF_CYCLES)) && --timeout); + if (!timeout) + dev_err(&data->rtc->dev, "Timeout trying to get valid LPSRT Counter read\n"); + +@@ -78,13 +88,15 @@ static u32 rtc_read_lp_counter(struct snvs_rtc_data *data) + static int rtc_read_lp_counter_lsb(struct snvs_rtc_data *data, u32 *lsb) + { + u32 count1, count2; ++ s32 diff; + unsigned int timeout = 100; + + regmap_read(data->regmap, data->offset + SNVS_LPSRTCLR, &count1); + do { + count2 = count1; + regmap_read(data->regmap, data->offset + SNVS_LPSRTCLR, &count1); +- } while (count1 != count2 && --timeout); ++ diff = count1 - count2; ++ } while (((diff < 0) || (diff > MAX_RTC_READ_DIFF_CYCLES)) && --timeout); + if (!timeout) { + dev_err(&data->rtc->dev, "Timeout trying to get valid LPSRT Counter read\n"); + return -ETIMEDOUT; +diff --git a/drivers/rtc/rtc-st-lpc.c b/drivers/rtc/rtc-st-lpc.c +index bdb20f63254e2..0f8e4231098ef 100644 +--- a/drivers/rtc/rtc-st-lpc.c ++++ b/drivers/rtc/rtc-st-lpc.c +@@ -238,6 +238,7 @@ static int st_rtc_probe(struct platform_device *pdev) + + rtc->clkrate = clk_get_rate(rtc->clk); + if (!rtc->clkrate) { ++ clk_disable_unprepare(rtc->clk); + dev_err(&pdev->dev, "Unable to fetch clock rate\n"); + return -EINVAL; + } diff --git a/drivers/rtc/rtc-sun6i.c b/drivers/rtc/rtc-sun6i.c index adec1b14a8deb..c551ebf0ac00f 100644 --- a/drivers/rtc/rtc-sun6i.c @@ -248024,7 +301935,7 @@ index dc78a523a69f2..b6b938aa66158 100644 if (!alias_device) { if (list_empty(&group->aliaslist)) { diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c -index 460e0f1cca533..ff7b7d470e96f 100644 +index 460e0f1cca533..57dfc92aa756f 100644 --- a/drivers/s390/block/dasd_eckd.c +++ b/drivers/s390/block/dasd_eckd.c @@ -3095,13 +3095,24 @@ static int dasd_eckd_format_device(struct dasd_device *base, @@ -248099,6 +302010,38 @@ index 460e0f1cca533..ff7b7d470e96f 100644 blk_count++; } } +@@ -4682,7 +4696,6 @@ static struct dasd_ccw_req *dasd_eckd_build_cp_raw(struct dasd_device *startdev, + struct dasd_device *basedev; + struct req_iterator iter; + struct dasd_ccw_req *cqr; +- unsigned int first_offs; + unsigned int trkcount; + unsigned long *idaws; + unsigned int size; +@@ -4716,7 +4729,6 @@ static struct dasd_ccw_req *dasd_eckd_build_cp_raw(struct dasd_device *startdev, + last_trk = (blk_rq_pos(req) + blk_rq_sectors(req) - 1) / + DASD_RAW_SECTORS_PER_TRACK; + trkcount = last_trk - first_trk + 1; +- first_offs = 0; + + if (rq_data_dir(req) == READ) + cmd = DASD_ECKD_CCW_READ_TRACK; +@@ -4760,13 +4772,13 @@ static struct dasd_ccw_req *dasd_eckd_build_cp_raw(struct dasd_device *startdev, + + if (use_prefix) { + prefix_LRE(ccw++, data, first_trk, last_trk, cmd, basedev, +- startdev, 1, first_offs + 1, trkcount, 0, 0); ++ startdev, 1, 0, trkcount, 0, 0); + } else { + define_extent(ccw++, data, first_trk, last_trk, cmd, basedev, 0); + ccw[-1].flags |= CCW_FLAG_CC; + + data += sizeof(struct 
DE_eckd_data); +- locate_record_ext(ccw++, data, first_trk, first_offs + 1, ++ locate_record_ext(ccw++, data, first_trk, 0, + trkcount, cmd, basedev, 0, 0); + } + diff --git a/drivers/s390/block/dasd_genhd.c b/drivers/s390/block/dasd_genhd.c index fa966e0db6ca9..3a6f3af240fa7 100644 --- a/drivers/s390/block/dasd_genhd.c @@ -248150,6 +302093,19 @@ index 155428bfed8ac..d94ae067f085e 100644 /* externals in dasd.c */ #define DASD_PROFILE_OFF 0 #define DASD_PROFILE_ON 1 +diff --git a/drivers/s390/block/dcssblk.c b/drivers/s390/block/dcssblk.c +index 5be3d1c39a78e..54176c073547b 100644 +--- a/drivers/s390/block/dcssblk.c ++++ b/drivers/s390/block/dcssblk.c +@@ -866,6 +866,8 @@ dcssblk_submit_bio(struct bio *bio) + unsigned long bytes_done; + + blk_queue_split(&bio); ++ if (!bio) ++ return BLK_QC_T_NONE; + + bytes_done = 0; + dev_info = bio->bi_bdev->bd_disk->private_data; diff --git a/drivers/s390/char/keyboard.h b/drivers/s390/char/keyboard.h index c467589c7f452..c06d399b9b1f1 100644 --- a/drivers/s390/char/keyboard.h @@ -248282,7 +302238,7 @@ index 297fb399363cc..620a917cd3a15 100644 } *rr; int rc; diff --git a/drivers/s390/cio/css.c b/drivers/s390/cio/css.c -index 44461928aab8a..c278097926093 100644 +index 44461928aab8a..2ba9e01355659 100644 --- a/drivers/s390/cio/css.c +++ b/drivers/s390/cio/css.c @@ -437,8 +437,8 @@ static ssize_t dev_busid_show(struct device *dev, @@ -248296,6 +302252,17 @@ index 44461928aab8a..c278097926093 100644 return sysfs_emit(buf, "0.%x.%04x\n", sch->schid.ssid, pmcw->dev); else +@@ -792,9 +792,8 @@ static int __unset_online(struct device *dev, void *data) + { + struct idset *set = data; + struct subchannel *sch = to_subchannel(dev); +- struct ccw_device *cdev = sch_get_cdev(sch); + +- if (cdev && cdev->online) ++ if (sch->st == SUBCHANNEL_TYPE_IO && sch->config.ena) + idset_sch_del(set, sch->schid); + + return 0; diff --git a/drivers/s390/cio/device.c b/drivers/s390/cio/device.c index 8d14569823d73..61cde02b23fec 100644 --- a/drivers/s390/cio/device.c @@ -248378,6 +302345,37 @@ index 9ea48bf0ee40d..032bf7b282bab 100644 if (aq->queue_count > 0) mod_timer(&aq->timeout, jiffies + aq->request_timeout); +diff --git a/drivers/s390/net/ctcm_main.c b/drivers/s390/net/ctcm_main.c +index fd705429708e8..f91b6cfd7ed01 100644 +--- a/drivers/s390/net/ctcm_main.c ++++ b/drivers/s390/net/ctcm_main.c +@@ -825,16 +825,9 @@ done: + /** + * Start transmission of a packet. + * Called from generic network device layer. +- * +- * skb Pointer to buffer containing the packet. +- * dev Pointer to interface struct. +- * +- * returns 0 if packet consumed, !0 if packet rejected. +- * Note: If we return !0, then the packet is free'd by +- * the generic network layer. 
+ */ + /* first merge version - leaving both functions separated */ +-static int ctcm_tx(struct sk_buff *skb, struct net_device *dev) ++static netdev_tx_t ctcm_tx(struct sk_buff *skb, struct net_device *dev) + { + struct ctcm_priv *priv = dev->ml_priv; + +@@ -877,7 +870,7 @@ static int ctcm_tx(struct sk_buff *skb, struct net_device *dev) + } + + /* unmerged MPC variant of ctcm_tx */ +-static int ctcmpc_tx(struct sk_buff *skb, struct net_device *dev) ++static netdev_tx_t ctcmpc_tx(struct sk_buff *skb, struct net_device *dev) + { + int len = 0; + struct ctcm_priv *priv = dev->ml_priv; diff --git a/drivers/s390/net/ctcm_mpc.c b/drivers/s390/net/ctcm_mpc.c index f0436f555c62a..be03cb123ef48 100644 --- a/drivers/s390/net/ctcm_mpc.c @@ -248439,10 +302437,32 @@ index ded1930a00b2d..e3813a7aa5e68 100644 rc = kstrtouint(buf, 0, &bs1); if (rc) diff --git a/drivers/s390/net/lcs.c b/drivers/s390/net/lcs.c -index 440219bcaa2be..06a322bdced6d 100644 +index 440219bcaa2be..7e743f4717a91 100644 --- a/drivers/s390/net/lcs.c +++ b/drivers/s390/net/lcs.c -@@ -1735,10 +1735,11 @@ lcs_get_control(struct lcs_card *card, struct lcs_cmd *cmd) +@@ -1518,9 +1518,8 @@ lcs_txbuffer_cb(struct lcs_channel *channel, struct lcs_buffer *buffer) + /** + * Packet transmit function called by network stack + */ +-static int +-__lcs_start_xmit(struct lcs_card *card, struct sk_buff *skb, +- struct net_device *dev) ++static netdev_tx_t __lcs_start_xmit(struct lcs_card *card, struct sk_buff *skb, ++ struct net_device *dev) + { + struct lcs_header *header; + int rc = NETDEV_TX_OK; +@@ -1581,8 +1580,7 @@ out: + return rc; + } + +-static int +-lcs_start_xmit(struct sk_buff *skb, struct net_device *dev) ++static netdev_tx_t lcs_start_xmit(struct sk_buff *skb, struct net_device *dev) + { + struct lcs_card *card; + int rc; +@@ -1735,10 +1733,11 @@ lcs_get_control(struct lcs_card *card, struct lcs_cmd *cmd) lcs_schedule_recovery(card); break; case LCS_CMD_STOPLAN: @@ -248457,6 +302477,94 @@ index 440219bcaa2be..06a322bdced6d 100644 break; default: LCS_DBF_TEXT(5, trace, "noLGWcmd"); +diff --git a/drivers/s390/net/netiucv.c b/drivers/s390/net/netiucv.c +index 5a0c2f07a3a25..ce5f0ffd6cc8d 100644 +--- a/drivers/s390/net/netiucv.c ++++ b/drivers/s390/net/netiucv.c +@@ -1252,15 +1252,8 @@ static int netiucv_close(struct net_device *dev) + /** + * Start transmission of a packet. + * Called from generic network device layer. +- * +- * @param skb Pointer to buffer containing the packet. +- * @param dev Pointer to interface struct. +- * +- * @return 0 if packet consumed, !0 if packet rejected. +- * Note: If we return !0, then the packet is free'd by +- * the generic network layer. 
+ */ +-static int netiucv_tx(struct sk_buff *skb, struct net_device *dev) ++static netdev_tx_t netiucv_tx(struct sk_buff *skb, struct net_device *dev) + { + struct netiucv_priv *privptr = netdev_priv(dev); + int rc; +diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c +index dc6c00768d919..d694e3ff80865 100644 +--- a/drivers/s390/net/qeth_l2_main.c ++++ b/drivers/s390/net/qeth_l2_main.c +@@ -661,13 +661,13 @@ static void qeth_l2_dev2br_fdb_notify(struct qeth_card *card, u8 code, + card->dev, &info.info, NULL); + QETH_CARD_TEXT(card, 4, "andelmac"); + QETH_CARD_TEXT_(card, 4, +- "mc%012lx", ether_addr_to_u64(ntfy_mac)); ++ "mc%012llx", ether_addr_to_u64(ntfy_mac)); + } else { + call_switchdev_notifiers(SWITCHDEV_FDB_ADD_TO_BRIDGE, + card->dev, &info.info, NULL); + QETH_CARD_TEXT(card, 4, "anaddmac"); + QETH_CARD_TEXT_(card, 4, +- "mc%012lx", ether_addr_to_u64(ntfy_mac)); ++ "mc%012llx", ether_addr_to_u64(ntfy_mac)); + } + } + +@@ -764,9 +764,8 @@ static void qeth_l2_br2dev_worker(struct work_struct *work) + struct list_head *iter; + int err = 0; + +- kfree(br2dev_event_work); +- QETH_CARD_TEXT_(card, 4, "b2dw%04x", event); +- QETH_CARD_TEXT_(card, 4, "ma%012lx", ether_addr_to_u64(addr)); ++ QETH_CARD_TEXT_(card, 4, "b2dw%04lx", event); ++ QETH_CARD_TEXT_(card, 4, "ma%012llx", ether_addr_to_u64(addr)); + + rcu_read_lock(); + /* Verify preconditions are still valid: */ +@@ -795,7 +794,7 @@ static void qeth_l2_br2dev_worker(struct work_struct *work) + if (err) { + QETH_CARD_TEXT(card, 2, "b2derris"); + QETH_CARD_TEXT_(card, 2, +- "err%02x%03d", event, ++ "err%02lx%03d", event, + lowerdev->ifindex); + } + } +@@ -813,7 +812,7 @@ static void qeth_l2_br2dev_worker(struct work_struct *work) + break; + } + if (err) +- QETH_CARD_TEXT_(card, 2, "b2derr%02x", event); ++ QETH_CARD_TEXT_(card, 2, "b2derr%02lx", event); + } + + unlock: +@@ -821,6 +820,7 @@ unlock: + dev_put(brdev); + dev_put(lsyncdev); + dev_put(dstdev); ++ kfree(br2dev_event_work); + } + + static int qeth_l2_br2dev_queue_work(struct net_device *brdev, +@@ -878,7 +878,7 @@ static int qeth_l2_switchdev_event(struct notifier_block *unused, + while (lowerdev) { + if (qeth_l2_must_learn(lowerdev, dstdev)) { + card = lowerdev->ml_priv; +- QETH_CARD_TEXT_(card, 4, "b2dqw%03x", event); ++ QETH_CARD_TEXT_(card, 4, "b2dqw%03lx", event); + rc = qeth_l2_br2dev_queue_work(brdev, lowerdev, + dstdev, event, + fdb_info->addr); diff --git a/drivers/s390/scsi/zfcp_fc.c b/drivers/s390/scsi/zfcp_fc.c index d24cafe02708f..b61acbb09be3b 100644 --- a/drivers/s390/scsi/zfcp_fc.c @@ -248592,9 +302700,18 @@ index 8aaf409ce9cba..97755407ce1b5 100644 atomic_t refcount; u32 d_id; diff --git a/drivers/s390/scsi/zfcp_fsf.c b/drivers/s390/scsi/zfcp_fsf.c -index c1f979296c1a3..33b50b0990a06 100644 +index c1f979296c1a3..e37e1cd1d67f6 100644 --- a/drivers/s390/scsi/zfcp_fsf.c +++ b/drivers/s390/scsi/zfcp_fsf.c +@@ -884,7 +884,7 @@ static int zfcp_fsf_req_send(struct zfcp_fsf_req *req) + const bool is_srb = zfcp_fsf_req_is_status_read_buffer(req); + struct zfcp_adapter *adapter = req->adapter; + struct zfcp_qdio *qdio = adapter->qdio; +- int req_id = req->req_id; ++ unsigned long req_id = req->req_id; + + zfcp_reqlist_add(adapter->req_list, req); + @@ -1907,7 +1907,7 @@ static void zfcp_fsf_open_wka_port_handler(struct zfcp_fsf_req *req) wka_port->status = ZFCP_FC_WKA_PORT_ONLINE; } @@ -249052,6 +303169,18 @@ index 37d06f993b761..1d9be771f3ee0 100644 default: req->rq_flags |= RQF_QUIET; return BLK_STS_IOERR; +diff --git 
a/drivers/scsi/elx/efct/efct_driver.c b/drivers/scsi/elx/efct/efct_driver.c +index eab68fd9337ac..37e1ab96ee5be 100644 +--- a/drivers/scsi/elx/efct/efct_driver.c ++++ b/drivers/scsi/elx/efct/efct_driver.c +@@ -42,6 +42,7 @@ efct_device_init(void) + + rc = efct_scsi_reg_fc_transport(); + if (rc) { ++ efct_scsi_tgt_driver_exit(); + pr_err("failed to register to FC host\n"); + return rc; + } diff --git a/drivers/scsi/elx/libefc/efc_els.c b/drivers/scsi/elx/libefc/efc_els.c index 24db0accb256e..5f690378fe9a9 100644 --- a/drivers/scsi/elx/libefc/efc_els.c @@ -249096,6 +303225,45 @@ index 24db0accb256e..5f690378fe9a9 100644 return els; } +diff --git a/drivers/scsi/elx/libefc/efclib.h b/drivers/scsi/elx/libefc/efclib.h +index ee291cabf7e05..b14e516be7d53 100644 +--- a/drivers/scsi/elx/libefc/efclib.h ++++ b/drivers/scsi/elx/libefc/efclib.h +@@ -58,10 +58,12 @@ enum efc_node_send_ls_acc { + #define EFC_LINK_STATUS_UP 0 + #define EFC_LINK_STATUS_DOWN 1 + ++enum efc_sm_event; ++ + /* State machine context header */ + struct efc_sm_ctx { + void (*current_state)(struct efc_sm_ctx *ctx, +- u32 evt, void *arg); ++ enum efc_sm_event evt, void *arg); + + const char *description; + void *app; +@@ -364,7 +366,7 @@ struct efc_node { + int prev_evt; + + void (*nodedb_state)(struct efc_sm_ctx *ctx, +- u32 evt, void *arg); ++ enum efc_sm_event evt, void *arg); + struct timer_list gidpt_delay_timer; + u64 time_last_gidpt_msec; + +diff --git a/drivers/scsi/fcoe/fcoe.c b/drivers/scsi/fcoe/fcoe.c +index 5ae6c207d3ac3..76dbdae0e9874 100644 +--- a/drivers/scsi/fcoe/fcoe.c ++++ b/drivers/scsi/fcoe/fcoe.c +@@ -2501,6 +2501,7 @@ static int __init fcoe_init(void) + + out_free: + mutex_unlock(&fcoe_config_mutex); ++ fcoe_transport_detach(&fcoe_sw_transport); + out_destroy: + destroy_workqueue(fcoe_wq); + return rc; diff --git a/drivers/scsi/fcoe/fcoe_ctlr.c b/drivers/scsi/fcoe/fcoe_ctlr.c index 1756a0ac6f083..558f3f4e18593 100644 --- a/drivers/scsi/fcoe/fcoe_ctlr.c @@ -249109,6 +303277,52 @@ index 1756a0ac6f083..558f3f4e18593 100644 unsigned int scheme, unsigned int port) { u64 wwn; +diff --git a/drivers/scsi/fcoe/fcoe_sysfs.c b/drivers/scsi/fcoe/fcoe_sysfs.c +index af658aa38fedf..6260aa5ea6af8 100644 +--- a/drivers/scsi/fcoe/fcoe_sysfs.c ++++ b/drivers/scsi/fcoe/fcoe_sysfs.c +@@ -830,14 +830,15 @@ struct fcoe_ctlr_device *fcoe_ctlr_device_add(struct device *parent, + + dev_set_name(&ctlr->dev, "ctlr_%d", ctlr->id); + error = device_register(&ctlr->dev); +- if (error) +- goto out_del_q2; ++ if (error) { ++ destroy_workqueue(ctlr->devloss_work_q); ++ destroy_workqueue(ctlr->work_q); ++ put_device(&ctlr->dev); ++ return NULL; ++ } + + return ctlr; + +-out_del_q2: +- destroy_workqueue(ctlr->devloss_work_q); +- ctlr->devloss_work_q = NULL; + out_del_q: + destroy_workqueue(ctlr->work_q); + ctlr->work_q = NULL; +@@ -1036,16 +1037,16 @@ struct fcoe_fcf_device *fcoe_fcf_device_add(struct fcoe_ctlr_device *ctlr, + fcf->selected = new_fcf->selected; + + error = device_register(&fcf->dev); +- if (error) +- goto out_del; ++ if (error) { ++ put_device(&fcf->dev); ++ goto out; ++ } + + fcf->state = FCOE_FCF_STATE_CONNECTED; + list_add_tail(&fcf->peers, &ctlr->fcfs); + + return fcf; + +-out_del: +- kfree(fcf); + out: + return NULL; + } diff --git a/drivers/scsi/fnic/fnic_scsi.c b/drivers/scsi/fnic/fnic_scsi.c index f8afbfb468dca..d084a7db3925e 100644 --- a/drivers/scsi/fnic/fnic_scsi.c @@ -249122,6 +303336,19 @@ index f8afbfb468dca..d084a7db3925e 100644 /* if only we issued IO, will we have the io lock */ if (io_lock_acquired) +diff 
--git a/drivers/scsi/hisi_sas/hisi_sas_main.c b/drivers/scsi/hisi_sas/hisi_sas_main.c +index 9515c45affa5e..7d93783c09a50 100644 +--- a/drivers/scsi/hisi_sas/hisi_sas_main.c ++++ b/drivers/scsi/hisi_sas/hisi_sas_main.c +@@ -1414,7 +1414,7 @@ static void hisi_sas_refresh_port_id(struct hisi_hba *hisi_hba) + device->linkrate = phy->sas_phy.linkrate; + + hisi_hba->hw->setup_itct(hisi_hba, sas_dev); +- } else ++ } else if (!port->port_attached) + port->id = 0xff; + } + } diff --git a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c index 3ab669dc806f6..fa22cb712be5a 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c @@ -249365,8 +303592,54 @@ index 24b72ee4246fb..0165dad803001 100644 INIT_LIST_HEAD(&shost->eh_cmd_q); INIT_LIST_HEAD(&shost->starved_list); init_waitqueue_head(&shost->host_wait); +diff --git a/drivers/scsi/hpsa.c b/drivers/scsi/hpsa.c +index 3faa87fa296a2..8aa5c22ae3ff9 100644 +--- a/drivers/scsi/hpsa.c ++++ b/drivers/scsi/hpsa.c +@@ -5848,7 +5848,7 @@ static int hpsa_scsi_host_alloc(struct ctlr_info *h) + { + struct Scsi_Host *sh; + +- sh = scsi_host_alloc(&hpsa_driver_template, sizeof(h)); ++ sh = scsi_host_alloc(&hpsa_driver_template, sizeof(struct ctlr_info)); + if (sh == NULL) { + dev_err(&h->pdev->dev, "scsi_host_alloc failed\n"); + return -ENOMEM; +@@ -8927,7 +8927,7 @@ clean1: /* wq/aer/h */ + destroy_workqueue(h->monitor_ctlr_wq); + h->monitor_ctlr_wq = NULL; + } +- kfree(h); ++ hpda_free_ctlr_info(h); + return rc; + } + +@@ -9788,7 +9788,8 @@ static int hpsa_add_sas_host(struct ctlr_info *h) + return 0; + + free_sas_phy: +- hpsa_free_sas_phy(hpsa_sas_phy); ++ sas_phy_free(hpsa_sas_phy->phy); ++ kfree(hpsa_sas_phy); + free_sas_port: + hpsa_free_sas_port(hpsa_sas_port); + free_sas_node: +@@ -9824,10 +9825,12 @@ static int hpsa_add_sas_device(struct hpsa_sas_node *hpsa_sas_node, + + rc = hpsa_sas_port_add_rphy(hpsa_sas_port, rphy); + if (rc) +- goto free_sas_port; ++ goto free_sas_rphy; + + return 0; + ++free_sas_rphy: ++ sas_rphy_free(rphy); + free_sas_port: + hpsa_free_sas_port(hpsa_sas_port); + device->sas_port = NULL; diff --git a/drivers/scsi/ibmvscsi/ibmvfc.c b/drivers/scsi/ibmvscsi/ibmvfc.c -index 01f79991bf4a2..b3531065a4387 100644 +index 01f79991bf4a2..45ef78f388dc9 100644 --- a/drivers/scsi/ibmvscsi/ibmvfc.c +++ b/drivers/scsi/ibmvscsi/ibmvfc.c @@ -160,8 +160,8 @@ static void ibmvfc_npiv_logout(struct ibmvfc_host *); @@ -249380,7 +303653,23 @@ index 01f79991bf4a2..b3531065a4387 100644 static const char *unknown_error = "unknown error"; -@@ -917,7 +917,7 @@ static int ibmvfc_reenable_crq_queue(struct ibmvfc_host *vhost) +@@ -708,8 +708,13 @@ static void ibmvfc_init_host(struct ibmvfc_host *vhost) + memset(vhost->async_crq.msgs.async, 0, PAGE_SIZE); + vhost->async_crq.cur = 0; + +- list_for_each_entry(tgt, &vhost->targets, queue) +- ibmvfc_del_tgt(tgt); ++ list_for_each_entry(tgt, &vhost->targets, queue) { ++ if (vhost->client_migrated) ++ tgt->need_login = 1; ++ else ++ ibmvfc_del_tgt(tgt); ++ } ++ + scsi_block_requests(vhost->host); + ibmvfc_set_host_action(vhost, IBMVFC_HOST_ACTION_INIT); + vhost->job_step = ibmvfc_npiv_login; +@@ -917,7 +922,7 @@ static int ibmvfc_reenable_crq_queue(struct ibmvfc_host *vhost) struct vio_dev *vdev = to_vio_dev(vhost->dev); unsigned long flags; @@ -249389,7 +303678,7 @@ index 01f79991bf4a2..b3531065a4387 100644 /* Re-enable the CRQ */ do { -@@ -936,7 +936,7 @@ static int ibmvfc_reenable_crq_queue(struct ibmvfc_host *vhost) +@@ -936,7 +941,7 @@ static int ibmvfc_reenable_crq_queue(struct ibmvfc_host 
*vhost) spin_unlock(vhost->crq.q_lock); spin_unlock_irqrestore(vhost->host->host_lock, flags); @@ -249398,7 +303687,7 @@ index 01f79991bf4a2..b3531065a4387 100644 return rc; } -@@ -955,7 +955,7 @@ static int ibmvfc_reset_crq(struct ibmvfc_host *vhost) +@@ -955,7 +960,7 @@ static int ibmvfc_reset_crq(struct ibmvfc_host *vhost) struct vio_dev *vdev = to_vio_dev(vhost->dev); struct ibmvfc_queue *crq = &vhost->crq; @@ -249407,7 +303696,7 @@ index 01f79991bf4a2..b3531065a4387 100644 /* Close the CRQ */ do { -@@ -988,7 +988,7 @@ static int ibmvfc_reset_crq(struct ibmvfc_host *vhost) +@@ -988,7 +993,7 @@ static int ibmvfc_reset_crq(struct ibmvfc_host *vhost) spin_unlock(vhost->crq.q_lock); spin_unlock_irqrestore(vhost->host->host_lock, flags); @@ -249416,7 +303705,21 @@ index 01f79991bf4a2..b3531065a4387 100644 return rc; } -@@ -5680,6 +5680,8 @@ static int ibmvfc_alloc_queue(struct ibmvfc_host *vhost, +@@ -3235,9 +3240,12 @@ static void ibmvfc_handle_crq(struct ibmvfc_crq *crq, struct ibmvfc_host *vhost, + /* We need to re-setup the interpartition connection */ + dev_info(vhost->dev, "Partition migrated, Re-enabling adapter\n"); + vhost->client_migrated = 1; ++ ++ scsi_block_requests(vhost->host); + ibmvfc_purge_requests(vhost, DID_REQUEUE); +- ibmvfc_link_down(vhost, IBMVFC_LINK_DOWN); ++ ibmvfc_set_host_state(vhost, IBMVFC_LINK_DOWN); + ibmvfc_set_host_action(vhost, IBMVFC_HOST_ACTION_REENABLE); ++ wake_up(&vhost->work_wait_q); + } else if (crq->format == IBMVFC_PARTNER_FAILED || crq->format == IBMVFC_PARTNER_DEREGISTER) { + dev_err(vhost->dev, "Host partner adapter deregistered or failed (rc=%d)\n", crq->format); + ibmvfc_purge_requests(vhost, DID_ERROR); +@@ -5680,6 +5688,8 @@ static int ibmvfc_alloc_queue(struct ibmvfc_host *vhost, queue->cur = 0; queue->fmt = fmt; queue->size = PAGE_SIZE / fmt_size; @@ -249425,7 +303728,7 @@ index 01f79991bf4a2..b3531065a4387 100644 return 0; } -@@ -5755,9 +5757,6 @@ static int ibmvfc_register_scsi_channel(struct ibmvfc_host *vhost, +@@ -5755,9 +5765,6 @@ static int ibmvfc_register_scsi_channel(struct ibmvfc_host *vhost, ENTER; @@ -249435,7 +303738,7 @@ index 01f79991bf4a2..b3531065a4387 100644 rc = h_reg_sub_crq(vdev->unit_address, scrq->msg_token, PAGE_SIZE, &scrq->cookie, &scrq->hw_irq); -@@ -5788,7 +5787,6 @@ static int ibmvfc_register_scsi_channel(struct ibmvfc_host *vhost, +@@ -5788,7 +5795,6 @@ static int ibmvfc_register_scsi_channel(struct ibmvfc_host *vhost, } scrq->hwq_id = index; @@ -249443,7 +303746,7 @@ index 01f79991bf4a2..b3531065a4387 100644 LEAVE; return 0; -@@ -5798,7 +5796,6 @@ irq_failed: +@@ -5798,7 +5804,6 @@ irq_failed: rc = plpar_hcall_norets(H_FREE_SUB_CRQ, vdev->unit_address, scrq->cookie); } while (rtas_busy_delay(rc)); reg_failed: @@ -249451,7 +303754,7 @@ index 01f79991bf4a2..b3531065a4387 100644 LEAVE; return rc; } -@@ -5824,12 +5821,50 @@ static void ibmvfc_deregister_scsi_channel(struct ibmvfc_host *vhost, int index) +@@ -5824,12 +5829,50 @@ static void ibmvfc_deregister_scsi_channel(struct ibmvfc_host *vhost, int index) if (rc) dev_err(dev, "Failed to free sub-crq[%d]: rc=%ld\n", index, rc); @@ -249503,7 +303806,7 @@ index 01f79991bf4a2..b3531065a4387 100644 int i, j; ENTER; -@@ -5845,30 +5880,41 @@ static void ibmvfc_init_sub_crqs(struct ibmvfc_host *vhost) +@@ -5845,30 +5888,41 @@ static void ibmvfc_init_sub_crqs(struct ibmvfc_host *vhost) } for (i = 0; i < nr_scsi_hw_queues; i++) { @@ -249585,7 +303888,7 @@ index 10b6c6daaacda..d43bb18f58fd5 100644 #define MAX_TXU 1024 * 1024 diff --git a/drivers/scsi/ipr.c 
b/drivers/scsi/ipr.c -index 5d78f7e939a36..56b8a2d6ffe49 100644 +index 5d78f7e939a36..04fb7fc012264 100644 --- a/drivers/scsi/ipr.c +++ b/drivers/scsi/ipr.c @@ -9791,7 +9791,7 @@ static int ipr_alloc_mem(struct ipr_ioa_cfg *ioa_cfg) @@ -249606,8 +303909,29 @@ index 5d78f7e939a36..56b8a2d6ffe49 100644 free_irq(pci_irq_vector(pdev, i), &ioa_cfg->hrrq[i]); return rc; +@@ -10869,11 +10869,19 @@ static struct notifier_block ipr_notifier = { + **/ + static int __init ipr_init(void) + { ++ int rc; ++ + ipr_info("IBM Power RAID SCSI Device Driver version: %s %s\n", + IPR_DRIVER_VERSION, IPR_DRIVER_DATE); + + register_reboot_notifier(&ipr_notifier); +- return pci_register_driver(&ipr_driver); ++ rc = pci_register_driver(&ipr_driver); ++ if (rc) { ++ unregister_reboot_notifier(&ipr_notifier); ++ return rc; ++ } ++ ++ return 0; + } + + /** diff --git a/drivers/scsi/iscsi_tcp.c b/drivers/scsi/iscsi_tcp.c -index 1bc37593c88ff..4d2f330878065 100644 +index 1bc37593c88ff..5943360041907 100644 --- a/drivers/scsi/iscsi_tcp.c +++ b/drivers/scsi/iscsi_tcp.c @@ -52,6 +52,10 @@ static struct iscsi_transport iscsi_sw_tcp_transport; @@ -249847,7 +304171,24 @@ index 1bc37593c88ff..4d2f330878065 100644 if (rc < 0) return rc; -@@ -803,17 +869,21 @@ static int iscsi_sw_tcp_host_get_param(struct Scsi_Host *shost, +@@ -782,7 +848,7 @@ static int iscsi_sw_tcp_host_get_param(struct Scsi_Host *shost, + enum iscsi_host_param param, char *buf) + { + struct iscsi_sw_tcp_host *tcp_sw_host = iscsi_host_priv(shost); +- struct iscsi_session *session = tcp_sw_host->session; ++ struct iscsi_session *session; + struct iscsi_conn *conn; + struct iscsi_tcp_conn *tcp_conn; + struct iscsi_sw_tcp_conn *tcp_sw_conn; +@@ -792,6 +858,7 @@ static int iscsi_sw_tcp_host_get_param(struct Scsi_Host *shost, + + switch (param) { + case ISCSI_HOST_PARAM_IPADDRESS: ++ session = tcp_sw_host->session; + if (!session) + return -ENOTCONN; + +@@ -803,17 +870,21 @@ static int iscsi_sw_tcp_host_get_param(struct Scsi_Host *shost, } tcp_conn = conn->dd_data; tcp_sw_conn = tcp_conn->dd_data; @@ -249878,7 +304219,21 @@ index 1bc37593c88ff..4d2f330878065 100644 if (rc < 0) return rc; -@@ -898,7 +968,7 @@ iscsi_sw_tcp_session_create(struct iscsi_endpoint *ep, uint16_t cmds_max, +@@ -888,17 +959,19 @@ iscsi_sw_tcp_session_create(struct iscsi_endpoint *ep, uint16_t cmds_max, + if (!cls_session) + goto remove_host; + session = cls_session->dd_data; +- tcp_sw_host = iscsi_host_priv(shost); +- tcp_sw_host->session = session; + + if (iscsi_tcp_r2tpool_alloc(session)) + goto remove_session; ++ ++ /* We are now fully setup so expose the session to sysfs. */ ++ tcp_sw_host = iscsi_host_priv(shost); ++ tcp_sw_host->session = session; + return cls_session; + remove_session: iscsi_session_teardown(cls_session); remove_host: @@ -249887,12 +304242,23 @@ index 1bc37593c88ff..4d2f330878065 100644 free_host: iscsi_host_free(shost); return NULL; -@@ -915,7 +985,7 @@ static void iscsi_sw_tcp_session_destroy(struct iscsi_cls_session *cls_session) +@@ -912,10 +985,17 @@ static void iscsi_sw_tcp_session_destroy(struct iscsi_cls_session *cls_session) + if (WARN_ON_ONCE(session->leadconn)) + return; + ++ iscsi_session_remove(cls_session); ++ /* ++ * Our get_host_param needs to access the session, so remove the ++ * host from sysfs before freeing the session to make sure userspace ++ * is no longer accessing the callout. 
++ */ ++ iscsi_host_remove(shost, false); ++ iscsi_tcp_r2tpool_free(cls_session->dd_data); - iscsi_session_teardown(cls_session); +- iscsi_session_teardown(cls_session); - iscsi_host_remove(shost); -+ iscsi_host_remove(shost, false); ++ iscsi_session_free(cls_session); iscsi_host_free(shost); } @@ -249925,7 +304291,7 @@ index 841000445b9a1..aa223db4cf53c 100644 spin_lock_bh(&ep->ex_lock); diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c -index 5bc91d34df634..73d235540b986 100644 +index 5bc91d34df634..d422e8fd7137f 100644 --- a/drivers/scsi/libiscsi.c +++ b/drivers/scsi/libiscsi.c @@ -83,7 +83,7 @@ MODULE_PARM_DESC(debug_libiscsi_eh, @@ -250048,7 +304414,7 @@ index 5bc91d34df634..73d235540b986 100644 * * This function sets the suspend bit to prevent iscsi_data_xmit * from sending new IO, and if work is queued on the xmit thread -@@ -1953,18 +1964,33 @@ void iscsi_suspend_tx(struct iscsi_conn *conn) +@@ -1953,17 +1964,32 @@ void iscsi_suspend_tx(struct iscsi_conn *conn) struct Scsi_Host *shost = conn->session->host; struct iscsi_host *ihost = shost_priv(shost); @@ -250066,8 +304432,8 @@ index 5bc91d34df634..73d235540b986 100644 - iscsi_conn_queue_work(conn); + clear_bit(ISCSI_CONN_FLAG_SUSPEND_TX, &conn->flags); + iscsi_conn_queue_xmit(conn); - } - ++} ++ +/** + * iscsi_suspend_rx - Prevent recvwork from running again. + * @conn: iscsi conn to stop. @@ -250080,12 +304446,11 @@ index 5bc91d34df634..73d235540b986 100644 + set_bit(ISCSI_CONN_FLAG_SUSPEND_RX, &conn->flags); + if (ihost->workq) + flush_work(&conn->recvwork); -+} + } +EXPORT_SYMBOL_GPL(iscsi_suspend_rx); -+ + /* * We want to make sure a ping is in flight. It has timed out. - * And we are not busy processing a pdu that is making @@ -2214,6 +2240,8 @@ void iscsi_conn_unbind(struct iscsi_cls_conn *cls_conn, bool is_active) iscsi_suspend_tx(conn); @@ -250122,7 +304487,67 @@ index 5bc91d34df634..73d235540b986 100644 wait_event_interruptible(ihost->session_removal_wq, ihost->num_sessions == 0); if (signal_pending(current)) -@@ -3101,6 +3134,8 @@ void iscsi_conn_teardown(struct iscsi_cls_conn *cls_conn) +@@ -2991,17 +3024,32 @@ dec_session_count: + } + EXPORT_SYMBOL_GPL(iscsi_session_setup); + +-/** +- * iscsi_session_teardown - destroy session, host, and cls_session +- * @cls_session: iscsi session ++/* ++ * issi_session_remove - Remove session from iSCSI class. + */ +-void iscsi_session_teardown(struct iscsi_cls_session *cls_session) ++void iscsi_session_remove(struct iscsi_cls_session *cls_session) + { + struct iscsi_session *session = cls_session->dd_data; +- struct module *owner = cls_session->transport->owner; + struct Scsi_Host *shost = session->host; + + iscsi_remove_session(cls_session); ++ /* ++ * host removal only has to wait for its children to be removed from ++ * sysfs, and iscsi_tcp needs to do iscsi_host_remove before freeing ++ * the session, so drop the session count here. 
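++ *
++ * (Editorial note, not upstream text: the resulting teardown order
++ * is iscsi_session_remove(), iscsi_host_remove(shost, false),
++ * iscsi_tcp_r2tpool_free(), iscsi_session_free(), and finally
++ * iscsi_host_free(shost).)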
++ */ ++ iscsi_host_dec_session_cnt(shost); ++} ++EXPORT_SYMBOL_GPL(iscsi_session_remove); ++ ++/** ++ * iscsi_session_free - Free iscsi session and it's resources ++ * @cls_session: iscsi session ++ */ ++void iscsi_session_free(struct iscsi_cls_session *cls_session) ++{ ++ struct iscsi_session *session = cls_session->dd_data; ++ struct module *owner = cls_session->transport->owner; + + iscsi_pool_free(&session->cmdpool); + kfree(session->password); +@@ -3019,10 +3067,19 @@ void iscsi_session_teardown(struct iscsi_cls_session *cls_session) + kfree(session->discovery_parent_type); + + iscsi_free_session(cls_session); +- +- iscsi_host_dec_session_cnt(shost); + module_put(owner); + } ++EXPORT_SYMBOL_GPL(iscsi_session_free); ++ ++/** ++ * iscsi_session_teardown - destroy session and cls_session ++ * @cls_session: iscsi session ++ */ ++void iscsi_session_teardown(struct iscsi_cls_session *cls_session) ++{ ++ iscsi_session_remove(cls_session); ++ iscsi_session_free(cls_session); ++} + EXPORT_SYMBOL_GPL(iscsi_session_teardown); + + /** +@@ -3101,6 +3158,8 @@ void iscsi_conn_teardown(struct iscsi_cls_conn *cls_conn) { struct iscsi_conn *conn = cls_conn->dd_data; struct iscsi_session *session = conn->session; @@ -250131,7 +304556,7 @@ index 5bc91d34df634..73d235540b986 100644 del_timer_sync(&conn->transport_timer); -@@ -3122,8 +3157,6 @@ void iscsi_conn_teardown(struct iscsi_cls_conn *cls_conn) +@@ -3122,8 +3181,6 @@ void iscsi_conn_teardown(struct iscsi_cls_conn *cls_conn) spin_lock_bh(&session->frwd_lock); free_pages((unsigned long) conn->data, get_order(ISCSI_DEF_MAX_RECV_SEG_LEN)); @@ -250140,7 +304565,7 @@ index 5bc91d34df634..73d235540b986 100644 /* regular RX path uses back_lock */ spin_lock_bh(&session->back_lock); kfifo_in(&session->cmdpool.queue, (void*)&conn->login_task, -@@ -3135,6 +3168,8 @@ void iscsi_conn_teardown(struct iscsi_cls_conn *cls_conn) +@@ -3135,6 +3192,8 @@ void iscsi_conn_teardown(struct iscsi_cls_conn *cls_conn) mutex_unlock(&session->eh_mutex); iscsi_destroy_conn(cls_conn); @@ -250149,7 +304574,7 @@ index 5bc91d34df634..73d235540b986 100644 } EXPORT_SYMBOL_GPL(iscsi_conn_teardown); -@@ -3310,6 +3345,8 @@ int iscsi_conn_bind(struct iscsi_cls_session *cls_session, +@@ -3310,6 +3369,8 @@ int iscsi_conn_bind(struct iscsi_cls_session *cls_session, spin_lock_bh(&session->frwd_lock); if (is_leading) session->leadconn = conn; @@ -250158,7 +304583,7 @@ index 5bc91d34df634..73d235540b986 100644 spin_unlock_bh(&session->frwd_lock); /* -@@ -3322,8 +3359,8 @@ int iscsi_conn_bind(struct iscsi_cls_session *cls_session, +@@ -3322,8 +3383,8 @@ int iscsi_conn_bind(struct iscsi_cls_session *cls_session, /* * Unblock xmitworker(), Login Phase will pass through. 
*/ @@ -250209,7 +304634,7 @@ index c2150a8184237..9ae35631135d8 100644 break; } diff --git a/drivers/scsi/lpfc/lpfc.h b/drivers/scsi/lpfc/lpfc.h -index befeb7c342903..f3bcb56e9ef28 100644 +index befeb7c342903..457ff86e02b30 100644 --- a/drivers/scsi/lpfc/lpfc.h +++ b/drivers/scsi/lpfc/lpfc.h @@ -593,6 +593,7 @@ struct lpfc_vport { @@ -250220,18 +304645,7 @@ index befeb7c342903..f3bcb56e9ef28 100644 #define FC_SCSI_SCAN_TMO 0x4000 /* scsi scan timer running */ #define FC_ABORT_DISCOVERY 0x8000 /* we want to abort discovery */ #define FC_NDISC_ACTIVE 0x10000 /* NPort discovery active */ -@@ -919,6 +920,10 @@ struct lpfc_hba { - (struct lpfc_vport *vport, - struct lpfc_io_buf *lpfc_cmd, - uint8_t tmo); -+ int (*lpfc_scsi_prep_task_mgmt_cmd) -+ (struct lpfc_vport *vport, -+ struct lpfc_io_buf *lpfc_cmd, -+ u64 lun, u8 task_mgmt_cmd); - - /* IOCB interface function jump table entries */ - int (*__lpfc_sli_issue_iocb) -@@ -1022,12 +1027,12 @@ struct lpfc_hba { +@@ -1022,12 +1023,12 @@ struct lpfc_hba { #define HBA_DEVLOSS_TMO 0x2000 /* HBA in devloss timeout */ #define HBA_RRQ_ACTIVE 0x4000 /* process the rrq active list */ #define HBA_IOQ_FLUSH 0x8000 /* FCP/NVME I/O queues being flushed */ @@ -250245,7 +304659,7 @@ index befeb7c342903..f3bcb56e9ef28 100644 #define HBA_FLOGI_ISSUED 0x100000 /* FLOGI was issued */ #define HBA_CGN_RSVD1 0x200000 /* Reserved CGN flag */ #define HBA_CGN_DAY_WRAP 0x400000 /* HBA Congestion info day wraps */ -@@ -1038,6 +1043,7 @@ struct lpfc_hba { +@@ -1038,6 +1039,7 @@ struct lpfc_hba { #define HBA_HBEAT_TMO 0x8000000 /* HBEAT initiated after timeout */ #define HBA_FLOGI_OUTSTANDING 0x10000000 /* FLOGI is outstanding */ @@ -250253,7 +304667,7 @@ index befeb7c342903..f3bcb56e9ef28 100644 uint32_t fcp_ring_in_use; /* When polling test if intr-hndlr active*/ struct lpfc_dmabuf slim2p; -@@ -1163,6 +1169,16 @@ struct lpfc_hba { +@@ -1163,6 +1165,16 @@ struct lpfc_hba { uint32_t cfg_hostmem_hgp; uint32_t cfg_log_verbose; uint32_t cfg_enable_fc4_type; @@ -250270,7 +304684,7 @@ index befeb7c342903..f3bcb56e9ef28 100644 uint32_t cfg_aer_support; uint32_t cfg_sriov_nr_virtfn; uint32_t cfg_request_firmware_upgrade; -@@ -1184,9 +1200,6 @@ struct lpfc_hba { +@@ -1184,9 +1196,6 @@ struct lpfc_hba { uint32_t cfg_ras_fwlog_func; uint32_t cfg_enable_bbcr; /* Enable BB Credit Recovery */ uint32_t cfg_enable_dpp; /* Enable Direct Packet Push */ @@ -250280,46 +304694,42 @@ index befeb7c342903..f3bcb56e9ef28 100644 uint32_t cfg_enable_pbde; uint32_t cfg_enable_mi; struct nvmet_fc_target_port *targetport; -@@ -1794,3 +1807,39 @@ static inline int lpfc_is_vmid_enabled(struct lpfc_hba *phba) - { - return phba->cfg_vmid_app_header || phba->cfg_vmid_priority_tagging; - } -+ -+static inline -+u8 get_job_ulpstatus(struct lpfc_hba *phba, struct lpfc_iocbq *iocbq) -+{ -+ if (phba->sli_rev == LPFC_SLI_REV4) -+ return bf_get(lpfc_wcqe_c_status, &iocbq->wcqe_cmpl); -+ else -+ return iocbq->iocb.ulpStatus; -+} -+ -+static inline -+u32 get_job_word4(struct lpfc_hba *phba, struct lpfc_iocbq *iocbq) -+{ -+ if (phba->sli_rev == LPFC_SLI_REV4) -+ return iocbq->wcqe_cmpl.parameter; -+ else -+ return iocbq->iocb.un.ulpWord[4]; -+} -+ -+static inline -+u8 get_job_cmnd(struct lpfc_hba *phba, struct lpfc_iocbq *iocbq) -+{ -+ if (phba->sli_rev == LPFC_SLI_REV4) -+ return bf_get(wqe_cmnd, &iocbq->wqe.generic.wqe_com); -+ else -+ return iocbq->iocb.ulpCommand; -+} +@@ -1549,10 +1558,7 @@ struct lpfc_hba { + u32 cgn_acqe_cnt; + + /* RX monitor handling for CMF */ +- struct rxtable_entry *rxtable; /* RX_monitor 
information */ +- atomic_t rxtable_idx_head; +-#define LPFC_RXMONITOR_TABLE_IN_USE (LPFC_MAX_RXMONITOR_ENTRY + 73) +- atomic_t rxtable_idx_tail; ++ struct lpfc_rx_info_monitor *rx_monitor; + atomic_t rx_max_read_cnt; /* Maximum read bytes */ + uint64_t rx_block_cnt; + +@@ -1601,7 +1607,8 @@ struct lpfc_hba { + + #define LPFC_MAX_RXMONITOR_ENTRY 800 + #define LPFC_MAX_RXMONITOR_DUMP 32 +-struct rxtable_entry { ++struct rx_info_entry { ++ uint64_t cmf_bytes; /* Total no of read bytes for CMF_SYNC_WQE */ + uint64_t total_bytes; /* Total no of read bytes requested */ + uint64_t rcv_bytes; /* Total no of read bytes completed */ + uint64_t avg_io_size; +@@ -1615,6 +1622,13 @@ struct rxtable_entry { + uint32_t timer_interval; + }; + ++struct lpfc_rx_info_monitor { ++ struct rx_info_entry *ring; /* info organized in a circular buffer */ ++ u32 head_idx, tail_idx; /* index to head/tail of ring */ ++ spinlock_t lock; /* spinlock for ring */ ++ u32 entries; /* storing number entries/size of ring */ ++}; + -+static inline -+u16 get_job_ulpcontext(struct lpfc_hba *phba, struct lpfc_iocbq *iocbq) -+{ -+ if (phba->sli_rev == LPFC_SLI_REV4) -+ return bf_get(wqe_ctxt_tag, &iocbq->wqe.generic.wqe_com); -+ else -+ return iocbq->iocb.ulpContext; -+} + static inline struct Scsi_Host * + lpfc_shost_from_vport(struct lpfc_vport *vport) + { diff --git a/drivers/scsi/lpfc/lpfc_attr.c b/drivers/scsi/lpfc/lpfc_attr.c index ebe417921dac0..9f3f7805f1f95 100644 --- a/drivers/scsi/lpfc/lpfc_attr.c @@ -250436,188 +304846,26 @@ index ebe417921dac0..9f3f7805f1f95 100644 "Enable FC4 Protocol support - FCP / NVME"); /* -diff --git a/drivers/scsi/lpfc/lpfc_bsg.c b/drivers/scsi/lpfc/lpfc_bsg.c -index fdf08cb572071..6688a575904f2 100644 ---- a/drivers/scsi/lpfc/lpfc_bsg.c -+++ b/drivers/scsi/lpfc/lpfc_bsg.c -@@ -325,7 +325,7 @@ lpfc_bsg_send_mgmt_cmd_cmp(struct lpfc_hba *phba, - - /* Close the timeout handler abort window */ - spin_lock_irqsave(&phba->hbalock, flags); -- cmdiocbq->iocb_flag &= ~LPFC_IO_CMD_OUTSTANDING; -+ cmdiocbq->cmd_flag &= ~LPFC_IO_CMD_OUTSTANDING; - spin_unlock_irqrestore(&phba->hbalock, flags); - - iocb = &dd_data->context_un.iocb; -@@ -481,11 +481,11 @@ lpfc_bsg_send_mgmt_cmd(struct bsg_job *job) - cmd->ulpOwner = OWN_CHIP; - cmdiocbq->vport = phba->pport; - cmdiocbq->context3 = bmp; -- cmdiocbq->iocb_flag |= LPFC_IO_LIBDFC; -+ cmdiocbq->cmd_flag |= LPFC_IO_LIBDFC; - timeout = phba->fc_ratov * 2; - cmd->ulpTimeout = timeout; - -- cmdiocbq->iocb_cmpl = lpfc_bsg_send_mgmt_cmd_cmp; -+ cmdiocbq->cmd_cmpl = lpfc_bsg_send_mgmt_cmd_cmp; - cmdiocbq->context1 = dd_data; - cmdiocbq->context2 = cmp; - cmdiocbq->context3 = bmp; -@@ -516,9 +516,9 @@ lpfc_bsg_send_mgmt_cmd(struct bsg_job *job) - if (iocb_stat == IOCB_SUCCESS) { - spin_lock_irqsave(&phba->hbalock, flags); - /* make sure the I/O had not been completed yet */ -- if (cmdiocbq->iocb_flag & LPFC_IO_LIBDFC) { -+ if (cmdiocbq->cmd_flag & LPFC_IO_LIBDFC) { - /* open up abort window to timeout handler */ -- cmdiocbq->iocb_flag |= LPFC_IO_CMD_OUTSTANDING; -+ cmdiocbq->cmd_flag |= LPFC_IO_CMD_OUTSTANDING; - } - spin_unlock_irqrestore(&phba->hbalock, flags); - return 0; /* done for now */ -@@ -600,7 +600,7 @@ lpfc_bsg_rport_els_cmp(struct lpfc_hba *phba, - - /* Close the timeout handler abort window */ - spin_lock_irqsave(&phba->hbalock, flags); -- cmdiocbq->iocb_flag &= ~LPFC_IO_CMD_OUTSTANDING; -+ cmdiocbq->cmd_flag &= ~LPFC_IO_CMD_OUTSTANDING; - spin_unlock_irqrestore(&phba->hbalock, flags); - - rsp = &rspiocbq->iocb; -@@ -726,10 +726,10 @@ 
lpfc_bsg_rport_els(struct bsg_job *job) - cmdiocbq->iocb.ulpContext = phba->sli4_hba.rpi_ids[rpi]; - else - cmdiocbq->iocb.ulpContext = rpi; -- cmdiocbq->iocb_flag |= LPFC_IO_LIBDFC; -+ cmdiocbq->cmd_flag |= LPFC_IO_LIBDFC; - cmdiocbq->context1 = dd_data; - cmdiocbq->context_un.ndlp = ndlp; -- cmdiocbq->iocb_cmpl = lpfc_bsg_rport_els_cmp; -+ cmdiocbq->cmd_cmpl = lpfc_bsg_rport_els_cmp; - dd_data->type = TYPE_IOCB; - dd_data->set_job = job; - dd_data->context_un.iocb.cmdiocbq = cmdiocbq; -@@ -757,9 +757,9 @@ lpfc_bsg_rport_els(struct bsg_job *job) - if (rc == IOCB_SUCCESS) { - spin_lock_irqsave(&phba->hbalock, flags); - /* make sure the I/O had not been completed/released */ -- if (cmdiocbq->iocb_flag & LPFC_IO_LIBDFC) { -+ if (cmdiocbq->cmd_flag & LPFC_IO_LIBDFC) { - /* open up abort window to timeout handler */ -- cmdiocbq->iocb_flag |= LPFC_IO_CMD_OUTSTANDING; -+ cmdiocbq->cmd_flag |= LPFC_IO_CMD_OUTSTANDING; - } - spin_unlock_irqrestore(&phba->hbalock, flags); - return 0; /* done for now */ -@@ -1053,7 +1053,7 @@ lpfc_bsg_ct_unsol_event(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, - lpfc_in_buf_free(phba, - dmabuf); - } else { -- lpfc_post_buffer(phba, -+ lpfc_sli3_post_buffer(phba, - pring, - 1); - } -@@ -1061,7 +1061,7 @@ lpfc_bsg_ct_unsol_event(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, - default: - if (!(phba->sli3_options & - LPFC_SLI3_HBQ_ENABLED)) -- lpfc_post_buffer(phba, -+ lpfc_sli3_post_buffer(phba, - pring, - 1); - break; -@@ -1395,7 +1395,7 @@ lpfc_issue_ct_rsp_cmp(struct lpfc_hba *phba, - - /* Close the timeout handler abort window */ - spin_lock_irqsave(&phba->hbalock, flags); -- cmdiocbq->iocb_flag &= ~LPFC_IO_CMD_OUTSTANDING; -+ cmdiocbq->cmd_flag &= ~LPFC_IO_CMD_OUTSTANDING; - spin_unlock_irqrestore(&phba->hbalock, flags); - - ndlp = dd_data->context_un.iocb.ndlp; -@@ -1549,13 +1549,13 @@ lpfc_issue_ct_rsp(struct lpfc_hba *phba, struct bsg_job *job, uint32_t tag, - "2722 Xmit CT response on exchange x%x Data: x%x x%x x%x\n", - icmd->ulpContext, icmd->ulpIoTag, tag, phba->link_state); - -- ctiocb->iocb_flag |= LPFC_IO_LIBDFC; -+ ctiocb->cmd_flag |= LPFC_IO_LIBDFC; - ctiocb->vport = phba->pport; - ctiocb->context1 = dd_data; - ctiocb->context2 = cmp; - ctiocb->context3 = bmp; - ctiocb->context_un.ndlp = ndlp; -- ctiocb->iocb_cmpl = lpfc_issue_ct_rsp_cmp; -+ ctiocb->cmd_cmpl = lpfc_issue_ct_rsp_cmp; - - dd_data->type = TYPE_IOCB; - dd_data->set_job = job; -@@ -1582,9 +1582,9 @@ lpfc_issue_ct_rsp(struct lpfc_hba *phba, struct bsg_job *job, uint32_t tag, - if (rc == IOCB_SUCCESS) { - spin_lock_irqsave(&phba->hbalock, flags); - /* make sure the I/O had not been completed/released */ -- if (ctiocb->iocb_flag & LPFC_IO_LIBDFC) { -+ if (ctiocb->cmd_flag & LPFC_IO_LIBDFC) { - /* open up abort window to timeout handler */ -- ctiocb->iocb_flag |= LPFC_IO_CMD_OUTSTANDING; -+ ctiocb->cmd_flag |= LPFC_IO_CMD_OUTSTANDING; - } - spin_unlock_irqrestore(&phba->hbalock, flags); - return 0; /* done for now */ -@@ -2713,9 +2713,9 @@ static int lpfcdiag_loop_get_xri(struct lpfc_hba *phba, uint16_t rpi, - cmd->ulpClass = CLASS3; - cmd->ulpContext = rpi; - -- cmdiocbq->iocb_flag |= LPFC_IO_LIBDFC; -+ cmdiocbq->cmd_flag |= LPFC_IO_LIBDFC; - cmdiocbq->vport = phba->pport; -- cmdiocbq->iocb_cmpl = NULL; -+ cmdiocbq->cmd_cmpl = NULL; - - iocb_stat = lpfc_sli_issue_iocb_wait(phba, LPFC_ELS_RING, cmdiocbq, - rspiocbq, -@@ -3286,10 +3286,10 @@ lpfc_bsg_diag_loopback_run(struct bsg_job *job) - cmdiocbq->sli4_xritag = NO_XRI; - cmd->unsli3.rcvsli3.ox_id = 0xffff; - } -- 
cmdiocbq->iocb_flag |= LPFC_IO_LIBDFC; -- cmdiocbq->iocb_flag |= LPFC_IO_LOOPBACK; -+ cmdiocbq->cmd_flag |= LPFC_IO_LIBDFC; -+ cmdiocbq->cmd_flag |= LPFC_IO_LOOPBACK; - cmdiocbq->vport = phba->pport; -- cmdiocbq->iocb_cmpl = NULL; -+ cmdiocbq->cmd_cmpl = NULL; - iocb_stat = lpfc_sli_issue_iocb_wait(phba, LPFC_ELS_RING, cmdiocbq, - rspiocbq, (phba->fc_ratov * 2) + - LPFC_DRVR_TIMEOUT); -@@ -5273,11 +5273,11 @@ lpfc_menlo_cmd(struct bsg_job *job) - cmd->ulpClass = CLASS3; - cmd->ulpOwner = OWN_CHIP; - cmd->ulpLe = 1; /* Limited Edition */ -- cmdiocbq->iocb_flag |= LPFC_IO_LIBDFC; -+ cmdiocbq->cmd_flag |= LPFC_IO_LIBDFC; - cmdiocbq->vport = phba->pport; - /* We want the firmware to timeout before we do */ - cmd->ulpTimeout = MENLO_TIMEOUT - 5; -- cmdiocbq->iocb_cmpl = lpfc_bsg_menlo_cmd_cmp; -+ cmdiocbq->cmd_cmpl = lpfc_bsg_menlo_cmd_cmp; - cmdiocbq->context1 = dd_data; - cmdiocbq->context2 = cmp; - cmdiocbq->context3 = bmp; -@@ -6001,7 +6001,7 @@ lpfc_bsg_timeout(struct bsg_job *job) - - spin_lock_irqsave(&phba->hbalock, flags); - /* make sure the I/O abort window is still open */ -- if (!(cmdiocb->iocb_flag & LPFC_IO_CMD_OUTSTANDING)) { -+ if (!(cmdiocb->cmd_flag & LPFC_IO_CMD_OUTSTANDING)) { - spin_unlock_irqrestore(&phba->hbalock, flags); - return -EAGAIN; - } diff --git a/drivers/scsi/lpfc/lpfc_crtn.h b/drivers/scsi/lpfc/lpfc_crtn.h -index c512f41991429..f7bf589b63fb2 100644 +index c512f41991429..470239394e649 100644 --- a/drivers/scsi/lpfc/lpfc_crtn.h +++ b/drivers/scsi/lpfc/lpfc_crtn.h -@@ -119,6 +119,8 @@ int lpfc_check_sli_ndlp(struct lpfc_hba *, struct lpfc_sli_ring *, +@@ -90,6 +90,14 @@ void lpfc_cgn_dump_rxmonitor(struct lpfc_hba *phba); + void lpfc_cgn_update_stat(struct lpfc_hba *phba, uint32_t dtag); + void lpfc_unblock_requests(struct lpfc_hba *phba); + void lpfc_block_requests(struct lpfc_hba *phba); ++int lpfc_rx_monitor_create_ring(struct lpfc_rx_info_monitor *rx_monitor, ++ u32 entries); ++void lpfc_rx_monitor_destroy_ring(struct lpfc_rx_info_monitor *rx_monitor); ++void lpfc_rx_monitor_record(struct lpfc_rx_info_monitor *rx_monitor, ++ struct rx_info_entry *entry); ++u32 lpfc_rx_monitor_report(struct lpfc_hba *phba, ++ struct lpfc_rx_info_monitor *rx_monitor, char *buf, ++ u32 buf_len, u32 max_read_entries); + + void lpfc_mbx_cmpl_local_config_link(struct lpfc_hba *, LPFC_MBOXQ_t *); + void lpfc_mbx_cmpl_reg_login(struct lpfc_hba *, LPFC_MBOXQ_t *); +@@ -119,6 +127,8 @@ int lpfc_check_sli_ndlp(struct lpfc_hba *, struct lpfc_sli_ring *, struct lpfc_nodelist *lpfc_nlp_init(struct lpfc_vport *vport, uint32_t did); struct lpfc_nodelist *lpfc_nlp_get(struct lpfc_nodelist *); int lpfc_nlp_put(struct lpfc_nodelist *); @@ -250626,65 +304874,8 @@ index c512f41991429..f7bf589b63fb2 100644 void lpfc_ignore_els_cmpl(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, struct lpfc_iocbq *rspiocb); int lpfc_nlp_not_used(struct lpfc_nodelist *ndlp); -@@ -127,6 +129,7 @@ void lpfc_disc_list_loopmap(struct lpfc_vport *); - void lpfc_disc_start(struct lpfc_vport *); - void lpfc_cleanup_discovery_resources(struct lpfc_vport *); - void lpfc_cleanup(struct lpfc_vport *); -+void lpfc_prep_embed_io(struct lpfc_hba *phba, struct lpfc_io_buf *lpfc_ncmd); - void lpfc_disc_timeout(struct timer_list *); - - int lpfc_unregister_fcf_prep(struct lpfc_hba *); -@@ -208,7 +211,7 @@ int lpfc_config_port_post(struct lpfc_hba *); - int lpfc_hba_down_prep(struct lpfc_hba *); - int lpfc_hba_down_post(struct lpfc_hba *); - void lpfc_hba_init(struct lpfc_hba *, uint32_t *); --int lpfc_post_buffer(struct lpfc_hba 
*, struct lpfc_sli_ring *, int); -+int lpfc_sli3_post_buffer(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, int cnt); - void lpfc_decode_firmware_rev(struct lpfc_hba *, char *, int); - int lpfc_online(struct lpfc_hba *); - void lpfc_unblock_mgmt_io(struct lpfc_hba *); -diff --git a/drivers/scsi/lpfc/lpfc_ct.c b/drivers/scsi/lpfc/lpfc_ct.c -index dfcb7d4bd7fa1..19e2f8086a6da 100644 ---- a/drivers/scsi/lpfc/lpfc_ct.c -+++ b/drivers/scsi/lpfc/lpfc_ct.c -@@ -239,7 +239,7 @@ lpfc_ct_reject_event(struct lpfc_nodelist *ndlp, - cmdiocbq->context1 = lpfc_nlp_get(ndlp); - cmdiocbq->context2 = (uint8_t *)mp; - cmdiocbq->context3 = (uint8_t *)bmp; -- cmdiocbq->iocb_cmpl = lpfc_ct_unsol_cmpl; -+ cmdiocbq->cmd_cmpl = lpfc_ct_unsol_cmpl; - icmd->ulpContext = rx_id; /* Xri / rx_id */ - icmd->unsli3.rcvsli3.ox_id = ox_id; - icmd->un.ulpWord[3] = -@@ -370,7 +370,7 @@ lpfc_ct_unsol_event(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, - /* Not enough posted buffers; Try posting more buffers */ - phba->fc_stat.NoRcvBuf++; - if (!(phba->sli3_options & LPFC_SLI3_HBQ_ENABLED)) -- lpfc_post_buffer(phba, pring, 2); -+ lpfc_sli3_post_buffer(phba, pring, 2); - return; - } - -@@ -447,7 +447,7 @@ lpfc_ct_unsol_event(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, - lpfc_ct_unsol_buffer(phba, iocbq, mp, size); - lpfc_in_buf_free(phba, mp); - } -- lpfc_post_buffer(phba, pring, i); -+ lpfc_sli3_post_buffer(phba, pring, i); - } - list_del(&head); - } -@@ -652,7 +652,7 @@ lpfc_gen_req(struct lpfc_vport *vport, struct lpfc_dmabuf *bmp, - "Data: x%x x%x\n", - ndlp->nlp_DID, icmd->ulpIoTag, - vport->port_state); -- geniocb->iocb_cmpl = cmpl; -+ geniocb->cmd_cmpl = cmpl; - geniocb->drvrTimeout = icmd->ulpTimeout + LPFC_DRVR_TIMEOUT; - geniocb->vport = vport; - geniocb->retry = retry; diff --git a/drivers/scsi/lpfc/lpfc_debugfs.c b/drivers/scsi/lpfc/lpfc_debugfs.c -index bd6d459afce54..79bc86ba59b35 100644 +index bd6d459afce54..8e8bbe734e875 100644 --- a/drivers/scsi/lpfc/lpfc_debugfs.c +++ b/drivers/scsi/lpfc/lpfc_debugfs.c @@ -2607,8 +2607,8 @@ lpfc_debugfs_multixripools_write(struct file *file, const char __user *buf, @@ -250742,6 +304933,93 @@ index bd6d459afce54..79bc86ba59b35 100644 memset(mybuf, 0, sizeof(mybuf)); +@@ -5520,7 +5520,7 @@ lpfc_rx_monitor_open(struct inode *inode, struct file *file) + if (!debug) + goto out; + +- debug->buffer = vmalloc(MAX_DEBUGFS_RX_TABLE_SIZE); ++ debug->buffer = vmalloc(MAX_DEBUGFS_RX_INFO_SIZE); + if (!debug->buffer) { + kfree(debug); + goto out; +@@ -5541,55 +5541,18 @@ lpfc_rx_monitor_read(struct file *file, char __user *buf, size_t nbytes, + struct lpfc_rx_monitor_debug *debug = file->private_data; + struct lpfc_hba *phba = (struct lpfc_hba *)debug->i_private; + char *buffer = debug->buffer; +- struct rxtable_entry *entry; +- int i, len = 0, head, tail, last, start; +- +- head = atomic_read(&phba->rxtable_idx_head); +- while (head == LPFC_RXMONITOR_TABLE_IN_USE) { +- /* Table is getting updated */ +- msleep(20); +- head = atomic_read(&phba->rxtable_idx_head); +- } + +- tail = atomic_xchg(&phba->rxtable_idx_tail, head); +- if (!phba->rxtable || head == tail) { +- len += scnprintf(buffer + len, MAX_DEBUGFS_RX_TABLE_SIZE - len, +- "Rxtable is empty\n"); +- goto out; +- } +- last = (head > tail) ? 
head : LPFC_MAX_RXMONITOR_ENTRY; +- start = tail; +- +- len += scnprintf(buffer + len, MAX_DEBUGFS_RX_TABLE_SIZE - len, +- " MaxBPI\t Total Data Cmd Total Data Cmpl " +- " Latency(us) Avg IO Size\tMax IO Size IO cnt " +- "Info BWutil(ms)\n"); +-get_table: +- for (i = start; i < last; i++) { +- entry = &phba->rxtable[i]; +- len += scnprintf(buffer + len, MAX_DEBUGFS_RX_TABLE_SIZE - len, +- "%3d:%12lld %12lld\t%12lld\t" +- "%8lldus\t%8lld\t%10lld " +- "%8d %2d %2d(%2d)\n", +- i, entry->max_bytes_per_interval, +- entry->total_bytes, +- entry->rcv_bytes, +- entry->avg_io_latency, +- entry->avg_io_size, +- entry->max_read_cnt, +- entry->io_cnt, +- entry->cmf_info, +- entry->timer_utilization, +- entry->timer_interval); ++ if (!phba->rx_monitor) { ++ scnprintf(buffer, MAX_DEBUGFS_RX_INFO_SIZE, ++ "Rx Monitor Info is empty.\n"); ++ } else { ++ lpfc_rx_monitor_report(phba, phba->rx_monitor, buffer, ++ MAX_DEBUGFS_RX_INFO_SIZE, ++ LPFC_MAX_RXMONITOR_ENTRY); + } + +- if (head != last) { +- start = 0; +- last = head; +- goto get_table; +- } +-out: +- return simple_read_from_buffer(buf, nbytes, ppos, buffer, len); ++ return simple_read_from_buffer(buf, nbytes, ppos, buffer, ++ strlen(buffer)); + } + + static int +diff --git a/drivers/scsi/lpfc/lpfc_debugfs.h b/drivers/scsi/lpfc/lpfc_debugfs.h +index a5bf71b349720..f71e5b6311ac0 100644 +--- a/drivers/scsi/lpfc/lpfc_debugfs.h ++++ b/drivers/scsi/lpfc/lpfc_debugfs.h +@@ -282,7 +282,7 @@ struct lpfc_idiag { + void *ptr_private; + }; + +-#define MAX_DEBUGFS_RX_TABLE_SIZE (100 * LPFC_MAX_RXMONITOR_ENTRY) ++#define MAX_DEBUGFS_RX_INFO_SIZE (128 * LPFC_MAX_RXMONITOR_ENTRY) + struct lpfc_rx_monitor_debug { + char *i_private; + char *buffer; diff --git a/drivers/scsi/lpfc/lpfc_disc.h b/drivers/scsi/lpfc/lpfc_disc.h index 871b665bd72e3..37a4b79010bfc 100644 --- a/drivers/scsi/lpfc/lpfc_disc.h @@ -250773,37 +305051,9 @@ index 871b665bd72e3..37a4b79010bfc 100644 enum lpfc_fc4_xpt_flags fc4_xpt_flags; diff --git a/drivers/scsi/lpfc/lpfc_els.c b/drivers/scsi/lpfc/lpfc_els.c -index 052c0e5b11195..0d34a03164f53 100644 +index 052c0e5b11195..5f44a0763f37d 100644 --- a/drivers/scsi/lpfc/lpfc_els.c +++ b/drivers/scsi/lpfc/lpfc_els.c -@@ -192,23 +192,23 @@ lpfc_prep_els_iocb(struct lpfc_vport *vport, uint8_t expectRsp, - (elscmd == ELS_CMD_LOGO))) - switch (elscmd) { - case ELS_CMD_FLOGI: -- elsiocb->iocb_flag |= -+ elsiocb->cmd_flag |= - ((LPFC_ELS_ID_FLOGI << LPFC_FIP_ELS_ID_SHIFT) - & LPFC_FIP_ELS_ID_MASK); - break; - case ELS_CMD_FDISC: -- elsiocb->iocb_flag |= -+ elsiocb->cmd_flag |= - ((LPFC_ELS_ID_FDISC << LPFC_FIP_ELS_ID_SHIFT) - & LPFC_FIP_ELS_ID_MASK); - break; - case ELS_CMD_LOGO: -- elsiocb->iocb_flag |= -+ elsiocb->cmd_flag |= - ((LPFC_ELS_ID_LOGO << LPFC_FIP_ELS_ID_SHIFT) - & LPFC_FIP_ELS_ID_MASK); - break; - } - else -- elsiocb->iocb_flag &= ~LPFC_FIP_ELS_ID_MASK; -+ elsiocb->cmd_flag &= ~LPFC_FIP_ELS_ID_MASK; - - icmd = &elsiocb->iocb; - @@ -1059,9 +1059,10 @@ stop_rr_fcf_flogi: lpfc_printf_vlog(vport, KERN_WARNING, LOG_TRACE_EVENT, @@ -250851,82 +305101,6 @@ index 052c0e5b11195..0d34a03164f53 100644 if (!lpfc_error_lost_link(irsp)) { /* FLOGI failed, so just use loop map to make discovery list */ lpfc_disc_list_loopmap(vport); -@@ -1252,10 +1252,10 @@ lpfc_cmpl_els_link_down(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, - "6445 ELS completes after LINK_DOWN: " - " Status %x/%x cmd x%x flg x%x\n", - irsp->ulpStatus, irsp->un.ulpWord[4], cmd, -- cmdiocb->iocb_flag); -+ cmdiocb->cmd_flag); - -- if (cmdiocb->iocb_flag & LPFC_IO_FABRIC) { -- 
cmdiocb->iocb_flag &= ~LPFC_IO_FABRIC; -+ if (cmdiocb->cmd_flag & LPFC_IO_FABRIC) { -+ cmdiocb->cmd_flag &= ~LPFC_IO_FABRIC; - atomic_dec(&phba->fabric_iocb_count); - } - lpfc_els_free_iocb(phba, cmdiocb); -@@ -1370,7 +1370,7 @@ lpfc_issue_els_flogi(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, - phba->fc_ratov = tmo; - - phba->fc_stat.elsXmitFLOGI++; -- elsiocb->iocb_cmpl = lpfc_cmpl_els_flogi; -+ elsiocb->cmd_cmpl = lpfc_cmpl_els_flogi; - - lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_CMD, - "Issue FLOGI: opt:x%x", -@@ -1463,7 +1463,7 @@ lpfc_els_abort_flogi(struct lpfc_hba *phba) - if (ndlp && ndlp->nlp_DID == Fabric_DID) { - if ((phba->pport->fc_flag & FC_PT2PT) && - !(phba->pport->fc_flag & FC_PT2PT_PLOGI)) -- iocb->fabric_iocb_cmpl = -+ iocb->fabric_cmd_cmpl = - lpfc_ignore_els_cmpl; - lpfc_sli_issue_abort_iotag(phba, pring, iocb, - NULL); -@@ -2226,7 +2226,7 @@ lpfc_issue_els_plogi(struct lpfc_vport *vport, uint32_t did, uint8_t retry) - } - - phba->fc_stat.elsXmitPLOGI++; -- elsiocb->iocb_cmpl = lpfc_cmpl_els_plogi; -+ elsiocb->cmd_cmpl = lpfc_cmpl_els_plogi; - - lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_CMD, - "Issue PLOGI: did:x%x refcnt %d", -@@ -2478,7 +2478,7 @@ lpfc_issue_els_prli(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, - /* For FCP support */ - npr->prliType = PRLI_FCP_TYPE; - npr->initiatorFunc = 1; -- elsiocb->iocb_flag |= LPFC_PRLI_FCP_REQ; -+ elsiocb->cmd_flag |= LPFC_PRLI_FCP_REQ; - - /* Remove FCP type - processed. */ - local_nlp_type &= ~NLP_FC4_FCP; -@@ -2512,14 +2512,14 @@ lpfc_issue_els_prli(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, - - npr_nvme->word1 = cpu_to_be32(npr_nvme->word1); - npr_nvme->word4 = cpu_to_be32(npr_nvme->word4); -- elsiocb->iocb_flag |= LPFC_PRLI_NVME_REQ; -+ elsiocb->cmd_flag |= LPFC_PRLI_NVME_REQ; - - /* Remove NVME type - processed. 
*/ - local_nlp_type &= ~NLP_FC4_NVME; - } - - phba->fc_stat.elsXmitPRLI++; -- elsiocb->iocb_cmpl = lpfc_cmpl_els_prli; -+ elsiocb->cmd_cmpl = lpfc_cmpl_els_prli; - spin_lock_irq(&ndlp->lock); - ndlp->nlp_flag |= NLP_PRLI_SND; - -@@ -2842,7 +2842,7 @@ lpfc_issue_els_adisc(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, - ap->DID = be32_to_cpu(vport->fc_myDID); - - phba->fc_stat.elsXmitADISC++; -- elsiocb->iocb_cmpl = lpfc_cmpl_els_adisc; -+ elsiocb->cmd_cmpl = lpfc_cmpl_els_adisc; - spin_lock_irq(&ndlp->lock); - ndlp->nlp_flag |= NLP_ADISC_SND; - spin_unlock_irq(&ndlp->lock); @@ -2899,9 +2899,9 @@ lpfc_cmpl_els_logo(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, irsp = &(rspiocb->iocb); spin_lock_irq(&ndlp->lock); @@ -250981,33 +305155,6 @@ index 052c0e5b11195..0d34a03164f53 100644 } /** -@@ -3065,7 +3065,7 @@ lpfc_issue_els_logo(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, - memcpy(pcmd, &vport->fc_portname, sizeof(struct lpfc_name)); - - phba->fc_stat.elsXmitLOGO++; -- elsiocb->iocb_cmpl = lpfc_cmpl_els_logo; -+ elsiocb->cmd_cmpl = lpfc_cmpl_els_logo; - spin_lock_irq(&ndlp->lock); - ndlp->nlp_flag |= NLP_LOGO_SND; - ndlp->nlp_flag &= ~NLP_ISSUE_LOGO; -@@ -3417,7 +3417,7 @@ lpfc_issue_els_scr(struct lpfc_vport *vport, uint8_t retry) - ndlp->nlp_DID, 0, 0); - - phba->fc_stat.elsXmitSCR++; -- elsiocb->iocb_cmpl = lpfc_cmpl_els_disc_cmd; -+ elsiocb->cmd_cmpl = lpfc_cmpl_els_disc_cmd; - elsiocb->context1 = lpfc_nlp_get(ndlp); - if (!elsiocb->context1) { - lpfc_els_free_iocb(phba, elsiocb); -@@ -3514,7 +3514,7 @@ lpfc_issue_els_rscn(struct lpfc_vport *vport, uint8_t retry) - event->portid.rscn_fid[2] = nportid & 0x000000FF; - - phba->fc_stat.elsXmitRSCN++; -- elsiocb->iocb_cmpl = lpfc_cmpl_els_cmd; -+ elsiocb->cmd_cmpl = lpfc_cmpl_els_cmd; - elsiocb->context1 = lpfc_nlp_get(ndlp); - if (!elsiocb->context1) { - lpfc_els_free_iocb(phba, elsiocb); @@ -3532,11 +3532,6 @@ lpfc_issue_els_rscn(struct lpfc_vport *vport, uint8_t retry) return 1; } @@ -251020,24 +305167,6 @@ index 052c0e5b11195..0d34a03164f53 100644 return 0; } -@@ -3618,7 +3613,7 @@ lpfc_issue_els_farpr(struct lpfc_vport *vport, uint32_t nportid, uint8_t retry) - ndlp->nlp_DID, 0, 0); - - phba->fc_stat.elsXmitFARPR++; -- elsiocb->iocb_cmpl = lpfc_cmpl_els_cmd; -+ elsiocb->cmd_cmpl = lpfc_cmpl_els_cmd; - elsiocb->context1 = lpfc_nlp_get(ndlp); - if (!elsiocb->context1) { - lpfc_els_free_iocb(phba, elsiocb); -@@ -3709,7 +3704,7 @@ lpfc_issue_els_rdf(struct lpfc_vport *vport, uint8_t retry) - phba->cgn_reg_fpin); - - phba->cgn_fpin_frequency = LPFC_FPIN_INIT_FREQ; -- elsiocb->iocb_cmpl = lpfc_cmpl_els_disc_cmd; -+ elsiocb->cmd_cmpl = lpfc_cmpl_els_disc_cmd; - elsiocb->context1 = lpfc_nlp_get(ndlp); - if (!elsiocb->context1) { - lpfc_els_free_iocb(phba, elsiocb); @@ -3782,9 +3777,6 @@ lpfc_least_capable_settings(struct lpfc_hba *phba, { u32 rsp_sig_cap = 0, drv_sig_cap = 0; @@ -251075,15 +305204,6 @@ index 052c0e5b11195..0d34a03164f53 100644 return; out_no_support: -@@ -4180,7 +4154,7 @@ lpfc_issue_els_edc(struct lpfc_vport *vport, uint8_t retry) - ndlp->nlp_DID, phba->cgn_reg_signal, - phba->cgn_reg_fpin); - -- elsiocb->iocb_cmpl = lpfc_cmpl_els_disc_cmd; -+ elsiocb->cmd_cmpl = lpfc_cmpl_els_disc_cmd; - elsiocb->context1 = lpfc_nlp_get(ndlp); - if (!elsiocb->context1) { - lpfc_els_free_iocb(phba, elsiocb); @@ -4593,6 +4567,23 @@ lpfc_els_retry(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, /* Added for Vendor specifc support * Just keep retrying for these Rsn / Exp codes @@ -251108,21 +305228,6 @@ index 
052c0e5b11195..0d34a03164f53 100644 switch (stat.un.b.lsRjtRsnCode) { case LSRJT_UNABLE_TPC: /* The driver has a VALID PLOGI but the rport has -@@ -4977,12 +4968,12 @@ lpfc_els_free_iocb(struct lpfc_hba *phba, struct lpfc_iocbq *elsiocb) - - /* context2 = cmd, context2->next = rsp, context3 = bpl */ - if (elsiocb->context2) { -- if (elsiocb->iocb_flag & LPFC_DELAY_MEM_FREE) { -+ if (elsiocb->cmd_flag & LPFC_DELAY_MEM_FREE) { - /* Firmware could still be in progress of DMAing - * payload, so don't free data buffer till after - * a hbeat. - */ -- elsiocb->iocb_flag &= ~LPFC_DELAY_MEM_FREE; -+ elsiocb->cmd_flag &= ~LPFC_DELAY_MEM_FREE; - buf_ptr = elsiocb->context2; - elsiocb->context2 = NULL; - if (buf_ptr) { @@ -5076,14 +5067,9 @@ lpfc_cmpl_els_logo_acc(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, /* NPort Recovery mode or node is just allocated */ if (!lpfc_nlp_not_used(ndlp)) { @@ -251140,72 +305245,6 @@ index 052c0e5b11195..0d34a03164f53 100644 lpfc_unreg_rpi(vport, ndlp); } else { /* Indicate the node has already released, should -@@ -5494,9 +5480,9 @@ lpfc_els_rsp_acc(struct lpfc_vport *vport, uint32_t flag, - ndlp->nlp_flag & NLP_REG_LOGIN_SEND)) - ndlp->nlp_flag &= ~NLP_LOGO_ACC; - spin_unlock_irq(&ndlp->lock); -- elsiocb->iocb_cmpl = lpfc_cmpl_els_logo_acc; -+ elsiocb->cmd_cmpl = lpfc_cmpl_els_logo_acc; - } else { -- elsiocb->iocb_cmpl = lpfc_cmpl_els_rsp; -+ elsiocb->cmd_cmpl = lpfc_cmpl_els_rsp; - } - - phba->fc_stat.elsXmitACC++; -@@ -5591,7 +5577,7 @@ lpfc_els_rsp_reject(struct lpfc_vport *vport, uint32_t rejectError, - ndlp->nlp_DID, ndlp->nlp_flag, rejectError); - - phba->fc_stat.elsXmitLSRJT++; -- elsiocb->iocb_cmpl = lpfc_cmpl_els_rsp; -+ elsiocb->cmd_cmpl = lpfc_cmpl_els_rsp; - elsiocb->context1 = lpfc_nlp_get(ndlp); - if (!elsiocb->context1) { - lpfc_els_free_iocb(phba, elsiocb); -@@ -5671,7 +5657,7 @@ lpfc_issue_els_edc_rsp(struct lpfc_vport *vport, struct lpfc_iocbq *cmdiocb, - "Issue EDC ACC: did:x%x flg:x%x refcnt %d", - ndlp->nlp_DID, ndlp->nlp_flag, - kref_read(&ndlp->kref)); -- elsiocb->iocb_cmpl = lpfc_cmpl_els_rsp; -+ elsiocb->cmd_cmpl = lpfc_cmpl_els_rsp; - - phba->fc_stat.elsXmitACC++; - elsiocb->context1 = lpfc_nlp_get(ndlp); -@@ -5764,7 +5750,7 @@ lpfc_els_rsp_adisc_acc(struct lpfc_vport *vport, struct lpfc_iocbq *oldiocb, - ndlp->nlp_DID, ndlp->nlp_flag, kref_read(&ndlp->kref)); - - phba->fc_stat.elsXmitACC++; -- elsiocb->iocb_cmpl = lpfc_cmpl_els_rsp; -+ elsiocb->cmd_cmpl = lpfc_cmpl_els_rsp; - elsiocb->context1 = lpfc_nlp_get(ndlp); - if (!elsiocb->context1) { - lpfc_els_free_iocb(phba, elsiocb); -@@ -5938,7 +5924,7 @@ lpfc_els_rsp_prli_acc(struct lpfc_vport *vport, struct lpfc_iocbq *oldiocb, - ndlp->nlp_DID, ndlp->nlp_flag, kref_read(&ndlp->kref)); - - phba->fc_stat.elsXmitACC++; -- elsiocb->iocb_cmpl = lpfc_cmpl_els_rsp; -+ elsiocb->cmd_cmpl = lpfc_cmpl_els_rsp; - elsiocb->context1 = lpfc_nlp_get(ndlp); - if (!elsiocb->context1) { - lpfc_els_free_iocb(phba, elsiocb); -@@ -6039,7 +6025,7 @@ lpfc_els_rsp_rnid_acc(struct lpfc_vport *vport, uint8_t format, - ndlp->nlp_DID, ndlp->nlp_flag, kref_read(&ndlp->kref)); - - phba->fc_stat.elsXmitACC++; -- elsiocb->iocb_cmpl = lpfc_cmpl_els_rsp; -+ elsiocb->cmd_cmpl = lpfc_cmpl_els_rsp; - elsiocb->context1 = lpfc_nlp_get(ndlp); - if (!elsiocb->context1) { - lpfc_els_free_iocb(phba, elsiocb); -@@ -6153,7 +6139,7 @@ lpfc_els_rsp_echo_acc(struct lpfc_vport *vport, uint8_t *data, - ndlp->nlp_DID, ndlp->nlp_flag, kref_read(&ndlp->kref)); - - phba->fc_stat.elsXmitACC++; -- elsiocb->iocb_cmpl = lpfc_cmpl_els_rsp; -+ 
elsiocb->cmd_cmpl = lpfc_cmpl_els_rsp; - elsiocb->context1 = lpfc_nlp_get(ndlp); - if (!elsiocb->context1) { - lpfc_els_free_iocb(phba, elsiocb); @@ -6216,6 +6202,7 @@ lpfc_els_disc_adisc(struct lpfc_vport *vport) * from backend */ @@ -251214,24 +305253,6 @@ index 052c0e5b11195..0d34a03164f53 100644 continue; } -@@ -6816,7 +6803,7 @@ lpfc_els_rdp_cmpl(struct lpfc_hba *phba, struct lpfc_rdp_context *rdp_context, - rdp_context->page_a0, vport); - - rdp_res->length = cpu_to_be32(len - 8); -- elsiocb->iocb_cmpl = lpfc_cmpl_els_rsp; -+ elsiocb->cmd_cmpl = lpfc_cmpl_els_rsp; - - /* Now that we know the true size of the payload, update the BPL */ - bpl = (struct ulp_bde64 *) -@@ -6857,7 +6844,7 @@ error: - stat->un.b.lsRjtRsnCode = LSRJT_UNABLE_TPC; - - phba->fc_stat.elsXmitLSRJT++; -- elsiocb->iocb_cmpl = lpfc_cmpl_els_rsp; -+ elsiocb->cmd_cmpl = lpfc_cmpl_els_rsp; - elsiocb->context1 = lpfc_nlp_get(ndlp); - if (!elsiocb->context1) { - lpfc_els_free_iocb(phba, elsiocb); @@ -6882,6 +6869,7 @@ static int lpfc_get_rdp_info(struct lpfc_hba *phba, struct lpfc_rdp_context *rdp_context) { @@ -251253,106 +305274,7 @@ index 052c0e5b11195..0d34a03164f53 100644 return 0; -@@ -7075,7 +7066,7 @@ lpfc_els_lcb_rsp(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) - lcb_res->capability = lcb_context->capability; - lcb_res->lcb_frequency = lcb_context->frequency; - lcb_res->lcb_duration = lcb_context->duration; -- elsiocb->iocb_cmpl = lpfc_cmpl_els_rsp; -+ elsiocb->cmd_cmpl = lpfc_cmpl_els_rsp; - phba->fc_stat.elsXmitACC++; - - elsiocb->context1 = lpfc_nlp_get(ndlp); -@@ -7114,7 +7105,7 @@ error: - if (shdr_add_status == ADD_STATUS_OPERATION_ALREADY_ACTIVE) - stat->un.b.lsRjtRsnCodeExp = LSEXP_CMD_IN_PROGRESS; - -- elsiocb->iocb_cmpl = lpfc_cmpl_els_rsp; -+ elsiocb->cmd_cmpl = lpfc_cmpl_els_rsp; - phba->fc_stat.elsXmitLSRJT++; - elsiocb->context1 = lpfc_nlp_get(ndlp); - if (!elsiocb->context1) { -@@ -8181,7 +8172,7 @@ lpfc_els_rsp_rls_acc(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) - elsiocb->iotag, elsiocb->iocb.ulpContext, - ndlp->nlp_DID, ndlp->nlp_flag, ndlp->nlp_state, - ndlp->nlp_rpi); -- elsiocb->iocb_cmpl = lpfc_cmpl_els_rsp; -+ elsiocb->cmd_cmpl = lpfc_cmpl_els_rsp; - phba->fc_stat.elsXmitACC++; - elsiocb->context1 = lpfc_nlp_get(ndlp); - if (!elsiocb->context1) { -@@ -8333,7 +8324,7 @@ lpfc_els_rcv_rtv(struct lpfc_vport *vport, struct lpfc_iocbq *cmdiocb, - ndlp->nlp_DID, ndlp->nlp_flag, ndlp->nlp_state, - ndlp->nlp_rpi, - rtv_rsp->ratov, rtv_rsp->edtov, rtv_rsp->qtov); -- elsiocb->iocb_cmpl = lpfc_cmpl_els_rsp; -+ elsiocb->cmd_cmpl = lpfc_cmpl_els_rsp; - phba->fc_stat.elsXmitACC++; - elsiocb->context1 = lpfc_nlp_get(ndlp); - if (!elsiocb->context1) { -@@ -8410,7 +8401,7 @@ lpfc_issue_els_rrq(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, - "Issue RRQ: did:x%x", - did, rrq->xritag, rrq->rxid); - elsiocb->context_un.rrq = rrq; -- elsiocb->iocb_cmpl = lpfc_cmpl_els_rrq; -+ elsiocb->cmd_cmpl = lpfc_cmpl_els_rrq; - - lpfc_nlp_get(ndlp); - elsiocb->context1 = ndlp; -@@ -8516,7 +8507,7 @@ lpfc_els_rsp_rpl_acc(struct lpfc_vport *vport, uint16_t cmdsize, - elsiocb->iotag, elsiocb->iocb.ulpContext, - ndlp->nlp_DID, ndlp->nlp_flag, ndlp->nlp_state, - ndlp->nlp_rpi); -- elsiocb->iocb_cmpl = lpfc_cmpl_els_rsp; -+ elsiocb->cmd_cmpl = lpfc_cmpl_els_rsp; - phba->fc_stat.elsXmitACC++; - elsiocb->context1 = lpfc_nlp_get(ndlp); - if (!elsiocb->context1) { -@@ -8956,7 +8947,7 @@ lpfc_els_timeout_handler(struct lpfc_vport *vport) - list_for_each_entry_safe(piocb, tmp_iocb, &pring->txcmplq, list) { - cmd = &piocb->iocb; - -- 
if ((piocb->iocb_flag & LPFC_IO_LIBDFC) != 0 || -+ if ((piocb->cmd_flag & LPFC_IO_LIBDFC) != 0 || - piocb->iocb.ulpCommand == CMD_ABORT_XRI_CN || - piocb->iocb.ulpCommand == CMD_CLOSE_XRI_CN) - continue; -@@ -9069,13 +9060,13 @@ lpfc_els_flush_cmd(struct lpfc_vport *vport) - - /* First we need to issue aborts to outstanding cmds on txcmpl */ - list_for_each_entry_safe(piocb, tmp_iocb, &pring->txcmplq, list) { -- if (piocb->iocb_flag & LPFC_IO_LIBDFC) -+ if (piocb->cmd_flag & LPFC_IO_LIBDFC) - continue; - - if (piocb->vport != vport) - continue; - -- if (piocb->iocb_flag & LPFC_DRIVER_ABORTED) -+ if (piocb->cmd_flag & LPFC_DRIVER_ABORTED) - continue; - - /* On the ELS ring we can have ELS_REQUESTs or -@@ -9093,7 +9084,7 @@ lpfc_els_flush_cmd(struct lpfc_vport *vport) - * and avoid any retry logic. - */ - if (phba->link_state == LPFC_LINK_DOWN) -- piocb->iocb_cmpl = lpfc_cmpl_els_link_down; -+ piocb->cmd_cmpl = lpfc_cmpl_els_link_down; - } - if (cmd->ulpCommand == CMD_GEN_REQUEST64_CR) - list_add_tail(&piocb->dlist, &abort_list); -@@ -9128,9 +9119,8 @@ lpfc_els_flush_cmd(struct lpfc_vport *vport) - list_for_each_entry_safe(piocb, tmp_iocb, &pring->txq, list) { - cmd = &piocb->iocb; - -- if (piocb->iocb_flag & LPFC_IO_LIBDFC) { -+ if (piocb->cmd_flag & LPFC_IO_LIBDFC) - continue; -- } - - /* Do not flush out the QUE_RING and ABORT/CLOSE iocbs */ - if (cmd->ulpCommand == CMD_QUE_RING_BUF_CN || -@@ -9572,11 +9562,14 @@ lpfc_els_rcv_fpin_cgn(struct lpfc_hba *phba, struct fc_tlv_desc *tlv) +@@ -9572,11 +9563,14 @@ lpfc_els_rcv_fpin_cgn(struct lpfc_hba *phba, struct fc_tlv_desc *tlv) /* Take action here for an Alarm event */ if (phba->cmf_active_mode != LPFC_CFG_OFF) { if (phba->cgn_reg_fpin & LPFC_CGN_FPIN_ALARM) { @@ -251369,7 +305291,7 @@ index 052c0e5b11195..0d34a03164f53 100644 goto cleanup; } break; -@@ -9584,11 +9577,14 @@ lpfc_els_rcv_fpin_cgn(struct lpfc_hba *phba, struct fc_tlv_desc *tlv) +@@ -9584,11 +9578,14 @@ lpfc_els_rcv_fpin_cgn(struct lpfc_hba *phba, struct fc_tlv_desc *tlv) /* Take action here for a Warning event */ if (phba->cmf_active_mode != LPFC_CFG_OFF) { if (phba->cgn_reg_fpin & LPFC_CGN_FPIN_WARN) { @@ -251386,7 +305308,7 @@ index 052c0e5b11195..0d34a03164f53 100644 cleanup: /* Save frequency in ms */ phba->cgn_fpin_frequency = -@@ -9597,14 +9593,10 @@ cleanup: +@@ -9597,14 +9594,10 @@ cleanup: if (phba->cgn_i) { cp = (struct lpfc_cgn_info *) phba->cgn_i->virt; @@ -251405,25 +305327,7 @@ index 052c0e5b11195..0d34a03164f53 100644 crc = lpfc_cgn_calc_crc32 (cp, LPFC_CGN_INFO_SZ, -@@ -9773,7 +9765,7 @@ lpfc_els_unsol_buffer(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, - payload_len = elsiocb->iocb.unsli3.rcvsli3.acc_len; - cmd = *payload; - if ((phba->sli3_options & LPFC_SLI3_HBQ_ENABLED) == 0) -- lpfc_post_buffer(phba, pring, 1); -+ lpfc_sli3_post_buffer(phba, pring, 1); - - did = icmd->un.rcvels.remoteID; - if (icmd->ulpStatus) { -@@ -10246,7 +10238,7 @@ lpfc_els_unsol_event(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, - phba->fc_stat.NoRcvBuf++; - /* Not enough posted buffers; Try posting more buffers */ - if (!(phba->sli3_options & LPFC_SLI3_HBQ_ENABLED)) -- lpfc_post_buffer(phba, pring, 0); -+ lpfc_sli3_post_buffer(phba, pring, 0); - return; - } - -@@ -10713,6 +10705,9 @@ lpfc_cmpl_els_fdisc(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, +@@ -10713,6 +10706,9 @@ lpfc_cmpl_els_fdisc(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, irsp->ulpStatus, irsp->un.ulpWord[4]); goto fdisc_failed; } @@ -251433,134 +305337,6 @@ index 052c0e5b11195..0d34a03164f53 
100644 spin_lock_irq(shost->host_lock); vport->fc_flag &= ~FC_VPORT_CVL_RCVD; vport->fc_flag &= ~FC_VPORT_LOGO_RCVD; -@@ -10879,7 +10874,7 @@ lpfc_issue_els_fdisc(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, - lpfc_set_disctmo(vport); - - phba->fc_stat.elsXmitFDISC++; -- elsiocb->iocb_cmpl = lpfc_cmpl_els_fdisc; -+ elsiocb->cmd_cmpl = lpfc_cmpl_els_fdisc; - - lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_CMD, - "Issue FDISC: did:x%x", -@@ -11003,7 +10998,7 @@ lpfc_issue_els_npiv_logo(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp) - "Issue LOGO npiv did:x%x flg:x%x", - ndlp->nlp_DID, ndlp->nlp_flag, 0); - -- elsiocb->iocb_cmpl = lpfc_cmpl_els_npiv_logo; -+ elsiocb->cmd_cmpl = lpfc_cmpl_els_npiv_logo; - spin_lock_irq(&ndlp->lock); - ndlp->nlp_flag |= NLP_LOGO_SND; - spin_unlock_irq(&ndlp->lock); -@@ -11088,9 +11083,9 @@ repeat: - } - spin_unlock_irqrestore(&phba->hbalock, iflags); - if (iocb) { -- iocb->fabric_iocb_cmpl = iocb->iocb_cmpl; -- iocb->iocb_cmpl = lpfc_cmpl_fabric_iocb; -- iocb->iocb_flag |= LPFC_IO_FABRIC; -+ iocb->fabric_cmd_cmpl = iocb->cmd_cmpl; -+ iocb->cmd_cmpl = lpfc_cmpl_fabric_iocb; -+ iocb->cmd_flag |= LPFC_IO_FABRIC; - - lpfc_debugfs_disc_trc(iocb->vport, LPFC_DISC_TRC_ELS_CMD, - "Fabric sched1: ste:x%x", -@@ -11099,13 +11094,13 @@ repeat: - ret = lpfc_sli_issue_iocb(phba, LPFC_ELS_RING, iocb, 0); - - if (ret == IOCB_ERROR) { -- iocb->iocb_cmpl = iocb->fabric_iocb_cmpl; -- iocb->fabric_iocb_cmpl = NULL; -- iocb->iocb_flag &= ~LPFC_IO_FABRIC; -+ iocb->cmd_cmpl = iocb->fabric_cmd_cmpl; -+ iocb->fabric_cmd_cmpl = NULL; -+ iocb->cmd_flag &= ~LPFC_IO_FABRIC; - cmd = &iocb->iocb; - cmd->ulpStatus = IOSTAT_LOCAL_REJECT; - cmd->un.ulpWord[4] = IOERR_SLI_ABORTED; -- iocb->iocb_cmpl(phba, iocb, iocb); -+ iocb->cmd_cmpl(phba, iocb, iocb); - - atomic_dec(&phba->fabric_iocb_count); - goto repeat; -@@ -11161,8 +11156,8 @@ lpfc_block_fabric_iocbs(struct lpfc_hba *phba) - * @rspiocb: pointer to lpfc response iocb data structure. - * - * This routine is the callback function that is put to the fabric iocb's -- * callback function pointer (iocb->iocb_cmpl). The original iocb's callback -- * function pointer has been stored in iocb->fabric_iocb_cmpl. This callback -+ * callback function pointer (iocb->cmd_cmpl). The original iocb's callback -+ * function pointer has been stored in iocb->fabric_cmd_cmpl. This callback - * function first restores and invokes the original iocb's callback function - * and then invokes the lpfc_resume_fabric_iocbs() routine to issue the next - * fabric bound iocb from the driver internal fabric iocb list onto the wire. 
-@@ -11173,7 +11168,7 @@ lpfc_cmpl_fabric_iocb(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, - { - struct ls_rjt stat; - -- BUG_ON((cmdiocb->iocb_flag & LPFC_IO_FABRIC) != LPFC_IO_FABRIC); -+ WARN_ON((cmdiocb->cmd_flag & LPFC_IO_FABRIC) != LPFC_IO_FABRIC); - - switch (rspiocb->iocb.ulpStatus) { - case IOSTAT_NPORT_RJT: -@@ -11199,10 +11194,10 @@ lpfc_cmpl_fabric_iocb(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, - - BUG_ON(atomic_read(&phba->fabric_iocb_count) == 0); - -- cmdiocb->iocb_cmpl = cmdiocb->fabric_iocb_cmpl; -- cmdiocb->fabric_iocb_cmpl = NULL; -- cmdiocb->iocb_flag &= ~LPFC_IO_FABRIC; -- cmdiocb->iocb_cmpl(phba, cmdiocb, rspiocb); -+ cmdiocb->cmd_cmpl = cmdiocb->fabric_cmd_cmpl; -+ cmdiocb->fabric_cmd_cmpl = NULL; -+ cmdiocb->cmd_flag &= ~LPFC_IO_FABRIC; -+ cmdiocb->cmd_cmpl(phba, cmdiocb, rspiocb); - - atomic_dec(&phba->fabric_iocb_count); - if (!test_bit(FABRIC_COMANDS_BLOCKED, &phba->bit_flags)) { -@@ -11253,9 +11248,9 @@ lpfc_issue_fabric_iocb(struct lpfc_hba *phba, struct lpfc_iocbq *iocb) - atomic_inc(&phba->fabric_iocb_count); - spin_unlock_irqrestore(&phba->hbalock, iflags); - if (ready) { -- iocb->fabric_iocb_cmpl = iocb->iocb_cmpl; -- iocb->iocb_cmpl = lpfc_cmpl_fabric_iocb; -- iocb->iocb_flag |= LPFC_IO_FABRIC; -+ iocb->fabric_cmd_cmpl = iocb->cmd_cmpl; -+ iocb->cmd_cmpl = lpfc_cmpl_fabric_iocb; -+ iocb->cmd_flag |= LPFC_IO_FABRIC; - - lpfc_debugfs_disc_trc(iocb->vport, LPFC_DISC_TRC_ELS_CMD, - "Fabric sched2: ste:x%x", -@@ -11264,9 +11259,9 @@ lpfc_issue_fabric_iocb(struct lpfc_hba *phba, struct lpfc_iocbq *iocb) - ret = lpfc_sli_issue_iocb(phba, LPFC_ELS_RING, iocb, 0); - - if (ret == IOCB_ERROR) { -- iocb->iocb_cmpl = iocb->fabric_iocb_cmpl; -- iocb->fabric_iocb_cmpl = NULL; -- iocb->iocb_flag &= ~LPFC_IO_FABRIC; -+ iocb->cmd_cmpl = iocb->fabric_cmd_cmpl; -+ iocb->fabric_cmd_cmpl = NULL; -+ iocb->cmd_flag &= ~LPFC_IO_FABRIC; - atomic_dec(&phba->fabric_iocb_count); - } - } else { -@@ -11659,7 +11654,7 @@ int lpfc_issue_els_qfpa(struct lpfc_vport *vport) - *((u32 *)(pcmd)) = ELS_CMD_QFPA; - pcmd += 4; - -- elsiocb->iocb_cmpl = lpfc_cmpl_els_qfpa; -+ elsiocb->cmd_cmpl = lpfc_cmpl_els_qfpa; - - elsiocb->context1 = lpfc_nlp_get(ndlp); - if (!elsiocb->context1) { -@@ -11742,7 +11737,7 @@ lpfc_vmid_uvem(struct lpfc_vport *vport, - } - inst_desc->word6 = cpu_to_be32(inst_desc->word6); - -- elsiocb->iocb_cmpl = lpfc_cmpl_els_uvem; -+ elsiocb->cmd_cmpl = lpfc_cmpl_els_uvem; - - elsiocb->context1 = lpfc_nlp_get(ndlp); - if (!elsiocb->context1) { diff --git a/drivers/scsi/lpfc/lpfc_hbadisc.c b/drivers/scsi/lpfc/lpfc_hbadisc.c index 7195ca0275f93..4bb0a15cfcc01 100644 --- a/drivers/scsi/lpfc/lpfc_hbadisc.c @@ -251786,24 +305562,10 @@ index 7195ca0275f93..4bb0a15cfcc01 100644 acc_plogi = 1; } diff --git a/drivers/scsi/lpfc/lpfc_hw4.h b/drivers/scsi/lpfc/lpfc_hw4.h -index 7359505e60419..215fbf1c777ec 100644 +index 7359505e60419..824fc8c08840b 100644 --- a/drivers/scsi/lpfc/lpfc_hw4.h +++ b/drivers/scsi/lpfc/lpfc_hw4.h -@@ -60,6 +60,13 @@ - ((ptr)->name##_WORD = ((((value) & name##_MASK) << name##_SHIFT) | \ - ((ptr)->name##_WORD & ~(name##_MASK << name##_SHIFT)))) - -+#define get_wqe_reqtag(x) (((x)->wqe.words[9] >> 0) & 0xFFFF) -+ -+#define get_job_ulpword(x, y) ((x)->iocb.un.ulpWord[y]) -+ -+#define set_job_ulpstatus(x, y) bf_set(lpfc_wcqe_c_status, &(x)->wcqe_cmpl, y) -+#define set_job_ulpword4(x, y) ((&(x)->wcqe_cmpl)->parameter = y) -+ - struct dma_address { - uint32_t addr_lo; - uint32_t addr_hi; -@@ -4448,6 +4455,9 @@ struct wqe_common { +@@ -4448,6 +4448,9 @@ 
struct wqe_common { #define wqe_sup_SHIFT 6 #define wqe_sup_MASK 0x00000001 #define wqe_sup_WORD word11 @@ -251814,18 +305576,9 @@ index 7359505e60419..215fbf1c777ec 100644 #define wqe_wqec_MASK 0x00000001 #define wqe_wqec_WORD word11 diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c -index 195169badb372..48043e1ba485b 100644 +index 195169badb372..855817f6fe671 100644 --- a/drivers/scsi/lpfc/lpfc_init.c +++ b/drivers/scsi/lpfc/lpfc_init.c -@@ -982,7 +982,7 @@ lpfc_hba_clean_txcmplq(struct lpfc_hba *phba) - spin_lock_irq(&pring->ring_lock); - list_for_each_entry_safe(piocb, next_iocb, - &pring->txcmplq, list) -- piocb->iocb_flag &= ~LPFC_IO_ON_TXCMPLQ; -+ piocb->cmd_flag &= ~LPFC_IO_ON_TXCMPLQ; - list_splice_init(&pring->txcmplq, &completions); - pring->txcmplq_cnt = 0; - spin_unlock_irq(&pring->ring_lock); @@ -1606,6 +1606,11 @@ void lpfc_sli4_offline_eratt(struct lpfc_hba *phba) { @@ -251855,33 +305608,6 @@ index 195169badb372..48043e1ba485b 100644 "3143 Port Down: Firmware Update " "Detected\n"); en_rn_msg = false; -@@ -2639,7 +2643,7 @@ lpfc_get_hba_model_desc(struct lpfc_hba *phba, uint8_t *mdp, uint8_t *descp) - } - - /** -- * lpfc_post_buffer - Post IOCB(s) with DMA buffer descriptor(s) to a IOCB ring -+ * lpfc_sli3_post_buffer - Post IOCB(s) with DMA buffer descriptor(s) to a IOCB ring - * @phba: pointer to lpfc hba data structure. - * @pring: pointer to a IOCB ring. - * @cnt: the number of IOCBs to be posted to the IOCB ring. -@@ -2651,7 +2655,7 @@ lpfc_get_hba_model_desc(struct lpfc_hba *phba, uint8_t *mdp, uint8_t *descp) - * The number of IOCBs NOT able to be posted to the IOCB ring. - **/ - int --lpfc_post_buffer(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, int cnt) -+lpfc_sli3_post_buffer(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, int cnt) - { - IOCB_t *icmd; - struct lpfc_iocbq *iocb; -@@ -2757,7 +2761,7 @@ lpfc_post_rcv_buf(struct lpfc_hba *phba) - struct lpfc_sli *psli = &phba->sli; - - /* Ring 0, ELS / CT buffers */ -- lpfc_post_buffer(phba, &psli->sli3_ring[LPFC_ELS_RING], LPFC_BUF_RING0); -+ lpfc_sli3_post_buffer(phba, &psli->sli3_ring[LPFC_ELS_RING], LPFC_BUF_RING0); - /* Ring 2 - FCP no buffers needed */ - - return 0; @@ -3643,6 +3647,7 @@ lpfc_offline_prep(struct lpfc_hba *phba, int mbx_action) struct lpfc_vport **vports; struct Scsi_Host *shost; @@ -251936,17 +305662,7 @@ index 195169badb372..48043e1ba485b 100644 (NVME_XPT_REGD | SCSI_XPT_REGD))) lpfc_disc_state_machine (vports[i], ndlp, -@@ -4197,8 +4215,7 @@ lpfc_io_buf_replenish(struct lpfc_hba *phba, struct list_head *cbuf) - qp = &phba->sli4_hba.hdwq[idx]; - lpfc_cmd->hdwq_no = idx; - lpfc_cmd->hdwq = qp; -- lpfc_cmd->cur_iocbq.wqe_cmpl = NULL; -- lpfc_cmd->cur_iocbq.iocb_cmpl = NULL; -+ lpfc_cmd->cur_iocbq.cmd_cmpl = NULL; - spin_lock(&qp->io_buf_list_put_lock); - list_add_tail(&lpfc_cmd->list, - &qp->lpfc_io_buf_list_put); -@@ -4649,7 +4666,7 @@ lpfc_create_port(struct lpfc_hba *phba, int instance, struct device *dev) +@@ -4649,7 +4667,7 @@ lpfc_create_port(struct lpfc_hba *phba, int instance, struct device *dev) rc = lpfc_vmid_res_alloc(phba, vport); if (rc) @@ -251955,7 +305671,7 @@ index 195169badb372..48043e1ba485b 100644 /* Initialize all internally managed lists. 
*/ INIT_LIST_HEAD(&vport->fc_nodes); -@@ -4667,16 +4684,17 @@ lpfc_create_port(struct lpfc_hba *phba, int instance, struct device *dev) +@@ -4667,16 +4685,17 @@ lpfc_create_port(struct lpfc_hba *phba, int instance, struct device *dev) error = scsi_add_host_with_dma(shost, dev, &phba->pcidev->dev); if (error) @@ -251975,7 +305691,7 @@ index 195169badb372..48043e1ba485b 100644 scsi_host_put(shost); out: return NULL; -@@ -5310,8 +5328,10 @@ lpfc_sli4_async_link_evt(struct lpfc_hba *phba, +@@ -5310,8 +5329,10 @@ lpfc_sli4_async_link_evt(struct lpfc_hba *phba, */ if (!(phba->hba_flag & HBA_FCOE_MODE)) { rc = lpfc_sli_issue_mbox(phba, pmb, MBX_NOWAIT); @@ -251987,7 +305703,51 @@ index 195169badb372..48043e1ba485b 100644 return; } /* -@@ -5789,21 +5809,8 @@ lpfc_cgn_save_evt_cnt(struct lpfc_hba *phba) +@@ -5407,38 +5428,12 @@ lpfc_async_link_speed_to_read_top(struct lpfc_hba *phba, uint8_t speed_code) + void + lpfc_cgn_dump_rxmonitor(struct lpfc_hba *phba) + { +- struct rxtable_entry *entry; +- int cnt = 0, head, tail, last, start; +- +- head = atomic_read(&phba->rxtable_idx_head); +- tail = atomic_read(&phba->rxtable_idx_tail); +- if (!phba->rxtable || head == tail) { +- lpfc_printf_log(phba, KERN_ERR, LOG_CGN_MGMT, +- "4411 Rxtable is empty\n"); +- return; +- } +- last = tail; +- start = head; +- +- /* Display the last LPFC_MAX_RXMONITOR_DUMP entries from the rxtable */ +- while (start != last) { +- if (start) +- start--; +- else +- start = LPFC_MAX_RXMONITOR_ENTRY - 1; +- entry = &phba->rxtable[start]; ++ if (!phba->rx_monitor) { + lpfc_printf_log(phba, KERN_INFO, LOG_CGN_MGMT, +- "4410 %02d: MBPI %lld Xmit %lld Cmpl %lld " +- "Lat %lld ASz %lld Info %02d BWUtil %d " +- "Int %d slot %d\n", +- cnt, entry->max_bytes_per_interval, +- entry->total_bytes, entry->rcv_bytes, +- entry->avg_io_latency, entry->avg_io_size, +- entry->cmf_info, entry->timer_utilization, +- entry->timer_interval, start); +- cnt++; +- if (cnt >= LPFC_MAX_RXMONITOR_DUMP) +- return; ++ "4411 Rx Monitor Info is empty.\n"); ++ } else { ++ lpfc_rx_monitor_report(phba, phba->rx_monitor, NULL, 0, ++ LPFC_MAX_RXMONITOR_DUMP); + } + } + +@@ -5789,21 +5784,8 @@ lpfc_cgn_save_evt_cnt(struct lpfc_hba *phba) /* Use the frequency found in the last rcv'ed FPIN */ value = phba->cgn_fpin_frequency; @@ -252011,7 +305771,107 @@ index 195169badb372..48043e1ba485b 100644 lvalue = lpfc_cgn_calc_crc32(cp, LPFC_CGN_INFO_SZ, LPFC_CGN_CRC32_SEED); -@@ -6262,8 +6269,10 @@ lpfc_sli4_async_fc_evt(struct lpfc_hba *phba, struct lpfc_acqe_fc_la *acqe_fc) +@@ -5858,11 +5840,10 @@ lpfc_cmf_timer(struct hrtimer *timer) + { + struct lpfc_hba *phba = container_of(timer, struct lpfc_hba, + cmf_timer); +- struct rxtable_entry *entry; ++ struct rx_info_entry entry; + uint32_t io_cnt; +- uint32_t head, tail; + uint32_t busy, max_read; +- uint64_t total, rcv, lat, mbpi; ++ uint64_t total, rcv, lat, mbpi, extra, cnt; + int timer_interval = LPFC_CMF_INTERVAL; + uint32_t ms; + struct lpfc_cgn_stat *cgs; +@@ -5929,12 +5910,27 @@ lpfc_cmf_timer(struct hrtimer *timer) + phba->hba_flag & HBA_SETUP) { + mbpi = phba->cmf_last_sync_bw; + phba->cmf_last_sync_bw = 0; +- lpfc_issue_cmf_sync_wqe(phba, LPFC_CMF_INTERVAL, total); ++ extra = 0; ++ ++ /* Calculate any extra bytes needed to account for the ++ * timer accuracy. If we are less than LPFC_CMF_INTERVAL ++ * calculate the adjustment needed for total to reflect ++ * a full LPFC_CMF_INTERVAL. 
++ */ ++ if (ms && ms < LPFC_CMF_INTERVAL) { ++ cnt = div_u64(total, ms); /* bytes per ms */ ++ cnt *= LPFC_CMF_INTERVAL; /* what total should be */ ++ if (cnt > mbpi) ++ cnt = mbpi; ++ extra = cnt - total; ++ } ++ lpfc_issue_cmf_sync_wqe(phba, LPFC_CMF_INTERVAL, total + extra); + } else { + /* For Monitor mode or link down we want mbpi + * to be the full link speed + */ + mbpi = phba->cmf_link_byte_count; ++ extra = 0; + } + phba->cmf_timer_cnt++; + +@@ -5960,39 +5956,30 @@ lpfc_cmf_timer(struct hrtimer *timer) + } + + /* Save rxmonitor information for debug */ +- if (phba->rxtable) { +- head = atomic_xchg(&phba->rxtable_idx_head, +- LPFC_RXMONITOR_TABLE_IN_USE); +- entry = &phba->rxtable[head]; +- entry->total_bytes = total; +- entry->rcv_bytes = rcv; +- entry->cmf_busy = busy; +- entry->cmf_info = phba->cmf_active_info; ++ if (phba->rx_monitor) { ++ entry.total_bytes = total; ++ entry.cmf_bytes = total + extra; ++ entry.rcv_bytes = rcv; ++ entry.cmf_busy = busy; ++ entry.cmf_info = phba->cmf_active_info; + if (io_cnt) { +- entry->avg_io_latency = div_u64(lat, io_cnt); +- entry->avg_io_size = div_u64(rcv, io_cnt); ++ entry.avg_io_latency = div_u64(lat, io_cnt); ++ entry.avg_io_size = div_u64(rcv, io_cnt); + } else { +- entry->avg_io_latency = 0; +- entry->avg_io_size = 0; ++ entry.avg_io_latency = 0; ++ entry.avg_io_size = 0; + } +- entry->max_read_cnt = max_read; +- entry->io_cnt = io_cnt; +- entry->max_bytes_per_interval = mbpi; ++ entry.max_read_cnt = max_read; ++ entry.io_cnt = io_cnt; ++ entry.max_bytes_per_interval = mbpi; + if (phba->cmf_active_mode == LPFC_CFG_MANAGED) +- entry->timer_utilization = phba->cmf_last_ts; ++ entry.timer_utilization = phba->cmf_last_ts; + else +- entry->timer_utilization = ms; +- entry->timer_interval = ms; ++ entry.timer_utilization = ms; ++ entry.timer_interval = ms; + phba->cmf_last_ts = 0; + +- /* Increment rxtable index */ +- head = (head + 1) % LPFC_MAX_RXMONITOR_ENTRY; +- tail = atomic_read(&phba->rxtable_idx_tail); +- if (head == tail) { +- tail = (tail + 1) % LPFC_MAX_RXMONITOR_ENTRY; +- atomic_set(&phba->rxtable_idx_tail, tail); +- } +- atomic_set(&phba->rxtable_idx_head, head); ++ lpfc_rx_monitor_record(phba->rx_monitor, &entry); + } + + if (phba->cmf_active_mode == LPFC_CFG_MONITOR) { +@@ -6262,8 +6249,10 @@ lpfc_sli4_async_fc_evt(struct lpfc_hba *phba, struct lpfc_acqe_fc_la *acqe_fc) } rc = lpfc_sli_issue_mbox(phba, pmb, MBX_NOWAIT); @@ -252023,7 +305883,7 @@ index 195169badb372..48043e1ba485b 100644 return; out_free_dmabuf: -@@ -6485,9 +6494,6 @@ lpfc_sli4_async_sli_evt(struct lpfc_hba *phba, struct lpfc_acqe_sli *acqe_sli) +@@ -6485,9 +6474,6 @@ lpfc_sli4_async_sli_evt(struct lpfc_hba *phba, struct lpfc_acqe_sli *acqe_sli) /* Alarm overrides warning, so check that first */ if (cgn_signal->alarm_cnt) { if (phba->cgn_reg_signal == EDC_CG_SIG_WARN_ALARM) { @@ -252033,7 +305893,7 @@ index 195169badb372..48043e1ba485b 100644 /* Keep track of alarm cnt for CMF_SYNC_WQE */ atomic_add(cgn_signal->alarm_cnt, &phba->cgn_sync_alarm_cnt); -@@ -6496,8 +6502,6 @@ lpfc_sli4_async_sli_evt(struct lpfc_hba *phba, struct lpfc_acqe_sli *acqe_sli) +@@ -6496,8 +6482,6 @@ lpfc_sli4_async_sli_evt(struct lpfc_hba *phba, struct lpfc_acqe_sli *acqe_sli) /* signal action needs to be taken */ if (phba->cgn_reg_signal == EDC_CG_SIG_WARN_ONLY || phba->cgn_reg_signal == EDC_CG_SIG_WARN_ALARM) { @@ -252042,7 +305902,7 @@ index 195169badb372..48043e1ba485b 100644 /* Keep track of warning cnt for CMF_SYNC_WQE */ atomic_add(cnt, &phba->cgn_sync_warn_cnt); } -@@ -7890,7 
+7894,7 @@ lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba) +@@ -7890,7 +7874,7 @@ lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba) /* Allocate device driver memory */ rc = lpfc_mem_alloc(phba, SGL_ALIGN_SZ); if (rc) @@ -252051,7 +305911,7 @@ index 195169badb372..48043e1ba485b 100644 /* IF Type 2 ports get initialized now. */ if (bf_get(lpfc_sli_intf_if_type, &phba->sli4_hba.sli_intf) >= -@@ -8306,6 +8310,9 @@ out_free_bsmbx: +@@ -8306,6 +8290,9 @@ out_free_bsmbx: lpfc_destroy_bootstrap_mbox(phba); out_free_mem: lpfc_mem_free(phba); @@ -252061,16 +305921,7 @@ index 195169badb372..48043e1ba485b 100644 return rc; } -@@ -11961,7 +11968,7 @@ lpfc_sli_enable_msi(struct lpfc_hba *phba) - rc = pci_enable_msi(phba->pcidev); - if (!rc) - lpfc_printf_log(phba, KERN_INFO, LOG_INIT, -- "0462 PCI enable MSI mode success.\n"); -+ "0012 PCI enable MSI mode success.\n"); - else { - lpfc_printf_log(phba, KERN_INFO, LOG_INIT, - "0471 PCI enable MSI mode failed (%d)\n", rc); -@@ -14080,6 +14087,10 @@ lpfc_pci_resume_one_s3(struct device *dev_d) +@@ -14080,6 +14067,10 @@ lpfc_pci_resume_one_s3(struct device *dev_d) return error; } @@ -252081,7 +305932,7 @@ index 195169badb372..48043e1ba485b 100644 /* Configure and enable interrupt */ intr_mode = lpfc_sli_enable_intr(phba, phba->intr_mode); if (intr_mode == LPFC_INTR_ERROR) { -@@ -15033,14 +15044,17 @@ lpfc_io_error_detected_s4(struct pci_dev *pdev, pci_channel_state_t state) +@@ -15033,14 +15024,17 @@ lpfc_io_error_detected_s4(struct pci_dev *pdev, pci_channel_state_t state) lpfc_sli4_prep_dev_for_recover(phba); return PCI_ERS_RESULT_CAN_RECOVER; case pci_channel_io_frozen: @@ -252099,7 +305950,7 @@ index 195169badb372..48043e1ba485b 100644 /* Unknown state, prepare and request slot reset */ lpfc_printf_log(phba, KERN_ERR, LOG_TRACE_EVENT, "2825 Unknown PCI error state: x%x\n", state); -@@ -15084,6 +15098,7 @@ lpfc_io_slot_reset_s4(struct pci_dev *pdev) +@@ -15084,6 +15078,7 @@ lpfc_io_slot_reset_s4(struct pci_dev *pdev) pci_restore_state(pdev); @@ -252107,7 +305958,7 @@ index 195169badb372..48043e1ba485b 100644 /* * As the new kernel behavior of pci_restore_state() API call clears * device saved_state flag, need to save the restored state again. 
-@@ -15097,6 +15112,8 @@ lpfc_io_slot_reset_s4(struct pci_dev *pdev) +@@ -15097,6 +15092,8 @@ lpfc_io_slot_reset_s4(struct pci_dev *pdev) psli->sli_flag &= ~LPFC_SLI_ACTIVE; spin_unlock_irq(&phba->hbalock); @@ -252116,7 +305967,7 @@ index 195169badb372..48043e1ba485b 100644 /* Configure and enable interrupt */ intr_mode = lpfc_sli4_enable_intr(phba, phba->intr_mode); if (intr_mode == LPFC_INTR_ERROR) { -@@ -15106,6 +15123,7 @@ lpfc_io_slot_reset_s4(struct pci_dev *pdev) +@@ -15106,6 +15103,7 @@ lpfc_io_slot_reset_s4(struct pci_dev *pdev) return PCI_ERS_RESULT_DISCONNECT; } else phba->intr_mode = intr_mode; @@ -252124,7 +305975,7 @@ index 195169badb372..48043e1ba485b 100644 /* Log the current active interrupt mode */ lpfc_log_intr_mode(phba, phba->intr_mode); -@@ -15307,6 +15325,10 @@ lpfc_io_error_detected(struct pci_dev *pdev, pci_channel_state_t state) +@@ -15307,6 +15305,10 @@ lpfc_io_error_detected(struct pci_dev *pdev, pci_channel_state_t state) struct lpfc_hba *phba = ((struct lpfc_vport *)shost->hostdata)->phba; pci_ers_result_t rc = PCI_ERS_RESULT_DISCONNECT; @@ -252135,7 +305986,7 @@ index 195169badb372..48043e1ba485b 100644 switch (phba->pci_dev_grp) { case LPFC_PCI_DEV_LP: rc = lpfc_io_error_detected_s3(pdev, state); -@@ -15554,34 +15576,7 @@ void lpfc_dmp_dbg(struct lpfc_hba *phba) +@@ -15554,34 +15556,7 @@ void lpfc_dmp_dbg(struct lpfc_hba *phba) unsigned int temp_idx; int i; int j = 0; @@ -252198,8 +306049,28 @@ index 7d480c7987942..a5aafe230c74f 100644 lpfc_dbg_print(phba, "%d:" fmt, phba->brd_no, ##arg); \ } \ } while (0) +diff --git a/drivers/scsi/lpfc/lpfc_mem.c b/drivers/scsi/lpfc/lpfc_mem.c +index 870e53b8f81dd..5d36b35148646 100644 +--- a/drivers/scsi/lpfc/lpfc_mem.c ++++ b/drivers/scsi/lpfc/lpfc_mem.c +@@ -344,9 +344,12 @@ lpfc_mem_free_all(struct lpfc_hba *phba) + phba->cgn_i = NULL; + } + +- /* Free RX table */ +- kfree(phba->rxtable); +- phba->rxtable = NULL; ++ /* Free RX Monitor */ ++ if (phba->rx_monitor) { ++ lpfc_rx_monitor_destroy_ring(phba->rx_monitor); ++ kfree(phba->rx_monitor); ++ phba->rx_monitor = NULL; ++ } + + /* Free the iocb lookup array */ + kfree(psli->iocbq_lookup); diff --git a/drivers/scsi/lpfc/lpfc_nportdisc.c b/drivers/scsi/lpfc/lpfc_nportdisc.c -index 27263f02ab9f6..e788610bc996a 100644 +index 27263f02ab9f6..2bd35a7424c25 100644 --- a/drivers/scsi/lpfc/lpfc_nportdisc.c +++ b/drivers/scsi/lpfc/lpfc_nportdisc.c @@ -322,6 +322,7 @@ lpfc_rcv_plogi(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, @@ -252244,20 +306115,8 @@ index 27263f02ab9f6..e788610bc996a 100644 ndlp->nlp_fc4_type |= NLP_FC4_NVME; /* We need to update the localport also */ lpfc_nvme_update_localport(vport); -@@ -2131,9 +2139,9 @@ lpfc_cmpl_prli_prli_issue(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, - npr = NULL; - nvpr = NULL; - temp_ptr = lpfc_check_elscmpl_iocb(phba, cmdiocb, rspiocb); -- if (cmdiocb->iocb_flag & LPFC_PRLI_FCP_REQ) -+ if (cmdiocb->cmd_flag & LPFC_PRLI_FCP_REQ) - npr = (PRLI *) temp_ptr; -- else if (cmdiocb->iocb_flag & LPFC_PRLI_NVME_REQ) -+ else if (cmdiocb->cmd_flag & LPFC_PRLI_NVME_REQ) - nvpr = (struct lpfc_nvme_prli *) temp_ptr; - - irsp = &rspiocb->iocb; diff --git a/drivers/scsi/lpfc/lpfc_nvme.c b/drivers/scsi/lpfc/lpfc_nvme.c -index 479b3eed62085..c74b2187dbada 100644 +index 479b3eed62085..4e0c0b273e5fe 100644 --- a/drivers/scsi/lpfc/lpfc_nvme.c +++ b/drivers/scsi/lpfc/lpfc_nvme.c @@ -209,8 +209,9 @@ lpfc_nvme_remoteport_delete(struct nvme_fc_remote_port *remoteport) @@ -252272,100 +306131,7 @@ index 479b3eed62085..c74b2187dbada 100644 
spin_lock_irq(&ndlp->lock); /* The register rebind might have occurred before the delete -@@ -351,11 +352,12 @@ __lpfc_nvme_ls_req_cmp(struct lpfc_hba *phba, struct lpfc_vport *vport, - - static void - lpfc_nvme_ls_req_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe, -- struct lpfc_wcqe_complete *wcqe) -+ struct lpfc_iocbq *rspwqe) - { - struct lpfc_vport *vport = cmdwqe->vport; - struct lpfc_nvme_lport *lport; - uint32_t status; -+ struct lpfc_wcqe_complete *wcqe = &rspwqe->wcqe_cmpl; - - status = bf_get(lpfc_wcqe_c_status, wcqe) & LPFC_IOCB_STATUS_MASK; - -@@ -379,7 +381,7 @@ lpfc_nvme_gen_req(struct lpfc_vport *vport, struct lpfc_dmabuf *bmp, - struct lpfc_dmabuf *inp, - struct nvmefc_ls_req *pnvme_lsreq, - void (*cmpl)(struct lpfc_hba *, struct lpfc_iocbq *, -- struct lpfc_wcqe_complete *), -+ struct lpfc_iocbq *), - struct lpfc_nodelist *ndlp, uint32_t num_entry, - uint32_t tmo, uint8_t retry) - { -@@ -400,7 +402,7 @@ lpfc_nvme_gen_req(struct lpfc_vport *vport, struct lpfc_dmabuf *bmp, - memset(wqe, 0, sizeof(union lpfc_wqe)); - - genwqe->context3 = (uint8_t *)bmp; -- genwqe->iocb_flag |= LPFC_IO_NVME_LS; -+ genwqe->cmd_flag |= LPFC_IO_NVME_LS; - - /* Save for completion so we can release these resources */ - genwqe->context1 = lpfc_nlp_get(ndlp); -@@ -431,7 +433,7 @@ lpfc_nvme_gen_req(struct lpfc_vport *vport, struct lpfc_dmabuf *bmp, - first_len = xmit_len; - } - -- genwqe->rsvd2 = num_entry; -+ genwqe->num_bdes = num_entry; - genwqe->hba_wqidx = 0; - - /* Words 0 - 2 */ -@@ -482,8 +484,7 @@ lpfc_nvme_gen_req(struct lpfc_vport *vport, struct lpfc_dmabuf *bmp, - - - /* Issue GEN REQ WQE for NPORT <did> */ -- genwqe->wqe_cmpl = cmpl; -- genwqe->iocb_cmpl = NULL; -+ genwqe->cmd_cmpl = cmpl; - genwqe->drvrTimeout = tmo + LPFC_DRVR_TIMEOUT; - genwqe->vport = vport; - genwqe->retry = retry; -@@ -533,7 +534,7 @@ __lpfc_nvme_ls_req(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, - struct nvmefc_ls_req *pnvme_lsreq, - void (*gen_req_cmp)(struct lpfc_hba *phba, - struct lpfc_iocbq *cmdwqe, -- struct lpfc_wcqe_complete *wcqe)) -+ struct lpfc_iocbq *rspwqe)) - { - struct lpfc_dmabuf *bmp; - struct ulp_bde64 *bpl; -@@ -721,7 +722,7 @@ __lpfc_nvme_ls_abort(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, - spin_lock(&pring->ring_lock); - list_for_each_entry_safe(wqe, next_wqe, &pring->txcmplq, list) { - if (wqe->context2 == pnvme_lsreq) { -- wqe->iocb_flag |= LPFC_DRIVER_ABORTED; -+ wqe->cmd_flag |= LPFC_DRIVER_ABORTED; - foundit = true; - break; - } -@@ -905,7 +906,7 @@ lpfc_nvme_adj_fcp_sgls(struct lpfc_vport *vport, - - - /* -- * lpfc_nvme_io_cmd_wqe_cmpl - Complete an NVME-over-FCP IO -+ * lpfc_nvme_io_cmd_cmpl - Complete an NVME-over-FCP IO - * - * Driver registers this routine as it io request handler. This - * routine issues an fcp WQE with data from the @lpfc_nvme_fcpreq -@@ -916,11 +917,12 @@ lpfc_nvme_adj_fcp_sgls(struct lpfc_vport *vport, - * TODO: What are the failure codes. 
- **/ - static void --lpfc_nvme_io_cmd_wqe_cmpl(struct lpfc_hba *phba, struct lpfc_iocbq *pwqeIn, -- struct lpfc_wcqe_complete *wcqe) -+lpfc_nvme_io_cmd_cmpl(struct lpfc_hba *phba, struct lpfc_iocbq *pwqeIn, -+ struct lpfc_iocbq *pwqeOut) - { - struct lpfc_io_buf *lpfc_ncmd = - (struct lpfc_io_buf *)pwqeIn->context1; -+ struct lpfc_wcqe_complete *wcqe = &pwqeOut->wcqe_cmpl; - struct lpfc_vport *vport = pwqeIn->vport; - struct nvmefc_fcp_req *nCmd; - struct nvme_fc_ersp_iu *ep; -@@ -936,6 +938,7 @@ lpfc_nvme_io_cmd_wqe_cmpl(struct lpfc_hba *phba, struct lpfc_iocbq *pwqeIn, +@@ -936,6 +937,7 @@ lpfc_nvme_io_cmd_wqe_cmpl(struct lpfc_hba *phba, struct lpfc_iocbq *pwqeIn, #ifdef CONFIG_SCSI_LPFC_DEBUG_FS int cpu; #endif @@ -252373,7 +306139,7 @@ index 479b3eed62085..c74b2187dbada 100644 /* Sanity check on return of outstanding command */ if (!lpfc_ncmd) { -@@ -1097,11 +1100,12 @@ out_err: +@@ -1097,11 +1099,12 @@ out_err: nCmd->transferred_length = 0; nCmd->rcv_rsplen = 0; nCmd->status = NVME_SC_INTERNAL; @@ -252387,7 +306153,7 @@ index 479b3eed62085..c74b2187dbada 100644 lpfc_ncmd->flags |= LPFC_SBUF_XBUSY; else lpfc_ncmd->flags &= ~LPFC_SBUF_XBUSY; -@@ -1181,7 +1185,8 @@ lpfc_nvme_prep_io_cmd(struct lpfc_vport *vport, +@@ -1181,7 +1184,8 @@ lpfc_nvme_prep_io_cmd(struct lpfc_vport *vport, { struct lpfc_hba *phba = vport->phba; struct nvmefc_fcp_req *nCmd = lpfc_ncmd->nvmeCmd; @@ -252397,7 +306163,7 @@ index 479b3eed62085..c74b2187dbada 100644 union lpfc_wqe128 *wqe = &pwqeq->wqe; uint32_t req_len; -@@ -1238,8 +1243,14 @@ lpfc_nvme_prep_io_cmd(struct lpfc_vport *vport, +@@ -1238,8 +1242,14 @@ lpfc_nvme_prep_io_cmd(struct lpfc_vport *vport, cstat->control_requests++; } @@ -252413,36 +306179,7 @@ index 479b3eed62085..c74b2187dbada 100644 /* * Finish initializing those WQE fields that are independent * of the nvme_cmnd request_buffer -@@ -1863,7 +1874,7 @@ lpfc_nvme_fcp_abort(struct nvme_fc_local_port *pnvme_lport, - } - - /* Don't abort IOs no longer on the pending queue. */ -- if (!(nvmereq_wqe->iocb_flag & LPFC_IO_ON_TXCMPLQ)) { -+ if (!(nvmereq_wqe->cmd_flag & LPFC_IO_ON_TXCMPLQ)) { - lpfc_printf_vlog(vport, KERN_ERR, LOG_TRACE_EVENT, - "6142 NVME IO req x%px not queued - skipping " - "abort req xri x%x\n", -@@ -1877,7 +1888,7 @@ lpfc_nvme_fcp_abort(struct nvme_fc_local_port *pnvme_lport, - nvmereq_wqe->hba_wqidx, pnvme_rport->port_id); - - /* Outstanding abort is in progress */ -- if (nvmereq_wqe->iocb_flag & LPFC_DRIVER_ABORTED) { -+ if (nvmereq_wqe->cmd_flag & LPFC_DRIVER_ABORTED) { - lpfc_printf_vlog(vport, KERN_ERR, LOG_TRACE_EVENT, - "6144 Outstanding NVME I/O Abort Request " - "still pending on nvme_fcreq x%px, " -@@ -1972,8 +1983,8 @@ lpfc_get_nvme_buf(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp, - /* Setup key fields in buffer that may have been changed - * if other protocols used this buffer. 
- */ -- pwqeq->iocb_flag = LPFC_IO_NVME; -- pwqeq->wqe_cmpl = lpfc_nvme_io_cmd_wqe_cmpl; -+ pwqeq->cmd_flag = LPFC_IO_NVME; -+ pwqeq->cmd_cmpl = lpfc_nvme_io_cmd_cmpl; - lpfc_ncmd->start_time = jiffies; - lpfc_ncmd->flags = 0; - -@@ -2166,6 +2177,10 @@ lpfc_nvme_lport_unreg_wait(struct lpfc_vport *vport, +@@ -2166,6 +2176,10 @@ lpfc_nvme_lport_unreg_wait(struct lpfc_vport *vport, abts_nvme = 0; for (i = 0; i < phba->cfg_hdw_queue; i++) { qp = &phba->sli4_hba.hdwq[i]; @@ -252453,7 +306190,7 @@ index 479b3eed62085..c74b2187dbada 100644 pring = qp->io_wq->pring; if (!pring) continue; -@@ -2173,6 +2188,10 @@ lpfc_nvme_lport_unreg_wait(struct lpfc_vport *vport, +@@ -2173,6 +2187,10 @@ lpfc_nvme_lport_unreg_wait(struct lpfc_vport *vport, abts_scsi += qp->abts_scsi_io_bufs; abts_nvme += qp->abts_nvme_io_bufs; } @@ -252464,7 +306201,7 @@ index 479b3eed62085..c74b2187dbada 100644 lpfc_printf_vlog(vport, KERN_ERR, LOG_TRACE_EVENT, "6176 Lport x%px Localport x%px wait " "timed out. Pending %d [%d:%d]. " -@@ -2212,6 +2231,8 @@ lpfc_nvme_destroy_localport(struct lpfc_vport *vport) +@@ -2212,6 +2230,8 @@ lpfc_nvme_destroy_localport(struct lpfc_vport *vport) return; localport = vport->localport; @@ -252473,7 +306210,7 @@ index 479b3eed62085..c74b2187dbada 100644 lport = (struct lpfc_nvme_lport *)localport->private; lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME, -@@ -2528,7 +2549,8 @@ lpfc_nvme_unregister_port(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp) +@@ -2528,7 +2548,8 @@ lpfc_nvme_unregister_port(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp) * return values is ignored. The upcall is a courtesy to the * transport. */ @@ -252483,7 +306220,7 @@ index 479b3eed62085..c74b2187dbada 100644 (void)nvme_fc_set_remoteport_devloss(remoteport, 0); ret = nvme_fc_unregister_remoteport(remoteport); -@@ -2556,6 +2578,42 @@ lpfc_nvme_unregister_port(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp) +@@ -2556,6 +2577,42 @@ lpfc_nvme_unregister_port(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp) vport->localport, ndlp->rport, ndlp->nlp_DID); } @@ -252526,383 +306263,10 @@ index 479b3eed62085..c74b2187dbada 100644 /** * lpfc_sli4_nvme_xri_aborted - Fast-path process of NVME xri abort * @phba: pointer to lpfc hba data structure. 
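/*
 * A minimal sketch of the pattern the surrounding hunks implement,
 * using simplified stand-in types: the real lpfc structures carry far
 * more state, and demo_cmd_cmpl is a hypothetical handler invented for
 * illustration. The separate iocb_cmpl (SLI-3 IOCB path) and wqe_cmpl
 * (SLI-4 WQE path) callback pointers collapse into a single cmd_cmpl,
 * and the response WCQE travels inside the lpfc_iocbq as wcqe_cmpl.
 * Both sides of a completion are then plain iocbqs, so flush/cancel
 * paths can memcpy a locally built WCQE into wcqe_cmpl and hand the
 * iocbq in as its own response, as the lpfc_nvme_cancel_iocb() and
 * lpfc_nvmet_wqfull_flush() hunks nearby do.
 */
#include <stdio.h>

struct lpfc_hba { int brd_no; };            /* stand-in */

struct lpfc_wcqe_complete {                 /* stand-in */
    unsigned int parameter;
    unsigned int total_data_placed;
};

struct lpfc_iocbq {                         /* stand-in */
    struct lpfc_wcqe_complete wcqe_cmpl;    /* response CQE lives here */
    unsigned int cmd_flag;                  /* was iocb_flag */
    /* One callback replaces the iocb_cmpl/wqe_cmpl pair. */
    void (*cmd_cmpl)(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe,
                     struct lpfc_iocbq *rspwqe);
};

/* New-style handler: recovers the WCQE from the response iocbq, the way
 * lpfc_nvme_io_cmd_cmpl() does in the hunks above. */
static void demo_cmd_cmpl(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe,
                          struct lpfc_iocbq *rspwqe)
{
    struct lpfc_wcqe_complete *wcqe = &rspwqe->wcqe_cmpl;

    printf("hba%d cmpl: parameter=%u placed=%u\n",
           phba->brd_no, wcqe->parameter, wcqe->total_data_placed);
    (void)cmdwqe;
}

int main(void)
{
    struct lpfc_hba hba = { 0 };
    struct lpfc_wcqe_complete synthetic = { 0, 512 };
    struct lpfc_iocbq io = { .cmd_cmpl = demo_cmd_cmpl };

    /* Synthetic completion: build a WCQE locally, copy it into the
     * iocbq, then invoke cmd_cmpl with the iocbq as its own response. */
    io.wcqe_cmpl = synthetic;
    io.cmd_cmpl(&hba, &io, &io);
    return 0;
}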
-@@ -2692,6 +2750,7 @@ lpfc_nvme_cancel_iocb(struct lpfc_hba *phba, struct lpfc_iocbq *pwqeIn, - if (phba->sli.sli_flag & LPFC_SLI_ACTIVE) - bf_set(lpfc_wcqe_c_xb, wcqep, 1); - -- (pwqeIn->wqe_cmpl)(phba, pwqeIn, wcqep); -+ memcpy(&pwqeIn->wcqe_cmpl, wcqep, sizeof(*wcqep)); -+ (pwqeIn->cmd_cmpl)(phba, pwqeIn, pwqeIn); - #endif - } -diff --git a/drivers/scsi/lpfc/lpfc_nvme.h b/drivers/scsi/lpfc/lpfc_nvme.h -index cc54ffb5c2058..d7698977725e0 100644 ---- a/drivers/scsi/lpfc/lpfc_nvme.h -+++ b/drivers/scsi/lpfc/lpfc_nvme.h -@@ -234,7 +234,7 @@ int __lpfc_nvme_ls_req(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, - struct nvmefc_ls_req *pnvme_lsreq, - void (*gen_req_cmp)(struct lpfc_hba *phba, - struct lpfc_iocbq *cmdwqe, -- struct lpfc_wcqe_complete *wcqe)); -+ struct lpfc_iocbq *rspwqe)); - void __lpfc_nvme_ls_req_cmp(struct lpfc_hba *phba, struct lpfc_vport *vport, - struct lpfc_iocbq *cmdwqe, struct lpfc_wcqe_complete *wcqe); - int __lpfc_nvme_ls_abort(struct lpfc_vport *vport, -@@ -248,6 +248,6 @@ int __lpfc_nvme_xmt_ls_rsp(struct lpfc_async_xchg_ctx *axchg, - struct nvmefc_ls_rsp *ls_rsp, - void (*xmt_ls_rsp_cmp)(struct lpfc_hba *phba, - struct lpfc_iocbq *cmdwqe, -- struct lpfc_wcqe_complete *wcqe)); -+ struct lpfc_iocbq *rspwqe)); - void __lpfc_nvme_xmt_ls_rsp_cmp(struct lpfc_hba *phba, -- struct lpfc_iocbq *cmdwqe, struct lpfc_wcqe_complete *wcqe); -+ struct lpfc_iocbq *cmdwqe, struct lpfc_iocbq *rspwqe); -diff --git a/drivers/scsi/lpfc/lpfc_nvmet.c b/drivers/scsi/lpfc/lpfc_nvmet.c -index 6e3dd0b9bcfa9..5188cc8e2413f 100644 ---- a/drivers/scsi/lpfc/lpfc_nvmet.c -+++ b/drivers/scsi/lpfc/lpfc_nvmet.c -@@ -285,7 +285,7 @@ lpfc_nvmet_defer_release(struct lpfc_hba *phba, - * transmission of an NVME LS response. - * @phba: Pointer to HBA context object. - * @cmdwqe: Pointer to driver command WQE object. -- * @wcqe: Pointer to driver response CQE object. -+ * @rspwqe: Pointer to driver response WQE object. - * - * The function is called from SLI ring event handler with no - * lock held. The function frees memory resources used for the command -@@ -293,9 +293,10 @@ lpfc_nvmet_defer_release(struct lpfc_hba *phba, - **/ - void - __lpfc_nvme_xmt_ls_rsp_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe, -- struct lpfc_wcqe_complete *wcqe) -+ struct lpfc_iocbq *rspwqe) - { - struct lpfc_async_xchg_ctx *axchg = cmdwqe->context2; -+ struct lpfc_wcqe_complete *wcqe = &rspwqe->wcqe_cmpl; - struct nvmefc_ls_rsp *ls_rsp = &axchg->ls_rsp; - uint32_t status, result; - -@@ -331,7 +332,7 @@ __lpfc_nvme_xmt_ls_rsp_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe, - * lpfc_nvmet_xmt_ls_rsp_cmp - Completion handler for LS Response - * @phba: Pointer to HBA context object. - * @cmdwqe: Pointer to driver command WQE object. -- * @wcqe: Pointer to driver response CQE object. -+ * @rspwqe: Pointer to driver response WQE object. - * - * The function is called from SLI ring event handler with no - * lock held. 
This function is the completion handler for NVME LS commands -@@ -340,10 +341,11 @@ __lpfc_nvme_xmt_ls_rsp_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe, - **/ - static void - lpfc_nvmet_xmt_ls_rsp_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe, -- struct lpfc_wcqe_complete *wcqe) -+ struct lpfc_iocbq *rspwqe) - { - struct lpfc_nvmet_tgtport *tgtp; - uint32_t status, result; -+ struct lpfc_wcqe_complete *wcqe = &rspwqe->wcqe_cmpl; - - if (!phba->targetport) - goto finish; -@@ -365,7 +367,7 @@ lpfc_nvmet_xmt_ls_rsp_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe, - } - - finish: -- __lpfc_nvme_xmt_ls_rsp_cmp(phba, cmdwqe, wcqe); -+ __lpfc_nvme_xmt_ls_rsp_cmp(phba, cmdwqe, rspwqe); - } - - /** -@@ -707,7 +709,7 @@ out: - * lpfc_nvmet_xmt_fcp_op_cmp - Completion handler for FCP Response - * @phba: Pointer to HBA context object. - * @cmdwqe: Pointer to driver command WQE object. -- * @wcqe: Pointer to driver response CQE object. -+ * @rspwqe: Pointer to driver response WQE object. - * - * The function is called from SLI ring event handler with no - * lock held. This function is the completion handler for NVME FCP commands -@@ -715,12 +717,13 @@ out: - **/ - static void - lpfc_nvmet_xmt_fcp_op_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe, -- struct lpfc_wcqe_complete *wcqe) -+ struct lpfc_iocbq *rspwqe) - { - struct lpfc_nvmet_tgtport *tgtp; - struct nvmefc_tgt_fcp_req *rsp; - struct lpfc_async_xchg_ctx *ctxp; - uint32_t status, result, op, start_clean, logerr; -+ struct lpfc_wcqe_complete *wcqe = &rspwqe->wcqe_cmpl; - #ifdef CONFIG_SCSI_LPFC_DEBUG_FS - int id; - #endif -@@ -817,7 +820,7 @@ lpfc_nvmet_xmt_fcp_op_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe, - /* lpfc_nvmet_xmt_fcp_release() will recycle the context */ - } else { - ctxp->entry_cnt++; -- start_clean = offsetof(struct lpfc_iocbq, iocb_flag); -+ start_clean = offsetof(struct lpfc_iocbq, cmd_flag); - memset(((char *)cmdwqe) + start_clean, 0, - (sizeof(struct lpfc_iocbq) - start_clean)); - #ifdef CONFIG_SCSI_LPFC_DEBUG_FS -@@ -862,7 +865,7 @@ __lpfc_nvme_xmt_ls_rsp(struct lpfc_async_xchg_ctx *axchg, - struct nvmefc_ls_rsp *ls_rsp, - void (*xmt_ls_rsp_cmp)(struct lpfc_hba *phba, - struct lpfc_iocbq *cmdwqe, -- struct lpfc_wcqe_complete *wcqe)) -+ struct lpfc_iocbq *rspwqe)) - { - struct lpfc_hba *phba = axchg->phba; - struct hbq_dmabuf *nvmebuf = (struct hbq_dmabuf *)axchg->rqb_buffer; -@@ -898,7 +901,7 @@ __lpfc_nvme_xmt_ls_rsp(struct lpfc_async_xchg_ctx *axchg, - } - - /* Save numBdes for bpl2sgl */ -- nvmewqeq->rsvd2 = 1; -+ nvmewqeq->num_bdes = 1; - nvmewqeq->hba_wqidx = 0; - nvmewqeq->context3 = &dmabuf; - dmabuf.virt = &bpl; -@@ -913,8 +916,7 @@ __lpfc_nvme_xmt_ls_rsp(struct lpfc_async_xchg_ctx *axchg, - * be referenced after it returns back to this routine. 
- */ - -- nvmewqeq->wqe_cmpl = xmt_ls_rsp_cmp; -- nvmewqeq->iocb_cmpl = NULL; -+ nvmewqeq->cmd_cmpl = xmt_ls_rsp_cmp; - nvmewqeq->context2 = axchg; - - lpfc_nvmeio_data(phba, "NVMEx LS RSP: xri x%x wqidx x%x len x%x\n", -@@ -1072,10 +1074,9 @@ lpfc_nvmet_xmt_fcp_op(struct nvmet_fc_target_port *tgtport, - goto aerr; - } - -- nvmewqeq->wqe_cmpl = lpfc_nvmet_xmt_fcp_op_cmp; -- nvmewqeq->iocb_cmpl = NULL; -+ nvmewqeq->cmd_cmpl = lpfc_nvmet_xmt_fcp_op_cmp; - nvmewqeq->context2 = ctxp; -- nvmewqeq->iocb_flag |= LPFC_IO_NVMET; -+ nvmewqeq->cmd_flag |= LPFC_IO_NVMET; - ctxp->wqeq->hba_wqidx = rsp->hwqid; - - lpfc_nvmeio_data(phba, "NVMET FCP CMND: xri x%x op x%x len x%x\n", -@@ -1275,7 +1276,7 @@ lpfc_nvmet_defer_rcv(struct nvmet_fc_target_port *tgtport, - * lpfc_nvmet_ls_req_cmp - completion handler for a nvme ls request - * @phba: Pointer to HBA context object - * @cmdwqe: Pointer to driver command WQE object. -- * @wcqe: Pointer to driver response CQE object. -+ * @rspwqe: Pointer to driver response WQE object. - * - * This function is the completion handler for NVME LS requests. - * The function updates any states and statistics, then calls the -@@ -1283,8 +1284,9 @@ lpfc_nvmet_defer_rcv(struct nvmet_fc_target_port *tgtport, - **/ - static void - lpfc_nvmet_ls_req_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe, -- struct lpfc_wcqe_complete *wcqe) -+ struct lpfc_iocbq *rspwqe) - { -+ struct lpfc_wcqe_complete *wcqe = &rspwqe->wcqe_cmpl; - __lpfc_nvme_ls_req_cmp(phba, cmdwqe->vport, cmdwqe, wcqe); - } - -@@ -1581,7 +1583,7 @@ lpfc_nvmet_setup_io_context(struct lpfc_hba *phba) - "6406 Ran out of NVMET iocb/WQEs\n"); - return -ENOMEM; - } -- ctx_buf->iocbq->iocb_flag = LPFC_IO_NVMET; -+ ctx_buf->iocbq->cmd_flag = LPFC_IO_NVMET; - nvmewqe = ctx_buf->iocbq; - wqe = &nvmewqe->wqe; - -@@ -2027,8 +2029,10 @@ lpfc_nvmet_wqfull_flush(struct lpfc_hba *phba, struct lpfc_queue *wq, - list_del(&nvmewqeq->list); - spin_unlock_irqrestore(&pring->ring_lock, - iflags); -+ memcpy(&nvmewqeq->wcqe_cmpl, wcqep, -+ sizeof(*wcqep)); - lpfc_nvmet_xmt_fcp_op_cmp(phba, nvmewqeq, -- wcqep); -+ nvmewqeq); - return; - } - continue; -@@ -2036,7 +2040,8 @@ lpfc_nvmet_wqfull_flush(struct lpfc_hba *phba, struct lpfc_queue *wq, - /* Flush all IOs */ - list_del(&nvmewqeq->list); - spin_unlock_irqrestore(&pring->ring_lock, iflags); -- lpfc_nvmet_xmt_fcp_op_cmp(phba, nvmewqeq, wcqep); -+ memcpy(&nvmewqeq->wcqe_cmpl, wcqep, sizeof(*wcqep)); -+ lpfc_nvmet_xmt_fcp_op_cmp(phba, nvmewqeq, nvmewqeq); - spin_lock_irqsave(&pring->ring_lock, iflags); - } - } -@@ -2676,7 +2681,7 @@ lpfc_nvmet_prep_ls_wqe(struct lpfc_hba *phba, - nvmewqe->retry = 1; - nvmewqe->vport = phba->pport; - nvmewqe->drvrTimeout = (phba->fc_ratov * 3) + LPFC_DRVR_TIMEOUT; -- nvmewqe->iocb_flag |= LPFC_IO_NVME_LS; -+ nvmewqe->cmd_flag |= LPFC_IO_NVME_LS; - - /* Xmit NVMET response to remote NPORT <did> */ - lpfc_printf_log(phba, KERN_INFO, LOG_NVME_DISC, -@@ -3033,7 +3038,7 @@ lpfc_nvmet_prep_fcp_wqe(struct lpfc_hba *phba, - * lpfc_nvmet_sol_fcp_abort_cmp - Completion handler for ABTS - * @phba: Pointer to HBA context object. - * @cmdwqe: Pointer to driver command WQE object. -- * @wcqe: Pointer to driver response CQE object. -+ * @rspwqe: Pointer to driver response WQE object. - * - * The function is called from SLI ring event handler with no - * lock held. 
This function is the completion handler for NVME ABTS for FCP cmds -@@ -3041,13 +3046,14 @@ lpfc_nvmet_prep_fcp_wqe(struct lpfc_hba *phba, - **/ - static void - lpfc_nvmet_sol_fcp_abort_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe, -- struct lpfc_wcqe_complete *wcqe) -+ struct lpfc_iocbq *rspwqe) - { - struct lpfc_async_xchg_ctx *ctxp; - struct lpfc_nvmet_tgtport *tgtp; - uint32_t result; - unsigned long flags; - bool released = false; -+ struct lpfc_wcqe_complete *wcqe = &rspwqe->wcqe_cmpl; - - ctxp = cmdwqe->context2; - result = wcqe->parameter; -@@ -3102,7 +3108,7 @@ lpfc_nvmet_sol_fcp_abort_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe, - * lpfc_nvmet_unsol_fcp_abort_cmp - Completion handler for ABTS - * @phba: Pointer to HBA context object. - * @cmdwqe: Pointer to driver command WQE object. -- * @wcqe: Pointer to driver response CQE object. -+ * @rspwqe: Pointer to driver response WQE object. - * - * The function is called from SLI ring event handler with no - * lock held. This function is the completion handler for NVME ABTS for FCP cmds -@@ -3110,13 +3116,14 @@ lpfc_nvmet_sol_fcp_abort_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe, - **/ - static void - lpfc_nvmet_unsol_fcp_abort_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe, -- struct lpfc_wcqe_complete *wcqe) -+ struct lpfc_iocbq *rspwqe) - { - struct lpfc_async_xchg_ctx *ctxp; - struct lpfc_nvmet_tgtport *tgtp; - unsigned long flags; - uint32_t result; - bool released = false; -+ struct lpfc_wcqe_complete *wcqe = &rspwqe->wcqe_cmpl; - - ctxp = cmdwqe->context2; - result = wcqe->parameter; -@@ -3183,7 +3190,7 @@ lpfc_nvmet_unsol_fcp_abort_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe, - * lpfc_nvmet_xmt_ls_abort_cmp - Completion handler for ABTS - * @phba: Pointer to HBA context object. - * @cmdwqe: Pointer to driver command WQE object. -- * @wcqe: Pointer to driver response CQE object. -+ * @rspwqe: Pointer to driver response WQE object. - * - * The function is called from SLI ring event handler with no - * lock held. This function is the completion handler for NVME ABTS for LS cmds -@@ -3191,11 +3198,12 @@ lpfc_nvmet_unsol_fcp_abort_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe, - **/ - static void - lpfc_nvmet_xmt_ls_abort_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe, -- struct lpfc_wcqe_complete *wcqe) -+ struct lpfc_iocbq *rspwqe) - { - struct lpfc_async_xchg_ctx *ctxp; - struct lpfc_nvmet_tgtport *tgtp; - uint32_t result; -+ struct lpfc_wcqe_complete *wcqe = &rspwqe->wcqe_cmpl; - - ctxp = cmdwqe->context2; - result = wcqe->parameter; -@@ -3319,7 +3327,7 @@ lpfc_nvmet_unsol_issue_abort(struct lpfc_hba *phba, - abts_wqeq->context1 = ndlp; - abts_wqeq->context2 = ctxp; - abts_wqeq->context3 = NULL; -- abts_wqeq->rsvd2 = 0; -+ abts_wqeq->num_bdes = 0; - /* hba_wqidx should already be setup from command we are aborting */ - abts_wqeq->iocb.ulpCommand = CMD_XMIT_SEQUENCE64_CR; - abts_wqeq->iocb.ulpLe = 1; -@@ -3448,7 +3456,7 @@ lpfc_nvmet_sol_fcp_issue_abort(struct lpfc_hba *phba, - } - - /* Outstanding abort is in progress */ -- if (abts_wqeq->iocb_flag & LPFC_DRIVER_ABORTED) { -+ if (abts_wqeq->cmd_flag & LPFC_DRIVER_ABORTED) { - spin_unlock_irqrestore(&phba->hbalock, flags); - atomic_inc(&tgtp->xmt_abort_rsp_error); - lpfc_printf_log(phba, KERN_ERR, LOG_TRACE_EVENT, -@@ -3463,15 +3471,14 @@ lpfc_nvmet_sol_fcp_issue_abort(struct lpfc_hba *phba, - } - - /* Ready - mark outstanding as aborted by driver. 
*/ -- abts_wqeq->iocb_flag |= LPFC_DRIVER_ABORTED; -+ abts_wqeq->cmd_flag |= LPFC_DRIVER_ABORTED; - - lpfc_nvmet_prep_abort_wqe(abts_wqeq, ctxp->wqeq->sli4_xritag, opt); - - /* ABTS WQE must go to the same WQ as the WQE to be aborted */ - abts_wqeq->hba_wqidx = ctxp->wqeq->hba_wqidx; -- abts_wqeq->wqe_cmpl = lpfc_nvmet_sol_fcp_abort_cmp; -- abts_wqeq->iocb_cmpl = NULL; -- abts_wqeq->iocb_flag |= LPFC_IO_NVME; -+ abts_wqeq->cmd_cmpl = lpfc_nvmet_sol_fcp_abort_cmp; -+ abts_wqeq->cmd_flag |= LPFC_IO_NVME; - abts_wqeq->context2 = ctxp; - abts_wqeq->vport = phba->pport; - if (!ctxp->hdwq) -@@ -3528,9 +3535,8 @@ lpfc_nvmet_unsol_fcp_issue_abort(struct lpfc_hba *phba, - - spin_lock_irqsave(&phba->hbalock, flags); - abts_wqeq = ctxp->wqeq; -- abts_wqeq->wqe_cmpl = lpfc_nvmet_unsol_fcp_abort_cmp; -- abts_wqeq->iocb_cmpl = NULL; -- abts_wqeq->iocb_flag |= LPFC_IO_NVMET; -+ abts_wqeq->cmd_cmpl = lpfc_nvmet_unsol_fcp_abort_cmp; -+ abts_wqeq->cmd_flag |= LPFC_IO_NVMET; - if (!ctxp->hdwq) - ctxp->hdwq = &phba->sli4_hba.hdwq[abts_wqeq->hba_wqidx]; - -@@ -3614,9 +3620,8 @@ lpfc_nvme_unsol_ls_issue_abort(struct lpfc_hba *phba, - } - - spin_lock_irqsave(&phba->hbalock, flags); -- abts_wqeq->wqe_cmpl = lpfc_nvmet_xmt_ls_abort_cmp; -- abts_wqeq->iocb_cmpl = NULL; -- abts_wqeq->iocb_flag |= LPFC_IO_NVME_LS; -+ abts_wqeq->cmd_cmpl = lpfc_nvmet_xmt_ls_abort_cmp; -+ abts_wqeq->cmd_flag |= LPFC_IO_NVME_LS; - rc = lpfc_sli4_issue_wqe(phba, ctxp->hdwq, abts_wqeq); - spin_unlock_irqrestore(&phba->hbalock, flags); - if (rc == WQE_SUCCESS) { diff --git a/drivers/scsi/lpfc/lpfc_scsi.c b/drivers/scsi/lpfc/lpfc_scsi.c -index befdf864c43bd..41313fcaf84a3 100644 +index befdf864c43bd..edae98a35fc3b 100644 --- a/drivers/scsi/lpfc/lpfc_scsi.c +++ b/drivers/scsi/lpfc/lpfc_scsi.c -@@ -362,7 +362,7 @@ lpfc_new_scsi_buf_s3(struct lpfc_vport *vport, int num_to_alloc) - kfree(psb); - break; - } -- psb->cur_iocbq.iocb_flag |= LPFC_IO_FCP; -+ psb->cur_iocbq.cmd_flag |= LPFC_IO_FCP; - - psb->fcp_cmnd = psb->data; - psb->fcp_rsp = psb->data + sizeof(struct fcp_cmnd); -@@ -468,7 +468,7 @@ lpfc_sli4_vport_delete_fcp_xri_aborted(struct lpfc_vport *vport) - spin_lock(&qp->abts_io_buf_list_lock); - list_for_each_entry_safe(psb, next_psb, - &qp->lpfc_abts_io_buf_list, list) { -- if (psb->cur_iocbq.iocb_flag & LPFC_IO_NVME) -+ if (psb->cur_iocbq.cmd_flag & LPFC_IO_NVME) - continue; - - if (psb->rdata && psb->rdata->pnode && @@ -493,8 +493,8 @@ void lpfc_sli4_io_xri_aborted(struct lpfc_hba *phba, struct sli4_wcqe_xri_aborted *axri, int idx) @@ -252914,7 +306278,7 @@ index befdf864c43bd..41313fcaf84a3 100644 struct lpfc_io_buf *psb, *next_psb; struct lpfc_sli4_hdw_queue *qp; unsigned long iflag = 0; -@@ -504,25 +504,39 @@ lpfc_sli4_io_xri_aborted(struct lpfc_hba *phba, +@@ -504,15 +504,22 @@ lpfc_sli4_io_xri_aborted(struct lpfc_hba *phba, int rrq_empty = 0; struct lpfc_sli_ring *pring = phba->sli4_hba.els_wq->pring; struct scsi_cmnd *cmd; @@ -252938,9 +306302,7 @@ index befdf864c43bd..41313fcaf84a3 100644 if (psb->cur_iocbq.sli4_xritag == xri) { list_del_init(&psb->list); psb->flags &= ~LPFC_SBUF_XBUSY; - psb->status = IOSTAT_SUCCESS; -- if (psb->cur_iocbq.iocb_flag & LPFC_IO_NVME) { -+ if (psb->cur_iocbq.cmd_flag & LPFC_IO_NVME) { +@@ -521,8 +528,15 @@ lpfc_sli4_io_xri_aborted(struct lpfc_hba *phba, qp->abts_nvme_io_bufs--; spin_unlock(&qp->abts_io_buf_list_lock); spin_unlock_irqrestore(&phba->hbalock, iflag); @@ -252974,15 +306336,6 @@ index befdf864c43bd..41313fcaf84a3 100644 spin_lock_irqsave(&psb->buf_lock, iflag); cmd = psb->pCmd; 
psb->pCmd = NULL; -@@ -557,7 +571,7 @@ lpfc_sli4_io_xri_aborted(struct lpfc_hba *phba, - * for command completion wake up the thread. - */ - spin_lock_irqsave(&psb->buf_lock, iflag); -- psb->cur_iocbq.iocb_flag &= -+ psb->cur_iocbq.cmd_flag &= - ~LPFC_DRIVER_ABORTED; - if (psb->waitq) - wake_up(psb->waitq); @@ -567,25 +581,30 @@ lpfc_sli4_io_xri_aborted(struct lpfc_hba *phba, lpfc_release_scsi_buf_s4(phba, psb); if (rrq_empty) @@ -253014,8 +306367,8 @@ index befdf864c43bd..41313fcaf84a3 100644 + for (i = 1; i <= phba->sli.last_iotag; i++) { + iocbq = phba->sli.iocbq_lookup[i]; -+ if (!(iocbq->cmd_flag & LPFC_IO_FCP) || -+ (iocbq->cmd_flag & LPFC_IO_LIBDFC)) ++ if (!(iocbq->iocb_flag & LPFC_IO_FCP) || ++ (iocbq->iocb_flag & LPFC_IO_LIBDFC)) + continue; + if (iocbq->sli4_xritag != xri) + continue; @@ -253029,321 +306382,7 @@ index befdf864c43bd..41313fcaf84a3 100644 } spin_unlock_irqrestore(&phba->hbalock, iflag); } -@@ -676,7 +695,7 @@ lpfc_get_scsi_buf_s4(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp, - /* Setup key fields in buffer that may have been changed - * if other protocols used this buffer. - */ -- lpfc_cmd->cur_iocbq.iocb_flag = LPFC_IO_FCP; -+ lpfc_cmd->cur_iocbq.cmd_flag = LPFC_IO_FCP; - lpfc_cmd->prot_seg_cnt = 0; - lpfc_cmd->seg_cnt = 0; - lpfc_cmd->timeout = 0; -@@ -764,7 +783,7 @@ lpfc_release_scsi_buf_s3(struct lpfc_hba *phba, struct lpfc_io_buf *psb) - - spin_lock_irqsave(&phba->scsi_buf_list_put_lock, iflag); - psb->pCmd = NULL; -- psb->cur_iocbq.iocb_flag = LPFC_IO_FCP; -+ psb->cur_iocbq.cmd_flag = LPFC_IO_FCP; - list_add_tail(&psb->list, &phba->lpfc_scsi_buf_list_put); - spin_unlock_irqrestore(&phba->scsi_buf_list_put_lock, iflag); - } -@@ -912,7 +931,7 @@ lpfc_scsi_prep_dma_buf_s3(struct lpfc_hba *phba, struct lpfc_io_buf *lpfc_cmd) - physaddr = sg_dma_address(sgel); - if (phba->sli_rev == 3 && - !(phba->sli3_options & LPFC_SLI3_BG_ENABLED) && -- !(iocbq->iocb_flag & DSS_SECURITY_OP) && -+ !(iocbq->cmd_flag & DSS_SECURITY_OP) && - nseg <= LPFC_EXT_DATA_BDE_COUNT) { - data_bde->tus.f.bdeFlags = BUFF_TYPE_BDE_64; - data_bde->tus.f.bdeSize = sg_dma_len(sgel); -@@ -940,7 +959,7 @@ lpfc_scsi_prep_dma_buf_s3(struct lpfc_hba *phba, struct lpfc_io_buf *lpfc_cmd) - */ - if (phba->sli_rev == 3 && - !(phba->sli3_options & LPFC_SLI3_BG_ENABLED) && -- !(iocbq->iocb_flag & DSS_SECURITY_OP)) { -+ !(iocbq->cmd_flag & DSS_SECURITY_OP)) { - if (num_bde > LPFC_EXT_DATA_BDE_COUNT) { - /* - * The extended IOCB format can only fit 3 BDE or a BPL. 
-@@ -2923,154 +2942,58 @@ out: - * -1 - Internal error (bad profile, ...etc) - */ - static int --lpfc_sli4_parse_bg_err(struct lpfc_hba *phba, struct lpfc_io_buf *lpfc_cmd, -- struct lpfc_wcqe_complete *wcqe) -+lpfc_parse_bg_err(struct lpfc_hba *phba, struct lpfc_io_buf *lpfc_cmd, -+ struct lpfc_iocbq *pIocbOut) - { - struct scsi_cmnd *cmd = lpfc_cmd->pCmd; -+ struct sli3_bg_fields *bgf; - int ret = 0; -- u32 status = bf_get(lpfc_wcqe_c_status, wcqe); -+ struct lpfc_wcqe_complete *wcqe; -+ u32 status; - u32 bghm = 0; - u32 bgstat = 0; - u64 failing_sector = 0; - -- if (status == CQE_STATUS_DI_ERROR) { -- if (bf_get(lpfc_wcqe_c_bg_ge, wcqe)) /* Guard Check failed */ -- bgstat |= BGS_GUARD_ERR_MASK; -- if (bf_get(lpfc_wcqe_c_bg_ae, wcqe)) /* AppTag Check failed */ -- bgstat |= BGS_APPTAG_ERR_MASK; -- if (bf_get(lpfc_wcqe_c_bg_re, wcqe)) /* RefTag Check failed */ -- bgstat |= BGS_REFTAG_ERR_MASK; -- -- /* Check to see if there was any good data before the error */ -- if (bf_get(lpfc_wcqe_c_bg_tdpv, wcqe)) { -- bgstat |= BGS_HI_WATER_MARK_PRESENT_MASK; -- bghm = wcqe->total_data_placed; -- } -- -- /* -- * Set ALL the error bits to indicate we don't know what -- * type of error it is. -- */ -- if (!bgstat) -- bgstat |= (BGS_REFTAG_ERR_MASK | BGS_APPTAG_ERR_MASK | -- BGS_GUARD_ERR_MASK); -- } -- -- if (lpfc_bgs_get_guard_err(bgstat)) { -- ret = 1; -- -- scsi_build_sense(cmd, 1, ILLEGAL_REQUEST, 0x10, 0x1); -- set_host_byte(cmd, DID_ABORT); -- phba->bg_guard_err_cnt++; -- lpfc_printf_log(phba, KERN_WARNING, LOG_FCP | LOG_BG, -- "9059 BLKGRD: Guard Tag error in cmd" -- " 0x%x lba 0x%llx blk cnt 0x%x " -- "bgstat=x%x bghm=x%x\n", cmd->cmnd[0], -- (unsigned long long)scsi_get_lba(cmd), -- scsi_logical_block_count(cmd), bgstat, bghm); -- } -- -- if (lpfc_bgs_get_reftag_err(bgstat)) { -- ret = 1; -- -- scsi_build_sense(cmd, 1, ILLEGAL_REQUEST, 0x10, 0x3); -- set_host_byte(cmd, DID_ABORT); -- -- phba->bg_reftag_err_cnt++; -- lpfc_printf_log(phba, KERN_WARNING, LOG_FCP | LOG_BG, -- "9060 BLKGRD: Ref Tag error in cmd" -- " 0x%x lba 0x%llx blk cnt 0x%x " -- "bgstat=x%x bghm=x%x\n", cmd->cmnd[0], -- (unsigned long long)scsi_get_lba(cmd), -- scsi_logical_block_count(cmd), bgstat, bghm); -- } -+ if (phba->sli_rev == LPFC_SLI_REV4) { -+ wcqe = &pIocbOut->wcqe_cmpl; -+ status = bf_get(lpfc_wcqe_c_status, wcqe); - -- if (lpfc_bgs_get_apptag_err(bgstat)) { -- ret = 1; -+ if (status == CQE_STATUS_DI_ERROR) { -+ /* Guard Check failed */ -+ if (bf_get(lpfc_wcqe_c_bg_ge, wcqe)) -+ bgstat |= BGS_GUARD_ERR_MASK; - -- scsi_build_sense(cmd, 1, ILLEGAL_REQUEST, 0x10, 0x2); -- set_host_byte(cmd, DID_ABORT); -+ /* AppTag Check failed */ -+ if (bf_get(lpfc_wcqe_c_bg_ae, wcqe)) -+ bgstat |= BGS_APPTAG_ERR_MASK; - -- phba->bg_apptag_err_cnt++; -- lpfc_printf_log(phba, KERN_WARNING, LOG_FCP | LOG_BG, -- "9062 BLKGRD: App Tag error in cmd" -- " 0x%x lba 0x%llx blk cnt 0x%x " -- "bgstat=x%x bghm=x%x\n", cmd->cmnd[0], -- (unsigned long long)scsi_get_lba(cmd), -- scsi_logical_block_count(cmd), bgstat, bghm); -- } -+ /* RefTag Check failed */ -+ if (bf_get(lpfc_wcqe_c_bg_re, wcqe)) -+ bgstat |= BGS_REFTAG_ERR_MASK; - -- if (lpfc_bgs_get_hi_water_mark_present(bgstat)) { -- /* -- * setup sense data descriptor 0 per SPC-4 as an information -- * field, and put the failing LBA in it. -- * This code assumes there was also a guard/app/ref tag error -- * indication. 
-- */ -- cmd->sense_buffer[7] = 0xc; /* Additional sense length */ -- cmd->sense_buffer[8] = 0; /* Information descriptor type */ -- cmd->sense_buffer[9] = 0xa; /* Additional descriptor length */ -- cmd->sense_buffer[10] = 0x80; /* Validity bit */ -+ /* Check to see if there was any good data before the -+ * error -+ */ -+ if (bf_get(lpfc_wcqe_c_bg_tdpv, wcqe)) { -+ bgstat |= BGS_HI_WATER_MARK_PRESENT_MASK; -+ bghm = wcqe->total_data_placed; -+ } - -- /* bghm is a "on the wire" FC frame based count */ -- switch (scsi_get_prot_op(cmd)) { -- case SCSI_PROT_READ_INSERT: -- case SCSI_PROT_WRITE_STRIP: -- bghm /= cmd->device->sector_size; -- break; -- case SCSI_PROT_READ_STRIP: -- case SCSI_PROT_WRITE_INSERT: -- case SCSI_PROT_READ_PASS: -- case SCSI_PROT_WRITE_PASS: -- bghm /= (cmd->device->sector_size + -- sizeof(struct scsi_dif_tuple)); -- break; -+ /* -+ * Set ALL the error bits to indicate we don't know what -+ * type of error it is. -+ */ -+ if (!bgstat) -+ bgstat |= (BGS_REFTAG_ERR_MASK | -+ BGS_APPTAG_ERR_MASK | -+ BGS_GUARD_ERR_MASK); - } - -- failing_sector = scsi_get_lba(cmd); -- failing_sector += bghm; -- -- /* Descriptor Information */ -- put_unaligned_be64(failing_sector, &cmd->sense_buffer[12]); -- } -- -- if (!ret) { -- /* No error was reported - problem in FW? */ -- lpfc_printf_log(phba, KERN_WARNING, LOG_FCP | LOG_BG, -- "9068 BLKGRD: Unknown error in cmd" -- " 0x%x lba 0x%llx blk cnt 0x%x " -- "bgstat=x%x bghm=x%x\n", cmd->cmnd[0], -- (unsigned long long)scsi_get_lba(cmd), -- scsi_logical_block_count(cmd), bgstat, bghm); -- -- /* Calculate what type of error it was */ -- lpfc_calc_bg_err(phba, lpfc_cmd); -+ } else { -+ bgf = &pIocbOut->iocb.unsli3.sli3_bg; -+ bghm = bgf->bghm; -+ bgstat = bgf->bgstat; - } -- return ret; --} -- --/* -- * This function checks for BlockGuard errors detected by -- * the HBA. In case of errors, the ASC/ASCQ fields in the -- * sense buffer will be set accordingly, paired with -- * ILLEGAL_REQUEST to signal to the kernel that the HBA -- * detected corruption. 
-- * -- * Returns: -- * 0 - No error found -- * 1 - BlockGuard error found -- * -1 - Internal error (bad profile, ...etc) -- */ --static int --lpfc_parse_bg_err(struct lpfc_hba *phba, struct lpfc_io_buf *lpfc_cmd, -- struct lpfc_iocbq *pIocbOut) --{ -- struct scsi_cmnd *cmd = lpfc_cmd->pCmd; -- struct sli3_bg_fields *bgf = &pIocbOut->iocb.unsli3.sli3_bg; -- int ret = 0; -- uint32_t bghm = bgf->bghm; -- uint32_t bgstat = bgf->bgstat; -- uint64_t failing_sector = 0; - - if (lpfc_bgs_get_invalid_prof(bgstat)) { - cmd->result = DID_ERROR << 16; -@@ -3098,7 +3021,6 @@ lpfc_parse_bg_err(struct lpfc_hba *phba, struct lpfc_io_buf *lpfc_cmd, - - if (lpfc_bgs_get_guard_err(bgstat)) { - ret = 1; -- - scsi_build_sense(cmd, 1, ILLEGAL_REQUEST, 0x10, 0x1); - set_host_byte(cmd, DID_ABORT); - phba->bg_guard_err_cnt++; -@@ -3112,10 +3034,8 @@ lpfc_parse_bg_err(struct lpfc_hba *phba, struct lpfc_io_buf *lpfc_cmd, - - if (lpfc_bgs_get_reftag_err(bgstat)) { - ret = 1; -- - scsi_build_sense(cmd, 1, ILLEGAL_REQUEST, 0x10, 0x3); - set_host_byte(cmd, DID_ABORT); -- - phba->bg_reftag_err_cnt++; - lpfc_printf_log(phba, KERN_WARNING, LOG_FCP | LOG_BG, - "9056 BLKGRD: Ref Tag error in cmd " -@@ -3127,10 +3047,8 @@ lpfc_parse_bg_err(struct lpfc_hba *phba, struct lpfc_io_buf *lpfc_cmd, - - if (lpfc_bgs_get_apptag_err(bgstat)) { - ret = 1; -- - scsi_build_sense(cmd, 1, ILLEGAL_REQUEST, 0x10, 0x2); - set_host_byte(cmd, DID_ABORT); -- - phba->bg_apptag_err_cnt++; - lpfc_printf_log(phba, KERN_WARNING, LOG_FCP | LOG_BG, - "9061 BLKGRD: App Tag error in cmd " -@@ -3415,7 +3333,7 @@ lpfc_scsi_prep_dma_buf_s4(struct lpfc_hba *phba, struct lpfc_io_buf *lpfc_cmd) - */ - if ((phba->cfg_fof) && ((struct lpfc_device_data *) - scsi_cmnd->device->hostdata)->oas_enabled) { -- lpfc_cmd->cur_iocbq.iocb_flag |= (LPFC_IO_OAS | LPFC_IO_FOF); -+ lpfc_cmd->cur_iocbq.cmd_flag |= (LPFC_IO_OAS | LPFC_IO_FOF); - lpfc_cmd->cur_iocbq.priority = ((struct lpfc_device_data *) - scsi_cmnd->device->hostdata)->priority; - -@@ -3572,15 +3490,15 @@ lpfc_bg_scsi_prep_dma_buf_s4(struct lpfc_hba *phba, - switch (scsi_get_prot_op(scsi_cmnd)) { - case SCSI_PROT_WRITE_STRIP: - case SCSI_PROT_READ_STRIP: -- lpfc_cmd->cur_iocbq.iocb_flag |= LPFC_IO_DIF_STRIP; -+ lpfc_cmd->cur_iocbq.cmd_flag |= LPFC_IO_DIF_STRIP; - break; - case SCSI_PROT_WRITE_INSERT: - case SCSI_PROT_READ_INSERT: -- lpfc_cmd->cur_iocbq.iocb_flag |= LPFC_IO_DIF_INSERT; -+ lpfc_cmd->cur_iocbq.cmd_flag |= LPFC_IO_DIF_INSERT; - break; - case SCSI_PROT_WRITE_PASS: - case SCSI_PROT_READ_PASS: -- lpfc_cmd->cur_iocbq.iocb_flag |= LPFC_IO_DIF_PASS; -+ lpfc_cmd->cur_iocbq.cmd_flag |= LPFC_IO_DIF_PASS; - break; - } - -@@ -3611,7 +3529,7 @@ lpfc_bg_scsi_prep_dma_buf_s4(struct lpfc_hba *phba, - */ - if ((phba->cfg_fof) && ((struct lpfc_device_data *) - scsi_cmnd->device->hostdata)->oas_enabled) { -- lpfc_cmd->cur_iocbq.iocb_flag |= (LPFC_IO_OAS | LPFC_IO_FOF); -+ lpfc_cmd->cur_iocbq.cmd_flag |= (LPFC_IO_OAS | LPFC_IO_FOF); - - /* Word 10 */ - bf_set(wqe_oas, &wqe->generic.wqe_com, 1); -@@ -3621,14 +3539,14 @@ lpfc_bg_scsi_prep_dma_buf_s4(struct lpfc_hba *phba, - } - - /* Word 7. 
DIF Flags */ -- if (lpfc_cmd->cur_iocbq.iocb_flag & LPFC_IO_DIF_PASS) -+ if (lpfc_cmd->cur_iocbq.cmd_flag & LPFC_IO_DIF_PASS) - bf_set(wqe_dif, &wqe->generic.wqe_com, LPFC_WQE_DIF_PASSTHRU); -- else if (lpfc_cmd->cur_iocbq.iocb_flag & LPFC_IO_DIF_STRIP) -+ else if (lpfc_cmd->cur_iocbq.cmd_flag & LPFC_IO_DIF_STRIP) - bf_set(wqe_dif, &wqe->generic.wqe_com, LPFC_WQE_DIF_STRIP); -- else if (lpfc_cmd->cur_iocbq.iocb_flag & LPFC_IO_DIF_INSERT) -+ else if (lpfc_cmd->cur_iocbq.cmd_flag & LPFC_IO_DIF_INSERT) - bf_set(wqe_dif, &wqe->generic.wqe_com, LPFC_WQE_DIF_INSERT); - -- lpfc_cmd->cur_iocbq.iocb_flag &= ~(LPFC_IO_DIF_PASS | -+ lpfc_cmd->cur_iocbq.cmd_flag &= ~(LPFC_IO_DIF_PASS | - LPFC_IO_DIF_STRIP | LPFC_IO_DIF_INSERT); - - return 0; -@@ -3917,7 +3835,7 @@ lpfc_update_cmf_cmpl(struct lpfc_hba *phba, +@@ -3917,7 +3936,7 @@ lpfc_update_cmf_cmpl(struct lpfc_hba *phba, else time = div_u64(time + 500, 1000); /* round it */ @@ -253352,7 +306391,7 @@ index befdf864c43bd..41313fcaf84a3 100644 atomic64_add(size, &cgs->rcv_bytes); atomic64_add(time, &cgs->rx_latency); atomic_inc(&cgs->rx_io_cnt); -@@ -3960,7 +3878,7 @@ lpfc_update_cmf_cmd(struct lpfc_hba *phba, uint32_t size) +@@ -3960,7 +3979,7 @@ lpfc_update_cmf_cmd(struct lpfc_hba *phba, uint32_t size) atomic_set(&phba->rx_max_read_cnt, size); } @@ -253361,70 +306400,7 @@ index befdf864c43bd..41313fcaf84a3 100644 atomic64_add(size, &cgs->total_bytes); return 0; } -@@ -4153,7 +4071,7 @@ lpfc_handle_fcp_err(struct lpfc_vport *vport, struct lpfc_io_buf *lpfc_cmd, - * lpfc_fcp_io_cmd_wqe_cmpl - Complete a FCP IO - * @phba: The hba for which this call is being executed. - * @pwqeIn: The command WQE for the scsi cmnd. -- * @wcqe: Pointer to driver response CQE object. -+ * @pwqeOut: Pointer to driver response WQE object. - * - * This routine assigns scsi command result by looking into response WQE - * status field appropriately. This routine handles QUEUE FULL condition as -@@ -4161,10 +4079,11 @@ lpfc_handle_fcp_err(struct lpfc_vport *vport, struct lpfc_io_buf *lpfc_cmd, - **/ - static void - lpfc_fcp_io_cmd_wqe_cmpl(struct lpfc_hba *phba, struct lpfc_iocbq *pwqeIn, -- struct lpfc_wcqe_complete *wcqe) -+ struct lpfc_iocbq *pwqeOut) - { - struct lpfc_io_buf *lpfc_cmd = - (struct lpfc_io_buf *)pwqeIn->context1; -+ struct lpfc_wcqe_complete *wcqe = &pwqeOut->wcqe_cmpl; - struct lpfc_vport *vport = pwqeIn->vport; - struct lpfc_rport_data *rdata; - struct lpfc_nodelist *ndlp; -@@ -4174,7 +4093,6 @@ lpfc_fcp_io_cmd_wqe_cmpl(struct lpfc_hba *phba, struct lpfc_iocbq *pwqeIn, - struct Scsi_Host *shost; - u32 logit = LOG_FCP; - u32 status, idx; -- unsigned long iflags = 0; - u32 lat; - u8 wait_xb_clr = 0; - -@@ -4189,30 +4107,16 @@ lpfc_fcp_io_cmd_wqe_cmpl(struct lpfc_hba *phba, struct lpfc_iocbq *pwqeIn, - rdata = lpfc_cmd->rdata; - ndlp = rdata->pnode; - -- if (bf_get(lpfc_wcqe_c_xb, wcqe)) { -- /* TOREMOVE - currently this flag is checked during -- * the release of lpfc_iocbq. Remove once we move -- * to lpfc_wqe_job construct. 
-- * -- * This needs to be done outside buf_lock -- */ -- spin_lock_irqsave(&phba->hbalock, iflags); -- lpfc_cmd->cur_iocbq.iocb_flag |= LPFC_EXCHANGE_BUSY; -- spin_unlock_irqrestore(&phba->hbalock, iflags); -- } -- -- /* Guard against abort handler being called at same time */ -- spin_lock(&lpfc_cmd->buf_lock); -- - /* Sanity check on return of outstanding command */ - cmd = lpfc_cmd->pCmd; - if (!cmd) { - lpfc_printf_vlog(vport, KERN_ERR, LOG_TRACE_EVENT, - "9042 I/O completion: Not an active IO\n"); -- spin_unlock(&lpfc_cmd->buf_lock); - lpfc_release_scsi_buf(phba, lpfc_cmd); - return; - } -+ /* Guard against abort handler being called at same time */ -+ spin_lock(&lpfc_cmd->buf_lock); - idx = lpfc_cmd->cur_iocbq.hba_wqidx; - if (phba->sli4_hba.hdwq) - phba->sli4_hba.hdwq[idx].scsi_cstat.io_cmpls++; -@@ -4374,7 +4278,7 @@ lpfc_fcp_io_cmd_wqe_cmpl(struct lpfc_hba *phba, struct lpfc_iocbq *pwqeIn, +@@ -4374,7 +4393,7 @@ lpfc_fcp_io_cmd_wqe_cmpl(struct lpfc_hba *phba, struct lpfc_iocbq *pwqeIn, lpfc_cmd->result == IOERR_NO_RESOURCES || lpfc_cmd->result == IOERR_ABORT_REQUESTED || lpfc_cmd->result == IOERR_SLER_CMD_RCV_FAILURE) { @@ -253433,44 +306409,7 @@ index befdf864c43bd..41313fcaf84a3 100644 break; } if ((lpfc_cmd->result == IOERR_RX_DMA_FAILED || -@@ -4386,12 +4290,14 @@ lpfc_fcp_io_cmd_wqe_cmpl(struct lpfc_hba *phba, struct lpfc_iocbq *pwqeIn, - * This is a response for a BG enabled - * cmd. Parse BG error - */ -- lpfc_sli4_parse_bg_err(phba, lpfc_cmd, -- wcqe); -+ lpfc_parse_bg_err(phba, lpfc_cmd, pwqeOut); - break; -+ } else { -+ lpfc_printf_vlog(vport, KERN_WARNING, -+ LOG_BG, -+ "9040 non-zero BGSTAT " -+ "on unprotected cmd\n"); - } -- lpfc_printf_vlog(vport, KERN_WARNING, LOG_BG, -- "9040 non-zero BGSTAT on unprotected cmd\n"); - } - lpfc_printf_vlog(vport, KERN_WARNING, logit, - "9036 Local Reject FCP cmd x%x failed" -@@ -4488,7 +4394,7 @@ lpfc_fcp_io_cmd_wqe_cmpl(struct lpfc_hba *phba, struct lpfc_iocbq *pwqeIn, - * wake up the thread. - */ - spin_lock(&lpfc_cmd->buf_lock); -- lpfc_cmd->cur_iocbq.iocb_flag &= ~LPFC_DRIVER_ABORTED; -+ lpfc_cmd->cur_iocbq.cmd_flag &= ~LPFC_DRIVER_ABORTED; - if (lpfc_cmd->waitq) - wake_up(lpfc_cmd->waitq); - spin_unlock(&lpfc_cmd->buf_lock); -@@ -4548,7 +4454,7 @@ lpfc_scsi_cmd_iocb_cmpl(struct lpfc_hba *phba, struct lpfc_iocbq *pIocbIn, - lpfc_cmd->status = pIocbOut->iocb.ulpStatus; - /* pick up SLI4 exchange busy status from HBA */ - lpfc_cmd->flags &= ~LPFC_SBUF_XBUSY; -- if (pIocbOut->iocb_flag & LPFC_EXCHANGE_BUSY) -+ if (pIocbOut->cmd_flag & LPFC_EXCHANGE_BUSY) - lpfc_cmd->flags |= LPFC_SBUF_XBUSY; - - #ifdef CONFIG_SCSI_LPFC_DEBUG_FS -@@ -4661,7 +4567,7 @@ lpfc_scsi_cmd_iocb_cmpl(struct lpfc_hba *phba, struct lpfc_iocbq *pIocbIn, +@@ -4661,7 +4680,7 @@ lpfc_scsi_cmd_iocb_cmpl(struct lpfc_hba *phba, struct lpfc_iocbq *pIocbIn, lpfc_cmd->result == IOERR_NO_RESOURCES || lpfc_cmd->result == IOERR_ABORT_REQUESTED || lpfc_cmd->result == IOERR_SLER_CMD_RCV_FAILURE) { @@ -253479,217 +306418,7 @@ index befdf864c43bd..41313fcaf84a3 100644 break; } if ((lpfc_cmd->result == IOERR_RX_DMA_FAILED || -@@ -4757,7 +4663,7 @@ lpfc_scsi_cmd_iocb_cmpl(struct lpfc_hba *phba, struct lpfc_iocbq *pIocbIn, - * wake up the thread. 
- */ - spin_lock(&lpfc_cmd->buf_lock); -- lpfc_cmd->cur_iocbq.iocb_flag &= ~LPFC_DRIVER_ABORTED; -+ lpfc_cmd->cur_iocbq.cmd_flag &= ~LPFC_DRIVER_ABORTED; - if (lpfc_cmd->waitq) - wake_up(lpfc_cmd->waitq); - spin_unlock(&lpfc_cmd->buf_lock); -@@ -4835,8 +4741,8 @@ static int lpfc_scsi_prep_cmnd_buf_s3(struct lpfc_vport *vport, - - piocbq->iocb.ulpClass = (pnode->nlp_fcp_info & 0x0f); - piocbq->context1 = lpfc_cmd; -- if (!piocbq->iocb_cmpl) -- piocbq->iocb_cmpl = lpfc_scsi_cmd_iocb_cmpl; -+ if (!piocbq->cmd_cmpl) -+ piocbq->cmd_cmpl = lpfc_scsi_cmd_iocb_cmpl; - piocbq->iocb.ulpTimeout = tmo; - piocbq->vport = vport; - return 0; -@@ -4949,7 +4855,7 @@ static int lpfc_scsi_prep_cmnd_buf_s4(struct lpfc_vport *vport, - pwqeq->vport = vport; - pwqeq->context1 = lpfc_cmd; - pwqeq->hba_wqidx = lpfc_cmd->hdwq_no; -- pwqeq->wqe_cmpl = lpfc_fcp_io_cmd_wqe_cmpl; -+ pwqeq->cmd_cmpl = lpfc_fcp_io_cmd_wqe_cmpl; - - return 0; - } -@@ -4996,7 +4902,7 @@ lpfc_scsi_prep_cmnd(struct lpfc_vport *vport, struct lpfc_io_buf *lpfc_cmd, - } - - /** -- * lpfc_scsi_prep_task_mgmt_cmd - Convert SLI3 scsi TM cmd to FCP info unit -+ * lpfc_scsi_prep_task_mgmt_cmd_s3 - Convert SLI3 scsi TM cmd to FCP info unit - * @vport: The virtual port for which this call is being executed. - * @lpfc_cmd: Pointer to lpfc_io_buf data structure. - * @lun: Logical unit number. -@@ -5010,10 +4916,9 @@ lpfc_scsi_prep_cmnd(struct lpfc_vport *vport, struct lpfc_io_buf *lpfc_cmd, - * 1 - Success - **/ - static int --lpfc_scsi_prep_task_mgmt_cmd(struct lpfc_vport *vport, -- struct lpfc_io_buf *lpfc_cmd, -- uint64_t lun, -- uint8_t task_mgmt_cmd) -+lpfc_scsi_prep_task_mgmt_cmd_s3(struct lpfc_vport *vport, -+ struct lpfc_io_buf *lpfc_cmd, -+ u64 lun, u8 task_mgmt_cmd) - { - struct lpfc_iocbq *piocbq; - IOCB_t *piocb; -@@ -5034,15 +4939,10 @@ lpfc_scsi_prep_task_mgmt_cmd(struct lpfc_vport *vport, - memset(fcp_cmnd, 0, sizeof(struct fcp_cmnd)); - int_to_scsilun(lun, &fcp_cmnd->fcp_lun); - fcp_cmnd->fcpCntl2 = task_mgmt_cmd; -- if (vport->phba->sli_rev == 3 && -- !(vport->phba->sli3_options & LPFC_SLI3_BG_ENABLED)) -+ if (!(vport->phba->sli3_options & LPFC_SLI3_BG_ENABLED)) - lpfc_fcpcmd_to_iocb(piocb->unsli3.fcp_ext.icd, fcp_cmnd); - piocb->ulpCommand = CMD_FCP_ICMND64_CR; - piocb->ulpContext = ndlp->nlp_rpi; -- if (vport->phba->sli_rev == LPFC_SLI_REV4) { -- piocb->ulpContext = -- vport->phba->sli4_hba.rpi_ids[ndlp->nlp_rpi]; -- } - piocb->ulpFCP2Rcvy = (ndlp->nlp_fcp_info & NLP_FCP_2_DEVICE) ? 1 : 0; - piocb->ulpClass = (ndlp->nlp_fcp_info & 0x0f); - piocb->ulpPU = 0; -@@ -5058,8 +4958,79 @@ lpfc_scsi_prep_task_mgmt_cmd(struct lpfc_vport *vport, - } else - piocb->ulpTimeout = lpfc_cmd->timeout; - -- if (vport->phba->sli_rev == LPFC_SLI_REV4) -- lpfc_sli4_set_rsp_sgl_last(vport->phba, lpfc_cmd); -+ return 1; -+} -+ -+/** -+ * lpfc_scsi_prep_task_mgmt_cmd_s4 - Convert SLI4 scsi TM cmd to FCP info unit -+ * @vport: The virtual port for which this call is being executed. -+ * @lpfc_cmd: Pointer to lpfc_io_buf data structure. -+ * @lun: Logical unit number. -+ * @task_mgmt_cmd: SCSI task management command. -+ * -+ * This routine creates FCP information unit corresponding to @task_mgmt_cmd -+ * for device with SLI-4 interface spec. 
-+ * -+ * Return codes: -+ * 0 - Error -+ * 1 - Success -+ **/ -+static int -+lpfc_scsi_prep_task_mgmt_cmd_s4(struct lpfc_vport *vport, -+ struct lpfc_io_buf *lpfc_cmd, -+ u64 lun, u8 task_mgmt_cmd) -+{ -+ struct lpfc_iocbq *pwqeq = &lpfc_cmd->cur_iocbq; -+ union lpfc_wqe128 *wqe = &pwqeq->wqe; -+ struct fcp_cmnd *fcp_cmnd; -+ struct lpfc_rport_data *rdata = lpfc_cmd->rdata; -+ struct lpfc_nodelist *ndlp = rdata->pnode; -+ -+ if (!ndlp || ndlp->nlp_state != NLP_STE_MAPPED_NODE) -+ return 0; -+ -+ pwqeq->vport = vport; -+ /* Initialize 64 bytes only */ -+ memset(wqe, 0, sizeof(union lpfc_wqe128)); -+ -+ /* From the icmnd template, initialize words 4 - 11 */ -+ memcpy(&wqe->words[4], &lpfc_icmnd_cmd_template.words[4], -+ sizeof(uint32_t) * 8); -+ -+ fcp_cmnd = lpfc_cmd->fcp_cmnd; -+ /* Clear out any old data in the FCP command area */ -+ memset(fcp_cmnd, 0, sizeof(struct fcp_cmnd)); -+ int_to_scsilun(lun, &fcp_cmnd->fcp_lun); -+ fcp_cmnd->fcpCntl3 = 0; -+ fcp_cmnd->fcpCntl2 = task_mgmt_cmd; -+ -+ bf_set(payload_offset_len, &wqe->fcp_icmd, -+ sizeof(struct fcp_cmnd) + sizeof(struct fcp_rsp)); -+ bf_set(cmd_buff_len, &wqe->fcp_icmd, 0); -+ bf_set(wqe_ctxt_tag, &wqe->generic.wqe_com, /* ulpContext */ -+ vport->phba->sli4_hba.rpi_ids[ndlp->nlp_rpi]); -+ bf_set(wqe_erp, &wqe->fcp_icmd.wqe_com, -+ ((ndlp->nlp_fcp_info & NLP_FCP_2_DEVICE) ? 1 : 0)); -+ bf_set(wqe_class, &wqe->fcp_icmd.wqe_com, -+ (ndlp->nlp_fcp_info & 0x0f)); -+ -+ /* ulpTimeout is only one byte */ -+ if (lpfc_cmd->timeout > 0xff) { -+ /* -+ * Do not timeout the command at the firmware level. -+ * The driver will provide the timeout mechanism. -+ */ -+ bf_set(wqe_tmo, &wqe->fcp_icmd.wqe_com, 0); -+ } else { -+ bf_set(wqe_tmo, &wqe->fcp_icmd.wqe_com, lpfc_cmd->timeout); -+ } -+ -+ lpfc_prep_embed_io(vport->phba, lpfc_cmd); -+ bf_set(wqe_xri_tag, &wqe->generic.wqe_com, pwqeq->sli4_xritag); -+ wqe->generic.wqe_com.abort_tag = pwqeq->iotag; -+ bf_set(wqe_reqtag, &wqe->generic.wqe_com, pwqeq->iotag); -+ -+ lpfc_sli4_set_rsp_sgl_last(vport->phba, lpfc_cmd); - - return 1; - } -@@ -5086,6 +5057,8 @@ lpfc_scsi_api_table_setup(struct lpfc_hba *phba, uint8_t dev_grp) - phba->lpfc_release_scsi_buf = lpfc_release_scsi_buf_s3; - phba->lpfc_get_scsi_buf = lpfc_get_scsi_buf_s3; - phba->lpfc_scsi_prep_cmnd_buf = lpfc_scsi_prep_cmnd_buf_s3; -+ phba->lpfc_scsi_prep_task_mgmt_cmd = -+ lpfc_scsi_prep_task_mgmt_cmd_s3; - break; - case LPFC_PCI_DEV_OC: - phba->lpfc_scsi_prep_dma_buf = lpfc_scsi_prep_dma_buf_s4; -@@ -5093,6 +5066,8 @@ lpfc_scsi_api_table_setup(struct lpfc_hba *phba, uint8_t dev_grp) - phba->lpfc_release_scsi_buf = lpfc_release_scsi_buf_s4; - phba->lpfc_get_scsi_buf = lpfc_get_scsi_buf_s4; - phba->lpfc_scsi_prep_cmnd_buf = lpfc_scsi_prep_cmnd_buf_s4; -+ phba->lpfc_scsi_prep_task_mgmt_cmd = -+ lpfc_scsi_prep_task_mgmt_cmd_s4; - break; - default: - lpfc_printf_log(phba, KERN_ERR, LOG_TRACE_EVENT, -@@ -5571,6 +5546,7 @@ lpfc_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *cmnd) - { - struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata; - struct lpfc_hba *phba = vport->phba; -+ struct lpfc_iocbq *cur_iocbq = NULL; - struct lpfc_rport_data *rdata; - struct lpfc_nodelist *ndlp; - struct lpfc_io_buf *lpfc_cmd; -@@ -5664,6 +5640,7 @@ lpfc_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *cmnd) - } - lpfc_cmd->rx_cmd_start = start; - -+ cur_iocbq = &lpfc_cmd->cur_iocbq; - /* - * Store the midlayer's command structure for the completion phase - * and complete the command initialization. 
-@@ -5671,7 +5648,7 @@ lpfc_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *cmnd) - lpfc_cmd->pCmd = cmnd; - lpfc_cmd->rdata = rdata; - lpfc_cmd->ndlp = ndlp; -- lpfc_cmd->cur_iocbq.iocb_cmpl = NULL; -+ cur_iocbq->cmd_cmpl = NULL; - cmnd->host_scribble = (unsigned char *)lpfc_cmd; - - err = lpfc_scsi_prep_cmnd(vport, lpfc_cmd, ndlp); -@@ -5713,7 +5690,6 @@ lpfc_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *cmnd) - goto out_host_busy_free_buf; - } - -- - /* check the necessary and sufficient condition to support VMID */ - if (lpfc_is_vmid_enabled(phba) && - (ndlp->vmid_support || -@@ -5726,20 +5702,18 @@ lpfc_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *cmnd) - if (uuid) { - err = lpfc_vmid_get_appid(vport, uuid, cmnd, - (union lpfc_vmid_io_tag *) -- &lpfc_cmd->cur_iocbq.vmid_tag); -+ &cur_iocbq->vmid_tag); - if (!err) -- lpfc_cmd->cur_iocbq.iocb_flag |= LPFC_IO_VMID; -+ cur_iocbq->cmd_flag |= LPFC_IO_VMID; +@@ -5732,7 +5751,6 @@ lpfc_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *cmnd) } } @@ -253697,60 +306426,7 @@ index befdf864c43bd..41313fcaf84a3 100644 #ifdef CONFIG_SCSI_LPFC_DEBUG_FS if (unlikely(phba->hdwqstat_on & LPFC_CHECK_SCSI_IO)) this_cpu_inc(phba->sli4_hba.c_stat->xmt_io); - #endif - /* Issue I/O to adapter */ -- err = lpfc_sli_issue_fcp_io(phba, LPFC_FCP_RING, -- &lpfc_cmd->cur_iocbq, -+ err = lpfc_sli_issue_fcp_io(phba, LPFC_FCP_RING, cur_iocbq, - SLI_IOCB_RET_IOCB); - #ifdef CONFIG_SCSI_LPFC_DEBUG_FS - if (start) { -@@ -5752,25 +5726,25 @@ lpfc_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *cmnd) - #endif - if (err) { - lpfc_printf_vlog(vport, KERN_INFO, LOG_FCP, -- "3376 FCP could not issue IOCB err %x " -- "FCP cmd x%x <%d/%llu> " -- "sid: x%x did: x%x oxid: x%x " -- "Data: x%x x%x x%x x%x\n", -- err, cmnd->cmnd[0], -- cmnd->device ? cmnd->device->id : 0xffff, -- cmnd->device ? cmnd->device->lun : (u64)-1, -- vport->fc_myDID, ndlp->nlp_DID, -- phba->sli_rev == LPFC_SLI_REV4 ? -- lpfc_cmd->cur_iocbq.sli4_xritag : 0xffff, -- phba->sli_rev == LPFC_SLI_REV4 ? -- phba->sli4_hba.rpi_ids[ndlp->nlp_rpi] : -- lpfc_cmd->cur_iocbq.iocb.ulpContext, -- lpfc_cmd->cur_iocbq.iotag, -- phba->sli_rev == LPFC_SLI_REV4 ? -- bf_get(wqe_tmo, -- &lpfc_cmd->cur_iocbq.wqe.generic.wqe_com) : -- lpfc_cmd->cur_iocbq.iocb.ulpTimeout, -- (uint32_t)(scsi_cmd_to_rq(cmnd)->timeout / 1000)); -+ "3376 FCP could not issue iocb err %x " -+ "FCP cmd x%x <%d/%llu> " -+ "sid: x%x did: x%x oxid: x%x " -+ "Data: x%x x%x x%x x%x\n", -+ err, cmnd->cmnd[0], -+ cmnd->device ? cmnd->device->id : 0xffff, -+ cmnd->device ? cmnd->device->lun : (u64)-1, -+ vport->fc_myDID, ndlp->nlp_DID, -+ phba->sli_rev == LPFC_SLI_REV4 ? -+ cur_iocbq->sli4_xritag : 0xffff, -+ phba->sli_rev == LPFC_SLI_REV4 ? -+ phba->sli4_hba.rpi_ids[ndlp->nlp_rpi] : -+ cur_iocbq->iocb.ulpContext, -+ cur_iocbq->iotag, -+ phba->sli_rev == LPFC_SLI_REV4 ? 
-+ bf_get(wqe_tmo, -+ &cur_iocbq->wqe.generic.wqe_com) : -+ cur_iocbq->iocb.ulpTimeout, -+ (uint32_t)(scsi_cmd_to_rq(cmnd)->timeout / 1000)); - - goto out_host_busy_free_buf; - } -@@ -5885,25 +5859,25 @@ lpfc_abort_handler(struct scsi_cmnd *cmnd) +@@ -5885,25 +5903,25 @@ lpfc_abort_handler(struct scsi_cmnd *cmnd) if (!lpfc_cmd) return ret; @@ -253782,7 +306458,7 @@ index befdf864c43bd..41313fcaf84a3 100644 } iocb = &lpfc_cmd->cur_iocbq; -@@ -5911,12 +5885,12 @@ lpfc_abort_handler(struct scsi_cmnd *cmnd) +@@ -5911,7 +5929,7 @@ lpfc_abort_handler(struct scsi_cmnd *cmnd) pring_s4 = phba->sli4_hba.hdwq[iocb->hba_wqidx].io_wq->pring; if (!pring_s4) { ret = FAILED; @@ -253791,19 +306467,7 @@ index befdf864c43bd..41313fcaf84a3 100644 } spin_lock(&pring_s4->ring_lock); } - /* the command is in process of being cancelled */ -- if (!(iocb->iocb_flag & LPFC_IO_ON_TXCMPLQ)) { -+ if (!(iocb->cmd_flag & LPFC_IO_ON_TXCMPLQ)) { - lpfc_printf_vlog(vport, KERN_WARNING, LOG_FCP, - "3169 SCSI Layer abort requested I/O has been " - "cancelled by LLD.\n"); -@@ -5939,13 +5913,13 @@ lpfc_abort_handler(struct scsi_cmnd *cmnd) - BUG_ON(iocb->context1 != lpfc_cmd); - - /* abort issued in recovery is still in progress */ -- if (iocb->iocb_flag & LPFC_DRIVER_ABORTED) { -+ if (iocb->cmd_flag & LPFC_DRIVER_ABORTED) { - lpfc_printf_vlog(vport, KERN_WARNING, LOG_FCP, +@@ -5944,8 +5962,8 @@ lpfc_abort_handler(struct scsi_cmnd *cmnd) "3389 SCSI Layer I/O Abort Request is pending\n"); if (phba->sli_rev == LPFC_SLI_REV4) spin_unlock(&pring_s4->ring_lock); @@ -253814,7 +306478,7 @@ index befdf864c43bd..41313fcaf84a3 100644 goto wait_for_cmpl; } -@@ -5966,15 +5940,13 @@ lpfc_abort_handler(struct scsi_cmnd *cmnd) +@@ -5966,15 +5984,13 @@ lpfc_abort_handler(struct scsi_cmnd *cmnd) if (ret_val != IOCB_SUCCESS) { /* Indicate the IO is not being aborted by the driver. */ lpfc_cmd->waitq = NULL; @@ -253833,16 +306497,7 @@ index befdf864c43bd..41313fcaf84a3 100644 if (phba->cfg_poll & DISABLE_FCP_RING_INT) lpfc_sli_handle_fast_ring_event(phba, -@@ -5982,7 +5954,7 @@ lpfc_abort_handler(struct scsi_cmnd *cmnd) - - wait_for_cmpl: - /* -- * iocb_flag is set to LPFC_DRIVER_ABORTED before we wait -+ * cmd_flag is set to LPFC_DRIVER_ABORTED before we wait - * for abort to complete. 
- */ - wait_event_timeout(waitq, -@@ -6009,10 +5981,9 @@ wait_for_cmpl: +@@ -6009,10 +6025,9 @@ wait_for_cmpl: out_unlock_ring: if (phba->sli_rev == LPFC_SLI_REV4) spin_unlock(&pring_s4->ring_lock); @@ -253856,88 +306511,7 @@ index befdf864c43bd..41313fcaf84a3 100644 out: lpfc_printf_vlog(vport, KERN_WARNING, LOG_FCP, "0749 SCSI Layer I/O Abort Request Status x%x ID %d " -@@ -6150,7 +6121,7 @@ lpfc_send_taskmgmt(struct lpfc_vport *vport, struct scsi_cmnd *cmnd, - return FAILED; - pnode = rdata->pnode; - -- lpfc_cmd = lpfc_get_scsi_buf(phba, pnode, NULL); -+ lpfc_cmd = lpfc_get_scsi_buf(phba, rdata->pnode, NULL); - if (lpfc_cmd == NULL) - return FAILED; - lpfc_cmd->timeout = phba->cfg_task_mgmt_tmo; -@@ -6158,8 +6129,8 @@ lpfc_send_taskmgmt(struct lpfc_vport *vport, struct scsi_cmnd *cmnd, - lpfc_cmd->pCmd = cmnd; - lpfc_cmd->ndlp = pnode; - -- status = lpfc_scsi_prep_task_mgmt_cmd(vport, lpfc_cmd, lun_id, -- task_mgmt_cmd); -+ status = phba->lpfc_scsi_prep_task_mgmt_cmd(vport, lpfc_cmd, lun_id, -+ task_mgmt_cmd); - if (!status) { - lpfc_release_scsi_buf(phba, lpfc_cmd); - return FAILED; -@@ -6171,38 +6142,41 @@ lpfc_send_taskmgmt(struct lpfc_vport *vport, struct scsi_cmnd *cmnd, - lpfc_release_scsi_buf(phba, lpfc_cmd); - return FAILED; - } -- iocbq->iocb_cmpl = lpfc_tskmgmt_def_cmpl; -+ iocbq->cmd_cmpl = lpfc_tskmgmt_def_cmpl; -+ iocbq->vport = vport; - - lpfc_printf_vlog(vport, KERN_INFO, LOG_FCP, - "0702 Issue %s to TGT %d LUN %llu " - "rpi x%x nlp_flag x%x Data: x%x x%x\n", - lpfc_taskmgmt_name(task_mgmt_cmd), tgt_id, lun_id, - pnode->nlp_rpi, pnode->nlp_flag, iocbq->sli4_xritag, -- iocbq->iocb_flag); -+ iocbq->cmd_flag); - - status = lpfc_sli_issue_iocb_wait(phba, LPFC_FCP_RING, - iocbq, iocbqrsp, lpfc_cmd->timeout); - if ((status != IOCB_SUCCESS) || -- (iocbqrsp->iocb.ulpStatus != IOSTAT_SUCCESS)) { -+ (get_job_ulpstatus(phba, iocbqrsp) != IOSTAT_SUCCESS)) { - if (status != IOCB_SUCCESS || -- iocbqrsp->iocb.ulpStatus != IOSTAT_FCP_RSP_ERROR) -+ get_job_ulpstatus(phba, iocbqrsp) != IOSTAT_FCP_RSP_ERROR) - lpfc_printf_vlog(vport, KERN_ERR, LOG_TRACE_EVENT, - "0727 TMF %s to TGT %d LUN %llu " -- "failed (%d, %d) iocb_flag x%x\n", -+ "failed (%d, %d) cmd_flag x%x\n", - lpfc_taskmgmt_name(task_mgmt_cmd), - tgt_id, lun_id, -- iocbqrsp->iocb.ulpStatus, -- iocbqrsp->iocb.un.ulpWord[4], -- iocbq->iocb_flag); -+ get_job_ulpstatus(phba, iocbqrsp), -+ get_job_word4(phba, iocbqrsp), -+ iocbq->cmd_flag); - /* if ulpStatus != IOCB_SUCCESS, then status == IOCB_SUCCESS */ - if (status == IOCB_SUCCESS) { -- if (iocbqrsp->iocb.ulpStatus == IOSTAT_FCP_RSP_ERROR) -+ if (get_job_ulpstatus(phba, iocbqrsp) == -+ IOSTAT_FCP_RSP_ERROR) - /* Something in the FCP_RSP was invalid. 
- * Check conditions */ - ret = lpfc_check_fcp_rsp(vport, lpfc_cmd); - else - ret = FAILED; -- } else if (status == IOCB_TIMEDOUT) { -+ } else if ((status == IOCB_TIMEDOUT) || -+ (status == IOCB_ABORTED)) { - ret = TIMEOUT_ERROR; - } else { - ret = FAILED; -@@ -6212,7 +6186,7 @@ lpfc_send_taskmgmt(struct lpfc_vport *vport, struct scsi_cmnd *cmnd, - - lpfc_sli_release_iocbq(phba, iocbqrsp); - -- if (ret != TIMEOUT_ERROR) -+ if (status != IOCB_TIMEDOUT) - lpfc_release_scsi_buf(phba, lpfc_cmd); - - return ret; -@@ -6455,28 +6429,28 @@ lpfc_target_reset_handler(struct scsi_cmnd *cmnd) +@@ -6455,28 +6470,28 @@ lpfc_target_reset_handler(struct scsi_cmnd *cmnd) /* Issue LOGO, if no LOGO is outstanding */ spin_lock_irqsave(&pnode->lock, flags); @@ -253971,7 +306545,7 @@ index befdf864c43bd..41313fcaf84a3 100644 } else { spin_lock_irqsave(&pnode->lock, flags); } -@@ -6628,6 +6602,13 @@ lpfc_host_reset_handler(struct scsi_cmnd *cmnd) +@@ -6628,6 +6643,13 @@ lpfc_host_reset_handler(struct scsi_cmnd *cmnd) if (rc) goto error; @@ -253986,222 +306560,22 @@ index befdf864c43bd..41313fcaf84a3 100644 if (rc) goto error; diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c -index 026a1196a54d5..f594a006d04c6 100644 +index 026a1196a54d5..df3b190fccd16 100644 --- a/drivers/scsi/lpfc/lpfc_sli.c +++ b/drivers/scsi/lpfc/lpfc_sli.c -@@ -70,8 +70,9 @@ static int lpfc_sli_issue_mbox_s4(struct lpfc_hba *, LPFC_MBOXQ_t *, - uint32_t); - static int lpfc_sli4_read_rev(struct lpfc_hba *, LPFC_MBOXQ_t *, - uint8_t *, uint32_t *); --static struct lpfc_iocbq *lpfc_sli4_els_wcqe_to_rspiocbq(struct lpfc_hba *, -- struct lpfc_iocbq *); -+static struct lpfc_iocbq * -+lpfc_sli4_els_preprocess_rspiocbq(struct lpfc_hba *phba, -+ struct lpfc_iocbq *rspiocbq); - static void lpfc_sli4_send_seq_to_ulp(struct lpfc_vport *, - struct hbq_dmabuf *); - static void lpfc_sli4_handle_mds_loopback(struct lpfc_vport *vport, -@@ -89,6 +90,9 @@ static struct lpfc_cqe *lpfc_sli4_cq_get(struct lpfc_queue *q); - static void __lpfc_sli4_consume_cqe(struct lpfc_hba *phba, - struct lpfc_queue *cq, - struct lpfc_cqe *cqe); -+static uint16_t lpfc_wqe_bpl2sgl(struct lpfc_hba *phba, -+ struct lpfc_iocbq *pwqeq, -+ struct lpfc_sglq *sglq); - - union lpfc_wqe128 lpfc_iread_cmd_template; - union lpfc_wqe128 lpfc_iwrite_cmd_template; -@@ -1254,21 +1258,21 @@ __lpfc_sli_get_els_sglq(struct lpfc_hba *phba, struct lpfc_iocbq *piocbq) - struct lpfc_sli_ring *pring = NULL; - int found = 0; - -- if (piocbq->iocb_flag & LPFC_IO_NVME_LS) -+ if (piocbq->cmd_flag & LPFC_IO_NVME_LS) - pring = phba->sli4_hba.nvmels_wq->pring; - else - pring = lpfc_phba_elsring(phba); - - lockdep_assert_held(&pring->ring_lock); - -- if (piocbq->iocb_flag & LPFC_IO_FCP) { -+ if (piocbq->cmd_flag & LPFC_IO_FCP) { - lpfc_cmd = (struct lpfc_io_buf *) piocbq->context1; - ndlp = lpfc_cmd->rdata->pnode; - } else if ((piocbq->iocb.ulpCommand == CMD_GEN_REQUEST64_CR) && -- !(piocbq->iocb_flag & LPFC_IO_LIBDFC)) { -+ !(piocbq->cmd_flag & LPFC_IO_LIBDFC)) { - ndlp = piocbq->context_un.ndlp; -- } else if (piocbq->iocb_flag & LPFC_IO_LIBDFC) { -- if (piocbq->iocb_flag & LPFC_IO_LOOPBACK) -+ } else if (piocbq->cmd_flag & LPFC_IO_LIBDFC) { -+ if (piocbq->cmd_flag & LPFC_IO_LOOPBACK) - ndlp = NULL; - else - ndlp = piocbq->context_un.ndlp; -@@ -1380,7 +1384,7 @@ static void - __lpfc_sli_release_iocbq_s4(struct lpfc_hba *phba, struct lpfc_iocbq *iocbq) - { - struct lpfc_sglq *sglq; -- size_t start_clean = offsetof(struct lpfc_iocbq, iocb); -+ size_t start_clean = offsetof(struct 
lpfc_iocbq, wqe); - unsigned long iflag = 0; - struct lpfc_sli_ring *pring; - -@@ -1391,7 +1395,7 @@ __lpfc_sli_release_iocbq_s4(struct lpfc_hba *phba, struct lpfc_iocbq *iocbq) - - - if (sglq) { -- if (iocbq->iocb_flag & LPFC_IO_NVMET) { -+ if (iocbq->cmd_flag & LPFC_IO_NVMET) { - spin_lock_irqsave(&phba->sli4_hba.sgl_list_lock, - iflag); - sglq->state = SGL_FREED; -@@ -1403,8 +1407,9 @@ __lpfc_sli_release_iocbq_s4(struct lpfc_hba *phba, struct lpfc_iocbq *iocbq) - goto out; +@@ -1404,7 +1404,8 @@ __lpfc_sli_release_iocbq_s4(struct lpfc_hba *phba, struct lpfc_iocbq *iocbq) } -- if ((iocbq->iocb_flag & LPFC_EXCHANGE_BUSY) && + if ((iocbq->iocb_flag & LPFC_EXCHANGE_BUSY) && - (sglq->state != SGL_XRI_ABORTED)) { -+ if ((iocbq->cmd_flag & LPFC_EXCHANGE_BUSY) && + (!(unlikely(pci_channel_offline(phba->pcidev)))) && + sglq->state != SGL_XRI_ABORTED) { spin_lock_irqsave(&phba->sli4_hba.sgl_list_lock, iflag); -@@ -1439,7 +1444,7 @@ out: - memset((char *)iocbq + start_clean, 0, sizeof(*iocbq) - start_clean); - iocbq->sli4_lxritag = NO_XRI; - iocbq->sli4_xritag = NO_XRI; -- iocbq->iocb_flag &= ~(LPFC_IO_NVME | LPFC_IO_NVMET | LPFC_IO_CMF | -+ iocbq->cmd_flag &= ~(LPFC_IO_NVME | LPFC_IO_NVMET | LPFC_IO_CMF | - LPFC_IO_NVME_LS); - list_add_tail(&iocbq->list, &phba->lpfc_iocb_list); - } -@@ -1529,17 +1534,17 @@ lpfc_sli_cancel_iocbs(struct lpfc_hba *phba, struct list_head *iocblist, - - while (!list_empty(iocblist)) { - list_remove_head(iocblist, piocb, struct lpfc_iocbq, list); -- if (piocb->wqe_cmpl) { -- if (piocb->iocb_flag & LPFC_IO_NVME) -+ if (piocb->cmd_cmpl) { -+ if (piocb->cmd_flag & LPFC_IO_NVME) - lpfc_nvme_cancel_iocb(phba, piocb, - ulpstatus, ulpWord4); - else - lpfc_sli_release_iocbq(phba, piocb); - -- } else if (piocb->iocb_cmpl) { -+ } else if (piocb->cmd_cmpl) { - piocb->iocb.ulpStatus = ulpstatus; - piocb->iocb.un.ulpWord[4] = ulpWord4; -- (piocb->iocb_cmpl) (phba, piocb, piocb); -+ (piocb->cmd_cmpl) (phba, piocb, piocb); - } else { - lpfc_sli_release_iocbq(phba, piocb); - } -@@ -1731,7 +1736,7 @@ lpfc_sli_ringtxcmpl_put(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, - BUG_ON(!piocb); - - list_add_tail(&piocb->list, &pring->txcmplq); -- piocb->iocb_flag |= LPFC_IO_ON_TXCMPLQ; -+ piocb->cmd_flag |= LPFC_IO_ON_TXCMPLQ; - pring->txcmplq_cnt++; - - if ((unlikely(pring->ringno == LPFC_ELS_RING)) && -@@ -1772,7 +1777,7 @@ lpfc_sli_ringtx_get(struct lpfc_hba *phba, struct lpfc_sli_ring *pring) - * lpfc_cmf_sync_cmpl - Process a CMF_SYNC_WQE cmpl - * @phba: Pointer to HBA context object. - * @cmdiocb: Pointer to driver command iocb object. -- * @cmf_cmpl: Pointer to completed WCQE. -+ * @rspiocb: Pointer to driver response iocb object. - * - * This routine will inform the driver of any BW adjustments we need - * to make. 
These changes will be picked up during the next CMF -@@ -1781,10 +1786,11 @@ lpfc_sli_ringtx_get(struct lpfc_hba *phba, struct lpfc_sli_ring *pring) - **/ - static void - lpfc_cmf_sync_cmpl(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, -- struct lpfc_wcqe_complete *cmf_cmpl) -+ struct lpfc_iocbq *rspiocb) - { - union lpfc_wqe128 *wqe; - uint32_t status, info; -+ struct lpfc_wcqe_complete *wcqe = &rspiocb->wcqe_cmpl; - uint64_t bw, bwdif, slop; - uint64_t pcent, bwpcent; - int asig, afpin, sigcnt, fpincnt; -@@ -1792,22 +1798,22 @@ lpfc_cmf_sync_cmpl(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, - char *s; - - /* First check for error */ -- status = bf_get(lpfc_wcqe_c_status, cmf_cmpl); -+ status = bf_get(lpfc_wcqe_c_status, wcqe); - if (status) { - lpfc_printf_log(phba, KERN_INFO, LOG_CGN_MGMT, - "6211 CMF_SYNC_WQE Error " - "req_tag x%x status x%x hwstatus x%x " - "tdatap x%x parm x%x\n", -- bf_get(lpfc_wcqe_c_request_tag, cmf_cmpl), -- bf_get(lpfc_wcqe_c_status, cmf_cmpl), -- bf_get(lpfc_wcqe_c_hw_status, cmf_cmpl), -- cmf_cmpl->total_data_placed, -- cmf_cmpl->parameter); -+ bf_get(lpfc_wcqe_c_request_tag, wcqe), -+ bf_get(lpfc_wcqe_c_status, wcqe), -+ bf_get(lpfc_wcqe_c_hw_status, wcqe), -+ wcqe->total_data_placed, -+ wcqe->parameter); - goto out; - } - - /* Gather congestion information on a successful cmpl */ -- info = cmf_cmpl->parameter; -+ info = wcqe->parameter; - phba->cmf_active_info = info; - - /* See if firmware info count is valid or has changed */ -@@ -1816,15 +1822,15 @@ lpfc_cmf_sync_cmpl(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, - else - phba->cmf_info_per_interval = info; - -- tdp = bf_get(lpfc_wcqe_c_cmf_bw, cmf_cmpl); -- cg = bf_get(lpfc_wcqe_c_cmf_cg, cmf_cmpl); -+ tdp = bf_get(lpfc_wcqe_c_cmf_bw, wcqe); -+ cg = bf_get(lpfc_wcqe_c_cmf_cg, wcqe); - - /* Get BW requirement from firmware */ - bw = (uint64_t)tdp * LPFC_CMF_BLK_SIZE; - if (!bw) { - lpfc_printf_log(phba, KERN_INFO, LOG_CGN_MGMT, - "6212 CMF_SYNC_WQE x%x: NULL bw\n", -- bf_get(lpfc_wcqe_c_request_tag, cmf_cmpl)); -+ bf_get(lpfc_wcqe_c_request_tag, wcqe)); - goto out; - } - -@@ -1933,7 +1939,7 @@ lpfc_issue_cmf_sync_wqe(struct lpfc_hba *phba, u32 ms, u64 total) - sync_buf = __lpfc_sli_get_iocbq(phba); - if (!sync_buf) { - lpfc_printf_log(phba, KERN_ERR, LOG_CGN_MGMT, -- "6213 No available WQEs for CMF_SYNC_WQE\n"); -+ "6244 No available WQEs for CMF_SYNC_WQE\n"); - ret_val = ENOMEM; - goto out_unlock; - } -@@ -1998,19 +2004,20 @@ initpath: - bf_set(cmf_sync_cqid, &wqe->cmf_sync, LPFC_WQE_CQ_ID_DEFAULT); - - sync_buf->vport = phba->pport; -- sync_buf->wqe_cmpl = lpfc_cmf_sync_cmpl; -- sync_buf->iocb_cmpl = NULL; -+ sync_buf->cmd_cmpl = lpfc_cmf_sync_cmpl; - sync_buf->context1 = NULL; - sync_buf->context2 = NULL; - sync_buf->context3 = NULL; - sync_buf->sli4_xritag = NO_XRI; +@@ -2007,10 +2008,12 @@ initpath: -- sync_buf->iocb_flag |= LPFC_IO_CMF; -+ sync_buf->cmd_flag |= LPFC_IO_CMF; + sync_buf->iocb_flag |= LPFC_IO_CMF; ret_val = lpfc_sli4_issue_wqe(phba, &phba->sli4_hba.hdwq[0], sync_buf); - if (ret_val) + if (ret_val) { @@ -254213,582 +306587,7 @@ index 026a1196a54d5..f594a006d04c6 100644 out_unlock: spin_unlock_irqrestore(&phba->hbalock, iflags); return ret_val; -@@ -2172,7 +2179,7 @@ lpfc_sli_submit_iocb(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, - /* - * Set up an iotag - */ -- nextiocb->iocb.ulpIoTag = (nextiocb->iocb_cmpl) ? nextiocb->iotag : 0; -+ nextiocb->iocb.ulpIoTag = (nextiocb->cmd_cmpl) ? 
nextiocb->iotag : 0; - - - if (pring->ringno == LPFC_ELS_RING) { -@@ -2193,9 +2200,9 @@ lpfc_sli_submit_iocb(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, - /* - * If there is no completion routine to call, we can release the - * IOCB buffer back right now. For IOCBs, like QUE_RING_BUF, -- * that have no rsp ring completion, iocb_cmpl MUST be NULL. -+ * that have no rsp ring completion, cmd_cmpl MUST be NULL. - */ -- if (nextiocb->iocb_cmpl) -+ if (nextiocb->cmd_cmpl) - lpfc_sli_ringtxcmpl_put(phba, pring, nextiocb); - else - __lpfc_sli_release_iocbq(phba, nextiocb); -@@ -3549,36 +3556,28 @@ lpfc_sli_iocbq_lookup(struct lpfc_hba *phba, - struct lpfc_iocbq *prspiocb) - { - struct lpfc_iocbq *cmd_iocb = NULL; -- uint16_t iotag; -- spinlock_t *temp_lock = NULL; -- unsigned long iflag = 0; -+ u16 iotag; - - if (phba->sli_rev == LPFC_SLI_REV4) -- temp_lock = &pring->ring_lock; -+ iotag = get_wqe_reqtag(prspiocb); - else -- temp_lock = &phba->hbalock; -- -- spin_lock_irqsave(temp_lock, iflag); -- iotag = prspiocb->iocb.ulpIoTag; -+ iotag = prspiocb->iocb.ulpIoTag; - - if (iotag != 0 && iotag <= phba->sli.last_iotag) { - cmd_iocb = phba->sli.iocbq_lookup[iotag]; -- if (cmd_iocb->iocb_flag & LPFC_IO_ON_TXCMPLQ) { -+ if (cmd_iocb->cmd_flag & LPFC_IO_ON_TXCMPLQ) { - /* remove from txcmpl queue list */ - list_del_init(&cmd_iocb->list); -- cmd_iocb->iocb_flag &= ~LPFC_IO_ON_TXCMPLQ; -+ cmd_iocb->cmd_flag &= ~LPFC_IO_ON_TXCMPLQ; - pring->txcmplq_cnt--; -- spin_unlock_irqrestore(temp_lock, iflag); - return cmd_iocb; - } - } - -- spin_unlock_irqrestore(temp_lock, iflag); - lpfc_printf_log(phba, KERN_ERR, LOG_TRACE_EVENT, - "0317 iotag x%x is out of " -- "range: max iotag x%x wd0 x%x\n", -- iotag, phba->sli.last_iotag, -- *(((uint32_t *) &prspiocb->iocb) + 7)); -+ "range: max iotag x%x\n", -+ iotag, phba->sli.last_iotag); - return NULL; - } - -@@ -3599,33 +3598,23 @@ lpfc_sli_iocbq_lookup_by_tag(struct lpfc_hba *phba, - struct lpfc_sli_ring *pring, uint16_t iotag) - { - struct lpfc_iocbq *cmd_iocb = NULL; -- spinlock_t *temp_lock = NULL; -- unsigned long iflag = 0; - -- if (phba->sli_rev == LPFC_SLI_REV4) -- temp_lock = &pring->ring_lock; -- else -- temp_lock = &phba->hbalock; -- -- spin_lock_irqsave(temp_lock, iflag); - if (iotag != 0 && iotag <= phba->sli.last_iotag) { - cmd_iocb = phba->sli.iocbq_lookup[iotag]; -- if (cmd_iocb->iocb_flag & LPFC_IO_ON_TXCMPLQ) { -+ if (cmd_iocb->cmd_flag & LPFC_IO_ON_TXCMPLQ) { - /* remove from txcmpl queue list */ - list_del_init(&cmd_iocb->list); -- cmd_iocb->iocb_flag &= ~LPFC_IO_ON_TXCMPLQ; -+ cmd_iocb->cmd_flag &= ~LPFC_IO_ON_TXCMPLQ; - pring->txcmplq_cnt--; -- spin_unlock_irqrestore(temp_lock, iflag); - return cmd_iocb; - } - } - -- spin_unlock_irqrestore(temp_lock, iflag); - lpfc_printf_log(phba, KERN_ERR, LOG_TRACE_EVENT, - "0372 iotag x%x lookup error: max iotag (x%x) " -- "iocb_flag x%x\n", -+ "cmd_flag x%x\n", - iotag, phba->sli.last_iotag, -- cmd_iocb ? cmd_iocb->iocb_flag : 0xffff); -+ cmd_iocb ? 
cmd_iocb->cmd_flag : 0xffff); - return NULL; - } - -@@ -3653,18 +3642,37 @@ lpfc_sli_process_sol_iocb(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, - struct lpfc_iocbq *cmdiocbp; - int rc = 1; - unsigned long iflag; -+ u32 ulp_command, ulp_status, ulp_word4, ulp_context, iotag; - -+ if (phba->sli_rev == LPFC_SLI_REV4) -+ spin_lock_irqsave(&pring->ring_lock, iflag); -+ else -+ spin_lock_irqsave(&phba->hbalock, iflag); - cmdiocbp = lpfc_sli_iocbq_lookup(phba, pring, saveq); -+ if (phba->sli_rev == LPFC_SLI_REV4) -+ spin_unlock_irqrestore(&pring->ring_lock, iflag); -+ else -+ spin_unlock_irqrestore(&phba->hbalock, iflag); -+ -+ ulp_command = get_job_cmnd(phba, saveq); -+ ulp_status = get_job_ulpstatus(phba, saveq); -+ ulp_word4 = get_job_word4(phba, saveq); -+ ulp_context = get_job_ulpcontext(phba, saveq); -+ if (phba->sli_rev == LPFC_SLI_REV4) -+ iotag = get_wqe_reqtag(saveq); -+ else -+ iotag = saveq->iocb.ulpIoTag; -+ - if (cmdiocbp) { -- if (cmdiocbp->iocb_cmpl) { -+ ulp_command = get_job_cmnd(phba, cmdiocbp); -+ if (cmdiocbp->cmd_cmpl) { - /* - * If an ELS command failed send an event to mgmt - * application. - */ -- if (saveq->iocb.ulpStatus && -+ if (ulp_status && - (pring->ringno == LPFC_ELS_RING) && -- (cmdiocbp->iocb.ulpCommand == -- CMD_ELS_REQUEST64_CR)) -+ (ulp_command == CMD_ELS_REQUEST64_CR)) - lpfc_send_els_failure_event(phba, - cmdiocbp, saveq); - -@@ -3674,11 +3682,11 @@ lpfc_sli_process_sol_iocb(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, - */ - if (pring->ringno == LPFC_ELS_RING) { - if ((phba->sli_rev < LPFC_SLI_REV4) && -- (cmdiocbp->iocb_flag & -+ (cmdiocbp->cmd_flag & - LPFC_DRIVER_ABORTED)) { - spin_lock_irqsave(&phba->hbalock, - iflag); -- cmdiocbp->iocb_flag &= -+ cmdiocbp->cmd_flag &= - ~LPFC_DRIVER_ABORTED; - spin_unlock_irqrestore(&phba->hbalock, - iflag); -@@ -3693,12 +3701,12 @@ lpfc_sli_process_sol_iocb(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, - */ - spin_lock_irqsave(&phba->hbalock, - iflag); -- saveq->iocb_flag |= LPFC_DELAY_MEM_FREE; -+ saveq->cmd_flag |= LPFC_DELAY_MEM_FREE; - spin_unlock_irqrestore(&phba->hbalock, - iflag); - } - if (phba->sli_rev == LPFC_SLI_REV4) { -- if (saveq->iocb_flag & -+ if (saveq->cmd_flag & - LPFC_EXCHANGE_BUSY) { - /* Set cmdiocb flag for the - * exchange busy so sgl (xri) -@@ -3708,12 +3716,12 @@ lpfc_sli_process_sol_iocb(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, - */ - spin_lock_irqsave( - &phba->hbalock, iflag); -- cmdiocbp->iocb_flag |= -+ cmdiocbp->cmd_flag |= - LPFC_EXCHANGE_BUSY; - spin_unlock_irqrestore( - &phba->hbalock, iflag); - } -- if (cmdiocbp->iocb_flag & -+ if (cmdiocbp->cmd_flag & - LPFC_DRIVER_ABORTED) { - /* - * Clear LPFC_DRIVER_ABORTED -@@ -3722,34 +3730,34 @@ lpfc_sli_process_sol_iocb(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, - */ - spin_lock_irqsave( - &phba->hbalock, iflag); -- cmdiocbp->iocb_flag &= -+ cmdiocbp->cmd_flag &= - ~LPFC_DRIVER_ABORTED; - spin_unlock_irqrestore( - &phba->hbalock, iflag); -- cmdiocbp->iocb.ulpStatus = -- IOSTAT_LOCAL_REJECT; -- cmdiocbp->iocb.un.ulpWord[4] = -- IOERR_ABORT_REQUESTED; -+ set_job_ulpstatus(cmdiocbp, -+ IOSTAT_LOCAL_REJECT); -+ set_job_ulpword4(cmdiocbp, -+ IOERR_ABORT_REQUESTED); - /* -- * For SLI4, irsiocb contains -+ * For SLI4, irspiocb contains - * NO_XRI in sli_xritag, it - * shall not affect releasing - * sgl (xri) process. 
- */ -- saveq->iocb.ulpStatus = -- IOSTAT_LOCAL_REJECT; -- saveq->iocb.un.ulpWord[4] = -- IOERR_SLI_ABORTED; -+ set_job_ulpstatus(saveq, -+ IOSTAT_LOCAL_REJECT); -+ set_job_ulpword4(saveq, -+ IOERR_SLI_ABORTED); - spin_lock_irqsave( - &phba->hbalock, iflag); -- saveq->iocb_flag |= -+ saveq->cmd_flag |= - LPFC_DELAY_MEM_FREE; - spin_unlock_irqrestore( - &phba->hbalock, iflag); - } - } - } -- (cmdiocbp->iocb_cmpl) (phba, cmdiocbp, saveq); -+ cmdiocbp->cmd_cmpl(phba, cmdiocbp, saveq); - } else - lpfc_sli_release_iocbq(phba, cmdiocbp); - } else { -@@ -3767,12 +3775,8 @@ lpfc_sli_process_sol_iocb(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, - "0322 Ring %d handler: " - "unexpected completion IoTag x%x " - "Data: x%x x%x x%x x%x\n", -- pring->ringno, -- saveq->iocb.ulpIoTag, -- saveq->iocb.ulpStatus, -- saveq->iocb.un.ulpWord[4], -- saveq->iocb.ulpCommand, -- saveq->iocb.ulpContext); -+ pring->ringno, iotag, ulp_status, -+ ulp_word4, ulp_command, ulp_context); - } - } - -@@ -3985,18 +3989,15 @@ lpfc_sli_handle_fast_ring_event(struct lpfc_hba *phba, - break; - } - -- spin_unlock_irqrestore(&phba->hbalock, iflag); - cmdiocbq = lpfc_sli_iocbq_lookup(phba, pring, - &rspiocbq); -- spin_lock_irqsave(&phba->hbalock, iflag); - if (unlikely(!cmdiocbq)) - break; -- if (cmdiocbq->iocb_flag & LPFC_DRIVER_ABORTED) -- cmdiocbq->iocb_flag &= ~LPFC_DRIVER_ABORTED; -- if (cmdiocbq->iocb_cmpl) { -+ if (cmdiocbq->cmd_flag & LPFC_DRIVER_ABORTED) -+ cmdiocbq->cmd_flag &= ~LPFC_DRIVER_ABORTED; -+ if (cmdiocbq->cmd_cmpl) { - spin_unlock_irqrestore(&phba->hbalock, iflag); -- (cmdiocbq->iocb_cmpl)(phba, cmdiocbq, -- &rspiocbq); -+ cmdiocbq->cmd_cmpl(phba, cmdiocbq, &rspiocbq); - spin_lock_irqsave(&phba->hbalock, iflag); - } - break; -@@ -4087,155 +4088,159 @@ lpfc_sli_sp_handle_rspiocb(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, - struct lpfc_iocbq *rspiocbp) - { - struct lpfc_iocbq *saveq; -- struct lpfc_iocbq *cmdiocbp; -+ struct lpfc_iocbq *cmdiocb; - struct lpfc_iocbq *next_iocb; -- IOCB_t *irsp = NULL; -+ IOCB_t *irsp; - uint32_t free_saveq; -- uint8_t iocb_cmd_type; -+ u8 cmd_type; - lpfc_iocb_type type; - unsigned long iflag; -+ u32 ulp_status = get_job_ulpstatus(phba, rspiocbp); -+ u32 ulp_word4 = get_job_word4(phba, rspiocbp); -+ u32 ulp_command = get_job_cmnd(phba, rspiocbp); - int rc; - - spin_lock_irqsave(&phba->hbalock, iflag); - /* First add the response iocb to the countinueq list */ -- list_add_tail(&rspiocbp->list, &(pring->iocb_continueq)); -+ list_add_tail(&rspiocbp->list, &pring->iocb_continueq); - pring->iocb_continueq_cnt++; - -- /* Now, determine whether the list is completed for processing */ -- irsp = &rspiocbp->iocb; -- if (irsp->ulpLe) { -- /* -- * By default, the driver expects to free all resources -- * associated with this iocb completion. -- */ -- free_saveq = 1; -- saveq = list_get_first(&pring->iocb_continueq, -- struct lpfc_iocbq, list); -- irsp = &(saveq->iocb); -- list_del_init(&pring->iocb_continueq); -- pring->iocb_continueq_cnt = 0; -+ /* -+ * By default, the driver expects to free all resources -+ * associated with this iocb completion. -+ */ -+ free_saveq = 1; -+ saveq = list_get_first(&pring->iocb_continueq, -+ struct lpfc_iocbq, list); -+ list_del_init(&pring->iocb_continueq); -+ pring->iocb_continueq_cnt = 0; - -- pring->stats.iocb_rsp++; -+ pring->stats.iocb_rsp++; - -- /* -- * If resource errors reported from HBA, reduce -- * queuedepths of the SCSI device. 
-- */ -- if ((irsp->ulpStatus == IOSTAT_LOCAL_REJECT) && -- ((irsp->un.ulpWord[4] & IOERR_PARAM_MASK) == -- IOERR_NO_RESOURCES)) { -- spin_unlock_irqrestore(&phba->hbalock, iflag); -- phba->lpfc_rampdown_queue_depth(phba); -- spin_lock_irqsave(&phba->hbalock, iflag); -- } -+ /* -+ * If resource errors reported from HBA, reduce -+ * queuedepths of the SCSI device. -+ */ -+ if (ulp_status == IOSTAT_LOCAL_REJECT && -+ ((ulp_word4 & IOERR_PARAM_MASK) == -+ IOERR_NO_RESOURCES)) { -+ spin_unlock_irqrestore(&phba->hbalock, iflag); -+ phba->lpfc_rampdown_queue_depth(phba); -+ spin_lock_irqsave(&phba->hbalock, iflag); -+ } - -- if (irsp->ulpStatus) { -- /* Rsp ring <ringno> error: IOCB */ -+ if (ulp_status) { -+ /* Rsp ring <ringno> error: IOCB */ -+ if (phba->sli_rev < LPFC_SLI_REV4) { -+ irsp = &rspiocbp->iocb; -+ lpfc_printf_log(phba, KERN_WARNING, LOG_SLI, -+ "0328 Rsp Ring %d error: ulp_status x%x " -+ "IOCB Data: " -+ "x%08x x%08x x%08x x%08x " -+ "x%08x x%08x x%08x x%08x " -+ "x%08x x%08x x%08x x%08x " -+ "x%08x x%08x x%08x x%08x\n", -+ pring->ringno, ulp_status, -+ get_job_ulpword(rspiocbp, 0), -+ get_job_ulpword(rspiocbp, 1), -+ get_job_ulpword(rspiocbp, 2), -+ get_job_ulpword(rspiocbp, 3), -+ get_job_ulpword(rspiocbp, 4), -+ get_job_ulpword(rspiocbp, 5), -+ *(((uint32_t *)irsp) + 6), -+ *(((uint32_t *)irsp) + 7), -+ *(((uint32_t *)irsp) + 8), -+ *(((uint32_t *)irsp) + 9), -+ *(((uint32_t *)irsp) + 10), -+ *(((uint32_t *)irsp) + 11), -+ *(((uint32_t *)irsp) + 12), -+ *(((uint32_t *)irsp) + 13), -+ *(((uint32_t *)irsp) + 14), -+ *(((uint32_t *)irsp) + 15)); -+ } else { - lpfc_printf_log(phba, KERN_WARNING, LOG_SLI, -- "0328 Rsp Ring %d error: " -+ "0321 Rsp Ring %d error: " - "IOCB Data: " -- "x%x x%x x%x x%x " -- "x%x x%x x%x x%x " -- "x%x x%x x%x x%x " - "x%x x%x x%x x%x\n", - pring->ringno, -- irsp->un.ulpWord[0], -- irsp->un.ulpWord[1], -- irsp->un.ulpWord[2], -- irsp->un.ulpWord[3], -- irsp->un.ulpWord[4], -- irsp->un.ulpWord[5], -- *(((uint32_t *) irsp) + 6), -- *(((uint32_t *) irsp) + 7), -- *(((uint32_t *) irsp) + 8), -- *(((uint32_t *) irsp) + 9), -- *(((uint32_t *) irsp) + 10), -- *(((uint32_t *) irsp) + 11), -- *(((uint32_t *) irsp) + 12), -- *(((uint32_t *) irsp) + 13), -- *(((uint32_t *) irsp) + 14), -- *(((uint32_t *) irsp) + 15)); -+ rspiocbp->wcqe_cmpl.word0, -+ rspiocbp->wcqe_cmpl.total_data_placed, -+ rspiocbp->wcqe_cmpl.parameter, -+ rspiocbp->wcqe_cmpl.word3); - } -+ } - -- /* -- * Fetch the IOCB command type and call the correct completion -- * routine. Solicited and Unsolicited IOCBs on the ELS ring -- * get freed back to the lpfc_iocb_list by the discovery -- * kernel thread. -- */ -- iocb_cmd_type = irsp->ulpCommand & CMD_IOCB_MASK; -- type = lpfc_sli_iocb_cmd_type(iocb_cmd_type); -- switch (type) { -- case LPFC_SOL_IOCB: -- spin_unlock_irqrestore(&phba->hbalock, iflag); -- rc = lpfc_sli_process_sol_iocb(phba, pring, saveq); -- spin_lock_irqsave(&phba->hbalock, iflag); -- break; -- -- case LPFC_UNSOL_IOCB: -- spin_unlock_irqrestore(&phba->hbalock, iflag); -- rc = lpfc_sli_process_unsol_iocb(phba, pring, saveq); -- spin_lock_irqsave(&phba->hbalock, iflag); -- if (!rc) -- free_saveq = 0; -- break; - -- case LPFC_ABORT_IOCB: -- cmdiocbp = NULL; -- if (irsp->ulpCommand != CMD_XRI_ABORTED_CX) { -+ /* -+ * Fetch the iocb command type and call the correct completion -+ * routine. Solicited and Unsolicited IOCBs on the ELS ring -+ * get freed back to the lpfc_iocb_list by the discovery -+ * kernel thread. 
-+ */ -+ cmd_type = ulp_command & CMD_IOCB_MASK; -+ type = lpfc_sli_iocb_cmd_type(cmd_type); -+ switch (type) { -+ case LPFC_SOL_IOCB: -+ spin_unlock_irqrestore(&phba->hbalock, iflag); -+ rc = lpfc_sli_process_sol_iocb(phba, pring, saveq); -+ spin_lock_irqsave(&phba->hbalock, iflag); -+ break; -+ case LPFC_UNSOL_IOCB: -+ spin_unlock_irqrestore(&phba->hbalock, iflag); -+ rc = lpfc_sli_process_unsol_iocb(phba, pring, saveq); -+ spin_lock_irqsave(&phba->hbalock, iflag); -+ if (!rc) -+ free_saveq = 0; -+ break; -+ case LPFC_ABORT_IOCB: -+ cmdiocb = NULL; -+ if (ulp_command != CMD_XRI_ABORTED_CX) -+ cmdiocb = lpfc_sli_iocbq_lookup(phba, pring, -+ saveq); -+ if (cmdiocb) { -+ /* Call the specified completion routine */ -+ if (cmdiocb->cmd_cmpl) { - spin_unlock_irqrestore(&phba->hbalock, iflag); -- cmdiocbp = lpfc_sli_iocbq_lookup(phba, pring, -- saveq); -+ cmdiocb->cmd_cmpl(phba, cmdiocb, saveq); - spin_lock_irqsave(&phba->hbalock, iflag); -- } -- if (cmdiocbp) { -- /* Call the specified completion routine */ -- if (cmdiocbp->iocb_cmpl) { -- spin_unlock_irqrestore(&phba->hbalock, -- iflag); -- (cmdiocbp->iocb_cmpl)(phba, cmdiocbp, -- saveq); -- spin_lock_irqsave(&phba->hbalock, -- iflag); -- } else -- __lpfc_sli_release_iocbq(phba, -- cmdiocbp); -- } -- break; -- -- case LPFC_UNKNOWN_IOCB: -- if (irsp->ulpCommand == CMD_ADAPTER_MSG) { -- char adaptermsg[LPFC_MAX_ADPTMSG]; -- memset(adaptermsg, 0, LPFC_MAX_ADPTMSG); -- memcpy(&adaptermsg[0], (uint8_t *)irsp, -- MAX_MSG_DATA); -- dev_warn(&((phba->pcidev)->dev), -- "lpfc%d: %s\n", -- phba->brd_no, adaptermsg); - } else { -- /* Unknown IOCB command */ -- lpfc_printf_log(phba, KERN_ERR, LOG_TRACE_EVENT, -- "0335 Unknown IOCB " -- "command Data: x%x " -- "x%x x%x x%x\n", -- irsp->ulpCommand, -- irsp->ulpStatus, -- irsp->ulpIoTag, -- irsp->ulpContext); -+ __lpfc_sli_release_iocbq(phba, cmdiocb); - } -- break; - } -+ break; -+ case LPFC_UNKNOWN_IOCB: -+ if (ulp_command == CMD_ADAPTER_MSG) { -+ char adaptermsg[LPFC_MAX_ADPTMSG]; -+ -+ memset(adaptermsg, 0, LPFC_MAX_ADPTMSG); -+ memcpy(&adaptermsg[0], (uint8_t *)&rspiocbp->wqe, -+ MAX_MSG_DATA); -+ dev_warn(&((phba->pcidev)->dev), -+ "lpfc%d: %s\n", -+ phba->brd_no, adaptermsg); -+ } else { -+ /* Unknown command */ -+ lpfc_printf_log(phba, KERN_ERR, LOG_TRACE_EVENT, -+ "0335 Unknown IOCB " -+ "command Data: x%x " -+ "x%x x%x x%x\n", -+ ulp_command, -+ ulp_status, -+ get_wqe_reqtag(rspiocbp), -+ get_job_ulpcontext(phba, rspiocbp)); -+ } -+ break; -+ } - -- if (free_saveq) { -- list_for_each_entry_safe(rspiocbp, next_iocb, -- &saveq->list, list) { -- list_del_init(&rspiocbp->list); -- __lpfc_sli_release_iocbq(phba, rspiocbp); -- } -- __lpfc_sli_release_iocbq(phba, saveq); -+ if (free_saveq) { -+ list_for_each_entry_safe(rspiocbp, next_iocb, -+ &saveq->list, list) { -+ list_del_init(&rspiocbp->list); -+ __lpfc_sli_release_iocbq(phba, rspiocbp); - } -- rspiocbp = NULL; -+ __lpfc_sli_release_iocbq(phba, saveq); - } -+ rspiocbp = NULL; - spin_unlock_irqrestore(&phba->hbalock, iflag); - return rspiocbp; - } -@@ -4428,8 +4433,8 @@ lpfc_sli_handle_slow_ring_event_s4(struct lpfc_hba *phba, - irspiocbq = container_of(cq_event, struct lpfc_iocbq, - cq_event); - /* Translate ELS WCQE to response IOCBQ */ -- irspiocbq = lpfc_sli4_els_wcqe_to_rspiocbq(phba, -- irspiocbq); -+ irspiocbq = lpfc_sli4_els_preprocess_rspiocbq(phba, -+ irspiocbq); - if (irspiocbq) - lpfc_sli_sp_handle_rspiocb(phba, pring, - irspiocbq); -@@ -4572,7 +4577,7 @@ lpfc_sli_flush_io_rings(struct lpfc_hba *phba) - list_splice_init(&pring->txq, &txq); 
- list_for_each_entry_safe(piocb, next_iocb, - &pring->txcmplq, list) -- piocb->iocb_flag &= ~LPFC_IO_ON_TXCMPLQ; -+ piocb->cmd_flag &= ~LPFC_IO_ON_TXCMPLQ; - /* Retrieve everything on the txcmplq */ - list_splice_init(&pring->txcmplq, &txcmplq); - pring->txq_cnt = 0; -@@ -4583,10 +4588,12 @@ lpfc_sli_flush_io_rings(struct lpfc_hba *phba) +@@ -4583,10 +4586,12 @@ lpfc_sli_flush_io_rings(struct lpfc_hba *phba) lpfc_sli_cancel_iocbs(phba, &txq, IOSTAT_LOCAL_REJECT, IOERR_SLI_DOWN); @@ -254802,16 +306601,7 @@ index 026a1196a54d5..f594a006d04c6 100644 } } else { pring = &psli->sli3_ring[LPFC_FCP_RING]; -@@ -4596,7 +4603,7 @@ lpfc_sli_flush_io_rings(struct lpfc_hba *phba) - list_splice_init(&pring->txq, &txq); - list_for_each_entry_safe(piocb, next_iocb, - &pring->txcmplq, list) -- piocb->iocb_flag &= ~LPFC_IO_ON_TXCMPLQ; -+ piocb->cmd_flag &= ~LPFC_IO_ON_TXCMPLQ; - /* Retrieve everything on the txcmplq */ - list_splice_init(&pring->txcmplq, &txcmplq); - pring->txq_cnt = 0; -@@ -5043,12 +5050,6 @@ lpfc_sli4_brdreset(struct lpfc_hba *phba) +@@ -5043,12 +5048,6 @@ lpfc_sli4_brdreset(struct lpfc_hba *phba) phba->fcf.fcf_flag = 0; spin_unlock_irq(&phba->hbalock); @@ -254824,7 +306614,217 @@ index 026a1196a54d5..f594a006d04c6 100644 /* Now physically reset the device */ lpfc_printf_log(phba, KERN_INFO, LOG_INIT, "0389 Performing PCI function reset!\n"); -@@ -8153,6 +8154,7 @@ lpfc_sli4_hba_setup(struct lpfc_hba *phba) +@@ -7879,6 +7878,172 @@ static void lpfc_sli4_dip(struct lpfc_hba *phba) + } + } + ++/** ++ * lpfc_rx_monitor_create_ring - Initialize ring buffer for rx_monitor ++ * @rx_monitor: Pointer to lpfc_rx_info_monitor object ++ * @entries: Number of rx_info_entry objects to allocate in ring ++ * ++ * Return: ++ * 0 - Success ++ * ENOMEM - Failure to kmalloc ++ **/ ++int lpfc_rx_monitor_create_ring(struct lpfc_rx_info_monitor *rx_monitor, ++ u32 entries) ++{ ++ rx_monitor->ring = kmalloc_array(entries, sizeof(struct rx_info_entry), ++ GFP_KERNEL); ++ if (!rx_monitor->ring) ++ return -ENOMEM; ++ ++ rx_monitor->head_idx = 0; ++ rx_monitor->tail_idx = 0; ++ spin_lock_init(&rx_monitor->lock); ++ rx_monitor->entries = entries; ++ ++ return 0; ++} ++ ++/** ++ * lpfc_rx_monitor_destroy_ring - Free ring buffer for rx_monitor ++ * @rx_monitor: Pointer to lpfc_rx_info_monitor object ++ **/ ++void lpfc_rx_monitor_destroy_ring(struct lpfc_rx_info_monitor *rx_monitor) ++{ ++ spin_lock(&rx_monitor->lock); ++ kfree(rx_monitor->ring); ++ rx_monitor->ring = NULL; ++ rx_monitor->entries = 0; ++ rx_monitor->head_idx = 0; ++ rx_monitor->tail_idx = 0; ++ spin_unlock(&rx_monitor->lock); ++} ++ ++/** ++ * lpfc_rx_monitor_record - Insert an entry into rx_monitor's ring ++ * @rx_monitor: Pointer to lpfc_rx_info_monitor object ++ * @entry: Pointer to rx_info_entry ++ * ++ * Used to insert an rx_info_entry into rx_monitor's ring. Note that this is a ++ * deep copy of rx_info_entry not a shallow copy of the rx_info_entry ptr. ++ * ++ * This is called from lpfc_cmf_timer, which is in timer/softirq context. ++ * ++ * In cases of old data overflow, we do a best effort of FIFO order. 
++ **/ ++void lpfc_rx_monitor_record(struct lpfc_rx_info_monitor *rx_monitor, ++ struct rx_info_entry *entry) ++{ ++ struct rx_info_entry *ring = rx_monitor->ring; ++ u32 *head_idx = &rx_monitor->head_idx; ++ u32 *tail_idx = &rx_monitor->tail_idx; ++ spinlock_t *ring_lock = &rx_monitor->lock; ++ u32 ring_size = rx_monitor->entries; ++ ++ spin_lock(ring_lock); ++ memcpy(&ring[*tail_idx], entry, sizeof(*entry)); ++ *tail_idx = (*tail_idx + 1) % ring_size; ++ ++ /* Best effort of FIFO saved data */ ++ if (*tail_idx == *head_idx) ++ *head_idx = (*head_idx + 1) % ring_size; ++ ++ spin_unlock(ring_lock); ++} ++ ++/** ++ * lpfc_rx_monitor_report - Read out rx_monitor's ring ++ * @phba: Pointer to lpfc_hba object ++ * @rx_monitor: Pointer to lpfc_rx_info_monitor object ++ * @buf: Pointer to char buffer that will contain rx monitor info data ++ * @buf_len: Length buf including null char ++ * @max_read_entries: Maximum number of entries to read out of ring ++ * ++ * Used to dump/read what's in rx_monitor's ring buffer. ++ * ++ * If buf is NULL || buf_len == 0, then it is implied that we want to log the ++ * information to kmsg instead of filling out buf. ++ * ++ * Return: ++ * Number of entries read out of the ring ++ **/ ++u32 lpfc_rx_monitor_report(struct lpfc_hba *phba, ++ struct lpfc_rx_info_monitor *rx_monitor, char *buf, ++ u32 buf_len, u32 max_read_entries) ++{ ++ struct rx_info_entry *ring = rx_monitor->ring; ++ struct rx_info_entry *entry; ++ u32 *head_idx = &rx_monitor->head_idx; ++ u32 *tail_idx = &rx_monitor->tail_idx; ++ spinlock_t *ring_lock = &rx_monitor->lock; ++ u32 ring_size = rx_monitor->entries; ++ u32 cnt = 0; ++ char tmp[DBG_LOG_STR_SZ] = {0}; ++ bool log_to_kmsg = (!buf || !buf_len) ? true : false; ++ ++ if (!log_to_kmsg) { ++ /* clear the buffer to be sure */ ++ memset(buf, 0, buf_len); ++ ++ scnprintf(buf, buf_len, "\t%-16s%-16s%-16s%-16s%-8s%-8s%-8s" ++ "%-8s%-8s%-8s%-16s\n", ++ "MaxBPI", "Tot_Data_CMF", ++ "Tot_Data_Cmd", "Tot_Data_Cmpl", ++ "Lat(us)", "Avg_IO", "Max_IO", "Bsy", ++ "IO_cnt", "Info", "BWutil(ms)"); ++ } ++ ++ /* Needs to be _irq because record is called from timer interrupt ++ * context ++ */ ++ spin_lock_irq(ring_lock); ++ while (*head_idx != *tail_idx) { ++ entry = &ring[*head_idx]; ++ ++ /* Read out this entry's data. */ ++ if (!log_to_kmsg) { ++ /* If !log_to_kmsg, then store to buf. 
*/ ++ scnprintf(tmp, sizeof(tmp), ++ "%03d:\t%-16llu%-16llu%-16llu%-16llu%-8llu" ++ "%-8llu%-8llu%-8u%-8u%-8u%u(%u)\n", ++ *head_idx, entry->max_bytes_per_interval, ++ entry->cmf_bytes, entry->total_bytes, ++ entry->rcv_bytes, entry->avg_io_latency, ++ entry->avg_io_size, entry->max_read_cnt, ++ entry->cmf_busy, entry->io_cnt, ++ entry->cmf_info, entry->timer_utilization, ++ entry->timer_interval); ++ ++ /* Check for buffer overflow */ ++ if ((strlen(buf) + strlen(tmp)) >= buf_len) ++ break; ++ ++ /* Append entry's data to buffer */ ++ strlcat(buf, tmp, buf_len); ++ } else { ++ lpfc_printf_log(phba, KERN_INFO, LOG_CGN_MGMT, ++ "4410 %02u: MBPI %llu Xmit %llu " ++ "Cmpl %llu Lat %llu ASz %llu Info %02u " ++ "BWUtil %u Int %u slot %u\n", ++ cnt, entry->max_bytes_per_interval, ++ entry->total_bytes, entry->rcv_bytes, ++ entry->avg_io_latency, ++ entry->avg_io_size, entry->cmf_info, ++ entry->timer_utilization, ++ entry->timer_interval, *head_idx); ++ } ++ ++ *head_idx = (*head_idx + 1) % ring_size; ++ ++ /* Don't feed more than max_read_entries */ ++ cnt++; ++ if (cnt >= max_read_entries) ++ break; ++ } ++ spin_unlock_irq(ring_lock); ++ ++ return cnt; ++} ++ + /** + * lpfc_cmf_setup - Initialize idle_stat tracking + * @phba: Pointer to HBA context object. +@@ -8070,19 +8235,29 @@ no_cmf: + phba->cmf_interval_rate = LPFC_CMF_INTERVAL; + + /* Allocate RX Monitor Buffer */ +- if (!phba->rxtable) { +- phba->rxtable = kmalloc_array(LPFC_MAX_RXMONITOR_ENTRY, +- sizeof(struct rxtable_entry), +- GFP_KERNEL); +- if (!phba->rxtable) { ++ if (!phba->rx_monitor) { ++ phba->rx_monitor = kzalloc(sizeof(*phba->rx_monitor), ++ GFP_KERNEL); ++ ++ if (!phba->rx_monitor) { + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "2644 Failed to alloc memory " + "for RX Monitor Buffer\n"); + return -ENOMEM; + } ++ ++ /* Instruct the rx_monitor object to instantiate its ring */ ++ if (lpfc_rx_monitor_create_ring(phba->rx_monitor, ++ LPFC_MAX_RXMONITOR_ENTRY)) { ++ kfree(phba->rx_monitor); ++ phba->rx_monitor = NULL; ++ lpfc_printf_log(phba, KERN_ERR, LOG_INIT, ++ "2645 Failed to alloc memory " ++ "for RX Monitor's Ring\n"); ++ return -ENOMEM; ++ } + } +- atomic_set(&phba->rxtable_idx_head, 0); +- atomic_set(&phba->rxtable_idx_tail, 0); ++ + return 0; + } + +@@ -8153,6 +8328,7 @@ lpfc_sli4_hba_setup(struct lpfc_hba *phba) struct lpfc_vport *vport = phba->pport; struct lpfc_dmabuf *mp; struct lpfc_rqb *rqbp; @@ -254832,7 +306832,7 @@ index 026a1196a54d5..f594a006d04c6 100644 /* Perform a PCI function reset to start from clean */ rc = lpfc_pci_function_reset(phba); -@@ -8166,7 +8168,17 @@ lpfc_sli4_hba_setup(struct lpfc_hba *phba) +@@ -8166,7 +8342,17 @@ lpfc_sli4_hba_setup(struct lpfc_hba *phba) else { spin_lock_irq(&phba->hbalock); phba->sli.sli_flag |= LPFC_SLI_ACTIVE; @@ -254850,7 +306850,7 @@ index 026a1196a54d5..f594a006d04c6 100644 } lpfc_sli4_dip(phba); -@@ -9750,7 +9762,7 @@ lpfc_sli_issue_mbox_s4(struct lpfc_hba *phba, LPFC_MBOXQ_t *mboxq, +@@ -9750,7 +9936,7 @@ lpfc_sli_issue_mbox_s4(struct lpfc_hba *phba, LPFC_MBOXQ_t *mboxq, "(%d):2541 Mailbox command x%x " "(x%x/x%x) failure: " "mqe_sta: x%x mcqe_sta: x%x/x%x " @@ -254859,7 +306859,7 @@ index 026a1196a54d5..f594a006d04c6 100644 mboxq->vport ? 
mboxq->vport->vpi : 0, mboxq->u.mb.mbxCommand, lpfc_sli_config_mbox_subsys_get(phba, -@@ -9784,7 +9796,7 @@ lpfc_sli_issue_mbox_s4(struct lpfc_hba *phba, LPFC_MBOXQ_t *mboxq, +@@ -9784,7 +9970,7 @@ lpfc_sli_issue_mbox_s4(struct lpfc_hba *phba, LPFC_MBOXQ_t *mboxq, "(%d):2597 Sync Mailbox command " "x%x (x%x/x%x) failure: " "mqe_sta: x%x mcqe_sta: x%x/x%x " @@ -254868,289 +306868,7 @@ index 026a1196a54d5..f594a006d04c6 100644 mboxq->vport ? mboxq->vport->vpi : 0, mboxq->u.mb.mbxCommand, lpfc_sli_config_mbox_subsys_get(phba, -@@ -10107,7 +10119,7 @@ __lpfc_sli_issue_iocb_s3(struct lpfc_hba *phba, uint32_t ring_number, - - lockdep_assert_held(&phba->hbalock); - -- if (piocb->iocb_cmpl && (!piocb->vport) && -+ if (piocb->cmd_cmpl && (!piocb->vport) && - (piocb->iocb.ulpCommand != CMD_ABORT_XRI_CN) && - (piocb->iocb.ulpCommand != CMD_CLOSE_XRI_CN)) { - lpfc_printf_log(phba, KERN_ERR, LOG_TRACE_EVENT, -@@ -10159,10 +10171,10 @@ __lpfc_sli_issue_iocb_s3(struct lpfc_hba *phba, uint32_t ring_number, - case CMD_QUE_RING_BUF64_CN: - /* - * For IOCBs, like QUE_RING_BUF, that have no rsp ring -- * completion, iocb_cmpl MUST be 0. -+ * completion, cmd_cmpl MUST be 0. - */ -- if (piocb->iocb_cmpl) -- piocb->iocb_cmpl = NULL; -+ if (piocb->cmd_cmpl) -+ piocb->cmd_cmpl = NULL; - fallthrough; - case CMD_CREATE_XRI_CR: - case CMD_CLOSE_XRI_CN: -@@ -10353,9 +10365,9 @@ lpfc_sli4_iocb2wqe(struct lpfc_hba *phba, struct lpfc_iocbq *iocbq, - - fip = phba->hba_flag & HBA_FIP_SUPPORT; - /* The fcp commands will set command type */ -- if (iocbq->iocb_flag & LPFC_IO_FCP) -+ if (iocbq->cmd_flag & LPFC_IO_FCP) - command_type = FCP_COMMAND; -- else if (fip && (iocbq->iocb_flag & LPFC_FIP_ELS_ID_MASK)) -+ else if (fip && (iocbq->cmd_flag & LPFC_FIP_ELS_ID_MASK)) - command_type = ELS_COMMAND_FIP; - else - command_type = ELS_COMMAND_NON_FIP; -@@ -10400,7 +10412,7 @@ lpfc_sli4_iocb2wqe(struct lpfc_hba *phba, struct lpfc_iocbq *iocbq, - - switch (iocbq->iocb.ulpCommand) { - case CMD_ELS_REQUEST64_CR: -- if (iocbq->iocb_flag & LPFC_IO_LIBDFC) -+ if (iocbq->cmd_flag & LPFC_IO_LIBDFC) - ndlp = iocbq->context_un.ndlp; - else - ndlp = (struct lpfc_nodelist *)iocbq->context1; -@@ -10427,7 +10439,7 @@ lpfc_sli4_iocb2wqe(struct lpfc_hba *phba, struct lpfc_iocbq *iocbq, - bf_set(wqe_pu, &wqe->els_req.wqe_com, 0); - /* CCP CCPE PV PRI in word10 were set in the memcpy */ - if (command_type == ELS_COMMAND_FIP) -- els_id = ((iocbq->iocb_flag & LPFC_FIP_ELS_ID_MASK) -+ els_id = ((iocbq->cmd_flag & LPFC_FIP_ELS_ID_MASK) - >> LPFC_FIP_ELS_ID_SHIFT); - pcmd = (uint32_t *) (((struct lpfc_dmabuf *) - iocbq->context2)->virt); -@@ -10529,7 +10541,7 @@ lpfc_sli4_iocb2wqe(struct lpfc_hba *phba, struct lpfc_iocbq *iocbq, - LPFC_WQE_LENLOC_WORD4); - bf_set(wqe_pu, &wqe->fcp_iwrite.wqe_com, iocbq->iocb.ulpPU); - bf_set(wqe_dbde, &wqe->fcp_iwrite.wqe_com, 1); -- if (iocbq->iocb_flag & LPFC_IO_OAS) { -+ if (iocbq->cmd_flag & LPFC_IO_OAS) { - bf_set(wqe_oas, &wqe->fcp_iwrite.wqe_com, 1); - bf_set(wqe_ccpe, &wqe->fcp_iwrite.wqe_com, 1); - if (iocbq->priority) { -@@ -10593,7 +10605,7 @@ lpfc_sli4_iocb2wqe(struct lpfc_hba *phba, struct lpfc_iocbq *iocbq, - LPFC_WQE_LENLOC_WORD4); - bf_set(wqe_pu, &wqe->fcp_iread.wqe_com, iocbq->iocb.ulpPU); - bf_set(wqe_dbde, &wqe->fcp_iread.wqe_com, 1); -- if (iocbq->iocb_flag & LPFC_IO_OAS) { -+ if (iocbq->cmd_flag & LPFC_IO_OAS) { - bf_set(wqe_oas, &wqe->fcp_iread.wqe_com, 1); - bf_set(wqe_ccpe, &wqe->fcp_iread.wqe_com, 1); - if (iocbq->priority) { -@@ -10656,7 +10668,7 @@ lpfc_sli4_iocb2wqe(struct lpfc_hba *phba, 
struct lpfc_iocbq *iocbq, - LPFC_WQE_LENLOC_NONE); - bf_set(wqe_erp, &wqe->fcp_icmd.wqe_com, - iocbq->iocb.ulpFCP2Rcvy); -- if (iocbq->iocb_flag & LPFC_IO_OAS) { -+ if (iocbq->cmd_flag & LPFC_IO_OAS) { - bf_set(wqe_oas, &wqe->fcp_icmd.wqe_com, 1); - bf_set(wqe_ccpe, &wqe->fcp_icmd.wqe_com, 1); - if (iocbq->priority) { -@@ -10790,7 +10802,7 @@ lpfc_sli4_iocb2wqe(struct lpfc_hba *phba, struct lpfc_iocbq *iocbq, - abrt_iotag = iocbq->iocb.un.acxri.abortContextTag; - if (abrt_iotag != 0 && abrt_iotag <= phba->sli.last_iotag) { - abrtiocbq = phba->sli.iocbq_lookup[abrt_iotag]; -- fip = abrtiocbq->iocb_flag & LPFC_FIP_ELS_ID_MASK; -+ fip = abrtiocbq->cmd_flag & LPFC_FIP_ELS_ID_MASK; - } else - fip = 0; - -@@ -10899,13 +10911,13 @@ lpfc_sli4_iocb2wqe(struct lpfc_hba *phba, struct lpfc_iocbq *iocbq, - return IOCB_ERROR; - } - -- if (iocbq->iocb_flag & LPFC_IO_DIF_PASS) -+ if (iocbq->cmd_flag & LPFC_IO_DIF_PASS) - bf_set(wqe_dif, &wqe->generic.wqe_com, LPFC_WQE_DIF_PASSTHRU); -- else if (iocbq->iocb_flag & LPFC_IO_DIF_STRIP) -+ else if (iocbq->cmd_flag & LPFC_IO_DIF_STRIP) - bf_set(wqe_dif, &wqe->generic.wqe_com, LPFC_WQE_DIF_STRIP); -- else if (iocbq->iocb_flag & LPFC_IO_DIF_INSERT) -+ else if (iocbq->cmd_flag & LPFC_IO_DIF_INSERT) - bf_set(wqe_dif, &wqe->generic.wqe_com, LPFC_WQE_DIF_INSERT); -- iocbq->iocb_flag &= ~(LPFC_IO_DIF_PASS | LPFC_IO_DIF_STRIP | -+ iocbq->cmd_flag &= ~(LPFC_IO_DIF_PASS | LPFC_IO_DIF_STRIP | - LPFC_IO_DIF_INSERT); - bf_set(wqe_xri_tag, &wqe->generic.wqe_com, xritag); - bf_set(wqe_reqtag, &wqe->generic.wqe_com, iocbq->iotag); -@@ -10925,7 +10937,7 @@ lpfc_sli4_iocb2wqe(struct lpfc_hba *phba, struct lpfc_iocbq *iocbq, - * @flag: Flag indicating if this command can be put into txq. - * - * __lpfc_sli_issue_fcp_io_s3 is wrapper function to invoke lockless func to -- * send an iocb command to an HBA with SLI-4 interface spec. -+ * send an iocb command to an HBA with SLI-3 interface spec. - * - * This function takes the hbalock before invoking the lockless version. 
- * The function will return success after it successfully submit the wqe to -@@ -10966,7 +10978,17 @@ __lpfc_sli_issue_fcp_io_s4(struct lpfc_hba *phba, uint32_t ring_number, - int rc; - struct lpfc_io_buf *lpfc_cmd = - (struct lpfc_io_buf *)piocb->context1; -- union lpfc_wqe128 *wqe = &piocb->wqe; -+ -+ lpfc_prep_embed_io(phba, lpfc_cmd); -+ rc = lpfc_sli4_issue_wqe(phba, lpfc_cmd->hdwq, piocb); -+ return rc; -+} -+ -+void -+lpfc_prep_embed_io(struct lpfc_hba *phba, struct lpfc_io_buf *lpfc_cmd) -+{ -+ struct lpfc_iocbq *piocb = &lpfc_cmd->cur_iocbq; -+ union lpfc_wqe128 *wqe = &lpfc_cmd->cur_iocbq.wqe; - struct sli4_sge *sgl; - - /* 128 byte wqe support here */ -@@ -11004,7 +11026,7 @@ __lpfc_sli_issue_fcp_io_s4(struct lpfc_hba *phba, uint32_t ring_number, - } - - /* add the VMID tags as per switch response */ -- if (unlikely(piocb->iocb_flag & LPFC_IO_VMID)) { -+ if (unlikely(piocb->cmd_flag & LPFC_IO_VMID)) { - if (phba->pport->vmid_priority_tagging) { - bf_set(wqe_ccpe, &wqe->fcp_iwrite.wqe_com, 1); - bf_set(wqe_ccp, &wqe->fcp_iwrite.wqe_com, -@@ -11015,8 +11037,6 @@ __lpfc_sli_issue_fcp_io_s4(struct lpfc_hba *phba, uint32_t ring_number, - wqe->words[31] = piocb->vmid_tag.app_id; - } - } -- rc = lpfc_sli4_issue_wqe(phba, lpfc_cmd->hdwq, piocb); -- return rc; - } - - /** -@@ -11038,13 +11058,14 @@ __lpfc_sli_issue_iocb_s4(struct lpfc_hba *phba, uint32_t ring_number, - struct lpfc_iocbq *piocb, uint32_t flag) - { - struct lpfc_sglq *sglq; -- union lpfc_wqe128 wqe; -+ union lpfc_wqe128 *wqe; - struct lpfc_queue *wq; - struct lpfc_sli_ring *pring; -+ u32 ulp_command = get_job_cmnd(phba, piocb); - - /* Get the WQ */ -- if ((piocb->iocb_flag & LPFC_IO_FCP) || -- (piocb->iocb_flag & LPFC_USE_FCPWQIDX)) { -+ if ((piocb->cmd_flag & LPFC_IO_FCP) || -+ (piocb->cmd_flag & LPFC_USE_FCPWQIDX)) { - wq = phba->sli4_hba.hdwq[piocb->hba_wqidx].io_wq; - } else { - wq = phba->sli4_hba.els_wq; -@@ -11058,10 +11079,9 @@ __lpfc_sli_issue_iocb_s4(struct lpfc_hba *phba, uint32_t ring_number, - */ - - lockdep_assert_held(&pring->ring_lock); -- -+ wqe = &piocb->wqe; - if (piocb->sli4_xritag == NO_XRI) { -- if (piocb->iocb.ulpCommand == CMD_ABORT_XRI_CN || -- piocb->iocb.ulpCommand == CMD_CLOSE_XRI_CN) -+ if (ulp_command == CMD_ABORT_XRI_WQE) - sglq = NULL; - else { - if (!list_empty(&pring->txq)) { -@@ -11085,7 +11105,7 @@ __lpfc_sli_issue_iocb_s4(struct lpfc_hba *phba, uint32_t ring_number, - } - } - } -- } else if (piocb->iocb_flag & LPFC_IO_FCP) { -+ } else if (piocb->cmd_flag & LPFC_IO_FCP) { - /* These IO's already have an XRI and a mapped sgl. */ - sglq = NULL; - } -@@ -11102,14 +11122,24 @@ __lpfc_sli_issue_iocb_s4(struct lpfc_hba *phba, uint32_t ring_number, - if (sglq) { - piocb->sli4_lxritag = sglq->sli4_lxritag; - piocb->sli4_xritag = sglq->sli4_xritag; -- if (NO_XRI == lpfc_sli4_bpl2sgl(phba, piocb, sglq)) -+ -+ /* ABTS sent by initiator to CT exchange, the -+ * RX_ID field will be filled with the newly -+ * allocated responder XRI. 
-+ */ -+ if (ulp_command == CMD_XMIT_BLS_RSP64_CX && -+ piocb->abort_bls == LPFC_ABTS_UNSOL_INT) -+ bf_set(xmit_bls_rsp64_rxid, &wqe->xmit_bls_rsp, -+ piocb->sli4_xritag); -+ -+ bf_set(wqe_xri_tag, &wqe->generic.wqe_com, -+ piocb->sli4_xritag); -+ -+ if (lpfc_wqe_bpl2sgl(phba, piocb, sglq) == NO_XRI) - return IOCB_ERROR; - } - -- if (lpfc_sli4_iocb2wqe(phba, piocb, &wqe)) -- return IOCB_ERROR; -- -- if (lpfc_sli4_wq_put(wq, &wqe)) -+ if (lpfc_sli4_wq_put(wq, wqe)) - return IOCB_ERROR; - lpfc_sli_ringtxcmpl_put(phba, pring, piocb); - -@@ -11202,14 +11232,14 @@ lpfc_sli4_calc_ring(struct lpfc_hba *phba, struct lpfc_iocbq *piocb) - { - struct lpfc_io_buf *lpfc_cmd; - -- if (piocb->iocb_flag & (LPFC_IO_FCP | LPFC_USE_FCPWQIDX)) { -+ if (piocb->cmd_flag & (LPFC_IO_FCP | LPFC_USE_FCPWQIDX)) { - if (unlikely(!phba->sli4_hba.hdwq)) - return NULL; - /* - * for abort iocb hba_wqidx should already - * be setup based on what work queue we used. - */ -- if (!(piocb->iocb_flag & LPFC_USE_FCPWQIDX)) { -+ if (!(piocb->cmd_flag & LPFC_USE_FCPWQIDX)) { - lpfc_cmd = (struct lpfc_io_buf *)piocb->context1; - piocb->hba_wqidx = lpfc_cmd->hdwq_no; - } -@@ -12351,14 +12381,14 @@ lpfc_sli_issue_abort_iotag(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, - icmd = &cmdiocb->iocb; - if (icmd->ulpCommand == CMD_ABORT_XRI_CN || - icmd->ulpCommand == CMD_CLOSE_XRI_CN || -- cmdiocb->iocb_flag & LPFC_DRIVER_ABORTED) -+ cmdiocb->cmd_flag & LPFC_DRIVER_ABORTED) - return IOCB_ABORTING; - - if (!pring) { -- if (cmdiocb->iocb_flag & LPFC_IO_FABRIC) -- cmdiocb->fabric_iocb_cmpl = lpfc_ignore_els_cmpl; -+ if (cmdiocb->cmd_flag & LPFC_IO_FABRIC) -+ cmdiocb->fabric_cmd_cmpl = lpfc_ignore_els_cmpl; - else -- cmdiocb->iocb_cmpl = lpfc_ignore_els_cmpl; -+ cmdiocb->cmd_cmpl = lpfc_ignore_els_cmpl; - return retval; - } - -@@ -12368,10 +12398,10 @@ lpfc_sli_issue_abort_iotag(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, - */ - if ((vport->load_flag & FC_UNLOADING) && - pring->ringno == LPFC_ELS_RING) { -- if (cmdiocb->iocb_flag & LPFC_IO_FABRIC) -- cmdiocb->fabric_iocb_cmpl = lpfc_ignore_els_cmpl; -+ if (cmdiocb->cmd_flag & LPFC_IO_FABRIC) -+ cmdiocb->fabric_cmd_cmpl = lpfc_ignore_els_cmpl; - else -- cmdiocb->iocb_cmpl = lpfc_ignore_els_cmpl; -+ cmdiocb->cmd_cmpl = lpfc_ignore_els_cmpl; - return retval; - } - -@@ -12383,7 +12413,7 @@ lpfc_sli_issue_abort_iotag(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, - /* This signals the response to set the correct status - * before calling the completion handler - */ -- cmdiocb->iocb_flag |= LPFC_DRIVER_ABORTED; -+ cmdiocb->cmd_flag |= LPFC_DRIVER_ABORTED; - - iabt = &abtsiocbp->iocb; - iabt->un.acxri.abortType = ABORT_TYPE_ABTS; -@@ -12404,22 +12434,22 @@ lpfc_sli_issue_abort_iotag(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, +@@ -12404,17 +12590,17 @@ lpfc_sli_issue_abort_iotag(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, /* ABTS WQE must go to the same WQ as the WQE to be aborted */ abtsiocbp->hba_wqidx = cmdiocb->hba_wqidx; @@ -255158,17 +306876,14 @@ index 026a1196a54d5..f594a006d04c6 100644 - abtsiocbp->iocb_flag |= LPFC_IO_FCP; - abtsiocbp->iocb_flag |= LPFC_USE_FCPWQIDX; - } -- if (cmdiocb->iocb_flag & LPFC_IO_FOF) -- abtsiocbp->iocb_flag |= LPFC_IO_FOF; -- ++ if (cmdiocb->iocb_flag & LPFC_IO_FCP) ++ abtsiocbp->iocb_flag |= (LPFC_IO_FCP | LPFC_USE_FCPWQIDX); + if (cmdiocb->iocb_flag & LPFC_IO_FOF) + abtsiocbp->iocb_flag |= LPFC_IO_FOF; + - if (phba->link_state >= LPFC_LINK_UP) - iabt->ulpCommand = CMD_ABORT_XRI_CN; - else -+ if (cmdiocb->cmd_flag & 
LPFC_IO_FCP) -+ abtsiocbp->cmd_flag |= (LPFC_IO_FCP | LPFC_USE_FCPWQIDX); -+ if (cmdiocb->cmd_flag & LPFC_IO_FOF) -+ abtsiocbp->cmd_flag |= LPFC_IO_FOF; -+ + if (phba->link_state < LPFC_LINK_UP || + (phba->sli_rev == LPFC_SLI_REV4 && + phba->sli4_hba.link_state.status == LPFC_FC_LA_TYPE_LINK_DOWN)) @@ -255177,24 +306892,8 @@ index 026a1196a54d5..f594a006d04c6 100644 + iabt->ulpCommand = CMD_ABORT_XRI_CN; if (cmpl) -- abtsiocbp->iocb_cmpl = cmpl; -+ abtsiocbp->cmd_cmpl = cmpl; - else -- abtsiocbp->iocb_cmpl = lpfc_sli_abort_els_cmpl; -+ abtsiocbp->cmd_cmpl = lpfc_sli_abort_els_cmpl; - abtsiocbp->vport = vport; - - if (phba->sli_rev == LPFC_SLI_REV4) { -@@ -12446,7 +12476,7 @@ abort_iotag_exit: - abtsiocbp->iotag, retval); - - if (retval) { -- cmdiocb->iocb_flag &= ~LPFC_DRIVER_ABORTED; -+ cmdiocb->cmd_flag &= ~LPFC_DRIVER_ABORTED; - __lpfc_sli_release_iocbq(phba, abtsiocbp); - } - -@@ -12488,15 +12518,54 @@ lpfc_sli_hba_iocb_abort(struct lpfc_hba *phba) + abtsiocbp->iocb_cmpl = cmpl; +@@ -12488,15 +12674,54 @@ lpfc_sli_hba_iocb_abort(struct lpfc_hba *phba) } /** @@ -255225,9 +306924,9 @@ index 026a1196a54d5..f594a006d04c6 100644 + * can't be premarked as driver aborted, nor be an ABORT iocb itself + */ + icmd = &iocbq->iocb; -+ if (!(iocbq->cmd_flag & LPFC_IO_FCP) || -+ !(iocbq->cmd_flag & LPFC_IO_ON_TXCMPLQ) || -+ (iocbq->cmd_flag & LPFC_DRIVER_ABORTED) || ++ if (!(iocbq->iocb_flag & LPFC_IO_FCP) || ++ !(iocbq->iocb_flag & LPFC_IO_ON_TXCMPLQ) || ++ (iocbq->iocb_flag & LPFC_DRIVER_ABORTED) || + (icmd->ulpCommand == CMD_ABORT_XRI_CN || + icmd->ulpCommand == CMD_CLOSE_XRI_CN)) + return -EINVAL; @@ -255252,7 +306951,7 @@ index 026a1196a54d5..f594a006d04c6 100644 * 0 if the filtering criteria is met for the given iocb and will return * 1 if the filtering criteria is not met. 
* If ctx_cmd == LPFC_CTX_LUN, the function returns 0 only if the -@@ -12515,22 +12584,8 @@ lpfc_sli_validate_fcp_iocb(struct lpfc_iocbq *iocbq, struct lpfc_vport *vport, +@@ -12515,22 +12740,8 @@ lpfc_sli_validate_fcp_iocb(struct lpfc_iocbq *iocbq, struct lpfc_vport *vport, lpfc_ctx_cmd ctx_cmd) { struct lpfc_io_buf *lpfc_cmd; @@ -255275,7 +306974,7 @@ index 026a1196a54d5..f594a006d04c6 100644 lpfc_cmd = container_of(iocbq, struct lpfc_io_buf, cur_iocbq); if (lpfc_cmd->pCmd == NULL) -@@ -12585,17 +12640,33 @@ lpfc_sli_sum_iocb(struct lpfc_vport *vport, uint16_t tgt_id, uint64_t lun_id, +@@ -12585,17 +12796,33 @@ lpfc_sli_sum_iocb(struct lpfc_vport *vport, uint16_t tgt_id, uint64_t lun_id, { struct lpfc_hba *phba = vport->phba; struct lpfc_iocbq *iocbq; @@ -255292,8 +306991,8 @@ index 026a1196a54d5..f594a006d04c6 100644 - ctx_cmd) == 0) + if (!iocbq || iocbq->vport != vport) + continue; -+ if (!(iocbq->cmd_flag & LPFC_IO_FCP) || -+ !(iocbq->cmd_flag & LPFC_IO_ON_TXCMPLQ)) ++ if (!(iocbq->iocb_flag & LPFC_IO_FCP) || ++ !(iocbq->iocb_flag & LPFC_IO_ON_TXCMPLQ)) + continue; + + /* Include counting outstanding aborts */ @@ -255313,7 +307012,7 @@ index 026a1196a54d5..f594a006d04c6 100644 return sum; } -@@ -12662,7 +12733,11 @@ lpfc_sli_abort_fcp_cmpl(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, +@@ -12662,7 +12889,11 @@ lpfc_sli_abort_fcp_cmpl(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, * * This function sends an abort command for every SCSI command * associated with the given virtual port pending on the ring @@ -255326,7 +307025,7 @@ index 026a1196a54d5..f594a006d04c6 100644 * When abort_cmd == LPFC_CTX_LUN, the function sends abort only to the * FCP iocbs associated with lun specified by tgt_id and lun_id * parameters -@@ -12694,6 +12769,9 @@ lpfc_sli_abort_iocb(struct lpfc_vport *vport, u16 tgt_id, u64 lun_id, +@@ -12694,6 +12925,9 @@ lpfc_sli_abort_iocb(struct lpfc_vport *vport, u16 tgt_id, u64 lun_id, for (i = 1; i <= phba->sli.last_iotag; i++) { iocbq = phba->sli.iocbq_lookup[i]; @@ -255336,7 +307035,7 @@ index 026a1196a54d5..f594a006d04c6 100644 if (lpfc_sli_validate_fcp_iocb(iocbq, vport, tgt_id, lun_id, abort_cmd) != 0) continue; -@@ -12726,7 +12804,11 @@ lpfc_sli_abort_iocb(struct lpfc_vport *vport, u16 tgt_id, u64 lun_id, +@@ -12726,7 +12960,11 @@ lpfc_sli_abort_iocb(struct lpfc_vport *vport, u16 tgt_id, u64 lun_id, * * This function sends an abort command for every SCSI command * associated with the given virtual port pending on the ring @@ -255349,7 +307048,7 @@ index 026a1196a54d5..f594a006d04c6 100644 * When taskmgmt_cmd == LPFC_CTX_LUN, the function sends abort only to the * FCP iocbs associated with lun specified by tgt_id and lun_id * parameters -@@ -12764,6 +12846,9 @@ lpfc_sli_abort_taskmgmt(struct lpfc_vport *vport, struct lpfc_sli_ring *pring, +@@ -12764,6 +13002,9 @@ lpfc_sli_abort_taskmgmt(struct lpfc_vport *vport, struct lpfc_sli_ring *pring, for (i = 1; i <= phba->sli.last_iotag; i++) { iocbq = phba->sli.iocbq_lookup[i]; @@ -255359,186 +307058,7 @@ index 026a1196a54d5..f594a006d04c6 100644 if (lpfc_sli_validate_fcp_iocb(iocbq, vport, tgt_id, lun_id, cmd) != 0) continue; -@@ -12792,8 +12877,8 @@ lpfc_sli_abort_taskmgmt(struct lpfc_vport *vport, struct lpfc_sli_ring *pring, - * If the iocbq is already being aborted, don't take a second - * action, but do count it. 
- */ -- if ((iocbq->iocb_flag & LPFC_DRIVER_ABORTED) || -- !(iocbq->iocb_flag & LPFC_IO_ON_TXCMPLQ)) { -+ if ((iocbq->cmd_flag & LPFC_DRIVER_ABORTED) || -+ !(iocbq->cmd_flag & LPFC_IO_ON_TXCMPLQ)) { - if (phba->sli_rev == LPFC_SLI_REV4) - spin_unlock(&pring_s4->ring_lock); - spin_unlock(&lpfc_cmd->buf_lock); -@@ -12823,10 +12908,10 @@ lpfc_sli_abort_taskmgmt(struct lpfc_vport *vport, struct lpfc_sli_ring *pring, - - /* ABTS WQE must go to the same WQ as the WQE to be aborted */ - abtsiocbq->hba_wqidx = iocbq->hba_wqidx; -- if (iocbq->iocb_flag & LPFC_IO_FCP) -- abtsiocbq->iocb_flag |= LPFC_USE_FCPWQIDX; -- if (iocbq->iocb_flag & LPFC_IO_FOF) -- abtsiocbq->iocb_flag |= LPFC_IO_FOF; -+ if (iocbq->cmd_flag & LPFC_IO_FCP) -+ abtsiocbq->cmd_flag |= LPFC_USE_FCPWQIDX; -+ if (iocbq->cmd_flag & LPFC_IO_FOF) -+ abtsiocbq->cmd_flag |= LPFC_IO_FOF; - - ndlp = lpfc_cmd->rdata->pnode; - -@@ -12837,13 +12922,13 @@ lpfc_sli_abort_taskmgmt(struct lpfc_vport *vport, struct lpfc_sli_ring *pring, - abtsiocbq->iocb.ulpCommand = CMD_CLOSE_XRI_CN; - - /* Setup callback routine and issue the command. */ -- abtsiocbq->iocb_cmpl = lpfc_sli_abort_fcp_cmpl; -+ abtsiocbq->cmd_cmpl = lpfc_sli_abort_fcp_cmpl; - - /* - * Indicate the IO is being aborted by the driver and set - * the caller's flag into the aborted IO. - */ -- iocbq->iocb_flag |= LPFC_DRIVER_ABORTED; -+ iocbq->cmd_flag |= LPFC_DRIVER_ABORTED; - - if (phba->sli_rev == LPFC_SLI_REV4) { - ret_val = __lpfc_sli_issue_iocb(phba, pring_s4->ringno, -@@ -12890,9 +12975,10 @@ lpfc_sli_wake_iocb_wait(struct lpfc_hba *phba, - wait_queue_head_t *pdone_q; - unsigned long iflags; - struct lpfc_io_buf *lpfc_cmd; -+ size_t offset = offsetof(struct lpfc_iocbq, wqe); - - spin_lock_irqsave(&phba->hbalock, iflags); -- if (cmdiocbq->iocb_flag & LPFC_IO_WAKE_TMO) { -+ if (cmdiocbq->cmd_flag & LPFC_IO_WAKE_TMO) { - - /* - * A time out has occurred for the iocb. If a time out -@@ -12901,26 +12987,27 @@ lpfc_sli_wake_iocb_wait(struct lpfc_hba *phba, - */ - - spin_unlock_irqrestore(&phba->hbalock, iflags); -- cmdiocbq->iocb_cmpl = cmdiocbq->wait_iocb_cmpl; -- cmdiocbq->wait_iocb_cmpl = NULL; -- if (cmdiocbq->iocb_cmpl) -- (cmdiocbq->iocb_cmpl)(phba, cmdiocbq, NULL); -+ cmdiocbq->cmd_cmpl = cmdiocbq->wait_cmd_cmpl; -+ cmdiocbq->wait_cmd_cmpl = NULL; -+ if (cmdiocbq->cmd_cmpl) -+ cmdiocbq->cmd_cmpl(phba, cmdiocbq, NULL); - else - lpfc_sli_release_iocbq(phba, cmdiocbq); - return; - } - -- cmdiocbq->iocb_flag |= LPFC_IO_WAKE; -+ /* Copy the contents of the local rspiocb into the caller's buffer. */ -+ cmdiocbq->cmd_flag |= LPFC_IO_WAKE; - if (cmdiocbq->context2 && rspiocbq) -- memcpy(&((struct lpfc_iocbq *)cmdiocbq->context2)->iocb, -- &rspiocbq->iocb, sizeof(IOCB_t)); -+ memcpy((char *)cmdiocbq->context2 + offset, -+ (char *)rspiocbq + offset, sizeof(*rspiocbq) - offset); - - /* Set the exchange busy flag for task management commands */ -- if ((cmdiocbq->iocb_flag & LPFC_IO_FCP) && -- !(cmdiocbq->iocb_flag & LPFC_IO_LIBDFC)) { -+ if ((cmdiocbq->cmd_flag & LPFC_IO_FCP) && -+ !(cmdiocbq->cmd_flag & LPFC_IO_LIBDFC)) { - lpfc_cmd = container_of(cmdiocbq, struct lpfc_io_buf, -- cur_iocbq); -- if (rspiocbq && (rspiocbq->iocb_flag & LPFC_EXCHANGE_BUSY)) -+ cur_iocbq); -+ if (rspiocbq && (rspiocbq->cmd_flag & LPFC_EXCHANGE_BUSY)) - lpfc_cmd->flags |= LPFC_SBUF_XBUSY; - else - lpfc_cmd->flags &= ~LPFC_SBUF_XBUSY; -@@ -12939,7 +13026,7 @@ lpfc_sli_wake_iocb_wait(struct lpfc_hba *phba, - * @piocbq: Pointer to command iocb. - * @flag: Flag to test. 
- * -- * This routine grabs the hbalock and then test the iocb_flag to -+ * This routine grabs the hbalock and then test the cmd_flag to - * see if the passed in flag is set. - * Returns: - * 1 if flag is set. -@@ -12953,7 +13040,7 @@ lpfc_chk_iocb_flg(struct lpfc_hba *phba, - int ret; - - spin_lock_irqsave(&phba->hbalock, iflags); -- ret = piocbq->iocb_flag & flag; -+ ret = piocbq->cmd_flag & flag; - spin_unlock_irqrestore(&phba->hbalock, iflags); - return ret; - -@@ -12968,14 +13055,14 @@ lpfc_chk_iocb_flg(struct lpfc_hba *phba, - * @timeout: Timeout in number of seconds. - * - * This function issues the iocb to firmware and waits for the -- * iocb to complete. The iocb_cmpl field of the shall be used -+ * iocb to complete. The cmd_cmpl field of the shall be used - * to handle iocbs which time out. If the field is NULL, the - * function shall free the iocbq structure. If more clean up is - * needed, the caller is expected to provide a completion function - * that will provide the needed clean up. If the iocb command is - * not completed within timeout seconds, the function will either -- * free the iocbq structure (if iocb_cmpl == NULL) or execute the -- * completion function set in the iocb_cmpl field and then return -+ * free the iocbq structure (if cmd_cmpl == NULL) or execute the -+ * completion function set in the cmd_cmpl field and then return - * a status of IOCB_TIMEDOUT. The caller should not free the iocb - * resources if this function returns IOCB_TIMEDOUT. - * The function waits for the iocb completion using an -@@ -12987,7 +13074,7 @@ lpfc_chk_iocb_flg(struct lpfc_hba *phba, - * This function assumes that the iocb completions occur while - * this function sleep. So, this function cannot be called from - * the thread which process iocb completion for this ring. -- * This function clears the iocb_flag of the iocb object before -+ * This function clears the cmd_flag of the iocb object before - * issuing the iocb and the iocb completion handler sets this - * flag and wakes this thread when the iocb completes. - * The contents of the response iocb will be copied to prspiocbq -@@ -13027,10 +13114,10 @@ lpfc_sli_issue_iocb_wait(struct lpfc_hba *phba, - piocb->context2 = prspiocbq; - } - -- piocb->wait_iocb_cmpl = piocb->iocb_cmpl; -- piocb->iocb_cmpl = lpfc_sli_wake_iocb_wait; -+ piocb->wait_cmd_cmpl = piocb->cmd_cmpl; -+ piocb->cmd_cmpl = lpfc_sli_wake_iocb_wait; - piocb->context_un.wait_queue = &done_q; -- piocb->iocb_flag &= ~(LPFC_IO_WAKE | LPFC_IO_WAKE_TMO); -+ piocb->cmd_flag &= ~(LPFC_IO_WAKE | LPFC_IO_WAKE_TMO); - - if (phba->cfg_poll & DISABLE_FCP_RING_INT) { - if (lpfc_readl(phba->HCregaddr, &creg_val)) -@@ -13048,7 +13135,7 @@ lpfc_sli_issue_iocb_wait(struct lpfc_hba *phba, - lpfc_chk_iocb_flg(phba, piocb, LPFC_IO_WAKE), - timeout_req); - spin_lock_irqsave(&phba->hbalock, iflags); -- if (!(piocb->iocb_flag & LPFC_IO_WAKE)) { -+ if (!(piocb->cmd_flag & LPFC_IO_WAKE)) { - - /* - * IOCB timed out. 
Inform the wake iocb wait -@@ -13056,7 +13143,7 @@ lpfc_sli_issue_iocb_wait(struct lpfc_hba *phba, - */ - - iocb_completed = false; -- piocb->iocb_flag |= LPFC_IO_WAKE_TMO; -+ piocb->cmd_flag |= LPFC_IO_WAKE_TMO; - } - spin_unlock_irqrestore(&phba->hbalock, iflags); - if (iocb_completed) { -@@ -13111,7 +13198,7 @@ lpfc_sli_issue_iocb_wait(struct lpfc_hba *phba, - piocb->context2 = NULL; - - piocb->context_un.wait_queue = NULL; -- piocb->iocb_cmpl = NULL; -+ piocb->cmd_cmpl = NULL; - return retval; - } - -@@ -13317,6 +13404,7 @@ lpfc_sli4_eratt_read(struct lpfc_hba *phba) +@@ -13317,6 +13558,7 @@ lpfc_sli4_eratt_read(struct lpfc_hba *phba) uint32_t uerr_sta_hi, uerr_sta_lo; uint32_t if_type, portsmphr; struct lpfc_register portstat_reg; @@ -255546,7 +307066,7 @@ index 026a1196a54d5..f594a006d04c6 100644 /* * For now, use the SLI4 device internal unrecoverable error -@@ -13367,7 +13455,12 @@ lpfc_sli4_eratt_read(struct lpfc_hba *phba) +@@ -13367,7 +13609,12 @@ lpfc_sli4_eratt_read(struct lpfc_hba *phba) readl(phba->sli4_hba.u.if_type2.ERR1regaddr); phba->work_status[1] = readl(phba->sli4_hba.u.if_type2.ERR2regaddr); @@ -255560,260 +307080,7 @@ index 026a1196a54d5..f594a006d04c6 100644 "2885 Port Status Event: " "port status reg 0x%x, " "port smphr reg 0x%x, " -@@ -14046,135 +14139,19 @@ void lpfc_sli4_els_xri_abort_event_proc(struct lpfc_hba *phba) - } - - /** -- * lpfc_sli4_iocb_param_transfer - Transfer pIocbOut and cmpl status to pIocbIn -- * @phba: pointer to lpfc hba data structure -- * @pIocbIn: pointer to the rspiocbq -- * @pIocbOut: pointer to the cmdiocbq -- * @wcqe: pointer to the complete wcqe -- * -- * This routine transfers the fields of a command iocbq to a response iocbq -- * by copying all the IOCB fields from command iocbq and transferring the -- * completion status information from the complete wcqe. 
-- **/ --static void --lpfc_sli4_iocb_param_transfer(struct lpfc_hba *phba, -- struct lpfc_iocbq *pIocbIn, -- struct lpfc_iocbq *pIocbOut, -- struct lpfc_wcqe_complete *wcqe) --{ -- int numBdes, i; -- unsigned long iflags; -- uint32_t status, max_response; -- struct lpfc_dmabuf *dmabuf; -- struct ulp_bde64 *bpl, bde; -- size_t offset = offsetof(struct lpfc_iocbq, iocb); -- -- memcpy((char *)pIocbIn + offset, (char *)pIocbOut + offset, -- sizeof(struct lpfc_iocbq) - offset); -- /* Map WCQE parameters into irspiocb parameters */ -- status = bf_get(lpfc_wcqe_c_status, wcqe); -- pIocbIn->iocb.ulpStatus = (status & LPFC_IOCB_STATUS_MASK); -- if (pIocbOut->iocb_flag & LPFC_IO_FCP) -- if (pIocbIn->iocb.ulpStatus == IOSTAT_FCP_RSP_ERROR) -- pIocbIn->iocb.un.fcpi.fcpi_parm = -- pIocbOut->iocb.un.fcpi.fcpi_parm - -- wcqe->total_data_placed; -- else -- pIocbIn->iocb.un.ulpWord[4] = wcqe->parameter; -- else { -- pIocbIn->iocb.un.ulpWord[4] = wcqe->parameter; -- switch (pIocbOut->iocb.ulpCommand) { -- case CMD_ELS_REQUEST64_CR: -- dmabuf = (struct lpfc_dmabuf *)pIocbOut->context3; -- bpl = (struct ulp_bde64 *)dmabuf->virt; -- bde.tus.w = le32_to_cpu(bpl[1].tus.w); -- max_response = bde.tus.f.bdeSize; -- break; -- case CMD_GEN_REQUEST64_CR: -- max_response = 0; -- if (!pIocbOut->context3) -- break; -- numBdes = pIocbOut->iocb.un.genreq64.bdl.bdeSize/ -- sizeof(struct ulp_bde64); -- dmabuf = (struct lpfc_dmabuf *)pIocbOut->context3; -- bpl = (struct ulp_bde64 *)dmabuf->virt; -- for (i = 0; i < numBdes; i++) { -- bde.tus.w = le32_to_cpu(bpl[i].tus.w); -- if (bde.tus.f.bdeFlags != BUFF_TYPE_BDE_64) -- max_response += bde.tus.f.bdeSize; -- } -- break; -- default: -- max_response = wcqe->total_data_placed; -- break; -- } -- if (max_response < wcqe->total_data_placed) -- pIocbIn->iocb.un.genreq64.bdl.bdeSize = max_response; -- else -- pIocbIn->iocb.un.genreq64.bdl.bdeSize = -- wcqe->total_data_placed; -- } -- -- /* Convert BG errors for completion status */ -- if (status == CQE_STATUS_DI_ERROR) { -- pIocbIn->iocb.ulpStatus = IOSTAT_LOCAL_REJECT; -- -- if (bf_get(lpfc_wcqe_c_bg_edir, wcqe)) -- pIocbIn->iocb.un.ulpWord[4] = IOERR_RX_DMA_FAILED; -- else -- pIocbIn->iocb.un.ulpWord[4] = IOERR_TX_DMA_FAILED; -- -- pIocbIn->iocb.unsli3.sli3_bg.bgstat = 0; -- if (bf_get(lpfc_wcqe_c_bg_ge, wcqe)) /* Guard Check failed */ -- pIocbIn->iocb.unsli3.sli3_bg.bgstat |= -- BGS_GUARD_ERR_MASK; -- if (bf_get(lpfc_wcqe_c_bg_ae, wcqe)) /* App Tag Check failed */ -- pIocbIn->iocb.unsli3.sli3_bg.bgstat |= -- BGS_APPTAG_ERR_MASK; -- if (bf_get(lpfc_wcqe_c_bg_re, wcqe)) /* Ref Tag Check failed */ -- pIocbIn->iocb.unsli3.sli3_bg.bgstat |= -- BGS_REFTAG_ERR_MASK; -- -- /* Check to see if there was any good data before the error */ -- if (bf_get(lpfc_wcqe_c_bg_tdpv, wcqe)) { -- pIocbIn->iocb.unsli3.sli3_bg.bgstat |= -- BGS_HI_WATER_MARK_PRESENT_MASK; -- pIocbIn->iocb.unsli3.sli3_bg.bghm = -- wcqe->total_data_placed; -- } -- -- /* -- * Set ALL the error bits to indicate we don't know what -- * type of error it is. 
-- */ -- if (!pIocbIn->iocb.unsli3.sli3_bg.bgstat) -- pIocbIn->iocb.unsli3.sli3_bg.bgstat |= -- (BGS_REFTAG_ERR_MASK | BGS_APPTAG_ERR_MASK | -- BGS_GUARD_ERR_MASK); -- } -- -- /* Pick up HBA exchange busy condition */ -- if (bf_get(lpfc_wcqe_c_xb, wcqe)) { -- spin_lock_irqsave(&phba->hbalock, iflags); -- pIocbIn->iocb_flag |= LPFC_EXCHANGE_BUSY; -- spin_unlock_irqrestore(&phba->hbalock, iflags); -- } --} -- --/** -- * lpfc_sli4_els_wcqe_to_rspiocbq - Get response iocbq from els wcqe -+ * lpfc_sli4_els_preprocess_rspiocbq - Get response iocbq from els wcqe - * @phba: Pointer to HBA context object. - * @irspiocbq: Pointer to work-queue completion queue entry. - * - * This routine handles an ELS work-queue completion event and construct -- * a pseudo response ELS IODBQ from the SLI4 ELS WCQE for the common -+ * a pseudo response ELS IOCBQ from the SLI4 ELS WCQE for the common - * discovery engine to handle. - * - * Return: Pointer to the receive IOCBQ, NULL otherwise. - **/ - static struct lpfc_iocbq * --lpfc_sli4_els_wcqe_to_rspiocbq(struct lpfc_hba *phba, -- struct lpfc_iocbq *irspiocbq) -+lpfc_sli4_els_preprocess_rspiocbq(struct lpfc_hba *phba, -+ struct lpfc_iocbq *irspiocbq) - { - struct lpfc_sli_ring *pring; - struct lpfc_iocbq *cmdiocbq; -@@ -14186,11 +14163,13 @@ lpfc_sli4_els_wcqe_to_rspiocbq(struct lpfc_hba *phba, - return NULL; - - wcqe = &irspiocbq->cq_event.cqe.wcqe_cmpl; -+ spin_lock_irqsave(&pring->ring_lock, iflags); - pring->stats.iocb_event++; - /* Look up the ELS command IOCB and create pseudo response IOCB */ - cmdiocbq = lpfc_sli_iocbq_lookup_by_tag(phba, pring, - bf_get(lpfc_wcqe_c_request_tag, wcqe)); - if (unlikely(!cmdiocbq)) { -+ spin_unlock_irqrestore(&pring->ring_lock, iflags); - lpfc_printf_log(phba, KERN_WARNING, LOG_SLI, - "0386 ELS complete with no corresponding " - "cmdiocb: 0x%x 0x%x 0x%x 0x%x\n", -@@ -14200,13 +14179,18 @@ lpfc_sli4_els_wcqe_to_rspiocbq(struct lpfc_hba *phba, - return NULL; - } - -- spin_lock_irqsave(&pring->ring_lock, iflags); -+ memcpy(&irspiocbq->wqe, &cmdiocbq->wqe, sizeof(union lpfc_wqe128)); -+ memcpy(&irspiocbq->wcqe_cmpl, wcqe, sizeof(*wcqe)); -+ - /* Put the iocb back on the txcmplq */ - lpfc_sli_ringtxcmpl_put(phba, pring, cmdiocbq); - spin_unlock_irqrestore(&pring->ring_lock, iflags); - -- /* Fake the irspiocbq and copy necessary response information */ -- lpfc_sli4_iocb_param_transfer(phba, irspiocbq, cmdiocbq, wcqe); -+ if (bf_get(lpfc_wcqe_c_xb, wcqe)) { -+ spin_lock_irqsave(&phba->hbalock, iflags); -+ irspiocbq->cmd_flag |= LPFC_EXCHANGE_BUSY; -+ spin_unlock_irqrestore(&phba->hbalock, iflags); -+ } - - return irspiocbq; - } -@@ -15007,7 +14991,6 @@ lpfc_sli4_fp_handle_fcp_wcqe(struct lpfc_hba *phba, struct lpfc_queue *cq, - { - struct lpfc_sli_ring *pring = cq->pring; - struct lpfc_iocbq *cmdiocbq; -- struct lpfc_iocbq irspiocbq; - unsigned long iflags; - - /* Check for response status */ -@@ -15033,9 +15016,9 @@ lpfc_sli4_fp_handle_fcp_wcqe(struct lpfc_hba *phba, struct lpfc_queue *cq, - /* Look up the FCP command IOCB and create pseudo response IOCB */ - spin_lock_irqsave(&pring->ring_lock, iflags); - pring->stats.iocb_event++; -- spin_unlock_irqrestore(&pring->ring_lock, iflags); - cmdiocbq = lpfc_sli_iocbq_lookup_by_tag(phba, pring, - bf_get(lpfc_wcqe_c_request_tag, wcqe)); -+ spin_unlock_irqrestore(&pring->ring_lock, iflags); - if (unlikely(!cmdiocbq)) { - lpfc_printf_log(phba, KERN_WARNING, LOG_SLI, - "0374 FCP complete with no corresponding " -@@ -15046,39 +15029,31 @@ lpfc_sli4_fp_handle_fcp_wcqe(struct lpfc_hba *phba, 
struct lpfc_queue *cq, - #ifdef CONFIG_SCSI_LPFC_DEBUG_FS - cmdiocbq->isr_timestamp = cq->isr_timestamp; - #endif -- if (cmdiocbq->iocb_cmpl == NULL) { -- if (cmdiocbq->wqe_cmpl) { -- /* For FCP the flag is cleared in wqe_cmpl */ -- if (!(cmdiocbq->iocb_flag & LPFC_IO_FCP) && -- cmdiocbq->iocb_flag & LPFC_DRIVER_ABORTED) { -- spin_lock_irqsave(&phba->hbalock, iflags); -- cmdiocbq->iocb_flag &= ~LPFC_DRIVER_ABORTED; -- spin_unlock_irqrestore(&phba->hbalock, iflags); -- } -+ if (bf_get(lpfc_wcqe_c_xb, wcqe)) { -+ spin_lock_irqsave(&phba->hbalock, iflags); -+ cmdiocbq->cmd_flag |= LPFC_EXCHANGE_BUSY; -+ spin_unlock_irqrestore(&phba->hbalock, iflags); -+ } - -- /* Pass the cmd_iocb and the wcqe to the upper layer */ -- (cmdiocbq->wqe_cmpl)(phba, cmdiocbq, wcqe); -- return; -+ if (cmdiocbq->cmd_cmpl) { -+ /* For FCP the flag is cleared in cmd_cmpl */ -+ if (!(cmdiocbq->cmd_flag & LPFC_IO_FCP) && -+ cmdiocbq->cmd_flag & LPFC_DRIVER_ABORTED) { -+ spin_lock_irqsave(&phba->hbalock, iflags); -+ cmdiocbq->cmd_flag &= ~LPFC_DRIVER_ABORTED; -+ spin_unlock_irqrestore(&phba->hbalock, iflags); - } -+ -+ /* Pass the cmd_iocb and the wcqe to the upper layer */ -+ memcpy(&cmdiocbq->wcqe_cmpl, wcqe, -+ sizeof(struct lpfc_wcqe_complete)); -+ cmdiocbq->cmd_cmpl(phba, cmdiocbq, cmdiocbq); -+ } else { - lpfc_printf_log(phba, KERN_WARNING, LOG_SLI, - "0375 FCP cmdiocb not callback function " - "iotag: (%d)\n", - bf_get(lpfc_wcqe_c_request_tag, wcqe)); -- return; -- } -- -- /* Only SLI4 non-IO commands stil use IOCB */ -- /* Fake the irspiocb and copy necessary response information */ -- lpfc_sli4_iocb_param_transfer(phba, &irspiocbq, cmdiocbq, wcqe); -- -- if (cmdiocbq->iocb_flag & LPFC_DRIVER_ABORTED) { -- spin_lock_irqsave(&phba->hbalock, iflags); -- cmdiocbq->iocb_flag &= ~LPFC_DRIVER_ABORTED; -- spin_unlock_irqrestore(&phba->hbalock, iflags); - } -- -- /* Pass the cmd_iocb and the rsp state to the upper layer */ -- (cmdiocbq->iocb_cmpl)(phba, cmdiocbq, &irspiocbq); - } - - /** -@@ -18389,7 +18364,6 @@ lpfc_fc_frame_check(struct lpfc_hba *phba, struct fc_frame_header *fc_hdr) +@@ -18389,7 +18636,6 @@ lpfc_fc_frame_check(struct lpfc_hba *phba, struct fc_frame_header *fc_hdr) case FC_RCTL_ELS_REP: /* extended link services reply */ case FC_RCTL_ELS4_REQ: /* FC-4 ELS request */ case FC_RCTL_ELS4_REP: /* FC-4 ELS reply */ @@ -255821,7 +307088,7 @@ index 026a1196a54d5..f594a006d04c6 100644 case FC_RCTL_BA_ABTS: /* basic link service abort */ case FC_RCTL_BA_RMC: /* remove connection */ case FC_RCTL_BA_ACC: /* basic accept */ -@@ -18410,6 +18384,7 @@ lpfc_fc_frame_check(struct lpfc_hba *phba, struct fc_frame_header *fc_hdr) +@@ -18410,6 +18656,7 @@ lpfc_fc_frame_check(struct lpfc_hba *phba, struct fc_frame_header *fc_hdr) fc_vft_hdr = (struct fc_vft_header *)fc_hdr; fc_hdr = &((struct fc_frame_header *)fc_vft_hdr)[1]; return lpfc_fc_frame_check(phba, fc_hdr); @@ -255829,31 +307096,7 @@ index 026a1196a54d5..f594a006d04c6 100644 default: goto drop; } -@@ -18900,17 +18875,20 @@ lpfc_sli4_seq_abort_rsp(struct lpfc_vport *vport, - } - - ctiocb->vport = phba->pport; -- ctiocb->iocb_cmpl = lpfc_sli4_seq_abort_rsp_cmpl; -+ ctiocb->cmd_cmpl = lpfc_sli4_seq_abort_rsp_cmpl; - ctiocb->sli4_lxritag = NO_XRI; - ctiocb->sli4_xritag = NO_XRI; - -- if (fctl & FC_FC_EX_CTX) -+ if (fctl & FC_FC_EX_CTX) { - /* Exchange responder sent the abort so we - * own the oxid. 
- */ -+ ctiocb->abort_bls = LPFC_ABTS_UNSOL_RSP; - xri = oxid; -- else -+ } else { -+ ctiocb->abort_bls = LPFC_ABTS_UNSOL_INT; - xri = rxid; -+ } - lxri = lpfc_sli4_xri_inrange(phba, xri); - if (lxri != NO_XRI) - lpfc_set_rrq_active(phba, ndlp, lxri, -@@ -19222,16 +19200,18 @@ lpfc_sli4_send_seq_to_ulp(struct lpfc_vport *vport, +@@ -19222,12 +19469,14 @@ lpfc_sli4_send_seq_to_ulp(struct lpfc_vport *vport, if (!lpfc_complete_unsol_iocb(phba, phba->sli4_hba.els_wq->pring, iocbq, fc_hdr->fh_r_ctl, @@ -255869,32 +307112,7 @@ index 026a1196a54d5..f594a006d04c6 100644 /* Free iocb created in lpfc_prep_seq */ list_for_each_entry_safe(curr_iocb, next_iocb, -- &iocbq->list, list) { -+ &iocbq->list, list) { - list_del_init(&curr_iocb->list); - lpfc_sli_release_iocbq(phba, curr_iocb); - } -@@ -19301,8 +19281,8 @@ lpfc_sli4_handle_mds_loopback(struct lpfc_vport *vport, - - iocbq->context2 = pcmd; - iocbq->vport = vport; -- iocbq->iocb_flag &= ~LPFC_FIP_ELS_ID_MASK; -- iocbq->iocb_flag |= LPFC_USE_FCPWQIDX; -+ iocbq->cmd_flag &= ~LPFC_FIP_ELS_ID_MASK; -+ iocbq->cmd_flag |= LPFC_USE_FCPWQIDX; - - /* - * Setup rest of the iocb as though it were a WQE -@@ -19320,7 +19300,7 @@ lpfc_sli4_handle_mds_loopback(struct lpfc_vport *vport, - - iocbq->iocb.ulpCommand = CMD_SEND_FRAME; - iocbq->iocb.ulpLe = 1; -- iocbq->iocb_cmpl = lpfc_sli4_mds_loopback_cmpl; -+ iocbq->cmd_cmpl = lpfc_sli4_mds_loopback_cmpl; - rc = lpfc_sli_issue_iocb(phba, LPFC_ELS_RING, iocbq, 0); - if (rc == IOCB_ERROR) - goto exit; -@@ -21107,6 +21087,7 @@ lpfc_drain_txq(struct lpfc_hba *phba) +@@ -21107,6 +21356,7 @@ lpfc_drain_txq(struct lpfc_hba *phba) fail_msg, piocbq->iotag, piocbq->sli4_xritag); list_add_tail(&piocbq->list, &completions); @@ -255902,95 +307120,7 @@ index 026a1196a54d5..f594a006d04c6 100644 } spin_unlock_irqrestore(&pring->ring_lock, iflags); } -@@ -21161,7 +21142,7 @@ lpfc_wqe_bpl2sgl(struct lpfc_hba *phba, struct lpfc_iocbq *pwqeq, - cmd = bf_get(wqe_cmnd, &wqe->generic.wqe_com); - if (cmd == CMD_XMIT_BLS_RSP64_WQE) - return sglq->sli4_xritag; -- numBdes = pwqeq->rsvd2; -+ numBdes = pwqeq->num_bdes; - if (numBdes) { - /* The addrHigh and addrLow fields within the WQE - * have not been byteswapped yet so there is no -@@ -21262,7 +21243,7 @@ lpfc_sli4_issue_wqe(struct lpfc_hba *phba, struct lpfc_sli4_hdw_queue *qp, - uint32_t ret = 0; - - /* NVME_LS and NVME_LS ABTS requests. */ -- if (pwqe->iocb_flag & LPFC_IO_NVME_LS) { -+ if (pwqe->cmd_flag & LPFC_IO_NVME_LS) { - pring = phba->sli4_hba.nvmels_wq->pring; - lpfc_qp_spin_lock_irqsave(&pring->ring_lock, iflags, - qp, wq_access); -@@ -21293,7 +21274,7 @@ lpfc_sli4_issue_wqe(struct lpfc_hba *phba, struct lpfc_sli4_hdw_queue *qp, - } - - /* NVME_FCREQ and NVME_ABTS requests */ -- if (pwqe->iocb_flag & (LPFC_IO_NVME | LPFC_IO_FCP | LPFC_IO_CMF)) { -+ if (pwqe->cmd_flag & (LPFC_IO_NVME | LPFC_IO_FCP | LPFC_IO_CMF)) { - /* Get the IO distribution (hba_wqidx) for WQ assignment. */ - wq = qp->io_wq; - pring = wq->pring; -@@ -21315,7 +21296,7 @@ lpfc_sli4_issue_wqe(struct lpfc_hba *phba, struct lpfc_sli4_hdw_queue *qp, - } - - /* NVMET requests */ -- if (pwqe->iocb_flag & LPFC_IO_NVMET) { -+ if (pwqe->cmd_flag & LPFC_IO_NVMET) { - /* Get the IO distribution (hba_wqidx) for WQ assignment. */ - wq = qp->io_wq; - pring = wq->pring; -@@ -21381,7 +21362,7 @@ lpfc_sli4_issue_abort_iotag(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, - return WQE_NORESOURCE; - - /* Indicate the IO is being aborted by the driver. 
*/ -- cmdiocb->iocb_flag |= LPFC_DRIVER_ABORTED; -+ cmdiocb->cmd_flag |= LPFC_DRIVER_ABORTED; - - abtswqe = &abtsiocb->wqe; - memset(abtswqe, 0, sizeof(*abtswqe)); -@@ -21400,15 +21381,15 @@ lpfc_sli4_issue_abort_iotag(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, - - /* ABTS WQE must go to the same WQ as the WQE to be aborted */ - abtsiocb->hba_wqidx = cmdiocb->hba_wqidx; -- abtsiocb->iocb_flag |= LPFC_USE_FCPWQIDX; -- if (cmdiocb->iocb_flag & LPFC_IO_FCP) -- abtsiocb->iocb_flag |= LPFC_IO_FCP; -- if (cmdiocb->iocb_flag & LPFC_IO_NVME) -- abtsiocb->iocb_flag |= LPFC_IO_NVME; -- if (cmdiocb->iocb_flag & LPFC_IO_FOF) -- abtsiocb->iocb_flag |= LPFC_IO_FOF; -+ abtsiocb->cmd_flag |= LPFC_USE_FCPWQIDX; -+ if (cmdiocb->cmd_flag & LPFC_IO_FCP) -+ abtsiocb->cmd_flag |= LPFC_IO_FCP; -+ if (cmdiocb->cmd_flag & LPFC_IO_NVME) -+ abtsiocb->cmd_flag |= LPFC_IO_NVME; -+ if (cmdiocb->cmd_flag & LPFC_IO_FOF) -+ abtsiocb->cmd_flag |= LPFC_IO_FOF; - abtsiocb->vport = vport; -- abtsiocb->wqe_cmpl = cmpl; -+ abtsiocb->cmd_cmpl = cmpl; - - lpfc_cmd = container_of(cmdiocb, struct lpfc_io_buf, cur_iocbq); - retval = lpfc_sli4_issue_wqe(phba, lpfc_cmd->hdwq, abtsiocb); -@@ -21419,7 +21400,7 @@ lpfc_sli4_issue_abort_iotag(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, - xritag, cmdiocb->iotag, abtsiocb->iotag, retval); - - if (retval) { -- cmdiocb->iocb_flag &= ~LPFC_DRIVER_ABORTED; -+ cmdiocb->cmd_flag &= ~LPFC_DRIVER_ABORTED; - __lpfc_sli_release_iocbq(phba, abtsiocb); - } - -@@ -21781,8 +21762,7 @@ void lpfc_release_io_buf(struct lpfc_hba *phba, struct lpfc_io_buf *lpfc_ncmd, - - /* MUST zero fields if buffer is reused by another protocol */ - lpfc_ncmd->nvmeCmd = NULL; -- lpfc_ncmd->cur_iocbq.wqe_cmpl = NULL; -- lpfc_ncmd->cur_iocbq.iocb_cmpl = NULL; -+ lpfc_ncmd->cur_iocbq.cmd_cmpl = NULL; - - if (phba->cfg_xpsgl && !phba->nvmet_support && - !list_empty(&lpfc_ncmd->dma_sgl_xtra_list)) -@@ -21966,8 +21946,26 @@ lpfc_get_io_buf_from_multixri_pools(struct lpfc_hba *phba, +@@ -21966,8 +22216,26 @@ lpfc_get_io_buf_from_multixri_pools(struct lpfc_hba *phba, qp = &phba->sli4_hba.hdwq[hwqid]; lpfc_ncmd = NULL; @@ -256017,7 +307147,7 @@ index 026a1196a54d5..f594a006d04c6 100644 multixri_pool->io_req_count++; /* If pvt_pool is empty, move some XRIs from public to private pool */ -@@ -22043,6 +22041,12 @@ struct lpfc_io_buf *lpfc_get_io_buf(struct lpfc_hba *phba, +@@ -22043,6 +22311,12 @@ struct lpfc_io_buf *lpfc_get_io_buf(struct lpfc_hba *phba, qp = &phba->sli4_hba.hdwq[hwqid]; lpfc_cmd = NULL; @@ -256030,64 +307160,6 @@ index 026a1196a54d5..f594a006d04c6 100644 if (phba->cfg_xri_rebalancing) lpfc_cmd = lpfc_get_io_buf_from_multixri_pools( -diff --git a/drivers/scsi/lpfc/lpfc_sli.h b/drivers/scsi/lpfc/lpfc_sli.h -index 5161ccacea3e9..06682ad8bbe15 100644 ---- a/drivers/scsi/lpfc/lpfc_sli.h -+++ b/drivers/scsi/lpfc/lpfc_sli.h -@@ -35,7 +35,7 @@ typedef enum _lpfc_ctx_cmd { - LPFC_CTX_HOST - } lpfc_ctx_cmd; - --union lpfc_vmid_iocb_tag { -+union lpfc_vmid_tag { - uint32_t app_id; - uint8_t cs_ctl_vmid; - struct lpfc_vmid_context *vmid_context; /* UVEM context information */ -@@ -69,16 +69,18 @@ struct lpfc_iocbq { - uint16_t sli4_xritag; /* pre-assigned XRI, (OXID) tag. 
*/ - uint16_t hba_wqidx; /* index to HBA work queue */ - struct lpfc_cq_event cq_event; -- struct lpfc_wcqe_complete wcqe_cmpl; /* WQE cmpl */ - uint64_t isr_timestamp; - - union lpfc_wqe128 wqe; /* SLI-4 */ - IOCB_t iocb; /* SLI-3 */ -+ struct lpfc_wcqe_complete wcqe_cmpl; /* WQE cmpl */ -+ -+ uint8_t num_bdes; -+ uint8_t abort_bls; /* ABTS by initiator or responder */ - -- uint8_t rsvd2; - uint8_t priority; /* OAS priority */ - uint8_t retry; /* retry counter for IOCB cmd - if needed */ -- uint32_t iocb_flag; -+ u32 cmd_flag; - #define LPFC_IO_LIBDFC 1 /* libdfc iocb */ - #define LPFC_IO_WAKE 2 /* Synchronous I/O completed */ - #define LPFC_IO_WAKE_TMO LPFC_IO_WAKE /* Synchronous I/O timed out */ -@@ -123,15 +125,13 @@ struct lpfc_iocbq { - struct lpfc_node_rrq *rrq; - } context_un; - -- union lpfc_vmid_iocb_tag vmid_tag; -- void (*fabric_iocb_cmpl)(struct lpfc_hba *, struct lpfc_iocbq *, -- struct lpfc_iocbq *); -- void (*wait_iocb_cmpl)(struct lpfc_hba *, struct lpfc_iocbq *, -- struct lpfc_iocbq *); -- void (*iocb_cmpl)(struct lpfc_hba *, struct lpfc_iocbq *, -- struct lpfc_iocbq *); -- void (*wqe_cmpl)(struct lpfc_hba *, struct lpfc_iocbq *, -- struct lpfc_wcqe_complete *); -+ union lpfc_vmid_tag vmid_tag; -+ void (*fabric_cmd_cmpl)(struct lpfc_hba *phba, struct lpfc_iocbq *cmd, -+ struct lpfc_iocbq *rsp); -+ void (*wait_cmd_cmpl)(struct lpfc_hba *phba, struct lpfc_iocbq *cmd, -+ struct lpfc_iocbq *rsp); -+ void (*cmd_cmpl)(struct lpfc_hba *phba, struct lpfc_iocbq *cmd, -+ struct lpfc_iocbq *rsp); - }; - - #define SLI_IOCB_RET_IOCB 1 /* Return IOCB if cmd ring full */ diff --git a/drivers/scsi/lpfc/lpfc_sli4.h b/drivers/scsi/lpfc/lpfc_sli4.h index 99c5d1e4da5ef..5962cf508842f 100644 --- a/drivers/scsi/lpfc/lpfc_sli4.h @@ -256201,6 +307273,16 @@ index 26d0cf9353dd6..056837849ead5 100644 return -ENOMEM; } } +diff --git a/drivers/scsi/mpi3mr/Makefile b/drivers/scsi/mpi3mr/Makefile +index 7c2063e04c818..7ebca0ba538da 100644 +--- a/drivers/scsi/mpi3mr/Makefile ++++ b/drivers/scsi/mpi3mr/Makefile +@@ -1,4 +1,4 @@ + # mpi3mr makefile +-obj-m += mpi3mr.o ++obj-$(CONFIG_SCSI_MPI3MR) += mpi3mr.o + mpi3mr-y += mpi3mr_os.o \ + mpi3mr_fw.o \ diff --git a/drivers/scsi/mpi3mr/mpi3mr.h b/drivers/scsi/mpi3mr/mpi3mr.h index 9787b53a2b598..2cc42432bd0c0 100644 --- a/drivers/scsi/mpi3mr/mpi3mr.h @@ -256255,7 +307337,7 @@ index 3cae8803383b6..b2c650542bac5 100644 xfer_count == 0 && (scsi_status == MPI3_SCSI_STATUS_BUSY || scsi_status == MPI3_SCSI_STATUS_RESERVATION_CONFLICT || diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.c b/drivers/scsi/mpt3sas/mpt3sas_base.c -index 27eb652b564f5..be024b2b6bd43 100644 +index 27eb652b564f5..766c3a59a900a 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_base.c +++ b/drivers/scsi/mpt3sas/mpt3sas_base.c @@ -639,8 +639,8 @@ static void _base_sync_drv_fw_timestamp(struct MPT3SAS_ADAPTER *ioc) @@ -256282,7 +307364,49 @@ index 27eb652b564f5..be024b2b6bd43 100644 } /** -@@ -3004,7 +3005,7 @@ _base_config_dma_addressing(struct MPT3SAS_ADAPTER *ioc, struct pci_dev *pdev) +@@ -2593,12 +2594,8 @@ _base_check_pcie_native_sgl(struct MPT3SAS_ADAPTER *ioc, + + /* Get the SG list pointer and info. */ + sges_left = scsi_dma_map(scmd); +- if (sges_left < 0) { +- sdev_printk(KERN_ERR, scmd->device, +- "scsi_dma_map failed: request for %d bytes!\n", +- scsi_bufflen(scmd)); ++ if (sges_left < 0) + return 1; +- } + + /* Check if we need to build a native SG list. 
*/ + if (!base_is_prp_possible(ioc, pcie_device, +@@ -2705,12 +2702,8 @@ _base_build_sg_scmd(struct MPT3SAS_ADAPTER *ioc, + + sg_scmd = scsi_sglist(scmd); + sges_left = scsi_dma_map(scmd); +- if (sges_left < 0) { +- sdev_printk(KERN_ERR, scmd->device, +- "scsi_dma_map failed: request for %d bytes!\n", +- scsi_bufflen(scmd)); ++ if (sges_left < 0) + return -ENOMEM; +- } + + sg_local = &mpi_request->SGL; + sges_in_segment = ioc->max_sges_in_main_message; +@@ -2853,12 +2846,8 @@ _base_build_sg_scmd_ieee(struct MPT3SAS_ADAPTER *ioc, + + sg_scmd = scsi_sglist(scmd); + sges_left = scsi_dma_map(scmd); +- if (sges_left < 0) { +- sdev_printk(KERN_ERR, scmd->device, +- "scsi_dma_map failed: request for %d bytes!\n", +- scsi_bufflen(scmd)); ++ if (sges_left < 0) + return -ENOMEM; +- } + + sg_local = &mpi_request->SGL; + sges_in_segment = (ioc->request_sz - +@@ -3004,7 +2993,7 @@ _base_config_dma_addressing(struct MPT3SAS_ADAPTER *ioc, struct pci_dev *pdev) if (ioc->is_mcpu_endpoint || sizeof(dma_addr_t) == 4 || ioc->use_32bit_dma || @@ -256291,7 +307415,7 @@ index 27eb652b564f5..be024b2b6bd43 100644 ioc->dma_mask = 32; /* Set 63 bit DMA mask for all SAS3 and SAS35 controllers */ else if (ioc->hba_mpi_version_belonged > MPI2_VERSION) -@@ -5380,6 +5381,7 @@ static int _base_assign_fw_reported_qd(struct MPT3SAS_ADAPTER *ioc) +@@ -5380,6 +5369,7 @@ static int _base_assign_fw_reported_qd(struct MPT3SAS_ADAPTER *ioc) Mpi2ConfigReply_t mpi_reply; Mpi2SasIOUnitPage1_t *sas_iounit_pg1 = NULL; Mpi26PCIeIOUnitPage1_t pcie_iounit_pg1; @@ -256299,7 +307423,7 @@ index 27eb652b564f5..be024b2b6bd43 100644 int sz; int rc = 0; -@@ -5391,7 +5393,7 @@ static int _base_assign_fw_reported_qd(struct MPT3SAS_ADAPTER *ioc) +@@ -5391,7 +5381,7 @@ static int _base_assign_fw_reported_qd(struct MPT3SAS_ADAPTER *ioc) goto out; /* sas iounit page 1 */ sz = offsetof(Mpi2SasIOUnitPage1_t, PhyData); @@ -256308,7 +307432,7 @@ index 27eb652b564f5..be024b2b6bd43 100644 if (!sas_iounit_pg1) { pr_err("%s: failure at %s:%d/%s()!\n", ioc->name, __FILE__, __LINE__, __func__); -@@ -5404,16 +5406,16 @@ static int _base_assign_fw_reported_qd(struct MPT3SAS_ADAPTER *ioc) +@@ -5404,16 +5394,16 @@ static int _base_assign_fw_reported_qd(struct MPT3SAS_ADAPTER *ioc) ioc->name, __FILE__, __LINE__, __func__); goto out; } @@ -256335,7 +307459,7 @@ index 27eb652b564f5..be024b2b6bd43 100644 /* pcie iounit page 1 */ rc = mpt3sas_config_get_pcie_iounit_pg1(ioc, &mpi_reply, &pcie_iounit_pg1, sizeof(Mpi26PCIeIOUnitPage1_t)); -@@ -5736,14 +5738,13 @@ _base_release_memory_pools(struct MPT3SAS_ADAPTER *ioc) +@@ -5736,14 +5726,13 @@ _base_release_memory_pools(struct MPT3SAS_ADAPTER *ioc) */ static int @@ -256354,7 +307478,7 @@ index 27eb652b564f5..be024b2b6bd43 100644 return 1; else return 0; -@@ -5804,7 +5805,7 @@ _base_allocate_pcie_sgl_pool(struct MPT3SAS_ADAPTER *ioc, u32 sz) +@@ -5804,7 +5793,7 @@ _base_allocate_pcie_sgl_pool(struct MPT3SAS_ADAPTER *ioc, u32 sz) } if (!mpt3sas_check_same_4gb_region( @@ -256363,7 +307487,7 @@ index 27eb652b564f5..be024b2b6bd43 100644 ioc_err(ioc, "PCIE SGLs are not in same 4G !! 
pcie sgl (0x%p) dma = (0x%llx)\n", ioc->pcie_sg_lookup[i].pcie_sgl, (unsigned long long) -@@ -5859,8 +5860,8 @@ _base_allocate_chain_dma_pool(struct MPT3SAS_ADAPTER *ioc, u32 sz) +@@ -5859,8 +5848,8 @@ _base_allocate_chain_dma_pool(struct MPT3SAS_ADAPTER *ioc, u32 sz) GFP_KERNEL, &ctr->chain_buffer_dma); if (!ctr->chain_buffer) return -EAGAIN; @@ -256374,7 +307498,7 @@ index 27eb652b564f5..be024b2b6bd43 100644 ioc_err(ioc, "Chain buffers are not in same 4G !!! Chain buff (0x%p) dma = (0x%llx)\n", ctr->chain_buffer, -@@ -5896,7 +5897,7 @@ _base_allocate_sense_dma_pool(struct MPT3SAS_ADAPTER *ioc, u32 sz) +@@ -5896,7 +5885,7 @@ _base_allocate_sense_dma_pool(struct MPT3SAS_ADAPTER *ioc, u32 sz) GFP_KERNEL, &ioc->sense_dma); if (!ioc->sense) return -EAGAIN; @@ -256383,7 +307507,7 @@ index 27eb652b564f5..be024b2b6bd43 100644 dinitprintk(ioc, pr_err( "Bad Sense Pool! sense (0x%p) sense_dma = (0x%llx)\n", ioc->sense, (unsigned long long) ioc->sense_dma)); -@@ -5929,7 +5930,7 @@ _base_allocate_reply_pool(struct MPT3SAS_ADAPTER *ioc, u32 sz) +@@ -5929,7 +5918,7 @@ _base_allocate_reply_pool(struct MPT3SAS_ADAPTER *ioc, u32 sz) &ioc->reply_dma); if (!ioc->reply) return -EAGAIN; @@ -256392,7 +307516,7 @@ index 27eb652b564f5..be024b2b6bd43 100644 dinitprintk(ioc, pr_err( "Bad Reply Pool! Reply (0x%p) Reply dma = (0x%llx)\n", ioc->reply, (unsigned long long) ioc->reply_dma)); -@@ -5964,7 +5965,7 @@ _base_allocate_reply_free_dma_pool(struct MPT3SAS_ADAPTER *ioc, u32 sz) +@@ -5964,7 +5953,7 @@ _base_allocate_reply_free_dma_pool(struct MPT3SAS_ADAPTER *ioc, u32 sz) GFP_KERNEL, &ioc->reply_free_dma); if (!ioc->reply_free) return -EAGAIN; @@ -256401,7 +307525,7 @@ index 27eb652b564f5..be024b2b6bd43 100644 dinitprintk(ioc, pr_err("Bad Reply Free Pool! Reply Free (0x%p) Reply Free dma = (0x%llx)\n", ioc->reply_free, (unsigned long long) ioc->reply_free_dma)); -@@ -6003,7 +6004,7 @@ _base_allocate_reply_post_free_array(struct MPT3SAS_ADAPTER *ioc, +@@ -6003,7 +5992,7 @@ _base_allocate_reply_post_free_array(struct MPT3SAS_ADAPTER *ioc, GFP_KERNEL, &ioc->reply_post_free_array_dma); if (!ioc->reply_post_free_array) return -EAGAIN; @@ -256410,7 +307534,7 @@ index 27eb652b564f5..be024b2b6bd43 100644 reply_post_free_array_sz)) { dinitprintk(ioc, pr_err( "Bad Reply Free Pool! Reply Free (0x%p) Reply Free dma = (0x%llx)\n", -@@ -6068,7 +6069,7 @@ base_alloc_rdpq_dma_pool(struct MPT3SAS_ADAPTER *ioc, int sz) +@@ -6068,7 +6057,7 @@ base_alloc_rdpq_dma_pool(struct MPT3SAS_ADAPTER *ioc, int sz) * resources and set DMA mask to 32 and allocate. 
*/ if (!mpt3sas_check_same_4gb_region( @@ -256622,6 +307746,19 @@ index ad1b6c2b37a74..9eb3d0b4891dd 100644 ioc->shost_recovery = 1; mpt3sas_base_make_ioc_ready(ioc, SOFT_RESET); ioc->shost_recovery = 0; +diff --git a/drivers/scsi/mpt3sas/mpt3sas_transport.c b/drivers/scsi/mpt3sas/mpt3sas_transport.c +index 0681daee6c149..e5ecd6ada6cdd 100644 +--- a/drivers/scsi/mpt3sas/mpt3sas_transport.c ++++ b/drivers/scsi/mpt3sas/mpt3sas_transport.c +@@ -829,6 +829,8 @@ mpt3sas_transport_port_add(struct MPT3SAS_ADAPTER *ioc, u16 handle, + if ((sas_rphy_add(rphy))) { + ioc_err(ioc, "failure at %s:%d/%s()!\n", + __FILE__, __LINE__, __func__); ++ sas_rphy_free(rphy); ++ rphy = NULL; + } + + if (mpt3sas_port->remote_identify.device_type == SAS_END_DEVICE) { diff --git a/drivers/scsi/mvsas/mv_init.c b/drivers/scsi/mvsas/mv_init.c index f18dd97035955..f6f8ca3c8c7f5 100644 --- a/drivers/scsi/mvsas/mv_init.c @@ -258076,7 +309213,7 @@ index e6dc0b495a829..a117d11f2b078 100644 qedi_ops->stop(qedi->cdev); stop_slowpath: diff --git a/drivers/scsi/qla2xxx/qla_attr.c b/drivers/scsi/qla2xxx/qla_attr.c -index d09776b77af2e..a302ed8b610fb 100644 +index d09776b77af2e..b6a427f0570e5 100644 --- a/drivers/scsi/qla2xxx/qla_attr.c +++ b/drivers/scsi/qla2xxx/qla_attr.c @@ -555,7 +555,7 @@ qla2x00_sysfs_read_vpd(struct file *filp, struct kobject *kobj, @@ -258208,6 +309345,52 @@ index d09776b77af2e..a302ed8b610fb 100644 qla2x00_port_logout(fcport->vha, fcport); } } +@@ -3286,11 +3318,34 @@ struct fc_function_template qla2xxx_transport_vport_functions = { + .bsg_timeout = qla24xx_bsg_timeout, + }; + ++static uint ++qla2x00_get_host_supported_speeds(scsi_qla_host_t *vha, uint speeds) ++{ ++ uint supported_speeds = FC_PORTSPEED_UNKNOWN; ++ ++ if (speeds & FDMI_PORT_SPEED_64GB) ++ supported_speeds |= FC_PORTSPEED_64GBIT; ++ if (speeds & FDMI_PORT_SPEED_32GB) ++ supported_speeds |= FC_PORTSPEED_32GBIT; ++ if (speeds & FDMI_PORT_SPEED_16GB) ++ supported_speeds |= FC_PORTSPEED_16GBIT; ++ if (speeds & FDMI_PORT_SPEED_8GB) ++ supported_speeds |= FC_PORTSPEED_8GBIT; ++ if (speeds & FDMI_PORT_SPEED_4GB) ++ supported_speeds |= FC_PORTSPEED_4GBIT; ++ if (speeds & FDMI_PORT_SPEED_2GB) ++ supported_speeds |= FC_PORTSPEED_2GBIT; ++ if (speeds & FDMI_PORT_SPEED_1GB) ++ supported_speeds |= FC_PORTSPEED_1GBIT; ++ ++ return supported_speeds; ++} ++ + void + qla2x00_init_host_attr(scsi_qla_host_t *vha) + { + struct qla_hw_data *ha = vha->hw; +- u32 speeds = FC_PORTSPEED_UNKNOWN; ++ u32 speeds = 0, fdmi_speed = 0; + + fc_host_dev_loss_tmo(vha->host) = ha->port_down_retry_count; + fc_host_node_name(vha->host) = wwn_to_u64(vha->node_name); +@@ -3300,7 +3355,8 @@ qla2x00_init_host_attr(scsi_qla_host_t *vha) + fc_host_max_npiv_vports(vha->host) = ha->max_npiv_vports; + fc_host_npiv_vports_inuse(vha->host) = ha->cur_vport_count; + +- speeds = qla25xx_fdmi_port_speed_capability(ha); ++ fdmi_speed = qla25xx_fdmi_port_speed_capability(ha); ++ speeds = qla2x00_get_host_supported_speeds(vha, fdmi_speed); + + fc_host_supported_speeds(vha->host) = speeds; + } diff --git a/drivers/scsi/qla2xxx/qla_bsg.c b/drivers/scsi/qla2xxx/qla_bsg.c index 655cf5de604b8..3650f16cab6cf 100644 --- a/drivers/scsi/qla2xxx/qla_bsg.c @@ -258270,7 +309453,7 @@ index 25549a8a2d72d..7cf1f78cbaeee 100644 vaf.fmt = fmt; diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h -index be2eb75ee1a37..51c7ce5f97923 100644 +index be2eb75ee1a37..307ffdfe048be 100644 --- a/drivers/scsi/qla2xxx/qla_def.h +++ b/drivers/scsi/qla2xxx/qla_def.h @@ -726,6 +726,11 @@ typedef 
struct srb { @@ -258403,6 +309586,35 @@ index be2eb75ee1a37..51c7ce5f97923 100644 #define PROCESS_PUREX_IOCB 63 +@@ -5093,17 +5117,17 @@ struct secure_flash_update_block_pk { + (test_bit(ISP_ABORT_NEEDED, &ha->dpc_flags) || \ + test_bit(LOOP_RESYNC_NEEDED, &ha->dpc_flags)) + +-#define QLA_VHA_MARK_BUSY(__vha, __bail) do { \ +- atomic_inc(&__vha->vref_count); \ +- mb(); \ +- if (__vha->flags.delete_progress) { \ +- atomic_dec(&__vha->vref_count); \ +- wake_up(&__vha->vref_waitq); \ +- __bail = 1; \ +- } else { \ +- __bail = 0; \ +- } \ +-} while (0) ++static inline bool qla_vha_mark_busy(scsi_qla_host_t *vha) ++{ ++ atomic_inc(&vha->vref_count); ++ mb(); ++ if (vha->flags.delete_progress) { ++ atomic_dec(&vha->vref_count); ++ wake_up(&vha->vref_waitq); ++ return true; ++ } ++ return false; ++} + + #define QLA_VHA_MARK_NOT_BUSY(__vha) do { \ + atomic_dec(&__vha->vref_count); \ @@ -5427,4 +5451,8 @@ struct ql_vnd_tgt_stats_resp { #include "qla_gbl.h" #include "qla_dbg.h" @@ -260208,7 +311420,7 @@ index ebc8fdb0b43d3..d3742a83d2fd7 100644 return rval; } diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c -index 5fc7697f0af4c..b81797a3ab617 100644 +index 5fc7697f0af4c..30798ab84db91 100644 --- a/drivers/scsi/qla2xxx/qla_init.c +++ b/drivers/scsi/qla2xxx/qla_init.c @@ -47,10 +47,20 @@ qla2x00_sp_timeout(struct timer_list *t) @@ -260232,12 +311444,35 @@ index 5fc7697f0af4c..b81797a3ab617 100644 } void qla2x00_sp_free(srb_t *sp) -@@ -125,8 +135,13 @@ static void qla24xx_abort_iocb_timeout(void *data) +@@ -100,6 +110,7 @@ static void qla24xx_abort_iocb_timeout(void *data) + struct qla_qpair *qpair = sp->qpair; + u32 handle; + unsigned long flags; ++ int sp_found = 0, cmdsp_found = 0; + + if (sp->cmd_sp) + ql_dbg(ql_dbg_async, sp->vha, 0x507c, +@@ -114,22 +125,32 @@ static void qla24xx_abort_iocb_timeout(void *data) + spin_lock_irqsave(qpair->qp_lock_ptr, flags); + for (handle = 1; handle < qpair->req->num_outstanding_cmds; handle++) { + if (sp->cmd_sp && (qpair->req->outstanding_cmds[handle] == +- sp->cmd_sp)) ++ sp->cmd_sp)) { + qpair->req->outstanding_cmds[handle] = NULL; ++ cmdsp_found = 1; ++ } + + /* removing the abort */ + if (qpair->req->outstanding_cmds[handle] == sp) { + qpair->req->outstanding_cmds[handle] = NULL; ++ sp_found = 1; + break; + } } spin_unlock_irqrestore(qpair->qp_lock_ptr, flags); - if (sp->cmd_sp) -+ if (sp->cmd_sp) { ++ if (cmdsp_found && sp->cmd_sp) { + /* + * This done function should take care of + * original command ref: INIT @@ -260245,9 +311480,16 @@ index 5fc7697f0af4c..b81797a3ab617 100644 sp->cmd_sp->done(sp->cmd_sp, QLA_OS_TIMER_EXPIRED); + } - abt->u.abt.comp_status = cpu_to_le16(CS_TIMEOUT); - sp->done(sp, QLA_OS_TIMER_EXPIRED); -@@ -140,11 +155,11 @@ static void qla24xx_abort_sp_done(srb_t *sp, int res) +- abt->u.abt.comp_status = cpu_to_le16(CS_TIMEOUT); +- sp->done(sp, QLA_OS_TIMER_EXPIRED); ++ if (sp_found) { ++ abt->u.abt.comp_status = cpu_to_le16(CS_TIMEOUT); ++ sp->done(sp, QLA_OS_TIMER_EXPIRED); ++ } + } + + static void qla24xx_abort_sp_done(srb_t *sp, int res) +@@ -140,11 +161,11 @@ static void qla24xx_abort_sp_done(srb_t *sp, int res) if (orig_sp) qla_wait_nvme_release_cmd_kref(orig_sp); @@ -260261,11 +311503,9 @@ index 5fc7697f0af4c..b81797a3ab617 100644 } int qla24xx_async_abort_cmd(srb_t *cmd_sp, bool wait) -@@ -153,12 +168,15 @@ int qla24xx_async_abort_cmd(srb_t *cmd_sp, bool wait) - struct srb_iocb *abt_iocb; +@@ -154,11 +175,13 @@ int qla24xx_async_abort_cmd(srb_t *cmd_sp, bool wait) srb_t *sp; int rval = 
QLA_FUNCTION_FAILED; -+ uint8_t bail; + /* ref: INIT for ABTS command */ sp = qla2xxx_get_qpair_sp(cmd_sp->vha, cmd_sp->qpair, cmd_sp->fcport, @@ -260273,11 +311513,11 @@ index 5fc7697f0af4c..b81797a3ab617 100644 if (!sp) return QLA_MEMORY_ALLOC_FAILED; -+ QLA_VHA_MARK_BUSY(vha, bail); ++ qla_vha_mark_busy(vha); abt_iocb = &sp->u.iocb_cmd; sp->type = SRB_ABT_CMD; sp->name = "abort"; -@@ -167,23 +185,22 @@ int qla24xx_async_abort_cmd(srb_t *cmd_sp, bool wait) +@@ -167,23 +190,22 @@ int qla24xx_async_abort_cmd(srb_t *cmd_sp, bool wait) if (wait) sp->flags = SRB_WAKEUP_ON_COMP; @@ -260305,7 +311545,7 @@ index 5fc7697f0af4c..b81797a3ab617 100644 return rval; } -@@ -191,7 +208,8 @@ int qla24xx_async_abort_cmd(srb_t *cmd_sp, bool wait) +@@ -191,7 +213,8 @@ int qla24xx_async_abort_cmd(srb_t *cmd_sp, bool wait) wait_for_completion(&abt_iocb->u.abt.comp); rval = abt_iocb->u.abt.comp_status == CS_COMPLETE ? QLA_SUCCESS : QLA_ERR_FROM_FW; @@ -260315,7 +311555,7 @@ index 5fc7697f0af4c..b81797a3ab617 100644 } return rval; -@@ -286,10 +304,13 @@ static void qla2x00_async_login_sp_done(srb_t *sp, int res) +@@ -286,10 +309,13 @@ static void qla2x00_async_login_sp_done(srb_t *sp, int res) ea.iop[0] = lio->u.logio.iop[0]; ea.iop[1] = lio->u.logio.iop[1]; ea.sp = sp; @@ -260330,7 +311570,7 @@ index 5fc7697f0af4c..b81797a3ab617 100644 } int -@@ -308,6 +329,7 @@ qla2x00_async_login(struct scsi_qla_host *vha, fc_port_t *fcport, +@@ -308,6 +334,7 @@ qla2x00_async_login(struct scsi_qla_host *vha, fc_port_t *fcport, return rval; } @@ -260338,7 +311578,7 @@ index 5fc7697f0af4c..b81797a3ab617 100644 sp = qla2x00_get_sp(vha, fcport, GFP_KERNEL); if (!sp) goto done; -@@ -320,17 +342,15 @@ qla2x00_async_login(struct scsi_qla_host *vha, fc_port_t *fcport, +@@ -320,17 +347,15 @@ qla2x00_async_login(struct scsi_qla_host *vha, fc_port_t *fcport, sp->name = "login"; sp->gen1 = fcport->rscn_gen; sp->gen2 = fcport->login_gen; @@ -260359,7 +311599,7 @@ index 5fc7697f0af4c..b81797a3ab617 100644 lio->u.logio.flags |= (SRB_LOGIN_FCSP | SRB_LOGIN_SKIP_PRLI); ql_dbg(ql_dbg_disc, vha, 0x2072, -@@ -359,7 +379,8 @@ qla2x00_async_login(struct scsi_qla_host *vha, fc_port_t *fcport, +@@ -359,7 +384,8 @@ qla2x00_async_login(struct scsi_qla_host *vha, fc_port_t *fcport, return rval; done_free_sp: @@ -260369,7 +311609,7 @@ index 5fc7697f0af4c..b81797a3ab617 100644 fcport->flags &= ~FCF_ASYNC_SENT; done: fcport->flags &= ~FCF_ASYNC_ACTIVE; -@@ -371,29 +392,26 @@ static void qla2x00_async_logout_sp_done(srb_t *sp, int res) +@@ -371,29 +397,26 @@ static void qla2x00_async_logout_sp_done(srb_t *sp, int res) sp->fcport->flags &= ~(FCF_ASYNC_SENT | FCF_ASYNC_ACTIVE); sp->fcport->login_gen++; qlt_logo_completion_handler(sp->fcport, sp->u.iocb_cmd.u.logio.data[0]); @@ -260404,7 +311644,7 @@ index 5fc7697f0af4c..b81797a3ab617 100644 ql_dbg(ql_dbg_disc, vha, 0x2070, "Async-logout - hdl=%x loop-id=%x portid=%02x%02x%02x %8phC explicit %d.\n", -@@ -407,7 +425,8 @@ qla2x00_async_logout(struct scsi_qla_host *vha, fc_port_t *fcport) +@@ -407,7 +430,8 @@ qla2x00_async_logout(struct scsi_qla_host *vha, fc_port_t *fcport) return rval; done_free_sp: @@ -260414,7 +311654,7 @@ index 5fc7697f0af4c..b81797a3ab617 100644 done: fcport->flags &= ~(FCF_ASYNC_SENT | FCF_ASYNC_ACTIVE); return rval; -@@ -433,29 +452,26 @@ static void qla2x00_async_prlo_sp_done(srb_t *sp, int res) +@@ -433,29 +457,26 @@ static void qla2x00_async_prlo_sp_done(srb_t *sp, int res) if (!test_bit(UNLOADING, &vha->dpc_flags)) qla2x00_post_async_prlo_done_work(sp->fcport->vha, sp->fcport, 
lio->u.logio.data); @@ -260449,7 +311689,7 @@ index 5fc7697f0af4c..b81797a3ab617 100644 ql_dbg(ql_dbg_disc, vha, 0x2070, "Async-prlo - hdl=%x loop-id=%x portid=%02x%02x%02x.\n", -@@ -469,7 +485,8 @@ qla2x00_async_prlo(struct scsi_qla_host *vha, fc_port_t *fcport) +@@ -469,7 +490,8 @@ qla2x00_async_prlo(struct scsi_qla_host *vha, fc_port_t *fcport) return rval; done_free_sp: @@ -260459,7 +311699,7 @@ index 5fc7697f0af4c..b81797a3ab617 100644 done: fcport->flags &= ~FCF_ASYNC_ACTIVE; return rval; -@@ -552,10 +569,12 @@ static void qla2x00_async_adisc_sp_done(srb_t *sp, int res) +@@ -552,10 +574,12 @@ static void qla2x00_async_adisc_sp_done(srb_t *sp, int res) ea.iop[1] = lio->u.logio.iop[1]; ea.fcport = sp->fcport; ea.sp = sp; @@ -260474,7 +311714,7 @@ index 5fc7697f0af4c..b81797a3ab617 100644 } int -@@ -566,26 +585,34 @@ qla2x00_async_adisc(struct scsi_qla_host *vha, fc_port_t *fcport, +@@ -566,26 +590,34 @@ qla2x00_async_adisc(struct scsi_qla_host *vha, fc_port_t *fcport, struct srb_iocb *lio; int rval = QLA_FUNCTION_FAILED; @@ -260515,7 +311755,7 @@ index 5fc7697f0af4c..b81797a3ab617 100644 ql_dbg(ql_dbg_disc, vha, 0x206f, "Async-adisc - hdl=%x loopid=%x portid=%06x %8phC.\n", -@@ -598,7 +625,8 @@ qla2x00_async_adisc(struct scsi_qla_host *vha, fc_port_t *fcport, +@@ -598,7 +630,8 @@ qla2x00_async_adisc(struct scsi_qla_host *vha, fc_port_t *fcport, return rval; done_free_sp: @@ -260525,7 +311765,7 @@ index 5fc7697f0af4c..b81797a3ab617 100644 done: fcport->flags &= ~(FCF_ASYNC_SENT | FCF_ASYNC_ACTIVE); qla2x00_post_async_adisc_work(vha, fcport, data); -@@ -862,7 +890,7 @@ static void qla24xx_handle_gnl_done_event(scsi_qla_host_t *vha, +@@ -862,7 +895,7 @@ static void qla24xx_handle_gnl_done_event(scsi_qla_host_t *vha, break; case DSC_LS_PLOGI_COMP: if (vha->hw->flags.edif_enabled && @@ -260534,7 +311774,7 @@ index 5fc7697f0af4c..b81797a3ab617 100644 /* check to see if App support secure or not */ qla24xx_post_gpdb_work(vha, fcport, 0); break; -@@ -964,6 +992,9 @@ static void qla24xx_handle_gnl_done_event(scsi_qla_host_t *vha, +@@ -964,6 +997,9 @@ static void qla24xx_handle_gnl_done_event(scsi_qla_host_t *vha, set_bit(RELOGIN_NEEDED, &vha->dpc_flags); } break; @@ -260544,7 +311784,7 @@ index 5fc7697f0af4c..b81797a3ab617 100644 default: break; } -@@ -987,8 +1018,6 @@ static void qla24xx_async_gnl_sp_done(srb_t *sp, int res) +@@ -987,8 +1023,6 @@ static void qla24xx_async_gnl_sp_done(srb_t *sp, int res) sp->name, res, sp->u.iocb_cmd.u.mbx.in_mb[1], sp->u.iocb_cmd.u.mbx.in_mb[2]); @@ -260553,7 +311793,7 @@ index 5fc7697f0af4c..b81797a3ab617 100644 sp->fcport->flags &= ~(FCF_ASYNC_SENT|FCF_ASYNC_ACTIVE); memset(&ea, 0, sizeof(ea)); -@@ -1026,8 +1055,8 @@ static void qla24xx_async_gnl_sp_done(srb_t *sp, int res) +@@ -1026,8 +1060,8 @@ static void qla24xx_async_gnl_sp_done(srb_t *sp, int res) spin_unlock_irqrestore(&vha->hw->tgt.sess_lock, flags); list_for_each_entry_safe(fcport, tf, &h, gnl_entry) { @@ -260563,7 +311803,7 @@ index 5fc7697f0af4c..b81797a3ab617 100644 fcport->flags &= ~(FCF_ASYNC_SENT | FCF_ASYNC_ACTIVE); spin_unlock_irqrestore(&vha->hw->tgt.sess_lock, flags); ea.fcport = fcport; -@@ -1081,13 +1110,13 @@ static void qla24xx_async_gnl_sp_done(srb_t *sp, int res) +@@ -1081,13 +1115,13 @@ static void qla24xx_async_gnl_sp_done(srb_t *sp, int res) } spin_unlock_irqrestore(&vha->hw->tgt.sess_lock, flags); @@ -260579,7 +311819,7 @@ index 5fc7697f0af4c..b81797a3ab617 100644 int rval = QLA_FUNCTION_FAILED; unsigned long flags; u16 *mb; -@@ -1112,6 +1141,7 @@ int qla24xx_async_gnl(struct 
scsi_qla_host *vha, fc_port_t *fcport) +@@ -1112,6 +1146,7 @@ int qla24xx_async_gnl(struct scsi_qla_host *vha, fc_port_t *fcport) vha->gnl.sent = 1; spin_unlock_irqrestore(&vha->hw->tgt.sess_lock, flags); @@ -260587,7 +311827,7 @@ index 5fc7697f0af4c..b81797a3ab617 100644 sp = qla2x00_get_sp(vha, fcport, GFP_KERNEL); if (!sp) goto done; -@@ -1120,10 +1150,8 @@ int qla24xx_async_gnl(struct scsi_qla_host *vha, fc_port_t *fcport) +@@ -1120,10 +1155,8 @@ int qla24xx_async_gnl(struct scsi_qla_host *vha, fc_port_t *fcport) sp->name = "gnlist"; sp->gen1 = fcport->rscn_gen; sp->gen2 = fcport->login_gen; @@ -260600,7 +311840,7 @@ index 5fc7697f0af4c..b81797a3ab617 100644 mb = sp->u.iocb_cmd.u.mbx.out_mb; mb[0] = MBC_PORT_NODE_NAME_LIST; -@@ -1135,8 +1163,6 @@ int qla24xx_async_gnl(struct scsi_qla_host *vha, fc_port_t *fcport) +@@ -1135,8 +1168,6 @@ int qla24xx_async_gnl(struct scsi_qla_host *vha, fc_port_t *fcport) mb[8] = vha->gnl.size; mb[9] = vha->vp_idx; @@ -260609,7 +311849,7 @@ index 5fc7697f0af4c..b81797a3ab617 100644 ql_dbg(ql_dbg_disc, vha, 0x20da, "Async-%s - OUT WWPN %8phC hndl %x\n", sp->name, fcport->port_name, sp->handle); -@@ -1148,7 +1174,8 @@ int qla24xx_async_gnl(struct scsi_qla_host *vha, fc_port_t *fcport) +@@ -1148,7 +1179,8 @@ int qla24xx_async_gnl(struct scsi_qla_host *vha, fc_port_t *fcport) return rval; done_free_sp: @@ -260619,7 +311859,7 @@ index 5fc7697f0af4c..b81797a3ab617 100644 done: fcport->flags &= ~(FCF_ASYNC_ACTIVE | FCF_ASYNC_SENT); return rval; -@@ -1194,7 +1221,7 @@ done: +@@ -1194,7 +1226,7 @@ done: dma_pool_free(ha->s_dma_pool, sp->u.iocb_cmd.u.mbx.in, sp->u.iocb_cmd.u.mbx.in_dma); @@ -260628,7 +311868,7 @@ index 5fc7697f0af4c..b81797a3ab617 100644 } int qla24xx_post_prli_work(struct scsi_qla_host *vha, fc_port_t *fcport) -@@ -1235,11 +1262,13 @@ static void qla2x00_async_prli_sp_done(srb_t *sp, int res) +@@ -1235,11 +1267,13 @@ static void qla2x00_async_prli_sp_done(srb_t *sp, int res) ea.sp = sp; if (res == QLA_OS_TIMER_EXPIRED) ea.data[0] = QLA_OS_TIMER_EXPIRED; @@ -260643,7 +311883,7 @@ index 5fc7697f0af4c..b81797a3ab617 100644 } int -@@ -1272,12 +1301,10 @@ qla24xx_async_prli(struct scsi_qla_host *vha, fc_port_t *fcport) +@@ -1272,12 +1306,10 @@ qla24xx_async_prli(struct scsi_qla_host *vha, fc_port_t *fcport) sp->type = SRB_PRLI_CMD; sp->name = "prli"; @@ -260658,7 +311898,7 @@ index 5fc7697f0af4c..b81797a3ab617 100644 lio->u.logio.flags = 0; if (NVME_TARGET(vha->hw, fcport)) -@@ -1299,7 +1326,8 @@ qla24xx_async_prli(struct scsi_qla_host *vha, fc_port_t *fcport) +@@ -1299,7 +1331,8 @@ qla24xx_async_prli(struct scsi_qla_host *vha, fc_port_t *fcport) return rval; done_free_sp: @@ -260668,7 +311908,7 @@ index 5fc7697f0af4c..b81797a3ab617 100644 fcport->flags &= ~FCF_ASYNC_SENT; return rval; } -@@ -1328,14 +1356,21 @@ int qla24xx_async_gpdb(struct scsi_qla_host *vha, fc_port_t *fcport, u8 opt) +@@ -1328,14 +1361,21 @@ int qla24xx_async_gpdb(struct scsi_qla_host *vha, fc_port_t *fcport, u8 opt) struct port_database_24xx *pd; struct qla_hw_data *ha = vha->hw; @@ -260694,7 +311934,7 @@ index 5fc7697f0af4c..b81797a3ab617 100644 sp = qla2x00_get_sp(vha, fcport, GFP_KERNEL); if (!sp) goto done; -@@ -1347,10 +1382,8 @@ int qla24xx_async_gpdb(struct scsi_qla_host *vha, fc_port_t *fcport, u8 opt) +@@ -1347,10 +1387,8 @@ int qla24xx_async_gpdb(struct scsi_qla_host *vha, fc_port_t *fcport, u8 opt) sp->name = "gpdb"; sp->gen1 = fcport->rscn_gen; sp->gen2 = fcport->login_gen; @@ -260707,7 +311947,7 @@ index 5fc7697f0af4c..b81797a3ab617 100644 pd = 
dma_pool_zalloc(ha->s_dma_pool, GFP_KERNEL, &pd_dma); if (pd == NULL) { -@@ -1369,11 +1402,10 @@ int qla24xx_async_gpdb(struct scsi_qla_host *vha, fc_port_t *fcport, u8 opt) +@@ -1369,11 +1407,10 @@ int qla24xx_async_gpdb(struct scsi_qla_host *vha, fc_port_t *fcport, u8 opt) mb[9] = vha->vp_idx; mb[10] = opt; @@ -260721,7 +311961,7 @@ index 5fc7697f0af4c..b81797a3ab617 100644 ql_dbg(ql_dbg_disc, vha, 0x20dc, "Async-%s %8phC hndl %x opt %x\n", sp->name, fcport->port_name, sp->handle, opt); -@@ -1387,7 +1419,7 @@ done_free_sp: +@@ -1387,7 +1424,7 @@ done_free_sp: if (pd) dma_pool_free(ha->s_dma_pool, pd, pd_dma); @@ -260730,7 +311970,7 @@ index 5fc7697f0af4c..b81797a3ab617 100644 fcport->flags &= ~FCF_ASYNC_SENT; done: fcport->flags &= ~FCF_ASYNC_ACTIVE; -@@ -1454,7 +1486,7 @@ static int qla_chk_secure_login(scsi_qla_host_t *vha, fc_port_t *fcport, +@@ -1454,7 +1491,7 @@ static int qla_chk_secure_login(scsi_qla_host_t *vha, fc_port_t *fcport, qla2x00_post_aen_work(vha, FCH_EVT_PORT_ONLINE, fcport->d_id.b24); @@ -260739,7 +311979,7 @@ index 5fc7697f0af4c..b81797a3ab617 100644 ql_dbg(ql_dbg_disc, vha, 0x20ef, "%s %d %8phC EDIF: post DB_AUTH: AUTH needed\n", __func__, __LINE__, fcport->port_name); -@@ -1559,6 +1591,11 @@ static void qla_chk_n2n_b4_login(struct scsi_qla_host *vha, fc_port_t *fcport) +@@ -1559,6 +1596,11 @@ static void qla_chk_n2n_b4_login(struct scsi_qla_host *vha, fc_port_t *fcport) u8 login = 0; int rc; @@ -260751,7 +311991,7 @@ index 5fc7697f0af4c..b81797a3ab617 100644 if (qla_tgt_mode_enabled(vha)) return; -@@ -1617,7 +1654,8 @@ int qla24xx_fcport_handle_login(struct scsi_qla_host *vha, fc_port_t *fcport) +@@ -1617,7 +1659,8 @@ int qla24xx_fcport_handle_login(struct scsi_qla_host *vha, fc_port_t *fcport) fcport->login_gen, fcport->loop_id, fcport->scan_state, fcport->fc4_type); @@ -260761,7 +312001,7 @@ index 5fc7697f0af4c..b81797a3ab617 100644 return 0; if ((fcport->loop_id != FC_NO_LOOP_ID) && -@@ -1638,7 +1676,7 @@ int qla24xx_fcport_handle_login(struct scsi_qla_host *vha, fc_port_t *fcport) +@@ -1638,7 +1681,7 @@ int qla24xx_fcport_handle_login(struct scsi_qla_host *vha, fc_port_t *fcport) if (vha->host->active_mode == MODE_TARGET && !N2N_TOPO(vha->hw)) return 0; @@ -260770,7 +312010,7 @@ index 5fc7697f0af4c..b81797a3ab617 100644 set_bit(RELOGIN_NEEDED, &vha->dpc_flags); return 0; } -@@ -1735,8 +1773,16 @@ int qla24xx_fcport_handle_login(struct scsi_qla_host *vha, fc_port_t *fcport) +@@ -1735,8 +1778,16 @@ int qla24xx_fcport_handle_login(struct scsi_qla_host *vha, fc_port_t *fcport) break; case DSC_LOGIN_PEND: @@ -260788,7 +312028,7 @@ index 5fc7697f0af4c..b81797a3ab617 100644 break; case DSC_UPD_FCPORT: -@@ -1786,16 +1832,76 @@ void qla2x00_handle_rscn(scsi_qla_host_t *vha, struct event_arg *ea) +@@ -1786,16 +1837,76 @@ void qla2x00_handle_rscn(scsi_qla_host_t *vha, struct event_arg *ea) fc_port_t *fcport; unsigned long flags; @@ -260844,7 +312084,9 @@ index 5fc7697f0af4c..b81797a3ab617 100644 + fcport->scan_needed = 1; + fcport->rscn_gen++; + } -+ } + } +- fcport->scan_needed = 1; +- fcport->rscn_gen++; + break; + case RSCN_DOM_ADDR: + list_for_each_entry(fcport, &vha->vp_fcports, list) { @@ -260867,18 +312109,14 @@ index 5fc7697f0af4c..b81797a3ab617 100644 + + fcport->scan_needed = 1; + fcport->rscn_gen++; - } -- fcport->scan_needed = 1; -- fcport->rscn_gen++; ++ } + break; } spin_lock_irqsave(&vha->work_lock, flags); -@@ -1916,23 +2022,24 @@ qla2x00_async_tm_cmd(fc_port_t *fcport, uint32_t flags, uint32_t lun, - struct srb_iocb *tm_iocb; +@@ -1917,22 +2028,22 @@ 
qla2x00_async_tm_cmd(fc_port_t *fcport, uint32_t flags, uint32_t lun, srb_t *sp; int rval = QLA_FUNCTION_FAILED; -+ uint8_t bail; + /* ref: INIT */ sp = qla2x00_get_sp(vha, fcport, GFP_KERNEL); @@ -260886,7 +312124,7 @@ index 5fc7697f0af4c..b81797a3ab617 100644 goto done; - tm_iocb = &sp->u.iocb_cmd; -+ QLA_VHA_MARK_BUSY(vha, bail); ++ qla_vha_mark_busy(vha); sp->type = SRB_TM_CMD; sp->name = "tmf"; + qla2x00_init_async_sp(sp, qla2x00_get_async_timeout(vha), @@ -260905,7 +312143,7 @@ index 5fc7697f0af4c..b81797a3ab617 100644 ql_dbg(ql_dbg_taskm, vha, 0x802f, "Async-tmf hdl=%x loop-id=%x portid=%02x%02x%02x.\n", -@@ -1962,7 +2069,8 @@ qla2x00_async_tm_cmd(fc_port_t *fcport, uint32_t flags, uint32_t lun, +@@ -1962,7 +2073,8 @@ qla2x00_async_tm_cmd(fc_port_t *fcport, uint32_t flags, uint32_t lun, } done_free_sp: @@ -260915,7 +312153,7 @@ index 5fc7697f0af4c..b81797a3ab617 100644 fcport->flags &= ~FCF_ASYNC_SENT; done: return rval; -@@ -2021,13 +2129,6 @@ qla24xx_handle_prli_done_event(struct scsi_qla_host *vha, struct event_arg *ea) +@@ -2021,13 +2133,6 @@ qla24xx_handle_prli_done_event(struct scsi_qla_host *vha, struct event_arg *ea) qla24xx_post_gpdb_work(vha, ea->fcport, 0); break; default: @@ -260929,7 +312167,7 @@ index 5fc7697f0af4c..b81797a3ab617 100644 sp = ea->sp; ql_dbg(ql_dbg_disc, vha, 0x2118, "%s %d %8phC priority %s, fc4type %x prev try %s\n", -@@ -2047,6 +2148,13 @@ qla24xx_handle_prli_done_event(struct scsi_qla_host *vha, struct event_arg *ea) +@@ -2047,6 +2152,13 @@ qla24xx_handle_prli_done_event(struct scsi_qla_host *vha, struct event_arg *ea) } if (N2N_TOPO(vha->hw)) { @@ -260943,7 +312181,7 @@ index 5fc7697f0af4c..b81797a3ab617 100644 if (ea->fcport->n2n_link_reset_cnt < vha->hw->login_retry_count) { ea->fcport->n2n_link_reset_cnt++; -@@ -2171,12 +2279,7 @@ qla24xx_handle_plogi_done_event(struct scsi_qla_host *vha, struct event_arg *ea) +@@ -2171,12 +2283,7 @@ qla24xx_handle_plogi_done_event(struct scsi_qla_host *vha, struct event_arg *ea) ql_dbg(ql_dbg_disc, vha, 0x20eb, "%s %d %8phC cmd error %x\n", __func__, __LINE__, ea->fcport->port_name, ea->data[1]); @@ -260957,7 +312195,7 @@ index 5fc7697f0af4c..b81797a3ab617 100644 break; case MBS_LOOP_ID_USED: /* data[1] = IO PARAM 1 = nport ID */ -@@ -3419,6 +3522,14 @@ qla2x00_alloc_fw_dump(scsi_qla_host_t *vha) +@@ -3419,6 +3526,14 @@ qla2x00_alloc_fw_dump(scsi_qla_host_t *vha) struct rsp_que *rsp = ha->rsp_q_map[0]; struct qla2xxx_fw_dump *fw_dump; @@ -260972,7 +312210,7 @@ index 5fc7697f0af4c..b81797a3ab617 100644 dump_size = fixed_size = mem_size = eft_size = fce_size = mq_size = 0; req_q_size = rsp_q_size = 0; -@@ -3429,7 +3540,7 @@ qla2x00_alloc_fw_dump(scsi_qla_host_t *vha) +@@ -3429,7 +3544,7 @@ qla2x00_alloc_fw_dump(scsi_qla_host_t *vha) mem_size = (ha->fw_memory_size - 0x11000 + 1) * sizeof(uint16_t); } else if (IS_FWI2_CAPABLE(ha)) { @@ -260981,7 +312219,7 @@ index 5fc7697f0af4c..b81797a3ab617 100644 fixed_size = offsetof(struct qla83xx_fw_dump, ext_mem); else if (IS_QLA81XX(ha)) fixed_size = offsetof(struct qla81xx_fw_dump, ext_mem); -@@ -3441,8 +3552,7 @@ qla2x00_alloc_fw_dump(scsi_qla_host_t *vha) +@@ -3441,8 +3556,7 @@ qla2x00_alloc_fw_dump(scsi_qla_host_t *vha) mem_size = (ha->fw_memory_size - 0x100000 + 1) * sizeof(uint32_t); if (ha->mqenable) { @@ -260991,7 +312229,7 @@ index 5fc7697f0af4c..b81797a3ab617 100644 mq_size = sizeof(struct qla2xxx_mq_chain); /* * Allocate maximum buffer size for all queues - Q0. 
-@@ -4003,8 +4113,7 @@ enable_82xx_npiv: +@@ -4003,8 +4117,7 @@ enable_82xx_npiv: ha->fw_major_version, ha->fw_minor_version, ha->fw_subminor_version); @@ -261001,7 +312239,7 @@ index 5fc7697f0af4c..b81797a3ab617 100644 ha->flags.fac_supported = 0; rval = QLA_SUCCESS; } -@@ -4187,7 +4296,7 @@ qla24xx_update_fw_options(scsi_qla_host_t *vha) +@@ -4187,7 +4300,7 @@ qla24xx_update_fw_options(scsi_qla_host_t *vha) * fw shal not send PRLI after PLOGI Acc */ if (ha->flags.edif_enabled && @@ -261010,7 +312248,7 @@ index 5fc7697f0af4c..b81797a3ab617 100644 ha->fw_options[3] |= BIT_15; ha->flags.n2n_fw_acc_sec = 1; } else { -@@ -4431,8 +4540,14 @@ qla2x00_init_rings(scsi_qla_host_t *vha) +@@ -4431,8 +4544,14 @@ qla2x00_init_rings(scsi_qla_host_t *vha) BIT_6) != 0; ql_dbg(ql_dbg_init, vha, 0x00bc, "FA-WWPN Support: %s.\n", (ha->flags.fawwpn_enabled) ? "enabled" : "disabled"); @@ -261025,7 +312263,7 @@ index 5fc7697f0af4c..b81797a3ab617 100644 rval = qla2x00_init_firmware(vha, ha->init_cb_size); next_check: if (rval) { -@@ -5335,15 +5450,13 @@ qla2x00_configure_loop(scsi_qla_host_t *vha) +@@ -5335,15 +5454,13 @@ qla2x00_configure_loop(scsi_qla_host_t *vha) "LOOP READY.\n"); ha->flags.fw_init_done = 1; @@ -261048,7 +312286,7 @@ index 5fc7697f0af4c..b81797a3ab617 100644 /* * Process any ATIO queue entries that came in -@@ -5408,6 +5521,22 @@ static int qla2x00_configure_n2n_loop(scsi_qla_host_t *vha) +@@ -5408,6 +5525,22 @@ static int qla2x00_configure_n2n_loop(scsi_qla_host_t *vha) return QLA_FUNCTION_FAILED; } @@ -261071,7 +312309,7 @@ index 5fc7697f0af4c..b81797a3ab617 100644 /* * qla2x00_configure_local_loop * Updates Fibre Channel Device Database with local loop devices. -@@ -5459,6 +5588,19 @@ qla2x00_configure_local_loop(scsi_qla_host_t *vha) +@@ -5459,6 +5592,19 @@ qla2x00_configure_local_loop(scsi_qla_host_t *vha) spin_unlock_irqrestore(&vha->work_lock, flags); if (vha->scan.scan_retry < MAX_SCAN_RETRIES) { @@ -261091,7 +312329,7 @@ index 5fc7697f0af4c..b81797a3ab617 100644 set_bit(LOCAL_LOOP_UPDATE, &vha->dpc_flags); set_bit(LOOP_RESYNC_NEEDED, &vha->dpc_flags); } -@@ -5547,6 +5689,13 @@ qla2x00_configure_local_loop(scsi_qla_host_t *vha) +@@ -5547,6 +5693,13 @@ qla2x00_configure_local_loop(scsi_qla_host_t *vha) memcpy(fcport->node_name, new_fcport->node_name, WWN_SIZE); fcport->scan_state = QLA_FCPORT_FOUND; @@ -261105,7 +312343,7 @@ index 5fc7697f0af4c..b81797a3ab617 100644 found++; break; } -@@ -7026,12 +7175,14 @@ qla2x00_abort_isp_cleanup(scsi_qla_host_t *vha) +@@ -7026,12 +7179,14 @@ qla2x00_abort_isp_cleanup(scsi_qla_host_t *vha) ha->chip_reset++; ha->base_qpair->chip_reset = ha->chip_reset; ha->base_qpair->cmd_cnt = ha->base_qpair->cmd_completion_cnt = 0; @@ -261120,7 +312358,7 @@ index 5fc7697f0af4c..b81797a3ab617 100644 } } -@@ -9333,7 +9484,7 @@ struct qla_qpair *qla2xxx_create_qpair(struct scsi_qla_host *vha, int qos, +@@ -9333,7 +9488,7 @@ struct qla_qpair *qla2xxx_create_qpair(struct scsi_qla_host *vha, int qos, qpair->rsp->req = qpair->req; qpair->rsp->qpair = qpair; /* init qpair to this cpu. Will adjust at run time. 
*/ @@ -261129,7 +312367,7 @@ index 5fc7697f0af4c..b81797a3ab617 100644 if (IS_T10_PI_CAPABLE(ha) && ql2xenabledif) { if (ha->fw_attributes & BIT_4) -@@ -9591,6 +9742,12 @@ int qla2xxx_disable_port(struct Scsi_Host *host) +@@ -9591,6 +9746,12 @@ int qla2xxx_disable_port(struct Scsi_Host *host) vha->hw->flags.port_isolated = 1; @@ -261142,7 +312380,7 @@ index 5fc7697f0af4c..b81797a3ab617 100644 if (qla2x00_chip_is_down(vha)) return 0; -@@ -9606,6 +9763,13 @@ int qla2xxx_enable_port(struct Scsi_Host *host) +@@ -9606,6 +9767,13 @@ int qla2xxx_enable_port(struct Scsi_Host *host) { scsi_qla_host_t *vha = shost_priv(host); @@ -261157,7 +312395,7 @@ index 5fc7697f0af4c..b81797a3ab617 100644 /* Set the flag to 1, so that isp_abort can proceed */ vha->flags.online = 1; diff --git a/drivers/scsi/qla2xxx/qla_inline.h b/drivers/scsi/qla2xxx/qla_inline.h -index 5f3b7995cc8f3..db17f7f410cdd 100644 +index 5f3b7995cc8f3..5185dc5daf80d 100644 --- a/drivers/scsi/qla2xxx/qla_inline.h +++ b/drivers/scsi/qla2xxx/qla_inline.h @@ -184,6 +184,8 @@ static void qla2xxx_init_sp(srb_t *sp, scsi_qla_host_t *vha, @@ -261169,6 +312407,19 @@ index 5f3b7995cc8f3..db17f7f410cdd 100644 INIT_LIST_HEAD(&sp->elem); } +@@ -223,11 +225,9 @@ static inline srb_t * + qla2x00_get_sp(scsi_qla_host_t *vha, fc_port_t *fcport, gfp_t flag) + { + srb_t *sp = NULL; +- uint8_t bail; + struct qla_qpair *qpair; + +- QLA_VHA_MARK_BUSY(vha, bail); +- if (unlikely(bail)) ++ if (unlikely(qla_vha_mark_busy(vha))) + return NULL; + + qpair = vha->hw->base_qpair; diff --git a/drivers/scsi/qla2xxx/qla_iocb.c b/drivers/scsi/qla2xxx/qla_iocb.c index 9d4ad1d2b00a2..42ce4e1fe7441 100644 --- a/drivers/scsi/qla2xxx/qla_iocb.c @@ -262003,7 +313254,7 @@ index 1c5da2dbd6f97..3e167dc4eec72 100644 ql_log(ql_log_warn, vha, 0xffff, "register_localport failed: ret=%x\n", ret); diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c -index 836fedcea241b..00e97f0a07ebe 100644 +index 836fedcea241b..05d827227d0b3 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -202,12 +202,6 @@ MODULE_PARM_DESC(ql2xdbwr, @@ -262314,7 +313565,22 @@ index 836fedcea241b..00e97f0a07ebe 100644 if (!ha->purex_dma_pool) { ql_dbg_pci(ql_dbg_init, ha->pdev, 0x011b, -@@ -5469,7 +5500,7 @@ qla2x00_do_work(struct scsi_qla_host *vha) +@@ -5012,13 +5043,11 @@ struct qla_work_evt * + qla2x00_alloc_work(struct scsi_qla_host *vha, enum qla_work_type type) + { + struct qla_work_evt *e; +- uint8_t bail; + + if (test_bit(UNLOADING, &vha->dpc_flags)) + return NULL; + +- QLA_VHA_MARK_BUSY(vha, bail); +- if (bail) ++ if (qla_vha_mark_busy(vha)) + return NULL; + + e = kzalloc(sizeof(struct qla_work_evt), GFP_ATOMIC); +@@ -5469,7 +5498,7 @@ qla2x00_do_work(struct scsi_qla_host *vha) e->u.fcport.fcport, false); break; case QLA_EVT_SA_REPLACE: @@ -262323,7 +313589,7 @@ index 836fedcea241b..00e97f0a07ebe 100644 break; } -@@ -5532,6 +5563,11 @@ void qla2x00_relogin(struct scsi_qla_host *vha) +@@ -5532,6 +5561,11 @@ void qla2x00_relogin(struct scsi_qla_host *vha) memset(&ea, 0, sizeof(ea)); ea.fcport = fcport; qla24xx_handle_relogin_event(vha, &ea); @@ -262335,7 +313601,7 @@ index 836fedcea241b..00e97f0a07ebe 100644 } else if (vha->hw->current_topology == ISP_CFG_NL) { fcport->login_retry--; -@@ -7114,17 +7150,6 @@ intr_on_check: +@@ -7114,17 +7148,6 @@ intr_on_check: qla2x00_lip_reset(base_vha); } @@ -262353,7 +313619,7 @@ index 836fedcea241b..00e97f0a07ebe 100644 ha->dpc_active = 0; end_loop: set_current_state(TASK_INTERRUPTIBLE); -@@ -7183,56 +7208,99 @@ 
qla2x00_rst_aen(scsi_qla_host_t *vha) +@@ -7183,56 +7206,99 @@ qla2x00_rst_aen(scsi_qla_host_t *vha) static bool qla_do_heartbeat(struct scsi_qla_host *vha) { @@ -262483,7 +313749,7 @@ index 836fedcea241b..00e97f0a07ebe 100644 } } -@@ -7259,6 +7327,8 @@ qla2x00_timer(struct timer_list *t) +@@ -7259,6 +7325,8 @@ qla2x00_timer(struct timer_list *t) fc_port_t *fcport = NULL; if (ha->flags.eeh_busy) { @@ -262492,7 +313758,7 @@ index 836fedcea241b..00e97f0a07ebe 100644 ql_dbg(ql_dbg_timer, vha, 0x6000, "EEH = %d, restarting timer.\n", ha->flags.eeh_busy); -@@ -7424,6 +7494,8 @@ qla2x00_timer(struct timer_list *t) +@@ -7424,6 +7492,8 @@ qla2x00_timer(struct timer_list *t) start_dpc++; } @@ -262501,7 +313767,7 @@ index 836fedcea241b..00e97f0a07ebe 100644 /* Schedule the DPC routine if needed */ if ((test_bit(ISP_ABORT_NEEDED, &vha->dpc_flags) || test_bit(LOOP_RESYNC_NEEDED, &vha->dpc_flags) || -@@ -7456,9 +7528,10 @@ qla2x00_timer(struct timer_list *t) +@@ -7456,9 +7526,10 @@ qla2x00_timer(struct timer_list *t) test_bit(RELOGIN_NEEDED, &vha->dpc_flags), test_bit(PROCESS_PUREX_IOCB, &vha->dpc_flags)); qla2xxx_wake_dpc(vha); @@ -262513,7 +313779,7 @@ index 836fedcea241b..00e97f0a07ebe 100644 qla2x00_restart_timer(vha, WATCH_INTERVAL); } -@@ -7656,7 +7729,7 @@ qla2xxx_pci_error_detected(struct pci_dev *pdev, pci_channel_state_t state) +@@ -7656,7 +7727,7 @@ qla2xxx_pci_error_detected(struct pci_dev *pdev, pci_channel_state_t state) switch (state) { case pci_channel_io_normal: @@ -262522,7 +313788,7 @@ index 836fedcea241b..00e97f0a07ebe 100644 if (ql2xmqsupport || ql2xnvmeenable) { set_bit(QPAIR_ONLINE_CHECK_NEEDED, &vha->dpc_flags); qla2xxx_wake_dpc(vha); -@@ -7697,9 +7770,16 @@ qla2xxx_pci_mmio_enabled(struct pci_dev *pdev) +@@ -7697,9 +7768,16 @@ qla2xxx_pci_mmio_enabled(struct pci_dev *pdev) "mmio enabled\n"); ha->pci_error_state = QLA_PCI_MMIO_ENABLED; @@ -262539,7 +313805,7 @@ index 836fedcea241b..00e97f0a07ebe 100644 spin_lock_irqsave(&ha->hardware_lock, flags); if (IS_QLA2100(ha) || IS_QLA2200(ha)){ stat = rd_reg_word(®->hccr); -@@ -7721,6 +7801,7 @@ qla2xxx_pci_mmio_enabled(struct pci_dev *pdev) +@@ -7721,6 +7799,7 @@ qla2xxx_pci_mmio_enabled(struct pci_dev *pdev) "RISC paused -- mmio_enabled, Dumping firmware.\n"); qla2xxx_dump_fw(base_vha); } @@ -262547,7 +313813,7 @@ index 836fedcea241b..00e97f0a07ebe 100644 /* set PCI_ERS_RESULT_NEED_RESET to trigger call to qla2xxx_pci_slot_reset */ ql_dbg(ql_dbg_aer, base_vha, 0x600d, "mmio enabled returning.\n"); -@@ -7828,6 +7909,9 @@ void qla_pci_set_eeh_busy(struct scsi_qla_host *vha) +@@ -7828,6 +7907,9 @@ void qla_pci_set_eeh_busy(struct scsi_qla_host *vha) spin_lock_irqsave(&base_vha->work_lock, flags); if (!ha->flags.eeh_busy) { @@ -262729,7 +313995,7 @@ index 291ecc33b1fe6..4fc9466d820a7 100644 /** diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c -index 66f507469a31a..747e1cbb7ec91 100644 +index 66f507469a31a..591df0a91057e 100644 --- a/drivers/scsi/scsi_debug.c +++ b/drivers/scsi/scsi_debug.c @@ -1189,7 +1189,7 @@ static int p_fill_from_dev_buffer(struct scsi_cmnd *scp, const void *arr, @@ -262811,7 +314077,21 @@ index 66f507469a31a..747e1cbb7ec91 100644 alloc_len = get_unaligned_be32(cmd + 10); /* following just in case virtual_gb changed */ -@@ -1885,7 +1887,7 @@ static int resp_readcap16(struct scsi_cmnd *scp, +@@ -1877,6 +1879,13 @@ static int resp_readcap16(struct scsi_cmnd *scp, + arr[14] |= 0x40; + } + ++ /* ++ * Since the scsi_debug READ CAPACITY implementation always reports the ++ * total disk capacity, set RC 
BASIS = 1 for host-managed ZBC devices. ++ */ ++ if (devip->zmodel == BLK_ZONED_HM) ++ arr[12] |= 1 << 4; ++ + arr[15] = sdebug_lowest_aligned & 0xff; + + if (have_dif_prot) { +@@ -1885,7 +1894,7 @@ static int resp_readcap16(struct scsi_cmnd *scp, } return fill_from_dev_buffer(scp, arr, @@ -262820,7 +314100,7 @@ index 66f507469a31a..747e1cbb7ec91 100644 } #define SDEBUG_MAX_TGTPGS_ARR_SZ 1412 -@@ -1896,8 +1898,9 @@ static int resp_report_tgtpgs(struct scsi_cmnd *scp, +@@ -1896,8 +1905,9 @@ static int resp_report_tgtpgs(struct scsi_cmnd *scp, unsigned char *cmd = scp->cmnd; unsigned char *arr; int host_no = devip->sdbg_host->shost->host_no; @@ -262831,7 +314111,7 @@ index 66f507469a31a..747e1cbb7ec91 100644 alen = get_unaligned_be32(cmd + 6); arr = kzalloc(SDEBUG_MAX_TGTPGS_ARR_SZ, GFP_ATOMIC); -@@ -1959,9 +1962,9 @@ static int resp_report_tgtpgs(struct scsi_cmnd *scp, +@@ -1959,9 +1969,9 @@ static int resp_report_tgtpgs(struct scsi_cmnd *scp, * - The constructed command length * - The maximum array size */ @@ -262843,7 +314123,7 @@ index 66f507469a31a..747e1cbb7ec91 100644 kfree(arr); return ret; } -@@ -2311,7 +2314,8 @@ static int resp_mode_sense(struct scsi_cmnd *scp, +@@ -2311,7 +2321,8 @@ static int resp_mode_sense(struct scsi_cmnd *scp, { int pcontrol, pcode, subpcode, bd_len; unsigned char dev_spec; @@ -262853,7 +314133,7 @@ index 66f507469a31a..747e1cbb7ec91 100644 int target = scp->device->id; unsigned char *ap; unsigned char arr[SDEBUG_MAX_MSENSE_SZ]; -@@ -2467,7 +2471,7 @@ static int resp_mode_sense(struct scsi_cmnd *scp, +@@ -2467,7 +2478,7 @@ static int resp_mode_sense(struct scsi_cmnd *scp, arr[0] = offset - 1; else put_unaligned_be16((offset - 2), arr + 0); @@ -262862,7 +314142,7 @@ index 66f507469a31a..747e1cbb7ec91 100644 } #define SDEBUG_MAX_MSELECT_SZ 512 -@@ -2498,11 +2502,11 @@ static int resp_mode_select(struct scsi_cmnd *scp, +@@ -2498,11 +2509,11 @@ static int resp_mode_select(struct scsi_cmnd *scp, __func__, param_len, res); md_len = mselect6 ? (arr[0] + 1) : (get_unaligned_be16(arr + 0) + 2); bd_len = mselect6 ? 
arr[3] : get_unaligned_be16(arr + 6); @@ -262876,7 +314156,7 @@ index 66f507469a31a..747e1cbb7ec91 100644 mpage = arr[off] & 0x3f; ps = !!(arr[off] & 0x80); if (ps) { -@@ -2582,7 +2586,8 @@ static int resp_ie_l_pg(unsigned char *arr) +@@ -2582,7 +2593,8 @@ static int resp_ie_l_pg(unsigned char *arr) static int resp_log_sense(struct scsi_cmnd *scp, struct sdebug_dev_info *devip) { @@ -262886,7 +314166,7 @@ index 66f507469a31a..747e1cbb7ec91 100644 unsigned char arr[SDEBUG_MAX_LSENSE_SZ]; unsigned char *cmd = scp->cmnd; -@@ -2652,9 +2657,9 @@ static int resp_log_sense(struct scsi_cmnd *scp, +@@ -2652,9 +2664,9 @@ static int resp_log_sense(struct scsi_cmnd *scp, mk_sense_invalid_fld(scp, SDEB_IN_CDB, 3, -1); return check_condition_result; } @@ -262898,7 +314178,7 @@ index 66f507469a31a..747e1cbb7ec91 100644 } static inline bool sdebug_dev_is_zoned(struct sdebug_dev_info *devip) -@@ -2742,6 +2747,24 @@ static void zbc_open_zone(struct sdebug_dev_info *devip, +@@ -2742,6 +2754,24 @@ static void zbc_open_zone(struct sdebug_dev_info *devip, } } @@ -262923,7 +314203,7 @@ index 66f507469a31a..747e1cbb7ec91 100644 static void zbc_inc_wp(struct sdebug_dev_info *devip, unsigned long long lba, unsigned int num) { -@@ -2754,7 +2777,7 @@ static void zbc_inc_wp(struct sdebug_dev_info *devip, +@@ -2754,7 +2784,7 @@ static void zbc_inc_wp(struct sdebug_dev_info *devip, if (zsp->z_type == ZBC_ZONE_TYPE_SWR) { zsp->z_wp += num; if (zsp->z_wp >= zend) @@ -262932,7 +314212,7 @@ index 66f507469a31a..747e1cbb7ec91 100644 return; } -@@ -2773,7 +2796,7 @@ static void zbc_inc_wp(struct sdebug_dev_info *devip, +@@ -2773,7 +2803,7 @@ static void zbc_inc_wp(struct sdebug_dev_info *devip, n = num; } if (zsp->z_wp >= zend) @@ -262941,7 +314221,16 @@ index 66f507469a31a..747e1cbb7ec91 100644 num -= n; lba += n; -@@ -4258,6 +4281,8 @@ static int resp_verify(struct scsi_cmnd *scp, struct sdebug_dev_info *devip) +@@ -3610,7 +3640,7 @@ static int resp_write_scat(struct scsi_cmnd *scp, + mk_sense_buffer(scp, ILLEGAL_REQUEST, INVALID_FIELD_IN_CDB, 0); + return illegal_condition_result; + } +- lrdp = kzalloc(lbdof_blen, GFP_ATOMIC); ++ lrdp = kzalloc(lbdof_blen, GFP_ATOMIC | __GFP_NOWARN); + if (lrdp == NULL) + return SCSI_MLQUEUE_HOST_BUSY; + if (sdebug_verbose) +@@ -4258,13 +4288,15 @@ static int resp_verify(struct scsi_cmnd *scp, struct sdebug_dev_info *devip) mk_sense_invalid_opcode(scp); return check_condition_result; } @@ -262950,7 +314239,15 @@ index 66f507469a31a..747e1cbb7ec91 100644 a_num = is_bytchk3 ? 1 : vnum; /* Treat following check like one for read (i.e. 
no write) access */ ret = check_device_access_params(scp, lba, a_num, false); -@@ -4321,6 +4346,8 @@ static int resp_report_zones(struct scsi_cmnd *scp, + if (ret) + return ret; + +- arr = kcalloc(lb_size, vnum, GFP_ATOMIC); ++ arr = kcalloc(lb_size, vnum, GFP_ATOMIC | __GFP_NOWARN); + if (!arr) { + mk_sense_buffer(scp, ILLEGAL_REQUEST, INSUFF_RES_ASC, + INSUFF_RES_ASCQ); +@@ -4321,6 +4353,8 @@ static int resp_report_zones(struct scsi_cmnd *scp, } zs_lba = get_unaligned_be64(cmd + 2); alloc_len = get_unaligned_be32(cmd + 10); @@ -262959,16 +314256,16 @@ index 66f507469a31a..747e1cbb7ec91 100644 rep_opts = cmd[14] & 0x3f; partial = cmd[14] & 0x80; -@@ -4333,7 +4360,7 @@ static int resp_report_zones(struct scsi_cmnd *scp, +@@ -4333,7 +4367,7 @@ static int resp_report_zones(struct scsi_cmnd *scp, rep_max_zones = min((alloc_len - 64) >> ilog2(RZONES_DESC_HD), max_zones); - arr = kcalloc(RZONES_DESC_HD, alloc_len, GFP_ATOMIC); -+ arr = kzalloc(alloc_len, GFP_ATOMIC); ++ arr = kzalloc(alloc_len, GFP_ATOMIC | __GFP_NOWARN); if (!arr) { mk_sense_buffer(scp, ILLEGAL_REQUEST, INSUFF_RES_ASC, INSUFF_RES_ASCQ); -@@ -4425,7 +4452,7 @@ static int resp_report_zones(struct scsi_cmnd *scp, +@@ -4425,7 +4459,7 @@ static int resp_report_zones(struct scsi_cmnd *scp, put_unaligned_be64(sdebug_capacity - 1, arr + 8); rep_len = (unsigned long)desc - (unsigned long)arr; @@ -262977,7 +314274,7 @@ index 66f507469a31a..747e1cbb7ec91 100644 fini: read_unlock(macc_lckp); -@@ -4648,6 +4675,7 @@ static void zbc_rwp_zone(struct sdebug_dev_info *devip, +@@ -4648,6 +4682,7 @@ static void zbc_rwp_zone(struct sdebug_dev_info *devip, struct sdeb_zone_state *zsp) { enum sdebug_z_cond zc; @@ -262985,7 +314282,7 @@ index 66f507469a31a..747e1cbb7ec91 100644 if (zbc_zone_is_conv(zsp)) return; -@@ -4659,6 +4687,10 @@ static void zbc_rwp_zone(struct sdebug_dev_info *devip, +@@ -4659,6 +4694,10 @@ static void zbc_rwp_zone(struct sdebug_dev_info *devip, if (zsp->z_cond == ZC4_CLOSED) devip->nr_closed--; @@ -262996,6 +314293,32 @@ index 66f507469a31a..747e1cbb7ec91 100644 zsp->z_non_seq_resource = false; zsp->z_wp = zsp->z_start; zsp->z_cond = ZC1_EMPTY; +@@ -7100,8 +7139,12 @@ static int sdebug_add_host_helper(int per_host_idx) + dev_set_name(&sdbg_host->dev, "adapter%d", sdebug_num_hosts); + + error = device_register(&sdbg_host->dev); +- if (error) ++ if (error) { ++ spin_lock(&sdebug_host_list_lock); ++ list_del(&sdbg_host->host_list); ++ spin_unlock(&sdebug_host_list_lock); + goto clean; ++ } + + ++sdebug_num_hosts; + return 0; +@@ -7113,7 +7156,10 @@ clean: + kfree(sdbg_devinfo->zstate); + kfree(sdbg_devinfo); + } +- kfree(sdbg_host); ++ if (sdbg_host->dev.release) ++ put_device(&sdbg_host->dev); ++ else ++ kfree(sdbg_host); + pr_warn("%s: failed, errno=%d\n", __func__, -error); + return error; + } diff --git a/drivers/scsi/scsi_debugfs.c b/drivers/scsi/scsi_debugfs.c index d9109771f274d..db8517f1a485a 100644 --- a/drivers/scsi/scsi_debugfs.c @@ -263009,7 +314332,7 @@ index d9109771f274d..db8517f1a485a 100644 #undef SCSI_CMD_FLAG_NAME diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c -index b6c86cce57bfa..bb5a6e0fa49ab 100644 +index b6c86cce57bfa..dd9f5778f687d 100644 --- a/drivers/scsi/scsi_error.c +++ b/drivers/scsi/scsi_error.c @@ -135,6 +135,23 @@ static bool scsi_eh_should_retry_cmd(struct scsi_cmnd *cmd) @@ -263078,7 +314401,30 @@ index b6c86cce57bfa..bb5a6e0fa49ab 100644 spin_unlock_irqrestore(shost->host_lock, flags); scmd->eh_eflags |= SCSI_EH_ABORT_SCHEDULED; -@@ -460,8 +485,13 @@ static void 
scsi_report_sense(struct scsi_device *sdev, +@@ -318,19 +343,11 @@ enum blk_eh_timer_return scsi_times_out(struct request *req) + + if (rtn == BLK_EH_DONE) { + /* +- * Set the command to complete first in order to prevent a real +- * completion from releasing the command while error handling +- * is using it. If the command was already completed, then the +- * lower level driver beat the timeout handler, and it is safe +- * to return without escalating error recovery. +- * +- * If timeout handling lost the race to a real completion, the +- * block layer may ignore that due to a fake timeout injection, +- * so return RESET_TIMER to allow error handling another shot +- * at this command. ++ * If scsi_done() has already set SCMD_STATE_COMPLETE, do not ++ * modify *scmd. + */ + if (test_and_set_bit(SCMD_STATE_COMPLETE, &scmd->state)) +- return BLK_EH_RESET_TIMER; ++ return BLK_EH_DONE; + if (scsi_abort_command(scmd) != SUCCESS) { + set_host_byte(scmd, DID_TIME_OUT); + scsi_eh_scmd_add(scmd); +@@ -460,8 +477,13 @@ static void scsi_report_sense(struct scsi_device *sdev, if (sshdr->asc == 0x29) { evt_type = SDEV_EVT_POWER_ON_RESET_OCCURRED; @@ -263216,7 +314562,7 @@ index 3717eea37ecb3..e91a0a5bc7a3e 100644 return err; } diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c -index fe22191522a3b..9466474ff01b6 100644 +index fe22191522a3b..86c10edbb5f1e 100644 --- a/drivers/scsi/scsi_scan.c +++ b/drivers/scsi/scsi_scan.c @@ -198,6 +198,53 @@ static void scsi_unlock_floptical(struct scsi_device *sdev, @@ -263300,8 +314646,29 @@ index fe22191522a3b..9466474ff01b6 100644 } if (sdev->scsi_level >= SCSI_3) +@@ -1156,8 +1206,7 @@ static int scsi_probe_and_add_lun(struct scsi_target *starget, + * that no LUN is present, so don't add sdev in these cases. + * Two specific examples are: + * 1) NetApp targets: return PQ=1, PDT=0x1f +- * 2) IBM/2145 targets: return PQ=1, PDT=0 +- * 3) USB UFI: returns PDT=0x1f, with the PQ bits being "reserved" ++ * 2) USB UFI: returns PDT=0x1f, with the PQ bits being "reserved" + * in the UFI 1.0 spec (we cannot rely on reserved bits). + * + * References: +@@ -1171,8 +1220,8 @@ static int scsi_probe_and_add_lun(struct scsi_target *starget, + * PDT=00h Direct-access device (floppy) + * PDT=1Fh none (no FDD connected to the requested logical unit) + */ +- if (((result[0] >> 5) == 1 || +- (starget->pdt_1f_for_no_lun && (result[0] & 0x1f) == 0x1f)) && ++ if (((result[0] >> 5) == 1 || starget->pdt_1f_for_no_lun) && ++ (result[0] & 0x1f) == 0x1f && + !scsi_is_wlun(lun)) { + SCSI_LOG_SCAN_BUS(3, sdev_printk(KERN_INFO, sdev, + "scsi scan: peripheral device type" diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c -index a35841b34bfd9..920aae661c5b2 100644 +index a35841b34bfd9..774864b54b97c 100644 --- a/drivers/scsi/scsi_sysfs.c +++ b/drivers/scsi/scsi_sysfs.c @@ -797,6 +797,7 @@ store_state_field(struct device *dev, struct device_attribute *attr, @@ -263312,7 +314679,7 @@ index a35841b34bfd9..920aae661c5b2 100644 for (i = 0; i < ARRAY_SIZE(sdev_states); i++) { const int len = strlen(sdev_states[i].name); -@@ -815,20 +816,27 @@ store_state_field(struct device *dev, struct device_attribute *attr, +@@ -815,20 +816,35 @@ store_state_field(struct device *dev, struct device_attribute *attr, } mutex_lock(&sdev->state_mutex); @@ -263326,6 +314693,14 @@ index a35841b34bfd9..920aae661c5b2 100644 - * waiting for pending I/O to finish. 
- */ - if (ret == 0 && state == SDEV_RUNNING) { ++ switch (sdev->sdev_state) { ++ case SDEV_RUNNING: ++ case SDEV_OFFLINE: ++ break; ++ default: ++ mutex_unlock(&sdev->state_mutex); ++ return -EINVAL; ++ } + if (sdev->sdev_state == SDEV_RUNNING && state == SDEV_RUNNING) { + ret = 0; + } else { @@ -263351,7 +314726,7 @@ index a35841b34bfd9..920aae661c5b2 100644 return ret == 0 ? count : -EINVAL; } -@@ -1388,6 +1396,7 @@ int scsi_sysfs_add_sdev(struct scsi_device *sdev) +@@ -1388,6 +1404,7 @@ int scsi_sysfs_add_sdev(struct scsi_device *sdev) * We're treating error on bsg register as non-fatal, so * pretend nothing went wrong. */ @@ -263461,7 +314836,7 @@ index 60e406bcf42a9..a2524106206db 100644 /* diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c -index 78343d3f93857..f46ae53917582 100644 +index 78343d3f93857..4d23e5af20d30 100644 --- a/drivers/scsi/scsi_transport_iscsi.c +++ b/drivers/scsi/scsi_transport_iscsi.c @@ -86,6 +86,9 @@ struct iscsi_internal { @@ -263558,22 +314933,24 @@ index 78343d3f93857..f46ae53917582 100644 err = device_register(&ep->dev); if (err) - goto free_ep; -+ goto free_id; ++ goto put_dev; err = sysfs_create_group(&ep->dev.kobj, &iscsi_endpoint_group); if (err) -@@ -249,6 +246,10 @@ unregister_dev: +@@ -249,6 +246,12 @@ unregister_dev: device_unregister(&ep->dev); return NULL; -+free_id: ++put_dev: + mutex_lock(&iscsi_ep_idr_mutex); + idr_remove(&iscsi_ep_idr, id); + mutex_unlock(&iscsi_ep_idr_mutex); ++ put_device(&ep->dev); ++ return NULL; free_ep: kfree(ep); return NULL; -@@ -276,14 +277,17 @@ EXPORT_SYMBOL_GPL(iscsi_put_endpoint); +@@ -276,14 +279,17 @@ EXPORT_SYMBOL_GPL(iscsi_put_endpoint); */ struct iscsi_endpoint *iscsi_lookup_endpoint(u64 handle) { @@ -263597,7 +314974,95 @@ index 78343d3f93857..f46ae53917582 100644 } EXPORT_SYMBOL_GPL(iscsi_lookup_endpoint); -@@ -1899,12 +1903,12 @@ static void session_recovery_timedout(struct work_struct *work) +@@ -763,7 +769,7 @@ iscsi_create_iface(struct Scsi_Host *shost, struct iscsi_transport *transport, + + err = device_register(&iface->dev); + if (err) +- goto free_iface; ++ goto put_dev; + + err = sysfs_create_group(&iface->dev.kobj, &iscsi_iface_group); + if (err) +@@ -777,9 +783,8 @@ unreg_iface: + device_unregister(&iface->dev); + return NULL; + +-free_iface: +- put_device(iface->dev.parent); +- kfree(iface); ++put_dev: ++ put_device(&iface->dev); + return NULL; + } + EXPORT_SYMBOL_GPL(iscsi_create_iface); +@@ -1248,15 +1253,15 @@ iscsi_create_flashnode_sess(struct Scsi_Host *shost, int index, + + err = device_register(&fnode_sess->dev); + if (err) +- goto free_fnode_sess; ++ goto put_dev; + + if (dd_size) + fnode_sess->dd_data = &fnode_sess[1]; + + return fnode_sess; + +-free_fnode_sess: +- kfree(fnode_sess); ++put_dev: ++ put_device(&fnode_sess->dev); + return NULL; + } + EXPORT_SYMBOL_GPL(iscsi_create_flashnode_sess); +@@ -1296,15 +1301,15 @@ iscsi_create_flashnode_conn(struct Scsi_Host *shost, + + err = device_register(&fnode_conn->dev); + if (err) +- goto free_fnode_conn; ++ goto put_dev; + + if (dd_size) + fnode_conn->dd_data = &fnode_conn[1]; + + return fnode_conn; + +-free_fnode_conn: +- kfree(fnode_conn); ++put_dev: ++ put_device(&fnode_conn->dev); + return NULL; + } + EXPORT_SYMBOL_GPL(iscsi_create_flashnode_conn); +@@ -1674,6 +1679,13 @@ static const char *iscsi_session_state_name(int state) + return name; + } + ++static char *iscsi_session_target_state_name[] = { ++ [ISCSI_SESSION_TARGET_UNBOUND] = "UNBOUND", ++ [ISCSI_SESSION_TARGET_ALLOCATED] = "ALLOCATED", ++ 
[ISCSI_SESSION_TARGET_SCANNED] = "SCANNED", ++ [ISCSI_SESSION_TARGET_UNBINDING] = "UNBINDING", ++}; ++ + int iscsi_session_chkready(struct iscsi_cls_session *session) + { + int err; +@@ -1802,9 +1814,13 @@ static int iscsi_user_scan_session(struct device *dev, void *data) + if ((scan_data->channel == SCAN_WILD_CARD || + scan_data->channel == 0) && + (scan_data->id == SCAN_WILD_CARD || +- scan_data->id == id)) ++ scan_data->id == id)) { + scsi_scan_target(&session->dev, 0, id, + scan_data->lun, scan_data->rescan); ++ spin_lock_irqsave(&session->lock, flags); ++ session->target_state = ISCSI_SESSION_TARGET_SCANNED; ++ spin_unlock_irqrestore(&session->lock, flags); ++ } + } + + user_scan_exit: +@@ -1899,12 +1915,12 @@ static void session_recovery_timedout(struct work_struct *work) } spin_unlock_irqrestore(&session->lock, flags); @@ -263613,7 +315078,63 @@ index 78343d3f93857..f46ae53917582 100644 } static void __iscsi_unblock_session(struct work_struct *work) -@@ -2221,10 +2225,10 @@ static void iscsi_stop_conn(struct iscsi_cls_conn *conn, int flag) +@@ -1993,31 +2009,41 @@ static void __iscsi_unbind_session(struct work_struct *work) + struct iscsi_cls_host *ihost = shost->shost_data; + unsigned long flags; + unsigned int target_id; ++ bool remove_target = true; + + ISCSI_DBG_TRANS_SESSION(session, "Unbinding session\n"); + + /* Prevent new scans and make sure scanning is not in progress */ + mutex_lock(&ihost->mutex); + spin_lock_irqsave(&session->lock, flags); +- if (session->target_id == ISCSI_MAX_TARGET) { ++ if (session->target_state == ISCSI_SESSION_TARGET_ALLOCATED) { ++ remove_target = false; ++ } else if (session->target_state != ISCSI_SESSION_TARGET_SCANNED) { + spin_unlock_irqrestore(&session->lock, flags); + mutex_unlock(&ihost->mutex); +- goto unbind_session_exit; ++ ISCSI_DBG_TRANS_SESSION(session, ++ "Skipping target unbinding: Session is unbound/unbinding.\n"); ++ return; + } + ++ session->target_state = ISCSI_SESSION_TARGET_UNBINDING; + target_id = session->target_id; + session->target_id = ISCSI_MAX_TARGET; + spin_unlock_irqrestore(&session->lock, flags); + mutex_unlock(&ihost->mutex); + +- scsi_remove_target(&session->dev); ++ if (remove_target) ++ scsi_remove_target(&session->dev); + + if (session->ida_used) + ida_simple_remove(&iscsi_sess_ida, target_id); + +-unbind_session_exit: + iscsi_session_event(session, ISCSI_KEVENT_UNBIND_SESSION); + ISCSI_DBG_TRANS_SESSION(session, "Completed target removal\n"); ++ ++ spin_lock_irqsave(&session->lock, flags); ++ session->target_state = ISCSI_SESSION_TARGET_UNBOUND; ++ spin_unlock_irqrestore(&session->lock, flags); + } + + static void __iscsi_destroy_session(struct work_struct *work) +@@ -2086,6 +2112,9 @@ int iscsi_add_session(struct iscsi_cls_session *session, unsigned int target_id) + session->ida_used = true; + } else + session->target_id = target_id; ++ spin_lock_irqsave(&session->lock, flags); ++ session->target_state = ISCSI_SESSION_TARGET_ALLOCATED; ++ spin_unlock_irqrestore(&session->lock, flags); + + dev_set_name(&session->dev, "session%u", session->sid); + err = device_add(&session->dev); +@@ -2221,10 +2250,10 @@ static void iscsi_stop_conn(struct iscsi_cls_conn *conn, int flag) switch (flag) { case STOP_CONN_RECOVER: @@ -263626,7 +315147,7 @@ index 78343d3f93857..f46ae53917582 100644 break; default: iscsi_cls_conn_printk(KERN_ERR, conn, "invalid stop flag %d\n", -@@ -2236,16 +2240,51 @@ static void iscsi_stop_conn(struct iscsi_cls_conn *conn, int flag) +@@ -2236,16 +2265,51 @@ static void iscsi_stop_conn(struct 
iscsi_cls_conn *conn, int flag) ISCSI_DBG_TRANS_CONN(conn, "Stopping conn done.\n"); } @@ -263647,7 +315168,7 @@ index 78343d3f93857..f46ae53917582 100644 + + if (!conn->ep || !session->transport->ep_disconnect) + return; -+ + + ep = conn->ep; + conn->ep = NULL; + @@ -263679,13 +315200,13 @@ index 78343d3f93857..f46ae53917582 100644 + mutex_lock(&conn->ep_mutex); + } +} - ++ +static int iscsi_if_stop_conn(struct iscsi_cls_conn *conn, int flag) +{ ISCSI_DBG_TRANS_CONN(conn, "iscsi if conn stop.\n"); /* * If this is a termination we have to call stop_conn with that flag -@@ -2256,12 +2295,25 @@ static int iscsi_if_stop_conn(struct iscsi_transport *transport, +@@ -2256,12 +2320,25 @@ static int iscsi_if_stop_conn(struct iscsi_transport *transport, cancel_work_sync(&conn->cleanup_work); iscsi_stop_conn(conn, flag); } else { @@ -263711,7 +315232,7 @@ index 78343d3f93857..f46ae53917582 100644 ISCSI_DBG_TRANS_CONN(conn, "flush kernel conn cleanup.\n"); flush_work(&conn->cleanup_work); -@@ -2270,31 +2322,14 @@ static int iscsi_if_stop_conn(struct iscsi_transport *transport, +@@ -2270,31 +2347,14 @@ static int iscsi_if_stop_conn(struct iscsi_transport *transport, * Only clear for recovery to avoid extra cleanup runs during * termination. */ @@ -263745,7 +315266,7 @@ index 78343d3f93857..f46ae53917582 100644 static void iscsi_cleanup_conn_work_fn(struct work_struct *work) { struct iscsi_cls_conn *conn = container_of(work, struct iscsi_cls_conn, -@@ -2303,18 +2338,11 @@ static void iscsi_cleanup_conn_work_fn(struct work_struct *work) +@@ -2303,18 +2363,11 @@ static void iscsi_cleanup_conn_work_fn(struct work_struct *work) mutex_lock(&conn->ep_mutex); /* @@ -263768,7 +315289,7 @@ index 78343d3f93857..f46ae53917582 100644 iscsi_ep_disconnect(conn, false); if (system_state != SYSTEM_RUNNING) { -@@ -2332,6 +2360,55 @@ static void iscsi_cleanup_conn_work_fn(struct work_struct *work) +@@ -2332,6 +2385,55 @@ static void iscsi_cleanup_conn_work_fn(struct work_struct *work) ISCSI_DBG_TRANS_CONN(conn, "cleanup done.\n"); } @@ -263824,7 +315345,7 @@ index 78343d3f93857..f46ae53917582 100644 void iscsi_free_session(struct iscsi_cls_session *session) { ISCSI_DBG_TRANS_SESSION(session, "Freeing session\n"); -@@ -2370,11 +2447,12 @@ iscsi_create_conn(struct iscsi_cls_session *session, int dd_size, uint32_t cid) +@@ -2370,11 +2472,12 @@ iscsi_create_conn(struct iscsi_cls_session *session, int dd_size, uint32_t cid) conn->dd_data = &conn[1]; mutex_init(&conn->ep_mutex); @@ -263838,7 +315359,7 @@ index 78343d3f93857..f46ae53917582 100644 /* this is released in the dev's release function */ if (!get_device(&session->dev)) -@@ -2561,9 +2639,32 @@ void iscsi_conn_error_event(struct iscsi_cls_conn *conn, enum iscsi_err error) +@@ -2561,9 +2664,32 @@ void iscsi_conn_error_event(struct iscsi_cls_conn *conn, enum iscsi_err error) struct iscsi_uevent *ev; struct iscsi_internal *priv; int len = nlmsg_total_size(sizeof(*ev)); @@ -263873,7 +315394,7 @@ index 78343d3f93857..f46ae53917582 100644 priv = iscsi_if_transport_lookup(conn->transport); if (!priv) -@@ -2913,7 +3014,7 @@ iscsi_set_param(struct iscsi_transport *transport, struct iscsi_uevent *ev) +@@ -2913,7 +3039,7 @@ iscsi_set_param(struct iscsi_transport *transport, struct iscsi_uevent *ev) char *data = (char*)ev + sizeof(*ev); struct iscsi_cls_conn *conn; struct iscsi_cls_session *session; @@ -263882,7 +315403,7 @@ index 78343d3f93857..f46ae53917582 100644 if (ev->u.set_param.len > PAGE_SIZE) return -EINVAL; -@@ -2930,8 +3031,8 @@ iscsi_set_param(struct iscsi_transport 
*transport, struct iscsi_uevent *ev) +@@ -2930,8 +3056,8 @@ iscsi_set_param(struct iscsi_transport *transport, struct iscsi_uevent *ev) session->recovery_tmo = value; break; default: @@ -263893,7 +315414,7 @@ index 78343d3f93857..f46ae53917582 100644 err = transport->set_param(conn, ev->u.set_param.param, data, ev->u.set_param.len); } else { -@@ -3003,16 +3104,7 @@ static int iscsi_if_ep_disconnect(struct iscsi_transport *transport, +@@ -3003,16 +3129,7 @@ static int iscsi_if_ep_disconnect(struct iscsi_transport *transport, } mutex_lock(&conn->ep_mutex); @@ -263911,7 +315432,7 @@ index 78343d3f93857..f46ae53917582 100644 mutex_unlock(&conn->ep_mutex); put_ep: iscsi_put_endpoint(ep); -@@ -3688,7 +3780,12 @@ static int iscsi_if_transport_conn(struct iscsi_transport *transport, +@@ -3688,7 +3805,12 @@ static int iscsi_if_transport_conn(struct iscsi_transport *transport, case ISCSI_UEVENT_DESTROY_CONN: return iscsi_if_destroy_conn(transport, ev); case ISCSI_UEVENT_STOP_CONN: @@ -263925,7 +315446,7 @@ index 78343d3f93857..f46ae53917582 100644 } /* -@@ -3715,24 +3812,17 @@ static int iscsi_if_transport_conn(struct iscsi_transport *transport, +@@ -3715,24 +3837,17 @@ static int iscsi_if_transport_conn(struct iscsi_transport *transport, return -EINVAL; mutex_lock(&conn->ep_mutex); @@ -263953,7 +315474,7 @@ index 78343d3f93857..f46ae53917582 100644 session = iscsi_session_lookup(ev->u.b_conn.sid); if (!session) { err = -EINVAL; -@@ -3743,7 +3833,7 @@ static int iscsi_if_transport_conn(struct iscsi_transport *transport, +@@ -3743,7 +3858,7 @@ static int iscsi_if_transport_conn(struct iscsi_transport *transport, ev->u.b_conn.transport_eph, ev->u.b_conn.is_leading); if (!ev->r.retcode) @@ -263962,7 +315483,7 @@ index 78343d3f93857..f46ae53917582 100644 if (ev->r.retcode || !transport->ep_connect) break; -@@ -3762,7 +3852,8 @@ static int iscsi_if_transport_conn(struct iscsi_transport *transport, +@@ -3762,7 +3877,8 @@ static int iscsi_if_transport_conn(struct iscsi_transport *transport, case ISCSI_UEVENT_START_CONN: ev->r.retcode = transport->start_conn(conn); if (!ev->r.retcode) @@ -263972,7 +315493,7 @@ index 78343d3f93857..f46ae53917582 100644 break; case ISCSI_UEVENT_SEND_PDU: pdu_len = nlh->nlmsg_len - sizeof(*nlh) - sizeof(*ev); -@@ -4070,10 +4161,11 @@ static ssize_t show_conn_state(struct device *dev, +@@ -4070,10 +4186,11 @@ static ssize_t show_conn_state(struct device *dev, { struct iscsi_cls_conn *conn = iscsi_dev_to_conn(dev->parent); const char *state = "unknown"; @@ -263987,8 +315508,91 @@ index 78343d3f93857..f46ae53917582 100644 return sysfs_emit(buf, "%s\n", state); } +@@ -4298,6 +4415,19 @@ iscsi_session_attr(def_taskmgmt_tmo, ISCSI_PARAM_DEF_TASKMGMT_TMO, 0); + iscsi_session_attr(discovery_parent_idx, ISCSI_PARAM_DISCOVERY_PARENT_IDX, 0); + iscsi_session_attr(discovery_parent_type, ISCSI_PARAM_DISCOVERY_PARENT_TYPE, 0); + ++static ssize_t ++show_priv_session_target_state(struct device *dev, struct device_attribute *attr, ++ char *buf) ++{ ++ struct iscsi_cls_session *session = iscsi_dev_to_session(dev->parent); ++ ++ return sysfs_emit(buf, "%s\n", ++ iscsi_session_target_state_name[session->target_state]); ++} ++ ++static ISCSI_CLASS_ATTR(priv_sess, target_state, S_IRUGO, ++ show_priv_session_target_state, NULL); ++ + static ssize_t + show_priv_session_state(struct device *dev, struct device_attribute *attr, + char *buf) +@@ -4400,6 +4530,7 @@ static struct attribute *iscsi_session_attrs[] = { + &dev_attr_sess_boot_target.attr, + &dev_attr_priv_sess_recovery_tmo.attr, + 
&dev_attr_priv_sess_state.attr, ++ &dev_attr_priv_sess_target_state.attr, + &dev_attr_priv_sess_creator.attr, + &dev_attr_sess_chap_out_idx.attr, + &dev_attr_sess_chap_in_idx.attr, +@@ -4513,6 +4644,8 @@ static umode_t iscsi_session_attr_is_visible(struct kobject *kobj, + return S_IRUGO | S_IWUSR; + else if (attr == &dev_attr_priv_sess_state.attr) + return S_IRUGO; ++ else if (attr == &dev_attr_priv_sess_target_state.attr) ++ return S_IRUGO; + else if (attr == &dev_attr_priv_sess_creator.attr) + return S_IRUGO; + else if (attr == &dev_attr_priv_sess_target_id.attr) +@@ -4746,7 +4879,7 @@ iscsi_register_transport(struct iscsi_transport *tt) + dev_set_name(&priv->dev, "%s", tt->name); + err = device_register(&priv->dev); + if (err) +- goto free_priv; ++ goto put_dev; + + err = sysfs_create_group(&priv->dev.kobj, &iscsi_transport_group); + if (err) +@@ -4781,8 +4914,8 @@ iscsi_register_transport(struct iscsi_transport *tt) + unregister_dev: + device_unregister(&priv->dev); + return NULL; +-free_priv: +- kfree(priv); ++put_dev: ++ put_device(&priv->dev); + return NULL; + } + EXPORT_SYMBOL_GPL(iscsi_register_transport); +diff --git a/drivers/scsi/scsi_transport_sas.c b/drivers/scsi/scsi_transport_sas.c +index 4a96fb05731d2..c6256fdc24b10 100644 +--- a/drivers/scsi/scsi_transport_sas.c ++++ b/drivers/scsi/scsi_transport_sas.c +@@ -716,12 +716,17 @@ int sas_phy_add(struct sas_phy *phy) + int error; + + error = device_add(&phy->dev); +- if (!error) { +- transport_add_device(&phy->dev); +- transport_configure_device(&phy->dev); ++ if (error) ++ return error; ++ ++ error = transport_add_device(&phy->dev); ++ if (error) { ++ device_del(&phy->dev); ++ return error; + } ++ transport_configure_device(&phy->dev); + +- return error; ++ return 0; + } + EXPORT_SYMBOL(sas_phy_add); + diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c -index fce63335084ed..de6640ad19434 100644 +index fce63335084ed..1e887c11e83d0 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -48,6 +48,7 @@ @@ -263999,7 +315603,24 @@ index fce63335084ed..de6640ad19434 100644 #include <linux/mutex.h> #include <linux/string_helpers.h> #include <linux/async.h> -@@ -2607,6 +2608,13 @@ sd_do_mode_sense(struct scsi_disk *sdkp, int dbd, int modepage, +@@ -1071,6 +1072,7 @@ static blk_status_t sd_setup_write_same_cmnd(struct scsi_cmnd *cmd) + struct bio *bio = rq->bio; + u64 lba = sectors_to_logical(sdp, blk_rq_pos(rq)); + u32 nr_blocks = sectors_to_logical(sdp, blk_rq_sectors(rq)); ++ unsigned int nr_bytes = blk_rq_bytes(rq); + blk_status_t ret; + + if (sdkp->device->no_write_same) +@@ -1107,7 +1109,7 @@ static blk_status_t sd_setup_write_same_cmnd(struct scsi_cmnd *cmd) + */ + rq->__data_len = sdp->sector_size; + ret = scsi_alloc_sgtables(cmd); +- rq->__data_len = blk_rq_bytes(rq); ++ rq->__data_len = nr_bytes; + + return ret; + } +@@ -2607,6 +2609,13 @@ sd_do_mode_sense(struct scsi_disk *sdkp, int dbd, int modepage, unsigned char *buffer, int len, struct scsi_mode_data *data, struct scsi_sense_hdr *sshdr) { @@ -264013,7 +315634,7 @@ index fce63335084ed..de6640ad19434 100644 return scsi_mode_sense(sdkp->device, dbd, modepage, buffer, len, SD_TIMEOUT, sdkp->max_retries, data, sshdr); -@@ -3472,7 +3480,6 @@ static int sd_probe(struct device *dev) +@@ -3472,7 +3481,6 @@ static int sd_probe(struct device *dev) out_put: put_disk(gd); out_free: @@ -264021,7 +315642,7 @@ index fce63335084ed..de6640ad19434 100644 kfree(sdkp); out: scsi_autopm_put_device(sdp); -@@ -3620,7 +3627,8 @@ static int sd_suspend_common(struct device *dev, bool 
ignore_stop_errors) +@@ -3620,7 +3628,8 @@ static int sd_suspend_common(struct device *dev, bool ignore_stop_errors) return 0; if (sdkp->WCE && sdkp->media_present) { @@ -264031,7 +315652,7 @@ index fce63335084ed..de6640ad19434 100644 ret = sd_sync_cache(sdkp, &sshdr); if (ret) { -@@ -3642,7 +3650,8 @@ static int sd_suspend_common(struct device *dev, bool ignore_stop_errors) +@@ -3642,7 +3651,8 @@ static int sd_suspend_common(struct device *dev, bool ignore_stop_errors) } if (sdkp->device->manage_start_stop) { @@ -264397,6 +316018,20 @@ index d29c1352a826a..c1db93054c863 100644 +int sis_wait_for_fw_triage_completion(struct pqi_ctrl_info *ctrl_info); #endif /* _SMARTPQI_SIS_H */ +diff --git a/drivers/scsi/snic/snic_disc.c b/drivers/scsi/snic/snic_disc.c +index e9ccfb97773f1..7cf871323b2c4 100644 +--- a/drivers/scsi/snic/snic_disc.c ++++ b/drivers/scsi/snic/snic_disc.c +@@ -318,6 +318,9 @@ snic_tgt_create(struct snic *snic, struct snic_tgt_id *tgtid) + ret); + + put_device(&snic->shost->shost_gendev); ++ spin_lock_irqsave(snic->shost->host_lock, flags); ++ list_del(&tgt->list); ++ spin_unlock_irqrestore(snic->shost->host_lock, flags); + kfree(tgt); + tgt = NULL; + diff --git a/drivers/scsi/sr.c b/drivers/scsi/sr.c index 8b17b35283aa5..652cd81d77753 100644 --- a/drivers/scsi/sr.c @@ -264568,10 +316203,107 @@ index f1ba7f5b52a89..b5267dae3355a 100644 if (sizeof(ver) == cp_len) cmd->result = DID_OK << 16; diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c -index 9eb1b88a29dde..3d03e1ca58201 100644 +index 9eb1b88a29dde..6110dfd903f74 100644 --- a/drivers/scsi/storvsc_drv.c +++ b/drivers/scsi/storvsc_drv.c -@@ -1907,7 +1907,7 @@ static struct scsi_host_template scsi_driver = { +@@ -344,16 +344,21 @@ enum storvsc_request_type { + }; + + /* +- * SRB status codes and masks; a subset of the codes used here. ++ * SRB status codes and masks. In the 8-bit field, the two high order bits ++ * are flags, while the remaining 6 bits are an integer status code. The ++ * definitions here include only the subset of the integer status codes that ++ * are tested for in this driver. + */ +- + #define SRB_STATUS_AUTOSENSE_VALID 0x80 + #define SRB_STATUS_QUEUE_FROZEN 0x40 +-#define SRB_STATUS_INVALID_LUN 0x20 +-#define SRB_STATUS_SUCCESS 0x01 +-#define SRB_STATUS_ABORTED 0x02 +-#define SRB_STATUS_ERROR 0x04 +-#define SRB_STATUS_DATA_OVERRUN 0x12 ++ ++/* SRB status integer codes */ ++#define SRB_STATUS_SUCCESS 0x01 ++#define SRB_STATUS_ABORTED 0x02 ++#define SRB_STATUS_ERROR 0x04 ++#define SRB_STATUS_INVALID_REQUEST 0x06 ++#define SRB_STATUS_DATA_OVERRUN 0x12 ++#define SRB_STATUS_INVALID_LUN 0x20 + + #define SRB_STATUS(status) \ + (status & ~(SRB_STATUS_AUTOSENSE_VALID | SRB_STATUS_QUEUE_FROZEN)) +@@ -1032,38 +1037,25 @@ static void storvsc_handle_error(struct vmscsi_request *vm_srb, + void (*process_err_fn)(struct work_struct *work); + struct hv_host_device *host_dev = shost_priv(host); + +- /* +- * In some situations, Hyper-V sets multiple bits in the +- * srb_status, such as ABORTED and ERROR. So process them +- * individually, with the most specific bits first. 
+- */ +- +- if (vm_srb->srb_status & SRB_STATUS_INVALID_LUN) { +- set_host_byte(scmnd, DID_NO_CONNECT); +- process_err_fn = storvsc_remove_lun; +- goto do_work; +- } ++ switch (SRB_STATUS(vm_srb->srb_status)) { ++ case SRB_STATUS_ERROR: ++ case SRB_STATUS_ABORTED: ++ case SRB_STATUS_INVALID_REQUEST: ++ if (vm_srb->srb_status & SRB_STATUS_AUTOSENSE_VALID) { ++ /* Check for capacity change */ ++ if ((asc == 0x2a) && (ascq == 0x9)) { ++ process_err_fn = storvsc_device_scan; ++ /* Retry the I/O that triggered this. */ ++ set_host_byte(scmnd, DID_REQUEUE); ++ goto do_work; ++ } + +- if (vm_srb->srb_status & SRB_STATUS_ABORTED) { +- if (vm_srb->srb_status & SRB_STATUS_AUTOSENSE_VALID && +- /* Capacity data has changed */ +- (asc == 0x2a) && (ascq == 0x9)) { +- process_err_fn = storvsc_device_scan; + /* +- * Retry the I/O that triggered this. ++ * Otherwise, let upper layer deal with the ++ * error when sense message is present + */ +- set_host_byte(scmnd, DID_REQUEUE); +- goto do_work; +- } +- } +- +- if (vm_srb->srb_status & SRB_STATUS_ERROR) { +- /* +- * Let upper layer deal with error when +- * sense message is present. +- */ +- if (vm_srb->srb_status & SRB_STATUS_AUTOSENSE_VALID) + return; ++ } + + /* + * If there is an error; offline the device since all +@@ -1086,6 +1078,13 @@ static void storvsc_handle_error(struct vmscsi_request *vm_srb, + default: + set_host_byte(scmnd, DID_ERROR); + } ++ return; ++ ++ case SRB_STATUS_INVALID_LUN: ++ set_host_byte(scmnd, DID_NO_CONNECT); ++ process_err_fn = storvsc_remove_lun; ++ goto do_work; ++ + } + return; + +@@ -1907,7 +1906,7 @@ static struct scsi_host_template scsi_driver = { .cmd_per_lun = 2048, .this_id = -1, /* Ensure there are no gaps in presented sgls */ @@ -264580,7 +316312,7 @@ index 9eb1b88a29dde..3d03e1ca58201 100644 .no_write_same = 1, .track_queue_depth = 1, .change_queue_depth = storvsc_change_queue_depth, -@@ -1961,6 +1961,7 @@ static int storvsc_probe(struct hv_device *device, +@@ -1961,6 +1960,7 @@ static int storvsc_probe(struct hv_device *device, int max_targets; int max_channels; int max_sub_channels = 0; @@ -264588,7 +316320,7 @@ index 9eb1b88a29dde..3d03e1ca58201 100644 /* * Based on the windows host we are running on, -@@ -2049,12 +2050,28 @@ static int storvsc_probe(struct hv_device *device, +@@ -2049,12 +2049,28 @@ static int storvsc_probe(struct hv_device *device, } /* max cmd length */ host->max_cmd_len = STORVSC_MAX_CMD_LEN; @@ -264621,7 +316353,7 @@ index 9eb1b88a29dde..3d03e1ca58201 100644 /* * For non-IDE disks, the host supports multiple channels. * Set the number of HW queues we are supporting. 
-@@ -2076,7 +2093,7 @@ static int storvsc_probe(struct hv_device *device, +@@ -2076,7 +2092,7 @@ static int storvsc_probe(struct hv_device *device, */ host_dev->handle_error_wq = alloc_ordered_workqueue("storvsc_error_wq_%d", @@ -264930,7 +316662,7 @@ index 8859c13f4e091..adc302b1a57ae 100644 pm_runtime_enable(&pdev->dev); diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c -index 41f2ff35f82b2..dae1a85f1512c 100644 +index 41f2ff35f82b2..120831428ec6f 100644 --- a/drivers/scsi/ufs/ufshcd.c +++ b/drivers/scsi/ufs/ufshcd.c @@ -112,8 +112,13 @@ int ufshcd_dump_regs(struct ufs_hba *hba, size_t offset, size_t len, @@ -265017,7 +316749,66 @@ index 41f2ff35f82b2..dae1a85f1512c 100644 __func__, hba->pwr_info.gear_rx, hba->pwr_info.gear_tx, hba->pwr_info.lane_rx, hba->pwr_info.lane_tx, -@@ -1658,7 +1669,8 @@ int ufshcd_hold(struct ufs_hba *hba, bool async) +@@ -1174,12 +1185,14 @@ static int ufshcd_clock_scaling_prepare(struct ufs_hba *hba) + * clock scaling is in progress + */ + ufshcd_scsi_block_requests(hba); ++ mutex_lock(&hba->wb_mutex); + down_write(&hba->clk_scaling_lock); + + if (!hba->clk_scaling.is_allowed || + ufshcd_wait_for_doorbell_clr(hba, DOORBELL_CLR_TOUT_US)) { + ret = -EBUSY; + up_write(&hba->clk_scaling_lock); ++ mutex_unlock(&hba->wb_mutex); + ufshcd_scsi_unblock_requests(hba); + goto out; + } +@@ -1191,12 +1204,15 @@ out: + return ret; + } + +-static void ufshcd_clock_scaling_unprepare(struct ufs_hba *hba, bool writelock) ++static void ufshcd_clock_scaling_unprepare(struct ufs_hba *hba, int err, bool scale_up) + { +- if (writelock) +- up_write(&hba->clk_scaling_lock); +- else +- up_read(&hba->clk_scaling_lock); ++ up_write(&hba->clk_scaling_lock); ++ ++ /* Enable Write Booster if we have scaled up else disable it */ ++ ufshcd_wb_toggle(hba, scale_up); ++ ++ mutex_unlock(&hba->wb_mutex); ++ + ufshcd_scsi_unblock_requests(hba); + ufshcd_release(hba); + } +@@ -1213,7 +1229,6 @@ static void ufshcd_clock_scaling_unprepare(struct ufs_hba *hba, bool writelock) + static int ufshcd_devfreq_scale(struct ufs_hba *hba, bool scale_up) + { + int ret = 0; +- bool is_writelock = true; + + ret = ufshcd_clock_scaling_prepare(hba); + if (ret) +@@ -1242,13 +1257,8 @@ static int ufshcd_devfreq_scale(struct ufs_hba *hba, bool scale_up) + } + } + +- /* Enable Write Booster if we have scaled up else disable it */ +- downgrade_write(&hba->clk_scaling_lock); +- is_writelock = false; +- ufshcd_wb_toggle(hba, scale_up); +- + out_unprepare: +- ufshcd_clock_scaling_unprepare(hba, is_writelock); ++ ufshcd_clock_scaling_unprepare(hba, ret, scale_up); + return ret; + } + +@@ -1658,7 +1668,8 @@ int ufshcd_hold(struct ufs_hba *hba, bool async) bool flush_result; unsigned long flags; @@ -265027,7 +316818,7 @@ index 41f2ff35f82b2..dae1a85f1512c 100644 goto out; spin_lock_irqsave(hba->host->host_lock, flags); hba->clk_gating.active_reqs++; -@@ -1818,7 +1830,7 @@ static void __ufshcd_release(struct ufs_hba *hba) +@@ -1818,7 +1829,7 @@ static void __ufshcd_release(struct ufs_hba *hba) if (hba->clk_gating.active_reqs || hba->clk_gating.is_suspended || hba->ufshcd_state != UFSHCD_STATE_OPERATIONAL || @@ -265036,7 +316827,7 @@ index 41f2ff35f82b2..dae1a85f1512c 100644 hba->active_uic_cmd || hba->uic_async_done || hba->clk_gating.state == CLKS_OFF) return; -@@ -1953,11 +1965,15 @@ static void ufshcd_exit_clk_gating(struct ufs_hba *hba) +@@ -1953,11 +1964,15 @@ static void ufshcd_exit_clk_gating(struct ufs_hba *hba) { if (!hba->clk_gating.is_initialized) return; @@ -265055,7 +316846,7 @@ index 
41f2ff35f82b2..dae1a85f1512c 100644 } /* Must be called with host lock acquired */ -@@ -2181,6 +2197,7 @@ static inline int ufshcd_hba_capabilities(struct ufs_hba *hba) +@@ -2181,6 +2196,7 @@ static inline int ufshcd_hba_capabilities(struct ufs_hba *hba) hba->nutrs = (hba->capabilities & MASK_TRANSFER_REQUESTS_SLOTS) + 1; hba->nutmrs = ((hba->capabilities & MASK_TASK_MANAGEMENT_REQUEST_SLOTS) >> 16) + 1; @@ -265063,7 +316854,31 @@ index 41f2ff35f82b2..dae1a85f1512c 100644 /* Read crypto capabilities */ err = ufshcd_hba_init_crypto_capabilities(hba); -@@ -2906,30 +2923,15 @@ static int ufshcd_wait_for_dev_cmd(struct ufs_hba *hba, +@@ -2683,6 +2699,12 @@ static int ufshcd_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd) + if (!down_read_trylock(&hba->clk_scaling_lock)) + return SCSI_MLQUEUE_HOST_BUSY; + ++ /* ++ * Allows the UFS error handler to wait for prior ufshcd_queuecommand() ++ * calls. ++ */ ++ rcu_read_lock(); ++ + switch (hba->ufshcd_state) { + case UFSHCD_STATE_OPERATIONAL: + case UFSHCD_STATE_EH_SCHEDULED_NON_FATAL: +@@ -2749,7 +2771,10 @@ static int ufshcd_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd) + } + + ufshcd_send_command(hba, tag); ++ + out: ++ rcu_read_unlock(); ++ + up_read(&hba->clk_scaling_lock); + + if (ufs_trigger_eh()) { +@@ -2906,30 +2931,15 @@ static int ufshcd_wait_for_dev_cmd(struct ufs_hba *hba, static int ufshcd_exec_dev_cmd(struct ufs_hba *hba, enum dev_cmd_type cmd_type, int timeout) { @@ -265098,7 +316913,7 @@ index 41f2ff35f82b2..dae1a85f1512c 100644 lrbp = &hba->lrb[tag]; WARN_ON(lrbp->cmd); -@@ -2947,8 +2949,6 @@ static int ufshcd_exec_dev_cmd(struct ufs_hba *hba, +@@ -2947,8 +2957,6 @@ static int ufshcd_exec_dev_cmd(struct ufs_hba *hba, (struct utp_upiu_req *)lrbp->ucd_rsp_ptr); out: @@ -265107,7 +316922,7 @@ index 41f2ff35f82b2..dae1a85f1512c 100644 up_read(&hba->clk_scaling_lock); return err; } -@@ -4073,8 +4073,6 @@ int ufshcd_link_recovery(struct ufs_hba *hba) +@@ -4073,8 +4081,6 @@ int ufshcd_link_recovery(struct ufs_hba *hba) if (ret) dev_err(hba->dev, "%s: link recovery failed, err %d", __func__, ret); @@ -265116,7 +316931,7 @@ index 41f2ff35f82b2..dae1a85f1512c 100644 return ret; } -@@ -4980,6 +4978,12 @@ static int ufshcd_slave_configure(struct scsi_device *sdev) +@@ -4980,6 +4986,12 @@ static int ufshcd_slave_configure(struct scsi_device *sdev) pm_runtime_get_noresume(&sdev->sdev_gendev); else if (ufshcd_is_rpm_autosuspend_allowed(hba)) sdev->rpm_autosuspend = 1; @@ -265129,7 +316944,7 @@ index 41f2ff35f82b2..dae1a85f1512c 100644 ufshcd_crypto_setup_rq_keyslot_manager(hba, q); -@@ -5634,7 +5638,7 @@ int ufshcd_wb_toggle(struct ufs_hba *hba, bool enable) +@@ -5634,7 +5646,7 @@ int ufshcd_wb_toggle(struct ufs_hba *hba, bool enable) } hba->dev_info.wb_enabled = enable; @@ -265138,7 +316953,39 @@ index 41f2ff35f82b2..dae1a85f1512c 100644 __func__, enable ? 
"enabled" : "disabled"); return ret; -@@ -5959,7 +5963,6 @@ static void ufshcd_err_handling_unprepare(struct ufs_hba *hba) +@@ -5896,11 +5908,21 @@ static inline void ufshcd_schedule_eh_work(struct ufs_hba *hba) + } + } + ++static void ufshcd_force_error_recovery(struct ufs_hba *hba) ++{ ++ spin_lock_irq(hba->host->host_lock); ++ hba->force_reset = true; ++ ufshcd_schedule_eh_work(hba); ++ spin_unlock_irq(hba->host->host_lock); ++} ++ + static void ufshcd_clk_scaling_allow(struct ufs_hba *hba, bool allow) + { ++ mutex_lock(&hba->wb_mutex); + down_write(&hba->clk_scaling_lock); + hba->clk_scaling.is_allowed = allow; + up_write(&hba->clk_scaling_lock); ++ mutex_unlock(&hba->wb_mutex); + } + + static void ufshcd_clk_scaling_suspend(struct ufs_hba *hba, bool suspend) +@@ -5948,8 +5970,7 @@ static void ufshcd_err_handling_prepare(struct ufs_hba *hba) + } + ufshcd_scsi_block_requests(hba); + /* Drain ufshcd_queuecommand() */ +- down_write(&hba->clk_scaling_lock); +- up_write(&hba->clk_scaling_lock); ++ synchronize_rcu(); + cancel_work_sync(&hba->eeh_work); + } + +@@ -5959,7 +5980,6 @@ static void ufshcd_err_handling_unprepare(struct ufs_hba *hba) ufshcd_release(hba); if (ufshcd_is_clkscaling_supported(hba)) ufshcd_clk_scaling_suspend(hba, false); @@ -265146,7 +316993,7 @@ index 41f2ff35f82b2..dae1a85f1512c 100644 ufshcd_rpm_put(hba); } -@@ -6386,9 +6389,8 @@ static irqreturn_t ufshcd_tmc_handler(struct ufs_hba *hba) +@@ -6386,9 +6406,8 @@ static irqreturn_t ufshcd_tmc_handler(struct ufs_hba *hba) irqreturn_t ret = IRQ_NONE; int tag; @@ -265157,7 +317004,7 @@ index 41f2ff35f82b2..dae1a85f1512c 100644 issued = hba->outstanding_tasks & ~pending; for_each_set_bit(tag, &issued, hba->nutmrs) { struct request *req = hba->tmf_rqs[tag]; -@@ -6545,11 +6547,6 @@ static int __ufshcd_issue_tm_cmd(struct ufs_hba *hba, +@@ -6545,11 +6564,6 @@ static int __ufshcd_issue_tm_cmd(struct ufs_hba *hba, err = wait_for_completion_io_timeout(&wait, msecs_to_jiffies(TM_CMD_TIMEOUT)); if (!err) { @@ -265169,7 +317016,7 @@ index 41f2ff35f82b2..dae1a85f1512c 100644 ufshcd_add_tm_upiu_trace(hba, task_tag, UFS_TM_ERR); dev_err(hba->dev, "%s: task management cmd 0x%.2x timed-out\n", __func__, tm_function); -@@ -6645,28 +6642,16 @@ static int ufshcd_issue_devman_upiu_cmd(struct ufs_hba *hba, +@@ -6645,28 +6659,16 @@ static int ufshcd_issue_devman_upiu_cmd(struct ufs_hba *hba, enum dev_cmd_type cmd_type, enum query_opcode desc_op) { @@ -265202,7 +317049,7 @@ index 41f2ff35f82b2..dae1a85f1512c 100644 lrbp = &hba->lrb[tag]; WARN_ON(lrbp->cmd); -@@ -6735,9 +6720,6 @@ static int ufshcd_issue_devman_upiu_cmd(struct ufs_hba *hba, +@@ -6735,9 +6737,6 @@ static int ufshcd_issue_devman_upiu_cmd(struct ufs_hba *hba, ufshcd_add_query_upiu_trace(hba, err ? 
UFS_QUERY_ERR : UFS_QUERY_COMP, (struct utp_upiu_req *)lrbp->ucd_rsp_ptr); @@ -265212,7 +317059,7 @@ index 41f2ff35f82b2..dae1a85f1512c 100644 up_read(&hba->clk_scaling_lock); return err; } -@@ -7044,6 +7026,7 @@ static int ufshcd_abort(struct scsi_cmnd *cmd) +@@ -7044,6 +7043,7 @@ static int ufshcd_abort(struct scsi_cmnd *cmd) goto release; } @@ -265220,7 +317067,7 @@ index 41f2ff35f82b2..dae1a85f1512c 100644 err = SUCCESS; release: -@@ -7233,7 +7216,13 @@ static u32 ufshcd_find_max_sup_active_icc_level(struct ufs_hba *hba, +@@ -7233,7 +7233,13 @@ static u32 ufshcd_find_max_sup_active_icc_level(struct ufs_hba *hba, if (!hba->vreg_info.vcc || !hba->vreg_info.vccq || !hba->vreg_info.vccq2) { @@ -265235,7 +317082,7 @@ index 41f2ff35f82b2..dae1a85f1512c 100644 "%s: Regulator capability was not set, actvIccLevel=%d", __func__, icc_level); goto out; -@@ -7875,8 +7864,6 @@ static int ufshcd_add_lus(struct ufs_hba *hba) +@@ -7875,8 +7881,6 @@ static int ufshcd_add_lus(struct ufs_hba *hba) if (ret) goto out; @@ -265244,7 +317091,7 @@ index 41f2ff35f82b2..dae1a85f1512c 100644 /* Initialize devfreq after UFS device is detected */ if (ufshcd_is_clkscaling_supported(hba)) { memcpy(&hba->clk_scaling.saved_pwr_info.info, -@@ -7902,116 +7889,6 @@ out: +@@ -7902,116 +7906,6 @@ out: return ret; } @@ -265361,7 +317208,7 @@ index 41f2ff35f82b2..dae1a85f1512c 100644 /** * ufshcd_probe_hba - probe hba to detect device and initialize it * @hba: per-adapter instance -@@ -8062,8 +7939,6 @@ static int ufshcd_probe_hba(struct ufs_hba *hba, bool init_dev_params) +@@ -8062,8 +7956,6 @@ static int ufshcd_probe_hba(struct ufs_hba *hba, bool init_dev_params) /* UFS device is also active now */ ufshcd_set_ufs_dev_active(hba); ufshcd_force_reset_auto_bkops(hba); @@ -265370,7 +317217,7 @@ index 41f2ff35f82b2..dae1a85f1512c 100644 /* Gear up to HS gear if supported */ if (hba->max_pwr_info.is_valid) { -@@ -8591,7 +8466,7 @@ static void ufshcd_hba_exit(struct ufs_hba *hba) +@@ -8591,7 +8483,7 @@ static void ufshcd_hba_exit(struct ufs_hba *hba) * @pwr_mode: device power mode to set * * Returns 0 if requested power mode is set successfully @@ -265379,18 +317226,16 @@ index 41f2ff35f82b2..dae1a85f1512c 100644 */ static int ufshcd_set_dev_pwr_mode(struct ufs_hba *hba, enum ufs_dev_pwr_mode pwr_mode) -@@ -8600,7 +8475,9 @@ static int ufshcd_set_dev_pwr_mode(struct ufs_hba *hba, +@@ -8600,7 +8492,7 @@ static int ufshcd_set_dev_pwr_mode(struct ufs_hba *hba, struct scsi_sense_hdr sshdr; struct scsi_device *sdp; unsigned long flags; - int ret; + int ret, retries; -+ unsigned long deadline; -+ int32_t remaining; spin_lock_irqsave(hba->host->host_lock, flags); sdp = hba->sdev_ufs_device; -@@ -8625,8 +8502,6 @@ static int ufshcd_set_dev_pwr_mode(struct ufs_hba *hba, +@@ -8625,8 +8517,6 @@ static int ufshcd_set_dev_pwr_mode(struct ufs_hba *hba, * handling context. */ hba->host->eh_noresume = 1; @@ -265399,20 +317244,15 @@ index 41f2ff35f82b2..dae1a85f1512c 100644 cmd[4] = pwr_mode << 4; -@@ -8635,14 +8510,28 @@ static int ufshcd_set_dev_pwr_mode(struct ufs_hba *hba, +@@ -8635,14 +8525,23 @@ static int ufshcd_set_dev_pwr_mode(struct ufs_hba *hba, * callbacks hence set the RQF_PM flag so that it doesn't resume the * already suspended childs. 
*/ - ret = scsi_execute(sdp, cmd, DMA_NONE, NULL, 0, NULL, &sshdr, - START_STOP_TIMEOUT, 0, 0, RQF_PM, NULL); -+ deadline = jiffies + 10 * HZ; + for (retries = 3; retries > 0; --retries) { -+ ret = -ETIMEDOUT; -+ remaining = deadline - jiffies; -+ if (remaining <= 0) -+ break; + ret = scsi_execute(sdp, cmd, DMA_NONE, NULL, 0, NULL, &sshdr, -+ remaining / HZ, 0, 0, RQF_PM, NULL); ++ HZ, 0, 0, RQF_PM, NULL); + if (!scsi_status_is_check_condition(ret) || + !scsi_sense_valid(&sshdr) || + sshdr.sense_key != UNIT_ATTENTION) @@ -265432,7 +317272,39 @@ index 41f2ff35f82b2..dae1a85f1512c 100644 } if (!ret) -@@ -9357,12 +9246,8 @@ EXPORT_SYMBOL(ufshcd_runtime_resume); +@@ -8885,6 +8784,15 @@ static int __ufshcd_wl_suspend(struct ufs_hba *hba, enum ufs_pm_op pm_op) + + if (!hba->dev_info.b_rpm_dev_flush_capable) { + ret = ufshcd_set_dev_pwr_mode(hba, req_dev_pwr_mode); ++ if (ret && pm_op != UFS_SHUTDOWN_PM) { ++ /* ++ * If return err in suspend flow, IO will hang. ++ * Trigger error handler and break suspend for ++ * error recovery. ++ */ ++ ufshcd_force_error_recovery(hba); ++ ret = -EBUSY; ++ } + if (ret) + goto enable_scaling; + } +@@ -8896,6 +8804,15 @@ static int __ufshcd_wl_suspend(struct ufs_hba *hba, enum ufs_pm_op pm_op) + */ + check_for_bkops = !ufshcd_is_ufs_dev_deepsleep(hba); + ret = ufshcd_link_state_transition(hba, req_link_state, check_for_bkops); ++ if (ret && pm_op != UFS_SHUTDOWN_PM) { ++ /* ++ * If return err in suspend flow, IO will hang. ++ * Trigger error handler and break suspend for ++ * error recovery. ++ */ ++ ufshcd_force_error_recovery(hba); ++ ret = -EBUSY; ++ } + if (ret) + goto set_dev_active; + +@@ -9357,12 +9274,8 @@ EXPORT_SYMBOL(ufshcd_runtime_resume); int ufshcd_shutdown(struct ufs_hba *hba) { if (ufshcd_is_ufs_dev_poweroff(hba) && ufshcd_is_link_off(hba)) @@ -265446,7 +317318,7 @@ index 41f2ff35f82b2..dae1a85f1512c 100644 hba->is_powered = false; /* allow force shutdown even in case of errors */ return 0; -@@ -9485,6 +9370,13 @@ int ufshcd_init(struct ufs_hba *hba, void __iomem *mmio_base, unsigned int irq) +@@ -9485,6 +9398,13 @@ int ufshcd_init(struct ufs_hba *hba, void __iomem *mmio_base, unsigned int irq) struct device *dev = hba->dev; char eh_wq_name[sizeof("ufs_eh_wq_00")]; @@ -265460,7 +317332,7 @@ index 41f2ff35f82b2..dae1a85f1512c 100644 if (!mmio_base) { dev_err(hba->dev, "Invalid memory reference for mmio_base is NULL\n"); -@@ -9527,8 +9419,8 @@ int ufshcd_init(struct ufs_hba *hba, void __iomem *mmio_base, unsigned int irq) +@@ -9527,8 +9447,8 @@ int ufshcd_init(struct ufs_hba *hba, void __iomem *mmio_base, unsigned int irq) /* Configure LRB */ ufshcd_host_memory_configure(hba); @@ -265471,7 +317343,15 @@ index 41f2ff35f82b2..dae1a85f1512c 100644 host->max_id = UFSHCD_MAX_ID; host->max_lun = UFS_MAX_LUNS; host->max_channel = UFSHCD_MAX_CHANNEL; -@@ -9699,10 +9591,6 @@ void ufshcd_resume_complete(struct device *dev) +@@ -9561,6 +9481,7 @@ int ufshcd_init(struct ufs_hba *hba, void __iomem *mmio_base, unsigned int irq) + /* Initialize mutex for exception event control */ + mutex_init(&hba->ee_ctrl_mutex); + ++ mutex_init(&hba->wb_mutex); + init_rwsem(&hba->clk_scaling_lock); + + ufshcd_init_clk_gating(hba); +@@ -9699,10 +9620,6 @@ void ufshcd_resume_complete(struct device *dev) ufshcd_rpm_put(hba); hba->complete_put = false; } @@ -265482,7 +317362,7 @@ index 41f2ff35f82b2..dae1a85f1512c 100644 } EXPORT_SYMBOL_GPL(ufshcd_resume_complete); -@@ -9725,10 +9613,6 @@ int ufshcd_suspend_prepare(struct device *dev) +@@ -9725,10 +9642,6 @@ int 
ufshcd_suspend_prepare(struct device *dev) } hba->complete_put = true; } @@ -265493,7 +317373,7 @@ index 41f2ff35f82b2..dae1a85f1512c 100644 return 0; } EXPORT_SYMBOL_GPL(ufshcd_suspend_prepare); -@@ -9797,49 +9681,6 @@ static struct scsi_driver ufs_dev_wlun_template = { +@@ -9797,49 +9710,6 @@ static struct scsi_driver ufs_dev_wlun_template = { }, }; @@ -265543,7 +317423,7 @@ index 41f2ff35f82b2..dae1a85f1512c 100644 static int __init ufshcd_core_init(void) { int ret; -@@ -9848,24 +9689,13 @@ static int __init ufshcd_core_init(void) +@@ -9848,24 +9718,13 @@ static int __init ufshcd_core_init(void) ret = scsi_register_driver(&ufs_dev_wlun_template.gendrv); if (ret) @@ -265570,7 +317450,7 @@ index 41f2ff35f82b2..dae1a85f1512c 100644 } diff --git a/drivers/scsi/ufs/ufshcd.h b/drivers/scsi/ufs/ufshcd.h -index 41f6e06f91856..d470a52ff24c3 100644 +index 41f6e06f91856..c8513cc6c2bdd 100644 --- a/drivers/scsi/ufs/ufshcd.h +++ b/drivers/scsi/ufs/ufshcd.h @@ -725,6 +725,7 @@ struct ufs_hba_monitor { @@ -265581,7 +317461,15 @@ index 41f6e06f91856..d470a52ff24c3 100644 * @ufs_version: UFS Version to which controller complies * @vops: pointer to variant specific operations * @priv: pointer to variant specific private data -@@ -813,6 +814,7 @@ struct ufs_hba { +@@ -762,6 +763,7 @@ struct ufs_hba_monitor { + * @urgent_bkops_lvl: keeps track of urgent bkops level for device + * @is_urgent_bkops_lvl_checked: keeps track if the urgent bkops level for + * device is known or not. ++ * @wb_mutex: used to serialize devfreq and sysfs write booster toggling + * @scsi_block_reqs_cnt: reference counting for scsi block requests + * @crypto_capabilities: Content of crypto capabilities register (0x100) + * @crypto_cap_array: Array of crypto capabilities +@@ -813,6 +815,7 @@ struct ufs_hba { u32 capabilities; int nutrs; int nutmrs; @@ -265589,7 +317477,7 @@ index 41f6e06f91856..d470a52ff24c3 100644 u32 ufs_version; const struct ufs_hba_variant_ops *vops; struct ufs_hba_variant_params *vps; -@@ -871,9 +873,6 @@ struct ufs_hba { +@@ -871,9 +874,6 @@ struct ufs_hba { struct ufs_vreg_info vreg_info; struct list_head clk_list_head; @@ -265599,7 +317487,15 @@ index 41f6e06f91856..d470a52ff24c3 100644 /* Number of requests aborts */ int req_abort_count; -@@ -920,7 +919,6 @@ struct ufs_hba { +@@ -893,6 +893,7 @@ struct ufs_hba { + enum bkops_status urgent_bkops_lvl; + bool is_urgent_bkops_lvl_checked; + ++ struct mutex wb_mutex; + struct rw_semaphore clk_scaling_lock; + unsigned char desc_size[QUERY_DESC_IDN_MAX]; + atomic_t scsi_block_reqs_cnt; +@@ -920,7 +921,6 @@ struct ufs_hba { #endif u32 luns_avail; bool complete_put; @@ -265607,7 +317503,7 @@ index 41f6e06f91856..d470a52ff24c3 100644 }; /* Returns true if clocks can be gated. 
Otherwise false */ -@@ -1393,14 +1391,4 @@ static inline int ufshcd_rpm_put(struct ufs_hba *hba) +@@ -1393,14 +1393,4 @@ static inline int ufshcd_rpm_put(struct ufs_hba *hba) return pm_runtime_put(&hba->sdev_ufs_device->sdev_gendev); } @@ -265883,8 +317779,21 @@ index bd0fbcdbdefe9..e24e220e56eea 100644 goto cleanup_cache; } baseunits[i] = mdev[i]; +diff --git a/drivers/siox/siox-core.c b/drivers/siox/siox-core.c +index 7c4f32d769666..561408583b2bf 100644 +--- a/drivers/siox/siox-core.c ++++ b/drivers/siox/siox-core.c +@@ -839,6 +839,8 @@ static struct siox_device *siox_device_add(struct siox_master *smaster, + + err_device_register: + /* don't care to make the buffer smaller again */ ++ put_device(&sdevice->dev); ++ sdevice = NULL; + + err_buf_alloc: + siox_master_unlock(smaster); diff --git a/drivers/slimbus/Kconfig b/drivers/slimbus/Kconfig -index 1235b7dc8496c..2ed821f75816c 100644 +index 1235b7dc8496c..a0fdf9d792cb4 100644 --- a/drivers/slimbus/Kconfig +++ b/drivers/slimbus/Kconfig @@ -22,7 +22,8 @@ config SLIM_QCOM_CTRL @@ -265893,7 +317802,7 @@ index 1235b7dc8496c..2ed821f75816c 100644 tristate "Qualcomm SLIMbus Satellite Non-Generic Device Component" - depends on HAS_IOMEM && DMA_ENGINE && NET && QCOM_RPROC_COMMON + depends on HAS_IOMEM && DMA_ENGINE && NET -+ depends on QCOM_RPROC_COMMON || COMPILE_TEST ++ depends on QCOM_RPROC_COMMON || (COMPILE_TEST && !QCOM_RPROC_COMMON) depends on ARCH_QCOM || COMPILE_TEST select QCOM_QMI_HELPERS select QCOM_PDR_HELPERS @@ -265947,6 +317856,25 @@ index 7040293c2ee8f..21519ce05bdb8 100644 } static int qcom_slim_ngd_ctrl_remove(struct platform_device *pdev) +diff --git a/drivers/slimbus/stream.c b/drivers/slimbus/stream.c +index 75f87b3d8b953..73a2aa3629572 100644 +--- a/drivers/slimbus/stream.c ++++ b/drivers/slimbus/stream.c +@@ -67,10 +67,10 @@ static const int slim_presence_rate_table[] = { + 384000, + 768000, + 0, /* Reserved */ +- 110250, +- 220500, +- 441000, +- 882000, ++ 11025, ++ 22050, ++ 44100, ++ 88200, + 176400, + 352800, + 705600, diff --git a/drivers/soc/amlogic/meson-mx-socinfo.c b/drivers/soc/amlogic/meson-mx-socinfo.c index 78f0f1aeca578..92125dd65f338 100644 --- a/drivers/soc/amlogic/meson-mx-socinfo.c @@ -266360,6 +318288,49 @@ index ac6d856ba228d..77bc12039c3d4 100644 if (of_machine_is_compatible("fsl,ls1021a")) return 0; +diff --git a/drivers/soc/imx/soc-imx8m.c b/drivers/soc/imx/soc-imx8m.c +index cc57a384d74d2..32ed9dc88e455 100644 +--- a/drivers/soc/imx/soc-imx8m.c ++++ b/drivers/soc/imx/soc-imx8m.c +@@ -11,6 +11,7 @@ + #include <linux/platform_device.h> + #include <linux/arm-smccc.h> + #include <linux/of.h> ++#include <linux/clk.h> + + #define REV_B1 0x21 + +@@ -56,6 +57,7 @@ static u32 __init imx8mq_soc_revision(void) + void __iomem *ocotp_base; + u32 magic; + u32 rev; ++ struct clk *clk; + + np = of_find_compatible_node(NULL, NULL, "fsl,imx8mq-ocotp"); + if (!np) +@@ -63,6 +65,13 @@ static u32 __init imx8mq_soc_revision(void) + + ocotp_base = of_iomap(np, 0); + WARN_ON(!ocotp_base); ++ clk = of_clk_get_by_name(np, NULL); ++ if (IS_ERR(clk)) { ++ WARN_ON(IS_ERR(clk)); ++ return 0; ++ } ++ ++ clk_prepare_enable(clk); + + /* + * SOC revision on older imx8mq is not available in fuses so query +@@ -79,6 +88,8 @@ static u32 __init imx8mq_soc_revision(void) + soc_uid <<= 32; + soc_uid |= readl_relaxed(ocotp_base + OCOTP_UID_LOW); + ++ clk_disable_unprepare(clk); ++ clk_put(clk); + iounmap(ocotp_base); + of_node_put(np); + diff --git a/drivers/soc/ixp4xx/ixp4xx-npe.c b/drivers/soc/ixp4xx/ixp4xx-npe.c index 
f490c4ca51f51..a0159805d061b 100644 --- a/drivers/soc/ixp4xx/ixp4xx-npe.c @@ -266374,9 +318345,20 @@ index f490c4ca51f51..a0159805d061b 100644 .probe = ixp4xx_npe_probe, .remove = ixp4xx_npe_remove, diff --git a/drivers/soc/mediatek/mtk-pm-domains.c b/drivers/soc/mediatek/mtk-pm-domains.c -index b762bc40f56bd..afd2fd74802d2 100644 +index b762bc40f56bd..52ecde8e446cf 100644 --- a/drivers/soc/mediatek/mtk-pm-domains.c +++ b/drivers/soc/mediatek/mtk-pm-domains.c +@@ -272,9 +272,9 @@ static int scpsys_power_off(struct generic_pm_domain *genpd) + clk_bulk_disable_unprepare(pd->num_subsys_clks, pd->subsys_clks); + + /* subsys power off */ +- regmap_clear_bits(scpsys->base, pd->data->ctl_offs, PWR_RST_B_BIT); + regmap_set_bits(scpsys->base, pd->data->ctl_offs, PWR_ISO_BIT); + regmap_set_bits(scpsys->base, pd->data->ctl_offs, PWR_CLK_DIS_BIT); ++ regmap_clear_bits(scpsys->base, pd->data->ctl_offs, PWR_RST_B_BIT); + regmap_clear_bits(scpsys->base, pd->data->ctl_offs, PWR_ON_2ND_BIT); + regmap_clear_bits(scpsys->base, pd->data->ctl_offs, PWR_ON_BIT); + @@ -443,6 +443,9 @@ generic_pm_domain *scpsys_add_one_domain(struct scpsys *scpsys, struct device_no pd->genpd.power_off = scpsys_power_off; pd->genpd.power_on = scpsys_power_on; @@ -266388,10 +318370,18 @@ index b762bc40f56bd..afd2fd74802d2 100644 pm_genpd_init(&pd->genpd, NULL, true); else diff --git a/drivers/soc/qcom/Kconfig b/drivers/soc/qcom/Kconfig -index 79b568f82a1c3..499718e131d72 100644 +index 79b568f82a1c3..6a97e8af93908 100644 --- a/drivers/soc/qcom/Kconfig +++ b/drivers/soc/qcom/Kconfig -@@ -129,6 +129,7 @@ config QCOM_RPMHPD +@@ -63,6 +63,7 @@ config QCOM_GSBI + config QCOM_LLCC + tristate "Qualcomm Technologies, Inc. LLCC driver" + depends on ARCH_QCOM || COMPILE_TEST ++ select REGMAP_MMIO + help + Qualcomm Technologies, Inc. 
platform specific + Last Level Cache Controller(LLCC) driver for platforms such as, +@@ -129,6 +130,7 @@ config QCOM_RPMHPD config QCOM_RPMPD tristate "Qualcomm RPM Power domain driver" @@ -266400,10 +318390,248 @@ index 79b568f82a1c3..499718e131d72 100644 help QCOM RPM Power domain driver to support power-domains with diff --git a/drivers/soc/qcom/apr.c b/drivers/soc/qcom/apr.c -index 475a57b435b24..2e455d9e3d94a 100644 +index 475a57b435b24..8fd823b40f4be 100644 --- a/drivers/soc/qcom/apr.c +++ b/drivers/soc/qcom/apr.c -@@ -321,12 +321,14 @@ static int of_apr_add_pd_lookups(struct device *dev) +@@ -15,13 +15,18 @@ + #include <linux/rpmsg.h> + #include <linux/of.h> + +-struct apr { ++enum { ++ PR_TYPE_APR = 0, ++}; ++ ++struct packet_router { + struct rpmsg_endpoint *ch; + struct device *dev; + spinlock_t svcs_lock; + spinlock_t rx_lock; + struct idr svcs_idr; + int dest_domain_id; ++ int type; + struct pdr_handle *pdr; + struct workqueue_struct *rxwq; + struct work_struct rx_work; +@@ -44,21 +49,21 @@ struct apr_rx_buf { + */ + int apr_send_pkt(struct apr_device *adev, struct apr_pkt *pkt) + { +- struct apr *apr = dev_get_drvdata(adev->dev.parent); ++ struct packet_router *apr = dev_get_drvdata(adev->dev.parent); + struct apr_hdr *hdr; + unsigned long flags; + int ret; + +- spin_lock_irqsave(&adev->lock, flags); ++ spin_lock_irqsave(&adev->svc.lock, flags); + + hdr = &pkt->hdr; + hdr->src_domain = APR_DOMAIN_APPS; +- hdr->src_svc = adev->svc_id; ++ hdr->src_svc = adev->svc.id; + hdr->dest_domain = adev->domain_id; +- hdr->dest_svc = adev->svc_id; ++ hdr->dest_svc = adev->svc.id; + + ret = rpmsg_trysend(apr->ch, pkt, hdr->pkt_size); +- spin_unlock_irqrestore(&adev->lock, flags); ++ spin_unlock_irqrestore(&adev->svc.lock, flags); + + return ret ? ret : hdr->pkt_size; + } +@@ -74,7 +79,7 @@ static void apr_dev_release(struct device *dev) + static int apr_callback(struct rpmsg_device *rpdev, void *buf, + int len, void *priv, u32 addr) + { +- struct apr *apr = dev_get_drvdata(&rpdev->dev); ++ struct packet_router *apr = dev_get_drvdata(&rpdev->dev); + struct apr_rx_buf *abuf; + unsigned long flags; + +@@ -100,11 +105,11 @@ static int apr_callback(struct rpmsg_device *rpdev, void *buf, + return 0; + } + +- +-static int apr_do_rx_callback(struct apr *apr, struct apr_rx_buf *abuf) ++static int apr_do_rx_callback(struct packet_router *apr, struct apr_rx_buf *abuf) + { + uint16_t hdr_size, msg_type, ver, svc_id; +- struct apr_device *svc = NULL; ++ struct pkt_router_svc *svc; ++ struct apr_device *adev; + struct apr_driver *adrv = NULL; + struct apr_resp_pkt resp; + struct apr_hdr *hdr; +@@ -145,12 +150,15 @@ static int apr_do_rx_callback(struct apr *apr, struct apr_rx_buf *abuf) + svc_id = hdr->dest_svc; + spin_lock_irqsave(&apr->svcs_lock, flags); + svc = idr_find(&apr->svcs_idr, svc_id); +- if (svc && svc->dev.driver) +- adrv = to_apr_driver(svc->dev.driver); ++ if (svc && svc->dev->driver) { ++ adev = svc_to_apr_device(svc); ++ adrv = to_apr_driver(adev->dev.driver); ++ } + spin_unlock_irqrestore(&apr->svcs_lock, flags); + +- if (!adrv) { +- dev_err(apr->dev, "APR: service is not registered\n"); ++ if (!adrv || !adev) { ++ dev_err(apr->dev, "APR: service is not registered (%d)\n", ++ svc_id); + return -EINVAL; + } + +@@ -164,20 +172,26 @@ static int apr_do_rx_callback(struct apr *apr, struct apr_rx_buf *abuf) + if (resp.payload_size > 0) + resp.payload = buf + hdr_size; + +- adrv->callback(svc, &resp); ++ adrv->callback(adev, &resp); + + return 0; + } + + static void apr_rxwq(struct work_struct 
*work) + { +- struct apr *apr = container_of(work, struct apr, rx_work); ++ struct packet_router *apr = container_of(work, struct packet_router, rx_work); + struct apr_rx_buf *abuf, *b; + unsigned long flags; + + if (!list_empty(&apr->rx_list)) { + list_for_each_entry_safe(abuf, b, &apr->rx_list, node) { +- apr_do_rx_callback(apr, abuf); ++ switch (apr->type) { ++ case PR_TYPE_APR: ++ apr_do_rx_callback(apr, abuf); ++ break; ++ default: ++ break; ++ } + spin_lock_irqsave(&apr->rx_lock, flags); + list_del(&abuf->node); + spin_unlock_irqrestore(&apr->rx_lock, flags); +@@ -201,7 +215,7 @@ static int apr_device_match(struct device *dev, struct device_driver *drv) + + while (id->domain_id != 0 || id->svc_id != 0) { + if (id->domain_id == adev->domain_id && +- id->svc_id == adev->svc_id) ++ id->svc_id == adev->svc.id) + return 1; + id++; + } +@@ -221,14 +235,14 @@ static void apr_device_remove(struct device *dev) + { + struct apr_device *adev = to_apr_device(dev); + struct apr_driver *adrv; +- struct apr *apr = dev_get_drvdata(adev->dev.parent); ++ struct packet_router *apr = dev_get_drvdata(adev->dev.parent); + + if (dev->driver) { + adrv = to_apr_driver(dev->driver); + if (adrv->remove) + adrv->remove(adev); + spin_lock(&apr->svcs_lock); +- idr_remove(&apr->svcs_idr, adev->svc_id); ++ idr_remove(&apr->svcs_idr, adev->svc.id); + spin_unlock(&apr->svcs_lock); + } + } +@@ -255,28 +269,39 @@ struct bus_type aprbus = { + EXPORT_SYMBOL_GPL(aprbus); + + static int apr_add_device(struct device *dev, struct device_node *np, +- const struct apr_device_id *id) ++ u32 svc_id, u32 domain_id) + { +- struct apr *apr = dev_get_drvdata(dev); ++ struct packet_router *apr = dev_get_drvdata(dev); + struct apr_device *adev = NULL; ++ struct pkt_router_svc *svc; + int ret; + + adev = kzalloc(sizeof(*adev), GFP_KERNEL); + if (!adev) + return -ENOMEM; + +- spin_lock_init(&adev->lock); ++ adev->svc_id = svc_id; ++ svc = &adev->svc; ++ ++ svc->id = svc_id; ++ svc->pr = apr; ++ svc->priv = adev; ++ svc->dev = dev; ++ spin_lock_init(&svc->lock); ++ ++ adev->domain_id = domain_id; + +- adev->svc_id = id->svc_id; +- adev->domain_id = id->domain_id; +- adev->version = id->svc_version; + if (np) + snprintf(adev->name, APR_NAME_SIZE, "%pOFn", np); +- else +- strscpy(adev->name, id->name, APR_NAME_SIZE); + +- dev_set_name(&adev->dev, "aprsvc:%s:%x:%x", adev->name, +- id->domain_id, id->svc_id); ++ switch (apr->type) { ++ case PR_TYPE_APR: ++ dev_set_name(&adev->dev, "aprsvc:%s:%x:%x", adev->name, ++ domain_id, svc_id); ++ break; ++ default: ++ break; ++ } + + adev->dev.bus = &aprbus; + adev->dev.parent = dev; +@@ -285,12 +310,20 @@ static int apr_add_device(struct device *dev, struct device_node *np, + adev->dev.driver = NULL; + + spin_lock(&apr->svcs_lock); +- idr_alloc(&apr->svcs_idr, adev, id->svc_id, +- id->svc_id + 1, GFP_ATOMIC); ++ ret = idr_alloc(&apr->svcs_idr, svc, svc_id, svc_id + 1, GFP_ATOMIC); + spin_unlock(&apr->svcs_lock); ++ if (ret < 0) { ++ dev_err(dev, "idr_alloc failed: %d\n", ret); ++ goto out; ++ } + +- of_property_read_string_index(np, "qcom,protection-domain", +- 1, &adev->service_path); ++ /* Protection domain is optional, it does not exist on older platforms */ ++ ret = of_property_read_string_index(np, "qcom,protection-domain", ++ 1, &adev->service_path); ++ if (ret < 0 && ret != -EINVAL) { ++ dev_err(dev, "Failed to read second value of qcom,protection-domain\n"); ++ goto out; ++ } + + dev_info(dev, "Adding APR dev: %s\n", dev_name(&adev->dev)); + +@@ -300,13 +333,14 @@ static int 
apr_add_device(struct device *dev, struct device_node *np, + put_device(&adev->dev); + } + ++out: + return ret; + } + + static int of_apr_add_pd_lookups(struct device *dev) + { + const char *service_name, *service_path; +- struct apr *apr = dev_get_drvdata(dev); ++ struct packet_router *apr = dev_get_drvdata(dev); + struct device_node *node; + struct pdr_service *pds; + int ret; +@@ -321,12 +355,14 @@ static int of_apr_add_pd_lookups(struct device *dev) 1, &service_path); if (ret < 0) { dev_err(dev, "pdr service path missing: %d\n", ret); @@ -266418,8 +318646,128 @@ index 475a57b435b24..2e455d9e3d94a 100644 return PTR_ERR(pds); } } +@@ -336,13 +372,14 @@ static int of_apr_add_pd_lookups(struct device *dev) + + static void of_register_apr_devices(struct device *dev, const char *svc_path) + { +- struct apr *apr = dev_get_drvdata(dev); ++ struct packet_router *apr = dev_get_drvdata(dev); + struct device_node *node; + const char *service_path; + int ret; + + for_each_child_of_node(dev->of_node, node) { +- struct apr_device_id id = { {0} }; ++ u32 svc_id; ++ u32 domain_id; + + /* + * This function is called with svc_path NULL during +@@ -372,13 +409,13 @@ static void of_register_apr_devices(struct device *dev, const char *svc_path) + continue; + } + +- if (of_property_read_u32(node, "reg", &id.svc_id)) ++ if (of_property_read_u32(node, "reg", &svc_id)) + continue; + +- id.domain_id = apr->dest_domain_id; ++ domain_id = apr->dest_domain_id; + +- if (apr_add_device(dev, node, &id)) +- dev_err(dev, "Failed to add apr %d svc\n", id.svc_id); ++ if (apr_add_device(dev, node, svc_id, domain_id)) ++ dev_err(dev, "Failed to add apr %d svc\n", svc_id); + } + } + +@@ -398,7 +435,7 @@ static int apr_remove_device(struct device *dev, void *svc_path) + + static void apr_pd_status(int state, char *svc_path, void *priv) + { +- struct apr *apr = (struct apr *)priv; ++ struct packet_router *apr = (struct packet_router *)priv; + + switch (state) { + case SERVREG_SERVICE_STATE_UP: +@@ -413,16 +450,20 @@ static void apr_pd_status(int state, char *svc_path, void *priv) + static int apr_probe(struct rpmsg_device *rpdev) + { + struct device *dev = &rpdev->dev; +- struct apr *apr; ++ struct packet_router *apr; + int ret; + + apr = devm_kzalloc(dev, sizeof(*apr), GFP_KERNEL); + if (!apr) + return -ENOMEM; + +- ret = of_property_read_u32(dev->of_node, "qcom,apr-domain", &apr->dest_domain_id); ++ ret = of_property_read_u32(dev->of_node, "qcom,domain", &apr->dest_domain_id); ++ if (ret) /* try deprecated apr-domain property */ ++ ret = of_property_read_u32(dev->of_node, "qcom,apr-domain", ++ &apr->dest_domain_id); ++ apr->type = PR_TYPE_APR; + if (ret) { +- dev_err(dev, "APR Domain ID not specified in DT\n"); ++ dev_err(dev, "Domain ID not specified in DT\n"); + return ret; + } + +@@ -465,7 +506,7 @@ destroy_wq: + + static void apr_remove(struct rpmsg_device *rpdev) + { +- struct apr *apr = dev_get_drvdata(&rpdev->dev); ++ struct packet_router *apr = dev_get_drvdata(&rpdev->dev); + + pdr_handle_release(apr->pdr); + device_for_each_child(&rpdev->dev, NULL, apr_remove_device); +@@ -502,20 +543,20 @@ void apr_driver_unregister(struct apr_driver *drv) + } + EXPORT_SYMBOL_GPL(apr_driver_unregister); + +-static const struct of_device_id apr_of_match[] = { ++static const struct of_device_id pkt_router_of_match[] = { + { .compatible = "qcom,apr"}, + { .compatible = "qcom,apr-v2"}, + {} + }; +-MODULE_DEVICE_TABLE(of, apr_of_match); ++MODULE_DEVICE_TABLE(of, pkt_router_of_match); + +-static struct rpmsg_driver apr_driver = { 
++static struct rpmsg_driver packet_router_driver = { + .probe = apr_probe, + .remove = apr_remove, + .callback = apr_callback, + .drv = { + .name = "qcom,apr", +- .of_match_table = apr_of_match, ++ .of_match_table = pkt_router_of_match, + }, + }; + +@@ -525,7 +566,7 @@ static int __init apr_init(void) + + ret = bus_register(&aprbus); + if (!ret) +- ret = register_rpmsg_driver(&apr_driver); ++ ret = register_rpmsg_driver(&packet_router_driver); + else + bus_unregister(&aprbus); + +@@ -535,7 +576,7 @@ static int __init apr_init(void) + static void __exit apr_exit(void) + { + bus_unregister(&aprbus); +- unregister_rpmsg_driver(&apr_driver); ++ unregister_rpmsg_driver(&packet_router_driver); + } + + subsys_initcall(apr_init); diff --git a/drivers/soc/qcom/cpr.c b/drivers/soc/qcom/cpr.c -index 4ce8e816154f9..84dd93472a252 100644 +index 4ce8e816154f9..e61cff3d9c8a6 100644 --- a/drivers/soc/qcom/cpr.c +++ b/drivers/soc/qcom/cpr.c @@ -1010,7 +1010,7 @@ static int cpr_interpolate(const struct corner *corner, int step_volt, @@ -266431,8 +318779,26 @@ index 4ce8e816154f9..84dd93472a252 100644 /* * max_volt_scale has units of uV/MHz while freq values +@@ -1710,12 +1710,16 @@ static int cpr_probe(struct platform_device *pdev) + + ret = of_genpd_add_provider_simple(dev->of_node, &drv->pd); + if (ret) +- return ret; ++ goto err_remove_genpd; + + platform_set_drvdata(pdev, drv); + cpr_debugfs_init(drv); + + return 0; ++ ++err_remove_genpd: ++ pm_genpd_remove(&drv->pd); ++ return ret; + } + + static int cpr_remove(struct platform_device *pdev) diff --git a/drivers/soc/qcom/llcc-qcom.c b/drivers/soc/qcom/llcc-qcom.c -index 15a36dcab990e..cabd8870316d3 100644 +index 15a36dcab990e..47d41804fdf67 100644 --- a/drivers/soc/qcom/llcc-qcom.c +++ b/drivers/soc/qcom/llcc-qcom.c @@ -115,7 +115,7 @@ static const struct llcc_slice_config sc7280_data[] = { @@ -266444,6 +318810,15 @@ index 15a36dcab990e..cabd8870316d3 100644 { LLCC_MDMPNG, 21, 768, 0, 1, 0x3f, 0x0, 0, 0, 0, 1, 0, 0}, { LLCC_WLHW, 24, 256, 1, 1, 0x3f, 0x0, 0, 0, 0, 1, 0, 0}, { LLCC_MODPE, 29, 64, 1, 1, 0x3f, 0x0, 0, 0, 0, 1, 0, 0}, +@@ -607,7 +607,7 @@ static int qcom_llcc_probe(struct platform_device *pdev) + if (ret) + goto err; + +- drv_data->ecc_irq = platform_get_irq(pdev, 0); ++ drv_data->ecc_irq = platform_get_irq_optional(pdev, 0); + if (drv_data->ecc_irq >= 0) { + llcc_edac = platform_device_register_data(&pdev->dev, + "qcom_llcc_edac", -1, drv_data, @@ -630,6 +630,7 @@ static const struct of_device_id qcom_llcc_of_match[] = { { .compatible = "qcom,sm8250-llcc", .data = &sm8250_cfg }, { } @@ -267005,6 +319380,31 @@ index 50091c4ec9481..a60e142ade344 100644 usleep_range(10, 20); +diff --git a/drivers/soc/ti/knav_qmss_queue.c b/drivers/soc/ti/knav_qmss_queue.c +index 2ac3856b8d42d..52389859395c6 100644 +--- a/drivers/soc/ti/knav_qmss_queue.c ++++ b/drivers/soc/ti/knav_qmss_queue.c +@@ -67,7 +67,7 @@ static DEFINE_MUTEX(knav_dev_lock); + * Newest followed by older ones. Search is done from start of the array + * until a firmware file is found. 
+ */ +-const char *knav_acc_firmwares[] = {"ks2_qmss_pdsp_acc48.bin"}; ++static const char * const knav_acc_firmwares[] = {"ks2_qmss_pdsp_acc48.bin"}; + + static bool device_ready; + bool knav_qmss_device_ready(void) +@@ -1785,9 +1785,9 @@ static int knav_queue_probe(struct platform_device *pdev) + INIT_LIST_HEAD(&kdev->pdsps); + + pm_runtime_enable(&pdev->dev); +- ret = pm_runtime_get_sync(&pdev->dev); ++ ret = pm_runtime_resume_and_get(&pdev->dev); + if (ret < 0) { +- pm_runtime_put_noidle(&pdev->dev); ++ pm_runtime_disable(&pdev->dev); + dev_err(dev, "Failed to enable QMSS\n"); + return ret; + } diff --git a/drivers/soc/ti/pruss.c b/drivers/soc/ti/pruss.c index 49da387d77494..b36779309e49b 100644 --- a/drivers/soc/ti/pruss.c @@ -267018,6 +319418,18 @@ index 49da387d77494..b36779309e49b 100644 return -ENODEV; } +diff --git a/drivers/soc/ti/smartreflex.c b/drivers/soc/ti/smartreflex.c +index b5b2fa538d5c3..4d15587324d4f 100644 +--- a/drivers/soc/ti/smartreflex.c ++++ b/drivers/soc/ti/smartreflex.c +@@ -931,6 +931,7 @@ static int omap_sr_probe(struct platform_device *pdev) + err_debugfs: + debugfs_remove_recursive(sr_info->dbg_dir); + err_list_del: ++ pm_runtime_disable(&pdev->dev); + list_del(&sr_info->node); + clk_unprepare(sr_info->fck); + diff --git a/drivers/soc/ti/ti_sci_pm_domains.c b/drivers/soc/ti/ti_sci_pm_domains.c index 8afb3f45d2637..a33ec7eaf23d1 100644 --- a/drivers/soc/ti/ti_sci_pm_domains.c @@ -267047,6 +319459,35 @@ index 09abd17065ba5..8b3ff44fd9010 100644 } ret = devm_request_irq(dev, irq, wkup_m3_txev_handler, +diff --git a/drivers/soc/ux500/ux500-soc-id.c b/drivers/soc/ux500/ux500-soc-id.c +index a9472e0e5d61c..27d6e25a01153 100644 +--- a/drivers/soc/ux500/ux500-soc-id.c ++++ b/drivers/soc/ux500/ux500-soc-id.c +@@ -167,20 +167,18 @@ ATTRIBUTE_GROUPS(ux500_soc); + static const char *db8500_read_soc_id(struct device_node *backupram) + { + void __iomem *base; +- void __iomem *uid; + const char *retstr; ++ u32 uid[5]; + + base = of_iomap(backupram, 0); + if (!base) + return NULL; +- uid = base + 0x1fc0; ++ memcpy_fromio(uid, base + 0x1fc0, sizeof(uid)); + + /* Throw these device-specific numbers into the entropy pool */ +- add_device_randomness(uid, 0x14); ++ add_device_randomness(uid, sizeof(uid)); + retstr = kasprintf(GFP_KERNEL, "%08x%08x%08x%08x%08x", +- readl((u32 *)uid+0), +- readl((u32 *)uid+1), readl((u32 *)uid+2), +- readl((u32 *)uid+3), readl((u32 *)uid+4)); ++ uid[0], uid[1], uid[2], uid[3], uid[4]); + iounmap(base); + return retstr; + } diff --git a/drivers/soundwire/bus.c b/drivers/soundwire/bus.c index 1b115734a8f6b..b7cdfa65157c6 100644 --- a/drivers/soundwire/bus.c @@ -267295,7 +319736,7 @@ index b6cad0d59b7b9..49900cd207bc7 100644 } diff --git a/drivers/soundwire/dmi-quirks.c b/drivers/soundwire/dmi-quirks.c -index 0ca2a3e3a02e2..747983743a14b 100644 +index 0ca2a3e3a02e2..2bf534632f644 100644 --- a/drivers/soundwire/dmi-quirks.c +++ b/drivers/soundwire/dmi-quirks.c @@ -59,7 +59,7 @@ static const struct dmi_system_id adr_remap_quirk_table[] = { @@ -267307,8 +319748,23 @@ index 0ca2a3e3a02e2..747983743a14b 100644 }, .driver_data = (void *)intel_tgl_bios, }, +@@ -71,6 +71,14 @@ static const struct dmi_system_id adr_remap_quirk_table[] = { + }, + .driver_data = (void *)intel_tgl_bios, + }, ++ { ++ /* quirk used for NUC15 LAPBC710 skew */ ++ .matches = { ++ DMI_MATCH(DMI_BOARD_VENDOR, "Intel Corporation"), ++ DMI_MATCH(DMI_BOARD_NAME, "LAPBC710"), ++ }, ++ .driver_data = (void *)intel_tgl_bios, ++ }, + { + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc"), 
diff --git a/drivers/soundwire/intel.c b/drivers/soundwire/intel.c -index 78037ffdb09ba..89ee033f0c353 100644 +index 78037ffdb09ba..90e0bf8ca37d9 100644 --- a/drivers/soundwire/intel.c +++ b/drivers/soundwire/intel.c @@ -448,8 +448,8 @@ static void intel_shim_wake(struct sdw_intel *sdw, bool wake_enable) @@ -267322,7 +319778,37 @@ index 78037ffdb09ba..89ee033f0c353 100644 } mutex_unlock(sdw->link_res->shim_lock); } -@@ -1298,6 +1298,9 @@ static int intel_link_probe(struct auxiliary_device *auxdev, +@@ -1065,8 +1065,8 @@ static const struct snd_soc_dai_ops intel_pcm_dai_ops = { + .prepare = intel_prepare, + .hw_free = intel_hw_free, + .shutdown = intel_shutdown, +- .set_sdw_stream = intel_pcm_set_sdw_stream, +- .get_sdw_stream = intel_get_sdw_stream, ++ .set_stream = intel_pcm_set_sdw_stream, ++ .get_stream = intel_get_sdw_stream, + }; + + static const struct snd_soc_dai_ops intel_pdm_dai_ops = { +@@ -1075,8 +1075,8 @@ static const struct snd_soc_dai_ops intel_pdm_dai_ops = { + .prepare = intel_prepare, + .hw_free = intel_hw_free, + .shutdown = intel_shutdown, +- .set_sdw_stream = intel_pdm_set_sdw_stream, +- .get_sdw_stream = intel_get_sdw_stream, ++ .set_stream = intel_pdm_set_sdw_stream, ++ .get_stream = intel_get_sdw_stream, + }; + + static const struct snd_soc_component_driver dai_component = { +@@ -1285,6 +1285,7 @@ static int intel_link_probe(struct auxiliary_device *auxdev, + cdns->msg_count = 0; + + bus->link_id = auxdev->id; ++ bus->clk_stop_timeout = 1; + + sdw_cdns_probe(cdns); + +@@ -1298,6 +1299,9 @@ static int intel_link_probe(struct auxiliary_device *auxdev, /* use generic bandwidth allocation algorithm */ sdw->cdns.bus.compute_params = sdw_compute_params; @@ -267332,7 +319818,7 @@ index 78037ffdb09ba..89ee033f0c353 100644 ret = sdw_bus_master_add(bus, dev, dev->fwnode); if (ret) { dev_err(dev, "sdw_bus_master_add fail: %d\n", ret); -@@ -1404,7 +1407,6 @@ int intel_link_startup(struct auxiliary_device *auxdev) +@@ -1404,7 +1408,6 @@ int intel_link_startup(struct auxiliary_device *auxdev) ret = intel_register_dai(sdw); if (ret) { dev_err(dev, "DAI registration failed: %d\n", ret); @@ -267341,7 +319827,7 @@ index 78037ffdb09ba..89ee033f0c353 100644 } diff --git a/drivers/soundwire/qcom.c b/drivers/soundwire/qcom.c -index 0ef79d60e88e6..1ce6f948e9a42 100644 +index 0ef79d60e88e6..500035a1fd460 100644 --- a/drivers/soundwire/qcom.c +++ b/drivers/soundwire/qcom.c @@ -97,7 +97,7 @@ @@ -267362,7 +319848,30 @@ index 0ef79d60e88e6..1ce6f948e9a42 100644 int (*reg_read)(struct qcom_swrm_ctrl *ctrl, int reg, u32 *val); int (*reg_write)(struct qcom_swrm_ctrl *ctrl, int reg, int val); u32 slave_status; -@@ -391,7 +391,7 @@ static int qcom_swrm_get_alert_slave_dev_num(struct qcom_swrm_ctrl *ctrl) +@@ -315,6 +315,9 @@ static int qcom_swrm_cmd_fifo_wr_cmd(struct qcom_swrm_ctrl *swrm, u8 cmd_data, + if (swrm_wait_for_wr_fifo_avail(swrm)) + return SDW_CMD_FAIL_OTHER; + ++ if (cmd_id == SWR_BROADCAST_CMD_ID) ++ reinit_completion(&swrm->broadcast); ++ + /* Its assumed that write is okay as we do not get any status back */ + swrm->reg_write(swrm, SWRM_CMD_FIFO_WR_CMD, val); + +@@ -348,6 +351,12 @@ static int qcom_swrm_cmd_fifo_rd_cmd(struct qcom_swrm_ctrl *swrm, + + val = swrm_get_packed_reg_val(&swrm->rcmd_id, len, dev_addr, reg_addr); + ++ /* ++ * Check for outstanding cmd wrt. write fifo depth to avoid ++ * overflow as read will also increase write fifo cnt. 
++ */ ++ swrm_wait_for_wr_fifo_avail(swrm); ++ + /* wait for FIFO RD to complete to avoid overflow */ + usleep_range(100, 105); + swrm->reg_write(swrm, SWRM_CMD_FIFO_RD_CMD, val); +@@ -391,7 +400,7 @@ static int qcom_swrm_get_alert_slave_dev_num(struct qcom_swrm_ctrl *ctrl) ctrl->reg_read(ctrl, SWRM_MCP_SLV_STATUS, &val); @@ -267371,7 +319880,7 @@ index 0ef79d60e88e6..1ce6f948e9a42 100644 status = (val >> (dev_num * SWRM_MCP_SLV_STATUS_SZ)); if ((status & SWRM_MCP_SLV_STATUS_MASK) == SDW_SLAVE_ALERT) { -@@ -411,7 +411,7 @@ static void qcom_swrm_get_device_status(struct qcom_swrm_ctrl *ctrl) +@@ -411,7 +420,7 @@ static void qcom_swrm_get_device_status(struct qcom_swrm_ctrl *ctrl) ctrl->reg_read(ctrl, SWRM_MCP_SLV_STATUS, &val); ctrl->slave_status = val; @@ -267380,7 +319889,7 @@ index 0ef79d60e88e6..1ce6f948e9a42 100644 u32 s; s = (val >> (i * 2)); -@@ -451,6 +451,10 @@ static int qcom_swrm_enumerate(struct sdw_bus *bus) +@@ -451,6 +460,10 @@ static int qcom_swrm_enumerate(struct sdw_bus *bus) char *buf1 = (char *)&val1, *buf2 = (char *)&val2; for (i = 1; i <= SDW_MAX_DEVICES; i++) { @@ -267391,6 +319900,28 @@ index 0ef79d60e88e6..1ce6f948e9a42 100644 /*SCP_Devid5 - Devid 4*/ ctrl->reg_read(ctrl, SWRM_ENUMERATOR_SLAVE_DEV_ID_1(i), &val1); +@@ -1019,8 +1032,8 @@ static int qcom_swrm_startup(struct snd_pcm_substream *substream, + ctrl->sruntime[dai->id] = sruntime; + + for_each_rtd_codec_dais(rtd, i, codec_dai) { +- ret = snd_soc_dai_set_sdw_stream(codec_dai, sruntime, +- substream->stream); ++ ret = snd_soc_dai_set_stream(codec_dai, sruntime, ++ substream->stream); + if (ret < 0 && ret != -ENOTSUPP) { + dev_err(dai->dev, "Failed to set sdw stream on %s\n", + codec_dai->name); +@@ -1046,8 +1059,8 @@ static const struct snd_soc_dai_ops qcom_swrm_pdm_dai_ops = { + .hw_free = qcom_swrm_hw_free, + .startup = qcom_swrm_startup, + .shutdown = qcom_swrm_shutdown, +- .set_sdw_stream = qcom_swrm_set_sdw_stream, +- .get_sdw_stream = qcom_swrm_get_sdw_stream, ++ .set_stream = qcom_swrm_set_sdw_stream, ++ .get_stream = qcom_swrm_get_sdw_stream, + }; + + static const struct snd_soc_component_driver qcom_swrm_dai_component = { diff --git a/drivers/soundwire/slave.c b/drivers/soundwire/slave.c index 669d7573320b7..25e76b5d4a1a3 100644 --- a/drivers/soundwire/slave.c @@ -267415,7 +319946,7 @@ index 669d7573320b7..25e76b5d4a1a3 100644 for (i = 0; i < SDW_MAX_PORTS; i++) init_completion(&slave->port_ready[i]); diff --git a/drivers/soundwire/stream.c b/drivers/soundwire/stream.c -index 5d4f6b308ef73..ebbe138a56266 100644 +index 5d4f6b308ef73..2a900aa302a3b 100644 --- a/drivers/soundwire/stream.c +++ b/drivers/soundwire/stream.c @@ -13,6 +13,7 @@ @@ -267507,6 +320038,24 @@ index 5d4f6b308ef73..ebbe138a56266 100644 } /** +@@ -1863,7 +1880,7 @@ static int set_stream(struct snd_pcm_substream *substream, + + /* Set stream pointer on all DAIs */ + for_each_rtd_dais(rtd, i, dai) { +- ret = snd_soc_dai_set_sdw_stream(dai, sdw_stream, substream->stream); ++ ret = snd_soc_dai_set_stream(dai, sdw_stream, substream->stream); + if (ret < 0) { + dev_err(rtd->dev, "failed to set stream pointer on dai %s\n", dai->name); + break; +@@ -1934,7 +1951,7 @@ void sdw_shutdown_stream(void *sdw_substream) + /* Find stream from first CPU DAI */ + dai = asoc_rtd_to_cpu(rtd, 0); + +- sdw_stream = snd_soc_dai_get_sdw_stream(dai, substream->stream); ++ sdw_stream = snd_soc_dai_get_stream(dai, substream->stream); + + if (IS_ERR(sdw_stream)) { + dev_err(rtd->dev, "no stream found for DAI %s\n", dai->name); diff --git 
a/drivers/spi/atmel-quadspi.c b/drivers/spi/atmel-quadspi.c index 95d4fa32c2995..938017a60c8ed 100644 --- a/drivers/spi/atmel-quadspi.c @@ -267922,6 +320471,39 @@ index 5be6b7b80c21b..7e3ff54f6616c 100644 platform_set_drvdata(pdev, dwsbt1); +diff --git a/drivers/spi/spi-dw-core.c b/drivers/spi/spi-dw-core.c +index a305074c482e8..59e22c6b4b201 100644 +--- a/drivers/spi/spi-dw-core.c ++++ b/drivers/spi/spi-dw-core.c +@@ -357,7 +357,7 @@ static void dw_spi_irq_setup(struct dw_spi *dws) + * will be adjusted at the final stage of the IRQ-based SPI transfer + * execution so not to lose the leftover of the incoming data. + */ +- level = min_t(u16, dws->fifo_len / 2, dws->tx_len); ++ level = min_t(unsigned int, dws->fifo_len / 2, dws->tx_len); + dw_writel(dws, DW_SPI_TXFTLR, level); + dw_writel(dws, DW_SPI_RXFTLR, level - 1); + +diff --git a/drivers/spi/spi-dw-dma.c b/drivers/spi/spi-dw-dma.c +index a09831c62192a..32ac8f9068e87 100644 +--- a/drivers/spi/spi-dw-dma.c ++++ b/drivers/spi/spi-dw-dma.c +@@ -127,12 +127,15 @@ static int dw_spi_dma_init_mfld(struct device *dev, struct dw_spi *dws) + + dw_spi_dma_sg_burst_init(dws); + ++ pci_dev_put(dma_dev); ++ + return 0; + + free_rxchan: + dma_release_channel(dws->rxchan); + dws->rxchan = NULL; + err_exit: ++ pci_dev_put(dma_dev); + return -EBUSY; + } + diff --git a/drivers/spi/spi-fsl-qspi.c b/drivers/spi/spi-fsl-qspi.c index 9851551ebbe05..46ae46a944c5c 100644 --- a/drivers/spi/spi-fsl-qspi.c @@ -267937,6 +320519,33 @@ index 9851551ebbe05..46ae46a944c5c 100644 q->memmap_phy = res->start; /* Since there are 4 cs, map size required is 4 times ahb_buf_size */ q->ahb_addr = devm_ioremap(dev, q->memmap_phy, +diff --git a/drivers/spi/spi-gpio.c b/drivers/spi/spi-gpio.c +index 0584f4d2fde29..3ffdab6caac2a 100644 +--- a/drivers/spi/spi-gpio.c ++++ b/drivers/spi/spi-gpio.c +@@ -244,9 +244,19 @@ static int spi_gpio_set_direction(struct spi_device *spi, bool output) + if (output) + return gpiod_direction_output(spi_gpio->mosi, 1); + +- ret = gpiod_direction_input(spi_gpio->mosi); +- if (ret) +- return ret; ++ /* ++ * Only change MOSI to an input if using 3WIRE mode. ++ * Otherwise, MOSI could be left floating if there is ++ * no pull resistor connected to the I/O pin, or could ++ * be left logic high if there is a pull-up. Transmitting ++ * logic high when only clocking MISO data in can put some ++ * SPI devices in to a bad state. ++ */ ++ if (spi->mode & SPI_3WIRE) { ++ ret = gpiod_direction_input(spi_gpio->mosi); ++ if (ret) ++ return ret; ++ } + /* + * Send a turnaround high impedance cycle when switching + * from output to input. 
Theoretically there should be diff --git a/drivers/spi/spi-hisi-kunpeng.c b/drivers/spi/spi-hisi-kunpeng.c index 58b823a16fc4d..525cc0143a305 100644 --- a/drivers/spi/spi-hisi-kunpeng.c @@ -268010,6 +320619,20 @@ index 5f05d519fbbd0..71376b6df89db 100644 pm_runtime_put_noidle(dev); return ret; } +diff --git a/drivers/spi/spi-imx.c b/drivers/spi/spi-imx.c +index b2dd0a4d24462..890b2cf02149c 100644 +--- a/drivers/spi/spi-imx.c ++++ b/drivers/spi/spi-imx.c +@@ -439,8 +439,7 @@ static unsigned int mx51_ecspi_clkdiv(struct spi_imx_data *spi_imx, + unsigned int pre, post; + unsigned int fin = spi_imx->spi_clk; + +- if (unlikely(fspi > fin)) +- return 0; ++ fspi = min(fspi, fin); + + post = fls(fin) - fls(fspi); + if (fin > fspi << post) diff --git a/drivers/spi/spi-meson-spicc.c b/drivers/spi/spi-meson-spicc.c index c208efeadd184..6974a1c947aad 100644 --- a/drivers/spi/spi-meson-spicc.c @@ -268252,7 +320875,7 @@ index 8eca6f24cb799..c8ed7815c4ba6 100644 spi_master_put(master); return ret; diff --git a/drivers/spi/spi-mt65xx.c b/drivers/spi/spi-mt65xx.c -index a15de10ee286a..2ca19b01948a2 100644 +index a15de10ee286a..49acba1dea1e7 100644 --- a/drivers/spi/spi-mt65xx.c +++ b/drivers/spi/spi-mt65xx.c @@ -43,8 +43,11 @@ @@ -268295,6 +320918,28 @@ index a15de10ee286a..2ca19b01948a2 100644 if (trans->rx_buf) { cnt = mdata->xfer_len / 4; ioread32_rep(mdata->base + SPI_RX_DATA_REG, +@@ -903,14 +912,20 @@ static int mtk_spi_remove(struct platform_device *pdev) + { + struct spi_master *master = platform_get_drvdata(pdev); + struct mtk_spi *mdata = spi_master_get_devdata(master); ++ int ret; + +- pm_runtime_disable(&pdev->dev); ++ ret = pm_runtime_resume_and_get(&pdev->dev); ++ if (ret < 0) ++ return ret; + + mtk_spi_reset(mdata); + + if (mdata->dev_comp->no_need_unprepare) + clk_unprepare(mdata->spi_clk); + ++ pm_runtime_put_noidle(&pdev->dev); ++ pm_runtime_disable(&pdev->dev); ++ + return 0; + } + diff --git a/drivers/spi/spi-mt7621.c b/drivers/spi/spi-mt7621.c index b4b9b7309b5e9..351b0ef52bbc8 100644 --- a/drivers/spi/spi-mt7621.c @@ -268986,6 +321631,27 @@ index 27f35aa2d746d..dd38cb8ffbc20 100644 /* disable qspi */ writel_relaxed(0, qspi->io_base + QSPI_CR); stm32_qspi_dma_free(qspi); +diff --git a/drivers/spi/spi-stm32.c b/drivers/spi/spi-stm32.c +index 9bd3fd1652f74..3c6f201b5dd85 100644 +--- a/drivers/spi/spi-stm32.c ++++ b/drivers/spi/spi-stm32.c +@@ -434,7 +434,7 @@ static int stm32_spi_prepare_mbr(struct stm32_spi *spi, u32 speed_hz, + u32 div, mbrdiv; + + /* Ensure spi->clk_rate is even */ +- div = DIV_ROUND_UP(spi->clk_rate & ~0x1, speed_hz); ++ div = DIV_ROUND_CLOSEST(spi->clk_rate & ~0x1, speed_hz); + + /* + * SPI framework set xfer->speed_hz to master->max_speed_hz if +@@ -886,6 +886,7 @@ static irqreturn_t stm32h7_spi_irq_thread(int irq, void *dev_id) + static DEFINE_RATELIMIT_STATE(rs, + DEFAULT_RATELIMIT_INTERVAL * 10, + 1); ++ ratelimit_set_flags(&rs, RATELIMIT_MSG_ON_RELEASE); + if (__ratelimit(&rs)) + dev_dbg_ratelimited(spi->dev, "Communication suspended\n"); + if (!spi->cur_usedma && (spi->rx_buf && (spi->rx_len > 0))) diff --git a/drivers/spi/spi-synquacer.c b/drivers/spi/spi-synquacer.c index ea706d9629cb1..47cbe73137c23 100644 --- a/drivers/spi/spi-synquacer.c @@ -269370,6 +322036,55 @@ index 926b68aa45d3e..06dd1be54925e 100644 } EXPORT_SYMBOL_GPL(spi_unregister_controller); +diff --git a/drivers/spi/spidev.c b/drivers/spi/spidev.c +index 1bd73e322b7bb..922d778df0641 100644 +--- a/drivers/spi/spidev.c ++++ b/drivers/spi/spidev.c +@@ -376,12 +376,23 @@ spidev_ioctl(struct file 
*filp, unsigned int cmd, unsigned long arg) + switch (cmd) { + /* read requests */ + case SPI_IOC_RD_MODE: +- retval = put_user(spi->mode & SPI_MODE_MASK, +- (__u8 __user *)arg); +- break; + case SPI_IOC_RD_MODE32: +- retval = put_user(spi->mode & SPI_MODE_MASK, +- (__u32 __user *)arg); ++ tmp = spi->mode; ++ ++ { ++ struct spi_controller *ctlr = spi->controller; ++ ++ if (ctlr->use_gpio_descriptors && ctlr->cs_gpiods && ++ ctlr->cs_gpiods[spi->chip_select]) ++ tmp &= ~SPI_CS_HIGH; ++ } ++ ++ if (cmd == SPI_IOC_RD_MODE) ++ retval = put_user(tmp & SPI_MODE_MASK, ++ (__u8 __user *)arg); ++ else ++ retval = put_user(tmp & SPI_MODE_MASK, ++ (__u32 __user *)arg); + break; + case SPI_IOC_RD_LSB_FIRST: + retval = put_user((spi->mode & SPI_LSB_FIRST) ? 1 : 0, +@@ -581,7 +592,6 @@ static int spidev_open(struct inode *inode, struct file *filp) + if (!spidev->tx_buffer) { + spidev->tx_buffer = kmalloc(bufsiz, GFP_KERNEL); + if (!spidev->tx_buffer) { +- dev_dbg(&spidev->spi->dev, "open/ENOMEM\n"); + status = -ENOMEM; + goto err_find_dev; + } +@@ -590,7 +600,6 @@ static int spidev_open(struct inode *inode, struct file *filp) + if (!spidev->rx_buffer) { + spidev->rx_buffer = kmalloc(bufsiz, GFP_KERNEL); + if (!spidev->rx_buffer) { +- dev_dbg(&spidev->spi->dev, "open/ENOMEM\n"); + status = -ENOMEM; + goto err_alloc_rx_buf; + } diff --git a/drivers/spmi/spmi-pmic-arb.c b/drivers/spmi/spmi-pmic-arb.c index bbbd311eda030..e6de2aeece8d3 100644 --- a/drivers/spmi/spmi-pmic-arb.c @@ -269788,6 +322503,24 @@ index 1e613d42d8237..62d7674852bec 100644 control->items = le32_to_cpu(gbenum->items); } else { csize = sizeof(struct gb_audio_control); +diff --git a/drivers/staging/iio/accel/adis16203.c b/drivers/staging/iio/accel/adis16203.c +index 1d3026dae827e..62d5397ff1f98 100644 +--- a/drivers/staging/iio/accel/adis16203.c ++++ b/drivers/staging/iio/accel/adis16203.c +@@ -312,3 +312,4 @@ MODULE_AUTHOR("Barry Song <21cnbao@gmail.com>"); + MODULE_DESCRIPTION("Analog Devices ADIS16203 Programmable 360 Degrees Inclinometer"); + MODULE_LICENSE("GPL v2"); + MODULE_ALIAS("spi:adis16203"); ++MODULE_IMPORT_NS(IIO_ADISLIB); +diff --git a/drivers/staging/iio/accel/adis16240.c b/drivers/staging/iio/accel/adis16240.c +index 2a8aa83b8d9e6..bca857eef92e2 100644 +--- a/drivers/staging/iio/accel/adis16240.c ++++ b/drivers/staging/iio/accel/adis16240.c +@@ -440,3 +440,4 @@ MODULE_AUTHOR("Barry Song <21cnbao@gmail.com>"); + MODULE_DESCRIPTION("Analog Devices Programmable Impact Sensor and Recorder"); + MODULE_LICENSE("GPL v2"); + MODULE_ALIAS("spi:adis16240"); ++MODULE_IMPORT_NS(IIO_ADISLIB); diff --git a/drivers/staging/iio/adc/ad7280a.c b/drivers/staging/iio/adc/ad7280a.c index fef0055b89909..20183b2ea1279 100644 --- a/drivers/staging/iio/adc/ad7280a.c @@ -271090,9 +323823,20 @@ index 75489f7d75eec..c1f2f6151c5f8 100644 pipe, port); return err; diff --git a/drivers/staging/media/atomisp/pci/sh_css_params.c b/drivers/staging/media/atomisp/pci/sh_css_params.c -index dbd3bfe3d343c..ccc0078795648 100644 +index dbd3bfe3d343c..deecffd438aeb 100644 --- a/drivers/staging/media/atomisp/pci/sh_css_params.c +++ b/drivers/staging/media/atomisp/pci/sh_css_params.c +@@ -962,8 +962,8 @@ sh_css_set_black_frame(struct ia_css_stream *stream, + params->fpn_config.data = NULL; + } + if (!params->fpn_config.data) { +- params->fpn_config.data = kvmalloc(height * width * +- sizeof(short), GFP_KERNEL); ++ params->fpn_config.data = kvmalloc(array3_size(height, width, sizeof(short)), ++ GFP_KERNEL); + if (!params->fpn_config.data) { + IA_CSS_ERROR("out of 
memory"); + IA_CSS_LEAVE_ERR_PRIVATE(-ENOMEM); @@ -2431,7 +2431,7 @@ sh_css_create_isp_params(struct ia_css_stream *stream, unsigned int i; struct sh_css_ddr_address_map *ddr_ptrs; @@ -271696,10 +324440,36 @@ index 3c078f15a2959..c0bc57fd678a7 100644 /******************* Firmware functions *******************/ diff --git a/drivers/staging/media/ipu3/ipu3-v4l2.c b/drivers/staging/media/ipu3/ipu3-v4l2.c -index 38a2407645096..f0e61c1b6ffdc 100644 +index 38a2407645096..ed091418f7e73 100644 --- a/drivers/staging/media/ipu3/ipu3-v4l2.c +++ b/drivers/staging/media/ipu3/ipu3-v4l2.c -@@ -192,33 +192,30 @@ static int imgu_subdev_get_selection(struct v4l2_subdev *sd, +@@ -188,37 +188,50 @@ static int imgu_subdev_set_fmt(struct v4l2_subdev *sd, + return 0; + } + ++static struct v4l2_rect * ++imgu_subdev_get_crop(struct imgu_v4l2_subdev *sd, ++ struct v4l2_subdev_state *sd_state, unsigned int pad, ++ enum v4l2_subdev_format_whence which) ++{ ++ if (which == V4L2_SUBDEV_FORMAT_TRY) ++ return v4l2_subdev_get_try_crop(&sd->subdev, sd_state, pad); ++ else ++ return &sd->rect.eff; ++} ++ ++static struct v4l2_rect * ++imgu_subdev_get_compose(struct imgu_v4l2_subdev *sd, ++ struct v4l2_subdev_state *sd_state, unsigned int pad, ++ enum v4l2_subdev_format_whence which) ++{ ++ if (which == V4L2_SUBDEV_FORMAT_TRY) ++ return v4l2_subdev_get_try_compose(&sd->subdev, sd_state, pad); ++ else ++ return &sd->rect.bds; ++} ++ + static int imgu_subdev_get_selection(struct v4l2_subdev *sd, struct v4l2_subdev_state *sd_state, struct v4l2_subdev_selection *sel) { @@ -271718,21 +324488,15 @@ index 38a2407645096..f0e61c1b6ffdc 100644 - try_sel = v4l2_subdev_get_try_crop(sd, sd_state, sel->pad); - r = &imgu_sd->rect.eff; - break; -+ if (sel->which == V4L2_SUBDEV_FORMAT_TRY) -+ sel->r = *v4l2_subdev_get_try_crop(sd, sd_state, -+ sel->pad); -+ else -+ sel->r = imgu_sd->rect.eff; ++ sel->r = *imgu_subdev_get_crop(imgu_sd, sd_state, sel->pad, ++ sel->which); + return 0; case V4L2_SEL_TGT_COMPOSE: - try_sel = v4l2_subdev_get_try_compose(sd, sd_state, sel->pad); - r = &imgu_sd->rect.bds; - break; -+ if (sel->which == V4L2_SUBDEV_FORMAT_TRY) -+ sel->r = *v4l2_subdev_get_try_compose(sd, sd_state, -+ sel->pad); -+ else -+ sel->r = imgu_sd->rect.bds; ++ sel->r = *imgu_subdev_get_compose(imgu_sd, sd_state, sel->pad, ++ sel->which); + return 0; default: return -EINVAL; @@ -271747,7 +324511,49 @@ index 38a2407645096..f0e61c1b6ffdc 100644 } static int imgu_subdev_set_selection(struct v4l2_subdev *sd, -@@ -592,11 +589,12 @@ static const struct imgu_fmt *find_format(struct v4l2_format *f, u32 type) +@@ -226,10 +239,9 @@ static int imgu_subdev_set_selection(struct v4l2_subdev *sd, + struct v4l2_subdev_selection *sel) + { + struct imgu_device *imgu = v4l2_get_subdevdata(sd); +- struct imgu_v4l2_subdev *imgu_sd = container_of(sd, +- struct imgu_v4l2_subdev, +- subdev); +- struct v4l2_rect *rect, *try_sel; ++ struct imgu_v4l2_subdev *imgu_sd = ++ container_of(sd, struct imgu_v4l2_subdev, subdev); ++ struct v4l2_rect *rect; + + dev_dbg(&imgu->pci_dev->dev, + "set subdev %u sel which %u target 0x%4x rect [%ux%u]", +@@ -241,22 +253,18 @@ static int imgu_subdev_set_selection(struct v4l2_subdev *sd, + + switch (sel->target) { + case V4L2_SEL_TGT_CROP: +- try_sel = v4l2_subdev_get_try_crop(sd, sd_state, sel->pad); +- rect = &imgu_sd->rect.eff; ++ rect = imgu_subdev_get_crop(imgu_sd, sd_state, sel->pad, ++ sel->which); + break; + case V4L2_SEL_TGT_COMPOSE: +- try_sel = v4l2_subdev_get_try_compose(sd, sd_state, sel->pad); +- rect = &imgu_sd->rect.bds; ++ 
rect = imgu_subdev_get_compose(imgu_sd, sd_state, sel->pad, ++ sel->which); + break; + default: + return -EINVAL; + } + +- if (sel->which == V4L2_SUBDEV_FORMAT_TRY) +- *try_sel = sel->r; +- else +- *rect = sel->r; +- ++ *rect = sel->r; + return 0; + } + +@@ -592,11 +600,12 @@ static const struct imgu_fmt *find_format(struct v4l2_format *f, u32 type) static int imgu_vidioc_querycap(struct file *file, void *fh, struct v4l2_capability *cap) { @@ -271762,7 +324568,7 @@ index 38a2407645096..f0e61c1b6ffdc 100644 return 0; } -@@ -696,7 +694,7 @@ static int imgu_fmt(struct imgu_device *imgu, unsigned int pipe, int node, +@@ -696,7 +705,7 @@ static int imgu_fmt(struct imgu_device *imgu, unsigned int pipe, int node, /* CSS expects some format on OUT queue */ if (i != IPU3_CSS_QUEUE_OUT && @@ -271789,6 +324595,26 @@ index db7022707ff8d..86ccc8937afca 100644 dev_dbg(core->dev, "esparser: ts = %llu pld_size = %u offset = %08X flags = %08X\n", vb->timestamp, payload_size, offset, vbuf->flags); +diff --git a/drivers/staging/media/meson/vdec/vdec.c b/drivers/staging/media/meson/vdec/vdec.c +index e51d69c4729df..040ed56eb24f3 100644 +--- a/drivers/staging/media/meson/vdec/vdec.c ++++ b/drivers/staging/media/meson/vdec/vdec.c +@@ -1105,6 +1105,7 @@ static int vdec_probe(struct platform_device *pdev) + + err_vdev_release: + video_device_release(vdev); ++ v4l2_device_unregister(&core->v4l2_dev); + return ret; + } + +@@ -1113,6 +1114,7 @@ static int vdec_remove(struct platform_device *pdev) + struct amvdec_core *core = platform_get_drvdata(pdev); + + video_unregister_device(core->vdev_dec); ++ v4l2_device_unregister(&core->v4l2_dev); + + return 0; + } diff --git a/drivers/staging/media/meson/vdec/vdec_helpers.c b/drivers/staging/media/meson/vdec/vdec_helpers.c index b9125c295d1d3..06fd66539797a 100644 --- a/drivers/staging/media/meson/vdec/vdec_helpers.c @@ -272166,6 +324992,48 @@ index 92ace87c1c7d1..5f34e36702893 100644 #define VE_DEC_H265_DEC_SLICE_HDR_INFO1_SLICE_CR_QP_OFFSET(v) \ SHIFT_AND_MASK_BITS(v, 20, 16) #define VE_DEC_H265_DEC_SLICE_HDR_INFO1_SLICE_CB_QP_OFFSET(v) \ +diff --git a/drivers/staging/media/tegra-video/csi.c b/drivers/staging/media/tegra-video/csi.c +index b26e44adb2be7..426e653bd55d5 100644 +--- a/drivers/staging/media/tegra-video/csi.c ++++ b/drivers/staging/media/tegra-video/csi.c +@@ -433,7 +433,7 @@ static int tegra_csi_channel_alloc(struct tegra_csi *csi, + for (i = 0; i < chan->numgangports; i++) + chan->csi_port_nums[i] = port_num + i * CSI_PORTS_PER_BRICK; + +- chan->of_node = node; ++ chan->of_node = of_node_get(node); + chan->numpads = num_pads; + if (num_pads & 0x2) { + chan->pads[0].flags = MEDIA_PAD_FL_SINK; +@@ -448,6 +448,7 @@ static int tegra_csi_channel_alloc(struct tegra_csi *csi, + chan->mipi = tegra_mipi_request(csi->dev, node); + if (IS_ERR(chan->mipi)) { + ret = PTR_ERR(chan->mipi); ++ chan->mipi = NULL; + dev_err(csi->dev, "failed to get mipi device: %d\n", ret); + } + +@@ -640,6 +641,7 @@ static void tegra_csi_channels_cleanup(struct tegra_csi *csi) + media_entity_cleanup(&subdev->entity); + } + ++ of_node_put(chan->of_node); + list_del(&chan->list); + kfree(chan); + } +diff --git a/drivers/staging/media/tegra-video/csi.h b/drivers/staging/media/tegra-video/csi.h +index 4ee05a1785cfa..6960ea2e3d360 100644 +--- a/drivers/staging/media/tegra-video/csi.h ++++ b/drivers/staging/media/tegra-video/csi.h +@@ -56,7 +56,7 @@ struct tegra_csi; + * @framerate: active framerate for TPG + * @h_blank: horizontal blanking for TPG active format + * @v_blank: vertical blanking 
for TPG active format +- * @mipi: mipi device for corresponding csi channel pads ++ * @mipi: mipi device for corresponding csi channel pads, or NULL if not applicable (TPG, error) + * @pixel_rate: active pixel rate from the sensor on this channel + */ + struct tegra_csi_channel { diff --git a/drivers/staging/media/zoran/zoran.h b/drivers/staging/media/zoran/zoran.h index b1ad2a2b914cd..50d5a7acfab6c 100644 --- a/drivers/staging/media/zoran/zoran.h @@ -272662,10 +325530,10 @@ index 24277a17cff3d..09115cf4ed00e 100644 - #endif /* DIM2_SYSFS_H */ diff --git a/drivers/staging/mt7621-dts/gbpc1.dts b/drivers/staging/mt7621-dts/gbpc1.dts -index b65d716868146..02fd9be5e1734 100644 +index b65d716868146..cf5d6e9a9b548 100644 --- a/drivers/staging/mt7621-dts/gbpc1.dts +++ b/drivers/staging/mt7621-dts/gbpc1.dts -@@ -11,7 +11,8 @@ +@@ -11,14 +11,15 @@ memory@0 { device_type = "memory"; @@ -272675,6 +325543,14 @@ index b65d716868146..02fd9be5e1734 100644 }; chosen { + bootargs = "console=ttyS0,57600"; + }; + +- palmbus: palmbus@1E000000 { ++ palmbus: palmbus@1e000000 { + i2c@900 { + status = "okay"; + }; @@ -37,24 +38,16 @@ gpio-leds { compatible = "gpio-leds"; @@ -272884,7 +325760,7 @@ index 52760e7351f6c..6f6fed071dda0 100644 + }; }; diff --git a/drivers/staging/mt7621-dts/mt7621.dtsi b/drivers/staging/mt7621-dts/mt7621.dtsi -index eeabe9c0f4fb8..59a9ce282a3b3 100644 +index eeabe9c0f4fb8..04c4d6eeea19b 100644 --- a/drivers/staging/mt7621-dts/mt7621.dtsi +++ b/drivers/staging/mt7621-dts/mt7621.dtsi @@ -36,9 +36,9 @@ @@ -272899,6 +325775,43 @@ index eeabe9c0f4fb8..59a9ce282a3b3 100644 compatible = "regulator-fixed"; regulator-name = "mmc_io"; regulator-min-microvolt = <1800000>; +@@ -47,10 +47,10 @@ + regulator-always-on; + }; + +- palmbus: palmbus@1E000000 { ++ palmbus: palmbus@1e000000 { + compatible = "palmbus"; +- reg = <0x1E000000 0x100000>; +- ranges = <0x0 0x1E000000 0x0FFFFF>; ++ reg = <0x1e000000 0x100000>; ++ ranges = <0x0 0x1e000000 0x0fffff>; + + #address-cells = <1>; + #size-cells = <1>; +@@ -301,11 +301,11 @@ + #reset-cells = <1>; + }; + +- sdhci: sdhci@1E130000 { ++ sdhci: sdhci@1e130000 { + status = "disabled"; + + compatible = "mediatek,mt7620-mmc"; +- reg = <0x1E130000 0x4000>; ++ reg = <0x1e130000 0x4000>; + + bus-width = <4>; + max-frequency = <48000000>; +@@ -327,7 +327,7 @@ + interrupts = <GIC_SHARED 20 IRQ_TYPE_LEVEL_HIGH>; + }; + +- xhci: xhci@1E1C0000 { ++ xhci: xhci@1e1c0000 { + status = "okay"; + + compatible = "mediatek,mt8173-xhci"; @@ -391,37 +391,32 @@ mediatek,ethsys = <&sysc>; @@ -272986,6 +325899,131 @@ index eeabe9c0f4fb8..59a9ce282a3b3 100644 fixed-link { speed = <1000>; full-duplex; +diff --git a/drivers/staging/mt7621-pci/pci-mt7621.c b/drivers/staging/mt7621-pci/pci-mt7621.c +index 6acfc94a16e73..b520d1e0edd14 100644 +--- a/drivers/staging/mt7621-pci/pci-mt7621.c ++++ b/drivers/staging/mt7621-pci/pci-mt7621.c +@@ -93,8 +93,8 @@ struct mt7621_pcie_port { + * reset lines are inverted. 
+ */ + struct mt7621_pcie { +- void __iomem *base; + struct device *dev; ++ void __iomem *base; + struct list_head ports; + bool resets_inverted; + }; +@@ -129,7 +129,7 @@ static inline void pcie_port_write(struct mt7621_pcie_port *port, + writel_relaxed(val, port->base + reg); + } + +-static inline u32 mt7621_pci_get_cfgaddr(unsigned int bus, unsigned int slot, ++static inline u32 mt7621_pcie_get_cfgaddr(unsigned int bus, unsigned int slot, + unsigned int func, unsigned int where) + { + return (((where & 0xF00) >> 8) << 24) | (bus << 16) | (slot << 11) | +@@ -140,7 +140,7 @@ static void __iomem *mt7621_pcie_map_bus(struct pci_bus *bus, + unsigned int devfn, int where) + { + struct mt7621_pcie *pcie = bus->sysdata; +- u32 address = mt7621_pci_get_cfgaddr(bus->number, PCI_SLOT(devfn), ++ u32 address = mt7621_pcie_get_cfgaddr(bus->number, PCI_SLOT(devfn), + PCI_FUNC(devfn), where); + + writel_relaxed(address, pcie->base + RALINK_PCI_CONFIG_ADDR); +@@ -148,7 +148,7 @@ static void __iomem *mt7621_pcie_map_bus(struct pci_bus *bus, + return pcie->base + RALINK_PCI_CONFIG_DATA + (where & 3); + } + +-struct pci_ops mt7621_pci_ops = { ++struct pci_ops mt7621_pcie_ops = { + .map_bus = mt7621_pcie_map_bus, + .read = pci_generic_config_read, + .write = pci_generic_config_write, +@@ -156,7 +156,7 @@ struct pci_ops mt7621_pci_ops = { + + static u32 read_config(struct mt7621_pcie *pcie, unsigned int dev, u32 reg) + { +- u32 address = mt7621_pci_get_cfgaddr(0, dev, 0, reg); ++ u32 address = mt7621_pcie_get_cfgaddr(0, dev, 0, reg); + + pcie_write(pcie, address, RALINK_PCI_CONFIG_ADDR); + return pcie_read(pcie, RALINK_PCI_CONFIG_DATA); +@@ -165,7 +165,7 @@ static u32 read_config(struct mt7621_pcie *pcie, unsigned int dev, u32 reg) + static void write_config(struct mt7621_pcie *pcie, unsigned int dev, + u32 reg, u32 val) + { +- u32 address = mt7621_pci_get_cfgaddr(0, dev, 0, reg); ++ u32 address = mt7621_pcie_get_cfgaddr(0, dev, 0, reg); + + pcie_write(pcie, address, RALINK_PCI_CONFIG_ADDR); + pcie_write(pcie, val, RALINK_PCI_CONFIG_DATA); +@@ -505,16 +505,17 @@ static int mt7621_pcie_register_host(struct pci_host_bridge *host) + { + struct mt7621_pcie *pcie = pci_host_bridge_priv(host); + +- host->ops = &mt7621_pci_ops; ++ host->ops = &mt7621_pcie_ops; + host->sysdata = pcie; + return pci_host_probe(host); + } + +-static const struct soc_device_attribute mt7621_pci_quirks_match[] = { +- { .soc_id = "mt7621", .revision = "E2" } ++static const struct soc_device_attribute mt7621_pcie_quirks_match[] = { ++ { .soc_id = "mt7621", .revision = "E2" }, ++ { /* sentinel */ } + }; + +-static int mt7621_pci_probe(struct platform_device *pdev) ++static int mt7621_pcie_probe(struct platform_device *pdev) + { + struct device *dev = &pdev->dev; + const struct soc_device_attribute *attr; +@@ -535,7 +536,7 @@ static int mt7621_pci_probe(struct platform_device *pdev) + platform_set_drvdata(pdev, pcie); + INIT_LIST_HEAD(&pcie->ports); + +- attr = soc_device_match(mt7621_pci_quirks_match); ++ attr = soc_device_match(mt7621_pcie_quirks_match); + if (attr) + pcie->resets_inverted = true; + +@@ -572,7 +573,7 @@ remove_resets: + return err; + } + +-static int mt7621_pci_remove(struct platform_device *pdev) ++static int mt7621_pcie_remove(struct platform_device *pdev) + { + struct mt7621_pcie *pcie = platform_get_drvdata(pdev); + struct mt7621_pcie_port *port; +@@ -583,18 +584,18 @@ static int mt7621_pci_remove(struct platform_device *pdev) + return 0; + } + +-static const struct of_device_id mt7621_pci_ids[] = { ++static const 
struct of_device_id mt7621_pcie_ids[] = { + { .compatible = "mediatek,mt7621-pci" }, + {}, + }; +-MODULE_DEVICE_TABLE(of, mt7621_pci_ids); ++MODULE_DEVICE_TABLE(of, mt7621_pcie_ids); + +-static struct platform_driver mt7621_pci_driver = { +- .probe = mt7621_pci_probe, +- .remove = mt7621_pci_remove, ++static struct platform_driver mt7621_pcie_driver = { ++ .probe = mt7621_pcie_probe, ++ .remove = mt7621_pcie_remove, + .driver = { + .name = "mt7621-pci", +- .of_match_table = of_match_ptr(mt7621_pci_ids), ++ .of_match_table = of_match_ptr(mt7621_pcie_ids), + }, + }; +-builtin_platform_driver(mt7621_pci_driver); ++builtin_platform_driver(mt7621_pcie_driver); diff --git a/drivers/staging/r8188eu/core/rtw_led.c b/drivers/staging/r8188eu/core/rtw_led.c index b33e34cce12e4..f9a8cdd9a1689 100644 --- a/drivers/staging/r8188eu/core/rtw_led.c @@ -273427,6 +326465,21 @@ index 64d9feee1f392..f00ac94b2639b 100644 return NULL; } EXPORT_SYMBOL(alloc_rtllib); +diff --git a/drivers/staging/rtl8192e/rtllib_rx.c b/drivers/staging/rtl8192e/rtllib_rx.c +index e3d0a361d370d..98e90670560b5 100644 +--- a/drivers/staging/rtl8192e/rtllib_rx.c ++++ b/drivers/staging/rtl8192e/rtllib_rx.c +@@ -1489,9 +1489,9 @@ static int rtllib_rx_Monitor(struct rtllib_device *ieee, struct sk_buff *skb, + hdrlen += 4; + } + +- rtllib_monitor_rx(ieee, skb, rx_stats, hdrlen); + ieee->stats.rx_packets++; + ieee->stats.rx_bytes += skb->len; ++ rtllib_monitor_rx(ieee, skb, rx_stats, hdrlen); + + return 1; + } diff --git a/drivers/staging/rtl8192e/rtllib_softmac.c b/drivers/staging/rtl8192e/rtllib_softmac.c index d2726d01c7573..ea8bc27fce494 100644 --- a/drivers/staging/rtl8192e/rtllib_softmac.c @@ -273469,6 +326522,23 @@ index d2726d01c7573..ea8bc27fce494 100644 } void rtllib_softmac_free(struct rtllib_device *ieee) +diff --git a/drivers/staging/rtl8192u/ieee80211/ieee80211_rx.c b/drivers/staging/rtl8192u/ieee80211/ieee80211_rx.c +index b58e75932ecd5..3686b3c599ce7 100644 +--- a/drivers/staging/rtl8192u/ieee80211/ieee80211_rx.c ++++ b/drivers/staging/rtl8192u/ieee80211/ieee80211_rx.c +@@ -951,9 +951,11 @@ int ieee80211_rx(struct ieee80211_device *ieee, struct sk_buff *skb, + #endif + + if (ieee->iw_mode == IW_MODE_MONITOR) { ++ unsigned int len = skb->len; ++ + ieee80211_monitor_rx(ieee, skb, rx_stats); + stats->rx_packets++; +- stats->rx_bytes += skb->len; ++ stats->rx_bytes += len; + return 1; + } + diff --git a/drivers/staging/rtl8192u/ieee80211/ieee80211_softmac.c b/drivers/staging/rtl8192u/ieee80211/ieee80211_softmac.c index 1a193f900779d..2b06706a70717 100644 --- a/drivers/staging/rtl8192u/ieee80211/ieee80211_softmac.c @@ -274207,6 +327277,19 @@ index f78bf174de8e2..23f4f706f935c 100644 } void rtw_cancel_all_timer(struct adapter *padapter) +diff --git a/drivers/staging/vc04_services/include/linux/raspberrypi/vchiq.h b/drivers/staging/vc04_services/include/linux/raspberrypi/vchiq.h +index 81db7fb76d6db..97893bb0a0fde 100644 +--- a/drivers/staging/vc04_services/include/linux/raspberrypi/vchiq.h ++++ b/drivers/staging/vc04_services/include/linux/raspberrypi/vchiq.h +@@ -82,7 +82,7 @@ struct vchiq_service_params_kernel { + + struct vchiq_instance; + +-extern enum vchiq_status vchiq_initialise(struct vchiq_instance **pinstance); ++extern int vchiq_initialise(struct vchiq_instance **pinstance); + extern enum vchiq_status vchiq_shutdown(struct vchiq_instance *instance); + extern enum vchiq_status vchiq_connect(struct vchiq_instance *instance); + extern enum vchiq_status vchiq_open_service(struct vchiq_instance *instance, diff --git 
a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c index 967f10b9582a8..099359fc01152 100644 --- a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c @@ -274281,6 +327364,23 @@ index 967f10b9582a8..099359fc01152 100644 /* * There is no list of instances, so instead scan all services, * marking those that have been dumped. +diff --git a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.h b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.h +index e8e39a154c743..69f342e9bb7ab 100644 +--- a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.h ++++ b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.h +@@ -152,10 +152,10 @@ extern struct vchiq_arm_state* + vchiq_platform_get_arm_state(struct vchiq_state *state); + + +-extern enum vchiq_status ++extern int + vchiq_use_internal(struct vchiq_state *state, struct vchiq_service *service, + enum USE_TYPE_E use_type); +-extern enum vchiq_status ++extern int + vchiq_release_internal(struct vchiq_state *state, + struct vchiq_service *service); + diff --git a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_core.c b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_core.c index 9429b8a642fbb..630ed0dc24c39 100644 --- a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_core.c @@ -274452,6 +327552,27 @@ index 8075f60fd02c3..2d5cf1714ae05 100644 } spin_unlock(&tiqn->tiqn_tpg_lock); +diff --git a/drivers/target/loopback/tcm_loop.c b/drivers/target/loopback/tcm_loop.c +index 52db28d868d58..600a4d1ee45ff 100644 +--- a/drivers/target/loopback/tcm_loop.c ++++ b/drivers/target/loopback/tcm_loop.c +@@ -397,6 +397,7 @@ static int tcm_loop_setup_hba_bus(struct tcm_loop_hba *tl_hba, int tcm_loop_host + ret = device_register(&tl_hba->dev); + if (ret) { + pr_err("device_register() failed for tl_hba->dev: %d\n", ret); ++ put_device(&tl_hba->dev); + return -ENODEV; + } + +@@ -1073,7 +1074,7 @@ check_len: + */ + ret = tcm_loop_setup_hba_bus(tl_hba, tcm_loop_hba_no_cnt); + if (ret) +- goto out; ++ return ERR_PTR(ret); + + sh = tl_hba->sh; + tcm_loop_hba_no_cnt++; diff --git a/drivers/target/target_core_alua.c b/drivers/target/target_core_alua.c index cb1de1ecaaa61..bd0f2ce011dd7 100644 --- a/drivers/target/target_core_alua.c @@ -274485,6 +327606,28 @@ index 8cb1fa0c05857..fa866acef5bb2 100644 return true; } EXPORT_SYMBOL(target_configure_unmap_from_queue); +diff --git a/drivers/target/target_core_file.c b/drivers/target/target_core_file.c +index ef4a8e189fba0..014860716605b 100644 +--- a/drivers/target/target_core_file.c ++++ b/drivers/target/target_core_file.c +@@ -332,7 +332,7 @@ static int fd_do_rw(struct se_cmd *cmd, struct file *fd, + len += sg->length; + } + +- iov_iter_bvec(&iter, READ, bvec, sgl_nents, len); ++ iov_iter_bvec(&iter, is_write, bvec, sgl_nents, len); + if (is_write) + ret = vfs_iter_write(fd, &iter, &pos, 0); + else +@@ -469,7 +469,7 @@ fd_execute_write_same(struct se_cmd *cmd) + len += se_dev->dev_attrib.block_size; + } + +- iov_iter_bvec(&iter, READ, bvec, nolb, len); ++ iov_iter_bvec(&iter, WRITE, bvec, nolb, len); + ret = vfs_iter_write(fd_dev->fd_file, &iter, &pos, 0); + + kfree(bvec); diff --git a/drivers/target/target_core_internal.h b/drivers/target/target_core_internal.h index a343bcfa2180f..a889a6237d9c1 100644 --- a/drivers/target/target_core_internal.h @@ -274498,7 +327641,7 @@ index a343bcfa2180f..a889a6237d9c1 100644 bool target_check_fua(struct se_device *dev); void 
__target_execute_cmd(struct se_cmd *, bool); diff --git a/drivers/target/target_core_tmr.c b/drivers/target/target_core_tmr.c -index e7fcbc09f9dbc..bac111456fa1d 100644 +index e7fcbc09f9dbc..2b95b4550a637 100644 --- a/drivers/target/target_core_tmr.c +++ b/drivers/target/target_core_tmr.c @@ -50,15 +50,6 @@ EXPORT_SYMBOL(core_tmr_alloc_req); @@ -274517,6 +327660,17 @@ index e7fcbc09f9dbc..bac111456fa1d 100644 kfree(tmr); } +@@ -82,8 +73,8 @@ static bool __target_check_io_state(struct se_cmd *se_cmd, + { + struct se_session *sess = se_cmd->se_sess; + +- assert_spin_locked(&sess->sess_cmd_lock); +- WARN_ON_ONCE(!irqs_disabled()); ++ lockdep_assert_held(&sess->sess_cmd_lock); ++ + /* + * If command already reached CMD_T_COMPLETE state within + * target_complete_cmd() or CMD_T_FABRIC_STOP due to shutdown, @@ -156,13 +147,6 @@ void core_tmr_abort_task( se_cmd->state_active = false; spin_unlock_irqrestore(&dev->queues[i].lock, flags); @@ -274911,6 +328065,19 @@ index 5363ebebfc357..50c0d839fe751 100644 /* * Ensure that there are no pre-existing shm objects before enabling +diff --git a/drivers/tee/optee/device.c b/drivers/tee/optee/device.c +index 128a2d2a50a16..a74d82e230e36 100644 +--- a/drivers/tee/optee/device.c ++++ b/drivers/tee/optee/device.c +@@ -80,7 +80,7 @@ static int optee_register_device(const uuid_t *device_uuid) + rc = device_register(&optee_device->dev); + if (rc) { + pr_err("device registration failed, err: %d\n", rc); +- kfree(optee_device); ++ put_device(&optee_device->dev); + } + + return rc; diff --git a/drivers/tee/optee/optee_private.h b/drivers/tee/optee/optee_private.h index f6bb4a763ba94..ea09533e30cde 100644 --- a/drivers/tee/optee/optee_private.h @@ -274989,7 +328156,7 @@ index d167039af519e..1aa843f2ecc7c 100644 shm->flags |= TEE_SHM_REGISTER; rc = optee_shm_register(shm->ctx, shm, pages, nr_pages, diff --git a/drivers/tee/tee_core.c b/drivers/tee/tee_core.c -index 2b37bc408fc3d..3fc426dad2df3 100644 +index 2b37bc408fc3d..a44e5b53e7a91 100644 --- a/drivers/tee/tee_core.c +++ b/drivers/tee/tee_core.c @@ -43,7 +43,7 @@ static DEFINE_SPINLOCK(driver_lock); @@ -275026,8 +328193,18 @@ index 2b37bc408fc3d..3fc426dad2df3 100644 static int tee_open(struct inode *inode, struct file *filp) { +@@ -330,6 +334,9 @@ tee_ioctl_shm_register(struct tee_context *ctx, + if (data.flags) + return -EINVAL; + ++ if (!access_ok((void __user *)(unsigned long)data.addr, data.length)) ++ return -EFAULT; ++ + shm = tee_shm_register(ctx, data.addr, data.length, + TEE_SHM_DMA_BUF | TEE_SHM_USER_MAPPED); + if (IS_ERR(shm)) diff --git a/drivers/tee/tee_shm.c b/drivers/tee/tee_shm.c -index 8a9384a64f3e2..bd96ebb82c8ec 100644 +index 8a9384a64f3e2..6fb4400333fb4 100644 --- a/drivers/tee/tee_shm.c +++ b/drivers/tee/tee_shm.c @@ -1,14 +1,15 @@ @@ -275169,17 +328346,7 @@ index 8a9384a64f3e2..bd96ebb82c8ec 100644 shm->flags = flags | TEE_SHM_REGISTER; shm->ctx = ctx; shm->id = -1; -@@ -287,6 +223,9 @@ struct tee_shm *tee_shm_register(struct tee_context *ctx, unsigned long addr, - goto err; - } - -+ if (!access_ok((void __user *)addr, length)) -+ return ERR_PTR(-EFAULT); -+ - mutex_lock(&teedev->mutex); - shm->id = idr_alloc(&teedev->idr, shm, 1, 0, GFP_KERNEL); - mutex_unlock(&teedev->mutex); -@@ -303,22 +242,6 @@ struct tee_shm *tee_shm_register(struct tee_context *ctx, unsigned long addr, +@@ -303,22 +239,6 @@ struct tee_shm *tee_shm_register(struct tee_context *ctx, unsigned long addr, goto err; } @@ -275202,7 +328369,7 @@ index 8a9384a64f3e2..bd96ebb82c8ec 100644 return shm; err: if (shm) { -@@ -336,6 
+259,35 @@ err: +@@ -336,6 +256,35 @@ err: } EXPORT_SYMBOL_GPL(tee_shm_register); @@ -275238,7 +328405,7 @@ index 8a9384a64f3e2..bd96ebb82c8ec 100644 /** * tee_shm_get_fd() - Increase reference count and return file descriptor * @shm: Shared memory handle -@@ -348,10 +300,11 @@ int tee_shm_get_fd(struct tee_shm *shm) +@@ -348,10 +297,11 @@ int tee_shm_get_fd(struct tee_shm *shm) if (!(shm->flags & TEE_SHM_DMA_BUF)) return -EINVAL; @@ -275253,7 +328420,7 @@ index 8a9384a64f3e2..bd96ebb82c8ec 100644 return fd; } -@@ -361,17 +314,7 @@ int tee_shm_get_fd(struct tee_shm *shm) +@@ -361,17 +311,7 @@ int tee_shm_get_fd(struct tee_shm *shm) */ void tee_shm_free(struct tee_shm *shm) { @@ -275272,7 +328439,7 @@ index 8a9384a64f3e2..bd96ebb82c8ec 100644 } EXPORT_SYMBOL_GPL(tee_shm_free); -@@ -478,10 +421,15 @@ struct tee_shm *tee_shm_get_from_id(struct tee_context *ctx, int id) +@@ -478,10 +418,15 @@ struct tee_shm *tee_shm_get_from_id(struct tee_context *ctx, int id) teedev = ctx->teedev; mutex_lock(&teedev->mutex); shm = idr_find(&teedev->idr, id); @@ -275290,7 +328457,7 @@ index 8a9384a64f3e2..bd96ebb82c8ec 100644 mutex_unlock(&teedev->mutex); return shm; } -@@ -493,7 +441,24 @@ EXPORT_SYMBOL_GPL(tee_shm_get_from_id); +@@ -493,7 +438,24 @@ EXPORT_SYMBOL_GPL(tee_shm_get_from_id); */ void tee_shm_put(struct tee_shm *shm) { @@ -275459,8 +328626,60 @@ index 4310cb342a9fb..d38a80adec733 100644 + kfree(ops); } EXPORT_SYMBOL_GPL(devfreq_cooling_unregister); +diff --git a/drivers/thermal/gov_fair_share.c b/drivers/thermal/gov_fair_share.c +index 1e5abf4822bed..a4c30797b5343 100644 +--- a/drivers/thermal/gov_fair_share.c ++++ b/drivers/thermal/gov_fair_share.c +@@ -25,10 +25,10 @@ static int get_trip_level(struct thermal_zone_device *tz) + int trip_temp; + enum thermal_trip_type trip_type; + +- if (tz->trips == 0 || !tz->ops->get_trip_temp) ++ if (tz->num_trips == 0 || !tz->ops->get_trip_temp) + return 0; + +- for (count = 0; count < tz->trips; count++) { ++ for (count = 0; count < tz->num_trips; count++) { + tz->ops->get_trip_temp(tz, count, &trip_temp); + if (tz->temperature < trip_temp) + break; +@@ -49,11 +49,7 @@ static int get_trip_level(struct thermal_zone_device *tz) + static long get_target_state(struct thermal_zone_device *tz, + struct thermal_cooling_device *cdev, int percentage, int level) + { +- unsigned long max_state; +- +- cdev->ops->get_max_state(cdev, &max_state); +- +- return (long)(percentage * level * max_state) / (100 * tz->trips); ++ return (long)(percentage * level * cdev->max_state) / (100 * tz->num_trips); + } + + /** +diff --git a/drivers/thermal/gov_power_allocator.c b/drivers/thermal/gov_power_allocator.c +index 13e375751d229..1d50524709672 100644 +--- a/drivers/thermal/gov_power_allocator.c ++++ b/drivers/thermal/gov_power_allocator.c +@@ -527,7 +527,7 @@ static void get_governor_trips(struct thermal_zone_device *tz, + last_active = INVALID_TRIP; + last_passive = INVALID_TRIP; + +- for (i = 0; i < tz->trips; i++) { ++ for (i = 0; i < tz->num_trips; i++) { + enum thermal_trip_type type; + int ret; + +@@ -668,7 +668,7 @@ static int power_allocator_bind(struct thermal_zone_device *tz) + + get_governor_trips(tz, params); + +- if (tz->trips > 0) { ++ if (tz->num_trips > 0) { + ret = tz->ops->get_trip_temp(tz, + params->trip_max_desired_temperature, + &control_temp); diff --git a/drivers/thermal/imx8mm_thermal.c b/drivers/thermal/imx8mm_thermal.c -index 7442e013738f8..af666bd9e8d4d 100644 +index 7442e013738f8..c5cd873c6e016 100644 --- a/drivers/thermal/imx8mm_thermal.c +++ 
b/drivers/thermal/imx8mm_thermal.c @@ -21,6 +21,7 @@ @@ -275471,7 +328690,23 @@ index 7442e013738f8..af666bd9e8d4d 100644 #define TER_EN BIT(31) #define TRITSR_TEMP0_VAL_MASK 0xff #define TRITSR_TEMP1_VAL_MASK 0xff0000 -@@ -113,6 +114,8 @@ static void imx8mm_tmu_enable(struct imx8mm_tmu *tmu, bool enable) +@@ -64,8 +65,14 @@ static int imx8mm_tmu_get_temp(void *data, int *temp) + u32 val; + + val = readl_relaxed(tmu->base + TRITSR) & TRITSR_TEMP0_VAL_MASK; ++ ++ /* ++ * Do not validate against the V bit (bit 31) due to errata ++ * ERR051272: TMU: Bit 31 of registers TMU_TSCR/TMU_TRITSR/TMU_TRATSR invalid ++ */ ++ + *temp = val * 1000; +- if (*temp < VER1_TEMP_LOW_LIMIT) ++ if (*temp < VER1_TEMP_LOW_LIMIT || *temp > VER2_TEMP_HIGH_LIMIT) + return -EAGAIN; + + return 0; +@@ -113,6 +120,8 @@ static void imx8mm_tmu_enable(struct imx8mm_tmu *tmu, bool enable) val = readl_relaxed(tmu->base + TER); val = enable ? (val | TER_EN) : (val & ~TER_EN); @@ -275874,6 +329109,125 @@ index acebc8ba94e29..217786fba185c 100644 static struct platform_driver int3401_driver = { .probe = int3401_add, +diff --git a/drivers/thermal/intel/int340x_thermal/int340x_thermal_zone.c b/drivers/thermal/intel/int340x_thermal/int340x_thermal_zone.c +index 62c0aa5d07837..0a4eaa307156d 100644 +--- a/drivers/thermal/intel/int340x_thermal/int340x_thermal_zone.c ++++ b/drivers/thermal/intel/int340x_thermal/int340x_thermal_zone.c +@@ -44,11 +44,13 @@ static int int340x_thermal_get_trip_temp(struct thermal_zone_device *zone, + int trip, int *temp) + { + struct int34x_thermal_zone *d = zone->devdata; +- int i; ++ int i, ret = 0; + + if (d->override_ops && d->override_ops->get_trip_temp) + return d->override_ops->get_trip_temp(zone, trip, temp); + ++ mutex_lock(&d->trip_mutex); ++ + if (trip < d->aux_trip_nr) + *temp = d->aux_trips[trip]; + else if (trip == d->crt_trip_id) +@@ -66,10 +68,12 @@ static int int340x_thermal_get_trip_temp(struct thermal_zone_device *zone, + } + } + if (i == INT340X_THERMAL_MAX_ACT_TRIP_COUNT) +- return -EINVAL; ++ ret = -EINVAL; + } + +- return 0; ++ mutex_unlock(&d->trip_mutex); ++ ++ return ret; + } + + static int int340x_thermal_get_trip_type(struct thermal_zone_device *zone, +@@ -77,11 +81,13 @@ static int int340x_thermal_get_trip_type(struct thermal_zone_device *zone, + enum thermal_trip_type *type) + { + struct int34x_thermal_zone *d = zone->devdata; +- int i; ++ int i, ret = 0; + + if (d->override_ops && d->override_ops->get_trip_type) + return d->override_ops->get_trip_type(zone, trip, type); + ++ mutex_lock(&d->trip_mutex); ++ + if (trip < d->aux_trip_nr) + *type = THERMAL_TRIP_PASSIVE; + else if (trip == d->crt_trip_id) +@@ -99,10 +105,12 @@ static int int340x_thermal_get_trip_type(struct thermal_zone_device *zone, + } + } + if (i == INT340X_THERMAL_MAX_ACT_TRIP_COUNT) +- return -EINVAL; ++ ret = -EINVAL; + } + +- return 0; ++ mutex_unlock(&d->trip_mutex); ++ ++ return ret; + } + + static int int340x_thermal_set_trip_temp(struct thermal_zone_device *zone, +@@ -180,6 +188,8 @@ int int340x_thermal_read_trips(struct int34x_thermal_zone *int34x_zone) + int trip_cnt = int34x_zone->aux_trip_nr; + int i; + ++ mutex_lock(&int34x_zone->trip_mutex); ++ + int34x_zone->crt_trip_id = -1; + if (!int340x_thermal_get_trip_config(int34x_zone->adev->handle, "_CRT", + &int34x_zone->crt_temp)) +@@ -207,6 +217,8 @@ int int340x_thermal_read_trips(struct int34x_thermal_zone *int34x_zone) + int34x_zone->act_trips[i].valid = true; + } + ++ mutex_unlock(&int34x_zone->trip_mutex); ++ + return trip_cnt; + } + 
EXPORT_SYMBOL_GPL(int340x_thermal_read_trips); +@@ -230,6 +242,8 @@ struct int34x_thermal_zone *int340x_thermal_zone_add(struct acpi_device *adev, + if (!int34x_thermal_zone) + return ERR_PTR(-ENOMEM); + ++ mutex_init(&int34x_thermal_zone->trip_mutex); ++ + int34x_thermal_zone->adev = adev; + int34x_thermal_zone->override_ops = override_ops; + +@@ -281,6 +295,7 @@ err_thermal_zone: + acpi_lpat_free_conversion_table(int34x_thermal_zone->lpat_table); + kfree(int34x_thermal_zone->aux_trips); + err_trip_alloc: ++ mutex_destroy(&int34x_thermal_zone->trip_mutex); + kfree(int34x_thermal_zone); + return ERR_PTR(ret); + } +@@ -292,6 +307,7 @@ void int340x_thermal_zone_remove(struct int34x_thermal_zone + thermal_zone_device_unregister(int34x_thermal_zone->zone); + acpi_lpat_free_conversion_table(int34x_thermal_zone->lpat_table); + kfree(int34x_thermal_zone->aux_trips); ++ mutex_destroy(&int34x_thermal_zone->trip_mutex); + kfree(int34x_thermal_zone); + } + EXPORT_SYMBOL_GPL(int340x_thermal_zone_remove); +diff --git a/drivers/thermal/intel/int340x_thermal/int340x_thermal_zone.h b/drivers/thermal/intel/int340x_thermal/int340x_thermal_zone.h +index 3b4971df1b33b..8f9872afd0d3c 100644 +--- a/drivers/thermal/intel/int340x_thermal/int340x_thermal_zone.h ++++ b/drivers/thermal/intel/int340x_thermal/int340x_thermal_zone.h +@@ -32,6 +32,7 @@ struct int34x_thermal_zone { + struct thermal_zone_device_ops *override_ops; + void *priv_data; + struct acpi_lpat_conversion_table *lpat_table; ++ struct mutex trip_mutex; + }; + + struct int34x_thermal_zone *int340x_thermal_zone_add(struct acpi_device *, diff --git a/drivers/thermal/intel/int340x_thermal/processor_thermal_device.c b/drivers/thermal/intel/int340x_thermal/processor_thermal_device.c index fb64acfd5e07d..a8d98f1bd6c67 100644 --- a/drivers/thermal/intel/int340x_thermal/processor_thermal_device.c @@ -276230,7 +329584,7 @@ index 59e93b04f0a9e..0b89a4340ff4e 100644 return 0; diff --git a/drivers/thermal/intel/int340x_thermal/processor_thermal_rfim.c b/drivers/thermal/intel/int340x_thermal/processor_thermal_rfim.c -index 2b8a3235d518b..8c42e76620333 100644 +index 2b8a3235d518b..92ed1213fe379 100644 --- a/drivers/thermal/intel/int340x_thermal/processor_thermal_rfim.c +++ b/drivers/thermal/intel/int340x_thermal/processor_thermal_rfim.c @@ -9,6 +9,8 @@ @@ -276251,7 +329605,29 @@ index 2b8a3235d518b..8c42e76620333 100644 { 0, 0x5A18, 8, 0xFF, 16}, /* vco_ref_code_hi */ { 0, 0x5A08, 8, 0xFF, 0}, /* spread_spectrum_pct */ { 0, 0x5A08, 1, 0x1, 8}, /* spread_spectrum_clk_enable */ -@@ -194,8 +196,7 @@ static ssize_t rfi_restriction_store(struct device *dev, +@@ -170,6 +172,7 @@ static const struct attribute_group fivr_attribute_group = { + RFIM_SHOW(rfi_restriction_run_busy, 1) + RFIM_SHOW(rfi_restriction_err_code, 1) + RFIM_SHOW(rfi_restriction_data_rate, 1) ++RFIM_SHOW(rfi_restriction_data_rate_base, 1) + RFIM_SHOW(ddr_data_rate_point_0, 1) + RFIM_SHOW(ddr_data_rate_point_1, 1) + RFIM_SHOW(ddr_data_rate_point_2, 1) +@@ -179,11 +182,13 @@ RFIM_SHOW(rfi_disable, 1) + RFIM_STORE(rfi_restriction_run_busy, 1) + RFIM_STORE(rfi_restriction_err_code, 1) + RFIM_STORE(rfi_restriction_data_rate, 1) ++RFIM_STORE(rfi_restriction_data_rate_base, 1) + RFIM_STORE(rfi_disable, 1) + + static DEVICE_ATTR_RW(rfi_restriction_run_busy); + static DEVICE_ATTR_RW(rfi_restriction_err_code); + static DEVICE_ATTR_RW(rfi_restriction_data_rate); ++static DEVICE_ATTR_RW(rfi_restriction_data_rate_base); + static DEVICE_ATTR_RO(ddr_data_rate_point_0); + static DEVICE_ATTR_RO(ddr_data_rate_point_1); + 
static DEVICE_ATTR_RO(ddr_data_rate_point_2); +@@ -194,8 +199,7 @@ static ssize_t rfi_restriction_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { @@ -276261,7 +329637,7 @@ index 2b8a3235d518b..8c42e76620333 100644 u32 input; int ret; -@@ -203,7 +204,7 @@ static ssize_t rfi_restriction_store(struct device *dev, +@@ -203,7 +207,7 @@ static ssize_t rfi_restriction_store(struct device *dev, if (ret) return ret; @@ -276270,7 +329646,7 @@ index 2b8a3235d518b..8c42e76620333 100644 if (ret) return ret; -@@ -214,30 +215,30 @@ static ssize_t rfi_restriction_show(struct device *dev, +@@ -214,30 +218,30 @@ static ssize_t rfi_restriction_show(struct device *dev, struct device_attribute *attr, char *buf) { @@ -276309,6 +329685,14 @@ index 2b8a3235d518b..8c42e76620333 100644 } static DEVICE_ATTR_RW(rfi_restriction); +@@ -247,6 +251,7 @@ static struct attribute *dvfs_attrs[] = { + &dev_attr_rfi_restriction_run_busy.attr, + &dev_attr_rfi_restriction_err_code.attr, + &dev_attr_rfi_restriction_data_rate.attr, ++ &dev_attr_rfi_restriction_data_rate_base.attr, + &dev_attr_ddr_data_rate_point_0.attr, + &dev_attr_ddr_data_rate_point_1.attr, + &dev_attr_ddr_data_rate_point_2.attr, diff --git a/drivers/thermal/intel/intel_powerclamp.c b/drivers/thermal/intel/intel_powerclamp.c index a5b58ea89cc6d..5b19e2d460438 100644 --- a/drivers/thermal/intel/intel_powerclamp.c @@ -276337,6 +329721,33 @@ index 7d942f71e5328..bfd889422dd32 100644 help This enables initialization of Qualcomm limits management hardware(LMh). LMh allows for hardware-enforced mitigation for cpus based on +diff --git a/drivers/thermal/qcom/lmh.c b/drivers/thermal/qcom/lmh.c +index eafa7526eb8b4..cc94d8b005d49 100644 +--- a/drivers/thermal/qcom/lmh.c ++++ b/drivers/thermal/qcom/lmh.c +@@ -43,7 +43,7 @@ static irqreturn_t lmh_handle_irq(int hw_irq, void *data) + if (irq) + generic_handle_irq(irq); + +- return 0; ++ return IRQ_HANDLED; + } + + static void lmh_enable_interrupt(struct irq_data *d) +diff --git a/drivers/thermal/qcom/qcom-spmi-temp-alarm.c b/drivers/thermal/qcom/qcom-spmi-temp-alarm.c +index 7419e196dbb06..1037de19873a5 100644 +--- a/drivers/thermal/qcom/qcom-spmi-temp-alarm.c ++++ b/drivers/thermal/qcom/qcom-spmi-temp-alarm.c +@@ -251,7 +251,8 @@ static int qpnp_tm_update_critical_trip_temp(struct qpnp_tm_chip *chip, + disable_s2_shutdown = true; + else + dev_warn(chip->dev, +- "No ADC is configured and critical temperature is above the maximum stage 2 threshold of 140 C! Configuring stage 2 shutdown at 140 C.\n"); ++ "No ADC is configured and critical temperature %d mC is above the maximum stage 2 threshold of %ld mC! 
Configuring stage 2 shutdown at %ld mC.\n", ++ temp, stage2_threshold_max, stage2_threshold_max); + } + + skip: diff --git a/drivers/thermal/qcom/tsens-v0_1.c b/drivers/thermal/qcom/tsens-v0_1.c index f136cb3502384..327f37202c69f 100644 --- a/drivers/thermal/qcom/tsens-v0_1.c @@ -276391,8 +329802,21 @@ index b1162e566a707..99a8d9f3e03ca 100644 /* Valid bit is set, OK to read the temperature */ *temp = tsens_hw_to_mC(s, temp_idx); +diff --git a/drivers/thermal/tegra/tegra30-tsensor.c b/drivers/thermal/tegra/tegra30-tsensor.c +index 9b6b693cbcf85..05886684f4295 100644 +--- a/drivers/thermal/tegra/tegra30-tsensor.c ++++ b/drivers/thermal/tegra/tegra30-tsensor.c +@@ -316,7 +316,7 @@ static void tegra_tsensor_get_hw_channel_trips(struct thermal_zone_device *tzd, + *hot_trip = 85000; + *crit_trip = 90000; + +- for (i = 0; i < tzd->trips; i++) { ++ for (i = 0; i < tzd->num_trips; i++) { + enum thermal_trip_type type; + int trip_temp; + diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c -index 51374f4e1ccaf..867c8aa92b3ac 100644 +index 51374f4e1ccaf..052e8e8fbb21e 100644 --- a/drivers/thermal/thermal_core.c +++ b/drivers/thermal/thermal_core.c @@ -419,6 +419,8 @@ static void thermal_zone_device_init(struct thermal_zone_device *tz) @@ -276404,7 +329828,56 @@ index 51374f4e1ccaf..867c8aa92b3ac 100644 list_for_each_entry(pos, &tz->thermal_instances, tz_node) pos->initialized = false; } -@@ -887,7 +889,7 @@ __thermal_cooling_device_register(struct device_node *np, +@@ -501,7 +503,7 @@ void thermal_zone_device_update(struct thermal_zone_device *tz, + + tz->notify_event = event; + +- for (count = 0; count < tz->trips; count++) ++ for (count = 0; count < tz->num_trips; count++) + handle_thermal_trip(tz, count); + } + EXPORT_SYMBOL_GPL(thermal_zone_device_update); +@@ -623,10 +625,9 @@ int thermal_zone_bind_cooling_device(struct thermal_zone_device *tz, + struct thermal_instance *pos; + struct thermal_zone_device *pos1; + struct thermal_cooling_device *pos2; +- unsigned long max_state; +- int result, ret; ++ int result; + +- if (trip >= tz->trips || trip < 0) ++ if (trip >= tz->num_trips || trip < 0) + return -EINVAL; + + list_for_each_entry(pos1, &thermal_tz_list, node) { +@@ -641,15 +642,11 @@ int thermal_zone_bind_cooling_device(struct thermal_zone_device *tz, + if (tz != pos1 || cdev != pos2) + return -EINVAL; + +- ret = cdev->ops->get_max_state(cdev, &max_state); +- if (ret) +- return ret; +- + /* lower default 0, upper default max_state */ + lower = lower == THERMAL_NO_LIMIT ? 0 : lower; +- upper = upper == THERMAL_NO_LIMIT ? max_state : upper; ++ upper = upper == THERMAL_NO_LIMIT ? 
cdev->max_state : upper; + +- if (lower > upper || upper > max_state) ++ if (lower > upper || upper > cdev->max_state) + return -EINVAL; + + dev = kzalloc(sizeof(*dev), GFP_KERNEL); +@@ -807,7 +804,7 @@ static void __bind(struct thermal_zone_device *tz, int mask, + { + int i, ret; + +- for (i = 0; i < tz->trips; i++) { ++ for (i = 0; i < tz->num_trips; i++) { + if (mask & (1 << i)) { + unsigned long upper, lower; + +@@ -887,7 +884,7 @@ __thermal_cooling_device_register(struct device_node *np, { struct thermal_cooling_device *cdev; struct thermal_zone_device *pos = NULL; @@ -276413,33 +329886,47 @@ index 51374f4e1ccaf..867c8aa92b3ac 100644 if (!ops || !ops->get_max_state || !ops->get_cur_state || !ops->set_cur_state) -@@ -901,6 +903,11 @@ __thermal_cooling_device_register(struct device_node *np, +@@ -901,6 +898,7 @@ __thermal_cooling_device_register(struct device_node *np, if (ret < 0) goto out_kfree_cdev; cdev->id = ret; + id = ret; -+ -+ ret = dev_set_name(&cdev->device, "cooling_device%d", cdev->id); -+ if (ret) -+ goto out_ida_remove; cdev->type = kstrdup(type ? type : "", GFP_KERNEL); if (!cdev->type) { -@@ -916,7 +923,6 @@ __thermal_cooling_device_register(struct device_node *np, +@@ -915,8 +913,22 @@ __thermal_cooling_device_register(struct device_node *np, + cdev->updated = false; cdev->device.class = &thermal_class; cdev->devdata = devdata; ++ ++ ret = cdev->ops->get_max_state(cdev, &cdev->max_state); ++ if (ret) { ++ kfree(cdev->type); ++ goto out_ida_remove; ++ } ++ thermal_cooling_device_setup_sysfs(cdev); - dev_set_name(&cdev->device, "cooling_device%d", cdev->id); ++ ++ ret = dev_set_name(&cdev->device, "cooling_device%d", cdev->id); ++ if (ret) { ++ kfree(cdev->type); ++ thermal_cooling_device_destroy_sysfs(cdev); ++ goto out_ida_remove; ++ } ++ ret = device_register(&cdev->device); if (ret) goto out_kfree_type; -@@ -939,10 +945,12 @@ __thermal_cooling_device_register(struct device_node *np, +@@ -939,10 +951,14 @@ __thermal_cooling_device_register(struct device_node *np, return cdev; out_kfree_type: + thermal_cooling_device_destroy_sysfs(cdev); kfree(cdev->type); put_device(&cdev->device); ++ ++ /* thermal_release() takes care of the rest */ + cdev = NULL; out_ida_remove: - ida_simple_remove(&thermal_cdev_ida, cdev->id); @@ -276447,31 +329934,127 @@ index 51374f4e1ccaf..867c8aa92b3ac 100644 out_kfree_cdev: kfree(cdev); return ERR_PTR(ret); -@@ -1227,6 +1235,10 @@ thermal_zone_device_register(const char *type, int trips, int mask, - tz->id = id; - strlcpy(tz->type, type, sizeof(tz->type)); +@@ -1047,7 +1063,7 @@ static void __unbind(struct thermal_zone_device *tz, int mask, + { + int i; -+ result = dev_set_name(&tz->device, "thermal_zone%d", tz->id); -+ if (result) -+ goto remove_id; -+ - if (!ops->critical) - ops->critical = thermal_zone_device_critical; +- for (i = 0; i < tz->trips; i++) ++ for (i = 0; i < tz->num_trips; i++) + if (mask & (1 << i)) + thermal_zone_unbind_cooling_device(tz, i, cdev); + } +@@ -1152,7 +1168,7 @@ exit: + /** + * thermal_zone_device_register() - register a new thermal zone device + * @type: the thermal zone device type +- * @trips: the number of trip points the thermal zone support ++ * @num_trips: the number of trip points the thermal zone support + * @mask: a bit string indicating the writeablility of trip points + * @devdata: private device data + * @ops: standard thermal zone device callbacks +@@ -1174,7 +1190,7 @@ exit: + * IS_ERR*() helpers. 
+ */ + struct thermal_zone_device * +-thermal_zone_device_register(const char *type, int trips, int mask, ++thermal_zone_device_register(const char *type, int num_trips, int mask, + void *devdata, struct thermal_zone_device_ops *ops, + struct thermal_zone_params *tzp, int passive_delay, + int polling_delay) +@@ -1188,27 +1204,27 @@ thermal_zone_device_register(const char *type, int trips, int mask, + struct thermal_governor *governor; + + if (!type || strlen(type) == 0) { +- pr_err("Error: No thermal zone type defined\n"); ++ pr_err("No thermal zone type defined\n"); + return ERR_PTR(-EINVAL); + } + + if (type && strlen(type) >= THERMAL_NAME_LENGTH) { +- pr_err("Error: Thermal zone name (%s) too long, should be under %d chars\n", ++ pr_err("Thermal zone name (%s) too long, should be under %d chars\n", + type, THERMAL_NAME_LENGTH); + return ERR_PTR(-EINVAL); + } + +- if (trips > THERMAL_MAX_TRIPS || trips < 0 || mask >> trips) { +- pr_err("Error: Incorrect number of thermal trips\n"); ++ if (num_trips > THERMAL_MAX_TRIPS || num_trips < 0 || mask >> num_trips) { ++ pr_err("Incorrect number of thermal trips\n"); + return ERR_PTR(-EINVAL); + } + + if (!ops) { +- pr_err("Error: Thermal zone device ops not defined\n"); ++ pr_err("Thermal zone device ops not defined\n"); + return ERR_PTR(-EINVAL); + } + +- if (trips > 0 && (!ops->get_trip_type || !ops->get_trip_temp)) ++ if (num_trips > 0 && (!ops->get_trip_type || !ops->get_trip_temp)) + return ERR_PTR(-EINVAL); -@@ -1248,7 +1260,6 @@ thermal_zone_device_register(const char *type, int trips, int mask, + tz = kzalloc(sizeof(*tz), GFP_KERNEL); +@@ -1234,7 +1250,7 @@ thermal_zone_device_register(const char *type, int trips, int mask, + tz->tzp = tzp; + tz->device.class = &thermal_class; + tz->devdata = devdata; +- tz->trips = trips; ++ tz->num_trips = num_trips; + + thermal_set_delay_jiffies(&tz->passive_delay_jiffies, passive_delay); + thermal_set_delay_jiffies(&tz->polling_delay_jiffies, polling_delay); +@@ -1248,12 +1264,16 @@ thermal_zone_device_register(const char *type, int trips, int mask, /* A new thermal zone needs to be updated anyway. 
*/ atomic_set(&tz->need_update, 1); - dev_set_name(&tz->device, "thermal_zone%d", tz->id); ++ result = dev_set_name(&tz->device, "thermal_zone%d", tz->id); ++ if (result) { ++ thermal_zone_destroy_device_groups(tz); ++ goto remove_id; ++ } result = device_register(&tz->device); if (result) goto release_device; + +- for (count = 0; count < trips; count++) { ++ for (count = 0; count < num_trips; count++) { + if (tz->ops->get_trip_type(tz, count, &trip_type) || + tz->ops->get_trip_temp(tz, count, &trip_temp) || + !trip_temp) +diff --git a/drivers/thermal/thermal_helpers.c b/drivers/thermal/thermal_helpers.c +index 3edd047e144f0..ee7027bdcafa8 100644 +--- a/drivers/thermal/thermal_helpers.c ++++ b/drivers/thermal/thermal_helpers.c +@@ -90,7 +90,7 @@ int thermal_zone_get_temp(struct thermal_zone_device *tz, int *temp) + ret = tz->ops->get_temp(tz, temp); + + if (IS_ENABLED(CONFIG_THERMAL_EMULATION) && tz->emul_temperature) { +- for (count = 0; count < tz->trips; count++) { ++ for (count = 0; count < tz->num_trips; count++) { + ret = tz->ops->get_trip_type(tz, count, &type); + if (!ret && type == THERMAL_TRIP_CRITICAL) { + ret = tz->ops->get_trip_temp(tz, count, +@@ -138,7 +138,7 @@ void thermal_zone_set_trips(struct thermal_zone_device *tz) + if (!tz->ops->set_trips || !tz->ops->get_trip_hyst) + goto exit; + +- for (i = 0; i < tz->trips; i++) { ++ for (i = 0; i < tz->num_trips; i++) { + int trip_low; + + tz->ops->get_trip_temp(tz, i, &trip_temp); diff --git a/drivers/thermal/thermal_netlink.c b/drivers/thermal/thermal_netlink.c -index 1234dbe958951..41c8d47805c4e 100644 +index 1234dbe958951..c70d407c2c714 100644 --- a/drivers/thermal/thermal_netlink.c +++ b/drivers/thermal/thermal_netlink.c -@@ -418,11 +418,12 @@ static int thermal_genl_cmd_tz_get_trip(struct param *p) - for (i = 0; i < tz->trips; i++) { +@@ -415,14 +415,15 @@ static int thermal_genl_cmd_tz_get_trip(struct param *p) + + mutex_lock(&tz->lock); + +- for (i = 0; i < tz->trips; i++) { ++ for (i = 0; i < tz->num_trips; i++) { enum thermal_trip_type type; - int temp, hyst; @@ -276527,10 +330110,122 @@ index 6379f26a335f6..9233f7e744544 100644 ret = data->ops->set_trip_temp(data->sensor_data, trip, temp); diff --git a/drivers/thermal/thermal_sysfs.c b/drivers/thermal/thermal_sysfs.c -index 1c4aac8464a70..1e5a78131aba9 100644 +index 1c4aac8464a70..de7cdec3db909 100644 --- a/drivers/thermal/thermal_sysfs.c +++ b/drivers/thermal/thermal_sysfs.c -@@ -813,12 +813,13 @@ static const struct attribute_group cooling_device_stats_attr_group = { +@@ -416,15 +416,15 @@ static int create_trip_attrs(struct thermal_zone_device *tz, int mask) + int indx; + + /* This function works only for zones with at least one trip */ +- if (tz->trips <= 0) ++ if (tz->num_trips <= 0) + return -EINVAL; + +- tz->trip_type_attrs = kcalloc(tz->trips, sizeof(*tz->trip_type_attrs), ++ tz->trip_type_attrs = kcalloc(tz->num_trips, sizeof(*tz->trip_type_attrs), + GFP_KERNEL); + if (!tz->trip_type_attrs) + return -ENOMEM; + +- tz->trip_temp_attrs = kcalloc(tz->trips, sizeof(*tz->trip_temp_attrs), ++ tz->trip_temp_attrs = kcalloc(tz->num_trips, sizeof(*tz->trip_temp_attrs), + GFP_KERNEL); + if (!tz->trip_temp_attrs) { + kfree(tz->trip_type_attrs); +@@ -432,7 +432,7 @@ static int create_trip_attrs(struct thermal_zone_device *tz, int mask) + } + + if (tz->ops->get_trip_hyst) { +- tz->trip_hyst_attrs = kcalloc(tz->trips, ++ tz->trip_hyst_attrs = kcalloc(tz->num_trips, + sizeof(*tz->trip_hyst_attrs), + GFP_KERNEL); + if (!tz->trip_hyst_attrs) { +@@ -442,7 +442,7 @@ static int 
create_trip_attrs(struct thermal_zone_device *tz, int mask) + } + } + +- attrs = kcalloc(tz->trips * 3 + 1, sizeof(*attrs), GFP_KERNEL); ++ attrs = kcalloc(tz->num_trips * 3 + 1, sizeof(*attrs), GFP_KERNEL); + if (!attrs) { + kfree(tz->trip_type_attrs); + kfree(tz->trip_temp_attrs); +@@ -451,7 +451,7 @@ static int create_trip_attrs(struct thermal_zone_device *tz, int mask) + return -ENOMEM; + } + +- for (indx = 0; indx < tz->trips; indx++) { ++ for (indx = 0; indx < tz->num_trips; indx++) { + /* create trip type attribute */ + snprintf(tz->trip_type_attrs[indx].name, THERMAL_NAME_LENGTH, + "trip_point_%d_type", indx); +@@ -478,7 +478,7 @@ static int create_trip_attrs(struct thermal_zone_device *tz, int mask) + tz->trip_temp_attrs[indx].attr.store = + trip_point_temp_store; + } +- attrs[indx + tz->trips] = &tz->trip_temp_attrs[indx].attr.attr; ++ attrs[indx + tz->num_trips] = &tz->trip_temp_attrs[indx].attr.attr; + + /* create Optional trip hyst attribute */ + if (!tz->ops->get_trip_hyst) +@@ -496,10 +496,10 @@ static int create_trip_attrs(struct thermal_zone_device *tz, int mask) + tz->trip_hyst_attrs[indx].attr.store = + trip_point_hyst_store; + } +- attrs[indx + tz->trips * 2] = ++ attrs[indx + tz->num_trips * 2] = + &tz->trip_hyst_attrs[indx].attr.attr; + } +- attrs[tz->trips * 3] = NULL; ++ attrs[tz->num_trips * 3] = NULL; + + tz->trips_attribute_group.attrs = attrs; + +@@ -540,7 +540,7 @@ int thermal_zone_create_device_groups(struct thermal_zone_device *tz, + for (i = 0; i < size - 2; i++) + groups[i] = thermal_zone_attribute_groups[i]; + +- if (tz->trips) { ++ if (tz->num_trips) { + result = create_trip_attrs(tz, mask); + if (result) { + kfree(groups); +@@ -561,7 +561,7 @@ void thermal_zone_destroy_device_groups(struct thermal_zone_device *tz) + if (!tz) + return; + +- if (tz->trips) ++ if (tz->num_trips) + destroy_trip_attrs(tz); + + kfree(tz->device.groups); +@@ -580,13 +580,8 @@ static ssize_t max_state_show(struct device *dev, struct device_attribute *attr, + char *buf) + { + struct thermal_cooling_device *cdev = to_cooling_device(dev); +- unsigned long state; +- int ret; + +- ret = cdev->ops->get_max_state(cdev, &state); +- if (ret) +- return ret; +- return sprintf(buf, "%ld\n", state); ++ return sprintf(buf, "%ld\n", cdev->max_state); + } + + static ssize_t cur_state_show(struct device *dev, struct device_attribute *attr, +@@ -616,6 +611,10 @@ cur_state_store(struct device *dev, struct device_attribute *attr, + if ((long)state < 0) + return -EINVAL; + ++ /* Requested state should be less than max_state + 1 */ ++ if (state > cdev->max_state) ++ return -EINVAL; ++ + mutex_lock(&cdev->lock); + + result = cdev->ops->set_cur_state(cdev, state); +@@ -813,12 +812,13 @@ static const struct attribute_group cooling_device_stats_attr_group = { static void cooling_device_stats_setup(struct thermal_cooling_device *cdev) { @@ -276545,7 +330240,7 @@ index 1c4aac8464a70..1e5a78131aba9 100644 states++; /* Total number of states is highest state + 1 */ -@@ -828,7 +829,7 @@ static void cooling_device_stats_setup(struct thermal_cooling_device *cdev) +@@ -828,7 +828,7 @@ static void cooling_device_stats_setup(struct thermal_cooling_device *cdev) stats = kzalloc(var, GFP_KERNEL); if (!stats) @@ -276554,7 +330249,7 @@ index 1c4aac8464a70..1e5a78131aba9 100644 stats->time_in_state = (ktime_t *)(stats + 1); stats->trans_table = (unsigned int *)(stats->time_in_state + states); -@@ -838,9 +839,12 @@ static void cooling_device_stats_setup(struct thermal_cooling_device *cdev) +@@ -838,9 +838,12 @@ static 
void cooling_device_stats_setup(struct thermal_cooling_device *cdev) spin_lock_init(&stats->lock); @@ -276744,6 +330439,101 @@ index 69083aab2736c..5091677b3f4ba 100644 #define PCI_DEVICE_ID_INTEL_MAPLE_RIDGE_4C_NHI 0x1137 #define PCI_DEVICE_ID_INTEL_WIN_RIDGE_2C_NHI 0x157d #define PCI_DEVICE_ID_INTEL_WIN_RIDGE_2C_BRIDGE 0x157e +diff --git a/drivers/thunderbolt/path.c b/drivers/thunderbolt/path.c +index 564e2f42cebd9..299712accfe9b 100644 +--- a/drivers/thunderbolt/path.c ++++ b/drivers/thunderbolt/path.c +@@ -85,11 +85,12 @@ static int tb_path_find_src_hopid(struct tb_port *src, + * @dst_hopid: HopID to the @dst (%-1 if don't care) + * @last: Last port is filled here if not %NULL + * @name: Name of the path ++ * @alloc_hopid: Allocate HopIDs for the ports + * + * Follows a path starting from @src and @src_hopid to the last output +- * port of the path. Allocates HopIDs for the visited ports. Call +- * tb_path_free() to release the path and allocated HopIDs when the path +- * is not needed anymore. ++ * port of the path. Allocates HopIDs for the visited ports (if ++ * @alloc_hopid is true). Call tb_path_free() to release the path and ++ * allocated HopIDs when the path is not needed anymore. + * + * Note function discovers also incomplete paths so caller should check + * that the @dst port is the expected one. If it is not, the path can be +@@ -99,7 +100,8 @@ static int tb_path_find_src_hopid(struct tb_port *src, + */ + struct tb_path *tb_path_discover(struct tb_port *src, int src_hopid, + struct tb_port *dst, int dst_hopid, +- struct tb_port **last, const char *name) ++ struct tb_port **last, const char *name, ++ bool alloc_hopid) + { + struct tb_port *out_port; + struct tb_regs_hop hop; +@@ -156,6 +158,7 @@ struct tb_path *tb_path_discover(struct tb_port *src, int src_hopid, + path->tb = src->sw->tb; + path->path_length = num_hops; + path->activated = true; ++ path->alloc_hopid = alloc_hopid; + + path->hops = kcalloc(num_hops, sizeof(*path->hops), GFP_KERNEL); + if (!path->hops) { +@@ -177,13 +180,14 @@ struct tb_path *tb_path_discover(struct tb_port *src, int src_hopid, + goto err; + } + +- if (tb_port_alloc_in_hopid(p, h, h) < 0) ++ if (alloc_hopid && tb_port_alloc_in_hopid(p, h, h) < 0) + goto err; + + out_port = &sw->ports[hop.out_port]; + next_hop = hop.next_hop; + +- if (tb_port_alloc_out_hopid(out_port, next_hop, next_hop) < 0) { ++ if (alloc_hopid && ++ tb_port_alloc_out_hopid(out_port, next_hop, next_hop) < 0) { + tb_port_release_in_hopid(p, h); + goto err; + } +@@ -263,6 +267,8 @@ struct tb_path *tb_path_alloc(struct tb *tb, struct tb_port *src, int src_hopid, + return NULL; + } + ++ path->alloc_hopid = true; ++ + in_hopid = src_hopid; + out_port = NULL; + +@@ -345,17 +351,19 @@ err: + */ + void tb_path_free(struct tb_path *path) + { +- int i; +- +- for (i = 0; i < path->path_length; i++) { +- const struct tb_path_hop *hop = &path->hops[i]; +- +- if (hop->in_port) +- tb_port_release_in_hopid(hop->in_port, +- hop->in_hop_index); +- if (hop->out_port) +- tb_port_release_out_hopid(hop->out_port, +- hop->next_hop_index); ++ if (path->alloc_hopid) { ++ int i; ++ ++ for (i = 0; i < path->path_length; i++) { ++ const struct tb_path_hop *hop = &path->hops[i]; ++ ++ if (hop->in_port) ++ tb_port_release_in_hopid(hop->in_port, ++ hop->in_hop_index); ++ if (hop->out_port) ++ tb_port_release_out_hopid(hop->out_port, ++ hop->next_hop_index); ++ } + } + + kfree(path->hops); diff --git a/drivers/thunderbolt/switch.c b/drivers/thunderbolt/switch.c index 3014146081c19..ef647477ab383 100644 --- 
a/drivers/thunderbolt/switch.c @@ -276795,10 +330585,111 @@ index 3014146081c19..ef647477ab383 100644 if (ret) { dev_err(&sw->dev, "failed to add device: %d\n", ret); diff --git a/drivers/thunderbolt/tb.c b/drivers/thunderbolt/tb.c -index 2897a77d44c34..b805b69397944 100644 +index 2897a77d44c34..0c3e1d14cddca 100644 --- a/drivers/thunderbolt/tb.c +++ b/drivers/thunderbolt/tb.c -@@ -851,7 +851,7 @@ static struct tb_port *tb_find_dp_out(struct tb *tb, struct tb_port *in) +@@ -105,10 +105,37 @@ static void tb_remove_dp_resources(struct tb_switch *sw) + } + } + +-static void tb_discover_tunnels(struct tb_switch *sw) ++static void tb_discover_dp_resource(struct tb *tb, struct tb_port *port) + { +- struct tb *tb = sw->tb; + struct tb_cm *tcm = tb_priv(tb); ++ struct tb_port *p; ++ ++ list_for_each_entry(p, &tcm->dp_resources, list) { ++ if (p == port) ++ return; ++ } ++ ++ tb_port_dbg(port, "DP %s resource available discovered\n", ++ tb_port_is_dpin(port) ? "IN" : "OUT"); ++ list_add_tail(&port->list, &tcm->dp_resources); ++} ++ ++static void tb_discover_dp_resources(struct tb *tb) ++{ ++ struct tb_cm *tcm = tb_priv(tb); ++ struct tb_tunnel *tunnel; ++ ++ list_for_each_entry(tunnel, &tcm->tunnel_list, list) { ++ if (tb_tunnel_is_dp(tunnel)) ++ tb_discover_dp_resource(tb, tunnel->dst_port); ++ } ++} ++ ++static void tb_switch_discover_tunnels(struct tb_switch *sw, ++ struct list_head *list, ++ bool alloc_hopids) ++{ ++ struct tb *tb = sw->tb; + struct tb_port *port; + + tb_switch_for_each_port(sw, port) { +@@ -116,24 +143,41 @@ static void tb_discover_tunnels(struct tb_switch *sw) + + switch (port->config.type) { + case TB_TYPE_DP_HDMI_IN: +- tunnel = tb_tunnel_discover_dp(tb, port); ++ tunnel = tb_tunnel_discover_dp(tb, port, alloc_hopids); + break; + + case TB_TYPE_PCIE_DOWN: +- tunnel = tb_tunnel_discover_pci(tb, port); ++ tunnel = tb_tunnel_discover_pci(tb, port, alloc_hopids); + break; + + case TB_TYPE_USB3_DOWN: +- tunnel = tb_tunnel_discover_usb3(tb, port); ++ tunnel = tb_tunnel_discover_usb3(tb, port, alloc_hopids); + break; + + default: + break; + } + +- if (!tunnel) +- continue; ++ if (tunnel) ++ list_add_tail(&tunnel->list, list); ++ } + ++ tb_switch_for_each_port(sw, port) { ++ if (tb_port_has_remote(port)) { ++ tb_switch_discover_tunnels(port->remote->sw, list, ++ alloc_hopids); ++ } ++ } ++} ++ ++static void tb_discover_tunnels(struct tb *tb) ++{ ++ struct tb_cm *tcm = tb_priv(tb); ++ struct tb_tunnel *tunnel; ++ ++ tb_switch_discover_tunnels(tb->root_switch, &tcm->tunnel_list, true); ++ ++ list_for_each_entry(tunnel, &tcm->tunnel_list, list) { + if (tb_tunnel_is_pci(tunnel)) { + struct tb_switch *parent = tunnel->dst_port->sw; + +@@ -146,13 +190,6 @@ static void tb_discover_tunnels(struct tb_switch *sw) + pm_runtime_get_sync(&tunnel->src_port->sw->dev); + pm_runtime_get_sync(&tunnel->dst_port->sw->dev); + } +- +- list_add_tail(&tunnel->list, &tcm->tunnel_list); +- } +- +- tb_switch_for_each_port(sw, port) { +- if (tb_port_has_remote(port)) +- tb_discover_tunnels(port->remote->sw); + } + } + +@@ -851,7 +888,7 @@ static struct tb_port *tb_find_dp_out(struct tb *tb, struct tb_port *in) static void tb_tunnel_dp(struct tb *tb) { @@ -276807,7 +330698,7 @@ index 2897a77d44c34..b805b69397944 100644 struct tb_cm *tcm = tb_priv(tb); struct tb_port *port, *in, *out; struct tb_tunnel *tunnel; -@@ -896,6 +896,20 @@ static void tb_tunnel_dp(struct tb *tb) +@@ -896,6 +933,20 @@ static void tb_tunnel_dp(struct tb *tb) return; } @@ -276828,7 +330719,7 @@ index 2897a77d44c34..b805b69397944 100644 /* 
* DP stream needs the domain to be active so runtime resume * both ends of the tunnel. -@@ -927,7 +941,8 @@ static void tb_tunnel_dp(struct tb *tb) +@@ -927,7 +978,8 @@ static void tb_tunnel_dp(struct tb *tb) tb_dbg(tb, "available bandwidth for new DP tunnel %u/%u Mb/s\n", available_up, available_down); @@ -276838,11 +330729,90 @@ index 2897a77d44c34..b805b69397944 100644 if (!tunnel) { tb_port_dbg(out, "could not allocate DP tunnel\n"); goto err_reclaim; +@@ -1369,7 +1421,9 @@ static int tb_start(struct tb *tb) + /* Full scan to discover devices added before the driver was loaded. */ + tb_scan_switch(tb->root_switch); + /* Find out tunnels created by the boot firmware */ +- tb_discover_tunnels(tb->root_switch); ++ tb_discover_tunnels(tb); ++ /* Add DP resources from the DP tunnels created by the boot firmware */ ++ tb_discover_dp_resources(tb); + /* + * If the boot firmware did not create USB 3.x tunnels create them + * now for the whole topology. +@@ -1429,6 +1483,8 @@ static int tb_resume_noirq(struct tb *tb) + { + struct tb_cm *tcm = tb_priv(tb); + struct tb_tunnel *tunnel, *n; ++ unsigned int usb3_delay = 0; ++ LIST_HEAD(tunnels); + + tb_dbg(tb, "resuming...\n"); + +@@ -1439,8 +1495,31 @@ static int tb_resume_noirq(struct tb *tb) + tb_free_invalid_tunnels(tb); + tb_free_unplugged_children(tb->root_switch); + tb_restore_children(tb->root_switch); +- list_for_each_entry_safe(tunnel, n, &tcm->tunnel_list, list) ++ ++ /* ++ * If we get here from suspend to disk the boot firmware or the ++ * restore kernel might have created tunnels of its own. Since ++ * we cannot be sure they are usable for us we find and tear ++ * them down. ++ */ ++ tb_switch_discover_tunnels(tb->root_switch, &tunnels, false); ++ list_for_each_entry_safe_reverse(tunnel, n, &tunnels, list) { ++ if (tb_tunnel_is_usb3(tunnel)) ++ usb3_delay = 500; ++ tb_tunnel_deactivate(tunnel); ++ tb_tunnel_free(tunnel); ++ } ++ ++ /* Re-create our tunnels now */ ++ list_for_each_entry_safe(tunnel, n, &tcm->tunnel_list, list) { ++ /* USB3 requires delay before it can be re-activated */ ++ if (tb_tunnel_is_usb3(tunnel)) { ++ msleep(usb3_delay); ++ /* Only need to do it once */ ++ usb3_delay = 0; ++ } + tb_tunnel_restart(tunnel); ++ } + if (!list_empty(&tcm->tunnel_list)) { + /* + * the pcie links need some time to get going. diff --git a/drivers/thunderbolt/tb.h b/drivers/thunderbolt/tb.h -index 725104c83e3d5..b535d296d37e9 100644 +index 725104c83e3d5..8922217d580c7 100644 --- a/drivers/thunderbolt/tb.h +++ b/drivers/thunderbolt/tb.h -@@ -1067,6 +1067,7 @@ int usb4_switch_add_ports(struct tb_switch *sw); +@@ -354,6 +354,7 @@ enum tb_path_port { + * when deactivating this path + * @hops: Path hops + * @path_length: How many hops the path uses ++ * @alloc_hopid: Does this path consume port HopID + * + * A path consists of a number of hops (see &struct tb_path_hop). 
To + * establish a PCIe tunnel two paths have to be created between the two +@@ -374,6 +375,7 @@ struct tb_path { + bool clear_fc; + struct tb_path_hop *hops; + int path_length; ++ bool alloc_hopid; + }; + + /* HopIDs 0-7 are reserved by the Thunderbolt protocol */ +@@ -957,7 +959,8 @@ int tb_dp_port_enable(struct tb_port *port, bool enable); + + struct tb_path *tb_path_discover(struct tb_port *src, int src_hopid, + struct tb_port *dst, int dst_hopid, +- struct tb_port **last, const char *name); ++ struct tb_port **last, const char *name, ++ bool alloc_hopid); + struct tb_path *tb_path_alloc(struct tb *tb, struct tb_port *src, int src_hopid, + struct tb_port *dst, int dst_hopid, int link_nr, + const char *name); +@@ -1067,6 +1070,7 @@ int usb4_switch_add_ports(struct tb_switch *sw); void usb4_switch_remove_ports(struct tb_switch *sw); int usb4_port_unlock(struct tb_port *port); @@ -276939,10 +330909,89 @@ index 1f69bab236ee9..66b6e665e96f0 100644 KUNIT_ASSERT_EQ(test, dp_tunnel2->npaths, (size_t)3); diff --git a/drivers/thunderbolt/tunnel.c b/drivers/thunderbolt/tunnel.c -index bb5cc480fc9a3..bd98c719bf55e 100644 +index bb5cc480fc9a3..42cc4ef02e86e 100644 --- a/drivers/thunderbolt/tunnel.c +++ b/drivers/thunderbolt/tunnel.c -@@ -843,6 +843,7 @@ err_free: +@@ -207,12 +207,14 @@ static int tb_pci_init_path(struct tb_path *path) + * tb_tunnel_discover_pci() - Discover existing PCIe tunnels + * @tb: Pointer to the domain structure + * @down: PCIe downstream adapter ++ * @alloc_hopid: Allocate HopIDs from visited ports + * + * If @down adapter is active, follows the tunnel to the PCIe upstream + * adapter and back. Returns the discovered tunnel or %NULL if there was + * no tunnel. + */ +-struct tb_tunnel *tb_tunnel_discover_pci(struct tb *tb, struct tb_port *down) ++struct tb_tunnel *tb_tunnel_discover_pci(struct tb *tb, struct tb_port *down, ++ bool alloc_hopid) + { + struct tb_tunnel *tunnel; + struct tb_path *path; +@@ -233,7 +235,7 @@ struct tb_tunnel *tb_tunnel_discover_pci(struct tb *tb, struct tb_port *down) + * case. + */ + path = tb_path_discover(down, TB_PCI_HOPID, NULL, -1, +- &tunnel->dst_port, "PCIe Up"); ++ &tunnel->dst_port, "PCIe Up", alloc_hopid); + if (!path) { + /* Just disable the downstream port */ + tb_pci_port_enable(down, false); +@@ -244,7 +246,7 @@ struct tb_tunnel *tb_tunnel_discover_pci(struct tb *tb, struct tb_port *down) + goto err_free; + + path = tb_path_discover(tunnel->dst_port, -1, down, TB_PCI_HOPID, NULL, +- "PCIe Down"); ++ "PCIe Down", alloc_hopid); + if (!path) + goto err_deactivate; + tunnel->paths[TB_PCI_PATH_DOWN] = path; +@@ -761,6 +763,7 @@ static int tb_dp_init_video_path(struct tb_path *path) + * tb_tunnel_discover_dp() - Discover existing Display Port tunnels + * @tb: Pointer to the domain structure + * @in: DP in adapter ++ * @alloc_hopid: Allocate HopIDs from visited ports + * + * If @in adapter is active, follows the tunnel to the DP out adapter + * and back. Returns the discovered tunnel or %NULL if there was no +@@ -768,7 +771,8 @@ static int tb_dp_init_video_path(struct tb_path *path) + * + * Return: DP tunnel or %NULL if no tunnel found. 
+ */ +-struct tb_tunnel *tb_tunnel_discover_dp(struct tb *tb, struct tb_port *in) ++struct tb_tunnel *tb_tunnel_discover_dp(struct tb *tb, struct tb_port *in, ++ bool alloc_hopid) + { + struct tb_tunnel *tunnel; + struct tb_port *port; +@@ -787,7 +791,7 @@ struct tb_tunnel *tb_tunnel_discover_dp(struct tb *tb, struct tb_port *in) + tunnel->src_port = in; + + path = tb_path_discover(in, TB_DP_VIDEO_HOPID, NULL, -1, +- &tunnel->dst_port, "Video"); ++ &tunnel->dst_port, "Video", alloc_hopid); + if (!path) { + /* Just disable the DP IN port */ + tb_dp_port_enable(in, false); +@@ -797,14 +801,15 @@ struct tb_tunnel *tb_tunnel_discover_dp(struct tb *tb, struct tb_port *in) + if (tb_dp_init_video_path(tunnel->paths[TB_DP_VIDEO_PATH_OUT])) + goto err_free; + +- path = tb_path_discover(in, TB_DP_AUX_TX_HOPID, NULL, -1, NULL, "AUX TX"); ++ path = tb_path_discover(in, TB_DP_AUX_TX_HOPID, NULL, -1, NULL, "AUX TX", ++ alloc_hopid); + if (!path) + goto err_deactivate; + tunnel->paths[TB_DP_AUX_PATH_OUT] = path; + tb_dp_init_aux_path(tunnel->paths[TB_DP_AUX_PATH_OUT]); + + path = tb_path_discover(tunnel->dst_port, -1, in, TB_DP_AUX_RX_HOPID, +- &port, "AUX RX"); ++ &port, "AUX RX", alloc_hopid); + if (!path) + goto err_deactivate; + tunnel->paths[TB_DP_AUX_PATH_IN] = path; +@@ -843,6 +848,7 @@ err_free: * @tb: Pointer to the domain structure * @in: DP in adapter port * @out: DP out adapter port @@ -276950,7 +330999,7 @@ index bb5cc480fc9a3..bd98c719bf55e 100644 * @max_up: Maximum available upstream bandwidth for the DP tunnel (%0 * if not limited) * @max_down: Maximum available downstream bandwidth for the DP tunnel -@@ -854,8 +855,8 @@ err_free: +@@ -854,8 +860,8 @@ err_free: * Return: Returns a tb_tunnel on success or NULL on failure. */ struct tb_tunnel *tb_tunnel_alloc_dp(struct tb *tb, struct tb_port *in, @@ -276961,7 +331010,7 @@ index bb5cc480fc9a3..bd98c719bf55e 100644 { struct tb_tunnel *tunnel; struct tb_path **paths; -@@ -879,21 +880,21 @@ struct tb_tunnel *tb_tunnel_alloc_dp(struct tb *tb, struct tb_port *in, +@@ -879,21 +885,21 @@ struct tb_tunnel *tb_tunnel_alloc_dp(struct tb *tb, struct tb_port *in, paths = tunnel->paths; path = tb_path_alloc(tb, in, TB_DP_VIDEO_HOPID, out, TB_DP_VIDEO_HOPID, @@ -276986,13 +331035,65 @@ index bb5cc480fc9a3..bd98c719bf55e 100644 if (!path) goto err_free; tb_dp_init_aux_path(path); +@@ -1256,7 +1262,7 @@ static void tb_usb3_reclaim_available_bandwidth(struct tb_tunnel *tunnel, + return; + } else if (!ret) { + /* Use maximum link rate if the link valid is not set */ +- ret = usb4_usb3_port_max_link_rate(tunnel->src_port); ++ ret = tb_usb3_max_link_rate(tunnel->dst_port, tunnel->src_port); + if (ret < 0) { + tb_tunnel_warn(tunnel, "failed to read maximum link rate\n"); + return; +@@ -1343,12 +1349,14 @@ static void tb_usb3_init_path(struct tb_path *path) + * tb_tunnel_discover_usb3() - Discover existing USB3 tunnels + * @tb: Pointer to the domain structure + * @down: USB3 downstream adapter ++ * @alloc_hopid: Allocate HopIDs from visited ports + * + * If @down adapter is active, follows the tunnel to the USB3 upstream + * adapter and back. Returns the discovered tunnel or %NULL if there was + * no tunnel. + */ +-struct tb_tunnel *tb_tunnel_discover_usb3(struct tb *tb, struct tb_port *down) ++struct tb_tunnel *tb_tunnel_discover_usb3(struct tb *tb, struct tb_port *down, ++ bool alloc_hopid) + { + struct tb_tunnel *tunnel; + struct tb_path *path; +@@ -1369,7 +1377,7 @@ struct tb_tunnel *tb_tunnel_discover_usb3(struct tb *tb, struct tb_port *down) + * case. 
+ */ + path = tb_path_discover(down, TB_USB3_HOPID, NULL, -1, +- &tunnel->dst_port, "USB3 Down"); ++ &tunnel->dst_port, "USB3 Down", alloc_hopid); + if (!path) { + /* Just disable the downstream port */ + tb_usb3_port_enable(down, false); +@@ -1379,7 +1387,7 @@ struct tb_tunnel *tb_tunnel_discover_usb3(struct tb *tb, struct tb_port *down) + tb_usb3_init_path(tunnel->paths[TB_USB3_PATH_DOWN]); + + path = tb_path_discover(tunnel->dst_port, -1, down, TB_USB3_HOPID, NULL, +- "USB3 Up"); ++ "USB3 Up", alloc_hopid); + if (!path) + goto err_deactivate; + tunnel->paths[TB_USB3_PATH_UP] = path; diff --git a/drivers/thunderbolt/tunnel.h b/drivers/thunderbolt/tunnel.h -index eea14e24f7e0a..a920274316979 100644 +index eea14e24f7e0a..bb4d1f1d6d0b0 100644 --- a/drivers/thunderbolt/tunnel.h +++ b/drivers/thunderbolt/tunnel.h -@@ -69,8 +69,8 @@ struct tb_tunnel *tb_tunnel_alloc_pci(struct tb *tb, struct tb_port *up, +@@ -64,20 +64,23 @@ struct tb_tunnel { + int allocated_down; + }; + +-struct tb_tunnel *tb_tunnel_discover_pci(struct tb *tb, struct tb_port *down); ++struct tb_tunnel *tb_tunnel_discover_pci(struct tb *tb, struct tb_port *down, ++ bool alloc_hopid); + struct tb_tunnel *tb_tunnel_alloc_pci(struct tb *tb, struct tb_port *up, struct tb_port *down); - struct tb_tunnel *tb_tunnel_discover_dp(struct tb *tb, struct tb_port *in); +-struct tb_tunnel *tb_tunnel_discover_dp(struct tb *tb, struct tb_port *in); ++struct tb_tunnel *tb_tunnel_discover_dp(struct tb *tb, struct tb_port *in, ++ bool alloc_hopid); struct tb_tunnel *tb_tunnel_alloc_dp(struct tb *tb, struct tb_port *in, - struct tb_port *out, int max_up, - int max_down); @@ -277001,6 +331102,15 @@ index eea14e24f7e0a..a920274316979 100644 struct tb_tunnel *tb_tunnel_alloc_dma(struct tb *tb, struct tb_port *nhi, struct tb_port *dst, int transmit_path, int transmit_ring, int receive_path, + int receive_ring); + bool tb_tunnel_match_dma(const struct tb_tunnel *tunnel, int transmit_path, + int transmit_ring, int receive_path, int receive_ring); +-struct tb_tunnel *tb_tunnel_discover_usb3(struct tb *tb, struct tb_port *down); ++struct tb_tunnel *tb_tunnel_discover_usb3(struct tb *tb, struct tb_port *down, ++ bool alloc_hopid); + struct tb_tunnel *tb_tunnel_alloc_usb3(struct tb *tb, struct tb_port *up, + struct tb_port *down, int max_up, + int max_down); diff --git a/drivers/thunderbolt/usb4.c b/drivers/thunderbolt/usb4.c index ceddbe7e9f93f..90986567f1f90 100644 --- a/drivers/thunderbolt/usb4.c @@ -277159,7 +331269,7 @@ index 82a76cac94deb..32366caca6623 100644 diff --git a/drivers/tty/hvc/hvc_xen.c b/drivers/tty/hvc/hvc_xen.c -index f0bf01ea069ae..8ee7ce1206925 100644 +index f0bf01ea069ae..609a51137e96f 100644 --- a/drivers/tty/hvc/hvc_xen.c +++ b/drivers/tty/hvc/hvc_xen.c @@ -37,6 +37,8 @@ struct xencons_info { @@ -277171,7 +331281,34 @@ index f0bf01ea069ae..8ee7ce1206925 100644 struct hvc_struct *hvc; int irq; int vtermno; -@@ -138,6 +140,8 @@ static int domU_read_console(uint32_t vtermno, char *buf, int len) +@@ -50,17 +52,22 @@ static DEFINE_SPINLOCK(xencons_lock); + + static struct xencons_info *vtermno_to_xencons(int vtermno) + { +- struct xencons_info *entry, *n, *ret = NULL; ++ struct xencons_info *entry, *ret = NULL; ++ unsigned long flags; + +- if (list_empty(&xenconsoles)) +- return NULL; ++ spin_lock_irqsave(&xencons_lock, flags); ++ if (list_empty(&xenconsoles)) { ++ spin_unlock_irqrestore(&xencons_lock, flags); ++ return NULL; ++ } + +- list_for_each_entry_safe(entry, n, &xenconsoles, list) { ++ list_for_each_entry(entry, &xenconsoles, 
list) { + if (entry->vtermno == vtermno) { + ret = entry; + break; + } + } ++ spin_unlock_irqrestore(&xencons_lock, flags); + + return ret; + } +@@ -138,6 +145,8 @@ static int domU_read_console(uint32_t vtermno, char *buf, int len) XENCONS_RING_IDX cons, prod; int recv = 0; struct xencons_info *xencons = vtermno_to_xencons(vtermno); @@ -277180,7 +331317,7 @@ index f0bf01ea069ae..8ee7ce1206925 100644 if (xencons == NULL) return -EINVAL; intf = xencons->intf; -@@ -157,7 +161,27 @@ static int domU_read_console(uint32_t vtermno, char *buf, int len) +@@ -157,7 +166,27 @@ static int domU_read_console(uint32_t vtermno, char *buf, int len) mb(); /* read ring before consuming */ intf->in_cons = cons; @@ -277209,7 +331346,83 @@ index f0bf01ea069ae..8ee7ce1206925 100644 return recv; } -@@ -386,7 +410,7 @@ static int xencons_connect_backend(struct xenbus_device *dev, +@@ -199,7 +228,7 @@ static int xen_hvm_console_init(void) + { + int r; + uint64_t v = 0; +- unsigned long gfn; ++ unsigned long gfn, flags; + struct xencons_info *info; + + if (!xen_hvm_domain()) +@@ -234,9 +263,9 @@ static int xen_hvm_console_init(void) + goto err; + info->vtermno = HVC_COOKIE; + +- spin_lock(&xencons_lock); ++ spin_lock_irqsave(&xencons_lock, flags); + list_add_tail(&info->list, &xenconsoles); +- spin_unlock(&xencons_lock); ++ spin_unlock_irqrestore(&xencons_lock, flags); + + return 0; + err: +@@ -259,6 +288,7 @@ static int xencons_info_pv_init(struct xencons_info *info, int vtermno) + static int xen_pv_console_init(void) + { + struct xencons_info *info; ++ unsigned long flags; + + if (!xen_pv_domain()) + return -ENODEV; +@@ -275,9 +305,9 @@ static int xen_pv_console_init(void) + /* already configured */ + return 0; + } +- spin_lock(&xencons_lock); ++ spin_lock_irqsave(&xencons_lock, flags); + xencons_info_pv_init(info, HVC_COOKIE); +- spin_unlock(&xencons_lock); ++ spin_unlock_irqrestore(&xencons_lock, flags); + + return 0; + } +@@ -285,6 +315,7 @@ static int xen_pv_console_init(void) + static int xen_initial_domain_console_init(void) + { + struct xencons_info *info; ++ unsigned long flags; + + if (!xen_initial_domain()) + return -ENODEV; +@@ -299,9 +330,9 @@ static int xen_initial_domain_console_init(void) + info->irq = bind_virq_to_irq(VIRQ_CONSOLE, 0, false); + info->vtermno = HVC_COOKIE; + +- spin_lock(&xencons_lock); ++ spin_lock_irqsave(&xencons_lock, flags); + list_add_tail(&info->list, &xenconsoles); +- spin_unlock(&xencons_lock); ++ spin_unlock_irqrestore(&xencons_lock, flags); + + return 0; + } +@@ -356,10 +387,12 @@ static void xencons_free(struct xencons_info *info) + + static int xen_console_remove(struct xencons_info *info) + { ++ unsigned long flags; ++ + xencons_disconnect_backend(info); +- spin_lock(&xencons_lock); ++ spin_lock_irqsave(&xencons_lock, flags); + list_del(&info->list); +- spin_unlock(&xencons_lock); ++ spin_unlock_irqrestore(&xencons_lock, flags); + if (info->xbdev != NULL) + xencons_free(info); + else { +@@ -386,7 +419,7 @@ static int xencons_connect_backend(struct xenbus_device *dev, if (ret) return ret; info->evtchn = evtchn; @@ -277218,7 +331431,27 @@ index f0bf01ea069ae..8ee7ce1206925 100644 if (irq < 0) return irq; info->irq = irq; -@@ -550,7 +574,7 @@ static int __init xen_hvc_init(void) +@@ -440,6 +473,7 @@ static int xencons_probe(struct xenbus_device *dev, + { + int ret, devid; + struct xencons_info *info; ++ unsigned long flags; + + devid = dev->nodename[strlen(dev->nodename) - 1] - '0'; + if (devid == 0) +@@ -458,9 +492,9 @@ static int xencons_probe(struct xenbus_device *dev, 
+ ret = xencons_connect_backend(dev, info); + if (ret < 0) + goto error; +- spin_lock(&xencons_lock); ++ spin_lock_irqsave(&xencons_lock, flags); + list_add_tail(&info->list, &xenconsoles); +- spin_unlock(&xencons_lock); ++ spin_unlock_irqrestore(&xencons_lock, flags); + + return 0; + +@@ -550,7 +584,7 @@ static int __init xen_hvc_init(void) return r; info = vtermno_to_xencons(HVC_COOKIE); @@ -277227,6 +331460,21 @@ index f0bf01ea069ae..8ee7ce1206925 100644 } if (info->irq < 0) info->irq = 0; /* NO_IRQ */ +@@ -559,10 +593,12 @@ static int __init xen_hvc_init(void) + + info->hvc = hvc_alloc(HVC_COOKIE, info->irq, ops, 256); + if (IS_ERR(info->hvc)) { ++ unsigned long flags; ++ + r = PTR_ERR(info->hvc); +- spin_lock(&xencons_lock); ++ spin_lock_irqsave(&xencons_lock, flags); + list_del(&info->list); +- spin_unlock(&xencons_lock); ++ spin_unlock_irqrestore(&xencons_lock, flags); + if (info->irq) + unbind_from_irqhandler(info->irq, NULL); + kfree(info); diff --git a/drivers/tty/moxa.c b/drivers/tty/moxa.c index bf17e90858b8c..a29ec5a938396 100644 --- a/drivers/tty/moxa.c @@ -277331,7 +331579,7 @@ index 1216f3985e18e..3b3e169c1f699 100644 outb(MOXA_MUST_FCR_GDA_MODE_ENABLE | UART_FCR_ENABLE_FIFO | UART_FCR_CLEAR_RCVR | UART_FCR_CLEAR_XMIT, diff --git a/drivers/tty/n_gsm.c b/drivers/tty/n_gsm.c -index 1d92d2a848894..154697be11b0a 100644 +index 1d92d2a848894..813a458871713 100644 --- a/drivers/tty/n_gsm.c +++ b/drivers/tty/n_gsm.c @@ -73,6 +73,8 @@ module_param(debug, int, 0600); @@ -277979,6 +332227,15 @@ index 1d92d2a848894..154697be11b0a 100644 gsm_control_transmit(gsm, ctrl); mod_timer(&gsm->t2_timer, jiffies + gsm->t2 * HZ / 100); } +@@ -1354,7 +1577,7 @@ static struct gsm_control *gsm_control_send(struct gsm_mux *gsm, + unsigned int command, u8 *data, int clen) + { + struct gsm_control *ctrl = kzalloc(sizeof(struct gsm_control), +- GFP_KERNEL); ++ GFP_ATOMIC); + unsigned long flags; + if (ctrl == NULL) + return NULL; @@ -1372,7 +1595,7 @@ retry: /* If DLCI0 is in ADM mode skip retries, it won't respond */ @@ -279277,7 +333534,7 @@ index 2350fb3bb5e4c..179bb1375636b 100644 port.port.flags = UPF_SHARE_IRQ | UPF_BOOT_AUTOCONF | UPF_IOREMAP | UPF_FIXED_PORT | UPF_FIXED_TYPE | UPF_NO_THRE_TEST; diff --git a/drivers/tty/serial/8250/8250_bcm7271.c b/drivers/tty/serial/8250/8250_bcm7271.c -index 7f656fac503fe..711cf30e835aa 100644 +index 7f656fac503fe..60b3ac1a03175 100644 --- a/drivers/tty/serial/8250/8250_bcm7271.c +++ b/drivers/tty/serial/8250/8250_bcm7271.c @@ -237,6 +237,7 @@ struct brcmuart_priv { @@ -279371,8 +333628,27 @@ index 7f656fac503fe..711cf30e835aa 100644 return 0; } +@@ -1184,9 +1214,17 @@ static struct platform_driver brcmuart_platform_driver = { + + static int __init brcmuart_init(void) + { ++ int ret; ++ + brcmuart_debugfs_root = debugfs_create_dir( + brcmuart_platform_driver.driver.name, NULL); +- return platform_driver_register(&brcmuart_platform_driver); ++ ret = platform_driver_register(&brcmuart_platform_driver); ++ if (ret) { ++ debugfs_remove_recursive(brcmuart_debugfs_root); ++ return ret; ++ } ++ ++ return 0; + } + module_init(brcmuart_init); + diff --git a/drivers/tty/serial/8250/8250_core.c b/drivers/tty/serial/8250/8250_core.c -index 1ce193daea7f1..c3348d5af9229 100644 +index 1ce193daea7f1..f3bfaa1a794bd 100644 --- a/drivers/tty/serial/8250/8250_core.c +++ b/drivers/tty/serial/8250/8250_core.c @@ -23,6 +23,7 @@ @@ -279433,11 +333709,59 @@ index 1ce193daea7f1..c3348d5af9229 100644 serial8250_apply_quirks(up); uart_add_one_port(drv, &up->port); } +@@ -1006,6 +1016,7 @@ 
int serial8250_register_8250_port(const struct uart_8250_port *up) + uart->port.throttle = up->port.throttle; + uart->port.unthrottle = up->port.unthrottle; + uart->port.rs485_config = up->port.rs485_config; ++ uart->port.rs485_supported = up->port.rs485_supported; + uart->port.rs485 = up->port.rs485; + uart->rs485_start_tx = up->rs485_start_tx; + uart->rs485_stop_tx = up->rs485_stop_tx; diff --git a/drivers/tty/serial/8250/8250_dma.c b/drivers/tty/serial/8250/8250_dma.c -index 890fa7ddaa7f3..1bdc8d6432fef 100644 +index 890fa7ddaa7f3..ec3cd723256fb 100644 --- a/drivers/tty/serial/8250/8250_dma.c +++ b/drivers/tty/serial/8250/8250_dma.c -@@ -64,10 +64,19 @@ int serial8250_tx_dma(struct uart_8250_port *p) +@@ -46,28 +46,57 @@ static void __dma_rx_complete(void *param) + struct uart_8250_dma *dma = p->dma; + struct tty_port *tty_port = &p->port.state->port; + struct dma_tx_state state; ++ enum dma_status dma_status; + int count; + +- dma->rx_running = 0; +- dmaengine_tx_status(dma->rxchan, dma->rx_cookie, &state); ++ /* ++ * New DMA Rx can be started during the completion handler before it ++ * could acquire port's lock and it might still be ongoing. Don't to ++ * anything in such case. ++ */ ++ dma_status = dmaengine_tx_status(dma->rxchan, dma->rx_cookie, &state); ++ if (dma_status == DMA_IN_PROGRESS) ++ return; + + count = dma->rx_size - state.residue; + + tty_insert_flip_string(tty_port, dma->rx_buf, count); + p->port.icount.rx += count; ++ dma->rx_running = 0; + + tty_flip_buffer_push(tty_port); + } + ++static void dma_rx_complete(void *param) ++{ ++ struct uart_8250_port *p = param; ++ struct uart_8250_dma *dma = p->dma; ++ unsigned long flags; ++ ++ spin_lock_irqsave(&p->port.lock, flags); ++ if (dma->rx_running) ++ __dma_rx_complete(p); ++ spin_unlock_irqrestore(&p->port.lock, flags); ++} ++ + int serial8250_tx_dma(struct uart_8250_port *p) + { struct uart_8250_dma *dma = p->dma; struct circ_buf *xmit = &p->port.state->xmit; struct dma_async_tx_descriptor *desc; @@ -279458,7 +333782,7 @@ index 890fa7ddaa7f3..1bdc8d6432fef 100644 if (uart_tx_stopped(&p->port) || uart_circ_empty(xmit)) { /* We have been called from __dma_tx_complete() */ -@@ -77,6 +86,8 @@ int serial8250_tx_dma(struct uart_8250_port *p) +@@ -77,6 +106,8 @@ int serial8250_tx_dma(struct uart_8250_port *p) dma->tx_size = CIRC_CNT_TO_END(xmit->head, xmit->tail, UART_XMIT_SIZE); @@ -279467,7 +333791,7 @@ index 890fa7ddaa7f3..1bdc8d6432fef 100644 desc = dmaengine_prep_slave_single(dma->txchan, dma->tx_addr + xmit->tail, dma->tx_size, DMA_MEM_TO_DEV, -@@ -114,6 +125,8 @@ int serial8250_rx_dma(struct uart_8250_port *p) +@@ -114,6 +145,8 @@ int serial8250_rx_dma(struct uart_8250_port *p) if (dma->rx_running) return 0; @@ -279476,6 +333800,15 @@ index 890fa7ddaa7f3..1bdc8d6432fef 100644 desc = dmaengine_prep_slave_single(dma->rxchan, dma->rx_addr, dma->rx_size, DMA_DEV_TO_MEM, DMA_PREP_INTERRUPT | DMA_CTRL_ACK); +@@ -121,7 +154,7 @@ int serial8250_rx_dma(struct uart_8250_port *p) + return -EBUSY; + + dma->rx_running = 1; +- desc->callback = __dma_rx_complete; ++ desc->callback = dma_rx_complete; + desc->callback_param = p; + + dma->rx_cookie = dmaengine_submit(desc); diff --git a/drivers/tty/serial/8250/8250_dw.c b/drivers/tty/serial/8250/8250_dw.c index a3a0154da567d..ace221afeb039 100644 --- a/drivers/tty/serial/8250/8250_dw.c @@ -279587,7 +333920,7 @@ index 673cda3d011d0..948d0a1c6ae8e 100644 dev->irq = iosapic_serial_irq(dev); #endif diff --git a/drivers/tty/serial/8250/8250_lpss.c b/drivers/tty/serial/8250/8250_lpss.c -index 
848d81e3838c2..49ae73f4d3a04 100644 +index 848d81e3838c2..87d70e81273c0 100644 --- a/drivers/tty/serial/8250/8250_lpss.c +++ b/drivers/tty/serial/8250/8250_lpss.c @@ -121,8 +121,7 @@ static int byt_serial_setup(struct lpss8250 *lpss, struct uart_port *port) @@ -279624,7 +333957,13 @@ index 848d81e3838c2..49ae73f4d3a04 100644 static int ehl_serial_setup(struct lpss8250 *lpss, struct uart_port *port) { struct uart_8250_dma *dma = &lpss->data.dma; -@@ -171,6 +180,13 @@ static int ehl_serial_setup(struct lpss8250 *lpss, struct uart_port *port) +@@ -168,9 +177,19 @@ static int ehl_serial_setup(struct lpss8250 *lpss, struct uart_port *port) + * matching with the registered General Purpose DMA controllers. + */ + up->dma = dma; ++ ++ lpss->dma_maxburst = 16; ++ return 0; } @@ -279638,7 +333977,44 @@ index 848d81e3838c2..49ae73f4d3a04 100644 #ifdef CONFIG_SERIAL_8250_DMA static const struct dw_dma_platform_data qrk_serial_dma_pdata = { .nr_channels = 2, -@@ -345,8 +361,7 @@ static int lpss8250_probe(struct pci_dev *pdev, const struct pci_device_id *id) +@@ -262,8 +281,13 @@ static int lpss8250_dma_setup(struct lpss8250 *lpss, struct uart_8250_port *port + struct dw_dma_slave *rx_param, *tx_param; + struct device *dev = port->port.dev; + +- if (!lpss->dma_param.dma_dev) ++ if (!lpss->dma_param.dma_dev) { ++ dma = port->dma; ++ if (dma) ++ goto out_configuration_only; ++ + return 0; ++ } + + rx_param = devm_kzalloc(dev, sizeof(*rx_param), GFP_KERNEL); + if (!rx_param) +@@ -274,16 +298,18 @@ static int lpss8250_dma_setup(struct lpss8250 *lpss, struct uart_8250_port *port + return -ENOMEM; + + *rx_param = lpss->dma_param; +- dma->rxconf.src_maxburst = lpss->dma_maxburst; +- + *tx_param = lpss->dma_param; +- dma->txconf.dst_maxburst = lpss->dma_maxburst; + + dma->fn = lpss8250_dma_filter; + dma->rx_param = rx_param; + dma->tx_param = tx_param; + + port->dma = dma; ++ ++out_configuration_only: ++ dma->rxconf.src_maxburst = lpss->dma_maxburst; ++ dma->txconf.dst_maxburst = lpss->dma_maxburst; ++ + return 0; + } + +@@ -345,8 +371,7 @@ static int lpss8250_probe(struct pci_dev *pdev, const struct pci_device_id *id) return 0; err_exit: @@ -279648,7 +334024,7 @@ index 848d81e3838c2..49ae73f4d3a04 100644 pci_free_irq_vectors(pdev); return ret; } -@@ -357,8 +372,7 @@ static void lpss8250_remove(struct pci_dev *pdev) +@@ -357,8 +382,7 @@ static void lpss8250_remove(struct pci_dev *pdev) serial8250_unregister_port(lpss->data.line); @@ -279658,7 +334034,7 @@ index 848d81e3838c2..49ae73f4d3a04 100644 pci_free_irq_vectors(pdev); } -@@ -366,12 +380,14 @@ static const struct lpss8250_board byt_board = { +@@ -366,12 +390,14 @@ static const struct lpss8250_board byt_board = { .freq = 100000000, .base_baud = 2764800, .setup = byt_serial_setup, @@ -279839,8 +334215,137 @@ index bce28729dd7bd..be8626234627e 100644 port->iotype = UPIO_MEM; if (of_property_read_u32(np, "reg-io-width", &prop) == 0) { +diff --git a/drivers/tty/serial/8250/8250_omap.c b/drivers/tty/serial/8250/8250_omap.c +index 73e5f1dbd075d..078a7028ee5a2 100644 +--- a/drivers/tty/serial/8250/8250_omap.c ++++ b/drivers/tty/serial/8250/8250_omap.c +@@ -157,7 +157,11 @@ static u32 uart_read(struct uart_8250_port *up, u32 reg) + return readl(up->port.membase + (reg << up->port.regshift)); + } + +-static void omap8250_set_mctrl(struct uart_port *port, unsigned int mctrl) ++/* ++ * Called on runtime PM resume path from omap8250_restore_regs(), and ++ * omap8250_set_mctrl(). 
++ */ ++static void __omap8250_set_mctrl(struct uart_port *port, unsigned int mctrl) + { + struct uart_8250_port *up = up_to_u8250p(port); + struct omap8250_priv *priv = up->port.private_data; +@@ -181,6 +185,20 @@ static void omap8250_set_mctrl(struct uart_port *port, unsigned int mctrl) + } + } + ++static void omap8250_set_mctrl(struct uart_port *port, unsigned int mctrl) ++{ ++ int err; ++ ++ err = pm_runtime_resume_and_get(port->dev); ++ if (err) ++ return; ++ ++ __omap8250_set_mctrl(port, mctrl); ++ ++ pm_runtime_mark_last_busy(port->dev); ++ pm_runtime_put_autosuspend(port->dev); ++} ++ + /* + * Work Around for Errata i202 (2430, 3430, 3630, 4430 and 4460) + * The access to uart register after MDR1 Access +@@ -193,27 +211,10 @@ static void omap8250_set_mctrl(struct uart_port *port, unsigned int mctrl) + static void omap_8250_mdr1_errataset(struct uart_8250_port *up, + struct omap8250_priv *priv) + { +- u8 timeout = 255; +- + serial_out(up, UART_OMAP_MDR1, priv->mdr1); + udelay(2); + serial_out(up, UART_FCR, up->fcr | UART_FCR_CLEAR_XMIT | + UART_FCR_CLEAR_RCVR); +- /* +- * Wait for FIFO to empty: when empty, RX_FIFO_E bit is 0 and +- * TX_FIFO_E bit is 1. +- */ +- while (UART_LSR_THRE != (serial_in(up, UART_LSR) & +- (UART_LSR_THRE | UART_LSR_DR))) { +- timeout--; +- if (!timeout) { +- /* Should *never* happen. we warn and carry on */ +- dev_crit(up->port.dev, "Errata i202: timedout %x\n", +- serial_in(up, UART_LSR)); +- break; +- } +- udelay(1); +- } + } + + static void omap_8250_get_divisor(struct uart_port *port, unsigned int baud, +@@ -292,6 +293,7 @@ static void omap8250_restore_regs(struct uart_8250_port *up) + { + struct omap8250_priv *priv = up->port.private_data; + struct uart_8250_dma *dma = up->dma; ++ u8 mcr = serial8250_in_MCR(up); + + if (dma && dma->tx_running) { + /* +@@ -308,7 +310,7 @@ static void omap8250_restore_regs(struct uart_8250_port *up) + serial_out(up, UART_EFR, UART_EFR_ECB); + + serial_out(up, UART_LCR, UART_LCR_CONF_MODE_A); +- serial8250_out_MCR(up, UART_MCR_TCRTLR); ++ serial8250_out_MCR(up, mcr | UART_MCR_TCRTLR); + serial_out(up, UART_FCR, up->fcr); + + omap8250_update_scr(up, priv); +@@ -324,7 +326,8 @@ static void omap8250_restore_regs(struct uart_8250_port *up) + serial_out(up, UART_LCR, 0); + + /* drop TCR + TLR access, we setup XON/XOFF later */ +- serial8250_out_MCR(up, up->mcr); ++ serial8250_out_MCR(up, mcr); ++ + serial_out(up, UART_IER, up->ier); + + serial_out(up, UART_LCR, UART_LCR_CONF_MODE_B); +@@ -341,7 +344,10 @@ static void omap8250_restore_regs(struct uart_8250_port *up) + + omap8250_update_mdr1(up, priv); + +- up->port.ops->set_mctrl(&up->port, up->port.mctrl); ++ __omap8250_set_mctrl(&up->port, up->port.mctrl); ++ ++ if (up->port.rs485.flags & SER_RS485_ENABLED) ++ serial8250_em485_stop_tx(up); + } + + /* +@@ -680,7 +686,6 @@ static int omap_8250_startup(struct uart_port *port) + + pm_runtime_get_sync(port->dev); + +- up->mcr = 0; + serial_out(up, UART_FCR, UART_FCR_CLEAR_RCVR | UART_FCR_CLEAR_XMIT); + + serial_out(up, UART_LCR, UART_LCR_WLEN8); +@@ -1471,9 +1476,15 @@ err: + static int omap8250_remove(struct platform_device *pdev) + { + struct omap8250_priv *priv = platform_get_drvdata(pdev); ++ int err; ++ ++ err = pm_runtime_resume_and_get(&pdev->dev); ++ if (err) ++ return err; + + pm_runtime_dont_use_autosuspend(&pdev->dev); + pm_runtime_put_sync(&pdev->dev); ++ flush_work(&priv->qos_work); + pm_runtime_disable(&pdev->dev); + serial8250_unregister_port(priv->line); + cpu_latency_qos_remove_request(&priv->pm_qos_request); 
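The 8250_dma.c change above closes a completion race: the DMA Rx
completion callback can run concurrently with a new Rx transfer that was
issued from the interrupt path before the callback could take the port
lock. The fix re-queries the descriptor state and returns early while it
is still in flight, and only clears rx_running after the received bytes
have been pushed to the tty layer. A minimal sketch of that guard
pattern follows; struct my_rx and my_rx_complete() are hypothetical
stand-ins for the driver's structures, while dmaengine_tx_status() and
DMA_IN_PROGRESS are the real dmaengine API:

    #include <linux/dmaengine.h>
    #include <linux/spinlock.h>

    struct my_rx {
        spinlock_t lock;          /* plays the role of port->lock */
        struct dma_chan *chan;
        dma_cookie_t cookie;
        size_t size;              /* bytes requested for this Rx */
        bool running;
    };

    /* dmaengine completion callback, set via desc->callback */
    static void my_rx_complete(void *param)
    {
        struct my_rx *rx = param;
        struct dma_tx_state state;
        unsigned long flags;

        spin_lock_irqsave(&rx->lock, flags);

        /*
         * A new Rx may have been started before we acquired the lock;
         * if the cookie is still in progress, leave everything alone.
         */
        if (rx->running &&
            dmaengine_tx_status(rx->chan, rx->cookie, &state) != DMA_IN_PROGRESS) {
            size_t count = rx->size - state.residue;

            /* ... hand 'count' bytes to the consumer here ... */
            (void)count;
            rx->running = false;  /* cleared only after data is consumed */
        }

        spin_unlock_irqrestore(&rx->lock, flags);
    }

As in the hunk, the status query happens under the same lock the restart
path holds, so DMA_IN_PROGRESS reliably distinguishes a stale completion
from one that should tear down the transfer and push the data.
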
diff --git a/drivers/tty/serial/8250/8250_pci.c b/drivers/tty/serial/8250/8250_pci.c -index 726912b16a559..1d37ff0ec85a4 100644 +index 726912b16a559..8f0dafbab3bff 100644 --- a/drivers/tty/serial/8250/8250_pci.c +++ b/drivers/tty/serial/8250/8250_pci.c @@ -11,6 +11,7 @@ @@ -280283,15 +334788,15 @@ index 726912b16a559..1d37ff0ec85a4 100644 + + if (baud > maxrate + baud / 50) + continue; - -- if ((baud < actual_baud + tolerance) && -- (baud > actual_baud - tolerance)) { ++ + if (delta > baud / 50) + divisor++; +- if ((baud < actual_baud + tolerance) && +- (baud > actual_baud - tolerance)) { + if (divisor > 0xffff) + continue; -+ + + /* Update delta due to possible divisor change */ + delta = maxrate / divisor - baud; + if (abs(delta) < baud / 50) { @@ -280322,7 +334827,30 @@ index 726912b16a559..1d37ff0ec85a4 100644 port->port.iotype = UPIO_PORT; port->port.iobase = iobase; -@@ -1689,7 +1879,7 @@ static int skip_tx_en_setup(struct serial_private *priv, +@@ -1549,7 +1739,6 @@ static int pci_fintek_init(struct pci_dev *dev) + resource_size_t bar_data[3]; + u8 config_base; + struct serial_private *priv = pci_get_drvdata(dev); +- struct uart_8250_port *port; + + if (!(pci_resource_flags(dev, 5) & IORESOURCE_IO) || + !(pci_resource_flags(dev, 4) & IORESOURCE_IO) || +@@ -1596,13 +1785,7 @@ static int pci_fintek_init(struct pci_dev *dev) + + pci_write_config_byte(dev, config_base + 0x06, dev->irq); + +- if (priv) { +- /* re-apply RS232/485 mode when +- * pciserial_resume_ports() +- */ +- port = serial8250_get_port(priv->line[i]); +- pci_fintek_rs485_config(&port->port, NULL); +- } else { ++ if (!priv) { + /* First init without port data + * force init to RS232 Mode + */ +@@ -1689,7 +1872,7 @@ static int skip_tx_en_setup(struct serial_private *priv, struct uart_8250_port *port, int idx) { port->port.quirks |= UPQ_NO_TXEN_TEST; @@ -280331,7 +334859,7 @@ index 726912b16a559..1d37ff0ec85a4 100644 "serial8250: skipping TxEn test for device [%04x:%04x] subsystem [%04x:%04x]\n", priv->dev->vendor, priv->dev->device, priv->dev->subsystem_vendor, priv->dev->subsystem_device); -@@ -2317,12 +2507,19 @@ static struct pci_serial_quirk pci_serial_quirks[] = { +@@ -2317,12 +2500,19 @@ static struct pci_serial_quirk pci_serial_quirks[] = { .setup = pci_pericom_setup_four_at_eight, }, { @@ -280352,7 +334880,7 @@ index 726912b16a559..1d37ff0ec85a4 100644 { .vendor = PCI_VENDOR_ID_ACCESIO, .device = PCI_DEVICE_ID_ACCESIO_MPCIE_ICM232_4, -@@ -2506,7 +2703,7 @@ static struct pci_serial_quirk pci_serial_quirks[] = { +@@ -2506,7 +2696,7 @@ static struct pci_serial_quirk pci_serial_quirks[] = { .device = PCI_ANY_ID, .subvendor = PCI_ANY_ID, .subdevice = PCI_ANY_ID, @@ -280361,7 +334889,7 @@ index 726912b16a559..1d37ff0ec85a4 100644 .setup = pci_default_setup, }, /* -@@ -2518,7 +2715,7 @@ static struct pci_serial_quirk pci_serial_quirks[] = { +@@ -2518,7 +2708,7 @@ static struct pci_serial_quirk pci_serial_quirks[] = { .subvendor = PCI_ANY_ID, .subdevice = PCI_ANY_ID, .init = pci_oxsemi_tornado_init, @@ -280370,7 +334898,7 @@ index 726912b16a559..1d37ff0ec85a4 100644 }, { .vendor = PCI_VENDOR_ID_MAINPINE, -@@ -2526,7 +2723,7 @@ static struct pci_serial_quirk pci_serial_quirks[] = { +@@ -2526,7 +2716,7 @@ static struct pci_serial_quirk pci_serial_quirks[] = { .subvendor = PCI_ANY_ID, .subdevice = PCI_ANY_ID, .init = pci_oxsemi_tornado_init, @@ -280379,7 +334907,7 @@ index 726912b16a559..1d37ff0ec85a4 100644 }, { .vendor = PCI_VENDOR_ID_DIGI, -@@ -2534,7 +2731,7 @@ static struct pci_serial_quirk pci_serial_quirks[] = { +@@ 
-2534,7 +2724,7 @@ static struct pci_serial_quirk pci_serial_quirks[] = { .subvendor = PCI_SUBVENDOR_ID_IBM, .subdevice = PCI_ANY_ID, .init = pci_oxsemi_tornado_init, @@ -280388,7 +334916,7 @@ index 726912b16a559..1d37ff0ec85a4 100644 }, { .vendor = PCI_VENDOR_ID_INTEL, -@@ -2851,7 +3048,7 @@ enum pci_board_num_t { +@@ -2851,7 +3041,7 @@ enum pci_board_num_t { pbn_b0_2_1843200, pbn_b0_4_1843200, @@ -280397,7 +334925,7 @@ index 726912b16a559..1d37ff0ec85a4 100644 pbn_b0_bt_1_115200, pbn_b0_bt_2_115200, -@@ -2929,12 +3126,11 @@ enum pci_board_num_t { +@@ -2929,12 +3119,11 @@ enum pci_board_num_t { pbn_panacom2, pbn_panacom4, pbn_plx_romulus, @@ -280414,7 +334942,7 @@ index 726912b16a559..1d37ff0ec85a4 100644 pbn_intel_i960, pbn_sgi_ioc3, pbn_computone_4, -@@ -3081,10 +3277,10 @@ static struct pciserial_board pci_boards[] = { +@@ -3081,10 +3270,10 @@ static struct pciserial_board pci_boards[] = { .uart_offset = 8, }, @@ -280427,7 +334955,7 @@ index 726912b16a559..1d37ff0ec85a4 100644 .uart_offset = 8, }, -@@ -3455,20 +3651,6 @@ static struct pciserial_board pci_boards[] = { +@@ -3455,20 +3644,6 @@ static struct pciserial_board pci_boards[] = { .first_offset = 0x03, }, @@ -280448,7 +334976,7 @@ index 726912b16a559..1d37ff0ec85a4 100644 /* * This board uses the size of PCI Base region 0 to * signal now many ports are available -@@ -3479,31 +3661,31 @@ static struct pciserial_board pci_boards[] = { +@@ -3479,31 +3654,31 @@ static struct pciserial_board pci_boards[] = { .base_baud = 115200, .uart_offset = 8, }, @@ -280488,7 +335016,7 @@ index 726912b16a559..1d37ff0ec85a4 100644 .uart_offset = 0x200, .first_offset = 0x1000, }, -@@ -4000,12 +4182,12 @@ pciserial_init_ports(struct pci_dev *dev, const struct pciserial_board *board) +@@ -4000,12 +4175,12 @@ pciserial_init_ports(struct pci_dev *dev, const struct pciserial_board *board) uart.port.irq = 0; } else { if (pci_match_id(pci_use_msi, dev)) { @@ -280503,7 +335031,7 @@ index 726912b16a559..1d37ff0ec85a4 100644 rc = pci_alloc_irq_vectors(dev, 1, 1, PCI_IRQ_LEGACY); } if (rc < 0) { -@@ -4023,12 +4205,12 @@ pciserial_init_ports(struct pci_dev *dev, const struct pciserial_board *board) +@@ -4023,12 +4198,12 @@ pciserial_init_ports(struct pci_dev *dev, const struct pciserial_board *board) if (quirk->setup(priv, board, &uart, i)) break; @@ -280518,7 +335046,7 @@ index 726912b16a559..1d37ff0ec85a4 100644 "Couldn't register serial port %lx, irq %d, type %d, error %d\n", uart.port.iobase, uart.port.irq, uart.port.iotype, priv->line[i]); -@@ -4124,8 +4306,7 @@ pciserial_init_one(struct pci_dev *dev, const struct pci_device_id *ent) +@@ -4124,8 +4299,7 @@ pciserial_init_one(struct pci_dev *dev, const struct pci_device_id *ent) } if (ent->driver_data >= ARRAY_SIZE(pci_boards)) { @@ -280528,7 +335056,7 @@ index 726912b16a559..1d37ff0ec85a4 100644 return -EINVAL; } -@@ -4208,7 +4389,7 @@ static int pciserial_resume_one(struct device *dev) +@@ -4208,7 +4382,7 @@ static int pciserial_resume_one(struct device *dev) err = pci_enable_device(pdev); /* FIXME: We cannot simply error out here */ if (err) @@ -280537,7 +335065,7 @@ index 726912b16a559..1d37ff0ec85a4 100644 pciserial_resume_ports(priv); } return 0; -@@ -4401,13 +4582,6 @@ static const struct pci_device_id serial_pci_tbl[] = { +@@ -4401,13 +4575,6 @@ static const struct pci_device_id serial_pci_tbl[] = { { PCI_VENDOR_ID_PLX, PCI_DEVICE_ID_PLX_ROMULUS, 0x10b5, 0x106a, 0, 0, pbn_plx_romulus }, @@ -280551,7 +335079,7 @@ index 726912b16a559..1d37ff0ec85a4 100644 /* * Quatech cards. 
These actually have configurable clocks but for * now we just use the default. -@@ -4517,158 +4691,165 @@ static const struct pci_device_id serial_pci_tbl[] = { +@@ -4517,158 +4684,165 @@ static const struct pci_device_id serial_pci_tbl[] = { */ { PCI_VENDOR_ID_OXSEMI, 0xc101, /* OXPCIe952 1 Legacy UART */ PCI_ANY_ID, PCI_ANY_ID, 0, 0, @@ -280766,7 +335294,7 @@ index 726912b16a559..1d37ff0ec85a4 100644 /* * SBS Technologies, Inc. P-Octal and PMC-OCTPRO cards, -@@ -5192,8 +5373,30 @@ static const struct pci_device_id serial_pci_tbl[] = { +@@ -5192,8 +5366,30 @@ static const struct pci_device_id serial_pci_tbl[] = { { PCI_VENDOR_ID_INTASHIELD, PCI_DEVICE_ID_INTASHIELD_IS400, PCI_ANY_ID, PCI_ANY_ID, 0, 0, /* 135a.0dc0 */ pbn_b2_4_115200 }, @@ -280798,7 +335326,7 @@ index 726912b16a559..1d37ff0ec85a4 100644 */ { PCI_VENDOR_ID_INTASHIELD, 0x0D21, PCI_ANY_ID, PCI_ANY_ID, -@@ -5201,8 +5404,191 @@ static const struct pci_device_id serial_pci_tbl[] = { +@@ -5201,8 +5397,191 @@ static const struct pci_device_id serial_pci_tbl[] = { pbn_b2_4_115200 }, { PCI_VENDOR_ID_INTASHIELD, 0x0E34, PCI_ANY_ID, PCI_ANY_ID, @@ -280811,7 +335339,7 @@ index 726912b16a559..1d37ff0ec85a4 100644 + { PCI_VENDOR_ID_INTASHIELD, 0x0841, + PCI_ANY_ID, PCI_ANY_ID, + 0, 0, - pbn_b2_4_115200 }, ++ pbn_b2_4_115200 }, + /* + * Brainboxes UC-275/279 + */ @@ -280878,7 +335406,7 @@ index 726912b16a559..1d37ff0ec85a4 100644 + { PCI_VENDOR_ID_INTASHIELD, 0x0921, + PCI_ANY_ID, PCI_ANY_ID, + 0, 0, -+ pbn_b2_4_115200 }, + pbn_b2_4_115200 }, + /* + * Brainboxes PX-101 + */ @@ -280992,7 +335520,7 @@ index 726912b16a559..1d37ff0ec85a4 100644 * Perle PCI-RAS cards */ diff --git a/drivers/tty/serial/8250/8250_port.c b/drivers/tty/serial/8250/8250_port.c -index 66374704747ec..ec7846223f3a1 100644 +index 66374704747ec..691e7a07565c5 100644 --- a/drivers/tty/serial/8250/8250_port.c +++ b/drivers/tty/serial/8250/8250_port.c @@ -307,6 +307,14 @@ static const struct serial8250_config uart_config[] = { @@ -281038,7 +335566,27 @@ index 66374704747ec..ec7846223f3a1 100644 /* * FIFO support. 
*/ -@@ -1042,7 +1029,8 @@ static void autoconfig_16550a(struct uart_8250_port *up) +@@ -613,7 +600,7 @@ EXPORT_SYMBOL_GPL(serial8250_rpm_put); + static int serial8250_em485_init(struct uart_8250_port *p) + { + if (p->em485) +- return 0; ++ goto deassert_rts; + + p->em485 = kmalloc(sizeof(struct uart_8250_em485), GFP_ATOMIC); + if (!p->em485) +@@ -629,7 +616,9 @@ static int serial8250_em485_init(struct uart_8250_port *p) + p->em485->active_timer = NULL; + p->em485->tx_stopped = true; + +- p->rs485_stop_tx(p); ++deassert_rts: ++ if (p->em485->tx_stopped) ++ p->rs485_stop_tx(p); + + return 0; + } +@@ -1042,7 +1031,8 @@ static void autoconfig_16550a(struct uart_8250_port *up) up->port.type = PORT_16550A; up->capabilities |= UART_CAP_FIFO; @@ -281048,7 +335596,7 @@ index 66374704747ec..ec7846223f3a1 100644 return; /* -@@ -1527,6 +1515,8 @@ static inline void __stop_tx(struct uart_8250_port *p) +@@ -1527,6 +1517,8 @@ static inline void __stop_tx(struct uart_8250_port *p) if (em485) { unsigned char lsr = serial_in(p, UART_LSR); @@ -281057,7 +335605,7 @@ index 66374704747ec..ec7846223f3a1 100644 /* * To provide required timeing and allow FIFO transfer, * __stop_tx_rs485() must be called only when both FIFO and -@@ -1615,6 +1605,18 @@ static inline void start_tx_rs485(struct uart_port *port) +@@ -1615,6 +1607,18 @@ static inline void start_tx_rs485(struct uart_port *port) struct uart_8250_port *up = up_to_u8250p(port); struct uart_8250_em485 *em485 = up->em485; @@ -281076,7 +335624,7 @@ index 66374704747ec..ec7846223f3a1 100644 em485->active_timer = NULL; if (em485->tx_stopped) { -@@ -1799,9 +1801,7 @@ void serial8250_tx_chars(struct uart_8250_port *up) +@@ -1799,9 +1803,7 @@ void serial8250_tx_chars(struct uart_8250_port *up) int count; if (port->x_char) { @@ -281087,7 +335635,23 @@ index 66374704747ec..ec7846223f3a1 100644 return; } if (uart_tx_stopped(port)) { -@@ -2024,13 +2024,6 @@ void serial8250_do_set_mctrl(struct uart_port *port, unsigned int mctrl) +@@ -1883,10 +1885,13 @@ EXPORT_SYMBOL_GPL(serial8250_modem_status); + static bool handle_rx_dma(struct uart_8250_port *up, unsigned int iir) + { + switch (iir & 0x3f) { +- case UART_IIR_RX_TIMEOUT: +- serial8250_rx_dma_flush(up); ++ case UART_IIR_RDI: ++ if (!up->dma->rx_running) ++ break; + fallthrough; + case UART_IIR_RLSI: ++ case UART_IIR_RX_TIMEOUT: ++ serial8250_rx_dma_flush(up); + return true; + } + return up->dma->rx_dma(up); +@@ -2024,13 +2029,6 @@ void serial8250_do_set_mctrl(struct uart_port *port, unsigned int mctrl) struct uart_8250_port *up = up_to_u8250p(port); unsigned char mcr; @@ -281101,7 +335665,17 @@ index 66374704747ec..ec7846223f3a1 100644 mcr = serial8250_TIOCM_to_MCR(mctrl); mcr = (mcr & up->mcr_mask) | up->mcr_force | up->mcr; -@@ -2287,6 +2280,10 @@ int serial8250_do_startup(struct uart_port *port) +@@ -2041,6 +2039,9 @@ EXPORT_SYMBOL_GPL(serial8250_do_set_mctrl); + + static void serial8250_set_mctrl(struct uart_port *port, unsigned int mctrl) + { ++ if (port->rs485.flags & SER_RS485_ENABLED) ++ return; ++ + if (port->set_mctrl) + port->set_mctrl(port, mctrl); + else +@@ -2287,6 +2288,10 @@ int serial8250_do_startup(struct uart_port *port) if (port->irq && (up->port.flags & UPF_SHARE_IRQ)) up->port.irqflags |= IRQF_SHARED; @@ -281112,7 +335686,7 @@ index 66374704747ec..ec7846223f3a1 100644 if (port->irq && !(up->port.flags & UPF_NO_THRE_TEST)) { unsigned char iir1; -@@ -2329,9 +2326,7 @@ int serial8250_do_startup(struct uart_port *port) +@@ -2329,9 +2334,7 @@ int serial8250_do_startup(struct uart_port *port) } } @@ 
-281123,7 +335697,7 @@ index 66374704747ec..ec7846223f3a1 100644 /* * Now, initialize the UART -@@ -2696,21 +2691,32 @@ static unsigned int serial8250_get_baud_rate(struct uart_port *port, +@@ -2696,21 +2699,32 @@ static unsigned int serial8250_get_baud_rate(struct uart_port *port, void serial8250_update_uartclk(struct uart_port *port, unsigned int uartclk) { struct uart_8250_port *up = up_to_u8250p(port); @@ -281159,7 +335733,7 @@ index 66374704747ec..ec7846223f3a1 100644 baud = serial8250_get_baud_rate(port, termios, NULL); quot = serial8250_get_divisor(port, baud, &frac); -@@ -2727,7 +2733,9 @@ void serial8250_update_uartclk(struct uart_port *port, unsigned int uartclk) +@@ -2727,7 +2741,9 @@ void serial8250_update_uartclk(struct uart_port *port, unsigned int uartclk) serial8250_rpm_put(up); out_lock: @@ -281170,7 +335744,7 @@ index 66374704747ec..ec7846223f3a1 100644 } EXPORT_SYMBOL_GPL(serial8250_update_uartclk); -@@ -2956,8 +2964,10 @@ static int serial8250_request_std_resource(struct uart_8250_port *up) +@@ -2956,8 +2972,10 @@ static int serial8250_request_std_resource(struct uart_8250_port *up) case UPIO_MEM32BE: case UPIO_MEM16: case UPIO_MEM: @@ -281182,7 +335756,17 @@ index 66374704747ec..ec7846223f3a1 100644 if (!request_mem_region(port->mapbase, size, "serial")) { ret = -EBUSY; -@@ -3308,15 +3318,20 @@ static void serial8250_console_restore(struct uart_8250_port *up) +@@ -3181,9 +3199,6 @@ static void serial8250_config_port(struct uart_port *port, int flags) + if (flags & UART_CONFIG_TYPE) + autoconfig(up); + +- if (port->rs485.flags & SER_RS485_ENABLED) +- port->rs485_config(port, &port->rs485); +- + /* if access method is AU, it is a 16550 with a quirk */ + if (port->type == PORT_16550A && port->iotype == UPIO_AU) + up->bugs |= UART_BUG_NOMSR; +@@ -3308,15 +3323,20 @@ static void serial8250_console_restore(struct uart_8250_port *up) unsigned int baud, quot, frac = 0; termios.c_cflag = port->cons->cflag; @@ -281205,6 +335789,79 @@ index 66374704747ec..ec7846223f3a1 100644 } /* +diff --git a/drivers/tty/serial/8250/Kconfig b/drivers/tty/serial/8250/Kconfig +index 39fc96dc2531c..da63e76c7530c 100644 +--- a/drivers/tty/serial/8250/Kconfig ++++ b/drivers/tty/serial/8250/Kconfig +@@ -118,7 +118,7 @@ config SERIAL_8250_CONSOLE + + config SERIAL_8250_GSC + tristate +- depends on SERIAL_8250 && GSC ++ depends on SERIAL_8250 && PARISC + default SERIAL_8250 + + config SERIAL_8250_DMA +diff --git a/drivers/tty/serial/altera_uart.c b/drivers/tty/serial/altera_uart.c +index 7c5f4e966b594..91799c420e250 100644 +--- a/drivers/tty/serial/altera_uart.c ++++ b/drivers/tty/serial/altera_uart.c +@@ -199,9 +199,8 @@ static void altera_uart_set_termios(struct uart_port *port, + */ + } + +-static void altera_uart_rx_chars(struct altera_uart *pp) ++static void altera_uart_rx_chars(struct uart_port *port) + { +- struct uart_port *port = &pp->port; + unsigned char ch, flag; + unsigned short status; + +@@ -246,9 +245,8 @@ static void altera_uart_rx_chars(struct altera_uart *pp) + tty_flip_buffer_push(&port->state->port); + } + +-static void altera_uart_tx_chars(struct altera_uart *pp) ++static void altera_uart_tx_chars(struct uart_port *port) + { +- struct uart_port *port = &pp->port; + struct circ_buf *xmit = &port->state->xmit; + + if (port->x_char) { +@@ -272,26 +270,25 @@ static void altera_uart_tx_chars(struct altera_uart *pp) + if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS) + uart_write_wakeup(port); + +- if (xmit->head == xmit->tail) { +- pp->imr &= ~ALTERA_UART_CONTROL_TRDY_MSK; +- 
altera_uart_update_ctrl_reg(pp); +- } ++ if (uart_circ_empty(xmit)) ++ altera_uart_stop_tx(port); + } + + static irqreturn_t altera_uart_interrupt(int irq, void *data) + { + struct uart_port *port = data; + struct altera_uart *pp = container_of(port, struct altera_uart, port); ++ unsigned long flags; + unsigned int isr; + + isr = altera_uart_readl(port, ALTERA_UART_STATUS_REG) & pp->imr; + +- spin_lock(&port->lock); ++ spin_lock_irqsave(&port->lock, flags); + if (isr & ALTERA_UART_STATUS_RRDY_MSK) +- altera_uart_rx_chars(pp); ++ altera_uart_rx_chars(port); + if (isr & ALTERA_UART_STATUS_TRDY_MSK) +- altera_uart_tx_chars(pp); +- spin_unlock(&port->lock); ++ altera_uart_tx_chars(port); ++ spin_unlock_irqrestore(&port->lock, flags); + + return IRQ_RETVAL(isr); + } diff --git a/drivers/tty/serial/amba-pl010.c b/drivers/tty/serial/amba-pl010.c index e744b953ca346..47654073123d6 100644 --- a/drivers/tty/serial/amba-pl010.c @@ -281225,10 +335882,20 @@ index e744b953ca346..47654073123d6 100644 quot -= 1; writel((quot & 0xf00) >> 8, uap->port.membase + UART010_LCRM); diff --git a/drivers/tty/serial/amba-pl011.c b/drivers/tty/serial/amba-pl011.c -index d361cd84ff8cf..300a8bbb4b807 100644 +index d361cd84ff8cf..b91fe25a64a18 100644 --- a/drivers/tty/serial/amba-pl011.c +++ b/drivers/tty/serial/amba-pl011.c -@@ -1288,13 +1288,18 @@ static inline bool pl011_dma_rx_running(struct uart_amba_port *uap) +@@ -1050,6 +1050,9 @@ static void pl011_dma_rx_callback(void *data) + */ + static inline void pl011_dma_rx_stop(struct uart_amba_port *uap) + { ++ if (!uap->using_rx_dma) ++ return; ++ + /* FIXME. Just disable the DMA enable */ + uap->dmacr &= ~UART011_RXDMAE; + pl011_write(uap->dmacr, uap, REG_DMACR); +@@ -1288,13 +1291,18 @@ static inline bool pl011_dma_rx_running(struct uart_amba_port *uap) static void pl011_rs485_tx_stop(struct uart_amba_port *uap) { @@ -281248,7 +335915,7 @@ index d361cd84ff8cf..300a8bbb4b807 100644 dev_warn(port->dev, "timeout while draining hardware tx queue\n"); break; -@@ -1367,6 +1372,15 @@ static void pl011_stop_rx(struct uart_port *port) +@@ -1367,6 +1375,15 @@ static void pl011_stop_rx(struct uart_port *port) pl011_dma_rx_stop(uap); } @@ -281264,7 +335931,29 @@ index d361cd84ff8cf..300a8bbb4b807 100644 static void pl011_enable_ms(struct uart_port *port) { struct uart_amba_port *uap = -@@ -1615,9 +1629,6 @@ static void pl011_set_mctrl(struct uart_port *port, unsigned int mctrl) +@@ -1455,6 +1472,10 @@ static bool pl011_tx_chars(struct uart_amba_port *uap, bool from_irq) + struct circ_buf *xmit = &uap->port.state->xmit; + int count = uap->fifosize >> 1; + ++ if ((uap->port.rs485.flags & SER_RS485_ENABLED) && ++ !uap->rs485_tx_started) ++ pl011_rs485_tx_start(uap); ++ + if (uap->port.x_char) { + if (!pl011_tx_char(uap, uap->port.x_char, from_irq)) + return true; +@@ -1466,10 +1487,6 @@ static bool pl011_tx_chars(struct uart_amba_port *uap, bool from_irq) + return false; + } + +- if ((uap->port.rs485.flags & SER_RS485_ENABLED) && +- !uap->rs485_tx_started) +- pl011_rs485_tx_start(uap); +- + /* If we are using DMA mode, try to send some characters. 
*/ + if (pl011_dma_tx_irq(uap)) + return true; +@@ -1615,9 +1632,6 @@ static void pl011_set_mctrl(struct uart_port *port, unsigned int mctrl) container_of(port, struct uart_amba_port, port); unsigned int cr; @@ -281274,7 +335963,7 @@ index d361cd84ff8cf..300a8bbb4b807 100644 cr = pl011_read(uap, REG_CR); #define TIOCMBIT(tiocmbit, uartbit) \ -@@ -1791,9 +1802,10 @@ static int pl011_allocate_irq(struct uart_amba_port *uap) +@@ -1791,9 +1805,10 @@ static int pl011_allocate_irq(struct uart_amba_port *uap) */ static void pl011_enable_interrupts(struct uart_amba_port *uap) { @@ -281286,7 +335975,7 @@ index d361cd84ff8cf..300a8bbb4b807 100644 /* Clear out any spuriously appearing RX interrupts */ pl011_write(UART011_RTIS | UART011_RXIS, uap, REG_ICR); -@@ -1815,7 +1827,14 @@ static void pl011_enable_interrupts(struct uart_amba_port *uap) +@@ -1815,7 +1830,23 @@ static void pl011_enable_interrupts(struct uart_amba_port *uap) if (!pl011_dma_rx_running(uap)) uap->im |= UART011_RXIM; pl011_write(uap->im, uap, REG_IMSC); @@ -281297,12 +335986,21 @@ index d361cd84ff8cf..300a8bbb4b807 100644 +static void pl011_unthrottle_rx(struct uart_port *port) +{ + struct uart_amba_port *uap = container_of(port, struct uart_amba_port, port); ++ unsigned long flags; ++ ++ spin_lock_irqsave(&uap->port.lock, flags); ++ ++ uap->im = UART011_RTIM; ++ if (!pl011_dma_rx_running(uap)) ++ uap->im |= UART011_RXIM; + -+ pl011_enable_interrupts(uap); ++ pl011_write(uap->im, uap, REG_IMSC); ++ ++ spin_unlock_irqrestore(&uap->port.lock, flags); } static int pl011_startup(struct uart_port *port) -@@ -1841,14 +1860,8 @@ static int pl011_startup(struct uart_port *port) +@@ -1841,14 +1872,8 @@ static int pl011_startup(struct uart_port *port) cr = uap->old_cr & (UART011_CR_RTS | UART011_CR_DTR); cr |= UART01x_CR_UARTEN | UART011_CR_RXE; @@ -281318,7 +336016,7 @@ index d361cd84ff8cf..300a8bbb4b807 100644 pl011_write(cr, uap, REG_CR); -@@ -2095,7 +2108,7 @@ pl011_set_termios(struct uart_port *port, struct ktermios *termios, +@@ -2095,7 +2120,7 @@ pl011_set_termios(struct uart_port *port, struct ktermios *termios, * with the given baud rate. We use this as the poll interval when we * wait for the tx queue to empty. */ @@ -281327,7 +336025,7 @@ index d361cd84ff8cf..300a8bbb4b807 100644 pl011_setup_status_masks(port, termios); -@@ -2105,9 +2118,7 @@ pl011_set_termios(struct uart_port *port, struct ktermios *termios, +@@ -2105,9 +2130,7 @@ pl011_set_termios(struct uart_port *port, struct ktermios *termios, if (port->rs485.flags & SER_RS485_ENABLED) termios->c_cflag &= ~CRTSCTS; @@ -281337,7 +336035,7 @@ index d361cd84ff8cf..300a8bbb4b807 100644 if (termios->c_cflag & CRTSCTS) { if (old_cr & UART011_CR_RTS) -@@ -2183,32 +2194,13 @@ static const char *pl011_type(struct uart_port *port) +@@ -2183,32 +2206,13 @@ static const char *pl011_type(struct uart_port *port) return uap->port.type == PORT_AMBA ? 
uap->type : NULL; } @@ -281371,7 +336069,7 @@ index d361cd84ff8cf..300a8bbb4b807 100644 } /* -@@ -2223,6 +2215,8 @@ static int pl011_verify_port(struct uart_port *port, struct serial_struct *ser) +@@ -2223,6 +2227,8 @@ static int pl011_verify_port(struct uart_port *port, struct serial_struct *ser) ret = -EINVAL; if (ser->baud_base < 9600) ret = -EINVAL; @@ -281380,7 +336078,7 @@ index d361cd84ff8cf..300a8bbb4b807 100644 return ret; } -@@ -2268,6 +2262,8 @@ static const struct uart_ops amba_pl011_pops = { +@@ -2268,6 +2274,8 @@ static const struct uart_ops amba_pl011_pops = { .stop_tx = pl011_stop_tx, .start_tx = pl011_start_tx, .stop_rx = pl011_stop_rx, @@ -281389,7 +336087,7 @@ index d361cd84ff8cf..300a8bbb4b807 100644 .enable_ms = pl011_enable_ms, .break_ctl = pl011_break_ctl, .startup = pl011_startup, -@@ -2275,8 +2271,6 @@ static const struct uart_ops amba_pl011_pops = { +@@ -2275,8 +2283,6 @@ static const struct uart_ops amba_pl011_pops = { .flush_buffer = pl011_dma_flush_buffer, .set_termios = pl011_set_termios, .type = pl011_type, @@ -281398,7 +336096,7 @@ index d361cd84ff8cf..300a8bbb4b807 100644 .config_port = pl011_config_port, .verify_port = pl011_verify_port, #ifdef CONFIG_CONSOLE_POLL -@@ -2306,8 +2300,6 @@ static const struct uart_ops sbsa_uart_pops = { +@@ -2306,8 +2312,6 @@ static const struct uart_ops sbsa_uart_pops = { .shutdown = sbsa_uart_shutdown, .set_termios = sbsa_uart_set_termios, .type = pl011_type, @@ -281407,7 +336105,7 @@ index d361cd84ff8cf..300a8bbb4b807 100644 .config_port = pl011_config_port, .verify_port = pl011_verify_port, #ifdef CONFIG_CONSOLE_POLL -@@ -2947,6 +2939,7 @@ MODULE_DEVICE_TABLE(of, sbsa_uart_of_match); +@@ -2947,6 +2951,7 @@ MODULE_DEVICE_TABLE(of, sbsa_uart_of_match); static const struct acpi_device_id __maybe_unused sbsa_uart_acpi_match[] = { { "ARMH0011", 0 }, @@ -281415,8 +336113,24 @@ index d361cd84ff8cf..300a8bbb4b807 100644 {}, }; MODULE_DEVICE_TABLE(acpi, sbsa_uart_acpi_match); +diff --git a/drivers/tty/serial/ar933x_uart.c b/drivers/tty/serial/ar933x_uart.c +index 4379ca4842ae7..0f2677695b521 100644 +--- a/drivers/tty/serial/ar933x_uart.c ++++ b/drivers/tty/serial/ar933x_uart.c +@@ -591,6 +591,11 @@ static int ar933x_config_rs485(struct uart_port *port, + dev_err(port->dev, "RS485 needs rts-gpio\n"); + return 1; + } ++ ++ if (rs485conf->flags & SER_RS485_ENABLED) ++ gpiod_set_value(up->rts_gpiod, ++ !!(rs485conf->flags & SER_RS485_RTS_AFTER_SEND)); ++ + port->rs485 = *rs485conf; + return 0; + } diff --git a/drivers/tty/serial/atmel_serial.c b/drivers/tty/serial/atmel_serial.c -index 249ea35088d27..c0a86558ceaa1 100644 +index 249ea35088d27..714e6ff4a8fbb 100644 --- a/drivers/tty/serial/atmel_serial.c +++ b/drivers/tty/serial/atmel_serial.c @@ -295,20 +295,16 @@ static int atmel_config_rs485(struct uart_port *port, @@ -281470,6 +336184,21 @@ index 249ea35088d27..c0a86558ceaa1 100644 return 0; +@@ -2605,13 +2615,7 @@ static void __init atmel_console_get_options(struct uart_port *port, int *baud, + else if (mr == ATMEL_US_PAR_ODD) + *parity = 'o'; + +- /* +- * The serial core only rounds down when matching this to a +- * supported baud rate. Make sure we don't end up slightly +- * lower than one of those, as it would make us fall through +- * to a much lower baud rate than we really want. 
+- */ +- *baud = port->uartclk / (16 * (quot - 1)); ++ *baud = port->uartclk / (16 * quot); + } + + static int __init atmel_console_setup(struct console *co, char *options) diff --git a/drivers/tty/serial/cpm_uart/cpm_uart_core.c b/drivers/tty/serial/cpm_uart/cpm_uart_core.c index c719aa2b18328..db07d6a5d764d 100644 --- a/drivers/tty/serial/cpm_uart/cpm_uart_core.c @@ -281527,10 +336256,18 @@ index 13ac36e2da4f0..5fea9bf86e85e 100644 irq = platform_get_irq(pdev, 0); if (irq < 0) diff --git a/drivers/tty/serial/fsl_lpuart.c b/drivers/tty/serial/fsl_lpuart.c -index b1e7190ae4836..185dd417fc498 100644 +index b1e7190ae4836..fc311df9f1c9d 100644 --- a/drivers/tty/serial/fsl_lpuart.c +++ b/drivers/tty/serial/fsl_lpuart.c -@@ -239,8 +239,6 @@ +@@ -12,6 +12,7 @@ + #include <linux/dmaengine.h> + #include <linux/dmapool.h> + #include <linux/io.h> ++#include <linux/iopoll.h> + #include <linux/irq.h> + #include <linux/module.h> + #include <linux/of.h> +@@ -239,8 +240,6 @@ /* IMX lpuart has four extra unused regs located at the beginning */ #define IMX_REG_OFF 0x10 @@ -281539,7 +336276,7 @@ index b1e7190ae4836..185dd417fc498 100644 enum lpuart_type { VF610_LPUART, LS1021A_LPUART, -@@ -275,7 +273,6 @@ struct lpuart_port { +@@ -275,7 +274,6 @@ struct lpuart_port { int rx_dma_rng_buf_len; unsigned int dma_tx_nents; wait_queue_head_t dma_wait; @@ -281547,7 +336284,41 @@ index b1e7190ae4836..185dd417fc498 100644 }; struct lpuart_soc_data { -@@ -985,12 +982,12 @@ static void lpuart32_rxint(struct lpuart_port *sport) +@@ -398,33 +396,6 @@ static unsigned int lpuart_get_baud_clk_rate(struct lpuart_port *sport) + #define lpuart_enable_clks(x) __lpuart_enable_clks(x, true) + #define lpuart_disable_clks(x) __lpuart_enable_clks(x, false) + +-static int lpuart_global_reset(struct lpuart_port *sport) +-{ +- struct uart_port *port = &sport->port; +- void __iomem *global_addr; +- int ret; +- +- if (uart_console(port)) +- return 0; +- +- ret = clk_prepare_enable(sport->ipg_clk); +- if (ret) { +- dev_err(sport->port.dev, "failed to enable uart ipg clk: %d\n", ret); +- return ret; +- } +- +- if (is_imx7ulp_lpuart(sport) || is_imx8qxp_lpuart(sport)) { +- global_addr = port->membase + UART_GLOBAL - IMX_REG_OFF; +- writel(UART_GLOBAL_RST, global_addr); +- usleep_range(GLOBAL_RST_MIN_US, GLOBAL_RST_MAX_US); +- writel(0, global_addr); +- usleep_range(GLOBAL_RST_MIN_US, GLOBAL_RST_MAX_US); +- } +- +- clk_disable_unprepare(sport->ipg_clk); +- return 0; +-} +- + static void lpuart_stop_tx(struct uart_port *port) + { + unsigned char temp; +@@ -985,12 +956,12 @@ static void lpuart32_rxint(struct lpuart_port *sport) if (sr & (UARTSTAT_PE | UARTSTAT_OR | UARTSTAT_FE)) { if (sr & UARTSTAT_PE) { @@ -281563,7 +336334,7 @@ index b1e7190ae4836..185dd417fc498 100644 } if (sr & UARTSTAT_OR) -@@ -1005,12 +1002,12 @@ static void lpuart32_rxint(struct lpuart_port *sport) +@@ -1005,12 +976,12 @@ static void lpuart32_rxint(struct lpuart_port *sport) sr &= sport->port.read_status_mask; if (sr & UARTSTAT_PE) { @@ -281579,7 +336350,7 @@ index b1e7190ae4836..185dd417fc498 100644 } if (sr & UARTSTAT_OR) -@@ -1384,9 +1381,9 @@ static int lpuart_config_rs485(struct uart_port *port, +@@ -1384,9 +1355,9 @@ static int lpuart_config_rs485(struct uart_port *port, * Note: UART is assumed to be active high. 
*/ if (rs485->flags & SER_RS485_RTS_ON_SEND) @@ -281591,7 +336362,7 @@ index b1e7190ae4836..185dd417fc498 100644 } /* Store the new configuration */ -@@ -1790,6 +1787,7 @@ static void lpuart_dma_shutdown(struct lpuart_port *sport) +@@ -1790,6 +1761,7 @@ static void lpuart_dma_shutdown(struct lpuart_port *sport) if (sport->lpuart_dma_rx_use) { del_timer_sync(&sport->lpuart_timer); lpuart_dma_rx_free(&sport->port); @@ -281599,7 +336370,7 @@ index b1e7190ae4836..185dd417fc498 100644 } if (sport->lpuart_dma_tx_use) { -@@ -1798,6 +1796,7 @@ static void lpuart_dma_shutdown(struct lpuart_port *sport) +@@ -1798,6 +1770,7 @@ static void lpuart_dma_shutdown(struct lpuart_port *sport) sport->dma_tx_in_progress = false; dmaengine_terminate_all(sport->dma_tx_chan); } @@ -281607,7 +336378,7 @@ index b1e7190ae4836..185dd417fc498 100644 } if (sport->dma_tx_chan) -@@ -2206,6 +2205,7 @@ lpuart32_set_termios(struct uart_port *port, struct ktermios *termios, +@@ -2206,6 +2179,7 @@ lpuart32_set_termios(struct uart_port *port, struct ktermios *termios, uart_update_timeout(port, termios->c_cflag, baud); /* wait transmit engin complete */ @@ -281615,7 +336386,7 @@ index b1e7190ae4836..185dd417fc498 100644 lpuart32_wait_bit_set(&sport->port, UARTSTAT, UARTSTAT_TC); /* disable transmit and receive */ -@@ -2625,6 +2625,7 @@ OF_EARLYCON_DECLARE(lpuart, "fsl,vf610-lpuart", lpuart_early_console_setup); +@@ -2625,6 +2599,7 @@ OF_EARLYCON_DECLARE(lpuart, "fsl,vf610-lpuart", lpuart_early_console_setup); OF_EARLYCON_DECLARE(lpuart32, "fsl,ls1021a-lpuart", lpuart32_early_console_setup); OF_EARLYCON_DECLARE(lpuart32, "fsl,ls1028a-lpuart", ls1028a_early_console_setup); OF_EARLYCON_DECLARE(lpuart32, "fsl,imx7ulp-lpuart", lpuart32_imx_early_console_setup); @@ -281623,7 +336394,66 @@ index b1e7190ae4836..185dd417fc498 100644 EARLYCON_DECLARE(lpuart, lpuart_early_console_setup); EARLYCON_DECLARE(lpuart32, lpuart32_early_console_setup); -@@ -2649,6 +2650,7 @@ static int lpuart_probe(struct platform_device *pdev) +@@ -2643,12 +2618,66 @@ static struct uart_driver lpuart_reg = { + .cons = LPUART_CONSOLE, + }; + ++static const struct serial_rs485 lpuart_rs485_supported = { ++ .flags = SER_RS485_ENABLED | SER_RS485_RTS_ON_SEND | SER_RS485_RTS_AFTER_SEND, ++ /* delay_rts_* and RX_DURING_TX are not supported */ ++}; ++ ++static int lpuart_global_reset(struct lpuart_port *sport) ++{ ++ struct uart_port *port = &sport->port; ++ void __iomem *global_addr; ++ unsigned long ctrl, bd; ++ unsigned int val = 0; ++ int ret; ++ ++ ret = clk_prepare_enable(sport->ipg_clk); ++ if (ret) { ++ dev_err(sport->port.dev, "failed to enable uart ipg clk: %d\n", ret); ++ return ret; ++ } ++ ++ if (is_imx7ulp_lpuart(sport) || is_imx8qxp_lpuart(sport)) { ++ /* ++ * If the transmitter is used by earlycon, wait for transmit engine to ++ * complete and then reset. ++ */ ++ ctrl = lpuart32_read(port, UARTCTRL); ++ if (ctrl & UARTCTRL_TE) { ++ bd = lpuart32_read(&sport->port, UARTBAUD); ++ if (read_poll_timeout(lpuart32_tx_empty, val, val, 1, 100000, false, ++ port)) { ++ dev_warn(sport->port.dev, ++ "timeout waiting for transmit engine to complete\n"); ++ clk_disable_unprepare(sport->ipg_clk); ++ return 0; ++ } ++ } ++ ++ global_addr = port->membase + UART_GLOBAL - IMX_REG_OFF; ++ writel(UART_GLOBAL_RST, global_addr); ++ usleep_range(GLOBAL_RST_MIN_US, GLOBAL_RST_MAX_US); ++ writel(0, global_addr); ++ usleep_range(GLOBAL_RST_MIN_US, GLOBAL_RST_MAX_US); ++ ++ /* Recover the transmitter for earlycon. 
*/ ++ if (ctrl & UARTCTRL_TE) { ++ lpuart32_write(port, bd, UARTBAUD); ++ lpuart32_write(port, ctrl, UARTCTRL); ++ } ++ } ++ ++ clk_disable_unprepare(sport->ipg_clk); ++ return 0; ++} ++ + static int lpuart_probe(struct platform_device *pdev) + { + const struct lpuart_soc_data *sdata = of_device_get_match_data(&pdev->dev); struct device_node *np = pdev->dev.of_node; struct lpuart_port *sport; struct resource *res; @@ -281631,7 +336461,15 @@ index b1e7190ae4836..185dd417fc498 100644 int ret; sport = devm_kzalloc(&pdev->dev, sizeof(*sport), GFP_KERNEL); -@@ -2701,23 +2703,18 @@ static int lpuart_probe(struct platform_device *pdev) +@@ -2681,6 +2710,7 @@ static int lpuart_probe(struct platform_device *pdev) + sport->port.rs485_config = lpuart32_config_rs485; + else + sport->port.rs485_config = lpuart_config_rs485; ++ sport->port.rs485_supported = &lpuart_rs485_supported; + + sport->ipg_clk = devm_clk_get(&pdev->dev, "ipg"); + if (IS_ERR(sport->ipg_clk)) { +@@ -2701,23 +2731,18 @@ static int lpuart_probe(struct platform_device *pdev) ret = of_alias_get_id(np, "serial"); if (ret < 0) { @@ -281659,7 +336497,7 @@ index b1e7190ae4836..185dd417fc498 100644 sport->port.uartclk = lpuart_get_baud_clk_rate(sport); lpuart_ports[sport->port.line] = sport; -@@ -2726,25 +2723,20 @@ static int lpuart_probe(struct platform_device *pdev) +@@ -2726,21 +2751,12 @@ static int lpuart_probe(struct platform_device *pdev) if (lpuart_is_32(sport)) { lpuart_reg.cons = LPUART32_CONSOLE; @@ -281673,39 +336511,39 @@ index b1e7190ae4836..185dd417fc498 100644 + handler = lpuart_int; } -+ ret = lpuart_global_reset(sport); - if (ret) +- if (ret) - goto failed_irq_request; -+ goto failed_reset; - - ret = uart_add_one_port(&lpuart_reg, &sport->port); - if (ret) - goto failed_attach_port; - -- ret = lpuart_global_reset(sport); +- +- ret = uart_add_one_port(&lpuart_reg, &sport->port); - if (ret) -- goto failed_reset; +- goto failed_attach_port; - - ret = uart_get_rs485_mode(&sport->port); + ret = lpuart_global_reset(sport); if (ret) - goto failed_get_rs485; -@@ -2758,18 +2750,19 @@ static int lpuart_probe(struct platform_device *pdev) - - sport->port.rs485_config(&sport->port, &sport->port.rs485); + goto failed_reset; +@@ -2756,20 +2772,23 @@ static int lpuart_probe(struct platform_device *pdev) + sport->port.rs485.delay_rts_after_send) + dev_err(&pdev->dev, "driver doesn't support RTS delays\n"); +- sport->port.rs485_config(&sport->port, &sport->port.rs485); ++ ret = uart_add_one_port(&lpuart_reg, &sport->port); ++ if (ret) ++ goto failed_attach_port; ++ + ret = devm_request_irq(&pdev->dev, sport->port.irq, handler, 0, + DRIVER_NAME, sport); + if (ret) + goto failed_irq_request; -+ + return 0; -+failed_irq_request: - failed_get_rs485: +-failed_get_rs485: -failed_reset: ++failed_irq_request: uart_remove_one_port(&lpuart_reg, &sport->port); failed_attach_port: -failed_irq_request: ++failed_get_rs485: +failed_reset: lpuart_disable_clks(sport); -failed_clock_enable: @@ -281715,7 +336553,7 @@ index b1e7190ae4836..185dd417fc498 100644 return ret; } -@@ -2779,9 +2772,6 @@ static int lpuart_remove(struct platform_device *pdev) +@@ -2779,9 +2798,6 @@ static int lpuart_remove(struct platform_device *pdev) uart_remove_one_port(&lpuart_reg, &sport->port); @@ -281725,7 +336563,7 @@ index b1e7190ae4836..185dd417fc498 100644 lpuart_disable_clks(sport); if (sport->dma_tx_chan) -@@ -2911,7 +2901,6 @@ static int __init lpuart_serial_init(void) +@@ -2911,7 +2927,6 @@ static int __init lpuart_serial_init(void) static void __exit 
lpuart_serial_exit(void) { @@ -281747,10 +336585,30 @@ index 03a2fe9f4c9a9..02b375ba2f078 100644 pci_write_config_dword(dev, PCI_COMMAND, diff --git a/drivers/tty/serial/imx.c b/drivers/tty/serial/imx.c -index 8b121cd869e94..b7ef075a4005b 100644 +index 8b121cd869e94..711edb835c274 100644 --- a/drivers/tty/serial/imx.c +++ b/drivers/tty/serial/imx.c -@@ -486,18 +486,21 @@ static void imx_uart_stop_tx(struct uart_port *port) +@@ -380,8 +380,7 @@ static void imx_uart_rts_active(struct imx_port *sport, u32 *ucr2) + { + *ucr2 &= ~(UCR2_CTSC | UCR2_CTS); + +- sport->port.mctrl |= TIOCM_RTS; +- mctrl_gpio_set(sport->gpios, sport->port.mctrl); ++ mctrl_gpio_set(sport->gpios, sport->port.mctrl | TIOCM_RTS); + } + + /* called with port.lock taken and irqs caller dependent */ +@@ -390,8 +389,7 @@ static void imx_uart_rts_inactive(struct imx_port *sport, u32 *ucr2) + *ucr2 &= ~UCR2_CTSC; + *ucr2 |= UCR2_CTS; + +- sport->port.mctrl &= ~TIOCM_RTS; +- mctrl_gpio_set(sport->gpios, sport->port.mctrl); ++ mctrl_gpio_set(sport->gpios, sport->port.mctrl & ~TIOCM_RTS); + } + + static void start_hrtimer_ms(struct hrtimer *hrt, unsigned long msec) +@@ -486,18 +484,21 @@ static void imx_uart_stop_tx(struct uart_port *port) static void imx_uart_stop_rx(struct uart_port *port) { struct imx_port *sport = (struct imx_port *)port; @@ -281773,7 +336631,7 @@ index 8b121cd869e94..b7ef075a4005b 100644 ucr2 &= ~UCR2_RXEN; imx_uart_writel(sport, ucr2, UCR2); -@@ -1435,7 +1438,7 @@ static int imx_uart_startup(struct uart_port *port) +@@ -1435,7 +1436,7 @@ static int imx_uart_startup(struct uart_port *port) imx_uart_writel(sport, ucr1, UCR1); ucr4 = imx_uart_readl(sport, UCR4) & ~(UCR4_OREN | UCR4_INVR); @@ -281782,7 +336640,7 @@ index 8b121cd869e94..b7ef075a4005b 100644 ucr4 |= UCR4_OREN; if (sport->inverted_rx) ucr4 |= UCR4_INVR; -@@ -1544,7 +1547,7 @@ static void imx_uart_shutdown(struct uart_port *port) +@@ -1544,7 +1545,7 @@ static void imx_uart_shutdown(struct uart_port *port) imx_uart_writel(sport, ucr1, UCR1); ucr4 = imx_uart_readl(sport, UCR4); @@ -281791,7 +336649,7 @@ index 8b121cd869e94..b7ef075a4005b 100644 imx_uart_writel(sport, ucr4, UCR4); spin_unlock_irqrestore(&sport->port.lock, flags); -@@ -2017,7 +2020,7 @@ imx_uart_console_write(struct console *co, const char *s, unsigned int count) +@@ -2017,7 +2018,7 @@ imx_uart_console_write(struct console *co, const char *s, unsigned int count) * If the port was already initialised (eg, by a boot loader), * try to determine the current setup. 
*/ @@ -281800,7 +336658,7 @@ index 8b121cd869e94..b7ef075a4005b 100644 imx_uart_console_get_options(struct imx_port *sport, int *baud, int *parity, int *bits) { -@@ -2076,7 +2079,7 @@ imx_uart_console_get_options(struct imx_port *sport, int *baud, +@@ -2076,7 +2077,7 @@ imx_uart_console_get_options(struct imx_port *sport, int *baud, } } @@ -281809,6 +336667,23 @@ index 8b121cd869e94..b7ef075a4005b 100644 imx_uart_console_setup(struct console *co, char *options) { struct imx_port *sport; +@@ -2315,8 +2316,6 @@ static int imx_uart_probe(struct platform_device *pdev) + dev_err(&pdev->dev, + "low-active RTS not possible when receiver is off, enabling receiver\n"); + +- imx_uart_rs485_config(&sport->port, &sport->port.rs485); +- + /* Disable interrupts before requesting them */ + ucr1 = imx_uart_readl(sport, UCR1); + ucr1 &= ~(UCR1_ADEN | UCR1_TRDYEN | UCR1_IDEN | UCR1_RRDYEN | UCR1_RTSDEN); +@@ -2564,6 +2563,7 @@ static const struct dev_pm_ops imx_uart_pm_ops = { + .suspend_noirq = imx_uart_suspend_noirq, + .resume_noirq = imx_uart_resume_noirq, + .freeze_noirq = imx_uart_suspend_noirq, ++ .thaw_noirq = imx_uart_resume_noirq, + .restore_noirq = imx_uart_resume_noirq, + .suspend = imx_uart_suspend, + .resume = imx_uart_resume, diff --git a/drivers/tty/serial/jsm/jsm_driver.c b/drivers/tty/serial/jsm/jsm_driver.c index 0ea799bf8dbb1..417a5b6bffc34 100644 --- a/drivers/tty/serial/jsm/jsm_driver.c @@ -282092,7 +336967,7 @@ index 91f1eb0058d7e..9a6611cfc18e9 100644 } owl_port->port.flags = UPF_BOOT_AUTOCONF | UPF_IOREMAP | UPF_LOW_LATENCY; diff --git a/drivers/tty/serial/pch_uart.c b/drivers/tty/serial/pch_uart.c -index f0351e6f0ef6d..1e65933f6ccec 100644 +index f0351e6f0ef6d..49bc5a4b28327 100644 --- a/drivers/tty/serial/pch_uart.c +++ b/drivers/tty/serial/pch_uart.c @@ -624,22 +624,6 @@ static int push_rx(struct eg20t_port *priv, const unsigned char *buf, @@ -282118,7 +336993,41 @@ index f0351e6f0ef6d..1e65933f6ccec 100644 static int dma_push_rx(struct eg20t_port *priv, int size) { int room; -@@ -889,9 +873,10 @@ static unsigned int handle_tx(struct eg20t_port *priv) +@@ -723,6 +707,7 @@ static void pch_request_dma(struct uart_port *port) + if (!chan) { + dev_err(priv->port.dev, "%s:dma_request_channel FAILS(Tx)\n", + __func__); ++ pci_dev_put(dma_dev); + return; + } + priv->chan_tx = chan; +@@ -739,6 +724,7 @@ static void pch_request_dma(struct uart_port *port) + __func__); + dma_release_channel(priv->chan_tx); + priv->chan_tx = NULL; ++ pci_dev_put(dma_dev); + return; + } + +@@ -746,6 +732,8 @@ static void pch_request_dma(struct uart_port *port) + priv->rx_buf_virt = dma_alloc_coherent(port->dev, port->fifosize, + &priv->rx_buf_dma, GFP_KERNEL); + priv->chan_rx = chan; ++ ++ pci_dev_put(dma_dev); + } + + static void pch_dma_rx_complete(void *arg) +@@ -777,7 +765,7 @@ static void pch_dma_tx_complete(void *arg) + } + xmit->tail &= UART_XMIT_SIZE - 1; + async_tx_ack(priv->desc_tx); +- dma_unmap_sg(port->dev, sg, priv->orig_nent, DMA_TO_DEVICE); ++ dma_unmap_sg(port->dev, priv->sg_tx_p, priv->orig_nent, DMA_TO_DEVICE); + priv->tx_dma_use = 0; + priv->nent = 0; + priv->orig_nent = 0; +@@ -889,9 +877,10 @@ static unsigned int handle_tx(struct eg20t_port *priv) fifo_size = max(priv->fifo_size, 1); tx_empty = 1; @@ -282131,7 +337040,7 @@ index f0351e6f0ef6d..1e65933f6ccec 100644 tx_empty = 0; fifo_size--; } -@@ -946,9 +931,11 @@ static unsigned int dma_handle_tx(struct eg20t_port *priv) +@@ -946,9 +935,11 @@ static unsigned int dma_handle_tx(struct eg20t_port *priv) } fifo_size = max(priv->fifo_size, 
1); @@ -282145,6 +337054,58 @@ index f0351e6f0ef6d..1e65933f6ccec 100644 fifo_size--; } +diff --git a/drivers/tty/serial/qcom_geni_serial.c b/drivers/tty/serial/qcom_geni_serial.c +index aedc38893e6cf..ce1c81731a2a8 100644 +--- a/drivers/tty/serial/qcom_geni_serial.c ++++ b/drivers/tty/serial/qcom_geni_serial.c +@@ -866,9 +866,10 @@ out_unlock: + return IRQ_HANDLED; + } + +-static void get_tx_fifo_size(struct qcom_geni_serial_port *port) ++static int setup_fifos(struct qcom_geni_serial_port *port) + { + struct uart_port *uport; ++ u32 old_rx_fifo_depth = port->rx_fifo_depth; + + uport = &port->uport; + port->tx_fifo_depth = geni_se_get_tx_fifo_depth(&port->se); +@@ -876,6 +877,16 @@ static void get_tx_fifo_size(struct qcom_geni_serial_port *port) + port->rx_fifo_depth = geni_se_get_rx_fifo_depth(&port->se); + uport->fifosize = + (port->tx_fifo_depth * port->tx_fifo_width) / BITS_PER_BYTE; ++ ++ if (port->rx_fifo && (old_rx_fifo_depth != port->rx_fifo_depth) && port->rx_fifo_depth) { ++ port->rx_fifo = devm_krealloc(uport->dev, port->rx_fifo, ++ port->rx_fifo_depth * sizeof(u32), ++ GFP_KERNEL); ++ if (!port->rx_fifo) ++ return -ENOMEM; ++ } ++ ++ return 0; + } + + +@@ -890,6 +901,7 @@ static int qcom_geni_serial_port_setup(struct uart_port *uport) + u32 rxstale = DEFAULT_BITS_PER_CHAR * STALE_TIMEOUT; + u32 proto; + u32 pin_swap; ++ int ret; + + proto = geni_se_read_proto(&port->se); + if (proto != GENI_SE_UART) { +@@ -899,7 +911,9 @@ static int qcom_geni_serial_port_setup(struct uart_port *uport) + + qcom_geni_serial_stop_rx(uport); + +- get_tx_fifo_size(port); ++ ret = setup_fifos(port); ++ if (ret) ++ return ret; + + writel(rxstale, uport->membase + SE_UART_RX_STALE_CNT); + diff --git a/drivers/tty/serial/rda-uart.c b/drivers/tty/serial/rda-uart.c index d550d8fa2fabf..a8fe1c3ebcd98 100644 --- a/drivers/tty/serial/rda-uart.c @@ -282237,7 +337198,7 @@ index acbb615dd28fd..0ab788058fa2a 100644 static void sc16is7xx_reconf_rs485(struct uart_port *port) diff --git a/drivers/tty/serial/serial-tegra.c b/drivers/tty/serial/serial-tegra.c -index 45e2e4109acd0..d4dba298de7af 100644 +index 45e2e4109acd0..79187ff9ac131 100644 --- a/drivers/tty/serial/serial-tegra.c +++ b/drivers/tty/serial/serial-tegra.c @@ -525,7 +525,7 @@ static void tegra_uart_tx_dma_complete(void *args) @@ -282249,7 +337210,7 @@ index 45e2e4109acd0..d4dba298de7af 100644 tup->tx_in_progress = 0; if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS) uart_write_wakeup(&tup->uport); -@@ -613,7 +613,6 @@ static unsigned int tegra_uart_tx_empty(struct uart_port *u) +@@ -613,18 +613,18 @@ static unsigned int tegra_uart_tx_empty(struct uart_port *u) static void tegra_uart_stop_tx(struct uart_port *u) { struct tegra_uart_port *tup = to_tegra_uport(u); @@ -282257,8 +337218,13 @@ index 45e2e4109acd0..d4dba298de7af 100644 struct dma_tx_state state; unsigned int count; -@@ -624,7 +623,7 @@ static void tegra_uart_stop_tx(struct uart_port *u) + if (tup->tx_in_progress != TEGRA_UART_TX_DMA) + return; + +- dmaengine_terminate_all(tup->tx_dma_chan); ++ dmaengine_pause(tup->tx_dma_chan); dmaengine_tx_status(tup->tx_dma_chan, tup->tx_cookie, &state); ++ dmaengine_terminate_all(tup->tx_dma_chan); count = tup->tx_bytes_requested - state.residue; async_tx_ack(tup->tx_dma_desc); - xmit->tail = (xmit->tail + count) & (UART_XMIT_SIZE - 1); @@ -282266,7 +337232,18 @@ index 45e2e4109acd0..d4dba298de7af 100644 tup->tx_in_progress = 0; } -@@ -1506,7 +1505,7 @@ static struct tegra_uart_chip_data tegra20_uart_chip_data = { +@@ -764,8 +764,9 @@ static void 
tegra_uart_terminate_rx_dma(struct tegra_uart_port *tup) + return; + } + +- dmaengine_terminate_all(tup->rx_dma_chan); ++ dmaengine_pause(tup->rx_dma_chan); + dmaengine_tx_status(tup->rx_dma_chan, tup->rx_cookie, &state); ++ dmaengine_terminate_all(tup->rx_dma_chan); + + tegra_uart_rx_buffer_push(tup, state.residue); + tup->rx_dma_active = false; +@@ -1506,7 +1507,7 @@ static struct tegra_uart_chip_data tegra20_uart_chip_data = { .fifo_mode_enable_status = false, .uart_max_port = 5, .max_dma_burst_bytes = 4, @@ -282275,7 +337252,7 @@ index 45e2e4109acd0..d4dba298de7af 100644 .error_tolerance_high_range = 4, }; -@@ -1517,7 +1516,7 @@ static struct tegra_uart_chip_data tegra30_uart_chip_data = { +@@ -1517,7 +1518,7 @@ static struct tegra_uart_chip_data tegra30_uart_chip_data = { .fifo_mode_enable_status = false, .uart_max_port = 5, .max_dma_burst_bytes = 4, @@ -282285,21 +337262,30 @@ index 45e2e4109acd0..d4dba298de7af 100644 }; diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c -index 0e2e35ab64c79..82ddbb92d07da 100644 +index 0e2e35ab64c79..45b721abaa2f5 100644 --- a/drivers/tty/serial/serial_core.c +++ b/drivers/tty/serial/serial_core.c -@@ -144,6 +144,11 @@ uart_update_mctrl(struct uart_port *port, unsigned int set, unsigned int clear) - unsigned long flags; - unsigned int old; +@@ -42,6 +42,11 @@ static struct lock_class_key port_lock_key; -+ if (port->rs485.flags & SER_RS485_ENABLED) { -+ set &= ~TIOCM_RTS; -+ clear &= ~TIOCM_RTS; -+ } + #define HIGH_BITS_OFFSET ((sizeof(long)-sizeof(int))*8) + ++/* ++ * Max time with active RTS before/after data is sent. ++ */ ++#define RS485_MAX_RTS_DELAY 100 /* msecs */ + + static void uart_change_speed(struct tty_struct *tty, struct uart_state *state, + struct ktermios *old_termios); + static void uart_wait_until_sent(struct tty_struct *tty, int timeout); +@@ -147,7 +152,7 @@ uart_update_mctrl(struct uart_port *port, unsigned int set, unsigned int clear) spin_lock_irqsave(&port->lock, flags); old = port->mctrl; port->mctrl = (old & ~clear) | set; +- if (old != port->mctrl) ++ if (old != port->mctrl && !(port->rs485.flags & SER_RS485_ENABLED)) + port->ops->set_mctrl(port, port->mctrl); + spin_unlock_irqrestore(&port->lock, flags); + } @@ -157,23 +162,10 @@ uart_update_mctrl(struct uart_port *port, unsigned int set, unsigned int clear) static void uart_port_dtr_rts(struct uart_port *uport, int raise) @@ -282374,7 +337360,49 @@ index 0e2e35ab64c79..82ddbb92d07da 100644 /* * This function is used to send a high-priority XON/XOFF character to * the device -@@ -1542,6 +1555,7 @@ static void uart_tty_port_shutdown(struct tty_port *port) +@@ -1286,8 +1299,41 @@ static int uart_set_rs485_config(struct uart_port *port, + if (copy_from_user(&rs485, rs485_user, sizeof(*rs485_user))) + return -EFAULT; + ++ /* pick sane settings if the user hasn't */ ++ if (!(rs485.flags & SER_RS485_RTS_ON_SEND) == ++ !(rs485.flags & SER_RS485_RTS_AFTER_SEND)) { ++ dev_warn_ratelimited(port->dev, ++ "%s (%d): invalid RTS setting, using RTS_ON_SEND instead\n", ++ port->name, port->line); ++ rs485.flags |= SER_RS485_RTS_ON_SEND; ++ rs485.flags &= ~SER_RS485_RTS_AFTER_SEND; ++ } ++ ++ if (rs485.delay_rts_before_send > RS485_MAX_RTS_DELAY) { ++ rs485.delay_rts_before_send = RS485_MAX_RTS_DELAY; ++ dev_warn_ratelimited(port->dev, ++ "%s (%d): RTS delay before sending clamped to %u ms\n", ++ port->name, port->line, rs485.delay_rts_before_send); ++ } ++ ++ if (rs485.delay_rts_after_send > RS485_MAX_RTS_DELAY) { ++ rs485.delay_rts_after_send = 
RS485_MAX_RTS_DELAY; ++ dev_warn_ratelimited(port->dev, ++ "%s (%d): RTS delay after sending clamped to %u ms\n", ++ port->name, port->line, rs485.delay_rts_after_send); ++ } ++ /* Return clean padding area to userspace */ ++ memset(rs485.padding, 0, sizeof(rs485.padding)); ++ + spin_lock_irqsave(&port->lock, flags); + ret = port->rs485_config(port, &rs485); ++ if (!ret) { ++ port->rs485 = rs485; ++ ++ /* Reset RTS and other mctrl lines when disabling RS485 */ ++ if (!(rs485.flags & SER_RS485_ENABLED)) ++ port->ops->set_mctrl(port, port->mctrl); ++ } + spin_unlock_irqrestore(&port->lock, flags); + if (ret) + return ret; +@@ -1542,6 +1588,7 @@ static void uart_tty_port_shutdown(struct tty_port *port) { struct uart_state *state = container_of(port, struct uart_state, port); struct uart_port *uport = uart_port_check(state); @@ -282382,7 +337410,7 @@ index 0e2e35ab64c79..82ddbb92d07da 100644 /* * At this point, we stop accepting input. To do this, we -@@ -1563,8 +1577,18 @@ static void uart_tty_port_shutdown(struct tty_port *port) +@@ -1563,8 +1610,18 @@ static void uart_tty_port_shutdown(struct tty_port *port) */ tty_port_set_suspended(port, 0); @@ -282402,7 +337430,7 @@ index 0e2e35ab64c79..82ddbb92d07da 100644 } static void uart_wait_until_sent(struct tty_struct *tty, int timeout) -@@ -1888,11 +1912,6 @@ static int uart_proc_show(struct seq_file *m, void *v) +@@ -1888,11 +1945,6 @@ static int uart_proc_show(struct seq_file *m, void *v) } #endif @@ -282414,7 +337442,7 @@ index 0e2e35ab64c79..82ddbb92d07da 100644 static void uart_port_spin_lock_init(struct uart_port *port) { spin_lock_init(&port->lock); -@@ -2094,8 +2113,11 @@ uart_set_options(struct uart_port *port, struct console *co, +@@ -2094,8 +2146,11 @@ uart_set_options(struct uart_port *port, struct console *co, * Allow the setting of the UART parameters with a NULL console * too: */ @@ -282427,7 +337455,17 @@ index 0e2e35ab64c79..82ddbb92d07da 100644 return 0; } -@@ -2229,6 +2251,8 @@ int uart_resume_port(struct uart_driver *drv, struct uart_port *uport) +@@ -2170,7 +2225,8 @@ int uart_suspend_port(struct uart_driver *drv, struct uart_port *uport) + + spin_lock_irq(&uport->lock); + ops->stop_tx(uport); +- ops->set_mctrl(uport, 0); ++ if (!(uport->rs485.flags & SER_RS485_ENABLED)) ++ ops->set_mctrl(uport, 0); + ops->stop_rx(uport); + spin_unlock_irq(&uport->lock); + +@@ -2229,6 +2285,8 @@ int uart_resume_port(struct uart_driver *drv, struct uart_port *uport) */ memset(&termios, 0, sizeof(struct ktermios)); termios.c_cflag = uport->cons->cflag; @@ -282436,16 +337474,38 @@ index 0e2e35ab64c79..82ddbb92d07da 100644 /* * If that's unset, use the tty termios setting. 
-@@ -2365,7 +2389,11 @@ uart_configure_port(struct uart_driver *drv, struct uart_state *state, +@@ -2249,7 +2307,8 @@ int uart_resume_port(struct uart_driver *drv, struct uart_port *uport) + + uart_change_pm(state, UART_PM_STATE_ON); + spin_lock_irq(&uport->lock); +- ops->set_mctrl(uport, 0); ++ if (!(uport->rs485.flags & SER_RS485_ENABLED)) ++ ops->set_mctrl(uport, 0); + spin_unlock_irq(&uport->lock); + if (console_suspend_enabled || !uart_console(uport)) { + /* Protected by port mutex for now */ +@@ -2260,7 +2319,10 @@ int uart_resume_port(struct uart_driver *drv, struct uart_port *uport) + if (tty) + uart_change_speed(tty, state, NULL); + spin_lock_irq(&uport->lock); +- ops->set_mctrl(uport, uport->mctrl); ++ if (!(uport->rs485.flags & SER_RS485_ENABLED)) ++ ops->set_mctrl(uport, uport->mctrl); ++ else ++ uport->rs485_config(uport, &uport->rs485); + ops->start_tx(uport); + spin_unlock_irq(&uport->lock); + tty_port_set_initialized(port, 1); +@@ -2365,7 +2427,11 @@ uart_configure_port(struct uart_driver *drv, struct uart_state *state, * We probably don't need a spinlock around this, but */ spin_lock_irqsave(&port->lock, flags); - port->ops->set_mctrl(port, port->mctrl & TIOCM_DTR); + port->mctrl &= TIOCM_DTR; -+ if (port->rs485.flags & SER_RS485_ENABLED && -+ !(port->rs485.flags & SER_RS485_RTS_AFTER_SEND)) -+ port->mctrl |= TIOCM_RTS; -+ port->ops->set_mctrl(port, port->mctrl); ++ if (!(port->rs485.flags & SER_RS485_ENABLED)) ++ port->ops->set_mctrl(port, port->mctrl); ++ else ++ port->rs485_config(port, &port->rs485); spin_unlock_irqrestore(&port->lock, flags); /* @@ -282531,10 +337591,64 @@ index 87e480cc8206d..5a45633aaea8d 100644 /* set stop bit */ ctrl_val |= (cflag & CSTOPB) ? ASC_CTL_STOP_2BIT : ASC_CTL_STOP_1BIT; diff --git a/drivers/tty/serial/stm32-usart.c b/drivers/tty/serial/stm32-usart.c -index 8f032e77b954a..fc166cc2c856d 100644 +index 8f032e77b954a..5c60960e185d2 100644 --- a/drivers/tty/serial/stm32-usart.c +++ b/drivers/tty/serial/stm32-usart.c -@@ -71,6 +71,8 @@ static void stm32_usart_config_reg_rs485(u32 *cr1, u32 *cr3, u32 delay_ADE, +@@ -61,6 +61,53 @@ static void stm32_usart_clr_bits(struct uart_port *port, u32 reg, u32 bits) + writel_relaxed(val, port->membase + reg); + } + ++static unsigned int stm32_usart_tx_empty(struct uart_port *port) ++{ ++ struct stm32_port *stm32_port = to_stm32_port(port); ++ const struct stm32_usart_offsets *ofs = &stm32_port->info->ofs; ++ ++ if (readl_relaxed(port->membase + ofs->isr) & USART_SR_TC) ++ return TIOCSER_TEMT; ++ ++ return 0; ++} ++ ++static void stm32_usart_rs485_rts_enable(struct uart_port *port) ++{ ++ struct stm32_port *stm32_port = to_stm32_port(port); ++ struct serial_rs485 *rs485conf = &port->rs485; ++ ++ if (stm32_port->hw_flow_control || ++ !(rs485conf->flags & SER_RS485_ENABLED)) ++ return; ++ ++ if (rs485conf->flags & SER_RS485_RTS_ON_SEND) { ++ mctrl_gpio_set(stm32_port->gpios, ++ stm32_port->port.mctrl | TIOCM_RTS); ++ } else { ++ mctrl_gpio_set(stm32_port->gpios, ++ stm32_port->port.mctrl & ~TIOCM_RTS); ++ } ++} ++ ++static void stm32_usart_rs485_rts_disable(struct uart_port *port) ++{ ++ struct stm32_port *stm32_port = to_stm32_port(port); ++ struct serial_rs485 *rs485conf = &port->rs485; ++ ++ if (stm32_port->hw_flow_control || ++ !(rs485conf->flags & SER_RS485_ENABLED)) ++ return; ++ ++ if (rs485conf->flags & SER_RS485_RTS_ON_SEND) { ++ mctrl_gpio_set(stm32_port->gpios, ++ stm32_port->port.mctrl & ~TIOCM_RTS); ++ } else { ++ mctrl_gpio_set(stm32_port->gpios, ++ stm32_port->port.mctrl | TIOCM_RTS); ++ } ++} 
++ + static void stm32_usart_config_reg_rs485(u32 *cr1, u32 *cr3, u32 delay_ADE, + u32 delay_DDE, u32 baud) + { +@@ -71,6 +118,8 @@ static void stm32_usart_config_reg_rs485(u32 *cr1, u32 *cr3, u32 delay_ADE, *cr3 |= USART_CR3_DEM; over8 = *cr1 & USART_CR1_OVER8; @@ -282543,12 +337657,62 @@ index 8f032e77b954a..fc166cc2c856d 100644 if (over8) rs485_deat_dedt = delay_ADE * baud * 8; else -@@ -421,10 +423,22 @@ static void stm32_usart_transmit_chars(struct uart_port *port) +@@ -147,6 +196,12 @@ static int stm32_usart_config_rs485(struct uart_port *port, + + stm32_usart_set_bits(port, ofs->cr1, BIT(cfg->uart_enable_bit)); + ++ /* Adjust RTS polarity in case it's driven in software */ ++ if (stm32_usart_tx_empty(port)) ++ stm32_usart_rs485_rts_disable(port); ++ else ++ stm32_usart_rs485_rts_enable(port); ++ + return 0; + } + +@@ -312,6 +367,14 @@ static void stm32_usart_tx_interrupt_enable(struct uart_port *port) + stm32_usart_set_bits(port, ofs->cr1, USART_CR1_TXEIE); + } + ++static void stm32_usart_tc_interrupt_enable(struct uart_port *port) ++{ ++ struct stm32_port *stm32_port = to_stm32_port(port); ++ const struct stm32_usart_offsets *ofs = &stm32_port->info->ofs; ++ ++ stm32_usart_set_bits(port, ofs->cr1, USART_CR1_TCIE); ++} ++ + static void stm32_usart_tx_interrupt_disable(struct uart_port *port) + { + struct stm32_port *stm32_port = to_stm32_port(port); +@@ -323,6 +386,14 @@ static void stm32_usart_tx_interrupt_disable(struct uart_port *port) + stm32_usart_clr_bits(port, ofs->cr1, USART_CR1_TXEIE); + } + ++static void stm32_usart_tc_interrupt_disable(struct uart_port *port) ++{ ++ struct stm32_port *stm32_port = to_stm32_port(port); ++ const struct stm32_usart_offsets *ofs = &stm32_port->info->ofs; ++ ++ stm32_usart_clr_bits(port, ofs->cr1, USART_CR1_TCIE); ++} ++ + static void stm32_usart_transmit_chars_pio(struct uart_port *port) + { + struct stm32_port *stm32_port = to_stm32_port(port); +@@ -421,10 +492,29 @@ static void stm32_usart_transmit_chars(struct uart_port *port) struct stm32_port *stm32_port = to_stm32_port(port); const struct stm32_usart_offsets *ofs = &stm32_port->info->ofs; struct circ_buf *xmit = &port->state->xmit; + u32 isr; + int ret; ++ ++ if (!stm32_port->hw_flow_control && ++ port->rs485.flags & SER_RS485_ENABLED) { ++ stm32_port->txdone = false; ++ stm32_usart_tc_interrupt_disable(port); ++ stm32_usart_rs485_rts_enable(port); ++ } if (port->x_char) { if (stm32_port->tx_dma_busy) @@ -282566,16 +337730,103 @@ index 8f032e77b954a..fc166cc2c856d 100644 writel_relaxed(port->x_char, port->membase + ofs->tdr); port->x_char = 0; port->icount.tx++; -@@ -575,7 +589,7 @@ static void stm32_usart_start_tx(struct uart_port *port) - struct serial_rs485 *rs485conf = &port->rs485; +@@ -451,8 +541,14 @@ static void stm32_usart_transmit_chars(struct uart_port *port) + if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS) + uart_write_wakeup(port); + +- if (uart_circ_empty(xmit)) ++ if (uart_circ_empty(xmit)) { + stm32_usart_tx_interrupt_disable(port); ++ if (!stm32_port->hw_flow_control && ++ port->rs485.flags & SER_RS485_ENABLED) { ++ stm32_port->txdone = true; ++ stm32_usart_tc_interrupt_enable(port); ++ } ++ } + } + + static irqreturn_t stm32_usart_interrupt(int irq, void *ptr) +@@ -465,6 +561,13 @@ static irqreturn_t stm32_usart_interrupt(int irq, void *ptr) + + sr = readl_relaxed(port->membase + ofs->isr); + ++ if (!stm32_port->hw_flow_control && ++ port->rs485.flags & SER_RS485_ENABLED && ++ (sr & USART_SR_TC)) { ++ stm32_usart_tc_interrupt_disable(port); ++ 
stm32_usart_rs485_rts_disable(port); ++ } ++ + if ((sr & USART_SR_RTOF) && ofs->icr != UNDEF_REG) + writel_relaxed(USART_ICR_RTOCF, + port->membase + ofs->icr); +@@ -504,17 +607,6 @@ static irqreturn_t stm32_usart_threaded_interrupt(int irq, void *ptr) + return IRQ_HANDLED; + } + +-static unsigned int stm32_usart_tx_empty(struct uart_port *port) +-{ +- struct stm32_port *stm32_port = to_stm32_port(port); +- const struct stm32_usart_offsets *ofs = &stm32_port->info->ofs; +- +- if (readl_relaxed(port->membase + ofs->isr) & USART_SR_TC) +- return TIOCSER_TEMT; +- +- return 0; +-} +- + static void stm32_usart_set_mctrl(struct uart_port *port, unsigned int mctrl) + { + struct stm32_port *stm32_port = to_stm32_port(port); +@@ -552,42 +644,23 @@ static void stm32_usart_disable_ms(struct uart_port *port) + /* Transmit stop */ + static void stm32_usart_stop_tx(struct uart_port *port) + { +- struct stm32_port *stm32_port = to_stm32_port(port); +- struct serial_rs485 *rs485conf = &port->rs485; +- + stm32_usart_tx_interrupt_disable(port); + +- if (rs485conf->flags & SER_RS485_ENABLED) { +- if (rs485conf->flags & SER_RS485_RTS_ON_SEND) { +- mctrl_gpio_set(stm32_port->gpios, +- stm32_port->port.mctrl & ~TIOCM_RTS); +- } else { +- mctrl_gpio_set(stm32_port->gpios, +- stm32_port->port.mctrl | TIOCM_RTS); +- } +- } ++ stm32_usart_rs485_rts_disable(port); + } + + /* There are probably characters waiting to be transmitted. */ + static void stm32_usart_start_tx(struct uart_port *port) + { +- struct stm32_port *stm32_port = to_stm32_port(port); +- struct serial_rs485 *rs485conf = &port->rs485; struct circ_buf *xmit = &port->state->xmit; - if (uart_circ_empty(xmit)) -+ if (uart_circ_empty(xmit) && !port->x_char) ++ if (uart_circ_empty(xmit) && !port->x_char) { ++ stm32_usart_rs485_rts_disable(port); return; +- +- if (rs485conf->flags & SER_RS485_ENABLED) { +- if (rs485conf->flags & SER_RS485_RTS_ON_SEND) { +- mctrl_gpio_set(stm32_port->gpios, +- stm32_port->port.mctrl | TIOCM_RTS); +- } else { +- mctrl_gpio_set(stm32_port->gpios, +- stm32_port->port.mctrl & ~TIOCM_RTS); +- } + } - if (rs485conf->flags & SER_RS485_ENABLED) { -@@ -691,6 +705,11 @@ static void stm32_usart_shutdown(struct uart_port *port) ++ stm32_usart_rs485_rts_enable(port); ++ + stm32_usart_transmit_chars(port); + } + +@@ -691,6 +764,11 @@ static void stm32_usart_shutdown(struct uart_port *port) u32 val, isr; int ret; @@ -282587,7 +337838,7 @@ index 8f032e77b954a..fc166cc2c856d 100644 /* Disable modem control interrupts */ stm32_usart_disable_ms(port); -@@ -790,13 +809,22 @@ static void stm32_usart_set_termios(struct uart_port *port, +@@ -790,13 +868,22 @@ static void stm32_usart_set_termios(struct uart_port *port, * CS8 or (CS7 + parity), 8 bits word aka [M1:M0] = 0b00 * M0 and M1 already cleared by cr1 initialization. 
*/ @@ -282613,7 +337864,87 @@ index 8f032e77b954a..fc166cc2c856d 100644 if (ofs->rtor != UNDEF_REG && (stm32_port->rx_ch || (stm32_port->fifoen && -@@ -1385,7 +1413,6 @@ static int stm32_usart_serial_remove(struct platform_device *pdev) +@@ -1276,22 +1363,10 @@ static int stm32_usart_serial_probe(struct platform_device *pdev) + if (!stm32port->info) + return -EINVAL; + +- ret = stm32_usart_init_port(stm32port, pdev); +- if (ret) +- return ret; +- +- if (stm32port->wakeup_src) { +- device_set_wakeup_capable(&pdev->dev, true); +- ret = dev_pm_set_wake_irq(&pdev->dev, stm32port->port.irq); +- if (ret) +- goto err_deinit_port; +- } +- + stm32port->rx_ch = dma_request_chan(&pdev->dev, "rx"); +- if (PTR_ERR(stm32port->rx_ch) == -EPROBE_DEFER) { +- ret = -EPROBE_DEFER; +- goto err_wakeirq; +- } ++ if (PTR_ERR(stm32port->rx_ch) == -EPROBE_DEFER) ++ return -EPROBE_DEFER; ++ + /* Fall back in interrupt mode for any non-deferral error */ + if (IS_ERR(stm32port->rx_ch)) + stm32port->rx_ch = NULL; +@@ -1305,6 +1380,17 @@ static int stm32_usart_serial_probe(struct platform_device *pdev) + if (IS_ERR(stm32port->tx_ch)) + stm32port->tx_ch = NULL; + ++ ret = stm32_usart_init_port(stm32port, pdev); ++ if (ret) ++ goto err_dma_tx; ++ ++ if (stm32port->wakeup_src) { ++ device_set_wakeup_capable(&pdev->dev, true); ++ ret = dev_pm_set_wake_irq(&pdev->dev, stm32port->port.irq); ++ if (ret) ++ goto err_deinit_port; ++ } ++ + if (stm32port->rx_ch && stm32_usart_of_dma_rx_probe(stm32port, pdev)) { + /* Fall back in interrupt mode */ + dma_release_channel(stm32port->rx_ch); +@@ -1341,19 +1427,11 @@ err_port: + pm_runtime_set_suspended(&pdev->dev); + pm_runtime_put_noidle(&pdev->dev); + +- if (stm32port->tx_ch) { ++ if (stm32port->tx_ch) + stm32_usart_of_dma_tx_remove(stm32port, pdev); +- dma_release_channel(stm32port->tx_ch); +- } +- + if (stm32port->rx_ch) + stm32_usart_of_dma_rx_remove(stm32port, pdev); + +-err_dma_rx: +- if (stm32port->rx_ch) +- dma_release_channel(stm32port->rx_ch); +- +-err_wakeirq: + if (stm32port->wakeup_src) + dev_pm_clear_wake_irq(&pdev->dev); + +@@ -1363,6 +1441,14 @@ err_deinit_port: + + stm32_usart_deinit_port(stm32port); + ++err_dma_tx: ++ if (stm32port->tx_ch) ++ dma_release_channel(stm32port->tx_ch); ++ ++err_dma_rx: ++ if (stm32port->rx_ch) ++ dma_release_channel(stm32port->rx_ch); ++ + return ret; + } + +@@ -1385,7 +1471,6 @@ static int stm32_usart_serial_remove(struct platform_device *pdev) stm32_usart_clr_bits(port, ofs->cr3, USART_CR3_DMAR); if (stm32_port->tx_ch) { @@ -282621,6 +337952,37 @@ index 8f032e77b954a..fc166cc2c856d 100644 stm32_usart_of_dma_tx_remove(stm32_port, pdev); dma_release_channel(stm32_port->tx_ch); } +diff --git a/drivers/tty/serial/stm32-usart.h b/drivers/tty/serial/stm32-usart.h +index 07ac291328cda..ad6335155de2d 100644 +--- a/drivers/tty/serial/stm32-usart.h ++++ b/drivers/tty/serial/stm32-usart.h +@@ -267,6 +267,7 @@ struct stm32_port { + bool hw_flow_control; + bool swap; /* swap RX & TX pins */ + bool fifoen; ++ bool txdone; + int rxftcfg; /* RX FIFO threshold CFG */ + int txftcfg; /* TX FIFO threshold CFG */ + bool wakeup_src; +diff --git a/drivers/tty/serial/sunsab.c b/drivers/tty/serial/sunsab.c +index 92e5726340090..ac7cb80e4d6bd 100644 +--- a/drivers/tty/serial/sunsab.c ++++ b/drivers/tty/serial/sunsab.c +@@ -1137,7 +1137,13 @@ static int __init sunsab_init(void) + } + } + +- return platform_driver_register(&sab_driver); ++ err = platform_driver_register(&sab_driver); ++ if (err) { ++ kfree(sunsab_ports); ++ sunsab_ports = NULL; ++ } ++ ++ return 
err; + } + + static void __exit sunsab_exit(void) diff --git a/drivers/tty/serial/tegra-tcu.c b/drivers/tty/serial/tegra-tcu.c index 4877c54c613d1..889b701ba7c62 100644 --- a/drivers/tty/serial/tegra-tcu.c @@ -282885,6 +338247,34 @@ index c7fbbcdcc3461..3700cd057f273 100644 } static void fn_scroll_forw(struct vc_data *vc) +diff --git a/drivers/tty/vt/vc_screen.c b/drivers/tty/vt/vc_screen.c +index 1850bacdb5b0e..71e091f879f0e 100644 +--- a/drivers/tty/vt/vc_screen.c ++++ b/drivers/tty/vt/vc_screen.c +@@ -386,10 +386,6 @@ vcs_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) + + uni_mode = use_unicode(inode); + attr = use_attributes(inode); +- ret = -ENXIO; +- vc = vcs_vc(inode, &viewed); +- if (!vc) +- goto unlock_out; + + ret = -EINVAL; + if (pos < 0) +@@ -407,6 +403,12 @@ vcs_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) + unsigned int this_round, skip = 0; + int size; + ++ vc = vcs_vc(inode, &viewed); ++ if (!vc) { ++ ret = -ENXIO; ++ break; ++ } ++ + /* Check whether we are above size each round, + * as copy_to_user at the end of this loop + * could sleep. diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c index 7359c3e80d63e..b8f5bc19416d9 100644 --- a/drivers/tty/vt/vt.c @@ -282966,8 +338356,49 @@ index 3639bb6dc372e..58013698635f0 100644 console_lock(); ret = vc_allocate(arg); console_unlock(); +diff --git a/drivers/uio/uio_dmem_genirq.c b/drivers/uio/uio_dmem_genirq.c +index 6b5cfa5b06733..28be820b546e9 100644 +--- a/drivers/uio/uio_dmem_genirq.c ++++ b/drivers/uio/uio_dmem_genirq.c +@@ -110,8 +110,10 @@ static irqreturn_t uio_dmem_genirq_handler(int irq, struct uio_info *dev_info) + * remember the state so we can allow user space to enable it later. + */ + ++ spin_lock(&priv->lock); + if (!test_and_set_bit(0, &priv->flags)) + disable_irq_nosync(irq); ++ spin_unlock(&priv->lock); + + return IRQ_HANDLED; + } +@@ -125,20 +127,19 @@ static int uio_dmem_genirq_irqcontrol(struct uio_info *dev_info, s32 irq_on) + * in the interrupt controller, but keep track of the + * state to prevent per-irq depth damage. + * +- * Serialize this operation to support multiple tasks. ++ * Serialize this operation to support multiple tasks and concurrency ++ * with irq handler on SMP systems. 
+ */ + + spin_lock_irqsave(&priv->lock, flags); + if (irq_on) { + if (test_and_clear_bit(0, &priv->flags)) + enable_irq(dev_info->irq); +- spin_unlock_irqrestore(&priv->lock, flags); + } else { +- if (!test_and_set_bit(0, &priv->flags)) { +- spin_unlock_irqrestore(&priv->lock, flags); +- disable_irq(dev_info->irq); +- } ++ if (!test_and_set_bit(0, &priv->flags)) ++ disable_irq_nosync(dev_info->irq); + } ++ spin_unlock_irqrestore(&priv->lock, flags); + + return 0; + } diff --git a/drivers/usb/cdns3/cdns3-gadget.c b/drivers/usb/cdns3/cdns3-gadget.c -index 1f3b4a1422126..1802f6818e632 100644 +index 1f3b4a1422126..924c2793c7327 100644 --- a/drivers/usb/cdns3/cdns3-gadget.c +++ b/drivers/usb/cdns3/cdns3-gadget.c @@ -220,7 +220,7 @@ int cdns3_allocate_trb_pool(struct cdns3_endpoint *priv_ep) @@ -283061,7 +338492,7 @@ index 1f3b4a1422126..1802f6818e632 100644 dev_dbg(priv_dev->dev, "usbss: invalid parameters\n"); return -EINVAL; } -@@ -2608,7 +2603,7 @@ int cdns3_gadget_ep_dequeue(struct usb_ep *ep, +@@ -2608,17 +2603,20 @@ int cdns3_gadget_ep_dequeue(struct usb_ep *ep, struct usb_request *request) { struct cdns3_endpoint *priv_ep = ep_to_cdns3_ep(ep); @@ -283070,7 +338501,11 @@ index 1f3b4a1422126..1802f6818e632 100644 struct usb_request *req, *req_temp; struct cdns3_request *priv_req; struct cdns3_trb *link_trb; -@@ -2619,6 +2614,8 @@ int cdns3_gadget_ep_dequeue(struct usb_ep *ep, + u8 req_on_hw_ring = 0; + unsigned long flags; + int ret = 0; ++ int val; + if (!ep || !request || !ep->desc) return -EINVAL; @@ -283079,7 +338514,32 @@ index 1f3b4a1422126..1802f6818e632 100644 spin_lock_irqsave(&priv_dev->lock, flags); priv_req = to_cdns3_request(request); -@@ -2696,6 +2693,7 @@ int __cdns3_gadget_ep_clear_halt(struct cdns3_endpoint *priv_ep) +@@ -2648,6 +2646,13 @@ found: + + /* Update ring only if removed request is on pending_req_list list */ + if (req_on_hw_ring && link_trb) { ++ /* Stop DMA */ ++ writel(EP_CMD_DFLUSH, &priv_dev->regs->ep_cmd); ++ ++ /* wait for DFLUSH cleared */ ++ readl_poll_timeout_atomic(&priv_dev->regs->ep_cmd, val, ++ !(val & EP_CMD_DFLUSH), 1, 1000); ++ + link_trb->buffer = cpu_to_le32(TRB_BUFFER(priv_ep->trb_pool_dma + + ((priv_req->end_trb + 1) * TRB_SIZE))); + link_trb->control = cpu_to_le32((le32_to_cpu(link_trb->control) & TRB_CYCLE) | +@@ -2659,6 +2664,10 @@ found: + + cdns3_gadget_giveback(priv_ep, priv_req, -ECONNRESET); + ++ req = cdns3_next_request(&priv_ep->pending_req_list); ++ if (req) ++ cdns3_rearm_transfer(priv_ep, 1); ++ + not_found: + spin_unlock_irqrestore(&priv_dev->lock, flags); + return ret; +@@ -2696,6 +2705,7 @@ int __cdns3_gadget_ep_clear_halt(struct cdns3_endpoint *priv_ep) struct usb_request *request; struct cdns3_request *priv_req; struct cdns3_trb *trb = NULL; @@ -283087,7 +338547,7 @@ index 1f3b4a1422126..1802f6818e632 100644 int ret; int val; -@@ -2705,8 +2703,10 @@ int __cdns3_gadget_ep_clear_halt(struct cdns3_endpoint *priv_ep) +@@ -2705,8 +2715,10 @@ int __cdns3_gadget_ep_clear_halt(struct cdns3_endpoint *priv_ep) if (request) { priv_req = to_cdns3_request(request); trb = priv_req->trb; @@ -283099,7 +338559,7 @@ index 1f3b4a1422126..1802f6818e632 100644 } writel(EP_CMD_CSTALL | EP_CMD_EPRST, &priv_dev->regs->ep_cmd); -@@ -2721,7 +2721,7 @@ int __cdns3_gadget_ep_clear_halt(struct cdns3_endpoint *priv_ep) +@@ -2721,7 +2733,7 @@ int __cdns3_gadget_ep_clear_halt(struct cdns3_endpoint *priv_ep) if (request) { if (trb) @@ -283476,10 +338936,37 @@ index a8776df2d4e0c..f0ca865cce2a0 100644 } diff --git a/drivers/usb/cdns3/cdnsp-gadget.c 
b/drivers/usb/cdns3/cdnsp-gadget.c -index 27df0c6978978..e85bf768c66da 100644 +index 27df0c6978978..068ccbd144b24 100644 --- a/drivers/usb/cdns3/cdnsp-gadget.c +++ b/drivers/usb/cdns3/cdnsp-gadget.c -@@ -1541,15 +1541,27 @@ static int cdnsp_gadget_pullup(struct usb_gadget *gadget, int is_on) +@@ -600,11 +600,11 @@ int cdnsp_halt_endpoint(struct cdnsp_device *pdev, + + trace_cdnsp_ep_halt(value ? "Set" : "Clear"); + +- if (value) { +- ret = cdnsp_cmd_stop_ep(pdev, pep); +- if (ret) +- return ret; ++ ret = cdnsp_cmd_stop_ep(pdev, pep); ++ if (ret) ++ return ret; + ++ if (value) { + if (GET_EP_CTX_STATE(pep->out_ctx) == EP_STATE_STOPPED) { + cdnsp_queue_halt_endpoint(pdev, pep->idx); + cdnsp_ring_cmd_db(pdev); +@@ -613,10 +613,6 @@ int cdnsp_halt_endpoint(struct cdnsp_device *pdev, + + pep->ep_state |= EP_HALTED; + } else { +- /* +- * In device mode driver can call reset endpoint command +- * from any endpoint state. +- */ + cdnsp_queue_reset_ep(pdev, pep->idx); + cdnsp_ring_cmd_db(pdev); + ret = cdnsp_wait_for_cmd_compl(pdev); +@@ -1541,15 +1537,27 @@ static int cdnsp_gadget_pullup(struct usb_gadget *gadget, int is_on) { struct cdnsp_device *pdev = gadget_to_cdnsp(gadget); struct cdns *cdns = dev_get_drvdata(pdev->dev); @@ -283522,7 +339009,7 @@ index ad9aee3f1e398..97866bfb2da9d 100644 /* Fill the endpoint context */ diff --git a/drivers/usb/cdns3/cdnsp-ring.c b/drivers/usb/cdns3/cdnsp-ring.c -index 1b1438457fb04..794e413800ae8 100644 +index 1b1438457fb04..b23e543b3a3d5 100644 --- a/drivers/usb/cdns3/cdnsp-ring.c +++ b/drivers/usb/cdns3/cdnsp-ring.c @@ -1029,6 +1029,8 @@ static void cdnsp_process_ctrl_td(struct cdnsp_device *pdev, @@ -283550,7 +339037,24 @@ index 1b1438457fb04..794e413800ae8 100644 spin_unlock_irqrestore(&pdev->lock, flags); return IRQ_HANDLED; } -@@ -1932,13 +1941,16 @@ int cdnsp_queue_bulk_tx(struct cdnsp_device *pdev, struct cdnsp_request *preq) +@@ -1754,10 +1763,15 @@ static u32 cdnsp_td_remainder(struct cdnsp_device *pdev, + int trb_buff_len, + unsigned int td_total_len, + struct cdnsp_request *preq, +- bool more_trbs_coming) ++ bool more_trbs_coming, ++ bool zlp) + { + u32 maxp, total_packet_count; + ++ /* Before ZLP driver needs set TD_SIZE = 1. */ ++ if (zlp) ++ return 1; ++ + /* One TRB with a zero-length data packet. */ + if (!more_trbs_coming || (transferred == 0 && trb_buff_len == 0) || + trb_buff_len == td_total_len) +@@ -1932,13 +1946,16 @@ int cdnsp_queue_bulk_tx(struct cdnsp_device *pdev, struct cdnsp_request *preq) } if (enqd_len + trb_buff_len >= full_len) { @@ -283574,7 +339078,14 @@ index 1b1438457fb04..794e413800ae8 100644 } /* Only set interrupt on short packet for OUT endpoints. */ -@@ -1953,7 +1965,7 @@ int cdnsp_queue_bulk_tx(struct cdnsp_device *pdev, struct cdnsp_request *preq) +@@ -1948,12 +1965,13 @@ int cdnsp_queue_bulk_tx(struct cdnsp_device *pdev, struct cdnsp_request *preq) + /* Set the TRB length, TD size, and interrupter fields. 
*/ + remainder = cdnsp_td_remainder(pdev, enqd_len, trb_buff_len, + full_len, preq, +- more_trbs_coming); ++ more_trbs_coming, ++ zero_len_trb); + length_field = TRB_LEN(trb_buff_len) | TRB_TD_SIZE(remainder) | TRB_INTR_TARGET(0); @@ -283583,6 +339094,102 @@ index 1b1438457fb04..794e413800ae8 100644 lower_32_bits(send_addr), upper_32_bits(send_addr), length_field, +@@ -1988,10 +2006,11 @@ int cdnsp_queue_bulk_tx(struct cdnsp_device *pdev, struct cdnsp_request *preq) + + int cdnsp_queue_ctrl_tx(struct cdnsp_device *pdev, struct cdnsp_request *preq) + { +- u32 field, length_field, remainder; ++ u32 field, length_field, zlp = 0; + struct cdnsp_ep *pep = preq->pep; + struct cdnsp_ring *ep_ring; + int num_trbs; ++ u32 maxp; + int ret; + + ep_ring = cdnsp_request_to_transfer_ring(pdev, preq); +@@ -2001,26 +2020,33 @@ int cdnsp_queue_ctrl_tx(struct cdnsp_device *pdev, struct cdnsp_request *preq) + /* 1 TRB for data, 1 for status */ + num_trbs = (pdev->three_stage_setup) ? 2 : 1; + ++ maxp = usb_endpoint_maxp(pep->endpoint.desc); ++ ++ if (preq->request.zero && preq->request.length && ++ (preq->request.length % maxp == 0)) { ++ num_trbs++; ++ zlp = 1; ++ } ++ + ret = cdnsp_prepare_transfer(pdev, preq, num_trbs); + if (ret) + return ret; + + /* If there's data, queue data TRBs */ +- if (pdev->ep0_expect_in) +- field = TRB_TYPE(TRB_DATA) | TRB_IOC; +- else +- field = TRB_ISP | TRB_TYPE(TRB_DATA) | TRB_IOC; +- + if (preq->request.length > 0) { +- remainder = cdnsp_td_remainder(pdev, 0, preq->request.length, +- preq->request.length, preq, 1); ++ field = TRB_TYPE(TRB_DATA); + +- length_field = TRB_LEN(preq->request.length) | +- TRB_TD_SIZE(remainder) | TRB_INTR_TARGET(0); ++ if (zlp) ++ field |= TRB_CHAIN; ++ else ++ field |= TRB_IOC | (pdev->ep0_expect_in ? 0 : TRB_ISP); + + if (pdev->ep0_expect_in) + field |= TRB_DIR_IN; + ++ length_field = TRB_LEN(preq->request.length) | ++ TRB_TD_SIZE(zlp) | TRB_INTR_TARGET(0); ++ + cdnsp_queue_trb(pdev, ep_ring, true, + lower_32_bits(preq->request.dma), + upper_32_bits(preq->request.dma), length_field, +@@ -2028,6 +2054,20 @@ int cdnsp_queue_ctrl_tx(struct cdnsp_device *pdev, struct cdnsp_request *preq) + TRB_SETUPID(pdev->setup_id) | + pdev->setup_speed); + ++ if (zlp) { ++ field = TRB_TYPE(TRB_NORMAL) | TRB_IOC; ++ ++ if (!pdev->ep0_expect_in) ++ field = TRB_ISP; ++ ++ cdnsp_queue_trb(pdev, ep_ring, true, ++ lower_32_bits(preq->request.dma), ++ upper_32_bits(preq->request.dma), 0, ++ field | ep_ring->cycle_state | ++ TRB_SETUPID(pdev->setup_id) | ++ pdev->setup_speed); ++ } ++ + pdev->ep0_stage = CDNSP_DATA_STAGE; + } + +@@ -2064,7 +2104,8 @@ int cdnsp_cmd_stop_ep(struct cdnsp_device *pdev, struct cdnsp_ep *pep) + u32 ep_state = GET_EP_CTX_STATE(pep->out_ctx); + int ret = 0; + +- if (ep_state == EP_STATE_STOPPED || ep_state == EP_STATE_DISABLED) { ++ if (ep_state == EP_STATE_STOPPED || ep_state == EP_STATE_DISABLED || ++ ep_state == EP_STATE_HALTED) { + trace_cdnsp_ep_stopped_or_disabled(pep->out_ctx); + goto ep_stopped; + } +@@ -2213,7 +2254,7 @@ static int cdnsp_queue_isoc_tx(struct cdnsp_device *pdev, + /* Set the TRB length, TD size, & interrupter fields. 
*/ + remainder = cdnsp_td_remainder(pdev, running_total, + trb_buff_len, td_len, preq, +- more_trbs_coming); ++ more_trbs_coming, 0); + + length_field = TRB_LEN(trb_buff_len) | TRB_INTR_TARGET(0); + diff --git a/drivers/usb/cdns3/cdnsp-trace.h b/drivers/usb/cdns3/cdnsp-trace.h index 6a2571c6aa9ed..5983dfb996537 100644 --- a/drivers/usb/cdns3/cdnsp-trace.h @@ -283618,6 +339225,83 @@ index 55c73b1d87047..d00ff98dffabf 100644 return true; } return false; +diff --git a/drivers/usb/cdns3/host.c b/drivers/usb/cdns3/host.c +index 84dadfa726aa6..3e85b5d3cf7a1 100644 +--- a/drivers/usb/cdns3/host.c ++++ b/drivers/usb/cdns3/host.c +@@ -23,11 +23,37 @@ + #define CFG_RXDET_P3_EN BIT(15) + #define LPM_2_STB_SWITCH_EN BIT(25) + +-static int xhci_cdns3_suspend_quirk(struct usb_hcd *hcd); ++static void xhci_cdns3_plat_start(struct usb_hcd *hcd) ++{ ++ struct xhci_hcd *xhci = hcd_to_xhci(hcd); ++ u32 value; ++ ++ /* set usbcmd.EU3S */ ++ value = readl(&xhci->op_regs->command); ++ value |= CMD_PM_INDEX; ++ writel(value, &xhci->op_regs->command); ++ ++ if (hcd->regs) { ++ value = readl(hcd->regs + XECP_AUX_CTRL_REG1); ++ value |= CFG_RXDET_P3_EN; ++ writel(value, hcd->regs + XECP_AUX_CTRL_REG1); ++ ++ value = readl(hcd->regs + XECP_PORT_CAP_REG); ++ value |= LPM_2_STB_SWITCH_EN; ++ writel(value, hcd->regs + XECP_PORT_CAP_REG); ++ } ++} ++ ++static int xhci_cdns3_resume_quirk(struct usb_hcd *hcd) ++{ ++ xhci_cdns3_plat_start(hcd); ++ return 0; ++} + + static const struct xhci_plat_priv xhci_plat_cdns3_xhci = { + .quirks = XHCI_SKIP_PHY_INIT | XHCI_AVOID_BEI, +- .suspend_quirk = xhci_cdns3_suspend_quirk, ++ .plat_start = xhci_cdns3_plat_start, ++ .resume_quirk = xhci_cdns3_resume_quirk, + }; + + static int __cdns_host_init(struct cdns *cdns) +@@ -89,32 +115,6 @@ err1: + return ret; + } + +-static int xhci_cdns3_suspend_quirk(struct usb_hcd *hcd) +-{ +- struct xhci_hcd *xhci = hcd_to_xhci(hcd); +- u32 value; +- +- if (pm_runtime_status_suspended(hcd->self.controller)) +- return 0; +- +- /* set usbcmd.EU3S */ +- value = readl(&xhci->op_regs->command); +- value |= CMD_PM_INDEX; +- writel(value, &xhci->op_regs->command); +- +- if (hcd->regs) { +- value = readl(hcd->regs + XECP_AUX_CTRL_REG1); +- value |= CFG_RXDET_P3_EN; +- writel(value, hcd->regs + XECP_AUX_CTRL_REG1); +- +- value = readl(hcd->regs + XECP_PORT_CAP_REG); +- value |= LPM_2_STB_SWITCH_EN; +- writel(value, hcd->regs + XECP_PORT_CAP_REG); +- } +- +- return 0; +-} +- + static void cdns_host_exit(struct cdns *cdns) + { + kfree(cdns->xhci_plat_data); diff --git a/drivers/usb/chipidea/ci_hdrc_imx.c b/drivers/usb/chipidea/ci_hdrc_imx.c index f1d100671ee6a..097142ffb1842 100644 --- a/drivers/usb/chipidea/ci_hdrc_imx.c @@ -283726,6 +339410,21 @@ index 2b18f5088ae4a..a56f06368d142 100644 } static int ci_controller_resume(struct device *dev) +diff --git a/drivers/usb/chipidea/otg_fsm.c b/drivers/usb/chipidea/otg_fsm.c +index 6ed4b00dba961..7a2a9559693fb 100644 +--- a/drivers/usb/chipidea/otg_fsm.c ++++ b/drivers/usb/chipidea/otg_fsm.c +@@ -256,8 +256,10 @@ static void ci_otg_del_timer(struct ci_hdrc *ci, enum otg_fsm_timer t) + ci->enabled_otg_timer_bits &= ~(1 << t); + if (ci->next_otg_timer == t) { + if (ci->enabled_otg_timer_bits == 0) { ++ spin_unlock_irqrestore(&ci->lock, flags); + /* No enabled timers after delete it */ + hrtimer_cancel(&ci->otg_fsm_hrtimer); ++ spin_lock_irqsave(&ci->lock, flags); + ci->next_otg_timer = NUM_OTG_FSM_TIMERS; + } else { + /* Find the next timer */ diff --git a/drivers/usb/chipidea/udc.c b/drivers/usb/chipidea/udc.c index 
8834ca6137219..aacc37736db6e 100644 --- a/drivers/usb/chipidea/udc.c @@ -284415,10 +340114,20 @@ index 7ee6e4cc0d89e..6c5934dbe9b3f 100644 /* enable irqs just before we start the controller, diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c -index 86658a81d2844..98bdae4ac314e 100644 +index 86658a81d2844..2295d69b4cd2d 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c -@@ -1110,7 +1110,10 @@ static void hub_activate(struct usb_hub *hub, enum hub_activation_type type) +@@ -43,6 +43,9 @@ + #define USB_PRODUCT_USB5534B 0x5534 + #define USB_VENDOR_CYPRESS 0x04b4 + #define USB_PRODUCT_CY7C65632 0x6570 ++#define USB_VENDOR_TEXAS_INSTRUMENTS 0x0451 ++#define USB_PRODUCT_TUSB8041_USB3 0x8140 ++#define USB_PRODUCT_TUSB8041_USB2 0x8142 + #define HUB_QUIRK_CHECK_PORT_AUTOSUSPEND 0x01 + #define HUB_QUIRK_DISABLE_AUTOSUSPEND 0x02 + +@@ -1110,7 +1113,10 @@ static void hub_activate(struct usb_hub *hub, enum hub_activation_type type) } else { hub_power_on(hub, true); } @@ -284430,7 +340139,7 @@ index 86658a81d2844..98bdae4ac314e 100644 init2: /* -@@ -1225,7 +1228,7 @@ static void hub_activate(struct usb_hub *hub, enum hub_activation_type type) +@@ -1225,7 +1231,7 @@ static void hub_activate(struct usb_hub *hub, enum hub_activation_type type) */ if (portchange || (hub_is_superspeed(hub->hdev) && port_resumed)) @@ -284439,7 +340148,19 @@ index 86658a81d2844..98bdae4ac314e 100644 } else if (udev->persist_enabled) { #ifdef CONFIG_PM -@@ -4700,8 +4703,6 @@ hub_port_init(struct usb_hub *hub, struct usb_device *udev, int port1, +@@ -2376,9 +2382,8 @@ static int usb_enumerate_device_otg(struct usb_device *udev) + * usb_enumerate_device - Read device configs/intfs/otg (usbcore-internal) + * @udev: newly addressed device (in ADDRESS state) + * +- * This is only called by usb_new_device() and usb_authorize_device() +- * and FIXME -- all comments that apply to them apply here wrt to +- * environment. ++ * This is only called by usb_new_device() -- all comments that apply there ++ * apply here wrt to environment. + * + * If the device is WUSB and not authorized, we don't attempt to read + * the string descriptors, as they will be errored out by the device +@@ -4700,8 +4705,6 @@ hub_port_init(struct usb_hub *hub, struct usb_device *udev, int port1, if (oldspeed == USB_SPEED_LOW) delay = HUB_LONG_RESET_TIME; @@ -284448,7 +340169,7 @@ index 86658a81d2844..98bdae4ac314e 100644 /* Reset the device; full speed may morph to high speed */ /* FIXME a USB 2.0 device may morph into SuperSpeed on reset. 
*/ retval = hub_port_reset(hub, port1, udev, delay, false); -@@ -5016,7 +5017,6 @@ fail: +@@ -5016,7 +5019,6 @@ fail: hub_port_disable(hub, port1, 0); update_devnum(udev, devnum); /* for disconnect processing */ } @@ -284456,7 +340177,7 @@ index 86658a81d2844..98bdae4ac314e 100644 return retval; } -@@ -5191,6 +5191,7 @@ static void hub_port_connect(struct usb_hub *hub, int port1, u16 portstatus, +@@ -5191,6 +5193,7 @@ static void hub_port_connect(struct usb_hub *hub, int port1, u16 portstatus, struct usb_port *port_dev = hub->ports[port1 - 1]; struct usb_device *udev = port_dev->child; static int unreliable_port = -1; @@ -284464,7 +340185,7 @@ index 86658a81d2844..98bdae4ac314e 100644 /* Disconnect any existing devices under this port */ if (udev) { -@@ -5246,8 +5247,11 @@ static void hub_port_connect(struct usb_hub *hub, int port1, u16 portstatus, +@@ -5246,8 +5249,11 @@ static void hub_port_connect(struct usb_hub *hub, int port1, u16 portstatus, unit_load = 100; status = 0; @@ -284477,7 +340198,7 @@ index 86658a81d2844..98bdae4ac314e 100644 /* reallocate for each attempt, since references * to the previous one can escape in various ways */ -@@ -5255,6 +5259,8 @@ static void hub_port_connect(struct usb_hub *hub, int port1, u16 portstatus, +@@ -5255,6 +5261,8 @@ static void hub_port_connect(struct usb_hub *hub, int port1, u16 portstatus, if (!udev) { dev_err(&port_dev->dev, "couldn't allocate usb_device\n"); @@ -284486,7 +340207,7 @@ index 86658a81d2844..98bdae4ac314e 100644 goto done; } -@@ -5276,12 +5282,14 @@ static void hub_port_connect(struct usb_hub *hub, int port1, u16 portstatus, +@@ -5276,12 +5284,14 @@ static void hub_port_connect(struct usb_hub *hub, int port1, u16 portstatus, } /* reset (non-USB 3.0 devices) and get descriptor */ @@ -284503,7 +340224,7 @@ index 86658a81d2844..98bdae4ac314e 100644 if (udev->quirks & USB_QUIRK_DELAY_INIT) msleep(2000); -@@ -5374,6 +5382,10 @@ loop: +@@ -5374,6 +5384,10 @@ loop: usb_ep0_reinit(udev); release_devnum(udev); hub_free_dev(udev); @@ -284514,7 +340235,24 @@ index 86658a81d2844..98bdae4ac314e 100644 usb_put_dev(udev); if ((status == -ENOTCONN) || (status == -ENOTSUPP)) break; -@@ -5915,6 +5927,8 @@ static int usb_reset_and_verify_device(struct usb_device *udev) +@@ -5779,6 +5793,16 @@ static const struct usb_device_id hub_id_table[] = { + .idVendor = USB_VENDOR_GENESYS_LOGIC, + .bInterfaceClass = USB_CLASS_HUB, + .driver_info = HUB_QUIRK_CHECK_PORT_AUTOSUSPEND}, ++ { .match_flags = USB_DEVICE_ID_MATCH_VENDOR ++ | USB_DEVICE_ID_MATCH_PRODUCT, ++ .idVendor = USB_VENDOR_TEXAS_INSTRUMENTS, ++ .idProduct = USB_PRODUCT_TUSB8041_USB2, ++ .driver_info = HUB_QUIRK_DISABLE_AUTOSUSPEND}, ++ { .match_flags = USB_DEVICE_ID_MATCH_VENDOR ++ | USB_DEVICE_ID_MATCH_PRODUCT, ++ .idVendor = USB_VENDOR_TEXAS_INSTRUMENTS, ++ .idProduct = USB_PRODUCT_TUSB8041_USB3, ++ .driver_info = HUB_QUIRK_DISABLE_AUTOSUSPEND}, + { .match_flags = USB_DEVICE_ID_MATCH_DEV_CLASS, + .bDeviceClass = USB_CLASS_HUB}, + { .match_flags = USB_DEVICE_ID_MATCH_INT_CLASS, +@@ -5915,6 +5939,8 @@ static int usb_reset_and_verify_device(struct usb_device *udev) bos = udev->bos; udev->bos = NULL; @@ -284523,7 +340261,7 @@ index 86658a81d2844..98bdae4ac314e 100644 for (i = 0; i < PORT_INIT_TRIES; ++i) { /* ep0 maxpacket size may change; let the HCD know about it. 
-@@ -5924,6 +5938,7 @@ static int usb_reset_and_verify_device(struct usb_device *udev) +@@ -5924,6 +5950,7 @@ static int usb_reset_and_verify_device(struct usb_device *udev) if (ret >= 0 || ret == -ENOTCONN || ret == -ENODEV) break; } @@ -284531,7 +340269,7 @@ index 86658a81d2844..98bdae4ac314e 100644 if (ret < 0) goto re_enumerate; -@@ -6028,6 +6043,11 @@ re_enumerate_no_bos: +@@ -6028,6 +6055,11 @@ re_enumerate_no_bos: * the reset is over (using their post_reset method). * * Return: The same as for usb_reset_and_verify_device(). @@ -284543,7 +340281,7 @@ index 86658a81d2844..98bdae4ac314e 100644 * * Note: * The caller must own the device lock. For example, it's safe to use -@@ -6061,6 +6081,10 @@ int usb_reset_device(struct usb_device *udev) +@@ -6061,6 +6093,10 @@ int usb_reset_device(struct usb_device *udev) return -EISDIR; } @@ -284554,7 +340292,7 @@ index 86658a81d2844..98bdae4ac314e 100644 port_dev = hub->ports[udev->portnum - 1]; /* -@@ -6125,6 +6149,7 @@ int usb_reset_device(struct usb_device *udev) +@@ -6125,6 +6161,7 @@ int usb_reset_device(struct usb_device *udev) usb_autosuspend_device(udev); memalloc_noio_restore(noio_flag); @@ -284563,10 +340301,36 @@ index 86658a81d2844..98bdae4ac314e 100644 } EXPORT_SYMBOL_GPL(usb_reset_device); diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c -index 8239fe7129dd7..999b7c9697fcd 100644 +index 8239fe7129dd7..934b3d997702e 100644 --- a/drivers/usb/core/quirks.c +++ b/drivers/usb/core/quirks.c -@@ -404,6 +404,9 @@ static const struct usb_device_id usb_quirk_list[] = { +@@ -362,6 +362,9 @@ static const struct usb_device_id usb_quirk_list[] = { + { USB_DEVICE(0x0781, 0x5583), .driver_info = USB_QUIRK_NO_LPM }, + { USB_DEVICE(0x0781, 0x5591), .driver_info = USB_QUIRK_NO_LPM }, + ++ /* Realforce 87U Keyboard */ ++ { USB_DEVICE(0x0853, 0x011b), .driver_info = USB_QUIRK_NO_LPM }, ++ + /* M-Systems Flash Disk Pioneers */ + { USB_DEVICE(0x08ec, 0x1000), .driver_info = USB_QUIRK_RESET_RESUME }, + +@@ -388,6 +391,15 @@ static const struct usb_device_id usb_quirk_list[] = { + /* Kingston DataTraveler 3.0 */ + { USB_DEVICE(0x0951, 0x1666), .driver_info = USB_QUIRK_NO_LPM }, + ++ /* NVIDIA Jetson devices in Force Recovery mode */ ++ { USB_DEVICE(0x0955, 0x7018), .driver_info = USB_QUIRK_RESET_RESUME }, ++ { USB_DEVICE(0x0955, 0x7019), .driver_info = USB_QUIRK_RESET_RESUME }, ++ { USB_DEVICE(0x0955, 0x7418), .driver_info = USB_QUIRK_RESET_RESUME }, ++ { USB_DEVICE(0x0955, 0x7721), .driver_info = USB_QUIRK_RESET_RESUME }, ++ { USB_DEVICE(0x0955, 0x7c18), .driver_info = USB_QUIRK_RESET_RESUME }, ++ { USB_DEVICE(0x0955, 0x7e19), .driver_info = USB_QUIRK_RESET_RESUME }, ++ { USB_DEVICE(0x0955, 0x7f21), .driver_info = USB_QUIRK_RESET_RESUME }, ++ + /* X-Rite/Gretag-Macbeth Eye-One Pro display colorimeter */ + { USB_DEVICE(0x0971, 0x2000), .driver_info = USB_QUIRK_NO_SET_INTF }, + +@@ -404,6 +416,9 @@ static const struct usb_device_id usb_quirk_list[] = { { USB_DEVICE(0x0b05, 0x17e0), .driver_info = USB_QUIRK_IGNORE_REMOTE_WAKEUP }, @@ -284576,7 +340340,7 @@ index 8239fe7129dd7..999b7c9697fcd 100644 /* Realtek hub in Dell WD19 (Type-C) */ { USB_DEVICE(0x0bda, 0x0487), .driver_info = USB_QUIRK_NO_LPM }, -@@ -434,6 +437,16 @@ static const struct usb_device_id usb_quirk_list[] = { +@@ -434,6 +449,16 @@ static const struct usb_device_id usb_quirk_list[] = { { USB_DEVICE(0x1532, 0x0116), .driver_info = USB_QUIRK_LINEAR_UFRAME_INTR_BINTERVAL }, @@ -284593,10 +340357,13 @@ index 8239fe7129dd7..999b7c9697fcd 100644 /* Lenovo ThinkCenter A630Z TI024Gen3 
usb-audio */ { USB_DEVICE(0x17ef, 0xa012), .driver_info = USB_QUIRK_DISCONNECT_SUSPEND }, -@@ -501,6 +514,12 @@ static const struct usb_device_id usb_quirk_list[] = { +@@ -501,6 +526,15 @@ static const struct usb_device_id usb_quirk_list[] = { /* DJI CineSSD */ { USB_DEVICE(0x2ca3, 0x0031), .driver_info = USB_QUIRK_NO_LPM }, ++ /* Alcor Link AK9563 SC Reader used in 2022 Lenovo ThinkPads */ ++ { USB_DEVICE(0x2ce3, 0x9563), .driver_info = USB_QUIRK_NO_LPM }, ++ + /* DELL USB GEN2 */ + { USB_DEVICE(0x413c, 0xb062), .driver_info = USB_QUIRK_NO_LPM | USB_QUIRK_RESET_RESUME }, + @@ -284606,6 +340373,30 @@ index 8239fe7129dd7..999b7c9697fcd 100644 /* INTEL VALUE SSD */ { USB_DEVICE(0x8086, 0xf1a5), .driver_info = USB_QUIRK_RESET_RESUME }, +diff --git a/drivers/usb/core/sysfs.c b/drivers/usb/core/sysfs.c +index fa2e49d432ff6..60ee0469d86e7 100644 +--- a/drivers/usb/core/sysfs.c ++++ b/drivers/usb/core/sysfs.c +@@ -868,11 +868,7 @@ read_descriptors(struct file *filp, struct kobject *kobj, + size_t srclen, n; + int cfgno; + void *src; +- int retval; + +- retval = usb_lock_device_interruptible(udev); +- if (retval < 0) +- return -EINTR; + /* The binary attribute begins with the device descriptor. + * Following that are the raw descriptor entries for all the + * configurations (config plus subsidiary descriptors). +@@ -897,7 +893,6 @@ read_descriptors(struct file *filp, struct kobject *kobj, + off -= srclen; + } + } +- usb_unlock_device(udev); + return count - nleft; + } + diff --git a/drivers/usb/core/urb.c b/drivers/usb/core/urb.c index 30727729a44cc..33d62d7e3929f 100644 --- a/drivers/usb/core/urb.c @@ -284636,6 +340427,82 @@ index 30727729a44cc..33d62d7e3929f 100644 if (!urb->dev || !urb->ep) return; +diff --git a/drivers/usb/core/usb-acpi.c b/drivers/usb/core/usb-acpi.c +index 50b2fc7fcc0e3..8751276ef5789 100644 +--- a/drivers/usb/core/usb-acpi.c ++++ b/drivers/usb/core/usb-acpi.c +@@ -37,6 +37,71 @@ bool usb_acpi_power_manageable(struct usb_device *hdev, int index) + } + EXPORT_SYMBOL_GPL(usb_acpi_power_manageable); + ++#define UUID_USB_CONTROLLER_DSM "ce2ee385-00e6-48cb-9f05-2edb927c4899" ++#define USB_DSM_DISABLE_U1_U2_FOR_PORT 5 ++ ++/** ++ * usb_acpi_port_lpm_incapable - check if lpm should be disabled for a port. ++ * @hdev: USB device belonging to the usb hub ++ * @index: zero based port index ++ * ++ * Some USB3 ports may not support USB3 link power management U1/U2 states ++ * due to different retimer setup. ACPI provides _DSM method which returns 0x01 ++ * if U1 and U2 states should be disabled. 
Evaluate _DSM with: ++ * Arg0: UUID = ce2ee385-00e6-48cb-9f05-2edb927c4899 ++ * Arg1: Revision ID = 0 ++ * Arg2: Function Index = 5 ++ * Arg3: (empty) ++ * ++ * Return 1 if USB3 port is LPM incapable, negative on error, otherwise 0 ++ */ ++ ++int usb_acpi_port_lpm_incapable(struct usb_device *hdev, int index) ++{ ++ union acpi_object *obj; ++ acpi_handle port_handle; ++ int port1 = index + 1; ++ guid_t guid; ++ int ret; ++ ++ ret = guid_parse(UUID_USB_CONTROLLER_DSM, &guid); ++ if (ret) ++ return ret; ++ ++ port_handle = usb_get_hub_port_acpi_handle(hdev, port1); ++ if (!port_handle) { ++ dev_dbg(&hdev->dev, "port-%d no acpi handle\n", port1); ++ return -ENODEV; ++ } ++ ++ if (!acpi_check_dsm(port_handle, &guid, 0, ++ BIT(USB_DSM_DISABLE_U1_U2_FOR_PORT))) { ++ dev_dbg(&hdev->dev, "port-%d no _DSM function %d\n", ++ port1, USB_DSM_DISABLE_U1_U2_FOR_PORT); ++ return -ENODEV; ++ } ++ ++ obj = acpi_evaluate_dsm(port_handle, &guid, 0, ++ USB_DSM_DISABLE_U1_U2_FOR_PORT, NULL); ++ ++ if (!obj) ++ return -ENODEV; ++ ++ if (obj->type != ACPI_TYPE_INTEGER) { ++ dev_dbg(&hdev->dev, "evaluate port-%d _DSM failed\n", port1); ++ ACPI_FREE(obj); ++ return -EINVAL; ++ } ++ ++ if (obj->integer.value == 0x01) ++ ret = 1; ++ ++ ACPI_FREE(obj); ++ ++ return ret; ++} ++EXPORT_SYMBOL_GPL(usb_acpi_port_lpm_incapable); ++ + /** + * usb_acpi_set_power_state - control usb port's power via acpi power + * resource diff --git a/drivers/usb/dwc2/core.h b/drivers/usb/dwc2/core.h index cb9059a8444b5..71e62b3081dbb 100644 --- a/drivers/usb/dwc2/core.h @@ -284920,10 +340787,10 @@ index c8f18f3ba9e35..265d437ca0f11 100644 retval = dwc2_drd_init(hsotg); diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c -index 0104a80b185e1..a2f3e56aba05c 100644 +index 0104a80b185e1..7258e640e9ee1 100644 --- a/drivers/usb/dwc3/core.c +++ b/drivers/usb/dwc3/core.c -@@ -114,8 +114,6 @@ void dwc3_set_prtcap(struct dwc3 *dwc, u32 mode) +@@ -114,29 +114,31 @@ void dwc3_set_prtcap(struct dwc3 *dwc, u32 mode) dwc->current_dr_role = mode; } @@ -284932,7 +340799,35 @@ index 0104a80b185e1..a2f3e56aba05c 100644 static void __dwc3_set_mode(struct work_struct *work) { struct dwc3 *dwc = work_to_dwc(work); -@@ -158,8 +156,13 @@ static void __dwc3_set_mode(struct work_struct *work) + unsigned long flags; + int ret; + u32 reg; ++ u32 desired_dr_role; + + mutex_lock(&dwc->mutex); ++ spin_lock_irqsave(&dwc->lock, flags); ++ desired_dr_role = dwc->desired_dr_role; ++ spin_unlock_irqrestore(&dwc->lock, flags); + + pm_runtime_get_sync(dwc->dev); + + if (dwc->current_dr_role == DWC3_GCTL_PRTCAP_OTG) + dwc3_otg_update(dwc, 0); + +- if (!dwc->desired_dr_role) ++ if (!desired_dr_role) + goto out; + +- if (dwc->desired_dr_role == dwc->current_dr_role) ++ if (desired_dr_role == dwc->current_dr_role) + goto out; + +- if (dwc->desired_dr_role == DWC3_GCTL_PRTCAP_OTG && dwc->edev) ++ if (desired_dr_role == DWC3_GCTL_PRTCAP_OTG && dwc->edev) + goto out; + + switch (dwc->current_dr_role) { +@@ -158,8 +160,13 @@ static void __dwc3_set_mode(struct work_struct *work) break; } @@ -284944,11 +340839,25 @@ index 0104a80b185e1..a2f3e56aba05c 100644 + */ + if (dwc->current_dr_role && ((DWC3_IP_IS(DWC3) || + DWC3_VER_IS_PRIOR(DWC31, 190A)) && -+ dwc->desired_dr_role != DWC3_GCTL_PRTCAP_OTG)) { ++ desired_dr_role != DWC3_GCTL_PRTCAP_OTG)) { reg = dwc3_readl(dwc->regs, DWC3_GCTL); reg |= DWC3_GCTL_CORESOFTRESET; dwc3_writel(dwc->regs, DWC3_GCTL, reg); -@@ -260,7 +263,7 @@ u32 dwc3_core_fifo_space(struct dwc3_ep *dep, u8 type) +@@ -179,11 +186,11 @@ static void 
__dwc3_set_mode(struct work_struct *work) + + spin_lock_irqsave(&dwc->lock, flags); + +- dwc3_set_prtcap(dwc, dwc->desired_dr_role); ++ dwc3_set_prtcap(dwc, desired_dr_role); + + spin_unlock_irqrestore(&dwc->lock, flags); + +- switch (dwc->desired_dr_role) { ++ switch (desired_dr_role) { + case DWC3_GCTL_PRTCAP_HOST: + ret = dwc3_host_init(dwc); + if (ret) { +@@ -260,7 +267,7 @@ u32 dwc3_core_fifo_space(struct dwc3_ep *dep, u8 type) * dwc3_core_soft_reset - Issues core soft reset and PHY reset * @dwc: pointer to our context structure */ @@ -284957,7 +340866,7 @@ index 0104a80b185e1..a2f3e56aba05c 100644 { u32 reg; int retries = 1000; -@@ -275,7 +278,8 @@ static int dwc3_core_soft_reset(struct dwc3 *dwc) +@@ -275,7 +282,8 @@ static int dwc3_core_soft_reset(struct dwc3 *dwc) reg = dwc3_readl(dwc->regs, DWC3_DCTL); reg |= DWC3_DCTL_CSFTRST; @@ -284967,7 +340876,7 @@ index 0104a80b185e1..a2f3e56aba05c 100644 /* * For DWC_usb31 controller 1.90a and later, the DCTL.CSFRST bit -@@ -725,15 +729,16 @@ static void dwc3_core_exit(struct dwc3 *dwc) +@@ -725,15 +733,16 @@ static void dwc3_core_exit(struct dwc3 *dwc) { dwc3_event_buffers_cleanup(dwc); @@ -284988,7 +340897,22 @@ index 0104a80b185e1..a2f3e56aba05c 100644 clk_bulk_disable_unprepare(dwc->num_clks, dwc->clks); reset_control_assert(dwc->reset); } -@@ -1036,6 +1041,21 @@ static int dwc3_core_init(struct dwc3 *dwc) +@@ -954,8 +963,13 @@ static int dwc3_core_init(struct dwc3 *dwc) + + if (!dwc->ulpi_ready) { + ret = dwc3_core_ulpi_init(dwc); +- if (ret) ++ if (ret) { ++ if (ret == -ETIMEDOUT) { ++ dwc3_core_soft_reset(dwc); ++ ret = -EPROBE_DEFER; ++ } + goto err0; ++ } + dwc->ulpi_ready = true; + } + +@@ -1036,6 +1050,21 @@ static int dwc3_core_init(struct dwc3 *dwc) dwc3_writel(dwc->regs, DWC3_GUCTL2, reg); } @@ -285010,7 +340934,7 @@ index 0104a80b185e1..a2f3e56aba05c 100644 if (!DWC3_VER_IS_PRIOR(DWC3, 250A)) { reg = dwc3_readl(dwc->regs, DWC3_GUCTL1); -@@ -1268,10 +1288,10 @@ static void dwc3_get_properties(struct dwc3 *dwc) +@@ -1268,10 +1297,10 @@ static void dwc3_get_properties(struct dwc3 *dwc) u8 lpm_nyet_threshold; u8 tx_de_emphasis; u8 hird_threshold; @@ -285025,7 +340949,7 @@ index 0104a80b185e1..a2f3e56aba05c 100644 u8 tx_fifo_resize_max_num; const char *usb_psy_name; int ret; -@@ -1378,6 +1398,8 @@ static void dwc3_get_properties(struct dwc3 *dwc) +@@ -1378,6 +1407,8 @@ static void dwc3_get_properties(struct dwc3 *dwc) "snps,dis-del-phy-power-chg-quirk"); dwc->dis_tx_ipgap_linecheck_quirk = device_property_read_bool(dev, "snps,dis-tx-ipgap-linecheck-quirk"); @@ -285034,7 +340958,7 @@ index 0104a80b185e1..a2f3e56aba05c 100644 dwc->parkmode_disable_ss_quirk = device_property_read_bool(dev, "snps,parkmode-disable-ss-quirk"); -@@ -1565,10 +1587,6 @@ static int dwc3_probe(struct platform_device *pdev) +@@ -1565,10 +1596,6 @@ static int dwc3_probe(struct platform_device *pdev) dwc3_get_properties(dwc); @@ -285045,7 +340969,7 @@ index 0104a80b185e1..a2f3e56aba05c 100644 dwc->reset = devm_reset_control_array_get_optional_shared(dev); if (IS_ERR(dwc->reset)) return PTR_ERR(dwc->reset); -@@ -1605,6 +1623,13 @@ static int dwc3_probe(struct platform_device *pdev) +@@ -1605,6 +1632,13 @@ static int dwc3_probe(struct platform_device *pdev) platform_set_drvdata(pdev, dwc); dwc3_cache_hwparams(dwc); @@ -285059,7 +340983,7 @@ index 0104a80b185e1..a2f3e56aba05c 100644 spin_lock_init(&dwc->lock); mutex_init(&dwc->mutex); -@@ -1654,16 +1679,16 @@ err5: +@@ -1654,16 +1688,16 @@ err5: dwc3_debugfs_exit(dwc); dwc3_event_buffers_cleanup(dwc); @@ 
-285194,6 +341118,35 @@ index d7f76835137fa..81ff21bd405a8 100644 dwc->edev_nb.notifier_call = dwc3_drd_notifier; ret = extcon_register_notifier(dwc->edev, EXTCON_USB_HOST, &dwc->edev_nb); +diff --git a/drivers/usb/dwc3/dwc3-exynos.c b/drivers/usb/dwc3/dwc3-exynos.c +index 0ecf20eeceee9..4be6a873bd071 100644 +--- a/drivers/usb/dwc3/dwc3-exynos.c ++++ b/drivers/usb/dwc3/dwc3-exynos.c +@@ -37,15 +37,6 @@ struct dwc3_exynos { + struct regulator *vdd10; + }; + +-static int dwc3_exynos_remove_child(struct device *dev, void *unused) +-{ +- struct platform_device *pdev = to_platform_device(dev); +- +- platform_device_unregister(pdev); +- +- return 0; +-} +- + static int dwc3_exynos_probe(struct platform_device *pdev) + { + struct dwc3_exynos *exynos; +@@ -142,7 +133,7 @@ static int dwc3_exynos_remove(struct platform_device *pdev) + struct dwc3_exynos *exynos = platform_get_drvdata(pdev); + int i; + +- device_for_each_child(&pdev->dev, NULL, dwc3_exynos_remove_child); ++ of_platform_depopulate(&pdev->dev); + + for (i = exynos->num_clks - 1; i >= 0; i--) + clk_disable_unprepare(exynos->clks[i]); diff --git a/drivers/usb/dwc3/dwc3-meson-g12a.c b/drivers/usb/dwc3/dwc3-meson-g12a.c index d0f9b7c296b0d..bd814df3bf8b8 100644 --- a/drivers/usb/dwc3/dwc3-meson-g12a.c @@ -285271,21 +341224,22 @@ index e196673f5c647..efaf0db595f46 100644 val = dwc3_omap_read_utmi_ctrl(omap); val |= USBOTGSS_UTMI_OTG_CTRL_IDDIG; diff --git a/drivers/usb/dwc3/dwc3-pci.c b/drivers/usb/dwc3/dwc3-pci.c -index 7ff8fc8f79a9b..c52f7b5b5ec00 100644 +index 7ff8fc8f79a9b..46ef3ae9c0dd6 100644 --- a/drivers/usb/dwc3/dwc3-pci.c +++ b/drivers/usb/dwc3/dwc3-pci.c -@@ -43,6 +43,10 @@ +@@ -43,6 +43,11 @@ #define PCI_DEVICE_ID_INTEL_ADLP 0x51ee #define PCI_DEVICE_ID_INTEL_ADLM 0x54ee #define PCI_DEVICE_ID_INTEL_ADLS 0x7ae1 -+#define PCI_DEVICE_ID_INTEL_RPL 0x460e ++#define PCI_DEVICE_ID_INTEL_RPL 0xa70e +#define PCI_DEVICE_ID_INTEL_RPLS 0x7a61 ++#define PCI_DEVICE_ID_INTEL_MTLM 0x7eb1 +#define PCI_DEVICE_ID_INTEL_MTLP 0x7ec1 +#define PCI_DEVICE_ID_INTEL_MTL 0x7e7e #define PCI_DEVICE_ID_INTEL_TGL 0x9a15 #define PCI_DEVICE_ID_AMD_MR 0x163a -@@ -85,8 +89,8 @@ static const struct acpi_gpio_mapping acpi_dwc3_byt_gpios[] = { +@@ -85,8 +90,8 @@ static const struct acpi_gpio_mapping acpi_dwc3_byt_gpios[] = { static struct gpiod_lookup_table platform_bytcr_gpios = { .dev_id = "0000:00:16.0", .table = { @@ -285296,7 +341250,7 @@ index 7ff8fc8f79a9b..c52f7b5b5ec00 100644 {} }, }; -@@ -119,6 +123,13 @@ static const struct property_entry dwc3_pci_intel_properties[] = { +@@ -119,6 +124,13 @@ static const struct property_entry dwc3_pci_intel_properties[] = { {} }; @@ -285310,7 +341264,7 @@ index 7ff8fc8f79a9b..c52f7b5b5ec00 100644 static const struct property_entry dwc3_pci_mrfld_properties[] = { PROPERTY_ENTRY_STRING("dr_mode", "otg"), PROPERTY_ENTRY_STRING("linux,extcon-name", "mrfld_bcove_pwrsrc"), -@@ -161,6 +172,10 @@ static const struct software_node dwc3_pci_intel_swnode = { +@@ -161,6 +173,10 @@ static const struct software_node dwc3_pci_intel_swnode = { .properties = dwc3_pci_intel_properties, }; @@ -285321,7 +341275,7 @@ index 7ff8fc8f79a9b..c52f7b5b5ec00 100644 static const struct software_node dwc3_pci_intel_mrfld_swnode = { .properties = dwc3_pci_mrfld_properties, }; -@@ -173,7 +188,8 @@ static const struct software_node dwc3_pci_amd_mr_swnode = { +@@ -173,7 +189,8 @@ static const struct software_node dwc3_pci_amd_mr_swnode = { .properties = dwc3_pci_mr_properties, }; @@ -285331,7 +341285,7 @@ index 7ff8fc8f79a9b..c52f7b5b5ec00 100644 { struct 
pci_dev *pdev = dwc->pci; -@@ -230,7 +246,7 @@ static int dwc3_pci_quirks(struct dwc3_pci *dwc) +@@ -230,7 +247,7 @@ static int dwc3_pci_quirks(struct dwc3_pci *dwc) } } @@ -285340,7 +341294,7 @@ index 7ff8fc8f79a9b..c52f7b5b5ec00 100644 } #ifdef CONFIG_PM -@@ -241,7 +257,7 @@ static void dwc3_pci_resume_work(struct work_struct *work) +@@ -241,7 +258,7 @@ static void dwc3_pci_resume_work(struct work_struct *work) int ret; ret = pm_runtime_get_sync(&dwc3->dev); @@ -285349,7 +341303,7 @@ index 7ff8fc8f79a9b..c52f7b5b5ec00 100644 pm_runtime_put_sync_autosuspend(&dwc3->dev); return; } -@@ -295,11 +311,7 @@ static int dwc3_pci_probe(struct pci_dev *pci, const struct pci_device_id *id) +@@ -295,11 +312,7 @@ static int dwc3_pci_probe(struct pci_dev *pci, const struct pci_device_id *id) dwc->dwc3->dev.parent = dev; ACPI_COMPANION_SET(&dwc->dwc3->dev, ACPI_COMPANION(dev)); @@ -285362,7 +341316,7 @@ index 7ff8fc8f79a9b..c52f7b5b5ec00 100644 if (ret) goto err; -@@ -344,7 +356,7 @@ static const struct pci_device_id dwc3_pci_id_table[] = { +@@ -344,7 +357,7 @@ static const struct pci_device_id dwc3_pci_id_table[] = { (kernel_ulong_t) &dwc3_pci_intel_swnode, }, { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_BYT), @@ -285371,7 +341325,7 @@ index 7ff8fc8f79a9b..c52f7b5b5ec00 100644 { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_MRFLD), (kernel_ulong_t) &dwc3_pci_intel_mrfld_swnode, }, -@@ -409,6 +421,18 @@ static const struct pci_device_id dwc3_pci_id_table[] = { +@@ -409,6 +422,21 @@ static const struct pci_device_id dwc3_pci_id_table[] = { { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_ADLS), (kernel_ulong_t) &dwc3_pci_intel_swnode, }, @@ -285381,6 +341335,9 @@ index 7ff8fc8f79a9b..c52f7b5b5ec00 100644 + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_RPLS), + (kernel_ulong_t) &dwc3_pci_intel_swnode, }, + ++ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_MTLM), ++ (kernel_ulong_t) &dwc3_pci_intel_swnode, }, ++ + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_MTLP), + (kernel_ulong_t) &dwc3_pci_intel_swnode, }, + @@ -285391,10 +341348,46 @@ index 7ff8fc8f79a9b..c52f7b5b5ec00 100644 (kernel_ulong_t) &dwc3_pci_intel_swnode, }, diff --git a/drivers/usb/dwc3/dwc3-qcom.c b/drivers/usb/dwc3/dwc3-qcom.c -index 9abbd01028c5f..d0352daab0128 100644 +index 9abbd01028c5f..28bc7480acf3c 100644 --- a/drivers/usb/dwc3/dwc3-qcom.c +++ b/drivers/usb/dwc3/dwc3-qcom.c -@@ -296,6 +296,14 @@ static void dwc3_qcom_interconnect_exit(struct dwc3_qcom *qcom) +@@ -258,7 +258,8 @@ static int dwc3_qcom_interconnect_init(struct dwc3_qcom *qcom) + if (IS_ERR(qcom->icc_path_apps)) { + dev_err(dev, "failed to get apps-usb path: %ld\n", + PTR_ERR(qcom->icc_path_apps)); +- return PTR_ERR(qcom->icc_path_apps); ++ ret = PTR_ERR(qcom->icc_path_apps); ++ goto put_path_ddr; + } + + if (usb_get_maximum_speed(&qcom->dwc3->dev) >= USB_SPEED_SUPER || +@@ -271,17 +272,23 @@ static int dwc3_qcom_interconnect_init(struct dwc3_qcom *qcom) + + if (ret) { + dev_err(dev, "failed to set bandwidth for usb-ddr path: %d\n", ret); +- return ret; ++ goto put_path_apps; + } + + ret = icc_set_bw(qcom->icc_path_apps, + APPS_USB_AVG_BW, APPS_USB_PEAK_BW); + if (ret) { + dev_err(dev, "failed to set bandwidth for apps-usb path: %d\n", ret); +- return ret; ++ goto put_path_apps; + } + + return 0; ++ ++put_path_apps: ++ icc_put(qcom->icc_path_apps); ++put_path_ddr: ++ icc_put(qcom->icc_path_ddr); ++ return ret; + } + + /** +@@ -296,6 +303,14 @@ static void dwc3_qcom_interconnect_exit(struct dwc3_qcom *qcom) icc_put(qcom->icc_path_apps); } @@ -285409,7 +341402,7 @@ index 9abbd01028c5f..d0352daab0128 
100644 static void dwc3_qcom_disable_interrupts(struct dwc3_qcom *qcom) { if (qcom->hs_phy_irq) { -@@ -411,7 +419,11 @@ static irqreturn_t qcom_dwc3_resume_irq(int irq, void *data) +@@ -411,7 +426,11 @@ static irqreturn_t qcom_dwc3_resume_irq(int irq, void *data) if (qcom->pm_suspended) return IRQ_HANDLED; @@ -285422,7 +341415,7 @@ index 9abbd01028c5f..d0352daab0128 100644 pm_runtime_resume(&dwc->xhci->dev); return IRQ_HANDLED; -@@ -443,9 +455,9 @@ static int dwc3_qcom_get_irq(struct platform_device *pdev, +@@ -443,9 +462,9 @@ static int dwc3_qcom_get_irq(struct platform_device *pdev, int ret; if (np) @@ -285434,7 +341427,7 @@ index 9abbd01028c5f..d0352daab0128 100644 return ret; } -@@ -649,7 +661,6 @@ static int dwc3_qcom_of_register_core(struct platform_device *pdev) +@@ -649,7 +668,6 @@ static int dwc3_qcom_of_register_core(struct platform_device *pdev) struct dwc3_qcom *qcom = platform_get_drvdata(pdev); struct device_node *np = pdev->dev.of_node, *dwc3_np; struct device *dev = &pdev->dev; @@ -285442,7 +341435,7 @@ index 9abbd01028c5f..d0352daab0128 100644 int ret; dwc3_np = of_get_compatible_child(np, "snps,dwc3"); -@@ -658,20 +669,6 @@ static int dwc3_qcom_of_register_core(struct platform_device *pdev) +@@ -658,20 +676,6 @@ static int dwc3_qcom_of_register_core(struct platform_device *pdev) return -ENODEV; } @@ -285463,7 +341456,7 @@ index 9abbd01028c5f..d0352daab0128 100644 ret = of_platform_populate(np, NULL, NULL, dev); if (ret) { dev_err(dev, "failed to register dwc3 core - %d\n", ret); -@@ -784,9 +781,12 @@ static int dwc3_qcom_probe(struct platform_device *pdev) +@@ -784,9 +788,12 @@ static int dwc3_qcom_probe(struct platform_device *pdev) if (qcom->acpi_pdata->is_urs) { qcom->urs_usb = dwc3_qcom_create_urs_usb_platdev(dev); @@ -285478,6 +341471,15 @@ index 9abbd01028c5f..d0352daab0128 100644 } } } +@@ -829,7 +836,7 @@ static int dwc3_qcom_probe(struct platform_device *pdev) + qcom->mode = usb_get_dr_mode(&qcom->dwc3->dev); + + /* enable vbus override for device mode */ +- if (qcom->mode == USB_DR_MODE_PERIPHERAL) ++ if (qcom->mode != USB_DR_MODE_HOST) + dwc3_qcom_vbus_override_enable(qcom, true); + + /* register extcon to override sw_vbus on Vbus change later */ diff --git a/drivers/usb/dwc3/dwc3-xilinx.c b/drivers/usb/dwc3/dwc3-xilinx.c index 9cc3ad701a295..a6f3a9b38789e 100644 --- a/drivers/usb/dwc3/dwc3-xilinx.c @@ -285527,10 +341529,20 @@ index 9cc3ad701a295..a6f3a9b38789e 100644 * This routes the USB DMA traffic to go through FPD path instead * of reaching DDR directly. 
This traffic routing is needed to diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c -index 4519d06c9ca2b..14dcdb923f405 100644 +index 4519d06c9ca2b..4812ba4bbedd7 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c -@@ -310,13 +310,24 @@ int dwc3_send_gadget_ep_cmd(struct dwc3_ep *dep, unsigned int cmd, +@@ -291,7 +291,8 @@ int dwc3_send_gadget_ep_cmd(struct dwc3_ep *dep, unsigned int cmd, + * + * DWC_usb3 3.30a and DWC_usb31 1.90a programming guide section 3.2.2 + */ +- if (dwc->gadget->speed <= USB_SPEED_HIGH) { ++ if (dwc->gadget->speed <= USB_SPEED_HIGH || ++ DWC3_DEPCMD_CMD(cmd) == DWC3_DEPCMD_ENDTRANSFER) { + reg = dwc3_readl(dwc->regs, DWC3_GUSB2PHYCFG(0)); + if (unlikely(reg & DWC3_GUSB2PHYCFG_SUSPHY)) { + saved_config |= DWC3_GUSB2PHYCFG_SUSPHY; +@@ -310,13 +311,24 @@ int dwc3_send_gadget_ep_cmd(struct dwc3_ep *dep, unsigned int cmd, if (DWC3_DEPCMD_CMD(cmd) == DWC3_DEPCMD_STARTTRANSFER) { int link_state; @@ -285558,7 +341570,7 @@ index 4519d06c9ca2b..14dcdb923f405 100644 } } -@@ -702,6 +713,7 @@ void dwc3_gadget_clear_tx_fifos(struct dwc3 *dwc) +@@ -702,6 +714,7 @@ void dwc3_gadget_clear_tx_fifos(struct dwc3 *dwc) DWC31_GTXFIFOSIZ_TXFRAMNUM; dwc3_writel(dwc->regs, DWC3_GTXFIFOSIZ(num >> 1), size); @@ -285566,7 +341578,7 @@ index 4519d06c9ca2b..14dcdb923f405 100644 } dwc->num_ep_resized = 0; } -@@ -747,6 +759,10 @@ static int dwc3_gadget_resize_tx_fifos(struct dwc3_ep *dep) +@@ -747,6 +760,10 @@ static int dwc3_gadget_resize_tx_fifos(struct dwc3_ep *dep) if (!usb_endpoint_dir_in(dep->endpoint.desc) || dep->number <= 1) return 0; @@ -285577,7 +341589,7 @@ index 4519d06c9ca2b..14dcdb923f405 100644 ram1_depth = DWC3_RAM1_DEPTH(dwc->hwparams.hwparams7); if ((dep->endpoint.maxburst > 1 && -@@ -807,6 +823,7 @@ static int dwc3_gadget_resize_tx_fifos(struct dwc3_ep *dep) +@@ -807,6 +824,7 @@ static int dwc3_gadget_resize_tx_fifos(struct dwc3_ep *dep) } dwc3_writel(dwc->regs, DWC3_GTXFIFOSIZ(dep->number >> 1), fifo_size); @@ -285585,16 +341597,64 @@ index 4519d06c9ca2b..14dcdb923f405 100644 dwc->num_ep_resized++; return 0; -@@ -995,7 +1012,7 @@ static int __dwc3_gadget_ep_disable(struct dwc3_ep *dep) +@@ -934,7 +952,7 @@ out: + return 0; + } - dep->stream_capable = false; - dep->type = 0; -- dep->flags = 0; +-static void dwc3_remove_requests(struct dwc3 *dwc, struct dwc3_ep *dep) ++static void dwc3_remove_requests(struct dwc3 *dwc, struct dwc3_ep *dep, int status) + { + struct dwc3_request *req; + +@@ -944,19 +962,19 @@ static void dwc3_remove_requests(struct dwc3 *dwc, struct dwc3_ep *dep) + while (!list_empty(&dep->started_list)) { + req = next_request(&dep->started_list); + +- dwc3_gadget_giveback(dep, req, -ESHUTDOWN); ++ dwc3_gadget_giveback(dep, req, status); + } + + while (!list_empty(&dep->pending_list)) { + req = next_request(&dep->pending_list); + +- dwc3_gadget_giveback(dep, req, -ESHUTDOWN); ++ dwc3_gadget_giveback(dep, req, status); + } + + while (!list_empty(&dep->cancelled_list)) { + req = next_request(&dep->cancelled_list); + +- dwc3_gadget_giveback(dep, req, -ESHUTDOWN); ++ dwc3_gadget_giveback(dep, req, status); + } + } + +@@ -985,18 +1003,18 @@ static int __dwc3_gadget_ep_disable(struct dwc3_ep *dep) + reg &= ~DWC3_DALEPENA_EP(dep->number); + dwc3_writel(dwc->regs, DWC3_DALEPENA, reg); + ++ dwc3_remove_requests(dwc, dep, -ESHUTDOWN); ++ ++ dep->stream_capable = false; ++ dep->type = 0; + dep->flags &= DWC3_EP_TXFIFO_RESIZED; ++ + /* Clear out the ep descriptors for non-ep0 */ + if (dep->number > 1) { + dep->endpoint.comp_desc = NULL; + 
dep->endpoint.desc = NULL; + } +- dwc3_remove_requests(dwc, dep); +- +- dep->stream_capable = false; +- dep->type = 0; +- dep->flags = 0; +- return 0; } -@@ -1152,17 +1169,49 @@ static u32 dwc3_calc_trbs_left(struct dwc3_ep *dep) + +@@ -1152,17 +1170,49 @@ static u32 dwc3_calc_trbs_left(struct dwc3_ep *dep) return trbs_left; } @@ -285650,7 +341710,7 @@ index 4519d06c9ca2b..14dcdb923f405 100644 trb->bpl = lower_32_bits(dma); trb->bph = upper_32_bits(dma); -@@ -1202,10 +1251,10 @@ static void __dwc3_prepare_one_trb(struct dwc3_ep *dep, struct dwc3_trb *trb, +@@ -1202,10 +1252,10 @@ static void __dwc3_prepare_one_trb(struct dwc3_ep *dep, struct dwc3_trb *trb, unsigned int mult = 2; unsigned int maxp = usb_endpoint_maxp(ep->desc); @@ -285663,7 +341723,18 @@ index 4519d06c9ca2b..14dcdb923f405 100644 mult--; trb->size |= DWC3_TRB_SIZE_PCM1(mult); -@@ -1254,6 +1303,19 @@ static void __dwc3_prepare_one_trb(struct dwc3_ep *dep, struct dwc3_trb *trb, +@@ -1214,8 +1264,8 @@ static void __dwc3_prepare_one_trb(struct dwc3_ep *dep, struct dwc3_trb *trb, + trb->ctrl = DWC3_TRBCTL_ISOCHRONOUS; + } + +- /* always enable Interrupt on Missed ISOC */ +- trb->ctrl |= DWC3_TRB_CTRL_ISP_IMI; ++ if (!no_interrupt && !chain) ++ trb->ctrl |= DWC3_TRB_CTRL_ISP_IMI; + break; + + case USB_ENDPOINT_XFER_BULK: +@@ -1254,6 +1304,19 @@ static void __dwc3_prepare_one_trb(struct dwc3_ep *dep, struct dwc3_trb *trb, if (usb_endpoint_xfer_bulk(dep->endpoint.desc) && dep->stream_capable) trb->ctrl |= DWC3_TRB_CTRL_SID_SOFN(stream_id); @@ -285683,7 +341754,7 @@ index 4519d06c9ca2b..14dcdb923f405 100644 trb->ctrl |= DWC3_TRB_CTRL_HWO; dwc3_ep_inc_enq(dep); -@@ -1261,50 +1323,6 @@ static void __dwc3_prepare_one_trb(struct dwc3_ep *dep, struct dwc3_trb *trb, +@@ -1261,50 +1324,6 @@ static void __dwc3_prepare_one_trb(struct dwc3_ep *dep, struct dwc3_trb *trb, trace_dwc3_prepare_trb(dep, trb); } @@ -285734,7 +341805,7 @@ index 4519d06c9ca2b..14dcdb923f405 100644 static bool dwc3_needs_extra_trb(struct dwc3_ep *dep, struct dwc3_request *req) { unsigned int maxp = usb_endpoint_maxp(dep->endpoint.desc); -@@ -1925,10 +1943,10 @@ static void dwc3_gadget_ep_skip_trbs(struct dwc3_ep *dep, struct dwc3_request *r +@@ -1925,10 +1944,10 @@ static void dwc3_gadget_ep_skip_trbs(struct dwc3_ep *dep, struct dwc3_request *r static void dwc3_gadget_ep_cleanup_cancelled_requests(struct dwc3_ep *dep) { struct dwc3_request *req; @@ -285747,7 +341818,7 @@ index 4519d06c9ca2b..14dcdb923f405 100644 dwc3_gadget_ep_skip_trbs(dep, req); switch (req->status) { case DWC3_REQUEST_STATUS_DISCONNECTED: -@@ -1945,6 +1963,12 @@ static void dwc3_gadget_ep_cleanup_cancelled_requests(struct dwc3_ep *dep) +@@ -1945,6 +1964,12 @@ static void dwc3_gadget_ep_cleanup_cancelled_requests(struct dwc3_ep *dep) dwc3_gadget_giveback(dep, req, -ECONNRESET); break; } @@ -285760,7 +341831,16 @@ index 4519d06c9ca2b..14dcdb923f405 100644 } } -@@ -2411,14 +2435,42 @@ static void dwc3_gadget_disable_irq(struct dwc3 *dwc); +@@ -2264,7 +2289,7 @@ static void dwc3_stop_active_transfers(struct dwc3 *dwc) + if (!dep) + continue; + +- dwc3_remove_requests(dwc, dep); ++ dwc3_remove_requests(dwc, dep, -ESHUTDOWN); + } + } + +@@ -2411,14 +2436,42 @@ static void dwc3_gadget_disable_irq(struct dwc3 *dwc); static void __dwc3_gadget_stop(struct dwc3 *dwc); static int __dwc3_gadget_start(struct dwc3 *dwc); @@ -285804,7 +341884,7 @@ index 4519d06c9ca2b..14dcdb923f405 100644 /* * Per databook, when we want to stop the gadget, if a control transfer * is still in process, complete it and get the core 
into setup phase. -@@ -2454,50 +2506,27 @@ static int dwc3_gadget_pullup(struct usb_gadget *g, int is_on) +@@ -2454,50 +2507,27 @@ static int dwc3_gadget_pullup(struct usb_gadget *g, int is_on) return 0; } @@ -285868,7 +341948,18 @@ index 4519d06c9ca2b..14dcdb923f405 100644 pm_runtime_put(dwc->dev); return ret; -@@ -3169,6 +3198,7 @@ static int dwc3_gadget_ep_cleanup_completed_request(struct dwc3_ep *dep, +@@ -3117,6 +3147,10 @@ static int dwc3_gadget_ep_reclaim_completed_trb(struct dwc3_ep *dep, + if (event->status & DEPEVT_STATUS_SHORT && !chain) + return 1; + ++ if ((trb->ctrl & DWC3_TRB_CTRL_ISP_IMI) && ++ DWC3_TRB_SIZE_TRBSTS(trb->size) == DWC3_TRBSTS_MISSED_ISOC) ++ return 1; ++ + if ((trb->ctrl & DWC3_TRB_CTRL_IOC) || + (trb->ctrl & DWC3_TRB_CTRL_LST)) + return 1; +@@ -3169,6 +3203,7 @@ static int dwc3_gadget_ep_cleanup_completed_request(struct dwc3_ep *dep, const struct dwc3_event_depevt *event, struct dwc3_request *req, int status) { @@ -285876,7 +341967,7 @@ index 4519d06c9ca2b..14dcdb923f405 100644 int ret; if (req->request.num_mapped_sgs) -@@ -3189,7 +3219,35 @@ static int dwc3_gadget_ep_cleanup_completed_request(struct dwc3_ep *dep, +@@ -3189,7 +3224,35 @@ static int dwc3_gadget_ep_cleanup_completed_request(struct dwc3_ep *dep, req->needs_extra_trb = false; } @@ -285913,7 +342004,7 @@ index 4519d06c9ca2b..14dcdb923f405 100644 out: return ret; -@@ -3199,15 +3257,21 @@ static void dwc3_gadget_ep_cleanup_completed_requests(struct dwc3_ep *dep, +@@ -3199,15 +3262,21 @@ static void dwc3_gadget_ep_cleanup_completed_requests(struct dwc3_ep *dep, const struct dwc3_event_depevt *event, int status) { struct dwc3_request *req; @@ -285937,7 +342028,7 @@ index 4519d06c9ca2b..14dcdb923f405 100644 } } -@@ -3251,6 +3315,9 @@ static bool dwc3_gadget_endpoint_trbs_complete(struct dwc3_ep *dep, +@@ -3251,6 +3320,9 @@ static bool dwc3_gadget_endpoint_trbs_complete(struct dwc3_ep *dep, if (dep->flags & DWC3_EP_END_TRANSFER_PENDING) goto out; @@ -285947,7 +342038,7 @@ index 4519d06c9ca2b..14dcdb923f405 100644 if (usb_endpoint_xfer_isoc(dep->endpoint.desc) && list_empty(&dep->started_list) && (list_empty(&dep->pending_list) || status == -EXDEV)) -@@ -3293,6 +3360,9 @@ static void dwc3_gadget_endpoint_transfer_in_progress(struct dwc3_ep *dep, +@@ -3293,6 +3365,9 @@ static void dwc3_gadget_endpoint_transfer_in_progress(struct dwc3_ep *dep, { int status = 0; @@ -285957,7 +342048,7 @@ index 4519d06c9ca2b..14dcdb923f405 100644 if (usb_endpoint_xfer_isoc(dep->endpoint.desc)) dwc3_gadget_endpoint_frame_from_event(dep, event); -@@ -3346,6 +3416,14 @@ static void dwc3_gadget_endpoint_command_complete(struct dwc3_ep *dep, +@@ -3346,6 +3421,14 @@ static void dwc3_gadget_endpoint_command_complete(struct dwc3_ep *dep, if (cmd != DWC3_DEPCMD_ENDTRANSFER) return; @@ -285972,7 +342063,7 @@ index 4519d06c9ca2b..14dcdb923f405 100644 dep->flags &= ~DWC3_EP_END_TRANSFER_PENDING; dep->flags &= ~DWC3_EP_TRANSFER_STARTED; dwc3_gadget_ep_cleanup_cancelled_requests(dep); -@@ -3568,14 +3646,6 @@ static void dwc3_stop_active_transfer(struct dwc3_ep *dep, bool force, +@@ -3568,14 +3651,6 @@ static void dwc3_stop_active_transfer(struct dwc3_ep *dep, bool force, WARN_ON_ONCE(ret); dep->resource_index = 0; @@ -285987,7 +342078,7 @@ index 4519d06c9ca2b..14dcdb923f405 100644 if (!interrupt) dep->flags &= ~DWC3_EP_TRANSFER_STARTED; else -@@ -4072,7 +4142,6 @@ static irqreturn_t dwc3_process_event_buf(struct dwc3_event_buffer *evt) +@@ -4072,7 +4147,6 @@ static irqreturn_t dwc3_process_event_buf(struct dwc3_event_buffer *evt) } 
evt->count = 0; @@ -285995,7 +342086,7 @@ index 4519d06c9ca2b..14dcdb923f405 100644 ret = IRQ_HANDLED; /* Unmask interrupt */ -@@ -4085,6 +4154,9 @@ static irqreturn_t dwc3_process_event_buf(struct dwc3_event_buffer *evt) +@@ -4085,6 +4159,9 @@ static irqreturn_t dwc3_process_event_buf(struct dwc3_event_buffer *evt) dwc3_writel(dwc->regs, DWC3_DEV_IMOD(0), dwc->imod_interval); } @@ -286005,7 +342096,7 @@ index 4519d06c9ca2b..14dcdb923f405 100644 return ret; } -@@ -4095,9 +4167,11 @@ static irqreturn_t dwc3_thread_interrupt(int irq, void *_evt) +@@ -4095,9 +4172,11 @@ static irqreturn_t dwc3_thread_interrupt(int irq, void *_evt) unsigned long flags; irqreturn_t ret = IRQ_NONE; @@ -286017,7 +342108,7 @@ index 4519d06c9ca2b..14dcdb923f405 100644 return ret; } -@@ -4352,7 +4426,7 @@ int dwc3_gadget_resume(struct dwc3 *dwc) +@@ -4352,7 +4431,7 @@ int dwc3_gadget_resume(struct dwc3 *dwc) { int ret; @@ -286027,36 +342118,10 @@ index 4519d06c9ca2b..14dcdb923f405 100644 ret = __dwc3_gadget_start(dwc); diff --git a/drivers/usb/dwc3/host.c b/drivers/usb/dwc3/host.c -index f29a264635aa1..85165a972076d 100644 +index f29a264635aa1..2078e9d702923 100644 --- a/drivers/usb/dwc3/host.c +++ b/drivers/usb/dwc3/host.c -@@ -10,8 +10,13 @@ - #include <linux/acpi.h> - #include <linux/platform_device.h> - -+#include "../host/xhci-plat.h" - #include "core.h" - -+static const struct xhci_plat_priv dwc3_xhci_plat_priv = { -+ .quirks = XHCI_SKIP_PHY_INIT, -+}; -+ - static int dwc3_host_get_irq(struct dwc3 *dwc) - { - struct platform_device *dwc3_pdev = to_platform_device(dwc->dev); -@@ -87,6 +92,11 @@ int dwc3_host_init(struct dwc3 *dwc) - goto err; - } - -+ ret = platform_device_add_data(xhci, &dwc3_xhci_plat_priv, -+ sizeof(dwc3_xhci_plat_priv)); -+ if (ret) -+ goto err; -+ - memset(props, 0, sizeof(struct property_entry) * ARRAY_SIZE(props)); - - if (dwc->usb3_lpm_capable) -@@ -130,4 +140,5 @@ err: +@@ -130,4 +130,5 @@ err: void dwc3_host_exit(struct dwc3 *dwc) { platform_device_unregister(dwc->xhci); @@ -286151,7 +342216,7 @@ index 477e72a1d11e7..5ade844db4046 100644 spin_unlock_irqrestore(&gi->spinlock, flags); } diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c -index 8260f38025b72..adc44a2685b59 100644 +index 8260f38025b72..f975111bd974f 100644 --- a/drivers/usb/gadget/function/f_fs.c +++ b/drivers/usb/gadget/function/f_fs.c @@ -122,8 +122,6 @@ struct ffs_ep { @@ -286173,7 +342238,19 @@ index 8260f38025b72..adc44a2685b59 100644 }; struct ffs_desc_helper { -@@ -614,7 +615,7 @@ static int ffs_ep0_open(struct inode *inode, struct file *file) +@@ -278,6 +279,11 @@ static int __ffs_ep0_queue_wait(struct ffs_data *ffs, char *data, size_t len) + struct usb_request *req = ffs->ep0req; + int ret; + ++ if (!req) { ++ spin_unlock_irq(&ffs->ev.waitq.lock); ++ return -EINVAL; ++ } ++ + req->zero = len < le16_to_cpu(ffs->ev.setup.wLength); + + spin_unlock_irq(&ffs->ev.waitq.lock); +@@ -614,7 +620,7 @@ static int ffs_ep0_open(struct inode *inode, struct file *file) file->private_data = ffs; ffs_data_opened(ffs); @@ -286182,7 +342259,7 @@ index 8260f38025b72..adc44a2685b59 100644 } static int ffs_ep0_release(struct inode *inode, struct file *file) -@@ -707,12 +708,15 @@ static const struct file_operations ffs_ep0_operations = { +@@ -707,12 +713,15 @@ static const struct file_operations ffs_ep0_operations = { static void ffs_epfile_io_complete(struct usb_ep *_ep, struct usb_request *req) { @@ -286203,7 +342280,7 @@ index 8260f38025b72..adc44a2685b59 100644 } static ssize_t 
ffs_copy_to_iter(void *data, int data_len, struct iov_iter *iter) -@@ -1050,7 +1054,6 @@ static ssize_t ffs_epfile_io(struct file *file, struct ffs_io_data *io_data) +@@ -1050,7 +1059,6 @@ static ssize_t ffs_epfile_io(struct file *file, struct ffs_io_data *io_data) WARN(1, "%s: data_len == -EINVAL\n", __func__); ret = -EINVAL; } else if (!io_data->aio) { @@ -286211,7 +342288,7 @@ index 8260f38025b72..adc44a2685b59 100644 bool interrupted = false; req = ep->req; -@@ -1066,7 +1069,8 @@ static ssize_t ffs_epfile_io(struct file *file, struct ffs_io_data *io_data) +@@ -1066,7 +1074,8 @@ static ssize_t ffs_epfile_io(struct file *file, struct ffs_io_data *io_data) io_data->buf = data; @@ -286221,7 +342298,7 @@ index 8260f38025b72..adc44a2685b59 100644 req->complete = ffs_epfile_io_complete; ret = usb_ep_queue(ep->ep, req, GFP_ATOMIC); -@@ -1075,7 +1079,12 @@ static ssize_t ffs_epfile_io(struct file *file, struct ffs_io_data *io_data) +@@ -1075,7 +1084,12 @@ static ssize_t ffs_epfile_io(struct file *file, struct ffs_io_data *io_data) spin_unlock_irq(&epfile->ffs->eps_lock); @@ -286235,7 +342312,7 @@ index 8260f38025b72..adc44a2685b59 100644 /* * To avoid race condition with ffs_epfile_io_complete, * dequeue the request first then check -@@ -1083,17 +1092,18 @@ static ssize_t ffs_epfile_io(struct file *file, struct ffs_io_data *io_data) +@@ -1083,17 +1097,18 @@ static ssize_t ffs_epfile_io(struct file *file, struct ffs_io_data *io_data) * condition with req->complete callback. */ usb_ep_dequeue(ep->ep, req); @@ -286259,7 +342336,7 @@ index 8260f38025b72..adc44a2685b59 100644 goto error_mutex; } else if (!(req = usb_ep_alloc_request(ep->ep, GFP_ATOMIC))) { ret = -ENOMEM; -@@ -1154,7 +1164,7 @@ ffs_epfile_open(struct inode *inode, struct file *file) +@@ -1154,7 +1169,7 @@ ffs_epfile_open(struct inode *inode, struct file *file) file->private_data = epfile; ffs_data_opened(epfile->ffs); @@ -286268,7 +342345,7 @@ index 8260f38025b72..adc44a2685b59 100644 } static int ffs_aio_cancel(struct kiocb *kiocb) -@@ -1711,16 +1721,24 @@ static void ffs_data_put(struct ffs_data *ffs) +@@ -1711,16 +1726,24 @@ static void ffs_data_put(struct ffs_data *ffs) static void ffs_data_closed(struct ffs_data *ffs) { @@ -286298,7 +342375,7 @@ index 8260f38025b72..adc44a2685b59 100644 if (ffs->setup_state == FFS_SETUP_PENDING) __ffs_ep0_stall(ffs); } else { -@@ -1767,17 +1785,34 @@ static struct ffs_data *ffs_data_new(const char *dev_name) +@@ -1767,17 +1790,34 @@ static struct ffs_data *ffs_data_new(const char *dev_name) static void ffs_data_clear(struct ffs_data *ffs) { @@ -286336,7 +342413,7 @@ index 8260f38025b72..adc44a2685b59 100644 kfree(ffs->raw_descs_data); kfree(ffs->raw_strings); -@@ -1790,7 +1825,6 @@ static void ffs_data_reset(struct ffs_data *ffs) +@@ -1790,7 +1830,6 @@ static void ffs_data_reset(struct ffs_data *ffs) ffs_data_clear(ffs); @@ -286344,7 +342421,22 @@ index 8260f38025b72..adc44a2685b59 100644 ffs->raw_descs_data = NULL; ffs->raw_descs = NULL; ffs->raw_strings = NULL; -@@ -1919,12 +1953,15 @@ static void ffs_epfiles_destroy(struct ffs_epfile *epfiles, unsigned count) +@@ -1858,10 +1897,14 @@ static void functionfs_unbind(struct ffs_data *ffs) + ENTER(); + + if (!WARN_ON(!ffs->gadget)) { ++ /* dequeue before freeing ep0req */ ++ usb_ep_dequeue(ffs->gadget->ep0, ffs->ep0req); ++ mutex_lock(&ffs->mutex); + usb_ep_free_request(ffs->gadget->ep0, ffs->ep0req); + ffs->ep0req = NULL; + ffs->gadget = NULL; + clear_bit(FFS_FL_BOUND, &ffs->flags); ++ mutex_unlock(&ffs->mutex); + ffs_data_put(ffs); + } + } +@@ 
-1919,12 +1962,15 @@ static void ffs_epfiles_destroy(struct ffs_epfile *epfiles, unsigned count) static void ffs_func_eps_disable(struct ffs_function *func) { @@ -286363,7 +342455,7 @@ index 8260f38025b72..adc44a2685b59 100644 while (count--) { /* pending requests get nuked */ if (ep->ep) -@@ -1942,14 +1979,18 @@ static void ffs_func_eps_disable(struct ffs_function *func) +@@ -1942,14 +1988,18 @@ static void ffs_func_eps_disable(struct ffs_function *func) static int ffs_func_eps_enable(struct ffs_function *func) { @@ -286386,6 +342478,145 @@ index 8260f38025b72..adc44a2685b59 100644 while(count--) { ep->ep->driver_data = ep; +diff --git a/drivers/usb/gadget/function/f_hid.c b/drivers/usb/gadget/function/f_hid.c +index ca0a7d9eaa34e..6be6009f911e1 100644 +--- a/drivers/usb/gadget/function/f_hid.c ++++ b/drivers/usb/gadget/function/f_hid.c +@@ -71,7 +71,7 @@ struct f_hidg { + wait_queue_head_t write_queue; + struct usb_request *req; + +- int minor; ++ struct device dev; + struct cdev cdev; + struct usb_function func; + +@@ -84,6 +84,14 @@ static inline struct f_hidg *func_to_hidg(struct usb_function *f) + return container_of(f, struct f_hidg, func); + } + ++static void hidg_release(struct device *dev) ++{ ++ struct f_hidg *hidg = container_of(dev, struct f_hidg, dev); ++ ++ kfree(hidg->set_report_buf); ++ kfree(hidg); ++} ++ + /*-------------------------------------------------------------------------*/ + /* Static descriptors */ + +@@ -904,9 +912,7 @@ static int hidg_bind(struct usb_configuration *c, struct usb_function *f) + struct usb_ep *ep; + struct f_hidg *hidg = func_to_hidg(f); + struct usb_string *us; +- struct device *device; + int status; +- dev_t dev; + + /* maybe allocate device-global string IDs, and patch descriptors */ + us = usb_gstrings_attach(c->cdev, ct_func_strings, +@@ -999,21 +1005,11 @@ static int hidg_bind(struct usb_configuration *c, struct usb_function *f) + + /* create char device */ + cdev_init(&hidg->cdev, &f_hidg_fops); +- dev = MKDEV(major, hidg->minor); +- status = cdev_add(&hidg->cdev, dev, 1); ++ status = cdev_device_add(&hidg->cdev, &hidg->dev); + if (status) + goto fail_free_descs; + +- device = device_create(hidg_class, NULL, dev, NULL, +- "%s%d", "hidg", hidg->minor); +- if (IS_ERR(device)) { +- status = PTR_ERR(device); +- goto del; +- } +- + return 0; +-del: +- cdev_del(&hidg->cdev); + fail_free_descs: + usb_free_all_descriptors(f); + fail: +@@ -1244,9 +1240,7 @@ static void hidg_free(struct usb_function *f) + + hidg = func_to_hidg(f); + opts = container_of(f->fi, struct f_hid_opts, func_inst); +- kfree(hidg->report_desc); +- kfree(hidg->set_report_buf); +- kfree(hidg); ++ put_device(&hidg->dev); + mutex_lock(&opts->lock); + --opts->refcnt; + mutex_unlock(&opts->lock); +@@ -1256,8 +1250,7 @@ static void hidg_unbind(struct usb_configuration *c, struct usb_function *f) + { + struct f_hidg *hidg = func_to_hidg(f); + +- device_destroy(hidg_class, MKDEV(major, hidg->minor)); +- cdev_del(&hidg->cdev); ++ cdev_device_del(&hidg->cdev, &hidg->dev); + + usb_free_all_descriptors(f); + } +@@ -1266,6 +1259,7 @@ static struct usb_function *hidg_alloc(struct usb_function_instance *fi) + { + struct f_hidg *hidg; + struct f_hid_opts *opts; ++ int ret; + + /* allocate and initialize one new instance */ + hidg = kzalloc(sizeof(*hidg), GFP_KERNEL); +@@ -1277,17 +1271,28 @@ static struct usb_function *hidg_alloc(struct usb_function_instance *fi) + mutex_lock(&opts->lock); + ++opts->refcnt; + +- hidg->minor = opts->minor; ++ device_initialize(&hidg->dev); ++ 
hidg->dev.release = hidg_release; ++ hidg->dev.class = hidg_class; ++ hidg->dev.devt = MKDEV(major, opts->minor); ++ ret = dev_set_name(&hidg->dev, "hidg%d", opts->minor); ++ if (ret) { ++ --opts->refcnt; ++ mutex_unlock(&opts->lock); ++ return ERR_PTR(ret); ++ } ++ + hidg->bInterfaceSubClass = opts->subclass; + hidg->bInterfaceProtocol = opts->protocol; + hidg->report_length = opts->report_length; + hidg->report_desc_length = opts->report_desc_length; + if (opts->report_desc) { +- hidg->report_desc = kmemdup(opts->report_desc, +- opts->report_desc_length, +- GFP_KERNEL); ++ hidg->report_desc = devm_kmemdup(&hidg->dev, opts->report_desc, ++ opts->report_desc_length, ++ GFP_KERNEL); + if (!hidg->report_desc) { +- kfree(hidg); ++ put_device(&hidg->dev); ++ --opts->refcnt; + mutex_unlock(&opts->lock); + return ERR_PTR(-ENOMEM); + } +diff --git a/drivers/usb/gadget/function/f_ncm.c b/drivers/usb/gadget/function/f_ncm.c +index dc8f078f918c5..e0c1832342838 100644 +--- a/drivers/usb/gadget/function/f_ncm.c ++++ b/drivers/usb/gadget/function/f_ncm.c +@@ -83,7 +83,9 @@ static inline struct f_ncm *func_to_ncm(struct usb_function *f) + /* peak (theoretical) bulk transfer rate in bits-per-second */ + static inline unsigned ncm_bitrate(struct usb_gadget *g) + { +- if (gadget_is_superspeed(g) && g->speed >= USB_SPEED_SUPER_PLUS) ++ if (!g) ++ return 0; ++ else if (gadget_is_superspeed(g) && g->speed >= USB_SPEED_SUPER_PLUS) + return 4250000000U; + else if (gadget_is_superspeed(g) && g->speed == USB_SPEED_SUPER) + return 3750000000U; diff --git a/drivers/usb/gadget/function/f_printer.c b/drivers/usb/gadget/function/f_printer.c index abec5c58f5251..a881c69b1f2bf 100644 --- a/drivers/usb/gadget/function/f_printer.c @@ -286442,7 +342673,7 @@ index 1abf08e5164af..6803cd60cc6dc 100644 size = ss->isoc_maxpacket * (ss->isoc_mult + 1) * diff --git a/drivers/usb/gadget/function/f_uac2.c b/drivers/usb/gadget/function/f_uac2.c -index ef55b8bb5870a..885a7f593d85e 100644 +index ef55b8bb5870a..850394ed8eb14 100644 --- a/drivers/usb/gadget/function/f_uac2.c +++ b/drivers/usb/gadget/function/f_uac2.c @@ -202,7 +202,7 @@ static struct uac2_input_terminal_descriptor io_in_it_desc = { @@ -286540,11 +342771,31 @@ index ef55b8bb5870a..885a7f593d85e 100644 headers[i++] = USBDHDR(&std_as_out_if0_desc); headers[i++] = USBDHDR(&std_as_out_if1_desc); headers[i++] = USBDHDR(&as_out_hdr_desc); +@@ -1057,6 +1069,7 @@ afunc_bind(struct usb_configuration *cfg, struct usb_function *fn) + } + std_as_out_if0_desc.bInterfaceNumber = ret; + std_as_out_if1_desc.bInterfaceNumber = ret; ++ std_as_out_if1_desc.bNumEndpoints = 1; + uac2->as_out_intf = ret; + uac2->as_out_alt = 0; + diff --git a/drivers/usb/gadget/function/f_uvc.c b/drivers/usb/gadget/function/f_uvc.c -index 9d87c0fb8f92e..bf0a3fc2d7767 100644 +index 9d87c0fb8f92e..5df1b68e5eacc 100644 --- a/drivers/usb/gadget/function/f_uvc.c +++ b/drivers/usb/gadget/function/f_uvc.c -@@ -884,17 +884,42 @@ static void uvc_free(struct usb_function *f) +@@ -213,8 +213,9 @@ uvc_function_ep0_complete(struct usb_ep *ep, struct usb_request *req) + + memset(&v4l2_event, 0, sizeof(v4l2_event)); + v4l2_event.type = UVC_EVENT_DATA; +- uvc_event->data.length = req->actual; +- memcpy(&uvc_event->data.data, req->buf, req->actual); ++ uvc_event->data.length = min_t(unsigned int, req->actual, ++ sizeof(uvc_event->data.data)); ++ memcpy(&uvc_event->data.data, req->buf, uvc_event->data.length); + v4l2_event_queue(&uvc->vdev, &v4l2_event); + } + } +@@ -884,17 +885,42 @@ static void uvc_free(struct usb_function 
*f) kfree(uvc); } @@ -286589,7 +342840,7 @@ index 9d87c0fb8f92e..bf0a3fc2d7767 100644 usb_ep_free_request(cdev->gadget->ep0, uvc->control_req); kfree(uvc->control_buf); -@@ -913,6 +938,7 @@ static struct usb_function *uvc_alloc(struct usb_function_instance *fi) +@@ -913,6 +939,7 @@ static struct usb_function *uvc_alloc(struct usb_function_instance *fi) mutex_init(&uvc->video.mutex); uvc->state = UVC_STATE_DISCONNECTED; @@ -286597,7 +342848,7 @@ index 9d87c0fb8f92e..bf0a3fc2d7767 100644 opts = fi_to_f_uvc_opts(fi); mutex_lock(&opts->lock); -@@ -943,7 +969,7 @@ static struct usb_function *uvc_alloc(struct usb_function_instance *fi) +@@ -943,7 +970,7 @@ static struct usb_function *uvc_alloc(struct usb_function_instance *fi) /* Register the function. */ uvc->func.name = "uvc"; uvc->func.bind = uvc_function_bind; @@ -286819,6 +343070,64 @@ index 85a3f6d4b5af3..ef253599dcf96 100644 return 0; } EXPORT_SYMBOL_GPL(gether_set_dev_addr); +diff --git a/drivers/usb/gadget/function/u_serial.c b/drivers/usb/gadget/function/u_serial.c +index 6f68cbeeee7c0..116d2e15e9b22 100644 +--- a/drivers/usb/gadget/function/u_serial.c ++++ b/drivers/usb/gadget/function/u_serial.c +@@ -81,6 +81,9 @@ + #define WRITE_BUF_SIZE 8192 /* TX only */ + #define GS_CONSOLE_BUF_SIZE 8192 + ++/* Prevents race conditions while accessing gser->ioport */ ++static DEFINE_SPINLOCK(serial_port_lock); ++ + /* console info */ + struct gs_console { + struct console console; +@@ -1374,8 +1377,10 @@ void gserial_disconnect(struct gserial *gser) + if (!port) + return; + ++ spin_lock_irqsave(&serial_port_lock, flags); ++ + /* tell the TTY glue not to do I/O here any more */ +- spin_lock_irqsave(&port->port_lock, flags); ++ spin_lock(&port->port_lock); + + gs_console_disconnect(port); + +@@ -1390,7 +1395,8 @@ void gserial_disconnect(struct gserial *gser) + tty_hangup(port->port.tty); + } + port->suspended = false; +- spin_unlock_irqrestore(&port->port_lock, flags); ++ spin_unlock(&port->port_lock); ++ spin_unlock_irqrestore(&serial_port_lock, flags); + + /* disable endpoints, aborting down any active I/O */ + usb_ep_disable(gser->out); +@@ -1424,10 +1430,19 @@ EXPORT_SYMBOL_GPL(gserial_suspend); + + void gserial_resume(struct gserial *gser) + { +- struct gs_port *port = gser->ioport; ++ struct gs_port *port; + unsigned long flags; + +- spin_lock_irqsave(&port->port_lock, flags); ++ spin_lock_irqsave(&serial_port_lock, flags); ++ port = gser->ioport; ++ ++ if (!port) { ++ spin_unlock_irqrestore(&serial_port_lock, flags); ++ return; ++ } ++ ++ spin_lock(&port->port_lock); ++ spin_unlock(&serial_port_lock); + port->suspended = false; + if (!port->start_delayed) { + spin_unlock_irqrestore(&port->port_lock, flags); diff --git a/drivers/usb/gadget/function/uvc.h b/drivers/usb/gadget/function/uvc.h index 255a61bd6a6a8..d1a4ef74742b7 100644 --- a/drivers/usb/gadget/function/uvc.h @@ -286868,7 +343177,7 @@ index 255a61bd6a6a8..d1a4ef74742b7 100644 #define to_uvc_file_handle(handle) \ diff --git a/drivers/usb/gadget/function/uvc_queue.c b/drivers/usb/gadget/function/uvc_queue.c -index 7d00ad7c154c2..ec299f5cc65a5 100644 +index 7d00ad7c154c2..0cc8422afe4e2 100644 --- a/drivers/usb/gadget/function/uvc_queue.c +++ b/drivers/usb/gadget/function/uvc_queue.c @@ -44,7 +44,8 @@ static int uvc_queue_setup(struct vb2_queue *vq, @@ -286911,7 +343220,15 @@ index 7d00ad7c154c2..ec299f5cc65a5 100644 /* This must be protected by the irqlock spinlock to avoid race * conditions between uvc_queue_buffer and the disconnection event that * could result in an interruptible 
wait in uvc_dequeue_buffer. Do not -@@ -326,33 +335,22 @@ int uvcg_queue_enable(struct uvc_video_queue *queue, int enable) +@@ -304,6 +313,7 @@ int uvcg_queue_enable(struct uvc_video_queue *queue, int enable) + + queue->sequence = 0; + queue->buf_used = 0; ++ queue->flags &= ~UVC_QUEUE_DROP_INCOMPLETE; + } else { + ret = vb2_streamoff(&queue->queue, queue->queue.type); + if (ret < 0) +@@ -326,33 +336,23 @@ int uvcg_queue_enable(struct uvc_video_queue *queue, int enable) } /* called with &queue_irqlock held.. */ @@ -286921,11 +343238,15 @@ index 7d00ad7c154c2..ec299f5cc65a5 100644 { - struct uvc_buffer *nextbuf; - - if ((queue->flags & UVC_QUEUE_DROP_INCOMPLETE) && - buf->length != buf->bytesused) { - buf->state = UVC_BUF_STATE_QUEUED; +- if ((queue->flags & UVC_QUEUE_DROP_INCOMPLETE) && +- buf->length != buf->bytesused) { +- buf->state = UVC_BUF_STATE_QUEUED; ++ if (queue->flags & UVC_QUEUE_DROP_INCOMPLETE) { ++ queue->flags &= ~UVC_QUEUE_DROP_INCOMPLETE; ++ buf->state = UVC_BUF_STATE_ERROR; vb2_set_plane_payload(&buf->buf.vb2_buf, 0, 0); - return buf; ++ vb2_buffer_done(&buf->buf.vb2_buf, VB2_BUF_STATE_ERROR); + return; } @@ -287046,7 +343367,7 @@ index 4ca89eab61590..65abd55ce2348 100644 file->private_data = NULL; diff --git a/drivers/usb/gadget/function/uvc_video.c b/drivers/usb/gadget/function/uvc_video.c -index b4a763e5f70e1..1889d75f87881 100644 +index b4a763e5f70e1..0de7f11d14256 100644 --- a/drivers/usb/gadget/function/uvc_video.c +++ b/drivers/usb/gadget/function/uvc_video.c @@ -33,7 +33,7 @@ uvc_video_encode_header(struct uvc_video *video, struct uvc_buffer *buf, @@ -287058,17 +343379,32 @@ index b4a763e5f70e1..1889d75f87881 100644 } static int -@@ -83,7 +83,8 @@ uvc_video_encode_bulk(struct usb_request *req, struct uvc_video *video, +@@ -59,6 +59,7 @@ uvc_video_encode_bulk(struct usb_request *req, struct uvc_video *video, + struct uvc_buffer *buf) + { + void *mem = req->buf; ++ struct uvc_request *ureq = req->context; + int len = video->req_size; + int ret; + +@@ -83,13 +84,15 @@ uvc_video_encode_bulk(struct usb_request *req, struct uvc_video *video, if (buf->bytesused == video->queue.buf_used) { video->queue.buf_used = 0; buf->state = UVC_BUF_STATE_DONE; - uvcg_queue_next_buffer(&video->queue, buf); + list_del(&buf->queue); -+ uvcg_complete_buffer(&video->queue, buf); video->fid ^= UVC_STREAM_FID; ++ ureq->last_buf = buf; + + video->payload_size = 0; + } + if (video->payload_size == video->max_payload_size || ++ video->queue.flags & UVC_QUEUE_DROP_INCOMPLETE || + buf->bytesused == video->queue.buf_used) video->payload_size = 0; -@@ -104,28 +105,28 @@ uvc_video_encode_isoc_sg(struct usb_request *req, struct uvc_video *video, + } +@@ -104,31 +107,31 @@ uvc_video_encode_isoc_sg(struct usb_request *req, struct uvc_video *video, unsigned int len = video->req_size; unsigned int sg_left, part = 0; unsigned int i; @@ -287099,18 +343435,24 @@ index b4a763e5f70e1..1889d75f87881 100644 for_each_sg(sg, iter, ureq->sgt.nents - 1, i) { - if (!len || !buf->sg) -+ if (!len || !buf->sg || !sg_dma_len(buf->sg)) ++ if (!len || !buf->sg || !buf->sg->length) break; - sg_left = sg_dma_len(buf->sg) - buf->offset; -@@ -148,14 +149,15 @@ uvc_video_encode_isoc_sg(struct usb_request *req, struct uvc_video *video, +- sg_left = sg_dma_len(buf->sg) - buf->offset; ++ sg_left = buf->sg->length - buf->offset; + part = min_t(unsigned int, len, sg_left); + + sg_set_page(iter, sg_page(buf->sg), part, buf->offset); +@@ -148,14 +151,16 @@ uvc_video_encode_isoc_sg(struct usb_request *req, struct uvc_video *video, 
req->num_sgs = i + 1; req->length -= len; - video->queue.buf_used += req->length - UVCG_REQUEST_HEADER_LEN; + video->queue.buf_used += req->length - header_len; - if (buf->bytesused == video->queue.buf_used || !buf->sg) { +- if (buf->bytesused == video->queue.buf_used || !buf->sg) { ++ if (buf->bytesused == video->queue.buf_used || !buf->sg || ++ video->queue.flags & UVC_QUEUE_DROP_INCOMPLETE) { video->queue.buf_used = 0; buf->state = UVC_BUF_STATE_DONE; buf->offset = 0; @@ -287121,17 +343463,42 @@ index b4a763e5f70e1..1889d75f87881 100644 } } -@@ -181,7 +183,8 @@ uvc_video_encode_isoc(struct usb_request *req, struct uvc_video *video, - if (buf->bytesused == video->queue.buf_used) { +@@ -164,6 +169,7 @@ uvc_video_encode_isoc(struct usb_request *req, struct uvc_video *video, + struct uvc_buffer *buf) + { + void *mem = req->buf; ++ struct uvc_request *ureq = req->context; + int len = video->req_size; + int ret; + +@@ -178,11 +184,13 @@ uvc_video_encode_isoc(struct usb_request *req, struct uvc_video *video, + + req->length = video->req_size - len; + +- if (buf->bytesused == video->queue.buf_used) { ++ if (buf->bytesused == video->queue.buf_used || ++ video->queue.flags & UVC_QUEUE_DROP_INCOMPLETE) { video->queue.buf_used = 0; buf->state = UVC_BUF_STATE_DONE; - uvcg_queue_next_buffer(&video->queue, buf); + list_del(&buf->queue); -+ uvcg_complete_buffer(&video->queue, buf); video->fid ^= UVC_STREAM_FID; ++ ureq->last_buf = buf; } } -@@ -225,12 +228,17 @@ uvc_video_complete(struct usb_ep *ep, struct usb_request *req) + +@@ -219,18 +227,28 @@ uvc_video_complete(struct usb_ep *ep, struct usb_request *req) + case 0: + break; + ++ case -EXDEV: ++ uvcg_dbg(&video->uvc->func, "VS request missed xfer.\n"); ++ queue->flags |= UVC_QUEUE_DROP_INCOMPLETE; ++ break; ++ + case -ESHUTDOWN: /* disconnect from host. 
*/ + uvcg_dbg(&video->uvc->func, "VS request cancelled.\n"); + uvcg_queue_cancel(queue, 1); break; default: @@ -287150,7 +343517,7 @@ index b4a763e5f70e1..1889d75f87881 100644 spin_lock_irqsave(&video->req_lock, flags); list_add_tail(&req->list, &video->req_free); spin_unlock_irqrestore(&video->req_lock, flags); -@@ -298,12 +306,13 @@ uvc_video_alloc_requests(struct uvc_video *video) +@@ -298,12 +316,13 @@ uvc_video_alloc_requests(struct uvc_video *video) video->ureq[i].req->complete = uvc_video_complete; video->ureq[i].req->context = &video->ureq[i]; video->ureq[i].video = video; @@ -287229,7 +343596,7 @@ index 5b27d289443fe..3912cc805f3af 100644 if (status < 0) { usb_put_function(e->f); diff --git a/drivers/usb/gadget/legacy/inode.c b/drivers/usb/gadget/legacy/inode.c -index 539220d7f5b62..9e8b678f0548e 100644 +index 539220d7f5b62..ed28aaa82e251 100644 --- a/drivers/usb/gadget/legacy/inode.c +++ b/drivers/usb/gadget/legacy/inode.c @@ -110,6 +110,8 @@ enum ep0_state { @@ -287250,7 +343617,15 @@ index 539220d7f5b62..9e8b678f0548e 100644 }; static inline void get_dev (struct dev_data *data) -@@ -360,6 +362,7 @@ ep_io (struct ep_data *epdata, void *buf, unsigned len) +@@ -227,6 +229,7 @@ static void put_ep (struct ep_data *data) + */ + + static const char *CHIP; ++static DEFINE_MUTEX(sb_mutex); /* Serialize superblock operations */ + + /*----------------------------------------------------------------------*/ + +@@ -360,6 +363,7 @@ ep_io (struct ep_data *epdata, void *buf, unsigned len) spin_unlock_irq (&epdata->dev->lock); DBG (epdata->dev, "endpoint gone\n"); @@ -287258,7 +343633,7 @@ index 539220d7f5b62..9e8b678f0548e 100644 epdata->status = -ENODEV; } } -@@ -1334,6 +1337,18 @@ gadgetfs_setup (struct usb_gadget *gadget, const struct usb_ctrlrequest *ctrl) +@@ -1334,6 +1338,18 @@ gadgetfs_setup (struct usb_gadget *gadget, const struct usb_ctrlrequest *ctrl) u16 w_value = le16_to_cpu(ctrl->wValue); u16 w_length = le16_to_cpu(ctrl->wLength); @@ -287277,7 +343652,7 @@ index 539220d7f5b62..9e8b678f0548e 100644 spin_lock (&dev->lock); dev->setup_abort = 0; if (dev->state == STATE_DEV_UNCONNECTED) { -@@ -1815,8 +1830,9 @@ dev_config (struct file *fd, const char __user *buf, size_t len, loff_t *ptr) +@@ -1815,8 +1831,9 @@ dev_config (struct file *fd, const char __user *buf, size_t len, loff_t *ptr) spin_lock_irq (&dev->lock); value = -EINVAL; if (dev->buf) { @@ -287288,7 +343663,7 @@ index 539220d7f5b62..9e8b678f0548e 100644 } dev->buf = kbuf; -@@ -1863,8 +1879,8 @@ dev_config (struct file *fd, const char __user *buf, size_t len, loff_t *ptr) +@@ -1863,8 +1880,8 @@ dev_config (struct file *fd, const char __user *buf, size_t len, loff_t *ptr) value = usb_gadget_probe_driver(&gadgetfs_driver); if (value != 0) { @@ -287299,7 +343674,7 @@ index 539220d7f5b62..9e8b678f0548e 100644 } else { /* at this point "good" hardware has for the first time * let the USB the host see us. 
alternatively, if users -@@ -1881,6 +1897,9 @@ dev_config (struct file *fd, const char __user *buf, size_t len, loff_t *ptr) +@@ -1881,6 +1898,9 @@ dev_config (struct file *fd, const char __user *buf, size_t len, loff_t *ptr) return value; fail: @@ -287309,6 +343684,68 @@ index 539220d7f5b62..9e8b678f0548e 100644 spin_unlock_irq (&dev->lock); pr_debug ("%s: %s fail %zd, %p\n", shortname, __func__, value, dev); kfree (dev->buf); +@@ -1994,13 +2014,20 @@ gadgetfs_fill_super (struct super_block *sb, struct fs_context *fc) + { + struct inode *inode; + struct dev_data *dev; ++ int rc; + +- if (the_device) +- return -ESRCH; ++ mutex_lock(&sb_mutex); ++ ++ if (the_device) { ++ rc = -ESRCH; ++ goto Done; ++ } + + CHIP = usb_get_gadget_udc_name(); +- if (!CHIP) +- return -ENODEV; ++ if (!CHIP) { ++ rc = -ENODEV; ++ goto Done; ++ } + + /* superblock */ + sb->s_blocksize = PAGE_SIZE; +@@ -2037,13 +2064,17 @@ gadgetfs_fill_super (struct super_block *sb, struct fs_context *fc) + * from binding to a controller. + */ + the_device = dev; +- return 0; ++ rc = 0; ++ goto Done; + +-Enomem: ++ Enomem: + kfree(CHIP); + CHIP = NULL; ++ rc = -ENOMEM; + +- return -ENOMEM; ++ Done: ++ mutex_unlock(&sb_mutex); ++ return rc; + } + + /* "mount -t gadgetfs path /dev/gadget" ends up here */ +@@ -2065,6 +2096,7 @@ static int gadgetfs_init_fs_context(struct fs_context *fc) + static void + gadgetfs_kill_sb (struct super_block *sb) + { ++ mutex_lock(&sb_mutex); + kill_litter_super (sb); + if (the_device) { + put_dev (the_device); +@@ -2072,6 +2104,7 @@ gadgetfs_kill_sb (struct super_block *sb) + } + kfree(CHIP); + CHIP = NULL; ++ mutex_unlock(&sb_mutex); + } + + /*----------------------------------------------------------------------*/ diff --git a/drivers/usb/gadget/legacy/raw_gadget.c b/drivers/usb/gadget/legacy/raw_gadget.c index c5a2c734234a5..2869bda642292 100644 --- a/drivers/usb/gadget/legacy/raw_gadget.c @@ -287482,6 +343919,34 @@ index c5a2c734234a5..2869bda642292 100644 dev_dbg(&dev->gadget->dev, "fail, wrong direction\n"); ret = -EINVAL; goto out_unlock; +diff --git a/drivers/usb/gadget/legacy/webcam.c b/drivers/usb/gadget/legacy/webcam.c +index 94e22867da1d0..e9b5846b2322c 100644 +--- a/drivers/usb/gadget/legacy/webcam.c ++++ b/drivers/usb/gadget/legacy/webcam.c +@@ -293,6 +293,7 @@ static const struct uvc_descriptor_header * const uvc_fs_streaming_cls[] = { + (const struct uvc_descriptor_header *) &uvc_format_yuv, + (const struct uvc_descriptor_header *) &uvc_frame_yuv_360p, + (const struct uvc_descriptor_header *) &uvc_frame_yuv_720p, ++ (const struct uvc_descriptor_header *) &uvc_color_matching, + (const struct uvc_descriptor_header *) &uvc_format_mjpg, + (const struct uvc_descriptor_header *) &uvc_frame_mjpg_360p, + (const struct uvc_descriptor_header *) &uvc_frame_mjpg_720p, +@@ -305,6 +306,7 @@ static const struct uvc_descriptor_header * const uvc_hs_streaming_cls[] = { + (const struct uvc_descriptor_header *) &uvc_format_yuv, + (const struct uvc_descriptor_header *) &uvc_frame_yuv_360p, + (const struct uvc_descriptor_header *) &uvc_frame_yuv_720p, ++ (const struct uvc_descriptor_header *) &uvc_color_matching, + (const struct uvc_descriptor_header *) &uvc_format_mjpg, + (const struct uvc_descriptor_header *) &uvc_frame_mjpg_360p, + (const struct uvc_descriptor_header *) &uvc_frame_mjpg_720p, +@@ -317,6 +319,7 @@ static const struct uvc_descriptor_header * const uvc_ss_streaming_cls[] = { + (const struct uvc_descriptor_header *) &uvc_format_yuv, + (const struct uvc_descriptor_header *) &uvc_frame_yuv_360p, 
+ (const struct uvc_descriptor_header *) &uvc_frame_yuv_720p, ++ (const struct uvc_descriptor_header *) &uvc_color_matching, + (const struct uvc_descriptor_header *) &uvc_format_mjpg, + (const struct uvc_descriptor_header *) &uvc_frame_mjpg_360p, + (const struct uvc_descriptor_header *) &uvc_frame_mjpg_720p, diff --git a/drivers/usb/gadget/udc/Kconfig b/drivers/usb/gadget/udc/Kconfig index 8c614bb86c665..2cdd37be165a4 100644 --- a/drivers/usb/gadget/udc/Kconfig @@ -287519,6 +343984,18 @@ index b9960fdd8a515..16a12d2d492e2 100644 else ret = ast_vhub_str_alloc_add(vhub, &ast_vhub_strings); +diff --git a/drivers/usb/gadget/udc/bdc/bdc_udc.c b/drivers/usb/gadget/udc/bdc/bdc_udc.c +index 5ac0ef88334eb..53ffaf4e2e376 100644 +--- a/drivers/usb/gadget/udc/bdc/bdc_udc.c ++++ b/drivers/usb/gadget/udc/bdc/bdc_udc.c +@@ -151,6 +151,7 @@ static void bdc_uspc_disconnected(struct bdc *bdc, bool reinit) + bdc->delayed_status = false; + bdc->reinit = reinit; + bdc->test_mode = false; ++ usb_gadget_set_state(&bdc->gadget, USB_STATE_NOTATTACHED); + } + + /* TNotify wkaeup timer */ diff --git a/drivers/usb/gadget/udc/core.c b/drivers/usb/gadget/udc/core.c index 14fdf918ecfeb..61099f2d057dc 100644 --- a/drivers/usb/gadget/udc/core.c @@ -287547,6 +344024,45 @@ index 14fdf918ecfeb..61099f2d057dc 100644 udc->gadget->dev.driver = NULL; return ret; } +diff --git a/drivers/usb/gadget/udc/fotg210-udc.c b/drivers/usb/gadget/udc/fotg210-udc.c +index fdca28e72a3b4..d0e051beb3af9 100644 +--- a/drivers/usb/gadget/udc/fotg210-udc.c ++++ b/drivers/usb/gadget/udc/fotg210-udc.c +@@ -629,10 +629,10 @@ static void fotg210_request_error(struct fotg210_udc *fotg210) + static void fotg210_set_address(struct fotg210_udc *fotg210, + struct usb_ctrlrequest *ctrl) + { +- if (ctrl->wValue >= 0x0100) { ++ if (le16_to_cpu(ctrl->wValue) >= 0x0100) { + fotg210_request_error(fotg210); + } else { +- fotg210_set_dev_addr(fotg210, ctrl->wValue); ++ fotg210_set_dev_addr(fotg210, le16_to_cpu(ctrl->wValue)); + fotg210_set_cxdone(fotg210); + } + } +@@ -713,17 +713,17 @@ static void fotg210_get_status(struct fotg210_udc *fotg210, + + switch (ctrl->bRequestType & USB_RECIP_MASK) { + case USB_RECIP_DEVICE: +- fotg210->ep0_data = 1 << USB_DEVICE_SELF_POWERED; ++ fotg210->ep0_data = cpu_to_le16(1 << USB_DEVICE_SELF_POWERED); + break; + case USB_RECIP_INTERFACE: +- fotg210->ep0_data = 0; ++ fotg210->ep0_data = cpu_to_le16(0); + break; + case USB_RECIP_ENDPOINT: + epnum = ctrl->wIndex & USB_ENDPOINT_NUMBER_MASK; + if (epnum) + fotg210->ep0_data = +- fotg210_is_epnstall(fotg210->ep[epnum]) +- << USB_ENDPOINT_HALT; ++ cpu_to_le16(fotg210_is_epnstall(fotg210->ep[epnum]) ++ << USB_ENDPOINT_HALT); + else + fotg210_request_error(fotg210); + break; diff --git a/drivers/usb/gadget/udc/lpc32xx_udc.c b/drivers/usb/gadget/udc/lpc32xx_udc.c index a25d01c895641..865de8db998a9 100644 --- a/drivers/usb/gadget/udc/lpc32xx_udc.c @@ -287676,6 +344192,34 @@ index fb4ffedd6f0dd..9cf43731bcd18 100644 target_ep = &udc->ep[endpoint]; outinbit = udc->setup.wIndex & USB_ENDPOINT_DIR_MASK; outinbit = outinbit >> 7; +diff --git a/drivers/usb/host/bcma-hcd.c b/drivers/usb/host/bcma-hcd.c +index 2df52f75f6b3c..7558cc4d90cc6 100644 +--- a/drivers/usb/host/bcma-hcd.c ++++ b/drivers/usb/host/bcma-hcd.c +@@ -285,7 +285,7 @@ static void bcma_hci_platform_power_gpio(struct bcma_device *dev, bool val) + { + struct bcma_hcd_device *usb_dev = bcma_get_drvdata(dev); + +- if (IS_ERR_OR_NULL(usb_dev->gpio_desc)) ++ if (!usb_dev->gpio_desc) + return; + + gpiod_set_value(usb_dev->gpio_desc, val); 
+@@ -406,9 +406,11 @@ static int bcma_hcd_probe(struct bcma_device *core) + return -ENOMEM; + usb_dev->core = core; + +- if (core->dev.of_node) +- usb_dev->gpio_desc = devm_gpiod_get(&core->dev, "vcc", +- GPIOD_OUT_HIGH); ++ usb_dev->gpio_desc = devm_gpiod_get_optional(&core->dev, "vcc", ++ GPIOD_OUT_HIGH); ++ if (IS_ERR(usb_dev->gpio_desc)) ++ return dev_err_probe(&core->dev, PTR_ERR(usb_dev->gpio_desc), ++ "error obtaining VCC GPIO"); + + switch (core->id.id) { + case BCMA_CORE_USB20_HOST: diff --git a/drivers/usb/host/ehci-brcm.c b/drivers/usb/host/ehci-brcm.c index d3626bfa966b4..6a0f64c9e5e88 100644 --- a/drivers/usb/host/ehci-brcm.c @@ -287694,6 +344238,19 @@ index d3626bfa966b4..6a0f64c9e5e88 100644 /* * RESUME is cleared when GetPortStatus() is called 20ms after start +diff --git a/drivers/usb/host/ehci-fsl.c b/drivers/usb/host/ehci-fsl.c +index 385be30baad36..e38dfbd0d9ddd 100644 +--- a/drivers/usb/host/ehci-fsl.c ++++ b/drivers/usb/host/ehci-fsl.c +@@ -29,7 +29,7 @@ + #include "ehci-fsl.h" + + #define DRIVER_DESC "Freescale EHCI Host controller driver" +-#define DRV_NAME "ehci-fsl" ++#define DRV_NAME "fsl-ehci" + + static struct hc_driver __read_mostly fsl_ehci_hc_driver; + diff --git a/drivers/usb/host/ehci-hcd.c b/drivers/usb/host/ehci-hcd.c index 1776c05d0a486..1440803216297 100644 --- a/drivers/usb/host/ehci-hcd.c @@ -288277,7 +344834,7 @@ index a3f875eea7519..b9754784161d7 100644 /* For each port, did anything change? If so, set that bit in buf. */ diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c -index 0e312066c5c63..02cd4d7c3e7ec 100644 +index 0e312066c5c63..9d9ab7e3560a2 100644 --- a/drivers/usb/host/xhci-mem.c +++ b/drivers/usb/host/xhci-mem.c @@ -642,7 +642,7 @@ struct xhci_stream_info *xhci_alloc_stream_info(struct xhci_hcd *xhci, @@ -288301,7 +344858,35 @@ index 0e312066c5c63..02cd4d7c3e7ec 100644 kfree(stream_info->stream_rings); cleanup_info: kfree(stream_info); -@@ -2583,7 +2588,7 @@ int xhci_mem_init(struct xhci_hcd *xhci, gfp_t flags) +@@ -893,15 +898,19 @@ void xhci_free_virt_device(struct xhci_hcd *xhci, int slot_id) + if (dev->eps[i].stream_info) + xhci_free_stream_info(xhci, + dev->eps[i].stream_info); +- /* Endpoints on the TT/root port lists should have been removed +- * when usb_disable_device() was called for the device. +- * We can't drop them anyway, because the udev might have gone +- * away by this point, and we can't tell what speed it was. ++ /* ++ * Endpoints are normally deleted from the bandwidth list when ++ * endpoints are dropped, before device is freed. ++ * If host is dying or being removed then endpoints aren't ++ * dropped cleanly, so delete the endpoint from list here. ++ * Only applicable for hosts with software bandwidth checking. 
+ */ +- if (!list_empty(&dev->eps[i].bw_endpoint_list)) +- xhci_warn(xhci, "Slot %u endpoint %u " +- "not removed from BW list!\n", +- slot_id, i); ++ ++ if (!list_empty(&dev->eps[i].bw_endpoint_list)) { ++ list_del_init(&dev->eps[i].bw_endpoint_list); ++ xhci_dbg(xhci, "Slot %u endpoint %u not removed from BW list!\n", ++ slot_id, i); ++ } + } + /* If this is a hub, free the TT(s) from the TT list */ + xhci_free_tt_info(xhci, dev, slot_id); +@@ -2583,7 +2592,7 @@ int xhci_mem_init(struct xhci_hcd *xhci, gfp_t flags) fail: xhci_halt(xhci); @@ -288398,37 +344983,53 @@ index 134f4789bd897..9d8094afcc8bc 100644 return 0; } +diff --git a/drivers/usb/host/xhci-mtk.c b/drivers/usb/host/xhci-mtk.c +index c53f6f276d5c6..f8a63c1434925 100644 +--- a/drivers/usb/host/xhci-mtk.c ++++ b/drivers/usb/host/xhci-mtk.c +@@ -619,7 +619,6 @@ static int xhci_mtk_probe(struct platform_device *pdev) + + dealloc_usb3_hcd: + usb_remove_hcd(xhci->shared_hcd); +- xhci->shared_hcd = NULL; + + dealloc_usb2_hcd: + usb_remove_hcd(hcd); diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c -index 2484a9d38ce2b..352626f9e451b 100644 +index 2484a9d38ce2b..7308b388c92bd 100644 --- a/drivers/usb/host/xhci-pci.c +++ b/drivers/usb/host/xhci-pci.c -@@ -59,12 +59,25 @@ +@@ -58,13 +58,15 @@ + #define PCI_DEVICE_ID_INTEL_CML_XHCI 0xa3af #define PCI_DEVICE_ID_INTEL_TIGER_LAKE_XHCI 0x9a13 #define PCI_DEVICE_ID_INTEL_MAPLE_RIDGE_XHCI 0x1138 - #define PCI_DEVICE_ID_INTEL_ALDER_LAKE_XHCI 0x461e -+#define PCI_DEVICE_ID_INTEL_ALDER_LAKE_N_XHCI 0x464e -+#define PCI_DEVICE_ID_INTEL_ALDER_LAKE_PCH_XHCI 0x51ed -+#define PCI_DEVICE_ID_INTEL_RAPTOR_LAKE_XHCI 0xa71e -+#define PCI_DEVICE_ID_INTEL_METEOR_LAKE_XHCI 0x7ec0 +-#define PCI_DEVICE_ID_INTEL_ALDER_LAKE_XHCI 0x461e ++#define PCI_DEVICE_ID_INTEL_ALDER_LAKE_PCH_XHCI 0x51ed ++#define PCI_DEVICE_ID_INTEL_ALDER_LAKE_N_PCH_XHCI 0x54ed #define PCI_DEVICE_ID_AMD_RENOIR_XHCI 0x1639 #define PCI_DEVICE_ID_AMD_PROMONTORYA_4 0x43b9 #define PCI_DEVICE_ID_AMD_PROMONTORYA_3 0x43ba #define PCI_DEVICE_ID_AMD_PROMONTORYA_2 0x43bb #define PCI_DEVICE_ID_AMD_PROMONTORYA_1 0x43bc -+#define PCI_DEVICE_ID_AMD_YELLOW_CARP_XHCI_1 0x161a -+#define PCI_DEVICE_ID_AMD_YELLOW_CARP_XHCI_2 0x161b -+#define PCI_DEVICE_ID_AMD_YELLOW_CARP_XHCI_3 0x161d -+#define PCI_DEVICE_ID_AMD_YELLOW_CARP_XHCI_4 0x161e -+#define PCI_DEVICE_ID_AMD_YELLOW_CARP_XHCI_5 0x15d6 -+#define PCI_DEVICE_ID_AMD_YELLOW_CARP_XHCI_6 0x15d7 -+#define PCI_DEVICE_ID_AMD_YELLOW_CARP_XHCI_7 0x161c -+#define PCI_DEVICE_ID_AMD_YELLOW_CARP_XHCI_8 0x161f + #define PCI_DEVICE_ID_ASMEDIA_1042_XHCI 0x1042 #define PCI_DEVICE_ID_ASMEDIA_1042A_XHCI 0x1142 #define PCI_DEVICE_ID_ASMEDIA_1142_XHCI 0x1242 -@@ -114,7 +127,6 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci) +@@ -76,9 +78,12 @@ static const char hcd_name[] = "xhci_hcd"; + static struct hc_driver __read_mostly xhci_pci_hc_driver; + + static int xhci_pci_setup(struct usb_hcd *hcd); ++static int xhci_pci_update_hub_device(struct usb_hcd *hcd, struct usb_device *hdev, ++ struct usb_tt *tt, gfp_t mem_flags); + + static const struct xhci_driver_overrides xhci_pci_overrides __initconst = { + .reset = xhci_pci_setup, ++ .update_hub_device = xhci_pci_update_hub_device, + }; + + /* called after powerup, by probe or system-pm "wakeup" */ +@@ -114,7 +119,6 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci) /* Look for vendor-specific quirks */ if (pdev->vendor == PCI_VENDOR_ID_FRESCO_LOGIC && (pdev->device == PCI_DEVICE_ID_FRESCO_LOGIC_PDK || @@ -288436,7 
+345037,7 @@ index 2484a9d38ce2b..352626f9e451b 100644 pdev->device == PCI_DEVICE_ID_FRESCO_LOGIC_FL1400)) { if (pdev->device == PCI_DEVICE_ID_FRESCO_LOGIC_PDK && pdev->revision == 0x0) { -@@ -149,6 +161,10 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci) +@@ -149,6 +153,10 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci) pdev->device == PCI_DEVICE_ID_FRESCO_LOGIC_FL1009) xhci->quirks |= XHCI_BROKEN_STREAMS; @@ -288447,37 +345048,120 @@ index 2484a9d38ce2b..352626f9e451b 100644 if (pdev->vendor == PCI_VENDOR_ID_NEC) xhci->quirks |= XHCI_NEC_HOST; -@@ -254,7 +270,11 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci) +@@ -242,6 +250,11 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci) + pdev->device == PCI_DEVICE_ID_INTEL_DNV_XHCI)) + xhci->quirks |= XHCI_MISSING_CAS; + ++ if (pdev->vendor == PCI_VENDOR_ID_INTEL && ++ (pdev->device == PCI_DEVICE_ID_INTEL_ALDER_LAKE_PCH_XHCI || ++ pdev->device == PCI_DEVICE_ID_INTEL_ALDER_LAKE_N_PCH_XHCI)) ++ xhci->quirks |= XHCI_RESET_TO_DEFAULT; ++ + if (pdev->vendor == PCI_VENDOR_ID_INTEL && + (pdev->device == PCI_DEVICE_ID_INTEL_ALPINE_RIDGE_2C_XHCI || + pdev->device == PCI_DEVICE_ID_INTEL_ALPINE_RIDGE_4C_XHCI || +@@ -253,8 +266,7 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci) + pdev->device == PCI_DEVICE_ID_INTEL_TITAN_RIDGE_DD_XHCI || pdev->device == PCI_DEVICE_ID_INTEL_ICE_LAKE_XHCI || pdev->device == PCI_DEVICE_ID_INTEL_TIGER_LAKE_XHCI || - pdev->device == PCI_DEVICE_ID_INTEL_MAPLE_RIDGE_XHCI || +- pdev->device == PCI_DEVICE_ID_INTEL_MAPLE_RIDGE_XHCI || - pdev->device == PCI_DEVICE_ID_INTEL_ALDER_LAKE_XHCI)) -+ pdev->device == PCI_DEVICE_ID_INTEL_ALDER_LAKE_XHCI || -+ pdev->device == PCI_DEVICE_ID_INTEL_ALDER_LAKE_N_XHCI || -+ pdev->device == PCI_DEVICE_ID_INTEL_ALDER_LAKE_PCH_XHCI || -+ pdev->device == PCI_DEVICE_ID_INTEL_RAPTOR_LAKE_XHCI || -+ pdev->device == PCI_DEVICE_ID_INTEL_METEOR_LAKE_XHCI)) ++ pdev->device == PCI_DEVICE_ID_INTEL_MAPLE_RIDGE_XHCI)) xhci->quirks |= XHCI_DEFAULT_PM_RUNTIME_ALLOW; if (pdev->vendor == PCI_VENDOR_ID_ETRON && -@@ -317,6 +337,17 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci) +@@ -287,8 +299,14 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci) + } + + if (pdev->vendor == PCI_VENDOR_ID_ASMEDIA && +- pdev->device == PCI_DEVICE_ID_ASMEDIA_1042_XHCI) ++ pdev->device == PCI_DEVICE_ID_ASMEDIA_1042_XHCI) { ++ /* ++ * try to tame the ASMedia 1042 controller which reports 0.96 ++ * but appears to behave more like 1.0 ++ */ ++ xhci->quirks |= XHCI_SPURIOUS_SUCCESS; + xhci->quirks |= XHCI_BROKEN_STREAMS; ++ } + if (pdev->vendor == PCI_VENDOR_ID_ASMEDIA && + pdev->device == PCI_DEVICE_ID_ASMEDIA_1042A_XHCI) { + xhci->quirks |= XHCI_TRUST_TX_LENGTH; +@@ -317,6 +335,10 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci) pdev->device == PCI_DEVICE_ID_AMD_PROMONTORYA_4)) xhci->quirks |= XHCI_NO_SOFT_RETRY; -+ if (pdev->vendor == PCI_VENDOR_ID_AMD && -+ (pdev->device == PCI_DEVICE_ID_AMD_YELLOW_CARP_XHCI_1 || -+ pdev->device == PCI_DEVICE_ID_AMD_YELLOW_CARP_XHCI_2 || -+ pdev->device == PCI_DEVICE_ID_AMD_YELLOW_CARP_XHCI_3 || -+ pdev->device == PCI_DEVICE_ID_AMD_YELLOW_CARP_XHCI_4 || -+ pdev->device == PCI_DEVICE_ID_AMD_YELLOW_CARP_XHCI_5 || -+ pdev->device == PCI_DEVICE_ID_AMD_YELLOW_CARP_XHCI_6 || -+ pdev->device == PCI_DEVICE_ID_AMD_YELLOW_CARP_XHCI_7 || -+ pdev->device == PCI_DEVICE_ID_AMD_YELLOW_CARP_XHCI_8)) ++ /* xHC spec requires PCI 
devices to support D3hot and D3cold */ ++ if (xhci->hci_version >= 0x120) + xhci->quirks |= XHCI_DEFAULT_PM_RUNTIME_ALLOW; + if (xhci->quirks & XHCI_RESET_ON_RESUME) xhci_dbg_trace(xhci, trace_xhci_dbg_quirks, "QUIRK: Resetting on resume"); +@@ -334,8 +356,38 @@ static void xhci_pme_acpi_rtd3_enable(struct pci_dev *dev) + NULL); + ACPI_FREE(obj); + } ++ ++static void xhci_find_lpm_incapable_ports(struct usb_hcd *hcd, struct usb_device *hdev) ++{ ++ struct xhci_hcd *xhci = hcd_to_xhci(hcd); ++ struct xhci_hub *rhub = &xhci->usb3_rhub; ++ int ret; ++ int i; ++ ++ /* This is not the usb3 roothub we are looking for */ ++ if (hcd != rhub->hcd) ++ return; ++ ++ if (hdev->maxchild > rhub->num_ports) { ++ dev_err(&hdev->dev, "USB3 roothub port number mismatch\n"); ++ return; ++ } ++ ++ for (i = 0; i < hdev->maxchild; i++) { ++ ret = usb_acpi_port_lpm_incapable(hdev, i); ++ ++ dev_dbg(&hdev->dev, "port-%d disable U1/U2 _DSM: %d\n", i + 1, ret); ++ ++ if (ret >= 0) { ++ rhub->ports[i]->lpm_incapable = ret; ++ continue; ++ } ++ } ++} ++ + #else + static void xhci_pme_acpi_rtd3_enable(struct pci_dev *dev) { } ++static void xhci_find_lpm_incapable_ports(struct usb_hcd *hcd, struct usb_device *hdev) { } + #endif /* CONFIG_ACPI */ + + /* called during probe() after chip reset completes */ +@@ -368,6 +420,16 @@ static int xhci_pci_setup(struct usb_hcd *hcd) + return xhci_pci_reinit(xhci, pdev); + } + ++static int xhci_pci_update_hub_device(struct usb_hcd *hcd, struct usb_device *hdev, ++ struct usb_tt *tt, gfp_t mem_flags) ++{ ++ /* Check if acpi claims some USB3 roothub ports are lpm incapable */ ++ if (!hdev->parent) ++ xhci_find_lpm_incapable_ports(hcd, hdev); ++ ++ return xhci_update_hub_device(hcd, hdev, tt, mem_flags); ++} ++ + /* + * We need to register our own PCI probe function (instead of the USB core's + * function) in order to create a second roothub under xHCI. +@@ -437,6 +499,8 @@ static int xhci_pci_probe(struct pci_dev *dev, const struct pci_device_id *id) + if (xhci->quirks & XHCI_DEFAULT_PM_RUNTIME_ALLOW) + pm_runtime_allow(&dev->dev); + ++ dma_set_max_seg_size(&dev->dev, UINT_MAX); ++ + return 0; + + put_usb3_hcd: diff --git a/drivers/usb/host/xhci-plat.c b/drivers/usb/host/xhci-plat.c index c1edcc9b13cec..972a44b2a7f12 100644 --- a/drivers/usb/host/xhci-plat.c @@ -288532,7 +345216,7 @@ index c1edcc9b13cec..972a44b2a7f12 100644 if (ret) return ret; diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c -index 311597bba80e2..f9707997969d4 100644 +index 311597bba80e2..220d836428d2a 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -366,7 +366,9 @@ static void xhci_handle_stopped_cmd_ring(struct xhci_hcd *xhci, @@ -288571,7 +345255,19 @@ index 311597bba80e2..f9707997969d4 100644 /* Section 4.6.1.2 of xHCI 1.0 spec says software should also time the * completion of the Command Abort operation. 
If CRR is not negated in 5 -@@ -1518,7 +1525,6 @@ static void xhci_handle_cmd_disable_slot(struct xhci_hcd *xhci, int slot_id) +@@ -1171,7 +1178,10 @@ static void xhci_kill_endpoint_urbs(struct xhci_hcd *xhci, + struct xhci_virt_ep *ep; + struct xhci_ring *ring; + +- ep = &xhci->devs[slot_id]->eps[ep_index]; ++ ep = xhci_get_virt_ep(xhci, slot_id, ep_index); ++ if (!ep) ++ return; ++ + if ((ep->ep_state & EP_HAS_STREAMS) || + (ep->ep_state & EP_GETTING_NO_STREAMS)) { + int stream_id; +@@ -1518,7 +1528,6 @@ static void xhci_handle_cmd_disable_slot(struct xhci_hcd *xhci, int slot_id) if (xhci->quirks & XHCI_EP_LIMIT_QUIRK) /* Delete default control endpoint resources */ xhci_free_device_endpoint_resources(xhci, virt_dev, true); @@ -288579,7 +345275,42 @@ index 311597bba80e2..f9707997969d4 100644 } static void xhci_handle_cmd_config_ep(struct xhci_hcd *xhci, int slot_id, -@@ -3135,6 +3141,7 @@ irqreturn_t xhci_irq(struct usb_hcd *hcd) +@@ -2518,7 +2527,7 @@ static int process_bulk_intr_td(struct xhci_hcd *xhci, struct xhci_virt_ep *ep, + + switch (trb_comp_code) { + case COMP_SUCCESS: +- ep_ring->err_count = 0; ++ ep->err_count = 0; + /* handle success with untransferred data as short packet */ + if (ep_trb != td->last_trb || remaining) { + xhci_warn(xhci, "WARN Successful completion on short TX\n"); +@@ -2544,7 +2553,7 @@ static int process_bulk_intr_td(struct xhci_hcd *xhci, struct xhci_virt_ep *ep, + break; + case COMP_USB_TRANSACTION_ERROR: + if (xhci->quirks & XHCI_NO_SOFT_RETRY || +- (ep_ring->err_count++ > MAX_SOFT_RETRY) || ++ (ep->err_count++ > MAX_SOFT_RETRY) || + le32_to_cpu(slot_ctx->tt_info) & TT_SLOT) + break; + +@@ -2625,8 +2634,14 @@ static int handle_tx_event(struct xhci_hcd *xhci, + case COMP_USB_TRANSACTION_ERROR: + case COMP_INVALID_STREAM_TYPE_ERROR: + case COMP_INVALID_STREAM_ID_ERROR: +- xhci_handle_halted_endpoint(xhci, ep, 0, NULL, +- EP_SOFT_RESET); ++ xhci_dbg(xhci, "Stream transaction error ep %u no id\n", ++ ep_index); ++ if (ep->err_count++ > MAX_SOFT_RETRY) ++ xhci_handle_halted_endpoint(xhci, ep, 0, NULL, ++ EP_HARD_RESET); ++ else ++ xhci_handle_halted_endpoint(xhci, ep, 0, NULL, ++ EP_SOFT_RESET); + goto cleanup; + case COMP_RING_UNDERRUN: + case COMP_RING_OVERRUN: +@@ -3135,6 +3150,7 @@ irqreturn_t xhci_irq(struct usb_hcd *hcd) if (event_loop++ < TRBS_PER_SEGMENT / 2) continue; xhci_update_erst_dequeue(xhci, event_ring_deq); @@ -288709,7 +345440,7 @@ index 1bf494b649bd2..bdb776553826b 100644 tegra_xusb_powergate_partitions(tegra); diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c -index 541fe4dcc43a2..8c7710698428c 100644 +index 541fe4dcc43a2..a982b53467644 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -65,7 +65,7 @@ static bool td_on_ring(struct xhci_td *td, struct xhci_ring *ring) @@ -288803,7 +345534,7 @@ index 541fe4dcc43a2..8c7710698428c 100644 spin_unlock_irq(&xhci->lock); xhci_cleanup_msix(xhci); -@@ -781,11 +782,22 @@ void xhci_shutdown(struct usb_hcd *hcd) +@@ -781,11 +782,28 @@ void xhci_shutdown(struct usb_hcd *hcd) if (xhci->quirks & XHCI_SPURIOUS_REBOOT) usb_disable_xhci_ports(to_pci_dev(hcd->self.sysdev)); @@ -288820,14 +345551,22 @@ index 541fe4dcc43a2..8c7710698428c 100644 + spin_lock_irq(&xhci->lock); xhci_halt(xhci); - /* Workaround for spurious wakeups at shutdown with HSW */ - if (xhci->quirks & XHCI_SPURIOUS_WAKEUP) +- /* Workaround for spurious wakeups at shutdown with HSW */ +- if (xhci->quirks & XHCI_SPURIOUS_WAKEUP) - xhci_reset(xhci); ++ ++ /* ++ * Workaround for spurious wakeps at shutdown with 
HSW, and for boot ++ * firmware delay in ADL-P PCH if port are left in U3 at shutdown ++ */ ++ if (xhci->quirks & XHCI_SPURIOUS_WAKEUP || ++ xhci->quirks & XHCI_RESET_TO_DEFAULT) + xhci_reset(xhci, XHCI_RESET_SHORT_USEC); ++ spin_unlock_irq(&xhci->lock); xhci_cleanup_msix(xhci); -@@ -1092,6 +1104,7 @@ int xhci_resume(struct xhci_hcd *xhci, bool hibernated) +@@ -1092,6 +1110,7 @@ int xhci_resume(struct xhci_hcd *xhci, bool hibernated) int retval = 0; bool comp_timer_running = false; bool pending_portevent = false; @@ -288835,7 +345574,7 @@ index 541fe4dcc43a2..8c7710698428c 100644 if (!hcd->state) return 0; -@@ -1108,10 +1121,11 @@ int xhci_resume(struct xhci_hcd *xhci, bool hibernated) +@@ -1108,10 +1127,11 @@ int xhci_resume(struct xhci_hcd *xhci, bool hibernated) set_bit(HCD_FLAG_HW_ACCESSIBLE, &xhci->shared_hcd->flags); spin_lock_irq(&xhci->lock); @@ -288850,7 +345589,7 @@ index 541fe4dcc43a2..8c7710698428c 100644 /* * Some controllers might lose power during suspend, so wait * for controller not ready bit to clear, just as in xHC init. -@@ -1144,12 +1158,18 @@ int xhci_resume(struct xhci_hcd *xhci, bool hibernated) +@@ -1144,12 +1164,18 @@ int xhci_resume(struct xhci_hcd *xhci, bool hibernated) spin_unlock_irq(&xhci->lock); return -ETIMEDOUT; } @@ -288872,7 +345611,7 @@ index 541fe4dcc43a2..8c7710698428c 100644 if ((xhci->quirks & XHCI_COMP_MODE_QUIRK) && !(xhci_all_ports_seen_u0(xhci))) { del_timer_sync(&xhci->comp_mode_recovery_timer); -@@ -1164,7 +1184,7 @@ int xhci_resume(struct xhci_hcd *xhci, bool hibernated) +@@ -1164,7 +1190,7 @@ int xhci_resume(struct xhci_hcd *xhci, bool hibernated) xhci_dbg(xhci, "Stop HCD\n"); xhci_halt(xhci); xhci_zero_64b_regs(xhci); @@ -288881,7 +345620,7 @@ index 541fe4dcc43a2..8c7710698428c 100644 spin_unlock_irq(&xhci->lock); if (retval) return retval; -@@ -1605,9 +1625,12 @@ static int xhci_urb_enqueue(struct usb_hcd *hcd, struct urb *urb, gfp_t mem_flag +@@ -1605,9 +1631,12 @@ static int xhci_urb_enqueue(struct usb_hcd *hcd, struct urb *urb, gfp_t mem_flag struct urb_priv *urb_priv; int num_tds; @@ -288896,7 +345635,7 @@ index 541fe4dcc43a2..8c7710698428c 100644 slot_id = urb->dev->slot_id; ep_index = xhci_get_endpoint_index(&urb->ep->desc); -@@ -3324,7 +3347,7 @@ static int xhci_check_streams_endpoint(struct xhci_hcd *xhci, +@@ -3324,7 +3353,7 @@ static int xhci_check_streams_endpoint(struct xhci_hcd *xhci, return -EINVAL; ret = xhci_check_args(xhci_to_hcd(xhci), udev, ep, 1, true, __func__); if (ret <= 0) @@ -288905,15 +345644,18 @@ index 541fe4dcc43a2..8c7710698428c 100644 if (usb_ss_max_streams(&ep->ss_ep_comp) == 0) { xhci_warn(xhci, "WARN: SuperSpeed Endpoint Companion" " descriptor for ep 0x%x does not support streams\n", -@@ -3935,7 +3958,6 @@ static void xhci_free_dev(struct usb_hcd *hcd, struct usb_device *udev) +@@ -3933,9 +3962,9 @@ static void xhci_free_dev(struct usb_hcd *hcd, struct usb_device *udev) + struct xhci_hcd *xhci = hcd_to_xhci(hcd); + struct xhci_virt_device *virt_dev; struct xhci_slot_ctx *slot_ctx; ++ unsigned long flags; int i, ret; -#ifndef CONFIG_USB_DEFAULT_PERSIST /* * We called pm_runtime_get_noresume when the device was attached. 
* Decrement the counter here to allow controller to runtime suspend -@@ -3943,7 +3965,6 @@ static void xhci_free_dev(struct usb_hcd *hcd, struct usb_device *udev) +@@ -3943,7 +3972,6 @@ static void xhci_free_dev(struct usb_hcd *hcd, struct usb_device *udev) */ if (xhci->quirks & XHCI_RESET_ON_RESUME) pm_runtime_put_noidle(hcd->self.controller); @@ -288921,7 +345663,7 @@ index 541fe4dcc43a2..8c7710698428c 100644 ret = xhci_check_args(hcd, udev, NULL, 0, true, __func__); /* If the host is halted due to driver unload, we still need to free the -@@ -3962,9 +3983,8 @@ static void xhci_free_dev(struct usb_hcd *hcd, struct usb_device *udev) +@@ -3962,9 +3990,12 @@ static void xhci_free_dev(struct usb_hcd *hcd, struct usb_device *udev) del_timer_sync(&virt_dev->eps[i].stop_cmd_timer); } virt_dev->udev = NULL; @@ -288929,11 +345671,15 @@ index 541fe4dcc43a2..8c7710698428c 100644 - if (ret) - xhci_free_virt_device(xhci, udev->slot_id); + xhci_disable_slot(xhci, udev->slot_id); ++ ++ spin_lock_irqsave(&xhci->lock, flags); + xhci_free_virt_device(xhci, udev->slot_id); ++ spin_unlock_irqrestore(&xhci->lock, flags); ++ } int xhci_disable_slot(struct xhci_hcd *xhci, u32 slot_id) -@@ -3974,7 +3994,7 @@ int xhci_disable_slot(struct xhci_hcd *xhci, u32 slot_id) +@@ -3974,7 +4005,7 @@ int xhci_disable_slot(struct xhci_hcd *xhci, u32 slot_id) u32 state; int ret = 0; @@ -288942,7 +345688,7 @@ index 541fe4dcc43a2..8c7710698428c 100644 if (!command) return -ENOMEM; -@@ -3999,6 +4019,15 @@ int xhci_disable_slot(struct xhci_hcd *xhci, u32 slot_id) +@@ -3999,6 +4030,15 @@ int xhci_disable_slot(struct xhci_hcd *xhci, u32 slot_id) } xhci_ring_cmd_db(xhci); spin_unlock_irqrestore(&xhci->lock, flags); @@ -288958,7 +345704,7 @@ index 541fe4dcc43a2..8c7710698428c 100644 return ret; } -@@ -4095,23 +4124,20 @@ int xhci_alloc_dev(struct usb_hcd *hcd, struct usb_device *udev) +@@ -4095,23 +4135,20 @@ int xhci_alloc_dev(struct usb_hcd *hcd, struct usb_device *udev) xhci_debugfs_create_slot(xhci, slot_id); @@ -288984,7 +345730,7 @@ index 541fe4dcc43a2..8c7710698428c 100644 return 0; } -@@ -4241,6 +4267,7 @@ static int xhci_setup_device(struct usb_hcd *hcd, struct usb_device *udev, +@@ -4241,6 +4278,7 @@ static int xhci_setup_device(struct usb_hcd *hcd, struct usb_device *udev, mutex_unlock(&xhci->mutex); ret = xhci_disable_slot(xhci, udev->slot_id); @@ -288992,7 +345738,46 @@ index 541fe4dcc43a2..8c7710698428c 100644 if (!ret) xhci_alloc_dev(hcd, udev); kfree(command->completion); -@@ -5305,7 +5332,7 @@ int xhci_gen_setup(struct usb_hcd *hcd, xhci_get_quirks_t get_quirks) +@@ -5013,6 +5051,7 @@ static int xhci_enable_usb3_lpm_timeout(struct usb_hcd *hcd, + struct usb_device *udev, enum usb3_link_state state) + { + struct xhci_hcd *xhci; ++ struct xhci_port *port; + u16 hub_encoded_timeout; + int mel; + int ret; +@@ -5026,6 +5065,13 @@ static int xhci_enable_usb3_lpm_timeout(struct usb_hcd *hcd, + !xhci->devs[udev->slot_id]) + return USB3_LPM_DISABLED; + ++ /* If connected to root port then check port can handle lpm */ ++ if (udev->parent && !udev->parent->parent) { ++ port = xhci->usb3_rhub.ports[udev->portnum - 1]; ++ if (port->lpm_incapable) ++ return USB3_LPM_DISABLED; ++ } ++ + hub_encoded_timeout = xhci_calculate_lpm_timeout(hcd, udev, state); + mel = calculate_max_exit_latency(udev, state, hub_encoded_timeout); + if (mel < 0) { +@@ -5085,7 +5131,7 @@ static int xhci_disable_usb3_lpm_timeout(struct usb_hcd *hcd, + /* Once a hub descriptor is fetched for a device, we need to update the xHC's + * internal data structures 
for the device. + */ +-static int xhci_update_hub_device(struct usb_hcd *hcd, struct usb_device *hdev, ++int xhci_update_hub_device(struct usb_hcd *hcd, struct usb_device *hdev, + struct usb_tt *tt, gfp_t mem_flags) + { + struct xhci_hcd *xhci = hcd_to_xhci(hcd); +@@ -5185,6 +5231,7 @@ static int xhci_update_hub_device(struct usb_hcd *hcd, struct usb_device *hdev, + xhci_free_command(xhci, config_cmd); + return ret; + } ++EXPORT_SYMBOL_GPL(xhci_update_hub_device); + + static int xhci_get_frame(struct usb_hcd *hcd) + { +@@ -5305,7 +5352,7 @@ int xhci_gen_setup(struct usb_hcd *hcd, xhci_get_quirks_t get_quirks) xhci_dbg(xhci, "Resetting HCD\n"); /* Reset the internal HC memory state and registers. */ @@ -289001,8 +345786,17 @@ index 541fe4dcc43a2..8c7710698428c 100644 if (retval) return retval; xhci_dbg(xhci, "Reset complete\n"); +@@ -5462,6 +5509,8 @@ void xhci_init_driver(struct hc_driver *drv, + drv->check_bandwidth = over->check_bandwidth; + if (over->reset_bandwidth) + drv->reset_bandwidth = over->reset_bandwidth; ++ if (over->update_hub_device) ++ drv->update_hub_device = over->update_hub_device; + } + } + EXPORT_SYMBOL_GPL(xhci_init_driver); diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h -index 5a75fe5631238..3b39501f26ea9 100644 +index 5a75fe5631238..b8ad9676312bf 100644 --- a/drivers/usb/host/xhci.h +++ b/drivers/usb/host/xhci.h @@ -229,6 +229,9 @@ struct xhci_op_regs { @@ -289015,7 +345809,31 @@ index 5a75fe5631238..3b39501f26ea9 100644 /* IMAN - Interrupt Management Register */ #define IMAN_IE (1 << 1) #define IMAN_IP (1 << 0) -@@ -1827,7 +1830,7 @@ struct xhci_hcd { +@@ -930,6 +933,7 @@ struct xhci_virt_ep { + * have to restore the device state to the previous state + */ + struct xhci_ring *new_ring; ++ unsigned int err_count; + unsigned int ep_state; + #define SET_DEQ_PENDING (1 << 0) + #define EP_HALTED (1 << 1) /* For stall handling */ +@@ -1626,7 +1630,6 @@ struct xhci_ring { + * if we own the TRB (if we are the consumer). See section 4.9.1. + */ + u32 cycle_state; +- unsigned int err_count; + unsigned int stream_id; + unsigned int num_segs; + unsigned int num_trbs_free; +@@ -1734,6 +1737,7 @@ struct xhci_port { + int hcd_portnum; + struct xhci_hub *rhub; + struct xhci_port_cap *port_cap; ++ unsigned int lpm_incapable:1; + }; + + struct xhci_hub { +@@ -1827,7 +1831,7 @@ struct xhci_hcd { /* Host controller watchdog timer structures */ unsigned int xhc_state; @@ -289024,15 +345842,25 @@ index 5a75fe5631238..3b39501f26ea9 100644 u32 command; struct s3_save s3; /* Host controller is dying - not responding to commands. "I'm not dead yet!" 
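The xhci.c hunks above un-static and export xhci_update_hub_device(), and the xhci_init_driver() hunk copies an optional update_hub_device hook out of the overrides table so platform glue can interpose on hub-descriptor updates. Below is a minimal standalone sketch of that override-copy pattern only; the struct layouts, function names, and the example override are simplified stand-ins invented for illustration, not the kernel's API.

/*
 * Sketch: a core driver exposes a default ops table, and platform glue
 * supplies optional overrides that are copied in at init time. All
 * names here are hypothetical stand-ins for the kernel structures.
 */
#include <stdio.h>

struct hc_driver {
	int (*update_hub_device)(int hub_id);	/* optional hook */
};

struct hc_driver_overrides {
	int (*update_hub_device)(int hub_id);	/* NULL = keep default */
};

static int generic_update_hub_device(int hub_id)
{
	printf("generic update for hub %d\n", hub_id);
	return 0;
}

/*
 * Mirrors the xhci_init_driver() idea: start from the defaults, then
 * patch in only the hooks the platform actually provides.
 */
static void init_driver(struct hc_driver *drv,
			const struct hc_driver_overrides *over)
{
	drv->update_hub_device = generic_update_hub_device;
	if (over && over->update_hub_device)
		drv->update_hub_device = over->update_hub_device;
}

/* Hypothetical platform-specific hook, standing in for glue code. */
static int quirky_update_hub_device(int hub_id)
{
	printf("platform-specific update for hub %d\n", hub_id);
	return 0;
}

int main(void)
{
	struct hc_driver plain, quirky;
	const struct hc_driver_overrides over = {
		.update_hub_device = quirky_update_hub_device,
	};

	init_driver(&plain, NULL);	/* no overrides: default hook */
	init_driver(&quirky, &over);	/* override replaces default */

	plain.update_hub_device(1);
	quirky.update_hub_device(2);
	return 0;
}

The design point, as far as the hunks show it, is that the core table stays the single source of defaults while each platform driver only names the hooks it genuinely needs to replace.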
-@@ -1900,6 +1903,7 @@ struct xhci_hcd { +@@ -1900,6 +1904,8 @@ struct xhci_hcd { #define XHCI_NO_SOFT_RETRY BIT_ULL(40) #define XHCI_BROKEN_D3COLD BIT_ULL(41) #define XHCI_EP_CTX_BROKEN_DCS BIT_ULL(42) +#define XHCI_SUSPEND_RESUME_CLKS BIT_ULL(43) ++#define XHCI_RESET_TO_DEFAULT BIT_ULL(44) unsigned int num_active_eps; unsigned int limit_active_eps; -@@ -2083,11 +2087,11 @@ void xhci_free_container_ctx(struct xhci_hcd *xhci, +@@ -1943,6 +1949,8 @@ struct xhci_driver_overrides { + struct usb_host_endpoint *ep); + int (*check_bandwidth)(struct usb_hcd *, struct usb_device *); + void (*reset_bandwidth)(struct usb_hcd *, struct usb_device *); ++ int (*update_hub_device)(struct usb_hcd *hcd, struct usb_device *hdev, ++ struct usb_tt *tt, gfp_t mem_flags); + }; + + #define XHCI_CFC_DELAY 10 +@@ -2083,11 +2091,11 @@ void xhci_free_container_ctx(struct xhci_hcd *xhci, /* xHCI host controller glue */ typedef void (*xhci_get_quirks_t)(struct device *, struct xhci_hcd *); @@ -289046,7 +345874,16 @@ index 5a75fe5631238..3b39501f26ea9 100644 int xhci_run(struct usb_hcd *hcd); int xhci_gen_setup(struct usb_hcd *hcd, xhci_get_quirks_t get_quirks); void xhci_shutdown(struct usb_hcd *hcd); -@@ -2390,7 +2394,7 @@ static inline const char *xhci_decode_trb(char *str, size_t size, +@@ -2099,6 +2107,8 @@ int xhci_drop_endpoint(struct usb_hcd *hcd, struct usb_device *udev, + struct usb_host_endpoint *ep); + int xhci_check_bandwidth(struct usb_hcd *hcd, struct usb_device *udev); + void xhci_reset_bandwidth(struct usb_hcd *hcd, struct usb_device *udev); ++int xhci_update_hub_device(struct usb_hcd *hcd, struct usb_device *hdev, ++ struct usb_tt *tt, gfp_t mem_flags); + int xhci_disable_slot(struct xhci_hcd *xhci, u32 slot_id); + int xhci_ext_cap_init(struct xhci_hcd *xhci); + +@@ -2390,7 +2400,7 @@ static inline const char *xhci_decode_trb(char *str, size_t size, field3 & TRB_CYCLE ? 
'C' : 'c'); break; case TRB_STOP_RING: @@ -289055,7 +345892,7 @@ index 5a75fe5631238..3b39501f26ea9 100644 "%s: slot %d sp %d ep %d flags %c", xhci_trb_type_string(type), TRB_TO_SLOT_ID(field3), -@@ -2467,6 +2471,8 @@ static inline const char *xhci_decode_ctrl_ctx(char *str, +@@ -2467,6 +2477,8 @@ static inline const char *xhci_decode_ctrl_ctx(char *str, unsigned int bit; int ret = 0; @@ -289064,7 +345901,7 @@ index 5a75fe5631238..3b39501f26ea9 100644 if (drop) { ret = sprintf(str, "Drop:"); for_each_set_bit(bit, &drop, 32) -@@ -2624,8 +2630,11 @@ static inline const char *xhci_decode_usbsts(char *str, u32 usbsts) +@@ -2624,8 +2636,11 @@ static inline const char *xhci_decode_usbsts(char *str, u32 usbsts) { int ret = 0; @@ -289156,7 +345993,7 @@ index e9437a176518a..ea39243efee39 100644 dev_dbg(&dev->interface->dev, "read %d bytes fingerprint data\n", bytes_read); diff --git a/drivers/usb/misc/iowarrior.c b/drivers/usb/misc/iowarrior.c -index efbd317f2f252..988a8c02e7e24 100644 +index efbd317f2f252..b421f13260875 100644 --- a/drivers/usb/misc/iowarrior.c +++ b/drivers/usb/misc/iowarrior.c @@ -99,10 +99,6 @@ struct iowarrior { @@ -289188,6 +346025,15 @@ index efbd317f2f252..988a8c02e7e24 100644 } /*---------------------*/ +@@ -818,7 +814,7 @@ static int iowarrior_probe(struct usb_interface *interface, + break; + + case USB_DEVICE_ID_CODEMERCS_IOW100: +- dev->report_size = 13; ++ dev->report_size = 12; + break; + } + } diff --git a/drivers/usb/misc/uss720.c b/drivers/usb/misc/uss720.c index 748139d262633..0be8efcda15d5 100644 --- a/drivers/usb/misc/uss720.c @@ -289369,7 +346215,7 @@ index 8de143807c1ae..70693cae83efb 100644 comment "MUSB DMA mode" diff --git a/drivers/usb/musb/musb_gadget.c b/drivers/usb/musb/musb_gadget.c -index 98c0f4c1bffd9..dc67fff8e9418 100644 +index 98c0f4c1bffd9..22c3df49ba8af 100644 --- a/drivers/usb/musb/musb_gadget.c +++ b/drivers/usb/musb/musb_gadget.c @@ -760,6 +760,9 @@ static void rxstate(struct musb *musb, struct musb_request *req) @@ -289395,6 +346241,15 @@ index 98c0f4c1bffd9..dc67fff8e9418 100644 } unlock: +@@ -1623,8 +1628,6 @@ static int musb_gadget_vbus_draw(struct usb_gadget *gadget, unsigned mA) + { + struct musb *musb = gadget_to_musb(gadget); + +- if (!musb->xceiv->set_power) +- return -EOPNOTSUPP; + return usb_phy_set_power(musb->xceiv, mA); + } + diff --git a/drivers/usb/musb/omap2430.c b/drivers/usb/musb/omap2430.c index f086960fe2b50..bd1de5c4c4340 100644 --- a/drivers/usb/musb/omap2430.c @@ -289471,6 +346326,25 @@ index 24de64edb674b..2d77edefb4b30 100644 if (freq_usb == 0) { if (freq_extal == 12000000) { /* Select 12MHz XTAL */ +diff --git a/drivers/usb/roles/class.c b/drivers/usb/roles/class.c +index dfaed7eee94fc..32e6d19f7011a 100644 +--- a/drivers/usb/roles/class.c ++++ b/drivers/usb/roles/class.c +@@ -106,10 +106,13 @@ usb_role_switch_is_parent(struct fwnode_handle *fwnode) + struct fwnode_handle *parent = fwnode_get_parent(fwnode); + struct device *dev; + +- if (!parent || !fwnode_property_present(parent, "usb-role-switch")) ++ if (!fwnode_property_present(parent, "usb-role-switch")) { ++ fwnode_handle_put(parent); + return NULL; ++ } + + dev = class_find_device_by_fwnode(role_class, parent); ++ fwnode_handle_put(parent); + return dev ? 
to_role_switch(dev) : ERR_PTR(-EPROBE_DEFER); + } + diff --git a/drivers/usb/serial/Kconfig b/drivers/usb/serial/Kconfig index de5c012570603..ef8d1c73c7545 100644 --- a/drivers/usb/serial/Kconfig @@ -289546,7 +346420,7 @@ index 2db917eab7995..752daa952abd6 100644 r = ch341_control_out(dev, CH341_REQ_SERIAL_INIT, 0, 0); if (r < 0) diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c -index 189279869a8b0..a2126b07e854a 100644 +index 189279869a8b0..fbdebf7e5502d 100644 --- a/drivers/usb/serial/cp210x.c +++ b/drivers/usb/serial/cp210x.c @@ -51,6 +51,7 @@ static void cp210x_enable_event_mode(struct usb_serial_port *port); @@ -289557,7 +346431,15 @@ index 189279869a8b0..a2126b07e854a 100644 { USB_DEVICE(0x045B, 0x0053) }, /* Renesas RX610 RX-Stick */ { USB_DEVICE(0x0471, 0x066A) }, /* AKTAKOM ACE-1001 cable */ { USB_DEVICE(0x0489, 0xE000) }, /* Pirelli Broadband S.p.A, DP-L10 SIP/GSM Mobile */ -@@ -68,6 +69,7 @@ static const struct usb_device_id id_table[] = { +@@ -59,6 +60,7 @@ static const struct usb_device_id id_table[] = { + { USB_DEVICE(0x0846, 0x1100) }, /* NetGear Managed Switch M4100 series, M5300 series, M7100 series */ + { USB_DEVICE(0x08e6, 0x5501) }, /* Gemalto Prox-PU/CU contactless smartcard reader */ + { USB_DEVICE(0x08FD, 0x000A) }, /* Digianswer A/S , ZigBee/802.15.4 MAC Device */ ++ { USB_DEVICE(0x0908, 0x0070) }, /* Siemens SCALANCE LPE-9000 USB Serial Console */ + { USB_DEVICE(0x0908, 0x01FF) }, /* Siemens RUGGEDCOM USB Serial Console */ + { USB_DEVICE(0x0988, 0x0578) }, /* Teraoka AD2000 */ + { USB_DEVICE(0x0B00, 0x3070) }, /* Ingenico 3070 */ +@@ -68,6 +70,7 @@ static const struct usb_device_id id_table[] = { { USB_DEVICE(0x0FCF, 0x1004) }, /* Dynastream ANT2USB */ { USB_DEVICE(0x0FCF, 0x1006) }, /* Dynastream ANT development board */ { USB_DEVICE(0x0FDE, 0xCA05) }, /* OWL Wireless Electricity Monitor CM-160 */ @@ -289565,7 +346447,7 @@ index 189279869a8b0..a2126b07e854a 100644 { USB_DEVICE(0x10A6, 0xAA26) }, /* Knock-off DCU-11 cable */ { USB_DEVICE(0x10AB, 0x10C5) }, /* Siemens MC60 Cable */ { USB_DEVICE(0x10B5, 0xAC70) }, /* Nokia CA-42 USB */ -@@ -128,6 +130,7 @@ static const struct usb_device_id id_table[] = { +@@ -128,6 +131,7 @@ static const struct usb_device_id id_table[] = { { USB_DEVICE(0x10C4, 0x83AA) }, /* Mark-10 Digital Force Gauge */ { USB_DEVICE(0x10C4, 0x83D8) }, /* DekTec DTA Plus VHF/UHF Booster/Attenuator */ { USB_DEVICE(0x10C4, 0x8411) }, /* Kyocera GPS Module */ @@ -289573,16 +346455,18 @@ index 189279869a8b0..a2126b07e854a 100644 { USB_DEVICE(0x10C4, 0x8418) }, /* IRZ Automation Teleport SG-10 GSM/GPRS Modem */ { USB_DEVICE(0x10C4, 0x846E) }, /* BEI USB Sensor Interface (VCP) */ { USB_DEVICE(0x10C4, 0x8470) }, /* Juniper Networks BX Series System Console */ -@@ -192,6 +195,8 @@ static const struct usb_device_id id_table[] = { +@@ -192,6 +196,10 @@ static const struct usb_device_id id_table[] = { { USB_DEVICE(0x16DC, 0x0015) }, /* W-IE-NE-R Plein & Baus GmbH CML Control, Monitoring and Data Logger */ { USB_DEVICE(0x17A8, 0x0001) }, /* Kamstrup Optical Eye/3-wire */ { USB_DEVICE(0x17A8, 0x0005) }, /* Kamstrup M-Bus Master MultiPort 250D */ ++ { USB_DEVICE(0x17A8, 0x0011) }, /* Kamstrup 444 MHz RF sniffer */ ++ { USB_DEVICE(0x17A8, 0x0013) }, /* Kamstrup 870 MHz RF sniffer */ + { USB_DEVICE(0x17A8, 0x0101) }, /* Kamstrup 868 MHz wM-Bus C-Mode Meter Reader (Int Ant) */ + { USB_DEVICE(0x17A8, 0x0102) }, /* Kamstrup 868 MHz wM-Bus C-Mode Meter Reader (Ext Ant) */ { USB_DEVICE(0x17F4, 0xAAAA) }, /* Wavesense Jazz blood glucose meter */ { 
USB_DEVICE(0x1843, 0x0200) }, /* Vaisala USB Instrument Cable */ { USB_DEVICE(0x18EF, 0xE00F) }, /* ELV USB-I2C-Interface */ -@@ -1682,6 +1687,8 @@ static int cp2105_gpioconf_init(struct usb_serial *serial) +@@ -1682,6 +1690,8 @@ static int cp2105_gpioconf_init(struct usb_serial *serial) /* 2 banks of GPIO - One for the pins taken from each serial port */ if (intf_num == 0) { @@ -289591,7 +346475,7 @@ index 189279869a8b0..a2126b07e854a 100644 if (mode.eci == CP210X_PIN_MODE_MODEM) { /* mark all GPIOs of this interface as reserved */ priv->gpio_altfunc = 0xff; -@@ -1692,8 +1699,9 @@ static int cp2105_gpioconf_init(struct usb_serial *serial) +@@ -1692,8 +1702,9 @@ static int cp2105_gpioconf_init(struct usb_serial *serial) priv->gpio_pushpull = (u8)((le16_to_cpu(config.gpio_mode) & CP210X_ECI_GPIO_MODE_MASK) >> CP210X_ECI_GPIO_MODE_OFFSET); @@ -289602,7 +346486,7 @@ index 189279869a8b0..a2126b07e854a 100644 if (mode.sci == CP210X_PIN_MODE_MODEM) { /* mark all GPIOs of this interface as reserved */ priv->gpio_altfunc = 0xff; -@@ -1704,7 +1712,6 @@ static int cp2105_gpioconf_init(struct usb_serial *serial) +@@ -1704,7 +1715,6 @@ static int cp2105_gpioconf_init(struct usb_serial *serial) priv->gpio_pushpull = (u8)((le16_to_cpu(config.gpio_mode) & CP210X_SCI_GPIO_MODE_MASK) >> CP210X_SCI_GPIO_MODE_OFFSET); @@ -289610,6 +346494,68 @@ index 189279869a8b0..a2126b07e854a 100644 } else { return -ENODEV; } +diff --git a/drivers/usb/serial/f81232.c b/drivers/usb/serial/f81232.c +index a7a7af8d05bff..e04bdb3082657 100644 +--- a/drivers/usb/serial/f81232.c ++++ b/drivers/usb/serial/f81232.c +@@ -130,9 +130,6 @@ static u8 const clock_table[] = { F81232_CLK_1_846_MHZ, F81232_CLK_14_77_MHZ, + + static int calc_baud_divisor(speed_t baudrate, speed_t clockrate) + { +- if (!baudrate) +- return 0; +- + return DIV_ROUND_CLOSEST(clockrate, baudrate); + } + +@@ -519,9 +516,14 @@ static void f81232_set_baudrate(struct tty_struct *tty, + speed_t baud_list[] = { baudrate, old_baudrate, F81232_DEF_BAUDRATE }; + + for (i = 0; i < ARRAY_SIZE(baud_list); ++i) { +- idx = f81232_find_clk(baud_list[i]); ++ baudrate = baud_list[i]; ++ if (baudrate == 0) { ++ tty_encode_baud_rate(tty, 0, 0); ++ return; ++ } ++ ++ idx = f81232_find_clk(baudrate); + if (idx >= 0) { +- baudrate = baud_list[i]; + tty_encode_baud_rate(tty, baudrate, baudrate); + break; + } +diff --git a/drivers/usb/serial/f81534.c b/drivers/usb/serial/f81534.c +index c0bca52ef92aa..556d4e0dda873 100644 +--- a/drivers/usb/serial/f81534.c ++++ b/drivers/usb/serial/f81534.c +@@ -536,9 +536,6 @@ static int f81534_submit_writer(struct usb_serial_port *port, gfp_t mem_flags) + + static u32 f81534_calc_baud_divisor(u32 baudrate, u32 clockrate) + { +- if (!baudrate) +- return 0; +- + /* Round to nearest divisor */ + return DIV_ROUND_CLOSEST(clockrate, baudrate); + } +@@ -568,9 +565,14 @@ static int f81534_set_port_config(struct usb_serial_port *port, + u32 baud_list[] = {baudrate, old_baudrate, F81534_DEFAULT_BAUD_RATE}; + + for (i = 0; i < ARRAY_SIZE(baud_list); ++i) { +- idx = f81534_find_clk(baud_list[i]); ++ baudrate = baud_list[i]; ++ if (baudrate == 0) { ++ tty_encode_baud_rate(tty, 0, 0); ++ return 0; ++ } ++ ++ idx = f81534_find_clk(baudrate); + if (idx >= 0) { +- baudrate = baud_list[i]; + tty_encode_baud_rate(tty, baudrate, baudrate); + break; + } diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c index 99d19828dae6d..49448cdbe9985 100644 --- a/drivers/usb/serial/ftdi_sio.c @@ -289789,10 +346735,19 @@ index 87b89c99d5177..1cfcd805f2868 
100644 return -ENOMEM; diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c -index a484ff5e4ebf8..697683e3fbffa 100644 +index a484ff5e4ebf8..a8534065e0d6d 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c -@@ -198,6 +198,8 @@ static void option_instat_callback(struct urb *urb); +@@ -162,6 +162,8 @@ static void option_instat_callback(struct urb *urb); + #define NOVATELWIRELESS_PRODUCT_G2 0xA010 + #define NOVATELWIRELESS_PRODUCT_MC551 0xB001 + ++#define UBLOX_VENDOR_ID 0x1546 ++ + /* AMOI PRODUCTS */ + #define AMOI_VENDOR_ID 0x1614 + #define AMOI_PRODUCT_H01 0x0800 +@@ -198,6 +200,8 @@ static void option_instat_callback(struct urb *urb); #define DELL_PRODUCT_5821E 0x81d7 #define DELL_PRODUCT_5821E_ESIM 0x81e0 @@ -289801,22 +346756,46 @@ index a484ff5e4ebf8..697683e3fbffa 100644 #define KYOCERA_VENDOR_ID 0x0c88 #define KYOCERA_PRODUCT_KPC650 0x17da -@@ -250,10 +252,14 @@ static void option_instat_callback(struct urb *urb); +@@ -238,7 +242,6 @@ static void option_instat_callback(struct urb *urb); + #define QUECTEL_PRODUCT_UC15 0x9090 + /* These u-blox products use Qualcomm's vendor ID */ + #define UBLOX_PRODUCT_R410M 0x90b2 +-#define UBLOX_PRODUCT_R6XX 0x90fa + /* These Yuga products use Qualcomm's vendor ID */ + #define YUGA_PRODUCT_CLM920_NC5 0x9625 + +@@ -250,10 +253,21 @@ static void option_instat_callback(struct urb *urb); #define QUECTEL_PRODUCT_EG95 0x0195 #define QUECTEL_PRODUCT_BG96 0x0296 #define QUECTEL_PRODUCT_EP06 0x0306 +#define QUECTEL_PRODUCT_EM05G 0x030a +#define QUECTEL_PRODUCT_EM060K 0x030b ++#define QUECTEL_PRODUCT_EM05G_CS 0x030c ++#define QUECTEL_PRODUCT_EM05CN_SG 0x0310 ++#define QUECTEL_PRODUCT_EM05G_SG 0x0311 ++#define QUECTEL_PRODUCT_EM05CN 0x0312 ++#define QUECTEL_PRODUCT_EM05G_GR 0x0313 ++#define QUECTEL_PRODUCT_EM05G_RS 0x0314 #define QUECTEL_PRODUCT_EM12 0x0512 #define QUECTEL_PRODUCT_RM500Q 0x0800 +#define QUECTEL_PRODUCT_RM520N 0x0801 ++#define QUECTEL_PRODUCT_EC200U 0x0901 #define QUECTEL_PRODUCT_EC200S_CN 0x6002 #define QUECTEL_PRODUCT_EC200T 0x6026 +#define QUECTEL_PRODUCT_RM500K 0x7001 #define CMOTECH_VENDOR_ID 0x16d8 #define CMOTECH_PRODUCT_6001 0x6001 -@@ -430,6 +436,12 @@ static void option_instat_callback(struct urb *urb); +@@ -388,6 +402,8 @@ static void option_instat_callback(struct urb *urb); + #define LONGCHEER_VENDOR_ID 0x1c9e + + /* 4G Systems products */ ++/* This one was sold as the VW and Skoda "Carstick LTE" */ ++#define FOUR_G_SYSTEMS_PRODUCT_CARSTICK_LTE 0x7605 + /* This is the 4G XS Stick W14 a.k.a. 
Mobilcom Debitel Surf-Stick * + * It seems to contain a Qualcomm QSC6240/6290 chipset */ + #define FOUR_G_SYSTEMS_PRODUCT_W14 0x9603 +@@ -430,6 +446,12 @@ static void option_instat_callback(struct urb *urb); #define CINTERION_PRODUCT_CLS8 0x00b0 #define CINTERION_PRODUCT_MV31_MBIM 0x00b3 #define CINTERION_PRODUCT_MV31_RMNET 0x00b7 @@ -289829,7 +346808,7 @@ index a484ff5e4ebf8..697683e3fbffa 100644 /* Olivetti products */ #define OLIVETTI_VENDOR_ID 0x0b3c -@@ -565,6 +577,10 @@ static void option_instat_callback(struct urb *urb); +@@ -565,6 +587,13 @@ static void option_instat_callback(struct urb *urb); #define WETELECOM_PRODUCT_6802 0x6802 #define WETELECOM_PRODUCT_WMD300 0x6803 @@ -289837,10 +346816,13 @@ index a484ff5e4ebf8..697683e3fbffa 100644 +#define OPPO_VENDOR_ID 0x22d9 +#define OPPO_PRODUCT_R11 0x276c + ++/* Sierra Wireless products */ ++#define SIERRA_VENDOR_ID 0x1199 ++#define SIERRA_PRODUCT_EM9191 0x90d3 /* Device flags */ -@@ -1063,6 +1079,10 @@ static const struct usb_device_id option_ids[] = { +@@ -1063,6 +1092,10 @@ static const struct usb_device_id option_ids[] = { .driver_info = RSVD(0) | RSVD(1) | RSVD(6) }, { USB_DEVICE(DELL_VENDOR_ID, DELL_PRODUCT_5821E_ESIM), .driver_info = RSVD(0) | RSVD(1) | RSVD(6) }, @@ -289851,7 +346833,25 @@ index a484ff5e4ebf8..697683e3fbffa 100644 { USB_DEVICE(ANYDATA_VENDOR_ID, ANYDATA_PRODUCT_ADU_E100A) }, /* ADU-E100, ADU-310 */ { USB_DEVICE(ANYDATA_VENDOR_ID, ANYDATA_PRODUCT_ADU_500A) }, { USB_DEVICE(ANYDATA_VENDOR_ID, ANYDATA_PRODUCT_ADU_620UW) }, -@@ -1119,22 +1139,35 @@ static const struct usb_device_id option_ids[] = { +@@ -1104,8 +1137,16 @@ static const struct usb_device_id option_ids[] = { + /* u-blox products using Qualcomm vendor ID */ + { USB_DEVICE(QUALCOMM_VENDOR_ID, UBLOX_PRODUCT_R410M), + .driver_info = RSVD(1) | RSVD(3) }, +- { USB_DEVICE(QUALCOMM_VENDOR_ID, UBLOX_PRODUCT_R6XX), ++ { USB_DEVICE(QUALCOMM_VENDOR_ID, 0x908b), /* u-blox LARA-R6 00B */ ++ .driver_info = RSVD(4) }, ++ { USB_DEVICE(QUALCOMM_VENDOR_ID, 0x90fa), + .driver_info = RSVD(3) }, ++ /* u-blox products */ ++ { USB_DEVICE(UBLOX_VENDOR_ID, 0x1341) }, /* u-blox LARA-L6 */ ++ { USB_DEVICE(UBLOX_VENDOR_ID, 0x1342), /* u-blox LARA-L6 (RMNET) */ ++ .driver_info = RSVD(4) }, ++ { USB_DEVICE(UBLOX_VENDOR_ID, 0x1343), /* u-blox LARA-L6 (ECM) */ ++ .driver_info = RSVD(4) }, + /* Quectel products using Quectel vendor ID */ + { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EC21, 0xff, 0xff, 0xff), + .driver_info = NUMEP2 }, +@@ -1119,22 +1160,48 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EG95, 0xff, 0xff, 0xff), .driver_info = NUMEP2 }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EG95, 0xff, 0, 0) }, @@ -289862,8 +346862,20 @@ index a484ff5e4ebf8..697683e3fbffa 100644 { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EP06, 0xff, 0xff, 0xff), .driver_info = RSVD(1) | RSVD(2) | RSVD(3) | RSVD(4) | NUMEP2 }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EP06, 0xff, 0, 0) }, ++ { USB_DEVICE_INTERFACE_CLASS(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM05CN, 0xff), ++ .driver_info = RSVD(6) | ZLP }, ++ { USB_DEVICE_INTERFACE_CLASS(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM05CN_SG, 0xff), ++ .driver_info = RSVD(6) | ZLP }, + { USB_DEVICE_INTERFACE_CLASS(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM05G, 0xff), + .driver_info = RSVD(6) | ZLP }, ++ { USB_DEVICE_INTERFACE_CLASS(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM05G_GR, 0xff), ++ .driver_info = 
RSVD(6) | ZLP }, ++ { USB_DEVICE_INTERFACE_CLASS(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM05G_CS, 0xff), ++ .driver_info = RSVD(6) | ZLP }, ++ { USB_DEVICE_INTERFACE_CLASS(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM05G_RS, 0xff), ++ .driver_info = RSVD(6) | ZLP }, ++ { USB_DEVICE_INTERFACE_CLASS(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM05G_SG, 0xff), ++ .driver_info = RSVD(6) | ZLP }, + { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM060K, 0xff, 0x00, 0x40) }, + { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM060K, 0xff, 0xff, 0x30) }, + { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM060K, 0xff, 0xff, 0x40) }, @@ -289881,13 +346893,14 @@ index a484ff5e4ebf8..697683e3fbffa 100644 + { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_RM520N, 0xff, 0xff, 0x30) }, + { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_RM520N, 0xff, 0, 0x40) }, + { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_RM520N, 0xff, 0, 0) }, ++ { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EC200U, 0xff, 0, 0) }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EC200S_CN, 0xff, 0, 0) }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EC200T, 0xff, 0, 0) }, + { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_RM500K, 0xff, 0x00, 0x00) }, { USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_6001) }, { USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_CMU_300) }, -@@ -1211,6 +1244,10 @@ static const struct usb_device_id option_ids[] = { +@@ -1211,6 +1278,10 @@ static const struct usb_device_id option_ids[] = { .driver_info = NCTRL(0) | RSVD(1) }, { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1056, 0xff), /* Telit FD980 */ .driver_info = NCTRL(2) | RSVD(3) }, @@ -289898,7 +346911,7 @@ index a484ff5e4ebf8..697683e3fbffa 100644 { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1060, 0xff), /* Telit LN920 (rmnet) */ .driver_info = NCTRL(0) | RSVD(1) | RSVD(2) }, { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1061, 0xff), /* Telit LN920 (MBIM) */ -@@ -1219,6 +1256,16 @@ static const struct usb_device_id option_ids[] = { +@@ -1219,6 +1290,16 @@ static const struct usb_device_id option_ids[] = { .driver_info = NCTRL(2) | RSVD(3) }, { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1063, 0xff), /* Telit LN920 (ECM) */ .driver_info = NCTRL(0) | RSVD(1) }, @@ -289915,7 +346928,7 @@ index a484ff5e4ebf8..697683e3fbffa 100644 { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_ME910), .driver_info = NCTRL(0) | RSVD(1) | RSVD(3) }, { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_ME910_DUAL_MODEM), -@@ -1253,6 +1300,7 @@ static const struct usb_device_id option_ids[] = { +@@ -1253,6 +1334,7 @@ static const struct usb_device_id option_ids[] = { .driver_info = NCTRL(0) | RSVD(1) | RSVD(2) }, { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1231, 0xff), /* Telit LE910Cx (RNDIS) */ .driver_info = NCTRL(2) | RSVD(3) }, @@ -289923,7 +346936,7 @@ index a484ff5e4ebf8..697683e3fbffa 100644 { USB_DEVICE(TELIT_VENDOR_ID, 0x1260), .driver_info = NCTRL(0) | RSVD(1) | RSVD(2) }, { USB_DEVICE(TELIT_VENDOR_ID, 0x1261), -@@ -1265,8 +1313,16 @@ static const struct usb_device_id option_ids[] = { +@@ -1265,8 +1347,16 @@ static const struct usb_device_id option_ids[] = { .driver_info = NCTRL(2) }, { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x7011, 0xff), /* Telit LE910-S1 (ECM) */ .driver_info = NCTRL(2) }, @@ -289940,7 +346953,7 @@ index a484ff5e4ebf8..697683e3fbffa 100644 { 
USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, ZTE_PRODUCT_MF622, 0xff, 0xff, 0xff) }, /* ZTE WCDMA products */ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0002, 0xff, 0xff, 0xff), .driver_info = RSVD(1) }, -@@ -1639,6 +1695,8 @@ static const struct usb_device_id option_ids[] = { +@@ -1639,6 +1729,8 @@ static const struct usb_device_id option_ids[] = { .driver_info = RSVD(2) }, { USB_DEVICE_INTERFACE_CLASS(ZTE_VENDOR_ID, 0x1476, 0xff) }, /* GosunCn ZTE WeLink ME3630 (ECM/NCM mode) */ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1481, 0xff, 0x00, 0x00) }, /* ZTE MF871A */ @@ -289949,7 +346962,16 @@ index a484ff5e4ebf8..697683e3fbffa 100644 { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1533, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1534, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1535, 0xff, 0xff, 0xff) }, -@@ -1945,6 +2003,18 @@ static const struct usb_device_id option_ids[] = { +@@ -1886,6 +1978,8 @@ static const struct usb_device_id option_ids[] = { + .driver_info = RSVD(2) }, + { USB_DEVICE(AIRPLUS_VENDOR_ID, AIRPLUS_PRODUCT_MCD650) }, + { USB_DEVICE(TLAYTECH_VENDOR_ID, TLAYTECH_PRODUCT_TEU800) }, ++ { USB_DEVICE(LONGCHEER_VENDOR_ID, FOUR_G_SYSTEMS_PRODUCT_CARSTICK_LTE), ++ .driver_info = RSVD(0) }, + { USB_DEVICE(LONGCHEER_VENDOR_ID, FOUR_G_SYSTEMS_PRODUCT_W14), + .driver_info = NCTRL(0) | NCTRL(1) }, + { USB_DEVICE(LONGCHEER_VENDOR_ID, FOUR_G_SYSTEMS_PRODUCT_W100), +@@ -1945,6 +2039,18 @@ static const struct usb_device_id option_ids[] = { .driver_info = RSVD(3)}, { USB_DEVICE_INTERFACE_CLASS(CINTERION_VENDOR_ID, CINTERION_PRODUCT_MV31_RMNET, 0xff), .driver_info = RSVD(0)}, @@ -289968,7 +346990,7 @@ index a484ff5e4ebf8..697683e3fbffa 100644 { USB_DEVICE(OLIVETTI_VENDOR_ID, OLIVETTI_PRODUCT_OLICARD100), .driver_info = RSVD(4) }, { USB_DEVICE(OLIVETTI_VENDOR_ID, OLIVETTI_PRODUCT_OLICARD120), -@@ -2087,17 +2157,25 @@ static const struct usb_device_id option_ids[] = { +@@ -2087,17 +2193,28 @@ static const struct usb_device_id option_ids[] = { .driver_info = RSVD(3) }, { USB_DEVICE(0x1508, 0x1001), /* Fibocom NL668 (IOT version) */ .driver_info = RSVD(4) | RSVD(5) | RSVD(6) }, @@ -289982,6 +347004,7 @@ index a484ff5e4ebf8..697683e3fbffa 100644 + { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x010a, 0xff) }, /* Fibocom MA510 (ECM mode) */ { USB_DEVICE_AND_INTERFACE_INFO(0x2cb7, 0x010b, 0xff, 0xff, 0x30) }, /* Fibocom FG150 Diag */ { USB_DEVICE_AND_INTERFACE_INFO(0x2cb7, 0x010b, 0xff, 0, 0) }, /* Fibocom FG150 AT */ ++ { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x0111, 0xff) }, /* Fibocom FM160 (MBIM mode) */ { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x01a0, 0xff) }, /* Fibocom NL668-AM/NL652-EU (laptop MBIM) */ + { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x01a2, 0xff) }, /* Fibocom FM101-GL (laptop MBIM) */ + { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x01a4, 0xff), /* Fibocom FM101-GL (laptop MBIM) */ @@ -289991,6 +347014,8 @@ index a484ff5e4ebf8..697683e3fbffa 100644 { USB_DEVICE_INTERFACE_CLASS(0x305a, 0x1405, 0xff) }, /* GosunCn GM500 MBIM */ { USB_DEVICE_INTERFACE_CLASS(0x305a, 0x1406, 0xff) }, /* GosunCn GM500 ECM/NCM */ + { USB_DEVICE_AND_INTERFACE_INFO(OPPO_VENDOR_ID, OPPO_PRODUCT_R11, 0xff, 0xff, 0x30) }, ++ { USB_DEVICE_AND_INTERFACE_INFO(SIERRA_VENDOR_ID, SIERRA_PRODUCT_EM9191, 0xff, 0xff, 0x30) }, ++ { USB_DEVICE_AND_INTERFACE_INFO(SIERRA_VENDOR_ID, SIERRA_PRODUCT_EM9191, 0xff, 0, 0) }, { } /* Terminating entry */ }; MODULE_DEVICE_TABLE(usb, option_ids); @@ -290196,6 +347221,19 @@ index da65d14c9ed5e..06aad0d727ddc 100644 } } exit: +diff --git 
a/drivers/usb/storage/alauda.c b/drivers/usb/storage/alauda.c +index 20b857e97e60c..7e4ce0e7e05a7 100644 +--- a/drivers/usb/storage/alauda.c ++++ b/drivers/usb/storage/alauda.c +@@ -438,6 +438,8 @@ static int alauda_init_media(struct us_data *us) + + MEDIA_INFO(us).blockshift + MEDIA_INFO(us).pageshift); + MEDIA_INFO(us).pba_to_lba = kcalloc(num_zones, sizeof(u16*), GFP_NOIO); + MEDIA_INFO(us).lba_to_pba = kcalloc(num_zones, sizeof(u16*), GFP_NOIO); ++ if (MEDIA_INFO(us).pba_to_lba == NULL || MEDIA_INFO(us).lba_to_pba == NULL) ++ return USB_STOR_TRANSPORT_ERROR; + + if (alauda_reset_media(us) != USB_STOR_XFER_GOOD) + return USB_STOR_TRANSPORT_ERROR; diff --git a/drivers/usb/storage/ene_ub6250.c b/drivers/usb/storage/ene_ub6250.c index 5f7d678502be4..6012603f3630e 100644 --- a/drivers/usb/storage/ene_ub6250.c @@ -290576,6 +347614,30 @@ index 3789698d9d3c6..0c423916d7bfa 100644 usb_stor_dbg(us, "addr = 0x%x, len = %d\n", addr, len); +diff --git a/drivers/usb/storage/uas-detect.h b/drivers/usb/storage/uas-detect.h +index 3f720faa6f97c..d73282c0ec501 100644 +--- a/drivers/usb/storage/uas-detect.h ++++ b/drivers/usb/storage/uas-detect.h +@@ -116,6 +116,19 @@ static int uas_use_uas_driver(struct usb_interface *intf, + if (le16_to_cpu(udev->descriptor.idVendor) == 0x0bc2) + flags |= US_FL_NO_ATA_1X; + ++ /* ++ * RTL9210-based enclosure from HIKSEMI, MD202 reportedly have issues ++ * with UAS. This isn't distinguishable with just idVendor and ++ * idProduct, use manufacturer and product too. ++ * ++ * Reported-by: Hongling Zeng <zenghongling@kylinos.cn> ++ */ ++ if (le16_to_cpu(udev->descriptor.idVendor) == 0x0bda && ++ le16_to_cpu(udev->descriptor.idProduct) == 0x9210 && ++ (udev->manufacturer && !strcmp(udev->manufacturer, "HIKSEMI")) && ++ (udev->product && !strcmp(udev->product, "MD202"))) ++ flags |= US_FL_IGNORE_UAS; ++ + usb_stor_adjust_quirks(udev, &flags); + + if (flags & US_FL_IGNORE_UAS) { diff --git a/drivers/usb/storage/unusual_devs.h b/drivers/usb/storage/unusual_devs.h index c6b3fcf901805..20dcbccb290b3 100644 --- a/drivers/usb/storage/unusual_devs.h @@ -290642,7 +347704,7 @@ index c6b3fcf901805..20dcbccb290b3 100644 "ST", "2A", diff --git a/drivers/usb/storage/unusual_uas.h b/drivers/usb/storage/unusual_uas.h -index 4051c8cd0cd8a..251778d14e2dd 100644 +index 4051c8cd0cd8a..c7b763d6d1023 100644 --- a/drivers/usb/storage/unusual_uas.h +++ b/drivers/usb/storage/unusual_uas.h @@ -52,6 +52,13 @@ UNUSUAL_DEV(0x059f, 0x1061, 0x0000, 0x9999, @@ -290673,21 +347735,7 @@ index 4051c8cd0cd8a..251778d14e2dd 100644 /* Reported-by: David Webb <djw@noc.ac.uk> */ UNUSUAL_DEV(0x0bc2, 0x331a, 0x0000, 0x9999, "Seagate", -@@ -69,6 +83,13 @@ UNUSUAL_DEV(0x0bc2, 0x331a, 0x0000, 0x9999, - USB_SC_DEVICE, USB_PR_DEVICE, NULL, - US_FL_NO_REPORT_LUNS), - -+/* Reported-by: Hongling Zeng <zenghongling@kylinos.cn> */ -+UNUSUAL_DEV(0x0bda, 0x9210, 0x0000, 0x9999, -+ "Hiksemi", -+ "External HDD", -+ USB_SC_DEVICE, USB_PR_DEVICE, NULL, -+ US_FL_IGNORE_UAS), -+ - /* Reported-by: Benjamin Tissoires <benjamin.tissoires@redhat.com> */ - UNUSUAL_DEV(0x13fd, 0x3940, 0x0000, 0x9999, - "Initio Corporation", -@@ -111,6 +132,13 @@ UNUSUAL_DEV(0x154b, 0xf00d, 0x0000, 0x9999, +@@ -111,6 +125,13 @@ UNUSUAL_DEV(0x154b, 0xf00d, 0x0000, 0x9999, USB_SC_DEVICE, USB_PR_DEVICE, NULL, US_FL_NO_ATA_1X), @@ -290718,7 +347766,7 @@ index a0418f23b4aae..ab480f38523aa 100644 Say Y or M here if your system has STMicroelectronics STUSB160x Type-C port controller. 
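The uas-detect.h hunk above applies US_FL_IGNORE_UAS to the HIKSEMI MD202 by matching the manufacturer and product string descriptors in addition to idVendor/idProduct, because the RTL9210 IDs are shared across many enclosures. The following self-contained sketch shows that match-then-flag pattern under invented types; only the 0x0bda:0x9210 / "HIKSEMI" / "MD202" values come from the hunk itself, and the struct and flag name are hypothetical.

/*
 * Sketch: when VID:PID alone is ambiguous, fall back to comparing the
 * (possibly missing) string descriptors before applying a quirk flag.
 */
#include <stdio.h>
#include <string.h>

#define FL_IGNORE_UAS	(1u << 0)	/* illustrative flag value */

struct usb_dev_info {
	unsigned short idVendor;
	unsigned short idProduct;
	const char *manufacturer;	/* may be NULL */
	const char *product;		/* may be NULL */
};

static unsigned int uas_quirks(const struct usb_dev_info *udev)
{
	unsigned int flags = 0;

	/*
	 * The IDs match many RTL9210 enclosures, so also require the
	 * string descriptors; the NULL checks guard devices that do
	 * not supply them at all.
	 */
	if (udev->idVendor == 0x0bda && udev->idProduct == 0x9210 &&
	    udev->manufacturer && !strcmp(udev->manufacturer, "HIKSEMI") &&
	    udev->product && !strcmp(udev->product, "MD202"))
		flags |= FL_IGNORE_UAS;

	return flags;
}

int main(void)
{
	struct usb_dev_info md202 = { 0x0bda, 0x9210, "HIKSEMI", "MD202" };
	struct usb_dev_info other = { 0x0bda, 0x9210, "Generic", NULL };

	printf("md202 flags: %#x\n", uas_quirks(&md202));	/* 0x1 */
	printf("other flags: %#x\n", uas_quirks(&other));	/* 0x0 */
	return 0;
}

Note the short-circuit ordering: the cheap numeric compares run first, and each string pointer is tested for NULL before strcmp(), so a device that omits a descriptor can never dereference a null pointer.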
diff --git a/drivers/usb/typec/altmodes/displayport.c b/drivers/usb/typec/altmodes/displayport.c -index b7f094435b00a..998c1e3e318e1 100644 +index b7f094435b00a..c232a735a0c2f 100644 --- a/drivers/usb/typec/altmodes/displayport.c +++ b/drivers/usb/typec/altmodes/displayport.c @@ -88,8 +88,8 @@ static int dp_altmode_configure(struct dp_altmode *dp, u8 con) @@ -290732,8 +347780,79 @@ index b7f094435b00a..998c1e3e318e1 100644 break; default: break; +@@ -418,6 +418,18 @@ static const char * const pin_assignments[] = { + [DP_PIN_ASSIGN_F] = "F", + }; + ++/* ++ * Helper function to extract a peripheral's currently supported ++ * Pin Assignments from its DisplayPort alternate mode state. ++ */ ++static u8 get_current_pin_assignments(struct dp_altmode *dp) ++{ ++ if (DP_CONF_CURRENTLY(dp->data.conf) == DP_CONF_DFP_D) ++ return DP_CAP_PIN_ASSIGN_DFP_D(dp->alt->vdo); ++ else ++ return DP_CAP_PIN_ASSIGN_UFP_D(dp->alt->vdo); ++} ++ + static ssize_t + pin_assignment_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t size) +@@ -444,10 +456,7 @@ pin_assignment_store(struct device *dev, struct device_attribute *attr, + goto out_unlock; + } + +- if (DP_CONF_CURRENTLY(dp->data.conf) == DP_CONF_DFP_D) +- assignments = DP_CAP_UFP_D_PIN_ASSIGN(dp->alt->vdo); +- else +- assignments = DP_CAP_DFP_D_PIN_ASSIGN(dp->alt->vdo); ++ assignments = get_current_pin_assignments(dp); + + if (!(DP_CONF_GET_PIN_ASSIGN(conf) & assignments)) { + ret = -EINVAL; +@@ -484,10 +493,7 @@ static ssize_t pin_assignment_show(struct device *dev, + + cur = get_count_order(DP_CONF_GET_PIN_ASSIGN(dp->data.conf)); + +- if (DP_CONF_CURRENTLY(dp->data.conf) == DP_CONF_DFP_D) +- assignments = DP_CAP_UFP_D_PIN_ASSIGN(dp->alt->vdo); +- else +- assignments = DP_CAP_DFP_D_PIN_ASSIGN(dp->alt->vdo); ++ assignments = get_current_pin_assignments(dp); + + for (i = 0; assignments; assignments >>= 1, i++) { + if (assignments & 1) { +@@ -527,10 +533,10 @@ int dp_altmode_probe(struct typec_altmode *alt) + /* FIXME: Port can only be DFP_U. 
*/ + + /* Make sure we have compatiple pin configurations */ +- if (!(DP_CAP_DFP_D_PIN_ASSIGN(port->vdo) & +- DP_CAP_UFP_D_PIN_ASSIGN(alt->vdo)) && +- !(DP_CAP_UFP_D_PIN_ASSIGN(port->vdo) & +- DP_CAP_DFP_D_PIN_ASSIGN(alt->vdo))) ++ if (!(DP_CAP_PIN_ASSIGN_DFP_D(port->vdo) & ++ DP_CAP_PIN_ASSIGN_UFP_D(alt->vdo)) && ++ !(DP_CAP_PIN_ASSIGN_UFP_D(port->vdo) & ++ DP_CAP_PIN_ASSIGN_DFP_D(alt->vdo))) + return -ENODEV; + + ret = sysfs_create_group(&alt->dev.kobj, &dp_altmode_group); +diff --git a/drivers/usb/typec/bus.c b/drivers/usb/typec/bus.c +index 78e0e78954f2d..0aefb9e14f228 100644 +--- a/drivers/usb/typec/bus.c ++++ b/drivers/usb/typec/bus.c +@@ -134,7 +134,7 @@ int typec_altmode_exit(struct typec_altmode *adev) + if (!adev || !adev->active) + return 0; + +- if (!pdev->ops || !pdev->ops->enter) ++ if (!pdev->ops || !pdev->ops->exit) + return -EOPNOTSUPP; + + /* Moving to USB Safe State */ diff --git a/drivers/usb/typec/class.c b/drivers/usb/typec/class.c -index aeef453aa6585..ff6c14d7b1a83 100644 +index aeef453aa6585..339752fef65e0 100644 --- a/drivers/usb/typec/class.c +++ b/drivers/usb/typec/class.c @@ -1718,6 +1718,7 @@ void typec_set_pwr_opmode(struct typec_port *port, @@ -290744,6 +347863,56 @@ index aeef453aa6585..ff6c14d7b1a83 100644 } put_device(partner_dev); } +@@ -1894,6 +1895,49 @@ void *typec_get_drvdata(struct typec_port *port) + } + EXPORT_SYMBOL_GPL(typec_get_drvdata); + ++int typec_get_fw_cap(struct typec_capability *cap, ++ struct fwnode_handle *fwnode) ++{ ++ const char *cap_str; ++ int ret; ++ ++ cap->fwnode = fwnode; ++ ++ ret = fwnode_property_read_string(fwnode, "power-role", &cap_str); ++ if (ret < 0) ++ return ret; ++ ++ ret = typec_find_port_power_role(cap_str); ++ if (ret < 0) ++ return ret; ++ cap->type = ret; ++ ++ /* USB data support is optional */ ++ ret = fwnode_property_read_string(fwnode, "data-role", &cap_str); ++ if (ret == 0) { ++ ret = typec_find_port_data_role(cap_str); ++ if (ret < 0) ++ return ret; ++ cap->data = ret; ++ } ++ ++ /* Get the preferred power role for a DRP */ ++ if (cap->type == TYPEC_PORT_DRP) { ++ cap->prefer_role = TYPEC_NO_PREFERRED_ROLE; ++ ++ ret = fwnode_property_read_string(fwnode, "try-power-role", &cap_str); ++ if (ret == 0) { ++ ret = typec_find_power_role(cap_str); ++ if (ret < 0) ++ return ret; ++ cap->prefer_role = ret; ++ } ++ } ++ ++ return 0; ++} ++EXPORT_SYMBOL_GPL(typec_get_fw_cap); ++ + /** + * typec_port_register_altmode - Register USB Type-C Port Alternate Mode + * @port: USB Type-C Port that supports the alternate mode diff --git a/drivers/usb/typec/mux.c b/drivers/usb/typec/mux.c index c8340de0ed495..d2aaf294b6493 100644 --- a/drivers/usb/typec/mux.c @@ -290777,10 +347946,45 @@ index c8340de0ed495..d2aaf294b6493 100644 ret = device_add(&mux->dev); if (ret) { diff --git a/drivers/usb/typec/mux/intel_pmc_mux.c b/drivers/usb/typec/mux/intel_pmc_mux.c -index 2cdd22130834e..5daec9d79e94f 100644 +index 2cdd22130834e..a2f5cfdcf02ac 100644 --- a/drivers/usb/typec/mux/intel_pmc_mux.c +++ b/drivers/usb/typec/mux/intel_pmc_mux.c -@@ -554,9 +554,11 @@ err_unregister_switch: +@@ -352,13 +352,24 @@ pmc_usb_mux_usb4(struct pmc_usb_port *port, struct typec_mux_state *state) + return pmc_usb_command(port, (void *)&req, sizeof(req)); + } + +-static int pmc_usb_mux_safe_state(struct pmc_usb_port *port) ++static int pmc_usb_mux_safe_state(struct pmc_usb_port *port, ++ struct typec_mux_state *state) + { + u8 msg; + + if (IOM_PORT_ACTIVITY_IS(port->iom_status, SAFE_MODE)) + return 0; + ++ if ((IOM_PORT_ACTIVITY_IS(port->iom_status, DP) 
|| ++ IOM_PORT_ACTIVITY_IS(port->iom_status, DP_MFD)) && ++ state->alt && state->alt->svid == USB_TYPEC_DP_SID) ++ return 0; ++ ++ if ((IOM_PORT_ACTIVITY_IS(port->iom_status, TBT) || ++ IOM_PORT_ACTIVITY_IS(port->iom_status, ALT_MODE_TBT_USB)) && ++ state->alt && state->alt->svid == USB_TYPEC_TBT_SID) ++ return 0; ++ + msg = PMC_USB_SAFE_MODE; + msg |= port->usb3_port << PMC_USB_MSG_USB3_PORT_SHIFT; + +@@ -426,7 +437,7 @@ pmc_usb_mux_set(struct typec_mux *mux, struct typec_mux_state *state) + return 0; + + if (state->mode == TYPEC_STATE_SAFE) +- return pmc_usb_mux_safe_state(port); ++ return pmc_usb_mux_safe_state(port, state); + if (state->mode == TYPEC_STATE_USB) + return pmc_usb_connect(port, port->role); + +@@ -554,9 +565,11 @@ err_unregister_switch: static int is_memory(struct acpi_resource *res, void *data) { @@ -290794,7 +347998,7 @@ index 2cdd22130834e..5daec9d79e94f 100644 } /* IOM ACPI IDs and IOM_PORT_STATUS_OFFSET */ -@@ -566,6 +568,9 @@ static const struct acpi_device_id iom_acpi_ids[] = { +@@ -566,6 +579,9 @@ static const struct acpi_device_id iom_acpi_ids[] = { /* AlderLake */ { "INTC1079", 0x160, }, @@ -290862,7 +348066,7 @@ index 7a2a17866a823..96c55eaf3f808 100644 chip->vbus = devm_regulator_get(chip->dev, "vbus"); diff --git a/drivers/usb/typec/tcpm/tcpci.c b/drivers/usb/typec/tcpm/tcpci.c -index c15eec9cc460a..64e248117c41a 100644 +index c15eec9cc460a..5340a3a3a81bb 100644 --- a/drivers/usb/typec/tcpm/tcpci.c +++ b/drivers/usb/typec/tcpm/tcpci.c @@ -75,9 +75,25 @@ static int tcpci_write16(struct tcpci *tcpci, unsigned int reg, u16 val) @@ -290908,7 +348112,27 @@ index c15eec9cc460a..64e248117c41a 100644 ret = regmap_write(tcpci->regmap, TCPC_ROLE_CTRL, reg); if (ret < 0) return ret; -@@ -851,7 +877,7 @@ static int tcpci_remove(struct i2c_client *client) +@@ -791,8 +817,10 @@ struct tcpci *tcpci_register_port(struct device *dev, struct tcpci_data *data) + return ERR_PTR(err); + + tcpci->port = tcpm_register_port(tcpci->dev, &tcpci->tcpc); +- if (IS_ERR(tcpci->port)) ++ if (IS_ERR(tcpci->port)) { ++ fwnode_handle_put(tcpci->tcpc.fwnode); + return ERR_CAST(tcpci->port); ++ } + + return tcpci; + } +@@ -801,6 +829,7 @@ EXPORT_SYMBOL_GPL(tcpci_register_port); + void tcpci_unregister_port(struct tcpci *tcpci) + { + tcpm_unregister_port(tcpci->port); ++ fwnode_handle_put(tcpci->tcpc.fwnode); + } + EXPORT_SYMBOL_GPL(tcpci_unregister_port); + +@@ -851,7 +880,7 @@ static int tcpci_remove(struct i2c_client *client) /* Disable chip interrupts before unregistering port */ err = tcpci_write16(chip->tcpci, TCPC_ALERT_MASK, 0); if (err < 0) @@ -290981,7 +348205,7 @@ index f1bd9e09bc87f..8a952eaf90163 100644 return regmap_write(regmap, MT6360_REG_MODECTRL2, 0x7A); } diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c -index 7f2f3ff1b3911..33aadc0a29ea8 100644 +index 7f2f3ff1b3911..ee461d314927b 100644 --- a/drivers/usb/typec/tcpm/tcpm.c +++ b/drivers/usb/typec/tcpm/tcpm.c @@ -324,6 +324,7 @@ struct tcpm_port { @@ -291004,7 +348228,25 @@ index 7f2f3ff1b3911..33aadc0a29ea8 100644 case SRC_TRY: port->try_src_count++; tcpm_set_cc(port, tcpm_rp_cc(port)); -@@ -5159,7 +5156,8 @@ static void _tcpm_pd_vbus_off(struct tcpm_port *port) +@@ -4530,14 +4527,13 @@ static void run_state_machine(struct tcpm_port *port) + tcpm_set_state(port, ready_state(port), 0); + break; + case DR_SWAP_CHANGE_DR: +- if (port->data_role == TYPEC_HOST) { +- tcpm_unregister_altmodes(port); ++ tcpm_unregister_altmodes(port); ++ if (port->data_role == TYPEC_HOST) + tcpm_set_roles(port, true, 
port->pwr_role, + TYPEC_DEVICE); +- } else { ++ else + tcpm_set_roles(port, true, port->pwr_role, + TYPEC_HOST); +- } + tcpm_ams_finish(port); + tcpm_set_state(port, ready_state(port), 0); + break; +@@ -5159,7 +5155,8 @@ static void _tcpm_pd_vbus_off(struct tcpm_port *port) case SNK_TRYWAIT_DEBOUNCE: break; case SNK_ATTACH_WAIT: @@ -291014,7 +348256,7 @@ index 7f2f3ff1b3911..33aadc0a29ea8 100644 break; case SNK_NEGOTIATE_CAPABILITIES: -@@ -5266,6 +5264,10 @@ static void _tcpm_pd_vbus_vsafe0v(struct tcpm_port *port) +@@ -5266,6 +5263,10 @@ static void _tcpm_pd_vbus_vsafe0v(struct tcpm_port *port) case PR_SWAP_SNK_SRC_SOURCE_ON: /* Do nothing, vsafe0v is expected during transition */ break; @@ -291025,7 +348267,55 @@ index 7f2f3ff1b3911..33aadc0a29ea8 100644 default: if (port->pwr_role == TYPEC_SINK && port->auto_vbus_discharge_enabled) tcpm_set_state(port, SNK_UNATTACHED, 0); -@@ -6211,6 +6213,13 @@ static int tcpm_psy_set_prop(struct power_supply *psy, +@@ -5926,7 +5927,6 @@ static int tcpm_fw_get_caps(struct tcpm_port *port, + struct fwnode_handle *fwnode) + { + const char *opmode_str; +- const char *cap_str; + int ret; + u32 mw, frs_current; + +@@ -5942,23 +5942,10 @@ static int tcpm_fw_get_caps(struct tcpm_port *port, + */ + fw_devlink_purge_absent_suppliers(fwnode); + +- /* USB data support is optional */ +- ret = fwnode_property_read_string(fwnode, "data-role", &cap_str); +- if (ret == 0) { +- ret = typec_find_port_data_role(cap_str); +- if (ret < 0) +- return ret; +- port->typec_caps.data = ret; +- } +- +- ret = fwnode_property_read_string(fwnode, "power-role", &cap_str); ++ ret = typec_get_fw_cap(&port->typec_caps, fwnode); + if (ret < 0) + return ret; + +- ret = typec_find_port_power_role(cap_str); +- if (ret < 0) +- return ret; +- port->typec_caps.type = ret; + port->port_type = port->typec_caps.type; + port->pd_supported = !fwnode_property_read_bool(fwnode, "pd-disable"); + +@@ -5995,14 +5982,6 @@ static int tcpm_fw_get_caps(struct tcpm_port *port, + if (port->port_type == TYPEC_PORT_SRC) + return 0; + +- /* Get the preferred power role for DRP */ +- ret = fwnode_property_read_string(fwnode, "try-power-role", &cap_str); +- if (ret < 0) +- return ret; +- +- port->typec_caps.prefer_role = typec_find_power_role(cap_str); +- if (port->typec_caps.prefer_role < 0) +- return -EINVAL; + sink: + port->self_powered = fwnode_property_read_bool(fwnode, "self-powered"); + +@@ -6211,6 +6190,13 @@ static int tcpm_psy_set_prop(struct power_supply *psy, struct tcpm_port *port = power_supply_get_drvdata(psy); int ret; @@ -291039,7 +348329,7 @@ index 7f2f3ff1b3911..33aadc0a29ea8 100644 switch (psp) { case POWER_SUPPLY_PROP_ONLINE: ret = tcpm_psy_set_online(port, val); -@@ -6295,7 +6304,8 @@ static enum hrtimer_restart state_machine_timer_handler(struct hrtimer *timer) +@@ -6295,7 +6281,8 @@ static enum hrtimer_restart state_machine_timer_handler(struct hrtimer *timer) { struct tcpm_port *port = container_of(timer, struct tcpm_port, state_machine_timer); @@ -291049,7 +348339,7 @@ index 7f2f3ff1b3911..33aadc0a29ea8 100644 return HRTIMER_NORESTART; } -@@ -6303,7 +6313,8 @@ static enum hrtimer_restart vdm_state_machine_timer_handler(struct hrtimer *time +@@ -6303,7 +6290,8 @@ static enum hrtimer_restart vdm_state_machine_timer_handler(struct hrtimer *time { struct tcpm_port *port = container_of(timer, struct tcpm_port, vdm_state_machine_timer); @@ -291059,7 +348349,7 @@ index 7f2f3ff1b3911..33aadc0a29ea8 100644 return HRTIMER_NORESTART; } -@@ -6311,7 +6322,8 @@ static enum hrtimer_restart 
enable_frs_timer_handler(struct hrtimer *timer) +@@ -6311,7 +6299,8 @@ static enum hrtimer_restart enable_frs_timer_handler(struct hrtimer *timer) { struct tcpm_port *port = container_of(timer, struct tcpm_port, enable_frs_timer); @@ -291069,7 +348359,7 @@ index 7f2f3ff1b3911..33aadc0a29ea8 100644 return HRTIMER_NORESTART; } -@@ -6319,7 +6331,8 @@ static enum hrtimer_restart send_discover_timer_handler(struct hrtimer *timer) +@@ -6319,7 +6308,8 @@ static enum hrtimer_restart send_discover_timer_handler(struct hrtimer *timer) { struct tcpm_port *port = container_of(timer, struct tcpm_port, send_discover_timer); @@ -291079,7 +348369,7 @@ index 7f2f3ff1b3911..33aadc0a29ea8 100644 return HRTIMER_NORESTART; } -@@ -6407,6 +6420,7 @@ struct tcpm_port *tcpm_register_port(struct device *dev, struct tcpc_dev *tcpc) +@@ -6407,6 +6397,7 @@ struct tcpm_port *tcpm_register_port(struct device *dev, struct tcpc_dev *tcpc) typec_port_register_altmodes(port->typec_port, &tcpm_altmode_ops, port, port->port_altmode, ALTMODE_DISCOVERY_MAX); @@ -291087,7 +348377,7 @@ index 7f2f3ff1b3911..33aadc0a29ea8 100644 mutex_lock(&port->lock); tcpm_init(port); -@@ -6428,6 +6442,9 @@ void tcpm_unregister_port(struct tcpm_port *port) +@@ -6428,6 +6419,9 @@ void tcpm_unregister_port(struct tcpm_port *port) { int i; @@ -291097,7 +348387,7 @@ index 7f2f3ff1b3911..33aadc0a29ea8 100644 hrtimer_cancel(&port->send_discover_timer); hrtimer_cancel(&port->enable_frs_timer); hrtimer_cancel(&port->vdm_state_machine_timer); -@@ -6439,7 +6456,6 @@ void tcpm_unregister_port(struct tcpm_port *port) +@@ -6439,7 +6433,6 @@ void tcpm_unregister_port(struct tcpm_port *port) typec_unregister_port(port->typec_port); usb_role_switch_put(port->role_sw); tcpm_debugfs_exit(port); @@ -291106,7 +348396,7 @@ index 7f2f3ff1b3911..33aadc0a29ea8 100644 EXPORT_SYMBOL_GPL(tcpm_unregister_port); diff --git a/drivers/usb/typec/tipd/core.c b/drivers/usb/typec/tipd/core.c -index ea4cc0a6e40cc..23a8b9b0b1fef 100644 +index ea4cc0a6e40cc..2f32c3fceef87 100644 --- a/drivers/usb/typec/tipd/core.c +++ b/drivers/usb/typec/tipd/core.c @@ -117,7 +117,7 @@ tps6598x_block_read(struct tps6598x *tps, u8 reg, void *val, size_t len) @@ -291152,6 +348442,30 @@ index ea4cc0a6e40cc..23a8b9b0b1fef 100644 /* * This fwnode has a "compatible" property, but is never populated as a +@@ -679,14 +684,13 @@ static int tps6598x_probe(struct i2c_client *client) + + ret = devm_tps6598_psy_register(tps); + if (ret) +- return ret; ++ goto err_role_put; + + tps->port = typec_register_port(&client->dev, &typec_cap); + if (IS_ERR(tps->port)) { + ret = PTR_ERR(tps->port); + goto err_role_put; + } +- fwnode_handle_put(fwnode); + + if (status & TPS_STATUS_PLUG_PRESENT) { + ret = tps6598x_connect(tps, status); +@@ -705,6 +709,7 @@ static int tps6598x_probe(struct i2c_client *client) + } + + i2c_set_clientdata(client, tps); ++ fwnode_handle_put(fwnode); + + return 0; + @@ -712,7 +717,8 @@ err_role_put: usb_role_switch_put(tps->role_sw); err_fwnode_put: @@ -291967,10 +349281,20 @@ index 1dc121a07a934..86571498c1c23 100644 } diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.c b/drivers/vdpa/vdpa_sim/vdpa_sim.c -index 5f484fff8dbec..2faf3bd1c3ba5 100644 +index 5f484fff8dbec..4d9e3fdae5f6c 100644 --- a/drivers/vdpa/vdpa_sim/vdpa_sim.c +++ b/drivers/vdpa/vdpa_sim/vdpa_sim.c -@@ -353,11 +353,14 @@ static void vdpasim_set_vq_ready(struct vdpa_device *vdpa, u16 idx, bool ready) +@@ -66,8 +66,7 @@ static void vdpasim_queue_ready(struct vdpasim *vdpasim, unsigned int idx) + { + struct vdpasim_virtqueue *vq = 
&vdpasim->vqs[idx]; + +- vringh_init_iotlb(&vq->vring, vdpasim->dev_attr.supported_features, +- VDPASIM_QUEUE_MAX, false, ++ vringh_init_iotlb(&vq->vring, vdpasim->features, vq->num, false, + (struct vring_desc *)(uintptr_t)vq->desc_addr, + (struct vring_avail *) + (uintptr_t)vq->driver_addr, +@@ -353,11 +352,14 @@ static void vdpasim_set_vq_ready(struct vdpa_device *vdpa, u16 idx, bool ready) { struct vdpasim *vdpasim = vdpa_to_sim(vdpa); struct vdpasim_virtqueue *vq = &vdpasim->vqs[idx]; @@ -291986,7 +349310,7 @@ index 5f484fff8dbec..2faf3bd1c3ba5 100644 spin_unlock(&vdpasim->lock); } -@@ -591,8 +594,11 @@ static void vdpasim_free(struct vdpa_device *vdpa) +@@ -591,8 +593,11 @@ static void vdpasim_free(struct vdpa_device *vdpa) vringh_kiov_cleanup(&vdpasim->vqs[i].in_iov); } @@ -292000,6 +349324,38 @@ index 5f484fff8dbec..2faf3bd1c3ba5 100644 kvfree(vdpasim->buffer); if (vdpasim->iommu) vhost_iotlb_free(vdpasim->iommu); +diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim_blk.c b/drivers/vdpa/vdpa_sim/vdpa_sim_blk.c +index a790903f243e8..22b812c32bee8 100644 +--- a/drivers/vdpa/vdpa_sim/vdpa_sim_blk.c ++++ b/drivers/vdpa/vdpa_sim/vdpa_sim_blk.c +@@ -308,8 +308,10 @@ static int __init vdpasim_blk_init(void) + int ret; + + ret = device_register(&vdpasim_blk_mgmtdev); +- if (ret) ++ if (ret) { ++ put_device(&vdpasim_blk_mgmtdev); + return ret; ++ } + + ret = vdpa_mgmtdev_register(&mgmt_dev); + if (ret) +diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim_net.c b/drivers/vdpa/vdpa_sim/vdpa_sim_net.c +index a1ab6163f7d13..f1c420c5e26eb 100644 +--- a/drivers/vdpa/vdpa_sim/vdpa_sim_net.c ++++ b/drivers/vdpa/vdpa_sim/vdpa_sim_net.c +@@ -194,8 +194,10 @@ static int __init vdpasim_net_init(void) + } + + ret = device_register(&vdpasim_net_mgmtdev); +- if (ret) ++ if (ret) { ++ put_device(&vdpasim_net_mgmtdev); + return ret; ++ } + + ret = vdpa_mgmtdev_register(&mgmt_dev); + if (ret) diff --git a/drivers/vdpa/vdpa_user/iova_domain.c b/drivers/vdpa/vdpa_user/iova_domain.c index 1daae26088609..0678c25141973 100644 --- a/drivers/vdpa/vdpa_user/iova_domain.c @@ -292014,7 +349370,7 @@ index 1daae26088609..0678c25141973 100644 static void vduse_domain_free_iova(struct iova_domain *iovad, diff --git a/drivers/vdpa/vdpa_user/vduse_dev.c b/drivers/vdpa/vdpa_user/vduse_dev.c -index 841667a896dd0..e7d2d5b7e1257 100644 +index 841667a896dd0..3467c75f310a5 100644 --- a/drivers/vdpa/vdpa_user/vduse_dev.c +++ b/drivers/vdpa/vdpa_user/vduse_dev.c @@ -655,9 +655,15 @@ static void vduse_vdpa_get_config(struct vdpa_device *vdpa, unsigned int offset, @@ -292044,7 +349400,17 @@ index 841667a896dd0..e7d2d5b7e1257 100644 config.length > dev->config_size - config.offset) break; -@@ -1334,9 +1341,9 @@ static int vduse_create_dev(struct vduse_dev_config *config, +@@ -1244,6 +1251,9 @@ static bool vduse_validate_config(struct vduse_dev_config *config) + if (config->config_size > PAGE_SIZE) + return false; + ++ if (config->vq_num > 0xffff) ++ return false; ++ + if (!device_is_allowed(config->device_id)) + return false; + +@@ -1334,9 +1344,9 @@ static int vduse_create_dev(struct vduse_dev_config *config, dev->minor = ret; dev->msg_timeout = VDUSE_MSG_DEFAULT_TIMEOUT; @@ -292057,7 +349423,7 @@ index 841667a896dd0..e7d2d5b7e1257 100644 if (IS_ERR(dev->dev)) { ret = PTR_ERR(dev->dev); goto err_dev; -@@ -1464,16 +1471,12 @@ static char *vduse_devnode(struct device *dev, umode_t *mode) +@@ -1464,16 +1474,12 @@ static char *vduse_devnode(struct device *dev, umode_t *mode) return kasprintf(GFP_KERNEL, "vduse/%s", dev_name(dev)); } @@ -292078,7 
+349444,7 @@ index 841667a896dd0..e7d2d5b7e1257 100644 static int vduse_dev_init_vdpa(struct vduse_dev *dev, const char *name) { -@@ -1498,7 +1501,7 @@ static int vduse_dev_init_vdpa(struct vduse_dev *dev, const char *name) +@@ -1498,7 +1504,7 @@ static int vduse_dev_init_vdpa(struct vduse_dev *dev, const char *name) } set_dma_ops(&vdev->vdpa.dev, &vduse_dev_dma_ops); vdev->vdpa.dma_dev = &vdev->vdpa.dev; @@ -292087,7 +349453,7 @@ index 841667a896dd0..e7d2d5b7e1257 100644 return 0; } -@@ -1543,34 +1546,52 @@ static struct virtio_device_id id_table[] = { +@@ -1543,34 +1549,52 @@ static struct virtio_device_id id_table[] = { { 0 }, }; @@ -292154,7 +349520,7 @@ index 841667a896dd0..e7d2d5b7e1257 100644 } static int vduse_init(void) -@@ -1583,7 +1604,6 @@ static int vduse_init(void) +@@ -1583,7 +1607,6 @@ static int vduse_init(void) return PTR_ERR(vduse_class); vduse_class->devnode = vduse_devnode; @@ -292508,6 +349874,24 @@ index 57d3b2cbbd8e5..82ac1569deb05 100644 static void vfio_pci_ioeventfd_do_write(struct vfio_pci_ioeventfd *ioeventfd, bool test_mem) +diff --git a/drivers/vfio/platform/vfio_platform_common.c b/drivers/vfio/platform/vfio_platform_common.c +index 6af7ce7d619c2..701bd99a87198 100644 +--- a/drivers/vfio/platform/vfio_platform_common.c ++++ b/drivers/vfio/platform/vfio_platform_common.c +@@ -72,12 +72,11 @@ static int vfio_platform_acpi_call_reset(struct vfio_platform_device *vdev, + const char **extra_dbg) + { + #ifdef CONFIG_ACPI +- struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL }; + struct device *dev = vdev->device; + acpi_handle handle = ACPI_HANDLE(dev); + acpi_status acpi_ret; + +- acpi_ret = acpi_evaluate_object(handle, "_RST", NULL, &buffer); ++ acpi_ret = acpi_evaluate_object(handle, "_RST", NULL, NULL); + if (ACPI_FAILURE(acpi_ret)) { + if (extra_dbg) + *extra_dbg = acpi_format_exception(acpi_ret); diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c index 3c034fe14ccb0..818e47fc08968 100644 --- a/drivers/vfio/vfio.c @@ -292570,7 +349954,7 @@ index 670d56c879e50..5829cf2d0552d 100644 iotlb->nmaps == iotlb->limit && iotlb->flags & VHOST_IOTLB_FLAG_RETIRE) { diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c -index 28ef323882fb2..297b5db474545 100644 +index 28ef323882fb2..32148f0112004 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -473,6 +473,7 @@ static void vhost_tx_batch(struct vhost_net *net, @@ -292604,7 +349988,17 @@ index 28ef323882fb2..297b5db474545 100644 return ring; } -@@ -1551,8 +1547,12 @@ static long vhost_net_set_backend(struct vhost_net *n, unsigned index, int fd) +@@ -1521,6 +1517,9 @@ static long vhost_net_set_backend(struct vhost_net *n, unsigned index, int fd) + nvq = &n->vqs[index]; + mutex_lock(&vq->mutex); + ++ if (fd == -1) ++ vhost_clear_msg(&n->dev); ++ + /* Verify that ring has been setup correctly. 
*/ + if (!vhost_vq_access_ok(vq)) { + r = -EFAULT; +@@ -1551,8 +1550,12 @@ static long vhost_net_set_backend(struct vhost_net *n, unsigned index, int fd) r = vhost_net_enable_vq(n, vq); if (r) goto err_used; @@ -292646,10 +350040,27 @@ index 39039e0461175..299a995326185 100644 if (c->len > size - c->off) diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c -index 59edb5a1ffe28..6942472cffb0f 100644 +index 59edb5a1ffe28..c0f926a9c298f 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c -@@ -1170,6 +1170,13 @@ ssize_t vhost_chr_write_iter(struct vhost_dev *dev, +@@ -669,7 +669,7 @@ void vhost_dev_stop(struct vhost_dev *dev) + } + EXPORT_SYMBOL_GPL(vhost_dev_stop); + +-static void vhost_clear_msg(struct vhost_dev *dev) ++void vhost_clear_msg(struct vhost_dev *dev) + { + struct vhost_msg_node *node, *n; + +@@ -687,6 +687,7 @@ static void vhost_clear_msg(struct vhost_dev *dev) + + spin_unlock(&dev->iotlb_lock); + } ++EXPORT_SYMBOL_GPL(vhost_clear_msg); + + void vhost_dev_cleanup(struct vhost_dev *dev) + { +@@ -1170,6 +1171,13 @@ ssize_t vhost_chr_write_iter(struct vhost_dev *dev, goto done; } @@ -292663,8 +350074,38 @@ index 59edb5a1ffe28..6942472cffb0f 100644 if (dev->msg_handler) ret = dev->msg_handler(dev, &msg); else +@@ -2041,7 +2049,7 @@ static int translate_desc(struct vhost_virtqueue *vq, u64 addr, u32 len, + struct vhost_dev *dev = vq->dev; + struct vhost_iotlb *umem = dev->iotlb ? dev->iotlb : dev->umem; + struct iovec *_iov; +- u64 s = 0; ++ u64 s = 0, last = addr + len - 1; + int ret = 0; + + while ((u64)len > s) { +@@ -2051,7 +2059,7 @@ static int translate_desc(struct vhost_virtqueue *vq, u64 addr, u32 len, + break; + } + +- map = vhost_iotlb_itree_first(umem, addr, addr + len - 1); ++ map = vhost_iotlb_itree_first(umem, addr, last); + if (map == NULL || map->start > addr) { + if (umem != dev->iotlb) { + ret = -EFAULT; +diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h +index 638bb640d6b4b..f2675c0aa08ee 100644 +--- a/drivers/vhost/vhost.h ++++ b/drivers/vhost/vhost.h +@@ -182,6 +182,7 @@ long vhost_dev_ioctl(struct vhost_dev *, unsigned int ioctl, void __user *argp); + long vhost_vring_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user *argp); + bool vhost_vq_access_ok(struct vhost_virtqueue *vq); + bool vhost_log_access_ok(struct vhost_dev *); ++void vhost_clear_msg(struct vhost_dev *dev); + + int vhost_get_vq_desc(struct vhost_virtqueue *, + struct iovec iov[], unsigned int iov_count, diff --git a/drivers/vhost/vringh.c b/drivers/vhost/vringh.c -index 14e2043d76852..eab55accf381f 100644 +index 14e2043d76852..786876af0a73a 100644 --- a/drivers/vhost/vringh.c +++ b/drivers/vhost/vringh.c @@ -292,7 +292,7 @@ __vringh_iov(struct vringh *vrh, u16 i, @@ -292698,8 +350139,27 @@ index 14e2043d76852..eab55accf381f 100644 } else break; } +@@ -1095,7 +1101,7 @@ static int iotlb_translate(const struct vringh *vrh, + struct vhost_iotlb_map *map; + struct vhost_iotlb *iotlb = vrh->iotlb; + int ret = 0; +- u64 s = 0; ++ u64 s = 0, last = addr + len - 1; + + spin_lock(vrh->iotlb_lock); + +@@ -1107,8 +1113,7 @@ static int iotlb_translate(const struct vringh *vrh, + break; + } + +- map = vhost_iotlb_itree_first(iotlb, addr, +- addr + len - 1); ++ map = vhost_iotlb_itree_first(iotlb, addr, last); + if (!map || map->start > addr) { + ret = -EINVAL; + break; diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c -index 938aefbc75ecc..97bfe499222b6 100644 +index 938aefbc75ecc..74ac0c28fe43a 100644 --- a/drivers/vhost/vsock.c +++ b/drivers/vhost/vsock.c @@ -393,7 
+393,7 @@ vhost_vsock_alloc_pkt(struct vhost_virtqueue *vq, @@ -292771,6 +350231,22 @@ index 938aefbc75ecc..97bfe499222b6 100644 case VHOST_GET_FEATURES: features = VHOST_VSOCK_FEATURES; if (copy_to_user(argp, &features, sizeof(features))) +@@ -960,7 +968,14 @@ static int __init vhost_vsock_init(void) + VSOCK_TRANSPORT_F_H2G); + if (ret < 0) + return ret; +- return misc_register(&vhost_vsock_misc); ++ ++ ret = misc_register(&vhost_vsock_misc); ++ if (ret) { ++ vsock_core_unregister(&vhost_transport.transport); ++ return ret; ++ } ++ ++ return 0; + }; + + static void __exit vhost_vsock_exit(void) diff --git a/drivers/video/backlight/backlight.c b/drivers/video/backlight/backlight.c index 537fe1b376ad7..fc990e576340b 100644 --- a/drivers/video/backlight/backlight.c @@ -293243,6 +350719,26 @@ index ef9c57ce09066..9a49ea6b5112f 100644 c->vc_complement_mask = 0x7700; if (vga_512_chars) c->vc_hi_font_mask = 0x0800; +diff --git a/drivers/video/fbdev/Kconfig b/drivers/video/fbdev/Kconfig +index 6ed5e608dd041..26531aa194282 100644 +--- a/drivers/video/fbdev/Kconfig ++++ b/drivers/video/fbdev/Kconfig +@@ -606,6 +606,7 @@ config FB_TGA + config FB_UVESA + tristate "Userspace VESA VGA graphics support" + depends on FB && CONNECTOR ++ depends on !UML + select FB_CFB_FILLRECT + select FB_CFB_COPYAREA + select FB_CFB_IMAGEBLIT +@@ -2218,7 +2219,6 @@ config FB_SSD1307 + select FB_SYS_COPYAREA + select FB_SYS_IMAGEBLIT + select FB_DEFERRED_IO +- select PWM + select FB_BACKLIGHT + help + This driver implements support for the Solomon SSD1307 diff --git a/drivers/video/fbdev/amba-clcd.c b/drivers/video/fbdev/amba-clcd.c index 9ec969e136bfd..f65c96d1394d3 100644 --- a/drivers/video/fbdev/amba-clcd.c @@ -293472,7 +350968,7 @@ index 509311471d515..bd59e7b11ed53 100644 static void invalid_vram_cache(void __force *addr) { diff --git a/drivers/video/fbdev/core/fbcon.c b/drivers/video/fbdev/core/fbcon.c -index 22bb3892f6bd1..e035a63bbe5b7 100644 +index 22bb3892f6bd1..d90d807c67561 100644 --- a/drivers/video/fbdev/core/fbcon.c +++ b/drivers/video/fbdev/core/fbcon.c @@ -115,8 +115,8 @@ static int logo_lines; @@ -293500,6 +350996,15 @@ index 22bb3892f6bd1..e035a63bbe5b7 100644 fbcon_is_default = 0; continue; } +@@ -599,7 +601,7 @@ static void fbcon_prepare_logo(struct vc_data *vc, struct fb_info *info, + if (scr_readw(r) != vc->vc_video_erase_char) + break; + if (r != q && new_rows >= rows + logo_lines) { +- save = kmalloc(array3_size(logo_lines, new_cols, 2), ++ save = kzalloc(array3_size(logo_lines, new_cols, 2), + GFP_KERNEL); + if (save) { + int i = cols < new_cols ? 
cols : new_cols; @@ -1135,13 +1137,13 @@ static void fbcon_init(struct vc_data *vc, int init) ops->graphics = 0; @@ -293645,7 +351150,7 @@ index 22bb3892f6bd1..e035a63bbe5b7 100644 } else if (con_is_visible(vc) && vc->vc_mode == KD_TEXT) { fbcon_clear_margins(vc, 0); -@@ -2404,6 +2455,21 @@ static int fbcon_do_set_font(struct vc_data *vc, int w, int h, int charcount, +@@ -2404,6 +2455,22 @@ static int fbcon_do_set_font(struct vc_data *vc, int w, int h, int charcount, if (old_data && (--REFCOUNT(old_data) == 0)) kfree(old_data - FONT_EXTRA_WORDS * sizeof(int)); return 0; @@ -293656,7 +351161,8 @@ index 22bb3892f6bd1..e035a63bbe5b7 100644 + + if (userfont) { + p->userfont = old_userfont; -+ REFCOUNT(data)--; ++ if (--REFCOUNT(data) == 0) ++ kfree(data - FONT_EXTRA_WORDS * sizeof(int)); + } + + vc->vc_font.width = old_width; @@ -293667,7 +351173,7 @@ index 22bb3892f6bd1..e035a63bbe5b7 100644 } /* -@@ -2435,6 +2501,11 @@ static int fbcon_set_font(struct vc_data *vc, struct console_font *font, +@@ -2435,9 +2502,17 @@ static int fbcon_set_font(struct vc_data *vc, struct console_font *font, if (charcount != 256 && charcount != 512) return -EINVAL; @@ -293675,11 +351181,19 @@ index 22bb3892f6bd1..e035a63bbe5b7 100644 + if (w > FBCON_SWAP(info->var.rotate, info->var.xres, info->var.yres) || + h > FBCON_SWAP(info->var.rotate, info->var.yres, info->var.xres)) + return -EINVAL; ++ ++ if (font->width > 32 || font->height > 32) ++ return -EINVAL; + /* Make sure drawing engine can handle the font */ - if (!(info->pixmap.blit_x & (1 << (font->width - 1))) || - !(info->pixmap.blit_y & (1 << (font->height - 1)))) -@@ -2697,6 +2768,34 @@ void fbcon_update_vcs(struct fb_info *info, bool all) +- if (!(info->pixmap.blit_x & (1 << (font->width - 1))) || +- !(info->pixmap.blit_y & (1 << (font->height - 1)))) ++ if (!(info->pixmap.blit_x & BIT(font->width - 1)) || ++ !(info->pixmap.blit_y & BIT(font->height - 1))) + return -EINVAL; + + /* Make sure driver can handle the font length */ +@@ -2697,6 +2772,34 @@ void fbcon_update_vcs(struct fb_info *info, bool all) } EXPORT_SYMBOL(fbcon_update_vcs); @@ -293714,7 +351228,7 @@ index 22bb3892f6bd1..e035a63bbe5b7 100644 int fbcon_mode_deleted(struct fb_info *info, struct fb_videomode *mode) { -@@ -3220,6 +3319,9 @@ static void fbcon_register_existing_fbs(struct work_struct *work) +@@ -3220,6 +3323,9 @@ static void fbcon_register_existing_fbs(struct work_struct *work) console_lock(); @@ -293724,7 +351238,7 @@ index 22bb3892f6bd1..e035a63bbe5b7 100644 for_each_registered_fb(i) fbcon_fb_registered(registered_fb[i]); -@@ -3237,8 +3339,6 @@ static int fbcon_output_notifier(struct notifier_block *nb, +@@ -3237,8 +3343,6 @@ static int fbcon_output_notifier(struct notifier_block *nb, pr_info("fbcon: Taking over console\n"); dummycon_unregister_output_notifier(&fbcon_output_nb); @@ -294336,8 +351850,40 @@ index 8ea8f079cde26..b3d5f884c5445 100644 return 0; } +diff --git a/drivers/video/fbdev/ep93xx-fb.c b/drivers/video/fbdev/ep93xx-fb.c +index 2398b3d48fedf..305f1587bd898 100644 +--- a/drivers/video/fbdev/ep93xx-fb.c ++++ b/drivers/video/fbdev/ep93xx-fb.c +@@ -552,12 +552,14 @@ static int ep93xxfb_probe(struct platform_device *pdev) + + err = register_framebuffer(info); + if (err) +- goto failed_check; ++ goto failed_framebuffer; + + dev_info(info->dev, "registered. 
Mode = %dx%d-%d\n", + info->var.xres, info->var.yres, info->var.bits_per_pixel); + return 0; + ++failed_framebuffer: ++ clk_disable_unprepare(fbi->clk); + failed_check: + if (fbi->mach_info->teardown) + fbi->mach_info->teardown(pdev); +diff --git a/drivers/video/fbdev/geode/Kconfig b/drivers/video/fbdev/geode/Kconfig +index ac9c860592aaf..85bc14b6faf64 100644 +--- a/drivers/video/fbdev/geode/Kconfig ++++ b/drivers/video/fbdev/geode/Kconfig +@@ -5,6 +5,7 @@ + config FB_GEODE + bool "AMD Geode family framebuffer support" + depends on FB && PCI && (X86_32 || (X86 && COMPILE_TEST)) ++ depends on !UML + help + Say 'Y' here to allow you to select framebuffer drivers for + the AMD Geode family of processors. diff --git a/drivers/video/fbdev/hyperv_fb.c b/drivers/video/fbdev/hyperv_fb.c -index 23999df527393..58c304a3b7c41 100644 +index 23999df527393..de865e197c8d9 100644 --- a/drivers/video/fbdev/hyperv_fb.c +++ b/drivers/video/fbdev/hyperv_fb.c @@ -287,8 +287,6 @@ struct hvfb_par { @@ -294371,7 +351917,27 @@ index 23999df527393..58c304a3b7c41 100644 screen_width = msg->resolution_resp.supported_resolution[index].width; screen_height = -@@ -941,7 +931,7 @@ static void hvfb_get_option(struct fb_info *info) +@@ -809,12 +799,18 @@ static void hvfb_ondemand_refresh_throttle(struct hvfb_par *par, + static int hvfb_on_panic(struct notifier_block *nb, + unsigned long e, void *p) + { ++ struct hv_device *hdev; + struct hvfb_par *par; + struct fb_info *info; + + par = container_of(nb, struct hvfb_par, hvfb_panic_nb); +- par->synchronous_fb = true; + info = par->info; ++ hdev = device_to_hv_device(info->device); ++ ++ if (hv_ringbuffer_spinlock_busy(hdev->channel)) ++ return NOTIFY_DONE; ++ ++ par->synchronous_fb = true; + if (par->need_docopy) + hvfb_docopy(par, 0, dio_fb_size); + synthvid_update(info, 0, 0, INT_MAX, INT_MAX); +@@ -941,7 +937,7 @@ static void hvfb_get_option(struct fb_info *info) if (x < HVFB_WIDTH_MIN || y < HVFB_HEIGHT_MIN || (synthvid_ver_ge(par->synthvid_version, SYNTHVID_VERSION_WIN10) && @@ -294380,7 +351946,7 @@ index 23999df527393..58c304a3b7c41 100644 (par->synthvid_version == SYNTHVID_VERSION_WIN8 && x * y * screen_depth / 8 > SYNTHVID_FB_SIZE_WIN8) || (par->synthvid_version == SYNTHVID_VERSION_WIN7 && -@@ -1019,7 +1009,6 @@ static int hvfb_getmem(struct hv_device *hdev, struct fb_info *info) +@@ -1019,7 +1015,6 @@ static int hvfb_getmem(struct hv_device *hdev, struct fb_info *info) struct pci_dev *pdev = NULL; void __iomem *fb_virt; int gen2vm = efi_enabled(EFI_BOOT); @@ -294388,7 +351954,7 @@ index 23999df527393..58c304a3b7c41 100644 phys_addr_t paddr; int ret; -@@ -1070,23 +1059,7 @@ static int hvfb_getmem(struct hv_device *hdev, struct fb_info *info) +@@ -1070,23 +1065,7 @@ static int hvfb_getmem(struct hv_device *hdev, struct fb_info *info) dio_fb_size = screen_width * screen_height * screen_depth / 8; @@ -294413,7 +351979,7 @@ index 23999df527393..58c304a3b7c41 100644 screen_fb_size, 0x100000, true); if (ret != 0) { pr_err("Unable to allocate framebuffer memory\n"); -@@ -1194,8 +1167,8 @@ static int hvfb_probe(struct hv_device *hdev, +@@ -1194,8 +1173,8 @@ static int hvfb_probe(struct hv_device *hdev, } hvfb_get_option(info); @@ -294462,18 +352028,20 @@ index 52cce0db8bd34..8fb4e01e1943f 100644 case 8: var->red.offset = var->green.offset = var->blue.offset = 0; diff --git a/drivers/video/fbdev/matrox/matroxfb_base.c b/drivers/video/fbdev/matrox/matroxfb_base.c -index 5c82611e93d99..236521b19daf7 100644 +index 5c82611e93d99..e7348d657e183 100644 --- 
a/drivers/video/fbdev/matrox/matroxfb_base.c +++ b/drivers/video/fbdev/matrox/matroxfb_base.c -@@ -1377,7 +1377,7 @@ static struct video_board vbG200 = { +@@ -1377,8 +1377,8 @@ static struct video_board vbG200 = { .lowlevel = &matrox_G100 }; static struct video_board vbG200eW = { - .maxvram = 0x800000, -+ .maxvram = 0x100000, - .maxdisplayable = 0x800000, +- .maxdisplayable = 0x800000, ++ .maxvram = 0x1000000, ++ .maxdisplayable = 0x0800000, .accelID = FB_ACCEL_MATROX_MGAG200, .lowlevel = &matrox_G100 + }; diff --git a/drivers/video/fbdev/nvidia/nv_i2c.c b/drivers/video/fbdev/nvidia/nv_i2c.c index d7994a1732459..0b48965a6420c 100644 --- a/drivers/video/fbdev/nvidia/nv_i2c.c @@ -294574,8 +352142,79 @@ index afac1d9445aa2..57b7d1f490962 100644 } static ssize_t tpo_td043_mode_store(struct device *dev, +diff --git a/drivers/video/fbdev/omap2/omapfb/dss/dsi.c b/drivers/video/fbdev/omap2/omapfb/dss/dsi.c +index d43b081d592f0..db84a662e8de3 100644 +--- a/drivers/video/fbdev/omap2/omapfb/dss/dsi.c ++++ b/drivers/video/fbdev/omap2/omapfb/dss/dsi.c +@@ -1538,22 +1538,28 @@ static void dsi_dump_dsidev_irqs(struct platform_device *dsidev, + { + struct dsi_data *dsi = dsi_get_dsidrv_data(dsidev); + unsigned long flags; +- struct dsi_irq_stats stats; ++ struct dsi_irq_stats *stats; ++ ++ stats = kzalloc(sizeof(*stats), GFP_KERNEL); ++ if (!stats) { ++ seq_printf(s, "out of memory\n"); ++ return; ++ } + + spin_lock_irqsave(&dsi->irq_stats_lock, flags); + +- stats = dsi->irq_stats; ++ *stats = dsi->irq_stats; + memset(&dsi->irq_stats, 0, sizeof(dsi->irq_stats)); + dsi->irq_stats.last_reset = jiffies; + + spin_unlock_irqrestore(&dsi->irq_stats_lock, flags); + + seq_printf(s, "period %u ms\n", +- jiffies_to_msecs(jiffies - stats.last_reset)); ++ jiffies_to_msecs(jiffies - stats->last_reset)); + +- seq_printf(s, "irqs %d\n", stats.irq_count); ++ seq_printf(s, "irqs %d\n", stats->irq_count); + #define PIS(x) \ +- seq_printf(s, "%-20s %10d\n", #x, stats.dsi_irqs[ffs(DSI_IRQ_##x)-1]) ++ seq_printf(s, "%-20s %10d\n", #x, stats->dsi_irqs[ffs(DSI_IRQ_##x)-1]) + + seq_printf(s, "-- DSI%d interrupts --\n", dsi->module_id + 1); + PIS(VC0); +@@ -1577,10 +1583,10 @@ static void dsi_dump_dsidev_irqs(struct platform_device *dsidev, + + #define PIS(x) \ + seq_printf(s, "%-20s %10d %10d %10d %10d\n", #x, \ +- stats.vc_irqs[0][ffs(DSI_VC_IRQ_##x)-1], \ +- stats.vc_irqs[1][ffs(DSI_VC_IRQ_##x)-1], \ +- stats.vc_irqs[2][ffs(DSI_VC_IRQ_##x)-1], \ +- stats.vc_irqs[3][ffs(DSI_VC_IRQ_##x)-1]); ++ stats->vc_irqs[0][ffs(DSI_VC_IRQ_##x)-1], \ ++ stats->vc_irqs[1][ffs(DSI_VC_IRQ_##x)-1], \ ++ stats->vc_irqs[2][ffs(DSI_VC_IRQ_##x)-1], \ ++ stats->vc_irqs[3][ffs(DSI_VC_IRQ_##x)-1]); + + seq_printf(s, "-- VC interrupts --\n"); + PIS(CS); +@@ -1596,7 +1602,7 @@ static void dsi_dump_dsidev_irqs(struct platform_device *dsidev, + + #define PIS(x) \ + seq_printf(s, "%-20s %10d\n", #x, \ +- stats.cio_irqs[ffs(DSI_CIO_IRQ_##x)-1]); ++ stats->cio_irqs[ffs(DSI_CIO_IRQ_##x)-1]); + + seq_printf(s, "-- CIO interrupts --\n"); + PIS(ERRSYNCESC1); +@@ -1620,6 +1626,8 @@ static void dsi_dump_dsidev_irqs(struct platform_device *dsidev, + PIS(ULPSACTIVENOT_ALL0); + PIS(ULPSACTIVENOT_ALL1); + #undef PIS ++ ++ kfree(stats); + } + + static void dsi1_dump_irqs(struct seq_file *s) diff --git a/drivers/video/fbdev/pm2fb.c b/drivers/video/fbdev/pm2fb.c -index c68725eebee3b..cbcf112c88d30 100644 +index c68725eebee3b..e8690f7aea050 100644 --- a/drivers/video/fbdev/pm2fb.c +++ b/drivers/video/fbdev/pm2fb.c @@ -617,6 +617,11 @@ static int pm2fb_check_var(struct 
fb_var_screeninfo *var, struct fb_info *info) @@ -294590,6 +352229,36 @@ index c68725eebee3b..cbcf112c88d30 100644 if (PICOS2KHZ(var->pixclock) > PM2_MAX_PIXCLOCK) { DPRINTK("pixclock too high (%ldKHz)\n", PICOS2KHZ(var->pixclock)); +@@ -1525,8 +1530,10 @@ static int pm2fb_probe(struct pci_dev *pdev, const struct pci_device_id *id) + } + + info = framebuffer_alloc(sizeof(struct pm2fb_par), &pdev->dev); +- if (!info) +- return -ENOMEM; ++ if (!info) { ++ err = -ENOMEM; ++ goto err_exit_disable; ++ } + default_par = info->par; + + switch (pdev->device) { +@@ -1707,6 +1714,8 @@ static int pm2fb_probe(struct pci_dev *pdev, const struct pci_device_id *id) + release_mem_region(pm2fb_fix.mmio_start, pm2fb_fix.mmio_len); + err_exit_neither: + framebuffer_release(info); ++ err_exit_disable: ++ pci_disable_device(pdev); + return retval; + } + +@@ -1733,6 +1742,7 @@ static void pm2fb_remove(struct pci_dev *pdev) + fb_dealloc_cmap(&info->cmap); + kfree(info->pixmap.addr); + framebuffer_release(info); ++ pci_disable_device(pdev); + } + + static const struct pci_device_id pm2fb_id_table[] = { diff --git a/drivers/video/fbdev/pxa3xx-gcu.c b/drivers/video/fbdev/pxa3xx-gcu.c index 4279e13a3b58d..9e9888e40c573 100644 --- a/drivers/video/fbdev/pxa3xx-gcu.c @@ -294806,10 +352475,18 @@ index 0dbc6bf8268ac..092a1caa1208e 100644 *ppos += c; buf += c; diff --git a/drivers/video/fbdev/smscufx.c b/drivers/video/fbdev/smscufx.c -index bfac3ee4a6422..7673db5da26b0 100644 +index bfac3ee4a6422..b3295cd7fd4f9 100644 --- a/drivers/video/fbdev/smscufx.c +++ b/drivers/video/fbdev/smscufx.c -@@ -137,6 +137,8 @@ static int ufx_submit_urb(struct ufx_data *dev, struct urb * urb, size_t len); +@@ -97,7 +97,6 @@ struct ufx_data { + struct kref kref; + int fb_count; + bool virtualized; /* true when physical usb device not present */ +- struct delayed_work free_framebuffer_work; + atomic_t usb_active; /* 0 = update virtual buffer, but no usb traffic */ + atomic_t lost_pixels; /* 1 = a render op failed. 
Need screen refresh */ + u8 *edid; /* null until we read edid from hw or get from sysfs */ +@@ -137,6 +136,8 @@ static int ufx_submit_urb(struct ufx_data *dev, struct urb * urb, size_t len); static int ufx_alloc_urb_list(struct ufx_data *dev, int count, size_t size); static void ufx_free_urb_list(struct ufx_data *dev); @@ -294818,7 +352495,7 @@ index bfac3ee4a6422..7673db5da26b0 100644 /* reads a control register */ static int ufx_reg_read(struct ufx_data *dev, u32 index, u32 *data) { -@@ -1070,9 +1072,13 @@ static int ufx_ops_open(struct fb_info *info, int user) +@@ -1070,9 +1071,13 @@ static int ufx_ops_open(struct fb_info *info, int user) if (user == 0 && !console) return -EBUSY; @@ -294833,7 +352510,7 @@ index bfac3ee4a6422..7673db5da26b0 100644 dev->fb_count++; -@@ -1096,6 +1102,8 @@ static int ufx_ops_open(struct fb_info *info, int user) +@@ -1096,6 +1101,8 @@ static int ufx_ops_open(struct fb_info *info, int user) pr_debug("open /dev/fb%d user=%d fb_info=%p count=%d", info->node, user, info, dev->fb_count); @@ -294842,7 +352519,125 @@ index bfac3ee4a6422..7673db5da26b0 100644 return 0; } -@@ -1656,6 +1664,7 @@ static int ufx_usb_probe(struct usb_interface *interface, +@@ -1108,15 +1115,24 @@ static void ufx_free(struct kref *kref) + { + struct ufx_data *dev = container_of(kref, struct ufx_data, kref); + +- /* this function will wait for all in-flight urbs to complete */ +- if (dev->urbs.count > 0) +- ufx_free_urb_list(dev); ++ kfree(dev); ++} + +- pr_debug("freeing ufx_data %p", dev); ++static void ufx_ops_destory(struct fb_info *info) ++{ ++ struct ufx_data *dev = info->par; ++ int node = info->node; + +- kfree(dev); ++ /* Assume info structure is freed after this point */ ++ framebuffer_release(info); ++ ++ pr_debug("fb_info for /dev/fb%d has been freed", node); ++ ++ /* release reference taken by kref_init in probe() */ ++ kref_put(&dev->kref, ufx_free); + } + ++ + static void ufx_release_urb_work(struct work_struct *work) + { + struct urb_node *unode = container_of(work, struct urb_node, +@@ -1125,14 +1141,9 @@ static void ufx_release_urb_work(struct work_struct *work) + up(&unode->dev->urbs.limit_sem); + } + +-static void ufx_free_framebuffer_work(struct work_struct *work) ++static void ufx_free_framebuffer(struct ufx_data *dev) + { +- struct ufx_data *dev = container_of(work, struct ufx_data, +- free_framebuffer_work.work); + struct fb_info *info = dev->info; +- int node = info->node; +- +- unregister_framebuffer(info); + + if (info->cmap.len != 0) + fb_dealloc_cmap(&info->cmap); +@@ -1144,11 +1155,6 @@ static void ufx_free_framebuffer_work(struct work_struct *work) + + dev->info = NULL; + +- /* Assume info structure is freed after this point */ +- framebuffer_release(info); +- +- pr_debug("fb_info for /dev/fb%d has been freed", node); +- + /* ref taken in probe() as part of registering framebfufer */ + kref_put(&dev->kref, ufx_free); + } +@@ -1160,11 +1166,13 @@ static int ufx_ops_release(struct fb_info *info, int user) + { + struct ufx_data *dev = info->par; + ++ mutex_lock(&disconnect_mutex); ++ + dev->fb_count--; + + /* We can't free fb_info here - fbmem will touch it when we return */ + if (dev->virtualized && (dev->fb_count == 0)) +- schedule_delayed_work(&dev->free_framebuffer_work, HZ); ++ ufx_free_framebuffer(dev); + + if ((dev->fb_count == 0) && (info->fbdefio)) { + fb_deferred_io_cleanup(info); +@@ -1177,6 +1185,8 @@ static int ufx_ops_release(struct fb_info *info, int user) + + kref_put(&dev->kref, ufx_free); + ++ mutex_unlock(&disconnect_mutex); ++ + return 0; + } 
+ +@@ -1283,6 +1293,7 @@ static const struct fb_ops ufx_ops = { + .fb_blank = ufx_ops_blank, + .fb_check_var = ufx_ops_check_var, + .fb_set_par = ufx_ops_set_par, ++ .fb_destroy = ufx_ops_destory, + }; + + /* Assumes &info->lock held by caller +@@ -1610,7 +1621,7 @@ static int ufx_usb_probe(struct usb_interface *interface, + struct usb_device *usbdev; + struct ufx_data *dev; + struct fb_info *info; +- int retval; ++ int retval = -ENOMEM; + u32 id_rev, fpga_rev; + + /* usb initialization */ +@@ -1642,20 +1653,23 @@ static int ufx_usb_probe(struct usb_interface *interface, + + if (!ufx_alloc_urb_list(dev, WRITES_IN_FLIGHT, MAX_TRANSFER)) { + dev_err(dev->gdev, "ufx_alloc_urb_list failed\n"); +- goto e_nomem; ++ goto put_ref; + } + + /* We don't register a new USB class. Our client interface is fbdev */ + + /* allocates framebuffer driver structure, not framebuffer memory */ + info = framebuffer_alloc(0, &usbdev->dev); +- if (!info) +- goto e_nomem; ++ if (!info) { ++ dev_err(dev->gdev, "framebuffer_alloc failed\n"); ++ goto free_urb_list; ++ } + + dev->info = info; info->par = dev; info->pseudo_palette = dev->pseudo_palette; info->fbops = &ufx_ops; @@ -294850,29 +352645,117 @@ index bfac3ee4a6422..7673db5da26b0 100644 retval = fb_alloc_cmap(&info->cmap, 256, 0); if (retval < 0) { -@@ -1666,8 +1675,6 @@ static int ufx_usb_probe(struct usb_interface *interface, - INIT_DELAYED_WORK(&dev->free_framebuffer_work, - ufx_free_framebuffer_work); +@@ -1663,11 +1677,6 @@ static int ufx_usb_probe(struct usb_interface *interface, + goto destroy_modedb; + } +- INIT_DELAYED_WORK(&dev->free_framebuffer_work, +- ufx_free_framebuffer_work); +- - INIT_LIST_HEAD(&info->modelist); - retval = ufx_reg_read(dev, 0x3000, &id_rev); check_warn_goto_error(retval, "error %d reading 0x3000 register from device", retval); dev_dbg(dev->gdev, "ID_REV register value 0x%08x", id_rev); -@@ -1741,6 +1748,8 @@ static void ufx_usb_disconnect(struct usb_interface *interface) +@@ -1697,22 +1706,34 @@ static int ufx_usb_probe(struct usb_interface *interface, + check_warn_goto_error(retval, "unable to find common mode for display and adapter"); + + retval = ufx_reg_set_bits(dev, 0x4000, 0x00000001); +- check_warn_goto_error(retval, "error %d enabling graphics engine", retval); ++ if (retval < 0) { ++ dev_err(dev->gdev, "error %d enabling graphics engine", retval); ++ goto setup_modes; ++ } + + /* ready to begin using device */ + atomic_set(&dev->usb_active, 1); + + dev_dbg(dev->gdev, "checking var"); + retval = ufx_ops_check_var(&info->var, info); +- check_warn_goto_error(retval, "error %d ufx_ops_check_var", retval); ++ if (retval < 0) { ++ dev_err(dev->gdev, "error %d ufx_ops_check_var", retval); ++ goto reset_active; ++ } + + dev_dbg(dev->gdev, "setting par"); + retval = ufx_ops_set_par(info); +- check_warn_goto_error(retval, "error %d ufx_ops_set_par", retval); ++ if (retval < 0) { ++ dev_err(dev->gdev, "error %d ufx_ops_set_par", retval); ++ goto reset_active; ++ } + + dev_dbg(dev->gdev, "registering framebuffer"); + retval = register_framebuffer(info); +- check_warn_goto_error(retval, "error %d register_framebuffer", retval); ++ if (retval < 0) { ++ dev_err(dev->gdev, "error %d register_framebuffer", retval); ++ goto reset_active; ++ } + + dev_info(dev->gdev, "SMSC UDX USB device /dev/fb%d attached. %dx%d resolution." 
+ " Using %dK framebuffer memory\n", info->node, +@@ -1720,28 +1741,34 @@ static int ufx_usb_probe(struct usb_interface *interface, + + return 0; + +-error: +- fb_dealloc_cmap(&info->cmap); +-destroy_modedb: ++reset_active: ++ atomic_set(&dev->usb_active, 0); ++setup_modes: + fb_destroy_modedb(info->monspecs.modedb); + vfree(info->screen_base); + fb_destroy_modelist(&info->modelist); ++error: ++ fb_dealloc_cmap(&info->cmap); ++destroy_modedb: + framebuffer_release(info); ++free_urb_list: ++ if (dev->urbs.count > 0) ++ ufx_free_urb_list(dev); + put_ref: + kref_put(&dev->kref, ufx_free); /* ref for framebuffer */ + kref_put(&dev->kref, ufx_free); /* last ref from kref_init */ + return retval; +- +-e_nomem: +- retval = -ENOMEM; +- goto put_ref; + } + + static void ufx_usb_disconnect(struct usb_interface *interface) { struct ufx_data *dev; - -+ mutex_lock(&disconnect_mutex); ++ struct fb_info *info; + ++ mutex_lock(&disconnect_mutex); + dev = usb_get_intfdata(interface); ++ info = dev->info; pr_debug("USB disconnect starting\n"); -@@ -1761,6 +1770,8 @@ static void ufx_usb_disconnect(struct usb_interface *interface) - kref_put(&dev->kref, ufx_free); - /* consider ufx_data freed */ +@@ -1755,12 +1782,17 @@ static void ufx_usb_disconnect(struct usb_interface *interface) + + /* if clients still have us open, will be freed on last close */ + if (dev->fb_count == 0) +- schedule_delayed_work(&dev->free_framebuffer_work, 0); ++ ufx_free_framebuffer(dev); + +- /* release reference taken by kref_init in probe() */ +- kref_put(&dev->kref, ufx_free); ++ /* this function will wait for all in-flight urbs to complete */ ++ if (dev->urbs.count > 0) ++ ufx_free_urb_list(dev); ++ ++ pr_debug("freeing ufx_data %p", dev); + ++ unregister_framebuffer(info); + +- /* consider ufx_data freed */ + mutex_unlock(&disconnect_mutex); } @@ -294892,10 +352775,68 @@ index c338f7848ae2b..0ebdd28a0b813 100644 diff --git a/drivers/video/fbdev/stifb.c b/drivers/video/fbdev/stifb.c -index 265865610edc6..b0470f4f595ee 100644 +index 265865610edc6..3feb6e40d56d8 100644 --- a/drivers/video/fbdev/stifb.c +++ b/drivers/video/fbdev/stifb.c -@@ -1257,7 +1257,7 @@ static int __init stifb_init_fb(struct sti_struct *sti, int bpp_pref) +@@ -1041,6 +1041,48 @@ stifb_copyarea(struct fb_info *info, const struct fb_copyarea *area) + SETUP_FB(fb); + } + ++#define ARTIST_VRAM_SIZE 0x000804 ++#define ARTIST_VRAM_SRC 0x000808 ++#define ARTIST_VRAM_SIZE_TRIGGER_WINFILL 0x000a04 ++#define ARTIST_VRAM_DEST_TRIGGER_BLOCKMOVE 0x000b00 ++#define ARTIST_SRC_BM_ACCESS 0x018008 ++#define ARTIST_FGCOLOR 0x018010 ++#define ARTIST_BGCOLOR 0x018014 ++#define ARTIST_BITMAP_OP 0x01801c ++ ++static void ++stifb_fillrect(struct fb_info *info, const struct fb_fillrect *rect) ++{ ++ struct stifb_info *fb = container_of(info, struct stifb_info, info); ++ ++ if (rect->rop != ROP_COPY || ++ (fb->id == S9000_ID_HCRX && fb->info.var.bits_per_pixel == 32)) ++ return cfb_fillrect(info, rect); ++ ++ SETUP_HW(fb); ++ ++ if (fb->info.var.bits_per_pixel == 32) { ++ WRITE_WORD(0xBBA0A000, fb, REG_10); ++ ++ NGLE_REALLY_SET_IMAGE_PLANEMASK(fb, 0xffffffff); ++ } else { ++ WRITE_WORD(fb->id == S9000_ID_HCRX ? 
0x13a02000 : 0x13a01000, fb, REG_10); ++ ++ NGLE_REALLY_SET_IMAGE_PLANEMASK(fb, 0xff); ++ } ++ ++ WRITE_WORD(0x03000300, fb, ARTIST_BITMAP_OP); ++ WRITE_WORD(0x2ea01000, fb, ARTIST_SRC_BM_ACCESS); ++ NGLE_QUICK_SET_DST_BM_ACCESS(fb, 0x2ea01000); ++ NGLE_REALLY_SET_IMAGE_FG_COLOR(fb, rect->color); ++ WRITE_WORD(0, fb, ARTIST_BGCOLOR); ++ ++ NGLE_SET_DSTXY(fb, (rect->dx << 16) | (rect->dy)); ++ SET_LENXY_START_RECFILL(fb, (rect->width << 16) | (rect->height)); ++ ++ SETUP_FB(fb); ++} ++ + static void __init + stifb_init_display(struct stifb_info *fb) + { +@@ -1105,7 +1147,7 @@ static const struct fb_ops stifb_ops = { + .owner = THIS_MODULE, + .fb_setcolreg = stifb_setcolreg, + .fb_blank = stifb_blank, +- .fb_fillrect = cfb_fillrect, ++ .fb_fillrect = stifb_fillrect, + .fb_copyarea = stifb_copyarea, + .fb_imageblit = cfb_imageblit, + }; +@@ -1257,7 +1299,7 @@ static int __init stifb_init_fb(struct sti_struct *sti, int bpp_pref) /* limit fbsize to max visible screen size */ if (fix->smem_len > yres*fix->line_length) @@ -294904,7 +352845,16 @@ index 265865610edc6..b0470f4f595ee 100644 fix->accel = FB_ACCEL_NONE; -@@ -1317,11 +1317,11 @@ static int __init stifb_init_fb(struct sti_struct *sti, int bpp_pref) +@@ -1297,7 +1339,7 @@ static int __init stifb_init_fb(struct sti_struct *sti, int bpp_pref) + goto out_err0; + } + info->screen_size = fix->smem_len; +- info->flags = FBINFO_DEFAULT | FBINFO_HWACCEL_COPYAREA; ++ info->flags = FBINFO_HWACCEL_COPYAREA | FBINFO_HWACCEL_FILLRECT; + info->pseudo_palette = &fb->pseudo_palette; + + /* This has to be done !!! */ +@@ -1317,11 +1359,11 @@ static int __init stifb_init_fb(struct sti_struct *sti, int bpp_pref) goto out_err3; } @@ -295002,6 +352952,34 @@ index b9cdd02c10009..d9eec1b60e665 100644 dlfb->info = info; info->par = dlfb; +diff --git a/drivers/video/fbdev/uvesafb.c b/drivers/video/fbdev/uvesafb.c +index 4df6772802d78..1f3b7e013568c 100644 +--- a/drivers/video/fbdev/uvesafb.c ++++ b/drivers/video/fbdev/uvesafb.c +@@ -1758,6 +1758,7 @@ static int uvesafb_probe(struct platform_device *dev) + out_unmap: + iounmap(info->screen_base); + out_mem: ++ arch_phys_wc_del(par->mtrr_handle); + release_mem_region(info->fix.smem_start, info->fix.smem_len); + out_reg: + release_region(0x3c0, 32); +diff --git a/drivers/video/fbdev/vermilion/vermilion.c b/drivers/video/fbdev/vermilion/vermilion.c +index ff61605b8764f..a543643ce014d 100644 +--- a/drivers/video/fbdev/vermilion/vermilion.c ++++ b/drivers/video/fbdev/vermilion/vermilion.c +@@ -277,8 +277,10 @@ static int vmlfb_get_gpu(struct vml_par *par) + + mutex_unlock(&vml_mutex); + +- if (pci_enable_device(par->gpu) < 0) ++ if (pci_enable_device(par->gpu) < 0) { ++ pci_dev_put(par->gpu); + return -ENODEV; ++ } + + return 0; + } diff --git a/drivers/video/fbdev/vesafb.c b/drivers/video/fbdev/vesafb.c index df6de5a9dd4cd..929d4775cb4bc 100644 --- a/drivers/video/fbdev/vesafb.c @@ -295082,6 +353060,26 @@ index e2757ff1c23d2..96e312a3eac75 100644 ret = platform_driver_register(&vga16fb_driver); if (!ret) { +diff --git a/drivers/video/fbdev/via/via-core.c b/drivers/video/fbdev/via/via-core.c +index 89d75079b7307..0363b478fa3ef 100644 +--- a/drivers/video/fbdev/via/via-core.c ++++ b/drivers/video/fbdev/via/via-core.c +@@ -725,7 +725,14 @@ static int __init via_core_init(void) + return ret; + viafb_i2c_init(); + viafb_gpio_init(); +- return pci_register_driver(&via_driver); ++ ret = pci_register_driver(&via_driver); ++ if (ret) { ++ viafb_gpio_exit(); ++ viafb_i2c_exit(); ++ return ret; ++ } ++ ++ return 0; + } + + 
static void __exit via_core_exit(void) diff --git a/drivers/video/fbdev/vt8623fb.c b/drivers/video/fbdev/vt8623fb.c index 7a959e5ba90b8..c274ec5e965ca 100644 --- a/drivers/video/fbdev/vt8623fb.c @@ -295454,6 +353452,19 @@ index b35bb2d57f62c..1e890ef176873 100644 } if (vp_dev->msix_enabled) { +diff --git a/drivers/virtio/virtio_pci_modern.c b/drivers/virtio/virtio_pci_modern.c +index 30654d3a0b41e..a274261f36d63 100644 +--- a/drivers/virtio/virtio_pci_modern.c ++++ b/drivers/virtio/virtio_pci_modern.c +@@ -196,7 +196,7 @@ static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev, + int err; + + if (index >= vp_modern_get_num_queues(mdev)) +- return ERR_PTR(-ENOENT); ++ return ERR_PTR(-EINVAL); + + /* Check if queue is either not available or already active. */ + num = vp_modern_get_queue_size(mdev, index); diff --git a/drivers/virtio/virtio_pci_modern_dev.c b/drivers/virtio/virtio_pci_modern_dev.c index e11ed748e6613..9ab66e44738ee 100644 --- a/drivers/virtio/virtio_pci_modern_dev.c @@ -295516,6 +353527,31 @@ index 3035bb6f54585..603a6f4345efd 100644 head = vq->packed.next_avail_idx; avail_used_flags = vq->packed.avail_used_flags; +diff --git a/drivers/vme/bridges/vme_fake.c b/drivers/vme/bridges/vme_fake.c +index 6a1bc284f297c..eae78366eb028 100644 +--- a/drivers/vme/bridges/vme_fake.c ++++ b/drivers/vme/bridges/vme_fake.c +@@ -1073,6 +1073,8 @@ static int __init fake_init(void) + + /* We need a fake parent device */ + vme_root = __root_device_register("vme", THIS_MODULE); ++ if (IS_ERR(vme_root)) ++ return PTR_ERR(vme_root); + + /* If we want to support more than one bridge at some point, we need to + * dynamically allocate this so we get one per device. +diff --git a/drivers/vme/bridges/vme_tsi148.c b/drivers/vme/bridges/vme_tsi148.c +index be9051b02f24c..5b4c766d15e69 100644 +--- a/drivers/vme/bridges/vme_tsi148.c ++++ b/drivers/vme/bridges/vme_tsi148.c +@@ -1765,6 +1765,7 @@ static int tsi148_dma_list_add(struct vme_dma_list *list, + return 0; + + err_dma: ++ list_del(&entry->list); + err_dest: + err_source: + err_align: diff --git a/drivers/w1/slaves/w1_ds28e04.c b/drivers/w1/slaves/w1_ds28e04.c index e4f336111edc6..6cef6e2edb892 100644 --- a/drivers/w1/slaves/w1_ds28e04.c @@ -295594,6 +353630,48 @@ index ca70c5f032060..9cbeeb4923ecf 100644 } /* Exit from CHAIN state */ +diff --git a/drivers/w1/w1.c b/drivers/w1/w1.c +index f2ae2e563dc54..4a2ddf730a3ac 100644 +--- a/drivers/w1/w1.c ++++ b/drivers/w1/w1.c +@@ -1166,6 +1166,8 @@ int w1_process(void *data) + /* remainder if it woke up early */ + unsigned long jremain = 0; + ++ atomic_inc(&dev->refcnt); ++ + for (;;) { + + if (!jremain && dev->search_count) { +@@ -1193,8 +1195,10 @@ int w1_process(void *data) + */ + mutex_unlock(&dev->list_mutex); + +- if (kthread_should_stop()) ++ if (kthread_should_stop()) { ++ __set_current_state(TASK_RUNNING); + break; ++ } + + /* Only sleep when the search is active. 
*/ + if (dev->search_count) { +diff --git a/drivers/w1/w1_int.c b/drivers/w1/w1_int.c +index b3e1792d9c49f..3a71c5eb2f837 100644 +--- a/drivers/w1/w1_int.c ++++ b/drivers/w1/w1_int.c +@@ -51,10 +51,9 @@ static struct w1_master *w1_alloc_dev(u32 id, int slave_count, int slave_ttl, + dev->search_count = w1_search_count; + dev->enable_pullup = w1_enable_pullup; + +- /* 1 for w1_process to decrement +- * 1 for __w1_remove_master_device to decrement ++ /* For __w1_remove_master_device to decrement + */ +- atomic_set(&dev->refcnt, 2); ++ atomic_set(&dev->refcnt, 1); + + INIT_LIST_HEAD(&dev->slist); + INIT_LIST_HEAD(&dev->async_list); diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig index bf59faeb3de1b..d937f957f8df8 100644 --- a/drivers/watchdog/Kconfig @@ -295620,6 +353698,43 @@ index 1635f421ef2c3..854b1cc723cb6 100644 /* init clock */ dev->clk = devm_clk_get(&pdev->dev, NULL); +diff --git a/drivers/watchdog/diag288_wdt.c b/drivers/watchdog/diag288_wdt.c +index 4cb10877017c7..6ca5d9515d85c 100644 +--- a/drivers/watchdog/diag288_wdt.c ++++ b/drivers/watchdog/diag288_wdt.c +@@ -86,7 +86,7 @@ static int __diag288(unsigned int func, unsigned int timeout, + "1:\n" + EX_TABLE(0b, 1b) + : "+d" (err) : "d"(__func), "d"(__timeout), +- "d"(__action), "d"(__len) : "1", "cc"); ++ "d"(__action), "d"(__len) : "1", "cc", "memory"); + return err; + } + +@@ -268,12 +268,21 @@ static int __init diag288_init(void) + char ebc_begin[] = { + 194, 197, 199, 201, 213 + }; ++ char *ebc_cmd; + + watchdog_set_nowayout(&wdt_dev, nowayout_info); + + if (MACHINE_IS_VM) { +- if (__diag288_vm(WDT_FUNC_INIT, 15, +- ebc_begin, sizeof(ebc_begin)) != 0) { ++ ebc_cmd = kmalloc(sizeof(ebc_begin), GFP_KERNEL); ++ if (!ebc_cmd) { ++ pr_err("The watchdog cannot be initialized\n"); ++ return -ENOMEM; ++ } ++ memcpy(ebc_cmd, ebc_begin, sizeof(ebc_begin)); ++ ret = __diag288_vm(WDT_FUNC_INIT, 15, ++ ebc_cmd, sizeof(ebc_begin)); ++ kfree(ebc_cmd); ++ if (ret != 0) { + pr_err("The watchdog cannot be initialized\n"); + return -EINVAL; + } diff --git a/drivers/watchdog/f71808e_wdt.c b/drivers/watchdog/f71808e_wdt.c index f60beec1bbaea..f7d82d2619133 100644 --- a/drivers/watchdog/f71808e_wdt.c @@ -296984,8 +355099,52 @@ index 3729bea0c9895..0a2d24d6ac6f7 100644 }; static bool gnttab_need_v2(void) +diff --git a/drivers/xen/pcpu.c b/drivers/xen/pcpu.c +index 47aa3a1ccaf57..fd3a644b08559 100644 +--- a/drivers/xen/pcpu.c ++++ b/drivers/xen/pcpu.c +@@ -228,7 +228,7 @@ static int register_pcpu(struct pcpu *pcpu) + + err = device_register(dev); + if (err) { +- pcpu_release(dev); ++ put_device(dev); + return err; + } + +diff --git a/drivers/xen/platform-pci.c b/drivers/xen/platform-pci.c +index 18f0ed8b1f93b..6ebd819338ecb 100644 +--- a/drivers/xen/platform-pci.c ++++ b/drivers/xen/platform-pci.c +@@ -144,7 +144,7 @@ static int platform_pci_probe(struct pci_dev *pdev, + if (ret) { + dev_warn(&pdev->dev, "Unable to set the evtchn callback " + "err=%d\n", ret); +- goto out; ++ goto irq_out; + } + } + +@@ -152,13 +152,16 @@ static int platform_pci_probe(struct pci_dev *pdev, + grant_frames = alloc_xen_mmio(PAGE_SIZE * max_nr_gframes); + ret = gnttab_setup_auto_xlat_frames(grant_frames); + if (ret) +- goto out; ++ goto irq_out; + ret = gnttab_init(); + if (ret) + goto grant_out; + return 0; + grant_out: + gnttab_free_auto_xlat_frames(); ++irq_out: ++ if (!xen_have_vector_callback) ++ free_irq(pdev->irq, pdev); + out: + pci_release_region(pdev, 0); + mem_out: diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c -index 
3369734108af2..e88e8f6f0a334 100644 +index 3369734108af2..719c5d1dda274 100644 --- a/drivers/xen/privcmd.c +++ b/drivers/xen/privcmd.c @@ -581,27 +581,30 @@ static int lock_pages( @@ -297046,6 +355205,51 @@ index 3369734108af2..e88e8f6f0a334 100644 kfree(xbufs); kfree(pages); kfree(kbufs); +@@ -759,7 +760,7 @@ static long privcmd_ioctl_mmap_resource(struct file *file, + goto out; + } + +- pfns = kcalloc(kdata.num, sizeof(*pfns), GFP_KERNEL); ++ pfns = kcalloc(kdata.num, sizeof(*pfns), GFP_KERNEL | __GFP_NOWARN); + if (!pfns) { + rc = -ENOMEM; + goto out; +diff --git a/drivers/xen/pvcalls-back.c b/drivers/xen/pvcalls-back.c +index b47fd8435061a..e18df9aea5313 100644 +--- a/drivers/xen/pvcalls-back.c ++++ b/drivers/xen/pvcalls-back.c +@@ -129,13 +129,13 @@ static bool pvcalls_conn_back_read(void *opaque) + if (masked_prod < masked_cons) { + vec[0].iov_base = data->in + masked_prod; + vec[0].iov_len = wanted; +- iov_iter_kvec(&msg.msg_iter, WRITE, vec, 1, wanted); ++ iov_iter_kvec(&msg.msg_iter, READ, vec, 1, wanted); + } else { + vec[0].iov_base = data->in + masked_prod; + vec[0].iov_len = array_size - masked_prod; + vec[1].iov_base = data->in; + vec[1].iov_len = wanted - vec[0].iov_len; +- iov_iter_kvec(&msg.msg_iter, WRITE, vec, 2, wanted); ++ iov_iter_kvec(&msg.msg_iter, READ, vec, 2, wanted); + } + + atomic_set(&map->read, 0); +@@ -188,13 +188,13 @@ static bool pvcalls_conn_back_write(struct sock_mapping *map) + if (pvcalls_mask(prod, array_size) > pvcalls_mask(cons, array_size)) { + vec[0].iov_base = data->out + pvcalls_mask(cons, array_size); + vec[0].iov_len = size; +- iov_iter_kvec(&msg.msg_iter, READ, vec, 1, size); ++ iov_iter_kvec(&msg.msg_iter, WRITE, vec, 1, size); + } else { + vec[0].iov_base = data->out + pvcalls_mask(cons, array_size); + vec[0].iov_len = array_size - pvcalls_mask(cons, array_size); + vec[1].iov_base = data->out; + vec[1].iov_len = size - vec[0].iov_len; +- iov_iter_kvec(&msg.msg_iter, READ, vec, 2, size); ++ iov_iter_kvec(&msg.msg_iter, WRITE, vec, 2, size); + } + + atomic_set(&map->write, 0); diff --git a/drivers/xen/pvcalls-front.c b/drivers/xen/pvcalls-front.c index 7984645b59563..bbe337dc296e3 100644 --- a/drivers/xen/pvcalls-front.c @@ -297086,7 +355290,7 @@ index e56a5faac395c..cbdff89799807 100644 return DMA_MAPPING_ERROR; diff --git a/drivers/xen/xen-pciback/conf_space_capability.c b/drivers/xen/xen-pciback/conf_space_capability.c -index 22f13abbe9130..5e53b4817f167 100644 +index 22f13abbe9130..097316a741268 100644 --- a/drivers/xen/xen-pciback/conf_space_capability.c +++ b/drivers/xen/xen-pciback/conf_space_capability.c @@ -160,7 +160,7 @@ static void *pm_ctrl_init(struct pci_dev *dev, int offset) @@ -297098,6 +355302,34 @@ index 22f13abbe9130..5e53b4817f167 100644 } static const struct config_field caplist_pm[] = { +@@ -190,13 +190,16 @@ static const struct config_field caplist_pm[] = { + }; + + static struct msi_msix_field_config { +- u16 enable_bit; /* bit for enabling MSI/MSI-X */ +- unsigned int int_type; /* interrupt type for exclusiveness check */ ++ u16 enable_bit; /* bit for enabling MSI/MSI-X */ ++ u16 allowed_bits; /* bits allowed to be changed */ ++ unsigned int int_type; /* interrupt type for exclusiveness check */ + } msi_field_config = { + .enable_bit = PCI_MSI_FLAGS_ENABLE, ++ .allowed_bits = PCI_MSI_FLAGS_ENABLE, + .int_type = INTERRUPT_TYPE_MSI, + }, msix_field_config = { + .enable_bit = PCI_MSIX_FLAGS_ENABLE, ++ .allowed_bits = PCI_MSIX_FLAGS_ENABLE | PCI_MSIX_FLAGS_MASKALL, + .int_type = INTERRUPT_TYPE_MSIX, + }; + +@@ -229,7 
+232,7 @@ static int msi_msix_flags_write(struct pci_dev *dev, int offset, u16 new_value, + return 0; + + if (!dev_data->allow_interrupt_control || +- (new_value ^ old_value) & ~field_config->enable_bit) ++ (new_value ^ old_value) & ~field_config->allowed_bits) + return PCIBIOS_SET_FAILED; + + if (new_value & field_config->enable_bit) { diff --git a/drivers/xen/xenbus/xenbus_client.c b/drivers/xen/xenbus/xenbus_client.c index e8bed1cb76ba2..df68906812315 100644 --- a/drivers/xen/xenbus/xenbus_client.c @@ -297795,6 +356027,32 @@ index c63c3bea5de55..9b28842c63633 100644 +ssize_t v9fs_listxattr(struct dentry *dentry, char *buffer, + size_t buffer_size); #endif /* FS_9P_XATTR_H */ +diff --git a/fs/Makefile b/fs/Makefile +index 84c5e4cdfee5a..d504be65a210a 100644 +--- a/fs/Makefile ++++ b/fs/Makefile +@@ -32,8 +32,6 @@ obj-$(CONFIG_TIMERFD) += timerfd.o + obj-$(CONFIG_EVENTFD) += eventfd.o + obj-$(CONFIG_USERFAULTFD) += userfaultfd.o + obj-$(CONFIG_AIO) += aio.o +-obj-$(CONFIG_IO_URING) += io_uring.o +-obj-$(CONFIG_IO_WQ) += io-wq.o + obj-$(CONFIG_FS_DAX) += dax.o + obj-$(CONFIG_FS_ENCRYPTION) += crypto/ + obj-$(CONFIG_FS_VERITY) += verity/ +diff --git a/fs/affs/file.c b/fs/affs/file.c +index 75ebd2b576ca4..25d480ea797bd 100644 +--- a/fs/affs/file.c ++++ b/fs/affs/file.c +@@ -881,7 +881,7 @@ affs_truncate(struct inode *inode) + if (inode->i_size > AFFS_I(inode)->mmu_private) { + struct address_space *mapping = inode->i_mapping; + struct page *page; +- void *fsdata; ++ void *fsdata = NULL; + loff_t isize = inode->i_size; + int res; + diff --git a/fs/afs/dir.c b/fs/afs/dir.c index 4579bbda46346..948a808a964d1 100644 --- a/fs/afs/dir.c @@ -297841,6 +356099,38 @@ index c4210a3964d8b..bbcc5afd15760 100644 afs_schedule_lock_extension(vnode); spin_unlock(&vnode->lock); } +diff --git a/fs/afs/fs_probe.c b/fs/afs/fs_probe.c +index c0031a3ab42f5..daaf3810cc925 100644 +--- a/fs/afs/fs_probe.c ++++ b/fs/afs/fs_probe.c +@@ -167,8 +167,8 @@ responded: + clear_bit(AFS_SERVER_FL_HAS_FS64, &server->flags); + } + +- if (rxrpc_kernel_get_srtt(call->net->socket, call->rxcall, &rtt_us) && +- rtt_us < server->probe.rtt) { ++ rxrpc_kernel_get_srtt(call->net->socket, call->rxcall, &rtt_us); ++ if (rtt_us < server->probe.rtt) { + server->probe.rtt = rtt_us; + server->rtt = rtt_us; + alist->preferred = index; +@@ -366,12 +366,15 @@ void afs_fs_probe_dispatcher(struct work_struct *work) + unsigned long nowj, timer_at, poll_at; + bool first_pass = true, set_timer = false; + +- if (!net->live) ++ if (!net->live) { ++ afs_dec_servers_outstanding(net); + return; ++ } + + _enter(""); + + if (list_empty(&net->fs_probe_fast) && list_empty(&net->fs_probe_slow)) { ++ afs_dec_servers_outstanding(net); + _leave(" [none]"); + return; + } diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c index 4943413d9c5f7..7d37f63ef0f09 100644 --- a/fs/afs/fsclient.c @@ -298044,7 +356334,7 @@ index 2b35cba8ad62b..88ea20e79ae27 100644 scb->have_cb = true; *_bp += xdr_size(x); diff --git a/fs/aio.c b/fs/aio.c -index 51b08ab01dffc..1a78979663dca 100644 +index 51b08ab01dffc..e88fd9b58f3f1 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -181,8 +181,9 @@ struct poll_iocb { @@ -298058,7 +356348,25 @@ index 51b08ab01dffc..1a78979663dca 100644 struct wait_queue_entry wait; struct work_struct work; }; -@@ -1620,6 +1621,51 @@ static void aio_poll_put_work(struct work_struct *work) +@@ -333,6 +334,9 @@ static int aio_ring_mremap(struct vm_area_struct *vma) + spin_lock(&mm->ioctx_lock); + rcu_read_lock(); + table = rcu_dereference(mm->ioctx_table); ++ if (!table) ++ 
goto out_unlock; ++ + for (i = 0; i < table->nr; i++) { + struct kioctx *ctx; + +@@ -346,6 +350,7 @@ static int aio_ring_mremap(struct vm_area_struct *vma) + } + } + ++out_unlock: + rcu_read_unlock(); + spin_unlock(&mm->ioctx_lock); + return res; +@@ -1620,6 +1625,51 @@ static void aio_poll_put_work(struct work_struct *work) iocb_put(iocb); } @@ -298110,7 +356418,7 @@ index 51b08ab01dffc..1a78979663dca 100644 static void aio_poll_complete_work(struct work_struct *work) { struct poll_iocb *req = container_of(work, struct poll_iocb, work); -@@ -1639,14 +1685,27 @@ static void aio_poll_complete_work(struct work_struct *work) +@@ -1639,14 +1689,27 @@ static void aio_poll_complete_work(struct work_struct *work) * avoid further branches in the fast path. */ spin_lock_irq(&ctx->ctx_lock); @@ -298144,7 +356452,7 @@ index 51b08ab01dffc..1a78979663dca 100644 spin_unlock_irq(&ctx->ctx_lock); iocb_put(iocb); -@@ -1658,13 +1717,14 @@ static int aio_poll_cancel(struct kiocb *iocb) +@@ -1658,13 +1721,14 @@ static int aio_poll_cancel(struct kiocb *iocb) struct aio_kiocb *aiocb = container_of(iocb, struct aio_kiocb, rw); struct poll_iocb *req = &aiocb->poll; @@ -298166,7 +356474,7 @@ index 51b08ab01dffc..1a78979663dca 100644 return 0; } -@@ -1681,21 +1741,27 @@ static int aio_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync, +@@ -1681,21 +1745,27 @@ static int aio_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync, if (mask && !(mask & req->events)) return 0; @@ -298205,7 +356513,7 @@ index 51b08ab01dffc..1a78979663dca 100644 iocb = NULL; INIT_WORK(&req->work, aio_poll_put_work); schedule_work(&req->work); -@@ -1704,7 +1770,43 @@ static int aio_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync, +@@ -1704,7 +1774,43 @@ static int aio_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync, if (iocb) iocb_put(iocb); } else { @@ -298250,7 +356558,7 @@ index 51b08ab01dffc..1a78979663dca 100644 } return 1; } -@@ -1712,6 +1814,7 @@ static int aio_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync, +@@ -1712,6 +1818,7 @@ static int aio_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync, struct aio_poll_table { struct poll_table_struct pt; struct aio_kiocb *iocb; @@ -298258,7 +356566,7 @@ index 51b08ab01dffc..1a78979663dca 100644 int error; }; -@@ -1722,11 +1825,12 @@ aio_poll_queue_proc(struct file *file, struct wait_queue_head *head, +@@ -1722,11 +1829,12 @@ aio_poll_queue_proc(struct file *file, struct wait_queue_head *head, struct aio_poll_table *pt = container_of(p, struct aio_poll_table, pt); /* multiple wait queues per file are not supported */ @@ -298272,7 +356580,7 @@ index 51b08ab01dffc..1a78979663dca 100644 pt->error = 0; pt->iocb->poll.head = head; add_wait_queue(head, &pt->iocb->poll.wait); -@@ -1751,12 +1855,14 @@ static int aio_poll(struct aio_kiocb *aiocb, const struct iocb *iocb) +@@ -1751,12 +1859,14 @@ static int aio_poll(struct aio_kiocb *aiocb, const struct iocb *iocb) req->events = demangle_poll(iocb->aio_buf) | EPOLLERR | EPOLLHUP; req->head = NULL; @@ -298288,7 +356596,7 @@ index 51b08ab01dffc..1a78979663dca 100644 apt.error = -EINVAL; /* same as no support for IOCB_CMD_POLL */ /* initialized the list so that we can do list_empty checks */ -@@ -1765,23 +1871,35 @@ static int aio_poll(struct aio_kiocb *aiocb, const struct iocb *iocb) +@@ -1765,23 +1875,35 @@ static int aio_poll(struct aio_kiocb *aiocb, const struct iocb *iocb) mask = vfs_poll(req->file, &apt.pt) & req->events; spin_lock_irq(&ctx->ctx_lock); @@ 
-298396,7 +356704,7 @@ index 473d21b3a86de..f581c4d008971 100644 unsigned long limit; diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c -index a813b70f594e6..c93150f36a52a 100644 +index a813b70f594e6..30379c33ad20c 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -170,8 +170,8 @@ static int padzero(unsigned long elf_bss) @@ -298428,6 +356736,15 @@ index a813b70f594e6..c93150f36a52a 100644 int load_addr_set = 0; unsigned long error; struct elf_phdr *elf_ppnt, *elf_phdata, *interp_elf_phdata = NULL; +@@ -910,7 +910,7 @@ static int load_elf_binary(struct linux_binprm *bprm) + interp_elf_ex = kmalloc(sizeof(*interp_elf_ex), GFP_KERNEL); + if (!interp_elf_ex) { + retval = -ENOMEM; +- goto out_free_ph; ++ goto out_free_file; + } + + /* Get the exec headers */ @@ -1156,6 +1156,17 @@ out_free_interp: reloc_func_desc = load_bias; } @@ -298465,7 +356782,15 @@ index a813b70f594e6..c93150f36a52a 100644 if (retval < 0) goto out; -@@ -1606,17 +1618,16 @@ static void fill_siginfo_note(struct memelfnote *note, user_siginfo_t *csigdata, +@@ -1319,6 +1331,7 @@ out: + out_free_dentry: + kfree(interp_elf_ex); + kfree(interp_elf_phdata); ++out_free_file: + allow_write_access(interpreter); + if (interpreter) + fput(interpreter); +@@ -1606,17 +1619,16 @@ static void fill_siginfo_note(struct memelfnote *note, user_siginfo_t *csigdata, * long file_ofs * followed by COUNT filenames in ASCII: "FILE1" NUL "FILE2" NUL... */ @@ -298486,7 +356811,7 @@ index a813b70f594e6..c93150f36a52a 100644 if (count > UINT_MAX / 64) return -EINVAL; size = count * 64; -@@ -1638,11 +1649,12 @@ static int fill_files_note(struct memelfnote *note) +@@ -1638,11 +1650,12 @@ static int fill_files_note(struct memelfnote *note) name_base = name_curpos = ((char *)data) + names_ofs; remaining = size - names_ofs; count = 0; @@ -298501,7 +356826,7 @@ index a813b70f594e6..c93150f36a52a 100644 if (!file) continue; filename = file_path(file, name_curpos, remaining); -@@ -1662,9 +1674,9 @@ static int fill_files_note(struct memelfnote *note) +@@ -1662,9 +1675,9 @@ static int fill_files_note(struct memelfnote *note) memmove(name_curpos, filename, n); name_curpos += n; @@ -298514,7 +356839,7 @@ index a813b70f594e6..c93150f36a52a 100644 count++; } -@@ -1675,7 +1687,7 @@ static int fill_files_note(struct memelfnote *note) +@@ -1675,7 +1688,7 @@ static int fill_files_note(struct memelfnote *note) * Count usually is less than mm->map_count, * we need to move filenames down. */ @@ -298523,7 +356848,7 @@ index a813b70f594e6..c93150f36a52a 100644 if (n != 0) { unsigned shift_bytes = n * 3 * sizeof(data[0]); memmove(name_base - shift_bytes, name_base, -@@ -1787,7 +1799,7 @@ static int fill_thread_core_info(struct elf_thread_core_info *t, +@@ -1787,7 +1800,7 @@ static int fill_thread_core_info(struct elf_thread_core_info *t, static int fill_note_info(struct elfhdr *elf, int phdrs, struct elf_note_info *info, @@ -298532,7 +356857,7 @@ index a813b70f594e6..c93150f36a52a 100644 { struct task_struct *dump_task = current; const struct user_regset_view *view = task_user_regset_view(dump_task); -@@ -1859,7 +1871,7 @@ static int fill_note_info(struct elfhdr *elf, int phdrs, +@@ -1859,7 +1872,7 @@ static int fill_note_info(struct elfhdr *elf, int phdrs, * Now fill in each thread's information. 
*/ for (t = info->thread; t != NULL; t = t->next) @@ -298541,7 +356866,7 @@ index a813b70f594e6..c93150f36a52a 100644 return 0; /* -@@ -1868,13 +1880,13 @@ static int fill_note_info(struct elfhdr *elf, int phdrs, +@@ -1868,13 +1881,13 @@ static int fill_note_info(struct elfhdr *elf, int phdrs, fill_psinfo(psinfo, dump_task->group_leader, dump_task->mm); info->size += notesize(&info->psinfo); @@ -298557,7 +356882,7 @@ index a813b70f594e6..c93150f36a52a 100644 info->size += notesize(&info->files); return 1; -@@ -2016,7 +2028,7 @@ static int elf_note_info_init(struct elf_note_info *info) +@@ -2016,7 +2029,7 @@ static int elf_note_info_init(struct elf_note_info *info) static int fill_note_info(struct elfhdr *elf, int phdrs, struct elf_note_info *info, @@ -298566,7 +356891,7 @@ index a813b70f594e6..c93150f36a52a 100644 { struct core_thread *ct; struct elf_thread_status *ets; -@@ -2037,13 +2049,13 @@ static int fill_note_info(struct elfhdr *elf, int phdrs, +@@ -2037,13 +2050,13 @@ static int fill_note_info(struct elfhdr *elf, int phdrs, list_for_each_entry(ets, &info->thread_list, list) { int sz; @@ -298583,7 +356908,7 @@ index a813b70f594e6..c93150f36a52a 100644 /* Set up header */ fill_elf_header(elf, phdrs, ELF_ARCH, ELF_CORE_EFLAGS); -@@ -2059,18 +2071,18 @@ static int fill_note_info(struct elfhdr *elf, int phdrs, +@@ -2059,18 +2072,18 @@ static int fill_note_info(struct elfhdr *elf, int phdrs, fill_note(info->notes + 1, "CORE", NT_PRPSINFO, sizeof(*info->psinfo), info->psinfo); @@ -298606,7 +356931,7 @@ index a813b70f594e6..c93150f36a52a 100644 if (info->prstatus->pr_fpvalid) fill_note(info->notes + info->numnote++, "CORE", NT_PRFPREG, sizeof(*info->fpu), info->fpu); -@@ -2156,8 +2168,7 @@ static void fill_extnum_info(struct elfhdr *elf, struct elf_shdr *shdr4extnum, +@@ -2156,8 +2169,7 @@ static void fill_extnum_info(struct elfhdr *elf, struct elf_shdr *shdr4extnum, static int elf_core_dump(struct coredump_params *cprm) { int has_dumped = 0; @@ -298616,7 +356941,7 @@ index a813b70f594e6..c93150f36a52a 100644 struct elfhdr elf; loff_t offset = 0, dataoff; struct elf_note_info info = { }; -@@ -2165,16 +2176,12 @@ static int elf_core_dump(struct coredump_params *cprm) +@@ -2165,16 +2177,12 @@ static int elf_core_dump(struct coredump_params *cprm) struct elf_shdr *shdr4extnum = NULL; Elf_Half e_phnum; elf_addr_t e_shoff; @@ -298634,7 +356959,7 @@ index a813b70f594e6..c93150f36a52a 100644 /* for notes section */ segs++; -@@ -2188,7 +2195,7 @@ static int elf_core_dump(struct coredump_params *cprm) +@@ -2188,7 +2196,7 @@ static int elf_core_dump(struct coredump_params *cprm) * Collect all the non-memory information about the process for the * notes. This also sets up the file header. 
*/ @@ -298643,7 +356968,7 @@ index a813b70f594e6..c93150f36a52a 100644 goto end_coredump; has_dumped = 1; -@@ -2213,7 +2220,7 @@ static int elf_core_dump(struct coredump_params *cprm) +@@ -2213,7 +2221,7 @@ static int elf_core_dump(struct coredump_params *cprm) dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE); @@ -298652,7 +356977,7 @@ index a813b70f594e6..c93150f36a52a 100644 offset += elf_core_extra_data_size(); e_shoff = offset; -@@ -2233,8 +2240,8 @@ static int elf_core_dump(struct coredump_params *cprm) +@@ -2233,8 +2241,8 @@ static int elf_core_dump(struct coredump_params *cprm) goto end_coredump; /* Write program headers for segments dump */ @@ -298663,7 +356988,7 @@ index a813b70f594e6..c93150f36a52a 100644 struct elf_phdr phdr; phdr.p_type = PT_LOAD; -@@ -2271,8 +2278,8 @@ static int elf_core_dump(struct coredump_params *cprm) +@@ -2271,8 +2279,8 @@ static int elf_core_dump(struct coredump_params *cprm) /* Align to page */ dump_skip_to(cprm, dataoff); @@ -298674,7 +356999,7 @@ index a813b70f594e6..c93150f36a52a 100644 if (!dump_user_range(cprm, meta->start, meta->dump_size)) goto end_coredump; -@@ -2289,7 +2296,6 @@ static int elf_core_dump(struct coredump_params *cprm) +@@ -2289,7 +2297,6 @@ static int elf_core_dump(struct coredump_params *cprm) end_coredump: free_note_info(&info); kfree(shdr4extnum); @@ -298683,10 +357008,22 @@ index a813b70f594e6..c93150f36a52a 100644 return has_dumped; } diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c -index 6d8fd6030cbb5..830a6a876ffea 100644 +index 6d8fd6030cbb5..c316931fc99c5 100644 --- a/fs/binfmt_elf_fdpic.c +++ b/fs/binfmt_elf_fdpic.c -@@ -1465,7 +1465,7 @@ static bool elf_fdpic_dump_segments(struct coredump_params *cprm, +@@ -434,8 +434,9 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm) + current->mm->start_stack = current->mm->start_brk + stack_size; + #endif + +- if (create_elf_fdpic_tables(bprm, current->mm, +- &exec_params, &interp_params) < 0) ++ retval = create_elf_fdpic_tables(bprm, current->mm, &exec_params, ++ &interp_params); ++ if (retval < 0) + goto error; + + kdebug("- start_code %lx", current->mm->start_code); +@@ -1465,7 +1466,7 @@ static bool elf_fdpic_dump_segments(struct coredump_params *cprm, static int elf_fdpic_core_dump(struct coredump_params *cprm) { int has_dumped = 0; @@ -298695,7 +357032,7 @@ index 6d8fd6030cbb5..830a6a876ffea 100644 int i; struct elfhdr *elf = NULL; loff_t offset = 0, dataoff; -@@ -1480,8 +1480,6 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm) +@@ -1480,8 +1481,6 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm) elf_addr_t e_shoff; struct core_thread *ct; struct elf_thread_status *tmp; @@ -298704,7 +357041,7 @@ index 6d8fd6030cbb5..830a6a876ffea 100644 /* alloc memory for large data structures: too large to be on stack */ elf = kmalloc(sizeof(*elf), GFP_KERNEL); -@@ -1491,9 +1489,6 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm) +@@ -1491,9 +1490,6 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm) if (!psinfo) goto end_coredump; @@ -298714,7 +357051,7 @@ index 6d8fd6030cbb5..830a6a876ffea 100644 for (ct = current->mm->core_state->dumper.next; ct; ct = ct->next) { tmp = elf_dump_thread_status(cprm->siginfo->si_signo, -@@ -1513,7 +1508,7 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm) +@@ -1513,7 +1509,7 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm) tmp->next = thread_list; thread_list = tmp; @@ -298723,7 +357060,7 @@ index 6d8fd6030cbb5..830a6a876ffea 100644 /* for notes 
section */ segs++; -@@ -1558,7 +1553,7 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm) +@@ -1558,7 +1554,7 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm) /* Page-align dumped data */ dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE); @@ -298732,7 +357069,7 @@ index 6d8fd6030cbb5..830a6a876ffea 100644 offset += elf_core_extra_data_size(); e_shoff = offset; -@@ -1578,8 +1573,8 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm) +@@ -1578,8 +1574,8 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm) goto end_coredump; /* write program headers for segments dump */ @@ -298743,7 +357080,7 @@ index 6d8fd6030cbb5..830a6a876ffea 100644 struct elf_phdr phdr; size_t sz; -@@ -1628,7 +1623,7 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm) +@@ -1628,7 +1624,7 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm) dump_skip_to(cprm, dataoff); @@ -298752,7 +357089,7 @@ index 6d8fd6030cbb5..830a6a876ffea 100644 goto end_coredump; if (!elf_core_write_extra_data(cprm)) -@@ -1652,7 +1647,6 @@ end_coredump: +@@ -1652,7 +1648,6 @@ end_coredump: thread_list = thread_list->next; kfree(tmp); } @@ -298805,6 +357142,25 @@ index 5d776f80ee50c..7ca3e0db06ffa 100644 u32 addr, rp_val; if (get_user(rp_val, rp)) return -EFAULT; +diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c +index e1eae7ea823ae..bb202ad369d53 100644 +--- a/fs/binfmt_misc.c ++++ b/fs/binfmt_misc.c +@@ -44,10 +44,10 @@ static LIST_HEAD(entries); + static int enabled = 1; + + enum {Enabled, Magic}; +-#define MISC_FMT_PRESERVE_ARGV0 (1 << 31) +-#define MISC_FMT_OPEN_BINARY (1 << 30) +-#define MISC_FMT_CREDENTIALS (1 << 29) +-#define MISC_FMT_OPEN_FILE (1 << 28) ++#define MISC_FMT_PRESERVE_ARGV0 (1UL << 31) ++#define MISC_FMT_OPEN_BINARY (1UL << 30) ++#define MISC_FMT_CREDENTIALS (1UL << 29) ++#define MISC_FMT_OPEN_FILE (1UL << 28) + + typedef struct { + struct list_head list; diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c index 309516e6a9682..43c89952b7d25 100644 --- a/fs/btrfs/async-thread.c @@ -298838,7 +357194,7 @@ index 309516e6a9682..43c89952b7d25 100644 run_ordered_work(wq, work); } else { diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c -index f735b8798ba12..2e7c3e48bc9ce 100644 +index f735b8798ba12..cd9202867d98a 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -138,6 +138,7 @@ struct share_check { @@ -298849,7 +357205,69 @@ index f735b8798ba12..2e7c3e48bc9ce 100644 }; static inline int extent_is_shared(struct share_check *sc) -@@ -818,16 +819,11 @@ static int add_delayed_refs(const struct btrfs_fs_info *fs_info, +@@ -288,8 +289,10 @@ static void prelim_release(struct preftree *preftree) + struct prelim_ref *ref, *next_ref; + + rbtree_postorder_for_each_entry_safe(ref, next_ref, +- &preftree->root.rb_root, rbnode) ++ &preftree->root.rb_root, rbnode) { ++ free_inode_elem_list(ref->inode_list); + free_pref(ref); ++ } + + preftree->root = RB_ROOT_CACHED; + preftree->count = 0; +@@ -430,6 +433,7 @@ static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path, + u64 wanted_disk_byte = ref->wanted_disk_byte; + u64 count = 0; + u64 data_offset; ++ u8 type; + + if (level != 0) { + eb = path->nodes[level]; +@@ -484,6 +488,9 @@ static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path, + continue; + } + fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item); ++ type = btrfs_file_extent_type(eb, fi); ++ if (type == BTRFS_FILE_EXTENT_INLINE) ++ goto next; + disk_byte = btrfs_file_extent_disk_bytenr(eb, fi); 
+ data_offset = btrfs_file_extent_offset(eb, fi); + +@@ -647,6 +654,18 @@ unode_aux_to_inode_list(struct ulist_node *node) + return (struct extent_inode_elem *)(uintptr_t)node->aux; + } + ++static void free_leaf_list(struct ulist *ulist) ++{ ++ struct ulist_node *node; ++ struct ulist_iterator uiter; ++ ++ ULIST_ITER_INIT(&uiter); ++ while ((node = ulist_next(ulist, &uiter))) ++ free_inode_elem_list(unode_aux_to_inode_list(node)); ++ ++ ulist_free(ulist); ++} ++ + /* + * We maintain three separate rbtrees: one for direct refs, one for + * indirect refs which have a key, and one for indirect refs which do not +@@ -761,7 +780,11 @@ static int resolve_indirect_refs(struct btrfs_fs_info *fs_info, + cond_resched(); + } + out: +- ulist_free(parents); ++ /* ++ * We may have inode lists attached to refs in the parents ulist, so we ++ * must free them before freeing the ulist and its refs. ++ */ ++ free_leaf_list(parents); + return ret; + } + +@@ -818,16 +841,11 @@ static int add_delayed_refs(const struct btrfs_fs_info *fs_info, struct preftrees *preftrees, struct share_check *sc) { struct btrfs_delayed_ref_node *node; @@ -298866,7 +357284,7 @@ index f735b8798ba12..2e7c3e48bc9ce 100644 spin_lock(&head->lock); for (n = rb_first_cached(&head->ref_tree); n; n = rb_next(n)) { node = rb_entry(n, struct btrfs_delayed_ref_node, -@@ -853,10 +849,16 @@ static int add_delayed_refs(const struct btrfs_fs_info *fs_info, +@@ -853,10 +871,16 @@ static int add_delayed_refs(const struct btrfs_fs_info *fs_info, case BTRFS_TREE_BLOCK_REF_KEY: { /* NORMAL INDIRECT METADATA backref */ struct btrfs_delayed_tree_ref *ref; @@ -298884,7 +357302,7 @@ index f735b8798ba12..2e7c3e48bc9ce 100644 node->bytenr, count, sc, GFP_ATOMIC); break; -@@ -882,13 +884,22 @@ static int add_delayed_refs(const struct btrfs_fs_info *fs_info, +@@ -882,13 +906,22 @@ static int add_delayed_refs(const struct btrfs_fs_info *fs_info, key.offset = ref->offset; /* @@ -298913,7 +357331,7 @@ index f735b8798ba12..2e7c3e48bc9ce 100644 ret = add_indirect_ref(fs_info, preftrees, ref->root, &key, 0, node->bytenr, count, sc, -@@ -918,7 +929,7 @@ static int add_delayed_refs(const struct btrfs_fs_info *fs_info, +@@ -918,7 +951,7 @@ static int add_delayed_refs(const struct btrfs_fs_info *fs_info, } if (!ret) ret = extent_is_shared(sc); @@ -298922,7 +357340,7 @@ index f735b8798ba12..2e7c3e48bc9ce 100644 spin_unlock(&head->lock); return ret; } -@@ -1021,7 +1032,8 @@ static int add_inline_refs(const struct btrfs_fs_info *fs_info, +@@ -1021,7 +1054,8 @@ static int add_inline_refs(const struct btrfs_fs_info *fs_info, key.type = BTRFS_EXTENT_DATA_KEY; key.offset = btrfs_extent_data_ref_offset(leaf, dref); @@ -298932,7 +357350,7 @@ index f735b8798ba12..2e7c3e48bc9ce 100644 ret = BACKREF_FOUND_SHARED; break; } -@@ -1031,6 +1043,7 @@ static int add_inline_refs(const struct btrfs_fs_info *fs_info, +@@ -1031,6 +1065,7 @@ static int add_inline_refs(const struct btrfs_fs_info *fs_info, ret = add_indirect_ref(fs_info, preftrees, root, &key, 0, bytenr, count, sc, GFP_NOFS); @@ -298940,7 +357358,7 @@ index f735b8798ba12..2e7c3e48bc9ce 100644 break; } default: -@@ -1120,7 +1133,8 @@ static int add_keyed_refs(struct btrfs_fs_info *fs_info, +@@ -1120,7 +1155,8 @@ static int add_keyed_refs(struct btrfs_fs_info *fs_info, key.type = BTRFS_EXTENT_DATA_KEY; key.offset = btrfs_extent_data_ref_offset(leaf, dref); @@ -298950,7 +357368,7 @@ index f735b8798ba12..2e7c3e48bc9ce 100644 ret = BACKREF_FOUND_SHARED; break; } -@@ -1214,7 +1228,12 @@ again: +@@ -1214,7 +1250,12 @@ again: ret = 
btrfs_search_slot(NULL, fs_info->extent_root, &key, path, 0, 0); if (ret < 0) goto out; @@ -298964,7 +357382,20 @@ index f735b8798ba12..2e7c3e48bc9ce 100644 #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS if (trans && likely(trans->type != __TRANS_DUMMY) && -@@ -1360,10 +1379,18 @@ again: +@@ -1352,6 +1393,12 @@ again: + if (ret < 0) + goto out; + ref->inode_list = eie; ++ /* ++ * We transferred the list ownership to the ref, ++ * so set to NULL to avoid a double free in case ++ * an error happens after this. ++ */ ++ eie = NULL; + } + ret = ulist_add_merge_ptr(refs, ref->parent, + ref->inode_list, +@@ -1360,15 +1407,31 @@ again: goto out; if (!ret && extent_item_pos) { /* @@ -298986,7 +357417,45 @@ index f735b8798ba12..2e7c3e48bc9ce 100644 while (eie->next) eie = eie->next; eie->next = ref->inode_list; -@@ -1534,6 +1561,7 @@ int btrfs_check_shared(struct btrfs_root *root, u64 inum, u64 bytenr, + } + eie = NULL; ++ /* ++ * We have transferred the inode list ownership from ++ * this ref to the ref we added to the 'refs' ulist. ++ * So set this ref's inode list to NULL to avoid ++ * use-after-free when our caller uses it or double ++ * frees in case an error happens before we return. ++ */ ++ ref->inode_list = NULL; + } + cond_resched(); + } +@@ -1385,24 +1448,6 @@ out: + return ret; + } + +-static void free_leaf_list(struct ulist *blocks) +-{ +- struct ulist_node *node = NULL; +- struct extent_inode_elem *eie; +- struct ulist_iterator uiter; +- +- ULIST_ITER_INIT(&uiter); +- while ((node = ulist_next(blocks, &uiter))) { +- if (!node->aux) +- continue; +- eie = unode_aux_to_inode_list(node); +- free_inode_elem_list(eie); +- node->aux = 0; +- } +- +- ulist_free(blocks); +-} +- + /* + * Finds all leafs with a reference to the specified combination of bytenr and + * offset. 
key_list_head will point to a list of corresponding keys (caller must +@@ -1534,6 +1579,7 @@ int btrfs_check_shared(struct btrfs_root *root, u64 inum, u64 bytenr, .root_objectid = root->root_key.objectid, .inum = inum, .share_count = 0, @@ -298994,7 +357463,7 @@ index f735b8798ba12..2e7c3e48bc9ce 100644 }; ulist_init(roots); -@@ -1568,6 +1596,7 @@ int btrfs_check_shared(struct btrfs_root *root, u64 inum, u64 bytenr, +@@ -1568,6 +1614,7 @@ int btrfs_check_shared(struct btrfs_root *root, u64 inum, u64 bytenr, break; bytenr = node->val; shared.share_count = 0; @@ -299002,6 +357471,61 @@ index f735b8798ba12..2e7c3e48bc9ce 100644 cond_resched(); } +@@ -2017,10 +2064,29 @@ out: + return ret; + } + ++static int build_ino_list(u64 inum, u64 offset, u64 root, void *ctx) ++{ ++ struct btrfs_data_container *inodes = ctx; ++ const size_t c = 3 * sizeof(u64); ++ ++ if (inodes->bytes_left >= c) { ++ inodes->bytes_left -= c; ++ inodes->val[inodes->elem_cnt] = inum; ++ inodes->val[inodes->elem_cnt + 1] = offset; ++ inodes->val[inodes->elem_cnt + 2] = root; ++ inodes->elem_cnt += 3; ++ } else { ++ inodes->bytes_missing += c - inodes->bytes_left; ++ inodes->bytes_left = 0; ++ inodes->elem_missed += 3; ++ } ++ ++ return 0; ++} ++ + int iterate_inodes_from_logical(u64 logical, struct btrfs_fs_info *fs_info, + struct btrfs_path *path, +- iterate_extent_inodes_t *iterate, void *ctx, +- bool ignore_offset) ++ void *ctx, bool ignore_offset) + { + int ret; + u64 extent_item_pos; +@@ -2038,7 +2104,7 @@ int iterate_inodes_from_logical(u64 logical, struct btrfs_fs_info *fs_info, + extent_item_pos = logical - found_key.objectid; + ret = iterate_extent_inodes(fs_info, found_key.objectid, + extent_item_pos, search_commit_root, +- iterate, ctx, ignore_offset); ++ build_ino_list, ctx, ignore_offset); + + return ret; + } +diff --git a/fs/btrfs/backref.h b/fs/btrfs/backref.h +index ba454032dbe22..2759de7d324c8 100644 +--- a/fs/btrfs/backref.h ++++ b/fs/btrfs/backref.h +@@ -35,8 +35,7 @@ int iterate_extent_inodes(struct btrfs_fs_info *fs_info, + bool ignore_offset); + + int iterate_inodes_from_logical(u64 logical, struct btrfs_fs_info *fs_info, +- struct btrfs_path *path, +- iterate_extent_inodes_t *iterate, void *ctx, ++ struct btrfs_path *path, void *ctx, + bool ignore_offset); + + int paths_from_inode(u64 inum, struct inode_fs_paths *ipath); diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c index a3b830b8410a8..5eea56789ccc9 100644 --- a/fs/btrfs/block-group.c @@ -300232,7 +358756,7 @@ index d029be40ea6f0..03d8a2d49bf41 100644 trans = btrfs_start_transaction(root, 0); if (IS_ERR(trans)) { diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c -index 355ea88d5c5f7..f4015556cafad 100644 +index 355ea88d5c5f7..ce2da06c9d7b7 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -121,88 +121,6 @@ struct async_submit_bio { @@ -300324,7 +358848,30 @@ index 355ea88d5c5f7..f4015556cafad 100644 /* * Compute the csum of a btree block and store the result to provided buffer. */ -@@ -441,17 +359,31 @@ static int csum_one_extent_buffer(struct extent_buffer *eb) +@@ -284,11 +202,9 @@ static bool btrfs_supported_super_csum(u16 csum_type) + * Return 0 if the superblock checksum type matches the checksum value of that + * algorithm. Pass the raw disk superblock data. 
+ */ +-static int btrfs_check_super_csum(struct btrfs_fs_info *fs_info, +- char *raw_disk_sb) ++int btrfs_check_super_csum(struct btrfs_fs_info *fs_info, ++ const struct btrfs_super_block *disk_sb) + { +- struct btrfs_super_block *disk_sb = +- (struct btrfs_super_block *)raw_disk_sb; + char result[BTRFS_CSUM_SIZE]; + SHASH_DESC_ON_STACK(shash, fs_info->csum_shash); + +@@ -299,7 +215,7 @@ static int btrfs_check_super_csum(struct btrfs_fs_info *fs_info, + * BTRFS_SUPER_INFO_SIZE range, we expect that the unused space is + * filled with zeros and is included in the checksum. + */ +- crypto_shash_digest(shash, raw_disk_sb + BTRFS_CSUM_SIZE, ++ crypto_shash_digest(shash, (const u8 *)disk_sb + BTRFS_CSUM_SIZE, + BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE, result); + + if (memcmp(disk_sb->csum, result, fs_info->csum_size)) +@@ -441,17 +357,38 @@ static int csum_one_extent_buffer(struct extent_buffer *eb) else ret = btrfs_check_leaf_full(eb); @@ -300357,12 +358904,19 @@ index 355ea88d5c5f7..f4015556cafad 100644 + btrfs_print_tree(eb, 0); + btrfs_err(fs_info, "block=%llu write time tree block corruption detected", + eb->start); -+ WARN_ON(IS_ENABLED(CONFIG_BTRFS_DEBUG)); ++ /* ++ * Be noisy if this is an extent buffer from a log tree. We don't abort ++ * a transaction in case there's a bad log tree extent buffer, we just ++ * fallback to a transaction commit. Still we want to know when there is ++ * a bad log tree extent buffer, as that may signal a bug somewhere. ++ */ ++ WARN_ON(IS_ENABLED(CONFIG_BTRFS_DEBUG) || ++ btrfs_header_owner(eb) == BTRFS_TREE_LOG_OBJECTID); + return ret; } /* Checksum all dirty extent buffers in one bio_vec */ -@@ -1500,7 +1432,7 @@ static int btrfs_init_fs_root(struct btrfs_root *root, dev_t anon_dev) +@@ -1500,7 +1437,7 @@ static int btrfs_init_fs_root(struct btrfs_root *root, dev_t anon_dev) goto fail; if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID && @@ -300371,7 +358925,7 @@ index 355ea88d5c5f7..f4015556cafad 100644 set_bit(BTRFS_ROOT_SHAREABLE, &root->state); btrfs_check_and_init_root_item(&root->root_item); } -@@ -1724,13 +1656,22 @@ again: +@@ -1724,13 +1661,22 @@ again: ret = btrfs_insert_fs_root(fs_info, root); if (ret) { @@ -300396,7 +358950,47 @@ index 355ea88d5c5f7..f4015556cafad 100644 btrfs_put_root(root); return ERR_PTR(ret); } -@@ -2851,6 +2792,7 @@ static int __cold init_tree_roots(struct btrfs_fs_info *fs_info) +@@ -2463,7 +2409,9 @@ static int btrfs_read_roots(struct btrfs_fs_info *fs_info) + fs_info->dev_root = root; + } + /* Initialize fs_info for all devices in any case */ +- btrfs_init_devices_late(fs_info); ++ ret = btrfs_init_devices_late(fs_info); ++ if (ret) ++ goto out; + + /* If IGNOREDATACSUMS is set don't bother reading the csum root. 
*/ + if (!btrfs_test_opt(fs_info, IGNOREDATACSUMS)) { +@@ -2548,8 +2496,8 @@ out: + * 1, 2 2nd and 3rd backup copy + * -1 skip bytenr check + */ +-static int validate_super(struct btrfs_fs_info *fs_info, +- struct btrfs_super_block *sb, int mirror_num) ++int btrfs_validate_super(struct btrfs_fs_info *fs_info, ++ struct btrfs_super_block *sb, int mirror_num) + { + u64 nodesize = btrfs_super_nodesize(sb); + u64 sectorsize = btrfs_super_sectorsize(sb); +@@ -2732,7 +2680,7 @@ static int validate_super(struct btrfs_fs_info *fs_info, + */ + static int btrfs_validate_mount_super(struct btrfs_fs_info *fs_info) + { +- return validate_super(fs_info, fs_info->super_copy, 0); ++ return btrfs_validate_super(fs_info, fs_info->super_copy, 0); + } + + /* +@@ -2746,7 +2694,7 @@ static int btrfs_validate_write_super(struct btrfs_fs_info *fs_info, + { + int ret; + +- ret = validate_super(fs_info, sb, -1); ++ ret = btrfs_validate_super(fs_info, sb, -1); + if (ret < 0) + goto out; + if (!btrfs_supported_super_csum(btrfs_super_csum_type(sb))) { +@@ -2851,6 +2799,7 @@ static int __cold init_tree_roots(struct btrfs_fs_info *fs_info) /* All successful */ fs_info->generation = generation; fs_info->last_trans_committed = generation; @@ -300404,7 +358998,7 @@ index 355ea88d5c5f7..f4015556cafad 100644 /* Always begin writing backup roots after the one being used */ if (backup_index < 0) { -@@ -2883,12 +2825,14 @@ void btrfs_init_fs_info(struct btrfs_fs_info *fs_info) +@@ -2883,12 +2832,14 @@ void btrfs_init_fs_info(struct btrfs_fs_info *fs_info) spin_lock_init(&fs_info->buffer_lock); spin_lock_init(&fs_info->unused_bgs_lock); spin_lock_init(&fs_info->treelog_bg_lock); @@ -300419,7 +359013,7 @@ index 355ea88d5c5f7..f4015556cafad 100644 seqlock_init(&fs_info->profiles_lock); INIT_LIST_HEAD(&fs_info->dirty_cowonly_roots); -@@ -2980,12 +2924,11 @@ void btrfs_init_fs_info(struct btrfs_fs_info *fs_info) +@@ -2980,12 +2931,11 @@ void btrfs_init_fs_info(struct btrfs_fs_info *fs_info) fs_info->sectorsize_bits = ilog2(4096); fs_info->stripesize = 4096; @@ -300434,7 +359028,7 @@ index 355ea88d5c5f7..f4015556cafad 100644 fs_info->bg_reclaim_threshold = BTRFS_DEFAULT_RECLAIM_THRESH; INIT_WORK(&fs_info->reclaim_bgs_work, btrfs_reclaim_bgs_work); } -@@ -3228,12 +3171,12 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device +@@ -3228,12 +3178,12 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device mapping_set_gfp_mask(fs_info->btree_inode->i_mapping, GFP_NOFS); btrfs_init_btree_inode(fs_info); @@ -300449,7 +359043,16 @@ index 355ea88d5c5f7..f4015556cafad 100644 if (IS_ERR(disk_super)) { err = PTR_ERR(disk_super); goto fail_alloc; -@@ -3314,16 +3257,6 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device +@@ -3265,7 +3215,7 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device + * We want to check superblock checksum, the type is stored inside. + * Pass the whole disk block of size BTRFS_SUPER_INFO_SIZE (4k). 
+ */ +- if (btrfs_check_super_csum(fs_info, (u8 *)disk_super)) { ++ if (btrfs_check_super_csum(fs_info, disk_super)) { + btrfs_err(fs_info, "superblock checksum mismatch"); + err = -EINVAL; + btrfs_release_disk_super(disk_super); +@@ -3314,16 +3264,6 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device */ fs_info->compress_type = BTRFS_COMPRESS_ZLIB; @@ -300466,7 +359069,7 @@ index 355ea88d5c5f7..f4015556cafad 100644 /* Set up fs_info before parsing mount options */ nodesize = btrfs_super_nodesize(disk_super); -@@ -3348,7 +3281,7 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device +@@ -3348,7 +3288,7 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device ~BTRFS_FEATURE_INCOMPAT_SUPP; if (features) { btrfs_err(fs_info, @@ -300475,7 +359078,7 @@ index 355ea88d5c5f7..f4015556cafad 100644 features); err = -EINVAL; goto fail_alloc; -@@ -3364,6 +3297,17 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device +@@ -3364,6 +3304,17 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device if (features & BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA) btrfs_info(fs_info, "has skinny extents"); @@ -300493,7 +359096,7 @@ index 355ea88d5c5f7..f4015556cafad 100644 /* * mixed block groups end up with duplicate but slightly offset * extent buffers for the same range. It leads to corruptions -@@ -3386,13 +3330,24 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device +@@ -3386,13 +3337,24 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device ~BTRFS_FEATURE_COMPAT_RO_SUPP; if (!sb_rdonly(sb) && features) { btrfs_err(fs_info, @@ -300519,7 +359122,7 @@ index 355ea88d5c5f7..f4015556cafad 100644 btrfs_warn(fs_info, "read-write for sector size %u with page size %lu is experimental", sectorsize, PAGE_SIZE); -@@ -3465,7 +3420,7 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device +@@ -3465,7 +3427,7 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device * below in btrfs_init_dev_replace(). 
*/ btrfs_free_extra_devids(fs_devices); @@ -300528,7 +359131,7 @@ index 355ea88d5c5f7..f4015556cafad 100644 btrfs_err(fs_info, "failed to read devices"); goto fail_tree_roots; } -@@ -3523,6 +3478,20 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device +@@ -3523,6 +3485,20 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device btrfs_err(fs_info, "failed to init dev_replace: %d", ret); goto fail_block_groups; } @@ -300549,7 +359152,7 @@ index 355ea88d5c5f7..f4015556cafad 100644 ret = btrfs_check_zoned_mode(fs_info); if (ret) { -@@ -3556,7 +3525,10 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device +@@ -3556,7 +3532,10 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device goto fail_sysfs; } @@ -300561,7 +359164,7 @@ index 355ea88d5c5f7..f4015556cafad 100644 btrfs_warn(fs_info, "writable mount is not allowed due to too many missing devices"); goto fail_sysfs; -@@ -3647,6 +3619,10 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device +@@ -3647,6 +3626,10 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device set_bit(BTRFS_FS_OPEN, &fs_info->flags); @@ -300572,7 +359175,45 @@ index 355ea88d5c5f7..f4015556cafad 100644 clear_oneshot: btrfs_clear_oneshot_options(fs_info); return 0; -@@ -3968,11 +3944,23 @@ static void btrfs_end_empty_barrier(struct bio *bio) +@@ -3725,7 +3708,7 @@ static void btrfs_end_super_write(struct bio *bio) + } + + struct btrfs_super_block *btrfs_read_dev_one_super(struct block_device *bdev, +- int copy_num) ++ int copy_num, bool drop_cache) + { + struct btrfs_super_block *super; + struct page *page; +@@ -3743,6 +3726,19 @@ struct btrfs_super_block *btrfs_read_dev_one_super(struct block_device *bdev, + if (bytenr + BTRFS_SUPER_INFO_SIZE >= i_size_read(bdev->bd_inode)) + return ERR_PTR(-EINVAL); + ++ if (drop_cache) { ++ /* This should only be called with the primary sb. */ ++ ASSERT(copy_num == 0); ++ ++ /* ++ * Drop the page of the primary superblock, so later read will ++ * always read from the device. ++ */ ++ invalidate_inode_pages2_range(mapping, ++ bytenr >> PAGE_SHIFT, ++ (bytenr + BTRFS_SUPER_INFO_SIZE) >> PAGE_SHIFT); ++ } ++ + page = read_cache_page_gfp(mapping, bytenr >> PAGE_SHIFT, GFP_NOFS); + if (IS_ERR(page)) + return ERR_CAST(page); +@@ -3774,7 +3770,7 @@ struct btrfs_super_block *btrfs_read_dev_super(struct block_device *bdev) + * later supers, using BTRFS_SUPER_MIRROR_MAX instead + */ + for (i = 0; i < 1; i++) { +- super = btrfs_read_dev_one_super(bdev, i); ++ super = btrfs_read_dev_one_super(bdev, i, false); + if (IS_ERR(super)) + continue; + +@@ -3968,11 +3964,23 @@ static void btrfs_end_empty_barrier(struct bio *bio) */ static void write_dev_flush(struct btrfs_device *device) { @@ -300597,7 +359238,7 @@ index 355ea88d5c5f7..f4015556cafad 100644 bio_reset(bio); bio->bi_end_io = btrfs_end_empty_barrier; -@@ -4308,6 +4296,28 @@ void __cold close_ctree(struct btrfs_fs_info *fs_info) +@@ -4308,6 +4316,28 @@ void __cold close_ctree(struct btrfs_fs_info *fs_info) int ret; set_bit(BTRFS_FS_CLOSING_START, &fs_info->flags); @@ -300626,7 +359267,7 @@ index 355ea88d5c5f7..f4015556cafad 100644 /* * We don't want the cleaner to start new transactions, add more delayed * iputs, etc. while we're closing. 
We can't use kthread_stop() yet -@@ -4338,12 +4348,35 @@ void __cold close_ctree(struct btrfs_fs_info *fs_info) +@@ -4338,12 +4368,35 @@ void __cold close_ctree(struct btrfs_fs_info *fs_info) /* clear out the rbtree of defraggable inodes */ btrfs_cleanup_defrag_inodes(fs_info); @@ -300665,10 +359306,30 @@ index 355ea88d5c5f7..f4015556cafad 100644 btrfs_discard_cleanup(fs_info); diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h -index 0e7e9526b6a83..1b8fd3deafc92 100644 +index 0e7e9526b6a83..718787dfdb8ea 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h -@@ -140,14 +140,4 @@ int btrfs_init_root_free_objectid(struct btrfs_root *root); +@@ -52,14 +52,18 @@ struct extent_buffer *btrfs_find_create_tree_block( + void btrfs_clean_tree_block(struct extent_buffer *buf); + void btrfs_clear_oneshot_options(struct btrfs_fs_info *fs_info); + int btrfs_start_pre_rw_mount(struct btrfs_fs_info *fs_info); ++int btrfs_check_super_csum(struct btrfs_fs_info *fs_info, ++ const struct btrfs_super_block *disk_sb); + int __cold open_ctree(struct super_block *sb, + struct btrfs_fs_devices *fs_devices, + char *options); + void __cold close_ctree(struct btrfs_fs_info *fs_info); ++int btrfs_validate_super(struct btrfs_fs_info *fs_info, ++ struct btrfs_super_block *sb, int mirror_num); + int write_all_supers(struct btrfs_fs_info *fs_info, int max_mirrors); + struct btrfs_super_block *btrfs_read_dev_super(struct block_device *bdev); + struct btrfs_super_block *btrfs_read_dev_one_super(struct block_device *bdev, +- int copy_num); ++ int copy_num, bool drop_cache); + int btrfs_commit_super(struct btrfs_fs_info *fs_info); + struct btrfs_root *btrfs_read_tree_root(struct btrfs_root *tree_root, + struct btrfs_key *key); +@@ -140,14 +144,4 @@ int btrfs_init_root_free_objectid(struct btrfs_root *root); int __init btrfs_end_io_wq_init(void); void __cold btrfs_end_io_wq_exit(void); @@ -300683,8 +359344,34 @@ index 0e7e9526b6a83..1b8fd3deafc92 100644 -#endif - #endif +diff --git a/fs/btrfs/export.c b/fs/btrfs/export.c +index 1d4c2397d0d62..fab7eb76e53b2 100644 +--- a/fs/btrfs/export.c ++++ b/fs/btrfs/export.c +@@ -58,7 +58,7 @@ static int btrfs_encode_fh(struct inode *inode, u32 *fh, int *max_len, + } + + struct dentry *btrfs_get_dentry(struct super_block *sb, u64 objectid, +- u64 root_objectid, u32 generation, ++ u64 root_objectid, u64 generation, + int check_generation) + { + struct btrfs_fs_info *fs_info = btrfs_sb(sb); +diff --git a/fs/btrfs/export.h b/fs/btrfs/export.h +index f32f4113c976a..5afb7ca428289 100644 +--- a/fs/btrfs/export.h ++++ b/fs/btrfs/export.h +@@ -19,7 +19,7 @@ struct btrfs_fid { + } __attribute__ ((packed)); + + struct dentry *btrfs_get_dentry(struct super_block *sb, u64 objectid, +- u64 root_objectid, u32 generation, ++ u64 root_objectid, u64 generation, + int check_generation); + struct dentry *btrfs_get_parent(struct dentry *child); + diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c -index 0ab456cb4bf80..b90e9aa24005a 100644 +index 0ab456cb4bf80..750c1ff9947d9 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1266,7 +1266,7 @@ static int btrfs_issue_discard(struct block_device *bdev, u64 start, u64 len, @@ -300752,7 +359439,28 @@ index 0ab456cb4bf80..b90e9aa24005a 100644 cur += num_bytes; } out: -@@ -2376,7 +2375,7 @@ int btrfs_cross_ref_exist(struct btrfs_root *root, u64 objectid, u64 offset, +@@ -1718,6 +1717,11 @@ static int run_one_delayed_ref(struct btrfs_trans_handle *trans, + BUG(); + if (ret && insert_reserved) + btrfs_pin_extent(trans, node->bytenr, 
node->num_bytes, 1); ++ if (ret < 0) ++ btrfs_err(trans->fs_info, ++"failed to run delayed ref for logical %llu num_bytes %llu type %u action %u ref_mod %d: %d", ++ node->bytenr, node->num_bytes, node->type, ++ node->action, node->ref_mod, ret); + return ret; + } + +@@ -1956,8 +1960,6 @@ static int btrfs_run_delayed_refs_for_head(struct btrfs_trans_handle *trans, + if (ret) { + unselect_delayed_ref_head(delayed_refs, locked_ref); + btrfs_put_delayed_ref(ref); +- btrfs_debug(fs_info, "run_one_delayed_ref returned %d", +- ret); + return ret; + } + +@@ -2376,7 +2378,7 @@ int btrfs_cross_ref_exist(struct btrfs_root *root, u64 objectid, u64 offset, out: btrfs_free_path(path); @@ -300761,7 +359469,7 @@ index 0ab456cb4bf80..b90e9aa24005a 100644 WARN_ON(ret > 0); return ret; } -@@ -2440,7 +2439,8 @@ static int __btrfs_mod_ref(struct btrfs_trans_handle *trans, +@@ -2440,7 +2442,8 @@ static int __btrfs_mod_ref(struct btrfs_trans_handle *trans, num_bytes, parent); generic_ref.real_root = root->root_key.objectid; btrfs_init_data_ref(&generic_ref, ref_root, key.objectid, @@ -300771,7 +359479,7 @@ index 0ab456cb4bf80..b90e9aa24005a 100644 generic_ref.skip_qgroup = for_reloc; if (inc) ret = btrfs_inc_extent_ref(trans, &generic_ref); -@@ -2454,7 +2454,8 @@ static int __btrfs_mod_ref(struct btrfs_trans_handle *trans, +@@ -2454,7 +2457,8 @@ static int __btrfs_mod_ref(struct btrfs_trans_handle *trans, btrfs_init_generic_ref(&generic_ref, action, bytenr, num_bytes, parent); generic_ref.real_root = root->root_key.objectid; @@ -300781,7 +359489,7 @@ index 0ab456cb4bf80..b90e9aa24005a 100644 generic_ref.skip_qgroup = for_reloc; if (inc) ret = btrfs_inc_extent_ref(trans, &generic_ref); -@@ -2571,17 +2572,10 @@ int btrfs_pin_extent_for_log_replay(struct btrfs_trans_handle *trans, +@@ -2571,17 +2575,10 @@ int btrfs_pin_extent_for_log_replay(struct btrfs_trans_handle *trans, return -EINVAL; /* @@ -300789,20 +359497,20 @@ index 0ab456cb4bf80..b90e9aa24005a 100644 - * removes the free space from the cache. We have load_only set - * to one because the slow code to read in the free extents does check - * the pinned extents. -- */ ++ * Fully cache the free space first so that our pin removes the free space ++ * from the cache. + */ - btrfs_cache_block_group(cache, 1); - /* - * Make sure we wait until the cache is completely built in case it is - * missing or is invalid and therefore needs to be rebuilt. -+ * Fully cache the free space first so that our pin removes the free space -+ * from the cache. 
- */ +- */ - ret = btrfs_wait_block_group_cache_done(cache); + ret = btrfs_cache_block_group(cache, true); if (ret) goto out; -@@ -2604,12 +2598,7 @@ static int __exclude_logged_extent(struct btrfs_fs_info *fs_info, +@@ -2604,12 +2601,7 @@ static int __exclude_logged_extent(struct btrfs_fs_info *fs_info, if (!block_group) return -EINVAL; @@ -300816,7 +359524,7 @@ index 0ab456cb4bf80..b90e9aa24005a 100644 if (ret) goto out; -@@ -3278,20 +3267,20 @@ out_delayed_unlock: +@@ -3278,20 +3270,20 @@ out_delayed_unlock: } void btrfs_free_tree_block(struct btrfs_trans_handle *trans, @@ -300841,7 +359549,7 @@ index 0ab456cb4bf80..b90e9aa24005a 100644 btrfs_ref_tree_mod(fs_info, &generic_ref); ret = btrfs_add_delayed_tree_ref(trans, &generic_ref, NULL); BUG_ON(ret); /* -ENOMEM */ -@@ -3301,7 +3290,7 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans, +@@ -3301,7 +3293,7 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans, struct btrfs_block_group *cache; bool must_pin = false; @@ -300850,7 +359558,42 @@ index 0ab456cb4bf80..b90e9aa24005a 100644 ret = check_ref_cleanup(trans, buf->start); if (!ret) { btrfs_redirty_list_add(trans->transaction, buf); -@@ -3495,6 +3484,9 @@ struct find_free_extent_ctl { +@@ -3318,21 +3310,22 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans, + } + + /* +- * If this is a leaf and there are tree mod log users, we may +- * have recorded mod log operations that point to this leaf. +- * So we must make sure no one reuses this leaf's extent before +- * mod log operations are applied to a node, otherwise after +- * rewinding a node using the mod log operations we get an +- * inconsistent btree, as the leaf's extent may now be used as +- * a node or leaf for another different btree. ++ * If there are tree mod log users we may have recorded mod log ++ * operations for this node. If we re-allocate this node we ++ * could replay operations on this node that happened when it ++ * existed in a completely different root. For example if it ++ * was part of root A, then was reallocated to root B, and we ++ * are doing a btrfs_old_search_slot(root b), we could replay ++ * operations that happened when the block was part of root A, ++ * giving us an inconsistent view of the btree. ++ * + * We are safe from races here because at this point no other + * node or root points to this extent buffer, so if after this +- * check a new tree mod log user joins, it will not be able to +- * find a node pointing to this leaf and record operations that +- * point to this leaf. ++ * check a new tree mod log user joins we will not have an ++ * existing log of operations on this node that we have to ++ * contend with. 
+ */ +- if (btrfs_header_level(buf) == 0 && +- test_bit(BTRFS_FS_TREE_MOD_LOG_USERS, &fs_info->flags)) ++ if (test_bit(BTRFS_FS_TREE_MOD_LOG_USERS, &fs_info->flags)) + must_pin = true; + + if (must_pin || btrfs_is_zoned(fs_info)) { +@@ -3495,6 +3488,9 @@ struct find_free_extent_ctl { /* Allocation is called for tree-log */ bool for_treelog; @@ -300860,7 +359603,7 @@ index 0ab456cb4bf80..b90e9aa24005a 100644 /* RAID index, converted from flags */ int index; -@@ -3756,6 +3748,7 @@ static int do_allocation_zoned(struct btrfs_block_group *block_group, +@@ -3756,6 +3752,7 @@ static int do_allocation_zoned(struct btrfs_block_group *block_group, u64 avail; u64 bytenr = block_group->start; u64 log_bytenr; @@ -300868,7 +359611,7 @@ index 0ab456cb4bf80..b90e9aa24005a 100644 int ret = 0; bool skip; -@@ -3773,15 +3766,33 @@ static int do_allocation_zoned(struct btrfs_block_group *block_group, +@@ -3773,15 +3770,33 @@ static int do_allocation_zoned(struct btrfs_block_group *block_group, if (skip) return 1; @@ -300903,7 +359646,7 @@ index 0ab456cb4bf80..b90e9aa24005a 100644 ret = 1; goto out; } -@@ -3796,6 +3807,16 @@ static int do_allocation_zoned(struct btrfs_block_group *block_group, +@@ -3796,6 +3811,16 @@ static int do_allocation_zoned(struct btrfs_block_group *block_group, goto out; } @@ -300920,7 +359663,7 @@ index 0ab456cb4bf80..b90e9aa24005a 100644 avail = block_group->length - block_group->alloc_offset; if (avail < num_bytes) { if (ffe_ctl->max_extent_size < avail) { -@@ -3813,6 +3834,9 @@ static int do_allocation_zoned(struct btrfs_block_group *block_group, +@@ -3813,6 +3838,9 @@ static int do_allocation_zoned(struct btrfs_block_group *block_group, if (ffe_ctl->for_treelog && !fs_info->treelog_bg) fs_info->treelog_bg = block_group->start; @@ -300930,7 +359673,7 @@ index 0ab456cb4bf80..b90e9aa24005a 100644 ffe_ctl->found_offset = start + block_group->alloc_offset; block_group->alloc_offset += num_bytes; spin_lock(&ctl->tree_lock); -@@ -3829,6 +3853,25 @@ static int do_allocation_zoned(struct btrfs_block_group *block_group, +@@ -3829,6 +3857,25 @@ static int do_allocation_zoned(struct btrfs_block_group *block_group, out: if (ret && ffe_ctl->for_treelog) fs_info->treelog_bg = 0; @@ -300956,7 +359699,7 @@ index 0ab456cb4bf80..b90e9aa24005a 100644 spin_unlock(&fs_info->treelog_bg_lock); spin_unlock(&block_group->lock); spin_unlock(&space_info->lock); -@@ -4085,6 +4128,12 @@ static int prepare_allocation(struct btrfs_fs_info *fs_info, +@@ -4085,6 +4132,12 @@ static int prepare_allocation(struct btrfs_fs_info *fs_info, ffe_ctl->hint_byte = fs_info->treelog_bg; spin_unlock(&fs_info->treelog_bg_lock); } @@ -300969,7 +359712,7 @@ index 0ab456cb4bf80..b90e9aa24005a 100644 return 0; default: BUG(); -@@ -4129,6 +4178,8 @@ static noinline int find_free_extent(struct btrfs_root *root, +@@ -4129,6 +4182,8 @@ static noinline int find_free_extent(struct btrfs_root *root, struct btrfs_space_info *space_info; bool full_search = false; bool for_treelog = (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID); @@ -300978,7 +359721,7 @@ index 0ab456cb4bf80..b90e9aa24005a 100644 WARN_ON(num_bytes < fs_info->sectorsize); -@@ -4143,6 +4194,7 @@ static noinline int find_free_extent(struct btrfs_root *root, +@@ -4143,6 +4198,7 @@ static noinline int find_free_extent(struct btrfs_root *root, ffe_ctl.found_offset = 0; ffe_ctl.hint_byte = hint_byte_orig; ffe_ctl.for_treelog = for_treelog; @@ -300986,7 +359729,7 @@ index 0ab456cb4bf80..b90e9aa24005a 100644 ffe_ctl.policy = BTRFS_EXTENT_ALLOC_CLUSTERED; /* For clustered 
allocation */ -@@ -4220,6 +4272,8 @@ search: +@@ -4220,6 +4276,8 @@ search: if (unlikely(block_group->ro)) { if (for_treelog) btrfs_clear_treelog_bg(block_group); @@ -300995,7 +359738,7 @@ index 0ab456cb4bf80..b90e9aa24005a 100644 continue; } -@@ -4258,7 +4312,7 @@ have_block_group: +@@ -4258,7 +4316,7 @@ have_block_group: ffe_ctl.cached = btrfs_block_group_done(block_group); if (unlikely(!ffe_ctl.cached)) { ffe_ctl.have_caching_bg = true; @@ -301004,7 +359747,7 @@ index 0ab456cb4bf80..b90e9aa24005a 100644 /* * If we get ENOMEM here or something else we want to -@@ -4408,6 +4462,7 @@ int btrfs_reserve_extent(struct btrfs_root *root, u64 ram_bytes, +@@ -4408,6 +4466,7 @@ int btrfs_reserve_extent(struct btrfs_root *root, u64 ram_bytes, u64 flags; int ret; bool for_treelog = (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID); @@ -301012,7 +359755,7 @@ index 0ab456cb4bf80..b90e9aa24005a 100644 flags = get_alloc_profile_by_root(root, is_data); again: -@@ -4431,8 +4486,8 @@ again: +@@ -4431,8 +4490,8 @@ again: sinfo = btrfs_find_space_info(fs_info, flags); btrfs_err(fs_info, @@ -301023,7 +359766,7 @@ index 0ab456cb4bf80..b90e9aa24005a 100644 if (sinfo) btrfs_dump_space_info(fs_info, sinfo, num_bytes, 1); -@@ -4655,7 +4710,8 @@ int btrfs_alloc_reserved_file_extent(struct btrfs_trans_handle *trans, +@@ -4655,7 +4714,8 @@ int btrfs_alloc_reserved_file_extent(struct btrfs_trans_handle *trans, btrfs_init_generic_ref(&generic_ref, BTRFS_ADD_DELAYED_EXTENT, ins->objectid, ins->offset, 0); @@ -301033,7 +359776,7 @@ index 0ab456cb4bf80..b90e9aa24005a 100644 btrfs_ref_tree_mod(root->fs_info, &generic_ref); return btrfs_add_delayed_data_ref(trans, &generic_ref, ram_bytes); -@@ -4713,6 +4769,7 @@ btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root, +@@ -4713,6 +4773,7 @@ btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root, { struct btrfs_fs_info *fs_info = root->fs_info; struct extent_buffer *buf; @@ -301041,7 +359784,7 @@ index 0ab456cb4bf80..b90e9aa24005a 100644 buf = btrfs_find_create_tree_block(fs_info, bytenr, owner, level); if (IS_ERR(buf)) -@@ -4731,12 +4788,30 @@ btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root, +@@ -4731,12 +4792,30 @@ btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root, return ERR_PTR(-EUCLEAN); } @@ -301073,7 +359816,7 @@ index 0ab456cb4bf80..b90e9aa24005a 100644 __btrfs_tree_lock(buf, nest); btrfs_clean_tree_block(buf); clear_bit(EXTENT_BUFFER_STALE, &buf->bflags); -@@ -4848,7 +4923,8 @@ struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans, +@@ -4848,7 +4927,8 @@ struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans, btrfs_init_generic_ref(&generic_ref, BTRFS_ADD_DELAYED_EXTENT, ins.objectid, ins.offset, parent); generic_ref.real_root = root->root_key.objectid; @@ -301083,7 +359826,7 @@ index 0ab456cb4bf80..b90e9aa24005a 100644 btrfs_ref_tree_mod(fs_info, &generic_ref); ret = btrfs_add_delayed_tree_ref(trans, &generic_ref, extent_op); if (ret) -@@ -5265,7 +5341,8 @@ skip: +@@ -5265,7 +5345,8 @@ skip: btrfs_init_generic_ref(&ref, BTRFS_DROP_DELAYED_REF, bytenr, fs_info->nodesize, parent); @@ -301093,7 +359836,7 @@ index 0ab456cb4bf80..b90e9aa24005a 100644 ret = btrfs_free_extent(trans, &ref); if (ret) goto out_unlock; -@@ -5386,7 +5463,8 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans, +@@ -5386,7 +5467,8 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans, goto owner_mismatch; } 
@@ -301103,7 +359846,7 @@ index 0ab456cb4bf80..b90e9aa24005a 100644 out: wc->refs[level] = 0; wc->flags[level] = 0; -@@ -5491,6 +5569,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref, int for_reloc) +@@ -5491,6 +5573,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref, int for_reloc) int ret; int level; bool root_dropped = false; @@ -301111,7 +359854,7 @@ index 0ab456cb4bf80..b90e9aa24005a 100644 btrfs_debug(fs_info, "Drop subvolume %llu", root->root_key.objectid); -@@ -5533,6 +5612,8 @@ int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref, int for_reloc) +@@ -5533,6 +5616,8 @@ int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref, int for_reloc) * already dropped. */ set_bit(BTRFS_ROOT_DELETING, &root->state); @@ -301120,7 +359863,7 @@ index 0ab456cb4bf80..b90e9aa24005a 100644 if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) { level = btrfs_header_level(root->node); path->nodes[level] = btrfs_lock_root_node(root); -@@ -5707,6 +5788,13 @@ out_free: +@@ -5707,6 +5792,13 @@ out_free: kfree(wc); btrfs_free_path(path); out: @@ -301134,7 +359877,7 @@ index 0ab456cb4bf80..b90e9aa24005a 100644 /* * So if we need to stop dropping the snapshot for whatever reason we * need to make sure to add it back to the dead root list so that we -@@ -5985,13 +6073,7 @@ int btrfs_trim_fs(struct btrfs_fs_info *fs_info, struct fstrim_range *range) +@@ -5985,13 +6077,7 @@ int btrfs_trim_fs(struct btrfs_fs_info *fs_info, struct fstrim_range *range) if (end - start >= range->minlen) { if (!btrfs_block_group_done(cache)) { @@ -301504,10 +360247,10 @@ index 0b9401a5afd33..161a69d7e117e 100644 return ret; } diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c -index a1762363f61fa..1c597cd6c0247 100644 +index a1762363f61fa..eae622ef4c6d5 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c -@@ -869,7 +869,8 @@ next_slot: +@@ -869,9 +869,13 @@ next_slot: btrfs_init_data_ref(&ref, root->root_key.objectid, new_key.objectid, @@ -301515,9 +360258,15 @@ index a1762363f61fa..1c597cd6c0247 100644 + args->start - extent_offset, + 0, false); ret = btrfs_inc_extent_ref(trans, &ref); - BUG_ON(ret); /* -ENOMEM */ +- BUG_ON(ret); /* -ENOMEM */ ++ if (ret) { ++ btrfs_abort_transaction(trans, ret); ++ break; ++ } } -@@ -955,7 +956,8 @@ delete_extent_item: + key.offset = args->start; + } +@@ -955,9 +959,13 @@ delete_extent_item: btrfs_init_data_ref(&ref, root->root_key.objectid, key.objectid, @@ -301525,9 +360274,15 @@ index a1762363f61fa..1c597cd6c0247 100644 + key.offset - extent_offset, 0, + false); ret = btrfs_free_extent(trans, &ref); - BUG_ON(ret); /* -ENOMEM */ +- BUG_ON(ret); /* -ENOMEM */ ++ if (ret) { ++ btrfs_abort_transaction(trans, ret); ++ break; ++ } args->bytes_found += extent_end - key.offset; -@@ -1232,7 +1234,7 @@ again: + } + +@@ -1232,7 +1240,7 @@ again: btrfs_init_generic_ref(&ref, BTRFS_ADD_DELAYED_REF, bytenr, num_bytes, 0); btrfs_init_data_ref(&ref, root->root_key.objectid, ino, @@ -301536,7 +360291,7 @@ index a1762363f61fa..1c597cd6c0247 100644 ret = btrfs_inc_extent_ref(trans, &ref); if (ret) { btrfs_abort_transaction(trans, ret); -@@ -1257,7 +1259,8 @@ again: +@@ -1257,7 +1265,8 @@ again: other_end = 0; btrfs_init_generic_ref(&ref, BTRFS_DROP_DELAYED_REF, bytenr, num_bytes, 0); @@ -301546,7 +360301,7 @@ index a1762363f61fa..1c597cd6c0247 100644 if (extent_mergeable(leaf, path->slots[0] + 1, ino, bytenr, orig_offset, &other_start, &other_end)) { -@@ -1709,7 +1712,7 @@ static noinline ssize_t btrfs_buffered_write(struct kiocb *iocb, +@@ -1709,7 
+1718,7 @@ static noinline ssize_t btrfs_buffered_write(struct kiocb *iocb, * Fault pages before locking them in prepare_pages * to avoid recursive lock */ @@ -301555,14 +360310,7 @@ index a1762363f61fa..1c597cd6c0247 100644 ret = -EFAULT; break; } -@@ -1903,16 +1906,17 @@ static ssize_t check_direct_IO(struct btrfs_fs_info *fs_info, - - static ssize_t btrfs_direct_write(struct kiocb *iocb, struct iov_iter *from) - { -+ const bool is_sync_write = (iocb->ki_flags & IOCB_DSYNC); - struct file *file = iocb->ki_filp; - struct inode *inode = file_inode(file); - struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); +@@ -1909,10 +1918,11 @@ static ssize_t btrfs_direct_write(struct kiocb *iocb, struct iov_iter *from) loff_t pos; ssize_t written = 0; ssize_t written_buffered; @@ -301571,25 +360319,14 @@ index a1762363f61fa..1c597cd6c0247 100644 ssize_t err; unsigned int ilock_flags = 0; - struct iomap_dio *dio = NULL; ++ struct iomap_dio *dio; if (iocb->ki_flags & IOCB_NOWAIT) ilock_flags |= BTRFS_ILOCK_TRY; -@@ -1955,23 +1959,80 @@ relock: +@@ -1955,23 +1965,73 @@ relock: goto buffered; } -- dio = __iomap_dio_rw(iocb, from, &btrfs_dio_iomap_ops, &btrfs_dio_ops, -- 0); -+ /* -+ * We remove IOCB_DSYNC so that we don't deadlock when iomap_dio_rw() -+ * calls generic_write_sync() (through iomap_dio_complete()), because -+ * that results in calling fsync (btrfs_sync_file()) which will try to -+ * lock the inode in exclusive/write mode. -+ */ -+ if (is_sync_write) -+ iocb->ki_flags &= ~IOCB_DSYNC; - -- btrfs_inode_unlock(inode, ilock_flags); + /* + * The iov_iter can be mapped to the same file range we are writing to. + * If that's the case, then we will deadlock in the iomap code, because @@ -301608,18 +360345,29 @@ index a1762363f61fa..1c597cd6c0247 100644 + * So here we disable page faults in the iov_iter and then retry if we + * got -EFAULT, faulting in the pages before the retry. + */ -+again: + from->nofault = true; -+ err = iomap_dio_rw(iocb, from, &btrfs_dio_iomap_ops, &btrfs_dio_ops, -+ IOMAP_DIO_PARTIAL, written); + dio = __iomap_dio_rw(iocb, from, &btrfs_dio_iomap_ops, &btrfs_dio_ops, +- 0); ++ IOMAP_DIO_PARTIAL, written); + from->nofault = false; ++ /* ++ * iomap_dio_complete() will call btrfs_sync_file() if we have a dsync ++ * iocb, and that needs to lock the inode. So unlock it before calling ++ * iomap_dio_complete() to avoid a deadlock. ++ */ + btrfs_inode_unlock(inode, ilock_flags); + - if (IS_ERR_OR_NULL(dio)) { -- err = PTR_ERR_OR_ZERO(dio); ++ if (IS_ERR_OR_NULL(dio)) + err = PTR_ERR_OR_ZERO(dio); - if (err < 0 && err != -ENOTBLK) - goto out; - } else { - written = iomap_dio_complete(dio); ++ else ++ err = iomap_dio_complete(dio); ++ + /* No increment (+=) because iomap returns a cumulative value. */ + if (err > 0) + written = err; @@ -301645,21 +360393,12 @@ index a1762363f61fa..1c597cd6c0247 100644 + } else { + fault_in_iov_iter_readable(from, left); + prev_left = left; -+ goto again; ++ goto relock; + } } - if (written < 0 || !iov_iter_count(from)) { - err = written; -+ btrfs_inode_unlock(inode, ilock_flags); -+ -+ /* -+ * Add back IOCB_DSYNC. Our caller, btrfs_file_write_iter(), will do -+ * the fsync (call generic_write_sync()). -+ */ -+ if (is_sync_write) -+ iocb->ki_flags |= IOCB_DSYNC; -+ + /* If 'err' is -ENOTBLK then it means we must fallback to buffered IO. 
*/ + if ((err < 0 && err != -ENOTBLK) || !iov_iter_count(from)) goto out; @@ -301667,7 +360406,7 @@ index a1762363f61fa..1c597cd6c0247 100644 buffered: pos = iocb->ki_pos; -@@ -1996,7 +2057,7 @@ buffered: +@@ -1996,7 +2056,7 @@ buffered: invalidate_mapping_pages(file->f_mapping, pos >> PAGE_SHIFT, endbyte >> PAGE_SHIFT); out: @@ -301676,7 +360415,7 @@ index a1762363f61fa..1c597cd6c0247 100644 } static ssize_t btrfs_file_write_iter(struct kiocb *iocb, -@@ -2279,25 +2340,62 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) +@@ -2279,25 +2339,62 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) */ btrfs_inode_unlock(inode, BTRFS_ILOCK_MMAP); @@ -301754,7 +360493,7 @@ index a1762363f61fa..1c597cd6c0247 100644 out: ASSERT(list_empty(&ctx.list)); err = file_check_and_advance_wb_err(file); -@@ -2620,7 +2718,7 @@ static int btrfs_insert_replace_extent(struct btrfs_trans_handle *trans, +@@ -2620,7 +2717,7 @@ static int btrfs_insert_replace_extent(struct btrfs_trans_handle *trans, extent_info->disk_len, 0); ref_offset = extent_info->file_offset - extent_info->data_offset; btrfs_init_data_ref(&ref, root->root_key.objectid, @@ -301763,7 +360502,7 @@ index a1762363f61fa..1c597cd6c0247 100644 ret = btrfs_inc_extent_ref(trans, &ref); } -@@ -2878,8 +2976,9 @@ out: +@@ -2878,8 +2975,9 @@ out: return ret; } @@ -301774,7 +360513,7 @@ index a1762363f61fa..1c597cd6c0247 100644 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); struct btrfs_root *root = BTRFS_I(inode)->root; struct extent_state *cached_state = NULL; -@@ -2911,6 +3010,10 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len) +@@ -2911,6 +3009,10 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len) goto out_only_mutex; } @@ -301785,7 +360524,7 @@ index a1762363f61fa..1c597cd6c0247 100644 lockstart = round_up(offset, btrfs_inode_sectorsize(BTRFS_I(inode))); lockend = round_down(offset + len, btrfs_inode_sectorsize(BTRFS_I(inode))) - 1; -@@ -3351,7 +3454,7 @@ static long btrfs_fallocate(struct file *file, int mode, +@@ -3351,7 +3453,7 @@ static long btrfs_fallocate(struct file *file, int mode, return -EOPNOTSUPP; if (mode & FALLOC_FL_PUNCH_HOLE) @@ -301794,7 +360533,7 @@ index a1762363f61fa..1c597cd6c0247 100644 /* * Only trigger disk allocation, don't trigger qgroup reserve -@@ -3373,6 +3476,10 @@ static long btrfs_fallocate(struct file *file, int mode, +@@ -3373,6 +3475,10 @@ static long btrfs_fallocate(struct file *file, int mode, goto out; } @@ -301805,7 +360544,7 @@ index a1762363f61fa..1c597cd6c0247 100644 /* * TODO: Move these two operations after we have checked * accurate reserved space, or fallocate can still fail but -@@ -3650,6 +3757,8 @@ static int check_direct_read(struct btrfs_fs_info *fs_info, +@@ -3650,6 +3756,8 @@ static int check_direct_read(struct btrfs_fs_info *fs_info, static ssize_t btrfs_direct_read(struct kiocb *iocb, struct iov_iter *to) { struct inode *inode = file_inode(iocb->ki_filp); @@ -301814,7 +360553,7 @@ index a1762363f61fa..1c597cd6c0247 100644 ssize_t ret; if (fsverity_active(inode)) -@@ -3659,9 +3768,57 @@ static ssize_t btrfs_direct_read(struct kiocb *iocb, struct iov_iter *to) +@@ -3659,9 +3767,57 @@ static ssize_t btrfs_direct_read(struct kiocb *iocb, struct iov_iter *to) return 0; btrfs_inode_lock(inode, BTRFS_ILOCK_SHARED); @@ -302727,7 +361466,7 @@ index 7c096ab9bb5eb..f8a01964a2169 100644 isize = ALIGN_DOWN(inode->i_size, fs_info->sectorsize); diff --git a/fs/btrfs/ioctl.c 
b/fs/btrfs/ioctl.c -index cc61813213d83..b9dcaae7c8d51 100644 +index cc61813213d83..ed9c715d25796 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -615,11 +615,13 @@ static noinline int create_subvol(struct user_namespace *mnt_userns, @@ -302790,7 +361529,33 @@ index cc61813213d83..b9dcaae7c8d51 100644 break; ret = btrfs_search_forward(root, &key, path, sk->min_transid); -@@ -3098,10 +3098,8 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file, +@@ -2788,6 +2788,8 @@ static int btrfs_ioctl_get_subvol_info(struct file *file, void __user *argp) + } + } + ++ btrfs_free_path(path); ++ path = NULL; + if (copy_to_user(argp, subvol_info, sizeof(*subvol_info))) + ret = -EFAULT; + +@@ -2880,6 +2882,8 @@ static int btrfs_ioctl_get_subvol_rootref(struct file *file, void __user *argp) + } + + out: ++ btrfs_free_path(path); ++ + if (!ret || ret == -EOVERFLOW) { + rootrefs->num_items = found; + /* update min_treeid for next search */ +@@ -2891,7 +2895,6 @@ out: + } + + kfree(rootrefs); +- btrfs_free_path(path); + + return ret; + } +@@ -3098,10 +3101,8 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file, btrfs_inode_lock(inode, 0); err = btrfs_delete_subvolume(dir, dentry); btrfs_inode_unlock(inode, 0); @@ -302803,7 +361568,7 @@ index cc61813213d83..b9dcaae7c8d51 100644 out_dput: dput(dentry); -@@ -3220,6 +3218,7 @@ out: +@@ -3220,6 +3221,7 @@ out: static long btrfs_ioctl_rm_dev_v2(struct file *file, void __user *arg) { @@ -302811,7 +361576,7 @@ index cc61813213d83..b9dcaae7c8d51 100644 struct inode *inode = file_inode(file); struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); struct btrfs_ioctl_vol_args_v2 *vol_args; -@@ -3231,35 +3230,37 @@ static long btrfs_ioctl_rm_dev_v2(struct file *file, void __user *arg) +@@ -3231,35 +3233,37 @@ static long btrfs_ioctl_rm_dev_v2(struct file *file, void __user *arg) if (!capable(CAP_SYS_ADMIN)) return -EPERM; @@ -302865,7 +361630,7 @@ index cc61813213d83..b9dcaae7c8d51 100644 btrfs_exclop_finish(fs_info); -@@ -3271,54 +3272,62 @@ static long btrfs_ioctl_rm_dev_v2(struct file *file, void __user *arg) +@@ -3271,54 +3275,62 @@ static long btrfs_ioctl_rm_dev_v2(struct file *file, void __user *arg) btrfs_info(fs_info, "device deleted: %s", vol_args->name); } @@ -302943,7 +361708,7 @@ index cc61813213d83..b9dcaae7c8d51 100644 return ret; } -@@ -3379,22 +3388,21 @@ static long btrfs_ioctl_fs_info(struct btrfs_fs_info *fs_info, +@@ -3379,22 +3391,21 @@ static long btrfs_ioctl_fs_info(struct btrfs_fs_info *fs_info, static long btrfs_ioctl_dev_info(struct btrfs_fs_info *fs_info, void __user *arg) { @@ -302970,6 +361735,97 @@ index cc61813213d83..b9dcaae7c8d51 100644 if (!dev) { ret = -ENODEV; goto out; +@@ -3404,13 +3415,10 @@ static long btrfs_ioctl_dev_info(struct btrfs_fs_info *fs_info, + di_args->bytes_used = btrfs_device_get_bytes_used(dev); + di_args->total_bytes = btrfs_device_get_total_bytes(dev); + memcpy(di_args->uuid, dev->uuid, sizeof(di_args->uuid)); +- if (dev->name) { +- strncpy(di_args->path, rcu_str_deref(dev->name), +- sizeof(di_args->path) - 1); +- di_args->path[sizeof(di_args->path) - 1] = 0; +- } else { ++ if (dev->name) ++ strscpy(di_args->path, rcu_str_deref(dev->name), sizeof(di_args->path)); ++ else + di_args->path[0] = '\0'; +- } + + out: + rcu_read_unlock(); +@@ -3883,6 +3891,8 @@ static long btrfs_ioctl_ino_to_path(struct btrfs_root *root, void __user *arg) + ipath->fspath->val[i] = rel_ptr; + } + ++ btrfs_free_path(path); ++ path = NULL; + ret = copy_to_user((void __user *)(unsigned long)ipa->fspath, + 
ipath->fspath, size); + if (ret) { +@@ -3898,26 +3908,6 @@ out: + return ret; + } + +-static int build_ino_list(u64 inum, u64 offset, u64 root, void *ctx) +-{ +- struct btrfs_data_container *inodes = ctx; +- const size_t c = 3 * sizeof(u64); +- +- if (inodes->bytes_left >= c) { +- inodes->bytes_left -= c; +- inodes->val[inodes->elem_cnt] = inum; +- inodes->val[inodes->elem_cnt + 1] = offset; +- inodes->val[inodes->elem_cnt + 2] = root; +- inodes->elem_cnt += 3; +- } else { +- inodes->bytes_missing += c - inodes->bytes_left; +- inodes->bytes_left = 0; +- inodes->elem_missed += 3; +- } +- +- return 0; +-} +- + static long btrfs_ioctl_logical_to_ino(struct btrfs_fs_info *fs_info, + void __user *arg, int version) + { +@@ -3953,21 +3943,20 @@ static long btrfs_ioctl_logical_to_ino(struct btrfs_fs_info *fs_info, + size = min_t(u32, loi->size, SZ_16M); + } + +- path = btrfs_alloc_path(); +- if (!path) { +- ret = -ENOMEM; +- goto out; +- } +- + inodes = init_data_container(size); + if (IS_ERR(inodes)) { + ret = PTR_ERR(inodes); +- inodes = NULL; +- goto out; ++ goto out_loi; + } + ++ path = btrfs_alloc_path(); ++ if (!path) { ++ ret = -ENOMEM; ++ goto out; ++ } + ret = iterate_inodes_from_logical(loi->logical, fs_info, path, +- build_ino_list, inodes, ignore_offset); ++ inodes, ignore_offset); ++ btrfs_free_path(path); + if (ret == -EINVAL) + ret = -ENOENT; + if (ret < 0) +@@ -3979,7 +3968,6 @@ static long btrfs_ioctl_logical_to_ino(struct btrfs_fs_info *fs_info, + ret = -EFAULT; + + out: +- btrfs_free_path(path); + kvfree(inodes); + out_loi: + kfree(loi); diff --git a/fs/btrfs/locking.c b/fs/btrfs/locking.c index 313d9d685adb7..9063072b399bd 100644 --- a/fs/btrfs/locking.c @@ -303163,7 +362019,7 @@ index 3dbe6eb5fda75..fcd7eb496478c 100644 copy_compressed_segment(cb, workspace->cbuf, seg_len, &cur_in); diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c -index db680f5be745a..e01065696e9cd 100644 +index db680f5be745a..fc40159197f7c 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -940,6 +940,14 @@ int btrfs_quota_enable(struct btrfs_fs_info *fs_info) @@ -303283,7 +362139,96 @@ index db680f5be745a..e01065696e9cd 100644 btrfs_put_root(quota_root); -@@ -3360,6 +3416,9 @@ qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid, +@@ -2847,14 +2903,7 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, u64 srcid, + dstgroup->rsv_rfer = inherit->lim.rsv_rfer; + dstgroup->rsv_excl = inherit->lim.rsv_excl; + +- ret = update_qgroup_limit_item(trans, dstgroup); +- if (ret) { +- fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; +- btrfs_info(fs_info, +- "unable to update quota limit for %llu", +- dstgroup->qgroupid); +- goto unlock; +- } ++ qgroup_dirty(fs_info, dstgroup); + } + + if (srcid) { +@@ -3224,7 +3273,8 @@ out: + static bool rescan_should_stop(struct btrfs_fs_info *fs_info) + { + return btrfs_fs_closing(fs_info) || +- test_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state); ++ test_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state) || ++ !test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags); + } + + static void btrfs_qgroup_rescan_worker(struct btrfs_work *work) +@@ -3236,6 +3286,7 @@ static void btrfs_qgroup_rescan_worker(struct btrfs_work *work) + int err = -ENOMEM; + int ret = 0; + bool stopped = false; ++ bool did_leaf_rescans = false; + + path = btrfs_alloc_path(); + if (!path) +@@ -3254,11 +3305,10 @@ static void btrfs_qgroup_rescan_worker(struct btrfs_work *work) + err = PTR_ERR(trans); + break; + } +- if (!test_bit(BTRFS_FS_QUOTA_ENABLED, 
&fs_info->flags)) { +- err = -EINTR; +- } else { +- err = qgroup_rescan_leaf(trans, path); +- } ++ ++ err = qgroup_rescan_leaf(trans, path); ++ did_leaf_rescans = true; ++ + if (err > 0) + btrfs_commit_transaction(trans); + else +@@ -3272,22 +3322,29 @@ out: + if (err > 0 && + fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT) { + fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; +- } else if (err < 0) { ++ } else if (err < 0 || stopped) { + fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; + } + mutex_unlock(&fs_info->qgroup_rescan_lock); + + /* +- * only update status, since the previous part has already updated the +- * qgroup info. ++ * Only update status, since the previous part has already updated the ++ * qgroup info, and only if we did any actual work. This also prevents ++ * race with a concurrent quota disable, which has already set ++ * fs_info->quota_root to NULL and cleared BTRFS_FS_QUOTA_ENABLED at ++ * btrfs_quota_disable(). + */ +- trans = btrfs_start_transaction(fs_info->quota_root, 1); +- if (IS_ERR(trans)) { +- err = PTR_ERR(trans); ++ if (did_leaf_rescans) { ++ trans = btrfs_start_transaction(fs_info->quota_root, 1); ++ if (IS_ERR(trans)) { ++ err = PTR_ERR(trans); ++ trans = NULL; ++ btrfs_err(fs_info, ++ "fail to start transaction for status update: %d", ++ err); ++ } ++ } else { + trans = NULL; +- btrfs_err(fs_info, +- "fail to start transaction for status update: %d", +- err); + } + + mutex_lock(&fs_info->qgroup_rescan_lock); +@@ -3360,6 +3417,9 @@ qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid, btrfs_warn(fs_info, "qgroup rescan init failed, qgroup is not enabled"); ret = -EINVAL; @@ -303294,7 +362239,7 @@ index db680f5be745a..e01065696e9cd 100644 if (ret) { diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c -index d8d268ca8aa76..3157a26ddf7ea 100644 +index d8d268ca8aa76..5b27c289139ac 100644 --- a/fs/btrfs/raid56.c +++ b/fs/btrfs/raid56.c @@ -61,7 +61,7 @@ enum btrfs_rbio_ops { @@ -303877,7 +362822,17 @@ index d8d268ca8aa76..3157a26ddf7ea 100644 if (IS_ERR(rbio)) return NULL; -@@ -2695,7 +2734,7 @@ raid56_alloc_missing_rbio(struct btrfs_fs_info *fs_info, struct bio *bio, +@@ -2689,13 +2728,15 @@ raid56_alloc_missing_rbio(struct btrfs_fs_info *fs_info, struct bio *bio, + + rbio->faila = find_logical_bio_stripe(rbio, bio); + if (rbio->faila == -1) { +- BUG(); +- kfree(rbio); ++ btrfs_warn_rl(fs_info, ++ "can not determine the failed stripe number for full stripe %llu", ++ bioc->raid_map[0]); ++ __free_raid_bio(rbio); + return NULL; } /* @@ -303919,6 +362874,23 @@ index 2503485db859b..838d3a5e07ef4 100644 void raid56_submit_missing_rbio(struct btrfs_raid_bio *rbio); int btrfs_alloc_stripe_hash_table(struct btrfs_fs_info *info); +diff --git a/fs/btrfs/rcu-string.h b/fs/btrfs/rcu-string.h +index 5c1a617eb25de..5c2b66d155ef7 100644 +--- a/fs/btrfs/rcu-string.h ++++ b/fs/btrfs/rcu-string.h +@@ -18,7 +18,11 @@ static inline struct rcu_string *rcu_string_strdup(const char *src, gfp_t mask) + (len * sizeof(char)), mask); + if (!ret) + return ret; +- strncpy(ret->str, src, len); ++ /* Warn if the source got unexpectedly truncated. 
*/ ++ if (WARN_ON(strscpy(ret->str, src, len) < 0)) { ++ kfree(ret); ++ return NULL; ++ } + return ret; + } + diff --git a/fs/btrfs/reada.c b/fs/btrfs/reada.c index 06713a8fe26b4..eb96fdc3be25f 100644 --- a/fs/btrfs/reada.c @@ -304740,7 +363712,7 @@ index 088641ba7a8e6..ca8d6979c7887 100644 + btrfs_put_bioc(bioc); } diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c -index 72f9b865e8479..4d2c6ce29fe58 100644 +index 72f9b865e8479..692ae2e2f8cc5 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -24,6 +24,7 @@ @@ -304805,7 +363777,53 @@ index 72f9b865e8479..4d2c6ce29fe58 100644 put_page(page); ret = -EIO; break; -@@ -6592,6 +6626,50 @@ static int changed_cb(struct btrfs_path *left_path, +@@ -5364,6 +5398,7 @@ static int clone_range(struct send_ctx *sctx, + u64 ext_len; + u64 clone_len; + u64 clone_data_offset; ++ bool crossed_src_i_size = false; + + if (slot >= btrfs_header_nritems(leaf)) { + ret = btrfs_next_leaf(clone_root->root, path); +@@ -5420,8 +5455,10 @@ static int clone_range(struct send_ctx *sctx, + if (key.offset >= clone_src_i_size) + break; + +- if (key.offset + ext_len > clone_src_i_size) ++ if (key.offset + ext_len > clone_src_i_size) { + ext_len = clone_src_i_size - key.offset; ++ crossed_src_i_size = true; ++ } + + clone_data_offset = btrfs_file_extent_offset(leaf, ei); + if (btrfs_file_extent_disk_bytenr(leaf, ei) == disk_byte) { +@@ -5481,6 +5518,25 @@ static int clone_range(struct send_ctx *sctx, + ret = send_clone(sctx, offset, clone_len, + clone_root); + } ++ } else if (crossed_src_i_size && clone_len < len) { ++ /* ++ * If we are at i_size of the clone source inode and we ++ * can not clone from it, terminate the loop. This is ++ * to avoid sending two write operations, one with a ++ * length matching clone_len and the final one after ++ * this loop with a length of len - clone_len. ++ * ++ * When using encoded writes (BTRFS_SEND_FLAG_COMPRESSED ++ * was passed to the send ioctl), this helps avoid ++ * sending an encoded write for an offset that is not ++ * sector size aligned, in case the i_size of the source ++ * inode is not sector size aligned. That will make the ++ * receiver fallback to decompression of the data and ++ * writing it using regular buffered IO, therefore while ++ * not incorrect, it's not optimal due decompression and ++ * possible re-compression at the receiver. 
++ */ ++ break; + } else { + ret = send_extent_data(sctx, offset, clone_len); + } +@@ -6592,6 +6648,50 @@ static int changed_cb(struct btrfs_path *left_path, { int ret = 0; @@ -304856,7 +363874,7 @@ index 72f9b865e8479..4d2c6ce29fe58 100644 if (result == BTRFS_COMPARE_TREE_SAME) { if (key->type == BTRFS_INODE_REF_KEY || key->type == BTRFS_INODE_EXTREF_KEY) { -@@ -6638,14 +6716,46 @@ out: +@@ -6638,14 +6738,46 @@ out: return ret; } @@ -304905,7 +363923,7 @@ index 72f9b865e8479..4d2c6ce29fe58 100644 path = alloc_path_for_send(); if (!path) -@@ -6656,6 +6766,10 @@ static int full_send_tree(struct send_ctx *sctx) +@@ -6656,6 +6788,10 @@ static int full_send_tree(struct send_ctx *sctx) key.type = BTRFS_INODE_ITEM_KEY; key.offset = 0; @@ -304916,7 +363934,7 @@ index 72f9b865e8479..4d2c6ce29fe58 100644 ret = btrfs_search_slot_for_read(send_root, &key, path, 1, 0); if (ret < 0) goto out; -@@ -6663,15 +6777,35 @@ static int full_send_tree(struct send_ctx *sctx) +@@ -6663,15 +6799,35 @@ static int full_send_tree(struct send_ctx *sctx) goto out_finish; while (1) { @@ -304955,7 +363973,7 @@ index 72f9b865e8479..4d2c6ce29fe58 100644 ret = btrfs_next_item(send_root, path); if (ret < 0) goto out; -@@ -6689,6 +6823,20 @@ out: +@@ -6689,6 +6845,20 @@ out: return ret; } @@ -304976,7 +363994,7 @@ index 72f9b865e8479..4d2c6ce29fe58 100644 static int tree_move_down(struct btrfs_path *path, int *level, u64 reada_min_gen) { struct extent_buffer *eb; -@@ -6698,6 +6846,8 @@ static int tree_move_down(struct btrfs_path *path, int *level, u64 reada_min_gen +@@ -6698,6 +6868,8 @@ static int tree_move_down(struct btrfs_path *path, int *level, u64 reada_min_gen u64 reada_max; u64 reada_done = 0; @@ -304985,7 +364003,7 @@ index 72f9b865e8479..4d2c6ce29fe58 100644 BUG_ON(*level == 0); eb = btrfs_read_node_slot(parent, slot); if (IS_ERR(eb)) -@@ -6721,6 +6871,10 @@ static int tree_move_down(struct btrfs_path *path, int *level, u64 reada_min_gen +@@ -6721,6 +6893,10 @@ static int tree_move_down(struct btrfs_path *path, int *level, u64 reada_min_gen path->nodes[*level - 1] = eb; path->slots[*level - 1] = 0; (*level)--; @@ -304996,7 +364014,7 @@ index 72f9b865e8479..4d2c6ce29fe58 100644 return 0; } -@@ -6734,8 +6888,10 @@ static int tree_move_next_or_upnext(struct btrfs_path *path, +@@ -6734,8 +6910,10 @@ static int tree_move_next_or_upnext(struct btrfs_path *path, path->slots[*level]++; while (path->slots[*level] >= nritems) { @@ -305008,7 +364026,7 @@ index 72f9b865e8479..4d2c6ce29fe58 100644 /* move upnext */ path->slots[*level] = 0; -@@ -6767,14 +6923,20 @@ static int tree_advance(struct btrfs_path *path, +@@ -6767,14 +6945,20 @@ static int tree_advance(struct btrfs_path *path, } else { ret = tree_move_down(path, level, reada_min_gen); } @@ -305037,7 +364055,7 @@ index 72f9b865e8479..4d2c6ce29fe58 100644 return ret; } -@@ -6803,6 +6965,97 @@ static int tree_compare_item(struct btrfs_path *left_path, +@@ -6803,6 +6987,97 @@ static int tree_compare_item(struct btrfs_path *left_path, return 0; } @@ -305135,7 +364153,7 @@ index 72f9b865e8479..4d2c6ce29fe58 100644 /* * This function compares two trees and calls the provided callback for * every changed/new/deleted item it finds. 
-@@ -6831,10 +7084,10 @@ static int btrfs_compare_trees(struct btrfs_root *left_root, +@@ -6831,10 +7106,10 @@ static int btrfs_compare_trees(struct btrfs_root *left_root, int right_root_level; int left_level; int right_level; @@ -305150,7 +364168,7 @@ index 72f9b865e8479..4d2c6ce29fe58 100644 u64 left_blockptr; u64 right_blockptr; u64 left_gen; -@@ -6902,12 +7155,18 @@ static int btrfs_compare_trees(struct btrfs_root *left_root, +@@ -6902,12 +7177,18 @@ static int btrfs_compare_trees(struct btrfs_root *left_root, down_read(&fs_info->commit_root_sem); left_level = btrfs_header_level(left_root->commit_root); left_root_level = left_level; @@ -305171,7 +364189,7 @@ index 72f9b865e8479..4d2c6ce29fe58 100644 } right_level = btrfs_header_level(right_root->commit_root); -@@ -6915,9 +7174,8 @@ static int btrfs_compare_trees(struct btrfs_root *left_root, +@@ -6915,9 +7196,8 @@ static int btrfs_compare_trees(struct btrfs_root *left_root, right_path->nodes[right_level] = btrfs_clone_extent_buffer(right_root->commit_root); if (!right_path->nodes[right_level]) { @@ -305182,7 +364200,7 @@ index 72f9b865e8479..4d2c6ce29fe58 100644 } /* * Our right root is the parent root, while the left root is the "send" -@@ -6927,7 +7185,6 @@ static int btrfs_compare_trees(struct btrfs_root *left_root, +@@ -6927,7 +7207,6 @@ static int btrfs_compare_trees(struct btrfs_root *left_root, * will need to read them at some point. */ reada_min_gen = btrfs_header_generation(right_root->commit_root); @@ -305190,7 +364208,7 @@ index 72f9b865e8479..4d2c6ce29fe58 100644 if (left_level == 0) btrfs_item_key_to_cpu(left_path->nodes[left_level], -@@ -6942,11 +7199,26 @@ static int btrfs_compare_trees(struct btrfs_root *left_root, +@@ -6942,11 +7221,26 @@ static int btrfs_compare_trees(struct btrfs_root *left_root, btrfs_node_key_to_cpu(right_path->nodes[right_level], &right_key, right_path->slots[right_level]); @@ -305220,7 +364238,7 @@ index 72f9b865e8479..4d2c6ce29fe58 100644 if (advance_left && !left_end_reached) { ret = tree_advance(left_path, &left_level, left_root_level, -@@ -6955,7 +7227,7 @@ static int btrfs_compare_trees(struct btrfs_root *left_root, +@@ -6955,7 +7249,7 @@ static int btrfs_compare_trees(struct btrfs_root *left_root, if (ret == -1) left_end_reached = ADVANCE; else if (ret < 0) @@ -305229,7 +364247,7 @@ index 72f9b865e8479..4d2c6ce29fe58 100644 advance_left = 0; } if (advance_right && !right_end_reached) { -@@ -6966,54 +7238,55 @@ static int btrfs_compare_trees(struct btrfs_root *left_root, +@@ -6966,54 +7260,55 @@ static int btrfs_compare_trees(struct btrfs_root *left_root, if (ret == -1) right_end_reached = ADVANCE; else if (ret < 0) @@ -305291,7 +364309,7 @@ index 72f9b865e8479..4d2c6ce29fe58 100644 advance_right = ADVANCE; } else { enum btrfs_compare_tree_result result; -@@ -7027,11 +7300,13 @@ static int btrfs_compare_trees(struct btrfs_root *left_root, +@@ -7027,11 +7322,13 @@ static int btrfs_compare_trees(struct btrfs_root *left_root, result = BTRFS_COMPARE_TREE_SAME; ret = changed_cb(left_path, right_path, &left_key, result, sctx); @@ -305307,7 +364325,7 @@ index 72f9b865e8479..4d2c6ce29fe58 100644 } else if (left_level == right_level) { cmp = btrfs_comp_cpu_keys(&left_key, &right_key); if (cmp < 0) { -@@ -7071,6 +7346,8 @@ static int btrfs_compare_trees(struct btrfs_root *left_root, +@@ -7071,6 +7368,8 @@ static int btrfs_compare_trees(struct btrfs_root *left_root, } } @@ -305316,7 +364334,21 @@ index 72f9b865e8479..4d2c6ce29fe58 100644 out: btrfs_free_path(left_path); btrfs_free_path(right_path); -@@ 
-7409,21 +7686,7 @@ long btrfs_ioctl_send(struct file *mnt_file, struct btrfs_ioctl_send_args *arg) +@@ -7250,10 +7549,10 @@ long btrfs_ioctl_send(struct file *mnt_file, struct btrfs_ioctl_send_args *arg) + /* + * Check that we don't overflow at later allocations, we request + * clone_sources_count + 1 items, and compare to unsigned long inside +- * access_ok. ++ * access_ok. Also set an upper limit for allocation size so this can't ++ * easily exhaust memory. Max number of clone sources is about 200K. + */ +- if (arg->clone_sources_count > +- ULONG_MAX / sizeof(struct clone_root) - 1) { ++ if (arg->clone_sources_count > SZ_8M / sizeof(struct clone_root)) { + ret = -EINVAL; + goto out; + } +@@ -7409,21 +7708,7 @@ long btrfs_ioctl_send(struct file *mnt_file, struct btrfs_ioctl_send_args *arg) if (ret) goto out; @@ -305360,7 +364392,7 @@ index aa5be0b24987a..5ed66a794e577 100644 * We don't want to reclaim everything, just a portion, so scale * down the to_reclaim by 1/4. If it takes us down to 0, diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c -index 537d90bf5d844..61b84391be58c 100644 +index 537d90bf5d844..4ff55457f9021 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -574,6 +574,7 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options, @@ -305540,7 +364572,94 @@ index 537d90bf5d844..61b84391be58c 100644 if (fs_info->fs_devices->rw_devices == 0) { ret = -EACCES; goto restore; -@@ -2463,30 +2508,16 @@ static int btrfs_unfreeze(struct super_block *sb) +@@ -2452,41 +2497,103 @@ static int btrfs_freeze(struct super_block *sb) + return btrfs_commit_transaction(trans); + } + ++static int check_dev_super(struct btrfs_device *dev) ++{ ++ struct btrfs_fs_info *fs_info = dev->fs_info; ++ struct btrfs_super_block *sb; ++ u16 csum_type; ++ int ret = 0; ++ ++ /* This should be called with fs still frozen. */ ++ ASSERT(test_bit(BTRFS_FS_FROZEN, &fs_info->flags)); ++ ++ /* Missing dev, no need to check. */ ++ if (!dev->bdev) ++ return 0; ++ ++ /* Only need to check the primary super block. */ ++ sb = btrfs_read_dev_one_super(dev->bdev, 0, true); ++ if (IS_ERR(sb)) ++ return PTR_ERR(sb); ++ ++ /* Verify the checksum. */ ++ csum_type = btrfs_super_csum_type(sb); ++ if (csum_type != btrfs_super_csum_type(fs_info->super_copy)) { ++ btrfs_err(fs_info, "csum type changed, has %u expect %u", ++ csum_type, btrfs_super_csum_type(fs_info->super_copy)); ++ ret = -EUCLEAN; ++ goto out; ++ } ++ ++ if (btrfs_check_super_csum(fs_info, sb)) { ++ btrfs_err(fs_info, "csum for on-disk super block no longer matches"); ++ ret = -EUCLEAN; ++ goto out; ++ } ++ ++ /* Btrfs_validate_super() includes fsid check against super->fsid. */ ++ ret = btrfs_validate_super(fs_info, sb, 0); ++ if (ret < 0) ++ goto out; ++ ++ if (btrfs_super_generation(sb) != fs_info->last_trans_committed) { ++ btrfs_err(fs_info, "transid mismatch, has %llu expect %llu", ++ btrfs_super_generation(sb), ++ fs_info->last_trans_committed); ++ ret = -EUCLEAN; ++ goto out; ++ } ++out: ++ btrfs_release_disk_super(sb); ++ return ret; ++} ++ + static int btrfs_unfreeze(struct super_block *sb) + { + struct btrfs_fs_info *fs_info = btrfs_sb(sb); ++ struct btrfs_device *device; ++ int ret = 0; + ++ /* ++ * Make sure the fs is not changed by accident (like hibernation then ++ * modified by other OS). ++ * If we found anything wrong, we mark the fs error immediately. ++ * ++ * And since the fs is frozen, no one can modify the fs yet, thus ++ * we don't need to hold device_list_mutex. 
++ */ ++ list_for_each_entry(device, &fs_info->fs_devices->devices, dev_list) { ++ ret = check_dev_super(device); ++ if (ret < 0) { ++ btrfs_handle_fs_error(fs_info, ret, ++ "super block on devid %llu got modified unexpectedly", ++ device->devid); ++ break; ++ } ++ } + clear_bit(BTRFS_FS_FROZEN, &fs_info->flags); ++ ++ /* ++ * We still return 0, to allow VFS layer to unfreeze the fs even the ++ * above checks failed. Since the fs is either fine or read-only, we're ++ * safe to continue, without causing further damage. ++ */ + return 0; + } + static int btrfs_show_devname(struct seq_file *m, struct dentry *root) { struct btrfs_fs_info *fs_info = btrfs_sb(root->d_sb); @@ -305576,6 +364695,174 @@ index 537d90bf5d844..61b84391be58c 100644 return 0; } +diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c +index 25a6f587852be..1c40e51513210 100644 +--- a/fs/btrfs/sysfs.c ++++ b/fs/btrfs/sysfs.c +@@ -2035,8 +2035,11 @@ int __init btrfs_init_sysfs(void) + + #ifdef CONFIG_BTRFS_DEBUG + ret = sysfs_create_group(&btrfs_kset->kobj, &btrfs_debug_feature_attr_group); +- if (ret) +- goto out2; ++ if (ret) { ++ sysfs_unmerge_group(&btrfs_kset->kobj, ++ &btrfs_static_feature_attr_group); ++ goto out_remove_group; ++ } + #endif + + return 0; +diff --git a/fs/btrfs/tests/btrfs-tests.c b/fs/btrfs/tests/btrfs-tests.c +index 3a4099a2bf051..3df9904972546 100644 +--- a/fs/btrfs/tests/btrfs-tests.c ++++ b/fs/btrfs/tests/btrfs-tests.c +@@ -199,7 +199,7 @@ void btrfs_free_dummy_fs_info(struct btrfs_fs_info *fs_info) + + void btrfs_free_dummy_root(struct btrfs_root *root) + { +- if (!root) ++ if (IS_ERR_OR_NULL(root)) + return; + /* Will be freed by btrfs_free_fs_roots */ + if (WARN_ON(test_bit(BTRFS_ROOT_IN_RADIX, &root->state))) +diff --git a/fs/btrfs/tests/qgroup-tests.c b/fs/btrfs/tests/qgroup-tests.c +index 19ba7d5b7d8ff..08c1abd6bb0c8 100644 +--- a/fs/btrfs/tests/qgroup-tests.c ++++ b/fs/btrfs/tests/qgroup-tests.c +@@ -225,20 +225,20 @@ static int test_no_shared_qgroup(struct btrfs_root *root, + */ + ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots, false); + if (ret) { +- ulist_free(old_roots); + test_err("couldn't find old roots: %d", ret); + return ret; + } + + ret = insert_normal_tree_ref(root, nodesize, nodesize, 0, + BTRFS_FS_TREE_OBJECTID); +- if (ret) ++ if (ret) { ++ ulist_free(old_roots); + return ret; ++ } + + ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots, false); + if (ret) { + ulist_free(old_roots); +- ulist_free(new_roots); + test_err("couldn't find old roots: %d", ret); + return ret; + } +@@ -250,29 +250,31 @@ static int test_no_shared_qgroup(struct btrfs_root *root, + return ret; + } + ++ /* btrfs_qgroup_account_extent() always frees the ulists passed to it. 
*/ ++ old_roots = NULL; ++ new_roots = NULL; ++ + if (btrfs_verify_qgroup_counts(fs_info, BTRFS_FS_TREE_OBJECTID, + nodesize, nodesize)) { + test_err("qgroup counts didn't match expected values"); + return -EINVAL; + } +- old_roots = NULL; +- new_roots = NULL; + + ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots, false); + if (ret) { +- ulist_free(old_roots); + test_err("couldn't find old roots: %d", ret); + return ret; + } + + ret = remove_extent_item(root, nodesize, nodesize); +- if (ret) ++ if (ret) { ++ ulist_free(old_roots); + return -EINVAL; ++ } + + ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots, false); + if (ret) { + ulist_free(old_roots); +- ulist_free(new_roots); + test_err("couldn't find old roots: %d", ret); + return ret; + } +@@ -322,20 +324,20 @@ static int test_multiple_refs(struct btrfs_root *root, + + ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots, false); + if (ret) { +- ulist_free(old_roots); + test_err("couldn't find old roots: %d", ret); + return ret; + } + + ret = insert_normal_tree_ref(root, nodesize, nodesize, 0, + BTRFS_FS_TREE_OBJECTID); +- if (ret) ++ if (ret) { ++ ulist_free(old_roots); + return ret; ++ } + + ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots, false); + if (ret) { + ulist_free(old_roots); +- ulist_free(new_roots); + test_err("couldn't find old roots: %d", ret); + return ret; + } +@@ -355,20 +357,20 @@ static int test_multiple_refs(struct btrfs_root *root, + + ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots, false); + if (ret) { +- ulist_free(old_roots); + test_err("couldn't find old roots: %d", ret); + return ret; + } + + ret = add_tree_ref(root, nodesize, nodesize, 0, + BTRFS_FIRST_FREE_OBJECTID); +- if (ret) ++ if (ret) { ++ ulist_free(old_roots); + return ret; ++ } + + ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots, false); + if (ret) { + ulist_free(old_roots); +- ulist_free(new_roots); + test_err("couldn't find old roots: %d", ret); + return ret; + } +@@ -394,20 +396,20 @@ static int test_multiple_refs(struct btrfs_root *root, + + ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots, false); + if (ret) { +- ulist_free(old_roots); + test_err("couldn't find old roots: %d", ret); + return ret; + } + + ret = remove_extent_ref(root, nodesize, nodesize, 0, + BTRFS_FIRST_FREE_OBJECTID); +- if (ret) ++ if (ret) { ++ ulist_free(old_roots); + return ret; ++ } + + ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots, false); + if (ret) { + ulist_free(old_roots); +- ulist_free(new_roots); + test_err("couldn't find old roots: %d", ret); + return ret; + } diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 14b9fdc8aaa9a..642cd2b55fa08 100644 --- a/fs/btrfs/transaction.c @@ -305863,8 +365150,25 @@ index 7733e8ac0a698..a84d2d4895104 100644 break; case BTRFS_TREE_BLOCK_REF_KEY: case BTRFS_SHARED_DATA_REF_KEY: +diff --git a/fs/btrfs/tree-defrag.c b/fs/btrfs/tree-defrag.c +index 7c45d960b53c6..259a3b5f93032 100644 +--- a/fs/btrfs/tree-defrag.c ++++ b/fs/btrfs/tree-defrag.c +@@ -39,8 +39,10 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, + goto out; + + path = btrfs_alloc_path(); +- if (!path) +- return -ENOMEM; ++ if (!path) { ++ ret = -ENOMEM; ++ goto out; ++ } + + level = btrfs_header_level(root->node); + diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c -index b415c5ec03ea0..7272896587302 100644 +index b415c5ec03ea0..7c0c6fc0c536b 100644 --- a/fs/btrfs/tree-log.c +++ 
b/fs/btrfs/tree-log.c @@ -761,7 +761,7 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans, @@ -306285,7 +365589,15 @@ index b415c5ec03ea0..7272896587302 100644 } else { if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &next->bflags)) clear_extent_buffer_dirty(next); -@@ -3192,6 +3179,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, +@@ -3154,7 +3141,6 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, + ret = 0; + if (ret) { + blk_finish_plug(&plug); +- btrfs_abort_transaction(trans, ret); + btrfs_set_log_full_commit(trans); + mutex_unlock(&root->log_mutex); + goto out; +@@ -3192,6 +3178,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, ret = btrfs_alloc_log_tree_node(trans, log_root_tree); if (ret) { mutex_unlock(&fs_info->tree_root->log_mutex); @@ -306293,7 +365605,15 @@ index b415c5ec03ea0..7272896587302 100644 goto out; } } -@@ -3399,6 +3387,29 @@ static void free_log_tree(struct btrfs_trans_handle *trans, +@@ -3285,7 +3272,6 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, + goto out_wake_log_root; + } else if (ret) { + btrfs_set_log_full_commit(trans); +- btrfs_abort_transaction(trans, ret); + mutex_unlock(&log_root_tree->log_mutex); + goto out_wake_log_root; + } +@@ -3399,6 +3385,29 @@ static void free_log_tree(struct btrfs_trans_handle *trans, if (log->node) { ret = walk_log_tree(trans, log, &wc); if (ret) { @@ -306323,7 +365643,7 @@ index b415c5ec03ea0..7272896587302 100644 if (trans) btrfs_abort_transaction(trans, ret); else -@@ -3410,8 +3421,6 @@ static void free_log_tree(struct btrfs_trans_handle *trans, +@@ -3410,8 +3419,6 @@ static void free_log_tree(struct btrfs_trans_handle *trans, EXTENT_DIRTY | EXTENT_NEW | EXTENT_NEED_WAIT); extent_io_tree_release(&log->log_csum_range); @@ -306332,7 +365652,7 @@ index b415c5ec03ea0..7272896587302 100644 btrfs_put_root(log); } -@@ -4419,7 +4428,7 @@ static int log_one_extent(struct btrfs_trans_handle *trans, +@@ -4419,7 +4426,7 @@ static int log_one_extent(struct btrfs_trans_handle *trans, /* * Log all prealloc extents beyond the inode's i_size to make sure we do not @@ -306341,7 +365661,7 @@ index b415c5ec03ea0..7272896587302 100644 * subvolume's root instead of iterating the inode's extent map tree because * otherwise we can log incorrect extent items based on extent map conversion. 
* That can happen due to the fact that extent maps are merged when they -@@ -5204,6 +5213,7 @@ static int copy_inode_items_to_log(struct btrfs_trans_handle *trans, +@@ -5204,6 +5211,7 @@ static int copy_inode_items_to_log(struct btrfs_trans_handle *trans, struct btrfs_log_ctx *ctx, bool *need_log_inode_item) { @@ -306349,7 +365669,7 @@ index b415c5ec03ea0..7272896587302 100644 struct btrfs_root *root = inode->root; int ins_start_slot = 0; int ins_nr = 0; -@@ -5224,13 +5234,21 @@ again: +@@ -5224,13 +5232,21 @@ again: if (min_key->type > max_key->type) break; @@ -306377,7 +365697,7 @@ index b415c5ec03ea0..7272896587302 100644 u64 other_ino = 0; u64 other_parent = 0; -@@ -5261,10 +5279,8 @@ again: +@@ -5261,10 +5277,8 @@ again: btrfs_release_path(path); goto next_key; } @@ -306390,7 +365710,7 @@ index b415c5ec03ea0..7272896587302 100644 if (ins_nr == 0) goto next_slot; ret = copy_items(trans, inode, dst_path, path, -@@ -5317,9 +5333,21 @@ next_key: +@@ -5317,9 +5331,21 @@ next_key: break; } } @@ -306413,7 +365733,7 @@ index b415c5ec03ea0..7272896587302 100644 return ret; } -@@ -5418,6 +5446,18 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, +@@ -5418,6 +5444,18 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, mutex_lock(&inode->log_mutex); } @@ -306432,7 +365752,7 @@ index b415c5ec03ea0..7272896587302 100644 /* * This is for cases where logging a directory could result in losing a * a file after replaying the log. For example, if we move a file from a -@@ -5788,7 +5828,7 @@ process_leaf: +@@ -5788,7 +5826,7 @@ process_leaf: } ctx->log_new_dentries = false; @@ -306441,7 +365761,7 @@ index b415c5ec03ea0..7272896587302 100644 log_mode = LOG_INODE_ALL; ret = btrfs_log_inode(trans, root, BTRFS_I(di_inode), log_mode, ctx); -@@ -6549,14 +6589,25 @@ void btrfs_record_snapshot_destroy(struct btrfs_trans_handle *trans, +@@ -6549,14 +6587,25 @@ void btrfs_record_snapshot_destroy(struct btrfs_trans_handle *trans, mutex_unlock(&dir->log_mutex); } @@ -306485,7 +365805,7 @@ index 731bd9c029f55..7ffcac8a89905 100644 #endif diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c -index 2ec3b8ac8fa35..0f22d91e23927 100644 +index 2ec3b8ac8fa35..24f7ba1478ede 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -14,6 +14,7 @@ @@ -306505,7 +365825,15 @@ index 2ec3b8ac8fa35..0f22d91e23927 100644 int mirror_num, int need_raid_map); /* -@@ -529,15 +530,48 @@ error: +@@ -408,6 +409,7 @@ void btrfs_free_device(struct btrfs_device *device) + static void free_fs_devices(struct btrfs_fs_devices *fs_devices) + { + struct btrfs_device *device; ++ + WARN_ON(fs_devices->opened); + while (!list_empty(&fs_devices->devices)) { + device = list_entry(fs_devices->devices.next, +@@ -529,15 +531,48 @@ error: return ret; } @@ -306558,7 +365886,7 @@ index 2ec3b8ac8fa35..0f22d91e23927 100644 } /* -@@ -570,9 +604,7 @@ static int btrfs_free_stale_devices(const char *path, +@@ -570,9 +605,7 @@ static int btrfs_free_stale_devices(const char *path, &fs_devices->devices, dev_list) { if (skip_device && skip_device == device) continue; @@ -306569,7 +365897,7 @@ index 2ec3b8ac8fa35..0f22d91e23927 100644 continue; if (fs_devices->opened) { /* for an already deleted device return 0 */ -@@ -812,9 +844,13 @@ static noinline struct btrfs_device *device_list_add(const char *path, +@@ -812,9 +845,13 @@ static noinline struct btrfs_device *device_list_add(const char *path, device = NULL; } else { @@ -306585,7 +365913,7 @@ index 2ec3b8ac8fa35..0f22d91e23927 100644 /* * If this disk has been pulled into an fs devices 
created by -@@ -919,6 +955,11 @@ static noinline struct btrfs_device *device_list_add(const char *path, +@@ -919,6 +956,11 @@ static noinline struct btrfs_device *device_list_add(const char *path, /* * We are going to replace the device path for a given devid, * make sure it's the same device if the device is mounted @@ -306597,7 +365925,7 @@ index 2ec3b8ac8fa35..0f22d91e23927 100644 */ if (device->bdev) { int error; -@@ -932,12 +973,6 @@ static noinline struct btrfs_device *device_list_add(const char *path, +@@ -932,12 +974,6 @@ static noinline struct btrfs_device *device_list_add(const char *path, if (device->bdev->bd_dev != path_dev) { mutex_unlock(&fs_devices->device_list_mutex); @@ -306610,7 +365938,7 @@ index 2ec3b8ac8fa35..0f22d91e23927 100644 btrfs_warn_in_rcu(NULL, "duplicate device %s devid %llu generation %llu scanned by %s (%d)", path, devid, found_transid, -@@ -945,7 +980,7 @@ static noinline struct btrfs_device *device_list_add(const char *path, +@@ -945,7 +981,7 @@ static noinline struct btrfs_device *device_list_add(const char *path, task_pid_nr(current)); return ERR_PTR(-EEXIST); } @@ -306619,7 +365947,7 @@ index 2ec3b8ac8fa35..0f22d91e23927 100644 "devid %llu device path %s changed to %s scanned by %s (%d)", devid, rcu_str_deref(device->name), path, current->comm, -@@ -1091,7 +1126,7 @@ void btrfs_free_extra_devids(struct btrfs_fs_devices *fs_devices) +@@ -1091,7 +1127,7 @@ void btrfs_free_extra_devids(struct btrfs_fs_devices *fs_devices) list_for_each_entry(seed_dev, &fs_devices->seed_list, seed_list) __btrfs_free_extra_devids(seed_dev, &latest_dev); @@ -306628,7 +365956,7 @@ index 2ec3b8ac8fa35..0f22d91e23927 100644 mutex_unlock(&uuid_mutex); } -@@ -1122,8 +1157,10 @@ static void btrfs_close_one_device(struct btrfs_device *device) +@@ -1122,8 +1158,10 @@ static void btrfs_close_one_device(struct btrfs_device *device) if (device->devid == BTRFS_DEV_REPLACE_DEVID) clear_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state); @@ -306640,7 +365968,31 @@ index 2ec3b8ac8fa35..0f22d91e23927 100644 btrfs_close_bdev(device); if (device->bdev) { -@@ -1222,7 +1259,7 @@ static int open_fs_devices(struct btrfs_fs_devices *fs_devices, +@@ -1184,9 +1222,22 @@ void btrfs_close_devices(struct btrfs_fs_devices *fs_devices) + + mutex_lock(&uuid_mutex); + close_fs_devices(fs_devices); +- if (!fs_devices->opened) ++ if (!fs_devices->opened) { + list_splice_init(&fs_devices->seed_list, &list); + ++ /* ++ * If the struct btrfs_fs_devices is not assembled with any ++ * other device, it can be re-initialized during the next mount ++ * without the needing device-scan step. Therefore, it can be ++ * fully freed. 
++ */ ++ if (fs_devices->num_devices == 1) { ++ list_del(&fs_devices->fs_list); ++ free_fs_devices(fs_devices); ++ } ++ } ++ ++ + list_for_each_entry_safe(fs_devices, tmp, &list, seed_list) { + close_fs_devices(fs_devices); + list_del(&fs_devices->seed_list); +@@ -1222,7 +1273,7 @@ static int open_fs_devices(struct btrfs_fs_devices *fs_devices, return -EINVAL; fs_devices->opened = 1; @@ -306649,7 +366001,7 @@ index 2ec3b8ac8fa35..0f22d91e23927 100644 fs_devices->total_rw_bytes = 0; fs_devices->chunk_alloc_policy = BTRFS_CHUNK_ALLOC_REGULAR; fs_devices->read_policy = BTRFS_READ_POLICY_PID; -@@ -1363,8 +1400,10 @@ struct btrfs_device *btrfs_scan_one_device(const char *path, fmode_t flags, +@@ -1363,8 +1414,10 @@ struct btrfs_device *btrfs_scan_one_device(const char *path, fmode_t flags, bytenr_orig = btrfs_sb_offset(0); ret = btrfs_sb_log_location_bdev(bdev, 0, READ, &bytenr); @@ -306662,7 +366014,34 @@ index 2ec3b8ac8fa35..0f22d91e23927 100644 disk_super = btrfs_read_disk_super(bdev, bytenr, bytenr_orig); if (IS_ERR(disk_super)) { -@@ -1843,8 +1882,10 @@ static int btrfs_add_dev_item(struct btrfs_trans_handle *trans, +@@ -1607,7 +1660,7 @@ again: + if (ret < 0) + goto out; + +- while (1) { ++ while (search_start < search_end) { + l = path->nodes[0]; + slot = path->slots[0]; + if (slot >= btrfs_header_nritems(l)) { +@@ -1630,6 +1683,9 @@ again: + if (key.type != BTRFS_DEV_EXTENT_KEY) + goto next; + ++ if (key.offset > search_end) ++ break; ++ + if (key.offset > search_start) { + hole_size = key.offset - search_start; + dev_extent_hole_check(device, &search_start, &hole_size, +@@ -1690,6 +1746,7 @@ next: + else + ret = 0; + ++ ASSERT(max_hole_start + max_hole_size <= search_end); + out: + btrfs_free_path(path); + *start = max_hole_start; +@@ -1843,8 +1900,10 @@ static int btrfs_add_dev_item(struct btrfs_trans_handle *trans, key.type = BTRFS_DEV_ITEM_KEY; key.offset = device->devid; @@ -306673,7 +366052,7 @@ index 2ec3b8ac8fa35..0f22d91e23927 100644 if (ret) goto out; -@@ -1882,60 +1923,52 @@ out: +@@ -1882,60 +1941,52 @@ out: /* * Function to update ctime/mtime for a given device path. * Mainly used for ctime/mtime based probe like libblkid. @@ -306748,7 +366127,7 @@ index 2ec3b8ac8fa35..0f22d91e23927 100644 return ret; } -@@ -1986,7 +2019,7 @@ static struct btrfs_device * btrfs_find_next_active_device( +@@ -1986,7 +2037,7 @@ static struct btrfs_device * btrfs_find_next_active_device( } /* @@ -306757,7 +366136,7 @@ index 2ec3b8ac8fa35..0f22d91e23927 100644 * and replace it with the provided or the next active device, in the context * where this function called, there should be always be another device (or * this_dev) which is active. 
-@@ -2005,8 +2038,8 @@ void __cold btrfs_assign_next_active_device(struct btrfs_device *device, +@@ -2005,8 +2056,8 @@ void __cold btrfs_assign_next_active_device(struct btrfs_device *device, (fs_info->sb->s_bdev == device->bdev)) fs_info->sb->s_bdev = next_device->bdev; @@ -306768,7 +366147,16 @@ index 2ec3b8ac8fa35..0f22d91e23927 100644 } /* -@@ -2069,55 +2102,53 @@ void btrfs_scratch_superblocks(struct btrfs_fs_info *fs_info, +@@ -2041,7 +2092,7 @@ void btrfs_scratch_superblocks(struct btrfs_fs_info *fs_info, + struct page *page; + int ret; + +- disk_super = btrfs_read_dev_one_super(bdev, copy_num); ++ disk_super = btrfs_read_dev_one_super(bdev, copy_num, false); + if (IS_ERR(disk_super)) + continue; + +@@ -2069,55 +2120,53 @@ void btrfs_scratch_superblocks(struct btrfs_fs_info *fs_info, btrfs_kobject_uevent(bdev, KOBJ_CHANGE); /* Update ctime/mtime for device path for libblkid */ @@ -306845,7 +366233,7 @@ index 2ec3b8ac8fa35..0f22d91e23927 100644 if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state)) { mutex_lock(&fs_info->chunk_mutex); -@@ -2126,22 +2157,28 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info, const char *device_path, +@@ -2126,22 +2175,28 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info, const char *device_path, mutex_unlock(&fs_info->chunk_mutex); } @@ -306883,7 +366271,7 @@ index 2ec3b8ac8fa35..0f22d91e23927 100644 clear_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state); btrfs_scrub_cancel_dev(device); -@@ -2216,8 +2253,8 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info, const char *device_path, +@@ -2216,8 +2271,8 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info, const char *device_path, free_fs_devices(cur_devices); } @@ -306894,7 +366282,7 @@ index 2ec3b8ac8fa35..0f22d91e23927 100644 return ret; error_undo: -@@ -2229,7 +2266,7 @@ error_undo: +@@ -2229,7 +2284,7 @@ error_undo: device->fs_devices->rw_devices++; mutex_unlock(&fs_info->chunk_mutex); } @@ -306903,7 +366291,7 @@ index 2ec3b8ac8fa35..0f22d91e23927 100644 } void btrfs_rm_dev_replace_remove_srcdev(struct btrfs_device *srcdev) -@@ -2305,13 +2342,6 @@ void btrfs_destroy_dev_replace_tgtdev(struct btrfs_device *tgtdev) +@@ -2305,13 +2360,6 @@ void btrfs_destroy_dev_replace_tgtdev(struct btrfs_device *tgtdev) mutex_unlock(&fs_devices->device_list_mutex); @@ -306917,7 +366305,7 @@ index 2ec3b8ac8fa35..0f22d91e23927 100644 btrfs_scratch_superblocks(tgtdev->fs_info, tgtdev->bdev, tgtdev->name->str); -@@ -2320,69 +2350,101 @@ void btrfs_destroy_dev_replace_tgtdev(struct btrfs_device *tgtdev) +@@ -2320,69 +2368,101 @@ void btrfs_destroy_dev_replace_tgtdev(struct btrfs_device *tgtdev) btrfs_free_device(tgtdev); } @@ -307056,7 +366444,7 @@ index 2ec3b8ac8fa35..0f22d91e23927 100644 } /* -@@ -2459,6 +2521,7 @@ static int btrfs_prepare_sprout(struct btrfs_fs_info *fs_info) +@@ -2459,6 +2539,7 @@ static int btrfs_prepare_sprout(struct btrfs_fs_info *fs_info) */ static int btrfs_finish_sprout(struct btrfs_trans_handle *trans) { @@ -307064,7 +366452,7 @@ index 2ec3b8ac8fa35..0f22d91e23927 100644 struct btrfs_fs_info *fs_info = trans->fs_info; struct btrfs_root *root = fs_info->chunk_root; struct btrfs_path *path; -@@ -2468,7 +2531,6 @@ static int btrfs_finish_sprout(struct btrfs_trans_handle *trans) +@@ -2468,7 +2549,6 @@ static int btrfs_finish_sprout(struct btrfs_trans_handle *trans) struct btrfs_key key; u8 fs_uuid[BTRFS_FSID_SIZE]; u8 dev_uuid[BTRFS_UUID_SIZE]; @@ -307072,7 +366460,7 @@ index 2ec3b8ac8fa35..0f22d91e23927 100644 int ret; path = btrfs_alloc_path(); -@@ -2480,7 +2542,9 @@ static 
int btrfs_finish_sprout(struct btrfs_trans_handle *trans) +@@ -2480,7 +2560,9 @@ static int btrfs_finish_sprout(struct btrfs_trans_handle *trans) key.type = BTRFS_DEV_ITEM_KEY; while (1) { @@ -307082,7 +366470,7 @@ index 2ec3b8ac8fa35..0f22d91e23927 100644 if (ret < 0) goto error; -@@ -2505,13 +2569,14 @@ next_slot: +@@ -2505,13 +2587,14 @@ next_slot: dev_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dev_item); @@ -307100,7 +366488,7 @@ index 2ec3b8ac8fa35..0f22d91e23927 100644 BUG_ON(!device); /* Logic error */ if (device->fs_devices->seeding) { -@@ -2594,7 +2659,7 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path +@@ -2594,7 +2677,7 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path device->fs_info = fs_info; device->bdev = bdev; @@ -307109,7 +366497,7 @@ index 2ec3b8ac8fa35..0f22d91e23927 100644 if (ret) goto error_free_device; -@@ -2627,6 +2692,8 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path +@@ -2627,6 +2710,8 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path btrfs_abort_transaction(trans, ret); goto error_trans; } @@ -307118,7 +366506,7 @@ index 2ec3b8ac8fa35..0f22d91e23927 100644 } device->fs_devices = fs_devices; -@@ -2733,7 +2800,7 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path +@@ -2733,7 +2818,7 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path btrfs_forget_devices(device_path); /* Update ctime/mtime for blkid or udev */ @@ -307127,7 +366515,7 @@ index 2ec3b8ac8fa35..0f22d91e23927 100644 return ret; -@@ -2826,6 +2893,7 @@ int btrfs_grow_device(struct btrfs_trans_handle *trans, +@@ -2826,6 +2911,7 @@ int btrfs_grow_device(struct btrfs_trans_handle *trans, struct btrfs_super_block *super_copy = fs_info->super_copy; u64 old_total; u64 diff; @@ -307135,7 +366523,7 @@ index 2ec3b8ac8fa35..0f22d91e23927 100644 if (!test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state)) return -EACCES; -@@ -2854,7 +2922,11 @@ int btrfs_grow_device(struct btrfs_trans_handle *trans, +@@ -2854,7 +2940,11 @@ int btrfs_grow_device(struct btrfs_trans_handle *trans, &trans->transaction->dev_update_list); mutex_unlock(&fs_info->chunk_mutex); @@ -307148,7 +366536,7 @@ index 2ec3b8ac8fa35..0f22d91e23927 100644 } static int btrfs_free_chunk(struct btrfs_trans_handle *trans, u64 chunk_offset) -@@ -3096,7 +3168,7 @@ int btrfs_remove_chunk(struct btrfs_trans_handle *trans, u64 chunk_offset) +@@ -3096,7 +3186,7 @@ int btrfs_remove_chunk(struct btrfs_trans_handle *trans, u64 chunk_offset) const u64 sys_flags = btrfs_system_alloc_profile(fs_info); struct btrfs_block_group *sys_bg; @@ -307157,7 +366545,7 @@ index 2ec3b8ac8fa35..0f22d91e23927 100644 if (IS_ERR(sys_bg)) { ret = PTR_ERR(sys_bg); btrfs_abort_transaction(trans, ret); -@@ -4354,10 +4426,12 @@ static int balance_kthread(void *data) +@@ -4354,10 +4444,12 @@ static int balance_kthread(void *data) struct btrfs_fs_info *fs_info = data; int ret = 0; @@ -307170,7 +366558,7 @@ index 2ec3b8ac8fa35..0f22d91e23927 100644 return ret; } -@@ -4889,8 +4963,10 @@ again: +@@ -4889,8 +4981,10 @@ again: round_down(old_total - diff, fs_info->sectorsize)); mutex_unlock(&fs_info->chunk_mutex); @@ -307181,7 +366569,7 @@ index 2ec3b8ac8fa35..0f22d91e23927 100644 if (ret < 0) { btrfs_abort_transaction(trans, ret); btrfs_end_transaction(trans); -@@ -4973,7 +5049,7 @@ static void check_raid1c34_incompat_flag(struct btrfs_fs_info *info, u64 type) +@@ -4973,7 +5067,7 
@@ static void check_raid1c34_incompat_flag(struct btrfs_fs_info *info, u64 type) } /* @@ -307190,7 +366578,7 @@ index 2ec3b8ac8fa35..0f22d91e23927 100644 * Wraps needed parameters. */ struct alloc_chunk_ctl { -@@ -5377,7 +5453,7 @@ error_del_extent: +@@ -5377,7 +5471,7 @@ error_del_extent: return block_group; } @@ -307199,7 +366587,7 @@ index 2ec3b8ac8fa35..0f22d91e23927 100644 u64 type) { struct btrfs_fs_info *info = trans->fs_info; -@@ -5578,12 +5654,12 @@ static noinline int init_first_rw_device(struct btrfs_trans_handle *trans) +@@ -5578,12 +5672,12 @@ static noinline int init_first_rw_device(struct btrfs_trans_handle *trans) */ alloc_profile = btrfs_metadata_alloc_profile(fs_info); @@ -307214,7 +366602,7 @@ index 2ec3b8ac8fa35..0f22d91e23927 100644 if (IS_ERR(sys_bg)) return PTR_ERR(sys_bg); -@@ -5795,7 +5871,7 @@ static int find_live_mirror(struct btrfs_fs_info *fs_info, +@@ -5795,7 +5889,7 @@ static int find_live_mirror(struct btrfs_fs_info *fs_info, } /* Bubble-sort the stripe set to put the parity/syndrome stripes last */ @@ -307223,7 +366611,7 @@ index 2ec3b8ac8fa35..0f22d91e23927 100644 { int i; int again = 1; -@@ -5804,52 +5880,53 @@ static void sort_parity_stripes(struct btrfs_bio *bbio, int num_stripes) +@@ -5804,52 +5898,53 @@ static void sort_parity_stripes(struct btrfs_bio *bbio, int num_stripes) again = 0; for (i = 0; i < num_stripes - 1; i++) { /* Swap if parity is on a smaller index */ @@ -307301,7 +366689,7 @@ index 2ec3b8ac8fa35..0f22d91e23927 100644 } /* can REQ_OP_DISCARD be sent with other REQ like REQ_OP_WRITE? */ -@@ -5859,11 +5936,11 @@ void btrfs_put_bbio(struct btrfs_bio *bbio) +@@ -5859,11 +5954,11 @@ void btrfs_put_bbio(struct btrfs_bio *bbio) */ static int __btrfs_map_block_for_discard(struct btrfs_fs_info *fs_info, u64 logical, u64 *length_ret, @@ -307315,7 +366703,7 @@ index 2ec3b8ac8fa35..0f22d91e23927 100644 u64 length = *length_ret; u64 offset; u64 stripe_nr; -@@ -5882,8 +5959,8 @@ static int __btrfs_map_block_for_discard(struct btrfs_fs_info *fs_info, +@@ -5882,8 +5977,8 @@ static int __btrfs_map_block_for_discard(struct btrfs_fs_info *fs_info, int ret = 0; int i; @@ -307326,7 +366714,7 @@ index 2ec3b8ac8fa35..0f22d91e23927 100644 em = btrfs_get_chunk_map(fs_info, logical, length); if (IS_ERR(em)) -@@ -5946,26 +6023,25 @@ static int __btrfs_map_block_for_discard(struct btrfs_fs_info *fs_info, +@@ -5946,26 +6041,25 @@ static int __btrfs_map_block_for_discard(struct btrfs_fs_info *fs_info, &stripe_index); } @@ -307359,7 +366747,7 @@ index 2ec3b8ac8fa35..0f22d91e23927 100644 /* * Special for the first stripe and -@@ -5976,19 +6052,17 @@ static int __btrfs_map_block_for_discard(struct btrfs_fs_info *fs_info, +@@ -5976,19 +6070,17 @@ static int __btrfs_map_block_for_discard(struct btrfs_fs_info *fs_info, * off end_off */ if (i < sub_stripes) @@ -307382,7 +366770,7 @@ index 2ec3b8ac8fa35..0f22d91e23927 100644 } stripe_index++; -@@ -5998,9 +6072,9 @@ static int __btrfs_map_block_for_discard(struct btrfs_fs_info *fs_info, +@@ -5998,9 +6090,9 @@ static int __btrfs_map_block_for_discard(struct btrfs_fs_info *fs_info, } } @@ -307395,7 +366783,7 @@ index 2ec3b8ac8fa35..0f22d91e23927 100644 out: free_extent_map(em); return ret; -@@ -6024,7 +6098,7 @@ static int get_extra_mirror_from_replace(struct btrfs_fs_info *fs_info, +@@ -6024,7 +6116,7 @@ static int get_extra_mirror_from_replace(struct btrfs_fs_info *fs_info, u64 srcdev_devid, int *mirror_num, u64 *physical) { @@ -307404,7 +366792,7 @@ index 2ec3b8ac8fa35..0f22d91e23927 100644 int num_stripes; int 
index_srcdev = 0; int found = 0; -@@ -6033,20 +6107,20 @@ static int get_extra_mirror_from_replace(struct btrfs_fs_info *fs_info, +@@ -6033,20 +6125,20 @@ static int get_extra_mirror_from_replace(struct btrfs_fs_info *fs_info, int ret = 0; ret = __btrfs_map_block(fs_info, BTRFS_MAP_GET_READ_MIRRORS, @@ -307429,7 +366817,7 @@ index 2ec3b8ac8fa35..0f22d91e23927 100644 return -EIO; } -@@ -6056,7 +6130,7 @@ static int get_extra_mirror_from_replace(struct btrfs_fs_info *fs_info, +@@ -6056,7 +6148,7 @@ static int get_extra_mirror_from_replace(struct btrfs_fs_info *fs_info, * pointer to the one of the target drive. */ for (i = 0; i < num_stripes; i++) { @@ -307438,7 +366826,7 @@ index 2ec3b8ac8fa35..0f22d91e23927 100644 continue; /* -@@ -6064,15 +6138,15 @@ static int get_extra_mirror_from_replace(struct btrfs_fs_info *fs_info, +@@ -6064,15 +6156,15 @@ static int get_extra_mirror_from_replace(struct btrfs_fs_info *fs_info, * mirror with the lowest physical address */ if (found && @@ -307457,7 +366845,7 @@ index 2ec3b8ac8fa35..0f22d91e23927 100644 ASSERT(found); if (!found) -@@ -6103,12 +6177,12 @@ static bool is_block_group_to_copy(struct btrfs_fs_info *fs_info, u64 logical) +@@ -6103,12 +6195,12 @@ static bool is_block_group_to_copy(struct btrfs_fs_info *fs_info, u64 logical) } static void handle_ops_on_dev_replace(enum btrfs_map_op op, @@ -307472,7 +366860,7 @@ index 2ec3b8ac8fa35..0f22d91e23927 100644 u64 srcdev_devid = dev_replace->srcdev->devid; int tgtdev_indexes = 0; int num_stripes = *num_stripes_ret; -@@ -6138,17 +6212,17 @@ static void handle_ops_on_dev_replace(enum btrfs_map_op op, +@@ -6138,17 +6230,17 @@ static void handle_ops_on_dev_replace(enum btrfs_map_op op, */ index_where_to_add = num_stripes; for (i = 0; i < num_stripes; i++) { @@ -307496,7 +366884,7 @@ index 2ec3b8ac8fa35..0f22d91e23927 100644 index_where_to_add++; max_errors++; tgtdev_indexes++; -@@ -6168,30 +6242,29 @@ static void handle_ops_on_dev_replace(enum btrfs_map_op op, +@@ -6168,30 +6260,29 @@ static void handle_ops_on_dev_replace(enum btrfs_map_op op, * full copy of the source drive. 
*/ for (i = 0; i < num_stripes; i++) { @@ -307534,7 +366922,7 @@ index 2ec3b8ac8fa35..0f22d91e23927 100644 tgtdev_indexes++; num_stripes++; -@@ -6200,8 +6273,8 @@ static void handle_ops_on_dev_replace(enum btrfs_map_op op, +@@ -6200,8 +6291,8 @@ static void handle_ops_on_dev_replace(enum btrfs_map_op op, *num_stripes_ret = num_stripes; *max_errors_ret = max_errors; @@ -307545,7 +366933,7 @@ index 2ec3b8ac8fa35..0f22d91e23927 100644 } static bool need_full_stripe(enum btrfs_map_op op) -@@ -6304,7 +6377,7 @@ int btrfs_get_io_geometry(struct btrfs_fs_info *fs_info, struct extent_map *em, +@@ -6304,7 +6395,7 @@ int btrfs_get_io_geometry(struct btrfs_fs_info *fs_info, struct extent_map *em, static int __btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op, u64 logical, u64 *length, @@ -307554,7 +366942,7 @@ index 2ec3b8ac8fa35..0f22d91e23927 100644 int mirror_num, int need_raid_map) { struct extent_map *em; -@@ -6319,7 +6392,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, +@@ -6319,7 +6410,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int num_stripes; int max_errors = 0; int tgtdev_indexes = 0; @@ -307563,7 +366951,7 @@ index 2ec3b8ac8fa35..0f22d91e23927 100644 struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace; int dev_replace_is_ongoing = 0; int num_alloc_stripes; -@@ -6328,7 +6401,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, +@@ -6328,7 +6419,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, u64 raid56_full_stripe_start = (u64)-1; struct btrfs_io_geometry geom; @@ -307572,7 +366960,7 @@ index 2ec3b8ac8fa35..0f22d91e23927 100644 ASSERT(op != BTRFS_MAP_DISCARD); em = btrfs_get_chunk_map(fs_info, logical, *length); -@@ -6472,20 +6545,20 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, +@@ -6472,20 +6563,20 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, tgtdev_indexes = num_stripes; } @@ -307598,7 +366986,7 @@ index 2ec3b8ac8fa35..0f22d91e23927 100644 if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK && need_raid_map && (need_full_stripe(op) || mirror_num > 1)) { u64 tmp; -@@ -6497,15 +6570,15 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, +@@ -6497,15 +6588,15 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, /* Fill in the logical address of each stripe */ tmp = stripe_nr * data_stripes; for (i = 0; i < data_stripes; i++) @@ -307618,7 +367006,7 @@ index 2ec3b8ac8fa35..0f22d91e23927 100644 } if (need_full_stripe(op)) -@@ -6513,15 +6586,15 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, +@@ -6513,15 +6604,15 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, if (dev_replace_is_ongoing && dev_replace->tgtdev != NULL && need_full_stripe(op)) { @@ -307640,7 +367028,7 @@ index 2ec3b8ac8fa35..0f22d91e23927 100644 /* * this is the case that REQ_READ && dev_replace_is_ongoing && -@@ -6530,9 +6603,9 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, +@@ -6530,9 +6621,9 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, */ if (patch_the_first_stripe_for_dev_replace && num_stripes > 0) { WARN_ON(num_stripes > 1); @@ -307653,7 +367041,7 @@ index 2ec3b8ac8fa35..0f22d91e23927 100644 } out: if (dev_replace_is_ongoing) { -@@ -6546,40 +6619,40 @@ out: +@@ -6546,40 +6637,40 @@ out: int btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op, u64 logical, u64 *length, @@ -307705,7 +367093,7 @@ index 2ec3b8ac8fa35..0f22d91e23927 100644 if (bio->bi_status == BLK_STS_IOERR || bio->bi_status == 
BLK_STS_TARGET) { struct btrfs_device *dev = btrfs_io_bio(bio)->device; -@@ -6597,22 +6670,22 @@ static void btrfs_end_bio(struct bio *bio) +@@ -6597,22 +6688,22 @@ static void btrfs_end_bio(struct bio *bio) } } @@ -307734,7 +367122,7 @@ index 2ec3b8ac8fa35..0f22d91e23927 100644 bio->bi_status = BLK_STS_IOERR; } else { /* -@@ -6622,18 +6695,18 @@ static void btrfs_end_bio(struct bio *bio) +@@ -6622,18 +6713,18 @@ static void btrfs_end_bio(struct bio *bio) bio->bi_status = BLK_STS_OK; } @@ -307757,7 +367145,7 @@ index 2ec3b8ac8fa35..0f22d91e23927 100644 btrfs_io_bio(bio)->device = dev; bio->bi_end_io = btrfs_end_bio; bio->bi_iter.bi_sector = physical >> 9; -@@ -6663,20 +6736,20 @@ static void submit_stripe_bio(struct btrfs_bio *bbio, struct bio *bio, +@@ -6663,20 +6754,20 @@ static void submit_stripe_bio(struct btrfs_bio *bbio, struct bio *bio, btrfsic_submit_bio(bio); } @@ -307785,7 +367173,7 @@ index 2ec3b8ac8fa35..0f22d91e23927 100644 } } -@@ -6691,35 +6764,35 @@ blk_status_t btrfs_map_bio(struct btrfs_fs_info *fs_info, struct bio *bio, +@@ -6691,35 +6782,35 @@ blk_status_t btrfs_map_bio(struct btrfs_fs_info *fs_info, struct bio *bio, int ret; int dev_nr; int total_devs; @@ -307832,7 +367220,7 @@ index 2ec3b8ac8fa35..0f22d91e23927 100644 map_length, mirror_num, 1); } -@@ -6735,12 +6808,12 @@ blk_status_t btrfs_map_bio(struct btrfs_fs_info *fs_info, struct bio *bio, +@@ -6735,12 +6826,12 @@ blk_status_t btrfs_map_bio(struct btrfs_fs_info *fs_info, struct bio *bio, } for (dev_nr = 0; dev_nr < total_devs; dev_nr++) { @@ -307847,7 +367235,7 @@ index 2ec3b8ac8fa35..0f22d91e23927 100644 continue; } -@@ -6749,12 +6822,39 @@ blk_status_t btrfs_map_bio(struct btrfs_fs_info *fs_info, struct bio *bio, +@@ -6749,12 +6840,39 @@ blk_status_t btrfs_map_bio(struct btrfs_fs_info *fs_info, struct bio *bio, else bio = first_bio; @@ -307871,24 +367259,24 @@ index 2ec3b8ac8fa35..0f22d91e23927 100644 +static bool dev_args_match_device(const struct btrfs_dev_lookup_args *args, + const struct btrfs_device *device) +{ -+ ASSERT((args->devid != (u64)-1) || args->missing); ++ if (args->missing) { ++ if (test_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state) && ++ !device->bdev) ++ return true; ++ return false; ++ } + -+ if ((args->devid != (u64)-1) && device->devid != args->devid) ++ if (device->devid != args->devid) + return false; + if (args->uuid && memcmp(device->uuid, args->uuid, BTRFS_UUID_SIZE) != 0) + return false; -+ if (!args->missing) -+ return true; -+ if (test_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state) && -+ !device->bdev) -+ return true; -+ return false; ++ return true; +} + /* * Find a device specified by @devid or @uuid in the list of @fs_devices, or * return NULL. -@@ -6762,31 +6862,25 @@ blk_status_t btrfs_map_bio(struct btrfs_fs_info *fs_info, struct bio *bio, +@@ -6762,31 +6880,25 @@ blk_status_t btrfs_map_bio(struct btrfs_fs_info *fs_info, struct bio *bio, * If devid and uuid are both specified, the match must be exact, otherwise * only devid is used. 
*/ @@ -307929,7 +367317,31 @@ index 2ec3b8ac8fa35..0f22d91e23927 100644 } } -@@ -6952,6 +7046,7 @@ static void warn_32bit_meta_chunk(struct btrfs_fs_info *fs_info, +@@ -6949,9 +7061,31 @@ static void warn_32bit_meta_chunk(struct btrfs_fs_info *fs_info, + } + #endif + ++static struct btrfs_device *handle_missing_device(struct btrfs_fs_info *fs_info, ++ u64 devid, u8 *uuid) ++{ ++ struct btrfs_device *dev; ++ ++ if (!btrfs_test_opt(fs_info, DEGRADED)) { ++ btrfs_report_missing_device(fs_info, devid, uuid, true); ++ return ERR_PTR(-ENOENT); ++ } ++ ++ dev = add_missing_dev(fs_info->fs_devices, devid, uuid); ++ if (IS_ERR(dev)) { ++ btrfs_err(fs_info, "failed to init missing device %llu: %ld", ++ devid, PTR_ERR(dev)); ++ return dev; ++ } ++ btrfs_report_missing_device(fs_info, devid, uuid, false); ++ ++ return dev; ++} ++ static int read_one_chunk(struct btrfs_key *key, struct extent_buffer *leaf, struct btrfs_chunk *chunk) { @@ -307937,7 +367349,7 @@ index 2ec3b8ac8fa35..0f22d91e23927 100644 struct btrfs_fs_info *fs_info = leaf->fs_info; struct extent_map_tree *map_tree = &fs_info->mapping_tree; struct map_lookup *map; -@@ -7029,11 +7124,12 @@ static int read_one_chunk(struct btrfs_key *key, struct extent_buffer *leaf, +@@ -7029,33 +7163,24 @@ static int read_one_chunk(struct btrfs_key *key, struct extent_buffer *leaf, map->stripes[i].physical = btrfs_stripe_offset_nr(leaf, chunk, i); devid = btrfs_stripe_devid_nr(leaf, chunk, i); @@ -307947,12 +367359,39 @@ index 2ec3b8ac8fa35..0f22d91e23927 100644 BTRFS_UUID_SIZE); - map->stripes[i].dev = btrfs_find_device(fs_info->fs_devices, - devid, uuid, NULL); +- if (!map->stripes[i].dev && +- !btrfs_test_opt(fs_info, DEGRADED)) { +- free_extent_map(em); +- btrfs_report_missing_device(fs_info, devid, uuid, true); +- return -ENOENT; +- } + args.uuid = uuid; + map->stripes[i].dev = btrfs_find_device(fs_info->fs_devices, &args); - if (!map->stripes[i].dev && - !btrfs_test_opt(fs_info, DEGRADED)) { - free_extent_map(em); -@@ -7151,6 +7247,7 @@ static struct btrfs_fs_devices *open_seed_devices(struct btrfs_fs_info *fs_info, + if (!map->stripes[i].dev) { +- map->stripes[i].dev = +- add_missing_dev(fs_info->fs_devices, devid, +- uuid); ++ map->stripes[i].dev = handle_missing_device(fs_info, ++ devid, uuid); + if (IS_ERR(map->stripes[i].dev)) { ++ ret = PTR_ERR(map->stripes[i].dev); + free_extent_map(em); +- btrfs_err(fs_info, +- "failed to init missing dev %llu: %ld", +- devid, PTR_ERR(map->stripes[i].dev)); +- return PTR_ERR(map->stripes[i].dev); ++ return ret; + } +- btrfs_report_missing_device(fs_info, devid, uuid, false); + } ++ + set_bit(BTRFS_DEV_STATE_IN_FS_METADATA, + &(map->stripes[i].dev->dev_state)); +- + } + + write_lock(&map_tree->lock); +@@ -7151,6 +7276,7 @@ static struct btrfs_fs_devices *open_seed_devices(struct btrfs_fs_info *fs_info, static int read_one_dev(struct extent_buffer *leaf, struct btrfs_dev_item *dev_item) { @@ -307960,7 +367399,7 @@ index 2ec3b8ac8fa35..0f22d91e23927 100644 struct btrfs_fs_info *fs_info = leaf->fs_info; struct btrfs_fs_devices *fs_devices = fs_info->fs_devices; struct btrfs_device *device; -@@ -7159,11 +7256,13 @@ static int read_one_dev(struct extent_buffer *leaf, +@@ -7159,11 +7285,13 @@ static int read_one_dev(struct extent_buffer *leaf, u8 fs_uuid[BTRFS_FSID_SIZE]; u8 dev_uuid[BTRFS_UUID_SIZE]; @@ -307975,7 +367414,7 @@ index 2ec3b8ac8fa35..0f22d91e23927 100644 if (memcmp(fs_uuid, fs_devices->metadata_uuid, BTRFS_FSID_SIZE)) { fs_devices = open_seed_devices(fs_info, fs_uuid); -@@ -7171,8 +7270,7 @@ static 
int read_one_dev(struct extent_buffer *leaf, +@@ -7171,8 +7299,7 @@ static int read_one_dev(struct extent_buffer *leaf, return PTR_ERR(fs_devices); } @@ -307985,7 +367424,7 @@ index 2ec3b8ac8fa35..0f22d91e23927 100644 if (!device) { if (!btrfs_test_opt(fs_info, DEGRADED)) { btrfs_report_missing_device(fs_info, devid, -@@ -7481,6 +7579,19 @@ int btrfs_read_chunk_tree(struct btrfs_fs_info *fs_info) +@@ -7481,6 +7608,19 @@ int btrfs_read_chunk_tree(struct btrfs_fs_info *fs_info) */ fs_info->fs_devices->total_rw_bytes = 0; @@ -308005,7 +367444,7 @@ index 2ec3b8ac8fa35..0f22d91e23927 100644 /* * Read all device items, and then all the chunk items. All * device items are found before any chunk item (their object id -@@ -7506,10 +7617,6 @@ int btrfs_read_chunk_tree(struct btrfs_fs_info *fs_info) +@@ -7506,10 +7646,6 @@ int btrfs_read_chunk_tree(struct btrfs_fs_info *fs_info) goto error; break; } @@ -308016,7 +367455,7 @@ index 2ec3b8ac8fa35..0f22d91e23927 100644 node = path->nodes[1]; if (node) { if (last_ra_node != node->start) { -@@ -7537,7 +7644,6 @@ int btrfs_read_chunk_tree(struct btrfs_fs_info *fs_info) +@@ -7537,7 +7673,6 @@ int btrfs_read_chunk_tree(struct btrfs_fs_info *fs_info) * requirement for chunk allocation, see the comment on * top of btrfs_chunk_alloc() for details. */ @@ -308024,7 +367463,7 @@ index 2ec3b8ac8fa35..0f22d91e23927 100644 chunk = btrfs_item_ptr(leaf, slot, struct btrfs_chunk); ret = read_one_chunk(&found_key, leaf, chunk); if (ret) -@@ -7551,12 +7657,12 @@ int btrfs_read_chunk_tree(struct btrfs_fs_info *fs_info) +@@ -7551,12 +7686,12 @@ int btrfs_read_chunk_tree(struct btrfs_fs_info *fs_info) * do another round of validation checks. */ if (total_dev != fs_info->fs_devices->total_devices) { @@ -308041,7 +367480,40 @@ index 2ec3b8ac8fa35..0f22d91e23927 100644 } if (btrfs_super_total_bytes(fs_info->super_copy) < fs_info->fs_devices->total_rw_bytes) { -@@ -7841,12 +7947,14 @@ static void btrfs_dev_stat_print_on_load(struct btrfs_device *dev) +@@ -7575,10 +7710,11 @@ error: + return ret; + } + +-void btrfs_init_devices_late(struct btrfs_fs_info *fs_info) ++int btrfs_init_devices_late(struct btrfs_fs_info *fs_info) + { + struct btrfs_fs_devices *fs_devices = fs_info->fs_devices, *seed_devs; + struct btrfs_device *device; ++ int ret = 0; + + fs_devices->fs_info = fs_info; + +@@ -7587,12 +7723,18 @@ void btrfs_init_devices_late(struct btrfs_fs_info *fs_info) + device->fs_info = fs_info; + + list_for_each_entry(seed_devs, &fs_devices->seed_list, seed_list) { +- list_for_each_entry(device, &seed_devs->devices, dev_list) ++ list_for_each_entry(device, &seed_devs->devices, dev_list) { + device->fs_info = fs_info; ++ ret = btrfs_get_dev_zone_info(device, false); ++ if (ret) ++ break; ++ } + + seed_devs->fs_info = fs_info; + } + mutex_unlock(&fs_devices->device_list_mutex); ++ ++ return ret; + } + + static u64 btrfs_dev_stats_value(const struct extent_buffer *eb, +@@ -7841,12 +7983,14 @@ static void btrfs_dev_stat_print_on_load(struct btrfs_device *dev) int btrfs_get_dev_stats(struct btrfs_fs_info *fs_info, struct btrfs_ioctl_get_dev_stats *stats) { @@ -308057,7 +367529,7 @@ index 2ec3b8ac8fa35..0f22d91e23927 100644 mutex_unlock(&fs_devices->device_list_mutex); if (!dev) { -@@ -7922,6 +8030,7 @@ static int verify_one_dev_extent(struct btrfs_fs_info *fs_info, +@@ -7922,6 +8066,7 @@ static int verify_one_dev_extent(struct btrfs_fs_info *fs_info, u64 chunk_offset, u64 devid, u64 physical_offset, u64 physical_len) { @@ -308065,7 +367537,7 @@ index 2ec3b8ac8fa35..0f22d91e23927 100644 
struct extent_map_tree *em_tree = &fs_info->mapping_tree; struct extent_map *em; struct map_lookup *map; -@@ -7977,7 +8086,7 @@ static int verify_one_dev_extent(struct btrfs_fs_info *fs_info, +@@ -7977,7 +8122,7 @@ static int verify_one_dev_extent(struct btrfs_fs_info *fs_info, } /* Make sure no dev extent is beyond device boundary */ @@ -308074,7 +367546,7 @@ index 2ec3b8ac8fa35..0f22d91e23927 100644 if (!dev) { btrfs_err(fs_info, "failed to find devid %llu", devid); ret = -EUCLEAN; -@@ -8173,10 +8282,12 @@ static int relocating_repair_kthread(void *data) +@@ -8173,10 +8318,12 @@ static int relocating_repair_kthread(void *data) target = cache->start; btrfs_put_block_group(cache); @@ -308087,7 +367559,7 @@ index 2ec3b8ac8fa35..0f22d91e23927 100644 return -EBUSY; } -@@ -8204,6 +8315,7 @@ out: +@@ -8204,6 +8351,7 @@ out: btrfs_put_block_group(cache); mutex_unlock(&fs_info->reclaim_bgs_lock); btrfs_exclop_finish(fs_info); @@ -308096,7 +367568,7 @@ index 2ec3b8ac8fa35..0f22d91e23927 100644 return ret; } diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h -index 2183361db614d..dfd7457709b32 100644 +index 2183361db614d..b49fa784e5ba3 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -246,7 +246,11 @@ struct btrfs_fs_devices { @@ -308257,6 +367729,15 @@ index 2183361db614d..dfd7457709b32 100644 int btrfs_shrink_device(struct btrfs_device *device, u64 new_size); int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *path); int btrfs_balance(struct btrfs_fs_info *fs_info, +@@ -499,7 +539,7 @@ int find_free_dev_extent(struct btrfs_device *device, u64 num_bytes, + void btrfs_dev_stat_inc_and_print(struct btrfs_device *dev, int index); + int btrfs_get_dev_stats(struct btrfs_fs_info *fs_info, + struct btrfs_ioctl_get_dev_stats *stats); +-void btrfs_init_devices_late(struct btrfs_fs_info *fs_info); ++int btrfs_init_devices_late(struct btrfs_fs_info *fs_info); + int btrfs_init_dev_stats(struct btrfs_fs_info *fs_info); + int btrfs_run_dev_stats(struct btrfs_trans_handle *trans); + void btrfs_rm_dev_replace_remove_srcdev(struct btrfs_device *srcdev); diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c index 8a4514283a4b8..43fe2c2a955e2 100644 --- a/fs/btrfs/xattr.c @@ -308291,8 +367772,21 @@ index 8a4514283a4b8..43fe2c2a955e2 100644 } btrfs_end_transaction(trans); +diff --git a/fs/btrfs/zlib.c b/fs/btrfs/zlib.c +index 767a0c6c9694b..12e674f10baf6 100644 +--- a/fs/btrfs/zlib.c ++++ b/fs/btrfs/zlib.c +@@ -63,7 +63,7 @@ struct list_head *zlib_alloc_workspace(unsigned int level) + + workspacesize = max(zlib_deflate_workspacesize(MAX_WBITS, MAX_MEM_LEVEL), + zlib_inflate_workspacesize()); +- workspace->strm.workspace = kvmalloc(workspacesize, GFP_KERNEL); ++ workspace->strm.workspace = kvzalloc(workspacesize, GFP_KERNEL); + workspace->level = level; + workspace->buf = NULL; + /* diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c -index 47af1ab3bf120..96958ca474bd4 100644 +index 47af1ab3bf120..dbb75c2995e92 100644 --- a/fs/btrfs/zoned.c +++ b/fs/btrfs/zoned.c @@ -4,6 +4,7 @@ @@ -308303,7 +367797,17 @@ index 47af1ab3bf120..96958ca474bd4 100644 #include "ctree.h" #include "volumes.h" #include "zoned.h" -@@ -195,6 +196,8 @@ static int emulate_report_zones(struct btrfs_device *device, u64 pos, +@@ -113,7 +114,8 @@ static int sb_write_pointer(struct block_device *bdev, struct blk_zone *zones, + super[i] = page_address(page[i]); + } + +- if (super[0]->generation > super[1]->generation) ++ if (btrfs_super_generation(super[0]) > ++ btrfs_super_generation(super[1])) + sector = zones[1].start; + else + 
sector = zones[0].start; +@@ -195,6 +197,8 @@ static int emulate_report_zones(struct btrfs_device *device, u64 pos, static int btrfs_get_dev_zones(struct btrfs_device *device, u64 pos, struct blk_zone *zones, unsigned int *nr_zones) { @@ -308312,7 +367816,7 @@ index 47af1ab3bf120..96958ca474bd4 100644 int ret; if (!*nr_zones) -@@ -206,6 +209,34 @@ static int btrfs_get_dev_zones(struct btrfs_device *device, u64 pos, +@@ -206,6 +210,34 @@ static int btrfs_get_dev_zones(struct btrfs_device *device, u64 pos, return 0; } @@ -308347,7 +367851,7 @@ index 47af1ab3bf120..96958ca474bd4 100644 ret = blkdev_report_zones(device->bdev, pos >> SECTOR_SHIFT, *nr_zones, copy_zone_info_cb, zones); if (ret < 0) { -@@ -219,6 +250,11 @@ static int btrfs_get_dev_zones(struct btrfs_device *device, u64 pos, +@@ -219,6 +251,11 @@ static int btrfs_get_dev_zones(struct btrfs_device *device, u64 pos, if (!ret) return -EIO; @@ -308359,7 +367863,7 @@ index 47af1ab3bf120..96958ca474bd4 100644 return 0; } -@@ -282,7 +318,7 @@ int btrfs_get_dev_zone_info_all_devices(struct btrfs_fs_info *fs_info) +@@ -282,7 +319,7 @@ int btrfs_get_dev_zone_info_all_devices(struct btrfs_fs_info *fs_info) if (!device->bdev) continue; @@ -308368,7 +367872,7 @@ index 47af1ab3bf120..96958ca474bd4 100644 if (ret) break; } -@@ -291,7 +327,7 @@ int btrfs_get_dev_zone_info_all_devices(struct btrfs_fs_info *fs_info) +@@ -291,7 +328,7 @@ int btrfs_get_dev_zone_info_all_devices(struct btrfs_fs_info *fs_info) return ret; } @@ -308377,7 +367881,7 @@ index 47af1ab3bf120..96958ca474bd4 100644 { struct btrfs_fs_info *fs_info = device->fs_info; struct btrfs_zoned_device_info *zone_info = NULL; -@@ -318,6 +354,8 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device) +@@ -318,6 +355,8 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device) if (!zone_info) return -ENOMEM; @@ -308386,7 +367890,7 @@ index 47af1ab3bf120..96958ca474bd4 100644 if (!bdev_is_zoned(bdev)) { if (!fs_info->zone_size) { ret = calculate_emulated_zone_size(fs_info); -@@ -348,6 +386,25 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device) +@@ -348,6 +387,25 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device) nr_sectors = bdev_nr_sectors(bdev); zone_info->zone_size_shift = ilog2(zone_info->zone_size); zone_info->nr_zones = nr_sectors >> ilog2(zone_sectors); @@ -308412,7 +367916,14 @@ index 47af1ab3bf120..96958ca474bd4 100644 if (!IS_ALIGNED(nr_sectors, zone_sectors)) zone_info->nr_zones++; -@@ -369,6 +426,23 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device) +@@ -363,12 +421,29 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device) + goto out; + } + +- zones = kcalloc(BTRFS_REPORT_NR_ZONES, sizeof(struct blk_zone), GFP_KERNEL); ++ zones = kvcalloc(BTRFS_REPORT_NR_ZONES, sizeof(struct blk_zone), GFP_KERNEL); + if (!zones) { + ret = -ENOMEM; goto out; } @@ -308436,18 +367947,23 @@ index 47af1ab3bf120..96958ca474bd4 100644 /* Get zones type */ while (sector < nr_sectors) { nr_zones = BTRFS_REPORT_NR_ZONES; -@@ -444,8 +518,6 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device) +@@ -442,9 +517,7 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device) + } - kfree(zones); -- device->zone_info = zone_info; +- kfree(zones); - +- device->zone_info = zone_info; ++ kvfree(zones); + switch (bdev_zoned_model(bdev)) { case BLK_ZONED_HM: - model = "host-managed zoned"; -@@ -478,10 +550,7 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device) +@@ -476,12 +549,9 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device) + return 0; + out: - kfree(zones); 
+- kfree(zones); ++ kvfree(zones); out_free_zone_info: - bitmap_free(zone_info->empty_zones); - bitmap_free(zone_info->seq_zones); @@ -308457,7 +367973,7 @@ index 47af1ab3bf120..96958ca474bd4 100644 return ret; } -@@ -495,6 +564,7 @@ void btrfs_destroy_dev_zone_info(struct btrfs_device *device) +@@ -495,6 +565,7 @@ void btrfs_destroy_dev_zone_info(struct btrfs_device *device) bitmap_free(zone_info->seq_zones); bitmap_free(zone_info->empty_zones); @@ -308465,7 +367981,7 @@ index 47af1ab3bf120..96958ca474bd4 100644 kfree(zone_info); device->zone_info = NULL; } -@@ -519,6 +589,7 @@ int btrfs_check_zoned_mode(struct btrfs_fs_info *fs_info) +@@ -519,6 +590,7 @@ int btrfs_check_zoned_mode(struct btrfs_fs_info *fs_info) u64 zoned_devices = 0; u64 nr_devices = 0; u64 zone_size = 0; @@ -308473,7 +367989,7 @@ index 47af1ab3bf120..96958ca474bd4 100644 const bool incompat_zoned = btrfs_fs_incompat(fs_info, ZONED); int ret = 0; -@@ -554,6 +625,11 @@ int btrfs_check_zoned_mode(struct btrfs_fs_info *fs_info) +@@ -554,6 +626,11 @@ int btrfs_check_zoned_mode(struct btrfs_fs_info *fs_info) ret = -EINVAL; goto out; } @@ -308485,7 +368001,7 @@ index 47af1ab3bf120..96958ca474bd4 100644 } nr_devices++; } -@@ -585,7 +661,7 @@ int btrfs_check_zoned_mode(struct btrfs_fs_info *fs_info) +@@ -585,7 +662,7 @@ int btrfs_check_zoned_mode(struct btrfs_fs_info *fs_info) /* * stripe_size is always aligned to BTRFS_STRIPE_LEN in @@ -308494,7 +368010,7 @@ index 47af1ab3bf120..96958ca474bd4 100644 * check the alignment here. */ if (!IS_ALIGNED(zone_size, BTRFS_STRIPE_LEN)) { -@@ -603,7 +679,11 @@ int btrfs_check_zoned_mode(struct btrfs_fs_info *fs_info) +@@ -603,7 +680,11 @@ int btrfs_check_zoned_mode(struct btrfs_fs_info *fs_info) } fs_info->zone_size = zone_size; @@ -308506,7 +368022,7 @@ index 47af1ab3bf120..96958ca474bd4 100644 /* * Check mount options here, because we might change fs_info->zoned -@@ -1304,6 +1384,17 @@ bool btrfs_use_zone_append(struct btrfs_inode *inode, u64 start) +@@ -1304,6 +1385,17 @@ bool btrfs_use_zone_append(struct btrfs_inode *inode, u64 start) if (!is_data_inode(&inode->vfs_inode)) return false; @@ -308524,7 +368040,7 @@ index 47af1ab3bf120..96958ca474bd4 100644 cache = btrfs_lookup_block_group(fs_info, start); ASSERT(cache); if (!cache) -@@ -1440,27 +1531,29 @@ int btrfs_zoned_issue_zeroout(struct btrfs_device *device, u64 physical, u64 len +@@ -1440,27 +1532,29 @@ int btrfs_zoned_issue_zeroout(struct btrfs_device *device, u64 physical, u64 len static int read_zone_info(struct btrfs_fs_info *fs_info, u64 logical, struct blk_zone *zone) { @@ -308564,7 +368080,7 @@ index 47af1ab3bf120..96958ca474bd4 100644 /* Missing device */ if (!dev->bdev) -@@ -1473,7 +1566,8 @@ static int read_zone_info(struct btrfs_fs_info *fs_info, u64 logical, +@@ -1473,7 +1567,8 @@ static int read_zone_info(struct btrfs_fs_info *fs_info, u64 logical, break; } memalloc_nofs_restore(nofs_flag); @@ -308574,7 +368090,7 @@ index 47af1ab3bf120..96958ca474bd4 100644 return ret; } -@@ -1530,3 +1624,58 @@ struct btrfs_device *btrfs_zoned_get_device(struct btrfs_fs_info *fs_info, +@@ -1530,3 +1625,58 @@ struct btrfs_device *btrfs_zoned_get_device(struct btrfs_fs_info *fs_info, return device; } @@ -308722,7 +368238,7 @@ index 4b299705bb12b..1ef493fcd504e 100644 + #endif diff --git a/fs/buffer.c b/fs/buffer.c -index c615387aedcae..f6d2835794918 100644 +index c615387aedcae..1960e2d43ae2a 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -1235,16 +1235,18 @@ static void bh_lru_install(struct buffer_head *bh) @@ -308747,6 +368263,24 @@ 
index c615387aedcae..f6d2835794918 100644 b = this_cpu_ptr(&bh_lrus); for (i = 0; i < BH_LRU_SIZE; i++) { +@@ -2350,7 +2352,7 @@ int generic_cont_expand_simple(struct inode *inode, loff_t size) + { + struct address_space *mapping = inode->i_mapping; + struct page *page; +- void *fsdata; ++ void *fsdata = NULL; + int err; + + err = inode_newsize_ok(inode, size); +@@ -2376,7 +2378,7 @@ static int cont_expand_zero(struct file *file, struct address_space *mapping, + struct inode *inode = mapping->host; + unsigned int blocksize = i_blocksize(inode); + struct page *page; +- void *fsdata; ++ void *fsdata = NULL; + pgoff_t index, curidx; + loff_t curpos; + unsigned zerofrom, offset, len; diff --git a/fs/cachefiles/bind.c b/fs/cachefiles/bind.c index d463d89f5db8c..146291be62637 100644 --- a/fs/cachefiles/bind.c @@ -308792,41 +368326,20 @@ index 99b80b5c7a931..b218a26291b8e 100644 struct ceph_fs_client *fsc = ceph_inode_to_client(inode); struct ceph_osd_request *req; diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c -index 8f537f1d9d1d3..883bb91ee257e 100644 +index 8f537f1d9d1d3..67b782b0a90aa 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c -@@ -2217,6 +2217,7 @@ static int unsafe_request_wait(struct inode *inode) - struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc; - struct ceph_inode_info *ci = ceph_inode(inode); - struct ceph_mds_request *req1 = NULL, *req2 = NULL; -+ unsigned int max_sessions; - int ret, err = 0; - - spin_lock(&ci->i_unsafe_lock); -@@ -2234,37 +2235,47 @@ static int unsafe_request_wait(struct inode *inode) - } - spin_unlock(&ci->i_unsafe_lock); - -+ /* -+ * The mdsc->max_sessions is unlikely to be changed -+ * mostly, here we will retry it by reallocating the -+ * sessions array memory to get rid of the mdsc->mutex -+ * lock. -+ */ -+retry: -+ max_sessions = mdsc->max_sessions; -+ - /* - * Trigger to flush the journal logs in all the relevant MDSes - * manually, or in the worst case we must wait at most 5 seconds +@@ -2240,33 +2240,29 @@ static int unsafe_request_wait(struct inode *inode) * to wait the journal logs to be flushed by the MDSes periodically. 
*/ -- if (req1 || req2) { -+ if ((req1 || req2) && likely(max_sessions)) { - struct ceph_mds_session **sessions = NULL; - struct ceph_mds_session *s; + if (req1 || req2) { +- struct ceph_mds_session **sessions = NULL; +- struct ceph_mds_session *s; struct ceph_mds_request *req; - unsigned int max; ++ struct ceph_mds_session **sessions; ++ struct ceph_mds_session *s; ++ unsigned int max_sessions; int i; - /* @@ -308840,8 +368353,12 @@ index 8f537f1d9d1d3..883bb91ee257e 100644 - sessions = krealloc(sessions, max * sizeof(s), __GFP_ZERO); - if (!sessions) - return -ENOMEM; -+ sessions = kzalloc(max_sessions * sizeof(s), GFP_KERNEL); ++ mutex_lock(&mdsc->mutex); ++ max_sessions = mdsc->max_sessions; ++ ++ sessions = kcalloc(max_sessions, sizeof(s), GFP_KERNEL); + if (!sessions) { ++ mutex_unlock(&mdsc->mutex); + err = -ENOMEM; + goto out; + } @@ -308852,39 +368369,40 @@ index 8f537f1d9d1d3..883bb91ee257e 100644 r_unsafe_dir_item) { s = req->r_session; - if (unlikely(s->s_mds >= max)) { +- spin_unlock(&ci->i_unsafe_lock); +- goto retry; +- } + if (!s) + continue; -+ if (unlikely(s->s_mds >= max_sessions)) { - spin_unlock(&ci->i_unsafe_lock); -+ for (i = 0; i < max_sessions; i++) { -+ s = sessions[i]; -+ if (s) -+ ceph_put_mds_session(s); -+ } -+ kfree(sessions); - goto retry; - } if (!sessions[s->s_mds]) { -@@ -2277,8 +2288,16 @@ retry: + s = ceph_get_mds_session(s); + sessions[s->s_mds] = s; +@@ -2277,10 +2273,8 @@ retry: list_for_each_entry(req, &ci->i_unsafe_iops, r_unsafe_target_item) { s = req->r_session; - if (unlikely(s->s_mds >= max)) { +- spin_unlock(&ci->i_unsafe_lock); +- goto retry; +- } + if (!s) + continue; -+ if (unlikely(s->s_mds >= max_sessions)) { - spin_unlock(&ci->i_unsafe_lock); -+ for (i = 0; i < max_sessions; i++) { -+ s = sessions[i]; -+ if (s) -+ ceph_put_mds_session(s); -+ } -+ kfree(sessions); - goto retry; - } if (!sessions[s->s_mds]) { -@@ -2299,7 +2318,7 @@ retry: + s = ceph_get_mds_session(s); + sessions[s->s_mds] = s; +@@ -2292,14 +2286,15 @@ retry: + /* the auth MDS */ + spin_lock(&ci->i_ceph_lock); + if (ci->i_auth_cap) { +- s = ci->i_auth_cap->session; +- if (!sessions[s->s_mds]) +- sessions[s->s_mds] = ceph_get_mds_session(s); ++ s = ci->i_auth_cap->session; ++ if (!sessions[s->s_mds]) ++ sessions[s->s_mds] = ceph_get_mds_session(s); + } spin_unlock(&ci->i_ceph_lock); ++ mutex_unlock(&mdsc->mutex); /* send flush mdlog request to MDSes */ - for (i = 0; i < max; i++) { @@ -308892,7 +368410,7 @@ index 8f537f1d9d1d3..883bb91ee257e 100644 s = sessions[i]; if (s) { send_flush_mdlog(s); -@@ -2316,15 +2335,19 @@ retry: +@@ -2316,15 +2311,19 @@ retry: ceph_timeout_jiffies(req1->r_timeout)); if (ret) err = -EIO; @@ -308914,7 +368432,16 @@ index 8f537f1d9d1d3..883bb91ee257e 100644 return err; } -@@ -3520,24 +3543,23 @@ static void handle_cap_grant(struct inode *inode, +@@ -2873,7 +2872,7 @@ int ceph_get_caps(struct file *filp, int need, int want, loff_t endoff, int *got + + while (true) { + flags &= CEPH_FILE_MODE_MASK; +- if (atomic_read(&fi->num_locks)) ++ if (vfs_inode_has_locks(inode)) + flags |= CHECK_FILELOCK; + _got = 0; + ret = try_get_cap_refs(inode, need, want, endoff, +@@ -3520,24 +3519,23 @@ static void handle_cap_grant(struct inode *inode, fill_inline = true; } @@ -308952,7 +368479,7 @@ index 8f537f1d9d1d3..883bb91ee257e 100644 if (fill_inline) ceph_fill_inline_data(inode, NULL, extra_info->inline_data, -@@ -4349,7 +4371,7 @@ void ceph_get_fmode(struct ceph_inode_info *ci, int fmode, int count) +@@ -4349,7 +4347,7 @@ void ceph_get_fmode(struct 
ceph_inode_info *ci, int fmode, int count) { struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(ci->vfs_inode.i_sb); int bits = (fmode << 1) | 1; @@ -308961,7 +368488,7 @@ index 8f537f1d9d1d3..883bb91ee257e 100644 int i; if (count == 1) -@@ -4357,19 +4379,19 @@ void ceph_get_fmode(struct ceph_inode_info *ci, int fmode, int count) +@@ -4357,19 +4355,19 @@ void ceph_get_fmode(struct ceph_inode_info *ci, int fmode, int count) spin_lock(&ci->i_ceph_lock); for (i = 0; i < CEPH_FILE_MODE_BITS; i++) { @@ -309165,8 +368692,31 @@ index 1c7574105478f..42e449d3f18b8 100644 } const struct inode_operations ceph_file_iops = { +diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c +index bdeb271f47d95..3e3b8be76b21e 100644 +--- a/fs/ceph/locks.c ++++ b/fs/ceph/locks.c +@@ -32,18 +32,14 @@ void __init ceph_flock_init(void) + + static void ceph_fl_copy_lock(struct file_lock *dst, struct file_lock *src) + { +- struct ceph_file_info *fi = dst->fl_file->private_data; + struct inode *inode = file_inode(dst->fl_file); + atomic_inc(&ceph_inode(inode)->i_filelock_ref); +- atomic_inc(&fi->num_locks); + } + + static void ceph_fl_release_lock(struct file_lock *fl) + { +- struct ceph_file_info *fi = fl->fl_file->private_data; + struct inode *inode = file_inode(fl->fl_file); + struct ceph_inode_info *ci = ceph_inode(inode); +- atomic_dec(&fi->num_locks); + if (atomic_dec_and_test(&ci->i_filelock_ref)) { + /* clear error when all locks are released */ + spin_lock(&ci->i_ceph_lock); diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c -index d64413adc0fd2..78d052dc17987 100644 +index d64413adc0fd2..0dc8871a4b660 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -1196,14 +1196,17 @@ static int encode_supported_features(void **p, void *end) @@ -309189,7 +368739,20 @@ index d64413adc0fd2..78d052dc17987 100644 *p += size; } else { if (WARN_ON_ONCE(*p + 4 > end)) -@@ -3772,7 +3775,7 @@ static int reconnect_caps_cb(struct inode *inode, struct ceph_cap *cap, +@@ -3540,6 +3543,12 @@ static void handle_session(struct ceph_mds_session *session, + break; + + case CEPH_SESSION_FLUSHMSG: ++ /* flush cap releases */ ++ spin_lock(&session->s_cap_lock); ++ if (session->s_num_cap_releases) ++ ceph_flush_cap_releases(mdsc, session); ++ spin_unlock(&session->s_cap_lock); ++ + send_flushmsg_ack(mdsc, session, seq); + break; + +@@ -3772,7 +3781,7 @@ static int reconnect_caps_cb(struct inode *inode, struct ceph_cap *cap, struct ceph_pagelist *pagelist = recon_state->pagelist; struct dentry *dentry; char *path; @@ -309198,7 +368761,7 @@ index d64413adc0fd2..78d052dc17987 100644 u64 pathbase; u64 snap_follows; -@@ -3792,7 +3795,6 @@ static int reconnect_caps_cb(struct inode *inode, struct ceph_cap *cap, +@@ -3792,7 +3801,6 @@ static int reconnect_caps_cb(struct inode *inode, struct ceph_cap *cap, } } else { path = NULL; @@ -309206,7 +368769,7 @@ index d64413adc0fd2..78d052dc17987 100644 pathbase = 0; } -@@ -4795,15 +4797,17 @@ void ceph_mdsc_pre_umount(struct ceph_mds_client *mdsc) +@@ -4795,15 +4803,17 @@ void ceph_mdsc_pre_umount(struct ceph_mds_client *mdsc) } /* @@ -309227,7 +368790,7 @@ index d64413adc0fd2..78d052dc17987 100644 restart: req = __get_oldest_req(mdsc); while (req && req->r_tid <= want_tid) { -@@ -4815,14 +4819,32 @@ restart: +@@ -4815,14 +4825,32 @@ restart: nextreq = NULL; if (req->r_op != CEPH_MDS_OP_SETFILELOCK && (req->r_op & CEPH_MDS_OP_WRITE)) { @@ -309261,7 +368824,7 @@ index d64413adc0fd2..78d052dc17987 100644 mutex_lock(&mdsc->mutex); ceph_mdsc_put_request(req); if (!nextreq) -@@ -4837,7 +4859,8 @@ restart: 
+@@ -4837,7 +4865,8 @@ restart: req = nextreq; } mutex_unlock(&mdsc->mutex); @@ -309271,7 +368834,7 @@ index d64413adc0fd2..78d052dc17987 100644 } void ceph_mdsc_sync(struct ceph_mds_client *mdsc) -@@ -4866,7 +4889,7 @@ void ceph_mdsc_sync(struct ceph_mds_client *mdsc) +@@ -4866,7 +4895,7 @@ void ceph_mdsc_sync(struct ceph_mds_client *mdsc) dout("sync want tid %lld flush_seq %lld\n", want_tid, want_flush); @@ -309319,6 +368882,79 @@ index 61d67cbcb3671..30387733765d5 100644 info->export_targets[j] = target; } } else { +diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c +index b41e6724c5910..b512c82f9ccdb 100644 +--- a/fs/ceph/snap.c ++++ b/fs/ceph/snap.c +@@ -705,9 +705,10 @@ int ceph_update_snap_trace(struct ceph_mds_client *mdsc, + struct ceph_mds_snap_realm *ri; /* encoded */ + __le64 *snaps; /* encoded */ + __le64 *prior_parent_snaps; /* encoded */ +- struct ceph_snap_realm *realm = NULL; ++ struct ceph_snap_realm *realm; + struct ceph_snap_realm *first_realm = NULL; +- int invalidate = 0; ++ struct ceph_snap_realm *realm_to_rebuild = NULL; ++ int rebuild_snapcs; + int err = -ENOMEM; + LIST_HEAD(dirty_realms); + +@@ -715,6 +716,8 @@ int ceph_update_snap_trace(struct ceph_mds_client *mdsc, + + dout("update_snap_trace deletion=%d\n", deletion); + more: ++ realm = NULL; ++ rebuild_snapcs = 0; + ceph_decode_need(&p, e, sizeof(*ri), bad); + ri = p; + p += sizeof(*ri); +@@ -738,7 +741,7 @@ more: + err = adjust_snap_realm_parent(mdsc, realm, le64_to_cpu(ri->parent)); + if (err < 0) + goto fail; +- invalidate += err; ++ rebuild_snapcs += err; + + if (le64_to_cpu(ri->seq) > realm->seq) { + dout("update_snap_trace updating %llx %p %lld -> %lld\n", +@@ -763,22 +766,30 @@ more: + if (realm->seq > mdsc->last_snap_seq) + mdsc->last_snap_seq = realm->seq; + +- invalidate = 1; ++ rebuild_snapcs = 1; + } else if (!realm->cached_context) { + dout("update_snap_trace %llx %p seq %lld new\n", + realm->ino, realm, realm->seq); +- invalidate = 1; ++ rebuild_snapcs = 1; + } else { + dout("update_snap_trace %llx %p seq %lld unchanged\n", + realm->ino, realm, realm->seq); + } + +- dout("done with %llx %p, invalidated=%d, %p %p\n", realm->ino, +- realm, invalidate, p, e); ++ dout("done with %llx %p, rebuild_snapcs=%d, %p %p\n", realm->ino, ++ realm, rebuild_snapcs, p, e); ++ ++ /* ++ * this will always track the uppest parent realm from which ++ * we need to rebuild the snapshot contexts _downward_ in ++ * hierarchy. 
++ */ ++ if (rebuild_snapcs) ++ realm_to_rebuild = realm; + +- /* invalidate when we reach the _end_ (root) of the trace */ +- if (invalidate && p >= e) +- rebuild_snap_realms(realm, &dirty_realms); ++ /* rebuild_snapcs when we reach the _end_ (root) of the trace */ ++ if (realm_to_rebuild && p >= e) ++ rebuild_snap_realms(realm_to_rebuild, &dirty_realms); + + if (!first_realm) + first_realm = realm; diff --git a/fs/ceph/super.c b/fs/ceph/super.c index fd8742bae8471..202ddde3d62ad 100644 --- a/fs/ceph/super.c @@ -309351,6 +368987,18 @@ index fd8742bae8471..202ddde3d62ad 100644 return 0; } +diff --git a/fs/ceph/super.h b/fs/ceph/super.h +index 14f951cd5b61b..8c9021d0f8374 100644 +--- a/fs/ceph/super.h ++++ b/fs/ceph/super.h +@@ -773,7 +773,6 @@ struct ceph_file_info { + struct list_head rw_contexts; + + u32 filp_gen; +- atomic_t num_locks; + }; + + struct ceph_dir_file_info { diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c index 159a1ffa4f4b8..db288b4aee6d2 100644 --- a/fs/ceph/xattr.c @@ -309379,6 +369027,19 @@ index 159a1ffa4f4b8..db288b4aee6d2 100644 { .name = "ceph.dir.pin", .name_size = sizeof("ceph.dir.pin"), +diff --git a/fs/char_dev.c b/fs/char_dev.c +index ba0ded7842a77..3f667292608c0 100644 +--- a/fs/char_dev.c ++++ b/fs/char_dev.c +@@ -547,7 +547,7 @@ int cdev_device_add(struct cdev *cdev, struct device *dev) + } + + rc = device_add(dev); +- if (rc) ++ if (rc && dev->devt) + cdev_del(cdev); + + return rc; diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c index de2c12bcfa4bc..248a8f973cf9c 100644 --- a/fs/cifs/cifs_debug.c @@ -309407,6 +369068,111 @@ index de2c12bcfa4bc..248a8f973cf9c 100644 seq_puts(m, "\n\n\tShares: "); j = 0; +diff --git a/fs/cifs/cifs_dfs_ref.c b/fs/cifs/cifs_dfs_ref.c +index 007427ba75e5f..b0864da9ef434 100644 +--- a/fs/cifs/cifs_dfs_ref.c ++++ b/fs/cifs/cifs_dfs_ref.c +@@ -307,12 +307,8 @@ static struct vfsmount *cifs_dfs_do_mount(struct dentry *mntpt, + static struct vfsmount *cifs_dfs_do_automount(struct dentry *mntpt) + { + struct cifs_sb_info *cifs_sb; +- struct cifs_ses *ses; +- struct cifs_tcon *tcon; + void *page; +- char *full_path, *root_path; +- unsigned int xid; +- int rc; ++ char *full_path; + struct vfsmount *mnt; + + cifs_dbg(FYI, "in %s\n", __func__); +@@ -324,8 +320,6 @@ static struct vfsmount *cifs_dfs_do_automount(struct dentry *mntpt) + * the double backslashes usually used in the UNC. This function + * gives us the latter, so we must adjust the result. + */ +- mnt = ERR_PTR(-ENOMEM); +- + cifs_sb = CIFS_SB(mntpt->d_sb); + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_DFS) { + mnt = ERR_PTR(-EREMOTE); +@@ -341,60 +335,11 @@ static struct vfsmount *cifs_dfs_do_automount(struct dentry *mntpt) + } + + convert_delimiter(full_path, '\\'); +- + cifs_dbg(FYI, "%s: full_path: %s\n", __func__, full_path); + +- if (!cifs_sb_master_tlink(cifs_sb)) { +- cifs_dbg(FYI, "%s: master tlink is NULL\n", __func__); +- goto free_full_path; +- } +- +- tcon = cifs_sb_master_tcon(cifs_sb); +- if (!tcon) { +- cifs_dbg(FYI, "%s: master tcon is NULL\n", __func__); +- goto free_full_path; +- } +- +- root_path = kstrdup(tcon->treeName, GFP_KERNEL); +- if (!root_path) { +- mnt = ERR_PTR(-ENOMEM); +- goto free_full_path; +- } +- cifs_dbg(FYI, "%s: root path: %s\n", __func__, root_path); +- +- ses = tcon->ses; +- xid = get_xid(); +- +- /* +- * If DFS root has been expired, then unconditionally fetch it again to +- * refresh DFS referral cache. 
+- */ +- rc = dfs_cache_find(xid, ses, cifs_sb->local_nls, cifs_remap(cifs_sb), +- root_path + 1, NULL, NULL); +- if (!rc) { +- rc = dfs_cache_find(xid, ses, cifs_sb->local_nls, +- cifs_remap(cifs_sb), full_path + 1, +- NULL, NULL); +- } +- +- free_xid(xid); +- +- if (rc) { +- mnt = ERR_PTR(rc); +- goto free_root_path; +- } +- /* +- * OK - we were able to get and cache a referral for @full_path. +- * +- * Now, pass it down to cifs_mount() and it will retry every available +- * node server in case of failures - no need to do it here. +- */ + mnt = cifs_dfs_do_mount(mntpt, cifs_sb, full_path); +- cifs_dbg(FYI, "%s: cifs_dfs_do_mount:%s , mnt:%p\n", __func__, +- full_path + 1, mnt); ++ cifs_dbg(FYI, "%s: cifs_dfs_do_mount:%s , mnt:%p\n", __func__, full_path + 1, mnt); + +-free_root_path: +- kfree(root_path); + free_full_path: + free_dentry_path(page); + cdda_exit: +diff --git a/fs/cifs/cifs_fs_sb.h b/fs/cifs/cifs_fs_sb.h +index f97407520ea15..013a4bd65280c 100644 +--- a/fs/cifs/cifs_fs_sb.h ++++ b/fs/cifs/cifs_fs_sb.h +@@ -61,11 +61,6 @@ struct cifs_sb_info { + /* only used when CIFS_MOUNT_USE_PREFIX_PATH is set */ + char *prepath; + +- /* +- * Canonical DFS path initially provided by the mount call. We might connect to something +- * different via DFS but we want to keep it to do failover properly. +- */ +- char *origin_fullpath; /* \\HOST\SHARE\[OPTIONAL PATH] */ + /* randomly generated 128-bit number for indexing dfs mount groups in referral cache */ + uuid_t dfs_mount_id; + /* diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c index ee3aab3dd4ac6..bf861fef2f0c3 100644 --- a/fs/cifs/cifsacl.c @@ -309449,7 +369215,7 @@ index ee3aab3dd4ac6..bf861fef2f0c3 100644 nsecdesclen += 5 * sizeof(struct cifs_ace); } else { /* chown */ diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c -index 9fa930dfd78d6..668dd6a86295f 100644 +index 9fa930dfd78d6..fc736ced6f4a3 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -210,6 +210,9 @@ cifs_read_super(struct super_block *sb) @@ -309507,7 +369273,24 @@ index 9fa930dfd78d6..668dd6a86295f 100644 kill_anon_super(sb); cifs_umount(cifs_sb); -@@ -816,7 +826,7 @@ cifs_smb3_do_mount(struct file_system_type *fs_type, +@@ -646,9 +656,15 @@ cifs_show_options(struct seq_file *s, struct dentry *root) + seq_printf(s, ",echo_interval=%lu", + tcon->ses->server->echo_interval / HZ); + +- /* Only display max_credits if it was overridden on mount */ ++ /* Only display the following if overridden on mount */ + if (tcon->ses->server->max_credits != SMB2_MAX_CREDITS_AVAILABLE) + seq_printf(s, ",max_credits=%u", tcon->ses->server->max_credits); ++ if (tcon->ses->server->tcp_nodelay) ++ seq_puts(s, ",tcpnodelay"); ++ if (tcon->ses->server->noautotune) ++ seq_puts(s, ",noautotune"); ++ if (tcon->ses->server->noblocksnd) ++ seq_puts(s, ",noblocksend"); + + if (tcon->snapshot_time) + seq_printf(s, ",snapshot=%llu", tcon->snapshot_time); +@@ -816,7 +832,7 @@ cifs_smb3_do_mount(struct file_system_type *fs_type, int flags, struct smb3_fs_context *old_ctx) { int rc; @@ -309516,7 +369299,7 @@ index 9fa930dfd78d6..668dd6a86295f 100644 struct cifs_sb_info *cifs_sb = NULL; struct cifs_mnt_data mnt_data; struct dentry *root; -@@ -909,11 +919,14 @@ cifs_smb3_do_mount(struct file_system_type *fs_type, +@@ -909,11 +925,14 @@ cifs_smb3_do_mount(struct file_system_type *fs_type, out_super: deactivate_locked_super(sb); @@ -309534,7 +369317,7 @@ index 9fa930dfd78d6..668dd6a86295f 100644 } return root; } -@@ -925,7 +938,7 @@ cifs_loose_read_iter(struct kiocb *iocb, struct iov_iter *iter) +@@ -925,7 
+944,7 @@ cifs_loose_read_iter(struct kiocb *iocb, struct iov_iter *iter) ssize_t rc; struct inode *inode = file_inode(iocb->ki_filp); @@ -309543,7 +369326,7 @@ index 9fa930dfd78d6..668dd6a86295f 100644 return cifs_user_readv(iocb, iter); rc = cifs_revalidate_mapping(inode); -@@ -1061,7 +1074,7 @@ struct file_system_type cifs_fs_type = { +@@ -1061,7 +1080,7 @@ struct file_system_type cifs_fs_type = { }; MODULE_ALIAS_FS("cifs"); @@ -309552,7 +369335,7 @@ index 9fa930dfd78d6..668dd6a86295f 100644 .owner = THIS_MODULE, .name = "smb3", .init_fs_context = smb3_init_fs_context, -@@ -1250,8 +1263,11 @@ static ssize_t cifs_copy_file_range(struct file *src_file, loff_t off, +@@ -1250,8 +1269,11 @@ static ssize_t cifs_copy_file_range(struct file *src_file, loff_t off, ssize_t rc; struct cifsFileInfo *cfile = dst_file->private_data; @@ -309580,10 +369363,27 @@ index b50da1901ebd2..fa37f2672cd4e 100644 extern const struct address_space_operations cifs_addr_ops_smallbuf; diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h -index e916470468ea9..a97ed30843cff 100644 +index e916470468ea9..0f1b9c48838cc 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h -@@ -74,7 +74,8 @@ +@@ -13,6 +13,8 @@ + #include <linux/in6.h> + #include <linux/inet.h> + #include <linux/slab.h> ++#include <linux/scatterlist.h> ++#include <linux/mm.h> + #include <linux/mempool.h> + #include <linux/workqueue.h> + #include "cifs_fs_sb.h" +@@ -21,6 +23,7 @@ + #include <linux/scatterlist.h> + #include <uapi/linux/cifs/cifs_mount.h> + #include "smb2pdu.h" ++#include "smb2glob.h" + + #define CIFS_MAGIC_NUMBER 0xFF534D42 /* the first four bytes of SMB PDUs */ + +@@ -74,7 +77,8 @@ #define SMB_ECHO_INTERVAL_MAX 600 #define SMB_ECHO_INTERVAL_DEFAULT 60 @@ -309593,7 +369393,7 @@ index e916470468ea9..a97ed30843cff 100644 #define SMB_DNS_RESOLVE_INTERVAL_DEFAULT 600 /* maximum number of PDUs in one compound */ -@@ -591,6 +592,7 @@ struct TCP_Server_Info { +@@ -591,6 +595,7 @@ struct TCP_Server_Info { struct list_head pending_mid_q; bool noblocksnd; /* use blocking sendmsg */ bool noautotune; /* do not autotune send buf sizes */ @@ -309601,7 +369401,27 @@ index e916470468ea9..a97ed30843cff 100644 bool tcp_nodelay; unsigned int credits; /* send no more requests at once */ unsigned int max_credits; /* can override large 32000 default at mnt */ -@@ -932,16 +934,21 @@ struct cifs_ses { +@@ -691,6 +696,19 @@ struct TCP_Server_Info { + #endif + #ifdef CONFIG_CIFS_DFS_UPCALL + bool is_dfs_conn; /* if a dfs connection */ ++ struct mutex refpath_lock; /* protects leaf_fullpath */ ++ /* ++ * Canonical DFS full paths that were used to chase referrals in mount and reconnect. ++ * ++ * origin_fullpath: first or original referral path ++ * leaf_fullpath: last referral path (might be changed due to nested links in reconnect) ++ * ++ * current_fullpath: pointer to either origin_fullpath or leaf_fullpath ++ * NOTE: cannot be accessed outside cifs_reconnect() and smb2_reconnect() ++ * ++ * format: \\HOST\SHARE\[OPTIONAL PATH] ++ */ ++ char *origin_fullpath, *leaf_fullpath, *current_fullpath; + #endif + }; + +@@ -932,16 +950,21 @@ struct cifs_ses { * iface_lock should be taken when accessing any of these fields */ spinlock_t iface_lock; @@ -309623,7 +369443,15 @@ index e916470468ea9..a97ed30843cff 100644 }; /* -@@ -1883,11 +1890,13 @@ extern mempool_t *cifs_mid_poolp; +@@ -1090,7 +1113,6 @@ struct cifs_tcon { + struct cached_fid crfid; /* Cached root fid */ + /* BB add field for back pointer to sb struct(s)? 
*/ + #ifdef CONFIG_CIFS_DFS_UPCALL +- char *dfs_path; /* canonical DFS path */ + struct list_head ulist; /* cache update list */ + #endif + }; +@@ -1883,11 +1905,13 @@ extern mempool_t *cifs_mid_poolp; /* Operations for different SMB versions */ #define SMB1_VERSION_STRING "1.0" @@ -309638,11 +369466,145 @@ index e916470468ea9..a97ed30843cff 100644 #define SMB21_VERSION_STRING "2.1" extern struct smb_version_operations smb21_operations; extern struct smb_version_values smb21_values; +@@ -1941,4 +1965,80 @@ static inline bool is_tcon_dfs(struct cifs_tcon *tcon) + tcon->share_flags & (SHI1005_FLAGS_DFS | SHI1005_FLAGS_DFS_ROOT); + } + ++static inline bool cifs_is_referral_server(struct cifs_tcon *tcon, ++ const struct dfs_info3_param *ref) ++{ ++ /* ++ * Check if all targets are capable of handling DFS referrals as per ++ * MS-DFSC 2.2.4 RESP_GET_DFS_REFERRAL. ++ */ ++ return is_tcon_dfs(tcon) || (ref && (ref->flags & DFSREF_REFERRAL_SERVER)); ++} ++ ++static inline unsigned int cifs_get_num_sgs(const struct smb_rqst *rqst, ++ int num_rqst, ++ const u8 *sig) ++{ ++ unsigned int len, skip; ++ unsigned int nents = 0; ++ unsigned long addr; ++ int i, j; ++ ++ /* Assumes the first rqst has a transform header as the first iov. ++ * I.e. ++ * rqst[0].rq_iov[0] is transform header ++ * rqst[0].rq_iov[1+] data to be encrypted/decrypted ++ * rqst[1+].rq_iov[0+] data to be encrypted/decrypted ++ */ ++ for (i = 0; i < num_rqst; i++) { ++ /* ++ * The first rqst has a transform header where the ++ * first 20 bytes are not part of the encrypted blob. ++ */ ++ for (j = 0; j < rqst[i].rq_nvec; j++) { ++ struct kvec *iov = &rqst[i].rq_iov[j]; ++ ++ skip = (i == 0) && (j == 0) ? 20 : 0; ++ addr = (unsigned long)iov->iov_base + skip; ++ if (unlikely(is_vmalloc_addr((void *)addr))) { ++ len = iov->iov_len - skip; ++ nents += DIV_ROUND_UP(offset_in_page(addr) + len, ++ PAGE_SIZE); ++ } else { ++ nents++; ++ } ++ } ++ nents += rqst[i].rq_npages; ++ } ++ nents += DIV_ROUND_UP(offset_in_page(sig) + SMB2_SIGNATURE_SIZE, PAGE_SIZE); ++ return nents; ++} ++ ++/* We can not use the normal sg_set_buf() as we will sometimes pass a ++ * stack object as buf. 
++ */ ++static inline struct scatterlist *cifs_sg_set_buf(struct scatterlist *sg, ++ const void *buf, ++ unsigned int buflen) ++{ ++ unsigned long addr = (unsigned long)buf; ++ unsigned int off = offset_in_page(addr); ++ ++ addr &= PAGE_MASK; ++ if (unlikely(is_vmalloc_addr((void *)addr))) { ++ do { ++ unsigned int len = min_t(unsigned int, buflen, PAGE_SIZE - off); ++ ++ sg_set_page(sg++, vmalloc_to_page((void *)addr), len, off); ++ ++ off = 0; ++ addr += PAGE_SIZE; ++ buflen -= len; ++ } while (buflen); ++ } else { ++ sg_set_page(sg++, virt_to_page(addr), buflen, off); ++ } ++ return sg; ++} ++ + #endif /* _CIFS_GLOB_H */ +diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h +index d0f85b666662d..50844d51da5d9 100644 +--- a/fs/cifs/cifsproto.h ++++ b/fs/cifs/cifsproto.h +@@ -590,8 +590,8 @@ int cifs_alloc_hash(const char *name, struct crypto_shash **shash, + struct sdesc **sdesc); + void cifs_free_hash(struct crypto_shash **shash, struct sdesc **sdesc); + +-extern void rqst_page_get_length(struct smb_rqst *rqst, unsigned int page, +- unsigned int *len, unsigned int *offset); ++void rqst_page_get_length(const struct smb_rqst *rqst, unsigned int page, ++ unsigned int *len, unsigned int *offset); + struct cifs_chan * + cifs_ses_find_chan(struct cifs_ses *ses, struct TCP_Server_Info *server); + int cifs_try_adding_channels(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses); +@@ -607,7 +607,7 @@ int smb2_parse_query_directory(struct cifs_tcon *tcon, struct kvec *rsp_iov, + + struct super_block *cifs_get_tcp_super(struct TCP_Server_Info *server); + void cifs_put_tcp_super(struct super_block *sb); +-int update_super_prepath(struct cifs_tcon *tcon, char *prefix); ++int cifs_update_super_prepath(struct cifs_sb_info *cifs_sb, char *prefix); + char *extract_hostname(const char *unc); + char *extract_sharename(const char *unc); + +@@ -634,4 +634,7 @@ static inline int cifs_create_options(struct cifs_sb_info *cifs_sb, int options) + return options; + } + ++struct super_block *cifs_get_tcon_super(struct cifs_tcon *tcon); ++void cifs_put_tcon_super(struct super_block *sb); ++ + #endif /* _CIFSPROTO_H */ diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c -index c3b94c1e45913..278634a63895d 100644 +index c3b94c1e45913..555bd386a24df 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c -@@ -115,7 +115,7 @@ static int reconn_set_ipaddr_from_hostname(struct TCP_Server_Info *server) +@@ -61,6 +61,20 @@ extern bool disable_legacy_dialects; + /* Drop the connection to not overload the server */ + #define NUM_STATUS_IO_TIMEOUT 5 + ++struct mount_ctx { ++ struct cifs_sb_info *cifs_sb; ++ struct smb3_fs_context *fs_ctx; ++ unsigned int xid; ++ struct TCP_Server_Info *server; ++ struct cifs_ses *ses; ++ struct cifs_tcon *tcon; ++#ifdef CONFIG_CIFS_DFS_UPCALL ++ struct cifs_ses *root_ses; ++ uuid_t mount_id; ++ char *origin_fullpath, *leaf_fullpath; ++#endif ++}; ++ + static int ip_connect(struct TCP_Server_Info *server); + static int generic_ip_connect(struct TCP_Server_Info *server); + static void tlink_rb_insert(struct rb_root *root, struct tcon_link *new_tlink); +@@ -115,7 +129,7 @@ static int reconn_set_ipaddr_from_hostname(struct TCP_Server_Info *server) * To make sure we don't use the cached entry, retry 1s * after expiry. */ @@ -309651,7 +369613,412 @@ index c3b94c1e45913..278634a63895d 100644 } rc = !rc ? 
-1 : 0; -@@ -519,9 +519,6 @@ cifs_readv_from_socket(struct TCP_Server_Info *server, struct msghdr *smb_msg) +@@ -148,131 +162,29 @@ static void cifs_resolve_server(struct work_struct *work) + mutex_unlock(&server->srv_mutex); + } + +-#ifdef CONFIG_CIFS_DFS_UPCALL +-/* These functions must be called with server->srv_mutex held */ +-static void reconn_set_next_dfs_target(struct TCP_Server_Info *server, +- struct cifs_sb_info *cifs_sb, +- struct dfs_cache_tgt_list *tgt_list, +- struct dfs_cache_tgt_iterator **tgt_it) +-{ +- const char *name; +- int rc; +- +- if (!cifs_sb || !cifs_sb->origin_fullpath) +- return; +- +- if (!*tgt_it) { +- *tgt_it = dfs_cache_get_tgt_iterator(tgt_list); +- } else { +- *tgt_it = dfs_cache_get_next_tgt(tgt_list, *tgt_it); +- if (!*tgt_it) +- *tgt_it = dfs_cache_get_tgt_iterator(tgt_list); +- } +- +- cifs_dbg(FYI, "%s: UNC: %s\n", __func__, cifs_sb->origin_fullpath); +- +- name = dfs_cache_get_tgt_name(*tgt_it); +- +- kfree(server->hostname); +- +- server->hostname = extract_hostname(name); +- if (IS_ERR(server->hostname)) { +- cifs_dbg(FYI, +- "%s: failed to extract hostname from target: %ld\n", +- __func__, PTR_ERR(server->hostname)); +- return; +- } +- +- rc = reconn_set_ipaddr_from_hostname(server); +- if (rc) { +- cifs_dbg(FYI, "%s: failed to resolve hostname: %d\n", +- __func__, rc); +- } +-} +- +-static inline int reconn_setup_dfs_targets(struct cifs_sb_info *cifs_sb, +- struct dfs_cache_tgt_list *tl) +-{ +- if (!cifs_sb->origin_fullpath) +- return -EOPNOTSUPP; +- return dfs_cache_noreq_find(cifs_sb->origin_fullpath + 1, NULL, tl); +-} +-#endif +- +-/* +- * cifs tcp session reconnection ++/** ++ * Mark all sessions and tcons for reconnect. + * +- * mark tcp session as reconnecting so temporarily locked +- * mark all smb sessions as reconnecting for tcp session +- * reconnect tcp session +- * wake up waiters on reconnection? - (not needed currently) ++ * @server needs to be previously set to CifsNeedReconnect. 
+ */ +-int +-cifs_reconnect(struct TCP_Server_Info *server) ++static void cifs_mark_tcp_ses_conns_for_reconnect(struct TCP_Server_Info *server) + { +- int rc = 0; + struct list_head *tmp, *tmp2; + struct cifs_ses *ses; + struct cifs_tcon *tcon; + struct mid_q_entry *mid_entry; + struct list_head retry_list; +-#ifdef CONFIG_CIFS_DFS_UPCALL +- struct super_block *sb = NULL; +- struct cifs_sb_info *cifs_sb = NULL; +- struct dfs_cache_tgt_list tgt_list = DFS_CACHE_TGT_LIST_INIT(tgt_list); +- struct dfs_cache_tgt_iterator *tgt_it = NULL; +-#endif + +- spin_lock(&GlobalMid_Lock); +- server->nr_targets = 1; +-#ifdef CONFIG_CIFS_DFS_UPCALL +- spin_unlock(&GlobalMid_Lock); +- sb = cifs_get_tcp_super(server); +- if (IS_ERR(sb)) { +- rc = PTR_ERR(sb); +- cifs_dbg(FYI, "%s: will not do DFS failover: rc = %d\n", +- __func__, rc); +- sb = NULL; +- } else { +- cifs_sb = CIFS_SB(sb); +- rc = reconn_setup_dfs_targets(cifs_sb, &tgt_list); +- if (rc) { +- cifs_sb = NULL; +- if (rc != -EOPNOTSUPP) { +- cifs_server_dbg(VFS, "%s: no target servers for DFS failover\n", +- __func__); +- } +- } else { +- server->nr_targets = dfs_cache_get_nr_tgts(&tgt_list); +- } +- } +- cifs_dbg(FYI, "%s: will retry %d target(s)\n", __func__, +- server->nr_targets); +- spin_lock(&GlobalMid_Lock); +-#endif +- if (server->tcpStatus == CifsExiting) { +- /* the demux thread will exit normally +- next time through the loop */ +- spin_unlock(&GlobalMid_Lock); +-#ifdef CONFIG_CIFS_DFS_UPCALL +- dfs_cache_free_tgts(&tgt_list); +- cifs_put_tcp_super(sb); +-#endif +- wake_up(&server->response_q); +- return rc; +- } else +- server->tcpStatus = CifsNeedReconnect; +- spin_unlock(&GlobalMid_Lock); + server->maxBuf = 0; + server->max_read = 0; + + cifs_dbg(FYI, "Mark tcp session as need reconnect\n"); + trace_smb3_reconnect(server->CurrentMid, server->conn_id, server->hostname); +- +- /* before reconnecting the tcp session, mark the smb session (uid) +- and the tid bad so they are not used until reconnected */ +- cifs_dbg(FYI, "%s: marking sessions and tcons for reconnect\n", +- __func__); ++ /* ++ * before reconnecting the tcp session, mark the smb session (uid) and the tid bad so they ++ * are not used until reconnected. 
++ */ ++ cifs_dbg(FYI, "%s: marking sessions and tcons for reconnect\n", __func__); + spin_lock(&cifs_tcp_ses_lock); + list_for_each(tmp, &server->smb_ses_list) { + ses = list_entry(tmp, struct cifs_ses, smb_ses_list); +@@ -290,11 +202,11 @@ cifs_reconnect(struct TCP_Server_Info *server) + cifs_dbg(FYI, "%s: tearing down socket\n", __func__); + mutex_lock(&server->srv_mutex); + if (server->ssocket) { +- cifs_dbg(FYI, "State: 0x%x Flags: 0x%lx\n", +- server->ssocket->state, server->ssocket->flags); ++ cifs_dbg(FYI, "State: 0x%x Flags: 0x%lx\n", server->ssocket->state, ++ server->ssocket->flags); + kernel_sock_shutdown(server->ssocket, SHUT_WR); +- cifs_dbg(FYI, "Post shutdown state: 0x%x Flags: 0x%lx\n", +- server->ssocket->state, server->ssocket->flags); ++ cifs_dbg(FYI, "Post shutdown state: 0x%x Flags: 0x%lx\n", server->ssocket->state, ++ server->ssocket->flags); + sock_release(server->ssocket); + server->ssocket = NULL; + } +@@ -333,38 +245,48 @@ cifs_reconnect(struct TCP_Server_Info *server) + smbd_destroy(server); + mutex_unlock(&server->srv_mutex); + } ++} ++ ++static bool cifs_tcp_ses_needs_reconnect(struct TCP_Server_Info *server, int num_targets) ++{ ++ spin_lock(&GlobalMid_Lock); ++ server->nr_targets = num_targets; ++ if (server->tcpStatus == CifsExiting) { ++ /* the demux thread will exit normally next time through the loop */ ++ spin_unlock(&GlobalMid_Lock); ++ wake_up(&server->response_q); ++ return false; ++ } ++ server->tcpStatus = CifsNeedReconnect; ++ spin_unlock(&GlobalMid_Lock); ++ return true; ++} ++ ++/* ++ * cifs tcp session reconnection ++ * ++ * mark tcp session as reconnecting so temporarily locked ++ * mark all smb sessions as reconnecting for tcp session ++ * reconnect tcp session ++ * wake up waiters on reconnection? - (not needed currently) ++ */ ++static int __cifs_reconnect(struct TCP_Server_Info *server) ++{ ++ int rc = 0; ++ ++ if (!cifs_tcp_ses_needs_reconnect(server, 1)) ++ return 0; ++ ++ cifs_mark_tcp_ses_conns_for_reconnect(server); + + do { + try_to_freeze(); +- + mutex_lock(&server->srv_mutex); + +- + if (!cifs_swn_set_server_dstaddr(server)) { +-#ifdef CONFIG_CIFS_DFS_UPCALL +- if (cifs_sb && cifs_sb->origin_fullpath) +- /* +- * Set up next DFS target server (if any) for reconnect. If DFS +- * feature is disabled, then we will retry last server we +- * connected to before. +- */ +- reconn_set_next_dfs_target(server, cifs_sb, &tgt_list, &tgt_it); +- else { +-#endif +- /* +- * Resolve the hostname again to make sure that IP address is up-to-date. 
+- */ ++ /* resolve the hostname again to make sure that IP address is up-to-date */ + rc = reconn_set_ipaddr_from_hostname(server); +- if (rc) { +- cifs_dbg(FYI, "%s: failed to resolve hostname: %d\n", +- __func__, rc); +- } +- +-#ifdef CONFIG_CIFS_DFS_UPCALL +- } +-#endif +- +- ++ cifs_dbg(FYI, "%s: reconn_set_ipaddr_from_hostname: rc=%d\n", __func__, rc); + } + + if (cifs_rdma_enabled(server)) +@@ -372,8 +294,8 @@ cifs_reconnect(struct TCP_Server_Info *server) + else + rc = generic_ip_connect(server); + if (rc) { +- cifs_dbg(FYI, "reconnect error %d\n", rc); + mutex_unlock(&server->srv_mutex); ++ cifs_dbg(FYI, "%s: reconnect error %d\n", __func__, rc); + msleep(3000); + } else { + atomic_inc(&tcpSesReconnectCount); +@@ -387,19 +309,128 @@ cifs_reconnect(struct TCP_Server_Info *server) + } + } while (server->tcpStatus == CifsNeedReconnect); + ++ if (server->tcpStatus == CifsNeedNegotiate) ++ mod_delayed_work(cifsiod_wq, &server->echo, 0); ++ ++ wake_up(&server->response_q); ++ return rc; ++} ++ + #ifdef CONFIG_CIFS_DFS_UPCALL +- if (tgt_it) { +- rc = dfs_cache_noreq_update_tgthint(cifs_sb->origin_fullpath + 1, +- tgt_it); +- if (rc) { +- cifs_server_dbg(VFS, "%s: failed to update DFS target hint: rc = %d\n", +- __func__, rc); ++static int __reconnect_target_unlocked(struct TCP_Server_Info *server, const char *target) ++{ ++ int rc; ++ char *hostname; ++ ++ if (!cifs_swn_set_server_dstaddr(server)) { ++ if (server->hostname != target) { ++ hostname = extract_hostname(target); ++ if (!IS_ERR(hostname)) { ++ kfree(server->hostname); ++ server->hostname = hostname; ++ } else { ++ cifs_dbg(FYI, "%s: couldn't extract hostname or address from dfs target: %ld\n", ++ __func__, PTR_ERR(hostname)); ++ cifs_dbg(FYI, "%s: default to last target server: %s\n", __func__, ++ server->hostname); ++ } ++ } ++ /* resolve the hostname again to make sure that IP address is up-to-date. */ ++ rc = reconn_set_ipaddr_from_hostname(server); ++ cifs_dbg(FYI, "%s: reconn_set_ipaddr_from_hostname: rc=%d\n", __func__, rc); ++ } ++ /* Reconnect the socket */ ++ if (cifs_rdma_enabled(server)) ++ rc = smbd_reconnect(server); ++ else ++ rc = generic_ip_connect(server); ++ ++ return rc; ++} ++ ++static int reconnect_target_unlocked(struct TCP_Server_Info *server, struct dfs_cache_tgt_list *tl, ++ struct dfs_cache_tgt_iterator **target_hint) ++{ ++ int rc; ++ struct dfs_cache_tgt_iterator *tit; ++ ++ *target_hint = NULL; ++ ++ /* If dfs target list is empty, then reconnect to last server */ ++ tit = dfs_cache_get_tgt_iterator(tl); ++ if (!tit) ++ return __reconnect_target_unlocked(server, server->hostname); ++ ++ /* Otherwise, try every dfs target in @tl */ ++ for (; tit; tit = dfs_cache_get_next_tgt(tl, tit)) { ++ rc = __reconnect_target_unlocked(server, dfs_cache_get_tgt_name(tit)); ++ if (!rc) { ++ *target_hint = tit; ++ break; + } +- dfs_cache_free_tgts(&tgt_list); + } ++ return rc; ++} + +- cifs_put_tcp_super(sb); +-#endif ++static int reconnect_dfs_server(struct TCP_Server_Info *server) ++{ ++ int rc = 0; ++ const char *refpath = server->current_fullpath + 1; ++ struct dfs_cache_tgt_list tl = DFS_CACHE_TGT_LIST_INIT(tl); ++ struct dfs_cache_tgt_iterator *target_hint = NULL; ++ int num_targets = 0; ++ ++ /* ++ * Determine the number of dfs targets the referral path in @cifs_sb resolves to. ++ * ++ * smb2_reconnect() needs to know how long it should wait based upon the number of dfs ++ * targets (server->nr_targets). 
It's also possible that the cached referral was cleared ++ * through /proc/fs/cifs/dfscache or the target list is empty due to server settings after ++ * refreshing the referral, so, in this case, default it to 1. ++ */ ++ if (!dfs_cache_noreq_find(refpath, NULL, &tl)) ++ num_targets = dfs_cache_get_nr_tgts(&tl); ++ if (!num_targets) ++ num_targets = 1; ++ ++ if (!cifs_tcp_ses_needs_reconnect(server, num_targets)) ++ return 0; ++ ++ cifs_mark_tcp_ses_conns_for_reconnect(server); ++ ++ do { ++ try_to_freeze(); ++ mutex_lock(&server->srv_mutex); ++ ++ rc = reconnect_target_unlocked(server, &tl, &target_hint); ++ if (rc) { ++ /* Failed to reconnect socket */ ++ mutex_unlock(&server->srv_mutex); ++ cifs_dbg(FYI, "%s: reconnect error %d\n", __func__, rc); ++ msleep(3000); ++ continue; ++ } ++ /* ++ * Socket was created. Update tcp session status to CifsNeedNegotiate so that a ++ * process waiting for reconnect will know it needs to re-establish session and tcon ++ * through the reconnected target server. ++ */ ++ atomic_inc(&tcpSesReconnectCount); ++ set_credits(server, 1); ++ spin_lock(&GlobalMid_Lock); ++ if (server->tcpStatus != CifsExiting) ++ server->tcpStatus = CifsNeedNegotiate; ++ spin_unlock(&GlobalMid_Lock); ++ cifs_swn_reset_server_dstaddr(server); ++ mutex_unlock(&server->srv_mutex); ++ } while (server->tcpStatus == CifsNeedReconnect); ++ ++ if (target_hint) ++ dfs_cache_noreq_update_tgthint(refpath, target_hint); ++ ++ dfs_cache_free_tgts(&tl); ++ ++ /* Need to set up echo worker again once connection has been established */ + if (server->tcpStatus == CifsNeedNegotiate) + mod_delayed_work(cifsiod_wq, &server->echo, 0); + +@@ -407,6 +438,25 @@ cifs_reconnect(struct TCP_Server_Info *server) + return rc; + } + ++int cifs_reconnect(struct TCP_Server_Info *server) ++{ ++ /* If tcp session is not an dfs connection, then reconnect to last target server */ ++ spin_lock(&cifs_tcp_ses_lock); ++ if (!server->is_dfs_conn || !server->origin_fullpath || !server->leaf_fullpath) { ++ spin_unlock(&cifs_tcp_ses_lock); ++ return __cifs_reconnect(server); ++ } ++ spin_unlock(&cifs_tcp_ses_lock); ++ ++ return reconnect_dfs_server(server); ++} ++#else ++int cifs_reconnect(struct TCP_Server_Info *server) ++{ ++ return __cifs_reconnect(server); ++} ++#endif ++ + static void + cifs_echo_request(struct work_struct *work) + { +@@ -519,9 +569,6 @@ cifs_readv_from_socket(struct TCP_Server_Info *server, struct msghdr *smb_msg) int length = 0; int total_read; @@ -309661,7 +370028,7 @@ index c3b94c1e45913..278634a63895d 100644 for (total_read = 0; msg_data_left(smb_msg); total_read += length) { try_to_freeze(); -@@ -572,7 +569,7 @@ int +@@ -572,7 +619,7 @@ int cifs_read_from_socket(struct TCP_Server_Info *server, char *buf, unsigned int to_read) { @@ -309670,7 +370037,7 @@ index c3b94c1e45913..278634a63895d 100644 struct kvec iov = {.iov_base = buf, .iov_len = to_read}; iov_iter_kvec(&smb_msg.msg_iter, READ, &iov, 1, to_read); -@@ -582,15 +579,13 @@ cifs_read_from_socket(struct TCP_Server_Info *server, char *buf, +@@ -582,15 +629,13 @@ cifs_read_from_socket(struct TCP_Server_Info *server, char *buf, ssize_t cifs_discard_from_socket(struct TCP_Server_Info *server, size_t to_read) { @@ -309687,7 +370054,7 @@ index c3b94c1e45913..278634a63895d 100644 iov_iter_discard(&smb_msg.msg_iter, READ, to_read); return cifs_readv_from_socket(server, &smb_msg); -@@ -600,7 +595,7 @@ int +@@ -600,7 +645,7 @@ int cifs_read_page_from_socket(struct TCP_Server_Info *server, struct page *page, unsigned int page_offset, unsigned int 
to_read) { @@ -309696,15 +370063,19 @@ index c3b94c1e45913..278634a63895d 100644 struct bio_vec bv = { .bv_page = page, .bv_len = to_read, .bv_offset = page_offset}; iov_iter_bvec(&smb_msg.msg_iter, READ, &bv, 1, to_read); -@@ -794,7 +789,6 @@ static void clean_demultiplex_info(struct TCP_Server_Info *server) +@@ -794,7 +839,10 @@ static void clean_demultiplex_info(struct TCP_Server_Info *server) */ } - kfree(server->hostname); ++#ifdef CONFIG_CIFS_DFS_UPCALL ++ kfree(server->origin_fullpath); ++ kfree(server->leaf_fullpath); ++#endif kfree(server); length = atomic_dec_return(&tcpSesAllocCount); -@@ -1221,6 +1215,10 @@ static int match_server(struct TCP_Server_Info *server, struct smb3_fs_context * +@@ -1221,6 +1269,10 @@ static int match_server(struct TCP_Server_Info *server, struct smb3_fs_context * if (ctx->nosharesock) return 0; @@ -309715,7 +370086,7 @@ index c3b94c1e45913..278634a63895d 100644 /* If multidialect negotiation see if existing sessions match one */ if (strcmp(ctx->vals->version_string, SMB3ANY_VERSION_STRING) == 0) { if (server->vals->protocol_id < SMB30_PROT_ID) -@@ -1235,6 +1233,9 @@ static int match_server(struct TCP_Server_Info *server, struct smb3_fs_context * +@@ -1235,6 +1287,9 @@ static int match_server(struct TCP_Server_Info *server, struct smb3_fs_context * if (!net_eq(cifs_net_ns(server), current->nsproxy->net_ns)) return 0; @@ -309725,15 +370096,16 @@ index c3b94c1e45913..278634a63895d 100644 if (!match_address(server, addr, (struct sockaddr *)&ctx->srcaddr)) return 0; -@@ -1336,6 +1337,7 @@ cifs_put_tcp_session(struct TCP_Server_Info *server, int from_reconnect) +@@ -1336,6 +1391,8 @@ cifs_put_tcp_session(struct TCP_Server_Info *server, int from_reconnect) kfree(server->session_key.response); server->session_key.response = NULL; server->session_key.len = 0; + kfree(server->hostname); ++ server->hostname = NULL; task = xchg(&server->tsk, NULL); if (task) -@@ -1361,14 +1363,18 @@ cifs_get_tcp_session(struct smb3_fs_context *ctx) +@@ -1361,14 +1418,18 @@ cifs_get_tcp_session(struct smb3_fs_context *ctx) goto out_err; } @@ -309757,7 +370129,17 @@ index c3b94c1e45913..278634a63895d 100644 tcp_ses->conn_id = atomic_inc_return(&tcpSesNextId); tcp_ses->noblockcnt = ctx->rootfs; -@@ -1497,8 +1503,7 @@ out_err_crypto_release: +@@ -1399,6 +1460,9 @@ cifs_get_tcp_session(struct smb3_fs_context *ctx) + INIT_DELAYED_WORK(&tcp_ses->resolve, cifs_resolve_server); + INIT_DELAYED_WORK(&tcp_ses->reconnect, smb2_reconnect_server); + mutex_init(&tcp_ses->reconnect_mutex); ++#ifdef CONFIG_CIFS_DFS_UPCALL ++ mutex_init(&tcp_ses->refpath_lock); ++#endif + memcpy(&tcp_ses->srcaddr, &ctx->srcaddr, + sizeof(tcp_ses->srcaddr)); + memcpy(&tcp_ses->dstaddr, &ctx->dstaddr, +@@ -1497,8 +1561,7 @@ out_err_crypto_release: out_err: if (tcp_ses) { @@ -309767,7 +370149,7 @@ index c3b94c1e45913..278634a63895d 100644 if (tcp_ses->ssocket) sock_release(tcp_ses->ssocket); kfree(tcp_ses); -@@ -1516,8 +1521,12 @@ static int match_session(struct cifs_ses *ses, struct smb3_fs_context *ctx) +@@ -1516,8 +1579,12 @@ static int match_session(struct cifs_ses *ses, struct smb3_fs_context *ctx) * If an existing session is limited to less channels than * requested, it should not be reused */ @@ -309781,7 +370163,7 @@ index c3b94c1e45913..278634a63895d 100644 switch (ses->sectype) { case Kerberos: -@@ -1652,6 +1661,7 @@ cifs_find_smb_ses(struct TCP_Server_Info *server, struct smb3_fs_context *ctx) +@@ -1652,6 +1719,7 @@ cifs_find_smb_ses(struct TCP_Server_Info *server, struct smb3_fs_context *ctx) void 
cifs_put_smb_ses(struct cifs_ses *ses) { unsigned int rc, xid; @@ -309789,7 +370171,7 @@ index c3b94c1e45913..278634a63895d 100644 struct TCP_Server_Info *server = ses->server; cifs_dbg(FYI, "%s: ses_count=%d\n", __func__, ses->ses_count); -@@ -1693,12 +1703,24 @@ void cifs_put_smb_ses(struct cifs_ses *ses) +@@ -1693,12 +1761,24 @@ void cifs_put_smb_ses(struct cifs_ses *ses) list_del_init(&ses->smb_ses_list); spin_unlock(&cifs_tcp_ses_lock); @@ -309816,7 +370198,25 @@ index c3b94c1e45913..278634a63895d 100644 } sesInfoFree(ses); -@@ -1949,9 +1971,11 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb3_fs_context *ctx) +@@ -1868,7 +1948,7 @@ cifs_set_cifscreds(struct smb3_fs_context *ctx __attribute__((unused)), + struct cifs_ses * + cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb3_fs_context *ctx) + { +- int rc = -ENOMEM; ++ int rc = 0; + unsigned int xid; + struct cifs_ses *ses; + struct sockaddr_in *addr = (struct sockaddr_in *)&server->dstaddr; +@@ -1910,6 +1990,8 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb3_fs_context *ctx) + return ses; + } + ++ rc = -ENOMEM; ++ + cifs_dbg(FYI, "Existing smb sess not found\n"); + ses = sesInfoAlloc(); + if (ses == NULL) +@@ -1949,9 +2031,11 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb3_fs_context *ctx) mutex_lock(&ses->session_mutex); /* add server as first channel */ @@ -309828,6 +370228,1435 @@ index c3b94c1e45913..278634a63895d 100644 rc = cifs_negotiate_protocol(xid, ses); if (!rc) +@@ -2845,73 +2929,64 @@ int cifs_setup_cifs_sb(struct cifs_sb_info *cifs_sb) + } + + /* Release all succeed connections */ +-static inline void mount_put_conns(struct cifs_sb_info *cifs_sb, +- unsigned int xid, +- struct TCP_Server_Info *server, +- struct cifs_ses *ses, struct cifs_tcon *tcon) ++static inline void mount_put_conns(struct mount_ctx *mnt_ctx) + { + int rc = 0; + +- if (tcon) +- cifs_put_tcon(tcon); +- else if (ses) +- cifs_put_smb_ses(ses); +- else if (server) +- cifs_put_tcp_session(server, 0); +- cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_POSIX_PATHS; +- free_xid(xid); ++ if (mnt_ctx->tcon) ++ cifs_put_tcon(mnt_ctx->tcon); ++ else if (mnt_ctx->ses) ++ cifs_put_smb_ses(mnt_ctx->ses); ++ else if (mnt_ctx->server) ++ cifs_put_tcp_session(mnt_ctx->server, 0); ++ mnt_ctx->cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_POSIX_PATHS; ++ free_xid(mnt_ctx->xid); + } + + /* Get connections for tcp, ses and tcon */ +-static int mount_get_conns(struct smb3_fs_context *ctx, struct cifs_sb_info *cifs_sb, +- unsigned int *xid, +- struct TCP_Server_Info **nserver, +- struct cifs_ses **nses, struct cifs_tcon **ntcon) ++static int mount_get_conns(struct mount_ctx *mnt_ctx) + { + int rc = 0; +- struct TCP_Server_Info *server; +- struct cifs_ses *ses; +- struct cifs_tcon *tcon; +- +- *nserver = NULL; +- *nses = NULL; +- *ntcon = NULL; ++ struct TCP_Server_Info *server = NULL; ++ struct cifs_ses *ses = NULL; ++ struct cifs_tcon *tcon = NULL; ++ struct smb3_fs_context *ctx = mnt_ctx->fs_ctx; ++ struct cifs_sb_info *cifs_sb = mnt_ctx->cifs_sb; ++ unsigned int xid; + +- *xid = get_xid(); ++ xid = get_xid(); + + /* get a reference to a tcp session */ + server = cifs_get_tcp_session(ctx); + if (IS_ERR(server)) { + rc = PTR_ERR(server); +- return rc; ++ server = NULL; ++ goto out; + } + +- *nserver = server; +- + /* get a reference to a SMB session */ + ses = cifs_get_smb_ses(server, ctx); + if (IS_ERR(ses)) { + rc = PTR_ERR(ses); +- return rc; ++ ses = NULL; ++ goto out; + } + +- *nses = ses; +- + if ((ctx->persistent == true) && 
(!(ses->server->capabilities & + SMB2_GLOBAL_CAP_PERSISTENT_HANDLES))) { + cifs_server_dbg(VFS, "persistent handles not supported by server\n"); +- return -EOPNOTSUPP; ++ rc = -EOPNOTSUPP; ++ goto out; + } + + /* search for existing tcon to this server share */ + tcon = cifs_get_tcon(ses, ctx); + if (IS_ERR(tcon)) { + rc = PTR_ERR(tcon); +- return rc; ++ tcon = NULL; ++ goto out; + } + +- *ntcon = tcon; +- + /* if new SMB3.11 POSIX extensions are supported do not remap / and \ */ + if (tcon->posix_extensions) + cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_POSIX_PATHS; +@@ -2922,17 +2997,19 @@ static int mount_get_conns(struct smb3_fs_context *ctx, struct cifs_sb_info *cif + * reset of caps checks mount to see if unix extensions disabled + * for just this mount. + */ +- reset_cifs_unix_caps(*xid, tcon, cifs_sb, ctx); ++ reset_cifs_unix_caps(xid, tcon, cifs_sb, ctx); + if ((tcon->ses->server->tcpStatus == CifsNeedReconnect) && + (le64_to_cpu(tcon->fsUnixInfo.Capability) & +- CIFS_UNIX_TRANSPORT_ENCRYPTION_MANDATORY_CAP)) +- return -EACCES; ++ CIFS_UNIX_TRANSPORT_ENCRYPTION_MANDATORY_CAP)) { ++ rc = -EACCES; ++ goto out; ++ } + } else + tcon->unix_ext = 0; /* server does not support them */ + + /* do not care if a following call succeed - informational */ + if (!tcon->pipe && server->ops->qfs_tcon) { +- server->ops->qfs_tcon(*xid, tcon, cifs_sb); ++ server->ops->qfs_tcon(xid, tcon, cifs_sb); + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RO_CACHE) { + if (tcon->fsDevInfo.DeviceCharacteristics & + cpu_to_le32(FILE_READ_ONLY_DEVICE)) +@@ -2956,7 +3033,13 @@ static int mount_get_conns(struct smb3_fs_context *ctx, struct cifs_sb_info *cif + (cifs_sb->ctx->rsize > server->ops->negotiate_rsize(tcon, ctx))) + cifs_sb->ctx->rsize = server->ops->negotiate_rsize(tcon, ctx); + +- return 0; ++out: ++ mnt_ctx->server = server; ++ mnt_ctx->ses = ses; ++ mnt_ctx->tcon = tcon; ++ mnt_ctx->xid = xid; ++ ++ return rc; + } + + static int mount_setup_tlink(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses, +@@ -2986,18 +3069,17 @@ static int mount_setup_tlink(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses, + } + + #ifdef CONFIG_CIFS_DFS_UPCALL +-static int mount_get_dfs_conns(struct smb3_fs_context *ctx, struct cifs_sb_info *cifs_sb, +- unsigned int *xid, struct TCP_Server_Info **nserver, +- struct cifs_ses **nses, struct cifs_tcon **ntcon) ++/* Get unique dfs connections */ ++static int mount_get_dfs_conns(struct mount_ctx *mnt_ctx) + { + int rc; + +- ctx->nosharesock = true; +- rc = mount_get_conns(ctx, cifs_sb, xid, nserver, nses, ntcon); +- if (*nserver) { ++ mnt_ctx->fs_ctx->nosharesock = true; ++ rc = mount_get_conns(mnt_ctx); ++ if (mnt_ctx->server) { + cifs_dbg(FYI, "%s: marking tcp session as a dfs connection\n", __func__); + spin_lock(&cifs_tcp_ses_lock); +- (*nserver)->is_dfs_conn = true; ++ mnt_ctx->server->is_dfs_conn = true; + spin_unlock(&cifs_tcp_ses_lock); + } + return rc; +@@ -3039,193 +3121,41 @@ build_unc_path_to_root(const struct smb3_fs_context *ctx, + } + + /* +- * expand_dfs_referral - Perform a dfs referral query and update the cifs_sb ++ * expand_dfs_referral - Update cifs_sb from dfs referral path + * +- * If a referral is found, cifs_sb->ctx->mount_options will be (re-)allocated +- * to a string containing updated options for the submount. Otherwise it +- * will be left untouched. +- * +- * Returns the rc from get_dfs_path to the caller, which can be used to +- * determine whether there were referrals. 
++ * cifs_sb->ctx->mount_options will be (re-)allocated to a string containing updated options for the ++ * submount. Otherwise it will be left untouched. + */ +-static int +-expand_dfs_referral(const unsigned int xid, struct cifs_ses *ses, +- struct smb3_fs_context *ctx, struct cifs_sb_info *cifs_sb, +- char *ref_path) ++static int expand_dfs_referral(struct mount_ctx *mnt_ctx, const char *full_path, ++ struct dfs_info3_param *referral) + { + int rc; +- struct dfs_info3_param referral = {0}; +- char *full_path = NULL, *mdata = NULL; +- +- if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_DFS) +- return -EREMOTE; ++ struct cifs_sb_info *cifs_sb = mnt_ctx->cifs_sb; ++ struct smb3_fs_context *ctx = mnt_ctx->fs_ctx; ++ char *fake_devname = NULL, *mdata = NULL; ++ ++ mdata = cifs_compose_mount_options(cifs_sb->ctx->mount_options, full_path + 1, referral, ++ &fake_devname); ++ if (IS_ERR(mdata)) { ++ rc = PTR_ERR(mdata); ++ mdata = NULL; ++ } else { ++ /* ++ * We can not clear out the whole structure since we no longer have an explicit ++ * function to parse a mount-string. Instead we need to clear out the individual ++ * fields that are no longer valid. ++ */ ++ kfree(ctx->prepath); ++ ctx->prepath = NULL; ++ rc = cifs_setup_volume_info(ctx, mdata, fake_devname); ++ } ++ kfree(fake_devname); ++ kfree(cifs_sb->ctx->mount_options); ++ cifs_sb->ctx->mount_options = mdata; + +- full_path = build_unc_path_to_root(ctx, cifs_sb, true); +- if (IS_ERR(full_path)) +- return PTR_ERR(full_path); +- +- rc = dfs_cache_find(xid, ses, cifs_sb->local_nls, cifs_remap(cifs_sb), +- ref_path, &referral, NULL); +- if (!rc) { +- char *fake_devname = NULL; +- +- mdata = cifs_compose_mount_options(cifs_sb->ctx->mount_options, +- full_path + 1, &referral, +- &fake_devname); +- free_dfs_info_param(&referral); +- +- if (IS_ERR(mdata)) { +- rc = PTR_ERR(mdata); +- mdata = NULL; +- } else { +- /* +- * We can not clear out the whole structure since we +- * no longer have an explicit function to parse +- * a mount-string. Instead we need to clear out the +- * individual fields that are no longer valid. +- */ +- kfree(ctx->prepath); +- ctx->prepath = NULL; +- rc = cifs_setup_volume_info(ctx, mdata, fake_devname); +- } +- kfree(fake_devname); +- kfree(cifs_sb->ctx->mount_options); +- cifs_sb->ctx->mount_options = mdata; +- } +- kfree(full_path); +- return rc; +-} +- +-static int get_next_dfs_tgt(struct dfs_cache_tgt_list *tgt_list, +- struct dfs_cache_tgt_iterator **tgt_it) +-{ +- if (!*tgt_it) +- *tgt_it = dfs_cache_get_tgt_iterator(tgt_list); +- else +- *tgt_it = dfs_cache_get_next_tgt(tgt_list, *tgt_it); +- return !*tgt_it ? 
-EHOSTDOWN : 0; +-} +- +-static int update_vol_info(const struct dfs_cache_tgt_iterator *tgt_it, +- struct smb3_fs_context *fake_ctx, struct smb3_fs_context *ctx) +-{ +- const char *tgt = dfs_cache_get_tgt_name(tgt_it); +- int len = strlen(tgt) + 2; +- char *new_unc; +- +- new_unc = kmalloc(len, GFP_KERNEL); +- if (!new_unc) +- return -ENOMEM; +- scnprintf(new_unc, len, "\\%s", tgt); +- +- kfree(ctx->UNC); +- ctx->UNC = new_unc; +- +- if (fake_ctx->prepath) { +- kfree(ctx->prepath); +- ctx->prepath = fake_ctx->prepath; +- fake_ctx->prepath = NULL; +- } +- memcpy(&ctx->dstaddr, &fake_ctx->dstaddr, sizeof(ctx->dstaddr)); +- +- return 0; +-} +- +-static int do_dfs_failover(const char *path, const char *full_path, struct cifs_sb_info *cifs_sb, +- struct smb3_fs_context *ctx, struct cifs_ses *root_ses, +- unsigned int *xid, struct TCP_Server_Info **server, +- struct cifs_ses **ses, struct cifs_tcon **tcon) +-{ +- int rc; +- char *npath = NULL; +- struct dfs_cache_tgt_list tgt_list = DFS_CACHE_TGT_LIST_INIT(tgt_list); +- struct dfs_cache_tgt_iterator *tgt_it = NULL; +- struct smb3_fs_context tmp_ctx = {NULL}; +- +- if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_DFS) +- return -EOPNOTSUPP; +- +- npath = dfs_cache_canonical_path(path, cifs_sb->local_nls, cifs_remap(cifs_sb)); +- if (IS_ERR(npath)) +- return PTR_ERR(npath); +- +- cifs_dbg(FYI, "%s: path=%s full_path=%s\n", __func__, npath, full_path); +- +- rc = dfs_cache_noreq_find(npath, NULL, &tgt_list); +- if (rc) +- goto out; +- /* +- * We use a 'tmp_ctx' here because we need pass it down to the mount_{get,put} functions to +- * test connection against new DFS targets. +- */ +- rc = smb3_fs_context_dup(&tmp_ctx, ctx); +- if (rc) +- goto out; +- +- for (;;) { +- struct dfs_info3_param ref = {0}; +- char *fake_devname = NULL, *mdata = NULL; +- +- /* Get next DFS target server - if any */ +- rc = get_next_dfs_tgt(&tgt_list, &tgt_it); +- if (rc) +- break; +- +- rc = dfs_cache_get_tgt_referral(npath, tgt_it, &ref); +- if (rc) +- break; +- +- cifs_dbg(FYI, "%s: old ctx: UNC=%s prepath=%s\n", __func__, tmp_ctx.UNC, +- tmp_ctx.prepath); +- +- mdata = cifs_compose_mount_options(cifs_sb->ctx->mount_options, full_path + 1, &ref, +- &fake_devname); +- free_dfs_info_param(&ref); +- +- if (IS_ERR(mdata)) { +- rc = PTR_ERR(mdata); +- mdata = NULL; +- } else +- rc = cifs_setup_volume_info(&tmp_ctx, mdata, fake_devname); +- +- kfree(mdata); +- kfree(fake_devname); +- +- if (rc) +- break; +- +- cifs_dbg(FYI, "%s: new ctx: UNC=%s prepath=%s\n", __func__, tmp_ctx.UNC, +- tmp_ctx.prepath); +- +- mount_put_conns(cifs_sb, *xid, *server, *ses, *tcon); +- rc = mount_get_dfs_conns(&tmp_ctx, cifs_sb, xid, server, ses, tcon); +- if (!rc || (*server && *ses)) { +- /* +- * We were able to connect to new target server. Update current context with +- * new target server. +- */ +- rc = update_vol_info(tgt_it, &tmp_ctx, ctx); +- break; +- } +- } +- if (!rc) { +- cifs_dbg(FYI, "%s: final ctx: UNC=%s prepath=%s\n", __func__, tmp_ctx.UNC, +- tmp_ctx.prepath); +- /* +- * Update DFS target hint in DFS referral cache with the target server we +- * successfully reconnected to. +- */ +- rc = dfs_cache_update_tgthint(*xid, root_ses ? root_ses : *ses, cifs_sb->local_nls, +- cifs_remap(cifs_sb), path, tgt_it); +- } +- +-out: +- kfree(npath); +- smb3_cleanup_fs_context_contents(&tmp_ctx); +- dfs_cache_free_tgts(&tgt_list); +- return rc; +-} +-#endif ++ return rc; ++} ++#endif + + /* TODO: all callers to this are broken. 
We are not parsing mount_options here + * we should pass a clone of the original context? +@@ -3329,12 +3259,14 @@ cifs_are_all_path_components_accessible(struct TCP_Server_Info *server, + * Check if path is remote (e.g. a DFS share). Return -EREMOTE if it is, + * otherwise 0. + */ +-static int is_path_remote(struct cifs_sb_info *cifs_sb, struct smb3_fs_context *ctx, +- const unsigned int xid, +- struct TCP_Server_Info *server, +- struct cifs_tcon *tcon) ++static int is_path_remote(struct mount_ctx *mnt_ctx) + { + int rc; ++ struct cifs_sb_info *cifs_sb = mnt_ctx->cifs_sb; ++ struct TCP_Server_Info *server = mnt_ctx->server; ++ unsigned int xid = mnt_ctx->xid; ++ struct cifs_tcon *tcon = mnt_ctx->tcon; ++ struct smb3_fs_context *ctx = mnt_ctx->fs_ctx; + char *full_path; + + if (!server->ops->is_path_accessible) +@@ -3372,280 +3304,298 @@ static int is_path_remote(struct cifs_sb_info *cifs_sb, struct smb3_fs_context * + } + + #ifdef CONFIG_CIFS_DFS_UPCALL +-static void set_root_ses(struct cifs_sb_info *cifs_sb, const uuid_t *mount_id, struct cifs_ses *ses, +- struct cifs_ses **root_ses) ++static void set_root_ses(struct mount_ctx *mnt_ctx) + { +- if (ses) { ++ if (mnt_ctx->ses) { + spin_lock(&cifs_tcp_ses_lock); +- ses->ses_count++; ++ mnt_ctx->ses->ses_count++; + spin_unlock(&cifs_tcp_ses_lock); +- dfs_cache_add_refsrv_session(mount_id, ses); ++ dfs_cache_add_refsrv_session(&mnt_ctx->mount_id, mnt_ctx->ses); + } +- *root_ses = ses; ++ mnt_ctx->root_ses = mnt_ctx->ses; + } + +-/* Set up next dfs prefix path in @dfs_path */ +-static int next_dfs_prepath(struct cifs_sb_info *cifs_sb, struct smb3_fs_context *ctx, +- const unsigned int xid, struct TCP_Server_Info *server, +- struct cifs_tcon *tcon, char **dfs_path) ++static int is_dfs_mount(struct mount_ctx *mnt_ctx, bool *isdfs, struct dfs_cache_tgt_list *root_tl) + { +- char *path, *npath; +- int added_treename = is_tcon_dfs(tcon); + int rc; ++ struct cifs_sb_info *cifs_sb = mnt_ctx->cifs_sb; ++ struct smb3_fs_context *ctx = mnt_ctx->fs_ctx; + +- path = cifs_build_path_to_root(ctx, cifs_sb, tcon, added_treename); +- if (!path) +- return -ENOMEM; ++ *isdfs = true; + +- rc = is_path_remote(cifs_sb, ctx, xid, server, tcon); +- if (rc == -EREMOTE) { +- struct smb3_fs_context v = {NULL}; +- /* if @path contains a tree name, skip it in the prefix path */ +- if (added_treename) { +- rc = smb3_parse_devname(path, &v); +- if (rc) +- goto out; +- npath = build_unc_path_to_root(&v, cifs_sb, true); +- smb3_cleanup_fs_context_contents(&v); +- } else { +- v.UNC = ctx->UNC; +- v.prepath = path + 1; +- npath = build_unc_path_to_root(&v, cifs_sb, true); +- } ++ rc = mount_get_conns(mnt_ctx); ++ /* ++ * If called with 'nodfs' mount option, then skip DFS resolving. Otherwise unconditionally ++ * try to get an DFS referral (even cached) to determine whether it is an DFS mount. ++ * ++ * Skip prefix path to provide support for DFS referrals from w2k8 servers which don't seem ++ * to respond with PATH_NOT_COVERED to requests that include the prefix. 
++ */ ++ if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_DFS) || ++ dfs_cache_find(mnt_ctx->xid, mnt_ctx->ses, cifs_sb->local_nls, cifs_remap(cifs_sb), ++ ctx->UNC + 1, NULL, root_tl)) { ++ if (rc) ++ return rc; ++ /* Check if it is fully accessible and then mount it */ ++ rc = is_path_remote(mnt_ctx); ++ if (!rc) ++ *isdfs = false; ++ else if (rc != -EREMOTE) ++ return rc; ++ } ++ return 0; ++} + +- if (IS_ERR(npath)) { +- rc = PTR_ERR(npath); +- goto out; +- } ++static int connect_dfs_target(struct mount_ctx *mnt_ctx, const char *full_path, ++ const char *ref_path, struct dfs_cache_tgt_iterator *tit) ++{ ++ int rc; ++ struct dfs_info3_param ref = {}; ++ struct cifs_sb_info *cifs_sb = mnt_ctx->cifs_sb; ++ char *oldmnt = cifs_sb->ctx->mount_options; ++ ++ rc = dfs_cache_get_tgt_referral(ref_path, tit, &ref); ++ if (rc) ++ goto out; ++ ++ rc = expand_dfs_referral(mnt_ctx, full_path, &ref); ++ if (rc) ++ goto out; + +- kfree(*dfs_path); +- *dfs_path = npath; +- rc = -EREMOTE; ++ /* Connect to new target only if we were redirected (e.g. mount options changed) */ ++ if (oldmnt != cifs_sb->ctx->mount_options) { ++ mount_put_conns(mnt_ctx); ++ rc = mount_get_dfs_conns(mnt_ctx); ++ } ++ if (!rc) { ++ if (cifs_is_referral_server(mnt_ctx->tcon, &ref)) ++ set_root_ses(mnt_ctx); ++ rc = dfs_cache_update_tgthint(mnt_ctx->xid, mnt_ctx->root_ses, cifs_sb->local_nls, ++ cifs_remap(cifs_sb), ref_path, tit); + } + + out: +- kfree(path); ++ free_dfs_info_param(&ref); + return rc; + } + +-/* Check if resolved targets can handle any DFS referrals */ +-static int is_referral_server(const char *ref_path, struct cifs_sb_info *cifs_sb, +- struct cifs_tcon *tcon, bool *ref_server) ++static int connect_dfs_root(struct mount_ctx *mnt_ctx, struct dfs_cache_tgt_list *root_tl) + { + int rc; +- struct dfs_info3_param ref = {0}; ++ char *full_path; ++ struct cifs_sb_info *cifs_sb = mnt_ctx->cifs_sb; ++ struct smb3_fs_context *ctx = mnt_ctx->fs_ctx; ++ struct dfs_cache_tgt_iterator *tit; + +- cifs_dbg(FYI, "%s: ref_path=%s\n", __func__, ref_path); ++ /* Put initial connections as they might be shared with other mounts. We need unique dfs ++ * connections per mount to properly failover, so mount_get_dfs_conns() must be used from ++ * now on. 
++ */ ++ mount_put_conns(mnt_ctx); ++ mount_get_dfs_conns(mnt_ctx); ++ set_root_ses(mnt_ctx); + +- if (is_tcon_dfs(tcon)) { +- *ref_server = true; +- } else { +- char *npath; ++ full_path = build_unc_path_to_root(ctx, cifs_sb, true); ++ if (IS_ERR(full_path)) ++ return PTR_ERR(full_path); + +- npath = dfs_cache_canonical_path(ref_path, cifs_sb->local_nls, cifs_remap(cifs_sb)); +- if (IS_ERR(npath)) +- return PTR_ERR(npath); ++ mnt_ctx->origin_fullpath = dfs_cache_canonical_path(ctx->UNC, cifs_sb->local_nls, ++ cifs_remap(cifs_sb)); ++ if (IS_ERR(mnt_ctx->origin_fullpath)) { ++ rc = PTR_ERR(mnt_ctx->origin_fullpath); ++ mnt_ctx->origin_fullpath = NULL; ++ goto out; ++ } + +- rc = dfs_cache_noreq_find(npath, &ref, NULL); +- kfree(npath); +- if (rc) { +- cifs_dbg(VFS, "%s: dfs_cache_noreq_find: failed (rc=%d)\n", __func__, rc); +- return rc; ++ /* Try all dfs root targets */ ++ for (rc = -ENOENT, tit = dfs_cache_get_tgt_iterator(root_tl); ++ tit; tit = dfs_cache_get_next_tgt(root_tl, tit)) { ++ rc = connect_dfs_target(mnt_ctx, full_path, mnt_ctx->origin_fullpath + 1, tit); ++ if (!rc) { ++ mnt_ctx->leaf_fullpath = kstrdup(mnt_ctx->origin_fullpath, GFP_KERNEL); ++ if (!mnt_ctx->leaf_fullpath) ++ rc = -ENOMEM; ++ break; + } +- cifs_dbg(FYI, "%s: ref.flags=0x%x\n", __func__, ref.flags); +- /* +- * Check if all targets are capable of handling DFS referrals as per +- * MS-DFSC 2.2.4 RESP_GET_DFS_REFERRAL. +- */ +- *ref_server = !!(ref.flags & DFSREF_REFERRAL_SERVER); +- free_dfs_info_param(&ref); + } +- return 0; ++ ++out: ++ kfree(full_path); ++ return rc; + } + +-int cifs_mount(struct cifs_sb_info *cifs_sb, struct smb3_fs_context *ctx) ++static int __follow_dfs_link(struct mount_ctx *mnt_ctx) + { +- int rc = 0; +- unsigned int xid; +- struct TCP_Server_Info *server = NULL; +- struct cifs_ses *ses = NULL, *root_ses = NULL; +- struct cifs_tcon *tcon = NULL; +- int count = 0; +- uuid_t mount_id = {0}; +- char *ref_path = NULL, *full_path = NULL; +- char *oldmnt = NULL; +- bool ref_server = false; ++ int rc; ++ struct cifs_sb_info *cifs_sb = mnt_ctx->cifs_sb; ++ struct smb3_fs_context *ctx = mnt_ctx->fs_ctx; ++ char *full_path; ++ struct dfs_cache_tgt_list tl = DFS_CACHE_TGT_LIST_INIT(tl); ++ struct dfs_cache_tgt_iterator *tit; + +- rc = mount_get_conns(ctx, cifs_sb, &xid, &server, &ses, &tcon); +- /* +- * If called with 'nodfs' mount option, then skip DFS resolving. Otherwise unconditionally +- * try to get an DFS referral (even cached) to determine whether it is an DFS mount. +- * +- * Skip prefix path to provide support for DFS referrals from w2k8 servers which don't seem +- * to respond with PATH_NOT_COVERED to requests that include the prefix. 
+- */ +- if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_DFS) || +- dfs_cache_find(xid, ses, cifs_sb->local_nls, cifs_remap(cifs_sb), ctx->UNC + 1, NULL, +- NULL)) { +- if (rc) +- goto error; +- /* Check if it is fully accessible and then mount it */ +- rc = is_path_remote(cifs_sb, ctx, xid, server, tcon); +- if (!rc) +- goto out; +- if (rc != -EREMOTE) +- goto error; ++ full_path = build_unc_path_to_root(ctx, cifs_sb, true); ++ if (IS_ERR(full_path)) ++ return PTR_ERR(full_path); ++ ++ kfree(mnt_ctx->leaf_fullpath); ++ mnt_ctx->leaf_fullpath = dfs_cache_canonical_path(full_path, cifs_sb->local_nls, ++ cifs_remap(cifs_sb)); ++ if (IS_ERR(mnt_ctx->leaf_fullpath)) { ++ rc = PTR_ERR(mnt_ctx->leaf_fullpath); ++ mnt_ctx->leaf_fullpath = NULL; ++ goto out; + } + +- mount_put_conns(cifs_sb, xid, server, ses, tcon); +- /* +- * Ignore error check here because we may failover to other targets from cached a +- * referral. +- */ +- (void)mount_get_dfs_conns(ctx, cifs_sb, &xid, &server, &ses, &tcon); ++ /* Get referral from dfs link */ ++ rc = dfs_cache_find(mnt_ctx->xid, mnt_ctx->root_ses, cifs_sb->local_nls, ++ cifs_remap(cifs_sb), mnt_ctx->leaf_fullpath + 1, NULL, &tl); ++ if (rc) ++ goto out; + +- /* Get path of DFS root */ +- ref_path = build_unc_path_to_root(ctx, cifs_sb, false); +- if (IS_ERR(ref_path)) { +- rc = PTR_ERR(ref_path); +- ref_path = NULL; +- goto error; ++ /* Try all dfs link targets */ ++ for (rc = -ENOENT, tit = dfs_cache_get_tgt_iterator(&tl); ++ tit; tit = dfs_cache_get_next_tgt(&tl, tit)) { ++ rc = connect_dfs_target(mnt_ctx, full_path, mnt_ctx->leaf_fullpath + 1, tit); ++ if (!rc) { ++ rc = is_path_remote(mnt_ctx); ++ break; ++ } ++ } ++ ++out: ++ kfree(full_path); ++ dfs_cache_free_tgts(&tl); ++ return rc; ++} ++ ++static int follow_dfs_link(struct mount_ctx *mnt_ctx) ++{ ++ int rc; ++ struct cifs_sb_info *cifs_sb = mnt_ctx->cifs_sb; ++ struct smb3_fs_context *ctx = mnt_ctx->fs_ctx; ++ char *full_path; ++ int num_links = 0; ++ ++ full_path = build_unc_path_to_root(ctx, cifs_sb, true); ++ if (IS_ERR(full_path)) ++ return PTR_ERR(full_path); ++ ++ kfree(mnt_ctx->origin_fullpath); ++ mnt_ctx->origin_fullpath = dfs_cache_canonical_path(full_path, cifs_sb->local_nls, ++ cifs_remap(cifs_sb)); ++ kfree(full_path); ++ ++ if (IS_ERR(mnt_ctx->origin_fullpath)) { ++ rc = PTR_ERR(mnt_ctx->origin_fullpath); ++ mnt_ctx->origin_fullpath = NULL; ++ return rc; + } + +- uuid_gen(&mount_id); +- set_root_ses(cifs_sb, &mount_id, ses, &root_ses); + do { +- /* Save full path of last DFS path we used to resolve final target server */ +- kfree(full_path); +- full_path = build_unc_path_to_root(ctx, cifs_sb, !!count); +- if (IS_ERR(full_path)) { +- rc = PTR_ERR(full_path); +- full_path = NULL; ++ rc = __follow_dfs_link(mnt_ctx); ++ if (!rc || rc != -EREMOTE) + break; +- } +- /* Chase referral */ +- oldmnt = cifs_sb->ctx->mount_options; +- rc = expand_dfs_referral(xid, root_ses, ctx, cifs_sb, ref_path + 1); +- if (rc) +- break; +- /* Connect to new DFS target only if we were redirected */ +- if (oldmnt != cifs_sb->ctx->mount_options) { +- mount_put_conns(cifs_sb, xid, server, ses, tcon); +- rc = mount_get_dfs_conns(ctx, cifs_sb, &xid, &server, &ses, &tcon); +- } +- if (rc && !server && !ses) { +- /* Failed to connect. Try to connect to other targets in the referral. 
*/ +- rc = do_dfs_failover(ref_path + 1, full_path, cifs_sb, ctx, root_ses, &xid, +- &server, &ses, &tcon); +- } +- if (rc == -EACCES || rc == -EOPNOTSUPP || !server || !ses) +- break; +- if (!tcon) +- continue; ++ } while (rc = -ELOOP, ++num_links < MAX_NESTED_LINKS); + +- /* Make sure that requests go through new root servers */ +- rc = is_referral_server(ref_path + 1, cifs_sb, tcon, &ref_server); +- if (rc) +- break; +- if (ref_server) +- set_root_ses(cifs_sb, &mount_id, ses, &root_ses); ++ return rc; ++} + +- /* Get next dfs path and then continue chasing them if -EREMOTE */ +- rc = next_dfs_prepath(cifs_sb, ctx, xid, server, tcon, &ref_path); +- /* Prevent recursion on broken link referrals */ +- if (rc == -EREMOTE && ++count > MAX_NESTED_LINKS) +- rc = -ELOOP; +- } while (rc == -EREMOTE); ++/* Set up DFS referral paths for failover */ ++static void setup_server_referral_paths(struct mount_ctx *mnt_ctx) ++{ ++ struct TCP_Server_Info *server = mnt_ctx->server; ++ ++ server->origin_fullpath = mnt_ctx->origin_fullpath; ++ server->leaf_fullpath = mnt_ctx->leaf_fullpath; ++ server->current_fullpath = mnt_ctx->leaf_fullpath; ++ mnt_ctx->origin_fullpath = mnt_ctx->leaf_fullpath = NULL; ++} + +- if (rc || !tcon || !ses) ++int cifs_mount(struct cifs_sb_info *cifs_sb, struct smb3_fs_context *ctx) ++{ ++ int rc; ++ struct mount_ctx mnt_ctx = { .cifs_sb = cifs_sb, .fs_ctx = ctx, }; ++ struct dfs_cache_tgt_list tl = DFS_CACHE_TGT_LIST_INIT(tl); ++ bool isdfs; ++ ++ rc = is_dfs_mount(&mnt_ctx, &isdfs, &tl); ++ if (rc) + goto error; ++ if (!isdfs) ++ goto out; + +- kfree(ref_path); +- /* +- * Store DFS full path in both superblock and tree connect structures. +- * +- * For DFS root mounts, the prefix path (cifs_sb->prepath) is preserved during reconnect so +- * only the root path is set in cifs_sb->origin_fullpath and tcon->dfs_path. And for DFS +- * links, the prefix path is included in both and may be changed during reconnect. See +- * cifs_tree_connect(). +- */ +- ref_path = dfs_cache_canonical_path(full_path, cifs_sb->local_nls, cifs_remap(cifs_sb)); +- kfree(full_path); +- full_path = NULL; ++ uuid_gen(&mnt_ctx.mount_id); ++ rc = connect_dfs_root(&mnt_ctx, &tl); ++ dfs_cache_free_tgts(&tl); + +- if (IS_ERR(ref_path)) { +- rc = PTR_ERR(ref_path); +- ref_path = NULL; ++ if (rc) + goto error; +- } +- cifs_sb->origin_fullpath = ref_path; + +- ref_path = kstrdup(cifs_sb->origin_fullpath, GFP_KERNEL); +- if (!ref_path) { +- rc = -ENOMEM; ++ rc = is_path_remote(&mnt_ctx); ++ if (rc == -EREMOTE) ++ rc = follow_dfs_link(&mnt_ctx); ++ if (rc) + goto error; +- } +- spin_lock(&cifs_tcp_ses_lock); +- tcon->dfs_path = ref_path; +- ref_path = NULL; +- spin_unlock(&cifs_tcp_ses_lock); + ++ setup_server_referral_paths(&mnt_ctx); + /* +- * After reconnecting to a different server, unique ids won't +- * match anymore, so we disable serverino. This prevents +- * dentry revalidation to think the dentry are stale (ESTALE). ++ * After reconnecting to a different server, unique ids won't match anymore, so we disable ++ * serverino. This prevents dentry revalidation to think the dentry are stale (ESTALE). + */ + cifs_autodisable_serverino(cifs_sb); + /* +- * Force the use of prefix path to support failover on DFS paths that +- * resolve to targets that have different prefix paths. ++ * Force the use of prefix path to support failover on DFS paths that resolve to targets ++ * that have different prefix paths. 
+ */ + cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_USE_PREFIX_PATH; + kfree(cifs_sb->prepath); + cifs_sb->prepath = ctx->prepath; + ctx->prepath = NULL; +- uuid_copy(&cifs_sb->dfs_mount_id, &mount_id); ++ uuid_copy(&cifs_sb->dfs_mount_id, &mnt_ctx.mount_id); + + out: +- free_xid(xid); +- cifs_try_adding_channels(cifs_sb, ses); +- return mount_setup_tlink(cifs_sb, ses, tcon); ++ cifs_try_adding_channels(cifs_sb, mnt_ctx.ses); ++ rc = mount_setup_tlink(cifs_sb, mnt_ctx.ses, mnt_ctx.tcon); ++ if (rc) ++ goto error; ++ ++ free_xid(mnt_ctx.xid); ++ return rc; + + error: +- kfree(ref_path); +- kfree(full_path); +- kfree(cifs_sb->origin_fullpath); +- dfs_cache_put_refsrv_sessions(&mount_id); +- mount_put_conns(cifs_sb, xid, server, ses, tcon); ++ dfs_cache_put_refsrv_sessions(&mnt_ctx.mount_id); ++ kfree(mnt_ctx.origin_fullpath); ++ kfree(mnt_ctx.leaf_fullpath); ++ mount_put_conns(&mnt_ctx); + return rc; + } + #else + int cifs_mount(struct cifs_sb_info *cifs_sb, struct smb3_fs_context *ctx) + { + int rc = 0; +- unsigned int xid; +- struct cifs_ses *ses; +- struct cifs_tcon *tcon; +- struct TCP_Server_Info *server; ++ struct mount_ctx mnt_ctx = { .cifs_sb = cifs_sb, .fs_ctx = ctx, }; + +- rc = mount_get_conns(ctx, cifs_sb, &xid, &server, &ses, &tcon); ++ rc = mount_get_conns(&mnt_ctx); + if (rc) + goto error; + +- if (tcon) { +- rc = is_path_remote(cifs_sb, ctx, xid, server, tcon); ++ if (mnt_ctx.tcon) { ++ rc = is_path_remote(&mnt_ctx); + if (rc == -EREMOTE) + rc = -EOPNOTSUPP; + if (rc) + goto error; + } + +- free_xid(xid); ++ rc = mount_setup_tlink(cifs_sb, mnt_ctx.ses, mnt_ctx.tcon); ++ if (rc) ++ goto error; + +- return mount_setup_tlink(cifs_sb, ses, tcon); ++ free_xid(mnt_ctx.xid); ++ return rc; + + error: +- mount_put_conns(cifs_sb, xid, server, ses, tcon); ++ mount_put_conns(&mnt_ctx); + return rc; + } + #endif +@@ -3687,12 +3637,11 @@ CIFSTCon(const unsigned int xid, struct cifs_ses *ses, + pSMB->AndXCommand = 0xFF; + pSMB->Flags = cpu_to_le16(TCON_EXTENDED_SECINFO); + bcc_ptr = &pSMB->Password[0]; +- if (tcon->pipe || (ses->server->sec_mode & SECMODE_USER)) { +- pSMB->PasswordLength = cpu_to_le16(1); /* minimum */ +- *bcc_ptr = 0; /* password is null byte */ +- bcc_ptr++; /* skip password */ +- /* already aligned so no need to do it below */ +- } ++ ++ pSMB->PasswordLength = cpu_to_le16(1); /* minimum */ ++ *bcc_ptr = 0; /* password is null byte */ ++ bcc_ptr++; /* skip password */ ++ /* already aligned so no need to do it below */ + + if (ses->server->sign) + smb_buffer->Flags2 |= SMBFLG2_SECURITY_SIGNATURE; +@@ -3814,7 +3763,6 @@ cifs_umount(struct cifs_sb_info *cifs_sb) + kfree(cifs_sb->prepath); + #ifdef CONFIG_CIFS_DFS_UPCALL + dfs_cache_put_refsrv_sessions(&cifs_sb->dfs_mount_id); +- kfree(cifs_sb->origin_fullpath); + #endif + call_rcu(&cifs_sb->rcu, delayed_free); + } +@@ -4141,104 +4089,249 @@ cifs_prune_tlinks(struct work_struct *work) + } + + #ifdef CONFIG_CIFS_DFS_UPCALL +-int cifs_tree_connect(const unsigned int xid, struct cifs_tcon *tcon, const struct nls_table *nlsc) ++static void mark_tcon_tcp_ses_for_reconnect(struct cifs_tcon *tcon) ++{ ++ int i; ++ ++ for (i = 0; i < tcon->ses->chan_count; i++) { ++ spin_lock(&GlobalMid_Lock); ++ if (tcon->ses->chans[i].server->tcpStatus != CifsExiting) ++ tcon->ses->chans[i].server->tcpStatus = CifsNeedReconnect; ++ spin_unlock(&GlobalMid_Lock); ++ } ++} ++ ++/* Update dfs referral path of superblock */ ++static int update_server_fullpath(struct TCP_Server_Info *server, struct cifs_sb_info *cifs_sb, ++ const char *target) ++{ ++ int rc = 
0; ++ size_t len = strlen(target); ++ char *refpath, *npath; ++ ++ if (unlikely(len < 2 || *target != '\\')) ++ return -EINVAL; ++ ++ if (target[1] == '\\') { ++ len += 1; ++ refpath = kmalloc(len, GFP_KERNEL); ++ if (!refpath) ++ return -ENOMEM; ++ ++ scnprintf(refpath, len, "%s", target); ++ } else { ++ len += sizeof("\\"); ++ refpath = kmalloc(len, GFP_KERNEL); ++ if (!refpath) ++ return -ENOMEM; ++ ++ scnprintf(refpath, len, "\\%s", target); ++ } ++ ++ npath = dfs_cache_canonical_path(refpath, cifs_sb->local_nls, cifs_remap(cifs_sb)); ++ kfree(refpath); ++ ++ if (IS_ERR(npath)) { ++ rc = PTR_ERR(npath); ++ } else { ++ mutex_lock(&server->refpath_lock); ++ kfree(server->leaf_fullpath); ++ server->leaf_fullpath = npath; ++ mutex_unlock(&server->refpath_lock); ++ server->current_fullpath = server->leaf_fullpath; ++ } ++ return rc; ++} ++ ++static int target_share_matches_server(struct TCP_Server_Info *server, const char *tcp_host, ++ size_t tcp_host_len, char *share, bool *target_match) ++{ ++ int rc = 0; ++ const char *dfs_host; ++ size_t dfs_host_len; ++ ++ *target_match = true; ++ extract_unc_hostname(share, &dfs_host, &dfs_host_len); ++ ++ /* Check if hostnames or addresses match */ ++ if (dfs_host_len != tcp_host_len || strncasecmp(dfs_host, tcp_host, dfs_host_len) != 0) { ++ cifs_dbg(FYI, "%s: %.*s doesn't match %.*s\n", __func__, (int)dfs_host_len, ++ dfs_host, (int)tcp_host_len, tcp_host); ++ rc = match_target_ip(server, dfs_host, dfs_host_len, target_match); ++ if (rc) ++ cifs_dbg(VFS, "%s: failed to match target ip: %d\n", __func__, rc); ++ } ++ return rc; ++} ++ ++int __tree_connect_dfs_target(const unsigned int xid, struct cifs_tcon *tcon, ++ struct cifs_sb_info *cifs_sb, char *tree, ++ struct dfs_cache_tgt_list *tl, struct dfs_info3_param *ref) + { + int rc; + struct TCP_Server_Info *server = tcon->ses->server; + const struct smb_version_operations *ops = server->ops; +- struct dfs_cache_tgt_list tl; +- struct dfs_cache_tgt_iterator *it = NULL; +- char *tree; ++ struct cifs_tcon *ipc = tcon->ses->tcon_ipc; ++ bool islink; ++ char *share = NULL, *prefix = NULL; + const char *tcp_host; + size_t tcp_host_len; +- const char *dfs_host; +- size_t dfs_host_len; +- char *share = NULL, *prefix = NULL; +- struct dfs_info3_param ref = {0}; +- bool isroot; ++ struct dfs_cache_tgt_iterator *tit; ++ bool target_match; + +- tree = kzalloc(MAX_TREE_SIZE, GFP_KERNEL); +- if (!tree) +- return -ENOMEM; ++ extract_unc_hostname(server->hostname, &tcp_host, &tcp_host_len); + +- /* If it is not dfs or there was no cached dfs referral, then reconnect to same share */ +- if (!tcon->dfs_path || dfs_cache_noreq_find(tcon->dfs_path + 1, &ref, &tl)) { +- if (tcon->ipc) { +- scnprintf(tree, MAX_TREE_SIZE, "\\\\%s\\IPC$", server->hostname); +- rc = ops->tree_connect(xid, tcon->ses, tree, tcon, nlsc); +- } else { +- rc = ops->tree_connect(xid, tcon->ses, tcon->treeName, tcon, nlsc); +- } ++ islink = ref->server_type == DFS_TYPE_LINK; ++ free_dfs_info_param(ref); ++ ++ tit = dfs_cache_get_tgt_iterator(tl); ++ if (!tit) { ++ rc = -ENOENT; + goto out; + } + +- isroot = ref.server_type == DFS_TYPE_ROOT; +- free_dfs_info_param(&ref); +- +- extract_unc_hostname(server->hostname, &tcp_host, &tcp_host_len); +- +- for (it = dfs_cache_get_tgt_iterator(&tl); it; it = dfs_cache_get_next_tgt(&tl, it)) { +- bool target_match; ++ /* Try to tree connect to all dfs targets */ ++ for (; tit; tit = dfs_cache_get_next_tgt(tl, tit)) { ++ const char *target = dfs_cache_get_tgt_name(tit); ++ struct dfs_cache_tgt_list ntl = 
DFS_CACHE_TGT_LIST_INIT(ntl); + + kfree(share); + kfree(prefix); +- share = NULL; +- prefix = NULL; + +- rc = dfs_cache_get_tgt_share(tcon->dfs_path + 1, it, &share, &prefix); ++ /* Check if share matches with tcp ses */ ++ rc = dfs_cache_get_tgt_share(server->current_fullpath + 1, tit, &share, &prefix); + if (rc) { +- cifs_dbg(VFS, "%s: failed to parse target share %d\n", +- __func__, rc); +- continue; ++ cifs_dbg(VFS, "%s: failed to parse target share: %d\n", __func__, rc); ++ break; + } + +- extract_unc_hostname(share, &dfs_host, &dfs_host_len); +- +- if (dfs_host_len != tcp_host_len +- || strncasecmp(dfs_host, tcp_host, dfs_host_len) != 0) { +- cifs_dbg(FYI, "%s: %.*s doesn't match %.*s\n", __func__, (int)dfs_host_len, +- dfs_host, (int)tcp_host_len, tcp_host); ++ rc = target_share_matches_server(server, tcp_host, tcp_host_len, share, ++ &target_match); ++ if (rc) ++ break; ++ if (!target_match) { ++ rc = -EHOSTUNREACH; ++ continue; ++ } + +- rc = match_target_ip(server, dfs_host, dfs_host_len, &target_match); +- if (rc) { +- cifs_dbg(VFS, "%s: failed to match target ip: %d\n", __func__, rc); ++ if (ipc->need_reconnect) { ++ scnprintf(tree, MAX_TREE_SIZE, "\\\\%s\\IPC$", server->hostname); ++ rc = ops->tree_connect(xid, ipc->ses, tree, ipc, cifs_sb->local_nls); ++ if (rc) + break; +- } ++ } + +- if (!target_match) { +- cifs_dbg(FYI, "%s: skipping target\n", __func__); ++ scnprintf(tree, MAX_TREE_SIZE, "\\%s", share); ++ if (!islink) { ++ rc = ops->tree_connect(xid, tcon->ses, tree, tcon, cifs_sb->local_nls); ++ break; ++ } ++ /* ++ * If no dfs referrals were returned from link target, then just do a TREE_CONNECT ++ * to it. Otherwise, cache the dfs referral and then mark current tcp ses for ++ * reconnect so either the demultiplex thread or the echo worker will reconnect to ++ * newly resolved target. 
++ */ ++ if (dfs_cache_find(xid, tcon->ses, cifs_sb->local_nls, cifs_remap(cifs_sb), target, ++ ref, &ntl)) { ++ rc = ops->tree_connect(xid, tcon->ses, tree, tcon, cifs_sb->local_nls); ++ if (rc) + continue; +- } ++ rc = dfs_cache_noreq_update_tgthint(server->current_fullpath + 1, tit); ++ if (!rc) ++ rc = cifs_update_super_prepath(cifs_sb, prefix); ++ break; + } ++ /* Target is another dfs share */ ++ rc = update_server_fullpath(server, cifs_sb, target); ++ dfs_cache_free_tgts(tl); + +- if (tcon->ipc) { +- scnprintf(tree, MAX_TREE_SIZE, "\\\\%s\\IPC$", share); +- rc = ops->tree_connect(xid, tcon->ses, tree, tcon, nlsc); ++ if (!rc) { ++ rc = -EREMOTE; ++ list_replace_init(&ntl.tl_list, &tl->tl_list); + } else { +- scnprintf(tree, MAX_TREE_SIZE, "\\%s", share); +- rc = ops->tree_connect(xid, tcon->ses, tree, tcon, nlsc); +- /* Only handle prefix paths of DFS link targets */ +- if (!rc && !isroot) { +- rc = update_super_prepath(tcon, prefix); +- break; +- } ++ dfs_cache_free_tgts(&ntl); ++ free_dfs_info_param(ref); + } +- if (rc == -EREMOTE) +- break; ++ break; + } + ++out: + kfree(share); + kfree(prefix); + +- if (!rc) { +- if (it) +- rc = dfs_cache_noreq_update_tgthint(tcon->dfs_path + 1, it); +- else +- rc = -ENOENT; ++ return rc; ++} ++ ++int tree_connect_dfs_target(const unsigned int xid, struct cifs_tcon *tcon, ++ struct cifs_sb_info *cifs_sb, char *tree, ++ struct dfs_cache_tgt_list *tl, struct dfs_info3_param *ref) ++{ ++ int rc; ++ int num_links = 0; ++ struct TCP_Server_Info *server = tcon->ses->server; ++ ++ do { ++ rc = __tree_connect_dfs_target(xid, tcon, cifs_sb, tree, tl, ref); ++ if (!rc || rc != -EREMOTE) ++ break; ++ } while (rc = -ELOOP, ++num_links < MAX_NESTED_LINKS); ++ /* ++ * If we couldn't tree connect to any targets from last referral path, then retry from ++ * original referral path. 
++ */ ++ if (rc && server->current_fullpath != server->origin_fullpath) { ++ server->current_fullpath = server->origin_fullpath; ++ mark_tcon_tcp_ses_for_reconnect(tcon); + } +- dfs_cache_free_tgts(&tl); ++ ++ dfs_cache_free_tgts(tl); ++ return rc; ++} ++ ++int cifs_tree_connect(const unsigned int xid, struct cifs_tcon *tcon, const struct nls_table *nlsc) ++{ ++ int rc; ++ struct TCP_Server_Info *server = tcon->ses->server; ++ const struct smb_version_operations *ops = server->ops; ++ struct super_block *sb = NULL; ++ struct cifs_sb_info *cifs_sb; ++ struct dfs_cache_tgt_list tl = DFS_CACHE_TGT_LIST_INIT(tl); ++ char *tree; ++ struct dfs_info3_param ref = {0}; ++ ++ tree = kzalloc(MAX_TREE_SIZE, GFP_KERNEL); ++ if (!tree) ++ return -ENOMEM; ++ ++ if (tcon->ipc) { ++ scnprintf(tree, MAX_TREE_SIZE, "\\\\%s\\IPC$", server->hostname); ++ rc = ops->tree_connect(xid, tcon->ses, tree, tcon, nlsc); ++ goto out; ++ } ++ ++ sb = cifs_get_tcp_super(server); ++ if (IS_ERR(sb)) { ++ rc = PTR_ERR(sb); ++ cifs_dbg(VFS, "%s: could not find superblock: %d\n", __func__, rc); ++ goto out; ++ } ++ ++ cifs_sb = CIFS_SB(sb); ++ ++ /* If it is not dfs or there was no cached dfs referral, then reconnect to same share */ ++ if (!server->current_fullpath || ++ dfs_cache_noreq_find(server->current_fullpath + 1, &ref, &tl)) { ++ rc = ops->tree_connect(xid, tcon->ses, tcon->treeName, tcon, cifs_sb->local_nls); ++ goto out; ++ } ++ ++ rc = tree_connect_dfs_target(xid, tcon, cifs_sb, tree, &tl, &ref); ++ + out: + kfree(tree); ++ cifs_put_tcp_super(sb); ++ + return rc; + } + #else +diff --git a/fs/cifs/dfs_cache.c b/fs/cifs/dfs_cache.c +index 2837455928441..1864bdadf3ddd 100644 +--- a/fs/cifs/dfs_cache.c ++++ b/fs/cifs/dfs_cache.c +@@ -792,26 +792,27 @@ static int get_dfs_referral(const unsigned int xid, struct cifs_ses *ses, const + */ + static int cache_refresh_path(const unsigned int xid, struct cifs_ses *ses, const char *path) + { +- int rc; +- struct cache_entry *ce; + struct dfs_info3_param *refs = NULL; ++ struct cache_entry *ce; + int numrefs = 0; +- bool newent = false; ++ int rc; + + cifs_dbg(FYI, "%s: search path: %s\n", __func__, path); + +- down_write(&htable_rw_lock); ++ down_read(&htable_rw_lock); + + ce = lookup_cache_entry(path); +- if (!IS_ERR(ce)) { +- if (!cache_entry_expired(ce)) { +- dump_ce(ce); +- up_write(&htable_rw_lock); +- return 0; +- } +- } else { +- newent = true; ++ if (!IS_ERR(ce) && !cache_entry_expired(ce)) { ++ up_read(&htable_rw_lock); ++ return 0; + } ++ /* ++ * Unlock shared access as we don't want to hold any locks while getting ++ * a new referral. The @ses used for performing the I/O could be ++ * reconnecting and it acquires @htable_rw_lock to look up the dfs cache ++ * in order to failover -- if necessary. ++ */ ++ up_read(&htable_rw_lock); + + /* + * Either the entry was not found, or it is expired. 
+@@ -819,19 +820,22 @@ static int cache_refresh_path(const unsigned int xid, struct cifs_ses *ses, cons + */ + rc = get_dfs_referral(xid, ses, path, &refs, &numrefs); + if (rc) +- goto out_unlock; ++ goto out; + + dump_refs(refs, numrefs); + +- if (!newent) { +- rc = update_cache_entry_locked(ce, refs, numrefs); +- goto out_unlock; ++ down_write(&htable_rw_lock); ++ /* Re-check as another task might have it added or refreshed already */ ++ ce = lookup_cache_entry(path); ++ if (!IS_ERR(ce)) { ++ if (cache_entry_expired(ce)) ++ rc = update_cache_entry_locked(ce, refs, numrefs); ++ } else { ++ rc = add_cache_entry_locked(refs, numrefs); + } + +- rc = add_cache_entry_locked(refs, numrefs); +- +-out_unlock: + up_write(&htable_rw_lock); ++out: + free_dfs_info_array(refs, numrefs); + return rc; + } +@@ -1046,10 +1050,10 @@ int dfs_cache_update_tgthint(const unsigned int xid, struct cifs_ses *ses, + const struct nls_table *cp, int remap, const char *path, + const struct dfs_cache_tgt_iterator *it) + { +- int rc; +- const char *npath; +- struct cache_entry *ce; + struct cache_dfs_tgt *t; ++ struct cache_entry *ce; ++ const char *npath; ++ int rc = 0; + + npath = dfs_cache_canonical_path(path, cp, remap); + if (IS_ERR(npath)) +@@ -1364,9 +1368,9 @@ static void mark_for_reconnect_if_needed(struct cifs_tcon *tcon, struct dfs_cach + } + + /* Refresh dfs referral of tcon and mark it for reconnect if needed */ +-static int refresh_tcon(struct cifs_ses **sessions, struct cifs_tcon *tcon, bool force_refresh) ++static int __refresh_tcon(const char *path, struct cifs_ses **sessions, struct cifs_tcon *tcon, ++ bool force_refresh) + { +- const char *path = tcon->dfs_path + 1; + struct cifs_ses *ses; + struct cache_entry *ce; + struct dfs_info3_param *refs = NULL; +@@ -1422,6 +1426,20 @@ out: + return rc; + } + ++static int refresh_tcon(struct cifs_ses **sessions, struct cifs_tcon *tcon, bool force_refresh) ++{ ++ struct TCP_Server_Info *server = tcon->ses->server; ++ ++ mutex_lock(&server->refpath_lock); ++ if (strcasecmp(server->leaf_fullpath, server->origin_fullpath)) ++ __refresh_tcon(server->leaf_fullpath + 1, sessions, tcon, force_refresh); ++ mutex_unlock(&server->refpath_lock); ++ ++ __refresh_tcon(server->origin_fullpath + 1, sessions, tcon, force_refresh); ++ ++ return 0; ++} ++ + /** + * dfs_cache_remount_fs - remount a DFS share + * +@@ -1435,6 +1453,7 @@ out: + int dfs_cache_remount_fs(struct cifs_sb_info *cifs_sb) + { + struct cifs_tcon *tcon; ++ struct TCP_Server_Info *server; + struct mount_group *mg; + struct cifs_ses *sessions[CACHE_MAX_ENTRIES + 1] = {NULL}; + int rc; +@@ -1443,13 +1462,15 @@ int dfs_cache_remount_fs(struct cifs_sb_info *cifs_sb) + return -EINVAL; + + tcon = cifs_sb_master_tcon(cifs_sb); +- if (!tcon->dfs_path) { +- cifs_dbg(FYI, "%s: not a dfs tcon\n", __func__); ++ server = tcon->ses->server; ++ ++ if (!server->origin_fullpath) { ++ cifs_dbg(FYI, "%s: not a dfs mount\n", __func__); + return 0; + } + + if (uuid_is_null(&cifs_sb->dfs_mount_id)) { +- cifs_dbg(FYI, "%s: tcon has no dfs mount group id\n", __func__); ++ cifs_dbg(FYI, "%s: no dfs mount group id\n", __func__); + return -EINVAL; + } + +@@ -1457,7 +1478,7 @@ int dfs_cache_remount_fs(struct cifs_sb_info *cifs_sb) + mg = find_mount_group_locked(&cifs_sb->dfs_mount_id); + if (IS_ERR(mg)) { + mutex_unlock(&mount_group_list_lock); +- cifs_dbg(FYI, "%s: tcon has ipc session to refresh referral\n", __func__); ++ cifs_dbg(FYI, "%s: no ipc session for refreshing referral\n", __func__); + return PTR_ERR(mg); + } + 
kref_get(&mg->refcount); +@@ -1498,9 +1519,12 @@ static void refresh_mounts(struct cifs_ses **sessions) + + spin_lock(&cifs_tcp_ses_lock); + list_for_each_entry(server, &cifs_tcp_ses_list, tcp_ses_list) { ++ if (!server->is_dfs_conn) ++ continue; ++ + list_for_each_entry(ses, &server->smb_ses_list, smb_ses_list) { + list_for_each_entry(tcon, &ses->tcon_list, tcon_list) { +- if (tcon->dfs_path) { ++ if (!tcon->ipc && !tcon->need_reconnect) { + tcon->tc_count++; + list_add_tail(&tcon->ulist, &tcons); + } +@@ -1510,8 +1534,16 @@ static void refresh_mounts(struct cifs_ses **sessions) + spin_unlock(&cifs_tcp_ses_lock); + + list_for_each_entry_safe(tcon, ntcon, &tcons, ulist) { ++ struct TCP_Server_Info *server = tcon->ses->server; ++ + list_del_init(&tcon->ulist); +- refresh_tcon(sessions, tcon, false); ++ ++ mutex_lock(&server->refpath_lock); ++ if (strcasecmp(server->leaf_fullpath, server->origin_fullpath)) ++ __refresh_tcon(server->leaf_fullpath + 1, sessions, tcon, false); ++ mutex_unlock(&server->refpath_lock); ++ ++ __refresh_tcon(server->origin_fullpath + 1, sessions, tcon, false); + cifs_put_tcon(tcon); + } + } diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index 6e8e7cc26ae24..83c929dd6ed59 100644 --- a/fs/cifs/dir.c @@ -309846,7 +371675,7 @@ index 6e8e7cc26ae24..83c929dd6ed59 100644 tlink = cifs_sb_tlink(CIFS_SB(inode->i_sb)); rc = PTR_ERR(tlink); diff --git a/fs/cifs/file.c b/fs/cifs/file.c -index 13f3182cf7969..aa422348824a1 100644 +index 13f3182cf7969..cca9ff01b30c2 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -1806,11 +1806,13 @@ int cifs_flock(struct file *file, int cmd, struct file_lock *fl) @@ -309949,6 +371778,15 @@ index 13f3182cf7969..aa422348824a1 100644 return __cifs_writev(iocb, from, true); } +@@ -3584,7 +3613,7 @@ uncached_fill_pages(struct TCP_Server_Info *server, + rdata->got_bytes += result; + } + +- return rdata->got_bytes > 0 && result != -ECONNABORTED ? ++ return result != -ECONNABORTED && rdata->got_bytes > 0 ? + rdata->got_bytes : result; + } + @@ -3711,6 +3740,11 @@ cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file, break; } @@ -309977,6 +371815,15 @@ index 13f3182cf7969..aa422348824a1 100644 /* grab a lock here due to read response handlers can access ctx */ mutex_lock(&ctx->aio_mutex); +@@ -4345,7 +4388,7 @@ readpages_fill_pages(struct TCP_Server_Info *server, + rdata->got_bytes += result; + } + +- return rdata->got_bytes > 0 && result != -ECONNABORTED ? ++ return result != -ECONNABORTED && rdata->got_bytes > 0 ? 
+ rdata->got_bytes : result; + } + @@ -4489,6 +4532,11 @@ static int cifs_readpages(struct file *file, struct address_space *mapping, break; } @@ -310154,8 +372001,30 @@ index a42ba71d7a81f..29601a4eb4116 100644 char *UNC; char *nodename; char *iocharset; /* local code page for mapping to and from Unicode */ +diff --git a/fs/cifs/ioctl.c b/fs/cifs/ioctl.c +index 0359b604bdbc0..71883ba9e5677 100644 +--- a/fs/cifs/ioctl.c ++++ b/fs/cifs/ioctl.c +@@ -342,7 +342,7 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg) + rc = put_user(ExtAttrBits & + FS_FL_USER_VISIBLE, + (int __user *)arg); +- if (rc != EOPNOTSUPP) ++ if (rc != -EOPNOTSUPP) + break; + } + #endif /* CONFIG_CIFS_POSIX */ +@@ -371,7 +371,7 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg) + * pSMBFile->fid.netfid, + * extAttrBits, + * &ExtAttrMask); +- * if (rc != EOPNOTSUPP) ++ * if (rc != -EOPNOTSUPP) + * break; + */ + diff --git a/fs/cifs/link.c b/fs/cifs/link.c -index 852e54ee82c28..bbdf3281559c8 100644 +index 852e54ee82c28..4308b27ba3464 100644 --- a/fs/cifs/link.c +++ b/fs/cifs/link.c @@ -85,6 +85,9 @@ parse_mf_symlink(const u8 *buf, unsigned int buf_len, unsigned int *_link_len, @@ -310168,8 +372037,16 @@ index 852e54ee82c28..bbdf3281559c8 100644 rc = symlink_hash(link_len, link_str, md5_hash); if (rc) { cifs_dbg(FYI, "%s: MD5 hash failure: %d\n", __func__, rc); +@@ -456,6 +459,7 @@ smb3_create_mf_symlink(unsigned int xid, struct cifs_tcon *tcon, + oparms.disposition = FILE_CREATE; + oparms.fid = &fid; + oparms.reconnect = false; ++ oparms.mode = 0644; + + rc = SMB2_open(xid, &oparms, utf16_path, &oplock, NULL, NULL, + NULL, NULL); diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c -index bb1185fff8cc4..699f676ded478 100644 +index bb1185fff8cc4..3a90ee314ed73 100644 --- a/fs/cifs/misc.c +++ b/fs/cifs/misc.c @@ -75,6 +75,7 @@ sesInfoAlloc(void) @@ -310180,7 +372057,17 @@ index bb1185fff8cc4..699f676ded478 100644 } return ret_buf; } -@@ -735,6 +736,8 @@ cifs_close_deferred_file(struct cifsInodeInfo *cifs_inode) +@@ -138,9 +139,6 @@ tconInfoFree(struct cifs_tcon *buf_to_free) + kfree(buf_to_free->nativeFileSystem); + kfree_sensitive(buf_to_free->password); + kfree(buf_to_free->crfid.fid); +-#ifdef CONFIG_CIFS_DFS_UPCALL +- kfree(buf_to_free->dfs_path); +-#endif + kfree(buf_to_free); + } + +@@ -735,6 +733,8 @@ cifs_close_deferred_file(struct cifsInodeInfo *cifs_inode) list_for_each_entry(cfile, &cifs_inode->openFileList, flist) { if (delayed_work_pending(&cfile->deferred)) { if (cancel_delayed_work(&cfile->deferred)) { @@ -310189,7 +372076,7 @@ index bb1185fff8cc4..699f676ded478 100644 tmp_list = kmalloc(sizeof(struct file_list), GFP_ATOMIC); if (tmp_list == NULL) break; -@@ -766,6 +769,8 @@ cifs_close_all_deferred_files(struct cifs_tcon *tcon) +@@ -766,6 +766,8 @@ cifs_close_all_deferred_files(struct cifs_tcon *tcon) cfile = list_entry(tmp, struct cifsFileInfo, tlist); if (delayed_work_pending(&cfile->deferred)) { if (cancel_delayed_work(&cfile->deferred)) { @@ -310198,7 +372085,7 @@ index bb1185fff8cc4..699f676ded478 100644 tmp_list = kmalloc(sizeof(struct file_list), GFP_ATOMIC); if (tmp_list == NULL) break; -@@ -801,6 +806,8 @@ cifs_close_deferred_file_under_dentry(struct cifs_tcon *tcon, const char *path) +@@ -801,6 +803,8 @@ cifs_close_deferred_file_under_dentry(struct cifs_tcon *tcon, const char *path) if (strstr(full_path, path)) { if (delayed_work_pending(&cfile->deferred)) { if (cancel_delayed_work(&cfile->deferred)) { @@ -310207,7 +372094,18 @@ index 
bb1185fff8cc4..699f676ded478 100644 tmp_list = kmalloc(sizeof(struct file_list), GFP_ATOMIC); if (tmp_list == NULL) break; -@@ -1211,18 +1218,23 @@ static struct super_block *__cifs_get_super(void (*f)(struct super_block *, void +@@ -1130,8 +1134,8 @@ cifs_free_hash(struct crypto_shash **shash, struct sdesc **sdesc) + * @len: Where to store the length for this page: + * @offset: Where to store the offset for this page + */ +-void rqst_page_get_length(struct smb_rqst *rqst, unsigned int page, +- unsigned int *len, unsigned int *offset) ++void rqst_page_get_length(const struct smb_rqst *rqst, unsigned int page, ++ unsigned int *len, unsigned int *offset) + { + *len = rqst->rq_pagesz; + *offset = (page == 0) ? rqst->rq_offset : 0; +@@ -1211,18 +1215,23 @@ static struct super_block *__cifs_get_super(void (*f)(struct super_block *, void .data = data, .sb = NULL, }; @@ -310242,6 +372140,81 @@ index bb1185fff8cc4..699f676ded478 100644 } static void __cifs_put_super(struct super_block *sb) +@@ -1287,69 +1296,20 @@ out: + return rc; + } + +-static void tcon_super_cb(struct super_block *sb, void *arg) +-{ +- struct super_cb_data *sd = arg; +- struct cifs_tcon *tcon = sd->data; +- struct cifs_sb_info *cifs_sb; +- +- if (sd->sb) +- return; +- +- cifs_sb = CIFS_SB(sb); +- if (tcon->dfs_path && cifs_sb->origin_fullpath && +- !strcasecmp(tcon->dfs_path, cifs_sb->origin_fullpath)) +- sd->sb = sb; +-} +- +-static inline struct super_block *cifs_get_tcon_super(struct cifs_tcon *tcon) +-{ +- return __cifs_get_super(tcon_super_cb, tcon); +-} +- +-static inline void cifs_put_tcon_super(struct super_block *sb) +-{ +- __cifs_put_super(sb); +-} +-#else +-static inline struct super_block *cifs_get_tcon_super(struct cifs_tcon *tcon) +-{ +- return ERR_PTR(-EOPNOTSUPP); +-} +- +-static inline void cifs_put_tcon_super(struct super_block *sb) ++int cifs_update_super_prepath(struct cifs_sb_info *cifs_sb, char *prefix) + { +-} +-#endif +- +-int update_super_prepath(struct cifs_tcon *tcon, char *prefix) +-{ +- struct super_block *sb; +- struct cifs_sb_info *cifs_sb; +- int rc = 0; +- +- sb = cifs_get_tcon_super(tcon); +- if (IS_ERR(sb)) +- return PTR_ERR(sb); +- +- cifs_sb = CIFS_SB(sb); +- + kfree(cifs_sb->prepath); + + if (prefix && *prefix) { + cifs_sb->prepath = kstrdup(prefix, GFP_ATOMIC); +- if (!cifs_sb->prepath) { +- rc = -ENOMEM; +- goto out; +- } ++ if (!cifs_sb->prepath) ++ return -ENOMEM; + + convert_delimiter(cifs_sb->prepath, CIFS_DIR_SEP(cifs_sb)); + } else + cifs_sb->prepath = NULL; + + cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_USE_PREFIX_PATH; +- +-out: +- cifs_put_tcon_super(sb); +- return rc; ++ return 0; + } ++#endif diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c index 23e02db7923f6..0fbd0f78f361b 100644 --- a/fs/cifs/sess.c @@ -310398,7 +372371,7 @@ index 8297703492eea..f3e49ef457db9 100644 flags, num_rqst - 2, &rqst[1], &resp_buftype[1], diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c -index bda606dc72b1f..2d31860d56e96 100644 +index bda606dc72b1f..817d78129bd2e 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -745,8 +745,8 @@ int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon, @@ -310442,7 +372415,25 @@ index bda606dc72b1f..2d31860d56e96 100644 if (ea_name) { if (ea_name_len == name_len && memcmp(ea_name, name, name_len) == 0) { -@@ -1631,6 +1635,7 @@ smb2_ioctl_query_info(const unsigned int xid, +@@ -1357,6 +1361,8 @@ smb2_set_ea(const unsigned int xid, struct cifs_tcon *tcon, + COMPOUND_FID, current->tgid, + FILE_FULL_EA_INFORMATION, + SMB2_O_INFO_FILE, 0, data, size); ++ if (rc) 
++ goto sea_exit; + smb2_set_next_command(tcon, &rqst[1]); + smb2_set_related(&rqst[1]); + +@@ -1367,6 +1373,8 @@ smb2_set_ea(const unsigned int xid, struct cifs_tcon *tcon, + rqst[2].rq_nvec = 1; + rc = SMB2_close_init(tcon, server, + &rqst[2], COMPOUND_FID, COMPOUND_FID, false); ++ if (rc) ++ goto sea_exit; + smb2_set_related(&rqst[2]); + + rc = compound_send_recv(xid, ses, server, +@@ -1631,6 +1639,7 @@ smb2_ioctl_query_info(const unsigned int xid, unsigned int size[2]; void *data[2]; int create_options = is_dir ? CREATE_NOT_FILE : CREATE_NOT_DIR; @@ -310450,7 +372441,7 @@ index bda606dc72b1f..2d31860d56e96 100644 vars = kzalloc(sizeof(*vars), GFP_ATOMIC); if (vars == NULL) -@@ -1640,27 +1645,29 @@ smb2_ioctl_query_info(const unsigned int xid, +@@ -1640,27 +1649,29 @@ smb2_ioctl_query_info(const unsigned int xid, resp_buftype[0] = resp_buftype[1] = resp_buftype[2] = CIFS_NO_BUFFER; @@ -310492,7 +372483,7 @@ index bda606dc72b1f..2d31860d56e96 100644 } /* Open */ -@@ -1698,45 +1705,45 @@ smb2_ioctl_query_info(const unsigned int xid, +@@ -1698,45 +1709,45 @@ smb2_ioctl_query_info(const unsigned int xid, rc = SMB2_open_init(tcon, server, &rqst[0], &oplock, &oparms, path); if (rc) @@ -310547,11 +372538,11 @@ index bda606dc72b1f..2d31860d56e96 100644 - FILE_END_OF_FILE_INFORMATION, - SMB2_O_INFO_FILE, 0, data, size); + goto free_open_req; - } ++ } + if (qi.output_buffer_length < 8) { + rc = -EINVAL; + goto free_open_req; -+ } + } + rqst[1].rq_iov = &vars->si_iov[0]; + rqst[1].rq_nvec = 1; + @@ -310566,7 +372557,7 @@ index bda606dc72b1f..2d31860d56e96 100644 } else if (qi.flags == PASSTHRU_QUERY_INFO) { rqst[1].rq_iov = &vars->qi_iov[0]; rqst[1].rq_nvec = 1; -@@ -1747,6 +1754,7 @@ smb2_ioctl_query_info(const unsigned int xid, +@@ -1747,6 +1758,7 @@ smb2_ioctl_query_info(const unsigned int xid, qi.info_type, qi.additional_information, qi.input_buffer_length, qi.output_buffer_length, buffer); @@ -310574,7 +372565,7 @@ index bda606dc72b1f..2d31860d56e96 100644 } else { /* unknown flags */ cifs_tcon_dbg(VFS, "Invalid passthru query flags: 0x%x\n", qi.flags); -@@ -1754,7 +1762,7 @@ smb2_ioctl_query_info(const unsigned int xid, +@@ -1754,7 +1766,7 @@ smb2_ioctl_query_info(const unsigned int xid, } if (rc) @@ -310583,7 +372574,7 @@ index bda606dc72b1f..2d31860d56e96 100644 smb2_set_next_command(tcon, &rqst[1]); smb2_set_related(&rqst[1]); -@@ -1765,14 +1773,14 @@ smb2_ioctl_query_info(const unsigned int xid, +@@ -1765,14 +1777,14 @@ smb2_ioctl_query_info(const unsigned int xid, rc = SMB2_close_init(tcon, server, &rqst[2], COMPOUND_FID, COMPOUND_FID, false); if (rc) @@ -310600,7 +372591,7 @@ index bda606dc72b1f..2d31860d56e96 100644 /* No need to bump num_remote_opens since handle immediately closed */ if (qi.flags & PASSTHRU_FSCTL) { -@@ -1782,18 +1790,22 @@ smb2_ioctl_query_info(const unsigned int xid, +@@ -1782,18 +1794,22 @@ smb2_ioctl_query_info(const unsigned int xid, qi.input_buffer_length = le32_to_cpu(io_rsp->OutputCount); if (qi.input_buffer_length > 0 && le32_to_cpu(io_rsp->OutputOffset) + qi.input_buffer_length @@ -310628,7 +372619,7 @@ index bda606dc72b1f..2d31860d56e96 100644 } else { pqi = (struct smb_query_info __user *)arg; qi_rsp = (struct smb2_query_info_rsp *)rsp_iov[1].iov_base; -@@ -1801,28 +1813,30 @@ smb2_ioctl_query_info(const unsigned int xid, +@@ -1801,28 +1817,30 @@ smb2_ioctl_query_info(const unsigned int xid, qi.input_buffer_length = le32_to_cpu(qi_rsp->OutputBufferLength); if (copy_to_user(&pqi->input_buffer_length, &qi.input_buffer_length, @@ -310671,7 +372662,7 @@ 
index bda606dc72b1f..2d31860d56e96 100644 } static ssize_t -@@ -1839,9 +1853,17 @@ smb2_copychunk_range(const unsigned int xid, +@@ -1839,9 +1857,17 @@ smb2_copychunk_range(const unsigned int xid, int chunks_copied = 0; bool chunk_sizes_updated = false; ssize_t bytes_written, total_bytes_written = 0; @@ -310689,7 +372680,33 @@ index bda606dc72b1f..2d31860d56e96 100644 if (pcchunk == NULL) return -ENOMEM; -@@ -3577,7 +3599,7 @@ static long smb3_zero_range(struct file *file, struct cifs_tcon *tcon, +@@ -2843,6 +2869,7 @@ smb2_get_dfs_refer(const unsigned int xid, struct cifs_ses *ses, + struct fsctl_get_dfs_referral_req *dfs_req = NULL; + struct get_dfs_referral_rsp *dfs_rsp = NULL; + u32 dfs_req_size = 0, dfs_rsp_size = 0; ++ int retry_count = 0; + + cifs_dbg(FYI, "%s: path: %s\n", __func__, search_name); + +@@ -2894,11 +2921,14 @@ smb2_get_dfs_refer(const unsigned int xid, struct cifs_ses *ses, + true /* is_fsctl */, + (char *)dfs_req, dfs_req_size, CIFSMaxBufSize, + (char **)&dfs_rsp, &dfs_rsp_size); +- } while (rc == -EAGAIN); ++ if (!is_retryable_error(rc)) ++ break; ++ usleep_range(512, 2048); ++ } while (++retry_count < 5); + + if (rc) { +- if ((rc != -ENOENT) && (rc != -EOPNOTSUPP)) +- cifs_tcon_dbg(VFS, "ioctl error in %s rc=%d\n", __func__, rc); ++ if (!is_retryable_error(rc) && rc != -ENOENT && rc != -EOPNOTSUPP) ++ cifs_tcon_dbg(VFS, "%s: ioctl error: rc=%d\n", __func__, rc); + goto out; + } + +@@ -3577,7 +3607,7 @@ static long smb3_zero_range(struct file *file, struct cifs_tcon *tcon, static long smb3_punch_hole(struct file *file, struct cifs_tcon *tcon, loff_t offset, loff_t len) { @@ -310698,7 +372715,7 @@ index bda606dc72b1f..2d31860d56e96 100644 struct cifsFileInfo *cfile = file->private_data; struct file_zero_data_information fsctl_buf; long rc; -@@ -3586,14 +3608,12 @@ static long smb3_punch_hole(struct file *file, struct cifs_tcon *tcon, +@@ -3586,14 +3616,12 @@ static long smb3_punch_hole(struct file *file, struct cifs_tcon *tcon, xid = get_xid(); @@ -310715,7 +372732,7 @@ index bda606dc72b1f..2d31860d56e96 100644 } filemap_invalidate_lock(inode->i_mapping); -@@ -3613,8 +3633,10 @@ static long smb3_punch_hole(struct file *file, struct cifs_tcon *tcon, +@@ -3613,8 +3641,10 @@ static long smb3_punch_hole(struct file *file, struct cifs_tcon *tcon, true /* is_fctl */, (char *)&fsctl_buf, sizeof(struct file_zero_data_information), CIFSMaxBufSize, NULL, NULL); @@ -310727,7 +372744,7 @@ index bda606dc72b1f..2d31860d56e96 100644 return rc; } -@@ -3773,7 +3795,7 @@ static long smb3_simple_falloc(struct file *file, struct cifs_tcon *tcon, +@@ -3773,7 +3803,7 @@ static long smb3_simple_falloc(struct file *file, struct cifs_tcon *tcon, if (rc) goto out; @@ -310736,7 +372753,7 @@ index bda606dc72b1f..2d31860d56e96 100644 smb2_set_sparse(xid, tcon, cfile, inode, false); eof = cpu_to_le64(off + len); -@@ -4250,11 +4272,13 @@ smb3_set_oplock_level(struct cifsInodeInfo *cinode, __u32 oplock, +@@ -4250,11 +4280,13 @@ smb3_set_oplock_level(struct cifsInodeInfo *cinode, __u32 oplock, } } @@ -310750,7 +372767,209 @@ index bda606dc72b1f..2d31860d56e96 100644 static bool smb21_is_read_op(__u32 oplock) -@@ -5350,7 +5374,7 @@ out: +@@ -4384,69 +4416,82 @@ fill_transform_hdr(struct smb2_transform_hdr *tr_hdr, unsigned int orig_len, + memcpy(&tr_hdr->SessionId, &shdr->SessionId, 8); + } + +-/* We can not use the normal sg_set_buf() as we will sometimes pass a +- * stack object as buf. 
+- */ +-static inline void smb2_sg_set_buf(struct scatterlist *sg, const void *buf, +- unsigned int buflen) ++static void *smb2_aead_req_alloc(struct crypto_aead *tfm, const struct smb_rqst *rqst, ++ int num_rqst, const u8 *sig, u8 **iv, ++ struct aead_request **req, struct scatterlist **sgl, ++ unsigned int *num_sgs) + { +- void *addr; +- /* +- * VMAP_STACK (at least) puts stack into the vmalloc address space +- */ +- if (is_vmalloc_addr(buf)) +- addr = vmalloc_to_page(buf); +- else +- addr = virt_to_page(buf); +- sg_set_page(sg, addr, buflen, offset_in_page(buf)); ++ unsigned int req_size = sizeof(**req) + crypto_aead_reqsize(tfm); ++ unsigned int iv_size = crypto_aead_ivsize(tfm); ++ unsigned int len; ++ u8 *p; ++ ++ *num_sgs = cifs_get_num_sgs(rqst, num_rqst, sig); ++ ++ len = iv_size; ++ len += crypto_aead_alignmask(tfm) & ~(crypto_tfm_ctx_alignment() - 1); ++ len = ALIGN(len, crypto_tfm_ctx_alignment()); ++ len += req_size; ++ len = ALIGN(len, __alignof__(struct scatterlist)); ++ len += *num_sgs * sizeof(**sgl); ++ ++ p = kmalloc(len, GFP_ATOMIC); ++ if (!p) ++ return NULL; ++ ++ *iv = (u8 *)PTR_ALIGN(p, crypto_aead_alignmask(tfm) + 1); ++ *req = (struct aead_request *)PTR_ALIGN(*iv + iv_size, ++ crypto_tfm_ctx_alignment()); ++ *sgl = (struct scatterlist *)PTR_ALIGN((u8 *)*req + req_size, ++ __alignof__(struct scatterlist)); ++ return p; + } + +-/* Assumes the first rqst has a transform header as the first iov. +- * I.e. +- * rqst[0].rq_iov[0] is transform header +- * rqst[0].rq_iov[1+] data to be encrypted/decrypted +- * rqst[1+].rq_iov[0+] data to be encrypted/decrypted +- */ +-static struct scatterlist * +-init_sg(int num_rqst, struct smb_rqst *rqst, u8 *sign) ++static void *smb2_get_aead_req(struct crypto_aead *tfm, const struct smb_rqst *rqst, ++ int num_rqst, const u8 *sig, u8 **iv, ++ struct aead_request **req, struct scatterlist **sgl) + { +- unsigned int sg_len; ++ unsigned int off, len, skip; + struct scatterlist *sg; +- unsigned int i; +- unsigned int j; +- unsigned int idx = 0; +- int skip; +- +- sg_len = 1; +- for (i = 0; i < num_rqst; i++) +- sg_len += rqst[i].rq_nvec + rqst[i].rq_npages; ++ unsigned int num_sgs; ++ unsigned long addr; ++ int i, j; ++ void *p; + +- sg = kmalloc_array(sg_len, sizeof(struct scatterlist), GFP_KERNEL); +- if (!sg) ++ p = smb2_aead_req_alloc(tfm, rqst, num_rqst, sig, iv, req, sgl, &num_sgs); ++ if (!p) + return NULL; + +- sg_init_table(sg, sg_len); ++ sg_init_table(*sgl, num_sgs); ++ sg = *sgl; ++ ++ /* Assumes the first rqst has a transform header as the first iov. ++ * I.e. ++ * rqst[0].rq_iov[0] is transform header ++ * rqst[0].rq_iov[1+] data to be encrypted/decrypted ++ * rqst[1+].rq_iov[0+] data to be encrypted/decrypted ++ */ + for (i = 0; i < num_rqst; i++) { ++ /* ++ * The first rqst has a transform header where the ++ * first 20 bytes are not part of the encrypted blob. ++ */ + for (j = 0; j < rqst[i].rq_nvec; j++) { +- /* +- * The first rqst has a transform header where the +- * first 20 bytes are not part of the encrypted blob +- */ +- skip = (i == 0) && (j == 0) ? 20 : 0; +- smb2_sg_set_buf(&sg[idx++], +- rqst[i].rq_iov[j].iov_base + skip, +- rqst[i].rq_iov[j].iov_len - skip); +- } ++ struct kvec *iov = &rqst[i].rq_iov[j]; + ++ skip = (i == 0) && (j == 0) ? 
20 : 0; ++ addr = (unsigned long)iov->iov_base + skip; ++ len = iov->iov_len - skip; ++ sg = cifs_sg_set_buf(sg, (void *)addr, len); ++ } + for (j = 0; j < rqst[i].rq_npages; j++) { +- unsigned int len, offset; +- +- rqst_page_get_length(&rqst[i], j, &len, &offset); +- sg_set_page(&sg[idx++], rqst[i].rq_pages[j], len, offset); ++ rqst_page_get_length(&rqst[i], j, &len, &off); ++ sg_set_page(sg++, rqst[i].rq_pages[j], len, off); + } + } +- smb2_sg_set_buf(&sg[idx], sign, SMB2_SIGNATURE_SIZE); +- return sg; ++ cifs_sg_set_buf(sg, sig, SMB2_SIGNATURE_SIZE); ++ ++ return p; + } + + static int +@@ -4490,11 +4535,11 @@ crypt_message(struct TCP_Server_Info *server, int num_rqst, + u8 sign[SMB2_SIGNATURE_SIZE] = {}; + u8 key[SMB3_ENC_DEC_KEY_SIZE]; + struct aead_request *req; +- char *iv; +- unsigned int iv_len; ++ u8 *iv; + DECLARE_CRYPTO_WAIT(wait); + struct crypto_aead *tfm; + unsigned int crypt_len = le32_to_cpu(tr_hdr->OriginalMessageSize); ++ void *creq; + + rc = smb2_get_enc_key(server, tr_hdr->SessionId, enc, key); + if (rc) { +@@ -4529,32 +4574,15 @@ crypt_message(struct TCP_Server_Info *server, int num_rqst, + return rc; + } + +- req = aead_request_alloc(tfm, GFP_KERNEL); +- if (!req) { +- cifs_server_dbg(VFS, "%s: Failed to alloc aead request\n", __func__); ++ creq = smb2_get_aead_req(tfm, rqst, num_rqst, sign, &iv, &req, &sg); ++ if (unlikely(!creq)) + return -ENOMEM; +- } + + if (!enc) { + memcpy(sign, &tr_hdr->Signature, SMB2_SIGNATURE_SIZE); + crypt_len += SMB2_SIGNATURE_SIZE; + } + +- sg = init_sg(num_rqst, rqst, sign); +- if (!sg) { +- cifs_server_dbg(VFS, "%s: Failed to init sg\n", __func__); +- rc = -ENOMEM; +- goto free_req; +- } +- +- iv_len = crypto_aead_ivsize(tfm); +- iv = kzalloc(iv_len, GFP_KERNEL); +- if (!iv) { +- cifs_server_dbg(VFS, "%s: Failed to alloc iv\n", __func__); +- rc = -ENOMEM; +- goto free_sg; +- } +- + if ((server->cipher_type == SMB2_ENCRYPTION_AES128_GCM) || + (server->cipher_type == SMB2_ENCRYPTION_AES256_GCM)) + memcpy(iv, (char *)tr_hdr->Nonce, SMB3_AES_GCM_NONCE); +@@ -4563,6 +4591,7 @@ crypt_message(struct TCP_Server_Info *server, int num_rqst, + memcpy(iv + 1, (char *)tr_hdr->Nonce, SMB3_AES_CCM_NONCE); + } + ++ aead_request_set_tfm(req, tfm); + aead_request_set_crypt(req, sg, sg, crypt_len, iv); + aead_request_set_ad(req, assoc_data_len); + +@@ -4575,11 +4604,7 @@ crypt_message(struct TCP_Server_Info *server, int num_rqst, + if (!rc && enc) + memcpy(&tr_hdr->Signature, sign, SMB2_SIGNATURE_SIZE); + +- kfree(iv); +-free_sg: +- kfree(sg); +-free_req: +- kfree(req); ++ kfree_sensitive(creq); + return rc; + } + +@@ -5350,7 +5375,7 @@ out: return rc; } @@ -310759,7 +372978,7 @@ index bda606dc72b1f..2d31860d56e96 100644 struct smb_version_operations smb20_operations = { .compare_fids = smb2_compare_fids, .setup_request = smb2_setup_request, -@@ -5449,6 +5473,7 @@ struct smb_version_operations smb20_operations = { +@@ -5449,6 +5474,7 @@ struct smb_version_operations smb20_operations = { .is_status_io_timeout = smb2_is_status_io_timeout, .is_network_name_deleted = smb2_is_network_name_deleted, }; @@ -310767,7 +372986,7 @@ index bda606dc72b1f..2d31860d56e96 100644 struct smb_version_operations smb21_operations = { .compare_fids = smb2_compare_fids, -@@ -5780,6 +5805,7 @@ struct smb_version_operations smb311_operations = { +@@ -5780,6 +5806,7 @@ struct smb_version_operations smb311_operations = { .is_network_name_deleted = smb2_is_network_name_deleted, }; @@ -310775,7 +372994,7 @@ index bda606dc72b1f..2d31860d56e96 100644 struct smb_version_values 
smb20_values = { .version_string = SMB20_VERSION_STRING, .protocol_id = SMB20_PROT_ID, -@@ -5800,6 +5826,7 @@ struct smb_version_values smb20_values = { +@@ -5800,6 +5827,7 @@ struct smb_version_values smb20_values = { .signing_required = SMB2_NEGOTIATE_SIGNING_REQUIRED, .create_lease_size = sizeof(struct create_lease), }; @@ -310784,10 +373003,23 @@ index bda606dc72b1f..2d31860d56e96 100644 struct smb_version_values smb21_values = { .version_string = SMB21_VERSION_STRING, diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c -index 7829c590eeac6..8aa0372141f5e 100644 +index 7829c590eeac6..f51fea2e808d1 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c -@@ -268,6 +268,9 @@ smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon, +@@ -156,7 +156,11 @@ smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon, + if (tcon == NULL) + return 0; + +- if (smb2_command == SMB2_TREE_CONNECT) ++ /* ++ * Need to also skip SMB2_IOCTL because it is used for checking nested dfs links in ++ * cifs_tree_connect(). ++ */ ++ if (smb2_command == SMB2_TREE_CONNECT || smb2_command == SMB2_IOCTL) + return 0; + + if (tcon->tidStatus == CifsExiting) { +@@ -268,6 +272,9 @@ smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon, ses->binding_chan = NULL; mutex_unlock(&tcon->ses->session_mutex); goto failed; @@ -310797,7 +373029,7 @@ index 7829c590eeac6..8aa0372141f5e 100644 } } /* -@@ -930,16 +933,17 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses) +@@ -930,16 +937,17 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses) } else if (rc != 0) goto neg_exit; @@ -310817,7 +373049,7 @@ index 7829c590eeac6..8aa0372141f5e 100644 } else if (rsp->DialectRevision == cpu_to_le16(SMB311_PROT_ID)) { /* ops set to 3.0 by default for default so update */ server->ops = &smb311_operations; -@@ -950,7 +954,7 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses) +@@ -950,7 +958,7 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses) if (rsp->DialectRevision == cpu_to_le16(SMB20_PROT_ID)) { cifs_server_dbg(VFS, "SMB2 dialect returned but not requested\n"); @@ -310826,7 +373058,7 @@ index 7829c590eeac6..8aa0372141f5e 100644 } else if (rsp->DialectRevision == cpu_to_le16(SMB21_PROT_ID)) { /* ops set to 3.0 by default for default so update */ server->ops = &smb21_operations; -@@ -964,7 +968,7 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses) +@@ -964,7 +972,7 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses) /* if requested single dialect ensure returned dialect matched */ cifs_server_dbg(VFS, "Invalid 0x%x dialect returned: not requested\n", le16_to_cpu(rsp->DialectRevision)); @@ -310835,7 +373067,7 @@ index 7829c590eeac6..8aa0372141f5e 100644 } cifs_dbg(FYI, "mode 0x%x\n", rsp->SecurityMode); -@@ -982,9 +986,10 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses) +@@ -982,9 +990,10 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses) else { cifs_server_dbg(VFS, "Invalid dialect returned by server 0x%x\n", le16_to_cpu(rsp->DialectRevision)); @@ -310847,7 +373079,7 @@ index 7829c590eeac6..8aa0372141f5e 100644 server->dialect = le16_to_cpu(rsp->DialectRevision); /* -@@ -1132,9 +1137,9 @@ int smb3_validate_negotiate(const unsigned int xid, struct cifs_tcon *tcon) +@@ -1132,9 +1141,9 @@ int smb3_validate_negotiate(const unsigned int xid, struct cifs_tcon *tcon) pneg_inbuf->Dialects[0] = cpu_to_le16(server->vals->protocol_id); pneg_inbuf->DialectCount = cpu_to_le16(1); @@ -310859,7 +373091,7 @@ index 7829c590eeac6..8aa0372141f5e 100644 } 
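A side note on the error-code changes in the SMB2_negotiate() hunks nearby: -EINVAL moves out of the server-response paths and is left for genuinely bad local input, while a response that violates the protocol now surfaces as -EIO. A toy validator showing the convention; the dialect constants here are illustrative, not asserted wire values:

	#include <errno.h>
	#include <stdint.h>

	#define DIALECT_A	0x0202	/* illustrative */
	#define DIALECT_B	0x0210	/* illustrative */

	static int check_negotiated_dialect(uint16_t requested, uint16_t returned)
	{
		if (requested != DIALECT_A && requested != DIALECT_B)
			return -EINVAL;	/* caller asked for something bogus */
		if (returned != requested)
			return -EIO;	/* server broke the protocol */
		return 0;
	}

Keeping the two codes distinct matters to callers: -EINVAL suggests retrying with different local options, -EIO points at the peer.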
rc = SMB2_ioctl(xid, tcon, NO_FILE_ID, NO_FILE_ID, -@@ -2349,7 +2354,7 @@ create_sd_buf(umode_t mode, bool set_owner, unsigned int *len) +@@ -2349,7 +2358,7 @@ create_sd_buf(umode_t mode, bool set_owner, unsigned int *len) unsigned int acelen, acl_size, ace_count; unsigned int owner_offset = 0; unsigned int group_offset = 0; @@ -310868,7 +373100,7 @@ index 7829c590eeac6..8aa0372141f5e 100644 *len = roundup(sizeof(struct crt_sd_ctxt) + (sizeof(struct cifs_ace) * 4), 8); -@@ -2422,6 +2427,7 @@ create_sd_buf(umode_t mode, bool set_owner, unsigned int *len) +@@ -2422,6 +2431,7 @@ create_sd_buf(umode_t mode, bool set_owner, unsigned int *len) acl.AclRevision = ACL_REVISION; /* See 2.4.4.1 of MS-DTYP */ acl.AclSize = cpu_to_le16(acl_size); acl.AceCount = cpu_to_le16(ace_count); @@ -310876,6 +373108,28 @@ index 7829c590eeac6..8aa0372141f5e 100644 memcpy(aclptr, &acl, sizeof(struct smb3_acl)); buf->ccontext.DataLength = cpu_to_le32(ptr - (__u8 *)&buf->sd); +@@ -3989,12 +3999,15 @@ smb2_readv_callback(struct mid_q_entry *mid) + (struct smb2_sync_hdr *)rdata->iov[0].iov_base; + struct cifs_credits credits = { .value = 0, .instance = 0 }; + struct smb_rqst rqst = { .rq_iov = &rdata->iov[1], +- .rq_nvec = 1, +- .rq_pages = rdata->pages, +- .rq_offset = rdata->page_offset, +- .rq_npages = rdata->nr_pages, +- .rq_pagesz = rdata->pagesz, +- .rq_tailsz = rdata->tailsz }; ++ .rq_nvec = 1, }; ++ ++ if (rdata->got_bytes) { ++ rqst.rq_pages = rdata->pages; ++ rqst.rq_offset = rdata->page_offset; ++ rqst.rq_npages = rdata->nr_pages; ++ rqst.rq_pagesz = rdata->pagesz; ++ rqst.rq_tailsz = rdata->tailsz; ++ } + + WARN_ONCE(rdata->server != mid->server, + "rdata server %p != mid server %p", diff --git a/fs/cifs/smb2transport.c b/fs/cifs/smb2transport.c index f59b956f9d250..390cc5e8c7467 100644 --- a/fs/cifs/smb2transport.c @@ -310905,6 +373159,18 @@ index f59b956f9d250..390cc5e8c7467 100644 if (allocate_crypto) { rc = cifs_alloc_hash("cmac(aes)", &hash, &sdesc); +diff --git a/fs/cifs/smbdirect.c b/fs/cifs/smbdirect.c +index 31ef64eb7fbb9..cb93cccbf0c41 100644 +--- a/fs/cifs/smbdirect.c ++++ b/fs/cifs/smbdirect.c +@@ -1405,6 +1405,7 @@ void smbd_destroy(struct TCP_Server_Info *server) + destroy_workqueue(info->workqueue); + log_rdma_event(INFO, "rdma session destroyed\n"); + kfree(info); ++ server->smbd_conn = NULL; + } + + /* diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index b7379329b741c..514056605fa7a 100644 --- a/fs/cifs/transport.c @@ -310966,7 +373232,7 @@ index 7d8b72d67c803..9d486fbbfbbde 100644 break; case XATTR_CIFS_ACL: diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c -index 1466b5d01cbb9..d1f9d26322027 100644 +index 1466b5d01cbb9..ec6519e1ca3bf 100644 --- a/fs/configfs/dir.c +++ b/fs/configfs/dir.c @@ -34,6 +34,14 @@ @@ -310984,7 +373250,23 @@ index 1466b5d01cbb9..d1f9d26322027 100644 static void configfs_d_iput(struct dentry * dentry, struct inode * inode) { -@@ -1780,8 +1788,8 @@ void configfs_unregister_group(struct config_group *group) +@@ -308,6 +316,7 @@ static int configfs_create_dir(struct config_item *item, struct dentry *dentry, + return 0; + + out_remove: ++ configfs_put(dentry->d_fsdata); + configfs_remove_dirent(dentry); + return PTR_ERR(inode); + } +@@ -374,6 +383,7 @@ int configfs_create_link(struct configfs_dirent *target, struct dentry *parent, + return 0; + + out_remove: ++ configfs_put(dentry->d_fsdata); + configfs_remove_dirent(dentry); + return PTR_ERR(inode); + } +@@ -1780,8 +1790,8 @@ void configfs_unregister_group(struct config_group *group) 
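One small hunk above is easy to miss: smbd_destroy() now clears server->smbd_conn after freeing the connection info. The pattern generalizes: when an object is torn down, the owner's cached pointer should be reset so later paths test NULL instead of dereferencing a stale address. A sketch with made-up types, not the cifs structures:

	#include <stdlib.h>

	struct conn { int fd; };
	struct server { struct conn *conn; };

	static void conn_destroy(struct server *srv)
	{
		free(srv->conn);
		srv->conn = NULL;	/* mirrors server->smbd_conn = NULL */
	}

	static int server_reconnect(struct server *srv)
	{
		if (srv->conn)		/* safe: never a dangling pointer */
			conn_destroy(srv);
		srv->conn = calloc(1, sizeof(*srv->conn));
		return srv->conn ? 0 : -1;
	}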
configfs_detach_group(&group->cg_item); d_inode(dentry)->i_flags |= S_DEAD; dont_mount(dentry); @@ -310994,7 +373276,7 @@ index 1466b5d01cbb9..d1f9d26322027 100644 inode_unlock(d_inode(parent)); dput(dentry); -@@ -1859,7 +1867,9 @@ int configfs_register_subsystem(struct configfs_subsystem *subsys) +@@ -1859,7 +1869,9 @@ int configfs_register_subsystem(struct configfs_subsystem *subsys) group->cg_item.ci_name = group->cg_item.ci_namebuf; sd = root->d_fsdata; @@ -311004,7 +373286,7 @@ index 1466b5d01cbb9..d1f9d26322027 100644 inode_lock_nested(d_inode(root), I_MUTEX_PARENT); -@@ -1884,7 +1894,9 @@ int configfs_register_subsystem(struct configfs_subsystem *subsys) +@@ -1884,7 +1896,9 @@ int configfs_register_subsystem(struct configfs_subsystem *subsys) inode_unlock(d_inode(root)); if (err) { @@ -311014,7 +373296,7 @@ index 1466b5d01cbb9..d1f9d26322027 100644 configfs_release_fs(); } put_fragment(frag); -@@ -1922,16 +1934,18 @@ void configfs_unregister_subsystem(struct configfs_subsystem *subsys) +@@ -1922,16 +1936,18 @@ void configfs_unregister_subsystem(struct configfs_subsystem *subsys) configfs_detach_group(&group->cg_item); d_inode(dentry)->i_flags |= S_DEAD; dont_mount(dentry); @@ -311215,10 +373497,126 @@ index 3224dee44d30e..26eb5a095832f 100644 + return true; } diff --git a/fs/crypto/fscrypt_private.h b/fs/crypto/fscrypt_private.h -index 3fa965eb3336d..cb25ef0cdf1f3 100644 +index 3fa965eb3336d..373c434b375c0 100644 --- a/fs/crypto/fscrypt_private.h +++ b/fs/crypto/fscrypt_private.h -@@ -549,8 +549,9 @@ int __init fscrypt_init_keyring(void); +@@ -220,7 +220,7 @@ struct fscrypt_info { + * will be NULL if the master key was found in a process-subscribed + * keyring rather than in the filesystem-level keyring. + */ +- struct key *ci_master_key; ++ struct fscrypt_master_key *ci_master_key; + + /* + * Link in list of inodes that were unlocked with the master key. +@@ -430,6 +430,40 @@ struct fscrypt_master_key_secret { + */ + struct fscrypt_master_key { + ++ /* ++ * Back-pointer to the super_block of the filesystem to which this ++ * master key has been added. Only valid if ->mk_active_refs > 0. ++ */ ++ struct super_block *mk_sb; ++ ++ /* ++ * Link in ->mk_sb->s_master_keys->key_hashtable. ++ * Only valid if ->mk_active_refs > 0. ++ */ ++ struct hlist_node mk_node; ++ ++ /* Semaphore that protects ->mk_secret and ->mk_users */ ++ struct rw_semaphore mk_sem; ++ ++ /* ++ * Active and structural reference counts. An active ref guarantees ++ * that the struct continues to exist, continues to be in the keyring ++ * ->mk_sb->s_master_keys, and that any embedded subkeys (e.g. ++ * ->mk_direct_keys) that have been prepared continue to exist. ++ * A structural ref only guarantees that the struct continues to exist. ++ * ++ * There is one active ref associated with ->mk_secret being present, ++ * and one active ref for each inode in ->mk_decrypted_inodes. ++ * ++ * There is one structural ref associated with the active refcount being ++ * nonzero. Finding a key in the keyring also takes a structural ref, ++ * which is then held temporarily while the key is operated on. ++ */ ++ refcount_t mk_active_refs; ++ refcount_t mk_struct_refs; ++ ++ struct rcu_head mk_rcu_head; ++ + /* + * The secret key material. After FS_IOC_REMOVE_ENCRYPTION_KEY is + * executed, this is wiped and no new inodes can be unlocked with this +@@ -438,7 +472,10 @@ struct fscrypt_master_key { + * FS_IOC_REMOVE_ENCRYPTION_KEY can be retried, or + * FS_IOC_ADD_ENCRYPTION_KEY can add the secret again. 
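The new fscrypt_master_key fields described above encode a two-level lifetime: active refs keep the key usable and listed in the keyring, while structural refs only keep the memory around so lockless readers can still inspect it. A compressed userspace model of the two counters follows, with refcount_t swapped for C11 atomics and the RCU deferral elided:

	#include <stdatomic.h>
	#include <stdlib.h>

	struct master_key {
		atomic_int active_refs;		/* usable and findable */
		atomic_int struct_refs;		/* memory may still be examined */
	};

	static void put_struct_ref(struct master_key *mk)
	{
		if (atomic_fetch_sub(&mk->struct_refs, 1) == 1)
			free(mk);	/* the kernel defers this through RCU */
	}

	static void put_active_ref(struct master_key *mk)
	{
		if (atomic_fetch_sub(&mk->active_refs, 1) == 1) {
			/* last active ref: unhash, wipe subkeys, then drop
			 * the structural ref the active count was holding */
			put_struct_ref(mk);
		}
	}

The invariant mirrors the struct comments: a nonzero active count holds one structural ref, so a reader that wins a try-get on the structural count can always dereference safely.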
+ * +- * Locking: protected by this master key's key->sem. ++ * While ->mk_secret is present, one ref in ->mk_active_refs is held. ++ * ++ * Locking: protected by ->mk_sem. The manipulation of ->mk_active_refs ++ * associated with this field is protected by ->mk_sem as well. + */ + struct fscrypt_master_key_secret mk_secret; + +@@ -459,22 +496,12 @@ struct fscrypt_master_key { + * + * This is NULL for v1 policy keys; those can only be added by root. + * +- * Locking: in addition to this keyring's own semaphore, this is +- * protected by this master key's key->sem, so we can do atomic +- * search+insert. It can also be searched without taking any locks, but +- * in that case the returned key may have already been removed. ++ * Locking: protected by ->mk_sem. (We don't just rely on the keyrings ++ * subsystem semaphore ->mk_users->sem, as we need support for atomic ++ * search+insert along with proper synchronization with ->mk_secret.) + */ + struct key *mk_users; + +- /* +- * Length of ->mk_decrypted_inodes, plus one if mk_secret is present. +- * Once this goes to 0, the master key is removed from ->s_master_keys. +- * The 'struct fscrypt_master_key' will continue to live as long as the +- * 'struct key' whose payload it is, but we won't let this reference +- * count rise again. +- */ +- refcount_t mk_refcount; +- + /* + * List of inodes that were unlocked using this key. This allows the + * inodes to be evicted efficiently if the key is removed. +@@ -500,10 +527,10 @@ static inline bool + is_master_key_secret_present(const struct fscrypt_master_key_secret *secret) + { + /* +- * The READ_ONCE() is only necessary for fscrypt_drop_inode() and +- * fscrypt_key_describe(). These run in atomic context, so they can't +- * take the key semaphore and thus 'secret' can change concurrently +- * which would be a data race. But they only need to know whether the ++ * The READ_ONCE() is only necessary for fscrypt_drop_inode(). ++ * fscrypt_drop_inode() runs in atomic context, so it can't take the key ++ * semaphore and thus 'secret' can change concurrently which would be a ++ * data race. But fscrypt_drop_inode() only need to know whether the + * secret *was* present at the time of check, so READ_ONCE() suffices. + */ + return READ_ONCE(secret->size) != 0; +@@ -532,7 +559,11 @@ static inline int master_key_spec_len(const struct fscrypt_key_specifier *spec) + return 0; + } + +-struct key * ++void fscrypt_put_master_key(struct fscrypt_master_key *mk); ++ ++void fscrypt_put_master_key_activeref(struct fscrypt_master_key *mk); ++ ++struct fscrypt_master_key * + fscrypt_find_master_key(struct super_block *sb, + const struct fscrypt_key_specifier *mk_spec); + +@@ -549,8 +580,9 @@ int __init fscrypt_init_keyring(void); struct fscrypt_mode { const char *friendly_name; const char *cipher_str; @@ -311252,11 +373650,844 @@ index e0ec210555053..7607d18b35fc0 100644 */ #define HKDF_HMAC_ALG "hmac(sha512)" #define HKDF_HASHLEN SHA512_DIGEST_SIZE +diff --git a/fs/crypto/hooks.c b/fs/crypto/hooks.c +index af74599ae1cf0..be5c650e49573 100644 +--- a/fs/crypto/hooks.c ++++ b/fs/crypto/hooks.c +@@ -5,8 +5,6 @@ + * Encryption hooks for higher-level filesystem operations. 
+ */ + +-#include <linux/key.h> +- + #include "fscrypt_private.h" + + /** +@@ -142,7 +140,6 @@ int fscrypt_prepare_setflags(struct inode *inode, + unsigned int oldflags, unsigned int flags) + { + struct fscrypt_info *ci; +- struct key *key; + struct fscrypt_master_key *mk; + int err; + +@@ -158,14 +155,13 @@ int fscrypt_prepare_setflags(struct inode *inode, + ci = inode->i_crypt_info; + if (ci->ci_policy.version != FSCRYPT_POLICY_V2) + return -EINVAL; +- key = ci->ci_master_key; +- mk = key->payload.data[0]; +- down_read(&key->sem); ++ mk = ci->ci_master_key; ++ down_read(&mk->mk_sem); + if (is_master_key_secret_present(&mk->mk_secret)) + err = fscrypt_derive_dirhash_key(ci, mk); + else + err = -ENOKEY; +- up_read(&key->sem); ++ up_read(&mk->mk_sem); + return err; + } + return 0; +diff --git a/fs/crypto/keyring.c b/fs/crypto/keyring.c +index 0b3ffbb4faf4a..02f8bf8bd54da 100644 +--- a/fs/crypto/keyring.c ++++ b/fs/crypto/keyring.c +@@ -18,6 +18,7 @@ + * information about these ioctls. + */ + ++#include <asm/unaligned.h> + #include <crypto/skcipher.h> + #include <linux/key-type.h> + #include <linux/random.h> +@@ -25,6 +26,18 @@ + + #include "fscrypt_private.h" + ++/* The master encryption keys for a filesystem (->s_master_keys) */ ++struct fscrypt_keyring { ++ /* ++ * Lock that protects ->key_hashtable. It does *not* protect the ++ * fscrypt_master_key structs themselves. ++ */ ++ spinlock_t lock; ++ ++ /* Hash table that maps fscrypt_key_specifier to fscrypt_master_key */ ++ struct hlist_head key_hashtable[128]; ++}; ++ + static void wipe_master_key_secret(struct fscrypt_master_key_secret *secret) + { + fscrypt_destroy_hkdf(&secret->hkdf); +@@ -38,20 +51,70 @@ static void move_master_key_secret(struct fscrypt_master_key_secret *dst, + memzero_explicit(src, sizeof(*src)); + } + +-static void free_master_key(struct fscrypt_master_key *mk) ++static void fscrypt_free_master_key(struct rcu_head *head) ++{ ++ struct fscrypt_master_key *mk = ++ container_of(head, struct fscrypt_master_key, mk_rcu_head); ++ /* ++ * The master key secret and any embedded subkeys should have already ++ * been wiped when the last active reference to the fscrypt_master_key ++ * struct was dropped; doing it here would be unnecessarily late. ++ * Nevertheless, use kfree_sensitive() in case anything was missed. ++ */ ++ kfree_sensitive(mk); ++} ++ ++void fscrypt_put_master_key(struct fscrypt_master_key *mk) ++{ ++ if (!refcount_dec_and_test(&mk->mk_struct_refs)) ++ return; ++ /* ++ * No structural references left, so free ->mk_users, and also free the ++ * fscrypt_master_key struct itself after an RCU grace period ensures ++ * that concurrent keyring lookups can no longer find it. ++ */ ++ WARN_ON(refcount_read(&mk->mk_active_refs) != 0); ++ key_put(mk->mk_users); ++ mk->mk_users = NULL; ++ call_rcu(&mk->mk_rcu_head, fscrypt_free_master_key); ++} ++ ++void fscrypt_put_master_key_activeref(struct fscrypt_master_key *mk) + { ++ struct super_block *sb = mk->mk_sb; ++ struct fscrypt_keyring *keyring = sb->s_master_keys; + size_t i; + +- wipe_master_key_secret(&mk->mk_secret); ++ if (!refcount_dec_and_test(&mk->mk_active_refs)) ++ return; ++ /* ++ * No active references left, so complete the full removal of this ++ * fscrypt_master_key struct by removing it from the keyring and ++ * destroying any subkeys embedded in it. 
++ */ ++ ++ spin_lock(&keyring->lock); ++ hlist_del_rcu(&mk->mk_node); ++ spin_unlock(&keyring->lock); ++ ++ /* ++ * ->mk_active_refs == 0 implies that ->mk_secret is not present and ++ * that ->mk_decrypted_inodes is empty. ++ */ ++ WARN_ON(is_master_key_secret_present(&mk->mk_secret)); ++ WARN_ON(!list_empty(&mk->mk_decrypted_inodes)); + + for (i = 0; i <= FSCRYPT_MODE_MAX; i++) { + fscrypt_destroy_prepared_key(&mk->mk_direct_keys[i]); + fscrypt_destroy_prepared_key(&mk->mk_iv_ino_lblk_64_keys[i]); + fscrypt_destroy_prepared_key(&mk->mk_iv_ino_lblk_32_keys[i]); + } ++ memzero_explicit(&mk->mk_ino_hash_key, ++ sizeof(mk->mk_ino_hash_key)); ++ mk->mk_ino_hash_key_initialized = false; + +- key_put(mk->mk_users); +- kfree_sensitive(mk); ++ /* Drop the structural ref associated with the active refs. */ ++ fscrypt_put_master_key(mk); + } + + static inline bool valid_key_spec(const struct fscrypt_key_specifier *spec) +@@ -61,44 +124,6 @@ static inline bool valid_key_spec(const struct fscrypt_key_specifier *spec) + return master_key_spec_len(spec) != 0; + } + +-static int fscrypt_key_instantiate(struct key *key, +- struct key_preparsed_payload *prep) +-{ +- key->payload.data[0] = (struct fscrypt_master_key *)prep->data; +- return 0; +-} +- +-static void fscrypt_key_destroy(struct key *key) +-{ +- free_master_key(key->payload.data[0]); +-} +- +-static void fscrypt_key_describe(const struct key *key, struct seq_file *m) +-{ +- seq_puts(m, key->description); +- +- if (key_is_positive(key)) { +- const struct fscrypt_master_key *mk = key->payload.data[0]; +- +- if (!is_master_key_secret_present(&mk->mk_secret)) +- seq_puts(m, ": secret removed"); +- } +-} +- +-/* +- * Type of key in ->s_master_keys. Each key of this type represents a master +- * key which has been added to the filesystem. Its payload is a +- * 'struct fscrypt_master_key'. The "." prefix in the key type name prevents +- * users from adding keys of this type via the keyrings syscalls rather than via +- * the intended method of FS_IOC_ADD_ENCRYPTION_KEY. +- */ +-static struct key_type key_type_fscrypt = { +- .name = "._fscrypt", +- .instantiate = fscrypt_key_instantiate, +- .destroy = fscrypt_key_destroy, +- .describe = fscrypt_key_describe, +-}; +- + static int fscrypt_user_key_instantiate(struct key *key, + struct key_preparsed_payload *prep) + { +@@ -131,32 +156,6 @@ static struct key_type key_type_fscrypt_user = { + .describe = fscrypt_user_key_describe, + }; + +-/* Search ->s_master_keys or ->mk_users */ +-static struct key *search_fscrypt_keyring(struct key *keyring, +- struct key_type *type, +- const char *description) +-{ +- /* +- * We need to mark the keyring reference as "possessed" so that we +- * acquire permission to search it, via the KEY_POS_SEARCH permission. 
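The teardown order in fscrypt_put_master_key_activeref() and fscrypt_put_master_key() above is deliberate: unhash first so no new lookup can find the key, then scrub the embedded secrets, and only then free, with an RCU grace period inserted before the final free so lockless list walkers stay safe. A single-threaded userspace model of that ordering, which frees directly where the kernel would use call_rcu() and kfree_sensitive():

	#include <pthread.h>
	#include <stdlib.h>
	#include <string.h>

	struct mk {
		struct mk *next;
		unsigned char secret[32];
	};

	static pthread_mutex_t hash_lock = PTHREAD_MUTEX_INITIALIZER; /* kernel: spinlock_t */
	static struct mk *bucket_head;

	static void remove_key(struct mk *mk)
	{
		struct mk **pp;

		pthread_mutex_lock(&hash_lock);
		for (pp = &bucket_head; *pp; pp = &(*pp)->next) {
			if (*pp == mk) {
				*pp = mk->next;		/* like hlist_del_rcu() */
				break;
			}
		}
		pthread_mutex_unlock(&hash_lock);

		memset(mk->secret, 0, sizeof(mk->secret));	/* wipe early */
		free(mk);	/* kernel: call_rcu() + kfree_sensitive() */
	}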
+- */ +- key_ref_t keyref = make_key_ref(keyring, true /* possessed */); +- +- keyref = keyring_search(keyref, type, description, false); +- if (IS_ERR(keyref)) { +- if (PTR_ERR(keyref) == -EAGAIN || /* not found */ +- PTR_ERR(keyref) == -EKEYREVOKED) /* recently invalidated */ +- keyref = ERR_PTR(-ENOKEY); +- return ERR_CAST(keyref); +- } +- return key_ref_to_ptr(keyref); +-} +- +-#define FSCRYPT_FS_KEYRING_DESCRIPTION_SIZE \ +- (CONST_STRLEN("fscrypt-") + sizeof_field(struct super_block, s_id)) +- +-#define FSCRYPT_MK_DESCRIPTION_SIZE (2 * FSCRYPT_KEY_IDENTIFIER_SIZE + 1) +- + #define FSCRYPT_MK_USERS_DESCRIPTION_SIZE \ + (CONST_STRLEN("fscrypt-") + 2 * FSCRYPT_KEY_IDENTIFIER_SIZE + \ + CONST_STRLEN("-users") + 1) +@@ -164,21 +163,6 @@ static struct key *search_fscrypt_keyring(struct key *keyring, + #define FSCRYPT_MK_USER_DESCRIPTION_SIZE \ + (2 * FSCRYPT_KEY_IDENTIFIER_SIZE + CONST_STRLEN(".uid.") + 10 + 1) + +-static void format_fs_keyring_description( +- char description[FSCRYPT_FS_KEYRING_DESCRIPTION_SIZE], +- const struct super_block *sb) +-{ +- sprintf(description, "fscrypt-%s", sb->s_id); +-} +- +-static void format_mk_description( +- char description[FSCRYPT_MK_DESCRIPTION_SIZE], +- const struct fscrypt_key_specifier *mk_spec) +-{ +- sprintf(description, "%*phN", +- master_key_spec_len(mk_spec), (u8 *)&mk_spec->u); +-} +- + static void format_mk_users_keyring_description( + char description[FSCRYPT_MK_USERS_DESCRIPTION_SIZE], + const u8 mk_identifier[FSCRYPT_KEY_IDENTIFIER_SIZE]) +@@ -199,20 +183,15 @@ static void format_mk_user_description( + /* Create ->s_master_keys if needed. Synchronized by fscrypt_add_key_mutex. */ + static int allocate_filesystem_keyring(struct super_block *sb) + { +- char description[FSCRYPT_FS_KEYRING_DESCRIPTION_SIZE]; +- struct key *keyring; ++ struct fscrypt_keyring *keyring; + + if (sb->s_master_keys) + return 0; + +- format_fs_keyring_description(description, sb); +- keyring = keyring_alloc(description, GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, +- current_cred(), KEY_POS_SEARCH | +- KEY_USR_SEARCH | KEY_USR_READ | KEY_USR_VIEW, +- KEY_ALLOC_NOT_IN_QUOTA, NULL, NULL); +- if (IS_ERR(keyring)) +- return PTR_ERR(keyring); +- ++ keyring = kzalloc(sizeof(*keyring), GFP_KERNEL); ++ if (!keyring) ++ return -ENOMEM; ++ spin_lock_init(&keyring->lock); + /* + * Pairs with the smp_load_acquire() in fscrypt_find_master_key(). + * I.e., here we publish ->s_master_keys with a RELEASE barrier so that +@@ -222,21 +201,80 @@ static int allocate_filesystem_keyring(struct super_block *sb) + return 0; + } + +-void fscrypt_sb_free(struct super_block *sb) ++/* ++ * Release all encryption keys that have been added to the filesystem, along ++ * with the keyring that contains them. ++ * ++ * This is called at unmount time. The filesystem's underlying block device(s) ++ * are still available at this time; this is important because after user file ++ * accesses have been allowed, this function may need to evict keys from the ++ * keyslots of an inline crypto engine, which requires the block device(s). ++ * ++ * This is also called when the super_block is being freed. This is needed to ++ * avoid a memory leak if mounting fails after the "test_dummy_encryption" ++ * option was processed, as in that case the unmount-time call isn't made. 
++ */ ++void fscrypt_destroy_keyring(struct super_block *sb) + { +- key_put(sb->s_master_keys); ++ struct fscrypt_keyring *keyring = sb->s_master_keys; ++ size_t i; ++ ++ if (!keyring) ++ return; ++ ++ for (i = 0; i < ARRAY_SIZE(keyring->key_hashtable); i++) { ++ struct hlist_head *bucket = &keyring->key_hashtable[i]; ++ struct fscrypt_master_key *mk; ++ struct hlist_node *tmp; ++ ++ hlist_for_each_entry_safe(mk, tmp, bucket, mk_node) { ++ /* ++ * Since all inodes were already evicted, every key ++ * remaining in the keyring should have an empty inode ++ * list, and should only still be in the keyring due to ++ * the single active ref associated with ->mk_secret. ++ * There should be no structural refs beyond the one ++ * associated with the active ref. ++ */ ++ WARN_ON(refcount_read(&mk->mk_active_refs) != 1); ++ WARN_ON(refcount_read(&mk->mk_struct_refs) != 1); ++ WARN_ON(!is_master_key_secret_present(&mk->mk_secret)); ++ wipe_master_key_secret(&mk->mk_secret); ++ fscrypt_put_master_key_activeref(mk); ++ } ++ } ++ kfree_sensitive(keyring); + sb->s_master_keys = NULL; + } + ++static struct hlist_head * ++fscrypt_mk_hash_bucket(struct fscrypt_keyring *keyring, ++ const struct fscrypt_key_specifier *mk_spec) ++{ ++ /* ++ * Since key specifiers should be "random" values, it is sufficient to ++ * use a trivial hash function that just takes the first several bits of ++ * the key specifier. ++ */ ++ unsigned long i = get_unaligned((unsigned long *)&mk_spec->u); ++ ++ return &keyring->key_hashtable[i % ARRAY_SIZE(keyring->key_hashtable)]; ++} ++ + /* +- * Find the specified master key in ->s_master_keys. +- * Returns ERR_PTR(-ENOKEY) if not found. ++ * Find the specified master key struct in ->s_master_keys and take a structural ++ * ref to it. The structural ref guarantees that the key struct continues to ++ * exist, but it does *not* guarantee that ->s_master_keys continues to contain ++ * the key struct. The structural ref needs to be dropped by ++ * fscrypt_put_master_key(). Returns NULL if the key struct is not found. + */ +-struct key *fscrypt_find_master_key(struct super_block *sb, +- const struct fscrypt_key_specifier *mk_spec) ++struct fscrypt_master_key * ++fscrypt_find_master_key(struct super_block *sb, ++ const struct fscrypt_key_specifier *mk_spec) + { +- struct key *keyring; +- char description[FSCRYPT_MK_DESCRIPTION_SIZE]; ++ struct fscrypt_keyring *keyring; ++ struct hlist_head *bucket; ++ struct fscrypt_master_key *mk; + + /* + * Pairs with the smp_store_release() in allocate_filesystem_keyring(). +@@ -246,10 +284,38 @@ struct key *fscrypt_find_master_key(struct super_block *sb, + */ + keyring = smp_load_acquire(&sb->s_master_keys); + if (keyring == NULL) +- return ERR_PTR(-ENOKEY); /* No keyring yet, so no keys yet. */ +- +- format_mk_description(description, mk_spec); +- return search_fscrypt_keyring(keyring, &key_type_fscrypt, description); ++ return NULL; /* No keyring yet, so no keys yet. 
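fscrypt_destroy_keyring() above drains every bucket with a "safe" iteration that reads the next pointer before the current entry is released, and it can do so without the keyring lock because nothing else can reach the keyring at unmount time. A reduced model of that sweep, with a trivial singly linked list standing in for the hlist machinery:

	#include <stdlib.h>

	#define NBUCKETS 128

	struct node { struct node *next; };
	struct keyring { struct node *buckets[NBUCKETS]; };

	/* Unmount-time sweep: save ->next before freeing each node. */
	static void keyring_destroy(struct keyring *kr)
	{
		size_t i;

		for (i = 0; i < NBUCKETS; i++) {
			struct node *n = kr->buckets[i];

			while (n) {
				struct node *next = n->next;	/* save before free */

				free(n);	/* kernel: wipe secret + drop active ref */
				n = next;
			}
		}
		free(kr);		/* kernel: kfree_sensitive() */
	}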
*/ ++ ++ bucket = fscrypt_mk_hash_bucket(keyring, mk_spec); ++ rcu_read_lock(); ++ switch (mk_spec->type) { ++ case FSCRYPT_KEY_SPEC_TYPE_DESCRIPTOR: ++ hlist_for_each_entry_rcu(mk, bucket, mk_node) { ++ if (mk->mk_spec.type == ++ FSCRYPT_KEY_SPEC_TYPE_DESCRIPTOR && ++ memcmp(mk->mk_spec.u.descriptor, ++ mk_spec->u.descriptor, ++ FSCRYPT_KEY_DESCRIPTOR_SIZE) == 0 && ++ refcount_inc_not_zero(&mk->mk_struct_refs)) ++ goto out; ++ } ++ break; ++ case FSCRYPT_KEY_SPEC_TYPE_IDENTIFIER: ++ hlist_for_each_entry_rcu(mk, bucket, mk_node) { ++ if (mk->mk_spec.type == ++ FSCRYPT_KEY_SPEC_TYPE_IDENTIFIER && ++ memcmp(mk->mk_spec.u.identifier, ++ mk_spec->u.identifier, ++ FSCRYPT_KEY_IDENTIFIER_SIZE) == 0 && ++ refcount_inc_not_zero(&mk->mk_struct_refs)) ++ goto out; ++ } ++ break; ++ } ++ mk = NULL; ++out: ++ rcu_read_unlock(); ++ return mk; + } + + static int allocate_master_key_users_keyring(struct fscrypt_master_key *mk) +@@ -277,17 +343,30 @@ static int allocate_master_key_users_keyring(struct fscrypt_master_key *mk) + static struct key *find_master_key_user(struct fscrypt_master_key *mk) + { + char description[FSCRYPT_MK_USER_DESCRIPTION_SIZE]; ++ key_ref_t keyref; + + format_mk_user_description(description, mk->mk_spec.u.identifier); +- return search_fscrypt_keyring(mk->mk_users, &key_type_fscrypt_user, +- description); ++ ++ /* ++ * We need to mark the keyring reference as "possessed" so that we ++ * acquire permission to search it, via the KEY_POS_SEARCH permission. ++ */ ++ keyref = keyring_search(make_key_ref(mk->mk_users, true /*possessed*/), ++ &key_type_fscrypt_user, description, false); ++ if (IS_ERR(keyref)) { ++ if (PTR_ERR(keyref) == -EAGAIN || /* not found */ ++ PTR_ERR(keyref) == -EKEYREVOKED) /* recently invalidated */ ++ keyref = ERR_PTR(-ENOKEY); ++ return ERR_CAST(keyref); ++ } ++ return key_ref_to_ptr(keyref); + } + + /* + * Give the current user a "key" in ->mk_users. This charges the user's quota + * and marks the master key as added by the current user, so that it cannot be +- * removed by another user with the key. Either the master key's key->sem must +- * be held for write, or the master key must be still undergoing initialization. ++ * removed by another user with the key. Either ->mk_sem must be held for ++ * write, or the master key must be still undergoing initialization. + */ + static int add_master_key_user(struct fscrypt_master_key *mk) + { +@@ -309,7 +388,7 @@ static int add_master_key_user(struct fscrypt_master_key *mk) + + /* + * Remove the current user's "key" from ->mk_users. +- * The master key's key->sem must be held for write. ++ * ->mk_sem must be held for write. + * + * Returns 0 if removed, -ENOKEY if not found, or another -errno code. + */ +@@ -327,63 +406,49 @@ static int remove_master_key_user(struct fscrypt_master_key *mk) + } + + /* +- * Allocate a new fscrypt_master_key which contains the given secret, set it as +- * the payload of a new 'struct key' of type fscrypt, and link the 'struct key' +- * into the given keyring. Synchronized by fscrypt_add_key_mutex. ++ * Allocate a new fscrypt_master_key, transfer the given secret over to it, and ++ * insert it into sb->s_master_keys. 
+ */ +-static int add_new_master_key(struct fscrypt_master_key_secret *secret, +- const struct fscrypt_key_specifier *mk_spec, +- struct key *keyring) ++static int add_new_master_key(struct super_block *sb, ++ struct fscrypt_master_key_secret *secret, ++ const struct fscrypt_key_specifier *mk_spec) + { ++ struct fscrypt_keyring *keyring = sb->s_master_keys; + struct fscrypt_master_key *mk; +- char description[FSCRYPT_MK_DESCRIPTION_SIZE]; +- struct key *key; + int err; + + mk = kzalloc(sizeof(*mk), GFP_KERNEL); + if (!mk) + return -ENOMEM; + ++ mk->mk_sb = sb; ++ init_rwsem(&mk->mk_sem); ++ refcount_set(&mk->mk_struct_refs, 1); + mk->mk_spec = *mk_spec; + +- move_master_key_secret(&mk->mk_secret, secret); +- +- refcount_set(&mk->mk_refcount, 1); /* secret is present */ + INIT_LIST_HEAD(&mk->mk_decrypted_inodes); + spin_lock_init(&mk->mk_decrypted_inodes_lock); + + if (mk_spec->type == FSCRYPT_KEY_SPEC_TYPE_IDENTIFIER) { + err = allocate_master_key_users_keyring(mk); + if (err) +- goto out_free_mk; ++ goto out_put; + err = add_master_key_user(mk); + if (err) +- goto out_free_mk; ++ goto out_put; + } + +- /* +- * Note that we don't charge this key to anyone's quota, since when +- * ->mk_users is in use those keys are charged instead, and otherwise +- * (when ->mk_users isn't in use) only root can add these keys. +- */ +- format_mk_description(description, mk_spec); +- key = key_alloc(&key_type_fscrypt, description, +- GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, current_cred(), +- KEY_POS_SEARCH | KEY_USR_SEARCH | KEY_USR_VIEW, +- KEY_ALLOC_NOT_IN_QUOTA, NULL); +- if (IS_ERR(key)) { +- err = PTR_ERR(key); +- goto out_free_mk; +- } +- err = key_instantiate_and_link(key, mk, sizeof(*mk), keyring, NULL); +- key_put(key); +- if (err) +- goto out_free_mk; ++ move_master_key_secret(&mk->mk_secret, secret); ++ refcount_set(&mk->mk_active_refs, 1); /* ->mk_secret is present */ + ++ spin_lock(&keyring->lock); ++ hlist_add_head_rcu(&mk->mk_node, ++ fscrypt_mk_hash_bucket(keyring, mk_spec)); ++ spin_unlock(&keyring->lock); + return 0; + +-out_free_mk: +- free_master_key(mk); ++out_put: ++ fscrypt_put_master_key(mk); + return err; + } + +@@ -392,42 +457,34 @@ out_free_mk: + static int add_existing_master_key(struct fscrypt_master_key *mk, + struct fscrypt_master_key_secret *secret) + { +- struct key *mk_user; +- bool rekey; + int err; + + /* + * If the current user is already in ->mk_users, then there's nothing to +- * do. (Not applicable for v1 policy keys, which have NULL ->mk_users.) ++ * do. Otherwise, we need to add the user to ->mk_users. (Neither is ++ * applicable for v1 policy keys, which have NULL ->mk_users.) + */ + if (mk->mk_users) { +- mk_user = find_master_key_user(mk); ++ struct key *mk_user = find_master_key_user(mk); ++ + if (mk_user != ERR_PTR(-ENOKEY)) { + if (IS_ERR(mk_user)) + return PTR_ERR(mk_user); + key_put(mk_user); + return 0; + } +- } +- +- /* If we'll be re-adding ->mk_secret, try to take the reference. */ +- rekey = !is_master_key_secret_present(&mk->mk_secret); +- if (rekey && !refcount_inc_not_zero(&mk->mk_refcount)) +- return KEY_DEAD; +- +- /* Add the current user to ->mk_users, if applicable. */ +- if (mk->mk_users) { + err = add_master_key_user(mk); +- if (err) { +- if (rekey && refcount_dec_and_test(&mk->mk_refcount)) +- return KEY_DEAD; ++ if (err) + return err; +- } + } + + /* Re-add the secret if needed. 
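add_new_master_key() above publishes the new struct with a head insertion into one of 128 buckets under the keyring spinlock, and the bucket choice (fscrypt_mk_hash_bucket(), earlier in this file's hunks) just reads the leading bytes of the already-random key specifier. A self-contained model of both steps; the mutex is a userspace stand-in for the spinlock:

	#include <pthread.h>
	#include <string.h>

	#define NBUCKETS 128	/* same size as the hunk's key_hashtable */

	struct node { struct node *next; unsigned char id[16]; };

	static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
	static struct node *buckets[NBUCKETS];

	/* Key specifiers are random, so the first machine word of the id
	 * is already a good hash; memcpy() stands in for get_unaligned(). */
	static unsigned bucket_of(const unsigned char id[16])
	{
		unsigned long w;

		memcpy(&w, id, sizeof(w));
		return (unsigned)(w % NBUCKETS);
	}

	static void insert_key(struct node *n)
	{
		unsigned b = bucket_of(n->id);

		pthread_mutex_lock(&lock);
		n->next = buckets[b];	/* head insertion, like hlist_add_head_rcu() */
		buckets[b] = n;
		pthread_mutex_unlock(&lock);
	}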
*/ +- if (rekey) ++ if (!is_master_key_secret_present(&mk->mk_secret)) { ++ if (!refcount_inc_not_zero(&mk->mk_active_refs)) ++ return KEY_DEAD; + move_master_key_secret(&mk->mk_secret, secret); ++ } ++ + return 0; + } + +@@ -436,38 +493,36 @@ static int do_add_master_key(struct super_block *sb, + const struct fscrypt_key_specifier *mk_spec) + { + static DEFINE_MUTEX(fscrypt_add_key_mutex); +- struct key *key; ++ struct fscrypt_master_key *mk; + int err; + + mutex_lock(&fscrypt_add_key_mutex); /* serialize find + link */ +-retry: +- key = fscrypt_find_master_key(sb, mk_spec); +- if (IS_ERR(key)) { +- err = PTR_ERR(key); +- if (err != -ENOKEY) +- goto out_unlock; ++ ++ mk = fscrypt_find_master_key(sb, mk_spec); ++ if (!mk) { + /* Didn't find the key in ->s_master_keys. Add it. */ + err = allocate_filesystem_keyring(sb); +- if (err) +- goto out_unlock; +- err = add_new_master_key(secret, mk_spec, sb->s_master_keys); ++ if (!err) ++ err = add_new_master_key(sb, secret, mk_spec); + } else { + /* + * Found the key in ->s_master_keys. Re-add the secret if + * needed, and add the user to ->mk_users if needed. + */ +- down_write(&key->sem); +- err = add_existing_master_key(key->payload.data[0], secret); +- up_write(&key->sem); ++ down_write(&mk->mk_sem); ++ err = add_existing_master_key(mk, secret); ++ up_write(&mk->mk_sem); + if (err == KEY_DEAD) { +- /* Key being removed or needs to be removed */ +- key_invalidate(key); +- key_put(key); +- goto retry; ++ /* ++ * We found a key struct, but it's already been fully ++ * removed. Ignore the old struct and add a new one. ++ * fscrypt_add_key_mutex means we don't need to worry ++ * about concurrent adds. ++ */ ++ err = add_new_master_key(sb, secret, mk_spec); + } +- key_put(key); ++ fscrypt_put_master_key(mk); + } +-out_unlock: + mutex_unlock(&fscrypt_add_key_mutex); + return err; + } +@@ -731,19 +786,19 @@ int fscrypt_verify_key_added(struct super_block *sb, + const u8 identifier[FSCRYPT_KEY_IDENTIFIER_SIZE]) + { + struct fscrypt_key_specifier mk_spec; +- struct key *key, *mk_user; + struct fscrypt_master_key *mk; ++ struct key *mk_user; + int err; + + mk_spec.type = FSCRYPT_KEY_SPEC_TYPE_IDENTIFIER; + memcpy(mk_spec.u.identifier, identifier, FSCRYPT_KEY_IDENTIFIER_SIZE); + +- key = fscrypt_find_master_key(sb, &mk_spec); +- if (IS_ERR(key)) { +- err = PTR_ERR(key); ++ mk = fscrypt_find_master_key(sb, &mk_spec); ++ if (!mk) { ++ err = -ENOKEY; + goto out; + } +- mk = key->payload.data[0]; ++ down_read(&mk->mk_sem); + mk_user = find_master_key_user(mk); + if (IS_ERR(mk_user)) { + err = PTR_ERR(mk_user); +@@ -751,7 +806,8 @@ int fscrypt_verify_key_added(struct super_block *sb, + key_put(mk_user); + err = 0; + } +- key_put(key); ++ up_read(&mk->mk_sem); ++ fscrypt_put_master_key(mk); + out: + if (err == -ENOKEY && capable(CAP_FOWNER)) + err = 0; +@@ -913,11 +969,10 @@ static int do_remove_key(struct file *filp, void __user *_uarg, bool all_users) + struct super_block *sb = file_inode(filp)->i_sb; + struct fscrypt_remove_key_arg __user *uarg = _uarg; + struct fscrypt_remove_key_arg arg; +- struct key *key; + struct fscrypt_master_key *mk; + u32 status_flags = 0; + int err; +- bool dead; ++ bool inodes_remain; + + if (copy_from_user(&arg, uarg, sizeof(arg))) + return -EFAULT; +@@ -937,12 +992,10 @@ static int do_remove_key(struct file *filp, void __user *_uarg, bool all_users) + return -EACCES; + + /* Find the key being removed. 
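Two pieces in these hunks fit together: add_existing_master_key() reports KEY_DEAD when it cannot take an active reference because the count already reached zero, and do_add_master_key() responds by ignoring the dying struct and inserting a fresh one, with fscrypt_add_key_mutex making find-plus-insert atomic so no retry loop is needed. The try-get primitive is the interesting part; in C11 terms it looks roughly like this:

	#include <stdatomic.h>
	#include <stdbool.h>

	/* Take a reference only if the count is still nonzero, like
	 * refcount_inc_not_zero(). Zero means the object is logically
	 * dead and must not be revived. */
	static bool get_ref_if_live(atomic_int *refs)
	{
		int old = atomic_load(refs);

		while (old != 0) {
			if (atomic_compare_exchange_weak(refs, &old, old + 1))
				return true;	/* we hold a reference */
			/* CAS failed: old was reloaded, loop and retry */
		}
		return false;			/* caller treats this as KEY_DEAD */
	}

Without this guard, a re-add racing with the final put could increment zero back to one and resurrect a struct that another CPU is already tearing down.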
*/ +- key = fscrypt_find_master_key(sb, &arg.key_spec); +- if (IS_ERR(key)) +- return PTR_ERR(key); +- mk = key->payload.data[0]; +- +- down_write(&key->sem); ++ mk = fscrypt_find_master_key(sb, &arg.key_spec); ++ if (!mk) ++ return -ENOKEY; ++ down_write(&mk->mk_sem); + + /* If relevant, remove current user's (or all users) claim to the key */ + if (mk->mk_users && mk->mk_users->keys.nr_leaves_on_tree != 0) { +@@ -951,7 +1004,7 @@ static int do_remove_key(struct file *filp, void __user *_uarg, bool all_users) + else + err = remove_master_key_user(mk); + if (err) { +- up_write(&key->sem); ++ up_write(&mk->mk_sem); + goto out_put_key; + } + if (mk->mk_users->keys.nr_leaves_on_tree != 0) { +@@ -963,26 +1016,22 @@ static int do_remove_key(struct file *filp, void __user *_uarg, bool all_users) + status_flags |= + FSCRYPT_KEY_REMOVAL_STATUS_FLAG_OTHER_USERS; + err = 0; +- up_write(&key->sem); ++ up_write(&mk->mk_sem); + goto out_put_key; + } + } + + /* No user claims remaining. Go ahead and wipe the secret. */ +- dead = false; ++ err = -ENOKEY; + if (is_master_key_secret_present(&mk->mk_secret)) { + wipe_master_key_secret(&mk->mk_secret); +- dead = refcount_dec_and_test(&mk->mk_refcount); +- } +- up_write(&key->sem); +- if (dead) { +- /* +- * No inodes reference the key, and we wiped the secret, so the +- * key object is free to be removed from the keyring. +- */ +- key_invalidate(key); ++ fscrypt_put_master_key_activeref(mk); + err = 0; +- } else { ++ } ++ inodes_remain = refcount_read(&mk->mk_active_refs) > 0; ++ up_write(&mk->mk_sem); ++ ++ if (inodes_remain) { + /* Some inodes still reference this key; try to evict them. */ + err = try_to_lock_encrypted_files(sb, mk); + if (err == -EBUSY) { +@@ -998,7 +1047,7 @@ static int do_remove_key(struct file *filp, void __user *_uarg, bool all_users) + * has been fully removed including all files locked. + */ + out_put_key: +- key_put(key); ++ fscrypt_put_master_key(mk); + if (err == 0) + err = put_user(status_flags, &uarg->removal_status_flags); + return err; +@@ -1045,7 +1094,6 @@ int fscrypt_ioctl_get_key_status(struct file *filp, void __user *uarg) + { + struct super_block *sb = file_inode(filp)->i_sb; + struct fscrypt_get_key_status_arg arg; +- struct key *key; + struct fscrypt_master_key *mk; + int err; + +@@ -1062,19 +1110,18 @@ int fscrypt_ioctl_get_key_status(struct file *filp, void __user *uarg) + arg.user_count = 0; + memset(arg.__out_reserved, 0, sizeof(arg.__out_reserved)); + +- key = fscrypt_find_master_key(sb, &arg.key_spec); +- if (IS_ERR(key)) { +- if (key != ERR_PTR(-ENOKEY)) +- return PTR_ERR(key); ++ mk = fscrypt_find_master_key(sb, &arg.key_spec); ++ if (!mk) { + arg.status = FSCRYPT_KEY_STATUS_ABSENT; + err = 0; + goto out; + } +- mk = key->payload.data[0]; +- down_read(&key->sem); ++ down_read(&mk->mk_sem); + + if (!is_master_key_secret_present(&mk->mk_secret)) { +- arg.status = FSCRYPT_KEY_STATUS_INCOMPLETELY_REMOVED; ++ arg.status = refcount_read(&mk->mk_active_refs) > 0 ? 
++ FSCRYPT_KEY_STATUS_INCOMPLETELY_REMOVED : ++ FSCRYPT_KEY_STATUS_ABSENT /* raced with full removal */; + err = 0; + goto out_release_key; + } +@@ -1096,8 +1143,8 @@ int fscrypt_ioctl_get_key_status(struct file *filp, void __user *uarg) + } + err = 0; + out_release_key: +- up_read(&key->sem); +- key_put(key); ++ up_read(&mk->mk_sem); ++ fscrypt_put_master_key(mk); + out: + if (!err && copy_to_user(uarg, &arg, sizeof(arg))) + err = -EFAULT; +@@ -1109,13 +1156,9 @@ int __init fscrypt_init_keyring(void) + { + int err; + +- err = register_key_type(&key_type_fscrypt); +- if (err) +- return err; +- + err = register_key_type(&key_type_fscrypt_user); + if (err) +- goto err_unregister_fscrypt; ++ return err; + + err = register_key_type(&key_type_fscrypt_provisioning); + if (err) +@@ -1125,7 +1168,5 @@ int __init fscrypt_init_keyring(void) + + err_unregister_fscrypt_user: + unregister_key_type(&key_type_fscrypt_user); +-err_unregister_fscrypt: +- unregister_key_type(&key_type_fscrypt); + return err; + } diff --git a/fs/crypto/keysetup.c b/fs/crypto/keysetup.c -index bca9c6658a7c5..89cd533a88bff 100644 +index bca9c6658a7c5..c3fbd594cc79e 100644 --- a/fs/crypto/keysetup.c +++ b/fs/crypto/keysetup.c -@@ -19,6 +19,7 @@ struct fscrypt_mode fscrypt_modes[] = { +@@ -9,7 +9,6 @@ + */ + + #include <crypto/skcipher.h> +-#include <linux/key.h> + #include <linux/random.h> + + #include "fscrypt_private.h" +@@ -19,6 +18,7 @@ struct fscrypt_mode fscrypt_modes[] = { .friendly_name = "AES-256-XTS", .cipher_str = "xts(aes)", .keysize = 64, @@ -311264,7 +374495,7 @@ index bca9c6658a7c5..89cd533a88bff 100644 .ivsize = 16, .blk_crypto_mode = BLK_ENCRYPTION_MODE_AES_256_XTS, }, -@@ -26,12 +27,14 @@ struct fscrypt_mode fscrypt_modes[] = { +@@ -26,12 +26,14 @@ struct fscrypt_mode fscrypt_modes[] = { .friendly_name = "AES-256-CTS-CBC", .cipher_str = "cts(cbc(aes))", .keysize = 32, @@ -311279,7 +374510,7 @@ index bca9c6658a7c5..89cd533a88bff 100644 .ivsize = 16, .blk_crypto_mode = BLK_ENCRYPTION_MODE_AES_128_CBC_ESSIV, }, -@@ -39,12 +42,14 @@ struct fscrypt_mode fscrypt_modes[] = { +@@ -39,12 +41,14 @@ struct fscrypt_mode fscrypt_modes[] = { .friendly_name = "AES-128-CTS-CBC", .cipher_str = "cts(cbc(aes))", .keysize = 16, @@ -311294,7 +374525,15 @@ index bca9c6658a7c5..89cd533a88bff 100644 .ivsize = 32, .blk_crypto_mode = BLK_ENCRYPTION_MODE_ADIANTUM, }, -@@ -357,6 +362,45 @@ static int fscrypt_setup_v2_file_key(struct fscrypt_info *ci, +@@ -146,6 +150,7 @@ void fscrypt_destroy_prepared_key(struct fscrypt_prepared_key *prep_key) + { + crypto_free_skcipher(prep_key->tfm); + fscrypt_destroy_inline_crypt_key(prep_key); ++ memzero_explicit(prep_key, sizeof(*prep_key)); + } + + /* Given a per-file encryption key, set up the file's crypto transform object */ +@@ -357,23 +362,60 @@ static int fscrypt_setup_v2_file_key(struct fscrypt_info *ci, return 0; } @@ -311340,7 +374579,58 @@ index bca9c6658a7c5..89cd533a88bff 100644 /* * Find the master key, then set up the inode's actual encryption key. * -@@ -422,18 +466,7 @@ static int setup_file_encryption_key(struct fscrypt_info *ci, +- * If the master key is found in the filesystem-level keyring, then the +- * corresponding 'struct key' is returned in *master_key_ret with its semaphore +- * read-locked. This is needed to ensure that only one task links the +- * fscrypt_info into ->mk_decrypted_inodes (as multiple tasks may race to create +- * an fscrypt_info for the same inode), and to synchronize the master key being +- * removed with a new inode starting to use it. 
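The memzero_explicit() added to fscrypt_destroy_prepared_key() above is not a plain memset(): an ordinary store to memory that is about to be freed is a dead write the compiler may delete. A userspace approximation of the same guarantee, writing through a volatile pointer; the kernel primitives for this job are memzero_explicit() and kfree_sensitive():

	#include <stdlib.h>

	/* Scrub then free. The volatile qualifier keeps the compiler
	 * from eliding the stores as dead writes. */
	static void secure_free(void *p, size_t len)
	{
		volatile unsigned char *vp = p;

		while (len--)
			*vp++ = 0;
		free(p);
	}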
++ * If the master key is found in the filesystem-level keyring, then it is ++ * returned in *mk_ret with its semaphore read-locked. This is needed to ensure ++ * that only one task links the fscrypt_info into ->mk_decrypted_inodes (as ++ * multiple tasks may race to create an fscrypt_info for the same inode), and to ++ * synchronize the master key being removed with a new inode starting to use it. + */ + static int setup_file_encryption_key(struct fscrypt_info *ci, + bool need_dirhash_key, +- struct key **master_key_ret) ++ struct fscrypt_master_key **mk_ret) + { +- struct key *key; +- struct fscrypt_master_key *mk = NULL; + struct fscrypt_key_specifier mk_spec; ++ struct fscrypt_master_key *mk; + int err; + + err = fscrypt_select_encryption_impl(ci); +@@ -398,11 +440,10 @@ static int setup_file_encryption_key(struct fscrypt_info *ci, + return -EINVAL; + } + +- key = fscrypt_find_master_key(ci->ci_inode->i_sb, &mk_spec); +- if (IS_ERR(key)) { +- if (key != ERR_PTR(-ENOKEY) || +- ci->ci_policy.version != FSCRYPT_POLICY_V1) +- return PTR_ERR(key); ++ mk = fscrypt_find_master_key(ci->ci_inode->i_sb, &mk_spec); ++ if (!mk) { ++ if (ci->ci_policy.version != FSCRYPT_POLICY_V1) ++ return -ENOKEY; + + /* + * As a legacy fallback for v1 policies, search for the key in +@@ -412,9 +453,7 @@ static int setup_file_encryption_key(struct fscrypt_info *ci, + */ + return fscrypt_setup_v1_file_key_via_subscribed_keyrings(ci); + } +- +- mk = key->payload.data[0]; +- down_read(&key->sem); ++ down_read(&mk->mk_sem); + + /* Has the secret been removed (via FS_IOC_REMOVE_ENCRYPTION_KEY)? */ + if (!is_master_key_secret_present(&mk->mk_secret)) { +@@ -422,18 +461,7 @@ static int setup_file_encryption_key(struct fscrypt_info *ci, goto out_release_key; } @@ -311360,6 +374650,151 @@ index bca9c6658a7c5..89cd533a88bff 100644 err = -ENOKEY; goto out_release_key; } +@@ -453,18 +481,18 @@ static int setup_file_encryption_key(struct fscrypt_info *ci, + if (err) + goto out_release_key; + +- *master_key_ret = key; ++ *mk_ret = mk; + return 0; + + out_release_key: +- up_read(&key->sem); +- key_put(key); ++ up_read(&mk->mk_sem); ++ fscrypt_put_master_key(mk); + return err; + } + + static void put_crypt_info(struct fscrypt_info *ci) + { +- struct key *key; ++ struct fscrypt_master_key *mk; + + if (!ci) + return; +@@ -474,24 +502,18 @@ static void put_crypt_info(struct fscrypt_info *ci) + else if (ci->ci_owns_key) + fscrypt_destroy_prepared_key(&ci->ci_enc_key); + +- key = ci->ci_master_key; +- if (key) { +- struct fscrypt_master_key *mk = key->payload.data[0]; +- ++ mk = ci->ci_master_key; ++ if (mk) { + /* + * Remove this inode from the list of inodes that were unlocked +- * with the master key. +- * +- * In addition, if we're removing the last inode from a key that +- * already had its secret removed, invalidate the key so that it +- * gets removed from ->s_master_keys. ++ * with the master key. In addition, if we're removing the last ++ * inode from a master key struct that already had its secret ++ * removed, then complete the full removal of the struct. 
+ */ + spin_lock(&mk->mk_decrypted_inodes_lock); + list_del(&ci->ci_master_key_link); + spin_unlock(&mk->mk_decrypted_inodes_lock); +- if (refcount_dec_and_test(&mk->mk_refcount)) +- key_invalidate(key); +- key_put(key); ++ fscrypt_put_master_key_activeref(mk); + } + memzero_explicit(ci, sizeof(*ci)); + kmem_cache_free(fscrypt_info_cachep, ci); +@@ -505,7 +527,7 @@ fscrypt_setup_encryption_info(struct inode *inode, + { + struct fscrypt_info *crypt_info; + struct fscrypt_mode *mode; +- struct key *master_key = NULL; ++ struct fscrypt_master_key *mk = NULL; + int res; + + res = fscrypt_initialize(inode->i_sb->s_cop->flags); +@@ -528,8 +550,7 @@ fscrypt_setup_encryption_info(struct inode *inode, + WARN_ON(mode->ivsize > FSCRYPT_MAX_IV_SIZE); + crypt_info->ci_mode = mode; + +- res = setup_file_encryption_key(crypt_info, need_dirhash_key, +- &master_key); ++ res = setup_file_encryption_key(crypt_info, need_dirhash_key, &mk); + if (res) + goto out; + +@@ -544,12 +565,9 @@ fscrypt_setup_encryption_info(struct inode *inode, + * We won the race and set ->i_crypt_info to our crypt_info. + * Now link it into the master key's inode list. + */ +- if (master_key) { +- struct fscrypt_master_key *mk = +- master_key->payload.data[0]; +- +- refcount_inc(&mk->mk_refcount); +- crypt_info->ci_master_key = key_get(master_key); ++ if (mk) { ++ crypt_info->ci_master_key = mk; ++ refcount_inc(&mk->mk_active_refs); + spin_lock(&mk->mk_decrypted_inodes_lock); + list_add(&crypt_info->ci_master_key_link, + &mk->mk_decrypted_inodes); +@@ -559,9 +577,9 @@ fscrypt_setup_encryption_info(struct inode *inode, + } + res = 0; + out: +- if (master_key) { +- up_read(&master_key->sem); +- key_put(master_key); ++ if (mk) { ++ up_read(&mk->mk_sem); ++ fscrypt_put_master_key(mk); + } + put_crypt_info(crypt_info); + return res; +@@ -726,7 +744,6 @@ EXPORT_SYMBOL(fscrypt_free_inode); + int fscrypt_drop_inode(struct inode *inode) + { + const struct fscrypt_info *ci = fscrypt_get_info(inode); +- const struct fscrypt_master_key *mk; + + /* + * If ci is NULL, then the inode doesn't have an encryption key set up +@@ -736,7 +753,6 @@ int fscrypt_drop_inode(struct inode *inode) + */ + if (!ci || !ci->ci_master_key) + return 0; +- mk = ci->ci_master_key->payload.data[0]; + + /* + * With proper, non-racy use of FS_IOC_REMOVE_ENCRYPTION_KEY, all inodes +@@ -755,6 +771,6 @@ int fscrypt_drop_inode(struct inode *inode) + * then the thread removing the key will either evict the inode itself + * or will correctly detect that it wasn't evicted due to the race. + */ +- return !is_master_key_secret_present(&mk->mk_secret); ++ return !is_master_key_secret_present(&ci->ci_master_key->mk_secret); + } + EXPORT_SYMBOL_GPL(fscrypt_drop_inode); +diff --git a/fs/crypto/policy.c b/fs/crypto/policy.c +index ed3d623724cdd..cad34dbe8e298 100644 +--- a/fs/crypto/policy.c ++++ b/fs/crypto/policy.c +@@ -692,12 +692,8 @@ int fscrypt_set_context(struct inode *inode, void *fs_data) + * delayed key setup that requires the inode number. 
+ */ + if (ci->ci_policy.version == FSCRYPT_POLICY_V2 && +- (ci->ci_policy.v2.flags & FSCRYPT_POLICY_FLAG_IV_INO_LBLK_32)) { +- const struct fscrypt_master_key *mk = +- ci->ci_master_key->payload.data[0]; +- +- fscrypt_hash_inode_number(ci, mk); +- } ++ (ci->ci_policy.v2.flags & FSCRYPT_POLICY_FLAG_IV_INO_LBLK_32)) ++ fscrypt_hash_inode_number(ci, ci->ci_master_key); + + return inode->i_sb->s_cop->set_context(inode, &ctx, ctxsize, fs_data); + } diff --git a/fs/dax.c b/fs/dax.c index 4e3e5a283a916..4ab1c493c73f1 100644 --- a/fs/dax.c @@ -311385,7 +374820,7 @@ index 4e3e5a283a916..4ab1c493c73f1 100644 lockdep_assert_held_write(&iomi.inode->i_rwsem); iomi.flags |= IOMAP_WRITE; diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c -index 7d162b0efbf03..950c63fa4d0b2 100644 +index 7d162b0efbf03..38930d9b0bb73 100644 --- a/fs/debugfs/file.c +++ b/fs/debugfs/file.c @@ -147,7 +147,7 @@ static int debugfs_locked_down(struct inode *inode, @@ -311397,6 +374832,62 @@ index 7d162b0efbf03..950c63fa4d0b2 100644 !(filp->f_mode & FMODE_WRITE) && !real_fops->unlocked_ioctl && !real_fops->compat_ioctl && +@@ -378,8 +378,8 @@ ssize_t debugfs_attr_read(struct file *file, char __user *buf, + } + EXPORT_SYMBOL_GPL(debugfs_attr_read); + +-ssize_t debugfs_attr_write(struct file *file, const char __user *buf, +- size_t len, loff_t *ppos) ++static ssize_t debugfs_attr_write_xsigned(struct file *file, const char __user *buf, ++ size_t len, loff_t *ppos, bool is_signed) + { + struct dentry *dentry = F_DENTRY(file); + ssize_t ret; +@@ -387,12 +387,28 @@ ssize_t debugfs_attr_write(struct file *file, const char __user *buf, + ret = debugfs_file_get(dentry); + if (unlikely(ret)) + return ret; +- ret = simple_attr_write(file, buf, len, ppos); ++ if (is_signed) ++ ret = simple_attr_write_signed(file, buf, len, ppos); ++ else ++ ret = simple_attr_write(file, buf, len, ppos); + debugfs_file_put(dentry); + return ret; + } ++ ++ssize_t debugfs_attr_write(struct file *file, const char __user *buf, ++ size_t len, loff_t *ppos) ++{ ++ return debugfs_attr_write_xsigned(file, buf, len, ppos, false); ++} + EXPORT_SYMBOL_GPL(debugfs_attr_write); + ++ssize_t debugfs_attr_write_signed(struct file *file, const char __user *buf, ++ size_t len, loff_t *ppos) ++{ ++ return debugfs_attr_write_xsigned(file, buf, len, ppos, true); ++} ++EXPORT_SYMBOL_GPL(debugfs_attr_write_signed); ++ + static struct dentry *debugfs_create_mode_unsafe(const char *name, umode_t mode, + struct dentry *parent, void *value, + const struct file_operations *fops, +@@ -738,11 +754,11 @@ static int debugfs_atomic_t_get(void *data, u64 *val) + *val = atomic_read((atomic_t *)data); + return 0; + } +-DEFINE_DEBUGFS_ATTRIBUTE(fops_atomic_t, debugfs_atomic_t_get, ++DEFINE_DEBUGFS_ATTRIBUTE_SIGNED(fops_atomic_t, debugfs_atomic_t_get, + debugfs_atomic_t_set, "%lld\n"); +-DEFINE_DEBUGFS_ATTRIBUTE(fops_atomic_t_ro, debugfs_atomic_t_get, NULL, ++DEFINE_DEBUGFS_ATTRIBUTE_SIGNED(fops_atomic_t_ro, debugfs_atomic_t_get, NULL, + "%lld\n"); +-DEFINE_DEBUGFS_ATTRIBUTE(fops_atomic_t_wo, NULL, debugfs_atomic_t_set, ++DEFINE_DEBUGFS_ATTRIBUTE_SIGNED(fops_atomic_t_wo, NULL, debugfs_atomic_t_set, + "%lld\n"); + + /** diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c index 2f117c57160dc..26f9cd3282918 100644 --- a/fs/debugfs/inode.c @@ -311595,7 +375086,7 @@ index c502c065d0075..862cb7a353c1c 100644 if (oc || ou) { /* do an unlock or cancel instead of resending */ diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c -index 8f715c620e1f8..9d7078a1dc8b0 100644 +index 
8f715c620e1f8..d56a8f88a3852 100644 --- a/fs/dlm/lowcomms.c +++ b/fs/dlm/lowcomms.c @@ -592,8 +592,8 @@ int dlm_lowcomms_nodes_set_mark(int nodeid, unsigned int mark) @@ -311681,7 +375172,20 @@ index 8f715c620e1f8..9d7078a1dc8b0 100644 } void dlm_lowcomms_put_msg(struct dlm_msg *msg) -@@ -1776,7 +1780,7 @@ static int dlm_listen_for_all(void) +@@ -1516,7 +1520,11 @@ static void process_recv_sockets(struct work_struct *work) + + static void process_listen_recv_socket(struct work_struct *work) + { +- accept_from_sock(&listen_con); ++ int ret; ++ ++ do { ++ ret = accept_from_sock(&listen_con); ++ } while (!ret); + } + + static void dlm_connect(struct connection *con) +@@ -1776,7 +1784,7 @@ static int dlm_listen_for_all(void) SOCK_STREAM, dlm_proto_ops->proto, &sock); if (result < 0) { log_print("Can't create comms socket, check SCTP is loaded"); @@ -311690,6 +375194,23 @@ index 8f715c620e1f8..9d7078a1dc8b0 100644 } sock_set_mark(sock->sk, dlm_config.ci_mark); +@@ -1793,7 +1801,7 @@ static int dlm_listen_for_all(void) + result = sock->ops->listen(sock, 5); + if (result < 0) { + dlm_close_sock(&listen_con.sock); +- goto out; ++ return result; + } + + return 0; +@@ -1996,7 +2004,6 @@ fail_listen: + dlm_proto_ops = NULL; + fail_proto_ops: + dlm_allow_conn = 0; +- dlm_close_sock(&listen_con.sock); + work_stop(); + fail_local: + deinit_local(); diff --git a/fs/dlm/plock.c b/fs/dlm/plock.c index c38b2b8ffd1d3..a10d2bcfe75a8 100644 --- a/fs/dlm/plock.c @@ -311813,6 +375334,19 @@ index a5bc4b1b7813e..8193c14bb1115 100644 print_hex_dump(KERN_DEBUG, "[ in]: ", DUMP_PREFIX_OFFSET, 16, 1, src + inputmargin, rq->inputsize, true); print_hex_dump(KERN_DEBUG, "[out]: ", DUMP_PREFIX_OFFSET, +diff --git a/fs/erofs/inode.c b/fs/erofs/inode.c +index a552399e211d8..0c293ff6697b5 100644 +--- a/fs/erofs/inode.c ++++ b/fs/erofs/inode.c +@@ -222,7 +222,7 @@ static int erofs_fill_symlink(struct inode *inode, void *data, + + /* if it cannot be handled with fast symlink scheme */ + if (vi->datalayout != EROFS_INODE_FLAT_INLINE || +- inode->i_size >= PAGE_SIZE) { ++ inode->i_size >= PAGE_SIZE || inode->i_size < 0) { + inode->i_op = &erofs_symlink_iops; + return 0; + } diff --git a/fs/erofs/internal.h b/fs/erofs/internal.h index 9524e155b38fa..b77acf09726c6 100644 --- a/fs/erofs/internal.h @@ -311940,6 +375474,30 @@ index 11c7a1aaebade..eb51df4a9f770 100644 if (!err) goto retry; } +diff --git a/fs/erofs/zmap.c b/fs/erofs/zmap.c +index 7a6df35fdc915..73b86b5c1a75b 100644 +--- a/fs/erofs/zmap.c ++++ b/fs/erofs/zmap.c +@@ -700,12 +700,16 @@ static int z_erofs_iomap_begin_report(struct inode *inode, loff_t offset, + iomap->type = IOMAP_HOLE; + iomap->addr = IOMAP_NULL_ADDR; + /* +- * No strict rule how to describe extents for post EOF, yet +- * we need do like below. Otherwise, iomap itself will get ++ * No strict rule on how to describe extents for post EOF, yet ++ * we need to do like below. Otherwise, iomap itself will get + * into an endless loop on post EOF. ++ * ++ * Calculate the effective offset by subtracting extent start ++ * (map.m_la) from the requested offset, and add it to length. 
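
The corrected post-EOF expression in the hunk continuing just below (length + offset - map.m_la instead of length + map.m_la - offset) is easy to sanity-check by hand. With made-up numbers (nothing here is taken from the patch):

    #include <stdio.h>

    int main(void)
    {
        /* Hypothetical extent: starts at block offset 100; the
         * report request comes in at offset 150 with length 50. */
        long long m_la = 100, offset = 150, length = 50;

        printf("old: %lld\n", length + m_la - offset);  /* 0   */
        printf("new: %lld\n", length + offset - m_la);  /* 100 */
        return 0;
    }

With the old operand order the post-EOF extent could come back with zero or negative length, so iomap made no forward progress, which is the endless loop the comment describes.
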
++ * (NB: offset >= map.m_la always) + */ + if (iomap->offset >= inode->i_size) +- iomap->length = length + map.m_la - offset; ++ iomap->length = length + offset - map.m_la; + } + iomap->flags = 0; + return 0; diff --git a/fs/erofs/zpvec.h b/fs/erofs/zpvec.h index dfd7fe0503bb1..b05464f4a8083 100644 --- a/fs/erofs/zpvec.h @@ -311967,10 +375525,33 @@ index dfd7fe0503bb1..b05464f4a8083 100644 if (ctor->index >= ctor->nr) z_erofs_pagevec_ctor_pagedown(ctor, false); diff --git a/fs/eventfd.c b/fs/eventfd.c -index 3627dd7d25db8..c0ffee99ad238 100644 +index 3627dd7d25db8..249ca6c0b7843 100644 --- a/fs/eventfd.c +++ b/fs/eventfd.c -@@ -69,17 +69,17 @@ __u64 eventfd_signal(struct eventfd_ctx *ctx, __u64 n) +@@ -43,21 +43,7 @@ struct eventfd_ctx { + int id; + }; + +-/** +- * eventfd_signal - Adds @n to the eventfd counter. +- * @ctx: [in] Pointer to the eventfd context. +- * @n: [in] Value of the counter to be added to the eventfd internal counter. +- * The value cannot be negative. +- * +- * This function is supposed to be called by the kernel in paths that do not +- * allow sleeping. In this function we allow the counter to reach the ULLONG_MAX +- * value, and we signal this as overflow condition by returning a EPOLLERR +- * to poll(2). +- * +- * Returns the amount by which the counter was incremented. This will be less +- * than @n if the counter has overflowed. +- */ +-__u64 eventfd_signal(struct eventfd_ctx *ctx, __u64 n) ++__u64 eventfd_signal_mask(struct eventfd_ctx *ctx, __u64 n, unsigned mask) + { + unsigned long flags; + +@@ -69,21 +55,40 @@ __u64 eventfd_signal(struct eventfd_ctx *ctx, __u64 n) * it returns false, the eventfd_signal() call should be deferred to a * safe context. */ @@ -311985,13 +375566,37 @@ index 3627dd7d25db8..c0ffee99ad238 100644 n = ULLONG_MAX - ctx->count; ctx->count += n; if (waitqueue_active(&ctx->wqh)) - wake_up_locked_poll(&ctx->wqh, EPOLLIN); +- wake_up_locked_poll(&ctx->wqh, EPOLLIN); - current->in_eventfd_signal = 0; ++ wake_up_locked_poll(&ctx->wqh, EPOLLIN | mask); + current->in_eventfd = 0; spin_unlock_irqrestore(&ctx->wqh.lock, flags); return n; -@@ -253,8 +253,10 @@ static ssize_t eventfd_read(struct kiocb *iocb, struct iov_iter *to) + } ++ ++/** ++ * eventfd_signal - Adds @n to the eventfd counter. ++ * @ctx: [in] Pointer to the eventfd context. ++ * @n: [in] Value of the counter to be added to the eventfd internal counter. ++ * The value cannot be negative. ++ * ++ * This function is supposed to be called by the kernel in paths that do not ++ * allow sleeping. In this function we allow the counter to reach the ULLONG_MAX ++ * value, and we signal this as overflow condition by returning a EPOLLERR ++ * to poll(2). ++ * ++ * Returns the amount by which the counter was incremented. This will be less ++ * than @n if the counter has overflowed. 
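
eventfd_signal_mask() and the in_eventfd rename are kernel-internal, but the counter behaviour this docstring describes is observable from userspace. A small self-contained check (error handling mostly elided):

    #include <sys/eventfd.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <unistd.h>

    int main(void)
    {
        int fd = eventfd(0, 0);
        uint64_t n;

        if (fd < 0)
            return 1;

        n = 3;
        write(fd, &n, sizeof(n));   /* adds to the counter */
        n = 4;
        write(fd, &n, sizeof(n));   /* accumulates: counter is now 7 */

        read(fd, &n, sizeof(n));    /* non-semaphore read drains the sum */
        printf("%llu\n", (unsigned long long)n);   /* prints 7 */

        close(fd);
        return 0;
    }
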
++ */ ++__u64 eventfd_signal(struct eventfd_ctx *ctx, __u64 n) ++{ ++ return eventfd_signal_mask(ctx, n, 0); ++} + EXPORT_SYMBOL_GPL(eventfd_signal); + + static void eventfd_free_ctx(struct eventfd_ctx *ctx) +@@ -253,8 +258,10 @@ static ssize_t eventfd_read(struct kiocb *iocb, struct iov_iter *to) __set_current_state(TASK_RUNNING); } eventfd_ctx_do_read(ctx, &ucnt); @@ -312002,7 +375607,7 @@ index 3627dd7d25db8..c0ffee99ad238 100644 spin_unlock_irq(&ctx->wqh.lock); if (unlikely(copy_to_iter(&ucnt, sizeof(ucnt), to) != sizeof(ucnt))) return -EFAULT; -@@ -301,8 +303,10 @@ static ssize_t eventfd_write(struct file *file, const char __user *buf, size_t c +@@ -301,8 +308,10 @@ static ssize_t eventfd_write(struct file *file, const char __user *buf, size_t c } if (likely(res > 0)) { ctx->count += ucnt; @@ -312014,10 +375619,77 @@ index 3627dd7d25db8..c0ffee99ad238 100644 spin_unlock_irq(&ctx->wqh.lock); diff --git a/fs/eventpoll.c b/fs/eventpoll.c -index 06f4c5ae1451e..cf326c53db0f7 100644 +index 06f4c5ae1451e..1ec1978255443 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c -@@ -1740,6 +1740,21 @@ static struct timespec64 *ep_timeout_to_timespec(struct timespec64 *to, long ms) +@@ -484,7 +484,8 @@ static inline void ep_set_busy_poll_napi_id(struct epitem *epi) + */ + #ifdef CONFIG_DEBUG_LOCK_ALLOC + +-static void ep_poll_safewake(struct eventpoll *ep, struct epitem *epi) ++static void ep_poll_safewake(struct eventpoll *ep, struct epitem *epi, ++ unsigned pollflags) + { + struct eventpoll *ep_src; + unsigned long flags; +@@ -515,16 +516,17 @@ static void ep_poll_safewake(struct eventpoll *ep, struct epitem *epi) + } + spin_lock_irqsave_nested(&ep->poll_wait.lock, flags, nests); + ep->nests = nests + 1; +- wake_up_locked_poll(&ep->poll_wait, EPOLLIN); ++ wake_up_locked_poll(&ep->poll_wait, EPOLLIN | pollflags); + ep->nests = 0; + spin_unlock_irqrestore(&ep->poll_wait.lock, flags); + } + + #else + +-static void ep_poll_safewake(struct eventpoll *ep, struct epitem *epi) ++static void ep_poll_safewake(struct eventpoll *ep, struct epitem *epi, ++ unsigned pollflags) + { +- wake_up_poll(&ep->poll_wait, EPOLLIN); ++ wake_up_poll(&ep->poll_wait, EPOLLIN | pollflags); + } + + #endif +@@ -735,7 +737,7 @@ static void ep_free(struct eventpoll *ep) + + /* We need to release all tasks waiting for these file */ + if (waitqueue_active(&ep->poll_wait)) +- ep_poll_safewake(ep, NULL); ++ ep_poll_safewake(ep, NULL, 0); + + /* + * We need to lock this because we could be hit by +@@ -1201,7 +1203,7 @@ out_unlock: + + /* We have to call this outside the lock */ + if (pwake) +- ep_poll_safewake(ep, epi); ++ ep_poll_safewake(ep, epi, pollflags & EPOLL_URING_WAKE); + + if (!(epi->event.events & EPOLLEXCLUSIVE)) + ewake = 1; +@@ -1546,7 +1548,7 @@ static int ep_insert(struct eventpoll *ep, const struct epoll_event *event, + + /* We have to call this outside the lock */ + if (pwake) +- ep_poll_safewake(ep, NULL); ++ ep_poll_safewake(ep, NULL, 0); + + return 0; + } +@@ -1622,7 +1624,7 @@ static int ep_modify(struct eventpoll *ep, struct epitem *epi, + + /* We have to call this outside the lock */ + if (pwake) +- ep_poll_safewake(ep, NULL); ++ ep_poll_safewake(ep, NULL, 0); + + return 0; + } +@@ -1740,6 +1742,21 @@ static struct timespec64 *ep_timeout_to_timespec(struct timespec64 *to, long ms) return to; } @@ -312039,7 +375711,7 @@ index 06f4c5ae1451e..cf326c53db0f7 100644 /** * ep_poll - Retrieves ready events, and delivers them to the caller-supplied * event buffer. 
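
The pollflags plumbing above lets nested wakeups be tagged (EPOLL_URING_WAKE) without changing the ordinary delivery path. That ordinary path, seen from userspace, is just this (a hypothetical smoke test, not part of the patch):

    #include <sys/epoll.h>
    #include <sys/eventfd.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <unistd.h>

    int main(void)
    {
        int epfd = epoll_create1(0);
        int efd = eventfd(0, EFD_NONBLOCK);
        struct epoll_event ev = { .events = EPOLLIN, .data.fd = efd };
        uint64_t n = 1;
        int ready;

        epoll_ctl(epfd, EPOLL_CTL_ADD, efd, &ev);
        write(efd, &n, sizeof(n));  /* in-kernel this ends in eventfd_signal() */

        /* The eventfd is readable, so this reports one ready event. */
        ready = epoll_wait(epfd, &ev, 1, 0);
        printf("ready=%d events=0x%x\n", ready, ev.events);

        close(efd);
        close(epfd);
        return 0;
    }
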
-@@ -1821,8 +1836,15 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events, +@@ -1821,8 +1838,15 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events, * normal wakeup path no need to call __remove_wait_queue() * explicitly, thus ep->lock is not taken, which halts the * event delivery. @@ -312056,7 +375728,7 @@ index 06f4c5ae1451e..cf326c53db0f7 100644 write_lock_irq(&ep->lock); /* diff --git a/fs/exec.c b/fs/exec.c -index a098c133d8d74..7d424337b4ec9 100644 +index a098c133d8d74..881390b44cfdc 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -494,8 +494,14 @@ static int bprm_stack_limits(struct linux_binprm *bprm) @@ -312075,6 +375747,20 @@ index a098c133d8d74..7d424337b4ec9 100644 if (limit <= ptr_size) return -E2BIG; limit -= ptr_size; +@@ -1192,11 +1198,11 @@ static int unshare_sighand(struct task_struct *me) + return -ENOMEM; + + refcount_set(&newsighand->count, 1); +- memcpy(newsighand->action, oldsighand->action, +- sizeof(newsighand->action)); + + write_lock_irq(&tasklist_lock); + spin_lock(&oldsighand->siglock); ++ memcpy(newsighand->action, oldsighand->action, ++ sizeof(newsighand->action)); + rcu_assign_pointer(me->sighand, newsighand); + spin_unlock(&oldsighand->siglock); + write_unlock_irq(&tasklist_lock); @@ -1292,7 +1298,10 @@ int begin_new_exec(struct linux_binprm * bprm) bprm->mm = NULL; @@ -312483,10 +376169,32 @@ index d8d580b609baa..02d82f8fe85d9 100644 kfree(sbi->s_debts); failed_mount: brelse(bh); +diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c +index a0fb0c4bdc7cd..f9a79053f03ad 100644 +--- a/fs/ext4/balloc.c ++++ b/fs/ext4/balloc.c +@@ -665,7 +665,7 @@ int ext4_should_retry_alloc(struct super_block *sb, int *retries) + * it's possible we've just missed a transaction commit here, + * so ignore the returned status + */ +- jbd_debug(1, "%s: retrying operation after ENOSPC\n", sb->s_id); ++ ext4_debug("%s: retrying operation after ENOSPC\n", sb->s_id); + (void) jbd2_journal_force_commit_nested(sbi->s_journal); + return 1; + } diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h -index 3825195539d74..2d84030d7b7fc 100644 +index 3825195539d74..bc209f3033273 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h +@@ -559,7 +559,7 @@ enum { + * + * It's not paranoia if the Murphy's Law really *is* out to get you. 
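
In the TEST_FLAG_VALUE hunk just below, the literal changes from 1 to 1U because EXT4_INODE_* flag numbers go up to 31, and shifting a signed 1 into the sign bit is undefined behaviour in C. A two-line refresher (not from the patch):

    #include <stdio.h>

    int main(void)
    {
        unsigned bit = 31;

        /* 1 << 31 overflows a 32-bit signed int: undefined behaviour.
         * 1U << 31 is well defined and yields 0x80000000. */
        printf("0x%x\n", 1U << bit);
        return 0;
    }
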
:-) + */ +-#define TEST_FLAG_VALUE(FLAG) (EXT4_##FLAG##_FL == (1 << EXT4_INODE_##FLAG)) ++#define TEST_FLAG_VALUE(FLAG) (EXT4_##FLAG##_FL == (1U << EXT4_INODE_##FLAG)) + #define CHECK_FLAG_VALUE(FLAG) BUILD_BUG_ON(!TEST_FLAG_VALUE(FLAG)) + + static inline void ext4_check_flag_values(void) @@ -1435,12 +1435,6 @@ struct ext4_super_block { #ifdef __KERNEL__ @@ -312566,7 +376274,17 @@ index 3825195539d74..2d84030d7b7fc 100644 /* mballoc.c */ extern const struct seq_operations ext4_mb_seq_groups_ops; -@@ -3028,7 +3025,7 @@ extern int ext4_inode_attach_jinode(struct inode *inode); +@@ -2999,7 +2996,8 @@ int do_journal_get_write_access(handle_t *handle, struct inode *inode, + typedef enum { + EXT4_IGET_NORMAL = 0, + EXT4_IGET_SPECIAL = 0x0001, /* OK to iget a system inode */ +- EXT4_IGET_HANDLE = 0x0002 /* Inode # is from a handle */ ++ EXT4_IGET_HANDLE = 0x0002, /* Inode # is from a handle */ ++ EXT4_IGET_BAD = 0x0004 /* Allow to iget a bad inode */ + } ext4_iget_flags; + + extern struct inode *__ext4_iget(struct super_block *sb, unsigned long ino, +@@ -3028,7 +3026,7 @@ extern int ext4_inode_attach_jinode(struct inode *inode); extern int ext4_can_truncate(struct inode *inode); extern int ext4_truncate(struct inode *); extern int ext4_break_layouts(struct inode *); @@ -312575,8 +376293,19 @@ index 3825195539d74..2d84030d7b7fc 100644 extern void ext4_set_inode_flags(struct inode *, bool init); extern int ext4_alloc_da_blocks(struct inode *inode); extern void ext4_set_aops(struct inode *inode); +@@ -3649,8 +3647,8 @@ extern void ext4_initialize_dirent_tail(struct buffer_head *bh, + unsigned int blocksize); + extern int ext4_handle_dirty_dirblock(handle_t *handle, struct inode *inode, + struct buffer_head *bh); +-extern int __ext4_unlink(handle_t *handle, struct inode *dir, const struct qstr *d_name, +- struct inode *inode); ++extern int __ext4_unlink(struct inode *dir, const struct qstr *d_name, ++ struct inode *inode, struct dentry *dentry); + extern int __ext4_link(struct inode *dir, struct inode *inode, + struct dentry *dentry); + diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c -index 6def7339056db..3477a16d08aee 100644 +index 6def7339056db..8e1fb18f465ea 100644 --- a/fs/ext4/ext4_jbd2.c +++ b/fs/ext4/ext4_jbd2.c @@ -162,6 +162,8 @@ int __ext4_journal_ensure_credits(handle_t *handle, int check_cred, @@ -312588,8 +376317,18 @@ index 6def7339056db..3477a16d08aee 100644 if (jbd2_handle_buffer_credits(handle) >= check_cred && handle->h_revoke_credits >= revoke_cred) return 0; +@@ -265,8 +267,7 @@ int __ext4_forget(const char *where, unsigned int line, handle_t *handle, + trace_ext4_forget(inode, is_metadata, blocknr); + BUFFER_TRACE(bh, "enter"); + +- jbd_debug(4, "forgetting bh %p: is_metadata = %d, mode %o, " +- "data mode %x\n", ++ ext4_debug("forgetting bh %p: is_metadata=%d, mode %o, data mode %x\n", + bh, is_metadata, inode->i_mode, + test_opt(inode->i_sb, DATA_FLAGS)); + diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c -index 0e02571f2f828..725607520e84c 100644 +index 0e02571f2f828..d3fae909fcbf8 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -136,15 +136,25 @@ int ext4_datasem_ensure_credits(handle_t *handle, struct inode *inode, @@ -313010,11 +376749,12 @@ index 0e02571f2f828..725607520e84c 100644 /* Let path point to the last extent */ path = ext4_find_extent(inode, EXT_MAX_BLOCKS - 1, NULL, -@@ -5159,11 +5189,15 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle, +@@ -5159,11 +5189,16 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle, * till 
we reach stop. In case of right shift, iterator points to stop * and it is decreased till we reach start. */ +again: ++ ret = 0; if (SHIFT == SHIFT_LEFT) iterator = &start; else @@ -313026,7 +376766,7 @@ index 0e02571f2f828..725607520e84c 100644 /* * Its safe to start updating extents. Start and stop are unsigned, so * in case of right shift if extent with 0 block is reached, iterator -@@ -5192,6 +5226,7 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle, +@@ -5192,24 +5227,35 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle, } } @@ -313034,7 +376774,30 @@ index 0e02571f2f828..725607520e84c 100644 if (SHIFT == SHIFT_LEFT) { extent = EXT_LAST_EXTENT(path[depth].p_hdr); *iterator = le32_to_cpu(extent->ee_block) + -@@ -5210,6 +5245,9 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle, + ext4_ext_get_actual_len(extent); + } else { + extent = EXT_FIRST_EXTENT(path[depth].p_hdr); +- if (le32_to_cpu(extent->ee_block) > 0) ++ if (le32_to_cpu(extent->ee_block) > start) + *iterator = le32_to_cpu(extent->ee_block) - 1; +- else +- /* Beginning is reached, end of the loop */ ++ else if (le32_to_cpu(extent->ee_block) == start) + iterator = NULL; +- /* Update path extent in case we need to stop */ +- while (le32_to_cpu(extent->ee_block) < start) ++ else { ++ extent = EXT_LAST_EXTENT(path[depth].p_hdr); ++ while (le32_to_cpu(extent->ee_block) >= start) ++ extent--; ++ ++ if (extent == EXT_LAST_EXTENT(path[depth].p_hdr)) ++ break; ++ + extent++; ++ iterator = NULL; ++ } + path[depth].p_ext = extent; } ret = ext4_ext_shift_path_extents(path, shift, inode, handle, SHIFT); @@ -313044,7 +376807,7 @@ index 0e02571f2f828..725607520e84c 100644 if (ret) break; } -@@ -5224,8 +5262,9 @@ out: +@@ -5224,8 +5270,9 @@ out: * This implements the fallocate's collapse range functionality for ext4 * Returns: 0 and non-zero on error. */ @@ -313055,7 +376818,7 @@ index 0e02571f2f828..725607520e84c 100644 struct super_block *sb = inode->i_sb; struct address_space *mapping = inode->i_mapping; ext4_lblk_t punch_start, punch_stop; -@@ -5277,6 +5316,10 @@ static int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len) +@@ -5277,6 +5324,10 @@ static int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len) /* Wait for existing dio to complete */ inode_dio_wait(inode); @@ -313066,7 +376829,7 @@ index 0e02571f2f828..725607520e84c 100644 /* * Prevent page faults from reinstantiating pages we have released from * page cache. -@@ -5316,7 +5359,7 @@ static int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len) +@@ -5316,7 +5367,7 @@ static int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len) ret = PTR_ERR(handle); goto out_mmap; } @@ -313075,7 +376838,7 @@ index 0e02571f2f828..725607520e84c 100644 down_write(&EXT4_I(inode)->i_data_sem); ext4_discard_preallocations(inode, 0); -@@ -5355,7 +5398,6 @@ static int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len) +@@ -5355,7 +5406,6 @@ static int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len) out_stop: ext4_journal_stop(handle); @@ -313083,7 +376846,7 @@ index 0e02571f2f828..725607520e84c 100644 out_mmap: filemap_invalidate_unlock(mapping); out_mutex: -@@ -5371,8 +5413,9 @@ out_mutex: +@@ -5371,8 +5421,9 @@ out_mutex: * by len bytes. * Returns 0 on success, error otherwise. 
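
Both ext4_collapse_range() above and ext4_insert_range() below now wait for in-flight DIO and hold the invalidate lock around the whole operation. The operation being hardened is reachable from userspace through fallocate(2); a minimal demonstration, assuming an ext4 (or other supporting) filesystem and an arbitrary file name:

    /* build: cc collapse.c && ./a.out  (run on ext4) */
    #define _GNU_SOURCE
    #include <fcntl.h>
    #include <stdio.h>
    #include <unistd.h>

    int main(void)
    {
        int fd = open("testfile", O_RDWR | O_CREAT | O_TRUNC, 0600);
        char block[4096] = { 0 };

        if (fd < 0)
            return 1;

        /* Two blocks on disk; collapsing the first shifts the
         * second down, so the file shrinks to one block. */
        write(fd, block, sizeof(block));
        write(fd, block, sizeof(block));

        if (fallocate(fd, FALLOC_FL_COLLAPSE_RANGE, 0, 4096) != 0)
            perror("fallocate");   /* offset/len must be block-aligned */

        printf("size: %lld\n", (long long)lseek(fd, 0, SEEK_END));  /* 4096 */
        close(fd);
        return 0;
    }
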
*/ @@ -313094,7 +376857,7 @@ index 0e02571f2f828..725607520e84c 100644 struct super_block *sb = inode->i_sb; struct address_space *mapping = inode->i_mapping; handle_t *handle; -@@ -5429,6 +5472,10 @@ static int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len) +@@ -5429,6 +5480,10 @@ static int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len) /* Wait for existing dio to complete */ inode_dio_wait(inode); @@ -313105,7 +376868,7 @@ index 0e02571f2f828..725607520e84c 100644 /* * Prevent page faults from reinstantiating pages we have released from * page cache. -@@ -5457,7 +5504,7 @@ static int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len) +@@ -5457,7 +5512,7 @@ static int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len) ret = PTR_ERR(handle); goto out_mmap; } @@ -313114,7 +376877,7 @@ index 0e02571f2f828..725607520e84c 100644 /* Expand file to avoid data loss if there is error while shifting */ inode->i_size += len; -@@ -5532,7 +5579,6 @@ static int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len) +@@ -5532,7 +5587,6 @@ static int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len) out_stop: ext4_journal_stop(handle); @@ -313122,7 +376885,22 @@ index 0e02571f2f828..725607520e84c 100644 out_mmap: filemap_invalidate_unlock(mapping); out_mutex: -@@ -6072,11 +6118,15 @@ int ext4_ext_clear_bb(struct inode *inode) +@@ -5756,6 +5810,14 @@ int ext4_clu_mapped(struct inode *inode, ext4_lblk_t lclu) + struct ext4_extent *extent; + ext4_lblk_t first_lblk, first_lclu, last_lclu; + ++ /* ++ * if data can be stored inline, the logical cluster isn't ++ * mapped - no physical clusters have been allocated, and the ++ * file has no extents ++ */ ++ if (ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA)) ++ return 0; ++ + /* search for the extent closest to the first block in the cluster */ + path = ext4_find_extent(inode, EXT4_C2B(sbi, lclu), NULL, 0); + if (IS_ERR(path)) { +@@ -6072,11 +6134,15 @@ int ext4_ext_clear_bb(struct inode *inode) ext4_mb_mark_bb(inode->i_sb, path[j].p_block, 1, 0); @@ -313138,8 +376916,29 @@ index 0e02571f2f828..725607520e84c 100644 } cur = cur + map.m_len; } +diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c +index 9a3a8996aacf7..aa99a3659edfc 100644 +--- a/fs/ext4/extents_status.c ++++ b/fs/ext4/extents_status.c +@@ -1372,7 +1372,7 @@ retry: + if (count_reserved) + count_rsvd(inode, lblk, orig_es.es_len - len1 - len2, + &orig_es, &rc); +- goto out; ++ goto out_get_reserved; + } + + if (len1 > 0) { +@@ -1414,6 +1414,7 @@ retry: + } + } + ++out_get_reserved: + if (count_reserved) + *reserved = get_rsvd(inode, end, es, &rc); + out: diff --git a/fs/ext4/fast_commit.c b/fs/ext4/fast_commit.c -index 8ea5a81e65548..be3f8ce98962f 100644 +index 8ea5a81e65548..a8d0a8081a1da 100644 --- a/fs/ext4/fast_commit.c +++ b/fs/ext4/fast_commit.c @@ -65,21 +65,11 @@ @@ -313265,25 +377064,47 @@ index 8ea5a81e65548..be3f8ce98962f 100644 &sbi->s_fc_q[FC_Q_STAGING] : &sbi->s_fc_q[FC_Q_MAIN]); spin_unlock(&sbi->s_fc_lock); -@@ -437,7 +404,7 @@ static int __track_dentry_update(struct inode *inode, void *arg, bool update) +@@ -432,25 +399,34 @@ static int __track_dentry_update(struct inode *inode, void *arg, bool update) + struct __track_dentry_update_args *dentry_update = + (struct __track_dentry_update_args *)arg; + struct dentry *dentry = dentry_update->dentry; +- struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); ++ struct inode *dir = dentry->d_parent->d_inode; ++ struct super_block 
*sb = inode->i_sb; ++ struct ext4_sb_info *sbi = EXT4_SB(sb); + mutex_unlock(&ei->i_fc_lock); ++ ++ if (IS_ENCRYPTED(dir)) { ++ ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_ENCRYPTED_FILENAME, ++ NULL); ++ mutex_lock(&ei->i_fc_lock); ++ return -EOPNOTSUPP; ++ } ++ node = kmem_cache_alloc(ext4_fc_dentry_cachep, GFP_NOFS); if (!node) { - ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_NOMEM); -+ ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_NOMEM, NULL); ++ ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_NOMEM, NULL); mutex_lock(&ei->i_fc_lock); return -ENOMEM; } -@@ -450,7 +417,7 @@ static int __track_dentry_update(struct inode *inode, void *arg, bool update) + + node->fcd_op = dentry_update->op; +- node->fcd_parent = dentry->d_parent->d_inode->i_ino; ++ node->fcd_parent = dir->i_ino; + node->fcd_ino = inode->i_ino; + if (dentry->d_name.len > DNAME_INLINE_LEN) { + node->fcd_name.name = kmalloc(dentry->d_name.len, GFP_NOFS); if (!node->fcd_name.name) { kmem_cache_free(ext4_fc_dentry_cachep, node); - ext4_fc_mark_ineligible(inode->i_sb, +- ext4_fc_mark_ineligible(inode->i_sb, - EXT4_FC_REASON_NOMEM); -+ EXT4_FC_REASON_NOMEM, NULL); ++ ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_NOMEM, NULL); mutex_lock(&ei->i_fc_lock); return -ENOMEM; } -@@ -464,7 +431,8 @@ static int __track_dentry_update(struct inode *inode, void *arg, bool update) +@@ -464,7 +440,8 @@ static int __track_dentry_update(struct inode *inode, void *arg, bool update) node->fcd_name.len = dentry->d_name.len; spin_lock(&sbi->s_fc_lock); @@ -313293,7 +377114,7 @@ index 8ea5a81e65548..be3f8ce98962f 100644 list_add_tail(&node->fcd_list, &sbi->s_fc_dentry_q[FC_Q_STAGING]); else -@@ -552,7 +520,7 @@ void ext4_fc_track_inode(handle_t *handle, struct inode *inode) +@@ -552,7 +529,7 @@ void ext4_fc_track_inode(handle_t *handle, struct inode *inode) if (ext4_should_journal_data(inode)) { ext4_fc_mark_ineligible(inode->i_sb, @@ -313302,21 +377123,207 @@ index 8ea5a81e65548..be3f8ce98962f 100644 return; } -@@ -826,22 +794,25 @@ static int ext4_fc_write_inode(struct inode *inode, u32 *crc) +@@ -627,6 +604,15 @@ static void ext4_fc_submit_bh(struct super_block *sb, bool is_tail) + + /* Ext4 commit path routines */ + ++/* memcpy to fc reserved space and update CRC */ ++static void *ext4_fc_memcpy(struct super_block *sb, void *dst, const void *src, ++ int len, u32 *crc) ++{ ++ if (crc) ++ *crc = ext4_chksum(EXT4_SB(sb), *crc, src, len); ++ return memcpy(dst, src, len); ++} ++ + /* memzero and update CRC */ + static void *ext4_fc_memzero(struct super_block *sb, void *dst, int len, + u32 *crc) +@@ -652,62 +638,59 @@ static void *ext4_fc_memzero(struct super_block *sb, void *dst, int len, + */ + static u8 *ext4_fc_reserve_space(struct super_block *sb, int len, u32 *crc) + { +- struct ext4_fc_tl *tl; ++ struct ext4_fc_tl tl; + struct ext4_sb_info *sbi = EXT4_SB(sb); + struct buffer_head *bh; + int bsize = sbi->s_journal->j_blocksize; + int ret, off = sbi->s_fc_bytes % bsize; +- int pad_len; ++ int remaining; ++ u8 *dst; + + /* +- * After allocating len, we should have space at least for a 0 byte +- * padding. ++ * If 'len' is too long to fit in any block alongside a PAD tlv, then we ++ * cannot fulfill the request. + */ +- if (len + sizeof(struct ext4_fc_tl) > bsize) ++ if (len > bsize - EXT4_FC_TAG_BASE_LEN) + return NULL; + +- if (bsize - off - 1 > len + sizeof(struct ext4_fc_tl)) { +- /* +- * Only allocate from current buffer if we have enough space for +- * this request AND we have space to add a zero byte padding. 
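
The ext4_fc_reserve_space() rewrite below reasons purely in terms of tag-length-value records, with EXT4_FC_TAG_BASE_LEN as the header size. The framing itself is tiny; a toy writer (the field widths mimic ext4's u16 tag and length, but the tag value is arbitrary and real ext4 stores both fields little-endian):

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    struct tl {
        uint16_t tag;
        uint16_t len;
    };

    static size_t put_tlv(uint8_t *dst, uint16_t tag,
                          const void *val, uint16_t len)
    {
        struct tl tl = { tag, len };

        memcpy(dst, &tl, sizeof(tl));        /* header first...   */
        memcpy(dst + sizeof(tl), val, len);  /* ...then the value */
        return sizeof(tl) + len;
    }

    int main(void)
    {
        uint8_t buf[64];

        printf("wrote %zu bytes\n", put_tlv(buf, 7, "hi", 2));  /* 6 */
        return 0;
    }

The bug class the rewrite closes is exactly about this framing: a record may only be placed in a block if a PAD header still fits behind it, otherwise the parser walks off the end of the block.
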
+- */ +- if (!sbi->s_fc_bh) { +- ret = jbd2_fc_get_buf(EXT4_SB(sb)->s_journal, &bh); +- if (ret) +- return NULL; +- sbi->s_fc_bh = bh; +- } ++ if (!sbi->s_fc_bh) { ++ ret = jbd2_fc_get_buf(EXT4_SB(sb)->s_journal, &bh); ++ if (ret) ++ return NULL; ++ sbi->s_fc_bh = bh; ++ } ++ dst = sbi->s_fc_bh->b_data + off; ++ ++ /* ++ * Allocate the bytes in the current block if we can do so while still ++ * leaving enough space for a PAD tlv. ++ */ ++ remaining = bsize - EXT4_FC_TAG_BASE_LEN - off; ++ if (len <= remaining) { + sbi->s_fc_bytes += len; +- return sbi->s_fc_bh->b_data + off; ++ return dst; + } +- /* Need to add PAD tag */ +- tl = (struct ext4_fc_tl *)(sbi->s_fc_bh->b_data + off); +- tl->fc_tag = cpu_to_le16(EXT4_FC_TAG_PAD); +- pad_len = bsize - off - 1 - sizeof(struct ext4_fc_tl); +- tl->fc_len = cpu_to_le16(pad_len); +- if (crc) +- *crc = ext4_chksum(sbi, *crc, tl, sizeof(*tl)); +- if (pad_len > 0) +- ext4_fc_memzero(sb, tl + 1, pad_len, crc); ++ ++ /* ++ * Else, terminate the current block with a PAD tlv, then allocate a new ++ * block and allocate the bytes at the start of that new block. ++ */ ++ ++ tl.fc_tag = cpu_to_le16(EXT4_FC_TAG_PAD); ++ tl.fc_len = cpu_to_le16(remaining); ++ ext4_fc_memcpy(sb, dst, &tl, EXT4_FC_TAG_BASE_LEN, crc); ++ ext4_fc_memzero(sb, dst + EXT4_FC_TAG_BASE_LEN, remaining, crc); ++ + ext4_fc_submit_bh(sb, false); + + ret = jbd2_fc_get_buf(EXT4_SB(sb)->s_journal, &bh); + if (ret) + return NULL; + sbi->s_fc_bh = bh; +- sbi->s_fc_bytes = (sbi->s_fc_bytes / bsize + 1) * bsize + len; ++ sbi->s_fc_bytes += bsize - off + len; + return sbi->s_fc_bh->b_data; + } + +-/* memcpy to fc reserved space and update CRC */ +-static void *ext4_fc_memcpy(struct super_block *sb, void *dst, const void *src, +- int len, u32 *crc) +-{ +- if (crc) +- *crc = ext4_chksum(EXT4_SB(sb), *crc, src, len); +- return memcpy(dst, src, len); +-} +- + /* + * Complete a fast commit by writing tail tag. + * +@@ -728,23 +711,25 @@ static int ext4_fc_write_tail(struct super_block *sb, u32 crc) + * ext4_fc_reserve_space takes care of allocating an extra block if + * there's no enough space on this block for accommodating this tail. + */ +- dst = ext4_fc_reserve_space(sb, sizeof(tl) + sizeof(tail), &crc); ++ dst = ext4_fc_reserve_space(sb, EXT4_FC_TAG_BASE_LEN + sizeof(tail), &crc); + if (!dst) + return -ENOSPC; + + off = sbi->s_fc_bytes % bsize; + + tl.fc_tag = cpu_to_le16(EXT4_FC_TAG_TAIL); +- tl.fc_len = cpu_to_le16(bsize - off - 1 + sizeof(struct ext4_fc_tail)); ++ tl.fc_len = cpu_to_le16(bsize - off + sizeof(struct ext4_fc_tail)); + sbi->s_fc_bytes = round_up(sbi->s_fc_bytes, bsize); + +- ext4_fc_memcpy(sb, dst, &tl, sizeof(tl), &crc); +- dst += sizeof(tl); ++ ext4_fc_memcpy(sb, dst, &tl, EXT4_FC_TAG_BASE_LEN, &crc); ++ dst += EXT4_FC_TAG_BASE_LEN; + tail.fc_tid = cpu_to_le32(sbi->s_journal->j_running_transaction->t_tid); + ext4_fc_memcpy(sb, dst, &tail.fc_tid, sizeof(tail.fc_tid), &crc); + dst += sizeof(tail.fc_tid); + tail.fc_crc = cpu_to_le32(crc); + ext4_fc_memcpy(sb, dst, &tail.fc_crc, sizeof(tail.fc_crc), NULL); ++ dst += sizeof(tail.fc_crc); ++ memset(dst, 0, bsize - off); /* Don't leak uninitialized memory. 
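
The tail record above checksums everything that precedes its fc_crc field and then stores the result in that field; the replay side recomputes over the same offsetof()-bounded prefix. In miniature, with toy_crc as a stand-in for ext4's real checksum:

    #include <stddef.h>
    #include <stdint.h>
    #include <stdio.h>

    struct tail {
        uint32_t tid;
        uint32_t crc;   /* covers everything before this field */
    };

    static uint32_t toy_crc(const void *p, size_t n)
    {
        const uint8_t *b = p;
        uint32_t c = 5381;

        while (n--)
            c = c * 33 + *b++;   /* djb2, purely illustrative */
        return c;
    }

    int main(void)
    {
        struct tail t = { .tid = 42, .crc = 0 };

        t.crc = toy_crc(&t, offsetof(struct tail, crc));
        printf("crc=%u\n", t.crc);
        return 0;
    }
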
*/ + + ext4_fc_submit_bh(sb, true); + +@@ -761,15 +746,15 @@ static bool ext4_fc_add_tlv(struct super_block *sb, u16 tag, u16 len, u8 *val, + struct ext4_fc_tl tl; + u8 *dst; + +- dst = ext4_fc_reserve_space(sb, sizeof(tl) + len, crc); ++ dst = ext4_fc_reserve_space(sb, EXT4_FC_TAG_BASE_LEN + len, crc); + if (!dst) + return false; + + tl.fc_tag = cpu_to_le16(tag); + tl.fc_len = cpu_to_le16(len); + +- ext4_fc_memcpy(sb, dst, &tl, sizeof(tl), crc); +- ext4_fc_memcpy(sb, dst + sizeof(tl), val, len, crc); ++ ext4_fc_memcpy(sb, dst, &tl, EXT4_FC_TAG_BASE_LEN, crc); ++ ext4_fc_memcpy(sb, dst + EXT4_FC_TAG_BASE_LEN, val, len, crc); + + return true; + } +@@ -781,8 +766,8 @@ static bool ext4_fc_add_dentry_tlv(struct super_block *sb, u32 *crc, + struct ext4_fc_dentry_info fcd; + struct ext4_fc_tl tl; + int dlen = fc_dentry->fcd_name.len; +- u8 *dst = ext4_fc_reserve_space(sb, sizeof(tl) + sizeof(fcd) + dlen, +- crc); ++ u8 *dst = ext4_fc_reserve_space(sb, ++ EXT4_FC_TAG_BASE_LEN + sizeof(fcd) + dlen, crc); + + if (!dst) + return false; +@@ -791,8 +776,8 @@ static bool ext4_fc_add_dentry_tlv(struct super_block *sb, u32 *crc, + fcd.fc_ino = cpu_to_le32(fc_dentry->fcd_ino); + tl.fc_tag = cpu_to_le16(fc_dentry->fcd_op); + tl.fc_len = cpu_to_le16(sizeof(fcd) + dlen); +- ext4_fc_memcpy(sb, dst, &tl, sizeof(tl), crc); +- dst += sizeof(tl); ++ ext4_fc_memcpy(sb, dst, &tl, EXT4_FC_TAG_BASE_LEN, crc); ++ dst += EXT4_FC_TAG_BASE_LEN; + ext4_fc_memcpy(sb, dst, &fcd, sizeof(fcd), crc); + dst += sizeof(fcd); + ext4_fc_memcpy(sb, dst, fc_dentry->fcd_name.name, dlen, crc); +@@ -826,22 +811,25 @@ static int ext4_fc_write_inode(struct inode *inode, u32 *crc) tl.fc_tag = cpu_to_le16(EXT4_FC_TAG_INODE); tl.fc_len = cpu_to_le16(inode_len + sizeof(fc_inode.fc_ino)); + ret = -ECANCELED; dst = ext4_fc_reserve_space(inode->i_sb, - sizeof(tl) + inode_len + sizeof(fc_inode.fc_ino), crc); +- sizeof(tl) + inode_len + sizeof(fc_inode.fc_ino), crc); ++ EXT4_FC_TAG_BASE_LEN + inode_len + sizeof(fc_inode.fc_ino), crc); if (!dst) - return -ECANCELED; + goto err; - if (!ext4_fc_memcpy(inode->i_sb, dst, &tl, sizeof(tl), crc)) +- if (!ext4_fc_memcpy(inode->i_sb, dst, &tl, sizeof(tl), crc)) - return -ECANCELED; +- dst += sizeof(tl); ++ if (!ext4_fc_memcpy(inode->i_sb, dst, &tl, EXT4_FC_TAG_BASE_LEN, crc)) + goto err; - dst += sizeof(tl); ++ dst += EXT4_FC_TAG_BASE_LEN; if (!ext4_fc_memcpy(inode->i_sb, dst, &fc_inode, sizeof(fc_inode), crc)) - return -ECANCELED; + goto err; @@ -313334,7 +377341,18 @@ index 8ea5a81e65548..be3f8ce98962f 100644 } /* -@@ -928,7 +899,6 @@ static int ext4_fc_submit_inode_data_all(journal_t *journal) +@@ -869,8 +857,8 @@ static int ext4_fc_write_inode_data(struct inode *inode, u32 *crc) + mutex_unlock(&ei->i_fc_lock); + + cur_lblk_off = old_blk_size; +- jbd_debug(1, "%s: will try writing %d to %d for inode %ld\n", +- __func__, cur_lblk_off, new_blk_size, inode->i_ino); ++ ext4_debug("will try writing %d to %d for inode %ld\n", ++ cur_lblk_off, new_blk_size, inode->i_ino); + + while (cur_lblk_off <= new_blk_size) { + map.m_lblk = cur_lblk_off; +@@ -928,7 +916,6 @@ static int ext4_fc_submit_inode_data_all(journal_t *journal) int ret = 0; spin_lock(&sbi->s_fc_lock); @@ -313342,7 +377360,7 @@ index 8ea5a81e65548..be3f8ce98962f 100644 list_for_each_entry(ei, &sbi->s_fc_q[FC_Q_MAIN], i_fc_list) { ext4_set_inode_state(&ei->vfs_inode, EXT4_STATE_FC_COMMITTING); while (atomic_read(&ei->i_fc_updates)) { -@@ -1121,6 +1091,32 @@ out: +@@ -1121,6 +1108,32 @@ out: return ret; } @@ -313351,7 +377369,7 @@ index 
8ea5a81e65548..be3f8ce98962f 100644 +{ + struct ext4_fc_stats *stats = &EXT4_SB(sb)->s_fc_stats; + -+ jbd_debug(1, "Fast commit ended with status = %d", status); ++ ext4_debug("Fast commit ended with status = %d", status); + if (status == EXT4_FC_STATUS_OK) { + stats->fc_num_commits++; + stats->fc_numblks += nblks; @@ -313375,7 +377393,7 @@ index 8ea5a81e65548..be3f8ce98962f 100644 /* * The main commit entry point. Performs a fast commit for transaction * commit_tid if needed. If it's not possible to perform a fast commit -@@ -1133,18 +1129,15 @@ int ext4_fc_commit(journal_t *journal, tid_t commit_tid) +@@ -1133,18 +1146,15 @@ int ext4_fc_commit(journal_t *journal, tid_t commit_tid) struct ext4_sb_info *sbi = EXT4_SB(sb); int nblks = 0, ret, bsize = journal->j_blocksize; int subtid = atomic_read(&sbi->s_fc_subtid); @@ -313397,7 +377415,7 @@ index 8ea5a81e65548..be3f8ce98962f 100644 restart_fc: ret = jbd2_fc_begin_commit(journal, commit_tid); -@@ -1153,74 +1146,59 @@ restart_fc: +@@ -1153,74 +1163,59 @@ restart_fc: if (atomic_read(&sbi->s_fc_subtid) <= subtid && commit_tid > journal->j_commit_sequence) goto restart_fc; @@ -313505,7 +377523,7 @@ index 8ea5a81e65548..be3f8ce98962f 100644 { struct super_block *sb = journal->j_private; struct ext4_sb_info *sbi = EXT4_SB(sb); -@@ -1238,7 +1216,8 @@ static void ext4_fc_cleanup(journal_t *journal, int full) +@@ -1238,7 +1233,8 @@ static void ext4_fc_cleanup(journal_t *journal, int full) list_del_init(&iter->i_fc_list); ext4_clear_inode_state(&iter->vfs_inode, EXT4_STATE_FC_COMMITTING); @@ -313515,7 +377533,7 @@ index 8ea5a81e65548..be3f8ce98962f 100644 /* Make sure EXT4_STATE_FC_COMMITTING bit is clear */ smp_mb(); #if (BITS_PER_LONG < 64) -@@ -1267,8 +1246,10 @@ static void ext4_fc_cleanup(journal_t *journal, int full) +@@ -1267,8 +1263,10 @@ static void ext4_fc_cleanup(journal_t *journal, int full) list_splice_init(&sbi->s_fc_q[FC_Q_STAGING], &sbi->s_fc_q[FC_Q_MAIN]); @@ -313528,7 +377546,99 @@ index 8ea5a81e65548..be3f8ce98962f 100644 if (full) sbi->s_fc_bytes = 0; -@@ -1433,14 +1414,17 @@ static int ext4_fc_record_modified_inode(struct super_block *sb, int ino) +@@ -1285,7 +1283,7 @@ struct dentry_info_args { + }; + + static inline void tl_to_darg(struct dentry_info_args *darg, +- struct ext4_fc_tl *tl, u8 *val) ++ struct ext4_fc_tl *tl, u8 *val) + { + struct ext4_fc_dentry_info fcd; + +@@ -1294,8 +1292,14 @@ static inline void tl_to_darg(struct dentry_info_args *darg, + darg->parent_ino = le32_to_cpu(fcd.fc_parent_ino); + darg->ino = le32_to_cpu(fcd.fc_ino); + darg->dname = val + offsetof(struct ext4_fc_dentry_info, fc_dname); +- darg->dname_len = le16_to_cpu(tl->fc_len) - +- sizeof(struct ext4_fc_dentry_info); ++ darg->dname_len = tl->fc_len - sizeof(struct ext4_fc_dentry_info); ++} ++ ++static inline void ext4_fc_get_tl(struct ext4_fc_tl *tl, u8 *val) ++{ ++ memcpy(tl, val, EXT4_FC_TAG_BASE_LEN); ++ tl->fc_len = le16_to_cpu(tl->fc_len); ++ tl->fc_tag = le16_to_cpu(tl->fc_tag); + } + + /* Unlink replay function */ +@@ -1317,19 +1321,19 @@ static int ext4_fc_replay_unlink(struct super_block *sb, struct ext4_fc_tl *tl, + inode = ext4_iget(sb, darg.ino, EXT4_IGET_NORMAL); + + if (IS_ERR(inode)) { +- jbd_debug(1, "Inode %d not found", darg.ino); ++ ext4_debug("Inode %d not found", darg.ino); + return 0; + } + + old_parent = ext4_iget(sb, darg.parent_ino, + EXT4_IGET_NORMAL); + if (IS_ERR(old_parent)) { +- jbd_debug(1, "Dir with inode %d not found", darg.parent_ino); ++ ext4_debug("Dir with inode %d not found", darg.parent_ino); + iput(inode); + 
return 0; + } + +- ret = __ext4_unlink(NULL, old_parent, &entry, inode); ++ ret = __ext4_unlink(old_parent, &entry, inode, NULL); + /* -ENOENT ok coz it might not exist anymore. */ + if (ret == -ENOENT) + ret = 0; +@@ -1349,21 +1353,21 @@ static int ext4_fc_replay_link_internal(struct super_block *sb, + + dir = ext4_iget(sb, darg->parent_ino, EXT4_IGET_NORMAL); + if (IS_ERR(dir)) { +- jbd_debug(1, "Dir with inode %d not found.", darg->parent_ino); ++ ext4_debug("Dir with inode %d not found.", darg->parent_ino); + dir = NULL; + goto out; + } + + dentry_dir = d_obtain_alias(dir); + if (IS_ERR(dentry_dir)) { +- jbd_debug(1, "Failed to obtain dentry"); ++ ext4_debug("Failed to obtain dentry"); + dentry_dir = NULL; + goto out; + } + + dentry_inode = d_alloc(dentry_dir, &qstr_dname); + if (!dentry_inode) { +- jbd_debug(1, "Inode dentry not created."); ++ ext4_debug("Inode dentry not created."); + ret = -ENOMEM; + goto out; + } +@@ -1376,7 +1380,7 @@ static int ext4_fc_replay_link_internal(struct super_block *sb, + * could complete. + */ + if (ret && ret != -EEXIST) { +- jbd_debug(1, "Failed to link\n"); ++ ext4_debug("Failed to link\n"); + goto out; + } + +@@ -1410,7 +1414,7 @@ static int ext4_fc_replay_link(struct super_block *sb, struct ext4_fc_tl *tl, + + inode = ext4_iget(sb, darg.ino, EXT4_IGET_NORMAL); + if (IS_ERR(inode)) { +- jbd_debug(1, "Inode not found."); ++ ext4_debug("Inode not found."); + return 0; + } + +@@ -1433,14 +1437,17 @@ static int ext4_fc_record_modified_inode(struct super_block *sb, int ino) if (state->fc_modified_inodes[i] == ino) return 0; if (state->fc_modified_inodes_used == state->fc_modified_inodes_size) { @@ -313552,7 +377662,16 @@ index 8ea5a81e65548..be3f8ce98962f 100644 } state->fc_modified_inodes[state->fc_modified_inodes_used++] = ino; return 0; -@@ -1472,7 +1456,9 @@ static int ext4_fc_replay_inode(struct super_block *sb, struct ext4_fc_tl *tl, +@@ -1457,7 +1464,7 @@ static int ext4_fc_replay_inode(struct super_block *sb, struct ext4_fc_tl *tl, + struct ext4_inode *raw_fc_inode; + struct inode *inode = NULL; + struct ext4_iloc iloc; +- int inode_len, ino, ret, tag = le16_to_cpu(tl->fc_tag); ++ int inode_len, ino, ret, tag = tl->fc_tag; + struct ext4_extent_header *eh; + + memcpy(&fc_inode, val, sizeof(fc_inode)); +@@ -1472,7 +1479,9 @@ static int ext4_fc_replay_inode(struct super_block *sb, struct ext4_fc_tl *tl, } inode = NULL; @@ -313563,7 +377682,43 @@ index 8ea5a81e65548..be3f8ce98962f 100644 raw_fc_inode = (struct ext4_inode *) (val + offsetof(struct ext4_fc_inode, fc_raw_inode)); -@@ -1603,26 +1589,36 @@ out: +@@ -1480,7 +1489,7 @@ static int ext4_fc_replay_inode(struct super_block *sb, struct ext4_fc_tl *tl, + if (ret) + goto out; + +- inode_len = le16_to_cpu(tl->fc_len) - sizeof(struct ext4_fc_inode); ++ inode_len = tl->fc_len - sizeof(struct ext4_fc_inode); + raw_inode = ext4_raw_inode(&iloc); + + memcpy(raw_inode, raw_fc_inode, offsetof(struct ext4_inode, i_block)); +@@ -1515,7 +1524,7 @@ static int ext4_fc_replay_inode(struct super_block *sb, struct ext4_fc_tl *tl, + /* Given that we just wrote the inode on disk, this SHOULD succeed. 
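
Replay now parses headers through ext4_fc_get_tl(), which memcpy()s the on-disk bytes out of the block before byte-swapping instead of dereferencing a possibly misaligned pointer. The same unaligned-safe read in plain C (get_le16 is an invented helper name):

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    static uint16_t get_le16(const uint8_t *p)
    {
        uint16_t v;

        memcpy(&v, p, sizeof(v));   /* no alignment assumptions */
    #if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
        v = (uint16_t)((v >> 8) | (v << 8));
    #endif
        return v;
    }

    int main(void)
    {
        const uint8_t buf[3] = { 0xff, 0x34, 0x12 };

        /* Odd offset: fine, because we never cast to uint16_t *. */
        printf("0x%04x\n", (unsigned)get_le16(buf + 1));  /* 0x1234 */
        return 0;
    }
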
*/ + inode = ext4_iget(sb, ino, EXT4_IGET_NORMAL); + if (IS_ERR(inode)) { +- jbd_debug(1, "Inode not found."); ++ ext4_debug("Inode not found."); + return -EFSCORRUPTED; + } + +@@ -1568,7 +1577,7 @@ static int ext4_fc_replay_create(struct super_block *sb, struct ext4_fc_tl *tl, + + inode = ext4_iget(sb, darg.ino, EXT4_IGET_NORMAL); + if (IS_ERR(inode)) { +- jbd_debug(1, "inode %d not found.", darg.ino); ++ ext4_debug("inode %d not found.", darg.ino); + inode = NULL; + ret = -EINVAL; + goto out; +@@ -1581,7 +1590,7 @@ static int ext4_fc_replay_create(struct super_block *sb, struct ext4_fc_tl *tl, + */ + dir = ext4_iget(sb, darg.parent_ino, EXT4_IGET_NORMAL); + if (IS_ERR(dir)) { +- jbd_debug(1, "Dir %d not found.", darg.ino); ++ ext4_debug("Dir %d not found.", darg.ino); + goto out; + } + ret = ext4_init_new_dir(NULL, dir, inode); +@@ -1603,26 +1612,36 @@ out: } /* @@ -313611,7 +377766,7 @@ index 8ea5a81e65548..be3f8ce98962f 100644 } region = &state->fc_regions[state->fc_regions_used++]; region->ino = ino; -@@ -1630,6 +1626,9 @@ static int ext4_fc_record_regions(struct super_block *sb, int ino, +@@ -1630,6 +1649,9 @@ static int ext4_fc_record_regions(struct super_block *sb, int ino, region->pblk = pblk; region->len = len; @@ -313621,7 +377776,13 @@ index 8ea5a81e65548..be3f8ce98962f 100644 return 0; } -@@ -1661,6 +1660,8 @@ static int ext4_fc_replay_add_range(struct super_block *sb, +@@ -1656,11 +1678,13 @@ static int ext4_fc_replay_add_range(struct super_block *sb, + + inode = ext4_iget(sb, le32_to_cpu(fc_add_ex.fc_ino), EXT4_IGET_NORMAL); + if (IS_ERR(inode)) { +- jbd_debug(1, "Inode not found."); ++ ext4_debug("Inode not found."); + return 0; } ret = ext4_fc_record_modified_inode(sb, inode->i_ino); @@ -313630,7 +377791,16 @@ index 8ea5a81e65548..be3f8ce98962f 100644 start = le32_to_cpu(ex->ee_block); start_pblk = ext4_ext_pblock(ex); -@@ -1678,18 +1679,14 @@ static int ext4_fc_replay_add_range(struct super_block *sb, +@@ -1668,7 +1692,7 @@ static int ext4_fc_replay_add_range(struct super_block *sb, + + cur = start; + remaining = len; +- jbd_debug(1, "ADD_RANGE, lblk %d, pblk %lld, len %d, unwritten %d, inode %ld\n", ++ ext4_debug("ADD_RANGE, lblk %d, pblk %lld, len %d, unwritten %d, inode %ld\n", + start, start_pblk, len, ext4_ext_is_unwritten(ex), + inode->i_ino); + +@@ -1678,18 +1702,14 @@ static int ext4_fc_replay_add_range(struct super_block *sb, map.m_pblk = 0; ret = ext4_map_blocks(NULL, inode, &map, 0); @@ -313653,7 +377823,7 @@ index 8ea5a81e65548..be3f8ce98962f 100644 memset(&newex, 0, sizeof(newex)); newex.ee_block = cpu_to_le32(cur); ext4_ext_store_pblock( -@@ -1703,10 +1700,8 @@ static int ext4_fc_replay_add_range(struct super_block *sb, +@@ -1703,10 +1723,8 @@ static int ext4_fc_replay_add_range(struct super_block *sb, up_write((&EXT4_I(inode)->i_data_sem)); ext4_ext_drop_refs(path); kfree(path); @@ -313666,7 +377836,7 @@ index 8ea5a81e65548..be3f8ce98962f 100644 goto next; } -@@ -1719,10 +1714,8 @@ static int ext4_fc_replay_add_range(struct super_block *sb, +@@ -1719,10 +1737,8 @@ static int ext4_fc_replay_add_range(struct super_block *sb, ret = ext4_ext_replay_update_ex(inode, cur, map.m_len, ext4_ext_is_unwritten(ex), start_pblk + cur - start); @@ -313679,7 +377849,13 @@ index 8ea5a81e65548..be3f8ce98962f 100644 /* * Mark the old blocks as free since they aren't used * anymore. 
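
The replay bookkeeping above (the modified-inode and region tables) grows its arrays only when used reaches size, and assigns the reallocated pointer through a temporary so that a failed reallocation cannot lose the old array. The same pattern in userspace C, with an arbitrary step size:

    #include <stdio.h>
    #include <stdlib.h>

    #define GROW_STEP 16   /* arbitrary; ext4 uses its own increment */

    int main(void)
    {
        size_t used = 0, size = 0;
        int *arr = NULL;

        for (int i = 0; i < 100; i++) {
            if (used == size) {
                int *tmp = realloc(arr, (size + GROW_STEP) * sizeof(*arr));

                if (!tmp) {        /* old array is still valid...      */
                    free(arr);     /* ...so it can be freed, not leaked */
                    return 1;
                }
                arr = tmp;
                size += GROW_STEP;
            }
            arr[used++] = i;
        }
        printf("%zu entries, capacity %zu\n", used, size);  /* 100, 112 */
        free(arr);
        return 0;
    }
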
We maintain an array of all the modified -@@ -1742,10 +1735,8 @@ static int ext4_fc_replay_add_range(struct super_block *sb, +@@ -1737,15 +1753,13 @@ static int ext4_fc_replay_add_range(struct super_block *sb, + } + + /* Range is mapped and needs a state change */ +- jbd_debug(1, "Converting from %ld to %d %lld", ++ ext4_debug("Converting from %ld to %d %lld", + map.m_flags & EXT4_MAP_UNWRITTEN, ext4_ext_is_unwritten(ex), map.m_pblk); ret = ext4_ext_replay_update_ex(inode, cur, map.m_len, ext4_ext_is_unwritten(ex), map.m_pblk); @@ -313692,7 +377868,7 @@ index 8ea5a81e65548..be3f8ce98962f 100644 /* * We may have split the extent tree while toggling the state. * Try to shrink the extent tree now. -@@ -1757,6 +1748,7 @@ next: +@@ -1757,6 +1771,7 @@ next: } ext4_ext_replay_shrink_inode(inode, i_size_read(inode) >> sb->s_blocksize_bits); @@ -313700,16 +377876,25 @@ index 8ea5a81e65548..be3f8ce98962f 100644 iput(inode); return 0; } -@@ -1786,6 +1778,8 @@ ext4_fc_replay_del_range(struct super_block *sb, struct ext4_fc_tl *tl, +@@ -1781,13 +1796,15 @@ ext4_fc_replay_del_range(struct super_block *sb, struct ext4_fc_tl *tl, + + inode = ext4_iget(sb, le32_to_cpu(lrange.fc_ino), EXT4_IGET_NORMAL); + if (IS_ERR(inode)) { +- jbd_debug(1, "Inode %d not found", le32_to_cpu(lrange.fc_ino)); ++ ext4_debug("Inode %d not found", le32_to_cpu(lrange.fc_ino)); + return 0; } ret = ext4_fc_record_modified_inode(sb, inode->i_ino); + if (ret) + goto out; - jbd_debug(1, "DEL_RANGE, inode %ld, lblk %d, len %d\n", +- jbd_debug(1, "DEL_RANGE, inode %ld, lblk %d, len %d\n", ++ ext4_debug("DEL_RANGE, inode %ld, lblk %d, len %d\n", inode->i_ino, le32_to_cpu(lrange.fc_lblk), -@@ -1795,10 +1789,8 @@ ext4_fc_replay_del_range(struct super_block *sb, struct ext4_fc_tl *tl, + le32_to_cpu(lrange.fc_len)); + while (remaining > 0) { +@@ -1795,10 +1812,8 @@ ext4_fc_replay_del_range(struct super_block *sb, struct ext4_fc_tl *tl, map.m_len = remaining; ret = ext4_map_blocks(NULL, inode, &map, 0); @@ -313722,7 +377907,7 @@ index 8ea5a81e65548..be3f8ce98962f 100644 if (ret > 0) { remaining -= ret; cur += ret; -@@ -1809,16 +1801,18 @@ ext4_fc_replay_del_range(struct super_block *sb, struct ext4_fc_tl *tl, +@@ -1809,16 +1824,18 @@ ext4_fc_replay_del_range(struct super_block *sb, struct ext4_fc_tl *tl, } } @@ -313746,7 +377931,84 @@ index 8ea5a81e65548..be3f8ce98962f 100644 return 0; } -@@ -1970,7 +1964,7 @@ static int ext4_fc_replay_scan(journal_t *journal, +@@ -1836,7 +1853,7 @@ static void ext4_fc_set_bitmaps_and_counters(struct super_block *sb) + inode = ext4_iget(sb, state->fc_modified_inodes[i], + EXT4_IGET_NORMAL); + if (IS_ERR(inode)) { +- jbd_debug(1, "Inode %d not found.", ++ ext4_debug("Inode %d not found.", + state->fc_modified_inodes[i]); + continue; + } +@@ -1902,6 +1919,33 @@ void ext4_fc_replay_cleanup(struct super_block *sb) + kfree(sbi->s_fc_replay_state.fc_modified_inodes); + } + ++static bool ext4_fc_value_len_isvalid(struct ext4_sb_info *sbi, ++ int tag, int len) ++{ ++ switch (tag) { ++ case EXT4_FC_TAG_ADD_RANGE: ++ return len == sizeof(struct ext4_fc_add_range); ++ case EXT4_FC_TAG_DEL_RANGE: ++ return len == sizeof(struct ext4_fc_del_range); ++ case EXT4_FC_TAG_CREAT: ++ case EXT4_FC_TAG_LINK: ++ case EXT4_FC_TAG_UNLINK: ++ len -= sizeof(struct ext4_fc_dentry_info); ++ return len >= 1 && len <= EXT4_NAME_LEN; ++ case EXT4_FC_TAG_INODE: ++ len -= sizeof(struct ext4_fc_inode); ++ return len >= EXT4_GOOD_OLD_INODE_SIZE && ++ len <= sbi->s_inode_size; ++ case EXT4_FC_TAG_PAD: ++ return true; /* padding can have any 
length */ ++ case EXT4_FC_TAG_TAIL: ++ return len >= sizeof(struct ext4_fc_tail); ++ case EXT4_FC_TAG_HEAD: ++ return len == sizeof(struct ext4_fc_head); ++ } ++ return false; ++} ++ + /* + * Recovery Scan phase handler + * +@@ -1937,7 +1981,7 @@ static int ext4_fc_replay_scan(journal_t *journal, + state = &sbi->s_fc_replay_state; + + start = (u8 *)bh->b_data; +- end = (__u8 *)bh->b_data + journal->j_blocksize - 1; ++ end = start + journal->j_blocksize; + + if (state->fc_replay_expected_off == 0) { + state->fc_cur_tag = 0; +@@ -1958,19 +2002,26 @@ static int ext4_fc_replay_scan(journal_t *journal, + } + + state->fc_replay_expected_off++; +- for (cur = start; cur < end; cur = cur + sizeof(tl) + le16_to_cpu(tl.fc_len)) { +- memcpy(&tl, cur, sizeof(tl)); +- val = cur + sizeof(tl); +- jbd_debug(3, "Scan phase, tag:%s, blk %lld\n", +- tag2str(le16_to_cpu(tl.fc_tag)), bh->b_blocknr); +- switch (le16_to_cpu(tl.fc_tag)) { ++ for (cur = start; cur <= end - EXT4_FC_TAG_BASE_LEN; ++ cur = cur + EXT4_FC_TAG_BASE_LEN + tl.fc_len) { ++ ext4_fc_get_tl(&tl, cur); ++ val = cur + EXT4_FC_TAG_BASE_LEN; ++ if (tl.fc_len > end - val || ++ !ext4_fc_value_len_isvalid(sbi, tl.fc_tag, tl.fc_len)) { ++ ret = state->fc_replay_num_tags ? ++ JBD2_FC_REPLAY_STOP : -ECANCELED; ++ goto out_err; ++ } ++ ext4_debug("Scan phase, tag:%s, blk %lld\n", ++ tag2str(tl.fc_tag), bh->b_blocknr); ++ switch (tl.fc_tag) { + case EXT4_FC_TAG_ADD_RANGE: + memcpy(&ext, val, sizeof(ext)); + ex = (struct ext4_extent *)&ext.fc_ex; ret = ext4_fc_record_regions(sb, le32_to_cpu(ext.fc_ino), le32_to_cpu(ex->ee_block), ext4_ext_pblock(ex), @@ -313755,7 +378017,124 @@ index 8ea5a81e65548..be3f8ce98962f 100644 if (ret < 0) break; ret = JBD2_FC_REPLAY_CONTINUE; -@@ -2166,7 +2160,7 @@ int ext4_fc_info_show(struct seq_file *seq, void *v) +@@ -1983,13 +2034,13 @@ static int ext4_fc_replay_scan(journal_t *journal, + case EXT4_FC_TAG_PAD: + state->fc_cur_tag++; + state->fc_crc = ext4_chksum(sbi, state->fc_crc, cur, +- sizeof(tl) + le16_to_cpu(tl.fc_len)); ++ EXT4_FC_TAG_BASE_LEN + tl.fc_len); + break; + case EXT4_FC_TAG_TAIL: + state->fc_cur_tag++; + memcpy(&tail, val, sizeof(tail)); + state->fc_crc = ext4_chksum(sbi, state->fc_crc, cur, +- sizeof(tl) + ++ EXT4_FC_TAG_BASE_LEN + + offsetof(struct ext4_fc_tail, + fc_crc)); + if (le32_to_cpu(tail.fc_tid) == expected_tid && +@@ -2016,7 +2067,7 @@ static int ext4_fc_replay_scan(journal_t *journal, + } + state->fc_cur_tag++; + state->fc_crc = ext4_chksum(sbi, state->fc_crc, cur, +- sizeof(tl) + le16_to_cpu(tl.fc_len)); ++ EXT4_FC_TAG_BASE_LEN + tl.fc_len); + break; + default: + ret = state->fc_replay_num_tags ? 
+@@ -2056,7 +2107,7 @@ static int ext4_fc_replay(journal_t *journal, struct buffer_head *bh, + sbi->s_mount_state |= EXT4_FC_REPLAY; + } + if (!sbi->s_fc_replay_state.fc_replay_num_tags) { +- jbd_debug(1, "Replay stops\n"); ++ ext4_debug("Replay stops\n"); + ext4_fc_set_bitmaps_and_counters(sb); + return 0; + } +@@ -2069,21 +2120,22 @@ static int ext4_fc_replay(journal_t *journal, struct buffer_head *bh, + #endif + + start = (u8 *)bh->b_data; +- end = (__u8 *)bh->b_data + journal->j_blocksize - 1; ++ end = start + journal->j_blocksize; + +- for (cur = start; cur < end; cur = cur + sizeof(tl) + le16_to_cpu(tl.fc_len)) { +- memcpy(&tl, cur, sizeof(tl)); +- val = cur + sizeof(tl); ++ for (cur = start; cur <= end - EXT4_FC_TAG_BASE_LEN; ++ cur = cur + EXT4_FC_TAG_BASE_LEN + tl.fc_len) { ++ ext4_fc_get_tl(&tl, cur); ++ val = cur + EXT4_FC_TAG_BASE_LEN; + + if (state->fc_replay_num_tags == 0) { + ret = JBD2_FC_REPLAY_STOP; + ext4_fc_set_bitmaps_and_counters(sb); + break; + } +- jbd_debug(3, "Replay phase, tag:%s\n", +- tag2str(le16_to_cpu(tl.fc_tag))); ++ ++ ext4_debug("Replay phase, tag:%s\n", tag2str(tl.fc_tag)); + state->fc_replay_num_tags--; +- switch (le16_to_cpu(tl.fc_tag)) { ++ switch (tl.fc_tag) { + case EXT4_FC_TAG_LINK: + ret = ext4_fc_replay_link(sb, &tl, val); + break; +@@ -2104,19 +2156,18 @@ static int ext4_fc_replay(journal_t *journal, struct buffer_head *bh, + break; + case EXT4_FC_TAG_PAD: + trace_ext4_fc_replay(sb, EXT4_FC_TAG_PAD, 0, +- le16_to_cpu(tl.fc_len), 0); ++ tl.fc_len, 0); + break; + case EXT4_FC_TAG_TAIL: +- trace_ext4_fc_replay(sb, EXT4_FC_TAG_TAIL, 0, +- le16_to_cpu(tl.fc_len), 0); ++ trace_ext4_fc_replay(sb, EXT4_FC_TAG_TAIL, ++ 0, tl.fc_len, 0); + memcpy(&tail, val, sizeof(tail)); + WARN_ON(le32_to_cpu(tail.fc_tid) != expected_tid); + break; + case EXT4_FC_TAG_HEAD: + break; + default: +- trace_ext4_fc_replay(sb, le16_to_cpu(tl.fc_tag), 0, +- le16_to_cpu(tl.fc_len), 0); ++ trace_ext4_fc_replay(sb, tl.fc_tag, 0, tl.fc_len, 0); + ret = -ECANCELED; + break; + } +@@ -2140,17 +2191,17 @@ void ext4_fc_init(struct super_block *sb, journal_t *journal) + journal->j_fc_cleanup_callback = ext4_fc_cleanup; + } + +-static const char *fc_ineligible_reasons[] = { +- "Extended attributes changed", +- "Cross rename", +- "Journal flag changed", +- "Insufficient memory", +- "Swap boot", +- "Resize", +- "Dir renamed", +- "Falloc range op", +- "Data journalling", +- "FC Commit Failed" ++static const char * const fc_ineligible_reasons[] = { ++ [EXT4_FC_REASON_XATTR] = "Extended attributes changed", ++ [EXT4_FC_REASON_CROSS_RENAME] = "Cross rename", ++ [EXT4_FC_REASON_JOURNAL_FLAG_CHANGE] = "Journal flag changed", ++ [EXT4_FC_REASON_NOMEM] = "Insufficient memory", ++ [EXT4_FC_REASON_SWAP_BOOT] = "Swap boot", ++ [EXT4_FC_REASON_RESIZE] = "Resize", ++ [EXT4_FC_REASON_RENAME_DIR] = "Dir renamed", ++ [EXT4_FC_REASON_FALLOC_RANGE] = "Falloc range op", ++ [EXT4_FC_REASON_INODE_JOURNAL_DATA] = "Data journalling", ++ [EXT4_FC_REASON_ENCRYPTED_FILENAME] = "Encrypted filename", + }; + + int ext4_fc_info_show(struct seq_file *seq, void *v) +@@ -2166,7 +2217,7 @@ int ext4_fc_info_show(struct seq_file *seq, void *v) "fc stats:\n%ld commits\n%ld ineligible\n%ld numblks\n%lluus avg_commit_time\n", stats->fc_num_commits, stats->fc_ineligible_commits, stats->fc_numblks, @@ -313764,7 +378143,7 @@ index 8ea5a81e65548..be3f8ce98962f 100644 seq_puts(seq, "Ineligible reasons:\n"); for (i = 0; i < EXT4_FC_REASON_MAX; i++) seq_printf(seq, "\"%s\":\t%d\n", fc_ineligible_reasons[i], -@@ -2185,3 +2179,8 @@ int 
__init ext4_fc_init_dentry_cache(void) +@@ -2185,3 +2236,8 @@ int __init ext4_fc_init_dentry_cache(void) return 0; } @@ -313774,14 +378153,26 @@ index 8ea5a81e65548..be3f8ce98962f 100644 + kmem_cache_destroy(ext4_fc_dentry_cachep); +} diff --git a/fs/ext4/fast_commit.h b/fs/ext4/fast_commit.h -index 937c381b4c85e..083ad1cb705a7 100644 +index 937c381b4c85e..2cbd317eda26b 100644 --- a/fs/ext4/fast_commit.h +++ b/fs/ext4/fast_commit.h -@@ -71,21 +71,19 @@ struct ext4_fc_tail { +@@ -58,7 +58,7 @@ struct ext4_fc_dentry_info { + __u8 fc_dname[0]; }; - /* -- * Fast commit reason codes +-/* Value structure for EXT4_FC_TAG_INODE and EXT4_FC_TAG_INODE_PARTIAL. */ ++/* Value structure for EXT4_FC_TAG_INODE. */ + struct ext4_fc_inode { + __le32 fc_ino; + __u8 fc_raw_inode[0]; +@@ -70,22 +70,23 @@ struct ext4_fc_tail { + __le32 fc_crc; + }; + ++/* Tag base length */ ++#define EXT4_FC_TAG_BASE_LEN (sizeof(struct ext4_fc_tl)) ++ ++/* + * Fast commit status codes + */ +enum { @@ -313791,7 +378182,8 @@ index 937c381b4c85e..083ad1cb705a7 100644 + EXT4_FC_STATUS_FAILED, +}; + -+/* + /* +- * Fast commit reason codes + * Fast commit ineligiblity reasons: */ enum { @@ -313810,7 +378202,16 @@ index 937c381b4c85e..083ad1cb705a7 100644 EXT4_FC_REASON_XATTR = 0, EXT4_FC_REASON_CROSS_RENAME, EXT4_FC_REASON_JOURNAL_FLAG_CHANGE, -@@ -117,7 +115,10 @@ struct ext4_fc_stats { +@@ -95,7 +96,7 @@ enum { + EXT4_FC_REASON_RENAME_DIR, + EXT4_FC_REASON_FALLOC_RANGE, + EXT4_FC_REASON_INODE_JOURNAL_DATA, +- EXT4_FC_COMMIT_FAILED, ++ EXT4_FC_REASON_ENCRYPTED_FILENAME, + EXT4_FC_REASON_MAX + }; + +@@ -117,7 +118,10 @@ struct ext4_fc_stats { unsigned int fc_ineligible_reason_count[EXT4_FC_REASON_MAX]; unsigned long fc_num_commits; unsigned long fc_ineligible_commits; @@ -313870,6 +378271,51 @@ index f73e5eb43eae1..208b87ce88588 100644 min_inodes = avefreei - inodes_per_group*flex_size / 4; if (min_inodes < 1) min_inodes = 1; +diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c +index 89efa78ed4b21..9813cc4b7b2a9 100644 +--- a/fs/ext4/indirect.c ++++ b/fs/ext4/indirect.c +@@ -148,6 +148,7 @@ static Indirect *ext4_get_branch(struct inode *inode, int depth, + struct super_block *sb = inode->i_sb; + Indirect *p = chain; + struct buffer_head *bh; ++ unsigned int key; + int ret = -EIO; + + *err = 0; +@@ -156,7 +157,13 @@ static Indirect *ext4_get_branch(struct inode *inode, int depth, + if (!p->key) + goto no_block; + while (--depth) { +- bh = sb_getblk(sb, le32_to_cpu(p->key)); ++ key = le32_to_cpu(p->key); ++ if (key > ext4_blocks_count(EXT4_SB(sb)->s_es)) { ++ /* the block was out of range */ ++ ret = -EFSCORRUPTED; ++ goto failure; ++ } ++ bh = sb_getblk(sb, key); + if (unlikely(!bh)) { + ret = -ENOMEM; + goto failure; +@@ -460,7 +467,7 @@ static int ext4_splice_branch(handle_t *handle, + * the new i_size. But that is not done here - it is done in + * generic_commit_write->__mark_inode_dirty->ext4_dirty_inode. 
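
In the fs/ext4/indirect.c hunk above, ext4_get_branch() now refuses any indirect block pointer beyond the filesystem's block count and fails with -EFSCORRUPTED instead of reading a nonsense block. Reduced to its shape in plain C (limits invented; the zero/absent-pointer case is handled separately by the caller in ext4):

    #include <stdint.h>
    #include <stdio.h>

    static int check_block(uint32_t key, uint32_t blocks_count)
    {
        return key > blocks_count ? -1 : 0;   /* -1 ~ -EFSCORRUPTED */
    }

    int main(void)
    {
        uint32_t blocks_count = 1u << 20;   /* hypothetical fs size */

        printf("%d\n", check_block(42, blocks_count));          /*  0 */
        printf("%d\n", check_block(UINT32_MAX, blocks_count));  /* -1 */
        return 0;
    }
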
+ */ +- jbd_debug(5, "splicing indirect only\n"); ++ ext4_debug("splicing indirect only\n"); + BUFFER_TRACE(where->bh, "call ext4_handle_dirty_metadata"); + err = ext4_handle_dirty_metadata(handle, ar->inode, where->bh); + if (err) +@@ -472,7 +479,7 @@ static int ext4_splice_branch(handle_t *handle, + err = ext4_mark_inode_dirty(handle, ar->inode); + if (unlikely(err)) + goto err_out; +- jbd_debug(5, "splicing direct\n"); ++ ext4_debug("splicing direct\n"); + } + return err; + diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c index 39a1ab129fdc9..38ad09e802e48 100644 --- a/fs/ext4/inline.c @@ -313970,7 +378416,7 @@ index 39a1ab129fdc9..38ad09e802e48 100644 needed_blocks = ext4_writepage_trans_blocks(inode); diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c -index 0f06305167d5a..bdadbe57ea804 100644 +index 0f06305167d5a..0a63863bc58c1 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -179,6 +179,8 @@ void ext4_evict_inode(struct inode *inode) @@ -313982,16 +378428,42 @@ index 0f06305167d5a..bdadbe57ea804 100644 if (inode->i_nlink) { /* * When journalling data dirty buffers are tracked only in the -@@ -337,7 +339,7 @@ stop_handle: +@@ -223,13 +225,13 @@ void ext4_evict_inode(struct inode *inode) + + /* + * For inodes with journalled data, transaction commit could have +- * dirtied the inode. Flush worker is ignoring it because of I_FREEING +- * flag but we still need to remove the inode from the writeback lists. ++ * dirtied the inode. And for inodes with dioread_nolock, unwritten ++ * extents converting worker could merge extents and also have dirtied ++ * the inode. Flush worker is ignoring it because of I_FREEING flag but ++ * we still need to remove the inode from the writeback lists. + */ +- if (!list_empty_careful(&inode->i_io_list)) { +- WARN_ON_ONCE(!ext4_should_journal_data(inode)); ++ if (!list_empty_careful(&inode->i_io_list)) + inode_io_list_del(inode); +- } + + /* + * Protect us against freezing - iput() caller didn't have to have any +@@ -336,8 +338,14 @@ stop_handle: + ext4_xattr_inode_array_free(ea_inode_array); return; no_delete: ++ /* ++ * Check out some where else accidentally dirty the evicting inode, ++ * which may probably cause inode use-after-free issues later. ++ */ ++ WARN_ON_ONCE(!list_empty_careful(&inode->i_io_list)); ++ if (!list_empty(&EXT4_I(inode)->i_fc_list)) - ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_NOMEM); + ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_NOMEM, NULL); ext4_clear_inode(inode); /* We must guarantee clearing of inode... 
*/ } -@@ -741,10 +743,11 @@ out_sem: +@@ -741,10 +749,11 @@ out_sem: if (ret) return ret; } @@ -314006,7 +378478,7 @@ index 0f06305167d5a..bdadbe57ea804 100644 if (retval < 0) ext_debug(inode, "failed with err %d\n", retval); return retval; -@@ -1174,6 +1177,13 @@ retry_grab: +@@ -1174,6 +1183,13 @@ retry_grab: page = grab_cache_page_write_begin(mapping, index, flags); if (!page) return -ENOMEM; @@ -314020,7 +378492,17 @@ index 0f06305167d5a..bdadbe57ea804 100644 unlock_page(page); retry_journal: -@@ -1559,7 +1569,14 @@ static void mpage_release_unused_pages(struct mpage_da_data *mpd, +@@ -1288,7 +1304,8 @@ static int ext4_write_end(struct file *file, + + trace_ext4_write_end(inode, pos, len, copied); + +- if (ext4_has_inline_data(inode)) ++ if (ext4_has_inline_data(inode) && ++ ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA)) + return ext4_write_inline_data_end(inode, pos, len, copied, page); + + copied = block_write_end(file, mapping, pos, len, copied, page, fsdata); +@@ -1559,7 +1576,14 @@ static void mpage_release_unused_pages(struct mpage_da_data *mpd, ext4_lblk_t start, last; start = index << (PAGE_SHIFT - inode->i_blkbits); last = end << (PAGE_SHIFT - inode->i_blkbits); @@ -314035,7 +378517,7 @@ index 0f06305167d5a..bdadbe57ea804 100644 } pagevec_init(&pvec); -@@ -1711,16 +1728,13 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock, +@@ -1711,16 +1735,13 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock, } /* @@ -314058,7 +378540,7 @@ index 0f06305167d5a..bdadbe57ea804 100644 return 0; } -@@ -1847,30 +1861,16 @@ int ext4_da_get_block_prep(struct inode *inode, sector_t iblock, +@@ -1847,30 +1868,16 @@ int ext4_da_get_block_prep(struct inode *inode, sector_t iblock, return 0; } @@ -314090,7 +378572,7 @@ index 0f06305167d5a..bdadbe57ea804 100644 ClearPageChecked(page); -@@ -1880,14 +1880,6 @@ static int __ext4_journalled_writepage(struct page *page, +@@ -1880,14 +1887,6 @@ static int __ext4_journalled_writepage(struct page *page, inode_bh = ext4_journalled_write_inline_data(inode, len, page); if (inode_bh == NULL) goto out; @@ -314105,7 +378587,7 @@ index 0f06305167d5a..bdadbe57ea804 100644 } /* * We need to release the page lock before we start the -@@ -1908,7 +1900,8 @@ static int __ext4_journalled_writepage(struct page *page, +@@ -1908,7 +1907,8 @@ static int __ext4_journalled_writepage(struct page *page, lock_page(page); put_page(page); @@ -314115,7 +378597,7 @@ index 0f06305167d5a..bdadbe57ea804 100644 /* The page got truncated from under us */ ext4_journal_stop(handle); ret = 0; -@@ -1918,6 +1911,13 @@ static int __ext4_journalled_writepage(struct page *page, +@@ -1918,6 +1918,13 @@ static int __ext4_journalled_writepage(struct page *page, if (inline_data) { ret = ext4_mark_inode_dirty(handle, inode); } else { @@ -314129,7 +378611,7 @@ index 0f06305167d5a..bdadbe57ea804 100644 ret = ext4_walk_page_buffers(handle, inode, page_bufs, 0, len, NULL, do_journal_get_write_access); -@@ -1938,9 +1938,6 @@ static int __ext4_journalled_writepage(struct page *page, +@@ -1938,9 +1945,6 @@ static int __ext4_journalled_writepage(struct page *page, out: unlock_page(page); out_no_pagelock: @@ -314139,7 +378621,7 @@ index 0f06305167d5a..bdadbe57ea804 100644 brelse(inode_bh); return ret; } -@@ -2011,6 +2008,15 @@ static int ext4_writepage(struct page *page, +@@ -2011,6 +2015,15 @@ static int ext4_writepage(struct page *page, else len = PAGE_SIZE; @@ -314155,7 +378637,7 @@ index 0f06305167d5a..bdadbe57ea804 100644 page_bufs = page_buffers(page); /* * We 
cannot do block allocation or other extent handling in this -@@ -2614,6 +2620,22 @@ static int mpage_prepare_extent_to_map(struct mpage_da_data *mpd) +@@ -2614,6 +2627,22 @@ static int mpage_prepare_extent_to_map(struct mpage_da_data *mpd) wait_on_page_writeback(page); BUG_ON(PageWriteback(page)); @@ -314178,7 +378660,7 @@ index 0f06305167d5a..bdadbe57ea804 100644 if (mpd->map.m_len == 0) mpd->first_page = page->index; mpd->next_page = page->index + 1; -@@ -3124,13 +3146,15 @@ static sector_t ext4_bmap(struct address_space *mapping, sector_t block) +@@ -3124,13 +3153,15 @@ static sector_t ext4_bmap(struct address_space *mapping, sector_t block) { struct inode *inode = mapping->host; journal_t *journal; @@ -314195,7 +378677,7 @@ index 0f06305167d5a..bdadbe57ea804 100644 if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY) && test_opt(inode->i_sb, DELALLOC)) { -@@ -3169,10 +3193,14 @@ static sector_t ext4_bmap(struct address_space *mapping, sector_t block) +@@ -3169,10 +3200,14 @@ static sector_t ext4_bmap(struct address_space *mapping, sector_t block) jbd2_journal_unlock_updates(journal); if (err) @@ -314212,7 +378694,7 @@ index 0f06305167d5a..bdadbe57ea804 100644 } static int ext4_readpage(struct file *file, struct page *page) -@@ -3933,27 +3961,20 @@ int ext4_break_layouts(struct inode *inode) +@@ -3933,27 +3968,20 @@ int ext4_break_layouts(struct inode *inode) * Returns: 0 on success or negative on failure */ @@ -314244,7 +378726,7 @@ index 0f06305167d5a..bdadbe57ea804 100644 /* * Write out all dirty pages to avoid race conditions * Then release them. -@@ -3981,6 +4002,14 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length) +@@ -3981,6 +4009,14 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length) offset; } @@ -314259,7 +378741,7 @@ index 0f06305167d5a..bdadbe57ea804 100644 if (offset & (sb->s_blocksize - 1) || (offset + length) & (sb->s_blocksize - 1)) { /* -@@ -3996,6 +4025,10 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length) +@@ -3996,6 +4032,10 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length) /* Wait all existing dio workers, newcomers will block on i_mutex */ inode_dio_wait(inode); @@ -314270,7 +378752,36 @@ index 0f06305167d5a..bdadbe57ea804 100644 /* * Prevent page faults from reinstantiating pages we have released from * page cache. 
-@@ -4374,7 +4407,7 @@ has_buffer: +@@ -4165,7 +4205,8 @@ int ext4_truncate(struct inode *inode) + + /* If we zero-out tail of the page, we have to create jinode for jbd2 */ + if (inode->i_size & (inode->i_sb->s_blocksize - 1)) { +- if (ext4_inode_attach_jinode(inode) < 0) ++ err = ext4_inode_attach_jinode(inode); ++ if (err) + goto out_trace; + } + +@@ -4266,9 +4307,17 @@ static int __ext4_get_inode_loc(struct super_block *sb, unsigned long ino, + inodes_per_block = EXT4_SB(sb)->s_inodes_per_block; + inode_offset = ((ino - 1) % + EXT4_INODES_PER_GROUP(sb)); +- block = ext4_inode_table(sb, gdp) + (inode_offset / inodes_per_block); + iloc->offset = (inode_offset % inodes_per_block) * EXT4_INODE_SIZE(sb); + ++ block = ext4_inode_table(sb, gdp); ++ if ((block <= le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block)) || ++ (block >= ext4_blocks_count(EXT4_SB(sb)->s_es))) { ++ ext4_error(sb, "Invalid inode table block %llu in " ++ "block_group %u", block, iloc->block_group); ++ return -EFSCORRUPTED; ++ } ++ block += (inode_offset / inodes_per_block); ++ + bh = sb_getblk(sb, block); + if (unlikely(!bh)) + return -ENOMEM; +@@ -4374,7 +4423,7 @@ has_buffer: static int __ext4_get_inode_loc_noinmem(struct inode *inode, struct ext4_iloc *iloc) { @@ -314279,7 +378790,7 @@ index 0f06305167d5a..bdadbe57ea804 100644 int ret; ret = __ext4_get_inode_loc(inode->i_sb, inode->i_ino, iloc, 0, -@@ -4389,7 +4422,7 @@ static int __ext4_get_inode_loc_noinmem(struct inode *inode, +@@ -4389,7 +4438,7 @@ static int __ext4_get_inode_loc_noinmem(struct inode *inode, int ext4_get_inode_loc(struct inode *inode, struct ext4_iloc *iloc) { @@ -314288,7 +378799,7 @@ index 0f06305167d5a..bdadbe57ea804 100644 int ret; /* We have all inode data except xattrs in memory here. */ -@@ -4498,8 +4531,7 @@ static inline int ext4_iget_extra_inode(struct inode *inode, +@@ -4498,8 +4547,7 @@ static inline int ext4_iget_extra_inode(struct inode *inode, __le32 *magic = (void *)raw_inode + EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize; @@ -314298,7 +378809,32 @@ index 0f06305167d5a..bdadbe57ea804 100644 *magic == cpu_to_le32(EXT4_XATTR_MAGIC)) { ext4_set_inode_state(inode, EXT4_STATE_XATTR); return ext4_find_inline_data_nolock(inode); -@@ -5353,6 +5385,7 @@ int ext4_setattr(struct user_namespace *mnt_userns, struct dentry *dentry, +@@ -4844,8 +4892,14 @@ struct inode *__ext4_iget(struct super_block *sb, unsigned long ino, + if (IS_CASEFOLDED(inode) && !ext4_has_feature_casefold(inode->i_sb)) + ext4_error_inode(inode, function, line, 0, + "casefold flag without casefold feature"); +- brelse(iloc.bh); ++ if (is_bad_inode(inode) && !(flags & EXT4_IGET_BAD)) { ++ ext4_error_inode(inode, function, line, 0, ++ "bad inode without EXT4_IGET_BAD flag"); ++ ret = -EUCLEAN; ++ goto bad_inode; ++ } + ++ brelse(iloc.bh); + unlock_new_inode(inode); + return inode; + +@@ -5166,7 +5220,7 @@ int ext4_write_inode(struct inode *inode, struct writeback_control *wbc) + + if (EXT4_SB(inode->i_sb)->s_journal) { + if (ext4_journal_current_handle()) { +- jbd_debug(1, "called recursively, non-PF_MEMALLOC!\n"); ++ ext4_debug("called recursively, non-PF_MEMALLOC!\n"); + dump_stack(); + return -EIO; + } +@@ -5353,6 +5407,7 @@ int ext4_setattr(struct user_namespace *mnt_userns, struct dentry *dentry, if (attr->ia_valid & ATTR_SIZE) { handle_t *handle; loff_t oldsize = inode->i_size; @@ -314306,7 +378842,7 @@ index 0f06305167d5a..bdadbe57ea804 100644 int shrink = (attr->ia_size < inode->i_size); if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) { -@@ -5416,8 +5449,7 @@ 
int ext4_setattr(struct user_namespace *mnt_userns, struct dentry *dentry, +@@ -5416,8 +5471,7 @@ int ext4_setattr(struct user_namespace *mnt_userns, struct dentry *dentry, ext4_fc_track_range(handle, inode, (attr->ia_size > 0 ? attr->ia_size - 1 : 0) >> inode->i_sb->s_blocksize_bits, @@ -314316,7 +378852,7 @@ index 0f06305167d5a..bdadbe57ea804 100644 else ext4_fc_track_range( handle, inode, -@@ -5427,6 +5459,7 @@ int ext4_setattr(struct user_namespace *mnt_userns, struct dentry *dentry, +@@ -5427,6 +5481,7 @@ int ext4_setattr(struct user_namespace *mnt_userns, struct dentry *dentry, inode->i_sb->s_blocksize_bits); down_write(&EXT4_I(inode)->i_data_sem); @@ -314324,7 +378860,7 @@ index 0f06305167d5a..bdadbe57ea804 100644 EXT4_I(inode)->i_disksize = attr->ia_size; rc = ext4_mark_inode_dirty(handle, inode); if (!error) -@@ -5438,6 +5471,8 @@ int ext4_setattr(struct user_namespace *mnt_userns, struct dentry *dentry, +@@ -5438,6 +5493,8 @@ int ext4_setattr(struct user_namespace *mnt_userns, struct dentry *dentry, */ if (!error) i_size_write(inode, attr->ia_size); @@ -314333,7 +378869,7 @@ index 0f06305167d5a..bdadbe57ea804 100644 up_write(&EXT4_I(inode)->i_data_sem); ext4_journal_stop(handle); if (error) -@@ -5673,7 +5708,12 @@ int ext4_mark_iloc_dirty(handle_t *handle, +@@ -5673,7 +5730,12 @@ int ext4_mark_iloc_dirty(handle_t *handle, } ext4_fc_track_inode(handle, inode); @@ -314347,7 +378883,22 @@ index 0f06305167d5a..bdadbe57ea804 100644 inode_inc_iversion(inode); /* the do_update_inode consumes one bh->b_count */ -@@ -5989,7 +6029,7 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val) +@@ -5751,6 +5813,14 @@ static int __ext4_expand_extra_isize(struct inode *inode, + return 0; + } + ++ /* ++ * We may need to allocate external xattr block so we need quotas ++ * initialized. Here we can be called with various locks held so we ++ * cannot affort to initialize quotas ourselves. So just bail. 
++ */ ++ if (dquot_initialize_needed(inode)) ++ return -EAGAIN; ++ + /* try to expand with EAs present */ + error = ext4_expand_extra_isize_ea(inode, new_extra_isize, + raw_inode, handle); +@@ -5989,7 +6059,7 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val) return PTR_ERR(handle); ext4_fc_mark_ineligible(inode->i_sb, @@ -314357,10 +378908,20 @@ index 0f06305167d5a..bdadbe57ea804 100644 ext4_handle_sync(handle); ext4_journal_stop(handle); diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c -index 606dee9e08a32..f61b59045c6d3 100644 +index 606dee9e08a32..2e6d03e5790e5 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c -@@ -169,7 +169,7 @@ static long swap_inode_boot_loader(struct super_block *sb, +@@ -124,7 +124,8 @@ static long swap_inode_boot_loader(struct super_block *sb, + blkcnt_t blocks; + unsigned short bytes; + +- inode_bl = ext4_iget(sb, EXT4_BOOT_LOADER_INO, EXT4_IGET_SPECIAL); ++ inode_bl = ext4_iget(sb, EXT4_BOOT_LOADER_INO, ++ EXT4_IGET_SPECIAL | EXT4_IGET_BAD); + if (IS_ERR(inode_bl)) + return PTR_ERR(inode_bl); + ei_bl = EXT4_I(inode_bl); +@@ -169,12 +170,12 @@ static long swap_inode_boot_loader(struct super_block *sb, err = -EINVAL; goto err_out; } @@ -314369,7 +378930,13 @@ index 606dee9e08a32..f61b59045c6d3 100644 /* Protect extent tree against block allocations via delalloc */ ext4_double_down_write_data_sem(inode, inode_bl); -@@ -252,7 +252,6 @@ revert: + +- if (inode_bl->i_nlink == 0) { ++ if (is_bad_inode(inode_bl) || !S_ISREG(inode_bl->i_mode)) { + /* this inode has never been used as a BOOT_LOADER */ + set_nlink(inode_bl, 1); + i_uid_write(inode_bl, 0); +@@ -252,7 +253,6 @@ revert: err_out1: ext4_journal_stop(handle); @@ -314377,7 +378944,29 @@ index 606dee9e08a32..f61b59045c6d3 100644 ext4_double_up_write_data_sem(inode, inode_bl); err_out: -@@ -1076,7 +1075,7 @@ mext_out: +@@ -492,6 +492,10 @@ static int ext4_ioctl_setproject(struct inode *inode, __u32 projid) + if (ext4_is_quota_file(inode)) + return err; + ++ err = dquot_initialize(inode); ++ if (err) ++ return err; ++ + err = ext4_get_inode_loc(inode, &iloc); + if (err) + return err; +@@ -507,10 +511,6 @@ static int ext4_ioctl_setproject(struct inode *inode, __u32 projid) + brelse(iloc.bh); + } + +- err = dquot_initialize(inode); +- if (err) +- return err; +- + handle = ext4_journal_start(inode, EXT4_HT_QUOTA, + EXT4_QUOTA_INIT_BLOCKS(sb) + + EXT4_QUOTA_DEL_BLOCKS(sb) + 3); +@@ -1076,7 +1076,7 @@ mext_out: err = ext4_resize_fs(sb, n_blocks_count); if (EXT4_SB(sb)->s_journal) { @@ -314386,7 +378975,7 @@ index 606dee9e08a32..f61b59045c6d3 100644 jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal); err2 = jbd2_journal_flush(EXT4_SB(sb)->s_journal, 0); jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal); -@@ -1117,8 +1116,6 @@ resizefs_out: +@@ -1117,8 +1117,6 @@ resizefs_out: sizeof(range))) return -EFAULT; @@ -314935,7 +379524,7 @@ index 72bfac2d6dce9..0c7498a599430 100644 ret = cnt; break; diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c -index 7e0b4f81c6c06..af5a75a89e6e1 100644 +index 7e0b4f81c6c06..b0ea646454ac8 100644 --- a/fs/ext4/migrate.c +++ b/fs/ext4/migrate.c @@ -417,7 +417,7 @@ int ext4_ext_migrate(struct inode *inode) @@ -314947,7 +379536,17 @@ index 7e0b4f81c6c06..af5a75a89e6e1 100644 uid_t owner[2]; /* -@@ -437,12 +437,12 @@ int ext4_ext_migrate(struct inode *inode) +@@ -425,7 +425,8 @@ int ext4_ext_migrate(struct inode *inode) + * already is extent-based, error out. 
+ */ + if (!ext4_has_feature_extents(inode->i_sb) || +- (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) ++ ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS) || ++ ext4_has_inline_data(inode)) + return -EINVAL; + + if (S_ISLNK(inode->i_mode) && inode->i_blocks == 0) +@@ -437,12 +438,12 @@ int ext4_ext_migrate(struct inode *inode) percpu_down_write(&sbi->s_writepages_rwsem); /* @@ -314964,7 +379563,7 @@ index 7e0b4f81c6c06..af5a75a89e6e1 100644 if (IS_ERR(handle)) { retval = PTR_ERR(handle); -@@ -459,6 +459,14 @@ int ext4_ext_migrate(struct inode *inode) +@@ -459,6 +460,14 @@ int ext4_ext_migrate(struct inode *inode) ext4_journal_stop(handle); goto out_unlock; } @@ -314979,7 +379578,7 @@ index 7e0b4f81c6c06..af5a75a89e6e1 100644 i_size_write(tmp_inode, i_size_read(inode)); /* * Set the i_nlink to zero so it will be deleted later -@@ -467,7 +475,6 @@ int ext4_ext_migrate(struct inode *inode) +@@ -467,7 +476,6 @@ int ext4_ext_migrate(struct inode *inode) clear_nlink(tmp_inode); ext4_ext_tree_init(handle, tmp_inode); @@ -314987,7 +379586,7 @@ index 7e0b4f81c6c06..af5a75a89e6e1 100644 ext4_journal_stop(handle); /* -@@ -492,17 +499,10 @@ int ext4_ext_migrate(struct inode *inode) +@@ -492,17 +500,10 @@ int ext4_ext_migrate(struct inode *inode) handle = ext4_journal_start(inode, EXT4_HT_MIGRATE, 1); if (IS_ERR(handle)) { @@ -315005,7 +379604,7 @@ index 7e0b4f81c6c06..af5a75a89e6e1 100644 i_data = ei->i_data; memset(&lb, 0, sizeof(lb)); -@@ -576,6 +576,7 @@ err_out: +@@ -576,6 +577,7 @@ err_out: * the inode is not visible to user space. */ tmp_inode->i_blocks = 0; @@ -315014,7 +379613,7 @@ index 7e0b4f81c6c06..af5a75a89e6e1 100644 /* Reset the extent details */ ext4_ext_tree_init(handle, tmp_inode); diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c -index da7698341d7d3..971a08d947f42 100644 +index da7698341d7d3..1e6cc6c21d606 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -54,6 +54,7 @@ static struct buffer_head *ext4_append(handle_t *handle, @@ -315229,7 +379828,25 @@ index da7698341d7d3..971a08d947f42 100644 map -= count; dx_sort_map(map, count); /* Ensure that neither split block is over half full */ -@@ -2997,14 +3052,14 @@ bool ext4_empty_dir(struct inode *inode) +@@ -2204,8 +2259,16 @@ static int make_indexed_dir(handle_t *handle, struct ext4_filename *fname, + memset(de, 0, len); /* wipe old data */ + de = (struct ext4_dir_entry_2 *) data2; + top = data2 + len; +- while ((char *)(de2 = ext4_next_entry(de, blocksize)) < top) ++ while ((char *)(de2 = ext4_next_entry(de, blocksize)) < top) { ++ if (ext4_check_dir_entry(dir, NULL, de, bh2, data2, len, ++ (data2 + (blocksize - csum_size) - ++ (char *) de))) { ++ brelse(bh2); ++ brelse(bh); ++ return -EFSCORRUPTED; ++ } + de = de2; ++ } + de->rec_len = ext4_rec_len_to_disk(data2 + (blocksize - csum_size) - + (char *) de, blocksize); + +@@ -2997,14 +3060,14 @@ bool ext4_empty_dir(struct inode *inode) if (inode->i_size < ext4_dir_rec_len(1, NULL) + ext4_dir_rec_len(2, NULL)) { EXT4_ERROR_INODE(inode, "invalid size"); @@ -315246,7 +379863,7 @@ index da7698341d7d3..971a08d947f42 100644 de = (struct ext4_dir_entry_2 *) bh->b_data; if (ext4_check_dir_entry(inode, NULL, de, bh, bh->b_data, bh->b_size, -@@ -3012,7 +3067,7 @@ bool ext4_empty_dir(struct inode *inode) +@@ -3012,7 +3075,7 @@ bool ext4_empty_dir(struct inode *inode) le32_to_cpu(de->inode) != inode->i_ino || strcmp(".", de->name)) { ext4_warning_inode(inode, "directory missing '.'"); brelse(bh); @@ -315255,7 +379872,7 @@ index da7698341d7d3..971a08d947f42 100644 } offset = 
ext4_rec_len_from_disk(de->rec_len, sb->s_blocksize); de = ext4_next_entry(de, sb->s_blocksize); -@@ -3021,7 +3076,7 @@ bool ext4_empty_dir(struct inode *inode) +@@ -3021,7 +3084,7 @@ bool ext4_empty_dir(struct inode *inode) le32_to_cpu(de->inode) == 0 || strcmp("..", de->name)) { ext4_warning_inode(inode, "directory missing '..'"); brelse(bh); @@ -315264,7 +379881,7 @@ index da7698341d7d3..971a08d947f42 100644 } offset += ext4_rec_len_from_disk(de->rec_len, sb->s_blocksize); while (offset < inode->i_size) { -@@ -3035,16 +3090,13 @@ bool ext4_empty_dir(struct inode *inode) +@@ -3035,16 +3098,13 @@ bool ext4_empty_dir(struct inode *inode) continue; } if (IS_ERR(bh)) @@ -315284,7 +379901,109 @@ index da7698341d7d3..971a08d947f42 100644 brelse(bh); return false; } -@@ -3455,6 +3507,9 @@ static struct buffer_head *ext4_get_first_dir_block(handle_t *handle, +@@ -3144,14 +3204,20 @@ end_rmdir: + return retval; + } + +-int __ext4_unlink(handle_t *handle, struct inode *dir, const struct qstr *d_name, +- struct inode *inode) ++int __ext4_unlink(struct inode *dir, const struct qstr *d_name, ++ struct inode *inode, ++ struct dentry *dentry /* NULL during fast_commit recovery */) + { + int retval = -ENOENT; + struct buffer_head *bh; + struct ext4_dir_entry_2 *de; ++ handle_t *handle; + int skip_remove_dentry = 0; + ++ /* ++ * Keep this outside the transaction; it may have to set up the ++ * directory's encryption key, which isn't GFP_NOFS-safe. ++ */ + bh = ext4_find_entry(dir, d_name, &de, NULL); + if (IS_ERR(bh)) + return PTR_ERR(bh); +@@ -3168,7 +3234,14 @@ int __ext4_unlink(handle_t *handle, struct inode *dir, const struct qstr *d_name + if (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY) + skip_remove_dentry = 1; + else +- goto out; ++ goto out_bh; ++ } ++ ++ handle = ext4_journal_start(dir, EXT4_HT_DIR, ++ EXT4_DATA_TRANS_BLOCKS(dir->i_sb)); ++ if (IS_ERR(handle)) { ++ retval = PTR_ERR(handle); ++ goto out_bh; + } + + if (IS_DIRSYNC(dir)) +@@ -3177,12 +3250,12 @@ int __ext4_unlink(handle_t *handle, struct inode *dir, const struct qstr *d_name + if (!skip_remove_dentry) { + retval = ext4_delete_entry(handle, dir, de, bh); + if (retval) +- goto out; ++ goto out_handle; + dir->i_ctime = dir->i_mtime = current_time(dir); + ext4_update_dx_flag(dir); + retval = ext4_mark_inode_dirty(handle, dir); + if (retval) +- goto out; ++ goto out_handle; + } else { + retval = 0; + } +@@ -3195,15 +3268,17 @@ int __ext4_unlink(handle_t *handle, struct inode *dir, const struct qstr *d_name + ext4_orphan_add(handle, inode); + inode->i_ctime = current_time(inode); + retval = ext4_mark_inode_dirty(handle, inode); +- +-out: ++ if (dentry && !retval) ++ ext4_fc_track_unlink(handle, dentry); ++out_handle: ++ ext4_journal_stop(handle); ++out_bh: + brelse(bh); + return retval; + } + + static int ext4_unlink(struct inode *dir, struct dentry *dentry) + { +- handle_t *handle; + int retval; + + if (unlikely(ext4_forced_shutdown(EXT4_SB(dir->i_sb)))) +@@ -3221,16 +3296,7 @@ static int ext4_unlink(struct inode *dir, struct dentry *dentry) + if (retval) + goto out_trace; + +- handle = ext4_journal_start(dir, EXT4_HT_DIR, +- EXT4_DATA_TRANS_BLOCKS(dir->i_sb)); +- if (IS_ERR(handle)) { +- retval = PTR_ERR(handle); +- goto out_trace; +- } +- +- retval = __ext4_unlink(handle, dir, &dentry->d_name, d_inode(dentry)); +- if (!retval) +- ext4_fc_track_unlink(handle, dentry); ++ retval = __ext4_unlink(dir, &dentry->d_name, d_inode(dentry), dentry); + #ifdef CONFIG_UNICODE + /* VFS negative dentries are incompatible with Encoding and 
+ * Case-insensitiveness. Eventually we'll want avoid +@@ -3241,8 +3307,6 @@ static int ext4_unlink(struct inode *dir, struct dentry *dentry) + if (IS_CASEFOLDED(dir)) + d_invalidate(dentry); + #endif +- if (handle) +- ext4_journal_stop(handle); + + out_trace: + trace_ext4_unlink_exit(dentry, retval); +@@ -3455,6 +3519,9 @@ static struct buffer_head *ext4_get_first_dir_block(handle_t *handle, struct buffer_head *bh; if (!ext4_has_inline_data(inode)) { @@ -315294,7 +380013,7 @@ index da7698341d7d3..971a08d947f42 100644 /* The first directory block must not be a hole, so * treat it as DIRENT_HTREE */ -@@ -3463,9 +3518,30 @@ static struct buffer_head *ext4_get_first_dir_block(handle_t *handle, +@@ -3463,9 +3530,30 @@ static struct buffer_head *ext4_get_first_dir_block(handle_t *handle, *retval = PTR_ERR(bh); return NULL; } @@ -315328,7 +380047,17 @@ index da7698341d7d3..971a08d947f42 100644 return bh; } -@@ -3889,7 +3965,7 @@ static int ext4_rename(struct user_namespace *mnt_userns, struct inode *old_dir, +@@ -3722,6 +3810,9 @@ static int ext4_rename(struct user_namespace *mnt_userns, struct inode *old_dir, + return -EXDEV; + + retval = dquot_initialize(old.dir); ++ if (retval) ++ return retval; ++ retval = dquot_initialize(old.inode); + if (retval) + return retval; + retval = dquot_initialize(new.dir); +@@ -3889,7 +3980,7 @@ static int ext4_rename(struct user_namespace *mnt_userns, struct inode *old_dir, * dirents in directories. */ ext4_fc_mark_ineligible(old.inode->i_sb, @@ -315337,7 +380066,7 @@ index da7698341d7d3..971a08d947f42 100644 } else { if (new.inode) ext4_fc_track_unlink(handle, new.dentry); -@@ -4049,7 +4125,7 @@ static int ext4_cross_rename(struct inode *old_dir, struct dentry *old_dentry, +@@ -4049,7 +4140,7 @@ static int ext4_cross_rename(struct inode *old_dir, struct dentry *old_dentry, if (unlikely(retval)) goto end_rename; ext4_fc_mark_ineligible(new.inode->i_sb, @@ -315346,6 +380075,101 @@ index da7698341d7d3..971a08d947f42 100644 if (old.dir_bh) { retval = ext4_rename_dir_finish(handle, &old, new.dir->i_ino); if (retval) +diff --git a/fs/ext4/orphan.c b/fs/ext4/orphan.c +index 53adc8f570a3f..c26c404ac58bf 100644 +--- a/fs/ext4/orphan.c ++++ b/fs/ext4/orphan.c +@@ -181,8 +181,8 @@ int ext4_orphan_add(handle_t *handle, struct inode *inode) + } else + brelse(iloc.bh); + +- jbd_debug(4, "superblock will point to %lu\n", inode->i_ino); +- jbd_debug(4, "orphan inode %lu will point to %d\n", ++ ext4_debug("superblock will point to %lu\n", inode->i_ino); ++ ext4_debug("orphan inode %lu will point to %d\n", + inode->i_ino, NEXT_ORPHAN(inode)); + out: + ext4_std_error(sb, err); +@@ -251,7 +251,7 @@ int ext4_orphan_del(handle_t *handle, struct inode *inode) + } + + mutex_lock(&sbi->s_orphan_lock); +- jbd_debug(4, "remove inode %lu from orphan list\n", inode->i_ino); ++ ext4_debug("remove inode %lu from orphan list\n", inode->i_ino); + + prev = ei->i_orphan.prev; + list_del_init(&ei->i_orphan); +@@ -267,7 +267,7 @@ int ext4_orphan_del(handle_t *handle, struct inode *inode) + + ino_next = NEXT_ORPHAN(inode); + if (prev == &sbi->s_orphan) { +- jbd_debug(4, "superblock will point to %u\n", ino_next); ++ ext4_debug("superblock will point to %u\n", ino_next); + BUFFER_TRACE(sbi->s_sbh, "get_write_access"); + err = ext4_journal_get_write_access(handle, inode->i_sb, + sbi->s_sbh, EXT4_JTR_NONE); +@@ -286,7 +286,7 @@ int ext4_orphan_del(handle_t *handle, struct inode *inode) + struct inode *i_prev = + &list_entry(prev, struct ext4_inode_info, i_orphan)->vfs_inode; + +- jbd_debug(4, "orphan 
inode %lu will point to %u\n", ++ ext4_debug("orphan inode %lu will point to %u\n", + i_prev->i_ino, ino_next); + err = ext4_reserve_inode_write(handle, i_prev, &iloc2); + if (err) { +@@ -332,8 +332,8 @@ static void ext4_process_orphan(struct inode *inode, + ext4_msg(sb, KERN_DEBUG, + "%s: truncating inode %lu to %lld bytes", + __func__, inode->i_ino, inode->i_size); +- jbd_debug(2, "truncating inode %lu to %lld bytes\n", +- inode->i_ino, inode->i_size); ++ ext4_debug("truncating inode %lu to %lld bytes\n", ++ inode->i_ino, inode->i_size); + inode_lock(inode); + truncate_inode_pages(inode->i_mapping, inode->i_size); + ret = ext4_truncate(inode); +@@ -353,8 +353,8 @@ static void ext4_process_orphan(struct inode *inode, + ext4_msg(sb, KERN_DEBUG, + "%s: deleting unreferenced inode %lu", + __func__, inode->i_ino); +- jbd_debug(2, "deleting unreferenced inode %lu\n", +- inode->i_ino); ++ ext4_debug("deleting unreferenced inode %lu\n", ++ inode->i_ino); + (*nr_orphans)++; + } + iput(inode); /* The delete magic happens here! */ +@@ -391,7 +391,7 @@ void ext4_orphan_cleanup(struct super_block *sb, struct ext4_super_block *es) + int inodes_per_ob = ext4_inodes_per_orphan_block(sb); + + if (!es->s_last_orphan && !oi->of_blocks) { +- jbd_debug(4, "no orphan inodes to clean up\n"); ++ ext4_debug("no orphan inodes to clean up\n"); + return; + } + +@@ -412,10 +412,10 @@ void ext4_orphan_cleanup(struct super_block *sb, struct ext4_super_block *es) + /* don't clear list on RO mount w/ errors */ + if (es->s_last_orphan && !(s_flags & SB_RDONLY)) { + ext4_msg(sb, KERN_INFO, "Errors on filesystem, " +- "clearing orphan list.\n"); ++ "clearing orphan list."); + es->s_last_orphan = 0; + } +- jbd_debug(1, "Skipping orphan recovery on fs with errors.\n"); ++ ext4_debug("Skipping orphan recovery on fs with errors.\n"); + return; + } + +@@ -459,7 +459,7 @@ void ext4_orphan_cleanup(struct super_block *sb, struct ext4_super_block *es) + * so, skip the rest. + */ + if (EXT4_SB(sb)->s_mount_state & EXT4_ERROR_FS) { +- jbd_debug(1, "Skipping orphan recovery on fs with errors.\n"); ++ ext4_debug("Skipping orphan recovery on fs with errors.\n"); + es->s_last_orphan = 0; + break; + } diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c index f038d578d8d8f..18977ff8e4939 100644 --- a/fs/ext4/page-io.c @@ -315363,7 +380187,7 @@ index f038d578d8d8f..18977ff8e4939 100644 spin_unlock_irqrestore(&head->b_uptodate_lock, flags); if (!under_io) { diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c -index b63cb88ccdaed..62bbfe8960f3b 100644 +index b63cb88ccdaed..589ed99856f33 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -52,6 +52,16 @@ int ext4_resize_begin(struct super_block *sb) @@ -315383,14 +380207,46 @@ index b63cb88ccdaed..62bbfe8960f3b 100644 /* * If we are not using the primary superblock/GDT copy don't resize, * because the user tools have no way of handling this. Probably a -@@ -1462,6 +1472,7 @@ static void ext4_update_super(struct super_block *sb, +@@ -1435,8 +1445,6 @@ static void ext4_update_super(struct super_block *sb, + * active. 
*/ + ext4_r_blocks_count_set(es, ext4_r_blocks_count(es) + + reserved_blocks); +- ext4_superblock_csum_set(sb); +- unlock_buffer(sbi->s_sbh); + + /* Update the free space counts */ + percpu_counter_add(&sbi->s_freeclusters_counter, +@@ -1462,7 +1470,10 @@ static void ext4_update_super(struct super_block *sb, * Update the fs overhead information */ ext4_calculate_overhead(sb); + es->s_overhead_clusters = cpu_to_le32(sbi->s_overhead); ++ ext4_superblock_csum_set(sb); ++ unlock_buffer(sbi->s_sbh); if (test_opt(sb, DEBUG)) printk(KERN_DEBUG "EXT4-fs: added group %u:" + "%llu blocks(%llu free %llu reserved)\n", flex_gd->count, +@@ -1546,8 +1557,8 @@ exit_journal: + int meta_bg = ext4_has_feature_meta_bg(sb); + sector_t old_gdb = 0; + +- update_backups(sb, sbi->s_sbh->b_blocknr, (char *)es, +- sizeof(struct ext4_super_block), 0); ++ update_backups(sb, ext4_group_first_block_no(sb, 0), ++ (char *)es, sizeof(struct ext4_super_block), 0); + for (; gdb_num <= gdb_num_end; gdb_num++) { + struct buffer_head *gdb_bh; + +@@ -1758,7 +1769,7 @@ errout: + if (test_opt(sb, DEBUG)) + printk(KERN_DEBUG "EXT4-fs: extended group to %llu " + "blocks\n", ext4_blocks_count(es)); +- update_backups(sb, EXT4_SB(sb)->s_sbh->b_blocknr, ++ update_backups(sb, ext4_group_first_block_no(sb, 0), + (char *)es, sizeof(struct ext4_super_block), 0); + } + return err; @@ -1966,6 +1977,16 @@ int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count) } brelse(bh); @@ -315418,7 +380274,7 @@ index b63cb88ccdaed..62bbfe8960f3b 100644 err = ext4_alloc_flex_bg_array(sb, n_group + 1); diff --git a/fs/ext4/super.c b/fs/ext4/super.c -index 88d5d274a8684..985d79fb61287 100644 +index 88d5d274a8684..802ca160d31ed 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -186,19 +186,12 @@ int ext4_read_bh(struct buffer_head *bh, int op_flags, bh_end_io_t *end_io) @@ -315487,7 +380343,15 @@ index 88d5d274a8684..985d79fb61287 100644 if (sbi->s_journal) { aborted = is_journal_aborted(sbi->s_journal); err = jbd2_journal_destroy(sbi->s_journal); -@@ -1929,6 +1928,7 @@ static const struct mount_opts { +@@ -1289,6 +1288,7 @@ static struct inode *ext4_alloc_inode(struct super_block *sb) + return NULL; + + inode_set_iversion(&ei->vfs_inode, 1); ++ ei->i_flags = 0; + spin_lock_init(&ei->i_raw_lock); + INIT_LIST_HEAD(&ei->i_prealloc_list); + atomic_set(&ei->i_prealloc_active, 0); +@@ -1929,6 +1929,7 @@ static const struct mount_opts { MOPT_EXT4_ONLY | MOPT_CLEAR}, {Opt_warn_on_error, EXT4_MOUNT_WARN_ON_ERROR, MOPT_SET}, {Opt_nowarn_on_error, EXT4_MOUNT_WARN_ON_ERROR, MOPT_CLEAR}, @@ -315495,7 +380359,7 @@ index 88d5d274a8684..985d79fb61287 100644 {Opt_nojournal_checksum, EXT4_MOUNT_JOURNAL_CHECKSUM, MOPT_EXT4_ONLY | MOPT_CLEAR}, {Opt_journal_checksum, EXT4_MOUNT_JOURNAL_CHECKSUM, -@@ -2053,6 +2053,12 @@ static int ext4_set_test_dummy_encryption(struct super_block *sb, +@@ -2053,6 +2054,12 @@ static int ext4_set_test_dummy_encryption(struct super_block *sb, struct ext4_sb_info *sbi = EXT4_SB(sb); int err; @@ -315508,7 +380372,7 @@ index 88d5d274a8684..985d79fb61287 100644 /* * This mount option is just for testing, and it's not worthwhile to * implement the extra complexity (e.g. 
RCU protection) that would be -@@ -2080,11 +2086,13 @@ static int ext4_set_test_dummy_encryption(struct super_block *sb, +@@ -2080,11 +2087,13 @@ static int ext4_set_test_dummy_encryption(struct super_block *sb, return -1; } ext4_msg(sb, KERN_WARNING, "Test dummy encryption mode enabled"); @@ -315524,7 +380388,7 @@ index 88d5d274a8684..985d79fb61287 100644 } struct ext4_parsed_options { -@@ -3263,9 +3271,9 @@ static int ext4_run_li_request(struct ext4_li_request *elr) +@@ -3263,9 +3272,9 @@ static int ext4_run_li_request(struct ext4_li_request *elr) struct super_block *sb = elr->lr_super; ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count; ext4_group_t group = elr->lr_next_group; @@ -315535,7 +380399,7 @@ index 88d5d274a8684..985d79fb61287 100644 if (elr->lr_mode == EXT4_LI_MODE_PREFETCH_BBITMAP) { elr->lr_next_group = ext4_mb_prefetch(sb, group, -@@ -3302,14 +3310,13 @@ static int ext4_run_li_request(struct ext4_li_request *elr) +@@ -3302,14 +3311,13 @@ static int ext4_run_li_request(struct ext4_li_request *elr) ret = 1; if (!ret) { @@ -315553,7 +380417,7 @@ index 88d5d274a8684..985d79fb61287 100644 } elr->lr_next_sched = jiffies + elr->lr_timeout; elr->lr_next_group = group + 1; -@@ -3364,6 +3371,7 @@ static int ext4_lazyinit_thread(void *arg) +@@ -3364,6 +3372,7 @@ static int ext4_lazyinit_thread(void *arg) unsigned long next_wakeup, cur; BUG_ON(NULL == eli); @@ -315561,7 +380425,7 @@ index 88d5d274a8684..985d79fb61287 100644 cont_thread: while (true) { -@@ -3579,9 +3587,9 @@ int ext4_register_li_request(struct super_block *sb, +@@ -3579,9 +3588,9 @@ int ext4_register_li_request(struct super_block *sb, goto out; } @@ -315574,7 +380438,7 @@ index 88d5d274a8684..985d79fb61287 100644 goto out; elr = ext4_li_request_new(sb, first_not_zeroed); -@@ -3698,9 +3706,11 @@ static int count_overhead(struct super_block *sb, ext4_group_t grp, +@@ -3698,9 +3707,11 @@ static int count_overhead(struct super_block *sb, ext4_group_t grp, ext4_fsblk_t first_block, last_block, b; ext4_group_t i, ngroups = ext4_get_groups_count(sb); int s, j, count = 0; @@ -315587,7 +380451,7 @@ index 88d5d274a8684..985d79fb61287 100644 sbi->s_itb_per_group + 2); first_block = le32_to_cpu(sbi->s_es->s_first_data_block) + -@@ -4384,7 +4394,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) +@@ -4384,7 +4395,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) sbi->s_inodes_per_block; sbi->s_desc_per_block = blocksize / EXT4_DESC_SIZE(sb); sbi->s_sbh = bh; @@ -315596,7 +380460,7 @@ index 88d5d274a8684..985d79fb61287 100644 sbi->s_addr_per_block_bits = ilog2(EXT4_ADDR_PER_BLOCK(sb)); sbi->s_desc_per_block_bits = ilog2(EXT4_DESC_PER_BLOCK(sb)); -@@ -4614,14 +4624,13 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) +@@ -4614,14 +4625,13 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) /* Initialize fast commit stuff */ atomic_set(&sbi->s_fc_subtid, 0); @@ -315612,7 +380476,48 @@ index 88d5d274a8684..985d79fb61287 100644 spin_lock_init(&sbi->s_fc_lock); memset(&sbi->s_fc_stats, 0, sizeof(sbi->s_fc_stats)); sbi->s_fc_replay_state.fc_regions = NULL; -@@ -4778,19 +4787,22 @@ no_journal: +@@ -4654,30 +4664,31 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) + ext4_has_feature_journal_needs_recovery(sb)) { + ext4_msg(sb, KERN_ERR, "required journal recovery " + "suppressed and not mounted read-only"); +- goto failed_mount_wq; ++ goto failed_mount3a; + } else { + /* Nojournal mode, all journal 
mount options are illegal */ +- if (test_opt2(sb, EXPLICIT_JOURNAL_CHECKSUM)) { +- ext4_msg(sb, KERN_ERR, "can't mount with " +- "journal_checksum, fs mounted w/o journal"); +- goto failed_mount_wq; +- } + if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) { + ext4_msg(sb, KERN_ERR, "can't mount with " + "journal_async_commit, fs mounted w/o journal"); +- goto failed_mount_wq; ++ goto failed_mount3a; ++ } ++ ++ if (test_opt2(sb, EXPLICIT_JOURNAL_CHECKSUM)) { ++ ext4_msg(sb, KERN_ERR, "can't mount with " ++ "journal_checksum, fs mounted w/o journal"); ++ goto failed_mount3a; + } + if (sbi->s_commit_interval != JBD2_DEFAULT_MAX_COMMIT_AGE*HZ) { + ext4_msg(sb, KERN_ERR, "can't mount with " + "commit=%lu, fs mounted w/o journal", + sbi->s_commit_interval / HZ); +- goto failed_mount_wq; ++ goto failed_mount3a; + } + if (EXT4_MOUNT_DATA_FLAGS & + (sbi->s_mount_opt ^ sbi->s_def_mount_opt)) { + ext4_msg(sb, KERN_ERR, "can't mount with " + "data=, fs mounted w/o journal"); +- goto failed_mount_wq; ++ goto failed_mount3a; + } + sbi->s_def_mount_opt &= ~EXT4_MOUNT_JOURNAL_CHECKSUM; + clear_opt(sb, JOURNAL_CHECKSUM); +@@ -4778,19 +4789,22 @@ no_journal: goto failed_mount_wq; } @@ -315644,7 +380549,7 @@ index 88d5d274a8684..985d79fb61287 100644 err = ext4_calculate_overhead(sb); if (err) goto failed_mount_wq; -@@ -4892,14 +4904,6 @@ no_journal: +@@ -4892,14 +4906,6 @@ no_journal: err = percpu_counter_init(&sbi->s_freeinodes_counter, freei, GFP_KERNEL); } @@ -315659,7 +380564,7 @@ index 88d5d274a8684..985d79fb61287 100644 if (!err) err = percpu_counter_init(&sbi->s_dirs_counter, ext4_count_dirs(sb), GFP_KERNEL); -@@ -4957,6 +4961,14 @@ no_journal: +@@ -4957,6 +4963,14 @@ no_journal: EXT4_SB(sb)->s_mount_state |= EXT4_ORPHAN_FS; ext4_orphan_cleanup(sb, es); EXT4_SB(sb)->s_mount_state &= ~EXT4_ORPHAN_FS; @@ -315674,7 +380579,19 @@ index 88d5d274a8684..985d79fb61287 100644 if (needs_recovery) { ext4_msg(sb, KERN_INFO, "recovery complete"); err = ext4_mark_recovery_complete(sb, es); -@@ -5922,7 +5934,8 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) +@@ -5141,9 +5155,9 @@ static struct inode *ext4_get_journal_inode(struct super_block *sb, + return NULL; + } + +- jbd_debug(2, "Journal inode found at %p: %lld bytes\n", ++ ext4_debug("Journal inode found at %p: %lld bytes\n", + journal_inode, journal_inode->i_size); +- if (!S_ISREG(journal_inode->i_mode)) { ++ if (!S_ISREG(journal_inode->i_mode) || IS_ENCRYPTED(journal_inode)) { + ext4_msg(sb, KERN_ERR, "invalid journal inode"); + iput(journal_inode); + return NULL; +@@ -5922,7 +5936,8 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) if (err) goto restore_opts; } @@ -315684,7 +380601,7 @@ index 88d5d274a8684..985d79fb61287 100644 err = ext4_setup_super(sb, es, 0); if (err) -@@ -6189,7 +6202,7 @@ static int ext4_write_info(struct super_block *sb, int type) +@@ -6189,7 +6204,7 @@ static int ext4_write_info(struct super_block *sb, int type) handle_t *handle; /* Data block + inode block */ @@ -315693,7 +380610,7 @@ index 88d5d274a8684..985d79fb61287 100644 if (IS_ERR(handle)) return PTR_ERR(handle); ret = dquot_commit_info(sb, type); -@@ -6267,10 +6280,7 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id, +@@ -6267,10 +6282,7 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id, lockdep_set_quota_inode(path->dentry->d_inode, I_DATA_SEM_QUOTA); err = dquot_quota_on(sb, type, format_id, path); @@ -315705,7 +380622,7 @@ index 88d5d274a8684..985d79fb61287 100644 struct inode 
*inode = d_inode(path->dentry); handle_t *handle; -@@ -6290,7 +6300,12 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id, +@@ -6290,10 +6302,29 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id, ext4_journal_stop(handle); unlock_inode: inode_unlock(inode); @@ -315718,11 +380635,51 @@ index 88d5d274a8684..985d79fb61287 100644 return err; } -@@ -6353,8 +6368,19 @@ int ext4_enable_quotas(struct super_block *sb) ++static inline bool ext4_check_quota_inum(int type, unsigned long qf_inum) ++{ ++ switch (type) { ++ case USRQUOTA: ++ return qf_inum == EXT4_USR_QUOTA_INO; ++ case GRPQUOTA: ++ return qf_inum == EXT4_GRP_QUOTA_INO; ++ case PRJQUOTA: ++ return qf_inum >= EXT4_GOOD_OLD_FIRST_INO; ++ default: ++ BUG(); ++ } ++} ++ + static int ext4_quota_enable(struct super_block *sb, int type, int format_id, + unsigned int flags) + { +@@ -6310,9 +6341,16 @@ static int ext4_quota_enable(struct super_block *sb, int type, int format_id, + if (!qf_inums[type]) + return -EPERM; + ++ if (!ext4_check_quota_inum(type, qf_inums[type])) { ++ ext4_error(sb, "Bad quota inum: %lu, type: %d", ++ qf_inums[type], type); ++ return -EUCLEAN; ++ } ++ + qf_inode = ext4_iget(sb, qf_inums[type], EXT4_IGET_SPECIAL); + if (IS_ERR(qf_inode)) { +- ext4_error(sb, "Bad quota inode # %lu", qf_inums[type]); ++ ext4_error(sb, "Bad quota inode: %lu, type: %d", ++ qf_inums[type], type); + return PTR_ERR(qf_inode); + } + +@@ -6351,10 +6389,22 @@ int ext4_enable_quotas(struct super_block *sb) + if (err) { + ext4_warning(sb, "Failed to enable quota tracking " - "(type=%d, err=%d). Please run " - "e2fsck to fix.", type, err); +- "(type=%d, err=%d). Please run " +- "e2fsck to fix.", type, err); - for (type--; type >= 0; type--) ++ "(type=%d, err=%d, ino=%lu). 
" ++ "Please run e2fsck to fix.", type, ++ err, qf_inums[type]); + for (type--; type >= 0; type--) { + struct inode *inode; + @@ -315739,7 +380696,7 @@ index 88d5d274a8684..985d79fb61287 100644 return err; } -@@ -6458,7 +6484,7 @@ static ssize_t ext4_quota_write(struct super_block *sb, int type, +@@ -6458,7 +6508,7 @@ static ssize_t ext4_quota_write(struct super_block *sb, int type, struct buffer_head *bh; handle_t *handle = journal_current_handle(); @@ -315748,7 +380705,7 @@ index 88d5d274a8684..985d79fb61287 100644 ext4_msg(sb, KERN_WARNING, "Quota write (off=%llu, len=%llu)" " cancelled because transaction is not started", (unsigned long long)off, (unsigned long long)len); -@@ -6641,6 +6667,7 @@ static int __init ext4_init_fs(void) +@@ -6641,6 +6691,7 @@ static int __init ext4_init_fs(void) out: unregister_as_ext2(); unregister_as_ext3(); @@ -315756,7 +380713,7 @@ index 88d5d274a8684..985d79fb61287 100644 out05: destroy_inodecache(); out1: -@@ -6667,6 +6694,7 @@ static void __exit ext4_exit_fs(void) +@@ -6667,6 +6718,7 @@ static void __exit ext4_exit_fs(void) unregister_as_ext2(); unregister_as_ext3(); unregister_filesystem(&ext4_fs_type); @@ -315764,8 +380721,62 @@ index 88d5d274a8684..985d79fb61287 100644 destroy_inodecache(); ext4_exit_mballoc(); ext4_exit_sysfs(); +diff --git a/fs/ext4/sysfs.c b/fs/ext4/sysfs.c +index 2314f74465924..aa07b78ba9104 100644 +--- a/fs/ext4/sysfs.c ++++ b/fs/ext4/sysfs.c +@@ -489,6 +489,11 @@ static void ext4_sb_release(struct kobject *kobj) + complete(&sbi->s_kobj_unregister); + } + ++static void ext4_feat_release(struct kobject *kobj) ++{ ++ kfree(kobj); ++} ++ + static const struct sysfs_ops ext4_attr_ops = { + .show = ext4_attr_show, + .store = ext4_attr_store, +@@ -503,7 +508,7 @@ static struct kobj_type ext4_sb_ktype = { + static struct kobj_type ext4_feat_ktype = { + .default_groups = ext4_feat_groups, + .sysfs_ops = &ext4_attr_ops, +- .release = (void (*)(struct kobject *))kfree, ++ .release = ext4_feat_release, + }; + + void ext4_notify_error_sysfs(struct ext4_sb_info *sbi) +diff --git a/fs/ext4/verity.c b/fs/ext4/verity.c +index eacbd489e3bf1..5ece4d3c62109 100644 +--- a/fs/ext4/verity.c ++++ b/fs/ext4/verity.c +@@ -76,7 +76,7 @@ static int pagecache_write(struct inode *inode, const void *buf, size_t count, + size_t n = min_t(size_t, count, + PAGE_SIZE - offset_in_page(pos)); + struct page *page; +- void *fsdata; ++ void *fsdata = NULL; + int res; + + res = pagecache_write_begin(NULL, inode->i_mapping, pos, n, 0, +@@ -364,13 +364,14 @@ static struct page *ext4_read_merkle_tree_page(struct inode *inode, + pgoff_t index, + unsigned long num_ra_pages) + { +- DEFINE_READAHEAD(ractl, NULL, NULL, inode->i_mapping, index); + struct page *page; + + index += ext4_verity_metadata_pos(inode) >> PAGE_SHIFT; + + page = find_get_page_flags(inode->i_mapping, index, FGP_ACCESSED); + if (!page || !PageUptodate(page)) { ++ DEFINE_READAHEAD(ractl, NULL, NULL, inode->i_mapping, index); ++ + if (page) + put_page(page); + else if (num_ra_pages > 1) diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c -index 1e0fc1ed845bf..533216e80fa2b 100644 +index 1e0fc1ed845bf..b92da41e96409 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -436,6 +436,21 @@ error: @@ -315859,7 +380870,26 @@ index 1e0fc1ed845bf..533216e80fa2b 100644 get_bh(bh); unlock_buffer(bh); -@@ -1858,6 +1867,8 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode, +@@ -1272,7 +1281,7 @@ ext4_xattr_release_block(handle_t *handle, struct inode *inode, + ce = mb_cache_entry_get(ea_block_cache, hash, + 
bh->b_blocknr); + if (ce) { +- ce->e_reusable = 1; ++ set_bit(MBE_REUSABLE_B, &ce->e_flags); + mb_cache_entry_put(ea_block_cache, ce); + } + } +@@ -1432,6 +1441,9 @@ static struct inode *ext4_xattr_inode_create(handle_t *handle, + if (!err) + err = ext4_inode_attach_jinode(ea_inode); + if (err) { ++ if (ext4_xattr_inode_dec_ref(handle, ea_inode)) ++ ext4_warning_inode(ea_inode, ++ "cleanup dec ref error %d", err); + iput(ea_inode); + return ERR_PTR(err); + } +@@ -1858,6 +1870,8 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode, #define header(x) ((struct ext4_xattr_header *)(x)) if (s->base) { @@ -315868,7 +380898,7 @@ index 1e0fc1ed845bf..533216e80fa2b 100644 BUFFER_TRACE(bs->bh, "get_write_access"); error = ext4_journal_get_write_access(handle, sb, bs->bh, EXT4_JTR_NONE); -@@ -1873,9 +1884,20 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode, +@@ -1873,9 +1887,20 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode, * ext4_xattr_block_set() to reliably detect modified * block */ @@ -315892,7 +380922,7 @@ index 1e0fc1ed845bf..533216e80fa2b 100644 ea_bdebug(bs->bh, "modifying in-place"); error = ext4_xattr_set_entry(i, s, handle, inode, true /* is_block */); -@@ -1890,50 +1912,47 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode, +@@ -1890,50 +1915,47 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode, if (error) goto cleanup; goto inserted; @@ -315980,7 +381010,7 @@ index 1e0fc1ed845bf..533216e80fa2b 100644 } } else { /* Allocate a buffer where we construct the new block. */ -@@ -2000,18 +2019,13 @@ inserted: +@@ -2000,18 +2022,13 @@ inserted: lock_buffer(new_bh); /* * We have to be careful about races with @@ -316005,18 +381035,40 @@ index 1e0fc1ed845bf..533216e80fa2b 100644 /* * Undo everything and check mbcache * again. 
-@@ -2026,9 +2040,8 @@ inserted: +@@ -2026,10 +2043,9 @@ inserted: new_bh = NULL; goto inserted; } - ref = le32_to_cpu(BHDR(new_bh)->h_refcount) + 1; BHDR(new_bh)->h_refcount = cpu_to_le32(ref); - if (ref >= EXT4_XATTR_REFCOUNT_MAX) +- ce->e_reusable = 0; + if (ref == EXT4_XATTR_REFCOUNT_MAX) - ce->e_reusable = 0; ++ clear_bit(MBE_REUSABLE_B, &ce->e_flags); ea_bdebug(new_bh, "reusing; refcount now=%d", ref); -@@ -2176,8 +2189,9 @@ int ext4_xattr_ibody_find(struct inode *inode, struct ext4_xattr_info *i, + ext4_xattr_block_csum_set(inode, new_bh); +@@ -2057,19 +2073,11 @@ inserted: + + goal = ext4_group_first_block_no(sb, + EXT4_I(inode)->i_block_group); +- +- /* non-extent files can't have physical blocks past 2^32 */ +- if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) +- goal = goal & EXT4_MAX_BLOCK_FILE_PHYS; +- + block = ext4_new_meta_blocks(handle, inode, goal, 0, + NULL, &error); + if (error) + goto cleanup; + +- if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) +- BUG_ON(block > EXT4_MAX_BLOCK_FILE_PHYS); +- + ea_idebug(inode, "creating block %llu", + (unsigned long long)block); + +@@ -2176,8 +2184,9 @@ int ext4_xattr_ibody_find(struct inode *inode, struct ext4_xattr_info *i, struct ext4_inode *raw_inode; int error; @@ -316027,7 +381079,7 @@ index 1e0fc1ed845bf..533216e80fa2b 100644 raw_inode = ext4_raw_inode(&is->iloc); header = IHDR(inode, raw_inode); is->s.base = is->s.first = IFIRST(header); -@@ -2205,8 +2219,9 @@ int ext4_xattr_ibody_set(handle_t *handle, struct inode *inode, +@@ -2205,8 +2214,9 @@ int ext4_xattr_ibody_set(handle_t *handle, struct inode *inode, struct ext4_xattr_search *s = &is->s; int error; @@ -316038,7 +381090,7 @@ index 1e0fc1ed845bf..533216e80fa2b 100644 error = ext4_xattr_set_entry(i, s, handle, inode, false /* is_block */); if (error) return error; -@@ -2408,7 +2423,7 @@ retry_inode: +@@ -2408,7 +2418,7 @@ retry_inode: if (IS_SYNC(inode)) ext4_handle_sync(handle); } @@ -316047,7 +381099,7 @@ index 1e0fc1ed845bf..533216e80fa2b 100644 cleanup: brelse(is.iloc.bh); -@@ -2486,7 +2501,7 @@ retry: +@@ -2486,7 +2496,7 @@ retry: if (error == 0) error = error2; } @@ -316056,7 +381108,25 @@ index 1e0fc1ed845bf..533216e80fa2b 100644 return error; } -@@ -2920,7 +2935,7 @@ int ext4_xattr_delete_inode(handle_t *handle, struct inode *inode, +@@ -2539,7 +2549,7 @@ static int ext4_xattr_move_to_block(handle_t *handle, struct inode *inode, + + is = kzalloc(sizeof(struct ext4_xattr_ibody_find), GFP_NOFS); + bs = kzalloc(sizeof(struct ext4_xattr_block_find), GFP_NOFS); +- buffer = kmalloc(value_size, GFP_NOFS); ++ buffer = kvmalloc(value_size, GFP_NOFS); + b_entry_name = kmalloc(entry->e_name_len + 1, GFP_NOFS); + if (!is || !bs || !buffer || !b_entry_name) { + error = -ENOMEM; +@@ -2591,7 +2601,7 @@ static int ext4_xattr_move_to_block(handle_t *handle, struct inode *inode, + error = 0; + out: + kfree(b_entry_name); +- kfree(buffer); ++ kvfree(buffer); + if (is) + brelse(is->iloc.bh); + if (bs) +@@ -2920,7 +2930,7 @@ int ext4_xattr_delete_inode(handle_t *handle, struct inode *inode, error); goto cleanup; } @@ -316544,10 +381614,20 @@ index 1820e9c106f7d..5c78350158df1 100644 GFP_NOFS, false, F2FS_SB(sb)); if (!fname->cf_name.name) diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c -index 866e72b29bd5a..761fd42c93f23 100644 +index 866e72b29bd5a..6a9ab5c11939f 100644 --- a/fs/f2fs/extent_cache.c +++ b/fs/f2fs/extent_cache.c -@@ -804,9 +804,8 @@ void f2fs_drop_extent_tree(struct inode *inode) +@@ -415,7 +415,8 @@ static bool f2fs_lookup_extent_tree(struct 
inode *inode, pgoff_t pgofs, + struct extent_node *en; + bool ret = false; + +- f2fs_bug_on(sbi, !et); ++ if (!et) ++ return false; + + trace_f2fs_lookup_extent_tree_start(inode, pgofs); + +@@ -804,9 +805,8 @@ void f2fs_drop_extent_tree(struct inode *inode) if (!f2fs_may_extent_tree(inode)) return; @@ -316887,7 +381967,7 @@ index 9c8ef33bd8d32..758048a885d24 100644 if ((iocb->ki_flags & IOCB_NOWAIT)) { diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c -index 77391e3b7d68f..e75a276f5b9c7 100644 +index 77391e3b7d68f..fa1f5fb750b39 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -1002,7 +1002,7 @@ static bool is_alive(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, @@ -316895,11 +381975,11 @@ index 77391e3b7d68f..e75a276f5b9c7 100644 struct page *node_page; nid_t nid; - unsigned int ofs_in_node; -+ unsigned int ofs_in_node, max_addrs; ++ unsigned int ofs_in_node, max_addrs, base; block_t source_blkaddr; nid = le32_to_cpu(sum->nid); -@@ -1023,6 +1023,19 @@ static bool is_alive(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, +@@ -1023,6 +1023,26 @@ static bool is_alive(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, set_sbi_flag(sbi, SBI_NEED_FSCK); } @@ -316908,18 +381988,25 @@ index 77391e3b7d68f..e75a276f5b9c7 100644 + return false; + } + -+ max_addrs = IS_INODE(node_page) ? DEF_ADDRS_PER_INODE : -+ DEF_ADDRS_PER_BLOCK; -+ if (ofs_in_node >= max_addrs) { -+ f2fs_err(sbi, "Inconsistent ofs_in_node:%u in summary, ino:%u, nid:%u, max:%u", -+ ofs_in_node, dni->ino, dni->nid, max_addrs); ++ if (IS_INODE(node_page)) { ++ base = offset_in_addr(F2FS_INODE(node_page)); ++ max_addrs = DEF_ADDRS_PER_INODE; ++ } else { ++ base = 0; ++ max_addrs = DEF_ADDRS_PER_BLOCK; ++ } ++ ++ if (base + ofs_in_node >= max_addrs) { ++ f2fs_err(sbi, "Inconsistent blkaddr offset: base:%u, ofs_in_node:%u, max:%u, ino:%u, nid:%u", ++ base, ofs_in_node, max_addrs, dni->ino, dni->nid); ++ f2fs_put_page(node_page, 1); + return false; + } + *nofs = ofs_of_node(node_page); source_blkaddr = data_blkaddr(NULL, node_page, ofs_in_node); f2fs_put_page(node_page, 1); -@@ -1036,7 +1049,7 @@ static bool is_alive(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, +@@ -1036,7 +1056,7 @@ static bool is_alive(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, if (!test_and_set_bit(segno, SIT_I(sbi)->invalid_segmap)) { f2fs_err(sbi, "mismatched blkaddr %u (source_blkaddr %u) in seg %u", blkaddr, source_blkaddr, segno); @@ -316928,7 +382015,7 @@ index 77391e3b7d68f..e75a276f5b9c7 100644 } } #endif -@@ -1454,7 +1467,8 @@ next_step: +@@ -1454,7 +1474,8 @@ next_step: if (phase == 3) { inode = f2fs_iget(sb, dni.ino); @@ -316938,6 +382025,50 @@ index 77391e3b7d68f..e75a276f5b9c7 100644 continue; if (!down_write_trylock( +@@ -1659,8 +1680,9 @@ freed: + get_valid_blocks(sbi, segno, false) == 0) + seg_freed++; + +- if (__is_large_section(sbi) && segno + 1 < end_segno) +- sbi->next_victim_seg[gc_type] = segno + 1; ++ if (__is_large_section(sbi)) ++ sbi->next_victim_seg[gc_type] = ++ (segno + 1 < end_segno) ? 
segno + 1 : NULL_SEGNO; + skip: + f2fs_put_page(sum_page, 0); + } +@@ -2037,8 +2059,6 @@ out_unlock: + if (err) + return err; + +- set_sbi_flag(sbi, SBI_IS_RESIZEFS); +- + freeze_super(sbi->sb); + down_write(&sbi->gc_lock); + down_write(&sbi->cp_global_sem); +@@ -2054,6 +2074,7 @@ out_unlock: + if (err) + goto out_err; + ++ set_sbi_flag(sbi, SBI_IS_RESIZEFS); + err = free_segment_range(sbi, secs, false); + if (err) + goto recover_out; +@@ -2077,6 +2098,7 @@ out_unlock: + f2fs_commit_super(sbi, false); + } + recover_out: ++ clear_sbi_flag(sbi, SBI_IS_RESIZEFS); + if (err) { + set_sbi_flag(sbi, SBI_NEED_FSCK); + f2fs_err(sbi, "resize_fs failed, should run fsck to repair!"); +@@ -2089,6 +2111,5 @@ out_err: + up_write(&sbi->cp_global_sem); + up_write(&sbi->gc_lock); + thaw_super(sbi->sb); +- clear_sbi_flag(sbi, SBI_IS_RESIZEFS); + return err; + } diff --git a/fs/f2fs/hash.c b/fs/f2fs/hash.c index e3beac546c63a..2788ceeaf5c22 100644 --- a/fs/f2fs/hash.c @@ -317307,7 +382438,7 @@ index 9c528e583c9d5..7a86a8dcf4f1c 100644 goto out; diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c -index e863136081b47..0e6e73bc42d4c 100644 +index e863136081b47..f810c6bbeff02 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1291,7 +1291,11 @@ struct page *f2fs_new_node_page(struct dnode_of_data *dn, unsigned int ofs) @@ -317323,7 +382454,17 @@ index e863136081b47..0e6e73bc42d4c 100644 #endif new_ni.nid = dn->nid; new_ni.ino = dn->inode->i_ino; -@@ -1443,6 +1447,7 @@ page_hit: +@@ -1353,8 +1357,7 @@ static int read_node_page(struct page *page, int op_flags) + return err; + + /* NEW_ADDR can be seen, after cp_error drops some dirty node pages */ +- if (unlikely(ni.blk_addr == NULL_ADDR || ni.blk_addr == NEW_ADDR) || +- is_sbi_flag_set(sbi, SBI_IS_SHUTDOWN)) { ++ if (unlikely(ni.blk_addr == NULL_ADDR || ni.blk_addr == NEW_ADDR)) { + ClearPageUptodate(page); + return -ENOENT; + } +@@ -1443,6 +1446,7 @@ page_hit: nid, nid_of_node(page), ino_of_node(page), ofs_of_node(page), cpver_of_node(page), next_blkaddr_of_node(page)); @@ -317331,7 +382472,7 @@ index e863136081b47..0e6e73bc42d4c 100644 err = -EINVAL; out_err: ClearPageUptodate(page); -@@ -2105,8 +2110,12 @@ static int f2fs_write_node_pages(struct address_space *mapping, +@@ -2105,8 +2109,12 @@ static int f2fs_write_node_pages(struct address_space *mapping, if (wbc->sync_mode == WB_SYNC_ALL) atomic_inc(&sbi->wb_sync_req[NODE]); @@ -317428,7 +382569,7 @@ index 04655511d7f51..ed21e34b59c7f 100644 f2fs_replace_block(sbi, &dn, src, dest, ni.version, false, false); diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c -index a135d22474154..af810b2d5d904 100644 +index a135d22474154..194c0811fbdfe 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -356,16 +356,19 @@ void f2fs_drop_inmem_page(struct inode *inode, struct page *page) @@ -317472,7 +382613,28 @@ index a135d22474154..af810b2d5d904 100644 blk_finish_plug(&plug); mutex_unlock(&sbi->flush_lock); -@@ -4508,7 +4511,7 @@ static int build_sit_entries(struct f2fs_sb_info *sbi) +@@ -1548,7 +1551,7 @@ retry: + if (i + 1 < dpolicy->granularity) + break; + +- if (i < DEFAULT_DISCARD_GRANULARITY && dpolicy->ordered) ++ if (i + 1 < DEFAULT_DISCARD_GRANULARITY && dpolicy->ordered) + return __issue_discard_cmd_orderly(sbi, dpolicy); + + pend_list = &dcc->pend_list[i]; +@@ -2126,8 +2129,10 @@ int f2fs_start_discard_thread(struct f2fs_sb_info *sbi) + + dcc->f2fs_issue_discard = kthread_run(issue_discard_thread, sbi, + "f2fs_discard-%u:%u", MAJOR(dev), MINOR(dev)); +- if (IS_ERR(dcc->f2fs_issue_discard)) ++ if 
(IS_ERR(dcc->f2fs_issue_discard)) { + err = PTR_ERR(dcc->f2fs_issue_discard); ++ dcc->f2fs_issue_discard = NULL; ++ } + + return err; + } +@@ -4508,7 +4513,7 @@ static int build_sit_entries(struct f2fs_sb_info *sbi) unsigned int i, start, end; unsigned int readed, start_blk = 0; int err = 0; @@ -317481,7 +382643,7 @@ index a135d22474154..af810b2d5d904 100644 do { readed = f2fs_ra_meta_pages(sbi, start_blk, BIO_MAX_VECS, -@@ -4533,8 +4536,14 @@ static int build_sit_entries(struct f2fs_sb_info *sbi) +@@ -4533,8 +4538,14 @@ static int build_sit_entries(struct f2fs_sb_info *sbi) if (err) return err; seg_info_from_raw_sit(se, &sit); @@ -317498,7 +382660,7 @@ index a135d22474154..af810b2d5d904 100644 if (f2fs_block_unit_discard(sbi)) { /* build discard map only one time */ -@@ -4574,15 +4583,22 @@ static int build_sit_entries(struct f2fs_sb_info *sbi) +@@ -4574,15 +4585,22 @@ static int build_sit_entries(struct f2fs_sb_info *sbi) sit = sit_in_journal(journal, i); old_valid_blocks = se->valid_blocks; @@ -317525,7 +382687,7 @@ index a135d22474154..af810b2d5d904 100644 if (f2fs_block_unit_discard(sbi)) { if (is_set_ckpt_flags(sbi, CP_TRIMMED_FLAG)) { -@@ -4604,13 +4620,24 @@ static int build_sit_entries(struct f2fs_sb_info *sbi) +@@ -4604,13 +4622,24 @@ static int build_sit_entries(struct f2fs_sb_info *sbi) } up_read(&curseg->journal_rwsem); @@ -317554,7 +382716,7 @@ index a135d22474154..af810b2d5d904 100644 } static void init_free_segmap(struct f2fs_sb_info *sbi) -@@ -4747,6 +4774,13 @@ static int sanity_check_curseg(struct f2fs_sb_info *sbi) +@@ -4747,6 +4776,13 @@ static int sanity_check_curseg(struct f2fs_sb_info *sbi) sanity_check_seg_type(sbi, curseg->seg_type); @@ -317643,7 +382805,7 @@ index 89fff258727d1..957edb6d70d7b 100644 static inline bool f2fs_is_checkpoint_ready(struct f2fs_sb_info *sbi) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c -index 78ebc306ee2b5..a0d1ef73b83ea 100644 +index 78ebc306ee2b5..f4e8de1f47899 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -58,6 +58,7 @@ const char *f2fs_fault_name[FAULT_MAX] = { @@ -317847,7 +383009,7 @@ index 78ebc306ee2b5..a0d1ef73b83ea 100644 /* For write statistics */ sbi->sectors_written_start = f2fs_get_sectors_written(sbi); -@@ -4352,6 +4423,8 @@ free_node_inode: +@@ -4352,12 +4423,14 @@ free_node_inode: free_stats: f2fs_destroy_stats(sbi); free_nm: @@ -317856,6 +383018,13 @@ index 78ebc306ee2b5..a0d1ef73b83ea 100644 f2fs_destroy_node_manager(sbi); free_sm: f2fs_destroy_segment_manager(sbi); +- f2fs_destroy_post_read_wq(sbi); + stop_ckpt_thread: + f2fs_stop_ckpt_thread(sbi); ++ f2fs_destroy_post_read_wq(sbi); + free_devices: + destroy_device_list(sbi); + kvfree(sbi->ckpt); diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index a32fe31c33b8e..8b36e61fe7edb 100644 --- a/fs/f2fs/sysfs.c @@ -317881,7 +383050,7 @@ index a32fe31c33b8e..8b36e61fe7edb 100644 sbi->gc_mode = GC_NORMAL; } diff --git a/fs/f2fs/verity.c b/fs/f2fs/verity.c -index 03549b5ba204a..fe5acdccaae19 100644 +index 03549b5ba204a..a28968bb56e62 100644 --- a/fs/f2fs/verity.c +++ b/fs/f2fs/verity.c @@ -136,7 +136,7 @@ static int f2fs_begin_enable_verity(struct file *filp) @@ -317893,6 +383062,22 @@ index 03549b5ba204a..fe5acdccaae19 100644 if (err) return err; +@@ -261,13 +261,14 @@ static struct page *f2fs_read_merkle_tree_page(struct inode *inode, + pgoff_t index, + unsigned long num_ra_pages) + { +- DEFINE_READAHEAD(ractl, NULL, NULL, inode->i_mapping, index); + struct page *page; + + index += f2fs_verity_metadata_pos(inode) >> PAGE_SHIFT; + + page = 
find_get_page_flags(inode->i_mapping, index, FGP_ACCESSED); + if (!page || !PageUptodate(page)) { ++ DEFINE_READAHEAD(ractl, NULL, NULL, inode->i_mapping, index); ++ + if (page) + put_page(page); + else if (num_ra_pages > 1) diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c index 1d2d29dcd41ce..797ac505a075a 100644 --- a/fs/f2fs/xattr.c @@ -317951,7 +383136,7 @@ index 978ac6751aeb7..1db348f8f887a 100644 } fatent->nr_bhs = 1; diff --git a/fs/file.c b/fs/file.c -index 8627dacfc4246..ee93173467025 100644 +index 8627dacfc4246..214364e19d76f 100644 --- a/fs/file.c +++ b/fs/file.c @@ -87,6 +87,21 @@ static void copy_fdtable(struct fdtable *nfdt, struct fdtable *ofdt) @@ -318094,6 +383279,24 @@ index 8627dacfc4246..ee93173467025 100644 rcu_read_unlock(); return file; +@@ -956,7 +1029,16 @@ static unsigned long __fget_light(unsigned int fd, fmode_t mask) + struct files_struct *files = current->files; + struct file *file; + +- if (atomic_read(&files->count) == 1) { ++ /* ++ * If another thread is concurrently calling close_fd() followed ++ * by put_files_struct(), we must not observe the old table ++ * entry combined with the new refcount - otherwise we could ++ * return a file that is concurrently being freed. ++ * ++ * atomic_read_acquire() pairs with atomic_dec_and_test() in ++ * put_files_struct(). ++ */ ++ if (atomic_read_acquire(&files->count) == 1) { + file = files_lookup_fd_raw(files, fd); + if (!file || unlikely(file->f_mode & mask)) + return 0; diff --git a/fs/file_table.c b/fs/file_table.c index 45437f8e1003e..6f297f9782fc5 100644 --- a/fs/file_table.c @@ -318121,7 +383324,7 @@ index 45437f8e1003e..6f297f9782fc5 100644 void __init files_init(void) { diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c -index 81ec192ce0673..2a27f0256fa31 100644 +index 81ec192ce0673..f4a5a0c2858a1 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -120,6 +120,7 @@ static bool inode_io_list_move_locked(struct inode *inode, @@ -318188,23 +383391,42 @@ index 81ec192ce0673..2a27f0256fa31 100644 while (!list_empty(&tmp)) { sb = wb_inode(tmp.prev)->i_sb; list_for_each_prev_safe(pos, node, &tmp) { -@@ -1739,6 +1745,15 @@ static int writeback_single_inode(struct inode *inode, +@@ -1733,12 +1739,29 @@ static int writeback_single_inode(struct inode *inode, + wb = inode_to_wb_and_lock_list(inode); + spin_lock(&inode->i_lock); + /* +- * If the inode is now fully clean, then it can be safely removed from +- * its writeback list (if any). Otherwise the flusher threads are +- * responsible for the writeback lists. ++ * If the inode is freeing, its i_io_list shoudn't be updated ++ * as it can be finally deleted at this moment. */ - if (!(inode->i_state & I_DIRTY_ALL)) - inode_cgwb_move_to_attached(inode, wb); -+ else if (!(inode->i_state & I_SYNC_QUEUED)) { -+ if ((inode->i_state & I_DIRTY)) -+ redirty_tail_locked(inode, wb); -+ else if (inode->i_state & I_DIRTY_TIME) { -+ inode->dirtied_when = jiffies; -+ inode_io_list_move_locked(inode, wb, &wb->b_dirty_time); +- if (!(inode->i_state & I_DIRTY_ALL)) +- inode_cgwb_move_to_attached(inode, wb); ++ if (!(inode->i_state & I_FREEING)) { ++ /* ++ * If the inode is now fully clean, then it can be safely ++ * removed from its writeback list (if any). Otherwise the ++ * flusher threads are responsible for the writeback lists. 
++ */ ++ if (!(inode->i_state & I_DIRTY_ALL)) ++ inode_cgwb_move_to_attached(inode, wb); ++ else if (!(inode->i_state & I_SYNC_QUEUED)) { ++ if ((inode->i_state & I_DIRTY)) ++ redirty_tail_locked(inode, wb); ++ else if (inode->i_state & I_DIRTY_TIME) { ++ inode->dirtied_when = jiffies; ++ inode_io_list_move_locked(inode, ++ wb, ++ &wb->b_dirty_time); ++ } + } + } + spin_unlock(&wb->list_lock); inode_sync_complete(inode); out: -@@ -1802,11 +1817,12 @@ static long writeback_sb_inodes(struct super_block *sb, +@@ -1802,11 +1825,12 @@ static long writeback_sb_inodes(struct super_block *sb, }; unsigned long start_time = jiffies; long write_chunk; @@ -318218,7 +383440,7 @@ index 81ec192ce0673..2a27f0256fa31 100644 if (inode->i_sb != sb) { if (work->sb) { -@@ -1848,8 +1864,8 @@ static long writeback_sb_inodes(struct super_block *sb, +@@ -1848,8 +1872,8 @@ static long writeback_sb_inodes(struct super_block *sb, * We'll have another go at writing back this inode * when we completed a full scan of b_io. */ @@ -318228,7 +383450,7 @@ index 81ec192ce0673..2a27f0256fa31 100644 trace_writeback_sb_inodes_requeue(inode); continue; } -@@ -1882,7 +1898,9 @@ static long writeback_sb_inodes(struct super_block *sb, +@@ -1882,7 +1906,9 @@ static long writeback_sb_inodes(struct super_block *sb, wbc_detach_inode(&wbc); work->nr_pages -= write_chunk - wbc.nr_to_write; @@ -318239,7 +383461,7 @@ index 81ec192ce0673..2a27f0256fa31 100644 if (need_resched()) { /* -@@ -1904,7 +1922,7 @@ static long writeback_sb_inodes(struct super_block *sb, +@@ -1904,7 +1930,7 @@ static long writeback_sb_inodes(struct super_block *sb, tmp_wb = inode_to_wb_and_lock_list(inode); spin_lock(&inode->i_lock); if (!(inode->i_state & I_DIRTY_ALL)) @@ -318248,7 +383470,7 @@ index 81ec192ce0673..2a27f0256fa31 100644 requeue_inode(inode, tmp_wb, &wbc); inode_sync_complete(inode); spin_unlock(&inode->i_lock); -@@ -1918,14 +1936,14 @@ static long writeback_sb_inodes(struct super_block *sb, +@@ -1918,14 +1944,14 @@ static long writeback_sb_inodes(struct super_block *sb, * bail out to wb_writeback() often enough to check * background threshold and other termination conditions. */ @@ -318265,7 +383487,7 @@ index 81ec192ce0673..2a27f0256fa31 100644 } static long __writeback_inodes_wb(struct bdi_writeback *wb, -@@ -2096,13 +2114,13 @@ static struct wb_writeback_work *get_next_work_item(struct bdi_writeback *wb) +@@ -2096,13 +2122,13 @@ static struct wb_writeback_work *get_next_work_item(struct bdi_writeback *wb) { struct wb_writeback_work *work = NULL; @@ -318281,7 +383503,7 @@ index 81ec192ce0673..2a27f0256fa31 100644 return work; } -@@ -2382,10 +2400,25 @@ void __mark_inode_dirty(struct inode *inode, int flags) +@@ -2382,10 +2408,25 @@ void __mark_inode_dirty(struct inode *inode, int flags) { struct super_block *sb = inode->i_sb; int dirtytime = 0; @@ -318307,7 +383529,7 @@ index 81ec192ce0673..2a27f0256fa31 100644 /* * Notify the filesystem about the inode being dirtied, so that * (if needed) it can update on-disk fields and journal the -@@ -2395,7 +2428,8 @@ void __mark_inode_dirty(struct inode *inode, int flags) +@@ -2395,7 +2436,8 @@ void __mark_inode_dirty(struct inode *inode, int flags) */ trace_writeback_dirty_inode_start(inode, flags); if (sb->s_op->dirty_inode) @@ -318317,7 +383539,7 @@ index 81ec192ce0673..2a27f0256fa31 100644 trace_writeback_dirty_inode(inode, flags); /* I_DIRTY_INODE supersedes I_DIRTY_TIME. 
*/ -@@ -2416,23 +2450,28 @@ void __mark_inode_dirty(struct inode *inode, int flags) +@@ -2416,23 +2458,28 @@ void __mark_inode_dirty(struct inode *inode, int flags) */ smp_mb(); @@ -318353,7 +383575,7 @@ index 81ec192ce0673..2a27f0256fa31 100644 /* * If the inode is queued for writeback by flush worker, just * update its dirty state. Once the flush worker is done with -@@ -2440,7 +2479,7 @@ void __mark_inode_dirty(struct inode *inode, int flags) +@@ -2440,7 +2487,7 @@ void __mark_inode_dirty(struct inode *inode, int flags) * list, based upon its state. */ if (inode->i_state & I_SYNC_QUEUED) @@ -318362,7 +383584,7 @@ index 81ec192ce0673..2a27f0256fa31 100644 /* * Only add valid (hashed) inodes to the superblock's -@@ -2448,22 +2487,19 @@ void __mark_inode_dirty(struct inode *inode, int flags) +@@ -2448,22 +2495,19 @@ void __mark_inode_dirty(struct inode *inode, int flags) */ if (!S_ISBLK(inode->i_mode)) { if (inode_unhashed(inode)) @@ -318387,7 +383609,7 @@ index 81ec192ce0673..2a27f0256fa31 100644 inode->dirtied_when = jiffies; if (dirtytime) inode->dirtied_time_when = jiffies; -@@ -2477,6 +2513,7 @@ void __mark_inode_dirty(struct inode *inode, int flags) +@@ -2477,6 +2521,7 @@ void __mark_inode_dirty(struct inode *inode, int flags) dirty_list); spin_unlock(&wb->list_lock); @@ -318395,7 +383617,7 @@ index 81ec192ce0673..2a27f0256fa31 100644 trace_writeback_dirty_inode_enqueue(inode); /* -@@ -2491,7 +2528,9 @@ void __mark_inode_dirty(struct inode *inode, int flags) +@@ -2491,7 +2536,9 @@ void __mark_inode_dirty(struct inode *inode, int flags) return; } } @@ -318526,7 +383748,7 @@ index d9b977c0f38dc..80a2181b402b2 100644 goto unlock; diff --git a/fs/fuse/file.c b/fs/fuse/file.c -index 11404f8c21c75..71e9e301e569d 100644 +index 11404f8c21c75..cc95a1c376449 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -1164,7 +1164,7 @@ static ssize_t fuse_fill_write_pages(struct fuse_io_args *ia, @@ -318573,16 +383795,80 @@ index 11404f8c21c75..71e9e301e569d 100644 if (!err) fuse_sync_writes(inode); -@@ -3002,6 +3014,8 @@ out: - if (lock_inode) - inode_unlock(inode); +@@ -2925,11 +2937,9 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset, + .mode = mode + }; + int err; +- bool lock_inode = !(mode & FALLOC_FL_KEEP_SIZE) || +- (mode & (FALLOC_FL_PUNCH_HOLE | +- FALLOC_FL_ZERO_RANGE)); +- +- bool block_faults = FUSE_IS_DAX(inode) && lock_inode; ++ bool block_faults = FUSE_IS_DAX(inode) && ++ (!(mode & FALLOC_FL_KEEP_SIZE) || ++ (mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_ZERO_RANGE))); + + if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE | + FALLOC_FL_ZERO_RANGE)) +@@ -2938,22 +2948,20 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset, + if (fm->fc->no_fallocate) + return -EOPNOTSUPP; -+ fuse_flush_time_update(inode); +- if (lock_inode) { +- inode_lock(inode); +- if (block_faults) { +- filemap_invalidate_lock(inode->i_mapping); +- err = fuse_dax_break_layouts(inode, 0, 0); +- if (err) +- goto out; +- } ++ inode_lock(inode); ++ if (block_faults) { ++ filemap_invalidate_lock(inode->i_mapping); ++ err = fuse_dax_break_layouts(inode, 0, 0); ++ if (err) ++ goto out; ++ } + +- if (mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_ZERO_RANGE)) { +- loff_t endbyte = offset + length - 1; ++ if (mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_ZERO_RANGE)) { ++ loff_t endbyte = offset + length - 1; + +- err = fuse_writeback_range(inode, offset, endbyte); +- if (err) +- goto out; +- } ++ err = fuse_writeback_range(inode, offset, endbyte); ++ if (err) ++ goto out; + } + + 
if (!(mode & FALLOC_FL_KEEP_SIZE) && +@@ -2963,6 +2971,10 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset, + goto out; + } + ++ err = file_modified(file); ++ if (err) ++ goto out; + + if (!(mode & FALLOC_FL_KEEP_SIZE)) + set_bit(FUSE_I_SIZE_UNSTABLE, &fi->state); + +@@ -2999,8 +3011,9 @@ out: + if (block_faults) + filemap_invalidate_unlock(inode->i_mapping); + +- if (lock_inode) +- inode_unlock(inode); ++ inode_unlock(inode); ++ ++ fuse_flush_time_update(inode); + return err; } - -@@ -3111,6 +3125,8 @@ out: +@@ -3111,6 +3124,8 @@ out: inode_unlock(inode_out); file_accessed(file_in); @@ -318687,11 +383973,63 @@ index 546ea3d58fb47..e91d407038392 100644 return err; } +diff --git a/fs/fuse/readdir.c b/fs/fuse/readdir.c +index bc267832310c7..d5294e663df50 100644 +--- a/fs/fuse/readdir.c ++++ b/fs/fuse/readdir.c +@@ -77,8 +77,10 @@ static void fuse_add_dirent_to_cache(struct file *file, + goto unlock; + + addr = kmap_atomic(page); +- if (!offset) ++ if (!offset) { + clear_page(addr); ++ SetPageUptodate(page); ++ } + memcpy(addr + offset, dirent, reclen); + kunmap_atomic(addr); + fi->rdc.size = (index << PAGE_SHIFT) + offset + reclen; +@@ -516,6 +518,12 @@ retry_locked: + + page = find_get_page_flags(file->f_mapping, index, + FGP_ACCESSED | FGP_LOCK); ++ /* Page gone missing, then re-added to cache, but not initialized? */ ++ if (page && !PageUptodate(page)) { ++ unlock_page(page); ++ put_page(page); ++ page = NULL; ++ } + spin_lock(&fi->rdc.lock); + if (!page) { + /* +diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c +index 005e920f5d4a3..4bbfb156e6a40 100644 +--- a/fs/gfs2/aops.c ++++ b/fs/gfs2/aops.c +@@ -452,8 +452,6 @@ static int stuffed_readpage(struct gfs2_inode *ip, struct page *page) + return error; + + kaddr = kmap_atomic(page); +- if (dsize > gfs2_max_stuffed_size(ip)) +- dsize = gfs2_max_stuffed_size(ip); + memcpy(kaddr, dibh->b_data + sizeof(struct gfs2_dinode), dsize); + memset(kaddr + dsize, 0, PAGE_SIZE - dsize); + kunmap_atomic(kaddr); diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c -index 5414c2c335809..f785af2aa23cf 100644 +index 5414c2c335809..0ec1eaf338338 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c -@@ -940,7 +940,7 @@ do_alloc: +@@ -61,9 +61,6 @@ static int gfs2_unstuffer_page(struct gfs2_inode *ip, struct buffer_head *dibh, + void *kaddr = kmap(page); + u64 dsize = i_size_read(inode); + +- if (dsize > gfs2_max_stuffed_size(ip)) +- dsize = gfs2_max_stuffed_size(ip); +- + memcpy(kaddr, dibh->b_data + sizeof(struct gfs2_dinode), dsize); + memset(kaddr + dsize, 0, PAGE_SIZE - dsize); + kunmap(page); +@@ -940,7 +937,7 @@ do_alloc: else if (height == ip->i_height) ret = gfs2_hole_size(inode, lblock, len, mp, iomap); else @@ -318700,7 +384038,7 @@ index 5414c2c335809..f785af2aa23cf 100644 } else if (flags & IOMAP_WRITE) { u64 alloc_size; -@@ -961,46 +961,6 @@ hole_found: +@@ -961,46 +958,6 @@ hole_found: goto out; } @@ -318747,7 +384085,7 @@ index 5414c2c335809..f785af2aa23cf 100644 static int gfs2_iomap_page_prepare(struct inode *inode, loff_t pos, unsigned len) { -@@ -1118,11 +1078,6 @@ out_qunlock: +@@ -1118,11 +1075,6 @@ out_qunlock: return ret; } @@ -318759,7 +384097,7 @@ index 5414c2c335809..f785af2aa23cf 100644 static int gfs2_iomap_begin(struct inode *inode, loff_t pos, loff_t length, unsigned flags, struct iomap *iomap, struct iomap *srcmap) -@@ -1135,12 +1090,6 @@ static int gfs2_iomap_begin(struct inode *inode, loff_t pos, loff_t length, +@@ -1135,12 +1087,6 @@ static int gfs2_iomap_begin(struct inode *inode, loff_t pos, loff_t length, 
iomap->flags |= IOMAP_F_BUFFER_HEAD; trace_gfs2_iomap_start(ip, pos, length, flags); @@ -318772,7 +384110,7 @@ index 5414c2c335809..f785af2aa23cf 100644 ret = __gfs2_iomap_get(inode, pos, length, flags, iomap, &mp); if (ret) goto out_unlock; -@@ -1168,10 +1117,7 @@ static int gfs2_iomap_begin(struct inode *inode, loff_t pos, loff_t length, +@@ -1168,10 +1114,7 @@ static int gfs2_iomap_begin(struct inode *inode, loff_t pos, loff_t length, ret = gfs2_iomap_begin_write(inode, pos, length, flags, iomap, &mp); out_unlock: @@ -318783,7 +384121,7 @@ index 5414c2c335809..f785af2aa23cf 100644 trace_gfs2_iomap_end(ip, iomap, ret); return ret; } -@@ -1208,26 +1154,21 @@ static int gfs2_iomap_end(struct inode *inode, loff_t pos, loff_t length, +@@ -1208,26 +1151,21 @@ static int gfs2_iomap_end(struct inode *inode, loff_t pos, loff_t length, if (length != written && (iomap->flags & IOMAP_F_NEW)) { /* Deallocate blocks that were just allocated. */ @@ -318816,7 +384154,7 @@ index 5414c2c335809..f785af2aa23cf 100644 return 0; } -@@ -2204,7 +2145,7 @@ int gfs2_setattr_size(struct inode *inode, u64 newsize) +@@ -2204,7 +2142,7 @@ int gfs2_setattr_size(struct inode *inode, u64 newsize) ret = do_shrink(inode, newsize); out: @@ -319777,6 +385115,90 @@ index 31a8f2f649b52..9012487da4c69 100644 extern void gfs2_inode_remember_delete(struct gfs2_glock *gl, u64 generation); extern bool gfs2_inode_already_deleted(struct gfs2_glock *gl, u64 generation); +diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c +index 79c621c7863d2..450032b4c886e 100644 +--- a/fs/gfs2/glops.c ++++ b/fs/gfs2/glops.c +@@ -398,38 +398,39 @@ static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf) + struct timespec64 atime; + u16 height, depth; + umode_t mode = be32_to_cpu(str->di_mode); +- bool is_new = ip->i_inode.i_state & I_NEW; ++ struct inode *inode = &ip->i_inode; ++ bool is_new = inode->i_state & I_NEW; + + if (unlikely(ip->i_no_addr != be64_to_cpu(str->di_num.no_addr))) + goto corrupt; +- if (unlikely(!is_new && inode_wrong_type(&ip->i_inode, mode))) ++ if (unlikely(!is_new && inode_wrong_type(inode, mode))) + goto corrupt; + ip->i_no_formal_ino = be64_to_cpu(str->di_num.no_formal_ino); +- ip->i_inode.i_mode = mode; ++ inode->i_mode = mode; + if (is_new) { +- ip->i_inode.i_rdev = 0; ++ inode->i_rdev = 0; + switch (mode & S_IFMT) { + case S_IFBLK: + case S_IFCHR: +- ip->i_inode.i_rdev = MKDEV(be32_to_cpu(str->di_major), +- be32_to_cpu(str->di_minor)); ++ inode->i_rdev = MKDEV(be32_to_cpu(str->di_major), ++ be32_to_cpu(str->di_minor)); + break; + } + } + +- i_uid_write(&ip->i_inode, be32_to_cpu(str->di_uid)); +- i_gid_write(&ip->i_inode, be32_to_cpu(str->di_gid)); +- set_nlink(&ip->i_inode, be32_to_cpu(str->di_nlink)); +- i_size_write(&ip->i_inode, be64_to_cpu(str->di_size)); +- gfs2_set_inode_blocks(&ip->i_inode, be64_to_cpu(str->di_blocks)); ++ i_uid_write(inode, be32_to_cpu(str->di_uid)); ++ i_gid_write(inode, be32_to_cpu(str->di_gid)); ++ set_nlink(inode, be32_to_cpu(str->di_nlink)); ++ i_size_write(inode, be64_to_cpu(str->di_size)); ++ gfs2_set_inode_blocks(inode, be64_to_cpu(str->di_blocks)); + atime.tv_sec = be64_to_cpu(str->di_atime); + atime.tv_nsec = be32_to_cpu(str->di_atime_nsec); +- if (timespec64_compare(&ip->i_inode.i_atime, &atime) < 0) +- ip->i_inode.i_atime = atime; +- ip->i_inode.i_mtime.tv_sec = be64_to_cpu(str->di_mtime); +- ip->i_inode.i_mtime.tv_nsec = be32_to_cpu(str->di_mtime_nsec); +- ip->i_inode.i_ctime.tv_sec = be64_to_cpu(str->di_ctime); +- ip->i_inode.i_ctime.tv_nsec = be32_to_cpu(str->di_ctime_nsec); 
++ if (timespec64_compare(&inode->i_atime, &atime) < 0) ++ inode->i_atime = atime; ++ inode->i_mtime.tv_sec = be64_to_cpu(str->di_mtime); ++ inode->i_mtime.tv_nsec = be32_to_cpu(str->di_mtime_nsec); ++ inode->i_ctime.tv_sec = be64_to_cpu(str->di_ctime); ++ inode->i_ctime.tv_nsec = be32_to_cpu(str->di_ctime_nsec); + + ip->i_goal = be64_to_cpu(str->di_goal_meta); + ip->i_generation = be64_to_cpu(str->di_generation); +@@ -437,7 +438,7 @@ static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf) + ip->i_diskflags = be32_to_cpu(str->di_flags); + ip->i_eattr = be64_to_cpu(str->di_eattr); + /* i_diskflags and i_eattr must be set before gfs2_set_inode_flags() */ +- gfs2_set_inode_flags(&ip->i_inode); ++ gfs2_set_inode_flags(inode); + height = be16_to_cpu(str->di_height); + if (unlikely(height > GFS2_MAX_META_HEIGHT)) + goto corrupt; +@@ -449,8 +450,11 @@ static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf) + ip->i_depth = (u8)depth; + ip->i_entries = be32_to_cpu(str->di_entries); + +- if (S_ISREG(ip->i_inode.i_mode)) +- gfs2_set_aops(&ip->i_inode); ++ if (gfs2_is_stuffed(ip) && inode->i_size > gfs2_max_stuffed_size(ip)) ++ goto corrupt; ++ ++ if (S_ISREG(inode->i_mode)) ++ gfs2_set_aops(inode); + + return 0; + corrupt: diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index 0fe49770166ea..ca42d310fd4d6 100644 --- a/fs/gfs2/incore.h @@ -319813,6 +385235,52 @@ index 3130f85d2b3f4..97ee17843b4d0 100644 gfs2_qa_put(ip); fail_free_acls: posix_acl_release(default_acl); +diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c +index 7f8410d8fdc1d..fb3b488370834 100644 +--- a/fs/gfs2/ops_fstype.c ++++ b/fs/gfs2/ops_fstype.c +@@ -180,7 +180,10 @@ static int gfs2_check_sb(struct gfs2_sbd *sdp, int silent) + pr_warn("Invalid block size\n"); + return -EINVAL; + } +- ++ if (sb->sb_bsize_shift != ffs(sb->sb_bsize) - 1) { ++ pr_warn("Invalid block size shift\n"); ++ return -EINVAL; ++ } + return 0; + } + +@@ -385,8 +388,10 @@ static int init_names(struct gfs2_sbd *sdp, int silent) + if (!table[0]) + table = sdp->sd_vfs->s_id; + +- strlcpy(sdp->sd_proto_name, proto, GFS2_FSNAME_LEN); +- strlcpy(sdp->sd_table_name, table, GFS2_FSNAME_LEN); ++ BUILD_BUG_ON(GFS2_LOCKNAME_LEN > GFS2_FSNAME_LEN); ++ ++ strscpy(sdp->sd_proto_name, proto, GFS2_LOCKNAME_LEN); ++ strscpy(sdp->sd_table_name, table, GFS2_LOCKNAME_LEN); + + table = sdp->sd_table_name; + while ((table = strchr(table, '/'))) +@@ -1443,13 +1448,13 @@ static int gfs2_parse_param(struct fs_context *fc, struct fs_parameter *param) + + switch (o) { + case Opt_lockproto: +- strlcpy(args->ar_lockproto, param->string, GFS2_LOCKNAME_LEN); ++ strscpy(args->ar_lockproto, param->string, GFS2_LOCKNAME_LEN); + break; + case Opt_locktable: +- strlcpy(args->ar_locktable, param->string, GFS2_LOCKNAME_LEN); ++ strscpy(args->ar_locktable, param->string, GFS2_LOCKNAME_LEN); + break; + case Opt_hostdata: +- strlcpy(args->ar_hostdata, param->string, GFS2_LOCKNAME_LEN); ++ strscpy(args->ar_hostdata, param->string, GFS2_LOCKNAME_LEN); + break; + case Opt_spectator: + args->ar_spectator = 1; diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index be0997e24d60b..dc77080a82bbf 100644 --- a/fs/gfs2/quota.c @@ -319946,10 +385414,64 @@ index a6855fd796e03..2f80f3bbf8767 100644 u64 bstart, u32 blen, int meta); extern void gfs2_free_meta(struct gfs2_inode *ip, struct gfs2_rgrpd *rgd, diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c -index 6e00d15ef0a82..0f2e0530dd433 100644 +index 6e00d15ef0a82..d615974ce4183 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c -@@ -1398,17 
+1398,10 @@ out: +@@ -378,6 +378,7 @@ out: + + void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf) + { ++ const struct inode *inode = &ip->i_inode; + struct gfs2_dinode *str = buf; + + str->di_header.mh_magic = cpu_to_be32(GFS2_MAGIC); +@@ -385,15 +386,15 @@ void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf) + str->di_header.mh_format = cpu_to_be32(GFS2_FORMAT_DI); + str->di_num.no_addr = cpu_to_be64(ip->i_no_addr); + str->di_num.no_formal_ino = cpu_to_be64(ip->i_no_formal_ino); +- str->di_mode = cpu_to_be32(ip->i_inode.i_mode); +- str->di_uid = cpu_to_be32(i_uid_read(&ip->i_inode)); +- str->di_gid = cpu_to_be32(i_gid_read(&ip->i_inode)); +- str->di_nlink = cpu_to_be32(ip->i_inode.i_nlink); +- str->di_size = cpu_to_be64(i_size_read(&ip->i_inode)); +- str->di_blocks = cpu_to_be64(gfs2_get_inode_blocks(&ip->i_inode)); +- str->di_atime = cpu_to_be64(ip->i_inode.i_atime.tv_sec); +- str->di_mtime = cpu_to_be64(ip->i_inode.i_mtime.tv_sec); +- str->di_ctime = cpu_to_be64(ip->i_inode.i_ctime.tv_sec); ++ str->di_mode = cpu_to_be32(inode->i_mode); ++ str->di_uid = cpu_to_be32(i_uid_read(inode)); ++ str->di_gid = cpu_to_be32(i_gid_read(inode)); ++ str->di_nlink = cpu_to_be32(inode->i_nlink); ++ str->di_size = cpu_to_be64(i_size_read(inode)); ++ str->di_blocks = cpu_to_be64(gfs2_get_inode_blocks(inode)); ++ str->di_atime = cpu_to_be64(inode->i_atime.tv_sec); ++ str->di_mtime = cpu_to_be64(inode->i_mtime.tv_sec); ++ str->di_ctime = cpu_to_be64(inode->i_ctime.tv_sec); + + str->di_goal_meta = cpu_to_be64(ip->i_goal); + str->di_goal_data = cpu_to_be64(ip->i_goal); +@@ -401,16 +402,16 @@ void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf) + + str->di_flags = cpu_to_be32(ip->i_diskflags); + str->di_height = cpu_to_be16(ip->i_height); +- str->di_payload_format = cpu_to_be32(S_ISDIR(ip->i_inode.i_mode) && ++ str->di_payload_format = cpu_to_be32(S_ISDIR(inode->i_mode) && + !(ip->i_diskflags & GFS2_DIF_EXHASH) ? + GFS2_FORMAT_DE : 0); + str->di_depth = cpu_to_be16(ip->i_depth); + str->di_entries = cpu_to_be32(ip->i_entries); + + str->di_eattr = cpu_to_be64(ip->i_eattr); +- str->di_atime_nsec = cpu_to_be32(ip->i_inode.i_atime.tv_nsec); +- str->di_mtime_nsec = cpu_to_be32(ip->i_inode.i_mtime.tv_nsec); +- str->di_ctime_nsec = cpu_to_be32(ip->i_inode.i_ctime.tv_nsec); ++ str->di_atime_nsec = cpu_to_be32(inode->i_atime.tv_nsec); ++ str->di_mtime_nsec = cpu_to_be32(inode->i_mtime.tv_nsec); ++ str->di_ctime_nsec = cpu_to_be32(inode->i_ctime.tv_nsec); + } + + /** +@@ -1398,17 +1399,10 @@ out: truncate_inode_pages_final(&inode->i_data); if (ip->i_qadata) gfs2_assert_warn(sdp, ip->i_qadata->qa_ref == 0); @@ -319968,7 +385490,7 @@ index 6e00d15ef0a82..0f2e0530dd433 100644 if (gfs2_holder_initialized(&ip->i_iopen_gh)) { struct gfs2_glock *gl = ip->i_iopen_gh.gh_gl; -@@ -1421,6 +1414,13 @@ out: +@@ -1421,6 +1415,13 @@ out: gfs2_holder_uninit(&ip->i_iopen_gh); gfs2_glock_put_eventually(gl); } @@ -319982,8 +385504,167 @@ index 6e00d15ef0a82..0f2e0530dd433 100644 } static struct inode *gfs2_alloc_inode(struct super_block *sb) +diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c +index 4a95a92546a0d..7c9c6d0b38fd6 100644 +--- a/fs/hfs/inode.c ++++ b/fs/hfs/inode.c +@@ -456,14 +456,16 @@ int hfs_write_inode(struct inode *inode, struct writeback_control *wbc) + /* panic? */ + return -EIO; + ++ res = -EIO; ++ if (HFS_I(main_inode)->cat_key.CName.len > HFS_NAMELEN) ++ goto out; + fd.search_key->cat = HFS_I(main_inode)->cat_key; + if (hfs_brec_find(&fd)) +- /* panic? 
*/ + goto out; + + if (S_ISDIR(main_inode->i_mode)) { + if (fd.entrylength < sizeof(struct hfs_cat_dir)) +- /* panic? */; ++ goto out; + hfs_bnode_read(fd.bnode, &rec, fd.entryoffset, + sizeof(struct hfs_cat_dir)); + if (rec.type != HFS_CDR_DIR || +@@ -476,6 +478,8 @@ int hfs_write_inode(struct inode *inode, struct writeback_control *wbc) + hfs_bnode_write(fd.bnode, &rec, fd.entryoffset, + sizeof(struct hfs_cat_dir)); + } else if (HFS_IS_RSRC(inode)) { ++ if (fd.entrylength < sizeof(struct hfs_cat_file)) ++ goto out; + hfs_bnode_read(fd.bnode, &rec, fd.entryoffset, + sizeof(struct hfs_cat_file)); + hfs_inode_write_fork(inode, rec.file.RExtRec, +@@ -484,7 +488,7 @@ int hfs_write_inode(struct inode *inode, struct writeback_control *wbc) + sizeof(struct hfs_cat_file)); + } else { + if (fd.entrylength < sizeof(struct hfs_cat_file)) +- /* panic? */; ++ goto out; + hfs_bnode_read(fd.bnode, &rec, fd.entryoffset, + sizeof(struct hfs_cat_file)); + if (rec.type != HFS_CDR_FIL || +@@ -501,9 +505,10 @@ int hfs_write_inode(struct inode *inode, struct writeback_control *wbc) + hfs_bnode_write(fd.bnode, &rec, fd.entryoffset, + sizeof(struct hfs_cat_file)); + } ++ res = 0; + out: + hfs_find_exit(&fd); +- return 0; ++ return res; + } + + static struct dentry *hfs_file_lookup(struct inode *dir, struct dentry *dentry, +diff --git a/fs/hfs/trans.c b/fs/hfs/trans.c +index 39f5e343bf4d4..fdb0edb8a607d 100644 +--- a/fs/hfs/trans.c ++++ b/fs/hfs/trans.c +@@ -109,7 +109,7 @@ void hfs_asc2mac(struct super_block *sb, struct hfs_name *out, const struct qstr + if (nls_io) { + wchar_t ch; + +- while (srclen > 0) { ++ while (srclen > 0 && dstlen > 0) { + size = nls_io->char2uni(src, srclen, &ch); + if (size < 0) { + ch = '?'; +diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h +index 1798949f269bb..ebc0d5c678d0c 100644 +--- a/fs/hfsplus/hfsplus_fs.h ++++ b/fs/hfsplus/hfsplus_fs.h +@@ -198,6 +198,8 @@ struct hfsplus_sb_info { + #define HFSPLUS_SB_HFSX 3 + #define HFSPLUS_SB_CASEFOLD 4 + #define HFSPLUS_SB_NOBARRIER 5 ++#define HFSPLUS_SB_UID 6 ++#define HFSPLUS_SB_GID 7 + + static inline struct hfsplus_sb_info *HFSPLUS_SB(struct super_block *sb) + { +diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c +index 6fef67c2a9f09..bf6f75f569e4d 100644 +--- a/fs/hfsplus/inode.c ++++ b/fs/hfsplus/inode.c +@@ -190,11 +190,11 @@ static void hfsplus_get_perms(struct inode *inode, + mode = be16_to_cpu(perms->mode); + + i_uid_write(inode, be32_to_cpu(perms->owner)); +- if (!i_uid_read(inode) && !mode) ++ if ((test_bit(HFSPLUS_SB_UID, &sbi->flags)) || (!i_uid_read(inode) && !mode)) + inode->i_uid = sbi->uid; + + i_gid_write(inode, be32_to_cpu(perms->group)); +- if (!i_gid_read(inode) && !mode) ++ if ((test_bit(HFSPLUS_SB_GID, &sbi->flags)) || (!i_gid_read(inode) && !mode)) + inode->i_gid = sbi->gid; + + if (dir) { +@@ -509,8 +509,7 @@ int hfsplus_cat_read_inode(struct inode *inode, struct hfs_find_data *fd) + if (type == HFSPLUS_FOLDER) { + struct hfsplus_cat_folder *folder = &entry.folder; + +- if (fd->entrylength < sizeof(struct hfsplus_cat_folder)) +- /* panic? */; ++ WARN_ON(fd->entrylength < sizeof(struct hfsplus_cat_folder)); + hfs_bnode_read(fd->bnode, &entry, fd->entryoffset, + sizeof(struct hfsplus_cat_folder)); + hfsplus_get_perms(inode, &folder->permissions, 1); +@@ -530,8 +529,7 @@ int hfsplus_cat_read_inode(struct inode *inode, struct hfs_find_data *fd) + } else if (type == HFSPLUS_FILE) { + struct hfsplus_cat_file *file = &entry.file; + +- if (fd->entrylength < sizeof(struct hfsplus_cat_file)) +- /* panic? 
*/; ++ WARN_ON(fd->entrylength < sizeof(struct hfsplus_cat_file)); + hfs_bnode_read(fd->bnode, &entry, fd->entryoffset, + sizeof(struct hfsplus_cat_file)); + +@@ -588,8 +586,7 @@ int hfsplus_cat_write_inode(struct inode *inode) + if (S_ISDIR(main_inode->i_mode)) { + struct hfsplus_cat_folder *folder = &entry.folder; + +- if (fd.entrylength < sizeof(struct hfsplus_cat_folder)) +- /* panic? */; ++ WARN_ON(fd.entrylength < sizeof(struct hfsplus_cat_folder)); + hfs_bnode_read(fd.bnode, &entry, fd.entryoffset, + sizeof(struct hfsplus_cat_folder)); + /* simple node checks? */ +@@ -614,8 +611,7 @@ int hfsplus_cat_write_inode(struct inode *inode) + } else { + struct hfsplus_cat_file *file = &entry.file; + +- if (fd.entrylength < sizeof(struct hfsplus_cat_file)) +- /* panic? */; ++ WARN_ON(fd.entrylength < sizeof(struct hfsplus_cat_file)); + hfs_bnode_read(fd.bnode, &entry, fd.entryoffset, + sizeof(struct hfsplus_cat_file)); + hfsplus_inode_write_fork(inode, &file->data_fork); +diff --git a/fs/hfsplus/options.c b/fs/hfsplus/options.c +index 047e05c575601..c94a58762ad6d 100644 +--- a/fs/hfsplus/options.c ++++ b/fs/hfsplus/options.c +@@ -140,6 +140,8 @@ int hfsplus_parse_options(char *input, struct hfsplus_sb_info *sbi) + if (!uid_valid(sbi->uid)) { + pr_err("invalid uid specified\n"); + return 0; ++ } else { ++ set_bit(HFSPLUS_SB_UID, &sbi->flags); + } + break; + case opt_gid: +@@ -151,6 +153,8 @@ int hfsplus_parse_options(char *input, struct hfsplus_sb_info *sbi) + if (!gid_valid(sbi->gid)) { + pr_err("invalid gid specified\n"); + return 0; ++ } else { ++ set_bit(HFSPLUS_SB_GID, &sbi->flags); + } + break; + case opt_part: diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c -index cdfb1ae78a3f8..d74a49b188c24 100644 +index cdfb1ae78a3f8..352230a011e08 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -206,7 +206,7 @@ hugetlb_get_unmapped_area_bottomup(struct file *file, unsigned long addr, @@ -320030,7 +385711,20 @@ index cdfb1ae78a3f8..d74a49b188c24 100644 (!vma || addr + len <= vm_start_gap(vma))) return addr; } -@@ -409,10 +410,11 @@ hugetlb_vmdelete_list(struct rb_root_cached *root, pgoff_t start, pgoff_t end) +@@ -360,6 +361,12 @@ static ssize_t hugetlbfs_read_iter(struct kiocb *iocb, struct iov_iter *to) + } else { + unlock_page(page); + ++ if (PageHWPoison(page)) { ++ put_page(page); ++ retval = -EIO; ++ break; ++ } ++ + /* + * We have the page, copy it to user space buffer. 
+ */ +@@ -409,10 +416,11 @@ hugetlb_vmdelete_list(struct rb_root_cached *root, pgoff_t start, pgoff_t end) struct vm_area_struct *vma; /* @@ -320045,7 +385739,21 @@ index cdfb1ae78a3f8..d74a49b188c24 100644 unsigned long v_offset; unsigned long v_end; -@@ -1046,12 +1048,12 @@ static int hugetlbfs_statfs(struct dentry *dentry, struct kstatfs *buf) +@@ -982,13 +990,6 @@ static int hugetlbfs_migrate_page(struct address_space *mapping, + static int hugetlbfs_error_remove_page(struct address_space *mapping, + struct page *page) + { +- struct inode *inode = mapping->host; +- pgoff_t index = page->index; +- +- remove_huge_page(page); +- if (unlikely(hugetlb_unreserve_pages(inode, index, index + 1, 1))) +- hugetlb_fix_reserve_counts(inode); +- + return 0; + } + +@@ -1046,12 +1047,12 @@ static int hugetlbfs_statfs(struct dentry *dentry, struct kstatfs *buf) if (sbinfo->spool) { long free_pages; @@ -320060,6 +385768,33 @@ index cdfb1ae78a3f8..d74a49b188c24 100644 buf->f_files = sbinfo->max_inodes; buf->f_ffree = sbinfo->free_inodes; } +@@ -1249,7 +1250,7 @@ static int hugetlbfs_parse_param(struct fs_context *fc, struct fs_parameter *par + + case Opt_size: + /* memparse() will accept a K/M/G without a digit */ +- if (!isdigit(param->string[0])) ++ if (!param->string || !isdigit(param->string[0])) + goto bad_val; + ctx->max_size_opt = memparse(param->string, &rest); + ctx->max_val_type = SIZE_STD; +@@ -1259,7 +1260,7 @@ static int hugetlbfs_parse_param(struct fs_context *fc, struct fs_parameter *par + + case Opt_nr_inodes: + /* memparse() will accept a K/M/G without a digit */ +- if (!isdigit(param->string[0])) ++ if (!param->string || !isdigit(param->string[0])) + goto bad_val; + ctx->nr_inodes = memparse(param->string, &rest); + return 0; +@@ -1275,7 +1276,7 @@ static int hugetlbfs_parse_param(struct fs_context *fc, struct fs_parameter *par + + case Opt_min_size: + /* memparse() will accept a K/M/G without a digit */ +- if (!isdigit(param->string[0])) ++ if (!param->string || !isdigit(param->string[0])) + goto bad_val; + ctx->min_size_opt = memparse(param->string, &rest); + ctx->min_val_type = SIZE_STD; diff --git a/fs/inode.c b/fs/inode.c index ed0cab8a32db1..8279c700a2b7f 100644 --- a/fs/inode.c @@ -320203,36661 +385938,76507 @@ index 3cd065c8a66b4..9075490f21a62 100644 +int do_setxattr(struct user_namespace *mnt_userns, struct dentry *dentry, + struct xattr_ctx *ctx); diff --git a/fs/io-wq.c b/fs/io-wq.c -index 422a7ed6a9bdb..6031fb319d878 100644 +deleted file mode 100644 +index 422a7ed6a9bdb..0000000000000 --- a/fs/io-wq.c -+++ b/fs/io-wq.c -@@ -140,6 +140,8 @@ static void io_wqe_dec_running(struct io_worker *worker); - static bool io_acct_cancel_pending_work(struct io_wqe *wqe, - struct io_wqe_acct *acct, - struct io_cb_cancel_data *match); -+static void create_worker_cb(struct callback_head *cb); -+static void io_wq_cancel_tw_create(struct io_wq *wq); - - static bool io_worker_get(struct io_worker *worker) - { -@@ -174,9 +176,44 @@ static void io_worker_ref_put(struct io_wq *wq) - complete(&wq->worker_done); - } - -+static void io_worker_cancel_cb(struct io_worker *worker) -+{ -+ struct io_wqe_acct *acct = io_wqe_get_acct(worker); -+ struct io_wqe *wqe = worker->wqe; -+ struct io_wq *wq = wqe->wq; -+ -+ atomic_dec(&acct->nr_running); -+ raw_spin_lock(&worker->wqe->lock); -+ acct->nr_workers--; -+ raw_spin_unlock(&worker->wqe->lock); -+ io_worker_ref_put(wq); -+ clear_bit_unlock(0, &worker->create_state); -+ io_worker_release(worker); -+} -+ -+static bool io_task_worker_match(struct 
callback_head *cb, void *data) -+{ -+ struct io_worker *worker; -+ -+ if (cb->func != create_worker_cb) -+ return false; -+ worker = container_of(cb, struct io_worker, create_work); -+ return worker == data; -+} -+ - static void io_worker_exit(struct io_worker *worker) - { - struct io_wqe *wqe = worker->wqe; -+ struct io_wq *wq = wqe->wq; -+ -+ while (1) { -+ struct callback_head *cb = task_work_cancel_match(wq->task, -+ io_task_worker_match, worker); -+ -+ if (!cb) -+ break; -+ io_worker_cancel_cb(worker); -+ } - - if (refcount_dec_and_test(&worker->ref)) - complete(&worker->ref_done); -@@ -321,10 +358,22 @@ static bool io_queue_worker_create(struct io_worker *worker, - test_and_set_bit_lock(0, &worker->create_state)) - goto fail_release; - -+ atomic_inc(&wq->worker_refs); - init_task_work(&worker->create_work, func); - worker->create_index = acct->index; ++++ /dev/null +@@ -1,1322 +0,0 @@ +-// SPDX-License-Identifier: GPL-2.0 +-/* +- * Basic worker thread pool for io_uring +- * +- * Copyright (C) 2019 Jens Axboe +- * +- */ +-#include <linux/kernel.h> +-#include <linux/init.h> +-#include <linux/errno.h> +-#include <linux/sched/signal.h> +-#include <linux/percpu.h> +-#include <linux/slab.h> +-#include <linux/rculist_nulls.h> +-#include <linux/cpu.h> +-#include <linux/tracehook.h> +-#include <uapi/linux/io_uring.h> +- +-#include "io-wq.h" +- +-#define WORKER_IDLE_TIMEOUT (5 * HZ) +- +-enum { +- IO_WORKER_F_UP = 1, /* up and active */ +- IO_WORKER_F_RUNNING = 2, /* account as running */ +- IO_WORKER_F_FREE = 4, /* worker on free list */ +- IO_WORKER_F_BOUND = 8, /* is doing bounded work */ +-}; +- +-enum { +- IO_WQ_BIT_EXIT = 0, /* wq exiting */ +-}; +- +-enum { +- IO_ACCT_STALLED_BIT = 0, /* stalled on hash */ +-}; +- +-/* +- * One for each thread in a wqe pool +- */ +-struct io_worker { +- refcount_t ref; +- unsigned flags; +- struct hlist_nulls_node nulls_node; +- struct list_head all_list; +- struct task_struct *task; +- struct io_wqe *wqe; +- +- struct io_wq_work *cur_work; +- spinlock_t lock; +- +- struct completion ref_done; +- +- unsigned long create_state; +- struct callback_head create_work; +- int create_index; +- +- union { +- struct rcu_head rcu; +- struct work_struct work; +- }; +-}; +- +-#if BITS_PER_LONG == 64 +-#define IO_WQ_HASH_ORDER 6 +-#else +-#define IO_WQ_HASH_ORDER 5 +-#endif +- +-#define IO_WQ_NR_HASH_BUCKETS (1u << IO_WQ_HASH_ORDER) +- +-struct io_wqe_acct { +- unsigned nr_workers; +- unsigned max_workers; +- int index; +- atomic_t nr_running; +- struct io_wq_work_list work_list; +- unsigned long flags; +-}; +- +-enum { +- IO_WQ_ACCT_BOUND, +- IO_WQ_ACCT_UNBOUND, +- IO_WQ_ACCT_NR, +-}; +- +-/* +- * Per-node worker thread pool +- */ +-struct io_wqe { +- raw_spinlock_t lock; +- struct io_wqe_acct acct[2]; +- +- int node; +- +- struct hlist_nulls_head free_list; +- struct list_head all_list; +- +- struct wait_queue_entry wait; +- +- struct io_wq *wq; +- struct io_wq_work *hash_tail[IO_WQ_NR_HASH_BUCKETS]; +- +- cpumask_var_t cpu_mask; +-}; +- +-/* +- * Per io_wq state +- */ +-struct io_wq { +- unsigned long state; +- +- free_work_fn *free_work; +- io_wq_work_fn *do_work; +- +- struct io_wq_hash *hash; +- +- atomic_t worker_refs; +- struct completion worker_done; +- +- struct hlist_node cpuhp_node; +- +- struct task_struct *task; +- +- struct io_wqe *wqes[]; +-}; +- +-static enum cpuhp_state io_wq_online; +- +-struct io_cb_cancel_data { +- work_cancel_fn *fn; +- void *data; +- int nr_running; +- int nr_pending; +- bool cancel_all; +-}; +- +-static bool 
create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index); +-static void io_wqe_dec_running(struct io_worker *worker); +-static bool io_acct_cancel_pending_work(struct io_wqe *wqe, +- struct io_wqe_acct *acct, +- struct io_cb_cancel_data *match); +- +-static bool io_worker_get(struct io_worker *worker) +-{ +- return refcount_inc_not_zero(&worker->ref); +-} +- +-static void io_worker_release(struct io_worker *worker) +-{ +- if (refcount_dec_and_test(&worker->ref)) +- complete(&worker->ref_done); +-} +- +-static inline struct io_wqe_acct *io_get_acct(struct io_wqe *wqe, bool bound) +-{ +- return &wqe->acct[bound ? IO_WQ_ACCT_BOUND : IO_WQ_ACCT_UNBOUND]; +-} +- +-static inline struct io_wqe_acct *io_work_get_acct(struct io_wqe *wqe, +- struct io_wq_work *work) +-{ +- return io_get_acct(wqe, !(work->flags & IO_WQ_WORK_UNBOUND)); +-} +- +-static inline struct io_wqe_acct *io_wqe_get_acct(struct io_worker *worker) +-{ +- return io_get_acct(worker->wqe, worker->flags & IO_WORKER_F_BOUND); +-} +- +-static void io_worker_ref_put(struct io_wq *wq) +-{ +- if (atomic_dec_and_test(&wq->worker_refs)) +- complete(&wq->worker_done); +-} +- +-static void io_worker_exit(struct io_worker *worker) +-{ +- struct io_wqe *wqe = worker->wqe; +- +- if (refcount_dec_and_test(&worker->ref)) +- complete(&worker->ref_done); +- wait_for_completion(&worker->ref_done); +- +- raw_spin_lock(&wqe->lock); +- if (worker->flags & IO_WORKER_F_FREE) +- hlist_nulls_del_rcu(&worker->nulls_node); +- list_del_rcu(&worker->all_list); +- preempt_disable(); +- io_wqe_dec_running(worker); +- worker->flags = 0; +- current->flags &= ~PF_IO_WORKER; +- preempt_enable(); +- raw_spin_unlock(&wqe->lock); +- +- kfree_rcu(worker, rcu); +- io_worker_ref_put(wqe->wq); +- do_exit(0); +-} +- +-static inline bool io_acct_run_queue(struct io_wqe_acct *acct) +-{ +- if (!wq_list_empty(&acct->work_list) && +- !test_bit(IO_ACCT_STALLED_BIT, &acct->flags)) +- return true; +- return false; +-} +- +-/* +- * Check head of free list for an available worker. If one isn't available, +- * caller must create one. +- */ +-static bool io_wqe_activate_free_worker(struct io_wqe *wqe, +- struct io_wqe_acct *acct) +- __must_hold(RCU) +-{ +- struct hlist_nulls_node *n; +- struct io_worker *worker; +- +- /* +- * Iterate free_list and see if we can find an idle worker to +- * activate. If a given worker is on the free_list but in the process +- * of exiting, keep trying. +- */ +- hlist_nulls_for_each_entry_rcu(worker, n, &wqe->free_list, nulls_node) { +- if (!io_worker_get(worker)) +- continue; +- if (io_wqe_get_acct(worker) != acct) { +- io_worker_release(worker); +- continue; +- } +- if (wake_up_process(worker->task)) { +- io_worker_release(worker); +- return true; +- } +- io_worker_release(worker); +- } +- +- return false; +-} +- +-/* +- * We need a worker. If we find a free one, we're good. If not, and we're +- * below the max number of workers, create one. +- */ +-static bool io_wqe_create_worker(struct io_wqe *wqe, struct io_wqe_acct *acct) +-{ +- /* +- * Most likely an attempt to queue unbounded work on an io_wq that +- * wasn't setup with any unbounded workers. 
+- */ +- if (unlikely(!acct->max_workers)) +- pr_warn_once("io-wq is not configured for unbound workers"); +- +- raw_spin_lock(&wqe->lock); +- if (acct->nr_workers >= acct->max_workers) { +- raw_spin_unlock(&wqe->lock); +- return true; +- } +- acct->nr_workers++; +- raw_spin_unlock(&wqe->lock); +- atomic_inc(&acct->nr_running); +- atomic_inc(&wqe->wq->worker_refs); +- return create_io_worker(wqe->wq, wqe, acct->index); +-} +- +-static void io_wqe_inc_running(struct io_worker *worker) +-{ +- struct io_wqe_acct *acct = io_wqe_get_acct(worker); +- +- atomic_inc(&acct->nr_running); +-} +- +-static void create_worker_cb(struct callback_head *cb) +-{ +- struct io_worker *worker; +- struct io_wq *wq; +- struct io_wqe *wqe; +- struct io_wqe_acct *acct; +- bool do_create = false; +- +- worker = container_of(cb, struct io_worker, create_work); +- wqe = worker->wqe; +- wq = wqe->wq; +- acct = &wqe->acct[worker->create_index]; +- raw_spin_lock(&wqe->lock); +- if (acct->nr_workers < acct->max_workers) { +- acct->nr_workers++; +- do_create = true; +- } +- raw_spin_unlock(&wqe->lock); +- if (do_create) { +- create_io_worker(wq, wqe, worker->create_index); +- } else { +- atomic_dec(&acct->nr_running); +- io_worker_ref_put(wq); +- } +- clear_bit_unlock(0, &worker->create_state); +- io_worker_release(worker); +-} +- +-static bool io_queue_worker_create(struct io_worker *worker, +- struct io_wqe_acct *acct, +- task_work_func_t func) +-{ +- struct io_wqe *wqe = worker->wqe; +- struct io_wq *wq = wqe->wq; +- +- /* raced with exit, just ignore create call */ +- if (test_bit(IO_WQ_BIT_EXIT, &wq->state)) +- goto fail; +- if (!io_worker_get(worker)) +- goto fail; +- /* +- * create_state manages ownership of create_work/index. We should +- * only need one entry per worker, as the worker going to sleep +- * will trigger the condition, and waking will clear it once it +- * runs the task_work. +- */ +- if (test_bit(0, &worker->create_state) || +- test_and_set_bit_lock(0, &worker->create_state)) +- goto fail_release; +- +- init_task_work(&worker->create_work, func); +- worker->create_index = acct->index; - if (!task_work_add(wq->task, &worker->create_work, TWA_SIGNAL)) -+ if (!task_work_add(wq->task, &worker->create_work, TWA_SIGNAL)) { -+ /* -+ * EXIT may have been set after checking it above, check after -+ * adding the task_work and remove any creation item if it is -+ * now set. wq exit does that too, but we can have added this -+ * work item after we canceled in io_wq_exit_workers(). 
-+ */ -+ if (test_bit(IO_WQ_BIT_EXIT, &wq->state)) -+ io_wq_cancel_tw_create(wq); -+ io_worker_ref_put(wq); - return true; -+ } -+ io_worker_ref_put(wq); - clear_bit_unlock(0, &worker->create_state); - fail_release: - io_worker_release(worker); -@@ -346,7 +395,9 @@ static void io_wqe_dec_running(struct io_worker *worker) - if (atomic_dec_and_test(&acct->nr_running) && io_acct_run_queue(acct)) { - atomic_inc(&acct->nr_running); - atomic_inc(&wqe->wq->worker_refs); -+ raw_spin_unlock(&wqe->lock); - io_queue_worker_create(worker, acct, create_worker_cb); -+ raw_spin_lock(&wqe->lock); - } - } - -@@ -385,9 +436,10 @@ static inline unsigned int io_get_work_hash(struct io_wq_work *work) - return work->flags >> IO_WQ_HASH_SHIFT; - } - +- return true; +- clear_bit_unlock(0, &worker->create_state); +-fail_release: +- io_worker_release(worker); +-fail: +- atomic_dec(&acct->nr_running); +- io_worker_ref_put(wq); +- return false; +-} +- +-static void io_wqe_dec_running(struct io_worker *worker) +- __must_hold(wqe->lock) +-{ +- struct io_wqe_acct *acct = io_wqe_get_acct(worker); +- struct io_wqe *wqe = worker->wqe; +- +- if (!(worker->flags & IO_WORKER_F_UP)) +- return; +- +- if (atomic_dec_and_test(&acct->nr_running) && io_acct_run_queue(acct)) { +- atomic_inc(&acct->nr_running); +- atomic_inc(&wqe->wq->worker_refs); +- io_queue_worker_create(worker, acct, create_worker_cb); +- } +-} +- +-/* +- * Worker will start processing some work. Move it to the busy list, if +- * it's currently on the freelist +- */ +-static void __io_worker_busy(struct io_wqe *wqe, struct io_worker *worker, +- struct io_wq_work *work) +- __must_hold(wqe->lock) +-{ +- if (worker->flags & IO_WORKER_F_FREE) { +- worker->flags &= ~IO_WORKER_F_FREE; +- hlist_nulls_del_init_rcu(&worker->nulls_node); +- } +-} +- +-/* +- * No work, worker going to sleep. Move to freelist, and unuse mm if we +- * have one attached. Dropping the mm may potentially sleep, so we drop +- * the lock in that case and return success. Since the caller has to +- * retry the loop in that case (we changed task state), we don't regrab +- * the lock if we return success. +- */ +-static void __io_worker_idle(struct io_wqe *wqe, struct io_worker *worker) +- __must_hold(wqe->lock) +-{ +- if (!(worker->flags & IO_WORKER_F_FREE)) { +- worker->flags |= IO_WORKER_F_FREE; +- hlist_nulls_add_head_rcu(&worker->nulls_node, &wqe->free_list); +- } +-} +- +-static inline unsigned int io_get_work_hash(struct io_wq_work *work) +-{ +- return work->flags >> IO_WQ_HASH_SHIFT; +-} +- -static void io_wait_on_hash(struct io_wqe *wqe, unsigned int hash) -+static bool io_wait_on_hash(struct io_wqe *wqe, unsigned int hash) - { - struct io_wq *wq = wqe->wq; -+ bool ret = false; - - spin_lock_irq(&wq->hash->wait.lock); - if (list_empty(&wqe->wait.entry)) { -@@ -395,9 +447,11 @@ static void io_wait_on_hash(struct io_wqe *wqe, unsigned int hash) - if (!test_bit(hash, &wq->hash->map)) { - __set_current_state(TASK_RUNNING); - list_del_init(&wqe->wait.entry); -+ ret = true; - } - } - spin_unlock_irq(&wq->hash->wait.lock); -+ return ret; - } - - static struct io_wq_work *io_get_next_work(struct io_wqe_acct *acct, -@@ -437,14 +491,21 @@ static struct io_wq_work *io_get_next_work(struct io_wqe_acct *acct, - } - - if (stall_hash != -1U) { -+ bool unstalled; -+ - /* - * Set this before dropping the lock to avoid racing with new - * work being added and clearing the stalled bit. 
- */ - set_bit(IO_ACCT_STALLED_BIT, &acct->flags); - raw_spin_unlock(&wqe->lock); +-{ +- struct io_wq *wq = wqe->wq; +- +- spin_lock_irq(&wq->hash->wait.lock); +- if (list_empty(&wqe->wait.entry)) { +- __add_wait_queue(&wq->hash->wait, &wqe->wait); +- if (!test_bit(hash, &wq->hash->map)) { +- __set_current_state(TASK_RUNNING); +- list_del_init(&wqe->wait.entry); +- } +- } +- spin_unlock_irq(&wq->hash->wait.lock); +-} +- +-static struct io_wq_work *io_get_next_work(struct io_wqe_acct *acct, +- struct io_worker *worker) +- __must_hold(wqe->lock) +-{ +- struct io_wq_work_node *node, *prev; +- struct io_wq_work *work, *tail; +- unsigned int stall_hash = -1U; +- struct io_wqe *wqe = worker->wqe; +- +- wq_list_for_each(node, prev, &acct->work_list) { +- unsigned int hash; +- +- work = container_of(node, struct io_wq_work, list); +- +- /* not hashed, can run anytime */ +- if (!io_wq_is_hashed(work)) { +- wq_list_del(&acct->work_list, node, prev); +- return work; +- } +- +- hash = io_get_work_hash(work); +- /* all items with this hash lie in [work, tail] */ +- tail = wqe->hash_tail[hash]; +- +- /* hashed, can run if not already running */ +- if (!test_and_set_bit(hash, &wqe->wq->hash->map)) { +- wqe->hash_tail[hash] = NULL; +- wq_list_cut(&acct->work_list, &tail->list, prev); +- return work; +- } +- if (stall_hash == -1U) +- stall_hash = hash; +- /* fast forward to a next hash, for-each will fix up @prev */ +- node = &tail->list; +- } +- +- if (stall_hash != -1U) { +- /* +- * Set this before dropping the lock to avoid racing with new +- * work being added and clearing the stalled bit. +- */ +- set_bit(IO_ACCT_STALLED_BIT, &acct->flags); +- raw_spin_unlock(&wqe->lock); - io_wait_on_hash(wqe, stall_hash); -+ unstalled = io_wait_on_hash(wqe, stall_hash); - raw_spin_lock(&wqe->lock); -+ if (unstalled) { -+ clear_bit(IO_ACCT_STALLED_BIT, &acct->flags); -+ if (wq_has_sleeper(&wqe->wq->hash->wait)) -+ wake_up(&wqe->wq->hash->wait); -+ } - } - - return NULL; -@@ -526,8 +587,11 @@ get_next: - io_wqe_enqueue(wqe, linked); - - if (hash != -1U && !next_hashed) { -+ /* serialize hash clear with wake_up() */ -+ spin_lock_irq(&wq->hash->wait.lock); - clear_bit(hash, &wq->hash->map); - clear_bit(IO_ACCT_STALLED_BIT, &acct->flags); -+ spin_unlock_irq(&wq->hash->wait.lock); - if (wq_has_sleeper(&wq->hash->wait)) - wake_up(&wq->hash->wait); - raw_spin_lock(&wqe->lock); -@@ -660,6 +724,13 @@ static bool io_wq_work_match_all(struct io_wq_work *work, void *data) - - static inline bool io_should_retry_thread(long err) - { -+ /* -+ * Prevent perpetual task_work retry, if the task (or its group) is -+ * exiting. 
-+ */ -+ if (fatal_signal_pending(current)) -+ return false; -+ - switch (err) { - case -EAGAIN: - case -ERESTARTSYS: -@@ -716,11 +787,8 @@ static void io_workqueue_create(struct work_struct *work) - struct io_worker *worker = container_of(work, struct io_worker, work); - struct io_wqe_acct *acct = io_wqe_get_acct(worker); - -- if (!io_queue_worker_create(worker, acct, create_worker_cont)) { -- clear_bit_unlock(0, &worker->create_state); -- io_worker_release(worker); -+ if (!io_queue_worker_create(worker, acct, create_worker_cont)) - kfree(worker); +- raw_spin_lock(&wqe->lock); - } - } - - static bool create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index) -@@ -1084,10 +1152,10 @@ struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data) - wqe = kzalloc_node(sizeof(struct io_wqe), GFP_KERNEL, alloc_node); - if (!wqe) - goto err; -+ wq->wqes[node] = wqe; - if (!alloc_cpumask_var(&wqe->cpu_mask, GFP_KERNEL)) - goto err; - cpumask_copy(wqe->cpu_mask, cpumask_of_node(node)); -- wq->wqes[node] = wqe; - wqe->node = alloc_node; - wqe->acct[IO_WQ_ACCT_BOUND].max_workers = bounded; - wqe->acct[IO_WQ_ACCT_UNBOUND].max_workers = -@@ -1140,28 +1208,26 @@ void io_wq_exit_start(struct io_wq *wq) - set_bit(IO_WQ_BIT_EXIT, &wq->state); - } - --static void io_wq_exit_workers(struct io_wq *wq) -+static void io_wq_cancel_tw_create(struct io_wq *wq) - { - struct callback_head *cb; -- int node; - -- if (!wq->task) +- return NULL; +-} +- +-static bool io_flush_signals(void) +-{ +- if (unlikely(test_thread_flag(TIF_NOTIFY_SIGNAL))) { +- __set_current_state(TASK_RUNNING); +- tracehook_notify_signal(); +- return true; +- } +- return false; +-} +- +-static void io_assign_current_work(struct io_worker *worker, +- struct io_wq_work *work) +-{ +- if (work) { +- io_flush_signals(); +- cond_resched(); +- } +- +- spin_lock(&worker->lock); +- worker->cur_work = work; +- spin_unlock(&worker->lock); +-} +- +-static void io_wqe_enqueue(struct io_wqe *wqe, struct io_wq_work *work); +- +-static void io_worker_handle_work(struct io_worker *worker) +- __releases(wqe->lock) +-{ +- struct io_wqe_acct *acct = io_wqe_get_acct(worker); +- struct io_wqe *wqe = worker->wqe; +- struct io_wq *wq = wqe->wq; +- bool do_kill = test_bit(IO_WQ_BIT_EXIT, &wq->state); +- +- do { +- struct io_wq_work *work; +-get_next: +- /* +- * If we got some work, mark us as busy. If we didn't, but +- * the list isn't empty, it means we stalled on hashed work. +- * Mark us stalled so we don't keep looking for work when we +- * can't make progress, any work completion or insertion will +- * clear the stalled flag. 
+- */ +- work = io_get_next_work(acct, worker); +- if (work) +- __io_worker_busy(wqe, worker, work); +- +- raw_spin_unlock(&wqe->lock); +- if (!work) +- break; +- io_assign_current_work(worker, work); +- __set_current_state(TASK_RUNNING); +- +- /* handle a whole dependent link */ +- do { +- struct io_wq_work *next_hashed, *linked; +- unsigned int hash = io_get_work_hash(work); +- +- next_hashed = wq_next_work(work); +- +- if (unlikely(do_kill) && (work->flags & IO_WQ_WORK_UNBOUND)) +- work->flags |= IO_WQ_WORK_CANCEL; +- wq->do_work(work); +- io_assign_current_work(worker, NULL); +- +- linked = wq->free_work(work); +- work = next_hashed; +- if (!work && linked && !io_wq_is_hashed(linked)) { +- work = linked; +- linked = NULL; +- } +- io_assign_current_work(worker, work); +- if (linked) +- io_wqe_enqueue(wqe, linked); +- +- if (hash != -1U && !next_hashed) { +- clear_bit(hash, &wq->hash->map); +- clear_bit(IO_ACCT_STALLED_BIT, &acct->flags); +- if (wq_has_sleeper(&wq->hash->wait)) +- wake_up(&wq->hash->wait); +- raw_spin_lock(&wqe->lock); +- /* skip unnecessary unlock-lock wqe->lock */ +- if (!work) +- goto get_next; +- raw_spin_unlock(&wqe->lock); +- } +- } while (work); +- +- raw_spin_lock(&wqe->lock); +- } while (1); +-} +- +-static int io_wqe_worker(void *data) +-{ +- struct io_worker *worker = data; +- struct io_wqe_acct *acct = io_wqe_get_acct(worker); +- struct io_wqe *wqe = worker->wqe; +- struct io_wq *wq = wqe->wq; +- bool last_timeout = false; +- char buf[TASK_COMM_LEN]; +- +- worker->flags |= (IO_WORKER_F_UP | IO_WORKER_F_RUNNING); +- +- snprintf(buf, sizeof(buf), "iou-wrk-%d", wq->task->pid); +- set_task_comm(current, buf); +- +- while (!test_bit(IO_WQ_BIT_EXIT, &wq->state)) { +- long ret; +- +- set_current_state(TASK_INTERRUPTIBLE); +-loop: +- raw_spin_lock(&wqe->lock); +- if (io_acct_run_queue(acct)) { +- io_worker_handle_work(worker); +- goto loop; +- } +- /* timed out, exit unless we're the last worker */ +- if (last_timeout && acct->nr_workers > 1) { +- acct->nr_workers--; +- raw_spin_unlock(&wqe->lock); +- __set_current_state(TASK_RUNNING); +- break; +- } +- last_timeout = false; +- __io_worker_idle(wqe, worker); +- raw_spin_unlock(&wqe->lock); +- if (io_flush_signals()) +- continue; +- ret = schedule_timeout(WORKER_IDLE_TIMEOUT); +- if (signal_pending(current)) { +- struct ksignal ksig; +- +- if (!get_signal(&ksig)) +- continue; +- break; +- } +- last_timeout = !ret; +- } +- +- if (test_bit(IO_WQ_BIT_EXIT, &wq->state)) { +- raw_spin_lock(&wqe->lock); +- io_worker_handle_work(worker); +- } +- +- io_worker_exit(worker); +- return 0; +-} +- +-/* +- * Called when a worker is scheduled in. Mark us as currently running. +- */ +-void io_wq_worker_running(struct task_struct *tsk) +-{ +- struct io_worker *worker = tsk->pf_io_worker; +- +- if (!worker) - return; - - while ((cb = task_work_cancel_match(wq->task, io_task_work_match, wq)) != NULL) { - struct io_worker *worker; -- struct io_wqe_acct *acct; - - worker = container_of(cb, struct io_worker, create_work); -- acct = io_wqe_get_acct(worker); +- if (!(worker->flags & IO_WORKER_F_UP)) +- return; +- if (worker->flags & IO_WORKER_F_RUNNING) +- return; +- worker->flags |= IO_WORKER_F_RUNNING; +- io_wqe_inc_running(worker); +-} +- +-/* +- * Called when worker is going to sleep. If there are no workers currently +- * running and we have work pending, wake up a free one or create a new one. 
+- */ +-void io_wq_worker_sleeping(struct task_struct *tsk) +-{ +- struct io_worker *worker = tsk->pf_io_worker; +- +- if (!worker) +- return; +- if (!(worker->flags & IO_WORKER_F_UP)) +- return; +- if (!(worker->flags & IO_WORKER_F_RUNNING)) +- return; +- +- worker->flags &= ~IO_WORKER_F_RUNNING; +- +- raw_spin_lock(&worker->wqe->lock); +- io_wqe_dec_running(worker); +- raw_spin_unlock(&worker->wqe->lock); +-} +- +-static void io_init_new_worker(struct io_wqe *wqe, struct io_worker *worker, +- struct task_struct *tsk) +-{ +- tsk->pf_io_worker = worker; +- worker->task = tsk; +- set_cpus_allowed_ptr(tsk, wqe->cpu_mask); +- tsk->flags |= PF_NO_SETAFFINITY; +- +- raw_spin_lock(&wqe->lock); +- hlist_nulls_add_head_rcu(&worker->nulls_node, &wqe->free_list); +- list_add_tail_rcu(&worker->all_list, &wqe->all_list); +- worker->flags |= IO_WORKER_F_FREE; +- raw_spin_unlock(&wqe->lock); +- wake_up_new_task(tsk); +-} +- +-static bool io_wq_work_match_all(struct io_wq_work *work, void *data) +-{ +- return true; +-} +- +-static inline bool io_should_retry_thread(long err) +-{ +- switch (err) { +- case -EAGAIN: +- case -ERESTARTSYS: +- case -ERESTARTNOINTR: +- case -ERESTARTNOHAND: +- return true; +- default: +- return false; +- } +-} +- +-static void create_worker_cont(struct callback_head *cb) +-{ +- struct io_worker *worker; +- struct task_struct *tsk; +- struct io_wqe *wqe; +- +- worker = container_of(cb, struct io_worker, create_work); +- clear_bit_unlock(0, &worker->create_state); +- wqe = worker->wqe; +- tsk = create_io_thread(io_wqe_worker, worker, wqe->node); +- if (!IS_ERR(tsk)) { +- io_init_new_worker(wqe, worker, tsk); +- io_worker_release(worker); +- return; +- } else if (!io_should_retry_thread(PTR_ERR(tsk))) { +- struct io_wqe_acct *acct = io_wqe_get_acct(worker); +- - atomic_dec(&acct->nr_running); -- raw_spin_lock(&worker->wqe->lock); +- raw_spin_lock(&wqe->lock); - acct->nr_workers--; -- raw_spin_unlock(&worker->wqe->lock); -- io_worker_ref_put(wq); +- if (!acct->nr_workers) { +- struct io_cb_cancel_data match = { +- .fn = io_wq_work_match_all, +- .cancel_all = true, +- }; +- +- while (io_acct_cancel_pending_work(wqe, acct, &match)) +- raw_spin_lock(&wqe->lock); +- } +- raw_spin_unlock(&wqe->lock); +- io_worker_ref_put(wqe->wq); +- kfree(worker); +- return; +- } +- +- /* re-create attempts grab a new worker ref, drop the existing one */ +- io_worker_release(worker); +- schedule_work(&worker->work); +-} +- +-static void io_workqueue_create(struct work_struct *work) +-{ +- struct io_worker *worker = container_of(work, struct io_worker, work); +- struct io_wqe_acct *acct = io_wqe_get_acct(worker); +- +- if (!io_queue_worker_create(worker, acct, create_worker_cont)) { - clear_bit_unlock(0, &worker->create_state); - io_worker_release(worker); -+ io_worker_cancel_cb(worker); - } -+} -+ -+static void io_wq_exit_workers(struct io_wq *wq) -+{ -+ int node; -+ -+ if (!wq->task) -+ return; -+ -+ io_wq_cancel_tw_create(wq); - - rcu_read_lock(); - for_each_node(node) { -@@ -1278,7 +1344,9 @@ int io_wq_cpu_affinity(struct io_wq *wq, cpumask_var_t mask) - */ - int io_wq_max_workers(struct io_wq *wq, int *new_count) - { -- int i, node, prev = 0; -+ int prev[IO_WQ_ACCT_NR]; -+ bool first_node = true; -+ int i, node; - - BUILD_BUG_ON((int) IO_WQ_ACCT_BOUND != (int) IO_WQ_BOUND); - BUILD_BUG_ON((int) IO_WQ_ACCT_UNBOUND != (int) IO_WQ_UNBOUND); -@@ -1289,6 +1357,9 @@ int io_wq_max_workers(struct io_wq *wq, int *new_count) - new_count[i] = task_rlimit(current, RLIMIT_NPROC); - } - -+ for (i = 0; i < 
IO_WQ_ACCT_NR; i++) -+ prev[i] = 0; -+ - rcu_read_lock(); - for_each_node(node) { - struct io_wqe *wqe = wq->wqes[node]; -@@ -1297,14 +1368,19 @@ int io_wq_max_workers(struct io_wq *wq, int *new_count) - raw_spin_lock(&wqe->lock); - for (i = 0; i < IO_WQ_ACCT_NR; i++) { - acct = &wqe->acct[i]; -- prev = max_t(int, acct->max_workers, prev); -+ if (first_node) -+ prev[i] = max_t(int, acct->max_workers, prev[i]); - if (new_count[i]) - acct->max_workers = new_count[i]; -- new_count[i] = prev; - } - raw_spin_unlock(&wqe->lock); -+ first_node = false; - } - rcu_read_unlock(); -+ -+ for (i = 0; i < IO_WQ_ACCT_NR; i++) -+ new_count[i] = prev[i]; -+ - return 0; - } - -diff --git a/fs/io_uring.c b/fs/io_uring.c -index bc18af5e0a934..b8ae64df90e31 100644 ---- a/fs/io_uring.c -+++ b/fs/io_uring.c -@@ -486,8 +486,6 @@ struct io_poll_iocb { - struct file *file; - struct wait_queue_head *head; - __poll_t events; -- bool done; -- bool canceled; - struct wait_queue_entry wait; - }; - -@@ -623,10 +621,10 @@ struct io_epoll { - - struct io_splice { - struct file *file_out; -- struct file *file_in; - loff_t off_out; - loff_t off_in; - u64 len; -+ int splice_fd_in; - unsigned int flags; - }; - -@@ -885,6 +883,9 @@ struct io_kiocb { - - /* store used ubuf, so we can prevent reloading */ - struct io_mapped_ubuf *imu; -+ /* stores selected buf, valid IFF REQ_F_BUFFER_SELECTED is set */ -+ struct io_buffer *kbuf; -+ atomic_t poll_refs; - }; - - struct io_tctx_node { -@@ -1079,8 +1080,8 @@ static void io_uring_try_cancel_requests(struct io_ring_ctx *ctx, - bool cancel_all); - static void io_uring_cancel_generic(bool cancel_all, struct io_sq_data *sqd); - --static bool io_cqring_fill_event(struct io_ring_ctx *ctx, u64 user_data, -- long res, unsigned int cflags); -+static void io_fill_cqe_req(struct io_kiocb *req, s32 res, u32 cflags); -+ - static void io_put_req(struct io_kiocb *req); - static void io_put_req_deferred(struct io_kiocb *req); - static void io_dismantle_req(struct io_kiocb *req); -@@ -1154,12 +1155,6 @@ static inline bool req_ref_put_and_test(struct io_kiocb *req) - return atomic_dec_and_test(&req->refs); - } - --static inline void req_ref_put(struct io_kiocb *req) --{ -- WARN_ON_ONCE(!(req->flags & REQ_F_REFCOUNT)); -- WARN_ON_ONCE(req_ref_put_and_test(req)); +- kfree(worker); +- } -} - - static inline void req_ref_get(struct io_kiocb *req) - { - WARN_ON_ONCE(!(req->flags & REQ_F_REFCOUNT)); -@@ -1204,6 +1199,7 @@ static void io_refs_resurrect(struct percpu_ref *ref, struct completion *compl) - - static bool io_match_task(struct io_kiocb *head, struct task_struct *task, - bool cancel_all) -+ __must_hold(&req->ctx->timeout_lock) - { - struct io_kiocb *req; - -@@ -1219,6 +1215,44 @@ static bool io_match_task(struct io_kiocb *head, struct task_struct *task, - return false; - } - -+static bool io_match_linked(struct io_kiocb *head) -+{ -+ struct io_kiocb *req; -+ -+ io_for_each_link(req, head) { -+ if (req->flags & REQ_F_INFLIGHT) -+ return true; -+ } -+ return false; -+} -+ -+/* -+ * As io_match_task() but protected against racing with linked timeouts. -+ * User must not hold timeout_lock. 
-+ */ -+static bool io_match_task_safe(struct io_kiocb *head, struct task_struct *task, -+ bool cancel_all) -+{ -+ bool matched; -+ -+ if (task && head->task != task) -+ return false; -+ if (cancel_all) -+ return true; -+ -+ if (head->flags & REQ_F_LINK_TIMEOUT) { -+ struct io_ring_ctx *ctx = head->ctx; -+ -+ /* protect against races with linked timeouts */ -+ spin_lock_irq(&ctx->timeout_lock); -+ matched = io_match_linked(head); -+ spin_unlock_irq(&ctx->timeout_lock); -+ } else { -+ matched = io_match_linked(head); -+ } -+ return matched; -+} -+ - static inline void req_set_fail(struct io_kiocb *req) - { - req->flags |= REQ_F_FAIL; -@@ -1366,7 +1400,7 @@ static void io_req_track_inflight(struct io_kiocb *req) - { - if (!(req->flags & REQ_F_INFLIGHT)) { - req->flags |= REQ_F_INFLIGHT; -- atomic_inc(¤t->io_uring->inflight_tracked); -+ atomic_inc(&req->task->io_uring->inflight_tracked); - } - } - -@@ -1413,14 +1447,6 @@ static void io_prep_async_work(struct io_kiocb *req) - if (def->unbound_nonreg_file) - req->work.flags |= IO_WQ_WORK_UNBOUND; - } +-static bool create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index) +-{ +- struct io_wqe_acct *acct = &wqe->acct[index]; +- struct io_worker *worker; +- struct task_struct *tsk; - -- switch (req->opcode) { -- case IORING_OP_SPLICE: -- case IORING_OP_TEE: -- if (!S_ISREG(file_inode(req->splice.file_in)->i_mode)) -- req->work.flags |= IO_WQ_WORK_UNBOUND; -- break; +- __set_current_state(TASK_RUNNING); +- +- worker = kzalloc_node(sizeof(*worker), GFP_KERNEL, wqe->node); +- if (!worker) { +-fail: +- atomic_dec(&acct->nr_running); +- raw_spin_lock(&wqe->lock); +- acct->nr_workers--; +- raw_spin_unlock(&wqe->lock); +- io_worker_ref_put(wq); +- return false; - } - } - - static void io_prep_async_link(struct io_kiocb *req) -@@ -1430,10 +1456,10 @@ static void io_prep_async_link(struct io_kiocb *req) - if (req->flags & REQ_F_LINK_TIMEOUT) { - struct io_ring_ctx *ctx = req->ctx; - -- spin_lock(&ctx->completion_lock); -+ spin_lock_irq(&ctx->timeout_lock); - io_for_each_link(cur, req) - io_prep_async_work(cur); -- spin_unlock(&ctx->completion_lock); -+ spin_unlock_irq(&ctx->timeout_lock); - } else { - io_for_each_link(cur, req) - io_prep_async_work(cur); -@@ -1484,7 +1510,7 @@ static void io_kill_timeout(struct io_kiocb *req, int status) - atomic_set(&req->ctx->cq_timeouts, - atomic_read(&req->ctx->cq_timeouts) + 1); - list_del_init(&req->timeout.list); -- io_cqring_fill_event(req->ctx, req->user_data, status, 0); -+ io_fill_cqe_req(req, status, 0); - io_put_req_deferred(req); - } - } -@@ -1507,12 +1533,11 @@ static void io_flush_timeouts(struct io_ring_ctx *ctx) - __must_hold(&ctx->completion_lock) - { - u32 seq = ctx->cached_cq_tail - atomic_read(&ctx->cq_timeouts); -+ struct io_kiocb *req, *tmp; - - spin_lock_irq(&ctx->timeout_lock); -- while (!list_empty(&ctx->timeout_list)) { -+ list_for_each_entry_safe(req, tmp, &ctx->timeout_list, timeout.list) { - u32 events_needed, events_got; -- struct io_kiocb *req = list_first_entry(&ctx->timeout_list, -- struct io_kiocb, timeout.list); - - if (io_is_timeout_noseq(req)) - break; -@@ -1529,7 +1554,6 @@ static void io_flush_timeouts(struct io_ring_ctx *ctx) - if (events_got < events_needed) - break; - -- list_del_init(&req->timeout.list); - io_kill_timeout(req, 0); - } - ctx->cq_last_tm_flush = seq; -@@ -1721,8 +1745,20 @@ static inline void io_get_task_refs(int nr) - io_task_refs_refill(tctx); - } - -+static __cold void io_uring_drop_tctx_refs(struct task_struct *task) -+{ -+ struct io_uring_task *tctx = 
task->io_uring; -+ unsigned int refs = tctx->cached_refs; -+ -+ if (refs) { -+ tctx->cached_refs = 0; -+ percpu_counter_sub(&tctx->inflight, refs); -+ put_task_struct_many(task, refs); -+ } -+} -+ - static bool io_cqring_event_overflow(struct io_ring_ctx *ctx, u64 user_data, -- long res, unsigned int cflags) -+ s32 res, u32 cflags) - { - struct io_overflow_cqe *ocqe; - -@@ -1749,8 +1785,8 @@ static bool io_cqring_event_overflow(struct io_ring_ctx *ctx, u64 user_data, - return true; - } - --static inline bool __io_cqring_fill_event(struct io_ring_ctx *ctx, u64 user_data, -- long res, unsigned int cflags) -+static inline bool __io_fill_cqe(struct io_ring_ctx *ctx, u64 user_data, -+ s32 res, u32 cflags) - { - struct io_uring_cqe *cqe; - -@@ -1771,20 +1807,25 @@ static inline bool __io_cqring_fill_event(struct io_ring_ctx *ctx, u64 user_data - return io_cqring_event_overflow(ctx, user_data, res, cflags); - } - --/* not as hot to bloat with inlining */ --static noinline bool io_cqring_fill_event(struct io_ring_ctx *ctx, u64 user_data, -- long res, unsigned int cflags) -+static noinline void io_fill_cqe_req(struct io_kiocb *req, s32 res, u32 cflags) - { -- return __io_cqring_fill_event(ctx, user_data, res, cflags); -+ __io_fill_cqe(req->ctx, req->user_data, res, cflags); - } - --static void io_req_complete_post(struct io_kiocb *req, long res, -- unsigned int cflags) -+static noinline bool io_fill_cqe_aux(struct io_ring_ctx *ctx, u64 user_data, -+ s32 res, u32 cflags) -+{ -+ ctx->cq_extra++; -+ return __io_fill_cqe(ctx, user_data, res, cflags); -+} -+ -+static void io_req_complete_post(struct io_kiocb *req, s32 res, -+ u32 cflags) - { - struct io_ring_ctx *ctx = req->ctx; - - spin_lock(&ctx->completion_lock); -- __io_cqring_fill_event(ctx, req->user_data, res, cflags); -+ __io_fill_cqe(ctx, req->user_data, res, cflags); - /* - * If we're the last reference to this request, add to our locked - * free_list cache. 
-@@ -1820,8 +1861,8 @@ static inline bool io_req_needs_clean(struct io_kiocb *req) - return req->flags & IO_REQ_CLEAN_FLAGS; - } - --static void io_req_complete_state(struct io_kiocb *req, long res, -- unsigned int cflags) -+static inline void io_req_complete_state(struct io_kiocb *req, s32 res, -+ u32 cflags) - { - if (io_req_needs_clean(req)) - io_clean_op(req); -@@ -1831,7 +1872,7 @@ static void io_req_complete_state(struct io_kiocb *req, long res, - } - - static inline void __io_req_complete(struct io_kiocb *req, unsigned issue_flags, -- long res, unsigned cflags) -+ s32 res, u32 cflags) - { - if (issue_flags & IO_URING_F_COMPLETE_DEFER) - io_req_complete_state(req, res, cflags); -@@ -1839,12 +1880,12 @@ static inline void __io_req_complete(struct io_kiocb *req, unsigned issue_flags, - io_req_complete_post(req, res, cflags); - } - --static inline void io_req_complete(struct io_kiocb *req, long res) -+static inline void io_req_complete(struct io_kiocb *req, s32 res) - { - __io_req_complete(req, 0, res, 0); - } - --static void io_req_complete_failed(struct io_kiocb *req, long res) -+static void io_req_complete_failed(struct io_kiocb *req, s32 res) - { - req_set_fail(req); - io_req_complete_post(req, res, 0); -@@ -2010,8 +2051,7 @@ static bool io_kill_linked_timeout(struct io_kiocb *req) - link->timeout.head = NULL; - if (hrtimer_try_to_cancel(&io->timer) != -1) { - list_del(&link->timeout.list); -- io_cqring_fill_event(link->ctx, link->user_data, -- -ECANCELED, 0); -+ io_fill_cqe_req(link, -ECANCELED, 0); - io_put_req_deferred(link); - return true; - } -@@ -2035,7 +2075,7 @@ static void io_fail_links(struct io_kiocb *req) - link->link = NULL; - - trace_io_uring_fail_link(req, link); -- io_cqring_fill_event(link->ctx, link->user_data, res, 0); -+ io_fill_cqe_req(link, res, 0); - io_put_req_deferred(link); - link = nxt; - } -@@ -2052,8 +2092,7 @@ static bool io_disarm_next(struct io_kiocb *req) - req->flags &= ~REQ_F_ARM_LTIMEOUT; - if (link && link->opcode == IORING_OP_LINK_TIMEOUT) { - io_remove_next_linked(req); -- io_cqring_fill_event(link->ctx, link->user_data, -- -ECANCELED, 0); -+ io_fill_cqe_req(link, -ECANCELED, 0); - io_put_req_deferred(link); - posted = true; - } -@@ -2161,6 +2200,10 @@ static void tctx_task_work(struct callback_head *cb) - } - - ctx_flush_and_put(ctx, &locked); -+ -+ /* relaxed read is enough as only the task itself sets ->in_idle */ -+ if (unlikely(atomic_read(&tctx->in_idle))) -+ io_uring_drop_tctx_refs(current); - } - - static void io_req_task_work_add(struct io_kiocb *req) -@@ -2325,8 +2368,8 @@ static void io_submit_flush_completions(struct io_ring_ctx *ctx) - for (i = 0; i < nr; i++) { - struct io_kiocb *req = state->compl_reqs[i]; - -- __io_cqring_fill_event(ctx, req->user_data, req->result, -- req->compl.cflags); -+ __io_fill_cqe(ctx, req->user_data, req->result, -+ req->compl.cflags); - } - io_commit_cqring(ctx); - spin_unlock(&ctx->completion_lock); -@@ -2437,8 +2480,7 @@ static void io_iopoll_complete(struct io_ring_ctx *ctx, unsigned int *nr_events, - req = list_first_entry(done, struct io_kiocb, inflight_entry); - list_del(&req->inflight_entry); - -- __io_cqring_fill_event(ctx, req->user_data, req->result, -- io_put_rw_kbuf(req)); -+ io_fill_cqe_req(req, req->result, io_put_rw_kbuf(req)); - (*nr_events)++; - - if (req_ref_put_and_test(req)) -@@ -2641,8 +2683,12 @@ static bool io_rw_should_reissue(struct io_kiocb *req) - - static bool __io_complete_rw_common(struct io_kiocb *req, long res) - { -- if (req->rw.kiocb.ki_flags & IOCB_WRITE) -+ if 
(req->rw.kiocb.ki_flags & IOCB_WRITE) { - kiocb_end_write(req); -+ fsnotify_modify(req->file); -+ } else { -+ fsnotify_access(req->file); -+ } - if (res != req->result) { - if ((res == -EAGAIN || res == -EOPNOTSUPP) && - io_rw_should_reissue(req)) { -@@ -2655,10 +2701,24 @@ static bool __io_complete_rw_common(struct io_kiocb *req, long res) - return false; - } - -+static inline int io_fixup_rw_res(struct io_kiocb *req, unsigned res) -+{ -+ struct io_async_rw *io = req->async_data; -+ -+ /* add previously done IO, if any */ -+ if (io && io->bytes_done > 0) { -+ if (res < 0) -+ res = io->bytes_done; -+ else -+ res += io->bytes_done; -+ } -+ return res; -+} -+ - static void io_req_task_complete(struct io_kiocb *req, bool *locked) - { - unsigned int cflags = io_put_rw_kbuf(req); -- long res = req->result; -+ int res = req->result; - - if (*locked) { - struct io_ring_ctx *ctx = req->ctx; -@@ -2678,7 +2738,7 @@ static void __io_complete_rw(struct io_kiocb *req, long res, long res2, - { - if (__io_complete_rw_common(req, res)) - return; -- __io_req_complete(req, issue_flags, req->result, io_put_rw_kbuf(req)); -+ __io_req_complete(req, issue_flags, io_fixup_rw_res(req, res), io_put_rw_kbuf(req)); - } - - static void io_complete_rw(struct kiocb *kiocb, long res, long res2) -@@ -2687,7 +2747,7 @@ static void io_complete_rw(struct kiocb *kiocb, long res, long res2) - - if (__io_complete_rw_common(req, res)) - return; -- req->result = res; -+ req->result = io_fixup_rw_res(req, res); - req->io_task_work.func = io_req_task_complete; - io_req_task_work_add(req); - } -@@ -2840,9 +2900,13 @@ static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe, - req->flags |= REQ_F_ISREG; - - kiocb->ki_pos = READ_ONCE(sqe->off); -- if (kiocb->ki_pos == -1 && !(file->f_mode & FMODE_STREAM)) { -- req->flags |= REQ_F_CUR_POS; -- kiocb->ki_pos = file->f_pos; -+ if (kiocb->ki_pos == -1) { -+ if (!(file->f_mode & FMODE_STREAM)) { -+ req->flags |= REQ_F_CUR_POS; -+ kiocb->ki_pos = file->f_pos; -+ } else { -+ kiocb->ki_pos = 0; -+ } - } - kiocb->ki_hint = ki_hint_validate(file_write_hint(kiocb->ki_filp)); - kiocb->ki_flags = iocb_flags(kiocb->ki_filp); -@@ -2883,15 +2947,24 @@ static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe, - kiocb->ki_complete = io_complete_rw; - } - -+ /* used for fixed read/write too - just read unconditionally */ -+ req->buf_index = READ_ONCE(sqe->buf_index); -+ req->imu = NULL; -+ - if (req->opcode == IORING_OP_READ_FIXED || - req->opcode == IORING_OP_WRITE_FIXED) { -- req->imu = NULL; -+ struct io_ring_ctx *ctx = req->ctx; -+ u16 index; -+ -+ if (unlikely(req->buf_index >= ctx->nr_user_bufs)) -+ return -EFAULT; -+ index = array_index_nospec(req->buf_index, ctx->nr_user_bufs); -+ req->imu = ctx->user_bufs[index]; - io_req_set_rsrc_node(req); - } - - req->rw.addr = READ_ONCE(sqe->addr); - req->rw.len = READ_ONCE(sqe->len); -- req->buf_index = READ_ONCE(sqe->buf_index); - return 0; - } - -@@ -2920,15 +2993,6 @@ static void kiocb_done(struct kiocb *kiocb, ssize_t ret, - unsigned int issue_flags) - { - struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw.kiocb); -- struct io_async_rw *io = req->async_data; - -- /* add previously done IO, if any */ -- if (io && io->bytes_done > 0) { -- if (ret < 0) -- ret = io->bytes_done; -- else -- ret += io->bytes_done; +- refcount_set(&worker->ref, 1); +- worker->wqe = wqe; +- spin_lock_init(&worker->lock); +- init_completion(&worker->ref_done); +- +- if (index == IO_WQ_ACCT_BOUND) +- worker->flags |= 
IO_WORKER_F_BOUND; +- +- tsk = create_io_thread(io_wqe_worker, worker, wqe->node); +- if (!IS_ERR(tsk)) { +- io_init_new_worker(wqe, worker, tsk); +- } else if (!io_should_retry_thread(PTR_ERR(tsk))) { +- kfree(worker); +- goto fail; +- } else { +- INIT_WORK(&worker->work, io_workqueue_create); +- schedule_work(&worker->work); - } - - if (req->flags & REQ_F_CUR_POS) - req->file->f_pos = kiocb->ki_pos; -@@ -2945,6 +3009,7 @@ static void kiocb_done(struct kiocb *kiocb, ssize_t ret, - unsigned int cflags = io_put_rw_kbuf(req); - struct io_ring_ctx *ctx = req->ctx; - -+ ret = io_fixup_rw_res(req, ret); - req_set_fail(req); - if (!(issue_flags & IO_URING_F_NONBLOCK)) { - mutex_lock(&ctx->uring_lock); -@@ -3017,18 +3082,9 @@ static int __io_import_fixed(struct io_kiocb *req, int rw, struct iov_iter *iter - - static int io_import_fixed(struct io_kiocb *req, int rw, struct iov_iter *iter) - { -- struct io_ring_ctx *ctx = req->ctx; -- struct io_mapped_ubuf *imu = req->imu; -- u16 index, buf_index = req->buf_index; - -- if (likely(!imu)) { -- if (unlikely(buf_index >= ctx->nr_user_bufs)) -- return -EFAULT; -- index = array_index_nospec(buf_index, ctx->nr_user_bufs); -- imu = READ_ONCE(ctx->user_bufs[index]); -- req->imu = imu; +- return true; +-} +- +-/* +- * Iterate the passed in list and call the specific function for each +- * worker that isn't exiting +- */ +-static bool io_wq_for_each_worker(struct io_wqe *wqe, +- bool (*func)(struct io_worker *, void *), +- void *data) +-{ +- struct io_worker *worker; +- bool ret = false; +- +- list_for_each_entry_rcu(worker, &wqe->all_list, all_list) { +- if (io_worker_get(worker)) { +- /* no task if node is/was offline */ +- if (worker->task) +- ret = func(worker, data); +- io_worker_release(worker); +- if (ret) +- break; +- } - } -- return __io_import_fixed(req, rw, iter, imu); -+ if (WARN_ON_ONCE(!req->imu)) -+ return -EFAULT; -+ return __io_import_fixed(req, rw, iter, req->imu); - } - - static void io_ring_submit_unlock(struct io_ring_ctx *ctx, bool needs_lock) -@@ -3260,13 +3316,15 @@ static ssize_t loop_rw_iter(int rw, struct io_kiocb *req, struct iov_iter *iter) - ret = nr; - break; - } -+ ret += nr; - if (!iov_iter_is_bvec(iter)) { - iov_iter_advance(iter, nr); - } else { -- req->rw.len -= nr; - req->rw.addr += nr; -+ req->rw.len -= nr; -+ if (!req->rw.len) -+ break; - } -- ret += nr; - if (nr != iovec.iov_len) - break; - } -@@ -3548,6 +3606,7 @@ static int io_read(struct io_kiocb *req, unsigned int issue_flags) - return -EAGAIN; - } - -+ req->result = iov_iter_count(iter); - /* - * Now retry read with the IOCB_WAITQ parts set in the iocb. 
If - * we get -EIOCBQUEUED, then we'll get a notification when the -@@ -3665,7 +3724,12 @@ done: - copy_iov: - iov_iter_restore(iter, state); - ret = io_setup_async_rw(req, iovec, inline_vecs, iter, false); -- return ret ?: -EAGAIN; -+ if (!ret) { -+ if (kiocb->ki_flags & IOCB_WRITE) -+ kiocb_end_write(req); -+ return -EAGAIN; -+ } -+ return ret; - } - out_free: - /* it's reportedly faster than delegating the null check to kfree() */ -@@ -3966,18 +4030,11 @@ static int __io_splice_prep(struct io_kiocb *req, - if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) - return -EINVAL; - -- sp->file_in = NULL; - sp->len = READ_ONCE(sqe->len); - sp->flags = READ_ONCE(sqe->splice_flags); - - if (unlikely(sp->flags & ~valid_flags)) - return -EINVAL; +- return ret; +-} - -- sp->file_in = io_file_get(req->ctx, req, READ_ONCE(sqe->splice_fd_in), -- (sp->flags & SPLICE_F_FD_IN_FIXED)); -- if (!sp->file_in) -- return -EBADF; -- req->flags |= REQ_F_NEED_CLEANUP; -+ sp->splice_fd_in = READ_ONCE(sqe->splice_fd_in); - return 0; - } - -@@ -3992,20 +4049,27 @@ static int io_tee_prep(struct io_kiocb *req, - static int io_tee(struct io_kiocb *req, unsigned int issue_flags) - { - struct io_splice *sp = &req->splice; -- struct file *in = sp->file_in; - struct file *out = sp->file_out; - unsigned int flags = sp->flags & ~SPLICE_F_FD_IN_FIXED; -+ struct file *in; - long ret = 0; - - if (issue_flags & IO_URING_F_NONBLOCK) - return -EAGAIN; -+ -+ in = io_file_get(req->ctx, req, sp->splice_fd_in, -+ (sp->flags & SPLICE_F_FD_IN_FIXED)); -+ if (!in) { -+ ret = -EBADF; -+ goto done; -+ } -+ - if (sp->len) - ret = do_tee(in, out, sp->len, flags); - - if (!(sp->flags & SPLICE_F_FD_IN_FIXED)) - io_put_file(in); -- req->flags &= ~REQ_F_NEED_CLEANUP; +-static bool io_wq_worker_wake(struct io_worker *worker, void *data) +-{ +- set_notify_signal(worker->task); +- wake_up_process(worker->task); +- return false; +-} - -+done: - if (ret != sp->len) - req_set_fail(req); - io_req_complete(req, ret); -@@ -4024,15 +4088,22 @@ static int io_splice_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) - static int io_splice(struct io_kiocb *req, unsigned int issue_flags) - { - struct io_splice *sp = &req->splice; -- struct file *in = sp->file_in; - struct file *out = sp->file_out; - unsigned int flags = sp->flags & ~SPLICE_F_FD_IN_FIXED; - loff_t *poff_in, *poff_out; -+ struct file *in; - long ret = 0; - - if (issue_flags & IO_URING_F_NONBLOCK) - return -EAGAIN; - -+ in = io_file_get(req->ctx, req, sp->splice_fd_in, -+ (sp->flags & SPLICE_F_FD_IN_FIXED)); -+ if (!in) { -+ ret = -EBADF; -+ goto done; -+ } -+ - poff_in = (sp->off_in == -1) ? NULL : &sp->off_in; - poff_out = (sp->off_out == -1) ? 
NULL : &sp->off_out; - -@@ -4041,8 +4112,7 @@ static int io_splice(struct io_kiocb *req, unsigned int issue_flags) - - if (!(sp->flags & SPLICE_F_FD_IN_FIXED)) - io_put_file(in); -- req->flags &= ~REQ_F_NEED_CLEANUP; +-static void io_run_cancel(struct io_wq_work *work, struct io_wqe *wqe) +-{ +- struct io_wq *wq = wqe->wq; - -+done: - if (ret != sp->len) - req_set_fail(req); - io_req_complete(req, ret); -@@ -4067,9 +4137,6 @@ static int io_fsync_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) - { - struct io_ring_ctx *ctx = req->ctx; - -- if (!req->file) -- return -EBADF; +- do { +- work->flags |= IO_WQ_WORK_CANCEL; +- wq->do_work(work); +- work = wq->free_work(work); +- } while (work); +-} - - if (unlikely(ctx->flags & IORING_SETUP_IOPOLL)) - return -EINVAL; - if (unlikely(sqe->addr || sqe->ioprio || sqe->buf_index || -@@ -4129,6 +4196,8 @@ static int io_fallocate(struct io_kiocb *req, unsigned int issue_flags) - req->sync.len); - if (ret < 0) - req_set_fail(req); -+ else -+ fsnotify_modify(req->file); - io_req_complete(req, ret); - return 0; - } -@@ -4304,6 +4373,7 @@ static int __io_remove_buffers(struct io_ring_ctx *ctx, struct io_buffer *buf, - kfree(nxt); - if (++i == nbufs) - return i; -+ cond_resched(); - } - i++; - kfree(buf); -@@ -4394,6 +4464,7 @@ static int io_add_buffers(struct io_provide_buf *pbuf, struct io_buffer **head) - } else { - list_add_tail(&buf->list, &(*head)->list); - } -+ cond_resched(); - } - - return i ? i : -ENOMEM; -@@ -4415,7 +4486,8 @@ static int io_provide_buffers(struct io_kiocb *req, unsigned int issue_flags) - - ret = io_add_buffers(p, &head); - if (ret >= 0 && !list) { -- ret = xa_insert(&ctx->io_buffers, p->bgid, head, GFP_KERNEL); -+ ret = xa_insert(&ctx->io_buffers, p->bgid, head, -+ GFP_KERNEL_ACCOUNT); - if (ret < 0) - __io_remove_buffers(ctx, head, p->bgid, -1U); - } -@@ -4697,7 +4769,8 @@ static int io_setup_async_msg(struct io_kiocb *req, - async_msg = req->async_data; - req->flags |= REQ_F_NEED_CLEANUP; - memcpy(async_msg, kmsg, sizeof(*kmsg)); -- async_msg->msg.msg_name = &async_msg->addr; -+ if (async_msg->msg.msg_name) -+ async_msg->msg.msg_name = &async_msg->addr; - /* if were using fast_iov, set it to the new one */ - if (!async_msg->free_iov) - async_msg->msg.msg_iter.iov = async_msg->fast_iov; -@@ -4730,6 +4803,10 @@ static int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) - - if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) - return -EINVAL; -+ if (unlikely(sqe->addr2 || sqe->file_index)) -+ return -EINVAL; -+ if (unlikely(sqe->addr2 || sqe->file_index || sqe->ioprio)) -+ return -EINVAL; - - sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr)); - sr->len = READ_ONCE(sqe->len); -@@ -4951,6 +5028,10 @@ static int io_recvmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) - - if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) - return -EINVAL; -+ if (unlikely(sqe->addr2 || sqe->file_index)) -+ return -EINVAL; -+ if (unlikely(sqe->addr2 || sqe->file_index || sqe->ioprio)) -+ return -EINVAL; - - sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr)); - sr->len = READ_ONCE(sqe->len); -@@ -5093,8 +5174,7 @@ static int io_accept_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) - accept->nofile = rlimit(RLIMIT_NOFILE); - - accept->file_slot = READ_ONCE(sqe->file_index); -- if (accept->file_slot && ((req->open.how.flags & O_CLOEXEC) || -- (accept->flags & SOCK_CLOEXEC))) -+ if (accept->file_slot && (accept->flags & SOCK_CLOEXEC)) - return -EINVAL; - if (accept->flags & ~(SOCK_CLOEXEC | 
SOCK_NONBLOCK)) - return -EINVAL; -@@ -5241,52 +5321,23 @@ struct io_poll_table { - int error; - }; - --static int __io_async_wake(struct io_kiocb *req, struct io_poll_iocb *poll, -- __poll_t mask, io_req_tw_func_t func) +-static void io_wqe_insert_work(struct io_wqe *wqe, struct io_wq_work *work) -{ -- /* for instances that support it check for an event match first: */ -- if (mask && !(mask & poll->events)) -- return 0; +- struct io_wqe_acct *acct = io_work_get_acct(wqe, work); +- unsigned int hash; +- struct io_wq_work *tail; - -- trace_io_uring_task_add(req->ctx, req->opcode, req->user_data, mask); +- if (!io_wq_is_hashed(work)) { +-append: +- wq_list_add_tail(&work->list, &acct->work_list); +- return; +- } - -- list_del_init(&poll->wait.entry); +- hash = io_get_work_hash(work); +- tail = wqe->hash_tail[hash]; +- wqe->hash_tail[hash] = work; +- if (!tail) +- goto append; +- +- wq_list_add_after(&work->list, &tail->list, &acct->work_list); +-} +- +-static bool io_wq_work_match_item(struct io_wq_work *work, void *data) +-{ +- return work == data; +-} +- +-static void io_wqe_enqueue(struct io_wqe *wqe, struct io_wq_work *work) +-{ +- struct io_wqe_acct *acct = io_work_get_acct(wqe, work); +- unsigned work_flags = work->flags; +- bool do_create; - -- req->result = mask; -- req->io_task_work.func = func; -+#define IO_POLL_CANCEL_FLAG BIT(31) -+#define IO_POLL_REF_MASK GENMASK(30, 0) - - /* -- * If this fails, then the task is exiting. When a task exits, the -- * work gets canceled, so just cancel this request as well instead -- * of executing it. We can't safely execute it anyway, as we may not -- * have the needed state needed for it anyway. +- * If io-wq is exiting for this task, or if the request has explicitly +- * been marked as one that should not get executed, cancel it here. - */ -- io_req_task_work_add(req); -- return 1; -+/* -+ * If refs part of ->poll_refs (see IO_POLL_REF_MASK) is 0, it's free. We can -+ * bump it and acquire ownership. It's disallowed to modify requests while not -+ * owning it, that prevents from races for enqueueing task_work's and b/w -+ * arming poll and wakeups. 
-+ */ -+static inline bool io_poll_get_ownership(struct io_kiocb *req) -+{ -+ return !(atomic_fetch_inc(&req->poll_refs) & IO_POLL_REF_MASK); - } - --static bool io_poll_rewait(struct io_kiocb *req, struct io_poll_iocb *poll) -- __acquires(&req->ctx->completion_lock) -+static void io_poll_mark_cancelled(struct io_kiocb *req) - { -- struct io_ring_ctx *ctx = req->ctx; +- if (test_bit(IO_WQ_BIT_EXIT, &wqe->wq->state) || +- (work->flags & IO_WQ_WORK_CANCEL)) { +- io_run_cancel(work, wqe); +- return; +- } - -- /* req->task == current here, checking PF_EXITING is safe */ -- if (unlikely(req->task->flags & PF_EXITING)) -- WRITE_ONCE(poll->canceled, true); +- raw_spin_lock(&wqe->lock); +- io_wqe_insert_work(wqe, work); +- clear_bit(IO_ACCT_STALLED_BIT, &acct->flags); - -- if (!req->result && !READ_ONCE(poll->canceled)) { -- struct poll_table_struct pt = { ._key = poll->events }; +- rcu_read_lock(); +- do_create = !io_wqe_activate_free_worker(wqe, acct); +- rcu_read_unlock(); - -- req->result = vfs_poll(req->file, &pt) & poll->events; +- raw_spin_unlock(&wqe->lock); +- +- if (do_create && ((work_flags & IO_WQ_WORK_CONCURRENT) || +- !atomic_read(&acct->nr_running))) { +- bool did_create; +- +- did_create = io_wqe_create_worker(wqe, acct); +- if (likely(did_create)) +- return; +- +- raw_spin_lock(&wqe->lock); +- /* fatal condition, failed to create the first worker */ +- if (!acct->nr_workers) { +- struct io_cb_cancel_data match = { +- .fn = io_wq_work_match_item, +- .data = work, +- .cancel_all = false, +- }; +- +- if (io_acct_cancel_pending_work(wqe, acct, &match)) +- raw_spin_lock(&wqe->lock); +- } +- raw_spin_unlock(&wqe->lock); - } +-} - -- spin_lock(&ctx->completion_lock); -- if (!req->result && !READ_ONCE(poll->canceled)) { -- add_wait_queue(poll->head, &poll->wait); -- return true; +-void io_wq_enqueue(struct io_wq *wq, struct io_wq_work *work) +-{ +- struct io_wqe *wqe = wq->wqes[numa_node_id()]; +- +- io_wqe_enqueue(wqe, work); +-} +- +-/* +- * Work items that hash to the same value will not be done in parallel. +- * Used to limit concurrent writes, generally hashed by inode. +- */ +-void io_wq_hash_work(struct io_wq_work *work, void *val) +-{ +- unsigned int bit; +- +- bit = hash_ptr(val, IO_WQ_HASH_ORDER); +- work->flags |= (IO_WQ_WORK_HASHED | (bit << IO_WQ_HASH_SHIFT)); +-} +- +-static bool io_wq_worker_cancel(struct io_worker *worker, void *data) +-{ +- struct io_cb_cancel_data *match = data; +- +- /* +- * Hold the lock to avoid ->cur_work going out of scope, caller +- * may dereference the passed in work. 
+- */ +- spin_lock(&worker->lock); +- if (worker->cur_work && +- match->fn(worker->cur_work, match->data)) { +- set_notify_signal(worker->task); +- match->nr_running++; - } +- spin_unlock(&worker->lock); - -- return false; -+ atomic_or(IO_POLL_CANCEL_FLAG, &req->poll_refs); - } - - static struct io_poll_iocb *io_poll_get_double(struct io_kiocb *req) -@@ -5304,141 +5355,231 @@ static struct io_poll_iocb *io_poll_get_single(struct io_kiocb *req) - return &req->apoll->poll; - } - --static void io_poll_remove_double(struct io_kiocb *req) -- __must_hold(&req->ctx->completion_lock) -+static void io_poll_req_insert(struct io_kiocb *req) - { -- struct io_poll_iocb *poll = io_poll_get_double(req); -+ struct io_ring_ctx *ctx = req->ctx; -+ struct hlist_head *list; - -- lockdep_assert_held(&req->ctx->completion_lock); -+ list = &ctx->cancel_hash[hash_long(req->user_data, ctx->cancel_hash_bits)]; -+ hlist_add_head(&req->hash_node, list); -+} - -- if (poll && poll->head) { -- struct wait_queue_head *head = poll->head; -+static void io_init_poll_iocb(struct io_poll_iocb *poll, __poll_t events, -+ wait_queue_func_t wake_func) -+{ -+ poll->head = NULL; -+#define IO_POLL_UNMASK (EPOLLERR|EPOLLHUP|EPOLLNVAL|EPOLLRDHUP) -+ /* mask in events that we always want/need */ -+ poll->events = events | IO_POLL_UNMASK; -+ INIT_LIST_HEAD(&poll->wait.entry); -+ init_waitqueue_func_entry(&poll->wait, wake_func); -+} -+ -+static inline void io_poll_remove_entry(struct io_poll_iocb *poll) -+{ -+ struct wait_queue_head *head = smp_load_acquire(&poll->head); - -+ if (head) { - spin_lock_irq(&head->lock); - list_del_init(&poll->wait.entry); -- if (poll->wait.private) -- req_ref_put(req); - poll->head = NULL; - spin_unlock_irq(&head->lock); - } - } - --static bool __io_poll_complete(struct io_kiocb *req, __poll_t mask) -- __must_hold(&req->ctx->completion_lock) -+static void io_poll_remove_entries(struct io_kiocb *req) -+{ -+ struct io_poll_iocb *poll = io_poll_get_single(req); -+ struct io_poll_iocb *poll_double = io_poll_get_double(req); -+ -+ /* -+ * While we hold the waitqueue lock and the waitqueue is nonempty, -+ * wake_up_pollfree() will wait for us. However, taking the waitqueue -+ * lock in the first place can race with the waitqueue being freed. -+ * -+ * We solve this as eventpoll does: by taking advantage of the fact that -+ * all users of wake_up_pollfree() will RCU-delay the actual free. If -+ * we enter rcu_read_lock() and see that the pointer to the queue is -+ * non-NULL, we can then lock it without the memory being freed out from -+ * under us. -+ * -+ * Keep holding rcu_read_lock() as long as we hold the queue lock, in -+ * case the caller deletes the entry from the queue, leaving it empty. -+ * In that case, only RCU prevents the queue memory from being freed. -+ */ -+ rcu_read_lock(); -+ io_poll_remove_entry(poll); -+ if (poll_double) -+ io_poll_remove_entry(poll_double); -+ rcu_read_unlock(); -+} -+ -+/* -+ * All poll tw should go through this. Checks for poll events, manages -+ * references, does rewait, etc. -+ * -+ * Returns a negative error on failure. >0 when no action require, which is -+ * either spurious wakeup or multishot CQE is served. 0 when it's done with -+ * the request, then the mask is stored in req->result. 
-+ */ -+static int io_poll_check_events(struct io_kiocb *req) - { - struct io_ring_ctx *ctx = req->ctx; -- unsigned flags = IORING_CQE_F_MORE; -- int error; -+ struct io_poll_iocb *poll = io_poll_get_single(req); -+ int v; -+ -+ /* req->task == current here, checking PF_EXITING is safe */ -+ if (unlikely(req->task->flags & PF_EXITING)) -+ io_poll_mark_cancelled(req); -+ -+ do { -+ v = atomic_read(&req->poll_refs); - -- if (READ_ONCE(req->poll.canceled)) { -- error = -ECANCELED; -- req->poll.events |= EPOLLONESHOT; -+ /* tw handler should be the owner, and so have some references */ -+ if (WARN_ON_ONCE(!(v & IO_POLL_REF_MASK))) -+ return 0; -+ if (v & IO_POLL_CANCEL_FLAG) -+ return -ECANCELED; -+ -+ if (!req->result) { -+ struct poll_table_struct pt = { ._key = poll->events }; -+ -+ req->result = vfs_poll(req->file, &pt) & poll->events; -+ } -+ -+ /* multishot, just fill an CQE and proceed */ -+ if (req->result && !(poll->events & EPOLLONESHOT)) { -+ __poll_t mask = mangle_poll(req->result & poll->events); -+ bool filled; -+ -+ spin_lock(&ctx->completion_lock); -+ filled = io_fill_cqe_aux(ctx, req->user_data, mask, -+ IORING_CQE_F_MORE); -+ io_commit_cqring(ctx); -+ spin_unlock(&ctx->completion_lock); -+ if (unlikely(!filled)) -+ return -ECANCELED; -+ io_cqring_ev_posted(ctx); -+ } else if (req->result) { -+ return 0; -+ } -+ -+ /* -+ * Release all references, retry if someone tried to restart -+ * task_work while we were executing it. -+ */ -+ } while (atomic_sub_return(v & IO_POLL_REF_MASK, &req->poll_refs)); -+ -+ return 1; -+} -+ -+static void io_poll_task_func(struct io_kiocb *req, bool *locked) -+{ -+ struct io_ring_ctx *ctx = req->ctx; -+ int ret; -+ -+ ret = io_poll_check_events(req); -+ if (ret > 0) -+ return; -+ -+ if (!ret) { -+ req->result = mangle_poll(req->result & req->poll.events); - } else { -- error = mangle_poll(mask); +- return match->nr_running && !match->cancel_all; +-} +- +-static inline void io_wqe_remove_pending(struct io_wqe *wqe, +- struct io_wq_work *work, +- struct io_wq_work_node *prev) +-{ +- struct io_wqe_acct *acct = io_work_get_acct(wqe, work); +- unsigned int hash = io_get_work_hash(work); +- struct io_wq_work *prev_work = NULL; +- +- if (io_wq_is_hashed(work) && work == wqe->hash_tail[hash]) { +- if (prev) +- prev_work = container_of(prev, struct io_wq_work, list); +- if (prev_work && io_get_work_hash(prev_work) == hash) +- wqe->hash_tail[hash] = prev_work; +- else +- wqe->hash_tail[hash] = NULL; - } -- if (req->poll.events & EPOLLONESHOT) -- flags = 0; -- if (!io_cqring_fill_event(ctx, req->user_data, error, flags)) { -- req->poll.events |= EPOLLONESHOT; -- flags = 0; -+ req->result = ret; -+ req_set_fail(req); - } -- if (flags & IORING_CQE_F_MORE) -- ctx->cq_extra++; - -- return !(flags & IORING_CQE_F_MORE); -+ io_poll_remove_entries(req); -+ spin_lock(&ctx->completion_lock); -+ hash_del(&req->hash_node); -+ spin_unlock(&ctx->completion_lock); -+ io_req_complete_post(req, req->result, 0); - } - --static inline bool io_poll_complete(struct io_kiocb *req, __poll_t mask) -- __must_hold(&req->ctx->completion_lock) -+static void io_apoll_task_func(struct io_kiocb *req, bool *locked) - { -- bool done; -+ struct io_ring_ctx *ctx = req->ctx; -+ int ret; - -- done = __io_poll_complete(req, mask); -- io_commit_cqring(req->ctx); -- return done; -+ ret = io_poll_check_events(req); -+ if (ret > 0) -+ return; -+ -+ io_poll_remove_entries(req); -+ spin_lock(&ctx->completion_lock); -+ hash_del(&req->hash_node); -+ spin_unlock(&ctx->completion_lock); -+ -+ if (!ret) -+ 
io_req_task_submit(req, locked); -+ else -+ io_req_complete_failed(req, ret); - } - --static void io_poll_task_func(struct io_kiocb *req, bool *locked) -+static void __io_poll_execute(struct io_kiocb *req, int mask) - { -- struct io_ring_ctx *ctx = req->ctx; -- struct io_kiocb *nxt; -+ req->result = mask; -+ if (req->opcode == IORING_OP_POLL_ADD) -+ req->io_task_work.func = io_poll_task_func; -+ else -+ req->io_task_work.func = io_apoll_task_func; - -- if (io_poll_rewait(req, &req->poll)) { -- spin_unlock(&ctx->completion_lock); -- } else { -- bool done; -+ trace_io_uring_task_add(req->ctx, req->opcode, req->user_data, mask); -+ io_req_task_work_add(req); -+} - -- if (req->poll.done) { -- spin_unlock(&ctx->completion_lock); +- wq_list_del(&acct->work_list, &work->list, prev); +-} +- +-static bool io_acct_cancel_pending_work(struct io_wqe *wqe, +- struct io_wqe_acct *acct, +- struct io_cb_cancel_data *match) +- __releases(wqe->lock) +-{ +- struct io_wq_work_node *node, *prev; +- struct io_wq_work *work; +- +- wq_list_for_each(node, prev, &acct->work_list) { +- work = container_of(node, struct io_wq_work, list); +- if (!match->fn(work, match->data)) +- continue; +- io_wqe_remove_pending(wqe, work, prev); +- raw_spin_unlock(&wqe->lock); +- io_run_cancel(work, wqe); +- match->nr_pending++; +- /* not safe to continue after unlock */ +- return true; +- } +- +- return false; +-} +- +-static void io_wqe_cancel_pending_work(struct io_wqe *wqe, +- struct io_cb_cancel_data *match) +-{ +- int i; +-retry: +- raw_spin_lock(&wqe->lock); +- for (i = 0; i < IO_WQ_ACCT_NR; i++) { +- struct io_wqe_acct *acct = io_get_acct(wqe, i == 0); +- +- if (io_acct_cancel_pending_work(wqe, acct, match)) { +- if (match->cancel_all) +- goto retry; - return; - } -- done = __io_poll_complete(req, req->result); -- if (done) { -- io_poll_remove_double(req); -- hash_del(&req->hash_node); -- req->poll.done = true; -- } else { -- req->result = 0; -- add_wait_queue(req->poll.head, &req->poll.wait); -- } -- io_commit_cqring(ctx); -- spin_unlock(&ctx->completion_lock); -- io_cqring_ev_posted(ctx); -+static inline void io_poll_execute(struct io_kiocb *req, int res) -+{ -+ if (io_poll_get_ownership(req)) -+ __io_poll_execute(req, res); -+} - -- if (done) { -- nxt = io_put_req_find_next(req); -- if (nxt) -- io_req_task_submit(nxt, locked); -- } - } -+static void io_poll_cancel_req(struct io_kiocb *req) -+{ -+ io_poll_mark_cancelled(req); -+ /* kick tw, which should complete the request */ -+ io_poll_execute(req, 0); - } - --static int io_poll_double_wake(struct wait_queue_entry *wait, unsigned mode, -- int sync, void *key) -+static int io_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync, -+ void *key) - { - struct io_kiocb *req = wait->private; -- struct io_poll_iocb *poll = io_poll_get_single(req); -+ struct io_poll_iocb *poll = container_of(wait, struct io_poll_iocb, -+ wait); - __poll_t mask = key_to_poll(key); -- unsigned long flags; - -- /* for instances that support it check for an event match first: */ -- if (mask && !(mask & poll->events)) -- return 0; -- if (!(poll->events & EPOLLONESHOT)) -- return poll->wait.func(&poll->wait, mode, sync, key); -+ if (unlikely(mask & POLLFREE)) { -+ io_poll_mark_cancelled(req); -+ /* we have to kick tw in case it's not already */ -+ io_poll_execute(req, 0); - +- raw_spin_unlock(&wqe->lock); +-} +- +-static void io_wqe_cancel_running_work(struct io_wqe *wqe, +- struct io_cb_cancel_data *match) +-{ +- rcu_read_lock(); +- io_wq_for_each_worker(wqe, io_wq_worker_cancel, match); 
+- rcu_read_unlock(); +-} +- +-enum io_wq_cancel io_wq_cancel_cb(struct io_wq *wq, work_cancel_fn *cancel, +- void *data, bool cancel_all) +-{ +- struct io_cb_cancel_data match = { +- .fn = cancel, +- .data = data, +- .cancel_all = cancel_all, +- }; +- int node; +- +- /* +- * First check pending list, if we're lucky we can just remove it +- * from there. CANCEL_OK means that the work is returned as-new, +- * no completion will be posted for it. +- */ +- for_each_node(node) { +- struct io_wqe *wqe = wq->wqes[node]; +- +- io_wqe_cancel_pending_work(wqe, &match); +- if (match.nr_pending && !match.cancel_all) +- return IO_WQ_CANCEL_OK; +- } +- +- /* +- * Now check if a free (going busy) or busy worker has the work +- * currently running. If we find it there, we'll return CANCEL_RUNNING +- * as an indication that we attempt to signal cancellation. The +- * completion will run normally in this case. +- */ +- for_each_node(node) { +- struct io_wqe *wqe = wq->wqes[node]; +- +- io_wqe_cancel_running_work(wqe, &match); +- if (match.nr_running && !match.cancel_all) +- return IO_WQ_CANCEL_RUNNING; +- } +- +- if (match.nr_running) +- return IO_WQ_CANCEL_RUNNING; +- if (match.nr_pending) +- return IO_WQ_CANCEL_OK; +- return IO_WQ_CANCEL_NOTFOUND; +-} +- +-static int io_wqe_hash_wake(struct wait_queue_entry *wait, unsigned mode, +- int sync, void *key) +-{ +- struct io_wqe *wqe = container_of(wait, struct io_wqe, wait); +- int i; +- - list_del_init(&wait->entry); -+ /* -+ * If the waitqueue is being freed early but someone is already -+ * holds ownership over it, we have to tear down the request as -+ * best we can. That means immediately removing the request from -+ * its waitqueue and preventing all further accesses to the -+ * waitqueue via the request. -+ */ -+ list_del_init(&poll->wait.entry); - -- if (poll->head) { -- bool done; - -- spin_lock_irqsave(&poll->head->lock, flags); -- done = list_empty(&poll->wait.entry); -- if (!done) -- list_del_init(&poll->wait.entry); -- /* make sure double remove sees this as being gone */ -- wait->private = NULL; -- spin_unlock_irqrestore(&poll->head->lock, flags); -- if (!done) { -- /* use wait func handler, so it matches the rq type */ -- poll->wait.func(&poll->wait, mode, sync, key); -- } -+ /* -+ * Careful: this *must* be the last step, since as soon -+ * as req->head is NULL'ed out, the request can be -+ * completed and freed, since aio_poll_complete_work() -+ * will no longer need to take the waitqueue lock. 
-+ */ -+ smp_store_release(&poll->head, NULL); -+ return 1; - } -- req_ref_put(req); +- rcu_read_lock(); +- for (i = 0; i < IO_WQ_ACCT_NR; i++) { +- struct io_wqe_acct *acct = &wqe->acct[i]; +- +- if (test_and_clear_bit(IO_ACCT_STALLED_BIT, &acct->flags)) +- io_wqe_activate_free_worker(wqe, acct); +- } +- rcu_read_unlock(); - return 1; -} - --static void io_init_poll_iocb(struct io_poll_iocb *poll, __poll_t events, -- wait_queue_func_t wake_func) +- +-struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data) -{ -- poll->head = NULL; -- poll->done = false; -- poll->canceled = false; --#define IO_POLL_UNMASK (EPOLLERR|EPOLLHUP|EPOLLNVAL|EPOLLRDHUP) -- /* mask in events that we always want/need */ -- poll->events = events | IO_POLL_UNMASK; -- INIT_LIST_HEAD(&poll->wait.entry); -- init_waitqueue_func_entry(&poll->wait, wake_func); -+ /* for instances that support it check for an event match first */ -+ if (mask && !(mask & poll->events)) -+ return 0; -+ -+ if (io_poll_get_ownership(req)) -+ __io_poll_execute(req, mask); -+ return 1; - } - - static void __io_queue_proc(struct io_poll_iocb *poll, struct io_poll_table *pt, -@@ -5453,10 +5594,10 @@ static void __io_queue_proc(struct io_poll_iocb *poll, struct io_poll_table *pt, - * if this happens. - */ - if (unlikely(pt->nr_entries)) { -- struct io_poll_iocb *poll_one = poll; -+ struct io_poll_iocb *first = poll; - - /* double add on the same waitqueue head, ignore */ -- if (poll_one->head == head) -+ if (first->head == head) - return; - /* already have a 2nd entry, fail a third attempt */ - if (*poll_ptr) { -@@ -5465,25 +5606,19 @@ static void __io_queue_proc(struct io_poll_iocb *poll, struct io_poll_table *pt, - pt->error = -EINVAL; - return; - } -- /* -- * Can't handle multishot for double wait for now, turn it -- * into one-shot mode. 
-- */ -- if (!(poll_one->events & EPOLLONESHOT)) -- poll_one->events |= EPOLLONESHOT; -+ - poll = kmalloc(sizeof(*poll), GFP_ATOMIC); - if (!poll) { - pt->error = -ENOMEM; - return; - } -- io_init_poll_iocb(poll, poll_one->events, io_poll_double_wake); -- req_ref_get(req); -- poll->wait.private = req; -+ io_init_poll_iocb(poll, first->events, first->wait.func); - *poll_ptr = poll; - } - - pt->nr_entries++; - poll->head = head; -+ poll->wait.private = req; - - if (poll->events & EPOLLEXCLUSIVE) - add_wait_queue_exclusive(head, &poll->wait); -@@ -5491,70 +5626,24 @@ static void __io_queue_proc(struct io_poll_iocb *poll, struct io_poll_table *pt, - add_wait_queue(head, &poll->wait); - } - --static void io_async_queue_proc(struct file *file, struct wait_queue_head *head, -+static void io_poll_queue_proc(struct file *file, struct wait_queue_head *head, - struct poll_table_struct *p) - { - struct io_poll_table *pt = container_of(p, struct io_poll_table, pt); -- struct async_poll *apoll = pt->req->apoll; +- int ret, node, i; +- struct io_wq *wq; - -- __io_queue_proc(&apoll->poll, pt, head, &apoll->double_poll); +- if (WARN_ON_ONCE(!data->free_work || !data->do_work)) +- return ERR_PTR(-EINVAL); +- if (WARN_ON_ONCE(!bounded)) +- return ERR_PTR(-EINVAL); +- +- wq = kzalloc(struct_size(wq, wqes, nr_node_ids), GFP_KERNEL); +- if (!wq) +- return ERR_PTR(-ENOMEM); +- ret = cpuhp_state_add_instance_nocalls(io_wq_online, &wq->cpuhp_node); +- if (ret) +- goto err_wq; +- +- refcount_inc(&data->hash->refs); +- wq->hash = data->hash; +- wq->free_work = data->free_work; +- wq->do_work = data->do_work; +- +- ret = -ENOMEM; +- for_each_node(node) { +- struct io_wqe *wqe; +- int alloc_node = node; +- +- if (!node_online(alloc_node)) +- alloc_node = NUMA_NO_NODE; +- wqe = kzalloc_node(sizeof(struct io_wqe), GFP_KERNEL, alloc_node); +- if (!wqe) +- goto err; +- if (!alloc_cpumask_var(&wqe->cpu_mask, GFP_KERNEL)) +- goto err; +- cpumask_copy(wqe->cpu_mask, cpumask_of_node(node)); +- wq->wqes[node] = wqe; +- wqe->node = alloc_node; +- wqe->acct[IO_WQ_ACCT_BOUND].max_workers = bounded; +- wqe->acct[IO_WQ_ACCT_UNBOUND].max_workers = +- task_rlimit(current, RLIMIT_NPROC); +- INIT_LIST_HEAD(&wqe->wait.entry); +- wqe->wait.func = io_wqe_hash_wake; +- for (i = 0; i < IO_WQ_ACCT_NR; i++) { +- struct io_wqe_acct *acct = &wqe->acct[i]; +- +- acct->index = i; +- atomic_set(&acct->nr_running, 0); +- INIT_WQ_LIST(&acct->work_list); +- } +- wqe->wq = wq; +- raw_spin_lock_init(&wqe->lock); +- INIT_HLIST_NULLS_HEAD(&wqe->free_list, 0); +- INIT_LIST_HEAD(&wqe->all_list); +- } +- +- wq->task = get_task_struct(data->task); +- atomic_set(&wq->worker_refs, 1); +- init_completion(&wq->worker_done); +- return wq; +-err: +- io_wq_put_hash(data->hash); +- cpuhp_state_remove_instance_nocalls(io_wq_online, &wq->cpuhp_node); +- for_each_node(node) { +- if (!wq->wqes[node]) +- continue; +- free_cpumask_var(wq->wqes[node]->cpu_mask); +- kfree(wq->wqes[node]); +- } +-err_wq: +- kfree(wq); +- return ERR_PTR(ret); -} - --static void io_async_task_func(struct io_kiocb *req, bool *locked) +-static bool io_task_work_match(struct callback_head *cb, void *data) -{ -- struct async_poll *apoll = req->apoll; -- struct io_ring_ctx *ctx = req->ctx; +- struct io_worker *worker; - -- trace_io_uring_task_run(req->ctx, req, req->opcode, req->user_data); +- if (cb->func != create_worker_cb && cb->func != create_worker_cont) +- return false; +- worker = container_of(cb, struct io_worker, create_work); +- return worker->wqe->wq == data; +-} - -- if 
(io_poll_rewait(req, &apoll->poll)) { -- spin_unlock(&ctx->completion_lock); +-void io_wq_exit_start(struct io_wq *wq) +-{ +- set_bit(IO_WQ_BIT_EXIT, &wq->state); +-} +- +-static void io_wq_exit_workers(struct io_wq *wq) +-{ +- struct callback_head *cb; +- int node; +- +- if (!wq->task) - return; +- +- while ((cb = task_work_cancel_match(wq->task, io_task_work_match, wq)) != NULL) { +- struct io_worker *worker; +- struct io_wqe_acct *acct; +- +- worker = container_of(cb, struct io_worker, create_work); +- acct = io_wqe_get_acct(worker); +- atomic_dec(&acct->nr_running); +- raw_spin_lock(&worker->wqe->lock); +- acct->nr_workers--; +- raw_spin_unlock(&worker->wqe->lock); +- io_worker_ref_put(wq); +- clear_bit_unlock(0, &worker->create_state); +- io_worker_release(worker); - } - -- hash_del(&req->hash_node); -- io_poll_remove_double(req); -- apoll->poll.done = true; -- spin_unlock(&ctx->completion_lock); +- rcu_read_lock(); +- for_each_node(node) { +- struct io_wqe *wqe = wq->wqes[node]; - -- if (!READ_ONCE(apoll->poll.canceled)) -- io_req_task_submit(req, locked); +- io_wq_for_each_worker(wqe, io_wq_worker_wake, NULL); +- } +- rcu_read_unlock(); +- io_worker_ref_put(wq); +- wait_for_completion(&wq->worker_done); +- +- for_each_node(node) { +- spin_lock_irq(&wq->hash->wait.lock); +- list_del_init(&wq->wqes[node]->wait.entry); +- spin_unlock_irq(&wq->hash->wait.lock); +- } +- put_task_struct(wq->task); +- wq->task = NULL; +-} +- +-static void io_wq_destroy(struct io_wq *wq) +-{ +- int node; +- +- cpuhp_state_remove_instance_nocalls(io_wq_online, &wq->cpuhp_node); +- +- for_each_node(node) { +- struct io_wqe *wqe = wq->wqes[node]; +- struct io_cb_cancel_data match = { +- .fn = io_wq_work_match_all, +- .cancel_all = true, +- }; +- io_wqe_cancel_pending_work(wqe, &match); +- free_cpumask_var(wqe->cpu_mask); +- kfree(wqe); +- } +- io_wq_put_hash(wq->hash); +- kfree(wq); +-} +- +-void io_wq_put_and_exit(struct io_wq *wq) +-{ +- WARN_ON_ONCE(!test_bit(IO_WQ_BIT_EXIT, &wq->state)); +- +- io_wq_exit_workers(wq); +- io_wq_destroy(wq); +-} +- +-struct online_data { +- unsigned int cpu; +- bool online; +-}; +- +-static bool io_wq_worker_affinity(struct io_worker *worker, void *data) +-{ +- struct online_data *od = data; +- +- if (od->online) +- cpumask_set_cpu(od->cpu, worker->wqe->cpu_mask); - else -- io_req_complete_failed(req, -ECANCELED); +- cpumask_clear_cpu(od->cpu, worker->wqe->cpu_mask); +- return false; -} - --static int io_async_wake(struct wait_queue_entry *wait, unsigned mode, int sync, -- void *key) +-static int __io_wq_cpu_online(struct io_wq *wq, unsigned int cpu, bool online) -{ -- struct io_kiocb *req = wait->private; -- struct io_poll_iocb *poll = &req->apoll->poll; +- struct online_data od = { +- .cpu = cpu, +- .online = online +- }; +- int i; - -- trace_io_uring_poll_wake(req->ctx, req->opcode, req->user_data, -- key_to_poll(key)); +- rcu_read_lock(); +- for_each_node(i) +- io_wq_for_each_worker(wq->wqes[i], io_wq_worker_affinity, &od); +- rcu_read_unlock(); +- return 0; +-} - -- return __io_async_wake(req, poll, key_to_poll(key), io_async_task_func); +-static int io_wq_cpu_online(unsigned int cpu, struct hlist_node *node) +-{ +- struct io_wq *wq = hlist_entry_safe(node, struct io_wq, cpuhp_node); +- +- return __io_wq_cpu_online(wq, cpu, true); -} - --static void io_poll_req_insert(struct io_kiocb *req) +-static int io_wq_cpu_offline(unsigned int cpu, struct hlist_node *node) -{ -- struct io_ring_ctx *ctx = req->ctx; -- struct hlist_head *list; - -- list = 
&ctx->cancel_hash[hash_long(req->user_data, ctx->cancel_hash_bits)]; -- hlist_add_head(&req->hash_node, list); -+ __io_queue_proc(&pt->req->poll, pt, head, -+ (struct io_poll_iocb **) &pt->req->async_data); - } - --static __poll_t __io_arm_poll_handler(struct io_kiocb *req, -- struct io_poll_iocb *poll, -- struct io_poll_table *ipt, __poll_t mask, -- wait_queue_func_t wake_func) -- __acquires(&ctx->completion_lock) -+static int __io_arm_poll_handler(struct io_kiocb *req, -+ struct io_poll_iocb *poll, -+ struct io_poll_table *ipt, __poll_t mask) - { - struct io_ring_ctx *ctx = req->ctx; -- bool cancel = false; -+ int v; - - INIT_HLIST_NODE(&req->hash_node); -- io_init_poll_iocb(poll, mask, wake_func); -+ io_init_poll_iocb(poll, mask, io_poll_wake); - poll->file = req->file; - poll->wait.private = req; - -@@ -5563,31 +5652,56 @@ static __poll_t __io_arm_poll_handler(struct io_kiocb *req, - ipt->error = 0; - ipt->nr_entries = 0; - -+ /* -+ * Take the ownership to delay any tw execution up until we're done -+ * with poll arming. see io_poll_get_ownership(). -+ */ -+ atomic_set(&req->poll_refs, 1); - mask = vfs_poll(req->file, &ipt->pt) & poll->events; -- if (unlikely(!ipt->nr_entries) && !ipt->error) -- ipt->error = -EINVAL; -+ -+ if (mask && (poll->events & EPOLLONESHOT)) { -+ io_poll_remove_entries(req); -+ /* no one else has access to the req, forget about the ref */ -+ return mask; -+ } -+ if (!mask && unlikely(ipt->error || !ipt->nr_entries)) { -+ io_poll_remove_entries(req); -+ if (!ipt->error) -+ ipt->error = -EINVAL; -+ return 0; -+ } - - spin_lock(&ctx->completion_lock); -- if (ipt->error || (mask && (poll->events & EPOLLONESHOT))) -- io_poll_remove_double(req); -- if (likely(poll->head)) { -- spin_lock_irq(&poll->head->lock); -- if (unlikely(list_empty(&poll->wait.entry))) { -- if (ipt->error) -- cancel = true; -+ io_poll_req_insert(req); -+ spin_unlock(&ctx->completion_lock); -+ -+ if (mask) { -+ /* can't multishot if failed, just queue the event we've got */ -+ if (unlikely(ipt->error || !ipt->nr_entries)) { -+ poll->events |= EPOLLONESHOT; - ipt->error = 0; -- mask = 0; - } -- if ((mask && (poll->events & EPOLLONESHOT)) || ipt->error) -- list_del_init(&poll->wait.entry); -- else if (cancel) -- WRITE_ONCE(poll->canceled, true); -- else if (!poll->done) /* actually waiting for an event */ -- io_poll_req_insert(req); -- spin_unlock_irq(&poll->head->lock); -+ __io_poll_execute(req, mask); -+ return 0; - } - -- return mask; -+ /* -+ * Release ownership. If someone tried to queue a tw while it was -+ * locked, kick it off for them. 
-+ */ -+ v = atomic_dec_return(&req->poll_refs); -+ if (unlikely(v & IO_POLL_REF_MASK)) -+ __io_poll_execute(req, 0); -+ return 0; -+} -+ -+static void io_async_queue_proc(struct file *file, struct wait_queue_head *head, -+ struct poll_table_struct *p) -+{ -+ struct io_poll_table *pt = container_of(p, struct io_poll_table, pt); -+ struct async_poll *apoll = pt->req->apoll; -+ -+ __io_queue_proc(&apoll->poll, pt, head, &apoll->double_poll); - } - - enum { -@@ -5602,8 +5716,8 @@ static int io_arm_poll_handler(struct io_kiocb *req) - struct io_ring_ctx *ctx = req->ctx; - struct async_poll *apoll; - struct io_poll_table ipt; -- __poll_t ret, mask = EPOLLONESHOT | POLLERR | POLLPRI; -- int rw; -+ __poll_t mask = EPOLLONESHOT | POLLERR | POLLPRI; -+ int ret; - - if (!req->file || !file_can_poll(req->file)) - return IO_APOLL_ABORTED; -@@ -5613,7 +5727,6 @@ static int io_arm_poll_handler(struct io_kiocb *req) - return IO_APOLL_ABORTED; - - if (def->pollin) { -- rw = READ; - mask |= POLLIN | POLLRDNORM; - - /* If reading from MSG_ERRQUEUE using recvmsg, ignore POLLIN */ -@@ -5621,14 +5734,9 @@ static int io_arm_poll_handler(struct io_kiocb *req) - (req->sr_msg.msg_flags & MSG_ERRQUEUE)) - mask &= ~POLLIN; - } else { -- rw = WRITE; - mask |= POLLOUT | POLLWRNORM; - } - -- /* if we can't nonblock try, then no point in arming a poll handler */ -- if (!io_file_supports_nowait(req, rw)) -- return IO_APOLL_ABORTED; +- struct io_wq *wq = hlist_entry_safe(node, struct io_wq, cpuhp_node); - - apoll = kmalloc(sizeof(*apoll), GFP_ATOMIC); - if (unlikely(!apoll)) - return IO_APOLL_ABORTED; -@@ -5636,11 +5744,8 @@ static int io_arm_poll_handler(struct io_kiocb *req) - req->apoll = apoll; - req->flags |= REQ_F_POLLED; - ipt.pt._qproc = io_async_queue_proc; -- io_req_set_refcount(req); - -- ret = __io_arm_poll_handler(req, &apoll->poll, &ipt, mask, -- io_async_wake); -- spin_unlock(&ctx->completion_lock); -+ ret = __io_arm_poll_handler(req, &apoll->poll, &ipt, mask); - if (ret || ipt.error) - return ret ? IO_APOLL_READY : IO_APOLL_ABORTED; - -@@ -5649,43 +5754,6 @@ static int io_arm_poll_handler(struct io_kiocb *req) - return IO_APOLL_OK; - } - --static bool __io_poll_remove_one(struct io_kiocb *req, -- struct io_poll_iocb *poll, bool do_cancel) -- __must_hold(&req->ctx->completion_lock) +- return __io_wq_cpu_online(wq, cpu, false); +-} +- +-int io_wq_cpu_affinity(struct io_wq *wq, cpumask_var_t mask) -{ -- bool do_complete = false; +- int i; - -- if (!poll->head) -- return false; -- spin_lock_irq(&poll->head->lock); -- if (do_cancel) -- WRITE_ONCE(poll->canceled, true); -- if (!list_empty(&poll->wait.entry)) { -- list_del_init(&poll->wait.entry); -- do_complete = true; +- rcu_read_lock(); +- for_each_node(i) { +- struct io_wqe *wqe = wq->wqes[i]; +- +- if (mask) +- cpumask_copy(wqe->cpu_mask, mask); +- else +- cpumask_copy(wqe->cpu_mask, cpumask_of_node(i)); - } -- spin_unlock_irq(&poll->head->lock); -- hash_del(&req->hash_node); -- return do_complete; +- rcu_read_unlock(); +- return 0; -} - --static bool io_poll_remove_one(struct io_kiocb *req) -- __must_hold(&req->ctx->completion_lock) +-/* +- * Set max number of unbounded workers, returns old value. If new_count is 0, +- * then just return the old value. 
+- */ +-int io_wq_max_workers(struct io_wq *wq, int *new_count) -{ -- bool do_complete; +- int i, node, prev = 0; - -- io_poll_remove_double(req); -- do_complete = __io_poll_remove_one(req, io_poll_get_single(req), true); +- BUILD_BUG_ON((int) IO_WQ_ACCT_BOUND != (int) IO_WQ_BOUND); +- BUILD_BUG_ON((int) IO_WQ_ACCT_UNBOUND != (int) IO_WQ_UNBOUND); +- BUILD_BUG_ON((int) IO_WQ_ACCT_NR != 2); - -- if (do_complete) { -- io_cqring_fill_event(req->ctx, req->user_data, -ECANCELED, 0); -- io_commit_cqring(req->ctx); -- req_set_fail(req); -- io_put_req_deferred(req); +- for (i = 0; i < 2; i++) { +- if (new_count[i] > task_rlimit(current, RLIMIT_NPROC)) +- new_count[i] = task_rlimit(current, RLIMIT_NPROC); - } -- return do_complete; +- +- rcu_read_lock(); +- for_each_node(node) { +- struct io_wqe *wqe = wq->wqes[node]; +- struct io_wqe_acct *acct; +- +- raw_spin_lock(&wqe->lock); +- for (i = 0; i < IO_WQ_ACCT_NR; i++) { +- acct = &wqe->acct[i]; +- prev = max_t(int, acct->max_workers, prev); +- if (new_count[i]) +- acct->max_workers = new_count[i]; +- new_count[i] = prev; +- } +- raw_spin_unlock(&wqe->lock); +- } +- rcu_read_unlock(); +- return 0; -} - - /* - * Returns true if we found and killed one or more poll requests - */ -@@ -5694,7 +5762,8 @@ static bool io_poll_remove_all(struct io_ring_ctx *ctx, struct task_struct *tsk, - { - struct hlist_node *tmp; - struct io_kiocb *req; -- int posted = 0, i; -+ bool found = false; -+ int i; - - spin_lock(&ctx->completion_lock); - for (i = 0; i < (1U << ctx->cancel_hash_bits); i++) { -@@ -5702,16 +5771,15 @@ static bool io_poll_remove_all(struct io_ring_ctx *ctx, struct task_struct *tsk, - - list = &ctx->cancel_hash[i]; - hlist_for_each_entry_safe(req, tmp, list, hash_node) { -- if (io_match_task(req, tsk, cancel_all)) -- posted += io_poll_remove_one(req); -+ if (io_match_task_safe(req, tsk, cancel_all)) { -+ hlist_del_init(&req->hash_node); -+ io_poll_cancel_req(req); -+ found = true; -+ } - } - } - spin_unlock(&ctx->completion_lock); +-static __init int io_wq_init(void) +-{ +- int ret; - -- if (posted) -- io_cqring_ev_posted(ctx); +- ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, "io-wq/online", +- io_wq_cpu_online, io_wq_cpu_offline); +- if (ret < 0) +- return ret; +- io_wq_online = ret; +- return 0; +-} +-subsys_initcall(io_wq_init); +diff --git a/fs/io-wq.h b/fs/io-wq.h +deleted file mode 100644 +index bf5c4c5337605..0000000000000 +--- a/fs/io-wq.h ++++ /dev/null +@@ -1,160 +0,0 @@ +-#ifndef INTERNAL_IO_WQ_H +-#define INTERNAL_IO_WQ_H - -- return posted != 0; -+ return found; - } - - static struct io_kiocb *io_poll_find(struct io_ring_ctx *ctx, __u64 sqe_addr, -@@ -5732,19 +5800,26 @@ static struct io_kiocb *io_poll_find(struct io_ring_ctx *ctx, __u64 sqe_addr, - return NULL; - } - -+static bool io_poll_disarm(struct io_kiocb *req) -+ __must_hold(&ctx->completion_lock) -+{ -+ if (!io_poll_get_ownership(req)) -+ return false; -+ io_poll_remove_entries(req); -+ hash_del(&req->hash_node); -+ return true; -+} -+ - static int io_poll_cancel(struct io_ring_ctx *ctx, __u64 sqe_addr, - bool poll_only) - __must_hold(&ctx->completion_lock) - { -- struct io_kiocb *req; -+ struct io_kiocb *req = io_poll_find(ctx, sqe_addr, poll_only); - -- req = io_poll_find(ctx, sqe_addr, poll_only); - if (!req) - return -ENOENT; -- if (io_poll_remove_one(req)) -- return 0; +-#include <linux/refcount.h> - -- return -EALREADY; -+ io_poll_cancel_req(req); -+ return 0; - } - - static __poll_t io_poll_parse_events(const struct io_uring_sqe *sqe, -@@ -5794,23 +5869,6 @@ static 
int io_poll_update_prep(struct io_kiocb *req, - return 0; - } - --static int io_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync, -- void *key) +-struct io_wq; +- +-enum { +- IO_WQ_WORK_CANCEL = 1, +- IO_WQ_WORK_HASHED = 2, +- IO_WQ_WORK_UNBOUND = 4, +- IO_WQ_WORK_CONCURRENT = 16, +- +- IO_WQ_HASH_SHIFT = 24, /* upper 8 bits are used for hash key */ +-}; +- +-enum io_wq_cancel { +- IO_WQ_CANCEL_OK, /* cancelled before started */ +- IO_WQ_CANCEL_RUNNING, /* found, running, and attempted cancelled */ +- IO_WQ_CANCEL_NOTFOUND, /* work not found */ +-}; +- +-struct io_wq_work_node { +- struct io_wq_work_node *next; +-}; +- +-struct io_wq_work_list { +- struct io_wq_work_node *first; +- struct io_wq_work_node *last; +-}; +- +-static inline void wq_list_add_after(struct io_wq_work_node *node, +- struct io_wq_work_node *pos, +- struct io_wq_work_list *list) -{ -- struct io_kiocb *req = wait->private; -- struct io_poll_iocb *poll = &req->poll; +- struct io_wq_work_node *next = pos->next; - -- return __io_async_wake(req, poll, key_to_poll(key), io_poll_task_func); +- pos->next = node; +- node->next = next; +- if (!next) +- list->last = node; -} - --static void io_poll_queue_proc(struct file *file, struct wait_queue_head *head, -- struct poll_table_struct *p) +-static inline void wq_list_add_tail(struct io_wq_work_node *node, +- struct io_wq_work_list *list) -{ -- struct io_poll_table *pt = container_of(p, struct io_poll_table, pt); +- node->next = NULL; +- if (!list->first) { +- list->last = node; +- WRITE_ONCE(list->first, node); +- } else { +- list->last->next = node; +- list->last = node; +- } +-} - -- __io_queue_proc(&pt->req->poll, pt, head, (struct io_poll_iocb **) &pt->req->async_data); +-static inline void wq_list_cut(struct io_wq_work_list *list, +- struct io_wq_work_node *last, +- struct io_wq_work_node *prev) +-{ +- /* first in the list, if prev==NULL */ +- if (!prev) +- WRITE_ONCE(list->first, last->next); +- else +- prev->next = last->next; +- +- if (last == list->last) +- list->last = prev; +- last->next = NULL; -} - - static int io_poll_add_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) - { - struct io_poll_iocb *poll = &req->poll; -@@ -5832,89 +5890,57 @@ static int io_poll_add_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe - static int io_poll_add(struct io_kiocb *req, unsigned int issue_flags) - { - struct io_poll_iocb *poll = &req->poll; -- struct io_ring_ctx *ctx = req->ctx; - struct io_poll_table ipt; -- __poll_t mask; -- bool done; -+ int ret; - - ipt.pt._qproc = io_poll_queue_proc; - -- mask = __io_arm_poll_handler(req, &req->poll, &ipt, poll->events, -- io_poll_wake); +-static inline void wq_list_del(struct io_wq_work_list *list, +- struct io_wq_work_node *node, +- struct io_wq_work_node *prev) +-{ +- wq_list_cut(list, node, prev); +-} - -- if (mask) { /* no async, we'd stolen it */ -- ipt.error = 0; -- done = io_poll_complete(req, mask); -- } -- spin_unlock(&ctx->completion_lock); +-#define wq_list_for_each(pos, prv, head) \ +- for (pos = (head)->first, prv = NULL; pos; prv = pos, pos = (pos)->next) - -- if (mask) { -- io_cqring_ev_posted(ctx); -- if (done) -- io_put_req(req); -- } -- return ipt.error; -+ ret = __io_arm_poll_handler(req, &req->poll, &ipt, poll->events); -+ if (!ret && ipt.error) -+ req_set_fail(req); -+ ret = ret ?: ipt.error; -+ if (ret) -+ __io_req_complete(req, issue_flags, ret, 0); -+ return 0; - } - - static int io_poll_update(struct io_kiocb *req, unsigned int issue_flags) - { - struct io_ring_ctx *ctx = 
req->ctx; - struct io_kiocb *preq; -- bool completing; -- int ret; -+ int ret2, ret = 0; - - spin_lock(&ctx->completion_lock); - preq = io_poll_find(ctx, req->poll_update.old_user_data, true); -- if (!preq) { -- ret = -ENOENT; -- goto err; -+ if (!preq || !io_poll_disarm(preq)) { -+ spin_unlock(&ctx->completion_lock); -+ ret = preq ? -EALREADY : -ENOENT; -+ goto out; - } -+ spin_unlock(&ctx->completion_lock); - -- if (!req->poll_update.update_events && !req->poll_update.update_user_data) { -- completing = true; -- ret = io_poll_remove_one(preq) ? 0 : -EALREADY; -- goto err; -- } -+ if (req->poll_update.update_events || req->poll_update.update_user_data) { -+ /* only mask one event flags, keep behavior flags */ -+ if (req->poll_update.update_events) { -+ preq->poll.events &= ~0xffff; -+ preq->poll.events |= req->poll_update.events & 0xffff; -+ preq->poll.events |= IO_POLL_UNMASK; -+ } -+ if (req->poll_update.update_user_data) -+ preq->user_data = req->poll_update.new_user_data; - +-#define wq_list_empty(list) (READ_ONCE((list)->first) == NULL) +-#define INIT_WQ_LIST(list) do { \ +- (list)->first = NULL; \ +- (list)->last = NULL; \ +-} while (0) +- +-struct io_wq_work { +- struct io_wq_work_node list; +- unsigned flags; +-}; +- +-static inline struct io_wq_work *wq_next_work(struct io_wq_work *work) +-{ +- if (!work->list.next) +- return NULL; +- +- return container_of(work->list.next, struct io_wq_work, list); +-} +- +-typedef struct io_wq_work *(free_work_fn)(struct io_wq_work *); +-typedef void (io_wq_work_fn)(struct io_wq_work *); +- +-struct io_wq_hash { +- refcount_t refs; +- unsigned long map; +- struct wait_queue_head wait; +-}; +- +-static inline void io_wq_put_hash(struct io_wq_hash *hash) +-{ +- if (refcount_dec_and_test(&hash->refs)) +- kfree(hash); +-} +- +-struct io_wq_data { +- struct io_wq_hash *hash; +- struct task_struct *task; +- io_wq_work_fn *do_work; +- free_work_fn *free_work; +-}; +- +-struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data); +-void io_wq_exit_start(struct io_wq *wq); +-void io_wq_put_and_exit(struct io_wq *wq); +- +-void io_wq_enqueue(struct io_wq *wq, struct io_wq_work *work); +-void io_wq_hash_work(struct io_wq_work *work, void *val); +- +-int io_wq_cpu_affinity(struct io_wq *wq, cpumask_var_t mask); +-int io_wq_max_workers(struct io_wq *wq, int *new_count); +- +-static inline bool io_wq_is_hashed(struct io_wq_work *work) +-{ +- return work->flags & IO_WQ_WORK_HASHED; +-} +- +-typedef bool (work_cancel_fn)(struct io_wq_work *, void *); +- +-enum io_wq_cancel io_wq_cancel_cb(struct io_wq *wq, work_cancel_fn *cancel, +- void *data, bool cancel_all); +- +-#if defined(CONFIG_IO_WQ) +-extern void io_wq_worker_sleeping(struct task_struct *); +-extern void io_wq_worker_running(struct task_struct *); +-#else +-static inline void io_wq_worker_sleeping(struct task_struct *tsk) +-{ +-} +-static inline void io_wq_worker_running(struct task_struct *tsk) +-{ +-} +-#endif +- +-static inline bool io_wq_current_is_worker(void) +-{ +- return in_task() && (current->flags & PF_IO_WORKER) && +- current->pf_io_worker; +-} +-#endif +diff --git a/fs/io_uring.c b/fs/io_uring.c +deleted file mode 100644 +index bc18af5e0a934..0000000000000 +--- a/fs/io_uring.c ++++ /dev/null +@@ -1,11013 +0,0 @@ +-// SPDX-License-Identifier: GPL-2.0 +-/* +- * Shared application/kernel submission and completion ring pairs, for +- * supporting fast/efficient IO. 
+- * +- * A note on the read/write ordering memory barriers that are matched between +- * the application and kernel side. +- * +- * After the application reads the CQ ring tail, it must use an +- * appropriate smp_rmb() to pair with the smp_wmb() the kernel uses +- * before writing the tail (using smp_load_acquire to read the tail will +- * do). It also needs a smp_mb() before updating CQ head (ordering the +- * entry load(s) with the head store), pairing with an implicit barrier +- * through a control-dependency in io_get_cqe (smp_store_release to +- * store head will do). Failure to do so could lead to reading invalid +- * CQ entries. +- * +- * Likewise, the application must use an appropriate smp_wmb() before +- * writing the SQ tail (ordering SQ entry stores with the tail store), +- * which pairs with smp_load_acquire in io_get_sqring (smp_store_release +- * to store the tail will do). And it needs a barrier ordering the SQ +- * head load before writing new SQ entries (smp_load_acquire to read +- * head will do). +- * +- * When using the SQ poll thread (IORING_SETUP_SQPOLL), the application +- * needs to check the SQ flags for IORING_SQ_NEED_WAKEUP *after* +- * updating the SQ tail; a full memory barrier smp_mb() is needed +- * between. +- * +- * Also see the examples in the liburing library: +- * +- * git://git.kernel.dk/liburing +- * +- * io_uring also uses READ/WRITE_ONCE() for _any_ store or load that happens +- * from data shared between the kernel and application. This is done both +- * for ordering purposes, but also to ensure that once a value is loaded from +- * data that the application could potentially modify, it remains stable. +- * +- * Copyright (C) 2018-2019 Jens Axboe +- * Copyright (c) 2018-2019 Christoph Hellwig +- */ +-#include <linux/kernel.h> +-#include <linux/init.h> +-#include <linux/errno.h> +-#include <linux/syscalls.h> +-#include <linux/compat.h> +-#include <net/compat.h> +-#include <linux/refcount.h> +-#include <linux/uio.h> +-#include <linux/bits.h> +- +-#include <linux/sched/signal.h> +-#include <linux/fs.h> +-#include <linux/file.h> +-#include <linux/fdtable.h> +-#include <linux/mm.h> +-#include <linux/mman.h> +-#include <linux/percpu.h> +-#include <linux/slab.h> +-#include <linux/blkdev.h> +-#include <linux/bvec.h> +-#include <linux/net.h> +-#include <net/sock.h> +-#include <net/af_unix.h> +-#include <net/scm.h> +-#include <linux/anon_inodes.h> +-#include <linux/sched/mm.h> +-#include <linux/uaccess.h> +-#include <linux/nospec.h> +-#include <linux/sizes.h> +-#include <linux/hugetlb.h> +-#include <linux/highmem.h> +-#include <linux/namei.h> +-#include <linux/fsnotify.h> +-#include <linux/fadvise.h> +-#include <linux/eventpoll.h> +-#include <linux/splice.h> +-#include <linux/task_work.h> +-#include <linux/pagemap.h> +-#include <linux/io_uring.h> +-#include <linux/tracehook.h> +- +-#define CREATE_TRACE_POINTS +-#include <trace/events/io_uring.h> +- +-#include <uapi/linux/io_uring.h> +- +-#include "internal.h" +-#include "io-wq.h" +- +-#define IORING_MAX_ENTRIES 32768 +-#define IORING_MAX_CQ_ENTRIES (2 * IORING_MAX_ENTRIES) +-#define IORING_SQPOLL_CAP_ENTRIES_VALUE 8 +- +-/* only define max */ +-#define IORING_MAX_FIXED_FILES (1U << 15) +-#define IORING_MAX_RESTRICTIONS (IORING_RESTRICTION_LAST + \ +- IORING_REGISTER_LAST + IORING_OP_LAST) +- +-#define IO_RSRC_TAG_TABLE_SHIFT (PAGE_SHIFT - 3) +-#define IO_RSRC_TAG_TABLE_MAX (1U << IO_RSRC_TAG_TABLE_SHIFT) +-#define IO_RSRC_TAG_TABLE_MASK (IO_RSRC_TAG_TABLE_MAX - 1) +- +-#define IORING_MAX_REG_BUFFERS (1U << 
14) +- +-#define SQE_VALID_FLAGS (IOSQE_FIXED_FILE|IOSQE_IO_DRAIN|IOSQE_IO_LINK| \ +- IOSQE_IO_HARDLINK | IOSQE_ASYNC | \ +- IOSQE_BUFFER_SELECT) +-#define IO_REQ_CLEAN_FLAGS (REQ_F_BUFFER_SELECTED | REQ_F_NEED_CLEANUP | \ +- REQ_F_POLLED | REQ_F_INFLIGHT | REQ_F_CREDS) +- +-#define IO_TCTX_REFS_CACHE_NR (1U << 10) +- +-struct io_uring { +- u32 head ____cacheline_aligned_in_smp; +- u32 tail ____cacheline_aligned_in_smp; +-}; +- +-/* +- * This data is shared with the application through the mmap at offsets +- * IORING_OFF_SQ_RING and IORING_OFF_CQ_RING. +- * +- * The offsets to the member fields are published through struct +- * io_sqring_offsets when calling io_uring_setup. +- */ +-struct io_rings { - /* -- * Don't allow racy completion with singleshot, as we cannot safely -- * update those. For multishot, if we're racing with completion, just -- * let completion re-add it. +- * Head and tail offsets into the ring; the offsets need to be +- * masked to get valid indices. +- * +- * The kernel controls head of the sq ring and the tail of the cq ring, +- * and the application controls tail of the sq ring and the head of the +- * cq ring. - */ -- completing = !__io_poll_remove_one(preq, &preq->poll, false); -- if (completing && (preq->poll.events & EPOLLONESHOT)) { -- ret = -EALREADY; -- goto err; -+ ret2 = io_poll_add(preq, issue_flags); -+ /* successfully updated, don't complete poll request */ -+ if (!ret2) -+ goto out; - } -- /* we now have a detached poll request. reissue. */ -- ret = 0; --err: -- if (ret < 0) { -- spin_unlock(&ctx->completion_lock); -+ req_set_fail(preq); -+ io_req_complete(preq, -ECANCELED); -+out: -+ if (ret < 0) - req_set_fail(req); -- io_req_complete(req, ret); -- return 0; -- } -- /* only mask one event flags, keep behavior flags */ -- if (req->poll_update.update_events) { -- preq->poll.events &= ~0xffff; -- preq->poll.events |= req->poll_update.events & 0xffff; -- preq->poll.events |= IO_POLL_UNMASK; -- } -- if (req->poll_update.update_user_data) -- preq->user_data = req->poll_update.new_user_data; -- spin_unlock(&ctx->completion_lock); +- struct io_uring sq, cq; +- /* +- * Bitmasks to apply to head and tail offsets (constant, equals +- * ring_entries - 1) +- */ +- u32 sq_ring_mask, cq_ring_mask; +- /* Ring sizes (constant, power of 2) */ +- u32 sq_ring_entries, cq_ring_entries; +- /* +- * Number of invalid entries dropped by the kernel due to +- * invalid index stored in array +- * +- * Written by the kernel, shouldn't be modified by the +- * application (i.e. get number of "new events" by comparing to +- * cached value). +- * +- * After a new SQ head value was read by the application this +- * counter includes all submissions that were dropped reaching +- * the new SQ head (and possibly more). +- */ +- u32 sq_dropped; +- /* +- * Runtime SQ flags +- * +- * Written by the kernel, shouldn't be modified by the +- * application. +- * +- * The application needs a full memory barrier before checking +- * for IORING_SQ_NEED_WAKEUP after updating the sq tail. +- */ +- u32 sq_flags; +- /* +- * Runtime CQ flags +- * +- * Written by the application, shouldn't be modified by the +- * kernel. +- */ +- u32 cq_flags; +- /* +- * Number of completion events lost because the queue was full; +- * this should be avoided by the application by making sure +- * there are not more requests pending than there is space in +- * the completion queue. +- * +- * Written by the kernel, shouldn't be modified by the +- * application (i.e. 
get number of "new events" by comparing to +- * cached value). +- * +- * As completion events come in out of order this counter is not +- * ordered with any other data. +- */ +- u32 cq_overflow; +- /* +- * Ring buffer of completion events. +- * +- * The kernel writes completion events fresh every time they are +- * produced, so the application is allowed to modify pending +- * entries. +- */ +- struct io_uring_cqe cqes[] ____cacheline_aligned_in_smp; +-}; - - /* complete update request, we're done with it */ - io_req_complete(req, ret); +-enum io_uring_cmd_flags { +- IO_URING_F_NONBLOCK = 1, +- IO_URING_F_COMPLETE_DEFER = 2, +-}; - -- if (!completing) { -- ret = io_poll_add(preq, issue_flags); -- if (ret < 0) { -- req_set_fail(preq); -- io_req_complete(preq, ret); -- } -- } - return 0; - } - -@@ -5976,7 +6002,7 @@ static int io_timeout_cancel(struct io_ring_ctx *ctx, __u64 user_data) - return PTR_ERR(req); - - req_set_fail(req); -- io_cqring_fill_event(ctx, req->user_data, -ECANCELED, 0); -+ io_fill_cqe_req(req, -ECANCELED, 0); - io_put_req_deferred(req); - return 0; - } -@@ -6147,6 +6173,7 @@ static int io_timeout_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe, - if (get_timespec64(&data->ts, u64_to_user_ptr(sqe->addr))) - return -EFAULT; - -+ INIT_LIST_HEAD(&req->timeout.list); - data->mode = io_translate_timeout_mode(flags); - hrtimer_init(&data->timer, io_timeout_get_clock(data), data->mode); - -@@ -6346,6 +6373,7 @@ static int io_files_update(struct io_kiocb *req, unsigned int issue_flags) - up.nr = 0; - up.tags = 0; - up.resv = 0; -+ up.resv2 = 0; - - io_ring_submit_lock(ctx, !(issue_flags & IO_URING_F_NONBLOCK)); - ret = __io_register_rsrc_update(ctx, IORING_RSRC_FILE, -@@ -6511,11 +6539,14 @@ static bool io_drain_req(struct io_kiocb *req) - } - - /* Still need defer if there is pending req in defer list. */ -+ spin_lock(&ctx->completion_lock); - if (likely(list_empty_careful(&ctx->defer_list) && - !(req->flags & REQ_F_IO_DRAIN))) { -+ spin_unlock(&ctx->completion_lock); - ctx->drain_active = false; - return false; - } -+ spin_unlock(&ctx->completion_lock); - - seq = io_get_sequence(req); - /* Still a chance to pass the sequence check */ -@@ -6586,11 +6617,6 @@ static void io_clean_op(struct io_kiocb *req) - kfree(io->free_iov); - break; - } -- case IORING_OP_SPLICE: -- case IORING_OP_TEE: -- if (!(req->splice.flags & SPLICE_F_FD_IN_FIXED)) -- io_put_file(req->splice.file_in); -- break; - case IORING_OP_OPENAT: - case IORING_OP_OPENAT2: - if (req->open.filename) -@@ -6803,7 +6829,7 @@ static void io_wq_submit_work(struct io_wq_work *work) - * forcing a sync submission from here, since we can't - * wait for request slots on the block side. 
- */ -- if (ret != -EAGAIN) -+ if (ret != -EAGAIN || !(req->ctx->flags & IORING_SETUP_IOPOLL)) - break; - cond_resched(); - } while (1); -@@ -6884,10 +6910,11 @@ static inline struct file *io_file_get(struct io_ring_ctx *ctx, - static void io_req_task_link_timeout(struct io_kiocb *req, bool *locked) - { - struct io_kiocb *prev = req->timeout.prev; -- int ret; -+ int ret = -ENOENT; - - if (prev) { -- ret = io_try_cancel_userdata(req, prev->user_data); -+ if (!(req->task->flags & PF_EXITING)) -+ ret = io_try_cancel_userdata(req, prev->user_data); - io_req_complete_post(req, ret ?: -ETIME, 0); - io_put_req(prev); - } else { -@@ -7528,7 +7555,7 @@ static int io_run_task_work_sig(void) - /* when returns >0, the caller should retry */ - static inline int io_cqring_wait_schedule(struct io_ring_ctx *ctx, - struct io_wait_queue *iowq, -- signed long *timeout) -+ ktime_t timeout) - { - int ret; - -@@ -7540,8 +7567,9 @@ static inline int io_cqring_wait_schedule(struct io_ring_ctx *ctx, - if (test_bit(0, &ctx->check_cq_overflow)) - return 1; - -- *timeout = schedule_timeout(*timeout); -- return !*timeout ? -ETIME : 1; -+ if (!schedule_hrtimeout(&timeout, HRTIMER_MODE_ABS)) -+ return -ETIME; -+ return 1; - } - - /* -@@ -7554,7 +7582,7 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, - { - struct io_wait_queue iowq; - struct io_rings *rings = ctx->rings; -- signed long timeout = MAX_SCHEDULE_TIMEOUT; -+ ktime_t timeout = KTIME_MAX; - int ret; - - do { -@@ -7570,7 +7598,7 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, - - if (get_timespec64(&ts, uts)) - return -EFAULT; -- timeout = timespec64_to_jiffies(&ts); -+ timeout = ktime_add_ns(timespec64_to_ktime(ts), ktime_get_ns()); - } - - if (sig) { -@@ -7602,7 +7630,7 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, - } - prepare_to_wait_exclusive(&ctx->cq_wait, &iowq.wq, - TASK_INTERRUPTIBLE); -- ret = io_cqring_wait_schedule(ctx, &iowq, &timeout); -+ ret = io_cqring_wait_schedule(ctx, &iowq, timeout); - finish_wait(&ctx->cq_wait, &iowq.wq); - cond_resched(); - } while (ret > 0); -@@ -7656,10 +7684,15 @@ static void io_rsrc_node_ref_zero(struct percpu_ref *ref) - struct io_ring_ctx *ctx = node->rsrc_data->ctx; - unsigned long flags; - bool first_add = false; -+ unsigned long delay = HZ; - - spin_lock_irqsave(&ctx->rsrc_ref_lock, flags); - node->done = true; - -+ /* if we are mid-quiesce then do not delay */ -+ if (node->rsrc_data->quiesce) -+ delay = 0; -+ - while (!list_empty(&ctx->rsrc_ref_list)) { - node = list_first_entry(&ctx->rsrc_ref_list, - struct io_rsrc_node, node); -@@ -7672,7 +7705,7 @@ static void io_rsrc_node_ref_zero(struct percpu_ref *ref) - spin_unlock_irqrestore(&ctx->rsrc_ref_lock, flags); - - if (first_add) -- mod_delayed_work(system_wq, &ctx->rsrc_put_work, HZ); -+ mod_delayed_work(system_wq, &ctx->rsrc_put_work, delay); - } - - static struct io_rsrc_node *io_rsrc_node_alloc(struct io_ring_ctx *ctx) -@@ -7750,7 +7783,15 @@ static int io_rsrc_ref_quiesce(struct io_rsrc_data *data, struct io_ring_ctx *ct - ret = wait_for_completion_interruptible(&data->done); - if (!ret) { - mutex_lock(&ctx->uring_lock); -- break; -+ if (atomic_read(&data->refs) > 0) { -+ /* -+ * it has been revived by another thread while -+ * we were unlocked -+ */ -+ mutex_unlock(&ctx->uring_lock); -+ } else { -+ break; -+ } - } - - atomic_inc(&data->refs); -@@ -7865,11 +7906,19 @@ static void __io_sqe_files_unregister(struct io_ring_ctx *ctx) - - static int io_sqe_files_unregister(struct io_ring_ctx 
*ctx) - { -+ unsigned nr = ctx->nr_user_files; - int ret; - - if (!ctx->file_data) - return -ENXIO; -+ -+ /* -+ * Quiesce may unlock ->uring_lock, and while it's not held -+ * prevent new requests using the table. -+ */ -+ ctx->nr_user_files = 0; - ret = io_rsrc_ref_quiesce(ctx->file_data, ctx); -+ ctx->nr_user_files = nr; - if (!ret) - __io_sqe_files_unregister(ctx); - return ret; -@@ -8024,6 +8073,7 @@ static int __io_sqe_files_scm(struct io_ring_ctx *ctx, int nr, int offset) - } - - skb->sk = sk; -+ skb->scm_io_uring = 1; - - nr_files = 0; - fpl->user = get_uid(current_user()); -@@ -8045,10 +8095,15 @@ static int __io_sqe_files_scm(struct io_ring_ctx *ctx, int nr, int offset) - refcount_add(skb->truesize, &sk->sk_wmem_alloc); - skb_queue_head(&sk->sk_receive_queue, skb); - -- for (i = 0; i < nr_files; i++) -- fput(fpl->fp[i]); -+ for (i = 0; i < nr; i++) { -+ struct file *file = io_file_from_index(ctx, i + offset); -+ -+ if (file) -+ fput(file); -+ } - } else { - kfree_skb(skb); -+ free_uid(fpl->user); - kfree(fpl); - } - -@@ -8174,8 +8229,7 @@ static void __io_rsrc_put_work(struct io_rsrc_node *ref_node) - - io_ring_submit_lock(ctx, lock_ring); - spin_lock(&ctx->completion_lock); -- io_cqring_fill_event(ctx, prsrc->tag, 0, 0); -- ctx->cq_extra++; -+ io_fill_cqe_aux(ctx, prsrc->tag, 0, 0); - io_commit_cqring(ctx); - spin_unlock(&ctx->completion_lock); - io_cqring_ev_posted(ctx); -@@ -8337,13 +8391,15 @@ static int io_sqe_file_register(struct io_ring_ctx *ctx, struct file *file, - static int io_queue_rsrc_removal(struct io_rsrc_data *data, unsigned idx, - struct io_rsrc_node *node, void *rsrc) - { -+ u64 *tag_slot = io_get_tag_slot(data, idx); - struct io_rsrc_put *prsrc; - - prsrc = kzalloc(sizeof(*prsrc), GFP_KERNEL); - if (!prsrc) - return -ENOMEM; - -- prsrc->tag = *io_get_tag_slot(data, idx); -+ prsrc->tag = *tag_slot; -+ *tag_slot = 0; - prsrc->rsrc = rsrc; - list_add(&prsrc->list, &node->rsrc_list); - return 0; -@@ -8411,7 +8467,7 @@ static int io_close_fixed(struct io_kiocb *req, unsigned int issue_flags) - struct io_ring_ctx *ctx = req->ctx; - struct io_fixed_file *file_slot; - struct file *file; -- int ret, i; -+ int ret; - - io_ring_submit_lock(ctx, !(issue_flags & IO_URING_F_NONBLOCK)); - ret = -ENXIO; -@@ -8424,8 +8480,8 @@ static int io_close_fixed(struct io_kiocb *req, unsigned int issue_flags) - if (ret) - goto out; - -- i = array_index_nospec(offset, ctx->nr_user_files); -- file_slot = io_fixed_file_slot(&ctx->file_table, i); -+ offset = array_index_nospec(offset, ctx->nr_user_files); -+ file_slot = io_fixed_file_slot(&ctx->file_table, offset); - ret = -EBADF; - if (!file_slot->file_ptr) - goto out; -@@ -8481,8 +8537,7 @@ static int __io_sqe_files_update(struct io_ring_ctx *ctx, - - if (file_slot->file_ptr) { - file = (struct file *)(file_slot->file_ptr & FFS_MASK); -- err = io_queue_rsrc_removal(data, up->offset + done, -- ctx->rsrc_node, file); -+ err = io_queue_rsrc_removal(data, i, ctx->rsrc_node, file); - if (err) - break; - file_slot->file_ptr = 0; -@@ -8507,7 +8562,7 @@ static int __io_sqe_files_update(struct io_ring_ctx *ctx, - err = -EBADF; - break; - } -- *io_get_tag_slot(data, up->offset + done) = tag; -+ *io_get_tag_slot(data, i) = tag; - io_fixed_file_set(file_slot, file); - err = io_sqe_file_register(ctx, file, i); - if (err) { -@@ -8753,10 +8808,9 @@ static void io_mem_free(void *ptr) - - static void *io_mem_alloc(size_t size) - { -- gfp_t gfp_flags = GFP_KERNEL | __GFP_ZERO | __GFP_NOWARN | __GFP_COMP | -- __GFP_NORETRY | __GFP_ACCOUNT; -+ gfp_t gfp = 
GFP_KERNEL_ACCOUNT | __GFP_ZERO | __GFP_NOWARN | __GFP_COMP; - -- return (void *) __get_free_pages(gfp_flags, get_order(size)); -+ return (void *) __get_free_pages(gfp, get_order(size)); - } - - static unsigned long rings_size(unsigned sq_entries, unsigned cq_entries, -@@ -8824,12 +8878,19 @@ static void __io_sqe_buffers_unregister(struct io_ring_ctx *ctx) - - static int io_sqe_buffers_unregister(struct io_ring_ctx *ctx) - { -+ unsigned nr = ctx->nr_user_bufs; - int ret; - - if (!ctx->buf_data) - return -ENXIO; - -+ /* -+ * Quiesce may unlock ->uring_lock, and while it's not held -+ * prevent new requests using the table. -+ */ -+ ctx->nr_user_bufs = 0; - ret = io_rsrc_ref_quiesce(ctx->buf_data, ctx); -+ ctx->nr_user_bufs = nr; - if (!ret) - __io_sqe_buffers_unregister(ctx); - return ret; -@@ -9152,7 +9213,7 @@ static int __io_sqe_buffers_update(struct io_ring_ctx *ctx, - - i = array_index_nospec(offset, ctx->nr_user_bufs); - if (ctx->user_bufs[i] != ctx->dummy_ubuf) { -- err = io_queue_rsrc_removal(ctx->buf_data, offset, -+ err = io_queue_rsrc_removal(ctx->buf_data, i, - ctx->rsrc_node, ctx->user_bufs[i]); - if (unlikely(err)) { - io_buffer_unmap(ctx, &imu); -@@ -9209,10 +9270,8 @@ static void io_destroy_buffers(struct io_ring_ctx *ctx) - struct io_buffer *buf; - unsigned long index; - -- xa_for_each(&ctx->io_buffers, index, buf) { -+ xa_for_each(&ctx->io_buffers, index, buf) - __io_remove_buffers(ctx, buf, index, -1U); -- cond_resched(); +-struct io_mapped_ubuf { +- u64 ubuf; +- u64 ubuf_end; +- unsigned int nr_bvecs; +- unsigned long acct_pages; +- struct bio_vec bvec[]; +-}; +- +-struct io_ring_ctx; +- +-struct io_overflow_cqe { +- struct io_uring_cqe cqe; +- struct list_head list; +-}; +- +-struct io_fixed_file { +- /* file * with additional FFS_* flags */ +- unsigned long file_ptr; +-}; +- +-struct io_rsrc_put { +- struct list_head list; +- u64 tag; +- union { +- void *rsrc; +- struct file *file; +- struct io_mapped_ubuf *buf; +- }; +-}; +- +-struct io_file_table { +- struct io_fixed_file *files; +-}; +- +-struct io_rsrc_node { +- struct percpu_ref refs; +- struct list_head node; +- struct list_head rsrc_list; +- struct io_rsrc_data *rsrc_data; +- struct llist_node llist; +- bool done; +-}; +- +-typedef void (rsrc_put_fn)(struct io_ring_ctx *ctx, struct io_rsrc_put *prsrc); +- +-struct io_rsrc_data { +- struct io_ring_ctx *ctx; +- +- u64 **tags; +- unsigned int nr; +- rsrc_put_fn *do_put; +- atomic_t refs; +- struct completion done; +- bool quiesce; +-}; +- +-struct io_buffer { +- struct list_head list; +- __u64 addr; +- __u32 len; +- __u16 bid; +-}; +- +-struct io_restriction { +- DECLARE_BITMAP(register_op, IORING_REGISTER_LAST); +- DECLARE_BITMAP(sqe_op, IORING_OP_LAST); +- u8 sqe_flags_allowed; +- u8 sqe_flags_required; +- bool registered; +-}; +- +-enum { +- IO_SQ_THREAD_SHOULD_STOP = 0, +- IO_SQ_THREAD_SHOULD_PARK, +-}; +- +-struct io_sq_data { +- refcount_t refs; +- atomic_t park_pending; +- struct mutex lock; +- +- /* ctx's that are using this sqd */ +- struct list_head ctx_list; +- +- struct task_struct *thread; +- struct wait_queue_head wait; +- +- unsigned sq_thread_idle; +- int sq_cpu; +- pid_t task_pid; +- pid_t task_tgid; +- +- unsigned long state; +- struct completion exited; +-}; +- +-#define IO_COMPL_BATCH 32 +-#define IO_REQ_CACHE_SIZE 32 +-#define IO_REQ_ALLOC_BATCH 8 +- +-struct io_submit_link { +- struct io_kiocb *head; +- struct io_kiocb *last; +-}; +- +-struct io_submit_state { +- struct blk_plug plug; +- struct io_submit_link link; +- +- /* +- * io_kiocb alloc 
cache +- */ +- void *reqs[IO_REQ_CACHE_SIZE]; +- unsigned int free_reqs; +- +- bool plug_started; +- +- /* +- * Batch completion logic +- */ +- struct io_kiocb *compl_reqs[IO_COMPL_BATCH]; +- unsigned int compl_nr; +- /* inline/task_work completion list, under ->uring_lock */ +- struct list_head free_list; +- +- unsigned int ios_left; +-}; +- +-struct io_ring_ctx { +- /* const or read-mostly hot data */ +- struct { +- struct percpu_ref refs; +- +- struct io_rings *rings; +- unsigned int flags; +- unsigned int compat: 1; +- unsigned int drain_next: 1; +- unsigned int eventfd_async: 1; +- unsigned int restricted: 1; +- unsigned int off_timeout_used: 1; +- unsigned int drain_active: 1; +- } ____cacheline_aligned_in_smp; +- +- /* submission data */ +- struct { +- struct mutex uring_lock; +- +- /* +- * Ring buffer of indices into array of io_uring_sqe, which is +- * mmapped by the application using the IORING_OFF_SQES offset. +- * +- * This indirection could e.g. be used to assign fixed +- * io_uring_sqe entries to operations and only submit them to +- * the queue when needed. +- * +- * The kernel modifies neither the indices array nor the entries +- * array. +- */ +- u32 *sq_array; +- struct io_uring_sqe *sq_sqes; +- unsigned cached_sq_head; +- unsigned sq_entries; +- struct list_head defer_list; +- +- /* +- * Fixed resources fast path, should be accessed only under +- * uring_lock, and updated through io_uring_register(2) +- */ +- struct io_rsrc_node *rsrc_node; +- struct io_file_table file_table; +- unsigned nr_user_files; +- unsigned nr_user_bufs; +- struct io_mapped_ubuf **user_bufs; +- +- struct io_submit_state submit_state; +- struct list_head timeout_list; +- struct list_head ltimeout_list; +- struct list_head cq_overflow_list; +- struct xarray io_buffers; +- struct xarray personalities; +- u32 pers_next; +- unsigned sq_thread_idle; +- } ____cacheline_aligned_in_smp; +- +- /* IRQ completion list, under ->completion_lock */ +- struct list_head locked_free_list; +- unsigned int locked_free_nr; +- +- const struct cred *sq_creds; /* cred used for __io_sq_thread() */ +- struct io_sq_data *sq_data; /* if using sq thread polling */ +- +- struct wait_queue_head sqo_sq_wait; +- struct list_head sqd_list; +- +- unsigned long check_cq_overflow; +- +- struct { +- unsigned cached_cq_tail; +- unsigned cq_entries; +- struct eventfd_ctx *cq_ev_fd; +- struct wait_queue_head poll_wait; +- struct wait_queue_head cq_wait; +- unsigned cq_extra; +- atomic_t cq_timeouts; +- unsigned cq_last_tm_flush; +- } ____cacheline_aligned_in_smp; +- +- struct { +- spinlock_t completion_lock; +- +- spinlock_t timeout_lock; +- +- /* +- * ->iopoll_list is protected by the ctx->uring_lock for +- * io_uring instances that don't use IORING_SETUP_SQPOLL. +- * For SQPOLL, only the single threaded io_sq_thread() will +- * manipulate the list, hence no extra locking is needed there. 
+- */ +- struct list_head iopoll_list; +- struct hlist_head *cancel_hash; +- unsigned cancel_hash_bits; +- bool poll_multi_queue; +- } ____cacheline_aligned_in_smp; +- +- struct io_restriction restrictions; +- +- /* slow path rsrc auxilary data, used by update/register */ +- struct { +- struct io_rsrc_node *rsrc_backup_node; +- struct io_mapped_ubuf *dummy_ubuf; +- struct io_rsrc_data *file_data; +- struct io_rsrc_data *buf_data; +- +- struct delayed_work rsrc_put_work; +- struct llist_head rsrc_put_llist; +- struct list_head rsrc_ref_list; +- spinlock_t rsrc_ref_lock; +- }; +- +- /* Keep this last, we don't need it for the fast path */ +- struct { +- #if defined(CONFIG_UNIX) +- struct socket *ring_sock; +- #endif +- /* hashed buffered write serialization */ +- struct io_wq_hash *hash_map; +- +- /* Only used for accounting purposes */ +- struct user_struct *user; +- struct mm_struct *mm_account; +- +- /* ctx exit and cancelation */ +- struct llist_head fallback_llist; +- struct delayed_work fallback_work; +- struct work_struct exit_work; +- struct list_head tctx_list; +- struct completion ref_comp; +- u32 iowq_limits[2]; +- bool iowq_limits_set; +- }; +-}; +- +-struct io_uring_task { +- /* submission side */ +- int cached_refs; +- struct xarray xa; +- struct wait_queue_head wait; +- const struct io_ring_ctx *last; +- struct io_wq *io_wq; +- struct percpu_counter inflight; +- atomic_t inflight_tracked; +- atomic_t in_idle; +- +- spinlock_t task_lock; +- struct io_wq_work_list task_list; +- struct callback_head task_work; +- bool task_running; +-}; +- +-/* +- * First field must be the file pointer in all the +- * iocb unions! See also 'struct kiocb' in <linux/fs.h> +- */ +-struct io_poll_iocb { +- struct file *file; +- struct wait_queue_head *head; +- __poll_t events; +- bool done; +- bool canceled; +- struct wait_queue_entry wait; +-}; +- +-struct io_poll_update { +- struct file *file; +- u64 old_user_data; +- u64 new_user_data; +- __poll_t events; +- bool update_events; +- bool update_user_data; +-}; +- +-struct io_close { +- struct file *file; +- int fd; +- u32 file_slot; +-}; +- +-struct io_timeout_data { +- struct io_kiocb *req; +- struct hrtimer timer; +- struct timespec64 ts; +- enum hrtimer_mode mode; +- u32 flags; +-}; +- +-struct io_accept { +- struct file *file; +- struct sockaddr __user *addr; +- int __user *addr_len; +- int flags; +- u32 file_slot; +- unsigned long nofile; +-}; +- +-struct io_sync { +- struct file *file; +- loff_t len; +- loff_t off; +- int flags; +- int mode; +-}; +- +-struct io_cancel { +- struct file *file; +- u64 addr; +-}; +- +-struct io_timeout { +- struct file *file; +- u32 off; +- u32 target_seq; +- struct list_head list; +- /* head of the link, used by linked timeouts only */ +- struct io_kiocb *head; +- /* for linked completions */ +- struct io_kiocb *prev; +-}; +- +-struct io_timeout_rem { +- struct file *file; +- u64 addr; +- +- /* timeout update */ +- struct timespec64 ts; +- u32 flags; +- bool ltimeout; +-}; +- +-struct io_rw { +- /* NOTE: kiocb has the file as the first member, so don't do it here */ +- struct kiocb kiocb; +- u64 addr; +- u64 len; +-}; +- +-struct io_connect { +- struct file *file; +- struct sockaddr __user *addr; +- int addr_len; +-}; +- +-struct io_sr_msg { +- struct file *file; +- union { +- struct compat_msghdr __user *umsg_compat; +- struct user_msghdr __user *umsg; +- void __user *buf; +- }; +- int msg_flags; +- int bgid; +- size_t len; +- struct io_buffer *kbuf; +-}; +- +-struct io_open { +- struct file *file; +- int dfd; 
+- u32 file_slot; +- struct filename *filename; +- struct open_how how; +- unsigned long nofile; +-}; +- +-struct io_rsrc_update { +- struct file *file; +- u64 arg; +- u32 nr_args; +- u32 offset; +-}; +- +-struct io_fadvise { +- struct file *file; +- u64 offset; +- u32 len; +- u32 advice; +-}; +- +-struct io_madvise { +- struct file *file; +- u64 addr; +- u32 len; +- u32 advice; +-}; +- +-struct io_epoll { +- struct file *file; +- int epfd; +- int op; +- int fd; +- struct epoll_event event; +-}; +- +-struct io_splice { +- struct file *file_out; +- struct file *file_in; +- loff_t off_out; +- loff_t off_in; +- u64 len; +- unsigned int flags; +-}; +- +-struct io_provide_buf { +- struct file *file; +- __u64 addr; +- __u32 len; +- __u32 bgid; +- __u16 nbufs; +- __u16 bid; +-}; +- +-struct io_statx { +- struct file *file; +- int dfd; +- unsigned int mask; +- unsigned int flags; +- const char __user *filename; +- struct statx __user *buffer; +-}; +- +-struct io_shutdown { +- struct file *file; +- int how; +-}; +- +-struct io_rename { +- struct file *file; +- int old_dfd; +- int new_dfd; +- struct filename *oldpath; +- struct filename *newpath; +- int flags; +-}; +- +-struct io_unlink { +- struct file *file; +- int dfd; +- int flags; +- struct filename *filename; +-}; +- +-struct io_mkdir { +- struct file *file; +- int dfd; +- umode_t mode; +- struct filename *filename; +-}; +- +-struct io_symlink { +- struct file *file; +- int new_dfd; +- struct filename *oldpath; +- struct filename *newpath; +-}; +- +-struct io_hardlink { +- struct file *file; +- int old_dfd; +- int new_dfd; +- struct filename *oldpath; +- struct filename *newpath; +- int flags; +-}; +- +-struct io_completion { +- struct file *file; +- u32 cflags; +-}; +- +-struct io_async_connect { +- struct sockaddr_storage address; +-}; +- +-struct io_async_msghdr { +- struct iovec fast_iov[UIO_FASTIOV]; +- /* points to an allocated iov, if NULL we use fast_iov instead */ +- struct iovec *free_iov; +- struct sockaddr __user *uaddr; +- struct msghdr msg; +- struct sockaddr_storage addr; +-}; +- +-struct io_async_rw { +- struct iovec fast_iov[UIO_FASTIOV]; +- const struct iovec *free_iovec; +- struct iov_iter iter; +- struct iov_iter_state iter_state; +- size_t bytes_done; +- struct wait_page_queue wpq; +-}; +- +-enum { +- REQ_F_FIXED_FILE_BIT = IOSQE_FIXED_FILE_BIT, +- REQ_F_IO_DRAIN_BIT = IOSQE_IO_DRAIN_BIT, +- REQ_F_LINK_BIT = IOSQE_IO_LINK_BIT, +- REQ_F_HARDLINK_BIT = IOSQE_IO_HARDLINK_BIT, +- REQ_F_FORCE_ASYNC_BIT = IOSQE_ASYNC_BIT, +- REQ_F_BUFFER_SELECT_BIT = IOSQE_BUFFER_SELECT_BIT, +- +- /* first byte is taken by user flags, shift it to not overlap */ +- REQ_F_FAIL_BIT = 8, +- REQ_F_INFLIGHT_BIT, +- REQ_F_CUR_POS_BIT, +- REQ_F_NOWAIT_BIT, +- REQ_F_LINK_TIMEOUT_BIT, +- REQ_F_NEED_CLEANUP_BIT, +- REQ_F_POLLED_BIT, +- REQ_F_BUFFER_SELECTED_BIT, +- REQ_F_COMPLETE_INLINE_BIT, +- REQ_F_REISSUE_BIT, +- REQ_F_CREDS_BIT, +- REQ_F_REFCOUNT_BIT, +- REQ_F_ARM_LTIMEOUT_BIT, +- /* keep async read/write and isreg together and in order */ +- REQ_F_NOWAIT_READ_BIT, +- REQ_F_NOWAIT_WRITE_BIT, +- REQ_F_ISREG_BIT, +- +- /* not a real bit, just to check we're not overflowing the space */ +- __REQ_F_LAST_BIT, +-}; +- +-enum { +- /* ctx owns file */ +- REQ_F_FIXED_FILE = BIT(REQ_F_FIXED_FILE_BIT), +- /* drain existing IO first */ +- REQ_F_IO_DRAIN = BIT(REQ_F_IO_DRAIN_BIT), +- /* linked sqes */ +- REQ_F_LINK = BIT(REQ_F_LINK_BIT), +- /* doesn't sever on completion < 0 */ +- REQ_F_HARDLINK = BIT(REQ_F_HARDLINK_BIT), +- /* IOSQE_ASYNC */ +- REQ_F_FORCE_ASYNC = 
BIT(REQ_F_FORCE_ASYNC_BIT), +- /* IOSQE_BUFFER_SELECT */ +- REQ_F_BUFFER_SELECT = BIT(REQ_F_BUFFER_SELECT_BIT), +- +- /* fail rest of links */ +- REQ_F_FAIL = BIT(REQ_F_FAIL_BIT), +- /* on inflight list, should be cancelled and waited on exit reliably */ +- REQ_F_INFLIGHT = BIT(REQ_F_INFLIGHT_BIT), +- /* read/write uses file position */ +- REQ_F_CUR_POS = BIT(REQ_F_CUR_POS_BIT), +- /* must not punt to workers */ +- REQ_F_NOWAIT = BIT(REQ_F_NOWAIT_BIT), +- /* has or had linked timeout */ +- REQ_F_LINK_TIMEOUT = BIT(REQ_F_LINK_TIMEOUT_BIT), +- /* needs cleanup */ +- REQ_F_NEED_CLEANUP = BIT(REQ_F_NEED_CLEANUP_BIT), +- /* already went through poll handler */ +- REQ_F_POLLED = BIT(REQ_F_POLLED_BIT), +- /* buffer already selected */ +- REQ_F_BUFFER_SELECTED = BIT(REQ_F_BUFFER_SELECTED_BIT), +- /* completion is deferred through io_comp_state */ +- REQ_F_COMPLETE_INLINE = BIT(REQ_F_COMPLETE_INLINE_BIT), +- /* caller should reissue async */ +- REQ_F_REISSUE = BIT(REQ_F_REISSUE_BIT), +- /* supports async reads */ +- REQ_F_NOWAIT_READ = BIT(REQ_F_NOWAIT_READ_BIT), +- /* supports async writes */ +- REQ_F_NOWAIT_WRITE = BIT(REQ_F_NOWAIT_WRITE_BIT), +- /* regular file */ +- REQ_F_ISREG = BIT(REQ_F_ISREG_BIT), +- /* has creds assigned */ +- REQ_F_CREDS = BIT(REQ_F_CREDS_BIT), +- /* skip refcounting if not set */ +- REQ_F_REFCOUNT = BIT(REQ_F_REFCOUNT_BIT), +- /* there is a linked timeout that has to be armed */ +- REQ_F_ARM_LTIMEOUT = BIT(REQ_F_ARM_LTIMEOUT_BIT), +-}; +- +-struct async_poll { +- struct io_poll_iocb poll; +- struct io_poll_iocb *double_poll; +-}; +- +-typedef void (*io_req_tw_func_t)(struct io_kiocb *req, bool *locked); +- +-struct io_task_work { +- union { +- struct io_wq_work_node node; +- struct llist_node fallback_node; +- }; +- io_req_tw_func_t func; +-}; +- +-enum { +- IORING_RSRC_FILE = 0, +- IORING_RSRC_BUFFER = 1, +-}; +- +-/* +- * NOTE! Each of the iocb union members has the file pointer +- * as the first entry in their struct definition. So you can +- * access the file pointer through any of the sub-structs, +- * or directly as just 'ki_filp' in this struct. +- */ +-struct io_kiocb { +- union { +- struct file *file; +- struct io_rw rw; +- struct io_poll_iocb poll; +- struct io_poll_update poll_update; +- struct io_accept accept; +- struct io_sync sync; +- struct io_cancel cancel; +- struct io_timeout timeout; +- struct io_timeout_rem timeout_rem; +- struct io_connect connect; +- struct io_sr_msg sr_msg; +- struct io_open open; +- struct io_close close; +- struct io_rsrc_update rsrc_update; +- struct io_fadvise fadvise; +- struct io_madvise madvise; +- struct io_epoll epoll; +- struct io_splice splice; +- struct io_provide_buf pbuf; +- struct io_statx statx; +- struct io_shutdown shutdown; +- struct io_rename rename; +- struct io_unlink unlink; +- struct io_mkdir mkdir; +- struct io_symlink symlink; +- struct io_hardlink hardlink; +- /* use only after cleaning per-op data, see io_clean_op() */ +- struct io_completion compl; +- }; +- +- /* opcode allocated if it needs to store data for async defer */ +- void *async_data; +- u8 opcode; +- /* polled IO has completed */ +- u8 iopoll_completed; +- +- u16 buf_index; +- u32 result; +- +- struct io_ring_ctx *ctx; +- unsigned int flags; +- atomic_t refs; +- struct task_struct *task; +- u64 user_data; +- +- struct io_kiocb *link; +- struct percpu_ref *fixed_rsrc_refs; +- +- /* used with ctx->iopoll_list with reads/writes */ +- struct list_head inflight_entry; +- struct io_task_work io_task_work; +- /* for polled requests, i.e. 
IORING_OP_POLL_ADD and async armed poll */ +- struct hlist_node hash_node; +- struct async_poll *apoll; +- struct io_wq_work work; +- const struct cred *creds; +- +- /* store used ubuf, so we can prevent reloading */ +- struct io_mapped_ubuf *imu; +-}; +- +-struct io_tctx_node { +- struct list_head ctx_node; +- struct task_struct *task; +- struct io_ring_ctx *ctx; +-}; +- +-struct io_defer_entry { +- struct list_head list; +- struct io_kiocb *req; +- u32 seq; +-}; +- +-struct io_op_def { +- /* needs req->file assigned */ +- unsigned needs_file : 1; +- /* hash wq insertion if file is a regular file */ +- unsigned hash_reg_file : 1; +- /* unbound wq insertion if file is a non-regular file */ +- unsigned unbound_nonreg_file : 1; +- /* opcode is not supported by this kernel */ +- unsigned not_supported : 1; +- /* set if opcode supports polled "wait" */ +- unsigned pollin : 1; +- unsigned pollout : 1; +- /* op supports buffer selection */ +- unsigned buffer_select : 1; +- /* do prep async if is going to be punted */ +- unsigned needs_async_setup : 1; +- /* should block plug */ +- unsigned plug : 1; +- /* size of async data needed, if any */ +- unsigned short async_size; +-}; +- +-static const struct io_op_def io_op_defs[] = { +- [IORING_OP_NOP] = {}, +- [IORING_OP_READV] = { +- .needs_file = 1, +- .unbound_nonreg_file = 1, +- .pollin = 1, +- .buffer_select = 1, +- .needs_async_setup = 1, +- .plug = 1, +- .async_size = sizeof(struct io_async_rw), +- }, +- [IORING_OP_WRITEV] = { +- .needs_file = 1, +- .hash_reg_file = 1, +- .unbound_nonreg_file = 1, +- .pollout = 1, +- .needs_async_setup = 1, +- .plug = 1, +- .async_size = sizeof(struct io_async_rw), +- }, +- [IORING_OP_FSYNC] = { +- .needs_file = 1, +- }, +- [IORING_OP_READ_FIXED] = { +- .needs_file = 1, +- .unbound_nonreg_file = 1, +- .pollin = 1, +- .plug = 1, +- .async_size = sizeof(struct io_async_rw), +- }, +- [IORING_OP_WRITE_FIXED] = { +- .needs_file = 1, +- .hash_reg_file = 1, +- .unbound_nonreg_file = 1, +- .pollout = 1, +- .plug = 1, +- .async_size = sizeof(struct io_async_rw), +- }, +- [IORING_OP_POLL_ADD] = { +- .needs_file = 1, +- .unbound_nonreg_file = 1, +- }, +- [IORING_OP_POLL_REMOVE] = {}, +- [IORING_OP_SYNC_FILE_RANGE] = { +- .needs_file = 1, +- }, +- [IORING_OP_SENDMSG] = { +- .needs_file = 1, +- .unbound_nonreg_file = 1, +- .pollout = 1, +- .needs_async_setup = 1, +- .async_size = sizeof(struct io_async_msghdr), +- }, +- [IORING_OP_RECVMSG] = { +- .needs_file = 1, +- .unbound_nonreg_file = 1, +- .pollin = 1, +- .buffer_select = 1, +- .needs_async_setup = 1, +- .async_size = sizeof(struct io_async_msghdr), +- }, +- [IORING_OP_TIMEOUT] = { +- .async_size = sizeof(struct io_timeout_data), +- }, +- [IORING_OP_TIMEOUT_REMOVE] = { +- /* used by timeout updates' prep() */ +- }, +- [IORING_OP_ACCEPT] = { +- .needs_file = 1, +- .unbound_nonreg_file = 1, +- .pollin = 1, +- }, +- [IORING_OP_ASYNC_CANCEL] = {}, +- [IORING_OP_LINK_TIMEOUT] = { +- .async_size = sizeof(struct io_timeout_data), +- }, +- [IORING_OP_CONNECT] = { +- .needs_file = 1, +- .unbound_nonreg_file = 1, +- .pollout = 1, +- .needs_async_setup = 1, +- .async_size = sizeof(struct io_async_connect), +- }, +- [IORING_OP_FALLOCATE] = { +- .needs_file = 1, +- }, +- [IORING_OP_OPENAT] = {}, +- [IORING_OP_CLOSE] = {}, +- [IORING_OP_FILES_UPDATE] = {}, +- [IORING_OP_STATX] = {}, +- [IORING_OP_READ] = { +- .needs_file = 1, +- .unbound_nonreg_file = 1, +- .pollin = 1, +- .buffer_select = 1, +- .plug = 1, +- .async_size = sizeof(struct io_async_rw), +- }, +- [IORING_OP_WRITE] = { 
+- .needs_file = 1, +- .hash_reg_file = 1, +- .unbound_nonreg_file = 1, +- .pollout = 1, +- .plug = 1, +- .async_size = sizeof(struct io_async_rw), +- }, +- [IORING_OP_FADVISE] = { +- .needs_file = 1, +- }, +- [IORING_OP_MADVISE] = {}, +- [IORING_OP_SEND] = { +- .needs_file = 1, +- .unbound_nonreg_file = 1, +- .pollout = 1, +- }, +- [IORING_OP_RECV] = { +- .needs_file = 1, +- .unbound_nonreg_file = 1, +- .pollin = 1, +- .buffer_select = 1, +- }, +- [IORING_OP_OPENAT2] = { +- }, +- [IORING_OP_EPOLL_CTL] = { +- .unbound_nonreg_file = 1, +- }, +- [IORING_OP_SPLICE] = { +- .needs_file = 1, +- .hash_reg_file = 1, +- .unbound_nonreg_file = 1, +- }, +- [IORING_OP_PROVIDE_BUFFERS] = {}, +- [IORING_OP_REMOVE_BUFFERS] = {}, +- [IORING_OP_TEE] = { +- .needs_file = 1, +- .hash_reg_file = 1, +- .unbound_nonreg_file = 1, +- }, +- [IORING_OP_SHUTDOWN] = { +- .needs_file = 1, +- }, +- [IORING_OP_RENAMEAT] = {}, +- [IORING_OP_UNLINKAT] = {}, +- [IORING_OP_MKDIRAT] = {}, +- [IORING_OP_SYMLINKAT] = {}, +- [IORING_OP_LINKAT] = {}, +-}; +- +-/* requests with any of those set should undergo io_disarm_next() */ +-#define IO_DISARM_MASK (REQ_F_ARM_LTIMEOUT | REQ_F_LINK_TIMEOUT | REQ_F_FAIL) +- +-static bool io_disarm_next(struct io_kiocb *req); +-static void io_uring_del_tctx_node(unsigned long index); +-static void io_uring_try_cancel_requests(struct io_ring_ctx *ctx, +- struct task_struct *task, +- bool cancel_all); +-static void io_uring_cancel_generic(bool cancel_all, struct io_sq_data *sqd); +- +-static bool io_cqring_fill_event(struct io_ring_ctx *ctx, u64 user_data, +- long res, unsigned int cflags); +-static void io_put_req(struct io_kiocb *req); +-static void io_put_req_deferred(struct io_kiocb *req); +-static void io_dismantle_req(struct io_kiocb *req); +-static void io_queue_linked_timeout(struct io_kiocb *req); +-static int __io_register_rsrc_update(struct io_ring_ctx *ctx, unsigned type, +- struct io_uring_rsrc_update2 *up, +- unsigned nr_args); +-static void io_clean_op(struct io_kiocb *req); +-static struct file *io_file_get(struct io_ring_ctx *ctx, +- struct io_kiocb *req, int fd, bool fixed); +-static void __io_queue_sqe(struct io_kiocb *req); +-static void io_rsrc_put_work(struct work_struct *work); +- +-static void io_req_task_queue(struct io_kiocb *req); +-static void io_submit_flush_completions(struct io_ring_ctx *ctx); +-static int io_req_prep_async(struct io_kiocb *req); +- +-static int io_install_fixed_file(struct io_kiocb *req, struct file *file, +- unsigned int issue_flags, u32 slot_index); +-static int io_close_fixed(struct io_kiocb *req, unsigned int issue_flags); +- +-static enum hrtimer_restart io_link_timeout_fn(struct hrtimer *timer); +- +-static struct kmem_cache *req_cachep; +- +-static const struct file_operations io_uring_fops; +- +-struct sock *io_uring_get_socket(struct file *file) +-{ +-#if defined(CONFIG_UNIX) +- if (file->f_op == &io_uring_fops) { +- struct io_ring_ctx *ctx = file->private_data; +- +- return ctx->ring_sock->sk; - } - } - - static void io_req_cache_free(struct list_head *list) -@@ -9251,11 +9310,6 @@ static void io_ring_ctx_free(struct io_ring_ctx *ctx) - { - io_sq_thread_finish(ctx); - -- if (ctx->mm_account) { -- mmdrop(ctx->mm_account); -- ctx->mm_account = NULL; +-#endif +- return NULL; +-} +-EXPORT_SYMBOL(io_uring_get_socket); +- +-static inline void io_tw_lock(struct io_ring_ctx *ctx, bool *locked) +-{ +- if (!*locked) { +- mutex_lock(&ctx->uring_lock); +- *locked = true; - } +-} - - /* __io_rsrc_put_work() may need uring_lock to progress, wait w/o it 
*/ - io_wait_rsrc_data(ctx->buf_data); - io_wait_rsrc_data(ctx->file_data); -@@ -9291,6 +9345,11 @@ static void io_ring_ctx_free(struct io_ring_ctx *ctx) - #endif - WARN_ON_ONCE(!list_empty(&ctx->ltimeout_list)); - -+ if (ctx->mm_account) { -+ mmdrop(ctx->mm_account); -+ ctx->mm_account = NULL; -+ } -+ - io_mem_free(ctx->rings); - io_mem_free(ctx->sq_sqes); - -@@ -9517,19 +9576,8 @@ static bool io_cancel_task_cb(struct io_wq_work *work, void *data) - { - struct io_kiocb *req = container_of(work, struct io_kiocb, work); - struct io_task_cancel *cancel = data; -- bool ret; +-#define io_for_each_link(pos, head) \ +- for (pos = (head); pos; pos = pos->link) - -- if (!cancel->all && (req->flags & REQ_F_LINK_TIMEOUT)) { +-/* +- * Shamelessly stolen from the mm implementation of page reference checking, +- * see commit f958d7b528b1 for details. +- */ +-#define req_ref_zero_or_close_to_overflow(req) \ +- ((unsigned int) atomic_read(&(req->refs)) + 127u <= 127u) +- +-static inline bool req_ref_inc_not_zero(struct io_kiocb *req) +-{ +- WARN_ON_ONCE(!(req->flags & REQ_F_REFCOUNT)); +- return atomic_inc_not_zero(&req->refs); +-} +- +-static inline bool req_ref_put_and_test(struct io_kiocb *req) +-{ +- if (likely(!(req->flags & REQ_F_REFCOUNT))) +- return true; +- +- WARN_ON_ONCE(req_ref_zero_or_close_to_overflow(req)); +- return atomic_dec_and_test(&req->refs); +-} +- +-static inline void req_ref_put(struct io_kiocb *req) +-{ +- WARN_ON_ONCE(!(req->flags & REQ_F_REFCOUNT)); +- WARN_ON_ONCE(req_ref_put_and_test(req)); +-} +- +-static inline void req_ref_get(struct io_kiocb *req) +-{ +- WARN_ON_ONCE(!(req->flags & REQ_F_REFCOUNT)); +- WARN_ON_ONCE(req_ref_zero_or_close_to_overflow(req)); +- atomic_inc(&req->refs); +-} +- +-static inline void __io_req_set_refcount(struct io_kiocb *req, int nr) +-{ +- if (!(req->flags & REQ_F_REFCOUNT)) { +- req->flags |= REQ_F_REFCOUNT; +- atomic_set(&req->refs, nr); +- } +-} +- +-static inline void io_req_set_refcount(struct io_kiocb *req) +-{ +- __io_req_set_refcount(req, 1); +-} +- +-static inline void io_req_set_rsrc_node(struct io_kiocb *req) +-{ +- struct io_ring_ctx *ctx = req->ctx; +- +- if (!req->fixed_rsrc_refs) { +- req->fixed_rsrc_refs = &ctx->rsrc_node->refs; +- percpu_ref_get(req->fixed_rsrc_refs); +- } +-} +- +-static void io_refs_resurrect(struct percpu_ref *ref, struct completion *compl) +-{ +- bool got = percpu_ref_tryget(ref); +- +- /* already at zero, wait for ->release() */ +- if (!got) +- wait_for_completion(compl); +- percpu_ref_resurrect(ref); +- if (got) +- percpu_ref_put(ref); +-} +- +-static bool io_match_task(struct io_kiocb *head, struct task_struct *task, +- bool cancel_all) +-{ +- struct io_kiocb *req; +- +- if (task && head->task != task) +- return false; +- if (cancel_all) +- return true; +- +- io_for_each_link(req, head) { +- if (req->flags & REQ_F_INFLIGHT) +- return true; +- } +- return false; +-} +- +-static inline void req_set_fail(struct io_kiocb *req) +-{ +- req->flags |= REQ_F_FAIL; +-} +- +-static inline void req_fail_link_node(struct io_kiocb *req, int res) +-{ +- req_set_fail(req); +- req->result = res; +-} +- +-static void io_ring_ctx_ref_free(struct percpu_ref *ref) +-{ +- struct io_ring_ctx *ctx = container_of(ref, struct io_ring_ctx, refs); +- +- complete(&ctx->ref_comp); +-} +- +-static inline bool io_is_timeout_noseq(struct io_kiocb *req) +-{ +- return !req->timeout.off; +-} +- +-static void io_fallback_req_func(struct work_struct *work) +-{ +- struct io_ring_ctx *ctx = container_of(work, struct io_ring_ctx, +- 
fallback_work.work); +- struct llist_node *node = llist_del_all(&ctx->fallback_llist); +- struct io_kiocb *req, *tmp; +- bool locked = false; +- +- percpu_ref_get(&ctx->refs); +- llist_for_each_entry_safe(req, tmp, node, io_task_work.fallback_node) +- req->io_task_work.func(req, &locked); +- +- if (locked) { +- if (ctx->submit_state.compl_nr) +- io_submit_flush_completions(ctx); +- mutex_unlock(&ctx->uring_lock); +- } +- percpu_ref_put(&ctx->refs); +- +-} +- +-static struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p) +-{ +- struct io_ring_ctx *ctx; +- int hash_bits; +- +- ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); +- if (!ctx) +- return NULL; +- +- /* +- * Use 5 bits less than the max cq entries, that should give us around +- * 32 entries per hash list if totally full and uniformly spread. +- */ +- hash_bits = ilog2(p->cq_entries); +- hash_bits -= 5; +- if (hash_bits <= 0) +- hash_bits = 1; +- ctx->cancel_hash_bits = hash_bits; +- ctx->cancel_hash = kmalloc((1U << hash_bits) * sizeof(struct hlist_head), +- GFP_KERNEL); +- if (!ctx->cancel_hash) +- goto err; +- __hash_init(ctx->cancel_hash, 1U << hash_bits); +- +- ctx->dummy_ubuf = kzalloc(sizeof(*ctx->dummy_ubuf), GFP_KERNEL); +- if (!ctx->dummy_ubuf) +- goto err; +- /* set invalid range, so io_import_fixed() fails meeting it */ +- ctx->dummy_ubuf->ubuf = -1UL; +- +- if (percpu_ref_init(&ctx->refs, io_ring_ctx_ref_free, +- PERCPU_REF_ALLOW_REINIT, GFP_KERNEL)) +- goto err; +- +- ctx->flags = p->flags; +- init_waitqueue_head(&ctx->sqo_sq_wait); +- INIT_LIST_HEAD(&ctx->sqd_list); +- init_waitqueue_head(&ctx->poll_wait); +- INIT_LIST_HEAD(&ctx->cq_overflow_list); +- init_completion(&ctx->ref_comp); +- xa_init_flags(&ctx->io_buffers, XA_FLAGS_ALLOC1); +- xa_init_flags(&ctx->personalities, XA_FLAGS_ALLOC1); +- mutex_init(&ctx->uring_lock); +- init_waitqueue_head(&ctx->cq_wait); +- spin_lock_init(&ctx->completion_lock); +- spin_lock_init(&ctx->timeout_lock); +- INIT_LIST_HEAD(&ctx->iopoll_list); +- INIT_LIST_HEAD(&ctx->defer_list); +- INIT_LIST_HEAD(&ctx->timeout_list); +- INIT_LIST_HEAD(&ctx->ltimeout_list); +- spin_lock_init(&ctx->rsrc_ref_lock); +- INIT_LIST_HEAD(&ctx->rsrc_ref_list); +- INIT_DELAYED_WORK(&ctx->rsrc_put_work, io_rsrc_put_work); +- init_llist_head(&ctx->rsrc_put_llist); +- INIT_LIST_HEAD(&ctx->tctx_list); +- INIT_LIST_HEAD(&ctx->submit_state.free_list); +- INIT_LIST_HEAD(&ctx->locked_free_list); +- INIT_DELAYED_WORK(&ctx->fallback_work, io_fallback_req_func); +- return ctx; +-err: +- kfree(ctx->dummy_ubuf); +- kfree(ctx->cancel_hash); +- kfree(ctx); +- return NULL; +-} +- +-static void io_account_cq_overflow(struct io_ring_ctx *ctx) +-{ +- struct io_rings *r = ctx->rings; +- +- WRITE_ONCE(r->cq_overflow, READ_ONCE(r->cq_overflow) + 1); +- ctx->cq_extra--; +-} +- +-static bool req_need_defer(struct io_kiocb *req, u32 seq) +-{ +- if (unlikely(req->flags & REQ_F_IO_DRAIN)) { - struct io_ring_ctx *ctx = req->ctx; - -- /* protect against races with linked timeouts */ +- +- return seq + READ_ONCE(ctx->cq_extra) != ctx->cached_cq_tail; +- } +- +- return false; +-} +- +-#define FFS_ASYNC_READ 0x1UL +-#define FFS_ASYNC_WRITE 0x2UL +-#ifdef CONFIG_64BIT +-#define FFS_ISREG 0x4UL +-#else +-#define FFS_ISREG 0x0UL +-#endif +-#define FFS_MASK ~(FFS_ASYNC_READ|FFS_ASYNC_WRITE|FFS_ISREG) +- +-static inline bool io_req_ffs_set(struct io_kiocb *req) +-{ +- return IS_ENABLED(CONFIG_64BIT) && (req->flags & REQ_F_FIXED_FILE); +-} +- +-static void io_req_track_inflight(struct io_kiocb *req) +-{ +- if (!(req->flags & REQ_F_INFLIGHT)) { 
+-		req->flags |= REQ_F_INFLIGHT;
+-		atomic_inc(&current->io_uring->inflight_tracked);
+-	}
+-}
+-
+-static struct io_kiocb *__io_prep_linked_timeout(struct io_kiocb *req)
+-{
+-	if (WARN_ON_ONCE(!req->link))
+-		return NULL;
+-
+-	req->flags &= ~REQ_F_ARM_LTIMEOUT;
+-	req->flags |= REQ_F_LINK_TIMEOUT;
+-
+-	/* linked timeouts should have two refs once prep'ed */
+-	io_req_set_refcount(req);
+-	__io_req_set_refcount(req->link, 2);
+-	return req->link;
+-}
+-
+-static inline struct io_kiocb *io_prep_linked_timeout(struct io_kiocb *req)
+-{
+-	if (likely(!(req->flags & REQ_F_ARM_LTIMEOUT)))
+-		return NULL;
+-	return __io_prep_linked_timeout(req);
+-}
+-
+-static void io_prep_async_work(struct io_kiocb *req)
+-{
+-	const struct io_op_def *def = &io_op_defs[req->opcode];
+-	struct io_ring_ctx *ctx = req->ctx;
+-
+-	if (!(req->flags & REQ_F_CREDS)) {
+-		req->flags |= REQ_F_CREDS;
+-		req->creds = get_current_cred();
+-	}
+-
+-	req->work.list.next = NULL;
+-	req->work.flags = 0;
+-	if (req->flags & REQ_F_FORCE_ASYNC)
+-		req->work.flags |= IO_WQ_WORK_CONCURRENT;
+-
+-	if (req->flags & REQ_F_ISREG) {
+-		if (def->hash_reg_file || (ctx->flags & IORING_SETUP_IOPOLL))
+-			io_wq_hash_work(&req->work, file_inode(req->file));
+-	} else if (!req->file || !S_ISBLK(file_inode(req->file)->i_mode)) {
+-		if (def->unbound_nonreg_file)
+-			req->work.flags |= IO_WQ_WORK_UNBOUND;
+-	}
+-
+-	switch (req->opcode) {
+-	case IORING_OP_SPLICE:
+-	case IORING_OP_TEE:
+-		if (!S_ISREG(file_inode(req->splice.file_in)->i_mode))
+-			req->work.flags |= IO_WQ_WORK_UNBOUND;
+-		break;
+-	}
+-}
+-
+-static void io_prep_async_link(struct io_kiocb *req)
+-{
+-	struct io_kiocb *cur;
+-
+-	if (req->flags & REQ_F_LINK_TIMEOUT) {
+-		struct io_ring_ctx *ctx = req->ctx;
+-
-		spin_lock(&ctx->completion_lock);
-		io_for_each_link(cur, req)
-			io_prep_async_work(cur);
-		spin_unlock(&ctx->completion_lock);
-	} else {
-		io_for_each_link(cur, req)
-			io_prep_async_work(cur);
-	}
-}
-
-static void io_queue_async_work(struct io_kiocb *req, bool *locked)
-{
-	struct io_ring_ctx *ctx = req->ctx;
-	struct io_kiocb *link = io_prep_linked_timeout(req);
-	struct io_uring_task *tctx = req->task->io_uring;
-
-	/* must not take the lock, NULL it as a precaution */
-	locked = NULL;
-
-	BUG_ON(!tctx);
-	BUG_ON(!tctx->io_wq);
-
-	/* init ->work of the whole link before punting */
-	io_prep_async_link(req);
-
-	/*
-	 * Not expected to happen, but if we do have a bug where this _can_
-	 * happen, catch it here and ensure the request is marked as
-	 * canceled. That will make io-wq go through the usual work cancel
-	 * procedure rather than attempt to run this request (or create a new
-	 * worker for it).
+- */ +- if (WARN_ON_ONCE(!same_thread_group(req->task, current))) +- req->work.flags |= IO_WQ_WORK_CANCEL; +- +- trace_io_uring_queue_async_work(ctx, io_wq_is_hashed(&req->work), req, +- &req->work, req->flags); +- io_wq_enqueue(tctx->io_wq, &req->work); +- if (link) +- io_queue_linked_timeout(link); +-} +- +-static void io_kill_timeout(struct io_kiocb *req, int status) +- __must_hold(&req->ctx->completion_lock) +- __must_hold(&req->ctx->timeout_lock) +-{ +- struct io_timeout_data *io = req->async_data; +- +- if (hrtimer_try_to_cancel(&io->timer) != -1) { +- if (status) +- req_set_fail(req); +- atomic_set(&req->ctx->cq_timeouts, +- atomic_read(&req->ctx->cq_timeouts) + 1); +- list_del_init(&req->timeout.list); +- io_cqring_fill_event(req->ctx, req->user_data, status, 0); +- io_put_req_deferred(req); +- } +-} +- +-static void io_queue_deferred(struct io_ring_ctx *ctx) +-{ +- while (!list_empty(&ctx->defer_list)) { +- struct io_defer_entry *de = list_first_entry(&ctx->defer_list, +- struct io_defer_entry, list); +- +- if (req_need_defer(de->req, de->seq)) +- break; +- list_del_init(&de->list); +- io_req_task_queue(de->req); +- kfree(de); +- } +-} +- +-static void io_flush_timeouts(struct io_ring_ctx *ctx) +- __must_hold(&ctx->completion_lock) +-{ +- u32 seq = ctx->cached_cq_tail - atomic_read(&ctx->cq_timeouts); +- +- spin_lock_irq(&ctx->timeout_lock); +- while (!list_empty(&ctx->timeout_list)) { +- u32 events_needed, events_got; +- struct io_kiocb *req = list_first_entry(&ctx->timeout_list, +- struct io_kiocb, timeout.list); +- +- if (io_is_timeout_noseq(req)) +- break; +- +- /* +- * Since seq can easily wrap around over time, subtract +- * the last seq at which timeouts were flushed before comparing. +- * Assuming not more than 2^31-1 events have happened since, +- * these subtractions won't have wrapped, so we can check if +- * target is in [last_seq, current_seq] by comparing the two. 
+- */ +- events_needed = req->timeout.target_seq - ctx->cq_last_tm_flush; +- events_got = seq - ctx->cq_last_tm_flush; +- if (events_got < events_needed) +- break; +- +- list_del_init(&req->timeout.list); +- io_kill_timeout(req, 0); +- } +- ctx->cq_last_tm_flush = seq; +- spin_unlock_irq(&ctx->timeout_lock); +-} +- +-static void __io_commit_cqring_flush(struct io_ring_ctx *ctx) +-{ +- if (ctx->off_timeout_used) +- io_flush_timeouts(ctx); +- if (ctx->drain_active) +- io_queue_deferred(ctx); +-} +- +-static inline void io_commit_cqring(struct io_ring_ctx *ctx) +-{ +- if (unlikely(ctx->off_timeout_used || ctx->drain_active)) +- __io_commit_cqring_flush(ctx); +- /* order cqe stores with ring update */ +- smp_store_release(&ctx->rings->cq.tail, ctx->cached_cq_tail); +-} +- +-static inline bool io_sqring_full(struct io_ring_ctx *ctx) +-{ +- struct io_rings *r = ctx->rings; +- +- return READ_ONCE(r->sq.tail) - ctx->cached_sq_head == ctx->sq_entries; +-} +- +-static inline unsigned int __io_cqring_events(struct io_ring_ctx *ctx) +-{ +- return ctx->cached_cq_tail - READ_ONCE(ctx->rings->cq.head); +-} +- +-static inline struct io_uring_cqe *io_get_cqe(struct io_ring_ctx *ctx) +-{ +- struct io_rings *rings = ctx->rings; +- unsigned tail, mask = ctx->cq_entries - 1; +- +- /* +- * writes to the cq entry need to come after reading head; the +- * control dependency is enough as we're using WRITE_ONCE to +- * fill the cq entry +- */ +- if (__io_cqring_events(ctx) == ctx->cq_entries) +- return NULL; +- +- tail = ctx->cached_cq_tail++; +- return &rings->cqes[tail & mask]; +-} +- +-static inline bool io_should_trigger_evfd(struct io_ring_ctx *ctx) +-{ +- if (likely(!ctx->cq_ev_fd)) +- return false; +- if (READ_ONCE(ctx->rings->cq_flags) & IORING_CQ_EVENTFD_DISABLED) +- return false; +- return !ctx->eventfd_async || io_wq_current_is_worker(); +-} +- +-/* +- * This should only get called when at least one event has been posted. +- * Some applications rely on the eventfd notification count only changing +- * IFF a new CQE has been added to the CQ ring. There's no depedency on +- * 1:1 relationship between how many times this function is called (and +- * hence the eventfd count) and number of CQEs posted to the CQ ring. +- */ +-static void io_cqring_ev_posted(struct io_ring_ctx *ctx) +-{ +- /* +- * wake_up_all() may seem excessive, but io_wake_function() and +- * io_should_wake() handle the termination of the loop and only +- * wake as many waiters as we need to. 
+- */ +- if (wq_has_sleeper(&ctx->cq_wait)) +- wake_up_all(&ctx->cq_wait); +- if (ctx->sq_data && waitqueue_active(&ctx->sq_data->wait)) +- wake_up(&ctx->sq_data->wait); +- if (io_should_trigger_evfd(ctx)) +- eventfd_signal(ctx->cq_ev_fd, 1); +- if (waitqueue_active(&ctx->poll_wait)) +- wake_up_interruptible(&ctx->poll_wait); +-} +- +-static void io_cqring_ev_posted_iopoll(struct io_ring_ctx *ctx) +-{ +- /* see waitqueue_active() comment */ +- smp_mb(); +- +- if (ctx->flags & IORING_SETUP_SQPOLL) { +- if (waitqueue_active(&ctx->cq_wait)) +- wake_up_all(&ctx->cq_wait); +- } +- if (io_should_trigger_evfd(ctx)) +- eventfd_signal(ctx->cq_ev_fd, 1); +- if (waitqueue_active(&ctx->poll_wait)) +- wake_up_interruptible(&ctx->poll_wait); +-} +- +-/* Returns true if there are no backlogged entries after the flush */ +-static bool __io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force) +-{ +- bool all_flushed, posted; +- +- if (!force && __io_cqring_events(ctx) == ctx->cq_entries) +- return false; +- +- posted = false; +- spin_lock(&ctx->completion_lock); +- while (!list_empty(&ctx->cq_overflow_list)) { +- struct io_uring_cqe *cqe = io_get_cqe(ctx); +- struct io_overflow_cqe *ocqe; +- +- if (!cqe && !force) +- break; +- ocqe = list_first_entry(&ctx->cq_overflow_list, +- struct io_overflow_cqe, list); +- if (cqe) +- memcpy(cqe, &ocqe->cqe, sizeof(*cqe)); +- else +- io_account_cq_overflow(ctx); +- +- posted = true; +- list_del(&ocqe->list); +- kfree(ocqe); - } +- +- all_flushed = list_empty(&ctx->cq_overflow_list); +- if (all_flushed) { +- clear_bit(0, &ctx->check_cq_overflow); +- WRITE_ONCE(ctx->rings->sq_flags, +- ctx->rings->sq_flags & ~IORING_SQ_CQ_OVERFLOW); +- } +- +- if (posted) +- io_commit_cqring(ctx); +- spin_unlock(&ctx->completion_lock); +- if (posted) +- io_cqring_ev_posted(ctx); +- return all_flushed; +-} +- +-static bool io_cqring_overflow_flush(struct io_ring_ctx *ctx) +-{ +- bool ret = true; +- +- if (test_bit(0, &ctx->check_cq_overflow)) { +- /* iopoll syncs against uring_lock, not completion_lock */ +- if (ctx->flags & IORING_SETUP_IOPOLL) +- mutex_lock(&ctx->uring_lock); +- ret = __io_cqring_overflow_flush(ctx, false); +- if (ctx->flags & IORING_SETUP_IOPOLL) +- mutex_unlock(&ctx->uring_lock); +- } +- - return ret; -+ return io_match_task_safe(req, cancel->task, cancel->all); - } - - static bool io_cancel_defer_files(struct io_ring_ctx *ctx, -@@ -9540,7 +9588,7 @@ static bool io_cancel_defer_files(struct io_ring_ctx *ctx, - - spin_lock(&ctx->completion_lock); - list_for_each_entry_reverse(de, &ctx->defer_list, list) { -- if (io_match_task(de->req, task, cancel_all)) { -+ if (io_match_task_safe(de->req, task, cancel_all)) { - list_cut_position(&list, &ctx->defer_list, &de->list); - break; - } -@@ -9733,21 +9781,9 @@ static s64 tctx_inflight(struct io_uring_task *tctx, bool tracked) - return percpu_counter_sum(&tctx->inflight); - } - --static void io_uring_drop_tctx_refs(struct task_struct *task) +-} +- +-/* must to be called somewhat shortly after putting a request */ +-static inline void io_put_task(struct task_struct *task, int nr) -{ - struct io_uring_task *tctx = task->io_uring; -- unsigned int refs = tctx->cached_refs; - -- if (refs) { -- tctx->cached_refs = 0; -- percpu_counter_sub(&tctx->inflight, refs); -- put_task_struct_many(task, refs); +- if (likely(task == current)) { +- tctx->cached_refs += nr; +- } else { +- percpu_counter_sub(&tctx->inflight, nr); +- if (unlikely(atomic_read(&tctx->in_idle))) +- wake_up(&tctx->wait); +- put_task_struct_many(task, nr); - } -} - 
- /* - * Find any io_uring ctx that this task has registered or done IO on, and cancel -- * requests. @sqd should be not-null IIF it's an SQPOLL thread cancellation. -+ * requests. @sqd should be not-null IFF it's an SQPOLL thread cancellation. - */ - static void io_uring_cancel_generic(bool cancel_all, struct io_sq_data *sqd) - { -@@ -9788,8 +9824,10 @@ static void io_uring_cancel_generic(bool cancel_all, struct io_sq_data *sqd) - cancel_all); - } - -- prepare_to_wait(&tctx->wait, &wait, TASK_UNINTERRUPTIBLE); -+ prepare_to_wait(&tctx->wait, &wait, TASK_INTERRUPTIBLE); -+ io_run_task_work(); - io_uring_drop_tctx_refs(current); -+ - /* - * If we've seen completions, retry without waiting. This - * avoids a race where a completion comes in before we did -@@ -9799,10 +9837,14 @@ static void io_uring_cancel_generic(bool cancel_all, struct io_sq_data *sqd) - schedule(); - finish_wait(&tctx->wait, &wait); - } while (1); -- atomic_dec(&tctx->in_idle); - - io_uring_clean_tctx(tctx); - if (cancel_all) { -+ /* -+ * We shouldn't run task_works after cancel, so just leave -+ * ->in_idle set for normal exit. -+ */ -+ atomic_dec(&tctx->in_idle); - /* for exec all current's requests should be gone, kill tctx */ - __io_uring_free(current); - } -@@ -9925,6 +9967,8 @@ static int io_get_ext_arg(unsigned flags, const void __user *argp, size_t *argsz - return -EINVAL; - if (copy_from_user(&arg, argp, sizeof(arg))) - return -EFAULT; -+ if (arg.pad) -+ return -EINVAL; - *sig = u64_to_user_ptr(arg.sigmask); - *argsz = arg.sigmask_sz; - *ts = u64_to_user_ptr(arg.ts); -@@ -10540,8 +10584,6 @@ static int __io_register_rsrc_update(struct io_ring_ctx *ctx, unsigned type, - __u32 tmp; - int err; - -- if (up->resv) -- return -EINVAL; - if (check_add_overflow(up->offset, nr_args, &tmp)) - return -EOVERFLOW; - err = io_rsrc_node_switch_start(ctx); -@@ -10567,6 +10609,8 @@ static int io_register_files_update(struct io_ring_ctx *ctx, void __user *arg, - memset(&up, 0, sizeof(up)); - if (copy_from_user(&up, arg, sizeof(struct io_uring_rsrc_update))) - return -EFAULT; -+ if (up.resv || up.resv2) -+ return -EINVAL; - return __io_register_rsrc_update(ctx, IORING_RSRC_FILE, &up, nr_args); - } - -@@ -10579,7 +10623,7 @@ static int io_register_rsrc_update(struct io_ring_ctx *ctx, void __user *arg, - return -EINVAL; - if (copy_from_user(&up, arg, sizeof(up))) - return -EFAULT; -- if (!up.nr || up.resv) -+ if (!up.nr || up.resv || up.resv2) - return -EINVAL; - return __io_register_rsrc_update(ctx, type, &up, up.nr); - } -@@ -10627,7 +10671,15 @@ static int io_register_iowq_aff(struct io_ring_ctx *ctx, void __user *arg, - if (len > cpumask_size()) - len = cpumask_size(); - -- if (copy_from_user(new_mask, arg, len)) { -+ if (in_compat_syscall()) { -+ ret = compat_get_bitmap(cpumask_bits(new_mask), -+ (const compat_ulong_t __user *)arg, -+ len * 8 /* CHAR_BIT */); -+ } else { -+ ret = copy_from_user(new_mask, arg, len); -+ } -+ -+ if (ret) { - free_cpumask_var(new_mask); - return -EFAULT; - } -@@ -10684,7 +10736,9 @@ static int io_register_iowq_max_workers(struct io_ring_ctx *ctx, - - BUILD_BUG_ON(sizeof(new_count) != sizeof(ctx->iowq_limits)); - -- memcpy(ctx->iowq_limits, new_count, sizeof(new_count)); -+ for (i = 0; i < ARRAY_SIZE(new_count); i++) -+ if (new_count[i]) -+ ctx->iowq_limits[i] = new_count[i]; - ctx->iowq_limits_set = true; - - ret = -EINVAL; -diff --git a/fs/ioctl.c b/fs/ioctl.c -index 504e695781124..e0a3455f9a0f6 100644 ---- a/fs/ioctl.c -+++ b/fs/ioctl.c -@@ -173,7 +173,7 @@ int fiemap_prep(struct inode *inode, 
struct fiemap_extent_info *fieinfo, - - if (*len == 0) - return -EINVAL; -- if (start > maxbytes) -+ if (start >= maxbytes) - return -EFBIG; - - /* -diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c -index 9cc5798423d12..87a4f5a2ded0e 100644 ---- a/fs/iomap/buffered-io.c -+++ b/fs/iomap/buffered-io.c -@@ -256,8 +256,13 @@ static loff_t iomap_readpage_iter(const struct iomap_iter *iter, - unsigned poff, plen; - sector_t sector; - -- if (iomap->type == IOMAP_INLINE) -- return min(iomap_read_inline_data(iter, page), length); -+ if (iomap->type == IOMAP_INLINE) { -+ loff_t ret = iomap_read_inline_data(iter, page); -+ -+ if (ret < 0) -+ return ret; -+ return 0; -+ } - - /* zero post-eof blocks as the page may be mapped */ - iop = iomap_page_create(iter->inode, page); -@@ -370,6 +375,8 @@ static loff_t iomap_readahead_iter(const struct iomap_iter *iter, - ctx->cur_page_in_bio = false; - } - ret = iomap_readpage_iter(iter, ctx, done); -+ if (ret <= 0) -+ return ret; - } - - return done; -@@ -518,7 +525,8 @@ iomap_write_failed(struct inode *inode, loff_t pos, unsigned len) - * write started inside the existing inode size. - */ - if (pos + len > i_size) -- truncate_pagecache_range(inode, max(pos, i_size), pos + len); -+ truncate_pagecache_range(inode, max(pos, i_size), -+ pos + len - 1); - } - - static int -@@ -750,7 +758,7 @@ again: - * same page as we're writing to, without it being marked - * up-to-date. - */ -- if (unlikely(iov_iter_fault_in_readable(i, bytes))) { -+ if (unlikely(fault_in_iov_iter_readable(i, bytes))) { - status = -EFAULT; - break; - } -diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c -index 4ecd255e0511c..468dcbba45bcb 100644 ---- a/fs/iomap/direct-io.c -+++ b/fs/iomap/direct-io.c -@@ -31,6 +31,7 @@ struct iomap_dio { - atomic_t ref; - unsigned flags; - int error; -+ size_t done_before; - bool wait_for_completion; - - union { -@@ -124,6 +125,9 @@ ssize_t iomap_dio_complete(struct iomap_dio *dio) - if (ret > 0 && (dio->flags & IOMAP_DIO_NEED_SYNC)) - ret = generic_write_sync(iocb, ret); - -+ if (ret > 0) -+ ret += dio->done_before; -+ - kfree(dio); - - return ret; -@@ -371,6 +375,8 @@ static loff_t iomap_dio_hole_iter(const struct iomap_iter *iter, - loff_t length = iov_iter_zero(iomap_length(iter), dio->submit.iter); - - dio->size += length; -+ if (!length) -+ return -EFAULT; - return length; - } - -@@ -402,6 +408,8 @@ static loff_t iomap_dio_inline_iter(const struct iomap_iter *iomi, - copied = copy_to_iter(inline_data, length, iter); - } - dio->size += copied; -+ if (!copied) -+ return -EFAULT; - return copied; - } - -@@ -446,13 +454,21 @@ static loff_t iomap_dio_iter(const struct iomap_iter *iter, - * may be pure data writes. In that case, we still need to do a full data sync - * completion. - * -+ * When page faults are disabled and @dio_flags includes IOMAP_DIO_PARTIAL, -+ * __iomap_dio_rw can return a partial result if it encounters a non-resident -+ * page in @iter after preparing a transfer. In that case, the non-resident -+ * pages can be faulted in and the request resumed with @done_before set to the -+ * number of bytes previously transferred. The request will then complete with -+ * the correct total number of bytes transferred; this is essential for -+ * completing partial requests asynchronously. -+ * - * Returns -ENOTBLK In case of a page invalidation invalidation failure for - * writes. The callers needs to fall back to buffered I/O in this case. 
- */ - struct iomap_dio * - __iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, - const struct iomap_ops *ops, const struct iomap_dio_ops *dops, -- unsigned int dio_flags) -+ unsigned int dio_flags, size_t done_before) - { - struct address_space *mapping = iocb->ki_filp->f_mapping; - struct inode *inode = file_inode(iocb->ki_filp); -@@ -482,6 +498,7 @@ __iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, - dio->dops = dops; - dio->error = 0; - dio->flags = 0; -+ dio->done_before = done_before; - - dio->submit.iter = iter; - dio->submit.waiter = current; -@@ -577,6 +594,12 @@ __iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, - if (iov_iter_rw(iter) == READ && iomi.pos >= dio->i_size) - iov_iter_revert(iter, iomi.pos - dio->i_size); - -+ if (ret == -EFAULT && dio->size && (dio_flags & IOMAP_DIO_PARTIAL)) { -+ if (!(iocb->ki_flags & IOCB_NOWAIT)) -+ wait_for_completion = true; -+ ret = 0; -+ } -+ - /* magic error code to fall back to buffered I/O */ - if (ret == -ENOTBLK) { - wait_for_completion = true; -@@ -642,11 +665,11 @@ EXPORT_SYMBOL_GPL(__iomap_dio_rw); - ssize_t - iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, - const struct iomap_ops *ops, const struct iomap_dio_ops *dops, -- unsigned int dio_flags) -+ unsigned int dio_flags, size_t done_before) - { - struct iomap_dio *dio; - -- dio = __iomap_dio_rw(iocb, iter, ops, dops, dio_flags); -+ dio = __iomap_dio_rw(iocb, iter, ops, dops, dio_flags, done_before); - if (IS_ERR_OR_NULL(dio)) - return PTR_ERR_OR_ZERO(dio); - return iomap_dio_complete(dio); -diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c -index 678e2c51b855c..0c6eacfcbeef1 100644 ---- a/fs/isofs/inode.c -+++ b/fs/isofs/inode.c -@@ -1322,6 +1322,8 @@ static int isofs_read_inode(struct inode *inode, int relocated) - - de = (struct iso_directory_record *) (bh->b_data + offset); - de_len = *(unsigned char *) de; -+ if (de_len < sizeof(struct iso_directory_record)) -+ goto fail; - - if (offset + de_len > bufsize) { - int frag1 = bufsize - offset; -diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c -index 3cc4ab2ba7f4f..ac328e3321242 100644 ---- a/fs/jbd2/commit.c -+++ b/fs/jbd2/commit.c -@@ -501,7 +501,6 @@ void jbd2_journal_commit_transaction(journal_t *journal) - } - spin_unlock(&commit_transaction->t_handle_lock); - commit_transaction->t_state = T_SWITCH; -- write_unlock(&journal->j_state_lock); - - J_ASSERT (atomic_read(&commit_transaction->t_outstanding_credits) <= - journal->j_max_transaction_buffers); -@@ -521,6 +520,8 @@ void jbd2_journal_commit_transaction(journal_t *journal) - * has reserved. This is consistent with the existing behaviour - * that multiple jbd2_journal_get_write_access() calls to the same - * buffer are perfectly permissible. -+ * We use journal->j_state_lock here to serialize processing of -+ * t_reserved_list with eviction of buffers from journal_unmap_buffer(). - */ - while (commit_transaction->t_reserved_list) { - jh = commit_transaction->t_reserved_list; -@@ -540,6 +541,7 @@ void jbd2_journal_commit_transaction(journal_t *journal) - jbd2_journal_refile_buffer(journal, jh); - } - -+ write_unlock(&journal->j_state_lock); - /* - * Now try to drop any written-back buffers from the journal's - * checkpoint lists. 
We do this *before* commit because it potentially -@@ -562,13 +564,13 @@ void jbd2_journal_commit_transaction(journal_t *journal) - */ - jbd2_journal_switch_revoke_table(journal); - -+ write_lock(&journal->j_state_lock); - /* - * Reserved credits cannot be claimed anymore, free them - */ - atomic_sub(atomic_read(&journal->j_reserved_credits), - &commit_transaction->t_outstanding_credits); - -- write_lock(&journal->j_state_lock); - trace_jbd2_commit_flushing(journal, commit_transaction); - stats.run.rs_flushing = jiffies; - stats.run.rs_locked = jbd2_time_diff(stats.run.rs_locked, -@@ -579,7 +581,7 @@ void jbd2_journal_commit_transaction(journal_t *journal) - journal->j_running_transaction = NULL; - start_time = ktime_get(); - commit_transaction->t_log_start = journal->j_head; -- wake_up(&journal->j_wait_transaction_locked); -+ wake_up_all(&journal->j_wait_transaction_locked); - write_unlock(&journal->j_state_lock); - - jbd_debug(3, "JBD2: commit phase 2a\n"); -@@ -1170,7 +1172,7 @@ restart_loop: - if (journal->j_commit_callback) - journal->j_commit_callback(journal, commit_transaction); - if (journal->j_fc_cleanup_callback) -- journal->j_fc_cleanup_callback(journal, 1); -+ journal->j_fc_cleanup_callback(journal, 1, commit_transaction->t_tid); - - trace_jbd2_end_commit(journal, commit_transaction); - jbd_debug(1, "JBD2: commit %d complete, head %d\n", -diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c -index 35302bc192eb9..097ba728d516d 100644 ---- a/fs/jbd2/journal.c -+++ b/fs/jbd2/journal.c -@@ -769,7 +769,7 @@ EXPORT_SYMBOL(jbd2_fc_begin_commit); - static int __jbd2_fc_end_commit(journal_t *journal, tid_t tid, bool fallback) - { - if (journal->j_fc_cleanup_callback) -- journal->j_fc_cleanup_callback(journal, 0); -+ journal->j_fc_cleanup_callback(journal, 0, tid); - write_lock(&journal->j_state_lock); - journal->j_flags &= ~JBD2_FAST_COMMIT_ONGOING; - if (fallback) -@@ -924,10 +924,16 @@ int jbd2_fc_wait_bufs(journal_t *journal, int num_blks) - for (i = j_fc_off - 1; i >= j_fc_off - num_blks; i--) { - bh = journal->j_fc_wbuf[i]; - wait_on_buffer(bh); -+ /* -+ * Update j_fc_off so jbd2_fc_release_bufs can release remain -+ * buffer head. 
-+ */ -+ if (unlikely(!buffer_uptodate(bh))) { -+ journal->j_fc_off = i + 1; -+ return -EIO; -+ } - put_bh(bh); - journal->j_fc_wbuf[i] = NULL; -- if (unlikely(!buffer_uptodate(bh))) -- return -EIO; - } - - return 0; -@@ -2970,6 +2976,7 @@ struct journal_head *jbd2_journal_grab_journal_head(struct buffer_head *bh) - jbd_unlock_bh_journal_head(bh); - return jh; - } -+EXPORT_SYMBOL(jbd2_journal_grab_journal_head); - - static void __journal_remove_journal_head(struct buffer_head *bh) - { -@@ -3022,6 +3029,7 @@ void jbd2_journal_put_journal_head(struct journal_head *jh) - jbd_unlock_bh_journal_head(bh); - } - } -+EXPORT_SYMBOL(jbd2_journal_put_journal_head); - - /* - * Initialize jbd inode head -diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c -index 8ca3527189f87..3c5dd010e39d2 100644 ---- a/fs/jbd2/recovery.c -+++ b/fs/jbd2/recovery.c -@@ -256,6 +256,7 @@ static int fc_do_one_pass(journal_t *journal, - err = journal->j_fc_replay_callback(journal, bh, pass, - next_fc_block - journal->j_fc_first, - expected_commit_id); -+ brelse(bh); - next_fc_block++; - if (err < 0 || err == JBD2_FC_REPLAY_STOP) - break; -diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c -index 6a3caedd22856..a57c0c8c63c4f 100644 ---- a/fs/jbd2/transaction.c -+++ b/fs/jbd2/transaction.c -@@ -173,7 +173,7 @@ static void wait_transaction_locked(journal_t *journal) - int need_to_start; - tid_t tid = journal->j_running_transaction->t_tid; - -- prepare_to_wait(&journal->j_wait_transaction_locked, &wait, -+ prepare_to_wait_exclusive(&journal->j_wait_transaction_locked, &wait, - TASK_UNINTERRUPTIBLE); - need_to_start = !tid_geq(journal->j_commit_request, tid); - read_unlock(&journal->j_state_lock); -@@ -199,7 +199,7 @@ static void wait_transaction_switching(journal_t *journal) - read_unlock(&journal->j_state_lock); - return; - } -- prepare_to_wait(&journal->j_wait_transaction_locked, &wait, -+ prepare_to_wait_exclusive(&journal->j_wait_transaction_locked, &wait, - TASK_UNINTERRUPTIBLE); - read_unlock(&journal->j_state_lock); - /* -@@ -911,7 +911,7 @@ void jbd2_journal_unlock_updates (journal_t *journal) - write_lock(&journal->j_state_lock); - --journal->j_barrier_count; - write_unlock(&journal->j_state_lock); -- wake_up(&journal->j_wait_transaction_locked); -+ wake_up_all(&journal->j_wait_transaction_locked); - } - - static void warn_dirty_buffer(struct buffer_head *bh) -@@ -1477,8 +1477,6 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh) - struct journal_head *jh; - int ret = 0; - -- if (is_handle_aborted(handle)) -- return -EROFS; - if (!buffer_jbd(bh)) - return -EUCLEAN; - -@@ -1525,6 +1523,18 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh) - journal = transaction->t_journal; - spin_lock(&jh->b_state_lock); - -+ if (is_handle_aborted(handle)) { -+ /* -+ * Check journal aborting with @jh->b_state_lock locked, -+ * since 'jh->b_transaction' could be replaced with -+ * 'jh->b_next_transaction' during old transaction -+ * committing if journal aborted, which may fail -+ * assertion on 'jh->b_frozen_data == NULL'. 
-+ */ -+ ret = -EROFS; -+ goto out_unlock_bh; -+ } -+ - if (jh->b_modified == 0) { - /* - * This buffer's got modified and becoming part -diff --git a/fs/jffs2/build.c b/fs/jffs2/build.c -index b288c8ae1236b..837cd55fd4c5e 100644 ---- a/fs/jffs2/build.c -+++ b/fs/jffs2/build.c -@@ -415,13 +415,15 @@ int jffs2_do_mount_fs(struct jffs2_sb_info *c) - jffs2_free_ino_caches(c); - jffs2_free_raw_node_refs(c); - ret = -EIO; -- goto out_free; -+ goto out_sum_exit; - } - - jffs2_calc_trigger_levels(c); - - return 0; - -+ out_sum_exit: -+ jffs2_sum_exit(c); - out_free: - kvfree(c->blocks); - -diff --git a/fs/jffs2/file.c b/fs/jffs2/file.c -index 4fc8cd698d1a4..bd7d58d27bfc6 100644 ---- a/fs/jffs2/file.c -+++ b/fs/jffs2/file.c -@@ -136,20 +136,15 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping, - struct page *pg; - struct inode *inode = mapping->host; - struct jffs2_inode_info *f = JFFS2_INODE_INFO(inode); -+ struct jffs2_sb_info *c = JFFS2_SB_INFO(inode->i_sb); - pgoff_t index = pos >> PAGE_SHIFT; - uint32_t pageofs = index << PAGE_SHIFT; - int ret = 0; - -- pg = grab_cache_page_write_begin(mapping, index, flags); -- if (!pg) -- return -ENOMEM; -- *pagep = pg; +-static void io_task_refs_refill(struct io_uring_task *tctx) +-{ +- unsigned int refill = -tctx->cached_refs + IO_TCTX_REFS_CACHE_NR; - - jffs2_dbg(1, "%s()\n", __func__); - - if (pageofs > inode->i_size) { - /* Make new hole frag from old EOF to new page */ -- struct jffs2_sb_info *c = JFFS2_SB_INFO(inode->i_sb); - struct jffs2_raw_inode ri; - struct jffs2_full_dnode *fn; - uint32_t alloc_len; -@@ -160,7 +155,7 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping, - ret = jffs2_reserve_space(c, sizeof(ri), &alloc_len, - ALLOC_NORMAL, JFFS2_SUMMARY_INODE_SIZE); - if (ret) -- goto out_page; -+ goto out_err; - - mutex_lock(&f->sem); - memset(&ri, 0, sizeof(ri)); -@@ -190,7 +185,7 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping, - ret = PTR_ERR(fn); - jffs2_complete_reservation(c); - mutex_unlock(&f->sem); -- goto out_page; -+ goto out_err; - } - ret = jffs2_add_full_dnode_to_inode(c, f, fn); - if (f->metadata) { -@@ -205,13 +200,26 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping, - jffs2_free_full_dnode(fn); - jffs2_complete_reservation(c); - mutex_unlock(&f->sem); -- goto out_page; -+ goto out_err; - } - jffs2_complete_reservation(c); - inode->i_size = pageofs; - mutex_unlock(&f->sem); - } - -+ /* -+ * While getting a page and reading data in, lock c->alloc_sem until -+ * the page is Uptodate. Otherwise GC task may attempt to read the same -+ * page in read_cache_page(), which causes a deadlock. -+ */ -+ mutex_lock(&c->alloc_sem); -+ pg = grab_cache_page_write_begin(mapping, index, flags); -+ if (!pg) { -+ ret = -ENOMEM; -+ goto release_sem; -+ } -+ *pagep = pg; -+ - /* - * Read in the page if it wasn't already present. Cannot optimize away - * the whole page write case until jffs2_write_end can handle the -@@ -221,15 +229,17 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping, - mutex_lock(&f->sem); - ret = jffs2_do_readpage_nolock(inode, pg); - mutex_unlock(&f->sem); -- if (ret) -- goto out_page; -+ if (ret) { -+ unlock_page(pg); -+ put_page(pg); -+ goto release_sem; -+ } - } - jffs2_dbg(1, "end write_begin(). 
pg->flags %lx\n", pg->flags); -- return ret; - --out_page: -- unlock_page(pg); -- put_page(pg); -+release_sem: -+ mutex_unlock(&c->alloc_sem); -+out_err: - return ret; - } - -diff --git a/fs/jffs2/fs.c b/fs/jffs2/fs.c -index 2ac410477c4f4..f83a468b64883 100644 ---- a/fs/jffs2/fs.c -+++ b/fs/jffs2/fs.c -@@ -603,8 +603,9 @@ out_root: - jffs2_free_ino_caches(c); - jffs2_free_raw_node_refs(c); - kvfree(c->blocks); -- out_inohash: - jffs2_clear_xattr_subsystem(c); -+ jffs2_sum_exit(c); -+ out_inohash: - kfree(c->inocache_list); - out_wbuf: - jffs2_flash_cleanup(c); -diff --git a/fs/jffs2/scan.c b/fs/jffs2/scan.c -index b676056826beb..29671e33a1714 100644 ---- a/fs/jffs2/scan.c -+++ b/fs/jffs2/scan.c -@@ -136,7 +136,7 @@ int jffs2_scan_medium(struct jffs2_sb_info *c) - if (!s) { - JFFS2_WARNING("Can't allocate memory for summary\n"); - ret = -ENOMEM; -- goto out; -+ goto out_buf; - } - } - -@@ -275,13 +275,15 @@ int jffs2_scan_medium(struct jffs2_sb_info *c) - } - ret = 0; - out: -+ jffs2_sum_reset_collected(s); -+ kfree(s); -+ out_buf: - if (buf_size) - kfree(flashbuf); - #ifndef __ECOS - else - mtd_unpoint(c->mtd, 0, c->mtd->size); - #endif -- kfree(s); - return ret; - } - -diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c -index 57ab424c05ff0..072821b50ab91 100644 ---- a/fs/jfs/inode.c -+++ b/fs/jfs/inode.c -@@ -146,12 +146,13 @@ void jfs_evict_inode(struct inode *inode) - dquot_initialize(inode); - - if (JFS_IP(inode)->fileset == FILESYSTEM_I) { -+ struct inode *ipimap = JFS_SBI(inode->i_sb)->ipimap; - truncate_inode_pages_final(&inode->i_data); - - if (test_cflag(COMMIT_Freewmap, inode)) - jfs_free_zero_link(inode); - -- if (JFS_SBI(inode->i_sb)->ipimap) -+ if (ipimap && JFS_IP(ipimap)->i_imap) - diFree(inode); - - /* -diff --git a/fs/jfs/jfs_dmap.c b/fs/jfs/jfs_dmap.c -index 91f4ec93dab1f..e75f31b81d634 100644 ---- a/fs/jfs/jfs_dmap.c -+++ b/fs/jfs/jfs_dmap.c -@@ -148,6 +148,7 @@ static const s8 budtab[256] = { - * 0 - success - * -ENOMEM - insufficient memory - * -EIO - i/o error -+ * -EINVAL - wrong bmap data - */ - int dbMount(struct inode *ipbmap) - { -@@ -179,6 +180,12 @@ int dbMount(struct inode *ipbmap) - bmp->db_nfree = le64_to_cpu(dbmp_le->dn_nfree); - bmp->db_l2nbperpage = le32_to_cpu(dbmp_le->dn_l2nbperpage); - bmp->db_numag = le32_to_cpu(dbmp_le->dn_numag); -+ if (!bmp->db_numag) { -+ release_metapage(mp); -+ kfree(bmp); -+ return -EINVAL; -+ } -+ - bmp->db_maxlevel = le32_to_cpu(dbmp_le->dn_maxlevel); - bmp->db_maxag = le32_to_cpu(dbmp_le->dn_maxag); - bmp->db_agpref = le32_to_cpu(dbmp_le->dn_agpref); -@@ -378,7 +385,8 @@ int dbFree(struct inode *ip, s64 blkno, s64 nblocks) - } - - /* write the last buffer. 
*/ -- write_metapage(mp); -+ if (mp) -+ write_metapage(mp); - - IREAD_UNLOCK(ipbmap); - -diff --git a/fs/jfs/jfs_mount.c b/fs/jfs/jfs_mount.c -index 5d7d7170c03c0..aa4ff7bcaff23 100644 ---- a/fs/jfs/jfs_mount.c -+++ b/fs/jfs/jfs_mount.c -@@ -81,14 +81,14 @@ int jfs_mount(struct super_block *sb) - * (initialize mount inode from the superblock) - */ - if ((rc = chkSuper(sb))) { -- goto errout20; -+ goto out; - } - - ipaimap = diReadSpecial(sb, AGGREGATE_I, 0); - if (ipaimap == NULL) { - jfs_err("jfs_mount: Failed to read AGGREGATE_I"); - rc = -EIO; -- goto errout20; -+ goto out; - } - sbi->ipaimap = ipaimap; - -@@ -99,7 +99,7 @@ int jfs_mount(struct super_block *sb) - */ - if ((rc = diMount(ipaimap))) { - jfs_err("jfs_mount: diMount(ipaimap) failed w/rc = %d", rc); -- goto errout21; -+ goto err_ipaimap; - } - - /* -@@ -108,7 +108,7 @@ int jfs_mount(struct super_block *sb) - ipbmap = diReadSpecial(sb, BMAP_I, 0); - if (ipbmap == NULL) { - rc = -EIO; -- goto errout22; -+ goto err_umount_ipaimap; - } - - jfs_info("jfs_mount: ipbmap:0x%p", ipbmap); -@@ -120,7 +120,7 @@ int jfs_mount(struct super_block *sb) - */ - if ((rc = dbMount(ipbmap))) { - jfs_err("jfs_mount: dbMount failed w/rc = %d", rc); -- goto errout22; -+ goto err_ipbmap; - } - - /* -@@ -139,7 +139,7 @@ int jfs_mount(struct super_block *sb) - if (!ipaimap2) { - jfs_err("jfs_mount: Failed to read AGGREGATE_I"); - rc = -EIO; -- goto errout35; -+ goto err_umount_ipbmap; - } - sbi->ipaimap2 = ipaimap2; - -@@ -151,7 +151,7 @@ int jfs_mount(struct super_block *sb) - if ((rc = diMount(ipaimap2))) { - jfs_err("jfs_mount: diMount(ipaimap2) failed, rc = %d", - rc); -- goto errout35; -+ goto err_ipaimap2; - } - } else - /* Secondary aggregate inode table is not valid */ -@@ -168,7 +168,7 @@ int jfs_mount(struct super_block *sb) - jfs_err("jfs_mount: Failed to read FILESYSTEM_I"); - /* open fileset secondary inode allocation map */ - rc = -EIO; -- goto errout40; -+ goto err_umount_ipaimap2; - } - jfs_info("jfs_mount: ipimap:0x%p", ipimap); - -@@ -178,41 +178,34 @@ int jfs_mount(struct super_block *sb) - /* initialize fileset inode allocation map */ - if ((rc = diMount(ipimap))) { - jfs_err("jfs_mount: diMount failed w/rc = %d", rc); -- goto errout41; -+ goto err_ipimap; - } - -- goto out; -+ return rc; - - /* - * unwind on error - */ -- errout41: /* close fileset inode allocation map inode */ -+err_ipimap: -+ /* close fileset inode allocation map inode */ - diFreeSpecial(ipimap); +- percpu_counter_add(&tctx->inflight, refill); +- refcount_add(refill, ¤t->usage); +- tctx->cached_refs += refill; +-} - -- errout40: /* fileset closed */ +-static inline void io_get_task_refs(int nr) +-{ +- struct io_uring_task *tctx = current->io_uring; - -+err_umount_ipaimap2: - /* close secondary aggregate inode allocation map */ -- if (ipaimap2) { -+ if (ipaimap2) - diUnmount(ipaimap2, 1); -+err_ipaimap2: -+ /* close aggregate inodes */ -+ if (ipaimap2) - diFreeSpecial(ipaimap2); +- tctx->cached_refs -= nr; +- if (unlikely(tctx->cached_refs < 0)) +- io_task_refs_refill(tctx); +-} +- +-static bool io_cqring_event_overflow(struct io_ring_ctx *ctx, u64 user_data, +- long res, unsigned int cflags) +-{ +- struct io_overflow_cqe *ocqe; +- +- ocqe = kmalloc(sizeof(*ocqe), GFP_ATOMIC | __GFP_ACCOUNT); +- if (!ocqe) { +- /* +- * If we're in ring overflow flush mode, or in task cancel mode, +- * or cannot allocate an overflow entry, then we need to drop it +- * on the floor. 
+- */ +- io_account_cq_overflow(ctx); +- return false; - } +- if (list_empty(&ctx->cq_overflow_list)) { +- set_bit(0, &ctx->check_cq_overflow); +- WRITE_ONCE(ctx->rings->sq_flags, +- ctx->rings->sq_flags | IORING_SQ_CQ_OVERFLOW); - -- errout35: +- } +- ocqe->cqe.user_data = user_data; +- ocqe->cqe.res = res; +- ocqe->cqe.flags = cflags; +- list_add_tail(&ocqe->list, &ctx->cq_overflow_list); +- return true; +-} - -- /* close aggregate block allocation map */ -+err_umount_ipbmap: /* close aggregate block allocation map */ - dbUnmount(ipbmap, 1); -+err_ipbmap: /* close aggregate inodes */ - diFreeSpecial(ipbmap); +-static inline bool __io_cqring_fill_event(struct io_ring_ctx *ctx, u64 user_data, +- long res, unsigned int cflags) +-{ +- struct io_uring_cqe *cqe; - -- errout22: /* close aggregate inode allocation map */ +- trace_io_uring_complete(ctx, user_data, res, cflags); - -+err_umount_ipaimap: /* close aggregate inode allocation map */ - diUnmount(ipaimap, 1); +- /* +- * If we can't get a cq entry, userspace overflowed the +- * submission (by quite a lot). Increment the overflow count in +- * the ring. +- */ +- cqe = io_get_cqe(ctx); +- if (likely(cqe)) { +- WRITE_ONCE(cqe->user_data, user_data); +- WRITE_ONCE(cqe->res, res); +- WRITE_ONCE(cqe->flags, cflags); +- return true; +- } +- return io_cqring_event_overflow(ctx, user_data, res, cflags); +-} - -- errout21: /* close aggregate inodes */ -+err_ipaimap: /* close aggregate inodes */ - diFreeSpecial(ipaimap); -- errout20: /* aggregate closed */ +-/* not as hot to bloat with inlining */ +-static noinline bool io_cqring_fill_event(struct io_ring_ctx *ctx, u64 user_data, +- long res, unsigned int cflags) +-{ +- return __io_cqring_fill_event(ctx, user_data, res, cflags); +-} - -- out: +-static void io_req_complete_post(struct io_kiocb *req, long res, +- unsigned int cflags) +-{ +- struct io_ring_ctx *ctx = req->ctx; - -+out: - if (rc) - jfs_err("Mount JFS Failure: %d", rc); - -diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c -index 8e0a1378a4b1f..7bf1d5fc2e9c5 100644 ---- a/fs/kernfs/dir.c -+++ b/fs/kernfs/dir.c -@@ -19,7 +19,15 @@ - - DECLARE_RWSEM(kernfs_rwsem); - static DEFINE_SPINLOCK(kernfs_rename_lock); /* kn->parent and ->name */ --static char kernfs_pr_cont_buf[PATH_MAX]; /* protected by rename_lock */ -+/* -+ * Don't use rename_lock to piggy back on pr_cont_buf. We don't want to -+ * call pr_cont() while holding rename_lock. Because sometimes pr_cont() -+ * will perform wakeups when releasing console_sem. Holding rename_lock -+ * will introduce deadlock if the scheduler reads the kernfs_name in the -+ * wakeup path. 
-+ */ -+static DEFINE_SPINLOCK(kernfs_pr_cont_lock); -+static char kernfs_pr_cont_buf[PATH_MAX]; /* protected by pr_cont_lock */ - static DEFINE_SPINLOCK(kernfs_idr_lock); /* root->ino_idr */ - - #define rb_to_kn(X) rb_entry((X), struct kernfs_node, rb) -@@ -230,12 +238,12 @@ void pr_cont_kernfs_name(struct kernfs_node *kn) - { - unsigned long flags; - -- spin_lock_irqsave(&kernfs_rename_lock, flags); -+ spin_lock_irqsave(&kernfs_pr_cont_lock, flags); - -- kernfs_name_locked(kn, kernfs_pr_cont_buf, sizeof(kernfs_pr_cont_buf)); -+ kernfs_name(kn, kernfs_pr_cont_buf, sizeof(kernfs_pr_cont_buf)); - pr_cont("%s", kernfs_pr_cont_buf); - -- spin_unlock_irqrestore(&kernfs_rename_lock, flags); -+ spin_unlock_irqrestore(&kernfs_pr_cont_lock, flags); - } - - /** -@@ -249,10 +257,10 @@ void pr_cont_kernfs_path(struct kernfs_node *kn) - unsigned long flags; - int sz; - -- spin_lock_irqsave(&kernfs_rename_lock, flags); -+ spin_lock_irqsave(&kernfs_pr_cont_lock, flags); - -- sz = kernfs_path_from_node_locked(kn, NULL, kernfs_pr_cont_buf, -- sizeof(kernfs_pr_cont_buf)); -+ sz = kernfs_path_from_node(kn, NULL, kernfs_pr_cont_buf, -+ sizeof(kernfs_pr_cont_buf)); - if (sz < 0) { - pr_cont("(error)"); - goto out; -@@ -266,7 +274,7 @@ void pr_cont_kernfs_path(struct kernfs_node *kn) - pr_cont("%s", kernfs_pr_cont_buf); - - out: -- spin_unlock_irqrestore(&kernfs_rename_lock, flags); -+ spin_unlock_irqrestore(&kernfs_pr_cont_lock, flags); - } - - /** -@@ -822,13 +830,12 @@ static struct kernfs_node *kernfs_walk_ns(struct kernfs_node *parent, - - lockdep_assert_held_read(&kernfs_rwsem); - -- /* grab kernfs_rename_lock to piggy back on kernfs_pr_cont_buf */ -- spin_lock_irq(&kernfs_rename_lock); -+ spin_lock_irq(&kernfs_pr_cont_lock); - - len = strlcpy(kernfs_pr_cont_buf, path, sizeof(kernfs_pr_cont_buf)); - - if (len >= sizeof(kernfs_pr_cont_buf)) { -- spin_unlock_irq(&kernfs_rename_lock); -+ spin_unlock_irq(&kernfs_pr_cont_lock); - return NULL; - } - -@@ -840,7 +847,7 @@ static struct kernfs_node *kernfs_walk_ns(struct kernfs_node *parent, - parent = kernfs_find_ns(parent, name, ns); - } - -- spin_unlock_irq(&kernfs_rename_lock); -+ spin_unlock_irq(&kernfs_pr_cont_lock); - - return parent; - } -diff --git a/fs/ksmbd/Kconfig b/fs/ksmbd/Kconfig -index b83cbd756ae50..6af339cfdc041 100644 ---- a/fs/ksmbd/Kconfig -+++ b/fs/ksmbd/Kconfig -@@ -19,6 +19,7 @@ config SMB_SERVER - select CRYPTO_GCM - select ASN1 - select OID_REGISTRY -+ select CRC32 - default n - help - Choose Y here if you want to allow SMB3 compliant clients -diff --git a/fs/ksmbd/connection.c b/fs/ksmbd/connection.c -index b57a0d8a392ff..02254b09c0daf 100644 ---- a/fs/ksmbd/connection.c -+++ b/fs/ksmbd/connection.c -@@ -62,6 +62,7 @@ struct ksmbd_conn *ksmbd_conn_alloc(void) - atomic_set(&conn->req_running, 0); - atomic_set(&conn->r_count, 0); - conn->total_credits = 1; -+ conn->outstanding_credits = 0; - - init_waitqueue_head(&conn->req_running_q); - INIT_LIST_HEAD(&conn->conns_list); -diff --git a/fs/ksmbd/connection.h b/fs/ksmbd/connection.h -index e5403c587a58c..8694aef482c1a 100644 ---- a/fs/ksmbd/connection.h -+++ b/fs/ksmbd/connection.h -@@ -61,8 +61,8 @@ struct ksmbd_conn { - atomic_t req_running; - /* References which are made for this Server object*/ - atomic_t r_count; -- unsigned short total_credits; -- unsigned short max_credits; -+ unsigned int total_credits; -+ unsigned int outstanding_credits; - spinlock_t credits_lock; - wait_queue_head_t req_running_q; - /* Lock to protect requests list*/ -diff --git a/fs/ksmbd/ksmbd_netlink.h 
b/fs/ksmbd/ksmbd_netlink.h -index c6718a05d347f..71bfb7de44725 100644 ---- a/fs/ksmbd/ksmbd_netlink.h -+++ b/fs/ksmbd/ksmbd_netlink.h -@@ -103,6 +103,8 @@ struct ksmbd_startup_request { - * we set the SPARSE_FILES bit (0x40). - */ - __u32 sub_auth[3]; /* Subauth value for Security ID */ -+ __u32 smb2_max_credits; /* MAX credits */ -+ __u32 reserved[128]; /* Reserved room */ - __u32 ifc_list_sz; /* interfaces list size */ - __s8 ____payload[]; - }; -@@ -113,7 +115,7 @@ struct ksmbd_startup_request { - * IPC request to shutdown ksmbd server. - */ - struct ksmbd_shutdown_request { -- __s32 reserved; -+ __s32 reserved[16]; - }; - - /* -@@ -122,6 +124,7 @@ struct ksmbd_shutdown_request { - struct ksmbd_login_request { - __u32 handle; - __s8 account[KSMBD_REQ_MAX_ACCOUNT_NAME_SZ]; /* user account name */ -+ __u32 reserved[16]; /* Reserved room */ - }; - - /* -@@ -135,6 +138,7 @@ struct ksmbd_login_response { - __u16 status; - __u16 hash_sz; /* hash size */ - __s8 hash[KSMBD_REQ_MAX_HASH_SZ]; /* password hash */ -+ __u32 reserved[16]; /* Reserved room */ - }; - - /* -@@ -143,6 +147,7 @@ struct ksmbd_login_response { - struct ksmbd_share_config_request { - __u32 handle; - __s8 share_name[KSMBD_REQ_MAX_SHARE_NAME]; /* share name */ -+ __u32 reserved[16]; /* Reserved room */ - }; - - /* -@@ -157,6 +162,7 @@ struct ksmbd_share_config_response { - __u16 force_directory_mode; - __u16 force_uid; - __u16 force_gid; -+ __u32 reserved[128]; /* Reserved room */ - __u32 veto_list_sz; - __s8 ____payload[]; - }; -@@ -187,6 +193,7 @@ struct ksmbd_tree_connect_request { - __s8 account[KSMBD_REQ_MAX_ACCOUNT_NAME_SZ]; - __s8 share[KSMBD_REQ_MAX_SHARE_NAME]; - __s8 peer_addr[64]; -+ __u32 reserved[16]; /* Reserved room */ - }; - - /* -@@ -196,6 +203,7 @@ struct ksmbd_tree_connect_response { - __u32 handle; - __u16 status; - __u16 connection_flags; -+ __u32 reserved[16]; /* Reserved room */ - }; - - /* -@@ -204,6 +212,7 @@ struct ksmbd_tree_connect_response { - struct ksmbd_tree_disconnect_request { - __u64 session_id; /* session id */ - __u64 connect_id; /* tree connection id */ -+ __u32 reserved[16]; /* Reserved room */ - }; - - /* -@@ -212,6 +221,7 @@ struct ksmbd_tree_disconnect_request { - struct ksmbd_logout_request { - __s8 account[KSMBD_REQ_MAX_ACCOUNT_NAME_SZ]; /* user account name */ - __u32 account_flags; -+ __u32 reserved[16]; /* Reserved room */ - }; - - /* -diff --git a/fs/ksmbd/mgmt/tree_connect.c b/fs/ksmbd/mgmt/tree_connect.c -index 0d28e723a28c7..940385c6a9135 100644 ---- a/fs/ksmbd/mgmt/tree_connect.c -+++ b/fs/ksmbd/mgmt/tree_connect.c -@@ -18,7 +18,7 @@ - struct ksmbd_tree_conn_status - ksmbd_tree_conn_connect(struct ksmbd_session *sess, char *share_name) - { -- struct ksmbd_tree_conn_status status = {-EINVAL, NULL}; -+ struct ksmbd_tree_conn_status status = {-ENOENT, NULL}; - struct ksmbd_tree_connect_response *resp = NULL; - struct ksmbd_share_config *sc; - struct ksmbd_tree_connect *tree_conn = NULL; -diff --git a/fs/ksmbd/ndr.c b/fs/ksmbd/ndr.c -index 8317f7ca402b4..5052be9261d91 100644 ---- a/fs/ksmbd/ndr.c -+++ b/fs/ksmbd/ndr.c -@@ -148,7 +148,7 @@ static int ndr_read_int16(struct ndr *n, __u16 *value) - static int ndr_read_int32(struct ndr *n, __u32 *value) - { - if (n->offset + sizeof(__u32) > n->length) -- return 0; -+ return -EINVAL; - - if (value) - *value = le32_to_cpu(*(__le32 *)ndr_get_field(n)); -diff --git a/fs/ksmbd/server.c b/fs/ksmbd/server.c -index 2a2b2135bfded..976d09aaee703 100644 ---- a/fs/ksmbd/server.c -+++ b/fs/ksmbd/server.c -@@ -235,10 +235,8 @@ send: - if 
(work->sess && work->sess->enc && work->encrypted && - conn->ops->encrypt_resp) { - rc = conn->ops->encrypt_resp(work); -- if (rc < 0) { -+ if (rc < 0) - conn->ops->set_rsp_status(work, STATUS_DATA_ERROR); -- goto send; +- spin_lock(&ctx->completion_lock); +- __io_cqring_fill_event(ctx, req->user_data, res, cflags); +- /* +- * If we're the last reference to this request, add to our locked +- * free_list cache. +- */ +- if (req_ref_put_and_test(req)) { +- if (req->flags & (REQ_F_LINK | REQ_F_HARDLINK)) { +- if (req->flags & IO_DISARM_MASK) +- io_disarm_next(req); +- if (req->link) { +- io_req_task_queue(req->link); +- req->link = NULL; +- } - } - } - - ksmbd_conn_write(work); -@@ -632,5 +630,6 @@ MODULE_SOFTDEP("pre: sha512"); - MODULE_SOFTDEP("pre: aead2"); - MODULE_SOFTDEP("pre: ccm"); - MODULE_SOFTDEP("pre: gcm"); -+MODULE_SOFTDEP("pre: crc32"); - module_init(ksmbd_server_init) - module_exit(ksmbd_server_exit) -diff --git a/fs/ksmbd/smb2misc.c b/fs/ksmbd/smb2misc.c -index 030ca57c37849..b47be71be4c82 100644 ---- a/fs/ksmbd/smb2misc.c -+++ b/fs/ksmbd/smb2misc.c -@@ -91,11 +91,6 @@ static int smb2_get_data_area_len(unsigned int *off, unsigned int *len, - *off = 0; - *len = 0; - -- /* error reqeusts do not have data area */ -- if (hdr->Status && hdr->Status != STATUS_MORE_PROCESSING_REQUIRED && -- (((struct smb2_err_rsp *)hdr)->StructureSize) == SMB2_ERROR_STRUCTURE_SIZE2_LE) -- return ret; -- - /* - * Following commands have data areas so we have to get the location - * of the data buffer offset and data buffer length for the particular -@@ -137,8 +132,11 @@ static int smb2_get_data_area_len(unsigned int *off, unsigned int *len, - *len = le16_to_cpu(((struct smb2_read_req *)hdr)->ReadChannelInfoLength); - break; - case SMB2_WRITE: -- if (((struct smb2_write_req *)hdr)->DataOffset) { -- *off = le16_to_cpu(((struct smb2_write_req *)hdr)->DataOffset); -+ if (((struct smb2_write_req *)hdr)->DataOffset || -+ ((struct smb2_write_req *)hdr)->Length) { -+ *off = max_t(unsigned int, -+ le16_to_cpu(((struct smb2_write_req *)hdr)->DataOffset), -+ offsetof(struct smb2_write_req, Buffer) - 4); - *len = le32_to_cpu(((struct smb2_write_req *)hdr)->Length); - break; - } -@@ -290,7 +288,7 @@ static int smb2_validate_credit_charge(struct ksmbd_conn *conn, - unsigned int req_len = 0, expect_resp_len = 0, calc_credit_num, max_len; - unsigned short credit_charge = le16_to_cpu(hdr->CreditCharge); - void *__hdr = hdr; -- int ret; -+ int ret = 0; - - switch (hdr->Command) { - case SMB2_QUERY_INFO: -@@ -327,21 +325,27 @@ static int smb2_validate_credit_charge(struct ksmbd_conn *conn, - ksmbd_debug(SMB, "Insufficient credit charge, given: %d, needed: %d\n", - credit_charge, calc_credit_num); - return 1; -- } else if (credit_charge > conn->max_credits) { -+ } else if (credit_charge > conn->vals->max_credits) { - ksmbd_debug(SMB, "Too large credit charge: %d\n", credit_charge); - return 1; - } - - spin_lock(&conn->credits_lock); -- if (credit_charge <= conn->total_credits) { -- conn->total_credits -= credit_charge; -- ret = 0; +- io_dismantle_req(req); +- io_put_task(req->task, 1); +- list_add(&req->inflight_entry, &ctx->locked_free_list); +- ctx->locked_free_nr++; - } else { -+ if (credit_charge > conn->total_credits) { - ksmbd_debug(SMB, "Insufficient credits granted, given: %u, granted: %u\n", - credit_charge, conn->total_credits); - ret = 1; - } -+ -+ if ((u64)conn->outstanding_credits + credit_charge > conn->total_credits) { -+ ksmbd_debug(SMB, "Limits exceeding the maximum allowable outstanding requests, given 
: %u, pending : %u\n", -+ credit_charge, conn->outstanding_credits); -+ ret = 1; -+ } else -+ conn->outstanding_credits += credit_charge; -+ - spin_unlock(&conn->credits_lock); -+ - return ret; - } - -@@ -358,12 +362,10 @@ int ksmbd_smb2_check_message(struct ksmbd_work *work) - hdr = &pdu->hdr; - } - -- if (le32_to_cpu(hdr->NextCommand) > 0) { -+ if (le32_to_cpu(hdr->NextCommand) > 0) - len = le32_to_cpu(hdr->NextCommand); -- } else if (work->next_smb2_rcv_hdr_off) { -+ else if (work->next_smb2_rcv_hdr_off) - len -= work->next_smb2_rcv_hdr_off; -- len = round_up(len, 8); +- if (!percpu_ref_tryget(&ctx->refs)) +- req = NULL; - } - - if (check_smb2_hdr(hdr)) - return 1; -diff --git a/fs/ksmbd/smb2ops.c b/fs/ksmbd/smb2ops.c -index fb6a65d231391..f0a5b704f301c 100644 ---- a/fs/ksmbd/smb2ops.c -+++ b/fs/ksmbd/smb2ops.c -@@ -20,6 +20,7 @@ static struct smb_version_values smb21_server_values = { - .max_read_size = SMB21_DEFAULT_IOSIZE, - .max_write_size = SMB21_DEFAULT_IOSIZE, - .max_trans_size = SMB21_DEFAULT_IOSIZE, -+ .max_credits = SMB2_MAX_CREDITS, - .large_lock_type = 0, - .exclusive_lock_type = SMB2_LOCKFLAG_EXCLUSIVE, - .shared_lock_type = SMB2_LOCKFLAG_SHARED, -@@ -45,6 +46,7 @@ static struct smb_version_values smb30_server_values = { - .max_read_size = SMB3_DEFAULT_IOSIZE, - .max_write_size = SMB3_DEFAULT_IOSIZE, - .max_trans_size = SMB3_DEFAULT_TRANS_SIZE, -+ .max_credits = SMB2_MAX_CREDITS, - .large_lock_type = 0, - .exclusive_lock_type = SMB2_LOCKFLAG_EXCLUSIVE, - .shared_lock_type = SMB2_LOCKFLAG_SHARED, -@@ -71,6 +73,7 @@ static struct smb_version_values smb302_server_values = { - .max_read_size = SMB3_DEFAULT_IOSIZE, - .max_write_size = SMB3_DEFAULT_IOSIZE, - .max_trans_size = SMB3_DEFAULT_TRANS_SIZE, -+ .max_credits = SMB2_MAX_CREDITS, - .large_lock_type = 0, - .exclusive_lock_type = SMB2_LOCKFLAG_EXCLUSIVE, - .shared_lock_type = SMB2_LOCKFLAG_SHARED, -@@ -97,6 +100,7 @@ static struct smb_version_values smb311_server_values = { - .max_read_size = SMB3_DEFAULT_IOSIZE, - .max_write_size = SMB3_DEFAULT_IOSIZE, - .max_trans_size = SMB3_DEFAULT_TRANS_SIZE, -+ .max_credits = SMB2_MAX_CREDITS, - .large_lock_type = 0, - .exclusive_lock_type = SMB2_LOCKFLAG_EXCLUSIVE, - .shared_lock_type = SMB2_LOCKFLAG_SHARED, -@@ -198,7 +202,6 @@ void init_smb2_1_server(struct ksmbd_conn *conn) - conn->ops = &smb2_0_server_ops; - conn->cmds = smb2_0_server_cmds; - conn->max_cmds = ARRAY_SIZE(smb2_0_server_cmds); -- conn->max_credits = SMB2_MAX_CREDITS; - conn->signing_algorithm = SIGNING_ALG_HMAC_SHA256; - - if (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB2_LEASES) -@@ -216,7 +219,6 @@ void init_smb3_0_server(struct ksmbd_conn *conn) - conn->ops = &smb3_0_server_ops; - conn->cmds = smb2_0_server_cmds; - conn->max_cmds = ARRAY_SIZE(smb2_0_server_cmds); -- conn->max_credits = SMB2_MAX_CREDITS; - conn->signing_algorithm = SIGNING_ALG_AES_CMAC; - - if (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB2_LEASES) -@@ -241,7 +243,6 @@ void init_smb3_02_server(struct ksmbd_conn *conn) - conn->ops = &smb3_0_server_ops; - conn->cmds = smb2_0_server_cmds; - conn->max_cmds = ARRAY_SIZE(smb2_0_server_cmds); -- conn->max_credits = SMB2_MAX_CREDITS; - conn->signing_algorithm = SIGNING_ALG_AES_CMAC; - - if (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB2_LEASES) -@@ -266,15 +267,11 @@ int init_smb3_11_server(struct ksmbd_conn *conn) - conn->ops = &smb3_11_server_ops; - conn->cmds = smb2_0_server_cmds; - conn->max_cmds = ARRAY_SIZE(smb2_0_server_cmds); -- conn->max_credits = SMB2_MAX_CREDITS; - conn->signing_algorithm = 
SIGNING_ALG_AES_CMAC; - - if (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB2_LEASES) - conn->vals->capabilities |= SMB2_GLOBAL_CAP_LEASING; - -- if (conn->cipher_type) -- conn->vals->capabilities |= SMB2_GLOBAL_CAP_ENCRYPTION; +- io_commit_cqring(ctx); +- spin_unlock(&ctx->completion_lock); - - if (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB3_MULTICHANNEL) - conn->vals->capabilities |= SMB2_GLOBAL_CAP_MULTI_CHANNEL; - -@@ -308,3 +305,11 @@ void init_smb2_max_trans_size(unsigned int sz) - smb302_server_values.max_trans_size = sz; - smb311_server_values.max_trans_size = sz; - } -+ -+void init_smb2_max_credits(unsigned int sz) -+{ -+ smb21_server_values.max_credits = sz; -+ smb30_server_values.max_credits = sz; -+ smb302_server_values.max_credits = sz; -+ smb311_server_values.max_credits = sz; -+} -diff --git a/fs/ksmbd/smb2pdu.c b/fs/ksmbd/smb2pdu.c -index 7e448df3f8474..65c85ca71ebe0 100644 ---- a/fs/ksmbd/smb2pdu.c -+++ b/fs/ksmbd/smb2pdu.c -@@ -11,6 +11,7 @@ - #include <linux/statfs.h> - #include <linux/ethtool.h> - #include <linux/falloc.h> -+#include <linux/mount.h> - - #include "glob.h" - #include "smb2pdu.h" -@@ -301,16 +302,15 @@ int smb2_set_rsp_credits(struct ksmbd_work *work) - struct smb2_hdr *req_hdr = ksmbd_req_buf_next(work); - struct smb2_hdr *hdr = ksmbd_resp_buf_next(work); - struct ksmbd_conn *conn = work->conn; -- unsigned short credits_requested; -+ unsigned short credits_requested, aux_max; - unsigned short credit_charge, credits_granted = 0; -- unsigned short aux_max, aux_credits; - - if (work->send_no_response) - return 0; - - hdr->CreditCharge = req_hdr->CreditCharge; - -- if (conn->total_credits > conn->max_credits) { -+ if (conn->total_credits > conn->vals->max_credits) { - hdr->CreditRequest = 0; - pr_err("Total credits overflow: %d\n", conn->total_credits); - return -EINVAL; -@@ -318,6 +318,14 @@ int smb2_set_rsp_credits(struct ksmbd_work *work) - - credit_charge = max_t(unsigned short, - le16_to_cpu(req_hdr->CreditCharge), 1); -+ if (credit_charge > conn->total_credits) { -+ ksmbd_debug(SMB, "Insufficient credits granted, given: %u, granted: %u\n", -+ credit_charge, conn->total_credits); -+ return -EINVAL; -+ } -+ -+ conn->total_credits -= credit_charge; -+ conn->outstanding_credits -= credit_charge; - credits_requested = max_t(unsigned short, - le16_to_cpu(req_hdr->CreditRequest), 1); - -@@ -327,16 +335,14 @@ int smb2_set_rsp_credits(struct ksmbd_work *work) - * TODO: Need to adjuct CreditRequest value according to - * current cpu load - */ -- aux_credits = credits_requested - 1; - if (hdr->Command == SMB2_NEGOTIATE) -- aux_max = 0; -+ aux_max = 1; - else -- aux_max = conn->max_credits - credit_charge; -- aux_credits = min_t(unsigned short, aux_credits, aux_max); -- credits_granted = credit_charge + aux_credits; -+ aux_max = conn->vals->max_credits - credit_charge; -+ credits_granted = min_t(unsigned short, credits_requested, aux_max); - -- if (conn->max_credits - conn->total_credits < credits_granted) -- credits_granted = conn->max_credits - -+ if (conn->vals->max_credits - conn->total_credits < credits_granted) -+ credits_granted = conn->vals->max_credits - - conn->total_credits; - - conn->total_credits += credits_granted; -@@ -535,9 +541,10 @@ int smb2_allocate_rsp_buf(struct ksmbd_work *work) - struct smb2_query_info_req *req; - - req = work->request_buf; -- if (req->InfoType == SMB2_O_INFO_FILE && -- (req->FileInfoClass == FILE_FULL_EA_INFORMATION || -- req->FileInfoClass == FILE_ALL_INFORMATION)) -+ if ((req->InfoType == SMB2_O_INFO_FILE && -+ 
(req->FileInfoClass == FILE_FULL_EA_INFORMATION || -+ req->FileInfoClass == FILE_ALL_INFORMATION)) || -+ req->InfoType == SMB2_O_INFO_SECURITY) - sz = large_sz; - } - -@@ -917,6 +924,25 @@ static void decode_encrypt_ctxt(struct ksmbd_conn *conn, - } - } - -+/** -+ * smb3_encryption_negotiated() - checks if server and client agreed on enabling encryption -+ * @conn: smb connection -+ * -+ * Return: true if connection should be encrypted, else false -+ */ -+static bool smb3_encryption_negotiated(struct ksmbd_conn *conn) -+{ -+ if (!conn->ops->generate_encryptionkey) -+ return false; -+ -+ /* -+ * SMB 3.0 and 3.0.2 dialects use the SMB2_GLOBAL_CAP_ENCRYPTION flag. -+ * SMB 3.1.1 uses the cipher_type field. -+ */ -+ return (conn->vals->capabilities & SMB2_GLOBAL_CAP_ENCRYPTION) || -+ conn->cipher_type; -+} -+ - static void decode_compress_ctxt(struct ksmbd_conn *conn, - struct smb2_compression_ctx *pneg_ctxt) - { -@@ -1121,12 +1147,16 @@ int smb2_handle_negotiate(struct ksmbd_work *work) - status); - rsp->hdr.Status = status; - rc = -EINVAL; -+ kfree(conn->preauth_info); -+ conn->preauth_info = NULL; - goto err_out; - } - - rc = init_smb3_11_server(conn); - if (rc < 0) { - rsp->hdr.Status = STATUS_INVALID_PARAMETER; -+ kfree(conn->preauth_info); -+ conn->preauth_info = NULL; - goto err_out; - } - -@@ -1438,11 +1468,6 @@ static int ntlm_authenticate(struct ksmbd_work *work) - - sess->user = user; - if (user_guest(sess->user)) { -- if (conn->sign) { -- ksmbd_debug(SMB, "Guest login not allowed when signing enabled\n"); -- return -EPERM; +- if (req) { +- io_cqring_ev_posted(ctx); +- percpu_ref_put(&ctx->refs); +- } +-} +- +-static inline bool io_req_needs_clean(struct io_kiocb *req) +-{ +- return req->flags & IO_REQ_CLEAN_FLAGS; +-} +- +-static void io_req_complete_state(struct io_kiocb *req, long res, +- unsigned int cflags) +-{ +- if (io_req_needs_clean(req)) +- io_clean_op(req); +- req->result = res; +- req->compl.cflags = cflags; +- req->flags |= REQ_F_COMPLETE_INLINE; +-} +- +-static inline void __io_req_complete(struct io_kiocb *req, unsigned issue_flags, +- long res, unsigned cflags) +-{ +- if (issue_flags & IO_URING_F_COMPLETE_DEFER) +- io_req_complete_state(req, res, cflags); +- else +- io_req_complete_post(req, res, cflags); +-} +- +-static inline void io_req_complete(struct io_kiocb *req, long res) +-{ +- __io_req_complete(req, 0, res, 0); +-} +- +-static void io_req_complete_failed(struct io_kiocb *req, long res) +-{ +- req_set_fail(req); +- io_req_complete_post(req, res, 0); +-} +- +-static void io_req_complete_fail_submit(struct io_kiocb *req) +-{ +- /* +- * We don't submit, fail them all, for that replace hardlinks with +- * normal links. Extra REQ_F_LINK is tolerated. +- */ +- req->flags &= ~REQ_F_HARDLINK; +- req->flags |= REQ_F_LINK; +- io_req_complete_failed(req, req->result); +-} +- +-/* +- * Don't initialise the fields below on every allocation, but do that in +- * advance and keep them valid across allocations. 
+- */ +-static void io_preinit_req(struct io_kiocb *req, struct io_ring_ctx *ctx) +-{ +- req->ctx = ctx; +- req->link = NULL; +- req->async_data = NULL; +- /* not necessary, but safer to zero */ +- req->result = 0; +-} +- +-static void io_flush_cached_locked_reqs(struct io_ring_ctx *ctx, +- struct io_submit_state *state) +-{ +- spin_lock(&ctx->completion_lock); +- list_splice_init(&ctx->locked_free_list, &state->free_list); +- ctx->locked_free_nr = 0; +- spin_unlock(&ctx->completion_lock); +-} +- +-/* Returns true IFF there are requests in the cache */ +-static bool io_flush_cached_reqs(struct io_ring_ctx *ctx) +-{ +- struct io_submit_state *state = &ctx->submit_state; +- int nr; +- +- /* +- * If we have more than a batch's worth of requests in our IRQ side +- * locked cache, grab the lock and move them over to our submission +- * side cache. +- */ +- if (READ_ONCE(ctx->locked_free_nr) > IO_COMPL_BATCH) +- io_flush_cached_locked_reqs(ctx, state); +- +- nr = state->free_reqs; +- while (!list_empty(&state->free_list)) { +- struct io_kiocb *req = list_first_entry(&state->free_list, +- struct io_kiocb, inflight_entry); +- +- list_del(&req->inflight_entry); +- state->reqs[nr++] = req; +- if (nr == ARRAY_SIZE(state->reqs)) +- break; +- } +- +- state->free_reqs = nr; +- return nr != 0; +-} +- +-/* +- * A request might get retired back into the request caches even before opcode +- * handlers and io_issue_sqe() are done with it, e.g. inline completion path. +- * Because of that, io_alloc_req() should be called only under ->uring_lock +- * and with extra caution to not get a request that is still worked on. +- */ +-static struct io_kiocb *io_alloc_req(struct io_ring_ctx *ctx) +- __must_hold(&ctx->uring_lock) +-{ +- struct io_submit_state *state = &ctx->submit_state; +- gfp_t gfp = GFP_KERNEL | __GFP_NOWARN; +- int ret, i; +- +- BUILD_BUG_ON(ARRAY_SIZE(state->reqs) < IO_REQ_ALLOC_BATCH); +- +- if (likely(state->free_reqs || io_flush_cached_reqs(ctx))) +- goto got_req; +- +- ret = kmem_cache_alloc_bulk(req_cachep, gfp, IO_REQ_ALLOC_BATCH, +- state->reqs); +- +- /* +- * Bulk alloc is all-or-nothing. If we fail to get a batch, +- * retry single alloc to be on the safe side. 
+- */ +- if (unlikely(ret <= 0)) { +- state->reqs[0] = kmem_cache_alloc(req_cachep, gfp); +- if (!state->reqs[0]) +- return NULL; +- ret = 1; +- } +- +- for (i = 0; i < ret; i++) +- io_preinit_req(state->reqs[i], ctx); +- state->free_reqs = ret; +-got_req: +- state->free_reqs--; +- return state->reqs[state->free_reqs]; +-} +- +-static inline void io_put_file(struct file *file) +-{ +- if (file) +- fput(file); +-} +- +-static void io_dismantle_req(struct io_kiocb *req) +-{ +- unsigned int flags = req->flags; +- +- if (io_req_needs_clean(req)) +- io_clean_op(req); +- if (!(flags & REQ_F_FIXED_FILE)) +- io_put_file(req->file); +- if (req->fixed_rsrc_refs) +- percpu_ref_put(req->fixed_rsrc_refs); +- if (req->async_data) { +- kfree(req->async_data); +- req->async_data = NULL; +- } +-} +- +-static void __io_free_req(struct io_kiocb *req) +-{ +- struct io_ring_ctx *ctx = req->ctx; +- +- io_dismantle_req(req); +- io_put_task(req->task, 1); +- +- spin_lock(&ctx->completion_lock); +- list_add(&req->inflight_entry, &ctx->locked_free_list); +- ctx->locked_free_nr++; +- spin_unlock(&ctx->completion_lock); +- +- percpu_ref_put(&ctx->refs); +-} +- +-static inline void io_remove_next_linked(struct io_kiocb *req) +-{ +- struct io_kiocb *nxt = req->link; +- +- req->link = nxt->link; +- nxt->link = NULL; +-} +- +-static bool io_kill_linked_timeout(struct io_kiocb *req) +- __must_hold(&req->ctx->completion_lock) +- __must_hold(&req->ctx->timeout_lock) +-{ +- struct io_kiocb *link = req->link; +- +- if (link && link->opcode == IORING_OP_LINK_TIMEOUT) { +- struct io_timeout_data *io = link->async_data; +- +- io_remove_next_linked(req); +- link->timeout.head = NULL; +- if (hrtimer_try_to_cancel(&io->timer) != -1) { +- list_del(&link->timeout.list); +- io_cqring_fill_event(link->ctx, link->user_data, +- -ECANCELED, 0); +- io_put_req_deferred(link); +- return true; - } +- } +- return false; +-} - - rsp->SessionFlags = SMB2_SESSION_FLAG_IS_GUEST_LE; - } else { - struct authenticate_message *authblob; -@@ -1455,39 +1480,39 @@ static int ntlm_authenticate(struct ksmbd_work *work) - ksmbd_debug(SMB, "authentication failed\n"); - return -EPERM; - } -+ } - -- /* -- * If session state is SMB2_SESSION_VALID, We can assume -- * that it is reauthentication. And the user/password -- * has been verified, so return it here. -- */ -- if (sess->state == SMB2_SESSION_VALID) { -- if (conn->binding) -- goto binding_session; -- return 0; +-static void io_fail_links(struct io_kiocb *req) +- __must_hold(&req->ctx->completion_lock) +-{ +- struct io_kiocb *nxt, *link = req->link; +- +- req->link = NULL; +- while (link) { +- long res = -ECANCELED; +- +- if (link->flags & REQ_F_FAIL) +- res = link->result; +- +- nxt = link->link; +- link->link = NULL; +- +- trace_io_uring_fail_link(req, link); +- io_cqring_fill_event(link->ctx, link->user_data, res, 0); +- io_put_req_deferred(link); +- link = nxt; +- } +-} +- +-static bool io_disarm_next(struct io_kiocb *req) +- __must_hold(&req->ctx->completion_lock) +-{ +- bool posted = false; +- +- if (req->flags & REQ_F_ARM_LTIMEOUT) { +- struct io_kiocb *link = req->link; +- +- req->flags &= ~REQ_F_ARM_LTIMEOUT; +- if (link && link->opcode == IORING_OP_LINK_TIMEOUT) { +- io_remove_next_linked(req); +- io_cqring_fill_event(link->ctx, link->user_data, +- -ECANCELED, 0); +- io_put_req_deferred(link); +- posted = true; - } -+ /* -+ * If session state is SMB2_SESSION_VALID, We can assume -+ * that it is reauthentication. And the user/password -+ * has been verified, so return it here. 
-+ */ -+ if (sess->state == SMB2_SESSION_VALID) { -+ if (conn->binding) -+ goto binding_session; -+ return 0; -+ } - -- if ((conn->sign || server_conf.enforced_signing) || -- (req->SecurityMode & SMB2_NEGOTIATE_SIGNING_REQUIRED)) -- sess->sign = true; -+ if ((rsp->SessionFlags != SMB2_SESSION_FLAG_IS_GUEST_LE && -+ (conn->sign || server_conf.enforced_signing)) || -+ (req->SecurityMode & SMB2_NEGOTIATE_SIGNING_REQUIRED)) -+ sess->sign = true; - -- if (conn->vals->capabilities & SMB2_GLOBAL_CAP_ENCRYPTION && -- conn->ops->generate_encryptionkey && -- !(req->Flags & SMB2_SESSION_REQ_FLAG_BINDING)) { -- rc = conn->ops->generate_encryptionkey(sess); -- if (rc) { -- ksmbd_debug(SMB, -- "SMB3 encryption key generation failed\n"); -- return -EINVAL; +- } else if (req->flags & REQ_F_LINK_TIMEOUT) { +- struct io_ring_ctx *ctx = req->ctx; +- +- spin_lock_irq(&ctx->timeout_lock); +- posted = io_kill_linked_timeout(req); +- spin_unlock_irq(&ctx->timeout_lock); +- } +- if (unlikely((req->flags & REQ_F_FAIL) && +- !(req->flags & REQ_F_HARDLINK))) { +- posted |= (req->link != NULL); +- io_fail_links(req); +- } +- return posted; +-} +- +-static struct io_kiocb *__io_req_find_next(struct io_kiocb *req) +-{ +- struct io_kiocb *nxt; +- +- /* +- * If LINK is set, we have dependent requests in this chain. If we +- * didn't fail this request, queue the first one up, moving any other +- * dependencies to the next request. In case of failure, fail the rest +- * of the chain. +- */ +- if (req->flags & IO_DISARM_MASK) { +- struct io_ring_ctx *ctx = req->ctx; +- bool posted; +- +- spin_lock(&ctx->completion_lock); +- posted = io_disarm_next(req); +- if (posted) +- io_commit_cqring(req->ctx); +- spin_unlock(&ctx->completion_lock); +- if (posted) +- io_cqring_ev_posted(ctx); +- } +- nxt = req->link; +- req->link = NULL; +- return nxt; +-} +- +-static inline struct io_kiocb *io_req_find_next(struct io_kiocb *req) +-{ +- if (likely(!(req->flags & (REQ_F_LINK|REQ_F_HARDLINK)))) +- return NULL; +- return __io_req_find_next(req); +-} +- +-static void ctx_flush_and_put(struct io_ring_ctx *ctx, bool *locked) +-{ +- if (!ctx) +- return; +- if (*locked) { +- if (ctx->submit_state.compl_nr) +- io_submit_flush_completions(ctx); +- mutex_unlock(&ctx->uring_lock); +- *locked = false; +- } +- percpu_ref_put(&ctx->refs); +-} +- +-static void tctx_task_work(struct callback_head *cb) +-{ +- bool locked = false; +- struct io_ring_ctx *ctx = NULL; +- struct io_uring_task *tctx = container_of(cb, struct io_uring_task, +- task_work); +- +- while (1) { +- struct io_wq_work_node *node; +- +- if (!tctx->task_list.first && locked && ctx->submit_state.compl_nr) +- io_submit_flush_completions(ctx); +- +- spin_lock_irq(&tctx->task_lock); +- node = tctx->task_list.first; +- INIT_WQ_LIST(&tctx->task_list); +- if (!node) +- tctx->task_running = false; +- spin_unlock_irq(&tctx->task_lock); +- if (!node) +- break; +- +- do { +- struct io_wq_work_node *next = node->next; +- struct io_kiocb *req = container_of(node, struct io_kiocb, +- io_task_work.node); +- +- if (req->ctx != ctx) { +- ctx_flush_and_put(ctx, &locked); +- ctx = req->ctx; +- /* if not contended, grab and improve batching */ +- locked = mutex_trylock(&ctx->uring_lock); +- percpu_ref_get(&ctx->refs); - } -- sess->enc = true; -- rsp->SessionFlags = SMB2_SESSION_FLAG_ENCRYPT_DATA_LE; -- /* -- * signing is disable if encryption is enable -- * on this session -- */ -- sess->sign = false; -+ if (smb3_encryption_negotiated(conn) && -+ !(req->Flags & SMB2_SESSION_REQ_FLAG_BINDING)) { -+ rc = 
conn->ops->generate_encryptionkey(sess); -+ if (rc) { -+ ksmbd_debug(SMB, -+ "SMB3 encryption key generation failed\n"); -+ return -EINVAL; - } -+ sess->enc = true; -+ rsp->SessionFlags = SMB2_SESSION_FLAG_ENCRYPT_DATA_LE; -+ /* -+ * signing is disable if encryption is enable -+ * on this session -+ */ -+ sess->sign = false; - } - - binding_session: -@@ -1562,8 +1587,7 @@ static int krb5_authenticate(struct ksmbd_work *work) - (req->SecurityMode & SMB2_NEGOTIATE_SIGNING_REQUIRED)) - sess->sign = true; - -- if ((conn->vals->capabilities & SMB2_GLOBAL_CAP_ENCRYPTION) && -- conn->ops->generate_encryptionkey) { -+ if (smb3_encryption_negotiated(conn)) { - retval = conn->ops->generate_encryptionkey(sess); - if (retval) { - ksmbd_debug(SMB, -@@ -1700,8 +1724,10 @@ int smb2_sess_setup(struct ksmbd_work *work) - negblob_off = le16_to_cpu(req->SecurityBufferOffset); - negblob_len = le16_to_cpu(req->SecurityBufferLength); - if (negblob_off < (offsetof(struct smb2_sess_setup_req, Buffer) - 4) || -- negblob_len < offsetof(struct negotiate_message, NegotiateFlags)) -- return -EINVAL; -+ negblob_len < offsetof(struct negotiate_message, NegotiateFlags)) { -+ rc = -EINVAL; -+ goto out_err; -+ } - - negblob = (struct negotiate_message *)((char *)&req->hdr.ProtocolId + - negblob_off); -@@ -1906,8 +1932,9 @@ out_err1: - rsp->hdr.Status = STATUS_SUCCESS; - rc = 0; - break; -+ case -ENOENT: - case KSMBD_TREE_CONN_STATUS_NO_SHARE: -- rsp->hdr.Status = STATUS_BAD_NETWORK_PATH; -+ rsp->hdr.Status = STATUS_BAD_NETWORK_NAME; - break; - case -ENOMEM: - case KSMBD_TREE_CONN_STATUS_NOMEM: -@@ -2019,6 +2046,7 @@ int smb2_tree_disconnect(struct ksmbd_work *work) - - ksmbd_close_tree_conn_fds(work); - ksmbd_tree_conn_disconnect(sess, tcon); -+ work->tcon = NULL; - return 0; - } - -@@ -2291,15 +2319,15 @@ static int smb2_remove_smb_xattrs(struct path *path) - name += strlen(name) + 1) { - ksmbd_debug(SMB, "%s, len %zd\n", name, strlen(name)); - -- if (strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN) && -- strncmp(&name[XATTR_USER_PREFIX_LEN], DOS_ATTRIBUTE_PREFIX, -- DOS_ATTRIBUTE_PREFIX_LEN) && -- strncmp(&name[XATTR_USER_PREFIX_LEN], STREAM_PREFIX, STREAM_PREFIX_LEN)) +- req->io_task_work.func(req, &locked); +- node = next; +- } while (node); +- +- cond_resched(); +- } +- +- ctx_flush_and_put(ctx, &locked); +-} +- +-static void io_req_task_work_add(struct io_kiocb *req) +-{ +- struct task_struct *tsk = req->task; +- struct io_uring_task *tctx = tsk->io_uring; +- enum task_work_notify_mode notify; +- struct io_wq_work_node *node; +- unsigned long flags; +- bool running; +- +- WARN_ON_ONCE(!tctx); +- +- spin_lock_irqsave(&tctx->task_lock, flags); +- wq_list_add_tail(&req->io_task_work.node, &tctx->task_list); +- running = tctx->task_running; +- if (!running) +- tctx->task_running = true; +- spin_unlock_irqrestore(&tctx->task_lock, flags); +- +- /* task_work already pending, we're done */ +- if (running) +- return; +- +- /* +- * SQPOLL kernel thread doesn't need notification, just a wakeup. For +- * all other cases, use TWA_SIGNAL unconditionally to ensure we're +- * processing task_work. There's no reliable way to tell if TWA_RESUME +- * will do the job. +- */ +- notify = (req->ctx->flags & IORING_SETUP_SQPOLL) ? 
TWA_NONE : TWA_SIGNAL; +- if (!task_work_add(tsk, &tctx->task_work, notify)) { +- wake_up_process(tsk); +- return; +- } +- +- spin_lock_irqsave(&tctx->task_lock, flags); +- tctx->task_running = false; +- node = tctx->task_list.first; +- INIT_WQ_LIST(&tctx->task_list); +- spin_unlock_irqrestore(&tctx->task_lock, flags); +- +- while (node) { +- req = container_of(node, struct io_kiocb, io_task_work.node); +- node = node->next; +- if (llist_add(&req->io_task_work.fallback_node, +- &req->ctx->fallback_llist)) +- schedule_delayed_work(&req->ctx->fallback_work, 1); +- } +-} +- +-static void io_req_task_cancel(struct io_kiocb *req, bool *locked) +-{ +- struct io_ring_ctx *ctx = req->ctx; +- +- /* not needed for normal modes, but SQPOLL depends on it */ +- io_tw_lock(ctx, locked); +- io_req_complete_failed(req, req->result); +-} +- +-static void io_req_task_submit(struct io_kiocb *req, bool *locked) +-{ +- struct io_ring_ctx *ctx = req->ctx; +- +- io_tw_lock(ctx, locked); +- /* req->task == current here, checking PF_EXITING is safe */ +- if (likely(!(req->task->flags & PF_EXITING))) +- __io_queue_sqe(req); +- else +- io_req_complete_failed(req, -EFAULT); +-} +- +-static void io_req_task_queue_fail(struct io_kiocb *req, int ret) +-{ +- req->result = ret; +- req->io_task_work.func = io_req_task_cancel; +- io_req_task_work_add(req); +-} +- +-static void io_req_task_queue(struct io_kiocb *req) +-{ +- req->io_task_work.func = io_req_task_submit; +- io_req_task_work_add(req); +-} +- +-static void io_req_task_queue_reissue(struct io_kiocb *req) +-{ +- req->io_task_work.func = io_queue_async_work; +- io_req_task_work_add(req); +-} +- +-static inline void io_queue_next(struct io_kiocb *req) +-{ +- struct io_kiocb *nxt = io_req_find_next(req); +- +- if (nxt) +- io_req_task_queue(nxt); +-} +- +-static void io_free_req(struct io_kiocb *req) +-{ +- io_queue_next(req); +- __io_free_req(req); +-} +- +-static void io_free_req_work(struct io_kiocb *req, bool *locked) +-{ +- io_free_req(req); +-} +- +-struct req_batch { +- struct task_struct *task; +- int task_refs; +- int ctx_refs; +-}; +- +-static inline void io_init_req_batch(struct req_batch *rb) +-{ +- rb->task_refs = 0; +- rb->ctx_refs = 0; +- rb->task = NULL; +-} +- +-static void io_req_free_batch_finish(struct io_ring_ctx *ctx, +- struct req_batch *rb) +-{ +- if (rb->ctx_refs) +- percpu_ref_put_many(&ctx->refs, rb->ctx_refs); +- if (rb->task) +- io_put_task(rb->task, rb->task_refs); +-} +- +-static void io_req_free_batch(struct req_batch *rb, struct io_kiocb *req, +- struct io_submit_state *state) +-{ +- io_queue_next(req); +- io_dismantle_req(req); +- +- if (req->task != rb->task) { +- if (rb->task) +- io_put_task(rb->task, rb->task_refs); +- rb->task = req->task; +- rb->task_refs = 0; +- } +- rb->task_refs++; +- rb->ctx_refs++; +- +- if (state->free_reqs != ARRAY_SIZE(state->reqs)) +- state->reqs[state->free_reqs++] = req; +- else +- list_add(&req->inflight_entry, &state->free_list); +-} +- +-static void io_submit_flush_completions(struct io_ring_ctx *ctx) +- __must_hold(&ctx->uring_lock) +-{ +- struct io_submit_state *state = &ctx->submit_state; +- int i, nr = state->compl_nr; +- struct req_batch rb; +- +- spin_lock(&ctx->completion_lock); +- for (i = 0; i < nr; i++) { +- struct io_kiocb *req = state->compl_reqs[i]; +- +- __io_cqring_fill_event(ctx, req->user_data, req->result, +- req->compl.cflags); +- } +- io_commit_cqring(ctx); +- spin_unlock(&ctx->completion_lock); +- io_cqring_ev_posted(ctx); +- +- io_init_req_batch(&rb); +- for (i = 0; i < nr; i++) 
{ +- struct io_kiocb *req = state->compl_reqs[i]; +- +- if (req_ref_put_and_test(req)) +- io_req_free_batch(&rb, req, &ctx->submit_state); +- } +- +- io_req_free_batch_finish(ctx, &rb); +- state->compl_nr = 0; +-} +- +-/* +- * Drop reference to request, return next in chain (if there is one) if this +- * was the last reference to this request. +- */ +-static inline struct io_kiocb *io_put_req_find_next(struct io_kiocb *req) +-{ +- struct io_kiocb *nxt = NULL; +- +- if (req_ref_put_and_test(req)) { +- nxt = io_req_find_next(req); +- __io_free_req(req); +- } +- return nxt; +-} +- +-static inline void io_put_req(struct io_kiocb *req) +-{ +- if (req_ref_put_and_test(req)) +- io_free_req(req); +-} +- +-static inline void io_put_req_deferred(struct io_kiocb *req) +-{ +- if (req_ref_put_and_test(req)) { +- req->io_task_work.func = io_free_req_work; +- io_req_task_work_add(req); +- } +-} +- +-static unsigned io_cqring_events(struct io_ring_ctx *ctx) +-{ +- /* See comment at the top of this file */ +- smp_rmb(); +- return __io_cqring_events(ctx); +-} +- +-static inline unsigned int io_sqring_entries(struct io_ring_ctx *ctx) +-{ +- struct io_rings *rings = ctx->rings; +- +- /* make sure SQ entry isn't read before tail */ +- return smp_load_acquire(&rings->sq.tail) - ctx->cached_sq_head; +-} +- +-static unsigned int io_put_kbuf(struct io_kiocb *req, struct io_buffer *kbuf) +-{ +- unsigned int cflags; +- +- cflags = kbuf->bid << IORING_CQE_BUFFER_SHIFT; +- cflags |= IORING_CQE_F_BUFFER; +- req->flags &= ~REQ_F_BUFFER_SELECTED; +- kfree(kbuf); +- return cflags; +-} +- +-static inline unsigned int io_put_rw_kbuf(struct io_kiocb *req) +-{ +- struct io_buffer *kbuf; +- +- if (likely(!(req->flags & REQ_F_BUFFER_SELECTED))) +- return 0; +- kbuf = (struct io_buffer *) (unsigned long) req->rw.addr; +- return io_put_kbuf(req, kbuf); +-} +- +-static inline bool io_run_task_work(void) +-{ +- if (test_thread_flag(TIF_NOTIFY_SIGNAL) || current->task_works) { +- __set_current_state(TASK_RUNNING); +- tracehook_notify_signal(); +- return true; +- } +- +- return false; +-} +- +-/* +- * Find and free completed poll iocbs +- */ +-static void io_iopoll_complete(struct io_ring_ctx *ctx, unsigned int *nr_events, +- struct list_head *done) +-{ +- struct req_batch rb; +- struct io_kiocb *req; +- +- /* order with ->result store in io_complete_rw_iopoll() */ +- smp_rmb(); +- +- io_init_req_batch(&rb); +- while (!list_empty(done)) { +- req = list_first_entry(done, struct io_kiocb, inflight_entry); +- list_del(&req->inflight_entry); +- +- __io_cqring_fill_event(ctx, req->user_data, req->result, +- io_put_rw_kbuf(req)); +- (*nr_events)++; +- +- if (req_ref_put_and_test(req)) +- io_req_free_batch(&rb, req, &ctx->submit_state); +- } +- +- io_commit_cqring(ctx); +- io_cqring_ev_posted_iopoll(ctx); +- io_req_free_batch_finish(ctx, &rb); +-} +- +-static int io_do_iopoll(struct io_ring_ctx *ctx, unsigned int *nr_events, +- long min) +-{ +- struct io_kiocb *req, *tmp; +- LIST_HEAD(done); +- bool spin; +- +- /* +- * Only spin for completions if we don't have multiple devices hanging +- * off our complete list, and we're under the requested amount. +- */ +- spin = !ctx->poll_multi_queue && *nr_events < min; +- +- list_for_each_entry_safe(req, tmp, &ctx->iopoll_list, inflight_entry) { +- struct kiocb *kiocb = &req->rw.kiocb; +- int ret; +- +- /* +- * Move completed and retryable entries to our local lists. +- * If we find a request that requires polling, break out +- * and complete those lists first, if we have entries there. 
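
io_submit_flush_completions(), shown being removed above, works in two phases: post every queued completion under one completion_lock acquisition, then drop each request's final reference and free the survivors as a batch. A simplified sketch of the drop-to-zero-then-batch idea with C11 atomics; none of the io_uring types appear, and the posting step is elided.

    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stdlib.h>

    struct req {
        atomic_int refs;
        /* ... payload ... */
    };

    /* True if the caller just dropped the last reference and now owns
     * the object; freeing is deferred so it can be batched. */
    static bool req_put_and_test(struct req *r)
    {
        return atomic_fetch_sub_explicit(&r->refs, 1,
                                         memory_order_acq_rel) == 1;
    }

    static void flush_completions(struct req **reqs, int nr)
    {
        struct req *to_free[64];   /* assumes nr <= 64 for brevity */
        int n = 0;

        /* Phase 1 (elided): post one CQE per request under the lock. */

        /* Phase 2: the final puts, batched so the frees happen together. */
        for (int i = 0; i < nr; i++)
            if (req_put_and_test(reqs[i]))
                to_free[n++] = reqs[i];
        while (n--)
            free(to_free[n]);
    }

    int main(void)
    {
        struct req *r = malloc(sizeof *r);

        if (!r)
            return 1;
        atomic_init(&r->refs, 1);
        flush_completions(&r, 1);  /* posts, then frees r */
        return 0;
    }
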
+- */ +- if (READ_ONCE(req->iopoll_completed)) { +- list_move_tail(&req->inflight_entry, &done); - continue; +- } +- if (!list_empty(&done)) +- break; - -- err = ksmbd_vfs_remove_xattr(user_ns, path->dentry, name); -- if (err) -- ksmbd_debug(SMB, "remove xattr failed : %s\n", name); -+ if (!strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN) && -+ !strncmp(&name[XATTR_USER_PREFIX_LEN], STREAM_PREFIX, -+ STREAM_PREFIX_LEN)) { -+ err = ksmbd_vfs_remove_xattr(user_ns, path->dentry, -+ name); -+ if (err) -+ ksmbd_debug(SMB, "remove xattr failed : %s\n", -+ name); -+ } - } - out: - kvfree(xattr_list); -@@ -2670,7 +2698,7 @@ int smb2_open(struct ksmbd_work *work) - (struct create_posix *)context; - if (le16_to_cpu(context->DataOffset) + - le32_to_cpu(context->DataLength) < -- sizeof(struct create_posix)) { -+ sizeof(struct create_posix) - 4) { - rc = -EINVAL; - goto err_out1; - } -@@ -2955,13 +2983,17 @@ int smb2_open(struct ksmbd_work *work) - goto err_out; - - rc = build_sec_desc(user_ns, -- pntsd, NULL, -+ pntsd, NULL, 0, - OWNER_SECINFO | - GROUP_SECINFO | - DACL_SECINFO, - &pntsd_size, &fattr); - posix_acl_release(fattr.cf_acls); - posix_acl_release(fattr.cf_dacls); -+ if (rc) { -+ kfree(pntsd); -+ goto err_out; -+ } - - rc = ksmbd_vfs_set_sd_xattr(conn, - user_ns, -@@ -3398,9 +3430,9 @@ static int smb2_populate_readdir_entry(struct ksmbd_conn *conn, int info_level, - goto free_conv_name; - } - -- struct_sz = readdir_info_level_struct_sz(info_level); -- next_entry_offset = ALIGN(struct_sz - 1 + conv_len, -- KSMBD_DIR_INFO_ALIGNMENT); -+ struct_sz = readdir_info_level_struct_sz(info_level) - 1 + conv_len; -+ next_entry_offset = ALIGN(struct_sz, KSMBD_DIR_INFO_ALIGNMENT); -+ d_info->last_entry_off_align = next_entry_offset - struct_sz; - - if (next_entry_offset > d_info->out_buf_len) { - d_info->out_buf_len = 0; -@@ -3771,11 +3803,6 @@ static int __query_dir(struct dir_context *ctx, const char *name, int namlen, - return 0; - } - --static void restart_ctx(struct dir_context *ctx) +- ret = kiocb->ki_filp->f_op->iopoll(kiocb, spin); +- if (unlikely(ret < 0)) +- return ret; +- else if (ret) +- spin = false; +- +- /* iopoll may have completed current req */ +- if (READ_ONCE(req->iopoll_completed)) +- list_move_tail(&req->inflight_entry, &done); +- } +- +- if (!list_empty(&done)) +- io_iopoll_complete(ctx, nr_events, &done); +- +- return 0; +-} +- +-/* +- * We can't just wait for polled events to come to us, we have to actively +- * find and complete them. +- */ +-static void io_iopoll_try_reap_events(struct io_ring_ctx *ctx) -{ -- ctx->pos = 0; +- if (!(ctx->flags & IORING_SETUP_IOPOLL)) +- return; +- +- mutex_lock(&ctx->uring_lock); +- while (!list_empty(&ctx->iopoll_list)) { +- unsigned int nr_events = 0; +- +- io_do_iopoll(ctx, &nr_events, 0); +- +- /* let it sleep and repeat later if can't complete a request */ +- if (nr_events == 0) +- break; +- /* +- * Ensure we allow local-to-the-cpu processing to take place, +- * in this case we need to ensure that we reap all events. +- * Also let task_work, etc. 
to progress by releasing the mutex +- */ +- if (need_resched()) { +- mutex_unlock(&ctx->uring_lock); +- cond_resched(); +- mutex_lock(&ctx->uring_lock); +- } +- } +- mutex_unlock(&ctx->uring_lock); -} - - static int verify_info_level(int info_level) - { - switch (info_level) { -@@ -3794,6 +3821,15 @@ static int verify_info_level(int info_level) - return 0; - } - -+static int smb2_resp_buf_len(struct ksmbd_work *work, unsigned short hdr2_len) -+{ -+ int free_len; -+ -+ free_len = (int)(work->response_sz - -+ (get_rfc1002_len(work->response_buf) + 4)) - hdr2_len; -+ return free_len; -+} -+ - static int smb2_calc_max_out_buf_len(struct ksmbd_work *work, - unsigned short hdr2_len, - unsigned int out_buf_len) -@@ -3803,9 +3839,7 @@ static int smb2_calc_max_out_buf_len(struct ksmbd_work *work, - if (out_buf_len > work->conn->vals->max_trans_size) - return -EINVAL; - -- free_len = (int)(work->response_sz - -- (get_rfc1002_len(work->response_buf) + 4)) - -- hdr2_len; -+ free_len = smb2_resp_buf_len(work, hdr2_len); - if (free_len < 0) - return -EINVAL; - -@@ -3882,7 +3916,6 @@ int smb2_query_dir(struct ksmbd_work *work) - if (srch_flag & SMB2_REOPEN || srch_flag & SMB2_RESTART_SCANS) { - ksmbd_debug(SMB, "Restart directory scan\n"); - generic_file_llseek(dir_fp->filp, 0, SEEK_SET); -- restart_ctx(&dir_fp->readdir_data.ctx); - } - - memset(&d_info, 0, sizeof(struct ksmbd_dir_info)); -@@ -3923,11 +3956,15 @@ int smb2_query_dir(struct ksmbd_work *work) - set_ctx_actor(&dir_fp->readdir_data.ctx, __query_dir); - - rc = iterate_dir(dir_fp->filp, &dir_fp->readdir_data.ctx); -- if (rc == 0) -- restart_ctx(&dir_fp->readdir_data.ctx); -- if (rc == -ENOSPC) -+ /* -+ * req->OutputBufferLength is too small to contain even one entry. -+ * In this case, it immediately returns OutputBufferLength 0 to client. 
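
On the ksmbd side, the smb2_query_dir() rework above pads each directory entry to KSMBD_DIR_INFO_ALIGNMENT (8 bytes) and records the padding added after the final entry in last_entry_off_align, so it can be subtracted from the reported byte count. The arithmetic, worked through with example sizes (the 104-byte base size is only an example):

    #include <stdio.h>

    #define ALIGN(x, a) (((x) + (a) - 1) & ~((a) - 1))
    #define DIR_INFO_ALIGNMENT 8

    int main(void)
    {
        unsigned int struct_sz = 104 - 1 + 13; /* base struct + name, e.g. */
        unsigned int next = ALIGN(struct_sz, DIR_INFO_ALIGNMENT);
        unsigned int pad  = next - struct_sz;  /* == last_entry_off_align */

        /* 116 rounds up to 120, so 4 trailing pad bytes would be trimmed
         * from the data_count reported for the final entry. */
        printf("entry %u, aligned %u, trailing pad %u\n",
               struct_sz, next, pad);
        return 0;
    }
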
-+ */ -+ if (!d_info.out_buf_len && !d_info.num_entry) -+ goto no_buf_len; -+ if (rc > 0 || rc == -ENOSPC) - rc = 0; -- if (rc) -+ else if (rc) - goto err_out; - - d_info.wptr = d_info.rptr; -@@ -3949,9 +3986,12 @@ int smb2_query_dir(struct ksmbd_work *work) - rsp->Buffer[0] = 0; - inc_rfc1001_len(rsp_org, 9); - } else { -+no_buf_len: - ((struct file_directory_info *) - ((char *)rsp->Buffer + d_info.last_entry_offset)) - ->NextEntryOffset = 0; -+ if (d_info.data_count >= d_info.last_entry_off_align) -+ d_info.data_count -= d_info.last_entry_off_align; - - rsp->StructureSize = cpu_to_le16(9); - rsp->OutputBufferOffset = cpu_to_le16(72); -@@ -3981,6 +4021,8 @@ err_out2: - rsp->hdr.Status = STATUS_NO_MEMORY; - else if (rc == -EFAULT) - rsp->hdr.Status = STATUS_INVALID_INFO_CLASS; -+ else if (rc == -EIO) -+ rsp->hdr.Status = STATUS_FILE_CORRUPT_ERROR; - if (!rsp->hdr.Status) - rsp->hdr.Status = STATUS_UNEXPECTED_IO_ERROR; - -@@ -4450,6 +4492,12 @@ static void get_file_stream_info(struct ksmbd_work *work, - &stat); - file_info = (struct smb2_file_stream_info *)rsp->Buffer; - -+ buf_free_len = -+ smb2_calc_max_out_buf_len(work, 8, -+ le32_to_cpu(req->OutputBufferLength)); -+ if (buf_free_len < 0) -+ goto out; -+ - xattr_list_len = ksmbd_vfs_listxattr(path->dentry, &xattr_list); - if (xattr_list_len < 0) { - goto out; -@@ -4458,12 +4506,6 @@ static void get_file_stream_info(struct ksmbd_work *work, - goto out; - } - -- buf_free_len = -- smb2_calc_max_out_buf_len(work, 8, -- le32_to_cpu(req->OutputBufferLength)); -- if (buf_free_len < 0) +-static int io_iopoll_check(struct io_ring_ctx *ctx, long min) +-{ +- unsigned int nr_events = 0; +- int ret = 0; +- +- /* +- * We disallow the app entering submit/complete with polling, but we +- * still need to lock the ring to prevent racing with polled issue +- * that got punted to a workqueue. +- */ +- mutex_lock(&ctx->uring_lock); +- /* +- * Don't enter poll loop if we already have events pending. +- * If we do, we can potentially be spinning for commands that +- * already triggered a CQE (eg in error). +- */ +- if (test_bit(0, &ctx->check_cq_overflow)) +- __io_cqring_overflow_flush(ctx, false); +- if (io_cqring_events(ctx)) - goto out; +- do { +- /* +- * If a submit got punted to a workqueue, we can have the +- * application entering polling for a command before it gets +- * issued. That app will hold the uring_lock for the duration +- * of the poll right here, so we need to take a breather every +- * now and then to ensure that the issue has a chance to add +- * the poll to the issued list. Otherwise we can spin here +- * forever, while the workqueue is stuck trying to acquire the +- * very same mutex. 
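
Also on the ksmbd side, get_file_stream_info() above now computes the remaining response-buffer space before it starts emitting entries, via smb2_calc_max_out_buf_len(): the usable length is the smaller of what the client asked for and what is genuinely left in the response buffer, and anything else is rejected. The shape of that clamp, with made-up sizes (the names mirror the patch, but this function is only a sketch):

    #include <stdio.h>

    /* Clamp the output length to both the client's request and the space
     * actually left in our response buffer after this reply's header. */
    static int max_out_buf_len(int response_sz, int used, int hdr2_len,
                               unsigned int client_len,
                               unsigned int max_trans_size)
    {
        int free_len = response_sz - used - hdr2_len;

        if (client_len > max_trans_size)
            return -1; /* -EINVAL in the kernel code */
        if (free_len < 0)
            return -1;
        return free_len < (int)client_len ? free_len : (int)client_len;
    }

    int main(void)
    {
        /* 64 KiB buffer, 200 bytes used, 8-byte header: the 4096 fits. */
        printf("%d\n", max_out_buf_len(65536, 200, 8, 4096, 1 << 20));
        return 0;
    }
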
+- */ +- if (list_empty(&ctx->iopoll_list)) { +- u32 tail = ctx->cached_cq_tail; - - while (idx < xattr_list_len) { - stream_name = xattr_list + idx; - streamlen = strlen(stream_name); -@@ -4489,8 +4531,10 @@ static void get_file_stream_info(struct ksmbd_work *work, - ":%s", &stream_name[XATTR_NAME_STREAM_LEN]); - - next = sizeof(struct smb2_file_stream_info) + streamlen * 2; -- if (next > buf_free_len) -+ if (next > buf_free_len) { -+ kfree(stream_buf); - break; -+ } - - file_info = (struct smb2_file_stream_info *)&rsp->Buffer[nbytes]; - streamlen = smbConvertToUTF16((__le16 *)file_info->StreamName, -@@ -4507,6 +4551,7 @@ static void get_file_stream_info(struct ksmbd_work *work, - file_info->NextEntryOffset = cpu_to_le32(next); - } - -+out: - if (!S_ISDIR(stat.mode) && - buf_free_len >= sizeof(struct smb2_file_stream_info) + 7 * 2) { - file_info = (struct smb2_file_stream_info *) -@@ -4515,14 +4560,13 @@ static void get_file_stream_info(struct ksmbd_work *work, - "::$DATA", 7, conn->local_nls, 0); - streamlen *= 2; - file_info->StreamNameLength = cpu_to_le32(streamlen); -- file_info->StreamSize = 0; -- file_info->StreamAllocationSize = 0; -+ file_info->StreamSize = cpu_to_le64(stat.size); -+ file_info->StreamAllocationSize = cpu_to_le64(stat.blocks << 9); - nbytes += sizeof(struct smb2_file_stream_info) + streamlen; - } - - /* last entry offset should be 0 */ - file_info->NextEntryOffset = 0; +- mutex_unlock(&ctx->uring_lock); +- io_run_task_work(); +- mutex_lock(&ctx->uring_lock); +- +- /* some requests don't go through iopoll_list */ +- if (tail != ctx->cached_cq_tail || +- list_empty(&ctx->iopoll_list)) +- break; +- } +- ret = io_do_iopoll(ctx, &nr_events, min); +- } while (!ret && nr_events < min && !need_resched()); -out: - kvfree(xattr_list); - - rsp->OutputBufferLength = cpu_to_le32(nbytes); -@@ -4891,11 +4935,18 @@ static int smb2_get_info_filesystem(struct ksmbd_work *work, - { - struct filesystem_vol_info *info; - size_t sz; -+ unsigned int serial_crc = 0; - - info = (struct filesystem_vol_info *)(rsp->Buffer); - info->VolumeCreationTime = 0; -+ serial_crc = crc32_le(serial_crc, share->name, -+ strlen(share->name)); -+ serial_crc = crc32_le(serial_crc, share->path, -+ strlen(share->path)); -+ serial_crc = crc32_le(serial_crc, ksmbd_netbios_name(), -+ strlen(ksmbd_netbios_name())); - /* Taking dummy value of serial number*/ -- info->SerialNumber = cpu_to_le32(0xbc3ac512); -+ info->SerialNumber = cpu_to_le32(serial_crc); - len = smbConvertToUTF16((__le16 *)info->VolumeLabel, - share->name, PATH_MAX, - conn->local_nls, 0); -@@ -4963,15 +5014,17 @@ static int smb2_get_info_filesystem(struct ksmbd_work *work, - case FS_SECTOR_SIZE_INFORMATION: - { - struct smb3_fs_ss_info *info; -+ unsigned int sector_size = -+ min_t(unsigned int, path.mnt->mnt_sb->s_blocksize, 4096); - - info = (struct smb3_fs_ss_info *)(rsp->Buffer); - -- info->LogicalBytesPerSector = cpu_to_le32(stfs.f_bsize); -+ info->LogicalBytesPerSector = cpu_to_le32(sector_size); - info->PhysicalBytesPerSectorForAtomicity = -- cpu_to_le32(stfs.f_bsize); -- info->PhysicalBytesPerSectorForPerf = cpu_to_le32(stfs.f_bsize); -+ cpu_to_le32(sector_size); -+ info->PhysicalBytesPerSectorForPerf = cpu_to_le32(sector_size); - info->FSEffPhysicalBytesPerSectorForAtomicity = -- cpu_to_le32(stfs.f_bsize); -+ cpu_to_le32(sector_size); - info->Flags = cpu_to_le32(SSINFO_FLAGS_ALIGNED_DEVICE | - SSINFO_FLAGS_PARTITION_ALIGNED_ON_DEVICE); - info->ByteOffsetForSectorAlignment = 0; -@@ -5045,15 +5098,15 @@ static int smb2_get_info_sec(struct 
ksmbd_work *work, - struct smb_ntsd *pntsd = (struct smb_ntsd *)rsp->Buffer, *ppntsd = NULL; - struct smb_fattr fattr = {{0}}; - struct inode *inode; -- __u32 secdesclen; -+ __u32 secdesclen = 0; - unsigned int id = KSMBD_NO_FID, pid = KSMBD_NO_FID; - int addition_info = le32_to_cpu(req->AdditionalInformation); -- int rc; -+ int rc = 0, ppntsd_size = 0; - - if (addition_info & ~(OWNER_SECINFO | GROUP_SECINFO | DACL_SECINFO | - PROTECTED_DACL_SECINFO | - UNPROTECTED_DACL_SECINFO)) { -- pr_err("Unsupported addition info: 0x%x)\n", -+ ksmbd_debug(SMB, "Unsupported addition info: 0x%x)\n", - addition_info); - - pntsd->revision = cpu_to_le16(1); -@@ -5094,11 +5147,14 @@ static int smb2_get_info_sec(struct ksmbd_work *work, - - if (test_share_config_flag(work->tcon->share_conf, - KSMBD_SHARE_FLAG_ACL_XATTR)) -- ksmbd_vfs_get_sd_xattr(work->conn, user_ns, -- fp->filp->f_path.dentry, &ppntsd); +- mutex_unlock(&ctx->uring_lock); +- return ret; +-} - -- rc = build_sec_desc(user_ns, pntsd, ppntsd, addition_info, -- &secdesclen, &fattr); -+ ppntsd_size = ksmbd_vfs_get_sd_xattr(work->conn, user_ns, -+ fp->filp->f_path.dentry, -+ &ppntsd); -+ -+ /* Check if sd buffer size exceeds response buffer size */ -+ if (smb2_resp_buf_len(work, 8) > ppntsd_size) -+ rc = build_sec_desc(user_ns, pntsd, ppntsd, ppntsd_size, -+ addition_info, &secdesclen, &fattr); - posix_acl_release(fattr.cf_acls); - posix_acl_release(fattr.cf_dacls); - kfree(ppntsd); -@@ -5734,8 +5790,10 @@ static int set_rename_info(struct ksmbd_work *work, struct ksmbd_file *fp, - if (parent_fp) { - if (parent_fp->daccess & FILE_DELETE_LE) { - pr_err("parent dir is opened with delete access\n"); -+ ksmbd_fd_put(work, parent_fp); - return -ESHARE; - } -+ ksmbd_fd_put(work, parent_fp); - } - next: - return smb2_rename(work, fp, user_ns, rename_info, -@@ -6427,10 +6485,8 @@ int smb2_write(struct ksmbd_work *work) - (offsetof(struct smb2_write_req, Buffer) - 4)) { - data_buf = (char *)&req->Buffer[0]; - } else { -- if ((u64)le16_to_cpu(req->DataOffset) + length > get_rfc1002_len(req)) { -- pr_err("invalid write data offset %u, smb_len %u\n", -- le16_to_cpu(req->DataOffset), -- get_rfc1002_len(req)); -+ if (le16_to_cpu(req->DataOffset) < -+ offsetof(struct smb2_write_req, Buffer)) { - err = -EINVAL; - goto out; - } -@@ -7312,7 +7368,7 @@ static int fsctl_validate_negotiate_info(struct ksmbd_conn *conn, - int ret = 0; - int dialect; - -- if (in_buf_len < sizeof(struct validate_negotiate_info_req) + -+ if (in_buf_len < offsetof(struct validate_negotiate_info_req, Dialects) + - le16_to_cpu(neg_req->DialectCount) * sizeof(__le16)) - return -EINVAL; - -@@ -7563,11 +7619,16 @@ int smb2_ioctl(struct ksmbd_work *work) - goto out; - } - -- if (in_buf_len < sizeof(struct validate_negotiate_info_req)) -- return -EINVAL; -+ if (in_buf_len < offsetof(struct validate_negotiate_info_req, -+ Dialects)) { -+ ret = -EINVAL; -+ goto out; -+ } - -- if (out_buf_len < sizeof(struct validate_negotiate_info_rsp)) -- return -EINVAL; -+ if (out_buf_len < sizeof(struct validate_negotiate_info_rsp)) { -+ ret = -EINVAL; -+ goto out; -+ } - - ret = fsctl_validate_negotiate_info(conn, - (struct validate_negotiate_info_req *)&req->Buffer[0], -@@ -7645,7 +7706,7 @@ int smb2_ioctl(struct ksmbd_work *work) - { - struct file_zero_data_information *zero_data; - struct ksmbd_file *fp; -- loff_t off, len; -+ loff_t off, len, bfz; - - if (!test_tree_conn_flag(work->tcon, KSMBD_TREE_CONN_FLAG_WRITABLE)) { - ksmbd_debug(SMB, -@@ -7662,19 +7723,26 @@ int smb2_ioctl(struct ksmbd_work *work) - 
zero_data = - (struct file_zero_data_information *)&req->Buffer[0]; - -- fp = ksmbd_lookup_fd_fast(work, id); -- if (!fp) { -- ret = -ENOENT; -+ off = le64_to_cpu(zero_data->FileOffset); -+ bfz = le64_to_cpu(zero_data->BeyondFinalZero); -+ if (off > bfz) { -+ ret = -EINVAL; - goto out; - } - -- off = le64_to_cpu(zero_data->FileOffset); -- len = le64_to_cpu(zero_data->BeyondFinalZero) - off; -+ len = bfz - off; -+ if (len) { -+ fp = ksmbd_lookup_fd_fast(work, id); -+ if (!fp) { -+ ret = -ENOENT; -+ goto out; -+ } - -- ret = ksmbd_vfs_zero_data(work, fp, off, len); -- ksmbd_fd_put(work, fp); -- if (ret < 0) -- goto out; -+ ret = ksmbd_vfs_zero_data(work, fp, off, len); -+ ksmbd_fd_put(work, fp); -+ if (ret < 0) -+ goto out; -+ } - break; - } - case FSCTL_QUERY_ALLOCATED_RANGES: -@@ -7748,14 +7816,24 @@ int smb2_ioctl(struct ksmbd_work *work) - src_off = le64_to_cpu(dup_ext->SourceFileOffset); - dst_off = le64_to_cpu(dup_ext->TargetFileOffset); - length = le64_to_cpu(dup_ext->ByteCount); -- cloned = vfs_clone_file_range(fp_in->filp, src_off, fp_out->filp, -- dst_off, length, 0); -+ /* -+ * XXX: It is not clear if FSCTL_DUPLICATE_EXTENTS_TO_FILE -+ * should fall back to vfs_copy_file_range(). This could be -+ * beneficial when re-exporting nfs/smb mount, but note that -+ * this can result in partial copy that returns an error status. -+ * If/when FSCTL_DUPLICATE_EXTENTS_TO_FILE_EX is implemented, -+ * fall back to vfs_copy_file_range(), should be avoided when -+ * the flag DUPLICATE_EXTENTS_DATA_EX_SOURCE_ATOMIC is set. -+ */ -+ cloned = vfs_clone_file_range(fp_in->filp, src_off, -+ fp_out->filp, dst_off, length, 0); - if (cloned == -EXDEV || cloned == -EOPNOTSUPP) { - ret = -EOPNOTSUPP; - goto dup_ext_out; - } else if (cloned != length) { - cloned = vfs_copy_file_range(fp_in->filp, src_off, -- fp_out->filp, dst_off, length, 0); -+ fp_out->filp, dst_off, -+ length, 0); - if (cloned != length) { - if (cloned < 0) - ret = cloned; -diff --git a/fs/ksmbd/smb2pdu.h b/fs/ksmbd/smb2pdu.h -index ff5a2f01d34ae..4f8574944ac19 100644 ---- a/fs/ksmbd/smb2pdu.h -+++ b/fs/ksmbd/smb2pdu.h -@@ -1647,6 +1647,7 @@ int init_smb3_11_server(struct ksmbd_conn *conn); - void init_smb2_max_read_size(unsigned int sz); - void init_smb2_max_write_size(unsigned int sz); - void init_smb2_max_trans_size(unsigned int sz); -+void init_smb2_max_credits(unsigned int sz); - - bool is_smb2_neg_cmd(struct ksmbd_work *work); - bool is_smb2_rsp(struct ksmbd_work *work); -diff --git a/fs/ksmbd/smb_common.c b/fs/ksmbd/smb_common.c -index 707490ab1f4c4..22f460984742f 100644 ---- a/fs/ksmbd/smb_common.c -+++ b/fs/ksmbd/smb_common.c -@@ -4,6 +4,8 @@ - * Copyright (C) 2018 Namjae Jeon <linkinjeon@kernel.org> - */ - -+#include <linux/user_namespace.h> -+ - #include "smb_common.h" - #include "server.h" - #include "misc.h" -@@ -140,8 +142,10 @@ int ksmbd_verify_smb_message(struct ksmbd_work *work) - - hdr = work->request_buf; - if (*(__le32 *)hdr->Protocol == SMB1_PROTO_NUMBER && -- hdr->Command == SMB_COM_NEGOTIATE) -+ hdr->Command == SMB_COM_NEGOTIATE) { -+ work->conn->outstanding_credits++; - return 0; -+ } - - return -EINVAL; - } -@@ -308,14 +312,17 @@ int ksmbd_populate_dot_dotdot_entries(struct ksmbd_work *work, int info_level, - for (i = 0; i < 2; i++) { - struct kstat kstat; - struct ksmbd_kstat ksmbd_kstat; -+ struct dentry *dentry; - - if (!dir->dot_dotdot[i]) { /* fill dot entry info */ - if (i == 0) { - d_info->name = "."; - d_info->name_len = 1; -+ dentry = dir->filp->f_path.dentry; - } else { - d_info->name = ".."; - 
d_info->name_len = 2; -+ dentry = dir->filp->f_path.dentry->d_parent; - } - - if (!match_pattern(d_info->name, d_info->name_len, -@@ -327,7 +334,7 @@ int ksmbd_populate_dot_dotdot_entries(struct ksmbd_work *work, int info_level, - ksmbd_kstat.kstat = &kstat; - ksmbd_vfs_fill_dentry_attrs(work, - user_ns, -- dir->filp->f_path.dentry->d_parent, -+ dentry, - &ksmbd_kstat); - rc = fn(conn, info_level, d_info, &ksmbd_kstat); - if (rc) -@@ -619,8 +626,8 @@ int ksmbd_override_fsids(struct ksmbd_work *work) - if (!cred) - return -ENOMEM; - -- cred->fsuid = make_kuid(current_user_ns(), uid); -- cred->fsgid = make_kgid(current_user_ns(), gid); -+ cred->fsuid = make_kuid(&init_user_ns, uid); -+ cred->fsgid = make_kgid(&init_user_ns, gid); - - gi = groups_alloc(0); - if (!gi) { -diff --git a/fs/ksmbd/smb_common.h b/fs/ksmbd/smb_common.h -index 6e79e7577f6b7..1eba8dabaf317 100644 ---- a/fs/ksmbd/smb_common.h -+++ b/fs/ksmbd/smb_common.h -@@ -412,6 +412,7 @@ struct smb_version_values { - __u32 max_read_size; - __u32 max_write_size; - __u32 max_trans_size; -+ __u32 max_credits; - __u32 large_lock_type; - __u32 exclusive_lock_type; - __u32 shared_lock_type; -diff --git a/fs/ksmbd/smbacl.c b/fs/ksmbd/smbacl.c -index bd792db326239..3781bca2c8fc4 100644 ---- a/fs/ksmbd/smbacl.c -+++ b/fs/ksmbd/smbacl.c -@@ -9,6 +9,7 @@ - #include <linux/fs.h> - #include <linux/slab.h> - #include <linux/string.h> -+#include <linux/mnt_idmapping.h> - - #include "smbacl.h" - #include "smb_common.h" -@@ -274,14 +275,7 @@ static int sid_to_id(struct user_namespace *user_ns, - uid_t id; - - id = le32_to_cpu(psid->sub_auth[psid->num_subauth - 1]); -- /* -- * Translate raw sid into kuid in the server's user -- * namespace. -- */ -- uid = make_kuid(&init_user_ns, id); +-static void kiocb_end_write(struct io_kiocb *req) +-{ +- /* +- * Tell lockdep we inherited freeze protection from submission +- * thread. +- */ +- if (req->flags & REQ_F_ISREG) { +- struct super_block *sb = file_inode(req->file)->i_sb; - -- /* If this is an idmapped mount, apply the idmapping. */ -- uid = kuid_from_mnt(user_ns, uid); -+ uid = mapped_kuid_user(user_ns, &init_user_ns, KUIDT_INIT(id)); - if (uid_valid(uid)) { - fattr->cf_uid = uid; - rc = 0; -@@ -291,14 +285,7 @@ static int sid_to_id(struct user_namespace *user_ns, - gid_t id; - - id = le32_to_cpu(psid->sub_auth[psid->num_subauth - 1]); +- __sb_writers_acquired(sb, SB_FREEZE_WRITE); +- sb_end_write(sb); +- } +-} +- +-#ifdef CONFIG_BLOCK +-static bool io_resubmit_prep(struct io_kiocb *req) +-{ +- struct io_async_rw *rw = req->async_data; +- +- if (!rw) +- return !io_req_prep_async(req); +- iov_iter_restore(&rw->iter, &rw->iter_state); +- return true; +-} +- +-static bool io_rw_should_reissue(struct io_kiocb *req) +-{ +- umode_t mode = file_inode(req->file)->i_mode; +- struct io_ring_ctx *ctx = req->ctx; +- +- if (!S_ISBLK(mode) && !S_ISREG(mode)) +- return false; +- if ((req->flags & REQ_F_NOWAIT) || (io_wq_current_is_worker() && +- !(ctx->flags & IORING_SETUP_IOPOLL))) +- return false; +- /* +- * If ref is dying, we might be running poll reap from the exit work. +- * Don't attempt to reissue from that path, just let it fail with +- * -EAGAIN. +- */ +- if (percpu_ref_is_dying(&ctx->refs)) +- return false; +- /* +- * Play it safe and assume not safe to re-import and reissue if we're +- * not in the original thread group (or in task context). 
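
The ksmbd_populate_dot_dotdot_entries() fix above stats the directory itself for the "." entry and only uses d_parent for ".."; previously both entries reported the parent's attributes. A userspace analogue of the corrected selection, using fstatat(2):

    #include <fcntl.h>
    #include <stdio.h>
    #include <sys/stat.h>

    /* "." reports the directory's own attributes, ".." its parent's. */
    static int stat_dot_entry(int dirfd, int is_dotdot, struct stat *st)
    {
        return fstatat(dirfd, is_dotdot ? ".." : ".", st, 0);
    }

    int main(void)
    {
        struct stat self, parent;
        int dirfd = open(".", O_RDONLY | O_DIRECTORY);

        if (dirfd < 0)
            return 1;
        if (stat_dot_entry(dirfd, 0, &self) ||
            stat_dot_entry(dirfd, 1, &parent))
            return 1;
        printf("'.' ino %llu, '..' ino %llu\n",
               (unsigned long long)self.st_ino,
               (unsigned long long)parent.st_ino);
        return 0;
    }
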
+- */ +- if (!same_thread_group(req->task, current) || !in_task()) +- return false; +- return true; +-} +-#else +-static bool io_resubmit_prep(struct io_kiocb *req) +-{ +- return false; +-} +-static bool io_rw_should_reissue(struct io_kiocb *req) +-{ +- return false; +-} +-#endif +- +-static bool __io_complete_rw_common(struct io_kiocb *req, long res) +-{ +- if (req->rw.kiocb.ki_flags & IOCB_WRITE) +- kiocb_end_write(req); +- if (res != req->result) { +- if ((res == -EAGAIN || res == -EOPNOTSUPP) && +- io_rw_should_reissue(req)) { +- req->flags |= REQ_F_REISSUE; +- return true; +- } +- req_set_fail(req); +- req->result = res; +- } +- return false; +-} +- +-static void io_req_task_complete(struct io_kiocb *req, bool *locked) +-{ +- unsigned int cflags = io_put_rw_kbuf(req); +- long res = req->result; +- +- if (*locked) { +- struct io_ring_ctx *ctx = req->ctx; +- struct io_submit_state *state = &ctx->submit_state; +- +- io_req_complete_state(req, res, cflags); +- state->compl_reqs[state->compl_nr++] = req; +- if (state->compl_nr == ARRAY_SIZE(state->compl_reqs)) +- io_submit_flush_completions(ctx); +- } else { +- io_req_complete_post(req, res, cflags); +- } +-} +- +-static void __io_complete_rw(struct io_kiocb *req, long res, long res2, +- unsigned int issue_flags) +-{ +- if (__io_complete_rw_common(req, res)) +- return; +- __io_req_complete(req, issue_flags, req->result, io_put_rw_kbuf(req)); +-} +- +-static void io_complete_rw(struct kiocb *kiocb, long res, long res2) +-{ +- struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw.kiocb); +- +- if (__io_complete_rw_common(req, res)) +- return; +- req->result = res; +- req->io_task_work.func = io_req_task_complete; +- io_req_task_work_add(req); +-} +- +-static void io_complete_rw_iopoll(struct kiocb *kiocb, long res, long res2) +-{ +- struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw.kiocb); +- +- if (kiocb->ki_flags & IOCB_WRITE) +- kiocb_end_write(req); +- if (unlikely(res != req->result)) { +- if (res == -EAGAIN && io_rw_should_reissue(req)) { +- req->flags |= REQ_F_REISSUE; +- return; +- } +- } +- +- WRITE_ONCE(req->result, res); +- /* order with io_iopoll_complete() checking ->result */ +- smp_wmb(); +- WRITE_ONCE(req->iopoll_completed, 1); +-} +- +-/* +- * After the iocb has been issued, it's safe to be found on the poll list. +- * Adding the kiocb to the list AFTER submission ensures that we don't +- * find it from a io_do_iopoll() thread before the issuer is done +- * accessing the kiocb cookie. +- */ +-static void io_iopoll_req_issued(struct io_kiocb *req) +-{ +- struct io_ring_ctx *ctx = req->ctx; +- const bool in_async = io_wq_current_is_worker(); +- +- /* workqueue context doesn't hold uring_lock, grab it now */ +- if (unlikely(in_async)) +- mutex_lock(&ctx->uring_lock); +- +- /* +- * Track whether we have multiple files in our lists. This will impact +- * how we do polling eventually, not spinning if we're on potentially +- * different devices. 
+- */ +- if (list_empty(&ctx->iopoll_list)) { +- ctx->poll_multi_queue = false; +- } else if (!ctx->poll_multi_queue) { +- struct io_kiocb *list_req; +- unsigned int queue_num0, queue_num1; +- +- list_req = list_first_entry(&ctx->iopoll_list, struct io_kiocb, +- inflight_entry); +- +- if (list_req->file != req->file) { +- ctx->poll_multi_queue = true; +- } else { +- queue_num0 = blk_qc_t_to_queue_num(list_req->rw.kiocb.ki_cookie); +- queue_num1 = blk_qc_t_to_queue_num(req->rw.kiocb.ki_cookie); +- if (queue_num0 != queue_num1) +- ctx->poll_multi_queue = true; +- } +- } +- +- /* +- * For fast devices, IO may have already completed. If it has, add +- * it to the front so we find it first. +- */ +- if (READ_ONCE(req->iopoll_completed)) +- list_add(&req->inflight_entry, &ctx->iopoll_list); +- else +- list_add_tail(&req->inflight_entry, &ctx->iopoll_list); +- +- if (unlikely(in_async)) { - /* -- * Translate raw sid into kgid in the server's user -- * namespace. +- * If IORING_SETUP_SQPOLL is enabled, sqes are either handle +- * in sq thread task context or in io worker task context. If +- * current task context is sq thread, we don't need to check +- * whether should wake up sq thread. - */ -- gid = make_kgid(&init_user_ns, id); +- if ((ctx->flags & IORING_SETUP_SQPOLL) && +- wq_has_sleeper(&ctx->sq_data->wait)) +- wake_up(&ctx->sq_data->wait); - -- /* If this is an idmapped mount, apply the idmapping. */ -- gid = kgid_from_mnt(user_ns, gid); -+ gid = mapped_kgid_user(user_ns, &init_user_ns, KGIDT_INIT(id)); - if (gid_valid(gid)) { - fattr->cf_gid = gid; - rc = 0; -@@ -703,6 +690,7 @@ posix_default_acl: - static void set_ntacl_dacl(struct user_namespace *user_ns, - struct smb_acl *pndacl, - struct smb_acl *nt_dacl, -+ unsigned int aces_size, - const struct smb_sid *pownersid, - const struct smb_sid *pgrpsid, - struct smb_fattr *fattr) -@@ -716,9 +704,19 @@ static void set_ntacl_dacl(struct user_namespace *user_ns, - if (nt_num_aces) { - ntace = (struct smb_ace *)((char *)nt_dacl + sizeof(struct smb_acl)); - for (i = 0; i < nt_num_aces; i++) { -- memcpy((char *)pndace + size, ntace, le16_to_cpu(ntace->size)); -- size += le16_to_cpu(ntace->size); -- ntace = (struct smb_ace *)((char *)ntace + le16_to_cpu(ntace->size)); -+ unsigned short nt_ace_size; -+ -+ if (offsetof(struct smb_ace, access_req) > aces_size) -+ break; -+ -+ nt_ace_size = le16_to_cpu(ntace->size); -+ if (nt_ace_size > aces_size) -+ break; -+ -+ memcpy((char *)pndace + size, ntace, nt_ace_size); -+ size += nt_ace_size; -+ aces_size -= nt_ace_size; -+ ntace = (struct smb_ace *)((char *)ntace + nt_ace_size); - num_aces++; - } - } -@@ -891,7 +889,7 @@ int parse_sec_desc(struct user_namespace *user_ns, struct smb_ntsd *pntsd, - /* Convert permission bits from mode to equivalent CIFS ACL */ - int build_sec_desc(struct user_namespace *user_ns, - struct smb_ntsd *pntsd, struct smb_ntsd *ppntsd, -- int addition_info, __u32 *secdesclen, -+ int ppntsd_size, int addition_info, __u32 *secdesclen, - struct smb_fattr *fattr) - { - int rc = 0; -@@ -951,15 +949,25 @@ int build_sec_desc(struct user_namespace *user_ns, - - if (!ppntsd) { - set_mode_dacl(user_ns, dacl_ptr, fattr); -- } else if (!ppntsd->dacloffset) { -- goto out; - } else { - struct smb_acl *ppdacl_ptr; -+ unsigned int dacl_offset = le32_to_cpu(ppntsd->dacloffset); -+ int ppdacl_size, ntacl_size = ppntsd_size - dacl_offset; -+ -+ if (!dacl_offset || -+ (dacl_offset + sizeof(struct smb_acl) > ppntsd_size)) -+ goto out; -+ -+ ppdacl_ptr = (struct smb_acl *)((char *)ppntsd + dacl_offset); 
-+ ppdacl_size = le16_to_cpu(ppdacl_ptr->size); -+ if (ppdacl_size > ntacl_size || -+ ppdacl_size < sizeof(struct smb_acl)) -+ goto out; - -- ppdacl_ptr = (struct smb_acl *)((char *)ppntsd + -- le32_to_cpu(ppntsd->dacloffset)); - set_ntacl_dacl(user_ns, dacl_ptr, ppdacl_ptr, -- nowner_sid_ptr, ngroup_sid_ptr, fattr); -+ ntacl_size - sizeof(struct smb_acl), -+ nowner_sid_ptr, ngroup_sid_ptr, -+ fattr); - } - pntsd->dacloffset = cpu_to_le32(offset); - offset += le16_to_cpu(dacl_ptr->size); -@@ -993,24 +1001,31 @@ int smb_inherit_dacl(struct ksmbd_conn *conn, - struct smb_sid owner_sid, group_sid; - struct dentry *parent = path->dentry->d_parent; - struct user_namespace *user_ns = mnt_user_ns(path->mnt); -- int inherited_flags = 0, flags = 0, i, ace_cnt = 0, nt_size = 0; -- int rc = 0, num_aces, dacloffset, pntsd_type, acl_len; -+ int inherited_flags = 0, flags = 0, i, ace_cnt = 0, nt_size = 0, pdacl_size; -+ int rc = 0, num_aces, dacloffset, pntsd_type, pntsd_size, acl_len, aces_size; - char *aces_base; - bool is_dir = S_ISDIR(d_inode(path->dentry)->i_mode); - -- acl_len = ksmbd_vfs_get_sd_xattr(conn, user_ns, -- parent, &parent_pntsd); -- if (acl_len <= 0) -+ pntsd_size = ksmbd_vfs_get_sd_xattr(conn, user_ns, -+ parent, &parent_pntsd); -+ if (pntsd_size <= 0) - return -ENOENT; - dacloffset = le32_to_cpu(parent_pntsd->dacloffset); -- if (!dacloffset) { -+ if (!dacloffset || (dacloffset + sizeof(struct smb_acl) > pntsd_size)) { - rc = -EINVAL; - goto free_parent_pntsd; - } - - parent_pdacl = (struct smb_acl *)((char *)parent_pntsd + dacloffset); -+ acl_len = pntsd_size - dacloffset; - num_aces = le32_to_cpu(parent_pdacl->num_aces); - pntsd_type = le16_to_cpu(parent_pntsd->type); -+ pdacl_size = le16_to_cpu(parent_pdacl->size); -+ -+ if (pdacl_size > acl_len || pdacl_size < sizeof(struct smb_acl)) { -+ rc = -EINVAL; -+ goto free_parent_pntsd; -+ } - - aces_base = kmalloc(sizeof(struct smb_ace) * num_aces * 2, GFP_KERNEL); - if (!aces_base) { -@@ -1021,11 +1036,23 @@ int smb_inherit_dacl(struct ksmbd_conn *conn, - aces = (struct smb_ace *)aces_base; - parent_aces = (struct smb_ace *)((char *)parent_pdacl + - sizeof(struct smb_acl)); -+ aces_size = acl_len - sizeof(struct smb_acl); - - if (pntsd_type & DACL_AUTO_INHERITED) - inherited_flags = INHERITED_ACE; - - for (i = 0; i < num_aces; i++) { -+ int pace_size; -+ -+ if (offsetof(struct smb_ace, access_req) > aces_size) -+ break; -+ -+ pace_size = le16_to_cpu(parent_aces->size); -+ if (pace_size > aces_size) -+ break; -+ -+ aces_size -= pace_size; -+ - flags = parent_aces->flags; - if (!smb_inherit_flags(flags, is_dir)) - goto pass; -@@ -1070,8 +1097,7 @@ int smb_inherit_dacl(struct ksmbd_conn *conn, - aces = (struct smb_ace *)((char *)aces + le16_to_cpu(aces->size)); - ace_cnt++; - pass: -- parent_aces = -- (struct smb_ace *)((char *)parent_aces + le16_to_cpu(parent_aces->size)); -+ parent_aces = (struct smb_ace *)((char *)parent_aces + pace_size); - } - - if (nt_size > 0) { -@@ -1166,7 +1192,7 @@ int smb_check_perm_dacl(struct ksmbd_conn *conn, struct path *path, - struct smb_ntsd *pntsd = NULL; - struct smb_acl *pdacl; - struct posix_acl *posix_acls; -- int rc = 0, acl_size; -+ int rc = 0, pntsd_size, acl_size, aces_size, pdacl_size, dacl_offset; - struct smb_sid sid; - int granted = le32_to_cpu(*pdaccess & ~FILE_MAXIMAL_ACCESS_LE); - struct smb_ace *ace; -@@ -1175,37 +1201,33 @@ int smb_check_perm_dacl(struct ksmbd_conn *conn, struct path *path, - struct smb_ace *others_ace = NULL; - struct posix_acl_entry *pa_entry; - unsigned int sid_type 
= SIDOWNER; -- char *end_of_acl; -+ unsigned short ace_size; - - ksmbd_debug(SMB, "check permission using windows acl\n"); -- acl_size = ksmbd_vfs_get_sd_xattr(conn, user_ns, -- path->dentry, &pntsd); -- if (acl_size <= 0 || !pntsd || !pntsd->dacloffset) { -- kfree(pntsd); -- return 0; +- mutex_unlock(&ctx->uring_lock); - } -+ pntsd_size = ksmbd_vfs_get_sd_xattr(conn, user_ns, -+ path->dentry, &pntsd); -+ if (pntsd_size <= 0 || !pntsd) -+ goto err_out; -+ -+ dacl_offset = le32_to_cpu(pntsd->dacloffset); -+ if (!dacl_offset || -+ (dacl_offset + sizeof(struct smb_acl) > pntsd_size)) -+ goto err_out; - - pdacl = (struct smb_acl *)((char *)pntsd + le32_to_cpu(pntsd->dacloffset)); -- end_of_acl = ((char *)pntsd) + acl_size; -- if (end_of_acl <= (char *)pdacl) { -- kfree(pntsd); -- return 0; +-} +- +-static bool io_bdev_nowait(struct block_device *bdev) +-{ +- return !bdev || blk_queue_nowait(bdev_get_queue(bdev)); +-} +- +-/* +- * If we tracked the file through the SCM inflight mechanism, we could support +- * any file. For now, just ensure that anything potentially problematic is done +- * inline. +- */ +-static bool __io_file_supports_nowait(struct file *file, int rw) +-{ +- umode_t mode = file_inode(file)->i_mode; +- +- if (S_ISBLK(mode)) { +- if (IS_ENABLED(CONFIG_BLOCK) && +- io_bdev_nowait(I_BDEV(file->f_mapping->host))) +- return true; +- return false; - } -+ acl_size = pntsd_size - dacl_offset; -+ pdacl_size = le16_to_cpu(pdacl->size); - -- if (end_of_acl < (char *)pdacl + le16_to_cpu(pdacl->size) || -- le16_to_cpu(pdacl->size) < sizeof(struct smb_acl)) { -- kfree(pntsd); +- if (S_ISSOCK(mode)) +- return true; +- if (S_ISREG(mode)) { +- if (IS_ENABLED(CONFIG_BLOCK) && +- io_bdev_nowait(file->f_inode->i_sb->s_bdev) && +- file->f_op != &io_uring_fops) +- return true; +- return false; +- } +- +- /* any ->read/write should understand O_NONBLOCK */ +- if (file->f_flags & O_NONBLOCK) +- return true; +- +- if (!(file->f_mode & FMODE_NOWAIT)) +- return false; +- +- if (rw == READ) +- return file->f_op->read_iter != NULL; +- +- return file->f_op->write_iter != NULL; +-} +- +-static bool io_file_supports_nowait(struct io_kiocb *req, int rw) +-{ +- if (rw == READ && (req->flags & REQ_F_NOWAIT_READ)) +- return true; +- else if (rw == WRITE && (req->flags & REQ_F_NOWAIT_WRITE)) +- return true; +- +- return __io_file_supports_nowait(req->file, rw); +-} +- +-static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe, +- int rw) +-{ +- struct io_ring_ctx *ctx = req->ctx; +- struct kiocb *kiocb = &req->rw.kiocb; +- struct file *file = req->file; +- unsigned ioprio; +- int ret; +- +- if (!io_req_ffs_set(req) && S_ISREG(file_inode(file)->i_mode)) +- req->flags |= REQ_F_ISREG; +- +- kiocb->ki_pos = READ_ONCE(sqe->off); +- if (kiocb->ki_pos == -1 && !(file->f_mode & FMODE_STREAM)) { +- req->flags |= REQ_F_CUR_POS; +- kiocb->ki_pos = file->f_pos; +- } +- kiocb->ki_hint = ki_hint_validate(file_write_hint(kiocb->ki_filp)); +- kiocb->ki_flags = iocb_flags(kiocb->ki_filp); +- ret = kiocb_set_rw_flags(kiocb, READ_ONCE(sqe->rw_flags)); +- if (unlikely(ret)) +- return ret; +- +- /* +- * If the file is marked O_NONBLOCK, still allow retry for it if it +- * supports async. Otherwise it's impossible to use O_NONBLOCK files +- * reliably. If not, or it IOCB_NOWAIT is set, don't retry. 
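
The smbacl.c changes above all apply the same hardening when walking a DACL: validate that the ACL header fits inside the xattr that was actually read (pdacl_size against the bytes remaining), then trust each ACE's self-declared size only after checking it against the bytes left, shrinking the remaining count as the walk advances. A sketch of that defensive walk over variable-length records (types simplified, not the kernel structs):

    #include <stddef.h>
    #include <stdint.h>
    #include <string.h>

    struct ace_hdr {       /* minimal variable-length record header */
        uint8_t  type;
        uint8_t  flags;
        uint16_t size;     /* total size of this ACE, header included */
    };

    /* Visit each record in buf[0..len), stopping at the first one whose
     * declared size would run past the end of the buffer. */
    static size_t walk_aces(const uint8_t *buf, size_t len,
                            void (*visit)(const struct ace_hdr *))
    {
        size_t count = 0;

        while (len >= sizeof(struct ace_hdr)) {
            struct ace_hdr hdr;

            memcpy(&hdr, buf, sizeof(hdr)); /* avoid unaligned reads */
            if (hdr.size < sizeof(hdr) || hdr.size > len)
                break;                      /* malformed: bail out */
            visit(&hdr);
            buf += hdr.size;
            len -= hdr.size;
            count++;
        }
        return count;
    }

    static void ignore(const struct ace_hdr *a) { (void)a; }

    int main(void)
    {
        uint8_t buf[16] = {0};
        struct ace_hdr a = { .type = 0, .flags = 0, .size = 8 };

        memcpy(buf, &a, sizeof(a));
        memcpy(buf + 8, &a, sizeof(a));
        return walk_aces(buf, sizeof(buf), ignore) == 2 ? 0 : 1;
    }
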
+- */ +- if ((kiocb->ki_flags & IOCB_NOWAIT) || +- ((file->f_flags & O_NONBLOCK) && !io_file_supports_nowait(req, rw))) +- req->flags |= REQ_F_NOWAIT; +- +- ioprio = READ_ONCE(sqe->ioprio); +- if (ioprio) { +- ret = ioprio_check_cap(ioprio); +- if (ret) +- return ret; +- +- kiocb->ki_ioprio = ioprio; +- } else +- kiocb->ki_ioprio = get_current_ioprio(); +- +- if (ctx->flags & IORING_SETUP_IOPOLL) { +- if (!(kiocb->ki_flags & IOCB_DIRECT) || +- !kiocb->ki_filp->f_op->iopoll) +- return -EOPNOTSUPP; +- +- kiocb->ki_flags |= IOCB_HIPRI | IOCB_ALLOC_CACHE; +- kiocb->ki_complete = io_complete_rw_iopoll; +- req->iopoll_completed = 0; +- } else { +- if (kiocb->ki_flags & IOCB_HIPRI) +- return -EINVAL; +- kiocb->ki_complete = io_complete_rw; +- } +- +- if (req->opcode == IORING_OP_READ_FIXED || +- req->opcode == IORING_OP_WRITE_FIXED) { +- req->imu = NULL; +- io_req_set_rsrc_node(req); +- } +- +- req->rw.addr = READ_ONCE(sqe->addr); +- req->rw.len = READ_ONCE(sqe->len); +- req->buf_index = READ_ONCE(sqe->buf_index); +- return 0; +-} +- +-static inline void io_rw_done(struct kiocb *kiocb, ssize_t ret) +-{ +- switch (ret) { +- case -EIOCBQUEUED: +- break; +- case -ERESTARTSYS: +- case -ERESTARTNOINTR: +- case -ERESTARTNOHAND: +- case -ERESTART_RESTARTBLOCK: +- /* +- * We can't just restart the syscall, since previously +- * submitted sqes may already be in progress. Just fail this +- * IO with EINTR. +- */ +- ret = -EINTR; +- fallthrough; +- default: +- kiocb->ki_complete(kiocb, ret, 0); +- } +-} +- +-static void kiocb_done(struct kiocb *kiocb, ssize_t ret, +- unsigned int issue_flags) +-{ +- struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw.kiocb); +- struct io_async_rw *io = req->async_data; +- +- /* add previously done IO, if any */ +- if (io && io->bytes_done > 0) { +- if (ret < 0) +- ret = io->bytes_done; +- else +- ret += io->bytes_done; +- } +- +- if (req->flags & REQ_F_CUR_POS) +- req->file->f_pos = kiocb->ki_pos; +- if (ret >= 0 && (kiocb->ki_complete == io_complete_rw)) +- __io_complete_rw(req, ret, 0, issue_flags); +- else +- io_rw_done(kiocb, ret); +- +- if (req->flags & REQ_F_REISSUE) { +- req->flags &= ~REQ_F_REISSUE; +- if (io_resubmit_prep(req)) { +- io_req_task_queue_reissue(req); +- } else { +- unsigned int cflags = io_put_rw_kbuf(req); +- struct io_ring_ctx *ctx = req->ctx; +- +- req_set_fail(req); +- if (!(issue_flags & IO_URING_F_NONBLOCK)) { +- mutex_lock(&ctx->uring_lock); +- __io_req_complete(req, issue_flags, ret, cflags); +- mutex_unlock(&ctx->uring_lock); +- } else { +- __io_req_complete(req, issue_flags, ret, cflags); +- } +- } +- } +-} +- +-static int __io_import_fixed(struct io_kiocb *req, int rw, struct iov_iter *iter, +- struct io_mapped_ubuf *imu) +-{ +- size_t len = req->rw.len; +- u64 buf_end, buf_addr = req->rw.addr; +- size_t offset; +- +- if (unlikely(check_add_overflow(buf_addr, (u64)len, &buf_end))) +- return -EFAULT; +- /* not inside the mapped region */ +- if (unlikely(buf_addr < imu->ubuf || buf_end > imu->ubuf_end)) +- return -EFAULT; +- +- /* +- * May not be a start of buffer, set size appropriately +- * and advance us to the beginning. +- */ +- offset = buf_addr - imu->ubuf; +- iov_iter_bvec(iter, rw, imu->bvec, imu->nr_bvecs, offset + len); +- +- if (offset) { +- /* +- * Don't use iov_iter_advance() here, as it's really slow for +- * using the latter parts of a big fixed buffer - it iterates +- * over each segment manually. 
We can cheat a bit here, because +- * we know that: +- * +- * 1) it's a BVEC iter, we set it up +- * 2) all bvecs are PAGE_SIZE in size, except potentially the +- * first and last bvec +- * +- * So just find our index, and adjust the iterator afterwards. +- * If the offset is within the first bvec (or the whole first +- * bvec, just use iov_iter_advance(). This makes it easier +- * since we can just skip the first segment, which may not +- * be PAGE_SIZE aligned. +- */ +- const struct bio_vec *bvec = imu->bvec; +- +- if (offset <= bvec->bv_len) { +- iov_iter_advance(iter, offset); +- } else { +- unsigned long seg_skip; +- +- /* skip first vec */ +- offset -= bvec->bv_len; +- seg_skip = 1 + (offset >> PAGE_SHIFT); +- +- iter->bvec = bvec + seg_skip; +- iter->nr_segs -= seg_skip; +- iter->count -= bvec->bv_len + offset; +- iter->iov_offset = offset & ~PAGE_MASK; +- } +- } +- +- return 0; +-} +- +-static int io_import_fixed(struct io_kiocb *req, int rw, struct iov_iter *iter) +-{ +- struct io_ring_ctx *ctx = req->ctx; +- struct io_mapped_ubuf *imu = req->imu; +- u16 index, buf_index = req->buf_index; +- +- if (likely(!imu)) { +- if (unlikely(buf_index >= ctx->nr_user_bufs)) +- return -EFAULT; +- index = array_index_nospec(buf_index, ctx->nr_user_bufs); +- imu = READ_ONCE(ctx->user_bufs[index]); +- req->imu = imu; +- } +- return __io_import_fixed(req, rw, iter, imu); +-} +- +-static void io_ring_submit_unlock(struct io_ring_ctx *ctx, bool needs_lock) +-{ +- if (needs_lock) +- mutex_unlock(&ctx->uring_lock); +-} +- +-static void io_ring_submit_lock(struct io_ring_ctx *ctx, bool needs_lock) +-{ +- /* +- * "Normal" inline submissions always hold the uring_lock, since we +- * grab it from the system call. Same is true for the SQPOLL offload. +- * The only exception is when we've detached the request and issue it +- * from an async worker thread, grab the lock for that case. 
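
__io_import_fixed(), removed above, leans on the registered-buffer invariant that every bvec except the first and last is exactly one page: the segment holding a given byte offset can then be found with a shift instead of walking segments one by one. The arithmetic, worked through for one example offset:

    #include <stdio.h>

    #define PAGE_SHIFT 12
    #define PAGE_SIZE  (1UL << PAGE_SHIFT)
    #define PAGE_MASK  (~(PAGE_SIZE - 1))

    int main(void)
    {
        unsigned long first_len = 2048;   /* the first bvec may be short */
        unsigned long offset    = 150000; /* byte offset into the buffer */

        if (offset > first_len) {
            unsigned long seg_skip, in_page;

            offset -= first_len;               /* skip the short first vec */
            seg_skip = 1 + (offset >> PAGE_SHIFT);
            in_page  = offset & ~PAGE_MASK;

            /* 147952 bytes past the first vec is 36 whole pages plus 496
             * bytes, so skip 37 segments and start 496 bytes in. */
            printf("skip %lu segments, start %lu bytes in\n",
                   seg_skip, in_page);
        }
        return 0;
    }
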
+- */ +- if (needs_lock) +- mutex_lock(&ctx->uring_lock); +-} +- +-static struct io_buffer *io_buffer_select(struct io_kiocb *req, size_t *len, +- int bgid, struct io_buffer *kbuf, +- bool needs_lock) +-{ +- struct io_buffer *head; +- +- if (req->flags & REQ_F_BUFFER_SELECTED) +- return kbuf; +- +- io_ring_submit_lock(req->ctx, needs_lock); +- +- lockdep_assert_held(&req->ctx->uring_lock); +- +- head = xa_load(&req->ctx->io_buffers, bgid); +- if (head) { +- if (!list_empty(&head->list)) { +- kbuf = list_last_entry(&head->list, struct io_buffer, +- list); +- list_del(&kbuf->list); +- } else { +- kbuf = head; +- xa_erase(&req->ctx->io_buffers, bgid); +- } +- if (*len > kbuf->len) +- *len = kbuf->len; +- } else { +- kbuf = ERR_PTR(-ENOBUFS); +- } +- +- io_ring_submit_unlock(req->ctx, needs_lock); +- +- return kbuf; +-} +- +-static void __user *io_rw_buffer_select(struct io_kiocb *req, size_t *len, +- bool needs_lock) +-{ +- struct io_buffer *kbuf; +- u16 bgid; +- +- kbuf = (struct io_buffer *) (unsigned long) req->rw.addr; +- bgid = req->buf_index; +- kbuf = io_buffer_select(req, len, bgid, kbuf, needs_lock); +- if (IS_ERR(kbuf)) +- return kbuf; +- req->rw.addr = (u64) (unsigned long) kbuf; +- req->flags |= REQ_F_BUFFER_SELECTED; +- return u64_to_user_ptr(kbuf->addr); +-} +- +-#ifdef CONFIG_COMPAT +-static ssize_t io_compat_import(struct io_kiocb *req, struct iovec *iov, +- bool needs_lock) +-{ +- struct compat_iovec __user *uiov; +- compat_ssize_t clen; +- void __user *buf; +- ssize_t len; +- +- uiov = u64_to_user_ptr(req->rw.addr); +- if (!access_ok(uiov, sizeof(*uiov))) +- return -EFAULT; +- if (__get_user(clen, &uiov->iov_len)) +- return -EFAULT; +- if (clen < 0) +- return -EINVAL; +- +- len = clen; +- buf = io_rw_buffer_select(req, &len, needs_lock); +- if (IS_ERR(buf)) +- return PTR_ERR(buf); +- iov[0].iov_base = buf; +- iov[0].iov_len = (compat_size_t) len; +- return 0; +-} +-#endif +- +-static ssize_t __io_iov_buffer_select(struct io_kiocb *req, struct iovec *iov, +- bool needs_lock) +-{ +- struct iovec __user *uiov = u64_to_user_ptr(req->rw.addr); +- void __user *buf; +- ssize_t len; +- +- if (copy_from_user(iov, uiov, sizeof(*uiov))) +- return -EFAULT; +- +- len = iov[0].iov_len; +- if (len < 0) +- return -EINVAL; +- buf = io_rw_buffer_select(req, &len, needs_lock); +- if (IS_ERR(buf)) +- return PTR_ERR(buf); +- iov[0].iov_base = buf; +- iov[0].iov_len = len; +- return 0; +-} +- +-static ssize_t io_iov_buffer_select(struct io_kiocb *req, struct iovec *iov, +- bool needs_lock) +-{ +- if (req->flags & REQ_F_BUFFER_SELECTED) { +- struct io_buffer *kbuf; +- +- kbuf = (struct io_buffer *) (unsigned long) req->rw.addr; +- iov[0].iov_base = u64_to_user_ptr(kbuf->addr); +- iov[0].iov_len = kbuf->len; - return 0; - } -+ if (pdacl_size > acl_size || pdacl_size < sizeof(struct smb_acl)) -+ goto err_out; - - if (!pdacl->num_aces) { -- if (!(le16_to_cpu(pdacl->size) - sizeof(struct smb_acl)) && -+ if (!(pdacl_size - sizeof(struct smb_acl)) && - *pdaccess & ~(FILE_READ_CONTROL_LE | FILE_WRITE_DAC_LE)) { - rc = -EACCES; - goto err_out; - } -- kfree(pntsd); +- if (req->rw.len != 1) +- return -EINVAL; +- +-#ifdef CONFIG_COMPAT +- if (req->ctx->compat) +- return io_compat_import(req, iov, needs_lock); +-#endif +- +- return __io_iov_buffer_select(req, iov, needs_lock); +-} +- +-static int io_import_iovec(int rw, struct io_kiocb *req, struct iovec **iovec, +- struct iov_iter *iter, bool needs_lock) +-{ +- void __user *buf = u64_to_user_ptr(req->rw.addr); +- size_t sqe_len = req->rw.len; +- u8 opcode = 
req->opcode; +- ssize_t ret; +- +- if (opcode == IORING_OP_READ_FIXED || opcode == IORING_OP_WRITE_FIXED) { +- *iovec = NULL; +- return io_import_fixed(req, rw, iter); +- } +- +- /* buffer index only valid with fixed read/write, or buffer select */ +- if (req->buf_index && !(req->flags & REQ_F_BUFFER_SELECT)) +- return -EINVAL; +- +- if (opcode == IORING_OP_READ || opcode == IORING_OP_WRITE) { +- if (req->flags & REQ_F_BUFFER_SELECT) { +- buf = io_rw_buffer_select(req, &sqe_len, needs_lock); +- if (IS_ERR(buf)) +- return PTR_ERR(buf); +- req->rw.len = sqe_len; +- } +- +- ret = import_single_range(rw, buf, sqe_len, *iovec, iter); +- *iovec = NULL; +- return ret; +- } +- +- if (req->flags & REQ_F_BUFFER_SELECT) { +- ret = io_iov_buffer_select(req, *iovec, needs_lock); +- if (!ret) +- iov_iter_init(iter, rw, *iovec, 1, (*iovec)->iov_len); +- *iovec = NULL; +- return ret; +- } +- +- return __import_iovec(rw, buf, sqe_len, UIO_FASTIOV, iovec, iter, +- req->ctx->compat); +-} +- +-static inline loff_t *io_kiocb_ppos(struct kiocb *kiocb) +-{ +- return (kiocb->ki_filp->f_mode & FMODE_STREAM) ? NULL : &kiocb->ki_pos; +-} +- +-/* +- * For files that don't have ->read_iter() and ->write_iter(), handle them +- * by looping over ->read() or ->write() manually. +- */ +-static ssize_t loop_rw_iter(int rw, struct io_kiocb *req, struct iov_iter *iter) +-{ +- struct kiocb *kiocb = &req->rw.kiocb; +- struct file *file = req->file; +- ssize_t ret = 0; +- +- /* +- * Don't support polled IO through this interface, and we can't +- * support non-blocking either. For the latter, this just causes +- * the kiocb to be handled from an async context. +- */ +- if (kiocb->ki_flags & IOCB_HIPRI) +- return -EOPNOTSUPP; +- if (kiocb->ki_flags & IOCB_NOWAIT) +- return -EAGAIN; +- +- while (iov_iter_count(iter)) { +- struct iovec iovec; +- ssize_t nr; +- +- if (!iov_iter_is_bvec(iter)) { +- iovec = iov_iter_iovec(iter); +- } else { +- iovec.iov_base = u64_to_user_ptr(req->rw.addr); +- iovec.iov_len = req->rw.len; +- } +- +- if (rw == READ) { +- nr = file->f_op->read(file, iovec.iov_base, +- iovec.iov_len, io_kiocb_ppos(kiocb)); +- } else { +- nr = file->f_op->write(file, iovec.iov_base, +- iovec.iov_len, io_kiocb_ppos(kiocb)); +- } +- +- if (nr < 0) { +- if (!ret) +- ret = nr; +- break; +- } +- if (!iov_iter_is_bvec(iter)) { +- iov_iter_advance(iter, nr); +- } else { +- req->rw.len -= nr; +- req->rw.addr += nr; +- } +- ret += nr; +- if (nr != iovec.iov_len) +- break; +- } +- +- return ret; +-} +- +-static void io_req_map_rw(struct io_kiocb *req, const struct iovec *iovec, +- const struct iovec *fast_iov, struct iov_iter *iter) +-{ +- struct io_async_rw *rw = req->async_data; +- +- memcpy(&rw->iter, iter, sizeof(*iter)); +- rw->free_iovec = iovec; +- rw->bytes_done = 0; +- /* can only be fixed buffers, no need to do anything */ +- if (iov_iter_is_bvec(iter)) +- return; +- if (!iovec) { +- unsigned iov_off = 0; +- +- rw->iter.iov = rw->fast_iov; +- if (iter->iov != fast_iov) { +- iov_off = iter->iov - fast_iov; +- rw->iter.iov += iov_off; +- } +- if (rw->fast_iov != fast_iov) +- memcpy(rw->fast_iov + iov_off, fast_iov + iov_off, +- sizeof(struct iovec) * iter->nr_segs); +- } else { +- req->flags |= REQ_F_NEED_CLEANUP; +- } +-} +- +-static inline int io_alloc_async_data(struct io_kiocb *req) +-{ +- WARN_ON_ONCE(!io_op_defs[req->opcode].async_size); +- req->async_data = kmalloc(io_op_defs[req->opcode].async_size, GFP_KERNEL); +- return req->async_data == NULL; +-} +- +-static int io_setup_async_rw(struct io_kiocb *req, const 
struct iovec *iovec, +- const struct iovec *fast_iov, +- struct iov_iter *iter, bool force) +-{ +- if (!force && !io_op_defs[req->opcode].needs_async_setup) - return 0; -+ goto err_out; - } - - if (*pdaccess & FILE_MAXIMAL_ACCESS_LE) { -@@ -1213,11 +1235,16 @@ int smb_check_perm_dacl(struct ksmbd_conn *conn, struct path *path, - DELETE; - - ace = (struct smb_ace *)((char *)pdacl + sizeof(struct smb_acl)); -+ aces_size = acl_size - sizeof(struct smb_acl); - for (i = 0; i < le32_to_cpu(pdacl->num_aces); i++) { -+ if (offsetof(struct smb_ace, access_req) > aces_size) -+ break; -+ ace_size = le16_to_cpu(ace->size); -+ if (ace_size > aces_size) -+ break; -+ aces_size -= ace_size; - granted |= le32_to_cpu(ace->access_req); - ace = (struct smb_ace *)((char *)ace + le16_to_cpu(ace->size)); -- if (end_of_acl < (char *)ace) -- goto err_out; - } - - if (!pdacl->num_aces) -@@ -1229,7 +1256,15 @@ int smb_check_perm_dacl(struct ksmbd_conn *conn, struct path *path, - id_to_sid(uid, sid_type, &sid); - - ace = (struct smb_ace *)((char *)pdacl + sizeof(struct smb_acl)); -+ aces_size = acl_size - sizeof(struct smb_acl); - for (i = 0; i < le32_to_cpu(pdacl->num_aces); i++) { -+ if (offsetof(struct smb_ace, access_req) > aces_size) -+ break; -+ ace_size = le16_to_cpu(ace->size); -+ if (ace_size > aces_size) -+ break; -+ aces_size -= ace_size; -+ - if (!compare_sids(&sid, &ace->sid) || - !compare_sids(&sid_unix_NFS_mode, &ace->sid)) { - found = 1; -@@ -1239,8 +1274,6 @@ int smb_check_perm_dacl(struct ksmbd_conn *conn, struct path *path, - others_ace = ace; - - ace = (struct smb_ace *)((char *)ace + le16_to_cpu(ace->size)); -- if (end_of_acl < (char *)ace) -- goto err_out; - } - - if (*pdaccess & FILE_MAXIMAL_ACCESS_LE && found) { -@@ -1274,6 +1307,7 @@ int smb_check_perm_dacl(struct ksmbd_conn *conn, struct path *path, - if (!access_bits) - access_bits = - SET_MINIMUM_RIGHTS; -+ posix_acl_release(posix_acls); - goto check_access_bits; - } - } -diff --git a/fs/ksmbd/smbacl.h b/fs/ksmbd/smbacl.h -index 73e08cad412bd..fcb2c83f29928 100644 ---- a/fs/ksmbd/smbacl.h -+++ b/fs/ksmbd/smbacl.h -@@ -11,6 +11,7 @@ - #include <linux/fs.h> - #include <linux/namei.h> - #include <linux/posix_acl.h> -+#include <linux/mnt_idmapping.h> - - #include "mgmt/tree_connect.h" - -@@ -192,7 +193,7 @@ struct posix_acl_state { - int parse_sec_desc(struct user_namespace *user_ns, struct smb_ntsd *pntsd, - int acl_len, struct smb_fattr *fattr); - int build_sec_desc(struct user_namespace *user_ns, struct smb_ntsd *pntsd, -- struct smb_ntsd *ppntsd, int addition_info, -+ struct smb_ntsd *ppntsd, int ppntsd_size, int addition_info, - __u32 *secdesclen, struct smb_fattr *fattr); - int init_acl_state(struct posix_acl_state *state, int cnt); - void free_acl_state(struct posix_acl_state *state); -@@ -216,7 +217,7 @@ static inline uid_t posix_acl_uid_translate(struct user_namespace *mnt_userns, - kuid_t kuid; - - /* If this is an idmapped mount, apply the idmapping. */ -- kuid = kuid_into_mnt(mnt_userns, pace->e_uid); -+ kuid = mapped_kuid_fs(mnt_userns, &init_user_ns, pace->e_uid); - - /* Translate the kuid into a userspace id ksmbd would see. */ - return from_kuid(&init_user_ns, kuid); -@@ -228,7 +229,7 @@ static inline gid_t posix_acl_gid_translate(struct user_namespace *mnt_userns, - kgid_t kgid; - - /* If this is an idmapped mount, apply the idmapping. */ -- kgid = kgid_into_mnt(mnt_userns, pace->e_gid); -+ kgid = mapped_kgid_fs(mnt_userns, &init_user_ns, pace->e_gid); - - /* Translate the kgid into a userspace id ksmbd would see. 
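The two smb_check_perm_dacl() loops patched above share one fix: when iterating a counted list of variable-length ACEs, both the count and each entry's self-reported size come off the wire, so every step has to be re-checked against the bytes that actually remain. The safe shape of such a walk, sketched with an illustrative record layout (ace_hdr and walk_aces() are made up for the example; ksmbd's real struct smb_ace carries more fields):

    #include <stddef.h>
    #include <stdint.h>

    /* Illustrative on-wire record: a size field covering the whole record. */
    struct ace_hdr {
            uint16_t size;
    };

    /* Visits at most num_aces records inside buf[0..buf_len), stopping as
     * soon as a header or a declared record body would run past the end
     * of the buffer. Returns how many records were fully in bounds. */
    static size_t walk_aces(const uint8_t *buf, size_t buf_len,
                            unsigned int num_aces)
    {
            size_t off = 0, seen = 0;

            while (seen < num_aces) {
                    const struct ace_hdr *h;

                    if (buf_len - off < sizeof(*h))
                            break;          /* header would overrun */
                    h = (const struct ace_hdr *)(buf + off);
                    if (h->size < sizeof(*h) || h->size > buf_len - off)
                            break;          /* declared size is bogus */
                    off += h->size;         /* cannot overflow: bounded above */
                    seen++;
            }
            return seen;
    }
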
*/ - return from_kgid(&init_user_ns, kgid); -diff --git a/fs/ksmbd/transport_ipc.c b/fs/ksmbd/transport_ipc.c -index 1acf1892a466c..3ad6881e0f7ed 100644 ---- a/fs/ksmbd/transport_ipc.c -+++ b/fs/ksmbd/transport_ipc.c -@@ -301,6 +301,8 @@ static int ipc_server_config_on_startup(struct ksmbd_startup_request *req) - init_smb2_max_write_size(req->smb2_max_write); - if (req->smb2_max_trans) - init_smb2_max_trans_size(req->smb2_max_trans); -+ if (req->smb2_max_credits) -+ init_smb2_max_credits(req->smb2_max_credits); - - ret = ksmbd_set_netbios_name(req->netbios_name); - ret |= ksmbd_set_server_string(req->server_string); -diff --git a/fs/ksmbd/transport_tcp.c b/fs/ksmbd/transport_tcp.c -index c14320e03b698..755329c295cab 100644 ---- a/fs/ksmbd/transport_tcp.c -+++ b/fs/ksmbd/transport_tcp.c -@@ -230,7 +230,7 @@ static int ksmbd_kthread_fn(void *p) - break; - } - ret = kernel_accept(iface->ksmbd_socket, &client_sk, -- O_NONBLOCK); -+ SOCK_NONBLOCK); - mutex_unlock(&iface->sock_release_lock); - if (ret) { - if (ret == -EAGAIN) -@@ -404,7 +404,7 @@ static int create_socket(struct interface *iface) - &ksmbd_socket); - if (ret) { - pr_err("Can't create socket for ipv4: %d\n", ret); -- goto out_error; -+ goto out_clear; - } - - sin.sin_family = PF_INET; -@@ -462,6 +462,7 @@ static int create_socket(struct interface *iface) - - out_error: - tcp_destroy_socket(ksmbd_socket); -+out_clear: - iface->ksmbd_socket = NULL; - return ret; - } -diff --git a/fs/ksmbd/vfs.c b/fs/ksmbd/vfs.c -index 835b384b08959..513989b1c8cd7 100644 ---- a/fs/ksmbd/vfs.c -+++ b/fs/ksmbd/vfs.c -@@ -1018,7 +1018,9 @@ int ksmbd_vfs_zero_data(struct ksmbd_work *work, struct ksmbd_file *fp, - FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, - off, len); - -- return vfs_fallocate(fp->filp, FALLOC_FL_ZERO_RANGE, off, len); -+ return vfs_fallocate(fp->filp, -+ FALLOC_FL_ZERO_RANGE | FALLOC_FL_KEEP_SIZE, -+ off, len); - } - - int ksmbd_vfs_fqar_lseek(struct ksmbd_file *fp, loff_t start, loff_t length, -@@ -1049,7 +1051,7 @@ int ksmbd_vfs_fqar_lseek(struct ksmbd_file *fp, loff_t start, loff_t length, - *out_count = 0; - end = start + length; - while (start < end && *out_count < in_count) { -- extent_start = f->f_op->llseek(f, start, SEEK_DATA); -+ extent_start = vfs_llseek(f, start, SEEK_DATA); - if (extent_start < 0) { - if (extent_start != -ENXIO) - ret = (int)extent_start; -@@ -1059,7 +1061,7 @@ int ksmbd_vfs_fqar_lseek(struct ksmbd_file *fp, loff_t start, loff_t length, - if (extent_start >= end) - break; - -- extent_end = f->f_op->llseek(f, extent_start, SEEK_HOLE); -+ extent_end = vfs_llseek(f, extent_start, SEEK_HOLE); - if (extent_end < 0) { - if (extent_end != -ENXIO) - ret = (int)extent_end; -@@ -1541,6 +1543,11 @@ int ksmbd_vfs_get_sd_xattr(struct ksmbd_conn *conn, - } - - *pntsd = acl.sd_buf; -+ if (acl.sd_size < sizeof(struct smb_ntsd)) { -+ pr_err("sd size is invalid\n"); -+ goto out_free; -+ } -+ - (*pntsd)->osidoffset = cpu_to_le32(le32_to_cpu((*pntsd)->osidoffset) - - NDR_NTSD_OFFSETOF); - (*pntsd)->gsidoffset = cpu_to_le32(le32_to_cpu((*pntsd)->gsidoffset) - -@@ -1780,6 +1787,10 @@ int ksmbd_vfs_copy_file_ranges(struct ksmbd_work *work, - - ret = vfs_copy_file_range(src_fp->filp, src_off, - dst_fp->filp, dst_off, len, 0); -+ if (ret == -EOPNOTSUPP || ret == -EXDEV) -+ ret = generic_copy_file_range(src_fp->filp, src_off, -+ dst_fp->filp, dst_off, -+ len, 0); - if (ret < 0) - return ret; - -diff --git a/fs/ksmbd/vfs.h b/fs/ksmbd/vfs.h -index b0d5b8feb4a36..432c947731779 100644 ---- a/fs/ksmbd/vfs.h -+++ b/fs/ksmbd/vfs.h -@@ 
-86,6 +86,7 @@ struct ksmbd_dir_info { - int last_entry_offset; - bool hide_dot_file; - int flags; -+ int last_entry_off_align; - }; - - struct ksmbd_readdir_data { -diff --git a/fs/ksmbd/vfs_cache.c b/fs/ksmbd/vfs_cache.c -index 29c1db66bd0f7..8b873d92d7854 100644 ---- a/fs/ksmbd/vfs_cache.c -+++ b/fs/ksmbd/vfs_cache.c -@@ -497,6 +497,7 @@ struct ksmbd_file *ksmbd_lookup_fd_inode(struct inode *inode) - list_for_each_entry(lfp, &ci->m_fp_list, node) { - if (inode == file_inode(lfp->filp)) { - atomic_dec(&ci->m_count); -+ lfp = ksmbd_fp_get(lfp); - read_unlock(&ci->m_lock); - return lfp; - } -diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c -index e10ae2c41279e..1c9214801e69e 100644 ---- a/fs/lockd/svc4proc.c -+++ b/fs/lockd/svc4proc.c -@@ -32,6 +32,10 @@ nlm4svc_retrieve_args(struct svc_rqst *rqstp, struct nlm_args *argp, - if (!nlmsvc_ops) - return nlm_lck_denied_nolocks; - -+ if (lock->lock_start > OFFSET_MAX || -+ (lock->lock_len && ((lock->lock_len - 1) > (OFFSET_MAX - lock->lock_start)))) -+ return nlm4_fbig; -+ - /* Obtain host handle */ - if (!(host = nlmsvc_lookup_host(rqstp, lock->caller, lock->len)) - || (argp->monitor && nsm_monitor(host) < 0)) -@@ -50,6 +54,10 @@ nlm4svc_retrieve_args(struct svc_rqst *rqstp, struct nlm_args *argp, - /* Set up the missing parts of the file_lock structure */ - lock->fl.fl_file = file->f_file[mode]; - lock->fl.fl_pid = current->tgid; -+ lock->fl.fl_start = (loff_t)lock->lock_start; -+ lock->fl.fl_end = lock->lock_len ? -+ (loff_t)(lock->lock_start + lock->lock_len - 1) : -+ OFFSET_MAX; - lock->fl.fl_lmops = &nlmsvc_lock_operations; - nlmsvc_locks_init_private(&lock->fl, host, (pid_t)lock->svid); - if (!lock->fl.fl_owner) { -diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c -index cb3a7512c33ec..e1c4617de7714 100644 ---- a/fs/lockd/svcsubs.c -+++ b/fs/lockd/svcsubs.c -@@ -176,22 +176,25 @@ nlm_delete_file(struct nlm_file *file) - } - } - --static int nlm_unlock_files(struct nlm_file *file) -+static int nlm_unlock_files(struct nlm_file *file, fl_owner_t owner) - { - struct file_lock lock; -- struct file *f; - -+ locks_init_lock(&lock); - lock.fl_type = F_UNLCK; - lock.fl_start = 0; - lock.fl_end = OFFSET_MAX; -- for (f = file->f_file[0]; f <= file->f_file[1]; f++) { -- if (f && vfs_lock_file(f, F_SETLK, &lock, NULL) < 0) { -- pr_warn("lockd: unlock failure in %s:%d\n", -- __FILE__, __LINE__); -- return 1; +- if (!req->async_data) { +- struct io_async_rw *iorw; +- +- if (io_alloc_async_data(req)) { +- kfree(iovec); +- return -ENOMEM; - } +- +- io_req_map_rw(req, iovec, fast_iov, iter); +- iorw = req->async_data; +- /* we've copied and mapped the iter, ensure state is saved */ +- iov_iter_save_state(&iorw->iter, &iorw->iter_state); - } -+ lock.fl_owner = owner; -+ if (file->f_file[O_RDONLY] && -+ vfs_lock_file(file->f_file[O_RDONLY], F_SETLK, &lock, NULL)) -+ goto out_err; -+ if (file->f_file[O_WRONLY] && -+ vfs_lock_file(file->f_file[O_WRONLY], F_SETLK, &lock, NULL)) -+ goto out_err; - return 0; -+out_err: -+ pr_warn("lockd: unlock failure in %s:%d\n", __FILE__, __LINE__); -+ return 1; - } - - /* -@@ -223,7 +226,7 @@ again: - if (match(lockhost, host)) { - - spin_unlock(&flctx->flc_lock); -- if (nlm_unlock_files(file)) -+ if (nlm_unlock_files(file, fl->fl_owner)) - return 1; - goto again; - } -@@ -280,11 +283,10 @@ nlm_file_inuse(struct nlm_file *file) - - static void nlm_close_files(struct nlm_file *file) - { -- struct file *f; +- return 0; +-} - -- for (f = file->f_file[0]; f <= file->f_file[1]; f++) -- if (f) -- nlmsvc_ops->fclose(f); -+ 
if (file->f_file[O_RDONLY]) -+ nlmsvc_ops->fclose(file->f_file[O_RDONLY]); -+ if (file->f_file[O_WRONLY]) -+ nlmsvc_ops->fclose(file->f_file[O_WRONLY]); - } - - /* -diff --git a/fs/lockd/xdr4.c b/fs/lockd/xdr4.c -index 98e957e4566c2..72f7d190fb3b2 100644 ---- a/fs/lockd/xdr4.c -+++ b/fs/lockd/xdr4.c -@@ -20,13 +20,6 @@ - - #include "svcxdr.h" - --static inline loff_t --s64_to_loff_t(__s64 offset) +-static inline int io_rw_prep_async(struct io_kiocb *req, int rw) -{ -- return (loff_t)offset; +- struct io_async_rw *iorw = req->async_data; +- struct iovec *iov = iorw->fast_iov; +- int ret; +- +- ret = io_import_iovec(rw, req, &iov, &iorw->iter, false); +- if (unlikely(ret < 0)) +- return ret; +- +- iorw->bytes_done = 0; +- iorw->free_iovec = iov; +- if (iov) +- req->flags |= REQ_F_NEED_CLEANUP; +- iov_iter_save_state(&iorw->iter, &iorw->iter_state); +- return 0; -} - +-static int io_read_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) +-{ +- if (unlikely(!(req->file->f_mode & FMODE_READ))) +- return -EBADF; +- return io_prep_rw(req, sqe, READ); +-} - - static inline s64 - loff_t_to_s64(loff_t offset) - { -@@ -70,8 +63,6 @@ static bool - svcxdr_decode_lock(struct xdr_stream *xdr, struct nlm_lock *lock) - { - struct file_lock *fl = &lock->fl; -- u64 len, start; -- s64 end; - - if (!svcxdr_decode_string(xdr, &lock->caller, &lock->len)) - return false; -@@ -81,20 +72,14 @@ svcxdr_decode_lock(struct xdr_stream *xdr, struct nlm_lock *lock) - return false; - if (xdr_stream_decode_u32(xdr, &lock->svid) < 0) - return false; -- if (xdr_stream_decode_u64(xdr, &start) < 0) -+ if (xdr_stream_decode_u64(xdr, &lock->lock_start) < 0) - return false; -- if (xdr_stream_decode_u64(xdr, &len) < 0) -+ if (xdr_stream_decode_u64(xdr, &lock->lock_len) < 0) - return false; - - locks_init_lock(fl); - fl->fl_flags = FL_POSIX; - fl->fl_type = F_RDLCK; -- end = start + len - 1; -- fl->fl_start = s64_to_loff_t(start); -- if (len == 0 || end < 0) -- fl->fl_end = OFFSET_MAX; +-/* +- * This is our waitqueue callback handler, registered through lock_page_async() +- * when we initially tried to do the IO with the iocb armed our waitqueue. +- * This gets called when the page is unlocked, and we generally expect that to +- * happen when the page IO is completed and the page is now uptodate. This will +- * queue a task_work based retry of the operation, attempting to copy the data +- * again. If the latter fails because the page was NOT uptodate, then we will +- * do a thread based blocking retry of the operation. That's the unexpected +- * slow path. +- */ +-static int io_async_buf_func(struct wait_queue_entry *wait, unsigned mode, +- int sync, void *arg) +-{ +- struct wait_page_queue *wpq; +- struct io_kiocb *req = wait->private; +- struct wait_page_key *key = arg; +- +- wpq = container_of(wait, struct wait_page_queue, wait); +- +- if (!wake_page_match(wpq, key)) +- return 0; +- +- req->rw.kiocb.ki_flags &= ~IOCB_WAITQ; +- list_del_init(&wait->entry); +- io_req_task_queue(req); +- return 1; +-} +- +-/* +- * This controls whether a given IO request should be armed for async page +- * based retry. If we return false here, the request is handed to the async +- * worker threads for retry. If we're doing buffered reads on a regular file, +- * we prepare a private wait_page_queue entry and retry the operation. This +- * will either succeed because the page is now uptodate and unlocked, or it +- * will register a callback when the page is unlocked at IO completion. 
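Taken together, the lockd hunks above (svc4proc.c and xdr4.c) move range validation out of the XDR decoder: the raw 64-bit (start, len) pair is now carried through, and nlm4svc_retrieve_args() rejects anything not representable as an inclusive loff_t range with nlm4_fbig before building fl_start/fl_end. A self-contained rendering of that conversion (nlm4_range() is an illustrative name):

    #include <stdbool.h>
    #include <stdint.h>

    #define OFFSET_MAX INT64_MAX   /* stands in for the kernel's loff_t max */

    /* len == 0 means "from start to end of file". Returns false exactly
     * when start or start + len - 1 cannot be represented, i.e. the
     * cases the server must now answer with NLM4_FBIG. */
    static bool nlm4_range(uint64_t start, uint64_t len,
                           int64_t *fl_start, int64_t *fl_end)
    {
            if (start > (uint64_t)OFFSET_MAX)
                    return false;
            if (len && (len - 1) > (uint64_t)OFFSET_MAX - start)
                    return false;
            *fl_start = (int64_t)start;
            *fl_end = len ? (int64_t)(start + len - 1) : OFFSET_MAX;
            return true;
    }
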
Through +- * that callback, io_uring uses task_work to setup a retry of the operation. +- * That retry will attempt the buffered read again. The retry will generally +- * succeed, or in rare cases where it fails, we then fall back to using the +- * async worker threads for a blocking retry. +- */ +-static bool io_rw_should_retry(struct io_kiocb *req) +-{ +- struct io_async_rw *rw = req->async_data; +- struct wait_page_queue *wait = &rw->wpq; +- struct kiocb *kiocb = &req->rw.kiocb; +- +- /* never retry for NOWAIT, we just complete with -EAGAIN */ +- if (req->flags & REQ_F_NOWAIT) +- return false; +- +- /* Only for buffered IO */ +- if (kiocb->ki_flags & (IOCB_DIRECT | IOCB_HIPRI)) +- return false; +- +- /* +- * just use poll if we can, and don't attempt if the fs doesn't +- * support callback based unlocks +- */ +- if (file_can_poll(req->file) || !(req->file->f_mode & FMODE_BUF_RASYNC)) +- return false; +- +- wait->wait.func = io_async_buf_func; +- wait->wait.private = req; +- wait->wait.flags = 0; +- INIT_LIST_HEAD(&wait->wait.entry); +- kiocb->ki_flags |= IOCB_WAITQ; +- kiocb->ki_flags &= ~IOCB_NOWAIT; +- kiocb->ki_waitq = wait; +- return true; +-} +- +-static inline int io_iter_do_read(struct io_kiocb *req, struct iov_iter *iter) +-{ +- if (req->file->f_op->read_iter) +- return call_read_iter(req->file, &req->rw.kiocb, iter); +- else if (req->file->f_op->read) +- return loop_rw_iter(READ, req, iter); - else -- fl->fl_end = s64_to_loff_t(end); - - return true; - } -diff --git a/fs/mbcache.c b/fs/mbcache.c -index 97c54d3a22276..2010bc80a3f2d 100644 ---- a/fs/mbcache.c -+++ b/fs/mbcache.c -@@ -11,7 +11,7 @@ - /* - * Mbcache is a simple key-value store. Keys need not be unique, however - * key-value pairs are expected to be unique (we use this fact in -- * mb_cache_entry_delete()). -+ * mb_cache_entry_delete_or_get()). - * - * Ext2 and ext4 use this cache for deduplication of extended attribute blocks. - * Ext4 also uses it for deduplication of xattr values stored in inodes. -@@ -125,6 +125,19 @@ void __mb_cache_entry_free(struct mb_cache_entry *entry) - } - EXPORT_SYMBOL(__mb_cache_entry_free); - -+/* -+ * mb_cache_entry_wait_unused - wait to be the last user of the entry -+ * -+ * @entry - entry to work on -+ * -+ * Wait to be the last user of the entry. -+ */ -+void mb_cache_entry_wait_unused(struct mb_cache_entry *entry) -+{ -+ wait_var_event(&entry->e_refcnt, atomic_read(&entry->e_refcnt) <= 3); -+} -+EXPORT_SYMBOL(mb_cache_entry_wait_unused); -+ - static struct mb_cache_entry *__entry_find(struct mb_cache *cache, - struct mb_cache_entry *entry, - u32 key) -@@ -217,7 +230,7 @@ out: - } - EXPORT_SYMBOL(mb_cache_entry_get); - --/* mb_cache_entry_delete - remove a cache entry -+/* mb_cache_entry_delete - try to remove a cache entry - * @cache - cache we work with - * @key - key - * @value - value -@@ -254,6 +267,55 @@ void mb_cache_entry_delete(struct mb_cache *cache, u32 key, u64 value) - } - EXPORT_SYMBOL(mb_cache_entry_delete); - -+/* mb_cache_entry_delete_or_get - remove a cache entry if it has no users -+ * @cache - cache we work with -+ * @key - key -+ * @value - value -+ * -+ * Remove entry from cache @cache with key @key and value @value. The removal -+ * happens only if the entry is unused. The function returns NULL in case the -+ * entry was successfully removed or there's no entry in cache. Otherwise the -+ * function grabs reference of the entry that we failed to delete because it -+ * still has users and return it. 
-+ */ -+struct mb_cache_entry *mb_cache_entry_delete_or_get(struct mb_cache *cache, -+ u32 key, u64 value) -+{ -+ struct hlist_bl_node *node; -+ struct hlist_bl_head *head; -+ struct mb_cache_entry *entry; -+ -+ head = mb_cache_entry_head(cache, key); -+ hlist_bl_lock(head); -+ hlist_bl_for_each_entry(entry, node, head, e_hash_list) { -+ if (entry->e_key == key && entry->e_value == value) { -+ if (atomic_read(&entry->e_refcnt) > 2) { -+ atomic_inc(&entry->e_refcnt); -+ hlist_bl_unlock(head); -+ return entry; -+ } -+ /* We keep hash list reference to keep entry alive */ -+ hlist_bl_del_init(&entry->e_hash_list); -+ hlist_bl_unlock(head); -+ spin_lock(&cache->c_list_lock); -+ if (!list_empty(&entry->e_list)) { -+ list_del_init(&entry->e_list); -+ if (!WARN_ONCE(cache->c_entry_count == 0, -+ "mbcache: attempt to decrement c_entry_count past zero")) -+ cache->c_entry_count--; -+ atomic_dec(&entry->e_refcnt); -+ } -+ spin_unlock(&cache->c_list_lock); -+ mb_cache_entry_put(cache, entry); -+ return NULL; -+ } -+ } -+ hlist_bl_unlock(head); -+ -+ return NULL; -+} -+EXPORT_SYMBOL(mb_cache_entry_delete_or_get); -+ - /* mb_cache_entry_touch - cache entry got used - * @cache - cache the entry belongs to - * @entry - entry that got used -@@ -288,7 +350,7 @@ static unsigned long mb_cache_shrink(struct mb_cache *cache, - while (nr_to_scan-- && !list_empty(&cache->c_list)) { - entry = list_first_entry(&cache->c_list, - struct mb_cache_entry, e_list); -- if (entry->e_referenced) { -+ if (entry->e_referenced || atomic_read(&entry->e_refcnt) > 2) { - entry->e_referenced = 0; - list_move_tail(&entry->e_list, &cache->c_list); - continue; -@@ -302,6 +364,14 @@ static unsigned long mb_cache_shrink(struct mb_cache *cache, - spin_unlock(&cache->c_list_lock); - head = mb_cache_entry_head(cache, entry->e_key); - hlist_bl_lock(head); -+ /* Now a reliable check if the entry didn't get used... */ -+ if (atomic_read(&entry->e_refcnt) > 2) { -+ hlist_bl_unlock(head); -+ spin_lock(&cache->c_list_lock); -+ list_add_tail(&entry->e_list, &cache->c_list); -+ cache->c_entry_count++; -+ continue; -+ } - if (!hlist_bl_unhashed(&entry->e_hash_list)) { - hlist_bl_del_init(&entry->e_hash_list); - atomic_dec(&entry->e_refcnt); -diff --git a/fs/minix/inode.c b/fs/minix/inode.c -index a71f1cf894b9f..d4bd94234ef73 100644 ---- a/fs/minix/inode.c -+++ b/fs/minix/inode.c -@@ -447,7 +447,8 @@ static const struct address_space_operations minix_aops = { - .writepage = minix_writepage, - .write_begin = minix_write_begin, - .write_end = generic_write_end, -- .bmap = minix_bmap -+ .bmap = minix_bmap, -+ .direct_IO = noop_direct_IO - }; - - static const struct inode_operations minix_symlink_inode_operations = { -diff --git a/fs/namei.c b/fs/namei.c -index 1946d96677908..1fd854d4cd2c0 100644 ---- a/fs/namei.c -+++ b/fs/namei.c -@@ -1461,6 +1461,8 @@ static bool __follow_mount_rcu(struct nameidata *nd, struct path *path, - * becoming unpinned. 
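The mbcache additions above only make sense as a pair: mb_cache_entry_delete_or_get() either deletes an unused entry or hands back a reference to the busy one, and mb_cache_entry_wait_unused() lets the caller sleep until it would be the last user. A schematic of the intended caller loop (evict_block() is a made-up wrapper for illustration; ext4's xattr code is the in-tree consumer this was built for):

    static void evict_block(struct mb_cache *cache, u32 key, u64 value)
    {
            struct mb_cache_entry *entry;

            /* NULL: the entry is gone (deleted now, or never cached).
             * Non-NULL: it still has users, so wait them out, drop our
             * reference, and retry the delete. */
            while ((entry = mb_cache_entry_delete_or_get(cache, key, value))) {
                    mb_cache_entry_wait_unused(entry);
                    mb_cache_entry_put(cache, entry);
            }
    }
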
- */ - flags = dentry->d_flags; -+ if (read_seqretry(&mount_lock, nd->m_seq)) -+ return false; - continue; - } - if (read_seqretry(&mount_lock, nd->m_seq)) -@@ -2718,7 +2720,8 @@ struct dentry *lookup_one(struct user_namespace *mnt_userns, const char *name, - EXPORT_SYMBOL(lookup_one); - - /** -- * lookup_one_len_unlocked - filesystem helper to lookup single pathname component -+ * lookup_one_unlocked - filesystem helper to lookup single pathname component -+ * @mnt_userns: idmapping of the mount the lookup is performed from - * @name: pathname component to lookup - * @base: base directory to lookup from - * @len: maximum length @len should be interpreted to -@@ -2729,14 +2732,15 @@ EXPORT_SYMBOL(lookup_one); - * Unlike lookup_one_len, it should be called without the parent - * i_mutex held, and will take the i_mutex itself if necessary. - */ --struct dentry *lookup_one_len_unlocked(const char *name, -- struct dentry *base, int len) -+struct dentry *lookup_one_unlocked(struct user_namespace *mnt_userns, -+ const char *name, struct dentry *base, -+ int len) - { - struct qstr this; - int err; - struct dentry *ret; - -- err = lookup_one_common(&init_user_ns, name, base, len, &this); -+ err = lookup_one_common(mnt_userns, name, base, len, &this); - if (err) - return ERR_PTR(err); - -@@ -2745,6 +2749,59 @@ struct dentry *lookup_one_len_unlocked(const char *name, - ret = lookup_slow(&this, base, 0); - return ret; - } -+EXPORT_SYMBOL(lookup_one_unlocked); -+ -+/** -+ * lookup_one_positive_unlocked - filesystem helper to lookup single -+ * pathname component -+ * @mnt_userns: idmapping of the mount the lookup is performed from -+ * @name: pathname component to lookup -+ * @base: base directory to lookup from -+ * @len: maximum length @len should be interpreted to -+ * -+ * This helper will yield ERR_PTR(-ENOENT) on negatives. The helper returns -+ * known positive or ERR_PTR(). This is what most of the users want. -+ * -+ * Note that pinned negative with unlocked parent _can_ become positive at any -+ * time, so callers of lookup_one_unlocked() need to be very careful; pinned -+ * positives have >d_inode stable, so this one avoids such problems. -+ * -+ * Note that this routine is purely a helper for filesystem usage and should -+ * not be called by generic code. -+ * -+ * The helper should be called without i_mutex held. -+ */ -+struct dentry *lookup_one_positive_unlocked(struct user_namespace *mnt_userns, -+ const char *name, -+ struct dentry *base, int len) -+{ -+ struct dentry *ret = lookup_one_unlocked(mnt_userns, name, base, len); -+ -+ if (!IS_ERR(ret) && d_flags_negative(smp_load_acquire(&ret->d_flags))) { -+ dput(ret); -+ ret = ERR_PTR(-ENOENT); -+ } -+ return ret; -+} -+EXPORT_SYMBOL(lookup_one_positive_unlocked); -+ -+/** -+ * lookup_one_len_unlocked - filesystem helper to lookup single pathname component -+ * @name: pathname component to lookup -+ * @base: base directory to lookup from -+ * @len: maximum length @len should be interpreted to -+ * -+ * Note that this routine is purely a helper for filesystem usage and should -+ * not be called by generic code. -+ * -+ * Unlike lookup_one_len, it should be called without the parent -+ * i_mutex held, and will take the i_mutex itself if necessary. 
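The new helpers above split cleanly: lookup_one_unlocked() may hand back a negative dentry, while lookup_one_positive_unlocked() folds the d_flags_negative() test (its smp_load_acquire() pairs with the store that flips a dentry positive) into an ERR_PTR(-ENOENT). A hypothetical caller on a possibly idmapped mount, just to show where the userns argument comes from (find_child() is illustrative):

    static struct dentry *find_child(struct vfsmount *mnt,
                                     struct dentry *dir, const char *name)
    {
            /* Pass the mount's idmapping; code that can never sit on an
             * idmapped mount can keep calling lookup_one_len_unlocked(),
             * which now simply forwards &init_user_ns. */
            return lookup_one_positive_unlocked(mnt_user_ns(mnt), name,
                                                dir, strlen(name));
    }
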
-+ */ -+struct dentry *lookup_one_len_unlocked(const char *name, -+ struct dentry *base, int len) -+{ -+ return lookup_one_unlocked(&init_user_ns, name, base, len); -+} - EXPORT_SYMBOL(lookup_one_len_unlocked); - - /* -@@ -2758,12 +2815,7 @@ EXPORT_SYMBOL(lookup_one_len_unlocked); - struct dentry *lookup_positive_unlocked(const char *name, - struct dentry *base, int len) - { -- struct dentry *ret = lookup_one_len_unlocked(name, base, len); -- if (!IS_ERR(ret) && d_flags_negative(smp_load_acquire(&ret->d_flags))) { -- dput(ret); -- ret = ERR_PTR(-ENOENT); +- return -EINVAL; +-} +- +-static bool need_read_all(struct io_kiocb *req) +-{ +- return req->flags & REQ_F_ISREG || +- S_ISBLK(file_inode(req->file)->i_mode); +-} +- +-static int io_read(struct io_kiocb *req, unsigned int issue_flags) +-{ +- struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs; +- struct kiocb *kiocb = &req->rw.kiocb; +- struct iov_iter __iter, *iter = &__iter; +- struct io_async_rw *rw = req->async_data; +- bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK; +- struct iov_iter_state __state, *state; +- ssize_t ret, ret2; +- +- if (rw) { +- iter = &rw->iter; +- state = &rw->iter_state; +- /* +- * We come here from an earlier attempt, restore our state to +- * match in case it doesn't. It's cheap enough that we don't +- * need to make this conditional. +- */ +- iov_iter_restore(iter, state); +- iovec = NULL; +- } else { +- ret = io_import_iovec(READ, req, &iovec, iter, !force_nonblock); +- if (ret < 0) +- return ret; +- state = &__state; +- iov_iter_save_state(iter, state); +- } +- req->result = iov_iter_count(iter); +- +- /* Ensure we clear previously set non-block flag */ +- if (!force_nonblock) +- kiocb->ki_flags &= ~IOCB_NOWAIT; +- else +- kiocb->ki_flags |= IOCB_NOWAIT; +- +- /* If the file doesn't support async, just async punt */ +- if (force_nonblock && !io_file_supports_nowait(req, READ)) { +- ret = io_setup_async_rw(req, iovec, inline_vecs, iter, true); +- return ret ?: -EAGAIN; +- } +- +- ret = rw_verify_area(READ, req->file, io_kiocb_ppos(kiocb), req->result); +- if (unlikely(ret)) { +- kfree(iovec); +- return ret; +- } +- +- ret = io_iter_do_read(req, iter); +- +- if (ret == -EAGAIN || (req->flags & REQ_F_REISSUE)) { +- req->flags &= ~REQ_F_REISSUE; +- /* IOPOLL retry should happen for io-wq threads */ +- if (!force_nonblock && !(req->ctx->flags & IORING_SETUP_IOPOLL)) +- goto done; +- /* no retry on NONBLOCK nor RWF_NOWAIT */ +- if (req->flags & REQ_F_NOWAIT) +- goto done; +- ret = 0; +- } else if (ret == -EIOCBQUEUED) { +- goto out_free; +- } else if (ret <= 0 || ret == req->result || !force_nonblock || +- (req->flags & REQ_F_NOWAIT) || !need_read_all(req)) { +- /* read all, failed, already did sync or don't want to retry */ +- goto done; - } -- return ret; -+ return lookup_one_positive_unlocked(&init_user_ns, name, base, len); - } - EXPORT_SYMBOL(lookup_positive_unlocked); - -@@ -3473,6 +3525,8 @@ struct dentry *vfs_tmpfile(struct user_namespace *mnt_userns, - child = d_alloc(dentry, &slash_name); - if (unlikely(!child)) - goto out_err; -+ if (!IS_POSIXACL(dir)) -+ mode &= ~current_umask(); - error = dir->i_op->tmpfile(mnt_userns, dir, child, mode); - if (error) - goto out_err; -@@ -3625,18 +3679,14 @@ static struct dentry *filename_create(int dfd, struct filename *name, - { - struct dentry *dentry = ERR_PTR(-EEXIST); - struct qstr last; -+ bool want_dir = lookup_flags & LOOKUP_DIRECTORY; -+ unsigned int reval_flag = lookup_flags & LOOKUP_REVAL; -+ unsigned int create_flags = LOOKUP_CREATE | 
LOOKUP_EXCL; - int type; - int err2; - int error; -- bool is_dir = (lookup_flags & LOOKUP_DIRECTORY); - - /* -- * Note that only LOOKUP_REVAL and LOOKUP_DIRECTORY matter here. Any -- * other flags passed in are ignored! +- * Don't depend on the iter state matching what was consumed, or being +- * untouched in case of error. Restore it and we'll advance it +- * manually if we need to. - */ -- lookup_flags &= LOOKUP_REVAL; - -- error = filename_parentat(dfd, name, lookup_flags, path, &last, &type); -+ error = filename_parentat(dfd, name, reval_flag, path, &last, &type); - if (error) - return ERR_PTR(error); - -@@ -3650,11 +3700,13 @@ static struct dentry *filename_create(int dfd, struct filename *name, - /* don't fail immediately if it's r/o, at least try to report other errors */ - err2 = mnt_want_write(path->mnt); - /* -- * Do the final lookup. -+ * Do the final lookup. Suppress 'create' if there is a trailing -+ * '/', and a directory wasn't requested. - */ -- lookup_flags |= LOOKUP_CREATE | LOOKUP_EXCL; -+ if (last.name[last.len] && !want_dir) -+ create_flags = 0; - inode_lock_nested(path->dentry->d_inode, I_MUTEX_PARENT); -- dentry = __lookup_hash(&last, path->dentry, lookup_flags); -+ dentry = __lookup_hash(&last, path->dentry, reval_flag | create_flags); - if (IS_ERR(dentry)) - goto unlock; - -@@ -3668,7 +3720,7 @@ static struct dentry *filename_create(int dfd, struct filename *name, - * all is fine. Let's be bastards - you had / on the end, you've - * been asking for (non-existent) directory. -ENOENT for you. - */ -- if (unlikely(!is_dir && last.name[last.len])) { -+ if (unlikely(!create_flags)) { - error = -ENOENT; - goto fail; - } -@@ -3975,13 +4027,12 @@ int vfs_rmdir(struct user_namespace *mnt_userns, struct inode *dir, - dentry->d_inode->i_flags |= S_DEAD; - dont_mount(dentry); - detach_mounts(dentry); -- fsnotify_rmdir(dir, dentry); - - out: - inode_unlock(dentry->d_inode); - dput(dentry); - if (!error) -- d_delete(dentry); -+ d_delete_notify(dir, dentry); - return error; - } - EXPORT_SYMBOL(vfs_rmdir); -@@ -4103,7 +4154,6 @@ int vfs_unlink(struct user_namespace *mnt_userns, struct inode *dir, - if (!error) { - dont_mount(dentry); - detach_mounts(dentry); -- fsnotify_unlink(dir, dentry); - } - } - } -@@ -4111,9 +4161,11 @@ out: - inode_unlock(target); - - /* We don't d_delete() NFS sillyrenamed files--they still exist. 
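The user-visible contract of the filename_create() rework above is unchanged: a trailing '/' still means "I wanted a directory", but the LOOKUP_CREATE | LOOKUP_EXCL intent is now suppressed up front instead of the lookup failing after the fact. A small probe of that contract (paths are examples; per the comment retained above, the non-directory case fails with ENOENT):

    #include <stdio.h>
    #include <sys/stat.h>

    int main(void)
    {
            if (mkdir("demo/", 0755) == 0)      /* trailing '/' is fine here */
                    puts("mkdir(\"demo/\") ok");
            if (mkfifo("fifo/", 0644) != 0)     /* expected to fail: ENOENT */
                    perror("mkfifo(\"fifo/\")");
            return 0;
    }
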
*/ -- if (!error && !(dentry->d_flags & DCACHE_NFSFS_RENAMED)) { -+ if (!error && dentry->d_flags & DCACHE_NFSFS_RENAMED) { -+ fsnotify_unlink(dir, dentry); -+ } else if (!error) { - fsnotify_link_count(target); -- d_delete(dentry); -+ d_delete_notify(dir, dentry); - } - - return error; -diff --git a/fs/namespace.c b/fs/namespace.c -index 659a8f39c61af..d946298691ed4 100644 ---- a/fs/namespace.c -+++ b/fs/namespace.c -@@ -31,6 +31,7 @@ - #include <uapi/linux/mount.h> - #include <linux/fs_context.h> - #include <linux/shmem_fs.h> -+#include <linux/mnt_idmapping.h> - - #include "pnode.h" - #include "internal.h" -@@ -561,7 +562,7 @@ static void free_vfsmnt(struct mount *mnt) - struct user_namespace *mnt_userns; - - mnt_userns = mnt_user_ns(&mnt->mnt); -- if (mnt_userns != &init_user_ns) -+ if (!initial_idmapping(mnt_userns)) - put_user_ns(mnt_userns); - kfree_const(mnt->mnt_devname); - #ifdef CONFIG_SMP -@@ -965,6 +966,7 @@ static struct mount *skip_mnt_tree(struct mount *p) - struct vfsmount *vfs_create_mount(struct fs_context *fc) - { - struct mount *mnt; -+ struct user_namespace *fs_userns; - - if (!fc->root) - return ERR_PTR(-EINVAL); -@@ -982,6 +984,10 @@ struct vfsmount *vfs_create_mount(struct fs_context *fc) - mnt->mnt_mountpoint = mnt->mnt.mnt_root; - mnt->mnt_parent = mnt; - -+ fs_userns = mnt->mnt.mnt_sb->s_user_ns; -+ if (!initial_idmapping(fs_userns)) -+ mnt->mnt.mnt_userns = get_user_ns(fs_userns); -+ - lock_mount_hash(); - list_add_tail(&mnt->mnt_instance, &mnt->mnt.mnt_sb->s_mounts); - unlock_mount_hash(); -@@ -1072,7 +1078,7 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root, - - atomic_inc(&sb->s_active); - mnt->mnt.mnt_userns = mnt_user_ns(&old->mnt); -- if (mnt->mnt.mnt_userns != &init_user_ns) -+ if (!initial_idmapping(mnt->mnt.mnt_userns)) - mnt->mnt.mnt_userns = get_user_ns(mnt->mnt.mnt_userns); - mnt->mnt.mnt_sb = sb; - mnt->mnt.mnt_root = dget(root); -@@ -3927,28 +3933,32 @@ static unsigned int recalc_flags(struct mount_kattr *kattr, struct mount *mnt) - static int can_idmap_mount(const struct mount_kattr *kattr, struct mount *mnt) - { - struct vfsmount *m = &mnt->mnt; -+ struct user_namespace *fs_userns = m->mnt_sb->s_user_ns; - - if (!kattr->mnt_userns) - return 0; - -+ /* -+ * Creating an idmapped mount with the filesystem wide idmapping -+ * doesn't make sense so block that. We don't allow mushy semantics. -+ */ -+ if (kattr->mnt_userns == fs_userns) -+ return -EINVAL; -+ - /* - * Once a mount has been idmapped we don't allow it to change its - * mapping. It makes things simpler and callers can just create - * another bind-mount they can idmap if they want to. - */ -- if (mnt_user_ns(m) != &init_user_ns) -+ if (is_idmapped_mnt(m)) - return -EPERM; - - /* The underlying filesystem doesn't support idmapped mounts yet. */ - if (!(m->mnt_sb->s_type->fs_flags & FS_ALLOW_IDMAP)) - return -EINVAL; - -- /* Don't yet support filesystem mountable in user namespaces. */ -- if (m->mnt_sb->s_user_ns != &init_user_ns) -- return -EINVAL; +- iov_iter_restore(iter, state); - - /* We're not controlling the superblock. */ -- if (!capable(CAP_SYS_ADMIN)) -+ if (!ns_capable(fs_userns, CAP_SYS_ADMIN)) - return -EPERM; - - /* Mount has already been visible in the filesystem hierarchy. 
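The can_idmap_mount() checks above gate a user-space sequence that is easy to get wrong: creating an idmapped mount now requires CAP_SYS_ADMIN in the superblock's user namespace rather than globally, and both the initial idmapping and the filesystem's own idmapping are refused outright. A sketch of the calling sequence under those rules (error paths abbreviated; userns_fd must name some other user namespace):

    #define _GNU_SOURCE
    #include <fcntl.h>
    #include <linux/mount.h>
    #include <sys/syscall.h>
    #include <unistd.h>

    /* Returns a detached, idmapped copy of the subtree at path, ready to
     * be attached with move_mount(2), or -1 on error. */
    static int idmap_mount(const char *path, int userns_fd)
    {
            struct mount_attr attr = {
                    .attr_set  = MOUNT_ATTR_IDMAP,
                    .userns_fd = userns_fd,
            };
            int tree = syscall(SYS_open_tree, AT_FDCWD, path,
                               OPEN_TREE_CLONE | OPEN_TREE_CLOEXEC);

            if (tree < 0)
                    return -1;
            if (syscall(SYS_mount_setattr, tree, "", AT_EMPTY_PATH,
                        &attr, sizeof(attr)) < 0) {
                    close(tree);
                    return -1;
            }
            return tree;
    }
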
*/ -@@ -4002,14 +4012,27 @@ out: - - static void do_idmap_mount(const struct mount_kattr *kattr, struct mount *mnt) - { -- struct user_namespace *mnt_userns; -+ struct user_namespace *mnt_userns, *old_mnt_userns; - - if (!kattr->mnt_userns) - return; - -+ /* -+ * We're the only ones able to change the mount's idmapping. So -+ * mnt->mnt.mnt_userns is stable and we can retrieve it directly. -+ */ -+ old_mnt_userns = mnt->mnt.mnt_userns; -+ - mnt_userns = get_user_ns(kattr->mnt_userns); - /* Pairs with smp_load_acquire() in mnt_user_ns(). */ - smp_store_release(&mnt->mnt.mnt_userns, mnt_userns); -+ -+ /* -+ * If this is an idmapped filesystem drop the reference we've taken -+ * in vfs_create_mount() before. -+ */ -+ if (!initial_idmapping(old_mnt_userns)) -+ put_user_ns(old_mnt_userns); - } - - static void mount_setattr_commit(struct mount_kattr *kattr, -@@ -4133,16 +4156,25 @@ static int build_mount_idmapped(const struct mount_attr *attr, size_t usize, - } - - /* -- * The init_user_ns is used to indicate that a vfsmount is not idmapped. -- * This is simpler than just having to treat NULL as unmapped. Users -- * wanting to idmap a mount to init_user_ns can just use a namespace -- * with an identity mapping. -+ * The initial idmapping cannot be used to create an idmapped -+ * mount. We use the initial idmapping as an indicator of a mount -+ * that is not idmapped. It can simply be passed into helpers that -+ * are aware of idmapped mounts as a convenient shortcut. A user -+ * can just create a dedicated identity mapping to achieve the same -+ * result. - */ - mnt_userns = container_of(ns, struct user_namespace, ns); -- if (mnt_userns == &init_user_ns) { -+ if (initial_idmapping(mnt_userns)) { -+ err = -EPERM; -+ goto out_fput; -+ } -+ -+ /* We're not controlling the target namespace. */ -+ if (!ns_capable(mnt_userns, CAP_SYS_ADMIN)) { - err = -EPERM; - goto out_fput; - } -+ - kattr->mnt_userns = get_user_ns(mnt_userns); - - out_fput: -@@ -4263,12 +4295,11 @@ SYSCALL_DEFINE5(mount_setattr, int, dfd, const char __user *, path, - return err; - - err = user_path_at(dfd, path, kattr.lookup_flags, &target); -- if (err) -- return err; +- ret2 = io_setup_async_rw(req, iovec, inline_vecs, iter, true); +- if (ret2) +- return ret2; - -- err = do_mount_setattr(&target, &kattr); -+ if (!err) { -+ err = do_mount_setattr(&target, &kattr); -+ path_put(&target); -+ } - finish_mount_kattr(&kattr); -- path_put(&target); - return err; - } - -diff --git a/fs/netfs/read_helper.c b/fs/netfs/read_helper.c -index 994ec22d40402..242f8bcb34a4c 100644 ---- a/fs/netfs/read_helper.c -+++ b/fs/netfs/read_helper.c -@@ -354,16 +354,11 @@ static void netfs_rreq_write_to_cache_work(struct work_struct *work) - netfs_rreq_do_write_to_cache(rreq); - } - --static void netfs_rreq_write_to_cache(struct netfs_read_request *rreq, -- bool was_async) -+static void netfs_rreq_write_to_cache(struct netfs_read_request *rreq) - { -- if (was_async) { -- rreq->work.func = netfs_rreq_write_to_cache_work; -- if (!queue_work(system_unbound_wq, &rreq->work)) -- BUG(); +- iovec = NULL; +- rw = req->async_data; +- /* +- * Now use our persistent iterator and state, if we aren't already. +- * We've restored and mapped the iter to match. +- */ +- if (iter != &rw->iter) { +- iter = &rw->iter; +- state = &rw->iter_state; +- } +- +- do { +- /* +- * We end up here because of a partial read, either from +- * above or inside this loop. Advance the iter by the bytes +- * that were consumed. 
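The retry discipline io_read() adopts here is easy to state on its own: save the iterator state before each attempt, restore it afterwards (the lower layers may leave it anywhere on failure), and advance only by the bytes known to have completed. Schematically, assuming kernel context for the iov_iter API (do_partial_io() is a placeholder, not a real function):

    struct iov_iter_state state;
    ssize_t ret;

    iov_iter_save_state(iter, &state);      /* checkpoint before the attempt */
    ret = do_partial_io(iter);              /* may consume any amount */
    iov_iter_restore(iter, &state);         /* back to the checkpoint */
    if (ret > 0)
            iov_iter_advance(iter, ret);    /* account only completed bytes */
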
+- */ +- iov_iter_advance(iter, ret); +- if (!iov_iter_count(iter)) +- break; +- rw->bytes_done += ret; +- iov_iter_save_state(iter, state); +- +- /* if we can retry, do so with the callbacks armed */ +- if (!io_rw_should_retry(req)) { +- kiocb->ki_flags &= ~IOCB_WAITQ; +- return -EAGAIN; +- } +- +- /* +- * Now retry read with the IOCB_WAITQ parts set in the iocb. If +- * we get -EIOCBQUEUED, then we'll get a notification when the +- * desired page gets unlocked. We can also get a partial read +- * here, and if we do, then just retry at the new offset. +- */ +- ret = io_iter_do_read(req, iter); +- if (ret == -EIOCBQUEUED) +- return 0; +- /* we got some bytes, but not all. retry. */ +- kiocb->ki_flags &= ~IOCB_WAITQ; +- iov_iter_restore(iter, state); +- } while (ret > 0); +-done: +- kiocb_done(kiocb, ret, issue_flags); +-out_free: +- /* it's faster to check here then delegate to kfree */ +- if (iovec) +- kfree(iovec); +- return 0; +-} +- +-static int io_write_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) +-{ +- if (unlikely(!(req->file->f_mode & FMODE_WRITE))) +- return -EBADF; +- return io_prep_rw(req, sqe, WRITE); +-} +- +-static int io_write(struct io_kiocb *req, unsigned int issue_flags) +-{ +- struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs; +- struct kiocb *kiocb = &req->rw.kiocb; +- struct iov_iter __iter, *iter = &__iter; +- struct io_async_rw *rw = req->async_data; +- bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK; +- struct iov_iter_state __state, *state; +- ssize_t ret, ret2; +- +- if (rw) { +- iter = &rw->iter; +- state = &rw->iter_state; +- iov_iter_restore(iter, state); +- iovec = NULL; - } else { -- netfs_rreq_do_write_to_cache(rreq); +- ret = io_import_iovec(WRITE, req, &iovec, iter, !force_nonblock); +- if (ret < 0) +- return ret; +- state = &__state; +- iov_iter_save_state(iter, state); - } -+ rreq->work.func = netfs_rreq_write_to_cache_work; -+ if (!queue_work(system_unbound_wq, &rreq->work)) -+ BUG(); - } - - /* -@@ -560,7 +555,7 @@ again: - wake_up_bit(&rreq->flags, NETFS_RREQ_IN_PROGRESS); - - if (test_bit(NETFS_RREQ_WRITE_TO_CACHE, &rreq->flags)) -- return netfs_rreq_write_to_cache(rreq, was_async); -+ return netfs_rreq_write_to_cache(rreq); - - netfs_rreq_completed(rreq, was_async); - } -@@ -963,7 +958,7 @@ int netfs_readpage(struct file *file, - rreq = netfs_alloc_read_request(ops, netfs_priv, file); - if (!rreq) { - if (netfs_priv) -- ops->cleanup(netfs_priv, page_file_mapping(page)); -+ ops->cleanup(page_file_mapping(page), netfs_priv); - unlock_page(page); - return -ENOMEM; - } -@@ -1190,7 +1185,7 @@ have_page: - goto error; - have_page_no_wait: - if (netfs_priv) -- ops->cleanup(netfs_priv, mapping); -+ ops->cleanup(mapping, netfs_priv); - *_page = page; - _leave(" = 0"); - return 0; -@@ -1201,7 +1196,7 @@ error: - unlock_page(page); - put_page(page); - if (netfs_priv) -- ops->cleanup(netfs_priv, mapping); -+ ops->cleanup(mapping, netfs_priv); - _leave(" = %d", ret); - return ret; - } -diff --git a/fs/nfs/callback.h b/fs/nfs/callback.h -index 6a2033131c068..ccd4f245cae24 100644 ---- a/fs/nfs/callback.h -+++ b/fs/nfs/callback.h -@@ -170,7 +170,7 @@ struct cb_devicenotifyitem { - }; - - struct cb_devicenotifyargs { -- int ndevs; -+ uint32_t ndevs; - struct cb_devicenotifyitem *devs; - }; - -diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c -index ed9d580826f5a..ccf3132384412 100644 ---- a/fs/nfs/callback_proc.c -+++ b/fs/nfs/callback_proc.c -@@ -288,6 +288,7 @@ static u32 initiate_file_draining(struct nfs_client *clp, - 
rv = NFS4_OK; - break; - case -ENOENT: -+ set_bit(NFS_LAYOUT_DRAIN, &lo->plh_flags); - /* Embrace your forgetfulness! */ - rv = NFS4ERR_NOMATCHING_LAYOUT; - -@@ -358,12 +359,11 @@ __be32 nfs4_callback_devicenotify(void *argp, void *resp, - struct cb_process_state *cps) - { - struct cb_devicenotifyargs *args = argp; -- int i; -+ const struct pnfs_layoutdriver_type *ld = NULL; -+ uint32_t i; - __be32 res = 0; -- struct nfs_client *clp = cps->clp; -- struct nfs_server *server = NULL; - -- if (!clp) { -+ if (!cps->clp) { - res = cpu_to_be32(NFS4ERR_OP_NOT_IN_SESSION); - goto out; - } -@@ -371,23 +371,15 @@ __be32 nfs4_callback_devicenotify(void *argp, void *resp, - for (i = 0; i < args->ndevs; i++) { - struct cb_devicenotifyitem *dev = &args->devs[i]; - -- if (!server || -- server->pnfs_curr_ld->id != dev->cbd_layout_type) { -- rcu_read_lock(); -- list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) -- if (server->pnfs_curr_ld && -- server->pnfs_curr_ld->id == dev->cbd_layout_type) { -- rcu_read_unlock(); -- goto found; -- } -- rcu_read_unlock(); -- continue; -+ if (!ld || ld->id != dev->cbd_layout_type) { -+ pnfs_put_layoutdriver(ld); -+ ld = pnfs_find_layoutdriver(dev->cbd_layout_type); -+ if (!ld) -+ continue; - } +- req->result = iov_iter_count(iter); - -- found: -- nfs4_delete_deviceid(server->pnfs_curr_ld, clp, &dev->cbd_dev_id); -+ nfs4_delete_deviceid(ld, cps->clp, &dev->cbd_dev_id); - } +- /* Ensure we clear previously set non-block flag */ +- if (!force_nonblock) +- kiocb->ki_flags &= ~IOCB_NOWAIT; +- else +- kiocb->ki_flags |= IOCB_NOWAIT; - -+ pnfs_put_layoutdriver(ld); - out: - kfree(args->devs); - return res; -diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c -index 4c48d85f65170..ea17085ef884b 100644 ---- a/fs/nfs/callback_xdr.c -+++ b/fs/nfs/callback_xdr.c -@@ -258,11 +258,9 @@ __be32 decode_devicenotify_args(struct svc_rqst *rqstp, - void *argp) - { - struct cb_devicenotifyargs *args = argp; -+ uint32_t tmp, n, i; - __be32 *p; - __be32 status = 0; -- u32 tmp; -- int n, i; -- args->ndevs = 0; - - /* Num of device notifications */ - p = xdr_inline_decode(xdr, sizeof(uint32_t)); -@@ -271,12 +269,8 @@ __be32 decode_devicenotify_args(struct svc_rqst *rqstp, - goto out; - } - n = ntohl(*p++); -- if (n <= 0) -- goto out; -- if (n > ULONG_MAX / sizeof(*args->devs)) { -- status = htonl(NFS4ERR_BADXDR); -+ if (n == 0) - goto out; +- /* If the file doesn't support async, just async punt */ +- if (force_nonblock && !io_file_supports_nowait(req, WRITE)) +- goto copy_iov; +- +- /* file path doesn't support NOWAIT for non-direct_IO */ +- if (force_nonblock && !(kiocb->ki_flags & IOCB_DIRECT) && +- (req->flags & REQ_F_ISREG)) +- goto copy_iov; +- +- ret = rw_verify_area(WRITE, req->file, io_kiocb_ppos(kiocb), req->result); +- if (unlikely(ret)) +- goto out_free; +- +- /* +- * Open-code file_start_write here to grab freeze protection, +- * which will be released by another thread in +- * io_complete_rw(). Fool lockdep by telling it the lock got +- * released so that it doesn't complain about the held lock when +- * we return to userspace. 
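Two small hardening moves in the decode_devicenotify_args() rework above are worth calling out: the device count becomes unsigned (so the old n <= 0 test collapses to n == 0), and the hand-rolled "n > ULONG_MAX / sizeof(*args->devs)" guard is dropped because kmalloc_array(), already used for the allocation, performs the multiplication-overflow check itself. The guard kmalloc_array() supplies, rendered in portable C (alloc_array() is illustrative):

    #include <stdint.h>
    #include <stdlib.h>

    /* Refuses n * size allocations whose byte count would overflow,
     * instead of silently allocating a too-small buffer. */
    static void *alloc_array(size_t n, size_t size)
    {
            if (size && n > SIZE_MAX / size)
                    return NULL;
            return malloc(n * size);
    }
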
+- */ +- if (req->flags & REQ_F_ISREG) { +- sb_start_write(file_inode(req->file)->i_sb); +- __sb_writers_release(file_inode(req->file)->i_sb, +- SB_FREEZE_WRITE); - } - - args->devs = kmalloc_array(n, sizeof(*args->devs), GFP_KERNEL); - if (!args->devs) { -@@ -330,19 +324,21 @@ __be32 decode_devicenotify_args(struct svc_rqst *rqstp, - dev->cbd_immediate = 0; - } - -- args->ndevs++; +- kiocb->ki_flags |= IOCB_WRITE; - - dprintk("%s: type %d layout 0x%x immediate %d\n", - __func__, dev->cbd_notify_type, dev->cbd_layout_type, - dev->cbd_immediate); - } -+ args->ndevs = n; -+ dprintk("%s: ndevs %d\n", __func__, args->ndevs); -+ return 0; -+err: -+ kfree(args->devs); - out: -+ args->devs = NULL; -+ args->ndevs = 0; - dprintk("%s: status %d ndevs %d\n", - __func__, ntohl(status), args->ndevs); - return status; --err: -- kfree(args->devs); -- goto out; - } - - static __be32 decode_sessionid(struct xdr_stream *xdr, -diff --git a/fs/nfs/client.c b/fs/nfs/client.c -index 23e165d5ec9ca..090b16890e3d6 100644 ---- a/fs/nfs/client.c -+++ b/fs/nfs/client.c -@@ -177,6 +177,7 @@ struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_init) - INIT_LIST_HEAD(&clp->cl_superblocks); - clp->cl_rpcclient = ERR_PTR(-EINVAL); - -+ clp->cl_flags = cl_init->init_flags; - clp->cl_proto = cl_init->proto; - clp->cl_nconnect = cl_init->nconnect; - clp->cl_max_connect = cl_init->max_connect ? cl_init->max_connect : 1; -@@ -427,7 +428,6 @@ struct nfs_client *nfs_get_client(const struct nfs_client_initdata *cl_init) - list_add_tail(&new->cl_share_link, - &nn->nfs_client_list); - spin_unlock(&nn->nfs_client_lock); -- new->cl_flags = cl_init->init_flags; - return rpc_ops->init_client(new, cl_init); - } - -@@ -860,6 +860,13 @@ int nfs_probe_fsinfo(struct nfs_server *server, struct nfs_fh *mntfh, struct nfs - server->namelen = pathinfo.max_namelen; - } - -+ if (clp->rpc_ops->discover_trunking != NULL && -+ (server->caps & NFS_CAP_FS_LOCATIONS)) { -+ error = clp->rpc_ops->discover_trunking(server, mntfh); -+ if (error < 0) -+ return error; -+ } -+ - return 0; - } - EXPORT_SYMBOL_GPL(nfs_probe_fsinfo); -diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c -index 11118398f495c..7c9eb679dbdbf 100644 ---- a/fs/nfs/delegation.c -+++ b/fs/nfs/delegation.c -@@ -755,11 +755,13 @@ int nfs4_inode_return_delegation(struct inode *inode) - struct nfs_delegation *delegation; - - delegation = nfs_start_delegation_return(nfsi); -- /* Synchronous recall of any application leases */ -- break_lease(inode, O_WRONLY | O_RDWR); -- nfs_wb_all(inode); -- if (delegation != NULL) -+ if (delegation != NULL) { -+ /* Synchronous recall of any application leases */ -+ break_lease(inode, O_WRONLY | O_RDWR); -+ if (S_ISREG(inode->i_mode)) -+ nfs_wb_all(inode); - return nfs_end_delegation_return(inode, delegation, 1); -+ } - return 0; - } - -diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c -index 1a6d2867fba4f..32c3d0c454b19 100644 ---- a/fs/nfs/dir.c -+++ b/fs/nfs/dir.c -@@ -78,6 +78,7 @@ static struct nfs_open_dir_context *alloc_nfs_open_dir_context(struct inode *dir - ctx->attr_gencount = nfsi->attr_gencount; - ctx->dir_cookie = 0; - ctx->dup_cookie = 0; -+ ctx->page_index = 0; - spin_lock(&dir->i_lock); - if (list_empty(&nfsi->open_files) && - (nfsi->cache_validity & NFS_INO_DATA_INVAL_DEFER)) -@@ -85,6 +86,7 @@ static struct nfs_open_dir_context *alloc_nfs_open_dir_context(struct inode *dir - NFS_INO_INVALID_DATA | - NFS_INO_REVAL_FORCED); - list_add(&ctx->list, &nfsi->open_files); -+ clear_bit(NFS_INO_FORCE_READDIR, &nfsi->flags); - 
spin_unlock(&dir->i_lock); - return ctx; - } -@@ -626,8 +628,7 @@ void nfs_force_use_readdirplus(struct inode *dir) - if (nfs_server_capable(dir, NFS_CAP_READDIRPLUS) && - !list_empty(&nfsi->open_files)) { - set_bit(NFS_INO_ADVISE_RDPLUS, &nfsi->flags); -- invalidate_mapping_pages(dir->i_mapping, -- nfsi->page_index + 1, -1); -+ set_bit(NFS_INO_FORCE_READDIR, &nfsi->flags); - } - } - -@@ -870,7 +871,8 @@ static int nfs_readdir_xdr_to_array(struct nfs_readdir_descriptor *desc, - - status = nfs_readdir_page_filler(desc, entry, pages, pglen, - arrays, narrays); -- } while (!status && nfs_readdir_page_needs_filling(page)); -+ } while (!status && nfs_readdir_page_needs_filling(page) && -+ page_mapping(page)); - - nfs_readdir_free_pages(pages, array_size); - out_release_label: -@@ -937,10 +939,8 @@ static int find_and_lock_cache_page(struct nfs_readdir_descriptor *desc) - sizeof(nfsi->cookieverf)); - } - res = nfs_readdir_search_array(desc); -- if (res == 0) { -- nfsi->page_index = desc->page_index; -+ if (res == 0) - return 0; +- if (req->file->f_op->write_iter) +- ret2 = call_write_iter(req->file, kiocb, iter); +- else if (req->file->f_op->write) +- ret2 = loop_rw_iter(WRITE, req, iter); +- else +- ret2 = -EINVAL; +- +- if (req->flags & REQ_F_REISSUE) { +- req->flags &= ~REQ_F_REISSUE; +- ret2 = -EAGAIN; - } - nfs_readdir_page_unlock_and_put_cached(desc); - return res; - } -@@ -1048,6 +1048,7 @@ static int uncached_readdir(struct nfs_readdir_descriptor *desc) - goto out; - - desc->page_index = 0; -+ desc->cache_entry_index = 0; - desc->last_cookie = desc->dir_cookie; - desc->duped = 0; - -@@ -1079,6 +1080,7 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx) - struct nfs_inode *nfsi = NFS_I(inode); - struct nfs_open_dir_context *dir_ctx = file->private_data; - struct nfs_readdir_descriptor *desc; -+ pgoff_t page_index; - int res; - - dfprintk(FILE, "NFS: readdir(%pD2) starting at cookie %llu\n", -@@ -1109,10 +1111,15 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx) - desc->dir_cookie = dir_ctx->dir_cookie; - desc->dup_cookie = dir_ctx->dup_cookie; - desc->duped = dir_ctx->duped; -+ page_index = dir_ctx->page_index; - desc->attr_gencount = dir_ctx->attr_gencount; - memcpy(desc->verf, dir_ctx->verf, sizeof(desc->verf)); - spin_unlock(&file->f_lock); - -+ if (test_and_clear_bit(NFS_INO_FORCE_READDIR, &nfsi->flags) && -+ list_is_singular(&nfsi->open_files)) -+ invalidate_mapping_pages(inode->i_mapping, page_index + 1, -1); -+ - do { - res = readdir_search_pagecache(desc); - -@@ -1149,6 +1156,7 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx) - dir_ctx->dup_cookie = desc->dup_cookie; - dir_ctx->duped = desc->duped; - dir_ctx->attr_gencount = desc->attr_gencount; -+ dir_ctx->page_index = desc->page_index; - memcpy(dir_ctx->verf, desc->verf, sizeof(dir_ctx->verf)); - spin_unlock(&file->f_lock); - -@@ -1269,13 +1277,12 @@ static bool nfs_verifier_is_delegated(struct dentry *dentry) - static void nfs_set_verifier_locked(struct dentry *dentry, unsigned long verf) - { - struct inode *inode = d_inode(dentry); -+ struct inode *dir = d_inode(dentry->d_parent); - -- if (!nfs_verifier_is_delegated(dentry) && -- !nfs_verify_change_attribute(d_inode(dentry->d_parent), verf)) -- goto out; -+ if (!nfs_verify_change_attribute(dir, verf)) -+ return; - if (inode && NFS_PROTO(inode)->have_delegation(inode, FMODE_READ)) - nfs_set_verifier_delegated(&verf); --out: - dentry->d_time = verf; - } - -@@ -1413,7 +1420,7 @@ out_force: - static void 
nfs_mark_dir_for_revalidate(struct inode *inode) - { - spin_lock(&inode->i_lock); -- nfs_set_cache_invalid(inode, NFS_INO_REVAL_PAGECACHE); -+ nfs_set_cache_invalid(inode, NFS_INO_INVALID_CHANGE); - spin_unlock(&inode->i_lock); - } - -@@ -1834,16 +1841,6 @@ const struct dentry_operations nfs4_dentry_operations = { - }; - EXPORT_SYMBOL_GPL(nfs4_dentry_operations); - --static fmode_t flags_to_mode(int flags) --{ -- fmode_t res = (__force fmode_t)flags & FMODE_EXEC; -- if ((flags & O_ACCMODE) != O_WRONLY) -- res |= FMODE_READ; -- if ((flags & O_ACCMODE) != O_RDONLY) -- res |= FMODE_WRITE; -- return res; +- +- /* +- * Raw bdev writes will return -EOPNOTSUPP for IOCB_NOWAIT. Just +- * retry them without IOCB_NOWAIT. +- */ +- if (ret2 == -EOPNOTSUPP && (kiocb->ki_flags & IOCB_NOWAIT)) +- ret2 = -EAGAIN; +- /* no retry on NONBLOCK nor RWF_NOWAIT */ +- if (ret2 == -EAGAIN && (req->flags & REQ_F_NOWAIT)) +- goto done; +- if (!force_nonblock || ret2 != -EAGAIN) { +- /* IOPOLL retry should happen for io-wq threads */ +- if ((req->ctx->flags & IORING_SETUP_IOPOLL) && ret2 == -EAGAIN) +- goto copy_iov; +-done: +- kiocb_done(kiocb, ret2, issue_flags); +- } else { +-copy_iov: +- iov_iter_restore(iter, state); +- ret = io_setup_async_rw(req, iovec, inline_vecs, iter, false); +- return ret ?: -EAGAIN; +- } +-out_free: +- /* it's reportedly faster than delegating the null check to kfree() */ +- if (iovec) +- kfree(iovec); +- return ret; -} - - static struct nfs_open_context *create_nfs_open_context(struct dentry *dentry, int open_flags, struct file *filp) - { - return alloc_nfs_open_context(dentry, flags_to_mode(open_flags), filp); -@@ -1983,6 +1980,24 @@ out: - - no_open: - res = nfs_lookup(dir, dentry, lookup_flags); -+ if (!res) { -+ inode = d_inode(dentry); -+ if ((lookup_flags & LOOKUP_DIRECTORY) && inode && -+ !(S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))) -+ res = ERR_PTR(-ENOTDIR); -+ else if (inode && S_ISREG(inode->i_mode)) -+ res = ERR_PTR(-EOPENSTALE); -+ } else if (!IS_ERR(res)) { -+ inode = d_inode(res); -+ if ((lookup_flags & LOOKUP_DIRECTORY) && inode && -+ !(S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))) { -+ dput(res); -+ res = ERR_PTR(-ENOTDIR); -+ } else if (inode && S_ISREG(inode->i_mode)) { -+ dput(res); -+ res = ERR_PTR(-EOPENSTALE); -+ } -+ } - if (switched) { - d_lookup_done(dentry); - if (!res) -@@ -2383,6 +2398,8 @@ nfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry) - - trace_nfs_link_enter(inode, dir, dentry); - d_drop(dentry); -+ if (S_ISREG(inode->i_mode)) -+ nfs_sync_inode(inode); - error = NFS_PROTO(dir)->link(inode, dir, &dentry->d_name); - if (error == 0) { - ihold(inode); -@@ -2471,6 +2488,8 @@ int nfs_rename(struct user_namespace *mnt_userns, struct inode *old_dir, - } - } - -+ if (S_ISREG(old_inode->i_mode)) -+ nfs_sync_inode(old_inode); - task = nfs_async_rename(old_dir, new_dir, old_dentry, new_dentry, NULL); - if (IS_ERR(task)) { - error = PTR_ERR(task); -@@ -2676,7 +2695,7 @@ static struct nfs_access_entry *nfs_access_search_rbtree(struct inode *inode, co - return NULL; - } - --static int nfs_access_get_cached_locked(struct inode *inode, const struct cred *cred, struct nfs_access_entry *res, bool may_block) -+static int nfs_access_get_cached_locked(struct inode *inode, const struct cred *cred, u32 *mask, bool may_block) - { - struct nfs_inode *nfsi = NFS_I(inode); - struct nfs_access_entry *cache; -@@ -2706,8 +2725,7 @@ static int nfs_access_get_cached_locked(struct inode *inode, const struct cred * - spin_lock(&inode->i_lock); - 
retry = false; - } -- res->cred = cache->cred; -- res->mask = cache->mask; -+ *mask = cache->mask; - list_move_tail(&cache->lru, &nfsi->access_cache_entry_lru); - err = 0; - out: -@@ -2719,7 +2737,7 @@ out_zap: - return -ENOENT; - } - --static int nfs_access_get_cached_rcu(struct inode *inode, const struct cred *cred, struct nfs_access_entry *res) -+static int nfs_access_get_cached_rcu(struct inode *inode, const struct cred *cred, u32 *mask) - { - /* Only check the most recently returned cache entry, - * but do it without locking. -@@ -2741,22 +2759,21 @@ static int nfs_access_get_cached_rcu(struct inode *inode, const struct cred *cre - goto out; - if (nfs_check_cache_invalid(inode, NFS_INO_INVALID_ACCESS)) - goto out; -- res->cred = cache->cred; -- res->mask = cache->mask; -+ *mask = cache->mask; - err = 0; - out: - rcu_read_unlock(); - return err; - } - --int nfs_access_get_cached(struct inode *inode, const struct cred *cred, struct --nfs_access_entry *res, bool may_block) -+int nfs_access_get_cached(struct inode *inode, const struct cred *cred, -+ u32 *mask, bool may_block) - { - int status; - -- status = nfs_access_get_cached_rcu(inode, cred, res); -+ status = nfs_access_get_cached_rcu(inode, cred, mask); - if (status != 0) -- status = nfs_access_get_cached_locked(inode, cred, res, -+ status = nfs_access_get_cached_locked(inode, cred, mask, - may_block); - - return status; -@@ -2877,7 +2894,7 @@ static int nfs_do_access(struct inode *inode, const struct cred *cred, int mask) - - trace_nfs_access_enter(inode); - -- status = nfs_access_get_cached(inode, cred, &cache, may_block); -+ status = nfs_access_get_cached(inode, cred, &cache.mask, may_block); - if (status == 0) - goto out_cached; - -diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c -index 2e894fec036b0..c220810c61d14 100644 ---- a/fs/nfs/direct.c -+++ b/fs/nfs/direct.c -@@ -172,8 +172,8 @@ ssize_t nfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter) - VM_BUG_ON(iov_iter_count(iter) != PAGE_SIZE); - - if (iov_iter_rw(iter) == READ) -- return nfs_file_direct_read(iocb, iter); -- return nfs_file_direct_write(iocb, iter); -+ return nfs_file_direct_read(iocb, iter, true); -+ return nfs_file_direct_write(iocb, iter, true); - } - - static void nfs_direct_release_pages(struct page **pages, unsigned int npages) -@@ -424,6 +424,7 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq, - * nfs_file_direct_read - file direct read operation for NFS files - * @iocb: target I/O control block - * @iter: vector of user buffers into which to read data -+ * @swap: flag indicating this is swap IO, not O_DIRECT IO - * - * We use this function for direct reads instead of calling - * generic_file_aio_read() in order to avoid gfar's check to see if -@@ -439,7 +440,8 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq, - * client must read the updated atime from the server back into its - * cache. 
- */ --ssize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter) -+ssize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter, -+ bool swap) - { - struct file *file = iocb->ki_filp; - struct address_space *mapping = file->f_mapping; -@@ -481,12 +483,14 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter) - if (iter_is_iovec(iter)) - dreq->flags = NFS_ODIRECT_SHOULD_DIRTY; - -- nfs_start_io_direct(inode); -+ if (!swap) -+ nfs_start_io_direct(inode); - - NFS_I(inode)->read_io += count; - requested = nfs_direct_read_schedule_iovec(dreq, iter, iocb->ki_pos); - -- nfs_end_io_direct(inode); -+ if (!swap) -+ nfs_end_io_direct(inode); - - if (requested > 0) { - result = nfs_direct_wait(dreq); -@@ -620,7 +624,7 @@ static void nfs_direct_commit_complete(struct nfs_commit_data *data) - nfs_unlock_and_release_request(req); - } - -- if (atomic_dec_and_test(&cinfo.mds->rpcs_out)) -+ if (nfs_commit_end(cinfo.mds)) - nfs_direct_write_complete(dreq); - } - -@@ -789,7 +793,7 @@ static const struct nfs_pgio_completion_ops nfs_direct_write_completion_ops = { - */ - static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq, - struct iov_iter *iter, -- loff_t pos) -+ loff_t pos, int ioflags) - { - struct nfs_pageio_descriptor desc; - struct inode *inode = dreq->inode; -@@ -797,7 +801,7 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq, - size_t requested_bytes = 0; - size_t wsize = max_t(size_t, NFS_SERVER(inode)->wsize, PAGE_SIZE); - -- nfs_pageio_init_write(&desc, inode, FLUSH_COND_STABLE, false, -+ nfs_pageio_init_write(&desc, inode, ioflags, false, - &nfs_direct_write_completion_ops); - desc.pg_dreq = dreq; - get_dreq(dreq); -@@ -875,6 +879,7 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq, - * nfs_file_direct_write - file direct write operation for NFS files - * @iocb: target I/O control block - * @iter: vector of user buffers from which to write data -+ * @swap: flag indicating this is swap IO, not O_DIRECT IO - * - * We use this function for direct writes instead of calling - * generic_file_aio_write() in order to avoid taking the inode -@@ -891,7 +896,8 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq, - * Note that O_APPEND is not supported for NFS direct writes, as there - * is no atomic O_APPEND write facility in the NFS protocol. 
- */ --ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter) -+ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter, -+ bool swap) - { - ssize_t result, requested; - size_t count; -@@ -905,7 +911,11 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter) - dfprintk(FILE, "NFS: direct write(%pD2, %zd@%Ld)\n", - file, iov_iter_count(iter), (long long) iocb->ki_pos); - -- result = generic_write_checks(iocb, iter); -+ if (swap) -+ /* bypass generic checks */ -+ result = iov_iter_count(iter); -+ else -+ result = generic_write_checks(iocb, iter); - if (result <= 0) - return result; - count = result; -@@ -936,16 +946,22 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter) - dreq->iocb = iocb; - pnfs_init_ds_commit_info_ops(&dreq->ds_cinfo, inode); - -- nfs_start_io_direct(inode); -+ if (swap) { -+ requested = nfs_direct_write_schedule_iovec(dreq, iter, pos, -+ FLUSH_STABLE); -+ } else { -+ nfs_start_io_direct(inode); - -- requested = nfs_direct_write_schedule_iovec(dreq, iter, pos); -+ requested = nfs_direct_write_schedule_iovec(dreq, iter, pos, -+ FLUSH_COND_STABLE); - -- if (mapping->nrpages) { -- invalidate_inode_pages2_range(mapping, -- pos >> PAGE_SHIFT, end); +-static int io_renameat_prep(struct io_kiocb *req, +- const struct io_uring_sqe *sqe) +-{ +- struct io_rename *ren = &req->rename; +- const char __user *oldf, *newf; +- +- if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) +- return -EINVAL; +- if (sqe->ioprio || sqe->buf_index || sqe->splice_fd_in) +- return -EINVAL; +- if (unlikely(req->flags & REQ_F_FIXED_FILE)) +- return -EBADF; +- +- ren->old_dfd = READ_ONCE(sqe->fd); +- oldf = u64_to_user_ptr(READ_ONCE(sqe->addr)); +- newf = u64_to_user_ptr(READ_ONCE(sqe->addr2)); +- ren->new_dfd = READ_ONCE(sqe->len); +- ren->flags = READ_ONCE(sqe->rename_flags); +- +- ren->oldpath = getname(oldf); +- if (IS_ERR(ren->oldpath)) +- return PTR_ERR(ren->oldpath); +- +- ren->newpath = getname(newf); +- if (IS_ERR(ren->newpath)) { +- putname(ren->oldpath); +- return PTR_ERR(ren->newpath); - } -+ if (mapping->nrpages) { -+ invalidate_inode_pages2_range(mapping, -+ pos >> PAGE_SHIFT, end); -+ } - -- nfs_end_io_direct(inode); -+ nfs_end_io_direct(inode); -+ } - - if (requested > 0) { - result = nfs_direct_wait(dreq); -diff --git a/fs/nfs/file.c b/fs/nfs/file.c -index aa353fd582404..ad5114e480097 100644 ---- a/fs/nfs/file.c -+++ b/fs/nfs/file.c -@@ -161,7 +161,7 @@ nfs_file_read(struct kiocb *iocb, struct iov_iter *to) - ssize_t result; - - if (iocb->ki_flags & IOCB_DIRECT) -- return nfs_file_direct_read(iocb, to); -+ return nfs_file_direct_read(iocb, to, false); - - dprintk("NFS: read(%pD2, %zu@%lu)\n", - iocb->ki_filp, -@@ -208,22 +208,25 @@ static int - nfs_file_fsync_commit(struct file *file, int datasync) - { - struct inode *inode = file_inode(file); +- +- req->flags |= REQ_F_NEED_CLEANUP; +- return 0; +-} +- +-static int io_renameat(struct io_kiocb *req, unsigned int issue_flags) +-{ +- struct io_rename *ren = &req->rename; - int ret; -+ int ret, ret2; - - dprintk("NFS: fsync file(%pD2) datasync %d\n", file, datasync); - - nfs_inc_stats(inode, NFSIOS_VFSFSYNC); - ret = nfs_commit_inode(inode, FLUSH_SYNC); +- +- if (issue_flags & IO_URING_F_NONBLOCK) +- return -EAGAIN; +- +- ret = do_renameat2(ren->old_dfd, ren->oldpath, ren->new_dfd, +- ren->newpath, ren->flags); +- +- req->flags &= ~REQ_F_NEED_CLEANUP; - if (ret < 0) -- return ret; -- return file_check_and_advance_wb_err(file); -+ ret2 = 
file_check_and_advance_wb_err(file); -+ if (ret2 < 0) -+ return ret2; -+ return ret; - } - - int - nfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync) - { -- struct nfs_open_context *ctx = nfs_file_open_context(file); - struct inode *inode = file_inode(file); -+ struct nfs_inode *nfsi = NFS_I(inode); -+ long save_nredirtied = atomic_long_read(&nfsi->redirtied_pages); -+ long nredirtied; - int ret; - - trace_nfs_fsync_enter(inode); -@@ -238,15 +241,10 @@ nfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync) - ret = pnfs_sync_inode(inode, !!datasync); - if (ret != 0) - break; -- if (!test_and_clear_bit(NFS_CONTEXT_RESEND_WRITES, &ctx->flags)) -+ nredirtied = atomic_long_read(&nfsi->redirtied_pages); -+ if (nredirtied == save_nredirtied) - break; -- /* -- * If nfs_file_fsync_commit detected a server reboot, then -- * resend all dirty pages that might have been covered by -- * the NFS_CONTEXT_RESEND_WRITES flag -- */ -- start = 0; -- end = LLONG_MAX; -+ save_nredirtied = nredirtied; - } - - trace_nfs_fsync_exit(inode, ret); -@@ -389,11 +387,8 @@ static int nfs_write_end(struct file *file, struct address_space *mapping, - return status; - NFS_I(mapping->host)->write_io += copied; - -- if (nfs_ctx_key_to_expire(ctx, mapping->host)) { -- status = nfs_wb_all(mapping->host); -- if (status < 0) -- return status; -- } -+ if (nfs_ctx_key_to_expire(ctx, mapping->host)) -+ nfs_wb_all(mapping->host); - - return copied; - } -@@ -590,18 +585,6 @@ static const struct vm_operations_struct nfs_file_vm_ops = { - .page_mkwrite = nfs_vm_page_mkwrite, - }; - --static int nfs_need_check_write(struct file *filp, struct inode *inode, -- int error) +- req_set_fail(req); +- io_req_complete(req, ret); +- return 0; +-} +- +-static int io_unlinkat_prep(struct io_kiocb *req, +- const struct io_uring_sqe *sqe) -{ -- struct nfs_open_context *ctx; +- struct io_unlink *un = &req->unlink; +- const char __user *fname; - -- ctx = nfs_file_open_context(filp); -- if (nfs_error_is_fatal_on_server(error) || -- nfs_ctx_key_to_expire(ctx, inode)) -- return 1; +- if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) +- return -EINVAL; +- if (sqe->ioprio || sqe->off || sqe->len || sqe->buf_index || +- sqe->splice_fd_in) +- return -EINVAL; +- if (unlikely(req->flags & REQ_F_FIXED_FILE)) +- return -EBADF; +- +- un->dfd = READ_ONCE(sqe->fd); +- +- un->flags = READ_ONCE(sqe->unlink_flags); +- if (un->flags & ~AT_REMOVEDIR) +- return -EINVAL; +- +- fname = u64_to_user_ptr(READ_ONCE(sqe->addr)); +- un->filename = getname(fname); +- if (IS_ERR(un->filename)) +- return PTR_ERR(un->filename); +- +- req->flags |= REQ_F_NEED_CLEANUP; - return 0; -} - - ssize_t nfs_file_write(struct kiocb *iocb, struct iov_iter *from) - { - struct file *file = iocb->ki_filp; -@@ -616,7 +599,7 @@ ssize_t nfs_file_write(struct kiocb *iocb, struct iov_iter *from) - return result; - - if (iocb->ki_flags & IOCB_DIRECT) -- return nfs_file_direct_write(iocb, from); -+ return nfs_file_direct_write(iocb, from, false); - - dprintk("NFS: write(%pD2, %zu@%Ld)\n", - file, iov_iter_count(from), (long long) iocb->ki_pos); -@@ -629,7 +612,7 @@ ssize_t nfs_file_write(struct kiocb *iocb, struct iov_iter *from) - if (iocb->ki_flags & IOCB_APPEND || iocb->ki_pos > i_size_read(inode)) { - result = nfs_revalidate_file_size(inode, file); - if (result) -- goto out; -+ return result; - } - - nfs_clear_invalid_mapping(file->f_mapping); -@@ -648,6 +631,7 @@ ssize_t nfs_file_write(struct kiocb *iocb, struct iov_iter *from) - - written = result; - 
iocb->ki_pos += written; -+ nfs_add_stats(inode, NFSIOS_NORMALWRITTENBYTES, written); - - if (mntflags & NFS_MOUNT_WRITE_EAGER) { - result = filemap_fdatawrite_range(file->f_mapping, -@@ -665,17 +649,22 @@ ssize_t nfs_file_write(struct kiocb *iocb, struct iov_iter *from) - } - result = generic_write_sync(iocb, written); - if (result < 0) -- goto out; -+ return result; - -+out: - /* Return error values */ - error = filemap_check_wb_err(file->f_mapping, since); -- if (nfs_need_check_write(file, inode, error)) { -- int err = nfs_wb_all(inode); -- if (err < 0) -- result = err; -+ switch (error) { -+ default: -+ break; -+ case -EDQUOT: -+ case -EFBIG: -+ case -ENOSPC: -+ nfs_wb_all(inode); -+ error = file_check_and_advance_wb_err(file); -+ if (error < 0) -+ result = error; - } -- nfs_add_stats(inode, NFSIOS_NORMALWRITTENBYTES, written); --out: - return result; - - out_swapfile: -diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c -index d383de00d4868..ceef75b4d2494 100644 ---- a/fs/nfs/flexfilelayout/flexfilelayout.c -+++ b/fs/nfs/flexfilelayout/flexfilelayout.c -@@ -1140,6 +1140,8 @@ static int ff_layout_async_handle_error_v4(struct rpc_task *task, - case -EIO: - case -ETIMEDOUT: - case -EPIPE: -+ case -EPROTO: -+ case -ENODEV: - dprintk("%s DS connection error %d\n", __func__, - task->tk_status); - nfs4_delete_deviceid(devid->ld, devid->nfs_client, -@@ -1245,6 +1247,8 @@ static void ff_layout_io_track_ds_error(struct pnfs_layout_segment *lseg, - case -ENOBUFS: - case -EPIPE: - case -EPERM: -+ case -EPROTO: -+ case -ENODEV: - *op_status = status = NFS4ERR_NXIO; - break; - case -EACCES: -diff --git a/fs/nfs/flexfilelayout/flexfilelayoutdev.c b/fs/nfs/flexfilelayout/flexfilelayoutdev.c -index c9b61b818ec11..bfa7202ca7be1 100644 ---- a/fs/nfs/flexfilelayout/flexfilelayoutdev.c -+++ b/fs/nfs/flexfilelayout/flexfilelayoutdev.c -@@ -378,10 +378,10 @@ nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg, - goto noconnect; - - ds = mirror->mirror_ds->ds; -+ if (READ_ONCE(ds->ds_clp)) -+ goto out; - /* matching smp_wmb() in _nfs4_pnfs_v3/4_ds_connect */ - smp_rmb(); -- if (ds->ds_clp) -- goto out; - - /* FIXME: For now we assume the server sent only one version of NFS - * to use for the DS. 
-diff --git a/fs/nfs/fs_context.c b/fs/nfs/fs_context.c -index 0d444a90f513a..fb3cad38b1497 100644 ---- a/fs/nfs/fs_context.c -+++ b/fs/nfs/fs_context.c -@@ -514,7 +514,7 @@ static int nfs_fs_context_parse_param(struct fs_context *fc, - if (result.negated) - ctx->flags &= ~NFS_MOUNT_SOFTREVAL; - else -- ctx->flags &= NFS_MOUNT_SOFTREVAL; -+ ctx->flags |= NFS_MOUNT_SOFTREVAL; - break; - case Opt_posix: - if (result.negated) -diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c -index 59355c106eceb..7604cb6a0ac23 100644 ---- a/fs/nfs/getroot.c -+++ b/fs/nfs/getroot.c -@@ -80,18 +80,15 @@ int nfs_get_root(struct super_block *s, struct fs_context *fc) - goto out; - - /* get the actual root for this mount */ -- fsinfo.fattr = nfs_alloc_fattr(); -+ fsinfo.fattr = nfs_alloc_fattr_with_label(server); - if (fsinfo.fattr == NULL) - goto out_name; - -- fsinfo.fattr->label = nfs4_label_alloc(server, GFP_KERNEL); -- if (IS_ERR(fsinfo.fattr->label)) -- goto out_fattr; - error = server->nfs_client->rpc_ops->getroot(server, ctx->mntfh, &fsinfo); - if (error < 0) { - dprintk("nfs_get_root: getattr error = %d\n", -error); - nfs_errorf(fc, "NFS: Couldn't getattr on root"); -- goto out_label; -+ goto out_fattr; - } - - inode = nfs_fhget(s, ctx->mntfh, fsinfo.fattr, NULL); -@@ -99,12 +96,12 @@ int nfs_get_root(struct super_block *s, struct fs_context *fc) - dprintk("nfs_get_root: get root inode failed\n"); - error = PTR_ERR(inode); - nfs_errorf(fc, "NFS: Couldn't get root inode"); -- goto out_label; -+ goto out_fattr; - } - - error = nfs_superblock_set_dummy_root(s, inode); - if (error != 0) -- goto out_label; -+ goto out_fattr; - - /* root dentries normally start off anonymous and get spliced in later - * if the dentry tree reaches them; however if the dentry already -@@ -115,7 +112,7 @@ int nfs_get_root(struct super_block *s, struct fs_context *fc) - dprintk("nfs_get_root: get root dentry failed\n"); - error = PTR_ERR(root); - nfs_errorf(fc, "NFS: Couldn't get root dentry"); -- goto out_label; -+ goto out_fattr; - } - - security_d_instantiate(root, inode); -@@ -154,8 +151,6 @@ int nfs_get_root(struct super_block *s, struct fs_context *fc) - nfs_setsecurity(inode, fsinfo.fattr, fsinfo.fattr->label); - error = 0; - --out_label: -- nfs4_label_free(fsinfo.fattr->label); - out_fattr: - nfs_free_fattr(fsinfo.fattr); - out_name: -@@ -165,5 +160,5 @@ out: - error_splat_root: - dput(fc->root); - fc->root = NULL; -- goto out_label; -+ goto out_fattr; - } -diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c -index 853213b3a2095..e4524635a129a 100644 ---- a/fs/nfs/inode.c -+++ b/fs/nfs/inode.c -@@ -210,10 +210,15 @@ void nfs_set_cache_invalid(struct inode *inode, unsigned long flags) - flags &= ~NFS_INO_INVALID_XATTR; - if (flags & NFS_INO_INVALID_DATA) - nfs_fscache_invalidate(inode); -- if (inode->i_mapping->nrpages == 0) -- flags &= ~(NFS_INO_INVALID_DATA|NFS_INO_DATA_INVAL_DEFER); - flags &= ~(NFS_INO_REVAL_PAGECACHE | NFS_INO_REVAL_FORCED); -+ - nfsi->cache_validity |= flags; -+ -+ if (inode->i_mapping->nrpages == 0) -+ nfsi->cache_validity &= ~(NFS_INO_INVALID_DATA | -+ NFS_INO_DATA_INVAL_DEFER); -+ else if (nfsi->cache_validity & NFS_INO_INVALID_DATA) -+ nfsi->cache_validity &= ~NFS_INO_DATA_INVAL_DEFER; - } - EXPORT_SYMBOL_GPL(nfs_set_cache_invalid); - -@@ -426,6 +431,23 @@ nfs_ilookup(struct super_block *sb, struct nfs_fattr *fattr, struct nfs_fh *fh) - return inode; - } - -+static void nfs_inode_init_regular(struct nfs_inode *nfsi) -+{ -+ atomic_long_set(&nfsi->nrequests, 0); -+ atomic_long_set(&nfsi->redirtied_pages, 
0); -+ INIT_LIST_HEAD(&nfsi->commit_info.list); -+ atomic_long_set(&nfsi->commit_info.ncommit, 0); -+ atomic_set(&nfsi->commit_info.rpcs_out, 0); -+ mutex_init(&nfsi->commit_mutex); -+} -+ -+static void nfs_inode_init_dir(struct nfs_inode *nfsi) -+{ -+ nfsi->cache_change_attribute = 0; -+ memset(nfsi->cookieverf, 0, sizeof(nfsi->cookieverf)); -+ init_rwsem(&nfsi->rmdir_sem); -+} -+ - /* - * This is our front-end to iget that looks up inodes by file handle - * instead of inode number. -@@ -480,10 +502,12 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr, st - if (S_ISREG(inode->i_mode)) { - inode->i_fop = NFS_SB(sb)->nfs_client->rpc_ops->file_ops; - inode->i_data.a_ops = &nfs_file_aops; -+ nfs_inode_init_regular(nfsi); - } else if (S_ISDIR(inode->i_mode)) { - inode->i_op = NFS_SB(sb)->nfs_client->rpc_ops->dir_inode_ops; - inode->i_fop = &nfs_dir_operations; - inode->i_data.a_ops = &nfs_dir_aops; -+ nfs_inode_init_dir(nfsi); - /* Deal with crossing mountpoints */ - if (fattr->valid & NFS_ATTR_FATTR_MOUNTPOINT || - fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL) { -@@ -509,7 +533,6 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr, st - inode->i_uid = make_kuid(&init_user_ns, -2); - inode->i_gid = make_kgid(&init_user_ns, -2); - inode->i_blocks = 0; -- memset(nfsi->cookieverf, 0, sizeof(nfsi->cookieverf)); - nfsi->write_io = 0; - nfsi->read_io = 0; - -@@ -835,12 +858,9 @@ int nfs_getattr(struct user_namespace *mnt_userns, const struct path *path, - } - - /* Flush out writes to the server in order to update c/mtime. */ -- if ((request_mask & (STATX_CTIME|STATX_MTIME)) && -- S_ISREG(inode->i_mode)) { -- err = filemap_write_and_wait(inode->i_mapping); -- if (err) -- goto out; +-static int io_unlinkat(struct io_kiocb *req, unsigned int issue_flags) +-{ +- struct io_unlink *un = &req->unlink; +- int ret; +- +- if (issue_flags & IO_URING_F_NONBLOCK) +- return -EAGAIN; +- +- if (un->flags & AT_REMOVEDIR) +- ret = do_rmdir(un->dfd, un->filename); +- else +- ret = do_unlinkat(un->dfd, un->filename); +- +- req->flags &= ~REQ_F_NEED_CLEANUP; +- if (ret < 0) +- req_set_fail(req); +- io_req_complete(req, ret); +- return 0; +-} +- +-static int io_mkdirat_prep(struct io_kiocb *req, +- const struct io_uring_sqe *sqe) +-{ +- struct io_mkdir *mkd = &req->mkdir; +- const char __user *fname; +- +- if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) +- return -EINVAL; +- if (sqe->ioprio || sqe->off || sqe->rw_flags || sqe->buf_index || +- sqe->splice_fd_in) +- return -EINVAL; +- if (unlikely(req->flags & REQ_F_FIXED_FILE)) +- return -EBADF; +- +- mkd->dfd = READ_ONCE(sqe->fd); +- mkd->mode = READ_ONCE(sqe->len); +- +- fname = u64_to_user_ptr(READ_ONCE(sqe->addr)); +- mkd->filename = getname(fname); +- if (IS_ERR(mkd->filename)) +- return PTR_ERR(mkd->filename); +- +- req->flags |= REQ_F_NEED_CLEANUP; +- return 0; +-} +- +-static int io_mkdirat(struct io_kiocb *req, int issue_flags) +-{ +- struct io_mkdir *mkd = &req->mkdir; +- int ret; +- +- if (issue_flags & IO_URING_F_NONBLOCK) +- return -EAGAIN; +- +- ret = do_mkdirat(mkd->dfd, mkd->filename, mkd->mode); +- +- req->flags &= ~REQ_F_NEED_CLEANUP; +- if (ret < 0) +- req_set_fail(req); +- io_req_complete(req, ret); +- return 0; +-} +- +-static int io_symlinkat_prep(struct io_kiocb *req, +- const struct io_uring_sqe *sqe) +-{ +- struct io_symlink *sl = &req->symlink; +- const char __user *oldpath, *newpath; +- +- if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) +- return -EINVAL; +- if (sqe->ioprio || 
sqe->len || sqe->rw_flags || sqe->buf_index || +- sqe->splice_fd_in) +- return -EINVAL; +- if (unlikely(req->flags & REQ_F_FIXED_FILE)) +- return -EBADF; +- +- sl->new_dfd = READ_ONCE(sqe->fd); +- oldpath = u64_to_user_ptr(READ_ONCE(sqe->addr)); +- newpath = u64_to_user_ptr(READ_ONCE(sqe->addr2)); +- +- sl->oldpath = getname(oldpath); +- if (IS_ERR(sl->oldpath)) +- return PTR_ERR(sl->oldpath); +- +- sl->newpath = getname(newpath); +- if (IS_ERR(sl->newpath)) { +- putname(sl->oldpath); +- return PTR_ERR(sl->newpath); - } -+ if ((request_mask & (STATX_CTIME | STATX_MTIME)) && -+ S_ISREG(inode->i_mode)) -+ filemap_write_and_wait(inode->i_mapping); - - /* - * We may force a getattr if the user cares about atime. -@@ -1165,7 +1185,6 @@ int nfs_open(struct inode *inode, struct file *filp) - nfs_fscache_open_file(inode, filp); - return 0; - } --EXPORT_SYMBOL_GPL(nfs_open); - - /* - * This function is called whenever some part of NFS notices that -@@ -1579,18 +1598,37 @@ struct nfs_fattr *nfs_alloc_fattr(void) - { - struct nfs_fattr *fattr; - -- fattr = kmalloc(sizeof(*fattr), GFP_NOFS); -- if (fattr != NULL) -+ fattr = kmalloc(sizeof(*fattr), GFP_KERNEL); -+ if (fattr != NULL) { - nfs_fattr_init(fattr); -+ fattr->label = NULL; -+ } - return fattr; - } - EXPORT_SYMBOL_GPL(nfs_alloc_fattr); - -+struct nfs_fattr *nfs_alloc_fattr_with_label(struct nfs_server *server) -+{ -+ struct nfs_fattr *fattr = nfs_alloc_fattr(); -+ -+ if (!fattr) -+ return NULL; -+ -+ fattr->label = nfs4_label_alloc(server, GFP_KERNEL); -+ if (IS_ERR(fattr->label)) { -+ kfree(fattr); -+ return NULL; -+ } -+ -+ return fattr; -+} -+EXPORT_SYMBOL_GPL(nfs_alloc_fattr_with_label); -+ - struct nfs_fh *nfs_alloc_fhandle(void) - { - struct nfs_fh *fh; - -- fh = kmalloc(sizeof(struct nfs_fh), GFP_NOFS); -+ fh = kmalloc(sizeof(struct nfs_fh), GFP_KERNEL); - if (fh != NULL) - fh->size = 0; - return fh; -@@ -1777,8 +1815,10 @@ static int nfs_inode_finish_partial_attr_update(const struct nfs_fattr *fattr, - NFS_INO_INVALID_BLOCKS | NFS_INO_INVALID_OTHER | - NFS_INO_INVALID_NLINK; - unsigned long cache_validity = NFS_I(inode)->cache_validity; -+ enum nfs4_change_attr_type ctype = NFS_SERVER(inode)->change_attr_type; - -- if (!(cache_validity & NFS_INO_INVALID_CHANGE) && -+ if (ctype != NFS4_CHANGE_TYPE_IS_UNDEFINED && -+ !(cache_validity & NFS_INO_INVALID_CHANGE) && - (cache_validity & check_valid) != 0 && - (fattr->valid & NFS_ATTR_FATTR_CHANGE) != 0 && - nfs_inode_attrs_cmp_monotonic(fattr, inode) == 0) -@@ -2260,14 +2300,7 @@ static void init_once(void *foo) - INIT_LIST_HEAD(&nfsi->open_files); - INIT_LIST_HEAD(&nfsi->access_cache_entry_lru); - INIT_LIST_HEAD(&nfsi->access_cache_inode_lru); -- INIT_LIST_HEAD(&nfsi->commit_info.list); -- atomic_long_set(&nfsi->nrequests, 0); -- atomic_long_set(&nfsi->commit_info.ncommit, 0); -- atomic_set(&nfsi->commit_info.rpcs_out, 0); -- init_rwsem(&nfsi->rmdir_sem); -- mutex_init(&nfsi->commit_mutex); - nfs4_init_once(nfsi); -- nfsi->cache_change_attribute = 0; - } - - static int __init nfs_init_inodecache(void) -diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h -index 66fc936834f23..2ceb4b98ec15f 100644 ---- a/fs/nfs/internal.h -+++ b/fs/nfs/internal.h -@@ -42,6 +42,16 @@ static inline bool nfs_lookup_is_soft_revalidate(const struct dentry *dentry) - return true; - } - -+static inline fmode_t flags_to_mode(int flags) -+{ -+ fmode_t res = (__force fmode_t)flags & FMODE_EXEC; -+ if ((flags & O_ACCMODE) != O_WRONLY) -+ res |= FMODE_READ; -+ if ((flags & O_ACCMODE) != O_RDONLY) -+ res |= FMODE_WRITE; -+ 
return res; -+} -+ - /* - * Note: RFC 1813 doesn't limit the number of auth flavors that - * a server can return, so make something up. -@@ -341,14 +351,6 @@ nfs4_label_copy(struct nfs4_label *dst, struct nfs4_label *src) - - return dst; - } --static inline void nfs4_label_free(struct nfs4_label *label) +- +- req->flags |= REQ_F_NEED_CLEANUP; +- return 0; +-} +- +-static int io_symlinkat(struct io_kiocb *req, int issue_flags) -{ -- if (label) { -- kfree(label->label); -- kfree(label); +- struct io_symlink *sl = &req->symlink; +- int ret; +- +- if (issue_flags & IO_URING_F_NONBLOCK) +- return -EAGAIN; +- +- ret = do_symlinkat(sl->oldpath, sl->new_dfd, sl->newpath); +- +- req->flags &= ~REQ_F_NEED_CLEANUP; +- if (ret < 0) +- req_set_fail(req); +- io_req_complete(req, ret); +- return 0; +-} +- +-static int io_linkat_prep(struct io_kiocb *req, +- const struct io_uring_sqe *sqe) +-{ +- struct io_hardlink *lnk = &req->hardlink; +- const char __user *oldf, *newf; +- +- if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) +- return -EINVAL; +- if (sqe->ioprio || sqe->rw_flags || sqe->buf_index || sqe->splice_fd_in) +- return -EINVAL; +- if (unlikely(req->flags & REQ_F_FIXED_FILE)) +- return -EBADF; +- +- lnk->old_dfd = READ_ONCE(sqe->fd); +- lnk->new_dfd = READ_ONCE(sqe->len); +- oldf = u64_to_user_ptr(READ_ONCE(sqe->addr)); +- newf = u64_to_user_ptr(READ_ONCE(sqe->addr2)); +- lnk->flags = READ_ONCE(sqe->hardlink_flags); +- +- lnk->oldpath = getname(oldf); +- if (IS_ERR(lnk->oldpath)) +- return PTR_ERR(lnk->oldpath); +- +- lnk->newpath = getname(newf); +- if (IS_ERR(lnk->newpath)) { +- putname(lnk->oldpath); +- return PTR_ERR(lnk->newpath); - } -- return; +- +- req->flags |= REQ_F_NEED_CLEANUP; +- return 0; -} - - static inline void nfs_zap_label_cache_locked(struct nfs_inode *nfsi) - { -@@ -357,7 +359,6 @@ static inline void nfs_zap_label_cache_locked(struct nfs_inode *nfsi) - } - #else - static inline struct nfs4_label *nfs4_label_alloc(struct nfs_server *server, gfp_t flags) { return NULL; } --static inline void nfs4_label_free(void *label) {} - static inline void nfs_zap_label_cache_locked(struct nfs_inode *nfsi) - { - } -@@ -580,6 +581,13 @@ nfs_write_match_verf(const struct nfs_writeverf *verf, - !nfs_write_verifier_cmp(&req->wb_verf, &verf->verifier); - } - -+static inline gfp_t nfs_io_gfp_mask(void) -+{ -+ if (current->flags & PF_WQ_WORKER) -+ return GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN; -+ return GFP_KERNEL; -+} -+ - /* unlink.c */ - extern struct rpc_task * - nfs_async_rename(struct inode *old_dir, struct inode *new_dir, -@@ -817,6 +825,7 @@ static inline bool nfs_error_is_fatal_on_server(int err) - case 0: - case -ERESTARTSYS: - case -EINTR: -+ case -ENOMEM: - return false; - } - return nfs_error_is_fatal(err); -diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c -index 7fba7711e6b3a..3d5ba43f44bb6 100644 ---- a/fs/nfs/nfs2xdr.c -+++ b/fs/nfs/nfs2xdr.c -@@ -949,7 +949,7 @@ int nfs2_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, - - error = decode_filename_inline(xdr, &entry->name, &entry->len); - if (unlikely(error)) -- return error; -+ return -EAGAIN; - - /* - * The type (size and byte order) of nfscookie isn't defined in -diff --git a/fs/nfs/nfs3client.c b/fs/nfs/nfs3client.c -index 5601e47360c28..b49359afac883 100644 ---- a/fs/nfs/nfs3client.c -+++ b/fs/nfs/nfs3client.c -@@ -108,7 +108,6 @@ struct nfs_client *nfs3_set_ds_client(struct nfs_server *mds_srv, - if (mds_srv->flags & NFS_MOUNT_NORESVPORT) - __set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags); - -- 
__set_bit(NFS_CS_NOPING, &cl_init.init_flags); - __set_bit(NFS_CS_DS, &cl_init.init_flags); - - /* Use the MDS nfs_client cl_ipaddr. */ -diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c -index e6eca1d7481b8..7ab60ad98776f 100644 ---- a/fs/nfs/nfs3xdr.c -+++ b/fs/nfs/nfs3xdr.c -@@ -1967,7 +1967,6 @@ int nfs3_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, - bool plus) - { - struct user_namespace *userns = rpc_userns(entry->server->client); -- struct nfs_entry old = *entry; - __be32 *p; - int error; - u64 new_cookie; -@@ -1987,15 +1986,15 @@ int nfs3_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, - - error = decode_fileid3(xdr, &entry->ino); - if (unlikely(error)) -- return error; -+ return -EAGAIN; - - error = decode_inline_filename3(xdr, &entry->name, &entry->len); - if (unlikely(error)) -- return error; -+ return -EAGAIN; - - error = decode_cookie3(xdr, &new_cookie); - if (unlikely(error)) -- return error; -+ return -EAGAIN; - - entry->d_type = DT_UNKNOWN; - -@@ -2003,7 +2002,7 @@ int nfs3_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, - entry->fattr->valid = 0; - error = decode_post_op_attr(xdr, entry->fattr, userns); - if (unlikely(error)) -- return error; -+ return -EAGAIN; - if (entry->fattr->valid & NFS_ATTR_FATTR_V3) - entry->d_type = nfs_umode_to_dtype(entry->fattr->mode); - -@@ -2018,11 +2017,8 @@ int nfs3_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, - return -EAGAIN; - if (*p != xdr_zero) { - error = decode_nfs_fh3(xdr, entry->fh); -- if (unlikely(error)) { -- if (error == -E2BIG) -- goto out_truncated; -- return error; -- } -+ if (unlikely(error)) -+ return -EAGAIN; - } else - zero_nfs_fh3(entry->fh); - } -@@ -2031,11 +2027,6 @@ int nfs3_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, - entry->cookie = new_cookie; - - return 0; - --out_truncated: -- dprintk("NFS: directory entry contains invalid file handle\n"); -- *entry = old; -- return -EAGAIN; - } - - /* -@@ -2227,7 +2218,8 @@ static int decode_fsinfo3resok(struct xdr_stream *xdr, - - /* ignore properties */ - result->lease_time = 0; -- result->change_attr_type = NFS4_CHANGE_TYPE_IS_TIME_METADATA; -+ result->change_attr_type = NFS4_CHANGE_TYPE_IS_UNDEFINED; -+ result->xattr_support = 0; - return 0; - } - -diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c -index a24349512ffe9..93f4d8257525b 100644 ---- a/fs/nfs/nfs42proc.c -+++ b/fs/nfs/nfs42proc.c -@@ -285,7 +285,9 @@ static void nfs42_copy_dest_done(struct inode *inode, loff_t pos, loff_t len) - loff_t newsize = pos + len; - loff_t end = newsize - 1; - -- truncate_pagecache_range(inode, pos, end); -+ WARN_ON_ONCE(invalidate_inode_pages2_range(inode->i_mapping, -+ pos >> PAGE_SHIFT, end >> PAGE_SHIFT)); -+ - spin_lock(&inode->i_lock); - if (newsize > i_size_read(inode)) - i_size_write(inode, newsize); -@@ -584,8 +586,10 @@ static int _nfs42_proc_copy_notify(struct file *src, struct file *dst, - - ctx = get_nfs_open_context(nfs_file_open_context(src)); - l_ctx = nfs_get_lock_context(ctx); -- if (IS_ERR(l_ctx)) -- return PTR_ERR(l_ctx); -+ if (IS_ERR(l_ctx)) { -+ status = PTR_ERR(l_ctx); -+ goto out; -+ } - - status = nfs4_set_rw_stateid(&args->cna_src_stateid, ctx, l_ctx, - FMODE_READ); -@@ -593,7 +597,7 @@ static int _nfs42_proc_copy_notify(struct file *src, struct file *dst, - if (status) { - if (status == -EAGAIN) - status = -NFS4ERR_BAD_STATEID; -- return status; -+ goto out; - } - - status = nfs4_call_sync(src_server->client, src_server, &msg, -@@ -601,6 +605,7 @@ static int 
_nfs42_proc_copy_notify(struct file *src, struct file *dst, - if (status == -ENOTSUPP) - src_server->caps &= ~NFS_CAP_COPY_NOTIFY; - -+out: - put_nfs_open_context(nfs_file_open_context(src)); - return status; - } -diff --git a/fs/nfs/nfs42xdr.c b/fs/nfs/nfs42xdr.c -index c8bad735e4c19..271e5f92ed019 100644 ---- a/fs/nfs/nfs42xdr.c -+++ b/fs/nfs/nfs42xdr.c -@@ -1434,8 +1434,7 @@ static int nfs4_xdr_dec_clone(struct rpc_rqst *rqstp, - status = decode_clone(xdr); - if (status) - goto out; -- status = decode_getfattr(xdr, res->dst_fattr, res->server); +-static int io_linkat(struct io_kiocb *req, int issue_flags) +-{ +- struct io_hardlink *lnk = &req->hardlink; +- int ret; - -+ decode_getfattr(xdr, res->dst_fattr, res->server); - out: - res->rpc_status = status; - return status; -diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h -index ba78df4b13d94..f8672a34fd635 100644 ---- a/fs/nfs/nfs4_fs.h -+++ b/fs/nfs/nfs4_fs.h -@@ -261,8 +261,8 @@ struct nfs4_state_maintenance_ops { - }; - - struct nfs4_mig_recovery_ops { -- int (*get_locations)(struct inode *, struct nfs4_fs_locations *, -- struct page *, const struct cred *); -+ int (*get_locations)(struct nfs_server *, struct nfs_fh *, -+ struct nfs4_fs_locations *, struct page *, const struct cred *); - int (*fsid_present)(struct inode *, const struct cred *); - }; - -@@ -281,7 +281,8 @@ struct rpc_clnt *nfs4_negotiate_security(struct rpc_clnt *, struct inode *, - int nfs4_submount(struct fs_context *, struct nfs_server *); - int nfs4_replace_transport(struct nfs_server *server, - const struct nfs4_fs_locations *locations); +- if (issue_flags & IO_URING_F_NONBLOCK) +- return -EAGAIN; - -+size_t nfs_parse_server_name(char *string, size_t len, struct sockaddr *sa, -+ size_t salen, struct net *net, int port); - /* nfs4proc.c */ - extern int nfs4_handle_exception(struct nfs_server *, int, struct nfs4_exception *); - extern int nfs4_async_handle_error(struct rpc_task *task, -@@ -303,8 +304,9 @@ extern int nfs4_do_close(struct nfs4_state *state, gfp_t gfp_mask, int wait); - extern int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle); - extern int nfs4_proc_fs_locations(struct rpc_clnt *, struct inode *, const struct qstr *, - struct nfs4_fs_locations *, struct page *); --extern int nfs4_proc_get_locations(struct inode *, struct nfs4_fs_locations *, -- struct page *page, const struct cred *); -+extern int nfs4_proc_get_locations(struct nfs_server *, struct nfs_fh *, -+ struct nfs4_fs_locations *, -+ struct page *page, const struct cred *); - extern int nfs4_proc_fsid_present(struct inode *, const struct cred *); - extern struct rpc_clnt *nfs4_proc_lookup_mountpoint(struct inode *, - struct dentry *, -diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c -index af57332503bed..ed06b68b2b4e9 100644 ---- a/fs/nfs/nfs4client.c -+++ b/fs/nfs/nfs4client.c -@@ -1368,8 +1368,11 @@ int nfs4_update_server(struct nfs_server *server, const char *hostname, - } - nfs_put_client(clp); - -- if (server->nfs_client->cl_hostname == NULL) -+ if (server->nfs_client->cl_hostname == NULL) { - server->nfs_client->cl_hostname = kstrdup(hostname, GFP_KERNEL); -+ if (server->nfs_client->cl_hostname == NULL) -+ return -ENOMEM; -+ } - nfs_server_insert_lists(server); - - return nfs_probe_destination(server); -diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c -index c91565227ea2a..14f2efdecc2f8 100644 ---- a/fs/nfs/nfs4file.c -+++ b/fs/nfs/nfs4file.c -@@ -32,6 +32,7 @@ nfs4_file_open(struct inode *inode, struct file *filp) - struct dentry *parent = NULL; - 
struct inode *dir; - unsigned openflags = filp->f_flags; -+ fmode_t f_mode; - struct iattr attr; - int err; - -@@ -50,8 +51,9 @@ nfs4_file_open(struct inode *inode, struct file *filp) - if (err) - return err; - -+ f_mode = filp->f_mode; - if ((openflags & O_ACCMODE) == 3) -- return nfs_open(inode, filp); -+ f_mode |= flags_to_mode(openflags); - - /* We can't create new files here */ - openflags &= ~(O_CREAT|O_EXCL); -@@ -59,7 +61,7 @@ nfs4_file_open(struct inode *inode, struct file *filp) - parent = dget_parent(dentry); - dir = d_inode(parent); - -- ctx = alloc_nfs_open_context(file_dentry(filp), filp->f_mode, filp); -+ ctx = alloc_nfs_open_context(file_dentry(filp), f_mode, filp); - err = PTR_ERR(ctx); - if (IS_ERR(ctx)) - goto out; -@@ -317,7 +319,7 @@ static int read_name_gen = 1; - static struct file *__nfs42_ssc_open(struct vfsmount *ss_mnt, - struct nfs_fh *src_fh, nfs4_stateid *stateid) - { -- struct nfs_fattr fattr; -+ struct nfs_fattr *fattr = nfs_alloc_fattr(); - struct file *filep, *res; - struct nfs_server *server; - struct inode *r_ino = NULL; -@@ -328,14 +330,20 @@ static struct file *__nfs42_ssc_open(struct vfsmount *ss_mnt, - - server = NFS_SERVER(ss_mnt->mnt_root->d_inode); - -- nfs_fattr_init(&fattr); -+ if (!fattr) -+ return ERR_PTR(-ENOMEM); - -- status = nfs4_proc_getattr(server, src_fh, &fattr, NULL, NULL); -+ status = nfs4_proc_getattr(server, src_fh, fattr, NULL, NULL); - if (status < 0) { - res = ERR_PTR(status); - goto out; - } - -+ if (!S_ISREG(fattr->mode)) { -+ res = ERR_PTR(-EBADF); -+ goto out; -+ } -+ - res = ERR_PTR(-ENOMEM); - len = strlen(SSC_READ_NAME_BODY) + 16; - read_name = kzalloc(len, GFP_NOFS); -@@ -343,7 +351,7 @@ static struct file *__nfs42_ssc_open(struct vfsmount *ss_mnt, - goto out; - snprintf(read_name, len, SSC_READ_NAME_BODY, read_name_gen++); - -- r_ino = nfs_fhget(ss_mnt->mnt_root->d_inode->i_sb, src_fh, &fattr, -+ r_ino = nfs_fhget(ss_mnt->mnt_root->d_inode->i_sb, src_fh, fattr, - NULL); - if (IS_ERR(r_ino)) { - res = ERR_CAST(r_ino); -@@ -354,6 +362,7 @@ static struct file *__nfs42_ssc_open(struct vfsmount *ss_mnt, - r_ino->i_fop); - if (IS_ERR(filep)) { - res = ERR_CAST(filep); -+ iput(r_ino); - goto out_free_name; - } - filep->f_mode |= FMODE_READ; -@@ -388,6 +397,7 @@ static struct file *__nfs42_ssc_open(struct vfsmount *ss_mnt, - out_free_name: - kfree(read_name); - out: -+ nfs_free_fattr(fattr); - return res; - out_stateowner: - nfs4_put_state_owner(sp); -diff --git a/fs/nfs/nfs4idmap.c b/fs/nfs/nfs4idmap.c -index 8d8aba305ecca..ec6afd3c4bca6 100644 ---- a/fs/nfs/nfs4idmap.c -+++ b/fs/nfs/nfs4idmap.c -@@ -487,7 +487,7 @@ nfs_idmap_new(struct nfs_client *clp) - err_destroy_pipe: - rpc_destroy_pipe_data(idmap->idmap_pipe); - err: -- get_user_ns(idmap->user_ns); -+ put_user_ns(idmap->user_ns); - kfree(idmap); - return error; - } -@@ -561,22 +561,20 @@ nfs_idmap_prepare_pipe_upcall(struct idmap *idmap, - return true; - } - --static void --nfs_idmap_complete_pipe_upcall_locked(struct idmap *idmap, int ret) -+static void nfs_idmap_complete_pipe_upcall(struct idmap_legacy_upcalldata *data, -+ int ret) - { -- struct key *authkey = idmap->idmap_upcall_data->authkey; +- ret = do_linkat(lnk->old_dfd, lnk->oldpath, lnk->new_dfd, +- lnk->newpath, lnk->flags); - -- kfree(idmap->idmap_upcall_data); -- idmap->idmap_upcall_data = NULL; -- complete_request_key(authkey, ret); -- key_put(authkey); -+ complete_request_key(data->authkey, ret); -+ key_put(data->authkey); -+ kfree(data); - } - --static void --nfs_idmap_abort_pipe_upcall(struct idmap 
*idmap, int ret) -+static void nfs_idmap_abort_pipe_upcall(struct idmap *idmap, -+ struct idmap_legacy_upcalldata *data, -+ int ret) - { -- if (idmap->idmap_upcall_data != NULL) -- nfs_idmap_complete_pipe_upcall_locked(idmap, ret); -+ if (cmpxchg(&idmap->idmap_upcall_data, data, NULL) == data) -+ nfs_idmap_complete_pipe_upcall(data, ret); - } - - static int nfs_idmap_legacy_upcall(struct key *authkey, void *aux) -@@ -613,7 +611,7 @@ static int nfs_idmap_legacy_upcall(struct key *authkey, void *aux) - - ret = rpc_queue_upcall(idmap->idmap_pipe, msg); - if (ret < 0) -- nfs_idmap_abort_pipe_upcall(idmap, ret); -+ nfs_idmap_abort_pipe_upcall(idmap, data, ret); - - return ret; - out2: -@@ -669,6 +667,7 @@ idmap_pipe_downcall(struct file *filp, const char __user *src, size_t mlen) - struct request_key_auth *rka; - struct rpc_inode *rpci = RPC_I(file_inode(filp)); - struct idmap *idmap = (struct idmap *)rpci->private; -+ struct idmap_legacy_upcalldata *data; - struct key *authkey; - struct idmap_msg im; - size_t namelen_in; -@@ -678,10 +677,11 @@ idmap_pipe_downcall(struct file *filp, const char __user *src, size_t mlen) - * will have been woken up and someone else may now have used - * idmap_key_cons - so after this point we may no longer touch it. - */ -- if (idmap->idmap_upcall_data == NULL) -+ data = xchg(&idmap->idmap_upcall_data, NULL); -+ if (data == NULL) - goto out_noupcall; - -- authkey = idmap->idmap_upcall_data->authkey; -+ authkey = data->authkey; - rka = get_request_key_auth(authkey); - - if (mlen != sizeof(im)) { -@@ -703,18 +703,17 @@ idmap_pipe_downcall(struct file *filp, const char __user *src, size_t mlen) - if (namelen_in == 0 || namelen_in == IDMAP_NAMESZ) { - ret = -EINVAL; - goto out; +- req->flags &= ~REQ_F_NEED_CLEANUP; +- if (ret < 0) +- req_set_fail(req); +- io_req_complete(req, ret); +- return 0; -} -+ } - -- ret = nfs_idmap_read_and_verify_message(&im, -- &idmap->idmap_upcall_data->idmap_msg, -- rka->target_key, authkey); -+ ret = nfs_idmap_read_and_verify_message(&im, &data->idmap_msg, -+ rka->target_key, authkey); - if (ret >= 0) { - key_set_timeout(rka->target_key, nfs_idmap_cache_timeout); - ret = mlen; - } - - out: -- nfs_idmap_complete_pipe_upcall_locked(idmap, ret); -+ nfs_idmap_complete_pipe_upcall(data, ret); - out_noupcall: - return ret; - } -@@ -728,7 +727,7 @@ idmap_pipe_destroy_msg(struct rpc_pipe_msg *msg) - struct idmap *idmap = data->idmap; - - if (msg->errno) -- nfs_idmap_abort_pipe_upcall(idmap, msg->errno); -+ nfs_idmap_abort_pipe_upcall(idmap, data, msg->errno); - } - - static void -@@ -736,8 +735,11 @@ idmap_release_pipe(struct inode *inode) - { - struct rpc_inode *rpci = RPC_I(inode); - struct idmap *idmap = (struct idmap *)rpci->private; -+ struct idmap_legacy_upcalldata *data; - -- nfs_idmap_abort_pipe_upcall(idmap, -EPIPE); -+ data = xchg(&idmap->idmap_upcall_data, NULL); -+ if (data) -+ nfs_idmap_complete_pipe_upcall(data, -EPIPE); - } - - int nfs_map_name_to_uid(const struct nfs_server *server, const char *name, size_t namelen, kuid_t *uid) -diff --git a/fs/nfs/nfs4namespace.c b/fs/nfs/nfs4namespace.c -index 873342308dc0d..3680c8da510c9 100644 ---- a/fs/nfs/nfs4namespace.c -+++ b/fs/nfs/nfs4namespace.c -@@ -164,16 +164,21 @@ static int nfs4_validate_fspath(struct dentry *dentry, - return 0; - } - --static size_t nfs_parse_server_name(char *string, size_t len, -- struct sockaddr *sa, size_t salen, struct net *net) -+size_t nfs_parse_server_name(char *string, size_t len, struct sockaddr *sa, -+ size_t salen, struct net *net, int port) - { - 
ssize_t ret; - - ret = rpc_pton(net, string, len, sa, salen); - if (ret == 0) { -- ret = nfs_dns_resolve_name(net, string, len, sa, salen); -- if (ret < 0) -- ret = 0; -+ ret = rpc_uaddr2sockaddr(net, string, len, sa, salen); -+ if (ret == 0) { -+ ret = nfs_dns_resolve_name(net, string, len, sa, salen); -+ if (ret < 0) -+ ret = 0; -+ } -+ } else if (port) { -+ rpc_set_port(sa, port); - } - return ret; - } -@@ -328,7 +333,7 @@ static int try_location(struct fs_context *fc, - nfs_parse_server_name(buf->data, buf->len, - &ctx->nfs_server.address, - sizeof(ctx->nfs_server._address), -- fc->net_ns); -+ fc->net_ns, 0); - if (ctx->nfs_server.addrlen == 0) - continue; - -@@ -496,7 +501,7 @@ static int nfs4_try_replacing_one_location(struct nfs_server *server, - continue; - - salen = nfs_parse_server_name(buf->data, buf->len, -- sap, addr_bufsize, net); -+ sap, addr_bufsize, net, 0); - if (salen == 0) - continue; - rpc_set_port(sap, NFS_PORT); -diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c -index e1214bb6b7ee5..a808763c52c19 100644 ---- a/fs/nfs/nfs4proc.c -+++ b/fs/nfs/nfs4proc.c -@@ -366,6 +366,14 @@ static void nfs4_setup_readdir(u64 cookie, __be32 *verifier, struct dentry *dent - kunmap_atomic(start); - } - -+static void nfs4_fattr_set_prechange(struct nfs_fattr *fattr, u64 version) -+{ -+ if (!(fattr->valid & NFS_ATTR_FATTR_PRECHANGE)) { -+ fattr->pre_change_attr = version; -+ fattr->valid |= NFS_ATTR_FATTR_PRECHANGE; -+ } -+} -+ - static void nfs4_test_and_free_stateid(struct nfs_server *server, - nfs4_stateid *stateid, - const struct cred *cred) -@@ -779,10 +787,9 @@ static void nfs4_slot_sequence_record_sent(struct nfs4_slot *slot, - if ((s32)(seqnr - slot->seq_nr_highest_sent) > 0) - slot->seq_nr_highest_sent = seqnr; - } --static void nfs4_slot_sequence_acked(struct nfs4_slot *slot, -- u32 seqnr) -+static void nfs4_slot_sequence_acked(struct nfs4_slot *slot, u32 seqnr) - { -- slot->seq_nr_highest_sent = seqnr; -+ nfs4_slot_sequence_record_sent(slot, seqnr); - slot->seq_nr_last_acked = seqnr; - } - -@@ -849,7 +856,6 @@ static int nfs41_sequence_process(struct rpc_task *task, - __func__, - slot->slot_nr, - slot->seq_nr); -- nfs4_slot_sequence_acked(slot, slot->seq_nr); - goto out_retry; - case -NFS4ERR_RETRY_UNCACHED_REP: - case -NFS4ERR_SEQ_FALSE_RETRY: -@@ -1157,7 +1163,7 @@ static int nfs4_call_sync_sequence(struct rpc_clnt *clnt, - { - unsigned short task_flags = 0; - -- if (server->nfs_client->cl_minorversion) -+ if (server->caps & NFS_CAP_MOVEABLE) - task_flags = RPC_TASK_MOVEABLE; - return nfs4_do_call_sync(clnt, server, msg, args, res, task_flags); - } -@@ -1232,8 +1238,7 @@ nfs4_update_changeattr_locked(struct inode *inode, - NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL | - NFS_INO_INVALID_SIZE | NFS_INO_INVALID_OTHER | - NFS_INO_INVALID_BLOCKS | NFS_INO_INVALID_NLINK | -- NFS_INO_INVALID_MODE | NFS_INO_INVALID_XATTR | -- NFS_INO_REVAL_PAGECACHE; -+ NFS_INO_INVALID_MODE | NFS_INO_INVALID_XATTR; - nfsi->attrtimeo = NFS_MINATTRTIMEO(inode); - } - nfsi->attrtimeo_timestamp = jiffies; -@@ -1609,15 +1614,16 @@ static bool nfs_stateid_is_sequential(struct nfs4_state *state, - { - if (test_bit(NFS_OPEN_STATE, &state->flags)) { - /* The common case - we're updating to a new sequence number */ -- if (nfs4_stateid_match_other(stateid, &state->open_stateid) && -- nfs4_stateid_is_next(&state->open_stateid, stateid)) { -- return true; -+ if (nfs4_stateid_match_other(stateid, &state->open_stateid)) { -+ if (nfs4_stateid_is_next(&state->open_stateid, stateid)) -+ return true; -+ return 
false; - } -- } else { -- /* This is the first OPEN in this generation */ -- if (stateid->seqid == cpu_to_be32(1)) -- return true; -+ /* The server returned a new stateid */ - } -+ /* This is the first OPEN in this generation */ -+ if (stateid->seqid == cpu_to_be32(1)) -+ return true; - return false; - } - -@@ -2570,7 +2576,7 @@ static int nfs4_run_open_task(struct nfs4_opendata *data, - }; - int status; - -- if (server->nfs_client->cl_minorversion) -+ if (nfs_server_capable(dir, NFS_CAP_MOVEABLE)) - task_setup_data.flags |= RPC_TASK_MOVEABLE; - - kref_get(&data->kref); -@@ -3100,8 +3106,13 @@ static int _nfs4_open_and_get_state(struct nfs4_opendata *opendata, - } - - out: -- if (!opendata->cancelled) -+ if (!opendata->cancelled) { -+ if (opendata->lgp) { -+ nfs4_lgopen_release(opendata->lgp); -+ opendata->lgp = NULL; -+ } - nfs4_sequence_free_slot(&opendata->o_res.seq_res); -+ } - return ret; - } - -@@ -3753,7 +3764,7 @@ int nfs4_do_close(struct nfs4_state *state, gfp_t gfp_mask, int wait) - }; - int status = -ENOMEM; - -- if (server->nfs_client->cl_minorversion) +- +-static int io_shutdown_prep(struct io_kiocb *req, +- const struct io_uring_sqe *sqe) +-{ +-#if defined(CONFIG_NET) +- if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) +- return -EINVAL; +- if (unlikely(sqe->ioprio || sqe->off || sqe->addr || sqe->rw_flags || +- sqe->buf_index || sqe->splice_fd_in)) +- return -EINVAL; +- +- req->shutdown.how = READ_ONCE(sqe->len); +- return 0; +-#else +- return -EOPNOTSUPP; +-#endif +-} +- +-static int io_shutdown(struct io_kiocb *req, unsigned int issue_flags) +-{ +-#if defined(CONFIG_NET) +- struct socket *sock; +- int ret; +- +- if (issue_flags & IO_URING_F_NONBLOCK) +- return -EAGAIN; +- +- sock = sock_from_file(req->file); +- if (unlikely(!sock)) +- return -ENOTSOCK; +- +- ret = __sys_shutdown_sock(sock, req->shutdown.how); +- if (ret < 0) +- req_set_fail(req); +- io_req_complete(req, ret); +- return 0; +-#else +- return -EOPNOTSUPP; +-#endif +-} +- +-static int __io_splice_prep(struct io_kiocb *req, +- const struct io_uring_sqe *sqe) +-{ +- struct io_splice *sp = &req->splice; +- unsigned int valid_flags = SPLICE_F_FD_IN_FIXED | SPLICE_F_ALL; +- +- if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) +- return -EINVAL; +- +- sp->file_in = NULL; +- sp->len = READ_ONCE(sqe->len); +- sp->flags = READ_ONCE(sqe->splice_flags); +- +- if (unlikely(sp->flags & ~valid_flags)) +- return -EINVAL; +- +- sp->file_in = io_file_get(req->ctx, req, READ_ONCE(sqe->splice_fd_in), +- (sp->flags & SPLICE_F_FD_IN_FIXED)); +- if (!sp->file_in) +- return -EBADF; +- req->flags |= REQ_F_NEED_CLEANUP; +- return 0; +-} +- +-static int io_tee_prep(struct io_kiocb *req, +- const struct io_uring_sqe *sqe) +-{ +- if (READ_ONCE(sqe->splice_off_in) || READ_ONCE(sqe->off)) +- return -EINVAL; +- return __io_splice_prep(req, sqe); +-} +- +-static int io_tee(struct io_kiocb *req, unsigned int issue_flags) +-{ +- struct io_splice *sp = &req->splice; +- struct file *in = sp->file_in; +- struct file *out = sp->file_out; +- unsigned int flags = sp->flags & ~SPLICE_F_FD_IN_FIXED; +- long ret = 0; +- +- if (issue_flags & IO_URING_F_NONBLOCK) +- return -EAGAIN; +- if (sp->len) +- ret = do_tee(in, out, sp->len, flags); +- +- if (!(sp->flags & SPLICE_F_FD_IN_FIXED)) +- io_put_file(in); +- req->flags &= ~REQ_F_NEED_CLEANUP; +- +- if (ret != sp->len) +- req_set_fail(req); +- io_req_complete(req, ret); +- return 0; +-} +- +-static int io_splice_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) +-{ +- struct io_splice *sp = 
&req->splice; +- +- sp->off_in = READ_ONCE(sqe->splice_off_in); +- sp->off_out = READ_ONCE(sqe->off); +- return __io_splice_prep(req, sqe); +-} +- +-static int io_splice(struct io_kiocb *req, unsigned int issue_flags) +-{ +- struct io_splice *sp = &req->splice; +- struct file *in = sp->file_in; +- struct file *out = sp->file_out; +- unsigned int flags = sp->flags & ~SPLICE_F_FD_IN_FIXED; +- loff_t *poff_in, *poff_out; +- long ret = 0; +- +- if (issue_flags & IO_URING_F_NONBLOCK) +- return -EAGAIN; +- +- poff_in = (sp->off_in == -1) ? NULL : &sp->off_in; +- poff_out = (sp->off_out == -1) ? NULL : &sp->off_out; +- +- if (sp->len) +- ret = do_splice(in, poff_in, out, poff_out, sp->len, flags); +- +- if (!(sp->flags & SPLICE_F_FD_IN_FIXED)) +- io_put_file(in); +- req->flags &= ~REQ_F_NEED_CLEANUP; +- +- if (ret != sp->len) +- req_set_fail(req); +- io_req_complete(req, ret); +- return 0; +-} +- +-/* +- * IORING_OP_NOP just posts a completion event, nothing else. +- */ +-static int io_nop(struct io_kiocb *req, unsigned int issue_flags) +-{ +- struct io_ring_ctx *ctx = req->ctx; +- +- if (unlikely(ctx->flags & IORING_SETUP_IOPOLL)) +- return -EINVAL; +- +- __io_req_complete(req, issue_flags, 0, 0); +- return 0; +-} +- +-static int io_fsync_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) +-{ +- struct io_ring_ctx *ctx = req->ctx; +- +- if (!req->file) +- return -EBADF; +- +- if (unlikely(ctx->flags & IORING_SETUP_IOPOLL)) +- return -EINVAL; +- if (unlikely(sqe->addr || sqe->ioprio || sqe->buf_index || +- sqe->splice_fd_in)) +- return -EINVAL; +- +- req->sync.flags = READ_ONCE(sqe->fsync_flags); +- if (unlikely(req->sync.flags & ~IORING_FSYNC_DATASYNC)) +- return -EINVAL; +- +- req->sync.off = READ_ONCE(sqe->off); +- req->sync.len = READ_ONCE(sqe->len); +- return 0; +-} +- +-static int io_fsync(struct io_kiocb *req, unsigned int issue_flags) +-{ +- loff_t end = req->sync.off + req->sync.len; +- int ret; +- +- /* fsync always requires a blocking context */ +- if (issue_flags & IO_URING_F_NONBLOCK) +- return -EAGAIN; +- +- ret = vfs_fsync_range(req->file, req->sync.off, +- end > 0 ? 
end : LLONG_MAX, +- req->sync.flags & IORING_FSYNC_DATASYNC); +- if (ret < 0) +- req_set_fail(req); +- io_req_complete(req, ret); +- return 0; +-} +- +-static int io_fallocate_prep(struct io_kiocb *req, +- const struct io_uring_sqe *sqe) +-{ +- if (sqe->ioprio || sqe->buf_index || sqe->rw_flags || +- sqe->splice_fd_in) +- return -EINVAL; +- if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) +- return -EINVAL; +- +- req->sync.off = READ_ONCE(sqe->off); +- req->sync.len = READ_ONCE(sqe->addr); +- req->sync.mode = READ_ONCE(sqe->len); +- return 0; +-} +- +-static int io_fallocate(struct io_kiocb *req, unsigned int issue_flags) +-{ +- int ret; +- +- /* fallocate always requiring blocking context */ +- if (issue_flags & IO_URING_F_NONBLOCK) +- return -EAGAIN; +- ret = vfs_fallocate(req->file, req->sync.mode, req->sync.off, +- req->sync.len); +- if (ret < 0) +- req_set_fail(req); +- io_req_complete(req, ret); +- return 0; +-} +- +-static int __io_openat_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) +-{ +- const char __user *fname; +- int ret; +- +- if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) +- return -EINVAL; +- if (unlikely(sqe->ioprio || sqe->buf_index)) +- return -EINVAL; +- if (unlikely(req->flags & REQ_F_FIXED_FILE)) +- return -EBADF; +- +- /* open.how should be already initialised */ +- if (!(req->open.how.flags & O_PATH) && force_o_largefile()) +- req->open.how.flags |= O_LARGEFILE; +- +- req->open.dfd = READ_ONCE(sqe->fd); +- fname = u64_to_user_ptr(READ_ONCE(sqe->addr)); +- req->open.filename = getname(fname); +- if (IS_ERR(req->open.filename)) { +- ret = PTR_ERR(req->open.filename); +- req->open.filename = NULL; +- return ret; +- } +- +- req->open.file_slot = READ_ONCE(sqe->file_index); +- if (req->open.file_slot && (req->open.how.flags & O_CLOEXEC)) +- return -EINVAL; +- +- req->open.nofile = rlimit(RLIMIT_NOFILE); +- req->flags |= REQ_F_NEED_CLEANUP; +- return 0; +-} +- +-static int io_openat_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) +-{ +- u64 mode = READ_ONCE(sqe->len); +- u64 flags = READ_ONCE(sqe->open_flags); +- +- req->open.how = build_open_how(flags, mode); +- return __io_openat_prep(req, sqe); +-} +- +-static int io_openat2_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) +-{ +- struct open_how __user *how; +- size_t len; +- int ret; +- +- how = u64_to_user_ptr(READ_ONCE(sqe->addr2)); +- len = READ_ONCE(sqe->len); +- if (len < OPEN_HOW_SIZE_VER0) +- return -EINVAL; +- +- ret = copy_struct_from_user(&req->open.how, sizeof(req->open.how), how, +- len); +- if (ret) +- return ret; +- +- return __io_openat_prep(req, sqe); +-} +- +-static int io_openat2(struct io_kiocb *req, unsigned int issue_flags) +-{ +- struct open_flags op; +- struct file *file; +- bool resolve_nonblock, nonblock_set; +- bool fixed = !!req->open.file_slot; +- int ret; +- +- ret = build_open_flags(&req->open.how, &op); +- if (ret) +- goto err; +- nonblock_set = op.open_flag & O_NONBLOCK; +- resolve_nonblock = req->open.how.resolve & RESOLVE_CACHED; +- if (issue_flags & IO_URING_F_NONBLOCK) { +- /* +- * Don't bother trying for O_TRUNC, O_CREAT, or O_TMPFILE open, +- * it'll always -EAGAIN +- */ +- if (req->open.how.flags & (O_TRUNC | O_CREAT | O_TMPFILE)) +- return -EAGAIN; +- op.lookup_flags |= LOOKUP_CACHED; +- op.open_flag |= O_NONBLOCK; +- } +- +- if (!fixed) { +- ret = __get_unused_fd_flags(req->open.how.flags, req->open.nofile); +- if (ret < 0) +- goto err; +- } +- +- file = do_filp_open(req->open.dfd, req->open.filename, &op); +- if (IS_ERR(file)) { +- 
/* +- * We could hang on to this 'fd' on retrying, but seems like +- * marginal gain for something that is now known to be a slower +- * path. So just put it, and we'll get a new one when we retry. +- */ +- if (!fixed) +- put_unused_fd(ret); +- +- ret = PTR_ERR(file); +- /* only retry if RESOLVE_CACHED wasn't already set by application */ +- if (ret == -EAGAIN && +- (!resolve_nonblock && (issue_flags & IO_URING_F_NONBLOCK))) +- return -EAGAIN; +- goto err; +- } +- +- if ((issue_flags & IO_URING_F_NONBLOCK) && !nonblock_set) +- file->f_flags &= ~O_NONBLOCK; +- fsnotify_open(file); +- +- if (!fixed) +- fd_install(ret, file); +- else +- ret = io_install_fixed_file(req, file, issue_flags, +- req->open.file_slot - 1); +-err: +- putname(req->open.filename); +- req->flags &= ~REQ_F_NEED_CLEANUP; +- if (ret < 0) +- req_set_fail(req); +- __io_req_complete(req, issue_flags, ret, 0); +- return 0; +-} +- +-static int io_openat(struct io_kiocb *req, unsigned int issue_flags) +-{ +- return io_openat2(req, issue_flags); +-} +- +-static int io_remove_buffers_prep(struct io_kiocb *req, +- const struct io_uring_sqe *sqe) +-{ +- struct io_provide_buf *p = &req->pbuf; +- u64 tmp; +- +- if (sqe->ioprio || sqe->rw_flags || sqe->addr || sqe->len || sqe->off || +- sqe->splice_fd_in) +- return -EINVAL; +- +- tmp = READ_ONCE(sqe->fd); +- if (!tmp || tmp > USHRT_MAX) +- return -EINVAL; +- +- memset(p, 0, sizeof(*p)); +- p->nbufs = tmp; +- p->bgid = READ_ONCE(sqe->buf_group); +- return 0; +-} +- +-static int __io_remove_buffers(struct io_ring_ctx *ctx, struct io_buffer *buf, +- int bgid, unsigned nbufs) +-{ +- unsigned i = 0; +- +- /* shouldn't happen */ +- if (!nbufs) +- return 0; +- +- /* the head kbuf is the list itself */ +- while (!list_empty(&buf->list)) { +- struct io_buffer *nxt; +- +- nxt = list_first_entry(&buf->list, struct io_buffer, list); +- list_del(&nxt->list); +- kfree(nxt); +- if (++i == nbufs) +- return i; +- } +- i++; +- kfree(buf); +- xa_erase(&ctx->io_buffers, bgid); +- +- return i; +-} +- +-static int io_remove_buffers(struct io_kiocb *req, unsigned int issue_flags) +-{ +- struct io_provide_buf *p = &req->pbuf; +- struct io_ring_ctx *ctx = req->ctx; +- struct io_buffer *head; +- int ret = 0; +- bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK; +- +- io_ring_submit_lock(ctx, !force_nonblock); +- +- lockdep_assert_held(&ctx->uring_lock); +- +- ret = -ENOENT; +- head = xa_load(&ctx->io_buffers, p->bgid); +- if (head) +- ret = __io_remove_buffers(ctx, head, p->bgid, p->nbufs); +- if (ret < 0) +- req_set_fail(req); +- +- /* complete before unlock, IOPOLL may need the lock */ +- __io_req_complete(req, issue_flags, ret, 0); +- io_ring_submit_unlock(ctx, !force_nonblock); +- return 0; +-} +- +-static int io_provide_buffers_prep(struct io_kiocb *req, +- const struct io_uring_sqe *sqe) +-{ +- unsigned long size, tmp_check; +- struct io_provide_buf *p = &req->pbuf; +- u64 tmp; +- +- if (sqe->ioprio || sqe->rw_flags || sqe->splice_fd_in) +- return -EINVAL; +- +- tmp = READ_ONCE(sqe->fd); +- if (!tmp || tmp > USHRT_MAX) +- return -E2BIG; +- p->nbufs = tmp; +- p->addr = READ_ONCE(sqe->addr); +- p->len = READ_ONCE(sqe->len); +- +- if (check_mul_overflow((unsigned long)p->len, (unsigned long)p->nbufs, +- &size)) +- return -EOVERFLOW; +- if (check_add_overflow((unsigned long)p->addr, size, &tmp_check)) +- return -EOVERFLOW; +- +- size = (unsigned long)p->len * p->nbufs; +- if (!access_ok(u64_to_user_ptr(p->addr), size)) +- return -EFAULT; +- +- p->bgid = READ_ONCE(sqe->buf_group); +- tmp = 
READ_ONCE(sqe->off); +- if (tmp > USHRT_MAX) +- return -E2BIG; +- p->bid = tmp; +- return 0; +-} +- +-static int io_add_buffers(struct io_provide_buf *pbuf, struct io_buffer **head) +-{ +- struct io_buffer *buf; +- u64 addr = pbuf->addr; +- int i, bid = pbuf->bid; +- +- for (i = 0; i < pbuf->nbufs; i++) { +- buf = kmalloc(sizeof(*buf), GFP_KERNEL_ACCOUNT); +- if (!buf) +- break; +- +- buf->addr = addr; +- buf->len = min_t(__u32, pbuf->len, MAX_RW_COUNT); +- buf->bid = bid; +- addr += pbuf->len; +- bid++; +- if (!*head) { +- INIT_LIST_HEAD(&buf->list); +- *head = buf; +- } else { +- list_add_tail(&buf->list, &(*head)->list); +- } +- } +- +- return i ? i : -ENOMEM; +-} +- +-static int io_provide_buffers(struct io_kiocb *req, unsigned int issue_flags) +-{ +- struct io_provide_buf *p = &req->pbuf; +- struct io_ring_ctx *ctx = req->ctx; +- struct io_buffer *head, *list; +- int ret = 0; +- bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK; +- +- io_ring_submit_lock(ctx, !force_nonblock); +- +- lockdep_assert_held(&ctx->uring_lock); +- +- list = head = xa_load(&ctx->io_buffers, p->bgid); +- +- ret = io_add_buffers(p, &head); +- if (ret >= 0 && !list) { +- ret = xa_insert(&ctx->io_buffers, p->bgid, head, GFP_KERNEL); +- if (ret < 0) +- __io_remove_buffers(ctx, head, p->bgid, -1U); +- } +- if (ret < 0) +- req_set_fail(req); +- /* complete before unlock, IOPOLL may need the lock */ +- __io_req_complete(req, issue_flags, ret, 0); +- io_ring_submit_unlock(ctx, !force_nonblock); +- return 0; +-} +- +-static int io_epoll_ctl_prep(struct io_kiocb *req, +- const struct io_uring_sqe *sqe) +-{ +-#if defined(CONFIG_EPOLL) +- if (sqe->ioprio || sqe->buf_index || sqe->splice_fd_in) +- return -EINVAL; +- if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) +- return -EINVAL; +- +- req->epoll.epfd = READ_ONCE(sqe->fd); +- req->epoll.op = READ_ONCE(sqe->len); +- req->epoll.fd = READ_ONCE(sqe->off); +- +- if (ep_op_has_event(req->epoll.op)) { +- struct epoll_event __user *ev; +- +- ev = u64_to_user_ptr(READ_ONCE(sqe->addr)); +- if (copy_from_user(&req->epoll.event, ev, sizeof(*ev))) +- return -EFAULT; +- } +- +- return 0; +-#else +- return -EOPNOTSUPP; +-#endif +-} +- +-static int io_epoll_ctl(struct io_kiocb *req, unsigned int issue_flags) +-{ +-#if defined(CONFIG_EPOLL) +- struct io_epoll *ie = &req->epoll; +- int ret; +- bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK; +- +- ret = do_epoll_ctl(ie->epfd, ie->op, ie->fd, &ie->event, force_nonblock); +- if (force_nonblock && ret == -EAGAIN) +- return -EAGAIN; +- +- if (ret < 0) +- req_set_fail(req); +- __io_req_complete(req, issue_flags, ret, 0); +- return 0; +-#else +- return -EOPNOTSUPP; +-#endif +-} +- +-static int io_madvise_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) +-{ +-#if defined(CONFIG_ADVISE_SYSCALLS) && defined(CONFIG_MMU) +- if (sqe->ioprio || sqe->buf_index || sqe->off || sqe->splice_fd_in) +- return -EINVAL; +- if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) +- return -EINVAL; +- +- req->madvise.addr = READ_ONCE(sqe->addr); +- req->madvise.len = READ_ONCE(sqe->len); +- req->madvise.advice = READ_ONCE(sqe->fadvise_advice); +- return 0; +-#else +- return -EOPNOTSUPP; +-#endif +-} +- +-static int io_madvise(struct io_kiocb *req, unsigned int issue_flags) +-{ +-#if defined(CONFIG_ADVISE_SYSCALLS) && defined(CONFIG_MMU) +- struct io_madvise *ma = &req->madvise; +- int ret; +- +- if (issue_flags & IO_URING_F_NONBLOCK) +- return -EAGAIN; +- +- ret = do_madvise(current->mm, ma->addr, ma->len, ma->advice); +- if (ret < 0) 
+- req_set_fail(req); +- io_req_complete(req, ret); +- return 0; +-#else +- return -EOPNOTSUPP; +-#endif +-} +- +-static int io_fadvise_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) +-{ +- if (sqe->ioprio || sqe->buf_index || sqe->addr || sqe->splice_fd_in) +- return -EINVAL; +- if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) +- return -EINVAL; +- +- req->fadvise.offset = READ_ONCE(sqe->off); +- req->fadvise.len = READ_ONCE(sqe->len); +- req->fadvise.advice = READ_ONCE(sqe->fadvise_advice); +- return 0; +-} +- +-static int io_fadvise(struct io_kiocb *req, unsigned int issue_flags) +-{ +- struct io_fadvise *fa = &req->fadvise; +- int ret; +- +- if (issue_flags & IO_URING_F_NONBLOCK) { +- switch (fa->advice) { +- case POSIX_FADV_NORMAL: +- case POSIX_FADV_RANDOM: +- case POSIX_FADV_SEQUENTIAL: +- break; +- default: +- return -EAGAIN; +- } +- } +- +- ret = vfs_fadvise(req->file, fa->offset, fa->len, fa->advice); +- if (ret < 0) +- req_set_fail(req); +- __io_req_complete(req, issue_flags, ret, 0); +- return 0; +-} +- +-static int io_statx_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) +-{ +- if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) +- return -EINVAL; +- if (sqe->ioprio || sqe->buf_index || sqe->splice_fd_in) +- return -EINVAL; +- if (req->flags & REQ_F_FIXED_FILE) +- return -EBADF; +- +- req->statx.dfd = READ_ONCE(sqe->fd); +- req->statx.mask = READ_ONCE(sqe->len); +- req->statx.filename = u64_to_user_ptr(READ_ONCE(sqe->addr)); +- req->statx.buffer = u64_to_user_ptr(READ_ONCE(sqe->addr2)); +- req->statx.flags = READ_ONCE(sqe->statx_flags); +- +- return 0; +-} +- +-static int io_statx(struct io_kiocb *req, unsigned int issue_flags) +-{ +- struct io_statx *ctx = &req->statx; +- int ret; +- +- if (issue_flags & IO_URING_F_NONBLOCK) +- return -EAGAIN; +- +- ret = do_statx(ctx->dfd, ctx->filename, ctx->flags, ctx->mask, +- ctx->buffer); +- +- if (ret < 0) +- req_set_fail(req); +- io_req_complete(req, ret); +- return 0; +-} +- +-static int io_close_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) +-{ +- if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) +- return -EINVAL; +- if (sqe->ioprio || sqe->off || sqe->addr || sqe->len || +- sqe->rw_flags || sqe->buf_index) +- return -EINVAL; +- if (req->flags & REQ_F_FIXED_FILE) +- return -EBADF; +- +- req->close.fd = READ_ONCE(sqe->fd); +- req->close.file_slot = READ_ONCE(sqe->file_index); +- if (req->close.file_slot && req->close.fd) +- return -EINVAL; +- +- return 0; +-} +- +-static int io_close(struct io_kiocb *req, unsigned int issue_flags) +-{ +- struct files_struct *files = current->files; +- struct io_close *close = &req->close; +- struct fdtable *fdt; +- struct file *file = NULL; +- int ret = -EBADF; +- +- if (req->close.file_slot) { +- ret = io_close_fixed(req, issue_flags); +- goto err; +- } +- +- spin_lock(&files->file_lock); +- fdt = files_fdtable(files); +- if (close->fd >= fdt->max_fds) { +- spin_unlock(&files->file_lock); +- goto err; +- } +- file = fdt->fd[close->fd]; +- if (!file || file->f_op == &io_uring_fops) { +- spin_unlock(&files->file_lock); +- file = NULL; +- goto err; +- } +- +- /* if the file has a flush method, be safe and punt to async */ +- if (file->f_op->flush && (issue_flags & IO_URING_F_NONBLOCK)) { +- spin_unlock(&files->file_lock); +- return -EAGAIN; +- } +- +- ret = __close_fd_get_file(close->fd, &file); +- spin_unlock(&files->file_lock); +- if (ret < 0) { +- if (ret == -ENOENT) +- ret = -EBADF; +- goto err; +- } +- +- /* No ->flush() or already async, safely close 
from here */ +- ret = filp_close(file, current->files); +-err: +- if (ret < 0) +- req_set_fail(req); +- if (file) +- fput(file); +- __io_req_complete(req, issue_flags, ret, 0); +- return 0; +-} +- +-static int io_sfr_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) +-{ +- struct io_ring_ctx *ctx = req->ctx; +- +- if (unlikely(ctx->flags & IORING_SETUP_IOPOLL)) +- return -EINVAL; +- if (unlikely(sqe->addr || sqe->ioprio || sqe->buf_index || +- sqe->splice_fd_in)) +- return -EINVAL; +- +- req->sync.off = READ_ONCE(sqe->off); +- req->sync.len = READ_ONCE(sqe->len); +- req->sync.flags = READ_ONCE(sqe->sync_range_flags); +- return 0; +-} +- +-static int io_sync_file_range(struct io_kiocb *req, unsigned int issue_flags) +-{ +- int ret; +- +- /* sync_file_range always requires a blocking context */ +- if (issue_flags & IO_URING_F_NONBLOCK) +- return -EAGAIN; +- +- ret = sync_file_range(req->file, req->sync.off, req->sync.len, +- req->sync.flags); +- if (ret < 0) +- req_set_fail(req); +- io_req_complete(req, ret); +- return 0; +-} +- +-#if defined(CONFIG_NET) +-static int io_setup_async_msg(struct io_kiocb *req, +- struct io_async_msghdr *kmsg) +-{ +- struct io_async_msghdr *async_msg = req->async_data; +- +- if (async_msg) +- return -EAGAIN; +- if (io_alloc_async_data(req)) { +- kfree(kmsg->free_iov); +- return -ENOMEM; +- } +- async_msg = req->async_data; +- req->flags |= REQ_F_NEED_CLEANUP; +- memcpy(async_msg, kmsg, sizeof(*kmsg)); +- async_msg->msg.msg_name = &async_msg->addr; +- /* if were using fast_iov, set it to the new one */ +- if (!async_msg->free_iov) +- async_msg->msg.msg_iter.iov = async_msg->fast_iov; +- +- return -EAGAIN; +-} +- +-static int io_sendmsg_copy_hdr(struct io_kiocb *req, +- struct io_async_msghdr *iomsg) +-{ +- iomsg->msg.msg_name = &iomsg->addr; +- iomsg->free_iov = iomsg->fast_iov; +- return sendmsg_copy_msghdr(&iomsg->msg, req->sr_msg.umsg, +- req->sr_msg.msg_flags, &iomsg->free_iov); +-} +- +-static int io_sendmsg_prep_async(struct io_kiocb *req) +-{ +- int ret; +- +- ret = io_sendmsg_copy_hdr(req, req->async_data); +- if (!ret) +- req->flags |= REQ_F_NEED_CLEANUP; +- return ret; +-} +- +-static int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) +-{ +- struct io_sr_msg *sr = &req->sr_msg; +- +- if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) +- return -EINVAL; +- +- sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr)); +- sr->len = READ_ONCE(sqe->len); +- sr->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL; +- if (sr->msg_flags & MSG_DONTWAIT) +- req->flags |= REQ_F_NOWAIT; +- +-#ifdef CONFIG_COMPAT +- if (req->ctx->compat) +- sr->msg_flags |= MSG_CMSG_COMPAT; +-#endif +- return 0; +-} +- +-static int io_sendmsg(struct io_kiocb *req, unsigned int issue_flags) +-{ +- struct io_async_msghdr iomsg, *kmsg; +- struct socket *sock; +- unsigned flags; +- int min_ret = 0; +- int ret; +- +- sock = sock_from_file(req->file); +- if (unlikely(!sock)) +- return -ENOTSOCK; +- +- kmsg = req->async_data; +- if (!kmsg) { +- ret = io_sendmsg_copy_hdr(req, &iomsg); +- if (ret) +- return ret; +- kmsg = &iomsg; +- } +- +- flags = req->sr_msg.msg_flags; +- if (issue_flags & IO_URING_F_NONBLOCK) +- flags |= MSG_DONTWAIT; +- if (flags & MSG_WAITALL) +- min_ret = iov_iter_count(&kmsg->msg.msg_iter); +- +- ret = __sys_sendmsg_sock(sock, &kmsg->msg, flags); +- if ((issue_flags & IO_URING_F_NONBLOCK) && ret == -EAGAIN) +- return io_setup_async_msg(req, kmsg); +- if (ret == -ERESTARTSYS) +- ret = -EINTR; +- +- /* fast path, check for non-NULL to avoid 
function call */ +- if (kmsg->free_iov) +- kfree(kmsg->free_iov); +- req->flags &= ~REQ_F_NEED_CLEANUP; +- if (ret < min_ret) +- req_set_fail(req); +- __io_req_complete(req, issue_flags, ret, 0); +- return 0; +-} +- +-static int io_send(struct io_kiocb *req, unsigned int issue_flags) +-{ +- struct io_sr_msg *sr = &req->sr_msg; +- struct msghdr msg; +- struct iovec iov; +- struct socket *sock; +- unsigned flags; +- int min_ret = 0; +- int ret; +- +- sock = sock_from_file(req->file); +- if (unlikely(!sock)) +- return -ENOTSOCK; +- +- ret = import_single_range(WRITE, sr->buf, sr->len, &iov, &msg.msg_iter); +- if (unlikely(ret)) +- return ret; +- +- msg.msg_name = NULL; +- msg.msg_control = NULL; +- msg.msg_controllen = 0; +- msg.msg_namelen = 0; +- +- flags = req->sr_msg.msg_flags; +- if (issue_flags & IO_URING_F_NONBLOCK) +- flags |= MSG_DONTWAIT; +- if (flags & MSG_WAITALL) +- min_ret = iov_iter_count(&msg.msg_iter); +- +- msg.msg_flags = flags; +- ret = sock_sendmsg(sock, &msg); +- if ((issue_flags & IO_URING_F_NONBLOCK) && ret == -EAGAIN) +- return -EAGAIN; +- if (ret == -ERESTARTSYS) +- ret = -EINTR; +- +- if (ret < min_ret) +- req_set_fail(req); +- __io_req_complete(req, issue_flags, ret, 0); +- return 0; +-} +- +-static int __io_recvmsg_copy_hdr(struct io_kiocb *req, +- struct io_async_msghdr *iomsg) +-{ +- struct io_sr_msg *sr = &req->sr_msg; +- struct iovec __user *uiov; +- size_t iov_len; +- int ret; +- +- ret = __copy_msghdr_from_user(&iomsg->msg, sr->umsg, +- &iomsg->uaddr, &uiov, &iov_len); +- if (ret) +- return ret; +- +- if (req->flags & REQ_F_BUFFER_SELECT) { +- if (iov_len > 1) +- return -EINVAL; +- if (copy_from_user(iomsg->fast_iov, uiov, sizeof(*uiov))) +- return -EFAULT; +- sr->len = iomsg->fast_iov[0].iov_len; +- iomsg->free_iov = NULL; +- } else { +- iomsg->free_iov = iomsg->fast_iov; +- ret = __import_iovec(READ, uiov, iov_len, UIO_FASTIOV, +- &iomsg->free_iov, &iomsg->msg.msg_iter, +- false); +- if (ret > 0) +- ret = 0; +- } +- +- return ret; +-} +- +-#ifdef CONFIG_COMPAT +-static int __io_compat_recvmsg_copy_hdr(struct io_kiocb *req, +- struct io_async_msghdr *iomsg) +-{ +- struct io_sr_msg *sr = &req->sr_msg; +- struct compat_iovec __user *uiov; +- compat_uptr_t ptr; +- compat_size_t len; +- int ret; +- +- ret = __get_compat_msghdr(&iomsg->msg, sr->umsg_compat, &iomsg->uaddr, +- &ptr, &len); +- if (ret) +- return ret; +- +- uiov = compat_ptr(ptr); +- if (req->flags & REQ_F_BUFFER_SELECT) { +- compat_ssize_t clen; +- +- if (len > 1) +- return -EINVAL; +- if (!access_ok(uiov, sizeof(*uiov))) +- return -EFAULT; +- if (__get_user(clen, &uiov->iov_len)) +- return -EFAULT; +- if (clen < 0) +- return -EINVAL; +- sr->len = clen; +- iomsg->free_iov = NULL; +- } else { +- iomsg->free_iov = iomsg->fast_iov; +- ret = __import_iovec(READ, (struct iovec __user *)uiov, len, +- UIO_FASTIOV, &iomsg->free_iov, +- &iomsg->msg.msg_iter, true); +- if (ret < 0) +- return ret; +- } +- +- return 0; +-} +-#endif +- +-static int io_recvmsg_copy_hdr(struct io_kiocb *req, +- struct io_async_msghdr *iomsg) +-{ +- iomsg->msg.msg_name = &iomsg->addr; +- +-#ifdef CONFIG_COMPAT +- if (req->ctx->compat) +- return __io_compat_recvmsg_copy_hdr(req, iomsg); +-#endif +- +- return __io_recvmsg_copy_hdr(req, iomsg); +-} +- +-static struct io_buffer *io_recv_buffer_select(struct io_kiocb *req, +- bool needs_lock) +-{ +- struct io_sr_msg *sr = &req->sr_msg; +- struct io_buffer *kbuf; +- +- kbuf = io_buffer_select(req, &sr->len, sr->bgid, sr->kbuf, needs_lock); +- if (IS_ERR(kbuf)) +- return kbuf; +- +- 
sr->kbuf = kbuf; +- req->flags |= REQ_F_BUFFER_SELECTED; +- return kbuf; +-} +- +-static inline unsigned int io_put_recv_kbuf(struct io_kiocb *req) +-{ +- return io_put_kbuf(req, req->sr_msg.kbuf); +-} +- +-static int io_recvmsg_prep_async(struct io_kiocb *req) +-{ +- int ret; +- +- ret = io_recvmsg_copy_hdr(req, req->async_data); +- if (!ret) +- req->flags |= REQ_F_NEED_CLEANUP; +- return ret; +-} +- +-static int io_recvmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) +-{ +- struct io_sr_msg *sr = &req->sr_msg; +- +- if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) +- return -EINVAL; +- +- sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr)); +- sr->len = READ_ONCE(sqe->len); +- sr->bgid = READ_ONCE(sqe->buf_group); +- sr->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL; +- if (sr->msg_flags & MSG_DONTWAIT) +- req->flags |= REQ_F_NOWAIT; +- +-#ifdef CONFIG_COMPAT +- if (req->ctx->compat) +- sr->msg_flags |= MSG_CMSG_COMPAT; +-#endif +- return 0; +-} +- +-static int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags) +-{ +- struct io_async_msghdr iomsg, *kmsg; +- struct socket *sock; +- struct io_buffer *kbuf; +- unsigned flags; +- int min_ret = 0; +- int ret, cflags = 0; +- bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK; +- +- sock = sock_from_file(req->file); +- if (unlikely(!sock)) +- return -ENOTSOCK; +- +- kmsg = req->async_data; +- if (!kmsg) { +- ret = io_recvmsg_copy_hdr(req, &iomsg); +- if (ret) +- return ret; +- kmsg = &iomsg; +- } +- +- if (req->flags & REQ_F_BUFFER_SELECT) { +- kbuf = io_recv_buffer_select(req, !force_nonblock); +- if (IS_ERR(kbuf)) +- return PTR_ERR(kbuf); +- kmsg->fast_iov[0].iov_base = u64_to_user_ptr(kbuf->addr); +- kmsg->fast_iov[0].iov_len = req->sr_msg.len; +- iov_iter_init(&kmsg->msg.msg_iter, READ, kmsg->fast_iov, +- 1, req->sr_msg.len); +- } +- +- flags = req->sr_msg.msg_flags; +- if (force_nonblock) +- flags |= MSG_DONTWAIT; +- if (flags & MSG_WAITALL) +- min_ret = iov_iter_count(&kmsg->msg.msg_iter); +- +- ret = __sys_recvmsg_sock(sock, &kmsg->msg, req->sr_msg.umsg, +- kmsg->uaddr, flags); +- if (force_nonblock && ret == -EAGAIN) +- return io_setup_async_msg(req, kmsg); +- if (ret == -ERESTARTSYS) +- ret = -EINTR; +- +- if (req->flags & REQ_F_BUFFER_SELECTED) +- cflags = io_put_recv_kbuf(req); +- /* fast path, check for non-NULL to avoid function call */ +- if (kmsg->free_iov) +- kfree(kmsg->free_iov); +- req->flags &= ~REQ_F_NEED_CLEANUP; +- if (ret < min_ret || ((flags & MSG_WAITALL) && (kmsg->msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC)))) +- req_set_fail(req); +- __io_req_complete(req, issue_flags, ret, cflags); +- return 0; +-} +- +-static int io_recv(struct io_kiocb *req, unsigned int issue_flags) +-{ +- struct io_buffer *kbuf; +- struct io_sr_msg *sr = &req->sr_msg; +- struct msghdr msg; +- void __user *buf = sr->buf; +- struct socket *sock; +- struct iovec iov; +- unsigned flags; +- int min_ret = 0; +- int ret, cflags = 0; +- bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK; +- +- sock = sock_from_file(req->file); +- if (unlikely(!sock)) +- return -ENOTSOCK; +- +- if (req->flags & REQ_F_BUFFER_SELECT) { +- kbuf = io_recv_buffer_select(req, !force_nonblock); +- if (IS_ERR(kbuf)) +- return PTR_ERR(kbuf); +- buf = u64_to_user_ptr(kbuf->addr); +- } +- +- ret = import_single_range(READ, buf, sr->len, &iov, &msg.msg_iter); +- if (unlikely(ret)) +- goto out_free; +- +- msg.msg_name = NULL; +- msg.msg_control = NULL; +- msg.msg_controllen = 0; +- msg.msg_namelen = 0; +- msg.msg_iocb = NULL; +- msg.msg_flags = 0; 
+- +- flags = req->sr_msg.msg_flags; +- if (force_nonblock) +- flags |= MSG_DONTWAIT; +- if (flags & MSG_WAITALL) +- min_ret = iov_iter_count(&msg.msg_iter); +- +- ret = sock_recvmsg(sock, &msg, flags); +- if (force_nonblock && ret == -EAGAIN) +- return -EAGAIN; +- if (ret == -ERESTARTSYS) +- ret = -EINTR; +-out_free: +- if (req->flags & REQ_F_BUFFER_SELECTED) +- cflags = io_put_recv_kbuf(req); +- if (ret < min_ret || ((flags & MSG_WAITALL) && (msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC)))) +- req_set_fail(req); +- __io_req_complete(req, issue_flags, ret, cflags); +- return 0; +-} +- +-static int io_accept_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) +-{ +- struct io_accept *accept = &req->accept; +- +- if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) +- return -EINVAL; +- if (sqe->ioprio || sqe->len || sqe->buf_index) +- return -EINVAL; +- +- accept->addr = u64_to_user_ptr(READ_ONCE(sqe->addr)); +- accept->addr_len = u64_to_user_ptr(READ_ONCE(sqe->addr2)); +- accept->flags = READ_ONCE(sqe->accept_flags); +- accept->nofile = rlimit(RLIMIT_NOFILE); +- +- accept->file_slot = READ_ONCE(sqe->file_index); +- if (accept->file_slot && ((req->open.how.flags & O_CLOEXEC) || +- (accept->flags & SOCK_CLOEXEC))) +- return -EINVAL; +- if (accept->flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK)) +- return -EINVAL; +- if (SOCK_NONBLOCK != O_NONBLOCK && (accept->flags & SOCK_NONBLOCK)) +- accept->flags = (accept->flags & ~SOCK_NONBLOCK) | O_NONBLOCK; +- return 0; +-} +- +-static int io_accept(struct io_kiocb *req, unsigned int issue_flags) +-{ +- struct io_accept *accept = &req->accept; +- bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK; +- unsigned int file_flags = force_nonblock ? O_NONBLOCK : 0; +- bool fixed = !!accept->file_slot; +- struct file *file; +- int ret, fd; +- +- if (req->file->f_flags & O_NONBLOCK) +- req->flags |= REQ_F_NOWAIT; +- +- if (!fixed) { +- fd = __get_unused_fd_flags(accept->flags, accept->nofile); +- if (unlikely(fd < 0)) +- return fd; +- } +- file = do_accept(req->file, file_flags, accept->addr, accept->addr_len, +- accept->flags); +- if (IS_ERR(file)) { +- if (!fixed) +- put_unused_fd(fd); +- ret = PTR_ERR(file); +- if (ret == -EAGAIN && force_nonblock) +- return -EAGAIN; +- if (ret == -ERESTARTSYS) +- ret = -EINTR; +- req_set_fail(req); +- } else if (!fixed) { +- fd_install(fd, file); +- ret = fd; +- } else { +- ret = io_install_fixed_file(req, file, issue_flags, +- accept->file_slot - 1); +- } +- __io_req_complete(req, issue_flags, ret, 0); +- return 0; +-} +- +-static int io_connect_prep_async(struct io_kiocb *req) +-{ +- struct io_async_connect *io = req->async_data; +- struct io_connect *conn = &req->connect; +- +- return move_addr_to_kernel(conn->addr, conn->addr_len, &io->address); +-} +- +-static int io_connect_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) +-{ +- struct io_connect *conn = &req->connect; +- +- if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) +- return -EINVAL; +- if (sqe->ioprio || sqe->len || sqe->buf_index || sqe->rw_flags || +- sqe->splice_fd_in) +- return -EINVAL; +- +- conn->addr = u64_to_user_ptr(READ_ONCE(sqe->addr)); +- conn->addr_len = READ_ONCE(sqe->addr2); +- return 0; +-} +- +-static int io_connect(struct io_kiocb *req, unsigned int issue_flags) +-{ +- struct io_async_connect __io, *io; +- unsigned file_flags; +- int ret; +- bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK; +- +- if (req->async_data) { +- io = req->async_data; +- } else { +- ret = move_addr_to_kernel(req->connect.addr, +- 
req->connect.addr_len, +- &__io.address); +- if (ret) +- goto out; +- io = &__io; +- } +- +- file_flags = force_nonblock ? O_NONBLOCK : 0; +- +- ret = __sys_connect_file(req->file, &io->address, +- req->connect.addr_len, file_flags); +- if ((ret == -EAGAIN || ret == -EINPROGRESS) && force_nonblock) { +- if (req->async_data) +- return -EAGAIN; +- if (io_alloc_async_data(req)) { +- ret = -ENOMEM; +- goto out; +- } +- memcpy(req->async_data, &__io, sizeof(__io)); +- return -EAGAIN; +- } +- if (ret == -ERESTARTSYS) +- ret = -EINTR; +-out: +- if (ret < 0) +- req_set_fail(req); +- __io_req_complete(req, issue_flags, ret, 0); +- return 0; +-} +-#else /* !CONFIG_NET */ +-#define IO_NETOP_FN(op) \ +-static int io_##op(struct io_kiocb *req, unsigned int issue_flags) \ +-{ \ +- return -EOPNOTSUPP; \ +-} +- +-#define IO_NETOP_PREP(op) \ +-IO_NETOP_FN(op) \ +-static int io_##op##_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) \ +-{ \ +- return -EOPNOTSUPP; \ +-} \ +- +-#define IO_NETOP_PREP_ASYNC(op) \ +-IO_NETOP_PREP(op) \ +-static int io_##op##_prep_async(struct io_kiocb *req) \ +-{ \ +- return -EOPNOTSUPP; \ +-} +- +-IO_NETOP_PREP_ASYNC(sendmsg); +-IO_NETOP_PREP_ASYNC(recvmsg); +-IO_NETOP_PREP_ASYNC(connect); +-IO_NETOP_PREP(accept); +-IO_NETOP_FN(send); +-IO_NETOP_FN(recv); +-#endif /* CONFIG_NET */ +- +-struct io_poll_table { +- struct poll_table_struct pt; +- struct io_kiocb *req; +- int nr_entries; +- int error; +-}; +- +-static int __io_async_wake(struct io_kiocb *req, struct io_poll_iocb *poll, +- __poll_t mask, io_req_tw_func_t func) +-{ +- /* for instances that support it check for an event match first: */ +- if (mask && !(mask & poll->events)) +- return 0; +- +- trace_io_uring_task_add(req->ctx, req->opcode, req->user_data, mask); +- +- list_del_init(&poll->wait.entry); +- +- req->result = mask; +- req->io_task_work.func = func; +- +- /* +- * If this fails, then the task is exiting. When a task exits, the +- * work gets canceled, so just cancel this request as well instead +- * of executing it. We can't safely execute it anyway, as we may not +- * have the needed state needed for it anyway. 
+- */ +- io_req_task_work_add(req); +- return 1; +-} +- +-static bool io_poll_rewait(struct io_kiocb *req, struct io_poll_iocb *poll) +- __acquires(&req->ctx->completion_lock) +-{ +- struct io_ring_ctx *ctx = req->ctx; +- +- /* req->task == current here, checking PF_EXITING is safe */ +- if (unlikely(req->task->flags & PF_EXITING)) +- WRITE_ONCE(poll->canceled, true); +- +- if (!req->result && !READ_ONCE(poll->canceled)) { +- struct poll_table_struct pt = { ._key = poll->events }; +- +- req->result = vfs_poll(req->file, &pt) & poll->events; +- } +- +- spin_lock(&ctx->completion_lock); +- if (!req->result && !READ_ONCE(poll->canceled)) { +- add_wait_queue(poll->head, &poll->wait); +- return true; +- } +- +- return false; +-} +- +-static struct io_poll_iocb *io_poll_get_double(struct io_kiocb *req) +-{ +- /* pure poll stashes this in ->async_data, poll driven retry elsewhere */ +- if (req->opcode == IORING_OP_POLL_ADD) +- return req->async_data; +- return req->apoll->double_poll; +-} +- +-static struct io_poll_iocb *io_poll_get_single(struct io_kiocb *req) +-{ +- if (req->opcode == IORING_OP_POLL_ADD) +- return &req->poll; +- return &req->apoll->poll; +-} +- +-static void io_poll_remove_double(struct io_kiocb *req) +- __must_hold(&req->ctx->completion_lock) +-{ +- struct io_poll_iocb *poll = io_poll_get_double(req); +- +- lockdep_assert_held(&req->ctx->completion_lock); +- +- if (poll && poll->head) { +- struct wait_queue_head *head = poll->head; +- +- spin_lock_irq(&head->lock); +- list_del_init(&poll->wait.entry); +- if (poll->wait.private) +- req_ref_put(req); +- poll->head = NULL; +- spin_unlock_irq(&head->lock); +- } +-} +- +-static bool __io_poll_complete(struct io_kiocb *req, __poll_t mask) +- __must_hold(&req->ctx->completion_lock) +-{ +- struct io_ring_ctx *ctx = req->ctx; +- unsigned flags = IORING_CQE_F_MORE; +- int error; +- +- if (READ_ONCE(req->poll.canceled)) { +- error = -ECANCELED; +- req->poll.events |= EPOLLONESHOT; +- } else { +- error = mangle_poll(mask); +- } +- if (req->poll.events & EPOLLONESHOT) +- flags = 0; +- if (!io_cqring_fill_event(ctx, req->user_data, error, flags)) { +- req->poll.events |= EPOLLONESHOT; +- flags = 0; +- } +- if (flags & IORING_CQE_F_MORE) +- ctx->cq_extra++; +- +- return !(flags & IORING_CQE_F_MORE); +-} +- +-static inline bool io_poll_complete(struct io_kiocb *req, __poll_t mask) +- __must_hold(&req->ctx->completion_lock) +-{ +- bool done; +- +- done = __io_poll_complete(req, mask); +- io_commit_cqring(req->ctx); +- return done; +-} +- +-static void io_poll_task_func(struct io_kiocb *req, bool *locked) +-{ +- struct io_ring_ctx *ctx = req->ctx; +- struct io_kiocb *nxt; +- +- if (io_poll_rewait(req, &req->poll)) { +- spin_unlock(&ctx->completion_lock); +- } else { +- bool done; +- +- if (req->poll.done) { +- spin_unlock(&ctx->completion_lock); +- return; +- } +- done = __io_poll_complete(req, req->result); +- if (done) { +- io_poll_remove_double(req); +- hash_del(&req->hash_node); +- req->poll.done = true; +- } else { +- req->result = 0; +- add_wait_queue(req->poll.head, &req->poll.wait); +- } +- io_commit_cqring(ctx); +- spin_unlock(&ctx->completion_lock); +- io_cqring_ev_posted(ctx); +- +- if (done) { +- nxt = io_put_req_find_next(req); +- if (nxt) +- io_req_task_submit(nxt, locked); +- } +- } +-} +- +-static int io_poll_double_wake(struct wait_queue_entry *wait, unsigned mode, +- int sync, void *key) +-{ +- struct io_kiocb *req = wait->private; +- struct io_poll_iocb *poll = io_poll_get_single(req); +- __poll_t mask = key_to_poll(key); +- 
unsigned long flags; +- +- /* for instances that support it check for an event match first: */ +- if (mask && !(mask & poll->events)) +- return 0; +- if (!(poll->events & EPOLLONESHOT)) +- return poll->wait.func(&poll->wait, mode, sync, key); +- +- list_del_init(&wait->entry); +- +- if (poll->head) { +- bool done; +- +- spin_lock_irqsave(&poll->head->lock, flags); +- done = list_empty(&poll->wait.entry); +- if (!done) +- list_del_init(&poll->wait.entry); +- /* make sure double remove sees this as being gone */ +- wait->private = NULL; +- spin_unlock_irqrestore(&poll->head->lock, flags); +- if (!done) { +- /* use wait func handler, so it matches the rq type */ +- poll->wait.func(&poll->wait, mode, sync, key); +- } +- } +- req_ref_put(req); +- return 1; +-} +- +-static void io_init_poll_iocb(struct io_poll_iocb *poll, __poll_t events, +- wait_queue_func_t wake_func) +-{ +- poll->head = NULL; +- poll->done = false; +- poll->canceled = false; +-#define IO_POLL_UNMASK (EPOLLERR|EPOLLHUP|EPOLLNVAL|EPOLLRDHUP) +- /* mask in events that we always want/need */ +- poll->events = events | IO_POLL_UNMASK; +- INIT_LIST_HEAD(&poll->wait.entry); +- init_waitqueue_func_entry(&poll->wait, wake_func); +-} +- +-static void __io_queue_proc(struct io_poll_iocb *poll, struct io_poll_table *pt, +- struct wait_queue_head *head, +- struct io_poll_iocb **poll_ptr) +-{ +- struct io_kiocb *req = pt->req; +- +- /* +- * The file being polled uses multiple waitqueues for poll handling +- * (e.g. one for read, one for write). Setup a separate io_poll_iocb +- * if this happens. +- */ +- if (unlikely(pt->nr_entries)) { +- struct io_poll_iocb *poll_one = poll; +- +- /* double add on the same waitqueue head, ignore */ +- if (poll_one->head == head) +- return; +- /* already have a 2nd entry, fail a third attempt */ +- if (*poll_ptr) { +- if ((*poll_ptr)->head == head) +- return; +- pt->error = -EINVAL; +- return; +- } +- /* +- * Can't handle multishot for double wait for now, turn it +- * into one-shot mode. 
+- */ +- if (!(poll_one->events & EPOLLONESHOT)) +- poll_one->events |= EPOLLONESHOT; +- poll = kmalloc(sizeof(*poll), GFP_ATOMIC); +- if (!poll) { +- pt->error = -ENOMEM; +- return; +- } +- io_init_poll_iocb(poll, poll_one->events, io_poll_double_wake); +- req_ref_get(req); +- poll->wait.private = req; +- *poll_ptr = poll; +- } +- +- pt->nr_entries++; +- poll->head = head; +- +- if (poll->events & EPOLLEXCLUSIVE) +- add_wait_queue_exclusive(head, &poll->wait); +- else +- add_wait_queue(head, &poll->wait); +-} +- +-static void io_async_queue_proc(struct file *file, struct wait_queue_head *head, +- struct poll_table_struct *p) +-{ +- struct io_poll_table *pt = container_of(p, struct io_poll_table, pt); +- struct async_poll *apoll = pt->req->apoll; +- +- __io_queue_proc(&apoll->poll, pt, head, &apoll->double_poll); +-} +- +-static void io_async_task_func(struct io_kiocb *req, bool *locked) +-{ +- struct async_poll *apoll = req->apoll; +- struct io_ring_ctx *ctx = req->ctx; +- +- trace_io_uring_task_run(req->ctx, req, req->opcode, req->user_data); +- +- if (io_poll_rewait(req, &apoll->poll)) { +- spin_unlock(&ctx->completion_lock); +- return; +- } +- +- hash_del(&req->hash_node); +- io_poll_remove_double(req); +- apoll->poll.done = true; +- spin_unlock(&ctx->completion_lock); +- +- if (!READ_ONCE(apoll->poll.canceled)) +- io_req_task_submit(req, locked); +- else +- io_req_complete_failed(req, -ECANCELED); +-} +- +-static int io_async_wake(struct wait_queue_entry *wait, unsigned mode, int sync, +- void *key) +-{ +- struct io_kiocb *req = wait->private; +- struct io_poll_iocb *poll = &req->apoll->poll; +- +- trace_io_uring_poll_wake(req->ctx, req->opcode, req->user_data, +- key_to_poll(key)); +- +- return __io_async_wake(req, poll, key_to_poll(key), io_async_task_func); +-} +- +-static void io_poll_req_insert(struct io_kiocb *req) +-{ +- struct io_ring_ctx *ctx = req->ctx; +- struct hlist_head *list; +- +- list = &ctx->cancel_hash[hash_long(req->user_data, ctx->cancel_hash_bits)]; +- hlist_add_head(&req->hash_node, list); +-} +- +-static __poll_t __io_arm_poll_handler(struct io_kiocb *req, +- struct io_poll_iocb *poll, +- struct io_poll_table *ipt, __poll_t mask, +- wait_queue_func_t wake_func) +- __acquires(&ctx->completion_lock) +-{ +- struct io_ring_ctx *ctx = req->ctx; +- bool cancel = false; +- +- INIT_HLIST_NODE(&req->hash_node); +- io_init_poll_iocb(poll, mask, wake_func); +- poll->file = req->file; +- poll->wait.private = req; +- +- ipt->pt._key = mask; +- ipt->req = req; +- ipt->error = 0; +- ipt->nr_entries = 0; +- +- mask = vfs_poll(req->file, &ipt->pt) & poll->events; +- if (unlikely(!ipt->nr_entries) && !ipt->error) +- ipt->error = -EINVAL; +- +- spin_lock(&ctx->completion_lock); +- if (ipt->error || (mask && (poll->events & EPOLLONESHOT))) +- io_poll_remove_double(req); +- if (likely(poll->head)) { +- spin_lock_irq(&poll->head->lock); +- if (unlikely(list_empty(&poll->wait.entry))) { +- if (ipt->error) +- cancel = true; +- ipt->error = 0; +- mask = 0; +- } +- if ((mask && (poll->events & EPOLLONESHOT)) || ipt->error) +- list_del_init(&poll->wait.entry); +- else if (cancel) +- WRITE_ONCE(poll->canceled, true); +- else if (!poll->done) /* actually waiting for an event */ +- io_poll_req_insert(req); +- spin_unlock_irq(&poll->head->lock); +- } +- +- return mask; +-} +- +-enum { +- IO_APOLL_OK, +- IO_APOLL_ABORTED, +- IO_APOLL_READY +-}; +- +-static int io_arm_poll_handler(struct io_kiocb *req) +-{ +- const struct io_op_def *def = &io_op_defs[req->opcode]; +- struct io_ring_ctx *ctx = 
req->ctx; +- struct async_poll *apoll; +- struct io_poll_table ipt; +- __poll_t ret, mask = EPOLLONESHOT | POLLERR | POLLPRI; +- int rw; +- +- if (!req->file || !file_can_poll(req->file)) +- return IO_APOLL_ABORTED; +- if (req->flags & REQ_F_POLLED) +- return IO_APOLL_ABORTED; +- if (!def->pollin && !def->pollout) +- return IO_APOLL_ABORTED; +- +- if (def->pollin) { +- rw = READ; +- mask |= POLLIN | POLLRDNORM; +- +- /* If reading from MSG_ERRQUEUE using recvmsg, ignore POLLIN */ +- if ((req->opcode == IORING_OP_RECVMSG) && +- (req->sr_msg.msg_flags & MSG_ERRQUEUE)) +- mask &= ~POLLIN; +- } else { +- rw = WRITE; +- mask |= POLLOUT | POLLWRNORM; +- } +- +- /* if we can't nonblock try, then no point in arming a poll handler */ +- if (!io_file_supports_nowait(req, rw)) +- return IO_APOLL_ABORTED; +- +- apoll = kmalloc(sizeof(*apoll), GFP_ATOMIC); +- if (unlikely(!apoll)) +- return IO_APOLL_ABORTED; +- apoll->double_poll = NULL; +- req->apoll = apoll; +- req->flags |= REQ_F_POLLED; +- ipt.pt._qproc = io_async_queue_proc; +- io_req_set_refcount(req); +- +- ret = __io_arm_poll_handler(req, &apoll->poll, &ipt, mask, +- io_async_wake); +- spin_unlock(&ctx->completion_lock); +- if (ret || ipt.error) +- return ret ? IO_APOLL_READY : IO_APOLL_ABORTED; +- +- trace_io_uring_poll_arm(ctx, req, req->opcode, req->user_data, +- mask, apoll->poll.events); +- return IO_APOLL_OK; +-} +- +-static bool __io_poll_remove_one(struct io_kiocb *req, +- struct io_poll_iocb *poll, bool do_cancel) +- __must_hold(&req->ctx->completion_lock) +-{ +- bool do_complete = false; +- +- if (!poll->head) +- return false; +- spin_lock_irq(&poll->head->lock); +- if (do_cancel) +- WRITE_ONCE(poll->canceled, true); +- if (!list_empty(&poll->wait.entry)) { +- list_del_init(&poll->wait.entry); +- do_complete = true; +- } +- spin_unlock_irq(&poll->head->lock); +- hash_del(&req->hash_node); +- return do_complete; +-} +- +-static bool io_poll_remove_one(struct io_kiocb *req) +- __must_hold(&req->ctx->completion_lock) +-{ +- bool do_complete; +- +- io_poll_remove_double(req); +- do_complete = __io_poll_remove_one(req, io_poll_get_single(req), true); +- +- if (do_complete) { +- io_cqring_fill_event(req->ctx, req->user_data, -ECANCELED, 0); +- io_commit_cqring(req->ctx); +- req_set_fail(req); +- io_put_req_deferred(req); +- } +- return do_complete; +-} +- +-/* +- * Returns true if we found and killed one or more poll requests +- */ +-static bool io_poll_remove_all(struct io_ring_ctx *ctx, struct task_struct *tsk, +- bool cancel_all) +-{ +- struct hlist_node *tmp; +- struct io_kiocb *req; +- int posted = 0, i; +- +- spin_lock(&ctx->completion_lock); +- for (i = 0; i < (1U << ctx->cancel_hash_bits); i++) { +- struct hlist_head *list; +- +- list = &ctx->cancel_hash[i]; +- hlist_for_each_entry_safe(req, tmp, list, hash_node) { +- if (io_match_task(req, tsk, cancel_all)) +- posted += io_poll_remove_one(req); +- } +- } +- spin_unlock(&ctx->completion_lock); +- +- if (posted) +- io_cqring_ev_posted(ctx); +- +- return posted != 0; +-} +- +-static struct io_kiocb *io_poll_find(struct io_ring_ctx *ctx, __u64 sqe_addr, +- bool poll_only) +- __must_hold(&ctx->completion_lock) +-{ +- struct hlist_head *list; +- struct io_kiocb *req; +- +- list = &ctx->cancel_hash[hash_long(sqe_addr, ctx->cancel_hash_bits)]; +- hlist_for_each_entry(req, list, hash_node) { +- if (sqe_addr != req->user_data) +- continue; +- if (poll_only && req->opcode != IORING_OP_POLL_ADD) +- continue; +- return req; +- } +- return NULL; +-} +- +-static int io_poll_cancel(struct 
io_ring_ctx *ctx, __u64 sqe_addr, +- bool poll_only) +- __must_hold(&ctx->completion_lock) +-{ +- struct io_kiocb *req; +- +- req = io_poll_find(ctx, sqe_addr, poll_only); +- if (!req) +- return -ENOENT; +- if (io_poll_remove_one(req)) +- return 0; +- +- return -EALREADY; +-} +- +-static __poll_t io_poll_parse_events(const struct io_uring_sqe *sqe, +- unsigned int flags) +-{ +- u32 events; +- +- events = READ_ONCE(sqe->poll32_events); +-#ifdef __BIG_ENDIAN +- events = swahw32(events); +-#endif +- if (!(flags & IORING_POLL_ADD_MULTI)) +- events |= EPOLLONESHOT; +- return demangle_poll(events) | (events & (EPOLLEXCLUSIVE|EPOLLONESHOT)); +-} +- +-static int io_poll_update_prep(struct io_kiocb *req, +- const struct io_uring_sqe *sqe) +-{ +- struct io_poll_update *upd = &req->poll_update; +- u32 flags; +- +- if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) +- return -EINVAL; +- if (sqe->ioprio || sqe->buf_index || sqe->splice_fd_in) +- return -EINVAL; +- flags = READ_ONCE(sqe->len); +- if (flags & ~(IORING_POLL_UPDATE_EVENTS | IORING_POLL_UPDATE_USER_DATA | +- IORING_POLL_ADD_MULTI)) +- return -EINVAL; +- /* meaningless without update */ +- if (flags == IORING_POLL_ADD_MULTI) +- return -EINVAL; +- +- upd->old_user_data = READ_ONCE(sqe->addr); +- upd->update_events = flags & IORING_POLL_UPDATE_EVENTS; +- upd->update_user_data = flags & IORING_POLL_UPDATE_USER_DATA; +- +- upd->new_user_data = READ_ONCE(sqe->off); +- if (!upd->update_user_data && upd->new_user_data) +- return -EINVAL; +- if (upd->update_events) +- upd->events = io_poll_parse_events(sqe, flags); +- else if (sqe->poll32_events) +- return -EINVAL; +- +- return 0; +-} +- +-static int io_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync, +- void *key) +-{ +- struct io_kiocb *req = wait->private; +- struct io_poll_iocb *poll = &req->poll; +- +- return __io_async_wake(req, poll, key_to_poll(key), io_poll_task_func); +-} +- +-static void io_poll_queue_proc(struct file *file, struct wait_queue_head *head, +- struct poll_table_struct *p) +-{ +- struct io_poll_table *pt = container_of(p, struct io_poll_table, pt); +- +- __io_queue_proc(&pt->req->poll, pt, head, (struct io_poll_iocb **) &pt->req->async_data); +-} +- +-static int io_poll_add_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) +-{ +- struct io_poll_iocb *poll = &req->poll; +- u32 flags; +- +- if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) +- return -EINVAL; +- if (sqe->ioprio || sqe->buf_index || sqe->off || sqe->addr) +- return -EINVAL; +- flags = READ_ONCE(sqe->len); +- if (flags & ~IORING_POLL_ADD_MULTI) +- return -EINVAL; +- +- io_req_set_refcount(req); +- poll->events = io_poll_parse_events(sqe, flags); +- return 0; +-} +- +-static int io_poll_add(struct io_kiocb *req, unsigned int issue_flags) +-{ +- struct io_poll_iocb *poll = &req->poll; +- struct io_ring_ctx *ctx = req->ctx; +- struct io_poll_table ipt; +- __poll_t mask; +- bool done; +- +- ipt.pt._qproc = io_poll_queue_proc; +- +- mask = __io_arm_poll_handler(req, &req->poll, &ipt, poll->events, +- io_poll_wake); +- +- if (mask) { /* no async, we'd stolen it */ +- ipt.error = 0; +- done = io_poll_complete(req, mask); +- } +- spin_unlock(&ctx->completion_lock); +- +- if (mask) { +- io_cqring_ev_posted(ctx); +- if (done) +- io_put_req(req); +- } +- return ipt.error; +-} +- +-static int io_poll_update(struct io_kiocb *req, unsigned int issue_flags) +-{ +- struct io_ring_ctx *ctx = req->ctx; +- struct io_kiocb *preq; +- bool completing; +- int ret; +- +- spin_lock(&ctx->completion_lock); +- 
preq = io_poll_find(ctx, req->poll_update.old_user_data, true); +- if (!preq) { +- ret = -ENOENT; +- goto err; +- } +- +- if (!req->poll_update.update_events && !req->poll_update.update_user_data) { +- completing = true; +- ret = io_poll_remove_one(preq) ? 0 : -EALREADY; +- goto err; +- } +- +- /* +- * Don't allow racy completion with singleshot, as we cannot safely +- * update those. For multishot, if we're racing with completion, just +- * let completion re-add it. +- */ +- completing = !__io_poll_remove_one(preq, &preq->poll, false); +- if (completing && (preq->poll.events & EPOLLONESHOT)) { +- ret = -EALREADY; +- goto err; +- } +- /* we now have a detached poll request. reissue. */ +- ret = 0; +-err: +- if (ret < 0) { +- spin_unlock(&ctx->completion_lock); +- req_set_fail(req); +- io_req_complete(req, ret); +- return 0; +- } +- /* only mask one event flags, keep behavior flags */ +- if (req->poll_update.update_events) { +- preq->poll.events &= ~0xffff; +- preq->poll.events |= req->poll_update.events & 0xffff; +- preq->poll.events |= IO_POLL_UNMASK; +- } +- if (req->poll_update.update_user_data) +- preq->user_data = req->poll_update.new_user_data; +- spin_unlock(&ctx->completion_lock); +- +- /* complete update request, we're done with it */ +- io_req_complete(req, ret); +- +- if (!completing) { +- ret = io_poll_add(preq, issue_flags); +- if (ret < 0) { +- req_set_fail(preq); +- io_req_complete(preq, ret); +- } +- } +- return 0; +-} +- +-static void io_req_task_timeout(struct io_kiocb *req, bool *locked) +-{ +- req_set_fail(req); +- io_req_complete_post(req, -ETIME, 0); +-} +- +-static enum hrtimer_restart io_timeout_fn(struct hrtimer *timer) +-{ +- struct io_timeout_data *data = container_of(timer, +- struct io_timeout_data, timer); +- struct io_kiocb *req = data->req; +- struct io_ring_ctx *ctx = req->ctx; +- unsigned long flags; +- +- spin_lock_irqsave(&ctx->timeout_lock, flags); +- list_del_init(&req->timeout.list); +- atomic_set(&req->ctx->cq_timeouts, +- atomic_read(&req->ctx->cq_timeouts) + 1); +- spin_unlock_irqrestore(&ctx->timeout_lock, flags); +- +- req->io_task_work.func = io_req_task_timeout; +- io_req_task_work_add(req); +- return HRTIMER_NORESTART; +-} +- +-static struct io_kiocb *io_timeout_extract(struct io_ring_ctx *ctx, +- __u64 user_data) +- __must_hold(&ctx->timeout_lock) +-{ +- struct io_timeout_data *io; +- struct io_kiocb *req; +- bool found = false; +- +- list_for_each_entry(req, &ctx->timeout_list, timeout.list) { +- found = user_data == req->user_data; +- if (found) +- break; +- } +- if (!found) +- return ERR_PTR(-ENOENT); +- +- io = req->async_data; +- if (hrtimer_try_to_cancel(&io->timer) == -1) +- return ERR_PTR(-EALREADY); +- list_del_init(&req->timeout.list); +- return req; +-} +- +-static int io_timeout_cancel(struct io_ring_ctx *ctx, __u64 user_data) +- __must_hold(&ctx->completion_lock) +- __must_hold(&ctx->timeout_lock) +-{ +- struct io_kiocb *req = io_timeout_extract(ctx, user_data); +- +- if (IS_ERR(req)) +- return PTR_ERR(req); +- +- req_set_fail(req); +- io_cqring_fill_event(ctx, req->user_data, -ECANCELED, 0); +- io_put_req_deferred(req); +- return 0; +-} +- +-static clockid_t io_timeout_get_clock(struct io_timeout_data *data) +-{ +- switch (data->flags & IORING_TIMEOUT_CLOCK_MASK) { +- case IORING_TIMEOUT_BOOTTIME: +- return CLOCK_BOOTTIME; +- case IORING_TIMEOUT_REALTIME: +- return CLOCK_REALTIME; +- default: +- /* can't happen, vetted at prep time */ +- WARN_ON_ONCE(1); +- fallthrough; +- case 0: +- return CLOCK_MONOTONIC; +- } +-} +- +-static 
int io_linked_timeout_update(struct io_ring_ctx *ctx, __u64 user_data, +- struct timespec64 *ts, enum hrtimer_mode mode) +- __must_hold(&ctx->timeout_lock) +-{ +- struct io_timeout_data *io; +- struct io_kiocb *req; +- bool found = false; +- +- list_for_each_entry(req, &ctx->ltimeout_list, timeout.list) { +- found = user_data == req->user_data; +- if (found) +- break; +- } +- if (!found) +- return -ENOENT; +- +- io = req->async_data; +- if (hrtimer_try_to_cancel(&io->timer) == -1) +- return -EALREADY; +- hrtimer_init(&io->timer, io_timeout_get_clock(io), mode); +- io->timer.function = io_link_timeout_fn; +- hrtimer_start(&io->timer, timespec64_to_ktime(*ts), mode); +- return 0; +-} +- +-static int io_timeout_update(struct io_ring_ctx *ctx, __u64 user_data, +- struct timespec64 *ts, enum hrtimer_mode mode) +- __must_hold(&ctx->timeout_lock) +-{ +- struct io_kiocb *req = io_timeout_extract(ctx, user_data); +- struct io_timeout_data *data; +- +- if (IS_ERR(req)) +- return PTR_ERR(req); +- +- req->timeout.off = 0; /* noseq */ +- data = req->async_data; +- list_add_tail(&req->timeout.list, &ctx->timeout_list); +- hrtimer_init(&data->timer, io_timeout_get_clock(data), mode); +- data->timer.function = io_timeout_fn; +- hrtimer_start(&data->timer, timespec64_to_ktime(*ts), mode); +- return 0; +-} +- +-static int io_timeout_remove_prep(struct io_kiocb *req, +- const struct io_uring_sqe *sqe) +-{ +- struct io_timeout_rem *tr = &req->timeout_rem; +- +- if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) +- return -EINVAL; +- if (unlikely(req->flags & (REQ_F_FIXED_FILE | REQ_F_BUFFER_SELECT))) +- return -EINVAL; +- if (sqe->ioprio || sqe->buf_index || sqe->len || sqe->splice_fd_in) +- return -EINVAL; +- +- tr->ltimeout = false; +- tr->addr = READ_ONCE(sqe->addr); +- tr->flags = READ_ONCE(sqe->timeout_flags); +- if (tr->flags & IORING_TIMEOUT_UPDATE_MASK) { +- if (hweight32(tr->flags & IORING_TIMEOUT_CLOCK_MASK) > 1) +- return -EINVAL; +- if (tr->flags & IORING_LINK_TIMEOUT_UPDATE) +- tr->ltimeout = true; +- if (tr->flags & ~(IORING_TIMEOUT_UPDATE_MASK|IORING_TIMEOUT_ABS)) +- return -EINVAL; +- if (get_timespec64(&tr->ts, u64_to_user_ptr(sqe->addr2))) +- return -EFAULT; +- } else if (tr->flags) { +- /* timeout removal doesn't support flags */ +- return -EINVAL; +- } +- +- return 0; +-} +- +-static inline enum hrtimer_mode io_translate_timeout_mode(unsigned int flags) +-{ +- return (flags & IORING_TIMEOUT_ABS) ? 
HRTIMER_MODE_ABS +- : HRTIMER_MODE_REL; +-} +- +-/* +- * Remove or update an existing timeout command +- */ +-static int io_timeout_remove(struct io_kiocb *req, unsigned int issue_flags) +-{ +- struct io_timeout_rem *tr = &req->timeout_rem; +- struct io_ring_ctx *ctx = req->ctx; +- int ret; +- +- if (!(req->timeout_rem.flags & IORING_TIMEOUT_UPDATE)) { +- spin_lock(&ctx->completion_lock); +- spin_lock_irq(&ctx->timeout_lock); +- ret = io_timeout_cancel(ctx, tr->addr); +- spin_unlock_irq(&ctx->timeout_lock); +- spin_unlock(&ctx->completion_lock); +- } else { +- enum hrtimer_mode mode = io_translate_timeout_mode(tr->flags); +- +- spin_lock_irq(&ctx->timeout_lock); +- if (tr->ltimeout) +- ret = io_linked_timeout_update(ctx, tr->addr, &tr->ts, mode); +- else +- ret = io_timeout_update(ctx, tr->addr, &tr->ts, mode); +- spin_unlock_irq(&ctx->timeout_lock); +- } +- +- if (ret < 0) +- req_set_fail(req); +- io_req_complete_post(req, ret, 0); +- return 0; +-} +- +-static int io_timeout_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe, +- bool is_timeout_link) +-{ +- struct io_timeout_data *data; +- unsigned flags; +- u32 off = READ_ONCE(sqe->off); +- +- if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) +- return -EINVAL; +- if (sqe->ioprio || sqe->buf_index || sqe->len != 1 || +- sqe->splice_fd_in) +- return -EINVAL; +- if (off && is_timeout_link) +- return -EINVAL; +- flags = READ_ONCE(sqe->timeout_flags); +- if (flags & ~(IORING_TIMEOUT_ABS | IORING_TIMEOUT_CLOCK_MASK)) +- return -EINVAL; +- /* more than one clock specified is invalid, obviously */ +- if (hweight32(flags & IORING_TIMEOUT_CLOCK_MASK) > 1) +- return -EINVAL; +- +- INIT_LIST_HEAD(&req->timeout.list); +- req->timeout.off = off; +- if (unlikely(off && !req->ctx->off_timeout_used)) +- req->ctx->off_timeout_used = true; +- +- if (!req->async_data && io_alloc_async_data(req)) +- return -ENOMEM; +- +- data = req->async_data; +- data->req = req; +- data->flags = flags; +- +- if (get_timespec64(&data->ts, u64_to_user_ptr(sqe->addr))) +- return -EFAULT; +- +- data->mode = io_translate_timeout_mode(flags); +- hrtimer_init(&data->timer, io_timeout_get_clock(data), data->mode); +- +- if (is_timeout_link) { +- struct io_submit_link *link = &req->ctx->submit_state.link; +- +- if (!link->head) +- return -EINVAL; +- if (link->last->opcode == IORING_OP_LINK_TIMEOUT) +- return -EINVAL; +- req->timeout.head = link->last; +- link->last->flags |= REQ_F_ARM_LTIMEOUT; +- } +- return 0; +-} +- +-static int io_timeout(struct io_kiocb *req, unsigned int issue_flags) +-{ +- struct io_ring_ctx *ctx = req->ctx; +- struct io_timeout_data *data = req->async_data; +- struct list_head *entry; +- u32 tail, off = req->timeout.off; +- +- spin_lock_irq(&ctx->timeout_lock); +- +- /* +- * sqe->off holds how many events that need to occur for this +- * timeout event to be satisfied. If it isn't set, then this is +- * a pure timeout request, sequence isn't used. +- */ +- if (io_is_timeout_noseq(req)) { +- entry = ctx->timeout_list.prev; +- goto add; +- } +- +- tail = ctx->cached_cq_tail - atomic_read(&ctx->cq_timeouts); +- req->timeout.target_seq = tail + off; +- +- /* Update the last seq here in case io_flush_timeouts() hasn't. +- * This is safe because ->completion_lock is held, and submissions +- * and completions are never mixed in the same ->completion_lock section. +- */ +- ctx->cq_last_tm_flush = tail; +- +- /* +- * Insertion sort, ensuring the first entry in the list is always +- * the one we need first. 
+- */
+-	list_for_each_prev(entry, &ctx->timeout_list) {
+-		struct io_kiocb *nxt = list_entry(entry, struct io_kiocb,
+-						  timeout.list);
+-
+-		if (io_is_timeout_noseq(nxt))
+-			continue;
+-		/* nxt.seq is behind @tail, otherwise would've been completed */
+-		if (off >= nxt->timeout.target_seq - tail)
+-			break;
+-	}
+-add:
+-	list_add(&req->timeout.list, entry);
+-	data->timer.function = io_timeout_fn;
+-	hrtimer_start(&data->timer, timespec64_to_ktime(data->ts), data->mode);
+-	spin_unlock_irq(&ctx->timeout_lock);
+-	return 0;
+-}
+-
+-struct io_cancel_data {
+-	struct io_ring_ctx *ctx;
+-	u64 user_data;
+-};
+-
+-static bool io_cancel_cb(struct io_wq_work *work, void *data)
+-{
+-	struct io_kiocb *req = container_of(work, struct io_kiocb, work);
+-	struct io_cancel_data *cd = data;
+-
+-	return req->ctx == cd->ctx && req->user_data == cd->user_data;
+-}
+-
+-static int io_async_cancel_one(struct io_uring_task *tctx, u64 user_data,
+-			       struct io_ring_ctx *ctx)
+-{
+-	struct io_cancel_data data = { .ctx = ctx, .user_data = user_data, };
+-	enum io_wq_cancel cancel_ret;
+-	int ret = 0;
+-
+-	if (!tctx || !tctx->io_wq)
+-		return -ENOENT;
+-
+-	cancel_ret = io_wq_cancel_cb(tctx->io_wq, io_cancel_cb, &data, false);
+-	switch (cancel_ret) {
+-	case IO_WQ_CANCEL_OK:
+-		ret = 0;
+-		break;
+-	case IO_WQ_CANCEL_RUNNING:
+-		ret = -EALREADY;
+-		break;
+-	case IO_WQ_CANCEL_NOTFOUND:
+-		ret = -ENOENT;
+-		break;
+-	}
+-
+-	return ret;
+-}
+-
+-static int io_try_cancel_userdata(struct io_kiocb *req, u64 sqe_addr)
+-{
+-	struct io_ring_ctx *ctx = req->ctx;
+-	int ret;
+-
+-	WARN_ON_ONCE(!io_wq_current_is_worker() && req->task != current);
+-
+-	ret = io_async_cancel_one(req->task->io_uring, sqe_addr, ctx);
+-	if (ret != -ENOENT)
+-		return ret;
+-
+-	spin_lock(&ctx->completion_lock);
+-	spin_lock_irq(&ctx->timeout_lock);
+-	ret = io_timeout_cancel(ctx, sqe_addr);
+-	spin_unlock_irq(&ctx->timeout_lock);
+-	if (ret != -ENOENT)
+-		goto out;
+-	ret = io_poll_cancel(ctx, sqe_addr, false);
+-out:
+-	spin_unlock(&ctx->completion_lock);
+-	return ret;
+-}
+-
+-static int io_async_cancel_prep(struct io_kiocb *req,
+-				const struct io_uring_sqe *sqe)
+-{
+-	if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
+-		return -EINVAL;
+-	if (unlikely(req->flags & (REQ_F_FIXED_FILE | REQ_F_BUFFER_SELECT)))
+-		return -EINVAL;
+-	if (sqe->ioprio || sqe->off || sqe->len || sqe->cancel_flags ||
+-	    sqe->splice_fd_in)
+-		return -EINVAL;
+-
+-	req->cancel.addr = READ_ONCE(sqe->addr);
+-	return 0;
+-}
+-
+-static int io_async_cancel(struct io_kiocb *req, unsigned int issue_flags)
+-{
+-	struct io_ring_ctx *ctx = req->ctx;
+-	u64 sqe_addr = req->cancel.addr;
+-	struct io_tctx_node *node;
+-	int ret;
+-
+-	ret = io_try_cancel_userdata(req, sqe_addr);
+-	if (ret != -ENOENT)
+-		goto done;
+-
+-	/* slow path, try all io-wq's */
+-	io_ring_submit_lock(ctx, !(issue_flags & IO_URING_F_NONBLOCK));
+-	ret = -ENOENT;
+-	list_for_each_entry(node, &ctx->tctx_list, ctx_node) {
+-		struct io_uring_task *tctx = node->task->io_uring;
+-
+-		ret = io_async_cancel_one(tctx, req->cancel.addr, ctx);
+-		if (ret != -ENOENT)
+-			break;
+-	}
+-	io_ring_submit_unlock(ctx, !(issue_flags & IO_URING_F_NONBLOCK));
+-done:
+-	if (ret < 0)
+-		req_set_fail(req);
+-	io_req_complete_post(req, ret, 0);
+-	return 0;
+-}
+-
+-static int io_rsrc_update_prep(struct io_kiocb *req,
+-			       const struct io_uring_sqe *sqe)
+-{
+-	if (unlikely(req->flags & (REQ_F_FIXED_FILE | REQ_F_BUFFER_SELECT)))
+-		return -EINVAL;
+-	if (sqe->ioprio || sqe->rw_flags || sqe->splice_fd_in)
+-		return -EINVAL;
+-
+-	req->rsrc_update.offset = READ_ONCE(sqe->off);
+-	req->rsrc_update.nr_args = READ_ONCE(sqe->len);
+-	if (!req->rsrc_update.nr_args)
+-		return -EINVAL;
+-	req->rsrc_update.arg = READ_ONCE(sqe->addr);
+-	return 0;
+-}
+-
+-static int io_files_update(struct io_kiocb *req, unsigned int issue_flags)
+-{
+-	struct io_ring_ctx *ctx = req->ctx;
+-	struct io_uring_rsrc_update2 up;
+-	int ret;
+-
+-	up.offset = req->rsrc_update.offset;
+-	up.data = req->rsrc_update.arg;
+-	up.nr = 0;
+-	up.tags = 0;
+-	up.resv = 0;
+-
+-	io_ring_submit_lock(ctx, !(issue_flags & IO_URING_F_NONBLOCK));
+-	ret = __io_register_rsrc_update(ctx, IORING_RSRC_FILE,
+-					&up, req->rsrc_update.nr_args);
+-	io_ring_submit_unlock(ctx, !(issue_flags & IO_URING_F_NONBLOCK));
+-
+-	if (ret < 0)
+-		req_set_fail(req);
+-	__io_req_complete(req, issue_flags, ret, 0);
+-	return 0;
+-}
+-
+-static int io_req_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
+-{
+-	switch (req->opcode) {
+-	case IORING_OP_NOP:
+-		return 0;
+-	case IORING_OP_READV:
+-	case IORING_OP_READ_FIXED:
+-	case IORING_OP_READ:
+-		return io_read_prep(req, sqe);
+-	case IORING_OP_WRITEV:
+-	case IORING_OP_WRITE_FIXED:
+-	case IORING_OP_WRITE:
+-		return io_write_prep(req, sqe);
+-	case IORING_OP_POLL_ADD:
+-		return io_poll_add_prep(req, sqe);
+-	case IORING_OP_POLL_REMOVE:
+-		return io_poll_update_prep(req, sqe);
+-	case IORING_OP_FSYNC:
+-		return io_fsync_prep(req, sqe);
+-	case IORING_OP_SYNC_FILE_RANGE:
+-		return io_sfr_prep(req, sqe);
+-	case IORING_OP_SENDMSG:
+-	case IORING_OP_SEND:
+-		return io_sendmsg_prep(req, sqe);
+-	case IORING_OP_RECVMSG:
+-	case IORING_OP_RECV:
+-		return io_recvmsg_prep(req, sqe);
+-	case IORING_OP_CONNECT:
+-		return io_connect_prep(req, sqe);
+-	case IORING_OP_TIMEOUT:
+-		return io_timeout_prep(req, sqe, false);
+-	case IORING_OP_TIMEOUT_REMOVE:
+-		return io_timeout_remove_prep(req, sqe);
+-	case IORING_OP_ASYNC_CANCEL:
+-		return io_async_cancel_prep(req, sqe);
+-	case IORING_OP_LINK_TIMEOUT:
+-		return io_timeout_prep(req, sqe, true);
+-	case IORING_OP_ACCEPT:
+-		return io_accept_prep(req, sqe);
+-	case IORING_OP_FALLOCATE:
+-		return io_fallocate_prep(req, sqe);
+-	case IORING_OP_OPENAT:
+-		return io_openat_prep(req, sqe);
+-	case IORING_OP_CLOSE:
+-		return io_close_prep(req, sqe);
+-	case IORING_OP_FILES_UPDATE:
+-		return io_rsrc_update_prep(req, sqe);
+-	case IORING_OP_STATX:
+-		return io_statx_prep(req, sqe);
+-	case IORING_OP_FADVISE:
+-		return io_fadvise_prep(req, sqe);
+-	case IORING_OP_MADVISE:
+-		return io_madvise_prep(req, sqe);
+-	case IORING_OP_OPENAT2:
+-		return io_openat2_prep(req, sqe);
+-	case IORING_OP_EPOLL_CTL:
+-		return io_epoll_ctl_prep(req, sqe);
+-	case IORING_OP_SPLICE:
+-		return io_splice_prep(req, sqe);
+-	case IORING_OP_PROVIDE_BUFFERS:
+-		return io_provide_buffers_prep(req, sqe);
+-	case IORING_OP_REMOVE_BUFFERS:
+-		return io_remove_buffers_prep(req, sqe);
+-	case IORING_OP_TEE:
+-		return io_tee_prep(req, sqe);
+-	case IORING_OP_SHUTDOWN:
+-		return io_shutdown_prep(req, sqe);
+-	case IORING_OP_RENAMEAT:
+-		return io_renameat_prep(req, sqe);
+-	case IORING_OP_UNLINKAT:
+-		return io_unlinkat_prep(req, sqe);
+-	case IORING_OP_MKDIRAT:
+-		return io_mkdirat_prep(req, sqe);
+-	case IORING_OP_SYMLINKAT:
+-		return io_symlinkat_prep(req, sqe);
+-	case IORING_OP_LINKAT:
+-		return io_linkat_prep(req, sqe);
+-	}
+-
+-	printk_once(KERN_WARNING "io_uring: unhandled opcode %d\n",
+-			req->opcode);
+-	return -EINVAL;
+-}
+-
+-static int io_req_prep_async(struct io_kiocb *req)
+-{
+-	if (!io_op_defs[req->opcode].needs_async_setup)
+-		return 0;
+-	if (WARN_ON_ONCE(req->async_data))
+-		return -EFAULT;
+-	if (io_alloc_async_data(req))
+-		return -EAGAIN;
+-
+-	switch (req->opcode) {
+-	case IORING_OP_READV:
+-		return io_rw_prep_async(req, READ);
+-	case IORING_OP_WRITEV:
+-		return io_rw_prep_async(req, WRITE);
+-	case IORING_OP_SENDMSG:
+-		return io_sendmsg_prep_async(req);
+-	case IORING_OP_RECVMSG:
+-		return io_recvmsg_prep_async(req);
+-	case IORING_OP_CONNECT:
+-		return io_connect_prep_async(req);
+-	}
+-	printk_once(KERN_WARNING "io_uring: prep_async() bad opcode %d\n",
+-		    req->opcode);
+-	return -EFAULT;
+-}
+-
+-static u32 io_get_sequence(struct io_kiocb *req)
+-{
+-	u32 seq = req->ctx->cached_sq_head;
+-
+-	/* need original cached_sq_head, but it was increased for each req */
+-	io_for_each_link(req, req)
+-		seq--;
+-	return seq;
+-}
+-
+-static bool io_drain_req(struct io_kiocb *req)
+-{
+-	struct io_kiocb *pos;
+-	struct io_ring_ctx *ctx = req->ctx;
+-	struct io_defer_entry *de;
+-	int ret;
+-	u32 seq;
+-
+-	if (req->flags & REQ_F_FAIL) {
+-		io_req_complete_fail_submit(req);
+-		return true;
+-	}
+-
+-	/*
+-	 * If we need to drain a request in the middle of a link, drain the
+-	 * head request and the next request/link after the current link.
+-	 * Considering sequential execution of links, IOSQE_IO_DRAIN will be
+-	 * maintained for every request of our link.
+-	 */
+-	if (ctx->drain_next) {
+-		req->flags |= REQ_F_IO_DRAIN;
+-		ctx->drain_next = false;
+-	}
+-	/* not interested in head, start from the first linked */
+-	io_for_each_link(pos, req->link) {
+-		if (pos->flags & REQ_F_IO_DRAIN) {
+-			ctx->drain_next = true;
+-			req->flags |= REQ_F_IO_DRAIN;
+-			break;
+-		}
+-	}
+-
+-	/* Still need defer if there is pending req in defer list. */
+-	if (likely(list_empty_careful(&ctx->defer_list) &&
+-		!(req->flags & REQ_F_IO_DRAIN))) {
+-		ctx->drain_active = false;
+-		return false;
+-	}
+-
+-	seq = io_get_sequence(req);
+-	/* Still a chance to pass the sequence check */
+-	if (!req_need_defer(req, seq) && list_empty_careful(&ctx->defer_list))
+-		return false;
+-
+-	ret = io_req_prep_async(req);
+-	if (ret)
+-		goto fail;
+-	io_prep_async_link(req);
+-	de = kmalloc(sizeof(*de), GFP_KERNEL);
+-	if (!de) {
+-		ret = -ENOMEM;
+-fail:
+-		io_req_complete_failed(req, ret);
+-		return true;
+-	}
+-
+-	spin_lock(&ctx->completion_lock);
+-	if (!req_need_defer(req, seq) && list_empty(&ctx->defer_list)) {
+-		spin_unlock(&ctx->completion_lock);
+-		kfree(de);
+-		io_queue_async_work(req, NULL);
+-		return true;
+-	}
+-
+-	trace_io_uring_defer(ctx, req, req->user_data);
+-	de->req = req;
+-	de->seq = seq;
+-	list_add_tail(&de->list, &ctx->defer_list);
+-	spin_unlock(&ctx->completion_lock);
+-	return true;
+-}
+-
+-static void io_clean_op(struct io_kiocb *req)
+-{
+-	if (req->flags & REQ_F_BUFFER_SELECTED) {
+-		switch (req->opcode) {
+-		case IORING_OP_READV:
+-		case IORING_OP_READ_FIXED:
+-		case IORING_OP_READ:
+-			kfree((void *)(unsigned long)req->rw.addr);
+-			break;
+-		case IORING_OP_RECVMSG:
+-		case IORING_OP_RECV:
+-			kfree(req->sr_msg.kbuf);
+-			break;
+-		}
+-	}
+-
+-	if (req->flags & REQ_F_NEED_CLEANUP) {
+-		switch (req->opcode) {
+-		case IORING_OP_READV:
+-		case IORING_OP_READ_FIXED:
+-		case IORING_OP_READ:
+-		case IORING_OP_WRITEV:
+-		case IORING_OP_WRITE_FIXED:
+-		case IORING_OP_WRITE: {
+-			struct io_async_rw *io = req->async_data;
+-
+-			kfree(io->free_iovec);
+-			break;
+-			}
+-		case IORING_OP_RECVMSG:
+-		case IORING_OP_SENDMSG: {
+-			struct io_async_msghdr *io = req->async_data;
+-
+-			kfree(io->free_iov);
+-			break;
+-			}
+-		case IORING_OP_SPLICE:
+-		case IORING_OP_TEE:
+-			if (!(req->splice.flags & SPLICE_F_FD_IN_FIXED))
+-				io_put_file(req->splice.file_in);
+-			break;
+-		case IORING_OP_OPENAT:
+-		case IORING_OP_OPENAT2:
+-			if (req->open.filename)
+-				putname(req->open.filename);
+-			break;
+-		case IORING_OP_RENAMEAT:
+-			putname(req->rename.oldpath);
+-			putname(req->rename.newpath);
+-			break;
+-		case IORING_OP_UNLINKAT:
+-			putname(req->unlink.filename);
+-			break;
+-		case IORING_OP_MKDIRAT:
+-			putname(req->mkdir.filename);
+-			break;
+-		case IORING_OP_SYMLINKAT:
+-			putname(req->symlink.oldpath);
+-			putname(req->symlink.newpath);
+-			break;
+-		case IORING_OP_LINKAT:
+-			putname(req->hardlink.oldpath);
+-			putname(req->hardlink.newpath);
+-			break;
+-		}
+-	}
+-	if ((req->flags & REQ_F_POLLED) && req->apoll) {
+-		kfree(req->apoll->double_poll);
+-		kfree(req->apoll);
+-		req->apoll = NULL;
+-	}
+-	if (req->flags & REQ_F_INFLIGHT) {
+-		struct io_uring_task *tctx = req->task->io_uring;
+-
+-		atomic_dec(&tctx->inflight_tracked);
+-	}
+-	if (req->flags & REQ_F_CREDS)
+-		put_cred(req->creds);
+-
+-	req->flags &= ~IO_REQ_CLEAN_FLAGS;
+-}
+-
+-static int io_issue_sqe(struct io_kiocb *req, unsigned int issue_flags)
+-{
+-	struct io_ring_ctx *ctx = req->ctx;
+-	const struct cred *creds = NULL;
+-	int ret;
+-
+-	if ((req->flags & REQ_F_CREDS) && req->creds != current_cred())
+-		creds = override_creds(req->creds);
+-
+-	switch (req->opcode) {
+-	case IORING_OP_NOP:
+-		ret = io_nop(req, issue_flags);
+-		break;
+-	case IORING_OP_READV:
+-	case IORING_OP_READ_FIXED:
+-	case IORING_OP_READ:
+-		ret = io_read(req, issue_flags);
+-		break;
+-	case IORING_OP_WRITEV:
+-	case IORING_OP_WRITE_FIXED:
+-	case IORING_OP_WRITE:
+-		ret = io_write(req, issue_flags);
+-		break;
+-	case IORING_OP_FSYNC:
+-		ret = io_fsync(req, issue_flags);
+-		break;
+-	case IORING_OP_POLL_ADD:
+-		ret = io_poll_add(req, issue_flags);
+-		break;
+-	case IORING_OP_POLL_REMOVE:
+-		ret = io_poll_update(req, issue_flags);
+-		break;
+-	case IORING_OP_SYNC_FILE_RANGE:
+-		ret = io_sync_file_range(req, issue_flags);
+-		break;
+-	case IORING_OP_SENDMSG:
+-		ret = io_sendmsg(req, issue_flags);
+-		break;
+-	case IORING_OP_SEND:
+-		ret = io_send(req, issue_flags);
+-		break;
+-	case IORING_OP_RECVMSG:
+-		ret = io_recvmsg(req, issue_flags);
+-		break;
+-	case IORING_OP_RECV:
+-		ret = io_recv(req, issue_flags);
+-		break;
+-	case IORING_OP_TIMEOUT:
+-		ret = io_timeout(req, issue_flags);
+-		break;
+-	case IORING_OP_TIMEOUT_REMOVE:
+-		ret = io_timeout_remove(req, issue_flags);
+-		break;
+-	case IORING_OP_ACCEPT:
+-		ret = io_accept(req, issue_flags);
+-		break;
+-	case IORING_OP_CONNECT:
+-		ret = io_connect(req, issue_flags);
+-		break;
+-	case IORING_OP_ASYNC_CANCEL:
+-		ret = io_async_cancel(req, issue_flags);
+-		break;
+-	case IORING_OP_FALLOCATE:
+-		ret = io_fallocate(req, issue_flags);
+-		break;
+-	case IORING_OP_OPENAT:
+-		ret = io_openat(req, issue_flags);
+-		break;
+-	case IORING_OP_CLOSE:
+-		ret = io_close(req, issue_flags);
+-		break;
+-	case IORING_OP_FILES_UPDATE:
+-		ret = io_files_update(req, issue_flags);
+-		break;
+-	case IORING_OP_STATX:
+-		ret = io_statx(req, issue_flags);
+-		break;
+-	case IORING_OP_FADVISE:
+-		ret = io_fadvise(req, issue_flags);
+-		break;
+-	case IORING_OP_MADVISE:
+-		ret = io_madvise(req, issue_flags);
+-		break;
+-	case IORING_OP_OPENAT2:
+-		ret = io_openat2(req, issue_flags);
+-		break;
+-	case IORING_OP_EPOLL_CTL:
+-		ret = io_epoll_ctl(req, issue_flags);
+-		break;
+-	case IORING_OP_SPLICE:
+-		ret = io_splice(req, issue_flags);
+-		break;
+-	case IORING_OP_PROVIDE_BUFFERS:
+-		ret = io_provide_buffers(req, issue_flags);
+-		break;
+-	case IORING_OP_REMOVE_BUFFERS:
+-		ret = io_remove_buffers(req, issue_flags);
+-		break;
+-	case IORING_OP_TEE:
+-		ret = io_tee(req, issue_flags);
+-		break;
+-	case IORING_OP_SHUTDOWN:
+-		ret = io_shutdown(req, issue_flags);
+-		break;
+-	case IORING_OP_RENAMEAT:
+-		ret = io_renameat(req, issue_flags);
+-		break;
+-	case IORING_OP_UNLINKAT:
+-		ret = io_unlinkat(req, issue_flags);
+-		break;
+-	case IORING_OP_MKDIRAT:
+-		ret = io_mkdirat(req, issue_flags);
+-		break;
+-	case IORING_OP_SYMLINKAT:
+-		ret = io_symlinkat(req, issue_flags);
+-		break;
+-	case IORING_OP_LINKAT:
+-		ret = io_linkat(req, issue_flags);
+-		break;
+-	default:
+-		ret = -EINVAL;
+-		break;
+-	}
+-
+-	if (creds)
+-		revert_creds(creds);
+-	if (ret)
+-		return ret;
+-	/* If the op doesn't have a file, we're not polling for it */
+-	if ((ctx->flags & IORING_SETUP_IOPOLL) && req->file)
+-		io_iopoll_req_issued(req);
+-
+-	return 0;
+-}
+-
+-static struct io_wq_work *io_wq_free_work(struct io_wq_work *work)
+-{
+-	struct io_kiocb *req = container_of(work, struct io_kiocb, work);
+-
+-	req = io_put_req_find_next(req);
+-	return req ? &req->work : NULL;
+-}
+-
+-static void io_wq_submit_work(struct io_wq_work *work)
+-{
+-	struct io_kiocb *req = container_of(work, struct io_kiocb, work);
+-	struct io_kiocb *timeout;
+-	int ret = 0;
+-
+-	/* one will be dropped by ->io_free_work() after returning to io-wq */
+-	if (!(req->flags & REQ_F_REFCOUNT))
+-		__io_req_set_refcount(req, 2);
+-	else
+-		req_ref_get(req);
+-
+-	timeout = io_prep_linked_timeout(req);
+-	if (timeout)
+-		io_queue_linked_timeout(timeout);
+-
+-	/* either cancelled or io-wq is dying, so don't touch tctx->iowq */
+-	if (work->flags & IO_WQ_WORK_CANCEL)
+-		ret = -ECANCELED;
+-
+-	if (!ret) {
+-		do {
+-			ret = io_issue_sqe(req, 0);
+-			/*
+-			 * We can get EAGAIN for polled IO even though we're
+-			 * forcing a sync submission from here, since we can't
+-			 * wait for request slots on the block side.
+-			 */
+-			if (ret != -EAGAIN)
+-				break;
+-			cond_resched();
+-		} while (1);
+-	}
+-
+-	/* avoid locking problems by failing it from a clean context */
+-	if (ret)
+-		io_req_task_queue_fail(req, ret);
+-}
+-
+-static inline struct io_fixed_file *io_fixed_file_slot(struct io_file_table *table,
+-						       unsigned i)
+-{
+-	return &table->files[i];
+-}
+-
+-static inline struct file *io_file_from_index(struct io_ring_ctx *ctx,
+-					      int index)
+-{
+-	struct io_fixed_file *slot = io_fixed_file_slot(&ctx->file_table, index);
+-
+-	return (struct file *) (slot->file_ptr & FFS_MASK);
+-}
+-
+-static void io_fixed_file_set(struct io_fixed_file *file_slot, struct file *file)
+-{
+-	unsigned long file_ptr = (unsigned long) file;
+-
+-	if (__io_file_supports_nowait(file, READ))
+-		file_ptr |= FFS_ASYNC_READ;
+-	if (__io_file_supports_nowait(file, WRITE))
+-		file_ptr |= FFS_ASYNC_WRITE;
+-	if (S_ISREG(file_inode(file)->i_mode))
+-		file_ptr |= FFS_ISREG;
+-	file_slot->file_ptr = file_ptr;
+-}
+-
+-static inline struct file *io_file_get_fixed(struct io_ring_ctx *ctx,
+-					     struct io_kiocb *req, int fd)
+-{
+-	struct file *file;
+-	unsigned long file_ptr;
+-
+-	if (unlikely((unsigned int)fd >= ctx->nr_user_files))
+-		return NULL;
+-	fd = array_index_nospec(fd, ctx->nr_user_files);
+-	file_ptr = io_fixed_file_slot(&ctx->file_table, fd)->file_ptr;
+-	file = (struct file *) (file_ptr & FFS_MASK);
+-	file_ptr &= ~FFS_MASK;
+-	/* mask in overlapping REQ_F and FFS bits */
+-	req->flags |= (file_ptr << REQ_F_NOWAIT_READ_BIT);
+-	io_req_set_rsrc_node(req);
+-	return file;
+-}
+-
+-static struct file *io_file_get_normal(struct io_ring_ctx *ctx,
+-				       struct io_kiocb *req, int fd)
+-{
+-	struct file *file = fget(fd);
+-
+-	trace_io_uring_file_get(ctx, fd);
+-
+-	/* we don't allow fixed io_uring files */
+-	if (file && unlikely(file->f_op == &io_uring_fops))
+-		io_req_track_inflight(req);
+-	return file;
+-}
+-
+-static inline struct file *io_file_get(struct io_ring_ctx *ctx,
+-				       struct io_kiocb *req, int fd, bool fixed)
+-{
+-	if (fixed)
+-		return io_file_get_fixed(ctx, req, fd);
+-	else
+-		return io_file_get_normal(ctx, req, fd);
+-}
+-
+-static void io_req_task_link_timeout(struct io_kiocb *req, bool *locked)
+-{
+-	struct io_kiocb *prev = req->timeout.prev;
+-	int ret;
+-
+-	if (prev) {
+-		ret = io_try_cancel_userdata(req, prev->user_data);
+-		io_req_complete_post(req, ret ?: -ETIME, 0);
+-		io_put_req(prev);
+-	} else {
+-		io_req_complete_post(req, -ETIME, 0);
+-	}
+-}
+-
+-static enum hrtimer_restart io_link_timeout_fn(struct hrtimer *timer)
+-{
+-	struct io_timeout_data *data = container_of(timer,
+-						struct io_timeout_data, timer);
+-	struct io_kiocb *prev, *req = data->req;
+-	struct io_ring_ctx *ctx = req->ctx;
+-	unsigned long flags;
+-
+-	spin_lock_irqsave(&ctx->timeout_lock, flags);
+-	prev = req->timeout.head;
+-	req->timeout.head = NULL;
+-
+-	/*
+-	 * We don't expect the list to be empty, that will only happen if we
+-	 * race with the completion of the linked work.
+-	 */
+-	if (prev) {
+-		io_remove_next_linked(prev);
+-		if (!req_ref_inc_not_zero(prev))
+-			prev = NULL;
+-	}
+-	list_del(&req->timeout.list);
+-	req->timeout.prev = prev;
+-	spin_unlock_irqrestore(&ctx->timeout_lock, flags);
+-
+-	req->io_task_work.func = io_req_task_link_timeout;
+-	io_req_task_work_add(req);
+-	return HRTIMER_NORESTART;
+-}
+-
+-static void io_queue_linked_timeout(struct io_kiocb *req)
+-{
+-	struct io_ring_ctx *ctx = req->ctx;
+-
+-	spin_lock_irq(&ctx->timeout_lock);
+-	/*
+-	 * If the back reference is NULL, then our linked request finished
+-	 * before we got a chance to setup the timer
+-	 */
+-	if (req->timeout.head) {
+-		struct io_timeout_data *data = req->async_data;
+-
+-		data->timer.function = io_link_timeout_fn;
+-		hrtimer_start(&data->timer, timespec64_to_ktime(data->ts),
+-				data->mode);
+-		list_add_tail(&req->timeout.list, &ctx->ltimeout_list);
+-	}
+-	spin_unlock_irq(&ctx->timeout_lock);
+-	/* drop submission reference */
+-	io_put_req(req);
+-}
+-
+-static void __io_queue_sqe(struct io_kiocb *req)
+-	__must_hold(&req->ctx->uring_lock)
+-{
+-	struct io_kiocb *linked_timeout;
+-	int ret;
+-
+-issue_sqe:
+-	ret = io_issue_sqe(req, IO_URING_F_NONBLOCK|IO_URING_F_COMPLETE_DEFER);
+-
+-	/*
+-	 * We async punt it if the file wasn't marked NOWAIT, or if the file
+-	 * doesn't support non-blocking read/write attempts
+-	 */
+-	if (likely(!ret)) {
+-		if (req->flags & REQ_F_COMPLETE_INLINE) {
+-			struct io_ring_ctx *ctx = req->ctx;
+-			struct io_submit_state *state = &ctx->submit_state;
+-
+-			state->compl_reqs[state->compl_nr++] = req;
+-			if (state->compl_nr == ARRAY_SIZE(state->compl_reqs))
+-				io_submit_flush_completions(ctx);
+-			return;
+-		}
+-
+-		linked_timeout = io_prep_linked_timeout(req);
+-		if (linked_timeout)
+-			io_queue_linked_timeout(linked_timeout);
+-	} else if (ret == -EAGAIN && !(req->flags & REQ_F_NOWAIT)) {
+-		linked_timeout = io_prep_linked_timeout(req);
+-
+-		switch (io_arm_poll_handler(req)) {
+-		case IO_APOLL_READY:
+-			if (linked_timeout)
+-				io_queue_linked_timeout(linked_timeout);
+-			goto issue_sqe;
+-		case IO_APOLL_ABORTED:
+-			/*
+-			 * Queued up for async execution, worker will release
+-			 * submit reference when the iocb is actually submitted.
+-			 */
+-			io_queue_async_work(req, NULL);
+-			break;
+-		}
+-
+-		if (linked_timeout)
+-			io_queue_linked_timeout(linked_timeout);
+-	} else {
+-		io_req_complete_failed(req, ret);
+-	}
+-}
+-
+-static inline void io_queue_sqe(struct io_kiocb *req)
+-	__must_hold(&req->ctx->uring_lock)
+-{
+-	if (unlikely(req->ctx->drain_active) && io_drain_req(req))
+-		return;
+-
+-	if (likely(!(req->flags & (REQ_F_FORCE_ASYNC | REQ_F_FAIL)))) {
+-		__io_queue_sqe(req);
+-	} else if (req->flags & REQ_F_FAIL) {
+-		io_req_complete_fail_submit(req);
+-	} else {
+-		int ret = io_req_prep_async(req);
+-
+-		if (unlikely(ret))
+-			io_req_complete_failed(req, ret);
+-		else
+-			io_queue_async_work(req, NULL);
+-	}
+-}
+-
+-/*
+- * Check SQE restrictions (opcode and flags).
+- *
+- * Returns 'true' if SQE is allowed, 'false' otherwise.
+- */
+-static inline bool io_check_restriction(struct io_ring_ctx *ctx,
+-					struct io_kiocb *req,
+-					unsigned int sqe_flags)
+-{
+-	if (likely(!ctx->restricted))
+-		return true;
+-
+-	if (!test_bit(req->opcode, ctx->restrictions.sqe_op))
+-		return false;
+-
+-	if ((sqe_flags & ctx->restrictions.sqe_flags_required) !=
+-	    ctx->restrictions.sqe_flags_required)
+-		return false;
+-
+-	if (sqe_flags & ~(ctx->restrictions.sqe_flags_allowed |
+-			  ctx->restrictions.sqe_flags_required))
+-		return false;
+-
+-	return true;
+-}
+-
+-static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
+-		       const struct io_uring_sqe *sqe)
+-	__must_hold(&ctx->uring_lock)
+-{
+-	struct io_submit_state *state;
+-	unsigned int sqe_flags;
+-	int personality, ret = 0;
+-
+-	/* req is partially pre-initialised, see io_preinit_req() */
+-	req->opcode = READ_ONCE(sqe->opcode);
+-	/* same numerical values with corresponding REQ_F_*, safe to copy */
+-	req->flags = sqe_flags = READ_ONCE(sqe->flags);
+-	req->user_data = READ_ONCE(sqe->user_data);
+-	req->file = NULL;
+-	req->fixed_rsrc_refs = NULL;
+-	req->task = current;
+-
+-	/* enforce forwards compatibility on users */
+-	if (unlikely(sqe_flags & ~SQE_VALID_FLAGS))
+-		return -EINVAL;
+-	if (unlikely(req->opcode >= IORING_OP_LAST))
+-		return -EINVAL;
+-	if (!io_check_restriction(ctx, req, sqe_flags))
+-		return -EACCES;
+-
+-	if ((sqe_flags & IOSQE_BUFFER_SELECT) &&
+-	    !io_op_defs[req->opcode].buffer_select)
+-		return -EOPNOTSUPP;
+-	if (unlikely(sqe_flags & IOSQE_IO_DRAIN))
+-		ctx->drain_active = true;
+-
+-	personality = READ_ONCE(sqe->personality);
+-	if (personality) {
+-		req->creds = xa_load(&ctx->personalities, personality);
+-		if (!req->creds)
+-			return -EINVAL;
+-		get_cred(req->creds);
+-		req->flags |= REQ_F_CREDS;
+-	}
+-	state = &ctx->submit_state;
+-
+-	/*
+-	 * Plug now if we have more than 1 IO left after this, and the target
+-	 * is potentially a read/write to block based storage.
+-	 */
+-	if (!state->plug_started && state->ios_left > 1 &&
+-	    io_op_defs[req->opcode].plug) {
+-		blk_start_plug(&state->plug);
+-		state->plug_started = true;
+-	}
+-
+-	if (io_op_defs[req->opcode].needs_file) {
+-		req->file = io_file_get(ctx, req, READ_ONCE(sqe->fd),
+-					(sqe_flags & IOSQE_FIXED_FILE));
+-		if (unlikely(!req->file))
+-			ret = -EBADF;
+-	}
+-
+-	state->ios_left--;
+-	return ret;
+-}
+-
+-static int io_submit_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
+-			 const struct io_uring_sqe *sqe)
+-	__must_hold(&ctx->uring_lock)
+-{
+-	struct io_submit_link *link = &ctx->submit_state.link;
+-	int ret;
+-
+-	ret = io_init_req(ctx, req, sqe);
+-	if (unlikely(ret)) {
+-fail_req:
+-		/* fail even hard links since we don't submit */
+-		if (link->head) {
+-			/*
+-			 * we can judge a link req is failed or cancelled by if
+-			 * REQ_F_FAIL is set, but the head is an exception since
+-			 * it may be set REQ_F_FAIL because of other req's failure
+-			 * so let's leverage req->result to distinguish if a head
+-			 * is set REQ_F_FAIL because of its failure or other req's
+-			 * failure so that we can set the correct ret code for it.
+-			 * init result here to avoid affecting the normal path.
+-			 */
+-			if (!(link->head->flags & REQ_F_FAIL))
+-				req_fail_link_node(link->head, -ECANCELED);
+-		} else if (!(req->flags & (REQ_F_LINK | REQ_F_HARDLINK))) {
+-			/*
+-			 * the current req is a normal req, we should return
+-			 * error and thus break the submittion loop.
+-			 */
+-			io_req_complete_failed(req, ret);
+-			return ret;
+-		}
+-		req_fail_link_node(req, ret);
+-	} else {
+-		ret = io_req_prep(req, sqe);
+-		if (unlikely(ret))
+-			goto fail_req;
+-	}
+-
+-	/* don't need @sqe from now on */
+-	trace_io_uring_submit_sqe(ctx, req, req->opcode, req->user_data,
+-				  req->flags, true,
+-				  ctx->flags & IORING_SETUP_SQPOLL);
+-
+-	/*
+-	 * If we already have a head request, queue this one for async
+-	 * submittal once the head completes. If we don't have a head but
+-	 * IOSQE_IO_LINK is set in the sqe, start a new head. This one will be
+-	 * submitted sync once the chain is complete. If none of those
+-	 * conditions are true (normal request), then just queue it.
+-	 */
+-	if (link->head) {
+-		struct io_kiocb *head = link->head;
+-
+-		if (!(req->flags & REQ_F_FAIL)) {
+-			ret = io_req_prep_async(req);
+-			if (unlikely(ret)) {
+-				req_fail_link_node(req, ret);
+-				if (!(head->flags & REQ_F_FAIL))
+-					req_fail_link_node(head, -ECANCELED);
+-			}
+-		}
+-		trace_io_uring_link(ctx, req, head);
+-		link->last->link = req;
+-		link->last = req;
+-
+-		/* last request of a link, enqueue the link */
+-		if (!(req->flags & (REQ_F_LINK | REQ_F_HARDLINK))) {
+-			link->head = NULL;
+-			io_queue_sqe(head);
+-		}
+-	} else {
+-		if (req->flags & (REQ_F_LINK | REQ_F_HARDLINK)) {
+-			link->head = req;
+-			link->last = req;
+-		} else {
+-			io_queue_sqe(req);
+-		}
+-	}
+-
+-	return 0;
+-}
+-
+-/*
+- * Batched submission is done, ensure local IO is flushed out.
+- */
+-static void io_submit_state_end(struct io_submit_state *state,
+-				struct io_ring_ctx *ctx)
+-{
+-	if (state->link.head)
+-		io_queue_sqe(state->link.head);
+-	if (state->compl_nr)
+-		io_submit_flush_completions(ctx);
+-	if (state->plug_started)
+-		blk_finish_plug(&state->plug);
+-}
+-
+-/*
+- * Start submission side cache.
+- */
+-static void io_submit_state_start(struct io_submit_state *state,
+-				  unsigned int max_ios)
+-{
+-	state->plug_started = false;
+-	state->ios_left = max_ios;
+-	/* set only head, no need to init link_last in advance */
+-	state->link.head = NULL;
+-}
+-
+-static void io_commit_sqring(struct io_ring_ctx *ctx)
+-{
+-	struct io_rings *rings = ctx->rings;
+-
+-	/*
+-	 * Ensure any loads from the SQEs are done at this point,
+-	 * since once we write the new head, the application could
+-	 * write new data to them.
+-	 */
+-	smp_store_release(&rings->sq.head, ctx->cached_sq_head);
+-}
+-
+-/*
+- * Fetch an sqe, if one is available. Note this returns a pointer to memory
+- * that is mapped by userspace. This means that care needs to be taken to
+- * ensure that reads are stable, as we cannot rely on userspace always
+- * being a good citizen. If members of the sqe are validated and then later
+- * used, it's important that those reads are done through READ_ONCE() to
+- * prevent a re-load down the line.
+- */
+-static const struct io_uring_sqe *io_get_sqe(struct io_ring_ctx *ctx)
+-{
+-	unsigned head, mask = ctx->sq_entries - 1;
+-	unsigned sq_idx = ctx->cached_sq_head++ & mask;
+-
+-	/*
+-	 * The cached sq head (or cq tail) serves two purposes:
+-	 *
+-	 * 1) allows us to batch the cost of updating the user visible
+-	 *    head updates.
+-	 * 2) allows the kernel side to track the head on its own, even
+-	 *    though the application is the one updating it.
+-	 */
+-	head = READ_ONCE(ctx->sq_array[sq_idx]);
+-	if (likely(head < ctx->sq_entries))
+-		return &ctx->sq_sqes[head];
+-
+-	/* drop invalid entries */
+-	ctx->cq_extra--;
+-	WRITE_ONCE(ctx->rings->sq_dropped,
+-		   READ_ONCE(ctx->rings->sq_dropped) + 1);
+-	return NULL;
+-}
+-
+-static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr)
+-	__must_hold(&ctx->uring_lock)
+-{
+-	int submitted = 0;
+-
+-	/* make sure SQ entry isn't read before tail */
+-	nr = min3(nr, ctx->sq_entries, io_sqring_entries(ctx));
+-	if (!percpu_ref_tryget_many(&ctx->refs, nr))
+-		return -EAGAIN;
+-	io_get_task_refs(nr);
+-
+-	io_submit_state_start(&ctx->submit_state, nr);
+-	while (submitted < nr) {
+-		const struct io_uring_sqe *sqe;
+-		struct io_kiocb *req;
+-
+-		req = io_alloc_req(ctx);
+-		if (unlikely(!req)) {
+-			if (!submitted)
+-				submitted = -EAGAIN;
+-			break;
+-		}
+-		sqe = io_get_sqe(ctx);
+-		if (unlikely(!sqe)) {
+-			list_add(&req->inflight_entry, &ctx->submit_state.free_list);
+-			break;
+-		}
+-		/* will complete beyond this point, count as submitted */
+-		submitted++;
+-		if (io_submit_sqe(ctx, req, sqe))
+-			break;
+-	}
+-
+-	if (unlikely(submitted != nr)) {
+-		int ref_used = (submitted == -EAGAIN) ? 0 : submitted;
+-		int unused = nr - ref_used;
+-
+-		current->io_uring->cached_refs += unused;
+-		percpu_ref_put_many(&ctx->refs, unused);
+-	}
+-
+-	io_submit_state_end(&ctx->submit_state, ctx);
+-	/* Commit SQ ring head once we've consumed and submitted all SQEs */
+-	io_commit_sqring(ctx);
+-
+-	return submitted;
+-}
+-
+-static inline bool io_sqd_events_pending(struct io_sq_data *sqd)
+-{
+-	return READ_ONCE(sqd->state);
+-}
+-
+-static inline void io_ring_set_wakeup_flag(struct io_ring_ctx *ctx)
+-{
+-	/* Tell userspace we may need a wakeup call */
+-	spin_lock(&ctx->completion_lock);
+-	WRITE_ONCE(ctx->rings->sq_flags,
+-		   ctx->rings->sq_flags | IORING_SQ_NEED_WAKEUP);
+-	spin_unlock(&ctx->completion_lock);
+-}
+-
+-static inline void io_ring_clear_wakeup_flag(struct io_ring_ctx *ctx)
+-{
+-	spin_lock(&ctx->completion_lock);
+-	WRITE_ONCE(ctx->rings->sq_flags,
+-		   ctx->rings->sq_flags & ~IORING_SQ_NEED_WAKEUP);
+-	spin_unlock(&ctx->completion_lock);
+-}
+-
+-static int __io_sq_thread(struct io_ring_ctx *ctx, bool cap_entries)
+-{
+-	unsigned int to_submit;
+-	int ret = 0;
+-
+-	to_submit = io_sqring_entries(ctx);
+-	/* if we're handling multiple rings, cap submit size for fairness */
+-	if (cap_entries && to_submit > IORING_SQPOLL_CAP_ENTRIES_VALUE)
+-		to_submit = IORING_SQPOLL_CAP_ENTRIES_VALUE;
+-
+-	if (!list_empty(&ctx->iopoll_list) || to_submit) {
+-		unsigned nr_events = 0;
+-		const struct cred *creds = NULL;
+-
+-		if (ctx->sq_creds != current_cred())
+-			creds = override_creds(ctx->sq_creds);
+-
+-		mutex_lock(&ctx->uring_lock);
+-		if (!list_empty(&ctx->iopoll_list))
+-			io_do_iopoll(ctx, &nr_events, 0);
+-
+-		/*
+-		 * Don't submit if refs are dying, good for io_uring_register(),
+-		 * but also it is relied upon by io_ring_exit_work()
+-		 */
+-		if (to_submit && likely(!percpu_ref_is_dying(&ctx->refs)) &&
+-		    !(ctx->flags & IORING_SETUP_R_DISABLED))
+-			ret = io_submit_sqes(ctx, to_submit);
+-		mutex_unlock(&ctx->uring_lock);
+-
+-		if (to_submit && wq_has_sleeper(&ctx->sqo_sq_wait))
+-			wake_up(&ctx->sqo_sq_wait);
+-		if (creds)
+-			revert_creds(creds);
+-	}
+-
+-	return ret;
+-}
+-
+-static void io_sqd_update_thread_idle(struct io_sq_data *sqd)
+-{
+-	struct io_ring_ctx *ctx;
+-	unsigned sq_thread_idle = 0;
+-
+-	list_for_each_entry(ctx, &sqd->ctx_list, sqd_list)
+-		sq_thread_idle = max(sq_thread_idle, ctx->sq_thread_idle);
+-	sqd->sq_thread_idle = sq_thread_idle;
+-}
+-
+-static bool io_sqd_handle_event(struct io_sq_data *sqd)
+-{
+-	bool did_sig = false;
+-	struct ksignal ksig;
+-
+-	if (test_bit(IO_SQ_THREAD_SHOULD_PARK, &sqd->state) ||
+-	    signal_pending(current)) {
+-		mutex_unlock(&sqd->lock);
+-		if (signal_pending(current))
+-			did_sig = get_signal(&ksig);
+-		cond_resched();
+-		mutex_lock(&sqd->lock);
+-	}
+-	return did_sig || test_bit(IO_SQ_THREAD_SHOULD_STOP, &sqd->state);
+-}
+-
+-static int io_sq_thread(void *data)
+-{
+-	struct io_sq_data *sqd = data;
+-	struct io_ring_ctx *ctx;
+-	unsigned long timeout = 0;
+-	char buf[TASK_COMM_LEN];
+-	DEFINE_WAIT(wait);
+-
+-	snprintf(buf, sizeof(buf), "iou-sqp-%d", sqd->task_pid);
+-	set_task_comm(current, buf);
+-
+-	if (sqd->sq_cpu != -1)
+-		set_cpus_allowed_ptr(current, cpumask_of(sqd->sq_cpu));
+-	else
+-		set_cpus_allowed_ptr(current, cpu_online_mask);
+-	current->flags |= PF_NO_SETAFFINITY;
+-
+-	mutex_lock(&sqd->lock);
+-	while (1) {
+-		bool cap_entries, sqt_spin = false;
+-
+-		if (io_sqd_events_pending(sqd) || signal_pending(current)) {
+-			if (io_sqd_handle_event(sqd))
+-				break;
+-			timeout = jiffies + sqd->sq_thread_idle;
+-		}
+-
+-		cap_entries = !list_is_singular(&sqd->ctx_list);
+-		list_for_each_entry(ctx, &sqd->ctx_list, sqd_list) {
+-			int ret = __io_sq_thread(ctx, cap_entries);
+-
+-			if (!sqt_spin && (ret > 0 || !list_empty(&ctx->iopoll_list)))
+-				sqt_spin = true;
+-		}
+-		if (io_run_task_work())
+-			sqt_spin = true;
+-
+-		if (sqt_spin || !time_after(jiffies, timeout)) {
+-			cond_resched();
+-			if (sqt_spin)
+-				timeout = jiffies + sqd->sq_thread_idle;
+-			continue;
+-		}
+-
+-		prepare_to_wait(&sqd->wait, &wait, TASK_INTERRUPTIBLE);
+-		if (!io_sqd_events_pending(sqd) && !current->task_works) {
+-			bool needs_sched = true;
+-
+-			list_for_each_entry(ctx, &sqd->ctx_list, sqd_list) {
+-				io_ring_set_wakeup_flag(ctx);
+-
+-				if ((ctx->flags & IORING_SETUP_IOPOLL) &&
+-				    !list_empty_careful(&ctx->iopoll_list)) {
+-					needs_sched = false;
+-					break;
+-				}
+-				if (io_sqring_entries(ctx)) {
+-					needs_sched = false;
+-					break;
+-				}
+-			}
+-
+-			if (needs_sched) {
+-				mutex_unlock(&sqd->lock);
+-				schedule();
+-				mutex_lock(&sqd->lock);
+-			}
+-			list_for_each_entry(ctx, &sqd->ctx_list, sqd_list)
+-				io_ring_clear_wakeup_flag(ctx);
+-		}
+-
+-		finish_wait(&sqd->wait, &wait);
+-		timeout = jiffies + sqd->sq_thread_idle;
+-	}
+-
+-	io_uring_cancel_generic(true, sqd);
+-	sqd->thread = NULL;
+-	list_for_each_entry(ctx, &sqd->ctx_list, sqd_list)
+-		io_ring_set_wakeup_flag(ctx);
+-	io_run_task_work();
+-	mutex_unlock(&sqd->lock);
+-
+-	complete(&sqd->exited);
+-	do_exit(0);
+-}
+-
+-struct io_wait_queue {
+-	struct wait_queue_entry wq;
+-	struct io_ring_ctx *ctx;
+-	unsigned cq_tail;
+-	unsigned nr_timeouts;
+-};
+-
+-static inline bool io_should_wake(struct io_wait_queue *iowq)
+-{
+-	struct io_ring_ctx *ctx = iowq->ctx;
+-	int dist = ctx->cached_cq_tail - (int) iowq->cq_tail;
+-
+-	/*
+-	 * Wake up if we have enough events, or if a timeout occurred since we
+-	 * started waiting. For timeouts, we always want to return to userspace,
+-	 * regardless of event count.
+-	 */
+-	return dist >= 0 || atomic_read(&ctx->cq_timeouts) != iowq->nr_timeouts;
+-}
+-
+-static int io_wake_function(struct wait_queue_entry *curr, unsigned int mode,
+-			    int wake_flags, void *key)
+-{
+-	struct io_wait_queue *iowq = container_of(curr, struct io_wait_queue,
+-							wq);
+-
+-	/*
+-	 * Cannot safely flush overflowed CQEs from here, ensure we wake up
+-	 * the task, and the next invocation will do it.
+-	 */
+-	if (io_should_wake(iowq) || test_bit(0, &iowq->ctx->check_cq_overflow))
+-		return autoremove_wake_function(curr, mode, wake_flags, key);
+-	return -1;
+-}
+-
+-static int io_run_task_work_sig(void)
+-{
+-	if (io_run_task_work())
+-		return 1;
+-	if (!signal_pending(current))
+-		return 0;
+-	if (test_thread_flag(TIF_NOTIFY_SIGNAL))
+-		return -ERESTARTSYS;
+-	return -EINTR;
+-}
+-
+-/* when returns >0, the caller should retry */
+-static inline int io_cqring_wait_schedule(struct io_ring_ctx *ctx,
+-					  struct io_wait_queue *iowq,
+-					  signed long *timeout)
+-{
+-	int ret;
+-
+-	/* make sure we run task_work before checking for signals */
+-	ret = io_run_task_work_sig();
+-	if (ret || io_should_wake(iowq))
+-		return ret;
+-	/* let the caller flush overflows, retry */
+-	if (test_bit(0, &ctx->check_cq_overflow))
+-		return 1;
+-
+-	*timeout = schedule_timeout(*timeout);
+-	return !*timeout ? -ETIME : 1;
+-}
+-
+-/*
+- * Wait until events become available, if we don't already have some. The
+- * application must reap them itself, as they reside on the shared cq ring.
+- */
+-static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
+-			  const sigset_t __user *sig, size_t sigsz,
+-			  struct __kernel_timespec __user *uts)
+-{
+-	struct io_wait_queue iowq;
+-	struct io_rings *rings = ctx->rings;
+-	signed long timeout = MAX_SCHEDULE_TIMEOUT;
+-	int ret;
+-
+-	do {
+-		io_cqring_overflow_flush(ctx);
+-		if (io_cqring_events(ctx) >= min_events)
+-			return 0;
+-		if (!io_run_task_work())
+-			break;
+-	} while (1);
+-
+-	if (uts) {
+-		struct timespec64 ts;
+-
+-		if (get_timespec64(&ts, uts))
+-			return -EFAULT;
+-		timeout = timespec64_to_jiffies(&ts);
+-	}
+-
+-	if (sig) {
+-#ifdef CONFIG_COMPAT
+-		if (in_compat_syscall())
+-			ret = set_compat_user_sigmask((const compat_sigset_t __user *)sig,
+-						      sigsz);
+-		else
+-#endif
+-			ret = set_user_sigmask(sig, sigsz);
+-
+-		if (ret)
+-			return ret;
+-	}
+-
+-	init_waitqueue_func_entry(&iowq.wq, io_wake_function);
+-	iowq.wq.private = current;
+-	INIT_LIST_HEAD(&iowq.wq.entry);
+-	iowq.ctx = ctx;
+-	iowq.nr_timeouts = atomic_read(&ctx->cq_timeouts);
+-	iowq.cq_tail = READ_ONCE(ctx->rings->cq.head) + min_events;
+-
+-	trace_io_uring_cqring_wait(ctx, min_events);
+-	do {
+-		/* if we can't even flush overflow, don't wait for more */
+-		if (!io_cqring_overflow_flush(ctx)) {
+-			ret = -EBUSY;
+-			break;
+-		}
+-		prepare_to_wait_exclusive(&ctx->cq_wait, &iowq.wq,
+-						TASK_INTERRUPTIBLE);
+-		ret = io_cqring_wait_schedule(ctx, &iowq, &timeout);
+-		finish_wait(&ctx->cq_wait, &iowq.wq);
+-		cond_resched();
+-	} while (ret > 0);
+-
+-	restore_saved_sigmask_unless(ret == -EINTR);
+-
+-	return READ_ONCE(rings->cq.head) == READ_ONCE(rings->cq.tail) ? ret : 0;
+-}
+-
+-static void io_free_page_table(void **table, size_t size)
+-{
+-	unsigned i, nr_tables = DIV_ROUND_UP(size, PAGE_SIZE);
+-
+-	for (i = 0; i < nr_tables; i++)
+-		kfree(table[i]);
+-	kfree(table);
+-}
+-
+-static void **io_alloc_page_table(size_t size)
+-{
+-	unsigned i, nr_tables = DIV_ROUND_UP(size, PAGE_SIZE);
+-	size_t init_size = size;
+-	void **table;
+-
+-	table = kcalloc(nr_tables, sizeof(*table), GFP_KERNEL_ACCOUNT);
+-	if (!table)
+-		return NULL;
+-
+-	for (i = 0; i < nr_tables; i++) {
+-		unsigned int this_size = min_t(size_t, size, PAGE_SIZE);
+-
+-		table[i] = kzalloc(this_size, GFP_KERNEL_ACCOUNT);
+-		if (!table[i]) {
+-			io_free_page_table(table, init_size);
+-			return NULL;
+-		}
+-		size -= this_size;
+-	}
+-	return table;
+-}
+-
+-static void io_rsrc_node_destroy(struct io_rsrc_node *ref_node)
+-{
+-	percpu_ref_exit(&ref_node->refs);
+-	kfree(ref_node);
+-}
+-
+-static void io_rsrc_node_ref_zero(struct percpu_ref *ref)
+-{
+-	struct io_rsrc_node *node = container_of(ref, struct io_rsrc_node, refs);
+-	struct io_ring_ctx *ctx = node->rsrc_data->ctx;
+-	unsigned long flags;
+-	bool first_add = false;
+-
+-	spin_lock_irqsave(&ctx->rsrc_ref_lock, flags);
+-	node->done = true;
+-
+-	while (!list_empty(&ctx->rsrc_ref_list)) {
+-		node = list_first_entry(&ctx->rsrc_ref_list,
+-					    struct io_rsrc_node, node);
+-		/* recycle ref nodes in order */
+-		if (!node->done)
+-			break;
+-		list_del(&node->node);
+-		first_add |= llist_add(&node->llist, &ctx->rsrc_put_llist);
+-	}
+-	spin_unlock_irqrestore(&ctx->rsrc_ref_lock, flags);
+-
+-	if (first_add)
+-		mod_delayed_work(system_wq, &ctx->rsrc_put_work, HZ);
+-}
+-
+-static struct io_rsrc_node *io_rsrc_node_alloc(struct io_ring_ctx *ctx)
+-{
+-	struct io_rsrc_node *ref_node;
+-
+-	ref_node = kzalloc(sizeof(*ref_node), GFP_KERNEL);
+-	if (!ref_node)
+-		return NULL;
+-
+-	if (percpu_ref_init(&ref_node->refs, io_rsrc_node_ref_zero,
+-			    0, GFP_KERNEL)) {
+-		kfree(ref_node);
+-		return NULL;
+-	}
+-	INIT_LIST_HEAD(&ref_node->node);
+-	INIT_LIST_HEAD(&ref_node->rsrc_list);
+-	ref_node->done = false;
+-	return ref_node;
+-}
+-
+-static void io_rsrc_node_switch(struct io_ring_ctx *ctx,
+-				struct io_rsrc_data *data_to_kill)
+-{
+-	WARN_ON_ONCE(!ctx->rsrc_backup_node);
+-	WARN_ON_ONCE(data_to_kill && !ctx->rsrc_node);
+-
+-	if (data_to_kill) {
+-		struct io_rsrc_node *rsrc_node = ctx->rsrc_node;
+-
+-		rsrc_node->rsrc_data = data_to_kill;
+-		spin_lock_irq(&ctx->rsrc_ref_lock);
+-		list_add_tail(&rsrc_node->node, &ctx->rsrc_ref_list);
+-		spin_unlock_irq(&ctx->rsrc_ref_lock);
+-
+-		atomic_inc(&data_to_kill->refs);
+-		percpu_ref_kill(&rsrc_node->refs);
+-		ctx->rsrc_node = NULL;
+-	}
+-
+-	if (!ctx->rsrc_node) {
+-		ctx->rsrc_node = ctx->rsrc_backup_node;
+-		ctx->rsrc_backup_node = NULL;
+-	}
+-}
+-
+-static int io_rsrc_node_switch_start(struct io_ring_ctx *ctx)
+-{
+-	if (ctx->rsrc_backup_node)
+-		return 0;
+-	ctx->rsrc_backup_node = io_rsrc_node_alloc(ctx);
+-	return ctx->rsrc_backup_node ? 0 : -ENOMEM;
+-}
+-
+-static int io_rsrc_ref_quiesce(struct io_rsrc_data *data, struct io_ring_ctx *ctx)
+-{
+-	int ret;
+-
+-	/* As we may drop ->uring_lock, other task may have started quiesce */
+-	if (data->quiesce)
+-		return -ENXIO;
+-
+-	data->quiesce = true;
+-	do {
+-		ret = io_rsrc_node_switch_start(ctx);
+-		if (ret)
+-			break;
+-		io_rsrc_node_switch(ctx, data);
+-
+-		/* kill initial ref, already quiesced if zero */
+-		if (atomic_dec_and_test(&data->refs))
+-			break;
+-		mutex_unlock(&ctx->uring_lock);
+-		flush_delayed_work(&ctx->rsrc_put_work);
+-		ret = wait_for_completion_interruptible(&data->done);
+-		if (!ret) {
+-			mutex_lock(&ctx->uring_lock);
+-			break;
+-		}
+-
+-		atomic_inc(&data->refs);
+-		/* wait for all works potentially completing data->done */
+-		flush_delayed_work(&ctx->rsrc_put_work);
+-		reinit_completion(&data->done);
+-
+-		ret = io_run_task_work_sig();
+-		mutex_lock(&ctx->uring_lock);
+-	} while (ret >= 0);
+-	data->quiesce = false;
+-
+-	return ret;
+-}
+-
+-static u64 *io_get_tag_slot(struct io_rsrc_data *data, unsigned int idx)
+-{
+-	unsigned int off = idx & IO_RSRC_TAG_TABLE_MASK;
+-	unsigned int table_idx = idx >> IO_RSRC_TAG_TABLE_SHIFT;
+-
+-	return &data->tags[table_idx][off];
+-}
+-
+-static void io_rsrc_data_free(struct io_rsrc_data *data)
+-{
+-	size_t size = data->nr * sizeof(data->tags[0][0]);
+-
+-	if (data->tags)
+-		io_free_page_table((void **)data->tags, size);
+-	kfree(data);
+-}
+-
+-static int io_rsrc_data_alloc(struct io_ring_ctx *ctx, rsrc_put_fn *do_put,
+-			      u64 __user *utags, unsigned nr,
+-			      struct io_rsrc_data **pdata)
+-{
+-	struct io_rsrc_data *data;
+-	int ret = -ENOMEM;
+-	unsigned i;
+-
+-	data = kzalloc(sizeof(*data), GFP_KERNEL);
+-	if (!data)
+-		return -ENOMEM;
+-	data->tags = (u64 **)io_alloc_page_table(nr * sizeof(data->tags[0][0]));
+-	if (!data->tags) {
+-		kfree(data);
+-		return -ENOMEM;
+-	}
+-
+-	data->nr = nr;
+-	data->ctx = ctx;
+-	data->do_put = do_put;
+-	if (utags) {
+-		ret = -EFAULT;
+-		for (i = 0; i < nr; i++) {
+-			u64 *tag_slot = io_get_tag_slot(data, i);
+-
+-			if (copy_from_user(tag_slot, &utags[i],
+-					   sizeof(*tag_slot)))
+-				goto fail;
+-		}
+-	}
+-
+-	atomic_set(&data->refs, 1);
+-	init_completion(&data->done);
+-	*pdata = data;
+-	return 0;
+-fail:
+-	io_rsrc_data_free(data);
+-	return ret;
+-}
+-
+-static bool io_alloc_file_tables(struct io_file_table *table, unsigned nr_files)
+-{
+-	table->files = kvcalloc(nr_files, sizeof(table->files[0]),
+-				GFP_KERNEL_ACCOUNT);
+-	return !!table->files;
+-}
+-
+-static void io_free_file_tables(struct io_file_table *table)
+-{
+-	kvfree(table->files);
+-	table->files = NULL;
+-}
+-
+-static void __io_sqe_files_unregister(struct io_ring_ctx *ctx)
+-{
+-#if defined(CONFIG_UNIX)
+-	if (ctx->ring_sock) {
+-		struct sock *sock = ctx->ring_sock->sk;
+-		struct sk_buff *skb;
+-
+-		while ((skb = skb_dequeue(&sock->sk_receive_queue)) != NULL)
+-			kfree_skb(skb);
+-	}
+-#else
+-	int i;
+-
+-	for (i = 0; i < ctx->nr_user_files; i++) {
+-		struct file *file;
+-
+-		file = io_file_from_index(ctx, i);
+-		if (file)
+-			fput(file);
+-	}
+-#endif
+-	io_free_file_tables(&ctx->file_table);
+-	io_rsrc_data_free(ctx->file_data);
+-	ctx->file_data = NULL;
+-	ctx->nr_user_files = 0;
+-}
+-
+-static int io_sqe_files_unregister(struct io_ring_ctx *ctx)
+-{
+-	int ret;
+-
+-	if (!ctx->file_data)
+-		return -ENXIO;
+-	ret = io_rsrc_ref_quiesce(ctx->file_data, ctx);
+-	if (!ret)
+-		__io_sqe_files_unregister(ctx);
+-	return ret;
+-}
+-
+-static void io_sq_thread_unpark(struct io_sq_data *sqd)
+-	__releases(&sqd->lock)
+-{
+-	WARN_ON_ONCE(sqd->thread == current);
+-
+-	/*
+-	 * Do the dance but not conditional clear_bit() because it'd race with
+-	 * other threads incrementing park_pending and setting the bit.
+-	 */
+-	clear_bit(IO_SQ_THREAD_SHOULD_PARK, &sqd->state);
+-	if (atomic_dec_return(&sqd->park_pending))
+-		set_bit(IO_SQ_THREAD_SHOULD_PARK, &sqd->state);
+-	mutex_unlock(&sqd->lock);
+-}
+-
+-static void io_sq_thread_park(struct io_sq_data *sqd)
+-	__acquires(&sqd->lock)
+-{
+-	WARN_ON_ONCE(sqd->thread == current);
+-
+-	atomic_inc(&sqd->park_pending);
+-	set_bit(IO_SQ_THREAD_SHOULD_PARK, &sqd->state);
+-	mutex_lock(&sqd->lock);
+-	if (sqd->thread)
+-		wake_up_process(sqd->thread);
+-}
+-
+-static void io_sq_thread_stop(struct io_sq_data *sqd)
+-{
+-	WARN_ON_ONCE(sqd->thread == current);
+-	WARN_ON_ONCE(test_bit(IO_SQ_THREAD_SHOULD_STOP, &sqd->state));
+-
+-	set_bit(IO_SQ_THREAD_SHOULD_STOP, &sqd->state);
+-	mutex_lock(&sqd->lock);
+-	if (sqd->thread)
+-		wake_up_process(sqd->thread);
+-	mutex_unlock(&sqd->lock);
+-	wait_for_completion(&sqd->exited);
+-}
+-
+-static void io_put_sq_data(struct io_sq_data *sqd)
+-{
+-	if (refcount_dec_and_test(&sqd->refs)) {
+-		WARN_ON_ONCE(atomic_read(&sqd->park_pending));
+-
+-		io_sq_thread_stop(sqd);
+-		kfree(sqd);
+-	}
+-}
+-
+-static void io_sq_thread_finish(struct io_ring_ctx *ctx)
+-{
+-	struct io_sq_data *sqd = ctx->sq_data;
+-
+-	if (sqd) {
+-		io_sq_thread_park(sqd);
+-		list_del_init(&ctx->sqd_list);
+-		io_sqd_update_thread_idle(sqd);
+-		io_sq_thread_unpark(sqd);
+-
+-		io_put_sq_data(sqd);
+-		ctx->sq_data = NULL;
+-	}
+-}
+-
+-static struct io_sq_data *io_attach_sq_data(struct io_uring_params *p)
+-{
+-	struct io_ring_ctx *ctx_attach;
+-	struct io_sq_data *sqd;
+-	struct fd f;
+-
+-	f = fdget(p->wq_fd);
+-	if (!f.file)
+-		return ERR_PTR(-ENXIO);
+-	if (f.file->f_op != &io_uring_fops) {
+-		fdput(f);
+-		return ERR_PTR(-EINVAL);
+-	}
+-
+-	ctx_attach = f.file->private_data;
+-	sqd = ctx_attach->sq_data;
+-	if (!sqd) {
+-		fdput(f);
+-		return ERR_PTR(-EINVAL);
+-	}
+-	if (sqd->task_tgid != current->tgid) {
+-		fdput(f);
+-		return ERR_PTR(-EPERM);
+-	}
+-
+-	refcount_inc(&sqd->refs);
+-	fdput(f);
+-	return sqd;
+-}
+-
+-static struct io_sq_data *io_get_sq_data(struct io_uring_params *p,
+-					 bool *attached)
+-{
+-	struct io_sq_data *sqd;
+-
+-	*attached = false;
+-	if (p->flags & IORING_SETUP_ATTACH_WQ) {
+-		sqd = io_attach_sq_data(p);
+-		if (!IS_ERR(sqd)) {
+-			*attached = true;
+-			return sqd;
+-		}
+-		/* fall through for EPERM case, setup new sqd/task */
+-		if (PTR_ERR(sqd) != -EPERM)
+-			return sqd;
+-	}
+-
+-	sqd = kzalloc(sizeof(*sqd), GFP_KERNEL);
+-	if (!sqd)
+-		return ERR_PTR(-ENOMEM);
+-
+-	atomic_set(&sqd->park_pending, 0);
+-	refcount_set(&sqd->refs, 1);
+-	INIT_LIST_HEAD(&sqd->ctx_list);
+-	mutex_init(&sqd->lock);
+-	init_waitqueue_head(&sqd->wait);
+-	init_completion(&sqd->exited);
+-	return sqd;
+-}
+-
+-#if defined(CONFIG_UNIX)
+-/*
+- * Ensure the UNIX gc is aware of our file set, so we are certain that
+- * the io_uring can be safely unregistered on process exit, even if we have
+- * loops in the file referencing.
+- */
+-static int __io_sqe_files_scm(struct io_ring_ctx *ctx, int nr, int offset)
+-{
+-	struct sock *sk = ctx->ring_sock->sk;
+-	struct scm_fp_list *fpl;
+-	struct sk_buff *skb;
+-	int i, nr_files;
+-
+-	fpl = kzalloc(sizeof(*fpl), GFP_KERNEL);
+-	if (!fpl)
+-		return -ENOMEM;
+-
+-	skb = alloc_skb(0, GFP_KERNEL);
+-	if (!skb) {
+-		kfree(fpl);
+-		return -ENOMEM;
+-	}
+-
+-	skb->sk = sk;
+-
+-	nr_files = 0;
+-	fpl->user = get_uid(current_user());
+-	for (i = 0; i < nr; i++) {
+-		struct file *file = io_file_from_index(ctx, i + offset);
+-
+-		if (!file)
+-			continue;
+-		fpl->fp[nr_files] = get_file(file);
+-		unix_inflight(fpl->user, fpl->fp[nr_files]);
+-		nr_files++;
+-	}
+-
+-	if (nr_files) {
+-		fpl->max = SCM_MAX_FD;
+-		fpl->count = nr_files;
+-		UNIXCB(skb).fp = fpl;
+-		skb->destructor = unix_destruct_scm;
+-		refcount_add(skb->truesize, &sk->sk_wmem_alloc);
+-		skb_queue_head(&sk->sk_receive_queue, skb);
+-
+-		for (i = 0; i < nr_files; i++)
+-			fput(fpl->fp[i]);
+-	} else {
+-		kfree_skb(skb);
+-		kfree(fpl);
+-	}
+-
+-	return 0;
+-}
+-
+-/*
+- * If UNIX sockets are enabled, fd passing can cause a reference cycle which
+- * causes regular reference counting to break down. We rely on the UNIX
+- * garbage collection to take care of this problem for us.
+- */
+-static int io_sqe_files_scm(struct io_ring_ctx *ctx)
+-{
+-	unsigned left, total;
+-	int ret = 0;
+-
+-	total = 0;
+-	left = ctx->nr_user_files;
+-	while (left) {
+-		unsigned this_files = min_t(unsigned, left, SCM_MAX_FD);
+-
+-		ret = __io_sqe_files_scm(ctx, this_files, total);
+-		if (ret)
+-			break;
+-		left -= this_files;
+-		total += this_files;
+-	}
+-
+-	if (!ret)
+-		return 0;
+-
+-	while (total < ctx->nr_user_files) {
+-		struct file *file = io_file_from_index(ctx, total);
+-
+-		if (file)
+-			fput(file);
+-		total++;
+-	}
+-
+-	return ret;
+-}
+-#else
+-static int io_sqe_files_scm(struct io_ring_ctx *ctx)
+-{
+-	return 0;
+-}
+-#endif
+-
+-static void io_rsrc_file_put(struct io_ring_ctx *ctx, struct io_rsrc_put *prsrc)
+-{
+-	struct file *file = prsrc->file;
+-#if defined(CONFIG_UNIX)
+-	struct sock *sock = ctx->ring_sock->sk;
+-	struct sk_buff_head list, *head = &sock->sk_receive_queue;
+-	struct sk_buff *skb;
+-	int i;
+-
+-	__skb_queue_head_init(&list);
+-
+-	/*
+-	 * Find the skb that holds this file in its SCM_RIGHTS. When found,
+-	 * remove this entry and rearrange the file array.
+-	 */
+-	skb = skb_dequeue(head);
+-	while (skb) {
+-		struct scm_fp_list *fp;
+-
+-		fp = UNIXCB(skb).fp;
+-		for (i = 0; i < fp->count; i++) {
+-			int left;
+-
+-			if (fp->fp[i] != file)
+-				continue;
+-
+-			unix_notinflight(fp->user, fp->fp[i]);
+-			left = fp->count - 1 - i;
+-			if (left) {
+-				memmove(&fp->fp[i], &fp->fp[i + 1],
+-						left * sizeof(struct file *));
+-			}
+-			fp->count--;
+-			if (!fp->count) {
+-				kfree_skb(skb);
+-				skb = NULL;
+-			} else {
+-				__skb_queue_tail(&list, skb);
+-			}
+-			fput(file);
+-			file = NULL;
+-			break;
+-		}
+-
+-		if (!file)
+-			break;
+-
+-		__skb_queue_tail(&list, skb);
+-
+-		skb = skb_dequeue(head);
+-	}
+-
+-	if (skb_peek(&list)) {
+-		spin_lock_irq(&head->lock);
+-		while ((skb = __skb_dequeue(&list)) != NULL)
+-			__skb_queue_tail(head, skb);
+-		spin_unlock_irq(&head->lock);
+-	}
+-#else
+-	fput(file);
+-#endif
+-}
+-
+-static void __io_rsrc_put_work(struct io_rsrc_node *ref_node)
+-{
+-	struct io_rsrc_data *rsrc_data = ref_node->rsrc_data;
+-	struct io_ring_ctx *ctx = rsrc_data->ctx;
+-	struct io_rsrc_put *prsrc, *tmp;
+-
+-	list_for_each_entry_safe(prsrc, tmp, &ref_node->rsrc_list, list) {
+-		list_del(&prsrc->list);
+-
+-		if (prsrc->tag) {
+-			bool lock_ring = ctx->flags & IORING_SETUP_IOPOLL;
+-
+-			io_ring_submit_lock(ctx, lock_ring);
+-			spin_lock(&ctx->completion_lock);
+-			io_cqring_fill_event(ctx, prsrc->tag, 0, 0);
+-			ctx->cq_extra++;
+-			io_commit_cqring(ctx);
+-			spin_unlock(&ctx->completion_lock);
+-			io_cqring_ev_posted(ctx);
+-			io_ring_submit_unlock(ctx, lock_ring);
+-		}
+-
+-		rsrc_data->do_put(ctx, prsrc);
+-		kfree(prsrc);
+-	}
+-
+-	io_rsrc_node_destroy(ref_node);
+-	if (atomic_dec_and_test(&rsrc_data->refs))
+-		complete(&rsrc_data->done);
+-}
+-
+-static void io_rsrc_put_work(struct work_struct *work)
+-{
+-	struct io_ring_ctx *ctx;
+-	struct llist_node *node;
+-
+-	ctx = container_of(work, struct io_ring_ctx, rsrc_put_work.work);
+-	node = llist_del_all(&ctx->rsrc_put_llist);
+-
+-	while (node) {
+-		struct io_rsrc_node *ref_node;
+-		struct llist_node *next = node->next;
+-
+-		ref_node = llist_entry(node, struct io_rsrc_node, llist);
+-		__io_rsrc_put_work(ref_node);
+-		node = next;
+-	}
+-}
+-
+-static int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg,
+-				 unsigned nr_args, u64 __user *tags)
+-{
+-	__s32 __user *fds = (__s32 __user *) arg;
+-	struct file *file;
+-	int fd, ret;
+-	unsigned i;
+-
+-	if (ctx->file_data)
+-		return -EBUSY;
+-	if (!nr_args)
+-		return -EINVAL;
+-	if (nr_args > IORING_MAX_FIXED_FILES)
+-		return -EMFILE;
+-	if (nr_args > rlimit(RLIMIT_NOFILE))
+-		return -EMFILE;
+-	ret = io_rsrc_node_switch_start(ctx);
+-	if (ret)
+-		return ret;
+-	ret = io_rsrc_data_alloc(ctx, io_rsrc_file_put, tags, nr_args,
+-				 &ctx->file_data);
+-	if (ret)
+-		return ret;
+-
+-	ret = -ENOMEM;
+-	if (!io_alloc_file_tables(&ctx->file_table, nr_args))
+-		goto out_free;
+-
+-	for (i = 0; i < nr_args; i++, ctx->nr_user_files++) {
+-		if (copy_from_user(&fd, &fds[i], sizeof(fd))) {
+-			ret = -EFAULT;
+-			goto out_fput;
+-		}
+-		/* allow sparse sets */
+-		if (fd == -1) {
+-			ret = -EINVAL;
+-			if (unlikely(*io_get_tag_slot(ctx->file_data, i)))
+-				goto out_fput;
+-			continue;
+-		}
+-
+-		file = fget(fd);
+-		ret = -EBADF;
+-		if (unlikely(!file))
+-			goto out_fput;
+-
+-		/*
+-		 * Don't allow io_uring instances to be registered. If UNIX
+-		 * isn't enabled, then this causes a reference cycle and this
+-		 * instance can never get freed. If UNIX is enabled we'll
+-		 * handle it just fine, but there's still no point in allowing
+-		 * a ring fd as it doesn't support regular read/write anyway.
+-		 */
+-		if (file->f_op == &io_uring_fops) {
+-			fput(file);
+-			goto out_fput;
+-		}
+-		io_fixed_file_set(io_fixed_file_slot(&ctx->file_table, i), file);
+-	}
+-
+-	ret = io_sqe_files_scm(ctx);
+-	if (ret) {
+-		__io_sqe_files_unregister(ctx);
+-		return ret;
+-	}
+-
+-	io_rsrc_node_switch(ctx, NULL);
+-	return ret;
+-out_fput:
+-	for (i = 0; i < ctx->nr_user_files; i++) {
+-		file = io_file_from_index(ctx, i);
+-		if (file)
+-			fput(file);
+-	}
+-	io_free_file_tables(&ctx->file_table);
+-	ctx->nr_user_files = 0;
+-out_free:
+-	io_rsrc_data_free(ctx->file_data);
+-	ctx->file_data = NULL;
+-	return ret;
+-}
+-
+-static int io_sqe_file_register(struct io_ring_ctx *ctx, struct file *file,
+-				int index)
+-{
+-#if defined(CONFIG_UNIX)
+-	struct sock *sock = ctx->ring_sock->sk;
+-	struct sk_buff_head *head = &sock->sk_receive_queue;
+-	struct sk_buff *skb;
+-
+-	/*
+-	 * See if we can merge this file into an existing skb SCM_RIGHTS
+-	 * file set. If there's no room, fall back to allocating a new skb
+-	 * and filling it in.
+-	 */
+-	spin_lock_irq(&head->lock);
+-	skb = skb_peek(head);
+-	if (skb) {
+-		struct scm_fp_list *fpl = UNIXCB(skb).fp;
+-
+-		if (fpl->count < SCM_MAX_FD) {
+-			__skb_unlink(skb, head);
+-			spin_unlock_irq(&head->lock);
+-			fpl->fp[fpl->count] = get_file(file);
+-			unix_inflight(fpl->user, fpl->fp[fpl->count]);
+-			fpl->count++;
+-			spin_lock_irq(&head->lock);
+-			__skb_queue_head(head, skb);
+-		} else {
+-			skb = NULL;
+-		}
+-	}
+-	spin_unlock_irq(&head->lock);
+-
+-	if (skb) {
+-		fput(file);
+-		return 0;
+-	}
+-
+-	return __io_sqe_files_scm(ctx, 1, index);
+-#else
+-	return 0;
+-#endif
+-}
+-
+-static int io_queue_rsrc_removal(struct io_rsrc_data *data, unsigned idx,
+-				 struct io_rsrc_node *node, void *rsrc)
+-{
+-	struct io_rsrc_put *prsrc;
+-
+-	prsrc = kzalloc(sizeof(*prsrc), GFP_KERNEL);
+-	if (!prsrc)
+-		return -ENOMEM;
+-
+-	prsrc->tag = *io_get_tag_slot(data, idx);
+-	prsrc->rsrc = rsrc;
+-	list_add(&prsrc->list, &node->rsrc_list);
+-	return 0;
+-}
+-
+-static int io_install_fixed_file(struct io_kiocb *req, struct file *file,
+-				 unsigned int issue_flags, u32 slot_index)
+-{
+-	struct io_ring_ctx *ctx = req->ctx;
+-	bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
+-	bool needs_switch = false;
+-	struct io_fixed_file *file_slot;
+-	int ret = -EBADF;
+-
+-	io_ring_submit_lock(ctx, !force_nonblock);
+-	if (file->f_op == &io_uring_fops)
+-		goto err;
+-	ret = -ENXIO;
+-	if (!ctx->file_data)
+-		goto err;
+-	ret = -EINVAL;
+-	if (slot_index >= ctx->nr_user_files)
+-		goto err;
+-
+-	slot_index = array_index_nospec(slot_index, ctx->nr_user_files);
+-	file_slot = io_fixed_file_slot(&ctx->file_table, slot_index);
+-
+-	if (file_slot->file_ptr) {
+-		struct file *old_file;
+-
+-		ret = io_rsrc_node_switch_start(ctx);
+-		if (ret)
+-			goto err;
+-
+-		old_file = (struct file *)(file_slot->file_ptr & FFS_MASK);
+-		ret = io_queue_rsrc_removal(ctx->file_data, slot_index,
+-					    ctx->rsrc_node, old_file);
+-		if (ret)
+-			goto err;
+-		file_slot->file_ptr = 0;
+-		needs_switch = true;
+-	}
+-
+-	*io_get_tag_slot(ctx->file_data, slot_index) = 0;
+-	io_fixed_file_set(file_slot, file);
+-	ret = io_sqe_file_register(ctx, file, slot_index);
+-	if (ret) {
+-		file_slot->file_ptr = 0;
+-		goto err;
+-	}
+-
+-	ret = 0;
+-err:
+-	if (needs_switch)
+-		io_rsrc_node_switch(ctx, ctx->file_data);
+-	io_ring_submit_unlock(ctx, !force_nonblock);
+-	if (ret)
+-		fput(file);
+-	return ret;
+-}
+-
+-static int io_close_fixed(struct io_kiocb *req, unsigned int issue_flags)
+-{
+-	unsigned int offset = req->close.file_slot - 1;
+-	struct io_ring_ctx *ctx = req->ctx;
+-	struct io_fixed_file *file_slot;
+-	struct file *file;
+-	int ret, i;
+-
+-	io_ring_submit_lock(ctx, !(issue_flags & IO_URING_F_NONBLOCK));
+-	ret = -ENXIO;
+-	if (unlikely(!ctx->file_data))
+-		goto out;
+-	ret = -EINVAL;
+-	if (offset >= ctx->nr_user_files)
+-		goto out;
+-	ret = io_rsrc_node_switch_start(ctx);
+-	if (ret)
+-		goto out;
+-
+-	i = array_index_nospec(offset, ctx->nr_user_files);
+-	file_slot = io_fixed_file_slot(&ctx->file_table, i);
+-	ret = -EBADF;
+-	if (!file_slot->file_ptr)
+-		goto out;
+-
+-	file = (struct file *)(file_slot->file_ptr & FFS_MASK);
+-	ret = io_queue_rsrc_removal(ctx->file_data, offset, ctx->rsrc_node, file);
+-	if (ret)
+-		goto out;
+-
+-	file_slot->file_ptr = 0;
+-	io_rsrc_node_switch(ctx, ctx->file_data);
+-	ret = 0;
+-out:
+-	io_ring_submit_unlock(ctx, !(issue_flags & IO_URING_F_NONBLOCK));
+-	return ret;
+-}
+-
+-static int __io_sqe_files_update(struct io_ring_ctx *ctx,
+-				 struct io_uring_rsrc_update2 *up,
+-				 unsigned nr_args)
+-{
+-	u64 __user *tags = u64_to_user_ptr(up->tags);
+-	__s32 __user *fds = u64_to_user_ptr(up->data);
+-	struct io_rsrc_data *data = ctx->file_data;
+-	struct io_fixed_file *file_slot;
+-	struct file *file;
+-	int fd, i, err = 0;
+-	unsigned int done;
+-	bool needs_switch = false;
+-
+-	if (!ctx->file_data)
+-		return -ENXIO;
+-	if (up->offset + nr_args > ctx->nr_user_files)
+-		return -EINVAL;
+-
+-	for (done = 0; done < nr_args; done++) {
+-		u64 tag = 0;
+-
+-		if ((tags && copy_from_user(&tag, &tags[done], sizeof(tag))) ||
+-		    copy_from_user(&fd, &fds[done], sizeof(fd))) {
+-			err = -EFAULT;
+-			break;
+-		}
+-		if ((fd == IORING_REGISTER_FILES_SKIP || fd == -1) && tag) {
+-			err = -EINVAL;
+-			break;
+-		}
+-		if (fd == IORING_REGISTER_FILES_SKIP)
+-			continue;
+-
+-		i = array_index_nospec(up->offset + done, ctx->nr_user_files);
+-		file_slot = io_fixed_file_slot(&ctx->file_table, i);
+-
+-		if (file_slot->file_ptr) {
+-			file = (struct file *)(file_slot->file_ptr & FFS_MASK);
+-			err = io_queue_rsrc_removal(data, up->offset + done,
+-						    ctx->rsrc_node, file);
+-			if (err)
+-				break;
+-			file_slot->file_ptr = 0;
+-			needs_switch = true;
+-		}
+-		if (fd != -1) {
+-			file = fget(fd);
+-			if (!file) {
+-				err = -EBADF;
+-				break;
+-			}
+-			/*
+-			 * Don't allow io_uring instances to be registered. If
+-			 * UNIX isn't enabled, then this causes a reference
+-			 * cycle and this instance can never get freed. If UNIX
+-			 * is enabled we'll handle it just fine, but there's
+-			 * still no point in allowing a ring fd as it doesn't
+-			 * support regular read/write anyway.
+-			 */
+-			if (file->f_op == &io_uring_fops) {
+-				fput(file);
+-				err = -EBADF;
+-				break;
+-			}
+-			*io_get_tag_slot(data, up->offset + done) = tag;
+-			io_fixed_file_set(file_slot, file);
+-			err = io_sqe_file_register(ctx, file, i);
+-			if (err) {
+-				file_slot->file_ptr = 0;
+-				fput(file);
+-				break;
+-			}
+-		}
+-	}
+-
+-	if (needs_switch)
+-		io_rsrc_node_switch(ctx, data);
+-	return done ? done : err;
+-}
+-
+-static struct io_wq *io_init_wq_offload(struct io_ring_ctx *ctx,
+-					struct task_struct *task)
+-{
+-	struct io_wq_hash *hash;
+-	struct io_wq_data data;
+-	unsigned int concurrency;
+-
+-	mutex_lock(&ctx->uring_lock);
+-	hash = ctx->hash_map;
+-	if (!hash) {
+-		hash = kzalloc(sizeof(*hash), GFP_KERNEL);
+-		if (!hash) {
+-			mutex_unlock(&ctx->uring_lock);
+-			return ERR_PTR(-ENOMEM);
+-		}
+-		refcount_set(&hash->refs, 1);
+-		init_waitqueue_head(&hash->wait);
+-		ctx->hash_map = hash;
+-	}
+-	mutex_unlock(&ctx->uring_lock);
+-
+-	data.hash = hash;
+-	data.task = task;
+-	data.free_work = io_wq_free_work;
+-	data.do_work = io_wq_submit_work;
+-
+-	/* Do QD, or 4 * CPUS, whatever is smallest */
+-	concurrency = min(ctx->sq_entries, 4 * num_online_cpus());
+-
+-	return io_wq_create(concurrency, &data);
+-}
+-
+-static int io_uring_alloc_task_context(struct task_struct *task,
+-				       struct io_ring_ctx *ctx)
+-{
+-	struct io_uring_task *tctx;
+-	int ret;
+-
+-	tctx = kzalloc(sizeof(*tctx), GFP_KERNEL);
+-	if (unlikely(!tctx))
+-		return -ENOMEM;
+-
+-	ret = percpu_counter_init(&tctx->inflight, 0, GFP_KERNEL);
+-	if (unlikely(ret)) {
+-		kfree(tctx);
+-		return ret;
+-	}
+-
+-	tctx->io_wq = io_init_wq_offload(ctx, task);
+-	if (IS_ERR(tctx->io_wq)) {
+-		ret = PTR_ERR(tctx->io_wq);
+-		percpu_counter_destroy(&tctx->inflight);
+-		kfree(tctx);
+-		return ret;
+-	}
+-
+-	xa_init(&tctx->xa);
+-	init_waitqueue_head(&tctx->wait);
+-	atomic_set(&tctx->in_idle, 0);
+-	atomic_set(&tctx->inflight_tracked, 0);
+-	task->io_uring = tctx;
+-	spin_lock_init(&tctx->task_lock);
+-	INIT_WQ_LIST(&tctx->task_list);
+-	init_task_work(&tctx->task_work, tctx_task_work);
+-	return 0;
+-}
+-
+-void __io_uring_free(struct task_struct *tsk)
+-{
+-	struct io_uring_task *tctx = tsk->io_uring;
+-
+-	WARN_ON_ONCE(!xa_empty(&tctx->xa));
+-	WARN_ON_ONCE(tctx->io_wq);
+-	WARN_ON_ONCE(tctx->cached_refs);
+-
+-	percpu_counter_destroy(&tctx->inflight);
+-	kfree(tctx);
+-	tsk->io_uring = NULL;
+-}
+-
+-static int io_sq_offload_create(struct io_ring_ctx *ctx,
+-				struct io_uring_params *p)
+-{
+-	int ret;
+-
+-	/* Retain compatibility with failing for an invalid attach attempt */
+-	if ((ctx->flags & (IORING_SETUP_ATTACH_WQ | IORING_SETUP_SQPOLL)) ==
+-				IORING_SETUP_ATTACH_WQ) {
+-		struct fd f;
+-
+-		f = fdget(p->wq_fd);
+-		if (!f.file)
+-			return -ENXIO;
+-		if (f.file->f_op != &io_uring_fops) {
+-			fdput(f);
+-			return -EINVAL;
+-		}
+-		fdput(f);
+-	}
+-	if (ctx->flags & IORING_SETUP_SQPOLL) {
+-		struct task_struct *tsk;
+-		struct io_sq_data *sqd;
+-		bool attached;
+-
+-		sqd = io_get_sq_data(p, &attached);
+-		if (IS_ERR(sqd)) {
+-			ret = PTR_ERR(sqd);
+-			goto err;
+-		}
+-
+-		ctx->sq_creds = get_current_cred();
+-		ctx->sq_data = sqd;
+-		ctx->sq_thread_idle = msecs_to_jiffies(p->sq_thread_idle);
+-		if (!ctx->sq_thread_idle)
+-			ctx->sq_thread_idle = HZ;
+-
+-		io_sq_thread_park(sqd);
+-		list_add(&ctx->sqd_list, &sqd->ctx_list);
+-		io_sqd_update_thread_idle(sqd);
+-		/* don't attach to a dying SQPOLL thread, would be racy */
+-		ret = (attached && !sqd->thread) ? -ENXIO : 0;
+-		io_sq_thread_unpark(sqd);
+-
+-		if (ret < 0)
+-			goto err;
+-		if (attached)
+-			return 0;
+-
+-		if (p->flags & IORING_SETUP_SQ_AFF) {
+-			int cpu = p->sq_thread_cpu;
+-
+-			ret = -EINVAL;
+-			if (cpu >= nr_cpu_ids || !cpu_online(cpu))
+-				goto err_sqpoll;
+-			sqd->sq_cpu = cpu;
+-		} else {
+-			sqd->sq_cpu = -1;
+-		}
+-
+-		sqd->task_pid = current->pid;
+-		sqd->task_tgid = current->tgid;
+-		tsk = create_io_thread(io_sq_thread, sqd, NUMA_NO_NODE);
+-		if (IS_ERR(tsk)) {
+-			ret = PTR_ERR(tsk);
+-			goto err_sqpoll;
+-		}
+-
+-		sqd->thread = tsk;
+-		ret = io_uring_alloc_task_context(tsk, ctx);
+-		wake_up_new_task(tsk);
+-		if (ret)
+-			goto err;
+-	} else if (p->flags & IORING_SETUP_SQ_AFF) {
+-		/* Can't have SQ_AFF without SQPOLL */
+-		ret = -EINVAL;
+-		goto err;
+-	}
+-
+-	return 0;
+-err_sqpoll:
+-	complete(&ctx->sq_data->exited);
+-err:
+-	io_sq_thread_finish(ctx);
+-	return ret;
+-}
+-
+-static inline void __io_unaccount_mem(struct user_struct *user,
+-				      unsigned long nr_pages)
+-{
+-	atomic_long_sub(nr_pages, &user->locked_vm);
+-}
+-
+-static inline int __io_account_mem(struct user_struct *user,
+-				   unsigned long nr_pages)
+-{
+-	unsigned long page_limit, cur_pages, new_pages;
+-
+-	/* Don't allow more pages than we can safely lock */
+-	page_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
+-
+-	do {
+-		cur_pages = atomic_long_read(&user->locked_vm);
+-		new_pages = cur_pages + nr_pages;
+-		if (new_pages > page_limit)
+-			return -ENOMEM;
+-	} while (atomic_long_cmpxchg(&user->locked_vm, cur_pages,
+-					new_pages) != cur_pages);
+-
+-	return 0;
+-}
+-
+-static void io_unaccount_mem(struct io_ring_ctx *ctx, unsigned long nr_pages)
+-{
+-	if (ctx->user)
+-		__io_unaccount_mem(ctx->user, nr_pages);
+-
+-	if (ctx->mm_account)
+-		atomic64_sub(nr_pages, &ctx->mm_account->pinned_vm);
+-}
+-
+-static int io_account_mem(struct io_ring_ctx *ctx, unsigned long nr_pages)
+-{
+-	int ret;
+-
+-	if (ctx->user) {
+-		ret = __io_account_mem(ctx->user, nr_pages);
+-		if (ret)
+-			return ret;
+-	}
+-
+-	if (ctx->mm_account)
+-		atomic64_add(nr_pages, &ctx->mm_account->pinned_vm);
+-
+-	return 0;
+-}
+-
+-static void io_mem_free(void *ptr)
+-{
+-	struct page *page;
+-
+-	if (!ptr)
+-		return;
+-
+-	page = virt_to_head_page(ptr);
+-	if (put_page_testzero(page))
+-		free_compound_page(page);
+-}
+-
+-static void *io_mem_alloc(size_t size)
+-{
+-	gfp_t gfp_flags = GFP_KERNEL | __GFP_ZERO | __GFP_NOWARN | __GFP_COMP |
+-				__GFP_NORETRY | __GFP_ACCOUNT;
+-
+-	return (void *) __get_free_pages(gfp_flags, get_order(size));
+-}
+-
+-static unsigned long rings_size(unsigned sq_entries, unsigned cq_entries,
+-				size_t *sq_offset)
+-{
+-	struct io_rings *rings;
+-	size_t off, sq_array_size;
+-
+-	off = struct_size(rings, cqes, cq_entries);
+-	if (off == SIZE_MAX)
+-		return SIZE_MAX;
+-
+-#ifdef CONFIG_SMP
+-	off = ALIGN(off, SMP_CACHE_BYTES);
+-	if (off == 0)
+-		return SIZE_MAX;
+-#endif
+-
+-	if (sq_offset)
+-		*sq_offset = off;
+-
+-	sq_array_size = array_size(sizeof(u32), sq_entries);
+-	if (sq_array_size == SIZE_MAX)
+-		return SIZE_MAX;
+-
+-	if (check_add_overflow(off, sq_array_size, &off))
+-		return SIZE_MAX;
+-
+-	return off;
+-}
+-
+-static void io_buffer_unmap(struct io_ring_ctx *ctx, struct io_mapped_ubuf **slot)
+-{
+-	struct io_mapped_ubuf *imu = *slot;
+-	unsigned int i;
+-
+-	if (imu != ctx->dummy_ubuf) {
+-		for (i = 0; i < imu->nr_bvecs; i++)
+-			unpin_user_page(imu->bvec[i].bv_page);
+-		if (imu->acct_pages)
+-			io_unaccount_mem(ctx, imu->acct_pages);
+-		kvfree(imu);
+-	}
+-	*slot = NULL;
+-}
+-
+-static void io_rsrc_buf_put(struct io_ring_ctx *ctx, struct io_rsrc_put *prsrc)
+-{
+-	io_buffer_unmap(ctx, &prsrc->buf);
+-	prsrc->buf = NULL;
+-}
+-
+-static void __io_sqe_buffers_unregister(struct io_ring_ctx *ctx)
+-{
+-	unsigned int i;
+-
+-	for (i = 0; i < ctx->nr_user_bufs; i++)
+-		io_buffer_unmap(ctx, &ctx->user_bufs[i]);
+-	kfree(ctx->user_bufs);
+-	io_rsrc_data_free(ctx->buf_data);
+-	ctx->user_bufs = NULL;
+-	ctx->buf_data = NULL;
+-	ctx->nr_user_bufs = 0;
+-}
+-
+-static int io_sqe_buffers_unregister(struct io_ring_ctx *ctx)
+-{
+-	int ret;
+-
+-	if (!ctx->buf_data)
+-		return -ENXIO;
+-
+-	ret = io_rsrc_ref_quiesce(ctx->buf_data, ctx);
+-	if (!ret)
+-		__io_sqe_buffers_unregister(ctx);
+-	return ret;
+-}
+-
+-static int io_copy_iov(struct io_ring_ctx *ctx, struct iovec *dst,
+-		       void __user *arg, unsigned index)
+-{
+-	struct iovec __user *src;
+-
+-#ifdef CONFIG_COMPAT
+-	if (ctx->compat) {
+-		struct compat_iovec __user *ciovs;
+-		struct compat_iovec ciov;
+-
+-		ciovs = (struct compat_iovec __user *) arg;
+-		if (copy_from_user(&ciov, &ciovs[index], sizeof(ciov)))
+-			return -EFAULT;
+-
+-		dst->iov_base = u64_to_user_ptr((u64)ciov.iov_base);
+-		dst->iov_len = ciov.iov_len;
+-		return 0;
+-	}
+-#endif
+-	src = (struct iovec __user *) arg;
+-	if (copy_from_user(dst, &src[index], sizeof(*dst)))
+-		return -EFAULT;
+-	return 0;
+-}
+-
+-/*
+- * Not super efficient, but this is just a registration time. And we do cache
+- * the last compound head, so generally we'll only do a full search if we don't
+- * match that one.
+- *
+- * We check if the given compound head page has already been accounted, to
+- * avoid double accounting it. This allows us to account the full size of the
+- * page, not just the constituent pages of a huge page.
+- */
+-static bool headpage_already_acct(struct io_ring_ctx *ctx, struct page **pages,
+-				  int nr_pages, struct page *hpage)
+-{
+-	int i, j;
+-
+-	/* check current page array */
+-	for (i = 0; i < nr_pages; i++) {
+-		if (!PageCompound(pages[i]))
+-			continue;
+-		if (compound_head(pages[i]) == hpage)
+-			return true;
+-	}
+-
+-	/* check previously registered pages */
+-	for (i = 0; i < ctx->nr_user_bufs; i++) {
+-		struct io_mapped_ubuf *imu = ctx->user_bufs[i];
+-
+-		for (j = 0; j < imu->nr_bvecs; j++) {
+-			if (!PageCompound(imu->bvec[j].bv_page))
+-				continue;
+-			if (compound_head(imu->bvec[j].bv_page) == hpage)
+-				return true;
+-		}
+-	}
+-
+-	return false;
+-}
+-
+-static int io_buffer_account_pin(struct io_ring_ctx *ctx, struct page **pages,
+-				 int nr_pages, struct io_mapped_ubuf *imu,
+-				 struct page **last_hpage)
+-{
+-	int i, ret;
+-
+-	imu->acct_pages = 0;
+-	for (i = 0; i < nr_pages; i++) {
+-		if (!PageCompound(pages[i])) {
+-			imu->acct_pages++;
+-		} else {
+-			struct page *hpage;
+-
+-			hpage = compound_head(pages[i]);
+-			if (hpage == *last_hpage)
+-				continue;
+-			*last_hpage = hpage;
+-			if (headpage_already_acct(ctx, pages, i, hpage))
+-				continue;
+-			imu->acct_pages += page_size(hpage) >> PAGE_SHIFT;
+-		}
+-	}
+-
+-	if (!imu->acct_pages)
+-		return 0;
+-
+-	ret = io_account_mem(ctx, imu->acct_pages);
+-	if (ret)
+-		imu->acct_pages = 0;
+-	return ret;
+-}
+-
+-static int io_sqe_buffer_register(struct io_ring_ctx *ctx, struct iovec *iov,
+-				  struct io_mapped_ubuf **pimu,
+-				  struct page **last_hpage)
+-{
+-	struct io_mapped_ubuf *imu = NULL;
+-	struct vm_area_struct **vmas = NULL;
+-	struct page **pages = NULL;
+-	unsigned long off, start, end, ubuf;
+-	size_t size;
+-	int ret, pret, nr_pages, i;
+-
+-	if (!iov->iov_base) {
+-		*pimu = ctx->dummy_ubuf;
+-		return 0;
+-	}
+- +- ubuf = (unsigned long) iov->iov_base; +- end = (ubuf + iov->iov_len + PAGE_SIZE - 1) >> PAGE_SHIFT; +- start = ubuf >> PAGE_SHIFT; +- nr_pages = end - start; +- +- *pimu = NULL; +- ret = -ENOMEM; +- +- pages = kvmalloc_array(nr_pages, sizeof(struct page *), GFP_KERNEL); +- if (!pages) +- goto done; +- +- vmas = kvmalloc_array(nr_pages, sizeof(struct vm_area_struct *), +- GFP_KERNEL); +- if (!vmas) +- goto done; +- +- imu = kvmalloc(struct_size(imu, bvec, nr_pages), GFP_KERNEL); +- if (!imu) +- goto done; +- +- ret = 0; +- mmap_read_lock(current->mm); +- pret = pin_user_pages(ubuf, nr_pages, FOLL_WRITE | FOLL_LONGTERM, +- pages, vmas); +- if (pret == nr_pages) { +- /* don't support file backed memory */ +- for (i = 0; i < nr_pages; i++) { +- struct vm_area_struct *vma = vmas[i]; +- +- if (vma_is_shmem(vma)) +- continue; +- if (vma->vm_file && +- !is_file_hugepages(vma->vm_file)) { +- ret = -EOPNOTSUPP; +- break; +- } +- } +- } else { +- ret = pret < 0 ? pret : -EFAULT; +- } +- mmap_read_unlock(current->mm); +- if (ret) { +- /* +- * if we did partial map, or found file backed vmas, +- * release any pages we did get +- */ +- if (pret > 0) +- unpin_user_pages(pages, pret); +- goto done; +- } +- +- ret = io_buffer_account_pin(ctx, pages, pret, imu, last_hpage); +- if (ret) { +- unpin_user_pages(pages, pret); +- goto done; +- } +- +- off = ubuf & ~PAGE_MASK; +- size = iov->iov_len; +- for (i = 0; i < nr_pages; i++) { +- size_t vec_len; +- +- vec_len = min_t(size_t, size, PAGE_SIZE - off); +- imu->bvec[i].bv_page = pages[i]; +- imu->bvec[i].bv_len = vec_len; +- imu->bvec[i].bv_offset = off; +- off = 0; +- size -= vec_len; +- } +- /* store original address for later verification */ +- imu->ubuf = ubuf; +- imu->ubuf_end = ubuf + iov->iov_len; +- imu->nr_bvecs = nr_pages; +- *pimu = imu; +- ret = 0; +-done: +- if (ret) +- kvfree(imu); +- kvfree(pages); +- kvfree(vmas); +- return ret; +-} +- +-static int io_buffers_map_alloc(struct io_ring_ctx *ctx, unsigned int nr_args) +-{ +- ctx->user_bufs = kcalloc(nr_args, sizeof(*ctx->user_bufs), GFP_KERNEL); +- return ctx->user_bufs ? 0 : -ENOMEM; +-} +- +-static int io_buffer_validate(struct iovec *iov) +-{ +- unsigned long tmp, acct_len = iov->iov_len + (PAGE_SIZE - 1); +- +- /* +- * Don't impose further limits on the size and buffer +- * constraints here, we'll -EINVAL later when IO is +- * submitted if they are wrong. +- */ +- if (!iov->iov_base) +- return iov->iov_len ? 
-EFAULT : 0; +- if (!iov->iov_len) +- return -EFAULT; +- +- /* arbitrary limit, but we need something */ +- if (iov->iov_len > SZ_1G) +- return -EFAULT; +- +- if (check_add_overflow((unsigned long)iov->iov_base, acct_len, &tmp)) +- return -EOVERFLOW; +- +- return 0; +-} +- +-static int io_sqe_buffers_register(struct io_ring_ctx *ctx, void __user *arg, +- unsigned int nr_args, u64 __user *tags) +-{ +- struct page *last_hpage = NULL; +- struct io_rsrc_data *data; +- int i, ret; +- struct iovec iov; +- +- if (ctx->user_bufs) +- return -EBUSY; +- if (!nr_args || nr_args > IORING_MAX_REG_BUFFERS) +- return -EINVAL; +- ret = io_rsrc_node_switch_start(ctx); +- if (ret) +- return ret; +- ret = io_rsrc_data_alloc(ctx, io_rsrc_buf_put, tags, nr_args, &data); +- if (ret) +- return ret; +- ret = io_buffers_map_alloc(ctx, nr_args); +- if (ret) { +- io_rsrc_data_free(data); +- return ret; +- } +- +- for (i = 0; i < nr_args; i++, ctx->nr_user_bufs++) { +- ret = io_copy_iov(ctx, &iov, arg, i); +- if (ret) +- break; +- ret = io_buffer_validate(&iov); +- if (ret) +- break; +- if (!iov.iov_base && *io_get_tag_slot(data, i)) { +- ret = -EINVAL; +- break; +- } +- +- ret = io_sqe_buffer_register(ctx, &iov, &ctx->user_bufs[i], +- &last_hpage); +- if (ret) +- break; +- } +- +- WARN_ON_ONCE(ctx->buf_data); +- +- ctx->buf_data = data; +- if (ret) +- __io_sqe_buffers_unregister(ctx); +- else +- io_rsrc_node_switch(ctx, NULL); +- return ret; +-} +- +-static int __io_sqe_buffers_update(struct io_ring_ctx *ctx, +- struct io_uring_rsrc_update2 *up, +- unsigned int nr_args) +-{ +- u64 __user *tags = u64_to_user_ptr(up->tags); +- struct iovec iov, __user *iovs = u64_to_user_ptr(up->data); +- struct page *last_hpage = NULL; +- bool needs_switch = false; +- __u32 done; +- int i, err; +- +- if (!ctx->buf_data) +- return -ENXIO; +- if (up->offset + nr_args > ctx->nr_user_bufs) +- return -EINVAL; +- +- for (done = 0; done < nr_args; done++) { +- struct io_mapped_ubuf *imu; +- int offset = up->offset + done; +- u64 tag = 0; +- +- err = io_copy_iov(ctx, &iov, iovs, done); +- if (err) +- break; +- if (tags && copy_from_user(&tag, &tags[done], sizeof(tag))) { +- err = -EFAULT; +- break; +- } +- err = io_buffer_validate(&iov); +- if (err) +- break; +- if (!iov.iov_base && tag) { +- err = -EINVAL; +- break; +- } +- err = io_sqe_buffer_register(ctx, &iov, &imu, &last_hpage); +- if (err) +- break; +- +- i = array_index_nospec(offset, ctx->nr_user_bufs); +- if (ctx->user_bufs[i] != ctx->dummy_ubuf) { +- err = io_queue_rsrc_removal(ctx->buf_data, offset, +- ctx->rsrc_node, ctx->user_bufs[i]); +- if (unlikely(err)) { +- io_buffer_unmap(ctx, &imu); +- break; +- } +- ctx->user_bufs[i] = NULL; +- needs_switch = true; +- } +- +- ctx->user_bufs[i] = imu; +- *io_get_tag_slot(ctx->buf_data, offset) = tag; +- } +- +- if (needs_switch) +- io_rsrc_node_switch(ctx, ctx->buf_data); +- return done ? 
done : err; +-} +- +-static int io_eventfd_register(struct io_ring_ctx *ctx, void __user *arg) +-{ +- __s32 __user *fds = arg; +- int fd; +- +- if (ctx->cq_ev_fd) +- return -EBUSY; +- +- if (copy_from_user(&fd, fds, sizeof(*fds))) +- return -EFAULT; +- +- ctx->cq_ev_fd = eventfd_ctx_fdget(fd); +- if (IS_ERR(ctx->cq_ev_fd)) { +- int ret = PTR_ERR(ctx->cq_ev_fd); +- +- ctx->cq_ev_fd = NULL; +- return ret; +- } +- +- return 0; +-} +- +-static int io_eventfd_unregister(struct io_ring_ctx *ctx) +-{ +- if (ctx->cq_ev_fd) { +- eventfd_ctx_put(ctx->cq_ev_fd); +- ctx->cq_ev_fd = NULL; +- return 0; +- } +- +- return -ENXIO; +-} +- +-static void io_destroy_buffers(struct io_ring_ctx *ctx) +-{ +- struct io_buffer *buf; +- unsigned long index; +- +- xa_for_each(&ctx->io_buffers, index, buf) { +- __io_remove_buffers(ctx, buf, index, -1U); +- cond_resched(); +- } +-} +- +-static void io_req_cache_free(struct list_head *list) +-{ +- struct io_kiocb *req, *nxt; +- +- list_for_each_entry_safe(req, nxt, list, inflight_entry) { +- list_del(&req->inflight_entry); +- kmem_cache_free(req_cachep, req); +- } +-} +- +-static void io_req_caches_free(struct io_ring_ctx *ctx) +-{ +- struct io_submit_state *state = &ctx->submit_state; +- +- mutex_lock(&ctx->uring_lock); +- +- if (state->free_reqs) { +- kmem_cache_free_bulk(req_cachep, state->free_reqs, state->reqs); +- state->free_reqs = 0; +- } +- +- io_flush_cached_locked_reqs(ctx, state); +- io_req_cache_free(&state->free_list); +- mutex_unlock(&ctx->uring_lock); +-} +- +-static void io_wait_rsrc_data(struct io_rsrc_data *data) +-{ +- if (data && !atomic_dec_and_test(&data->refs)) +- wait_for_completion(&data->done); +-} +- +-static void io_ring_ctx_free(struct io_ring_ctx *ctx) +-{ +- io_sq_thread_finish(ctx); +- +- if (ctx->mm_account) { +- mmdrop(ctx->mm_account); +- ctx->mm_account = NULL; +- } +- +- /* __io_rsrc_put_work() may need uring_lock to progress, wait w/o it */ +- io_wait_rsrc_data(ctx->buf_data); +- io_wait_rsrc_data(ctx->file_data); +- +- mutex_lock(&ctx->uring_lock); +- if (ctx->buf_data) +- __io_sqe_buffers_unregister(ctx); +- if (ctx->file_data) +- __io_sqe_files_unregister(ctx); +- if (ctx->rings) +- __io_cqring_overflow_flush(ctx, true); +- mutex_unlock(&ctx->uring_lock); +- io_eventfd_unregister(ctx); +- io_destroy_buffers(ctx); +- if (ctx->sq_creds) +- put_cred(ctx->sq_creds); +- +- /* there are no registered resources left, nobody uses it */ +- if (ctx->rsrc_node) +- io_rsrc_node_destroy(ctx->rsrc_node); +- if (ctx->rsrc_backup_node) +- io_rsrc_node_destroy(ctx->rsrc_backup_node); +- flush_delayed_work(&ctx->rsrc_put_work); +- +- WARN_ON_ONCE(!list_empty(&ctx->rsrc_ref_list)); +- WARN_ON_ONCE(!llist_empty(&ctx->rsrc_put_llist)); +- +-#if defined(CONFIG_UNIX) +- if (ctx->ring_sock) { +- ctx->ring_sock->file = NULL; /* so that iput() is called */ +- sock_release(ctx->ring_sock); +- } +-#endif +- WARN_ON_ONCE(!list_empty(&ctx->ltimeout_list)); +- +- io_mem_free(ctx->rings); +- io_mem_free(ctx->sq_sqes); +- +- percpu_ref_exit(&ctx->refs); +- free_uid(ctx->user); +- io_req_caches_free(ctx); +- if (ctx->hash_map) +- io_wq_put_hash(ctx->hash_map); +- kfree(ctx->cancel_hash); +- kfree(ctx->dummy_ubuf); +- kfree(ctx); +-} +- +-static __poll_t io_uring_poll(struct file *file, poll_table *wait) +-{ +- struct io_ring_ctx *ctx = file->private_data; +- __poll_t mask = 0; +- +- poll_wait(file, &ctx->poll_wait, wait); +- /* +- * synchronizes with barrier from wq_has_sleeper call in +- * io_commit_cqring +- */ +- smp_rmb(); +- if (!io_sqring_full(ctx)) +- 
mask |= EPOLLOUT | EPOLLWRNORM; +- +- /* +- * Don't flush cqring overflow list here, just do a simple check. +- * Otherwise there could possible be ABBA deadlock: +- * CPU0 CPU1 +- * ---- ---- +- * lock(&ctx->uring_lock); +- * lock(&ep->mtx); +- * lock(&ctx->uring_lock); +- * lock(&ep->mtx); +- * +- * Users may get EPOLLIN meanwhile seeing nothing in cqring, this +- * pushs them to do the flush. +- */ +- if (io_cqring_events(ctx) || test_bit(0, &ctx->check_cq_overflow)) +- mask |= EPOLLIN | EPOLLRDNORM; +- +- return mask; +-} +- +-static int io_unregister_personality(struct io_ring_ctx *ctx, unsigned id) +-{ +- const struct cred *creds; +- +- creds = xa_erase(&ctx->personalities, id); +- if (creds) { +- put_cred(creds); +- return 0; +- } +- +- return -EINVAL; +-} +- +-struct io_tctx_exit { +- struct callback_head task_work; +- struct completion completion; +- struct io_ring_ctx *ctx; +-}; +- +-static void io_tctx_exit_cb(struct callback_head *cb) +-{ +- struct io_uring_task *tctx = current->io_uring; +- struct io_tctx_exit *work; +- +- work = container_of(cb, struct io_tctx_exit, task_work); +- /* +- * When @in_idle, we're in cancellation and it's racy to remove the +- * node. It'll be removed by the end of cancellation, just ignore it. +- */ +- if (!atomic_read(&tctx->in_idle)) +- io_uring_del_tctx_node((unsigned long)work->ctx); +- complete(&work->completion); +-} +- +-static bool io_cancel_ctx_cb(struct io_wq_work *work, void *data) +-{ +- struct io_kiocb *req = container_of(work, struct io_kiocb, work); +- +- return req->ctx == data; +-} +- +-static void io_ring_exit_work(struct work_struct *work) +-{ +- struct io_ring_ctx *ctx = container_of(work, struct io_ring_ctx, exit_work); +- unsigned long timeout = jiffies + HZ * 60 * 5; +- unsigned long interval = HZ / 20; +- struct io_tctx_exit exit; +- struct io_tctx_node *node; +- int ret; +- +- /* +- * If we're doing polled IO and end up having requests being +- * submitted async (out-of-line), then completions can come in while +- * we're waiting for refs to drop. We need to reap these manually, +- * as nobody else will be looking for them. +- */ +- do { +- io_uring_try_cancel_requests(ctx, NULL, true); +- if (ctx->sq_data) { +- struct io_sq_data *sqd = ctx->sq_data; +- struct task_struct *tsk; +- +- io_sq_thread_park(sqd); +- tsk = sqd->thread; +- if (tsk && tsk->io_uring && tsk->io_uring->io_wq) +- io_wq_cancel_cb(tsk->io_uring->io_wq, +- io_cancel_ctx_cb, ctx, true); +- io_sq_thread_unpark(sqd); +- } +- +- if (WARN_ON_ONCE(time_after(jiffies, timeout))) { +- /* there is little hope left, don't run it too often */ +- interval = HZ * 60; +- } +- } while (!wait_for_completion_timeout(&ctx->ref_comp, interval)); +- +- init_completion(&exit.completion); +- init_task_work(&exit.task_work, io_tctx_exit_cb); +- exit.ctx = ctx; +- /* +- * Some may use context even when all refs and requests have been put, +- * and they are free to do so while still holding uring_lock or +- * completion_lock, see io_req_task_submit(). Apart from other work, +- * this lock/unlock section also waits them to finish. 
+- */ +- mutex_lock(&ctx->uring_lock); +- while (!list_empty(&ctx->tctx_list)) { +- WARN_ON_ONCE(time_after(jiffies, timeout)); +- +- node = list_first_entry(&ctx->tctx_list, struct io_tctx_node, +- ctx_node); +- /* don't spin on a single task if cancellation failed */ +- list_rotate_left(&ctx->tctx_list); +- ret = task_work_add(node->task, &exit.task_work, TWA_SIGNAL); +- if (WARN_ON_ONCE(ret)) +- continue; +- wake_up_process(node->task); +- +- mutex_unlock(&ctx->uring_lock); +- wait_for_completion(&exit.completion); +- mutex_lock(&ctx->uring_lock); +- } +- mutex_unlock(&ctx->uring_lock); +- spin_lock(&ctx->completion_lock); +- spin_unlock(&ctx->completion_lock); +- +- io_ring_ctx_free(ctx); +-} +- +-/* Returns true if we found and killed one or more timeouts */ +-static bool io_kill_timeouts(struct io_ring_ctx *ctx, struct task_struct *tsk, +- bool cancel_all) +-{ +- struct io_kiocb *req, *tmp; +- int canceled = 0; +- +- spin_lock(&ctx->completion_lock); +- spin_lock_irq(&ctx->timeout_lock); +- list_for_each_entry_safe(req, tmp, &ctx->timeout_list, timeout.list) { +- if (io_match_task(req, tsk, cancel_all)) { +- io_kill_timeout(req, -ECANCELED); +- canceled++; +- } +- } +- spin_unlock_irq(&ctx->timeout_lock); +- if (canceled != 0) +- io_commit_cqring(ctx); +- spin_unlock(&ctx->completion_lock); +- if (canceled != 0) +- io_cqring_ev_posted(ctx); +- return canceled != 0; +-} +- +-static void io_ring_ctx_wait_and_kill(struct io_ring_ctx *ctx) +-{ +- unsigned long index; +- struct creds *creds; +- +- mutex_lock(&ctx->uring_lock); +- percpu_ref_kill(&ctx->refs); +- if (ctx->rings) +- __io_cqring_overflow_flush(ctx, true); +- xa_for_each(&ctx->personalities, index, creds) +- io_unregister_personality(ctx, index); +- mutex_unlock(&ctx->uring_lock); +- +- io_kill_timeouts(ctx, NULL, true); +- io_poll_remove_all(ctx, NULL, true); +- +- /* if we failed setting up the ctx, we might not have any rings */ +- io_iopoll_try_reap_events(ctx); +- +- INIT_WORK(&ctx->exit_work, io_ring_exit_work); +- /* +- * Use system_unbound_wq to avoid spawning tons of event kworkers +- * if we're exiting a ton of rings at the same time. It just adds +- * noise and overhead, there's no discernable change in runtime +- * over using system_wq. 
+- */ +- queue_work(system_unbound_wq, &ctx->exit_work); +-} +- +-static int io_uring_release(struct inode *inode, struct file *file) +-{ +- struct io_ring_ctx *ctx = file->private_data; +- +- file->private_data = NULL; +- io_ring_ctx_wait_and_kill(ctx); +- return 0; +-} +- +-struct io_task_cancel { +- struct task_struct *task; +- bool all; +-}; +- +-static bool io_cancel_task_cb(struct io_wq_work *work, void *data) +-{ +- struct io_kiocb *req = container_of(work, struct io_kiocb, work); +- struct io_task_cancel *cancel = data; +- bool ret; +- +- if (!cancel->all && (req->flags & REQ_F_LINK_TIMEOUT)) { +- struct io_ring_ctx *ctx = req->ctx; +- +- /* protect against races with linked timeouts */ +- spin_lock(&ctx->completion_lock); +- ret = io_match_task(req, cancel->task, cancel->all); +- spin_unlock(&ctx->completion_lock); +- } else { +- ret = io_match_task(req, cancel->task, cancel->all); +- } +- return ret; +-} +- +-static bool io_cancel_defer_files(struct io_ring_ctx *ctx, +- struct task_struct *task, bool cancel_all) +-{ +- struct io_defer_entry *de; +- LIST_HEAD(list); +- +- spin_lock(&ctx->completion_lock); +- list_for_each_entry_reverse(de, &ctx->defer_list, list) { +- if (io_match_task(de->req, task, cancel_all)) { +- list_cut_position(&list, &ctx->defer_list, &de->list); +- break; +- } +- } +- spin_unlock(&ctx->completion_lock); +- if (list_empty(&list)) +- return false; +- +- while (!list_empty(&list)) { +- de = list_first_entry(&list, struct io_defer_entry, list); +- list_del_init(&de->list); +- io_req_complete_failed(de->req, -ECANCELED); +- kfree(de); +- } +- return true; +-} +- +-static bool io_uring_try_cancel_iowq(struct io_ring_ctx *ctx) +-{ +- struct io_tctx_node *node; +- enum io_wq_cancel cret; +- bool ret = false; +- +- mutex_lock(&ctx->uring_lock); +- list_for_each_entry(node, &ctx->tctx_list, ctx_node) { +- struct io_uring_task *tctx = node->task->io_uring; +- +- /* +- * io_wq will stay alive while we hold uring_lock, because it's +- * killed after ctx nodes, which requires to take the lock. +- */ +- if (!tctx || !tctx->io_wq) +- continue; +- cret = io_wq_cancel_cb(tctx->io_wq, io_cancel_ctx_cb, ctx, true); +- ret |= (cret != IO_WQ_CANCEL_NOTFOUND); +- } +- mutex_unlock(&ctx->uring_lock); +- +- return ret; +-} +- +-static void io_uring_try_cancel_requests(struct io_ring_ctx *ctx, +- struct task_struct *task, +- bool cancel_all) +-{ +- struct io_task_cancel cancel = { .task = task, .all = cancel_all, }; +- struct io_uring_task *tctx = task ? task->io_uring : NULL; +- +- while (1) { +- enum io_wq_cancel cret; +- bool ret = false; +- +- if (!task) { +- ret |= io_uring_try_cancel_iowq(ctx); +- } else if (tctx && tctx->io_wq) { +- /* +- * Cancels requests of all rings, not only @ctx, but +- * it's fine as the task is in exit/exec. 
+- */ +- cret = io_wq_cancel_cb(tctx->io_wq, io_cancel_task_cb, +- &cancel, true); +- ret |= (cret != IO_WQ_CANCEL_NOTFOUND); +- } +- +- /* SQPOLL thread does its own polling */ +- if ((!(ctx->flags & IORING_SETUP_SQPOLL) && cancel_all) || +- (ctx->sq_data && ctx->sq_data->thread == current)) { +- while (!list_empty_careful(&ctx->iopoll_list)) { +- io_iopoll_try_reap_events(ctx); +- ret = true; +- } +- } +- +- ret |= io_cancel_defer_files(ctx, task, cancel_all); +- ret |= io_poll_remove_all(ctx, task, cancel_all); +- ret |= io_kill_timeouts(ctx, task, cancel_all); +- if (task) +- ret |= io_run_task_work(); +- if (!ret) +- break; +- cond_resched(); +- } +-} +- +-static int __io_uring_add_tctx_node(struct io_ring_ctx *ctx) +-{ +- struct io_uring_task *tctx = current->io_uring; +- struct io_tctx_node *node; +- int ret; +- +- if (unlikely(!tctx)) { +- ret = io_uring_alloc_task_context(current, ctx); +- if (unlikely(ret)) +- return ret; +- +- tctx = current->io_uring; +- if (ctx->iowq_limits_set) { +- unsigned int limits[2] = { ctx->iowq_limits[0], +- ctx->iowq_limits[1], }; +- +- ret = io_wq_max_workers(tctx->io_wq, limits); +- if (ret) +- return ret; +- } +- } +- if (!xa_load(&tctx->xa, (unsigned long)ctx)) { +- node = kmalloc(sizeof(*node), GFP_KERNEL); +- if (!node) +- return -ENOMEM; +- node->ctx = ctx; +- node->task = current; +- +- ret = xa_err(xa_store(&tctx->xa, (unsigned long)ctx, +- node, GFP_KERNEL)); +- if (ret) { +- kfree(node); +- return ret; +- } +- +- mutex_lock(&ctx->uring_lock); +- list_add(&node->ctx_node, &ctx->tctx_list); +- mutex_unlock(&ctx->uring_lock); +- } +- tctx->last = ctx; +- return 0; +-} +- +-/* +- * Note that this task has used io_uring. We use it for cancelation purposes. +- */ +-static inline int io_uring_add_tctx_node(struct io_ring_ctx *ctx) +-{ +- struct io_uring_task *tctx = current->io_uring; +- +- if (likely(tctx && tctx->last == ctx)) +- return 0; +- return __io_uring_add_tctx_node(ctx); +-} +- +-/* +- * Remove this io_uring_file -> task mapping. +- */ +-static void io_uring_del_tctx_node(unsigned long index) +-{ +- struct io_uring_task *tctx = current->io_uring; +- struct io_tctx_node *node; +- +- if (!tctx) +- return; +- node = xa_erase(&tctx->xa, index); +- if (!node) +- return; +- +- WARN_ON_ONCE(current != node->task); +- WARN_ON_ONCE(list_empty(&node->ctx_node)); +- +- mutex_lock(&node->ctx->uring_lock); +- list_del(&node->ctx_node); +- mutex_unlock(&node->ctx->uring_lock); +- +- if (tctx->last == node->ctx) +- tctx->last = NULL; +- kfree(node); +-} +- +-static void io_uring_clean_tctx(struct io_uring_task *tctx) +-{ +- struct io_wq *wq = tctx->io_wq; +- struct io_tctx_node *node; +- unsigned long index; +- +- xa_for_each(&tctx->xa, index, node) { +- io_uring_del_tctx_node(index); +- cond_resched(); +- } +- if (wq) { +- /* +- * Must be after io_uring_del_task_file() (removes nodes under +- * uring_lock) to avoid race with io_uring_try_cancel_iowq(). 
+- */ +- io_wq_put_and_exit(wq); +- tctx->io_wq = NULL; +- } +-} +- +-static s64 tctx_inflight(struct io_uring_task *tctx, bool tracked) +-{ +- if (tracked) +- return atomic_read(&tctx->inflight_tracked); +- return percpu_counter_sum(&tctx->inflight); +-} +- +-static void io_uring_drop_tctx_refs(struct task_struct *task) +-{ +- struct io_uring_task *tctx = task->io_uring; +- unsigned int refs = tctx->cached_refs; +- +- if (refs) { +- tctx->cached_refs = 0; +- percpu_counter_sub(&tctx->inflight, refs); +- put_task_struct_many(task, refs); +- } +-} +- +-/* +- * Find any io_uring ctx that this task has registered or done IO on, and cancel +- * requests. @sqd should be not-null IIF it's an SQPOLL thread cancellation. +- */ +-static void io_uring_cancel_generic(bool cancel_all, struct io_sq_data *sqd) +-{ +- struct io_uring_task *tctx = current->io_uring; +- struct io_ring_ctx *ctx; +- s64 inflight; +- DEFINE_WAIT(wait); +- +- WARN_ON_ONCE(sqd && sqd->thread != current); +- +- if (!current->io_uring) +- return; +- if (tctx->io_wq) +- io_wq_exit_start(tctx->io_wq); +- +- atomic_inc(&tctx->in_idle); +- do { +- io_uring_drop_tctx_refs(current); +- /* read completions before cancelations */ +- inflight = tctx_inflight(tctx, !cancel_all); +- if (!inflight) +- break; +- +- if (!sqd) { +- struct io_tctx_node *node; +- unsigned long index; +- +- xa_for_each(&tctx->xa, index, node) { +- /* sqpoll task will cancel all its requests */ +- if (node->ctx->sq_data) +- continue; +- io_uring_try_cancel_requests(node->ctx, current, +- cancel_all); +- } +- } else { +- list_for_each_entry(ctx, &sqd->ctx_list, sqd_list) +- io_uring_try_cancel_requests(ctx, current, +- cancel_all); +- } +- +- prepare_to_wait(&tctx->wait, &wait, TASK_UNINTERRUPTIBLE); +- io_uring_drop_tctx_refs(current); +- /* +- * If we've seen completions, retry without waiting. This +- * avoids a race where a completion comes in before we did +- * prepare_to_wait(). +- */ +- if (inflight == tctx_inflight(tctx, !cancel_all)) +- schedule(); +- finish_wait(&tctx->wait, &wait); +- } while (1); +- atomic_dec(&tctx->in_idle); +- +- io_uring_clean_tctx(tctx); +- if (cancel_all) { +- /* for exec all current's requests should be gone, kill tctx */ +- __io_uring_free(current); +- } +-} +- +-void __io_uring_cancel(bool cancel_all) +-{ +- io_uring_cancel_generic(cancel_all, NULL); +-} +- +-static void *io_uring_validate_mmap_request(struct file *file, +- loff_t pgoff, size_t sz) +-{ +- struct io_ring_ctx *ctx = file->private_data; +- loff_t offset = pgoff << PAGE_SHIFT; +- struct page *page; +- void *ptr; +- +- switch (offset) { +- case IORING_OFF_SQ_RING: +- case IORING_OFF_CQ_RING: +- ptr = ctx->rings; +- break; +- case IORING_OFF_SQES: +- ptr = ctx->sq_sqes; +- break; +- default: +- return ERR_PTR(-EINVAL); +- } +- +- page = virt_to_head_page(ptr); +- if (sz > page_size(page)) +- return ERR_PTR(-EINVAL); +- +- return ptr; +-} +- +-#ifdef CONFIG_MMU +- +-static int io_uring_mmap(struct file *file, struct vm_area_struct *vma) +-{ +- size_t sz = vma->vm_end - vma->vm_start; +- unsigned long pfn; +- void *ptr; +- +- ptr = io_uring_validate_mmap_request(file, vma->vm_pgoff, sz); +- if (IS_ERR(ptr)) +- return PTR_ERR(ptr); +- +- pfn = virt_to_phys(ptr) >> PAGE_SHIFT; +- return remap_pfn_range(vma, vma->vm_start, pfn, sz, vma->vm_page_prot); +-} +- +-#else /* !CONFIG_MMU */ +- +-static int io_uring_mmap(struct file *file, struct vm_area_struct *vma) +-{ +- return vma->vm_flags & (VM_SHARED | VM_MAYSHARE) ? 
0 : -EINVAL; +-} +- +-static unsigned int io_uring_nommu_mmap_capabilities(struct file *file) +-{ +- return NOMMU_MAP_DIRECT | NOMMU_MAP_READ | NOMMU_MAP_WRITE; +-} +- +-static unsigned long io_uring_nommu_get_unmapped_area(struct file *file, +- unsigned long addr, unsigned long len, +- unsigned long pgoff, unsigned long flags) +-{ +- void *ptr; +- +- ptr = io_uring_validate_mmap_request(file, pgoff, len); +- if (IS_ERR(ptr)) +- return PTR_ERR(ptr); +- +- return (unsigned long) ptr; +-} +- +-#endif /* !CONFIG_MMU */ +- +-static int io_sqpoll_wait_sq(struct io_ring_ctx *ctx) +-{ +- DEFINE_WAIT(wait); +- +- do { +- if (!io_sqring_full(ctx)) +- break; +- prepare_to_wait(&ctx->sqo_sq_wait, &wait, TASK_INTERRUPTIBLE); +- +- if (!io_sqring_full(ctx)) +- break; +- schedule(); +- } while (!signal_pending(current)); +- +- finish_wait(&ctx->sqo_sq_wait, &wait); +- return 0; +-} +- +-static int io_get_ext_arg(unsigned flags, const void __user *argp, size_t *argsz, +- struct __kernel_timespec __user **ts, +- const sigset_t __user **sig) +-{ +- struct io_uring_getevents_arg arg; +- +- /* +- * If EXT_ARG isn't set, then we have no timespec and the argp pointer +- * is just a pointer to the sigset_t. +- */ +- if (!(flags & IORING_ENTER_EXT_ARG)) { +- *sig = (const sigset_t __user *) argp; +- *ts = NULL; +- return 0; +- } +- +- /* +- * EXT_ARG is set - ensure we agree on the size of it and copy in our +- * timespec and sigset_t pointers if good. +- */ +- if (*argsz != sizeof(arg)) +- return -EINVAL; +- if (copy_from_user(&arg, argp, sizeof(arg))) +- return -EFAULT; +- *sig = u64_to_user_ptr(arg.sigmask); +- *argsz = arg.sigmask_sz; +- *ts = u64_to_user_ptr(arg.ts); +- return 0; +-} +- +-SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit, +- u32, min_complete, u32, flags, const void __user *, argp, +- size_t, argsz) +-{ +- struct io_ring_ctx *ctx; +- int submitted = 0; +- struct fd f; +- long ret; +- +- io_run_task_work(); +- +- if (unlikely(flags & ~(IORING_ENTER_GETEVENTS | IORING_ENTER_SQ_WAKEUP | +- IORING_ENTER_SQ_WAIT | IORING_ENTER_EXT_ARG))) +- return -EINVAL; +- +- f = fdget(fd); +- if (unlikely(!f.file)) +- return -EBADF; +- +- ret = -EOPNOTSUPP; +- if (unlikely(f.file->f_op != &io_uring_fops)) +- goto out_fput; +- +- ret = -ENXIO; +- ctx = f.file->private_data; +- if (unlikely(!percpu_ref_tryget(&ctx->refs))) +- goto out_fput; +- +- ret = -EBADFD; +- if (unlikely(ctx->flags & IORING_SETUP_R_DISABLED)) +- goto out; +- +- /* +- * For SQ polling, the thread will do all submissions and completions. +- * Just return the requested submit count, and wake the thread if +- * we were asked to. 
+- */ +- ret = 0; +- if (ctx->flags & IORING_SETUP_SQPOLL) { +- io_cqring_overflow_flush(ctx); +- +- if (unlikely(ctx->sq_data->thread == NULL)) { +- ret = -EOWNERDEAD; +- goto out; +- } +- if (flags & IORING_ENTER_SQ_WAKEUP) +- wake_up(&ctx->sq_data->wait); +- if (flags & IORING_ENTER_SQ_WAIT) { +- ret = io_sqpoll_wait_sq(ctx); +- if (ret) +- goto out; +- } +- submitted = to_submit; +- } else if (to_submit) { +- ret = io_uring_add_tctx_node(ctx); +- if (unlikely(ret)) +- goto out; +- mutex_lock(&ctx->uring_lock); +- submitted = io_submit_sqes(ctx, to_submit); +- mutex_unlock(&ctx->uring_lock); +- +- if (submitted != to_submit) +- goto out; +- } +- if (flags & IORING_ENTER_GETEVENTS) { +- const sigset_t __user *sig; +- struct __kernel_timespec __user *ts; +- +- ret = io_get_ext_arg(flags, argp, &argsz, &ts, &sig); +- if (unlikely(ret)) +- goto out; +- +- min_complete = min(min_complete, ctx->cq_entries); +- +- /* +- * When SETUP_IOPOLL and SETUP_SQPOLL are both enabled, user +- * space applications don't need to do io completion events +- * polling again, they can rely on io_sq_thread to do polling +- * work, which can reduce cpu usage and uring_lock contention. +- */ +- if (ctx->flags & IORING_SETUP_IOPOLL && +- !(ctx->flags & IORING_SETUP_SQPOLL)) { +- ret = io_iopoll_check(ctx, min_complete); +- } else { +- ret = io_cqring_wait(ctx, min_complete, sig, argsz, ts); +- } +- } +- +-out: +- percpu_ref_put(&ctx->refs); +-out_fput: +- fdput(f); +- return submitted ? submitted : ret; +-} +- +-#ifdef CONFIG_PROC_FS +-static int io_uring_show_cred(struct seq_file *m, unsigned int id, +- const struct cred *cred) +-{ +- struct user_namespace *uns = seq_user_ns(m); +- struct group_info *gi; +- kernel_cap_t cap; +- unsigned __capi; +- int g; +- +- seq_printf(m, "%5d\n", id); +- seq_put_decimal_ull(m, "\tUid:\t", from_kuid_munged(uns, cred->uid)); +- seq_put_decimal_ull(m, "\t\t", from_kuid_munged(uns, cred->euid)); +- seq_put_decimal_ull(m, "\t\t", from_kuid_munged(uns, cred->suid)); +- seq_put_decimal_ull(m, "\t\t", from_kuid_munged(uns, cred->fsuid)); +- seq_put_decimal_ull(m, "\n\tGid:\t", from_kgid_munged(uns, cred->gid)); +- seq_put_decimal_ull(m, "\t\t", from_kgid_munged(uns, cred->egid)); +- seq_put_decimal_ull(m, "\t\t", from_kgid_munged(uns, cred->sgid)); +- seq_put_decimal_ull(m, "\t\t", from_kgid_munged(uns, cred->fsgid)); +- seq_puts(m, "\n\tGroups:\t"); +- gi = cred->group_info; +- for (g = 0; g < gi->ngroups; g++) { +- seq_put_decimal_ull(m, g ? " " : "", +- from_kgid_munged(uns, gi->gid[g])); +- } +- seq_puts(m, "\n\tCapEff:\t"); +- cap = cred->cap_effective; +- CAP_FOR_EACH_U32(__capi) +- seq_put_hex_ll(m, NULL, cap.cap[CAP_LAST_U32 - __capi], 8); +- seq_putc(m, '\n'); +- return 0; +-} +- +-static void __io_uring_show_fdinfo(struct io_ring_ctx *ctx, struct seq_file *m) +-{ +- struct io_sq_data *sq = NULL; +- bool has_lock; +- int i; +- +- /* +- * Avoid ABBA deadlock between the seq lock and the io_uring mutex, +- * since fdinfo case grabs it in the opposite direction of normal use +- * cases. If we fail to get the lock, we just don't iterate any +- * structures that could be going away outside the io_uring mutex. +- */ +- has_lock = mutex_trylock(&ctx->uring_lock); +- +- if (has_lock && (ctx->flags & IORING_SETUP_SQPOLL)) { +- sq = ctx->sq_data; +- if (!sq->thread) +- sq = NULL; +- } +- +- seq_printf(m, "SqThread:\t%d\n", sq ? task_pid_nr(sq->thread) : -1); +- seq_printf(m, "SqThreadCpu:\t%d\n", sq ? 
task_cpu(sq->thread) : -1); +- seq_printf(m, "UserFiles:\t%u\n", ctx->nr_user_files); +- for (i = 0; has_lock && i < ctx->nr_user_files; i++) { +- struct file *f = io_file_from_index(ctx, i); +- +- if (f) +- seq_printf(m, "%5u: %s\n", i, file_dentry(f)->d_iname); +- else +- seq_printf(m, "%5u: <none>\n", i); +- } +- seq_printf(m, "UserBufs:\t%u\n", ctx->nr_user_bufs); +- for (i = 0; has_lock && i < ctx->nr_user_bufs; i++) { +- struct io_mapped_ubuf *buf = ctx->user_bufs[i]; +- unsigned int len = buf->ubuf_end - buf->ubuf; +- +- seq_printf(m, "%5u: 0x%llx/%u\n", i, buf->ubuf, len); +- } +- if (has_lock && !xa_empty(&ctx->personalities)) { +- unsigned long index; +- const struct cred *cred; +- +- seq_printf(m, "Personalities:\n"); +- xa_for_each(&ctx->personalities, index, cred) +- io_uring_show_cred(m, index, cred); +- } +- seq_printf(m, "PollList:\n"); +- spin_lock(&ctx->completion_lock); +- for (i = 0; i < (1U << ctx->cancel_hash_bits); i++) { +- struct hlist_head *list = &ctx->cancel_hash[i]; +- struct io_kiocb *req; +- +- hlist_for_each_entry(req, list, hash_node) +- seq_printf(m, " op=%d, task_works=%d\n", req->opcode, +- req->task->task_works != NULL); +- } +- spin_unlock(&ctx->completion_lock); +- if (has_lock) +- mutex_unlock(&ctx->uring_lock); +-} +- +-static void io_uring_show_fdinfo(struct seq_file *m, struct file *f) +-{ +- struct io_ring_ctx *ctx = f->private_data; +- +- if (percpu_ref_tryget(&ctx->refs)) { +- __io_uring_show_fdinfo(ctx, m); +- percpu_ref_put(&ctx->refs); +- } +-} +-#endif +- +-static const struct file_operations io_uring_fops = { +- .release = io_uring_release, +- .mmap = io_uring_mmap, +-#ifndef CONFIG_MMU +- .get_unmapped_area = io_uring_nommu_get_unmapped_area, +- .mmap_capabilities = io_uring_nommu_mmap_capabilities, +-#endif +- .poll = io_uring_poll, +-#ifdef CONFIG_PROC_FS +- .show_fdinfo = io_uring_show_fdinfo, +-#endif +-}; +- +-static int io_allocate_scq_urings(struct io_ring_ctx *ctx, +- struct io_uring_params *p) +-{ +- struct io_rings *rings; +- size_t size, sq_array_offset; +- +- /* make sure these are sane, as we already accounted them */ +- ctx->sq_entries = p->sq_entries; +- ctx->cq_entries = p->cq_entries; +- +- size = rings_size(p->sq_entries, p->cq_entries, &sq_array_offset); +- if (size == SIZE_MAX) +- return -EOVERFLOW; +- +- rings = io_mem_alloc(size); +- if (!rings) +- return -ENOMEM; +- +- ctx->rings = rings; +- ctx->sq_array = (u32 *)((char *)rings + sq_array_offset); +- rings->sq_ring_mask = p->sq_entries - 1; +- rings->cq_ring_mask = p->cq_entries - 1; +- rings->sq_ring_entries = p->sq_entries; +- rings->cq_ring_entries = p->cq_entries; +- +- size = array_size(sizeof(struct io_uring_sqe), p->sq_entries); +- if (size == SIZE_MAX) { +- io_mem_free(ctx->rings); +- ctx->rings = NULL; +- return -EOVERFLOW; +- } +- +- ctx->sq_sqes = io_mem_alloc(size); +- if (!ctx->sq_sqes) { +- io_mem_free(ctx->rings); +- ctx->rings = NULL; +- return -ENOMEM; +- } +- +- return 0; +-} +- +-static int io_uring_install_fd(struct io_ring_ctx *ctx, struct file *file) +-{ +- int ret, fd; +- +- fd = get_unused_fd_flags(O_RDWR | O_CLOEXEC); +- if (fd < 0) +- return fd; +- +- ret = io_uring_add_tctx_node(ctx); +- if (ret) { +- put_unused_fd(fd); +- return ret; +- } +- fd_install(fd, file); +- return fd; +-} +- +-/* +- * Allocate an anonymous fd, this is what constitutes the application +- * visible backing of an io_uring instance. The application mmaps this +- * fd to gain access to the SQ/CQ ring details. 
If UNIX sockets are enabled, +- * we have to tie this fd to a socket for file garbage collection purposes. +- */ +-static struct file *io_uring_get_file(struct io_ring_ctx *ctx) +-{ +- struct file *file; +-#if defined(CONFIG_UNIX) +- int ret; +- +- ret = sock_create_kern(&init_net, PF_UNIX, SOCK_RAW, IPPROTO_IP, +- &ctx->ring_sock); +- if (ret) +- return ERR_PTR(ret); +-#endif +- +- file = anon_inode_getfile("[io_uring]", &io_uring_fops, ctx, +- O_RDWR | O_CLOEXEC); +-#if defined(CONFIG_UNIX) +- if (IS_ERR(file)) { +- sock_release(ctx->ring_sock); +- ctx->ring_sock = NULL; +- } else { +- ctx->ring_sock->file = file; +- } +-#endif +- return file; +-} +- +-static int io_uring_create(unsigned entries, struct io_uring_params *p, +- struct io_uring_params __user *params) +-{ +- struct io_ring_ctx *ctx; +- struct file *file; +- int ret; +- +- if (!entries) +- return -EINVAL; +- if (entries > IORING_MAX_ENTRIES) { +- if (!(p->flags & IORING_SETUP_CLAMP)) +- return -EINVAL; +- entries = IORING_MAX_ENTRIES; +- } +- +- /* +- * Use twice as many entries for the CQ ring. It's possible for the +- * application to drive a higher depth than the size of the SQ ring, +- * since the sqes are only used at submission time. This allows for +- * some flexibility in overcommitting a bit. If the application has +- * set IORING_SETUP_CQSIZE, it will have passed in the desired number +- * of CQ ring entries manually. +- */ +- p->sq_entries = roundup_pow_of_two(entries); +- if (p->flags & IORING_SETUP_CQSIZE) { +- /* +- * If IORING_SETUP_CQSIZE is set, we do the same roundup +- * to a power-of-two, if it isn't already. We do NOT impose +- * any cq vs sq ring sizing. +- */ +- if (!p->cq_entries) +- return -EINVAL; +- if (p->cq_entries > IORING_MAX_CQ_ENTRIES) { +- if (!(p->flags & IORING_SETUP_CLAMP)) +- return -EINVAL; +- p->cq_entries = IORING_MAX_CQ_ENTRIES; +- } +- p->cq_entries = roundup_pow_of_two(p->cq_entries); +- if (p->cq_entries < p->sq_entries) +- return -EINVAL; +- } else { +- p->cq_entries = 2 * p->sq_entries; +- } +- +- ctx = io_ring_ctx_alloc(p); +- if (!ctx) +- return -ENOMEM; +- ctx->compat = in_compat_syscall(); +- if (!capable(CAP_IPC_LOCK)) +- ctx->user = get_uid(current_user()); +- +- /* +- * This is just grabbed for accounting purposes. When a process exits, +- * the mm is exited and dropped before the files, hence we need to hang +- * on to this mm purely for the purposes of being able to unaccount +- * memory (locked/pinned vm). It's not used for anything else. 
+- */ +- mmgrab(current->mm); +- ctx->mm_account = current->mm; +- +- ret = io_allocate_scq_urings(ctx, p); +- if (ret) +- goto err; +- +- ret = io_sq_offload_create(ctx, p); +- if (ret) +- goto err; +- /* always set a rsrc node */ +- ret = io_rsrc_node_switch_start(ctx); +- if (ret) +- goto err; +- io_rsrc_node_switch(ctx, NULL); +- +- memset(&p->sq_off, 0, sizeof(p->sq_off)); +- p->sq_off.head = offsetof(struct io_rings, sq.head); +- p->sq_off.tail = offsetof(struct io_rings, sq.tail); +- p->sq_off.ring_mask = offsetof(struct io_rings, sq_ring_mask); +- p->sq_off.ring_entries = offsetof(struct io_rings, sq_ring_entries); +- p->sq_off.flags = offsetof(struct io_rings, sq_flags); +- p->sq_off.dropped = offsetof(struct io_rings, sq_dropped); +- p->sq_off.array = (char *)ctx->sq_array - (char *)ctx->rings; +- +- memset(&p->cq_off, 0, sizeof(p->cq_off)); +- p->cq_off.head = offsetof(struct io_rings, cq.head); +- p->cq_off.tail = offsetof(struct io_rings, cq.tail); +- p->cq_off.ring_mask = offsetof(struct io_rings, cq_ring_mask); +- p->cq_off.ring_entries = offsetof(struct io_rings, cq_ring_entries); +- p->cq_off.overflow = offsetof(struct io_rings, cq_overflow); +- p->cq_off.cqes = offsetof(struct io_rings, cqes); +- p->cq_off.flags = offsetof(struct io_rings, cq_flags); +- +- p->features = IORING_FEAT_SINGLE_MMAP | IORING_FEAT_NODROP | +- IORING_FEAT_SUBMIT_STABLE | IORING_FEAT_RW_CUR_POS | +- IORING_FEAT_CUR_PERSONALITY | IORING_FEAT_FAST_POLL | +- IORING_FEAT_POLL_32BITS | IORING_FEAT_SQPOLL_NONFIXED | +- IORING_FEAT_EXT_ARG | IORING_FEAT_NATIVE_WORKERS | +- IORING_FEAT_RSRC_TAGS; +- +- if (copy_to_user(params, p, sizeof(*p))) { +- ret = -EFAULT; +- goto err; +- } +- +- file = io_uring_get_file(ctx); +- if (IS_ERR(file)) { +- ret = PTR_ERR(file); +- goto err; +- } +- +- /* +- * Install ring fd as the very last thing, so we don't risk someone +- * having closed it before we finish setup +- */ +- ret = io_uring_install_fd(ctx, file); +- if (ret < 0) { +- /* fput will clean it up */ +- fput(file); +- return ret; +- } +- +- trace_io_uring_create(ret, ctx, p->sq_entries, p->cq_entries, p->flags); +- return ret; +-err: +- io_ring_ctx_wait_and_kill(ctx); +- return ret; +-} +- +-/* +- * Sets up an aio uring context, and returns the fd. Applications asks for a +- * ring size, we return the actual sq/cq ring sizes (among other things) in the +- * params structure passed in. 
+- */ +-static long io_uring_setup(u32 entries, struct io_uring_params __user *params) +-{ +- struct io_uring_params p; +- int i; +- +- if (copy_from_user(&p, params, sizeof(p))) +- return -EFAULT; +- for (i = 0; i < ARRAY_SIZE(p.resv); i++) { +- if (p.resv[i]) +- return -EINVAL; +- } +- +- if (p.flags & ~(IORING_SETUP_IOPOLL | IORING_SETUP_SQPOLL | +- IORING_SETUP_SQ_AFF | IORING_SETUP_CQSIZE | +- IORING_SETUP_CLAMP | IORING_SETUP_ATTACH_WQ | +- IORING_SETUP_R_DISABLED)) +- return -EINVAL; +- +- return io_uring_create(entries, &p, params); +-} +- +-SYSCALL_DEFINE2(io_uring_setup, u32, entries, +- struct io_uring_params __user *, params) +-{ +- return io_uring_setup(entries, params); +-} +- +-static int io_probe(struct io_ring_ctx *ctx, void __user *arg, unsigned nr_args) +-{ +- struct io_uring_probe *p; +- size_t size; +- int i, ret; +- +- size = struct_size(p, ops, nr_args); +- if (size == SIZE_MAX) +- return -EOVERFLOW; +- p = kzalloc(size, GFP_KERNEL); +- if (!p) +- return -ENOMEM; +- +- ret = -EFAULT; +- if (copy_from_user(p, arg, size)) +- goto out; +- ret = -EINVAL; +- if (memchr_inv(p, 0, size)) +- goto out; +- +- p->last_op = IORING_OP_LAST - 1; +- if (nr_args > IORING_OP_LAST) +- nr_args = IORING_OP_LAST; +- +- for (i = 0; i < nr_args; i++) { +- p->ops[i].op = i; +- if (!io_op_defs[i].not_supported) +- p->ops[i].flags = IO_URING_OP_SUPPORTED; +- } +- p->ops_len = i; +- +- ret = 0; +- if (copy_to_user(arg, p, size)) +- ret = -EFAULT; +-out: +- kfree(p); +- return ret; +-} +- +-static int io_register_personality(struct io_ring_ctx *ctx) +-{ +- const struct cred *creds; +- u32 id; +- int ret; +- +- creds = get_current_cred(); +- +- ret = xa_alloc_cyclic(&ctx->personalities, &id, (void *)creds, +- XA_LIMIT(0, USHRT_MAX), &ctx->pers_next, GFP_KERNEL); +- if (ret < 0) { +- put_cred(creds); +- return ret; +- } +- return id; +-} +- +-static int io_register_restrictions(struct io_ring_ctx *ctx, void __user *arg, +- unsigned int nr_args) +-{ +- struct io_uring_restriction *res; +- size_t size; +- int i, ret; +- +- /* Restrictions allowed only if rings started disabled */ +- if (!(ctx->flags & IORING_SETUP_R_DISABLED)) +- return -EBADFD; +- +- /* We allow only a single restrictions registration */ +- if (ctx->restrictions.registered) +- return -EBUSY; +- +- if (!arg || nr_args > IORING_MAX_RESTRICTIONS) +- return -EINVAL; +- +- size = array_size(nr_args, sizeof(*res)); +- if (size == SIZE_MAX) +- return -EOVERFLOW; +- +- res = memdup_user(arg, size); +- if (IS_ERR(res)) +- return PTR_ERR(res); +- +- ret = 0; +- +- for (i = 0; i < nr_args; i++) { +- switch (res[i].opcode) { +- case IORING_RESTRICTION_REGISTER_OP: +- if (res[i].register_op >= IORING_REGISTER_LAST) { +- ret = -EINVAL; +- goto out; +- } +- +- __set_bit(res[i].register_op, +- ctx->restrictions.register_op); +- break; +- case IORING_RESTRICTION_SQE_OP: +- if (res[i].sqe_op >= IORING_OP_LAST) { +- ret = -EINVAL; +- goto out; +- } +- +- __set_bit(res[i].sqe_op, ctx->restrictions.sqe_op); +- break; +- case IORING_RESTRICTION_SQE_FLAGS_ALLOWED: +- ctx->restrictions.sqe_flags_allowed = res[i].sqe_flags; +- break; +- case IORING_RESTRICTION_SQE_FLAGS_REQUIRED: +- ctx->restrictions.sqe_flags_required = res[i].sqe_flags; +- break; +- default: +- ret = -EINVAL; +- goto out; +- } +- } +- +-out: +- /* Reset all restrictions if an error happened */ +- if (ret != 0) +- memset(&ctx->restrictions, 0, sizeof(ctx->restrictions)); +- else +- ctx->restrictions.registered = true; +- +- kfree(res); +- return ret; +-} +- +-static int 
io_register_enable_rings(struct io_ring_ctx *ctx) +-{ +- if (!(ctx->flags & IORING_SETUP_R_DISABLED)) +- return -EBADFD; +- +- if (ctx->restrictions.registered) +- ctx->restricted = 1; +- +- ctx->flags &= ~IORING_SETUP_R_DISABLED; +- if (ctx->sq_data && wq_has_sleeper(&ctx->sq_data->wait)) +- wake_up(&ctx->sq_data->wait); +- return 0; +-} +- +-static int __io_register_rsrc_update(struct io_ring_ctx *ctx, unsigned type, +- struct io_uring_rsrc_update2 *up, +- unsigned nr_args) +-{ +- __u32 tmp; +- int err; +- +- if (up->resv) +- return -EINVAL; +- if (check_add_overflow(up->offset, nr_args, &tmp)) +- return -EOVERFLOW; +- err = io_rsrc_node_switch_start(ctx); +- if (err) +- return err; +- +- switch (type) { +- case IORING_RSRC_FILE: +- return __io_sqe_files_update(ctx, up, nr_args); +- case IORING_RSRC_BUFFER: +- return __io_sqe_buffers_update(ctx, up, nr_args); +- } +- return -EINVAL; +-} +- +-static int io_register_files_update(struct io_ring_ctx *ctx, void __user *arg, +- unsigned nr_args) +-{ +- struct io_uring_rsrc_update2 up; +- +- if (!nr_args) +- return -EINVAL; +- memset(&up, 0, sizeof(up)); +- if (copy_from_user(&up, arg, sizeof(struct io_uring_rsrc_update))) +- return -EFAULT; +- return __io_register_rsrc_update(ctx, IORING_RSRC_FILE, &up, nr_args); +-} +- +-static int io_register_rsrc_update(struct io_ring_ctx *ctx, void __user *arg, +- unsigned size, unsigned type) +-{ +- struct io_uring_rsrc_update2 up; +- +- if (size != sizeof(up)) +- return -EINVAL; +- if (copy_from_user(&up, arg, sizeof(up))) +- return -EFAULT; +- if (!up.nr || up.resv) +- return -EINVAL; +- return __io_register_rsrc_update(ctx, type, &up, up.nr); +-} +- +-static int io_register_rsrc(struct io_ring_ctx *ctx, void __user *arg, +- unsigned int size, unsigned int type) +-{ +- struct io_uring_rsrc_register rr; +- +- /* keep it extendible */ +- if (size != sizeof(rr)) +- return -EINVAL; +- +- memset(&rr, 0, sizeof(rr)); +- if (copy_from_user(&rr, arg, size)) +- return -EFAULT; +- if (!rr.nr || rr.resv || rr.resv2) +- return -EINVAL; +- +- switch (type) { +- case IORING_RSRC_FILE: +- return io_sqe_files_register(ctx, u64_to_user_ptr(rr.data), +- rr.nr, u64_to_user_ptr(rr.tags)); +- case IORING_RSRC_BUFFER: +- return io_sqe_buffers_register(ctx, u64_to_user_ptr(rr.data), +- rr.nr, u64_to_user_ptr(rr.tags)); +- } +- return -EINVAL; +-} +- +-static int io_register_iowq_aff(struct io_ring_ctx *ctx, void __user *arg, +- unsigned len) +-{ +- struct io_uring_task *tctx = current->io_uring; +- cpumask_var_t new_mask; +- int ret; +- +- if (!tctx || !tctx->io_wq) +- return -EINVAL; +- +- if (!alloc_cpumask_var(&new_mask, GFP_KERNEL)) +- return -ENOMEM; +- +- cpumask_clear(new_mask); +- if (len > cpumask_size()) +- len = cpumask_size(); +- +- if (copy_from_user(new_mask, arg, len)) { +- free_cpumask_var(new_mask); +- return -EFAULT; +- } +- +- ret = io_wq_cpu_affinity(tctx->io_wq, new_mask); +- free_cpumask_var(new_mask); +- return ret; +-} +- +-static int io_unregister_iowq_aff(struct io_ring_ctx *ctx) +-{ +- struct io_uring_task *tctx = current->io_uring; +- +- if (!tctx || !tctx->io_wq) +- return -EINVAL; +- +- return io_wq_cpu_affinity(tctx->io_wq, NULL); +-} +- +-static int io_register_iowq_max_workers(struct io_ring_ctx *ctx, +- void __user *arg) +- __must_hold(&ctx->uring_lock) +-{ +- struct io_tctx_node *node; +- struct io_uring_task *tctx = NULL; +- struct io_sq_data *sqd = NULL; +- __u32 new_count[2]; +- int i, ret; +- +- if (copy_from_user(new_count, arg, sizeof(new_count))) +- return -EFAULT; +- for (i = 0; i < 
ARRAY_SIZE(new_count); i++) +- if (new_count[i] > INT_MAX) +- return -EINVAL; +- +- if (ctx->flags & IORING_SETUP_SQPOLL) { +- sqd = ctx->sq_data; +- if (sqd) { +- /* +- * Observe the correct sqd->lock -> ctx->uring_lock +- * ordering. Fine to drop uring_lock here, we hold +- * a ref to the ctx. +- */ +- refcount_inc(&sqd->refs); +- mutex_unlock(&ctx->uring_lock); +- mutex_lock(&sqd->lock); +- mutex_lock(&ctx->uring_lock); +- if (sqd->thread) +- tctx = sqd->thread->io_uring; +- } +- } else { +- tctx = current->io_uring; +- } +- +- BUILD_BUG_ON(sizeof(new_count) != sizeof(ctx->iowq_limits)); +- +- memcpy(ctx->iowq_limits, new_count, sizeof(new_count)); +- ctx->iowq_limits_set = true; +- +- ret = -EINVAL; +- if (tctx && tctx->io_wq) { +- ret = io_wq_max_workers(tctx->io_wq, new_count); +- if (ret) +- goto err; +- } else { +- memset(new_count, 0, sizeof(new_count)); +- } +- +- if (sqd) { +- mutex_unlock(&sqd->lock); +- io_put_sq_data(sqd); +- } +- +- if (copy_to_user(arg, new_count, sizeof(new_count))) +- return -EFAULT; +- +- /* that's it for SQPOLL, only the SQPOLL task creates requests */ +- if (sqd) +- return 0; +- +- /* now propagate the restriction to all registered users */ +- list_for_each_entry(node, &ctx->tctx_list, ctx_node) { +- struct io_uring_task *tctx = node->task->io_uring; +- +- if (WARN_ON_ONCE(!tctx->io_wq)) +- continue; +- +- for (i = 0; i < ARRAY_SIZE(new_count); i++) +- new_count[i] = ctx->iowq_limits[i]; +- /* ignore errors, it always returns zero anyway */ +- (void)io_wq_max_workers(tctx->io_wq, new_count); +- } +- return 0; +-err: +- if (sqd) { +- mutex_unlock(&sqd->lock); +- io_put_sq_data(sqd); +- } +- return ret; +-} +- +-static bool io_register_op_must_quiesce(int op) +-{ +- switch (op) { +- case IORING_REGISTER_BUFFERS: +- case IORING_UNREGISTER_BUFFERS: +- case IORING_REGISTER_FILES: +- case IORING_UNREGISTER_FILES: +- case IORING_REGISTER_FILES_UPDATE: +- case IORING_REGISTER_PROBE: +- case IORING_REGISTER_PERSONALITY: +- case IORING_UNREGISTER_PERSONALITY: +- case IORING_REGISTER_FILES2: +- case IORING_REGISTER_FILES_UPDATE2: +- case IORING_REGISTER_BUFFERS2: +- case IORING_REGISTER_BUFFERS_UPDATE: +- case IORING_REGISTER_IOWQ_AFF: +- case IORING_UNREGISTER_IOWQ_AFF: +- case IORING_REGISTER_IOWQ_MAX_WORKERS: +- return false; +- default: +- return true; +- } +-} +- +-static int io_ctx_quiesce(struct io_ring_ctx *ctx) +-{ +- long ret; +- +- percpu_ref_kill(&ctx->refs); +- +- /* +- * Drop uring mutex before waiting for references to exit. If another +- * thread is currently inside io_uring_enter() it might need to grab the +- * uring_lock to make progress. If we hold it here across the drain +- * wait, then we can deadlock. It's safe to drop the mutex here, since +- * no new references will come in after we've killed the percpu ref. +- */ +- mutex_unlock(&ctx->uring_lock); +- do { +- ret = wait_for_completion_interruptible(&ctx->ref_comp); +- if (!ret) +- break; +- ret = io_run_task_work_sig(); +- } while (ret >= 0); +- mutex_lock(&ctx->uring_lock); +- +- if (ret) +- io_refs_resurrect(&ctx->refs, &ctx->ref_comp); +- return ret; +-} +- +-static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode, +- void __user *arg, unsigned nr_args) +- __releases(ctx->uring_lock) +- __acquires(ctx->uring_lock) +-{ +- int ret; +- +- /* +- * We're inside the ring mutex, if the ref is already dying, then +- * someone else killed the ctx or is already going through +- * io_uring_register(). 
+- */ +- if (percpu_ref_is_dying(&ctx->refs)) +- return -ENXIO; +- +- if (ctx->restricted) { +- if (opcode >= IORING_REGISTER_LAST) +- return -EINVAL; +- opcode = array_index_nospec(opcode, IORING_REGISTER_LAST); +- if (!test_bit(opcode, ctx->restrictions.register_op)) +- return -EACCES; +- } +- +- if (io_register_op_must_quiesce(opcode)) { +- ret = io_ctx_quiesce(ctx); +- if (ret) +- return ret; +- } +- +- switch (opcode) { +- case IORING_REGISTER_BUFFERS: +- ret = io_sqe_buffers_register(ctx, arg, nr_args, NULL); +- break; +- case IORING_UNREGISTER_BUFFERS: +- ret = -EINVAL; +- if (arg || nr_args) +- break; +- ret = io_sqe_buffers_unregister(ctx); +- break; +- case IORING_REGISTER_FILES: +- ret = io_sqe_files_register(ctx, arg, nr_args, NULL); +- break; +- case IORING_UNREGISTER_FILES: +- ret = -EINVAL; +- if (arg || nr_args) +- break; +- ret = io_sqe_files_unregister(ctx); +- break; +- case IORING_REGISTER_FILES_UPDATE: +- ret = io_register_files_update(ctx, arg, nr_args); +- break; +- case IORING_REGISTER_EVENTFD: +- case IORING_REGISTER_EVENTFD_ASYNC: +- ret = -EINVAL; +- if (nr_args != 1) +- break; +- ret = io_eventfd_register(ctx, arg); +- if (ret) +- break; +- if (opcode == IORING_REGISTER_EVENTFD_ASYNC) +- ctx->eventfd_async = 1; +- else +- ctx->eventfd_async = 0; +- break; +- case IORING_UNREGISTER_EVENTFD: +- ret = -EINVAL; +- if (arg || nr_args) +- break; +- ret = io_eventfd_unregister(ctx); +- break; +- case IORING_REGISTER_PROBE: +- ret = -EINVAL; +- if (!arg || nr_args > 256) +- break; +- ret = io_probe(ctx, arg, nr_args); +- break; +- case IORING_REGISTER_PERSONALITY: +- ret = -EINVAL; +- if (arg || nr_args) +- break; +- ret = io_register_personality(ctx); +- break; +- case IORING_UNREGISTER_PERSONALITY: +- ret = -EINVAL; +- if (arg) +- break; +- ret = io_unregister_personality(ctx, nr_args); +- break; +- case IORING_REGISTER_ENABLE_RINGS: +- ret = -EINVAL; +- if (arg || nr_args) +- break; +- ret = io_register_enable_rings(ctx); +- break; +- case IORING_REGISTER_RESTRICTIONS: +- ret = io_register_restrictions(ctx, arg, nr_args); +- break; +- case IORING_REGISTER_FILES2: +- ret = io_register_rsrc(ctx, arg, nr_args, IORING_RSRC_FILE); +- break; +- case IORING_REGISTER_FILES_UPDATE2: +- ret = io_register_rsrc_update(ctx, arg, nr_args, +- IORING_RSRC_FILE); +- break; +- case IORING_REGISTER_BUFFERS2: +- ret = io_register_rsrc(ctx, arg, nr_args, IORING_RSRC_BUFFER); +- break; +- case IORING_REGISTER_BUFFERS_UPDATE: +- ret = io_register_rsrc_update(ctx, arg, nr_args, +- IORING_RSRC_BUFFER); +- break; +- case IORING_REGISTER_IOWQ_AFF: +- ret = -EINVAL; +- if (!arg || !nr_args) +- break; +- ret = io_register_iowq_aff(ctx, arg, nr_args); +- break; +- case IORING_UNREGISTER_IOWQ_AFF: +- ret = -EINVAL; +- if (arg || nr_args) +- break; +- ret = io_unregister_iowq_aff(ctx); +- break; +- case IORING_REGISTER_IOWQ_MAX_WORKERS: +- ret = -EINVAL; +- if (!arg || nr_args != 2) +- break; +- ret = io_register_iowq_max_workers(ctx, arg); +- break; +- default: +- ret = -EINVAL; +- break; +- } +- +- if (io_register_op_must_quiesce(opcode)) { +- /* bring the ctx back to life */ +- percpu_ref_reinit(&ctx->refs); +- reinit_completion(&ctx->ref_comp); +- } +- return ret; +-} +- +-SYSCALL_DEFINE4(io_uring_register, unsigned int, fd, unsigned int, opcode, +- void __user *, arg, unsigned int, nr_args) +-{ +- struct io_ring_ctx *ctx; +- long ret = -EBADF; +- struct fd f; +- +- f = fdget(fd); +- if (!f.file) +- return -EBADF; +- +- ret = -EOPNOTSUPP; +- if (f.file->f_op != &io_uring_fops) +- goto out_fput; 
+- +- ctx = f.file->private_data; +- +- io_run_task_work(); +- +- mutex_lock(&ctx->uring_lock); +- ret = __io_uring_register(ctx, opcode, arg, nr_args); +- mutex_unlock(&ctx->uring_lock); +- trace_io_uring_register(ctx, opcode, ctx->nr_user_files, ctx->nr_user_bufs, +- ctx->cq_ev_fd != NULL, ret); +-out_fput: +- fdput(f); +- return ret; +-} +- +-static int __init io_uring_init(void) +-{ +-#define __BUILD_BUG_VERIFY_ELEMENT(stype, eoffset, etype, ename) do { \ +- BUILD_BUG_ON(offsetof(stype, ename) != eoffset); \ +- BUILD_BUG_ON(sizeof(etype) != sizeof_field(stype, ename)); \ +-} while (0) +- +-#define BUILD_BUG_SQE_ELEM(eoffset, etype, ename) \ +- __BUILD_BUG_VERIFY_ELEMENT(struct io_uring_sqe, eoffset, etype, ename) +- BUILD_BUG_ON(sizeof(struct io_uring_sqe) != 64); +- BUILD_BUG_SQE_ELEM(0, __u8, opcode); +- BUILD_BUG_SQE_ELEM(1, __u8, flags); +- BUILD_BUG_SQE_ELEM(2, __u16, ioprio); +- BUILD_BUG_SQE_ELEM(4, __s32, fd); +- BUILD_BUG_SQE_ELEM(8, __u64, off); +- BUILD_BUG_SQE_ELEM(8, __u64, addr2); +- BUILD_BUG_SQE_ELEM(16, __u64, addr); +- BUILD_BUG_SQE_ELEM(16, __u64, splice_off_in); +- BUILD_BUG_SQE_ELEM(24, __u32, len); +- BUILD_BUG_SQE_ELEM(28, __kernel_rwf_t, rw_flags); +- BUILD_BUG_SQE_ELEM(28, /* compat */ int, rw_flags); +- BUILD_BUG_SQE_ELEM(28, /* compat */ __u32, rw_flags); +- BUILD_BUG_SQE_ELEM(28, __u32, fsync_flags); +- BUILD_BUG_SQE_ELEM(28, /* compat */ __u16, poll_events); +- BUILD_BUG_SQE_ELEM(28, __u32, poll32_events); +- BUILD_BUG_SQE_ELEM(28, __u32, sync_range_flags); +- BUILD_BUG_SQE_ELEM(28, __u32, msg_flags); +- BUILD_BUG_SQE_ELEM(28, __u32, timeout_flags); +- BUILD_BUG_SQE_ELEM(28, __u32, accept_flags); +- BUILD_BUG_SQE_ELEM(28, __u32, cancel_flags); +- BUILD_BUG_SQE_ELEM(28, __u32, open_flags); +- BUILD_BUG_SQE_ELEM(28, __u32, statx_flags); +- BUILD_BUG_SQE_ELEM(28, __u32, fadvise_advice); +- BUILD_BUG_SQE_ELEM(28, __u32, splice_flags); +- BUILD_BUG_SQE_ELEM(32, __u64, user_data); +- BUILD_BUG_SQE_ELEM(40, __u16, buf_index); +- BUILD_BUG_SQE_ELEM(40, __u16, buf_group); +- BUILD_BUG_SQE_ELEM(42, __u16, personality); +- BUILD_BUG_SQE_ELEM(44, __s32, splice_fd_in); +- BUILD_BUG_SQE_ELEM(44, __u32, file_index); +- +- BUILD_BUG_ON(sizeof(struct io_uring_files_update) != +- sizeof(struct io_uring_rsrc_update)); +- BUILD_BUG_ON(sizeof(struct io_uring_rsrc_update) > +- sizeof(struct io_uring_rsrc_update2)); +- +- /* ->buf_index is u16 */ +- BUILD_BUG_ON(IORING_MAX_REG_BUFFERS >= (1u << 16)); +- +- /* should fit into one byte */ +- BUILD_BUG_ON(SQE_VALID_FLAGS >= (1 << 8)); +- +- BUILD_BUG_ON(ARRAY_SIZE(io_op_defs) != IORING_OP_LAST); +- BUILD_BUG_ON(__REQ_F_LAST_BIT > 8 * sizeof(int)); +- +- req_cachep = KMEM_CACHE(io_kiocb, SLAB_HWCACHE_ALIGN | SLAB_PANIC | +- SLAB_ACCOUNT); +- return 0; +-}; +-__initcall(io_uring_init); +diff --git a/fs/ioctl.c b/fs/ioctl.c +index 504e695781124..e0a3455f9a0f6 100644 +--- a/fs/ioctl.c ++++ b/fs/ioctl.c +@@ -173,7 +173,7 @@ int fiemap_prep(struct inode *inode, struct fiemap_extent_info *fieinfo, + + if (*len == 0) + return -EINVAL; +- if (start > maxbytes) ++ if (start >= maxbytes) + return -EFBIG; + + /* +diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c +index 9cc5798423d12..87a4f5a2ded0e 100644 +--- a/fs/iomap/buffered-io.c ++++ b/fs/iomap/buffered-io.c +@@ -256,8 +256,13 @@ static loff_t iomap_readpage_iter(const struct iomap_iter *iter, + unsigned poff, plen; + sector_t sector; + +- if (iomap->type == IOMAP_INLINE) +- return min(iomap_read_inline_data(iter, page), length); ++ if (iomap->type == IOMAP_INLINE) { ++ loff_t 
ret = iomap_read_inline_data(iter, page); ++ ++ if (ret < 0) ++ return ret; ++ return 0; ++ } + + /* zero post-eof blocks as the page may be mapped */ + iop = iomap_page_create(iter->inode, page); +@@ -370,6 +375,8 @@ static loff_t iomap_readahead_iter(const struct iomap_iter *iter, + ctx->cur_page_in_bio = false; + } + ret = iomap_readpage_iter(iter, ctx, done); ++ if (ret <= 0) ++ return ret; + } + + return done; +@@ -518,7 +525,8 @@ iomap_write_failed(struct inode *inode, loff_t pos, unsigned len) + * write started inside the existing inode size. + */ + if (pos + len > i_size) +- truncate_pagecache_range(inode, max(pos, i_size), pos + len); ++ truncate_pagecache_range(inode, max(pos, i_size), ++ pos + len - 1); + } + + static int +@@ -750,7 +758,7 @@ again: + * same page as we're writing to, without it being marked + * up-to-date. + */ +- if (unlikely(iov_iter_fault_in_readable(i, bytes))) { ++ if (unlikely(fault_in_iov_iter_readable(i, bytes))) { + status = -EFAULT; + break; + } +diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c +index 4ecd255e0511c..468dcbba45bcb 100644 +--- a/fs/iomap/direct-io.c ++++ b/fs/iomap/direct-io.c +@@ -31,6 +31,7 @@ struct iomap_dio { + atomic_t ref; + unsigned flags; + int error; ++ size_t done_before; + bool wait_for_completion; + + union { +@@ -124,6 +125,9 @@ ssize_t iomap_dio_complete(struct iomap_dio *dio) + if (ret > 0 && (dio->flags & IOMAP_DIO_NEED_SYNC)) + ret = generic_write_sync(iocb, ret); + ++ if (ret > 0) ++ ret += dio->done_before; ++ + kfree(dio); + + return ret; +@@ -371,6 +375,8 @@ static loff_t iomap_dio_hole_iter(const struct iomap_iter *iter, + loff_t length = iov_iter_zero(iomap_length(iter), dio->submit.iter); + + dio->size += length; ++ if (!length) ++ return -EFAULT; + return length; + } + +@@ -402,6 +408,8 @@ static loff_t iomap_dio_inline_iter(const struct iomap_iter *iomi, + copied = copy_to_iter(inline_data, length, iter); + } + dio->size += copied; ++ if (!copied) ++ return -EFAULT; + return copied; + } + +@@ -446,13 +454,21 @@ static loff_t iomap_dio_iter(const struct iomap_iter *iter, + * may be pure data writes. In that case, we still need to do a full data sync + * completion. + * ++ * When page faults are disabled and @dio_flags includes IOMAP_DIO_PARTIAL, ++ * __iomap_dio_rw can return a partial result if it encounters a non-resident ++ * page in @iter after preparing a transfer. In that case, the non-resident ++ * pages can be faulted in and the request resumed with @done_before set to the ++ * number of bytes previously transferred. The request will then complete with ++ * the correct total number of bytes transferred; this is essential for ++ * completing partial requests asynchronously. ++ * + * Returns -ENOTBLK In case of a page invalidation invalidation failure for + * writes. The callers needs to fall back to buffered I/O in this case. 
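
The comment above is the heart of the IOMAP_DIO_PARTIAL contract: with page faults disabled, a direct I/O that hits a non-resident page returns a short count, the caller faults the pages in, and the request is resumed with done_before carrying the bytes already transferred so the eventual completion reports the full total. A toy userspace model of that resume-with-offset loop (every name below is illustrative, not a kernel interface):

    #include <errno.h>
    #include <stdio.h>
    #include <string.h>

    /* Stand-in for a transfer that may stop at a "non-resident" page:
     * returns bytes moved this call, or -EFAULT when it reaches the
     * fault point before moving anything new. */
    static long transfer(char *dst, const char *src, size_t len,
                         size_t done_before, size_t fault_at)
    {
        size_t pos = done_before;

        if (pos >= len)
            return 0;
        if (pos == fault_at)
            return -EFAULT;             /* caller must fault the page in */
        size_t stop = fault_at < len ? fault_at : len;
        memcpy(dst + pos, src + pos, stop - pos);
        return (long)(stop - pos);
    }

    int main(void)
    {
        const char src[8] = "abcdefg";
        char dst[8] = { 0 };
        size_t done = 0, fault_at = 4;
        long ret;

        while ((ret = transfer(dst, src, sizeof(src), done, fault_at)) != 0) {
            if (ret == -EFAULT) {
                /* "fault in" the missing page, then resume with the
                 * byte count so far, like done_before in iomap */
                fault_at = sizeof(src) + 1;
                continue;
            }
            done += (size_t)ret;
        }
        printf("transferred %zu bytes: %s\n", done, dst);
        return 0;
    }
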
+ */ + struct iomap_dio * + __iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, + const struct iomap_ops *ops, const struct iomap_dio_ops *dops, +- unsigned int dio_flags) ++ unsigned int dio_flags, size_t done_before) + { + struct address_space *mapping = iocb->ki_filp->f_mapping; + struct inode *inode = file_inode(iocb->ki_filp); +@@ -482,6 +498,7 @@ __iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, + dio->dops = dops; + dio->error = 0; + dio->flags = 0; ++ dio->done_before = done_before; + + dio->submit.iter = iter; + dio->submit.waiter = current; +@@ -577,6 +594,12 @@ __iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, + if (iov_iter_rw(iter) == READ && iomi.pos >= dio->i_size) + iov_iter_revert(iter, iomi.pos - dio->i_size); + ++ if (ret == -EFAULT && dio->size && (dio_flags & IOMAP_DIO_PARTIAL)) { ++ if (!(iocb->ki_flags & IOCB_NOWAIT)) ++ wait_for_completion = true; ++ ret = 0; ++ } ++ + /* magic error code to fall back to buffered I/O */ + if (ret == -ENOTBLK) { + wait_for_completion = true; +@@ -642,11 +665,11 @@ EXPORT_SYMBOL_GPL(__iomap_dio_rw); + ssize_t + iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, + const struct iomap_ops *ops, const struct iomap_dio_ops *dops, +- unsigned int dio_flags) ++ unsigned int dio_flags, size_t done_before) + { + struct iomap_dio *dio; + +- dio = __iomap_dio_rw(iocb, iter, ops, dops, dio_flags); ++ dio = __iomap_dio_rw(iocb, iter, ops, dops, dio_flags, done_before); + if (IS_ERR_OR_NULL(dio)) + return PTR_ERR_OR_ZERO(dio); + return iomap_dio_complete(dio); +diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c +index 678e2c51b855c..0c6eacfcbeef1 100644 +--- a/fs/isofs/inode.c ++++ b/fs/isofs/inode.c +@@ -1322,6 +1322,8 @@ static int isofs_read_inode(struct inode *inode, int relocated) + + de = (struct iso_directory_record *) (bh->b_data + offset); + de_len = *(unsigned char *) de; ++ if (de_len < sizeof(struct iso_directory_record)) ++ goto fail; + + if (offset + de_len > bufsize) { + int frag1 = bufsize - offset; +diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c +index 3cc4ab2ba7f4f..ac328e3321242 100644 +--- a/fs/jbd2/commit.c ++++ b/fs/jbd2/commit.c +@@ -501,7 +501,6 @@ void jbd2_journal_commit_transaction(journal_t *journal) + } + spin_unlock(&commit_transaction->t_handle_lock); + commit_transaction->t_state = T_SWITCH; +- write_unlock(&journal->j_state_lock); + + J_ASSERT (atomic_read(&commit_transaction->t_outstanding_credits) <= + journal->j_max_transaction_buffers); +@@ -521,6 +520,8 @@ void jbd2_journal_commit_transaction(journal_t *journal) + * has reserved. This is consistent with the existing behaviour + * that multiple jbd2_journal_get_write_access() calls to the same + * buffer are perfectly permissible. ++ * We use journal->j_state_lock here to serialize processing of ++ * t_reserved_list with eviction of buffers from journal_unmap_buffer(). + */ + while (commit_transaction->t_reserved_list) { + jh = commit_transaction->t_reserved_list; +@@ -540,6 +541,7 @@ void jbd2_journal_commit_transaction(journal_t *journal) + jbd2_journal_refile_buffer(journal, jh); + } + ++ write_unlock(&journal->j_state_lock); + /* + * Now try to drop any written-back buffers from the journal's + * checkpoint lists. 
We do this *before* commit because it potentially +@@ -562,13 +564,13 @@ void jbd2_journal_commit_transaction(journal_t *journal) + */ + jbd2_journal_switch_revoke_table(journal); + ++ write_lock(&journal->j_state_lock); + /* + * Reserved credits cannot be claimed anymore, free them + */ + atomic_sub(atomic_read(&journal->j_reserved_credits), + &commit_transaction->t_outstanding_credits); + +- write_lock(&journal->j_state_lock); + trace_jbd2_commit_flushing(journal, commit_transaction); + stats.run.rs_flushing = jiffies; + stats.run.rs_locked = jbd2_time_diff(stats.run.rs_locked, +@@ -579,7 +581,7 @@ void jbd2_journal_commit_transaction(journal_t *journal) + journal->j_running_transaction = NULL; + start_time = ktime_get(); + commit_transaction->t_log_start = journal->j_head; +- wake_up(&journal->j_wait_transaction_locked); ++ wake_up_all(&journal->j_wait_transaction_locked); + write_unlock(&journal->j_state_lock); + + jbd_debug(3, "JBD2: commit phase 2a\n"); +@@ -1170,7 +1172,7 @@ restart_loop: + if (journal->j_commit_callback) + journal->j_commit_callback(journal, commit_transaction); + if (journal->j_fc_cleanup_callback) +- journal->j_fc_cleanup_callback(journal, 1); ++ journal->j_fc_cleanup_callback(journal, 1, commit_transaction->t_tid); + + trace_jbd2_end_commit(journal, commit_transaction); + jbd_debug(1, "JBD2: commit %d complete, head %d\n", +diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c +index 35302bc192eb9..097ba728d516d 100644 +--- a/fs/jbd2/journal.c ++++ b/fs/jbd2/journal.c +@@ -769,7 +769,7 @@ EXPORT_SYMBOL(jbd2_fc_begin_commit); + static int __jbd2_fc_end_commit(journal_t *journal, tid_t tid, bool fallback) + { + if (journal->j_fc_cleanup_callback) +- journal->j_fc_cleanup_callback(journal, 0); ++ journal->j_fc_cleanup_callback(journal, 0, tid); + write_lock(&journal->j_state_lock); + journal->j_flags &= ~JBD2_FAST_COMMIT_ONGOING; + if (fallback) +@@ -924,10 +924,16 @@ int jbd2_fc_wait_bufs(journal_t *journal, int num_blks) + for (i = j_fc_off - 1; i >= j_fc_off - num_blks; i--) { + bh = journal->j_fc_wbuf[i]; + wait_on_buffer(bh); ++ /* ++ * Update j_fc_off so jbd2_fc_release_bufs can release remain ++ * buffer head. 
++ */ ++ if (unlikely(!buffer_uptodate(bh))) { ++ journal->j_fc_off = i + 1; ++ return -EIO; ++ } + put_bh(bh); + journal->j_fc_wbuf[i] = NULL; +- if (unlikely(!buffer_uptodate(bh))) +- return -EIO; + } + + return 0; +@@ -2970,6 +2976,7 @@ struct journal_head *jbd2_journal_grab_journal_head(struct buffer_head *bh) + jbd_unlock_bh_journal_head(bh); + return jh; + } ++EXPORT_SYMBOL(jbd2_journal_grab_journal_head); + + static void __journal_remove_journal_head(struct buffer_head *bh) + { +@@ -3022,6 +3029,7 @@ void jbd2_journal_put_journal_head(struct journal_head *jh) + jbd_unlock_bh_journal_head(bh); + } + } ++EXPORT_SYMBOL(jbd2_journal_put_journal_head); + + /* + * Initialize jbd inode head +diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c +index 8ca3527189f87..3c5dd010e39d2 100644 +--- a/fs/jbd2/recovery.c ++++ b/fs/jbd2/recovery.c +@@ -256,6 +256,7 @@ static int fc_do_one_pass(journal_t *journal, + err = journal->j_fc_replay_callback(journal, bh, pass, + next_fc_block - journal->j_fc_first, + expected_commit_id); ++ brelse(bh); + next_fc_block++; + if (err < 0 || err == JBD2_FC_REPLAY_STOP) + break; +diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c +index 6a3caedd22856..a57c0c8c63c4f 100644 +--- a/fs/jbd2/transaction.c ++++ b/fs/jbd2/transaction.c +@@ -173,7 +173,7 @@ static void wait_transaction_locked(journal_t *journal) + int need_to_start; + tid_t tid = journal->j_running_transaction->t_tid; + +- prepare_to_wait(&journal->j_wait_transaction_locked, &wait, ++ prepare_to_wait_exclusive(&journal->j_wait_transaction_locked, &wait, + TASK_UNINTERRUPTIBLE); + need_to_start = !tid_geq(journal->j_commit_request, tid); + read_unlock(&journal->j_state_lock); +@@ -199,7 +199,7 @@ static void wait_transaction_switching(journal_t *journal) + read_unlock(&journal->j_state_lock); + return; + } +- prepare_to_wait(&journal->j_wait_transaction_locked, &wait, ++ prepare_to_wait_exclusive(&journal->j_wait_transaction_locked, &wait, + TASK_UNINTERRUPTIBLE); + read_unlock(&journal->j_state_lock); + /* +@@ -911,7 +911,7 @@ void jbd2_journal_unlock_updates (journal_t *journal) + write_lock(&journal->j_state_lock); + --journal->j_barrier_count; + write_unlock(&journal->j_state_lock); +- wake_up(&journal->j_wait_transaction_locked); ++ wake_up_all(&journal->j_wait_transaction_locked); + } + + static void warn_dirty_buffer(struct buffer_head *bh) +@@ -1477,8 +1477,6 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh) + struct journal_head *jh; + int ret = 0; + +- if (is_handle_aborted(handle)) +- return -EROFS; + if (!buffer_jbd(bh)) + return -EUCLEAN; + +@@ -1525,6 +1523,18 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh) + journal = transaction->t_journal; + spin_lock(&jh->b_state_lock); + ++ if (is_handle_aborted(handle)) { ++ /* ++ * Check journal aborting with @jh->b_state_lock locked, ++ * since 'jh->b_transaction' could be replaced with ++ * 'jh->b_next_transaction' during old transaction ++ * committing if journal aborted, which may fail ++ * assertion on 'jh->b_frozen_data == NULL'. 
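
The jbd2 hunks above switch the j_wait_transaction_locked waiters to prepare_to_wait_exclusive() and the wakers to wake_up_all(), so a transaction state change releases every queued task instead of only the first exclusive waiter. The userspace analogue is broadcasting, not signalling, a condition variable after the state flips; a small sketch (thread and variable names are illustrative):

    #include <pthread.h>
    #include <stdbool.h>
    #include <stdio.h>

    static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
    static pthread_cond_t  unlocked = PTHREAD_COND_INITIALIZER;
    static bool transaction_locked = true;

    static void *start_handle(void *arg)
    {
        pthread_mutex_lock(&lock);
        while (transaction_locked)          /* each waiter re-checks */
            pthread_cond_wait(&unlocked, &lock);
        pthread_mutex_unlock(&lock);
        printf("waiter %ld proceeds\n", (long)arg);
        return NULL;
    }

    int main(void)
    {
        pthread_t t[3];

        for (long i = 0; i < 3; i++)
            pthread_create(&t[i], NULL, start_handle, (void *)i);

        pthread_mutex_lock(&lock);
        transaction_locked = false;
        /* broadcast, not signal: every waiter must see the transition,
         * mirroring wake_up_all() on the exclusive wait queue */
        pthread_cond_broadcast(&unlocked);
        pthread_mutex_unlock(&lock);

        for (int i = 0; i < 3; i++)
            pthread_join(t[i], NULL);
        return 0;
    }

Broadcast is the safe default whenever several waiters each re-check the predicate themselves, which is exactly what the exclusive-wait conversion requires here.
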
++ */ ++ ret = -EROFS; ++ goto out_unlock_bh; ++ } ++ + if (jh->b_modified == 0) { + /* + * This buffer's got modified and becoming part +diff --git a/fs/jffs2/build.c b/fs/jffs2/build.c +index b288c8ae1236b..837cd55fd4c5e 100644 +--- a/fs/jffs2/build.c ++++ b/fs/jffs2/build.c +@@ -415,13 +415,15 @@ int jffs2_do_mount_fs(struct jffs2_sb_info *c) + jffs2_free_ino_caches(c); + jffs2_free_raw_node_refs(c); + ret = -EIO; +- goto out_free; ++ goto out_sum_exit; + } + + jffs2_calc_trigger_levels(c); + + return 0; + ++ out_sum_exit: ++ jffs2_sum_exit(c); + out_free: + kvfree(c->blocks); + +diff --git a/fs/jffs2/file.c b/fs/jffs2/file.c +index 4fc8cd698d1a4..bd7d58d27bfc6 100644 +--- a/fs/jffs2/file.c ++++ b/fs/jffs2/file.c +@@ -136,20 +136,15 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping, + struct page *pg; + struct inode *inode = mapping->host; + struct jffs2_inode_info *f = JFFS2_INODE_INFO(inode); ++ struct jffs2_sb_info *c = JFFS2_SB_INFO(inode->i_sb); + pgoff_t index = pos >> PAGE_SHIFT; + uint32_t pageofs = index << PAGE_SHIFT; + int ret = 0; + +- pg = grab_cache_page_write_begin(mapping, index, flags); +- if (!pg) +- return -ENOMEM; +- *pagep = pg; +- + jffs2_dbg(1, "%s()\n", __func__); + + if (pageofs > inode->i_size) { + /* Make new hole frag from old EOF to new page */ +- struct jffs2_sb_info *c = JFFS2_SB_INFO(inode->i_sb); + struct jffs2_raw_inode ri; + struct jffs2_full_dnode *fn; + uint32_t alloc_len; +@@ -160,7 +155,7 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping, + ret = jffs2_reserve_space(c, sizeof(ri), &alloc_len, + ALLOC_NORMAL, JFFS2_SUMMARY_INODE_SIZE); + if (ret) +- goto out_page; ++ goto out_err; + + mutex_lock(&f->sem); + memset(&ri, 0, sizeof(ri)); +@@ -190,7 +185,7 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping, + ret = PTR_ERR(fn); + jffs2_complete_reservation(c); + mutex_unlock(&f->sem); +- goto out_page; ++ goto out_err; + } + ret = jffs2_add_full_dnode_to_inode(c, f, fn); + if (f->metadata) { +@@ -205,13 +200,26 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping, + jffs2_free_full_dnode(fn); + jffs2_complete_reservation(c); + mutex_unlock(&f->sem); +- goto out_page; ++ goto out_err; + } + jffs2_complete_reservation(c); + inode->i_size = pageofs; + mutex_unlock(&f->sem); + } + ++ /* ++ * While getting a page and reading data in, lock c->alloc_sem until ++ * the page is Uptodate. Otherwise GC task may attempt to read the same ++ * page in read_cache_page(), which causes a deadlock. ++ */ ++ mutex_lock(&c->alloc_sem); ++ pg = grab_cache_page_write_begin(mapping, index, flags); ++ if (!pg) { ++ ret = -ENOMEM; ++ goto release_sem; ++ } ++ *pagep = pg; ++ + /* + * Read in the page if it wasn't already present. Cannot optimize away + * the whole page write case until jffs2_write_end can handle the +@@ -221,15 +229,17 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping, + mutex_lock(&f->sem); + ret = jffs2_do_readpage_nolock(inode, pg); + mutex_unlock(&f->sem); +- if (ret) +- goto out_page; ++ if (ret) { ++ unlock_page(pg); ++ put_page(pg); ++ goto release_sem; ++ } + } + jffs2_dbg(1, "end write_begin(). 
pg->flags %lx\n", pg->flags); +- return ret; + +-out_page: +- unlock_page(pg); +- put_page(pg); ++release_sem: ++ mutex_unlock(&c->alloc_sem); ++out_err: + return ret; + } + +diff --git a/fs/jffs2/fs.c b/fs/jffs2/fs.c +index 2ac410477c4f4..f83a468b64883 100644 +--- a/fs/jffs2/fs.c ++++ b/fs/jffs2/fs.c +@@ -603,8 +603,9 @@ out_root: + jffs2_free_ino_caches(c); + jffs2_free_raw_node_refs(c); + kvfree(c->blocks); +- out_inohash: + jffs2_clear_xattr_subsystem(c); ++ jffs2_sum_exit(c); ++ out_inohash: + kfree(c->inocache_list); + out_wbuf: + jffs2_flash_cleanup(c); +diff --git a/fs/jffs2/scan.c b/fs/jffs2/scan.c +index b676056826beb..29671e33a1714 100644 +--- a/fs/jffs2/scan.c ++++ b/fs/jffs2/scan.c +@@ -136,7 +136,7 @@ int jffs2_scan_medium(struct jffs2_sb_info *c) + if (!s) { + JFFS2_WARNING("Can't allocate memory for summary\n"); + ret = -ENOMEM; +- goto out; ++ goto out_buf; + } + } + +@@ -275,13 +275,15 @@ int jffs2_scan_medium(struct jffs2_sb_info *c) + } + ret = 0; + out: ++ jffs2_sum_reset_collected(s); ++ kfree(s); ++ out_buf: + if (buf_size) + kfree(flashbuf); + #ifndef __ECOS + else + mtd_unpoint(c->mtd, 0, c->mtd->size); + #endif +- kfree(s); + return ret; + } + +diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c +index 57ab424c05ff0..072821b50ab91 100644 +--- a/fs/jfs/inode.c ++++ b/fs/jfs/inode.c +@@ -146,12 +146,13 @@ void jfs_evict_inode(struct inode *inode) + dquot_initialize(inode); + + if (JFS_IP(inode)->fileset == FILESYSTEM_I) { ++ struct inode *ipimap = JFS_SBI(inode->i_sb)->ipimap; + truncate_inode_pages_final(&inode->i_data); + + if (test_cflag(COMMIT_Freewmap, inode)) + jfs_free_zero_link(inode); + +- if (JFS_SBI(inode->i_sb)->ipimap) ++ if (ipimap && JFS_IP(ipimap)->i_imap) + diFree(inode); + + /* +diff --git a/fs/jfs/jfs_dmap.c b/fs/jfs/jfs_dmap.c +index 91f4ec93dab1f..f401bc05d5ff6 100644 +--- a/fs/jfs/jfs_dmap.c ++++ b/fs/jfs/jfs_dmap.c +@@ -148,13 +148,14 @@ static const s8 budtab[256] = { + * 0 - success + * -ENOMEM - insufficient memory + * -EIO - i/o error ++ * -EINVAL - wrong bmap data + */ + int dbMount(struct inode *ipbmap) + { + struct bmap *bmp; + struct dbmap_disk *dbmp_le; + struct metapage *mp; +- int i; ++ int i, err; + + /* + * allocate/initialize the in-memory bmap descriptor +@@ -169,8 +170,8 @@ int dbMount(struct inode *ipbmap) + BMAPBLKNO << JFS_SBI(ipbmap->i_sb)->l2nbperpage, + PSIZE, 0); + if (mp == NULL) { +- kfree(bmp); +- return -EIO; ++ err = -EIO; ++ goto err_kfree_bmp; + } + + /* copy the on-disk bmap descriptor to its in-memory version. 
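
The comment added to jffs2_write_begin() above spells out a lock-ordering rule: take c->alloc_sem before grabbing and reading the page, because the GC task takes alloc_sem and may then try to read the very same page, which would deadlock if the writer already held it. Reduced to a standalone sketch, the fix is the classic ABBA rule of acquiring both locks in one global order (the mutex names below just echo the kernel ones):

    #include <pthread.h>
    #include <stdio.h>

    static pthread_mutex_t alloc_sem = PTHREAD_MUTEX_INITIALIZER; /* GC vs writer */
    static pthread_mutex_t page_lock = PTHREAD_MUTEX_INITIALIZER; /* page state */

    /* Writer path: alloc_sem first, then the page, mirroring the
     * reordered jffs2_write_begin(). Taking the page first while GC
     * holds alloc_sem and waits for the same page would deadlock. */
    static void write_begin(void)
    {
        pthread_mutex_lock(&alloc_sem);
        pthread_mutex_lock(&page_lock);
        puts("writer: page read in under alloc_sem");
        pthread_mutex_unlock(&page_lock);
        pthread_mutex_unlock(&alloc_sem);
    }

    /* GC path: same order, alloc_sem first, then the page. */
    static void *gc_pass(void *arg)
    {
        (void)arg;
        pthread_mutex_lock(&alloc_sem);
        pthread_mutex_lock(&page_lock);
        puts("gc: read the page safely");
        pthread_mutex_unlock(&page_lock);
        pthread_mutex_unlock(&alloc_sem);
        return NULL;
    }

    int main(void)
    {
        pthread_t gc;

        pthread_create(&gc, NULL, gc_pass, NULL);
        write_begin();
        pthread_join(gc, NULL);
        return 0;
    }
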
*/ +@@ -179,6 +180,11 @@ int dbMount(struct inode *ipbmap) + bmp->db_nfree = le64_to_cpu(dbmp_le->dn_nfree); + bmp->db_l2nbperpage = le32_to_cpu(dbmp_le->dn_l2nbperpage); + bmp->db_numag = le32_to_cpu(dbmp_le->dn_numag); ++ if (!bmp->db_numag) { ++ err = -EINVAL; ++ goto err_release_metapage; ++ } ++ + bmp->db_maxlevel = le32_to_cpu(dbmp_le->dn_maxlevel); + bmp->db_maxag = le32_to_cpu(dbmp_le->dn_maxag); + bmp->db_agpref = le32_to_cpu(dbmp_le->dn_agpref); +@@ -187,6 +193,16 @@ int dbMount(struct inode *ipbmap) + bmp->db_agwidth = le32_to_cpu(dbmp_le->dn_agwidth); + bmp->db_agstart = le32_to_cpu(dbmp_le->dn_agstart); + bmp->db_agl2size = le32_to_cpu(dbmp_le->dn_agl2size); ++ if (bmp->db_agl2size > L2MAXL2SIZE - L2MAXAG) { ++ err = -EINVAL; ++ goto err_release_metapage; ++ } ++ ++ if (((bmp->db_mapsize - 1) >> bmp->db_agl2size) > MAXAG) { ++ err = -EINVAL; ++ goto err_release_metapage; ++ } ++ + for (i = 0; i < MAXAG; i++) + bmp->db_agfree[i] = le64_to_cpu(dbmp_le->dn_agfree[i]); + bmp->db_agsize = le64_to_cpu(dbmp_le->dn_agsize); +@@ -207,6 +223,12 @@ int dbMount(struct inode *ipbmap) + BMAP_LOCK_INIT(bmp); + + return (0); ++ ++err_release_metapage: ++ release_metapage(mp); ++err_kfree_bmp: ++ kfree(bmp); ++ return err; + } + + +@@ -378,7 +400,8 @@ int dbFree(struct inode *ip, s64 blkno, s64 nblocks) + } + + /* write the last buffer. */ +- write_metapage(mp); ++ if (mp) ++ write_metapage(mp); + + IREAD_UNLOCK(ipbmap); + +diff --git a/fs/jfs/jfs_mount.c b/fs/jfs/jfs_mount.c +index 5d7d7170c03c0..aa4ff7bcaff23 100644 +--- a/fs/jfs/jfs_mount.c ++++ b/fs/jfs/jfs_mount.c +@@ -81,14 +81,14 @@ int jfs_mount(struct super_block *sb) + * (initialize mount inode from the superblock) + */ + if ((rc = chkSuper(sb))) { +- goto errout20; ++ goto out; + } + + ipaimap = diReadSpecial(sb, AGGREGATE_I, 0); + if (ipaimap == NULL) { + jfs_err("jfs_mount: Failed to read AGGREGATE_I"); + rc = -EIO; +- goto errout20; ++ goto out; + } + sbi->ipaimap = ipaimap; + +@@ -99,7 +99,7 @@ int jfs_mount(struct super_block *sb) + */ + if ((rc = diMount(ipaimap))) { + jfs_err("jfs_mount: diMount(ipaimap) failed w/rc = %d", rc); +- goto errout21; ++ goto err_ipaimap; + } + + /* +@@ -108,7 +108,7 @@ int jfs_mount(struct super_block *sb) + ipbmap = diReadSpecial(sb, BMAP_I, 0); + if (ipbmap == NULL) { + rc = -EIO; +- goto errout22; ++ goto err_umount_ipaimap; + } + + jfs_info("jfs_mount: ipbmap:0x%p", ipbmap); +@@ -120,7 +120,7 @@ int jfs_mount(struct super_block *sb) + */ + if ((rc = dbMount(ipbmap))) { + jfs_err("jfs_mount: dbMount failed w/rc = %d", rc); +- goto errout22; ++ goto err_ipbmap; + } + + /* +@@ -139,7 +139,7 @@ int jfs_mount(struct super_block *sb) + if (!ipaimap2) { + jfs_err("jfs_mount: Failed to read AGGREGATE_I"); + rc = -EIO; +- goto errout35; ++ goto err_umount_ipbmap; + } + sbi->ipaimap2 = ipaimap2; + +@@ -151,7 +151,7 @@ int jfs_mount(struct super_block *sb) + if ((rc = diMount(ipaimap2))) { + jfs_err("jfs_mount: diMount(ipaimap2) failed, rc = %d", + rc); +- goto errout35; ++ goto err_ipaimap2; + } + } else + /* Secondary aggregate inode table is not valid */ +@@ -168,7 +168,7 @@ int jfs_mount(struct super_block *sb) + jfs_err("jfs_mount: Failed to read FILESYSTEM_I"); + /* open fileset secondary inode allocation map */ + rc = -EIO; +- goto errout40; ++ goto err_umount_ipaimap2; + } + jfs_info("jfs_mount: ipimap:0x%p", ipimap); + +@@ -178,41 +178,34 @@ int jfs_mount(struct super_block *sb) + /* initialize fileset inode allocation map */ + if ((rc = diMount(ipimap))) { + jfs_err("jfs_mount: diMount failed 
w/rc = %d", rc); +- goto errout41; ++ goto err_ipimap; + } + +- goto out; ++ return rc; + + /* + * unwind on error + */ +- errout41: /* close fileset inode allocation map inode */ ++err_ipimap: ++ /* close fileset inode allocation map inode */ + diFreeSpecial(ipimap); +- +- errout40: /* fileset closed */ +- ++err_umount_ipaimap2: + /* close secondary aggregate inode allocation map */ +- if (ipaimap2) { ++ if (ipaimap2) + diUnmount(ipaimap2, 1); ++err_ipaimap2: ++ /* close aggregate inodes */ ++ if (ipaimap2) + diFreeSpecial(ipaimap2); +- } +- +- errout35: +- +- /* close aggregate block allocation map */ ++err_umount_ipbmap: /* close aggregate block allocation map */ + dbUnmount(ipbmap, 1); ++err_ipbmap: /* close aggregate inodes */ + diFreeSpecial(ipbmap); +- +- errout22: /* close aggregate inode allocation map */ +- ++err_umount_ipaimap: /* close aggregate inode allocation map */ + diUnmount(ipaimap, 1); +- +- errout21: /* close aggregate inodes */ ++err_ipaimap: /* close aggregate inodes */ + diFreeSpecial(ipaimap); +- errout20: /* aggregate closed */ +- +- out: +- ++out: + if (rc) + jfs_err("Mount JFS Failure: %d", rc); + +diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c +index 9db4f5789c0ec..4fbbf88435e69 100644 +--- a/fs/jfs/namei.c ++++ b/fs/jfs/namei.c +@@ -946,7 +946,7 @@ static int jfs_symlink(struct user_namespace *mnt_userns, struct inode *dip, + if (ssize <= IDATASIZE) { + ip->i_op = &jfs_fast_symlink_inode_operations; + +- ip->i_link = JFS_IP(ip)->i_inline; ++ ip->i_link = JFS_IP(ip)->i_inline_all; + memcpy(ip->i_link, name, ssize); + ip->i_size = ssize - 1; + +diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c +index 8e0a1378a4b1f..90677cfbcf9c2 100644 +--- a/fs/kernfs/dir.c ++++ b/fs/kernfs/dir.c +@@ -19,7 +19,15 @@ + + DECLARE_RWSEM(kernfs_rwsem); + static DEFINE_SPINLOCK(kernfs_rename_lock); /* kn->parent and ->name */ +-static char kernfs_pr_cont_buf[PATH_MAX]; /* protected by rename_lock */ ++/* ++ * Don't use rename_lock to piggy back on pr_cont_buf. We don't want to ++ * call pr_cont() while holding rename_lock. Because sometimes pr_cont() ++ * will perform wakeups when releasing console_sem. Holding rename_lock ++ * will introduce deadlock if the scheduler reads the kernfs_name in the ++ * wakeup path. 
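
The jfs_mount() rewrite above replaces the numbered errout2x labels with descriptive err_* labels that unwind in exact reverse order of acquisition, each jump releasing only what has been set up so far. The shape of that idiom, reduced to a self-contained sketch (the two heap allocations stand in for the inode and map resources):

    #include <stdio.h>
    #include <stdlib.h>

    static int mount_like(void)
    {
        char *a = NULL, *b = NULL;
        int rc = 0;

        a = malloc(16);
        if (!a) {
            rc = -1;
            goto out;           /* nothing acquired yet */
        }
        b = malloc(16);
        if (!b) {
            rc = -1;
            goto err_free_a;    /* only 'a' needs releasing */
        }

        puts("mounted");
        free(b);
        free(a);
        return 0;

    err_free_a:
        free(a);
    out:
        fprintf(stderr, "mount failure: %d\n", rc);
        return rc;
    }

    int main(void)
    {
        return mount_like() ? EXIT_FAILURE : EXIT_SUCCESS;
    }
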
++ */ ++static DEFINE_SPINLOCK(kernfs_pr_cont_lock); ++static char kernfs_pr_cont_buf[PATH_MAX]; /* protected by pr_cont_lock */ + static DEFINE_SPINLOCK(kernfs_idr_lock); /* root->ino_idr */ + + #define rb_to_kn(X) rb_entry((X), struct kernfs_node, rb) +@@ -230,12 +238,12 @@ void pr_cont_kernfs_name(struct kernfs_node *kn) + { + unsigned long flags; + +- spin_lock_irqsave(&kernfs_rename_lock, flags); ++ spin_lock_irqsave(&kernfs_pr_cont_lock, flags); + +- kernfs_name_locked(kn, kernfs_pr_cont_buf, sizeof(kernfs_pr_cont_buf)); ++ kernfs_name(kn, kernfs_pr_cont_buf, sizeof(kernfs_pr_cont_buf)); + pr_cont("%s", kernfs_pr_cont_buf); + +- spin_unlock_irqrestore(&kernfs_rename_lock, flags); ++ spin_unlock_irqrestore(&kernfs_pr_cont_lock, flags); + } + + /** +@@ -249,10 +257,10 @@ void pr_cont_kernfs_path(struct kernfs_node *kn) + unsigned long flags; + int sz; + +- spin_lock_irqsave(&kernfs_rename_lock, flags); ++ spin_lock_irqsave(&kernfs_pr_cont_lock, flags); + +- sz = kernfs_path_from_node_locked(kn, NULL, kernfs_pr_cont_buf, +- sizeof(kernfs_pr_cont_buf)); ++ sz = kernfs_path_from_node(kn, NULL, kernfs_pr_cont_buf, ++ sizeof(kernfs_pr_cont_buf)); + if (sz < 0) { + pr_cont("(error)"); + goto out; +@@ -266,7 +274,7 @@ void pr_cont_kernfs_path(struct kernfs_node *kn) + pr_cont("%s", kernfs_pr_cont_buf); + + out: +- spin_unlock_irqrestore(&kernfs_rename_lock, flags); ++ spin_unlock_irqrestore(&kernfs_pr_cont_lock, flags); + } + + /** +@@ -822,13 +830,12 @@ static struct kernfs_node *kernfs_walk_ns(struct kernfs_node *parent, + + lockdep_assert_held_read(&kernfs_rwsem); + +- /* grab kernfs_rename_lock to piggy back on kernfs_pr_cont_buf */ +- spin_lock_irq(&kernfs_rename_lock); ++ spin_lock_irq(&kernfs_pr_cont_lock); + + len = strlcpy(kernfs_pr_cont_buf, path, sizeof(kernfs_pr_cont_buf)); + + if (len >= sizeof(kernfs_pr_cont_buf)) { +- spin_unlock_irq(&kernfs_rename_lock); ++ spin_unlock_irq(&kernfs_pr_cont_lock); + return NULL; + } + +@@ -840,7 +847,7 @@ static struct kernfs_node *kernfs_walk_ns(struct kernfs_node *parent, + parent = kernfs_find_ns(parent, name, ns); + } + +- spin_unlock_irq(&kernfs_rename_lock); ++ spin_unlock_irq(&kernfs_pr_cont_lock); + + return parent; + } +@@ -1540,8 +1547,11 @@ int kernfs_remove_by_name_ns(struct kernfs_node *parent, const char *name, + down_write(&kernfs_rwsem); + + kn = kernfs_find_ns(parent, name, ns); +- if (kn) ++ if (kn) { ++ kernfs_get(kn); + __kernfs_remove(kn); ++ kernfs_put(kn); ++ } + + up_write(&kernfs_rwsem); + +diff --git a/fs/ksmbd/Kconfig b/fs/ksmbd/Kconfig +index b83cbd756ae50..6af339cfdc041 100644 +--- a/fs/ksmbd/Kconfig ++++ b/fs/ksmbd/Kconfig +@@ -19,6 +19,7 @@ config SMB_SERVER + select CRYPTO_GCM + select ASN1 + select OID_REGISTRY ++ select CRC32 + default n + help + Choose Y here if you want to allow SMB3 compliant clients +diff --git a/fs/ksmbd/auth.c b/fs/ksmbd/auth.c +index 30a92ddc18174..b962b16e5aeb7 100644 +--- a/fs/ksmbd/auth.c ++++ b/fs/ksmbd/auth.c +@@ -319,7 +319,8 @@ int ksmbd_decode_ntlmssp_auth_blob(struct authenticate_message *authblob, + dn_off = le32_to_cpu(authblob->DomainName.BufferOffset); + dn_len = le16_to_cpu(authblob->DomainName.Length); + +- if (blob_len < (u64)dn_off + dn_len || blob_len < (u64)nt_off + nt_len) ++ if (blob_len < (u64)dn_off + dn_len || blob_len < (u64)nt_off + nt_len || ++ nt_len < CIFS_ENCPWD_SIZE) + return -EINVAL; + + /* TODO : use domain name that imported from configuration file */ +diff --git a/fs/ksmbd/connection.c b/fs/ksmbd/connection.c +index b57a0d8a392ff..ce0edf926c2af 100644 
+--- a/fs/ksmbd/connection.c ++++ b/fs/ksmbd/connection.c +@@ -62,6 +62,7 @@ struct ksmbd_conn *ksmbd_conn_alloc(void) + atomic_set(&conn->req_running, 0); + atomic_set(&conn->r_count, 0); + conn->total_credits = 1; ++ conn->outstanding_credits = 0; + + init_waitqueue_head(&conn->req_running_q); + INIT_LIST_HEAD(&conn->conns_list); +@@ -273,7 +274,7 @@ int ksmbd_conn_handler_loop(void *p) + { + struct ksmbd_conn *conn = (struct ksmbd_conn *)p; + struct ksmbd_transport *t = conn->transport; +- unsigned int pdu_size; ++ unsigned int pdu_size, max_allowed_pdu_size; + char hdr_buf[4] = {0,}; + int size; + +@@ -298,20 +299,36 @@ int ksmbd_conn_handler_loop(void *p) + pdu_size = get_rfc1002_len(hdr_buf); + ksmbd_debug(CONN, "RFC1002 header %u bytes\n", pdu_size); + ++ if (conn->status == KSMBD_SESS_GOOD) ++ max_allowed_pdu_size = ++ SMB3_MAX_MSGSIZE + conn->vals->max_write_size; ++ else ++ max_allowed_pdu_size = SMB3_MAX_MSGSIZE; ++ ++ if (pdu_size > max_allowed_pdu_size) { ++ pr_err_ratelimited("PDU length(%u) excceed maximum allowed pdu size(%u) on connection(%d)\n", ++ pdu_size, max_allowed_pdu_size, ++ conn->status); ++ break; ++ } ++ + /* + * Check if pdu size is valid (min : smb header size, + * max : 0x00FFFFFF). + */ + if (pdu_size < __SMB2_HEADER_STRUCTURE_SIZE || + pdu_size > MAX_STREAM_PROT_LEN) { +- continue; ++ break; + } + + /* 4 for rfc1002 length field */ + size = pdu_size + 4; +- conn->request_buf = kvmalloc(size, GFP_KERNEL); ++ conn->request_buf = kvmalloc(size, ++ GFP_KERNEL | ++ __GFP_NOWARN | ++ __GFP_NORETRY); + if (!conn->request_buf) +- continue; ++ break; + + memcpy(conn->request_buf, hdr_buf, sizeof(hdr_buf)); + if (!ksmbd_smb_request(conn)) +diff --git a/fs/ksmbd/connection.h b/fs/ksmbd/connection.h +index e5403c587a58c..8694aef482c1a 100644 +--- a/fs/ksmbd/connection.h ++++ b/fs/ksmbd/connection.h +@@ -61,8 +61,8 @@ struct ksmbd_conn { + atomic_t req_running; + /* References which are made for this Server object*/ + atomic_t r_count; +- unsigned short total_credits; +- unsigned short max_credits; ++ unsigned int total_credits; ++ unsigned int outstanding_credits; + spinlock_t credits_lock; + wait_queue_head_t req_running_q; + /* Lock to protect requests list*/ +diff --git a/fs/ksmbd/ksmbd_netlink.h b/fs/ksmbd/ksmbd_netlink.h +index c6718a05d347f..fae859d59c792 100644 +--- a/fs/ksmbd/ksmbd_netlink.h ++++ b/fs/ksmbd/ksmbd_netlink.h +@@ -103,6 +103,10 @@ struct ksmbd_startup_request { + * we set the SPARSE_FILES bit (0x40). + */ + __u32 sub_auth[3]; /* Subauth value for Security ID */ ++ __u32 smb2_max_credits; /* MAX credits */ ++ __u32 smbd_max_io_size; /* smbd read write size */ ++ __u32 max_connections; /* Number of maximum simultaneous connections */ ++ __u32 reserved[126]; /* Reserved room */ + __u32 ifc_list_sz; /* interfaces list size */ + __s8 ____payload[]; + }; +@@ -113,7 +117,7 @@ struct ksmbd_startup_request { + * IPC request to shutdown ksmbd server. 
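
The receive-loop hardening above bounds the remote-supplied RFC1002 length against a cap that depends on session state before allocating, and drops the connection rather than looping on bad input. A condensed userspace model of that validate-then-allocate ordering (the cap value and the 24-bit length parse below are simplified stand-ins):

    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>

    #define MAX_ALLOWED_PDU (4u * 1024 * 1024)  /* illustrative cap */

    /* Parse the length prefix, reject anything over the cap, and only
     * then allocate. Trusting a remote length before bounding it lets
     * a peer drive huge allocations. */
    static void *recv_pdu(const uint8_t hdr[4], size_t *out_len)
    {
        uint32_t pdu_size = (uint32_t)hdr[1] << 16 |
                            (uint32_t)hdr[2] << 8 | hdr[3];

        if (pdu_size > MAX_ALLOWED_PDU) {
            fprintf(stderr, "PDU length %u exceeds cap %u, dropping\n",
                    pdu_size, MAX_ALLOWED_PDU);
            return NULL;
        }
        *out_len = pdu_size;
        return malloc(pdu_size + 4);    /* 4 for the length field itself */
    }

    int main(void)
    {
        const uint8_t hdr[4] = { 0, 0, 0x10, 0 };   /* 4096-byte PDU */
        size_t len = 0;
        void *buf = recv_pdu(hdr, &len);

        if (buf)
            printf("accepted %zu-byte PDU\n", len);
        else
            puts("rejected");
        free(buf);
        return 0;
    }

Allocating first and validating later would let an unauthenticated peer drive arbitrarily large allocations, which the __GFP_NOWARN | __GFP_NORETRY flags in the patch further contain.
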
+ */ + struct ksmbd_shutdown_request { +- __s32 reserved; ++ __s32 reserved[16]; + }; + + /* +@@ -122,6 +126,7 @@ struct ksmbd_shutdown_request { + struct ksmbd_login_request { + __u32 handle; + __s8 account[KSMBD_REQ_MAX_ACCOUNT_NAME_SZ]; /* user account name */ ++ __u32 reserved[16]; /* Reserved room */ + }; + + /* +@@ -135,6 +140,7 @@ struct ksmbd_login_response { + __u16 status; + __u16 hash_sz; /* hash size */ + __s8 hash[KSMBD_REQ_MAX_HASH_SZ]; /* password hash */ ++ __u32 reserved[16]; /* Reserved room */ + }; + + /* +@@ -143,6 +149,7 @@ struct ksmbd_login_response { + struct ksmbd_share_config_request { + __u32 handle; + __s8 share_name[KSMBD_REQ_MAX_SHARE_NAME]; /* share name */ ++ __u32 reserved[16]; /* Reserved room */ + }; + + /* +@@ -157,6 +164,7 @@ struct ksmbd_share_config_response { + __u16 force_directory_mode; + __u16 force_uid; + __u16 force_gid; ++ __u32 reserved[128]; /* Reserved room */ + __u32 veto_list_sz; + __s8 ____payload[]; + }; +@@ -187,6 +195,7 @@ struct ksmbd_tree_connect_request { + __s8 account[KSMBD_REQ_MAX_ACCOUNT_NAME_SZ]; + __s8 share[KSMBD_REQ_MAX_SHARE_NAME]; + __s8 peer_addr[64]; ++ __u32 reserved[16]; /* Reserved room */ + }; + + /* +@@ -196,6 +205,7 @@ struct ksmbd_tree_connect_response { + __u32 handle; + __u16 status; + __u16 connection_flags; ++ __u32 reserved[16]; /* Reserved room */ + }; + + /* +@@ -204,6 +214,7 @@ struct ksmbd_tree_connect_response { + struct ksmbd_tree_disconnect_request { + __u64 session_id; /* session id */ + __u64 connect_id; /* tree connection id */ ++ __u32 reserved[16]; /* Reserved room */ + }; + + /* +@@ -212,6 +223,7 @@ struct ksmbd_tree_disconnect_request { + struct ksmbd_logout_request { + __s8 account[KSMBD_REQ_MAX_ACCOUNT_NAME_SZ]; /* user account name */ + __u32 account_flags; ++ __u32 reserved[16]; /* Reserved room */ + }; + + /* +diff --git a/fs/ksmbd/mgmt/tree_connect.c b/fs/ksmbd/mgmt/tree_connect.c +index 0d28e723a28c7..940385c6a9135 100644 +--- a/fs/ksmbd/mgmt/tree_connect.c ++++ b/fs/ksmbd/mgmt/tree_connect.c +@@ -18,7 +18,7 @@ + struct ksmbd_tree_conn_status + ksmbd_tree_conn_connect(struct ksmbd_session *sess, char *share_name) + { +- struct ksmbd_tree_conn_status status = {-EINVAL, NULL}; ++ struct ksmbd_tree_conn_status status = {-ENOENT, NULL}; + struct ksmbd_tree_connect_response *resp = NULL; + struct ksmbd_share_config *sc; + struct ksmbd_tree_connect *tree_conn = NULL; +diff --git a/fs/ksmbd/mgmt/user_session.c b/fs/ksmbd/mgmt/user_session.c +index 8d8ffd8c6f192..0fa467f2c8973 100644 +--- a/fs/ksmbd/mgmt/user_session.c ++++ b/fs/ksmbd/mgmt/user_session.c +@@ -106,15 +106,17 @@ int ksmbd_session_rpc_open(struct ksmbd_session *sess, char *rpc_name) + entry->method = method; + entry->id = ksmbd_ipc_id_alloc(); + if (entry->id < 0) +- goto error; ++ goto free_entry; + + resp = ksmbd_rpc_open(sess, entry->id); + if (!resp) +- goto error; ++ goto free_id; + + kvfree(resp); + return entry->id; +-error: ++free_id: ++ ksmbd_rpc_id_free(entry->id); ++free_entry: + list_del(&entry->list); + kfree(entry); + return -EINVAL; +diff --git a/fs/ksmbd/ndr.c b/fs/ksmbd/ndr.c +index 8317f7ca402b4..28f44f0c918c9 100644 +--- a/fs/ksmbd/ndr.c ++++ b/fs/ksmbd/ndr.c +@@ -148,7 +148,7 @@ static int ndr_read_int16(struct ndr *n, __u16 *value) + static int ndr_read_int32(struct ndr *n, __u32 *value) + { + if (n->offset + sizeof(__u32) > n->length) +- return 0; ++ return -EINVAL; + + if (value) + *value = le32_to_cpu(*(__le32 *)ndr_get_field(n)); +@@ -242,7 +242,7 @@ int ndr_decode_dos_attr(struct ndr *n, struct 
xattr_dos_attrib *da) + return ret; + + if (da->version != 3 && da->version != 4) { +- pr_err("v%d version is not supported\n", da->version); ++ ksmbd_debug(VFS, "v%d version is not supported\n", da->version); + return -EINVAL; + } + +@@ -251,7 +251,7 @@ int ndr_decode_dos_attr(struct ndr *n, struct xattr_dos_attrib *da) + return ret; + + if (da->version != version2) { +- pr_err("ndr version mismatched(version: %d, version2: %d)\n", ++ ksmbd_debug(VFS, "ndr version mismatched(version: %d, version2: %d)\n", + da->version, version2); + return -EINVAL; + } +@@ -453,7 +453,7 @@ int ndr_decode_v4_ntacl(struct ndr *n, struct xattr_ntacl *acl) + if (ret) + return ret; + if (acl->version != 4) { +- pr_err("v%d version is not supported\n", acl->version); ++ ksmbd_debug(VFS, "v%d version is not supported\n", acl->version); + return -EINVAL; + } + +@@ -461,7 +461,7 @@ int ndr_decode_v4_ntacl(struct ndr *n, struct xattr_ntacl *acl) + if (ret) + return ret; + if (acl->version != version2) { +- pr_err("ndr version mismatched(version: %d, version2: %d)\n", ++ ksmbd_debug(VFS, "ndr version mismatched(version: %d, version2: %d)\n", + acl->version, version2); + return -EINVAL; + } +diff --git a/fs/ksmbd/server.c b/fs/ksmbd/server.c +index 2a2b2135bfded..976d09aaee703 100644 +--- a/fs/ksmbd/server.c ++++ b/fs/ksmbd/server.c +@@ -235,10 +235,8 @@ send: + if (work->sess && work->sess->enc && work->encrypted && + conn->ops->encrypt_resp) { + rc = conn->ops->encrypt_resp(work); +- if (rc < 0) { ++ if (rc < 0) + conn->ops->set_rsp_status(work, STATUS_DATA_ERROR); +- goto send; +- } + } + + ksmbd_conn_write(work); +@@ -632,5 +630,6 @@ MODULE_SOFTDEP("pre: sha512"); + MODULE_SOFTDEP("pre: aead2"); + MODULE_SOFTDEP("pre: ccm"); + MODULE_SOFTDEP("pre: gcm"); ++MODULE_SOFTDEP("pre: crc32"); + module_init(ksmbd_server_init) + module_exit(ksmbd_server_exit) +diff --git a/fs/ksmbd/server.h b/fs/ksmbd/server.h +index ac9d932f8c8aa..db72781817603 100644 +--- a/fs/ksmbd/server.h ++++ b/fs/ksmbd/server.h +@@ -41,6 +41,7 @@ struct ksmbd_server_config { + unsigned int share_fake_fscaps; + struct smb_sid domain_sid; + unsigned int auth_mechs; ++ unsigned int max_connections; + + char *conf[SERVER_CONF_WORK_GROUP + 1]; + }; +diff --git a/fs/ksmbd/smb2misc.c b/fs/ksmbd/smb2misc.c +index 030ca57c37849..b47be71be4c82 100644 +--- a/fs/ksmbd/smb2misc.c ++++ b/fs/ksmbd/smb2misc.c +@@ -91,11 +91,6 @@ static int smb2_get_data_area_len(unsigned int *off, unsigned int *len, + *off = 0; + *len = 0; + +- /* error reqeusts do not have data area */ +- if (hdr->Status && hdr->Status != STATUS_MORE_PROCESSING_REQUIRED && +- (((struct smb2_err_rsp *)hdr)->StructureSize) == SMB2_ERROR_STRUCTURE_SIZE2_LE) +- return ret; +- + /* + * Following commands have data areas so we have to get the location + * of the data buffer offset and data buffer length for the particular +@@ -137,8 +132,11 @@ static int smb2_get_data_area_len(unsigned int *off, unsigned int *len, + *len = le16_to_cpu(((struct smb2_read_req *)hdr)->ReadChannelInfoLength); + break; + case SMB2_WRITE: +- if (((struct smb2_write_req *)hdr)->DataOffset) { +- *off = le16_to_cpu(((struct smb2_write_req *)hdr)->DataOffset); ++ if (((struct smb2_write_req *)hdr)->DataOffset || ++ ((struct smb2_write_req *)hdr)->Length) { ++ *off = max_t(unsigned int, ++ le16_to_cpu(((struct smb2_write_req *)hdr)->DataOffset), ++ offsetof(struct smb2_write_req, Buffer) - 4); + *len = le32_to_cpu(((struct smb2_write_req *)hdr)->Length); + break; + } +@@ -290,7 +288,7 @@ static int 
smb2_validate_credit_charge(struct ksmbd_conn *conn, + unsigned int req_len = 0, expect_resp_len = 0, calc_credit_num, max_len; + unsigned short credit_charge = le16_to_cpu(hdr->CreditCharge); + void *__hdr = hdr; +- int ret; ++ int ret = 0; + + switch (hdr->Command) { + case SMB2_QUERY_INFO: +@@ -327,21 +325,27 @@ static int smb2_validate_credit_charge(struct ksmbd_conn *conn, + ksmbd_debug(SMB, "Insufficient credit charge, given: %d, needed: %d\n", + credit_charge, calc_credit_num); + return 1; +- } else if (credit_charge > conn->max_credits) { ++ } else if (credit_charge > conn->vals->max_credits) { + ksmbd_debug(SMB, "Too large credit charge: %d\n", credit_charge); + return 1; + } + + spin_lock(&conn->credits_lock); +- if (credit_charge <= conn->total_credits) { +- conn->total_credits -= credit_charge; +- ret = 0; +- } else { ++ if (credit_charge > conn->total_credits) { + ksmbd_debug(SMB, "Insufficient credits granted, given: %u, granted: %u\n", + credit_charge, conn->total_credits); + ret = 1; + } ++ ++ if ((u64)conn->outstanding_credits + credit_charge > conn->total_credits) { ++ ksmbd_debug(SMB, "Limits exceeding the maximum allowable outstanding requests, given : %u, pending : %u\n", ++ credit_charge, conn->outstanding_credits); ++ ret = 1; ++ } else ++ conn->outstanding_credits += credit_charge; ++ + spin_unlock(&conn->credits_lock); ++ + return ret; + } + +@@ -358,12 +362,10 @@ int ksmbd_smb2_check_message(struct ksmbd_work *work) + hdr = &pdu->hdr; + } + +- if (le32_to_cpu(hdr->NextCommand) > 0) { ++ if (le32_to_cpu(hdr->NextCommand) > 0) + len = le32_to_cpu(hdr->NextCommand); +- } else if (work->next_smb2_rcv_hdr_off) { ++ else if (work->next_smb2_rcv_hdr_off) + len -= work->next_smb2_rcv_hdr_off; +- len = round_up(len, 8); +- } + + if (check_smb2_hdr(hdr)) + return 1; +diff --git a/fs/ksmbd/smb2ops.c b/fs/ksmbd/smb2ops.c +index fb6a65d231391..f0a5b704f301c 100644 +--- a/fs/ksmbd/smb2ops.c ++++ b/fs/ksmbd/smb2ops.c +@@ -20,6 +20,7 @@ static struct smb_version_values smb21_server_values = { + .max_read_size = SMB21_DEFAULT_IOSIZE, + .max_write_size = SMB21_DEFAULT_IOSIZE, + .max_trans_size = SMB21_DEFAULT_IOSIZE, ++ .max_credits = SMB2_MAX_CREDITS, + .large_lock_type = 0, + .exclusive_lock_type = SMB2_LOCKFLAG_EXCLUSIVE, + .shared_lock_type = SMB2_LOCKFLAG_SHARED, +@@ -45,6 +46,7 @@ static struct smb_version_values smb30_server_values = { + .max_read_size = SMB3_DEFAULT_IOSIZE, + .max_write_size = SMB3_DEFAULT_IOSIZE, + .max_trans_size = SMB3_DEFAULT_TRANS_SIZE, ++ .max_credits = SMB2_MAX_CREDITS, + .large_lock_type = 0, + .exclusive_lock_type = SMB2_LOCKFLAG_EXCLUSIVE, + .shared_lock_type = SMB2_LOCKFLAG_SHARED, +@@ -71,6 +73,7 @@ static struct smb_version_values smb302_server_values = { + .max_read_size = SMB3_DEFAULT_IOSIZE, + .max_write_size = SMB3_DEFAULT_IOSIZE, + .max_trans_size = SMB3_DEFAULT_TRANS_SIZE, ++ .max_credits = SMB2_MAX_CREDITS, + .large_lock_type = 0, + .exclusive_lock_type = SMB2_LOCKFLAG_EXCLUSIVE, + .shared_lock_type = SMB2_LOCKFLAG_SHARED, +@@ -97,6 +100,7 @@ static struct smb_version_values smb311_server_values = { + .max_read_size = SMB3_DEFAULT_IOSIZE, + .max_write_size = SMB3_DEFAULT_IOSIZE, + .max_trans_size = SMB3_DEFAULT_TRANS_SIZE, ++ .max_credits = SMB2_MAX_CREDITS, + .large_lock_type = 0, + .exclusive_lock_type = SMB2_LOCKFLAG_EXCLUSIVE, + .shared_lock_type = SMB2_LOCKFLAG_SHARED, +@@ -198,7 +202,6 @@ void init_smb2_1_server(struct ksmbd_conn *conn) + conn->ops = &smb2_0_server_ops; + conn->cmds = smb2_0_server_cmds; + conn->max_cmds = 
ARRAY_SIZE(smb2_0_server_cmds); +- conn->max_credits = SMB2_MAX_CREDITS; + conn->signing_algorithm = SIGNING_ALG_HMAC_SHA256; + + if (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB2_LEASES) +@@ -216,7 +219,6 @@ void init_smb3_0_server(struct ksmbd_conn *conn) + conn->ops = &smb3_0_server_ops; + conn->cmds = smb2_0_server_cmds; + conn->max_cmds = ARRAY_SIZE(smb2_0_server_cmds); +- conn->max_credits = SMB2_MAX_CREDITS; + conn->signing_algorithm = SIGNING_ALG_AES_CMAC; + + if (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB2_LEASES) +@@ -241,7 +243,6 @@ void init_smb3_02_server(struct ksmbd_conn *conn) + conn->ops = &smb3_0_server_ops; + conn->cmds = smb2_0_server_cmds; + conn->max_cmds = ARRAY_SIZE(smb2_0_server_cmds); +- conn->max_credits = SMB2_MAX_CREDITS; + conn->signing_algorithm = SIGNING_ALG_AES_CMAC; + + if (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB2_LEASES) +@@ -266,15 +267,11 @@ int init_smb3_11_server(struct ksmbd_conn *conn) + conn->ops = &smb3_11_server_ops; + conn->cmds = smb2_0_server_cmds; + conn->max_cmds = ARRAY_SIZE(smb2_0_server_cmds); +- conn->max_credits = SMB2_MAX_CREDITS; + conn->signing_algorithm = SIGNING_ALG_AES_CMAC; + + if (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB2_LEASES) + conn->vals->capabilities |= SMB2_GLOBAL_CAP_LEASING; + +- if (conn->cipher_type) +- conn->vals->capabilities |= SMB2_GLOBAL_CAP_ENCRYPTION; +- + if (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB3_MULTICHANNEL) + conn->vals->capabilities |= SMB2_GLOBAL_CAP_MULTI_CHANNEL; + +@@ -308,3 +305,11 @@ void init_smb2_max_trans_size(unsigned int sz) + smb302_server_values.max_trans_size = sz; + smb311_server_values.max_trans_size = sz; + } ++ ++void init_smb2_max_credits(unsigned int sz) ++{ ++ smb21_server_values.max_credits = sz; ++ smb30_server_values.max_credits = sz; ++ smb302_server_values.max_credits = sz; ++ smb311_server_values.max_credits = sz; ++} +diff --git a/fs/ksmbd/smb2pdu.c b/fs/ksmbd/smb2pdu.c +index 7e448df3f8474..ac029dfd23ab8 100644 +--- a/fs/ksmbd/smb2pdu.c ++++ b/fs/ksmbd/smb2pdu.c +@@ -11,6 +11,7 @@ + #include <linux/statfs.h> + #include <linux/ethtool.h> + #include <linux/falloc.h> ++#include <linux/mount.h> + + #include "glob.h" + #include "smb2pdu.h" +@@ -301,16 +302,15 @@ int smb2_set_rsp_credits(struct ksmbd_work *work) + struct smb2_hdr *req_hdr = ksmbd_req_buf_next(work); + struct smb2_hdr *hdr = ksmbd_resp_buf_next(work); + struct ksmbd_conn *conn = work->conn; +- unsigned short credits_requested; ++ unsigned short credits_requested, aux_max; + unsigned short credit_charge, credits_granted = 0; +- unsigned short aux_max, aux_credits; + + if (work->send_no_response) + return 0; + + hdr->CreditCharge = req_hdr->CreditCharge; + +- if (conn->total_credits > conn->max_credits) { ++ if (conn->total_credits > conn->vals->max_credits) { + hdr->CreditRequest = 0; + pr_err("Total credits overflow: %d\n", conn->total_credits); + return -EINVAL; +@@ -318,6 +318,14 @@ int smb2_set_rsp_credits(struct ksmbd_work *work) + + credit_charge = max_t(unsigned short, + le16_to_cpu(req_hdr->CreditCharge), 1); ++ if (credit_charge > conn->total_credits) { ++ ksmbd_debug(SMB, "Insufficient credits granted, given: %u, granted: %u\n", ++ credit_charge, conn->total_credits); ++ return -EINVAL; ++ } ++ ++ conn->total_credits -= credit_charge; ++ conn->outstanding_credits -= credit_charge; + credits_requested = max_t(unsigned short, + le16_to_cpu(req_hdr->CreditRequest), 1); + +@@ -327,16 +335,14 @@ int smb2_set_rsp_credits(struct ksmbd_work *work) + * TODO: Need to adjuct CreditRequest value according to + * 
current cpu load + */ +- aux_credits = credits_requested - 1; + if (hdr->Command == SMB2_NEGOTIATE) +- aux_max = 0; ++ aux_max = 1; + else +- aux_max = conn->max_credits - credit_charge; +- aux_credits = min_t(unsigned short, aux_credits, aux_max); +- credits_granted = credit_charge + aux_credits; ++ aux_max = conn->vals->max_credits - credit_charge; ++ credits_granted = min_t(unsigned short, credits_requested, aux_max); + +- if (conn->max_credits - conn->total_credits < credits_granted) +- credits_granted = conn->max_credits - ++ if (conn->vals->max_credits - conn->total_credits < credits_granted) ++ credits_granted = conn->vals->max_credits - + conn->total_credits; + + conn->total_credits += credits_granted; +@@ -535,9 +541,10 @@ int smb2_allocate_rsp_buf(struct ksmbd_work *work) + struct smb2_query_info_req *req; + + req = work->request_buf; +- if (req->InfoType == SMB2_O_INFO_FILE && +- (req->FileInfoClass == FILE_FULL_EA_INFORMATION || +- req->FileInfoClass == FILE_ALL_INFORMATION)) ++ if ((req->InfoType == SMB2_O_INFO_FILE && ++ (req->FileInfoClass == FILE_FULL_EA_INFORMATION || ++ req->FileInfoClass == FILE_ALL_INFORMATION)) || ++ req->InfoType == SMB2_O_INFO_SECURITY) + sz = large_sz; + } + +@@ -917,6 +924,25 @@ static void decode_encrypt_ctxt(struct ksmbd_conn *conn, + } + } + ++/** ++ * smb3_encryption_negotiated() - checks if server and client agreed on enabling encryption ++ * @conn: smb connection ++ * ++ * Return: true if connection should be encrypted, else false ++ */ ++static bool smb3_encryption_negotiated(struct ksmbd_conn *conn) ++{ ++ if (!conn->ops->generate_encryptionkey) ++ return false; ++ ++ /* ++ * SMB 3.0 and 3.0.2 dialects use the SMB2_GLOBAL_CAP_ENCRYPTION flag. ++ * SMB 3.1.1 uses the cipher_type field. ++ */ ++ return (conn->vals->capabilities & SMB2_GLOBAL_CAP_ENCRYPTION) || ++ conn->cipher_type; ++} ++ + static void decode_compress_ctxt(struct ksmbd_conn *conn, + struct smb2_compression_ctx *pneg_ctxt) + { +@@ -1121,12 +1147,16 @@ int smb2_handle_negotiate(struct ksmbd_work *work) + status); + rsp->hdr.Status = status; + rc = -EINVAL; ++ kfree(conn->preauth_info); ++ conn->preauth_info = NULL; + goto err_out; + } + + rc = init_smb3_11_server(conn); + if (rc < 0) { + rsp->hdr.Status = STATUS_INVALID_PARAMETER; ++ kfree(conn->preauth_info); ++ conn->preauth_info = NULL; + goto err_out; + } + +@@ -1438,11 +1468,6 @@ static int ntlm_authenticate(struct ksmbd_work *work) + + sess->user = user; + if (user_guest(sess->user)) { +- if (conn->sign) { +- ksmbd_debug(SMB, "Guest login not allowed when signing enabled\n"); +- return -EPERM; +- } +- + rsp->SessionFlags = SMB2_SESSION_FLAG_IS_GUEST_LE; + } else { + struct authenticate_message *authblob; +@@ -1455,39 +1480,39 @@ static int ntlm_authenticate(struct ksmbd_work *work) + ksmbd_debug(SMB, "authentication failed\n"); + return -EPERM; + } ++ } + +- /* +- * If session state is SMB2_SESSION_VALID, We can assume +- * that it is reauthentication. And the user/password +- * has been verified, so return it here. +- */ +- if (sess->state == SMB2_SESSION_VALID) { +- if (conn->binding) +- goto binding_session; +- return 0; +- } ++ /* ++ * If session state is SMB2_SESSION_VALID, We can assume ++ * that it is reauthentication. And the user/password ++ * has been verified, so return it here. 
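
The credit changes above enforce two invariants under credits_lock: a single request's CreditCharge must not exceed the credits granted so far, and the summed charges of in-flight requests, tracked in the new outstanding_credits field, must also stay within the grant. A userspace model of that accounting with a mutex standing in for the spinlock (the struct below is a stand-in, not the kernel's):

    #include <pthread.h>
    #include <stdbool.h>
    #include <stdio.h>

    struct conn_credits {
        pthread_mutex_t lock;
        unsigned int total_credits;        /* granted to the client */
        unsigned int outstanding_credits;  /* consumed by in-flight requests */
    };

    static bool charge_credits(struct conn_credits *c, unsigned int charge)
    {
        bool ok = true;

        pthread_mutex_lock(&c->lock);
        if (charge > c->total_credits ||
            (unsigned long long)c->outstanding_credits + charge >
                c->total_credits)
            ok = false;
        else
            c->outstanding_credits += charge;
        pthread_mutex_unlock(&c->lock);
        return ok;
    }

    static void complete_request(struct conn_credits *c, unsigned int charge)
    {
        pthread_mutex_lock(&c->lock);
        c->total_credits -= charge;        /* consumed; re-granted later */
        c->outstanding_credits -= charge;
        pthread_mutex_unlock(&c->lock);
    }

    int main(void)
    {
        struct conn_credits c = { PTHREAD_MUTEX_INITIALIZER, 8, 0 };

        printf("charge 6: %s\n", charge_credits(&c, 6) ? "ok" : "rejected");
        printf("charge 6: %s\n", charge_credits(&c, 6) ? "ok" : "rejected");
        complete_request(&c, 6);
        printf("charge 2: %s\n", charge_credits(&c, 2) ? "ok" : "rejected");
        return 0;
    }

The widened sum guards against overflow of the unsigned counters, playing the same role as the u64 cast the patch adds in smb2_validate_credit_charge().
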
++ */ ++ if (sess->state == SMB2_SESSION_VALID) { ++ if (conn->binding) ++ goto binding_session; ++ return 0; ++ } + +- if ((conn->sign || server_conf.enforced_signing) || +- (req->SecurityMode & SMB2_NEGOTIATE_SIGNING_REQUIRED)) +- sess->sign = true; ++ if ((rsp->SessionFlags != SMB2_SESSION_FLAG_IS_GUEST_LE && ++ (conn->sign || server_conf.enforced_signing)) || ++ (req->SecurityMode & SMB2_NEGOTIATE_SIGNING_REQUIRED)) ++ sess->sign = true; + +- if (conn->vals->capabilities & SMB2_GLOBAL_CAP_ENCRYPTION && +- conn->ops->generate_encryptionkey && +- !(req->Flags & SMB2_SESSION_REQ_FLAG_BINDING)) { +- rc = conn->ops->generate_encryptionkey(sess); +- if (rc) { +- ksmbd_debug(SMB, +- "SMB3 encryption key generation failed\n"); +- return -EINVAL; +- } +- sess->enc = true; +- rsp->SessionFlags = SMB2_SESSION_FLAG_ENCRYPT_DATA_LE; +- /* +- * signing is disable if encryption is enable +- * on this session +- */ +- sess->sign = false; ++ if (smb3_encryption_negotiated(conn) && ++ !(req->Flags & SMB2_SESSION_REQ_FLAG_BINDING)) { ++ rc = conn->ops->generate_encryptionkey(sess); ++ if (rc) { ++ ksmbd_debug(SMB, ++ "SMB3 encryption key generation failed\n"); ++ return -EINVAL; + } ++ sess->enc = true; ++ rsp->SessionFlags = SMB2_SESSION_FLAG_ENCRYPT_DATA_LE; ++ /* ++ * signing is disable if encryption is enable ++ * on this session ++ */ ++ sess->sign = false; + } + + binding_session: +@@ -1562,8 +1587,7 @@ static int krb5_authenticate(struct ksmbd_work *work) + (req->SecurityMode & SMB2_NEGOTIATE_SIGNING_REQUIRED)) + sess->sign = true; + +- if ((conn->vals->capabilities & SMB2_GLOBAL_CAP_ENCRYPTION) && +- conn->ops->generate_encryptionkey) { ++ if (smb3_encryption_negotiated(conn)) { + retval = conn->ops->generate_encryptionkey(sess); + if (retval) { + ksmbd_debug(SMB, +@@ -1700,8 +1724,10 @@ int smb2_sess_setup(struct ksmbd_work *work) + negblob_off = le16_to_cpu(req->SecurityBufferOffset); + negblob_len = le16_to_cpu(req->SecurityBufferLength); + if (negblob_off < (offsetof(struct smb2_sess_setup_req, Buffer) - 4) || +- negblob_len < offsetof(struct negotiate_message, NegotiateFlags)) +- return -EINVAL; ++ negblob_len < offsetof(struct negotiate_message, NegotiateFlags)) { ++ rc = -EINVAL; ++ goto out_err; ++ } + + negblob = (struct negotiate_message *)((char *)&req->hdr.ProtocolId + + negblob_off); +@@ -1906,8 +1932,9 @@ out_err1: + rsp->hdr.Status = STATUS_SUCCESS; + rc = 0; + break; ++ case -ENOENT: + case KSMBD_TREE_CONN_STATUS_NO_SHARE: +- rsp->hdr.Status = STATUS_BAD_NETWORK_PATH; ++ rsp->hdr.Status = STATUS_BAD_NETWORK_NAME; + break; + case -ENOMEM: + case KSMBD_TREE_CONN_STATUS_NOMEM: +@@ -2019,6 +2046,7 @@ int smb2_tree_disconnect(struct ksmbd_work *work) + + ksmbd_close_tree_conn_fds(work); + ksmbd_tree_conn_disconnect(sess, tcon); ++ work->tcon = NULL; + return 0; + } + +@@ -2291,15 +2319,15 @@ static int smb2_remove_smb_xattrs(struct path *path) + name += strlen(name) + 1) { + ksmbd_debug(SMB, "%s, len %zd\n", name, strlen(name)); + +- if (strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN) && +- strncmp(&name[XATTR_USER_PREFIX_LEN], DOS_ATTRIBUTE_PREFIX, +- DOS_ATTRIBUTE_PREFIX_LEN) && +- strncmp(&name[XATTR_USER_PREFIX_LEN], STREAM_PREFIX, STREAM_PREFIX_LEN)) +- continue; +- +- err = ksmbd_vfs_remove_xattr(user_ns, path->dentry, name); +- if (err) +- ksmbd_debug(SMB, "remove xattr failed : %s\n", name); ++ if (!strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN) && ++ !strncmp(&name[XATTR_USER_PREFIX_LEN], STREAM_PREFIX, ++ STREAM_PREFIX_LEN)) { ++ err = 
ksmbd_vfs_remove_xattr(user_ns, path->dentry, ++ name); ++ if (err) ++ ksmbd_debug(SMB, "remove xattr failed : %s\n", ++ name); ++ } + } + out: + kvfree(xattr_list); +@@ -2670,7 +2698,7 @@ int smb2_open(struct ksmbd_work *work) + (struct create_posix *)context; + if (le16_to_cpu(context->DataOffset) + + le32_to_cpu(context->DataLength) < +- sizeof(struct create_posix)) { ++ sizeof(struct create_posix) - 4) { + rc = -EINVAL; + goto err_out1; + } +@@ -2955,13 +2983,17 @@ int smb2_open(struct ksmbd_work *work) + goto err_out; + + rc = build_sec_desc(user_ns, +- pntsd, NULL, ++ pntsd, NULL, 0, + OWNER_SECINFO | + GROUP_SECINFO | + DACL_SECINFO, + &pntsd_size, &fattr); + posix_acl_release(fattr.cf_acls); + posix_acl_release(fattr.cf_dacls); ++ if (rc) { ++ kfree(pntsd); ++ goto err_out; ++ } + + rc = ksmbd_vfs_set_sd_xattr(conn, + user_ns, +@@ -3398,9 +3430,9 @@ static int smb2_populate_readdir_entry(struct ksmbd_conn *conn, int info_level, + goto free_conv_name; + } + +- struct_sz = readdir_info_level_struct_sz(info_level); +- next_entry_offset = ALIGN(struct_sz - 1 + conv_len, +- KSMBD_DIR_INFO_ALIGNMENT); ++ struct_sz = readdir_info_level_struct_sz(info_level) - 1 + conv_len; ++ next_entry_offset = ALIGN(struct_sz, KSMBD_DIR_INFO_ALIGNMENT); ++ d_info->last_entry_off_align = next_entry_offset - struct_sz; + + if (next_entry_offset > d_info->out_buf_len) { + d_info->out_buf_len = 0; +@@ -3771,11 +3803,6 @@ static int __query_dir(struct dir_context *ctx, const char *name, int namlen, + return 0; + } + +-static void restart_ctx(struct dir_context *ctx) +-{ +- ctx->pos = 0; +-} +- + static int verify_info_level(int info_level) + { + switch (info_level) { +@@ -3794,6 +3821,15 @@ static int verify_info_level(int info_level) + return 0; + } + ++static int smb2_resp_buf_len(struct ksmbd_work *work, unsigned short hdr2_len) ++{ ++ int free_len; ++ ++ free_len = (int)(work->response_sz - ++ (get_rfc1002_len(work->response_buf) + 4)) - hdr2_len; ++ return free_len; ++} ++ + static int smb2_calc_max_out_buf_len(struct ksmbd_work *work, + unsigned short hdr2_len, + unsigned int out_buf_len) +@@ -3803,9 +3839,7 @@ static int smb2_calc_max_out_buf_len(struct ksmbd_work *work, + if (out_buf_len > work->conn->vals->max_trans_size) + return -EINVAL; + +- free_len = (int)(work->response_sz - +- (get_rfc1002_len(work->response_buf) + 4)) - +- hdr2_len; ++ free_len = smb2_resp_buf_len(work, hdr2_len); + if (free_len < 0) + return -EINVAL; + +@@ -3882,7 +3916,6 @@ int smb2_query_dir(struct ksmbd_work *work) + if (srch_flag & SMB2_REOPEN || srch_flag & SMB2_RESTART_SCANS) { + ksmbd_debug(SMB, "Restart directory scan\n"); + generic_file_llseek(dir_fp->filp, 0, SEEK_SET); +- restart_ctx(&dir_fp->readdir_data.ctx); + } + + memset(&d_info, 0, sizeof(struct ksmbd_dir_info)); +@@ -3923,11 +3956,15 @@ int smb2_query_dir(struct ksmbd_work *work) + set_ctx_actor(&dir_fp->readdir_data.ctx, __query_dir); + + rc = iterate_dir(dir_fp->filp, &dir_fp->readdir_data.ctx); +- if (rc == 0) +- restart_ctx(&dir_fp->readdir_data.ctx); +- if (rc == -ENOSPC) ++ /* ++ * req->OutputBufferLength is too small to contain even one entry. ++ * In this case, it immediately returns OutputBufferLength 0 to client. 
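
The smb2_populate_readdir_entry() change above records last_entry_off_align, the padding used to round the final entry up to KSMBD_DIR_INFO_ALIGNMENT, so it can be subtracted from the byte count returned to the client: every entry is 8-byte aligned except the last, which needs no trailing pad. The arithmetic in isolation (the entry sizes below are made up):

    #include <stdio.h>

    #define ALIGN_UP(x, a) (((x) + (a) - 1) & ~((a) - 1))

    int main(void)
    {
        unsigned int entry_sizes[] = { 94, 101, 87 };
        unsigned int n = sizeof(entry_sizes) / sizeof(entry_sizes[0]);
        unsigned int data_count = 0, last_align = 0;

        for (unsigned int i = 0; i < n; i++) {
            unsigned int padded = ALIGN_UP(entry_sizes[i], 8);

            last_align = padded - entry_sizes[i]; /* this entry's padding */
            data_count += padded;
        }
        data_count -= last_align;   /* final entry needs no trailing pad */
        printf("reported buffer length: %u\n", data_count);
        return 0;
    }
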
++ */ ++ if (!d_info.out_buf_len && !d_info.num_entry) ++ goto no_buf_len; ++ if (rc > 0 || rc == -ENOSPC) + rc = 0; +- if (rc) ++ else if (rc) + goto err_out; + + d_info.wptr = d_info.rptr; +@@ -3949,9 +3986,12 @@ int smb2_query_dir(struct ksmbd_work *work) + rsp->Buffer[0] = 0; + inc_rfc1001_len(rsp_org, 9); + } else { ++no_buf_len: + ((struct file_directory_info *) + ((char *)rsp->Buffer + d_info.last_entry_offset)) + ->NextEntryOffset = 0; ++ if (d_info.data_count >= d_info.last_entry_off_align) ++ d_info.data_count -= d_info.last_entry_off_align; + + rsp->StructureSize = cpu_to_le16(9); + rsp->OutputBufferOffset = cpu_to_le16(72); +@@ -3981,6 +4021,8 @@ err_out2: + rsp->hdr.Status = STATUS_NO_MEMORY; + else if (rc == -EFAULT) + rsp->hdr.Status = STATUS_INVALID_INFO_CLASS; ++ else if (rc == -EIO) ++ rsp->hdr.Status = STATUS_FILE_CORRUPT_ERROR; + if (!rsp->hdr.Status) + rsp->hdr.Status = STATUS_UNEXPECTED_IO_ERROR; + +@@ -4450,6 +4492,12 @@ static void get_file_stream_info(struct ksmbd_work *work, + &stat); + file_info = (struct smb2_file_stream_info *)rsp->Buffer; + ++ buf_free_len = ++ smb2_calc_max_out_buf_len(work, 8, ++ le32_to_cpu(req->OutputBufferLength)); ++ if (buf_free_len < 0) ++ goto out; ++ + xattr_list_len = ksmbd_vfs_listxattr(path->dentry, &xattr_list); + if (xattr_list_len < 0) { + goto out; +@@ -4458,12 +4506,6 @@ static void get_file_stream_info(struct ksmbd_work *work, + goto out; + } + +- buf_free_len = +- smb2_calc_max_out_buf_len(work, 8, +- le32_to_cpu(req->OutputBufferLength)); +- if (buf_free_len < 0) +- goto out; +- + while (idx < xattr_list_len) { + stream_name = xattr_list + idx; + streamlen = strlen(stream_name); +@@ -4489,8 +4531,10 @@ static void get_file_stream_info(struct ksmbd_work *work, + ":%s", &stream_name[XATTR_NAME_STREAM_LEN]); + + next = sizeof(struct smb2_file_stream_info) + streamlen * 2; +- if (next > buf_free_len) ++ if (next > buf_free_len) { ++ kfree(stream_buf); + break; ++ } + + file_info = (struct smb2_file_stream_info *)&rsp->Buffer[nbytes]; + streamlen = smbConvertToUTF16((__le16 *)file_info->StreamName, +@@ -4507,6 +4551,7 @@ static void get_file_stream_info(struct ksmbd_work *work, + file_info->NextEntryOffset = cpu_to_le32(next); + } + ++out: + if (!S_ISDIR(stat.mode) && + buf_free_len >= sizeof(struct smb2_file_stream_info) + 7 * 2) { + file_info = (struct smb2_file_stream_info *) +@@ -4515,14 +4560,13 @@ static void get_file_stream_info(struct ksmbd_work *work, + "::$DATA", 7, conn->local_nls, 0); + streamlen *= 2; + file_info->StreamNameLength = cpu_to_le32(streamlen); +- file_info->StreamSize = 0; +- file_info->StreamAllocationSize = 0; ++ file_info->StreamSize = cpu_to_le64(stat.size); ++ file_info->StreamAllocationSize = cpu_to_le64(stat.blocks << 9); + nbytes += sizeof(struct smb2_file_stream_info) + streamlen; + } + + /* last entry offset should be 0 */ + file_info->NextEntryOffset = 0; +-out: + kvfree(xattr_list); + + rsp->OutputBufferLength = cpu_to_le32(nbytes); +@@ -4891,11 +4935,18 @@ static int smb2_get_info_filesystem(struct ksmbd_work *work, + { + struct filesystem_vol_info *info; + size_t sz; ++ unsigned int serial_crc = 0; + + info = (struct filesystem_vol_info *)(rsp->Buffer); + info->VolumeCreationTime = 0; ++ serial_crc = crc32_le(serial_crc, share->name, ++ strlen(share->name)); ++ serial_crc = crc32_le(serial_crc, share->path, ++ strlen(share->path)); ++ serial_crc = crc32_le(serial_crc, ksmbd_netbios_name(), ++ strlen(ksmbd_netbios_name())); + /* Taking dummy value of serial number*/ +- info->SerialNumber = 
cpu_to_le32(0xbc3ac512); ++ info->SerialNumber = cpu_to_le32(serial_crc); + len = smbConvertToUTF16((__le16 *)info->VolumeLabel, + share->name, PATH_MAX, + conn->local_nls, 0); +@@ -4963,15 +5014,17 @@ static int smb2_get_info_filesystem(struct ksmbd_work *work, + case FS_SECTOR_SIZE_INFORMATION: + { + struct smb3_fs_ss_info *info; ++ unsigned int sector_size = ++ min_t(unsigned int, path.mnt->mnt_sb->s_blocksize, 4096); + + info = (struct smb3_fs_ss_info *)(rsp->Buffer); + +- info->LogicalBytesPerSector = cpu_to_le32(stfs.f_bsize); ++ info->LogicalBytesPerSector = cpu_to_le32(sector_size); + info->PhysicalBytesPerSectorForAtomicity = +- cpu_to_le32(stfs.f_bsize); +- info->PhysicalBytesPerSectorForPerf = cpu_to_le32(stfs.f_bsize); ++ cpu_to_le32(sector_size); ++ info->PhysicalBytesPerSectorForPerf = cpu_to_le32(sector_size); + info->FSEffPhysicalBytesPerSectorForAtomicity = +- cpu_to_le32(stfs.f_bsize); ++ cpu_to_le32(sector_size); + info->Flags = cpu_to_le32(SSINFO_FLAGS_ALIGNED_DEVICE | + SSINFO_FLAGS_PARTITION_ALIGNED_ON_DEVICE); + info->ByteOffsetForSectorAlignment = 0; +@@ -5045,15 +5098,15 @@ static int smb2_get_info_sec(struct ksmbd_work *work, + struct smb_ntsd *pntsd = (struct smb_ntsd *)rsp->Buffer, *ppntsd = NULL; + struct smb_fattr fattr = {{0}}; + struct inode *inode; +- __u32 secdesclen; ++ __u32 secdesclen = 0; + unsigned int id = KSMBD_NO_FID, pid = KSMBD_NO_FID; + int addition_info = le32_to_cpu(req->AdditionalInformation); +- int rc; ++ int rc = 0, ppntsd_size = 0; + + if (addition_info & ~(OWNER_SECINFO | GROUP_SECINFO | DACL_SECINFO | + PROTECTED_DACL_SECINFO | + UNPROTECTED_DACL_SECINFO)) { +- pr_err("Unsupported addition info: 0x%x)\n", ++ ksmbd_debug(SMB, "Unsupported addition info: 0x%x)\n", + addition_info); + + pntsd->revision = cpu_to_le16(1); +@@ -5094,11 +5147,14 @@ static int smb2_get_info_sec(struct ksmbd_work *work, + + if (test_share_config_flag(work->tcon->share_conf, + KSMBD_SHARE_FLAG_ACL_XATTR)) +- ksmbd_vfs_get_sd_xattr(work->conn, user_ns, +- fp->filp->f_path.dentry, &ppntsd); +- +- rc = build_sec_desc(user_ns, pntsd, ppntsd, addition_info, +- &secdesclen, &fattr); ++ ppntsd_size = ksmbd_vfs_get_sd_xattr(work->conn, user_ns, ++ fp->filp->f_path.dentry, ++ &ppntsd); ++ ++ /* Check if sd buffer size exceeds response buffer size */ ++ if (smb2_resp_buf_len(work, 8) > ppntsd_size) ++ rc = build_sec_desc(user_ns, pntsd, ppntsd, ppntsd_size, ++ addition_info, &secdesclen, &fattr); + posix_acl_release(fattr.cf_acls); + posix_acl_release(fattr.cf_dacls); + kfree(ppntsd); +@@ -5734,8 +5790,10 @@ static int set_rename_info(struct ksmbd_work *work, struct ksmbd_file *fp, + if (parent_fp) { + if (parent_fp->daccess & FILE_DELETE_LE) { + pr_err("parent dir is opened with delete access\n"); ++ ksmbd_fd_put(work, parent_fp); + return -ESHARE; + } ++ ksmbd_fd_put(work, parent_fp); + } + next: + return smb2_rename(work, fp, user_ns, rename_info, +@@ -6427,10 +6485,8 @@ int smb2_write(struct ksmbd_work *work) + (offsetof(struct smb2_write_req, Buffer) - 4)) { + data_buf = (char *)&req->Buffer[0]; + } else { +- if ((u64)le16_to_cpu(req->DataOffset) + length > get_rfc1002_len(req)) { +- pr_err("invalid write data offset %u, smb_len %u\n", +- le16_to_cpu(req->DataOffset), +- get_rfc1002_len(req)); ++ if (le16_to_cpu(req->DataOffset) < ++ offsetof(struct smb2_write_req, Buffer)) { + err = -EINVAL; + goto out; + } +@@ -7312,7 +7368,7 @@ static int fsctl_validate_negotiate_info(struct ksmbd_conn *conn, + int ret = 0; + int dialect; + +- if (in_buf_len < sizeof(struct 
validate_negotiate_info_req) + ++ if (in_buf_len < offsetof(struct validate_negotiate_info_req, Dialects) + + le16_to_cpu(neg_req->DialectCount) * sizeof(__le16)) + return -EINVAL; + +@@ -7563,11 +7619,16 @@ int smb2_ioctl(struct ksmbd_work *work) + goto out; + } + +- if (in_buf_len < sizeof(struct validate_negotiate_info_req)) +- return -EINVAL; ++ if (in_buf_len < offsetof(struct validate_negotiate_info_req, ++ Dialects)) { ++ ret = -EINVAL; ++ goto out; ++ } + +- if (out_buf_len < sizeof(struct validate_negotiate_info_rsp)) +- return -EINVAL; ++ if (out_buf_len < sizeof(struct validate_negotiate_info_rsp)) { ++ ret = -EINVAL; ++ goto out; ++ } + + ret = fsctl_validate_negotiate_info(conn, + (struct validate_negotiate_info_req *)&req->Buffer[0], +@@ -7645,7 +7706,7 @@ int smb2_ioctl(struct ksmbd_work *work) + { + struct file_zero_data_information *zero_data; + struct ksmbd_file *fp; +- loff_t off, len; ++ loff_t off, len, bfz; + + if (!test_tree_conn_flag(work->tcon, KSMBD_TREE_CONN_FLAG_WRITABLE)) { + ksmbd_debug(SMB, +@@ -7662,19 +7723,26 @@ int smb2_ioctl(struct ksmbd_work *work) + zero_data = + (struct file_zero_data_information *)&req->Buffer[0]; + +- fp = ksmbd_lookup_fd_fast(work, id); +- if (!fp) { +- ret = -ENOENT; ++ off = le64_to_cpu(zero_data->FileOffset); ++ bfz = le64_to_cpu(zero_data->BeyondFinalZero); ++ if (off > bfz) { ++ ret = -EINVAL; + goto out; + } + +- off = le64_to_cpu(zero_data->FileOffset); +- len = le64_to_cpu(zero_data->BeyondFinalZero) - off; ++ len = bfz - off; ++ if (len) { ++ fp = ksmbd_lookup_fd_fast(work, id); ++ if (!fp) { ++ ret = -ENOENT; ++ goto out; ++ } + +- ret = ksmbd_vfs_zero_data(work, fp, off, len); +- ksmbd_fd_put(work, fp); +- if (ret < 0) +- goto out; ++ ret = ksmbd_vfs_zero_data(work, fp, off, len); ++ ksmbd_fd_put(work, fp); ++ if (ret < 0) ++ goto out; ++ } + break; + } + case FSCTL_QUERY_ALLOCATED_RANGES: +@@ -7748,14 +7816,24 @@ int smb2_ioctl(struct ksmbd_work *work) + src_off = le64_to_cpu(dup_ext->SourceFileOffset); + dst_off = le64_to_cpu(dup_ext->TargetFileOffset); + length = le64_to_cpu(dup_ext->ByteCount); +- cloned = vfs_clone_file_range(fp_in->filp, src_off, fp_out->filp, +- dst_off, length, 0); ++ /* ++ * XXX: It is not clear if FSCTL_DUPLICATE_EXTENTS_TO_FILE ++ * should fall back to vfs_copy_file_range(). This could be ++ * beneficial when re-exporting nfs/smb mount, but note that ++ * this can result in partial copy that returns an error status. ++ * If/when FSCTL_DUPLICATE_EXTENTS_TO_FILE_EX is implemented, ++ * fall back to vfs_copy_file_range(), should be avoided when ++ * the flag DUPLICATE_EXTENTS_DATA_EX_SOURCE_ATOMIC is set. 
++ */ ++ cloned = vfs_clone_file_range(fp_in->filp, src_off, ++ fp_out->filp, dst_off, length, 0); + if (cloned == -EXDEV || cloned == -EOPNOTSUPP) { + ret = -EOPNOTSUPP; + goto dup_ext_out; + } else if (cloned != length) { + cloned = vfs_copy_file_range(fp_in->filp, src_off, +- fp_out->filp, dst_off, length, 0); ++ fp_out->filp, dst_off, ++ length, 0); + if (cloned != length) { + if (cloned < 0) + ret = cloned; +@@ -8535,6 +8613,7 @@ int smb3_decrypt_req(struct ksmbd_work *work) + bool smb3_11_final_sess_setup_resp(struct ksmbd_work *work) + { + struct ksmbd_conn *conn = work->conn; ++ struct ksmbd_session *sess = work->sess; + struct smb2_hdr *rsp = work->response_buf; + + if (conn->dialect < SMB30_PROT_ID) +@@ -8544,6 +8623,7 @@ bool smb3_11_final_sess_setup_resp(struct ksmbd_work *work) + rsp = ksmbd_resp_buf_next(work); + + if (le16_to_cpu(rsp->Command) == SMB2_SESSION_SETUP_HE && ++ sess->user && !user_guest(sess->user) && + rsp->Status == STATUS_SUCCESS) + return true; + return false; +diff --git a/fs/ksmbd/smb2pdu.h b/fs/ksmbd/smb2pdu.h +index ff5a2f01d34ae..ddc3cea9c9055 100644 +--- a/fs/ksmbd/smb2pdu.h ++++ b/fs/ksmbd/smb2pdu.h +@@ -113,8 +113,9 @@ + #define SMB21_DEFAULT_IOSIZE (1024 * 1024) + #define SMB3_DEFAULT_IOSIZE (4 * 1024 * 1024) + #define SMB3_DEFAULT_TRANS_SIZE (1024 * 1024) +-#define SMB3_MIN_IOSIZE (64 * 1024) +-#define SMB3_MAX_IOSIZE (8 * 1024 * 1024) ++#define SMB3_MIN_IOSIZE (64 * 1024) ++#define SMB3_MAX_IOSIZE (8 * 1024 * 1024) ++#define SMB3_MAX_MSGSIZE (4 * 4096) + + /* + * SMB2 Header Definition +@@ -1647,6 +1648,7 @@ int init_smb3_11_server(struct ksmbd_conn *conn); + void init_smb2_max_read_size(unsigned int sz); + void init_smb2_max_write_size(unsigned int sz); + void init_smb2_max_trans_size(unsigned int sz); ++void init_smb2_max_credits(unsigned int sz); + + bool is_smb2_neg_cmd(struct ksmbd_work *work); + bool is_smb2_rsp(struct ksmbd_work *work); +diff --git a/fs/ksmbd/smb_common.c b/fs/ksmbd/smb_common.c +index 707490ab1f4c4..22f460984742f 100644 +--- a/fs/ksmbd/smb_common.c ++++ b/fs/ksmbd/smb_common.c +@@ -4,6 +4,8 @@ + * Copyright (C) 2018 Namjae Jeon <linkinjeon@kernel.org> + */ + ++#include <linux/user_namespace.h> ++ + #include "smb_common.h" + #include "server.h" + #include "misc.h" +@@ -140,8 +142,10 @@ int ksmbd_verify_smb_message(struct ksmbd_work *work) + + hdr = work->request_buf; + if (*(__le32 *)hdr->Protocol == SMB1_PROTO_NUMBER && +- hdr->Command == SMB_COM_NEGOTIATE) ++ hdr->Command == SMB_COM_NEGOTIATE) { ++ work->conn->outstanding_credits++; + return 0; ++ } + + return -EINVAL; + } +@@ -308,14 +312,17 @@ int ksmbd_populate_dot_dotdot_entries(struct ksmbd_work *work, int info_level, + for (i = 0; i < 2; i++) { + struct kstat kstat; + struct ksmbd_kstat ksmbd_kstat; ++ struct dentry *dentry; + + if (!dir->dot_dotdot[i]) { /* fill dot entry info */ + if (i == 0) { + d_info->name = "."; + d_info->name_len = 1; ++ dentry = dir->filp->f_path.dentry; + } else { + d_info->name = ".."; + d_info->name_len = 2; ++ dentry = dir->filp->f_path.dentry->d_parent; + } + + if (!match_pattern(d_info->name, d_info->name_len, +@@ -327,7 +334,7 @@ int ksmbd_populate_dot_dotdot_entries(struct ksmbd_work *work, int info_level, + ksmbd_kstat.kstat = &kstat; + ksmbd_vfs_fill_dentry_attrs(work, + user_ns, +- dir->filp->f_path.dentry->d_parent, ++ dentry, + &ksmbd_kstat); + rc = fn(conn, info_level, d_info, &ksmbd_kstat); + if (rc) +@@ -619,8 +626,8 @@ int ksmbd_override_fsids(struct ksmbd_work *work) + if (!cred) + return -ENOMEM; + +- cred->fsuid = 
make_kuid(current_user_ns(), uid); +- cred->fsgid = make_kgid(current_user_ns(), gid); ++ cred->fsuid = make_kuid(&init_user_ns, uid); ++ cred->fsgid = make_kgid(&init_user_ns, gid); + + gi = groups_alloc(0); + if (!gi) { +diff --git a/fs/ksmbd/smb_common.h b/fs/ksmbd/smb_common.h +index 6e79e7577f6b7..1eba8dabaf317 100644 +--- a/fs/ksmbd/smb_common.h ++++ b/fs/ksmbd/smb_common.h +@@ -412,6 +412,7 @@ struct smb_version_values { + __u32 max_read_size; + __u32 max_write_size; + __u32 max_trans_size; ++ __u32 max_credits; + __u32 large_lock_type; + __u32 exclusive_lock_type; + __u32 shared_lock_type; +diff --git a/fs/ksmbd/smbacl.c b/fs/ksmbd/smbacl.c +index bd792db326239..3781bca2c8fc4 100644 +--- a/fs/ksmbd/smbacl.c ++++ b/fs/ksmbd/smbacl.c +@@ -9,6 +9,7 @@ + #include <linux/fs.h> + #include <linux/slab.h> + #include <linux/string.h> ++#include <linux/mnt_idmapping.h> + + #include "smbacl.h" + #include "smb_common.h" +@@ -274,14 +275,7 @@ static int sid_to_id(struct user_namespace *user_ns, + uid_t id; + + id = le32_to_cpu(psid->sub_auth[psid->num_subauth - 1]); +- /* +- * Translate raw sid into kuid in the server's user +- * namespace. +- */ +- uid = make_kuid(&init_user_ns, id); +- +- /* If this is an idmapped mount, apply the idmapping. */ +- uid = kuid_from_mnt(user_ns, uid); ++ uid = mapped_kuid_user(user_ns, &init_user_ns, KUIDT_INIT(id)); + if (uid_valid(uid)) { + fattr->cf_uid = uid; + rc = 0; +@@ -291,14 +285,7 @@ static int sid_to_id(struct user_namespace *user_ns, + gid_t id; + + id = le32_to_cpu(psid->sub_auth[psid->num_subauth - 1]); +- /* +- * Translate raw sid into kgid in the server's user +- * namespace. +- */ +- gid = make_kgid(&init_user_ns, id); +- +- /* If this is an idmapped mount, apply the idmapping. */ +- gid = kgid_from_mnt(user_ns, gid); ++ gid = mapped_kgid_user(user_ns, &init_user_ns, KGIDT_INIT(id)); + if (gid_valid(gid)) { + fattr->cf_gid = gid; + rc = 0; +@@ -703,6 +690,7 @@ posix_default_acl: + static void set_ntacl_dacl(struct user_namespace *user_ns, + struct smb_acl *pndacl, + struct smb_acl *nt_dacl, ++ unsigned int aces_size, + const struct smb_sid *pownersid, + const struct smb_sid *pgrpsid, + struct smb_fattr *fattr) +@@ -716,9 +704,19 @@ static void set_ntacl_dacl(struct user_namespace *user_ns, + if (nt_num_aces) { + ntace = (struct smb_ace *)((char *)nt_dacl + sizeof(struct smb_acl)); + for (i = 0; i < nt_num_aces; i++) { +- memcpy((char *)pndace + size, ntace, le16_to_cpu(ntace->size)); +- size += le16_to_cpu(ntace->size); +- ntace = (struct smb_ace *)((char *)ntace + le16_to_cpu(ntace->size)); ++ unsigned short nt_ace_size; ++ ++ if (offsetof(struct smb_ace, access_req) > aces_size) ++ break; ++ ++ nt_ace_size = le16_to_cpu(ntace->size); ++ if (nt_ace_size > aces_size) ++ break; ++ ++ memcpy((char *)pndace + size, ntace, nt_ace_size); ++ size += nt_ace_size; ++ aces_size -= nt_ace_size; ++ ntace = (struct smb_ace *)((char *)ntace + nt_ace_size); + num_aces++; + } + } +@@ -891,7 +889,7 @@ int parse_sec_desc(struct user_namespace *user_ns, struct smb_ntsd *pntsd, + /* Convert permission bits from mode to equivalent CIFS ACL */ + int build_sec_desc(struct user_namespace *user_ns, + struct smb_ntsd *pntsd, struct smb_ntsd *ppntsd, +- int addition_info, __u32 *secdesclen, ++ int ppntsd_size, int addition_info, __u32 *secdesclen, + struct smb_fattr *fattr) + { + int rc = 0; +@@ -951,15 +949,25 @@ int build_sec_desc(struct user_namespace *user_ns, + + if (!ppntsd) { + set_mode_dacl(user_ns, dacl_ptr, fattr); +- } else if (!ppntsd->dacloffset) { +- goto 
out; + } else { + struct smb_acl *ppdacl_ptr; ++ unsigned int dacl_offset = le32_to_cpu(ppntsd->dacloffset); ++ int ppdacl_size, ntacl_size = ppntsd_size - dacl_offset; ++ ++ if (!dacl_offset || ++ (dacl_offset + sizeof(struct smb_acl) > ppntsd_size)) ++ goto out; ++ ++ ppdacl_ptr = (struct smb_acl *)((char *)ppntsd + dacl_offset); ++ ppdacl_size = le16_to_cpu(ppdacl_ptr->size); ++ if (ppdacl_size > ntacl_size || ++ ppdacl_size < sizeof(struct smb_acl)) ++ goto out; + +- ppdacl_ptr = (struct smb_acl *)((char *)ppntsd + +- le32_to_cpu(ppntsd->dacloffset)); + set_ntacl_dacl(user_ns, dacl_ptr, ppdacl_ptr, +- nowner_sid_ptr, ngroup_sid_ptr, fattr); ++ ntacl_size - sizeof(struct smb_acl), ++ nowner_sid_ptr, ngroup_sid_ptr, ++ fattr); + } + pntsd->dacloffset = cpu_to_le32(offset); + offset += le16_to_cpu(dacl_ptr->size); +@@ -993,24 +1001,31 @@ int smb_inherit_dacl(struct ksmbd_conn *conn, + struct smb_sid owner_sid, group_sid; + struct dentry *parent = path->dentry->d_parent; + struct user_namespace *user_ns = mnt_user_ns(path->mnt); +- int inherited_flags = 0, flags = 0, i, ace_cnt = 0, nt_size = 0; +- int rc = 0, num_aces, dacloffset, pntsd_type, acl_len; ++ int inherited_flags = 0, flags = 0, i, ace_cnt = 0, nt_size = 0, pdacl_size; ++ int rc = 0, num_aces, dacloffset, pntsd_type, pntsd_size, acl_len, aces_size; + char *aces_base; + bool is_dir = S_ISDIR(d_inode(path->dentry)->i_mode); + +- acl_len = ksmbd_vfs_get_sd_xattr(conn, user_ns, +- parent, &parent_pntsd); +- if (acl_len <= 0) ++ pntsd_size = ksmbd_vfs_get_sd_xattr(conn, user_ns, ++ parent, &parent_pntsd); ++ if (pntsd_size <= 0) + return -ENOENT; + dacloffset = le32_to_cpu(parent_pntsd->dacloffset); +- if (!dacloffset) { ++ if (!dacloffset || (dacloffset + sizeof(struct smb_acl) > pntsd_size)) { + rc = -EINVAL; + goto free_parent_pntsd; + } + + parent_pdacl = (struct smb_acl *)((char *)parent_pntsd + dacloffset); ++ acl_len = pntsd_size - dacloffset; + num_aces = le32_to_cpu(parent_pdacl->num_aces); + pntsd_type = le16_to_cpu(parent_pntsd->type); ++ pdacl_size = le16_to_cpu(parent_pdacl->size); ++ ++ if (pdacl_size > acl_len || pdacl_size < sizeof(struct smb_acl)) { ++ rc = -EINVAL; ++ goto free_parent_pntsd; ++ } + + aces_base = kmalloc(sizeof(struct smb_ace) * num_aces * 2, GFP_KERNEL); + if (!aces_base) { +@@ -1021,11 +1036,23 @@ int smb_inherit_dacl(struct ksmbd_conn *conn, + aces = (struct smb_ace *)aces_base; + parent_aces = (struct smb_ace *)((char *)parent_pdacl + + sizeof(struct smb_acl)); ++ aces_size = acl_len - sizeof(struct smb_acl); + + if (pntsd_type & DACL_AUTO_INHERITED) + inherited_flags = INHERITED_ACE; + + for (i = 0; i < num_aces; i++) { ++ int pace_size; ++ ++ if (offsetof(struct smb_ace, access_req) > aces_size) ++ break; ++ ++ pace_size = le16_to_cpu(parent_aces->size); ++ if (pace_size > aces_size) ++ break; ++ ++ aces_size -= pace_size; ++ + flags = parent_aces->flags; + if (!smb_inherit_flags(flags, is_dir)) + goto pass; +@@ -1070,8 +1097,7 @@ int smb_inherit_dacl(struct ksmbd_conn *conn, + aces = (struct smb_ace *)((char *)aces + le16_to_cpu(aces->size)); + ace_cnt++; + pass: +- parent_aces = +- (struct smb_ace *)((char *)parent_aces + le16_to_cpu(parent_aces->size)); ++ parent_aces = (struct smb_ace *)((char *)parent_aces + pace_size); + } + + if (nt_size > 0) { +@@ -1166,7 +1192,7 @@ int smb_check_perm_dacl(struct ksmbd_conn *conn, struct path *path, + struct smb_ntsd *pntsd = NULL; + struct smb_acl *pdacl; + struct posix_acl *posix_acls; +- int rc = 0, acl_size; ++ int rc = 0, pntsd_size, acl_size, 
aces_size, pdacl_size, dacl_offset; + struct smb_sid sid; + int granted = le32_to_cpu(*pdaccess & ~FILE_MAXIMAL_ACCESS_LE); + struct smb_ace *ace; +@@ -1175,37 +1201,33 @@ int smb_check_perm_dacl(struct ksmbd_conn *conn, struct path *path, + struct smb_ace *others_ace = NULL; + struct posix_acl_entry *pa_entry; + unsigned int sid_type = SIDOWNER; +- char *end_of_acl; ++ unsigned short ace_size; + + ksmbd_debug(SMB, "check permission using windows acl\n"); +- acl_size = ksmbd_vfs_get_sd_xattr(conn, user_ns, +- path->dentry, &pntsd); +- if (acl_size <= 0 || !pntsd || !pntsd->dacloffset) { +- kfree(pntsd); +- return 0; +- } ++ pntsd_size = ksmbd_vfs_get_sd_xattr(conn, user_ns, ++ path->dentry, &pntsd); ++ if (pntsd_size <= 0 || !pntsd) ++ goto err_out; ++ ++ dacl_offset = le32_to_cpu(pntsd->dacloffset); ++ if (!dacl_offset || ++ (dacl_offset + sizeof(struct smb_acl) > pntsd_size)) ++ goto err_out; + + pdacl = (struct smb_acl *)((char *)pntsd + le32_to_cpu(pntsd->dacloffset)); +- end_of_acl = ((char *)pntsd) + acl_size; +- if (end_of_acl <= (char *)pdacl) { +- kfree(pntsd); +- return 0; +- } ++ acl_size = pntsd_size - dacl_offset; ++ pdacl_size = le16_to_cpu(pdacl->size); + +- if (end_of_acl < (char *)pdacl + le16_to_cpu(pdacl->size) || +- le16_to_cpu(pdacl->size) < sizeof(struct smb_acl)) { +- kfree(pntsd); +- return 0; +- } ++ if (pdacl_size > acl_size || pdacl_size < sizeof(struct smb_acl)) ++ goto err_out; + + if (!pdacl->num_aces) { +- if (!(le16_to_cpu(pdacl->size) - sizeof(struct smb_acl)) && ++ if (!(pdacl_size - sizeof(struct smb_acl)) && + *pdaccess & ~(FILE_READ_CONTROL_LE | FILE_WRITE_DAC_LE)) { + rc = -EACCES; + goto err_out; + } +- kfree(pntsd); +- return 0; ++ goto err_out; + } + + if (*pdaccess & FILE_MAXIMAL_ACCESS_LE) { +@@ -1213,11 +1235,16 @@ int smb_check_perm_dacl(struct ksmbd_conn *conn, struct path *path, + DELETE; + + ace = (struct smb_ace *)((char *)pdacl + sizeof(struct smb_acl)); ++ aces_size = acl_size - sizeof(struct smb_acl); + for (i = 0; i < le32_to_cpu(pdacl->num_aces); i++) { ++ if (offsetof(struct smb_ace, access_req) > aces_size) ++ break; ++ ace_size = le16_to_cpu(ace->size); ++ if (ace_size > aces_size) ++ break; ++ aces_size -= ace_size; + granted |= le32_to_cpu(ace->access_req); + ace = (struct smb_ace *)((char *)ace + le16_to_cpu(ace->size)); +- if (end_of_acl < (char *)ace) +- goto err_out; + } + + if (!pdacl->num_aces) +@@ -1229,7 +1256,15 @@ int smb_check_perm_dacl(struct ksmbd_conn *conn, struct path *path, + id_to_sid(uid, sid_type, &sid); + + ace = (struct smb_ace *)((char *)pdacl + sizeof(struct smb_acl)); ++ aces_size = acl_size - sizeof(struct smb_acl); + for (i = 0; i < le32_to_cpu(pdacl->num_aces); i++) { ++ if (offsetof(struct smb_ace, access_req) > aces_size) ++ break; ++ ace_size = le16_to_cpu(ace->size); ++ if (ace_size > aces_size) ++ break; ++ aces_size -= ace_size; ++ + if (!compare_sids(&sid, &ace->sid) || + !compare_sids(&sid_unix_NFS_mode, &ace->sid)) { + found = 1; +@@ -1239,8 +1274,6 @@ int smb_check_perm_dacl(struct ksmbd_conn *conn, struct path *path, + others_ace = ace; + + ace = (struct smb_ace *)((char *)ace + le16_to_cpu(ace->size)); +- if (end_of_acl < (char *)ace) +- goto err_out; + } + + if (*pdaccess & FILE_MAXIMAL_ACCESS_LE && found) { +@@ -1274,6 +1307,7 @@ int smb_check_perm_dacl(struct ksmbd_conn *conn, struct path *path, + if (!access_bits) + access_bits = + SET_MINIMUM_RIGHTS; ++ posix_acl_release(posix_acls); + goto check_access_bits; + } + } +diff --git a/fs/ksmbd/smbacl.h b/fs/ksmbd/smbacl.h +index 
73e08cad412bd..fcb2c83f29928 100644 +--- a/fs/ksmbd/smbacl.h ++++ b/fs/ksmbd/smbacl.h +@@ -11,6 +11,7 @@ + #include <linux/fs.h> + #include <linux/namei.h> + #include <linux/posix_acl.h> ++#include <linux/mnt_idmapping.h> + + #include "mgmt/tree_connect.h" + +@@ -192,7 +193,7 @@ struct posix_acl_state { + int parse_sec_desc(struct user_namespace *user_ns, struct smb_ntsd *pntsd, + int acl_len, struct smb_fattr *fattr); + int build_sec_desc(struct user_namespace *user_ns, struct smb_ntsd *pntsd, +- struct smb_ntsd *ppntsd, int addition_info, ++ struct smb_ntsd *ppntsd, int ppntsd_size, int addition_info, + __u32 *secdesclen, struct smb_fattr *fattr); + int init_acl_state(struct posix_acl_state *state, int cnt); + void free_acl_state(struct posix_acl_state *state); +@@ -216,7 +217,7 @@ static inline uid_t posix_acl_uid_translate(struct user_namespace *mnt_userns, + kuid_t kuid; + + /* If this is an idmapped mount, apply the idmapping. */ +- kuid = kuid_into_mnt(mnt_userns, pace->e_uid); ++ kuid = mapped_kuid_fs(mnt_userns, &init_user_ns, pace->e_uid); + + /* Translate the kuid into a userspace id ksmbd would see. */ + return from_kuid(&init_user_ns, kuid); +@@ -228,7 +229,7 @@ static inline gid_t posix_acl_gid_translate(struct user_namespace *mnt_userns, + kgid_t kgid; + + /* If this is an idmapped mount, apply the idmapping. */ +- kgid = kgid_into_mnt(mnt_userns, pace->e_gid); ++ kgid = mapped_kgid_fs(mnt_userns, &init_user_ns, pace->e_gid); + + /* Translate the kgid into a userspace id ksmbd would see. */ + return from_kgid(&init_user_ns, kgid); +diff --git a/fs/ksmbd/transport_ipc.c b/fs/ksmbd/transport_ipc.c +index 1acf1892a466c..a8313eed4f10d 100644 +--- a/fs/ksmbd/transport_ipc.c ++++ b/fs/ksmbd/transport_ipc.c +@@ -26,6 +26,7 @@ + #include "mgmt/ksmbd_ida.h" + #include "connection.h" + #include "transport_tcp.h" ++#include "transport_rdma.h" + + #define IPC_WAIT_TIMEOUT (2 * HZ) + +@@ -301,6 +302,13 @@ static int ipc_server_config_on_startup(struct ksmbd_startup_request *req) + init_smb2_max_write_size(req->smb2_max_write); + if (req->smb2_max_trans) + init_smb2_max_trans_size(req->smb2_max_trans); ++ if (req->smb2_max_credits) ++ init_smb2_max_credits(req->smb2_max_credits); ++ if (req->smbd_max_io_size) ++ init_smbd_max_io_size(req->smbd_max_io_size); ++ ++ if (req->max_connections) ++ server_conf.max_connections = req->max_connections; + + ret = ksmbd_set_netbios_name(req->netbios_name); + ret |= ksmbd_set_server_string(req->server_string); +diff --git a/fs/ksmbd/transport_rdma.c b/fs/ksmbd/transport_rdma.c +index a2fd5a4d4cd5e..9d67419929d6c 100644 +--- a/fs/ksmbd/transport_rdma.c ++++ b/fs/ksmbd/transport_rdma.c +@@ -75,7 +75,7 @@ static int smb_direct_max_fragmented_recv_size = 1024 * 1024; + /* The maximum single-message size which can be received */ + static int smb_direct_max_receive_size = 8192; + +-static int smb_direct_max_read_write_size = 1024 * 1024; ++static int smb_direct_max_read_write_size = SMBD_DEFAULT_IOSIZE; + + static int smb_direct_max_outstanding_rw_ops = 8; + +@@ -201,6 +201,12 @@ struct smb_direct_rdma_rw_msg { + struct scatterlist sg_list[0]; + }; + ++void init_smbd_max_io_size(unsigned int sz) ++{ ++ sz = clamp_val(sz, SMBD_MIN_IOSIZE, SMBD_MAX_IOSIZE); ++ smb_direct_max_read_write_size = sz; ++} ++ + static inline int get_buf_page_count(void *buf, int size) + { + return DIV_ROUND_UP((uintptr_t)buf + size, PAGE_SIZE) - +diff --git a/fs/ksmbd/transport_rdma.h b/fs/ksmbd/transport_rdma.h +index 0fa8adc0776f2..04a7a37685c34 100644 +--- 
a/fs/ksmbd/transport_rdma.h ++++ b/fs/ksmbd/transport_rdma.h +@@ -9,6 +9,10 @@ + + #define SMB_DIRECT_PORT 5445 + ++#define SMBD_DEFAULT_IOSIZE (8 * 1024 * 1024) ++#define SMBD_MIN_IOSIZE (512 * 1024) ++#define SMBD_MAX_IOSIZE (16 * 1024 * 1024) ++ + /* SMB DIRECT negotiation request packet [MS-SMBD] 2.2.1 */ + struct smb_direct_negotiate_req { + __le16 min_version; +@@ -54,10 +58,12 @@ struct smb_direct_data_transfer { + int ksmbd_rdma_init(void); + int ksmbd_rdma_destroy(void); + bool ksmbd_rdma_capable_netdev(struct net_device *netdev); ++void init_smbd_max_io_size(unsigned int sz); + #else + static inline int ksmbd_rdma_init(void) { return 0; } + static inline int ksmbd_rdma_destroy(void) { return 0; } + static inline bool ksmbd_rdma_capable_netdev(struct net_device *netdev) { return false; } ++static inline void init_smbd_max_io_size(unsigned int sz) { } + #endif + + #endif /* __KSMBD_TRANSPORT_RDMA_H__ */ +diff --git a/fs/ksmbd/transport_tcp.c b/fs/ksmbd/transport_tcp.c +index c14320e03b698..e0ca6cc04b91c 100644 +--- a/fs/ksmbd/transport_tcp.c ++++ b/fs/ksmbd/transport_tcp.c +@@ -15,6 +15,8 @@ + #define IFACE_STATE_DOWN BIT(0) + #define IFACE_STATE_CONFIGURED BIT(1) + ++static atomic_t active_num_conn; ++ + struct interface { + struct task_struct *ksmbd_kthread; + struct socket *ksmbd_socket; +@@ -185,8 +187,10 @@ static int ksmbd_tcp_new_connection(struct socket *client_sk) + struct tcp_transport *t; + + t = alloc_transport(client_sk); +- if (!t) ++ if (!t) { ++ sock_release(client_sk); + return -ENOMEM; ++ } + + csin = KSMBD_TCP_PEER_SOCKADDR(KSMBD_TRANS(t)->conn); + if (kernel_getpeername(client_sk, csin) < 0) { +@@ -230,7 +234,7 @@ static int ksmbd_kthread_fn(void *p) + break; + } + ret = kernel_accept(iface->ksmbd_socket, &client_sk, +- O_NONBLOCK); ++ SOCK_NONBLOCK); + mutex_unlock(&iface->sock_release_lock); + if (ret) { + if (ret == -EAGAIN) +@@ -239,6 +243,15 @@ static int ksmbd_kthread_fn(void *p) + continue; + } + ++ if (server_conf.max_connections && ++ atomic_inc_return(&active_num_conn) >= server_conf.max_connections) { ++ pr_info_ratelimited("Limit the maximum number of connections(%u)\n", ++ atomic_read(&active_num_conn)); ++ atomic_dec(&active_num_conn); ++ sock_release(client_sk); ++ continue; ++ } ++ + ksmbd_debug(CONN, "connect success: accepted new connection\n"); + client_sk->sk->sk_rcvtimeo = KSMBD_TCP_RECV_TIMEOUT; + client_sk->sk->sk_sndtimeo = KSMBD_TCP_SEND_TIMEOUT; +@@ -295,6 +308,7 @@ static int ksmbd_tcp_readv(struct tcp_transport *t, struct kvec *iov_orig, + struct msghdr ksmbd_msg; + struct kvec *iov; + struct ksmbd_conn *conn = KSMBD_TRANS(t)->conn; ++ int max_retry = 2; + + iov = get_conn_iovec(t, nr_segs); + if (!iov) +@@ -321,9 +335,11 @@ static int ksmbd_tcp_readv(struct tcp_transport *t, struct kvec *iov_orig, + } else if (conn->status == KSMBD_SESS_NEED_RECONNECT) { + total_read = -EAGAIN; + break; +- } else if (length == -ERESTARTSYS || length == -EAGAIN) { ++ } else if ((length == -ERESTARTSYS || length == -EAGAIN) && ++ max_retry) { + usleep_range(1000, 2000); + length = 0; ++ max_retry--; + continue; + } else if (length <= 0) { + total_read = -EAGAIN; +@@ -365,6 +381,8 @@ static int ksmbd_tcp_writev(struct ksmbd_transport *t, struct kvec *iov, + static void ksmbd_tcp_disconnect(struct ksmbd_transport *t) + { + free_transport(TCP_TRANS(t)); ++ if (server_conf.max_connections) ++ atomic_dec(&active_num_conn); + } + + static void tcp_destroy_socket(struct socket *ksmbd_socket) +@@ -404,7 +422,7 @@ static int create_socket(struct interface 
*iface) + &ksmbd_socket); + if (ret) { + pr_err("Can't create socket for ipv4: %d\n", ret); +- goto out_error; ++ goto out_clear; + } + + sin.sin_family = PF_INET; +@@ -462,6 +480,7 @@ static int create_socket(struct interface *iface) + + out_error: + tcp_destroy_socket(ksmbd_socket); ++out_clear: + iface->ksmbd_socket = NULL; + return ret; + } +diff --git a/fs/ksmbd/vfs.c b/fs/ksmbd/vfs.c +index 835b384b08959..5d40a00fbce50 100644 +--- a/fs/ksmbd/vfs.c ++++ b/fs/ksmbd/vfs.c +@@ -1018,7 +1018,9 @@ int ksmbd_vfs_zero_data(struct ksmbd_work *work, struct ksmbd_file *fp, + FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, + off, len); + +- return vfs_fallocate(fp->filp, FALLOC_FL_ZERO_RANGE, off, len); ++ return vfs_fallocate(fp->filp, ++ FALLOC_FL_ZERO_RANGE | FALLOC_FL_KEEP_SIZE, ++ off, len); + } + + int ksmbd_vfs_fqar_lseek(struct ksmbd_file *fp, loff_t start, loff_t length, +@@ -1049,7 +1051,7 @@ int ksmbd_vfs_fqar_lseek(struct ksmbd_file *fp, loff_t start, loff_t length, + *out_count = 0; + end = start + length; + while (start < end && *out_count < in_count) { +- extent_start = f->f_op->llseek(f, start, SEEK_DATA); ++ extent_start = vfs_llseek(f, start, SEEK_DATA); + if (extent_start < 0) { + if (extent_start != -ENXIO) + ret = (int)extent_start; +@@ -1059,7 +1061,7 @@ int ksmbd_vfs_fqar_lseek(struct ksmbd_file *fp, loff_t start, loff_t length, + if (extent_start >= end) + break; + +- extent_end = f->f_op->llseek(f, extent_start, SEEK_HOLE); ++ extent_end = vfs_llseek(f, extent_start, SEEK_HOLE); + if (extent_end < 0) { + if (extent_end != -ENXIO) + ret = (int)extent_end; +@@ -1541,6 +1543,11 @@ int ksmbd_vfs_get_sd_xattr(struct ksmbd_conn *conn, + } + + *pntsd = acl.sd_buf; ++ if (acl.sd_size < sizeof(struct smb_ntsd)) { ++ pr_err("sd size is invalid\n"); ++ goto out_free; ++ } ++ + (*pntsd)->osidoffset = cpu_to_le32(le32_to_cpu((*pntsd)->osidoffset) - + NDR_NTSD_OFFSETOF); + (*pntsd)->gsidoffset = cpu_to_le32(le32_to_cpu((*pntsd)->gsidoffset) - +@@ -1780,6 +1787,10 @@ int ksmbd_vfs_copy_file_ranges(struct ksmbd_work *work, + + ret = vfs_copy_file_range(src_fp->filp, src_off, + dst_fp->filp, dst_off, len, 0); ++ if (ret == -EOPNOTSUPP || ret == -EXDEV) ++ ret = vfs_copy_file_range(src_fp->filp, src_off, ++ dst_fp->filp, dst_off, len, ++ COPY_FILE_SPLICE); + if (ret < 0) + return ret; + +diff --git a/fs/ksmbd/vfs.h b/fs/ksmbd/vfs.h +index b0d5b8feb4a36..432c947731779 100644 +--- a/fs/ksmbd/vfs.h ++++ b/fs/ksmbd/vfs.h +@@ -86,6 +86,7 @@ struct ksmbd_dir_info { + int last_entry_offset; + bool hide_dot_file; + int flags; ++ int last_entry_off_align; + }; + + struct ksmbd_readdir_data { +diff --git a/fs/ksmbd/vfs_cache.c b/fs/ksmbd/vfs_cache.c +index 29c1db66bd0f7..8b873d92d7854 100644 +--- a/fs/ksmbd/vfs_cache.c ++++ b/fs/ksmbd/vfs_cache.c +@@ -497,6 +497,7 @@ struct ksmbd_file *ksmbd_lookup_fd_inode(struct inode *inode) + list_for_each_entry(lfp, &ci->m_fp_list, node) { + if (inode == file_inode(lfp->filp)) { + atomic_dec(&ci->m_count); ++ lfp = ksmbd_fp_get(lfp); + read_unlock(&ci->m_lock); + return lfp; + } +diff --git a/fs/libfs.c b/fs/libfs.c +index 51b4de3b3447f..7bb5d90319cc6 100644 +--- a/fs/libfs.c ++++ b/fs/libfs.c +@@ -967,8 +967,8 @@ out: + EXPORT_SYMBOL_GPL(simple_attr_read); + + /* interpret the buffer as a number to call the set function with */ +-ssize_t simple_attr_write(struct file *file, const char __user *buf, +- size_t len, loff_t *ppos) ++static ssize_t simple_attr_write_xsigned(struct file *file, const char __user *buf, ++ size_t len, loff_t *ppos, bool is_signed) + { + 
struct simple_attr *attr; + unsigned long long val; +@@ -989,7 +989,10 @@ ssize_t simple_attr_write(struct file *file, const char __user *buf, + goto out; + + attr->set_buf[size] = '\0'; +- ret = kstrtoull(attr->set_buf, 0, &val); ++ if (is_signed) ++ ret = kstrtoll(attr->set_buf, 0, &val); ++ else ++ ret = kstrtoull(attr->set_buf, 0, &val); + if (ret) + goto out; + ret = attr->set(attr->data, val); +@@ -999,8 +1002,21 @@ out: + mutex_unlock(&attr->mutex); + return ret; + } ++ ++ssize_t simple_attr_write(struct file *file, const char __user *buf, ++ size_t len, loff_t *ppos) ++{ ++ return simple_attr_write_xsigned(file, buf, len, ppos, false); ++} + EXPORT_SYMBOL_GPL(simple_attr_write); + ++ssize_t simple_attr_write_signed(struct file *file, const char __user *buf, ++ size_t len, loff_t *ppos) ++{ ++ return simple_attr_write_xsigned(file, buf, len, ppos, true); ++} ++EXPORT_SYMBOL_GPL(simple_attr_write_signed); ++ + /** + * generic_fh_to_dentry - generic helper for the fh_to_dentry export operation + * @sb: filesystem to do the file handle conversion on +diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c +index e10ae2c41279e..1c9214801e69e 100644 +--- a/fs/lockd/svc4proc.c ++++ b/fs/lockd/svc4proc.c +@@ -32,6 +32,10 @@ nlm4svc_retrieve_args(struct svc_rqst *rqstp, struct nlm_args *argp, + if (!nlmsvc_ops) + return nlm_lck_denied_nolocks; + ++ if (lock->lock_start > OFFSET_MAX || ++ (lock->lock_len && ((lock->lock_len - 1) > (OFFSET_MAX - lock->lock_start)))) ++ return nlm4_fbig; ++ + /* Obtain host handle */ + if (!(host = nlmsvc_lookup_host(rqstp, lock->caller, lock->len)) + || (argp->monitor && nsm_monitor(host) < 0)) +@@ -50,6 +54,10 @@ nlm4svc_retrieve_args(struct svc_rqst *rqstp, struct nlm_args *argp, + /* Set up the missing parts of the file_lock structure */ + lock->fl.fl_file = file->f_file[mode]; + lock->fl.fl_pid = current->tgid; ++ lock->fl.fl_start = (loff_t)lock->lock_start; ++ lock->fl.fl_end = lock->lock_len ? 
++ (loff_t)(lock->lock_start + lock->lock_len - 1) : ++ OFFSET_MAX; + lock->fl.fl_lmops = &nlmsvc_lock_operations; + nlmsvc_locks_init_private(&lock->fl, host, (pid_t)lock->svid); + if (!lock->fl.fl_owner) { +diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c +index cb3a7512c33ec..3515f17eaf3fb 100644 +--- a/fs/lockd/svcsubs.c ++++ b/fs/lockd/svcsubs.c +@@ -176,22 +176,28 @@ nlm_delete_file(struct nlm_file *file) + } + } + +-static int nlm_unlock_files(struct nlm_file *file) ++static int nlm_unlock_files(struct nlm_file *file, const struct file_lock *fl) + { + struct file_lock lock; +- struct file *f; + ++ locks_init_lock(&lock); + lock.fl_type = F_UNLCK; + lock.fl_start = 0; + lock.fl_end = OFFSET_MAX; +- for (f = file->f_file[0]; f <= file->f_file[1]; f++) { +- if (f && vfs_lock_file(f, F_SETLK, &lock, NULL) < 0) { +- pr_warn("lockd: unlock failure in %s:%d\n", +- __FILE__, __LINE__); +- return 1; +- } +- } ++ lock.fl_owner = fl->fl_owner; ++ lock.fl_pid = fl->fl_pid; ++ lock.fl_flags = FL_POSIX; ++ ++ lock.fl_file = file->f_file[O_RDONLY]; ++ if (lock.fl_file && vfs_lock_file(lock.fl_file, F_SETLK, &lock, NULL)) ++ goto out_err; ++ lock.fl_file = file->f_file[O_WRONLY]; ++ if (lock.fl_file && vfs_lock_file(lock.fl_file, F_SETLK, &lock, NULL)) ++ goto out_err; + return 0; ++out_err: ++ pr_warn("lockd: unlock failure in %s:%d\n", __FILE__, __LINE__); ++ return 1; + } + + /* +@@ -223,7 +229,7 @@ again: + if (match(lockhost, host)) { + + spin_unlock(&flctx->flc_lock); +- if (nlm_unlock_files(file)) ++ if (nlm_unlock_files(file, fl)) + return 1; + goto again; + } +@@ -280,11 +286,10 @@ nlm_file_inuse(struct nlm_file *file) + + static void nlm_close_files(struct nlm_file *file) + { +- struct file *f; +- +- for (f = file->f_file[0]; f <= file->f_file[1]; f++) +- if (f) +- nlmsvc_ops->fclose(f); ++ if (file->f_file[O_RDONLY]) ++ nlmsvc_ops->fclose(file->f_file[O_RDONLY]); ++ if (file->f_file[O_WRONLY]) ++ nlmsvc_ops->fclose(file->f_file[O_WRONLY]); + } + + /* +diff --git a/fs/lockd/xdr4.c b/fs/lockd/xdr4.c +index 98e957e4566c2..72f7d190fb3b2 100644 +--- a/fs/lockd/xdr4.c ++++ b/fs/lockd/xdr4.c +@@ -20,13 +20,6 @@ + + #include "svcxdr.h" + +-static inline loff_t +-s64_to_loff_t(__s64 offset) +-{ +- return (loff_t)offset; +-} +- +- + static inline s64 + loff_t_to_s64(loff_t offset) + { +@@ -70,8 +63,6 @@ static bool + svcxdr_decode_lock(struct xdr_stream *xdr, struct nlm_lock *lock) + { + struct file_lock *fl = &lock->fl; +- u64 len, start; +- s64 end; + + if (!svcxdr_decode_string(xdr, &lock->caller, &lock->len)) + return false; +@@ -81,20 +72,14 @@ svcxdr_decode_lock(struct xdr_stream *xdr, struct nlm_lock *lock) + return false; + if (xdr_stream_decode_u32(xdr, &lock->svid) < 0) + return false; +- if (xdr_stream_decode_u64(xdr, &start) < 0) ++ if (xdr_stream_decode_u64(xdr, &lock->lock_start) < 0) + return false; +- if (xdr_stream_decode_u64(xdr, &len) < 0) ++ if (xdr_stream_decode_u64(xdr, &lock->lock_len) < 0) + return false; + + locks_init_lock(fl); + fl->fl_flags = FL_POSIX; + fl->fl_type = F_RDLCK; +- end = start + len - 1; +- fl->fl_start = s64_to_loff_t(start); +- if (len == 0 || end < 0) +- fl->fl_end = OFFSET_MAX; +- else +- fl->fl_end = s64_to_loff_t(end); + + return true; + } +diff --git a/fs/locks.c b/fs/locks.c +index 3d6fb4ae847b4..82a4487e95b37 100644 +--- a/fs/locks.c ++++ b/fs/locks.c +@@ -2703,6 +2703,29 @@ int vfs_cancel_lock(struct file *filp, struct file_lock *fl) + } + EXPORT_SYMBOL_GPL(vfs_cancel_lock); + ++/** ++ * vfs_inode_has_locks - are any file locks held on 
@inode? ++ * @inode: inode to check for locks ++ * ++ * Return true if there are any FL_POSIX or FL_FLOCK locks currently ++ * set on @inode. ++ */ ++bool vfs_inode_has_locks(struct inode *inode) ++{ ++ struct file_lock_context *ctx; ++ bool ret; ++ ++ ctx = smp_load_acquire(&inode->i_flctx); ++ if (!ctx) ++ return false; ++ ++ spin_lock(&ctx->flc_lock); ++ ret = !list_empty(&ctx->flc_posix) || !list_empty(&ctx->flc_flock); ++ spin_unlock(&ctx->flc_lock); ++ return ret; ++} ++EXPORT_SYMBOL_GPL(vfs_inode_has_locks); ++ + #ifdef CONFIG_PROC_FS + #include <linux/proc_fs.h> + #include <linux/seq_file.h> +diff --git a/fs/mbcache.c b/fs/mbcache.c +index 97c54d3a22276..95b047256d093 100644 +--- a/fs/mbcache.c ++++ b/fs/mbcache.c +@@ -11,7 +11,7 @@ + /* + * Mbcache is a simple key-value store. Keys need not be unique, however + * key-value pairs are expected to be unique (we use this fact in +- * mb_cache_entry_delete()). ++ * mb_cache_entry_delete_or_get()). + * + * Ext2 and ext4 use this cache for deduplication of extended attribute blocks. + * Ext4 also uses it for deduplication of xattr values stored in inodes. +@@ -90,12 +90,19 @@ int mb_cache_entry_create(struct mb_cache *cache, gfp_t mask, u32 key, + return -ENOMEM; + + INIT_LIST_HEAD(&entry->e_list); +- /* One ref for hash, one ref returned */ +- atomic_set(&entry->e_refcnt, 1); ++ /* ++ * We create entry with two references. One reference is kept by the ++ * hash table, the other reference is used to protect us from ++ * mb_cache_entry_delete_or_get() until the entry is fully setup. This ++ * avoids nesting of cache->c_list_lock into hash table bit locks which ++ * is problematic for RT. ++ */ ++ atomic_set(&entry->e_refcnt, 2); + entry->e_key = key; + entry->e_value = value; +- entry->e_reusable = reusable; +- entry->e_referenced = 0; ++ entry->e_flags = 0; ++ if (reusable) ++ set_bit(MBE_REUSABLE_B, &entry->e_flags); + head = mb_cache_entry_head(cache, key); + hlist_bl_lock(head); + hlist_bl_for_each_entry(dup, dup_node, head, e_hash_list) { +@@ -107,24 +114,41 @@ int mb_cache_entry_create(struct mb_cache *cache, gfp_t mask, u32 key, + } + hlist_bl_add_head(&entry->e_hash_list, head); + hlist_bl_unlock(head); +- + spin_lock(&cache->c_list_lock); + list_add_tail(&entry->e_list, &cache->c_list); +- /* Grab ref for LRU list */ +- atomic_inc(&entry->e_refcnt); + cache->c_entry_count++; + spin_unlock(&cache->c_list_lock); ++ mb_cache_entry_put(cache, entry); + + return 0; + } + EXPORT_SYMBOL(mb_cache_entry_create); + +-void __mb_cache_entry_free(struct mb_cache_entry *entry) ++void __mb_cache_entry_free(struct mb_cache *cache, struct mb_cache_entry *entry) + { ++ struct hlist_bl_head *head; ++ ++ head = mb_cache_entry_head(cache, entry->e_key); ++ hlist_bl_lock(head); ++ hlist_bl_del(&entry->e_hash_list); ++ hlist_bl_unlock(head); + kmem_cache_free(mb_entry_cache, entry); + } + EXPORT_SYMBOL(__mb_cache_entry_free); + ++/* ++ * mb_cache_entry_wait_unused - wait to be the last user of the entry ++ * ++ * @entry - entry to work on ++ * ++ * Wait to be the last user of the entry. 
++ */ ++void mb_cache_entry_wait_unused(struct mb_cache_entry *entry) ++{ ++ wait_var_event(&entry->e_refcnt, atomic_read(&entry->e_refcnt) <= 2); ++} ++EXPORT_SYMBOL(mb_cache_entry_wait_unused); ++ + static struct mb_cache_entry *__entry_find(struct mb_cache *cache, + struct mb_cache_entry *entry, + u32 key) +@@ -142,10 +166,10 @@ static struct mb_cache_entry *__entry_find(struct mb_cache *cache, + while (node) { + entry = hlist_bl_entry(node, struct mb_cache_entry, + e_hash_list); +- if (entry->e_key == key && entry->e_reusable) { +- atomic_inc(&entry->e_refcnt); ++ if (entry->e_key == key && ++ test_bit(MBE_REUSABLE_B, &entry->e_flags) && ++ atomic_inc_not_zero(&entry->e_refcnt)) + goto out; +- } + node = node->next; + } + entry = NULL; +@@ -205,10 +229,9 @@ struct mb_cache_entry *mb_cache_entry_get(struct mb_cache *cache, u32 key, + head = mb_cache_entry_head(cache, key); + hlist_bl_lock(head); + hlist_bl_for_each_entry(entry, node, head, e_hash_list) { +- if (entry->e_key == key && entry->e_value == value) { +- atomic_inc(&entry->e_refcnt); ++ if (entry->e_key == key && entry->e_value == value && ++ atomic_inc_not_zero(&entry->e_refcnt)) + goto out; +- } + } + entry = NULL; + out: +@@ -217,7 +240,7 @@ out: + } + EXPORT_SYMBOL(mb_cache_entry_get); + +-/* mb_cache_entry_delete - remove a cache entry ++/* mb_cache_entry_delete - try to remove a cache entry + * @cache - cache we work with + * @key - key + * @value - value +@@ -254,6 +277,43 @@ void mb_cache_entry_delete(struct mb_cache *cache, u32 key, u64 value) + } + EXPORT_SYMBOL(mb_cache_entry_delete); + ++/* mb_cache_entry_delete_or_get - remove a cache entry if it has no users ++ * @cache - cache we work with ++ * @key - key ++ * @value - value ++ * ++ * Remove entry from cache @cache with key @key and value @value. The removal ++ * happens only if the entry is unused. The function returns NULL in case the ++ * entry was successfully removed or there's no entry in cache. Otherwise the ++ * function grabs reference of the entry that we failed to delete because it ++ * still has users and return it. 
++ */ ++struct mb_cache_entry *mb_cache_entry_delete_or_get(struct mb_cache *cache, ++ u32 key, u64 value) ++{ ++ struct mb_cache_entry *entry; ++ ++ entry = mb_cache_entry_get(cache, key, value); ++ if (!entry) ++ return NULL; ++ ++ /* ++ * Drop the ref we got from mb_cache_entry_get() and the initial hash ++ * ref if we are the last user ++ */ ++ if (atomic_cmpxchg(&entry->e_refcnt, 2, 0) != 2) ++ return entry; ++ ++ spin_lock(&cache->c_list_lock); ++ if (!list_empty(&entry->e_list)) ++ list_del_init(&entry->e_list); ++ cache->c_entry_count--; ++ spin_unlock(&cache->c_list_lock); ++ __mb_cache_entry_free(cache, entry); ++ return NULL; ++} ++EXPORT_SYMBOL(mb_cache_entry_delete_or_get); ++ + /* mb_cache_entry_touch - cache entry got used + * @cache - cache the entry belongs to + * @entry - entry that got used +@@ -263,7 +323,7 @@ EXPORT_SYMBOL(mb_cache_entry_delete); + void mb_cache_entry_touch(struct mb_cache *cache, + struct mb_cache_entry *entry) + { +- entry->e_referenced = 1; ++ set_bit(MBE_REFERENCED_B, &entry->e_flags); + } + EXPORT_SYMBOL(mb_cache_entry_touch); + +@@ -281,34 +341,24 @@ static unsigned long mb_cache_shrink(struct mb_cache *cache, + unsigned long nr_to_scan) + { + struct mb_cache_entry *entry; +- struct hlist_bl_head *head; + unsigned long shrunk = 0; + + spin_lock(&cache->c_list_lock); + while (nr_to_scan-- && !list_empty(&cache->c_list)) { + entry = list_first_entry(&cache->c_list, + struct mb_cache_entry, e_list); +- if (entry->e_referenced) { +- entry->e_referenced = 0; ++ /* Drop initial hash reference if there is no user */ ++ if (test_bit(MBE_REFERENCED_B, &entry->e_flags) || ++ atomic_cmpxchg(&entry->e_refcnt, 1, 0) != 1) { ++ clear_bit(MBE_REFERENCED_B, &entry->e_flags); + list_move_tail(&entry->e_list, &cache->c_list); + continue; + } + list_del_init(&entry->e_list); + cache->c_entry_count--; +- /* +- * We keep LRU list reference so that entry doesn't go away +- * from under us. +- */ + spin_unlock(&cache->c_list_lock); +- head = mb_cache_entry_head(cache, entry->e_key); +- hlist_bl_lock(head); +- if (!hlist_bl_unhashed(&entry->e_hash_list)) { +- hlist_bl_del_init(&entry->e_hash_list); +- atomic_dec(&entry->e_refcnt); +- } +- hlist_bl_unlock(head); +- if (mb_cache_entry_put(cache, entry)) +- shrunk++; ++ __mb_cache_entry_free(cache, entry); ++ shrunk++; + cond_resched(); + spin_lock(&cache->c_list_lock); + } +@@ -400,11 +450,6 @@ void mb_cache_destroy(struct mb_cache *cache) + * point. + */ + list_for_each_entry_safe(entry, next, &cache->c_list, e_list) { +- if (!hlist_bl_unhashed(&entry->e_hash_list)) { +- hlist_bl_del_init(&entry->e_hash_list); +- atomic_dec(&entry->e_refcnt); +- } else +- WARN_ON(1); + list_del(&entry->e_list); + WARN_ON(atomic_read(&entry->e_refcnt) != 1); + mb_cache_entry_put(cache, entry); +diff --git a/fs/minix/inode.c b/fs/minix/inode.c +index a71f1cf894b9f..d4bd94234ef73 100644 +--- a/fs/minix/inode.c ++++ b/fs/minix/inode.c +@@ -447,7 +447,8 @@ static const struct address_space_operations minix_aops = { + .writepage = minix_writepage, + .write_begin = minix_write_begin, + .write_end = generic_write_end, +- .bmap = minix_bmap ++ .bmap = minix_bmap, ++ .direct_IO = noop_direct_IO + }; + + static const struct inode_operations minix_symlink_inode_operations = { +diff --git a/fs/namei.c b/fs/namei.c +index 1946d96677908..81b31d9a063f2 100644 +--- a/fs/namei.c ++++ b/fs/namei.c +@@ -1461,6 +1461,8 @@ static bool __follow_mount_rcu(struct nameidata *nd, struct path *path, + * becoming unpinned. 
+ */ + flags = dentry->d_flags; ++ if (read_seqretry(&mount_lock, nd->m_seq)) ++ return false; + continue; + } + if (read_seqretry(&mount_lock, nd->m_seq)) +@@ -2718,7 +2720,8 @@ struct dentry *lookup_one(struct user_namespace *mnt_userns, const char *name, + EXPORT_SYMBOL(lookup_one); + + /** +- * lookup_one_len_unlocked - filesystem helper to lookup single pathname component ++ * lookup_one_unlocked - filesystem helper to lookup single pathname component ++ * @mnt_userns: idmapping of the mount the lookup is performed from + * @name: pathname component to lookup + * @base: base directory to lookup from + * @len: maximum length @len should be interpreted to +@@ -2729,14 +2732,15 @@ EXPORT_SYMBOL(lookup_one); + * Unlike lookup_one_len, it should be called without the parent + * i_mutex held, and will take the i_mutex itself if necessary. + */ +-struct dentry *lookup_one_len_unlocked(const char *name, +- struct dentry *base, int len) ++struct dentry *lookup_one_unlocked(struct user_namespace *mnt_userns, ++ const char *name, struct dentry *base, ++ int len) + { + struct qstr this; + int err; + struct dentry *ret; + +- err = lookup_one_common(&init_user_ns, name, base, len, &this); ++ err = lookup_one_common(mnt_userns, name, base, len, &this); + if (err) + return ERR_PTR(err); + +@@ -2745,6 +2749,59 @@ struct dentry *lookup_one_len_unlocked(const char *name, + ret = lookup_slow(&this, base, 0); + return ret; + } ++EXPORT_SYMBOL(lookup_one_unlocked); ++ ++/** ++ * lookup_one_positive_unlocked - filesystem helper to lookup single ++ * pathname component ++ * @mnt_userns: idmapping of the mount the lookup is performed from ++ * @name: pathname component to lookup ++ * @base: base directory to lookup from ++ * @len: maximum length @len should be interpreted to ++ * ++ * This helper will yield ERR_PTR(-ENOENT) on negatives. The helper returns ++ * known positive or ERR_PTR(). This is what most of the users want. ++ * ++ * Note that pinned negative with unlocked parent _can_ become positive at any ++ * time, so callers of lookup_one_unlocked() need to be very careful; pinned ++ * positives have >d_inode stable, so this one avoids such problems. ++ * ++ * Note that this routine is purely a helper for filesystem usage and should ++ * not be called by generic code. ++ * ++ * The helper should be called without i_mutex held. ++ */ ++struct dentry *lookup_one_positive_unlocked(struct user_namespace *mnt_userns, ++ const char *name, ++ struct dentry *base, int len) ++{ ++ struct dentry *ret = lookup_one_unlocked(mnt_userns, name, base, len); ++ ++ if (!IS_ERR(ret) && d_flags_negative(smp_load_acquire(&ret->d_flags))) { ++ dput(ret); ++ ret = ERR_PTR(-ENOENT); ++ } ++ return ret; ++} ++EXPORT_SYMBOL(lookup_one_positive_unlocked); ++ ++/** ++ * lookup_one_len_unlocked - filesystem helper to lookup single pathname component ++ * @name: pathname component to lookup ++ * @base: base directory to lookup from ++ * @len: maximum length @len should be interpreted to ++ * ++ * Note that this routine is purely a helper for filesystem usage and should ++ * not be called by generic code. ++ * ++ * Unlike lookup_one_len, it should be called without the parent ++ * i_mutex held, and will take the i_mutex itself if necessary. 
++ */ ++struct dentry *lookup_one_len_unlocked(const char *name, ++ struct dentry *base, int len) ++{ ++ return lookup_one_unlocked(&init_user_ns, name, base, len); ++} + EXPORT_SYMBOL(lookup_one_len_unlocked); + + /* +@@ -2758,12 +2815,7 @@ EXPORT_SYMBOL(lookup_one_len_unlocked); + struct dentry *lookup_positive_unlocked(const char *name, + struct dentry *base, int len) + { +- struct dentry *ret = lookup_one_len_unlocked(name, base, len); +- if (!IS_ERR(ret) && d_flags_negative(smp_load_acquire(&ret->d_flags))) { +- dput(ret); +- ret = ERR_PTR(-ENOENT); +- } +- return ret; ++ return lookup_one_positive_unlocked(&init_user_ns, name, base, len); + } + EXPORT_SYMBOL(lookup_positive_unlocked); + +@@ -3473,6 +3525,8 @@ struct dentry *vfs_tmpfile(struct user_namespace *mnt_userns, + child = d_alloc(dentry, &slash_name); + if (unlikely(!child)) + goto out_err; ++ if (!IS_POSIXACL(dir)) ++ mode &= ~current_umask(); + error = dir->i_op->tmpfile(mnt_userns, dir, child, mode); + if (error) + goto out_err; +@@ -3625,18 +3679,14 @@ static struct dentry *filename_create(int dfd, struct filename *name, + { + struct dentry *dentry = ERR_PTR(-EEXIST); + struct qstr last; ++ bool want_dir = lookup_flags & LOOKUP_DIRECTORY; ++ unsigned int reval_flag = lookup_flags & LOOKUP_REVAL; ++ unsigned int create_flags = LOOKUP_CREATE | LOOKUP_EXCL; + int type; + int err2; + int error; +- bool is_dir = (lookup_flags & LOOKUP_DIRECTORY); +- +- /* +- * Note that only LOOKUP_REVAL and LOOKUP_DIRECTORY matter here. Any +- * other flags passed in are ignored! +- */ +- lookup_flags &= LOOKUP_REVAL; + +- error = filename_parentat(dfd, name, lookup_flags, path, &last, &type); ++ error = filename_parentat(dfd, name, reval_flag, path, &last, &type); + if (error) + return ERR_PTR(error); + +@@ -3650,11 +3700,13 @@ static struct dentry *filename_create(int dfd, struct filename *name, + /* don't fail immediately if it's r/o, at least try to report other errors */ + err2 = mnt_want_write(path->mnt); + /* +- * Do the final lookup. ++ * Do the final lookup. Suppress 'create' if there is a trailing ++ * '/', and a directory wasn't requested. + */ +- lookup_flags |= LOOKUP_CREATE | LOOKUP_EXCL; ++ if (last.name[last.len] && !want_dir) ++ create_flags = 0; + inode_lock_nested(path->dentry->d_inode, I_MUTEX_PARENT); +- dentry = __lookup_hash(&last, path->dentry, lookup_flags); ++ dentry = __lookup_hash(&last, path->dentry, reval_flag | create_flags); + if (IS_ERR(dentry)) + goto unlock; + +@@ -3668,7 +3720,7 @@ static struct dentry *filename_create(int dfd, struct filename *name, + * all is fine. Let's be bastards - you had / on the end, you've + * been asking for (non-existent) directory. -ENOENT for you. + */ +- if (unlikely(!is_dir && last.name[last.len])) { ++ if (unlikely(!create_flags)) { + error = -ENOENT; + goto fail; + } +@@ -3975,13 +4027,12 @@ int vfs_rmdir(struct user_namespace *mnt_userns, struct inode *dir, + dentry->d_inode->i_flags |= S_DEAD; + dont_mount(dentry); + detach_mounts(dentry); +- fsnotify_rmdir(dir, dentry); + + out: + inode_unlock(dentry->d_inode); + dput(dentry); + if (!error) +- d_delete(dentry); ++ d_delete_notify(dir, dentry); + return error; + } + EXPORT_SYMBOL(vfs_rmdir); +@@ -4103,7 +4154,6 @@ int vfs_unlink(struct user_namespace *mnt_userns, struct inode *dir, + if (!error) { + dont_mount(dentry); + detach_mounts(dentry); +- fsnotify_unlink(dir, dentry); + } + } + } +@@ -4111,9 +4161,11 @@ out: + inode_unlock(target); + + /* We don't d_delete() NFS sillyrenamed files--they still exist. 
*/ +- if (!error && !(dentry->d_flags & DCACHE_NFSFS_RENAMED)) { ++ if (!error && dentry->d_flags & DCACHE_NFSFS_RENAMED) { ++ fsnotify_unlink(dir, dentry); ++ } else if (!error) { + fsnotify_link_count(target); +- d_delete(dentry); ++ d_delete_notify(dir, dentry); + } + + return error; +@@ -4961,7 +5013,7 @@ int __page_symlink(struct inode *inode, const char *symname, int len, int nofs) + { + struct address_space *mapping = inode->i_mapping; + struct page *page; +- void *fsdata; ++ void *fsdata = NULL; + int err; + unsigned int flags = 0; + if (nofs) +diff --git a/fs/namespace.c b/fs/namespace.c +index 659a8f39c61af..d946298691ed4 100644 +--- a/fs/namespace.c ++++ b/fs/namespace.c +@@ -31,6 +31,7 @@ + #include <uapi/linux/mount.h> + #include <linux/fs_context.h> + #include <linux/shmem_fs.h> ++#include <linux/mnt_idmapping.h> + + #include "pnode.h" + #include "internal.h" +@@ -561,7 +562,7 @@ static void free_vfsmnt(struct mount *mnt) + struct user_namespace *mnt_userns; + + mnt_userns = mnt_user_ns(&mnt->mnt); +- if (mnt_userns != &init_user_ns) ++ if (!initial_idmapping(mnt_userns)) + put_user_ns(mnt_userns); + kfree_const(mnt->mnt_devname); + #ifdef CONFIG_SMP +@@ -965,6 +966,7 @@ static struct mount *skip_mnt_tree(struct mount *p) + struct vfsmount *vfs_create_mount(struct fs_context *fc) + { + struct mount *mnt; ++ struct user_namespace *fs_userns; + + if (!fc->root) + return ERR_PTR(-EINVAL); +@@ -982,6 +984,10 @@ struct vfsmount *vfs_create_mount(struct fs_context *fc) + mnt->mnt_mountpoint = mnt->mnt.mnt_root; + mnt->mnt_parent = mnt; + ++ fs_userns = mnt->mnt.mnt_sb->s_user_ns; ++ if (!initial_idmapping(fs_userns)) ++ mnt->mnt.mnt_userns = get_user_ns(fs_userns); ++ + lock_mount_hash(); + list_add_tail(&mnt->mnt_instance, &mnt->mnt.mnt_sb->s_mounts); + unlock_mount_hash(); +@@ -1072,7 +1078,7 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root, + + atomic_inc(&sb->s_active); + mnt->mnt.mnt_userns = mnt_user_ns(&old->mnt); +- if (mnt->mnt.mnt_userns != &init_user_ns) ++ if (!initial_idmapping(mnt->mnt.mnt_userns)) + mnt->mnt.mnt_userns = get_user_ns(mnt->mnt.mnt_userns); + mnt->mnt.mnt_sb = sb; + mnt->mnt.mnt_root = dget(root); +@@ -3927,28 +3933,32 @@ static unsigned int recalc_flags(struct mount_kattr *kattr, struct mount *mnt) + static int can_idmap_mount(const struct mount_kattr *kattr, struct mount *mnt) + { + struct vfsmount *m = &mnt->mnt; ++ struct user_namespace *fs_userns = m->mnt_sb->s_user_ns; + + if (!kattr->mnt_userns) + return 0; + ++ /* ++ * Creating an idmapped mount with the filesystem wide idmapping ++ * doesn't make sense so block that. We don't allow mushy semantics. ++ */ ++ if (kattr->mnt_userns == fs_userns) ++ return -EINVAL; ++ + /* + * Once a mount has been idmapped we don't allow it to change its + * mapping. It makes things simpler and callers can just create + * another bind-mount they can idmap if they want to. + */ +- if (mnt_user_ns(m) != &init_user_ns) ++ if (is_idmapped_mnt(m)) + return -EPERM; + + /* The underlying filesystem doesn't support idmapped mounts yet. */ + if (!(m->mnt_sb->s_type->fs_flags & FS_ALLOW_IDMAP)) + return -EINVAL; + +- /* Don't yet support filesystem mountable in user namespaces. */ +- if (m->mnt_sb->s_user_ns != &init_user_ns) +- return -EINVAL; +- + /* We're not controlling the superblock. */ +- if (!capable(CAP_SYS_ADMIN)) ++ if (!ns_capable(fs_userns, CAP_SYS_ADMIN)) + return -EPERM; + + /* Mount has already been visible in the filesystem hierarchy. 
*/ +@@ -4002,14 +4012,27 @@ out: + + static void do_idmap_mount(const struct mount_kattr *kattr, struct mount *mnt) + { +- struct user_namespace *mnt_userns; ++ struct user_namespace *mnt_userns, *old_mnt_userns; + + if (!kattr->mnt_userns) + return; + ++ /* ++ * We're the only ones able to change the mount's idmapping. So ++ * mnt->mnt.mnt_userns is stable and we can retrieve it directly. ++ */ ++ old_mnt_userns = mnt->mnt.mnt_userns; ++ + mnt_userns = get_user_ns(kattr->mnt_userns); + /* Pairs with smp_load_acquire() in mnt_user_ns(). */ + smp_store_release(&mnt->mnt.mnt_userns, mnt_userns); ++ ++ /* ++ * If this is an idmapped filesystem drop the reference we've taken ++ * in vfs_create_mount() before. ++ */ ++ if (!initial_idmapping(old_mnt_userns)) ++ put_user_ns(old_mnt_userns); + } + + static void mount_setattr_commit(struct mount_kattr *kattr, +@@ -4133,16 +4156,25 @@ static int build_mount_idmapped(const struct mount_attr *attr, size_t usize, + } + + /* +- * The init_user_ns is used to indicate that a vfsmount is not idmapped. +- * This is simpler than just having to treat NULL as unmapped. Users +- * wanting to idmap a mount to init_user_ns can just use a namespace +- * with an identity mapping. ++ * The initial idmapping cannot be used to create an idmapped ++ * mount. We use the initial idmapping as an indicator of a mount ++ * that is not idmapped. It can simply be passed into helpers that ++ * are aware of idmapped mounts as a convenient shortcut. A user ++ * can just create a dedicated identity mapping to achieve the same ++ * result. + */ + mnt_userns = container_of(ns, struct user_namespace, ns); +- if (mnt_userns == &init_user_ns) { ++ if (initial_idmapping(mnt_userns)) { ++ err = -EPERM; ++ goto out_fput; ++ } ++ ++ /* We're not controlling the target namespace. 
*/ ++ if (!ns_capable(mnt_userns, CAP_SYS_ADMIN)) { + err = -EPERM; + goto out_fput; + } ++ + kattr->mnt_userns = get_user_ns(mnt_userns); + + out_fput: +@@ -4263,12 +4295,11 @@ SYSCALL_DEFINE5(mount_setattr, int, dfd, const char __user *, path, + return err; + + err = user_path_at(dfd, path, kattr.lookup_flags, &target); +- if (err) +- return err; +- +- err = do_mount_setattr(&target, &kattr); ++ if (!err) { ++ err = do_mount_setattr(&target, &kattr); ++ path_put(&target); ++ } + finish_mount_kattr(&kattr); +- path_put(&target); + return err; + } + +diff --git a/fs/netfs/read_helper.c b/fs/netfs/read_helper.c +index 994ec22d40402..242f8bcb34a4c 100644 +--- a/fs/netfs/read_helper.c ++++ b/fs/netfs/read_helper.c +@@ -354,16 +354,11 @@ static void netfs_rreq_write_to_cache_work(struct work_struct *work) + netfs_rreq_do_write_to_cache(rreq); + } + +-static void netfs_rreq_write_to_cache(struct netfs_read_request *rreq, +- bool was_async) ++static void netfs_rreq_write_to_cache(struct netfs_read_request *rreq) + { +- if (was_async) { +- rreq->work.func = netfs_rreq_write_to_cache_work; +- if (!queue_work(system_unbound_wq, &rreq->work)) +- BUG(); +- } else { +- netfs_rreq_do_write_to_cache(rreq); +- } ++ rreq->work.func = netfs_rreq_write_to_cache_work; ++ if (!queue_work(system_unbound_wq, &rreq->work)) ++ BUG(); + } + + /* +@@ -560,7 +555,7 @@ again: + wake_up_bit(&rreq->flags, NETFS_RREQ_IN_PROGRESS); + + if (test_bit(NETFS_RREQ_WRITE_TO_CACHE, &rreq->flags)) +- return netfs_rreq_write_to_cache(rreq, was_async); ++ return netfs_rreq_write_to_cache(rreq); + + netfs_rreq_completed(rreq, was_async); + } +@@ -963,7 +958,7 @@ int netfs_readpage(struct file *file, + rreq = netfs_alloc_read_request(ops, netfs_priv, file); + if (!rreq) { + if (netfs_priv) +- ops->cleanup(netfs_priv, page_file_mapping(page)); ++ ops->cleanup(page_file_mapping(page), netfs_priv); + unlock_page(page); + return -ENOMEM; + } +@@ -1190,7 +1185,7 @@ have_page: + goto error; + have_page_no_wait: + if (netfs_priv) +- ops->cleanup(netfs_priv, mapping); ++ ops->cleanup(mapping, netfs_priv); + *_page = page; + _leave(" = 0"); + return 0; +@@ -1201,7 +1196,7 @@ error: + unlock_page(page); + put_page(page); + if (netfs_priv) +- ops->cleanup(netfs_priv, mapping); ++ ops->cleanup(mapping, netfs_priv); + _leave(" = %d", ret); + return ret; + } +diff --git a/fs/nfs/callback.h b/fs/nfs/callback.h +index 6a2033131c068..ccd4f245cae24 100644 +--- a/fs/nfs/callback.h ++++ b/fs/nfs/callback.h +@@ -170,7 +170,7 @@ struct cb_devicenotifyitem { + }; + + struct cb_devicenotifyargs { +- int ndevs; ++ uint32_t ndevs; + struct cb_devicenotifyitem *devs; + }; + +diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c +index ed9d580826f5a..ccf3132384412 100644 +--- a/fs/nfs/callback_proc.c ++++ b/fs/nfs/callback_proc.c +@@ -288,6 +288,7 @@ static u32 initiate_file_draining(struct nfs_client *clp, + rv = NFS4_OK; + break; + case -ENOENT: ++ set_bit(NFS_LAYOUT_DRAIN, &lo->plh_flags); + /* Embrace your forgetfulness! 
*/ + rv = NFS4ERR_NOMATCHING_LAYOUT; + +@@ -358,12 +359,11 @@ __be32 nfs4_callback_devicenotify(void *argp, void *resp, + struct cb_process_state *cps) + { + struct cb_devicenotifyargs *args = argp; +- int i; ++ const struct pnfs_layoutdriver_type *ld = NULL; ++ uint32_t i; + __be32 res = 0; +- struct nfs_client *clp = cps->clp; +- struct nfs_server *server = NULL; + +- if (!clp) { ++ if (!cps->clp) { + res = cpu_to_be32(NFS4ERR_OP_NOT_IN_SESSION); + goto out; + } +@@ -371,23 +371,15 @@ __be32 nfs4_callback_devicenotify(void *argp, void *resp, + for (i = 0; i < args->ndevs; i++) { + struct cb_devicenotifyitem *dev = &args->devs[i]; + +- if (!server || +- server->pnfs_curr_ld->id != dev->cbd_layout_type) { +- rcu_read_lock(); +- list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) +- if (server->pnfs_curr_ld && +- server->pnfs_curr_ld->id == dev->cbd_layout_type) { +- rcu_read_unlock(); +- goto found; +- } +- rcu_read_unlock(); +- continue; ++ if (!ld || ld->id != dev->cbd_layout_type) { ++ pnfs_put_layoutdriver(ld); ++ ld = pnfs_find_layoutdriver(dev->cbd_layout_type); ++ if (!ld) ++ continue; + } +- +- found: +- nfs4_delete_deviceid(server->pnfs_curr_ld, clp, &dev->cbd_dev_id); ++ nfs4_delete_deviceid(ld, cps->clp, &dev->cbd_dev_id); + } +- ++ pnfs_put_layoutdriver(ld); + out: + kfree(args->devs); + return res; +diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c +index 4c48d85f65170..ea17085ef884b 100644 +--- a/fs/nfs/callback_xdr.c ++++ b/fs/nfs/callback_xdr.c +@@ -258,11 +258,9 @@ __be32 decode_devicenotify_args(struct svc_rqst *rqstp, + void *argp) + { + struct cb_devicenotifyargs *args = argp; ++ uint32_t tmp, n, i; + __be32 *p; + __be32 status = 0; +- u32 tmp; +- int n, i; +- args->ndevs = 0; + + /* Num of device notifications */ + p = xdr_inline_decode(xdr, sizeof(uint32_t)); +@@ -271,12 +269,8 @@ __be32 decode_devicenotify_args(struct svc_rqst *rqstp, + goto out; + } + n = ntohl(*p++); +- if (n <= 0) +- goto out; +- if (n > ULONG_MAX / sizeof(*args->devs)) { +- status = htonl(NFS4ERR_BADXDR); ++ if (n == 0) + goto out; +- } + + args->devs = kmalloc_array(n, sizeof(*args->devs), GFP_KERNEL); + if (!args->devs) { +@@ -330,19 +324,21 @@ __be32 decode_devicenotify_args(struct svc_rqst *rqstp, + dev->cbd_immediate = 0; + } + +- args->ndevs++; +- + dprintk("%s: type %d layout 0x%x immediate %d\n", + __func__, dev->cbd_notify_type, dev->cbd_layout_type, + dev->cbd_immediate); + } ++ args->ndevs = n; ++ dprintk("%s: ndevs %d\n", __func__, args->ndevs); ++ return 0; ++err: ++ kfree(args->devs); + out: ++ args->devs = NULL; ++ args->ndevs = 0; + dprintk("%s: status %d ndevs %d\n", + __func__, ntohl(status), args->ndevs); + return status; +-err: +- kfree(args->devs); +- goto out; + } + + static __be32 decode_sessionid(struct xdr_stream *xdr, +diff --git a/fs/nfs/client.c b/fs/nfs/client.c +index 23e165d5ec9ca..090b16890e3d6 100644 +--- a/fs/nfs/client.c ++++ b/fs/nfs/client.c +@@ -177,6 +177,7 @@ struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_init) + INIT_LIST_HEAD(&clp->cl_superblocks); + clp->cl_rpcclient = ERR_PTR(-EINVAL); + ++ clp->cl_flags = cl_init->init_flags; + clp->cl_proto = cl_init->proto; + clp->cl_nconnect = cl_init->nconnect; + clp->cl_max_connect = cl_init->max_connect ? 
cl_init->max_connect : 1; +@@ -427,7 +428,6 @@ struct nfs_client *nfs_get_client(const struct nfs_client_initdata *cl_init) + list_add_tail(&new->cl_share_link, + &nn->nfs_client_list); + spin_unlock(&nn->nfs_client_lock); +- new->cl_flags = cl_init->init_flags; + return rpc_ops->init_client(new, cl_init); + } + +@@ -860,6 +860,13 @@ int nfs_probe_fsinfo(struct nfs_server *server, struct nfs_fh *mntfh, struct nfs + server->namelen = pathinfo.max_namelen; + } + ++ if (clp->rpc_ops->discover_trunking != NULL && ++ (server->caps & NFS_CAP_FS_LOCATIONS)) { ++ error = clp->rpc_ops->discover_trunking(server, mntfh); ++ if (error < 0) ++ return error; ++ } ++ + return 0; + } + EXPORT_SYMBOL_GPL(nfs_probe_fsinfo); +diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c +index 11118398f495c..6a3ba306c3216 100644 +--- a/fs/nfs/delegation.c ++++ b/fs/nfs/delegation.c +@@ -228,8 +228,7 @@ again: + * + */ + void nfs_inode_reclaim_delegation(struct inode *inode, const struct cred *cred, +- fmode_t type, +- const nfs4_stateid *stateid, ++ fmode_t type, const nfs4_stateid *stateid, + unsigned long pagemod_limit) + { + struct nfs_delegation *delegation; +@@ -239,25 +238,24 @@ void nfs_inode_reclaim_delegation(struct inode *inode, const struct cred *cred, + delegation = rcu_dereference(NFS_I(inode)->delegation); + if (delegation != NULL) { + spin_lock(&delegation->lock); +- if (nfs4_is_valid_delegation(delegation, 0)) { +- nfs4_stateid_copy(&delegation->stateid, stateid); +- delegation->type = type; +- delegation->pagemod_limit = pagemod_limit; +- oldcred = delegation->cred; +- delegation->cred = get_cred(cred); +- clear_bit(NFS_DELEGATION_NEED_RECLAIM, +- &delegation->flags); +- spin_unlock(&delegation->lock); +- rcu_read_unlock(); +- put_cred(oldcred); +- trace_nfs4_reclaim_delegation(inode, type); +- return; +- } +- /* We appear to have raced with a delegation return. 
*/ ++ nfs4_stateid_copy(&delegation->stateid, stateid); ++ delegation->type = type; ++ delegation->pagemod_limit = pagemod_limit; ++ oldcred = delegation->cred; ++ delegation->cred = get_cred(cred); ++ clear_bit(NFS_DELEGATION_NEED_RECLAIM, &delegation->flags); ++ if (test_and_clear_bit(NFS_DELEGATION_REVOKED, ++ &delegation->flags)) ++ atomic_long_inc(&nfs_active_delegations); + spin_unlock(&delegation->lock); ++ rcu_read_unlock(); ++ put_cred(oldcred); ++ trace_nfs4_reclaim_delegation(inode, type); ++ } else { ++ rcu_read_unlock(); ++ nfs_inode_set_delegation(inode, cred, type, stateid, ++ pagemod_limit); + } +- rcu_read_unlock(); +- nfs_inode_set_delegation(inode, cred, type, stateid, pagemod_limit); + } + + static int nfs_do_return_delegation(struct inode *inode, struct nfs_delegation *delegation, int issync) +@@ -755,11 +753,13 @@ int nfs4_inode_return_delegation(struct inode *inode) + struct nfs_delegation *delegation; + + delegation = nfs_start_delegation_return(nfsi); +- /* Synchronous recall of any application leases */ +- break_lease(inode, O_WRONLY | O_RDWR); +- nfs_wb_all(inode); +- if (delegation != NULL) ++ if (delegation != NULL) { ++ /* Synchronous recall of any application leases */ ++ break_lease(inode, O_WRONLY | O_RDWR); ++ if (S_ISREG(inode->i_mode)) ++ nfs_wb_all(inode); + return nfs_end_delegation_return(inode, delegation, 1); ++ } + return 0; + } + +diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c +index 1a6d2867fba4f..32c3d0c454b19 100644 +--- a/fs/nfs/dir.c ++++ b/fs/nfs/dir.c +@@ -78,6 +78,7 @@ static struct nfs_open_dir_context *alloc_nfs_open_dir_context(struct inode *dir + ctx->attr_gencount = nfsi->attr_gencount; + ctx->dir_cookie = 0; + ctx->dup_cookie = 0; ++ ctx->page_index = 0; + spin_lock(&dir->i_lock); + if (list_empty(&nfsi->open_files) && + (nfsi->cache_validity & NFS_INO_DATA_INVAL_DEFER)) +@@ -85,6 +86,7 @@ static struct nfs_open_dir_context *alloc_nfs_open_dir_context(struct inode *dir + NFS_INO_INVALID_DATA | + NFS_INO_REVAL_FORCED); + list_add(&ctx->list, &nfsi->open_files); ++ clear_bit(NFS_INO_FORCE_READDIR, &nfsi->flags); + spin_unlock(&dir->i_lock); + return ctx; + } +@@ -626,8 +628,7 @@ void nfs_force_use_readdirplus(struct inode *dir) + if (nfs_server_capable(dir, NFS_CAP_READDIRPLUS) && + !list_empty(&nfsi->open_files)) { + set_bit(NFS_INO_ADVISE_RDPLUS, &nfsi->flags); +- invalidate_mapping_pages(dir->i_mapping, +- nfsi->page_index + 1, -1); ++ set_bit(NFS_INO_FORCE_READDIR, &nfsi->flags); + } + } + +@@ -870,7 +871,8 @@ static int nfs_readdir_xdr_to_array(struct nfs_readdir_descriptor *desc, + + status = nfs_readdir_page_filler(desc, entry, pages, pglen, + arrays, narrays); +- } while (!status && nfs_readdir_page_needs_filling(page)); ++ } while (!status && nfs_readdir_page_needs_filling(page) && ++ page_mapping(page)); + + nfs_readdir_free_pages(pages, array_size); + out_release_label: +@@ -937,10 +939,8 @@ static int find_and_lock_cache_page(struct nfs_readdir_descriptor *desc) + sizeof(nfsi->cookieverf)); + } + res = nfs_readdir_search_array(desc); +- if (res == 0) { +- nfsi->page_index = desc->page_index; ++ if (res == 0) + return 0; +- } + nfs_readdir_page_unlock_and_put_cached(desc); + return res; + } +@@ -1048,6 +1048,7 @@ static int uncached_readdir(struct nfs_readdir_descriptor *desc) + goto out; + + desc->page_index = 0; ++ desc->cache_entry_index = 0; + desc->last_cookie = desc->dir_cookie; + desc->duped = 0; + +@@ -1079,6 +1080,7 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx) + struct nfs_inode *nfsi = 
NFS_I(inode); + struct nfs_open_dir_context *dir_ctx = file->private_data; + struct nfs_readdir_descriptor *desc; ++ pgoff_t page_index; + int res; + + dfprintk(FILE, "NFS: readdir(%pD2) starting at cookie %llu\n", +@@ -1109,10 +1111,15 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx) + desc->dir_cookie = dir_ctx->dir_cookie; + desc->dup_cookie = dir_ctx->dup_cookie; + desc->duped = dir_ctx->duped; ++ page_index = dir_ctx->page_index; + desc->attr_gencount = dir_ctx->attr_gencount; + memcpy(desc->verf, dir_ctx->verf, sizeof(desc->verf)); + spin_unlock(&file->f_lock); + ++ if (test_and_clear_bit(NFS_INO_FORCE_READDIR, &nfsi->flags) && ++ list_is_singular(&nfsi->open_files)) ++ invalidate_mapping_pages(inode->i_mapping, page_index + 1, -1); ++ + do { + res = readdir_search_pagecache(desc); + +@@ -1149,6 +1156,7 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx) + dir_ctx->dup_cookie = desc->dup_cookie; + dir_ctx->duped = desc->duped; + dir_ctx->attr_gencount = desc->attr_gencount; ++ dir_ctx->page_index = desc->page_index; + memcpy(dir_ctx->verf, desc->verf, sizeof(dir_ctx->verf)); + spin_unlock(&file->f_lock); + +@@ -1269,13 +1277,12 @@ static bool nfs_verifier_is_delegated(struct dentry *dentry) + static void nfs_set_verifier_locked(struct dentry *dentry, unsigned long verf) + { + struct inode *inode = d_inode(dentry); ++ struct inode *dir = d_inode(dentry->d_parent); + +- if (!nfs_verifier_is_delegated(dentry) && +- !nfs_verify_change_attribute(d_inode(dentry->d_parent), verf)) +- goto out; ++ if (!nfs_verify_change_attribute(dir, verf)) ++ return; + if (inode && NFS_PROTO(inode)->have_delegation(inode, FMODE_READ)) + nfs_set_verifier_delegated(&verf); +-out: + dentry->d_time = verf; + } + +@@ -1413,7 +1420,7 @@ out_force: + static void nfs_mark_dir_for_revalidate(struct inode *inode) + { + spin_lock(&inode->i_lock); +- nfs_set_cache_invalid(inode, NFS_INO_REVAL_PAGECACHE); ++ nfs_set_cache_invalid(inode, NFS_INO_INVALID_CHANGE); + spin_unlock(&inode->i_lock); + } + +@@ -1834,16 +1841,6 @@ const struct dentry_operations nfs4_dentry_operations = { + }; + EXPORT_SYMBOL_GPL(nfs4_dentry_operations); + +-static fmode_t flags_to_mode(int flags) +-{ +- fmode_t res = (__force fmode_t)flags & FMODE_EXEC; +- if ((flags & O_ACCMODE) != O_WRONLY) +- res |= FMODE_READ; +- if ((flags & O_ACCMODE) != O_RDONLY) +- res |= FMODE_WRITE; +- return res; +-} +- + static struct nfs_open_context *create_nfs_open_context(struct dentry *dentry, int open_flags, struct file *filp) + { + return alloc_nfs_open_context(dentry, flags_to_mode(open_flags), filp); +@@ -1983,6 +1980,24 @@ out: + + no_open: + res = nfs_lookup(dir, dentry, lookup_flags); ++ if (!res) { ++ inode = d_inode(dentry); ++ if ((lookup_flags & LOOKUP_DIRECTORY) && inode && ++ !(S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))) ++ res = ERR_PTR(-ENOTDIR); ++ else if (inode && S_ISREG(inode->i_mode)) ++ res = ERR_PTR(-EOPENSTALE); ++ } else if (!IS_ERR(res)) { ++ inode = d_inode(res); ++ if ((lookup_flags & LOOKUP_DIRECTORY) && inode && ++ !(S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))) { ++ dput(res); ++ res = ERR_PTR(-ENOTDIR); ++ } else if (inode && S_ISREG(inode->i_mode)) { ++ dput(res); ++ res = ERR_PTR(-EOPENSTALE); ++ } ++ } + if (switched) { + d_lookup_done(dentry); + if (!res) +@@ -2383,6 +2398,8 @@ nfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry) + + trace_nfs_link_enter(inode, dir, dentry); + d_drop(dentry); ++ if (S_ISREG(inode->i_mode)) ++ nfs_sync_inode(inode); + 
error = NFS_PROTO(dir)->link(inode, dir, &dentry->d_name); + if (error == 0) { + ihold(inode); +@@ -2471,6 +2488,8 @@ int nfs_rename(struct user_namespace *mnt_userns, struct inode *old_dir, + } + } + ++ if (S_ISREG(old_inode->i_mode)) ++ nfs_sync_inode(old_inode); + task = nfs_async_rename(old_dir, new_dir, old_dentry, new_dentry, NULL); + if (IS_ERR(task)) { + error = PTR_ERR(task); +@@ -2676,7 +2695,7 @@ static struct nfs_access_entry *nfs_access_search_rbtree(struct inode *inode, co + return NULL; + } + +-static int nfs_access_get_cached_locked(struct inode *inode, const struct cred *cred, struct nfs_access_entry *res, bool may_block) ++static int nfs_access_get_cached_locked(struct inode *inode, const struct cred *cred, u32 *mask, bool may_block) + { + struct nfs_inode *nfsi = NFS_I(inode); + struct nfs_access_entry *cache; +@@ -2706,8 +2725,7 @@ static int nfs_access_get_cached_locked(struct inode *inode, const struct cred * + spin_lock(&inode->i_lock); + retry = false; + } +- res->cred = cache->cred; +- res->mask = cache->mask; ++ *mask = cache->mask; + list_move_tail(&cache->lru, &nfsi->access_cache_entry_lru); + err = 0; + out: +@@ -2719,7 +2737,7 @@ out_zap: + return -ENOENT; + } + +-static int nfs_access_get_cached_rcu(struct inode *inode, const struct cred *cred, struct nfs_access_entry *res) ++static int nfs_access_get_cached_rcu(struct inode *inode, const struct cred *cred, u32 *mask) + { + /* Only check the most recently returned cache entry, + * but do it without locking. +@@ -2741,22 +2759,21 @@ static int nfs_access_get_cached_rcu(struct inode *inode, const struct cred *cre + goto out; + if (nfs_check_cache_invalid(inode, NFS_INO_INVALID_ACCESS)) + goto out; +- res->cred = cache->cred; +- res->mask = cache->mask; ++ *mask = cache->mask; + err = 0; + out: + rcu_read_unlock(); + return err; + } + +-int nfs_access_get_cached(struct inode *inode, const struct cred *cred, struct +-nfs_access_entry *res, bool may_block) ++int nfs_access_get_cached(struct inode *inode, const struct cred *cred, ++ u32 *mask, bool may_block) + { + int status; + +- status = nfs_access_get_cached_rcu(inode, cred, res); ++ status = nfs_access_get_cached_rcu(inode, cred, mask); + if (status != 0) +- status = nfs_access_get_cached_locked(inode, cred, res, ++ status = nfs_access_get_cached_locked(inode, cred, mask, + may_block); + + return status; +@@ -2877,7 +2894,7 @@ static int nfs_do_access(struct inode *inode, const struct cred *cred, int mask) + + trace_nfs_access_enter(inode); + +- status = nfs_access_get_cached(inode, cred, &cache, may_block); ++ status = nfs_access_get_cached(inode, cred, &cache.mask, may_block); + if (status == 0) + goto out_cached; + +diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c +index 2e894fec036b0..c220810c61d14 100644 +--- a/fs/nfs/direct.c ++++ b/fs/nfs/direct.c +@@ -172,8 +172,8 @@ ssize_t nfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter) + VM_BUG_ON(iov_iter_count(iter) != PAGE_SIZE); + + if (iov_iter_rw(iter) == READ) +- return nfs_file_direct_read(iocb, iter); +- return nfs_file_direct_write(iocb, iter); ++ return nfs_file_direct_read(iocb, iter, true); ++ return nfs_file_direct_write(iocb, iter, true); + } + + static void nfs_direct_release_pages(struct page **pages, unsigned int npages) +@@ -424,6 +424,7 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq, + * nfs_file_direct_read - file direct read operation for NFS files + * @iocb: target I/O control block + * @iter: vector of user buffers into which to read data ++ * @swap: flag 
indicating this is swap IO, not O_DIRECT IO + * + * We use this function for direct reads instead of calling + * generic_file_aio_read() in order to avoid gfar's check to see if +@@ -439,7 +440,8 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq, + * client must read the updated atime from the server back into its + * cache. + */ +-ssize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter) ++ssize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter, ++ bool swap) + { + struct file *file = iocb->ki_filp; + struct address_space *mapping = file->f_mapping; +@@ -481,12 +483,14 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter) + if (iter_is_iovec(iter)) + dreq->flags = NFS_ODIRECT_SHOULD_DIRTY; + +- nfs_start_io_direct(inode); ++ if (!swap) ++ nfs_start_io_direct(inode); + + NFS_I(inode)->read_io += count; + requested = nfs_direct_read_schedule_iovec(dreq, iter, iocb->ki_pos); + +- nfs_end_io_direct(inode); ++ if (!swap) ++ nfs_end_io_direct(inode); + + if (requested > 0) { + result = nfs_direct_wait(dreq); +@@ -620,7 +624,7 @@ static void nfs_direct_commit_complete(struct nfs_commit_data *data) + nfs_unlock_and_release_request(req); + } + +- if (atomic_dec_and_test(&cinfo.mds->rpcs_out)) ++ if (nfs_commit_end(cinfo.mds)) + nfs_direct_write_complete(dreq); + } + +@@ -789,7 +793,7 @@ static const struct nfs_pgio_completion_ops nfs_direct_write_completion_ops = { + */ + static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq, + struct iov_iter *iter, +- loff_t pos) ++ loff_t pos, int ioflags) + { + struct nfs_pageio_descriptor desc; + struct inode *inode = dreq->inode; +@@ -797,7 +801,7 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq, + size_t requested_bytes = 0; + size_t wsize = max_t(size_t, NFS_SERVER(inode)->wsize, PAGE_SIZE); + +- nfs_pageio_init_write(&desc, inode, FLUSH_COND_STABLE, false, ++ nfs_pageio_init_write(&desc, inode, ioflags, false, + &nfs_direct_write_completion_ops); + desc.pg_dreq = dreq; + get_dreq(dreq); +@@ -875,6 +879,7 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq, + * nfs_file_direct_write - file direct write operation for NFS files + * @iocb: target I/O control block + * @iter: vector of user buffers from which to write data ++ * @swap: flag indicating this is swap IO, not O_DIRECT IO + * + * We use this function for direct writes instead of calling + * generic_file_aio_write() in order to avoid taking the inode +@@ -891,7 +896,8 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq, + * Note that O_APPEND is not supported for NFS direct writes, as there + * is no atomic O_APPEND write facility in the NFS protocol. 
+ */ +-ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter) ++ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter, ++ bool swap) + { + ssize_t result, requested; + size_t count; +@@ -905,7 +911,11 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter) + dfprintk(FILE, "NFS: direct write(%pD2, %zd@%Ld)\n", + file, iov_iter_count(iter), (long long) iocb->ki_pos); + +- result = generic_write_checks(iocb, iter); ++ if (swap) ++ /* bypass generic checks */ ++ result = iov_iter_count(iter); ++ else ++ result = generic_write_checks(iocb, iter); + if (result <= 0) + return result; + count = result; +@@ -936,16 +946,22 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter) + dreq->iocb = iocb; + pnfs_init_ds_commit_info_ops(&dreq->ds_cinfo, inode); + +- nfs_start_io_direct(inode); ++ if (swap) { ++ requested = nfs_direct_write_schedule_iovec(dreq, iter, pos, ++ FLUSH_STABLE); ++ } else { ++ nfs_start_io_direct(inode); + +- requested = nfs_direct_write_schedule_iovec(dreq, iter, pos); ++ requested = nfs_direct_write_schedule_iovec(dreq, iter, pos, ++ FLUSH_COND_STABLE); + +- if (mapping->nrpages) { +- invalidate_inode_pages2_range(mapping, +- pos >> PAGE_SHIFT, end); +- } ++ if (mapping->nrpages) { ++ invalidate_inode_pages2_range(mapping, ++ pos >> PAGE_SHIFT, end); ++ } + +- nfs_end_io_direct(inode); ++ nfs_end_io_direct(inode); ++ } + + if (requested > 0) { + result = nfs_direct_wait(dreq); +diff --git a/fs/nfs/file.c b/fs/nfs/file.c +index aa353fd582404..ad5114e480097 100644 +--- a/fs/nfs/file.c ++++ b/fs/nfs/file.c +@@ -161,7 +161,7 @@ nfs_file_read(struct kiocb *iocb, struct iov_iter *to) + ssize_t result; + + if (iocb->ki_flags & IOCB_DIRECT) +- return nfs_file_direct_read(iocb, to); ++ return nfs_file_direct_read(iocb, to, false); + + dprintk("NFS: read(%pD2, %zu@%lu)\n", + iocb->ki_filp, +@@ -208,22 +208,25 @@ static int + nfs_file_fsync_commit(struct file *file, int datasync) + { + struct inode *inode = file_inode(file); +- int ret; ++ int ret, ret2; + + dprintk("NFS: fsync file(%pD2) datasync %d\n", file, datasync); + + nfs_inc_stats(inode, NFSIOS_VFSFSYNC); + ret = nfs_commit_inode(inode, FLUSH_SYNC); +- if (ret < 0) +- return ret; +- return file_check_and_advance_wb_err(file); ++ ret2 = file_check_and_advance_wb_err(file); ++ if (ret2 < 0) ++ return ret2; ++ return ret; + } + + int + nfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync) + { +- struct nfs_open_context *ctx = nfs_file_open_context(file); + struct inode *inode = file_inode(file); ++ struct nfs_inode *nfsi = NFS_I(inode); ++ long save_nredirtied = atomic_long_read(&nfsi->redirtied_pages); ++ long nredirtied; + int ret; + + trace_nfs_fsync_enter(inode); +@@ -238,15 +241,10 @@ nfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync) + ret = pnfs_sync_inode(inode, !!datasync); + if (ret != 0) + break; +- if (!test_and_clear_bit(NFS_CONTEXT_RESEND_WRITES, &ctx->flags)) ++ nredirtied = atomic_long_read(&nfsi->redirtied_pages); ++ if (nredirtied == save_nredirtied) + break; +- /* +- * If nfs_file_fsync_commit detected a server reboot, then +- * resend all dirty pages that might have been covered by +- * the NFS_CONTEXT_RESEND_WRITES flag +- */ +- start = 0; +- end = LLONG_MAX; ++ save_nredirtied = nredirtied; + } + + trace_nfs_fsync_exit(inode, ret); +@@ -389,11 +387,8 @@ static int nfs_write_end(struct file *file, struct address_space *mapping, + return status; + NFS_I(mapping->host)->write_io += copied; + +- if 
(nfs_ctx_key_to_expire(ctx, mapping->host)) { +- status = nfs_wb_all(mapping->host); +- if (status < 0) +- return status; +- } ++ if (nfs_ctx_key_to_expire(ctx, mapping->host)) ++ nfs_wb_all(mapping->host); + + return copied; + } +@@ -590,18 +585,6 @@ static const struct vm_operations_struct nfs_file_vm_ops = { + .page_mkwrite = nfs_vm_page_mkwrite, + }; + +-static int nfs_need_check_write(struct file *filp, struct inode *inode, +- int error) +-{ +- struct nfs_open_context *ctx; +- +- ctx = nfs_file_open_context(filp); +- if (nfs_error_is_fatal_on_server(error) || +- nfs_ctx_key_to_expire(ctx, inode)) +- return 1; +- return 0; +-} +- + ssize_t nfs_file_write(struct kiocb *iocb, struct iov_iter *from) + { + struct file *file = iocb->ki_filp; +@@ -616,7 +599,7 @@ ssize_t nfs_file_write(struct kiocb *iocb, struct iov_iter *from) + return result; + + if (iocb->ki_flags & IOCB_DIRECT) +- return nfs_file_direct_write(iocb, from); ++ return nfs_file_direct_write(iocb, from, false); + + dprintk("NFS: write(%pD2, %zu@%Ld)\n", + file, iov_iter_count(from), (long long) iocb->ki_pos); +@@ -629,7 +612,7 @@ ssize_t nfs_file_write(struct kiocb *iocb, struct iov_iter *from) + if (iocb->ki_flags & IOCB_APPEND || iocb->ki_pos > i_size_read(inode)) { + result = nfs_revalidate_file_size(inode, file); + if (result) +- goto out; ++ return result; + } + + nfs_clear_invalid_mapping(file->f_mapping); +@@ -648,6 +631,7 @@ ssize_t nfs_file_write(struct kiocb *iocb, struct iov_iter *from) + + written = result; + iocb->ki_pos += written; ++ nfs_add_stats(inode, NFSIOS_NORMALWRITTENBYTES, written); + + if (mntflags & NFS_MOUNT_WRITE_EAGER) { + result = filemap_fdatawrite_range(file->f_mapping, +@@ -665,17 +649,22 @@ ssize_t nfs_file_write(struct kiocb *iocb, struct iov_iter *from) + } + result = generic_write_sync(iocb, written); + if (result < 0) +- goto out; ++ return result; + ++out: + /* Return error values */ + error = filemap_check_wb_err(file->f_mapping, since); +- if (nfs_need_check_write(file, inode, error)) { +- int err = nfs_wb_all(inode); +- if (err < 0) +- result = err; ++ switch (error) { ++ default: ++ break; ++ case -EDQUOT: ++ case -EFBIG: ++ case -ENOSPC: ++ nfs_wb_all(inode); ++ error = file_check_and_advance_wb_err(file); ++ if (error < 0) ++ result = error; + } +- nfs_add_stats(inode, NFSIOS_NORMALWRITTENBYTES, written); +-out: + return result; + + out_swapfile: +diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c +index d2103852475fa..45eec08ec904f 100644 +--- a/fs/nfs/filelayout/filelayout.c ++++ b/fs/nfs/filelayout/filelayout.c +@@ -783,6 +783,12 @@ filelayout_alloc_lseg(struct pnfs_layout_hdr *layoutid, + return &fl->generic_hdr; + } + ++static bool ++filelayout_lseg_is_striped(const struct nfs4_filelayout_segment *flseg) ++{ ++ return flseg->num_fh > 1; ++} ++ + /* + * filelayout_pg_test(). 
Called by nfs_can_coalesce_requests() + * +@@ -803,6 +809,8 @@ filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, + size = pnfs_generic_pg_test(pgio, prev, req); + if (!size) + return 0; ++ else if (!filelayout_lseg_is_striped(FILELAYOUT_LSEG(pgio->pg_lseg))) ++ return size; + + /* see if req and prev are in the same stripe */ + if (prev) { +diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c +index d383de00d4868..ceef75b4d2494 100644 +--- a/fs/nfs/flexfilelayout/flexfilelayout.c ++++ b/fs/nfs/flexfilelayout/flexfilelayout.c +@@ -1140,6 +1140,8 @@ static int ff_layout_async_handle_error_v4(struct rpc_task *task, + case -EIO: + case -ETIMEDOUT: + case -EPIPE: ++ case -EPROTO: ++ case -ENODEV: + dprintk("%s DS connection error %d\n", __func__, + task->tk_status); + nfs4_delete_deviceid(devid->ld, devid->nfs_client, +@@ -1245,6 +1247,8 @@ static void ff_layout_io_track_ds_error(struct pnfs_layout_segment *lseg, + case -ENOBUFS: + case -EPIPE: + case -EPERM: ++ case -EPROTO: ++ case -ENODEV: + *op_status = status = NFS4ERR_NXIO; + break; + case -EACCES: +diff --git a/fs/nfs/flexfilelayout/flexfilelayoutdev.c b/fs/nfs/flexfilelayout/flexfilelayoutdev.c +index c9b61b818ec11..bfa7202ca7be1 100644 +--- a/fs/nfs/flexfilelayout/flexfilelayoutdev.c ++++ b/fs/nfs/flexfilelayout/flexfilelayoutdev.c +@@ -378,10 +378,10 @@ nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg, + goto noconnect; + + ds = mirror->mirror_ds->ds; ++ if (READ_ONCE(ds->ds_clp)) ++ goto out; + /* matching smp_wmb() in _nfs4_pnfs_v3/4_ds_connect */ + smp_rmb(); +- if (ds->ds_clp) +- goto out; + + /* FIXME: For now we assume the server sent only one version of NFS + * to use for the DS. +diff --git a/fs/nfs/fs_context.c b/fs/nfs/fs_context.c +index 0d444a90f513a..fb3cad38b1497 100644 +--- a/fs/nfs/fs_context.c ++++ b/fs/nfs/fs_context.c +@@ -514,7 +514,7 @@ static int nfs_fs_context_parse_param(struct fs_context *fc, + if (result.negated) + ctx->flags &= ~NFS_MOUNT_SOFTREVAL; + else +- ctx->flags &= NFS_MOUNT_SOFTREVAL; ++ ctx->flags |= NFS_MOUNT_SOFTREVAL; + break; + case Opt_posix: + if (result.negated) +diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c +index 59355c106eceb..7604cb6a0ac23 100644 +--- a/fs/nfs/getroot.c ++++ b/fs/nfs/getroot.c +@@ -80,18 +80,15 @@ int nfs_get_root(struct super_block *s, struct fs_context *fc) + goto out; + + /* get the actual root for this mount */ +- fsinfo.fattr = nfs_alloc_fattr(); ++ fsinfo.fattr = nfs_alloc_fattr_with_label(server); + if (fsinfo.fattr == NULL) + goto out_name; + +- fsinfo.fattr->label = nfs4_label_alloc(server, GFP_KERNEL); +- if (IS_ERR(fsinfo.fattr->label)) +- goto out_fattr; + error = server->nfs_client->rpc_ops->getroot(server, ctx->mntfh, &fsinfo); + if (error < 0) { + dprintk("nfs_get_root: getattr error = %d\n", -error); + nfs_errorf(fc, "NFS: Couldn't getattr on root"); +- goto out_label; ++ goto out_fattr; + } + + inode = nfs_fhget(s, ctx->mntfh, fsinfo.fattr, NULL); +@@ -99,12 +96,12 @@ int nfs_get_root(struct super_block *s, struct fs_context *fc) + dprintk("nfs_get_root: get root inode failed\n"); + error = PTR_ERR(inode); + nfs_errorf(fc, "NFS: Couldn't get root inode"); +- goto out_label; ++ goto out_fattr; + } + + error = nfs_superblock_set_dummy_root(s, inode); + if (error != 0) +- goto out_label; ++ goto out_fattr; + + /* root dentries normally start off anonymous and get spliced in later + * if the dentry tree reaches them; however if the dentry already +@@ -115,7 +112,7 @@ int 
nfs_get_root(struct super_block *s, struct fs_context *fc) + dprintk("nfs_get_root: get root dentry failed\n"); + error = PTR_ERR(root); + nfs_errorf(fc, "NFS: Couldn't get root dentry"); +- goto out_label; ++ goto out_fattr; + } + + security_d_instantiate(root, inode); +@@ -154,8 +151,6 @@ int nfs_get_root(struct super_block *s, struct fs_context *fc) + nfs_setsecurity(inode, fsinfo.fattr, fsinfo.fattr->label); + error = 0; + +-out_label: +- nfs4_label_free(fsinfo.fattr->label); + out_fattr: + nfs_free_fattr(fsinfo.fattr); + out_name: +@@ -165,5 +160,5 @@ out: + error_splat_root: + dput(fc->root); + fc->root = NULL; +- goto out_label; ++ goto out_fattr; + } +diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c +index 853213b3a2095..e4524635a129a 100644 +--- a/fs/nfs/inode.c ++++ b/fs/nfs/inode.c +@@ -210,10 +210,15 @@ void nfs_set_cache_invalid(struct inode *inode, unsigned long flags) + flags &= ~NFS_INO_INVALID_XATTR; + if (flags & NFS_INO_INVALID_DATA) + nfs_fscache_invalidate(inode); +- if (inode->i_mapping->nrpages == 0) +- flags &= ~(NFS_INO_INVALID_DATA|NFS_INO_DATA_INVAL_DEFER); + flags &= ~(NFS_INO_REVAL_PAGECACHE | NFS_INO_REVAL_FORCED); ++ + nfsi->cache_validity |= flags; ++ ++ if (inode->i_mapping->nrpages == 0) ++ nfsi->cache_validity &= ~(NFS_INO_INVALID_DATA | ++ NFS_INO_DATA_INVAL_DEFER); ++ else if (nfsi->cache_validity & NFS_INO_INVALID_DATA) ++ nfsi->cache_validity &= ~NFS_INO_DATA_INVAL_DEFER; + } + EXPORT_SYMBOL_GPL(nfs_set_cache_invalid); + +@@ -426,6 +431,23 @@ nfs_ilookup(struct super_block *sb, struct nfs_fattr *fattr, struct nfs_fh *fh) + return inode; + } + ++static void nfs_inode_init_regular(struct nfs_inode *nfsi) ++{ ++ atomic_long_set(&nfsi->nrequests, 0); ++ atomic_long_set(&nfsi->redirtied_pages, 0); ++ INIT_LIST_HEAD(&nfsi->commit_info.list); ++ atomic_long_set(&nfsi->commit_info.ncommit, 0); ++ atomic_set(&nfsi->commit_info.rpcs_out, 0); ++ mutex_init(&nfsi->commit_mutex); ++} ++ ++static void nfs_inode_init_dir(struct nfs_inode *nfsi) ++{ ++ nfsi->cache_change_attribute = 0; ++ memset(nfsi->cookieverf, 0, sizeof(nfsi->cookieverf)); ++ init_rwsem(&nfsi->rmdir_sem); ++} ++ + /* + * This is our front-end to iget that looks up inodes by file handle + * instead of inode number. +@@ -480,10 +502,12 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr, st + if (S_ISREG(inode->i_mode)) { + inode->i_fop = NFS_SB(sb)->nfs_client->rpc_ops->file_ops; + inode->i_data.a_ops = &nfs_file_aops; ++ nfs_inode_init_regular(nfsi); + } else if (S_ISDIR(inode->i_mode)) { + inode->i_op = NFS_SB(sb)->nfs_client->rpc_ops->dir_inode_ops; + inode->i_fop = &nfs_dir_operations; + inode->i_data.a_ops = &nfs_dir_aops; ++ nfs_inode_init_dir(nfsi); + /* Deal with crossing mountpoints */ + if (fattr->valid & NFS_ATTR_FATTR_MOUNTPOINT || + fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL) { +@@ -509,7 +533,6 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr, st + inode->i_uid = make_kuid(&init_user_ns, -2); + inode->i_gid = make_kgid(&init_user_ns, -2); + inode->i_blocks = 0; +- memset(nfsi->cookieverf, 0, sizeof(nfsi->cookieverf)); + nfsi->write_io = 0; + nfsi->read_io = 0; + +@@ -835,12 +858,9 @@ int nfs_getattr(struct user_namespace *mnt_userns, const struct path *path, + } + + /* Flush out writes to the server in order to update c/mtime. 
*/ +- if ((request_mask & (STATX_CTIME|STATX_MTIME)) && +- S_ISREG(inode->i_mode)) { +- err = filemap_write_and_wait(inode->i_mapping); +- if (err) +- goto out; +- } ++ if ((request_mask & (STATX_CTIME | STATX_MTIME)) && ++ S_ISREG(inode->i_mode)) ++ filemap_write_and_wait(inode->i_mapping); + + /* + * We may force a getattr if the user cares about atime. +@@ -1165,7 +1185,6 @@ int nfs_open(struct inode *inode, struct file *filp) + nfs_fscache_open_file(inode, filp); + return 0; + } +-EXPORT_SYMBOL_GPL(nfs_open); + + /* + * This function is called whenever some part of NFS notices that +@@ -1579,18 +1598,37 @@ struct nfs_fattr *nfs_alloc_fattr(void) + { + struct nfs_fattr *fattr; + +- fattr = kmalloc(sizeof(*fattr), GFP_NOFS); +- if (fattr != NULL) ++ fattr = kmalloc(sizeof(*fattr), GFP_KERNEL); ++ if (fattr != NULL) { + nfs_fattr_init(fattr); ++ fattr->label = NULL; ++ } + return fattr; + } + EXPORT_SYMBOL_GPL(nfs_alloc_fattr); + ++struct nfs_fattr *nfs_alloc_fattr_with_label(struct nfs_server *server) ++{ ++ struct nfs_fattr *fattr = nfs_alloc_fattr(); ++ ++ if (!fattr) ++ return NULL; ++ ++ fattr->label = nfs4_label_alloc(server, GFP_KERNEL); ++ if (IS_ERR(fattr->label)) { ++ kfree(fattr); ++ return NULL; ++ } ++ ++ return fattr; ++} ++EXPORT_SYMBOL_GPL(nfs_alloc_fattr_with_label); ++ + struct nfs_fh *nfs_alloc_fhandle(void) + { + struct nfs_fh *fh; + +- fh = kmalloc(sizeof(struct nfs_fh), GFP_NOFS); ++ fh = kmalloc(sizeof(struct nfs_fh), GFP_KERNEL); + if (fh != NULL) + fh->size = 0; + return fh; +@@ -1777,8 +1815,10 @@ static int nfs_inode_finish_partial_attr_update(const struct nfs_fattr *fattr, + NFS_INO_INVALID_BLOCKS | NFS_INO_INVALID_OTHER | + NFS_INO_INVALID_NLINK; + unsigned long cache_validity = NFS_I(inode)->cache_validity; ++ enum nfs4_change_attr_type ctype = NFS_SERVER(inode)->change_attr_type; + +- if (!(cache_validity & NFS_INO_INVALID_CHANGE) && ++ if (ctype != NFS4_CHANGE_TYPE_IS_UNDEFINED && ++ !(cache_validity & NFS_INO_INVALID_CHANGE) && + (cache_validity & check_valid) != 0 && + (fattr->valid & NFS_ATTR_FATTR_CHANGE) != 0 && + nfs_inode_attrs_cmp_monotonic(fattr, inode) == 0) +@@ -2260,14 +2300,7 @@ static void init_once(void *foo) + INIT_LIST_HEAD(&nfsi->open_files); + INIT_LIST_HEAD(&nfsi->access_cache_entry_lru); + INIT_LIST_HEAD(&nfsi->access_cache_inode_lru); +- INIT_LIST_HEAD(&nfsi->commit_info.list); +- atomic_long_set(&nfsi->nrequests, 0); +- atomic_long_set(&nfsi->commit_info.ncommit, 0); +- atomic_set(&nfsi->commit_info.rpcs_out, 0); +- init_rwsem(&nfsi->rmdir_sem); +- mutex_init(&nfsi->commit_mutex); + nfs4_init_once(nfsi); +- nfsi->cache_change_attribute = 0; + } + + static int __init nfs_init_inodecache(void) +diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h +index 66fc936834f23..2ceb4b98ec15f 100644 +--- a/fs/nfs/internal.h ++++ b/fs/nfs/internal.h +@@ -42,6 +42,16 @@ static inline bool nfs_lookup_is_soft_revalidate(const struct dentry *dentry) + return true; + } + ++static inline fmode_t flags_to_mode(int flags) ++{ ++ fmode_t res = (__force fmode_t)flags & FMODE_EXEC; ++ if ((flags & O_ACCMODE) != O_WRONLY) ++ res |= FMODE_READ; ++ if ((flags & O_ACCMODE) != O_RDONLY) ++ res |= FMODE_WRITE; ++ return res; ++} ++ + /* + * Note: RFC 1813 doesn't limit the number of auth flavors that + * a server can return, so make something up. 
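[Illustrative aside, not part of the patch: the hunk above turns flags_to_mode() into a shared inline in fs/nfs/internal.h so that the nfs4_file_open() change later in this patch can derive an fmode_t even for the Linux-specific (flags & O_ACCMODE) == 3 open, which has no dedicated access bit. Below is a minimal standalone sketch of that mapping under stand-in names -- sketch_flags_to_mode() and the SK_FMODE_* constants are hypothetical, and the kernel's FMODE_EXEC folding is omitted; it is not the kernel implementation.]

#include <stdio.h>
#include <fcntl.h>

/* Stand-in mode bits for illustration; not the kernel's fmode_t values. */
#define SK_FMODE_READ  0x1u
#define SK_FMODE_WRITE 0x2u

static unsigned int sketch_flags_to_mode(int flags)
{
	unsigned int res = 0;

	/*
	 * O_ACCMODE masks the access mode: O_RDONLY (0), O_WRONLY (1),
	 * O_RDWR (2), plus the nonstandard value 3 some callers pass.
	 * Anything that is not write-only can read; anything that is
	 * not read-only can write, so 3 maps to read|write.
	 */
	if ((flags & O_ACCMODE) != O_WRONLY)
		res |= SK_FMODE_READ;
	if ((flags & O_ACCMODE) != O_RDONLY)
		res |= SK_FMODE_WRITE;
	return res;
}

int main(void)
{
	printf("O_RDONLY  -> %#x\n", sketch_flags_to_mode(O_RDONLY)); /* 0x1 */
	printf("O_WRONLY  -> %#x\n", sketch_flags_to_mode(O_WRONLY)); /* 0x2 */
	printf("O_RDWR    -> %#x\n", sketch_flags_to_mode(O_RDWR));   /* 0x3 */
	printf("accmode 3 -> %#x\n", sketch_flags_to_mode(3));        /* 0x3 */
	return 0;
}

[End of aside; the patch continues below.]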
+@@ -341,14 +351,6 @@ nfs4_label_copy(struct nfs4_label *dst, struct nfs4_label *src) + + return dst; + } +-static inline void nfs4_label_free(struct nfs4_label *label) +-{ +- if (label) { +- kfree(label->label); +- kfree(label); +- } +- return; +-} + + static inline void nfs_zap_label_cache_locked(struct nfs_inode *nfsi) + { +@@ -357,7 +359,6 @@ static inline void nfs_zap_label_cache_locked(struct nfs_inode *nfsi) + } + #else + static inline struct nfs4_label *nfs4_label_alloc(struct nfs_server *server, gfp_t flags) { return NULL; } +-static inline void nfs4_label_free(void *label) {} + static inline void nfs_zap_label_cache_locked(struct nfs_inode *nfsi) + { + } +@@ -580,6 +581,13 @@ nfs_write_match_verf(const struct nfs_writeverf *verf, + !nfs_write_verifier_cmp(&req->wb_verf, &verf->verifier); + } + ++static inline gfp_t nfs_io_gfp_mask(void) ++{ ++ if (current->flags & PF_WQ_WORKER) ++ return GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN; ++ return GFP_KERNEL; ++} ++ + /* unlink.c */ + extern struct rpc_task * + nfs_async_rename(struct inode *old_dir, struct inode *new_dir, +@@ -817,6 +825,7 @@ static inline bool nfs_error_is_fatal_on_server(int err) + case 0: + case -ERESTARTSYS: + case -EINTR: ++ case -ENOMEM: + return false; + } + return nfs_error_is_fatal(err); +diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c +index bc0c698f33508..565421c6682ed 100644 +--- a/fs/nfs/namespace.c ++++ b/fs/nfs/namespace.c +@@ -147,7 +147,7 @@ struct vfsmount *nfs_d_automount(struct path *path) + struct nfs_fs_context *ctx; + struct fs_context *fc; + struct vfsmount *mnt = ERR_PTR(-ENOMEM); +- struct nfs_server *server = NFS_SERVER(d_inode(path->dentry)); ++ struct nfs_server *server = NFS_SB(path->dentry->d_sb); + struct nfs_client *client = server->nfs_client; + int timeout = READ_ONCE(nfs_mountpoint_expiry_timeout); + int ret; +diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c +index 7fba7711e6b3a..3d5ba43f44bb6 100644 +--- a/fs/nfs/nfs2xdr.c ++++ b/fs/nfs/nfs2xdr.c +@@ -949,7 +949,7 @@ int nfs2_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, + + error = decode_filename_inline(xdr, &entry->name, &entry->len); + if (unlikely(error)) +- return error; ++ return -EAGAIN; + + /* + * The type (size and byte order) of nfscookie isn't defined in +diff --git a/fs/nfs/nfs3client.c b/fs/nfs/nfs3client.c +index 5601e47360c28..b49359afac883 100644 +--- a/fs/nfs/nfs3client.c ++++ b/fs/nfs/nfs3client.c +@@ -108,7 +108,6 @@ struct nfs_client *nfs3_set_ds_client(struct nfs_server *mds_srv, + if (mds_srv->flags & NFS_MOUNT_NORESVPORT) + __set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags); + +- __set_bit(NFS_CS_NOPING, &cl_init.init_flags); + __set_bit(NFS_CS_DS, &cl_init.init_flags); + + /* Use the MDS nfs_client cl_ipaddr. 
*/ +diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c +index e6eca1d7481b8..7ab60ad98776f 100644 +--- a/fs/nfs/nfs3xdr.c ++++ b/fs/nfs/nfs3xdr.c +@@ -1967,7 +1967,6 @@ int nfs3_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, + bool plus) + { + struct user_namespace *userns = rpc_userns(entry->server->client); +- struct nfs_entry old = *entry; + __be32 *p; + int error; + u64 new_cookie; +@@ -1987,15 +1986,15 @@ int nfs3_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, + + error = decode_fileid3(xdr, &entry->ino); + if (unlikely(error)) +- return error; ++ return -EAGAIN; + + error = decode_inline_filename3(xdr, &entry->name, &entry->len); + if (unlikely(error)) +- return error; ++ return -EAGAIN; + + error = decode_cookie3(xdr, &new_cookie); + if (unlikely(error)) +- return error; ++ return -EAGAIN; + + entry->d_type = DT_UNKNOWN; + +@@ -2003,7 +2002,7 @@ int nfs3_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, + entry->fattr->valid = 0; + error = decode_post_op_attr(xdr, entry->fattr, userns); + if (unlikely(error)) +- return error; ++ return -EAGAIN; + if (entry->fattr->valid & NFS_ATTR_FATTR_V3) + entry->d_type = nfs_umode_to_dtype(entry->fattr->mode); + +@@ -2018,11 +2017,8 @@ int nfs3_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, + return -EAGAIN; + if (*p != xdr_zero) { + error = decode_nfs_fh3(xdr, entry->fh); +- if (unlikely(error)) { +- if (error == -E2BIG) +- goto out_truncated; +- return error; +- } ++ if (unlikely(error)) ++ return -EAGAIN; + } else + zero_nfs_fh3(entry->fh); + } +@@ -2031,11 +2027,6 @@ int nfs3_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, + entry->cookie = new_cookie; + + return 0; +- +-out_truncated: +- dprintk("NFS: directory entry contains invalid file handle\n"); +- *entry = old; +- return -EAGAIN; + } + + /* +@@ -2227,7 +2218,8 @@ static int decode_fsinfo3resok(struct xdr_stream *xdr, + + /* ignore properties */ + result->lease_time = 0; +- result->change_attr_type = NFS4_CHANGE_TYPE_IS_TIME_METADATA; ++ result->change_attr_type = NFS4_CHANGE_TYPE_IS_UNDEFINED; ++ result->xattr_support = 0; + return 0; + } + +diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c +index a24349512ffe9..da94bf2afd070 100644 +--- a/fs/nfs/nfs42proc.c ++++ b/fs/nfs/nfs42proc.c +@@ -285,7 +285,9 @@ static void nfs42_copy_dest_done(struct inode *inode, loff_t pos, loff_t len) + loff_t newsize = pos + len; + loff_t end = newsize - 1; + +- truncate_pagecache_range(inode, pos, end); ++ WARN_ON_ONCE(invalidate_inode_pages2_range(inode->i_mapping, ++ pos >> PAGE_SHIFT, end >> PAGE_SHIFT)); ++ + spin_lock(&inode->i_lock); + if (newsize > i_size_read(inode)) + i_size_write(inode, newsize); +@@ -584,8 +586,10 @@ static int _nfs42_proc_copy_notify(struct file *src, struct file *dst, + + ctx = get_nfs_open_context(nfs_file_open_context(src)); + l_ctx = nfs_get_lock_context(ctx); +- if (IS_ERR(l_ctx)) +- return PTR_ERR(l_ctx); ++ if (IS_ERR(l_ctx)) { ++ status = PTR_ERR(l_ctx); ++ goto out; ++ } + + status = nfs4_set_rw_stateid(&args->cna_src_stateid, ctx, l_ctx, + FMODE_READ); +@@ -593,7 +597,7 @@ static int _nfs42_proc_copy_notify(struct file *src, struct file *dst, + if (status) { + if (status == -EAGAIN) + status = -NFS4ERR_BAD_STATEID; +- return status; ++ goto out; + } + + status = nfs4_call_sync(src_server->client, src_server, &msg, +@@ -601,6 +605,7 @@ static int _nfs42_proc_copy_notify(struct file *src, struct file *dst, + if (status == -ENOTSUPP) + src_server->caps &= ~NFS_CAP_COPY_NOTIFY; + ++out: + 
put_nfs_open_context(nfs_file_open_context(src)); + return status; + } +@@ -1072,6 +1077,9 @@ static int _nfs42_proc_clone(struct rpc_message *msg, struct file *src_f, + status = nfs4_call_sync(server->client, server, msg, + &args.seq_args, &res.seq_res, 0); + if (status == 0) { ++ /* a zero-length count means clone to EOF in src */ ++ if (count == 0 && res.dst_fattr->valid & NFS_ATTR_FATTR_SIZE) ++ count = nfs_size_to_loff_t(res.dst_fattr->size) - dst_offset; + nfs42_copy_dest_done(dst_inode, dst_offset, count); + status = nfs_post_op_update_inode(dst_inode, res.dst_fattr); + } +diff --git a/fs/nfs/nfs42xdr.c b/fs/nfs/nfs42xdr.c +index c8bad735e4c19..271e5f92ed019 100644 +--- a/fs/nfs/nfs42xdr.c ++++ b/fs/nfs/nfs42xdr.c +@@ -1434,8 +1434,7 @@ static int nfs4_xdr_dec_clone(struct rpc_rqst *rqstp, + status = decode_clone(xdr); + if (status) + goto out; +- status = decode_getfattr(xdr, res->dst_fattr, res->server); +- ++ decode_getfattr(xdr, res->dst_fattr, res->server); + out: + res->rpc_status = status; + return status; +diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h +index ba78df4b13d94..f8672a34fd635 100644 +--- a/fs/nfs/nfs4_fs.h ++++ b/fs/nfs/nfs4_fs.h +@@ -261,8 +261,8 @@ struct nfs4_state_maintenance_ops { + }; + + struct nfs4_mig_recovery_ops { +- int (*get_locations)(struct inode *, struct nfs4_fs_locations *, +- struct page *, const struct cred *); ++ int (*get_locations)(struct nfs_server *, struct nfs_fh *, ++ struct nfs4_fs_locations *, struct page *, const struct cred *); + int (*fsid_present)(struct inode *, const struct cred *); + }; + +@@ -281,7 +281,8 @@ struct rpc_clnt *nfs4_negotiate_security(struct rpc_clnt *, struct inode *, + int nfs4_submount(struct fs_context *, struct nfs_server *); + int nfs4_replace_transport(struct nfs_server *server, + const struct nfs4_fs_locations *locations); +- ++size_t nfs_parse_server_name(char *string, size_t len, struct sockaddr *sa, ++ size_t salen, struct net *net, int port); + /* nfs4proc.c */ + extern int nfs4_handle_exception(struct nfs_server *, int, struct nfs4_exception *); + extern int nfs4_async_handle_error(struct rpc_task *task, +@@ -303,8 +304,9 @@ extern int nfs4_do_close(struct nfs4_state *state, gfp_t gfp_mask, int wait); + extern int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle); + extern int nfs4_proc_fs_locations(struct rpc_clnt *, struct inode *, const struct qstr *, + struct nfs4_fs_locations *, struct page *); +-extern int nfs4_proc_get_locations(struct inode *, struct nfs4_fs_locations *, +- struct page *page, const struct cred *); ++extern int nfs4_proc_get_locations(struct nfs_server *, struct nfs_fh *, ++ struct nfs4_fs_locations *, ++ struct page *page, const struct cred *); + extern int nfs4_proc_fsid_present(struct inode *, const struct cred *); + extern struct rpc_clnt *nfs4_proc_lookup_mountpoint(struct inode *, + struct dentry *, +diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c +index af57332503bed..1bf7a72ebda6e 100644 +--- a/fs/nfs/nfs4client.c ++++ b/fs/nfs/nfs4client.c +@@ -346,6 +346,7 @@ int nfs40_init_client(struct nfs_client *clp) + ret = nfs4_setup_slot_table(tbl, NFS4_MAX_SLOT_TABLE, + "NFSv4.0 transport Slot table"); + if (ret) { ++ nfs4_shutdown_slot_table(tbl); + kfree(tbl); + return ret; + } +@@ -1368,8 +1369,11 @@ int nfs4_update_server(struct nfs_server *server, const char *hostname, + } + nfs_put_client(clp); + +- if (server->nfs_client->cl_hostname == NULL) ++ if (server->nfs_client->cl_hostname == NULL) { + server->nfs_client->cl_hostname = 
kstrdup(hostname, GFP_KERNEL); ++ if (server->nfs_client->cl_hostname == NULL) ++ return -ENOMEM; ++ } + nfs_server_insert_lists(server); + + return nfs_probe_destination(server); +diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c +index c91565227ea2a..14f2efdecc2f8 100644 +--- a/fs/nfs/nfs4file.c ++++ b/fs/nfs/nfs4file.c +@@ -32,6 +32,7 @@ nfs4_file_open(struct inode *inode, struct file *filp) + struct dentry *parent = NULL; + struct inode *dir; + unsigned openflags = filp->f_flags; ++ fmode_t f_mode; + struct iattr attr; + int err; + +@@ -50,8 +51,9 @@ nfs4_file_open(struct inode *inode, struct file *filp) + if (err) + return err; + ++ f_mode = filp->f_mode; + if ((openflags & O_ACCMODE) == 3) +- return nfs_open(inode, filp); ++ f_mode |= flags_to_mode(openflags); + + /* We can't create new files here */ + openflags &= ~(O_CREAT|O_EXCL); +@@ -59,7 +61,7 @@ nfs4_file_open(struct inode *inode, struct file *filp) + parent = dget_parent(dentry); + dir = d_inode(parent); + +- ctx = alloc_nfs_open_context(file_dentry(filp), filp->f_mode, filp); ++ ctx = alloc_nfs_open_context(file_dentry(filp), f_mode, filp); + err = PTR_ERR(ctx); + if (IS_ERR(ctx)) + goto out; +@@ -317,7 +319,7 @@ static int read_name_gen = 1; + static struct file *__nfs42_ssc_open(struct vfsmount *ss_mnt, + struct nfs_fh *src_fh, nfs4_stateid *stateid) + { +- struct nfs_fattr fattr; ++ struct nfs_fattr *fattr = nfs_alloc_fattr(); + struct file *filep, *res; + struct nfs_server *server; + struct inode *r_ino = NULL; +@@ -328,14 +330,20 @@ static struct file *__nfs42_ssc_open(struct vfsmount *ss_mnt, + + server = NFS_SERVER(ss_mnt->mnt_root->d_inode); + +- nfs_fattr_init(&fattr); ++ if (!fattr) ++ return ERR_PTR(-ENOMEM); + +- status = nfs4_proc_getattr(server, src_fh, &fattr, NULL, NULL); ++ status = nfs4_proc_getattr(server, src_fh, fattr, NULL, NULL); + if (status < 0) { + res = ERR_PTR(status); + goto out; + } + ++ if (!S_ISREG(fattr->mode)) { ++ res = ERR_PTR(-EBADF); ++ goto out; ++ } ++ + res = ERR_PTR(-ENOMEM); + len = strlen(SSC_READ_NAME_BODY) + 16; + read_name = kzalloc(len, GFP_NOFS); +@@ -343,7 +351,7 @@ static struct file *__nfs42_ssc_open(struct vfsmount *ss_mnt, + goto out; + snprintf(read_name, len, SSC_READ_NAME_BODY, read_name_gen++); + +- r_ino = nfs_fhget(ss_mnt->mnt_root->d_inode->i_sb, src_fh, &fattr, ++ r_ino = nfs_fhget(ss_mnt->mnt_root->d_inode->i_sb, src_fh, fattr, + NULL); + if (IS_ERR(r_ino)) { + res = ERR_CAST(r_ino); +@@ -354,6 +362,7 @@ static struct file *__nfs42_ssc_open(struct vfsmount *ss_mnt, + r_ino->i_fop); + if (IS_ERR(filep)) { + res = ERR_CAST(filep); ++ iput(r_ino); + goto out_free_name; + } + filep->f_mode |= FMODE_READ; +@@ -388,6 +397,7 @@ static struct file *__nfs42_ssc_open(struct vfsmount *ss_mnt, + out_free_name: + kfree(read_name); + out: ++ nfs_free_fattr(fattr); + return res; + out_stateowner: + nfs4_put_state_owner(sp); +diff --git a/fs/nfs/nfs4idmap.c b/fs/nfs/nfs4idmap.c +index 8d8aba305ecca..ec6afd3c4bca6 100644 +--- a/fs/nfs/nfs4idmap.c ++++ b/fs/nfs/nfs4idmap.c +@@ -487,7 +487,7 @@ nfs_idmap_new(struct nfs_client *clp) + err_destroy_pipe: + rpc_destroy_pipe_data(idmap->idmap_pipe); + err: +- get_user_ns(idmap->user_ns); ++ put_user_ns(idmap->user_ns); + kfree(idmap); + return error; + } +@@ -561,22 +561,20 @@ nfs_idmap_prepare_pipe_upcall(struct idmap *idmap, + return true; + } + +-static void +-nfs_idmap_complete_pipe_upcall_locked(struct idmap *idmap, int ret) ++static void nfs_idmap_complete_pipe_upcall(struct idmap_legacy_upcalldata *data, ++ int ret) + { +- 
struct key *authkey = idmap->idmap_upcall_data->authkey; +- +- kfree(idmap->idmap_upcall_data); +- idmap->idmap_upcall_data = NULL; +- complete_request_key(authkey, ret); +- key_put(authkey); ++ complete_request_key(data->authkey, ret); ++ key_put(data->authkey); ++ kfree(data); + } + +-static void +-nfs_idmap_abort_pipe_upcall(struct idmap *idmap, int ret) ++static void nfs_idmap_abort_pipe_upcall(struct idmap *idmap, ++ struct idmap_legacy_upcalldata *data, ++ int ret) + { +- if (idmap->idmap_upcall_data != NULL) +- nfs_idmap_complete_pipe_upcall_locked(idmap, ret); ++ if (cmpxchg(&idmap->idmap_upcall_data, data, NULL) == data) ++ nfs_idmap_complete_pipe_upcall(data, ret); + } + + static int nfs_idmap_legacy_upcall(struct key *authkey, void *aux) +@@ -613,7 +611,7 @@ static int nfs_idmap_legacy_upcall(struct key *authkey, void *aux) + + ret = rpc_queue_upcall(idmap->idmap_pipe, msg); + if (ret < 0) +- nfs_idmap_abort_pipe_upcall(idmap, ret); ++ nfs_idmap_abort_pipe_upcall(idmap, data, ret); + + return ret; + out2: +@@ -669,6 +667,7 @@ idmap_pipe_downcall(struct file *filp, const char __user *src, size_t mlen) + struct request_key_auth *rka; + struct rpc_inode *rpci = RPC_I(file_inode(filp)); + struct idmap *idmap = (struct idmap *)rpci->private; ++ struct idmap_legacy_upcalldata *data; + struct key *authkey; + struct idmap_msg im; + size_t namelen_in; +@@ -678,10 +677,11 @@ idmap_pipe_downcall(struct file *filp, const char __user *src, size_t mlen) + * will have been woken up and someone else may now have used + * idmap_key_cons - so after this point we may no longer touch it. + */ +- if (idmap->idmap_upcall_data == NULL) ++ data = xchg(&idmap->idmap_upcall_data, NULL); ++ if (data == NULL) + goto out_noupcall; + +- authkey = idmap->idmap_upcall_data->authkey; ++ authkey = data->authkey; + rka = get_request_key_auth(authkey); + + if (mlen != sizeof(im)) { +@@ -703,18 +703,17 @@ idmap_pipe_downcall(struct file *filp, const char __user *src, size_t mlen) + if (namelen_in == 0 || namelen_in == IDMAP_NAMESZ) { + ret = -EINVAL; + goto out; +-} ++ } + +- ret = nfs_idmap_read_and_verify_message(&im, +- &idmap->idmap_upcall_data->idmap_msg, +- rka->target_key, authkey); ++ ret = nfs_idmap_read_and_verify_message(&im, &data->idmap_msg, ++ rka->target_key, authkey); + if (ret >= 0) { + key_set_timeout(rka->target_key, nfs_idmap_cache_timeout); + ret = mlen; + } + + out: +- nfs_idmap_complete_pipe_upcall_locked(idmap, ret); ++ nfs_idmap_complete_pipe_upcall(data, ret); + out_noupcall: + return ret; + } +@@ -728,7 +727,7 @@ idmap_pipe_destroy_msg(struct rpc_pipe_msg *msg) + struct idmap *idmap = data->idmap; + + if (msg->errno) +- nfs_idmap_abort_pipe_upcall(idmap, msg->errno); ++ nfs_idmap_abort_pipe_upcall(idmap, data, msg->errno); + } + + static void +@@ -736,8 +735,11 @@ idmap_release_pipe(struct inode *inode) + { + struct rpc_inode *rpci = RPC_I(inode); + struct idmap *idmap = (struct idmap *)rpci->private; ++ struct idmap_legacy_upcalldata *data; + +- nfs_idmap_abort_pipe_upcall(idmap, -EPIPE); ++ data = xchg(&idmap->idmap_upcall_data, NULL); ++ if (data) ++ nfs_idmap_complete_pipe_upcall(data, -EPIPE); + } + + int nfs_map_name_to_uid(const struct nfs_server *server, const char *name, size_t namelen, kuid_t *uid) +diff --git a/fs/nfs/nfs4namespace.c b/fs/nfs/nfs4namespace.c +index 873342308dc0d..f2dbf904c5989 100644 +--- a/fs/nfs/nfs4namespace.c ++++ b/fs/nfs/nfs4namespace.c +@@ -164,16 +164,21 @@ static int nfs4_validate_fspath(struct dentry *dentry, + return 0; + } + +-static size_t 
nfs_parse_server_name(char *string, size_t len, +- struct sockaddr *sa, size_t salen, struct net *net) ++size_t nfs_parse_server_name(char *string, size_t len, struct sockaddr *sa, ++ size_t salen, struct net *net, int port) + { + ssize_t ret; + + ret = rpc_pton(net, string, len, sa, salen); + if (ret == 0) { +- ret = nfs_dns_resolve_name(net, string, len, sa, salen); +- if (ret < 0) +- ret = 0; ++ ret = rpc_uaddr2sockaddr(net, string, len, sa, salen); ++ if (ret == 0) { ++ ret = nfs_dns_resolve_name(net, string, len, sa, salen); ++ if (ret < 0) ++ ret = 0; ++ } ++ } else if (port) { ++ rpc_set_port(sa, port); + } + return ret; + } +@@ -328,7 +333,7 @@ static int try_location(struct fs_context *fc, + nfs_parse_server_name(buf->data, buf->len, + &ctx->nfs_server.address, + sizeof(ctx->nfs_server._address), +- fc->net_ns); ++ fc->net_ns, 0); + if (ctx->nfs_server.addrlen == 0) + continue; + +@@ -412,6 +417,9 @@ static int nfs_do_refmount(struct fs_context *fc, struct rpc_clnt *client) + fs_locations = kmalloc(sizeof(struct nfs4_fs_locations), GFP_KERNEL); + if (!fs_locations) + goto out_free; ++ fs_locations->fattr = nfs_alloc_fattr(); ++ if (!fs_locations->fattr) ++ goto out_free_2; + + /* Get locations */ + dentry = ctx->clone_data.dentry; +@@ -422,14 +430,16 @@ static int nfs_do_refmount(struct fs_context *fc, struct rpc_clnt *client) + err = nfs4_proc_fs_locations(client, d_inode(parent), &dentry->d_name, fs_locations, page); + dput(parent); + if (err != 0) +- goto out_free_2; ++ goto out_free_3; + + err = -ENOENT; + if (fs_locations->nlocations <= 0 || + fs_locations->fs_path.ncomponents <= 0) +- goto out_free_2; ++ goto out_free_3; + + err = nfs_follow_referral(fc, fs_locations); ++out_free_3: ++ kfree(fs_locations->fattr); + out_free_2: + kfree(fs_locations); + out_free: +@@ -496,7 +506,7 @@ static int nfs4_try_replacing_one_location(struct nfs_server *server, + continue; + + salen = nfs_parse_server_name(buf->data, buf->len, +- sap, addr_bufsize, net); ++ sap, addr_bufsize, net, 0); + if (salen == 0) + continue; + rpc_set_port(sap, NFS_PORT); +diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c +index e1214bb6b7ee5..b6b1fad031c78 100644 +--- a/fs/nfs/nfs4proc.c ++++ b/fs/nfs/nfs4proc.c +@@ -126,6 +126,11 @@ nfs4_label_init_security(struct inode *dir, struct dentry *dentry, + if (nfs_server_capable(dir, NFS_CAP_SECURITY_LABEL) == 0) + return NULL; + ++ label->lfs = 0; ++ label->pi = 0; ++ label->len = 0; ++ label->label = NULL; ++ + err = security_dentry_init_security(dentry, sattr->ia_mode, + &dentry->d_name, (void **)&label->label, &label->len); + if (err == 0) +@@ -366,6 +371,14 @@ static void nfs4_setup_readdir(u64 cookie, __be32 *verifier, struct dentry *dent + kunmap_atomic(start); + } + ++static void nfs4_fattr_set_prechange(struct nfs_fattr *fattr, u64 version) ++{ ++ if (!(fattr->valid & NFS_ATTR_FATTR_PRECHANGE)) { ++ fattr->pre_change_attr = version; ++ fattr->valid |= NFS_ATTR_FATTR_PRECHANGE; ++ } ++} ++ + static void nfs4_test_and_free_stateid(struct nfs_server *server, + nfs4_stateid *stateid, + const struct cred *cred) +@@ -779,10 +792,9 @@ static void nfs4_slot_sequence_record_sent(struct nfs4_slot *slot, + if ((s32)(seqnr - slot->seq_nr_highest_sent) > 0) + slot->seq_nr_highest_sent = seqnr; + } +-static void nfs4_slot_sequence_acked(struct nfs4_slot *slot, +- u32 seqnr) ++static void nfs4_slot_sequence_acked(struct nfs4_slot *slot, u32 seqnr) + { +- slot->seq_nr_highest_sent = seqnr; ++ nfs4_slot_sequence_record_sent(slot, seqnr); + slot->seq_nr_last_acked = seqnr; + 
} + +@@ -849,7 +861,6 @@ static int nfs41_sequence_process(struct rpc_task *task, + __func__, + slot->slot_nr, + slot->seq_nr); +- nfs4_slot_sequence_acked(slot, slot->seq_nr); + goto out_retry; + case -NFS4ERR_RETRY_UNCACHED_REP: + case -NFS4ERR_SEQ_FALSE_RETRY: +@@ -1157,7 +1168,7 @@ static int nfs4_call_sync_sequence(struct rpc_clnt *clnt, + { + unsigned short task_flags = 0; + +- if (server->nfs_client->cl_minorversion) ++ if (server->caps & NFS_CAP_MOVEABLE) + task_flags = RPC_TASK_MOVEABLE; + return nfs4_do_call_sync(clnt, server, msg, args, res, task_flags); + } +@@ -1232,8 +1243,7 @@ nfs4_update_changeattr_locked(struct inode *inode, + NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL | + NFS_INO_INVALID_SIZE | NFS_INO_INVALID_OTHER | + NFS_INO_INVALID_BLOCKS | NFS_INO_INVALID_NLINK | +- NFS_INO_INVALID_MODE | NFS_INO_INVALID_XATTR | +- NFS_INO_REVAL_PAGECACHE; ++ NFS_INO_INVALID_MODE | NFS_INO_INVALID_XATTR; + nfsi->attrtimeo = NFS_MINATTRTIMEO(inode); + } + nfsi->attrtimeo_timestamp = jiffies; +@@ -1609,15 +1619,16 @@ static bool nfs_stateid_is_sequential(struct nfs4_state *state, + { + if (test_bit(NFS_OPEN_STATE, &state->flags)) { + /* The common case - we're updating to a new sequence number */ +- if (nfs4_stateid_match_other(stateid, &state->open_stateid) && +- nfs4_stateid_is_next(&state->open_stateid, stateid)) { +- return true; ++ if (nfs4_stateid_match_other(stateid, &state->open_stateid)) { ++ if (nfs4_stateid_is_next(&state->open_stateid, stateid)) ++ return true; ++ return false; + } +- } else { +- /* This is the first OPEN in this generation */ +- if (stateid->seqid == cpu_to_be32(1)) +- return true; ++ /* The server returned a new stateid */ + } ++ /* This is the first OPEN in this generation */ ++ if (stateid->seqid == cpu_to_be32(1)) ++ return true; + return false; + } + +@@ -2133,18 +2144,18 @@ static struct nfs4_opendata *nfs4_open_recoverdata_alloc(struct nfs_open_context + } + + static int nfs4_open_recover_helper(struct nfs4_opendata *opendata, +- fmode_t fmode) ++ fmode_t fmode) + { + struct nfs4_state *newstate; ++ struct nfs_server *server = NFS_SB(opendata->dentry->d_sb); ++ int openflags = opendata->o_arg.open_flags; + int ret; + + if (!nfs4_mode_match_open_stateid(opendata->state, fmode)) + return 0; +- opendata->o_arg.open_flags = 0; + opendata->o_arg.fmode = fmode; +- opendata->o_arg.share_access = nfs4_map_atomic_open_share( +- NFS_SB(opendata->dentry->d_sb), +- fmode, 0); ++ opendata->o_arg.share_access = ++ nfs4_map_atomic_open_share(server, fmode, openflags); + memset(&opendata->o_res, 0, sizeof(opendata->o_res)); + memset(&opendata->c_res, 0, sizeof(opendata->c_res)); + nfs4_init_opendata_res(opendata); +@@ -2570,7 +2581,7 @@ static int nfs4_run_open_task(struct nfs4_opendata *data, + }; + int status; + +- if (server->nfs_client->cl_minorversion) ++ if (nfs_server_capable(dir, NFS_CAP_MOVEABLE)) + task_setup_data.flags |= RPC_TASK_MOVEABLE; + + kref_get(&data->kref); +@@ -2724,10 +2735,15 @@ static int _nfs4_open_expired(struct nfs_open_context *ctx, struct nfs4_state *s + struct nfs4_opendata *opendata; + int ret; + +- opendata = nfs4_open_recoverdata_alloc(ctx, state, +- NFS4_OPEN_CLAIM_FH); ++ opendata = nfs4_open_recoverdata_alloc(ctx, state, NFS4_OPEN_CLAIM_FH); + if (IS_ERR(opendata)) + return PTR_ERR(opendata); ++ /* ++ * We're not recovering a delegation, so ask for no delegation. ++ * Otherwise the recovery thread could deadlock with an outstanding ++ * delegation return. 
++ */ ++ opendata->o_arg.open_flags = O_DIRECT; + ret = nfs4_open_recover(opendata, state); + if (ret == -ESTALE) + d_drop(ctx->dentry); +@@ -3100,8 +3116,13 @@ static int _nfs4_open_and_get_state(struct nfs4_opendata *opendata, + } + + out: +- if (!opendata->cancelled) ++ if (!opendata->cancelled) { ++ if (opendata->lgp) { ++ nfs4_lgopen_release(opendata->lgp); ++ opendata->lgp = NULL; ++ } + nfs4_sequence_free_slot(&opendata->o_res.seq_res); ++ } + return ret; + } + +@@ -3753,7 +3774,7 @@ int nfs4_do_close(struct nfs4_state *state, gfp_t gfp_mask, int wait) + }; + int status = -ENOMEM; + +- if (server->nfs_client->cl_minorversion) ++ if (nfs_server_capable(state->inode, NFS_CAP_MOVEABLE)) + task_setup_data.flags |= RPC_TASK_MOVEABLE; + + nfs4_state_protect(server->nfs_client, NFS_SP4_MACH_CRED_CLEANUP, +@@ -3812,7 +3833,7 @@ nfs4_atomic_open(struct inode *dir, struct nfs_open_context *ctx, + int open_flags, struct iattr *attr, int *opened) + { + struct nfs4_state *state; +- struct nfs4_label l = {0, 0, 0, NULL}, *label = NULL; ++ struct nfs4_label l, *label; + + label = nfs4_label_init_security(dir, ctx->dentry, attr, &l); + +@@ -3893,6 +3914,8 @@ static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *f + if (res.attr_bitmask[2] & FATTR4_WORD2_SECURITY_LABEL) + server->caps |= NFS_CAP_SECURITY_LABEL; + #endif ++ if (res.attr_bitmask[0] & FATTR4_WORD0_FS_LOCATIONS) ++ server->caps |= NFS_CAP_FS_LOCATIONS; + if (!(res.attr_bitmask[0] & FATTR4_WORD0_FILEID)) + server->fattr_valid &= ~NFS_ATTR_FATTR_FILEID; + if (!(res.attr_bitmask[1] & FATTR4_WORD1_MODE)) +@@ -3949,6 +3972,67 @@ int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle) + return err; + } + ++static int _nfs4_discover_trunking(struct nfs_server *server, ++ struct nfs_fh *fhandle) ++{ ++ struct nfs4_fs_locations *locations = NULL; ++ struct page *page; ++ const struct cred *cred; ++ struct nfs_client *clp = server->nfs_client; ++ const struct nfs4_state_maintenance_ops *ops = ++ clp->cl_mvops->state_renewal_ops; ++ int status = -ENOMEM; ++ ++ cred = ops->get_state_renewal_cred(clp); ++ if (cred == NULL) { ++ cred = nfs4_get_clid_cred(clp); ++ if (cred == NULL) ++ return -ENOKEY; ++ } ++ ++ page = alloc_page(GFP_KERNEL); ++ if (!page) ++ goto out_put_cred; ++ locations = kmalloc(sizeof(struct nfs4_fs_locations), GFP_KERNEL); ++ if (!locations) ++ goto out_free; ++ locations->fattr = nfs_alloc_fattr(); ++ if (!locations->fattr) ++ goto out_free_2; ++ ++ status = nfs4_proc_get_locations(server, fhandle, locations, page, ++ cred); ++ ++ kfree(locations->fattr); ++out_free_2: ++ kfree(locations); ++out_free: ++ __free_page(page); ++out_put_cred: ++ put_cred(cred); ++ return status; ++} ++ ++static int nfs4_discover_trunking(struct nfs_server *server, ++ struct nfs_fh *fhandle) ++{ ++ struct nfs4_exception exception = { ++ .interruptible = true, ++ }; ++ struct nfs_client *clp = server->nfs_client; ++ int err = 0; ++ ++ if (!nfs4_has_session(clp)) ++ goto out; ++ do { ++ err = nfs4_handle_exception(server, ++ _nfs4_discover_trunking(server, fhandle), ++ &exception); ++ } while (exception.retry); ++out: ++ return err; ++} ++ + static int _nfs4_lookup_root(struct nfs_server *server, struct nfs_fh *fhandle, + struct nfs_fsinfo *info) + { +@@ -4146,6 +4230,8 @@ static int nfs4_get_referral(struct rpc_clnt *client, struct inode *dir, + if (locations == NULL) + goto out; + ++ locations->fattr = fattr; ++ + status = nfs4_proc_fs_locations(client, dir, name, locations, page); + if (status != 0) 
+ goto out; +@@ -4155,17 +4241,14 @@ static int nfs4_get_referral(struct rpc_clnt *client, struct inode *dir, + * referral. Cause us to drop into the exception handler, which + * will kick off migration recovery. + */ +- if (nfs_fsid_equal(&NFS_SERVER(dir)->fsid, &locations->fattr.fsid)) { ++ if (nfs_fsid_equal(&NFS_SERVER(dir)->fsid, &fattr->fsid)) { + dprintk("%s: server did not return a different fsid for" + " a referral at %s\n", __func__, name->name); + status = -NFS4ERR_MOVED; + goto out; + } + /* Fixup attributes for the nfs_lookup() call to nfs_fhget() */ +- nfs_fixup_referral_attributes(&locations->fattr); +- +- /* replace the lookup nfs_fattr with the locations nfs_fattr */ +- memcpy(fattr, &locations->fattr, sizeof(struct nfs_fattr)); ++ nfs_fixup_referral_attributes(fattr); + memset(fhandle, 0, sizeof(struct nfs_fh)); + out: + if (page) +@@ -4317,7 +4400,7 @@ static int _nfs4_proc_lookup(struct rpc_clnt *clnt, struct inode *dir, + }; + unsigned short task_flags = 0; + +- if (server->nfs_client->cl_minorversion) ++ if (nfs_server_capable(dir, NFS_CAP_MOVEABLE)) + task_flags = RPC_TASK_MOVEABLE; + + /* Is this is an attribute revalidation, subject to softreval? */ +@@ -4586,7 +4669,7 @@ nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, + int flags) + { + struct nfs_server *server = NFS_SERVER(dir); +- struct nfs4_label l, *ilabel = NULL; ++ struct nfs4_label l, *ilabel; + struct nfs_open_context *ctx; + struct nfs4_state *state; + int status = 0; +@@ -4946,7 +5029,7 @@ static int nfs4_proc_symlink(struct inode *dir, struct dentry *dentry, + struct nfs4_exception exception = { + .interruptible = true, + }; +- struct nfs4_label l, *label = NULL; ++ struct nfs4_label l, *label; + int err; + + label = nfs4_label_init_security(dir, dentry, sattr, &l); +@@ -4987,7 +5070,7 @@ static int nfs4_proc_mkdir(struct inode *dir, struct dentry *dentry, + struct nfs4_exception exception = { + .interruptible = true, + }; +- struct nfs4_label l, *label = NULL; ++ struct nfs4_label l, *label; + int err; + + label = nfs4_label_init_security(dir, dentry, sattr, &l); +@@ -5106,7 +5189,7 @@ static int nfs4_proc_mknod(struct inode *dir, struct dentry *dentry, + struct nfs4_exception exception = { + .interruptible = true, + }; +- struct nfs4_label l, *label = NULL; ++ struct nfs4_label l, *label; + int err; + + label = nfs4_label_init_security(dir, dentry, sattr, &l); +@@ -5836,7 +5919,7 @@ static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t bu + buflen = server->rsize; + + npages = DIV_ROUND_UP(buflen, PAGE_SIZE) + 1; +- pages = kmalloc_array(npages, sizeof(struct page *), GFP_NOFS); ++ pages = kmalloc_array(npages, sizeof(struct page *), GFP_KERNEL); + if (!pages) + return -ENOMEM; + +@@ -6502,7 +6585,9 @@ static void nfs4_delegreturn_release(void *calldata) + pnfs_roc_release(&data->lr.arg, &data->lr.res, + data->res.lr_ret); + if (inode) { +- nfs_post_op_update_inode_force_wcc(inode, &data->fattr); ++ nfs4_fattr_set_prechange(&data->fattr, ++ inode_peek_iversion_raw(inode)); ++ nfs_refresh_inode(inode, &data->fattr); + nfs_iput_and_deactive(inode); + } + kfree(calldata); +@@ -6551,11 +6636,14 @@ static int _nfs4_proc_delegreturn(struct inode *inode, const struct cred *cred, + .rpc_client = server->client, + .rpc_message = &msg, + .callback_ops = &nfs4_delegreturn_ops, +- .flags = RPC_TASK_ASYNC | RPC_TASK_TIMEOUT | RPC_TASK_MOVEABLE, ++ .flags = RPC_TASK_ASYNC | RPC_TASK_TIMEOUT, + }; + int status = 0; + +- data = kzalloc(sizeof(*data), 
GFP_NOFS);
++ if (nfs_server_capable(inode, NFS_CAP_MOVEABLE))
++ task_setup_data.flags |= RPC_TASK_MOVEABLE;
++
++ data = kzalloc(sizeof(*data), GFP_KERNEL);
+ if (data == NULL)
+ return -ENOMEM;
+
+@@ -6744,7 +6832,7 @@ static struct nfs4_unlockdata *nfs4_alloc_unlockdata(struct file_lock *fl,
+ struct nfs4_state *state = lsp->ls_state;
+ struct inode *inode = state->inode;
+
+- p = kzalloc(sizeof(*p), GFP_NOFS);
++ p = kzalloc(sizeof(*p), GFP_KERNEL);
+ if (p == NULL)
+ return NULL;
+ p->arg.fh = NFS_FH(inode);
+@@ -6869,10 +6957,8 @@ static struct rpc_task *nfs4_do_unlck(struct file_lock *fl,
+ .workqueue = nfsiod_workqueue,
+ .flags = RPC_TASK_ASYNC,
+ };
+- struct nfs_client *client =
+- NFS_SERVER(lsp->ls_state->inode)->nfs_client;
+
+- if (client->cl_minorversion)
++ if (nfs_server_capable(lsp->ls_state->inode, NFS_CAP_MOVEABLE))
+ task_setup_data.flags |= RPC_TASK_MOVEABLE;
+
+ nfs4_state_protect(NFS_SERVER(lsp->ls_state->inode)->nfs_client,
+@@ -7044,6 +7130,7 @@ static void nfs4_lock_done(struct rpc_task *task, void *calldata)
+ {
+ struct nfs4_lockdata *data = calldata;
+ struct nfs4_lock_state *lsp = data->lsp;
++ struct nfs_server *server = NFS_SERVER(d_inode(data->ctx->dentry));
+
+ dprintk("%s: begin!\n", __func__);
+
+@@ -7053,8 +7140,7 @@ static void nfs4_lock_done(struct rpc_task *task, void *calldata)
+ data->rpc_status = task->tk_status;
+ switch (task->tk_status) {
+ case 0:
+- renew_lease(NFS_SERVER(d_inode(data->ctx->dentry)),
+- data->timestamp);
++ renew_lease(server, data->timestamp);
+ if (data->arg.new_lock && !data->cancelled) {
+ data->fl.fl_flags &= ~(FL_SLEEP | FL_ACCESS);
+ if (locks_lock_inode_wait(lsp->ls_state->inode, &data->fl) < 0)
+@@ -7075,6 +7161,8 @@ static void nfs4_lock_done(struct rpc_task *task, void *calldata)
+ if (!nfs4_stateid_match(&data->arg.open_stateid,
+ &lsp->ls_state->open_stateid))
+ goto out_restart;
++ else if (nfs4_async_handle_error(task, server, lsp->ls_state, NULL) == -EAGAIN)
++ goto out_restart;
+ } else if (!nfs4_stateid_match(&data->arg.lock_stateid,
+ &lsp->ls_stateid))
+ goto out_restart;
+@@ -7148,15 +7236,13 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *f
+ .flags = RPC_TASK_ASYNC | RPC_TASK_CRED_NOREF,
+ };
+ int ret;
+- struct nfs_client *client = NFS_SERVER(state->inode)->nfs_client;
+
+- if (client->cl_minorversion)
++ if (nfs_server_capable(state->inode, NFS_CAP_MOVEABLE))
+ task_setup_data.flags |= RPC_TASK_MOVEABLE;
+
+ dprintk("%s: begin!\n", __func__);
+ data = nfs4_alloc_lockdata(fl, nfs_file_open_context(fl->fl_file),
+- fl->fl_u.nfs4_fl.owner,
+- recovery_type == NFS_LOCK_NEW ? 
GFP_KERNEL : GFP_NOFS); ++ fl->fl_u.nfs4_fl.owner, GFP_KERNEL); + if (data == NULL) + return -ENOMEM; + if (IS_SETLKW(cmd)) +@@ -7579,7 +7665,7 @@ nfs4_release_lockowner(struct nfs_server *server, struct nfs4_lock_state *lsp) + if (server->nfs_client->cl_mvops->minor_version != 0) + return; + +- data = kmalloc(sizeof(*data), GFP_NOFS); ++ data = kmalloc(sizeof(*data), GFP_KERNEL); + if (!data) + return; + data->lsp = lsp; +@@ -7676,7 +7762,7 @@ static int nfs4_xattr_set_nfs4_user(const struct xattr_handler *handler, + const char *key, const void *buf, + size_t buflen, int flags) + { +- struct nfs_access_entry cache; ++ u32 mask; + int ret; + + if (!nfs_server_capable(inode, NFS_CAP_XATTR)) +@@ -7691,8 +7777,8 @@ static int nfs4_xattr_set_nfs4_user(const struct xattr_handler *handler, + * do a cached access check for the XA* flags to possibly avoid + * doing an RPC and getting EACCES back. + */ +- if (!nfs_access_get_cached(inode, current_cred(), &cache, true)) { +- if (!(cache.mask & NFS_ACCESS_XAWRITE)) ++ if (!nfs_access_get_cached(inode, current_cred(), &mask, true)) { ++ if (!(mask & NFS_ACCESS_XAWRITE)) + return -EACCES; + } + +@@ -7713,14 +7799,14 @@ static int nfs4_xattr_get_nfs4_user(const struct xattr_handler *handler, + struct dentry *unused, struct inode *inode, + const char *key, void *buf, size_t buflen) + { +- struct nfs_access_entry cache; ++ u32 mask; + ssize_t ret; + + if (!nfs_server_capable(inode, NFS_CAP_XATTR)) + return -EOPNOTSUPP; + +- if (!nfs_access_get_cached(inode, current_cred(), &cache, true)) { +- if (!(cache.mask & NFS_ACCESS_XAREAD)) ++ if (!nfs_access_get_cached(inode, current_cred(), &mask, true)) { ++ if (!(mask & NFS_ACCESS_XAREAD)) + return -EACCES; + } + +@@ -7745,13 +7831,13 @@ nfs4_listxattr_nfs4_user(struct inode *inode, char *list, size_t list_len) + ssize_t ret, size; + char *buf; + size_t buflen; +- struct nfs_access_entry cache; ++ u32 mask; + + if (!nfs_server_capable(inode, NFS_CAP_XATTR)) + return 0; + +- if (!nfs_access_get_cached(inode, current_cred(), &cache, true)) { +- if (!(cache.mask & NFS_ACCESS_XALIST)) ++ if (!nfs_access_get_cached(inode, current_cred(), &mask, true)) { ++ if (!(mask & NFS_ACCESS_XALIST)) + return 0; + } + +@@ -7849,7 +7935,7 @@ static int _nfs4_proc_fs_locations(struct rpc_clnt *client, struct inode *dir, + else + bitmask[1] &= ~FATTR4_WORD1_MOUNTED_ON_FILEID; + +- nfs_fattr_init(&fs_locations->fattr); ++ nfs_fattr_init(fs_locations->fattr); + fs_locations->server = server; + fs_locations->nlocations = 0; + status = nfs4_call_sync(client, server, &msg, &args.seq_args, &res.seq_res, 0); +@@ -7883,18 +7969,18 @@ int nfs4_proc_fs_locations(struct rpc_clnt *client, struct inode *dir, + * appended to this compound to identify the client ID which is + * performing recovery. 
+ */ +-static int _nfs40_proc_get_locations(struct inode *inode, ++static int _nfs40_proc_get_locations(struct nfs_server *server, ++ struct nfs_fh *fhandle, + struct nfs4_fs_locations *locations, + struct page *page, const struct cred *cred) + { +- struct nfs_server *server = NFS_SERVER(inode); + struct rpc_clnt *clnt = server->client; + u32 bitmask[2] = { + [0] = FATTR4_WORD0_FSID | FATTR4_WORD0_FS_LOCATIONS, + }; + struct nfs4_fs_locations_arg args = { + .clientid = server->nfs_client->cl_clientid, +- .fh = NFS_FH(inode), ++ .fh = fhandle, + .page = page, + .bitmask = bitmask, + .migration = 1, /* skip LOOKUP */ +@@ -7914,7 +8000,7 @@ static int _nfs40_proc_get_locations(struct inode *inode, + unsigned long now = jiffies; + int status; + +- nfs_fattr_init(&locations->fattr); ++ nfs_fattr_init(locations->fattr); + locations->server = server; + locations->nlocations = 0; + +@@ -7940,17 +8026,17 @@ static int _nfs40_proc_get_locations(struct inode *inode, + * When the client supports GETATTR(fs_locations_info), it can + * be plumbed in here. + */ +-static int _nfs41_proc_get_locations(struct inode *inode, ++static int _nfs41_proc_get_locations(struct nfs_server *server, ++ struct nfs_fh *fhandle, + struct nfs4_fs_locations *locations, + struct page *page, const struct cred *cred) + { +- struct nfs_server *server = NFS_SERVER(inode); + struct rpc_clnt *clnt = server->client; + u32 bitmask[2] = { + [0] = FATTR4_WORD0_FSID | FATTR4_WORD0_FS_LOCATIONS, + }; + struct nfs4_fs_locations_arg args = { +- .fh = NFS_FH(inode), ++ .fh = fhandle, + .page = page, + .bitmask = bitmask, + .migration = 1, /* skip LOOKUP */ +@@ -7967,7 +8053,7 @@ static int _nfs41_proc_get_locations(struct inode *inode, + }; + int status; + +- nfs_fattr_init(&locations->fattr); ++ nfs_fattr_init(locations->fattr); + locations->server = server; + locations->nlocations = 0; + +@@ -7999,11 +8085,11 @@ static int _nfs41_proc_get_locations(struct inode *inode, + * -NFS4ERR_LEASE_MOVED is returned if the server still has leases + * from this client that require migration recovery. 
+ */ +-int nfs4_proc_get_locations(struct inode *inode, ++int nfs4_proc_get_locations(struct nfs_server *server, ++ struct nfs_fh *fhandle, + struct nfs4_fs_locations *locations, + struct page *page, const struct cred *cred) + { +- struct nfs_server *server = NFS_SERVER(inode); + struct nfs_client *clp = server->nfs_client; + const struct nfs4_mig_recovery_ops *ops = + clp->cl_mvops->mig_recovery_ops; +@@ -8016,10 +8102,11 @@ int nfs4_proc_get_locations(struct inode *inode, + (unsigned long long)server->fsid.major, + (unsigned long long)server->fsid.minor, + clp->cl_hostname); +- nfs_display_fhandle(NFS_FH(inode), __func__); ++ nfs_display_fhandle(fhandle, __func__); + + do { +- status = ops->get_locations(inode, locations, page, cred); ++ status = ops->get_locations(server, fhandle, locations, page, ++ cred); + if (status != -NFS4ERR_DELAY) + break; + nfs4_handle_exception(server, status, &exception); +@@ -8284,6 +8371,7 @@ nfs4_bind_one_conn_to_session_done(struct rpc_task *task, void *calldata) + case -NFS4ERR_DEADSESSION: + nfs4_schedule_session_recovery(clp->cl_session, + task->tk_status); ++ return; + } + if (args->dir == NFS4_CDFC4_FORE_OR_BOTH && + res->dir != NFS4_CDFS4_BOTH) { +@@ -9254,7 +9342,7 @@ static struct rpc_task *_nfs41_proc_sequence(struct nfs_client *clp, + goto out_err; + + ret = ERR_PTR(-ENOMEM); +- calldata = kzalloc(sizeof(*calldata), GFP_NOFS); ++ calldata = kzalloc(sizeof(*calldata), GFP_KERNEL); + if (calldata == NULL) + goto out_put_clp; + nfs4_init_sequence(&calldata->args, &calldata->res, 0, is_privileged); +@@ -9339,6 +9427,9 @@ static int nfs41_reclaim_complete_handle_errors(struct rpc_task *task, struct nf + rpc_delay(task, NFS4_POLL_RETRY_MAX); + fallthrough; + case -NFS4ERR_RETRY_UNCACHED_REP: ++ case -EACCES: ++ dprintk("%s: failed to reclaim complete error %d for server %s, retrying\n", ++ __func__, task->tk_status, clp->cl_hostname); + return -EAGAIN; + case -NFS4ERR_BADSESSION: + case -NFS4ERR_DEADSESSION: +@@ -10198,7 +10289,7 @@ static int nfs41_free_stateid(struct nfs_server *server, + &task_setup.rpc_client, &msg); + + dprintk("NFS call free_stateid %p\n", stateid); +- data = kmalloc(sizeof(*data), GFP_NOFS); ++ data = kmalloc(sizeof(*data), GFP_KERNEL); + if (!data) + return -ENOMEM; + data->server = server; +@@ -10347,7 +10438,8 @@ static const struct nfs4_minor_version_ops nfs_v4_1_minor_ops = { + | NFS_CAP_POSIX_LOCK + | NFS_CAP_STATEID_NFSV41 + | NFS_CAP_ATOMIC_OPEN_V1 +- | NFS_CAP_LGOPEN, ++ | NFS_CAP_LGOPEN ++ | NFS_CAP_MOVEABLE, + .init_client = nfs41_init_client, + .shutdown_client = nfs41_shutdown_client, + .match_stateid = nfs41_match_stateid, +@@ -10382,7 +10474,8 @@ static const struct nfs4_minor_version_ops nfs_v4_2_minor_ops = { + | NFS_CAP_LAYOUTSTATS + | NFS_CAP_CLONE + | NFS_CAP_LAYOUTERROR +- | NFS_CAP_READ_PLUS, ++ | NFS_CAP_READ_PLUS ++ | NFS_CAP_MOVEABLE, + .init_client = nfs41_init_client, + .shutdown_client = nfs41_shutdown_client, + .match_stateid = nfs41_match_stateid, +@@ -10513,6 +10606,7 @@ const struct nfs_rpc_ops nfs_v4_clientops = { + .free_client = nfs4_free_client, + .create_server = nfs4_create_server, + .clone_server = nfs_clone_server, ++ .discover_trunking = nfs4_discover_trunking, + }; + + static const struct xattr_handler nfs4_xattr_nfs4_acl_handler = { +diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c +index f22818a80c2c7..0cd803b4d90ce 100644 +--- a/fs/nfs/nfs4state.c ++++ b/fs/nfs/nfs4state.c +@@ -49,6 +49,7 @@ + #include <linux/workqueue.h> + #include <linux/bitops.h> + #include <linux/jiffies.h> 
++#include <linux/sched/mm.h>
+
+ #include <linux/sunrpc/clnt.h>
+
+@@ -820,7 +821,7 @@ static void __nfs4_close(struct nfs4_state *state,
+
+ void nfs4_close_state(struct nfs4_state *state, fmode_t fmode)
+ {
+- __nfs4_close(state, fmode, GFP_NOFS, 0);
++ __nfs4_close(state, fmode, GFP_KERNEL, 0);
+ }
+
+ void nfs4_close_sync(struct nfs4_state *state, fmode_t fmode)
+@@ -1226,6 +1227,8 @@ void nfs4_schedule_state_manager(struct nfs_client *clp)
+ if (IS_ERR(task)) {
+ printk(KERN_ERR "%s: kthread_run: %ld\n",
+ __func__, PTR_ERR(task));
++ if (!nfs_client_init_is_complete(clp))
++ nfs_mark_client_ready(clp, PTR_ERR(task));
+ nfs4_clear_state_manager_bit(clp);
+ nfs_put_client(clp);
+ module_put(THIS_MODULE);
+@@ -1777,6 +1780,7 @@ static void nfs4_state_mark_reclaim_helper(struct nfs_client *clp,
+
+ static void nfs4_state_start_reclaim_reboot(struct nfs_client *clp)
+ {
++ set_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state);
+ /* Mark all delegations for reclaim */
+ nfs_delegation_mark_reclaim(clp);
+ nfs4_state_mark_reclaim_helper(clp, nfs4_state_mark_reclaim_reboot);
+@@ -2095,9 +2099,15 @@ static int nfs4_try_migration(struct nfs_server *server, const struct cred *cred
+ dprintk("<-- %s: no memory\n", __func__);
+ goto out;
+ }
++ locations->fattr = nfs_alloc_fattr();
++ if (locations->fattr == NULL) {
++ dprintk("<-- %s: no memory\n", __func__);
++ goto out;
++ }
+
+ inode = d_inode(server->super->s_root);
+- result = nfs4_proc_get_locations(inode, locations, page, cred);
++ result = nfs4_proc_get_locations(server, NFS_FH(inode), locations,
++ page, cred);
+ if (result) {
+ dprintk("<-- %s: failed to retrieve fs_locations: %d\n",
+ __func__, result);
+@@ -2105,7 +2115,10 @@ static int nfs4_try_migration(struct nfs_server *server, const struct cred *cred
+ }
+
+ result = -NFS4ERR_NXIO;
+- if (!(locations->fattr.valid & NFS_ATTR_FATTR_V4_LOCATIONS)) {
++ if (!locations->nlocations)
++ goto out;
++
++ if (!(locations->fattr->valid & NFS_ATTR_FATTR_V4_LOCATIONS)) {
+ dprintk("<-- %s: No fs_locations data, migration skipped\n",
+ __func__);
+ goto out;
+@@ -2130,6 +2143,8 @@ static int nfs4_try_migration(struct nfs_server *server, const struct cred *cred
+ out:
+ if (page != NULL)
+ __free_page(page);
++ if (locations != NULL)
++ kfree(locations->fattr);
+ kfree(locations);
+ if (result) {
+ pr_err("NFS: migration recovery failed (server %s)\n",
+@@ -2555,9 +2570,17 @@ static void nfs4_layoutreturn_any_run(struct nfs_client *clp)
+
+ static void nfs4_state_manager(struct nfs_client *clp)
+ {
++ unsigned int memflags;
+ int status = 0;
+ const char *section = "", *section_sep = "";
+
++ /*
++ * State recovery can deadlock if the direct reclaim code tries
++ * to start NFS writeback. So ensure memory allocations are all
++ * GFP_NOFS.
++ */
++ memflags = memalloc_nofs_save();
++
+ /* Ensure exclusive access to NFSv4 state */
+ do {
+ trace_nfs4_state_mgr(clp);
+@@ -2631,6 +2654,7 @@ static void nfs4_state_manager(struct nfs_client *clp)
+ if (status < 0)
+ goto out_error;
+ nfs4_state_end_reclaim_reboot(clp);
++ continue;
+ }
+
+ /* Detect expired delegations... 
*/ +@@ -2652,6 +2676,7 @@ static void nfs4_state_manager(struct nfs_client *clp) + clear_bit(NFS4CLNT_RECLAIM_NOGRACE, &clp->cl_state); + } + ++ memalloc_nofs_restore(memflags); + nfs4_end_drain_session(clp); + nfs4_clear_state_manager_bit(clp); + +@@ -2669,6 +2694,7 @@ static void nfs4_state_manager(struct nfs_client *clp) + return; + if (test_and_set_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state) != 0) + return; ++ memflags = memalloc_nofs_save(); + } while (refcount_read(&clp->cl_count) > 1 && !signalled()); + goto out_drain; + +@@ -2681,6 +2707,7 @@ out_error: + clp->cl_hostname, -status); + ssleep(1); + out_drain: ++ memalloc_nofs_restore(memflags); + nfs4_end_drain_session(clp); + nfs4_clear_state_manager_bit(clp); + } +diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c +index a8cff19c6f00c..0ae9e06a0bba2 100644 +--- a/fs/nfs/nfs4xdr.c ++++ b/fs/nfs/nfs4xdr.c +@@ -3693,8 +3693,6 @@ static int decode_attr_fs_locations(struct xdr_stream *xdr, uint32_t *bitmap, st + if (unlikely(!p)) + goto out_eio; + n = be32_to_cpup(p); +- if (n <= 0) +- goto out_eio; + for (res->nlocations = 0; res->nlocations < n; res->nlocations++) { + u32 m; + struct nfs4_fs_location *loc; +@@ -4181,26 +4179,25 @@ static int decode_attr_security_label(struct xdr_stream *xdr, uint32_t *bitmap, + p = xdr_inline_decode(xdr, len); + if (unlikely(!p)) + return -EIO; ++ bitmap[2] &= ~FATTR4_WORD2_SECURITY_LABEL; + if (len < NFS4_MAXLABELLEN) { +- if (label) { +- if (label->len) { +- if (label->len < len) +- return -ERANGE; +- memcpy(label->label, p, len); +- } ++ if (label && label->len) { ++ if (label->len < len) ++ return -ERANGE; ++ memcpy(label->label, p, len); + label->len = len; + label->pi = pi; + label->lfs = lfs; + status = NFS_ATTR_FATTR_V4_SECURITY_LABEL; + } +- bitmap[2] &= ~FATTR4_WORD2_SECURITY_LABEL; + } else + printk(KERN_WARNING "%s: label too long (%u)!\n", + __func__, len); ++ if (label && label->label) ++ dprintk("%s: label=%.*s, len=%d, PI=%d, LFS=%d\n", ++ __func__, label->len, (char *)label->label, ++ label->len, label->pi, label->lfs); + } +- if (label && label->label) +- dprintk("%s: label=%s, len=%d, PI=%d, LFS=%d\n", __func__, +- (char *)label->label, label->len, label->pi, label->lfs); + return status; + } + +@@ -7029,7 +7026,7 @@ static int nfs4_xdr_dec_fs_locations(struct rpc_rqst *req, + if (res->migration) { + xdr_enter_page(xdr, PAGE_SIZE); + status = decode_getfattr_generic(xdr, +- &res->fs_locations->fattr, ++ res->fs_locations->fattr, + NULL, res->fs_locations, + NULL, res->fs_locations->server); + if (status) +@@ -7042,7 +7039,7 @@ static int nfs4_xdr_dec_fs_locations(struct rpc_rqst *req, + goto out; + xdr_enter_page(xdr, PAGE_SIZE); + status = decode_getfattr_generic(xdr, +- &res->fs_locations->fattr, ++ res->fs_locations->fattr, + NULL, res->fs_locations, + NULL, res->fs_locations->server); + } +diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c +index cc232d1f16f2f..fdecf729fa92b 100644 +--- a/fs/nfs/pagelist.c ++++ b/fs/nfs/pagelist.c +@@ -90,10 +90,10 @@ void nfs_set_pgio_error(struct nfs_pgio_header *hdr, int error, loff_t pos) + } + } + +-static inline struct nfs_page * +-nfs_page_alloc(void) ++static inline struct nfs_page *nfs_page_alloc(void) + { +- struct nfs_page *p = kmem_cache_zalloc(nfs_page_cachep, GFP_KERNEL); ++ struct nfs_page *p = ++ kmem_cache_zalloc(nfs_page_cachep, nfs_io_gfp_mask()); + if (p) + INIT_LIST_HEAD(&p->wb_list); + return p; +@@ -773,6 +773,9 @@ int nfs_initiate_pgio(struct rpc_clnt *clnt, struct nfs_pgio_header *hdr, + .flags = RPC_TASK_ASYNC | flags, 
+ }; + ++ if (nfs_server_capable(hdr->inode, NFS_CAP_MOVEABLE)) ++ task_setup_data.flags |= RPC_TASK_MOVEABLE; ++ + hdr->rw_ops->rw_initiate(hdr, &msg, rpc_ops, &task_setup_data, how); + + dprintk("NFS: initiated pgio call " +@@ -901,7 +904,7 @@ int nfs_generic_pgio(struct nfs_pageio_descriptor *desc, + struct nfs_commit_info cinfo; + struct nfs_page_array *pg_array = &hdr->page_array; + unsigned int pagecount, pageused; +- gfp_t gfp_flags = GFP_KERNEL; ++ gfp_t gfp_flags = nfs_io_gfp_mask(); + + pagecount = nfs_page_array_len(mirror->pg_base, mirror->pg_count); + pg_array->npages = pagecount; +@@ -988,7 +991,7 @@ nfs_pageio_alloc_mirrors(struct nfs_pageio_descriptor *desc, + desc->pg_mirrors_dynamic = NULL; + if (mirror_count == 1) + return desc->pg_mirrors_static; +- ret = kmalloc_array(mirror_count, sizeof(*ret), GFP_KERNEL); ++ ret = kmalloc_array(mirror_count, sizeof(*ret), nfs_io_gfp_mask()); + if (ret != NULL) { + for (i = 0; i < mirror_count; i++) + nfs_pageio_mirror_init(&ret[i], desc->pg_bsize); +@@ -1227,6 +1230,7 @@ static int nfs_do_recoalesce(struct nfs_pageio_descriptor *desc) + + do { + list_splice_init(&mirror->pg_list, &head); ++ mirror->pg_recoalesce = 0; + + while (!list_empty(&head)) { + struct nfs_page *req; +diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c +index 7c9090a28e5c3..7217f3eeb0692 100644 +--- a/fs/nfs/pnfs.c ++++ b/fs/nfs/pnfs.c +@@ -92,6 +92,17 @@ find_pnfs_driver(u32 id) + return local; + } + ++const struct pnfs_layoutdriver_type *pnfs_find_layoutdriver(u32 id) ++{ ++ return find_pnfs_driver(id); ++} ++ ++void pnfs_put_layoutdriver(const struct pnfs_layoutdriver_type *ld) ++{ ++ if (ld) ++ module_put(ld->owner); ++} ++ + void + unset_pnfs_layoutdriver(struct nfs_server *nfss) + { +@@ -458,6 +469,7 @@ pnfs_mark_layout_stateid_invalid(struct pnfs_layout_hdr *lo, + pnfs_clear_lseg_state(lseg, lseg_list); + pnfs_clear_layoutreturn_info(lo); + pnfs_free_returned_lsegs(lo, lseg_list, &range, 0); ++ set_bit(NFS_LAYOUT_DRAIN, &lo->plh_flags); + if (test_bit(NFS_LAYOUT_RETURN, &lo->plh_flags) && + !test_and_set_bit(NFS_LAYOUT_RETURN_LOCK, &lo->plh_flags)) + pnfs_clear_layoutreturn_waitbit(lo); +@@ -1233,7 +1245,7 @@ pnfs_send_layoutreturn(struct pnfs_layout_hdr *lo, + int status = 0; + + *pcred = NULL; +- lrp = kzalloc(sizeof(*lrp), GFP_NOFS); ++ lrp = kzalloc(sizeof(*lrp), GFP_KERNEL); + if (unlikely(lrp == NULL)) { + status = -ENOMEM; + spin_lock(&ino->i_lock); +@@ -1906,8 +1918,9 @@ static void nfs_layoutget_begin(struct pnfs_layout_hdr *lo) + + static void nfs_layoutget_end(struct pnfs_layout_hdr *lo) + { +- if (atomic_dec_and_test(&lo->plh_outstanding)) +- wake_up_var(&lo->plh_outstanding); ++ if (atomic_dec_and_test(&lo->plh_outstanding) && ++ test_and_clear_bit(NFS_LAYOUT_DRAIN, &lo->plh_flags)) ++ wake_up_bit(&lo->plh_flags, NFS_LAYOUT_DRAIN); + } + + static bool pnfs_is_first_layoutget(struct pnfs_layout_hdr *lo) +@@ -1989,6 +2002,7 @@ lookup_again: + lo = pnfs_find_alloc_layout(ino, ctx, gfp_flags); + if (lo == NULL) { + spin_unlock(&ino->i_lock); ++ lseg = ERR_PTR(-ENOMEM); + trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg, + PNFS_UPDATE_LAYOUT_NOMEM); + goto out; +@@ -2013,11 +2027,11 @@ lookup_again: + * If the layout segment list is empty, but there are outstanding + * layoutget calls, then they might be subject to a layoutrecall. 
+ */ +- if ((list_empty(&lo->plh_segs) || !pnfs_layout_is_valid(lo)) && ++ if (test_bit(NFS_LAYOUT_DRAIN, &lo->plh_flags) && + atomic_read(&lo->plh_outstanding) != 0) { + spin_unlock(&ino->i_lock); +- lseg = ERR_PTR(wait_var_event_killable(&lo->plh_outstanding, +- !atomic_read(&lo->plh_outstanding))); ++ lseg = ERR_PTR(wait_on_bit(&lo->plh_flags, NFS_LAYOUT_DRAIN, ++ TASK_KILLABLE)); + if (IS_ERR(lseg)) + goto out_put_layout_hdr; + pnfs_put_layout_hdr(lo); +@@ -2117,6 +2131,7 @@ lookup_again: + + lgp = pnfs_alloc_init_layoutget_args(ino, ctx, &stateid, &arg, gfp_flags); + if (!lgp) { ++ lseg = ERR_PTR(-ENOMEM); + trace_pnfs_update_layout(ino, pos, count, iomode, lo, NULL, + PNFS_UPDATE_LAYOUT_NOMEM); + nfs_layoutget_end(lo); +@@ -2139,6 +2154,12 @@ lookup_again: + case -ERECALLCONFLICT: + case -EAGAIN: + break; ++ case -ENODATA: ++ /* The server returned NFS4ERR_LAYOUTUNAVAILABLE */ ++ pnfs_layout_set_fail_bit( ++ lo, pnfs_iomode_to_fail_bit(iomode)); ++ lseg = NULL; ++ goto out_put_layout_hdr; + default: + if (!nfs_error_is_fatal(PTR_ERR(lseg))) { + pnfs_layout_clear_fail_bit(lo, pnfs_iomode_to_fail_bit(iomode)); +@@ -2394,7 +2415,8 @@ pnfs_layout_process(struct nfs4_layoutget *lgp) + goto out_forget; + } + +- if (!pnfs_layout_is_valid(lo) && !pnfs_is_first_layoutget(lo)) ++ if (test_bit(NFS_LAYOUT_DRAIN, &lo->plh_flags) && ++ !pnfs_is_first_layoutget(lo)) + goto out_forget; + + if (nfs4_stateid_match_other(&lo->plh_stateid, &res->stateid)) { +@@ -3250,7 +3272,7 @@ struct nfs4_threshold *pnfs_mdsthreshold_alloc(void) + { + struct nfs4_threshold *thp; + +- thp = kzalloc(sizeof(*thp), GFP_NOFS); ++ thp = kzalloc(sizeof(*thp), GFP_KERNEL); + if (!thp) { + dprintk("%s mdsthreshold allocation failed\n", __func__); + return NULL; +diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h +index d810ae674f4e8..3307361c79560 100644 +--- a/fs/nfs/pnfs.h ++++ b/fs/nfs/pnfs.h +@@ -109,6 +109,7 @@ enum { + NFS_LAYOUT_FIRST_LAYOUTGET, /* Serialize first layoutget */ + NFS_LAYOUT_INODE_FREEING, /* The inode is being freed */ + NFS_LAYOUT_HASHED, /* The layout visible */ ++ NFS_LAYOUT_DRAIN, + }; + + enum layoutdriver_policy_flags { +@@ -238,6 +239,8 @@ struct pnfs_devicelist { + + extern int pnfs_register_layoutdriver(struct pnfs_layoutdriver_type *); + extern void pnfs_unregister_layoutdriver(struct pnfs_layoutdriver_type *); ++extern const struct pnfs_layoutdriver_type *pnfs_find_layoutdriver(u32 id); ++extern void pnfs_put_layoutdriver(const struct pnfs_layoutdriver_type *ld); + + /* nfs4proc.c */ + extern size_t max_response_pages(struct nfs_server *server); +@@ -517,7 +520,7 @@ pnfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg, + { + struct pnfs_ds_commit_info *fl_cinfo = cinfo->ds; + +- if (!lseg || !fl_cinfo->ops->mark_request_commit) ++ if (!lseg || !fl_cinfo->ops || !fl_cinfo->ops->mark_request_commit) + return false; + fl_cinfo->ops->mark_request_commit(req, lseg, cinfo, ds_commit_idx); + return true; +diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c +index cf19914fec817..657c242a18ff1 100644 +--- a/fs/nfs/pnfs_nfs.c ++++ b/fs/nfs/pnfs_nfs.c +@@ -419,7 +419,7 @@ static struct nfs_commit_data * + pnfs_bucket_fetch_commitdata(struct pnfs_commit_bucket *bucket, + struct nfs_commit_info *cinfo) + { +- struct nfs_commit_data *data = nfs_commitdata_alloc(false); ++ struct nfs_commit_data *data = nfs_commitdata_alloc(); + + if (!data) + return NULL; +@@ -468,7 +468,6 @@ pnfs_bucket_alloc_ds_commits(struct list_head *list, + goto out_error; + data->ds_commit_index = i; + 
list_add_tail(&data->list, list); +- atomic_inc(&cinfo->mds->rpcs_out); + nreq++; + } + mutex_unlock(&NFS_I(cinfo->inode)->commit_mutex); +@@ -516,11 +515,14 @@ pnfs_generic_commit_pagelist(struct inode *inode, struct list_head *mds_pages, + unsigned int nreq = 0; + + if (!list_empty(mds_pages)) { +- data = nfs_commitdata_alloc(true); ++ data = nfs_commitdata_alloc(); ++ if (!data) { ++ nfs_retry_commit(mds_pages, NULL, cinfo, -1); ++ return -ENOMEM; ++ } + data->ds_commit_index = -1; + list_splice_init(mds_pages, &data->pages); + list_add_tail(&data->list, &list); +- atomic_inc(&cinfo->mds->rpcs_out); + nreq++; + } + +@@ -895,7 +897,7 @@ static int _nfs4_pnfs_v3_ds_connect(struct nfs_server *mds_srv, + } + + smp_wmb(); +- ds->ds_clp = clp; ++ WRITE_ONCE(ds->ds_clp, clp); + dprintk("%s [new] addr: %s\n", __func__, ds->ds_remotestr); + out: + return status; +@@ -973,7 +975,7 @@ static int _nfs4_pnfs_v4_ds_connect(struct nfs_server *mds_srv, + } + + smp_wmb(); +- ds->ds_clp = clp; ++ WRITE_ONCE(ds->ds_clp, clp); + dprintk("%s [new] addr: %s\n", __func__, ds->ds_remotestr); + out: + return status; +diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c +index ea19dbf123014..a5b0bdcb53963 100644 +--- a/fs/nfs/proc.c ++++ b/fs/nfs/proc.c +@@ -91,7 +91,8 @@ nfs_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle, + info->dtpref = fsinfo.tsize; + info->maxfilesize = 0x7FFFFFFF; + info->lease_time = 0; +- info->change_attr_type = NFS4_CHANGE_TYPE_IS_TIME_METADATA; ++ info->change_attr_type = NFS4_CHANGE_TYPE_IS_UNDEFINED; ++ info->xattr_support = 0; + return 0; + } + +diff --git a/fs/nfs/super.c b/fs/nfs/super.c +index e65c83494c052..a847011f36c96 100644 +--- a/fs/nfs/super.c ++++ b/fs/nfs/super.c +@@ -1046,22 +1046,31 @@ static void nfs_fill_super(struct super_block *sb, struct nfs_fs_context *ctx) + if (ctx->bsize) + sb->s_blocksize = nfs_block_size(ctx->bsize, &sb->s_blocksize_bits); + +- if (server->nfs_client->rpc_ops->version != 2) { +- /* The VFS shouldn't apply the umask to mode bits. We will do +- * so ourselves when necessary. ++ switch (server->nfs_client->rpc_ops->version) { ++ case 2: ++ sb->s_time_gran = 1000; ++ sb->s_time_min = 0; ++ sb->s_time_max = U32_MAX; ++ break; ++ case 3: ++ /* ++ * The VFS shouldn't apply the umask to mode bits. ++ * We will do so ourselves when necessary. 
+ */ + sb->s_flags |= SB_POSIXACL; + sb->s_time_gran = 1; +- sb->s_export_op = &nfs_export_ops; +- } else +- sb->s_time_gran = 1000; +- +- if (server->nfs_client->rpc_ops->version != 4) { + sb->s_time_min = 0; + sb->s_time_max = U32_MAX; +- } else { ++ sb->s_export_op = &nfs_export_ops; ++ break; ++ case 4: ++ sb->s_flags |= SB_POSIXACL; ++ sb->s_time_gran = 1; + sb->s_time_min = S64_MIN; + sb->s_time_max = S64_MAX; ++ if (server->caps & NFS_CAP_ATOMIC_OPEN_V1) ++ sb->s_export_op = &nfs_export_ops; ++ break; + } + + sb->s_magic = NFS_SUPER_MAGIC; +diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c +index 5fa11e1aca4c2..d5ccf095b2a7d 100644 +--- a/fs/nfs/unlink.c ++++ b/fs/nfs/unlink.c +@@ -102,6 +102,10 @@ static void nfs_do_call_unlink(struct inode *inode, struct nfs_unlinkdata *data) + }; + struct rpc_task *task; + struct inode *dir = d_inode(data->dentry->d_parent); ++ ++ if (nfs_server_capable(inode, NFS_CAP_MOVEABLE)) ++ task_setup_data.flags |= RPC_TASK_MOVEABLE; ++ + nfs_sb_active(dir->i_sb); + data->args.fh = NFS_FH(dir); + nfs_fattr_init(data->res.dir_attr); +@@ -344,6 +348,10 @@ nfs_async_rename(struct inode *old_dir, struct inode *new_dir, + .flags = RPC_TASK_ASYNC | RPC_TASK_CRED_NOREF, + }; + ++ if (nfs_server_capable(old_dir, NFS_CAP_MOVEABLE) && ++ nfs_server_capable(new_dir, NFS_CAP_MOVEABLE)) ++ task_setup_data.flags |= RPC_TASK_MOVEABLE; ++ + data = kzalloc(sizeof(*data), GFP_KERNEL); + if (data == NULL) + return ERR_PTR(-ENOMEM); +diff --git a/fs/nfs/write.c b/fs/nfs/write.c +index eae9bf1140417..be70874bc3292 100644 +--- a/fs/nfs/write.c ++++ b/fs/nfs/write.c +@@ -70,27 +70,17 @@ static mempool_t *nfs_wdata_mempool; + static struct kmem_cache *nfs_cdata_cachep; + static mempool_t *nfs_commit_mempool; + +-struct nfs_commit_data *nfs_commitdata_alloc(bool never_fail) ++struct nfs_commit_data *nfs_commitdata_alloc(void) + { + struct nfs_commit_data *p; + +- if (never_fail) +- p = mempool_alloc(nfs_commit_mempool, GFP_NOIO); +- else { +- /* It is OK to do some reclaim, not no safe to wait +- * for anything to be returned to the pool. +- * mempool_alloc() cannot handle that particular combination, +- * so we need two separate attempts. +- */ ++ p = kmem_cache_zalloc(nfs_cdata_cachep, nfs_io_gfp_mask()); ++ if (!p) { + p = mempool_alloc(nfs_commit_mempool, GFP_NOWAIT); +- if (!p) +- p = kmem_cache_alloc(nfs_cdata_cachep, GFP_NOIO | +- __GFP_NOWARN | __GFP_NORETRY); + if (!p) + return NULL; ++ memset(p, 0, sizeof(*p)); + } +- +- memset(p, 0, sizeof(*p)); + INIT_LIST_HEAD(&p->pages); + return p; + } +@@ -104,9 +94,15 @@ EXPORT_SYMBOL_GPL(nfs_commit_free); + + static struct nfs_pgio_header *nfs_writehdr_alloc(void) + { +- struct nfs_pgio_header *p = mempool_alloc(nfs_wdata_mempool, GFP_KERNEL); ++ struct nfs_pgio_header *p; + +- memset(p, 0, sizeof(*p)); ++ p = kmem_cache_zalloc(nfs_wdata_cachep, nfs_io_gfp_mask()); ++ if (!p) { ++ p = mempool_alloc(nfs_wdata_mempool, GFP_NOWAIT); ++ if (!p) ++ return NULL; ++ memset(p, 0, sizeof(*p)); ++ } + p->rw_mode = FMODE_WRITE; + return p; + } +@@ -314,7 +310,10 @@ static void nfs_mapping_set_error(struct page *page, int error) + struct address_space *mapping = page_file_mapping(page); + + SetPageError(page); +- mapping_set_error(mapping, error); ++ filemap_set_wb_err(mapping, error); ++ if (mapping->host) ++ errseq_set(&mapping->host->i_sb->s_wb_err, ++ error == -ENOSPC ? 
-ENOSPC : -EIO); + nfs_set_pageerror(mapping); + } + +@@ -602,8 +601,9 @@ static void nfs_write_error(struct nfs_page *req, int error) + * Find an associated nfs write request, and prepare to flush it out + * May return an error if the user signalled nfs_wait_on_request(). + */ +-static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio, +- struct page *page) ++static int nfs_page_async_flush(struct page *page, ++ struct writeback_control *wbc, ++ struct nfs_pageio_descriptor *pgio) + { + struct nfs_page *req; + int ret = 0; +@@ -629,11 +629,11 @@ static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio, + /* + * Remove the problematic req upon fatal errors on the server + */ +- if (nfs_error_is_fatal(ret)) { +- if (nfs_error_is_fatal_on_server(ret)) +- goto out_launder; +- } else +- ret = -EAGAIN; ++ if (nfs_error_is_fatal_on_server(ret)) ++ goto out_launder; ++ if (wbc->sync_mode == WB_SYNC_NONE) ++ ret = AOP_WRITEPAGE_ACTIVATE; ++ redirty_page_for_writepage(wbc, page); + nfs_redirty_request(req); + pgio->pg_error = 0; + } else +@@ -649,15 +649,8 @@ out_launder: + static int nfs_do_writepage(struct page *page, struct writeback_control *wbc, + struct nfs_pageio_descriptor *pgio) + { +- int ret; +- + nfs_pageio_cond_complete(pgio, page_index(page)); +- ret = nfs_page_async_flush(pgio, page); +- if (ret == -EAGAIN) { +- redirty_page_for_writepage(wbc, page); +- ret = AOP_WRITEPAGE_ACTIVATE; +- } +- return ret; ++ return nfs_page_async_flush(page, wbc, pgio); + } + + /* +@@ -676,11 +669,7 @@ static int nfs_writepage_locked(struct page *page, + err = nfs_do_writepage(page, wbc, &pgio); + pgio.pg_error = 0; + nfs_pageio_complete(&pgio); +- if (err < 0) +- return err; +- if (nfs_error_is_fatal(pgio.pg_error)) +- return pgio.pg_error; +- return 0; ++ return err; + } + + int nfs_writepage(struct page *page, struct writeback_control *wbc) +@@ -728,19 +717,19 @@ int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc) + priority = wb_priority(wbc); + } + +- nfs_pageio_init_write(&pgio, inode, priority, false, +- &nfs_async_write_completion_ops); +- pgio.pg_io_completion = ioc; +- err = write_cache_pages(mapping, wbc, nfs_writepages_callback, &pgio); +- pgio.pg_error = 0; +- nfs_pageio_complete(&pgio); ++ do { ++ nfs_pageio_init_write(&pgio, inode, priority, false, ++ &nfs_async_write_completion_ops); ++ pgio.pg_io_completion = ioc; ++ err = write_cache_pages(mapping, wbc, nfs_writepages_callback, ++ &pgio); ++ pgio.pg_error = 0; ++ nfs_pageio_complete(&pgio); ++ } while (err < 0 && !nfs_error_is_fatal(err)); + nfs_io_completion_put(ioc); + + if (err < 0) + goto out_err; +- err = pgio.pg_error; +- if (nfs_error_is_fatal(err)) +- goto out_err; + return 0; + out_err: + return err; +@@ -1038,25 +1027,11 @@ nfs_scan_commit_list(struct list_head *src, struct list_head *dst, + struct nfs_page *req, *tmp; + int ret = 0; + +-restart: + list_for_each_entry_safe(req, tmp, src, wb_list) { + kref_get(&req->wb_kref); + if (!nfs_lock_request(req)) { +- int status; +- +- /* Prevent deadlock with nfs_lock_and_join_requests */ +- if (!list_empty(dst)) { +- nfs_release_request(req); +- continue; +- } +- /* Ensure we make progress to prevent livelock */ +- mutex_unlock(&NFS_I(cinfo->inode)->commit_mutex); +- status = nfs_wait_on_request(req); + nfs_release_request(req); +- mutex_lock(&NFS_I(cinfo->inode)->commit_mutex); +- if (status < 0) +- break; +- goto restart; ++ continue; + } + nfs_request_remove_commit_list(req, cinfo); + clear_bit(PG_COMMIT_TO_DS, &req->wb_flags); +@@ 
-1419,10 +1394,12 @@ static void nfs_initiate_write(struct nfs_pgio_header *hdr, + */ + static void nfs_redirty_request(struct nfs_page *req) + { ++ struct nfs_inode *nfsi = NFS_I(page_file_mapping(req->wb_page)->host); ++ + /* Bump the transmission count */ + req->wb_nio++; + nfs_mark_request_dirty(req); +- set_bit(NFS_CONTEXT_RESEND_WRITES, &nfs_req_openctx(req)->flags); ++ atomic_long_inc(&nfsi->redirtied_pages); + nfs_end_page_writeback(req); + nfs_release_request(req); + } +@@ -1434,7 +1411,7 @@ static void nfs_async_write_error(struct list_head *head, int error) + while (!list_empty(head)) { + req = nfs_list_entry(head->next); + nfs_list_remove_request(req); +- if (nfs_error_is_fatal(error)) ++ if (nfs_error_is_fatal_on_server(error)) + nfs_write_error(req, error); + else + nfs_redirty_request(req); +@@ -1671,10 +1648,13 @@ static void nfs_commit_begin(struct nfs_mds_commit_info *cinfo) + atomic_inc(&cinfo->rpcs_out); + } + +-static void nfs_commit_end(struct nfs_mds_commit_info *cinfo) ++bool nfs_commit_end(struct nfs_mds_commit_info *cinfo) + { +- if (atomic_dec_and_test(&cinfo->rpcs_out)) ++ if (atomic_dec_and_test(&cinfo->rpcs_out)) { + wake_up_var(&cinfo->rpcs_out); ++ return true; ++ } ++ return false; + } + + void nfs_commitdata_release(struct nfs_commit_data *data) +@@ -1706,6 +1686,10 @@ int nfs_initiate_commit(struct rpc_clnt *clnt, struct nfs_commit_data *data, + .flags = RPC_TASK_ASYNC | flags, + .priority = priority, + }; ++ ++ if (nfs_server_capable(data->inode, NFS_CAP_MOVEABLE)) ++ task_setup_data.flags |= RPC_TASK_MOVEABLE; ++ + /* Set up the initial task struct. */ + nfs_ops->commit_setup(data, &msg, &task_setup_data.rpc_client); + trace_nfs_initiate_commit(data); +@@ -1774,6 +1758,7 @@ void nfs_init_commit(struct nfs_commit_data *data, + data->res.fattr = &data->fattr; + data->res.verf = &data->verf; + nfs_fattr_init(&data->fattr); ++ nfs_commit_begin(cinfo->mds); + } + EXPORT_SYMBOL_GPL(nfs_init_commit); + +@@ -1816,11 +1801,14 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how, + if (list_empty(head)) + return 0; + +- data = nfs_commitdata_alloc(true); ++ data = nfs_commitdata_alloc(); ++ if (!data) { ++ nfs_retry_commit(head, NULL, cinfo, -1); ++ return -ENOMEM; ++ } + + /* Set up the argument struct */ + nfs_init_commit(data, head, NULL, cinfo); +- atomic_inc(&cinfo->mds->rpcs_out); + if (NFS_SERVER(inode)->nfs_client->cl_minorversion) + task_flags = RPC_TASK_MOVEABLE; + return nfs_initiate_commit(NFS_CLIENT(inode), data, NFS_PROTO(inode), +@@ -1884,7 +1872,7 @@ static void nfs_commit_release_pages(struct nfs_commit_data *data) + /* We have a mismatch. 
Write the page again */ + dprintk_cont(" mismatch\n"); + nfs_mark_request_dirty(req); +- set_bit(NFS_CONTEXT_RESEND_WRITES, &nfs_req_openctx(req)->flags); ++ atomic_long_inc(&NFS_I(data->inode)->redirtied_pages); + next: + nfs_unlock_and_release_request(req); + /* Latency breaker */ +@@ -1936,6 +1924,7 @@ static int __nfs_commit_inode(struct inode *inode, int how, + int may_wait = how & FLUSH_SYNC; + int ret, nscan; + ++ how &= ~FLUSH_SYNC; + nfs_init_cinfo_from_inode(&cinfo, inode); + nfs_commit_begin(cinfo.mds); + for (;;) { +diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c +index 9421dae227374..668c7527b17e8 100644 +--- a/fs/nfsd/export.c ++++ b/fs/nfsd/export.c +@@ -427,7 +427,7 @@ static int check_export(struct path *path, int *flags, unsigned char *uuid) + return -EINVAL; + } + +- if (mnt_user_ns(path->mnt) != &init_user_ns) { ++ if (is_idmapped_mnt(path->mnt)) { + dprintk("exp_export: export of idmapped mounts not yet supported.\n"); + return -EINVAL; + } +diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c +index be3c1aad50ea3..1e8c31ed6c7c4 100644 +--- a/fs/nfsd/filecache.c ++++ b/fs/nfsd/filecache.c +@@ -187,14 +187,7 @@ nfsd_file_alloc(struct inode *inode, unsigned int may, unsigned int hashval, + nf->nf_hashval = hashval; + refcount_set(&nf->nf_ref, 1); + nf->nf_may = may & NFSD_FILE_MAY_MASK; +- if (may & NFSD_MAY_NOT_BREAK_LEASE) { +- if (may & NFSD_MAY_WRITE) +- __set_bit(NFSD_FILE_BREAK_WRITE, &nf->nf_flags); +- if (may & NFSD_MAY_READ) +- __set_bit(NFSD_FILE_BREAK_READ, &nf->nf_flags); +- } + nf->nf_mark = NULL; +- init_rwsem(&nf->nf_rwsem); + trace_nfsd_file_alloc(nf); + } + return nf; +@@ -641,7 +634,7 @@ nfsd_file_cache_init(void) + if (!nfsd_filecache_wq) + goto out; + +- nfsd_file_hashtbl = kcalloc(NFSD_FILE_HASH_SIZE, ++ nfsd_file_hashtbl = kvcalloc(NFSD_FILE_HASH_SIZE, + sizeof(*nfsd_file_hashtbl), GFP_KERNEL); + if (!nfsd_file_hashtbl) { + pr_err("nfsd: unable to allocate nfsd_file_hashtbl\n"); +@@ -709,7 +702,7 @@ out_err: + nfsd_file_slab = NULL; + kmem_cache_destroy(nfsd_file_mark_slab); + nfsd_file_mark_slab = NULL; +- kfree(nfsd_file_hashtbl); ++ kvfree(nfsd_file_hashtbl); + nfsd_file_hashtbl = NULL; + destroy_workqueue(nfsd_filecache_wq); + nfsd_filecache_wq = NULL; +@@ -855,7 +848,7 @@ nfsd_file_cache_shutdown(void) + fsnotify_wait_marks_destroyed(); + kmem_cache_destroy(nfsd_file_mark_slab); + nfsd_file_mark_slab = NULL; +- kfree(nfsd_file_hashtbl); ++ kvfree(nfsd_file_hashtbl); + nfsd_file_hashtbl = NULL; + destroy_workqueue(nfsd_filecache_wq); + nfsd_filecache_wq = NULL; +@@ -991,21 +984,7 @@ wait_for_construction: + + this_cpu_inc(nfsd_file_cache_hits); + +- if (!(may_flags & NFSD_MAY_NOT_BREAK_LEASE)) { +- bool write = (may_flags & NFSD_MAY_WRITE); +- +- if (test_bit(NFSD_FILE_BREAK_READ, &nf->nf_flags) || +- (test_bit(NFSD_FILE_BREAK_WRITE, &nf->nf_flags) && write)) { +- status = nfserrno(nfsd_open_break_lease( +- file_inode(nf->nf_file), may_flags)); +- if (status == nfs_ok) { +- clear_bit(NFSD_FILE_BREAK_READ, &nf->nf_flags); +- if (write) +- clear_bit(NFSD_FILE_BREAK_WRITE, +- &nf->nf_flags); +- } +- } +- } ++ status = nfserrno(nfsd_open_break_lease(file_inode(nf->nf_file), may_flags)); + out: + if (status == nfs_ok) { + *pnf = nf; +diff --git a/fs/nfsd/filecache.h b/fs/nfsd/filecache.h +index 7872df5a0fe3a..63104be2865c5 100644 +--- a/fs/nfsd/filecache.h ++++ b/fs/nfsd/filecache.h +@@ -37,16 +37,13 @@ struct nfsd_file { + struct net *nf_net; + #define NFSD_FILE_HASHED (0) + #define NFSD_FILE_PENDING (1) +-#define NFSD_FILE_BREAK_READ (2) 
+-#define NFSD_FILE_BREAK_WRITE (3) +-#define NFSD_FILE_REFERENCED (4) ++#define NFSD_FILE_REFERENCED (2) + unsigned long nf_flags; + struct inode *nf_inode; + unsigned int nf_hashval; + refcount_t nf_ref; + unsigned char nf_may; + struct nfsd_file_mark *nf_mark; +- struct rw_semaphore nf_rwsem; + }; + + int nfsd_file_cache_init(void); +diff --git a/fs/nfsd/nfs2acl.c b/fs/nfsd/nfs2acl.c +index 4b43929c1f255..30a1782a03f01 100644 +--- a/fs/nfsd/nfs2acl.c ++++ b/fs/nfsd/nfs2acl.c +@@ -246,37 +246,27 @@ static int nfsaclsvc_encode_getaclres(struct svc_rqst *rqstp, __be32 *p) + struct nfsd3_getaclres *resp = rqstp->rq_resp; + struct dentry *dentry = resp->fh.fh_dentry; + struct inode *inode; +- int w; + + if (!svcxdr_encode_stat(xdr, resp->status)) +- return 0; ++ return false; + + if (dentry == NULL || d_really_is_negative(dentry)) +- return 1; ++ return true; + inode = d_inode(dentry); + + if (!svcxdr_encode_fattr(rqstp, xdr, &resp->fh, &resp->stat)) +- return 0; ++ return false; + if (xdr_stream_encode_u32(xdr, resp->mask) < 0) +- return 0; +- +- rqstp->rq_res.page_len = w = nfsacl_size( +- (resp->mask & NFS_ACL) ? resp->acl_access : NULL, +- (resp->mask & NFS_DFACL) ? resp->acl_default : NULL); +- while (w > 0) { +- if (!*(rqstp->rq_next_page++)) +- return 1; +- w -= PAGE_SIZE; +- } ++ return false; + + if (!nfs_stream_encode_acl(xdr, inode, resp->acl_access, + resp->mask & NFS_ACL, 0)) +- return 0; ++ return false; + if (!nfs_stream_encode_acl(xdr, inode, resp->acl_default, + resp->mask & NFS_DFACL, NFS_ACL_DEFAULT)) +- return 0; ++ return false; + +- return 1; ++ return true; + } + + /* ACCESS */ +@@ -286,17 +276,17 @@ static int nfsaclsvc_encode_accessres(struct svc_rqst *rqstp, __be32 *p) + struct nfsd3_accessres *resp = rqstp->rq_resp; + + if (!svcxdr_encode_stat(xdr, resp->status)) +- return 0; ++ return false; + switch (resp->status) { + case nfs_ok: + if (!svcxdr_encode_fattr(rqstp, xdr, &resp->fh, &resp->stat)) +- return 0; ++ return false; + if (xdr_stream_encode_u32(xdr, resp->access) < 0) +- return 0; ++ return false; + break; + } + +- return 1; ++ return true; + } + + /* +diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c +index 17715a6c7a409..eaf785aec0708 100644 +--- a/fs/nfsd/nfs3proc.c ++++ b/fs/nfsd/nfs3proc.c +@@ -146,17 +146,21 @@ nfsd3_proc_read(struct svc_rqst *rqstp) + { + struct nfsd3_readargs *argp = rqstp->rq_argp; + struct nfsd3_readres *resp = rqstp->rq_resp; +- u32 max_blocksize = svc_max_payload(rqstp); + unsigned int len; + int v; + +- argp->count = min_t(u32, argp->count, max_blocksize); +- + dprintk("nfsd: READ(3) %s %lu bytes at %Lu\n", + SVCFH_fmt(&argp->fh), + (unsigned long) argp->count, + (unsigned long long) argp->offset); + ++ argp->count = min_t(u32, argp->count, svc_max_payload(rqstp)); ++ argp->count = min_t(u32, argp->count, rqstp->rq_res.buflen); ++ if (argp->offset > (u64)OFFSET_MAX) ++ argp->offset = (u64)OFFSET_MAX; ++ if (argp->offset + argp->count > (u64)OFFSET_MAX) ++ argp->count = (u64)OFFSET_MAX - argp->offset; ++ + v = 0; + len = argp->count; + resp->pages = rqstp->rq_next_page; +@@ -199,19 +203,19 @@ nfsd3_proc_write(struct svc_rqst *rqstp) + (unsigned long long) argp->offset, + argp->stable? 
" stable" : ""); + ++ resp->status = nfserr_fbig; ++ if (argp->offset > (u64)OFFSET_MAX || ++ argp->offset + argp->len > (u64)OFFSET_MAX) ++ return rpc_success; ++ + fh_copy(&resp->fh, &argp->fh); + resp->committed = argp->stable; +- nvecs = svc_fill_write_vector(rqstp, rqstp->rq_arg.pages, +- &argp->first, cnt); +- if (!nvecs) { +- resp->status = nfserr_io; +- goto out; +- } ++ nvecs = svc_fill_write_vector(rqstp, &argp->payload); ++ + resp->status = nfsd_write(rqstp, &resp->fh, argp->offset, + rqstp->rq_vec, nvecs, &cnt, + resp->committed, resp->verf); + resp->count = cnt; +-out: + return rpc_success; + } + +@@ -439,22 +443,20 @@ nfsd3_proc_link(struct svc_rqst *rqstp) + + static void nfsd3_init_dirlist_pages(struct svc_rqst *rqstp, + struct nfsd3_readdirres *resp, +- int count) ++ u32 count) + { + struct xdr_buf *buf = &resp->dirlist; + struct xdr_stream *xdr = &resp->xdr; +- +- count = min_t(u32, count, svc_max_payload(rqstp)); ++ unsigned int sendbuf = min_t(unsigned int, rqstp->rq_res.buflen, ++ svc_max_payload(rqstp)); + + memset(buf, 0, sizeof(*buf)); + + /* Reserve room for the NULL ptr & eof flag (-2 words) */ +- buf->buflen = count - XDR_UNIT * 2; ++ buf->buflen = clamp(count, (u32)(XDR_UNIT * 2), sendbuf); ++ buf->buflen -= XDR_UNIT * 2; + buf->pages = rqstp->rq_next_page; +- while (count > 0) { +- rqstp->rq_next_page++; +- count -= PAGE_SIZE; +- } ++ rqstp->rq_next_page += (buf->buflen + PAGE_SIZE - 1) >> PAGE_SHIFT; + + /* This is xdr_init_encode(), but it assumes that + * the head kvec has already been consumed. */ +@@ -463,7 +465,7 @@ static void nfsd3_init_dirlist_pages(struct svc_rqst *rqstp, + xdr->page_ptr = buf->pages; + xdr->iov = NULL; + xdr->p = page_address(*buf->pages); +- xdr->end = xdr->p + (PAGE_SIZE >> 2); ++ xdr->end = (void *)xdr->p + min_t(u32, buf->buflen, PAGE_SIZE); + xdr->rqst = NULL; + } + +@@ -659,15 +661,9 @@ nfsd3_proc_commit(struct svc_rqst *rqstp) + argp->count, + (unsigned long long) argp->offset); + +- if (argp->offset > NFS_OFFSET_MAX) { +- resp->status = nfserr_inval; +- goto out; +- } +- + fh_copy(&resp->fh, &argp->fh); + resp->status = nfsd_commit(rqstp, &resp->fh, argp->offset, + argp->count, resp->verf); +-out: + return rpc_success; + } + +diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c +index 0a5ebc52e6a9c..48d4f99b7f901 100644 +--- a/fs/nfsd/nfs3xdr.c ++++ b/fs/nfsd/nfs3xdr.c +@@ -254,7 +254,7 @@ svcxdr_decode_sattr3(struct svc_rqst *rqstp, struct xdr_stream *xdr, + if (xdr_stream_decode_u64(xdr, &newsize) < 0) + return false; + iap->ia_valid |= ATTR_SIZE; +- iap->ia_size = min_t(u64, newsize, NFS_OFFSET_MAX); ++ iap->ia_size = newsize; + } + if (xdr_stream_decode_u32(xdr, &set_it) < 0) + return false; +@@ -487,11 +487,6 @@ neither: + return true; + } + +-static bool fs_supports_change_attribute(struct super_block *sb) +-{ +- return sb->s_flags & SB_I_VERSION || sb->s_export_op->fetch_iversion; +-} +- + /* + * Fill in the pre_op attr for the wcc data + */ +@@ -500,26 +495,24 @@ void fill_pre_wcc(struct svc_fh *fhp) + struct inode *inode; + struct kstat stat; + bool v4 = (fhp->fh_maxsize == NFS4_FHSIZE); ++ __be32 err; + + if (fhp->fh_no_wcc || fhp->fh_pre_saved) + return; + inode = d_inode(fhp->fh_dentry); +- if (fs_supports_change_attribute(inode->i_sb) || !v4) { +- __be32 err = fh_getattr(fhp, &stat); +- +- if (err) { +- /* Grab the times from inode anyway */ +- stat.mtime = inode->i_mtime; +- stat.ctime = inode->i_ctime; +- stat.size = inode->i_size; +- } +- fhp->fh_pre_mtime = stat.mtime; +- fhp->fh_pre_ctime = stat.ctime; +- 
fhp->fh_pre_size = stat.size; ++ err = fh_getattr(fhp, &stat); ++ if (err) { ++ /* Grab the times from inode anyway */ ++ stat.mtime = inode->i_mtime; ++ stat.ctime = inode->i_ctime; ++ stat.size = inode->i_size; + } + if (v4) + fhp->fh_pre_change = nfsd4_change_attribute(&stat, inode); + ++ fhp->fh_pre_mtime = stat.mtime; ++ fhp->fh_pre_ctime = stat.ctime; ++ fhp->fh_pre_size = stat.size; + fhp->fh_pre_saved = true; + } + +@@ -530,6 +523,7 @@ void fill_post_wcc(struct svc_fh *fhp) + { + bool v4 = (fhp->fh_maxsize == NFS4_FHSIZE); + struct inode *inode = d_inode(fhp->fh_dentry); ++ __be32 err; + + if (fhp->fh_no_wcc) + return; +@@ -537,16 +531,12 @@ void fill_post_wcc(struct svc_fh *fhp) + if (fhp->fh_post_saved) + printk("nfsd: inode locked twice during operation.\n"); + +- fhp->fh_post_saved = true; +- +- if (fs_supports_change_attribute(inode->i_sb) || !v4) { +- __be32 err = fh_getattr(fhp, &fhp->fh_post_attr); +- +- if (err) { +- fhp->fh_post_saved = false; +- fhp->fh_post_attr.ctime = inode->i_ctime; +- } +- } ++ err = fh_getattr(fhp, &fhp->fh_post_attr); ++ if (err) { ++ fhp->fh_post_saved = false; ++ fhp->fh_post_attr.ctime = inode->i_ctime; ++ } else ++ fhp->fh_post_saved = true; + if (v4) + fhp->fh_post_change = + nfsd4_change_attribute(&fhp->fh_post_attr, inode); +@@ -621,9 +611,6 @@ nfs3svc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p) + struct xdr_stream *xdr = &rqstp->rq_arg_stream; + struct nfsd3_writeargs *args = rqstp->rq_argp; + u32 max_blocksize = svc_max_payload(rqstp); +- struct kvec *head = rqstp->rq_arg.head; +- struct kvec *tail = rqstp->rq_arg.tail; +- size_t remaining; + + if (!svcxdr_decode_nfs_fh3(xdr, &args->fh)) + return 0; +@@ -641,17 +628,12 @@ nfs3svc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p) + /* request sanity */ + if (args->count != args->len) + return 0; +- remaining = head->iov_len + rqstp->rq_arg.page_len + tail->iov_len; +- remaining -= xdr_stream_pos(xdr); +- if (remaining < xdr_align_size(args->len)) +- return 0; + if (args->count > max_blocksize) { + args->count = max_blocksize; + args->len = max_blocksize; + } +- +- args->first.iov_base = xdr->p; +- args->first.iov_len = head->iov_len - xdr_stream_pos(xdr); ++ if (!xdr_stream_subsegment(xdr, &args->payload, args->count)) ++ return 0; + + return 1; + } +diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c +index 0f8b10f363e7f..2e0040d3bca79 100644 +--- a/fs/nfsd/nfs4callback.c ++++ b/fs/nfsd/nfs4callback.c +@@ -917,7 +917,6 @@ static int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *c + } else { + if (!conn->cb_xprt) + return -EINVAL; +- clp->cl_cb_conn.cb_xprt = conn->cb_xprt; + clp->cl_cb_session = ses; + args.bc_xprt = conn->cb_xprt; + args.prognumber = clp->cl_cb_session->se_cb_prog; +@@ -937,6 +936,9 @@ static int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *c + rpc_shutdown_client(client); + return -ENOMEM; + } ++ ++ if (clp->cl_minorversion != 0) ++ clp->cl_cb_conn.cb_xprt = conn->cb_xprt; + clp->cl_cb_client = client; + clp->cl_cb_cred = cred; + rcu_read_lock(); +diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c +index 486c5dba4b650..0a900b9e39eac 100644 +--- a/fs/nfsd/nfs4proc.c ++++ b/fs/nfsd/nfs4proc.c +@@ -782,12 +782,16 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, + __be32 status; + + read->rd_nf = NULL; +- if (read->rd_offset >= OFFSET_MAX) +- return nfserr_inval; + + trace_nfsd_read_start(rqstp, &cstate->current_fh, + read->rd_offset, read->rd_length); + ++ read->rd_length = 
min_t(u32, read->rd_length, svc_max_payload(rqstp)); ++ if (read->rd_offset > (u64)OFFSET_MAX) ++ read->rd_offset = (u64)OFFSET_MAX; ++ if (read->rd_offset + read->rd_length > (u64)OFFSET_MAX) ++ read->rd_length = (u64)OFFSET_MAX - read->rd_offset; ++ + /* + * If we do a zero copy read, then a client will see read data + * that reflects the state of the file *after* performing the +@@ -1018,8 +1022,9 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, + unsigned long cnt; + int nvecs; + +- if (write->wr_offset >= OFFSET_MAX) +- return nfserr_inval; ++ if (write->wr_offset > (u64)OFFSET_MAX || ++ write->wr_offset + write->wr_buflen > (u64)OFFSET_MAX) ++ return nfserr_fbig; + + cnt = write->wr_buflen; + trace_nfsd_write_start(rqstp, &cstate->current_fh, +@@ -1033,8 +1038,7 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, + + write->wr_how_written = write->wr_stable_how; + +- nvecs = svc_fill_write_vector(rqstp, write->wr_payload.pages, +- write->wr_payload.head, write->wr_buflen); ++ nvecs = svc_fill_write_vector(rqstp, &write->wr_payload); + WARN_ON_ONCE(nvecs > ARRAY_SIZE(rqstp->rq_vec)); + + status = nfsd_vfs_write(rqstp, &cstate->current_fh, nf, +@@ -1201,6 +1205,7 @@ try_again: + /* allow 20secs for mount/unmount for now - revisit */ + if (signal_pending(current) || + (schedule_timeout(20*HZ) == 0)) { ++ finish_wait(&nn->nfsd_ssc_waitq, &wait); + kfree(work); + return nfserr_eagain; + } +@@ -1511,6 +1516,9 @@ static void nfsd4_init_copy_res(struct nfsd4_copy *copy, bool sync) + + static ssize_t _nfsd_copy_file_range(struct nfsd4_copy *copy) + { ++ struct file *dst = copy->nf_dst->nf_file; ++ struct file *src = copy->nf_src->nf_file; ++ errseq_t since; + ssize_t bytes_copied = 0; + u64 bytes_total = copy->cp_count; + u64 src_pos = copy->cp_src_pos; +@@ -1523,9 +1531,8 @@ static ssize_t _nfsd_copy_file_range(struct nfsd4_copy *copy) + do { + if (kthread_should_stop()) + break; +- bytes_copied = nfsd_copy_file_range(copy->nf_src->nf_file, +- src_pos, copy->nf_dst->nf_file, dst_pos, +- bytes_total); ++ bytes_copied = nfsd_copy_file_range(src, src_pos, dst, dst_pos, ++ bytes_total); + if (bytes_copied <= 0) + break; + bytes_total -= bytes_copied; +@@ -1535,11 +1542,11 @@ static ssize_t _nfsd_copy_file_range(struct nfsd4_copy *copy) + } while (bytes_total > 0 && !copy->cp_synchronous); + /* for a non-zero asynchronous copy do a commit of data */ + if (!copy->cp_synchronous && copy->cp_res.wr_bytes_written > 0) { +- down_write(©->nf_dst->nf_rwsem); +- status = vfs_fsync_range(copy->nf_dst->nf_file, +- copy->cp_dst_pos, ++ since = READ_ONCE(dst->f_wb_err); ++ status = vfs_fsync_range(dst, copy->cp_dst_pos, + copy->cp_res.wr_bytes_written, 0); +- up_write(©->nf_dst->nf_rwsem); ++ if (!status) ++ status = filemap_check_wb_err(dst->f_mapping, since); + if (!status) + copy->committed = true; + } +@@ -2487,9 +2494,6 @@ nfsd4_proc_compound(struct svc_rqst *rqstp) + status = nfserr_minor_vers_mismatch; + if (nfsd_minorversion(nn, args->minorversion, NFSD_TEST) <= 0) + goto out; +- status = nfserr_resource; +- if (args->opcnt > NFSD_MAX_OPS_PER_COMPOUND) +- goto out; + + status = nfs41_check_op_ordering(args); + if (status) { +@@ -2502,10 +2506,20 @@ nfsd4_proc_compound(struct svc_rqst *rqstp) + + rqstp->rq_lease_breaker = (void **)&cstate->clp; + +- trace_nfsd_compound(rqstp, args->opcnt); ++ trace_nfsd_compound(rqstp, args->client_opcnt); + while (!status && resp->opcnt < args->opcnt) { + op = &args->ops[resp->opcnt++]; + ++ if (unlikely(resp->opcnt 
== NFSD_MAX_OPS_PER_COMPOUND)) { ++ /* If there are still more operations to process, ++ * stop here and report NFS4ERR_RESOURCE. */ ++ if (cstate->minorversion == 0 && ++ args->client_opcnt > resp->opcnt) { ++ op->status = nfserr_resource; ++ goto encode_op; ++ } ++ } ++ + /* + * The XDR decode routines may have pre-set op->status; + * for example, if there is a miscellaneous XDR error +@@ -2581,8 +2595,8 @@ encode_op: + status = op->status; + } + +- trace_nfsd_compound_status(args->opcnt, resp->opcnt, status, +- nfsd4_op_name(op->opnum)); ++ trace_nfsd_compound_status(args->client_opcnt, resp->opcnt, ++ status, nfsd4_op_name(op->opnum)); + + nfsd4_cstate_clear_replay(cstate); + nfsd4_increment_op_stats(op->opnum); +diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c +index 6fedc49726bf7..8f24485e0f04f 100644 +--- a/fs/nfsd/nfs4recover.c ++++ b/fs/nfsd/nfs4recover.c +@@ -815,8 +815,10 @@ __cld_pipe_inprogress_downcall(const struct cld_msg_v2 __user *cmsg, + princhash.data = memdup_user( + &ci->cc_princhash.cp_data, + princhashlen); +- if (IS_ERR_OR_NULL(princhash.data)) ++ if (IS_ERR_OR_NULL(princhash.data)) { ++ kfree(name.data); + return -EFAULT; ++ } + princhash.len = princhashlen; + } else + princhash.len = 0; +@@ -2156,6 +2158,7 @@ static struct notifier_block nfsd4_cld_block = { + int + register_cld_notifier(void) + { ++ WARN_ON(!nfsd_net_id); + return rpc_pipefs_notifier_register(&nfsd4_cld_block); + } + +diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c +index 3f4027a5de883..c062728034ad0 100644 +--- a/fs/nfsd/nfs4state.c ++++ b/fs/nfsd/nfs4state.c +@@ -627,15 +627,26 @@ find_any_file(struct nfs4_file *f) + return ret; + } + +-static struct nfsd_file *find_deleg_file(struct nfs4_file *f) ++static struct nfsd_file *find_any_file_locked(struct nfs4_file *f) + { +- struct nfsd_file *ret = NULL; ++ lockdep_assert_held(&f->fi_lock); ++ ++ if (f->fi_fds[O_RDWR]) ++ return f->fi_fds[O_RDWR]; ++ if (f->fi_fds[O_WRONLY]) ++ return f->fi_fds[O_WRONLY]; ++ if (f->fi_fds[O_RDONLY]) ++ return f->fi_fds[O_RDONLY]; ++ return NULL; ++} ++ ++static struct nfsd_file *find_deleg_file_locked(struct nfs4_file *f) ++{ ++ lockdep_assert_held(&f->fi_lock); + +- spin_lock(&f->fi_lock); + if (f->fi_deleg_file) +- ret = nfsd_file_get(f->fi_deleg_file); +- spin_unlock(&f->fi_lock); +- return ret; ++ return f->fi_deleg_file; ++ return NULL; + } + + static atomic_long_t num_delegations; +@@ -961,6 +972,7 @@ static struct nfs4_ol_stateid * nfs4_alloc_open_stateid(struct nfs4_client *clp) + + static void nfs4_free_deleg(struct nfs4_stid *stid) + { ++ WARN_ON(!list_empty(&stid->sc_cp_list)); + kmem_cache_free(deleg_slab, stid); + atomic_long_dec(&num_delegations); + } +@@ -1207,6 +1219,11 @@ hash_delegation_locked(struct nfs4_delegation *dp, struct nfs4_file *fp) + return 0; + } + ++static bool delegation_hashed(struct nfs4_delegation *dp) ++{ ++ return !(list_empty(&dp->dl_perfile)); ++} ++ + static bool + unhash_delegation_locked(struct nfs4_delegation *dp) + { +@@ -1214,7 +1231,7 @@ unhash_delegation_locked(struct nfs4_delegation *dp) + + lockdep_assert_held(&state_lock); + +- if (list_empty(&dp->dl_perfile)) ++ if (!delegation_hashed(dp)) + return false; + + dp->dl_stid.sc_type = NFS4_CLOSED_DELEG_STID; +@@ -1369,6 +1386,7 @@ static void nfs4_free_ol_stateid(struct nfs4_stid *stid) + release_all_access(stp); + if (stp->st_stateowner) + nfs4_put_stateowner(stp->st_stateowner); ++ WARN_ON(!list_empty(&stid->sc_cp_list)); + kmem_cache_free(stateid_slab, stid); + } + +@@ -2494,9 +2512,11 @@ static int 
nfs4_show_open(struct seq_file *s, struct nfs4_stid *st) + ols = openlockstateid(st); + oo = ols->st_stateowner; + nf = st->sc_file; +- file = find_any_file(nf); ++ ++ spin_lock(&nf->fi_lock); ++ file = find_any_file_locked(nf); + if (!file) +- return 0; ++ goto out; + + seq_printf(s, "- "); + nfs4_show_stateid(s, &st->sc_stateid); +@@ -2518,8 +2538,8 @@ static int nfs4_show_open(struct seq_file *s, struct nfs4_stid *st) + seq_printf(s, ", "); + nfs4_show_owner(s, oo); + seq_printf(s, " }\n"); +- nfsd_file_put(file); +- ++out: ++ spin_unlock(&nf->fi_lock); + return 0; + } + +@@ -2533,9 +2553,10 @@ static int nfs4_show_lock(struct seq_file *s, struct nfs4_stid *st) + ols = openlockstateid(st); + oo = ols->st_stateowner; + nf = st->sc_file; +- file = find_any_file(nf); ++ spin_lock(&nf->fi_lock); ++ file = find_any_file_locked(nf); + if (!file) +- return 0; ++ goto out; + + seq_printf(s, "- "); + nfs4_show_stateid(s, &st->sc_stateid); +@@ -2555,8 +2576,8 @@ static int nfs4_show_lock(struct seq_file *s, struct nfs4_stid *st) + seq_printf(s, ", "); + nfs4_show_owner(s, oo); + seq_printf(s, " }\n"); +- nfsd_file_put(file); +- ++out: ++ spin_unlock(&nf->fi_lock); + return 0; + } + +@@ -2568,9 +2589,10 @@ static int nfs4_show_deleg(struct seq_file *s, struct nfs4_stid *st) + + ds = delegstateid(st); + nf = st->sc_file; +- file = find_deleg_file(nf); ++ spin_lock(&nf->fi_lock); ++ file = find_deleg_file_locked(nf); + if (!file) +- return 0; ++ goto out; + + seq_printf(s, "- "); + nfs4_show_stateid(s, &st->sc_stateid); +@@ -2586,8 +2608,8 @@ static int nfs4_show_deleg(struct seq_file *s, struct nfs4_stid *st) + seq_printf(s, ", "); + nfs4_show_fname(s, file); + seq_printf(s, " }\n"); +- nfsd_file_put(file); +- ++out: ++ spin_unlock(&nf->fi_lock); + return 0; + } + +@@ -4107,8 +4129,10 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp, + status = nfserr_clid_inuse; + if (client_has_state(old) + && !same_creds(&unconf->cl_cred, +- &old->cl_cred)) ++ &old->cl_cred)) { ++ old = NULL; + goto out; ++ } + status = mark_client_expired_locked(old); + if (status) { + old = NULL; +@@ -4598,7 +4622,7 @@ static void nfsd4_cb_recall_prepare(struct nfsd4_callback *cb) + * queued for a lease break. Don't queue it again. + */ + spin_lock(&state_lock); +- if (dp->dl_time == 0) { ++ if (delegation_hashed(dp) && dp->dl_time == 0) { + dp->dl_time = ktime_get_boottime_seconds(); + list_add_tail(&dp->dl_recall_lru, &nn->del_recall_lru); + } +@@ -4686,6 +4710,14 @@ nfsd_break_deleg_cb(struct file_lock *fl) + return ret; + } + ++/** ++ * nfsd_breaker_owns_lease - Check if lease conflict was resolved ++ * @fl: Lock state to check ++ * ++ * Return values: ++ * %true: Lease conflict was resolved ++ * %false: Lease conflict was not resolved. 
++ */ + static bool nfsd_breaker_owns_lease(struct file_lock *fl) + { + struct nfs4_delegation *dl = fl->fl_owner; +@@ -4693,11 +4725,11 @@ static bool nfsd_breaker_owns_lease(struct file_lock *fl) + struct nfs4_client *clp; + + if (!i_am_nfsd()) +- return NULL; ++ return false; + rqst = kthread_data(current); + /* Note rq_prog == NFS_ACL_PROGRAM is also possible: */ + if (rqst->rq_prog != NFS_PROGRAM || rqst->rq_vers < 4) +- return NULL; ++ return false; + clp = *(rqst->rq_lease_breaker); + return dl->dl_stid.sc_client == clp; + } +@@ -6035,7 +6067,11 @@ nfs4_preprocess_stateid_op(struct svc_rqst *rqstp, + *nfp = NULL; + + if (ZERO_STATEID(stateid) || ONE_STATEID(stateid)) { +- status = check_special_stateids(net, fhp, stateid, flags); ++ if (cstid) ++ status = nfserr_bad_stateid; ++ else ++ status = check_special_stateids(net, fhp, stateid, ++ flags); + goto done; + } + +@@ -6370,6 +6406,7 @@ static void nfsd4_close_open_stateid(struct nfs4_ol_stateid *s) + struct nfs4_client *clp = s->st_stid.sc_client; + bool unhashed; + LIST_HEAD(reaplist); ++ struct nfs4_ol_stateid *stp; + + spin_lock(&clp->cl_lock); + unhashed = unhash_open_stateid(s, &reaplist); +@@ -6378,6 +6415,8 @@ static void nfsd4_close_open_stateid(struct nfs4_ol_stateid *s) + if (unhashed) + put_ol_stateid_locked(s, &reaplist); + spin_unlock(&clp->cl_lock); ++ list_for_each_entry(stp, &reaplist, st_locks) ++ nfs4_free_cpntf_statelist(clp->net, &stp->st_stid); + free_ol_stateid_reaplist(&reaplist); + } else { + spin_unlock(&clp->cl_lock); +@@ -7280,16 +7319,12 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp, + if (sop->so_is_open_owner || !same_owner_str(sop, owner)) + continue; + +- /* see if there are still any locks associated with it */ +- lo = lockowner(sop); +- list_for_each_entry(stp, &sop->so_stateids, st_perstateowner) { +- if (check_for_locks(stp->st_stid.sc_file, lo)) { +- status = nfserr_locks_held; +- spin_unlock(&clp->cl_lock); +- return status; +- } ++ if (atomic_read(&sop->so_count) != 1) { ++ spin_unlock(&clp->cl_lock); ++ return nfserr_locks_held; + } + ++ lo = lockowner(sop); + nfs4_get_stateowner(sop); + break; + } +diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c +index cf030ebe28275..dfd3877fdd818 100644 +--- a/fs/nfsd/nfs4xdr.c ++++ b/fs/nfsd/nfs4xdr.c +@@ -288,11 +288,8 @@ nfsd4_decode_bitmap4(struct nfsd4_compoundargs *argp, u32 *bmval, u32 bmlen) + p = xdr_inline_decode(argp->xdr, count << 2); + if (!p) + return nfserr_bad_xdr; +- i = 0; +- while (i < count) +- bmval[i++] = be32_to_cpup(p++); +- while (i < bmlen) +- bmval[i++] = 0; ++ for (i = 0; i < bmlen; i++) ++ bmval[i] = (i < count) ? be32_to_cpup(p++) : 0; + + return nfs_ok; + } +@@ -2352,16 +2349,11 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp) + + if (xdr_stream_decode_u32(argp->xdr, &argp->minorversion) < 0) + return 0; +- if (xdr_stream_decode_u32(argp->xdr, &argp->opcnt) < 0) ++ if (xdr_stream_decode_u32(argp->xdr, &argp->client_opcnt) < 0) + return 0; + +- /* +- * NFS4ERR_RESOURCE is a more helpful error than GARBAGE_ARGS +- * here, so we return success at the xdr level so that +- * nfsd4_proc can handle this is an NFS-level error. 
+- */ +- if (argp->opcnt > NFSD_MAX_OPS_PER_COMPOUND) +- return 1; ++ argp->opcnt = min_t(u32, argp->client_opcnt, ++ NFSD_MAX_OPS_PER_COMPOUND); + + if (argp->opcnt > ARRAY_SIZE(argp->iops)) { + argp->ops = kzalloc(argp->opcnt * sizeof(*argp->ops), GFP_KERNEL); +@@ -3522,6 +3514,17 @@ nfsd4_encode_dirent(void *ccdv, const char *name, int namlen, + case nfserr_noent: + xdr_truncate_encode(xdr, start_offset); + goto skip_entry; ++ case nfserr_jukebox: ++ /* ++ * The pseudoroot should only display dentries that lead to ++ * exports. If we get EJUKEBOX here, then we can't tell whether ++ * this entry should be included. Just fail the whole READDIR ++ * with NFS4ERR_DELAY in that case, and hope that the situation ++ * will resolve itself by the client's next attempt. ++ */ ++ if (cd->rd_fhp->fh_export->ex_flags & NFSEXP_V4ROOT) ++ goto fail; ++ fallthrough; + default: + /* + * If the client requested the RDATTR_ERROR attribute, +@@ -3996,14 +3999,12 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, + if (resp->xdr->buf->page_len && + test_bit(RQ_SPLICE_OK, &resp->rqstp->rq_flags)) { + WARN_ON_ONCE(1); +- return nfserr_resource; ++ return nfserr_serverfault; + } + xdr_commit_encode(xdr); + +- maxcount = svc_max_payload(resp->rqstp); +- maxcount = min_t(unsigned long, maxcount, ++ maxcount = min_t(unsigned long, read->rd_length, + (xdr->buf->buflen - xdr->buf->len)); +- maxcount = min_t(unsigned long, maxcount, read->rd_length); + + if (file->f_op->splice_read && + test_bit(RQ_SPLICE_OK, &resp->rqstp->rq_flags)) +@@ -4840,10 +4841,8 @@ nfsd4_encode_read_plus(struct nfsd4_compoundres *resp, __be32 nfserr, + return nfserr_resource; + xdr_commit_encode(xdr); + +- maxcount = svc_max_payload(resp->rqstp); +- maxcount = min_t(unsigned long, maxcount, ++ maxcount = min_t(unsigned long, read->rd_length, + (xdr->buf->buflen - xdr->buf->len)); +- maxcount = min_t(unsigned long, maxcount, read->rd_length); + count = maxcount; + + eof = read->rd_offset >= i_size_read(file_inode(file)); +diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c +index 96cdf77925f33..830bb8493c7fd 100644 +--- a/fs/nfsd/nfscache.c ++++ b/fs/nfsd/nfscache.c +@@ -212,7 +212,6 @@ void nfsd_reply_cache_shutdown(struct nfsd_net *nn) + struct svc_cacherep *rp; + unsigned int i; + +- nfsd_reply_cache_stats_destroy(nn); + unregister_shrinker(&nn->nfsd_reply_cache_shrinker); + + for (i = 0; i < nn->drc_hashsize; i++) { +@@ -223,6 +222,7 @@ void nfsd_reply_cache_shutdown(struct nfsd_net *nn) + rp, nn); + } + } ++ nfsd_reply_cache_stats_destroy(nn); + + kvfree(nn->drc_hashtbl); + nn->drc_hashtbl = NULL; +diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c +index 070e5dd03e26f..cb73c12925629 100644 +--- a/fs/nfsd/nfsctl.c ++++ b/fs/nfsd/nfsctl.c +@@ -1249,7 +1249,8 @@ static void nfsdfs_remove_file(struct inode *dir, struct dentry *dentry) + clear_ncl(d_inode(dentry)); + dget(dentry); + ret = simple_unlink(dir, dentry); +- d_delete(dentry); ++ d_drop(dentry); ++ fsnotify_unlink(dir, dentry); + dput(dentry); + WARN_ON_ONCE(ret); + } +@@ -1340,8 +1341,8 @@ void nfsd_client_rmdir(struct dentry *dentry) + dget(dentry); + ret = simple_rmdir(dir, dentry); + WARN_ON_ONCE(ret); ++ d_drop(dentry); + fsnotify_rmdir(dir, dentry); +- d_delete(dentry); + dput(dentry); + inode_unlock(dir); + } +@@ -1521,12 +1522,9 @@ static int __init init_nfsd(void) + int retval; + printk(KERN_INFO "Installing knfsd (copyright (C) 1996 okir@monad.swb.de).\n"); + +- retval = register_cld_notifier(); +- if (retval) +- return retval; + retval = 
nfsd4_init_slabs(); + if (retval) +- goto out_unregister_notifier; ++ return retval; + retval = nfsd4_init_pnfs(); + if (retval) + goto out_free_slabs; +@@ -1545,9 +1543,14 @@ static int __init init_nfsd(void) + goto out_free_exports; + retval = register_pernet_subsys(&nfsd_net_ops); + if (retval < 0) ++ goto out_free_filesystem; ++ retval = register_cld_notifier(); ++ if (retval) + goto out_free_all; + return 0; + out_free_all: ++ unregister_pernet_subsys(&nfsd_net_ops); ++out_free_filesystem: + unregister_filesystem(&nfsd_fs_type); + out_free_exports: + remove_proc_entry("fs/nfs/exports", NULL); +@@ -1561,13 +1564,12 @@ out_free_pnfs: + nfsd4_exit_pnfs(); + out_free_slabs: + nfsd4_free_slabs(); +-out_unregister_notifier: +- unregister_cld_notifier(); + return retval; + } + + static void __exit exit_nfsd(void) + { ++ unregister_cld_notifier(); + unregister_pernet_subsys(&nfsd_net_ops); + nfsd_drc_slab_free(); + remove_proc_entry("fs/nfs/exports", NULL); +@@ -1577,7 +1579,6 @@ static void __exit exit_nfsd(void) + nfsd4_free_slabs(); + nfsd4_exit_pnfs(); + unregister_filesystem(&nfsd_fs_type); +- unregister_cld_notifier(); + } + + MODULE_AUTHOR("Olaf Kirch <okir@monad.swb.de>"); +diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c +index 90fcd6178823b..b009da1dcbb50 100644 +--- a/fs/nfsd/nfsproc.c ++++ b/fs/nfsd/nfsproc.c +@@ -182,6 +182,7 @@ nfsd_proc_read(struct svc_rqst *rqstp) + argp->count, argp->offset); + + argp->count = min_t(u32, argp->count, NFSSVC_MAXBLKSIZE_V2); ++ argp->count = min_t(u32, argp->count, rqstp->rq_res.buflen); + + v = 0; + len = argp->count; +@@ -230,16 +231,11 @@ nfsd_proc_write(struct svc_rqst *rqstp) + unsigned long cnt = argp->len; + unsigned int nvecs; + +- dprintk("nfsd: WRITE %s %d bytes at %d\n", ++ dprintk("nfsd: WRITE %s %u bytes at %d\n", + SVCFH_fmt(&argp->fh), + argp->len, argp->offset); + +- nvecs = svc_fill_write_vector(rqstp, rqstp->rq_arg.pages, +- &argp->first, cnt); +- if (!nvecs) { +- resp->status = nfserr_io; +- goto out; +- } ++ nvecs = svc_fill_write_vector(rqstp, &argp->payload); + + resp->status = nfsd_write(rqstp, fh_copy(&resp->fh, &argp->fh), + argp->offset, rqstp->rq_vec, nvecs, +@@ -248,7 +244,6 @@ nfsd_proc_write(struct svc_rqst *rqstp) + resp->status = fh_getattr(&resp->fh, &resp->stat); + else if (resp->status == nfserr_jukebox) + return rpc_drop_reply; +-out: + return rpc_success; + } + +@@ -557,17 +552,16 @@ nfsd_proc_rmdir(struct svc_rqst *rqstp) + + static void nfsd_init_dirlist_pages(struct svc_rqst *rqstp, + struct nfsd_readdirres *resp, +- int count) ++ u32 count) + { + struct xdr_buf *buf = &resp->dirlist; + struct xdr_stream *xdr = &resp->xdr; + +- count = min_t(u32, count, PAGE_SIZE); +- + memset(buf, 0, sizeof(*buf)); + + /* Reserve room for the NULL ptr & eof flag (-2 words) */ +- buf->buflen = count - sizeof(__be32) * 2; ++ buf->buflen = clamp(count, (u32)(XDR_UNIT * 2), (u32)PAGE_SIZE); ++ buf->buflen -= XDR_UNIT * 2; + buf->pages = rqstp->rq_next_page; + rqstp->rq_next_page++; + +@@ -578,7 +572,7 @@ static void nfsd_init_dirlist_pages(struct svc_rqst *rqstp, + xdr->page_ptr = buf->pages; + xdr->iov = NULL; + xdr->p = page_address(*buf->pages); +- xdr->end = xdr->p + (PAGE_SIZE >> 2); ++ xdr->end = (void *)xdr->p + min_t(u32, buf->buflen, PAGE_SIZE); + xdr->rqst = NULL; + } + +diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c +index ccb59e91011b7..373695cc62a7a 100644 +--- a/fs/nfsd/nfssvc.c ++++ b/fs/nfsd/nfssvc.c +@@ -425,8 +425,8 @@ static void nfsd_shutdown_net(struct net *net) + { + struct nfsd_net *nn = 
net_generic(net, nfsd_net_id); + +- nfsd_file_cache_shutdown_net(net); + nfs4_state_shutdown_net(net); ++ nfsd_file_cache_shutdown_net(net); + if (nn->lockd_up) { + lockd_down(net); + nn->lockd_up = false; +diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c +index a06c05fe3b421..26a42f87c2409 100644 +--- a/fs/nfsd/nfsxdr.c ++++ b/fs/nfsd/nfsxdr.c +@@ -325,10 +325,7 @@ nfssvc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p) + { + struct xdr_stream *xdr = &rqstp->rq_arg_stream; + struct nfsd_writeargs *args = rqstp->rq_argp; +- struct kvec *head = rqstp->rq_arg.head; +- struct kvec *tail = rqstp->rq_arg.tail; + u32 beginoffset, totalcount; +- size_t remaining; + + if (!svcxdr_decode_fhandle(xdr, &args->fh)) + return 0; +@@ -346,12 +343,8 @@ nfssvc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p) + return 0; + if (args->len > NFSSVC_MAXBLKSIZE_V2) + return 0; +- remaining = head->iov_len + rqstp->rq_arg.page_len + tail->iov_len; +- remaining -= xdr_stream_pos(xdr); +- if (remaining < xdr_align_size(args->len)) ++ if (!xdr_stream_subsegment(xdr, &args->payload, args->len)) + return 0; +- args->first.iov_base = xdr->p; +- args->first.iov_len = head->iov_len - xdr_stream_pos(xdr); + + return 1; + } +diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h +index 538520957a815..0fc1fa6f28e0b 100644 +--- a/fs/nfsd/trace.h ++++ b/fs/nfsd/trace.h +@@ -319,14 +319,14 @@ TRACE_EVENT(nfsd_export_update, + DECLARE_EVENT_CLASS(nfsd_io_class, + TP_PROTO(struct svc_rqst *rqstp, + struct svc_fh *fhp, +- loff_t offset, +- unsigned long len), ++ u64 offset, ++ u32 len), + TP_ARGS(rqstp, fhp, offset, len), + TP_STRUCT__entry( + __field(u32, xid) + __field(u32, fh_hash) +- __field(loff_t, offset) +- __field(unsigned long, len) ++ __field(u64, offset) ++ __field(u32, len) + ), + TP_fast_assign( + __entry->xid = be32_to_cpu(rqstp->rq_xid); +@@ -334,7 +334,7 @@ DECLARE_EVENT_CLASS(nfsd_io_class, + __entry->offset = offset; + __entry->len = len; + ), +- TP_printk("xid=0x%08x fh_hash=0x%08x offset=%lld len=%lu", ++ TP_printk("xid=0x%08x fh_hash=0x%08x offset=%llu len=%u", + __entry->xid, __entry->fh_hash, + __entry->offset, __entry->len) + ) +@@ -343,8 +343,8 @@ DECLARE_EVENT_CLASS(nfsd_io_class, + DEFINE_EVENT(nfsd_io_class, nfsd_##name, \ + TP_PROTO(struct svc_rqst *rqstp, \ + struct svc_fh *fhp, \ +- loff_t offset, \ +- unsigned long len), \ ++ u64 offset, \ ++ u32 len), \ + TP_ARGS(rqstp, fhp, offset, len)) + + DEFINE_NFSD_IO_EVENT(read_start); +@@ -636,18 +636,10 @@ DEFINE_CLID_EVENT(confirmed_r); + /* + * from fs/nfsd/filecache.h + */ +-TRACE_DEFINE_ENUM(NFSD_FILE_HASHED); +-TRACE_DEFINE_ENUM(NFSD_FILE_PENDING); +-TRACE_DEFINE_ENUM(NFSD_FILE_BREAK_READ); +-TRACE_DEFINE_ENUM(NFSD_FILE_BREAK_WRITE); +-TRACE_DEFINE_ENUM(NFSD_FILE_REFERENCED); +- + #define show_nf_flags(val) \ + __print_flags(val, "|", \ + { 1 << NFSD_FILE_HASHED, "HASHED" }, \ + { 1 << NFSD_FILE_PENDING, "PENDING" }, \ +- { 1 << NFSD_FILE_BREAK_READ, "BREAK_READ" }, \ +- { 1 << NFSD_FILE_BREAK_WRITE, "BREAK_WRITE" }, \ + { 1 << NFSD_FILE_REFERENCED, "REFERENCED"}) + + DECLARE_EVENT_CLASS(nfsd_file_class, +diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c +index 738d564ca4ce3..d4adc599737d8 100644 +--- a/fs/nfsd/vfs.c ++++ b/fs/nfsd/vfs.c +@@ -433,6 +433,10 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap, + .ia_size = iap->ia_size, + }; + ++ host_err = -EFBIG; ++ if (iap->ia_size < 0) ++ goto out_unlock; ++ + host_err = notify_change(&init_user_ns, dentry, &size_attr, NULL); + if (host_err) + goto out_unlock; +@@ -521,10 +525,11 
@@ __be32 nfsd4_clone_file_range(struct nfsd_file *nf_src, u64 src_pos, + { + struct file *src = nf_src->nf_file; + struct file *dst = nf_dst->nf_file; ++ errseq_t since; + loff_t cloned; + __be32 ret = 0; + +- down_write(&nf_dst->nf_rwsem); ++ since = READ_ONCE(dst->f_wb_err); + cloned = vfs_clone_file_range(src, src_pos, dst, dst_pos, count, 0); + if (cloned < 0) { + ret = nfserrno(cloned); +@@ -538,6 +543,8 @@ __be32 nfsd4_clone_file_range(struct nfsd_file *nf_src, u64 src_pos, + loff_t dst_end = count ? dst_pos + count - 1 : LLONG_MAX; + int status = vfs_fsync_range(dst, dst_pos, dst_end, 0); + ++ if (!status) ++ status = filemap_check_wb_err(dst->f_mapping, since); + if (!status) + status = commit_inode_metadata(file_inode(src)); + if (status < 0) { +@@ -547,13 +554,13 @@ __be32 nfsd4_clone_file_range(struct nfsd_file *nf_src, u64 src_pos, + } + } + out_err: +- up_write(&nf_dst->nf_rwsem); + return ret; + } + + ssize_t nfsd_copy_file_range(struct file *src, u64 src_pos, struct file *dst, + u64 dst_pos, u64 count) + { ++ ssize_t ret; + + /* + * Limit copy to 4MB to prevent indefinitely blocking an nfsd +@@ -564,7 +571,12 @@ ssize_t nfsd_copy_file_range(struct file *src, u64 src_pos, struct file *dst, + * limit like this and pipeline multiple COPY requests. + */ + count = min_t(u64, count, 1 << 22); +- return vfs_copy_file_range(src, src_pos, dst, dst_pos, count, 0); ++ ret = vfs_copy_file_range(src, src_pos, dst, dst_pos, count, 0); ++ ++ if (ret == -EOPNOTSUPP || ret == -EXDEV) ++ ret = vfs_copy_file_range(src, src_pos, dst, dst_pos, count, ++ COPY_FILE_SPLICE); ++ return ret; + } + + __be32 nfsd4_vfs_fallocate(struct svc_rqst *rqstp, struct svc_fh *fhp, +@@ -950,6 +962,7 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_file *nf, + struct super_block *sb = file_inode(file)->i_sb; + struct svc_export *exp; + struct iov_iter iter; ++ errseq_t since; + __be32 nfserr; + int host_err; + int use_wgather; +@@ -987,21 +1000,22 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_file *nf, + flags |= RWF_SYNC; + + iov_iter_kvec(&iter, WRITE, vec, vlen, *cnt); ++ since = READ_ONCE(file->f_wb_err); + if (flags & RWF_SYNC) { +- down_write(&nf->nf_rwsem); ++ if (verf) ++ nfsd_copy_boot_verifier(verf, ++ net_generic(SVC_NET(rqstp), ++ nfsd_net_id)); + host_err = vfs_iter_write(file, &iter, &pos, flags); + if (host_err < 0) + nfsd_reset_boot_verifier(net_generic(SVC_NET(rqstp), + nfsd_net_id)); +- up_write(&nf->nf_rwsem); + } else { +- down_read(&nf->nf_rwsem); + if (verf) + nfsd_copy_boot_verifier(verf, + net_generic(SVC_NET(rqstp), + nfsd_net_id)); + host_err = vfs_iter_write(file, &iter, &pos, flags); +- up_read(&nf->nf_rwsem); + } + if (host_err < 0) { + nfsd_reset_boot_verifier(net_generic(SVC_NET(rqstp), +@@ -1011,6 +1025,9 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_file *nf, + *cnt = host_err; + nfsd_stats_io_write_add(exp, *cnt); + fsnotify_modify(file); ++ host_err = filemap_check_wb_err(file->f_mapping, since); ++ if (host_err < 0) ++ goto out_nfserr; + + if (stable && use_wgather) { + host_err = wait_for_concurrent_writes(file); +@@ -1091,71 +1108,77 @@ out: + } + + #ifdef CONFIG_NFSD_V3 +-static int +-nfsd_filemap_write_and_wait_range(struct nfsd_file *nf, loff_t offset, +- loff_t end) +-{ +- struct address_space *mapping = nf->nf_file->f_mapping; +- int ret = filemap_fdatawrite_range(mapping, offset, end); +- +- if (ret) +- return ret; +- filemap_fdatawait_range_keep_errors(mapping, offset, end); +- return 0; 
+-} +- +-/* +- * Commit all pending writes to stable storage. ++/** ++ * nfsd_commit - Commit pending writes to stable storage ++ * @rqstp: RPC request being processed ++ * @fhp: NFS filehandle ++ * @offset: raw offset from beginning of file ++ * @count: raw count of bytes to sync ++ * @verf: filled in with the server's current write verifier + * +- * Note: we only guarantee that data that lies within the range specified +- * by the 'offset' and 'count' parameters will be synced. ++ * Note: we guarantee that data that lies within the range specified ++ * by the 'offset' and 'count' parameters will be synced. The server ++ * is permitted to sync data that lies outside this range at the ++ * same time. + * + * Unfortunately we cannot lock the file to make sure we return full WCC + * data to the client, as locking happens lower down in the filesystem. ++ * ++ * Return values: ++ * An nfsstat value in network byte order. + */ + __be32 +-nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp, +- loff_t offset, unsigned long count, __be32 *verf) ++nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp, u64 offset, ++ u32 count, __be32 *verf) + { ++ u64 maxbytes; ++ loff_t start, end; ++ struct nfsd_net *nn; + struct nfsd_file *nf; +- loff_t end = LLONG_MAX; +- __be32 err = nfserr_inval; +- +- if (offset < 0) +- goto out; +- if (count != 0) { +- end = offset + (loff_t)count - 1; +- if (end < offset) +- goto out; +- } ++ __be32 err; + + err = nfsd_file_acquire(rqstp, fhp, + NFSD_MAY_WRITE|NFSD_MAY_NOT_BREAK_LEASE, &nf); + if (err) + goto out; ++ ++ /* ++ * Convert the client-provided (offset, count) range to a ++ * (start, end) range. If the client-provided range falls ++ * outside the maximum file size of the underlying FS, ++ * clamp the sync range appropriately. 
++ */ ++ start = 0; ++ end = LLONG_MAX; ++ maxbytes = (u64)fhp->fh_dentry->d_sb->s_maxbytes; ++ if (offset < maxbytes) { ++ start = offset; ++ if (count && (offset + count - 1 < maxbytes)) ++ end = offset + count - 1; ++ } ++ ++ nn = net_generic(nf->nf_net, nfsd_net_id); + if (EX_ISSYNC(fhp->fh_export)) { +- int err2 = nfsd_filemap_write_and_wait_range(nf, offset, end); ++ errseq_t since = READ_ONCE(nf->nf_file->f_wb_err); ++ int err2; + +- down_write(&nf->nf_rwsem); +- if (!err2) +- err2 = vfs_fsync_range(nf->nf_file, offset, end, 0); ++ err2 = vfs_fsync_range(nf->nf_file, start, end, 0); + switch (err2) { + case 0: +- nfsd_copy_boot_verifier(verf, net_generic(nf->nf_net, +- nfsd_net_id)); ++ nfsd_copy_boot_verifier(verf, nn); ++ err2 = filemap_check_wb_err(nf->nf_file->f_mapping, ++ since); ++ err = nfserrno(err2); + break; + case -EINVAL: + err = nfserr_notsupp; + break; + default: ++ nfsd_reset_boot_verifier(nn); + err = nfserrno(err2); +- nfsd_reset_boot_verifier(net_generic(nf->nf_net, +- nfsd_net_id)); + } +- up_write(&nf->nf_rwsem); + } else +- nfsd_copy_boot_verifier(verf, net_generic(nf->nf_net, +- nfsd_net_id)); ++ nfsd_copy_boot_verifier(verf, nn); + + nfsd_file_put(nf); + out: +diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h +index b21b76e6b9a87..3cf5a8a13da50 100644 +--- a/fs/nfsd/vfs.h ++++ b/fs/nfsd/vfs.h +@@ -73,8 +73,8 @@ __be32 do_nfsd_create(struct svc_rqst *, struct svc_fh *, + char *name, int len, struct iattr *attrs, + struct svc_fh *res, int createmode, + u32 *verifier, bool *truncp, bool *created); +-__be32 nfsd_commit(struct svc_rqst *, struct svc_fh *, +- loff_t, unsigned long, __be32 *verf); ++__be32 nfsd_commit(struct svc_rqst *rqst, struct svc_fh *fhp, ++ u64 offset, u32 count, __be32 *verf); + #endif /* CONFIG_NFSD_V3 */ + #ifdef CONFIG_NFSD_V4 + __be32 nfsd_getxattr(struct svc_rqst *rqstp, struct svc_fh *fhp, +diff --git a/fs/nfsd/xdr.h b/fs/nfsd/xdr.h +index f45b4bc93f527..863a35f24910a 100644 +--- a/fs/nfsd/xdr.h ++++ b/fs/nfsd/xdr.h +@@ -32,8 +32,8 @@ struct nfsd_readargs { + struct nfsd_writeargs { + svc_fh fh; + __u32 offset; +- int len; +- struct kvec first; ++ __u32 len; ++ struct xdr_buf payload; + }; + + struct nfsd_createargs { +diff --git a/fs/nfsd/xdr3.h b/fs/nfsd/xdr3.h +index 933008382bbeb..712c117300cb7 100644 +--- a/fs/nfsd/xdr3.h ++++ b/fs/nfsd/xdr3.h +@@ -40,7 +40,7 @@ struct nfsd3_writeargs { + __u32 count; + int stable; + __u32 len; +- struct kvec first; ++ struct xdr_buf payload; + }; + + struct nfsd3_createargs { +diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h +index 3e4052e3bd50e..45257666a6888 100644 +--- a/fs/nfsd/xdr4.h ++++ b/fs/nfsd/xdr4.h +@@ -688,9 +688,10 @@ struct nfsd4_compoundargs { + struct svcxdr_tmpbuf *to_free; + struct svc_rqst *rqstp; + +- u32 taglen; + char * tag; ++ u32 taglen; + u32 minorversion; ++ u32 client_opcnt; + u32 opcnt; + struct nfsd4_op *ops; + struct nfsd4_op iops[8]; +diff --git a/fs/nilfs2/btnode.c b/fs/nilfs2/btnode.c +index 4391fd3abd8f8..e00e184b12615 100644 +--- a/fs/nilfs2/btnode.c ++++ b/fs/nilfs2/btnode.c +@@ -20,6 +20,23 @@ + #include "page.h" + #include "btnode.h" + ++ ++/** ++ * nilfs_init_btnc_inode - initialize B-tree node cache inode ++ * @btnc_inode: inode to be initialized ++ * ++ * nilfs_init_btnc_inode() sets up an inode for B-tree node cache. 
++ */ ++void nilfs_init_btnc_inode(struct inode *btnc_inode) ++{ ++ struct nilfs_inode_info *ii = NILFS_I(btnc_inode); ++ ++ btnc_inode->i_mode = S_IFREG; ++ ii->i_flags = 0; ++ memset(&ii->i_bmap_data, 0, sizeof(struct nilfs_bmap)); ++ mapping_set_gfp_mask(btnc_inode->i_mapping, GFP_NOFS); ++} ++ + void nilfs_btnode_cache_clear(struct address_space *btnc) + { + invalidate_mapping_pages(btnc, 0, -1); +@@ -29,7 +46,7 @@ void nilfs_btnode_cache_clear(struct address_space *btnc) + struct buffer_head * + nilfs_btnode_create_block(struct address_space *btnc, __u64 blocknr) + { +- struct inode *inode = NILFS_BTNC_I(btnc); ++ struct inode *inode = btnc->host; + struct buffer_head *bh; + + bh = nilfs_grab_buffer(inode, btnc, blocknr, BIT(BH_NILFS_Node)); +@@ -57,7 +74,7 @@ int nilfs_btnode_submit_block(struct address_space *btnc, __u64 blocknr, + struct buffer_head **pbh, sector_t *submit_ptr) + { + struct buffer_head *bh; +- struct inode *inode = NILFS_BTNC_I(btnc); ++ struct inode *inode = btnc->host; + struct page *page; + int err; + +@@ -157,7 +174,7 @@ int nilfs_btnode_prepare_change_key(struct address_space *btnc, + struct nilfs_btnode_chkey_ctxt *ctxt) + { + struct buffer_head *obh, *nbh; +- struct inode *inode = NILFS_BTNC_I(btnc); ++ struct inode *inode = btnc->host; + __u64 oldkey = ctxt->oldkey, newkey = ctxt->newkey; + int err; + +diff --git a/fs/nilfs2/btnode.h b/fs/nilfs2/btnode.h +index 0f88dbc9bcb3e..05ab64d354dc9 100644 +--- a/fs/nilfs2/btnode.h ++++ b/fs/nilfs2/btnode.h +@@ -30,6 +30,7 @@ struct nilfs_btnode_chkey_ctxt { + struct buffer_head *newbh; + }; + ++void nilfs_init_btnc_inode(struct inode *btnc_inode); + void nilfs_btnode_cache_clear(struct address_space *); + struct buffer_head *nilfs_btnode_create_block(struct address_space *btnc, + __u64 blocknr); +diff --git a/fs/nilfs2/btree.c b/fs/nilfs2/btree.c +index ab9ec073330f1..def9121a466ef 100644 +--- a/fs/nilfs2/btree.c ++++ b/fs/nilfs2/btree.c +@@ -58,7 +58,8 @@ static void nilfs_btree_free_path(struct nilfs_btree_path *path) + static int nilfs_btree_get_new_block(const struct nilfs_bmap *btree, + __u64 ptr, struct buffer_head **bhp) + { +- struct address_space *btnc = &NILFS_BMAP_I(btree)->i_btnode_cache; ++ struct inode *btnc_inode = NILFS_BMAP_I(btree)->i_assoc_inode; ++ struct address_space *btnc = btnc_inode->i_mapping; + struct buffer_head *bh; + + bh = nilfs_btnode_create_block(btnc, ptr); +@@ -470,7 +471,8 @@ static int __nilfs_btree_get_block(const struct nilfs_bmap *btree, __u64 ptr, + struct buffer_head **bhp, + const struct nilfs_btree_readahead_info *ra) + { +- struct address_space *btnc = &NILFS_BMAP_I(btree)->i_btnode_cache; ++ struct inode *btnc_inode = NILFS_BMAP_I(btree)->i_assoc_inode; ++ struct address_space *btnc = btnc_inode->i_mapping; + struct buffer_head *bh, *ra_bh; + sector_t submit_ptr = 0; + int ret; +@@ -478,9 +480,18 @@ static int __nilfs_btree_get_block(const struct nilfs_bmap *btree, __u64 ptr, + ret = nilfs_btnode_submit_block(btnc, ptr, 0, REQ_OP_READ, 0, &bh, + &submit_ptr); + if (ret) { +- if (ret != -EEXIST) +- return ret; +- goto out_check; ++ if (likely(ret == -EEXIST)) ++ goto out_check; ++ if (ret == -ENOENT) { ++ /* ++ * Block address translation failed due to invalid ++ * value of 'ptr'. In this case, return internal code ++ * -EINVAL (broken bmap) to notify bmap layer of fatal ++ * metadata corruption. 
++ */ ++ ret = -EINVAL; ++ } ++ return ret; + } + + if (ra) { +@@ -1741,6 +1752,10 @@ nilfs_btree_prepare_convert_and_insert(struct nilfs_bmap *btree, __u64 key, + dat = nilfs_bmap_get_dat(btree); + } + ++ ret = nilfs_attach_btree_node_cache(&NILFS_BMAP_I(btree)->vfs_inode); ++ if (ret < 0) ++ return ret; ++ + ret = nilfs_bmap_prepare_alloc_ptr(btree, dreq, dat); + if (ret < 0) + return ret; +@@ -1913,7 +1928,7 @@ static int nilfs_btree_prepare_update_v(struct nilfs_bmap *btree, + path[level].bp_ctxt.newkey = path[level].bp_newreq.bpr_ptr; + path[level].bp_ctxt.bh = path[level].bp_bh; + ret = nilfs_btnode_prepare_change_key( +- &NILFS_BMAP_I(btree)->i_btnode_cache, ++ NILFS_BMAP_I(btree)->i_assoc_inode->i_mapping, + &path[level].bp_ctxt); + if (ret < 0) { + nilfs_dat_abort_update(dat, +@@ -1939,7 +1954,7 @@ static void nilfs_btree_commit_update_v(struct nilfs_bmap *btree, + + if (buffer_nilfs_node(path[level].bp_bh)) { + nilfs_btnode_commit_change_key( +- &NILFS_BMAP_I(btree)->i_btnode_cache, ++ NILFS_BMAP_I(btree)->i_assoc_inode->i_mapping, + &path[level].bp_ctxt); + path[level].bp_bh = path[level].bp_ctxt.bh; + } +@@ -1958,7 +1973,7 @@ static void nilfs_btree_abort_update_v(struct nilfs_bmap *btree, + &path[level].bp_newreq.bpr_req); + if (buffer_nilfs_node(path[level].bp_bh)) + nilfs_btnode_abort_change_key( +- &NILFS_BMAP_I(btree)->i_btnode_cache, ++ NILFS_BMAP_I(btree)->i_assoc_inode->i_mapping, + &path[level].bp_ctxt); + } + +@@ -2134,7 +2149,8 @@ static void nilfs_btree_add_dirty_buffer(struct nilfs_bmap *btree, + static void nilfs_btree_lookup_dirty_buffers(struct nilfs_bmap *btree, + struct list_head *listp) + { +- struct address_space *btcache = &NILFS_BMAP_I(btree)->i_btnode_cache; ++ struct inode *btnc_inode = NILFS_BMAP_I(btree)->i_assoc_inode; ++ struct address_space *btcache = btnc_inode->i_mapping; + struct list_head lists[NILFS_BTREE_LEVEL_MAX]; + struct pagevec pvec; + struct buffer_head *bh, *head; +@@ -2188,12 +2204,12 @@ static int nilfs_btree_assign_p(struct nilfs_bmap *btree, + path[level].bp_ctxt.newkey = blocknr; + path[level].bp_ctxt.bh = *bh; + ret = nilfs_btnode_prepare_change_key( +- &NILFS_BMAP_I(btree)->i_btnode_cache, ++ NILFS_BMAP_I(btree)->i_assoc_inode->i_mapping, + &path[level].bp_ctxt); + if (ret < 0) + return ret; + nilfs_btnode_commit_change_key( +- &NILFS_BMAP_I(btree)->i_btnode_cache, ++ NILFS_BMAP_I(btree)->i_assoc_inode->i_mapping, + &path[level].bp_ctxt); + *bh = path[level].bp_ctxt.bh; + } +@@ -2398,6 +2414,10 @@ int nilfs_btree_init(struct nilfs_bmap *bmap) + + if (nilfs_btree_root_broken(nilfs_btree_get_root(bmap), bmap->b_inode)) + ret = -EIO; ++ else ++ ret = nilfs_attach_btree_node_cache( ++ &NILFS_BMAP_I(bmap)->vfs_inode); ++ + return ret; + } + +diff --git a/fs/nilfs2/dat.c b/fs/nilfs2/dat.c +index 8bccdf1158fce..8fedc7104320d 100644 +--- a/fs/nilfs2/dat.c ++++ b/fs/nilfs2/dat.c +@@ -111,6 +111,13 @@ static void nilfs_dat_commit_free(struct inode *dat, + kunmap_atomic(kaddr); + + nilfs_dat_commit_entry(dat, req); ++ ++ if (unlikely(req->pr_desc_bh == NULL || req->pr_bitmap_bh == NULL)) { ++ nilfs_error(dat->i_sb, ++ "state inconsistency probably due to duplicate use of vblocknr = %llu", ++ (unsigned long long)req->pr_entry_nr); ++ return; ++ } + nilfs_palloc_commit_free_entry(dat, req); + } + +@@ -497,7 +504,9 @@ int nilfs_dat_read(struct super_block *sb, size_t entry_size, + di = NILFS_DAT_I(dat); + lockdep_set_class(&di->mi.mi_sem, &dat_lock_key); + nilfs_palloc_setup_cache(dat, &di->palloc_cache); +- nilfs_mdt_setup_shadow_map(dat, 
&di->shadow); ++ err = nilfs_mdt_setup_shadow_map(dat, &di->shadow); ++ if (err) ++ goto failed; + + err = nilfs_read_inode_common(dat, raw_inode); + if (err) +diff --git a/fs/nilfs2/gcinode.c b/fs/nilfs2/gcinode.c +index 4483204968568..aadea660c66c9 100644 +--- a/fs/nilfs2/gcinode.c ++++ b/fs/nilfs2/gcinode.c +@@ -126,9 +126,10 @@ int nilfs_gccache_submit_read_data(struct inode *inode, sector_t blkoff, + int nilfs_gccache_submit_read_node(struct inode *inode, sector_t pbn, + __u64 vbn, struct buffer_head **out_bh) + { ++ struct inode *btnc_inode = NILFS_I(inode)->i_assoc_inode; + int ret; + +- ret = nilfs_btnode_submit_block(&NILFS_I(inode)->i_btnode_cache, ++ ret = nilfs_btnode_submit_block(btnc_inode->i_mapping, + vbn ? : pbn, pbn, REQ_OP_READ, 0, + out_bh, &pbn); + if (ret == -EEXIST) /* internal code (cache hit) */ +@@ -170,7 +171,7 @@ int nilfs_init_gcinode(struct inode *inode) + ii->i_flags = 0; + nilfs_bmap_init_gc(ii->i_bmap); + +- return 0; ++ return nilfs_attach_btree_node_cache(inode); + } + + /** +@@ -185,7 +186,7 @@ void nilfs_remove_all_gcinodes(struct the_nilfs *nilfs) + ii = list_first_entry(head, struct nilfs_inode_info, i_dirty); + list_del_init(&ii->i_dirty); + truncate_inode_pages(&ii->vfs_inode.i_data, 0); +- nilfs_btnode_cache_clear(&ii->i_btnode_cache); ++ nilfs_btnode_cache_clear(ii->i_assoc_inode->i_mapping); + iput(&ii->vfs_inode); + } + } +diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c +index 2e8eb263cf0f6..f4e74fac2c51f 100644 +--- a/fs/nilfs2/inode.c ++++ b/fs/nilfs2/inode.c +@@ -29,12 +29,16 @@ + * @cno: checkpoint number + * @root: pointer on NILFS root object (mounted checkpoint) + * @for_gc: inode for GC flag ++ * @for_btnc: inode for B-tree node cache flag ++ * @for_shadow: inode for shadowed page cache flag + */ + struct nilfs_iget_args { + u64 ino; + __u64 cno; + struct nilfs_root *root; +- int for_gc; ++ bool for_gc; ++ bool for_btnc; ++ bool for_shadow; + }; + + static int nilfs_iget_test(struct inode *inode, void *opaque); +@@ -314,7 +318,8 @@ static int nilfs_insert_inode_locked(struct inode *inode, + unsigned long ino) + { + struct nilfs_iget_args args = { +- .ino = ino, .root = root, .cno = 0, .for_gc = 0 ++ .ino = ino, .root = root, .cno = 0, .for_gc = false, ++ .for_btnc = false, .for_shadow = false + }; + + return insert_inode_locked4(inode, ino, nilfs_iget_test, &args); +@@ -327,6 +332,7 @@ struct inode *nilfs_new_inode(struct inode *dir, umode_t mode) + struct inode *inode; + struct nilfs_inode_info *ii; + struct nilfs_root *root; ++ struct buffer_head *bh; + int err = -ENOMEM; + ino_t ino; + +@@ -342,11 +348,25 @@ struct inode *nilfs_new_inode(struct inode *dir, umode_t mode) + ii->i_state = BIT(NILFS_I_NEW); + ii->i_root = root; + +- err = nilfs_ifile_create_inode(root->ifile, &ino, &ii->i_bh); ++ err = nilfs_ifile_create_inode(root->ifile, &ino, &bh); + if (unlikely(err)) + goto failed_ifile_create_inode; + /* reference count of i_bh inherits from nilfs_mdt_read_block() */ + ++ if (unlikely(ino < NILFS_USER_INO)) { ++ nilfs_warn(sb, ++ "inode bitmap is inconsistent for reserved inodes"); ++ do { ++ brelse(bh); ++ err = nilfs_ifile_create_inode(root->ifile, &ino, &bh); ++ if (unlikely(err)) ++ goto failed_ifile_create_inode; ++ } while (ino < NILFS_USER_INO); ++ ++ nilfs_info(sb, "repaired inode bitmap for reserved inodes"); ++ } ++ ii->i_bh = bh; ++ + atomic64_inc(&root->inodes_count); + inode_init_owner(&init_user_ns, inode, dir, mode); + inode->i_ino = ino; +@@ -439,6 +459,8 @@ int nilfs_read_inode_common(struct inode *inode, + 
inode->i_atime.tv_nsec = le32_to_cpu(raw_inode->i_mtime_nsec); + inode->i_ctime.tv_nsec = le32_to_cpu(raw_inode->i_ctime_nsec); + inode->i_mtime.tv_nsec = le32_to_cpu(raw_inode->i_mtime_nsec); ++ if (nilfs_is_metadata_file_inode(inode) && !S_ISREG(inode->i_mode)) ++ return -EIO; /* this inode is for metadata and corrupted */ + if (inode->i_nlink == 0) + return -ESTALE; /* this inode is deleted */ + +@@ -527,6 +549,19 @@ static int nilfs_iget_test(struct inode *inode, void *opaque) + return 0; + + ii = NILFS_I(inode); ++ if (test_bit(NILFS_I_BTNC, &ii->i_state)) { ++ if (!args->for_btnc) ++ return 0; ++ } else if (args->for_btnc) { ++ return 0; ++ } ++ if (test_bit(NILFS_I_SHADOW, &ii->i_state)) { ++ if (!args->for_shadow) ++ return 0; ++ } else if (args->for_shadow) { ++ return 0; ++ } ++ + if (!test_bit(NILFS_I_GCINODE, &ii->i_state)) + return !args->for_gc; + +@@ -538,15 +573,17 @@ static int nilfs_iget_set(struct inode *inode, void *opaque) + struct nilfs_iget_args *args = opaque; + + inode->i_ino = args->ino; +- if (args->for_gc) { ++ NILFS_I(inode)->i_cno = args->cno; ++ NILFS_I(inode)->i_root = args->root; ++ if (args->root && args->ino == NILFS_ROOT_INO) ++ nilfs_get_root(args->root); ++ ++ if (args->for_gc) + NILFS_I(inode)->i_state = BIT(NILFS_I_GCINODE); +- NILFS_I(inode)->i_cno = args->cno; +- NILFS_I(inode)->i_root = NULL; +- } else { +- if (args->root && args->ino == NILFS_ROOT_INO) +- nilfs_get_root(args->root); +- NILFS_I(inode)->i_root = args->root; +- } ++ if (args->for_btnc) ++ NILFS_I(inode)->i_state |= BIT(NILFS_I_BTNC); ++ if (args->for_shadow) ++ NILFS_I(inode)->i_state |= BIT(NILFS_I_SHADOW); + return 0; + } + +@@ -554,7 +591,8 @@ struct inode *nilfs_ilookup(struct super_block *sb, struct nilfs_root *root, + unsigned long ino) + { + struct nilfs_iget_args args = { +- .ino = ino, .root = root, .cno = 0, .for_gc = 0 ++ .ino = ino, .root = root, .cno = 0, .for_gc = false, ++ .for_btnc = false, .for_shadow = false + }; + + return ilookup5(sb, ino, nilfs_iget_test, &args); +@@ -564,7 +602,8 @@ struct inode *nilfs_iget_locked(struct super_block *sb, struct nilfs_root *root, + unsigned long ino) + { + struct nilfs_iget_args args = { +- .ino = ino, .root = root, .cno = 0, .for_gc = 0 ++ .ino = ino, .root = root, .cno = 0, .for_gc = false, ++ .for_btnc = false, .for_shadow = false + }; + + return iget5_locked(sb, ino, nilfs_iget_test, nilfs_iget_set, &args); +@@ -595,7 +634,8 @@ struct inode *nilfs_iget_for_gc(struct super_block *sb, unsigned long ino, + __u64 cno) + { + struct nilfs_iget_args args = { +- .ino = ino, .root = NULL, .cno = cno, .for_gc = 1 ++ .ino = ino, .root = NULL, .cno = cno, .for_gc = true, ++ .for_btnc = false, .for_shadow = false + }; + struct inode *inode; + int err; +@@ -615,6 +655,113 @@ struct inode *nilfs_iget_for_gc(struct super_block *sb, unsigned long ino, + return inode; + } + ++/** ++ * nilfs_attach_btree_node_cache - attach a B-tree node cache to the inode ++ * @inode: inode object ++ * ++ * nilfs_attach_btree_node_cache() attaches a B-tree node cache to @inode, ++ * or does nothing if the inode already has it. This function allocates ++ * an additional inode to maintain page cache of B-tree nodes one-on-one. ++ * ++ * Return Value: On success, 0 is returned. On errors, one of the following ++ * negative error code is returned. ++ * ++ * %-ENOMEM - Insufficient memory available. 
++ */ ++int nilfs_attach_btree_node_cache(struct inode *inode) ++{ ++ struct nilfs_inode_info *ii = NILFS_I(inode); ++ struct inode *btnc_inode; ++ struct nilfs_iget_args args; ++ ++ if (ii->i_assoc_inode) ++ return 0; ++ ++ args.ino = inode->i_ino; ++ args.root = ii->i_root; ++ args.cno = ii->i_cno; ++ args.for_gc = test_bit(NILFS_I_GCINODE, &ii->i_state) != 0; ++ args.for_btnc = true; ++ args.for_shadow = test_bit(NILFS_I_SHADOW, &ii->i_state) != 0; ++ ++ btnc_inode = iget5_locked(inode->i_sb, inode->i_ino, nilfs_iget_test, ++ nilfs_iget_set, &args); ++ if (unlikely(!btnc_inode)) ++ return -ENOMEM; ++ if (btnc_inode->i_state & I_NEW) { ++ nilfs_init_btnc_inode(btnc_inode); ++ unlock_new_inode(btnc_inode); ++ } ++ NILFS_I(btnc_inode)->i_assoc_inode = inode; ++ NILFS_I(btnc_inode)->i_bmap = ii->i_bmap; ++ ii->i_assoc_inode = btnc_inode; ++ ++ return 0; ++} ++ ++/** ++ * nilfs_detach_btree_node_cache - detach the B-tree node cache from the inode ++ * @inode: inode object ++ * ++ * nilfs_detach_btree_node_cache() detaches the B-tree node cache and its ++ * holder inode bound to @inode, or does nothing if @inode doesn't have it. ++ */ ++void nilfs_detach_btree_node_cache(struct inode *inode) ++{ ++ struct nilfs_inode_info *ii = NILFS_I(inode); ++ struct inode *btnc_inode = ii->i_assoc_inode; ++ ++ if (btnc_inode) { ++ NILFS_I(btnc_inode)->i_assoc_inode = NULL; ++ ii->i_assoc_inode = NULL; ++ iput(btnc_inode); ++ } ++} ++ ++/** ++ * nilfs_iget_for_shadow - obtain inode for shadow mapping ++ * @inode: inode object that uses shadow mapping ++ * ++ * nilfs_iget_for_shadow() allocates a pair of inodes that holds page ++ * caches for shadow mapping. The page cache for data pages is set up ++ * in one inode and the one for b-tree node pages is set up in the ++ * other inode, which is attached to the former inode. ++ * ++ * Return Value: On success, a pointer to the inode for data pages is ++ * returned. On errors, one of the following negative error code is returned ++ * in a pointer type. ++ * ++ * %-ENOMEM - Insufficient memory available. 
++ */ ++struct inode *nilfs_iget_for_shadow(struct inode *inode) ++{ ++ struct nilfs_iget_args args = { ++ .ino = inode->i_ino, .root = NULL, .cno = 0, .for_gc = false, ++ .for_btnc = false, .for_shadow = true ++ }; ++ struct inode *s_inode; ++ int err; ++ ++ s_inode = iget5_locked(inode->i_sb, inode->i_ino, nilfs_iget_test, ++ nilfs_iget_set, &args); ++ if (unlikely(!s_inode)) ++ return ERR_PTR(-ENOMEM); ++ if (!(s_inode->i_state & I_NEW)) ++ return inode; ++ ++ NILFS_I(s_inode)->i_flags = 0; ++ memset(NILFS_I(s_inode)->i_bmap, 0, sizeof(struct nilfs_bmap)); ++ mapping_set_gfp_mask(s_inode->i_mapping, GFP_NOFS); ++ ++ err = nilfs_attach_btree_node_cache(s_inode); ++ if (unlikely(err)) { ++ iget_failed(s_inode); ++ return ERR_PTR(err); ++ } ++ unlock_new_inode(s_inode); ++ return s_inode; ++} ++ + void nilfs_write_inode_common(struct inode *inode, + struct nilfs_inode *raw_inode, int has_bmap) + { +@@ -762,7 +909,8 @@ static void nilfs_clear_inode(struct inode *inode) + if (test_bit(NILFS_I_BMAP, &ii->i_state)) + nilfs_bmap_clear(ii->i_bmap); + +- nilfs_btnode_cache_clear(&ii->i_btnode_cache); ++ if (!test_bit(NILFS_I_BTNC, &ii->i_state)) ++ nilfs_detach_btree_node_cache(inode); + + if (ii->i_root && inode->i_ino == NILFS_ROOT_INO) + nilfs_put_root(ii->i_root); +diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c +index 640ac8fe891e6..a8509f364bf5c 100644 +--- a/fs/nilfs2/ioctl.c ++++ b/fs/nilfs2/ioctl.c +@@ -1114,7 +1114,14 @@ static int nilfs_ioctl_set_alloc_range(struct inode *inode, void __user *argp) + + minseg = range[0] + segbytes - 1; + do_div(minseg, segbytes); ++ ++ if (range[1] < 4096) ++ goto out; ++ + maxseg = NILFS_SB2_OFFSET_BYTES(range[1]); ++ if (maxseg < segbytes) ++ goto out; ++ + do_div(maxseg, segbytes); + maxseg--; + +diff --git a/fs/nilfs2/mdt.c b/fs/nilfs2/mdt.c +index 97769fe4d5885..131b5add32eeb 100644 +--- a/fs/nilfs2/mdt.c ++++ b/fs/nilfs2/mdt.c +@@ -470,9 +470,18 @@ int nilfs_mdt_init(struct inode *inode, gfp_t gfp_mask, size_t objsz) + void nilfs_mdt_clear(struct inode *inode) + { + struct nilfs_mdt_info *mdi = NILFS_MDT(inode); ++ struct nilfs_shadow_map *shadow = mdi->mi_shadow; + + if (mdi->mi_palloc_cache) + nilfs_palloc_destroy_cache(inode); ++ ++ if (shadow) { ++ struct inode *s_inode = shadow->inode; ++ ++ shadow->inode = NULL; ++ iput(s_inode); ++ mdi->mi_shadow = NULL; ++ } + } + + /** +@@ -506,12 +515,15 @@ int nilfs_mdt_setup_shadow_map(struct inode *inode, + struct nilfs_shadow_map *shadow) + { + struct nilfs_mdt_info *mi = NILFS_MDT(inode); ++ struct inode *s_inode; + + INIT_LIST_HEAD(&shadow->frozen_buffers); +- address_space_init_once(&shadow->frozen_data); +- nilfs_mapping_init(&shadow->frozen_data, inode); +- address_space_init_once(&shadow->frozen_btnodes); +- nilfs_mapping_init(&shadow->frozen_btnodes, inode); ++ ++ s_inode = nilfs_iget_for_shadow(inode); ++ if (IS_ERR(s_inode)) ++ return PTR_ERR(s_inode); ++ ++ shadow->inode = s_inode; + mi->mi_shadow = shadow; + return 0; + } +@@ -525,14 +537,15 @@ int nilfs_mdt_save_to_shadow_map(struct inode *inode) + struct nilfs_mdt_info *mi = NILFS_MDT(inode); + struct nilfs_inode_info *ii = NILFS_I(inode); + struct nilfs_shadow_map *shadow = mi->mi_shadow; ++ struct inode *s_inode = shadow->inode; + int ret; + +- ret = nilfs_copy_dirty_pages(&shadow->frozen_data, inode->i_mapping); ++ ret = nilfs_copy_dirty_pages(s_inode->i_mapping, inode->i_mapping); + if (ret) + goto out; + +- ret = nilfs_copy_dirty_pages(&shadow->frozen_btnodes, +- &ii->i_btnode_cache); ++ ret = 
nilfs_copy_dirty_pages(NILFS_I(s_inode)->i_assoc_inode->i_mapping, ++ ii->i_assoc_inode->i_mapping); + if (ret) + goto out; + +@@ -548,7 +561,7 @@ int nilfs_mdt_freeze_buffer(struct inode *inode, struct buffer_head *bh) + struct page *page; + int blkbits = inode->i_blkbits; + +- page = grab_cache_page(&shadow->frozen_data, bh->b_page->index); ++ page = grab_cache_page(shadow->inode->i_mapping, bh->b_page->index); + if (!page) + return -ENOMEM; + +@@ -580,7 +593,7 @@ nilfs_mdt_get_frozen_buffer(struct inode *inode, struct buffer_head *bh) + struct page *page; + int n; + +- page = find_lock_page(&shadow->frozen_data, bh->b_page->index); ++ page = find_lock_page(shadow->inode->i_mapping, bh->b_page->index); + if (page) { + if (page_has_buffers(page)) { + n = bh_offset(bh) >> inode->i_blkbits; +@@ -621,10 +634,11 @@ void nilfs_mdt_restore_from_shadow_map(struct inode *inode) + nilfs_palloc_clear_cache(inode); + + nilfs_clear_dirty_pages(inode->i_mapping, true); +- nilfs_copy_back_pages(inode->i_mapping, &shadow->frozen_data); ++ nilfs_copy_back_pages(inode->i_mapping, shadow->inode->i_mapping); + +- nilfs_clear_dirty_pages(&ii->i_btnode_cache, true); +- nilfs_copy_back_pages(&ii->i_btnode_cache, &shadow->frozen_btnodes); ++ nilfs_clear_dirty_pages(ii->i_assoc_inode->i_mapping, true); ++ nilfs_copy_back_pages(ii->i_assoc_inode->i_mapping, ++ NILFS_I(shadow->inode)->i_assoc_inode->i_mapping); + + nilfs_bmap_restore(ii->i_bmap, &shadow->bmap_store); + +@@ -639,10 +653,11 @@ void nilfs_mdt_clear_shadow_map(struct inode *inode) + { + struct nilfs_mdt_info *mi = NILFS_MDT(inode); + struct nilfs_shadow_map *shadow = mi->mi_shadow; ++ struct inode *shadow_btnc_inode = NILFS_I(shadow->inode)->i_assoc_inode; + + down_write(&mi->mi_sem); + nilfs_release_frozen_buffers(shadow); +- truncate_inode_pages(&shadow->frozen_data, 0); +- truncate_inode_pages(&shadow->frozen_btnodes, 0); ++ truncate_inode_pages(shadow->inode->i_mapping, 0); ++ truncate_inode_pages(shadow_btnc_inode->i_mapping, 0); + up_write(&mi->mi_sem); + } +diff --git a/fs/nilfs2/mdt.h b/fs/nilfs2/mdt.h +index e77aea4bb921c..9d8ac0d27c16e 100644 +--- a/fs/nilfs2/mdt.h ++++ b/fs/nilfs2/mdt.h +@@ -18,14 +18,12 @@ + /** + * struct nilfs_shadow_map - shadow mapping of meta data file + * @bmap_store: shadow copy of bmap state +- * @frozen_data: shadowed dirty data pages +- * @frozen_btnodes: shadowed dirty b-tree nodes' pages ++ * @inode: holder of page caches used in shadow mapping + * @frozen_buffers: list of frozen buffers + */ + struct nilfs_shadow_map { + struct nilfs_bmap_store bmap_store; +- struct address_space frozen_data; +- struct address_space frozen_btnodes; ++ struct inode *inode; + struct list_head frozen_buffers; + }; + +diff --git a/fs/nilfs2/nilfs.h b/fs/nilfs2/nilfs.h +index 60b21b6eeac06..aceb8aadca148 100644 +--- a/fs/nilfs2/nilfs.h ++++ b/fs/nilfs2/nilfs.h +@@ -28,7 +28,7 @@ + * @i_xattr: <TODO> + * @i_dir_start_lookup: page index of last successful search + * @i_cno: checkpoint number for GC inode +- * @i_btnode_cache: cached pages of b-tree nodes ++ * @i_assoc_inode: associated inode (B-tree node cache holder or back pointer) + * @i_dirty: list for connecting dirty files + * @xattr_sem: semaphore for extended attributes processing + * @i_bh: buffer contains disk inode +@@ -43,7 +43,7 @@ struct nilfs_inode_info { + __u64 i_xattr; /* sector_t ??? 
*/ + __u32 i_dir_start_lookup; + __u64 i_cno; /* check point number for GC inode */ +- struct address_space i_btnode_cache; ++ struct inode *i_assoc_inode; + struct list_head i_dirty; /* List for connecting dirty files */ + + #ifdef CONFIG_NILFS_XATTR +@@ -75,13 +75,6 @@ NILFS_BMAP_I(const struct nilfs_bmap *bmap) + return container_of(bmap, struct nilfs_inode_info, i_bmap_data); + } + +-static inline struct inode *NILFS_BTNC_I(struct address_space *btnc) +-{ +- struct nilfs_inode_info *ii = +- container_of(btnc, struct nilfs_inode_info, i_btnode_cache); +- return &ii->vfs_inode; +-} +- + /* + * Dynamic state flags of NILFS on-memory inode (i_state) + */ +@@ -98,6 +91,8 @@ enum { + NILFS_I_INODE_SYNC, /* dsync is not allowed for inode */ + NILFS_I_BMAP, /* has bmap and btnode_cache */ + NILFS_I_GCINODE, /* inode for GC, on memory only */ ++ NILFS_I_BTNC, /* inode for btree node cache */ ++ NILFS_I_SHADOW, /* inode for shadowed page cache */ + }; + + /* +@@ -203,6 +198,9 @@ static inline int nilfs_acl_chmod(struct inode *inode) + + static inline int nilfs_init_acl(struct inode *inode, struct inode *dir) + { ++ if (S_ISLNK(inode->i_mode)) ++ return 0; ++ + inode->i_mode &= ~current_umask(); + return 0; + } +@@ -267,6 +265,9 @@ struct inode *nilfs_iget(struct super_block *sb, struct nilfs_root *root, + unsigned long ino); + extern struct inode *nilfs_iget_for_gc(struct super_block *sb, + unsigned long ino, __u64 cno); ++int nilfs_attach_btree_node_cache(struct inode *inode); ++void nilfs_detach_btree_node_cache(struct inode *inode); ++struct inode *nilfs_iget_for_shadow(struct inode *inode); + extern void nilfs_update_inode(struct inode *, struct buffer_head *, int); + extern void nilfs_truncate(struct inode *); + extern void nilfs_evict_inode(struct inode *); +diff --git a/fs/nilfs2/page.c b/fs/nilfs2/page.c +index 171fb5cd427fd..d1a148f0cae33 100644 +--- a/fs/nilfs2/page.c ++++ b/fs/nilfs2/page.c +@@ -448,10 +448,9 @@ void nilfs_mapping_init(struct address_space *mapping, struct inode *inode) + /* + * NILFS2 needs clear_page_dirty() in the following two cases: + * +- * 1) For B-tree node pages and data pages of the dat/gcdat, NILFS2 clears +- * page dirty flags when it copies back pages from the shadow cache +- * (gcdat->{i_mapping,i_btnode_cache}) to its original cache +- * (dat->{i_mapping,i_btnode_cache}). ++ * 1) For B-tree node pages and data pages of DAT file, NILFS2 clears dirty ++ * flag of pages when it copies back pages from shadow cache to the ++ * original cache. + * + * 2) Some B-tree operations like insertion or deletion may dispose buffers + * in dirty state, and this needs to cancel the dirty state of their pages. 
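The 4096-byte lower-bound checks this patch adds around NILFS_SB2_OFFSET_BYTES() (in nilfs_ioctl_set_alloc_range() above, and again in nilfs_resize_fs() further down, where the patch comments "Prevent underflow in second superblock position calculation") guard an unsigned underflow: the macro derives the second-superblock offset by rounding the size down to a 4 KiB boundary and stepping back one 4 KiB block, so any size below 4096 wraps around to an enormous offset. Below is a minimal stand-alone user-space sketch of the failure mode, not kernel code; the macro body is an assumption taken from the kernel's nilfs2 on-disk header.

    /*
     * Hedged sketch: demonstrates the wraparound the patch rejects.
     * Assumption: NILFS_SB2_OFFSET_BYTES matches the definition in
     * include/uapi/linux/nilfs2_ondisk.h.
     */
    #include <stdio.h>
    #include <stdint.h>
    #include <inttypes.h>

    #define NILFS_SB2_OFFSET_BYTES(devsize) ((((devsize) >> 12) - 1) << 12)

    int main(void)
    {
            uint64_t sizes[] = { 0, 4095, 4096, 8192 };

            for (unsigned i = 0; i < sizeof(sizes) / sizeof(sizes[0]); i++) {
                    uint64_t devsize = sizes[i];

                    if (devsize < 4096) {
                            /* The guard the patch adds: below 4096 bytes,
                             * devsize >> 12 is 0 and the "- 1" wraps the
                             * unsigned value around to a huge offset. */
                            printf("%" PRIu64 ": rejected (macro would yield %" PRIu64 ")\n",
                                   devsize, NILFS_SB2_OFFSET_BYTES(devsize));
                            continue;
                    }
                    printf("%" PRIu64 ": second superblock at byte %" PRIu64 "\n",
                           devsize, NILFS_SB2_OFFSET_BYTES(devsize));
            }
            return 0;
    }

With the guard in place, the ioctl and resize paths return an error for such sizes instead of computing a superblock position far past the end of the device.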
+diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c +index 686c8ee7b29ce..6d21f9bc6de1c 100644 +--- a/fs/nilfs2/segment.c ++++ b/fs/nilfs2/segment.c +@@ -317,7 +317,7 @@ void nilfs_relax_pressure_in_lock(struct super_block *sb) + struct the_nilfs *nilfs = sb->s_fs_info; + struct nilfs_sc_info *sci = nilfs->ns_writer; + +- if (!sci || !sci->sc_flush_request) ++ if (sb_rdonly(sb) || unlikely(!sci) || !sci->sc_flush_request) + return; + + set_bit(NILFS_SC_PRIOR_FLUSH, &sci->sc_flags); +@@ -733,15 +733,18 @@ static void nilfs_lookup_dirty_node_buffers(struct inode *inode, + struct list_head *listp) + { + struct nilfs_inode_info *ii = NILFS_I(inode); +- struct address_space *mapping = &ii->i_btnode_cache; ++ struct inode *btnc_inode = ii->i_assoc_inode; + struct pagevec pvec; + struct buffer_head *bh, *head; + unsigned int i; + pgoff_t index = 0; + ++ if (!btnc_inode) ++ return; ++ + pagevec_init(&pvec); + +- while (pagevec_lookup_tag(&pvec, mapping, &index, ++ while (pagevec_lookup_tag(&pvec, btnc_inode->i_mapping, &index, + PAGECACHE_TAG_DIRTY)) { + for (i = 0; i < pagevec_count(&pvec); i++) { + bh = head = page_buffers(pvec.pages[i]); +@@ -872,9 +875,11 @@ static int nilfs_segctor_create_checkpoint(struct nilfs_sc_info *sci) + nilfs_mdt_mark_dirty(nilfs->ns_cpfile); + nilfs_cpfile_put_checkpoint( + nilfs->ns_cpfile, nilfs->ns_cno, bh_cp); +- } else +- WARN_ON(err == -EINVAL || err == -ENOENT); +- ++ } else if (err == -EINVAL || err == -ENOENT) { ++ nilfs_error(sci->sc_super, ++ "checkpoint creation failed due to metadata corruption."); ++ err = -EIO; ++ } + return err; + } + +@@ -888,7 +893,11 @@ static int nilfs_segctor_fill_in_checkpoint(struct nilfs_sc_info *sci) + err = nilfs_cpfile_get_checkpoint(nilfs->ns_cpfile, nilfs->ns_cno, 0, + &raw_cp, &bh_cp); + if (unlikely(err)) { +- WARN_ON(err == -EINVAL || err == -ENOENT); ++ if (err == -EINVAL || err == -ENOENT) { ++ nilfs_error(sci->sc_super, ++ "checkpoint finalization failed due to metadata corruption."); ++ err = -EIO; ++ } + goto failed_ibh; + } + raw_cp->cp_snapshot_list.ssl_next = 0; +@@ -2234,7 +2243,7 @@ int nilfs_construct_segment(struct super_block *sb) + struct nilfs_transaction_info *ti; + int err; + +- if (!sci) ++ if (sb_rdonly(sb) || unlikely(!sci)) + return -EROFS; + + /* A call inside transactions causes a deadlock. */ +@@ -2273,7 +2282,7 @@ int nilfs_construct_dsync_segment(struct super_block *sb, struct inode *inode, + struct nilfs_transaction_info ti; + int err = 0; + +- if (!sci) ++ if (sb_rdonly(sb) || unlikely(!sci)) + return -EROFS; + + nilfs_transaction_lock(sb, &ti, 0); +@@ -2410,7 +2419,7 @@ nilfs_remove_written_gcinodes(struct the_nilfs *nilfs, struct list_head *head) + continue; + list_del_init(&ii->i_dirty); + truncate_inode_pages(&ii->vfs_inode.i_data, 0); +- nilfs_btnode_cache_clear(&ii->i_btnode_cache); ++ nilfs_btnode_cache_clear(ii->i_assoc_inode->i_mapping); + iput(&ii->vfs_inode); + } + } +@@ -2769,11 +2778,12 @@ int nilfs_attach_log_writer(struct super_block *sb, struct nilfs_root *root) + + if (nilfs->ns_writer) { + /* +- * This happens if the filesystem was remounted +- * read/write after nilfs_error degenerated it into a +- * read-only mount. ++ * This happens if the filesystem is made read-only by ++ * __nilfs_error or nilfs_remount and then remounted ++ * read/write. In these cases, reuse the existing ++ * writer. 
+ */ +- nilfs_detach_log_writer(sb); ++ return 0; + } + + nilfs->ns_writer = nilfs_segctor_new(sb, root); +@@ -2783,10 +2793,9 @@ int nilfs_attach_log_writer(struct super_block *sb, struct nilfs_root *root) + inode_attach_wb(nilfs->ns_bdev->bd_inode, NULL); + + err = nilfs_segctor_start_thread(nilfs->ns_writer); +- if (err) { +- kfree(nilfs->ns_writer); +- nilfs->ns_writer = NULL; +- } ++ if (unlikely(err)) ++ nilfs_detach_log_writer(sb); ++ + return err; + } + +diff --git a/fs/nilfs2/sufile.c b/fs/nilfs2/sufile.c +index 63722475e17e1..51f4cb060231f 100644 +--- a/fs/nilfs2/sufile.c ++++ b/fs/nilfs2/sufile.c +@@ -495,14 +495,22 @@ void nilfs_sufile_do_free(struct inode *sufile, __u64 segnum, + int nilfs_sufile_mark_dirty(struct inode *sufile, __u64 segnum) + { + struct buffer_head *bh; ++ void *kaddr; ++ struct nilfs_segment_usage *su; + int ret; + ++ down_write(&NILFS_MDT(sufile)->mi_sem); + ret = nilfs_sufile_get_segment_usage_block(sufile, segnum, 0, &bh); + if (!ret) { + mark_buffer_dirty(bh); + nilfs_mdt_mark_dirty(sufile); ++ kaddr = kmap_atomic(bh->b_page); ++ su = nilfs_sufile_block_get_segment_usage(sufile, segnum, bh, kaddr); ++ nilfs_segment_usage_set_dirty(su); ++ kunmap_atomic(kaddr); + brelse(bh); + } ++ up_write(&NILFS_MDT(sufile)->mi_sem); + return ret; + } + +diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c +index f6b2d280aab5a..bc96900262254 100644 +--- a/fs/nilfs2/super.c ++++ b/fs/nilfs2/super.c +@@ -157,7 +157,8 @@ struct inode *nilfs_alloc_inode(struct super_block *sb) + ii->i_bh = NULL; + ii->i_state = 0; + ii->i_cno = 0; +- nilfs_mapping_init(&ii->i_btnode_cache, &ii->vfs_inode); ++ ii->i_assoc_inode = NULL; ++ ii->i_bmap = &ii->i_bmap_data; + return &ii->vfs_inode; + } + +@@ -407,6 +408,15 @@ int nilfs_resize_fs(struct super_block *sb, __u64 newsize) + if (newsize > devsize) + goto out; + ++ /* ++ * Prevent underflow in second superblock position calculation. ++ * The exact minimum size check is done in nilfs_sufile_resize(). 
++ */ ++ if (newsize < 4096) { ++ ret = -ENOSPC; ++ goto out; ++ } ++ + /* + * Write lock is required to protect some functions depending + * on the number of segments, the number of reserved segments, +@@ -1132,8 +1142,6 @@ static int nilfs_remount(struct super_block *sb, int *flags, char *data) + if ((bool)(*flags & SB_RDONLY) == sb_rdonly(sb)) + goto out; + if (*flags & SB_RDONLY) { +- /* Shutting down log writer */ +- nilfs_detach_log_writer(sb); + sb->s_flags |= SB_RDONLY; + + /* +@@ -1377,8 +1385,6 @@ static void nilfs_inode_init_once(void *obj) + #ifdef CONFIG_NILFS_XATTR + init_rwsem(&ii->xattr_sem); + #endif +- address_space_init_once(&ii->i_btnode_cache); +- ii->i_bmap = &ii->i_bmap_data; + inode_init_once(&ii->vfs_inode); + } + +diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c +index c8bfc01da5d71..1068ff40077ca 100644 +--- a/fs/nilfs2/the_nilfs.c ++++ b/fs/nilfs2/the_nilfs.c +@@ -13,6 +13,7 @@ + #include <linux/blkdev.h> + #include <linux/backing-dev.h> + #include <linux/random.h> ++#include <linux/log2.h> + #include <linux/crc32.h> + #include "nilfs.h" + #include "segment.h" +@@ -192,6 +193,34 @@ static int nilfs_store_log_cursor(struct the_nilfs *nilfs, + return ret; + } + ++/** ++ * nilfs_get_blocksize - get block size from raw superblock data ++ * @sb: super block instance ++ * @sbp: superblock raw data buffer ++ * @blocksize: place to store block size ++ * ++ * nilfs_get_blocksize() calculates the block size from the block size ++ * exponent information written in @sbp and stores it in @blocksize, ++ * or aborts with an error message if it's too large. ++ * ++ * Return Value: On success, 0 is returned. If the block size is too ++ * large, -EINVAL is returned. ++ */ ++static int nilfs_get_blocksize(struct super_block *sb, ++ struct nilfs_super_block *sbp, int *blocksize) ++{ ++ unsigned int shift_bits = le32_to_cpu(sbp->s_log_block_size); ++ ++ if (unlikely(shift_bits > ++ ilog2(NILFS_MAX_BLOCK_SIZE) - BLOCK_SIZE_BITS)) { ++ nilfs_err(sb, "too large filesystem blocksize: 2 ^ %u KiB", ++ shift_bits); ++ return -EINVAL; ++ } ++ *blocksize = BLOCK_SIZE << shift_bits; ++ return 0; ++} ++ + /** + * load_nilfs - load and recover the nilfs + * @nilfs: the_nilfs structure to be released +@@ -245,11 +274,15 @@ int load_nilfs(struct the_nilfs *nilfs, struct super_block *sb) + nilfs->ns_sbwtime = le64_to_cpu(sbp[0]->s_wtime); + + /* verify consistency between two super blocks */ +- blocksize = BLOCK_SIZE << le32_to_cpu(sbp[0]->s_log_block_size); ++ err = nilfs_get_blocksize(sb, sbp[0], &blocksize); ++ if (err) ++ goto scan_error; ++ + if (blocksize != nilfs->ns_blocksize) { + nilfs_warn(sb, + "blocksize differs between two super blocks (%d != %d)", + blocksize, nilfs->ns_blocksize); ++ err = -EINVAL; + goto scan_error; + } + +@@ -443,11 +476,33 @@ static int nilfs_valid_sb(struct nilfs_super_block *sbp) + return crc == le32_to_cpu(sbp->s_sum); + } + +-static int nilfs_sb2_bad_offset(struct nilfs_super_block *sbp, u64 offset) ++/** ++ * nilfs_sb2_bad_offset - check the location of the second superblock ++ * @sbp: superblock raw data buffer ++ * @offset: byte offset of second superblock calculated from device size ++ * ++ * nilfs_sb2_bad_offset() checks if the position on the second ++ * superblock is valid or not based on the filesystem parameters ++ * stored in @sbp. If @offset points to a location within the segment ++ * area, or if the parameters themselves are not normal, it is ++ * determined to be invalid. ++ * ++ * Return Value: true if invalid, false if valid. 
++ */ ++static bool nilfs_sb2_bad_offset(struct nilfs_super_block *sbp, u64 offset) + { +- return offset < ((le64_to_cpu(sbp->s_nsegments) * +- le32_to_cpu(sbp->s_blocks_per_segment)) << +- (le32_to_cpu(sbp->s_log_block_size) + 10)); ++ unsigned int shift_bits = le32_to_cpu(sbp->s_log_block_size); ++ u32 blocks_per_segment = le32_to_cpu(sbp->s_blocks_per_segment); ++ u64 nsegments = le64_to_cpu(sbp->s_nsegments); ++ u64 index; ++ ++ if (blocks_per_segment < NILFS_SEG_MIN_BLOCKS || ++ shift_bits > ilog2(NILFS_MAX_BLOCK_SIZE) - BLOCK_SIZE_BITS) ++ return true; ++ ++ index = offset >> (shift_bits + BLOCK_SIZE_BITS); ++ do_div(index, blocks_per_segment); ++ return index < nsegments; + } + + static void nilfs_release_super_block(struct the_nilfs *nilfs) +@@ -489,9 +544,15 @@ static int nilfs_load_super_block(struct the_nilfs *nilfs, + { + struct nilfs_super_block **sbp = nilfs->ns_sbp; + struct buffer_head **sbh = nilfs->ns_sbh; +- u64 sb2off = NILFS_SB2_OFFSET_BYTES(nilfs->ns_bdev->bd_inode->i_size); ++ u64 sb2off, devsize = nilfs->ns_bdev->bd_inode->i_size; + int valid[2], swp = 0; + ++ if (devsize < NILFS_SEG_MIN_BLOCKS * NILFS_MIN_BLOCK_SIZE + 4096) { ++ nilfs_err(sb, "device size too small"); ++ return -EINVAL; ++ } ++ sb2off = NILFS_SB2_OFFSET_BYTES(devsize); ++ + sbp[0] = nilfs_read_super_block(sb, NILFS_SB_OFFSET_BYTES, blocksize, + &sbh[0]); + sbp[1] = nilfs_read_super_block(sb, sb2off, blocksize, &sbh[1]); +@@ -586,9 +647,11 @@ int init_nilfs(struct the_nilfs *nilfs, struct super_block *sb, char *data) + if (err) + goto failed_sbh; + +- blocksize = BLOCK_SIZE << le32_to_cpu(sbp->s_log_block_size); +- if (blocksize < NILFS_MIN_BLOCK_SIZE || +- blocksize > NILFS_MAX_BLOCK_SIZE) { ++ err = nilfs_get_blocksize(sb, sbp, &blocksize); ++ if (err) ++ goto failed_sbh; ++ ++ if (blocksize < NILFS_MIN_BLOCK_SIZE) { + nilfs_err(sb, + "couldn't mount because of unsupported filesystem blocksize %d", + blocksize); +@@ -690,9 +753,7 @@ int nilfs_count_free_blocks(struct the_nilfs *nilfs, sector_t *nblocks) + { + unsigned long ncleansegs; + +- down_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem); + ncleansegs = nilfs_sufile_get_ncleansegs(nilfs->ns_sufile); +- up_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem); + *nblocks = (sector_t)ncleansegs * nilfs->ns_blocks_per_segment; + return 0; + } +diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c +index 6facdf476255d..84ec851211d91 100644 +--- a/fs/notify/fanotify/fanotify_user.c ++++ b/fs/notify/fanotify/fanotify_user.c +@@ -611,9 +611,6 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group, + if (fanotify_is_perm_event(event->mask)) + FANOTIFY_PERM(event)->fd = fd; + +- if (f) +- fd_install(fd, f); +- + if (info_mode) { + ret = copy_info_records_to_user(event, info, info_mode, pidfd, + buf, count); +@@ -621,6 +618,9 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group, + goto out_close_fd; + } + ++ if (f) ++ fd_install(fd, f); ++ + return metadata.event_len; + + out_close_fd: +diff --git a/fs/notify/fdinfo.c b/fs/notify/fdinfo.c +index 57f0d5d9f934e..3451708fd035c 100644 +--- a/fs/notify/fdinfo.c ++++ b/fs/notify/fdinfo.c +@@ -83,16 +83,9 @@ static void inotify_fdinfo(struct seq_file *m, struct fsnotify_mark *mark) + inode_mark = container_of(mark, struct inotify_inode_mark, fsn_mark); + inode = igrab(fsnotify_conn_inode(mark->connector)); + if (inode) { +- /* +- * IN_ALL_EVENTS represents all of the mask bits +- * that we expose to userspace. 
 There is at
+- least one bit (FS_EVENT_ON_CHILD) which is
+- used only internally to the kernel.
+- */
+- u32 mask = mark->mask & IN_ALL_EVENTS;
+- seq_printf(m, "inotify wd:%x ino:%lx sdev:%x mask:%x ignored_mask:%x ",
++ seq_printf(m, "inotify wd:%x ino:%lx sdev:%x mask:%x ignored_mask:0 ",
+ inode_mark->wd, inode->i_ino, inode->i_sb->s_dev,
+- mask, mark->ignored_mask);
++ inotify_mark_user_mask(mark));
+ show_mark_fhandle(m, inode);
+ seq_putc(m, '\n');
+ iput(inode);
+diff --git a/fs/notify/inotify/inotify.h b/fs/notify/inotify/inotify.h
+index 2007e37119160..8f00151eb731f 100644
+--- a/fs/notify/inotify/inotify.h
++++ b/fs/notify/inotify/inotify.h
+@@ -22,6 +22,18 @@ static inline struct inotify_event_info *INOTIFY_E(struct fsnotify_event *fse)
+ return container_of(fse, struct inotify_event_info, fse);
+ }
+ 
++/*
++ * INOTIFY_USER_MASK represents all of the mask bits that we expose to
++ * userspace. There is at least one bit (FS_EVENT_ON_CHILD) which is
++ * used only internally to the kernel.
++ */
++#define INOTIFY_USER_MASK (IN_ALL_EVENTS | IN_ONESHOT | IN_EXCL_UNLINK)
++
++static inline __u32 inotify_mark_user_mask(struct fsnotify_mark *fsn_mark)
++{
++ return fsn_mark->mask & INOTIFY_USER_MASK;
++}
++
+ extern void inotify_ignored_and_remove_idr(struct fsnotify_mark *fsn_mark,
+ struct fsnotify_group *group);
+ extern int inotify_handle_inode_event(struct fsnotify_mark *inode_mark,
+diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
+index 62051247f6d21..9fb7701d2f8a0 100644
+--- a/fs/notify/inotify/inotify_user.c
++++ b/fs/notify/inotify/inotify_user.c
+@@ -102,7 +102,7 @@ static inline __u32 inotify_arg_to_mask(struct inode *inode, u32 arg)
+ mask |= FS_EVENT_ON_CHILD;
+ 
+ /* mask off the flags used to open the fd */
+- mask |= (arg & (IN_ALL_EVENTS | IN_ONESHOT | IN_EXCL_UNLINK));
++ mask |= (arg & INOTIFY_USER_MASK);
+ 
+ return mask;
+ }
+diff --git a/fs/notify/mark.c b/fs/notify/mark.c
+index fa1d99101f895..bea106fac0901 100644
+--- a/fs/notify/mark.c
++++ b/fs/notify/mark.c
+@@ -452,7 +452,7 @@ void fsnotify_free_mark(struct fsnotify_mark *mark)
+ void fsnotify_destroy_mark(struct fsnotify_mark *mark,
+ struct fsnotify_group *group)
+ {
+- mutex_lock_nested(&group->mark_mutex, SINGLE_DEPTH_NESTING);
++ mutex_lock(&group->mark_mutex);
+ fsnotify_detach_mark(mark);
+ mutex_unlock(&group->mark_mutex);
+ fsnotify_free_mark(mark);
+@@ -767,7 +767,7 @@ void fsnotify_clear_marks_by_group(struct fsnotify_group *group,
+ * move marks to free to to_free list in one go and then free marks in
+ * to_free list one by one. 
+ */ +- mutex_lock_nested(&group->mark_mutex, SINGLE_DEPTH_NESTING); ++ mutex_lock(&group->mark_mutex); + list_for_each_entry_safe(mark, lmark, &group->marks_list, g_list) { + if ((1U << mark->connector->type) & type_mask) + list_move(&mark->g_list, &to_free); +@@ -776,7 +776,7 @@ void fsnotify_clear_marks_by_group(struct fsnotify_group *group, + + clear: + while (1) { +- mutex_lock_nested(&group->mark_mutex, SINGLE_DEPTH_NESTING); ++ mutex_lock(&group->mark_mutex); + if (list_empty(head)) { + mutex_unlock(&group->mark_mutex); + break; +diff --git a/fs/ntfs/attrib.c b/fs/ntfs/attrib.c +index d563abc3e1364..c0881d39d36a9 100644 +--- a/fs/ntfs/attrib.c ++++ b/fs/ntfs/attrib.c +@@ -592,15 +592,39 @@ static int ntfs_attr_find(const ATTR_TYPE type, const ntfschar *name, + a = (ATTR_RECORD*)((u8*)ctx->attr + + le32_to_cpu(ctx->attr->length)); + for (;; a = (ATTR_RECORD*)((u8*)a + le32_to_cpu(a->length))) { +- if ((u8*)a < (u8*)ctx->mrec || (u8*)a > (u8*)ctx->mrec + +- le32_to_cpu(ctx->mrec->bytes_allocated)) ++ u8 *mrec_end = (u8 *)ctx->mrec + ++ le32_to_cpu(ctx->mrec->bytes_allocated); ++ u8 *name_end; ++ ++ /* check whether ATTR_RECORD wrap */ ++ if ((u8 *)a < (u8 *)ctx->mrec) ++ break; ++ ++ /* check whether Attribute Record Header is within bounds */ ++ if ((u8 *)a > mrec_end || ++ (u8 *)a + sizeof(ATTR_RECORD) > mrec_end) ++ break; ++ ++ /* check whether ATTR_RECORD's name is within bounds */ ++ name_end = (u8 *)a + le16_to_cpu(a->name_offset) + ++ a->name_length * sizeof(ntfschar); ++ if (name_end > mrec_end) + break; ++ + ctx->attr = a; + if (unlikely(le32_to_cpu(a->type) > le32_to_cpu(type) || + a->type == AT_END)) + return -ENOENT; + if (unlikely(!a->length)) + break; ++ ++ /* check whether ATTR_RECORD's length wrap */ ++ if ((u8 *)a + le32_to_cpu(a->length) < (u8 *)a) ++ break; ++ /* check whether ATTR_RECORD's length is within bounds */ ++ if ((u8 *)a + le32_to_cpu(a->length) > mrec_end) ++ break; ++ + if (a->type != type) + continue; + /* +diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c +index ab4f3362466d0..a43adeacd930c 100644 +--- a/fs/ntfs/file.c ++++ b/fs/ntfs/file.c +@@ -1829,7 +1829,7 @@ again: + * pages being swapped out between us bringing them into memory + * and doing the actual copying. + */ +- if (unlikely(iov_iter_fault_in_readable(i, bytes))) { ++ if (unlikely(fault_in_iov_iter_readable(i, bytes))) { + status = -EFAULT; + break; + } +diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c +index 4474adb393ca8..dc4aefd024b54 100644 +--- a/fs/ntfs/inode.c ++++ b/fs/ntfs/inode.c +@@ -1829,6 +1829,13 @@ int ntfs_read_inode_mount(struct inode *vi) + goto err_out; + } + ++ /* Sanity check offset to the first attribute */ ++ if (le16_to_cpu(m->attrs_offset) >= le32_to_cpu(m->bytes_allocated)) { ++ ntfs_error(sb, "Incorrect mft offset to the first attribute %u in superblock.", ++ le16_to_cpu(m->attrs_offset)); ++ goto err_out; ++ } ++ + /* Need this to sanity check attribute list references to $MFT. */ + vi->i_generation = ni->seq_no = le16_to_cpu(m->sequence_number); + +@@ -1881,6 +1888,10 @@ int ntfs_read_inode_mount(struct inode *vi) + } + /* Now allocate memory for the attribute list. 
*/ + ni->attr_list_size = (u32)ntfs_attr_size(a); ++ if (!ni->attr_list_size) { ++ ntfs_error(sb, "Attr_list_size is zero"); ++ goto put_err_out; ++ } + ni->attr_list = ntfs_malloc_nofs(ni->attr_list_size); + if (!ni->attr_list) { + ntfs_error(sb, "Not enough memory to allocate buffer " +diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c +index 0d7e948cb29c9..7f69422d5191d 100644 +--- a/fs/ntfs/super.c ++++ b/fs/ntfs/super.c +@@ -2092,7 +2092,8 @@ get_ctx_vol_failed: + // TODO: Initialize security. + /* Get the extended system files' directory inode. */ + vol->extend_ino = ntfs_iget(sb, FILE_Extend); +- if (IS_ERR(vol->extend_ino) || is_bad_inode(vol->extend_ino)) { ++ if (IS_ERR(vol->extend_ino) || is_bad_inode(vol->extend_ino) || ++ !S_ISDIR(vol->extend_ino->i_mode)) { + if (!IS_ERR(vol->extend_ino)) + iput(vol->extend_ino); + ntfs_error(sb, "Failed to load $Extend."); +diff --git a/fs/ntfs3/attrib.c b/fs/ntfs3/attrib.c +index e8c00dda42adb..321d55b3ca17d 100644 +--- a/fs/ntfs3/attrib.c ++++ b/fs/ntfs3/attrib.c +@@ -101,6 +101,10 @@ int attr_load_runs(struct ATTRIB *attr, struct ntfs_inode *ni, + + asize = le32_to_cpu(attr->size); + run_off = le16_to_cpu(attr->nres.run_off); ++ ++ if (run_off > asize) ++ return -EINVAL; ++ + err = run_unpack_ex(run, ni->mi.sbi, ni->mi.rno, svcn, evcn, + vcn ? *vcn : svcn, Add2Ptr(attr, run_off), + asize - run_off); +@@ -1142,6 +1146,11 @@ int attr_load_runs_vcn(struct ntfs_inode *ni, enum ATTR_TYPE type, + CLST svcn, evcn; + u16 ro; + ++ if (!ni) { ++ /* Is record corrupted? */ ++ return -ENOENT; ++ } ++ + attr = ni_find_attr(ni, NULL, NULL, type, name, name_len, &vcn, NULL); + if (!attr) { + /* Is record corrupted? */ +@@ -1157,6 +1166,10 @@ int attr_load_runs_vcn(struct ntfs_inode *ni, enum ATTR_TYPE type, + } + + ro = le16_to_cpu(attr->nres.run_off); ++ ++ if (ro > le32_to_cpu(attr->size)) ++ return -EINVAL; ++ + err = run_unpack_ex(run, ni->mi.sbi, ni->mi.rno, svcn, evcn, svcn, + Add2Ptr(attr, ro), le32_to_cpu(attr->size) - ro); + if (err < 0) +@@ -1832,6 +1845,11 @@ int attr_collapse_range(struct ntfs_inode *ni, u64 vbo, u64 bytes) + u16 le_sz; + u16 roff = le16_to_cpu(attr->nres.run_off); + ++ if (roff > le32_to_cpu(attr->size)) { ++ err = -EINVAL; ++ goto out; ++ } ++ + run_unpack_ex(RUN_DEALLOCATE, sbi, ni->mi.rno, svcn, + evcn1 - 1, svcn, Add2Ptr(attr, roff), + le32_to_cpu(attr->size) - roff); +@@ -1949,7 +1967,7 @@ int attr_punch_hole(struct ntfs_inode *ni, u64 vbo, u64 bytes, u32 *frame_size) + return -ENOENT; + + if (!attr_b->non_res) { +- u32 data_size = le32_to_cpu(attr->res.data_size); ++ u32 data_size = le32_to_cpu(attr_b->res.data_size); + u32 from, to; + + if (vbo > data_size) +diff --git a/fs/ntfs3/attrlist.c b/fs/ntfs3/attrlist.c +index bad6d8a849a24..c0c6bcbc8c05c 100644 +--- a/fs/ntfs3/attrlist.c ++++ b/fs/ntfs3/attrlist.c +@@ -68,6 +68,11 @@ int ntfs_load_attr_list(struct ntfs_inode *ni, struct ATTRIB *attr) + + run_init(&ni->attr_list.run); + ++ if (run_off > le32_to_cpu(attr->size)) { ++ err = -EINVAL; ++ goto out; ++ } ++ + err = run_unpack_ex(&ni->attr_list.run, ni->mi.sbi, ni->mi.rno, + 0, le64_to_cpu(attr->nres.evcn), 0, + Add2Ptr(attr, run_off), +diff --git a/fs/ntfs3/bitmap.c b/fs/ntfs3/bitmap.c +index aa184407520f0..2a63793f522d4 100644 +--- a/fs/ntfs3/bitmap.c ++++ b/fs/ntfs3/bitmap.c +@@ -666,7 +666,7 @@ int wnd_init(struct wnd_bitmap *wnd, struct super_block *sb, size_t nbits) + if (!wnd->bits_last) + wnd->bits_last = wbits; + +- wnd->free_bits = kcalloc(wnd->nwnd, sizeof(u16), GFP_NOFS); ++ wnd->free_bits = 
kcalloc(wnd->nwnd, sizeof(u16), GFP_NOFS | __GFP_NOWARN); + if (!wnd->free_bits) + return -ENOMEM; + +@@ -1432,7 +1432,7 @@ int ntfs_trim_fs(struct ntfs_sb_info *sbi, struct fstrim_range *range) + + down_read_nested(&wnd->rw_lock, BITMAP_MUTEX_CLUSTERS); + +- for (; iw < wnd->nbits; iw++, wbit = 0) { ++ for (; iw < wnd->nwnd; iw++, wbit = 0) { + CLST lcn_wnd = iw * wbits; + struct buffer_head *bh; + +diff --git a/fs/ntfs3/file.c b/fs/ntfs3/file.c +index 43b1451bff539..c526e0427f2bf 100644 +--- a/fs/ntfs3/file.c ++++ b/fs/ntfs3/file.c +@@ -488,13 +488,13 @@ static int ntfs_truncate(struct inode *inode, loff_t new_size) + + new_valid = ntfs_up_block(sb, min_t(u64, ni->i_valid, new_size)); + +- ni_lock(ni); +- + truncate_setsize(inode, new_size); + ++ ni_lock(ni); ++ + down_write(&ni->file.run_lock); + err = attr_set_size(ni, ATTR_DATA, NULL, 0, &ni->file.run, new_size, +- &new_valid, true, NULL); ++ &new_valid, ni->mi.sbi->options->prealloc, NULL); + up_write(&ni->file.run_lock); + + if (new_valid < ni->i_valid) +@@ -661,7 +661,13 @@ static long ntfs_fallocate(struct file *file, int mode, loff_t vbo, loff_t len) + /* + * Normal file: Allocate clusters, do not change 'valid' size. + */ +- err = ntfs_set_size(inode, max(end, i_size)); ++ loff_t new_size = max(end, i_size); ++ ++ err = inode_newsize_ok(inode, new_size); ++ if (err) ++ goto out; ++ ++ err = ntfs_set_size(inode, new_size); + if (err) + goto out; + +@@ -761,7 +767,7 @@ int ntfs3_setattr(struct user_namespace *mnt_userns, struct dentry *dentry, + } + inode_dio_wait(inode); + +- if (attr->ia_size < oldsize) ++ if (attr->ia_size <= oldsize) + err = ntfs_truncate(inode, attr->ia_size); + else if (attr->ia_size > oldsize) + err = ntfs_extend(inode, attr->ia_size, 0, NULL); +@@ -989,7 +995,7 @@ static ssize_t ntfs_compress_write(struct kiocb *iocb, struct iov_iter *from) + frame_vbo = pos & ~(frame_size - 1); + index = frame_vbo >> PAGE_SHIFT; + +- if (unlikely(iov_iter_fault_in_readable(from, bytes))) { ++ if (unlikely(fault_in_iov_iter_readable(from, bytes))) { + err = -EFAULT; + goto out; + } +diff --git a/fs/ntfs3/frecord.c b/fs/ntfs3/frecord.c +index 6f47a9c17f896..cdeb0b51f0ba8 100644 +--- a/fs/ntfs3/frecord.c ++++ b/fs/ntfs3/frecord.c +@@ -567,6 +567,12 @@ static int ni_repack(struct ntfs_inode *ni) + } + + roff = le16_to_cpu(attr->nres.run_off); ++ ++ if (roff > le32_to_cpu(attr->size)) { ++ err = -EINVAL; ++ break; ++ } ++ + err = run_unpack(&run, sbi, ni->mi.rno, svcn, evcn, svcn, + Add2Ptr(attr, roff), + le32_to_cpu(attr->size) - roff); +@@ -1541,6 +1547,9 @@ int ni_delete_all(struct ntfs_inode *ni) + asize = le32_to_cpu(attr->size); + roff = le16_to_cpu(attr->nres.run_off); + ++ if (roff > asize) ++ return -EINVAL; ++ + /* run==1 means unpack and deallocate. 
*/ + run_unpack_ex(RUN_DEALLOCATE, sbi, ni->mi.rno, svcn, evcn, svcn, + Add2Ptr(attr, roff), asize - roff); +@@ -1964,10 +1973,8 @@ int ni_fiemap(struct ntfs_inode *ni, struct fiemap_extent_info *fieinfo, + + vcn += clen; + +- if (vbo + bytes >= end) { ++ if (vbo + bytes >= end) + bytes = end - vbo; +- flags |= FIEMAP_EXTENT_LAST; +- } + + if (vbo + bytes <= valid) { + ; +@@ -1977,6 +1984,9 @@ int ni_fiemap(struct ntfs_inode *ni, struct fiemap_extent_info *fieinfo, + /* vbo < valid && valid < vbo + bytes */ + u64 dlen = valid - vbo; + ++ if (vbo + dlen >= end) ++ flags |= FIEMAP_EXTENT_LAST; ++ + err = fiemap_fill_next_extent(fieinfo, vbo, lbo, dlen, + flags); + if (err < 0) +@@ -1995,6 +2005,9 @@ int ni_fiemap(struct ntfs_inode *ni, struct fiemap_extent_info *fieinfo, + flags |= FIEMAP_EXTENT_UNWRITTEN; + } + ++ if (vbo + bytes >= end) ++ flags |= FIEMAP_EXTENT_LAST; ++ + err = fiemap_fill_next_extent(fieinfo, vbo, lbo, bytes, flags); + if (err < 0) + break; +@@ -2238,6 +2251,11 @@ remove_wof: + asize = le32_to_cpu(attr->size); + roff = le16_to_cpu(attr->nres.run_off); + ++ if (roff > asize) { ++ err = -EINVAL; ++ goto out; ++ } ++ + /*run==1 Means unpack and deallocate. */ + run_unpack_ex(RUN_DEALLOCATE, sbi, ni->mi.rno, svcn, evcn, svcn, + Add2Ptr(attr, roff), asize - roff); +diff --git a/fs/ntfs3/fslog.c b/fs/ntfs3/fslog.c +index 06492f088d602..20abdb2682860 100644 +--- a/fs/ntfs3/fslog.c ++++ b/fs/ntfs3/fslog.c +@@ -1132,7 +1132,7 @@ static int read_log_page(struct ntfs_log *log, u32 vbo, + return -EINVAL; + + if (!*buffer) { +- to_free = kmalloc(bytes, GFP_NOFS); ++ to_free = kmalloc(log->page_size, GFP_NOFS); + if (!to_free) + return -ENOMEM; + *buffer = to_free; +@@ -1180,12 +1180,7 @@ static int log_read_rst(struct ntfs_log *log, u32 l_size, bool first, + struct restart_info *info) + { + u32 skip, vbo; +- struct RESTART_HDR *r_page = kmalloc(DefaultLogPageSize, GFP_NOFS); +- +- if (!r_page) +- return -ENOMEM; +- +- memset(info, 0, sizeof(struct restart_info)); ++ struct RESTART_HDR *r_page = NULL; + + /* Determine which restart area we are looking for. */ + if (first) { +@@ -1199,7 +1194,6 @@ static int log_read_rst(struct ntfs_log *log, u32 l_size, bool first, + /* Loop continuously until we succeed. */ + for (; vbo < l_size; vbo = 2 * vbo + skip, skip = 0) { + bool usa_error; +- u32 sys_page_size; + bool brst, bchk; + struct RESTART_AREA *ra; + +@@ -1253,24 +1247,6 @@ static int log_read_rst(struct ntfs_log *log, u32 l_size, bool first, + goto check_result; + } + +- /* Read the entire restart area. */ +- sys_page_size = le32_to_cpu(r_page->sys_page_size); +- if (DefaultLogPageSize != sys_page_size) { +- kfree(r_page); +- r_page = kzalloc(sys_page_size, GFP_NOFS); +- if (!r_page) +- return -ENOMEM; +- +- if (read_log_page(log, vbo, +- (struct RECORD_PAGE_HDR **)&r_page, +- &usa_error)) { +- /* Ignore any errors. 
*/ +- kfree(r_page); +- r_page = NULL; +- continue; +- } +- } +- + if (is_client_area_valid(r_page, usa_error)) { + info->valid_page = true; + ra = Add2Ptr(r_page, le16_to_cpu(r_page->ra_off)); +@@ -2729,6 +2705,9 @@ static inline bool check_attr(const struct MFT_REC *rec, + return false; + } + ++ if (run_off > asize) ++ return false; ++ + if (run_unpack(NULL, sbi, 0, svcn, evcn, svcn, + Add2Ptr(attr, run_off), asize - run_off) < 0) { + return false; +@@ -3791,10 +3770,11 @@ int log_replay(struct ntfs_inode *ni, bool *initialized) + if (!log) + return -ENOMEM; + ++ memset(&rst_info, 0, sizeof(struct restart_info)); ++ + log->ni = ni; + log->l_size = l_size; + log->one_page_buf = kmalloc(page_size, GFP_NOFS); +- + if (!log->one_page_buf) { + err = -ENOMEM; + goto out; +@@ -3842,6 +3822,7 @@ int log_replay(struct ntfs_inode *ni, bool *initialized) + if (rst_info.vbo) + goto check_restart_area; + ++ memset(&rst_info2, 0, sizeof(struct restart_info)); + err = log_read_rst(log, l_size, false, &rst_info2); + + /* Determine which restart area to use. */ +@@ -4085,8 +4066,10 @@ process_log: + if (client == LFS_NO_CLIENT_LE) { + /* Insert "NTFS" client LogFile. */ + client = ra->client_idx[0]; +- if (client == LFS_NO_CLIENT_LE) +- return -EINVAL; ++ if (client == LFS_NO_CLIENT_LE) { ++ err = -EINVAL; ++ goto out; ++ } + + t16 = le16_to_cpu(client); + cr = ca + t16; +@@ -4767,6 +4750,12 @@ fake_attr: + u16 roff = le16_to_cpu(attr->nres.run_off); + CLST svcn = le64_to_cpu(attr->nres.svcn); + ++ if (roff > t32) { ++ kfree(oa->attr); ++ oa->attr = NULL; ++ goto fake_attr; ++ } ++ + err = run_unpack(&oa->run0, sbi, inode->i_ino, svcn, + le64_to_cpu(attr->nres.evcn), svcn, + Add2Ptr(attr, roff), t32 - roff); +@@ -5055,7 +5044,7 @@ undo_action_next: + goto add_allocated_vcns; + + vcn = le64_to_cpu(lrh->target_vcn); +- vcn &= ~(log->clst_per_page - 1); ++ vcn &= ~(u64)(log->clst_per_page - 1); + + add_allocated_vcns: + for (i = 0, vcn = le64_to_cpu(lrh->target_vcn), +diff --git a/fs/ntfs3/fsntfs.c b/fs/ntfs3/fsntfs.c +index 4de9acb169689..4a97a28cb8f29 100644 +--- a/fs/ntfs3/fsntfs.c ++++ b/fs/ntfs3/fsntfs.c +@@ -831,10 +831,15 @@ int ntfs_update_mftmirr(struct ntfs_sb_info *sbi, int wait) + { + int err; + struct super_block *sb = sbi->sb; +- u32 blocksize = sb->s_blocksize; ++ u32 blocksize; + sector_t block1, block2; + u32 bytes; + ++ if (!sb) ++ return -EINVAL; ++ ++ blocksize = sb->s_blocksize; ++ + if (!(sbi->flags & NTFS_FLAGS_MFTMIRR)) + return 0; + +@@ -1873,9 +1878,10 @@ int ntfs_security_init(struct ntfs_sb_info *sbi) + goto out; + } + +- root_sdh = resident_data(attr); ++ root_sdh = resident_data_ex(attr, sizeof(struct INDEX_ROOT)); + if (root_sdh->type != ATTR_ZERO || +- root_sdh->rule != NTFS_COLLATION_TYPE_SECURITY_HASH) { ++ root_sdh->rule != NTFS_COLLATION_TYPE_SECURITY_HASH || ++ offsetof(struct INDEX_ROOT, ihdr) + root_sdh->ihdr.used > attr->res.data_size) { + err = -EINVAL; + goto out; + } +@@ -1891,9 +1897,10 @@ int ntfs_security_init(struct ntfs_sb_info *sbi) + goto out; + } + +- root_sii = resident_data(attr); ++ root_sii = resident_data_ex(attr, sizeof(struct INDEX_ROOT)); + if (root_sii->type != ATTR_ZERO || +- root_sii->rule != NTFS_COLLATION_TYPE_UINT) { ++ root_sii->rule != NTFS_COLLATION_TYPE_UINT || ++ offsetof(struct INDEX_ROOT, ihdr) + root_sii->ihdr.used > attr->res.data_size) { + err = -EINVAL; + goto out; + } +diff --git a/fs/ntfs3/index.c b/fs/ntfs3/index.c +index 6f81e3a49abfb..99f8a57e9f7a9 100644 +--- a/fs/ntfs3/index.c ++++ b/fs/ntfs3/index.c +@@ -1017,6 +1017,12 @@ 
ok: + err = 0; + } + ++ /* check for index header length */ ++ if (offsetof(struct INDEX_BUFFER, ihdr) + ib->ihdr.used > bytes) { ++ err = -EINVAL; ++ goto out; ++ } ++ + in->index = ib; + *node = in; + +@@ -1994,7 +2000,7 @@ static int indx_free_children(struct ntfs_index *indx, struct ntfs_inode *ni, + const struct NTFS_DE *e, bool trim) + { + int err; +- struct indx_node *n; ++ struct indx_node *n = NULL; + struct INDEX_HDR *hdr; + CLST vbn = de_get_vbn(e); + size_t i; +diff --git a/fs/ntfs3/inode.c b/fs/ntfs3/inode.c +index 859951d785cb2..136236a25da60 100644 +--- a/fs/ntfs3/inode.c ++++ b/fs/ntfs3/inode.c +@@ -129,6 +129,16 @@ next_attr: + rsize = attr->non_res ? 0 : le32_to_cpu(attr->res.data_size); + asize = le32_to_cpu(attr->size); + ++ if (le16_to_cpu(attr->name_off) + attr->name_len > asize) ++ goto out; ++ ++ if (attr->non_res) { ++ t64 = le64_to_cpu(attr->nres.alloc_size); ++ if (le64_to_cpu(attr->nres.data_size) > t64 || ++ le64_to_cpu(attr->nres.valid_size) > t64) ++ goto out; ++ } ++ + switch (attr->type) { + case ATTR_STD: + if (attr->non_res || +@@ -364,7 +374,13 @@ next_attr: + attr_unpack_run: + roff = le16_to_cpu(attr->nres.run_off); + ++ if (roff > asize) { ++ err = -EINVAL; ++ goto out; ++ } ++ + t64 = le64_to_cpu(attr->nres.svcn); ++ + err = run_unpack_ex(run, sbi, ino, t64, le64_to_cpu(attr->nres.evcn), + t64, Add2Ptr(attr, roff), asize - roff); + if (err < 0) +@@ -430,6 +446,7 @@ end_enum: + } else if (fname && fname->home.low == cpu_to_le32(MFT_REC_EXTEND) && + fname->home.seq == cpu_to_le16(MFT_REC_EXTEND)) { + /* Records in $Extend are not a files or general directories. */ ++ inode->i_op = &ntfs_file_inode_operations; + } else { + err = -EINVAL; + goto out; +@@ -757,6 +774,7 @@ static ssize_t ntfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter) + loff_t vbo = iocb->ki_pos; + loff_t end; + int wr = iov_iter_rw(iter) & WRITE; ++ size_t iter_count = iov_iter_count(iter); + loff_t valid; + ssize_t ret; + +@@ -770,10 +788,13 @@ static ssize_t ntfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter) + wr ? 
ntfs_get_block_direct_IO_W + : ntfs_get_block_direct_IO_R); + +- if (ret <= 0) ++ if (ret > 0) ++ end = vbo + ret; ++ else if (wr && ret == -EIOCBQUEUED) ++ end = vbo + iter_count; ++ else + goto out; + +- end = vbo + ret; + valid = ni->i_valid; + if (wr) { + if (end > valid && !S_ISBLK(inode->i_mode)) { +@@ -1937,8 +1958,6 @@ const struct inode_operations ntfs_link_inode_operations = { + .setattr = ntfs3_setattr, + .listxattr = ntfs_listxattr, + .permission = ntfs_permission, +- .get_acl = ntfs_get_acl, +- .set_acl = ntfs_set_acl, + }; + + const struct address_space_operations ntfs_aops = { +diff --git a/fs/ntfs3/record.c b/fs/ntfs3/record.c +index 861e35791506e..fd342da398bea 100644 +--- a/fs/ntfs3/record.c ++++ b/fs/ntfs3/record.c +@@ -220,6 +220,11 @@ struct ATTRIB *mi_enum_attr(struct mft_inode *mi, struct ATTRIB *attr) + return NULL; + } + ++ if (off + asize < off) { ++ /* overflow check */ ++ return NULL; ++ } ++ + attr = Add2Ptr(attr, asize); + off += asize; + } +@@ -260,6 +265,11 @@ struct ATTRIB *mi_enum_attr(struct mft_inode *mi, struct ATTRIB *attr) + if (t16 + t32 > asize) + return NULL; + ++ if (attr->name_len && ++ le16_to_cpu(attr->name_off) + sizeof(short) * attr->name_len > t16) { ++ return NULL; ++ } ++ + return attr; + } + +diff --git a/fs/ntfs3/super.c b/fs/ntfs3/super.c +index d41d76979e121..33b1833ad525c 100644 +--- a/fs/ntfs3/super.c ++++ b/fs/ntfs3/super.c +@@ -30,6 +30,7 @@ + #include <linux/fs_context.h> + #include <linux/fs_parser.h> + #include <linux/log2.h> ++#include <linux/minmax.h> + #include <linux/module.h> + #include <linux/nls.h> + #include <linux/seq_file.h> +@@ -390,7 +391,7 @@ static int ntfs_fs_reconfigure(struct fs_context *fc) + return -EINVAL; + } + +- memcpy(sbi->options, new_opts, sizeof(*new_opts)); ++ swap(sbi->options, fc->fs_private); + + return 0; + } +@@ -668,9 +669,11 @@ static u32 format_size_gb(const u64 bytes, u32 *mb) + + static u32 true_sectors_per_clst(const struct NTFS_BOOT *boot) + { +- return boot->sectors_per_clusters <= 0x80 +- ? boot->sectors_per_clusters +- : (1u << (0 - boot->sectors_per_clusters)); ++ if (boot->sectors_per_clusters <= 0x80) ++ return boot->sectors_per_clusters; ++ if (boot->sectors_per_clusters >= 0xf4) /* limit shift to 2MB max */ ++ return 1U << -(s8)boot->sectors_per_clusters; ++ return -EINVAL; + } + + /* +@@ -713,6 +716,8 @@ static int ntfs_init_from_boot(struct super_block *sb, u32 sector_size, + + /* cluster size: 512, 1K, 2K, 4K, ... 
2M */ + sct_per_clst = true_sectors_per_clst(boot); ++ if ((int)sct_per_clst < 0) ++ goto out; + if (!is_power_of_2(sct_per_clst)) + goto out; + +@@ -784,7 +789,7 @@ static int ntfs_init_from_boot(struct super_block *sb, u32 sector_size, + : (u32)boot->record_size + << sbi->cluster_bits; + +- if (record_size > MAXIMUM_BYTES_PER_MFT) ++ if (record_size > MAXIMUM_BYTES_PER_MFT || record_size < SECTOR_SIZE) + goto out; + + sbi->record_bits = blksize_bits(record_size); +@@ -897,6 +902,8 @@ static int ntfs_fill_super(struct super_block *sb, struct fs_context *fc) + ref.high = 0; + + sbi->sb = sb; ++ sbi->options = fc->fs_private; ++ fc->fs_private = NULL; + sb->s_flags |= SB_NODIRATIME; + sb->s_magic = 0x7366746e; // "ntfs" + sb->s_op = &ntfs_sops; +@@ -1129,7 +1136,7 @@ static int ntfs_fill_super(struct super_block *sb, struct fs_context *fc) + goto put_inode_out; + } + bytes = inode->i_size; +- sbi->def_table = t = kmalloc(bytes, GFP_NOFS); ++ sbi->def_table = t = kmalloc(bytes, GFP_NOFS | __GFP_NOWARN); + if (!t) { + err = -ENOMEM; + goto put_inode_out; +@@ -1248,9 +1255,9 @@ load_root: + ref.low = cpu_to_le32(MFT_REC_ROOT); + ref.seq = cpu_to_le16(MFT_REC_ROOT); + inode = ntfs_iget5(sb, &ref, &NAME_ROOT); +- if (IS_ERR(inode)) { ++ if (IS_ERR(inode) || !inode->i_op) { + ntfs_err(sb, "Failed to load root."); +- err = PTR_ERR(inode); ++ err = IS_ERR(inode) ? PTR_ERR(inode) : -EINVAL; + goto out; + } + +@@ -1260,8 +1267,6 @@ load_root: + goto put_inode_out; + } + +- fc->fs_private = NULL; +- + return 0; + + put_inode_out: +@@ -1271,6 +1276,7 @@ out: + * Free resources here. + * ntfs_fs_free will be called with fc->s_fs_info = NULL + */ ++ put_mount_options(sbi->options); + put_ntfs(sbi); + sb->s_fs_info = NULL; + +@@ -1414,7 +1420,6 @@ static int ntfs_init_fs_context(struct fs_context *fc) + mutex_init(&sbi->compress.mtx_lzx); + #endif + +- sbi->options = opts; + fc->s_fs_info = sbi; + ok: + fc->fs_private = opts; +diff --git a/fs/ntfs3/xattr.c b/fs/ntfs3/xattr.c +index afd0ddad826ff..8847db0159084 100644 +--- a/fs/ntfs3/xattr.c ++++ b/fs/ntfs3/xattr.c +@@ -107,18 +107,18 @@ static int ntfs_read_ea(struct ntfs_inode *ni, struct EA_FULL **ea, + return -EFBIG; + + /* Allocate memory for packed Ea. */ +- ea_p = kmalloc(size + add_bytes, GFP_NOFS); ++ ea_p = kmalloc(size_add(size, add_bytes), GFP_NOFS); + if (!ea_p) + return -ENOMEM; + + if (!size) { +- ; ++ /* EA info persists, but xattr is empty. Looks like EA problem. */ + } else if (attr_ea->non_res) { + struct runs_tree run; + + run_init(&run); + +- err = attr_load_runs(attr_ea, ni, &run, NULL); ++ err = attr_load_runs_range(ni, ATTR_EA, NULL, 0, &run, 0, size); + if (!err) + err = ntfs_read_run_nb(sbi, &run, 0, ea_p, size, NULL); + run_close(&run); +@@ -443,6 +443,11 @@ update_ea: + /* Delete xattr, ATTR_EA */ + ni_remove_attr_le(ni, attr, mi, le); + } else if (attr->non_res) { ++ err = attr_load_runs_range(ni, ATTR_EA, NULL, 0, &ea_run, 0, ++ size); ++ if (err) ++ goto out; ++ + err = ntfs_sb_write_run(sbi, &ea_run, 0, ea_all, size, 0); + if (err) + goto out; +@@ -476,8 +481,7 @@ out: + } + + #ifdef CONFIG_NTFS3_FS_POSIX_ACL +-static struct posix_acl *ntfs_get_acl_ex(struct user_namespace *mnt_userns, +- struct inode *inode, int type, ++static struct posix_acl *ntfs_get_acl_ex(struct inode *inode, int type, + int locked) + { + struct ntfs_inode *ni = ntfs_i(inode); +@@ -512,7 +516,7 @@ static struct posix_acl *ntfs_get_acl_ex(struct user_namespace *mnt_userns, + + /* Translate extended attribute to acl. 
*/ + if (err >= 0) { +- acl = posix_acl_from_xattr(mnt_userns, buf, err); ++ acl = posix_acl_from_xattr(&init_user_ns, buf, err); + } else if (err == -ENODATA) { + acl = NULL; + } else { +@@ -535,37 +539,32 @@ struct posix_acl *ntfs_get_acl(struct inode *inode, int type, bool rcu) + if (rcu) + return ERR_PTR(-ECHILD); + +- /* TODO: init_user_ns? */ +- return ntfs_get_acl_ex(&init_user_ns, inode, type, 0); ++ return ntfs_get_acl_ex(inode, type, 0); + } + + static noinline int ntfs_set_acl_ex(struct user_namespace *mnt_userns, + struct inode *inode, struct posix_acl *acl, +- int type) ++ int type, bool init_acl) + { + const char *name; + size_t size, name_len; +- void *value = NULL; +- int err = 0; ++ void *value; ++ int err; + int flags; ++ umode_t mode; + + if (S_ISLNK(inode->i_mode)) + return -EOPNOTSUPP; + ++ mode = inode->i_mode; + switch (type) { + case ACL_TYPE_ACCESS: +- if (acl) { +- umode_t mode = inode->i_mode; +- ++ /* Do not change i_mode if we are in init_acl */ ++ if (acl && !init_acl) { + err = posix_acl_update_mode(mnt_userns, inode, &mode, + &acl); + if (err) +- goto out; +- +- if (inode->i_mode != mode) { +- inode->i_mode = mode; +- mark_inode_dirty(inode); +- } ++ return err; + } + name = XATTR_NAME_POSIX_ACL_ACCESS; + name_len = sizeof(XATTR_NAME_POSIX_ACL_ACCESS) - 1; +@@ -592,7 +591,7 @@ static noinline int ntfs_set_acl_ex(struct user_namespace *mnt_userns, + value = kmalloc(size, GFP_NOFS); + if (!value) + return -ENOMEM; +- err = posix_acl_to_xattr(mnt_userns, acl, value, size); ++ err = posix_acl_to_xattr(&init_user_ns, acl, value, size); + if (err < 0) + goto out; + flags = 0; +@@ -601,8 +600,13 @@ static noinline int ntfs_set_acl_ex(struct user_namespace *mnt_userns, + err = ntfs_set_ea(inode, name, name_len, value, size, flags); + if (err == -ENODATA && !size) + err = 0; /* Removing non existed xattr. 
*/ +- if (!err) ++ if (!err) { + set_cached_acl(inode, type, acl); ++ if (inode->i_mode != mode) { ++ inode->i_mode = mode; ++ mark_inode_dirty(inode); ++ } ++ } + + out: + kfree(value); +@@ -616,7 +620,7 @@ out: + int ntfs_set_acl(struct user_namespace *mnt_userns, struct inode *inode, + struct posix_acl *acl, int type) + { +- return ntfs_set_acl_ex(mnt_userns, inode, acl, type); ++ return ntfs_set_acl_ex(mnt_userns, inode, acl, type, false); + } + + /* +@@ -636,7 +640,7 @@ int ntfs_init_acl(struct user_namespace *mnt_userns, struct inode *inode, + + if (default_acl) { + err = ntfs_set_acl_ex(mnt_userns, inode, default_acl, +- ACL_TYPE_DEFAULT); ++ ACL_TYPE_DEFAULT, true); + posix_acl_release(default_acl); + } else { + inode->i_default_acl = NULL; +@@ -647,7 +651,7 @@ int ntfs_init_acl(struct user_namespace *mnt_userns, struct inode *inode, + else { + if (!err) + err = ntfs_set_acl_ex(mnt_userns, inode, acl, +- ACL_TYPE_ACCESS); ++ ACL_TYPE_ACCESS, true); + posix_acl_release(acl); + } + +@@ -901,6 +905,9 @@ set_new_fa: + err = ntfs_set_ea(inode, name, name_len, value, size, flags); + + out: ++ inode->i_ctime = current_time(inode); ++ mark_inode_dirty(inode); ++ + return err; + } + +@@ -981,7 +988,7 @@ static bool ntfs_xattr_user_list(struct dentry *dentry) + } + + // clang-format off +-static const struct xattr_handler ntfs_xattr_handler = { ++static const struct xattr_handler ntfs_other_xattr_handler = { + .prefix = "", + .get = ntfs_getxattr, + .set = ntfs_setxattr, +@@ -989,7 +996,11 @@ static const struct xattr_handler ntfs_xattr_handler = { + }; + + const struct xattr_handler *ntfs_xattr_handlers[] = { +- &ntfs_xattr_handler, ++#ifdef CONFIG_NTFS3_FS_POSIX_ACL ++ &posix_acl_access_xattr_handler, ++ &posix_acl_default_xattr_handler, ++#endif ++ &ntfs_other_xattr_handler, + NULL, + }; + // clang-format on +diff --git a/fs/ocfs2/dlmfs/userdlm.c b/fs/ocfs2/dlmfs/userdlm.c +index 29f183a15798e..c1d67c806e1d3 100644 +--- a/fs/ocfs2/dlmfs/userdlm.c ++++ b/fs/ocfs2/dlmfs/userdlm.c +@@ -433,6 +433,11 @@ again: + } + + spin_lock(&lockres->l_lock); ++ if (lockres->l_flags & USER_LOCK_IN_TEARDOWN) { ++ spin_unlock(&lockres->l_lock); ++ status = -EAGAIN; ++ goto bail; ++ } + + /* We only compare against the currently granted level + * here. If the lock is blocked waiting on a downconvert, +@@ -595,7 +600,7 @@ int user_dlm_destroy_lock(struct user_lock_res *lockres) + spin_lock(&lockres->l_lock); + if (lockres->l_flags & USER_LOCK_IN_TEARDOWN) { + spin_unlock(&lockres->l_lock); +- return 0; ++ goto bail; + } + + lockres->l_flags |= USER_LOCK_IN_TEARDOWN; +@@ -609,12 +614,17 @@ int user_dlm_destroy_lock(struct user_lock_res *lockres) + } + + if (lockres->l_ro_holders || lockres->l_ex_holders) { ++ lockres->l_flags &= ~USER_LOCK_IN_TEARDOWN; + spin_unlock(&lockres->l_lock); + goto bail; + } + + status = 0; + if (!(lockres->l_flags & USER_LOCK_ATTACHED)) { ++ /* ++ * lock is never requested, leave USER_LOCK_IN_TEARDOWN set ++ * to avoid new lock request coming in. 
++ */ + spin_unlock(&lockres->l_lock); + goto bail; + } +@@ -625,6 +635,10 @@ int user_dlm_destroy_lock(struct user_lock_res *lockres) + + status = ocfs2_dlm_unlock(conn, &lockres->l_lksb, DLM_LKF_VALBLK); + if (status) { ++ spin_lock(&lockres->l_lock); ++ lockres->l_flags &= ~USER_LOCK_IN_TEARDOWN; ++ lockres->l_flags &= ~USER_LOCK_BUSY; ++ spin_unlock(&lockres->l_lock); + user_log_dlm_error("ocfs2_dlm_unlock", status, lockres); + goto bail; + } +diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c +index 54d7843c02114..fc5f780fa2355 100644 +--- a/fs/ocfs2/file.c ++++ b/fs/ocfs2/file.c +@@ -476,10 +476,11 @@ int ocfs2_truncate_file(struct inode *inode, + * greater than page size, so we have to truncate them + * anyway. + */ +- unmap_mapping_range(inode->i_mapping, new_i_size + PAGE_SIZE - 1, 0, 1); +- truncate_inode_pages(inode->i_mapping, new_i_size); + + if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) { ++ unmap_mapping_range(inode->i_mapping, ++ new_i_size + PAGE_SIZE - 1, 0, 1); ++ truncate_inode_pages(inode->i_mapping, new_i_size); + status = ocfs2_truncate_inline(inode, di_bh, new_i_size, + i_size_read(inode), 1); + if (status) +@@ -498,6 +499,9 @@ int ocfs2_truncate_file(struct inode *inode, + goto bail_unlock_sem; + } + ++ unmap_mapping_range(inode->i_mapping, new_i_size + PAGE_SIZE - 1, 0, 1); ++ truncate_inode_pages(inode->i_mapping, new_i_size); ++ + status = ocfs2_commit_truncate(osb, inode, di_bh); + if (status < 0) { + mlog_errno(status); +diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c +index 2c46ff6ba4ea2..11807034dd483 100644 +--- a/fs/ocfs2/namei.c ++++ b/fs/ocfs2/namei.c +@@ -231,6 +231,7 @@ static int ocfs2_mknod(struct user_namespace *mnt_userns, + handle_t *handle = NULL; + struct ocfs2_super *osb; + struct ocfs2_dinode *dirfe; ++ struct ocfs2_dinode *fe = NULL; + struct buffer_head *new_fe_bh = NULL; + struct inode *inode = NULL; + struct ocfs2_alloc_context *inode_ac = NULL; +@@ -381,6 +382,7 @@ static int ocfs2_mknod(struct user_namespace *mnt_userns, + goto leave; + } + ++ fe = (struct ocfs2_dinode *) new_fe_bh->b_data; + if (S_ISDIR(mode)) { + status = ocfs2_fill_new_dir(osb, handle, dir, inode, + new_fe_bh, data_ac, meta_ac); +@@ -453,8 +455,11 @@ roll_back: + leave: + if (status < 0 && did_quota_inode) + dquot_free_inode(inode); +- if (handle) ++ if (handle) { ++ if (status < 0 && fe) ++ ocfs2_set_links_count(fe, 0); + ocfs2_commit_trans(osb, handle); ++ } + + ocfs2_inode_unlock(dir, 1); + if (did_block_signals) +@@ -631,18 +636,9 @@ static int ocfs2_mknod_locked(struct ocfs2_super *osb, + return status; + } + +- status = __ocfs2_mknod_locked(dir, inode, dev, new_fe_bh, ++ return __ocfs2_mknod_locked(dir, inode, dev, new_fe_bh, + parent_fe_bh, handle, inode_ac, + fe_blkno, suballoc_loc, suballoc_bit); +- if (status < 0) { +- u64 bg_blkno = ocfs2_which_suballoc_group(fe_blkno, suballoc_bit); +- int tmp = ocfs2_free_suballoc_bits(handle, inode_ac->ac_inode, +- inode_ac->ac_bh, suballoc_bit, bg_blkno, 1); +- if (tmp) +- mlog_errno(tmp); +- } +- +- return status; + } + + static int ocfs2_mkdir(struct user_namespace *mnt_userns, +@@ -2027,8 +2023,11 @@ bail: + ocfs2_clusters_to_bytes(osb->sb, 1)); + if (status < 0 && did_quota_inode) + dquot_free_inode(inode); +- if (handle) ++ if (handle) { ++ if (status < 0 && fe) ++ ocfs2_set_links_count(fe, 0); + ocfs2_commit_trans(osb, handle); ++ } + + ocfs2_inode_unlock(dir, 1); + if (did_block_signals) +diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h +index bb62cc2e0211b..cf21aecdf5476 100644 +--- a/fs/ocfs2/ocfs2.h 
++++ b/fs/ocfs2/ocfs2.h +@@ -277,7 +277,6 @@ enum ocfs2_mount_options + OCFS2_MOUNT_JOURNAL_ASYNC_COMMIT = 1 << 15, /* Journal Async Commit */ + OCFS2_MOUNT_ERRORS_CONT = 1 << 16, /* Return EIO to the calling process on error */ + OCFS2_MOUNT_ERRORS_ROFS = 1 << 17, /* Change filesystem to read-only on error */ +- OCFS2_MOUNT_NOCLUSTER = 1 << 18, /* No cluster aware filesystem mount */ + }; + + #define OCFS2_OSB_SOFT_RO 0x0001 +@@ -673,8 +672,7 @@ static inline int ocfs2_cluster_o2cb_global_heartbeat(struct ocfs2_super *osb) + + static inline int ocfs2_mount_local(struct ocfs2_super *osb) + { +- return ((osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_LOCAL_MOUNT) +- || (osb->s_mount_opt & OCFS2_MOUNT_NOCLUSTER)); ++ return (osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_LOCAL_MOUNT); + } + + static inline int ocfs2_uses_extended_slot_map(struct ocfs2_super *osb) +diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c +index f033de733adb3..effe92c7d6937 100644 +--- a/fs/ocfs2/quota_global.c ++++ b/fs/ocfs2/quota_global.c +@@ -337,7 +337,6 @@ void ocfs2_unlock_global_qf(struct ocfs2_mem_dqinfo *oinfo, int ex) + /* Read information header from global quota file */ + int ocfs2_global_read_info(struct super_block *sb, int type) + { +- struct inode *gqinode = NULL; + unsigned int ino[OCFS2_MAXQUOTAS] = { USER_QUOTA_SYSTEM_INODE, + GROUP_QUOTA_SYSTEM_INODE }; + struct ocfs2_global_disk_dqinfo dinfo; +@@ -346,29 +345,31 @@ int ocfs2_global_read_info(struct super_block *sb, int type) + u64 pcount; + int status; + ++ oinfo->dqi_gi.dqi_sb = sb; ++ oinfo->dqi_gi.dqi_type = type; ++ ocfs2_qinfo_lock_res_init(&oinfo->dqi_gqlock, oinfo); ++ oinfo->dqi_gi.dqi_entry_size = sizeof(struct ocfs2_global_disk_dqblk); ++ oinfo->dqi_gi.dqi_ops = &ocfs2_global_ops; ++ oinfo->dqi_gqi_bh = NULL; ++ oinfo->dqi_gqi_count = 0; ++ + /* Read global header */ +- gqinode = ocfs2_get_system_file_inode(OCFS2_SB(sb), ino[type], ++ oinfo->dqi_gqinode = ocfs2_get_system_file_inode(OCFS2_SB(sb), ino[type], + OCFS2_INVALID_SLOT); +- if (!gqinode) { ++ if (!oinfo->dqi_gqinode) { + mlog(ML_ERROR, "failed to get global quota inode (type=%d)\n", + type); + status = -EINVAL; + goto out_err; + } +- oinfo->dqi_gi.dqi_sb = sb; +- oinfo->dqi_gi.dqi_type = type; +- oinfo->dqi_gi.dqi_entry_size = sizeof(struct ocfs2_global_disk_dqblk); +- oinfo->dqi_gi.dqi_ops = &ocfs2_global_ops; +- oinfo->dqi_gqi_bh = NULL; +- oinfo->dqi_gqi_count = 0; +- oinfo->dqi_gqinode = gqinode; ++ + status = ocfs2_lock_global_qf(oinfo, 0); + if (status < 0) { + mlog_errno(status); + goto out_err; + } + +- status = ocfs2_extent_map_get_blocks(gqinode, 0, &oinfo->dqi_giblk, ++ status = ocfs2_extent_map_get_blocks(oinfo->dqi_gqinode, 0, &oinfo->dqi_giblk, + &pcount, NULL); + if (status < 0) + goto out_unlock; +diff --git a/fs/ocfs2/quota_local.c b/fs/ocfs2/quota_local.c +index 0e4b16d4c037f..b1a8b046f4c22 100644 +--- a/fs/ocfs2/quota_local.c ++++ b/fs/ocfs2/quota_local.c +@@ -702,8 +702,6 @@ static int ocfs2_local_read_info(struct super_block *sb, int type) + info->dqi_priv = oinfo; + oinfo->dqi_type = type; + INIT_LIST_HEAD(&oinfo->dqi_chunk); +- oinfo->dqi_gqinode = NULL; +- ocfs2_qinfo_lock_res_init(&oinfo->dqi_gqlock, oinfo); + oinfo->dqi_rec = NULL; + oinfo->dqi_lqi_bh = NULL; + oinfo->dqi_libh = NULL; +diff --git a/fs/ocfs2/slot_map.c b/fs/ocfs2/slot_map.c +index 0b0ae3ebb0cf5..da7718cef735e 100644 +--- a/fs/ocfs2/slot_map.c ++++ b/fs/ocfs2/slot_map.c +@@ -252,16 +252,14 @@ static int __ocfs2_find_empty_slot(struct ocfs2_slot_info *si, + int i, ret = 
-ENOSPC; + + if ((preferred >= 0) && (preferred < si->si_num_slots)) { +- if (!si->si_slots[preferred].sl_valid || +- !si->si_slots[preferred].sl_node_num) { ++ if (!si->si_slots[preferred].sl_valid) { + ret = preferred; + goto out; + } + } + + for(i = 0; i < si->si_num_slots; i++) { +- if (!si->si_slots[i].sl_valid || +- !si->si_slots[i].sl_node_num) { ++ if (!si->si_slots[i].sl_valid) { + ret = i; + break; + } +@@ -456,30 +454,24 @@ int ocfs2_find_slot(struct ocfs2_super *osb) + spin_lock(&osb->osb_lock); + ocfs2_update_slot_info(si); + +- if (ocfs2_mount_local(osb)) +- /* use slot 0 directly in local mode */ +- slot = 0; +- else { +- /* search for ourselves first and take the slot if it already +- * exists. Perhaps we need to mark this in a variable for our +- * own journal recovery? Possibly not, though we certainly +- * need to warn to the user */ +- slot = __ocfs2_node_num_to_slot(si, osb->node_num); ++ /* search for ourselves first and take the slot if it already ++ * exists. Perhaps we need to mark this in a variable for our ++ * own journal recovery? Possibly not, though we certainly ++ * need to warn to the user */ ++ slot = __ocfs2_node_num_to_slot(si, osb->node_num); ++ if (slot < 0) { ++ /* if no slot yet, then just take 1st available ++ * one. */ ++ slot = __ocfs2_find_empty_slot(si, osb->preferred_slot); + if (slot < 0) { +- /* if no slot yet, then just take 1st available +- * one. */ +- slot = __ocfs2_find_empty_slot(si, osb->preferred_slot); +- if (slot < 0) { +- spin_unlock(&osb->osb_lock); +- mlog(ML_ERROR, "no free slots available!\n"); +- status = -EINVAL; +- goto bail; +- } +- } else +- printk(KERN_INFO "ocfs2: Slot %d on device (%s) was " +- "already allocated to this node!\n", +- slot, osb->dev_str); +- } ++ spin_unlock(&osb->osb_lock); ++ mlog(ML_ERROR, "no free slots available!\n"); ++ status = -EINVAL; ++ goto bail; ++ } ++ } else ++ printk(KERN_INFO "ocfs2: Slot %d on device (%s) was already " ++ "allocated to this node!\n", slot, osb->dev_str); + + ocfs2_set_slot(si, slot, osb->node_num); + osb->slot_num = slot; +diff --git a/fs/ocfs2/stackglue.c b/fs/ocfs2/stackglue.c +index 16f1bfc407f2a..955f475f9aca6 100644 +--- a/fs/ocfs2/stackglue.c ++++ b/fs/ocfs2/stackglue.c +@@ -703,6 +703,8 @@ static struct ctl_table_header *ocfs2_table_header; + + static int __init ocfs2_stack_glue_init(void) + { ++ int ret; ++ + strcpy(cluster_stack_name, OCFS2_STACK_PLUGIN_O2CB); + + ocfs2_table_header = register_sysctl_table(ocfs2_root_table); +@@ -712,7 +714,11 @@ static int __init ocfs2_stack_glue_init(void) + return -ENOMEM; /* or something. 
*/ + } + +- return ocfs2_sysfs_init(); ++ ret = ocfs2_sysfs_init(); ++ if (ret) ++ unregister_sysctl_table(ocfs2_table_header); ++ ++ return ret; + } + + static void __exit ocfs2_stack_glue_exit(void) +diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c +index 481017e1dac5a..166c8918c825a 100644 +--- a/fs/ocfs2/suballoc.c ++++ b/fs/ocfs2/suballoc.c +@@ -1251,26 +1251,23 @@ static int ocfs2_test_bg_bit_allocatable(struct buffer_head *bg_bh, + { + struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) bg_bh->b_data; + struct journal_head *jh; +- int ret = 1; ++ int ret; + + if (ocfs2_test_bit(nr, (unsigned long *)bg->bg_bitmap)) + return 0; + +- if (!buffer_jbd(bg_bh)) ++ jh = jbd2_journal_grab_journal_head(bg_bh); ++ if (!jh) + return 1; + +- jbd_lock_bh_journal_head(bg_bh); +- if (buffer_jbd(bg_bh)) { +- jh = bh2jh(bg_bh); +- spin_lock(&jh->b_state_lock); +- bg = (struct ocfs2_group_desc *) jh->b_committed_data; +- if (bg) +- ret = !ocfs2_test_bit(nr, (unsigned long *)bg->bg_bitmap); +- else +- ret = 1; +- spin_unlock(&jh->b_state_lock); +- } +- jbd_unlock_bh_journal_head(bg_bh); ++ spin_lock(&jh->b_state_lock); ++ bg = (struct ocfs2_group_desc *) jh->b_committed_data; ++ if (bg) ++ ret = !ocfs2_test_bit(nr, (unsigned long *)bg->bg_bitmap); ++ else ++ ret = 1; ++ spin_unlock(&jh->b_state_lock); ++ jbd2_journal_put_journal_head(jh); + + return ret; + } +diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c +index 5c914ce9b3ac9..a03f0cabff0bf 100644 +--- a/fs/ocfs2/super.c ++++ b/fs/ocfs2/super.c +@@ -173,7 +173,6 @@ enum { + Opt_dir_resv_level, + Opt_journal_async_commit, + Opt_err_cont, +- Opt_nocluster, + Opt_err, + }; + +@@ -207,7 +206,6 @@ static const match_table_t tokens = { + {Opt_dir_resv_level, "dir_resv_level=%u"}, + {Opt_journal_async_commit, "journal_async_commit"}, + {Opt_err_cont, "errors=continue"}, +- {Opt_nocluster, "nocluster"}, + {Opt_err, NULL} + }; + +@@ -619,13 +617,6 @@ static int ocfs2_remount(struct super_block *sb, int *flags, char *data) + goto out; + } + +- tmp = OCFS2_MOUNT_NOCLUSTER; +- if ((osb->s_mount_opt & tmp) != (parsed_options.mount_opt & tmp)) { +- ret = -EINVAL; +- mlog(ML_ERROR, "Cannot change nocluster option on remount\n"); +- goto out; +- } +- + tmp = OCFS2_MOUNT_HB_LOCAL | OCFS2_MOUNT_HB_GLOBAL | + OCFS2_MOUNT_HB_NONE; + if ((osb->s_mount_opt & tmp) != (parsed_options.mount_opt & tmp)) { +@@ -866,7 +857,6 @@ static int ocfs2_verify_userspace_stack(struct ocfs2_super *osb, + } + + if (ocfs2_userspace_stack(osb) && +- !(osb->s_mount_opt & OCFS2_MOUNT_NOCLUSTER) && + strncmp(osb->osb_cluster_stack, mopt->cluster_stack, + OCFS2_STACK_LABEL_LEN)) { + mlog(ML_ERROR, +@@ -1106,17 +1096,6 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent) + goto read_super_error; + } + +- root = d_make_root(inode); +- if (!root) { +- status = -ENOMEM; +- mlog_errno(status); +- goto read_super_error; +- } +- +- sb->s_root = root; +- +- ocfs2_complete_mount_recovery(osb); +- + osb->osb_dev_kset = kset_create_and_add(sb->s_id, NULL, + &ocfs2_kset->kobj); + if (!osb->osb_dev_kset) { +@@ -1134,6 +1113,17 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent) + goto read_super_error; + } + ++ root = d_make_root(inode); ++ if (!root) { ++ status = -ENOMEM; ++ mlog_errno(status); ++ goto read_super_error; ++ } ++ ++ sb->s_root = root; ++ ++ ocfs2_complete_mount_recovery(osb); ++ + if (ocfs2_mount_local(osb)) + snprintf(nodestr, sizeof(nodestr), "local"); + else +@@ -1145,11 +1135,6 @@ static int ocfs2_fill_super(struct 
super_block *sb, void *data, int silent) + osb->s_mount_opt & OCFS2_MOUNT_DATA_WRITEBACK ? "writeback" : + "ordered"); + +- if ((osb->s_mount_opt & OCFS2_MOUNT_NOCLUSTER) && +- !(osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_LOCAL_MOUNT)) +- printk(KERN_NOTICE "ocfs2: The shared device (%s) is mounted " +- "without cluster aware mode.\n", osb->dev_str); +- + atomic_set(&osb->vol_state, VOLUME_MOUNTED); + wake_up(&osb->osb_mount_event); + +@@ -1456,9 +1441,6 @@ static int ocfs2_parse_options(struct super_block *sb, + case Opt_journal_async_commit: + mopt->mount_opt |= OCFS2_MOUNT_JOURNAL_ASYNC_COMMIT; + break; +- case Opt_nocluster: +- mopt->mount_opt |= OCFS2_MOUNT_NOCLUSTER; +- break; + default: + mlog(ML_ERROR, + "Unrecognized mount option \"%s\" " +@@ -1570,9 +1552,6 @@ static int ocfs2_show_options(struct seq_file *s, struct dentry *root) + if (opts & OCFS2_MOUNT_JOURNAL_ASYNC_COMMIT) + seq_printf(s, ",journal_async_commit"); + +- if (opts & OCFS2_MOUNT_NOCLUSTER) +- seq_printf(s, ",nocluster"); +- + return 0; + } + +diff --git a/fs/open.c b/fs/open.c +index daa324606a41f..5e322f188e839 100644 +--- a/fs/open.c ++++ b/fs/open.c +@@ -32,6 +32,7 @@ + #include <linux/ima.h> + #include <linux/dnotify.h> + #include <linux/compat.h> ++#include <linux/mnt_idmapping.h> + + #include "internal.h" + +@@ -640,7 +641,7 @@ SYSCALL_DEFINE2(chmod, const char __user *, filename, umode_t, mode) + + int chown_common(const struct path *path, uid_t user, gid_t group) + { +- struct user_namespace *mnt_userns; ++ struct user_namespace *mnt_userns, *fs_userns; + struct inode *inode = path->dentry->d_inode; + struct inode *delegated_inode = NULL; + int error; +@@ -652,8 +653,9 @@ int chown_common(const struct path *path, uid_t user, gid_t group) + gid = make_kgid(current_user_ns(), group); + + mnt_userns = mnt_user_ns(path->mnt); +- uid = kuid_from_mnt(mnt_userns, uid); +- gid = kgid_from_mnt(mnt_userns, gid); ++ fs_userns = i_user_ns(inode); ++ uid = mapped_kuid_user(mnt_userns, fs_userns, uid); ++ gid = mapped_kgid_user(mnt_userns, fs_userns, gid); + + retry_deleg: + newattrs.ia_valid = ATTR_CTIME; +@@ -784,7 +786,9 @@ static int do_dentry_open(struct file *f, + return 0; + } + +- if (f->f_mode & FMODE_WRITE && !special_file(inode->i_mode)) { ++ if ((f->f_mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ) { ++ i_readcount_inc(inode); ++ } else if (f->f_mode & FMODE_WRITE && !special_file(inode->i_mode)) { + error = get_write_access(inode); + if (unlikely(error)) + goto cleanup_file; +@@ -824,8 +828,6 @@ static int do_dentry_open(struct file *f, + goto cleanup_all; + } + f->f_mode |= FMODE_OPENED; +- if ((f->f_mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ) +- i_readcount_inc(inode); + if ((f->f_mode & FMODE_READ) && + likely(f->f_op->read || f->f_op->read_iter)) + f->f_mode |= FMODE_CAN_READ; +@@ -856,8 +858,20 @@ static int do_dentry_open(struct file *f, + * of THPs into the page cache will fail. + */ + smp_mb(); +- if (filemap_nr_thps(inode->i_mapping)) +- truncate_pagecache(inode, 0); ++ if (filemap_nr_thps(inode->i_mapping)) { ++ struct address_space *mapping = inode->i_mapping; ++ ++ filemap_invalidate_lock(inode->i_mapping); ++ /* ++ * unmap_mapping_range just need to be called once ++ * here, because the private pages is not need to be ++ * unmapped mapping (e.g. data segment of dynamic ++ * shared libraries here). 
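++			 * That is, only the shared, file-backed mappings of
++			 * this inode need tearing down; private copy-on-write
++			 * pages can keep their mappings, which is why the
++			 * final even_cows argument to unmap_mapping_range()
++			 * above is 0.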
++ */ ++ unmap_mapping_range(mapping, 0, 0, 0); ++ truncate_inode_pages(mapping, 0); ++ filemap_invalidate_unlock(inode->i_mapping); ++ } + } + + return 0; +@@ -866,10 +880,7 @@ cleanup_all: + if (WARN_ON_ONCE(error > 0)) + error = -EINVAL; + fops_put(f->f_op); +- if (f->f_mode & FMODE_WRITER) { +- put_write_access(inode); +- __mnt_drop_write(f->f_path.mnt); +- } ++ put_file_access(f); + cleanup_file: + path_put(&f->f_path); + f->f_path.mnt = NULL; +diff --git a/fs/orangefs/dcache.c b/fs/orangefs/dcache.c +index fe484cf93e5cd..8bbe9486e3a62 100644 +--- a/fs/orangefs/dcache.c ++++ b/fs/orangefs/dcache.c +@@ -26,8 +26,10 @@ static int orangefs_revalidate_lookup(struct dentry *dentry) + gossip_debug(GOSSIP_DCACHE_DEBUG, "%s: attempting lookup.\n", __func__); + + new_op = op_alloc(ORANGEFS_VFS_OP_LOOKUP); +- if (!new_op) ++ if (!new_op) { ++ ret = -ENOMEM; + goto out_put_parent; ++ } + + new_op->upcall.req.lookup.sym_follow = ORANGEFS_LOOKUP_LINK_NO_FOLLOW; + new_op->upcall.req.lookup.parent_refn = parent->refn; +diff --git a/fs/orangefs/orangefs-bufmap.c b/fs/orangefs/orangefs-bufmap.c +index 538e839590ef5..b501dc07f9222 100644 +--- a/fs/orangefs/orangefs-bufmap.c ++++ b/fs/orangefs/orangefs-bufmap.c +@@ -176,7 +176,7 @@ orangefs_bufmap_free(struct orangefs_bufmap *bufmap) + { + kfree(bufmap->page_array); + kfree(bufmap->desc_array); +- kfree(bufmap->buffer_index_array); ++ bitmap_free(bufmap->buffer_index_array); + kfree(bufmap); + } + +@@ -226,8 +226,7 @@ orangefs_bufmap_alloc(struct ORANGEFS_dev_map_desc *user_desc) + bufmap->desc_size = user_desc->size; + bufmap->desc_shift = ilog2(bufmap->desc_size); + +- bufmap->buffer_index_array = +- kzalloc(DIV_ROUND_UP(bufmap->desc_count, BITS_PER_LONG), GFP_KERNEL); ++ bufmap->buffer_index_array = bitmap_zalloc(bufmap->desc_count, GFP_KERNEL); + if (!bufmap->buffer_index_array) + goto out_free_bufmap; + +@@ -250,7 +249,7 @@ orangefs_bufmap_alloc(struct ORANGEFS_dev_map_desc *user_desc) + out_free_desc_array: + kfree(bufmap->desc_array); + out_free_index_array: +- kfree(bufmap->buffer_index_array); ++ bitmap_free(bufmap->buffer_index_array); + out_free_bufmap: + kfree(bufmap); + out: +diff --git a/fs/orangefs/orangefs-debugfs.c b/fs/orangefs/orangefs-debugfs.c +index 29eaa45443727..1b508f5433846 100644 +--- a/fs/orangefs/orangefs-debugfs.c ++++ b/fs/orangefs/orangefs-debugfs.c +@@ -194,15 +194,10 @@ void orangefs_debugfs_init(int debug_mask) + */ + static void orangefs_kernel_debug_init(void) + { +- int rc = -ENOMEM; +- char *k_buffer = NULL; ++ static char k_buffer[ORANGEFS_MAX_DEBUG_STRING_LEN] = { }; + + gossip_debug(GOSSIP_DEBUGFS_DEBUG, "%s: start\n", __func__); + +- k_buffer = kzalloc(ORANGEFS_MAX_DEBUG_STRING_LEN, GFP_KERNEL); +- if (!k_buffer) +- goto out; +- + if (strlen(kernel_debug_string) + 1 < ORANGEFS_MAX_DEBUG_STRING_LEN) { + strcpy(k_buffer, kernel_debug_string); + strcat(k_buffer, "\n"); +@@ -213,15 +208,14 @@ static void orangefs_kernel_debug_init(void) + + debugfs_create_file(ORANGEFS_KMOD_DEBUG_FILE, 0444, debug_dir, k_buffer, + &kernel_debug_fops); +- +-out: +- gossip_debug(GOSSIP_DEBUGFS_DEBUG, "%s: rc:%d:\n", __func__, rc); + } + + + void orangefs_debugfs_cleanup(void) + { + debugfs_remove_recursive(debug_dir); ++ kfree(debug_help_string); ++ debug_help_string = NULL; + } + + /* open ORANGEFS_KMOD_DEBUG_HELP_FILE */ +@@ -297,18 +291,13 @@ static int help_show(struct seq_file *m, void *v) + /* + * initialize the client-debug file. 
+ */ +-static int orangefs_client_debug_init(void) ++static void orangefs_client_debug_init(void) + { + +- int rc = -ENOMEM; +- char *c_buffer = NULL; ++ static char c_buffer[ORANGEFS_MAX_DEBUG_STRING_LEN] = { }; + + gossip_debug(GOSSIP_DEBUGFS_DEBUG, "%s: start\n", __func__); + +- c_buffer = kzalloc(ORANGEFS_MAX_DEBUG_STRING_LEN, GFP_KERNEL); +- if (!c_buffer) +- goto out; +- + if (strlen(client_debug_string) + 1 < ORANGEFS_MAX_DEBUG_STRING_LEN) { + strcpy(c_buffer, client_debug_string); + strcat(c_buffer, "\n"); +@@ -322,13 +311,6 @@ static int orangefs_client_debug_init(void) + debug_dir, + c_buffer, + &kernel_debug_fops); +- +- rc = 0; +- +-out: +- +- gossip_debug(GOSSIP_DEBUGFS_DEBUG, "%s: rc:%d:\n", __func__, rc); +- return rc; + } + + /* open ORANGEFS_KMOD_DEBUG_FILE or ORANGEFS_CLIENT_DEBUG_FILE.*/ +@@ -671,6 +653,7 @@ int orangefs_prepare_debugfs_help_string(int at_boot) + memset(debug_help_string, 0, DEBUG_HELP_STRING_SIZE); + strlcat(debug_help_string, new, string_size); + mutex_unlock(&orangefs_help_file_lock); ++ kfree(new); + } + + rc = 0; +diff --git a/fs/orangefs/orangefs-mod.c b/fs/orangefs/orangefs-mod.c +index cd7297815f91e..5ab741c60b7e2 100644 +--- a/fs/orangefs/orangefs-mod.c ++++ b/fs/orangefs/orangefs-mod.c +@@ -141,7 +141,7 @@ static int __init orangefs_init(void) + gossip_err("%s: could not initialize device subsystem %d!\n", + __func__, + ret); +- goto cleanup_device; ++ goto cleanup_sysfs; + } + + ret = register_filesystem(&orangefs_fs_type); +@@ -152,11 +152,11 @@ static int __init orangefs_init(void) + goto out; + } + +- orangefs_sysfs_exit(); +- +-cleanup_device: + orangefs_dev_cleanup(); + ++cleanup_sysfs: ++ orangefs_sysfs_exit(); ++ + sysfs_init_failed: + orangefs_debugfs_cleanup(); + +diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c +index 4e7d5bfa2949f..ef0bf98b620d7 100644 +--- a/fs/overlayfs/copy_up.c ++++ b/fs/overlayfs/copy_up.c +@@ -140,12 +140,14 @@ static int ovl_copy_fileattr(struct inode *inode, struct path *old, + int err; + + err = ovl_real_fileattr_get(old, &oldfa); +- if (err) +- return err; +- +- err = ovl_real_fileattr_get(new, &newfa); +- if (err) ++ if (err) { ++ /* Ntfs-3g returns -EINVAL for "no fileattr support" */ ++ if (err == -ENOTTY || err == -EINVAL) ++ return 0; ++ pr_warn("failed to retrieve lower fileattr (%pd2, err=%i)\n", ++ old->dentry, err); + return err; ++ } + + /* + * We cannot set immutable and append-only flags on upper inode, +@@ -155,10 +157,31 @@ static int ovl_copy_fileattr(struct inode *inode, struct path *old, + */ + if (oldfa.flags & OVL_PROT_FS_FLAGS_MASK) { + err = ovl_set_protattr(inode, new->dentry, &oldfa); +- if (err) ++ if (err == -EPERM) ++ pr_warn_once("copying fileattr: no xattr on upper\n"); ++ else if (err) + return err; + } + ++ /* Don't bother copying flags if none are set */ ++ if (!(oldfa.flags & OVL_COPY_FS_FLAGS_MASK)) ++ return 0; ++ ++ err = ovl_real_fileattr_get(new, &newfa); ++ if (err) { ++ /* ++ * Returning an error if upper doesn't support fileattr will ++ * result in a regression, so revert to the old behavior. 
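++		 * (For example, an ntfs-3g upper reporting -EINVAL is
++		 * treated as "no fileattr support" here, mirroring the
++		 * lower-layer check above.)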
++ */ ++ if (err == -ENOTTY || err == -EINVAL) { ++ pr_warn_once("copying fileattr: no support on upper\n"); ++ return 0; ++ } ++ pr_warn("failed to retrieve upper fileattr (%pd2, err=%i)\n", ++ new->dentry, err); ++ return err; ++ } ++ + BUILD_BUG_ON(OVL_COPY_FS_FLAGS_MASK & ~FS_COMMON_FL); + newfa.flags &= ~OVL_COPY_FS_FLAGS_MASK; + newfa.flags |= (oldfa.flags & OVL_COPY_FS_FLAGS_MASK); +@@ -937,6 +960,10 @@ static int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry, + if (err) + return err; + ++ if (!kuid_has_mapping(current_user_ns(), ctx.stat.uid) || ++ !kgid_has_mapping(current_user_ns(), ctx.stat.gid)) ++ return -EOVERFLOW; ++ + ctx.metacopy = ovl_need_meta_copy_up(dentry, ctx.stat.mode, flags); + + if (parent) { +diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c +index 93c7c267de934..eca984d6484d1 100644 +--- a/fs/overlayfs/dir.c ++++ b/fs/overlayfs/dir.c +@@ -137,8 +137,7 @@ kill_whiteout: + goto out; + } + +-static int ovl_mkdir_real(struct inode *dir, struct dentry **newdentry, +- umode_t mode) ++int ovl_mkdir_real(struct inode *dir, struct dentry **newdentry, umode_t mode) + { + int err; + struct dentry *d, *dentry = *newdentry; +@@ -590,28 +589,42 @@ static int ovl_create_or_link(struct dentry *dentry, struct inode *inode, + goto out_revert_creds; + } + +- err = -ENOMEM; +- override_cred = prepare_creds(); +- if (override_cred) { ++ if (!attr->hardlink) { ++ err = -ENOMEM; ++ override_cred = prepare_creds(); ++ if (!override_cred) ++ goto out_revert_creds; ++ /* ++ * In the creation cases(create, mkdir, mknod, symlink), ++ * ovl should transfer current's fs{u,g}id to underlying ++ * fs. Because underlying fs want to initialize its new ++ * inode owner using current's fs{u,g}id. And in this ++ * case, the @inode is a new inode that is initialized ++ * in inode_init_owner() to current's fs{u,g}id. So use ++ * the inode's i_{u,g}id to override the cred's fs{u,g}id. ++ * ++ * But in the other hardlink case, ovl_link() does not ++ * create a new inode, so just use the ovl mounter's ++ * fs{u,g}id. 
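++		 *
++		 * (Illustration: a task with fsuid 1000 creating a file
++		 * through the overlay already has inode->i_uid == 1000 from
++		 * inode_init_owner(), so the underlying fs creates the file
++		 * with that same owner.)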
++ */ + override_cred->fsuid = inode->i_uid; + override_cred->fsgid = inode->i_gid; +- if (!attr->hardlink) { +- err = security_dentry_create_files_as(dentry, +- attr->mode, &dentry->d_name, old_cred, +- override_cred); +- if (err) { +- put_cred(override_cred); +- goto out_revert_creds; +- } ++ err = security_dentry_create_files_as(dentry, ++ attr->mode, &dentry->d_name, old_cred, ++ override_cred); ++ if (err) { ++ put_cred(override_cred); ++ goto out_revert_creds; + } + put_cred(override_creds(override_cred)); + put_cred(override_cred); +- +- if (!ovl_dentry_is_whiteout(dentry)) +- err = ovl_create_upper(dentry, inode, attr); +- else +- err = ovl_create_over_whiteout(dentry, inode, attr); + } ++ ++ if (!ovl_dentry_is_whiteout(dentry)) ++ err = ovl_create_upper(dentry, inode, attr); ++ else ++ err = ovl_create_over_whiteout(dentry, inode, attr); ++ + out_revert_creds: + revert_creds(old_cred); + return err; +@@ -881,7 +894,6 @@ static int ovl_do_remove(struct dentry *dentry, bool is_dir) + { + int err; + const struct cred *old_cred; +- struct dentry *upperdentry; + bool lower_positive = ovl_lower_positive(dentry); + LIST_HEAD(list); + +@@ -924,9 +936,8 @@ static int ovl_do_remove(struct dentry *dentry, bool is_dir) + * Note: we fail to update ctime if there was no copy-up, only a + * whiteout + */ +- upperdentry = ovl_dentry_upper(dentry); +- if (upperdentry) +- ovl_copyattr(d_inode(upperdentry), d_inode(dentry)); ++ if (ovl_dentry_upper(dentry)) ++ ovl_copyattr(d_inode(dentry)); + + out_drop_write: + ovl_drop_write(dentry); +@@ -1273,9 +1284,9 @@ static int ovl_rename(struct user_namespace *mnt_userns, struct inode *olddir, + (d_inode(new) && ovl_type_origin(new))); + + /* copy ctime: */ +- ovl_copyattr(d_inode(olddentry), d_inode(old)); ++ ovl_copyattr(d_inode(old)); + if (d_inode(new) && ovl_dentry_upper(new)) +- ovl_copyattr(d_inode(newdentry), d_inode(new)); ++ ovl_copyattr(d_inode(new)); + + out_dput: + dput(newdentry); +diff --git a/fs/overlayfs/export.c b/fs/overlayfs/export.c +index ebde05c9cf62e..0cc14ce8c7e83 100644 +--- a/fs/overlayfs/export.c ++++ b/fs/overlayfs/export.c +@@ -259,7 +259,7 @@ static int ovl_encode_fh(struct inode *inode, u32 *fid, int *max_len, + return FILEID_INVALID; + + dentry = d_find_any_alias(inode); +- if (WARN_ON(!dentry)) ++ if (!dentry) + return FILEID_INVALID; + + bytes = ovl_dentry_to_fid(ofs, dentry, fid, buflen); +@@ -791,7 +791,7 @@ static struct ovl_fh *ovl_fid_to_fh(struct fid *fid, int buflen, int fh_type) + return ERR_PTR(-ENOMEM); + + /* Copy unaligned inner fh into aligned buffer */ +- memcpy(&fh->fb, fid, buflen - OVL_FH_WIRE_OFFSET); ++ memcpy(fh->buf, fid, buflen - OVL_FH_WIRE_OFFSET); + return fh; + } + +diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c +index c88ac571593dc..28cb05ef018c7 100644 +--- a/fs/overlayfs/file.c ++++ b/fs/overlayfs/file.c +@@ -17,6 +17,7 @@ + + struct ovl_aio_req { + struct kiocb iocb; ++ refcount_t ref; + struct kiocb *orig_iocb; + struct fd fd; + }; +@@ -252,6 +253,14 @@ static rwf_t ovl_iocb_to_rwf(int ifl) + return flags; + } + ++static inline void ovl_aio_put(struct ovl_aio_req *aio_req) ++{ ++ if (refcount_dec_and_test(&aio_req->ref)) { ++ fdput(aio_req->fd); ++ kmem_cache_free(ovl_aio_request_cachep, aio_req); ++ } ++} ++ + static void ovl_aio_cleanup_handler(struct ovl_aio_req *aio_req) + { + struct kiocb *iocb = &aio_req->iocb; +@@ -264,12 +273,11 @@ static void ovl_aio_cleanup_handler(struct ovl_aio_req *aio_req) + __sb_writers_acquired(file_inode(iocb->ki_filp)->i_sb, + SB_FREEZE_WRITE); + 
file_end_write(iocb->ki_filp); +- ovl_copyattr(ovl_inode_real(inode), inode); ++ ovl_copyattr(inode); + } + + orig_iocb->ki_pos = iocb->ki_pos; +- fdput(aio_req->fd); +- kmem_cache_free(ovl_aio_request_cachep, aio_req); ++ ovl_aio_put(aio_req); + } + + static void ovl_aio_rw_complete(struct kiocb *iocb, long res, long res2) +@@ -319,7 +327,9 @@ static ssize_t ovl_read_iter(struct kiocb *iocb, struct iov_iter *iter) + aio_req->orig_iocb = iocb; + kiocb_clone(&aio_req->iocb, iocb, real.file); + aio_req->iocb.ki_complete = ovl_aio_rw_complete; ++ refcount_set(&aio_req->ref, 2); + ret = vfs_iocb_iter_read(real.file, &aio_req->iocb, iter); ++ ovl_aio_put(aio_req); + if (ret != -EIOCBQUEUED) + ovl_aio_cleanup_handler(aio_req); + } +@@ -346,7 +356,7 @@ static ssize_t ovl_write_iter(struct kiocb *iocb, struct iov_iter *iter) + + inode_lock(inode); + /* Update mode */ +- ovl_copyattr(ovl_inode_real(inode), inode); ++ ovl_copyattr(inode); + ret = file_remove_privs(file); + if (ret) + goto out_unlock; +@@ -371,7 +381,7 @@ static ssize_t ovl_write_iter(struct kiocb *iocb, struct iov_iter *iter) + ovl_iocb_to_rwf(ifl)); + file_end_write(real.file); + /* Update size */ +- ovl_copyattr(ovl_inode_real(inode), inode); ++ ovl_copyattr(inode); + } else { + struct ovl_aio_req *aio_req; + +@@ -390,7 +400,9 @@ static ssize_t ovl_write_iter(struct kiocb *iocb, struct iov_iter *iter) + kiocb_clone(&aio_req->iocb, iocb, real.file); + aio_req->iocb.ki_flags = ifl; + aio_req->iocb.ki_complete = ovl_aio_rw_complete; ++ refcount_set(&aio_req->ref, 2); + ret = vfs_iocb_iter_write(real.file, &aio_req->iocb, iter); ++ ovl_aio_put(aio_req); + if (ret != -EIOCBQUEUED) + ovl_aio_cleanup_handler(aio_req); + } +@@ -419,12 +431,11 @@ static ssize_t ovl_splice_write(struct pipe_inode_info *pipe, struct file *out, + struct fd real; + const struct cred *old_cred; + struct inode *inode = file_inode(out); +- struct inode *realinode = ovl_inode_real(inode); + ssize_t ret; + + inode_lock(inode); + /* Update mode */ +- ovl_copyattr(realinode, inode); ++ ovl_copyattr(inode); + ret = file_remove_privs(out); + if (ret) + goto out_unlock; +@@ -440,7 +451,7 @@ static ssize_t ovl_splice_write(struct pipe_inode_info *pipe, struct file *out, + + file_end_write(real.file); + /* Update size */ +- ovl_copyattr(realinode, inode); ++ ovl_copyattr(inode); + revert_creds(old_cred); + fdput(real); + +@@ -505,19 +516,29 @@ static long ovl_fallocate(struct file *file, int mode, loff_t offset, loff_t len + const struct cred *old_cred; + int ret; + ++ inode_lock(inode); ++ /* Update mode */ ++ ovl_copyattr(inode); ++ ret = file_remove_privs(file); ++ if (ret) ++ goto out_unlock; ++ + ret = ovl_real_fdget(file, &real); + if (ret) +- return ret; ++ goto out_unlock; + + old_cred = ovl_override_creds(file_inode(file)->i_sb); + ret = vfs_fallocate(real.file, mode, offset, len); + revert_creds(old_cred); + + /* Update size */ +- ovl_copyattr(ovl_inode_real(inode), inode); ++ ovl_copyattr(inode); + + fdput(real); + ++out_unlock: ++ inode_unlock(inode); ++ + return ret; + } + +@@ -555,14 +576,23 @@ static loff_t ovl_copyfile(struct file *file_in, loff_t pos_in, + const struct cred *old_cred; + loff_t ret; + ++ inode_lock(inode_out); ++ if (op != OVL_DEDUPE) { ++ /* Update mode */ ++ ovl_copyattr(inode_out); ++ ret = file_remove_privs(file_out); ++ if (ret) ++ goto out_unlock; ++ } ++ + ret = ovl_real_fdget(file_out, &real_out); + if (ret) +- return ret; ++ goto out_unlock; + + ret = ovl_real_fdget(file_in, &real_in); + if (ret) { + fdput(real_out); +- return 
ret; ++ goto out_unlock; + } + + old_cred = ovl_override_creds(file_inode(file_out)->i_sb); +@@ -586,11 +616,14 @@ static loff_t ovl_copyfile(struct file *file_in, loff_t pos_in, + revert_creds(old_cred); + + /* Update size */ +- ovl_copyattr(ovl_inode_real(inode_out), inode_out); ++ ovl_copyattr(inode_out); + + fdput(real_in); + fdput(real_out); + ++out_unlock: ++ inode_unlock(inode_out); ++ + return ret; + } + +diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c +index 832b17589733a..d41f0c8e0e2a5 100644 +--- a/fs/overlayfs/inode.c ++++ b/fs/overlayfs/inode.c +@@ -80,7 +80,7 @@ int ovl_setattr(struct user_namespace *mnt_userns, struct dentry *dentry, + err = notify_change(&init_user_ns, upperdentry, attr, NULL); + revert_creds(old_cred); + if (!err) +- ovl_copyattr(upperdentry->d_inode, dentry->d_inode); ++ ovl_copyattr(dentry->d_inode); + inode_unlock(upperdentry->d_inode); + + if (winode) +@@ -377,7 +377,7 @@ int ovl_xattr_set(struct dentry *dentry, struct inode *inode, const char *name, + revert_creds(old_cred); + + /* copy c/mtime */ +- ovl_copyattr(d_inode(realdentry), inode); ++ ovl_copyattr(inode); + + out_drop_write: + ovl_drop_write(dentry); +@@ -579,7 +579,7 @@ int ovl_fileattr_set(struct user_namespace *mnt_userns, + inode_set_flags(inode, flags, OVL_COPY_I_FLAGS_MASK); + + /* Update ctime */ +- ovl_copyattr(ovl_inode_real(inode), inode); ++ ovl_copyattr(inode); + } + ovl_drop_write(dentry); + out: +@@ -610,7 +610,10 @@ int ovl_real_fileattr_get(struct path *realpath, struct fileattr *fa) + if (err) + return err; + +- return vfs_fileattr_get(realpath->dentry, fa); ++ err = vfs_fileattr_get(realpath->dentry, fa); ++ if (err == -ENOIOCTLCMD) ++ err = -ENOTTY; ++ return err; + } + + int ovl_fileattr_get(struct dentry *dentry, struct fileattr *fa) +@@ -774,16 +777,19 @@ void ovl_inode_init(struct inode *inode, struct ovl_inode_params *oip, + unsigned long ino, int fsid) + { + struct inode *realinode; ++ struct ovl_inode *oi = OVL_I(inode); + + if (oip->upperdentry) +- OVL_I(inode)->__upperdentry = oip->upperdentry; +- if (oip->lowerpath && oip->lowerpath->dentry) +- OVL_I(inode)->lower = igrab(d_inode(oip->lowerpath->dentry)); ++ oi->__upperdentry = oip->upperdentry; ++ if (oip->lowerpath && oip->lowerpath->dentry) { ++ oi->lowerpath.dentry = dget(oip->lowerpath->dentry); ++ oi->lowerpath.layer = oip->lowerpath->layer; ++ } + if (oip->lowerdata) +- OVL_I(inode)->lowerdata = igrab(d_inode(oip->lowerdata)); ++ oi->lowerdata = igrab(d_inode(oip->lowerdata)); + + realinode = ovl_inode_real(inode); +- ovl_copyattr(realinode, inode); ++ ovl_copyattr(inode); + ovl_copyflags(realinode, inode); + ovl_map_ino(inode, ino, fsid); + } +diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h +index 3894f33479552..ae4876da2ced2 100644 +--- a/fs/overlayfs/overlayfs.h ++++ b/fs/overlayfs/overlayfs.h +@@ -107,7 +107,7 @@ struct ovl_fh { + u8 padding[3]; /* make sure fb.fid is 32bit aligned */ + union { + struct ovl_fb fb; +- u8 buf[0]; ++ DECLARE_FLEX_ARRAY(u8, buf); + }; + } __packed; + +@@ -293,10 +293,12 @@ enum ovl_path_type ovl_path_type(struct dentry *dentry); + void ovl_path_upper(struct dentry *dentry, struct path *path); + void ovl_path_lower(struct dentry *dentry, struct path *path); + void ovl_path_lowerdata(struct dentry *dentry, struct path *path); ++void ovl_i_path_real(struct inode *inode, struct path *path); + enum ovl_path_type ovl_path_real(struct dentry *dentry, struct path *path); + struct dentry *ovl_dentry_upper(struct dentry *dentry); + struct dentry 
*ovl_dentry_lower(struct dentry *dentry); + struct dentry *ovl_dentry_lowerdata(struct dentry *dentry); ++const struct ovl_layer *ovl_i_layer_lower(struct inode *inode); + const struct ovl_layer *ovl_layer_lower(struct dentry *dentry); + struct dentry *ovl_dentry_real(struct dentry *dentry); + struct dentry *ovl_i_dentry_upper(struct inode *inode); +@@ -520,16 +522,7 @@ bool ovl_lookup_trap_inode(struct super_block *sb, struct dentry *dir); + struct inode *ovl_get_trap_inode(struct super_block *sb, struct dentry *dir); + struct inode *ovl_get_inode(struct super_block *sb, + struct ovl_inode_params *oip); +-static inline void ovl_copyattr(struct inode *from, struct inode *to) +-{ +- to->i_uid = from->i_uid; +- to->i_gid = from->i_gid; +- to->i_mode = from->i_mode; +- to->i_atime = from->i_atime; +- to->i_mtime = from->i_mtime; +- to->i_ctime = from->i_ctime; +- i_size_write(to, i_size_read(from)); +-} ++void ovl_copyattr(struct inode *to); + + /* vfs inode flags copied from real to ovl inode */ + #define OVL_COPY_I_FLAGS_MASK (S_SYNC | S_NOATIME | S_APPEND | S_IMMUTABLE) +@@ -570,6 +563,7 @@ struct ovl_cattr { + + #define OVL_CATTR(m) (&(struct ovl_cattr) { .mode = (m) }) + ++int ovl_mkdir_real(struct inode *dir, struct dentry **newdentry, umode_t mode); + struct dentry *ovl_create_real(struct inode *dir, struct dentry *newdentry, + struct ovl_cattr *attr); + int ovl_cleanup(struct inode *dir, struct dentry *dentry); +diff --git a/fs/overlayfs/ovl_entry.h b/fs/overlayfs/ovl_entry.h +index 63efee554f69a..b2d64f3c974bb 100644 +--- a/fs/overlayfs/ovl_entry.h ++++ b/fs/overlayfs/ovl_entry.h +@@ -129,7 +129,7 @@ struct ovl_inode { + unsigned long flags; + struct inode vfs_inode; + struct dentry *__upperdentry; +- struct inode *lower; ++ struct ovl_path lowerpath; + + /* synchronize copy up and more */ + struct mutex lock; +diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c +index 178daa5e82c9d..b3675d13c1ac2 100644 +--- a/fs/overlayfs/super.c ++++ b/fs/overlayfs/super.c +@@ -138,11 +138,16 @@ static int ovl_dentry_revalidate_common(struct dentry *dentry, + unsigned int flags, bool weak) + { + struct ovl_entry *oe = dentry->d_fsdata; ++ struct inode *inode = d_inode_rcu(dentry); + struct dentry *upper; + unsigned int i; + int ret = 1; + +- upper = ovl_dentry_upper(dentry); ++ /* Careful in RCU mode */ ++ if (!inode) ++ return -ECHILD; ++ ++ upper = ovl_i_dentry_upper(inode); + if (upper) + ret = ovl_revalidate_real(upper, flags, weak); + +@@ -184,7 +189,8 @@ static struct inode *ovl_alloc_inode(struct super_block *sb) + oi->version = 0; + oi->flags = 0; + oi->__upperdentry = NULL; +- oi->lower = NULL; ++ oi->lowerpath.dentry = NULL; ++ oi->lowerpath.layer = NULL; + oi->lowerdata = NULL; + mutex_init(&oi->lock); + +@@ -205,7 +211,7 @@ static void ovl_destroy_inode(struct inode *inode) + struct ovl_inode *oi = OVL_I(inode); + + dput(oi->__upperdentry); +- iput(oi->lower); ++ dput(oi->lowerpath.dentry); + if (S_ISDIR(inode->i_mode)) + ovl_dir_cache_free(inode); + else +@@ -787,10 +793,14 @@ retry: + goto retry; + } + +- work = ovl_create_real(dir, work, OVL_CATTR(attr.ia_mode)); +- err = PTR_ERR(work); +- if (IS_ERR(work)) +- goto out_err; ++ err = ovl_mkdir_real(dir, &work, attr.ia_mode); ++ if (err) ++ goto out_dput; ++ ++ /* Weird filesystem returning with hashed negative (kernfs)? */ ++ err = -EINVAL; ++ if (d_really_is_negative(work)) ++ goto out_dput; + + /* + * Try to remove POSIX ACL xattrs from workdir. 
We are good if: +@@ -869,7 +879,7 @@ static int ovl_mount_dir_noesc(const char *name, struct path *path) + pr_err("filesystem on '%s' not supported\n", name); + goto out_put; + } +- if (mnt_user_ns(path->mnt) != &init_user_ns) { ++ if (is_idmapped_mnt(path->mnt)) { + pr_err("idmapped layers are currently not supported\n"); + goto out_put; + } +@@ -1409,11 +1419,12 @@ static int ovl_make_workdir(struct super_block *sb, struct ovl_fs *ofs, + */ + err = ovl_do_setxattr(ofs, ofs->workdir, OVL_XATTR_OPAQUE, "0", 1); + if (err) { ++ pr_warn("failed to set xattr on upper\n"); + ofs->noxattr = true; + if (ofs->config.index || ofs->config.metacopy) { + ofs->config.index = false; + ofs->config.metacopy = false; +- pr_warn("upper fs does not support xattr, falling back to index=off,metacopy=off.\n"); ++ pr_warn("...falling back to index=off,metacopy=off.\n"); + } + /* + * xattr support is required for persistent st_ino. +@@ -1421,8 +1432,10 @@ static int ovl_make_workdir(struct super_block *sb, struct ovl_fs *ofs, + */ + if (ofs->config.xino == OVL_XINO_AUTO) { + ofs->config.xino = OVL_XINO_OFF; +- pr_warn("upper fs does not support xattr, falling back to xino=off.\n"); ++ pr_warn("...falling back to xino=off.\n"); + } ++ if (err == -EPERM && !ofs->config.userxattr) ++ pr_info("try mounting with 'userxattr' option\n"); + err = 0; + } else { + ovl_do_removexattr(ofs, ofs->workdir, OVL_XATTR_OPAQUE); +diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c +index f48284a2a8960..9d33ce385bef0 100644 +--- a/fs/overlayfs/util.c ++++ b/fs/overlayfs/util.c +@@ -236,6 +236,17 @@ struct dentry *ovl_i_dentry_upper(struct inode *inode) + return ovl_upperdentry_dereference(OVL_I(inode)); + } + ++void ovl_i_path_real(struct inode *inode, struct path *path) ++{ ++ path->dentry = ovl_i_dentry_upper(inode); ++ if (!path->dentry) { ++ path->dentry = OVL_I(inode)->lowerpath.dentry; ++ path->mnt = OVL_I(inode)->lowerpath.layer->mnt; ++ } else { ++ path->mnt = ovl_upper_mnt(OVL_FS(inode->i_sb)); ++ } ++} ++ + struct inode *ovl_inode_upper(struct inode *inode) + { + struct dentry *upperdentry = ovl_i_dentry_upper(inode); +@@ -245,7 +256,9 @@ struct inode *ovl_inode_upper(struct inode *inode) + + struct inode *ovl_inode_lower(struct inode *inode) + { +- return OVL_I(inode)->lower; ++ struct dentry *lowerdentry = OVL_I(inode)->lowerpath.dentry; ++ ++ return lowerdentry ? d_inode(lowerdentry) : NULL; + } + + struct inode *ovl_inode_real(struct inode *inode) +@@ -443,7 +456,7 @@ static void ovl_dir_version_inc(struct dentry *dentry, bool impurity) + void ovl_dir_modified(struct dentry *dentry, bool impurity) + { + /* Copy mtime/ctime */ +- ovl_copyattr(d_inode(ovl_dentry_upper(dentry)), d_inode(dentry)); ++ ovl_copyattr(d_inode(dentry)); + + ovl_dir_version_inc(dentry, impurity); + } +@@ -1060,3 +1073,33 @@ int ovl_sync_status(struct ovl_fs *ofs) + + return errseq_check(&mnt->mnt_sb->s_wb_err, ofs->errseq); + } ++ ++/* ++ * ovl_copyattr() - copy inode attributes from layer to ovl inode ++ * ++ * When overlay copies inode information from an upper or lower layer to the ++ * relevant overlay inode it will apply the idmapping of the upper or lower ++ * layer when doing so ensuring that the ovl inode ownership will correctly ++ * reflect the ownership of the idmapped upper or lower layer. For example, an ++ * idmapped upper or lower layer mapping id 1001 to id 1000 will take care to ++ * map any lower or upper inode owned by id 1001 to id 1000. These mapping ++ * helpers are nops when the relevant layer isn't idmapped. 
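++ *
++ * Callers therefore pass only the overlay inode; the backing layer and
++ * its mount are resolved internally via ovl_i_path_real(), so a typical
++ * call site is simply ovl_copyattr(d_inode(dentry)).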
++ */ ++void ovl_copyattr(struct inode *inode) ++{ ++ struct path realpath; ++ struct inode *realinode; ++ struct user_namespace *real_mnt_userns; ++ ++ ovl_i_path_real(inode, &realpath); ++ realinode = d_inode(realpath.dentry); ++ real_mnt_userns = mnt_user_ns(realpath.mnt); ++ ++ inode->i_uid = i_uid_into_mnt(real_mnt_userns, realinode); ++ inode->i_gid = i_gid_into_mnt(real_mnt_userns, realinode); ++ inode->i_mode = realinode->i_mode; ++ inode->i_atime = realinode->i_atime; ++ inode->i_mtime = realinode->i_mtime; ++ inode->i_ctime = realinode->i_ctime; ++ i_size_write(inode, i_size_read(realinode)); ++} +diff --git a/fs/pipe.c b/fs/pipe.c +index 6d4342bad9f15..e08f0fe55584b 100644 +--- a/fs/pipe.c ++++ b/fs/pipe.c +@@ -252,7 +252,8 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to) + */ + was_full = pipe_full(pipe->head, pipe->tail, pipe->max_usage); + for (;;) { +- unsigned int head = pipe->head; ++ /* Read ->head with a barrier vs post_one_notification() */ ++ unsigned int head = smp_load_acquire(&pipe->head); + unsigned int tail = pipe->tail; + unsigned int mask = pipe->ring_size - 1; + +@@ -651,7 +652,7 @@ pipe_poll(struct file *filp, poll_table *wait) + unsigned int head, tail; + + /* Epoll has some historical nasty semantics, this enables them */ +- pipe->poll_usage = 1; ++ WRITE_ONCE(pipe->poll_usage, true); + + /* + * Reading pipe state only -- no need for acquiring the semaphore. +@@ -830,10 +831,8 @@ void free_pipe_info(struct pipe_inode_info *pipe) + int i; + + #ifdef CONFIG_WATCH_QUEUE +- if (pipe->watch_queue) { ++ if (pipe->watch_queue) + watch_queue_clear(pipe->watch_queue); +- put_watch_queue(pipe->watch_queue); +- } + #endif + + (void) account_pipe_buffers(pipe->user, pipe->nr_accounted, 0); +@@ -843,6 +842,10 @@ void free_pipe_info(struct pipe_inode_info *pipe) + if (buf->ops) + pipe_buf_release(pipe, buf); + } ++#ifdef CONFIG_WATCH_QUEUE ++ if (pipe->watch_queue) ++ put_watch_queue(pipe->watch_queue); ++#endif + if (pipe->tmp_page) + __free_page(pipe->tmp_page); + kfree(pipe->bufs); +@@ -1241,30 +1244,33 @@ unsigned int round_pipe_size(unsigned long size) + + /* + * Resize the pipe ring to a number of slots. ++ * ++ * Note the pipe can be reduced in capacity, but only if the current ++ * occupancy doesn't exceed nr_slots; if it does, EBUSY will be ++ * returned instead. + */ + int pipe_resize_ring(struct pipe_inode_info *pipe, unsigned int nr_slots) + { + struct pipe_buffer *bufs; + unsigned int head, tail, mask, n; + +- /* +- * We can shrink the pipe, if arg is greater than the ring occupancy. +- * Since we don't expect a lot of shrink+grow operations, just free and +- * allocate again like we would do for growing. If the pipe currently +- * contains more buffers than arg, then return busy. +- */ +- mask = pipe->ring_size - 1; +- head = pipe->head; +- tail = pipe->tail; +- n = pipe_occupancy(pipe->head, pipe->tail); +- if (nr_slots < n) +- return -EBUSY; +- + bufs = kcalloc(nr_slots, sizeof(*bufs), + GFP_KERNEL_ACCOUNT | __GFP_NOWARN); + if (unlikely(!bufs)) + return -ENOMEM; + ++ spin_lock_irq(&pipe->rd_wait.lock); ++ mask = pipe->ring_size - 1; ++ head = pipe->head; ++ tail = pipe->tail; ++ ++ n = pipe_occupancy(head, tail); ++ if (nr_slots < n) { ++ spin_unlock_irq(&pipe->rd_wait.lock); ++ kfree(bufs); ++ return -EBUSY; ++ } ++ + /* + * The pipe array wraps around, so just start the new one at zero + * and adjust the indices. 
+@@ -1296,6 +1302,8 @@ int pipe_resize_ring(struct pipe_inode_info *pipe, unsigned int nr_slots) + pipe->tail = tail; + pipe->head = head; + ++ spin_unlock_irq(&pipe->rd_wait.lock); ++ + /* This might have made more room for writers */ + wake_up_interruptible(&pipe->wr_wait); + return 0; +diff --git a/fs/pnode.c b/fs/pnode.c +index 1106137c747a3..468e4e65a615d 100644 +--- a/fs/pnode.c ++++ b/fs/pnode.c +@@ -244,7 +244,7 @@ static int propagate_one(struct mount *m) + } + do { + struct mount *parent = last_source->mnt_parent; +- if (last_source == first_source) ++ if (peers(last_source, first_source)) + break; + done = parent->mnt_master == p; + if (done && peers(n, parent)) +diff --git a/fs/posix_acl.c b/fs/posix_acl.c +index f5c25f580dd92..ceb1e3b868577 100644 +--- a/fs/posix_acl.c ++++ b/fs/posix_acl.c +@@ -23,6 +23,7 @@ + #include <linux/export.h> + #include <linux/user_namespace.h> + #include <linux/namei.h> ++#include <linux/mnt_idmapping.h> + + static struct posix_acl **acl_by_type(struct inode *inode, int type) + { +@@ -375,7 +376,9 @@ posix_acl_permission(struct user_namespace *mnt_userns, struct inode *inode, + goto check_perm; + break; + case ACL_USER: +- uid = kuid_into_mnt(mnt_userns, pa->e_uid); ++ uid = mapped_kuid_fs(mnt_userns, ++ i_user_ns(inode), ++ pa->e_uid); + if (uid_eq(uid, current_fsuid())) + goto mask; + break; +@@ -388,7 +391,9 @@ posix_acl_permission(struct user_namespace *mnt_userns, struct inode *inode, + } + break; + case ACL_GROUP: +- gid = kgid_into_mnt(mnt_userns, pa->e_gid); ++ gid = mapped_kgid_fs(mnt_userns, ++ i_user_ns(inode), ++ pa->e_gid); + if (in_group_p(gid)) { + found = 1; + if ((pa->e_perm & want) == want) +@@ -735,17 +740,17 @@ static void posix_acl_fix_xattr_userns( + case ACL_USER: + uid = make_kuid(from, le32_to_cpu(entry->e_id)); + if (from_user) +- uid = kuid_from_mnt(mnt_userns, uid); ++ uid = mapped_kuid_user(mnt_userns, &init_user_ns, uid); + else +- uid = kuid_into_mnt(mnt_userns, uid); ++ uid = mapped_kuid_fs(mnt_userns, &init_user_ns, uid); + entry->e_id = cpu_to_le32(from_kuid(to, uid)); + break; + case ACL_GROUP: + gid = make_kgid(from, le32_to_cpu(entry->e_id)); + if (from_user) +- gid = kgid_from_mnt(mnt_userns, gid); ++ gid = mapped_kgid_user(mnt_userns, &init_user_ns, gid); + else +- gid = kgid_into_mnt(mnt_userns, gid); ++ gid = mapped_kgid_fs(mnt_userns, &init_user_ns, gid); + entry->e_id = cpu_to_le32(from_kgid(to, gid)); + break; + default: +@@ -755,9 +760,14 @@ static void posix_acl_fix_xattr_userns( + } + + void posix_acl_fix_xattr_from_user(struct user_namespace *mnt_userns, ++ struct inode *inode, + void *value, size_t size) + { + struct user_namespace *user_ns = current_user_ns(); ++ ++ /* Leave ids untouched on non-idmapped mounts. */ ++ if (no_idmapping(mnt_userns, i_user_ns(inode))) ++ mnt_userns = &init_user_ns; + if ((user_ns == &init_user_ns) && (mnt_userns == &init_user_ns)) + return; + posix_acl_fix_xattr_userns(&init_user_ns, user_ns, mnt_userns, value, +@@ -765,9 +775,14 @@ void posix_acl_fix_xattr_from_user(struct user_namespace *mnt_userns, + } + + void posix_acl_fix_xattr_to_user(struct user_namespace *mnt_userns, ++ struct inode *inode, + void *value, size_t size) + { + struct user_namespace *user_ns = current_user_ns(); ++ ++ /* Leave ids untouched on non-idmapped mounts. 
*/ ++ if (no_idmapping(mnt_userns, i_user_ns(inode))) ++ mnt_userns = &init_user_ns; + if ((user_ns == &init_user_ns) && (mnt_userns == &init_user_ns)) + return; + posix_acl_fix_xattr_userns(user_ns, &init_user_ns, mnt_userns, value, +diff --git a/fs/proc/base.c b/fs/proc/base.c +index 533d5836eb9a4..300d53ee7040c 100644 +--- a/fs/proc/base.c ++++ b/fs/proc/base.c +@@ -67,6 +67,7 @@ + #include <linux/mm.h> + #include <linux/swap.h> + #include <linux/rcupdate.h> ++#include <linux/kallsyms.h> + #include <linux/stacktrace.h> + #include <linux/resource.h> + #include <linux/module.h> +@@ -386,17 +387,19 @@ static int proc_pid_wchan(struct seq_file *m, struct pid_namespace *ns, + struct pid *pid, struct task_struct *task) + { + unsigned long wchan; ++ char symname[KSYM_NAME_LEN]; + +- if (ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS)) +- wchan = get_wchan(task); +- else +- wchan = 0; ++ if (!ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS)) ++ goto print0; + +- if (wchan) +- seq_printf(m, "%ps", (void *) wchan); +- else +- seq_putc(m, '0'); ++ wchan = get_wchan(task); ++ if (wchan && !lookup_symbol_name(wchan, symname)) { ++ seq_puts(m, symname); ++ return 0; ++ } + ++print0: ++ seq_putc(m, '0'); + return 0; + } + #endif /* CONFIG_KALLSYMS */ +@@ -1883,7 +1886,7 @@ void proc_pid_evict_inode(struct proc_inode *ei) + put_pid(pid); + } + +-struct inode *proc_pid_make_inode(struct super_block * sb, ++struct inode *proc_pid_make_inode(struct super_block *sb, + struct task_struct *task, umode_t mode) + { + struct inode * inode; +@@ -1912,11 +1915,6 @@ struct inode *proc_pid_make_inode(struct super_block * sb, + + /* Let the pid remember us for quick removal */ + ei->pid = pid; +- if (S_ISDIR(mode)) { +- spin_lock(&pid->lock); +- hlist_add_head_rcu(&ei->sibling_inodes, &pid->inodes); +- spin_unlock(&pid->lock); +- } + + task_dump_owner(task, 0, &inode->i_uid, &inode->i_gid); + security_task_to_inode(task, inode); +@@ -1929,6 +1927,39 @@ out_unlock: + return NULL; + } + ++/* ++ * Generating an inode and adding it into @pid->inodes, so that task will ++ * invalidate inode's dentry before being released. ++ * ++ * This helper is used for creating dir-type entries under '/proc' and ++ * '/proc/<tgid>/task'. Other entries(eg. fd, stat) under '/proc/<tgid>' ++ * can be released by invalidating '/proc/<tgid>' dentry. ++ * In theory, dentries under '/proc/<tgid>/task' can also be released by ++ * invalidating '/proc/<tgid>' dentry, we reserve it to handle single ++ * thread exiting situation: Any one of threads should invalidate its ++ * '/proc/<tgid>/task/<pid>' dentry before released. 
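++ * (For instance, when a single thread of a multi-threaded process
++ * exits, only its own '/proc/<tgid>/task/<pid>' entry must be
++ * invalidated; the '/proc/<tgid>' directory itself stays live.)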
++ */ ++static struct inode *proc_pid_make_base_inode(struct super_block *sb, ++ struct task_struct *task, umode_t mode) ++{ ++ struct inode *inode; ++ struct proc_inode *ei; ++ struct pid *pid; ++ ++ inode = proc_pid_make_inode(sb, task, mode); ++ if (!inode) ++ return NULL; ++ ++ /* Let proc_flush_pid find this directory inode */ ++ ei = PROC_I(inode); ++ pid = ei->pid; ++ spin_lock(&pid->lock); ++ hlist_add_head_rcu(&ei->sibling_inodes, &pid->inodes); ++ spin_unlock(&pid->lock); ++ ++ return inode; ++} ++ + int pid_getattr(struct user_namespace *mnt_userns, const struct path *path, + struct kstat *stat, u32 request_mask, unsigned int query_flags) + { +@@ -3346,7 +3377,8 @@ static struct dentry *proc_pid_instantiate(struct dentry * dentry, + { + struct inode *inode; + +- inode = proc_pid_make_inode(dentry->d_sb, task, S_IFDIR | S_IRUGO | S_IXUGO); ++ inode = proc_pid_make_base_inode(dentry->d_sb, task, ++ S_IFDIR | S_IRUGO | S_IXUGO); + if (!inode) + return ERR_PTR(-ENOENT); + +@@ -3645,7 +3677,8 @@ static struct dentry *proc_task_instantiate(struct dentry *dentry, + struct task_struct *task, const void *ptr) + { + struct inode *inode; +- inode = proc_pid_make_inode(dentry->d_sb, task, S_IFDIR | S_IRUGO | S_IXUGO); ++ inode = proc_pid_make_base_inode(dentry->d_sb, task, ++ S_IFDIR | S_IRUGO | S_IXUGO); + if (!inode) + return ERR_PTR(-ENOENT); + +diff --git a/fs/proc/bootconfig.c b/fs/proc/bootconfig.c +index 6d8d4bf208377..2e244ada1f970 100644 +--- a/fs/proc/bootconfig.c ++++ b/fs/proc/bootconfig.c +@@ -32,6 +32,8 @@ static int __init copy_xbc_key_value_list(char *dst, size_t size) + int ret = 0; + + key = kzalloc(XBC_KEYLEN_MAX, GFP_KERNEL); ++ if (!key) ++ return -ENOMEM; + + xbc_for_each_key_value(leaf, val) { + ret = xbc_node_compose_key(leaf, key, XBC_KEYLEN_MAX); +diff --git a/fs/proc/fd.c b/fs/proc/fd.c +index 172c86270b312..913bef0d2a36c 100644 +--- a/fs/proc/fd.c ++++ b/fs/proc/fd.c +@@ -72,7 +72,7 @@ out: + return 0; + } + +-static int seq_fdinfo_open(struct inode *inode, struct file *file) ++static int proc_fdinfo_access_allowed(struct inode *inode) + { + bool allowed = false; + struct task_struct *task = get_proc_task(inode); +@@ -86,6 +86,16 @@ static int seq_fdinfo_open(struct inode *inode, struct file *file) + if (!allowed) + return -EACCES; + ++ return 0; ++} ++ ++static int seq_fdinfo_open(struct inode *inode, struct file *file) ++{ ++ int ret = proc_fdinfo_access_allowed(inode); ++ ++ if (ret) ++ return ret; ++ + return single_open(file, seq_show, inode); + } + +@@ -348,12 +358,23 @@ static int proc_readfdinfo(struct file *file, struct dir_context *ctx) + proc_fdinfo_instantiate); + } + ++static int proc_open_fdinfo(struct inode *inode, struct file *file) ++{ ++ int ret = proc_fdinfo_access_allowed(inode); ++ ++ if (ret) ++ return ret; ++ ++ return 0; ++} ++ + const struct inode_operations proc_fdinfo_inode_operations = { + .lookup = proc_lookupfdinfo, + .setattr = proc_setattr, + }; + + const struct file_operations proc_fdinfo_operations = { ++ .open = proc_open_fdinfo, + .read = generic_read_dir, + .iterate_shared = proc_readfdinfo, + .llseek = generic_file_llseek, +diff --git a/fs/proc/generic.c b/fs/proc/generic.c +index 5b78739e60e40..d32f69aaaa36f 100644 +--- a/fs/proc/generic.c ++++ b/fs/proc/generic.c +@@ -448,6 +448,9 @@ static struct proc_dir_entry *__proc_create(struct proc_dir_entry **parent, + proc_set_user(ent, (*parent)->uid, (*parent)->gid); + + ent->proc_dops = &proc_misc_dentry_ops; ++ /* Revalidate everything under /proc/${pid}/net */ ++ if 
((*parent)->proc_dops == &proc_net_dentry_ops) ++ pde_force_lookup(ent); + + out: + return ent; +diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c +index 15c2e55d2ed2c..123e3c9d8674b 100644 +--- a/fs/proc/proc_net.c ++++ b/fs/proc/proc_net.c +@@ -363,6 +363,9 @@ static __net_init int proc_net_ns_init(struct net *net) + + proc_set_user(netd, uid, gid); + ++ /* Seed dentry revalidation for /proc/${pid}/net */ ++ pde_force_lookup(netd); ++ + err = -EEXIST; + net_statd = proc_net_mkdir(net, "stat", netd); + if (!net_statd) +diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c +index 5d66faecd4ef0..0b7a00ed6c49b 100644 +--- a/fs/proc/proc_sysctl.c ++++ b/fs/proc/proc_sysctl.c +@@ -16,6 +16,7 @@ + #include <linux/module.h> + #include <linux/bpf-cgroup.h> + #include <linux/mount.h> ++#include <linux/kmemleak.h> + #include "internal.h" + + static const struct dentry_operations proc_sys_dentry_operations; +@@ -25,7 +26,7 @@ static const struct file_operations proc_sys_dir_file_operations; + static const struct inode_operations proc_sys_dir_operations; + + /* shared constants to be used in various sysctls */ +-const int sysctl_vals[] = { 0, 1, INT_MAX }; ++const int sysctl_vals[] = { -1, 0, 1, 2, 4, 100, 200, 1000, 3000, INT_MAX }; + EXPORT_SYMBOL(sysctl_vals); + + /* Support for permanently empty directories */ +@@ -1384,6 +1385,38 @@ struct ctl_table_header *register_sysctl(const char *path, struct ctl_table *tab + } + EXPORT_SYMBOL(register_sysctl); + ++/** ++ * __register_sysctl_init() - register sysctl table to path ++ * @path: path name for sysctl base ++ * @table: This is the sysctl table that needs to be registered to the path ++ * @table_name: The name of sysctl table, only used for log printing when ++ * registration fails ++ * ++ * The sysctl interface is used by userspace to query or modify at runtime ++ * a predefined value set on a variable. These variables however have default ++ * values pre-set. Code which depends on these variables will always work even ++ * if register_sysctl() fails. If register_sysctl() fails you'd just loose the ++ * ability to query or modify the sysctls dynamically at run time. Chances of ++ * register_sysctl() failing on init are extremely low, and so for both reasons ++ * this function does not return any error as it is used by initialization code. ++ * ++ * Context: Can only be called after your respective sysctl base path has been ++ * registered. So for instance, most base directories are registered early on ++ * init before init levels are processed through proc_sys_init() and ++ * sysctl_init(). 
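++ *
++ * Illustrative use (hypothetical table name):
++ *
++ *	__register_sysctl_init("kernel", my_sysctl_table, "my_sysctl_table");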
++ */ ++void __init __register_sysctl_init(const char *path, struct ctl_table *table, ++ const char *table_name) ++{ ++ struct ctl_table_header *hdr = register_sysctl(path, table); ++ ++ if (unlikely(!hdr)) { ++ pr_err("failed when register_sysctl %s to %s\n", table_name, path); ++ return; ++ } ++ kmemleak_not_leak(hdr); ++} ++ + static char *append_path(const char *path, char *pos, const char *name) + { + int namelen; +diff --git a/fs/proc/stat.c b/fs/proc/stat.c +index 6561a06ef9059..4fb8729a68d4e 100644 +--- a/fs/proc/stat.c ++++ b/fs/proc/stat.c +@@ -24,7 +24,7 @@ + + #ifdef arch_idle_time + +-static u64 get_idle_time(struct kernel_cpustat *kcs, int cpu) ++u64 get_idle_time(struct kernel_cpustat *kcs, int cpu) + { + u64 idle; + +@@ -46,7 +46,7 @@ static u64 get_iowait_time(struct kernel_cpustat *kcs, int cpu) + + #else + +-static u64 get_idle_time(struct kernel_cpustat *kcs, int cpu) ++u64 get_idle_time(struct kernel_cpustat *kcs, int cpu) + { + u64 idle, idle_usecs = -1ULL; + +diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c +index cf25be3e03212..705a41f4d6b36 100644 +--- a/fs/proc/task_mmu.c ++++ b/fs/proc/task_mmu.c +@@ -430,7 +430,8 @@ static void smaps_page_accumulate(struct mem_size_stats *mss, + } + + static void smaps_account(struct mem_size_stats *mss, struct page *page, +- bool compound, bool young, bool dirty, bool locked) ++ bool compound, bool young, bool dirty, bool locked, ++ bool migration) + { + int i, nr = compound ? compound_nr(page) : 1; + unsigned long size = nr * PAGE_SIZE; +@@ -457,8 +458,15 @@ static void smaps_account(struct mem_size_stats *mss, struct page *page, + * page_count(page) == 1 guarantees the page is mapped exactly once. + * If any subpage of the compound page mapped with PTE it would elevate + * page_count(). ++ * ++ * The page_mapcount() is called to get a snapshot of the mapcount. ++ * Without holding the page lock this snapshot can be slightly wrong as ++ * we cannot always read the mapcount atomically. It is not safe to ++ * call page_mapcount() even with PTL held if the page is not mapped, ++ * especially for migration entries. Treat regular migration entries ++ * as mapcount == 1. 
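+	 * (While a page is being migrated it is transiently unmapped, so a
+	 * raw mapcount snapshot would misclassify it; treating the
+	 * migration entry as exclusively mapped is the safe approximation.)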
+ */ +- if (page_count(page) == 1) { ++ if ((page_count(page) == 1) || migration) { + smaps_page_accumulate(mss, page, size, size << PSS_SHIFT, dirty, + locked, true); + return; +@@ -495,9 +503,12 @@ static void smaps_pte_entry(pte_t *pte, unsigned long addr, + struct vm_area_struct *vma = walk->vma; + bool locked = !!(vma->vm_flags & VM_LOCKED); + struct page *page = NULL; ++ bool migration = false, young = false, dirty = false; + + if (pte_present(*pte)) { + page = vm_normal_page(vma, addr, *pte); ++ young = pte_young(*pte); ++ dirty = pte_dirty(*pte); + } else if (is_swap_pte(*pte)) { + swp_entry_t swpent = pte_to_swp_entry(*pte); + +@@ -514,8 +525,11 @@ static void smaps_pte_entry(pte_t *pte, unsigned long addr, + } else { + mss->swap_pss += (u64)PAGE_SIZE << PSS_SHIFT; + } +- } else if (is_pfn_swap_entry(swpent)) ++ } else if (is_pfn_swap_entry(swpent)) { ++ if (is_migration_entry(swpent)) ++ migration = true; + page = pfn_swap_entry_to_page(swpent); ++ } + } else if (unlikely(IS_ENABLED(CONFIG_SHMEM) && mss->check_shmem_swap + && pte_none(*pte))) { + page = xa_load(&vma->vm_file->f_mapping->i_pages, +@@ -528,7 +542,7 @@ static void smaps_pte_entry(pte_t *pte, unsigned long addr, + if (!page) + return; + +- smaps_account(mss, page, false, pte_young(*pte), pte_dirty(*pte), locked); ++ smaps_account(mss, page, false, young, dirty, locked, migration); + } + + #ifdef CONFIG_TRANSPARENT_HUGEPAGE +@@ -539,6 +553,7 @@ static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr, + struct vm_area_struct *vma = walk->vma; + bool locked = !!(vma->vm_flags & VM_LOCKED); + struct page *page = NULL; ++ bool migration = false; + + if (pmd_present(*pmd)) { + /* FOLL_DUMP will return -EFAULT on huge zero page */ +@@ -546,8 +561,10 @@ static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr, + } else if (unlikely(thp_migration_supported() && is_swap_pmd(*pmd))) { + swp_entry_t entry = pmd_to_swp_entry(*pmd); + +- if (is_migration_entry(entry)) ++ if (is_migration_entry(entry)) { ++ migration = true; + page = pfn_swap_entry_to_page(entry); ++ } + } + if (IS_ERR_OR_NULL(page)) + return; +@@ -559,7 +576,9 @@ static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr, + /* pass */; + else + mss->file_thp += HPAGE_PMD_SIZE; +- smaps_account(mss, page, true, pmd_young(*pmd), pmd_dirty(*pmd), locked); ++ ++ smaps_account(mss, page, true, pmd_young(*pmd), pmd_dirty(*pmd), ++ locked, migration); + } + #else + static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr, +@@ -695,9 +714,7 @@ static int smaps_hugetlb_range(pte_t *pte, unsigned long hmask, + page = pfn_swap_entry_to_page(swpent); + } + if (page) { +- int mapcount = page_mapcount(page); +- +- if (mapcount >= 2) ++ if (page_mapcount(page) >= 2 || hugetlb_pmd_shared(pte)) + mss->shared_hugetlb += huge_page_size(hstate_vma(vma)); + else + mss->private_hugetlb += huge_page_size(hstate_vma(vma)); +@@ -932,7 +949,7 @@ static int show_smaps_rollup(struct seq_file *m, void *v) + vma = vma->vm_next; + } + +- show_vma_header_prefix(m, priv->mm->mmap->vm_start, ++ show_vma_header_prefix(m, priv->mm->mmap ? 
priv->mm->mmap->vm_start : 0, + last_vma_end, 0, 0, 0, 0); + seq_pad(m, ' '); + seq_puts(m, "[rollup]\n"); +@@ -1363,6 +1380,7 @@ static pagemap_entry_t pte_to_pagemap_entry(struct pagemapread *pm, + { + u64 frame = 0, flags = 0; + struct page *page = NULL; ++ bool migration = false; + + if (pte_present(pte)) { + if (pm->show_pfn) +@@ -1384,13 +1402,14 @@ static pagemap_entry_t pte_to_pagemap_entry(struct pagemapread *pm, + frame = swp_type(entry) | + (swp_offset(entry) << MAX_SWAPFILES_SHIFT); + flags |= PM_SWAP; ++ migration = is_migration_entry(entry); + if (is_pfn_swap_entry(entry)) + page = pfn_swap_entry_to_page(entry); + } + + if (page && !PageAnon(page)) + flags |= PM_FILE; +- if (page && page_mapcount(page) == 1) ++ if (page && !migration && page_mapcount(page) == 1) + flags |= PM_MMAP_EXCLUSIVE; + if (vma->vm_flags & VM_SOFTDIRTY) + flags |= PM_SOFT_DIRTY; +@@ -1406,8 +1425,9 @@ static int pagemap_pmd_range(pmd_t *pmdp, unsigned long addr, unsigned long end, + spinlock_t *ptl; + pte_t *pte, *orig_pte; + int err = 0; +- + #ifdef CONFIG_TRANSPARENT_HUGEPAGE ++ bool migration = false; ++ + ptl = pmd_trans_huge_lock(pmdp, vma); + if (ptl) { + u64 flags = 0, frame = 0; +@@ -1446,11 +1466,12 @@ static int pagemap_pmd_range(pmd_t *pmdp, unsigned long addr, unsigned long end, + if (pmd_swp_uffd_wp(pmd)) + flags |= PM_UFFD_WP; + VM_BUG_ON(!is_pmd_migration_entry(pmd)); ++ migration = is_migration_entry(entry); + page = pfn_swap_entry_to_page(entry); + } + #endif + +- if (page && page_mapcount(page) == 1) ++ if (page && !migration && page_mapcount(page) == 1) + flags |= PM_MMAP_EXCLUSIVE; + + for (; addr != end; addr += PAGE_SIZE) { +@@ -1560,7 +1581,8 @@ static const struct mm_walk_ops pagemap_ops = { + * Bits 5-54 swap offset if swapped + * Bit 55 pte is soft-dirty (see Documentation/admin-guide/mm/soft-dirty.rst) + * Bit 56 page exclusively mapped +- * Bits 57-60 zero ++ * Bit 57 pte is uffd-wp write-protected ++ * Bits 58-60 zero + * Bit 61 page is file-page or shared-anon + * Bit 62 page swapped + * Bit 63 page present +diff --git a/fs/proc/uptime.c b/fs/proc/uptime.c +index 5a1b228964fb7..deb99bc9b7e6b 100644 +--- a/fs/proc/uptime.c ++++ b/fs/proc/uptime.c +@@ -12,18 +12,22 @@ static int uptime_proc_show(struct seq_file *m, void *v) + { + struct timespec64 uptime; + struct timespec64 idle; +- u64 nsec; ++ u64 idle_nsec; + u32 rem; + int i; + +- nsec = 0; +- for_each_possible_cpu(i) +- nsec += (__force u64) kcpustat_cpu(i).cpustat[CPUTIME_IDLE]; ++ idle_nsec = 0; ++ for_each_possible_cpu(i) { ++ struct kernel_cpustat kcs; ++ ++ kcpustat_cpu_fetch(&kcs, i); ++ idle_nsec += get_idle_time(&kcs, i); ++ } + + ktime_get_boottime_ts64(&uptime); + timens_add_boottime(&uptime); + +- idle.tv_sec = div_u64_rem(nsec, NSEC_PER_SEC, &rem); ++ idle.tv_sec = div_u64_rem(idle_nsec, NSEC_PER_SEC, &rem); + idle.tv_nsec = rem; + seq_printf(m, "%lu.%02lu %lu.%02lu\n", + (unsigned long) uptime.tv_sec, +diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c +index 9a15334da2086..e5730986758fa 100644 +--- a/fs/proc/vmcore.c ++++ b/fs/proc/vmcore.c +@@ -124,9 +124,13 @@ ssize_t read_from_oldmem(char *buf, size_t count, + nr_bytes = count; + + /* If pfn is not ram, return zeros for sparse dump files */ +- if (pfn_is_ram(pfn) == 0) +- memset(buf, 0, nr_bytes); +- else { ++ if (pfn_is_ram(pfn) == 0) { ++ tmp = 0; ++ if (!userbuf) ++ memset(buf, 0, nr_bytes); ++ else if (clear_user(buf, nr_bytes)) ++ tmp = -EFAULT; ++ } else { + if (encrypted) + tmp = copy_oldmem_page_encrypted(pfn, buf, + nr_bytes, +@@ -135,10 +139,10 
@@ ssize_t read_from_oldmem(char *buf, size_t count, + else + tmp = copy_oldmem_page(pfn, buf, nr_bytes, + offset, userbuf); +- +- if (tmp < 0) +- return tmp; + } ++ if (tmp < 0) ++ return tmp; ++ + *ppos += nr_bytes; + count -= nr_bytes; + buf += nr_bytes; +diff --git a/fs/proc_namespace.c b/fs/proc_namespace.c +index 392ef5162655b..49650e54d2f88 100644 +--- a/fs/proc_namespace.c ++++ b/fs/proc_namespace.c +@@ -80,7 +80,7 @@ static void show_mnt_opts(struct seq_file *m, struct vfsmount *mnt) + seq_puts(m, fs_infop->str); + } + +- if (mnt_user_ns(mnt) != &init_user_ns) ++ if (is_idmapped_mnt(mnt)) + seq_puts(m, ",idmapped"); + } + +diff --git a/fs/pstore/Kconfig b/fs/pstore/Kconfig +index 328da35da3908..c49d554cc9ae9 100644 +--- a/fs/pstore/Kconfig ++++ b/fs/pstore/Kconfig +@@ -126,6 +126,7 @@ config PSTORE_CONSOLE + config PSTORE_PMSG + bool "Log user space messages" + depends on PSTORE ++ select RT_MUTEXES + help + When the option is enabled, pstore will export a character + interface /dev/pmsg0 to log user space messages. On reboot +@@ -173,7 +174,6 @@ config PSTORE_BLK + tristate "Log panic/oops to a block device" + depends on PSTORE + depends on BLOCK +- depends on BROKEN + select PSTORE_ZONE + default n + help +diff --git a/fs/pstore/blk.c b/fs/pstore/blk.c +index 04ce58c939a0b..6093088de49fd 100644 +--- a/fs/pstore/blk.c ++++ b/fs/pstore/blk.c +@@ -311,7 +311,7 @@ static int __init __best_effort_init(void) + if (ret) + kfree(best_effort_dev); + else +- pr_info("attached %s (%zu) (no dedicated panic_write!)\n", ++ pr_info("attached %s (%lu) (no dedicated panic_write!)\n", + blkdev, best_effort_dev->zone.total_size); + + return ret; +diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c +index b9614db48b1de..ad96ba97d8f97 100644 +--- a/fs/pstore/platform.c ++++ b/fs/pstore/platform.c +@@ -143,21 +143,22 @@ static void pstore_timer_kick(void) + mod_timer(&pstore_timer, jiffies + msecs_to_jiffies(pstore_update_ms)); + } + +-/* +- * Should pstore_dump() wait for a concurrent pstore_dump()? If +- * not, the current pstore_dump() will report a failure to dump +- * and return. +- */ +-static bool pstore_cannot_wait(enum kmsg_dump_reason reason) ++static bool pstore_cannot_block_path(enum kmsg_dump_reason reason) + { +- /* In NMI path, pstore shouldn't block regardless of reason. */ ++ /* ++ * In case of NMI path, pstore shouldn't be blocked ++ * regardless of reason. ++ */ + if (in_nmi()) + return true; + + switch (reason) { + /* In panic case, other cpus are stopped by smp_send_stop(). */ + case KMSG_DUMP_PANIC: +- /* Emergency restart shouldn't be blocked. */ ++ /* ++ * Emergency restart shouldn't be blocked by spinning on ++ * pstore_info::buf_lock. ++ */ + case KMSG_DUMP_EMERG: + return true; + default: +@@ -389,21 +390,19 @@ static void pstore_dump(struct kmsg_dumper *dumper, + unsigned long total = 0; + const char *why; + unsigned int part = 1; ++ unsigned long flags = 0; + int ret; + + why = kmsg_dump_reason_str(reason); + +- if (down_trylock(&psinfo->buf_lock)) { +- /* Failed to acquire lock: give up if we cannot wait. */ +- if (pstore_cannot_wait(reason)) { +- pr_err("dump skipped in %s path: may corrupt error record\n", +- in_nmi() ? "NMI" : why); +- return; +- } +- if (down_interruptible(&psinfo->buf_lock)) { +- pr_err("could not grab semaphore?!\n"); ++ if (pstore_cannot_block_path(reason)) { ++ if (!spin_trylock_irqsave(&psinfo->buf_lock, flags)) { ++ pr_err("dump skipped in %s path because of concurrent dump\n", ++ in_nmi() ? 
"NMI" : why); + return; + } ++ } else { ++ spin_lock_irqsave(&psinfo->buf_lock, flags); + } + + kmsg_dump_rewind(&iter); +@@ -467,8 +466,7 @@ static void pstore_dump(struct kmsg_dumper *dumper, + total += record.size; + part++; + } +- +- up(&psinfo->buf_lock); ++ spin_unlock_irqrestore(&psinfo->buf_lock, flags); + } + + static struct kmsg_dumper pstore_dumper = { +@@ -594,7 +592,7 @@ int pstore_register(struct pstore_info *psi) + psi->write_user = pstore_write_user_compat; + psinfo = psi; + mutex_init(&psinfo->read_mutex); +- sema_init(&psinfo->buf_lock, 1); ++ spin_lock_init(&psinfo->buf_lock); + + if (psi->flags & PSTORE_FLAGS_DMESG) + allocate_buf_for_compression(); +diff --git a/fs/pstore/pmsg.c b/fs/pstore/pmsg.c +index d8542ec2f38c6..18cf94b597e05 100644 +--- a/fs/pstore/pmsg.c ++++ b/fs/pstore/pmsg.c +@@ -7,9 +7,10 @@ + #include <linux/device.h> + #include <linux/fs.h> + #include <linux/uaccess.h> ++#include <linux/rtmutex.h> + #include "internal.h" + +-static DEFINE_MUTEX(pmsg_lock); ++static DEFINE_RT_MUTEX(pmsg_lock); + + static ssize_t write_pmsg(struct file *file, const char __user *buf, + size_t count, loff_t *ppos) +@@ -28,9 +29,9 @@ static ssize_t write_pmsg(struct file *file, const char __user *buf, + if (!access_ok(buf, count)) + return -EFAULT; + +- mutex_lock(&pmsg_lock); ++ rt_mutex_lock(&pmsg_lock); + ret = psinfo->write_user(&record, buf); +- mutex_unlock(&pmsg_lock); ++ rt_mutex_unlock(&pmsg_lock); + return ret ? ret : count; + } + +diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c +index fefe3d391d3af..f3fa3625d772c 100644 +--- a/fs/pstore/ram.c ++++ b/fs/pstore/ram.c +@@ -670,7 +670,7 @@ static int ramoops_parse_dt(struct platform_device *pdev, + field = value; \ + } + +- parse_u32("mem-type", pdata->record_size, pdata->mem_type); ++ parse_u32("mem-type", pdata->mem_type, pdata->mem_type); + parse_u32("record-size", pdata->record_size, 0); + parse_u32("console-size", pdata->console_size, 0); + parse_u32("ftrace-size", pdata->ftrace_size, 0); +@@ -735,6 +735,7 @@ static int ramoops_probe(struct platform_device *pdev) + /* Make sure we didn't get bogus platform data pointer. */ + if (!pdata) { + pr_err("NULL platform data\n"); ++ err = -EINVAL; + goto fail_out; + } + +@@ -742,6 +743,7 @@ static int ramoops_probe(struct platform_device *pdev) + !pdata->ftrace_size && !pdata->pmsg_size)) { + pr_err("The memory size and the record/console size must be " + "non-zero\n"); ++ err = -EINVAL; + goto fail_out; + } + +diff --git a/fs/pstore/ram_core.c b/fs/pstore/ram_core.c +index fe5305028c6e2..155c7010b1f83 100644 +--- a/fs/pstore/ram_core.c ++++ b/fs/pstore/ram_core.c +@@ -439,7 +439,11 @@ static void *persistent_ram_vmap(phys_addr_t start, size_t size, + phys_addr_t addr = page_start + i * PAGE_SIZE; + pages[i] = pfn_to_page(addr >> PAGE_SHIFT); + } +- vaddr = vmap(pages, page_count, VM_MAP, prot); ++ /* ++ * VM_IOREMAP used here to bypass this region during vread() ++ * and kmap_atomic() (i.e. kcore) to avoid __va() failures. 
++ */ ++ vaddr = vmap(pages, page_count, VM_MAP | VM_IOREMAP, prot); + kfree(pages); + + /* +diff --git a/fs/pstore/zone.c b/fs/pstore/zone.c +index 7c8f8feac6c34..5d3f944f60185 100644 +--- a/fs/pstore/zone.c ++++ b/fs/pstore/zone.c +@@ -761,7 +761,7 @@ static inline int notrace psz_kmsg_write_record(struct psz_context *cxt, + /* avoid destroying old data, allocate a new one */ + len = zone->buffer_size + sizeof(*zone->buffer); + zone->oldbuf = zone->buffer; +- zone->buffer = kzalloc(len, GFP_KERNEL); ++ zone->buffer = kzalloc(len, GFP_ATOMIC); + if (!zone->buffer) { + zone->buffer = zone->oldbuf; + return -ENOMEM; +diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c +index 22d904bde6ab9..cddd07b7d1329 100644 +--- a/fs/quota/dquot.c ++++ b/fs/quota/dquot.c +@@ -79,6 +79,7 @@ + #include <linux/capability.h> + #include <linux/quotaops.h> + #include <linux/blkdev.h> ++#include <linux/sched/mm.h> + #include "../internal.h" /* ugh */ + + #include <linux/uaccess.h> +@@ -425,9 +426,11 @@ EXPORT_SYMBOL(mark_info_dirty); + int dquot_acquire(struct dquot *dquot) + { + int ret = 0, ret2 = 0; ++ unsigned int memalloc; + struct quota_info *dqopt = sb_dqopt(dquot->dq_sb); + + mutex_lock(&dquot->dq_lock); ++ memalloc = memalloc_nofs_save(); + if (!test_bit(DQ_READ_B, &dquot->dq_flags)) { + ret = dqopt->ops[dquot->dq_id.type]->read_dqblk(dquot); + if (ret < 0) +@@ -458,6 +461,7 @@ int dquot_acquire(struct dquot *dquot) + smp_mb__before_atomic(); + set_bit(DQ_ACTIVE_B, &dquot->dq_flags); + out_iolock: ++ memalloc_nofs_restore(memalloc); + mutex_unlock(&dquot->dq_lock); + return ret; + } +@@ -469,9 +473,11 @@ EXPORT_SYMBOL(dquot_acquire); + int dquot_commit(struct dquot *dquot) + { + int ret = 0; ++ unsigned int memalloc; + struct quota_info *dqopt = sb_dqopt(dquot->dq_sb); + + mutex_lock(&dquot->dq_lock); ++ memalloc = memalloc_nofs_save(); + if (!clear_dquot_dirty(dquot)) + goto out_lock; + /* Inactive dquot can be only if there was error during read/init +@@ -481,6 +487,7 @@ int dquot_commit(struct dquot *dquot) + else + ret = -EIO; + out_lock: ++ memalloc_nofs_restore(memalloc); + mutex_unlock(&dquot->dq_lock); + return ret; + } +@@ -492,9 +499,11 @@ EXPORT_SYMBOL(dquot_commit); + int dquot_release(struct dquot *dquot) + { + int ret = 0, ret2 = 0; ++ unsigned int memalloc; + struct quota_info *dqopt = sb_dqopt(dquot->dq_sb); + + mutex_lock(&dquot->dq_lock); ++ memalloc = memalloc_nofs_save(); + /* Check whether we are not racing with some other dqget() */ + if (dquot_is_busy(dquot)) + goto out_dqlock; +@@ -510,6 +519,7 @@ int dquot_release(struct dquot *dquot) + } + clear_bit(DQ_ACTIVE_B, &dquot->dq_flags); + out_dqlock: ++ memalloc_nofs_restore(memalloc); + mutex_unlock(&dquot->dq_lock); + return ret; + } +@@ -690,9 +700,14 @@ int dquot_quota_sync(struct super_block *sb, int type) + /* This is not very clever (and fast) but currently I don't know about + * any other simple way of getting quota data to disk and we must get + * them there for userspace to be visible... 
*/ +- if (sb->s_op->sync_fs) +- sb->s_op->sync_fs(sb, 1); +- sync_blockdev(sb->s_bdev); ++ if (sb->s_op->sync_fs) { ++ ret = sb->s_op->sync_fs(sb, 1); ++ if (ret) ++ return ret; ++ } ++ ret = sync_blockdev(sb->s_bdev); ++ if (ret) ++ return ret; + + /* + * Now when everything is written we can discard the pagecache so +@@ -2302,6 +2317,8 @@ static int vfs_setup_quota_inode(struct inode *inode, int type) + struct super_block *sb = inode->i_sb; + struct quota_info *dqopt = sb_dqopt(sb); + ++ if (is_bad_inode(inode)) ++ return -EUCLEAN; + if (!S_ISREG(inode->i_mode)) + return -EACCES; + if (IS_RDONLY(inode)) +diff --git a/fs/quota/quota_tree.c b/fs/quota/quota_tree.c +index d3e995e1046fb..7e65d67de9f33 100644 +--- a/fs/quota/quota_tree.c ++++ b/fs/quota/quota_tree.c +@@ -71,6 +71,35 @@ static ssize_t write_blk(struct qtree_mem_dqinfo *info, uint blk, char *buf) + return ret; + } + ++static inline int do_check_range(struct super_block *sb, const char *val_name, ++ uint val, uint min_val, uint max_val) ++{ ++ if (val < min_val || val > max_val) { ++ quota_error(sb, "Getting %s %u out of range %u-%u", ++ val_name, val, min_val, max_val); ++ return -EUCLEAN; ++ } ++ ++ return 0; ++} ++ ++static int check_dquot_block_header(struct qtree_mem_dqinfo *info, ++ struct qt_disk_dqdbheader *dh) ++{ ++ int err = 0; ++ ++ err = do_check_range(info->dqi_sb, "dqdh_next_free", ++ le32_to_cpu(dh->dqdh_next_free), 0, ++ info->dqi_blocks - 1); ++ if (err) ++ return err; ++ err = do_check_range(info->dqi_sb, "dqdh_prev_free", ++ le32_to_cpu(dh->dqdh_prev_free), 0, ++ info->dqi_blocks - 1); ++ ++ return err; ++} ++ + /* Remove empty block from list and return it */ + static int get_free_dqblk(struct qtree_mem_dqinfo *info) + { +@@ -85,6 +114,9 @@ static int get_free_dqblk(struct qtree_mem_dqinfo *info) + ret = read_blk(info, blk, buf); + if (ret < 0) + goto out_buf; ++ ret = check_dquot_block_header(info, dh); ++ if (ret) ++ goto out_buf; + info->dqi_free_blk = le32_to_cpu(dh->dqdh_next_free); + } + else { +@@ -232,6 +264,9 @@ static uint find_free_dqentry(struct qtree_mem_dqinfo *info, + *err = read_blk(info, blk, buf); + if (*err < 0) + goto out_buf; ++ *err = check_dquot_block_header(info, dh); ++ if (*err) ++ goto out_buf; + } else { + blk = get_free_dqblk(info); + if ((int)blk < 0) { +@@ -414,6 +449,7 @@ static int free_dqentry(struct qtree_mem_dqinfo *info, struct dquot *dquot, + quota_error(dquot->dq_sb, "Quota structure has offset to " + "other block (%u) than it should (%u)", blk, + (uint)(dquot->dq_off >> info->dqi_blocksize_bits)); ++ ret = -EIO; + goto out_buf; + } + ret = read_blk(info, blk, buf); +@@ -423,6 +459,9 @@ static int free_dqentry(struct qtree_mem_dqinfo *info, struct dquot *dquot, + goto out_buf; + } + dh = (struct qt_disk_dqdbheader *)buf; ++ ret = check_dquot_block_header(info, dh); ++ if (ret) ++ goto out_buf; + le16_add_cpu(&dh->dqdh_entries, -1); + if (!le16_to_cpu(dh->dqdh_entries)) { /* Block got free? 
*/ + ret = remove_free_dqentry(info, buf, blk); +@@ -479,6 +518,13 @@ static int remove_tree(struct qtree_mem_dqinfo *info, struct dquot *dquot, + goto out_buf; + } + newblk = le32_to_cpu(ref[get_index(info, dquot->dq_id, depth)]); ++ if (newblk < QT_TREEOFF || newblk >= info->dqi_blocks) { ++ quota_error(dquot->dq_sb, "Getting block too big (%u >= %u)", ++ newblk, info->dqi_blocks); ++ ret = -EUCLEAN; ++ goto out_buf; ++ } ++ + if (depth == info->dqi_qtree_depth - 1) { + ret = free_dqentry(info, dquot, newblk); + newblk = 0; +@@ -578,6 +624,13 @@ static loff_t find_tree_dqentry(struct qtree_mem_dqinfo *info, + blk = le32_to_cpu(ref[get_index(info, dquot->dq_id, depth)]); + if (!blk) /* No reference? */ + goto out_buf; ++ if (blk < QT_TREEOFF || blk >= info->dqi_blocks) { ++ quota_error(dquot->dq_sb, "Getting block too big (%u >= %u)", ++ blk, info->dqi_blocks); ++ ret = -EUCLEAN; ++ goto out_buf; ++ } ++ + if (depth < info->dqi_qtree_depth - 1) + ret = find_tree_dqentry(info, dquot, blk, depth+1); + else +diff --git a/fs/read_write.c b/fs/read_write.c +index af057c57bdc64..b4b15279b66b6 100644 +--- a/fs/read_write.c ++++ b/fs/read_write.c +@@ -1250,6 +1250,9 @@ static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos, + count, fl); + file_end_write(out.file); + } else { ++ if (out.file->f_flags & O_NONBLOCK) ++ fl |= SPLICE_F_NONBLOCK; ++ + retval = splice_file_to_pipe(in.file, opipe, &pos, count, fl); + } + +@@ -1384,28 +1387,6 @@ ssize_t generic_copy_file_range(struct file *file_in, loff_t pos_in, + } + EXPORT_SYMBOL(generic_copy_file_range); + +-static ssize_t do_copy_file_range(struct file *file_in, loff_t pos_in, +- struct file *file_out, loff_t pos_out, +- size_t len, unsigned int flags) +-{ +- /* +- * Although we now allow filesystems to handle cross sb copy, passing +- * a file of the wrong filesystem type to filesystem driver can result +- * in an attempt to dereference the wrong type of ->private_data, so +- * avoid doing that until we really have a good reason. NFS defines +- * several different file_system_type structures, but they all end up +- * using the same ->copy_file_range() function pointer. +- */ +- if (file_out->f_op->copy_file_range && +- file_out->f_op->copy_file_range == file_in->f_op->copy_file_range) +- return file_out->f_op->copy_file_range(file_in, pos_in, +- file_out, pos_out, +- len, flags); +- +- return generic_copy_file_range(file_in, pos_in, file_out, pos_out, len, +- flags); +-} +- + /* + * Performs necessary checks before doing a file copy + * +@@ -1427,6 +1408,26 @@ static int generic_copy_file_checks(struct file *file_in, loff_t pos_in, + if (ret) + return ret; + ++ /* ++ * We allow some filesystems to handle cross sb copy, but passing ++ * a file of the wrong filesystem type to filesystem driver can result ++ * in an attempt to dereference the wrong type of ->private_data, so ++ * avoid doing that until we really have a good reason. ++ * ++ * nfs and cifs define several different file_system_type structures ++ * and several different sets of file_operations, but they all end up ++ * using the same ->copy_file_range() function pointer. 
++ */ ++ if (flags & COPY_FILE_SPLICE) { ++ /* cross sb splice is allowed */ ++ } else if (file_out->f_op->copy_file_range) { ++ if (file_in->f_op->copy_file_range != ++ file_out->f_op->copy_file_range) ++ return -EXDEV; ++ } else if (file_inode(file_in)->i_sb != file_inode(file_out)->i_sb) { ++ return -EXDEV; ++ } ++ + /* Don't touch certain kinds of inodes */ + if (IS_IMMUTABLE(inode_out)) + return -EPERM; +@@ -1469,8 +1470,9 @@ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in, + size_t len, unsigned int flags) + { + ssize_t ret; ++ bool splice = flags & COPY_FILE_SPLICE; + +- if (flags != 0) ++ if (flags & ~COPY_FILE_SPLICE) + return -EINVAL; + + ret = generic_copy_file_checks(file_in, pos_in, file_out, pos_out, &len, +@@ -1492,26 +1494,43 @@ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in, + file_start_write(file_out); + + /* +- * Try cloning first, this is supported by more file systems, and +- * more efficient if both clone and copy are supported (e.g. NFS). ++ * Cloning is supported by more file systems, so we implement copy on ++ * same sb using clone, but for filesystems where both clone and copy ++ * are supported (e.g. nfs,cifs), we only call the copy method. + */ +- if (file_in->f_op->remap_file_range && +- file_inode(file_in)->i_sb == file_inode(file_out)->i_sb) { +- loff_t cloned; ++ if (!splice && file_out->f_op->copy_file_range) { ++ ret = file_out->f_op->copy_file_range(file_in, pos_in, ++ file_out, pos_out, ++ len, flags); ++ goto done; ++ } + +- cloned = file_in->f_op->remap_file_range(file_in, pos_in, ++ if (!splice && file_in->f_op->remap_file_range && ++ file_inode(file_in)->i_sb == file_inode(file_out)->i_sb) { ++ ret = file_in->f_op->remap_file_range(file_in, pos_in, + file_out, pos_out, + min_t(loff_t, MAX_RW_COUNT, len), + REMAP_FILE_CAN_SHORTEN); +- if (cloned > 0) { +- ret = cloned; ++ if (ret > 0) + goto done; +- } + } + +- ret = do_copy_file_range(file_in, pos_in, file_out, pos_out, len, +- flags); +- WARN_ON_ONCE(ret == -EOPNOTSUPP); ++ /* ++ * We can get here for same sb copy of filesystems that do not implement ++ * ->copy_file_range() in case filesystem does not support clone or in ++ * case filesystem supports clone but rejected the clone request (e.g. ++ * because it was not block aligned). ++ * ++ * In both cases, fall back to kernel copy so we are able to maintain a ++ * consistent story about which filesystems support copy_file_range() ++ * and which filesystems do not, that will allow userspace tools to ++ * make consistent desicions w.r.t using copy_file_range(). ++ * ++ * We also get here if caller (e.g. nfsd) requested COPY_FILE_SPLICE. 
++ */ ++ ret = generic_copy_file_range(file_in, pos_in, file_out, pos_out, len, ++ flags); ++ + done: + if (ret > 0) { + fsnotify_access(file_in); +@@ -1562,6 +1581,10 @@ SYSCALL_DEFINE6(copy_file_range, int, fd_in, loff_t __user *, off_in, + pos_out = f_out.file->f_pos; + } + ++ ret = -EINVAL; ++ if (flags != 0) ++ goto out; ++ + ret = vfs_copy_file_range(f_in.file, pos_in, f_out.file, pos_out, len, + flags); + if (ret > 0) { +diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c +index 3d7a35d6a18bc..b916859992ec8 100644 +--- a/fs/reiserfs/namei.c ++++ b/fs/reiserfs/namei.c +@@ -696,6 +696,7 @@ static int reiserfs_create(struct user_namespace *mnt_userns, struct inode *dir, + + out_failed: + reiserfs_write_unlock(dir->i_sb); ++ reiserfs_security_free(&security); + return retval; + } + +@@ -779,6 +780,7 @@ static int reiserfs_mknod(struct user_namespace *mnt_userns, struct inode *dir, + + out_failed: + reiserfs_write_unlock(dir->i_sb); ++ reiserfs_security_free(&security); + return retval; + } + +@@ -878,6 +880,7 @@ static int reiserfs_mkdir(struct user_namespace *mnt_userns, struct inode *dir, + retval = journal_end(&th); + out_failed: + reiserfs_write_unlock(dir->i_sb); ++ reiserfs_security_free(&security); + return retval; + } + +@@ -1194,6 +1197,7 @@ static int reiserfs_symlink(struct user_namespace *mnt_userns, + retval = journal_end(&th); + out_failed: + reiserfs_write_unlock(parent_dir->i_sb); ++ reiserfs_security_free(&security); + return retval; + } + +diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c +index 58481f8d63d5b..f7b05c6b3dcf4 100644 +--- a/fs/reiserfs/super.c ++++ b/fs/reiserfs/super.c +@@ -1437,7 +1437,6 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg) + unsigned long safe_mask = 0; + unsigned int commit_max_age = (unsigned int)-1; + struct reiserfs_journal *journal = SB_JOURNAL(s); +- char *new_opts; + int err; + char *qf_names[REISERFS_MAXQUOTAS]; + unsigned int qfmt = 0; +@@ -1445,10 +1444,6 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg) + int i; + #endif + +- new_opts = kstrdup(arg, GFP_KERNEL); +- if (arg && !new_opts) +- return -ENOMEM; +- + sync_filesystem(s); + reiserfs_write_lock(s); + +@@ -1599,7 +1594,6 @@ out_ok_unlocked: + out_err_unlock: + reiserfs_write_unlock(s); + out_err: +- kfree(new_opts); + return err; + } + +diff --git a/fs/reiserfs/xattr_security.c b/fs/reiserfs/xattr_security.c +index 8965c8e5e172b..857a65b057264 100644 +--- a/fs/reiserfs/xattr_security.c ++++ b/fs/reiserfs/xattr_security.c +@@ -50,6 +50,7 @@ int reiserfs_security_init(struct inode *dir, struct inode *inode, + int error; + + sec->name = NULL; ++ sec->value = NULL; + + /* Don't add selinux attributes on xattrs - they'll never get used */ + if (IS_PRIVATE(dir)) +@@ -95,7 +96,6 @@ int reiserfs_security_write(struct reiserfs_transaction_handle *th, + + void reiserfs_security_free(struct reiserfs_security_handle *sec) + { +- kfree(sec->name); + kfree(sec->value); + sec->name = NULL; + sec->value = NULL; +diff --git a/fs/remap_range.c b/fs/remap_range.c +index 6d4a9beaa0974..e69bafb96f093 100644 +--- a/fs/remap_range.c ++++ b/fs/remap_range.c +@@ -71,7 +71,8 @@ static int generic_remap_checks(struct file *file_in, loff_t pos_in, + * Otherwise, make sure the count is also block-aligned, having + * already confirmed the starting offsets' block alignment. 
+ */ +- if (pos_in + count == size_in) { ++ if (pos_in + count == size_in && ++ (!(remap_flags & REMAP_FILE_DEDUP) || pos_out + count == size_out)) { + bcount = ALIGN(size_in, bs) - pos_in; + } else { + if (!IS_ALIGNED(count, bs)) +diff --git a/fs/select.c b/fs/select.c +index 945896d0ac9e7..5edffee1162c2 100644 +--- a/fs/select.c ++++ b/fs/select.c +@@ -458,9 +458,11 @@ get_max: + return max; + } + +-#define POLLIN_SET (EPOLLRDNORM | EPOLLRDBAND | EPOLLIN | EPOLLHUP | EPOLLERR) +-#define POLLOUT_SET (EPOLLWRBAND | EPOLLWRNORM | EPOLLOUT | EPOLLERR) +-#define POLLEX_SET (EPOLLPRI) ++#define POLLIN_SET (EPOLLRDNORM | EPOLLRDBAND | EPOLLIN | EPOLLHUP | EPOLLERR |\ ++ EPOLLNVAL) ++#define POLLOUT_SET (EPOLLWRBAND | EPOLLWRNORM | EPOLLOUT | EPOLLERR |\ ++ EPOLLNVAL) ++#define POLLEX_SET (EPOLLPRI | EPOLLNVAL) + + static inline void wait_key_set(poll_table *wait, unsigned long in, + unsigned long out, unsigned long bit, +@@ -527,6 +529,7 @@ static int do_select(int n, fd_set_bits *fds, struct timespec64 *end_time) + break; + if (!(bit & all_bits)) + continue; ++ mask = EPOLLNVAL; + f = fdget(i); + if (f.file) { + wait_key_set(wait, in, out, bit, +@@ -534,34 +537,34 @@ static int do_select(int n, fd_set_bits *fds, struct timespec64 *end_time) + mask = vfs_poll(f.file, wait); + + fdput(f); +- if ((mask & POLLIN_SET) && (in & bit)) { +- res_in |= bit; +- retval++; +- wait->_qproc = NULL; +- } +- if ((mask & POLLOUT_SET) && (out & bit)) { +- res_out |= bit; +- retval++; +- wait->_qproc = NULL; +- } +- if ((mask & POLLEX_SET) && (ex & bit)) { +- res_ex |= bit; +- retval++; +- wait->_qproc = NULL; +- } +- /* got something, stop busy polling */ +- if (retval) { +- can_busy_loop = false; +- busy_flag = 0; +- +- /* +- * only remember a returned +- * POLL_BUSY_LOOP if we asked for it +- */ +- } else if (busy_flag & mask) +- can_busy_loop = true; +- + } ++ if ((mask & POLLIN_SET) && (in & bit)) { ++ res_in |= bit; ++ retval++; ++ wait->_qproc = NULL; ++ } ++ if ((mask & POLLOUT_SET) && (out & bit)) { ++ res_out |= bit; ++ retval++; ++ wait->_qproc = NULL; ++ } ++ if ((mask & POLLEX_SET) && (ex & bit)) { ++ res_ex |= bit; ++ retval++; ++ wait->_qproc = NULL; ++ } ++ /* got something, stop busy polling */ ++ if (retval) { ++ can_busy_loop = false; ++ busy_flag = 0; ++ ++ /* ++ * only remember a returned ++ * POLL_BUSY_LOOP if we asked for it ++ */ ++ } else if (busy_flag & mask) ++ can_busy_loop = true; ++ + } + if (res_in) + *rinp = res_in; +diff --git a/fs/seq_file.c b/fs/seq_file.c +index 4a2cda04d3e29..b17ee4c4f618a 100644 +--- a/fs/seq_file.c ++++ b/fs/seq_file.c +@@ -947,6 +947,38 @@ struct list_head *seq_list_next(void *v, struct list_head *head, loff_t *ppos) + } + EXPORT_SYMBOL(seq_list_next); + ++struct list_head *seq_list_start_rcu(struct list_head *head, loff_t pos) ++{ ++ struct list_head *lh; ++ ++ list_for_each_rcu(lh, head) ++ if (pos-- == 0) ++ return lh; ++ ++ return NULL; ++} ++EXPORT_SYMBOL(seq_list_start_rcu); ++ ++struct list_head *seq_list_start_head_rcu(struct list_head *head, loff_t pos) ++{ ++ if (!pos) ++ return head; ++ ++ return seq_list_start_rcu(head, pos - 1); ++} ++EXPORT_SYMBOL(seq_list_start_head_rcu); ++ ++struct list_head *seq_list_next_rcu(void *v, struct list_head *head, ++ loff_t *ppos) ++{ ++ struct list_head *lh; ++ ++ lh = list_next_rcu((struct list_head *)v); ++ ++*ppos; ++ return lh == head ? 
NULL : lh; ++} ++EXPORT_SYMBOL(seq_list_next_rcu); ++ + /** + * seq_hlist_start - start an iteration of a hlist + * @head: the head of the hlist +diff --git a/fs/signalfd.c b/fs/signalfd.c +index 040e1cf905282..65ce0e72e7b95 100644 +--- a/fs/signalfd.c ++++ b/fs/signalfd.c +@@ -35,17 +35,7 @@ + + void signalfd_cleanup(struct sighand_struct *sighand) + { +- wait_queue_head_t *wqh = &sighand->signalfd_wqh; +- /* +- * The lockless check can race with remove_wait_queue() in progress, +- * but in this case its caller should run under rcu_read_lock() and +- * sighand_cachep is SLAB_TYPESAFE_BY_RCU, we can safely return. +- */ +- if (likely(!waitqueue_active(wqh))) +- return; +- +- /* wait_queue_entry_t->func(POLLFREE) should do remove_wait_queue() */ +- wake_up_poll(wqh, EPOLLHUP | POLLFREE); ++ wake_up_pollfree(&sighand->signalfd_wqh); + } + + struct signalfd_ctx { +diff --git a/fs/smbfs_common/cifs_arc4.c b/fs/smbfs_common/cifs_arc4.c +index 85ba15a60b13b..043e4cb839fa2 100644 +--- a/fs/smbfs_common/cifs_arc4.c ++++ b/fs/smbfs_common/cifs_arc4.c +@@ -72,16 +72,3 @@ void cifs_arc4_crypt(struct arc4_ctx *ctx, u8 *out, const u8 *in, unsigned int l + ctx->y = y; + } + EXPORT_SYMBOL_GPL(cifs_arc4_crypt); +- +-static int __init +-init_smbfs_common(void) +-{ +- return 0; +-} +-static void __init +-exit_smbfs_common(void) +-{ +-} +- +-module_init(init_smbfs_common) +-module_exit(exit_smbfs_common) +diff --git a/fs/squashfs/squashfs_fs.h b/fs/squashfs/squashfs_fs.h +index b3fdc8212c5f5..95f8e89017689 100644 +--- a/fs/squashfs/squashfs_fs.h ++++ b/fs/squashfs/squashfs_fs.h +@@ -183,7 +183,7 @@ static inline int squashfs_block_size(__le32 raw) + #define SQUASHFS_ID_BLOCK_BYTES(A) (SQUASHFS_ID_BLOCKS(A) *\ + sizeof(u64)) + /* xattr id lookup table defines */ +-#define SQUASHFS_XATTR_BYTES(A) ((A) * sizeof(struct squashfs_xattr_id)) ++#define SQUASHFS_XATTR_BYTES(A) (((u64) (A)) * sizeof(struct squashfs_xattr_id)) + + #define SQUASHFS_XATTR_BLOCK(A) (SQUASHFS_XATTR_BYTES(A) / \ + SQUASHFS_METADATA_SIZE) +diff --git a/fs/squashfs/squashfs_fs_sb.h b/fs/squashfs/squashfs_fs_sb.h +index 1e90c2575f9bf..0c1ae97897317 100644 +--- a/fs/squashfs/squashfs_fs_sb.h ++++ b/fs/squashfs/squashfs_fs_sb.h +@@ -63,7 +63,7 @@ struct squashfs_sb_info { + long long bytes_used; + unsigned int inodes; + unsigned int fragments; +- int xattr_ids; ++ unsigned int xattr_ids; + unsigned int ids; + bool panic_on_errors; + }; +diff --git a/fs/squashfs/xattr.h b/fs/squashfs/xattr.h +index d8a270d3ac4cb..f1a463d8bfa02 100644 +--- a/fs/squashfs/xattr.h ++++ b/fs/squashfs/xattr.h +@@ -10,12 +10,12 @@ + + #ifdef CONFIG_SQUASHFS_XATTR + extern __le64 *squashfs_read_xattr_id_table(struct super_block *, u64, +- u64 *, int *); ++ u64 *, unsigned int *); + extern int squashfs_xattr_lookup(struct super_block *, unsigned int, int *, + unsigned int *, unsigned long long *); + #else + static inline __le64 *squashfs_read_xattr_id_table(struct super_block *sb, +- u64 start, u64 *xattr_table_start, int *xattr_ids) ++ u64 start, u64 *xattr_table_start, unsigned int *xattr_ids) + { + struct squashfs_xattr_id_table *id_table; + +diff --git a/fs/squashfs/xattr_id.c b/fs/squashfs/xattr_id.c +index 087cab8c78f4e..c8469c656e0dc 100644 +--- a/fs/squashfs/xattr_id.c ++++ b/fs/squashfs/xattr_id.c +@@ -56,7 +56,7 @@ int squashfs_xattr_lookup(struct super_block *sb, unsigned int index, + * Read uncompressed xattr id lookup table indexes from disk into memory + */ + __le64 *squashfs_read_xattr_id_table(struct super_block *sb, u64 table_start, +- u64 
*xattr_table_start, int *xattr_ids) ++ u64 *xattr_table_start, unsigned int *xattr_ids) + { + struct squashfs_sb_info *msblk = sb->s_fs_info; + unsigned int len, indexes; +diff --git a/fs/stat.c b/fs/stat.c +index 28d2020ba1f42..246d138ec0669 100644 +--- a/fs/stat.c ++++ b/fs/stat.c +@@ -334,9 +334,6 @@ SYSCALL_DEFINE2(fstat, unsigned int, fd, struct __old_kernel_stat __user *, stat + # define choose_32_64(a,b) b + #endif + +-#define valid_dev(x) choose_32_64(old_valid_dev(x),true) +-#define encode_dev(x) choose_32_64(old_encode_dev,new_encode_dev)(x) +- + #ifndef INIT_STRUCT_STAT_PADDING + # define INIT_STRUCT_STAT_PADDING(st) memset(&st, 0, sizeof(st)) + #endif +@@ -345,7 +342,9 @@ static int cp_new_stat(struct kstat *stat, struct stat __user *statbuf) + { + struct stat tmp; + +- if (!valid_dev(stat->dev) || !valid_dev(stat->rdev)) ++ if (sizeof(tmp.st_dev) < 4 && !old_valid_dev(stat->dev)) ++ return -EOVERFLOW; ++ if (sizeof(tmp.st_rdev) < 4 && !old_valid_dev(stat->rdev)) + return -EOVERFLOW; + #if BITS_PER_LONG == 32 + if (stat->size > MAX_NON_LFS) +@@ -353,7 +352,7 @@ static int cp_new_stat(struct kstat *stat, struct stat __user *statbuf) + #endif + + INIT_STRUCT_STAT_PADDING(tmp); +- tmp.st_dev = encode_dev(stat->dev); ++ tmp.st_dev = new_encode_dev(stat->dev); + tmp.st_ino = stat->ino; + if (sizeof(tmp.st_ino) < sizeof(stat->ino) && tmp.st_ino != stat->ino) + return -EOVERFLOW; +@@ -363,7 +362,7 @@ static int cp_new_stat(struct kstat *stat, struct stat __user *statbuf) + return -EOVERFLOW; + SET_UID(tmp.st_uid, from_kuid_munged(current_user_ns(), stat->uid)); + SET_GID(tmp.st_gid, from_kgid_munged(current_user_ns(), stat->gid)); +- tmp.st_rdev = encode_dev(stat->rdev); ++ tmp.st_rdev = new_encode_dev(stat->rdev); + tmp.st_size = stat->size; + tmp.st_atime = stat->atime.tv_sec; + tmp.st_mtime = stat->mtime.tv_sec; +@@ -644,11 +643,13 @@ static int cp_compat_stat(struct kstat *stat, struct compat_stat __user *ubuf) + { + struct compat_stat tmp; + +- if (!old_valid_dev(stat->dev) || !old_valid_dev(stat->rdev)) ++ if (sizeof(tmp.st_dev) < 4 && !old_valid_dev(stat->dev)) ++ return -EOVERFLOW; ++ if (sizeof(tmp.st_rdev) < 4 && !old_valid_dev(stat->rdev)) + return -EOVERFLOW; + + memset(&tmp, 0, sizeof(tmp)); +- tmp.st_dev = old_encode_dev(stat->dev); ++ tmp.st_dev = new_encode_dev(stat->dev); + tmp.st_ino = stat->ino; + if (sizeof(tmp.st_ino) < sizeof(stat->ino) && tmp.st_ino != stat->ino) + return -EOVERFLOW; +@@ -658,7 +659,7 @@ static int cp_compat_stat(struct kstat *stat, struct compat_stat __user *ubuf) + return -EOVERFLOW; + SET_UID(tmp.st_uid, from_kuid_munged(current_user_ns(), stat->uid)); + SET_GID(tmp.st_gid, from_kgid_munged(current_user_ns(), stat->gid)); +- tmp.st_rdev = old_encode_dev(stat->rdev); ++ tmp.st_rdev = new_encode_dev(stat->rdev); + if ((u64) stat->size > MAX_NON_LFS) + return -EOVERFLOW; + tmp.st_size = stat->size; +diff --git a/fs/super.c b/fs/super.c +index bcef3a6f4c4b5..7fa3ee79ec898 100644 +--- a/fs/super.c ++++ b/fs/super.c +@@ -293,7 +293,7 @@ static void __put_super(struct super_block *s) + WARN_ON(s->s_inode_lru.node); + WARN_ON(!list_empty(&s->s_mounts)); + security_sb_free(s); +- fscrypt_sb_free(s); ++ fscrypt_destroy_keyring(s); + put_user_ns(s->s_user_ns); + kfree(s->s_subtype); + call_rcu(&s->rcu, destroy_super_rcu); +@@ -454,6 +454,7 @@ void generic_shutdown_super(struct super_block *sb) + evict_inodes(sb); + /* only nonzero refcount inodes can have marks */ + fsnotify_sb_delete(sb); ++ fscrypt_destroy_keyring(sb); + security_sb_delete(sb); + + if 
(sb->s_dio_done_wq) { +@@ -1421,8 +1422,8 @@ struct dentry *mount_nodev(struct file_system_type *fs_type, + } + EXPORT_SYMBOL(mount_nodev); + +-static int reconfigure_single(struct super_block *s, +- int flags, void *data) ++int reconfigure_single(struct super_block *s, ++ int flags, void *data) + { + struct fs_context *fc; + int ret; +@@ -1616,11 +1617,9 @@ static void lockdep_sb_freeze_acquire(struct super_block *sb) + percpu_rwsem_acquire(sb->s_writers.rw_sem + level, 0, _THIS_IP_); + } + +-static void sb_freeze_unlock(struct super_block *sb) ++static void sb_freeze_unlock(struct super_block *sb, int level) + { +- int level; +- +- for (level = SB_FREEZE_LEVELS - 1; level >= 0; level--) ++ for (level--; level >= 0; level--) + percpu_up_write(sb->s_writers.rw_sem + level); + } + +@@ -1691,7 +1690,14 @@ int freeze_super(struct super_block *sb) + sb_wait_write(sb, SB_FREEZE_PAGEFAULT); + + /* All writers are done so after syncing there won't be dirty data */ +- sync_filesystem(sb); ++ ret = sync_filesystem(sb); ++ if (ret) { ++ sb->s_writers.frozen = SB_UNFROZEN; ++ sb_freeze_unlock(sb, SB_FREEZE_PAGEFAULT); ++ wake_up(&sb->s_writers.wait_unfrozen); ++ deactivate_locked_super(sb); ++ return ret; ++ } + + /* Now wait for internal filesystem counter */ + sb->s_writers.frozen = SB_FREEZE_FS; +@@ -1703,7 +1709,7 @@ int freeze_super(struct super_block *sb) + printk(KERN_ERR + "VFS:Filesystem freeze failed\n"); + sb->s_writers.frozen = SB_UNFROZEN; +- sb_freeze_unlock(sb); ++ sb_freeze_unlock(sb, SB_FREEZE_FS); + wake_up(&sb->s_writers.wait_unfrozen); + deactivate_locked_super(sb); + return ret; +@@ -1748,7 +1754,7 @@ static int thaw_super_locked(struct super_block *sb) + } + + sb->s_writers.frozen = SB_UNFROZEN; +- sb_freeze_unlock(sb); ++ sb_freeze_unlock(sb, SB_FREEZE_FS); + out: + wake_up(&sb->s_writers.wait_unfrozen); + deactivate_locked_super(sb); +diff --git a/fs/sync.c b/fs/sync.c +index 1373a610dc784..c7690016453e4 100644 +--- a/fs/sync.c ++++ b/fs/sync.c +@@ -3,6 +3,7 @@ + * High-level sync()-related operations + */ + ++#include <linux/blkdev.h> + #include <linux/kernel.h> + #include <linux/file.h> + #include <linux/fs.h> +@@ -21,25 +22,6 @@ + #define VALID_FLAGS (SYNC_FILE_RANGE_WAIT_BEFORE|SYNC_FILE_RANGE_WRITE| \ + SYNC_FILE_RANGE_WAIT_AFTER) + +-/* +- * Do the filesystem syncing work. For simple filesystems +- * writeback_inodes_sb(sb) just dirties buffers with inodes so we have to +- * submit IO for these buffers via __sync_blockdev(). This also speeds up the +- * wait == 1 case since in that case write_inode() functions do +- * sync_dirty_buffer() and thus effectively write one block at a time. +- */ +-static int __sync_filesystem(struct super_block *sb, int wait) +-{ +- if (wait) +- sync_inodes_sb(sb); +- else +- writeback_inodes_sb(sb, WB_REASON_SYNC); +- +- if (sb->s_op->sync_fs) +- sb->s_op->sync_fs(sb, wait); +- return __sync_blockdev(sb->s_bdev, wait); +-} +- + /* + * Write out and wait upon all dirty data associated with this + * superblock. Filesystem data as well as the underlying block +@@ -47,7 +29,7 @@ static int __sync_filesystem(struct super_block *sb, int wait) + */ + int sync_filesystem(struct super_block *sb) + { +- int ret; ++ int ret = 0; + + /* + * We need to be protected against the filesystem going from +@@ -61,10 +43,31 @@ int sync_filesystem(struct super_block *sb) + if (sb_rdonly(sb)) + return 0; + +- ret = __sync_filesystem(sb, 0); +- if (ret < 0) ++ /* ++ * Do the filesystem syncing work. 
For simple filesystems ++ * writeback_inodes_sb(sb) just dirties buffers with inodes so we have ++ * to submit I/O for these buffers via sync_blockdev(). This also ++ * speeds up the wait == 1 case since in that case write_inode() ++ * methods call sync_dirty_buffer() and thus effectively write one block ++ * at a time. ++ */ ++ writeback_inodes_sb(sb, WB_REASON_SYNC); ++ if (sb->s_op->sync_fs) { ++ ret = sb->s_op->sync_fs(sb, 0); ++ if (ret) ++ return ret; ++ } ++ ret = sync_blockdev_nowait(sb->s_bdev); ++ if (ret) + return ret; +- return __sync_filesystem(sb, 1); ++ ++ sync_inodes_sb(sb); ++ if (sb->s_op->sync_fs) { ++ ret = sb->s_op->sync_fs(sb, 1); ++ if (ret) ++ return ret; ++ } ++ return sync_blockdev(sb->s_bdev); + } + EXPORT_SYMBOL(sync_filesystem); + +@@ -81,21 +84,6 @@ static void sync_fs_one_sb(struct super_block *sb, void *arg) + sb->s_op->sync_fs(sb, *(int *)arg); + } + +-static void fdatawrite_one_bdev(struct block_device *bdev, void *arg) +-{ +- filemap_fdatawrite(bdev->bd_inode->i_mapping); +-} +- +-static void fdatawait_one_bdev(struct block_device *bdev, void *arg) +-{ +- /* +- * We keep the error status of individual mapping so that +- * applications can catch the writeback error using fsync(2). +- * See filemap_fdatawait_keep_errors() for details. +- */ +- filemap_fdatawait_keep_errors(bdev->bd_inode->i_mapping); +-} +- + /* + * Sync everything. We start by waking flusher threads so that most of + * writeback runs on all devices in parallel. Then we sync all inodes reliably +@@ -114,8 +102,8 @@ void ksys_sync(void) + iterate_supers(sync_inodes_one_sb, NULL); + iterate_supers(sync_fs_one_sb, &nowait); + iterate_supers(sync_fs_one_sb, &wait); +- iterate_bdevs(fdatawrite_one_bdev, NULL); +- iterate_bdevs(fdatawait_one_bdev, NULL); ++ sync_bdevs(false); ++ sync_bdevs(true); + if (unlikely(laptop_mode)) + laptop_sync_completion(); + } +@@ -136,10 +124,10 @@ static void do_sync_work(struct work_struct *work) + */ + iterate_supers(sync_inodes_one_sb, &nowait); + iterate_supers(sync_fs_one_sb, &nowait); +- iterate_bdevs(fdatawrite_one_bdev, NULL); ++ sync_bdevs(false); + iterate_supers(sync_inodes_one_sb, &nowait); + iterate_supers(sync_fs_one_sb, &nowait); +- iterate_bdevs(fdatawrite_one_bdev, NULL); ++ sync_bdevs(false); + printk("Emergency Sync complete\n"); + kfree(work); + } +diff --git a/fs/sysv/itree.c b/fs/sysv/itree.c +index 749385015a8d3..5a59d56a2038c 100644 +--- a/fs/sysv/itree.c ++++ b/fs/sysv/itree.c +@@ -438,7 +438,7 @@ static unsigned sysv_nblocks(struct super_block *s, loff_t size) + res += blocks; + direct = 1; + } +- return blocks; ++ return res; + } + + int sysv_getattr(struct user_namespace *mnt_userns, const struct path *path, +diff --git a/fs/tracefs/inode.c b/fs/tracefs/inode.c +index 1261e8b41edb4..066e8344934de 100644 +--- a/fs/tracefs/inode.c ++++ b/fs/tracefs/inode.c +@@ -141,6 +141,8 @@ struct tracefs_mount_opts { + kuid_t uid; + kgid_t gid; + umode_t mode; ++ /* Opt_* bitfield. */ ++ unsigned int opts; + }; + + enum { +@@ -161,6 +163,77 @@ struct tracefs_fs_info { + struct tracefs_mount_opts mount_opts; + }; + ++static void change_gid(struct dentry *dentry, kgid_t gid) ++{ ++ if (!dentry->d_inode) ++ return; ++ dentry->d_inode->i_gid = gid; ++} ++ ++/* ++ * Taken from d_walk, but without he need for handling renames. ++ * Nothing can be renamed while walking the list, as tracefs ++ * does not support renames. This is only called when mounting ++ * or remounting the file system, to set all the files to ++ * the given gid. 
++ */ ++static void set_gid(struct dentry *parent, kgid_t gid) ++{ ++ struct dentry *this_parent; ++ struct list_head *next; ++ ++ this_parent = parent; ++ spin_lock(&this_parent->d_lock); ++ ++ change_gid(this_parent, gid); ++repeat: ++ next = this_parent->d_subdirs.next; ++resume: ++ while (next != &this_parent->d_subdirs) { ++ struct list_head *tmp = next; ++ struct dentry *dentry = list_entry(tmp, struct dentry, d_child); ++ next = tmp->next; ++ ++ spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); ++ ++ change_gid(dentry, gid); ++ ++ if (!list_empty(&dentry->d_subdirs)) { ++ spin_unlock(&this_parent->d_lock); ++ spin_release(&dentry->d_lock.dep_map, _RET_IP_); ++ this_parent = dentry; ++ spin_acquire(&this_parent->d_lock.dep_map, 0, 1, _RET_IP_); ++ goto repeat; ++ } ++ spin_unlock(&dentry->d_lock); ++ } ++ /* ++ * All done at this level ... ascend and resume the search. ++ */ ++ rcu_read_lock(); ++ascend: ++ if (this_parent != parent) { ++ struct dentry *child = this_parent; ++ this_parent = child->d_parent; ++ ++ spin_unlock(&child->d_lock); ++ spin_lock(&this_parent->d_lock); ++ ++ /* go into the first sibling still alive */ ++ do { ++ next = child->d_child.next; ++ if (next == &this_parent->d_subdirs) ++ goto ascend; ++ child = list_entry(next, struct dentry, d_child); ++ } while (unlikely(child->d_flags & DCACHE_DENTRY_KILLED)); ++ rcu_read_unlock(); ++ goto resume; ++ } ++ rcu_read_unlock(); ++ spin_unlock(&this_parent->d_lock); ++ return; ++} ++ + static int tracefs_parse_options(char *data, struct tracefs_mount_opts *opts) + { + substring_t args[MAX_OPT_ARGS]; +@@ -170,6 +243,7 @@ static int tracefs_parse_options(char *data, struct tracefs_mount_opts *opts) + kgid_t gid; + char *p; + ++ opts->opts = 0; + opts->mode = TRACEFS_DEFAULT_MODE; + + while ((p = strsep(&data, ",")) != NULL) { +@@ -204,22 +278,36 @@ static int tracefs_parse_options(char *data, struct tracefs_mount_opts *opts) + * but traditionally tracefs has ignored all mount options + */ + } ++ ++ opts->opts |= BIT(token); + } + + return 0; + } + +-static int tracefs_apply_options(struct super_block *sb) ++static int tracefs_apply_options(struct super_block *sb, bool remount) + { + struct tracefs_fs_info *fsi = sb->s_fs_info; + struct inode *inode = sb->s_root->d_inode; + struct tracefs_mount_opts *opts = &fsi->mount_opts; + +- inode->i_mode &= ~S_IALLUGO; +- inode->i_mode |= opts->mode; ++ /* ++ * On remount, only reset mode/uid/gid if they were provided as mount ++ * options. ++ */ ++ ++ if (!remount || opts->opts & BIT(Opt_mode)) { ++ inode->i_mode &= ~S_IALLUGO; ++ inode->i_mode |= opts->mode; ++ } + +- inode->i_uid = opts->uid; +- inode->i_gid = opts->gid; ++ if (!remount || opts->opts & BIT(Opt_uid)) ++ inode->i_uid = opts->uid; ++ ++ if (!remount || opts->opts & BIT(Opt_gid)) { ++ /* Set all the group ids to the mount option */ ++ set_gid(sb->s_root, opts->gid); ++ } + + return 0; + } +@@ -234,7 +322,7 @@ static int tracefs_remount(struct super_block *sb, int *flags, char *data) + if (err) + goto fail; + +- tracefs_apply_options(sb); ++ tracefs_apply_options(sb, true); + + fail: + return err; +@@ -286,7 +374,7 @@ static int trace_fill_super(struct super_block *sb, void *data, int silent) + + sb->s_op = &tracefs_super_operations; + +- tracefs_apply_options(sb); ++ tracefs_apply_options(sb, false); + + return 0; + +@@ -414,6 +502,8 @@ struct dentry *tracefs_create_file(const char *name, umode_t mode, + inode->i_mode = mode; + inode->i_fop = fops ? 
fops : &tracefs_file_operations; + inode->i_private = data; ++ inode->i_uid = d_inode(dentry->d_parent)->i_uid; ++ inode->i_gid = d_inode(dentry->d_parent)->i_gid; + d_instantiate(dentry, inode); + fsnotify_create(dentry->d_parent->d_inode, dentry); + return end_creating(dentry); +@@ -432,9 +522,12 @@ static struct dentry *__create_dir(const char *name, struct dentry *parent, + if (unlikely(!inode)) + return failed_creating(dentry); + +- inode->i_mode = S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO; ++ /* Do not set bits for OTH */ ++ inode->i_mode = S_IFDIR | S_IRWXU | S_IRUSR| S_IRGRP | S_IXUSR | S_IXGRP; + inode->i_op = ops; + inode->i_fop = &simple_dir_operations; ++ inode->i_uid = d_inode(dentry->d_parent)->i_uid; ++ inode->i_gid = d_inode(dentry->d_parent)->i_gid; + + /* directory inodes start off with i_nlink == 2 (for "." entry) */ + inc_nlink(inode); +diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c +index 7c61d0ec0159e..79e371bc15e1e 100644 +--- a/fs/ubifs/dir.c ++++ b/fs/ubifs/dir.c +@@ -349,20 +349,97 @@ out_budg: + return err; + } + +-static int do_tmpfile(struct inode *dir, struct dentry *dentry, +- umode_t mode, struct inode **whiteout) ++static struct inode *create_whiteout(struct inode *dir, struct dentry *dentry) ++{ ++ int err; ++ umode_t mode = S_IFCHR | WHITEOUT_MODE; ++ struct inode *inode; ++ struct ubifs_info *c = dir->i_sb->s_fs_info; ++ struct fscrypt_name nm; ++ ++ /* ++ * Create an inode('nlink = 1') for whiteout without updating journal, ++ * let ubifs_jnl_rename() store it on flash to complete rename whiteout ++ * atomically. ++ */ ++ ++ dbg_gen("dent '%pd', mode %#hx in dir ino %lu", ++ dentry, mode, dir->i_ino); ++ ++ err = fscrypt_setup_filename(dir, &dentry->d_name, 0, &nm); ++ if (err) ++ return ERR_PTR(err); ++ ++ inode = ubifs_new_inode(c, dir, mode); ++ if (IS_ERR(inode)) { ++ err = PTR_ERR(inode); ++ goto out_free; ++ } ++ ++ init_special_inode(inode, inode->i_mode, WHITEOUT_DEV); ++ ubifs_assert(c, inode->i_op == &ubifs_file_inode_operations); ++ ++ err = ubifs_init_security(dir, inode, &dentry->d_name); ++ if (err) ++ goto out_inode; ++ ++ /* The dir size is updated by do_rename. */ ++ insert_inode_hash(inode); ++ ++ return inode; ++ ++out_inode: ++ make_bad_inode(inode); ++ iput(inode); ++out_free: ++ fscrypt_free_filename(&nm); ++ ubifs_err(c, "cannot create whiteout file, error %d", err); ++ return ERR_PTR(err); ++} ++ ++/** ++ * lock_2_inodes - a wrapper for locking two UBIFS inodes. ++ * @inode1: first inode ++ * @inode2: second inode ++ * ++ * We do not implement any tricks to guarantee strict lock ordering, because ++ * VFS has already done it for us on the @i_mutex. So this is just a simple ++ * wrapper function. ++ */ ++static void lock_2_inodes(struct inode *inode1, struct inode *inode2) ++{ ++ mutex_lock_nested(&ubifs_inode(inode1)->ui_mutex, WB_MUTEX_1); ++ mutex_lock_nested(&ubifs_inode(inode2)->ui_mutex, WB_MUTEX_2); ++} ++ ++/** ++ * unlock_2_inodes - a wrapper for unlocking two UBIFS inodes. 
++ * @inode1: first inode ++ * @inode2: second inode ++ */ ++static void unlock_2_inodes(struct inode *inode1, struct inode *inode2) ++{ ++ mutex_unlock(&ubifs_inode(inode2)->ui_mutex); ++ mutex_unlock(&ubifs_inode(inode1)->ui_mutex); ++} ++ ++static int ubifs_tmpfile(struct user_namespace *mnt_userns, struct inode *dir, ++ struct dentry *dentry, umode_t mode) + { + struct inode *inode; + struct ubifs_info *c = dir->i_sb->s_fs_info; +- struct ubifs_budget_req req = { .new_ino = 1, .new_dent = 1}; ++ struct ubifs_budget_req req = { .new_ino = 1, .new_dent = 1, ++ .dirtied_ino = 1}; + struct ubifs_budget_req ino_req = { .dirtied_ino = 1 }; +- struct ubifs_inode *ui, *dir_ui = ubifs_inode(dir); ++ struct ubifs_inode *ui; + int err, instantiated = 0; + struct fscrypt_name nm; + + /* +- * Budget request settings: new dirty inode, new direntry, +- * budget for dirtied inode will be released via writeback. ++ * Budget request settings: new inode, new direntry, changing the ++ * parent directory inode. ++ * Allocate budget separately for new dirtied inode, the budget will ++ * be released via writeback. + */ + + dbg_gen("dent '%pd', mode %#hx in dir ino %lu", +@@ -392,42 +469,30 @@ static int do_tmpfile(struct inode *dir, struct dentry *dentry, + } + ui = ubifs_inode(inode); + +- if (whiteout) { +- init_special_inode(inode, inode->i_mode, WHITEOUT_DEV); +- ubifs_assert(c, inode->i_op == &ubifs_file_inode_operations); +- } +- + err = ubifs_init_security(dir, inode, &dentry->d_name); + if (err) + goto out_inode; + + mutex_lock(&ui->ui_mutex); + insert_inode_hash(inode); +- +- if (whiteout) { +- mark_inode_dirty(inode); +- drop_nlink(inode); +- *whiteout = inode; +- } else { +- d_tmpfile(dentry, inode); +- } ++ d_tmpfile(dentry, inode); + ubifs_assert(c, ui->dirty); + + instantiated = 1; + mutex_unlock(&ui->ui_mutex); + +- mutex_lock(&dir_ui->ui_mutex); ++ lock_2_inodes(dir, inode); + err = ubifs_jnl_update(c, dir, &nm, inode, 1, 0); + if (err) + goto out_cancel; +- mutex_unlock(&dir_ui->ui_mutex); ++ unlock_2_inodes(dir, inode); + + ubifs_release_budget(c, &req); + + return 0; + + out_cancel: +- mutex_unlock(&dir_ui->ui_mutex); ++ unlock_2_inodes(dir, inode); + out_inode: + make_bad_inode(inode); + if (!instantiated) +@@ -441,12 +506,6 @@ out_budg: + return err; + } + +-static int ubifs_tmpfile(struct user_namespace *mnt_userns, struct inode *dir, +- struct dentry *dentry, umode_t mode) +-{ +- return do_tmpfile(dir, dentry, mode, NULL); +-} +- + /** + * vfs_dent_type - get VFS directory entry type. + * @type: UBIFS directory entry type +@@ -660,32 +719,6 @@ static int ubifs_dir_release(struct inode *dir, struct file *file) + return 0; + } + +-/** +- * lock_2_inodes - a wrapper for locking two UBIFS inodes. +- * @inode1: first inode +- * @inode2: second inode +- * +- * We do not implement any tricks to guarantee strict lock ordering, because +- * VFS has already done it for us on the @i_mutex. So this is just a simple +- * wrapper function. +- */ +-static void lock_2_inodes(struct inode *inode1, struct inode *inode2) +-{ +- mutex_lock_nested(&ubifs_inode(inode1)->ui_mutex, WB_MUTEX_1); +- mutex_lock_nested(&ubifs_inode(inode2)->ui_mutex, WB_MUTEX_2); +-} +- +-/** +- * unlock_2_inodes - a wrapper for unlocking two UBIFS inodes. 
+- * @inode1: first inode +- * @inode2: second inode +- */ +-static void unlock_2_inodes(struct inode *inode1, struct inode *inode2) +-{ +- mutex_unlock(&ubifs_inode(inode2)->ui_mutex); +- mutex_unlock(&ubifs_inode(inode1)->ui_mutex); +-} +- + static int ubifs_link(struct dentry *old_dentry, struct inode *dir, + struct dentry *dentry) + { +@@ -949,7 +982,8 @@ static int ubifs_mkdir(struct user_namespace *mnt_userns, struct inode *dir, + struct ubifs_inode *dir_ui = ubifs_inode(dir); + struct ubifs_info *c = dir->i_sb->s_fs_info; + int err, sz_change; +- struct ubifs_budget_req req = { .new_ino = 1, .new_dent = 1 }; ++ struct ubifs_budget_req req = { .new_ino = 1, .new_dent = 1, ++ .dirtied_ino = 1}; + struct fscrypt_name nm; + + /* +@@ -1264,17 +1298,19 @@ static int do_rename(struct inode *old_dir, struct dentry *old_dentry, + .dirtied_ino = 3 }; + struct ubifs_budget_req ino_req = { .dirtied_ino = 1, + .dirtied_ino_d = ALIGN(old_inode_ui->data_len, 8) }; ++ struct ubifs_budget_req wht_req; + struct timespec64 time; + unsigned int saved_nlink; + struct fscrypt_name old_nm, new_nm; + + /* +- * Budget request settings: deletion direntry, new direntry, removing +- * the old inode, and changing old and new parent directory inodes. ++ * Budget request settings: ++ * req: deletion direntry, new direntry, removing the old inode, ++ * and changing old and new parent directory inodes. ++ * ++ * wht_req: new whiteout inode for RENAME_WHITEOUT. + * +- * However, this operation also marks the target inode as dirty and +- * does not write it, so we allocate budget for the target inode +- * separately. ++ * ino_req: marks the target inode as dirty and does not write it. + */ + + dbg_gen("dent '%pd' ino %lu in dir ino %lu to dent '%pd' in dir ino %lu flags 0x%x", +@@ -1331,20 +1367,44 @@ static int do_rename(struct inode *old_dir, struct dentry *old_dentry, + goto out_release; + } + +- err = do_tmpfile(old_dir, old_dentry, S_IFCHR | WHITEOUT_MODE, &whiteout); +- if (err) { ++ /* ++ * The whiteout inode without dentry is pinned in memory, ++ * umount won't happen during rename process because we ++ * got parent dentry. ++ */ ++ whiteout = create_whiteout(old_dir, old_dentry); ++ if (IS_ERR(whiteout)) { ++ err = PTR_ERR(whiteout); + kfree(dev); + goto out_release; + } + +- spin_lock(&whiteout->i_lock); +- whiteout->i_state |= I_LINKABLE; +- spin_unlock(&whiteout->i_lock); +- + whiteout_ui = ubifs_inode(whiteout); + whiteout_ui->data = dev; + whiteout_ui->data_len = ubifs_encode_dev(dev, MKDEV(0, 0)); + ubifs_assert(c, !whiteout_ui->dirty); ++ ++ memset(&wht_req, 0, sizeof(struct ubifs_budget_req)); ++ wht_req.new_ino = 1; ++ wht_req.new_ino_d = ALIGN(whiteout_ui->data_len, 8); ++ /* ++ * To avoid deadlock between space budget (holds ui_mutex and ++ * waits wb work) and writeback work(waits ui_mutex), do space ++ * budget before ubifs inodes locked. ++ */ ++ err = ubifs_budget_space(c, &wht_req); ++ if (err) { ++ /* ++ * Whiteout inode can not be written on flash by ++ * ubifs_jnl_write_inode(), because it's neither ++ * dirty nor zero-nlink. ++ */ ++ iput(whiteout); ++ goto out_release; ++ } ++ ++ /* Add the old_dentry size to the old_dir size. 
*/ ++ old_sz -= CALC_DENT_SIZE(fname_len(&old_nm)); + } + + lock_4_inodes(old_dir, new_dir, new_inode, whiteout); +@@ -1416,29 +1476,11 @@ static int do_rename(struct inode *old_dir, struct dentry *old_dentry, + sync = IS_DIRSYNC(old_dir) || IS_DIRSYNC(new_dir); + if (unlink && IS_SYNC(new_inode)) + sync = 1; +- } +- +- if (whiteout) { +- struct ubifs_budget_req wht_req = { .dirtied_ino = 1, +- .dirtied_ino_d = \ +- ALIGN(ubifs_inode(whiteout)->data_len, 8) }; +- +- err = ubifs_budget_space(c, &wht_req); +- if (err) { +- kfree(whiteout_ui->data); +- whiteout_ui->data_len = 0; +- iput(whiteout); +- goto out_release; +- } +- +- inc_nlink(whiteout); +- mark_inode_dirty(whiteout); +- +- spin_lock(&whiteout->i_lock); +- whiteout->i_state &= ~I_LINKABLE; +- spin_unlock(&whiteout->i_lock); +- +- iput(whiteout); ++ /* ++ * S_SYNC flag of whiteout inherits from the old_dir, and we ++ * have already checked the old dir inode. So there is no need ++ * to check whiteout. ++ */ + } + + err = ubifs_jnl_rename(c, old_dir, old_inode, &old_nm, new_dir, +@@ -1449,6 +1491,11 @@ static int do_rename(struct inode *old_dir, struct dentry *old_dentry, + unlock_4_inodes(old_dir, new_dir, new_inode, whiteout); + ubifs_release_budget(c, &req); + ++ if (whiteout) { ++ ubifs_release_budget(c, &wht_req); ++ iput(whiteout); ++ } ++ + mutex_lock(&old_inode_ui->ui_mutex); + release = old_inode_ui->dirty; + mark_inode_dirty_sync(old_inode); +@@ -1457,11 +1504,16 @@ static int do_rename(struct inode *old_dir, struct dentry *old_dentry, + if (release) + ubifs_release_budget(c, &ino_req); + if (IS_SYNC(old_inode)) +- err = old_inode->i_sb->s_op->write_inode(old_inode, NULL); ++ /* ++ * Rename finished here. Although old inode cannot be updated ++ * on flash, old ctime is not a big problem, don't return err ++ * code to userspace. 
++ */ ++ old_inode->i_sb->s_op->write_inode(old_inode, NULL); + + fscrypt_free_filename(&old_nm); + fscrypt_free_filename(&new_nm); +- return err; ++ return 0; + + out_cancel: + if (unlink) { +@@ -1482,11 +1534,11 @@ out_cancel: + inc_nlink(old_dir); + } + } ++ unlock_4_inodes(old_dir, new_dir, new_inode, whiteout); + if (whiteout) { +- drop_nlink(whiteout); ++ ubifs_release_budget(c, &wht_req); + iput(whiteout); + } +- unlock_4_inodes(old_dir, new_dir, new_inode, whiteout); + out_release: + ubifs_release_budget(c, &ino_req); + ubifs_release_budget(c, &req); +diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c +index 5cfa28cd00cdc..6b45a037a0471 100644 +--- a/fs/ubifs/file.c ++++ b/fs/ubifs/file.c +@@ -570,7 +570,7 @@ static int ubifs_write_end(struct file *file, struct address_space *mapping, + } + + if (!PagePrivate(page)) { +- SetPagePrivate(page); ++ attach_page_private(page, (void *)1); + atomic_long_inc(&c->dirty_pg_cnt); + __set_page_dirty_nobuffers(page); + } +@@ -947,7 +947,7 @@ static int do_writepage(struct page *page, int len) + release_existing_page_budget(c); + + atomic_long_dec(&c->dirty_pg_cnt); +- ClearPagePrivate(page); ++ detach_page_private(page); + ClearPageChecked(page); + + kunmap(page); +@@ -1304,7 +1304,7 @@ static void ubifs_invalidatepage(struct page *page, unsigned int offset, + release_existing_page_budget(c); + + atomic_long_dec(&c->dirty_pg_cnt); +- ClearPagePrivate(page); ++ detach_page_private(page); + ClearPageChecked(page); + } + +@@ -1471,8 +1471,8 @@ static int ubifs_migrate_page(struct address_space *mapping, + return rc; + + if (PagePrivate(page)) { +- ClearPagePrivate(page); +- SetPagePrivate(newpage); ++ detach_page_private(page); ++ attach_page_private(newpage, (void *)1); + } + + if (mode != MIGRATE_SYNC_NO_COPY) +@@ -1496,7 +1496,7 @@ static int ubifs_releasepage(struct page *page, gfp_t unused_gfp_flags) + return 0; + ubifs_assert(c, PagePrivate(page)); + ubifs_assert(c, 0); +- ClearPagePrivate(page); ++ detach_page_private(page); + ClearPageChecked(page); + return 1; + } +@@ -1567,7 +1567,7 @@ static vm_fault_t ubifs_vm_page_mkwrite(struct vm_fault *vmf) + else { + if (!PageChecked(page)) + ubifs_convert_page_budget(c); +- SetPagePrivate(page); ++ attach_page_private(page, (void *)1); + atomic_long_inc(&c->dirty_pg_cnt); + __set_page_dirty_nobuffers(page); + } +diff --git a/fs/ubifs/io.c b/fs/ubifs/io.c +index 00b61dba62b70..b019dd6f7fa06 100644 +--- a/fs/ubifs/io.c ++++ b/fs/ubifs/io.c +@@ -833,16 +833,42 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len) + */ + n = aligned_len >> c->max_write_shift; + if (n) { +- n <<= c->max_write_shift; ++ int m = n - 1; ++ + dbg_io("write %d bytes to LEB %d:%d", n, wbuf->lnum, + wbuf->offs); +- err = ubifs_leb_write(c, wbuf->lnum, buf + written, +- wbuf->offs, n); ++ ++ if (m) { ++ /* '(n-1)<<c->max_write_shift < len' is always true. */ ++ m <<= c->max_write_shift; ++ err = ubifs_leb_write(c, wbuf->lnum, buf + written, ++ wbuf->offs, m); ++ if (err) ++ goto out; ++ wbuf->offs += m; ++ aligned_len -= m; ++ len -= m; ++ written += m; ++ } ++ ++ /* ++ * The non-written len of buf may be less than 'n' because ++ * parameter 'len' is not 8 bytes aligned, so here we read ++ * min(len, n) bytes from buf. 
++ */ ++ n = 1 << c->max_write_shift; ++ memcpy(wbuf->buf, buf + written, min(len, n)); ++ if (n > len) { ++ ubifs_assert(c, n - len < 8); ++ ubifs_pad(c, wbuf->buf + len, n - len); ++ } ++ ++ err = ubifs_leb_write(c, wbuf->lnum, wbuf->buf, wbuf->offs, n); + if (err) + goto out; + wbuf->offs += n; + aligned_len -= n; +- len -= n; ++ len -= min(len, n); + written += n; + } + +diff --git a/fs/ubifs/ioctl.c b/fs/ubifs/ioctl.c +index c6a8634877803..71bcebe45f9c5 100644 +--- a/fs/ubifs/ioctl.c ++++ b/fs/ubifs/ioctl.c +@@ -108,7 +108,7 @@ static int setflags(struct inode *inode, int flags) + struct ubifs_inode *ui = ubifs_inode(inode); + struct ubifs_info *c = inode->i_sb->s_fs_info; + struct ubifs_budget_req req = { .dirtied_ino = 1, +- .dirtied_ino_d = ui->data_len }; ++ .dirtied_ino_d = ALIGN(ui->data_len, 8) }; + + err = ubifs_budget_space(c, &req); + if (err) +diff --git a/fs/ubifs/journal.c b/fs/ubifs/journal.c +index 8ea680dba61e3..75dab0ae3939d 100644 +--- a/fs/ubifs/journal.c ++++ b/fs/ubifs/journal.c +@@ -1207,9 +1207,9 @@ out_free: + * @sync: non-zero if the write-buffer has to be synchronized + * + * This function implements the re-name operation which may involve writing up +- * to 4 inodes and 2 directory entries. It marks the written inodes as clean +- * and returns zero on success. In case of failure, a negative error code is +- * returned. ++ * to 4 inodes(new inode, whiteout inode, old and new parent directory inodes) ++ * and 2 directory entries. It marks the written inodes as clean and returns ++ * zero on success. In case of failure, a negative error code is returned. + */ + int ubifs_jnl_rename(struct ubifs_info *c, const struct inode *old_dir, + const struct inode *old_inode, +@@ -1222,14 +1222,15 @@ int ubifs_jnl_rename(struct ubifs_info *c, const struct inode *old_dir, + void *p; + union ubifs_key key; + struct ubifs_dent_node *dent, *dent2; +- int err, dlen1, dlen2, ilen, lnum, offs, len, orphan_added = 0; ++ int err, dlen1, dlen2, ilen, wlen, lnum, offs, len, orphan_added = 0; + int aligned_dlen1, aligned_dlen2, plen = UBIFS_INO_NODE_SZ; + int last_reference = !!(new_inode && new_inode->i_nlink == 0); + int move = (old_dir != new_dir); +- struct ubifs_inode *new_ui; ++ struct ubifs_inode *new_ui, *whiteout_ui; + u8 hash_old_dir[UBIFS_HASH_ARR_SZ]; + u8 hash_new_dir[UBIFS_HASH_ARR_SZ]; + u8 hash_new_inode[UBIFS_HASH_ARR_SZ]; ++ u8 hash_whiteout_inode[UBIFS_HASH_ARR_SZ]; + u8 hash_dent1[UBIFS_HASH_ARR_SZ]; + u8 hash_dent2[UBIFS_HASH_ARR_SZ]; + +@@ -1249,9 +1250,20 @@ int ubifs_jnl_rename(struct ubifs_info *c, const struct inode *old_dir, + } else + ilen = 0; + ++ if (whiteout) { ++ whiteout_ui = ubifs_inode(whiteout); ++ ubifs_assert(c, mutex_is_locked(&whiteout_ui->ui_mutex)); ++ ubifs_assert(c, whiteout->i_nlink == 1); ++ ubifs_assert(c, !whiteout_ui->dirty); ++ wlen = UBIFS_INO_NODE_SZ; ++ wlen += whiteout_ui->data_len; ++ } else ++ wlen = 0; ++ + aligned_dlen1 = ALIGN(dlen1, 8); + aligned_dlen2 = ALIGN(dlen2, 8); +- len = aligned_dlen1 + aligned_dlen2 + ALIGN(ilen, 8) + ALIGN(plen, 8); ++ len = aligned_dlen1 + aligned_dlen2 + ALIGN(ilen, 8) + ++ ALIGN(wlen, 8) + ALIGN(plen, 8); + if (move) + len += plen; + +@@ -1313,6 +1325,15 @@ int ubifs_jnl_rename(struct ubifs_info *c, const struct inode *old_dir, + p += ALIGN(ilen, 8); + } + ++ if (whiteout) { ++ pack_inode(c, p, whiteout, 0); ++ err = ubifs_node_calc_hash(c, p, hash_whiteout_inode); ++ if (err) ++ goto out_release; ++ ++ p += ALIGN(wlen, 8); ++ } ++ + if (!move) { + pack_inode(c, p, old_dir, 1); + err = 
ubifs_node_calc_hash(c, p, hash_old_dir); +@@ -1352,6 +1373,9 @@ int ubifs_jnl_rename(struct ubifs_info *c, const struct inode *old_dir, + if (new_inode) + ubifs_wbuf_add_ino_nolock(&c->jheads[BASEHD].wbuf, + new_inode->i_ino); ++ if (whiteout) ++ ubifs_wbuf_add_ino_nolock(&c->jheads[BASEHD].wbuf, ++ whiteout->i_ino); + } + release_head(c, BASEHD); + +@@ -1368,8 +1392,6 @@ int ubifs_jnl_rename(struct ubifs_info *c, const struct inode *old_dir, + err = ubifs_tnc_add_nm(c, &key, lnum, offs, dlen2, hash_dent2, old_nm); + if (err) + goto out_ro; +- +- ubifs_delete_orphan(c, whiteout->i_ino); + } else { + err = ubifs_add_dirt(c, lnum, dlen2); + if (err) +@@ -1390,6 +1412,15 @@ int ubifs_jnl_rename(struct ubifs_info *c, const struct inode *old_dir, + offs += ALIGN(ilen, 8); + } + ++ if (whiteout) { ++ ino_key_init(c, &key, whiteout->i_ino); ++ err = ubifs_tnc_add(c, &key, lnum, offs, wlen, ++ hash_whiteout_inode); ++ if (err) ++ goto out_ro; ++ offs += ALIGN(wlen, 8); ++ } ++ + ino_key_init(c, &key, old_dir->i_ino); + err = ubifs_tnc_add(c, &key, lnum, offs, plen, hash_old_dir); + if (err) +@@ -1410,6 +1441,11 @@ int ubifs_jnl_rename(struct ubifs_info *c, const struct inode *old_dir, + new_ui->synced_i_size = new_ui->ui_size; + spin_unlock(&new_ui->ui_lock); + } ++ /* ++ * No need to mark whiteout inode clean. ++ * Whiteout doesn't have non-zero size, no need to update ++ * synced_i_size for whiteout_ui. ++ */ + mark_inode_clean(c, ubifs_inode(old_dir)); + if (move) + mark_inode_clean(c, ubifs_inode(new_dir)); +diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c +index f0fb25727d961..eb05038b71911 100644 +--- a/fs/ubifs/super.c ++++ b/fs/ubifs/super.c +@@ -1853,7 +1853,6 @@ out: + kthread_stop(c->bgt); + c->bgt = NULL; + } +- free_wbufs(c); + kfree(c->write_reserve_buf); + c->write_reserve_buf = NULL; + vfree(c->ileb_buf); +diff --git a/fs/udf/dir.c b/fs/udf/dir.c +index 70abdfad2df17..42e3e551fa4c3 100644 +--- a/fs/udf/dir.c ++++ b/fs/udf/dir.c +@@ -31,6 +31,7 @@ + #include <linux/mm.h> + #include <linux/slab.h> + #include <linux/bio.h> ++#include <linux/iversion.h> + + #include "udf_i.h" + #include "udf_sb.h" +@@ -43,7 +44,7 @@ static int udf_readdir(struct file *file, struct dir_context *ctx) + struct fileIdentDesc *fi = NULL; + struct fileIdentDesc cfi; + udf_pblk_t block, iblock; +- loff_t nf_pos; ++ loff_t nf_pos, emit_pos = 0; + int flen; + unsigned char *fname = NULL, *copy_name = NULL; + unsigned char *nameptr; +@@ -57,6 +58,7 @@ static int udf_readdir(struct file *file, struct dir_context *ctx) + int i, num, ret = 0; + struct extent_position epos = { NULL, 0, {0, 0} }; + struct super_block *sb = dir->i_sb; ++ bool pos_valid = false; + + if (ctx->pos == 0) { + if (!dir_emit_dot(file, ctx)) +@@ -67,6 +69,21 @@ static int udf_readdir(struct file *file, struct dir_context *ctx) + if (nf_pos >= size) + goto out; + ++ /* ++ * Something changed since last readdir (either lseek was called or dir ++ * changed)? We need to verify the position correctly points at the ++ * beginning of some dir entry so that the directory parsing code does ++ * not get confused. Since UDF does not have any reliable way of ++ * identifying beginning of dir entry (names are under user control), ++ * we need to scan the directory from the beginning. 
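++ * (Sketch of the scheme used below: emit_pos remembers the offset
++ * the caller actually asked for, nf_pos restarts at 0, and the main
++ * loop keeps parsing entries but skips emitting them until nf_pos
++ * catches up with emit_pos again.)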
++ */ ++ if (!inode_eq_iversion(dir, file->f_version)) { ++ emit_pos = nf_pos; ++ nf_pos = 0; ++ } else { ++ pos_valid = true; ++ } ++ + fname = kmalloc(UDF_NAME_LEN, GFP_NOFS); + if (!fname) { + ret = -ENOMEM; +@@ -122,13 +139,21 @@ static int udf_readdir(struct file *file, struct dir_context *ctx) + + while (nf_pos < size) { + struct kernel_lb_addr tloc; ++ loff_t cur_pos = nf_pos; + +- ctx->pos = (nf_pos >> 2) + 1; ++ /* Update file position only if we got past the current one */ ++ if (nf_pos >= emit_pos) { ++ ctx->pos = (nf_pos >> 2) + 1; ++ pos_valid = true; ++ } + + fi = udf_fileident_read(dir, &nf_pos, &fibh, &cfi, &epos, &eloc, + &elen, &offset); + if (!fi) + goto out; ++ /* Still not at offset where user asked us to read from? */ ++ if (cur_pos < emit_pos) ++ continue; + + liu = le16_to_cpu(cfi.lengthOfImpUse); + lfi = cfi.lengthFileIdent; +@@ -186,8 +211,11 @@ static int udf_readdir(struct file *file, struct dir_context *ctx) + } /* end while */ + + ctx->pos = (nf_pos >> 2) + 1; ++ pos_valid = true; + + out: ++ if (pos_valid) ++ file->f_version = inode_query_iversion(dir); + if (fibh.sbh != fibh.ebh) + brelse(fibh.ebh); + brelse(fibh.sbh); +diff --git a/fs/udf/ialloc.c b/fs/udf/ialloc.c +index 2ecf0e87660e3..b5d611cee749c 100644 +--- a/fs/udf/ialloc.c ++++ b/fs/udf/ialloc.c +@@ -77,6 +77,7 @@ struct inode *udf_new_inode(struct inode *dir, umode_t mode) + GFP_KERNEL); + } + if (!iinfo->i_data) { ++ make_bad_inode(inode); + iput(inode); + return ERR_PTR(-ENOMEM); + } +@@ -86,6 +87,7 @@ struct inode *udf_new_inode(struct inode *dir, umode_t mode) + dinfo->i_location.partitionReferenceNum, + start, &err); + if (err) { ++ make_bad_inode(inode); + iput(inode); + return ERR_PTR(err); + } +diff --git a/fs/udf/inode.c b/fs/udf/inode.c +index 1d6b7a50736ba..d2488b7e54a58 100644 +--- a/fs/udf/inode.c ++++ b/fs/udf/inode.c +@@ -258,10 +258,6 @@ int udf_expand_file_adinicb(struct inode *inode) + char *kaddr; + struct udf_inode_info *iinfo = UDF_I(inode); + int err; +- struct writeback_control udf_wbc = { +- .sync_mode = WB_SYNC_NONE, +- .nr_to_write = 1, +- }; + + WARN_ON_ONCE(!inode_is_locked(inode)); + if (!iinfo->i_lenAlloc) { +@@ -305,8 +301,10 @@ int udf_expand_file_adinicb(struct inode *inode) + iinfo->i_alloc_type = ICBTAG_FLAG_AD_LONG; + /* from now on we have normal address_space methods */ + inode->i_data.a_ops = &udf_aops; ++ set_page_dirty(page); ++ unlock_page(page); + up_write(&iinfo->i_data_sem); +- err = inode->i_data.a_ops->writepage(page, &udf_wbc); ++ err = filemap_fdatawrite(inode->i_mapping); + if (err) { + /* Restore everything back so that we don't lose data... */ + lock_page(page); +@@ -317,6 +315,7 @@ int udf_expand_file_adinicb(struct inode *inode) + unlock_page(page); + iinfo->i_alloc_type = ICBTAG_FLAG_AD_IN_ICB; + inode->i_data.a_ops = &udf_adinicb_aops; ++ iinfo->i_lenAlloc = inode->i_size; + up_write(&iinfo->i_data_sem); + } + put_page(page); +@@ -439,6 +438,12 @@ static int udf_get_block(struct inode *inode, sector_t block, + iinfo->i_next_alloc_goal++; + } + ++ /* ++ * Block beyond EOF and prealloc extents? Just discard preallocation ++ * as it is not useful and complicates things. 
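++ * (The test below compares the byte offset of the requested block
++ * with i_lenExtents, the length actually covered by extents, so a
++ * mapping request that jumps past the preallocated tail simply drops
++ * the preallocation rather than working around it.)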
++ */ ++ if (((loff_t)block) << inode->i_blkbits > iinfo->i_lenExtents) ++ udf_discard_prealloc(inode); + udf_clear_extent_cache(inode); + phys = inode_getblk(inode, block, &err, &new); + if (!phys) +@@ -488,8 +493,6 @@ static int udf_do_extend_file(struct inode *inode, + uint32_t add; + int count = 0, fake = !(last_ext->extLength & UDF_EXTENT_LENGTH_MASK); + struct super_block *sb = inode->i_sb; +- struct kernel_lb_addr prealloc_loc = {}; +- uint32_t prealloc_len = 0; + struct udf_inode_info *iinfo; + int err; + +@@ -510,19 +513,6 @@ static int udf_do_extend_file(struct inode *inode, + ~(sb->s_blocksize - 1); + } + +- /* Last extent are just preallocated blocks? */ +- if ((last_ext->extLength & UDF_EXTENT_FLAG_MASK) == +- EXT_NOT_RECORDED_ALLOCATED) { +- /* Save the extent so that we can reattach it to the end */ +- prealloc_loc = last_ext->extLocation; +- prealloc_len = last_ext->extLength; +- /* Mark the extent as a hole */ +- last_ext->extLength = EXT_NOT_RECORDED_NOT_ALLOCATED | +- (last_ext->extLength & UDF_EXTENT_LENGTH_MASK); +- last_ext->extLocation.logicalBlockNum = 0; +- last_ext->extLocation.partitionReferenceNum = 0; +- } +- + /* Can we merge with the previous extent? */ + if ((last_ext->extLength & UDF_EXTENT_FLAG_MASK) == + EXT_NOT_RECORDED_NOT_ALLOCATED) { +@@ -550,7 +540,7 @@ static int udf_do_extend_file(struct inode *inode, + * more extents, we may need to enter possible following + * empty indirect extent. + */ +- if (new_block_bytes || prealloc_len) ++ if (new_block_bytes) + udf_next_aext(inode, last_pos, &tmploc, &tmplen, 0); + } + +@@ -584,17 +574,6 @@ static int udf_do_extend_file(struct inode *inode, + } + + out: +- /* Do we have some preallocated blocks saved? */ +- if (prealloc_len) { +- err = udf_add_aext(inode, last_pos, &prealloc_loc, +- prealloc_len, 1); +- if (err) +- return err; +- last_ext->extLocation = prealloc_loc; +- last_ext->extLength = prealloc_len; +- count++; +- } +- + /* last_pos should point to the last written extent... */ + if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_SHORT) + last_pos->offset -= sizeof(struct short_ad); +@@ -610,13 +589,17 @@ out: + static void udf_do_extend_final_block(struct inode *inode, + struct extent_position *last_pos, + struct kernel_long_ad *last_ext, +- uint32_t final_block_len) ++ uint32_t new_elen) + { +- struct super_block *sb = inode->i_sb; + uint32_t added_bytes; + +- added_bytes = final_block_len - +- (last_ext->extLength & (sb->s_blocksize - 1)); ++ /* ++ * Extent already large enough? It may be already rounded up to block ++ * size... ++ */ ++ if (new_elen <= (last_ext->extLength & UDF_EXTENT_LENGTH_MASK)) ++ return; ++ added_bytes = new_elen - (last_ext->extLength & UDF_EXTENT_LENGTH_MASK); + last_ext->extLength += added_bytes; + UDF_I(inode)->i_lenExtents += added_bytes; + +@@ -633,12 +616,12 @@ static int udf_extend_file(struct inode *inode, loff_t newsize) + int8_t etype; + struct super_block *sb = inode->i_sb; + sector_t first_block = newsize >> sb->s_blocksize_bits, offset; +- unsigned long partial_final_block; ++ loff_t new_elen; + int adsize; + struct udf_inode_info *iinfo = UDF_I(inode); + struct kernel_long_ad extent; + int err = 0; +- int within_final_block; ++ bool within_last_ext; + + if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_SHORT) + adsize = sizeof(struct short_ad); +@@ -647,8 +630,17 @@ static int udf_extend_file(struct inode *inode, loff_t newsize) + else + BUG(); + ++ /* ++ * When creating hole in file, just don't bother with preserving ++ * preallocation. 
It likely won't be very useful anyway. ++ */ ++ udf_discard_prealloc(inode); ++ + etype = inode_bmap(inode, first_block, &epos, &eloc, &elen, &offset); +- within_final_block = (etype != -1); ++ within_last_ext = (etype != -1); ++ /* We don't expect extents past EOF... */ ++ WARN_ON_ONCE(within_last_ext && ++ elen > ((loff_t)offset + 1) << inode->i_blkbits); + + if ((!epos.bh && epos.offset == udf_file_entry_alloc_offset(inode)) || + (epos.bh && epos.offset == sizeof(struct allocExtDesc))) { +@@ -664,19 +656,17 @@ static int udf_extend_file(struct inode *inode, loff_t newsize) + extent.extLength |= etype << 30; + } + +- partial_final_block = newsize & (sb->s_blocksize - 1); ++ new_elen = ((loff_t)offset << inode->i_blkbits) | ++ (newsize & (sb->s_blocksize - 1)); + + /* File has extent covering the new size (could happen when extending + * inside a block)? + */ +- if (within_final_block) { ++ if (within_last_ext) { + /* Extending file within the last file block */ +- udf_do_extend_final_block(inode, &epos, &extent, +- partial_final_block); ++ udf_do_extend_final_block(inode, &epos, &extent, new_elen); + } else { +- loff_t add = ((loff_t)offset << sb->s_blocksize_bits) | +- partial_final_block; +- err = udf_do_extend_file(inode, &epos, &extent, add); ++ err = udf_do_extend_file(inode, &epos, &extent, new_elen); + } + + if (err < 0) +@@ -777,10 +767,11 @@ static sector_t inode_getblk(struct inode *inode, sector_t block, + goto out_free; + } + +- /* Are we beyond EOF? */ ++ /* Are we beyond EOF and preallocated extent? */ + if (etype == -1) { + int ret; + loff_t hole_len; ++ + isBeyondEOF = true; + if (count) { + if (c) +diff --git a/fs/udf/namei.c b/fs/udf/namei.c +index caeef08efed23..0e30a50060d9d 100644 +--- a/fs/udf/namei.c ++++ b/fs/udf/namei.c +@@ -30,6 +30,7 @@ + #include <linux/sched.h> + #include <linux/crc-itu-t.h> + #include <linux/exportfs.h> ++#include <linux/iversion.h> + + static inline int udf_match(int len1, const unsigned char *name1, int len2, + const unsigned char *name2) +@@ -74,11 +75,11 @@ int udf_write_fi(struct inode *inode, struct fileIdentDesc *cfi, + + if (fileident) { + if (adinicb || (offset + lfi < 0)) { +- memcpy(udf_get_fi_ident(sfi), fileident, lfi); ++ memcpy(sfi->impUse + liu, fileident, lfi); + } else if (offset >= 0) { + memcpy(fibh->ebh->b_data + offset, fileident, lfi); + } else { +- memcpy(udf_get_fi_ident(sfi), fileident, -offset); ++ memcpy(sfi->impUse + liu, fileident, -offset); + memcpy(fibh->ebh->b_data, fileident - offset, + lfi + offset); + } +@@ -87,11 +88,11 @@ int udf_write_fi(struct inode *inode, struct fileIdentDesc *cfi, + offset += lfi; + + if (adinicb || (offset + padlen < 0)) { +- memset(udf_get_fi_ident(sfi) + lfi, 0x00, padlen); ++ memset(sfi->impUse + liu + lfi, 0x00, padlen); + } else if (offset >= 0) { + memset(fibh->ebh->b_data + offset, 0x00, padlen); + } else { +- memset(udf_get_fi_ident(sfi) + lfi, 0x00, -offset); ++ memset(sfi->impUse + liu + lfi, 0x00, -offset); + memset(fibh->ebh->b_data, 0x00, padlen + offset); + } + +@@ -134,6 +135,8 @@ int udf_write_fi(struct inode *inode, struct fileIdentDesc *cfi, + mark_buffer_dirty_inode(fibh->ebh, inode); + mark_buffer_dirty_inode(fibh->sbh, inode); + } ++ inode_inc_iversion(inode); ++ + return 0; + } + +@@ -237,7 +240,7 @@ static struct fileIdentDesc *udf_find_entry(struct inode *dir, + poffset - lfi); + else { + if (!copy_name) { +- copy_name = kmalloc(UDF_NAME_LEN, ++ copy_name = kmalloc(UDF_NAME_LEN_CS0, + GFP_NOFS); + if (!copy_name) { + fi = ERR_PTR(-ENOMEM); +@@ -1088,8 
+1091,9 @@ static int udf_rename(struct user_namespace *mnt_userns, struct inode *old_dir, + return -EINVAL; + + ofi = udf_find_entry(old_dir, &old_dentry->d_name, &ofibh, &ocfi); +- if (IS_ERR(ofi)) { +- retval = PTR_ERR(ofi); ++ if (!ofi || IS_ERR(ofi)) { ++ if (IS_ERR(ofi)) ++ retval = PTR_ERR(ofi); + goto end_rename; + } + +@@ -1098,8 +1102,7 @@ static int udf_rename(struct user_namespace *mnt_userns, struct inode *old_dir, + + brelse(ofibh.sbh); + tloc = lelb_to_cpu(ocfi.icb.extLocation); +- if (!ofi || udf_get_lb_pblock(old_dir->i_sb, &tloc, 0) +- != old_inode->i_ino) ++ if (udf_get_lb_pblock(old_dir->i_sb, &tloc, 0) != old_inode->i_ino) + goto end_rename; + + nfi = udf_find_entry(new_dir, &new_dentry->d_name, &nfibh, &ncfi); +diff --git a/fs/udf/super.c b/fs/udf/super.c +index b2d7c57d06881..aa2f6093d3f6f 100644 +--- a/fs/udf/super.c ++++ b/fs/udf/super.c +@@ -57,6 +57,7 @@ + #include <linux/crc-itu-t.h> + #include <linux/log2.h> + #include <asm/byteorder.h> ++#include <linux/iversion.h> + + #include "udf_sb.h" + #include "udf_i.h" +@@ -149,6 +150,7 @@ static struct inode *udf_alloc_inode(struct super_block *sb) + init_rwsem(&ei->i_data_sem); + ei->cached_extent.lstart = -1; + spin_lock_init(&ei->i_extent_cache_lock); ++ inode_set_iversion(&ei->vfs_inode, 1); + + return &ei->vfs_inode; + } +diff --git a/fs/udf/truncate.c b/fs/udf/truncate.c +index 532cda99644ee..036ebd892b852 100644 +--- a/fs/udf/truncate.c ++++ b/fs/udf/truncate.c +@@ -120,60 +120,42 @@ void udf_truncate_tail_extent(struct inode *inode) + + void udf_discard_prealloc(struct inode *inode) + { +- struct extent_position epos = { NULL, 0, {0, 0} }; ++ struct extent_position epos = {}; ++ struct extent_position prev_epos = {}; + struct kernel_lb_addr eloc; + uint32_t elen; + uint64_t lbcount = 0; + int8_t etype = -1, netype; +- int adsize; + struct udf_inode_info *iinfo = UDF_I(inode); ++ int bsize = 1 << inode->i_blkbits; + + if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB || +- inode->i_size == iinfo->i_lenExtents) ++ ALIGN(inode->i_size, bsize) == ALIGN(iinfo->i_lenExtents, bsize)) + return; + +- if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_SHORT) +- adsize = sizeof(struct short_ad); +- else if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_LONG) +- adsize = sizeof(struct long_ad); +- else +- adsize = 0; +- + epos.block = iinfo->i_location; + + /* Find the last extent in the file */ +- while ((netype = udf_next_aext(inode, &epos, &eloc, &elen, 1)) != -1) { +- etype = netype; ++ while ((netype = udf_next_aext(inode, &epos, &eloc, &elen, 0)) != -1) { ++ brelse(prev_epos.bh); ++ prev_epos = epos; ++ if (prev_epos.bh) ++ get_bh(prev_epos.bh); ++ ++ etype = udf_next_aext(inode, &epos, &eloc, &elen, 1); + lbcount += elen; + } + if (etype == (EXT_NOT_RECORDED_ALLOCATED >> 30)) { +- epos.offset -= adsize; + lbcount -= elen; +- extent_trunc(inode, &epos, &eloc, etype, elen, 0); +- if (!epos.bh) { +- iinfo->i_lenAlloc = +- epos.offset - +- udf_file_entry_alloc_offset(inode); +- mark_inode_dirty(inode); +- } else { +- struct allocExtDesc *aed = +- (struct allocExtDesc *)(epos.bh->b_data); +- aed->lengthAllocDescs = +- cpu_to_le32(epos.offset - +- sizeof(struct allocExtDesc)); +- if (!UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_STRICT) || +- UDF_SB(inode->i_sb)->s_udfrev >= 0x0201) +- udf_update_tag(epos.bh->b_data, epos.offset); +- else +- udf_update_tag(epos.bh->b_data, +- sizeof(struct allocExtDesc)); +- mark_buffer_dirty_inode(epos.bh, inode); +- } ++ udf_delete_aext(inode, prev_epos); ++ udf_free_blocks(inode->i_sb, inode, &eloc, 0, ++ 
DIV_ROUND_UP(elen, 1 << inode->i_blkbits)); + } + /* This inode entry is in-memory only and thus we don't have to mark + * the inode dirty */ + iinfo->i_lenExtents = lbcount; + brelse(epos.bh); ++ brelse(prev_epos.bh); + } + + static void udf_update_alloc_ext_desc(struct inode *inode, +diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c +index 22bf14ab2d163..b56e8e31d967f 100644 +--- a/fs/userfaultfd.c ++++ b/fs/userfaultfd.c +@@ -982,7 +982,7 @@ static int resolve_userfault_fork(struct userfaultfd_ctx *new, + int fd; + + fd = anon_inode_getfd_secure("[userfaultfd]", &userfaultfd_fops, new, +- O_RDWR | (new->flags & UFFD_SHARED_FCNTL_FLAGS), inode); ++ O_RDONLY | (new->flags & UFFD_SHARED_FCNTL_FLAGS), inode); + if (fd < 0) + return fd; + +@@ -2097,7 +2097,7 @@ SYSCALL_DEFINE1(userfaultfd, int, flags) + mmgrab(ctx->mm); + + fd = anon_inode_getfd_secure("[userfaultfd]", &userfaultfd_fops, ctx, +- O_RDWR | (flags & UFFD_SHARED_FCNTL_FLAGS), NULL); ++ O_RDONLY | (flags & UFFD_SHARED_FCNTL_FLAGS), NULL); + if (fd < 0) { + mmdrop(ctx->mm); + kmem_cache_free(userfaultfd_ctx_cachep, ctx); +diff --git a/fs/xattr.c b/fs/xattr.c +index 5c8c5175b385c..4c82f271f4aa3 100644 +--- a/fs/xattr.c ++++ b/fs/xattr.c +@@ -25,6 +25,8 @@ + + #include <linux/uaccess.h> + ++#include "internal.h" ++ + static const char * + strcmp_prefix(const char *a, const char *a_prefix) + { +@@ -539,43 +541,76 @@ EXPORT_SYMBOL_GPL(vfs_removexattr); + /* + * Extended attribute SET operations + */ +-static long +-setxattr(struct user_namespace *mnt_userns, struct dentry *d, +- const char __user *name, const void __user *value, size_t size, +- int flags) ++ ++int setxattr_copy(const char __user *name, struct xattr_ctx *ctx) + { + int error; +- void *kvalue = NULL; +- char kname[XATTR_NAME_MAX + 1]; + +- if (flags & ~(XATTR_CREATE|XATTR_REPLACE)) ++ if (ctx->flags & ~(XATTR_CREATE|XATTR_REPLACE)) + return -EINVAL; + +- error = strncpy_from_user(kname, name, sizeof(kname)); +- if (error == 0 || error == sizeof(kname)) +- error = -ERANGE; ++ error = strncpy_from_user(ctx->kname->name, name, ++ sizeof(ctx->kname->name)); ++ if (error == 0 || error == sizeof(ctx->kname->name)) ++ return -ERANGE; + if (error < 0) + return error; + +- if (size) { +- if (size > XATTR_SIZE_MAX) ++ error = 0; ++ if (ctx->size) { ++ if (ctx->size > XATTR_SIZE_MAX) + return -E2BIG; +- kvalue = kvmalloc(size, GFP_KERNEL); +- if (!kvalue) +- return -ENOMEM; +- if (copy_from_user(kvalue, value, size)) { +- error = -EFAULT; +- goto out; ++ ++ ctx->kvalue = vmemdup_user(ctx->cvalue, ctx->size); ++ if (IS_ERR(ctx->kvalue)) { ++ error = PTR_ERR(ctx->kvalue); ++ ctx->kvalue = NULL; + } +- if ((strcmp(kname, XATTR_NAME_POSIX_ACL_ACCESS) == 0) || +- (strcmp(kname, XATTR_NAME_POSIX_ACL_DEFAULT) == 0)) +- posix_acl_fix_xattr_from_user(mnt_userns, kvalue, size); + } + +- error = vfs_setxattr(mnt_userns, d, kname, kvalue, size, flags); +-out: +- kvfree(kvalue); ++ return error; ++} ++ ++static void setxattr_convert(struct user_namespace *mnt_userns, ++ struct dentry *d, struct xattr_ctx *ctx) ++{ ++ if (ctx->size && ++ ((strcmp(ctx->kname->name, XATTR_NAME_POSIX_ACL_ACCESS) == 0) || ++ (strcmp(ctx->kname->name, XATTR_NAME_POSIX_ACL_DEFAULT) == 0))) ++ posix_acl_fix_xattr_from_user(mnt_userns, d_inode(d), ++ ctx->kvalue, ctx->size); ++} ++ ++int do_setxattr(struct user_namespace *mnt_userns, struct dentry *dentry, ++ struct xattr_ctx *ctx) ++{ ++ setxattr_convert(mnt_userns, dentry, ctx); ++ return vfs_setxattr(mnt_userns, dentry, ctx->kname->name, ++ ctx->kvalue, ctx->size, 
ctx->flags); ++} ++ ++static long ++setxattr(struct user_namespace *mnt_userns, struct dentry *d, ++ const char __user *name, const void __user *value, size_t size, ++ int flags) ++{ ++ struct xattr_name kname; ++ struct xattr_ctx ctx = { ++ .cvalue = value, ++ .kvalue = NULL, ++ .size = size, ++ .kname = &kname, ++ .flags = flags, ++ }; ++ int error; ++ ++ error = setxattr_copy(name, &ctx); ++ if (error) ++ return error; ++ ++ error = do_setxattr(mnt_userns, d, &ctx); + ++ kvfree(ctx.kvalue); + return error; + } + +@@ -667,7 +702,8 @@ getxattr(struct user_namespace *mnt_userns, struct dentry *d, + if (error > 0) { + if ((strcmp(kname, XATTR_NAME_POSIX_ACL_ACCESS) == 0) || + (strcmp(kname, XATTR_NAME_POSIX_ACL_DEFAULT) == 0)) +- posix_acl_fix_xattr_to_user(mnt_userns, kvalue, error); ++ posix_acl_fix_xattr_to_user(mnt_userns, d_inode(d), ++ kvalue, error); + if (size && copy_to_user(value, kvalue, error)) + error = -EFAULT; + } else if (error == -ERANGE && size >= XATTR_SIZE_MAX) { +@@ -1083,7 +1119,7 @@ static int xattr_list_one(char **buffer, ssize_t *remaining_size, + ssize_t simple_xattr_list(struct inode *inode, struct simple_xattrs *xattrs, + char *buffer, size_t size) + { +- bool trusted = capable(CAP_SYS_ADMIN); ++ bool trusted = ns_capable_noaudit(&init_user_ns, CAP_SYS_ADMIN); + struct simple_xattr *xattr; + ssize_t remaining_size = size; + int err = 0; +diff --git a/fs/xfs/libxfs/xfs_ag.c b/fs/xfs/libxfs/xfs_ag.c +index 005abfd9fd347..aff6fb5281f63 100644 +--- a/fs/xfs/libxfs/xfs_ag.c ++++ b/fs/xfs/libxfs/xfs_ag.c +@@ -173,7 +173,6 @@ __xfs_free_perag( + struct xfs_perag *pag = container_of(head, struct xfs_perag, rcu_head); + + ASSERT(!delayed_work_pending(&pag->pag_blockgc_work)); +- ASSERT(atomic_read(&pag->pag_ref) == 0); + kmem_free(pag); + } + +@@ -192,7 +191,7 @@ xfs_free_perag( + pag = radix_tree_delete(&mp->m_perag_tree, agno); + spin_unlock(&mp->m_perag_lock); + ASSERT(pag); +- ASSERT(atomic_read(&pag->pag_ref) == 0); ++ XFS_IS_CORRUPT(pag->pag_mount, atomic_read(&pag->pag_ref) != 0); + + cancel_delayed_work_sync(&pag->pag_blockgc_work); + xfs_iunlink_destroy(pag); +diff --git a/fs/xfs/libxfs/xfs_ag.h b/fs/xfs/libxfs/xfs_ag.h +index 4c6f9045baca0..3f597cad2c333 100644 +--- a/fs/xfs/libxfs/xfs_ag.h ++++ b/fs/xfs/libxfs/xfs_ag.h +@@ -116,23 +116,29 @@ void xfs_perag_put(struct xfs_perag *pag); + + /* + * Perag iteration APIs +- * +- * XXX: for_each_perag_range() usage really needs an iterator to clean up when +- * we terminate at end_agno because we may have taken a reference to the perag +- * beyond end_agno. Right now callers have to be careful to catch and clean that +- * up themselves. This is not necessary for the callers of for_each_perag() and +- * for_each_perag_from() because they terminate at sb_agcount where there are +- * no perag structures in tree beyond end_agno. 
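++ * for_each_perag_range() now advances through xfs_perag_next(),
++ * which drops the current reference before deciding whether to take
++ * the next one, so terminating at end_agno can no longer leak a
++ * perag reference.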
+ */ +-#define for_each_perag_range(mp, next_agno, end_agno, pag) \ +- for ((pag) = xfs_perag_get((mp), (next_agno)); \ +- (pag) != NULL && (next_agno) <= (end_agno); \ +- (next_agno) = (pag)->pag_agno + 1, \ +- xfs_perag_put(pag), \ +- (pag) = xfs_perag_get((mp), (next_agno))) ++static inline struct xfs_perag * ++xfs_perag_next( ++ struct xfs_perag *pag, ++ xfs_agnumber_t *agno, ++ xfs_agnumber_t end_agno) ++{ ++ struct xfs_mount *mp = pag->pag_mount; ++ ++ *agno = pag->pag_agno + 1; ++ xfs_perag_put(pag); ++ if (*agno > end_agno) ++ return NULL; ++ return xfs_perag_get(mp, *agno); ++} ++ ++#define for_each_perag_range(mp, agno, end_agno, pag) \ ++ for ((pag) = xfs_perag_get((mp), (agno)); \ ++ (pag) != NULL; \ ++ (pag) = xfs_perag_next((pag), &(agno), (end_agno))) + +-#define for_each_perag_from(mp, next_agno, pag) \ +- for_each_perag_range((mp), (next_agno), (mp)->m_sb.sb_agcount, (pag)) ++#define for_each_perag_from(mp, agno, pag) \ ++ for_each_perag_range((mp), (agno), (mp)->m_sb.sb_agcount - 1, (pag)) + + + #define for_each_perag(mp, agno, pag) \ +diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c +index fbc9d816882ce..23523b802539e 100644 +--- a/fs/xfs/libxfs/xfs_attr.c ++++ b/fs/xfs/libxfs/xfs_attr.c +@@ -1077,21 +1077,18 @@ xfs_attr_node_hasname( + + state = xfs_da_state_alloc(args); + if (statep != NULL) +- *statep = NULL; ++ *statep = state; + + /* + * Search to see if name exists, and get back a pointer to it. + */ + error = xfs_da3_node_lookup_int(state, &retval); +- if (error) { +- xfs_da_state_free(state); +- return error; +- } ++ if (error) ++ retval = error; + +- if (statep != NULL) +- *statep = state; +- else ++ if (!statep) + xfs_da_state_free(state); ++ + return retval; + } + +@@ -1112,7 +1109,7 @@ xfs_attr_node_addname_find_attr( + */ + retval = xfs_attr_node_hasname(args, &dac->da_state); + if (retval != -ENOATTR && retval != -EEXIST) +- return retval; ++ goto error; + + if (retval == -ENOATTR && (args->attr_flags & XATTR_REPLACE)) + goto error; +@@ -1337,7 +1334,7 @@ int xfs_attr_node_removename_setup( + + error = xfs_attr_node_hasname(args, state); + if (error != -EEXIST) +- return error; ++ goto out; + error = 0; + + ASSERT((*state)->path.blk[(*state)->path.active - 1].bp != NULL); +diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c +index 2983954817135..dffe4ca584935 100644 +--- a/fs/xfs/libxfs/xfs_btree.c ++++ b/fs/xfs/libxfs/xfs_btree.c +@@ -51,6 +51,71 @@ xfs_btree_magic( + return magic; + } + ++/* ++ * These sibling pointer checks are optimised for null sibling pointers. This ++ * happens a lot, and we don't need to byte swap at runtime if the sibling ++ * pointer is NULL. ++ * ++ * These are explicitly marked at inline because the cost of calling them as ++ * functions instead of inlining them is about 36 bytes extra code per call site ++ * on x86-64. Yes, gcc-11 fails to inline them, and explicit inlining of these ++ * two sibling check functions reduces the compiled code size by over 300 ++ * bytes. 
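++ * (The common case being optimised: a sibling pointer that is still
++ * NULLFSBLOCK/NULLAGBLOCK compares against a compile-time big-endian
++ * constant and returns before any byte swap or range check is done.)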
++ */ ++static inline xfs_failaddr_t ++xfs_btree_check_lblock_siblings( ++ struct xfs_mount *mp, ++ struct xfs_btree_cur *cur, ++ int level, ++ xfs_fsblock_t fsb, ++ __be64 dsibling) ++{ ++ xfs_fsblock_t sibling; ++ ++ if (dsibling == cpu_to_be64(NULLFSBLOCK)) ++ return NULL; ++ ++ sibling = be64_to_cpu(dsibling); ++ if (sibling == fsb) ++ return __this_address; ++ if (level >= 0) { ++ if (!xfs_btree_check_lptr(cur, sibling, level + 1)) ++ return __this_address; ++ } else { ++ if (!xfs_verify_fsbno(mp, sibling)) ++ return __this_address; ++ } ++ ++ return NULL; ++} ++ ++static inline xfs_failaddr_t ++xfs_btree_check_sblock_siblings( ++ struct xfs_mount *mp, ++ struct xfs_btree_cur *cur, ++ int level, ++ xfs_agnumber_t agno, ++ xfs_agblock_t agbno, ++ __be32 dsibling) ++{ ++ xfs_agblock_t sibling; ++ ++ if (dsibling == cpu_to_be32(NULLAGBLOCK)) ++ return NULL; ++ ++ sibling = be32_to_cpu(dsibling); ++ if (sibling == agbno) ++ return __this_address; ++ if (level >= 0) { ++ if (!xfs_btree_check_sptr(cur, sibling, level + 1)) ++ return __this_address; ++ } else { ++ if (!xfs_verify_agbno(mp, agno, sibling)) ++ return __this_address; ++ } ++ return NULL; ++} ++ + /* + * Check a long btree block header. Return the address of the failing check, + * or NULL if everything is ok. +@@ -65,6 +130,8 @@ __xfs_btree_check_lblock( + struct xfs_mount *mp = cur->bc_mp; + xfs_btnum_t btnum = cur->bc_btnum; + int crc = xfs_has_crc(mp); ++ xfs_failaddr_t fa; ++ xfs_fsblock_t fsb = NULLFSBLOCK; + + if (crc) { + if (!uuid_equal(&block->bb_u.l.bb_uuid, &mp->m_sb.sb_meta_uuid)) +@@ -83,16 +150,16 @@ __xfs_btree_check_lblock( + if (be16_to_cpu(block->bb_numrecs) > + cur->bc_ops->get_maxrecs(cur, level)) + return __this_address; +- if (block->bb_u.l.bb_leftsib != cpu_to_be64(NULLFSBLOCK) && +- !xfs_btree_check_lptr(cur, be64_to_cpu(block->bb_u.l.bb_leftsib), +- level + 1)) +- return __this_address; +- if (block->bb_u.l.bb_rightsib != cpu_to_be64(NULLFSBLOCK) && +- !xfs_btree_check_lptr(cur, be64_to_cpu(block->bb_u.l.bb_rightsib), +- level + 1)) +- return __this_address; + +- return NULL; ++ if (bp) ++ fsb = XFS_DADDR_TO_FSB(mp, xfs_buf_daddr(bp)); ++ ++ fa = xfs_btree_check_lblock_siblings(mp, cur, level, fsb, ++ block->bb_u.l.bb_leftsib); ++ if (!fa) ++ fa = xfs_btree_check_lblock_siblings(mp, cur, level, fsb, ++ block->bb_u.l.bb_rightsib); ++ return fa; + } + + /* Check a long btree block header. 
*/ +@@ -130,6 +197,9 @@ __xfs_btree_check_sblock( + struct xfs_mount *mp = cur->bc_mp; + xfs_btnum_t btnum = cur->bc_btnum; + int crc = xfs_has_crc(mp); ++ xfs_failaddr_t fa; ++ xfs_agblock_t agbno = NULLAGBLOCK; ++ xfs_agnumber_t agno = NULLAGNUMBER; + + if (crc) { + if (!uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_meta_uuid)) +@@ -146,16 +216,18 @@ __xfs_btree_check_sblock( + if (be16_to_cpu(block->bb_numrecs) > + cur->bc_ops->get_maxrecs(cur, level)) + return __this_address; +- if (block->bb_u.s.bb_leftsib != cpu_to_be32(NULLAGBLOCK) && +- !xfs_btree_check_sptr(cur, be32_to_cpu(block->bb_u.s.bb_leftsib), +- level + 1)) +- return __this_address; +- if (block->bb_u.s.bb_rightsib != cpu_to_be32(NULLAGBLOCK) && +- !xfs_btree_check_sptr(cur, be32_to_cpu(block->bb_u.s.bb_rightsib), +- level + 1)) +- return __this_address; + +- return NULL; ++ if (bp) { ++ agbno = xfs_daddr_to_agbno(mp, xfs_buf_daddr(bp)); ++ agno = xfs_daddr_to_agno(mp, xfs_buf_daddr(bp)); ++ } ++ ++ fa = xfs_btree_check_sblock_siblings(mp, cur, level, agno, agbno, ++ block->bb_u.s.bb_leftsib); ++ if (!fa) ++ fa = xfs_btree_check_sblock_siblings(mp, cur, level, agno, ++ agbno, block->bb_u.s.bb_rightsib); ++ return fa; + } + + /* Check a short btree block header. */ +@@ -373,8 +445,14 @@ xfs_btree_del_cursor( + break; + } + ++ /* ++ * If we are doing a BMBT update, the number of unaccounted blocks ++ * allocated during this cursor life time should be zero. If it's not ++ * zero, then we should be shut down or on our way to shutdown due to ++ * cancelling a dirty transaction on error. ++ */ + ASSERT(cur->bc_btnum != XFS_BTNUM_BMAP || cur->bc_ino.allocated == 0 || +- xfs_is_shutdown(cur->bc_mp)); ++ xfs_is_shutdown(cur->bc_mp) || error != 0); + if (unlikely(cur->bc_flags & XFS_BTREE_STAGING)) + kmem_free(cur->bc_ops); + if (!(cur->bc_flags & XFS_BTREE_LONG_PTRS) && cur->bc_ag.pag) +@@ -3188,7 +3266,7 @@ xfs_btree_insrec( + struct xfs_btree_block *block; /* btree block */ + struct xfs_buf *bp; /* buffer for block */ + union xfs_btree_ptr nptr; /* new block ptr */ +- struct xfs_btree_cur *ncur; /* new btree cursor */ ++ struct xfs_btree_cur *ncur = NULL; /* new btree cursor */ + union xfs_btree_key nkey; /* new block key */ + union xfs_btree_key *lkey; + int optr; /* old key/record index */ +@@ -3268,7 +3346,7 @@ xfs_btree_insrec( + #ifdef DEBUG + error = xfs_btree_check_block(cur, block, level, bp); + if (error) +- return error; ++ goto error0; + #endif + + /* +@@ -3288,7 +3366,7 @@ xfs_btree_insrec( + for (i = numrecs - ptr; i >= 0; i--) { + error = xfs_btree_debug_check_ptr(cur, pp, i, level); + if (error) +- return error; ++ goto error0; + } + + xfs_btree_shift_keys(cur, kp, 1, numrecs - ptr + 1); +@@ -3373,6 +3451,8 @@ xfs_btree_insrec( + return 0; + + error0: ++ if (ncur) ++ xfs_btree_del_cursor(ncur, error); + return error; + } + +@@ -4265,6 +4345,21 @@ xfs_btree_visit_block( + if (xfs_btree_ptr_is_null(cur, &rptr)) + return -ENOENT; + ++ /* ++ * We only visit blocks once in this walk, so we have to avoid the ++ * internal xfs_btree_lookup_get_block() optimisation where it will ++ * return the same block without checking if the right sibling points ++ * back to us and creates a cyclic reference in the btree. 
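++ * (The guard below converts this buffer's disk address back to a
++ * block number and returns -EFSCORRUPTED when the right sibling
++ * points at the block we are standing on, rather than walking the
++ * cycle forever.)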
++ */ ++ if (cur->bc_flags & XFS_BTREE_LONG_PTRS) { ++ if (be64_to_cpu(rptr.l) == XFS_DADDR_TO_FSB(cur->bc_mp, ++ xfs_buf_daddr(bp))) ++ return -EFSCORRUPTED; ++ } else { ++ if (be32_to_cpu(rptr.s) == xfs_daddr_to_agbno(cur->bc_mp, ++ xfs_buf_daddr(bp))) ++ return -EFSCORRUPTED; ++ } + return xfs_btree_lookup_get_block(cur, level, &rptr, &block); + } + +@@ -4439,20 +4534,21 @@ xfs_btree_lblock_verify( + { + struct xfs_mount *mp = bp->b_mount; + struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); ++ xfs_fsblock_t fsb; ++ xfs_failaddr_t fa; + + /* numrecs verification */ + if (be16_to_cpu(block->bb_numrecs) > max_recs) + return __this_address; + + /* sibling pointer verification */ +- if (block->bb_u.l.bb_leftsib != cpu_to_be64(NULLFSBLOCK) && +- !xfs_verify_fsbno(mp, be64_to_cpu(block->bb_u.l.bb_leftsib))) +- return __this_address; +- if (block->bb_u.l.bb_rightsib != cpu_to_be64(NULLFSBLOCK) && +- !xfs_verify_fsbno(mp, be64_to_cpu(block->bb_u.l.bb_rightsib))) +- return __this_address; +- +- return NULL; ++ fsb = XFS_DADDR_TO_FSB(mp, xfs_buf_daddr(bp)); ++ fa = xfs_btree_check_lblock_siblings(mp, NULL, -1, fsb, ++ block->bb_u.l.bb_leftsib); ++ if (!fa) ++ fa = xfs_btree_check_lblock_siblings(mp, NULL, -1, fsb, ++ block->bb_u.l.bb_rightsib); ++ return fa; + } + + /** +@@ -4493,7 +4589,9 @@ xfs_btree_sblock_verify( + { + struct xfs_mount *mp = bp->b_mount; + struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); +- xfs_agblock_t agno; ++ xfs_agnumber_t agno; ++ xfs_agblock_t agbno; ++ xfs_failaddr_t fa; + + /* numrecs verification */ + if (be16_to_cpu(block->bb_numrecs) > max_recs) +@@ -4501,14 +4599,13 @@ xfs_btree_sblock_verify( + + /* sibling pointer verification */ + agno = xfs_daddr_to_agno(mp, xfs_buf_daddr(bp)); +- if (block->bb_u.s.bb_leftsib != cpu_to_be32(NULLAGBLOCK) && +- !xfs_verify_agbno(mp, agno, be32_to_cpu(block->bb_u.s.bb_leftsib))) +- return __this_address; +- if (block->bb_u.s.bb_rightsib != cpu_to_be32(NULLAGBLOCK) && +- !xfs_verify_agbno(mp, agno, be32_to_cpu(block->bb_u.s.bb_rightsib))) +- return __this_address; +- +- return NULL; ++ agbno = xfs_daddr_to_agbno(mp, xfs_buf_daddr(bp)); ++ fa = xfs_btree_check_sblock_siblings(mp, NULL, -1, agno, agbno, ++ block->bb_u.s.bb_leftsib); ++ if (!fa) ++ fa = xfs_btree_check_sblock_siblings(mp, NULL, -1, agno, agbno, ++ block->bb_u.s.bb_rightsib); ++ return fa; + } + + /* +diff --git a/fs/xfs/libxfs/xfs_btree_staging.c b/fs/xfs/libxfs/xfs_btree_staging.c +index ac9e80152b5cf..89c8a1498df1d 100644 +--- a/fs/xfs/libxfs/xfs_btree_staging.c ++++ b/fs/xfs/libxfs/xfs_btree_staging.c +@@ -662,7 +662,7 @@ xfs_btree_bload_compute_geometry( + xfs_btree_bload_ensure_slack(cur, &bbl->node_slack, 1); + + bbl->nr_records = nr_this_level = nr_records; +- for (cur->bc_nlevels = 1; cur->bc_nlevels < XFS_BTREE_MAXLEVELS;) { ++ for (cur->bc_nlevels = 1; cur->bc_nlevels <= XFS_BTREE_MAXLEVELS;) { + uint64_t level_blocks; + uint64_t dontcare64; + unsigned int level = cur->bc_nlevels - 1; +@@ -724,7 +724,7 @@ xfs_btree_bload_compute_geometry( + nr_this_level = level_blocks; + } + +- if (cur->bc_nlevels == XFS_BTREE_MAXLEVELS) ++ if (cur->bc_nlevels > XFS_BTREE_MAXLEVELS) + return -EOVERFLOW; + + bbl->btree_height = cur->bc_nlevels; +diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c +index 3932b4ebf9037..f84d3fbb9d3da 100644 +--- a/fs/xfs/libxfs/xfs_inode_buf.c ++++ b/fs/xfs/libxfs/xfs_inode_buf.c +@@ -337,19 +337,36 @@ xfs_dinode_verify_fork( + int whichfork) + { + uint32_t di_nextents = XFS_DFORK_NEXTENTS(dip, whichfork); ++ 
mode_t mode = be16_to_cpu(dip->di_mode);
++ uint32_t fork_size = XFS_DFORK_SIZE(dip, mp, whichfork);
++ uint32_t fork_format = XFS_DFORK_FORMAT(dip, whichfork);
+
+- switch (XFS_DFORK_FORMAT(dip, whichfork)) {
++ /*
++ * For fork types that can contain local data, check that the fork
++ * format matches the size of local data contained within the fork.
++ *
++ * For all types, check that when the size says the fork should be in
++ * extent or btree format, the inode isn't claiming it is in local format.
++ */
++ if (whichfork == XFS_DATA_FORK) {
++ if (S_ISDIR(mode) || S_ISLNK(mode)) {
++ if (be64_to_cpu(dip->di_size) <= fork_size &&
++ fork_format != XFS_DINODE_FMT_LOCAL)
++ return __this_address;
++ }
++
++ if (be64_to_cpu(dip->di_size) > fork_size &&
++ fork_format == XFS_DINODE_FMT_LOCAL)
++ return __this_address;
++ }
++
++ switch (fork_format) {
+ case XFS_DINODE_FMT_LOCAL:
+ /*
+- * no local regular files yet
++ * No local regular files yet.
+ */
+- if (whichfork == XFS_DATA_FORK) {
+- if (S_ISREG(be16_to_cpu(dip->di_mode)))
+- return __this_address;
+- if (be64_to_cpu(dip->di_size) >
+- XFS_DFORK_SIZE(dip, mp, whichfork))
+- return __this_address;
+- }
++ if (S_ISREG(mode) && whichfork == XFS_DATA_FORK)
++ return __this_address;
+ if (di_nextents)
+ return __this_address;
+ break;
+diff --git a/fs/xfs/libxfs/xfs_inode_fork.c b/fs/xfs/libxfs/xfs_inode_fork.c
+index 1d174909f9bdf..20095233d7bc0 100644
+--- a/fs/xfs/libxfs/xfs_inode_fork.c
++++ b/fs/xfs/libxfs/xfs_inode_fork.c
+@@ -50,8 +50,13 @@ xfs_init_local_fork(
+ mem_size++;
+
+ if (size) {
++ /*
++ * As we round up the allocation here, we need to ensure the
++ * bytes we don't copy data into are zeroed because the log
++ * vectors still copy them into the journal.
++ */
+ real_size = roundup(mem_size, 4);
+- ifp->if_u1.if_data = kmem_alloc(real_size, KM_NOFS);
++ ifp->if_u1.if_data = kmem_zalloc(real_size, KM_NOFS);
+ memcpy(ifp->if_u1.if_data, data, size);
+ if (zero_terminate)
+ ifp->if_u1.if_data[size] = '\0';
+@@ -500,10 +505,11 @@ xfs_idata_realloc(
+ /*
+ * For inline data, the underlying buffer must be a multiple of 4 bytes
+ * in size so that it can be logged and stay on word boundaries.
+- * We enforce that here.
++ * We enforce that here, and use __GFP_ZERO to ensure that size
++ * extensions always zero the unused roundup area.
+ */
+ ifp->if_u1.if_data = krealloc(ifp->if_u1.if_data, roundup(new_size, 4),
+- GFP_NOFS | __GFP_NOFAIL);
++ GFP_NOFS | __GFP_NOFAIL | __GFP_ZERO);
+ ifp->if_bytes = new_size;
+ }
+
+diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c
+index e58349be78bd5..04e2a57313fa0 100644
+--- a/fs/xfs/libxfs/xfs_sb.c
++++ b/fs/xfs/libxfs/xfs_sb.c
+@@ -30,6 +30,47 @@
+ * Physical superblock buffer manipulations. Shared with libxfs in userspace.
+ */
+
++/*
++ * Check that all the V4 feature bits that the V5 filesystem format requires are
++ * correctly set.
++ */
++static bool
++xfs_sb_validate_v5_features(
++ struct xfs_sb *sbp)
++{
++ /* We must not have any unknown V4 feature bits set */
++ if (sbp->sb_versionnum & ~XFS_SB_VERSION_OKBITS)
++ return false;
++
++ /*
++ * The CRC bit is considered an invalid V4 flag, so we have to add it
++ * manually to the OKBITS mask.
++ */
++ if (sbp->sb_features2 & ~(XFS_SB_VERSION2_OKBITS |
++ XFS_SB_VERSION2_CRCBIT))
++ return false;
++
++ /* Now check all the required V4 feature flags are set.
*/ ++ ++#define V5_VERS_FLAGS (XFS_SB_VERSION_NLINKBIT | \ ++ XFS_SB_VERSION_ALIGNBIT | \ ++ XFS_SB_VERSION_LOGV2BIT | \ ++ XFS_SB_VERSION_EXTFLGBIT | \ ++ XFS_SB_VERSION_DIRV2BIT | \ ++ XFS_SB_VERSION_MOREBITSBIT) ++ ++#define V5_FEAT_FLAGS (XFS_SB_VERSION2_LAZYSBCOUNTBIT | \ ++ XFS_SB_VERSION2_ATTR2BIT | \ ++ XFS_SB_VERSION2_PROJID32BIT | \ ++ XFS_SB_VERSION2_CRCBIT) ++ ++ if ((sbp->sb_versionnum & V5_VERS_FLAGS) != V5_VERS_FLAGS) ++ return false; ++ if ((sbp->sb_features2 & V5_FEAT_FLAGS) != V5_FEAT_FLAGS) ++ return false; ++ return true; ++} ++ + /* + * We support all XFS versions newer than a v4 superblock with V2 directories. + */ +@@ -37,9 +78,19 @@ bool + xfs_sb_good_version( + struct xfs_sb *sbp) + { +- /* all v5 filesystems are supported */ ++ /* ++ * All v5 filesystems are supported, but we must check that all the ++ * required v4 feature flags are enabled correctly as the code checks ++ * those flags and not for v5 support. ++ */ + if (xfs_sb_is_v5(sbp)) +- return true; ++ return xfs_sb_validate_v5_features(sbp); ++ ++ /* We must not have any unknown v4 feature bits set */ ++ if ((sbp->sb_versionnum & ~XFS_SB_VERSION_OKBITS) || ++ ((sbp->sb_versionnum & XFS_SB_VERSION_MOREBITSBIT) && ++ (sbp->sb_features2 & ~XFS_SB_VERSION2_OKBITS))) ++ return false; + + /* versions prior to v4 are not supported */ + if (XFS_SB_VERSION_NUM(sbp) < XFS_SB_VERSION_4) +@@ -51,12 +102,6 @@ xfs_sb_good_version( + if (!(sbp->sb_versionnum & XFS_SB_VERSION_EXTFLGBIT)) + return false; + +- /* And must not have any unknown v4 feature bits set */ +- if ((sbp->sb_versionnum & ~XFS_SB_VERSION_OKBITS) || +- ((sbp->sb_versionnum & XFS_SB_VERSION_MOREBITSBIT) && +- (sbp->sb_features2 & ~XFS_SB_VERSION2_OKBITS))) +- return false; +- + /* It's a supported v4 filesystem */ + return true; + } +@@ -70,6 +115,8 @@ xfs_sb_version_to_features( + /* optional V4 features */ + if (sbp->sb_rblocks > 0) + features |= XFS_FEAT_REALTIME; ++ if (sbp->sb_versionnum & XFS_SB_VERSION_NLINKBIT) ++ features |= XFS_FEAT_NLINK; + if (sbp->sb_versionnum & XFS_SB_VERSION_ATTRBIT) + features |= XFS_FEAT_ATTR; + if (sbp->sb_versionnum & XFS_SB_VERSION_QUOTABIT) +@@ -262,12 +309,15 @@ xfs_validate_sb_common( + bool has_dalign; + + if (!xfs_verify_magic(bp, dsb->sb_magicnum)) { +- xfs_warn(mp, "bad magic number"); ++ xfs_warn(mp, ++"Superblock has bad magic number 0x%x. Not an XFS filesystem?", ++ be32_to_cpu(dsb->sb_magicnum)); + return -EWRONGFS; + } + + if (!xfs_sb_good_version(sbp)) { +- xfs_warn(mp, "bad version"); ++ xfs_warn(mp, ++"Superblock has unknown features enabled or corrupted feature masks."); + return -EWRONGFS; + } + +diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c +index 34fc6148032a3..c8c15c3c31471 100644 +--- a/fs/xfs/xfs_aops.c ++++ b/fs/xfs/xfs_aops.c +@@ -82,6 +82,7 @@ xfs_end_ioend( + struct iomap_ioend *ioend) + { + struct xfs_inode *ip = XFS_I(ioend->io_inode); ++ struct xfs_mount *mp = ip->i_mount; + xfs_off_t offset = ioend->io_offset; + size_t size = ioend->io_size; + unsigned int nofs_flag; +@@ -97,18 +98,26 @@ xfs_end_ioend( + /* + * Just clean up the in-memory structures if the fs has been shut down. + */ +- if (xfs_is_shutdown(ip->i_mount)) { ++ if (xfs_is_shutdown(mp)) { + error = -EIO; + goto done; + } + + /* +- * Clean up any COW blocks on an I/O error. ++ * Clean up all COW blocks and underlying data fork delalloc blocks on ++ * I/O error. The delalloc punch is required because this ioend was ++ * mapped to blocks in the COW fork and the associated pages are no ++ * longer dirty. 
If we don't remove delalloc blocks here, they become ++ * stale and can corrupt free space accounting on unmount. + */ + error = blk_status_to_errno(ioend->io_bio->bi_status); + if (unlikely(error)) { +- if (ioend->io_flags & IOMAP_F_SHARED) ++ if (ioend->io_flags & IOMAP_F_SHARED) { + xfs_reflink_cancel_cow_range(ip, offset, size, true); ++ xfs_bmap_punch_delalloc_range(ip, ++ XFS_B_TO_FSBT(mp, offset), ++ XFS_B_TO_FSB(mp, size)); ++ } + goto done; + } + +diff --git a/fs/xfs/xfs_bio_io.c b/fs/xfs/xfs_bio_io.c +index 667e297f59b16..17f36db2f7928 100644 +--- a/fs/xfs/xfs_bio_io.c ++++ b/fs/xfs/xfs_bio_io.c +@@ -9,41 +9,6 @@ static inline unsigned int bio_max_vecs(unsigned int count) + return bio_max_segs(howmany(count, PAGE_SIZE)); + } + +-static void +-xfs_flush_bdev_async_endio( +- struct bio *bio) +-{ +- complete(bio->bi_private); +-} +- +-/* +- * Submit a request for an async cache flush to run. If the request queue does +- * not require flush operations, just skip it altogether. If the caller needs +- * to wait for the flush completion at a later point in time, they must supply a +- * valid completion. This will be signalled when the flush completes. The +- * caller never sees the bio that is issued here. +- */ +-void +-xfs_flush_bdev_async( +- struct bio *bio, +- struct block_device *bdev, +- struct completion *done) +-{ +- struct request_queue *q = bdev->bd_disk->queue; +- +- if (!test_bit(QUEUE_FLAG_WC, &q->queue_flags)) { +- complete(done); +- return; +- } +- +- bio_init(bio, NULL, 0); +- bio_set_dev(bio, bdev); +- bio->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH | REQ_SYNC; +- bio->bi_private = done; +- bio->bi_end_io = xfs_flush_bdev_async_endio; +- +- submit_bio(bio); +-} + int + xfs_rw_bdev( + struct block_device *bdev, +diff --git a/fs/xfs/xfs_bmap_item.c b/fs/xfs/xfs_bmap_item.c +index 03159970133ff..51ffdec5e4faa 100644 +--- a/fs/xfs/xfs_bmap_item.c ++++ b/fs/xfs/xfs_bmap_item.c +@@ -39,6 +39,7 @@ STATIC void + xfs_bui_item_free( + struct xfs_bui_log_item *buip) + { ++ kmem_free(buip->bui_item.li_lv_shadow); + kmem_cache_free(xfs_bui_zone, buip); + } + +@@ -198,6 +199,7 @@ xfs_bud_item_release( + struct xfs_bud_log_item *budp = BUD_ITEM(lip); + + xfs_bui_release(budp->bud_buip); ++ kmem_free(budp->bud_item.li_lv_shadow); + kmem_cache_free(xfs_bud_zone, budp); + } + +diff --git a/fs/xfs/xfs_buf_item_recover.c b/fs/xfs/xfs_buf_item_recover.c +index a476c7ef5d533..991fbf1eb5640 100644 +--- a/fs/xfs/xfs_buf_item_recover.c ++++ b/fs/xfs/xfs_buf_item_recover.c +@@ -816,7 +816,7 @@ xlog_recover_get_buf_lsn( + } + + if (lsn != (xfs_lsn_t)-1) { +- if (!uuid_equal(&mp->m_sb.sb_uuid, uuid)) ++ if (!uuid_equal(&mp->m_sb.sb_meta_uuid, uuid)) + goto recover_immediately; + return lsn; + } +diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c +index 3f8a0713573ad..a4b8caa2c601d 100644 +--- a/fs/xfs/xfs_extfree_item.c ++++ b/fs/xfs/xfs_extfree_item.c +@@ -482,7 +482,7 @@ xfs_extent_free_finish_item( + free->xefi_startblock, + free->xefi_blockcount, + &free->xefi_oinfo, free->xefi_skip_discard); +- kmem_free(free); ++ kmem_cache_free(xfs_bmap_free_item_zone, free); + return error; + } + +@@ -502,7 +502,7 @@ xfs_extent_free_cancel_item( + struct xfs_extent_free_item *free; + + free = container_of(item, struct xfs_extent_free_item, xefi_list); +- kmem_free(free); ++ kmem_cache_free(xfs_bmap_free_item_zone, free); + } + + const struct xfs_defer_op_type xfs_extent_free_defer_type = { +@@ -564,7 +564,7 @@ xfs_agfl_free_finish_item( + extp->ext_len = free->xefi_blockcount; + 
efdp->efd_next_extent++; + +- kmem_free(free); ++ kmem_cache_free(xfs_bmap_free_item_zone, free); + return error; + } + +diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c +index 7aa943edfc02f..240eb932c014b 100644 +--- a/fs/xfs/xfs_file.c ++++ b/fs/xfs/xfs_file.c +@@ -259,7 +259,7 @@ xfs_file_dio_read( + ret = xfs_ilock_iocb(iocb, XFS_IOLOCK_SHARED); + if (ret) + return ret; +- ret = iomap_dio_rw(iocb, to, &xfs_read_iomap_ops, NULL, 0); ++ ret = iomap_dio_rw(iocb, to, &xfs_read_iomap_ops, NULL, 0, 0); + xfs_iunlock(ip, XFS_IOLOCK_SHARED); + + return ret; +@@ -569,7 +569,7 @@ xfs_file_dio_write_aligned( + } + trace_xfs_file_direct_write(iocb, from); + ret = iomap_dio_rw(iocb, from, &xfs_direct_write_iomap_ops, +- &xfs_dio_write_ops, 0); ++ &xfs_dio_write_ops, 0, 0); + out_unlock: + if (iolock) + xfs_iunlock(ip, iolock); +@@ -647,7 +647,7 @@ retry_exclusive: + + trace_xfs_file_direct_write(iocb, from); + ret = iomap_dio_rw(iocb, from, &xfs_direct_write_iomap_ops, +- &xfs_dio_write_ops, flags); ++ &xfs_dio_write_ops, flags, 0); + + /* + * Retry unaligned I/O with exclusive blocking semantics if the DIO +diff --git a/fs/xfs/xfs_filestream.c b/fs/xfs/xfs_filestream.c +index 6a3ce0f6dc9e9..be9bcf8a1f991 100644 +--- a/fs/xfs/xfs_filestream.c ++++ b/fs/xfs/xfs_filestream.c +@@ -128,11 +128,12 @@ xfs_filestream_pick_ag( + if (!pag->pagf_init) { + err = xfs_alloc_pagf_init(mp, NULL, ag, trylock); + if (err) { +- xfs_perag_put(pag); +- if (err != -EAGAIN) ++ if (err != -EAGAIN) { ++ xfs_perag_put(pag); + return err; ++ } + /* Couldn't lock the AGF, skip this AG. */ +- continue; ++ goto next_ag; + } + } + +diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c +index 33e26690a8c4f..5b5b68affe66d 100644 +--- a/fs/xfs/xfs_fsops.c ++++ b/fs/xfs/xfs_fsops.c +@@ -430,46 +430,36 @@ xfs_reserve_blocks( + * If the request is larger than the current reservation, reserve the + * blocks before we update the reserve counters. Sample m_fdblocks and + * perform a partial reservation if the request exceeds free space. ++ * ++ * The code below estimates how many blocks it can request from ++ * fdblocks to stash in the reserve pool. This is a classic TOCTOU ++ * race since fdblocks updates are not always coordinated via ++ * m_sb_lock. Set the reserve size even if there's not enough free ++ * space to fill it because mod_fdblocks will refill an undersized ++ * reserve when it can. + */ +- error = -ENOSPC; +- do { +- free = percpu_counter_sum(&mp->m_fdblocks) - +- mp->m_alloc_set_aside; +- if (free <= 0) +- break; +- +- delta = request - mp->m_resblks; +- lcounter = free - delta; +- if (lcounter < 0) +- /* We can't satisfy the request, just get what we can */ +- fdblks_delta = free; +- else +- fdblks_delta = delta; +- ++ free = percpu_counter_sum(&mp->m_fdblocks) - ++ xfs_fdblocks_unavailable(mp); ++ delta = request - mp->m_resblks; ++ mp->m_resblks = request; ++ if (delta > 0 && free > 0) { + /* + * We'll either succeed in getting space from the free block +- * count or we'll get an ENOSPC. If we get a ENOSPC, it means +- * things changed while we were calculating fdblks_delta and so +- * we should try again to see if there is anything left to +- * reserve. ++ * count or we'll get an ENOSPC. Don't set the reserved flag ++ * here - we don't want to reserve the extra reserve blocks ++ * from the reserve. + * +- * Don't set the reserved flag here - we don't want to reserve +- * the extra reserve blocks from the reserve..... ++ * The desired reserve size can change after we drop the lock. 
++ * Use mod_fdblocks to put the space into the reserve or into ++ * fdblocks as appropriate. + */ ++ fdblks_delta = min(free, delta); + spin_unlock(&mp->m_sb_lock); + error = xfs_mod_fdblocks(mp, -fdblks_delta, 0); ++ if (!error) ++ xfs_mod_fdblocks(mp, fdblks_delta, 0); + spin_lock(&mp->m_sb_lock); +- } while (error == -ENOSPC); +- +- /* +- * Update the reserve counters if blocks have been successfully +- * allocated. +- */ +- if (!error && fdblks_delta) { +- mp->m_resblks += fdblks_delta; +- mp->m_resblks_avail += fdblks_delta; + } +- + out: + if (outval) { + outval->resblks = mp->m_resblks; +diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c +index f2210d927481b..5e44d7bbd8fca 100644 +--- a/fs/xfs/xfs_icache.c ++++ b/fs/xfs/xfs_icache.c +@@ -1872,28 +1872,20 @@ xfs_inodegc_worker( + } + + /* +- * Force all currently queued inode inactivation work to run immediately, and +- * wait for the work to finish. Two pass - queue all the work first pass, wait +- * for it in a second pass. ++ * Force all currently queued inode inactivation work to run immediately and ++ * wait for the work to finish. + */ + void + xfs_inodegc_flush( + struct xfs_mount *mp) + { +- struct xfs_inodegc *gc; +- int cpu; +- + if (!xfs_is_inodegc_enabled(mp)) + return; + + trace_xfs_inodegc_flush(mp, __return_address); + + xfs_inodegc_queue_all(mp); +- +- for_each_online_cpu(cpu) { +- gc = per_cpu_ptr(mp->m_inodegc, cpu); +- flush_work(&gc->work); +- } ++ flush_workqueue(mp->m_inodegc_wq); + } + + /* +@@ -1904,18 +1896,12 @@ void + xfs_inodegc_stop( + struct xfs_mount *mp) + { +- struct xfs_inodegc *gc; +- int cpu; +- + if (!xfs_clear_inodegc_enabled(mp)) + return; + + xfs_inodegc_queue_all(mp); ++ drain_workqueue(mp->m_inodegc_wq); + +- for_each_online_cpu(cpu) { +- gc = per_cpu_ptr(mp->m_inodegc, cpu); +- cancel_work_sync(&gc->work); +- } + trace_xfs_inodegc_stop(mp, __return_address); + } + +diff --git a/fs/xfs/xfs_icreate_item.c b/fs/xfs/xfs_icreate_item.c +index 017904a34c023..c265ae20946d5 100644 +--- a/fs/xfs/xfs_icreate_item.c ++++ b/fs/xfs/xfs_icreate_item.c +@@ -63,6 +63,7 @@ STATIC void + xfs_icreate_item_release( + struct xfs_log_item *lip) + { ++ kmem_free(ICR_ITEM(lip)->ic_item.li_lv_shadow); + kmem_cache_free(xfs_icreate_zone, ICR_ITEM(lip)); + } + +diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c +index a4f6f034fb813..b2ea853182141 100644 +--- a/fs/xfs/xfs_inode.c ++++ b/fs/xfs/xfs_inode.c +@@ -994,8 +994,8 @@ xfs_create( + /* + * Make sure that we have allocated dquot(s) on disk. + */ +- error = xfs_qm_vop_dqalloc(dp, mapped_fsuid(mnt_userns), +- mapped_fsgid(mnt_userns), prid, ++ error = xfs_qm_vop_dqalloc(dp, mapped_fsuid(mnt_userns, &init_user_ns), ++ mapped_fsgid(mnt_userns, &init_user_ns), prid, + XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, + &udqp, &gdqp, &pdqp); + if (error) +@@ -1148,8 +1148,8 @@ xfs_create_tmpfile( + /* + * Make sure that we have allocated dquot(s) on disk. 
+ */ +- error = xfs_qm_vop_dqalloc(dp, mapped_fsuid(mnt_userns), +- mapped_fsgid(mnt_userns), prid, ++ error = xfs_qm_vop_dqalloc(dp, mapped_fsuid(mnt_userns, &init_user_ns), ++ mapped_fsgid(mnt_userns, &init_user_ns), prid, + XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, + &udqp, &gdqp, &pdqp); + if (error) +@@ -1223,7 +1223,7 @@ xfs_link( + { + xfs_mount_t *mp = tdp->i_mount; + xfs_trans_t *tp; +- int error; ++ int error, nospace_error = 0; + int resblks; + + trace_xfs_link(tdp, target_name); +@@ -1242,19 +1242,11 @@ xfs_link( + goto std_return; + + resblks = XFS_LINK_SPACE_RES(mp, target_name->len); +- error = xfs_trans_alloc(mp, &M_RES(mp)->tr_link, resblks, 0, 0, &tp); +- if (error == -ENOSPC) { +- resblks = 0; +- error = xfs_trans_alloc(mp, &M_RES(mp)->tr_link, 0, 0, 0, &tp); +- } ++ error = xfs_trans_alloc_dir(tdp, &M_RES(mp)->tr_link, sip, &resblks, ++ &tp, &nospace_error); + if (error) + goto std_return; + +- xfs_lock_two_inodes(sip, XFS_ILOCK_EXCL, tdp, XFS_ILOCK_EXCL); +- +- xfs_trans_ijoin(tp, sip, XFS_ILOCK_EXCL); +- xfs_trans_ijoin(tp, tdp, XFS_ILOCK_EXCL); +- + error = xfs_iext_count_may_overflow(tdp, XFS_DATA_FORK, + XFS_IEXT_DIR_MANIP_CNT(mp)); + if (error) +@@ -1312,6 +1304,8 @@ xfs_link( + error_return: + xfs_trans_cancel(tp); + std_return: ++ if (error == -ENOSPC && nospace_error) ++ error = nospace_error; + return error; + } + +@@ -2605,14 +2599,13 @@ xfs_ifree_cluster( + } + + /* +- * This is called to return an inode to the inode free list. +- * The inode should already be truncated to 0 length and have +- * no pages associated with it. This routine also assumes that +- * the inode is already a part of the transaction. ++ * This is called to return an inode to the inode free list. The inode should ++ * already be truncated to 0 length and have no pages associated with it. This ++ * routine also assumes that the inode is already a part of the transaction. + * +- * The on-disk copy of the inode will have been added to the list +- * of unlinked inodes in the AGI. We need to remove the inode from +- * that list atomically with respect to freeing it here. ++ * The on-disk copy of the inode will have been added to the list of unlinked ++ * inodes in the AGI. We need to remove the inode from that list atomically with ++ * respect to freeing it here. + */ + int + xfs_ifree( +@@ -2634,13 +2627,16 @@ xfs_ifree( + pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino)); + + /* +- * Pull the on-disk inode from the AGI unlinked list. ++ * Free the inode first so that we guarantee that the AGI lock is going ++ * to be taken before we remove the inode from the unlinked list. This ++ * makes the AGI lock -> unlinked list modification order the same as ++ * used in O_TMPFILE creation. + */ +- error = xfs_iunlink_remove(tp, pag, ip); ++ error = xfs_difree(tp, pag, ip->i_ino, &xic); + if (error) + goto out; + +- error = xfs_difree(tp, pag, ip->i_ino, &xic); ++ error = xfs_iunlink_remove(tp, pag, ip); + if (error) + goto out; + +@@ -2761,6 +2757,7 @@ xfs_remove( + xfs_mount_t *mp = dp->i_mount; + xfs_trans_t *tp = NULL; + int is_dir = S_ISDIR(VFS_I(ip)->i_mode); ++ int dontcare; + int error = 0; + uint resblks; + +@@ -2778,31 +2775,24 @@ xfs_remove( + goto std_return; + + /* +- * We try to get the real space reservation first, +- * allowing for directory btree deletion(s) implying +- * possible bmap insert(s). 
If we can't get the space +- * reservation then we use 0 instead, and avoid the bmap +- * btree insert(s) in the directory code by, if the bmap +- * insert tries to happen, instead trimming the LAST +- * block from the directory. ++ * We try to get the real space reservation first, allowing for ++ * directory btree deletion(s) implying possible bmap insert(s). If we ++ * can't get the space reservation then we use 0 instead, and avoid the ++ * bmap btree insert(s) in the directory code by, if the bmap insert ++ * tries to happen, instead trimming the LAST block from the directory. ++ * ++ * Ignore EDQUOT and ENOSPC being returned via nospace_error because ++ * the directory code can handle a reservationless update and we don't ++ * want to prevent a user from trying to free space by deleting things. + */ + resblks = XFS_REMOVE_SPACE_RES(mp); +- error = xfs_trans_alloc(mp, &M_RES(mp)->tr_remove, resblks, 0, 0, &tp); +- if (error == -ENOSPC) { +- resblks = 0; +- error = xfs_trans_alloc(mp, &M_RES(mp)->tr_remove, 0, 0, 0, +- &tp); +- } ++ error = xfs_trans_alloc_dir(dp, &M_RES(mp)->tr_remove, ip, &resblks, ++ &tp, &dontcare); + if (error) { + ASSERT(error != -ENOSPC); + goto std_return; + } + +- xfs_lock_two_inodes(dp, XFS_ILOCK_EXCL, ip, XFS_ILOCK_EXCL); +- +- xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL); +- xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); +- + /* + * If we're removing a directory perform some additional validation. + */ +@@ -3115,7 +3105,8 @@ xfs_rename( + bool new_parent = (src_dp != target_dp); + bool src_is_directory = S_ISDIR(VFS_I(src_ip)->i_mode); + int spaceres; +- int error; ++ bool retried = false; ++ int error, nospace_error = 0; + + trace_xfs_rename(src_dp, target_dp, src_name, target_name); + +@@ -3128,7 +3119,6 @@ xfs_rename( + * appropriately. + */ + if (flags & RENAME_WHITEOUT) { +- ASSERT(!(flags & (RENAME_NOREPLACE | RENAME_EXCHANGE))); + error = xfs_rename_alloc_whiteout(mnt_userns, target_dp, &wip); + if (error) + return error; +@@ -3140,9 +3130,12 @@ xfs_rename( + xfs_sort_for_rename(src_dp, target_dp, src_ip, target_ip, wip, + inodes, &num_inodes); + ++retry: ++ nospace_error = 0; + spaceres = XFS_RENAME_SPACE_RES(mp, target_name->len); + error = xfs_trans_alloc(mp, &M_RES(mp)->tr_rename, spaceres, 0, 0, &tp); + if (error == -ENOSPC) { ++ nospace_error = error; + spaceres = 0; + error = xfs_trans_alloc(mp, &M_RES(mp)->tr_rename, 0, 0, 0, + &tp); +@@ -3196,6 +3189,31 @@ xfs_rename( + target_dp, target_name, target_ip, + spaceres); + ++ /* ++ * Try to reserve quota to handle an expansion of the target directory. ++ * We'll allow the rename to continue in reservationless mode if we hit ++ * a space usage constraint. If we trigger reservationless mode, save ++ * the errno if there isn't any free space in the target directory. ++ */ ++ if (spaceres != 0) { ++ error = xfs_trans_reserve_quota_nblks(tp, target_dp, spaceres, ++ 0, false); ++ if (error == -EDQUOT || error == -ENOSPC) { ++ if (!retried) { ++ xfs_trans_cancel(tp); ++ xfs_blockgc_free_quota(target_dp, 0); ++ retried = true; ++ goto retry; ++ } ++ ++ nospace_error = error; ++ spaceres = 0; ++ error = 0; ++ } ++ if (error) ++ goto out_trans_cancel; ++ } ++ + /* + * Check for expected errors before we dirty the transaction + * so we can return an error without a transaction abort. 
+@@ -3442,6 +3460,8 @@ out_trans_cancel: + out_release_wip: + if (wip) + xfs_irele(wip); ++ if (error == -ENOSPC && nospace_error) ++ error = nospace_error; + return error; + } + +diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c +index 0c795dc093efa..bcc3c18c8080b 100644 +--- a/fs/xfs/xfs_ioctl.c ++++ b/fs/xfs/xfs_ioctl.c +@@ -372,7 +372,7 @@ int + xfs_ioc_attr_list( + struct xfs_inode *dp, + void __user *ubuf, +- int bufsize, ++ size_t bufsize, + int flags, + struct xfs_attrlist_cursor __user *ucursor) + { +@@ -687,7 +687,8 @@ xfs_ioc_space( + + if (bf->l_start > XFS_ISIZE(ip)) { + error = xfs_alloc_file_space(ip, XFS_ISIZE(ip), +- bf->l_start - XFS_ISIZE(ip), 0); ++ bf->l_start - XFS_ISIZE(ip), ++ XFS_BMAPI_PREALLOC); + if (error) + goto out_unlock; + } +@@ -1544,7 +1545,7 @@ xfs_ioc_getbmap( + + if (bmx.bmv_count < 2) + return -EINVAL; +- if (bmx.bmv_count > ULONG_MAX / recsize) ++ if (bmx.bmv_count >= INT_MAX / recsize) + return -ENOMEM; + + buf = kvzalloc(bmx.bmv_count * sizeof(*buf), GFP_KERNEL); +diff --git a/fs/xfs/xfs_ioctl.h b/fs/xfs/xfs_ioctl.h +index 28453a6d44618..845d3bcab74b4 100644 +--- a/fs/xfs/xfs_ioctl.h ++++ b/fs/xfs/xfs_ioctl.h +@@ -38,8 +38,9 @@ xfs_readlink_by_handle( + int xfs_ioc_attrmulti_one(struct file *parfilp, struct inode *inode, + uint32_t opcode, void __user *uname, void __user *value, + uint32_t *len, uint32_t flags); +-int xfs_ioc_attr_list(struct xfs_inode *dp, void __user *ubuf, int bufsize, +- int flags, struct xfs_attrlist_cursor __user *ucursor); ++int xfs_ioc_attr_list(struct xfs_inode *dp, void __user *ubuf, ++ size_t bufsize, int flags, ++ struct xfs_attrlist_cursor __user *ucursor); + + extern struct dentry * + xfs_handle_to_dentry( +diff --git a/fs/xfs/xfs_linux.h b/fs/xfs/xfs_linux.h +index c174262a074e3..cb9105d667db4 100644 +--- a/fs/xfs/xfs_linux.h ++++ b/fs/xfs/xfs_linux.h +@@ -61,6 +61,7 @@ typedef __u32 xfs_nlink_t; + #include <linux/ratelimit.h> + #include <linux/rhashtable.h> + #include <linux/xattr.h> ++#include <linux/mnt_idmapping.h> + + #include <asm/page.h> + #include <asm/div64.h> +@@ -196,8 +197,6 @@ static inline uint64_t howmany_64(uint64_t x, uint32_t y) + + int xfs_rw_bdev(struct block_device *bdev, sector_t sector, unsigned int count, + char *data, unsigned int op); +-void xfs_flush_bdev_async(struct bio *bio, struct block_device *bdev, +- struct completion *done); + + #define ASSERT_ALWAYS(expr) \ + (likely(expr) ? (void)0 : assfail(NULL, #expr, __FILE__, __LINE__)) +diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c +index f6cd2d4aa770d..0fb7d05ca308d 100644 +--- a/fs/xfs/xfs_log.c ++++ b/fs/xfs/xfs_log.c +@@ -487,7 +487,10 @@ out_error: + * Run all the pending iclog callbacks and wake log force waiters and iclog + * space waiters so they can process the newly set shutdown state. We really + * don't care what order we process callbacks here because the log is shut down +- * and so state cannot change on disk anymore. ++ * and so state cannot change on disk anymore. However, we cannot wake waiters ++ * until the callbacks have been processed because we may be in unmount and ++ * we must ensure that all AIL operations the callbacks perform have completed ++ * before we tear down the AIL. + * + * We avoid processing actively referenced iclogs so that we don't run callbacks + * while the iclog owner might still be preparing the iclog for IO submssion. 
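[Editor's sketch, not part of the patch: the comment above describes an ordering rule, namely that shutdown waiters may only be woken after the iclog callbacks have run. As a generic kernel-style pattern with illustrative names (struct shutdown_log and process_callbacks() are stand-ins, not kernel APIs), this is the usual splice-under-lock, process-unlocked, wake-afterwards shape that the following hunks implement for xlog_state_shutdown_callbacks():]

#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/wait.h>

struct shutdown_log {                    /* illustrative stand-in for struct xlog */
	spinlock_t		lock;
	struct list_head	callbacks;
	wait_queue_head_t	waiters;
};

void process_callbacks(struct list_head *list);   /* hypothetical; may sleep */

/* Splice the callback list out under the lock, run the callbacks with the
 * lock dropped, and only then wake waiters, so a teardown path that waits
 * cannot proceed while callbacks are still running. */
static void run_callbacks_then_wake(struct shutdown_log *log)
{
	LIST_HEAD(cb_list);

	spin_lock(&log->lock);
	list_splice_init(&log->callbacks, &cb_list);
	spin_unlock(&log->lock);

	process_callbacks(&cb_list);

	spin_lock(&log->lock);
	wake_up_all(&log->waiters);     /* safe: callbacks have completed */
	spin_unlock(&log->lock);
}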
+@@ -501,7 +504,6 @@ xlog_state_shutdown_callbacks( + struct xlog_in_core *iclog; + LIST_HEAD(cb_list); + +- spin_lock(&log->l_icloglock); + iclog = log->l_iclog; + do { + if (atomic_read(&iclog->ic_refcnt)) { +@@ -509,26 +511,22 @@ xlog_state_shutdown_callbacks( + continue; + } + list_splice_init(&iclog->ic_callbacks, &cb_list); ++ spin_unlock(&log->l_icloglock); ++ ++ xlog_cil_process_committed(&cb_list); ++ ++ spin_lock(&log->l_icloglock); + wake_up_all(&iclog->ic_write_wait); + wake_up_all(&iclog->ic_force_wait); + } while ((iclog = iclog->ic_next) != log->l_iclog); + + wake_up_all(&log->l_flush_wait); +- spin_unlock(&log->l_icloglock); +- +- xlog_cil_process_committed(&cb_list); + } + + /* + * Flush iclog to disk if this is the last reference to the given iclog and the + * it is in the WANT_SYNC state. + * +- * If the caller passes in a non-zero @old_tail_lsn and the current log tail +- * does not match, there may be metadata on disk that must be persisted before +- * this iclog is written. To satisfy that requirement, set the +- * XLOG_ICL_NEED_FLUSH flag as a condition for writing this iclog with the new +- * log tail value. +- * + * If XLOG_ICL_NEED_FUA is already set on the iclog, we need to ensure that the + * log tail is updated correctly. NEED_FUA indicates that the iclog will be + * written to stable storage, and implies that a commit record is contained +@@ -545,12 +543,10 @@ xlog_state_shutdown_callbacks( + * always capture the tail lsn on the iclog on the first NEED_FUA release + * regardless of the number of active reference counts on this iclog. + */ +- + int + xlog_state_release_iclog( + struct xlog *log, +- struct xlog_in_core *iclog, +- xfs_lsn_t old_tail_lsn) ++ struct xlog_in_core *iclog) + { + xfs_lsn_t tail_lsn; + bool last_ref; +@@ -561,18 +557,14 @@ xlog_state_release_iclog( + /* + * Grabbing the current log tail needs to be atomic w.r.t. the writing + * of the tail LSN into the iclog so we guarantee that the log tail does +- * not move between deciding if a cache flush is required and writing +- * the LSN into the iclog below. ++ * not move between the first time we know that the iclog needs to be ++ * made stable and when we eventually submit it. + */ +- if (old_tail_lsn || iclog->ic_state == XLOG_STATE_WANT_SYNC) { ++ if ((iclog->ic_state == XLOG_STATE_WANT_SYNC || ++ (iclog->ic_flags & XLOG_ICL_NEED_FUA)) && ++ !iclog->ic_header.h_tail_lsn) { + tail_lsn = xlog_assign_tail_lsn(log->l_mp); +- +- if (old_tail_lsn && tail_lsn != old_tail_lsn) +- iclog->ic_flags |= XLOG_ICL_NEED_FLUSH; +- +- if ((iclog->ic_flags & XLOG_ICL_NEED_FUA) && +- !iclog->ic_header.h_tail_lsn) +- iclog->ic_header.h_tail_lsn = cpu_to_be64(tail_lsn); ++ iclog->ic_header.h_tail_lsn = cpu_to_be64(tail_lsn); + } + + last_ref = atomic_dec_and_test(&iclog->ic_refcnt); +@@ -583,11 +575,8 @@ xlog_state_release_iclog( + * pending iclog callbacks that were waiting on the release of + * this iclog. 
+ */ +- if (last_ref) { +- spin_unlock(&log->l_icloglock); ++ if (last_ref) + xlog_state_shutdown_callbacks(log); +- spin_lock(&log->l_icloglock); +- } + return -EIO; + } + +@@ -600,8 +589,6 @@ xlog_state_release_iclog( + } + + iclog->ic_state = XLOG_STATE_SYNCING; +- if (!iclog->ic_header.h_tail_lsn) +- iclog->ic_header.h_tail_lsn = cpu_to_be64(tail_lsn); + xlog_verify_tail_lsn(log, iclog); + trace_xlog_iclog_syncing(iclog, _RET_IP_); + +@@ -874,7 +861,7 @@ xlog_force_iclog( + iclog->ic_flags |= XLOG_ICL_NEED_FLUSH | XLOG_ICL_NEED_FUA; + if (iclog->ic_state == XLOG_STATE_ACTIVE) + xlog_state_switch_iclogs(iclog->ic_log, iclog, 0); +- return xlog_state_release_iclog(iclog->ic_log, iclog, 0); ++ return xlog_state_release_iclog(iclog->ic_log, iclog); + } + + /* +@@ -2412,7 +2399,7 @@ xlog_write_copy_finish( + ASSERT(iclog->ic_state == XLOG_STATE_WANT_SYNC || + xlog_is_shutdown(log)); + release_iclog: +- error = xlog_state_release_iclog(log, iclog, 0); ++ error = xlog_state_release_iclog(log, iclog); + spin_unlock(&log->l_icloglock); + return error; + } +@@ -2629,7 +2616,7 @@ next_lv: + + spin_lock(&log->l_icloglock); + xlog_state_finish_copy(log, iclog, record_cnt, data_cnt); +- error = xlog_state_release_iclog(log, iclog, 0); ++ error = xlog_state_release_iclog(log, iclog); + spin_unlock(&log->l_icloglock); + + return error; +@@ -3053,7 +3040,7 @@ restart: + * reference to the iclog. + */ + if (!atomic_add_unless(&iclog->ic_refcnt, -1, 1)) +- error = xlog_state_release_iclog(log, iclog, 0); ++ error = xlog_state_release_iclog(log, iclog); + spin_unlock(&log->l_icloglock); + if (error) + return error; +@@ -3904,7 +3891,10 @@ xlog_force_shutdown( + wake_up_all(&log->l_cilp->xc_start_wait); + wake_up_all(&log->l_cilp->xc_commit_wait); + spin_unlock(&log->l_cilp->xc_push_lock); ++ ++ spin_lock(&log->l_icloglock); + xlog_state_shutdown_callbacks(log); ++ spin_unlock(&log->l_icloglock); + + return log_error; + } +diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c +index 6c93c8ada6f35..eafe30843ff0f 100644 +--- a/fs/xfs/xfs_log_cil.c ++++ b/fs/xfs/xfs_log_cil.c +@@ -681,11 +681,21 @@ xlog_cil_set_ctx_write_state( + * The LSN we need to pass to the log items on transaction + * commit is the LSN reported by the first log vector write, not + * the commit lsn. If we use the commit record lsn then we can +- * move the tail beyond the grant write head. ++ * move the grant write head beyond the tail LSN and overwrite ++ * it. + */ + ctx->start_lsn = lsn; + wake_up_all(&cil->xc_start_wait); + spin_unlock(&cil->xc_push_lock); ++ ++ /* ++ * Make sure the metadata we are about to overwrite in the log ++ * has been flushed to stable storage before this iclog is ++ * issued. ++ */ ++ spin_lock(&cil->xc_log->l_icloglock); ++ iclog->ic_flags |= XLOG_ICL_NEED_FLUSH; ++ spin_unlock(&cil->xc_log->l_icloglock); + return; + } + +@@ -864,10 +874,7 @@ xlog_cil_push_work( + struct xfs_trans_header thdr; + struct xfs_log_iovec lhdr; + struct xfs_log_vec lvhdr = { NULL }; +- xfs_lsn_t preflush_tail_lsn; + xfs_csn_t push_seq; +- struct bio bio; +- DECLARE_COMPLETION_ONSTACK(bdev_flush); + bool push_commit_stable; + + new_ctx = xlog_cil_ctx_alloc(); +@@ -937,23 +944,6 @@ xlog_cil_push_work( + list_add(&ctx->committing, &cil->xc_committing); + spin_unlock(&cil->xc_push_lock); + +- /* +- * The CIL is stable at this point - nothing new will be added to it +- * because we hold the flush lock exclusively. 
Hence we can now issue +- * a cache flush to ensure all the completed metadata in the journal we +- * are about to overwrite is on stable storage. +- * +- * Because we are issuing this cache flush before we've written the +- * tail lsn to the iclog, we can have metadata IO completions move the +- * tail forwards between the completion of this flush and the iclog +- * being written. In this case, we need to re-issue the cache flush +- * before the iclog write. To detect whether the log tail moves, sample +- * the tail LSN *before* we issue the flush. +- */ +- preflush_tail_lsn = atomic64_read(&log->l_tail_lsn); +- xfs_flush_bdev_async(&bio, log->l_mp->m_ddev_targp->bt_bdev, +- &bdev_flush); +- + /* + * Pull all the log vectors off the items in the CIL, and remove the + * items from the CIL. We don't need the CIL lock here because it's only +@@ -1030,12 +1020,6 @@ xlog_cil_push_work( + lvhdr.lv_iovecp = &lhdr; + lvhdr.lv_next = ctx->lv_chain; + +- /* +- * Before we format and submit the first iclog, we have to ensure that +- * the metadata writeback ordering cache flush is complete. +- */ +- wait_for_completion(&bdev_flush); +- + error = xlog_cil_write_chain(ctx, &lvhdr); + if (error) + goto out_abort_free_ticket; +@@ -1094,7 +1078,7 @@ xlog_cil_push_work( + if (push_commit_stable && + ctx->commit_iclog->ic_state == XLOG_STATE_ACTIVE) + xlog_state_switch_iclogs(log, ctx->commit_iclog, 0); +- xlog_state_release_iclog(log, ctx->commit_iclog, preflush_tail_lsn); ++ xlog_state_release_iclog(log, ctx->commit_iclog); + + /* Not safe to reference ctx now! */ + +@@ -1115,7 +1099,7 @@ out_abort_free_ticket: + return; + } + spin_lock(&log->l_icloglock); +- xlog_state_release_iclog(log, ctx->commit_iclog, 0); ++ xlog_state_release_iclog(log, ctx->commit_iclog); + /* Not safe to reference ctx now! */ + spin_unlock(&log->l_icloglock); + } +@@ -1442,9 +1426,9 @@ out_shutdown: + */ + bool + xfs_log_item_in_current_chkpt( +- struct xfs_log_item *lip) ++ struct xfs_log_item *lip) + { +- struct xfs_cil_ctx *ctx = lip->li_mountp->m_log->l_cilp->xc_ctx; ++ struct xfs_cil *cil = lip->li_mountp->m_log->l_cilp; + + if (list_empty(&lip->li_cil)) + return false; +@@ -1454,7 +1438,7 @@ xfs_log_item_in_current_chkpt( + * first checkpoint it is written to. Hence if it is different to the + * current sequence, we're in a new checkpoint. 
+ */ +- return lip->li_seq == ctx->sequence; ++ return lip->li_seq == READ_ONCE(cil->xc_current_sequence); + } + + /* +diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h +index 844fbeec3545a..f3d68ca39f45c 100644 +--- a/fs/xfs/xfs_log_priv.h ++++ b/fs/xfs/xfs_log_priv.h +@@ -524,8 +524,7 @@ void xfs_log_ticket_regrant(struct xlog *log, struct xlog_ticket *ticket); + + void xlog_state_switch_iclogs(struct xlog *log, struct xlog_in_core *iclog, + int eventual_size); +-int xlog_state_release_iclog(struct xlog *log, struct xlog_in_core *iclog, +- xfs_lsn_t log_tail_lsn); ++int xlog_state_release_iclog(struct xlog *log, struct xlog_in_core *iclog); + + /* + * When we crack an atomic LSN, we sample it first so that the value will not +diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c +index 10562ecbd9eac..581aeb288b32b 100644 +--- a/fs/xfs/xfs_log_recover.c ++++ b/fs/xfs/xfs_log_recover.c +@@ -27,7 +27,7 @@ + #include "xfs_buf_item.h" + #include "xfs_ag.h" + #include "xfs_quota.h" +- ++#include "xfs_reflink.h" + + #define BLK_AVG(blk1, blk2) ((blk1+blk2) >> 1) + +@@ -3502,6 +3502,28 @@ xlog_recover_finish( + + xlog_recover_process_iunlinks(log); + xlog_recover_check_summary(log); ++ ++ /* ++ * Recover any CoW staging blocks that are still referenced by the ++ * ondisk refcount metadata. During mount there cannot be any live ++ * staging extents as we have not permitted any user modifications. ++ * Therefore, it is safe to free them all right now, even on a ++ * read-only mount. ++ */ ++ error = xfs_reflink_recover_cow(log->l_mp); ++ if (error) { ++ xfs_alert(log->l_mp, ++ "Failed to recover leftover CoW staging extents, err %d.", ++ error); ++ /* ++ * If we get an error here, make sure the log is shut down ++ * but return zero so that any log items committed since the ++ * end of intents processing can be pushed through the CIL ++ * and AIL. ++ */ ++ xfs_force_shutdown(log->l_mp, SHUTDOWN_LOG_IO_ERROR); ++ } ++ + return 0; + } + +diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c +index 06dac09eddbd8..76056de83971c 100644 +--- a/fs/xfs/xfs_mount.c ++++ b/fs/xfs/xfs_mount.c +@@ -922,15 +922,6 @@ xfs_mountfs( + xfs_warn(mp, + "Unable to allocate reserve blocks. Continuing without reserve pool."); + +- /* Recover any CoW blocks that never got remapped. */ +- error = xfs_reflink_recover_cow(mp); +- if (error) { +- xfs_err(mp, +- "Error %d recovering leftover CoW allocations.", error); +- xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); +- goto out_quota; +- } +- + /* Reserve AG blocks for future btree expansion. */ + error = xfs_fs_reserve_ag_blocks(mp); + if (error && error != -ENOSPC) +@@ -941,7 +932,6 @@ xfs_mountfs( + + out_agresv: + xfs_fs_unreserve_ag_blocks(mp); +- out_quota: + xfs_qm_unmount_quotas(mp); + out_rtunmount: + xfs_rtunmount_inodes(mp); +@@ -1142,7 +1132,7 @@ xfs_mod_fdblocks( + * problems (i.e. transaction abort, pagecache discards, etc.) than + * slightly premature -ENOSPC. 
+ */ +- set_aside = mp->m_alloc_set_aside + atomic64_read(&mp->m_allocbt_blks); ++ set_aside = xfs_fdblocks_unavailable(mp); + percpu_counter_add_batch(&mp->m_fdblocks, delta, batch); + if (__percpu_counter_compare(&mp->m_fdblocks, set_aside, + XFS_FDBLOCKS_BATCH) >= 0) { +diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h +index e091f3b3fa158..86564295fce6d 100644 +--- a/fs/xfs/xfs_mount.h ++++ b/fs/xfs/xfs_mount.h +@@ -478,6 +478,21 @@ extern void xfs_unmountfs(xfs_mount_t *); + */ + #define XFS_FDBLOCKS_BATCH 1024 + ++/* ++ * Estimate the amount of free space that is not available to userspace and is ++ * not explicitly reserved from the incore fdblocks. This includes: ++ * ++ * - The minimum number of blocks needed to support splitting a bmap btree ++ * - The blocks currently in use by the freespace btrees because they record ++ * the actual blocks that will fill per-AG metadata space reservations ++ */ ++static inline uint64_t ++xfs_fdblocks_unavailable( ++ struct xfs_mount *mp) ++{ ++ return mp->m_alloc_set_aside + atomic64_read(&mp->m_allocbt_blks); ++} ++ + extern int xfs_mod_fdblocks(struct xfs_mount *mp, int64_t delta, + bool reserved); + extern int xfs_mod_frextents(struct xfs_mount *mp, int64_t delta); +diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c +index 5608066d6e539..623244650a2f0 100644 +--- a/fs/xfs/xfs_qm.c ++++ b/fs/xfs/xfs_qm.c +@@ -1317,8 +1317,15 @@ xfs_qm_quotacheck( + + error = xfs_iwalk_threaded(mp, 0, 0, xfs_qm_dqusage_adjust, 0, true, + NULL); +- if (error) ++ if (error) { ++ /* ++ * The inode walk may have partially populated the dquot ++ * caches. We must purge them before disabling quota and ++ * tearing down the quotainfo, or else the dquots will leak. ++ */ ++ xfs_qm_dqpurge_all(mp); + goto error_return; ++ } + + /* + * We've made all the changes that we need to make incore. Flush them +diff --git a/fs/xfs/xfs_refcount_item.c b/fs/xfs/xfs_refcount_item.c +index 46904b793bd48..8ef842d17916a 100644 +--- a/fs/xfs/xfs_refcount_item.c ++++ b/fs/xfs/xfs_refcount_item.c +@@ -35,6 +35,7 @@ STATIC void + xfs_cui_item_free( + struct xfs_cui_log_item *cuip) + { ++ kmem_free(cuip->cui_item.li_lv_shadow); + if (cuip->cui_format.cui_nextents > XFS_CUI_MAX_FAST_EXTENTS) + kmem_free(cuip); + else +@@ -204,6 +205,7 @@ xfs_cud_item_release( + struct xfs_cud_log_item *cudp = CUD_ITEM(lip); + + xfs_cui_release(cudp->cud_cuip); ++ kmem_free(cudp->cud_item.li_lv_shadow); + kmem_cache_free(xfs_cud_zone, cudp); + } + +diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c +index 76355f2934884..36832e4bc803c 100644 +--- a/fs/xfs/xfs_reflink.c ++++ b/fs/xfs/xfs_reflink.c +@@ -749,7 +749,10 @@ xfs_reflink_end_cow( + } + + /* +- * Free leftover CoW reservations that didn't get cleaned out. ++ * Free all CoW staging blocks that are still referenced by the ondisk refcount ++ * metadata. The ondisk metadata does not track which inode created the ++ * staging extent, so callers must ensure that there are no cached inodes with ++ * live CoW staging extents. 
+ */ + int + xfs_reflink_recover_cow( +diff --git a/fs/xfs/xfs_rmap_item.c b/fs/xfs/xfs_rmap_item.c +index 5f06959804678..15e7b01740a77 100644 +--- a/fs/xfs/xfs_rmap_item.c ++++ b/fs/xfs/xfs_rmap_item.c +@@ -35,6 +35,7 @@ STATIC void + xfs_rui_item_free( + struct xfs_rui_log_item *ruip) + { ++ kmem_free(ruip->rui_item.li_lv_shadow); + if (ruip->rui_format.rui_nextents > XFS_RUI_MAX_FAST_EXTENTS) + kmem_free(ruip); + else +@@ -227,6 +228,7 @@ xfs_rud_item_release( + struct xfs_rud_log_item *rudp = RUD_ITEM(lip); + + xfs_rui_release(rudp->rud_ruip); ++ kmem_free(rudp->rud_item.li_lv_shadow); + kmem_cache_free(xfs_rud_zone, rudp); + } + +diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c +index c4e0cd1c1c8ca..df1d6be61bfa3 100644 +--- a/fs/xfs/xfs_super.c ++++ b/fs/xfs/xfs_super.c +@@ -642,7 +642,7 @@ xfs_fs_destroy_inode( + static void + xfs_fs_dirty_inode( + struct inode *inode, +- int flag) ++ int flags) + { + struct xfs_inode *ip = XFS_I(inode); + struct xfs_mount *mp = ip->i_mount; +@@ -650,7 +650,13 @@ xfs_fs_dirty_inode( + + if (!(inode->i_sb->s_flags & SB_LAZYTIME)) + return; +- if (flag != I_DIRTY_SYNC || !(inode->i_state & I_DIRTY_TIME)) ++ ++ /* ++ * Only do the timestamp update if the inode is dirty (I_DIRTY_SYNC) ++ * and has dirty timestamp (I_DIRTY_TIME). I_DIRTY_TIME can be passed ++ * in flags possibly together with I_DIRTY_SYNC. ++ */ ++ if ((flags & ~I_DIRTY_TIME) != I_DIRTY_SYNC || !(flags & I_DIRTY_TIME)) + return; + + if (xfs_trans_alloc(mp, &M_RES(mp)->tr_fsyncts, 0, 0, 0, &tp)) +@@ -729,6 +735,7 @@ xfs_fs_sync_fs( + int wait) + { + struct xfs_mount *mp = XFS_M(sb); ++ int error; + + trace_xfs_fs_sync_fs(mp, __return_address); + +@@ -738,7 +745,10 @@ xfs_fs_sync_fs( + if (!wait) + return 0; + +- xfs_log_force(mp, XFS_LOG_SYNC); ++ error = xfs_log_force(mp, XFS_LOG_SYNC); ++ if (error) ++ return error; ++ + if (laptop_mode) { + /* + * The disk must be active because we're syncing. +@@ -1738,15 +1748,6 @@ xfs_remount_rw( + */ + xfs_restore_resvblks(mp); + xfs_log_work_queue(mp); +- +- /* Recover any CoW blocks that never got remapped. */ +- error = xfs_reflink_recover_cow(mp); +- if (error) { +- xfs_err(mp, +- "Error %d recovering leftover CoW allocations.", error); +- xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); +- return error; +- } + xfs_blockgc_start(mp); + + /* Create the per-AG metadata reservation pool .*/ +@@ -1764,7 +1765,15 @@ static int + xfs_remount_ro( + struct xfs_mount *mp) + { +- int error; ++ struct xfs_icwalk icw = { ++ .icw_flags = XFS_ICWALK_FLAG_SYNC, ++ }; ++ int error; ++ ++ /* Flush all the dirty data to disk. */ ++ error = sync_filesystem(mp->m_super); ++ if (error) ++ return error; + + /* + * Cancel background eofb scanning so it cannot race with the final +@@ -1772,8 +1781,13 @@ xfs_remount_ro( + */ + xfs_blockgc_stop(mp); + +- /* Get rid of any leftover CoW reservations... */ +- error = xfs_blockgc_free_space(mp, NULL); ++ /* ++ * Clear out all remaining COW staging extents and speculative post-EOF ++ * preallocations so that we don't leave inodes requiring inactivation ++ * cleanups during reclaim on a read-only mount. We must process every ++ * cached inode, so this requires a synchronous cache scan. 
++ */ ++ error = xfs_blockgc_free_space(mp, &icw); + if (error) { + xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); + return error; +@@ -1839,8 +1853,6 @@ xfs_fs_reconfigure( + if (error) + return error; + +- sync_filesystem(mp->m_super); +- + /* inode32 -> inode64 */ + if (xfs_has_small_inums(mp) && !xfs_has_small_inums(new_mp)) { + mp->m_features &= ~XFS_FEAT_SMALL_INUMS; +diff --git a/fs/xfs/xfs_symlink.c b/fs/xfs/xfs_symlink.c +index fc2c6a4046471..a31d2e5d03214 100644 +--- a/fs/xfs/xfs_symlink.c ++++ b/fs/xfs/xfs_symlink.c +@@ -184,8 +184,8 @@ xfs_symlink( + /* + * Make sure that we have allocated dquot(s) on disk. + */ +- error = xfs_qm_vop_dqalloc(dp, mapped_fsuid(mnt_userns), +- mapped_fsgid(mnt_userns), prid, ++ error = xfs_qm_vop_dqalloc(dp, mapped_fsuid(mnt_userns, &init_user_ns), ++ mapped_fsgid(mnt_userns, &init_user_ns), prid, + XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, + &udqp, &gdqp, &pdqp); + if (error) +diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c +index 67dec11e34c7e..95c183072e7a2 100644 +--- a/fs/xfs/xfs_trans.c ++++ b/fs/xfs/xfs_trans.c +@@ -1201,3 +1201,89 @@ out_cancel: + xfs_trans_cancel(tp); + return error; + } ++ ++/* ++ * Allocate an transaction, lock and join the directory and child inodes to it, ++ * and reserve quota for a directory update. If there isn't sufficient space, ++ * @dblocks will be set to zero for a reservationless directory update and ++ * @nospace_error will be set to a negative errno describing the space ++ * constraint we hit. ++ * ++ * The caller must ensure that the on-disk dquots attached to this inode have ++ * already been allocated and initialized. The ILOCKs will be dropped when the ++ * transaction is committed or cancelled. ++ */ ++int ++xfs_trans_alloc_dir( ++ struct xfs_inode *dp, ++ struct xfs_trans_res *resv, ++ struct xfs_inode *ip, ++ unsigned int *dblocks, ++ struct xfs_trans **tpp, ++ int *nospace_error) ++{ ++ struct xfs_trans *tp; ++ struct xfs_mount *mp = ip->i_mount; ++ unsigned int resblks; ++ bool retried = false; ++ int error; ++ ++retry: ++ *nospace_error = 0; ++ resblks = *dblocks; ++ error = xfs_trans_alloc(mp, resv, resblks, 0, 0, &tp); ++ if (error == -ENOSPC) { ++ *nospace_error = error; ++ resblks = 0; ++ error = xfs_trans_alloc(mp, resv, resblks, 0, 0, &tp); ++ } ++ if (error) ++ return error; ++ ++ xfs_lock_two_inodes(dp, XFS_ILOCK_EXCL, ip, XFS_ILOCK_EXCL); ++ ++ xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL); ++ xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); ++ ++ error = xfs_qm_dqattach_locked(dp, false); ++ if (error) { ++ /* Caller should have allocated the dquots! */ ++ ASSERT(error != -ENOENT); ++ goto out_cancel; ++ } ++ ++ error = xfs_qm_dqattach_locked(ip, false); ++ if (error) { ++ /* Caller should have allocated the dquots! 
*/ ++ ASSERT(error != -ENOENT); ++ goto out_cancel; ++ } ++ ++ if (resblks == 0) ++ goto done; ++ ++ error = xfs_trans_reserve_quota_nblks(tp, dp, resblks, 0, false); ++ if (error == -EDQUOT || error == -ENOSPC) { ++ if (!retried) { ++ xfs_trans_cancel(tp); ++ xfs_blockgc_free_quota(dp, 0); ++ retried = true; ++ goto retry; ++ } ++ ++ *nospace_error = error; ++ resblks = 0; ++ error = 0; ++ } ++ if (error) ++ goto out_cancel; ++ ++done: ++ *tpp = tp; ++ *dblocks = resblks; ++ return 0; ++ ++out_cancel: ++ xfs_trans_cancel(tp); ++ return error; ++} +diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h +index 50da47f23a077..faba74d4c7026 100644 +--- a/fs/xfs/xfs_trans.h ++++ b/fs/xfs/xfs_trans.h +@@ -265,6 +265,9 @@ int xfs_trans_alloc_icreate(struct xfs_mount *mp, struct xfs_trans_res *resv, + int xfs_trans_alloc_ichange(struct xfs_inode *ip, struct xfs_dquot *udqp, + struct xfs_dquot *gdqp, struct xfs_dquot *pdqp, bool force, + struct xfs_trans **tpp); ++int xfs_trans_alloc_dir(struct xfs_inode *dp, struct xfs_trans_res *resv, ++ struct xfs_inode *ip, unsigned int *dblocks, ++ struct xfs_trans **tpp, int *nospace_error); + + static inline void + xfs_trans_set_context( +diff --git a/fs/xfs/xfs_trans_dquot.c b/fs/xfs/xfs_trans_dquot.c +index 3872ce6714119..955c457e585a3 100644 +--- a/fs/xfs/xfs_trans_dquot.c ++++ b/fs/xfs/xfs_trans_dquot.c +@@ -603,7 +603,6 @@ xfs_dqresv_check( + return QUOTA_NL_ISOFTLONGWARN; + } + +- res->warnings++; + return QUOTA_NL_ISOFTWARN; + } + +diff --git a/fs/zonefs/super.c b/fs/zonefs/super.c +index ddc346a9df9ba..d3e182c1a1281 100644 +--- a/fs/zonefs/super.c ++++ b/fs/zonefs/super.c +@@ -35,6 +35,17 @@ static inline int zonefs_zone_mgmt(struct inode *inode, + + lockdep_assert_held(&zi->i_truncate_mutex); + ++ /* ++ * With ZNS drives, closing an explicitly open zone that has not been ++ * written will change the zone state to "closed", that is, the zone ++ * will remain active. Since this can then cause failure of explicit ++ * open operation on other zones if the drive active zone resources ++ * are exceeded, make sure that the zone does not remain active by ++ * resetting it. ++ */ ++ if (op == REQ_OP_ZONE_CLOSE && !zi->i_wpoffset) ++ op = REQ_OP_ZONE_RESET; ++ + trace_zonefs_zone_mgmt(inode, op); + ret = blkdev_zone_mgmt(inode->i_sb->s_bdev, op, zi->i_zsector, + zi->i_zone_size >> SECTOR_SHIFT, GFP_NOFS); +@@ -61,15 +72,51 @@ static inline void zonefs_i_size_write(struct inode *inode, loff_t isize) + zi->i_flags &= ~ZONEFS_ZONE_OPEN; + } + +-static int zonefs_iomap_begin(struct inode *inode, loff_t offset, loff_t length, +- unsigned int flags, struct iomap *iomap, +- struct iomap *srcmap) ++static int zonefs_read_iomap_begin(struct inode *inode, loff_t offset, ++ loff_t length, unsigned int flags, ++ struct iomap *iomap, struct iomap *srcmap) + { + struct zonefs_inode_info *zi = ZONEFS_I(inode); + struct super_block *sb = inode->i_sb; + loff_t isize; + +- /* All I/Os should always be within the file maximum size */ ++ /* ++ * All blocks are always mapped below EOF. If reading past EOF, ++ * act as if there is a hole up to the file maximum size. 
++ */ ++ mutex_lock(&zi->i_truncate_mutex); ++ iomap->bdev = inode->i_sb->s_bdev; ++ iomap->offset = ALIGN_DOWN(offset, sb->s_blocksize); ++ isize = i_size_read(inode); ++ if (iomap->offset >= isize) { ++ iomap->type = IOMAP_HOLE; ++ iomap->addr = IOMAP_NULL_ADDR; ++ iomap->length = length; ++ } else { ++ iomap->type = IOMAP_MAPPED; ++ iomap->addr = (zi->i_zsector << SECTOR_SHIFT) + iomap->offset; ++ iomap->length = isize - iomap->offset; ++ } ++ mutex_unlock(&zi->i_truncate_mutex); ++ ++ trace_zonefs_iomap_begin(inode, iomap); ++ ++ return 0; ++} ++ ++static const struct iomap_ops zonefs_read_iomap_ops = { ++ .iomap_begin = zonefs_read_iomap_begin, ++}; ++ ++static int zonefs_write_iomap_begin(struct inode *inode, loff_t offset, ++ loff_t length, unsigned int flags, ++ struct iomap *iomap, struct iomap *srcmap) ++{ ++ struct zonefs_inode_info *zi = ZONEFS_I(inode); ++ struct super_block *sb = inode->i_sb; ++ loff_t isize; ++ ++ /* All write I/Os should always be within the file maximum size */ + if (WARN_ON_ONCE(offset + length > zi->i_max_size)) + return -EIO; + +@@ -79,7 +126,7 @@ static int zonefs_iomap_begin(struct inode *inode, loff_t offset, loff_t length, + * operation. + */ + if (WARN_ON_ONCE(zi->i_ztype == ZONEFS_ZTYPE_SEQ && +- (flags & IOMAP_WRITE) && !(flags & IOMAP_DIRECT))) ++ !(flags & IOMAP_DIRECT))) + return -EIO; + + /* +@@ -88,47 +135,44 @@ static int zonefs_iomap_begin(struct inode *inode, loff_t offset, loff_t length, + * write pointer) and unwriten beyond. + */ + mutex_lock(&zi->i_truncate_mutex); ++ iomap->bdev = inode->i_sb->s_bdev; ++ iomap->offset = ALIGN_DOWN(offset, sb->s_blocksize); ++ iomap->addr = (zi->i_zsector << SECTOR_SHIFT) + iomap->offset; + isize = i_size_read(inode); +- if (offset >= isize) ++ if (iomap->offset >= isize) { + iomap->type = IOMAP_UNWRITTEN; +- else ++ iomap->length = zi->i_max_size - iomap->offset; ++ } else { + iomap->type = IOMAP_MAPPED; +- if (flags & IOMAP_WRITE) +- length = zi->i_max_size - offset; +- else +- length = min(length, isize - offset); ++ iomap->length = isize - iomap->offset; ++ } + mutex_unlock(&zi->i_truncate_mutex); + +- iomap->offset = ALIGN_DOWN(offset, sb->s_blocksize); +- iomap->length = ALIGN(offset + length, sb->s_blocksize) - iomap->offset; +- iomap->bdev = inode->i_sb->s_bdev; +- iomap->addr = (zi->i_zsector << SECTOR_SHIFT) + iomap->offset; +- + trace_zonefs_iomap_begin(inode, iomap); + + return 0; + } + +-static const struct iomap_ops zonefs_iomap_ops = { +- .iomap_begin = zonefs_iomap_begin, ++static const struct iomap_ops zonefs_write_iomap_ops = { ++ .iomap_begin = zonefs_write_iomap_begin, + }; + + static int zonefs_readpage(struct file *unused, struct page *page) + { +- return iomap_readpage(page, &zonefs_iomap_ops); ++ return iomap_readpage(page, &zonefs_read_iomap_ops); + } + + static void zonefs_readahead(struct readahead_control *rac) + { +- iomap_readahead(rac, &zonefs_iomap_ops); ++ iomap_readahead(rac, &zonefs_read_iomap_ops); + } + + /* + * Map blocks for page writeback. This is used only on conventional zone files, + * which implies that the page range can only be within the fixed inode size. 
+ */ +-static int zonefs_map_blocks(struct iomap_writepage_ctx *wpc, +- struct inode *inode, loff_t offset) ++static int zonefs_write_map_blocks(struct iomap_writepage_ctx *wpc, ++ struct inode *inode, loff_t offset) + { + struct zonefs_inode_info *zi = ZONEFS_I(inode); + +@@ -142,12 +186,12 @@ static int zonefs_map_blocks(struct iomap_writepage_ctx *wpc, + offset < wpc->iomap.offset + wpc->iomap.length) + return 0; + +- return zonefs_iomap_begin(inode, offset, zi->i_max_size - offset, +- IOMAP_WRITE, &wpc->iomap, NULL); ++ return zonefs_write_iomap_begin(inode, offset, zi->i_max_size - offset, ++ IOMAP_WRITE, &wpc->iomap, NULL); + } + + static const struct iomap_writeback_ops zonefs_writeback_ops = { +- .map_blocks = zonefs_map_blocks, ++ .map_blocks = zonefs_write_map_blocks, + }; + + static int zonefs_writepage(struct page *page, struct writeback_control *wbc) +@@ -177,7 +221,8 @@ static int zonefs_swap_activate(struct swap_info_struct *sis, + return -EINVAL; + } + +- return iomap_swapfile_activate(sis, swap_file, span, &zonefs_iomap_ops); ++ return iomap_swapfile_activate(sis, swap_file, span, ++ &zonefs_read_iomap_ops); + } + + static const struct address_space_operations zonefs_file_aops = { +@@ -357,6 +402,10 @@ static int zonefs_io_error_cb(struct blk_zone *zone, unsigned int idx, + data_size = zonefs_check_zone_condition(inode, zone, + false, false); + } ++ } else if (sbi->s_mount_opts & ZONEFS_MNTOPT_ERRORS_RO && ++ data_size > isize) { ++ /* Do not expose garbage data */ ++ data_size = isize; + } + + /* +@@ -403,14 +452,22 @@ static void __zonefs_io_error(struct inode *inode, bool write) + struct super_block *sb = inode->i_sb; + struct zonefs_sb_info *sbi = ZONEFS_SB(sb); + unsigned int noio_flag; +- unsigned int nr_zones = +- zi->i_zone_size >> (sbi->s_zone_sectors_shift + SECTOR_SHIFT); ++ unsigned int nr_zones = 1; + struct zonefs_ioerr_data err = { + .inode = inode, + .write = write, + }; + int ret; + ++ /* ++ * The only files that have more than one zone are conventional zone ++ * files with aggregated conventional zones, for which the inode zone ++ * size is always larger than the device zone size. 
++ */ ++ if (zi->i_zone_size > bdev_zone_sectors(sb->s_bdev)) ++ nr_zones = zi->i_zone_size >> ++ (sbi->s_zone_sectors_shift + SECTOR_SHIFT); ++ + /* + * Memory allocations in blkdev_report_zones() can trigger a memory + * reclaim which may in turn cause a recursion into zonefs as well as +@@ -596,7 +653,7 @@ static vm_fault_t zonefs_filemap_page_mkwrite(struct vm_fault *vmf) + + /* Serialize against truncates */ + filemap_invalidate_lock_shared(inode->i_mapping); +- ret = iomap_page_mkwrite(vmf, &zonefs_iomap_ops); ++ ret = iomap_page_mkwrite(vmf, &zonefs_write_iomap_ops); + filemap_invalidate_unlock_shared(inode->i_mapping); + + sb_end_pagefault(inode->i_sb); +@@ -678,13 +735,12 @@ static ssize_t zonefs_file_dio_append(struct kiocb *iocb, struct iov_iter *from) + struct inode *inode = file_inode(iocb->ki_filp); + struct zonefs_inode_info *zi = ZONEFS_I(inode); + struct block_device *bdev = inode->i_sb->s_bdev; +- unsigned int max; ++ unsigned int max = bdev_max_zone_append_sectors(bdev); + struct bio *bio; + ssize_t size; + int nr_pages; + ssize_t ret; + +- max = queue_max_zone_append_sectors(bdev_get_queue(bdev)); + max = ALIGN_DOWN(max << SECTOR_SHIFT, inode->i_sb->s_blocksize); + iov_iter_truncate(from, max); + +@@ -713,6 +769,24 @@ static ssize_t zonefs_file_dio_append(struct kiocb *iocb, struct iov_iter *from) + + ret = submit_bio_wait(bio); + ++ /* ++ * If the file zone was written underneath the file system, the zone ++ * write pointer may not be where we expect it to be, but the zone ++ * append write can still succeed. So check manually that we wrote where ++ * we intended to, that is, at zi->i_wpoffset. ++ */ ++ if (!ret) { ++ sector_t wpsector = ++ zi->i_zsector + (zi->i_wpoffset >> SECTOR_SHIFT); ++ ++ if (bio->bi_iter.bi_sector != wpsector) { ++ zonefs_warn(inode->i_sb, ++ "Corrupted write pointer %llu for zone at %llu\n", ++ wpsector, zi->i_zsector); ++ ret = -EIO; ++ } ++ } ++ + zonefs_file_write_dio_end_io(iocb, size, ret, 0); + trace_zonefs_file_dio_append(inode, size, ret); + +@@ -851,8 +925,8 @@ static ssize_t zonefs_file_dio_write(struct kiocb *iocb, struct iov_iter *from) + if (append) + ret = zonefs_file_dio_append(iocb, from); + else +- ret = iomap_dio_rw(iocb, from, &zonefs_iomap_ops, +- &zonefs_write_dio_ops, 0); ++ ret = iomap_dio_rw(iocb, from, &zonefs_write_iomap_ops, ++ &zonefs_write_dio_ops, 0, 0); + if (zi->i_ztype == ZONEFS_ZTYPE_SEQ && + (ret > 0 || ret == -EIOCBQUEUED)) { + if (ret > 0) +@@ -893,7 +967,7 @@ static ssize_t zonefs_file_buffered_write(struct kiocb *iocb, + if (ret <= 0) + goto inode_unlock; + +- ret = iomap_file_buffered_write(iocb, from, &zonefs_iomap_ops); ++ ret = iomap_file_buffered_write(iocb, from, &zonefs_write_iomap_ops); + if (ret > 0) + iocb->ki_pos += ret; + else if (ret == -EIO) +@@ -986,8 +1060,8 @@ static ssize_t zonefs_file_read_iter(struct kiocb *iocb, struct iov_iter *to) + goto inode_unlock; + } + file_accessed(iocb->ki_filp); +- ret = iomap_dio_rw(iocb, to, &zonefs_iomap_ops, +- &zonefs_read_dio_ops, 0); ++ ret = iomap_dio_rw(iocb, to, &zonefs_read_iomap_ops, ++ &zonefs_read_dio_ops, 0, 0); + } else { + ret = generic_file_read_iter(iocb, to); + if (ret == -EIO) +@@ -1144,6 +1218,7 @@ static struct inode *zonefs_alloc_inode(struct super_block *sb) + inode_init_once(&zi->i_vnode); + mutex_init(&zi->i_truncate_mutex); + zi->i_wr_refcnt = 0; ++ zi->i_flags = 0; + + return &zi->i_vnode; + } +@@ -1295,12 +1370,13 @@ static void zonefs_init_dir_inode(struct inode *parent, struct inode *inode, + inc_nlink(parent); + } + +-static 
void zonefs_init_file_inode(struct inode *inode, struct blk_zone *zone, +- enum zonefs_ztype type) ++static int zonefs_init_file_inode(struct inode *inode, struct blk_zone *zone, ++ enum zonefs_ztype type) + { + struct super_block *sb = inode->i_sb; + struct zonefs_sb_info *sbi = ZONEFS_SB(sb); + struct zonefs_inode_info *zi = ZONEFS_I(inode); ++ int ret = 0; + + inode->i_ino = zone->start >> sbi->s_zone_sectors_shift; + inode->i_mode = S_IFREG | sbi->s_perm; +@@ -1308,6 +1384,14 @@ static void zonefs_init_file_inode(struct inode *inode, struct blk_zone *zone, + zi->i_ztype = type; + zi->i_zsector = zone->start; + zi->i_zone_size = zone->len << SECTOR_SHIFT; ++ if (zi->i_zone_size > bdev_zone_sectors(sb->s_bdev) << SECTOR_SHIFT && ++ !(sbi->s_features & ZONEFS_F_AGGRCNV)) { ++ zonefs_err(sb, ++ "zone size %llu doesn't match device's zone sectors %llu\n", ++ zi->i_zone_size, ++ bdev_zone_sectors(sb->s_bdev) << SECTOR_SHIFT); ++ return -EINVAL; ++ } + + zi->i_max_size = min_t(loff_t, MAX_LFS_FILESIZE, + zone->capacity << SECTOR_SHIFT); +@@ -1325,6 +1409,22 @@ static void zonefs_init_file_inode(struct inode *inode, struct blk_zone *zone, + sb->s_maxbytes = max(zi->i_max_size, sb->s_maxbytes); + sbi->s_blocks += zi->i_max_size >> sb->s_blocksize_bits; + sbi->s_used_blocks += zi->i_wpoffset >> sb->s_blocksize_bits; ++ ++ /* ++ * For sequential zones, make sure that any open zone is closed first ++ * to ensure that the initial number of open zones is 0, in sync with ++ * the open zone accounting done when the mount option ++ * ZONEFS_MNTOPT_EXPLICIT_OPEN is used. ++ */ ++ if (type == ZONEFS_ZTYPE_SEQ && ++ (zone->cond == BLK_ZONE_COND_IMP_OPEN || ++ zone->cond == BLK_ZONE_COND_EXP_OPEN)) { ++ mutex_lock(&zi->i_truncate_mutex); ++ ret = zonefs_zone_mgmt(inode, REQ_OP_ZONE_CLOSE); ++ mutex_unlock(&zi->i_truncate_mutex); ++ } ++ ++ return ret; + } + + static struct dentry *zonefs_create_inode(struct dentry *parent, +@@ -1334,20 +1434,27 @@ static struct dentry *zonefs_create_inode(struct dentry *parent, + struct inode *dir = d_inode(parent); + struct dentry *dentry; + struct inode *inode; ++ int ret = -ENOMEM; + + dentry = d_alloc_name(parent, name); + if (!dentry) +- return NULL; ++ return ERR_PTR(ret); + + inode = new_inode(parent->d_sb); + if (!inode) + goto dput; + + inode->i_ctime = inode->i_mtime = inode->i_atime = dir->i_ctime; +- if (zone) +- zonefs_init_file_inode(inode, zone, type); +- else ++ if (zone) { ++ ret = zonefs_init_file_inode(inode, zone, type); ++ if (ret) { ++ iput(inode); ++ goto dput; ++ } ++ } else { + zonefs_init_dir_inode(dir, inode, type); ++ } ++ + d_add(dentry, inode); + dir->i_size++; + +@@ -1356,7 +1463,7 @@ static struct dentry *zonefs_create_inode(struct dentry *parent, + dput: + dput(dentry); + +- return NULL; ++ return ERR_PTR(ret); + } + + struct zonefs_zone_data { +@@ -1376,7 +1483,7 @@ static int zonefs_create_zgroup(struct zonefs_zone_data *zd, + struct blk_zone *zone, *next, *end; + const char *zgroup_name; + char *file_name; +- struct dentry *dir; ++ struct dentry *dir, *dent; + unsigned int n = 0; + int ret; + +@@ -1394,8 +1501,8 @@ static int zonefs_create_zgroup(struct zonefs_zone_data *zd, + zgroup_name = "seq"; + + dir = zonefs_create_inode(sb->s_root, zgroup_name, NULL, type); +- if (!dir) { +- ret = -ENOMEM; ++ if (IS_ERR(dir)) { ++ ret = PTR_ERR(dir); + goto free; + } + +@@ -1441,8 +1548,9 @@ static int zonefs_create_zgroup(struct zonefs_zone_data *zd, + * Use the file number within its group as file name. 
+ */ + snprintf(file_name, ZONEFS_NAME_MAX - 1, "%u", n); +- if (!zonefs_create_inode(dir, file_name, zone, type)) { +- ret = -ENOMEM; ++ dent = zonefs_create_inode(dir, file_name, zone, type); ++ if (IS_ERR(dent)) { ++ ret = PTR_ERR(dent); + goto free; + } + +@@ -1658,11 +1766,6 @@ static int zonefs_fill_super(struct super_block *sb, void *data, int silent) + sbi->s_mount_opts = ZONEFS_MNTOPT_ERRORS_RO; + sbi->s_max_open_zones = bdev_max_open_zones(sb->s_bdev); + atomic_set(&sbi->s_open_zones, 0); +- if (!sbi->s_max_open_zones && +- sbi->s_mount_opts & ZONEFS_MNTOPT_EXPLICIT_OPEN) { +- zonefs_info(sb, "No open zones limit. Ignoring explicit_open mount option\n"); +- sbi->s_mount_opts &= ~ZONEFS_MNTOPT_EXPLICIT_OPEN; +- } + + ret = zonefs_read_super(sb); + if (ret) +@@ -1681,6 +1784,12 @@ static int zonefs_fill_super(struct super_block *sb, void *data, int silent) + zonefs_info(sb, "Mounting %u zones", + blkdev_nr_zones(sb->s_bdev->bd_disk)); + ++ if (!sbi->s_max_open_zones && ++ sbi->s_mount_opts & ZONEFS_MNTOPT_EXPLICIT_OPEN) { ++ zonefs_info(sb, "No open zones limit. Ignoring explicit_open mount option\n"); ++ sbi->s_mount_opts &= ~ZONEFS_MNTOPT_EXPLICIT_OPEN; ++ } ++ + /* Create root directory inode */ + ret = -ENOMEM; + inode = new_inode(sb); +@@ -1787,5 +1896,6 @@ static void __exit zonefs_exit(void) + MODULE_AUTHOR("Damien Le Moal"); + MODULE_DESCRIPTION("Zone file system for zoned block devices"); + MODULE_LICENSE("GPL"); ++MODULE_ALIAS_FS("zonefs"); + module_init(zonefs_init); + module_exit(zonefs_exit); +diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h +index 13d93371790ec..e9c7d7b270e73 100644 +--- a/include/acpi/acpi_bus.h ++++ b/include/acpi/acpi_bus.h +@@ -613,9 +613,10 @@ int acpi_enable_wakeup_device_power(struct acpi_device *dev, int state); + int acpi_disable_wakeup_device_power(struct acpi_device *dev); + + #ifdef CONFIG_X86 +-bool acpi_device_always_present(struct acpi_device *adev); ++bool acpi_device_override_status(struct acpi_device *adev, unsigned long long *status); + #else +-static inline bool acpi_device_always_present(struct acpi_device *adev) ++static inline bool acpi_device_override_status(struct acpi_device *adev, ++ unsigned long long *status) + { + return false; + } +diff --git a/include/acpi/actypes.h b/include/acpi/actypes.h +index 92c71dfce0d5d..cefbb7ad253e0 100644 +--- a/include/acpi/actypes.h ++++ b/include/acpi/actypes.h +@@ -536,8 +536,14 @@ typedef u64 acpi_integer; + * Can be used with access_width of struct acpi_generic_address and access_size of + * struct acpi_resource_generic_register. 
+ */ +-#define ACPI_ACCESS_BIT_WIDTH(size) (1 << ((size) + 2)) +-#define ACPI_ACCESS_BYTE_WIDTH(size) (1 << ((size) - 1)) ++#define ACPI_ACCESS_BIT_SHIFT 2 ++#define ACPI_ACCESS_BYTE_SHIFT -1 ++#define ACPI_ACCESS_BIT_MAX (31 - ACPI_ACCESS_BIT_SHIFT) ++#define ACPI_ACCESS_BYTE_MAX (31 - ACPI_ACCESS_BYTE_SHIFT) ++#define ACPI_ACCESS_BIT_DEFAULT (8 - ACPI_ACCESS_BIT_SHIFT) ++#define ACPI_ACCESS_BYTE_DEFAULT (8 - ACPI_ACCESS_BYTE_SHIFT) ++#define ACPI_ACCESS_BIT_WIDTH(size) (1 << ((size) + ACPI_ACCESS_BIT_SHIFT)) ++#define ACPI_ACCESS_BYTE_WIDTH(size) (1 << ((size) + ACPI_ACCESS_BYTE_SHIFT)) + + /******************************************************************************* + * +diff --git a/include/acpi/apei.h b/include/acpi/apei.h +index 680f80960c3dc..a6ac2e8b72da8 100644 +--- a/include/acpi/apei.h ++++ b/include/acpi/apei.h +@@ -27,14 +27,16 @@ extern int hest_disable; + extern int erst_disable; + #ifdef CONFIG_ACPI_APEI_GHES + extern bool ghes_disable; ++void __init ghes_init(void); + #else + #define ghes_disable 1 ++static inline void ghes_init(void) { } + #endif + + #ifdef CONFIG_ACPI_APEI + void __init acpi_hest_init(void); + #else +-static inline void acpi_hest_init(void) { return; } ++static inline void acpi_hest_init(void) { } + #endif + + typedef int (*apei_hest_func_t)(struct acpi_hest_header *hest_hdr, void *data); +diff --git a/include/acpi/cppc_acpi.h b/include/acpi/cppc_acpi.h +index bc159a9b4a733..6b14414b9ec12 100644 +--- a/include/acpi/cppc_acpi.h ++++ b/include/acpi/cppc_acpi.h +@@ -17,7 +17,7 @@ + #include <acpi/pcc.h> + #include <acpi/processor.h> + +-/* Support CPPCv2 and CPPCv3 */ ++/* CPPCv2 and CPPCv3 support */ + #define CPPC_V2_REV 2 + #define CPPC_V3_REV 3 + #define CPPC_V2_NUM_ENT 21 +diff --git a/include/acpi/ghes.h b/include/acpi/ghes.h +index 34fb3431a8f36..292a5c40bd0c6 100644 +--- a/include/acpi/ghes.h ++++ b/include/acpi/ghes.h +@@ -71,7 +71,7 @@ int ghes_register_vendor_record_notifier(struct notifier_block *nb); + void ghes_unregister_vendor_record_notifier(struct notifier_block *nb); + #endif + +-int ghes_estatus_pool_init(int num_ghes); ++int ghes_estatus_pool_init(unsigned int num_ghes); + + /* From drivers/edac/ghes_edac.c */ + +diff --git a/include/asm-generic/bitops/atomic.h b/include/asm-generic/bitops/atomic.h +index 3096f086b5a32..71ab4ba9c25d1 100644 +--- a/include/asm-generic/bitops/atomic.h ++++ b/include/asm-generic/bitops/atomic.h +@@ -39,9 +39,6 @@ arch_test_and_set_bit(unsigned int nr, volatile unsigned long *p) + unsigned long mask = BIT_MASK(nr); + + p += BIT_WORD(nr); +- if (READ_ONCE(*p) & mask) +- return 1; +- + old = arch_atomic_long_fetch_or(mask, (atomic_long_t *)p); + return !!(old & mask); + } +@@ -53,9 +50,6 @@ arch_test_and_clear_bit(unsigned int nr, volatile unsigned long *p) + unsigned long mask = BIT_MASK(nr); + + p += BIT_WORD(nr); +- if (!(READ_ONCE(*p) & mask)) +- return 0; +- + old = arch_atomic_long_fetch_andnot(mask, (atomic_long_t *)p); + return !!(old & mask); + } +diff --git a/include/asm-generic/bitops/find.h b/include/asm-generic/bitops/find.h +index 0d132ee2a2913..835f959a25f25 100644 +--- a/include/asm-generic/bitops/find.h ++++ b/include/asm-generic/bitops/find.h +@@ -97,6 +97,7 @@ unsigned long find_next_zero_bit(const unsigned long *addr, unsigned long size, + + #ifdef CONFIG_GENERIC_FIND_FIRST_BIT + ++#ifndef find_first_bit + /** + * find_first_bit - find the first set bit in a memory region + * @addr: The address to start the search at +@@ -116,7 +117,9 @@ unsigned long find_first_bit(const unsigned long 
*addr, unsigned long size)
+ 
+ return _find_first_bit(addr, size);
+ }
++#endif
+ 
++#ifndef find_first_zero_bit
+ /**
+ * find_first_zero_bit - find the first cleared bit in a memory region
+ * @addr: The address to start the search at
+@@ -136,6 +139,8 @@ unsigned long find_first_zero_bit(const unsigned long *addr, unsigned long size)
+ 
+ return _find_first_zero_bit(addr, size);
+ }
++#endif
++
+ #else /* CONFIG_GENERIC_FIND_FIRST_BIT */
+ 
+ #ifndef find_first_bit
+diff --git a/include/asm-generic/io.h b/include/asm-generic/io.h
+index 7ce93aaf69f8d..98954dda57344 100644
+--- a/include/asm-generic/io.h
++++ b/include/asm-generic/io.h
+@@ -1125,9 +1125,7 @@ static inline void memcpy_toio(volatile void __iomem *addr, const void *buffer,
+ }
+ #endif
+ 
+-#ifndef CONFIG_GENERIC_DEVMEM_IS_ALLOWED
+ extern int devmem_is_allowed(unsigned long pfn);
+-#endif
+ 
+ #endif /* __KERNEL__ */
+ 
+diff --git a/include/asm-generic/sections.h b/include/asm-generic/sections.h
+index d16302d3eb597..72f1e2a8c1670 100644
+--- a/include/asm-generic/sections.h
++++ b/include/asm-generic/sections.h
+@@ -114,7 +114,7 @@ static inline bool memory_contains(void *begin, void *end, void *virt,
+ /**
+ * memory_intersects - checks if the region occupied by an object intersects
+ * with another memory region
+- * @begin: virtual address of the beginning of the memory regien
++ * @begin: virtual address of the beginning of the memory region
+ * @end: virtual address of the end of the memory region
+ * @virt: virtual address of the memory object
+ * @size: size of the memory object
+@@ -127,7 +127,10 @@ static inline bool memory_intersects(void *begin, void *end, void *virt,
+ {
+ void *vend = virt + size;
+ 
+- return (virt >= begin && virt < end) || (vend >= begin && vend < end);
++ if (virt < end && vend > begin)
++ return true;
++
++ return false;
+ }
+ 
+ /**
+diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h
+index 2c68a545ffa7d..c99710b3027a0 100644
+--- a/include/asm-generic/tlb.h
++++ b/include/asm-generic/tlb.h
+@@ -207,12 +207,16 @@ extern void tlb_remove_table(struct mmu_gather *tlb, void *table);
+ #define tlb_needs_table_invalidate() (true)
+ #endif
+ 
++void tlb_remove_table_sync_one(void);
++
+ #else
+ 
+ #ifdef tlb_needs_table_invalidate
+ #error tlb_needs_table_invalidate() requires MMU_GATHER_RCU_TABLE_FREE
+ #endif
+ 
++static inline void tlb_remove_table_sync_one(void) { }
++
+ #endif /* CONFIG_MMU_GATHER_RCU_TABLE_FREE */
+ 
+ 
+@@ -565,10 +569,14 @@ static inline void tlb_flush_p4d_range(struct mmu_gather *tlb,
+ #define tlb_remove_huge_tlb_entry(h, tlb, ptep, address)	\
+ do {							\
+ unsigned long _sz = huge_page_size(h);		\
+- if (_sz == PMD_SIZE)				\
+- tlb_flush_pmd_range(tlb, address, _sz);	\
+- else if (_sz == PUD_SIZE)			\
++ if (_sz >= P4D_SIZE)				\
++ tlb_flush_p4d_range(tlb, address, _sz);	\
++ else if (_sz >= PUD_SIZE)			\
+ tlb_flush_pud_range(tlb, address, _sz);	\
++ else if (_sz >= PMD_SIZE)			\
++ tlb_flush_pmd_range(tlb, address, _sz);	\
++ else						\
++ tlb_flush_pte_range(tlb, address, _sz);	\
+ __tlb_remove_tlb_entry(tlb, ptep, address);	\
+ } while (0)
+ 
+diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
+index f2984af2b85bd..e28792ca25a1f 100644
+--- a/include/asm-generic/vmlinux.lds.h
++++ b/include/asm-generic/vmlinux.lds.h
+@@ -337,6 +337,7 @@
+ #define DATA_DATA							\
+ *(.xiptext)							\
+ *(DATA_MAIN)							\
++ *(.data..decrypted)						\
+ *(.ref.data)							\
+ *(.data..shared_aligned) /* percpu related */			\
+ MEM_KEEP(init.data*)						\
+@@ -549,10 +550,9 @@
+ */
+ #ifdef CONFIG_CFI_CLANG
+ #define TEXT_CFI_JT						\
+- . = ALIGN(PMD_SIZE);				\
++ ALIGN_FUNCTION();				\
+ __cfi_jt_start = .;				\
+ *(.text..L.cfi.jumptable .text..L.cfi.jumptable.*)	\
+- . = ALIGN(PMD_SIZE);				\
+ __cfi_jt_end = .;
+ #else
+ #define TEXT_CFI_JT
+@@ -970,7 +970,6 @@
+ #ifdef CONFIG_AMD_MEM_ENCRYPT
+ #define PERCPU_DECRYPTED_SECTION					\
+ . = ALIGN(PAGE_SIZE);						\
+- *(.data..decrypted)						\
+ *(.data..percpu..decrypted)					\
+ . = ALIGN(PAGE_SIZE);
+ #else
+diff --git a/include/crypto/blake2s.h b/include/crypto/blake2s.h
+index bc3fb59442ce5..4e30e1799e614 100644
+--- a/include/crypto/blake2s.h
++++ b/include/crypto/blake2s.h
+@@ -101,7 +101,4 @@ static inline void blake2s(u8 *out, const u8 *in, const u8 *key,
+ blake2s_final(&state, out);
+ }
+ 
+-void blake2s256_hmac(u8 *out, const u8 *in, const u8 *key, const size_t inlen,
+- const size_t keylen);
+-
+ #endif /* _CRYPTO_BLAKE2S_H */
+diff --git a/include/crypto/chacha.h b/include/crypto/chacha.h
+index dabaee6987186..b3ea73b819443 100644
+--- a/include/crypto/chacha.h
++++ b/include/crypto/chacha.h
+@@ -47,12 +47,19 @@ static inline void hchacha_block(const u32 *state, u32 *out, int nrounds)
+ hchacha_block_generic(state, out, nrounds);
+ }
+ 
++enum chacha_constants { /* expand 32-byte k */
++ CHACHA_CONSTANT_EXPA = 0x61707865U,
++ CHACHA_CONSTANT_ND_3 = 0x3320646eU,
++ CHACHA_CONSTANT_2_BY = 0x79622d32U,
++ CHACHA_CONSTANT_TE_K = 0x6b206574U
++};
++
+ static inline void chacha_init_consts(u32 *state)
+ {
+- state[0] = 0x61707865; /* "expa" */
+- state[1] = 0x3320646e; /* "nd 3" */
+- state[2] = 0x79622d32; /* "2-by" */
+- state[3] = 0x6b206574; /* "te k" */
++ state[0] = CHACHA_CONSTANT_EXPA;
++ state[1] = CHACHA_CONSTANT_ND_3;
++ state[2] = CHACHA_CONSTANT_2_BY;
++ state[3] = CHACHA_CONSTANT_TE_K;
+ }
+ 
+ void chacha_init_arch(u32 *state, const u32 *key, const u8 *iv);
+diff --git a/include/crypto/drbg.h b/include/crypto/drbg.h
+index c4165126937e4..a6c3b8e7deb64 100644
+--- a/include/crypto/drbg.h
++++ b/include/crypto/drbg.h
+@@ -105,6 +105,12 @@ struct drbg_test_data {
+ struct drbg_string *testentropy; /* TEST PARAMETER: test entropy */
+ };
+ 
++enum drbg_seed_state {
++ DRBG_SEED_STATE_UNSEEDED,
++ DRBG_SEED_STATE_PARTIAL, /* Seeded with !rng_is_initialized() */
++ DRBG_SEED_STATE_FULL,
++};
++
+ struct drbg_state {
+ struct mutex drbg_mutex; /* lock around DRBG */
+ unsigned char *V; /* internal state 10.1.1.1 1a) */
+@@ -127,16 +133,14 @@ struct drbg_state {
+ struct crypto_wait ctr_wait; /* CTR mode async wait obj */
+ struct scatterlist sg_in, sg_out; /* CTR mode SGLs */
+ 
+- bool seeded; /* DRBG fully seeded? */
++ enum drbg_seed_state seeded; /* DRBG fully seeded? */
+ bool pr; /* Prediction resistance enabled? */
+ bool fips_primed; /* Continuous test primed?
*/ + unsigned char *prev; /* FIPS 140-2 continuous test value */ +- struct work_struct seed_work; /* asynchronous seeding support */ + struct crypto_rng *jent; + const struct drbg_state_ops *d_ops; + const struct drbg_core *core; + struct drbg_string test_data; +- struct random_ready_callback random_ready; + }; + + static inline __u8 drbg_statelen(struct drbg_state *drbg) +diff --git a/include/crypto/internal/blake2s.h b/include/crypto/internal/blake2s.h +index 8e50d487500f2..506d56530ca93 100644 +--- a/include/crypto/internal/blake2s.h ++++ b/include/crypto/internal/blake2s.h +@@ -8,112 +8,14 @@ + #define _CRYPTO_INTERNAL_BLAKE2S_H + + #include <crypto/blake2s.h> +-#include <crypto/internal/hash.h> + #include <linux/string.h> + +-void blake2s_compress_generic(struct blake2s_state *state,const u8 *block, ++void blake2s_compress_generic(struct blake2s_state *state, const u8 *block, + size_t nblocks, const u32 inc); + +-void blake2s_compress_arch(struct blake2s_state *state,const u8 *block, +- size_t nblocks, const u32 inc); ++void blake2s_compress(struct blake2s_state *state, const u8 *block, ++ size_t nblocks, const u32 inc); + + bool blake2s_selftest(void); + +-static inline void blake2s_set_lastblock(struct blake2s_state *state) +-{ +- state->f[0] = -1; +-} +- +-typedef void (*blake2s_compress_t)(struct blake2s_state *state, +- const u8 *block, size_t nblocks, u32 inc); +- +-/* Helper functions for BLAKE2s shared by the library and shash APIs */ +- +-static inline void __blake2s_update(struct blake2s_state *state, +- const u8 *in, size_t inlen, +- blake2s_compress_t compress) +-{ +- const size_t fill = BLAKE2S_BLOCK_SIZE - state->buflen; +- +- if (unlikely(!inlen)) +- return; +- if (inlen > fill) { +- memcpy(state->buf + state->buflen, in, fill); +- (*compress)(state, state->buf, 1, BLAKE2S_BLOCK_SIZE); +- state->buflen = 0; +- in += fill; +- inlen -= fill; +- } +- if (inlen > BLAKE2S_BLOCK_SIZE) { +- const size_t nblocks = DIV_ROUND_UP(inlen, BLAKE2S_BLOCK_SIZE); +- /* Hash one less (full) block than strictly possible */ +- (*compress)(state, in, nblocks - 1, BLAKE2S_BLOCK_SIZE); +- in += BLAKE2S_BLOCK_SIZE * (nblocks - 1); +- inlen -= BLAKE2S_BLOCK_SIZE * (nblocks - 1); +- } +- memcpy(state->buf + state->buflen, in, inlen); +- state->buflen += inlen; +-} +- +-static inline void __blake2s_final(struct blake2s_state *state, u8 *out, +- blake2s_compress_t compress) +-{ +- blake2s_set_lastblock(state); +- memset(state->buf + state->buflen, 0, +- BLAKE2S_BLOCK_SIZE - state->buflen); /* Padding */ +- (*compress)(state, state->buf, 1, state->buflen); +- cpu_to_le32_array(state->h, ARRAY_SIZE(state->h)); +- memcpy(out, state->h, state->outlen); +-} +- +-/* Helper functions for shash implementations of BLAKE2s */ +- +-struct blake2s_tfm_ctx { +- u8 key[BLAKE2S_KEY_SIZE]; +- unsigned int keylen; +-}; +- +-static inline int crypto_blake2s_setkey(struct crypto_shash *tfm, +- const u8 *key, unsigned int keylen) +-{ +- struct blake2s_tfm_ctx *tctx = crypto_shash_ctx(tfm); +- +- if (keylen == 0 || keylen > BLAKE2S_KEY_SIZE) +- return -EINVAL; +- +- memcpy(tctx->key, key, keylen); +- tctx->keylen = keylen; +- +- return 0; +-} +- +-static inline int crypto_blake2s_init(struct shash_desc *desc) +-{ +- const struct blake2s_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm); +- struct blake2s_state *state = shash_desc_ctx(desc); +- unsigned int outlen = crypto_shash_digestsize(desc->tfm); +- +- __blake2s_init(state, outlen, tctx->key, tctx->keylen); +- return 0; +-} +- +-static inline int 
crypto_blake2s_update(struct shash_desc *desc, +- const u8 *in, unsigned int inlen, +- blake2s_compress_t compress) +-{ +- struct blake2s_state *state = shash_desc_ctx(desc); +- +- __blake2s_update(state, in, inlen, compress); +- return 0; +-} +- +-static inline int crypto_blake2s_final(struct shash_desc *desc, u8 *out, +- blake2s_compress_t compress) +-{ +- struct blake2s_state *state = shash_desc_ctx(desc); +- +- __blake2s_final(state, out, compress); +- return 0; +-} +- + #endif /* _CRYPTO_INTERNAL_BLAKE2S_H */ +diff --git a/include/drm/drm_bridge.h b/include/drm/drm_bridge.h +index 46bdfa48c4134..1648ce265cba0 100644 +--- a/include/drm/drm_bridge.h ++++ b/include/drm/drm_bridge.h +@@ -914,4 +914,17 @@ struct drm_bridge *devm_drm_panel_bridge_add_typed(struct device *dev, + struct drm_connector *drm_panel_bridge_connector(struct drm_bridge *bridge); + #endif + ++#if defined(CONFIG_OF) && defined(CONFIG_DRM_PANEL_BRIDGE) ++struct drm_bridge *devm_drm_of_get_bridge(struct device *dev, struct device_node *node, ++ u32 port, u32 endpoint); ++#else ++static inline struct drm_bridge *devm_drm_of_get_bridge(struct device *dev, ++ struct device_node *node, ++ u32 port, ++ u32 endpoint) ++{ ++ return ERR_PTR(-ENODEV); ++} ++#endif ++ + #endif +diff --git a/include/drm/drm_connector.h b/include/drm/drm_connector.h +index 1647960c9e506..1f43d7c6724aa 100644 +--- a/include/drm/drm_connector.h ++++ b/include/drm/drm_connector.h +@@ -566,10 +566,16 @@ struct drm_display_info { + bool rgb_quant_range_selectable; + + /** +- * @edid_hdmi_dc_modes: Mask of supported hdmi deep color modes. Even +- * more stuff redundant with @bus_formats. ++ * @edid_hdmi_rgb444_dc_modes: Mask of supported hdmi deep color modes ++ * in RGB 4:4:4. Even more stuff redundant with @bus_formats. + */ +- u8 edid_hdmi_dc_modes; ++ u8 edid_hdmi_rgb444_dc_modes; ++ ++ /** ++ * @edid_hdmi_ycbcr444_dc_modes: Mask of supported hdmi deep color ++ * modes in YCbCr 4:4:4. Even more stuff redundant with @bus_formats. ++ */ ++ u8 edid_hdmi_ycbcr444_dc_modes; + + /** + * @cea_rev: CEA revision of the HDMI sink. 
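[Editor's sketch, not part of the patch: the drm_bridge.h hunk above declares devm_drm_of_get_bridge() with its CONFIG_OF/CONFIG_DRM_PANEL_BRIDGE stub. A minimal caller might look like the following; the function and driver names are illustrative, and the port/endpoint numbers (1, 0) depend on the board's device-tree graph rather than anything in this patch.]

#include <linux/device.h>
#include <linux/err.h>
#include <linux/of.h>
#include <drm/drm_bridge.h>

/* Fetch the bridge wired to port 1, endpoint 0 of this device's OF node.
 * The reference is device-managed, so no explicit put is needed on unbind. */
static int example_attach_next_bridge(struct device *dev)
{
	struct drm_bridge *bridge;

	bridge = devm_drm_of_get_bridge(dev, dev->of_node, 1, 0);
	if (IS_ERR(bridge))
		return PTR_ERR(bridge);  /* ERR_PTR(-ENODEV) when the helper is stubbed out */

	/* ... drm_bridge_attach(encoder, bridge, NULL, 0) would follow ... */
	return 0;
}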
+diff --git a/include/drm/drm_dp_helper.h b/include/drm/drm_dp_helper.h +index 1d5b3dbb6e563..dfb46915015b1 100644 +--- a/include/drm/drm_dp_helper.h ++++ b/include/drm/drm_dp_helper.h +@@ -455,7 +455,7 @@ struct drm_panel; + # define DP_FEC_BIT_ERROR_COUNT_CAP (1 << 3) + + /* DP-HDMI2.1 PCON DSC ENCODER SUPPORT */ +-#define DP_PCON_DSC_ENCODER_CAP_SIZE 0xC /* 0x9E - 0x92 */ ++#define DP_PCON_DSC_ENCODER_CAP_SIZE 0xD /* 0x92 through 0x9E */ + #define DP_PCON_DSC_ENCODER 0x092 + # define DP_PCON_DSC_ENCODER_SUPPORTED (1 << 0) + # define DP_PCON_DSC_PPS_ENC_OVERRIDE (1 << 1) +diff --git a/include/drm/drm_edid.h b/include/drm/drm_edid.h +index deccfd39e6db2..c24559f5329dd 100644 +--- a/include/drm/drm_edid.h ++++ b/include/drm/drm_edid.h +@@ -121,7 +121,7 @@ struct detailed_data_monitor_range { + u8 supported_scalings; + u8 preferred_refresh; + } __attribute__((packed)) cvt; +- } formula; ++ } __attribute__((packed)) formula; + } __attribute__((packed)); + + struct detailed_data_wpindex { +@@ -154,7 +154,7 @@ struct detailed_non_pixel { + struct detailed_data_wpindex color; + struct std_timing timings[6]; + struct cvt_timing cvt[4]; +- } data; ++ } __attribute__((packed)) data; + } __attribute__((packed)); + + #define EDID_DETAIL_EST_TIMINGS 0xf7 +@@ -172,7 +172,7 @@ struct detailed_timing { + union { + struct detailed_pixel_timing pixel_data; + struct detailed_non_pixel other_data; +- } data; ++ } __attribute__((packed)) data; + } __attribute__((packed)); + + #define DRM_EDID_INPUT_SERRATION_VSYNC (1 << 0) +diff --git a/include/drm/drm_gem_shmem_helper.h b/include/drm/drm_gem_shmem_helper.h +index 434328d8a0d90..311d66c9cf4b1 100644 +--- a/include/drm/drm_gem_shmem_helper.h ++++ b/include/drm/drm_gem_shmem_helper.h +@@ -107,16 +107,17 @@ struct drm_gem_shmem_object { + container_of(obj, struct drm_gem_shmem_object, base) + + struct drm_gem_shmem_object *drm_gem_shmem_create(struct drm_device *dev, size_t size); +-void drm_gem_shmem_free_object(struct drm_gem_object *obj); ++void drm_gem_shmem_free(struct drm_gem_shmem_object *shmem); + + int drm_gem_shmem_get_pages(struct drm_gem_shmem_object *shmem); + void drm_gem_shmem_put_pages(struct drm_gem_shmem_object *shmem); +-int drm_gem_shmem_pin(struct drm_gem_object *obj); +-void drm_gem_shmem_unpin(struct drm_gem_object *obj); +-int drm_gem_shmem_vmap(struct drm_gem_object *obj, struct dma_buf_map *map); +-void drm_gem_shmem_vunmap(struct drm_gem_object *obj, struct dma_buf_map *map); ++int drm_gem_shmem_pin(struct drm_gem_shmem_object *shmem); ++void drm_gem_shmem_unpin(struct drm_gem_shmem_object *shmem); ++int drm_gem_shmem_vmap(struct drm_gem_shmem_object *shmem, struct dma_buf_map *map); ++void drm_gem_shmem_vunmap(struct drm_gem_shmem_object *shmem, struct dma_buf_map *map); ++int drm_gem_shmem_mmap(struct drm_gem_shmem_object *shmem, struct vm_area_struct *vma); + +-int drm_gem_shmem_madvise(struct drm_gem_object *obj, int madv); ++int drm_gem_shmem_madvise(struct drm_gem_shmem_object *shmem, int madv); + + static inline bool drm_gem_shmem_is_purgeable(struct drm_gem_shmem_object *shmem) + { +@@ -125,29 +126,156 @@ static inline bool drm_gem_shmem_is_purgeable(struct drm_gem_shmem_object *shmem + !shmem->base.dma_buf && !shmem->base.import_attach; + } + +-void drm_gem_shmem_purge_locked(struct drm_gem_object *obj); +-bool drm_gem_shmem_purge(struct drm_gem_object *obj); ++void drm_gem_shmem_purge_locked(struct drm_gem_shmem_object *shmem); ++bool drm_gem_shmem_purge(struct drm_gem_shmem_object *shmem); + +-struct drm_gem_shmem_object * 
+-drm_gem_shmem_create_with_handle(struct drm_file *file_priv, +- struct drm_device *dev, size_t size, +- uint32_t *handle); ++struct sg_table *drm_gem_shmem_get_sg_table(struct drm_gem_shmem_object *shmem); ++struct sg_table *drm_gem_shmem_get_pages_sgt(struct drm_gem_shmem_object *shmem); + +-int drm_gem_shmem_dumb_create(struct drm_file *file, struct drm_device *dev, +- struct drm_mode_create_dumb *args); ++void drm_gem_shmem_print_info(const struct drm_gem_shmem_object *shmem, ++ struct drm_printer *p, unsigned int indent); ++ ++/* ++ * GEM object functions ++ */ ++ ++/** ++ * drm_gem_shmem_object_free - GEM object function for drm_gem_shmem_free() ++ * @obj: GEM object to free ++ * ++ * This function wraps drm_gem_shmem_free(). Drivers that employ the shmem helpers ++ * should use it as their &drm_gem_object_funcs.free handler. ++ */ ++static inline void drm_gem_shmem_object_free(struct drm_gem_object *obj) ++{ ++ struct drm_gem_shmem_object *shmem = to_drm_gem_shmem_obj(obj); ++ ++ drm_gem_shmem_free(shmem); ++} ++ ++/** ++ * drm_gem_shmem_object_print_info() - Print &drm_gem_shmem_object info for debugfs ++ * @p: DRM printer ++ * @indent: Tab indentation level ++ * @obj: GEM object ++ * ++ * This function wraps drm_gem_shmem_print_info(). Drivers that employ the shmem helpers should ++ * use this function as their &drm_gem_object_funcs.print_info handler. ++ */ ++static inline void drm_gem_shmem_object_print_info(struct drm_printer *p, unsigned int indent, ++ const struct drm_gem_object *obj) ++{ ++ const struct drm_gem_shmem_object *shmem = to_drm_gem_shmem_obj(obj); ++ ++ drm_gem_shmem_print_info(shmem, p, indent); ++} ++ ++/** ++ * drm_gem_shmem_object_pin - GEM object function for drm_gem_shmem_pin() ++ * @obj: GEM object ++ * ++ * This function wraps drm_gem_shmem_pin(). Drivers that employ the shmem helpers should ++ * use it as their &drm_gem_object_funcs.pin handler. ++ */ ++static inline int drm_gem_shmem_object_pin(struct drm_gem_object *obj) ++{ ++ struct drm_gem_shmem_object *shmem = to_drm_gem_shmem_obj(obj); ++ ++ return drm_gem_shmem_pin(shmem); ++} ++ ++/** ++ * drm_gem_shmem_object_unpin - GEM object function for drm_gem_shmem_unpin() ++ * @obj: GEM object ++ * ++ * This function wraps drm_gem_shmem_unpin(). Drivers that employ the shmem helpers should ++ * use it as their &drm_gem_object_funcs.unpin handler. ++ */ ++static inline void drm_gem_shmem_object_unpin(struct drm_gem_object *obj) ++{ ++ struct drm_gem_shmem_object *shmem = to_drm_gem_shmem_obj(obj); ++ ++ drm_gem_shmem_unpin(shmem); ++} + +-int drm_gem_shmem_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma); ++/** ++ * drm_gem_shmem_object_get_sg_table - GEM object function for drm_gem_shmem_get_sg_table() ++ * @obj: GEM object ++ * ++ * This function wraps drm_gem_shmem_get_sg_table(). Drivers that employ the shmem helpers should ++ * use it as their &drm_gem_object_funcs.get_sg_table handler. ++ * ++ * Returns: ++ * A pointer to the scatter/gather table of pinned pages or NULL on failure. ++ */ ++static inline struct sg_table *drm_gem_shmem_object_get_sg_table(struct drm_gem_object *obj) ++{ ++ struct drm_gem_shmem_object *shmem = to_drm_gem_shmem_obj(obj); ++ ++ return drm_gem_shmem_get_sg_table(shmem); ++} ++ ++/* ++ * drm_gem_shmem_object_vmap - GEM object function for drm_gem_shmem_vmap() ++ * @obj: GEM object ++ * @map: Returns the kernel virtual address of the SHMEM GEM object's backing store. ++ * ++ * This function wraps drm_gem_shmem_vmap(). 
Drivers that employ the shmem helpers should ++ * use it as their &drm_gem_object_funcs.vmap handler. ++ * ++ * Returns: ++ * 0 on success or a negative error code on failure. ++ */ ++static inline int drm_gem_shmem_object_vmap(struct drm_gem_object *obj, struct dma_buf_map *map) ++{ ++ struct drm_gem_shmem_object *shmem = to_drm_gem_shmem_obj(obj); ++ ++ return drm_gem_shmem_vmap(shmem, map); ++} ++ ++/* ++ * drm_gem_shmem_object_vunmap - GEM object function for drm_gem_shmem_vunmap() ++ * @obj: GEM object ++ * @map: Kernel virtual address where the SHMEM GEM object was mapped ++ * ++ * This function wraps drm_gem_shmem_vunmap(). Drivers that employ the shmem helpers should ++ * use it as their &drm_gem_object_funcs.vunmap handler. ++ */ ++static inline void drm_gem_shmem_object_vunmap(struct drm_gem_object *obj, struct dma_buf_map *map) ++{ ++ struct drm_gem_shmem_object *shmem = to_drm_gem_shmem_obj(obj); ++ ++ drm_gem_shmem_vunmap(shmem, map); ++} ++ ++/** ++ * drm_gem_shmem_object_mmap - GEM object function for drm_gem_shmem_mmap() ++ * @obj: GEM object ++ * @vma: VMA for the area to be mapped ++ * ++ * This function wraps drm_gem_shmem_mmap(). Drivers that employ the shmem helpers should ++ * use it as their &drm_gem_object_funcs.mmap handler. ++ * ++ * Returns: ++ * 0 on success or a negative error code on failure. ++ */ ++static inline int drm_gem_shmem_object_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma) ++{ ++ struct drm_gem_shmem_object *shmem = to_drm_gem_shmem_obj(obj); ++ ++ return drm_gem_shmem_mmap(shmem, vma); ++} + +-void drm_gem_shmem_print_info(struct drm_printer *p, unsigned int indent, +- const struct drm_gem_object *obj); ++/* ++ * Driver ops ++ */ + +-struct sg_table *drm_gem_shmem_get_sg_table(struct drm_gem_object *obj); + struct drm_gem_object * + drm_gem_shmem_prime_import_sg_table(struct drm_device *dev, + struct dma_buf_attachment *attach, + struct sg_table *sgt); +- +-struct sg_table *drm_gem_shmem_get_pages_sgt(struct drm_gem_object *obj); ++int drm_gem_shmem_dumb_create(struct drm_file *file, struct drm_device *dev, ++ struct drm_mode_create_dumb *args); + + /** + * DRM_GEM_SHMEM_DRIVER_OPS - Default shmem GEM operations +diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h +index f681bbdbc6982..36f7eb9d06639 100644 +--- a/include/drm/ttm/ttm_bo_api.h ++++ b/include/drm/ttm/ttm_bo_api.h +@@ -594,8 +594,7 @@ vm_fault_t ttm_bo_vm_reserve(struct ttm_buffer_object *bo, + + vm_fault_t ttm_bo_vm_fault_reserved(struct vm_fault *vmf, + pgprot_t prot, +- pgoff_t num_prefault, +- pgoff_t fault_page_size); ++ pgoff_t num_prefault); + + vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf); + +diff --git a/include/dt-bindings/clock/imx8mn-clock.h b/include/dt-bindings/clock/imx8mn-clock.h +index 01e8bab1d767a..1aa462e5cafd4 100644 +--- a/include/dt-bindings/clock/imx8mn-clock.h ++++ b/include/dt-bindings/clock/imx8mn-clock.h +@@ -19,7 +19,8 @@ + #define IMX8MN_VIDEO_PLL1_REF_SEL 10 + #define IMX8MN_DRAM_PLL_REF_SEL 11 + #define IMX8MN_GPU_PLL_REF_SEL 12 +-#define IMX8MN_VPU_PLL_REF_SEL 13 ++#define IMX8MN_M7_ALT_PLL_REF_SEL 13 ++#define IMX8MN_VPU_PLL_REF_SEL IMX8MN_M7_ALT_PLL_REF_SEL + #define IMX8MN_ARM_PLL_REF_SEL 14 + #define IMX8MN_SYS_PLL1_REF_SEL 15 + #define IMX8MN_SYS_PLL2_REF_SEL 16 +@@ -29,7 +30,8 @@ + #define IMX8MN_VIDEO_PLL1 20 + #define IMX8MN_DRAM_PLL 21 + #define IMX8MN_GPU_PLL 22 +-#define IMX8MN_VPU_PLL 23 ++#define IMX8MN_M7_ALT_PLL 23 ++#define IMX8MN_VPU_PLL IMX8MN_M7_ALT_PLL + #define IMX8MN_ARM_PLL 24 + #define 
IMX8MN_SYS_PLL1 25 + #define IMX8MN_SYS_PLL2 26 +@@ -39,7 +41,8 @@ + #define IMX8MN_VIDEO_PLL1_BYPASS 30 + #define IMX8MN_DRAM_PLL_BYPASS 31 + #define IMX8MN_GPU_PLL_BYPASS 32 +-#define IMX8MN_VPU_PLL_BYPASS 33 ++#define IMX8MN_M7_ALT_PLL_BYPASS 33 ++#define IMX8MN_VPU_PLL_BYPASS IMX8MN_M7_ALT_PLL_BYPASS + #define IMX8MN_ARM_PLL_BYPASS 34 + #define IMX8MN_SYS_PLL1_BYPASS 35 + #define IMX8MN_SYS_PLL2_BYPASS 36 +@@ -49,7 +52,8 @@ + #define IMX8MN_VIDEO_PLL1_OUT 40 + #define IMX8MN_DRAM_PLL_OUT 41 + #define IMX8MN_GPU_PLL_OUT 42 +-#define IMX8MN_VPU_PLL_OUT 43 ++#define IMX8MN_M7_ALT_PLL_OUT 43 ++#define IMX8MN_VPU_PLL_OUT IMX8MN_M7_ALT_PLL_OUT + #define IMX8MN_ARM_PLL_OUT 44 + #define IMX8MN_SYS_PLL1_OUT 45 + #define IMX8MN_SYS_PLL2_OUT 46 +diff --git a/include/dt-bindings/clock/qcom,gcc-msm8939.h b/include/dt-bindings/clock/qcom,gcc-msm8939.h +index 0634467c4ce5a..2d545ed0d35ab 100644 +--- a/include/dt-bindings/clock/qcom,gcc-msm8939.h ++++ b/include/dt-bindings/clock/qcom,gcc-msm8939.h +@@ -192,6 +192,7 @@ + #define GCC_VENUS0_CORE0_VCODEC0_CLK 183 + #define GCC_VENUS0_CORE1_VCODEC0_CLK 184 + #define GCC_OXILI_TIMER_CLK 185 ++#define SYSTEM_MM_NOC_BFDCD_CLK_SRC 186 + + /* Indexes for GDSCs */ + #define BIMC_GDSC 0 +diff --git a/include/linux/acpi.h b/include/linux/acpi.h +index 974d497a897dc..2d7df5cea2494 100644 +--- a/include/linux/acpi.h ++++ b/include/linux/acpi.h +@@ -976,6 +976,15 @@ static inline int acpi_get_local_address(acpi_handle handle, u32 *addr) + return -ENODEV; + } + ++static inline int acpi_register_wakeup_handler(int wake_irq, ++ bool (*wakeup)(void *context), void *context) ++{ ++ return -ENXIO; ++} ++ ++static inline void acpi_unregister_wakeup_handler( ++ bool (*wakeup)(void *context), void *context) { } ++ + #endif /* !CONFIG_ACPI */ + + #ifdef CONFIG_ACPI_HOTPLUG_IOAPIC +@@ -996,7 +1005,15 @@ void acpi_os_set_prepare_extended_sleep(int (*func)(u8 sleep_state, + + acpi_status acpi_os_prepare_extended_sleep(u8 sleep_state, + u32 val_a, u32 val_b); +- ++#ifdef CONFIG_X86 ++struct acpi_s2idle_dev_ops { ++ struct list_head list_node; ++ void (*prepare)(void); ++ void (*restore)(void); ++}; ++int acpi_register_lps0_dev(struct acpi_s2idle_dev_ops *arg); ++void acpi_unregister_lps0_dev(struct acpi_s2idle_dev_ops *arg); ++#endif /* CONFIG_X86 */ + #ifndef CONFIG_IA64 + void arch_reserve_mem_area(acpi_physical_address addr, size_t size); + #else +diff --git a/include/linux/acpi_viot.h b/include/linux/acpi_viot.h +index 1eb8ee5b0e5fe..a5a1224315637 100644 +--- a/include/linux/acpi_viot.h ++++ b/include/linux/acpi_viot.h +@@ -6,9 +6,11 @@ + #include <linux/acpi.h> + + #ifdef CONFIG_ACPI_VIOT ++void __init acpi_viot_early_init(void); + void __init acpi_viot_init(void); + int viot_iommu_configure(struct device *dev); + #else ++static inline void acpi_viot_early_init(void) {} + static inline void acpi_viot_init(void) {} + static inline int viot_iommu_configure(struct device *dev) + { +diff --git a/include/linux/arm-smccc.h b/include/linux/arm-smccc.h +index 63ccb52521902..220c8c60e021a 100644 +--- a/include/linux/arm-smccc.h ++++ b/include/linux/arm-smccc.h +@@ -92,6 +92,11 @@ + ARM_SMCCC_SMC_32, \ + 0, 0x7fff) + ++#define ARM_SMCCC_ARCH_WORKAROUND_3 \ ++ ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \ ++ ARM_SMCCC_SMC_32, \ ++ 0, 0x3fff) ++ + #define ARM_SMCCC_VENDOR_HYP_CALL_UID_FUNC_ID \ + ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \ + ARM_SMCCC_SMC_32, \ +diff --git a/include/linux/arm_sdei.h b/include/linux/arm_sdei.h +index 0a241c5c911d8..14dc461b0e829 100644 +--- 
a/include/linux/arm_sdei.h ++++ b/include/linux/arm_sdei.h +@@ -46,9 +46,11 @@ int sdei_unregister_ghes(struct ghes *ghes); + /* For use by arch code when CPU hotplug notifiers are not appropriate. */ + int sdei_mask_local_cpu(void); + int sdei_unmask_local_cpu(void); ++void __init sdei_init(void); + #else + static inline int sdei_mask_local_cpu(void) { return 0; } + static inline int sdei_unmask_local_cpu(void) { return 0; } ++static inline void sdei_init(void) { } + #endif /* CONFIG_ARM_SDE_INTERFACE */ + + +diff --git a/include/linux/ata.h b/include/linux/ata.h +index 1b44f40c7700b..3b1ad57d0e017 100644 +--- a/include/linux/ata.h ++++ b/include/linux/ata.h +@@ -565,6 +565,18 @@ struct ata_bmdma_prd { + ((((id)[ATA_ID_SATA_CAPABILITY] != 0x0000) && \ + ((id)[ATA_ID_SATA_CAPABILITY] != 0xffff)) && \ + ((id)[ATA_ID_FEATURE_SUPP] & (1 << 2))) ++#define ata_id_has_devslp(id) \ ++ ((((id)[ATA_ID_SATA_CAPABILITY] != 0x0000) && \ ++ ((id)[ATA_ID_SATA_CAPABILITY] != 0xffff)) && \ ++ ((id)[ATA_ID_FEATURE_SUPP] & (1 << 8))) ++#define ata_id_has_ncq_autosense(id) \ ++ ((((id)[ATA_ID_SATA_CAPABILITY] != 0x0000) && \ ++ ((id)[ATA_ID_SATA_CAPABILITY] != 0xffff)) && \ ++ ((id)[ATA_ID_FEATURE_SUPP] & (1 << 7))) ++#define ata_id_has_dipm(id) \ ++ ((((id)[ATA_ID_SATA_CAPABILITY] != 0x0000) && \ ++ ((id)[ATA_ID_SATA_CAPABILITY] != 0xffff)) && \ ++ ((id)[ATA_ID_FEATURE_SUPP] & (1 << 3))) + #define ata_id_iordy_disable(id) ((id)[ATA_ID_CAPABILITY] & (1 << 10)) + #define ata_id_has_iordy(id) ((id)[ATA_ID_CAPABILITY] & (1 << 11)) + #define ata_id_u32(id,n) \ +@@ -577,9 +589,6 @@ struct ata_bmdma_prd { + + #define ata_id_cdb_intr(id) (((id)[ATA_ID_CONFIG] & 0x60) == 0x20) + #define ata_id_has_da(id) ((id)[ATA_ID_SATA_CAPABILITY_2] & (1 << 4)) +-#define ata_id_has_devslp(id) ((id)[ATA_ID_FEATURE_SUPP] & (1 << 8)) +-#define ata_id_has_ncq_autosense(id) \ +- ((id)[ATA_ID_FEATURE_SUPP] & (1 << 7)) + + static inline bool ata_id_has_hipm(const u16 *id) + { +@@ -591,17 +600,6 @@ static inline bool ata_id_has_hipm(const u16 *id) + return val & (1 << 9); + } + +-static inline bool ata_id_has_dipm(const u16 *id) +-{ +- u16 val = id[ATA_ID_FEATURE_SUPP]; +- +- if (val == 0 || val == 0xffff) +- return false; +- +- return val & (1 << 3); +-} +- +- + static inline bool ata_id_has_fua(const u16 *id) + { + if ((id[ATA_ID_CFSSE] & 0xC000) != 0x4000) +@@ -770,16 +768,21 @@ static inline bool ata_id_has_read_log_dma_ext(const u16 *id) + + static inline bool ata_id_has_sense_reporting(const u16 *id) + { +- if (!(id[ATA_ID_CFS_ENABLE_2] & (1 << 15))) ++ if (!(id[ATA_ID_CFS_ENABLE_2] & BIT(15))) ++ return false; ++ if ((id[ATA_ID_COMMAND_SET_3] & (BIT(15) | BIT(14))) != BIT(14)) + return false; +- return id[ATA_ID_COMMAND_SET_3] & (1 << 6); ++ return id[ATA_ID_COMMAND_SET_3] & BIT(6); + } + + static inline bool ata_id_sense_reporting_enabled(const u16 *id) + { +- if (!(id[ATA_ID_CFS_ENABLE_2] & (1 << 15))) ++ if (!ata_id_has_sense_reporting(id)) ++ return false; ++ /* ata_id_has_sense_reporting() == true, word 86 must have bit 15 set */ ++ if ((id[ATA_ID_COMMAND_SET_4] & (BIT(15) | BIT(14))) != BIT(14)) + return false; +- return id[ATA_ID_COMMAND_SET_4] & (1 << 6); ++ return id[ATA_ID_COMMAND_SET_4] & BIT(6); + } + + /** +diff --git a/include/linux/atomic/atomic-arch-fallback.h b/include/linux/atomic/atomic-arch-fallback.h +index a3dba31df01e9..6db58d1808665 100644 +--- a/include/linux/atomic/atomic-arch-fallback.h ++++ b/include/linux/atomic/atomic-arch-fallback.h +@@ -151,7 +151,16 @@ + static __always_inline int + 
arch_atomic_read_acquire(const atomic_t *v) + { +- return smp_load_acquire(&(v)->counter); ++ int ret; ++ ++ if (__native_word(atomic_t)) { ++ ret = smp_load_acquire(&(v)->counter); ++ } else { ++ ret = arch_atomic_read(v); ++ __atomic_acquire_fence(); ++ } ++ ++ return ret; + } + #define arch_atomic_read_acquire arch_atomic_read_acquire + #endif +@@ -160,7 +169,12 @@ arch_atomic_read_acquire(const atomic_t *v) + static __always_inline void + arch_atomic_set_release(atomic_t *v, int i) + { +- smp_store_release(&(v)->counter, i); ++ if (__native_word(atomic_t)) { ++ smp_store_release(&(v)->counter, i); ++ } else { ++ __atomic_release_fence(); ++ arch_atomic_set(v, i); ++ } + } + #define arch_atomic_set_release arch_atomic_set_release + #endif +@@ -1258,7 +1272,16 @@ arch_atomic_dec_if_positive(atomic_t *v) + static __always_inline s64 + arch_atomic64_read_acquire(const atomic64_t *v) + { +- return smp_load_acquire(&(v)->counter); ++ s64 ret; ++ ++ if (__native_word(atomic64_t)) { ++ ret = smp_load_acquire(&(v)->counter); ++ } else { ++ ret = arch_atomic64_read(v); ++ __atomic_acquire_fence(); ++ } ++ ++ return ret; + } + #define arch_atomic64_read_acquire arch_atomic64_read_acquire + #endif +@@ -1267,7 +1290,12 @@ arch_atomic64_read_acquire(const atomic64_t *v) + static __always_inline void + arch_atomic64_set_release(atomic64_t *v, s64 i) + { +- smp_store_release(&(v)->counter, i); ++ if (__native_word(atomic64_t)) { ++ smp_store_release(&(v)->counter, i); ++ } else { ++ __atomic_release_fence(); ++ arch_atomic64_set(v, i); ++ } + } + #define arch_atomic64_set_release arch_atomic64_set_release + #endif +@@ -2358,4 +2386,4 @@ arch_atomic64_dec_if_positive(atomic64_t *v) + #endif + + #endif /* _LINUX_ATOMIC_FALLBACK_H */ +-// cca554917d7ea73d5e3e7397dd70c484cad9b2c4 ++// 8e2cc06bc0d2c0967d2f8424762bd48555ee40ae +diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h +index ac7f231b88258..eed9a98eae0d0 100644 +--- a/include/linux/backing-dev.h ++++ b/include/linux/backing-dev.h +@@ -121,6 +121,8 @@ int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ratio); + + extern struct backing_dev_info noop_backing_dev_info; + ++int bdi_init(struct backing_dev_info *bdi); ++ + /** + * writeback_in_progress - determine whether there is writeback in progress + * @wb: bdi_writeback of interest +diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h +index 049cf9421d831..f821b72433613 100644 +--- a/include/linux/binfmts.h ++++ b/include/linux/binfmts.h +@@ -87,6 +87,9 @@ struct coredump_params { + loff_t written; + loff_t pos; + loff_t to_skip; ++ int vma_count; ++ size_t vma_data_size; ++ struct core_vma_metadata *vma_meta; + }; + + /* +diff --git a/include/linux/bitfield.h b/include/linux/bitfield.h +index 4e035aca6f7e6..6093fa6db2600 100644 +--- a/include/linux/bitfield.h ++++ b/include/linux/bitfield.h +@@ -41,6 +41,22 @@ + + #define __bf_shf(x) (__builtin_ffsll(x) - 1) + ++#define __scalar_type_to_unsigned_cases(type) \ ++ unsigned type: (unsigned type)0, \ ++ signed type: (unsigned type)0 ++ ++#define __unsigned_scalar_typeof(x) typeof( \ ++ _Generic((x), \ ++ char: (unsigned char)0, \ ++ __scalar_type_to_unsigned_cases(char), \ ++ __scalar_type_to_unsigned_cases(short), \ ++ __scalar_type_to_unsigned_cases(int), \ ++ __scalar_type_to_unsigned_cases(long), \ ++ __scalar_type_to_unsigned_cases(long long), \ ++ default: (x))) ++ ++#define __bf_cast_unsigned(type, x) ((__unsigned_scalar_typeof(type))(x)) ++ + #define __BF_FIELD_CHECK(_mask, _reg, _val, _pfx) \ + 
({ \ + BUILD_BUG_ON_MSG(!__builtin_constant_p(_mask), \ +@@ -49,7 +65,8 @@ + BUILD_BUG_ON_MSG(__builtin_constant_p(_val) ? \ + ~((_mask) >> __bf_shf(_mask)) & (_val) : 0, \ + _pfx "value too large for the field"); \ +- BUILD_BUG_ON_MSG((_mask) > (typeof(_reg))~0ull, \ ++ BUILD_BUG_ON_MSG(__bf_cast_unsigned(_mask, _mask) > \ ++ __bf_cast_unsigned(_reg, ~0ull), \ + _pfx "type of reg too small for mask"); \ + __BUILD_BUG_ON_NOT_POWER_OF_2((_mask) + \ + (1ULL << __bf_shf(_mask))); \ +diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h +index b4de2010fba55..bc5c04d711bbc 100644 +--- a/include/linux/blk-cgroup.h ++++ b/include/linux/blk-cgroup.h +@@ -24,6 +24,7 @@ + #include <linux/atomic.h> + #include <linux/kthread.h> + #include <linux/fs.h> ++#include <linux/blk-mq.h> + + /* percpu_counter batch for blkg_[rw]stats, per-cpu drift doesn't matter */ + #define BLKG_STAT_CPU_BATCH (INT_MAX / 2) +@@ -604,6 +605,21 @@ static inline void blkcg_clear_delay(struct blkcg_gq *blkg) + atomic_dec(&blkg->blkcg->css.cgroup->congestion_count); + } + ++/** ++ * blk_cgroup_mergeable - Determine whether to allow or disallow merges ++ * @rq: request to merge into ++ * @bio: bio to merge ++ * ++ * @bio and @rq should belong to the same cgroup and their issue_as_root should ++ * match. The latter is necessary as we don't want to throttle e.g. a metadata ++ * update because it happens to be next to a regular IO. ++ */ ++static inline bool blk_cgroup_mergeable(struct request *rq, struct bio *bio) ++{ ++ return rq->bio->bi_blkg == bio->bi_blkg && ++ bio_issue_as_root_blkg(rq->bio) == bio_issue_as_root_blkg(bio); ++} ++ + void blk_cgroup_bio_start(struct bio *bio); + void blkcg_add_delay(struct blkcg_gq *blkg, u64 now, u64 delta); + void blkcg_schedule_throttle(struct request_queue *q, bool use_memdelay); +@@ -659,6 +675,7 @@ static inline void blkg_put(struct blkcg_gq *blkg) { } + static inline bool blkcg_punt_bio_submit(struct bio *bio) { return false; } + static inline void blkcg_bio_issue_init(struct bio *bio) { } + static inline void blk_cgroup_bio_start(struct bio *bio) { } ++static inline bool blk_cgroup_mergeable(struct request *rq, struct bio *bio) { return true; } + + #define blk_queue_for_each_rl(rl, q) \ + for ((rl) = &(q)->root_rl; (rl); (rl) = NULL) +diff --git a/include/linux/blk-pm.h b/include/linux/blk-pm.h +index b80c65aba2493..2580e05a8ab67 100644 +--- a/include/linux/blk-pm.h ++++ b/include/linux/blk-pm.h +@@ -14,7 +14,7 @@ extern void blk_pm_runtime_init(struct request_queue *q, struct device *dev); + extern int blk_pre_runtime_suspend(struct request_queue *q); + extern void blk_post_runtime_suspend(struct request_queue *q, int err); + extern void blk_pre_runtime_resume(struct request_queue *q); +-extern void blk_post_runtime_resume(struct request_queue *q, int err); ++extern void blk_post_runtime_resume(struct request_queue *q); + extern void blk_set_runtime_active(struct request_queue *q); + #else + static inline void blk_pm_runtime_init(struct request_queue *q, +diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h +index be622b5a21ed5..36ce3d0fb9f3b 100644 +--- a/include/linux/blk_types.h ++++ b/include/linux/blk_types.h +@@ -215,9 +215,8 @@ static inline void bio_issue_init(struct bio_issue *issue, + struct bio { + struct bio *bi_next; /* request queue link */ + struct block_device *bi_bdev; +- unsigned int bi_opf; /* bottom bits req flags, +- * top bits REQ_OP. Use +- * accessors. ++ unsigned int bi_opf; /* bottom bits REQ_OP, top bits ++ * req_flags. 
+ */ + unsigned short bi_flags; /* BIO_* below */ + unsigned short bi_ioprio; +@@ -295,7 +294,8 @@ enum { + BIO_TRACE_COMPLETION, /* bio_endio() should trace the final completion + * of this bio. */ + BIO_CGROUP_ACCT, /* has been accounted to a cgroup */ +- BIO_TRACKED, /* set if bio goes through the rq_qos path */ ++ BIO_QOS_THROTTLED, /* bio went through rq_qos throttle path */ ++ BIO_QOS_MERGED, /* but went through rq_qos merge path */ + BIO_REMAPPED, + BIO_ZONE_WRITE_LOCKED, /* Owns a zoned device zone write lock */ + BIO_PERCPU_CACHE, /* can participate in per-cpu alloc cache */ +diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h +index 12b9dbcc980ee..67344dfe07a7c 100644 +--- a/include/linux/blkdev.h ++++ b/include/linux/blkdev.h +@@ -235,6 +235,14 @@ struct request { + void *end_io_data; + }; + ++static inline int blk_validate_block_size(unsigned int bsize) ++{ ++ if (bsize < 512 || bsize > PAGE_SIZE || !is_power_of_2(bsize)) ++ return -EINVAL; ++ ++ return 0; ++} ++ + static inline bool blk_op_is_passthrough(unsigned int op) + { + op &= REQ_OP_MASK; +@@ -253,8 +261,6 @@ static inline unsigned short req_get_ioprio(struct request *req) + + #include <linux/elevator.h> + +-struct blk_queue_ctx; +- + struct bio_vec; + + enum blk_eh_timer_return { +@@ -1176,7 +1182,8 @@ extern void blk_dump_rq_flags(struct request *, char *); + + bool __must_check blk_get_queue(struct request_queue *); + extern void blk_put_queue(struct request_queue *); +-extern void blk_set_queue_dying(struct request_queue *); ++ ++void blk_mark_disk_dead(struct gendisk *disk); + + #ifdef CONFIG_BLOCK + /* +@@ -1198,8 +1205,6 @@ struct blk_plug { + bool multiple_queues; + bool nowait; + }; +-#define BLK_MAX_REQUEST_COUNT 16 +-#define BLK_PLUG_FLUSH_SIZE (128 * 1024) + + struct blk_plug_cb; + typedef void (*blk_plug_cb_fn)(struct blk_plug_cb *, bool); +@@ -1382,6 +1387,17 @@ static inline unsigned int queue_max_zone_append_sectors(const struct request_qu + return min(l->max_zone_append_sectors, l->max_sectors); + } + ++static inline unsigned int ++bdev_max_zone_append_sectors(struct block_device *bdev) ++{ ++ return queue_max_zone_append_sectors(bdev_get_queue(bdev)); ++} ++ ++static inline unsigned int bdev_max_segments(struct block_device *bdev) ++{ ++ return queue_max_segments(bdev_get_queue(bdev)); ++} ++ + static inline unsigned queue_logical_block_size(const struct request_queue *q) + { + int retval = 512; +@@ -1941,6 +1957,7 @@ unsigned long disk_start_io_acct(struct gendisk *disk, unsigned int sectors, + void disk_end_io_acct(struct gendisk *disk, unsigned int op, + unsigned long start_time); + ++void bio_start_io_acct_time(struct bio *bio, unsigned long start_time); + unsigned long bio_start_io_acct(struct bio *bio); + void bio_end_io_acct_remapped(struct bio *bio, unsigned long start_time, + struct block_device *orig_bdev); +@@ -1991,6 +2008,8 @@ int truncate_bdev_range(struct block_device *bdev, fmode_t mode, loff_t lstart, + #ifdef CONFIG_BLOCK + void invalidate_bdev(struct block_device *bdev); + int sync_blockdev(struct block_device *bdev); ++int sync_blockdev_nowait(struct block_device *bdev); ++void sync_bdevs(bool wait); + #else + static inline void invalidate_bdev(struct block_device *bdev) + { +@@ -1999,6 +2018,13 @@ static inline int sync_blockdev(struct block_device *bdev) + { + return 0; + } ++static inline int sync_blockdev_nowait(struct block_device *bdev) ++{ ++ return 0; ++} ++static inline void sync_bdevs(bool wait) ++{ ++} + #endif + int fsync_bdev(struct block_device *bdev); + +diff 
--git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h +index 2746fd8042162..3536ab432b30c 100644 +--- a/include/linux/bpf-cgroup.h ++++ b/include/linux/bpf-cgroup.h +@@ -517,6 +517,7 @@ static inline int bpf_percpu_cgroup_storage_update(struct bpf_map *map, + + #define cgroup_bpf_enabled(atype) (0) + #define BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, atype, t_ctx) ({ 0; }) ++#define BPF_CGROUP_RUN_SA_PROG(sk, uaddr, atype) ({ 0; }) + #define BPF_CGROUP_PRE_CONNECT_ENABLED(sk) (0) + #define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk,skb) ({ 0; }) + #define BPF_CGROUP_RUN_PROG_INET_EGRESS(sk,skb) ({ 0; }) +diff --git a/include/linux/bpf.h b/include/linux/bpf.h +index 3db6f6c95489e..84efd8dd139d9 100644 +--- a/include/linux/bpf.h ++++ b/include/linux/bpf.h +@@ -190,7 +190,7 @@ struct bpf_map { + atomic64_t usercnt; + struct work_struct work; + struct mutex freeze_mutex; +- u64 writecnt; /* writable mmap cnt; protected by freeze_mutex */ ++ atomic64_t writecnt; + }; + + static inline bool map_value_has_spin_lock(const struct bpf_map *map) +@@ -206,11 +206,9 @@ static inline bool map_value_has_timer(const struct bpf_map *map) + static inline void check_and_init_map_value(struct bpf_map *map, void *dst) + { + if (unlikely(map_value_has_spin_lock(map))) +- *(struct bpf_spin_lock *)(dst + map->spin_lock_off) = +- (struct bpf_spin_lock){}; ++ memset(dst + map->spin_lock_off, 0, sizeof(struct bpf_spin_lock)); + if (unlikely(map_value_has_timer(map))) +- *(struct bpf_timer *)(dst + map->timer_off) = +- (struct bpf_timer){}; ++ memset(dst + map->timer_off, 0, sizeof(struct bpf_timer)); + } + + /* copy everything but bpf_spin_lock and bpf_timer. There could be one of each. */ +@@ -221,7 +219,8 @@ static inline void copy_map_value(struct bpf_map *map, void *dst, void *src) + if (unlikely(map_value_has_spin_lock(map))) { + s_off = map->spin_lock_off; + s_sz = sizeof(struct bpf_spin_lock); +- } else if (unlikely(map_value_has_timer(map))) { ++ } ++ if (unlikely(map_value_has_timer(map))) { + t_off = map->timer_off; + t_sz = sizeof(struct bpf_timer); + } +@@ -294,6 +293,34 @@ bool bpf_map_meta_equal(const struct bpf_map *meta0, + + extern const struct bpf_map_ops bpf_map_offload_ops; + ++/* bpf_type_flag contains a set of flags that are applicable to the values of ++ * arg_type, ret_type and reg_type. For example, a pointer value may be null, ++ * or a memory is read-only. We classify types into two categories: base types ++ * and extended types. Extended types are base types combined with a type flag. ++ * ++ * Currently there are no more than 32 base types in arg_type, ret_type and ++ * reg_types. ++ */ ++#define BPF_BASE_TYPE_BITS 8 ++ ++enum bpf_type_flag { ++ /* PTR may be NULL. */ ++ PTR_MAYBE_NULL = BIT(0 + BPF_BASE_TYPE_BITS), ++ ++ /* MEM is read-only. When applied on bpf_arg, it indicates the arg is ++ * compatible with both mutable and immutable memory. ++ */ ++ MEM_RDONLY = BIT(1 + BPF_BASE_TYPE_BITS), ++ ++ __BPF_TYPE_LAST_FLAG = MEM_RDONLY, ++}; ++ ++/* Max number of base types. */ ++#define BPF_BASE_TYPE_LIMIT (1UL << BPF_BASE_TYPE_BITS) ++ ++/* Max number of all types. 
*/ ++#define BPF_TYPE_LIMIT (__BPF_TYPE_LAST_FLAG | (__BPF_TYPE_LAST_FLAG - 1)) ++ + /* function argument constraints */ + enum bpf_arg_type { + ARG_DONTCARE = 0, /* unused argument in helper function */ +@@ -305,13 +332,11 @@ enum bpf_arg_type { + ARG_PTR_TO_MAP_KEY, /* pointer to stack used as map key */ + ARG_PTR_TO_MAP_VALUE, /* pointer to stack used as map value */ + ARG_PTR_TO_UNINIT_MAP_VALUE, /* pointer to valid memory used to store a map value */ +- ARG_PTR_TO_MAP_VALUE_OR_NULL, /* pointer to stack used as map value or NULL */ + + /* the following constraints used to prototype bpf_memcmp() and other + * functions that access data on eBPF program stack + */ + ARG_PTR_TO_MEM, /* pointer to valid memory (stack, packet, map value) */ +- ARG_PTR_TO_MEM_OR_NULL, /* pointer to valid memory or NULL */ + ARG_PTR_TO_UNINIT_MEM, /* pointer to memory does not need to be initialized, + * helper function must fill all bytes or clear + * them in error case. +@@ -321,42 +346,65 @@ enum bpf_arg_type { + ARG_CONST_SIZE_OR_ZERO, /* number of bytes accessed from memory or 0 */ + + ARG_PTR_TO_CTX, /* pointer to context */ +- ARG_PTR_TO_CTX_OR_NULL, /* pointer to context or NULL */ + ARG_ANYTHING, /* any (initialized) argument is ok */ + ARG_PTR_TO_SPIN_LOCK, /* pointer to bpf_spin_lock */ + ARG_PTR_TO_SOCK_COMMON, /* pointer to sock_common */ + ARG_PTR_TO_INT, /* pointer to int */ + ARG_PTR_TO_LONG, /* pointer to long */ + ARG_PTR_TO_SOCKET, /* pointer to bpf_sock (fullsock) */ +- ARG_PTR_TO_SOCKET_OR_NULL, /* pointer to bpf_sock (fullsock) or NULL */ + ARG_PTR_TO_BTF_ID, /* pointer to in-kernel struct */ + ARG_PTR_TO_ALLOC_MEM, /* pointer to dynamically allocated memory */ +- ARG_PTR_TO_ALLOC_MEM_OR_NULL, /* pointer to dynamically allocated memory or NULL */ + ARG_CONST_ALLOC_SIZE_OR_ZERO, /* number of allocated bytes requested */ + ARG_PTR_TO_BTF_ID_SOCK_COMMON, /* pointer to in-kernel sock_common or bpf-mirrored bpf_sock */ + ARG_PTR_TO_PERCPU_BTF_ID, /* pointer to in-kernel percpu type */ + ARG_PTR_TO_FUNC, /* pointer to a bpf program function */ +- ARG_PTR_TO_STACK_OR_NULL, /* pointer to stack or NULL */ ++ ARG_PTR_TO_STACK, /* pointer to stack */ + ARG_PTR_TO_CONST_STR, /* pointer to a null terminated read-only string */ + ARG_PTR_TO_TIMER, /* pointer to bpf_timer */ + __BPF_ARG_TYPE_MAX, ++ ++ /* Extended arg_types. */ ++ ARG_PTR_TO_MAP_VALUE_OR_NULL = PTR_MAYBE_NULL | ARG_PTR_TO_MAP_VALUE, ++ ARG_PTR_TO_MEM_OR_NULL = PTR_MAYBE_NULL | ARG_PTR_TO_MEM, ++ ARG_PTR_TO_CTX_OR_NULL = PTR_MAYBE_NULL | ARG_PTR_TO_CTX, ++ ARG_PTR_TO_SOCKET_OR_NULL = PTR_MAYBE_NULL | ARG_PTR_TO_SOCKET, ++ ARG_PTR_TO_ALLOC_MEM_OR_NULL = PTR_MAYBE_NULL | ARG_PTR_TO_ALLOC_MEM, ++ ARG_PTR_TO_STACK_OR_NULL = PTR_MAYBE_NULL | ARG_PTR_TO_STACK, ++ ++ /* This must be the last entry. Its purpose is to ensure the enum is ++ * wide enough to hold the higher bits reserved for bpf_type_flag. 
++ */ ++ __BPF_ARG_TYPE_LIMIT = BPF_TYPE_LIMIT, + }; ++static_assert(__BPF_ARG_TYPE_MAX <= BPF_BASE_TYPE_LIMIT); + + /* type of values returned from helper functions */ + enum bpf_return_type { + RET_INTEGER, /* function returns integer */ + RET_VOID, /* function doesn't return anything */ + RET_PTR_TO_MAP_VALUE, /* returns a pointer to map elem value */ +- RET_PTR_TO_MAP_VALUE_OR_NULL, /* returns a pointer to map elem value or NULL */ +- RET_PTR_TO_SOCKET_OR_NULL, /* returns a pointer to a socket or NULL */ +- RET_PTR_TO_TCP_SOCK_OR_NULL, /* returns a pointer to a tcp_sock or NULL */ +- RET_PTR_TO_SOCK_COMMON_OR_NULL, /* returns a pointer to a sock_common or NULL */ +- RET_PTR_TO_ALLOC_MEM_OR_NULL, /* returns a pointer to dynamically allocated memory or NULL */ +- RET_PTR_TO_BTF_ID_OR_NULL, /* returns a pointer to a btf_id or NULL */ +- RET_PTR_TO_MEM_OR_BTF_ID_OR_NULL, /* returns a pointer to a valid memory or a btf_id or NULL */ ++ RET_PTR_TO_SOCKET, /* returns a pointer to a socket */ ++ RET_PTR_TO_TCP_SOCK, /* returns a pointer to a tcp_sock */ ++ RET_PTR_TO_SOCK_COMMON, /* returns a pointer to a sock_common */ ++ RET_PTR_TO_ALLOC_MEM, /* returns a pointer to dynamically allocated memory */ + RET_PTR_TO_MEM_OR_BTF_ID, /* returns a pointer to a valid memory or a btf_id */ + RET_PTR_TO_BTF_ID, /* returns a pointer to a btf_id */ ++ __BPF_RET_TYPE_MAX, ++ ++ /* Extended ret_types. */ ++ RET_PTR_TO_MAP_VALUE_OR_NULL = PTR_MAYBE_NULL | RET_PTR_TO_MAP_VALUE, ++ RET_PTR_TO_SOCKET_OR_NULL = PTR_MAYBE_NULL | RET_PTR_TO_SOCKET, ++ RET_PTR_TO_TCP_SOCK_OR_NULL = PTR_MAYBE_NULL | RET_PTR_TO_TCP_SOCK, ++ RET_PTR_TO_SOCK_COMMON_OR_NULL = PTR_MAYBE_NULL | RET_PTR_TO_SOCK_COMMON, ++ RET_PTR_TO_ALLOC_MEM_OR_NULL = PTR_MAYBE_NULL | RET_PTR_TO_ALLOC_MEM, ++ RET_PTR_TO_BTF_ID_OR_NULL = PTR_MAYBE_NULL | RET_PTR_TO_BTF_ID, ++ ++ /* This must be the last entry. Its purpose is to ensure the enum is ++ * wide enough to hold the higher bits reserved for bpf_type_flag. ++ */ ++ __BPF_RET_TYPE_LIMIT = BPF_TYPE_LIMIT, + }; ++static_assert(__BPF_RET_TYPE_MAX <= BPF_BASE_TYPE_LIMIT); + + /* eBPF function prototype used by verifier to allow BPF_CALLs from eBPF programs + * to in-kernel helper functions and for adjusting imm32 field in BPF_CALL +@@ -418,18 +466,15 @@ enum bpf_reg_type { + PTR_TO_CTX, /* reg points to bpf_context */ + CONST_PTR_TO_MAP, /* reg points to struct bpf_map */ + PTR_TO_MAP_VALUE, /* reg points to map element value */ +- PTR_TO_MAP_VALUE_OR_NULL,/* points to map elem value or NULL */ ++ PTR_TO_MAP_KEY, /* reg points to a map element key */ + PTR_TO_STACK, /* reg == frame_pointer + offset */ + PTR_TO_PACKET_META, /* skb->data - meta_len */ + PTR_TO_PACKET, /* reg points to skb->data */ + PTR_TO_PACKET_END, /* skb->data + headlen */ + PTR_TO_FLOW_KEYS, /* reg points to bpf_flow_keys */ + PTR_TO_SOCKET, /* reg points to struct bpf_sock */ +- PTR_TO_SOCKET_OR_NULL, /* reg points to struct bpf_sock or NULL */ + PTR_TO_SOCK_COMMON, /* reg points to sock_common */ +- PTR_TO_SOCK_COMMON_OR_NULL, /* reg points to sock_common or NULL */ + PTR_TO_TCP_SOCK, /* reg points to struct tcp_sock */ +- PTR_TO_TCP_SOCK_OR_NULL, /* reg points to struct tcp_sock or NULL */ + PTR_TO_TP_BUFFER, /* reg points to a writable raw tp's buffer */ + PTR_TO_XDP_SOCK, /* reg points to struct xdp_sock */ + /* PTR_TO_BTF_ID points to a kernel struct that does not need +@@ -447,18 +492,25 @@ enum bpf_reg_type { + * been checked for null. 
Used primarily to inform the verifier + * an explicit null check is required for this struct. + */ +- PTR_TO_BTF_ID_OR_NULL, + PTR_TO_MEM, /* reg points to valid memory region */ +- PTR_TO_MEM_OR_NULL, /* reg points to valid memory region or NULL */ +- PTR_TO_RDONLY_BUF, /* reg points to a readonly buffer */ +- PTR_TO_RDONLY_BUF_OR_NULL, /* reg points to a readonly buffer or NULL */ +- PTR_TO_RDWR_BUF, /* reg points to a read/write buffer */ +- PTR_TO_RDWR_BUF_OR_NULL, /* reg points to a read/write buffer or NULL */ ++ PTR_TO_BUF, /* reg points to a read/write buffer */ + PTR_TO_PERCPU_BTF_ID, /* reg points to a percpu kernel variable */ + PTR_TO_FUNC, /* reg points to a bpf program function */ +- PTR_TO_MAP_KEY, /* reg points to a map element key */ + __BPF_REG_TYPE_MAX, ++ ++ /* Extended reg_types. */ ++ PTR_TO_MAP_VALUE_OR_NULL = PTR_MAYBE_NULL | PTR_TO_MAP_VALUE, ++ PTR_TO_SOCKET_OR_NULL = PTR_MAYBE_NULL | PTR_TO_SOCKET, ++ PTR_TO_SOCK_COMMON_OR_NULL = PTR_MAYBE_NULL | PTR_TO_SOCK_COMMON, ++ PTR_TO_TCP_SOCK_OR_NULL = PTR_MAYBE_NULL | PTR_TO_TCP_SOCK, ++ PTR_TO_BTF_ID_OR_NULL = PTR_MAYBE_NULL | PTR_TO_BTF_ID, ++ ++ /* This must be the last entry. Its purpose is to ensure the enum is ++ * wide enough to hold the higher bits reserved for bpf_type_flag. ++ */ ++ __BPF_REG_TYPE_LIMIT = BPF_TYPE_LIMIT, + }; ++static_assert(__BPF_REG_TYPE_MAX <= BPF_BASE_TYPE_LIMIT); + + /* The information passed from prog-specific *_is_valid_access + * back to the verifier. +@@ -481,6 +533,12 @@ bpf_ctx_record_field_size(struct bpf_insn_access_aux *aux, u32 size) + aux->ctx_field_size = size; + } + ++static inline bool bpf_pseudo_func(const struct bpf_insn *insn) ++{ ++ return insn->code == (BPF_LD | BPF_IMM | BPF_DW) && ++ insn->src_reg == BPF_PSEUDO_FUNC; ++} ++ + struct bpf_prog_ops { + int (*test_run)(struct bpf_prog *prog, const union bpf_attr *kattr, + union bpf_attr __user *uattr); +@@ -723,6 +781,7 @@ int bpf_trampoline_unlink_prog(struct bpf_prog *prog, struct bpf_trampoline *tr) + struct bpf_trampoline *bpf_trampoline_get(u64 key, + struct bpf_attach_target_info *tgt_info); + void bpf_trampoline_put(struct bpf_trampoline *tr); ++int arch_prepare_bpf_dispatcher(void *image, s64 *funcs, int num_funcs); + #define BPF_DISPATCHER_INIT(_name) { \ + .mutex = __MUTEX_INITIALIZER(_name.mutex), \ + .func = &_name##_func, \ +@@ -1320,28 +1379,16 @@ extern struct mutex bpf_stats_enabled_mutex; + * kprobes, tracepoints) to prevent deadlocks on map operations as any of + * these events can happen inside a region which holds a map bucket lock + * and can deadlock on it. +- * +- * Use the preemption safe inc/dec variants on RT because migrate disable +- * is preemptible on RT and preemption in the middle of the RMW operation +- * might lead to inconsistent state. Use the raw variants for non RT +- * kernels as migrate_disable() maps to preempt_disable() so the slightly +- * more expensive save operation can be avoided. 
+ */ + static inline void bpf_disable_instrumentation(void) + { + migrate_disable(); +- if (IS_ENABLED(CONFIG_PREEMPT_RT)) +- this_cpu_inc(bpf_prog_active); +- else +- __this_cpu_inc(bpf_prog_active); ++ this_cpu_inc(bpf_prog_active); + } + + static inline void bpf_enable_instrumentation(void) + { +- if (IS_ENABLED(CONFIG_PREEMPT_RT)) +- this_cpu_dec(bpf_prog_active); +- else +- __this_cpu_dec(bpf_prog_active); ++ this_cpu_dec(bpf_prog_active); + migrate_enable(); + } + +@@ -1387,6 +1434,7 @@ void bpf_map_put(struct bpf_map *map); + void *bpf_map_area_alloc(u64 size, int numa_node); + void *bpf_map_area_mmapable_alloc(u64 size, int numa_node); + void bpf_map_area_free(void *base); ++bool bpf_map_write_active(const struct bpf_map *map); + void bpf_map_init_from_attr(struct bpf_map *map, union bpf_attr *attr); + int generic_map_lookup_batch(struct bpf_map *map, + const union bpf_attr *attr, +@@ -1677,6 +1725,12 @@ bool bpf_prog_has_kfunc_call(const struct bpf_prog *prog); + const struct btf_func_model * + bpf_jit_find_kfunc_model(const struct bpf_prog *prog, + const struct bpf_insn *insn); ++ ++static inline bool unprivileged_ebpf_enabled(void) ++{ ++ return !sysctl_unprivileged_bpf_disabled; ++} ++ + #else /* !CONFIG_BPF_SYSCALL */ + static inline struct bpf_prog *bpf_prog_get(u32 ufd) + { +@@ -1895,6 +1949,12 @@ bpf_jit_find_kfunc_model(const struct bpf_prog *prog, + { + return NULL; + } ++ ++static inline bool unprivileged_ebpf_enabled(void) ++{ ++ return false; ++} ++ + #endif /* CONFIG_BPF_SYSCALL */ + + void __bpf_free_used_btfs(struct bpf_prog_aux *aux, +@@ -1937,6 +1997,8 @@ void bpf_offload_dev_netdev_unregister(struct bpf_offload_dev *offdev, + struct net_device *netdev); + bool bpf_offload_dev_match(struct bpf_prog *prog, struct net_device *netdev); + ++void unpriv_ebpf_notify(int new_state); ++ + #if defined(CONFIG_NET) && defined(CONFIG_BPF_SYSCALL) + int bpf_prog_offload_init(struct bpf_prog *prog, union bpf_attr *attr); + +@@ -1960,6 +2022,7 @@ int sock_map_get_from_fd(const union bpf_attr *attr, struct bpf_prog *prog); + int sock_map_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype); + int sock_map_update_elem_sys(struct bpf_map *map, void *key, void *value, u64 flags); + void sock_map_unhash(struct sock *sk); ++void sock_map_destroy(struct sock *sk); + void sock_map_close(struct sock *sk, long timeout); + #else + static inline int bpf_prog_offload_init(struct bpf_prog *prog, +diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h +index 5424124dbe365..3d04b48e502de 100644 +--- a/include/linux/bpf_verifier.h ++++ b/include/linux/bpf_verifier.h +@@ -18,6 +18,8 @@ + * that converting umax_value to int cannot overflow. + */ + #define BPF_MAX_VAR_SIZ (1 << 29) ++/* size of type_str_buf in bpf_verifier. */ ++#define TYPE_STR_BUF_LEN 64 + + /* Liveness marks, used for registers and spilled-regs (in stack slots). + * Read marks propagate upwards until they find a write mark; they record that +@@ -190,6 +192,17 @@ struct bpf_reference_state { + * is used purely to inform the user of a reference leak. + */ + int insn_idx; ++ /* There can be a case like: ++ * main (frame 0) ++ * cb (frame 1) ++ * func (frame 3) ++ * cb (frame 4) ++ * Hence for frame 4, if callback_ref just stored boolean, it would be ++ * impossible to distinguish nested callback refs. Hence store the ++ * frameno and compare that to callback_ref in check_reference_leak when ++ * exiting a callback function. 
++ */ ++ int callback_ref; + }; + + /* state of the program: +@@ -315,6 +328,27 @@ struct bpf_verifier_state { + iter < frame->allocated_stack / BPF_REG_SIZE; \ + iter++, reg = bpf_get_spilled_reg(iter, frame)) + ++/* Invoke __expr over regsiters in __vst, setting __state and __reg */ ++#define bpf_for_each_reg_in_vstate(__vst, __state, __reg, __expr) \ ++ ({ \ ++ struct bpf_verifier_state *___vstate = __vst; \ ++ int ___i, ___j; \ ++ for (___i = 0; ___i <= ___vstate->curframe; ___i++) { \ ++ struct bpf_reg_state *___regs; \ ++ __state = ___vstate->frame[___i]; \ ++ ___regs = __state->regs; \ ++ for (___j = 0; ___j < MAX_BPF_REG; ___j++) { \ ++ __reg = &___regs[___j]; \ ++ (void)(__expr); \ ++ } \ ++ bpf_for_each_spilled_reg(___j, __state, __reg) { \ ++ if (!__reg) \ ++ continue; \ ++ (void)(__expr); \ ++ } \ ++ } \ ++ }) ++ + /* linked list of verifier states used to prune search */ + struct bpf_verifier_state_list { + struct bpf_verifier_state state; +@@ -396,6 +430,13 @@ static inline bool bpf_verifier_log_needed(const struct bpf_verifier_log *log) + log->level == BPF_LOG_KERNEL); + } + ++static inline bool ++bpf_verifier_log_attr_valid(const struct bpf_verifier_log *log) ++{ ++ return log->len_total >= 128 && log->len_total <= UINT_MAX >> 2 && ++ log->level && log->ubuf && !(log->level & ~BPF_LOG_MASK); ++} ++ + #define BPF_MAX_SUBPROGS 256 + + struct bpf_subprog_info { +@@ -467,6 +508,8 @@ struct bpf_verifier_env { + /* longest register parentage chain walked for liveness marking */ + u32 longest_mark_read_walk; + bpfptr_t fd_array; ++ /* buffer used in reg_type_str() to generate reg_type string */ ++ char type_str_buf[TYPE_STR_BUF_LEN]; + }; + + __printf(2, 0) void bpf_verifier_vlog(struct bpf_verifier_log *log, +@@ -528,4 +571,18 @@ int bpf_check_attach_target(struct bpf_verifier_log *log, + u32 btf_id, + struct bpf_attach_target_info *tgt_info); + ++#define BPF_BASE_TYPE_MASK GENMASK(BPF_BASE_TYPE_BITS - 1, 0) ++ ++/* extract base type from bpf_{arg, return, reg}_type. */ ++static inline u32 base_type(u32 type) ++{ ++ return type & BPF_BASE_TYPE_MASK; ++} ++ ++/* extract flags from an extended type. See bpf_type_flag in bpf.h. 
*/ ++static inline u32 type_flag(u32 type) ++{ ++ return type & ~BPF_BASE_TYPE_MASK; ++} ++ + #endif /* _LINUX_BPF_VERIFIER_H */ +diff --git a/include/linux/bpfptr.h b/include/linux/bpfptr.h +index 546e27fc6d462..ee28d2b0a3091 100644 +--- a/include/linux/bpfptr.h ++++ b/include/linux/bpfptr.h +@@ -48,7 +48,9 @@ static inline void bpfptr_add(bpfptr_t *bpfptr, size_t val) + static inline int copy_from_bpfptr_offset(void *dst, bpfptr_t src, + size_t offset, size_t size) + { +- return copy_from_sockptr_offset(dst, (sockptr_t) src, offset, size); ++ if (!bpfptr_is_kernel(src)) ++ return copy_from_user(dst, src.user + offset, size); ++ return copy_from_kernel_nofault(dst, src.kernel + offset, size); + } + + static inline int copy_from_bpfptr(void *dst, bpfptr_t src, size_t size) +@@ -77,7 +79,9 @@ static inline void *kvmemdup_bpfptr(bpfptr_t src, size_t len) + + static inline long strncpy_from_bpfptr(char *dst, bpfptr_t src, size_t count) + { +- return strncpy_from_sockptr(dst, (sockptr_t) src, count); ++ if (bpfptr_is_kernel(src)) ++ return strncpy_from_kernel_nofault(dst, src.kernel, count); ++ return strncpy_from_user(dst, src.user, count); + } + + #endif /* _LINUX_BPFPTR_H */ +diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h +index 36f33685c8c00..25b4263d66d70 100644 +--- a/include/linux/buffer_head.h ++++ b/include/linux/buffer_head.h +@@ -117,7 +117,6 @@ static __always_inline int test_clear_buffer_##name(struct buffer_head *bh) \ + * of the form "mark_buffer_foo()". These are higher-level functions which + * do something in addition to setting a b_state bit. + */ +-BUFFER_FNS(Uptodate, uptodate) + BUFFER_FNS(Dirty, dirty) + TAS_BUFFER_FNS(Dirty, dirty) + BUFFER_FNS(Lock, locked) +@@ -135,6 +134,41 @@ BUFFER_FNS(Meta, meta) + BUFFER_FNS(Prio, prio) + BUFFER_FNS(Defer_Completion, defer_completion) + ++static __always_inline void set_buffer_uptodate(struct buffer_head *bh) ++{ ++ /* ++ * If somebody else already set this uptodate, they will ++ * have done the memory barrier, and a reader will thus ++ * see *some* valid buffer state. ++ * ++ * Any other serialization (with IO errors or whatever that ++ * might clear the bit) has to come from other state (eg BH_Lock). 
++ */ ++ if (test_bit(BH_Uptodate, &bh->b_state)) ++ return; ++ ++ /* ++ * make it consistent with folio_mark_uptodate ++ * pairs with smp_load_acquire in buffer_uptodate ++ */ ++ smp_mb__before_atomic(); ++ set_bit(BH_Uptodate, &bh->b_state); ++} ++ ++static __always_inline void clear_buffer_uptodate(struct buffer_head *bh) ++{ ++ clear_bit(BH_Uptodate, &bh->b_state); ++} ++ ++static __always_inline int buffer_uptodate(const struct buffer_head *bh) ++{ ++ /* ++ * make it consistent with folio_test_uptodate ++ * pairs with smp_mb__before_atomic in set_buffer_uptodate ++ */ ++ return (smp_load_acquire(&bh->b_state) & (1UL << BH_Uptodate)) != 0; ++} ++ + #define bh_offset(bh) ((unsigned long)(bh)->b_data & ~PAGE_MASK) + + /* If we *know* page->private refers to buffer_heads */ +diff --git a/include/linux/can/platform/sja1000.h b/include/linux/can/platform/sja1000.h +index 5755ae5a47122..6a869682c1207 100644 +--- a/include/linux/can/platform/sja1000.h ++++ b/include/linux/can/platform/sja1000.h +@@ -14,7 +14,7 @@ + #define OCR_MODE_TEST 0x01 + #define OCR_MODE_NORMAL 0x02 + #define OCR_MODE_CLOCK 0x03 +-#define OCR_MODE_MASK 0x07 ++#define OCR_MODE_MASK 0x03 + #define OCR_TX0_INVERT 0x04 + #define OCR_TX0_PULLDOWN 0x08 + #define OCR_TX0_PULLUP 0x10 +diff --git a/include/linux/cc_platform.h b/include/linux/cc_platform.h +new file mode 100644 +index 0000000000000..a075b70b9a70c +--- /dev/null ++++ b/include/linux/cc_platform.h +@@ -0,0 +1,88 @@ ++/* SPDX-License-Identifier: GPL-2.0-only */ ++/* ++ * Confidential Computing Platform Capability checks ++ * ++ * Copyright (C) 2021 Advanced Micro Devices, Inc. ++ * ++ * Author: Tom Lendacky <thomas.lendacky@amd.com> ++ */ ++ ++#ifndef _LINUX_CC_PLATFORM_H ++#define _LINUX_CC_PLATFORM_H ++ ++#include <linux/types.h> ++#include <linux/stddef.h> ++ ++/** ++ * enum cc_attr - Confidential computing attributes ++ * ++ * These attributes represent confidential computing features that are ++ * currently active. ++ */ ++enum cc_attr { ++ /** ++ * @CC_ATTR_MEM_ENCRYPT: Memory encryption is active ++ * ++ * The platform/OS is running with active memory encryption. This ++ * includes running either as a bare-metal system or a hypervisor ++ * and actively using memory encryption or as a guest/virtual machine ++ * and actively using memory encryption. ++ * ++ * Examples include SME, SEV and SEV-ES. ++ */ ++ CC_ATTR_MEM_ENCRYPT, ++ ++ /** ++ * @CC_ATTR_HOST_MEM_ENCRYPT: Host memory encryption is active ++ * ++ * The platform/OS is running as a bare-metal system or a hypervisor ++ * and actively using memory encryption. ++ * ++ * Examples include SME. ++ */ ++ CC_ATTR_HOST_MEM_ENCRYPT, ++ ++ /** ++ * @CC_ATTR_GUEST_MEM_ENCRYPT: Guest memory encryption is active ++ * ++ * The platform/OS is running as a guest/virtual machine and actively ++ * using memory encryption. ++ * ++ * Examples include SEV and SEV-ES. ++ */ ++ CC_ATTR_GUEST_MEM_ENCRYPT, ++ ++ /** ++ * @CC_ATTR_GUEST_STATE_ENCRYPT: Guest state encryption is active ++ * ++ * The platform/OS is running as a guest/virtual machine and actively ++ * using memory encryption and register state encryption. ++ * ++ * Examples include SEV-ES. ++ */ ++ CC_ATTR_GUEST_STATE_ENCRYPT, ++}; ++ ++#ifdef CONFIG_ARCH_HAS_CC_PLATFORM ++ ++/** ++ * cc_platform_has() - Checks if the specified cc_attr attribute is active ++ * @attr: Confidential computing attribute to check ++ * ++ * The cc_platform_has() function will return an indicator as to whether the ++ * specified Confidential Computing attribute is currently active. 
++ * ++ * Context: Any context ++ * Return: ++ * * TRUE - Specified Confidential Computing attribute is active ++ * * FALSE - Specified Confidential Computing attribute is not active ++ */ ++bool cc_platform_has(enum cc_attr attr); ++ ++#else /* !CONFIG_ARCH_HAS_CC_PLATFORM */ ++ ++static inline bool cc_platform_has(enum cc_attr attr) { return false; } ++ ++#endif /* CONFIG_ARCH_HAS_CC_PLATFORM */ ++ ++#endif /* _LINUX_CC_PLATFORM_H */ +diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h +index 83fa08a065071..787fff5ec7f58 100644 +--- a/include/linux/ceph/osd_client.h ++++ b/include/linux/ceph/osd_client.h +@@ -287,6 +287,9 @@ struct ceph_osd_linger_request { + rados_watcherrcb_t errcb; + void *data; + ++ struct ceph_pagelist *request_pl; ++ struct page **notify_id_pages; ++ + struct page ***preply_pages; + size_t *preply_len; + }; +diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h +index db2e147e069fe..cd8b8bd5ec4d5 100644 +--- a/include/linux/cgroup-defs.h ++++ b/include/linux/cgroup-defs.h +@@ -264,7 +264,8 @@ struct css_set { + * List of csets participating in the on-going migration either as + * source or destination. Protected by cgroup_mutex. + */ +- struct list_head mg_preload_node; ++ struct list_head mg_src_preload_node; ++ struct list_head mg_dst_preload_node; + struct list_head mg_node; + + /* +diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h +index 75c151413fda8..45cdb12243e3f 100644 +--- a/include/linux/cgroup.h ++++ b/include/linux/cgroup.h +@@ -68,6 +68,7 @@ struct css_task_iter { + struct list_head iters_node; /* css_set->task_iters */ + }; + ++extern struct file_system_type cgroup_fs_type; + extern struct cgroup_root cgrp_dfl_root; + extern struct css_set init_css_set; + +diff --git a/include/linux/clk.h b/include/linux/clk.h +index 266e8de3cb515..e280e0acb55c6 100644 +--- a/include/linux/clk.h ++++ b/include/linux/clk.h +@@ -458,6 +458,47 @@ int __must_check devm_clk_bulk_get_all(struct device *dev, + */ + struct clk *devm_clk_get(struct device *dev, const char *id); + ++/** ++ * devm_clk_get_prepared - devm_clk_get() + clk_prepare() ++ * @dev: device for clock "consumer" ++ * @id: clock consumer ID ++ * ++ * Context: May sleep. ++ * ++ * Return: a struct clk corresponding to the clock producer, or ++ * valid IS_ERR() condition containing errno. The implementation ++ * uses @dev and @id to determine the clock consumer, and thereby ++ * the clock producer. (IOW, @id may be identical strings, but ++ * clk_get may return different clock producers depending on @dev.) ++ * ++ * The returned clk (if valid) is prepared. Drivers must however assume ++ * that the clock is not enabled. ++ * ++ * The clock will automatically be unprepared and freed when the device ++ * is unbound from the bus. ++ */ ++struct clk *devm_clk_get_prepared(struct device *dev, const char *id); ++ ++/** ++ * devm_clk_get_enabled - devm_clk_get() + clk_prepare_enable() ++ * @dev: device for clock "consumer" ++ * @id: clock consumer ID ++ * ++ * Context: May sleep. ++ * ++ * Return: a struct clk corresponding to the clock producer, or ++ * valid IS_ERR() condition containing errno. The implementation ++ * uses @dev and @id to determine the clock consumer, and thereby ++ * the clock producer. (IOW, @id may be identical strings, but ++ * clk_get may return different clock producers depending on @dev.) ++ * ++ * The returned clk (if valid) is prepared and enabled. 
++ * ++ * The clock will automatically be disabled, unprepared and freed ++ * when the device is unbound from the bus. ++ */ ++struct clk *devm_clk_get_enabled(struct device *dev, const char *id); ++ + /** + * devm_clk_get_optional - lookup and obtain a managed reference to an optional + * clock producer. +@@ -469,6 +510,50 @@ struct clk *devm_clk_get(struct device *dev, const char *id); + */ + struct clk *devm_clk_get_optional(struct device *dev, const char *id); + ++/** ++ * devm_clk_get_optional_prepared - devm_clk_get_optional() + clk_prepare() ++ * @dev: device for clock "consumer" ++ * @id: clock consumer ID ++ * ++ * Context: May sleep. ++ * ++ * Return: a struct clk corresponding to the clock producer, or ++ * valid IS_ERR() condition containing errno. The implementation ++ * uses @dev and @id to determine the clock consumer, and thereby ++ * the clock producer. If no such clk is found, it returns NULL ++ * which serves as a dummy clk. That's the only difference compared ++ * to devm_clk_get_prepared(). ++ * ++ * The returned clk (if valid) is prepared. Drivers must however ++ * assume that the clock is not enabled. ++ * ++ * The clock will automatically be unprepared and freed when the ++ * device is unbound from the bus. ++ */ ++struct clk *devm_clk_get_optional_prepared(struct device *dev, const char *id); ++ ++/** ++ * devm_clk_get_optional_enabled - devm_clk_get_optional() + ++ * clk_prepare_enable() ++ * @dev: device for clock "consumer" ++ * @id: clock consumer ID ++ * ++ * Context: May sleep. ++ * ++ * Return: a struct clk corresponding to the clock producer, or ++ * valid IS_ERR() condition containing errno. The implementation ++ * uses @dev and @id to determine the clock consumer, and thereby ++ * the clock producer. If no such clk is found, it returns NULL ++ * which serves as a dummy clk. That's the only difference compared ++ * to devm_clk_get_enabled(). ++ * ++ * The returned clk (if valid) is prepared and enabled. ++ * ++ * The clock will automatically be disabled, unprepared and freed ++ * when the device is unbound from the bus. ++ */ ++struct clk *devm_clk_get_optional_enabled(struct device *dev, const char *id); ++ + /** + * devm_get_clk_from_child - lookup and obtain a managed reference to a + * clock producer from child node. 
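
[Illustrative sketch, not part of the patch.] Per the kernel-doc above, the
new devm_clk_get_prepared()/devm_clk_get_enabled() variants (and their
_optional_ counterparts) fold clk_prepare()/clk_prepare_enable() and the
matching teardown into the device-managed lifetime. A minimal consumer
(the foo_probe function and the "core" consumer ID are hypothetical):

    #include <linux/clk.h>
    #include <linux/err.h>
    #include <linux/platform_device.h>

    static int foo_probe(struct platform_device *pdev)
    {
            struct clk *core;

            /* One call replaces devm_clk_get() + clk_prepare_enable()
             * and the clk_disable_unprepare() otherwise needed in
             * remove() and in error paths: the clock is disabled,
             * unprepared and freed automatically on unbind. */
            core = devm_clk_get_enabled(&pdev->dev, "core");
            if (IS_ERR(core))
                    return PTR_ERR(core);

            return 0;
    }

Because the devm core runs the release action when the device is unbound,
the driver needs no clock handling in its remove() path at all.
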
+@@ -813,12 +898,36 @@ static inline struct clk *devm_clk_get(struct device *dev, const char *id) + return NULL; + } + ++static inline struct clk *devm_clk_get_prepared(struct device *dev, ++ const char *id) ++{ ++ return NULL; ++} ++ ++static inline struct clk *devm_clk_get_enabled(struct device *dev, ++ const char *id) ++{ ++ return NULL; ++} ++ + static inline struct clk *devm_clk_get_optional(struct device *dev, + const char *id) + { + return NULL; + } + ++static inline struct clk *devm_clk_get_optional_prepared(struct device *dev, ++ const char *id) ++{ ++ return NULL; ++} ++ ++static inline struct clk *devm_clk_get_optional_enabled(struct device *dev, ++ const char *id) ++{ ++ return NULL; ++} ++ + static inline int __must_check devm_clk_bulk_get(struct device *dev, int num_clks, + struct clk_bulk_data *clks) + { +diff --git a/include/linux/compat.h b/include/linux/compat.h +index 1c758b0e03598..01fddf72a81f0 100644 +--- a/include/linux/compat.h ++++ b/include/linux/compat.h +@@ -235,6 +235,7 @@ typedef struct compat_siginfo { + struct { + compat_ulong_t _data; + u32 _type; ++ u32 _flags; + } _perf; + }; + } _sigfault; +diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h +index bd2b881c6b63a..b9d5f9c373a09 100644 +--- a/include/linux/compiler-gcc.h ++++ b/include/linux/compiler-gcc.h +@@ -144,3 +144,11 @@ + #else + #define __diag_GCC_8(s) + #endif ++ ++/* ++ * Prior to 9.1, -Wno-alloc-size-larger-than (and therefore the "alloc_size" ++ * attribute) do not work, and must be disabled. ++ */ ++#if GCC_VERSION < 90100 ++#undef __alloc_size__ ++#endif +diff --git a/include/linux/compiler.h b/include/linux/compiler.h +index 3d5af56337bdb..0f7fd205ab7ea 100644 +--- a/include/linux/compiler.h ++++ b/include/linux/compiler.h +@@ -117,40 +117,29 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val, + */ + #define __stringify_label(n) #n + +-#define __annotate_reachable(c) ({ \ +- asm volatile(__stringify_label(c) ":\n\t" \ +- ".pushsection .discard.reachable\n\t" \ +- ".long " __stringify_label(c) "b - .\n\t" \ +- ".popsection\n\t"); \ +-}) +-#define annotate_reachable() __annotate_reachable(__COUNTER__) +- + #define __annotate_unreachable(c) ({ \ + asm volatile(__stringify_label(c) ":\n\t" \ + ".pushsection .discard.unreachable\n\t" \ + ".long " __stringify_label(c) "b - .\n\t" \ +- ".popsection\n\t"); \ ++ ".popsection\n\t" : : "i" (c)); \ + }) + #define annotate_unreachable() __annotate_unreachable(__COUNTER__) + +-#define ASM_UNREACHABLE \ +- "999:\n\t" \ +- ".pushsection .discard.unreachable\n\t" \ +- ".long 999b - .\n\t" \ ++#define ASM_REACHABLE \ ++ "998:\n\t" \ ++ ".pushsection .discard.reachable\n\t" \ ++ ".long 998b - .\n\t" \ + ".popsection\n\t" + + /* Annotate a C jump table to allow objtool to follow the code flow */ + #define __annotate_jump_table __section(".rodata..c_jump_table") + + #else +-#define annotate_reachable() + #define annotate_unreachable() ++# define ASM_REACHABLE + #define __annotate_jump_table + #endif + +-#ifndef ASM_UNREACHABLE +-# define ASM_UNREACHABLE +-#endif + #ifndef unreachable + # define unreachable() do { \ + annotate_unreachable(); \ +diff --git a/include/linux/compiler_attributes.h b/include/linux/compiler_attributes.h +index e6ec634039658..3de06a8fae73b 100644 +--- a/include/linux/compiler_attributes.h ++++ b/include/linux/compiler_attributes.h +@@ -33,6 +33,15 @@ + #define __aligned(x) __attribute__((__aligned__(x))) + #define __aligned_largest __attribute__((__aligned__)) + ++/* ++ * Note: do not use this 
directly. Instead, use __alloc_size() since it is conditionally ++ * available and includes other attributes. ++ * ++ * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-alloc_005fsize-function-attribute ++ * clang: https://clang.llvm.org/docs/AttributeReference.html#alloc-size ++ */ ++#define __alloc_size__(x, ...) __attribute__((__alloc_size__(x, ## __VA_ARGS__))) ++ + /* + * Note: users of __always_inline currently do not write "inline" themselves, + * which seems to be required by gcc to apply the attribute according +@@ -153,6 +162,7 @@ + + /* + * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-malloc-function-attribute ++ * clang: https://clang.llvm.org/docs/AttributeReference.html#malloc + */ + #define __malloc __attribute__((__malloc__)) + +diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h +index b6ff83a714ca9..4f2203c4a2574 100644 +--- a/include/linux/compiler_types.h ++++ b/include/linux/compiler_types.h +@@ -250,6 +250,18 @@ struct ftrace_likely_data { + # define __cficanonical + #endif + ++/* ++ * Any place that could be marked with the "alloc_size" attribute is also ++ * a place to be marked with the "malloc" attribute. Do this as part of the ++ * __alloc_size macro to avoid redundant attributes and to avoid missing a ++ * __malloc marking. ++ */ ++#ifdef __alloc_size__ ++# define __alloc_size(x, ...) __alloc_size__(x, ## __VA_ARGS__) __malloc ++#else ++# define __alloc_size(x, ...) __malloc ++#endif ++ + #ifndef asm_volatile_goto + #define asm_volatile_goto(x...) asm goto(x) + #endif +diff --git a/include/linux/console.h b/include/linux/console.h +index 20874db50bc8a..a97f277cfdfa3 100644 +--- a/include/linux/console.h ++++ b/include/linux/console.h +@@ -149,6 +149,8 @@ struct console { + short flags; + short index; + int cflag; ++ uint ispeed; ++ uint ospeed; + void *data; + struct console *next; + }; +diff --git a/include/linux/coredump.h b/include/linux/coredump.h +index 78fcd776b185a..4b95e46d215f1 100644 +--- a/include/linux/coredump.h ++++ b/include/linux/coredump.h +@@ -12,6 +12,8 @@ struct core_vma_metadata { + unsigned long start, end; + unsigned long flags; + unsigned long dump_size; ++ unsigned long pgoff; ++ struct file *file; + }; + + extern int core_uses_pid; +@@ -29,9 +31,6 @@ extern int dump_emit(struct coredump_params *cprm, const void *addr, int nr); + extern int dump_align(struct coredump_params *cprm, int align); + int dump_user_range(struct coredump_params *cprm, unsigned long start, + unsigned long len); +-int dump_vma_snapshot(struct coredump_params *cprm, int *vma_count, +- struct core_vma_metadata **vma_meta, +- size_t *vma_data_size_ptr); + extern void do_coredump(const kernel_siginfo_t *siginfo); + #else + static inline void do_coredump(const kernel_siginfo_t *siginfo) {} +diff --git a/include/linux/cpu.h b/include/linux/cpu.h +index 9cf51e41e6972..6102a21a01d9a 100644 +--- a/include/linux/cpu.h ++++ b/include/linux/cpu.h +@@ -65,6 +65,11 @@ extern ssize_t cpu_show_tsx_async_abort(struct device *dev, + extern ssize_t cpu_show_itlb_multihit(struct device *dev, + struct device_attribute *attr, char *buf); + extern ssize_t cpu_show_srbds(struct device *dev, struct device_attribute *attr, char *buf); ++extern ssize_t cpu_show_mmio_stale_data(struct device *dev, ++ struct device_attribute *attr, ++ char *buf); ++extern ssize_t cpu_show_retbleed(struct device *dev, ++ struct device_attribute *attr, char *buf); + + extern __printf(4, 5) + struct device 
*cpu_device_create(struct device *parent, void *drvdata, +diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h +index ff88bb3e44fca..025391be1b199 100644 +--- a/include/linux/cpufreq.h ++++ b/include/linux/cpufreq.h +@@ -643,6 +643,11 @@ struct gov_attr_set { + /* sysfs ops for cpufreq governors */ + extern const struct sysfs_ops governor_sysfs_ops; + ++static inline struct gov_attr_set *to_gov_attr_set(struct kobject *kobj) ++{ ++ return container_of(kobj, struct gov_attr_set, kobj); ++} ++ + void gov_attr_set_init(struct gov_attr_set *attr_set, struct list_head *list_node); + void gov_attr_set_get(struct gov_attr_set *attr_set, struct list_head *list_node); + unsigned int gov_attr_set_put(struct gov_attr_set *attr_set, struct list_head *list_node); +@@ -1041,7 +1046,7 @@ static inline int of_perf_domain_get_sharing_cpumask(int pcpu, const char *list_ + if (cpu == pcpu) + continue; + +- ret = parse_perf_domain(pcpu, list_name, cell_name); ++ ret = parse_perf_domain(cpu, list_name, cell_name); + if (ret < 0) + continue; + +diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h +index 991911048857a..c88ccc48877d6 100644 +--- a/include/linux/cpuhotplug.h ++++ b/include/linux/cpuhotplug.h +@@ -99,6 +99,7 @@ enum cpuhp_state { + CPUHP_LUSTRE_CFS_DEAD, + CPUHP_AP_ARM_CACHE_B15_RAC_DEAD, + CPUHP_PADATA_DEAD, ++ CPUHP_RANDOM_PREPARE, + CPUHP_WORKQUEUE_PREP, + CPUHP_POWER_NUMA_PREPARE, + CPUHP_HRTIMERS_PREPARE, +@@ -238,6 +239,7 @@ enum cpuhp_state { + CPUHP_AP_PERF_CSKY_ONLINE, + CPUHP_AP_WATCHDOG_ONLINE, + CPUHP_AP_WORKQUEUE_ONLINE, ++ CPUHP_AP_RANDOM_ONLINE, + CPUHP_AP_RCUTREE_ONLINE, + CPUHP_AP_BASE_CACHEINFO_ONLINE, + CPUHP_AP_ONLINE_DYN, +diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h +index 1e7399fc69c0a..b3c230dea0713 100644 +--- a/include/linux/cpumask.h ++++ b/include/linux/cpumask.h +@@ -1045,4 +1045,23 @@ cpumap_print_list_to_buf(char *buf, const struct cpumask *mask, + [0] = 1UL \ + } } + ++/* ++ * Provide a valid theoretical max size for cpumap and cpulist sysfs files ++ * to avoid breaking userspace which may allocate a buffer based on the size ++ * reported by e.g. fstat. ++ * ++ * for cpumap NR_CPUS * 9/32 - 1 should be an exact length. ++ * ++ * For cpulist 7 is (ceil(log10(NR_CPUS)) + 1) allowing for NR_CPUS to be up ++ * to 2 orders of magnitude larger than 8192. And then we divide by 2 to ++ * cover a worst-case of every other cpu being on one of two nodes for a ++ * very large NR_CPUS. ++ * ++ * Use PAGE_SIZE as a minimum for smaller configurations while avoiding ++ * unsigned comparison to -1. ++ */ ++#define CPUMAP_FILE_MAX_BYTES (((NR_CPUS * 9)/32 > PAGE_SIZE) \ ++ ? (NR_CPUS * 9)/32 - 1 : PAGE_SIZE) ++#define CPULIST_FILE_MAX_BYTES (((NR_CPUS * 7)/2 > PAGE_SIZE) ? 
(NR_CPUS * 7)/2 : PAGE_SIZE) ++ + #endif /* __LINUX_CPUMASK_H */ +diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h +index c869f1e73d755..ea2d919fd9c79 100644 +--- a/include/linux/debugfs.h ++++ b/include/linux/debugfs.h +@@ -45,7 +45,7 @@ struct debugfs_u32_array { + + extern struct dentry *arch_debugfs_dir; + +-#define DEFINE_DEBUGFS_ATTRIBUTE(__fops, __get, __set, __fmt) \ ++#define DEFINE_DEBUGFS_ATTRIBUTE_XSIGNED(__fops, __get, __set, __fmt, __is_signed) \ + static int __fops ## _open(struct inode *inode, struct file *file) \ + { \ + __simple_attr_check_format(__fmt, 0ull); \ +@@ -56,10 +56,16 @@ static const struct file_operations __fops = { \ + .open = __fops ## _open, \ + .release = simple_attr_release, \ + .read = debugfs_attr_read, \ +- .write = debugfs_attr_write, \ ++ .write = (__is_signed) ? debugfs_attr_write_signed : debugfs_attr_write, \ + .llseek = no_llseek, \ + } + ++#define DEFINE_DEBUGFS_ATTRIBUTE(__fops, __get, __set, __fmt) \ ++ DEFINE_DEBUGFS_ATTRIBUTE_XSIGNED(__fops, __get, __set, __fmt, false) ++ ++#define DEFINE_DEBUGFS_ATTRIBUTE_SIGNED(__fops, __get, __set, __fmt) \ ++ DEFINE_DEBUGFS_ATTRIBUTE_XSIGNED(__fops, __get, __set, __fmt, true) ++ + typedef struct vfsmount *(*debugfs_automount_t)(struct dentry *, void *); + + #if defined(CONFIG_DEBUG_FS) +@@ -91,6 +97,8 @@ struct dentry *debugfs_create_automount(const char *name, + void debugfs_remove(struct dentry *dentry); + #define debugfs_remove_recursive debugfs_remove + ++void debugfs_lookup_and_remove(const char *name, struct dentry *parent); ++ + const struct file_operations *debugfs_real_fops(const struct file *filp); + + int debugfs_file_get(struct dentry *dentry); +@@ -100,6 +108,8 @@ ssize_t debugfs_attr_read(struct file *file, char __user *buf, + size_t len, loff_t *ppos); + ssize_t debugfs_attr_write(struct file *file, const char __user *buf, + size_t len, loff_t *ppos); ++ssize_t debugfs_attr_write_signed(struct file *file, const char __user *buf, ++ size_t len, loff_t *ppos); + + struct dentry *debugfs_rename(struct dentry *old_dir, struct dentry *old_dentry, + struct dentry *new_dir, const char *new_name); +@@ -225,6 +235,10 @@ static inline void debugfs_remove(struct dentry *dentry) + static inline void debugfs_remove_recursive(struct dentry *dentry) + { } + ++static inline void debugfs_lookup_and_remove(const char *name, ++ struct dentry *parent) ++{ } ++ + const struct file_operations *debugfs_real_fops(const struct file *filp); + + static inline int debugfs_file_get(struct dentry *dentry) +@@ -248,6 +262,13 @@ static inline ssize_t debugfs_attr_write(struct file *file, + return -ENODEV; + } + ++static inline ssize_t debugfs_attr_write_signed(struct file *file, ++ const char __user *buf, ++ size_t len, loff_t *ppos) ++{ ++ return -ENODEV; ++} ++ + static inline struct dentry *debugfs_rename(struct dentry *old_dir, struct dentry *old_dentry, + struct dentry *new_dir, char *new_name) + { +diff --git a/include/linux/delay.h b/include/linux/delay.h +index 1d0e2ce6b6d9f..e8607992c68a5 100644 +--- a/include/linux/delay.h ++++ b/include/linux/delay.h +@@ -20,6 +20,7 @@ + */ + + #include <linux/kernel.h> ++#include <linux/sched.h> + + extern unsigned long loops_per_jiffy; + +@@ -58,7 +59,18 @@ void calibrate_delay(void); + void __attribute__((weak)) calibration_delay_done(void); + void msleep(unsigned int msecs); + unsigned long msleep_interruptible(unsigned int msecs); +-void usleep_range(unsigned long min, unsigned long max); ++void usleep_range_state(unsigned long min, unsigned long max, ++ 
unsigned int state); ++ ++static inline void usleep_range(unsigned long min, unsigned long max) ++{ ++ usleep_range_state(min, max, TASK_UNINTERRUPTIBLE); ++} ++ ++static inline void usleep_idle_range(unsigned long min, unsigned long max) ++{ ++ usleep_range_state(min, max, TASK_IDLE); ++} + + static inline void ssleep(unsigned int seconds) + { +diff --git a/include/linux/devfreq.h b/include/linux/devfreq.h +index 142474b4af963..d94b9ed9443e5 100644 +--- a/include/linux/devfreq.h ++++ b/include/linux/devfreq.h +@@ -149,8 +149,8 @@ struct devfreq_stats { + * @work: delayed work for load monitoring. + * @previous_freq: previously configured frequency value. + * @last_status: devfreq user device info, performance statistics +- * @data: Private data of the governor. The devfreq framework does not +- * touch this. ++ * @data: devfreq driver pass to governors, governor should not change it. ++ * @governor_data: private data for governors, devfreq core doesn't touch it. + * @user_min_freq_req: PM QoS minimum frequency request from user (via sysfs) + * @user_max_freq_req: PM QoS maximum frequency request from user (via sysfs) + * @scaling_min_freq: Limit minimum frequency requested by OPP interface +@@ -187,7 +187,8 @@ struct devfreq { + unsigned long previous_freq; + struct devfreq_dev_status last_status; + +- void *data; /* private data for governors */ ++ void *data; ++ void *governor_data; + + struct dev_pm_qos_request user_min_freq_req; + struct dev_pm_qos_request user_max_freq_req; +diff --git a/include/linux/dim.h b/include/linux/dim.h +index b698266d00356..6c5733981563e 100644 +--- a/include/linux/dim.h ++++ b/include/linux/dim.h +@@ -21,7 +21,7 @@ + * We consider 10% difference as significant. + */ + #define IS_SIGNIFICANT_DIFF(val, ref) \ +- (((100UL * abs((val) - (ref))) / (ref)) > 10) ++ ((ref) && (((100UL * abs((val) - (ref))) / (ref)) > 10)) + + /* + * Calculate the gap between two values. 
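[Editor's note: the one-line dim.h fix above is a straight divide-by-zero guard: IS_SIGNIFICANT_DIFF() divides by (ref), so a zero reference sample previously crashed the comparison. The patched macro short-circuits on ref == 0 instead, treating it as "not significant". A standalone userspace sketch of the same logic; the function form and name are illustrative, as the kernel keeps this as a macro.]

#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

/* A >10% relative difference counts as significant. Evaluating "ref &&"
 * first means ref == 0 never reaches the division, matching the patched
 * IS_SIGNIFICANT_DIFF(). */
static bool is_significant_diff(long val, long ref)
{
        return ref && (100L * labs(val - ref)) / ref > 10;
}

int main(void)
{
        printf("%d\n", is_significant_diff(115, 100)); /* 1: 15% > 10% */
        printf("%d\n", is_significant_diff(105, 100)); /* 0: 5% <= 10% */
        printf("%d\n", is_significant_diff(5, 0));     /* 0: guarded */
        return 0;
}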
+diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h +index 8b32b4bdd5908..3ad636a13b8e9 100644 +--- a/include/linux/dma-buf.h ++++ b/include/linux/dma-buf.h +@@ -433,7 +433,7 @@ struct dma_buf { + wait_queue_head_t *poll; + + __poll_t active; +- } cb_excl, cb_shared; ++ } cb_in, cb_out; + #ifdef CONFIG_DMABUF_SYSFS_STATS + /** + * @sysfs_entry: +diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h +index e5c2c9e71bf10..9000f3ffce8b3 100644 +--- a/include/linux/dmaengine.h ++++ b/include/linux/dmaengine.h +@@ -944,10 +944,8 @@ struct dma_device { + void (*device_issue_pending)(struct dma_chan *chan); + void (*device_release)(struct dma_device *dev); + /* debugfs support */ +-#ifdef CONFIG_DEBUG_FS + void (*dbg_summary_show)(struct seq_file *s, struct dma_device *dev); + struct dentry *dbg_dev_root; +-#endif + }; + + static inline int dmaengine_slave_config(struct dma_chan *chan, +diff --git a/include/linux/dsa/ocelot.h b/include/linux/dsa/ocelot.h +index 8ae999f587c48..289064b51fa9a 100644 +--- a/include/linux/dsa/ocelot.h ++++ b/include/linux/dsa/ocelot.h +@@ -12,6 +12,7 @@ + struct ocelot_skb_cb { + struct sk_buff *clone; + unsigned int ptp_class; /* valid only for clones */ ++ u32 tstamp_lo; + u8 ptp_cmd; + u8 ts_id; + }; +diff --git a/include/linux/dynamic_debug.h b/include/linux/dynamic_debug.h +index dce631e678dd6..8d9eec5f6d8bb 100644 +--- a/include/linux/dynamic_debug.h ++++ b/include/linux/dynamic_debug.h +@@ -55,9 +55,6 @@ struct _ddebug { + + #if defined(CONFIG_DYNAMIC_DEBUG_CORE) + +-/* exported for module authors to exercise >control */ +-int dynamic_debug_exec_queries(const char *query, const char *modname); +- + int ddebug_add_module(struct _ddebug *tab, unsigned int n, + const char *modname); + extern int ddebug_remove_module(const char *mod_name); +@@ -201,7 +198,7 @@ static inline int ddebug_remove_module(const char *mod) + static inline int ddebug_dyndbg_module_param_cb(char *param, char *val, + const char *modname) + { +- if (strstr(param, "dyndbg")) { ++ if (!strcmp(param, "dyndbg")) { + /* avoid pr_warn(), which wants pr_fmt() fully defined */ + printk(KERN_WARNING "dyndbg param is supported only in " + "CONFIG_DYNAMIC_DEBUG builds\n"); +@@ -221,12 +218,6 @@ static inline int ddebug_dyndbg_module_param_cb(char *param, char *val, + rowsize, groupsize, buf, len, ascii); \ + } while (0) + +-static inline int dynamic_debug_exec_queries(const char *query, const char *modname) +-{ +- pr_warn("kernel not built with CONFIG_DYNAMIC_DEBUG_CORE\n"); +- return 0; +-} +- + #endif /* !CONFIG_DYNAMIC_DEBUG_CORE */ + + #endif +diff --git a/include/linux/efi.h b/include/linux/efi.h +index 6b5d36babfcc4..5598fc348c69a 100644 +--- a/include/linux/efi.h ++++ b/include/linux/efi.h +@@ -167,6 +167,8 @@ struct capsule_info { + size_t page_bytes_remain; + }; + ++int efi_capsule_setup_info(struct capsule_info *cap_info, void *kbuff, ++ size_t hdr_bytes); + int __efi_capsule_setup_info(struct capsule_info *cap_info); + + /* +@@ -1112,8 +1114,6 @@ void efi_check_for_embedded_firmwares(void); + static inline void efi_check_for_embedded_firmwares(void) { } + #endif + +-efi_status_t efi_random_get_seed(void); +- + /* + * Arch code can implement the following three template macros, avoiding + * reptition for the void/non-void return cases of {__,}efi_call_virt(): +@@ -1163,7 +1163,7 @@ efi_status_t efi_random_get_seed(void); + arch_efi_call_virt_teardown(); \ + }) + +-#define EFI_RANDOM_SEED_SIZE 64U ++#define EFI_RANDOM_SEED_SIZE 32U // BLAKE2S_HASH_SIZE + + struct 
linux_efi_random_seed { + u32 size; +@@ -1282,4 +1282,10 @@ static inline struct efi_mokvar_table_entry *efi_mokvar_entry_find( + } + #endif + ++#ifdef CONFIG_SYSFB ++extern void efifb_setup_from_dmi(struct screen_info *si, const char *opt); ++#else ++static inline void efifb_setup_from_dmi(struct screen_info *si, const char *opt) { } ++#endif ++ + #endif /* _LINUX_EFI_H */ +diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h +index c58d504514854..7f28fa702bb72 100644 +--- a/include/linux/etherdevice.h ++++ b/include/linux/etherdevice.h +@@ -127,7 +127,7 @@ static inline bool is_multicast_ether_addr(const u8 *addr) + #endif + } + +-static inline bool is_multicast_ether_addr_64bits(const u8 addr[6+2]) ++static inline bool is_multicast_ether_addr_64bits(const u8 *addr) + { + #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && BITS_PER_LONG == 64 + #ifdef __BIG_ENDIAN +@@ -364,8 +364,7 @@ static inline bool ether_addr_equal(const u8 *addr1, const u8 *addr2) + * Please note that alignment of addr1 & addr2 are only guaranteed to be 16 bits. + */ + +-static inline bool ether_addr_equal_64bits(const u8 addr1[6+2], +- const u8 addr2[6+2]) ++static inline bool ether_addr_equal_64bits(const u8 *addr1, const u8 *addr2) + { + #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && BITS_PER_LONG == 64 + u64 fold = (*(const u64 *)addr1) ^ (*(const u64 *)addr2); +diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h +index 849524b55d89a..3fad741df53ef 100644 +--- a/include/linux/ethtool.h ++++ b/include/linux/ethtool.h +@@ -94,7 +94,7 @@ struct ethtool_link_ext_state_info { + enum ethtool_link_ext_substate_link_logical_mismatch link_logical_mismatch; + enum ethtool_link_ext_substate_bad_signal_integrity bad_signal_integrity; + enum ethtool_link_ext_substate_cable_issue cable_issue; +- u8 __link_ext_substate; ++ u32 __link_ext_substate; + }; + }; + +diff --git a/include/linux/ethtool_netlink.h b/include/linux/ethtool_netlink.h +index 1e7bf78cb3829..aba348d58ff61 100644 +--- a/include/linux/ethtool_netlink.h ++++ b/include/linux/ethtool_netlink.h +@@ -10,6 +10,9 @@ + #define __ETHTOOL_LINK_MODE_MASK_NWORDS \ + DIV_ROUND_UP(__ETHTOOL_LINK_MODE_MASK_NBITS, 32) + ++#define ETHTOOL_PAUSE_STAT_CNT (__ETHTOOL_A_PAUSE_STAT_CNT - \ ++ ETHTOOL_A_PAUSE_STAT_TX_FRAMES) ++ + enum ethtool_multicast_groups { + ETHNL_MCGRP_MONITOR, + }; +diff --git a/include/linux/eventfd.h b/include/linux/eventfd.h +index 305d5f19093b9..36a486505b081 100644 +--- a/include/linux/eventfd.h ++++ b/include/linux/eventfd.h +@@ -40,13 +40,14 @@ struct file *eventfd_fget(int fd); + struct eventfd_ctx *eventfd_ctx_fdget(int fd); + struct eventfd_ctx *eventfd_ctx_fileget(struct file *file); + __u64 eventfd_signal(struct eventfd_ctx *ctx, __u64 n); ++__u64 eventfd_signal_mask(struct eventfd_ctx *ctx, __u64 n, unsigned mask); + int eventfd_ctx_remove_wait_queue(struct eventfd_ctx *ctx, wait_queue_entry_t *wait, + __u64 *cnt); + void eventfd_ctx_do_read(struct eventfd_ctx *ctx, __u64 *cnt); + + static inline bool eventfd_signal_allowed(void) + { +- return !current->in_eventfd_signal; ++ return !current->in_eventfd; + } + + #else /* CONFIG_EVENTFD */ +@@ -61,7 +62,13 @@ static inline struct eventfd_ctx *eventfd_ctx_fdget(int fd) + return ERR_PTR(-ENOSYS); + } + +-static inline int eventfd_signal(struct eventfd_ctx *ctx, int n) ++static inline int eventfd_signal(struct eventfd_ctx *ctx, __u64 n) ++{ ++ return -ENOSYS; ++} ++ ++static inline int eventfd_signal_mask(struct eventfd_ctx *ctx, __u64 n, ++ unsigned mask) + { + 
return -ENOSYS; + } +diff --git a/include/linux/export.h b/include/linux/export.h +index 27d848712b90b..5910ccb66ca2d 100644 +--- a/include/linux/export.h ++++ b/include/linux/export.h +@@ -2,6 +2,8 @@ + #ifndef _LINUX_EXPORT_H + #define _LINUX_EXPORT_H + ++#include <linux/stringify.h> ++ + /* + * Export symbols from the kernel to modules. Forked from module.h + * to reduce the amount of pointless cruft we feed to gcc when only +@@ -154,7 +156,6 @@ struct kernel_symbol { + #endif /* CONFIG_MODULES */ + + #ifdef DEFAULT_SYMBOL_NAMESPACE +-#include <linux/stringify.h> + #define _EXPORT_SYMBOL(sym, sec) __EXPORT_SYMBOL(sym, sec, __stringify(DEFAULT_SYMBOL_NAMESPACE)) + #else + #define _EXPORT_SYMBOL(sym, sec) __EXPORT_SYMBOL(sym, sec, "") +@@ -162,8 +163,8 @@ struct kernel_symbol { + + #define EXPORT_SYMBOL(sym) _EXPORT_SYMBOL(sym, "") + #define EXPORT_SYMBOL_GPL(sym) _EXPORT_SYMBOL(sym, "_gpl") +-#define EXPORT_SYMBOL_NS(sym, ns) __EXPORT_SYMBOL(sym, "", #ns) +-#define EXPORT_SYMBOL_NS_GPL(sym, ns) __EXPORT_SYMBOL(sym, "_gpl", #ns) ++#define EXPORT_SYMBOL_NS(sym, ns) __EXPORT_SYMBOL(sym, "", __stringify(ns)) ++#define EXPORT_SYMBOL_NS_GPL(sym, ns) __EXPORT_SYMBOL(sym, "_gpl", __stringify(ns)) + + #endif /* !__ASSEMBLY__ */ + +diff --git a/include/linux/extcon.h b/include/linux/extcon.h +index 0c19010da77fa..685401d94d398 100644 +--- a/include/linux/extcon.h ++++ b/include/linux/extcon.h +@@ -296,7 +296,7 @@ static inline void devm_extcon_unregister_notifier_all(struct device *dev, + + static inline struct extcon_dev *extcon_get_extcon_dev(const char *extcon_name) + { +- return ERR_PTR(-ENODEV); ++ return NULL; + } + + static inline struct extcon_dev *extcon_find_edev_by_node(struct device_node *node) +diff --git a/include/linux/fb.h b/include/linux/fb.h +index 5950f8f5dc74d..3d7306c9a7065 100644 +--- a/include/linux/fb.h ++++ b/include/linux/fb.h +@@ -502,6 +502,7 @@ struct fb_info { + } *apertures; + + bool skip_vt_switch; /* no VT switch on suspend/resume required */ ++ bool forced_out; /* set when being removed by another driver */ + }; + + static inline struct apertures_struct *alloc_apertures(unsigned int max_num) { +@@ -610,6 +611,7 @@ extern int remove_conflicting_pci_framebuffers(struct pci_dev *pdev, + const char *name); + extern int remove_conflicting_framebuffers(struct apertures_struct *a, + const char *name, bool primary); ++extern bool is_firmware_framebuffer(struct apertures_struct *a); + extern int fb_prepare_logo(struct fb_info *fb_info, int rotate); + extern int fb_show_logo(struct fb_info *fb_info, int rotate); + extern char* fb_get_buffer_offset(struct fb_info *info, struct fb_pixmap *buf, u32 size); +diff --git a/include/linux/fbcon.h b/include/linux/fbcon.h +index ff5596dd30f85..2382dec6d6ab8 100644 +--- a/include/linux/fbcon.h ++++ b/include/linux/fbcon.h +@@ -15,6 +15,8 @@ void fbcon_new_modelist(struct fb_info *info); + void fbcon_get_requirement(struct fb_info *info, + struct fb_blit_caps *caps); + void fbcon_fb_blanked(struct fb_info *info, int blank); ++int fbcon_modechange_possible(struct fb_info *info, ++ struct fb_var_screeninfo *var); + void fbcon_update_vcs(struct fb_info *info, bool all); + void fbcon_remap_all(struct fb_info *info); + int fbcon_set_con2fb_map_ioctl(void __user *argp); +@@ -33,6 +35,8 @@ static inline void fbcon_new_modelist(struct fb_info *info) {} + static inline void fbcon_get_requirement(struct fb_info *info, + struct fb_blit_caps *caps) {} + static inline void fbcon_fb_blanked(struct fb_info *info, int blank) {} ++static inline int 
fbcon_modechange_possible(struct fb_info *info, ++ struct fb_var_screeninfo *var) { return 0; } + static inline void fbcon_update_vcs(struct fb_info *info, bool all) {} + static inline void fbcon_remap_all(struct fb_info *info) {} + static inline int fbcon_set_con2fb_map_ioctl(void __user *argp) { return 0; } +diff --git a/include/linux/filter.h b/include/linux/filter.h +index ef03ff34234d8..a9956b681f090 100644 +--- a/include/linux/filter.h ++++ b/include/linux/filter.h +@@ -554,9 +554,9 @@ struct bpf_binary_header { + }; + + struct bpf_prog_stats { +- u64 cnt; +- u64 nsecs; +- u64 misses; ++ u64_stats_t cnt; ++ u64_stats_t nsecs; ++ u64_stats_t misses; + struct u64_stats_sync syncp; + } __aligned(2 * sizeof(u64)); + +@@ -613,13 +613,14 @@ static __always_inline u32 __bpf_prog_run(const struct bpf_prog *prog, + if (static_branch_unlikely(&bpf_stats_enabled_key)) { + struct bpf_prog_stats *stats; + u64 start = sched_clock(); ++ unsigned long flags; + + ret = dfunc(ctx, prog->insnsi, prog->bpf_func); + stats = this_cpu_ptr(prog->stats); +- u64_stats_update_begin(&stats->syncp); +- stats->cnt++; +- stats->nsecs += sched_clock() - start; +- u64_stats_update_end(&stats->syncp); ++ flags = u64_stats_update_begin_irqsave(&stats->syncp); ++ u64_stats_inc(&stats->cnt); ++ u64_stats_add(&stats->nsecs, sched_clock() - start); ++ u64_stats_update_end_irqrestore(&stats->syncp, flags); + } else { + ret = dfunc(ctx, prog->insnsi, prog->bpf_func); + } +@@ -638,9 +639,6 @@ static __always_inline u32 bpf_prog_run(const struct bpf_prog *prog, const void + * This uses migrate_disable/enable() explicitly to document that the + * invocation of a BPF program does not require reentrancy protection + * against a BPF program which is invoked from a preempting task. +- * +- * For non RT enabled kernels migrate_disable/enable() maps to +- * preempt_disable/enable(), i.e. it disables also preemption. + */ + static inline u32 bpf_prog_run_pin_on_cpu(const struct bpf_prog *prog, + const void *ctx) +diff --git a/include/linux/fortify-string.h b/include/linux/fortify-string.h +index c1be37437e778..0c70febd03e95 100644 +--- a/include/linux/fortify-string.h ++++ b/include/linux/fortify-string.h +@@ -280,7 +280,10 @@ __FORTIFY_INLINE char *strcpy(char *p, const char *q) + if (p_size == (size_t)-1 && q_size == (size_t)-1) + return __underlying_strcpy(p, q); + size = strlen(q) + 1; +- /* test here to use the more stringent object size */ ++ /* Compile-time check for const size overflow. */ ++ if (__builtin_constant_p(size) && p_size < size) ++ __write_overflow(); ++ /* Run-time check for dynamic size overflow. 
*/ + if (p_size < size) + fortify_panic(__func__); + memcpy(p, q, size); +diff --git a/include/linux/fs.h b/include/linux/fs.h +index e7a633353fd20..1e1ac116dd136 100644 +--- a/include/linux/fs.h ++++ b/include/linux/fs.h +@@ -41,6 +41,7 @@ + #include <linux/stddef.h> + #include <linux/mount.h> + #include <linux/cred.h> ++#include <linux/mnt_idmapping.h> + + #include <asm/byteorder.h> + #include <uapi/linux/fs.h> +@@ -1194,6 +1195,7 @@ extern int locks_delete_block(struct file_lock *); + extern int vfs_test_lock(struct file *, struct file_lock *); + extern int vfs_lock_file(struct file *, unsigned int, struct file_lock *, struct file_lock *); + extern int vfs_cancel_lock(struct file *filp, struct file_lock *fl); ++bool vfs_inode_has_locks(struct inode *inode); + extern int locks_lock_inode_wait(struct inode *inode, struct file_lock *fl); + extern int __break_lease(struct inode *inode, unsigned int flags, unsigned int type); + extern void lease_get_mtime(struct inode *, struct timespec64 *time); +@@ -1306,6 +1308,11 @@ static inline int vfs_cancel_lock(struct file *filp, struct file_lock *fl) + return 0; + } + ++static inline bool vfs_inode_has_locks(struct inode *inode) ++{ ++ return false; ++} ++ + static inline int locks_lock_inode_wait(struct inode *inode, struct file_lock *fl) + { + return -ENOLCK; +@@ -1486,7 +1493,7 @@ struct super_block { + const struct xattr_handler **s_xattr; + #ifdef CONFIG_FS_ENCRYPTION + const struct fscrypt_operations *s_cop; +- struct key *s_master_keys; /* master crypto keys in use */ ++ struct fscrypt_keyring *s_master_keys; /* master crypto keys in use */ + #endif + #ifdef CONFIG_FS_VERITY + const struct fsverity_operations *s_vop; +@@ -1601,6 +1608,11 @@ struct super_block { + struct list_head s_inodes_wb; /* writeback inodes */ + } __randomize_layout; + ++static inline struct user_namespace *i_user_ns(const struct inode *inode) ++{ ++ return inode->i_sb->s_user_ns; ++} ++ + /* Helper functions so that in most cases filesystems will + * not need to deal directly with kuid_t and kgid_t and can + * instead deal with the raw numeric values that are stored +@@ -1608,50 +1620,22 @@ struct super_block { + */ + static inline uid_t i_uid_read(const struct inode *inode) + { +- return from_kuid(inode->i_sb->s_user_ns, inode->i_uid); ++ return from_kuid(i_user_ns(inode), inode->i_uid); + } + + static inline gid_t i_gid_read(const struct inode *inode) + { +- return from_kgid(inode->i_sb->s_user_ns, inode->i_gid); ++ return from_kgid(i_user_ns(inode), inode->i_gid); + } + + static inline void i_uid_write(struct inode *inode, uid_t uid) + { +- inode->i_uid = make_kuid(inode->i_sb->s_user_ns, uid); ++ inode->i_uid = make_kuid(i_user_ns(inode), uid); + } + + static inline void i_gid_write(struct inode *inode, gid_t gid) + { +- inode->i_gid = make_kgid(inode->i_sb->s_user_ns, gid); +-} +- +-/** +- * kuid_into_mnt - map a kuid down into a mnt_userns +- * @mnt_userns: user namespace of the relevant mount +- * @kuid: kuid to be mapped +- * +- * Return: @kuid mapped according to @mnt_userns. +- * If @kuid has no mapping INVALID_UID is returned. +- */ +-static inline kuid_t kuid_into_mnt(struct user_namespace *mnt_userns, +- kuid_t kuid) +-{ +- return make_kuid(mnt_userns, __kuid_val(kuid)); +-} +- +-/** +- * kgid_into_mnt - map a kgid down into a mnt_userns +- * @mnt_userns: user namespace of the relevant mount +- * @kgid: kgid to be mapped +- * +- * Return: @kgid mapped according to @mnt_userns. +- * If @kgid has no mapping INVALID_GID is returned. 
+- */ +-static inline kgid_t kgid_into_mnt(struct user_namespace *mnt_userns, +- kgid_t kgid) +-{ +- return make_kgid(mnt_userns, __kgid_val(kgid)); ++ inode->i_gid = make_kgid(i_user_ns(inode), gid); + } + + /** +@@ -1665,7 +1649,7 @@ static inline kgid_t kgid_into_mnt(struct user_namespace *mnt_userns, + static inline kuid_t i_uid_into_mnt(struct user_namespace *mnt_userns, + const struct inode *inode) + { +- return kuid_into_mnt(mnt_userns, inode->i_uid); ++ return mapped_kuid_fs(mnt_userns, i_user_ns(inode), inode->i_uid); + } + + /** +@@ -1679,69 +1663,7 @@ static inline kuid_t i_uid_into_mnt(struct user_namespace *mnt_userns, + static inline kgid_t i_gid_into_mnt(struct user_namespace *mnt_userns, + const struct inode *inode) + { +- return kgid_into_mnt(mnt_userns, inode->i_gid); +-} +- +-/** +- * kuid_from_mnt - map a kuid up into a mnt_userns +- * @mnt_userns: user namespace of the relevant mount +- * @kuid: kuid to be mapped +- * +- * Return: @kuid mapped up according to @mnt_userns. +- * If @kuid has no mapping INVALID_UID is returned. +- */ +-static inline kuid_t kuid_from_mnt(struct user_namespace *mnt_userns, +- kuid_t kuid) +-{ +- return KUIDT_INIT(from_kuid(mnt_userns, kuid)); +-} +- +-/** +- * kgid_from_mnt - map a kgid up into a mnt_userns +- * @mnt_userns: user namespace of the relevant mount +- * @kgid: kgid to be mapped +- * +- * Return: @kgid mapped up according to @mnt_userns. +- * If @kgid has no mapping INVALID_GID is returned. +- */ +-static inline kgid_t kgid_from_mnt(struct user_namespace *mnt_userns, +- kgid_t kgid) +-{ +- return KGIDT_INIT(from_kgid(mnt_userns, kgid)); +-} +- +-/** +- * mapped_fsuid - return caller's fsuid mapped up into a mnt_userns +- * @mnt_userns: user namespace of the relevant mount +- * +- * Use this helper to initialize a new vfs or filesystem object based on +- * the caller's fsuid. A common example is initializing the i_uid field of +- * a newly allocated inode triggered by a creation event such as mkdir or +- * O_CREAT. Other examples include the allocation of quotas for a specific +- * user. +- * +- * Return: the caller's current fsuid mapped up according to @mnt_userns. +- */ +-static inline kuid_t mapped_fsuid(struct user_namespace *mnt_userns) +-{ +- return kuid_from_mnt(mnt_userns, current_fsuid()); +-} +- +-/** +- * mapped_fsgid - return caller's fsgid mapped up into a mnt_userns +- * @mnt_userns: user namespace of the relevant mount +- * +- * Use this helper to initialize a new vfs or filesystem object based on +- * the caller's fsgid. A common example is initializing the i_gid field of +- * a newly allocated inode triggered by a creation event such as mkdir or +- * O_CREAT. Other examples include the allocation of quotas for a specific +- * user. +- * +- * Return: the caller's current fsgid mapped up according to @mnt_userns. 
+- */ +-static inline kgid_t mapped_fsgid(struct user_namespace *mnt_userns) +-{ +- return kgid_from_mnt(mnt_userns, current_fsgid()); ++ return mapped_kgid_fs(mnt_userns, i_user_ns(inode), inode->i_gid); + } + + /** +@@ -1755,7 +1677,7 @@ static inline kgid_t mapped_fsgid(struct user_namespace *mnt_userns) + static inline void inode_fsuid_set(struct inode *inode, + struct user_namespace *mnt_userns) + { +- inode->i_uid = mapped_fsuid(mnt_userns); ++ inode->i_uid = mapped_fsuid(mnt_userns, i_user_ns(inode)); + } + + /** +@@ -1769,7 +1691,7 @@ static inline void inode_fsuid_set(struct inode *inode, + static inline void inode_fsgid_set(struct inode *inode, + struct user_namespace *mnt_userns) + { +- inode->i_gid = mapped_fsgid(mnt_userns); ++ inode->i_gid = mapped_fsgid(mnt_userns, i_user_ns(inode)); + } + + /** +@@ -1786,10 +1708,18 @@ static inline void inode_fsgid_set(struct inode *inode, + static inline bool fsuidgid_has_mapping(struct super_block *sb, + struct user_namespace *mnt_userns) + { +- struct user_namespace *s_user_ns = sb->s_user_ns; ++ struct user_namespace *fs_userns = sb->s_user_ns; ++ kuid_t kuid; ++ kgid_t kgid; + +- return kuid_has_mapping(s_user_ns, mapped_fsuid(mnt_userns)) && +- kgid_has_mapping(s_user_ns, mapped_fsgid(mnt_userns)); ++ kuid = mapped_fsuid(mnt_userns, fs_userns); ++ if (!uid_valid(kuid)) ++ return false; ++ kgid = mapped_fsgid(mnt_userns, fs_userns); ++ if (!gid_valid(kgid)) ++ return false; ++ return kuid_has_mapping(fs_userns, kuid) && ++ kgid_has_mapping(fs_userns, kgid); + } + + extern struct timespec64 current_time(struct inode *inode); +@@ -2066,6 +1996,14 @@ struct dir_context { + */ + #define REMAP_FILE_ADVISORY (REMAP_FILE_CAN_SHORTEN) + ++/* ++ * These flags control the behavior of vfs_copy_file_range(). ++ * They are not available to the user via syscall. ++ * ++ * COPY_FILE_SPLICE: call splice direct instead of fs clone/copy ops ++ */ ++#define COPY_FILE_SPLICE (1 << 0) ++ + struct iov_iter; + + struct file_operations { +@@ -2364,13 +2302,14 @@ static inline void kiocb_clone(struct kiocb *kiocb, struct kiocb *kiocb_src, + * don't have to write inode on fdatasync() when only + * e.g. the timestamps have changed. + * I_DIRTY_PAGES Inode has dirty pages. Inode itself may be clean. +- * I_DIRTY_TIME The inode itself only has dirty timestamps, and the ++ * I_DIRTY_TIME The inode itself has dirty timestamps, and the + * lazytime mount option is enabled. We keep track of this + * separately from I_DIRTY_SYNC in order to implement + * lazytime. This gets cleared if I_DIRTY_INODE +- * (I_DIRTY_SYNC and/or I_DIRTY_DATASYNC) gets set. I.e. +- * either I_DIRTY_TIME *or* I_DIRTY_INODE can be set in +- * i_state, but not both. I_DIRTY_PAGES may still be set. ++ * (I_DIRTY_SYNC and/or I_DIRTY_DATASYNC) gets set. But ++ * I_DIRTY_TIME can still be set if I_DIRTY_SYNC is already ++ * in place because writeback might already be in progress ++ * and we don't want to lose the time update + * I_NEW Serves as both a mutex and completion notification. + * New inodes set I_NEW. 
If two processes both create + * the same inode, one of them will release its inode and +@@ -2498,6 +2437,8 @@ enum file_time_flags { + + extern bool atime_needs_update(const struct path *, struct inode *); + extern void touch_atime(const struct path *); ++int inode_update_time(struct inode *inode, struct timespec64 *time, int flags); ++ + static inline void file_accessed(struct file *file) + { + if (!(file->f_flags & O_NOATIME)) +@@ -2724,6 +2665,21 @@ static inline struct user_namespace *file_mnt_user_ns(struct file *file) + { + return mnt_user_ns(file->f_path.mnt); + } ++ ++/** ++ * is_idmapped_mnt - check whether a mount is mapped ++ * @mnt: the mount to check ++ * ++ * If @mnt has an idmapping attached different from the ++ * filesystem's idmapping then @mnt is mapped. ++ * ++ * Return: true if mount is mapped, false if not. ++ */ ++static inline bool is_idmapped_mnt(const struct vfsmount *mnt) ++{ ++ return mnt_user_ns(mnt) != mnt->mnt_sb->s_user_ns; ++} ++ + extern long vfs_truncate(const struct path *, loff_t); + int do_truncate(struct user_namespace *, struct dentry *, loff_t start, + unsigned int time_attrs, struct file *filp); +@@ -3557,7 +3513,7 @@ void simple_transaction_set(struct file *file, size_t n); + * All attributes contain a text representation of a numeric value + * that are accessed with the get() and set() functions. + */ +-#define DEFINE_SIMPLE_ATTRIBUTE(__fops, __get, __set, __fmt) \ ++#define DEFINE_SIMPLE_ATTRIBUTE_XSIGNED(__fops, __get, __set, __fmt, __is_signed) \ + static int __fops ## _open(struct inode *inode, struct file *file) \ + { \ + __simple_attr_check_format(__fmt, 0ull); \ +@@ -3568,10 +3524,16 @@ static const struct file_operations __fops = { \ + .open = __fops ## _open, \ + .release = simple_attr_release, \ + .read = simple_attr_read, \ +- .write = simple_attr_write, \ ++ .write = (__is_signed) ? simple_attr_write_signed : simple_attr_write, \ + .llseek = generic_file_llseek, \ + } + ++#define DEFINE_SIMPLE_ATTRIBUTE(__fops, __get, __set, __fmt) \ ++ DEFINE_SIMPLE_ATTRIBUTE_XSIGNED(__fops, __get, __set, __fmt, false) ++ ++#define DEFINE_SIMPLE_ATTRIBUTE_SIGNED(__fops, __get, __set, __fmt) \ ++ DEFINE_SIMPLE_ATTRIBUTE_XSIGNED(__fops, __get, __set, __fmt, true) ++ + static inline __printf(1, 2) + void __simple_attr_check_format(const char *fmt, ...) + { +@@ -3586,6 +3548,8 @@ ssize_t simple_attr_read(struct file *file, char __user *buf, + size_t len, loff_t *ppos); + ssize_t simple_attr_write(struct file *file, const char __user *buf, + size_t len, loff_t *ppos); ++ssize_t simple_attr_write_signed(struct file *file, const char __user *buf, ++ size_t len, loff_t *ppos); + + struct ctl_table; + int proc_nr_files(struct ctl_table *table, int write, +diff --git a/include/linux/fs_context.h b/include/linux/fs_context.h +index 6b54982fc5f37..13fa6f3df8e46 100644 +--- a/include/linux/fs_context.h ++++ b/include/linux/fs_context.h +@@ -142,6 +142,8 @@ extern void put_fs_context(struct fs_context *fc); + extern int vfs_parse_fs_param_source(struct fs_context *fc, + struct fs_parameter *param); + extern void fc_drop_locked(struct fs_context *fc); ++int reconfigure_single(struct super_block *s, ++ int flags, void *data); + + /* + * sget() wrappers to be called from the ->get_tree() op. 
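[Editor's note: the DEFINE_SIMPLE_ATTRIBUTE_XSIGNED split a few hunks above, like its debugfs counterpart earlier in this patch, exists because the generic attribute write path parses input as an unsigned integer, so writing a value such as "-1" to a signed attribute was rejected. The new _SIGNED variants route writes through a signed parser. A userspace sketch of the distinction, with strtoll()/strtoull() standing in for the kernel's kstrtoll()/kstrtoull() and an illustrative function name.]

#include <errno.h>
#include <stdbool.h>
#include <stdlib.h>

/* The unsigned path rejects a leading '-', so "-1" only parses through
 * the signed path; this is the behavioral difference between the
 * simple_attr_write() and simple_attr_write_signed() handlers above. */
static int attr_parse(const char *buf, bool is_signed, long long *out)
{
        char *end;

        errno = 0;
        if (is_signed) {
                *out = strtoll(buf, &end, 0);
        } else {
                if (buf[0] == '-')
                        return -EINVAL;
                *out = (long long)strtoull(buf, &end, 0);
        }
        if (errno || end == buf)
                return -EINVAL;
        return 0;
}

With this split, an attribute declared via DEFINE_SIMPLE_ATTRIBUTE_SIGNED() accepts "-1" where the unsigned variant returns -EINVAL.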
+diff --git a/include/linux/fscache.h b/include/linux/fscache.h +index a4dab59986137..3b2282c157f79 100644 +--- a/include/linux/fscache.h ++++ b/include/linux/fscache.h +@@ -167,7 +167,7 @@ struct fscache_cookie { + + static inline bool fscache_cookie_enabled(struct fscache_cookie *cookie) + { +- return test_bit(FSCACHE_COOKIE_ENABLED, &cookie->flags); ++ return fscache_cookie_valid(cookie) && test_bit(FSCACHE_COOKIE_ENABLED, &cookie->flags); + } + + /* +diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h +index e912ed9141d9d..3c7ea2cf85a58 100644 +--- a/include/linux/fscrypt.h ++++ b/include/linux/fscrypt.h +@@ -294,7 +294,7 @@ fscrypt_free_dummy_policy(struct fscrypt_dummy_policy *dummy_policy) + } + + /* keyring.c */ +-void fscrypt_sb_free(struct super_block *sb); ++void fscrypt_destroy_keyring(struct super_block *sb); + int fscrypt_ioctl_add_key(struct file *filp, void __user *arg); + int fscrypt_ioctl_remove_key(struct file *filp, void __user *arg); + int fscrypt_ioctl_remove_key_all_users(struct file *filp, void __user *arg); +@@ -482,7 +482,7 @@ fscrypt_free_dummy_policy(struct fscrypt_dummy_policy *dummy_policy) + } + + /* keyring.c */ +-static inline void fscrypt_sb_free(struct super_block *sb) ++static inline void fscrypt_destroy_keyring(struct super_block *sb) + { + } + +diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h +index 12d3a7d308ab9..a9477c14fad5c 100644 +--- a/include/linux/fsnotify.h ++++ b/include/linux/fsnotify.h +@@ -212,6 +212,42 @@ static inline void fsnotify_link(struct inode *dir, struct inode *inode, + fsnotify_name(dir, FS_CREATE, inode, &new_dentry->d_name, 0); + } + ++/* ++ * fsnotify_delete - @dentry was unlinked and unhashed ++ * ++ * Caller must make sure that dentry->d_name is stable. ++ * ++ * Note: unlike fsnotify_unlink(), we have to pass also the unlinked inode ++ * as this may be called after d_delete() and old_dentry may be negative. ++ */ ++static inline void fsnotify_delete(struct inode *dir, struct inode *inode, ++ struct dentry *dentry) ++{ ++ __u32 mask = FS_DELETE; ++ ++ if (S_ISDIR(inode->i_mode)) ++ mask |= FS_ISDIR; ++ ++ fsnotify_name(dir, mask, inode, &dentry->d_name, 0); ++} ++ ++/** ++ * d_delete_notify - delete a dentry and call fsnotify_delete() ++ * @dentry: The dentry to delete ++ * ++ * This helper is used to guaranty that the unlinked inode cannot be found ++ * by lookup of this name after fsnotify_delete() event has been delivered. 
++ */ ++static inline void d_delete_notify(struct inode *dir, struct dentry *dentry) ++{ ++ struct inode *inode = d_inode(dentry); ++ ++ ihold(inode); ++ d_delete(dentry); ++ fsnotify_delete(dir, inode, dentry); ++ iput(inode); ++} ++ + /* + * fsnotify_unlink - 'name' was unlinked + * +@@ -219,10 +255,10 @@ static inline void fsnotify_link(struct inode *dir, struct inode *inode, + */ + static inline void fsnotify_unlink(struct inode *dir, struct dentry *dentry) + { +- /* Expected to be called before d_delete() */ +- WARN_ON_ONCE(d_is_negative(dentry)); ++ if (WARN_ON_ONCE(d_is_negative(dentry))) ++ return; + +- fsnotify_dirent(dir, dentry, FS_DELETE); ++ fsnotify_delete(dir, d_inode(dentry), dentry); + } + + /* +@@ -242,10 +278,10 @@ static inline void fsnotify_mkdir(struct inode *inode, struct dentry *dentry) + */ + static inline void fsnotify_rmdir(struct inode *dir, struct dentry *dentry) + { +- /* Expected to be called before d_delete() */ +- WARN_ON_ONCE(d_is_negative(dentry)); ++ if (WARN_ON_ONCE(d_is_negative(dentry))) ++ return; + +- fsnotify_dirent(dir, dentry, FS_DELETE | FS_ISDIR); ++ fsnotify_delete(dir, d_inode(dentry), dentry); + } + + /* +diff --git a/include/linux/fwnode.h b/include/linux/fwnode.h +index 9f4ad719bfe3f..2d68606fb725d 100644 +--- a/include/linux/fwnode.h ++++ b/include/linux/fwnode.h +@@ -147,12 +147,12 @@ struct fwnode_operations { + int (*add_links)(struct fwnode_handle *fwnode); + }; + +-#define fwnode_has_op(fwnode, op) \ +- ((fwnode) && (fwnode)->ops && (fwnode)->ops->op) ++#define fwnode_has_op(fwnode, op) \ ++ (!IS_ERR_OR_NULL(fwnode) && (fwnode)->ops && (fwnode)->ops->op) ++ + #define fwnode_call_int_op(fwnode, op, ...) \ +- (fwnode ? (fwnode_has_op(fwnode, op) ? \ +- (fwnode)->ops->op(fwnode, ## __VA_ARGS__) : -ENXIO) : \ +- -EINVAL) ++ (fwnode_has_op(fwnode, op) ? \ ++ (fwnode)->ops->op(fwnode, ## __VA_ARGS__) : (IS_ERR_OR_NULL(fwnode) ? -EINVAL : -ENXIO)) + + #define fwnode_call_bool_op(fwnode, op, ...) \ + (fwnode_has_op(fwnode, op) ? \ +diff --git a/include/linux/genhd.h b/include/linux/genhd.h +index 0f5315c2b5a34..0b48a0cf42624 100644 +--- a/include/linux/genhd.h ++++ b/include/linux/genhd.h +@@ -12,12 +12,10 @@ + + #include <linux/types.h> + #include <linux/kdev_t.h> +-#include <linux/rcupdate.h> +-#include <linux/slab.h> +-#include <linux/percpu-refcount.h> + #include <linux/uuid.h> + #include <linux/blk_types.h> +-#include <asm/local.h> ++#include <linux/device.h> ++#include <linux/xarray.h> + + extern const struct device_type disk_type; + extern struct device_type part_type; +@@ -26,14 +24,6 @@ extern struct class block_class; + #define DISK_MAX_PARTS 256 + #define DISK_NAME_LEN 32 + +-#include <linux/major.h> +-#include <linux/device.h> +-#include <linux/smp.h> +-#include <linux/string.h> +-#include <linux/fs.h> +-#include <linux/workqueue.h> +-#include <linux/xarray.h> +- + #define PARTITION_META_INFO_VOLNAMELTH 64 + /* + * Enough for the string representation of any kind of UUID plus NULL. 
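[Editor's note: the fwnode_call_int_op() rewrite above changes behavior as well as layout: fwnode_has_op() now also treats ERR_PTR values as invalid, and the macro then distinguishes an invalid handle (-EINVAL) from a valid handle that lacks the requested operation (-ENXIO). A stripped-down sketch of that error convention; the foo_* types and function are illustrative, not the kernel's fwnode API.]

#include <errno.h>
#include <stddef.h>

struct foo_ops {
        int (*probe)(void *ctx);
};

struct foo_handle {
        const struct foo_ops *ops;
};

/* Mirrors the patched fwnode_call_int_op() convention:
 *   -EINVAL: the handle itself is invalid (NULL here; NULL or IS_ERR()
 *            in the kernel)
 *   -ENXIO : the handle is valid but does not implement the operation */
static int foo_call_probe(const struct foo_handle *h, void *ctx)
{
        if (!h)
                return -EINVAL;
        if (!h->ops || !h->ops->probe)
                return -ENXIO;
        return h->ops->probe(ctx);
}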
+diff --git a/include/linux/goldfish.h b/include/linux/goldfish.h +index 12be1601fd845..bcc17f95b9066 100644 +--- a/include/linux/goldfish.h ++++ b/include/linux/goldfish.h +@@ -8,14 +8,21 @@ + + /* Helpers for Goldfish virtual platform */ + ++#ifndef gf_ioread32 ++#define gf_ioread32 ioread32 ++#endif ++#ifndef gf_iowrite32 ++#define gf_iowrite32 iowrite32 ++#endif ++ + static inline void gf_write_ptr(const void *ptr, void __iomem *portl, + void __iomem *porth) + { + const unsigned long addr = (unsigned long)ptr; + +- __raw_writel(lower_32_bits(addr), portl); ++ gf_iowrite32(lower_32_bits(addr), portl); + #ifdef CONFIG_64BIT +- __raw_writel(upper_32_bits(addr), porth); ++ gf_iowrite32(upper_32_bits(addr), porth); + #endif + } + +@@ -23,9 +30,9 @@ static inline void gf_write_dma_addr(const dma_addr_t addr, + void __iomem *portl, + void __iomem *porth) + { +- __raw_writel(lower_32_bits(addr), portl); ++ gf_iowrite32(lower_32_bits(addr), portl); + #ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT +- __raw_writel(upper_32_bits(addr), porth); ++ gf_iowrite32(upper_32_bits(addr), porth); + #endif + } + +diff --git a/include/linux/gpio/consumer.h b/include/linux/gpio/consumer.h +index 97a28ad3393b5..369902d52f9cd 100644 +--- a/include/linux/gpio/consumer.h ++++ b/include/linux/gpio/consumer.h +@@ -8,27 +8,16 @@ + #include <linux/err.h> + + struct device; +- +-/** +- * Opaque descriptor for a GPIO. These are obtained using gpiod_get() and are +- * preferable to the old integer-based handles. +- * +- * Contrary to integers, a pointer to a gpio_desc is guaranteed to be valid +- * until the GPIO is released. +- */ + struct gpio_desc; +- +-/** +- * Opaque descriptor for a structure of GPIO array attributes. This structure +- * is attached to struct gpiod_descs obtained from gpiod_get_array() and can be +- * passed back to get/set array functions in order to activate fast processing +- * path if applicable. +- */ + struct gpio_array; + + /** +- * Struct containing an array of descriptors that can be obtained using +- * gpiod_get_array(). ++ * struct gpio_descs - Struct containing an array of descriptors that can be ++ * obtained using gpiod_get_array() ++ * ++ * @info: Pointer to the opaque gpio_array structure ++ * @ndescs: Number of held descriptors ++ * @desc: Array of pointers to GPIO descriptors + */ + struct gpio_descs { + struct gpio_array *info; +@@ -43,8 +32,16 @@ struct gpio_descs { + #define GPIOD_FLAGS_BIT_NONEXCLUSIVE BIT(4) + + /** +- * Optional flags that can be passed to one of gpiod_* to configure direction +- * and output value. These values cannot be OR'd. ++ * enum gpiod_flags - Optional flags that can be passed to one of gpiod_* to ++ * configure direction and output value. These values ++ * cannot be OR'd. ++ * ++ * @GPIOD_ASIS: Don't change anything ++ * @GPIOD_IN: Set lines to input mode ++ * @GPIOD_OUT_LOW: Set lines to output and drive them low ++ * @GPIOD_OUT_HIGH: Set lines to output and drive them high ++ * @GPIOD_OUT_LOW_OPEN_DRAIN: Set lines to open-drain output and drive them low ++ * @GPIOD_OUT_HIGH_OPEN_DRAIN: Set lines to open-drain output and drive them high + */ + enum gpiod_flags { + GPIOD_ASIS = 0, +diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h +index a0f9901dcae60..11c26ae7b4fa4 100644 +--- a/include/linux/gpio/driver.h ++++ b/include/linux/gpio/driver.h +@@ -224,6 +224,15 @@ struct gpio_irq_chip { + unsigned long *valid_mask, + unsigned int ngpios); + ++ /** ++ * @initialized: ++ * ++ * Flag to track GPIO chip irq member's initialization. 
++ * This flag will make sure GPIO chip irq members are not used ++ * before they are initialized. ++ */ ++ bool initialized; ++ + /** + * @valid_mask: + * +@@ -416,7 +425,7 @@ struct gpio_chip { + void __iomem *reg_dir_in; + bool bgpio_dir_unreadable; + int bgpio_bits; +- spinlock_t bgpio_lock; ++ raw_spinlock_t bgpio_lock; + unsigned long bgpio_data; + unsigned long bgpio_dir; + #endif /* CONFIG_GPIO_GENERIC */ +@@ -472,6 +481,18 @@ struct gpio_chip { + */ + int (*of_xlate)(struct gpio_chip *gc, + const struct of_phandle_args *gpiospec, u32 *flags); ++ ++ /** ++ * @of_gpio_ranges_fallback: ++ * ++ * Optional hook for the case that no gpio-ranges property is defined ++ * within the device tree node "np" (usually DT before introduction ++ * of gpio-ranges). So this callback is helpful to provide the ++ * necessary backward compatibility for the pin ranges. ++ */ ++ int (*of_gpio_ranges_fallback)(struct gpio_chip *gc, ++ struct device_node *np); ++ + #endif /* CONFIG_OF_GPIO */ + }; + +diff --git a/include/linux/hid.h b/include/linux/hid.h +index 9e067f937dbc2..26742ca14609a 100644 +--- a/include/linux/hid.h ++++ b/include/linux/hid.h +@@ -349,6 +349,8 @@ struct hid_item { + /* BIT(9) reserved for backward compatibility, was NO_INIT_INPUT_REPORTS */ + #define HID_QUIRK_ALWAYS_POLL BIT(10) + #define HID_QUIRK_INPUT_PER_APP BIT(11) ++#define HID_QUIRK_X_INVERT BIT(12) ++#define HID_QUIRK_Y_INVERT BIT(13) + #define HID_QUIRK_SKIP_OUTPUT_REPORTS BIT(16) + #define HID_QUIRK_SKIP_OUTPUT_REPORT_ID BIT(17) + #define HID_QUIRK_NO_OUTPUT_REPORTS_ON_INTR_EP BIT(18) +@@ -840,6 +842,11 @@ static inline bool hid_is_using_ll_driver(struct hid_device *hdev, + return hdev->ll_driver == driver; + } + ++static inline bool hid_is_usb(struct hid_device *hdev) ++{ ++ return hid_is_using_ll_driver(hdev, &usb_hid_driver); ++} ++ + #define PM_HINT_FULLON 1<<5 + #define PM_HINT_NORMAL 1<<1 + +diff --git a/include/linux/highmem-internal.h b/include/linux/highmem-internal.h +index 4aa1031d3e4c3..de17904b7cb44 100644 +--- a/include/linux/highmem-internal.h ++++ b/include/linux/highmem-internal.h +@@ -184,7 +184,7 @@ static inline void *kmap_local_pfn(unsigned long pfn) + static inline void __kunmap_local(void *addr) + { + #ifdef ARCH_HAS_FLUSH_ON_KUNMAP +- kunmap_flush_on_unmap(addr); ++ kunmap_flush_on_unmap(PTR_ALIGN_DOWN(addr, PAGE_SIZE)); + #endif + } + +@@ -211,7 +211,7 @@ static inline void *kmap_atomic_pfn(unsigned long pfn) + static inline void __kunmap_atomic(void *addr) + { + #ifdef ARCH_HAS_FLUSH_ON_KUNMAP +- kunmap_flush_on_unmap(addr); ++ kunmap_flush_on_unmap(PTR_ALIGN_DOWN(addr, PAGE_SIZE)); + #endif + pagefault_enable(); + if (IS_ENABLED(CONFIG_PREEMPT_RT)) +diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h +index 1faebe1cd0ed5..4ede8df5818e1 100644 +--- a/include/linux/hugetlb.h ++++ b/include/linux/hugetlb.h +@@ -7,6 +7,7 @@ + #include <linux/fs.h> + #include <linux/hugetlb_inline.h> + #include <linux/cgroup.h> ++#include <linux/page_ref.h> + #include <linux/list.h> + #include <linux/kref.h> + #include <linux/pgtable.h> +@@ -165,8 +166,9 @@ bool hugetlb_reserve_pages(struct inode *inode, long from, long to, + vm_flags_t vm_flags); + long hugetlb_unreserve_pages(struct inode *inode, long start, long end, + long freed); +-bool isolate_huge_page(struct page *page, struct list_head *list); ++int isolate_hugetlb(struct page *page, struct list_head *list); + int get_hwpoison_huge_page(struct page *page, bool *hugetlb); ++int get_huge_page_for_hwpoison(unsigned long pfn, int flags); + void 
putback_active_hugepage(struct page *page); + void move_hugetlb_state(struct page *oldpage, struct page *newpage, int reason); + void free_huge_page(struct page *page); +@@ -197,8 +199,8 @@ struct page *follow_huge_addr(struct mm_struct *mm, unsigned long address, + struct page *follow_huge_pd(struct vm_area_struct *vma, + unsigned long address, hugepd_t hpd, + int flags, int pdshift); +-struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address, +- pmd_t *pmd, int flags); ++struct page *follow_huge_pmd_pte(struct vm_area_struct *vma, unsigned long address, ++ int flags); + struct page *follow_huge_pud(struct mm_struct *mm, unsigned long address, + pud_t *pud, int flags); + struct page *follow_huge_pgd(struct mm_struct *mm, unsigned long address, +@@ -285,8 +287,8 @@ static inline struct page *follow_huge_pd(struct vm_area_struct *vma, + return NULL; + } + +-static inline struct page *follow_huge_pmd(struct mm_struct *mm, +- unsigned long address, pmd_t *pmd, int flags) ++static inline struct page *follow_huge_pmd_pte(struct vm_area_struct *vma, ++ unsigned long address, int flags) + { + return NULL; + } +@@ -352,9 +354,9 @@ static inline pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr, + return NULL; + } + +-static inline bool isolate_huge_page(struct page *page, struct list_head *list) ++static inline int isolate_hugetlb(struct page *page, struct list_head *list) + { +- return false; ++ return -EBUSY; + } + + static inline int get_hwpoison_huge_page(struct page *page, bool *hugetlb) +@@ -362,6 +364,11 @@ static inline int get_hwpoison_huge_page(struct page *page, bool *hugetlb) + return 0; + } + ++static inline int get_huge_page_for_hwpoison(unsigned long pfn, int flags) ++{ ++ return 0; ++} ++ + static inline void putback_active_hugepage(struct page *page) + { + } +@@ -677,7 +684,10 @@ static inline struct hstate *hstate_sizelog(int page_size_log) + if (!page_size_log) + return &default_hstate; + +- return size_to_hstate(1UL << page_size_log); ++ if (page_size_log < BITS_PER_LONG) ++ return size_to_hstate(1UL << page_size_log); ++ ++ return NULL; + } + + static inline struct hstate *hstate_vma(struct vm_area_struct *vma) +@@ -1093,6 +1103,18 @@ static inline __init void hugetlb_cma_check(void) + } + #endif + ++#ifdef CONFIG_ARCH_WANT_HUGE_PMD_SHARE ++static inline bool hugetlb_pmd_shared(pte_t *pte) ++{ ++ return page_count(virt_to_page(pte)) > 1; ++} ++#else ++static inline bool hugetlb_pmd_shared(pte_t *pte) ++{ ++ return false; ++} ++#endif ++ + bool want_pmd_share(struct vm_area_struct *vma, unsigned long addr); + + #ifndef __HAVE_ARCH_FLUSH_HUGETLB_TLB_RANGE +diff --git a/include/linux/hw_random.h b/include/linux/hw_random.h +index 8e6dd908da216..aa1d4da03538b 100644 +--- a/include/linux/hw_random.h ++++ b/include/linux/hw_random.h +@@ -60,7 +60,5 @@ extern int devm_hwrng_register(struct device *dev, struct hwrng *rng); + /** Unregister a Hardware Random Number Generator driver. */ + extern void hwrng_unregister(struct hwrng *rng); + extern void devm_hwrng_unregister(struct device *dve, struct hwrng *rng); +-/** Feed random bits into the pool. 
*/ +-extern void add_hwgenerator_randomness(const char *buffer, size_t count, size_t entropy); + + #endif /* LINUX_HWRANDOM_H_ */ +diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h +index ddc8713ce57b7..8499fc9220e07 100644 +--- a/include/linux/hyperv.h ++++ b/include/linux/hyperv.h +@@ -1307,6 +1307,8 @@ struct hv_ring_buffer_debug_info { + int hv_ringbuffer_get_debuginfo(struct hv_ring_buffer_info *ring_info, + struct hv_ring_buffer_debug_info *debug_info); + ++bool hv_ringbuffer_spinlock_busy(struct vmbus_channel *channel); ++ + /* Vmbus interface */ + #define vmbus_driver_register(driver) \ + __vmbus_driver_register(driver, THIS_MODULE, KBUILD_MODNAME) +diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h +index 694264503119d..00ed7c17698d1 100644 +--- a/include/linux/ieee80211.h ++++ b/include/linux/ieee80211.h +@@ -1023,6 +1023,8 @@ struct ieee80211_tpc_report_ie { + #define IEEE80211_ADDBA_EXT_FRAG_LEVEL_MASK GENMASK(2, 1) + #define IEEE80211_ADDBA_EXT_FRAG_LEVEL_SHIFT 1 + #define IEEE80211_ADDBA_EXT_NO_FRAG BIT(0) ++#define IEEE80211_ADDBA_EXT_BUF_SIZE_MASK GENMASK(7, 5) ++#define IEEE80211_ADDBA_EXT_BUF_SIZE_SHIFT 10 + + struct ieee80211_addba_ext_ie { + u8 data; +@@ -1697,10 +1699,12 @@ struct ieee80211_ht_operation { + * A-MPDU buffer sizes + * According to HT size varies from 8 to 64 frames + * HE adds the ability to have up to 256 frames. ++ * EHT adds the ability to have up to 1K frames. + */ + #define IEEE80211_MIN_AMPDU_BUF 0x8 + #define IEEE80211_MAX_AMPDU_BUF_HT 0x40 +-#define IEEE80211_MAX_AMPDU_BUF 0x100 ++#define IEEE80211_MAX_AMPDU_BUF_HE 0x100 ++#define IEEE80211_MAX_AMPDU_BUF_EHT 0x400 + + + /* Spatial Multiplexing Power Save Modes (for capability) */ +diff --git a/include/linux/if_arp.h b/include/linux/if_arp.h +index b712217f70304..1ed52441972f9 100644 +--- a/include/linux/if_arp.h ++++ b/include/linux/if_arp.h +@@ -52,6 +52,7 @@ static inline bool dev_is_mac_header_xmit(const struct net_device *dev) + case ARPHRD_VOID: + case ARPHRD_NONE: + case ARPHRD_RAWIP: ++ case ARPHRD_PIMREG: + return false; + default: + return true; +diff --git a/include/linux/iio/common/cros_ec_sensors_core.h b/include/linux/iio/common/cros_ec_sensors_core.h +index c582e1a142320..7b5dbd7499957 100644 +--- a/include/linux/iio/common/cros_ec_sensors_core.h ++++ b/include/linux/iio/common/cros_ec_sensors_core.h +@@ -95,8 +95,11 @@ int cros_ec_sensors_read_cmd(struct iio_dev *indio_dev, unsigned long scan_mask, + struct platform_device; + int cros_ec_sensors_core_init(struct platform_device *pdev, + struct iio_dev *indio_dev, bool physical_device, +- cros_ec_sensors_capture_t trigger_capture, +- cros_ec_sensorhub_push_data_cb_t push_data); ++ cros_ec_sensors_capture_t trigger_capture); ++ ++int cros_ec_sensors_core_register(struct device *dev, ++ struct iio_dev *indio_dev, ++ cros_ec_sensorhub_push_data_cb_t push_data); + + irqreturn_t cros_ec_sensors_capture(int irq, void *p); + int cros_ec_sensors_push_data(struct iio_dev *indio_dev, +diff --git a/include/linux/iio/common/st_sensors.h b/include/linux/iio/common/st_sensors.h +index 8bdbaf3f3796b..69f4a1f6b536d 100644 +--- a/include/linux/iio/common/st_sensors.h ++++ b/include/linux/iio/common/st_sensors.h +@@ -238,6 +238,7 @@ struct st_sensor_settings { + * @hw_irq_trigger: if we're using the hardware interrupt on the sensor. + * @hw_timestamp: Latest timestamp from the interrupt handler, when in use. + * @buffer_data: Data used by buffer part. 
++ * @odr_lock: Local lock for preventing concurrent ODR accesses/changes + */ + struct st_sensor_data { + struct device *dev; +@@ -263,6 +264,8 @@ struct st_sensor_data { + s64 hw_timestamp; + + char buffer_data[ST_SENSORS_MAX_BUFFER_SIZE] ____cacheline_aligned; ++ ++ struct mutex odr_lock; + }; + + #ifdef CONFIG_IIO_BUFFER +diff --git a/include/linux/iio/imu/adis.h b/include/linux/iio/imu/adis.h +index cf49997d5903e..8210a9e682154 100644 +--- a/include/linux/iio/imu/adis.h ++++ b/include/linux/iio/imu/adis.h +@@ -32,6 +32,7 @@ struct adis_timeout { + u16 sw_reset_ms; + u16 self_test_ms; + }; ++ + /** + * struct adis_data - ADIS chip variant specific data + * @read_delay: SPI delay for read operations in us +@@ -45,10 +46,11 @@ struct adis_timeout { + * @self_test_mask: Bitmask of supported self-test operations + * @self_test_reg: Register address to request self test command + * @self_test_no_autoclear: True if device's self-test needs clear of ctrl reg +- * @status_error_msgs: Array of error messgaes ++ * @status_error_msgs: Array of error messages + * @status_error_mask: Bitmask of errors supported by the device + * @timeouts: Chip specific delays + * @enable_irq: Hook for ADIS devices that have a special IRQ enable/disable ++ * @unmasked_drdy: True for devices that cannot mask/unmask the data ready pin + * @has_paging: True if ADIS device has paged registers + * @burst_reg_cmd: Register command that triggers burst + * @burst_len: Burst size in the SPI RX buffer. If @burst_max_len is defined, +@@ -78,6 +80,7 @@ struct adis_data { + unsigned int status_error_mask; + + int (*enable_irq)(struct adis *adis, bool enable); ++ bool unmasked_drdy; + + bool has_paging; + +@@ -128,12 +131,12 @@ struct adis { + unsigned long irq_flag; + void *buffer; + +- uint8_t tx[10] ____cacheline_aligned; +- uint8_t rx[4]; ++ u8 tx[10] ____cacheline_aligned; ++ u8 rx[4]; + }; + + int adis_init(struct adis *adis, struct iio_dev *indio_dev, +- struct spi_device *spi, const struct adis_data *data); ++ struct spi_device *spi, const struct adis_data *data); + int __adis_reset(struct adis *adis); + + /** +@@ -154,9 +157,9 @@ static inline int adis_reset(struct adis *adis) + } + + int __adis_write_reg(struct adis *adis, unsigned int reg, +- unsigned int val, unsigned int size); ++ unsigned int val, unsigned int size); + int __adis_read_reg(struct adis *adis, unsigned int reg, +- unsigned int *val, unsigned int size); ++ unsigned int *val, unsigned int size); + + /** + * __adis_write_reg_8() - Write single byte to a register (unlocked) +@@ -165,7 +168,7 @@ int __adis_read_reg(struct adis *adis, unsigned int reg, + * @value: The value to write + */ + static inline int __adis_write_reg_8(struct adis *adis, unsigned int reg, +- uint8_t val) ++ u8 val) + { + return __adis_write_reg(adis, reg, val, 1); + } +@@ -177,7 +180,7 @@ static inline int __adis_write_reg_8(struct adis *adis, unsigned int reg, + * @value: Value to be written + */ + static inline int __adis_write_reg_16(struct adis *adis, unsigned int reg, +- uint16_t val) ++ u16 val) + { + return __adis_write_reg(adis, reg, val, 2); + } +@@ -189,7 +192,7 @@ static inline int __adis_write_reg_16(struct adis *adis, unsigned int reg, + * @value: Value to be written + */ + static inline int __adis_write_reg_32(struct adis *adis, unsigned int reg, +- uint32_t val) ++ u32 val) + { + return __adis_write_reg(adis, reg, val, 4); + } +@@ -201,7 +204,7 @@ static inline int __adis_write_reg_32(struct adis *adis, unsigned int reg, + * @val: The value read back from the device + 
*/ + static inline int __adis_read_reg_16(struct adis *adis, unsigned int reg, +- uint16_t *val) ++ u16 *val) + { + unsigned int tmp; + int ret; +@@ -220,7 +223,7 @@ static inline int __adis_read_reg_16(struct adis *adis, unsigned int reg, + * @val: The value read back from the device + */ + static inline int __adis_read_reg_32(struct adis *adis, unsigned int reg, +- uint32_t *val) ++ u32 *val) + { + unsigned int tmp; + int ret; +@@ -240,7 +243,7 @@ static inline int __adis_read_reg_32(struct adis *adis, unsigned int reg, + * @size: The size of the @value (in bytes) + */ + static inline int adis_write_reg(struct adis *adis, unsigned int reg, +- unsigned int val, unsigned int size) ++ unsigned int val, unsigned int size) + { + int ret; + +@@ -259,7 +262,7 @@ static inline int adis_write_reg(struct adis *adis, unsigned int reg, + * @size: The size of the @val buffer + */ + static int adis_read_reg(struct adis *adis, unsigned int reg, +- unsigned int *val, unsigned int size) ++ unsigned int *val, unsigned int size) + { + int ret; + +@@ -277,7 +280,7 @@ static int adis_read_reg(struct adis *adis, unsigned int reg, + * @value: The value to write + */ + static inline int adis_write_reg_8(struct adis *adis, unsigned int reg, +- uint8_t val) ++ u8 val) + { + return adis_write_reg(adis, reg, val, 1); + } +@@ -289,7 +292,7 @@ static inline int adis_write_reg_8(struct adis *adis, unsigned int reg, + * @value: Value to be written + */ + static inline int adis_write_reg_16(struct adis *adis, unsigned int reg, +- uint16_t val) ++ u16 val) + { + return adis_write_reg(adis, reg, val, 2); + } +@@ -301,7 +304,7 @@ static inline int adis_write_reg_16(struct adis *adis, unsigned int reg, + * @value: Value to be written + */ + static inline int adis_write_reg_32(struct adis *adis, unsigned int reg, +- uint32_t val) ++ u32 val) + { + return adis_write_reg(adis, reg, val, 4); + } +@@ -313,7 +316,7 @@ static inline int adis_write_reg_32(struct adis *adis, unsigned int reg, + * @val: The value read back from the device + */ + static inline int adis_read_reg_16(struct adis *adis, unsigned int reg, +- uint16_t *val) ++ u16 *val) + { + unsigned int tmp; + int ret; +@@ -332,7 +335,7 @@ static inline int adis_read_reg_16(struct adis *adis, unsigned int reg, + * @val: The value read back from the device + */ + static inline int adis_read_reg_32(struct adis *adis, unsigned int reg, +- uint32_t *val) ++ u32 *val) + { + unsigned int tmp; + int ret; +@@ -403,9 +406,20 @@ static inline int adis_update_bits_base(struct adis *adis, unsigned int reg, + __adis_update_bits_base(adis, reg, mask, val, 2)); \ + }) + +-int adis_enable_irq(struct adis *adis, bool enable); + int __adis_check_status(struct adis *adis); + int __adis_initial_startup(struct adis *adis); ++int __adis_enable_irq(struct adis *adis, bool enable); ++ ++static inline int adis_enable_irq(struct adis *adis, bool enable) ++{ ++ int ret; ++ ++ mutex_lock(&adis->state_lock); ++ ret = __adis_enable_irq(adis, enable); ++ mutex_unlock(&adis->state_lock); ++ ++ return ret; ++} + + static inline int adis_check_status(struct adis *adis) + { +@@ -441,8 +455,8 @@ static inline void adis_dev_unlock(struct adis *adis) + } + + int adis_single_conversion(struct iio_dev *indio_dev, +- const struct iio_chan_spec *chan, unsigned int error_mask, +- int *val); ++ const struct iio_chan_spec *chan, ++ unsigned int error_mask, int *val); + + #define ADIS_VOLTAGE_CHAN(addr, si, chan, name, info_all, bits) { \ + .type = IIO_VOLTAGE, \ +@@ -491,7 +505,7 @@ int adis_single_conversion(struct 
iio_dev *indio_dev, + .modified = 1, \ + .channel2 = IIO_MOD_ ## mod, \ + .info_mask_separate = BIT(IIO_CHAN_INFO_RAW) | \ +- info_sep, \ ++ (info_sep), \ + .info_mask_shared_by_type = BIT(IIO_CHAN_INFO_SCALE), \ + .info_mask_shared_by_all = info_all, \ + .address = (addr), \ +@@ -525,7 +539,7 @@ devm_adis_setup_buffer_and_trigger(struct adis *adis, struct iio_dev *indio_dev, + int devm_adis_probe_trigger(struct adis *adis, struct iio_dev *indio_dev); + + int adis_update_scan_mode(struct iio_dev *indio_dev, +- const unsigned long *scan_mask); ++ const unsigned long *scan_mask); + + #else /* CONFIG_IIO_BUFFER */ + +@@ -549,7 +563,8 @@ static inline int devm_adis_probe_trigger(struct adis *adis, + #ifdef CONFIG_DEBUG_FS + + int adis_debugfs_reg_access(struct iio_dev *indio_dev, +- unsigned int reg, unsigned int writeval, unsigned int *readval); ++ unsigned int reg, unsigned int writeval, ++ unsigned int *readval); + + #else + +diff --git a/include/linux/iio/trigger.h b/include/linux/iio/trigger.h +index 096f68dd2e0ca..4c69b144677b1 100644 +--- a/include/linux/iio/trigger.h ++++ b/include/linux/iio/trigger.h +@@ -55,6 +55,7 @@ struct iio_trigger_ops { + * @attached_own_device:[INTERN] if we are using our own device as trigger, + * i.e. if we registered a poll function to the same + * device as the one providing the trigger. ++ * @reenable_work: [INTERN] work item used to ensure reenable can sleep. + **/ + struct iio_trigger { + const struct iio_trigger_ops *ops; +@@ -74,6 +75,7 @@ struct iio_trigger { + unsigned long pool[BITS_TO_LONGS(CONFIG_IIO_CONSUMERS_PER_TRIGGER)]; + struct mutex pool_lock; + bool attached_own_device; ++ struct work_struct reenable_work; + }; + + +diff --git a/include/linux/instrumentation.h b/include/linux/instrumentation.h +index fa2cd8c63dcc9..24359b4a96053 100644 +--- a/include/linux/instrumentation.h ++++ b/include/linux/instrumentation.h +@@ -11,7 +11,7 @@ + asm volatile(__stringify(c) ": nop\n\t" \ + ".pushsection .discard.instr_begin\n\t" \ + ".long " __stringify(c) "b - .\n\t" \ +- ".popsection\n\t"); \ ++ ".popsection\n\t" : : "i" (c)); \ + }) + #define instrumentation_begin() __instrumentation_begin(__COUNTER__) + +@@ -50,7 +50,7 @@ + asm volatile(__stringify(c) ": nop\n\t" \ + ".pushsection .discard.instr_end\n\t" \ + ".long " __stringify(c) "b - .\n\t" \ +- ".popsection\n\t"); \ ++ ".popsection\n\t" : : "i" (c)); \ + }) + #define instrumentation_end() __instrumentation_end(__COUNTER__) + #else +diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h +index 05a65eb155f76..81da7107e3bd0 100644 +--- a/include/linux/intel-iommu.h ++++ b/include/linux/intel-iommu.h +@@ -196,7 +196,6 @@ + #define ecap_dis(e) (((e) >> 27) & 0x1) + #define ecap_nest(e) (((e) >> 26) & 0x1) + #define ecap_mts(e) (((e) >> 25) & 0x1) +-#define ecap_ecs(e) (((e) >> 24) & 0x1) + #define ecap_iotlb_offset(e) ((((e) >> 8) & 0x3ff) * 16) + #define ecap_max_iotlb_offset(e) (ecap_iotlb_offset(e) + 16) + #define ecap_coherent(e) ((e) & 0x1) +@@ -264,7 +263,6 @@ + #define DMA_GSTS_CFIS (((u32)1) << 23) + + /* DMA_RTADDR_REG */ +-#define DMA_RTADDR_RTT (((u64)1) << 11) + #define DMA_RTADDR_SMT (((u64)1) << 10) + + /* CCMD_REG */ +@@ -594,6 +592,7 @@ struct intel_iommu { + #ifdef CONFIG_INTEL_IOMMU + unsigned long *domain_ids; /* bitmap of domains */ + struct dmar_domain ***domains; /* ptr to domains */ ++ unsigned long *copied_tables; /* bitmap of copied tables */ + spinlock_t lock; /* protect context, domain ids */ + struct root_entry *root_entry; /* virtual address */ + +@@ 
-713,6 +712,11 @@ static inline int first_pte_in_page(struct dma_pte *pte) + return !((unsigned long)pte & ~VTD_PAGE_MASK); + } + ++static inline bool context_present(struct context_entry *context) ++{ ++ return (context->lo & 1); ++} ++ + extern struct dmar_drhd_unit * dmar_find_matched_drhd_unit(struct pci_dev *dev); + extern int dmar_find_matched_atsr_unit(struct pci_dev *dev); + +@@ -806,7 +810,6 @@ static inline void intel_iommu_debugfs_init(void) {} + #endif /* CONFIG_INTEL_IOMMU_DEBUGFS */ + + extern const struct attribute_group *intel_iommu_groups[]; +-bool context_present(struct context_entry *context); + struct context_entry *iommu_context_addr(struct intel_iommu *iommu, u8 bus, + u8 devfn, int alloc); + +diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h +index 86af6f0a00a2a..ca98aeadcc804 100644 +--- a/include/linux/io-pgtable.h ++++ b/include/linux/io-pgtable.h +@@ -74,17 +74,22 @@ struct io_pgtable_cfg { + * to support up to 35 bits PA where the bit32, bit33 and bit34 are + * encoded in the bit9, bit4 and bit5 of the PTE respectively. + * ++ * IO_PGTABLE_QUIRK_ARM_MTK_TTBR_EXT: (ARM v7s format) MediaTek IOMMUs ++ * extend the translation table base support up to 35 bits PA, the ++ * encoding format is same with IO_PGTABLE_QUIRK_ARM_MTK_EXT. ++ * + * IO_PGTABLE_QUIRK_ARM_TTBR1: (ARM LPAE format) Configure the table + * for use in the upper half of a split address space. + * + * IO_PGTABLE_QUIRK_ARM_OUTER_WBWA: Override the outer-cacheability + * attributes set in the TCR for a non-coherent page-table walker. + */ +- #define IO_PGTABLE_QUIRK_ARM_NS BIT(0) +- #define IO_PGTABLE_QUIRK_NO_PERMS BIT(1) +- #define IO_PGTABLE_QUIRK_ARM_MTK_EXT BIT(3) +- #define IO_PGTABLE_QUIRK_ARM_TTBR1 BIT(5) +- #define IO_PGTABLE_QUIRK_ARM_OUTER_WBWA BIT(6) ++ #define IO_PGTABLE_QUIRK_ARM_NS BIT(0) ++ #define IO_PGTABLE_QUIRK_NO_PERMS BIT(1) ++ #define IO_PGTABLE_QUIRK_ARM_MTK_EXT BIT(3) ++ #define IO_PGTABLE_QUIRK_ARM_MTK_TTBR_EXT BIT(4) ++ #define IO_PGTABLE_QUIRK_ARM_TTBR1 BIT(5) ++ #define IO_PGTABLE_QUIRK_ARM_OUTER_WBWA BIT(6) + unsigned long quirks; + unsigned long pgsize_bitmap; + unsigned int ias; +diff --git a/include/linux/iomap.h b/include/linux/iomap.h +index 24f8489583ca7..829f2325ecbab 100644 +--- a/include/linux/iomap.h ++++ b/include/linux/iomap.h +@@ -330,12 +330,19 @@ struct iomap_dio_ops { + */ + #define IOMAP_DIO_OVERWRITE_ONLY (1 << 1) + ++/* ++ * When a page fault occurs, return a partial synchronous result and allow ++ * the caller to retry the rest of the operation after dealing with the page ++ * fault. 
++ */ ++#define IOMAP_DIO_PARTIAL (1 << 2) ++ + ssize_t iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, + const struct iomap_ops *ops, const struct iomap_dio_ops *dops, +- unsigned int dio_flags); ++ unsigned int dio_flags, size_t done_before); + struct iomap_dio *__iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, + const struct iomap_ops *ops, const struct iomap_dio_ops *dops, +- unsigned int dio_flags); ++ unsigned int dio_flags, size_t done_before); + ssize_t iomap_dio_complete(struct iomap_dio *dio); + int iomap_dio_iopoll(struct kiocb *kiocb, bool spin); + +diff --git a/include/linux/ioport.h b/include/linux/ioport.h +index 8359c50f99884..ec5f71f7135b0 100644 +--- a/include/linux/ioport.h ++++ b/include/linux/ioport.h +@@ -262,6 +262,8 @@ resource_union(struct resource *r1, struct resource *r2, struct resource *r) + #define request_muxed_region(start,n,name) __request_region(&ioport_resource, (start), (n), (name), IORESOURCE_MUXED) + #define __request_mem_region(start,n,name, excl) __request_region(&iomem_resource, (start), (n), (name), excl) + #define request_mem_region(start,n,name) __request_region(&iomem_resource, (start), (n), (name), 0) ++#define request_mem_region_muxed(start, n, name) \ ++ __request_region(&iomem_resource, (start), (n), (name), IORESOURCE_MUXED) + #define request_mem_region_exclusive(start,n,name) \ + __request_region(&iomem_resource, (start), (n), (name), IORESOURCE_EXCLUSIVE) + #define rename_region(region, newname) do { (region)->name = (newname); } while (0) +diff --git a/include/linux/ioprio.h b/include/linux/ioprio.h +index 3f53bc27a19bf..3d088a88f8320 100644 +--- a/include/linux/ioprio.h ++++ b/include/linux/ioprio.h +@@ -11,7 +11,7 @@ + /* + * Default IO priority. + */ +-#define IOPRIO_DEFAULT IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, IOPRIO_BE_NORM) ++#define IOPRIO_DEFAULT IOPRIO_PRIO_VALUE(IOPRIO_CLASS_NONE, 0) + + /* + * Check that a priority value has a valid class. 
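
The IOPRIO_DEFAULT hunk above is easier to read with the encoding in mind: an I/O
priority packs the class above a 13-bit data field, so class NONE with data 0 is
simply the value 0, which the block layer can tell apart from an explicitly
requested best-effort priority. A minimal userspace sketch of that encoding,
assuming the IOPRIO_* constants from the kernel UAPI headers (they are not part
of this patch):

    #include <stdio.h>

    /* Assumed from the ioprio UAPI: class sits above a 13-bit data field. */
    #define IOPRIO_CLASS_SHIFT 13
    #define IOPRIO_PRIO_VALUE(class, data) (((class) << IOPRIO_CLASS_SHIFT) | (data))

    enum { IOPRIO_CLASS_NONE, IOPRIO_CLASS_RT, IOPRIO_CLASS_BE, IOPRIO_CLASS_IDLE };
    #define IOPRIO_BE_NORM 4

    int main(void)
    {
            /* Old default: an explicit best-effort priority at level 4. */
            printf("old default: %d\n",
                   IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, IOPRIO_BE_NORM));
            /* New default: class NONE, i.e. "no priority was ever set". */
            printf("new default: %d\n",
                   IOPRIO_PRIO_VALUE(IOPRIO_CLASS_NONE, 0));
            return 0;
    }
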
+diff --git a/include/linux/iova.h b/include/linux/iova.h +index 71d8a2de66354..6b6cc104e300d 100644 +--- a/include/linux/iova.h ++++ b/include/linux/iova.h +@@ -133,7 +133,7 @@ static inline unsigned long iova_pfn(struct iova_domain *iovad, dma_addr_t iova) + return iova >> iova_shift(iovad); + } + +-#if IS_ENABLED(CONFIG_IOMMU_IOVA) ++#if IS_REACHABLE(CONFIG_IOMMU_IOVA) + int iova_cache_get(void); + void iova_cache_put(void); + +diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h +index 05e22770af517..b75395ec8d521 100644 +--- a/include/linux/ipc_namespace.h ++++ b/include/linux/ipc_namespace.h +@@ -131,6 +131,16 @@ static inline struct ipc_namespace *get_ipc_ns(struct ipc_namespace *ns) + return ns; + } + ++static inline struct ipc_namespace *get_ipc_ns_not_zero(struct ipc_namespace *ns) ++{ ++ if (ns) { ++ if (refcount_inc_not_zero(&ns->ns.count)) ++ return ns; ++ } ++ ++ return NULL; ++} ++ + extern void put_ipc_ns(struct ipc_namespace *ns); + #else + static inline struct ipc_namespace *copy_ipcs(unsigned long flags, +@@ -147,6 +157,11 @@ static inline struct ipc_namespace *get_ipc_ns(struct ipc_namespace *ns) + return ns; + } + ++static inline struct ipc_namespace *get_ipc_ns_not_zero(struct ipc_namespace *ns) ++{ ++ return ns; ++} ++ + static inline void put_ipc_ns(struct ipc_namespace *ns) + { + } +diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h +index ef4a69865737c..d1f3864307959 100644 +--- a/include/linux/ipv6.h ++++ b/include/linux/ipv6.h +@@ -51,7 +51,7 @@ struct ipv6_devconf { + __s32 use_optimistic; + #endif + #ifdef CONFIG_IPV6_MROUTE +- __s32 mc_forwarding; ++ atomic_t mc_forwarding; + #endif + __s32 disable_ipv6; + __s32 drop_unicast_in_l2_multicast; +@@ -132,6 +132,7 @@ struct inet6_skb_parm { + __u16 dsthao; + #endif + __u16 frag_max_size; ++ __u16 srhoff; + + #define IP6SKB_XFRM_TRANSFORMED 1 + #define IP6SKB_FORWARDED 2 +@@ -141,6 +142,7 @@ struct inet6_skb_parm { + #define IP6SKB_HOPBYHOP 32 + #define IP6SKB_L3SLAVE 64 + #define IP6SKB_JUMBOGRAM 128 ++#define IP6SKB_SEG6 256 + }; + + #if defined(CONFIG_NET_L3_MASTER_DEV) +@@ -282,7 +284,6 @@ struct ipv6_pinfo { + __be32 rcv_flowinfo; + + __u32 dst_cookie; +- __u32 rx_dst_cookie; + + struct ipv6_mc_socklist __rcu *ipv6_mc_list; + struct ipv6_ac_socklist *ipv6_ac_list; +diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h +index 600c10da321a7..747f40e0c3260 100644 +--- a/include/linux/irqflags.h ++++ b/include/linux/irqflags.h +@@ -20,13 +20,13 @@ + #ifdef CONFIG_PROVE_LOCKING + extern void lockdep_softirqs_on(unsigned long ip); + extern void lockdep_softirqs_off(unsigned long ip); +- extern void lockdep_hardirqs_on_prepare(unsigned long ip); ++ extern void lockdep_hardirqs_on_prepare(void); + extern void lockdep_hardirqs_on(unsigned long ip); + extern void lockdep_hardirqs_off(unsigned long ip); + #else + static inline void lockdep_softirqs_on(unsigned long ip) { } + static inline void lockdep_softirqs_off(unsigned long ip) { } +- static inline void lockdep_hardirqs_on_prepare(unsigned long ip) { } ++ static inline void lockdep_hardirqs_on_prepare(void) { } + static inline void lockdep_hardirqs_on(unsigned long ip) { } + static inline void lockdep_hardirqs_off(unsigned long ip) { } + #endif +diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h +index fd933c45281af..d63b8106796e2 100644 +--- a/include/linux/jbd2.h ++++ b/include/linux/jbd2.h +@@ -1295,7 +1295,7 @@ struct journal_s + * Clean-up after fast commit or full commit. 
JBD2 calls this function + * after every commit operation. + */ +- void (*j_fc_cleanup_callback)(struct journal_s *journal, int); ++ void (*j_fc_cleanup_callback)(struct journal_s *journal, int full, tid_t tid); + + /** + * @j_fc_replay_callback: +diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h +index 48b9b2a82767d..019e55c13248b 100644 +--- a/include/linux/jump_label.h ++++ b/include/linux/jump_label.h +@@ -261,9 +261,9 @@ extern void static_key_disable_cpuslocked(struct static_key *key); + #include <linux/atomic.h> + #include <linux/bug.h> + +-static inline int static_key_count(struct static_key *key) ++static __always_inline int static_key_count(struct static_key *key) + { +- return atomic_read(&key->enabled); ++ return arch_atomic_read(&key->enabled); + } + + static __always_inline void jump_label_init(void) +diff --git a/include/linux/kasan.h b/include/linux/kasan.h +index dd874a1ee862a..f407e937241af 100644 +--- a/include/linux/kasan.h ++++ b/include/linux/kasan.h +@@ -461,12 +461,12 @@ static inline void kasan_release_vmalloc(unsigned long start, + * allocations with real shadow memory. With KASAN vmalloc, the special + * case is unnecessary, as the work is handled in the generic case. + */ +-int kasan_module_alloc(void *addr, size_t size); ++int kasan_module_alloc(void *addr, size_t size, gfp_t gfp_mask); + void kasan_free_shadow(const struct vm_struct *vm); + + #else /* (CONFIG_KASAN_GENERIC || CONFIG_KASAN_SW_TAGS) && !CONFIG_KASAN_VMALLOC */ + +-static inline int kasan_module_alloc(void *addr, size_t size) { return 0; } ++static inline int kasan_module_alloc(void *addr, size_t size, gfp_t gfp_mask) { return 0; } + static inline void kasan_free_shadow(const struct vm_struct *vm) {} + + #endif /* (CONFIG_KASAN_GENERIC || CONFIG_KASAN_SW_TAGS) && !CONFIG_KASAN_VMALLOC */ +diff --git a/include/linux/kernel.h b/include/linux/kernel.h +index 2776423a587e4..f56cd8879a594 100644 +--- a/include/linux/kernel.h ++++ b/include/linux/kernel.h +@@ -277,7 +277,7 @@ static inline char *hex_byte_pack_upper(char *buf, u8 byte) + return buf; + } + +-extern int hex_to_bin(char ch); ++extern int hex_to_bin(unsigned char ch); + extern int __must_check hex2bin(u8 *dst, const char *src, size_t count); + extern char *bin2hex(char *dst, const void *src, size_t count); + +diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h +index 44ae1a7eb9e39..69ae6b2784645 100644 +--- a/include/linux/kernel_stat.h ++++ b/include/linux/kernel_stat.h +@@ -102,6 +102,7 @@ extern void account_system_index_time(struct task_struct *, u64, + enum cpu_usage_stat); + extern void account_steal_time(u64); + extern void account_idle_time(u64); ++extern u64 get_idle_time(struct kernel_cpustat *kcs, int cpu); + + #ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE + static inline void account_process_tick(struct task_struct *tsk, int user) +diff --git a/include/linux/kexec.h b/include/linux/kexec.h +index 0c994ae37729e..cf042d41c87b9 100644 +--- a/include/linux/kexec.h ++++ b/include/linux/kexec.h +@@ -187,14 +187,6 @@ void *kexec_purgatory_get_symbol_addr(struct kimage *image, const char *name); + int arch_kexec_kernel_image_probe(struct kimage *image, void *buf, + unsigned long buf_len); + void *arch_kexec_kernel_image_load(struct kimage *image); +-int arch_kexec_apply_relocations_add(struct purgatory_info *pi, +- Elf_Shdr *section, +- const Elf_Shdr *relsec, +- const Elf_Shdr *symtab); +-int arch_kexec_apply_relocations(struct purgatory_info *pi, +- Elf_Shdr *section, +- const Elf_Shdr *relsec, +- 
const Elf_Shdr *symtab); + int arch_kimage_file_post_load_cleanup(struct kimage *image); + #ifdef CONFIG_KEXEC_SIG + int arch_kexec_kernel_verify_sig(struct kimage *image, void *buf, +@@ -223,6 +215,44 @@ extern int crash_exclude_mem_range(struct crash_mem *mem, + unsigned long long mend); + extern int crash_prepare_elf64_headers(struct crash_mem *mem, int kernel_map, + void **addr, unsigned long *sz); ++ ++#ifndef arch_kexec_apply_relocations_add ++/* ++ * arch_kexec_apply_relocations_add - apply relocations of type RELA ++ * @pi: Purgatory to be relocated. ++ * @section: Section relocations applying to. ++ * @relsec: Section containing RELAs. ++ * @symtab: Corresponding symtab. ++ * ++ * Return: 0 on success, negative errno on error. ++ */ ++static inline int ++arch_kexec_apply_relocations_add(struct purgatory_info *pi, Elf_Shdr *section, ++ const Elf_Shdr *relsec, const Elf_Shdr *symtab) ++{ ++ pr_err("RELA relocation unsupported.\n"); ++ return -ENOEXEC; ++} ++#endif ++ ++#ifndef arch_kexec_apply_relocations ++/* ++ * arch_kexec_apply_relocations - apply relocations of type REL ++ * @pi: Purgatory to be relocated. ++ * @section: Section relocations applying to. ++ * @relsec: Section containing RELs. ++ * @symtab: Corresponding symtab. ++ * ++ * Return: 0 on success, negative errno on error. ++ */ ++static inline int ++arch_kexec_apply_relocations(struct purgatory_info *pi, Elf_Shdr *section, ++ const Elf_Shdr *relsec, const Elf_Shdr *symtab) ++{ ++ pr_err("REL relocation unsupported.\n"); ++ return -ENOEXEC; ++} ++#endif + #endif /* CONFIG_KEXEC_FILE */ + + #ifdef CONFIG_KEXEC_ELF +@@ -422,6 +452,12 @@ static inline int kexec_crash_loaded(void) { return 0; } + #define kexec_in_progress false + #endif /* CONFIG_KEXEC_CORE */ + ++#ifdef CONFIG_KEXEC_SIG ++void set_kexec_sig_enforced(void); ++#else ++static inline void set_kexec_sig_enforced(void) {} ++#endif ++ + #endif /* !defined(__ASSEBMLY__) */ + + #endif /* LINUX_KEXEC_H */ +diff --git a/include/linux/kfence.h b/include/linux/kfence.h +index 3fe6dd8a18c19..3c75209a545e1 100644 +--- a/include/linux/kfence.h ++++ b/include/linux/kfence.h +@@ -14,6 +14,9 @@ + + #ifdef CONFIG_KFENCE + ++#include <linux/atomic.h> ++#include <linux/static_key.h> ++ + /* + * We allocate an even number of pages, as it simplifies calculations to map + * address to metadata indices; effectively, the very first page serves as an +@@ -22,13 +25,8 @@ + #define KFENCE_POOL_SIZE ((CONFIG_KFENCE_NUM_OBJECTS + 1) * 2 * PAGE_SIZE) + extern char *__kfence_pool; + +-#ifdef CONFIG_KFENCE_STATIC_KEYS +-#include <linux/static_key.h> + DECLARE_STATIC_KEY_FALSE(kfence_allocation_key); +-#else +-#include <linux/atomic.h> + extern atomic_t kfence_allocation_gate; +-#endif + + /** + * is_kfence_address() - check if an address belongs to KFENCE pool +@@ -116,13 +114,16 @@ void *__kfence_alloc(struct kmem_cache *s, size_t size, gfp_t flags); + */ + static __always_inline void *kfence_alloc(struct kmem_cache *s, size_t size, gfp_t flags) + { +-#ifdef CONFIG_KFENCE_STATIC_KEYS +- if (static_branch_unlikely(&kfence_allocation_key)) ++#if defined(CONFIG_KFENCE_STATIC_KEYS) || CONFIG_KFENCE_SAMPLE_INTERVAL == 0 ++ if (!static_branch_unlikely(&kfence_allocation_key)) ++ return NULL; + #else +- if (unlikely(!atomic_read(&kfence_allocation_gate))) ++ if (!static_branch_likely(&kfence_allocation_key)) ++ return NULL; + #endif +- return __kfence_alloc(s, size, flags); +- return NULL; ++ if (likely(atomic_read(&kfence_allocation_gate))) ++ return NULL; ++ return __kfence_alloc(s, size, 
flags); + } + + /** +@@ -201,6 +202,22 @@ static __always_inline __must_check bool kfence_free(void *addr) + */ + bool __must_check kfence_handle_page_fault(unsigned long addr, bool is_write, struct pt_regs *regs); + ++#ifdef CONFIG_PRINTK ++struct kmem_obj_info; ++/** ++ * __kfence_obj_info() - fill kmem_obj_info struct ++ * @kpp: kmem_obj_info to be filled ++ * @object: the object ++ * ++ * Return: ++ * * false - not a KFENCE object ++ * * true - a KFENCE object, filled @kpp ++ * ++ * Copies information to @kpp for KFENCE objects. ++ */ ++bool __kfence_obj_info(struct kmem_obj_info *kpp, void *object, struct page *page); ++#endif ++ + #else /* CONFIG_KFENCE */ + + static inline bool is_kfence_address(const void *addr) { return false; } +@@ -218,6 +235,14 @@ static inline bool __must_check kfence_handle_page_fault(unsigned long addr, boo + return false; + } + ++#ifdef CONFIG_PRINTK ++struct kmem_obj_info; ++static inline bool __kfence_obj_info(struct kmem_obj_info *kpp, void *object, struct page *page) ++{ ++ return false; ++} ++#endif ++ + #endif + + #endif /* _LINUX_KFENCE_H */ +diff --git a/include/linux/kfifo.h b/include/linux/kfifo.h +index 86249476b57f4..0b35a41440ff1 100644 +--- a/include/linux/kfifo.h ++++ b/include/linux/kfifo.h +@@ -688,7 +688,7 @@ __kfifo_uint_must_check_helper( \ + * writer, you don't need extra locking to use these macro. + */ + #define kfifo_to_user(fifo, to, len, copied) \ +-__kfifo_uint_must_check_helper( \ ++__kfifo_int_must_check_helper( \ + ({ \ + typeof((fifo) + 1) __tmp = (fifo); \ + void __user *__to = (to); \ +diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h +index e4f3bfe087570..ef8c7accbc689 100644 +--- a/include/linux/kprobes.h ++++ b/include/linux/kprobes.h +@@ -154,6 +154,8 @@ struct kretprobe { + struct kretprobe_holder *rph; + }; + ++#define KRETPROBE_MAX_DATA_SIZE 4096 ++ + struct kretprobe_instance { + union { + struct freelist_node freelist; +diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h +index 0f18df7fe8749..7e2423ffaf593 100644 +--- a/include/linux/kvm_host.h ++++ b/include/linux/kvm_host.h +@@ -15,6 +15,8 @@ + #include <linux/minmax.h> + #include <linux/mm.h> + #include <linux/mmu_notifier.h> ++#include <linux/ftrace.h> ++#include <linux/instrumentation.h> + #include <linux/preempt.h> + #include <linux/msi.h> + #include <linux/slab.h> +@@ -363,8 +365,11 @@ struct kvm_vcpu { + int last_used_slot; + }; + +-/* must be called with irqs disabled */ +-static __always_inline void guest_enter_irqoff(void) ++/* ++ * Start accounting time towards a guest. ++ * Must be called before entering guest context. ++ */ ++static __always_inline void guest_timing_enter_irqoff(void) + { + /* + * This is running in ioctl context so its safe to assume that it's the +@@ -373,7 +378,18 @@ static __always_inline void guest_enter_irqoff(void) + instrumentation_begin(); + vtime_account_guest_enter(); + instrumentation_end(); ++} + ++/* ++ * Enter guest context and enter an RCU extended quiescent state. ++ * ++ * Between guest_context_enter_irqoff() and guest_context_exit_irqoff() it is ++ * unsafe to use any code which may directly or indirectly use RCU, tracing ++ * (including IRQ flag tracing), or lockdep. All code in this period must be ++ * non-instrumentable. ++ */ ++static __always_inline void guest_context_enter_irqoff(void) ++{ + /* + * KVM does not hold any references to rcu protected data when it + * switches CPU into a guest mode. 
In fact switching to a guest mode +@@ -389,16 +405,79 @@ static __always_inline void guest_enter_irqoff(void) + } + } + +-static __always_inline void guest_exit_irqoff(void) ++/* ++ * Deprecated. Architectures should move to guest_timing_enter_irqoff() and ++ * guest_state_enter_irqoff(). ++ */ ++static __always_inline void guest_enter_irqoff(void) ++{ ++ guest_timing_enter_irqoff(); ++ guest_context_enter_irqoff(); ++} ++ ++/** ++ * guest_state_enter_irqoff - Fixup state when entering a guest ++ * ++ * Entry to a guest will enable interrupts, but the kernel state is interrupts ++ * disabled when this is invoked. Also tell RCU about it. ++ * ++ * 1) Trace interrupts on state ++ * 2) Invoke context tracking if enabled to adjust RCU state ++ * 3) Tell lockdep that interrupts are enabled ++ * ++ * Invoked from architecture specific code before entering a guest. ++ * Must be called with interrupts disabled and the caller must be ++ * non-instrumentable. ++ * The caller has to invoke guest_timing_enter_irqoff() before this. ++ * ++ * Note: this is analogous to exit_to_user_mode(). ++ */ ++static __always_inline void guest_state_enter_irqoff(void) ++{ ++ instrumentation_begin(); ++ trace_hardirqs_on_prepare(); ++ lockdep_hardirqs_on_prepare(); ++ instrumentation_end(); ++ ++ guest_context_enter_irqoff(); ++ lockdep_hardirqs_on(CALLER_ADDR0); ++} ++ ++/* ++ * Exit guest context and exit an RCU extended quiescent state. ++ * ++ * Between guest_context_enter_irqoff() and guest_context_exit_irqoff() it is ++ * unsafe to use any code which may directly or indirectly use RCU, tracing ++ * (including IRQ flag tracing), or lockdep. All code in this period must be ++ * non-instrumentable. ++ */ ++static __always_inline void guest_context_exit_irqoff(void) + { + context_tracking_guest_exit(); ++} + ++/* ++ * Stop accounting time towards a guest. ++ * Must be called after exiting guest context. ++ */ ++static __always_inline void guest_timing_exit_irqoff(void) ++{ + instrumentation_begin(); + /* Flush the guest cputime we spent on the guest */ + vtime_account_guest_exit(); + instrumentation_end(); + } + ++/* ++ * Deprecated. Architectures should move to guest_state_exit_irqoff() and ++ * guest_timing_exit_irqoff(). ++ */ ++static __always_inline void guest_exit_irqoff(void) ++{ ++ guest_context_exit_irqoff(); ++ guest_timing_exit_irqoff(); ++} ++ + static inline void guest_exit(void) + { + unsigned long flags; +@@ -408,6 +487,33 @@ static inline void guest_exit(void) + local_irq_restore(flags); + } + ++/** ++ * guest_state_exit_irqoff - Establish state when returning from guest mode ++ * ++ * Entry from a guest disables interrupts, but guest mode is traced as ++ * interrupts enabled. Also with NO_HZ_FULL RCU might be idle. ++ * ++ * 1) Tell lockdep that interrupts are disabled ++ * 2) Invoke context tracking if enabled to reactivate RCU ++ * 3) Trace interrupts off state ++ * ++ * Invoked from architecture specific code after exiting a guest. ++ * Must be invoked with interrupts disabled and the caller must be ++ * non-instrumentable. ++ * The caller has to invoke guest_timing_exit_irqoff() after this. ++ * ++ * Note: this is analogous to enter_from_user_mode(). 
++ */ ++static __always_inline void guest_state_exit_irqoff(void) ++{ ++ lockdep_hardirqs_off(CALLER_ADDR0); ++ guest_context_exit_irqoff(); ++ ++ instrumentation_begin(); ++ trace_hardirqs_off_finish(); ++ instrumentation_end(); ++} ++ + static inline int kvm_vcpu_exiting_guest_mode(struct kvm_vcpu *vcpu) + { + /* +@@ -1018,6 +1124,8 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm, + struct kvm_enable_cap *cap); + long kvm_arch_vm_ioctl(struct file *filp, + unsigned int ioctl, unsigned long arg); ++long kvm_arch_vm_compat_ioctl(struct file *filp, unsigned int ioctl, ++ unsigned long arg); + + int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu); + int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu); +@@ -1127,7 +1235,7 @@ static inline void kvm_arch_end_assignment(struct kvm *kvm) + { + } + +-static inline bool kvm_arch_has_assigned_device(struct kvm *kvm) ++static __always_inline bool kvm_arch_has_assigned_device(struct kvm *kvm) + { + return false; + } +@@ -1806,6 +1914,8 @@ static inline long kvm_arch_vcpu_async_ioctl(struct file *filp, + void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm, + unsigned long start, unsigned long end); + ++void kvm_arch_guest_memory_reclaimed(struct kvm *kvm); ++ + #ifdef CONFIG_HAVE_KVM_VCPU_RUN_PID_CHANGE + int kvm_arch_vcpu_run_pid_change(struct kvm_vcpu *vcpu); + #else +diff --git a/include/linux/libata.h b/include/linux/libata.h +index c0c64f03e1074..d890c43cff146 100644 +--- a/include/linux/libata.h ++++ b/include/linux/libata.h +@@ -394,7 +394,7 @@ enum { + /* This should match the actual table size of + * ata_eh_cmd_timeout_table in libata-eh.c. + */ +- ATA_EH_CMD_TIMEOUT_TABLE_SIZE = 6, ++ ATA_EH_CMD_TIMEOUT_TABLE_SIZE = 7, + + /* Horkage types. May be set by libata or controller on drives + (some horkage may be drive/controller pair dependent */ +@@ -565,7 +565,10 @@ struct ata_taskfile { + u8 hob_lbam; + u8 hob_lbah; + +- u8 feature; ++ union { ++ u8 error; ++ u8 feature; ++ }; + u8 nsect; + u8 lbal; + u8 lbam; +@@ -573,7 +576,10 @@ struct ata_taskfile { + + u8 device; + +- u8 command; /* IO operation */ ++ union { ++ u8 status; ++ u8 command; ++ }; + + u32 auxiliary; /* auxiliary field */ + /* from SATA 3.1 and */ +@@ -1471,51 +1477,61 @@ static inline int sata_srst_pmp(struct ata_link *link) + return link->pmp; + } + +-/* +- * printk helpers +- */ +-__printf(3, 4) +-void ata_port_printk(const struct ata_port *ap, const char *level, +- const char *fmt, ...); +-__printf(3, 4) +-void ata_link_printk(const struct ata_link *link, const char *level, +- const char *fmt, ...); +-__printf(3, 4) +-void ata_dev_printk(const struct ata_device *dev, const char *level, +- const char *fmt, ...); ++#define ata_port_printk(level, ap, fmt, ...) \ ++ pr_ ## level ("ata%u: " fmt, (ap)->print_id, ##__VA_ARGS__) + + #define ata_port_err(ap, fmt, ...) \ +- ata_port_printk(ap, KERN_ERR, fmt, ##__VA_ARGS__) ++ ata_port_printk(err, ap, fmt, ##__VA_ARGS__) + #define ata_port_warn(ap, fmt, ...) \ +- ata_port_printk(ap, KERN_WARNING, fmt, ##__VA_ARGS__) ++ ata_port_printk(warn, ap, fmt, ##__VA_ARGS__) + #define ata_port_notice(ap, fmt, ...) \ +- ata_port_printk(ap, KERN_NOTICE, fmt, ##__VA_ARGS__) ++ ata_port_printk(notice, ap, fmt, ##__VA_ARGS__) + #define ata_port_info(ap, fmt, ...) \ +- ata_port_printk(ap, KERN_INFO, fmt, ##__VA_ARGS__) ++ ata_port_printk(info, ap, fmt, ##__VA_ARGS__) + #define ata_port_dbg(ap, fmt, ...) 
\ +- ata_port_printk(ap, KERN_DEBUG, fmt, ##__VA_ARGS__) ++ ata_port_printk(debug, ap, fmt, ##__VA_ARGS__) ++ ++#define ata_link_printk(level, link, fmt, ...) \ ++do { \ ++ if (sata_pmp_attached((link)->ap) || \ ++ (link)->ap->slave_link) \ ++ pr_ ## level ("ata%u.%02u: " fmt, \ ++ (link)->ap->print_id, \ ++ (link)->pmp, \ ++ ##__VA_ARGS__); \ ++ else \ ++ pr_ ## level ("ata%u: " fmt, \ ++ (link)->ap->print_id, \ ++ ##__VA_ARGS__); \ ++} while (0) + + #define ata_link_err(link, fmt, ...) \ +- ata_link_printk(link, KERN_ERR, fmt, ##__VA_ARGS__) ++ ata_link_printk(err, link, fmt, ##__VA_ARGS__) + #define ata_link_warn(link, fmt, ...) \ +- ata_link_printk(link, KERN_WARNING, fmt, ##__VA_ARGS__) ++ ata_link_printk(warn, link, fmt, ##__VA_ARGS__) + #define ata_link_notice(link, fmt, ...) \ +- ata_link_printk(link, KERN_NOTICE, fmt, ##__VA_ARGS__) ++ ata_link_printk(notice, link, fmt, ##__VA_ARGS__) + #define ata_link_info(link, fmt, ...) \ +- ata_link_printk(link, KERN_INFO, fmt, ##__VA_ARGS__) ++ ata_link_printk(info, link, fmt, ##__VA_ARGS__) + #define ata_link_dbg(link, fmt, ...) \ +- ata_link_printk(link, KERN_DEBUG, fmt, ##__VA_ARGS__) ++ ata_link_printk(debug, link, fmt, ##__VA_ARGS__) ++ ++#define ata_dev_printk(level, dev, fmt, ...) \ ++ pr_ ## level("ata%u.%02u: " fmt, \ ++ (dev)->link->ap->print_id, \ ++ (dev)->link->pmp + (dev)->devno, \ ++ ##__VA_ARGS__) + + #define ata_dev_err(dev, fmt, ...) \ +- ata_dev_printk(dev, KERN_ERR, fmt, ##__VA_ARGS__) ++ ata_dev_printk(err, dev, fmt, ##__VA_ARGS__) + #define ata_dev_warn(dev, fmt, ...) \ +- ata_dev_printk(dev, KERN_WARNING, fmt, ##__VA_ARGS__) ++ ata_dev_printk(warn, dev, fmt, ##__VA_ARGS__) + #define ata_dev_notice(dev, fmt, ...) \ +- ata_dev_printk(dev, KERN_NOTICE, fmt, ##__VA_ARGS__) ++ ata_dev_printk(notice, dev, fmt, ##__VA_ARGS__) + #define ata_dev_info(dev, fmt, ...) \ +- ata_dev_printk(dev, KERN_INFO, fmt, ##__VA_ARGS__) ++ ata_dev_printk(info, dev, fmt, ##__VA_ARGS__) + #define ata_dev_dbg(dev, fmt, ...) 
\ +- ata_dev_printk(dev, KERN_DEBUG, fmt, ##__VA_ARGS__) ++ ata_dev_printk(debug, dev, fmt, ##__VA_ARGS__) + + void ata_print_version(const struct device *dev, const char *version); + +@@ -2049,11 +2065,8 @@ static inline u8 ata_wait_idle(struct ata_port *ap) + { + u8 status = ata_sff_busy_wait(ap, ATA_BUSY | ATA_DRQ, 1000); + +-#ifdef ATA_DEBUG + if (status != 0xff && (status & (ATA_BUSY | ATA_DRQ))) +- ata_port_printk(ap, KERN_DEBUG, "abnormal Status 0x%X\n", +- status); +-#endif ++ ata_port_dbg(ap, "abnormal Status 0x%X\n", status); + + return status; + } +diff --git a/include/linux/list.h b/include/linux/list.h +index f2af4b4aa4e9a..d206ae93c06da 100644 +--- a/include/linux/list.h ++++ b/include/linux/list.h +@@ -33,7 +33,7 @@ + static inline void INIT_LIST_HEAD(struct list_head *list) + { + WRITE_ONCE(list->next, list); +- list->prev = list; ++ WRITE_ONCE(list->prev, list); + } + + #ifdef CONFIG_DEBUG_LIST +@@ -256,8 +256,7 @@ static inline void list_bulk_move_tail(struct list_head *head, + * @list: the entry to test + * @head: the head of the list + */ +-static inline int list_is_first(const struct list_head *list, +- const struct list_head *head) ++static inline int list_is_first(const struct list_head *list, const struct list_head *head) + { + return list->prev == head; + } +@@ -267,12 +266,21 @@ static inline int list_is_first(const struct list_head *list, + * @list: the entry to test + * @head: the head of the list + */ +-static inline int list_is_last(const struct list_head *list, +- const struct list_head *head) ++static inline int list_is_last(const struct list_head *list, const struct list_head *head) + { + return list->next == head; + } + ++/** ++ * list_is_head - tests whether @list is the list @head ++ * @list: the entry to test ++ * @head: the head of the list ++ */ ++static inline int list_is_head(const struct list_head *list, const struct list_head *head) ++{ ++ return list == head; ++} ++ + /** + * list_empty - tests whether a list is empty + * @head: the list to test. +@@ -296,7 +304,7 @@ static inline int list_empty(const struct list_head *head) + static inline void list_del_init_careful(struct list_head *entry) + { + __list_del_entry(entry); +- entry->prev = entry; ++ WRITE_ONCE(entry->prev, entry); + smp_store_release(&entry->next, entry); + } + +@@ -316,7 +324,7 @@ static inline void list_del_init_careful(struct list_head *entry) + static inline int list_empty_careful(const struct list_head *head) + { + struct list_head *next = smp_load_acquire(&head->next); +- return (next == head) && (next == head->prev); ++ return list_is_head(next, head) && (next == READ_ONCE(head->prev)); + } + + /** +@@ -391,10 +399,9 @@ static inline void list_cut_position(struct list_head *list, + { + if (list_empty(head)) + return; +- if (list_is_singular(head) && +- (head->next != entry && head != entry)) ++ if (list_is_singular(head) && !list_is_head(entry, head) && (entry != head->next)) + return; +- if (entry == head) ++ if (list_is_head(entry, head)) + INIT_LIST_HEAD(list); + else + __list_cut_position(list, head, entry); +@@ -568,7 +575,17 @@ static inline void list_splice_tail_init(struct list_head *list, + * @head: the head for your list. + */ + #define list_for_each(pos, head) \ +- for (pos = (head)->next; pos != (head); pos = pos->next) ++ for (pos = (head)->next; !list_is_head(pos, (head)); pos = pos->next) ++ ++/** ++ * list_for_each_rcu - Iterate over a list in an RCU-safe fashion ++ * @pos: the &struct list_head to use as a loop cursor. ++ * @head: the head for your list. 
++ */ ++#define list_for_each_rcu(pos, head) \ ++ for (pos = rcu_dereference((head)->next); \ ++ !list_is_head(pos, (head)); \ ++ pos = rcu_dereference(pos->next)) + + /** + * list_for_each_continue - continue iteration over a list +@@ -578,7 +595,7 @@ static inline void list_splice_tail_init(struct list_head *list, + * Continue to iterate over a list, continuing after the current position. + */ + #define list_for_each_continue(pos, head) \ +- for (pos = pos->next; pos != (head); pos = pos->next) ++ for (pos = pos->next; !list_is_head(pos, (head)); pos = pos->next) + + /** + * list_for_each_prev - iterate over a list backwards +@@ -586,7 +603,7 @@ static inline void list_splice_tail_init(struct list_head *list, + * @head: the head for your list. + */ + #define list_for_each_prev(pos, head) \ +- for (pos = (head)->prev; pos != (head); pos = pos->prev) ++ for (pos = (head)->prev; !list_is_head(pos, (head)); pos = pos->prev) + + /** + * list_for_each_safe - iterate over a list safe against removal of list entry +@@ -595,8 +612,9 @@ static inline void list_splice_tail_init(struct list_head *list, + * @head: the head for your list. + */ + #define list_for_each_safe(pos, n, head) \ +- for (pos = (head)->next, n = pos->next; pos != (head); \ +- pos = n, n = pos->next) ++ for (pos = (head)->next, n = pos->next; \ ++ !list_is_head(pos, (head)); \ ++ pos = n, n = pos->next) + + /** + * list_for_each_prev_safe - iterate over a list backwards safe against removal of list entry +@@ -606,7 +624,7 @@ static inline void list_splice_tail_init(struct list_head *list, + */ + #define list_for_each_prev_safe(pos, n, head) \ + for (pos = (head)->prev, n = pos->prev; \ +- pos != (head); \ ++ !list_is_head(pos, (head)); \ + pos = n, n = pos->prev) + + /** +diff --git a/include/linux/lockd/xdr.h b/include/linux/lockd/xdr.h +index a98309c0121cb..bed63156b0521 100644 +--- a/include/linux/lockd/xdr.h ++++ b/include/linux/lockd/xdr.h +@@ -41,6 +41,8 @@ struct nlm_lock { + struct nfs_fh fh; + struct xdr_netobj oh; + u32 svid; ++ u64 lock_start; ++ u64 lock_len; + struct file_lock fl; + }; + +diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h +index 9fe165beb0f9e..aa0ecfc6cdb4b 100644 +--- a/include/linux/lockdep.h ++++ b/include/linux/lockdep.h +@@ -192,7 +192,7 @@ static inline void + lockdep_init_map_waits(struct lockdep_map *lock, const char *name, + struct lock_class_key *key, int subclass, u8 inner, u8 outer) + { +- lockdep_init_map_type(lock, name, key, subclass, inner, LD_WAIT_INV, LD_LOCK_NORMAL); ++ lockdep_init_map_type(lock, name, key, subclass, inner, outer, LD_LOCK_NORMAL); + } + + static inline void +@@ -215,24 +215,28 @@ static inline void lockdep_init_map(struct lockdep_map *lock, const char *name, + * or they are too narrow (they suffer from a false class-split): + */ + #define lockdep_set_class(lock, key) \ +- lockdep_init_map_waits(&(lock)->dep_map, #key, key, 0, \ +- (lock)->dep_map.wait_type_inner, \ +- (lock)->dep_map.wait_type_outer) ++ lockdep_init_map_type(&(lock)->dep_map, #key, key, 0, \ ++ (lock)->dep_map.wait_type_inner, \ ++ (lock)->dep_map.wait_type_outer, \ ++ (lock)->dep_map.lock_type) + + #define lockdep_set_class_and_name(lock, key, name) \ +- lockdep_init_map_waits(&(lock)->dep_map, name, key, 0, \ +- (lock)->dep_map.wait_type_inner, \ +- (lock)->dep_map.wait_type_outer) ++ lockdep_init_map_type(&(lock)->dep_map, name, key, 0, \ ++ (lock)->dep_map.wait_type_inner, \ ++ (lock)->dep_map.wait_type_outer, \ ++ (lock)->dep_map.lock_type) + + #define 
lockdep_set_class_and_subclass(lock, key, sub) \ +- lockdep_init_map_waits(&(lock)->dep_map, #key, key, sub,\ +- (lock)->dep_map.wait_type_inner, \ +- (lock)->dep_map.wait_type_outer) ++ lockdep_init_map_type(&(lock)->dep_map, #key, key, sub, \ ++ (lock)->dep_map.wait_type_inner, \ ++ (lock)->dep_map.wait_type_outer, \ ++ (lock)->dep_map.lock_type) + + #define lockdep_set_subclass(lock, sub) \ +- lockdep_init_map_waits(&(lock)->dep_map, #lock, (lock)->dep_map.key, sub,\ +- (lock)->dep_map.wait_type_inner, \ +- (lock)->dep_map.wait_type_outer) ++ lockdep_init_map_type(&(lock)->dep_map, #lock, (lock)->dep_map.key, sub,\ ++ (lock)->dep_map.wait_type_inner, \ ++ (lock)->dep_map.wait_type_outer, \ ++ (lock)->dep_map.lock_type) + + #define lockdep_set_novalidate_class(lock) \ + lockdep_set_class_and_name(lock, &__lockdep_no_validate__, #lock) +diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h +index 2adeea44c0d53..61590c1f2d333 100644 +--- a/include/linux/lsm_hook_defs.h ++++ b/include/linux/lsm_hook_defs.h +@@ -26,13 +26,13 @@ + * #undef LSM_HOOK + * }; + */ +-LSM_HOOK(int, 0, binder_set_context_mgr, struct task_struct *mgr) +-LSM_HOOK(int, 0, binder_transaction, struct task_struct *from, +- struct task_struct *to) +-LSM_HOOK(int, 0, binder_transfer_binder, struct task_struct *from, +- struct task_struct *to) +-LSM_HOOK(int, 0, binder_transfer_file, struct task_struct *from, +- struct task_struct *to, struct file *file) ++LSM_HOOK(int, 0, binder_set_context_mgr, const struct cred *mgr) ++LSM_HOOK(int, 0, binder_transaction, const struct cred *from, ++ const struct cred *to) ++LSM_HOOK(int, 0, binder_transfer_binder, const struct cred *from, ++ const struct cred *to) ++LSM_HOOK(int, 0, binder_transfer_file, const struct cred *from, ++ const struct cred *to, struct file *file) + LSM_HOOK(int, 0, ptrace_access_check, struct task_struct *child, + unsigned int mode) + LSM_HOOK(int, 0, ptrace_traceme, struct task_struct *parent) +diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h +index 5c4c5c0602cb7..59024618554e2 100644 +--- a/include/linux/lsm_hooks.h ++++ b/include/linux/lsm_hooks.h +@@ -1313,22 +1313,22 @@ + * + * @binder_set_context_mgr: + * Check whether @mgr is allowed to be the binder context manager. +- * @mgr contains the task_struct for the task being registered. ++ * @mgr contains the struct cred for the current binder process. + * Return 0 if permission is granted. + * @binder_transaction: + * Check whether @from is allowed to invoke a binder transaction call + * to @to. +- * @from contains the task_struct for the sending task. +- * @to contains the task_struct for the receiving task. ++ * @from contains the struct cred for the sending process. ++ * @to contains the struct cred for the receiving process. + * @binder_transfer_binder: + * Check whether @from is allowed to transfer a binder reference to @to. +- * @from contains the task_struct for the sending task. +- * @to contains the task_struct for the receiving task. ++ * @from contains the struct cred for the sending process. ++ * @to contains the struct cred for the receiving process. + * @binder_transfer_file: + * Check whether @from is allowed to transfer @file to @to. +- * @from contains the task_struct for the sending task. ++ * @from contains the struct cred for the sending process. + * @file contains the struct file being transferred. +- * @to contains the task_struct for the receiving task. ++ * @to contains the struct cred for the receiving process. 
+ * + * @ptrace_access_check: + * Check permission before allowing the current process to trace the +diff --git a/include/linux/mailbox_controller.h b/include/linux/mailbox_controller.h +index 36d6ce673503c..6fee33cb52f58 100644 +--- a/include/linux/mailbox_controller.h ++++ b/include/linux/mailbox_controller.h +@@ -83,6 +83,7 @@ struct mbox_controller { + const struct of_phandle_args *sp); + /* Internal to API */ + struct hrtimer poll_hrt; ++ spinlock_t poll_hrt_lock; + struct list_head node; + }; + +diff --git a/include/linux/mbcache.h b/include/linux/mbcache.h +index 20f1e3ff60130..591bc4cefe1d6 100644 +--- a/include/linux/mbcache.h ++++ b/include/linux/mbcache.h +@@ -10,16 +10,29 @@ + + struct mb_cache; + ++/* Cache entry flags */ ++enum { ++ MBE_REFERENCED_B = 0, ++ MBE_REUSABLE_B ++}; ++ + struct mb_cache_entry { + /* List of entries in cache - protected by cache->c_list_lock */ + struct list_head e_list; +- /* Hash table list - protected by hash chain bitlock */ ++ /* ++ * Hash table list - protected by hash chain bitlock. The entry is ++ * guaranteed to be hashed while e_refcnt > 0. ++ */ + struct hlist_bl_node e_hash_list; ++ /* ++ * Entry refcount. Once it reaches zero, entry is unhashed and freed. ++ * While refcount > 0, the entry is guaranteed to stay in the hash and ++ * e.g. mb_cache_entry_try_delete() will fail. ++ */ + atomic_t e_refcnt; + /* Key in hash - stable during lifetime of the entry */ + u32 e_key; +- u32 e_referenced:1; +- u32 e_reusable:1; ++ unsigned long e_flags; + /* User provided value - stable during lifetime of the entry */ + u64 e_value; + }; +@@ -29,16 +42,24 @@ void mb_cache_destroy(struct mb_cache *cache); + + int mb_cache_entry_create(struct mb_cache *cache, gfp_t mask, u32 key, + u64 value, bool reusable); +-void __mb_cache_entry_free(struct mb_cache_entry *entry); +-static inline int mb_cache_entry_put(struct mb_cache *cache, +- struct mb_cache_entry *entry) ++void __mb_cache_entry_free(struct mb_cache *cache, ++ struct mb_cache_entry *entry); ++void mb_cache_entry_wait_unused(struct mb_cache_entry *entry); ++static inline void mb_cache_entry_put(struct mb_cache *cache, ++ struct mb_cache_entry *entry) + { +- if (!atomic_dec_and_test(&entry->e_refcnt)) +- return 0; +- __mb_cache_entry_free(entry); +- return 1; ++ unsigned int cnt = atomic_dec_return(&entry->e_refcnt); ++ ++ if (cnt > 0) { ++ if (cnt <= 2) ++ wake_up_var(&entry->e_refcnt); ++ return; ++ } ++ __mb_cache_entry_free(cache, entry); + } + ++struct mb_cache_entry *mb_cache_entry_delete_or_get(struct mb_cache *cache, ++ u32 key, u64 value); + void mb_cache_entry_delete(struct mb_cache *cache, u32 key, u64 value); + struct mb_cache_entry *mb_cache_entry_get(struct mb_cache *cache, u32 key, + u64 value); +diff --git a/include/linux/mc146818rtc.h b/include/linux/mc146818rtc.h +index 0661af17a7584..b0da04fe087bb 100644 +--- a/include/linux/mc146818rtc.h ++++ b/include/linux/mc146818rtc.h +@@ -86,6 +86,8 @@ struct cmos_rtc_board_info { + /* 2 values for divider stage reset, others for "testing purposes only" */ + # define RTC_DIV_RESET1 0x60 + # define RTC_DIV_RESET2 0x70 ++ /* In AMD BKDG bit 5 and 6 are reserved, bit 4 is for select dv0 bank */ ++# define RTC_AMD_BANK_SELECT 0x10 + /* Periodic intr. / Square wave rate select. 0=none, 1=32.8kHz,... 
15=2Hz */ + # define RTC_RATE_SELECT 0x0F + +@@ -123,7 +125,11 @@ struct cmos_rtc_board_info { + #define RTC_IO_EXTENT_USED RTC_IO_EXTENT + #endif /* ARCH_RTC_LOCATION */ + +-unsigned int mc146818_get_time(struct rtc_time *time); ++bool mc146818_does_rtc_work(void); ++int mc146818_get_time(struct rtc_time *time); + int mc146818_set_time(struct rtc_time *time); + ++bool mc146818_avoid_UIP(void (*callback)(unsigned char seconds, void *param), ++ void *param); ++ + #endif /* _MC146818RTC_H */ +diff --git a/include/linux/memblock.h b/include/linux/memblock.h +index 34de69b3b8bad..5df38332e4139 100644 +--- a/include/linux/memblock.h ++++ b/include/linux/memblock.h +@@ -388,8 +388,8 @@ phys_addr_t memblock_alloc_range_nid(phys_addr_t size, + phys_addr_t end, int nid, bool exact_nid); + phys_addr_t memblock_phys_alloc_try_nid(phys_addr_t size, phys_addr_t align, int nid); + +-static inline phys_addr_t memblock_phys_alloc(phys_addr_t size, +- phys_addr_t align) ++static __always_inline phys_addr_t memblock_phys_alloc(phys_addr_t size, ++ phys_addr_t align) + { + return memblock_phys_alloc_range(size, align, 0, + MEMBLOCK_ALLOC_ACCESSIBLE); +diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h +index 3096c9a0ee014..4f189b17dafcc 100644 +--- a/include/linux/memcontrol.h ++++ b/include/linux/memcontrol.h +@@ -223,7 +223,7 @@ struct obj_cgroup { + struct mem_cgroup *memcg; + atomic_t nr_charged_bytes; + union { +- struct list_head list; ++ struct list_head list; /* protected by objcg_lock */ + struct rcu_head rcu; + }; + }; +@@ -320,7 +320,8 @@ struct mem_cgroup { + int kmemcg_id; + enum memcg_kmem_state kmem_state; + struct obj_cgroup __rcu *objcg; +- struct list_head objcg_list; /* list of inherited objcgs */ ++ /* list of inherited objcgs, protected by objcg_lock */ ++ struct list_head objcg_list; + #endif + + MEMCG_PADDING(_pad2_); +@@ -965,19 +966,30 @@ static inline void mod_memcg_state(struct mem_cgroup *memcg, + + static inline unsigned long memcg_page_state(struct mem_cgroup *memcg, int idx) + { +- return READ_ONCE(memcg->vmstats.state[idx]); ++ long x = READ_ONCE(memcg->vmstats.state[idx]); ++#ifdef CONFIG_SMP ++ if (x < 0) ++ x = 0; ++#endif ++ return x; + } + + static inline unsigned long lruvec_page_state(struct lruvec *lruvec, + enum node_stat_item idx) + { + struct mem_cgroup_per_node *pn; ++ long x; + + if (mem_cgroup_disabled()) + return node_page_state(lruvec_pgdat(lruvec), idx); + + pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec); +- return READ_ONCE(pn->lruvec_stats.state[idx]); ++ x = READ_ONCE(pn->lruvec_stats.state[idx]); ++#ifdef CONFIG_SMP ++ if (x < 0) ++ x = 0; ++#endif ++ return x; + } + + static inline unsigned long lruvec_page_state_local(struct lruvec *lruvec, +@@ -1001,6 +1013,7 @@ static inline unsigned long lruvec_page_state_local(struct lruvec *lruvec, + } + + void mem_cgroup_flush_stats(void); ++void mem_cgroup_flush_stats_delayed(void); + + void __mod_memcg_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx, + int val); +@@ -1421,6 +1434,10 @@ static inline void mem_cgroup_flush_stats(void) + { + } + ++static inline void mem_cgroup_flush_stats_delayed(void) ++{ ++} ++ + static inline void __mod_memcg_lruvec_state(struct lruvec *lruvec, + enum node_stat_item idx, int val) + { +diff --git a/include/linux/memregion.h b/include/linux/memregion.h +index e11595256cac0..c04c4fd2e2091 100644 +--- a/include/linux/memregion.h ++++ b/include/linux/memregion.h +@@ -16,7 +16,7 @@ static inline int memregion_alloc(gfp_t gfp) + { + return 
-ENOMEM; + } +-void memregion_free(int id) ++static inline void memregion_free(int id) + { + } + #endif +diff --git a/include/linux/memremap.h b/include/linux/memremap.h +index c0e9d35889e8d..a8bc588fe7aa8 100644 +--- a/include/linux/memremap.h ++++ b/include/linux/memremap.h +@@ -72,16 +72,6 @@ struct dev_pagemap_ops { + */ + void (*page_free)(struct page *page); + +- /* +- * Transition the refcount in struct dev_pagemap to the dead state. +- */ +- void (*kill)(struct dev_pagemap *pgmap); +- +- /* +- * Wait for refcount in struct dev_pagemap to be idle and reap it. +- */ +- void (*cleanup)(struct dev_pagemap *pgmap); +- + /* + * Used for private (un-addressable) device memory only. Must migrate + * the page back to a CPU accessible page. +@@ -95,8 +85,7 @@ struct dev_pagemap_ops { + * struct dev_pagemap - metadata for ZONE_DEVICE mappings + * @altmap: pre-allocated/reserved memory for vmemmap allocations + * @ref: reference count that pins the devm_memremap_pages() mapping +- * @internal_ref: internal reference if @ref is not provided by the caller +- * @done: completion for @internal_ref ++ * @done: completion for @ref + * @type: memory type: see MEMORY_* in memory_hotplug.h + * @flags: PGMAP_* flags to specify defailed behavior + * @ops: method table +@@ -109,8 +98,7 @@ struct dev_pagemap_ops { + */ + struct dev_pagemap { + struct vmem_altmap altmap; +- struct percpu_ref *ref; +- struct percpu_ref internal_ref; ++ struct percpu_ref ref; + struct completion done; + enum memory_type type; + unsigned int flags; +@@ -191,7 +179,7 @@ static inline unsigned long memremap_compat_align(void) + static inline void put_dev_pagemap(struct dev_pagemap *pgmap) + { + if (pgmap) +- percpu_ref_put(pgmap->ref); ++ percpu_ref_put(&pgmap->ref); + } + + #endif /* _LINUX_MEMREMAP_H_ */ +diff --git a/include/linux/mfd/t7l66xb.h b/include/linux/mfd/t7l66xb.h +index 69632c1b07bd8..ae3e7a5c5219b 100644 +--- a/include/linux/mfd/t7l66xb.h ++++ b/include/linux/mfd/t7l66xb.h +@@ -12,7 +12,6 @@ + + struct t7l66xb_platform_data { + int (*enable)(struct platform_device *dev); +- int (*disable)(struct platform_device *dev); + int (*suspend)(struct platform_device *dev); + int (*resume)(struct platform_device *dev); + +diff --git a/include/linux/mhi.h b/include/linux/mhi.h +index 7239858790353..a5cc4cdf9cc86 100644 +--- a/include/linux/mhi.h ++++ b/include/linux/mhi.h +@@ -663,6 +663,19 @@ int mhi_pm_suspend(struct mhi_controller *mhi_cntrl); + */ + int mhi_pm_resume(struct mhi_controller *mhi_cntrl); + ++/** ++ * mhi_pm_resume_force - Force resume MHI from suspended state ++ * @mhi_cntrl: MHI controller ++ * ++ * Resume the device irrespective of its MHI state. As per the MHI spec, devices ++ * have to be in M3 state during resume. But some devices seem to be in a ++ * different MHI state other than M3 but they continue working fine if allowed. ++ * This API is intended to be used for such devices. ++ * ++ * Return: 0 if the resume succeeds, a negative error code otherwise ++ */ ++int mhi_pm_resume_force(struct mhi_controller *mhi_cntrl); ++ + /** + * mhi_download_rddm_image - Download ramdump image from device for + * debugging purpose. 
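For illustration only (not part of the patch): a resume handler in a hypothetical MHI controller driver could use the new mhi_pm_resume_force() exactly as the kernel-doc above describes, falling back to the forced variant only for devices known to report a state other than M3. The driver struct, field names, and quirk flag below are assumptions invented for this sketch.

/*
 * Hypothetical sketch of a dev_pm_ops resume callback using
 * mhi_pm_resume_force(); "my_mhi_dev" and "resume_force" are
 * illustrative names, not taken from this patch.
 */
#include <linux/device.h>
#include <linux/mhi.h>

struct my_mhi_dev {
	struct mhi_controller *mhi_cntrl;	/* from mhi_alloc_controller() */
	bool resume_force;	/* device resumes from a state other than M3 */
};

static int my_mhi_dev_resume(struct device *dev)
{
	struct my_mhi_dev *mdev = dev_get_drvdata(dev);

	/*
	 * Spec-compliant devices should be in M3 here; a quirky but
	 * known-good device can still be brought up with the forced
	 * variant, per the kernel-doc above.
	 */
	if (mdev->resume_force)
		return mhi_pm_resume_force(mdev->mhi_cntrl);

	return mhi_pm_resume(mdev->mhi_cntrl);
}
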
+diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h +index 66eaf0aa7f698..3e72133545caf 100644 +--- a/include/linux/mlx5/device.h ++++ b/include/linux/mlx5/device.h +@@ -1074,6 +1074,11 @@ enum { + MLX5_VPORT_ADMIN_STATE_AUTO = 0x2, + }; + ++enum { ++ MLX5_VPORT_CVLAN_INSERT_WHEN_NO_CVLAN = 0x1, ++ MLX5_VPORT_CVLAN_INSERT_ALWAYS = 0x3, ++}; ++ + enum { + MLX5_L3_PROT_TYPE_IPV4 = 0, + MLX5_L3_PROT_TYPE_IPV6 = 1, +diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h +index f17d2101af7a0..26095c0fd781d 100644 +--- a/include/linux/mlx5/driver.h ++++ b/include/linux/mlx5/driver.h +@@ -759,6 +759,7 @@ struct mlx5_core_dev { + enum mlx5_device_state state; + /* sync interface state */ + struct mutex intf_state_mutex; ++ struct lock_class_key lock_key; + unsigned long intf_state; + struct mlx5_priv priv; + struct mlx5_profile profile; +@@ -965,7 +966,7 @@ void mlx5_cmd_allowed_opcode(struct mlx5_core_dev *dev, u16 opcode); + struct mlx5_async_ctx { + struct mlx5_core_dev *dev; + atomic_t num_inflight; +- struct wait_queue_head wait; ++ struct completion inflight_done; + }; + + struct mlx5_async_work; +diff --git a/include/linux/mlx5/eswitch.h b/include/linux/mlx5/eswitch.h +index 4ab5c1fc1270d..a09ed4c8361b6 100644 +--- a/include/linux/mlx5/eswitch.h ++++ b/include/linux/mlx5/eswitch.h +@@ -136,13 +136,13 @@ u32 mlx5_eswitch_get_vport_metadata_for_set(struct mlx5_eswitch *esw, + ESW_TUN_OPTS_SLOW_TABLE_GOTO_VPORT) + #define ESW_TUN_SLOW_TABLE_GOTO_VPORT_MARK ESW_TUN_OPTS_MASK + +-u8 mlx5_eswitch_mode(struct mlx5_core_dev *dev); ++u8 mlx5_eswitch_mode(const struct mlx5_core_dev *dev); + u16 mlx5_eswitch_get_total_vports(const struct mlx5_core_dev *dev); + struct mlx5_core_dev *mlx5_eswitch_get_core_dev(struct mlx5_eswitch *esw); + + #else /* CONFIG_MLX5_ESWITCH */ + +-static inline u8 mlx5_eswitch_mode(struct mlx5_core_dev *dev) ++static inline u8 mlx5_eswitch_mode(const struct mlx5_core_dev *dev) + { + return MLX5_ESWITCH_NONE; + } +diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h +index 993204a6c1a13..49ea0004109e1 100644 +--- a/include/linux/mlx5/mlx5_ifc.h ++++ b/include/linux/mlx5/mlx5_ifc.h +@@ -822,7 +822,8 @@ struct mlx5_ifc_e_switch_cap_bits { + u8 vport_svlan_insert[0x1]; + u8 vport_cvlan_insert_if_not_exist[0x1]; + u8 vport_cvlan_insert_overwrite[0x1]; +- u8 reserved_at_5[0x2]; ++ u8 reserved_at_5[0x1]; ++ u8 vport_cvlan_insert_always[0x1]; + u8 esw_shared_ingress_acl[0x1]; + u8 esw_uplink_ingress_acl[0x1]; + u8 root_ft_on_other_esw[0x1]; +@@ -3309,8 +3310,8 @@ enum { + }; + + enum { +- MLX5_TIRC_LRO_ENABLE_MASK_IPV4_LRO = 0x1, +- MLX5_TIRC_LRO_ENABLE_MASK_IPV6_LRO = 0x2, ++ MLX5_TIRC_PACKET_MERGE_MASK_IPV4_LRO = BIT(0), ++ MLX5_TIRC_PACKET_MERGE_MASK_IPV6_LRO = BIT(1), + }; + + enum { +@@ -3335,7 +3336,7 @@ struct mlx5_ifc_tirc_bits { + + u8 reserved_at_80[0x4]; + u8 lro_timeout_period_usecs[0x10]; +- u8 lro_enable_mask[0x4]; ++ u8 packet_merge_mask[0x4]; + u8 lro_max_ip_payload_size[0x8]; + + u8 reserved_at_a0[0x40]; +@@ -5028,12 +5029,11 @@ struct mlx5_ifc_query_qp_out_bits { + + u8 syndrome[0x20]; + +- u8 reserved_at_40[0x20]; +- u8 ece[0x20]; ++ u8 reserved_at_40[0x40]; + + u8 opt_param_mask[0x20]; + +- u8 reserved_at_a0[0x20]; ++ u8 ece[0x20]; + + struct mlx5_ifc_qpc_bits qpc; + +@@ -6369,7 +6369,7 @@ struct mlx5_ifc_modify_tir_bitmask_bits { + u8 reserved_at_3c[0x1]; + u8 hash[0x1]; + u8 reserved_at_3e[0x1]; +- u8 lro[0x1]; ++ u8 packet_merge[0x1]; + }; + + struct mlx5_ifc_modify_tir_out_bits { +@@ -9508,8 +9508,8 @@ 
struct mlx5_ifc_bufferx_reg_bits { + u8 reserved_at_0[0x6]; + u8 lossy[0x1]; + u8 epsb[0x1]; +- u8 reserved_at_8[0xc]; +- u8 size[0xc]; ++ u8 reserved_at_8[0x8]; ++ u8 size[0x10]; + + u8 xoff_threshold[0x10]; + u8 xon_threshold[0x10]; +diff --git a/include/linux/mm.h b/include/linux/mm.h +index 73a52aba448f9..e4e1817bb3b89 100644 +--- a/include/linux/mm.h ++++ b/include/linux/mm.h +@@ -1511,11 +1511,18 @@ static inline u8 page_kasan_tag(const struct page *page) + + static inline void page_kasan_tag_set(struct page *page, u8 tag) + { +- if (kasan_enabled()) { +- tag ^= 0xff; +- page->flags &= ~(KASAN_TAG_MASK << KASAN_TAG_PGSHIFT); +- page->flags |= (tag & KASAN_TAG_MASK) << KASAN_TAG_PGSHIFT; +- } ++ unsigned long old_flags, flags; ++ ++ if (!kasan_enabled()) ++ return; ++ ++ tag ^= 0xff; ++ old_flags = READ_ONCE(page->flags); ++ do { ++ flags = old_flags; ++ flags &= ~(KASAN_TAG_MASK << KASAN_TAG_PGSHIFT); ++ flags |= (tag & KASAN_TAG_MASK) << KASAN_TAG_PGSHIFT; ++ } while (unlikely(!try_cmpxchg(&page->flags, &old_flags, flags))); + } + + static inline void page_kasan_tag_reset(struct page *page) +@@ -2600,6 +2607,7 @@ extern int install_special_mapping(struct mm_struct *mm, + unsigned long flags, struct page **pages); + + unsigned long randomize_stack_top(unsigned long stack_top); ++unsigned long randomize_page(unsigned long start, unsigned long range); + + extern unsigned long get_unmapped_area(struct file *, unsigned long, unsigned long, unsigned long, unsigned long); + +@@ -2851,7 +2859,8 @@ struct page *follow_page(struct vm_area_struct *vma, unsigned long address, + #define FOLL_FORCE 0x10 /* get_user_pages read/write w/o permission */ + #define FOLL_NOWAIT 0x20 /* if a disk transfer is needed, start the IO + * and return without waiting upon it */ +-#define FOLL_POPULATE 0x40 /* fault in page */ ++#define FOLL_POPULATE 0x40 /* fault in pages (with FOLL_MLOCK) */ ++#define FOLL_NOFAULT 0x80 /* do not fault in pages */ + #define FOLL_HWPOISON 0x100 /* check page is hwpoisoned */ + #define FOLL_NUMA 0x200 /* force NUMA hinting page fault */ + #define FOLL_MIGRATION 0x400 /* wait for page to replace migration entry */ +@@ -3123,6 +3132,14 @@ extern int sysctl_memory_failure_recovery; + extern void shake_page(struct page *p); + extern atomic_long_t num_poisoned_pages __read_mostly; + extern int soft_offline_page(unsigned long pfn, int flags); ++#ifdef CONFIG_MEMORY_FAILURE ++extern int __get_huge_page_for_hwpoison(unsigned long pfn, int flags); ++#else ++static inline int __get_huge_page_for_hwpoison(unsigned long pfn, int flags) ++{ ++ return 0; ++} ++#endif + + + /* +diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h +index 37f9758751020..12c7f2d3e2107 100644 +--- a/include/linux/mmc/card.h ++++ b/include/linux/mmc/card.h +@@ -292,6 +292,7 @@ struct mmc_card { + #define MMC_QUIRK_BROKEN_IRQ_POLLING (1<<11) /* Polling SDIO_CCCR_INTx could create a fake interrupt */ + #define MMC_QUIRK_TRIM_BROKEN (1<<12) /* Skip trim */ + #define MMC_QUIRK_BROKEN_HPI (1<<13) /* Disable broken HPI support */ ++#define MMC_QUIRK_BROKEN_SD_DISCARD (1<<14) /* Disable broken SD discard support */ + + bool reenable_cmdq; /* Re-enable Command Queue */ + +diff --git a/include/linux/mmc/mmc.h b/include/linux/mmc/mmc.h +index d9a65c6a8816f..545578fb814b0 100644 +--- a/include/linux/mmc/mmc.h ++++ b/include/linux/mmc/mmc.h +@@ -445,7 +445,7 @@ static inline bool mmc_ready_for_data(u32 status) + #define MMC_SECURE_TRIM1_ARG 0x80000001 + #define MMC_SECURE_TRIM2_ARG 0x80008000 + #define 
MMC_SECURE_ARGS 0x80000000 +-#define MMC_TRIM_ARGS 0x00008001 ++#define MMC_TRIM_OR_DISCARD_ARGS 0x00008003 + + #define mmc_driver_type_mask(n) (1 << (n)) + +diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h +index 6a1d79d84675a..6ba1002165302 100644 +--- a/include/linux/mmzone.h ++++ b/include/linux/mmzone.h +@@ -1031,6 +1031,15 @@ static inline int is_highmem_idx(enum zone_type idx) + #endif + } + ++#ifdef CONFIG_ZONE_DMA ++bool has_managed_dma(void); ++#else ++static inline bool has_managed_dma(void) ++{ ++ return false; ++} ++#endif ++ + /** + * is_highmem - helper function to quickly check if a struct zone is a + * highmem zone or not. This is an attempt to keep references +@@ -1342,13 +1351,16 @@ static inline unsigned long *section_to_usemap(struct mem_section *ms) + + static inline struct mem_section *__nr_to_section(unsigned long nr) + { ++ unsigned long root = SECTION_NR_TO_ROOT(nr); ++ ++ if (unlikely(root >= NR_SECTION_ROOTS)) ++ return NULL; ++ + #ifdef CONFIG_SPARSEMEM_EXTREME +- if (!mem_section) ++ if (!mem_section || !mem_section[root]) + return NULL; + #endif +- if (!mem_section[SECTION_NR_TO_ROOT(nr)]) +- return NULL; +- return &mem_section[SECTION_NR_TO_ROOT(nr)][nr & SECTION_ROOT_MASK]; ++ return &mem_section[root][nr & SECTION_ROOT_MASK]; + } + extern size_t mem_section_usage_size(void); + +diff --git a/include/linux/mnt_idmapping.h b/include/linux/mnt_idmapping.h +new file mode 100644 +index 0000000000000..ee5a217de2a88 +--- /dev/null ++++ b/include/linux/mnt_idmapping.h +@@ -0,0 +1,234 @@ ++/* SPDX-License-Identifier: GPL-2.0 */ ++#ifndef _LINUX_MNT_IDMAPPING_H ++#define _LINUX_MNT_IDMAPPING_H ++ ++#include <linux/types.h> ++#include <linux/uidgid.h> ++ ++struct user_namespace; ++/* ++ * Carries the initial idmapping of 0:0:4294967295 which is an identity ++ * mapping. This means that {g,u}id 0 is mapped to {g,u}id 0, {g,u}id 1 is ++ * mapped to {g,u}id 1, [...], {g,u}id 1000 to {g,u}id 1000, [...]. ++ */ ++extern struct user_namespace init_user_ns; ++ ++/** ++ * initial_idmapping - check whether this is the initial mapping ++ * @ns: idmapping to check ++ * ++ * Check whether this is the initial mapping, mapping 0 to 0, 1 to 1, ++ * [...], 1000 to 1000 [...]. ++ * ++ * Return: true if this is the initial mapping, false if not. ++ */ ++static inline bool initial_idmapping(const struct user_namespace *ns) ++{ ++ return ns == &init_user_ns; ++} ++ ++/** ++ * no_idmapping - check whether we can skip remapping a kuid/gid ++ * @mnt_userns: the mount's idmapping ++ * @fs_userns: the filesystem's idmapping ++ * ++ * This function can be used to check whether a remapping between two ++ * idmappings is required. ++ * An idmapped mount is a mount that has an idmapping attached to it that ++ * is different from the filesystem's idmapping and the initial idmapping. ++ * If the initial mapping is used or the idmapping of the mount and the ++ * filesystem are identical no remapping is required. ++ * ++ * Return: true if remapping can be skipped, false if not. ++ */ ++static inline bool no_idmapping(const struct user_namespace *mnt_userns, ++ const struct user_namespace *fs_userns) ++{ ++ return initial_idmapping(mnt_userns) || mnt_userns == fs_userns; ++} ++ ++/** ++ * mapped_kuid_fs - map a filesystem kuid into a mnt_userns ++ * @mnt_userns: the mount's idmapping ++ * @fs_userns: the filesystem's idmapping ++ * @kuid : kuid to be mapped ++ * ++ * Take a @kuid and remap it from @fs_userns into @mnt_userns. 
Use this ++ * function when preparing a @kuid to be reported to userspace. ++ * ++ * If no_idmapping() determines that this is not an idmapped mount we can ++ * simply return @kuid unchanged. ++ * If initial_idmapping() tells us that the filesystem is not mounted with an ++ * idmapping we know the value of @kuid won't change when calling ++ * from_kuid() so we can simply retrieve the value via __kuid_val() ++ * directly. ++ * ++ * Return: @kuid mapped according to @mnt_userns. ++ * If @kuid has no mapping in either @mnt_userns or @fs_userns INVALID_UID is ++ * returned. ++ */ ++static inline kuid_t mapped_kuid_fs(struct user_namespace *mnt_userns, ++ struct user_namespace *fs_userns, ++ kuid_t kuid) ++{ ++ uid_t uid; ++ ++ if (no_idmapping(mnt_userns, fs_userns)) ++ return kuid; ++ if (initial_idmapping(fs_userns)) ++ uid = __kuid_val(kuid); ++ else ++ uid = from_kuid(fs_userns, kuid); ++ if (uid == (uid_t)-1) ++ return INVALID_UID; ++ return make_kuid(mnt_userns, uid); ++} ++ ++/** ++ * mapped_kgid_fs - map a filesystem kgid into a mnt_userns ++ * @mnt_userns: the mount's idmapping ++ * @fs_userns: the filesystem's idmapping ++ * @kgid : kgid to be mapped ++ * ++ * Take a @kgid and remap it from @fs_userns into @mnt_userns. Use this ++ * function when preparing a @kgid to be reported to userspace. ++ * ++ * If no_idmapping() determines that this is not an idmapped mount we can ++ * simply return @kgid unchanged. ++ * If initial_idmapping() tells us that the filesystem is not mounted with an ++ * idmapping we know the value of @kgid won't change when calling ++ * from_kgid() so we can simply retrieve the value via __kgid_val() ++ * directly. ++ * ++ * Return: @kgid mapped according to @mnt_userns. ++ * If @kgid has no mapping in either @mnt_userns or @fs_userns INVALID_GID is ++ * returned. ++ */ ++static inline kgid_t mapped_kgid_fs(struct user_namespace *mnt_userns, ++ struct user_namespace *fs_userns, ++ kgid_t kgid) ++{ ++ gid_t gid; ++ ++ if (no_idmapping(mnt_userns, fs_userns)) ++ return kgid; ++ if (initial_idmapping(fs_userns)) ++ gid = __kgid_val(kgid); ++ else ++ gid = from_kgid(fs_userns, kgid); ++ if (gid == (gid_t)-1) ++ return INVALID_GID; ++ return make_kgid(mnt_userns, gid); ++} ++ ++/** ++ * mapped_kuid_user - map a user kuid into a mnt_userns ++ * @mnt_userns: the mount's idmapping ++ * @fs_userns: the filesystem's idmapping ++ * @kuid : kuid to be mapped ++ * ++ * Use the idmapping of @mnt_userns to remap a @kuid into @fs_userns. Use this ++ * function when preparing a @kuid to be written to disk or inode. ++ * ++ * If no_idmapping() determines that this is not an idmapped mount we can ++ * simply return @kuid unchanged. ++ * If initial_idmapping() tells us that the filesystem is not mounted with an ++ * idmapping we know the value of @kuid won't change when calling ++ * make_kuid() so we can simply retrieve the value via KUIDT_INIT() ++ * directly. ++ * ++ * Return: @kuid mapped according to @mnt_userns. ++ * If @kuid has no mapping in either @mnt_userns or @fs_userns INVALID_UID is ++ * returned. 
++ */ ++static inline kuid_t mapped_kuid_user(struct user_namespace *mnt_userns, ++ struct user_namespace *fs_userns, ++ kuid_t kuid) ++{ ++ uid_t uid; ++ ++ if (no_idmapping(mnt_userns, fs_userns)) ++ return kuid; ++ uid = from_kuid(mnt_userns, kuid); ++ if (uid == (uid_t)-1) ++ return INVALID_UID; ++ if (initial_idmapping(fs_userns)) ++ return KUIDT_INIT(uid); ++ return make_kuid(fs_userns, uid); ++} ++ ++/** ++ * mapped_kgid_user - map a user kgid into a mnt_userns ++ * @mnt_userns: the mount's idmapping ++ * @fs_userns: the filesystem's idmapping ++ * @kgid : kgid to be mapped ++ * ++ * Use the idmapping of @mnt_userns to remap a @kgid into @fs_userns. Use this ++ * function when preparing a @kgid to be written to disk or inode. ++ * ++ * If no_idmapping() determines that this is not an idmapped mount we can ++ * simply return @kgid unchanged. ++ * If initial_idmapping() tells us that the filesystem is not mounted with an ++ * idmapping we know the value of @kgid won't change when calling ++ * make_kgid() so we can simply retrieve the value via KGIDT_INIT() ++ * directly. ++ * ++ * Return: @kgid mapped according to @mnt_userns. ++ * If @kgid has no mapping in either @mnt_userns or @fs_userns INVALID_GID is ++ * returned. ++ */ ++static inline kgid_t mapped_kgid_user(struct user_namespace *mnt_userns, ++ struct user_namespace *fs_userns, ++ kgid_t kgid) ++{ ++ gid_t gid; ++ ++ if (no_idmapping(mnt_userns, fs_userns)) ++ return kgid; ++ gid = from_kgid(mnt_userns, kgid); ++ if (gid == (gid_t)-1) ++ return INVALID_GID; ++ if (initial_idmapping(fs_userns)) ++ return KGIDT_INIT(gid); ++ return make_kgid(fs_userns, gid); ++} ++ ++/** ++ * mapped_fsuid - return caller's fsuid mapped up into a mnt_userns ++ * @mnt_userns: the mount's idmapping ++ * @fs_userns: the filesystem's idmapping ++ * ++ * Use this helper to initialize a new vfs or filesystem object based on ++ * the caller's fsuid. A common example is initializing the i_uid field of ++ * a newly allocated inode triggered by a creation event such as mkdir or ++ * O_CREAT. Other examples include the allocation of quotas for a specific ++ * user. ++ * ++ * Return: the caller's current fsuid mapped up according to @mnt_userns. ++ */ ++static inline kuid_t mapped_fsuid(struct user_namespace *mnt_userns, ++ struct user_namespace *fs_userns) ++{ ++ return mapped_kuid_user(mnt_userns, fs_userns, current_fsuid()); ++} ++ ++/** ++ * mapped_fsgid - return caller's fsgid mapped up into a mnt_userns ++ * @mnt_userns: the mount's idmapping ++ * @fs_userns: the filesystem's idmapping ++ * ++ * Use this helper to initialize a new vfs or filesystem object based on ++ * the caller's fsgid. A common example is initializing the i_gid field of ++ * a newly allocated inode triggered by a creation event such as mkdir or ++ * O_CREAT. Other examples include the allocation of quotas for a specific ++ * user. ++ * ++ * Return: the caller's current fsgid mapped up according to @mnt_userns. 
++ */ ++static inline kgid_t mapped_fsgid(struct user_namespace *mnt_userns, ++ struct user_namespace *fs_userns) ++{ ++ return mapped_kgid_user(mnt_userns, fs_userns, current_fsgid()); ++} ++ ++#endif /* _LINUX_MNT_IDMAPPING_H */ +diff --git a/include/linux/msi.h b/include/linux/msi.h +index 49cf6eb222e76..e616f94c7c585 100644 +--- a/include/linux/msi.h ++++ b/include/linux/msi.h +@@ -148,7 +148,7 @@ struct msi_desc { + u8 is_msix : 1; + u8 multiple : 3; + u8 multi_cap : 3; +- u8 maskbit : 1; ++ u8 can_mask : 1; + u8 is_64 : 1; + u8 is_virtual : 1; + u16 entry_nr; +diff --git a/include/linux/mtd/cfi.h b/include/linux/mtd/cfi.h +index fd1ecb8211060..d88bb56c18e2e 100644 +--- a/include/linux/mtd/cfi.h ++++ b/include/linux/mtd/cfi.h +@@ -286,6 +286,7 @@ struct cfi_private { + map_word sector_erase_cmd; + unsigned long chipshift; /* Because they're of the same type */ + const char *im_name; /* inter_module name for cmdset_setup */ ++ unsigned long quirks; + struct flchip chips[]; /* per-chip data structure for each chip */ + }; + +diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h +index 88227044fc86c..8a2c60235ebb8 100644 +--- a/include/linux/mtd/mtd.h ++++ b/include/linux/mtd/mtd.h +@@ -394,10 +394,8 @@ struct mtd_info { + /* List of partitions attached to this MTD device */ + struct list_head partitions; + +- union { +- struct mtd_part part; +- struct mtd_master master; +- }; ++ struct mtd_part part; ++ struct mtd_master master; + }; + + static inline struct mtd_info *mtd_get_master(struct mtd_info *mtd) +diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h +index b2f9dd3cbd695..dcf90144d70b7 100644 +--- a/include/linux/mtd/rawnand.h ++++ b/include/linux/mtd/rawnand.h +@@ -1240,6 +1240,7 @@ struct nand_secure_region { + * @lock: Lock protecting the suspended field. Also used to serialize accesses + * to the NAND device + * @suspended: Set to 1 when the device is suspended, 0 when it's not ++ * @resume_wq: wait queue to sleep if rawnand is in suspended state. + * @cur_cs: Currently selected target. -1 means no target selected, otherwise we + * should always have cur_cs >= 0 && cur_cs < nanddev_ntargets(). 
+ * NAND Controller drivers should not modify this value, but they're +@@ -1294,6 +1295,7 @@ struct nand_chip { + /* Internals */ + struct mutex lock; + unsigned int suspended : 1; ++ wait_queue_head_t resume_wq; + int cur_cs; + int read_retries; + struct nand_secure_region *secure_regions; +@@ -1539,6 +1541,8 @@ int nand_read_data_op(struct nand_chip *chip, void *buf, unsigned int len, + bool force_8bit, bool check_only); + int nand_write_data_op(struct nand_chip *chip, const void *buf, + unsigned int len, bool force_8bit); ++int nand_read_page_hwecc_oob_first(struct nand_chip *chip, uint8_t *buf, ++ int oob_required, int page); + + /* Scan and identify a NAND device */ + int nand_scan_with_ids(struct nand_chip *chip, unsigned int max_chips, +diff --git a/include/linux/namei.h b/include/linux/namei.h +index e89329bb3134e..caeb08a98536c 100644 +--- a/include/linux/namei.h ++++ b/include/linux/namei.h +@@ -69,6 +69,12 @@ extern struct dentry *lookup_one_len(const char *, struct dentry *, int); + extern struct dentry *lookup_one_len_unlocked(const char *, struct dentry *, int); + extern struct dentry *lookup_positive_unlocked(const char *, struct dentry *, int); + struct dentry *lookup_one(struct user_namespace *, const char *, struct dentry *, int); ++struct dentry *lookup_one_unlocked(struct user_namespace *mnt_userns, ++ const char *name, struct dentry *base, ++ int len); ++struct dentry *lookup_one_positive_unlocked(struct user_namespace *mnt_userns, ++ const char *name, ++ struct dentry *base, int len); + + extern int follow_down_one(struct path *); + extern int follow_down(struct path *); +diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h +index 2c6b9e4162254..7c2d77d75a888 100644 +--- a/include/linux/netdev_features.h ++++ b/include/linux/netdev_features.h +@@ -169,7 +169,7 @@ enum { + #define NETIF_F_HW_HSR_FWD __NETIF_F(HW_HSR_FWD) + #define NETIF_F_HW_HSR_DUP __NETIF_F(HW_HSR_DUP) + +-/* Finds the next feature with the highest number of the range of start till 0. ++/* Finds the next feature with the highest number of the range of start-1 till 0. + */ + static inline int find_next_netdev_feature(u64 feature, unsigned long start) + { +@@ -188,7 +188,7 @@ static inline int find_next_netdev_feature(u64 feature, unsigned long start) + for ((bit) = find_next_netdev_feature((mask_addr), \ + NETDEV_FEATURE_COUNT); \ + (bit) >= 0; \ +- (bit) = find_next_netdev_feature((mask_addr), (bit) - 1)) ++ (bit) = find_next_netdev_feature((mask_addr), (bit))) + + /* Features valid for ethtool to change */ + /* = all defined minus driver/device-class-related */ +diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h +index d79163208dfdb..3a75d644a1204 100644 +--- a/include/linux/netdevice.h ++++ b/include/linux/netdevice.h +@@ -167,31 +167,38 @@ static inline bool dev_xmit_complete(int rc) + * (unsigned long) so they can be read and written atomically. 
+ */ + ++#define NET_DEV_STAT(FIELD) \ ++ union { \ ++ unsigned long FIELD; \ ++ atomic_long_t __##FIELD; \ ++ } ++ + struct net_device_stats { +- unsigned long rx_packets; +- unsigned long tx_packets; +- unsigned long rx_bytes; +- unsigned long tx_bytes; +- unsigned long rx_errors; +- unsigned long tx_errors; +- unsigned long rx_dropped; +- unsigned long tx_dropped; +- unsigned long multicast; +- unsigned long collisions; +- unsigned long rx_length_errors; +- unsigned long rx_over_errors; +- unsigned long rx_crc_errors; +- unsigned long rx_frame_errors; +- unsigned long rx_fifo_errors; +- unsigned long rx_missed_errors; +- unsigned long tx_aborted_errors; +- unsigned long tx_carrier_errors; +- unsigned long tx_fifo_errors; +- unsigned long tx_heartbeat_errors; +- unsigned long tx_window_errors; +- unsigned long rx_compressed; +- unsigned long tx_compressed; ++ NET_DEV_STAT(rx_packets); ++ NET_DEV_STAT(tx_packets); ++ NET_DEV_STAT(rx_bytes); ++ NET_DEV_STAT(tx_bytes); ++ NET_DEV_STAT(rx_errors); ++ NET_DEV_STAT(tx_errors); ++ NET_DEV_STAT(rx_dropped); ++ NET_DEV_STAT(tx_dropped); ++ NET_DEV_STAT(multicast); ++ NET_DEV_STAT(collisions); ++ NET_DEV_STAT(rx_length_errors); ++ NET_DEV_STAT(rx_over_errors); ++ NET_DEV_STAT(rx_crc_errors); ++ NET_DEV_STAT(rx_frame_errors); ++ NET_DEV_STAT(rx_fifo_errors); ++ NET_DEV_STAT(rx_missed_errors); ++ NET_DEV_STAT(tx_aborted_errors); ++ NET_DEV_STAT(tx_carrier_errors); ++ NET_DEV_STAT(tx_fifo_errors); ++ NET_DEV_STAT(tx_heartbeat_errors); ++ NET_DEV_STAT(tx_window_errors); ++ NET_DEV_STAT(rx_compressed); ++ NET_DEV_STAT(tx_compressed); + }; ++#undef NET_DEV_STAT + + + #include <linux/cache.h> +@@ -626,9 +633,23 @@ extern int sysctl_devconf_inherit_init_net; + */ + static inline bool net_has_fallback_tunnels(const struct net *net) + { +- return !IS_ENABLED(CONFIG_SYSCTL) || +- !sysctl_fb_tunnels_only_for_init_net || +- (net == &init_net && sysctl_fb_tunnels_only_for_init_net == 1); ++#if IS_ENABLED(CONFIG_SYSCTL) ++ int fb_tunnels_only_for_init_net = READ_ONCE(sysctl_fb_tunnels_only_for_init_net); ++ ++ return !fb_tunnels_only_for_init_net || ++ (net_eq(net, &init_net) && fb_tunnels_only_for_init_net == 1); ++#else ++ return true; ++#endif ++} ++ ++static inline int net_inherit_devconf(void) ++{ ++#if IS_ENABLED(CONFIG_SYSCTL) ++ return READ_ONCE(sysctl_devconf_inherit_init_net); ++#else ++ return 0; ++#endif + } + + static inline int netdev_queue_numa_node_read(const struct netdev_queue *q) +@@ -887,7 +908,7 @@ struct net_device_path_stack { + + struct net_device_path_ctx { + const struct net_device *dev; +- const u8 *daddr; ++ u8 daddr[ETH_ALEN]; + + int num_vlans; + struct { +@@ -1645,7 +1666,7 @@ enum netdev_priv_flags { + IFF_FAILOVER_SLAVE = 1<<28, + IFF_L3MDEV_RX_HANDLER = 1<<29, + IFF_LIVE_RENAME_OK = 1<<30, +- IFF_TX_SKB_NO_LINEAR = 1<<31, ++ IFF_TX_SKB_NO_LINEAR = BIT_ULL(31), + }; + + #define IFF_802_1Q_VLAN IFF_802_1Q_VLAN +@@ -2149,7 +2170,7 @@ struct net_device { + struct netdev_queue *_tx ____cacheline_aligned_in_smp; + unsigned int num_tx_queues; + unsigned int real_num_tx_queues; +- struct Qdisc *qdisc; ++ struct Qdisc __rcu *qdisc; + unsigned int tx_queue_len; + spinlock_t tx_global_lock; + +@@ -2636,6 +2657,7 @@ struct packet_type { + struct net_device *); + bool (*id_match)(struct packet_type *ptype, + struct sock *sk); ++ struct net *af_packet_net; + void *af_packet_priv; + struct list_head list; + }; +@@ -4403,7 +4425,8 @@ static inline u32 netif_msg_init(int debug_value, int default_msg_enable_bits) + static inline void 
__netif_tx_lock(struct netdev_queue *txq, int cpu) + { + spin_lock(&txq->_xmit_lock); +- txq->xmit_lock_owner = cpu; ++ /* Pairs with READ_ONCE() in __dev_queue_xmit() */ ++ WRITE_ONCE(txq->xmit_lock_owner, cpu); + } + + static inline bool __netif_tx_acquire(struct netdev_queue *txq) +@@ -4420,26 +4443,32 @@ static inline void __netif_tx_release(struct netdev_queue *txq) + static inline void __netif_tx_lock_bh(struct netdev_queue *txq) + { + spin_lock_bh(&txq->_xmit_lock); +- txq->xmit_lock_owner = smp_processor_id(); ++ /* Pairs with READ_ONCE() in __dev_queue_xmit() */ ++ WRITE_ONCE(txq->xmit_lock_owner, smp_processor_id()); + } + + static inline bool __netif_tx_trylock(struct netdev_queue *txq) + { + bool ok = spin_trylock(&txq->_xmit_lock); +- if (likely(ok)) +- txq->xmit_lock_owner = smp_processor_id(); ++ ++ if (likely(ok)) { ++ /* Pairs with READ_ONCE() in __dev_queue_xmit() */ ++ WRITE_ONCE(txq->xmit_lock_owner, smp_processor_id()); ++ } + return ok; + } + + static inline void __netif_tx_unlock(struct netdev_queue *txq) + { +- txq->xmit_lock_owner = -1; ++ /* Pairs with READ_ONCE() in __dev_queue_xmit() */ ++ WRITE_ONCE(txq->xmit_lock_owner, -1); + spin_unlock(&txq->_xmit_lock); + } + + static inline void __netif_tx_unlock_bh(struct netdev_queue *txq) + { +- txq->xmit_lock_owner = -1; ++ /* Pairs with READ_ONCE() in __dev_queue_xmit() */ ++ WRITE_ONCE(txq->xmit_lock_owner, -1); + spin_unlock_bh(&txq->_xmit_lock); + } + +@@ -5455,4 +5484,9 @@ extern struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly; + + extern struct net_device *blackhole_netdev; + ++/* Note: Avoid these macros in fast path, prefer per-cpu or per-queue counters. */ ++#define DEV_STATS_INC(DEV, FIELD) atomic_long_inc(&(DEV)->stats.__##FIELD) ++#define DEV_STATS_ADD(DEV, FIELD, VAL) \ ++ atomic_long_add((VAL), &(DEV)->stats.__##FIELD) ++ + #endif /* _LINUX_NETDEVICE_H */ +diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h +index ada1296c87d50..72f5ebc5c97a9 100644 +--- a/include/linux/netfilter/ipset/ip_set.h ++++ b/include/linux/netfilter/ipset/ip_set.h +@@ -197,7 +197,7 @@ struct ip_set_region { + }; + + /* Max range where every element is added/deleted in one step */ +-#define IPSET_MAX_RANGE (1<<20) ++#define IPSET_MAX_RANGE (1<<14) + + /* The max revision number supported by any set type + 1 */ + #define IPSET_REVISION_MAX 9 +diff --git a/include/linux/netfilter/nf_conntrack_common.h b/include/linux/netfilter/nf_conntrack_common.h +index 700ea077ce2d6..2770db2fa080d 100644 +--- a/include/linux/netfilter/nf_conntrack_common.h ++++ b/include/linux/netfilter/nf_conntrack_common.h +@@ -2,7 +2,7 @@ + #ifndef _NF_CONNTRACK_COMMON_H + #define _NF_CONNTRACK_COMMON_H + +-#include <linux/atomic.h> ++#include <linux/refcount.h> + #include <uapi/linux/netfilter/nf_conntrack_common.h> + + struct ip_conntrack_stat { +@@ -25,19 +25,21 @@ struct ip_conntrack_stat { + #define NFCT_PTRMASK ~(NFCT_INFOMASK) + + struct nf_conntrack { +- atomic_t use; ++ refcount_t use; + }; + + void nf_conntrack_destroy(struct nf_conntrack *nfct); ++ ++/* like nf_ct_put, but without module dependency on nf_conntrack */ + static inline void nf_conntrack_put(struct nf_conntrack *nfct) + { +- if (nfct && atomic_dec_and_test(&nfct->use)) ++ if (nfct && refcount_dec_and_test(&nfct->use)) + nf_conntrack_destroy(nfct); + } + static inline void nf_conntrack_get(struct nf_conntrack *nfct) + { + if (nfct) +- atomic_inc(&nfct->use); ++ refcount_inc(&nfct->use); + } + + #endif /* _NF_CONNTRACK_COMMON_H */ +diff 
--git a/include/linux/netfilter_bridge/ebtables.h b/include/linux/netfilter_bridge/ebtables.h +index 10a01978bc0d3..bde9db771ae41 100644 +--- a/include/linux/netfilter_bridge/ebtables.h ++++ b/include/linux/netfilter_bridge/ebtables.h +@@ -94,10 +94,6 @@ struct ebt_table { + struct ebt_replace_kernel *table; + unsigned int valid_hooks; + rwlock_t lock; +- /* e.g. could be the table explicitly only allows certain +- * matches, targets, ... 0 == let it in */ +- int (*check)(const struct ebt_table_info *info, +- unsigned int valid_hooks); + /* the data used by the kernel */ + struct ebt_table_info *private; + struct nf_hook_ops *ops; +diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h +index b9a8b925db430..5ddc30405f7f4 100644 +--- a/include/linux/nfs_fs.h ++++ b/include/linux/nfs_fs.h +@@ -103,6 +103,7 @@ struct nfs_open_dir_context { + __be32 verf[NFS_DIR_VERIFIER_SIZE]; + __u64 dir_cookie; + __u64 dup_cookie; ++ pgoff_t page_index; + signed char duped; + }; + +@@ -154,36 +155,40 @@ struct nfs_inode { + unsigned long attrtimeo_timestamp; + + unsigned long attr_gencount; +- /* "Generation counter" for the attribute cache. This is +- * bumped whenever we update the metadata on the +- * server. +- */ +- unsigned long cache_change_attribute; + + struct rb_root access_cache; + struct list_head access_cache_entry_lru; + struct list_head access_cache_inode_lru; + +- /* +- * This is the cookie verifier used for NFSv3 readdir +- * operations +- */ +- __be32 cookieverf[NFS_DIR_VERIFIER_SIZE]; +- +- atomic_long_t nrequests; +- struct nfs_mds_commit_info commit_info; ++ union { ++ /* Directory */ ++ struct { ++ /* "Generation counter" for the attribute cache. ++ * This is bumped whenever we update the metadata ++ * on the server. ++ */ ++ unsigned long cache_change_attribute; ++ /* ++ * This is the cookie verifier used for NFSv3 readdir ++ * operations ++ */ ++ __be32 cookieverf[NFS_DIR_VERIFIER_SIZE]; ++ /* Readers: in-flight sillydelete RPC calls */ ++ /* Writers: rmdir */ ++ struct rw_semaphore rmdir_sem; ++ }; ++ /* Regular file */ ++ struct { ++ atomic_long_t nrequests; ++ atomic_long_t redirtied_pages; ++ struct nfs_mds_commit_info commit_info; ++ struct mutex commit_mutex; ++ }; ++ }; + + /* Open contexts for shared mmap writes */ + struct list_head open_files; + +- /* Readers: in-flight sillydelete RPC calls */ +- /* Writers: rmdir */ +- struct rw_semaphore rmdir_sem; +- struct mutex commit_mutex; +- +- /* track last access to cached pages */ +- unsigned long page_index; +- + #if IS_ENABLED(CONFIG_NFS_V4) + struct nfs4_cached_acl *nfs4_acl; + /* NFSv4 state */ +@@ -272,6 +277,7 @@ struct nfs4_copy_state { + #define NFS_INO_INVALIDATING (3) /* inode is being invalidated */ + #define NFS_INO_FSCACHE (5) /* inode can be cached by FS-Cache */ + #define NFS_INO_FSCACHE_LOCK (6) /* FS-Cache cookie management lock */ ++#define NFS_INO_FORCE_READDIR (7) /* force readdirplus */ + #define NFS_INO_LAYOUTCOMMIT (9) /* layoutcommit required */ + #define NFS_INO_LAYOUTCOMMITTING (10) /* layoutcommit inflight */ + #define NFS_INO_LAYOUTSTATS (11) /* layoutstats inflight */ +@@ -421,9 +427,22 @@ extern void nfs_fattr_set_barrier(struct nfs_fattr *fattr); + extern unsigned long nfs_inc_attr_generation_counter(void); + + extern struct nfs_fattr *nfs_alloc_fattr(void); ++extern struct nfs_fattr *nfs_alloc_fattr_with_label(struct nfs_server *server); ++ ++static inline void nfs4_label_free(struct nfs4_label *label) ++{ ++#ifdef CONFIG_NFS_V4_SECURITY_LABEL ++ if (label) { ++ kfree(label->label); ++ 
kfree(label); ++ } ++#endif ++} + + static inline void nfs_free_fattr(const struct nfs_fattr *fattr) + { ++ if (fattr) ++ nfs4_label_free(fattr->label); + kfree(fattr); + } + +@@ -494,10 +513,10 @@ static inline const struct cred *nfs_file_cred(struct file *file) + * linux/fs/nfs/direct.c + */ + extern ssize_t nfs_direct_IO(struct kiocb *, struct iov_iter *); +-extern ssize_t nfs_file_direct_read(struct kiocb *iocb, +- struct iov_iter *iter); +-extern ssize_t nfs_file_direct_write(struct kiocb *iocb, +- struct iov_iter *iter); ++ssize_t nfs_file_direct_read(struct kiocb *iocb, ++ struct iov_iter *iter, bool swap); ++ssize_t nfs_file_direct_write(struct kiocb *iocb, ++ struct iov_iter *iter, bool swap); + + /* + * linux/fs/nfs/dir.c +@@ -517,8 +536,8 @@ extern int nfs_instantiate(struct dentry *dentry, struct nfs_fh *fh, + struct nfs_fattr *fattr, struct nfs4_label *label); + extern int nfs_may_open(struct inode *inode, const struct cred *cred, int openflags); + extern void nfs_access_zap_cache(struct inode *inode); +-extern int nfs_access_get_cached(struct inode *inode, const struct cred *cred, struct nfs_access_entry *res, +- bool may_block); ++extern int nfs_access_get_cached(struct inode *inode, const struct cred *cred, ++ u32 *mask, bool may_block); + + /* + * linux/fs/nfs/symlink.c +@@ -567,13 +586,16 @@ extern int nfs_wb_all(struct inode *inode); + extern int nfs_wb_page(struct inode *inode, struct page *page); + extern int nfs_wb_page_cancel(struct inode *inode, struct page* page); + extern int nfs_commit_inode(struct inode *, int); +-extern struct nfs_commit_data *nfs_commitdata_alloc(bool never_fail); ++extern struct nfs_commit_data *nfs_commitdata_alloc(void); + extern void nfs_commit_free(struct nfs_commit_data *data); ++bool nfs_commit_end(struct nfs_mds_commit_info *cinfo); + + static inline int + nfs_have_writebacks(struct inode *inode) + { +- return atomic_long_read(&NFS_I(inode)->nrequests) != 0; ++ if (S_ISREG(inode->i_mode)) ++ return atomic_long_read(&NFS_I(inode)->nrequests) != 0; ++ return 0; + } + + /* +diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h +index 2a9acbfe00f0f..da9ef0ab9b4b6 100644 +--- a/include/linux/nfs_fs_sb.h ++++ b/include/linux/nfs_fs_sb.h +@@ -287,5 +287,6 @@ struct nfs_server { + #define NFS_CAP_COPY_NOTIFY (1U << 27) + #define NFS_CAP_XATTR (1U << 28) + #define NFS_CAP_READ_PLUS (1U << 29) +- ++#define NFS_CAP_FS_LOCATIONS (1U << 30) ++#define NFS_CAP_MOVEABLE (1U << 31) + #endif +diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h +index e9698b6278a52..783f871b4e12d 100644 +--- a/include/linux/nfs_xdr.h ++++ b/include/linux/nfs_xdr.h +@@ -1219,7 +1219,7 @@ struct nfs4_fs_location { + + #define NFS4_FS_LOCATIONS_MAXENTRIES 10 + struct nfs4_fs_locations { +- struct nfs_fattr fattr; ++ struct nfs_fattr *fattr; + const struct nfs_server *server; + struct nfs4_pathname fs_path; + int nlocations; +@@ -1805,6 +1805,7 @@ struct nfs_rpc_ops { + struct nfs_server *(*create_server)(struct fs_context *); + struct nfs_server *(*clone_server)(struct nfs_server *, struct nfs_fh *, + struct nfs_fattr *, rpc_authflavor_t); ++ int (*discover_trunking)(struct nfs_server *, struct nfs_fh *); + }; + + /* +diff --git a/include/linux/nmi.h b/include/linux/nmi.h +index 750c7f395ca90..f700ff2df074e 100644 +--- a/include/linux/nmi.h ++++ b/include/linux/nmi.h +@@ -122,6 +122,8 @@ int watchdog_nmi_probe(void); + int watchdog_nmi_enable(unsigned int cpu); + void watchdog_nmi_disable(unsigned int cpu); + ++void lockup_detector_reconfigure(void); 
++ + /** + * touch_nmi_watchdog - restart NMI watchdog timeout. + * +diff --git a/include/linux/nodemask.h b/include/linux/nodemask.h +index 567c3ddba2c42..0f233b76c9cec 100644 +--- a/include/linux/nodemask.h ++++ b/include/linux/nodemask.h +@@ -42,11 +42,11 @@ + * void nodes_shift_right(dst, src, n) Shift right + * void nodes_shift_left(dst, src, n) Shift left + * +- * int first_node(mask) Number lowest set bit, or MAX_NUMNODES +- * int next_node(node, mask) Next node past 'node', or MAX_NUMNODES +- * int next_node_in(node, mask) Next node past 'node', or wrap to first, ++ * unsigned int first_node(mask) Number lowest set bit, or MAX_NUMNODES ++ * unsigned int next_node(node, mask) Next node past 'node', or MAX_NUMNODES ++ * unsigned int next_node_in(node, mask) Next node past 'node', or wrap to first, + * or MAX_NUMNODES +- * int first_unset_node(mask) First node not set in mask, or ++ * unsigned int first_unset_node(mask) First node not set in mask, or + * MAX_NUMNODES + * + * nodemask_t nodemask_of_node(node) Return nodemask with bit 'node' set +@@ -153,7 +153,7 @@ static inline void __nodes_clear(nodemask_t *dstp, unsigned int nbits) + + #define node_test_and_set(node, nodemask) \ + __node_test_and_set((node), &(nodemask)) +-static inline int __node_test_and_set(int node, nodemask_t *addr) ++static inline bool __node_test_and_set(int node, nodemask_t *addr) + { + return test_and_set_bit(node, addr->bits); + } +@@ -200,7 +200,7 @@ static inline void __nodes_complement(nodemask_t *dstp, + + #define nodes_equal(src1, src2) \ + __nodes_equal(&(src1), &(src2), MAX_NUMNODES) +-static inline int __nodes_equal(const nodemask_t *src1p, ++static inline bool __nodes_equal(const nodemask_t *src1p, + const nodemask_t *src2p, unsigned int nbits) + { + return bitmap_equal(src1p->bits, src2p->bits, nbits); +@@ -208,7 +208,7 @@ static inline int __nodes_equal(const nodemask_t *src1p, + + #define nodes_intersects(src1, src2) \ + __nodes_intersects(&(src1), &(src2), MAX_NUMNODES) +-static inline int __nodes_intersects(const nodemask_t *src1p, ++static inline bool __nodes_intersects(const nodemask_t *src1p, + const nodemask_t *src2p, unsigned int nbits) + { + return bitmap_intersects(src1p->bits, src2p->bits, nbits); +@@ -216,20 +216,20 @@ static inline int __nodes_intersects(const nodemask_t *src1p, + + #define nodes_subset(src1, src2) \ + __nodes_subset(&(src1), &(src2), MAX_NUMNODES) +-static inline int __nodes_subset(const nodemask_t *src1p, ++static inline bool __nodes_subset(const nodemask_t *src1p, + const nodemask_t *src2p, unsigned int nbits) + { + return bitmap_subset(src1p->bits, src2p->bits, nbits); + } + + #define nodes_empty(src) __nodes_empty(&(src), MAX_NUMNODES) +-static inline int __nodes_empty(const nodemask_t *srcp, unsigned int nbits) ++static inline bool __nodes_empty(const nodemask_t *srcp, unsigned int nbits) + { + return bitmap_empty(srcp->bits, nbits); + } + + #define nodes_full(nodemask) __nodes_full(&(nodemask), MAX_NUMNODES) +-static inline int __nodes_full(const nodemask_t *srcp, unsigned int nbits) ++static inline bool __nodes_full(const nodemask_t *srcp, unsigned int nbits) + { + return bitmap_full(srcp->bits, nbits); + } +@@ -260,15 +260,15 @@ static inline void __nodes_shift_left(nodemask_t *dstp, + > MAX_NUMNODES, then the silly min_ts could be dropped. 
*/ + + #define first_node(src) __first_node(&(src)) +-static inline int __first_node(const nodemask_t *srcp) ++static inline unsigned int __first_node(const nodemask_t *srcp) + { +- return min_t(int, MAX_NUMNODES, find_first_bit(srcp->bits, MAX_NUMNODES)); ++ return min_t(unsigned int, MAX_NUMNODES, find_first_bit(srcp->bits, MAX_NUMNODES)); + } + + #define next_node(n, src) __next_node((n), &(src)) +-static inline int __next_node(int n, const nodemask_t *srcp) ++static inline unsigned int __next_node(int n, const nodemask_t *srcp) + { +- return min_t(int,MAX_NUMNODES,find_next_bit(srcp->bits, MAX_NUMNODES, n+1)); ++ return min_t(unsigned int, MAX_NUMNODES, find_next_bit(srcp->bits, MAX_NUMNODES, n+1)); + } + + /* +@@ -276,7 +276,7 @@ static inline int __next_node(int n, const nodemask_t *srcp) + * the first node in src if needed. Returns MAX_NUMNODES if src is empty. + */ + #define next_node_in(n, src) __next_node_in((n), &(src)) +-int __next_node_in(int node, const nodemask_t *srcp); ++unsigned int __next_node_in(int node, const nodemask_t *srcp); + + static inline void init_nodemask_of_node(nodemask_t *mask, int node) + { +@@ -296,9 +296,9 @@ static inline void init_nodemask_of_node(nodemask_t *mask, int node) + }) + + #define first_unset_node(mask) __first_unset_node(&(mask)) +-static inline int __first_unset_node(const nodemask_t *maskp) ++static inline unsigned int __first_unset_node(const nodemask_t *maskp) + { +- return min_t(int,MAX_NUMNODES, ++ return min_t(unsigned int, MAX_NUMNODES, + find_first_zero_bit(maskp->bits, MAX_NUMNODES)); + } + +@@ -375,14 +375,13 @@ static inline void __nodes_fold(nodemask_t *dstp, const nodemask_t *origp, + } + + #if MAX_NUMNODES > 1 +-#define for_each_node_mask(node, mask) \ +- for ((node) = first_node(mask); \ +- (node) < MAX_NUMNODES; \ +- (node) = next_node((node), (mask))) ++#define for_each_node_mask(node, mask) \ ++ for ((node) = first_node(mask); \ ++ (node >= 0) && (node) < MAX_NUMNODES; \ ++ (node) = next_node((node), (mask))) + #else /* MAX_NUMNODES == 1 */ +-#define for_each_node_mask(node, mask) \ +- if (!nodes_empty(mask)) \ +- for ((node) = 0; (node) < 1; (node)++) ++#define for_each_node_mask(node, mask) \ ++ for ((node) = 0; (node) < 1 && !nodes_empty(mask); (node)++) + #endif /* MAX_NUMNODES */ + + /* +@@ -436,11 +435,11 @@ static inline int num_node_state(enum node_states state) + + #define first_online_node first_node(node_states[N_ONLINE]) + #define first_memory_node first_node(node_states[N_MEMORY]) +-static inline int next_online_node(int nid) ++static inline unsigned int next_online_node(int nid) + { + return next_node(nid, node_states[N_ONLINE]); + } +-static inline int next_memory_node(int nid) ++static inline unsigned int next_memory_node(int nid) + { + return next_node(nid, node_states[N_MEMORY]); + } +diff --git a/include/linux/nospec.h b/include/linux/nospec.h +index c1e79f72cd892..9f0af4f116d98 100644 +--- a/include/linux/nospec.h ++++ b/include/linux/nospec.h +@@ -11,6 +11,10 @@ + + struct task_struct; + ++#ifndef barrier_nospec ++# define barrier_nospec() do { } while (0) ++#endif ++ + /** + * array_index_mask_nospec() - generate a ~0 mask when index < size, 0 otherwise + * @index: array element index +diff --git a/include/linux/nvme.h b/include/linux/nvme.h +index b7c4c4130b65e..de235916c31c2 100644 +--- a/include/linux/nvme.h ++++ b/include/linux/nvme.h +@@ -7,6 +7,7 @@ + #ifndef _LINUX_NVME_H + #define _LINUX_NVME_H + ++#include <linux/bits.h> + #include <linux/types.h> + #include <linux/uuid.h> + +@@ -322,6 
+323,7 @@ enum { + NVME_CTRL_ONCS_TIMESTAMP = 1 << 6, + NVME_CTRL_VWC_PRESENT = 1 << 0, + NVME_CTRL_OACS_SEC_SUPP = 1 << 0, ++ NVME_CTRL_OACS_NS_MNGT_SUPP = 1 << 3, + NVME_CTRL_OACS_DIRECTIVES = 1 << 5, + NVME_CTRL_OACS_DBBUF_SUPP = 1 << 8, + NVME_CTRL_LPA_CMD_EFFECTS_LOG = 1 << 1, +@@ -538,7 +540,7 @@ enum { + NVME_CMD_EFFECTS_NCC = 1 << 2, + NVME_CMD_EFFECTS_NIC = 1 << 3, + NVME_CMD_EFFECTS_CCC = 1 << 4, +- NVME_CMD_EFFECTS_CSE_MASK = 3 << 16, ++ NVME_CMD_EFFECTS_CSE_MASK = GENMASK(18, 16), + NVME_CMD_EFFECTS_UUID_SEL = 1 << 19, + }; + +diff --git a/include/linux/nvmem-provider.h b/include/linux/nvmem-provider.h +index 104505e9028f7..089597600e26c 100644 +--- a/include/linux/nvmem-provider.h ++++ b/include/linux/nvmem-provider.h +@@ -66,7 +66,7 @@ struct nvmem_keepout { + * @word_size: Minimum read/write access granularity. + * @stride: Minimum read/write access stride. + * @priv: User context passed to read/write callbacks. +- * @wp-gpio: Write protect pin ++ * @ignore_wp: Write Protect pin is managed by the provider. + * + * Note: A default "nvmem<id>" name will be assigned to the device if + * no name is specified in its configuration. In such case "<id>" is +@@ -80,7 +80,6 @@ struct nvmem_config { + const char *name; + int id; + struct module *owner; +- struct gpio_desc *wp_gpio; + const struct nvmem_cell_info *cells; + int ncells; + const struct nvmem_keepout *keepout; +@@ -88,6 +87,7 @@ struct nvmem_config { + enum nvmem_type type; + bool read_only; + bool root_only; ++ bool ignore_wp; + struct device_node *of_node; + bool no_of_node; + nvmem_reg_read_t reg_read; +diff --git a/include/linux/objtool.h b/include/linux/objtool.h +index 7e72d975cb761..a2042c4186864 100644 +--- a/include/linux/objtool.h ++++ b/include/linux/objtool.h +@@ -32,11 +32,16 @@ struct unwind_hint { + * + * UNWIND_HINT_FUNC: Generate the unwind metadata of a callable function. + * Useful for code which doesn't have an ELF function annotation. ++ * ++ * UNWIND_HINT_ENTRY: machine entry without stack, SYSCALL/SYSENTER etc. + */ + #define UNWIND_HINT_TYPE_CALL 0 + #define UNWIND_HINT_TYPE_REGS 1 + #define UNWIND_HINT_TYPE_REGS_PARTIAL 2 + #define UNWIND_HINT_TYPE_FUNC 3 ++#define UNWIND_HINT_TYPE_ENTRY 4 ++#define UNWIND_HINT_TYPE_SAVE 5 ++#define UNWIND_HINT_TYPE_RESTORE 6 + + #ifdef CONFIG_STACK_VALIDATION + +@@ -99,7 +104,7 @@ struct unwind_hint { + * the debuginfo as necessary. It will also warn if it sees any + * inconsistencies. 
+ */ +-.macro UNWIND_HINT sp_reg:req sp_offset=0 type:req end=0 ++.macro UNWIND_HINT type:req sp_reg=0 sp_offset=0 end=0 + .Lunwind_hint_ip_\@: + .pushsection .discard.unwind_hints + /* struct unwind_hint */ +@@ -129,7 +134,7 @@ struct unwind_hint { + #define STACK_FRAME_NON_STANDARD(func) + #else + #define ANNOTATE_INTRA_FUNCTION_CALL +-.macro UNWIND_HINT sp_reg:req sp_offset=0 type:req end=0 ++.macro UNWIND_HINT type:req sp_reg=0 sp_offset=0 end=0 + .endm + .macro STACK_FRAME_NON_STANDARD func:req + .endm +diff --git a/include/linux/of_device.h b/include/linux/of_device.h +index 1d7992a02e36e..1a803e4335d30 100644 +--- a/include/linux/of_device.h ++++ b/include/linux/of_device.h +@@ -101,8 +101,9 @@ static inline struct device_node *of_cpu_device_node_get(int cpu) + } + + static inline int of_dma_configure_id(struct device *dev, +- struct device_node *np, +- bool force_dma) ++ struct device_node *np, ++ bool force_dma, ++ const u32 *id) + { + return 0; + } +diff --git a/include/linux/of_fdt.h b/include/linux/of_fdt.h +index cf6a65b94d40e..6508b97dbf1d2 100644 +--- a/include/linux/of_fdt.h ++++ b/include/linux/of_fdt.h +@@ -62,6 +62,7 @@ extern int early_init_dt_scan_chosen(unsigned long node, const char *uname, + int depth, void *data); + extern int early_init_dt_scan_memory(unsigned long node, const char *uname, + int depth, void *data); ++extern void early_init_dt_check_for_usable_mem_range(void); + extern int early_init_dt_scan_chosen_stdout(void); + extern void early_init_fdt_scan_reserved_mem(void); + extern void early_init_fdt_reserve_self(void); +@@ -87,6 +88,7 @@ extern void unflatten_and_copy_device_tree(void); + extern void early_init_devtree(void *); + extern void early_get_first_memblock_info(void *, phys_addr_t *); + #else /* CONFIG_OF_EARLY_FLATTREE */ ++static inline void early_init_dt_check_for_usable_mem_range(void) {} + static inline int early_init_dt_scan_chosen_stdout(void) { return -ENODEV; } + static inline void early_init_fdt_scan_reserved_mem(void) {} + static inline void early_init_fdt_reserve_self(void) {} +diff --git a/include/linux/of_net.h b/include/linux/of_net.h +index daef3b0d9270d..55460ecfa50ad 100644 +--- a/include/linux/of_net.h ++++ b/include/linux/of_net.h +@@ -8,7 +8,7 @@ + + #include <linux/phy.h> + +-#ifdef CONFIG_OF_NET ++#if defined(CONFIG_OF) && defined(CONFIG_NET) + #include <linux/of.h> + + struct net_device; +diff --git a/include/linux/once.h b/include/linux/once.h +index d361fb14ac3a2..1528625087b69 100644 +--- a/include/linux/once.h ++++ b/include/linux/once.h +@@ -5,10 +5,18 @@ + #include <linux/types.h> + #include <linux/jump_label.h> + ++/* Helpers used from arbitrary contexts. ++ * Hard irqs are blocked, be cautious. ++ */ + bool __do_once_start(bool *done, unsigned long *flags); + void __do_once_done(bool *done, struct static_key_true *once_key, + unsigned long *flags, struct module *mod); + ++/* Variant for process contexts only. */ ++bool __do_once_slow_start(bool *done); ++void __do_once_slow_done(bool *done, struct static_key_true *once_key, ++ struct module *mod); ++ + /* Call a function exactly once. The idea of DO_ONCE() is to perform + * a function call such as initialization of random seeds, etc, only + * once, where DO_ONCE() can live in the fast-path. After @func has +@@ -52,9 +60,29 @@ void __do_once_done(bool *done, struct static_key_true *once_key, + ___ret; \ + }) + ++/* Variant of DO_ONCE() for process/sleepable contexts. */ ++#define DO_ONCE_SLOW(func, ...) 
\ ++ ({ \ ++ bool ___ret = false; \ ++ static bool __section(".data.once") ___done = false; \ ++ static DEFINE_STATIC_KEY_TRUE(___once_key); \ ++ if (static_branch_unlikely(&___once_key)) { \ ++ ___ret = __do_once_slow_start(&___done); \ ++ if (unlikely(___ret)) { \ ++ func(__VA_ARGS__); \ ++ __do_once_slow_done(&___done, &___once_key, \ ++ THIS_MODULE); \ ++ } \ ++ } \ ++ ___ret; \ ++ }) ++ + #define get_random_once(buf, nbytes) \ + DO_ONCE(get_random_bytes, (buf), (nbytes)) + #define get_random_once_wait(buf, nbytes) \ + DO_ONCE(get_random_bytes_wait, (buf), (nbytes)) \ + ++#define get_random_slow_once(buf, nbytes) \ ++ DO_ONCE_SLOW(get_random_bytes, (buf), (nbytes)) ++ + #endif /* _LINUX_ONCE_H */ +diff --git a/include/linux/once_lite.h b/include/linux/once_lite.h +index 861e606b820fa..b7bce4983638f 100644 +--- a/include/linux/once_lite.h ++++ b/include/linux/once_lite.h +@@ -9,15 +9,27 @@ + */ + #define DO_ONCE_LITE(func, ...) \ + DO_ONCE_LITE_IF(true, func, ##__VA_ARGS__) +-#define DO_ONCE_LITE_IF(condition, func, ...) \ ++ ++#define __ONCE_LITE_IF(condition) \ + ({ \ + static bool __section(".data.once") __already_done; \ +- bool __ret_do_once = !!(condition); \ ++ bool __ret_cond = !!(condition); \ ++ bool __ret_once = false; \ + \ +- if (unlikely(__ret_do_once && !__already_done)) { \ ++ if (unlikely(__ret_cond && !__already_done)) { \ + __already_done = true; \ +- func(__VA_ARGS__); \ ++ __ret_once = true; \ + } \ ++ unlikely(__ret_once); \ ++ }) ++ ++#define DO_ONCE_LITE_IF(condition, func, ...) \ ++ ({ \ ++ bool __ret_do_once = !!(condition); \ ++ \ ++ if (__ONCE_LITE_IF(__ret_do_once)) \ ++ func(__VA_ARGS__); \ ++ \ + unlikely(__ret_do_once); \ + }) + +diff --git a/include/linux/overflow.h b/include/linux/overflow.h +index 4669632bd72bc..59d7228104d02 100644 +--- a/include/linux/overflow.h ++++ b/include/linux/overflow.h +@@ -118,81 +118,94 @@ static inline bool __must_check __must_check_overflow(bool overflow) + })) + + /** +- * array_size() - Calculate size of 2-dimensional array. +- * +- * @a: dimension one +- * @b: dimension two ++ * size_mul() - Calculate size_t multiplication with saturation at SIZE_MAX + * +- * Calculates size of 2-dimensional array: @a * @b. ++ * @factor1: first factor ++ * @factor2: second factor + * +- * Returns: number of bytes needed to represent the array or SIZE_MAX on +- * overflow. ++ * Returns: calculate @factor1 * @factor2, both promoted to size_t, ++ * with any overflow causing the return value to be SIZE_MAX. The ++ * lvalue must be size_t to avoid implicit type conversion. + */ +-static inline __must_check size_t array_size(size_t a, size_t b) ++static inline size_t __must_check size_mul(size_t factor1, size_t factor2) + { + size_t bytes; + +- if (check_mul_overflow(a, b, &bytes)) ++ if (check_mul_overflow(factor1, factor2, &bytes)) + return SIZE_MAX; + + return bytes; + } + + /** +- * array3_size() - Calculate size of 3-dimensional array. ++ * size_add() - Calculate size_t addition with saturation at SIZE_MAX + * +- * @a: dimension one +- * @b: dimension two +- * @c: dimension three +- * +- * Calculates size of 3-dimensional array: @a * @b * @c. ++ * @addend1: first addend ++ * @addend2: second addend + * +- * Returns: number of bytes needed to represent the array or SIZE_MAX on +- * overflow. ++ * Returns: calculate @addend1 + @addend2, both promoted to size_t, ++ * with any overflow causing the return value to be SIZE_MAX. The ++ * lvalue must be size_t to avoid implicit type conversion. 
+ */ +-static inline __must_check size_t array3_size(size_t a, size_t b, size_t c) ++static inline size_t __must_check size_add(size_t addend1, size_t addend2) + { + size_t bytes; + +- if (check_mul_overflow(a, b, &bytes)) +- return SIZE_MAX; +- if (check_mul_overflow(bytes, c, &bytes)) ++ if (check_add_overflow(addend1, addend2, &bytes)) + return SIZE_MAX; + + return bytes; + } + +-/* +- * Compute a*b+c, returning SIZE_MAX on overflow. Internal helper for +- * struct_size() below. ++/** ++ * size_sub() - Calculate size_t subtraction with saturation at SIZE_MAX ++ * ++ * @minuend: value to subtract from ++ * @subtrahend: value to subtract from @minuend ++ * ++ * Returns: calculate @minuend - @subtrahend, both promoted to size_t, ++ * with any overflow causing the return value to be SIZE_MAX. For ++ * composition with the size_add() and size_mul() helpers, neither ++ * argument may be SIZE_MAX (or the result will be forced to SIZE_MAX). ++ * The lvalue must be size_t to avoid implicit type conversion. + */ +-static inline __must_check size_t __ab_c_size(size_t a, size_t b, size_t c) ++static inline size_t __must_check size_sub(size_t minuend, size_t subtrahend) + { + size_t bytes; + +- if (check_mul_overflow(a, b, &bytes)) +- return SIZE_MAX; +- if (check_add_overflow(bytes, c, &bytes)) ++ if (minuend == SIZE_MAX || subtrahend == SIZE_MAX || ++ check_sub_overflow(minuend, subtrahend, &bytes)) + return SIZE_MAX; + + return bytes; + } + + /** +- * struct_size() - Calculate size of structure with trailing array. +- * @p: Pointer to the structure. +- * @member: Name of the array member. +- * @count: Number of elements in the array. ++ * array_size() - Calculate size of 2-dimensional array. + * +- * Calculates size of memory needed for structure @p followed by an +- * array of @count number of @member elements. ++ * @a: dimension one ++ * @b: dimension two + * +- * Return: number of bytes needed or SIZE_MAX on overflow. ++ * Calculates size of 2-dimensional array: @a * @b. ++ * ++ * Returns: number of bytes needed to represent the array or SIZE_MAX on ++ * overflow. + */ +-#define struct_size(p, member, count) \ +- __ab_c_size(count, \ +- sizeof(*(p)->member) + __must_be_array((p)->member),\ +- sizeof(*(p))) ++#define array_size(a, b) size_mul(a, b) ++ ++/** ++ * array3_size() - Calculate size of 3-dimensional array. ++ * ++ * @a: dimension one ++ * @b: dimension two ++ * @c: dimension three ++ * ++ * Calculates size of 3-dimensional array: @a * @b * @c. ++ * ++ * Returns: number of bytes needed to represent the array or SIZE_MAX on ++ * overflow. ++ */ ++#define array3_size(a, b, c) size_mul(size_mul(a, b), c) + + /** + * flex_array_size() - Calculate size of a flexible array member +@@ -208,7 +221,22 @@ static inline __must_check size_t __ab_c_size(size_t a, size_t b, size_t c) + * Return: number of bytes needed or SIZE_MAX on overflow. 
++ */ ++#define struct_size(p, member, count) \ ++ size_add(sizeof(*(p)), flex_array_size(p, member, count)) + + #endif /* __LINUX_OVERFLOW_H */ +diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h +index 62db6b0176b95..2f7dd14083d94 100644 +--- a/include/linux/pagemap.h ++++ b/include/linux/pagemap.h +@@ -733,61 +733,11 @@ int wait_on_page_private_2_killable(struct page *page); + extern void add_page_wait_queue(struct page *page, wait_queue_entry_t *waiter); + + /* +- * Fault everything in given userspace address range in. ++ * Fault in userspace address range. + */ +-static inline int fault_in_pages_writeable(char __user *uaddr, size_t size) +-{ +- char __user *end = uaddr + size - 1; +- +- if (unlikely(size == 0)) +- return 0; +- +- if (unlikely(uaddr > end)) +- return -EFAULT; +- /* +- * Writing zeroes into userspace here is OK, because we know that if +- * the zero gets there, we'll be overwriting it. +- */ +- do { +- if (unlikely(__put_user(0, uaddr) != 0)) +- return -EFAULT; +- uaddr += PAGE_SIZE; +- } while (uaddr <= end); +- +- /* Check whether the range spilled into the next page. */ +- if (((unsigned long)uaddr & PAGE_MASK) == +- ((unsigned long)end & PAGE_MASK)) +- return __put_user(0, end); +- +- return 0; +-} +- +-static inline int fault_in_pages_readable(const char __user *uaddr, size_t size) +-{ +- volatile char c; +- const char __user *end = uaddr + size - 1; +- +- if (unlikely(size == 0)) +- return 0; +- +- if (unlikely(uaddr > end)) +- return -EFAULT; +- +- do { +- if (unlikely(__get_user(c, uaddr) != 0)) +- return -EFAULT; +- uaddr += PAGE_SIZE; +- } while (uaddr <= end); +- +- /* Check whether the range spilled into the next page. */ +- if (((unsigned long)uaddr & PAGE_MASK) == +- ((unsigned long)end & PAGE_MASK)) { +- return __get_user(c, end); +- } +- +- (void)c; +- return 0; +-} ++size_t fault_in_writeable(char __user *uaddr, size_t size); ++size_t fault_in_safe_writeable(const char __user *uaddr, size_t size); ++size_t fault_in_readable(const char __user *uaddr, size_t size); + + int add_to_page_cache_locked(struct page *page, struct address_space *mapping, + pgoff_t index, gfp_t gfp_mask); +diff --git a/include/linux/panic.h b/include/linux/panic.h +index f5844908a089e..8eb5897c164fc 100644 +--- a/include/linux/panic.h ++++ b/include/linux/panic.h +@@ -11,16 +11,11 @@ extern long (*panic_blink)(int state); + __printf(1, 2) + void panic(const char *fmt, ...) 
__noreturn __cold; + void nmi_panic(struct pt_regs *regs, const char *msg); ++void check_panic_on_warn(const char *origin); + extern void oops_enter(void); + extern void oops_exit(void); + extern bool oops_may_print(void); + +-#ifdef CONFIG_SMP +-extern unsigned int sysctl_oops_all_cpu_backtrace; +-#else +-#define sysctl_oops_all_cpu_backtrace 0 +-#endif /* CONFIG_SMP */ +- + extern int panic_timeout; + extern unsigned long panic_print; + extern int panic_on_oops; +diff --git a/include/linux/part_stat.h b/include/linux/part_stat.h +index d2558121d48c0..6f7949b2fd8dc 100644 +--- a/include/linux/part_stat.h ++++ b/include/linux/part_stat.h +@@ -3,6 +3,7 @@ + #define _LINUX_PART_STAT_H + + #include <linux/genhd.h> ++#include <asm/local.h> + + struct disk_stats { + u64 nsecs[NR_STAT_GROUPS]; +diff --git a/include/linux/pci.h b/include/linux/pci.h +index cd8aa6fce2041..9d6e75222868f 100644 +--- a/include/linux/pci.h ++++ b/include/linux/pci.h +@@ -233,6 +233,8 @@ enum pci_dev_flags { + PCI_DEV_FLAGS_NO_FLR_RESET = (__force pci_dev_flags_t) (1 << 10), + /* Don't use Relaxed Ordering for TLPs directed at this device */ + PCI_DEV_FLAGS_NO_RELAXED_ORDERING = (__force pci_dev_flags_t) (1 << 11), ++ /* Device does honor MSI masking despite saying otherwise */ ++ PCI_DEV_FLAGS_HAS_MSI_MASKING = (__force pci_dev_flags_t) (1 << 12), + }; + + enum pci_irq_reroute_variant { +@@ -654,6 +656,7 @@ struct pci_bus { + struct bin_attribute *legacy_io; /* Legacy I/O for this bus */ + struct bin_attribute *legacy_mem; /* Legacy mem */ + unsigned int is_added:1; ++ unsigned int unsafe_warn:1; /* warned about RW1C config write */ + }; + + #define to_pci_bus(n) container_of(n, struct pci_bus, dev) +diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h +index 011f2f1ea5bb5..04f44a4694a2e 100644 +--- a/include/linux/pci_ids.h ++++ b/include/linux/pci_ids.h +@@ -60,6 +60,8 @@ + #define PCI_CLASS_BRIDGE_EISA 0x0602 + #define PCI_CLASS_BRIDGE_MC 0x0603 + #define PCI_CLASS_BRIDGE_PCI 0x0604 ++#define PCI_CLASS_BRIDGE_PCI_NORMAL 0x060400 ++#define PCI_CLASS_BRIDGE_PCI_SUBTRACTIVE 0x060401 + #define PCI_CLASS_BRIDGE_PCMCIA 0x0605 + #define PCI_CLASS_BRIDGE_NUBUS 0x0606 + #define PCI_CLASS_BRIDGE_CARDBUS 0x0607 +diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h +index 9b60bb89d86ab..014eb0a963fcb 100644 +--- a/include/linux/perf_event.h ++++ b/include/linux/perf_event.h +@@ -680,18 +680,6 @@ struct perf_event { + u64 total_time_running; + u64 tstamp; + +- /* +- * timestamp shadows the actual context timing but it can +- * be safely used in NMI interrupt context. It reflects the +- * context time as it was when the event was last scheduled in, +- * or when ctx_sched_in failed to schedule the event because we +- * run out of PMC. +- * +- * ctx_time already accounts for ctx->timestamp. Therefore to +- * compute ctx_time for a sample, simply add perf_clock(). 
+- */ +- u64 shadow_ctx_time; +- + struct perf_event_attr attr; + u16 header_size; + u16 id_header_size; +@@ -735,11 +723,14 @@ struct perf_event { + struct fasync_struct *fasync; + + /* delayed work for NMIs and such */ +- int pending_wakeup; +- int pending_kill; +- int pending_disable; ++ unsigned int pending_wakeup; ++ unsigned int pending_kill; ++ unsigned int pending_disable; ++ unsigned int pending_sigtrap; + unsigned long pending_addr; /* SIGTRAP */ +- struct irq_work pending; ++ struct irq_work pending_irq; ++ struct callback_head pending_task; ++ unsigned int pending_work; + + atomic_t event_limit; + +@@ -838,6 +829,7 @@ struct perf_event_context { + */ + u64 time; + u64 timestamp; ++ u64 timeoffset; + + /* + * These fields let us detect when two contexts have both +@@ -852,6 +844,14 @@ struct perf_event_context { + #endif + void *task_ctx_data; /* pmu specific data */ + struct rcu_head rcu_head; ++ ++ /* ++ * Sum (event->pending_sigtrap + event->pending_work) ++ * ++ * The SIGTRAP is targeted at ctx->task, as such it won't do changing ++ * that until the signal is delivered. ++ */ ++ local_t nr_pending; + }; + + /* +@@ -920,6 +920,8 @@ struct bpf_perf_event_data_kern { + struct perf_cgroup_info { + u64 time; + u64 timestamp; ++ u64 timeoffset; ++ int active; + }; + + struct perf_cgroup { +@@ -1239,7 +1241,18 @@ extern void perf_event_bpf_event(struct bpf_prog *prog, + enum perf_bpf_event_type type, + u16 flags); + +-extern struct perf_guest_info_callbacks *perf_guest_cbs; ++extern struct perf_guest_info_callbacks __rcu *perf_guest_cbs; ++static inline struct perf_guest_info_callbacks *perf_get_guest_cbs(void) ++{ ++ /* ++ * Callbacks are RCU-protected and must be READ_ONCE to avoid reloading ++ * the callbacks between a !NULL check and dereferences, to ensure ++ * pending stores/changes to the callback pointers are visible before a ++ * non-NULL perf_guest_cbs is visible to readers, and to prevent a ++ * module from unloading callbacks while readers are active. 
++ */ ++ return rcu_dereference(perf_guest_cbs); ++} + extern int perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *callbacks); + extern int perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks *callbacks); + +diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h +index e24d2c992b112..d468efcf48f45 100644 +--- a/include/linux/pgtable.h ++++ b/include/linux/pgtable.h +@@ -62,6 +62,7 @@ static inline unsigned long pte_index(unsigned long address) + { + return (address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1); + } ++#define pte_index pte_index + + #ifndef pmd_index + static inline unsigned long pmd_index(unsigned long address) +diff --git a/include/linux/phy.h b/include/linux/phy.h +index 736e1d1a47c40..946ccec178588 100644 +--- a/include/linux/phy.h ++++ b/include/linux/phy.h +@@ -536,6 +536,10 @@ struct macsec_ops; + * @mdix: Current crossover + * @mdix_ctrl: User setting of crossover + * @interrupts: Flag interrupts have been enabled ++ * @irq_suspended: Flag indicating PHY is suspended and therefore interrupt ++ * handling shall be postponed until PHY has resumed ++ * @irq_rerun: Flag indicating interrupts occurred while PHY was suspended, ++ * requiring a rerun of the interrupt handler after resume + * @interface: enum phy_interface_t value + * @skb: Netlink message for cable diagnostics + * @nest: Netlink nest used for cable diagnostics +@@ -590,6 +594,8 @@ struct phy_device { + + /* Interrupts are enabled */ + unsigned interrupts:1; ++ unsigned irq_suspended:1; ++ unsigned irq_rerun:1; + + enum phy_state state; + +diff --git a/include/linux/phylink.h b/include/linux/phylink.h +index 237291196ce28..b306159c1fada 100644 +--- a/include/linux/phylink.h ++++ b/include/linux/phylink.h +@@ -64,6 +64,7 @@ enum phylink_op_type { + * @pcs_poll: MAC PCS cannot provide link change interrupt + * @poll_fixed_state: if true, starts link_poll, + * if MAC link is at %MLO_AN_FIXED mode. ++ * @mac_managed_pm: if true, indicate the MAC driver is responsible for PHY PM. + * @ovr_an_inband: if true, override PCS to MLO_AN_INBAND + * @get_fixed_state: callback to execute to determine the fixed link state, + * if MAC link is at %MLO_AN_FIXED mode. +@@ -73,6 +74,7 @@ struct phylink_config { + enum phylink_op_type type; + bool pcs_poll; + bool poll_fixed_state; ++ bool mac_managed_pm; + bool ovr_an_inband; + void (*get_fixed_state)(struct phylink_config *config, + struct phylink_link_state *state); +diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h +index fc5642431b923..18dcca51829e2 100644 +--- a/include/linux/pipe_fs_i.h ++++ b/include/linux/pipe_fs_i.h +@@ -71,7 +71,7 @@ struct pipe_inode_info { + unsigned int files; + unsigned int r_counter; + unsigned int w_counter; +- unsigned int poll_usage; ++ bool poll_usage; + struct page *tmp_page; + struct fasync_struct *fasync_readers; + struct fasync_struct *fasync_writers; +@@ -229,6 +229,15 @@ static inline bool pipe_buf_try_steal(struct pipe_inode_info *pipe, + return buf->ops->try_steal(pipe, buf); + } + ++static inline void pipe_discard_from(struct pipe_inode_info *pipe, ++ unsigned int old_head) ++{ ++ unsigned int mask = pipe->ring_size - 1; ++ ++ while (pipe->head > old_head) ++ pipe_buf_release(pipe, &pipe->bufs[--pipe->head & mask]); ++} ++ + /* Differs from PIPE_BUF in that PIPE_SIZE is the length of the actual + memory allocation, whereas PIPE_BUF makes atomicity guarantees. 
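pipe_discard_from() gives producers a way to roll back buffers they have already published into the ring when a later step fails. A rough sketch of that pattern, with the producer helpers left hypothetical:

    unsigned int old_head = pipe->head;     /* remember the rollback point */

    while (have_more_pages()) {             /* hypothetical */
        if (push_one_page(pipe) < 0) {      /* hypothetical */
            /* release everything published since old_head */
            pipe_discard_from(pipe, old_head);
            return -EFAULT;
        }
    }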
*/ + #define PIPE_SIZE PAGE_SIZE +diff --git a/include/linux/platform_data/cros_ec_proto.h b/include/linux/platform_data/cros_ec_proto.h +index 02599687770c5..7f03e02c48cd4 100644 +--- a/include/linux/platform_data/cros_ec_proto.h ++++ b/include/linux/platform_data/cros_ec_proto.h +@@ -216,6 +216,9 @@ int cros_ec_prepare_tx(struct cros_ec_device *ec_dev, + int cros_ec_check_result(struct cros_ec_device *ec_dev, + struct cros_ec_command *msg); + ++int cros_ec_cmd_xfer(struct cros_ec_device *ec_dev, ++ struct cros_ec_command *msg); ++ + int cros_ec_cmd_xfer_status(struct cros_ec_device *ec_dev, + struct cros_ec_command *msg); + +diff --git a/include/linux/platform_data/ti-sysc.h b/include/linux/platform_data/ti-sysc.h +index 9837fb011f2fb..989aa30c598dc 100644 +--- a/include/linux/platform_data/ti-sysc.h ++++ b/include/linux/platform_data/ti-sysc.h +@@ -50,6 +50,7 @@ struct sysc_regbits { + s8 emufree_shift; + }; + ++#define SYSC_QUIRK_REINIT_ON_CTX_LOST BIT(28) + #define SYSC_QUIRK_REINIT_ON_RESUME BIT(27) + #define SYSC_QUIRK_GPMC_DEBUG BIT(26) + #define SYSC_MODULE_QUIRK_ENA_RESETDONE BIT(25) +diff --git a/include/linux/platform_data/x86/intel-spi.h b/include/linux/platform_data/x86/intel-spi.h +index 7f53a5c6f35e8..7dda3f6904654 100644 +--- a/include/linux/platform_data/x86/intel-spi.h ++++ b/include/linux/platform_data/x86/intel-spi.h +@@ -19,11 +19,13 @@ enum intel_spi_type { + /** + * struct intel_spi_boardinfo - Board specific data for Intel SPI driver + * @type: Type which this controller is compatible with +- * @writeable: The chip is writeable ++ * @set_writeable: Try to make the chip writeable (optional) ++ * @data: Data to be passed to @set_writeable can be %NULL + */ + struct intel_spi_boardinfo { + enum intel_spi_type type; +- bool writeable; ++ bool (*set_writeable)(void __iomem *base, void *data); ++ void *data; + }; + + #endif /* INTEL_SPI_PDATA_H */ +diff --git a/include/linux/platform_data/x86/pmc_atom.h b/include/linux/platform_data/x86/pmc_atom.h +index 022bcea9edec5..99a9b09dc839d 100644 +--- a/include/linux/platform_data/x86/pmc_atom.h ++++ b/include/linux/platform_data/x86/pmc_atom.h +@@ -7,6 +7,8 @@ + #ifndef PMC_ATOM_H + #define PMC_ATOM_H + ++#include <linux/bits.h> ++ + /* ValleyView Power Control Unit PCI Device ID */ + #define PCI_DEVICE_ID_VLV_PMC 0x0F1C + /* CherryTrail Power Control Unit PCI Device ID */ +@@ -139,9 +141,9 @@ + #define ACPI_MMIO_REG_LEN 0x100 + + #define PM1_CNT 0x4 +-#define SLEEP_TYPE_MASK 0xFFFFECFF ++#define SLEEP_TYPE_MASK GENMASK(12, 10) + #define SLEEP_TYPE_S5 0x1C00 +-#define SLEEP_ENABLE 0x2000 ++#define SLEEP_ENABLE BIT(13) + + extern int pmc_atom_read(int offset, u32 *value); + extern int pmc_atom_write(int offset, u32 value); +diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h +index 222da43b7096d..90eaff8b78fc9 100644 +--- a/include/linux/pm_runtime.h ++++ b/include/linux/pm_runtime.h +@@ -58,6 +58,7 @@ extern void pm_runtime_get_suppliers(struct device *dev); + extern void pm_runtime_put_suppliers(struct device *dev); + extern void pm_runtime_new_link(struct device *dev); + extern void pm_runtime_drop_link(struct device_link *link); ++extern void pm_runtime_release_supplier(struct device_link *link); + + extern int devm_pm_runtime_enable(struct device *dev); + +@@ -129,7 +130,7 @@ static inline bool pm_runtime_suspended(struct device *dev) + * pm_runtime_active - Check whether or not a device is runtime-active. + * @dev: Target device. 
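Expressing the PM1_CNT fields through GENMASK() and BIT() makes the register layout explicit: the sleep type occupies bits 12:10 and bit 13 arms the transition. Purely as an illustration of how such field macros compose (the register mapping here is hypothetical; the real consumer maps the ACPI PM base itself):

    void __iomem *pm1_cnt = acpi_base + PM1_CNT;    /* hypothetical mapping */
    u32 val = readl(pm1_cnt);

    val &= ~SLEEP_TYPE_MASK;                /* clear sleep type, bits 12:10 */
    val |= SLEEP_TYPE_S5 | SLEEP_ENABLE;    /* request S5, bit 13 arms it */
    writel(val, pm1_cnt);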
+ * +- * Return %true if runtime PM is enabled for @dev and its runtime PM status is ++ * Return %true if runtime PM is disabled for @dev or its runtime PM status is + * %RPM_ACTIVE, or %false otherwise. + * + * Note that the return value of this function can only be trusted if it is +@@ -283,6 +284,7 @@ static inline void pm_runtime_get_suppliers(struct device *dev) {} + static inline void pm_runtime_put_suppliers(struct device *dev) {} + static inline void pm_runtime_new_link(struct device *dev) {} + static inline void pm_runtime_drop_link(struct device_link *link) {} ++static inline void pm_runtime_release_supplier(struct device_link *link) {} + + #endif /* !CONFIG_PM */ + +diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h +index 00fef0064355f..5bbcd280bfd26 100644 +--- a/include/linux/posix-timers.h ++++ b/include/linux/posix-timers.h +@@ -184,8 +184,10 @@ static inline void posix_cputimers_group_init(struct posix_cputimers *pct, + #endif + + #ifdef CONFIG_POSIX_CPU_TIMERS_TASK_WORK ++void clear_posix_cputimers_work(struct task_struct *p); + void posix_cputimers_init_work(void); + #else ++static inline void clear_posix_cputimers_work(struct task_struct *p) { } + static inline void posix_cputimers_init_work(void) { } + #endif + +diff --git a/include/linux/posix_acl_xattr.h b/include/linux/posix_acl_xattr.h +index 060e8d2031814..1766e1de69560 100644 +--- a/include/linux/posix_acl_xattr.h ++++ b/include/linux/posix_acl_xattr.h +@@ -34,15 +34,19 @@ posix_acl_xattr_count(size_t size) + + #ifdef CONFIG_FS_POSIX_ACL + void posix_acl_fix_xattr_from_user(struct user_namespace *mnt_userns, ++ struct inode *inode, + void *value, size_t size); + void posix_acl_fix_xattr_to_user(struct user_namespace *mnt_userns, ++ struct inode *inode, + void *value, size_t size); + #else + static inline void posix_acl_fix_xattr_from_user(struct user_namespace *mnt_userns, ++ struct inode *inode, + void *value, size_t size) + { + } + static inline void posix_acl_fix_xattr_to_user(struct user_namespace *mnt_userns, ++ struct inode *inode, + void *value, size_t size) + { + } +diff --git a/include/linux/prandom.h b/include/linux/prandom.h +index 056d31317e499..a4aadd2dc153e 100644 +--- a/include/linux/prandom.h ++++ b/include/linux/prandom.h +@@ -10,6 +10,7 @@ + + #include <linux/types.h> + #include <linux/percpu.h> ++#include <linux/siphash.h> + + u32 prandom_u32(void); + void prandom_bytes(void *buf, size_t nbytes); +@@ -27,15 +28,10 @@ DECLARE_PER_CPU(unsigned long, net_rand_noise); + * The core SipHash round function. Each line can be executed in + * parallel given enough CPU resources. + */ +-#define PRND_SIPROUND(v0, v1, v2, v3) ( \ +- v0 += v1, v1 = rol64(v1, 13), v2 += v3, v3 = rol64(v3, 16), \ +- v1 ^= v0, v0 = rol64(v0, 32), v3 ^= v2, \ +- v0 += v3, v3 = rol64(v3, 21), v2 += v1, v1 = rol64(v1, 17), \ +- v3 ^= v0, v1 ^= v2, v2 = rol64(v2, 32) \ +-) ++#define PRND_SIPROUND(v0, v1, v2, v3) SIPHASH_PERMUTATION(v0, v1, v2, v3) + +-#define PRND_K0 (0x736f6d6570736575 ^ 0x6c7967656e657261) +-#define PRND_K1 (0x646f72616e646f6d ^ 0x7465646279746573) ++#define PRND_K0 (SIPHASH_CONST_0 ^ SIPHASH_CONST_2) ++#define PRND_K1 (SIPHASH_CONST_1 ^ SIPHASH_CONST_3) + + #elif BITS_PER_LONG == 32 + /* +@@ -43,14 +39,9 @@ DECLARE_PER_CPU(unsigned long, net_rand_noise); + * This is weaker, but 32-bit machines are not used for high-traffic + * applications, so there is less output for an attacker to analyze. 
+ */ +-#define PRND_SIPROUND(v0, v1, v2, v3) ( \ +- v0 += v1, v1 = rol32(v1, 5), v2 += v3, v3 = rol32(v3, 8), \ +- v1 ^= v0, v0 = rol32(v0, 16), v3 ^= v2, \ +- v0 += v3, v3 = rol32(v3, 7), v2 += v1, v1 = rol32(v1, 13), \ +- v3 ^= v0, v1 ^= v2, v2 = rol32(v2, 16) \ +-) +-#define PRND_K0 0x6c796765 +-#define PRND_K1 0x74656462 ++#define PRND_SIPROUND(v0, v1, v2, v3) HSIPHASH_PERMUTATION(v0, v1, v2, v3) ++#define PRND_K0 (HSIPHASH_CONST_0 ^ HSIPHASH_CONST_2) ++#define PRND_K1 (HSIPHASH_CONST_1 ^ HSIPHASH_CONST_3) + + #else + #error Unsupported BITS_PER_LONG +diff --git a/include/linux/printk.h b/include/linux/printk.h +index 85b656f82d752..9497f6b983399 100644 +--- a/include/linux/printk.h ++++ b/include/linux/printk.h +@@ -198,6 +198,7 @@ void dump_stack_print_info(const char *log_lvl); + void show_regs_print_info(const char *log_lvl); + extern asmlinkage void dump_stack_lvl(const char *log_lvl) __cold; + extern asmlinkage void dump_stack(void) __cold; ++void printk_trigger_flush(void); + #else + static inline __printf(1, 0) + int vprintk(const char *s, va_list args) +@@ -274,6 +275,9 @@ static inline void dump_stack_lvl(const char *log_lvl) + static inline void dump_stack(void) + { + } ++static inline void printk_trigger_flush(void) ++{ ++} + #endif + + #ifdef CONFIG_SMP +diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h +index 069c7fd953961..a2f25b26ae1ec 100644 +--- a/include/linux/proc_fs.h ++++ b/include/linux/proc_fs.h +@@ -191,8 +191,10 @@ static inline void proc_remove(struct proc_dir_entry *de) {} + static inline int remove_proc_subtree(const char *name, struct proc_dir_entry *parent) { return 0; } + + #define proc_create_net_data(name, mode, parent, ops, state_size, data) ({NULL;}) ++#define proc_create_net_data_write(name, mode, parent, ops, write, state_size, data) ({NULL;}) + #define proc_create_net(name, mode, parent, state_size, ops) ({NULL;}) + #define proc_create_net_single(name, mode, parent, show, data) ({NULL;}) ++#define proc_create_net_single_write(name, mode, parent, show, write, data) ({NULL;}) + + static inline struct pid *tgid_pidfd_to_pid(const struct file *file) + { +diff --git a/include/linux/psi.h b/include/linux/psi.h +index 65eb1476ac705..57823b30c2d3d 100644 +--- a/include/linux/psi.h ++++ b/include/linux/psi.h +@@ -24,18 +24,17 @@ void psi_memstall_enter(unsigned long *flags); + void psi_memstall_leave(unsigned long *flags); + + int psi_show(struct seq_file *s, struct psi_group *group, enum psi_res res); +- +-#ifdef CONFIG_CGROUPS +-int psi_cgroup_alloc(struct cgroup *cgrp); +-void psi_cgroup_free(struct cgroup *cgrp); +-void cgroup_move_task(struct task_struct *p, struct css_set *to); +- + struct psi_trigger *psi_trigger_create(struct psi_group *group, + char *buf, size_t nbytes, enum psi_res res); +-void psi_trigger_replace(void **trigger_ptr, struct psi_trigger *t); ++void psi_trigger_destroy(struct psi_trigger *t); + + __poll_t psi_trigger_poll(void **trigger_ptr, struct file *file, + poll_table *wait); ++ ++#ifdef CONFIG_CGROUPS ++int psi_cgroup_alloc(struct cgroup *cgrp); ++void psi_cgroup_free(struct cgroup *cgrp); ++void cgroup_move_task(struct task_struct *p, struct css_set *to); + #endif + + #else /* CONFIG_PSI */ +diff --git a/include/linux/psi_types.h b/include/linux/psi_types.h +index 0a23300d49af7..6f190002a2022 100644 +--- a/include/linux/psi_types.h ++++ b/include/linux/psi_types.h +@@ -21,7 +21,17 @@ enum psi_task_count { + * don't have to special case any state tracking for it. 
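The psi interface change above replaces the swap-style psi_trigger_replace() with an explicit create/destroy pairing, so a trigger now has a single owner for its whole lifetime. A sketch of that lifecycle, with error handling trimmed:

    struct psi_trigger *t;

    t = psi_trigger_create(group, buf, nbytes, PSI_MEM);
    if (IS_ERR(t))
        return PTR_ERR(t);

    /* hand the trigger to poll() via psi_trigger_poll(&t, file, wait) */

    psi_trigger_destroy(t);                 /* sole owner tears it down */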
+ */ + NR_ONCPU, +- NR_PSI_TASK_COUNTS = 4, ++ /* ++ * For IO and CPU stalls the presence of running/oncpu tasks ++ * in the domain means a partial rather than a full stall. ++ * For memory it's not so simple because of page reclaimers: ++ * they are running/oncpu while representing a stall. To tell ++ * whether a domain has productivity left or not, we need to ++ * distinguish between regular running (i.e. productive) ++ * threads and memstall ones. ++ */ ++ NR_MEMSTALL_RUNNING, ++ NR_PSI_TASK_COUNTS = 5, + }; + + /* Task state bitmasks */ +@@ -29,6 +39,7 @@ enum psi_task_count { + #define TSK_MEMSTALL (1 << NR_MEMSTALL) + #define TSK_RUNNING (1 << NR_RUNNING) + #define TSK_ONCPU (1 << NR_ONCPU) ++#define TSK_MEMSTALL_RUNNING (1 << NR_MEMSTALL_RUNNING) + + /* Resources that workloads could be stalled on */ + enum psi_res { +@@ -129,9 +140,6 @@ struct psi_trigger { + * events to one per window + */ + u64 last_event_time; +- +- /* Refcounting to prevent premature destruction */ +- struct kref refcount; + }; + + struct psi_group { +diff --git a/include/linux/pstore.h b/include/linux/pstore.h +index eb93a54cff31f..e97a8188f0fd8 100644 +--- a/include/linux/pstore.h ++++ b/include/linux/pstore.h +@@ -14,7 +14,7 @@ + #include <linux/errno.h> + #include <linux/kmsg_dump.h> + #include <linux/mutex.h> +-#include <linux/semaphore.h> ++#include <linux/spinlock.h> + #include <linux/time.h> + #include <linux/types.h> + +@@ -87,7 +87,7 @@ struct pstore_record { + * @owner: module which is responsible for this backend driver + * @name: name of the backend driver + * +- * @buf_lock: semaphore to serialize access to @buf ++ * @buf_lock: spinlock to serialize access to @buf + * @buf: preallocated crash dump buffer + * @bufsize: size of @buf available for crash dump bytes (must match + * smallest number of bytes available for writing to a +@@ -178,7 +178,7 @@ struct pstore_info { + struct module *owner; + const char *name; + +- struct semaphore buf_lock; ++ spinlock_t buf_lock; + char *buf; + size_t bufsize; + +diff --git a/include/linux/ptp_classify.h b/include/linux/ptp_classify.h +index ae04968a3a472..7a526b52bd748 100644 +--- a/include/linux/ptp_classify.h ++++ b/include/linux/ptp_classify.h +@@ -42,6 +42,9 @@ + #define OFF_PTP_SOURCE_UUID 22 /* PTPv1 only */ + #define OFF_PTP_SEQUENCE_ID 30 + ++/* PTP header flag fields */ ++#define PTP_FLAG_TWOSTEP BIT(1) ++ + /* Below defines should actually be removed at some point in time. */ + #define IP6_HLEN 40 + #define UDP_HLEN 8 +diff --git a/include/linux/ptp_clock_kernel.h b/include/linux/ptp_clock_kernel.h +index 2e5565067355b..554454cb86931 100644 +--- a/include/linux/ptp_clock_kernel.h ++++ b/include/linux/ptp_clock_kernel.h +@@ -351,15 +351,17 @@ int ptp_get_vclocks_index(int pclock_index, int **vclock_index); + * + * @hwtstamps: skb_shared_hwtstamps structure pointer + * @vclock_index: phc index of ptp vclock. ++ * ++ * Returns converted timestamp, or 0 on error. 
+ */ +-void ptp_convert_timestamp(struct skb_shared_hwtstamps *hwtstamps, +- int vclock_index); ++ktime_t ptp_convert_timestamp(const struct skb_shared_hwtstamps *hwtstamps, ++ int vclock_index); + #else + static inline int ptp_get_vclocks_index(int pclock_index, int **vclock_index) + { return 0; } +-static inline void ptp_convert_timestamp(struct skb_shared_hwtstamps *hwtstamps, +- int vclock_index) +-{ } ++static inline ktime_t ptp_convert_timestamp(const struct skb_shared_hwtstamps *hwtstamps, ++ int vclock_index) ++{ return 0; } + + #endif + +diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h +index b5ebf6c012924..d695c43fd740d 100644 +--- a/include/linux/ptrace.h ++++ b/include/linux/ptrace.h +@@ -30,7 +30,6 @@ extern int ptrace_access_vm(struct task_struct *tsk, unsigned long addr, + + #define PT_SEIZED 0x00010000 /* SEIZE used, enable new behavior */ + #define PT_PTRACED 0x00000001 +-#define PT_DTRACE 0x00000002 /* delayed trace (used on m68k, i386) */ + + #define PT_OPT_FLAG_SHIFT 3 + /* PT_TRACE_* event enable flags */ +@@ -47,12 +46,6 @@ extern int ptrace_access_vm(struct task_struct *tsk, unsigned long addr, + #define PT_EXITKILL (PTRACE_O_EXITKILL << PT_OPT_FLAG_SHIFT) + #define PT_SUSPEND_SECCOMP (PTRACE_O_SUSPEND_SECCOMP << PT_OPT_FLAG_SHIFT) + +-/* single stepping state bits (used on ARM and PA-RISC) */ +-#define PT_SINGLESTEP_BIT 31 +-#define PT_SINGLESTEP (1<<PT_SINGLESTEP_BIT) +-#define PT_BLOCKSTEP_BIT 30 +-#define PT_BLOCKSTEP (1<<PT_BLOCKSTEP_BIT) +- + extern long arch_ptrace(struct task_struct *child, long request, + unsigned long addr, unsigned long data); + extern int ptrace_readdata(struct task_struct *tsk, unsigned long src, char __user *dst, int len); +diff --git a/include/linux/qed/qed_eth_if.h b/include/linux/qed/qed_eth_if.h +index 812a4d7511633..4df0bf0a0864e 100644 +--- a/include/linux/qed/qed_eth_if.h ++++ b/include/linux/qed/qed_eth_if.h +@@ -145,12 +145,6 @@ struct qed_filter_mcast_params { + unsigned char mac[64][ETH_ALEN]; + }; + +-union qed_filter_type_params { +- enum qed_filter_rx_mode_type accept_flags; +- struct qed_filter_ucast_params ucast; +- struct qed_filter_mcast_params mcast; +-}; +- + enum qed_filter_type { + QED_FILTER_TYPE_UCAST, + QED_FILTER_TYPE_MCAST, +@@ -158,11 +152,6 @@ enum qed_filter_type { + QED_MAX_FILTER_TYPES, + }; + +-struct qed_filter_params { +- enum qed_filter_type type; +- union qed_filter_type_params filter; +-}; +- + struct qed_tunn_params { + u16 vxlan_port; + u8 update_vxlan_port; +@@ -314,8 +303,14 @@ struct qed_eth_ops { + + int (*q_tx_stop)(struct qed_dev *cdev, u8 rss_id, void *handle); + +- int (*filter_config)(struct qed_dev *cdev, +- struct qed_filter_params *params); ++ int (*filter_config_rx_mode)(struct qed_dev *cdev, ++ enum qed_filter_rx_mode_type type); ++ ++ int (*filter_config_ucast)(struct qed_dev *cdev, ++ struct qed_filter_ucast_params *params); ++ ++ int (*filter_config_mcast)(struct qed_dev *cdev, ++ struct qed_filter_mcast_params *params); + + int (*fastpath_stop)(struct qed_dev *cdev); + +diff --git a/include/linux/random.h b/include/linux/random.h +index f45b8be3e3c4e..ed75fb2b0ca94 100644 +--- a/include/linux/random.h ++++ b/include/linux/random.h +@@ -1,9 +1,5 @@ + /* SPDX-License-Identifier: GPL-2.0 */ +-/* +- * include/linux/random.h +- * +- * Include file for the random number generator. 
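Returning the converted time from ptp_convert_timestamp() instead of rewriting the caller's structure lets a driver keep the original hardware timestamp around. A sketch of the new calling convention, assuming the caller already knows its vclock index:

    const struct skb_shared_hwtstamps *hwts = skb_hwtstamps(skb);
    ktime_t ns = ptp_convert_timestamp(hwts, vclock_index);

    if (!ns)
        return;                 /* 0 signals a failed conversion */
    /* ns is now the timestamp in the vclock's time domain */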
+- */ ++ + #ifndef _LINUX_RANDOM_H + #define _LINUX_RANDOM_H + +@@ -14,41 +10,26 @@ + + #include <uapi/linux/random.h> + +-struct random_ready_callback { +- struct list_head list; +- void (*func)(struct random_ready_callback *rdy); +- struct module *owner; +-}; ++struct notifier_block; + +-extern void add_device_randomness(const void *, unsigned int); +-extern void add_bootloader_randomness(const void *, unsigned int); ++void add_device_randomness(const void *buf, size_t len); ++void __init add_bootloader_randomness(const void *buf, size_t len); ++void add_input_randomness(unsigned int type, unsigned int code, ++ unsigned int value) __latent_entropy; ++void add_interrupt_randomness(int irq) __latent_entropy; ++void add_hwgenerator_randomness(const void *buf, size_t len, size_t entropy); + +-#if defined(LATENT_ENTROPY_PLUGIN) && !defined(__CHECKER__) + static inline void add_latent_entropy(void) + { +- add_device_randomness((const void *)&latent_entropy, +- sizeof(latent_entropy)); +-} ++#if defined(LATENT_ENTROPY_PLUGIN) && !defined(__CHECKER__) ++ add_device_randomness((const void *)&latent_entropy, sizeof(latent_entropy)); + #else +-static inline void add_latent_entropy(void) {} +-#endif +- +-extern void add_input_randomness(unsigned int type, unsigned int code, +- unsigned int value) __latent_entropy; +-extern void add_interrupt_randomness(int irq, int irq_flags) __latent_entropy; +- +-extern void get_random_bytes(void *buf, int nbytes); +-extern int wait_for_random_bytes(void); +-extern int __init rand_initialize(void); +-extern bool rng_is_initialized(void); +-extern int add_random_ready_callback(struct random_ready_callback *rdy); +-extern void del_random_ready_callback(struct random_ready_callback *rdy); +-extern int __must_check get_random_bytes_arch(void *buf, int nbytes); +- +-#ifndef MODULE +-extern const struct file_operations random_fops, urandom_fops; ++ add_device_randomness(NULL, 0); + #endif ++} + ++void get_random_bytes(void *buf, size_t len); ++size_t __must_check get_random_bytes_arch(void *buf, size_t len); + u32 get_random_u32(void); + u64 get_random_u64(void); + static inline unsigned int get_random_int(void) +@@ -80,36 +61,38 @@ static inline unsigned long get_random_long(void) + + static inline unsigned long get_random_canary(void) + { +- unsigned long val = get_random_long(); +- +- return val & CANARY_MASK; ++ return get_random_long() & CANARY_MASK; + } + ++int __init random_init(const char *command_line); ++bool rng_is_initialized(void); ++int wait_for_random_bytes(void); ++int register_random_ready_notifier(struct notifier_block *nb); ++int unregister_random_ready_notifier(struct notifier_block *nb); ++ + /* Calls wait_for_random_bytes() and then calls get_random_bytes(buf, nbytes). + * Returns the result of the call to wait_for_random_bytes. 
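With the random_ready_callback list gone, code that must wait for the RNG to be seeded registers a plain notifier_block instead. A hedged sketch, with the driver names invented for illustration:

    static int mydrv_rng_ready(struct notifier_block *nb,
                               unsigned long action, void *data)
    {
        /* the CRNG is seeded; get_random_bytes() output is usable */
        return NOTIFY_DONE;
    }

    static struct notifier_block mydrv_rng_nb = {
        .notifier_call = mydrv_rng_ready,
    };

    /* a nonzero return may mean the RNG is already seeded; check it */
    err = register_random_ready_notifier(&mydrv_rng_nb);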
*/ +-static inline int get_random_bytes_wait(void *buf, int nbytes) ++static inline int get_random_bytes_wait(void *buf, size_t nbytes) + { + int ret = wait_for_random_bytes(); + get_random_bytes(buf, nbytes); + return ret; + } + +-#define declare_get_random_var_wait(var) \ +- static inline int get_random_ ## var ## _wait(var *out) { \ ++#define declare_get_random_var_wait(name, ret_type) \ ++ static inline int get_random_ ## name ## _wait(ret_type *out) { \ + int ret = wait_for_random_bytes(); \ + if (unlikely(ret)) \ + return ret; \ +- *out = get_random_ ## var(); \ ++ *out = get_random_ ## name(); \ + return 0; \ + } +-declare_get_random_var_wait(u32) +-declare_get_random_var_wait(u64) +-declare_get_random_var_wait(int) +-declare_get_random_var_wait(long) ++declare_get_random_var_wait(u32, u32) ++declare_get_random_var_wait(u64, u32) ++declare_get_random_var_wait(int, unsigned int) ++declare_get_random_var_wait(long, unsigned long) + #undef declare_get_random_var + +-unsigned long randomize_page(unsigned long start, unsigned long range); +- + /* + * This is designed to be standalone for just prandom + * users, but for now we include it from <linux/random.h> +@@ -120,22 +103,10 @@ unsigned long randomize_page(unsigned long start, unsigned long range); + #ifdef CONFIG_ARCH_RANDOM + # include <asm/archrandom.h> + #else +-static inline bool __must_check arch_get_random_long(unsigned long *v) +-{ +- return false; +-} +-static inline bool __must_check arch_get_random_int(unsigned int *v) +-{ +- return false; +-} +-static inline bool __must_check arch_get_random_seed_long(unsigned long *v) +-{ +- return false; +-} +-static inline bool __must_check arch_get_random_seed_int(unsigned int *v) +-{ +- return false; +-} ++static inline bool __must_check arch_get_random_long(unsigned long *v) { return false; } ++static inline bool __must_check arch_get_random_int(unsigned int *v) { return false; } ++static inline bool __must_check arch_get_random_seed_long(unsigned long *v) { return false; } ++static inline bool __must_check arch_get_random_seed_int(unsigned int *v) { return false; } + #endif + + /* +@@ -158,4 +129,13 @@ static inline bool __init arch_get_random_long_early(unsigned long *v) + } + #endif + ++#ifdef CONFIG_SMP ++int random_prepare_cpu(unsigned int cpu); ++int random_online_cpu(unsigned int cpu); ++#endif ++ ++#ifndef MODULE ++extern const struct file_operations random_fops, urandom_fops; ++#endif ++ + #endif /* _LINUX_RANDOM_H */ +diff --git a/include/linux/randomize_kstack.h b/include/linux/randomize_kstack.h +index bebc911161b6f..d373f1bcbf7ca 100644 +--- a/include/linux/randomize_kstack.h ++++ b/include/linux/randomize_kstack.h +@@ -16,8 +16,20 @@ DECLARE_PER_CPU(u32, kstack_offset); + * alignment. Also, since this use is being explicitly masked to a max of + * 10 bits, stack-clash style attacks are unlikely. For more details see + * "VLAs" in Documentation/process/deprecated.rst ++ * ++ * The normal __builtin_alloca() is initialized with INIT_STACK_ALL (currently ++ * only with Clang and not GCC). Initializing the unused area on each syscall ++ * entry is expensive, and generating an implicit call to memset() may also be ++ * problematic (such as in noinstr functions). Therefore, if the compiler ++ * supports it (which it should if it initializes allocas), always use the ++ * "uninitialized" variant of the builtin. 
+ */ +-void *__builtin_alloca(size_t size); ++#if __has_builtin(__builtin_alloca_uninitialized) ++#define __kstack_alloca __builtin_alloca_uninitialized ++#else ++#define __kstack_alloca __builtin_alloca ++#endif ++ + /* + * Use, at most, 10 bits of entropy. We explicitly cap this to keep the + * "VLA" from being unbounded (see above). 10 bits leaves enough room for +@@ -36,7 +48,7 @@ void *__builtin_alloca(size_t size); + if (static_branch_maybe(CONFIG_RANDOMIZE_KSTACK_OFFSET_DEFAULT, \ + &randomize_kstack_offset)) { \ + u32 offset = raw_cpu_read(kstack_offset); \ +- u8 *ptr = __builtin_alloca(KSTACK_OFFSET_MAX(offset)); \ ++ u8 *ptr = __kstack_alloca(KSTACK_OFFSET_MAX(offset)); \ + /* Keep allocation even after "ptr" loses scope. */ \ + asm volatile("" :: "r"(ptr) : "memory"); \ + } \ +diff --git a/include/linux/ratelimit_types.h b/include/linux/ratelimit_types.h +index b676aa419eef8..f0e535f199bef 100644 +--- a/include/linux/ratelimit_types.h ++++ b/include/linux/ratelimit_types.h +@@ -23,12 +23,16 @@ struct ratelimit_state { + unsigned long flags; + }; + +-#define RATELIMIT_STATE_INIT(name, interval_init, burst_init) { \ +- .lock = __RAW_SPIN_LOCK_UNLOCKED(name.lock), \ +- .interval = interval_init, \ +- .burst = burst_init, \ ++#define RATELIMIT_STATE_INIT_FLAGS(name, interval_init, burst_init, flags_init) { \ ++ .lock = __RAW_SPIN_LOCK_UNLOCKED(name.lock), \ ++ .interval = interval_init, \ ++ .burst = burst_init, \ ++ .flags = flags_init, \ + } + ++#define RATELIMIT_STATE_INIT(name, interval_init, burst_init) \ ++ RATELIMIT_STATE_INIT_FLAGS(name, interval_init, burst_init, 0) ++ + #define RATELIMIT_STATE_INIT_DISABLED \ + RATELIMIT_STATE_INIT(ratelimit_state, 0, DEFAULT_RATELIMIT_BURST) + +diff --git a/include/linux/reset.h b/include/linux/reset.h +index db0e6115a2f6a..7bb5837375289 100644 +--- a/include/linux/reset.h ++++ b/include/linux/reset.h +@@ -711,7 +711,7 @@ static inline int __must_check + devm_reset_control_bulk_get_optional_exclusive(struct device *dev, int num_rstcs, + struct reset_control_bulk_data *rstcs) + { +- return __devm_reset_control_bulk_get(dev, num_rstcs, rstcs, true, false, true); ++ return __devm_reset_control_bulk_get(dev, num_rstcs, rstcs, false, true, true); + } + + /** +diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h +index dac53fd3afea3..3c7d295746f67 100644 +--- a/include/linux/ring_buffer.h ++++ b/include/linux/ring_buffer.h +@@ -100,8 +100,8 @@ __ring_buffer_alloc(unsigned long size, unsigned flags, struct lock_class_key *k + + int ring_buffer_wait(struct trace_buffer *buffer, int cpu, int full); + __poll_t ring_buffer_poll_wait(struct trace_buffer *buffer, int cpu, +- struct file *filp, poll_table *poll_table); +- ++ struct file *filp, poll_table *poll_table, int full); ++void ring_buffer_wake_waiters(struct trace_buffer *buffer, int cpu); + + #define RING_BUFFER_ALL_CPUS -1 + +diff --git a/include/linux/rmap.h b/include/linux/rmap.h +index c976cc6de2574..c29d9c13378b3 100644 +--- a/include/linux/rmap.h ++++ b/include/linux/rmap.h +@@ -39,12 +39,15 @@ struct anon_vma { + atomic_t refcount; + + /* +- * Count of child anon_vmas and VMAs which points to this anon_vma. ++ * Count of child anon_vmas. Equals to the count of all anon_vmas that ++ * have ->parent pointing to this one, including itself. + * + * This counter is used for making decision about reusing anon_vma + * instead of forking new one. See comments in function anon_vma_clone. 
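RATELIMIT_STATE_INIT_FLAGS() allows state flags to be set at definition time rather than being patched into the structure afterwards. For instance, a limit of three messages per five seconds that reports suppression only on release (the flag choice is illustrative):

    static struct ratelimit_state mydrv_rs =
        RATELIMIT_STATE_INIT_FLAGS(mydrv_rs, 5 * HZ, 3,
                                   RATELIMIT_MSG_ON_RELEASE);

    if (__ratelimit(&mydrv_rs))
        pr_warn("something suspicious happened\n");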
+ */ +- unsigned degree; ++ unsigned long num_children; ++ /* Count of VMAs whose ->anon_vma pointer points to this object. */ ++ unsigned long num_active_vmas; + + struct anon_vma *parent; /* Parent of this anon_vma */ + +diff --git a/include/linux/rpmsg.h b/include/linux/rpmsg.h +index d97dcd049f18f..a8dcf8a9ae885 100644 +--- a/include/linux/rpmsg.h ++++ b/include/linux/rpmsg.h +@@ -231,7 +231,7 @@ static inline struct rpmsg_endpoint *rpmsg_create_ept(struct rpmsg_device *rpdev + /* This shouldn't be possible */ + WARN_ON(1); + +- return ERR_PTR(-ENXIO); ++ return NULL; + } + + static inline int rpmsg_send(struct rpmsg_endpoint *ept, void *data, int len) +diff --git a/include/linux/rtsx_usb.h b/include/linux/rtsx_usb.h +index 159729cffd8e1..3247ed8e9ff0f 100644 +--- a/include/linux/rtsx_usb.h ++++ b/include/linux/rtsx_usb.h +@@ -54,8 +54,6 @@ struct rtsx_ucr { + struct usb_device *pusb_dev; + struct usb_interface *pusb_intf; + struct usb_sg_request current_sg; +- unsigned char *iobuf; +- dma_addr_t iobuf_dma; + + struct timer_list sg_timer; + struct mutex dev_mutex; +diff --git a/include/linux/sched.h b/include/linux/sched.h +index c1a927ddec646..e418935f8db6a 100644 +--- a/include/linux/sched.h ++++ b/include/linux/sched.h +@@ -933,7 +933,7 @@ struct task_struct { + #endif + #ifdef CONFIG_EVENTFD + /* Recursion prevention for eventfd_signal() */ +- unsigned in_eventfd_signal:1; ++ unsigned in_eventfd:1; + #endif + + unsigned long atomic_flags; /* Flags requiring atomic access. */ +@@ -1436,6 +1436,7 @@ struct task_struct { + int pagefault_disabled; + #ifdef CONFIG_MMU + struct task_struct *oom_reaper_list; ++ struct timer_list oom_reaper_timer; + #endif + #ifdef CONFIG_VMAP_STACK + struct vm_struct *stack_vm_area; +@@ -1626,6 +1627,14 @@ static inline unsigned int task_state_index(struct task_struct *tsk) + if (tsk_state == TASK_IDLE) + state = TASK_REPORT_IDLE; + ++ /* ++ * We're lying here, but rather than expose a completely new task state ++ * to userspace, we can make this appear as if the task has gone through ++ * a regular rt_mutex_lock() call. 
++ */ ++ if (tsk_state == TASK_RTLOCK_WAIT) ++ state = TASK_UNINTERRUPTIBLE; ++ + return fls(state); + } + +@@ -1675,7 +1684,6 @@ extern struct pid *cad_pid; + #define PF_MEMALLOC 0x00000800 /* Allocating memory */ + #define PF_NPROC_EXCEEDED 0x00001000 /* set_user() noticed that RLIMIT_NPROC was exceeded */ + #define PF_USED_MATH 0x00002000 /* If unset the fpu must be initialized before use */ +-#define PF_USED_ASYNC 0x00004000 /* Used async_schedule*(), used by module init */ + #define PF_NOFREEZE 0x00008000 /* This thread should not be frozen */ + #define PF_FROZEN 0x00010000 /* Frozen for system suspend */ + #define PF_KSWAPD 0x00020000 /* I am kswapd */ +@@ -1789,7 +1797,7 @@ current_restore_flags(unsigned long orig_flags, unsigned long flags) + } + + extern int cpuset_cpumask_can_shrink(const struct cpumask *cur, const struct cpumask *trial); +-extern int task_can_attach(struct task_struct *p, const struct cpumask *cs_cpus_allowed); ++extern int task_can_attach(struct task_struct *p, const struct cpumask *cs_effective_cpus); + #ifdef CONFIG_SMP + extern void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask); + extern int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask); +diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h +index 5561486fddef7..95fb7aaaec8de 100644 +--- a/include/linux/sched/mm.h ++++ b/include/linux/sched/mm.h +@@ -106,6 +106,14 @@ static inline void mm_update_next_owner(struct mm_struct *mm) + #endif /* CONFIG_MEMCG */ + + #ifdef CONFIG_MMU ++#ifndef arch_get_mmap_end ++#define arch_get_mmap_end(addr) (TASK_SIZE) ++#endif ++ ++#ifndef arch_get_mmap_base ++#define arch_get_mmap_base(addr, base) (base) ++#endif ++ + extern void arch_pick_mmap_layout(struct mm_struct *mm, + struct rlimit *rlim_stack); + extern unsigned long +diff --git a/include/linux/sched/rt.h b/include/linux/sched/rt.h +index e5af028c08b49..994c25640e156 100644 +--- a/include/linux/sched/rt.h ++++ b/include/linux/sched/rt.h +@@ -39,20 +39,12 @@ static inline struct task_struct *rt_mutex_get_top_task(struct task_struct *p) + } + extern void rt_mutex_setprio(struct task_struct *p, struct task_struct *pi_task); + extern void rt_mutex_adjust_pi(struct task_struct *p); +-static inline bool tsk_is_pi_blocked(struct task_struct *tsk) +-{ +- return tsk->pi_blocked_on != NULL; +-} + #else + static inline struct task_struct *rt_mutex_get_top_task(struct task_struct *task) + { + return NULL; + } + # define rt_mutex_adjust_pi(p) do { } while (0) +-static inline bool tsk_is_pi_blocked(struct task_struct *tsk) +-{ +- return false; +-} + #endif + + extern void normalize_rt_tasks(void); +diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h +index e5f4ce622ee61..5f0e8403e8ceb 100644 +--- a/include/linux/sched/signal.h ++++ b/include/linux/sched/signal.h +@@ -318,7 +318,7 @@ int send_sig_mceerr(int code, void __user *, short, struct task_struct *); + + int force_sig_bnderr(void __user *addr, void __user *lower, void __user *upper); + int force_sig_pkuerr(void __user *addr, u32 pkey); +-int force_sig_perf(void __user *addr, u32 type, u64 sig_data); ++int send_sig_perf(void __user *addr, u32 type, u64 sig_data); + + int force_sig_ptrace_errno_trap(int errno, void __user *addr); + int force_sig_fault_trapno(int sig, int code, void __user *addr, int trapno); +@@ -338,6 +338,8 @@ extern int kill_pid(struct pid *pid, int sig, int priv); + extern __must_check bool do_notify_parent(struct task_struct *, int); + extern void 
__wake_up_parent(struct task_struct *p, struct task_struct *parent); + extern void force_sig(int); ++extern void force_fatal_sig(int); ++extern void force_exit_sig(int); + extern int send_sig(int, struct task_struct *, int); + extern int zap_other_threads(struct task_struct *p); + extern struct sigqueue *sigqueue_alloc(void); +diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h +index ef02be869cf28..d351f1b362ef9 100644 +--- a/include/linux/sched/task.h ++++ b/include/linux/sched/task.h +@@ -54,10 +54,12 @@ extern asmlinkage void schedule_tail(struct task_struct *prev); + extern void init_idle(struct task_struct *idle, int cpu); + + extern int sched_fork(unsigned long clone_flags, struct task_struct *p); ++extern void sched_cgroup_fork(struct task_struct *p, struct kernel_clone_args *kargs); + extern void sched_post_fork(struct task_struct *p); + extern void sched_dead(struct task_struct *p); + + void __noreturn do_task_dead(void); ++void __noreturn make_task_dead(int signr); + + extern void proc_caches_init(void); + +@@ -80,7 +82,7 @@ static inline void exit_thread(struct task_struct *tsk) + extern void do_group_exit(int); + + extern void exit_files(struct task_struct *); +-extern void exit_itimers(struct signal_struct *); ++extern void exit_itimers(struct task_struct *); + + extern pid_t kernel_clone(struct kernel_clone_args *kargs); + struct task_struct *create_io_thread(int (*fn)(void *), void *arg, int node); +@@ -157,7 +159,7 @@ static inline struct vm_struct *task_stack_vm_area(const struct task_struct *t) + * Protects ->fs, ->files, ->mm, ->group_info, ->comm, keyring + * subscriptions and synchronises with wait4(). Also used in procfs. Also + * pins the final release of task.io_context. Also protects ->cpuset and +- * ->cgroup.subsys[]. And ->vfork_done. ++ * ->cgroup.subsys[]. And ->vfork_done. And ->sysvshm.shm_clist. + * + * Nests both inside and outside of read_lock(&tasklist_lock). + * It must not be nested with write_lock_irq(&tasklist_lock), +diff --git a/include/linux/sched/task_stack.h b/include/linux/sched/task_stack.h +index 2413427e439c7..1009b6b5ce403 100644 +--- a/include/linux/sched/task_stack.h ++++ b/include/linux/sched/task_stack.h +@@ -16,7 +16,7 @@ + * try_get_task_stack() instead. task_stack_page will return a pointer + * that could get freed out from under you. 
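The end_of_stack() change in the next hunk makes the stack-end sentinel land on the correct word for upward-growing stacks as well, so the canonical corruption check keeps working on both layouts. A sketch modelled on the scheduler's existing check:

    unsigned long *end = end_of_stack(task);

    if (*end != STACK_END_MAGIC)            /* sentinel written at fork */
        panic("corrupted stack end detected\n");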
+ */ +-static inline void *task_stack_page(const struct task_struct *task) ++static __always_inline void *task_stack_page(const struct task_struct *task) + { + return task->stack; + } +@@ -25,7 +25,11 @@ static inline void *task_stack_page(const struct task_struct *task) + + static inline unsigned long *end_of_stack(const struct task_struct *task) + { ++#ifdef CONFIG_STACK_GROWSUP ++ return (unsigned long *)((unsigned long)task->stack + THREAD_SIZE) - 1; ++#else + return task->stack; ++#endif + } + + #elif !defined(__HAVE_THREAD_FUNCTIONS) +diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h +index 8f0f778b7c911..63a04a65e3106 100644 +--- a/include/linux/sched/topology.h ++++ b/include/linux/sched/topology.h +@@ -74,6 +74,7 @@ struct sched_domain_shared { + atomic_t ref; + atomic_t nr_busy_cpus; + int has_idle_cores; ++ int nr_idle_scan; + }; + + struct sched_domain { +diff --git a/include/linux/scmi_protocol.h b/include/linux/scmi_protocol.h +index 80e781c51ddc1..d22f62203ee36 100644 +--- a/include/linux/scmi_protocol.h ++++ b/include/linux/scmi_protocol.h +@@ -74,7 +74,7 @@ struct scmi_protocol_handle; + struct scmi_clk_proto_ops { + int (*count_get)(const struct scmi_protocol_handle *ph); + +- const struct scmi_clock_info *(*info_get) ++ const struct scmi_clock_info __must_check *(*info_get) + (const struct scmi_protocol_handle *ph, u32 clk_id); + int (*rate_get)(const struct scmi_protocol_handle *ph, u32 clk_id, + u64 *rate); +@@ -452,7 +452,7 @@ enum scmi_sensor_class { + */ + struct scmi_sensor_proto_ops { + int (*count_get)(const struct scmi_protocol_handle *ph); +- const struct scmi_sensor_info *(*info_get) ++ const struct scmi_sensor_info __must_check *(*info_get) + (const struct scmi_protocol_handle *ph, u32 sensor_id); + int (*trip_point_config)(const struct scmi_protocol_handle *ph, + u32 sensor_id, u8 trip_id, u64 trip_value); +diff --git a/include/linux/security.h b/include/linux/security.h +index 5b7288521300b..da184e7b361f4 100644 +--- a/include/linux/security.h ++++ b/include/linux/security.h +@@ -121,10 +121,12 @@ enum lockdown_reason { + LOCKDOWN_DEBUGFS, + LOCKDOWN_XMON_WR, + LOCKDOWN_BPF_WRITE_USER, ++ LOCKDOWN_DBG_WRITE_KERNEL, + LOCKDOWN_INTEGRITY_MAX, + LOCKDOWN_KCORE, + LOCKDOWN_KPROBES, + LOCKDOWN_BPF_READ_KERNEL, ++ LOCKDOWN_DBG_READ_KERNEL, + LOCKDOWN_PERF, + LOCKDOWN_TRACEFS, + LOCKDOWN_XMON_RW, +@@ -258,13 +260,13 @@ extern int security_init(void); + extern int early_security_init(void); + + /* Security operations */ +-int security_binder_set_context_mgr(struct task_struct *mgr); +-int security_binder_transaction(struct task_struct *from, +- struct task_struct *to); +-int security_binder_transfer_binder(struct task_struct *from, +- struct task_struct *to); +-int security_binder_transfer_file(struct task_struct *from, +- struct task_struct *to, struct file *file); ++int security_binder_set_context_mgr(const struct cred *mgr); ++int security_binder_transaction(const struct cred *from, ++ const struct cred *to); ++int security_binder_transfer_binder(const struct cred *from, ++ const struct cred *to); ++int security_binder_transfer_file(const struct cred *from, ++ const struct cred *to, struct file *file); + int security_ptrace_access_check(struct task_struct *child, unsigned int mode); + int security_ptrace_traceme(struct task_struct *parent); + int security_capget(struct task_struct *target, +@@ -508,25 +510,25 @@ static inline int early_security_init(void) + return 0; + } + +-static inline int security_binder_set_context_mgr(struct 
task_struct *mgr) ++static inline int security_binder_set_context_mgr(const struct cred *mgr) + { + return 0; + } + +-static inline int security_binder_transaction(struct task_struct *from, +- struct task_struct *to) ++static inline int security_binder_transaction(const struct cred *from, ++ const struct cred *to) + { + return 0; + } + +-static inline int security_binder_transfer_binder(struct task_struct *from, +- struct task_struct *to) ++static inline int security_binder_transfer_binder(const struct cred *from, ++ const struct cred *to) + { + return 0; + } + +-static inline int security_binder_transfer_file(struct task_struct *from, +- struct task_struct *to, ++static inline int security_binder_transfer_file(const struct cred *from, ++ const struct cred *to, + struct file *file) + { + return 0; +@@ -1041,6 +1043,11 @@ static inline void security_transfer_creds(struct cred *new, + { + } + ++static inline void security_cred_getsecid(const struct cred *c, u32 *secid) ++{ ++ *secid = 0; ++} ++ + static inline int security_kernel_act_as(struct cred *cred, u32 secid) + { + return 0; +diff --git a/include/linux/seq_file.h b/include/linux/seq_file.h +index dd99569595fd3..0b429111f85e4 100644 +--- a/include/linux/seq_file.h ++++ b/include/linux/seq_file.h +@@ -194,7 +194,7 @@ static const struct file_operations __name ## _fops = { \ + #define DEFINE_PROC_SHOW_ATTRIBUTE(__name) \ + static int __name ## _open(struct inode *inode, struct file *file) \ + { \ +- return single_open(file, __name ## _show, inode->i_private); \ ++ return single_open(file, __name ## _show, PDE_DATA(inode)); \ + } \ + \ + static const struct proc_ops __name ## _proc_ops = { \ +@@ -261,6 +261,10 @@ extern struct list_head *seq_list_start_head(struct list_head *head, + extern struct list_head *seq_list_next(void *v, struct list_head *head, + loff_t *ppos); + ++extern struct list_head *seq_list_start_rcu(struct list_head *head, loff_t pos); ++extern struct list_head *seq_list_start_head_rcu(struct list_head *head, loff_t pos); ++extern struct list_head *seq_list_next_rcu(void *v, struct list_head *head, loff_t *ppos); ++ + /* + * Helpers for iteration over hlist_head-s in seq_files + */ +diff --git a/include/linux/serial_8250.h b/include/linux/serial_8250.h +index 5db211f43b29e..68abc6bdd8914 100644 +--- a/include/linux/serial_8250.h ++++ b/include/linux/serial_8250.h +@@ -74,6 +74,7 @@ struct uart_8250_port; + struct uart_8250_ops { + int (*setup_irq)(struct uart_8250_port *); + void (*release_irq)(struct uart_8250_port *); ++ void (*setup_timer)(struct uart_8250_port *); + }; + + struct uart_8250_em485 { +diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h +index c58cc142d23f4..d5b6b1550d596 100644 +--- a/include/linux/serial_core.h ++++ b/include/linux/serial_core.h +@@ -100,7 +100,7 @@ struct uart_icount { + __u32 buf_overrun; + }; + +-typedef unsigned int __bitwise upf_t; ++typedef u64 __bitwise upf_t; + typedef unsigned int __bitwise upstat_t; + + struct uart_port { +@@ -207,6 +207,7 @@ struct uart_port { + #define UPF_FIXED_PORT ((__force upf_t) (1 << 29)) + #define UPF_DEAD ((__force upf_t) (1 << 30)) + #define UPF_IOREMAP ((__force upf_t) (1 << 31)) ++#define UPF_FULL_PROBE ((__force upf_t) (1ULL << 32)) + + #define __UPF_CHANGE_MASK 0x17fff + #define UPF_CHANGE_MASK ((__force upf_t) __UPF_CHANGE_MASK) +@@ -253,6 +254,7 @@ struct uart_port { + struct attribute_group *attr_group; /* port specific attributes */ + const struct attribute_group **tty_groups; /* all attributes (serial core use only) */ + 
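The new seq_list_*_rcu() helpers above mirror the existing seq_list iterators for lists that are walked under rcu_read_lock(). A sketch of a seq_file wired up with them, using a hypothetical RCU-managed list:

    static LIST_HEAD(my_list);              /* hypothetical */

    static void *my_seq_start(struct seq_file *m, loff_t *pos)
    {
        rcu_read_lock();
        return seq_list_start_rcu(&my_list, *pos);
    }

    static void *my_seq_next(struct seq_file *m, void *v, loff_t *pos)
    {
        return seq_list_next_rcu(v, &my_list, pos);
    }

    static void my_seq_stop(struct seq_file *m, void *v)
    {
        rcu_read_unlock();
    }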
struct serial_rs485 rs485; ++ const struct serial_rs485 *rs485_supported; /* Supported mask for serial_rs485 */ + struct gpio_desc *rs485_term_gpio; /* enable RS485 bus termination */ + struct serial_iso7816 iso7816; + void *private_data; /* generic platform data pointer */ +@@ -300,6 +302,23 @@ struct uart_state { + /* number of characters left in xmit buffer before we ask for more */ + #define WAKEUP_CHARS 256 + ++/** ++ * uart_xmit_advance - Advance xmit buffer and account Tx'ed chars ++ * @up: uart_port structure describing the port ++ * @chars: number of characters sent ++ * ++ * This function advances the tail of circular xmit buffer by the number of ++ * @chars transmitted and handles accounting of transmitted bytes (into ++ * @up's icount.tx). ++ */ ++static inline void uart_xmit_advance(struct uart_port *up, unsigned int chars) ++{ ++ struct circ_buf *xmit = &up->state->xmit; ++ ++ xmit->tail = (xmit->tail + chars) & (UART_XMIT_SIZE - 1); ++ up->icount.tx += chars; ++} ++ + struct module; + struct tty_driver; + +@@ -388,6 +407,11 @@ static const bool earlycon_acpi_spcr_enable EARLYCON_USED_OR_UNUSED; + static inline int setup_earlycon(char *buf) { return 0; } + #endif + ++static inline bool uart_console_enabled(struct uart_port *port) ++{ ++ return uart_console(port) && (port->cons->flags & CON_ENABLED); ++} ++ + struct uart_port *uart_get_console(struct uart_port *ports, int nr, + struct console *c); + int uart_parse_earlycon(char *p, unsigned char *iotype, resource_size_t *addr, +@@ -458,6 +482,8 @@ extern void uart_handle_cts_change(struct uart_port *uport, + extern void uart_insert_char(struct uart_port *port, unsigned int status, + unsigned int overrun, unsigned int ch, unsigned int flag); + ++void uart_xchar_out(struct uart_port *uport, int offset); ++ + #ifdef CONFIG_MAGIC_SYSRQ_SERIAL + #define SYSRQ_TIMEOUT (HZ * 5) + +diff --git a/include/linux/signal_types.h b/include/linux/signal_types.h +index 34cb28b8f16ca..a70b2bdbf4d96 100644 +--- a/include/linux/signal_types.h ++++ b/include/linux/signal_types.h +@@ -70,6 +70,9 @@ struct ksignal { + int sig; + }; + ++/* Used to kill the race between sigaction and forced signals */ ++#define SA_IMMUTABLE 0x00800000 ++ + #ifndef __ARCH_UAPI_SA_FLAGS + #ifdef SA_RESTORER + #define __ARCH_UAPI_SA_FLAGS SA_RESTORER +diff --git a/include/linux/siphash.h b/include/linux/siphash.h +index bf21591a9e5e6..0bb5ecd507bef 100644 +--- a/include/linux/siphash.h ++++ b/include/linux/siphash.h +@@ -27,9 +27,7 @@ static inline bool siphash_key_is_zero(const siphash_key_t *key) + } + + u64 __siphash_aligned(const void *data, size_t len, const siphash_key_t *key); +-#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS + u64 __siphash_unaligned(const void *data, size_t len, const siphash_key_t *key); +-#endif + + u64 siphash_1u64(const u64 a, const siphash_key_t *key); + u64 siphash_2u64(const u64 a, const u64 b, const siphash_key_t *key); +@@ -82,10 +80,9 @@ static inline u64 ___siphash_aligned(const __le64 *data, size_t len, + static inline u64 siphash(const void *data, size_t len, + const siphash_key_t *key) + { +-#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS +- if (!IS_ALIGNED((unsigned long)data, SIPHASH_ALIGNMENT)) ++ if (IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) || ++ !IS_ALIGNED((unsigned long)data, SIPHASH_ALIGNMENT)) + return __siphash_unaligned(data, len, key); +-#endif + return ___siphash_aligned(data, len, key); + } + +@@ -96,10 +93,8 @@ typedef struct { + + u32 __hsiphash_aligned(const void *data, size_t len, + const hsiphash_key_t 
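uart_xmit_advance() above centralises the tail arithmetic and icount.tx accounting that UART drivers used to open-code. A typical FIFO fill loop then reduces to something like the following, with the register name and FIFO depth invented for illustration:

    struct circ_buf *xmit = &port->state->xmit;
    unsigned int fifo_room = 16;            /* hypothetical FIFO depth */

    while (!uart_circ_empty(xmit) && fifo_room--) {
        writeb(xmit->buf[xmit->tail], port->membase + TX_FIFO); /* hypothetical */
        uart_xmit_advance(port, 1);
    }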
*key); +-#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS + u32 __hsiphash_unaligned(const void *data, size_t len, + const hsiphash_key_t *key); +-#endif + + u32 hsiphash_1u32(const u32 a, const hsiphash_key_t *key); + u32 hsiphash_2u32(const u32 a, const u32 b, const hsiphash_key_t *key); +@@ -135,11 +130,38 @@ static inline u32 ___hsiphash_aligned(const __le32 *data, size_t len, + static inline u32 hsiphash(const void *data, size_t len, + const hsiphash_key_t *key) + { +-#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS +- if (!IS_ALIGNED((unsigned long)data, HSIPHASH_ALIGNMENT)) ++ if (IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) || ++ !IS_ALIGNED((unsigned long)data, HSIPHASH_ALIGNMENT)) + return __hsiphash_unaligned(data, len, key); +-#endif + return ___hsiphash_aligned(data, len, key); + } + ++/* ++ * These macros expose the raw SipHash and HalfSipHash permutations. ++ * Do not use them directly! If you think you have a use for them, ++ * be sure to CC the maintainer of this file explaining why. ++ */ ++ ++#define SIPHASH_PERMUTATION(a, b, c, d) ( \ ++ (a) += (b), (b) = rol64((b), 13), (b) ^= (a), (a) = rol64((a), 32), \ ++ (c) += (d), (d) = rol64((d), 16), (d) ^= (c), \ ++ (a) += (d), (d) = rol64((d), 21), (d) ^= (a), \ ++ (c) += (b), (b) = rol64((b), 17), (b) ^= (c), (c) = rol64((c), 32)) ++ ++#define SIPHASH_CONST_0 0x736f6d6570736575ULL ++#define SIPHASH_CONST_1 0x646f72616e646f6dULL ++#define SIPHASH_CONST_2 0x6c7967656e657261ULL ++#define SIPHASH_CONST_3 0x7465646279746573ULL ++ ++#define HSIPHASH_PERMUTATION(a, b, c, d) ( \ ++ (a) += (b), (b) = rol32((b), 5), (b) ^= (a), (a) = rol32((a), 16), \ ++ (c) += (d), (d) = rol32((d), 8), (d) ^= (c), \ ++ (a) += (d), (d) = rol32((d), 7), (d) ^= (a), \ ++ (c) += (b), (b) = rol32((b), 13), (b) ^= (c), (c) = rol32((c), 16)) ++ ++#define HSIPHASH_CONST_0 0U ++#define HSIPHASH_CONST_1 0U ++#define HSIPHASH_CONST_2 0x6c796765U ++#define HSIPHASH_CONST_3 0x74656462U ++ + #endif /* _LINUX_SIPHASH_H */ +diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h +index 841e2f0f5240b..19e595cab23ac 100644 +--- a/include/linux/skbuff.h ++++ b/include/linux/skbuff.h +@@ -286,7 +286,10 @@ struct nf_bridge_info { + struct tc_skb_ext { + __u32 chain; + __u16 mru; +- bool post_ct; ++ __u16 zone; ++ u8 post_ct:1; ++ u8 post_ct_snat:1; ++ u8 post_ct_dnat:1; + }; + #endif + +@@ -301,6 +304,41 @@ struct sk_buff_head { + + struct sk_buff; + ++/* The reason of skb drop, which is used in kfree_skb_reason(). ++ * en...maybe they should be splited by group? ++ * ++ * Each item here should also be in 'TRACE_SKB_DROP_REASON', which is ++ * used to translate the reason to string. ++ */ ++enum skb_drop_reason { ++ SKB_DROP_REASON_NOT_SPECIFIED, /* drop reason is not specified */ ++ SKB_DROP_REASON_NO_SOCKET, /* socket not found */ ++ SKB_DROP_REASON_PKT_TOO_SMALL, /* packet size is too small */ ++ SKB_DROP_REASON_TCP_CSUM, /* TCP checksum error */ ++ SKB_DROP_REASON_SOCKET_FILTER, /* dropped by socket filter */ ++ SKB_DROP_REASON_UDP_CSUM, /* UDP checksum error */ ++ SKB_DROP_REASON_NETFILTER_DROP, /* dropped by netfilter */ ++ SKB_DROP_REASON_OTHERHOST, /* packet don't belong to current ++ * host (interface is in promisc ++ * mode) ++ */ ++ SKB_DROP_REASON_IP_CSUM, /* IP checksum error */ ++ SKB_DROP_REASON_IP_INHDR, /* there is something wrong with ++ * IP header (see ++ * IPSTATS_MIB_INHDRERRORS) ++ */ ++ SKB_DROP_REASON_IP_RPFILTER, /* IP rpfilter validate failed. 
++ * see the document for rp_filter ++ * in ip-sysctl.rst for more ++ * information ++ */ ++ SKB_DROP_REASON_UNICAST_IN_L2_MULTICAST, /* destination address of L2 ++ * is multicast, but L3 is ++ * unicast. ++ */ ++ SKB_DROP_REASON_MAX, ++}; ++ + /* To allow 64K frame to be packed as single skb without frag_list we + * require 64K/PAGE_SIZE pages plus 1 additional page to allow for + * buffers which do not start on a page boundary. +@@ -687,6 +725,7 @@ typedef unsigned char *sk_buff_data_t; + * @csum_level: indicates the number of consecutive checksums found in + * the packet minus one that have been verified as + * CHECKSUM_UNNECESSARY (max 3) ++ * @scm_io_uring: SKB holds io_uring registered files + * @dst_pending_confirm: need to confirm neighbour + * @decrypted: Decrypted SKB + * @slow_gro: state present at GRO time, slower prepare step required +@@ -872,6 +911,7 @@ struct sk_buff { + __u8 decrypted:1; + #endif + __u8 slow_gro:1; ++ __u8 scm_io_uring:1; + + #ifdef CONFIG_NET_SCHED + __u16 tc_index; /* traffic control index */ +@@ -1071,8 +1111,18 @@ static inline bool skb_unref(struct sk_buff *skb) + return true; + } + ++void kfree_skb_reason(struct sk_buff *skb, enum skb_drop_reason reason); ++ ++/** ++ * kfree_skb - free an sk_buff with 'NOT_SPECIFIED' reason ++ * @skb: buffer to free ++ */ ++static inline void kfree_skb(struct sk_buff *skb) ++{ ++ kfree_skb_reason(skb, SKB_DROP_REASON_NOT_SPECIFIED); ++} ++ + void skb_release_head_state(struct sk_buff *skb); +-void kfree_skb(struct sk_buff *skb); + void kfree_skb_list(struct sk_buff *segs); + void skb_dump(const char *level, const struct sk_buff *skb, bool full_pkt); + void skb_tx_error(struct sk_buff *skb); +@@ -1370,7 +1420,7 @@ skb_flow_dissect_ct(const struct sk_buff *skb, + struct flow_dissector *flow_dissector, + void *target_container, + u16 *ctinfo_map, size_t mapsize, +- bool post_ct); ++ bool post_ct, u16 zone); + void + skb_flow_dissect_tunnel_info(const struct sk_buff *skb, + struct flow_dissector *flow_dissector, +@@ -1433,6 +1483,11 @@ static inline unsigned int skb_end_offset(const struct sk_buff *skb) + { + return skb->end; + } ++ ++static inline void skb_set_end_offset(struct sk_buff *skb, unsigned int offset) ++{ ++ skb->end = offset; ++} + #else + static inline unsigned char *skb_end_pointer(const struct sk_buff *skb) + { +@@ -1443,6 +1498,11 @@ static inline unsigned int skb_end_offset(const struct sk_buff *skb) + { + return skb->end - skb->head; + } ++ ++static inline void skb_set_end_offset(struct sk_buff *skb, unsigned int offset) ++{ ++ skb->end = skb->head + offset; ++} + #endif + + /* Internal */ +@@ -1671,6 +1731,22 @@ static inline int skb_unclone(struct sk_buff *skb, gfp_t pri) + return 0; + } + ++/* This variant of skb_unclone() makes sure skb->truesize ++ * and skb_end_offset() are not changed, whenever a new skb->head is needed. ++ * ++ * Indeed there is no guarantee that ksize(kmalloc(X)) == ksize(kmalloc(X)) ++ * when various debugging features are in place. 
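The kfree_skb_reason() split above lets each drop site record why a packet died, while plain kfree_skb() keeps its old behaviour. A hypothetical receive-path drop site (the function name is invented) might read:

    static void hypothetical_rx_drop(struct sk_buff *skb, bool bad_csum)
    {
            if (bad_csum)
                    kfree_skb_reason(skb, SKB_DROP_REASON_TCP_CSUM);
            else
                    kfree_skb(skb); /* implies SKB_DROP_REASON_NOT_SPECIFIED */
    }
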
++ */ ++int __skb_unclone_keeptruesize(struct sk_buff *skb, gfp_t pri); ++static inline int skb_unclone_keeptruesize(struct sk_buff *skb, gfp_t pri) ++{ ++ might_sleep_if(gfpflags_allow_blocking(pri)); ++ ++ if (skb_cloned(skb)) ++ return __skb_unclone_keeptruesize(skb, pri); ++ return 0; ++} ++ + /** + * skb_header_cloned - is the header a clone + * @skb: buffer to check +@@ -2158,6 +2234,22 @@ static inline unsigned int skb_pagelen(const struct sk_buff *skb) + return skb_headlen(skb) + __skb_pagelen(skb); + } + ++static inline void __skb_fill_page_desc_noacc(struct skb_shared_info *shinfo, ++ int i, struct page *page, ++ int off, int size) ++{ ++ skb_frag_t *frag = &shinfo->frags[i]; ++ ++ /* ++ * Propagate page pfmemalloc to the skb if we can. The problem is ++ * that not all callers have unique ownership of the page but rely ++ * on page_is_pfmemalloc doing the right thing(tm). ++ */ ++ frag->bv_page = page; ++ frag->bv_offset = off; ++ skb_frag_size_set(frag, size); ++} ++ + /** + * __skb_fill_page_desc - initialise a paged fragment in an skb + * @skb: buffer containing fragment to be initialised +@@ -2174,17 +2266,7 @@ static inline unsigned int skb_pagelen(const struct sk_buff *skb) + static inline void __skb_fill_page_desc(struct sk_buff *skb, int i, + struct page *page, int off, int size) + { +- skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; +- +- /* +- * Propagate page pfmemalloc to the skb if we can. The problem is +- * that not all callers have unique ownership of the page but rely +- * on page_is_pfmemalloc doing the right thing(tm). +- */ +- frag->bv_page = page; +- frag->bv_offset = off; +- skb_frag_size_set(frag, size); +- ++ __skb_fill_page_desc_noacc(skb_shinfo(skb), i, page, off, size); + page = compound_head(page); + if (page_is_pfmemalloc(page)) + skb->pfmemalloc = true; +@@ -2211,6 +2293,27 @@ static inline void skb_fill_page_desc(struct sk_buff *skb, int i, + skb_shinfo(skb)->nr_frags = i + 1; + } + ++/** ++ * skb_fill_page_desc_noacc - initialise a paged fragment in an skb ++ * @skb: buffer containing fragment to be initialised ++ * @i: paged fragment index to initialise ++ * @page: the page to use for this fragment ++ * @off: the offset to the data with @page ++ * @size: the length of the data ++ * ++ * Variant of skb_fill_page_desc() which does not deal with ++ * pfmemalloc, if page is not owned by us. 
++ */ ++static inline void skb_fill_page_desc_noacc(struct sk_buff *skb, int i, ++ struct page *page, int off, ++ int size) ++{ ++ struct skb_shared_info *shinfo = skb_shinfo(skb); ++ ++ __skb_fill_page_desc_noacc(shinfo, i, page, off, size); ++ shinfo->nr_frags = i + 1; ++} ++ + void skb_add_rx_frag(struct sk_buff *skb, int i, struct page *page, int off, + int size, unsigned int truesize); + +@@ -2254,6 +2357,14 @@ static inline void skb_set_tail_pointer(struct sk_buff *skb, const int offset) + + #endif /* NET_SKBUFF_DATA_USES_OFFSET */ + ++static inline void skb_assert_len(struct sk_buff *skb) ++{ ++#ifdef CONFIG_DEBUG_NET ++ if (WARN_ONCE(!skb->len, "%s\n", __func__)) ++ DO_ONCE_LITE(skb_dump, KERN_ERR, skb, false); ++#endif /* CONFIG_DEBUG_NET */ ++} ++ + /* + * Add data to an sk_buff + */ +diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h +index 1ce9a9eb223b6..6e18ca234f812 100644 +--- a/include/linux/skmsg.h ++++ b/include/linux/skmsg.h +@@ -83,6 +83,7 @@ struct sk_psock { + u32 apply_bytes; + u32 cork_bytes; + u32 eval; ++ bool redir_ingress; /* undefined if sk_redir is null */ + struct sk_msg *cork; + struct sk_psock_progs progs; + #if IS_ENABLED(CONFIG_BPF_STREAM_PARSER) +@@ -96,6 +97,7 @@ struct sk_psock { + spinlock_t link_lock; + refcount_t refcnt; + void (*saved_unhash)(struct sock *sk); ++ void (*saved_destroy)(struct sock *sk); + void (*saved_close)(struct sock *sk, long timeout); + void (*saved_write_space)(struct sock *sk); + void (*saved_data_ready)(struct sock *sk); +@@ -283,7 +285,8 @@ static inline void sk_msg_sg_copy_clear(struct sk_msg *msg, u32 start) + + static inline struct sk_psock *sk_psock(const struct sock *sk) + { +- return rcu_dereference_sk_user_data(sk); ++ return __rcu_dereference_sk_user_data_with_flags(sk, ++ SK_USER_DATA_PSOCK); + } + + static inline void sk_psock_set_state(struct sk_psock *psock, +@@ -310,21 +313,16 @@ static inline void sock_drop(struct sock *sk, struct sk_buff *skb) + kfree_skb(skb); + } + +-static inline void drop_sk_msg(struct sk_psock *psock, struct sk_msg *msg) +-{ +- if (msg->skb) +- sock_drop(psock->sk, msg->skb); +- kfree(msg); +-} +- + static inline void sk_psock_queue_msg(struct sk_psock *psock, + struct sk_msg *msg) + { + spin_lock_bh(&psock->ingress_lock); + if (sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED)) + list_add_tail(&msg->list, &psock->ingress_msg); +- else +- drop_sk_msg(psock, msg); ++ else { ++ sk_msg_free(psock->sk, msg); ++ kfree(msg); ++ } + spin_unlock_bh(&psock->ingress_lock); + } + +@@ -385,7 +383,7 @@ static inline void sk_psock_report_error(struct sk_psock *psock, int err) + } + + struct sk_psock *sk_psock_init(struct sock *sk, int node); +-void sk_psock_stop(struct sk_psock *psock, bool wait); ++void sk_psock_stop(struct sk_psock *psock); + + #if IS_ENABLED(CONFIG_BPF_STREAM_PARSER) + int sk_psock_init_strp(struct sock *sk, struct sk_psock *psock); +@@ -509,8 +507,22 @@ static inline bool sk_psock_strp_enabled(struct sk_psock *psock) + + #if IS_ENABLED(CONFIG_NET_SOCK_MSG) + +-/* We only have one bit so far. */ +-#define BPF_F_PTR_MASK ~(BPF_F_INGRESS) ++#define BPF_F_STRPARSER (1UL << 1) ++ ++/* We only have two bits so far. 
*/ ++#define BPF_F_PTR_MASK ~(BPF_F_INGRESS | BPF_F_STRPARSER) ++ ++static inline bool skb_bpf_strparser(const struct sk_buff *skb) ++{ ++ unsigned long sk_redir = skb->_sk_redir; ++ ++ return sk_redir & BPF_F_STRPARSER; ++} ++ ++static inline void skb_bpf_set_strparser(struct sk_buff *skb) ++{ ++ skb->_sk_redir |= BPF_F_STRPARSER; ++} + + static inline bool skb_bpf_ingress(const struct sk_buff *skb) + { +diff --git a/include/linux/soc/marvell/octeontx2/asm.h b/include/linux/soc/marvell/octeontx2/asm.h +index fa1d6af0164ee..d683251a0b409 100644 +--- a/include/linux/soc/marvell/octeontx2/asm.h ++++ b/include/linux/soc/marvell/octeontx2/asm.h +@@ -5,6 +5,7 @@ + #ifndef __SOC_OTX2_ASM_H + #define __SOC_OTX2_ASM_H + ++#include <linux/types.h> + #if defined(CONFIG_ARM64) + /* + * otx2_lmt_flush is used for LMT store operation. +@@ -34,9 +35,23 @@ + : [rf] "+r"(val) \ + : [rs] "r"(addr)); \ + }) ++ ++static inline u64 otx2_atomic64_fetch_add(u64 incr, u64 *ptr) ++{ ++ u64 result; ++ ++ asm volatile (".cpu generic+lse\n" ++ "ldadda %x[i], %x[r], [%[b]]" ++ : [r] "=r" (result), "+m" (*ptr) ++ : [i] "r" (incr), [b] "r" (ptr) ++ : "memory"); ++ return result; ++} ++ + #else + #define otx2_lmt_flush(ioaddr) ({ 0; }) + #define cn10k_lmt_flush(val, addr) ({ addr = val; }) ++#define otx2_atomic64_fetch_add(incr, ptr) ({ incr; }) + #endif + + #endif /* __SOC_OTX2_ASM_H */ +diff --git a/include/linux/soc/qcom/apr.h b/include/linux/soc/qcom/apr.h +index 137f9f2ac4c3c..7bca213a3f838 100644 +--- a/include/linux/soc/qcom/apr.h ++++ b/include/linux/soc/qcom/apr.h +@@ -79,6 +79,15 @@ struct apr_resp_pkt { + #define APR_SVC_MAJOR_VERSION(v) ((v >> 16) & 0xFF) + #define APR_SVC_MINOR_VERSION(v) (v & 0xFF) + ++struct packet_router; ++struct pkt_router_svc { ++ struct device *dev; ++ struct packet_router *pr; ++ spinlock_t lock; ++ int id; ++ void *priv; ++}; ++ + struct apr_device { + struct device dev; + uint16_t svc_id; +@@ -86,11 +95,12 @@ struct apr_device { + uint32_t version; + char name[APR_NAME_SIZE]; + const char *service_path; +- spinlock_t lock; ++ struct pkt_router_svc svc; + struct list_head node; + }; + + #define to_apr_device(d) container_of(d, struct apr_device, dev) ++#define svc_to_apr_device(d) container_of(d, struct apr_device, svc) + + struct apr_driver { + int (*probe)(struct apr_device *sl); +diff --git a/include/linux/soc/qcom/qcom_aoss.h b/include/linux/soc/qcom/qcom_aoss.h +new file mode 100644 +index 0000000000000..3c2a82e606f81 +--- /dev/null ++++ b/include/linux/soc/qcom/qcom_aoss.h +@@ -0,0 +1,38 @@ ++/* SPDX-License-Identifier: GPL-2.0-only */ ++/* ++ * Copyright (c) 2021, The Linux Foundation. All rights reserved. 
++ */ ++ ++#ifndef __QCOM_AOSS_H__ ++#define __QCOM_AOSS_H__ ++ ++#include <linux/err.h> ++#include <linux/device.h> ++ ++struct qmp; ++ ++#if IS_ENABLED(CONFIG_QCOM_AOSS_QMP) ++ ++int qmp_send(struct qmp *qmp, const void *data, size_t len); ++struct qmp *qmp_get(struct device *dev); ++void qmp_put(struct qmp *qmp); ++ ++#else ++ ++static inline int qmp_send(struct qmp *qmp, const void *data, size_t len) ++{ ++ return -ENODEV; ++} ++ ++static inline struct qmp *qmp_get(struct device *dev) ++{ ++ return ERR_PTR(-ENODEV); ++} ++ ++static inline void qmp_put(struct qmp *qmp) ++{ ++} ++ ++#endif ++ ++#endif +diff --git a/include/linux/soc/ti/ti_sci_protocol.h b/include/linux/soc/ti/ti_sci_protocol.h +index 0aad7009b50e6..bd0d11af76c5e 100644 +--- a/include/linux/soc/ti/ti_sci_protocol.h ++++ b/include/linux/soc/ti/ti_sci_protocol.h +@@ -645,7 +645,7 @@ devm_ti_sci_get_of_resource(const struct ti_sci_handle *handle, + + static inline struct ti_sci_resource * + devm_ti_sci_get_resource(const struct ti_sci_handle *handle, struct device *dev, +- u32 dev_id, u32 sub_type); ++ u32 dev_id, u32 sub_type) + { + return ERR_PTR(-EINVAL); + } +diff --git a/include/linux/soundwire/sdw.h b/include/linux/soundwire/sdw.h +index 76ce3f3ac0f22..bf6f0decb3f6d 100644 +--- a/include/linux/soundwire/sdw.h ++++ b/include/linux/soundwire/sdw.h +@@ -646,9 +646,6 @@ struct sdw_slave_ops { + * @dev_num: Current Device Number, values can be 0 or dev_num_sticky + * @dev_num_sticky: one-time static Device Number assigned by Bus + * @probed: boolean tracking driver state +- * @probe_complete: completion utility to control potential races +- * on startup between driver probe/initialization and SoundWire +- * Slave state changes/implementation-defined interrupts + * @enumeration_complete: completion utility to control potential races + * on startup between device enumeration and read/write access to the + * Slave device +@@ -663,6 +660,7 @@ struct sdw_slave_ops { + * for a Slave happens for the first time after enumeration + * @is_mockup_device: status flag used to squelch errors in the command/control + * protocol for SoundWire mockup devices ++ * @sdw_dev_lock: mutex used to protect callbacks/remove races + */ + struct sdw_slave { + struct sdw_slave_id id; +@@ -680,12 +678,12 @@ struct sdw_slave { + u16 dev_num; + u16 dev_num_sticky; + bool probed; +- struct completion probe_complete; + struct completion enumeration_complete; + struct completion initialization_complete; + u32 unattach_request; + bool first_interrupt_done; + bool is_mockup_device; ++ struct mutex sdw_dev_lock; /* protect callbacks/remove races */ + }; + + #define dev_to_sdw_dev(_dev) container_of(_dev, struct sdw_slave, dev) +diff --git a/include/linux/stackdepot.h b/include/linux/stackdepot.h +index 6bb4bc1a5f545..22919a94ca19d 100644 +--- a/include/linux/stackdepot.h ++++ b/include/linux/stackdepot.h +@@ -19,8 +19,6 @@ depot_stack_handle_t stack_depot_save(unsigned long *entries, + unsigned int stack_depot_fetch(depot_stack_handle_t handle, + unsigned long **entries); + +-unsigned int filter_irq_stacks(unsigned long *entries, unsigned int nr_entries); +- + #ifdef CONFIG_STACKDEPOT + int stack_depot_init(void); + #else +diff --git a/include/linux/stacktrace.h b/include/linux/stacktrace.h +index 9edecb494e9e2..bef158815e83d 100644 +--- a/include/linux/stacktrace.h ++++ b/include/linux/stacktrace.h +@@ -21,6 +21,7 @@ unsigned int stack_trace_save_tsk(struct task_struct *task, + unsigned int stack_trace_save_regs(struct pt_regs *regs, unsigned long *store, + 
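The new qcom_aoss.h header above follows the usual optional-subsystem pattern: real prototypes when CONFIG_QCOM_AOSS_QMP is enabled, -ENODEV stubs otherwise, so consumers need no #ifdef of their own. A consumer sketch follows; the function name and payload string are invented placeholders, not a real QMP message:

    static int hypothetical_qmp_poke(struct device *dev)
    {
            static const char msg[] = "{example: 1}";   /* placeholder */
            struct qmp *qmp = qmp_get(dev);
            int ret;

            if (IS_ERR(qmp))
                    return PTR_ERR(qmp);    /* -ENODEV when compiled out */

            ret = qmp_send(qmp, msg, sizeof(msg));
            qmp_put(qmp);
            return ret;
    }
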
unsigned int size, unsigned int skipnr); + unsigned int stack_trace_save_user(unsigned long *store, unsigned int size); ++unsigned int filter_irq_stacks(unsigned long *entries, unsigned int nr_entries); + + /* Internal interfaces. Do not use in generic code */ + #ifdef CONFIG_ARCH_STACKWALK +diff --git a/include/linux/static_call.h b/include/linux/static_call.h +index 3e56a9751c062..fcc5b48989b3c 100644 +--- a/include/linux/static_call.h ++++ b/include/linux/static_call.h +@@ -248,10 +248,7 @@ static inline int static_call_text_reserved(void *start, void *end) + return 0; + } + +-static inline long __static_call_return0(void) +-{ +- return 0; +-} ++extern long __static_call_return0(void); + + #define EXPORT_STATIC_CALL(name) \ + EXPORT_SYMBOL(STATIC_CALL_KEY(name)); \ +diff --git a/include/linux/stddef.h b/include/linux/stddef.h +index 998a4ba28eba4..31fdbb784c24e 100644 +--- a/include/linux/stddef.h ++++ b/include/linux/stddef.h +@@ -36,4 +36,65 @@ enum { + #define offsetofend(TYPE, MEMBER) \ + (offsetof(TYPE, MEMBER) + sizeof_field(TYPE, MEMBER)) + ++/** ++ * struct_group() - Wrap a set of declarations in a mirrored struct ++ * ++ * @NAME: The identifier name of the mirrored sub-struct ++ * @MEMBERS: The member declarations for the mirrored structs ++ * ++ * Used to create an anonymous union of two structs with identical ++ * layout and size: one anonymous and one named. The former can be ++ * used normally without sub-struct naming, and the latter can be ++ * used to reason about the start, end, and size of the group of ++ * struct members. ++ */ ++#define struct_group(NAME, MEMBERS...) \ ++ __struct_group(/* no tag */, NAME, /* no attrs */, MEMBERS) ++ ++/** ++ * struct_group_attr() - Create a struct_group() with trailing attributes ++ * ++ * @NAME: The identifier name of the mirrored sub-struct ++ * @ATTRS: Any struct attributes to apply ++ * @MEMBERS: The member declarations for the mirrored structs ++ * ++ * Used to create an anonymous union of two structs with identical ++ * layout and size: one anonymous and one named. The former can be ++ * used normally without sub-struct naming, and the latter can be ++ * used to reason about the start, end, and size of the group of ++ * struct members. Includes structure attributes argument. ++ */ ++#define struct_group_attr(NAME, ATTRS, MEMBERS...) \ ++ __struct_group(/* no tag */, NAME, ATTRS, MEMBERS) ++ ++/** ++ * struct_group_tagged() - Create a struct_group with a reusable tag ++ * ++ * @TAG: The tag name for the named sub-struct ++ * @NAME: The identifier name of the mirrored sub-struct ++ * @MEMBERS: The member declarations for the mirrored structs ++ * ++ * Used to create an anonymous union of two structs with identical ++ * layout and size: one anonymous and one named. The former can be ++ * used normally without sub-struct naming, and the latter can be ++ * used to reason about the start, end, and size of the group of ++ * struct members. Includes struct tag argument for the named copy, ++ * so the specified layout can be reused later. ++ */ ++#define struct_group_tagged(TAG, NAME, MEMBERS...) \ ++ __struct_group(TAG, NAME, /* no attrs */, MEMBERS) ++ ++/** ++ * DECLARE_FLEX_ARRAY() - Declare a flexible array usable in a union ++ * ++ * @TYPE: The type of each flexible array element ++ * @NAME: The name of the flexible array member ++ * ++ * In order to have a flexible array member in a union or alone in a ++ * struct, it needs to be wrapped in an anonymous struct with at least 1 ++ * named member, but that member can be empty. 
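To make the struct_group() helpers documented above concrete: the mirrored sub-struct gives a bounds-checkable memcpy() a single destination that spans several named members, while the anonymous copy keeps those members addressable by name. A sketch with invented names:

    struct hypothetical_hdr {
            u8 type;
            struct_group(addrs,
                    u8 src[6];
                    u8 dst[6];
            );
            __be16 len;
    };

    /* Given some source buffer 'buf', one bounded copy covers both
     * addresses; hdr.src and hdr.dst remain directly accessible via
     * the anonymous mirror. */
    struct hypothetical_hdr hdr;

    memcpy(&hdr.addrs, buf, sizeof(hdr.addrs));
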
++ */ ++#define DECLARE_FLEX_ARRAY(TYPE, NAME) \ ++ __DECLARE_FLEX_ARRAY(TYPE, NAME) ++ + #endif +diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h +index a6f03b36fc4f7..cc338c6c74954 100644 +--- a/include/linux/stmmac.h ++++ b/include/linux/stmmac.h +@@ -233,6 +233,7 @@ struct plat_stmmacenet_data { + int (*clks_config)(void *priv, bool enabled); + int (*crosststamp)(ktime_t *device, struct system_counterval_t *system, + void *ctx); ++ void (*dump_debug_regs)(void *priv); + void *bsp_priv; + struct clk *stmmac_clk; + struct clk *pclk; +@@ -250,6 +251,7 @@ struct plat_stmmacenet_data { + int rss_en; + int mac_port_sel_speed; + bool en_tx_lpi_clockgating; ++ bool rx_clk_runs_in_lpi; + int has_xgmac; + bool vlan_fail_q_en; + u8 vlan_fail_q; +@@ -268,5 +270,6 @@ struct plat_stmmacenet_data { + int msi_rx_base_vec; + int msi_tx_base_vec; + bool use_phy_wol; ++ bool sph_disable; + }; + #endif +diff --git a/include/linux/string.h b/include/linux/string.h +index 5e96d656be7ae..d68097b4f600b 100644 +--- a/include/linux/string.h ++++ b/include/linux/string.h +@@ -262,23 +262,8 @@ void __write_overflow(void) __compiletime_error("detected write beyond size of o + #include <linux/fortify-string.h> + #endif + +-/** +- * memcpy_and_pad - Copy one buffer to another with padding +- * @dest: Where to copy to +- * @dest_len: The destination buffer size +- * @src: Where to copy from +- * @count: The number of bytes to copy +- * @pad: Character to use for padding if space is left in destination. +- */ +-static inline void memcpy_and_pad(void *dest, size_t dest_len, +- const void *src, size_t count, int pad) +-{ +- if (dest_len > count) { +- memcpy(dest, src, count); +- memset(dest + count, pad, dest_len - count); +- } else +- memcpy(dest, src, dest_len); +-} ++void memcpy_and_pad(void *dest, size_t dest_len, const void *src, size_t count, ++ int pad); + + /** + * str_has_prefix - Test if a string has a given prefix +diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h +index a4661646adc9c..9fcf5ffc4f9ad 100644 +--- a/include/linux/sunrpc/clnt.h ++++ b/include/linux/sunrpc/clnt.h +@@ -159,6 +159,7 @@ struct rpc_add_xprt_test { + #define RPC_CLNT_CREATE_NO_RETRANS_TIMEOUT (1UL << 9) + #define RPC_CLNT_CREATE_SOFTERR (1UL << 10) + #define RPC_CLNT_CREATE_REUSEPORT (1UL << 11) ++#define RPC_CLNT_CREATE_CONNECTED (1UL << 12) + + struct rpc_clnt *rpc_create(struct rpc_create_args *args); + struct rpc_clnt *rpc_bind_new_program(struct rpc_clnt *, +diff --git a/include/linux/sunrpc/rpc_pipe_fs.h b/include/linux/sunrpc/rpc_pipe_fs.h +index cd188a527d169..3b35b6f6533aa 100644 +--- a/include/linux/sunrpc/rpc_pipe_fs.h ++++ b/include/linux/sunrpc/rpc_pipe_fs.h +@@ -92,6 +92,11 @@ extern ssize_t rpc_pipe_generic_upcall(struct file *, struct rpc_pipe_msg *, + char __user *, size_t); + extern int rpc_queue_upcall(struct rpc_pipe *, struct rpc_pipe_msg *); + ++/* returns true if the msg is in-flight, i.e., already eaten by the peer */ ++static inline bool rpc_msg_is_inflight(const struct rpc_pipe_msg *msg) { ++ return (msg->copied != 0 && list_empty(&msg->list)); ++} ++ + struct rpc_clnt; + extern struct dentry *rpc_create_client_dir(struct dentry *, const char *, struct rpc_clnt *); + extern int rpc_remove_client_dir(struct rpc_clnt *); +diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h +index 064c96157d1f0..045f34add206f 100644 +--- a/include/linux/sunrpc/svc.h ++++ b/include/linux/sunrpc/svc.h +@@ -384,6 +384,7 @@ struct svc_deferred_req { + size_t addrlen; + struct 
sockaddr_storage daddr; /* where reply must come from */ + size_t daddrlen; ++ void *xprt_ctxt; + struct cache_deferred_req handle; + size_t xprt_hlen; + int argslen; +@@ -532,8 +533,7 @@ int svc_encode_result_payload(struct svc_rqst *rqstp, + unsigned int offset, + unsigned int length); + unsigned int svc_fill_write_vector(struct svc_rqst *rqstp, +- struct page **pages, +- struct kvec *first, size_t total); ++ struct xdr_buf *payload); + char *svc_fill_symlink_pathname(struct svc_rqst *rqstp, + struct kvec *first, void *p, + size_t total); +@@ -566,16 +566,27 @@ static inline void svc_reserve_auth(struct svc_rqst *rqstp, int space) + } + + /** +- * svcxdr_init_decode - Prepare an xdr_stream for svc Call decoding ++ * svcxdr_init_decode - Prepare an xdr_stream for Call decoding + * @rqstp: controlling server RPC transaction context + * ++ * This function currently assumes the RPC header in rq_arg has ++ * already been decoded. Upon return, xdr->p points to the ++ * location of the upper layer header. + */ + static inline void svcxdr_init_decode(struct svc_rqst *rqstp) + { + struct xdr_stream *xdr = &rqstp->rq_arg_stream; +- struct kvec *argv = rqstp->rq_arg.head; ++ struct xdr_buf *buf = &rqstp->rq_arg; ++ struct kvec *argv = buf->head; + +- xdr_init_decode(xdr, &rqstp->rq_arg, argv->iov_base, NULL); ++ /* ++ * svc_getnl() and friends do not keep the xdr_buf's ::len ++ * field up to date. Refresh that field before initializing ++ * the argument decoding stream. ++ */ ++ buf->len = buf->head->iov_len + buf->page_len + buf->tail->iov_len; ++ ++ xdr_init_decode(xdr, buf, argv->iov_base, NULL); + xdr_set_scratch_page(xdr, rqstp->rq_scratch_page); + } + +@@ -598,7 +609,7 @@ static inline void svcxdr_init_encode(struct svc_rqst *rqstp) + xdr->end = resv->iov_base + PAGE_SIZE - rqstp->rq_auth_slack; + buf->len = resv->iov_len; + xdr->page_ptr = buf->pages - 1; +- buf->buflen = PAGE_SIZE * (1 + rqstp->rq_page_end - buf->pages); ++ buf->buflen = PAGE_SIZE * (rqstp->rq_page_end - buf->pages); + buf->buflen -= rqstp->rq_auth_slack; + xdr->rqst = NULL; + } +diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h +index b519609af1d02..3a2c714d6b629 100644 +--- a/include/linux/sunrpc/xdr.h ++++ b/include/linux/sunrpc/xdr.h +@@ -405,8 +405,8 @@ static inline int xdr_stream_encode_item_absent(struct xdr_stream *xdr) + */ + static inline __be32 *xdr_encode_bool(__be32 *p, u32 n) + { +- *p = n ? xdr_one : xdr_zero; +- return p++; ++ *p++ = n ? 
xdr_one : xdr_zero; ++ return p; + } + + /** +@@ -731,6 +731,8 @@ xdr_stream_decode_uint32_array(struct xdr_stream *xdr, + + if (unlikely(xdr_stream_decode_u32(xdr, &len) < 0)) + return -EBADMSG; ++ if (len > SIZE_MAX / sizeof(*p)) ++ return -EBADMSG; + p = xdr_inline_decode(xdr, len * sizeof(*p)); + if (unlikely(!p)) + return -EBADMSG; +diff --git a/include/linux/sunrpc/xprtsock.h b/include/linux/sunrpc/xprtsock.h +index 8c2a712cb2420..689062afdd610 100644 +--- a/include/linux/sunrpc/xprtsock.h ++++ b/include/linux/sunrpc/xprtsock.h +@@ -89,5 +89,6 @@ struct sock_xprt { + #define XPRT_SOCK_WAKE_WRITE (5) + #define XPRT_SOCK_WAKE_PENDING (6) + #define XPRT_SOCK_WAKE_DISCONNECT (7) ++#define XPRT_SOCK_CONNECT_SENT (8) + + #endif /* _LINUX_SUNRPC_XPRTSOCK_H */ +diff --git a/include/linux/surface_aggregator/controller.h b/include/linux/surface_aggregator/controller.h +index 068e1982ad371..74bfdffaf7b0e 100644 +--- a/include/linux/surface_aggregator/controller.h ++++ b/include/linux/surface_aggregator/controller.h +@@ -792,8 +792,8 @@ enum ssam_event_mask { + #define SSAM_EVENT_REGISTRY_KIP \ + SSAM_EVENT_REGISTRY(SSAM_SSH_TC_KIP, 0x02, 0x27, 0x28) + +-#define SSAM_EVENT_REGISTRY_REG \ +- SSAM_EVENT_REGISTRY(SSAM_SSH_TC_REG, 0x02, 0x01, 0x02) ++#define SSAM_EVENT_REGISTRY_REG(tid)\ ++ SSAM_EVENT_REGISTRY(SSAM_SSH_TC_REG, tid, 0x01, 0x02) + + /** + * enum ssam_event_notifier_flags - Flags for event notifiers. +diff --git a/include/linux/suspend.h b/include/linux/suspend.h +index 8af13ba60c7e4..4bcd65679cee0 100644 +--- a/include/linux/suspend.h ++++ b/include/linux/suspend.h +@@ -430,15 +430,7 @@ struct platform_hibernation_ops { + + #ifdef CONFIG_HIBERNATION + /* kernel/power/snapshot.c */ +-extern void __register_nosave_region(unsigned long b, unsigned long e, int km); +-static inline void __init register_nosave_region(unsigned long b, unsigned long e) +-{ +- __register_nosave_region(b, e, 0); +-} +-static inline void __init register_nosave_region_late(unsigned long b, unsigned long e) +-{ +- __register_nosave_region(b, e, 1); +-} ++extern void register_nosave_region(unsigned long b, unsigned long e); + extern int swsusp_page_is_forbidden(struct page *); + extern void swsusp_set_page_free(struct page *); + extern void swsusp_unset_page_free(struct page *); +@@ -457,7 +449,6 @@ int pfn_is_nosave(unsigned long pfn); + int hibernate_quiet_exec(int (*func)(void *data), void *data); + #else /* CONFIG_HIBERNATION */ + static inline void register_nosave_region(unsigned long b, unsigned long e) {} +-static inline void register_nosave_region_late(unsigned long b, unsigned long e) {} + static inline int swsusp_page_is_forbidden(struct page *p) { return 0; } + static inline void swsusp_set_page_free(struct page *p) {} + static inline void swsusp_unset_page_free(struct page *p) {} +@@ -505,14 +496,14 @@ extern void ksys_sync_helper(void); + + /* drivers/base/power/wakeup.c */ + extern bool events_check_enabled; +-extern unsigned int pm_wakeup_irq; + extern suspend_state_t pm_suspend_target_state; + + extern bool pm_wakeup_pending(void); + extern void pm_system_wakeup(void); + extern void pm_system_cancel_wakeup(void); +-extern void pm_wakeup_clear(bool reset); ++extern void pm_wakeup_clear(unsigned int irq_number); + extern void pm_system_irq_wakeup(unsigned int irq_number); ++extern unsigned int pm_wakeup_irq(void); + extern bool pm_get_wakeup_count(unsigned int *count, bool block); + extern bool pm_save_wakeup_count(unsigned int count); + extern void pm_wakep_autosleep_enabled(bool set); +diff --git 
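The added length check in xdr_stream_decode_uint32_array() above guards the multiplication on the following line: without it, len * sizeof(*p) can wrap on 32-bit targets and make an attacker-chosen huge count look tiny. In miniature:

    /* With a 32-bit size_t and sizeof(u32) == 4: */
    size_t len   = (SIZE_MAX / 4) + 1;      /* rejected by the new check */
    size_t bytes = len * 4;                 /* would wrap around to 0 */
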
a/include/linux/swap.h b/include/linux/swap.h +index ba52f3a3478e3..4efd267e2937e 100644 +--- a/include/linux/swap.h ++++ b/include/linux/swap.h +@@ -378,7 +378,6 @@ extern void lru_cache_add_inactive_or_unevictable(struct page *page, + extern unsigned long zone_reclaimable_pages(struct zone *zone); + extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order, + gfp_t gfp_mask, nodemask_t *mask); +-extern bool __isolate_lru_page_prepare(struct page *page, isolate_mode_t mode); + extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg, + unsigned long nr_pages, + gfp_t gfp_mask, +diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h +index b0cb2a9973f49..569272871375c 100644 +--- a/include/linux/swiotlb.h ++++ b/include/linux/swiotlb.h +@@ -45,7 +45,8 @@ extern void __init swiotlb_update_mem_attributes(void); + + phys_addr_t swiotlb_tbl_map_single(struct device *hwdev, phys_addr_t phys, + size_t mapping_size, size_t alloc_size, +- enum dma_data_direction dir, unsigned long attrs); ++ unsigned int alloc_aligned_mask, enum dma_data_direction dir, ++ unsigned long attrs); + + extern void swiotlb_tbl_unmap_single(struct device *hwdev, + phys_addr_t tlb_addr, +diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h +index 1fa2b69c6fc3d..47cf70c8eb93c 100644 +--- a/include/linux/sysctl.h ++++ b/include/linux/sysctl.h +@@ -38,9 +38,16 @@ struct ctl_table_header; + struct ctl_dir; + + /* Keep the same order as in fs/proc/proc_sysctl.c */ +-#define SYSCTL_ZERO ((void *)&sysctl_vals[0]) +-#define SYSCTL_ONE ((void *)&sysctl_vals[1]) +-#define SYSCTL_INT_MAX ((void *)&sysctl_vals[2]) ++#define SYSCTL_NEG_ONE ((void *)&sysctl_vals[0]) ++#define SYSCTL_ZERO ((void *)&sysctl_vals[1]) ++#define SYSCTL_ONE ((void *)&sysctl_vals[2]) ++#define SYSCTL_TWO ((void *)&sysctl_vals[3]) ++#define SYSCTL_FOUR ((void *)&sysctl_vals[4]) ++#define SYSCTL_ONE_HUNDRED ((void *)&sysctl_vals[5]) ++#define SYSCTL_TWO_HUNDRED ((void *)&sysctl_vals[6]) ++#define SYSCTL_ONE_THOUSAND ((void *)&sysctl_vals[7]) ++#define SYSCTL_THREE_THOUSAND ((void *)&sysctl_vals[8]) ++#define SYSCTL_INT_MAX ((void *)&sysctl_vals[9]) + + extern const int sysctl_vals[]; + +@@ -199,6 +206,9 @@ struct ctl_table_header *register_sysctl_paths(const struct ctl_path *path, + void unregister_sysctl_table(struct ctl_table_header * table); + + extern int sysctl_init(void); ++extern void __register_sysctl_init(const char *path, struct ctl_table *table, ++ const char *table_name); ++#define register_sysctl_init(path, table) __register_sysctl_init(path, table, #table) + void do_sysctl_args(void); + + extern int pwrsw_enabled; +diff --git a/include/linux/sysfb.h b/include/linux/sysfb.h +index b0dcfa26d07bd..8ba8b5be55675 100644 +--- a/include/linux/sysfb.h ++++ b/include/linux/sysfb.h +@@ -55,6 +55,18 @@ struct efifb_dmi_info { + int flags; + }; + ++#ifdef CONFIG_SYSFB ++ ++void sysfb_disable(void); ++ ++#else /* CONFIG_SYSFB */ ++ ++static inline void sysfb_disable(void) ++{ ++} ++ ++#endif /* CONFIG_SYSFB */ ++ + #ifdef CONFIG_EFI + + extern struct efifb_dmi_info efifb_dmi_list[]; +@@ -72,8 +84,8 @@ static inline void sysfb_apply_efi_quirks(struct platform_device *pd) + + bool sysfb_parse_mode(const struct screen_info *si, + struct simplefb_platform_data *mode); +-int sysfb_create_simplefb(const struct screen_info *si, +- const struct simplefb_platform_data *mode); ++struct platform_device *sysfb_create_simplefb(const struct screen_info *si, ++ const struct simplefb_platform_data *mode); + + #else /* 
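The expanded sysctl_vals constants and the register_sysctl_init() helper above pair naturally. A boot-time registration might look like the following sketch; the table, variable, and function names are invented:

    static int hypothetical_ratio;

    static struct ctl_table hypothetical_table[] = {
            {
                    .procname       = "hypothetical_ratio",
                    .data           = &hypothetical_ratio,
                    .maxlen         = sizeof(int),
                    .mode           = 0644,
                    .proc_handler   = proc_dointvec_minmax,
                    .extra1         = SYSCTL_ZERO,
                    .extra2         = SYSCTL_ONE_HUNDRED,
            },
            { }
    };

    static int __init hypothetical_sysctl_setup(void)
    {
            register_sysctl_init("kernel", hypothetical_table);
            return 0;
    }
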
CONFIG_SYSFB_SIMPLE */ + +@@ -83,10 +95,10 @@ static inline bool sysfb_parse_mode(const struct screen_info *si, + return false; + } + +-static inline int sysfb_create_simplefb(const struct screen_info *si, +- const struct simplefb_platform_data *mode) ++static inline struct platform_device *sysfb_create_simplefb(const struct screen_info *si, ++ const struct simplefb_platform_data *mode) + { +- return -EINVAL; ++ return ERR_PTR(-EINVAL); + } + + #endif /* CONFIG_SYSFB_SIMPLE */ +diff --git a/include/linux/tcp.h b/include/linux/tcp.h +index 48d8a363319e5..a7ebadf83c681 100644 +--- a/include/linux/tcp.h ++++ b/include/linux/tcp.h +@@ -265,7 +265,7 @@ struct tcp_sock { + u32 packets_out; /* Packets which are "in flight" */ + u32 retrans_out; /* Retransmitted packets out */ + u32 max_packets_out; /* max packets_out in last window */ +- u32 max_packets_seq; /* right edge of max_packets_out flight */ ++ u32 cwnd_usage_seq; /* right edge of cwnd usage tracking flight */ + + u16 urg_data; /* Saved octet of OOB data and control flags */ + u8 ecn_flags; /* ECN status bits. */ +diff --git a/include/linux/tee_drv.h b/include/linux/tee_drv.h +index 3ebfea0781f10..38b701b7af4cf 100644 +--- a/include/linux/tee_drv.h ++++ b/include/linux/tee_drv.h +@@ -195,7 +195,7 @@ int tee_session_calc_client_uuid(uuid_t *uuid, u32 connection_method, + * @offset: offset of buffer in user space + * @pages: locked pages from userspace + * @num_pages: number of locked pages +- * @dmabuf: dmabuf used to for exporting to user space ++ * @refcount: reference counter + * @flags: defined by TEE_SHM_* in tee_drv.h + * @id: unique id of a shared memory object on this device + * +@@ -210,7 +210,7 @@ struct tee_shm { + unsigned int offset; + struct page **pages; + size_t num_pages; +- struct dma_buf *dmabuf; ++ refcount_t refcount; + u32 flags; + int id; + }; +@@ -582,4 +582,18 @@ struct tee_client_driver { + #define to_tee_client_driver(d) \ + container_of(d, struct tee_client_driver, driver) + ++/** ++ * teedev_open() - Open a struct tee_device ++ * @teedev: Device to open ++ * ++ * @return a pointer to struct tee_context on success or an ERR_PTR on failure. ++ */ ++struct tee_context *teedev_open(struct tee_device *teedev); ++ ++/** ++ * teedev_close_context() - closes a struct tee_context ++ * @ctx: The struct tee_context to close ++ */ ++void teedev_close_context(struct tee_context *ctx); ++ + #endif /*__TEE_DRV_H*/ +diff --git a/include/linux/thermal.h b/include/linux/thermal.h +index c314893970b35..b94314ed0c965 100644 +--- a/include/linux/thermal.h ++++ b/include/linux/thermal.h +@@ -92,6 +92,7 @@ struct thermal_cooling_device_ops { + struct thermal_cooling_device { + int id; + char *type; ++ unsigned long max_state; + struct device device; + struct device_node *np; + void *devdata; +@@ -113,7 +114,7 @@ struct thermal_cooling_device { + * @trip_hyst_attrs: attributes for trip points for sysfs: trip hysteresis + * @mode: current mode of this thermal zone + * @devdata: private pointer for device private data +- * @trips: number of trip points the thermal zone supports ++ * @num_trips: number of trip points the thermal zone supports + * @trips_disabled; bitmap for disabled trips + * @passive_delay_jiffies: number of jiffies to wait between polls when + * performing passive cooling. 
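With the sysfb_create_simplefb() signature change above, callers receive the registered platform_device (or an ERR_PTR-encoded error) instead of a bare status code, so a caller would now follow the usual pattern:

    struct platform_device *pd;

    pd = sysfb_create_simplefb(si, mode);
    if (IS_ERR(pd))
            return PTR_ERR(pd);     /* previously: a plain int error */
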
+@@ -153,7 +154,7 @@ struct thermal_zone_device { + struct thermal_attr *trip_hyst_attrs; + enum thermal_device_mode mode; + void *devdata; +- int trips; ++ int num_trips; + unsigned long trips_disabled; /* bitmap for disabled trips */ + unsigned long passive_delay_jiffies; + unsigned long polling_delay_jiffies; +diff --git a/include/linux/timerqueue.h b/include/linux/timerqueue.h +index 93884086f3924..adc80e29168ea 100644 +--- a/include/linux/timerqueue.h ++++ b/include/linux/timerqueue.h +@@ -35,7 +35,7 @@ struct timerqueue_node *timerqueue_getnext(struct timerqueue_head *head) + { + struct rb_node *leftmost = rb_first_cached(&head->rb_root); + +- return rb_entry(leftmost, struct timerqueue_node, node); ++ return rb_entry_safe(leftmost, struct timerqueue_node, node); + } + + static inline void timerqueue_init(struct timerqueue_node *node) +diff --git a/include/linux/timex.h b/include/linux/timex.h +index 059b18eb1f1fa..3871b06bd302c 100644 +--- a/include/linux/timex.h ++++ b/include/linux/timex.h +@@ -62,6 +62,8 @@ + #include <linux/types.h> + #include <linux/param.h> + ++unsigned long random_get_entropy_fallback(void); ++ + #include <asm/timex.h> + + #ifndef random_get_entropy +@@ -74,8 +76,14 @@ + * + * By default we use get_cycles() for this purpose, but individual + * architectures may override this in their asm/timex.h header file. ++ * If a given arch does not have get_cycles(), then we fallback to ++ * using random_get_entropy_fallback(). + */ +-#define random_get_entropy() get_cycles() ++#ifdef get_cycles ++#define random_get_entropy() ((unsigned long)get_cycles()) ++#else ++#define random_get_entropy() random_get_entropy_fallback() ++#endif + #endif + + /* +diff --git a/include/linux/torture.h b/include/linux/torture.h +index 0910c5803f35a..24f58e50a94b8 100644 +--- a/include/linux/torture.h ++++ b/include/linux/torture.h +@@ -47,6 +47,14 @@ do { \ + } while (0) + void verbose_torout_sleep(void); + ++#define torture_init_error(firsterr) \ ++({ \ ++ int ___firsterr = (firsterr); \ ++ \ ++ WARN_ONCE(!IS_MODULE(CONFIG_RCU_TORTURE_TEST) && ___firsterr < 0, "Torture-test initialization failed with error code %d\n", ___firsterr); \ ++ ___firsterr < 0; \ ++}) ++ + /* Definitions for online/offline exerciser. 
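The torture_init_error() macro above centralizes the bail-out-of-init pattern for the torture tests. Judging from its semantics, a typical call site would look roughly like this sketch (the worker and task names are invented):

    firsterr = torture_create_kthread(hypothetical_worker, NULL,
                                      hypothetical_task);
    if (torture_init_error(firsterr))
            goto unwind;
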
*/ + #ifdef CONFIG_HOTPLUG_CPU + int torture_num_online_cpus(void); +diff --git a/include/linux/tpm.h b/include/linux/tpm.h +index aa11fe323c56b..12d827734686d 100644 +--- a/include/linux/tpm.h ++++ b/include/linux/tpm.h +@@ -269,6 +269,7 @@ enum tpm2_cc_attrs { + #define TPM_VID_INTEL 0x8086 + #define TPM_VID_WINBOND 0x1050 + #define TPM_VID_STM 0x104A ++#define TPM_VID_ATML 0x1114 + + enum tpm_chip_flags { + TPM_CHIP_FLAG_TPM2 = BIT(1), +diff --git a/include/linux/tpm_eventlog.h b/include/linux/tpm_eventlog.h +index 739ba9a03ec16..7d68a5cc58816 100644 +--- a/include/linux/tpm_eventlog.h ++++ b/include/linux/tpm_eventlog.h +@@ -157,7 +157,7 @@ struct tcg_algorithm_info { + * Return: size of the event on success, 0 on failure + */ + +-static inline int __calc_tpm2_event_size(struct tcg_pcr_event2_head *event, ++static __always_inline int __calc_tpm2_event_size(struct tcg_pcr_event2_head *event, + struct tcg_pcr_event *event_header, + bool do_mapping) + { +@@ -198,8 +198,8 @@ static inline int __calc_tpm2_event_size(struct tcg_pcr_event2_head *event, + * The loop below will unmap these fields if the log is larger than + * one page, so save them here for reference: + */ +- count = READ_ONCE(event->count); +- event_type = READ_ONCE(event->event_type); ++ count = event->count; ++ event_type = event->event_type; + + /* Verify that it's the log header */ + if (event_header->pcr_idx != 0 || +diff --git a/include/linux/trace.h b/include/linux/trace.h +index bf169612ffe12..80ffda8717491 100644 +--- a/include/linux/trace.h ++++ b/include/linux/trace.h +@@ -2,8 +2,6 @@ + #ifndef _LINUX_TRACE_H + #define _LINUX_TRACE_H + +-#ifdef CONFIG_TRACING +- + #define TRACE_EXPORT_FUNCTION BIT(0) + #define TRACE_EXPORT_EVENT BIT(1) + #define TRACE_EXPORT_MARKER BIT(2) +@@ -28,11 +26,13 @@ struct trace_export { + int flags; + }; + ++struct trace_array; ++ ++#ifdef CONFIG_TRACING ++ + int register_ftrace_export(struct trace_export *export); + int unregister_ftrace_export(struct trace_export *export); + +-struct trace_array; +- + void trace_printk_init_buffers(void); + __printf(3, 4) + int trace_array_printk(struct trace_array *tr, unsigned long ip, +@@ -48,6 +48,38 @@ void osnoise_arch_unregister(void); + void osnoise_trace_irq_entry(int id); + void osnoise_trace_irq_exit(int id, const char *desc); + ++#else /* CONFIG_TRACING */ ++static inline int register_ftrace_export(struct trace_export *export) ++{ ++ return -EINVAL; ++} ++static inline int unregister_ftrace_export(struct trace_export *export) ++{ ++ return 0; ++} ++static inline void trace_printk_init_buffers(void) ++{ ++} ++static inline int trace_array_printk(struct trace_array *tr, unsigned long ip, ++ const char *fmt, ...) ++{ ++ return 0; ++} ++static inline int trace_array_init_printk(struct trace_array *tr) ++{ ++ return -EINVAL; ++} ++static inline void trace_array_put(struct trace_array *tr) ++{ ++} ++static inline struct trace_array *trace_array_get_by_name(const char *name) ++{ ++ return NULL; ++} ++static inline int trace_array_destroy(struct trace_array *tr) ++{ ++ return 0; ++} + #endif /* CONFIG_TRACING */ + + #endif /* _LINUX_TRACE_H */ +diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h +index 3e475eeb5a995..ff137179e0c30 100644 +--- a/include/linux/trace_events.h ++++ b/include/linux/trace_events.h +@@ -91,6 +91,7 @@ struct trace_iterator { + unsigned int temp_size; + char *fmt; /* modified format holder */ + unsigned int fmt_size; ++ long wait_index; + + /* trace_seq for __print_flags() and __print_symbolic() etc. 
*/ + struct trace_seq tmp_seq; +@@ -673,7 +674,7 @@ struct trace_event_file { + + #define PERF_MAX_TRACE_SIZE 2048 + +-#define MAX_FILTER_STR_VAL 256 /* Should handle KSYM_SYMBOL_LEN */ ++#define MAX_FILTER_STR_VAL 256U /* Should handle KSYM_SYMBOL_LEN */ + + enum event_trigger_type { + ETT_NONE = (0), +diff --git a/include/linux/tty_flip.h b/include/linux/tty_flip.h +index 32284992b31a0..1fb727b7b969a 100644 +--- a/include/linux/tty_flip.h ++++ b/include/linux/tty_flip.h +@@ -17,7 +17,6 @@ extern int tty_insert_flip_string_fixed_flag(struct tty_port *port, + extern int tty_prepare_flip_string(struct tty_port *port, + unsigned char **chars, size_t size); + extern void tty_flip_buffer_push(struct tty_port *port); +-void tty_schedule_flip(struct tty_port *port); + int __tty_insert_flip_char(struct tty_port *port, unsigned char ch, char flag); + + static inline int tty_insert_flip_char(struct tty_port *port, +diff --git a/include/linux/uacce.h b/include/linux/uacce.h +index 48e319f402751..9ce88c28b0a87 100644 +--- a/include/linux/uacce.h ++++ b/include/linux/uacce.h +@@ -70,6 +70,7 @@ enum uacce_q_state { + * @wait: wait queue head + * @list: index into uacce queues list + * @qfrs: pointer of qfr regions ++ * @mutex: protects queue state + * @state: queue state machine + * @pasid: pasid associated to the mm + * @handle: iommu_sva handle returned by iommu_sva_bind_device() +@@ -80,6 +81,7 @@ struct uacce_queue { + wait_queue_head_t wait; + struct list_head list; + struct uacce_qfile_region *qfrs[UACCE_MAX_REGION]; ++ struct mutex mutex; + enum uacce_q_state state; + u32 pasid; + struct iommu_sva *handle; +@@ -97,9 +99,9 @@ struct uacce_queue { + * @dev_id: id of the uacce device + * @cdev: cdev of the uacce + * @dev: dev of the uacce ++ * @mutex: protects uacce operation + * @priv: private pointer of the uacce + * @queues: list of queues +- * @queues_lock: lock for queues list + * @inode: core vfs + */ + struct uacce_device { +@@ -113,9 +115,9 @@ struct uacce_device { + u32 dev_id; + struct cdev *cdev; + struct device dev; ++ struct mutex mutex; + void *priv; + struct list_head queues; +- struct mutex queues_lock; + struct inode *inode; + }; + +diff --git a/include/linux/udp.h b/include/linux/udp.h +index ae66dadd85434..0727276e7538c 100644 +--- a/include/linux/udp.h ++++ b/include/linux/udp.h +@@ -75,6 +75,7 @@ struct udp_sock { + * For encapsulation sockets. 
+ */ + int (*encap_rcv)(struct sock *sk, struct sk_buff *skb); ++ void (*encap_err_rcv)(struct sock *sk, struct sk_buff *skb, unsigned int udp_offset); + int (*encap_err_lookup)(struct sock *sk, struct sk_buff *skb); + void (*encap_destroy)(struct sock *sk); + +diff --git a/include/linux/uio.h b/include/linux/uio.h +index 207101a9c5c32..6350354f97e90 100644 +--- a/include/linux/uio.h ++++ b/include/linux/uio.h +@@ -35,6 +35,7 @@ struct iov_iter_state { + + struct iov_iter { + u8 iter_type; ++ bool nofault; + bool data_source; + size_t iov_offset; + size_t count; +@@ -133,7 +134,8 @@ size_t copy_page_from_iter_atomic(struct page *page, unsigned offset, + size_t bytes, struct iov_iter *i); + void iov_iter_advance(struct iov_iter *i, size_t bytes); + void iov_iter_revert(struct iov_iter *i, size_t bytes); +-int iov_iter_fault_in_readable(const struct iov_iter *i, size_t bytes); ++size_t fault_in_iov_iter_readable(const struct iov_iter *i, size_t bytes); ++size_t fault_in_iov_iter_writeable(const struct iov_iter *i, size_t bytes); + size_t iov_iter_single_seg_count(const struct iov_iter *i); + size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes, + struct iov_iter *i); +diff --git a/include/linux/usb.h b/include/linux/usb.h +index 7ccaa76a9a968..89f58d070470c 100644 +--- a/include/linux/usb.h ++++ b/include/linux/usb.h +@@ -575,6 +575,7 @@ struct usb3_lpm_parameters { + * @devaddr: device address, XHCI: assigned by HW, others: same as devnum + * @can_submit: URBs may be submitted + * @persist_enabled: USB_PERSIST enabled for this device ++ * @reset_in_progress: the device is being reset + * @have_langid: whether string_langid is valid + * @authorized: policy has said we can use it; + * (user space) policy determines if we authorize this device to be +@@ -661,6 +662,7 @@ struct usb_device { + + unsigned can_submit:1; + unsigned persist_enabled:1; ++ unsigned reset_in_progress:1; + unsigned have_langid:1; + unsigned authorized:1; + unsigned authenticated:1; +@@ -747,11 +749,14 @@ extern struct device *usb_intf_get_dma_device(struct usb_interface *intf); + extern int usb_acpi_set_power_state(struct usb_device *hdev, int index, + bool enable); + extern bool usb_acpi_power_manageable(struct usb_device *hdev, int index); ++extern int usb_acpi_port_lpm_incapable(struct usb_device *hdev, int index); + #else + static inline int usb_acpi_set_power_state(struct usb_device *hdev, int index, + bool enable) { return 0; } + static inline bool usb_acpi_power_manageable(struct usb_device *hdev, int index) + { return true; } ++static inline int usb_acpi_port_lpm_incapable(struct usb_device *hdev, int index) ++ { return 0; } + #endif + + /* USB autosuspend and autoresume */ +diff --git a/include/linux/usb/hcd.h b/include/linux/usb/hcd.h +index 2c1fc9212cf28..98d1921f02b1e 100644 +--- a/include/linux/usb/hcd.h ++++ b/include/linux/usb/hcd.h +@@ -66,6 +66,7 @@ + + struct giveback_urb_bh { + bool running; ++ bool high_prio; + spinlock_t lock; + struct list_head head; + struct tasklet_struct bh; +diff --git a/include/linux/usb/role.h b/include/linux/usb/role.h +index 031f148ab3734..b5deafd91f67b 100644 +--- a/include/linux/usb/role.h ++++ b/include/linux/usb/role.h +@@ -91,6 +91,12 @@ fwnode_usb_role_switch_get(struct fwnode_handle *node) + + static inline void usb_role_switch_put(struct usb_role_switch *sw) { } + ++static inline struct usb_role_switch * ++usb_role_switch_find_by_fwnode(const struct fwnode_handle *fwnode) ++{ ++ return NULL; ++} ++ + static inline struct usb_role_switch * + 
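Note the inverted convention in fault_in_iov_iter_readable() above, which replaces iov_iter_fault_in_readable(): it returns the number of bytes that could not be faulted in, so zero now means complete success. A caller that used to test for a negative error would be rewritten along these lines:

    /* Bail out only if nothing at all could be faulted in. */
    if (fault_in_iov_iter_readable(i, bytes) == bytes)
            return -EFAULT;
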
usb_role_switch_register(struct device *parent, + const struct usb_role_switch_desc *desc) +diff --git a/include/linux/usb/typec.h b/include/linux/usb/typec.h +index e2e44bb1dad85..c1e5910809add 100644 +--- a/include/linux/usb/typec.h ++++ b/include/linux/usb/typec.h +@@ -295,6 +295,9 @@ int typec_set_mode(struct typec_port *port, int mode); + + void *typec_get_drvdata(struct typec_port *port); + ++int typec_get_fw_cap(struct typec_capability *cap, ++ struct fwnode_handle *fwnode); ++ + int typec_find_pwr_opmode(const char *name); + int typec_find_orientation(const char *name); + int typec_find_port_power_role(const char *name); +diff --git a/include/linux/usb/typec_dp.h b/include/linux/usb/typec_dp.h +index cfb916cccd316..8d09c2f0a9b80 100644 +--- a/include/linux/usb/typec_dp.h ++++ b/include/linux/usb/typec_dp.h +@@ -73,6 +73,11 @@ enum { + #define DP_CAP_USB BIT(7) + #define DP_CAP_DFP_D_PIN_ASSIGN(_cap_) (((_cap_) & GENMASK(15, 8)) >> 8) + #define DP_CAP_UFP_D_PIN_ASSIGN(_cap_) (((_cap_) & GENMASK(23, 16)) >> 16) ++/* Get pin assignment taking plug & receptacle into consideration */ ++#define DP_CAP_PIN_ASSIGN_UFP_D(_cap_) ((_cap_ & DP_CAP_RECEPTACLE) ? \ ++ DP_CAP_UFP_D_PIN_ASSIGN(_cap_) : DP_CAP_DFP_D_PIN_ASSIGN(_cap_)) ++#define DP_CAP_PIN_ASSIGN_DFP_D(_cap_) ((_cap_ & DP_CAP_RECEPTACLE) ? \ ++ DP_CAP_DFP_D_PIN_ASSIGN(_cap_) : DP_CAP_UFP_D_PIN_ASSIGN(_cap_)) + + /* DisplayPort Status Update VDO bits */ + #define DP_STATUS_CONNECTION(_status_) ((_status_) & 3) +diff --git a/include/linux/util_macros.h b/include/linux/util_macros.h +index 72299f261b253..43db6e47503c7 100644 +--- a/include/linux/util_macros.h ++++ b/include/linux/util_macros.h +@@ -38,4 +38,16 @@ + */ + #define find_closest_descending(x, a, as) __find_closest(x, a, as, >=) + ++/** ++ * is_insidevar - check if the @ptr points inside the @var memory range. ++ * @ptr: the pointer to a memory address. ++ * @var: the variable which address and size identify the memory range. ++ * ++ * Evaluates to true if the address in @ptr lies within the memory ++ * range allocated to @var. 
++ */ ++#define is_insidevar(ptr, var) \ ++ ((uintptr_t)(ptr) >= (uintptr_t)(var) && \ ++ (uintptr_t)(ptr) < (uintptr_t)(var) + sizeof(var)) ++ + #endif +diff --git a/include/linux/vfio_pci_core.h b/include/linux/vfio_pci_core.h +index ef9a44b6cf5d5..6e5db4edc3359 100644 +--- a/include/linux/vfio_pci_core.h ++++ b/include/linux/vfio_pci_core.h +@@ -133,6 +133,8 @@ struct vfio_pci_core_device { + struct mutex ioeventfds_lock; + struct list_head ioeventfds_list; + struct vfio_pci_vf_token *vf_token; ++ struct list_head sriov_pfs_item; ++ struct vfio_pci_core_device *sriov_pf_core_dev; + struct notifier_block nb; + struct mutex vma_lock; + struct list_head vma_list; +@@ -159,8 +161,17 @@ extern ssize_t vfio_pci_config_rw(struct vfio_pci_core_device *vdev, + extern ssize_t vfio_pci_bar_rw(struct vfio_pci_core_device *vdev, char __user *buf, + size_t count, loff_t *ppos, bool iswrite); + ++#ifdef CONFIG_VFIO_PCI_VGA + extern ssize_t vfio_pci_vga_rw(struct vfio_pci_core_device *vdev, char __user *buf, + size_t count, loff_t *ppos, bool iswrite); ++#else ++static inline ssize_t vfio_pci_vga_rw(struct vfio_pci_core_device *vdev, ++ char __user *buf, size_t count, ++ loff_t *ppos, bool iswrite) ++{ ++ return -EINVAL; ++} ++#endif + + extern long vfio_pci_ioeventfd(struct vfio_pci_core_device *vdev, loff_t offset, + uint64_t data, int count, int fd); +diff --git a/include/linux/virtio.h b/include/linux/virtio.h +index 41edbc01ffa40..1af8d65d4c8f7 100644 +--- a/include/linux/virtio.h ++++ b/include/linux/virtio.h +@@ -133,7 +133,6 @@ bool is_virtio_device(struct device *dev); + void virtio_break_device(struct virtio_device *dev); + + void virtio_config_changed(struct virtio_device *dev); +-int virtio_finalize_features(struct virtio_device *dev); + #ifdef CONFIG_PM_SLEEP + int virtio_device_freeze(struct virtio_device *dev); + int virtio_device_restore(struct virtio_device *dev); +diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h +index 8519b3ae5d52e..b341dd62aa4da 100644 +--- a/include/linux/virtio_config.h ++++ b/include/linux/virtio_config.h +@@ -62,8 +62,9 @@ struct virtio_shm_region { + * Returns the first 64 feature bits (all we currently need). + * @finalize_features: confirm what device features we'll be using. + * vdev: the virtio_device +- * This gives the final feature bits for the device: it can change ++ * This sends the driver feature bits to the device: it can change + * the dev->feature bits if it wants. ++ * Note: despite the name this can be called any number of times. 
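A small usage sketch for the is_insidevar() macro above; the pool and function are invented. Note the second argument should be an actual variable (such as an array), since the macro takes sizeof() of it:

    static bool hypothetical_from_pool(const void *p)
    {
            static char pool[256];

            /* True only if p points somewhere within pool[]. */
            return is_insidevar(p, pool);
    }
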
+ * Returns 0 on success or error status + * @bus_name: return the bus name associated with the device (optional) + * vdev: the virtio_device +diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h +index b465f8f3e554f..a960de68ac69e 100644 +--- a/include/linux/virtio_net.h ++++ b/include/linux/virtio_net.h +@@ -7,9 +7,27 @@ + #include <uapi/linux/udp.h> + #include <uapi/linux/virtio_net.h> + ++static inline bool virtio_net_hdr_match_proto(__be16 protocol, __u8 gso_type) ++{ ++ switch (gso_type & ~VIRTIO_NET_HDR_GSO_ECN) { ++ case VIRTIO_NET_HDR_GSO_TCPV4: ++ return protocol == cpu_to_be16(ETH_P_IP); ++ case VIRTIO_NET_HDR_GSO_TCPV6: ++ return protocol == cpu_to_be16(ETH_P_IPV6); ++ case VIRTIO_NET_HDR_GSO_UDP: ++ return protocol == cpu_to_be16(ETH_P_IP) || ++ protocol == cpu_to_be16(ETH_P_IPV6); ++ default: ++ return false; ++ } ++} ++ + static inline int virtio_net_hdr_set_proto(struct sk_buff *skb, + const struct virtio_net_hdr *hdr) + { ++ if (skb->protocol) ++ return 0; ++ + switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) { + case VIRTIO_NET_HDR_GSO_TCPV4: + case VIRTIO_NET_HDR_GSO_UDP: +@@ -88,9 +106,12 @@ static inline int virtio_net_hdr_to_skb(struct sk_buff *skb, + if (!skb->protocol) { + __be16 protocol = dev_parse_header_protocol(skb); + +- virtio_net_hdr_set_proto(skb, hdr); +- if (protocol && protocol != skb->protocol) ++ if (!protocol) ++ virtio_net_hdr_set_proto(skb, hdr); ++ else if (!virtio_net_hdr_match_proto(protocol, hdr->gso_type)) + return -EINVAL; ++ else ++ skb->protocol = protocol; + } + retry: + if (!skb_flow_dissect_flow_keys_basic(NULL, skb, &keys, +@@ -120,10 +141,15 @@ retry: + + if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) { + u16 gso_size = __virtio16_to_cpu(little_endian, hdr->gso_size); ++ unsigned int nh_off = p_off; + struct skb_shared_info *shinfo = skb_shinfo(skb); + ++ /* UFO may not include transport header in gso_size. */ ++ if (gso_type & SKB_GSO_UDP) ++ nh_off -= thlen; ++ + /* Too small packets are not really GSO ones. */ +- if (skb->len - p_off > gso_size) { ++ if (skb->len - nh_off > gso_size) { + shinfo->gso_size = gso_size; + shinfo->gso_type = gso_type; + +diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h +index 671d402c3778f..5535be1012a28 100644 +--- a/include/linux/vmalloc.h ++++ b/include/linux/vmalloc.h +@@ -28,6 +28,13 @@ struct notifier_block; /* in notifier.h */ + #define VM_MAP_PUT_PAGES 0x00000200 /* put pages and free array in vfree */ + #define VM_NO_HUGE_VMAP 0x00000400 /* force PAGE_SIZE pte mapping */ + ++#if (defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)) && \ ++ !defined(CONFIG_KASAN_VMALLOC) ++#define VM_DEFER_KMEMLEAK 0x00000800 /* defer kmemleak object creation */ ++#else ++#define VM_DEFER_KMEMLEAK 0 ++#endif ++ + /* + * VM_KASAN is used slightly differently depending on CONFIG_KASAN_VMALLOC. 
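The virtio_net_hdr_match_proto() helper above cross-checks a device-parsed link-layer protocol against the header's claimed GSO type, for example:

    /* A TCPv4 GSO type must pair with an IPv4 protocol: */
    bool ok  = virtio_net_hdr_match_proto(cpu_to_be16(ETH_P_IP),
                                          VIRTIO_NET_HDR_GSO_TCPV4); /* true */
    bool bad = virtio_net_hdr_match_proto(cpu_to_be16(ETH_P_IPV6),
                                          VIRTIO_NET_HDR_GSO_TCPV4); /* false */
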
+ * +@@ -152,6 +159,11 @@ void *__vmalloc_node(unsigned long size, unsigned long align, gfp_t gfp_mask, + int node, const void *caller); + void *vmalloc_no_huge(unsigned long size); + ++extern void *__vmalloc_array(size_t n, size_t size, gfp_t flags) __alloc_size(1, 2); ++extern void *vmalloc_array(size_t n, size_t size) __alloc_size(1, 2); ++extern void *__vcalloc(size_t n, size_t size, gfp_t flags) __alloc_size(1, 2); ++extern void *vcalloc(size_t n, size_t size) __alloc_size(1, 2); ++ + extern void vfree(const void *addr); + extern void vfree_atomic(const void *addr); + +diff --git a/include/linux/wait.h b/include/linux/wait.h +index 93dab0e9580f8..21044562aab74 100644 +--- a/include/linux/wait.h ++++ b/include/linux/wait.h +@@ -217,6 +217,7 @@ void __wake_up_sync_key(struct wait_queue_head *wq_head, unsigned int mode, void + void __wake_up_locked_sync_key(struct wait_queue_head *wq_head, unsigned int mode, void *key); + void __wake_up_locked(struct wait_queue_head *wq_head, unsigned int mode, int nr); + void __wake_up_sync(struct wait_queue_head *wq_head, unsigned int mode); ++void __wake_up_pollfree(struct wait_queue_head *wq_head); + + #define wake_up(x) __wake_up(x, TASK_NORMAL, 1, NULL) + #define wake_up_nr(x, nr) __wake_up(x, TASK_NORMAL, nr, NULL) +@@ -245,6 +246,31 @@ void __wake_up_sync(struct wait_queue_head *wq_head, unsigned int mode); + #define wake_up_interruptible_sync_poll_locked(x, m) \ + __wake_up_locked_sync_key((x), TASK_INTERRUPTIBLE, poll_to_key(m)) + ++/** ++ * wake_up_pollfree - signal that a polled waitqueue is going away ++ * @wq_head: the wait queue head ++ * ++ * In the very rare cases where a ->poll() implementation uses a waitqueue whose ++ * lifetime is tied to a task rather than to the 'struct file' being polled, ++ * this function must be called before the waitqueue is freed so that ++ * non-blocking polls (e.g. epoll) are notified that the queue is going away. ++ * ++ * The caller must also RCU-delay the freeing of the wait_queue_head, e.g. via ++ * an explicit synchronize_rcu() or call_rcu(), or via SLAB_TYPESAFE_BY_RCU. ++ */ ++static inline void wake_up_pollfree(struct wait_queue_head *wq_head) ++{ ++ /* ++ * For performance reasons, we don't always take the queue lock here. ++ * Therefore, we might race with someone removing the last entry from ++ * the queue, and proceed while they still hold the queue lock. ++ * However, rcu_read_lock() is required to be held in such cases, so we ++ * can safely proceed with an RCU-delayed free. 
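The new vmalloc_array()/vcalloc() declarations above mirror kmalloc_array()/kcalloc(): the n * size computation is overflow-checked inside the allocator instead of at every call site. Intended use, sketched with an invented element type:

    struct hypothetical_entry *tbl;

    tbl = vcalloc(nr_entries, sizeof(*tbl));    /* zeroed, overflow-safe */
    if (!tbl)
            return -ENOMEM;

    /* ... use tbl ... */

    vfree(tbl);
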
++ */ ++ if (waitqueue_active(wq_head)) ++ __wake_up_pollfree(wq_head); ++} ++ + #define ___wait_cond_timeout(condition) \ + ({ \ + bool __cond = (condition); \ +@@ -518,10 +544,11 @@ do { \ + \ + hrtimer_init_sleeper_on_stack(&__t, CLOCK_MONOTONIC, \ + HRTIMER_MODE_REL); \ +- if ((timeout) != KTIME_MAX) \ +- hrtimer_start_range_ns(&__t.timer, timeout, \ +- current->timer_slack_ns, \ +- HRTIMER_MODE_REL); \ ++ if ((timeout) != KTIME_MAX) { \ ++ hrtimer_set_expires_range_ns(&__t.timer, timeout, \ ++ current->timer_slack_ns); \ ++ hrtimer_sleeper_start_expires(&__t, HRTIMER_MODE_REL); \ ++ } \ + \ + __ret = ___wait_event(wq_head, condition, state, 0, 0, \ + if (!__t.task) { \ +diff --git a/include/linux/watch_queue.h b/include/linux/watch_queue.h +index c994d1b2cdbaa..3b9a40ae8bdba 100644 +--- a/include/linux/watch_queue.h ++++ b/include/linux/watch_queue.h +@@ -28,7 +28,8 @@ struct watch_type_filter { + struct watch_filter { + union { + struct rcu_head rcu; +- unsigned long type_filter[2]; /* Bitmask of accepted types */ ++ /* Bitmask of accepted types */ ++ DECLARE_BITMAP(type_filter, WATCH_TYPE__NR); + }; + u32 nr_filters; /* Number of filters */ + struct watch_type_filter filters[]; +diff --git a/include/linux/wireless.h b/include/linux/wireless.h +index 2d1b54556eff4..e6e34d74dda04 100644 +--- a/include/linux/wireless.h ++++ b/include/linux/wireless.h +@@ -26,7 +26,15 @@ struct compat_iw_point { + struct __compat_iw_event { + __u16 len; /* Real length of this stuff */ + __u16 cmd; /* Wireless IOCTL */ +- compat_caddr_t pointer; ++ ++ union { ++ compat_caddr_t pointer; ++ ++ /* we need ptr_bytes to make memcpy() run-time destination ++ * buffer bounds checking happy, nothing special ++ */ ++ DECLARE_FLEX_ARRAY(__u8, ptr_bytes); ++ }; + }; + #define IW_EV_COMPAT_LCP_LEN offsetof(struct __compat_iw_event, pointer) + #define IW_EV_COMPAT_POINT_OFF offsetof(struct compat_iw_point, length) +diff --git a/include/media/cec.h b/include/media/cec.h +index 208c9613c07eb..77346f757036d 100644 +--- a/include/media/cec.h ++++ b/include/media/cec.h +@@ -26,13 +26,17 @@ + * @dev: cec device + * @cdev: cec character device + * @minor: device node minor number ++ * @lock: lock to serialize open/release and registration + * @registered: the device was correctly registered + * @unregistered: the device was unregistered ++ * @lock_fhs: lock to control access to @fhs + * @fhs: the list of open filehandles (cec_fh) +- * @lock: lock to control access to this structure + * + * This structure represents a cec-related device node. + * ++ * To add or remove filehandles from @fhs the @lock must be taken first, ++ * followed by @lock_fhs. It is safe to access @fhs if either lock is held. ++ * + * The @parent is a physical device. It must be set by core or device drivers + * before registering the node. 
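The wake_up_pollfree() kerneldoc above prescribes a fixed teardown order; going by it, a user whose waitqueue lives in a task-tied object (names invented) would do roughly:

    wake_up_pollfree(&obj->wait);   /* detach any non-blocking pollers */
    synchronize_rcu();              /* the required RCU delay ... */
    kfree(obj);                     /* ... before the queue is freed */
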
+ */
+@@ -43,10 +47,13 @@ struct cec_devnode {
+ 
+ /* device info */
+ int minor;
++ /* serialize open/release and registration */
++ struct mutex lock;
+ bool registered;
+ bool unregistered;
++ /* protect access to fhs */
++ struct mutex lock_fhs;
+ struct list_head fhs;
+- struct mutex lock;
+ };
+ 
+ struct cec_adapter;
+diff --git a/include/media/dvbdev.h b/include/media/dvbdev.h
+index 2f6b0861322ae..ac60c9fcfe9a6 100644
+--- a/include/media/dvbdev.h
++++ b/include/media/dvbdev.h
+@@ -126,6 +126,7 @@ struct dvb_adapter {
+ * struct dvb_device - represents a DVB device node
+ *
+ * @list_head: List head with all DVB devices
++ * @ref: reference counter
+ * @fops: pointer to struct file_operations
+ * @adapter: pointer to the adapter that holds this device node
+ * @type: type of the device, as defined by &enum dvb_device_type.
+@@ -156,6 +157,7 @@ struct dvb_adapter {
+ */
+ struct dvb_device {
+ struct list_head list_head;
++ struct kref ref;
+ const struct file_operations *fops;
+ struct dvb_adapter *adapter;
+ enum dvb_device_type type;
+@@ -187,6 +189,20 @@ struct dvb_device {
+ void *priv;
+ };
+ 
++/**
++ * dvb_device_get - Increase dvb_device reference
++ *
++ * @dvbdev: pointer to struct dvb_device
++ */
++struct dvb_device *dvb_device_get(struct dvb_device *dvbdev);
++
++/**
++ * dvb_device_put - Decrease dvb_device reference
++ *
++ * @dvbdev: pointer to struct dvb_device
++ */
++void dvb_device_put(struct dvb_device *dvbdev);
++
+ /**
+ * dvb_register_adapter - Registers a new DVB adapter
+ *
+@@ -231,29 +247,17 @@ int dvb_register_device(struct dvb_adapter *adap,
+ /**
+ * dvb_remove_device - Remove a registered DVB device
+ *
+- * This does not free memory. To do that, call dvb_free_device().
++ * This does not free memory. dvb_free_device() will do that when
++ * the reference counter drops to zero.
+ *
+ * @dvbdev: pointer to struct dvb_device
+ */
+ void dvb_remove_device(struct dvb_device *dvbdev);
+ 
+-/**
+- * dvb_free_device - Free memory occupied by a DVB device.
+- *
+- * Call dvb_unregister_device() before calling this function.
+- *
+- * @dvbdev: pointer to struct dvb_device
+- */
+-void dvb_free_device(struct dvb_device *dvbdev);
+ 
+ /**
+ * dvb_unregister_device - Unregisters a DVB device
+ *
+- * This is a combination of dvb_remove_device() and dvb_free_device().
+- * Using this function is usually a mistake, and is often an indicator
+- * for a use-after-free bug (when a userspace process keeps a file
+- * handle to a detached device).
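
With the struct kref in place, open and release paths are expected to pair dvb_device_get() with dvb_device_put(). A hedged sketch of such a pairing (the file_operations wiring here is invented for illustration):

    static int my_open(struct inode *inode, struct file *file)
    {
        struct dvb_device *dvbdev = file->private_data;

        dvb_device_get(dvbdev); /* hold the node while the fd is open */
        return 0;
    }

    static int my_release(struct inode *inode, struct file *file)
    {
        struct dvb_device *dvbdev = file->private_data;

        dvb_device_put(dvbdev); /* frees once the last reference drops */
        return 0;
    }
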
+- *
+ * @dvbdev: pointer to struct dvb_device
+ */
+ void dvb_unregister_device(struct dvb_device *dvbdev);
+diff --git a/include/media/v4l2-common.h b/include/media/v4l2-common.h
+index 3eb202259e8cc..5e25a098e8ce4 100644
+--- a/include/media/v4l2-common.h
++++ b/include/media/v4l2-common.h
+@@ -175,7 +175,8 @@ struct v4l2_subdev *v4l2_i2c_new_subdev_board(struct v4l2_device *v4l2_dev,
+ *
+ * @sd: pointer to &struct v4l2_subdev
+ * @client: pointer to struct i2c_client
+- * @devname: the name of the device; if NULL, the I²C device's name will be used
++ * @devname: the name of the device; if NULL, the I²C device driver's name
++ * will be used
+ * @postfix: sub-device specific string to put right after the I²C device name;
+ * may be NULL
+ */
+diff --git a/include/media/v4l2-subdev.h b/include/media/v4l2-subdev.h
+index 95ec18c2f49ce..9a476f902c425 100644
+--- a/include/media/v4l2-subdev.h
++++ b/include/media/v4l2-subdev.h
+@@ -995,6 +995,8 @@ v4l2_subdev_get_try_format(struct v4l2_subdev *sd,
+ struct v4l2_subdev_state *state,
+ unsigned int pad)
+ {
++ if (WARN_ON(!state))
++ return NULL;
+ if (WARN_ON(pad >= sd->entity.num_pads))
+ pad = 0;
+ return &state->pads[pad].try_fmt;
+@@ -1013,6 +1015,8 @@ v4l2_subdev_get_try_crop(struct v4l2_subdev *sd,
+ struct v4l2_subdev_state *state,
+ unsigned int pad)
+ {
++ if (WARN_ON(!state))
++ return NULL;
+ if (WARN_ON(pad >= sd->entity.num_pads))
+ pad = 0;
+ return &state->pads[pad].try_crop;
+@@ -1031,6 +1035,8 @@ v4l2_subdev_get_try_compose(struct v4l2_subdev *sd,
+ struct v4l2_subdev_state *state,
+ unsigned int pad)
+ {
++ if (WARN_ON(!state))
++ return NULL;
+ if (WARN_ON(pad >= sd->entity.num_pads))
+ pad = 0;
+ return &state->pads[pad].try_compose;
+diff --git a/include/media/videobuf2-core.h b/include/media/videobuf2-core.h
+index 12955cb460d23..3b5986cee0739 100644
+--- a/include/media/videobuf2-core.h
++++ b/include/media/videobuf2-core.h
+@@ -46,6 +46,7 @@ enum vb2_memory {
+ 
+ struct vb2_fileio_data;
+ struct vb2_threadio_data;
++struct vb2_buffer;
+ 
+ /**
+ * struct vb2_mem_ops - memory handling/memory allocator operations.
+@@ -53,10 +54,8 @@ struct vb2_threadio_data;
+ * return ERR_PTR() on failure or a pointer to allocator private,
+ * per-buffer data on success; the returned private structure
+ * will then be passed as @buf_priv argument to other ops in this
+- * structure. Additional gfp_flags to use when allocating the
+- * are also passed to this operation. These flags are from the
+- * gfp_flags field of vb2_queue. The size argument to this function
+- * shall be *page aligned*.
++ * structure. The size argument to this function shall be
++ * *page aligned*.
+ * @put: inform the allocator that the buffer will no longer be used;
+ * usually will result in the allocator freeing the buffer (if
+ * no other users of this buffer are present); the @buf_priv
+@@ -117,31 +116,33 @@ struct vb2_threadio_data;
+ * map_dmabuf, unmap_dmabuf.
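
Since the try-state accessors can now return NULL when no subdev state is passed in, callers should check the result before dereferencing. A minimal sketch (the pad and format values are arbitrary):

    struct v4l2_mbus_framefmt *fmt;

    fmt = v4l2_subdev_get_try_format(sd, state, pad);
    if (!fmt)
        return -EINVAL; /* state was NULL, nothing to edit */

    fmt->width = 1920;
    fmt->height = 1080;
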
+ */ + struct vb2_mem_ops { +- void *(*alloc)(struct device *dev, unsigned long attrs, +- unsigned long size, +- enum dma_data_direction dma_dir, +- gfp_t gfp_flags); ++ void *(*alloc)(struct vb2_buffer *vb, ++ struct device *dev, ++ unsigned long size); + void (*put)(void *buf_priv); +- struct dma_buf *(*get_dmabuf)(void *buf_priv, unsigned long flags); +- +- void *(*get_userptr)(struct device *dev, unsigned long vaddr, +- unsigned long size, +- enum dma_data_direction dma_dir); ++ struct dma_buf *(*get_dmabuf)(struct vb2_buffer *vb, ++ void *buf_priv, ++ unsigned long flags); ++ ++ void *(*get_userptr)(struct vb2_buffer *vb, ++ struct device *dev, ++ unsigned long vaddr, ++ unsigned long size); + void (*put_userptr)(void *buf_priv); + + void (*prepare)(void *buf_priv); + void (*finish)(void *buf_priv); + +- void *(*attach_dmabuf)(struct device *dev, ++ void *(*attach_dmabuf)(struct vb2_buffer *vb, ++ struct device *dev, + struct dma_buf *dbuf, +- unsigned long size, +- enum dma_data_direction dma_dir); ++ unsigned long size); + void (*detach_dmabuf)(void *buf_priv); + int (*map_dmabuf)(void *buf_priv); + void (*unmap_dmabuf)(void *buf_priv); + +- void *(*vaddr)(void *buf_priv); +- void *(*cookie)(void *buf_priv); ++ void *(*vaddr)(struct vb2_buffer *vb, void *buf_priv); ++ void *(*cookie)(struct vb2_buffer *vb, void *buf_priv); + + unsigned int (*num_users)(void *buf_priv); + +diff --git a/include/memory/renesas-rpc-if.h b/include/memory/renesas-rpc-if.h +index e3e770f76f349..15dd0076c2936 100644 +--- a/include/memory/renesas-rpc-if.h ++++ b/include/memory/renesas-rpc-if.h +@@ -59,12 +59,14 @@ struct rpcif_op { + + struct rpcif { + struct device *dev; ++ void __iomem *base; + void __iomem *dirmap; + struct regmap *regmap; + struct reset_control *rstc; + size_t size; + enum rpcif_data_dir dir; + u8 bus_size; ++ u8 xfer_size; + void *buffer; + u32 xferlen; + u32 smcr; +diff --git a/include/net/9p/9p.h b/include/net/9p/9p.h +index 03614de869425..6d0615140dbcf 100644 +--- a/include/net/9p/9p.h ++++ b/include/net/9p/9p.h +@@ -32,13 +32,13 @@ + */ + + enum p9_debug_flags { +- P9_DEBUG_ERROR = (1<<0), +- P9_DEBUG_9P = (1<<2), ++ P9_DEBUG_ERROR = (1<<0), ++ P9_DEBUG_9P = (1<<2), + P9_DEBUG_VFS = (1<<3), + P9_DEBUG_CONV = (1<<4), + P9_DEBUG_MUX = (1<<5), + P9_DEBUG_TRANS = (1<<6), +- P9_DEBUG_SLABS = (1<<7), ++ P9_DEBUG_SLABS = (1<<7), + P9_DEBUG_FCALL = (1<<8), + P9_DEBUG_FID = (1<<9), + P9_DEBUG_PKT = (1<<10), +@@ -317,8 +317,8 @@ enum p9_qid_t { + }; + + /* 9P Magic Numbers */ +-#define P9_NOTAG (u16)(~0) +-#define P9_NOFID (u32)(~0) ++#define P9_NOTAG ((u16)(~0)) ++#define P9_NOFID ((u32)(~0)) + #define P9_MAXWELEM 16 + + /* Minimal header size: size[4] type[1] tag[2] */ +diff --git a/include/net/9p/client.h b/include/net/9p/client.h +index e1c308d8d288e..7060de84c5593 100644 +--- a/include/net/9p/client.h ++++ b/include/net/9p/client.h +@@ -23,7 +23,7 @@ + * @p9_proto_2000L: 9P2000.L extension + */ + +-enum p9_proto_versions{ ++enum p9_proto_versions { + p9_proto_legacy, + p9_proto_2000u, + p9_proto_2000L, +@@ -78,7 +78,7 @@ enum p9_req_status_t { + struct p9_req_t { + int status; + int t_err; +- struct kref refcount; ++ refcount_t refcount; + wait_queue_head_t wq; + struct p9_fcall tc; + struct p9_fcall rc; +@@ -219,36 +219,40 @@ struct p9_stat_dotl *p9_client_getattr_dotl(struct p9_fid *fid, + u64 request_mask); + + int p9_client_mknod_dotl(struct p9_fid *oldfid, const char *name, int mode, +- dev_t rdev, kgid_t gid, struct p9_qid *); ++ dev_t rdev, kgid_t gid, struct p9_qid *qid); + int 
p9_client_mkdir_dotl(struct p9_fid *fid, const char *name, int mode, +- kgid_t gid, struct p9_qid *); ++ kgid_t gid, struct p9_qid *qid); + int p9_client_lock_dotl(struct p9_fid *fid, struct p9_flock *flock, u8 *status); + int p9_client_getlock_dotl(struct p9_fid *fid, struct p9_getlock *fl); + void p9_fcall_fini(struct p9_fcall *fc); +-struct p9_req_t *p9_tag_lookup(struct p9_client *, u16); ++struct p9_req_t *p9_tag_lookup(struct p9_client *c, u16 tag); + + static inline void p9_req_get(struct p9_req_t *r) + { +- kref_get(&r->refcount); ++ refcount_inc(&r->refcount); + } + + static inline int p9_req_try_get(struct p9_req_t *r) + { +- return kref_get_unless_zero(&r->refcount); ++ return refcount_inc_not_zero(&r->refcount); + } + +-int p9_req_put(struct p9_req_t *r); ++int p9_req_put(struct p9_client *c, struct p9_req_t *r); + + void p9_client_cb(struct p9_client *c, struct p9_req_t *req, int status); + +-int p9_parse_header(struct p9_fcall *, int32_t *, int8_t *, int16_t *, int); +-int p9stat_read(struct p9_client *, char *, int, struct p9_wstat *); +-void p9stat_free(struct p9_wstat *); ++int p9_parse_header(struct p9_fcall *pdu, int32_t *size, int8_t *type, ++ int16_t *tag, int rewind); ++int p9stat_read(struct p9_client *clnt, char *buf, int len, ++ struct p9_wstat *st); ++void p9stat_free(struct p9_wstat *stbuf); + + int p9_is_proto_dotu(struct p9_client *clnt); + int p9_is_proto_dotl(struct p9_client *clnt); +-struct p9_fid *p9_client_xattrwalk(struct p9_fid *, const char *, u64 *); +-int p9_client_xattrcreate(struct p9_fid *, const char *, u64, int); ++struct p9_fid *p9_client_xattrwalk(struct p9_fid *file_fid, ++ const char *attr_name, u64 *attr_size); ++int p9_client_xattrcreate(struct p9_fid *fid, const char *name, ++ u64 attr_size, int flags); + int p9_client_readlink(struct p9_fid *fid, char **target); + + int p9_client_init(void); +diff --git a/include/net/9p/transport.h b/include/net/9p/transport.h +index 3eb4261b29588..7215976116257 100644 +--- a/include/net/9p/transport.h ++++ b/include/net/9p/transport.h +@@ -40,14 +40,16 @@ struct p9_trans_module { + int maxsize; /* max message size of transport */ + int def; /* this transport should be default */ + struct module *owner; +- int (*create)(struct p9_client *, const char *, char *); +- void (*close) (struct p9_client *); +- int (*request) (struct p9_client *, struct p9_req_t *req); +- int (*cancel) (struct p9_client *, struct p9_req_t *req); +- int (*cancelled)(struct p9_client *, struct p9_req_t *req); +- int (*zc_request)(struct p9_client *, struct p9_req_t *, +- struct iov_iter *, struct iov_iter *, int , int, int); +- int (*show_options)(struct seq_file *, struct p9_client *); ++ int (*create)(struct p9_client *client, ++ const char *devname, char *args); ++ void (*close)(struct p9_client *client); ++ int (*request)(struct p9_client *client, struct p9_req_t *req); ++ int (*cancel)(struct p9_client *client, struct p9_req_t *req); ++ int (*cancelled)(struct p9_client *client, struct p9_req_t *req); ++ int (*zc_request)(struct p9_client *client, struct p9_req_t *req, ++ struct iov_iter *uidata, struct iov_iter *uodata, ++ int inlen, int outlen, int in_hdr_len); ++ int (*show_options)(struct seq_file *m, struct p9_client *client); + }; + + void v9fs_register_trans(struct p9_trans_module *m); +diff --git a/include/net/addrconf.h b/include/net/addrconf.h +index 78ea3e332688f..53627afab1044 100644 +--- a/include/net/addrconf.h ++++ b/include/net/addrconf.h +@@ -6,6 +6,8 @@ + #define RTR_SOLICITATION_INTERVAL (4*HZ) + #define 
RTR_SOLICITATION_MAX_INTERVAL (3600*HZ) /* 1 hour */ + ++#define MIN_VALID_LIFETIME (2*3600) /* 2 hours */ ++ + #define TEMP_VALID_LIFETIME (7*86400) + #define TEMP_PREFERRED_LIFETIME (86400) + #define REGEN_MAX_RETRY (3) +@@ -107,8 +109,6 @@ struct inet6_ifaddr *ipv6_get_ifaddr(struct net *net, + int ipv6_dev_get_saddr(struct net *net, const struct net_device *dev, + const struct in6_addr *daddr, unsigned int srcprefs, + struct in6_addr *saddr); +-int __ipv6_get_lladdr(struct inet6_dev *idev, struct in6_addr *addr, +- u32 banned_flags); + int ipv6_get_lladdr(struct net_device *dev, struct in6_addr *addr, + u32 banned_flags); + bool inet_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2, +@@ -403,6 +403,9 @@ static inline bool ip6_ignore_linkdown(const struct net_device *dev) + { + const struct inet6_dev *idev = __in6_dev_get(dev); + ++ if (unlikely(!idev)) ++ return true; ++ + return !!idev->cnf.ignore_routes_with_linkdown; + } + +diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h +index ab207677e0a8b..f742e50207fbd 100644 +--- a/include/net/af_vsock.h ++++ b/include/net/af_vsock.h +@@ -205,7 +205,8 @@ struct sock *vsock_find_bound_socket(struct sockaddr_vm *addr); + struct sock *vsock_find_connected_socket(struct sockaddr_vm *src, + struct sockaddr_vm *dst); + void vsock_remove_sock(struct vsock_sock *vsk); +-void vsock_for_each_connected_socket(void (*fn)(struct sock *sk)); ++void vsock_for_each_connected_socket(struct vsock_transport *transport, ++ void (*fn)(struct sock *sk)); + int vsock_assign_transport(struct vsock_sock *vsk, struct vsock_sock *psk); + bool vsock_find_cid(unsigned int cid); + +diff --git a/include/net/arp.h b/include/net/arp.h +index 4950191f6b2bf..4a23a97195f33 100644 +--- a/include/net/arp.h ++++ b/include/net/arp.h +@@ -71,6 +71,7 @@ void arp_send(int type, int ptype, __be32 dest_ip, + const unsigned char *src_hw, const unsigned char *th); + int arp_mc_map(__be32 addr, u8 *haddr, struct net_device *dev, int dir); + void arp_ifdown(struct net_device *dev); ++int arp_invalidate(struct net_device *dev, __be32 ip, bool force); + + struct sk_buff *arp_create(int type, int ptype, __be32 dest_ip, + struct net_device *dev, __be32 src_ip, +diff --git a/include/net/ax25.h b/include/net/ax25.h +index 8b7eb46ad72d8..aadff553e4b73 100644 +--- a/include/net/ax25.h ++++ b/include/net/ax25.h +@@ -236,6 +236,7 @@ typedef struct ax25_dev { + #if defined(CONFIG_AX25_DAMA_SLAVE) || defined(CONFIG_AX25_DAMA_MASTER) + ax25_dama_info dama; + #endif ++ refcount_t refcount; + } ax25_dev; + + typedef struct ax25_cb { +@@ -290,6 +291,17 @@ static __inline__ void ax25_cb_put(ax25_cb *ax25) + } + } + ++static inline void ax25_dev_hold(ax25_dev *ax25_dev) ++{ ++ refcount_inc(&ax25_dev->refcount); ++} ++ ++static inline void ax25_dev_put(ax25_dev *ax25_dev) ++{ ++ if (refcount_dec_and_test(&ax25_dev->refcount)) { ++ kfree(ax25_dev); ++ } ++} + static inline __be16 ax25_type_trans(struct sk_buff *skb, struct net_device *dev) + { + skb->dev = dev; +diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h +index 9125effbf4483..355835639ae58 100644 +--- a/include/net/bluetooth/bluetooth.h ++++ b/include/net/bluetooth/bluetooth.h +@@ -180,19 +180,21 @@ void bt_err_ratelimited(const char *fmt, ...); + #define BT_DBG(fmt, ...) pr_debug(fmt "\n", ##__VA_ARGS__) + #endif + ++#define bt_dev_name(hdev) ((hdev) ? (hdev)->name : "null") ++ + #define bt_dev_info(hdev, fmt, ...) 
\
+- BT_INFO("%s: " fmt, (hdev)->name, ##__VA_ARGS__)
++ BT_INFO("%s: " fmt, bt_dev_name(hdev), ##__VA_ARGS__)
+ #define bt_dev_warn(hdev, fmt, ...) \
+- BT_WARN("%s: " fmt, (hdev)->name, ##__VA_ARGS__)
++ BT_WARN("%s: " fmt, bt_dev_name(hdev), ##__VA_ARGS__)
+ #define bt_dev_err(hdev, fmt, ...) \
+- BT_ERR("%s: " fmt, (hdev)->name, ##__VA_ARGS__)
++ BT_ERR("%s: " fmt, bt_dev_name(hdev), ##__VA_ARGS__)
+ #define bt_dev_dbg(hdev, fmt, ...) \
+- BT_DBG("%s: " fmt, (hdev)->name, ##__VA_ARGS__)
++ BT_DBG("%s: " fmt, bt_dev_name(hdev), ##__VA_ARGS__)
+ 
+ #define bt_dev_warn_ratelimited(hdev, fmt, ...) \
+- bt_warn_ratelimited("%s: " fmt, (hdev)->name, ##__VA_ARGS__)
++ bt_warn_ratelimited("%s: " fmt, bt_dev_name(hdev), ##__VA_ARGS__)
+ #define bt_dev_err_ratelimited(hdev, fmt, ...) \
+- bt_err_ratelimited("%s: " fmt, (hdev)->name, ##__VA_ARGS__)
++ bt_err_ratelimited("%s: " fmt, bt_dev_name(hdev), ##__VA_ARGS__)
+ 
+ /* Connection and socket states */
+ enum {
+@@ -420,6 +422,71 @@ out:
+ return NULL;
+ }
+ 
++/* Shall not be called with lock_sock held */
++static inline struct sk_buff *bt_skb_sendmsg(struct sock *sk,
++ struct msghdr *msg,
++ size_t len, size_t mtu,
++ size_t headroom, size_t tailroom)
++{
++ struct sk_buff *skb;
++ size_t size = min_t(size_t, len, mtu);
++ int err;
++
++ skb = bt_skb_send_alloc(sk, size + headroom + tailroom,
++ msg->msg_flags & MSG_DONTWAIT, &err);
++ if (!skb)
++ return ERR_PTR(err);
++
++ skb_reserve(skb, headroom);
++ skb_tailroom_reserve(skb, mtu, tailroom);
++
++ if (!copy_from_iter_full(skb_put(skb, size), size, &msg->msg_iter)) {
++ kfree_skb(skb);
++ return ERR_PTR(-EFAULT);
++ }
++
++ skb->priority = sk->sk_priority;
++
++ return skb;
++}
++
++/* Similar to bt_skb_sendmsg but can split the msg into multiple fragments
++ * according to the MTU.
++ */
++static inline struct sk_buff *bt_skb_sendmmsg(struct sock *sk,
++ struct msghdr *msg,
++ size_t len, size_t mtu,
++ size_t headroom, size_t tailroom)
++{
++ struct sk_buff *skb, **frag;
++
++ skb = bt_skb_sendmsg(sk, msg, len, mtu, headroom, tailroom);
++ if (IS_ERR_OR_NULL(skb))
++ return skb;
++
++ len -= skb->len;
++ if (!len)
++ return skb;
++
++ /* Add remaining data over MTU as continuation fragments */
++ frag = &skb_shinfo(skb)->frag_list;
++ while (len) {
++ struct sk_buff *tmp;
++
++ tmp = bt_skb_sendmsg(sk, msg, len, mtu, headroom, tailroom);
++ if (IS_ERR(tmp)) {
++ return skb;
++ }
++
++ len -= tmp->len;
++
++ *frag = tmp;
++ frag = &(*frag)->next;
++ }
++
++ return skb;
++}
++
+ int bt_to_errno(u16 code);
+ 
+ void hci_sock_set_flag(struct sock *sk, int nr);
+diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h
+index b80415011dcd5..9ce46cb8564d6 100644
+--- a/include/net/bluetooth/hci.h
++++ b/include/net/bluetooth/hci.h
+@@ -246,6 +246,15 @@ enum {
+ * HCI after resume.
+ */
+ HCI_QUIRK_NO_SUSPEND_NOTIFIER,
++
++ /*
++ * When this quirk is set, LE tx power is not queried on startup
++ * and the min/max tx power values default to HCI_TX_POWER_INVALID.
++ *
++ * This quirk can be set before hci_register_dev is called or
++ * during the hdev->setup vendor callback.
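
The fragmentation loop in bt_skb_sendmmsg() is plain MTU-sized chunking: the head skb carries min(len, mtu) bytes and the remainder is chained on frag_list. The arithmetic alone, as a standalone C program (illustrative only, no kernel types):

    #include <stdio.h>
    #include <stddef.h>

    int main(void)
    {
        size_t len = 2500, mtu = 1000; /* example values */
        size_t n = 0;

        while (len) {
            size_t chunk = len < mtu ? len : mtu; /* min(len, mtu) */
            printf("fragment %zu: %zu bytes\n", n++, chunk);
            len -= chunk;
        }
        return 0; /* prints 1000, 1000, 500 */
    }
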
++ */ ++ HCI_QUIRK_BROKEN_READ_TRANSMIT_POWER, + }; + + /* HCI device flags */ +diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h +index a7360c8c72f82..3da5cfcf84c1d 100644 +--- a/include/net/bluetooth/hci_core.h ++++ b/include/net/bluetooth/hci_core.h +@@ -35,6 +35,9 @@ + /* HCI priority */ + #define HCI_PRIO_MAX 7 + ++/* HCI maximum id value */ ++#define HCI_MAX_ID 10000 ++ + /* HCI Core structures */ + struct inquiry_data { + bdaddr_t bdaddr; +diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h +index 3c4f550e5a8b7..2f766e3437ce2 100644 +--- a/include/net/bluetooth/l2cap.h ++++ b/include/net/bluetooth/l2cap.h +@@ -847,6 +847,7 @@ enum { + }; + + void l2cap_chan_hold(struct l2cap_chan *c); ++struct l2cap_chan *l2cap_chan_hold_unless_zero(struct l2cap_chan *c); + void l2cap_chan_put(struct l2cap_chan *c); + + static inline void l2cap_chan_lock(struct l2cap_chan *chan) +diff --git a/include/net/bond_3ad.h b/include/net/bond_3ad.h +index 38785d48baff9..f2273bd5a4c58 100644 +--- a/include/net/bond_3ad.h ++++ b/include/net/bond_3ad.h +@@ -15,8 +15,6 @@ + #define PKT_TYPE_LACPDU cpu_to_be16(ETH_P_SLOW) + #define AD_TIMER_INTERVAL 100 /*msec*/ + +-#define MULTICAST_LACPDU_ADDR {0x01, 0x80, 0xC2, 0x00, 0x00, 0x02} +- + #define AD_LACP_SLOW 0 + #define AD_LACP_FAST 1 + +@@ -262,7 +260,7 @@ struct ad_system { + struct ad_bond_info { + struct ad_system system; /* 802.3ad system structure */ + struct bond_3ad_stats stats; +- u32 agg_select_timer; /* Timer to select aggregator after all adapter's hand shakes */ ++ atomic_t agg_select_timer; /* Timer to select aggregator after all adapter's hand shakes */ + u16 aggregator_identifier; + }; + +diff --git a/include/net/bond_alb.h b/include/net/bond_alb.h +index f6af76c87a6c3..191c36afa1f4a 100644 +--- a/include/net/bond_alb.h ++++ b/include/net/bond_alb.h +@@ -126,7 +126,7 @@ struct tlb_slave_info { + struct alb_bond_info { + struct tlb_client_info *tx_hashtbl; /* Dynamically allocated */ + u32 unbalanced_load; +- int tx_rebalance_counter; ++ atomic_t tx_rebalance_counter; + int lp_counter; + /* -------- rlb parameters -------- */ + int rlb_enabled; +diff --git a/include/net/bonding.h b/include/net/bonding.h +index 15e083e18f75f..8c18c6b01634c 100644 +--- a/include/net/bonding.h ++++ b/include/net/bonding.h +@@ -757,6 +757,9 @@ extern struct rtnl_link_ops bond_link_ops; + /* exported from bond_sysfs_slave.c */ + extern const struct sysfs_ops slave_sysfs_ops; + ++/* exported from bond_3ad.c */ ++extern const u8 lacpdu_mcast_addr[]; ++ + static inline netdev_tx_t bond_tx_drop(struct net_device *dev, struct sk_buff *skb) + { + atomic_long_inc(&dev->tx_dropped); +diff --git a/include/net/busy_poll.h b/include/net/busy_poll.h +index 40296ed976a97..3459a04a3d61c 100644 +--- a/include/net/busy_poll.h ++++ b/include/net/busy_poll.h +@@ -33,7 +33,7 @@ extern unsigned int sysctl_net_busy_poll __read_mostly; + + static inline bool net_busy_loop_on(void) + { +- return sysctl_net_busy_poll; ++ return READ_ONCE(sysctl_net_busy_poll); + } + + static inline bool sk_can_busy_loop(const struct sock *sk) +diff --git a/include/net/checksum.h b/include/net/checksum.h +index 5b96d5bd6e545..d3b5d368a0caa 100644 +--- a/include/net/checksum.h ++++ b/include/net/checksum.h +@@ -22,7 +22,7 @@ + #include <asm/checksum.h> + + #ifndef _HAVE_ARCH_COPY_AND_CSUM_FROM_USER +-static inline ++static __always_inline + __wsum csum_and_copy_from_user (const void __user *src, void *dst, + int len) + { +@@ -33,7 +33,7 @@ __wsum 
csum_and_copy_from_user (const void __user *src, void *dst, + #endif + + #ifndef HAVE_CSUM_COPY_USER +-static __inline__ __wsum csum_and_copy_to_user ++static __always_inline __wsum csum_and_copy_to_user + (const void *src, void __user *dst, int len) + { + __wsum sum = csum_partial(src, len, ~0U); +@@ -45,7 +45,7 @@ static __inline__ __wsum csum_and_copy_to_user + #endif + + #ifndef _HAVE_ARCH_CSUM_AND_COPY +-static inline __wsum ++static __always_inline __wsum + csum_partial_copy_nocheck(const void *src, void *dst, int len) + { + memcpy(dst, src, len); +@@ -54,7 +54,7 @@ csum_partial_copy_nocheck(const void *src, void *dst, int len) + #endif + + #ifndef HAVE_ARCH_CSUM_ADD +-static inline __wsum csum_add(__wsum csum, __wsum addend) ++static __always_inline __wsum csum_add(__wsum csum, __wsum addend) + { + u32 res = (__force u32)csum; + res += (__force u32)addend; +@@ -62,12 +62,12 @@ static inline __wsum csum_add(__wsum csum, __wsum addend) + } + #endif + +-static inline __wsum csum_sub(__wsum csum, __wsum addend) ++static __always_inline __wsum csum_sub(__wsum csum, __wsum addend) + { + return csum_add(csum, ~addend); + } + +-static inline __sum16 csum16_add(__sum16 csum, __be16 addend) ++static __always_inline __sum16 csum16_add(__sum16 csum, __be16 addend) + { + u16 res = (__force u16)csum; + +@@ -75,12 +75,12 @@ static inline __sum16 csum16_add(__sum16 csum, __be16 addend) + return (__force __sum16)(res + (res < (__force u16)addend)); + } + +-static inline __sum16 csum16_sub(__sum16 csum, __be16 addend) ++static __always_inline __sum16 csum16_sub(__sum16 csum, __be16 addend) + { + return csum16_add(csum, ~addend); + } + +-static inline __wsum csum_shift(__wsum sum, int offset) ++static __always_inline __wsum csum_shift(__wsum sum, int offset) + { + /* rotate sum to align it with a 16b boundary */ + if (offset & 1) +@@ -88,42 +88,43 @@ static inline __wsum csum_shift(__wsum sum, int offset) + return sum; + } + +-static inline __wsum ++static __always_inline __wsum + csum_block_add(__wsum csum, __wsum csum2, int offset) + { + return csum_add(csum, csum_shift(csum2, offset)); + } + +-static inline __wsum ++static __always_inline __wsum + csum_block_add_ext(__wsum csum, __wsum csum2, int offset, int len) + { + return csum_block_add(csum, csum2, offset); + } + +-static inline __wsum ++static __always_inline __wsum + csum_block_sub(__wsum csum, __wsum csum2, int offset) + { + return csum_block_add(csum, ~csum2, offset); + } + +-static inline __wsum csum_unfold(__sum16 n) ++static __always_inline __wsum csum_unfold(__sum16 n) + { + return (__force __wsum)n; + } + +-static inline __wsum csum_partial_ext(const void *buff, int len, __wsum sum) ++static __always_inline ++__wsum csum_partial_ext(const void *buff, int len, __wsum sum) + { + return csum_partial(buff, len, sum); + } + + #define CSUM_MANGLED_0 ((__force __sum16)0xffff) + +-static inline void csum_replace_by_diff(__sum16 *sum, __wsum diff) ++static __always_inline void csum_replace_by_diff(__sum16 *sum, __wsum diff) + { + *sum = csum_fold(csum_add(diff, ~csum_unfold(*sum))); + } + +-static inline void csum_replace4(__sum16 *sum, __be32 from, __be32 to) ++static __always_inline void csum_replace4(__sum16 *sum, __be32 from, __be32 to) + { + __wsum tmp = csum_sub(~csum_unfold(*sum), (__force __wsum)from); + +@@ -136,11 +137,16 @@ static inline void csum_replace4(__sum16 *sum, __be32 from, __be32 to) + * m : old value of a 16bit field + * m' : new value of a 16bit field + */ +-static inline void csum_replace2(__sum16 *sum, __be16 old, 
__be16 new) ++static __always_inline void csum_replace2(__sum16 *sum, __be16 old, __be16 new) + { + *sum = ~csum16_add(csum16_sub(~(*sum), old), new); + } + ++static inline void csum_replace(__wsum *csum, __wsum old, __wsum new) ++{ ++ *csum = csum_add(csum_sub(*csum, old), new); ++} ++ + struct sk_buff; + void inet_proto_csum_replace4(__sum16 *sum, struct sk_buff *skb, + __be32 from, __be32 to, bool pseudohdr); +@@ -150,16 +156,16 @@ void inet_proto_csum_replace16(__sum16 *sum, struct sk_buff *skb, + void inet_proto_csum_replace_by_diff(__sum16 *sum, struct sk_buff *skb, + __wsum diff, bool pseudohdr); + +-static inline void inet_proto_csum_replace2(__sum16 *sum, struct sk_buff *skb, +- __be16 from, __be16 to, +- bool pseudohdr) ++static __always_inline ++void inet_proto_csum_replace2(__sum16 *sum, struct sk_buff *skb, ++ __be16 from, __be16 to, bool pseudohdr) + { + inet_proto_csum_replace4(sum, skb, (__force __be32)from, + (__force __be32)to, pseudohdr); + } + +-static inline __wsum remcsum_adjust(void *ptr, __wsum csum, +- int start, int offset) ++static __always_inline __wsum remcsum_adjust(void *ptr, __wsum csum, ++ int start, int offset) + { + __sum16 *psum = (__sum16 *)(ptr + offset); + __wsum delta; +@@ -175,7 +181,7 @@ static inline __wsum remcsum_adjust(void *ptr, __wsum csum, + return delta; + } + +-static inline void remcsum_unadjust(__sum16 *psum, __wsum delta) ++static __always_inline void remcsum_unadjust(__sum16 *psum, __wsum delta) + { + *psum = csum_fold(csum_sub(delta, (__force __wsum)*psum)); + } +diff --git a/include/net/dst.h b/include/net/dst.h +index a057319aabefa..17697ec79949f 100644 +--- a/include/net/dst.h ++++ b/include/net/dst.h +@@ -361,9 +361,8 @@ static inline void __skb_tunnel_rx(struct sk_buff *skb, struct net_device *dev, + static inline void skb_tunnel_rx(struct sk_buff *skb, struct net_device *dev, + struct net *net) + { +- /* TODO : stats should be SMP safe */ +- dev->stats.rx_packets++; +- dev->stats.rx_bytes += skb->len; ++ DEV_STATS_INC(dev, rx_packets); ++ DEV_STATS_ADD(dev, rx_bytes, skb->len); + __skb_tunnel_rx(skb, dev, net); + } + +diff --git a/include/net/dst_cache.h b/include/net/dst_cache.h +index 67634675e9197..df6622a5fe98f 100644 +--- a/include/net/dst_cache.h ++++ b/include/net/dst_cache.h +@@ -79,6 +79,17 @@ static inline void dst_cache_reset(struct dst_cache *dst_cache) + dst_cache->reset_ts = jiffies; + } + ++/** ++ * dst_cache_reset_now - invalidate the cache contents immediately ++ * @dst_cache: the cache ++ * ++ * The caller must be sure there are no concurrent users, as this frees ++ * all dst_cache users immediately, rather than waiting for the next ++ * per-cpu usage like dst_cache_reset does. Most callers should use the ++ * higher speed lazily-freed dst_cache_reset function instead. 
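
In other words, dst_cache_reset_now() is meant for teardown paths that already exclude concurrent users, while fast paths keep using the lazy dst_cache_reset(). A sketch of the intended split (the tunnel type and destroy hook are hypothetical):

    static void my_tunnel_destroy(struct my_tunnel *t)
    {
        /* No packets can be in flight here, so the synchronous
         * variant is safe and releases the cached dsts right away.
         */
        dst_cache_reset_now(&t->dst_cache);
        dst_cache_destroy(&t->dst_cache);
    }
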
++ */ ++void dst_cache_reset_now(struct dst_cache *dst_cache); ++ + /** + * dst_cache_init - initialize the cache, allocating the required storage + * @dst_cache: the cache +diff --git a/include/net/dst_metadata.h b/include/net/dst_metadata.h +index 14efa0ded75dd..adab27ba1ecbf 100644 +--- a/include/net/dst_metadata.h ++++ b/include/net/dst_metadata.h +@@ -123,8 +123,20 @@ static inline struct metadata_dst *tun_dst_unclone(struct sk_buff *skb) + + memcpy(&new_md->u.tun_info, &md_dst->u.tun_info, + sizeof(struct ip_tunnel_info) + md_size); ++#ifdef CONFIG_DST_CACHE ++ /* Unclone the dst cache if there is one */ ++ if (new_md->u.tun_info.dst_cache.cache) { ++ int ret; ++ ++ ret = dst_cache_init(&new_md->u.tun_info.dst_cache, GFP_ATOMIC); ++ if (ret) { ++ metadata_dst_free(new_md); ++ return ERR_PTR(ret); ++ } ++ } ++#endif ++ + skb_dst_drop(skb); +- dst_hold(&new_md->dst); + skb_dst_set(skb, &new_md->dst); + return new_md; + } +diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h +index 4b10676c69d19..bd07484ab9dd5 100644 +--- a/include/net/fib_rules.h ++++ b/include/net/fib_rules.h +@@ -69,7 +69,7 @@ struct fib_rules_ops { + int (*action)(struct fib_rule *, + struct flowi *, int, + struct fib_lookup_arg *); +- bool (*suppress)(struct fib_rule *, ++ bool (*suppress)(struct fib_rule *, int, + struct fib_lookup_arg *); + int (*match)(struct fib_rule *, + struct flowi *, int); +@@ -218,7 +218,9 @@ INDIRECT_CALLABLE_DECLARE(int fib4_rule_action(struct fib_rule *rule, + struct fib_lookup_arg *arg)); + + INDIRECT_CALLABLE_DECLARE(bool fib6_rule_suppress(struct fib_rule *rule, ++ int flags, + struct fib_lookup_arg *arg)); + INDIRECT_CALLABLE_DECLARE(bool fib4_rule_suppress(struct fib_rule *rule, ++ int flags, + struct fib_lookup_arg *arg)); + #endif +diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h +index ffd386ea0dbb3..c8d1c5e187e4b 100644 +--- a/include/net/flow_dissector.h ++++ b/include/net/flow_dissector.h +@@ -59,6 +59,8 @@ struct flow_dissector_key_vlan { + __be16 vlan_tci; + }; + __be16 vlan_tpid; ++ __be16 vlan_eth_type; ++ u16 padding; + }; + + struct flow_dissector_mpls_lse { +diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h +index 3961461d9c8bc..7a2b0223a02c7 100644 +--- a/include/net/flow_offload.h ++++ b/include/net/flow_offload.h +@@ -575,5 +575,6 @@ int flow_indr_dev_setup_offload(struct net_device *dev, struct Qdisc *sch, + enum tc_setup_type type, void *data, + struct flow_block_offload *bo, + void (*cleanup)(struct flow_block_cb *block_cb)); ++bool flow_indr_dev_exists(void); + + #endif /* _NET_FLOW_OFFLOAD_H */ +diff --git a/include/net/ieee802154_netdev.h b/include/net/ieee802154_netdev.h +index d0d188c3294bd..03b64bf876a46 100644 +--- a/include/net/ieee802154_netdev.h ++++ b/include/net/ieee802154_netdev.h +@@ -15,6 +15,22 @@ + #ifndef IEEE802154_NETDEVICE_H + #define IEEE802154_NETDEVICE_H + ++#define IEEE802154_REQUIRED_SIZE(struct_type, member) \ ++ (offsetof(typeof(struct_type), member) + \ ++ sizeof(((typeof(struct_type) *)(NULL))->member)) ++ ++#define IEEE802154_ADDR_OFFSET \ ++ offsetof(typeof(struct sockaddr_ieee802154), addr) ++ ++#define IEEE802154_MIN_NAMELEN (IEEE802154_ADDR_OFFSET + \ ++ IEEE802154_REQUIRED_SIZE(struct ieee802154_addr_sa, addr_type)) ++ ++#define IEEE802154_NAMELEN_SHORT (IEEE802154_ADDR_OFFSET + \ ++ IEEE802154_REQUIRED_SIZE(struct ieee802154_addr_sa, short_addr)) ++ ++#define IEEE802154_NAMELEN_LONG (IEEE802154_ADDR_OFFSET + \ ++ IEEE802154_REQUIRED_SIZE(struct ieee802154_addr_sa, hwaddr)) 
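
The IEEE802154_REQUIRED_SIZE() idiom computes how many bytes of a sockaddr must be present for a member to be fully contained: the member's offset plus its size. The same trick as a standalone program (the struct here is invented for the demo):

    #include <stdio.h>
    #include <stddef.h>

    struct sa {
        unsigned short family;
        int addr_type;
        unsigned char hwaddr[8];
    };

    /* Bytes needed so that 'member' lies entirely inside the buffer. */
    #define REQUIRED_SIZE(type, member) \
        (offsetof(type, member) + sizeof(((type *)0)->member))

    int main(void)
    {
        printf("through addr_type: %zu bytes\n",
               REQUIRED_SIZE(struct sa, addr_type));
        printf("through hwaddr: %zu bytes\n",
               REQUIRED_SIZE(struct sa, hwaddr));
        return 0;
    }
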
++ + #include <net/af_ieee802154.h> + #include <linux/netdevice.h> + #include <linux/skbuff.h> +@@ -165,6 +181,33 @@ static inline void ieee802154_devaddr_to_raw(void *raw, __le64 addr) + memcpy(raw, &temp, IEEE802154_ADDR_LEN); + } + ++static inline int ++ieee802154_sockaddr_check_size(struct sockaddr_ieee802154 *daddr, int len) ++{ ++ struct ieee802154_addr_sa *sa; ++ int ret = 0; ++ ++ sa = &daddr->addr; ++ if (len < IEEE802154_MIN_NAMELEN) ++ return -EINVAL; ++ switch (sa->addr_type) { ++ case IEEE802154_ADDR_NONE: ++ break; ++ case IEEE802154_ADDR_SHORT: ++ if (len < IEEE802154_NAMELEN_SHORT) ++ ret = -EINVAL; ++ break; ++ case IEEE802154_ADDR_LONG: ++ if (len < IEEE802154_NAMELEN_LONG) ++ ret = -EINVAL; ++ break; ++ default: ++ ret = -EINVAL; ++ break; ++ } ++ return ret; ++} ++ + static inline void ieee802154_addr_from_sa(struct ieee802154_addr *a, + const struct ieee802154_addr_sa *sa) + { +diff --git a/include/net/if_inet6.h b/include/net/if_inet6.h +index 653e7d0f65cb7..8ec0878a90a7a 100644 +--- a/include/net/if_inet6.h ++++ b/include/net/if_inet6.h +@@ -64,6 +64,14 @@ struct inet6_ifaddr { + + struct hlist_node addr_lst; + struct list_head if_list; ++ /* ++ * Used to safely traverse idev->addr_list in process context ++ * if the idev->lock needed to protect idev->addr_list cannot be held. ++ * In that case, add the items to this list temporarily and iterate ++ * without holding idev->lock. ++ * See addrconf_ifdown and dev_forward_change. ++ */ ++ struct list_head if_list_aux; + + struct list_head tmp_list; + struct inet6_ifaddr *ifpub; +diff --git a/include/net/inet6_hashtables.h b/include/net/inet6_hashtables.h +index 81b9659530368..56f1286583d3c 100644 +--- a/include/net/inet6_hashtables.h ++++ b/include/net/inet6_hashtables.h +@@ -103,15 +103,24 @@ struct sock *inet6_lookup(struct net *net, struct inet_hashinfo *hashinfo, + const int dif); + + int inet6_hash(struct sock *sk); +-#endif /* IS_ENABLED(CONFIG_IPV6) */ + +-#define INET6_MATCH(__sk, __net, __saddr, __daddr, __ports, __dif, __sdif) \ +- (((__sk)->sk_portpair == (__ports)) && \ +- ((__sk)->sk_family == AF_INET6) && \ +- ipv6_addr_equal(&(__sk)->sk_v6_daddr, (__saddr)) && \ +- ipv6_addr_equal(&(__sk)->sk_v6_rcv_saddr, (__daddr)) && \ +- (((__sk)->sk_bound_dev_if == (__dif)) || \ +- ((__sk)->sk_bound_dev_if == (__sdif))) && \ +- net_eq(sock_net(__sk), (__net))) ++static inline bool inet6_match(struct net *net, const struct sock *sk, ++ const struct in6_addr *saddr, ++ const struct in6_addr *daddr, ++ const __portpair ports, ++ const int dif, const int sdif) ++{ ++ if (!net_eq(sock_net(sk), net) || ++ sk->sk_family != AF_INET6 || ++ sk->sk_portpair != ports || ++ !ipv6_addr_equal(&sk->sk_v6_daddr, saddr) || ++ !ipv6_addr_equal(&sk->sk_v6_rcv_saddr, daddr)) ++ return false; ++ ++ /* READ_ONCE() paired with WRITE_ONCE() in sock_bindtoindex_locked() */ ++ return inet_sk_bound_dev_eq(net, READ_ONCE(sk->sk_bound_dev_if), dif, ++ sdif); ++} ++#endif /* IS_ENABLED(CONFIG_IPV6) */ + + #endif /* _INET6_HASHTABLES_H */ +diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h +index b06c2d02ec84e..695ed45841f06 100644 +--- a/include/net/inet_connection_sock.h ++++ b/include/net/inet_connection_sock.h +@@ -289,7 +289,7 @@ static inline void inet_csk_prepare_for_destroy_sock(struct sock *sk) + { + /* The below has to be done to allow calling inet_csk_destroy_sock */ + sock_set_flag(sk, SOCK_DEAD); +- percpu_counter_inc(sk->sk_prot->orphan_count); ++ this_cpu_inc(*sk->sk_prot->orphan_count); + } + + void 
inet_csk_destroy_sock(struct sock *sk); +@@ -315,7 +315,7 @@ void inet_csk_update_fastreuse(struct inet_bind_bucket *tb, + + struct dst_entry *inet_csk_update_pmtu(struct sock *sk, u32 mtu); + +-#define TCP_PINGPONG_THRESH 3 ++#define TCP_PINGPONG_THRESH 1 + + static inline void inet_csk_enter_pingpong_mode(struct sock *sk) + { +@@ -332,14 +332,6 @@ static inline bool inet_csk_in_pingpong_mode(struct sock *sk) + return inet_csk(sk)->icsk_ack.pingpong >= TCP_PINGPONG_THRESH; + } + +-static inline void inet_csk_inc_pingpong_cnt(struct sock *sk) +-{ +- struct inet_connection_sock *icsk = inet_csk(sk); +- +- if (icsk->icsk_ack.pingpong < U8_MAX) +- icsk->icsk_ack.pingpong++; +-} +- + static inline bool inet_csk_has_ulp(struct sock *sk) + { + return inet_sk(sk)->is_icsk && !!inet_csk(sk)->icsk_ulp_ops; +diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h +index 48cc5795ceda6..63540be0fc34a 100644 +--- a/include/net/inet_frag.h ++++ b/include/net/inet_frag.h +@@ -117,8 +117,15 @@ int fqdir_init(struct fqdir **fqdirp, struct inet_frags *f, struct net *net); + + static inline void fqdir_pre_exit(struct fqdir *fqdir) + { +- fqdir->high_thresh = 0; /* prevent creation of new frags */ +- fqdir->dead = true; ++ /* Prevent creation of new frags. ++ * Pairs with READ_ONCE() in inet_frag_find(). ++ */ ++ WRITE_ONCE(fqdir->high_thresh, 0); ++ ++ /* Pairs with READ_ONCE() in inet_frag_kill(), ip_expire() ++ * and ip6frag_expire_frag_queue(). ++ */ ++ WRITE_ONCE(fqdir->dead, true); + } + void fqdir_exit(struct fqdir *fqdir); + +diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h +index f72ec113ae568..53c22b64e9724 100644 +--- a/include/net/inet_hashtables.h ++++ b/include/net/inet_hashtables.h +@@ -203,17 +203,6 @@ static inline void inet_ehash_locks_free(struct inet_hashinfo *hashinfo) + hashinfo->ehash_locks = NULL; + } + +-static inline bool inet_sk_bound_dev_eq(struct net *net, int bound_dev_if, +- int dif, int sdif) +-{ +-#if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV) +- return inet_bound_dev_eq(!!net->ipv4.sysctl_tcp_l3mdev_accept, +- bound_dev_if, dif, sdif); +-#else +- return inet_bound_dev_eq(true, bound_dev_if, dif, sdif); +-#endif +-} +- + struct inet_bind_bucket * + inet_bind_bucket_create(struct kmem_cache *cachep, struct net *net, + struct inet_bind_hashbucket *head, +@@ -295,7 +284,6 @@ static inline struct sock *inet_lookup_listener(struct net *net, + ((__force __portpair)(((__u32)(__dport) << 16) | (__force __u32)(__be16)(__sport))) + #endif + +-#if (BITS_PER_LONG == 64) + #ifdef __BIG_ENDIAN + #define INET_ADDR_COOKIE(__name, __saddr, __daddr) \ + const __addrpair __name = (__force __addrpair) ( \ +@@ -307,24 +295,20 @@ static inline struct sock *inet_lookup_listener(struct net *net, + (((__force __u64)(__be32)(__daddr)) << 32) | \ + ((__force __u64)(__be32)(__saddr))) + #endif /* __BIG_ENDIAN */ +-#define INET_MATCH(__sk, __net, __cookie, __saddr, __daddr, __ports, __dif, __sdif) \ +- (((__sk)->sk_portpair == (__ports)) && \ +- ((__sk)->sk_addrpair == (__cookie)) && \ +- (((__sk)->sk_bound_dev_if == (__dif)) || \ +- ((__sk)->sk_bound_dev_if == (__sdif))) && \ +- net_eq(sock_net(__sk), (__net))) +-#else /* 32-bit arch */ +-#define INET_ADDR_COOKIE(__name, __saddr, __daddr) \ +- const int __name __deprecated __attribute__((unused)) + +-#define INET_MATCH(__sk, __net, __cookie, __saddr, __daddr, __ports, __dif, __sdif) \ +- (((__sk)->sk_portpair == (__ports)) && \ +- ((__sk)->sk_daddr == (__saddr)) && \ +- ((__sk)->sk_rcv_saddr == (__daddr)) && \ +- 
(((__sk)->sk_bound_dev_if == (__dif)) || \ +- ((__sk)->sk_bound_dev_if == (__sdif))) && \ +- net_eq(sock_net(__sk), (__net))) +-#endif /* 64-bit arch */ ++static inline bool INET_MATCH(struct net *net, const struct sock *sk, ++ const __addrpair cookie, const __portpair ports, ++ int dif, int sdif) ++{ ++ if (!net_eq(sock_net(sk), net) || ++ sk->sk_portpair != ports || ++ sk->sk_addrpair != cookie) ++ return false; ++ ++ /* READ_ONCE() paired with WRITE_ONCE() in sock_bindtoindex_locked() */ ++ return inet_sk_bound_dev_eq(net, READ_ONCE(sk->sk_bound_dev_if), dif, ++ sdif); ++} + + /* Sockets in TCP_CLOSE state are _always_ taken out of the hash, so we need + * not check it for lookups anymore, thanks Alexey. -DaveM +@@ -425,7 +409,7 @@ static inline void sk_rcv_saddr_set(struct sock *sk, __be32 addr) + } + + int __inet_hash_connect(struct inet_timewait_death_row *death_row, +- struct sock *sk, u32 port_offset, ++ struct sock *sk, u64 port_offset, + int (*check_established)(struct inet_timewait_death_row *, + struct sock *, __u16, + struct inet_timewait_sock **)); +diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h +index 89163ef8cf4be..2c2b41ea7f81d 100644 +--- a/include/net/inet_sock.h ++++ b/include/net/inet_sock.h +@@ -107,7 +107,8 @@ static inline struct inet_request_sock *inet_rsk(const struct request_sock *sk) + + static inline u32 inet_request_mark(const struct sock *sk, struct sk_buff *skb) + { +- if (!sk->sk_mark && sock_net(sk)->ipv4.sysctl_tcp_fwmark_accept) ++ if (!sk->sk_mark && ++ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fwmark_accept)) + return skb->mark; + + return sk->sk_mark; +@@ -116,14 +117,15 @@ static inline u32 inet_request_mark(const struct sock *sk, struct sk_buff *skb) + static inline int inet_request_bound_dev_if(const struct sock *sk, + struct sk_buff *skb) + { ++ int bound_dev_if = READ_ONCE(sk->sk_bound_dev_if); + #ifdef CONFIG_NET_L3_MASTER_DEV + struct net *net = sock_net(sk); + +- if (!sk->sk_bound_dev_if && net->ipv4.sysctl_tcp_l3mdev_accept) ++ if (!bound_dev_if && READ_ONCE(net->ipv4.sysctl_tcp_l3mdev_accept)) + return l3mdev_master_ifindex_by_index(net, skb->skb_iif); + #endif + +- return sk->sk_bound_dev_if; ++ return bound_dev_if; + } + + static inline int inet_sk_bound_l3mdev(const struct sock *sk) +@@ -131,7 +133,7 @@ static inline int inet_sk_bound_l3mdev(const struct sock *sk) + #ifdef CONFIG_NET_L3_MASTER_DEV + struct net *net = sock_net(sk); + +- if (!net->ipv4.sysctl_tcp_l3mdev_accept) ++ if (!READ_ONCE(net->ipv4.sysctl_tcp_l3mdev_accept)) + return l3mdev_master_ifindex_by_index(net, + sk->sk_bound_dev_if); + #endif +@@ -147,6 +149,17 @@ static inline bool inet_bound_dev_eq(bool l3mdev_accept, int bound_dev_if, + return bound_dev_if == dif || bound_dev_if == sdif; + } + ++static inline bool inet_sk_bound_dev_eq(struct net *net, int bound_dev_if, ++ int dif, int sdif) ++{ ++#if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV) ++ return inet_bound_dev_eq(!!READ_ONCE(net->ipv4.sysctl_tcp_l3mdev_accept), ++ bound_dev_if, dif, sdif); ++#else ++ return inet_bound_dev_eq(true, bound_dev_if, dif, sdif); ++#endif ++} ++ + struct inet_cork { + unsigned int flags; + __be32 addr; +@@ -207,11 +220,10 @@ struct inet_sock { + __be32 inet_saddr; + __s16 uc_ttl; + __u16 cmsg_flags; ++ struct ip_options_rcu __rcu *inet_opt; + __be16 inet_sport; + __u16 inet_id; + +- struct ip_options_rcu __rcu *inet_opt; +- int rx_dst_ifindex; + __u8 tos; + __u8 min_ttl; + __u8 mc_ttl; +@@ -253,6 +265,11 @@ struct inet_sock { + #define IP_CMSG_CHECKSUM BIT(7) + #define 
IP_CMSG_RECVFRAGSIZE BIT(8) + ++static inline bool sk_is_inet(struct sock *sk) ++{ ++ return sk->sk_family == AF_INET || sk->sk_family == AF_INET6; ++} ++ + /** + * sk_to_full_sk - Access to a full socket + * @sk: pointer to a socket +@@ -369,7 +386,7 @@ static inline bool inet_get_convert_csum(struct sock *sk) + static inline bool inet_can_nonlocal_bind(struct net *net, + struct inet_sock *inet) + { +- return net->ipv4.sysctl_ip_nonlocal_bind || ++ return READ_ONCE(net->ipv4.sysctl_ip_nonlocal_bind) || + inet->freebind || inet->transparent; + } + +diff --git a/include/net/ip.h b/include/net/ip.h +index 9192444f2964e..6ae923c55cf44 100644 +--- a/include/net/ip.h ++++ b/include/net/ip.h +@@ -55,6 +55,7 @@ struct inet_skb_parm { + #define IPSKB_DOREDIRECT BIT(5) + #define IPSKB_FRAG_PMTU BIT(6) + #define IPSKB_L3SLAVE BIT(7) ++#define IPSKB_NOPOLICY BIT(8) + + u16 frag_max_size; + }; +@@ -351,7 +352,7 @@ static inline bool sysctl_dev_name_is_allowed(const char *name) + + static inline bool inet_port_requires_bind_service(struct net *net, unsigned short port) + { +- return port < net->ipv4.sysctl_ip_prot_sock; ++ return port < READ_ONCE(net->ipv4.sysctl_ip_prot_sock); + } + + #else +@@ -378,7 +379,7 @@ void ipfrag_init(void); + void ip_static_sysctl_init(void); + + #define IP4_REPLY_MARK(net, mark) \ +- ((net)->ipv4.sysctl_fwmark_reflect ? (mark) : 0) ++ (READ_ONCE((net)->ipv4.sysctl_fwmark_reflect) ? (mark) : 0) + + static inline bool ip_is_fragment(const struct iphdr *iph) + { +@@ -440,7 +441,7 @@ static inline unsigned int ip_dst_mtu_maybe_forward(const struct dst_entry *dst, + struct net *net = dev_net(dst->dev); + unsigned int mtu; + +- if (net->ipv4.sysctl_ip_fwd_use_pmtu || ++ if (READ_ONCE(net->ipv4.sysctl_ip_fwd_use_pmtu) || + ip_mtu_locked(dst) || + !forwarding) { + mtu = rt->rt_pmtu; +@@ -520,19 +521,18 @@ static inline void ip_select_ident_segs(struct net *net, struct sk_buff *skb, + { + struct iphdr *iph = ip_hdr(skb); + ++ /* We had many attacks based on IPID, use the private ++ * generator as much as we can. ++ */ ++ if (sk && inet_sk(sk)->inet_daddr) { ++ iph->id = htons(inet_sk(sk)->inet_id); ++ inet_sk(sk)->inet_id += segs; ++ return; ++ } + if ((iph->frag_off & htons(IP_DF)) && !skb->ignore_df) { +- /* This is only to work around buggy Windows95/2000 +- * VJ compression implementations. If the ID field +- * does not change, they drop every other packet in +- * a TCP stream using header compression. 
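
Many hunks in this patch follow a single pattern: sysctl values are updated without any lock the readers hold, so lockless readers become READ_ONCE() to pair with a WRITE_ONCE() on the store side and annotate the benign data race. In miniature (sysctl_foo is a stand-in name, not a real knob):

    /* writer, e.g. the sysctl handler */
    WRITE_ONCE(net->ipv4.sysctl_foo, val);

    /* reader on a lockless fast path */
    if (READ_ONCE(net->ipv4.sysctl_foo))
        do_thing();
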
+- */ +- if (sk && inet_sk(sk)->inet_daddr) { +- iph->id = htons(inet_sk(sk)->inet_id); +- inet_sk(sk)->inet_id += segs; +- } else { +- iph->id = 0; +- } ++ iph->id = 0; + } else { ++ /* Unfortunately we need the big hammer to get a suitable IPID */ + __ip_select_ident(net, iph, segs); + } + } +@@ -559,7 +559,7 @@ static inline void iph_to_flow_copy_v4addrs(struct flow_keys *flow, + BUILD_BUG_ON(offsetof(typeof(flow->addrs), v4addrs.dst) != + offsetof(typeof(flow->addrs), v4addrs.src) + + sizeof(flow->addrs.v4addrs.src)); +- memcpy(&flow->addrs.v4addrs, &iph->saddr, sizeof(flow->addrs.v4addrs)); ++ memcpy(&flow->addrs.v4addrs, &iph->addrs, sizeof(flow->addrs.v4addrs)); + flow->control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; + } + +diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h +index c412dde4d67dc..bbb27639f2933 100644 +--- a/include/net/ip6_fib.h ++++ b/include/net/ip6_fib.h +@@ -189,14 +189,16 @@ struct fib6_info { + u32 fib6_metric; + u8 fib6_protocol; + u8 fib6_type; ++ ++ u8 offload; ++ u8 trap; ++ u8 offload_failed; ++ + u8 should_flush:1, + dst_nocount:1, + dst_nopolicy:1, + fib6_destroying:1, +- offload:1, +- trap:1, +- offload_failed:1, +- unused:1; ++ unused:4; + + struct rcu_head rcu; + struct nexthop *nh; +@@ -281,7 +283,7 @@ static inline bool fib6_get_cookie_safe(const struct fib6_info *f6i, + fn = rcu_dereference(f6i->fib6_node); + + if (fn) { +- *cookie = fn->fn_sernum; ++ *cookie = READ_ONCE(fn->fn_sernum); + /* pairs with smp_wmb() in __fib6_update_sernum_upto_root() */ + smp_rmb(); + status = true; +@@ -485,6 +487,7 @@ int fib6_nh_init(struct net *net, struct fib6_nh *fib6_nh, + struct fib6_config *cfg, gfp_t gfp_flags, + struct netlink_ext_ack *extack); + void fib6_nh_release(struct fib6_nh *fib6_nh); ++void fib6_nh_release_dsts(struct fib6_nh *fib6_nh); + + int call_fib6_entry_notifiers(struct net *net, + enum fib_event_type event_type, +diff --git a/include/net/ip6_tunnel.h b/include/net/ip6_tunnel.h +index 028eaea1c8544..42d50856fcf24 100644 +--- a/include/net/ip6_tunnel.h ++++ b/include/net/ip6_tunnel.h +@@ -57,7 +57,7 @@ struct ip6_tnl { + + /* These fields used only by GRE */ + __u32 i_seqno; /* The last seen seqno */ +- __u32 o_seqno; /* The last output seqno */ ++ atomic_t o_seqno; /* The last output seqno */ + int hlen; /* tun_hlen + encap_hlen */ + int tun_hlen; /* Precalculated header length */ + int encap_hlen; /* Encap header length (FOU,GUE) */ +diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h +index ab5348e57db1a..3417ba2d27ad6 100644 +--- a/include/net/ip_fib.h ++++ b/include/net/ip_fib.h +@@ -438,7 +438,7 @@ int fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, + #ifdef CONFIG_IP_ROUTE_CLASSID + static inline int fib_num_tclassid_users(struct net *net) + { +- return net->ipv4.fib_num_tclassid_users; ++ return atomic_read(&net->ipv4.fib_num_tclassid_users); + } + #else + static inline int fib_num_tclassid_users(struct net *net) +diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h +index bc3b13ec93c9d..37d5d4968e20a 100644 +--- a/include/net/ip_tunnels.h ++++ b/include/net/ip_tunnels.h +@@ -113,7 +113,7 @@ struct ip_tunnel { + + /* These four fields used only by GRE */ + u32 i_seqno; /* The last seen seqno */ +- u32 o_seqno; /* The last output seqno */ ++ atomic_t o_seqno; /* The last output seqno */ + int tun_hlen; /* Precalculated header length */ + + /* These four fields used only by ERSPAN */ +diff --git a/include/net/ipv6.h b/include/net/ipv6.h +index f2d0ecc257bb2..a5e18d65c82db 100644 +--- 
a/include/net/ipv6.h ++++ b/include/net/ipv6.h +@@ -391,17 +391,20 @@ static inline void txopt_put(struct ipv6_txoptions *opt) + kfree_rcu(opt, rcu); + } + ++#if IS_ENABLED(CONFIG_IPV6) + struct ip6_flowlabel *__fl6_sock_lookup(struct sock *sk, __be32 label); + + extern struct static_key_false_deferred ipv6_flowlabel_exclusive; + static inline struct ip6_flowlabel *fl6_sock_lookup(struct sock *sk, + __be32 label) + { +- if (static_branch_unlikely(&ipv6_flowlabel_exclusive.key)) ++ if (static_branch_unlikely(&ipv6_flowlabel_exclusive.key) && ++ READ_ONCE(sock_net(sk)->ipv6.flowlabel_has_excl)) + return __fl6_sock_lookup(sk, label) ? : ERR_PTR(-ENOENT); + + return NULL; + } ++#endif + + struct ipv6_txoptions *fl6_merge_options(struct ipv6_txoptions *opt_space, + struct ip6_flowlabel *fl, +@@ -840,7 +843,7 @@ static inline void iph_to_flow_copy_v6addrs(struct flow_keys *flow, + BUILD_BUG_ON(offsetof(typeof(flow->addrs), v6addrs.dst) != + offsetof(typeof(flow->addrs), v6addrs.src) + + sizeof(flow->addrs.v6addrs.src)); +- memcpy(&flow->addrs.v6addrs, &iph->saddr, sizeof(flow->addrs.v6addrs)); ++ memcpy(&flow->addrs.v6addrs, &iph->addrs, sizeof(flow->addrs.v6addrs)); + flow->control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS; + } + +diff --git a/include/net/ipv6_frag.h b/include/net/ipv6_frag.h +index 851029ecff13c..0a4779175a523 100644 +--- a/include/net/ipv6_frag.h ++++ b/include/net/ipv6_frag.h +@@ -67,7 +67,8 @@ ip6frag_expire_frag_queue(struct net *net, struct frag_queue *fq) + struct sk_buff *head; + + rcu_read_lock(); +- if (fq->q.fqdir->dead) ++ /* Paired with the WRITE_ONCE() in fqdir_pre_exit(). */ ++ if (READ_ONCE(fq->q.fqdir->dead)) + goto out_rcu_unlock; + spin_lock(&fq->q.lock); + +diff --git a/include/net/ipv6_stubs.h b/include/net/ipv6_stubs.h +index afbce90c44808..45e0339be6fa4 100644 +--- a/include/net/ipv6_stubs.h ++++ b/include/net/ipv6_stubs.h +@@ -47,6 +47,7 @@ struct ipv6_stub { + struct fib6_config *cfg, gfp_t gfp_flags, + struct netlink_ext_ack *extack); + void (*fib6_nh_release)(struct fib6_nh *fib6_nh); ++ void (*fib6_nh_release_dsts)(struct fib6_nh *fib6_nh); + void (*fib6_update_sernum)(struct net *net, struct fib6_info *rt); + int (*ip6_del_rt)(struct net *net, struct fib6_info *rt, bool skip_notify); + void (*fib6_rt_update)(struct net *net, struct fib6_info *rt, +diff --git a/include/net/llc.h b/include/net/llc.h +index df282d9b40170..9c10b121b49b0 100644 +--- a/include/net/llc.h ++++ b/include/net/llc.h +@@ -72,7 +72,9 @@ struct llc_sap { + static inline + struct hlist_head *llc_sk_dev_hash(struct llc_sap *sap, int ifindex) + { +- return &sap->sk_dev_hash[ifindex % LLC_SK_DEV_HASH_ENTRIES]; ++ u32 bucket = hash_32(ifindex, LLC_SK_DEV_HASH_BITS); ++ ++ return &sap->sk_dev_hash[bucket]; + } + + static inline +diff --git a/include/net/mptcp.h b/include/net/mptcp.h +index 3214848402ec9..1120363987d05 100644 +--- a/include/net/mptcp.h ++++ b/include/net/mptcp.h +@@ -93,8 +93,6 @@ struct mptcp_out_options { + }; + + #ifdef CONFIG_MPTCP +-extern struct request_sock_ops mptcp_subflow_request_sock_ops; +- + void mptcp_init(void); + + static inline bool sk_is_mptcp(const struct sock *sk) +@@ -182,6 +180,9 @@ void mptcp_seq_show(struct seq_file *seq); + int mptcp_subflow_init_cookie_req(struct request_sock *req, + const struct sock *sk_listener, + struct sk_buff *skb); ++struct request_sock *mptcp_subflow_reqsk_alloc(const struct request_sock_ops *ops, ++ struct sock *sk_listener, ++ bool attach_listener); + + __be32 mptcp_get_reset_option(const struct sk_buff *skb); + +@@ 
-274,6 +275,13 @@ static inline int mptcp_subflow_init_cookie_req(struct request_sock *req, + return 0; /* TCP fallback */ + } + ++static inline struct request_sock *mptcp_subflow_reqsk_alloc(const struct request_sock_ops *ops, ++ struct sock *sk_listener, ++ bool attach_listener) ++{ ++ return NULL; ++} ++ + static inline __be32 mptcp_reset_option(const struct sk_buff *skb) { return htonl(0u); } + #endif /* CONFIG_MPTCP */ + +diff --git a/include/net/mrp.h b/include/net/mrp.h +index 1c308c034e1a6..a8102661fd613 100644 +--- a/include/net/mrp.h ++++ b/include/net/mrp.h +@@ -120,6 +120,7 @@ struct mrp_applicant { + struct sk_buff *pdu; + struct rb_root mad; + struct rcu_head rcu; ++ bool active; + }; + + struct mrp_port { +diff --git a/include/net/ndisc.h b/include/net/ndisc.h +index 38e4094960cee..e97ef508664f4 100644 +--- a/include/net/ndisc.h ++++ b/include/net/ndisc.h +@@ -487,9 +487,9 @@ int igmp6_late_init(void); + void igmp6_cleanup(void); + void igmp6_late_cleanup(void); + +-int igmp6_event_query(struct sk_buff *skb); ++void igmp6_event_query(struct sk_buff *skb); + +-int igmp6_event_report(struct sk_buff *skb); ++void igmp6_event_report(struct sk_buff *skb); + + + #ifdef CONFIG_SYSCTL +diff --git a/include/net/neighbour.h b/include/net/neighbour.h +index 22ced1381ede5..d5767e25509cc 100644 +--- a/include/net/neighbour.h ++++ b/include/net/neighbour.h +@@ -253,6 +253,7 @@ static inline void *neighbour_priv(const struct neighbour *n) + #define NEIGH_UPDATE_F_OVERRIDE 0x00000001 + #define NEIGH_UPDATE_F_WEAK_OVERRIDE 0x00000002 + #define NEIGH_UPDATE_F_OVERRIDE_ISROUTER 0x00000004 ++#define NEIGH_UPDATE_F_USE 0x10000000 + #define NEIGH_UPDATE_F_EXT_LEARNED 0x20000000 + #define NEIGH_UPDATE_F_ISROUTER 0x40000000 + #define NEIGH_UPDATE_F_ADMIN 0x80000000 +@@ -504,10 +505,15 @@ static inline int neigh_output(struct neighbour *n, struct sk_buff *skb, + { + const struct hh_cache *hh = &n->hh; + +- if ((n->nud_state & NUD_CONNECTED) && hh->hh_len && !skip_cache) ++ /* n->nud_state and hh->hh_len could be changed under us. ++ * neigh_hh_output() is taking care of the race later. ++ */ ++ if (!skip_cache && ++ (READ_ONCE(n->nud_state) & NUD_CONNECTED) && ++ READ_ONCE(hh->hh_len)) + return neigh_hh_output(hh, skb); +- else +- return n->output(n, skb); ++ ++ return n->output(n, skb); + } + + static inline struct neighbour * +diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h +index bb5fa59140321..2ba326f9e004d 100644 +--- a/include/net/net_namespace.h ++++ b/include/net/net_namespace.h +@@ -479,4 +479,10 @@ static inline void fnhe_genid_bump(struct net *net) + atomic_inc(&net->fnhe_genid); + } + ++#ifdef CONFIG_NET ++void net_ns_init(void); ++#else ++static inline void net_ns_init(void) {} ++#endif ++ + #endif /* __NET_NET_NAMESPACE_H */ +diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h +index cc663c68ddc4b..34c266502a50e 100644 +--- a/include/net/netfilter/nf_conntrack.h ++++ b/include/net/netfilter/nf_conntrack.h +@@ -76,6 +76,8 @@ struct nf_conn { + * Hint, SKB address this struct and refcnt via skb->_nfct and + * helpers nf_conntrack_get() and nf_conntrack_put(). + * Helper nf_ct_put() equals nf_conntrack_put() by dec refcnt, ++ * except that the latter uses internal indirection and does not ++ * result in a conntrack module dependency. + * beware nf_ct_get() is different and don't inc refcnt. 
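
The comment above boils down to nf_ct_put() dropping the reference directly rather than bouncing through the nf_conntrack_put() indirection, i.e. the standard refcount_t put shape (generic sketch; obj and obj_destroy are placeholders):

    static inline void obj_put(struct obj *o)
    {
        /* Destroy exactly once, when the last reference goes away. */
        if (o && refcount_dec_and_test(&o->use))
            obj_destroy(o);
    }
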
+ */ + struct nf_conntrack ct_general; +@@ -169,11 +171,13 @@ nf_ct_get(const struct sk_buff *skb, enum ip_conntrack_info *ctinfo) + return (struct nf_conn *)(nfct & NFCT_PTRMASK); + } + ++void nf_ct_destroy(struct nf_conntrack *nfct); ++ + /* decrement reference count on a conntrack */ + static inline void nf_ct_put(struct nf_conn *ct) + { +- WARN_ON(!ct); +- nf_conntrack_put(&ct->ct_general); ++ if (ct && refcount_dec_and_test(&ct->ct_general.use)) ++ nf_ct_destroy(&ct->ct_general); + } + + /* Protocol module loading */ +@@ -276,14 +280,14 @@ static inline bool nf_is_loopback_packet(const struct sk_buff *skb) + /* jiffies until ct expires, 0 if already expired */ + static inline unsigned long nf_ct_expires(const struct nf_conn *ct) + { +- s32 timeout = ct->timeout - nfct_time_stamp; ++ s32 timeout = READ_ONCE(ct->timeout) - nfct_time_stamp; + + return timeout > 0 ? timeout : 0; + } + + static inline bool nf_ct_is_expired(const struct nf_conn *ct) + { +- return (__s32)(ct->timeout - nfct_time_stamp) <= 0; ++ return (__s32)(READ_ONCE(ct->timeout) - nfct_time_stamp) <= 0; + } + + /* use after obtaining a reference count */ +@@ -302,7 +306,7 @@ static inline bool nf_ct_should_gc(const struct nf_conn *ct) + static inline void nf_ct_offload_timeout(struct nf_conn *ct) + { + if (nf_ct_expires(ct) < NF_CT_DAY / 2) +- ct->timeout = nfct_time_stamp + NF_CT_DAY; ++ WRITE_ONCE(ct->timeout, nfct_time_stamp + NF_CT_DAY); + } + + struct kernel_param; +diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h +index 13807ea94cd2b..2d524782f53b7 100644 +--- a/include/net/netfilter/nf_conntrack_core.h ++++ b/include/net/netfilter/nf_conntrack_core.h +@@ -58,8 +58,13 @@ static inline int nf_conntrack_confirm(struct sk_buff *skb) + int ret = NF_ACCEPT; + + if (ct) { +- if (!nf_ct_is_confirmed(ct)) ++ if (!nf_ct_is_confirmed(ct)) { + ret = __nf_conntrack_confirm(skb); ++ ++ if (ret == NF_ACCEPT) ++ ct = (struct nf_conn *)skb_nfct(skb); ++ } ++ + if (likely(ret == NF_ACCEPT)) + nf_ct_deliver_cached_events(ct); + } +diff --git a/include/net/netfilter/nf_conntrack_helper.h b/include/net/netfilter/nf_conntrack_helper.h +index 37f0fbefb060f..9939c366f720d 100644 +--- a/include/net/netfilter/nf_conntrack_helper.h ++++ b/include/net/netfilter/nf_conntrack_helper.h +@@ -177,4 +177,5 @@ void nf_nat_helper_unregister(struct nf_conntrack_nat_helper *nat); + int nf_nat_helper_try_module_get(const char *name, u16 l3num, + u8 protonum); + void nf_nat_helper_put(struct nf_conntrack_helper *helper); ++void nf_ct_set_auto_assign_helper_warned(struct net *net); + #endif /*_NF_CONNTRACK_HELPER_H*/ +diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h +index a3647fadf1ccb..aaa518e777e9e 100644 +--- a/include/net/netfilter/nf_flow_table.h ++++ b/include/net/netfilter/nf_flow_table.h +@@ -10,6 +10,8 @@ + #include <linux/netfilter/nf_conntrack_tuple_common.h> + #include <net/flow_offload.h> + #include <net/dst.h> ++#include <linux/if_pppox.h> ++#include <linux/ppp_defs.h> + + struct nf_flowtable; + struct nf_flow_rule; +@@ -264,6 +266,7 @@ void flow_offload_refresh(struct nf_flowtable *flow_table, + + struct flow_offload_tuple_rhash *flow_offload_lookup(struct nf_flowtable *flow_table, + struct flow_offload_tuple *tuple); ++void nf_flow_table_gc_run(struct nf_flowtable *flow_table); + void nf_flow_table_gc_cleanup(struct nf_flowtable *flowtable, + struct net_device *dev); + void nf_flow_table_cleanup(struct net_device *dev); +@@ -300,6 +303,8 @@ void 
nf_flow_offload_stats(struct nf_flowtable *flowtable, + struct flow_offload *flow); + + void nf_flow_table_offload_flush(struct nf_flowtable *flowtable); ++void nf_flow_table_offload_flush_cleanup(struct nf_flowtable *flowtable); ++ + int nf_flow_table_offload_setup(struct nf_flowtable *flowtable, + struct net_device *dev, + enum flow_block_command cmd); +@@ -313,4 +318,20 @@ int nf_flow_rule_route_ipv6(struct net *net, const struct flow_offload *flow, + int nf_flow_table_offload_init(void); + void nf_flow_table_offload_exit(void); + ++static inline __be16 nf_flow_pppoe_proto(const struct sk_buff *skb) ++{ ++ __be16 proto; ++ ++ proto = *((__be16 *)(skb_mac_header(skb) + ETH_HLEN + ++ sizeof(struct pppoe_hdr))); ++ switch (proto) { ++ case htons(PPP_IP): ++ return htons(ETH_P_IP); ++ case htons(PPP_IPV6): ++ return htons(ETH_P_IPV6); ++ } ++ ++ return 0; ++} ++ + #endif /* _NF_FLOW_TABLE_H */ +diff --git a/include/net/netfilter/nf_queue.h b/include/net/netfilter/nf_queue.h +index 9eed51e920e87..980daa6e1e3aa 100644 +--- a/include/net/netfilter/nf_queue.h ++++ b/include/net/netfilter/nf_queue.h +@@ -37,7 +37,7 @@ void nf_register_queue_handler(const struct nf_queue_handler *qh); + void nf_unregister_queue_handler(void); + void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict); + +-void nf_queue_entry_get_refs(struct nf_queue_entry *entry); ++bool nf_queue_entry_get_refs(struct nf_queue_entry *entry); + void nf_queue_entry_free(struct nf_queue_entry *entry); + + static inline void init_hashrandom(u32 *jhash_initval) +diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h +index a16171c5fd9eb..80df8ff5e6752 100644 +--- a/include/net/netfilter/nf_tables.h ++++ b/include/net/netfilter/nf_tables.h +@@ -21,13 +21,19 @@ struct module; + + #define NFT_JUMP_STACK_SIZE 16 + ++enum { ++ NFT_PKTINFO_L4PROTO = (1 << 0), ++ NFT_PKTINFO_INNER = (1 << 1), ++}; ++ + struct nft_pktinfo { + struct sk_buff *skb; + const struct nf_hook_state *state; +- bool tprot_set; ++ u8 flags; + u8 tprot; + u16 fragoff; + unsigned int thoff; ++ unsigned int inneroff; + }; + + static inline struct sock *nft_sk(const struct nft_pktinfo *pkt) +@@ -75,7 +81,7 @@ static inline void nft_set_pktinfo(struct nft_pktinfo *pkt, + + static inline void nft_set_pktinfo_unspec(struct nft_pktinfo *pkt) + { +- pkt->tprot_set = false; ++ pkt->flags = 0; + pkt->tprot = 0; + pkt->thoff = 0; + pkt->fragoff = 0; +@@ -187,13 +193,18 @@ struct nft_ctx { + bool report; + }; + ++enum nft_data_desc_flags { ++ NFT_DATA_DESC_SETELEM = (1 << 0), ++}; ++ + struct nft_data_desc { + enum nft_data_types type; ++ unsigned int size; + unsigned int len; ++ unsigned int flags; + }; + +-int nft_data_init(const struct nft_ctx *ctx, +- struct nft_data *data, unsigned int size, ++int nft_data_init(const struct nft_ctx *ctx, struct nft_data *data, + struct nft_data_desc *desc, const struct nlattr *nla); + void nft_data_hold(const struct nft_data *data, enum nft_data_types type); + void nft_data_release(const struct nft_data *data, enum nft_data_types type); +@@ -272,17 +283,29 @@ struct nft_set_iter { + /** + * struct nft_set_desc - description of set elements + * ++ * @ktype: key type + * @klen: key length ++ * @dtype: data type + * @dlen: data length ++ * @objtype: object type ++ * @flags: flags + * @size: number of set elements ++ * @policy: set policy ++ * @gc_int: garbage collector interval + * @field_len: length of each field in concatenation, bytes + * @field_count: number of concatenated fields in element + * @expr: 
set must support for expressions + */ + struct nft_set_desc { ++ u32 ktype; + unsigned int klen; ++ u32 dtype; + unsigned int dlen; ++ u32 objtype; + unsigned int size; ++ u32 policy; ++ u32 gc_int; ++ u64 timeout; + u8 field_len[NFT_REG32_COUNT]; + u8 field_count; + bool expr; +@@ -539,7 +562,9 @@ void *nft_set_catchall_gc(const struct nft_set *set); + + static inline unsigned long nft_set_gc_interval(const struct nft_set *set) + { +- return set->gc_int ? msecs_to_jiffies(set->gc_int) : HZ; ++ u32 gc_int = READ_ONCE(set->gc_int); ++ ++ return gc_int ? msecs_to_jiffies(gc_int) : HZ; + } + + /** +@@ -636,18 +661,22 @@ static inline void nft_set_ext_prepare(struct nft_set_ext_tmpl *tmpl) + tmpl->len = sizeof(struct nft_set_ext); + } + +-static inline void nft_set_ext_add_length(struct nft_set_ext_tmpl *tmpl, u8 id, +- unsigned int len) ++static inline int nft_set_ext_add_length(struct nft_set_ext_tmpl *tmpl, u8 id, ++ unsigned int len) + { + tmpl->len = ALIGN(tmpl->len, nft_set_ext_types[id].align); +- BUG_ON(tmpl->len > U8_MAX); ++ if (tmpl->len > U8_MAX) ++ return -EINVAL; ++ + tmpl->offset[id] = tmpl->len; + tmpl->len += nft_set_ext_types[id].len + len; ++ ++ return 0; + } + +-static inline void nft_set_ext_add(struct nft_set_ext_tmpl *tmpl, u8 id) ++static inline int nft_set_ext_add(struct nft_set_ext_tmpl *tmpl, u8 id) + { +- nft_set_ext_add_length(tmpl, id, 0); ++ return nft_set_ext_add_length(tmpl, id, 0); + } + + static inline void nft_set_ext_init(struct nft_set_ext *ext, +@@ -883,9 +912,9 @@ struct nft_expr_ops { + int (*offload)(struct nft_offload_ctx *ctx, + struct nft_flow_rule *flow, + const struct nft_expr *expr); ++ bool (*offload_action)(const struct nft_expr *expr); + void (*offload_stats)(struct nft_expr *expr, + const struct flow_stats *stats); +- u32 offload_flags; + const struct nft_expr_type *type; + void *data; + }; +@@ -1053,7 +1082,6 @@ struct nft_stats { + + struct nft_hook { + struct list_head list; +- bool inactive; + struct nf_hook_ops ops; + struct rcu_head rcu; + }; +@@ -1485,6 +1513,9 @@ struct nft_trans_rule { + struct nft_trans_set { + struct nft_set *set; + u32 set_id; ++ u32 gc_int; ++ u64 timeout; ++ bool update; + bool bound; + }; + +@@ -1494,6 +1525,12 @@ struct nft_trans_set { + (((struct nft_trans_set *)trans->data)->set_id) + #define nft_trans_set_bound(trans) \ + (((struct nft_trans_set *)trans->data)->bound) ++#define nft_trans_set_update(trans) \ ++ (((struct nft_trans_set *)trans->data)->update) ++#define nft_trans_set_timeout(trans) \ ++ (((struct nft_trans_set *)trans->data)->timeout) ++#define nft_trans_set_gc_int(trans) \ ++ (((struct nft_trans_set *)trans->data)->gc_int) + + struct nft_trans_chain { + bool update; +@@ -1586,6 +1623,7 @@ struct nftables_pernet { + struct list_head module_list; + struct list_head notify_list; + struct mutex commit_mutex; ++ u64 table_handle; + unsigned int base_seq; + u8 validate_state; + }; +diff --git a/include/net/netfilter/nf_tables_core.h b/include/net/netfilter/nf_tables_core.h +index 0fa5a6d98a00b..9dfa11d4224d2 100644 +--- a/include/net/netfilter/nf_tables_core.h ++++ b/include/net/netfilter/nf_tables_core.h +@@ -40,6 +40,14 @@ struct nft_cmp_fast_expr { + bool inv; + }; + ++struct nft_cmp16_fast_expr { ++ struct nft_data data; ++ struct nft_data mask; ++ u8 sreg; ++ u8 len; ++ bool inv; ++}; ++ + struct nft_immediate_expr { + struct nft_data data; + u8 dreg; +@@ -57,6 +65,7 @@ static inline u32 nft_cmp_fast_mask(unsigned int len) + } + + extern const struct nft_expr_ops nft_cmp_fast_ops; ++extern 
const struct nft_expr_ops nft_cmp16_fast_ops; + + struct nft_payload { + enum nft_payload_bases base:8; +diff --git a/include/net/netfilter/nf_tables_ipv4.h b/include/net/netfilter/nf_tables_ipv4.h +index eb4c094cd54d2..c4a6147b0ef8c 100644 +--- a/include/net/netfilter/nf_tables_ipv4.h ++++ b/include/net/netfilter/nf_tables_ipv4.h +@@ -10,7 +10,7 @@ static inline void nft_set_pktinfo_ipv4(struct nft_pktinfo *pkt) + struct iphdr *ip; + + ip = ip_hdr(pkt->skb); +- pkt->tprot_set = true; ++ pkt->flags = NFT_PKTINFO_L4PROTO; + pkt->tprot = ip->protocol; + pkt->thoff = ip_hdrlen(pkt->skb); + pkt->fragoff = ntohs(ip->frag_off) & IP_OFFSET; +@@ -36,7 +36,7 @@ static inline int __nft_set_pktinfo_ipv4_validate(struct nft_pktinfo *pkt) + else if (len < thoff) + return -1; + +- pkt->tprot_set = true; ++ pkt->flags = NFT_PKTINFO_L4PROTO; + pkt->tprot = iph->protocol; + pkt->thoff = thoff; + pkt->fragoff = ntohs(iph->frag_off) & IP_OFFSET; +@@ -71,7 +71,7 @@ static inline int nft_set_pktinfo_ipv4_ingress(struct nft_pktinfo *pkt) + goto inhdr_error; + } + +- pkt->tprot_set = true; ++ pkt->flags = NFT_PKTINFO_L4PROTO; + pkt->tprot = iph->protocol; + pkt->thoff = thoff; + pkt->fragoff = ntohs(iph->frag_off) & IP_OFFSET; +@@ -82,4 +82,5 @@ inhdr_error: + __IP_INC_STATS(nft_net(pkt), IPSTATS_MIB_INHDRERRORS); + return -1; + } ++ + #endif +diff --git a/include/net/netfilter/nf_tables_ipv6.h b/include/net/netfilter/nf_tables_ipv6.h +index 7595e02b00ba0..ec7eaeaf4f04c 100644 +--- a/include/net/netfilter/nf_tables_ipv6.h ++++ b/include/net/netfilter/nf_tables_ipv6.h +@@ -18,7 +18,7 @@ static inline void nft_set_pktinfo_ipv6(struct nft_pktinfo *pkt) + return; + } + +- pkt->tprot_set = true; ++ pkt->flags = NFT_PKTINFO_L4PROTO; + pkt->tprot = protohdr; + pkt->thoff = thoff; + pkt->fragoff = frag_off; +@@ -50,7 +50,7 @@ static inline int __nft_set_pktinfo_ipv6_validate(struct nft_pktinfo *pkt) + if (protohdr < 0) + return -1; + +- pkt->tprot_set = true; ++ pkt->flags = NFT_PKTINFO_L4PROTO; + pkt->tprot = protohdr; + pkt->thoff = thoff; + pkt->fragoff = frag_off; +@@ -96,7 +96,7 @@ static inline int nft_set_pktinfo_ipv6_ingress(struct nft_pktinfo *pkt) + if (protohdr < 0) + goto inhdr_error; + +- pkt->tprot_set = true; ++ pkt->flags = NFT_PKTINFO_L4PROTO; + pkt->tprot = protohdr; + pkt->thoff = thoff; + pkt->fragoff = frag_off; +diff --git a/include/net/netfilter/nf_tables_offload.h b/include/net/netfilter/nf_tables_offload.h +index f9d95ff82df83..3568b6a2f5f0f 100644 +--- a/include/net/netfilter/nf_tables_offload.h ++++ b/include/net/netfilter/nf_tables_offload.h +@@ -67,8 +67,6 @@ struct nft_flow_rule { + struct flow_rule *rule; + }; + +-#define NFT_OFFLOAD_F_ACTION (1 << 0) +- + void nft_flow_rule_set_addr_type(struct nft_flow_rule *flow, + enum flow_dissector_key_id addr_type); + +@@ -94,7 +92,7 @@ int nft_flow_rule_offload_commit(struct net *net); + NFT_OFFLOAD_MATCH(__key, __base, __field, __len, __reg) \ + memset(&(__reg)->mask, 0xff, (__reg)->len); + +-int nft_chain_offload_priority(struct nft_base_chain *basechain); ++bool nft_chain_offload_support(const struct nft_base_chain *basechain); + + int nft_offload_init(void); + void nft_offload_exit(void); +diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h +index 2f65701a43c95..d60a10cfc3823 100644 +--- a/include/net/netns/ipv4.h ++++ b/include/net/netns/ipv4.h +@@ -65,7 +65,7 @@ struct netns_ipv4 { + bool fib_has_custom_local_routes; + bool fib_offload_disabled; + #ifdef CONFIG_IP_ROUTE_CLASSID +- int fib_num_tclassid_users; ++ atomic_t 
fib_num_tclassid_users; + #endif + struct hlist_head *fib_table_hash; + struct sock *fibnl; +@@ -74,7 +74,6 @@ struct netns_ipv4 { + struct sock *mc_autojoin_sk; + + struct inet_peer_base *peers; +- struct sock * __percpu *tcp_sk; + struct fqdir *fqdir; + + u8 sysctl_icmp_echo_ignore_all; +diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h +index a4b5503803165..ff82983b7ab41 100644 +--- a/include/net/netns/ipv6.h ++++ b/include/net/netns/ipv6.h +@@ -75,11 +75,12 @@ struct netns_ipv6 { + struct list_head fib6_walkers; + rwlock_t fib6_walker_lock; + spinlock_t fib6_gc_lock; +- unsigned int ip6_rt_gc_expire; +- unsigned long ip6_rt_last_gc; ++ atomic_t ip6_rt_gc_expire; ++ unsigned long ip6_rt_last_gc; ++ unsigned char flowlabel_has_excl; + #ifdef CONFIG_IPV6_MULTIPLE_TABLES +- unsigned int fib6_rules_require_fldissect; + bool fib6_has_custom_rules; ++ unsigned int fib6_rules_require_fldissect; + #ifdef CONFIG_IPV6_SUBTREES + unsigned int fib6_routes_require_src; + #endif +diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h +index 947733a639a6f..bd7c3be4af5d7 100644 +--- a/include/net/netns/xfrm.h ++++ b/include/net/netns/xfrm.h +@@ -66,11 +66,7 @@ struct netns_xfrm { + int sysctl_larval_drop; + u32 sysctl_acq_expires; + +- u8 policy_default; +-#define XFRM_POL_DEFAULT_IN 1 +-#define XFRM_POL_DEFAULT_OUT 2 +-#define XFRM_POL_DEFAULT_FWD 4 +-#define XFRM_POL_DEFAULT_MASK 7 ++ u8 policy_default[XFRM_POLICY_MAX]; + + #ifdef CONFIG_SYSCTL + struct ctl_table_header *sysctl_hdr; +diff --git a/include/net/nfc/nci_core.h b/include/net/nfc/nci_core.h +index a964daedc17b6..ea8595651c384 100644 +--- a/include/net/nfc/nci_core.h ++++ b/include/net/nfc/nci_core.h +@@ -30,6 +30,7 @@ enum nci_flag { + NCI_UP, + NCI_DATA_EXCHANGE, + NCI_DATA_EXCHANGE_TO, ++ NCI_UNREG, + }; + + /* NCI device states */ +diff --git a/include/net/nl802154.h b/include/net/nl802154.h +index ddcee128f5d9a..145acb8f25095 100644 +--- a/include/net/nl802154.h ++++ b/include/net/nl802154.h +@@ -19,6 +19,8 @@ + * + */ + ++#include <linux/types.h> ++ + #define NL802154_GENL_NAME "nl802154" + + enum nl802154_commands { +@@ -150,10 +152,9 @@ enum nl802154_attrs { + }; + + enum nl802154_iftype { +- /* for backwards compatibility TODO */ +- NL802154_IFTYPE_UNSPEC = -1, ++ NL802154_IFTYPE_UNSPEC = (~(__u32)0), + +- NL802154_IFTYPE_NODE, ++ NL802154_IFTYPE_NODE = 0, + NL802154_IFTYPE_MONITOR, + NL802154_IFTYPE_COORD, + +diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h +index bf79f3a890af2..9e7b21c0b3a6d 100644 +--- a/include/net/pkt_sched.h ++++ b/include/net/pkt_sched.h +@@ -193,4 +193,22 @@ static inline void skb_txtime_consumed(struct sk_buff *skb) + skb->tstamp = ktime_set(0, 0); + } + ++struct tc_skb_cb { ++ struct qdisc_skb_cb qdisc_cb; ++ ++ u16 mru; ++ u8 post_ct:1; ++ u8 post_ct_snat:1; ++ u8 post_ct_dnat:1; ++ u16 zone; /* Only valid if post_ct = true */ ++}; ++ ++static inline struct tc_skb_cb *tc_skb_cb(const struct sk_buff *skb) ++{ ++ struct tc_skb_cb *cb = (struct tc_skb_cb *)skb->cb; ++ ++ BUILD_BUG_ON(sizeof(*cb) > sizeof_field(struct sk_buff, cb)); ++ return cb; ++} ++ + #endif +diff --git a/include/net/protocol.h b/include/net/protocol.h +index f51c06ae365f5..6aef8cb11cc8c 100644 +--- a/include/net/protocol.h ++++ b/include/net/protocol.h +@@ -35,8 +35,6 @@ + + /* This is used to register protocols. 
*/ + struct net_protocol { +- int (*early_demux)(struct sk_buff *skb); +- int (*early_demux_handler)(struct sk_buff *skb); + int (*handler)(struct sk_buff *skb); + + /* This returns an error if we weren't able to handle the error. */ +@@ -52,8 +50,6 @@ struct net_protocol { + + #if IS_ENABLED(CONFIG_IPV6) + struct inet6_protocol { +- void (*early_demux)(struct sk_buff *skb); +- void (*early_demux_handler)(struct sk_buff *skb); + int (*handler)(struct sk_buff *skb); + + /* This returns an error if we weren't able to handle the error. */ +diff --git a/include/net/raw.h b/include/net/raw.h +index 8ad8df5948536..c51a635671a73 100644 +--- a/include/net/raw.h ++++ b/include/net/raw.h +@@ -75,7 +75,7 @@ static inline bool raw_sk_bound_dev_eq(struct net *net, int bound_dev_if, + int dif, int sdif) + { + #if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV) +- return inet_bound_dev_eq(!!net->ipv4.sysctl_raw_l3mdev_accept, ++ return inet_bound_dev_eq(READ_ONCE(net->ipv4.sysctl_raw_l3mdev_accept), + bound_dev_if, dif, sdif); + #else + return inet_bound_dev_eq(true, bound_dev_if, dif, sdif); +diff --git a/include/net/route.h b/include/net/route.h +index 2e6c0e153e3a5..30610101ea14f 100644 +--- a/include/net/route.h ++++ b/include/net/route.h +@@ -360,7 +360,7 @@ static inline int ip4_dst_hoplimit(const struct dst_entry *dst) + struct net *net = dev_net(dst->dev); + + if (hoplimit == 0) +- hoplimit = net->ipv4.sysctl_ip_default_ttl; ++ hoplimit = READ_ONCE(net->ipv4.sysctl_ip_default_ttl); + return hoplimit; + } + +@@ -369,7 +369,7 @@ static inline struct neighbour *ip_neigh_gw4(struct net_device *dev, + { + struct neighbour *neigh; + +- neigh = __ipv4_neigh_lookup_noref(dev, daddr); ++ neigh = __ipv4_neigh_lookup_noref(dev, (__force u32)daddr); + if (unlikely(!neigh)) + neigh = __neigh_create(&arp_tbl, &daddr, dev, false); + +diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h +index c0069ac00e62d..6906da5c733ea 100644 +--- a/include/net/sch_generic.h ++++ b/include/net/sch_generic.h +@@ -173,37 +173,17 @@ static inline bool qdisc_run_begin(struct Qdisc *qdisc) + if (spin_trylock(&qdisc->seqlock)) + return true; + +- /* Paired with smp_mb__after_atomic() to make sure +- * STATE_MISSED checking is synchronized with clearing +- * in pfifo_fast_dequeue(). ++ /* No need to insist if the MISSED flag was already set. ++ * Note that test_and_set_bit() also gives us memory ordering ++ * guarantees wrt potential earlier enqueue() and below ++ * spin_trylock(), both of which are necessary to prevent races + */ +- smp_mb__before_atomic(); +- +- /* If the MISSED flag is set, it means other thread has +- * set the MISSED flag before second spin_trylock(), so +- * we can return false here to avoid multi cpus doing +- * the set_bit() and second spin_trylock() concurrently. +- */ +- if (test_bit(__QDISC_STATE_MISSED, &qdisc->state)) ++ if (test_and_set_bit(__QDISC_STATE_MISSED, &qdisc->state)) + return false; + +- /* Set the MISSED flag before the second spin_trylock(), +- * if the second spin_trylock() return false, it means +- * other cpu holding the lock will do dequeuing for us +- * or it will see the MISSED flag set after releasing +- * lock and reschedule the net_tx_action() to do the +- * dequeuing. +- */ +- set_bit(__QDISC_STATE_MISSED, &qdisc->state); +- +- /* spin_trylock() only has load-acquire semantic, so use +- * smp_mb__after_atomic() to ensure STATE_MISSED is set +- * before doing the second spin_trylock(). 
+- */ +- smp_mb__after_atomic(); +- +- /* Retry again in case other CPU may not see the new flag +- * after it releases the lock at the end of qdisc_run_end(). ++ /* Try to take the lock again to make sure that we will either ++ * grab it or the CPU that still has it will see MISSED set ++ * when testing it in qdisc_run_end() + */ + return spin_trylock(&qdisc->seqlock); + } else if (qdisc_is_running(qdisc)) { +@@ -222,6 +202,12 @@ static inline void qdisc_run_end(struct Qdisc *qdisc) + if (qdisc->flags & TCQ_F_NOLOCK) { + spin_unlock(&qdisc->seqlock); + ++ /* spin_unlock() only has store-release semantic. The unlock ++ * and test_bit() ordering is a store-load ordering, so a full ++ * memory barrier is needed here. ++ */ ++ smp_mb(); ++ + if (unlikely(test_bit(__QDISC_STATE_MISSED, + &qdisc->state))) + __netif_schedule(qdisc); +@@ -308,6 +294,8 @@ struct Qdisc_ops { + struct netlink_ext_ack *extack); + void (*attach)(struct Qdisc *sch); + int (*change_tx_queue_len)(struct Qdisc *, unsigned int); ++ void (*change_real_num_tx)(struct Qdisc *sch, ++ unsigned int new_real_tx); + + int (*dump)(struct Qdisc *, struct sk_buff *); + int (*dump_stats)(struct Qdisc *, struct gnet_dump *); +@@ -438,8 +426,6 @@ struct qdisc_skb_cb { + }; + #define QDISC_CB_PRIV_LEN 20 + unsigned char data[QDISC_CB_PRIV_LEN]; +- u16 mru; +- bool post_ct; + }; + + typedef void tcf_chain_head_change_t(struct tcf_proto *tp_head, void *priv); +@@ -684,6 +670,8 @@ void qdisc_class_hash_grow(struct Qdisc *, struct Qdisc_class_hash *); + void qdisc_class_hash_destroy(struct Qdisc_class_hash *); + + int dev_qdisc_change_tx_queue_len(struct net_device *dev); ++void dev_qdisc_change_real_num_tx(struct net_device *dev, ++ unsigned int new_real_tx); + void dev_init_scheduler(struct net_device *dev); + void dev_shutdown(struct net_device *dev); + void dev_activate(struct net_device *dev); +@@ -1189,7 +1177,6 @@ static inline void __qdisc_reset_queue(struct qdisc_skb_head *qh) + static inline void qdisc_reset_queue(struct Qdisc *sch) + { + __qdisc_reset_queue(&sch->q); +- sch->qstats.backlog = 0; + } + + static inline struct Qdisc *qdisc_replace(struct Qdisc *sch, struct Qdisc *new, +@@ -1258,6 +1245,7 @@ struct psched_ratecfg { + u64 rate_bytes_ps; /* bytes per second */ + u32 mult; + u16 overhead; ++ u16 mpu; + u8 linklayer; + u8 shift; + }; +@@ -1267,6 +1255,9 @@ static inline u64 psched_l2t_ns(const struct psched_ratecfg *r, + { + len += r->overhead; + ++ if (len < r->mpu) ++ len = r->mpu; ++ + if (unlikely(r->linklayer == TC_LINKLAYER_ATM)) + return ((u64)(DIV_ROUND_UP(len,48)*53) * r->mult) >> r->shift; + +@@ -1289,6 +1280,7 @@ static inline void psched_ratecfg_getrate(struct tc_ratespec *res, + res->rate = min_t(u64, r->rate_bytes_ps, ~0U); + + res->overhead = r->overhead; ++ res->mpu = r->mpu; + res->linklayer = (r->linklayer & TC_LINKLAYER_MASK); + } + +@@ -1343,4 +1335,11 @@ void mini_qdisc_pair_block_init(struct mini_Qdisc_pair *miniqp, + + int sch_frag_xmit_hook(struct sk_buff *skb, int (*xmit)(struct sk_buff *skb)); + ++/* Make sure qdisc is no longer in SCHED state. 
*/ ++static inline void qdisc_synchronize(const struct Qdisc *q) ++{ ++ while (test_bit(__QDISC_STATE_SCHED, &q->state)) ++ msleep(1); ++} ++ + #endif +diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h +index 69bab88ad66b1..3ae61ce2eabd0 100644 +--- a/include/net/sctp/sctp.h ++++ b/include/net/sctp/sctp.h +@@ -105,19 +105,18 @@ extern struct percpu_counter sctp_sockets_allocated; + int sctp_asconf_mgmt(struct sctp_sock *, struct sctp_sockaddr_entry *); + struct sk_buff *sctp_skb_recv_datagram(struct sock *, int, int, int *); + ++typedef int (*sctp_callback_t)(struct sctp_endpoint *, struct sctp_transport *, void *); + void sctp_transport_walk_start(struct rhashtable_iter *iter); + void sctp_transport_walk_stop(struct rhashtable_iter *iter); + struct sctp_transport *sctp_transport_get_next(struct net *net, + struct rhashtable_iter *iter); + struct sctp_transport *sctp_transport_get_idx(struct net *net, + struct rhashtable_iter *iter, int pos); +-int sctp_transport_lookup_process(int (*cb)(struct sctp_transport *, void *), +- struct net *net, ++int sctp_transport_lookup_process(sctp_callback_t cb, struct net *net, + const union sctp_addr *laddr, + const union sctp_addr *paddr, void *p); +-int sctp_for_each_transport(int (*cb)(struct sctp_transport *, void *), +- int (*cb_done)(struct sctp_transport *, void *), +- struct net *net, int *pos, void *p); ++int sctp_transport_traverse_process(sctp_callback_t cb, sctp_callback_t cb_done, ++ struct net *net, int *pos, void *p); + int sctp_for_each_endpoint(int (*cb)(struct sctp_endpoint *, void *), void *p); + int sctp_get_sctp_info(struct sock *sk, struct sctp_association *asoc, + struct sctp_info *info); +@@ -626,7 +625,8 @@ static inline __u32 sctp_min_frag_point(struct sctp_sock *sp, __u16 datasize) + + static inline int sctp_transport_pl_hlen(struct sctp_transport *t) + { +- return __sctp_mtu_payload(sctp_sk(t->asoc->base.sk), t, 0, 0); ++ return __sctp_mtu_payload(sctp_sk(t->asoc->base.sk), t, 0, 0) - ++ sizeof(struct sctphdr); + } + + static inline void sctp_transport_pl_reset(struct sctp_transport *t) +@@ -653,12 +653,10 @@ static inline void sctp_transport_pl_update(struct sctp_transport *t) + if (t->pl.state == SCTP_PL_DISABLED) + return; + +- if (del_timer(&t->probe_timer)) +- sctp_transport_put(t); +- + t->pl.state = SCTP_PL_BASE; + t->pl.pmtu = SCTP_BASE_PLPMTU; + t->pl.probe_size = SCTP_BASE_PLPMTU; ++ sctp_transport_reset_probe_timer(t); + } + + static inline bool sctp_transport_pl_enabled(struct sctp_transport *t) +diff --git a/include/net/sctp/stream_sched.h b/include/net/sctp/stream_sched.h +index 01a70b27e026b..65058faea4db1 100644 +--- a/include/net/sctp/stream_sched.h ++++ b/include/net/sctp/stream_sched.h +@@ -26,6 +26,8 @@ struct sctp_sched_ops { + int (*init)(struct sctp_stream *stream); + /* Init a stream */ + int (*init_sid)(struct sctp_stream *stream, __u16 sid, gfp_t gfp); ++ /* free a stream */ ++ void (*free_sid)(struct sctp_stream *stream, __u16 sid); + /* Frees the entire thing */ + void (*free)(struct sctp_stream *stream); + +diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h +index 651bba654d77d..8d2c3dd9f5953 100644 +--- a/include/net/sctp/structs.h ++++ b/include/net/sctp/structs.h +@@ -1365,6 +1365,7 @@ struct sctp_endpoint { + + u32 secid; + u32 peer_secid; ++ struct rcu_head rcu; + }; + + /* Recover the outter endpoint structure. 
*/ +@@ -1380,7 +1381,7 @@ static inline struct sctp_endpoint *sctp_ep(struct sctp_ep_common *base) + struct sctp_endpoint *sctp_endpoint_new(struct sock *, gfp_t); + void sctp_endpoint_free(struct sctp_endpoint *); + void sctp_endpoint_put(struct sctp_endpoint *); +-void sctp_endpoint_hold(struct sctp_endpoint *); ++int sctp_endpoint_hold(struct sctp_endpoint *ep); + void sctp_endpoint_add_asoc(struct sctp_endpoint *, struct sctp_association *); + struct sctp_association *sctp_endpoint_lookup_assoc( + const struct sctp_endpoint *ep, +diff --git a/include/net/secure_seq.h b/include/net/secure_seq.h +index d7d2495f83c27..dac91aa38c5af 100644 +--- a/include/net/secure_seq.h ++++ b/include/net/secure_seq.h +@@ -4,8 +4,8 @@ + + #include <linux/types.h> + +-u32 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport); +-u32 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr, ++u64 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport); ++u64 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr, + __be16 dport); + u32 secure_tcp_seq(__be32 saddr, __be32 daddr, + __be16 sport, __be16 dport); +diff --git a/include/net/seg6.h b/include/net/seg6.h +index 9d19c15e8545c..af668f17b3988 100644 +--- a/include/net/seg6.h ++++ b/include/net/seg6.h +@@ -58,9 +58,30 @@ extern int seg6_local_init(void); + extern void seg6_local_exit(void); + + extern bool seg6_validate_srh(struct ipv6_sr_hdr *srh, int len, bool reduced); ++extern struct ipv6_sr_hdr *seg6_get_srh(struct sk_buff *skb, int flags); ++extern void seg6_icmp_srh(struct sk_buff *skb, struct inet6_skb_parm *opt); + extern int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, + int proto); + extern int seg6_do_srh_inline(struct sk_buff *skb, struct ipv6_sr_hdr *osrh); + extern int seg6_lookup_nexthop(struct sk_buff *skb, struct in6_addr *nhaddr, + u32 tbl_id); ++ ++/* If the packet which invoked an ICMP error contains an SRH return ++ * the true destination address from within the SRH, otherwise use the ++ * destination address in the IP header. ++ */ ++static inline const struct in6_addr *seg6_get_daddr(struct sk_buff *skb, ++ struct inet6_skb_parm *opt) ++{ ++ struct ipv6_sr_hdr *srh; ++ ++ if (opt->flags & IP6SKB_SEG6) { ++ srh = (struct ipv6_sr_hdr *)(skb->data + opt->srhoff); ++ return &srh->segments[0]; ++ } ++ ++ return NULL; ++} ++ ++ + #endif +diff --git a/include/net/sock.h b/include/net/sock.h +index 463f390d90b3e..cd6f2ae28ecf2 100644 +--- a/include/net/sock.h ++++ b/include/net/sock.h +@@ -161,9 +161,6 @@ typedef __u64 __bitwise __addrpair; + * for struct sock and struct inet_timewait_sock. 
+ */ + struct sock_common { +- /* skc_daddr and skc_rcv_saddr must be grouped on a 8 bytes aligned +- * address on 64bit arches : cf INET_MATCH() +- */ + union { + __addrpair skc_addrpair; + struct { +@@ -259,6 +256,8 @@ struct bpf_local_storage; + * @sk_rcvbuf: size of receive buffer in bytes + * @sk_wq: sock wait queue and async head + * @sk_rx_dst: receive input route used by early demux ++ * @sk_rx_dst_ifindex: ifindex for @sk_rx_dst ++ * @sk_rx_dst_cookie: cookie for @sk_rx_dst + * @sk_dst_cache: destination cache + * @sk_dst_pending_confirm: need to confirm neighbour + * @sk_policy: flow policy +@@ -324,7 +323,7 @@ struct bpf_local_storage; + * @sk_tskey: counter to disambiguate concurrent tstamp requests + * @sk_zckey: counter to order MSG_ZEROCOPY notifications + * @sk_socket: Identd and reporting IO signals +- * @sk_user_data: RPC layer private data ++ * @sk_user_data: RPC layer private data. Write-protected by @sk_callback_lock. + * @sk_frag: cached page frag + * @sk_peek_off: current peek_offset value + * @sk_send_head: front of stuff to transmit +@@ -430,7 +429,10 @@ struct sock { + #ifdef CONFIG_XFRM + struct xfrm_policy __rcu *sk_policy[2]; + #endif +- struct dst_entry *sk_rx_dst; ++ struct dst_entry __rcu *sk_rx_dst; ++ int sk_rx_dst_ifindex; ++ u32 sk_rx_dst_cookie; ++ + struct dst_entry __rcu *sk_dst_cache; + atomic_t sk_omem_alloc; + int sk_sndbuf; +@@ -501,7 +503,7 @@ struct sock { + u16 sk_tsflags; + int sk_bind_phc; + u8 sk_shutdown; +- u32 sk_tskey; ++ atomic_t sk_tskey; + atomic_t sk_zckey; + + u8 sk_clockid; +@@ -541,14 +543,26 @@ enum sk_pacing { + SK_PACING_FQ = 2, + }; + +-/* Pointer stored in sk_user_data might not be suitable for copying +- * when cloning the socket. For instance, it can point to a reference +- * counted object. sk_user_data bottom bit is set if pointer must not +- * be copied. ++/* flag bits in sk_user_data ++ * ++ * - SK_USER_DATA_NOCOPY: Pointer stored in sk_user_data might ++ * not be suitable for copying when cloning the socket. For instance, ++ * it can point to a reference counted object. sk_user_data bottom ++ * bit is set if pointer must not be copied. ++ * ++ * - SK_USER_DATA_BPF: Mark whether sk_user_data field is ++ * managed/owned by a BPF reuseport array. This bit should be set ++ * when sk_user_data's sk is added to the bpf's reuseport_array. ++ * ++ * - SK_USER_DATA_PSOCK: Mark whether pointer stored in ++ * sk_user_data points to psock type. This bit should be set ++ * when sk_user_data is assigned to a psock object. + */ + #define SK_USER_DATA_NOCOPY 1UL +-#define SK_USER_DATA_BPF 2UL /* Managed by BPF */ +-#define SK_USER_DATA_PTRMASK ~(SK_USER_DATA_NOCOPY | SK_USER_DATA_BPF) ++#define SK_USER_DATA_BPF 2UL ++#define SK_USER_DATA_PSOCK 4UL ++#define SK_USER_DATA_PTRMASK ~(SK_USER_DATA_NOCOPY | SK_USER_DATA_BPF |\ ++ SK_USER_DATA_PSOCK) + + /** + * sk_user_data_is_nocopy - Test if sk_user_data pointer must not be copied +@@ -561,24 +575,40 @@ static inline bool sk_user_data_is_nocopy(const struct sock *sk) + + #define __sk_user_data(sk) ((*((void __rcu **)&(sk)->sk_user_data))) + ++/** ++ * __rcu_dereference_sk_user_data_with_flags - return the pointer ++ * only if argument flags all has been set in sk_user_data. 
Otherwise ++ * return NULL ++ * ++ * @sk: socket ++ * @flags: flag bits ++ */ ++static inline void * ++__rcu_dereference_sk_user_data_with_flags(const struct sock *sk, ++ uintptr_t flags) ++{ ++ uintptr_t sk_user_data = (uintptr_t)rcu_dereference(__sk_user_data(sk)); ++ ++ WARN_ON_ONCE(flags & SK_USER_DATA_PTRMASK); ++ ++ if ((sk_user_data & flags) == flags) ++ return (void *)(sk_user_data & SK_USER_DATA_PTRMASK); ++ return NULL; ++} ++ + #define rcu_dereference_sk_user_data(sk) \ ++ __rcu_dereference_sk_user_data_with_flags(sk, 0) ++#define __rcu_assign_sk_user_data_with_flags(sk, ptr, flags) \ + ({ \ +- void *__tmp = rcu_dereference(__sk_user_data((sk))); \ +- (void *)((uintptr_t)__tmp & SK_USER_DATA_PTRMASK); \ +-}) +-#define rcu_assign_sk_user_data(sk, ptr) \ +-({ \ +- uintptr_t __tmp = (uintptr_t)(ptr); \ +- WARN_ON_ONCE(__tmp & ~SK_USER_DATA_PTRMASK); \ +- rcu_assign_pointer(__sk_user_data((sk)), __tmp); \ +-}) +-#define rcu_assign_sk_user_data_nocopy(sk, ptr) \ +-({ \ +- uintptr_t __tmp = (uintptr_t)(ptr); \ +- WARN_ON_ONCE(__tmp & ~SK_USER_DATA_PTRMASK); \ ++ uintptr_t __tmp1 = (uintptr_t)(ptr), \ ++ __tmp2 = (uintptr_t)(flags); \ ++ WARN_ON_ONCE(__tmp1 & ~SK_USER_DATA_PTRMASK); \ ++ WARN_ON_ONCE(__tmp2 & SK_USER_DATA_PTRMASK); \ + rcu_assign_pointer(__sk_user_data((sk)), \ +- __tmp | SK_USER_DATA_NOCOPY); \ ++ __tmp1 | __tmp2); \ + }) ++#define rcu_assign_sk_user_data(sk, ptr) \ ++ __rcu_assign_sk_user_data_with_flags(sk, ptr, 0) + + /* + * SK_CAN_REUSE and SK_NO_REUSE on a socket mean that the socket is OK +@@ -1237,7 +1267,7 @@ struct proto { + unsigned int useroffset; /* Usercopy region offset */ + unsigned int usersize; /* Usercopy region size */ + +- struct percpu_counter *orphan_count; ++ unsigned int __percpu *orphan_count; + + struct request_sock_ops *rsk_prot; + struct timewait_sock_ops *twsk_prot; +@@ -1479,7 +1509,7 @@ void __sk_mem_reclaim(struct sock *sk, int amount); + /* sysctl_mem values are in pages, we convert them in SK_MEM_QUANTUM units */ + static inline long sk_prot_mem_limits(const struct sock *sk, int index) + { +- long val = sk->sk_prot->sysctl_mem[index]; ++ long val = READ_ONCE(sk->sk_prot->sysctl_mem[index]); + + #if PAGE_SIZE > SK_MEM_QUANTUM + val <<= PAGE_SHIFT - SK_MEM_QUANTUM_SHIFT; +@@ -1502,19 +1532,23 @@ static inline bool sk_has_account(struct sock *sk) + + static inline bool sk_wmem_schedule(struct sock *sk, int size) + { ++ int delta; ++ + if (!sk_has_account(sk)) + return true; +- return size <= sk->sk_forward_alloc || +- __sk_mem_schedule(sk, size, SK_MEM_SEND); ++ delta = size - sk->sk_forward_alloc; ++ return delta <= 0 || __sk_mem_schedule(sk, delta, SK_MEM_SEND); + } + + static inline bool + sk_rmem_schedule(struct sock *sk, struct sk_buff *skb, int size) + { ++ int delta; ++ + if (!sk_has_account(sk)) + return true; +- return size <= sk->sk_forward_alloc || +- __sk_mem_schedule(sk, size, SK_MEM_RECV) || ++ delta = size - sk->sk_forward_alloc; ++ return delta <= 0 || __sk_mem_schedule(sk, delta, SK_MEM_RECV) || + skb_pfmemalloc(skb); + } + +@@ -2281,6 +2315,19 @@ static inline __must_check bool skb_set_owner_sk_safe(struct sk_buff *skb, struc + return false; + } + ++static inline struct sk_buff *skb_clone_and_charge_r(struct sk_buff *skb, struct sock *sk) ++{ ++ skb = skb_clone(skb, sk_gfp_mask(sk, GFP_ATOMIC)); ++ if (skb) { ++ if (sk_rmem_schedule(sk, skb, skb->truesize)) { ++ skb_set_owner_r(skb, sk); ++ return skb; ++ } ++ __kfree_skb(skb); ++ } ++ return NULL; ++} ++ + static inline void skb_prepare_for_gro(struct sk_buff *skb) + { 
+ if (skb->destructor != sock_wfree) {
+@@ -2400,19 +2447,22 @@ struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp,
+ * @sk: socket
+ *
+ * Use the per task page_frag instead of the per socket one for
+- * optimization when we know that we're in the normal context and owns
++ * optimization when we know that we're in process context and own
+ * everything that's associated with %current.
+ *
+- * gfpflags_allow_blocking() isn't enough here as direct reclaim may nest
+- * inside other socket operations and end up recursing into sk_page_frag()
+- * while it's already in use.
++ * Both direct reclaim and page faults can nest inside other
++ * socket operations and end up recursing into sk_page_frag()
++ * while it's already in use: explicitly avoid task page_frag
++ * usage if the caller is potentially doing any of them.
++ * This assumes that page fault handlers use the GFP_NOFS flags.
+ *
+ * Return: a per task page_frag if context allows that,
+ * otherwise a per socket one.
+ */
+ static inline struct page_frag *sk_page_frag(struct sock *sk)
+ {
+- if (gfpflags_normal_context(sk->sk_allocation))
++ if ((sk->sk_allocation & (__GFP_DIRECT_RECLAIM | __GFP_MEMALLOC | __GFP_FS)) ==
++ (__GFP_DIRECT_RECLAIM | __GFP_FS))
+ return &current->task_frag;
+
+ return &sk->sk_frag;
+@@ -2435,7 +2485,7 @@ static inline gfp_t gfp_any(void)
+
+ static inline gfp_t gfp_memcg_charge(void)
+ {
+- return in_softirq() ? GFP_NOWAIT : GFP_KERNEL;
++ return in_softirq() ? GFP_ATOMIC : GFP_KERNEL;
+ }
+
+ static inline long sock_rcvtimeo(const struct sock *sk, bool noblock)
+@@ -2590,7 +2640,7 @@ static inline void _sock_tx_timestamp(struct sock *sk, __u16 tsflags,
+ __sock_tx_timestamp(tsflags, tx_flags);
+ if (tsflags & SOF_TIMESTAMPING_OPT_ID && tskey &&
+ tsflags & SOF_TIMESTAMPING_TX_RECORD_MASK)
+- *tskey = sk->sk_tskey++;
++ *tskey = atomic_inc_return(&sk->sk_tskey) - 1;
+ }
+ if (unlikely(sock_flag(sk, SOCK_WIFI_STATUS)))
+ *tx_flags |= SKBTX_WIFI_STATUS;
+@@ -2757,18 +2807,18 @@ static inline int sk_get_wmem0(const struct sock *sk, const struct proto *proto)
+ {
+ /* Does this proto have per netns sysctl_wmem ? */
+ if (proto->sysctl_wmem_offset)
+- return *(int *)((void *)sock_net(sk) + proto->sysctl_wmem_offset);
++ return READ_ONCE(*(int *)((void *)sock_net(sk) + proto->sysctl_wmem_offset));
+
+- return *proto->sysctl_wmem;
++ return READ_ONCE(*proto->sysctl_wmem);
+ }
+
+ static inline int sk_get_rmem0(const struct sock *sk, const struct proto *proto)
+ {
+ /* Does this proto have per netns sysctl_rmem ? */
+ if (proto->sysctl_rmem_offset)
+- return *(int *)((void *)sock_net(sk) + proto->sysctl_rmem_offset);
++ return READ_ONCE(*(int *)((void *)sock_net(sk) + proto->sysctl_rmem_offset));
+
+- return *proto->sysctl_rmem;
++ return READ_ONCE(*proto->sysctl_rmem);
+ }
+
+ /* Default TCP Small queue budget is ~1 ms of data (1sec >> 10)
+diff --git a/include/net/sock_reuseport.h b/include/net/sock_reuseport.h
+index 473b0b0fa4abc..6ec140b0a61bf 100644
+--- a/include/net/sock_reuseport.h
++++ b/include/net/sock_reuseport.h
+@@ -16,6 +16,7 @@ struct sock_reuseport {
+ u16 max_socks; /* length of socks */
+ u16 num_socks; /* elements in socks */
+ u16 num_closed_socks; /* closed elements in socks */
++ u16 incoming_cpu;
+ /* The last synq overflow event timestamp of this
+ * reuse->socks[] group.
+ */ +@@ -43,21 +44,21 @@ struct sock *reuseport_migrate_sock(struct sock *sk, + extern int reuseport_attach_prog(struct sock *sk, struct bpf_prog *prog); + extern int reuseport_detach_prog(struct sock *sk); + +-static inline bool reuseport_has_conns(struct sock *sk, bool set) ++static inline bool reuseport_has_conns(struct sock *sk) + { + struct sock_reuseport *reuse; + bool ret = false; + + rcu_read_lock(); + reuse = rcu_dereference(sk->sk_reuseport_cb); +- if (reuse) { +- if (set) +- reuse->has_conns = 1; +- ret = reuse->has_conns; +- } ++ if (reuse && reuse->has_conns) ++ ret = true; + rcu_read_unlock(); + + return ret; + } + ++void reuseport_has_conns_set(struct sock *sk); ++void reuseport_update_incoming_cpu(struct sock *sk, int val); ++ + #endif /* _SOCK_REUSEPORT_H */ +diff --git a/include/net/strparser.h b/include/net/strparser.h +index 1d20b98493a10..732b7097d78e4 100644 +--- a/include/net/strparser.h ++++ b/include/net/strparser.h +@@ -54,10 +54,28 @@ struct strp_msg { + int offset; + }; + ++struct _strp_msg { ++ /* Internal cb structure. struct strp_msg must be first for passing ++ * to upper layer. ++ */ ++ struct strp_msg strp; ++ int accum_len; ++}; ++ ++struct sk_skb_cb { ++#define SK_SKB_CB_PRIV_LEN 20 ++ unsigned char data[SK_SKB_CB_PRIV_LEN]; ++ struct _strp_msg strp; ++ /* temp_reg is a temporary register used for bpf_convert_data_end_access ++ * when dst_reg == src_reg. ++ */ ++ u64 temp_reg; ++}; ++ + static inline struct strp_msg *strp_msg(struct sk_buff *skb) + { + return (struct strp_msg *)((void *)skb->cb + +- offsetof(struct qdisc_skb_cb, data)); ++ offsetof(struct sk_skb_cb, strp)); + } + + /* Structure for an attached lower socket */ +diff --git a/include/net/tc_act/tc_pedit.h b/include/net/tc_act/tc_pedit.h +index 748cf87a4d7ea..3e02709a1df65 100644 +--- a/include/net/tc_act/tc_pedit.h ++++ b/include/net/tc_act/tc_pedit.h +@@ -14,6 +14,7 @@ struct tcf_pedit { + struct tc_action common; + unsigned char tcfp_nkeys; + unsigned char tcfp_flags; ++ u32 tcfp_off_max_hint; + struct tc_pedit_key *tcfp_keys; + struct tcf_pedit_key_ex *tcfp_keys_ex; + }; +diff --git a/include/net/tcp.h b/include/net/tcp.h +index 60c384569e9cd..fdac6913b6c8f 100644 +--- a/include/net/tcp.h ++++ b/include/net/tcp.h +@@ -48,7 +48,9 @@ + + extern struct inet_hashinfo tcp_hashinfo; + +-extern struct percpu_counter tcp_orphan_count; ++DECLARE_PER_CPU(unsigned int, tcp_orphan_count); ++int tcp_orphan_count_sum(void); ++ + void tcp_time_wait(struct sock *sk, int state, int timeo); + + #define MAX_TCP_HEADER L1_CACHE_ALIGN(128 + MAX_HEADER) +@@ -290,19 +292,6 @@ static inline bool tcp_out_of_memory(struct sock *sk) + + void sk_forced_mem_schedule(struct sock *sk, int size); + +-static inline bool tcp_too_many_orphans(struct sock *sk, int shift) +-{ +- struct percpu_counter *ocp = sk->sk_prot->orphan_count; +- int orphans = percpu_counter_read_positive(ocp); +- +- if (orphans << shift > sysctl_tcp_max_orphans) { +- orphans = percpu_counter_sum_positive(ocp); +- if (orphans << shift > sysctl_tcp_max_orphans) +- return true; +- } +- return false; +-} +- + bool tcp_check_oom(struct sock *sk, int shift); + + +@@ -481,6 +470,7 @@ int __cookie_v4_check(const struct iphdr *iph, const struct tcphdr *th, + u32 cookie); + struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb); + struct request_sock *cookie_tcp_reqsk_alloc(const struct request_sock_ops *ops, ++ const struct tcp_request_sock_ops *af_ops, + struct sock *sk, struct sk_buff *skb); + #ifdef CONFIG_SYN_COOKIES + +@@ -581,6 +571,8 @@ 
__u32 cookie_v6_init_sequence(const struct sk_buff *skb, __u16 *mss); + #endif + /* tcp_output.c */ + ++void tcp_skb_entail(struct sock *sk, struct sk_buff *skb); ++void tcp_mark_push(struct tcp_sock *tp, struct sk_buff *skb); + void __tcp_push_pending_frames(struct sock *sk, unsigned int cur_mss, + int nonagle); + int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs); +@@ -619,6 +611,7 @@ void tcp_synack_rtt_meas(struct sock *sk, struct request_sock *req); + void tcp_reset(struct sock *sk, struct sk_buff *skb); + void tcp_skb_mark_lost_uncond_verify(struct tcp_sock *tp, struct sk_buff *skb); + void tcp_fin(struct sock *sk); ++void tcp_check_space(struct sock *sk); + + /* tcp_timer.c */ + void tcp_init_xmit_timers(struct sock *); +@@ -928,7 +921,7 @@ extern const struct inet_connection_sock_af_ops ipv6_specific; + + INDIRECT_CALLABLE_DECLARE(void tcp_v6_send_check(struct sock *sk, struct sk_buff *skb)); + INDIRECT_CALLABLE_DECLARE(int tcp_v6_rcv(struct sk_buff *skb)); +-INDIRECT_CALLABLE_DECLARE(void tcp_v6_early_demux(struct sk_buff *skb)); ++void tcp_v6_early_demux(struct sk_buff *skb); + + #endif + +@@ -1037,6 +1030,7 @@ struct rate_sample { + int losses; /* number of packets marked lost upon ACK */ + u32 acked_sacked; /* number of packets newly (S)ACKed upon ACK */ + u32 prior_in_flight; /* in flight before this ACK */ ++ u32 last_end_seq; /* end_seq of most recently ACKed packet */ + bool is_app_limited; /* is sample from packet with bubble in pipe? */ + bool is_retrans; /* is sample from retransmission? */ + bool is_ack_delayed; /* is this (likely) a delayed ACK? */ +@@ -1159,6 +1153,11 @@ void tcp_rate_gen(struct sock *sk, u32 delivered, u32 lost, + bool is_sack_reneg, struct rate_sample *rs); + void tcp_rate_check_app_limited(struct sock *sk); + ++static inline bool tcp_skb_sent_after(u64 t1, u64 t2, u32 seq1, u32 seq2) ++{ ++ return t1 > t2 || (t1 == t2 && after(seq1, seq2)); ++} ++ + /* These functions determine how the current flow behaves in respect of SACK + * handling. SACK is negotiated with the peer, and therefore it can vary + * between different flows. +@@ -1202,9 +1201,20 @@ static inline unsigned int tcp_packets_in_flight(const struct tcp_sock *tp) + + #define TCP_INFINITE_SSTHRESH 0x7fffffff + ++static inline u32 tcp_snd_cwnd(const struct tcp_sock *tp) ++{ ++ return tp->snd_cwnd; ++} ++ ++static inline void tcp_snd_cwnd_set(struct tcp_sock *tp, u32 val) ++{ ++ WARN_ON_ONCE((int)val <= 0); ++ tp->snd_cwnd = val; ++} ++ + static inline bool tcp_in_slow_start(const struct tcp_sock *tp) + { +- return tp->snd_cwnd < tp->snd_ssthresh; ++ return tcp_snd_cwnd(tp) < tp->snd_ssthresh; + } + + static inline bool tcp_in_initial_slowstart(const struct tcp_sock *tp) +@@ -1230,8 +1240,8 @@ static inline __u32 tcp_current_ssthresh(const struct sock *sk) + return tp->snd_ssthresh; + else + return max(tp->snd_ssthresh, +- ((tp->snd_cwnd >> 1) + +- (tp->snd_cwnd >> 2))); ++ ((tcp_snd_cwnd(tp) >> 1) + ++ (tcp_snd_cwnd(tp) >> 2))); + } + + /* Use define here intentionally to get WARN_ON location shown at the caller */ +@@ -1271,11 +1281,14 @@ static inline bool tcp_is_cwnd_limited(const struct sock *sk) + { + const struct tcp_sock *tp = tcp_sk(sk); + ++ if (tp->is_cwnd_limited) ++ return true; ++ + /* If in slow start, ensure cwnd grows to twice what was ACKed. 
*/ + if (tcp_in_slow_start(tp)) +- return tp->snd_cwnd < 2 * tp->max_packets_out; ++ return tcp_snd_cwnd(tp) < 2 * tp->max_packets_out; + +- return tp->is_cwnd_limited; ++ return false; + } + + /* BBR congestion control needs pacing. +@@ -1382,8 +1395,8 @@ static inline void tcp_slow_start_after_idle_check(struct sock *sk) + struct tcp_sock *tp = tcp_sk(sk); + s32 delta; + +- if (!sock_net(sk)->ipv4.sysctl_tcp_slow_start_after_idle || tp->packets_out || +- ca_ops->cong_control) ++ if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_slow_start_after_idle) || ++ tp->packets_out || ca_ops->cong_control) + return; + delta = tcp_jiffies32 - tp->lsndtime; + if (delta > inet_csk(sk)->icsk_rto) +@@ -1398,7 +1411,7 @@ void tcp_select_initial_window(const struct sock *sk, int __space, + + static inline int tcp_win_from_space(const struct sock *sk, int space) + { +- int tcp_adv_win_scale = sock_net(sk)->ipv4.sysctl_tcp_adv_win_scale; ++ int tcp_adv_win_scale = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_adv_win_scale); + + return tcp_adv_win_scale <= 0 ? + (space>>(-tcp_adv_win_scale)) : +@@ -1461,21 +1474,24 @@ static inline int keepalive_intvl_when(const struct tcp_sock *tp) + { + struct net *net = sock_net((struct sock *)tp); + +- return tp->keepalive_intvl ? : net->ipv4.sysctl_tcp_keepalive_intvl; ++ return tp->keepalive_intvl ? : ++ READ_ONCE(net->ipv4.sysctl_tcp_keepalive_intvl); + } + + static inline int keepalive_time_when(const struct tcp_sock *tp) + { + struct net *net = sock_net((struct sock *)tp); + +- return tp->keepalive_time ? : net->ipv4.sysctl_tcp_keepalive_time; ++ return tp->keepalive_time ? : ++ READ_ONCE(net->ipv4.sysctl_tcp_keepalive_time); + } + + static inline int keepalive_probes(const struct tcp_sock *tp) + { + struct net *net = sock_net((struct sock *)tp); + +- return tp->keepalive_probes ? : net->ipv4.sysctl_tcp_keepalive_probes; ++ return tp->keepalive_probes ? : ++ READ_ONCE(net->ipv4.sysctl_tcp_keepalive_probes); + } + + static inline u32 keepalive_time_elapsed(const struct tcp_sock *tp) +@@ -1488,7 +1504,8 @@ static inline u32 keepalive_time_elapsed(const struct tcp_sock *tp) + + static inline int tcp_fin_time(const struct sock *sk) + { +- int fin_timeout = tcp_sk(sk)->linger2 ? : sock_net(sk)->ipv4.sysctl_tcp_fin_timeout; ++ int fin_timeout = tcp_sk(sk)->linger2 ? 
: ++ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fin_timeout); + const int rto = inet_csk(sk)->icsk_rto; + + if (fin_timeout < (rto << 2) - (rto >> 1)) +@@ -1982,7 +1999,7 @@ void __tcp_v4_send_check(struct sk_buff *skb, __be32 saddr, __be32 daddr); + static inline u32 tcp_notsent_lowat(const struct tcp_sock *tp) + { + struct net *net = sock_net((struct sock *)tp); +- return tp->notsent_lowat ?: net->ipv4.sysctl_tcp_notsent_lowat; ++ return tp->notsent_lowat ?: READ_ONCE(net->ipv4.sysctl_tcp_notsent_lowat); + } + + bool tcp_stream_memory_free(const struct sock *sk, int wake); +@@ -2226,8 +2243,8 @@ int tcp_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore); + void tcp_bpf_clone(const struct sock *sk, struct sock *newsk); + #endif /* CONFIG_BPF_SYSCALL */ + +-int tcp_bpf_sendmsg_redir(struct sock *sk, struct sk_msg *msg, u32 bytes, +- int flags); ++int tcp_bpf_sendmsg_redir(struct sock *sk, bool ingress, ++ struct sk_msg *msg, u32 bytes, int flags); + #endif /* CONFIG_NET_SOCK_MSG */ + + #if !defined(CONFIG_BPF_SYSCALL) || !defined(CONFIG_NET_SOCK_MSG) +diff --git a/include/net/tls.h b/include/net/tls.h +index 1fffb206f09f5..bf3d63a527885 100644 +--- a/include/net/tls.h ++++ b/include/net/tls.h +@@ -707,7 +707,7 @@ int tls_sw_fallback_init(struct sock *sk, + struct tls_crypto_info *crypto_info); + + #ifdef CONFIG_TLS_DEVICE +-void tls_device_init(void); ++int tls_device_init(void); + void tls_device_cleanup(void); + void tls_device_sk_destruct(struct sock *sk); + int tls_set_device_offload(struct sock *sk, struct tls_context *ctx); +@@ -727,7 +727,7 @@ static inline bool tls_is_sk_rx_device_offloaded(struct sock *sk) + return tls_get_ctx(sk)->rx_conf == TLS_HW; + } + #else +-static inline void tls_device_init(void) {} ++static inline int tls_device_init(void) { return 0; } + static inline void tls_device_cleanup(void) {} + + static inline int +diff --git a/include/net/udp.h b/include/net/udp.h +index 909ecf447e0fb..930666c0b6e50 100644 +--- a/include/net/udp.h ++++ b/include/net/udp.h +@@ -173,7 +173,7 @@ INDIRECT_CALLABLE_DECLARE(int udp4_gro_complete(struct sk_buff *, int)); + INDIRECT_CALLABLE_DECLARE(struct sk_buff *udp6_gro_receive(struct list_head *, + struct sk_buff *)); + INDIRECT_CALLABLE_DECLARE(int udp6_gro_complete(struct sk_buff *, int)); +-INDIRECT_CALLABLE_DECLARE(void udp_v6_early_demux(struct sk_buff *)); ++void udp_v6_early_demux(struct sk_buff *skb); + INDIRECT_CALLABLE_DECLARE(int udpv6_rcv(struct sk_buff *)); + + struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb, +@@ -262,7 +262,7 @@ static inline bool udp_sk_bound_dev_eq(struct net *net, int bound_dev_if, + int dif, int sdif) + { + #if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV) +- return inet_bound_dev_eq(!!net->ipv4.sysctl_udp_l3mdev_accept, ++ return inet_bound_dev_eq(!!READ_ONCE(net->ipv4.sysctl_udp_l3mdev_accept), + bound_dev_if, dif, sdif); + #else + return inet_bound_dev_eq(true, bound_dev_if, dif, sdif); +diff --git a/include/net/udp_tunnel.h b/include/net/udp_tunnel.h +index afc7ce713657b..72394f441dad8 100644 +--- a/include/net/udp_tunnel.h ++++ b/include/net/udp_tunnel.h +@@ -67,6 +67,9 @@ static inline int udp_sock_create(struct net *net, + typedef int (*udp_tunnel_encap_rcv_t)(struct sock *sk, struct sk_buff *skb); + typedef int (*udp_tunnel_encap_err_lookup_t)(struct sock *sk, + struct sk_buff *skb); ++typedef void (*udp_tunnel_encap_err_rcv_t)(struct sock *sk, ++ struct sk_buff *skb, ++ unsigned int udp_offset); + typedef void (*udp_tunnel_encap_destroy_t)(struct sock 
*sk); + typedef struct sk_buff *(*udp_tunnel_gro_receive_t)(struct sock *sk, + struct list_head *head, +@@ -80,6 +83,7 @@ struct udp_tunnel_sock_cfg { + __u8 encap_type; + udp_tunnel_encap_rcv_t encap_rcv; + udp_tunnel_encap_err_lookup_t encap_err_lookup; ++ udp_tunnel_encap_err_rcv_t encap_err_rcv; + udp_tunnel_encap_destroy_t encap_destroy; + udp_tunnel_gro_receive_t gro_receive; + udp_tunnel_gro_complete_t gro_complete; +diff --git a/include/net/xdp_sock_drv.h b/include/net/xdp_sock_drv.h +index 4e295541e3967..ffe13a10bc963 100644 +--- a/include/net/xdp_sock_drv.h ++++ b/include/net/xdp_sock_drv.h +@@ -13,7 +13,7 @@ + + void xsk_tx_completed(struct xsk_buff_pool *pool, u32 nb_entries); + bool xsk_tx_peek_desc(struct xsk_buff_pool *pool, struct xdp_desc *desc); +-u32 xsk_tx_peek_release_desc_batch(struct xsk_buff_pool *pool, struct xdp_desc *desc, u32 max); ++u32 xsk_tx_peek_release_desc_batch(struct xsk_buff_pool *pool, u32 max); + void xsk_tx_release(struct xsk_buff_pool *pool); + struct xsk_buff_pool *xsk_get_pool_from_qid(struct net_device *dev, + u16 queue_id); +@@ -129,8 +129,7 @@ static inline bool xsk_tx_peek_desc(struct xsk_buff_pool *pool, + return false; + } + +-static inline u32 xsk_tx_peek_release_desc_batch(struct xsk_buff_pool *pool, struct xdp_desc *desc, +- u32 max) ++static inline u32 xsk_tx_peek_release_desc_batch(struct xsk_buff_pool *pool, u32 max) + { + return 0; + } +diff --git a/include/net/xfrm.h b/include/net/xfrm.h +index 2308210793a01..73030094c6e6f 100644 +--- a/include/net/xfrm.h ++++ b/include/net/xfrm.h +@@ -200,6 +200,11 @@ struct xfrm_state { + struct xfrm_algo_aead *aead; + const char *geniv; + ++ /* mapping change rate limiting */ ++ __be16 new_mapping_sport; ++ u32 new_mapping; /* seconds */ ++ u32 mapping_maxage; /* seconds for input SA */ ++ + /* Data for encapsulator */ + struct xfrm_encap_tmpl *encap; + struct sock __rcu *encap_sk; +@@ -1075,24 +1080,29 @@ xfrm_state_addr_cmp(const struct xfrm_tmpl *tmpl, const struct xfrm_state *x, un + } + + #ifdef CONFIG_XFRM +-static inline bool +-xfrm_default_allow(struct net *net, int dir) +-{ +- u8 def = net->xfrm.policy_default; +- +- switch (dir) { +- case XFRM_POLICY_IN: +- return def & XFRM_POL_DEFAULT_IN ? false : true; +- case XFRM_POLICY_OUT: +- return def & XFRM_POL_DEFAULT_OUT ? false : true; +- case XFRM_POLICY_FWD: +- return def & XFRM_POL_DEFAULT_FWD ? false : true; +- } ++int __xfrm_policy_check(struct sock *, int dir, struct sk_buff *skb, ++ unsigned short family); ++ ++static inline bool __xfrm_check_nopolicy(struct net *net, struct sk_buff *skb, ++ int dir) ++{ ++ if (!net->xfrm.policy_count[dir] && !secpath_exists(skb)) ++ return net->xfrm.policy_default[dir] == XFRM_USERPOLICY_ACCEPT; ++ + return false; + } + +-int __xfrm_policy_check(struct sock *, int dir, struct sk_buff *skb, +- unsigned short family); ++static inline bool __xfrm_check_dev_nopolicy(struct sk_buff *skb, ++ int dir, unsigned short family) ++{ ++ if (dir != XFRM_POLICY_OUT && family == AF_INET) { ++ /* same dst may be used for traffic originating from ++ * devices with different policy settings. 
++ */ ++ return IPCB(skb)->flags & IPSKB_NOPOLICY; ++ } ++ return skb_dst(skb) && (skb_dst(skb)->flags & DST_NOPOLICY); ++} + + static inline int __xfrm_policy_check2(struct sock *sk, int dir, + struct sk_buff *skb, +@@ -1104,13 +1114,9 @@ static inline int __xfrm_policy_check2(struct sock *sk, int dir, + if (sk && sk->sk_policy[XFRM_POLICY_IN]) + return __xfrm_policy_check(sk, ndir, skb, family); + +- if (xfrm_default_allow(net, dir)) +- return (!net->xfrm.policy_count[dir] && !secpath_exists(skb)) || +- (skb_dst(skb) && (skb_dst(skb)->flags & DST_NOPOLICY)) || +- __xfrm_policy_check(sk, ndir, skb, family); +- else +- return (skb_dst(skb) && (skb_dst(skb)->flags & DST_NOPOLICY)) || +- __xfrm_policy_check(sk, ndir, skb, family); ++ return __xfrm_check_nopolicy(net, skb, dir) || ++ __xfrm_check_dev_nopolicy(skb, dir, family) || ++ __xfrm_policy_check(sk, ndir, skb, family); + } + + static inline int xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, unsigned short family) +@@ -1162,13 +1168,12 @@ static inline int xfrm_route_forward(struct sk_buff *skb, unsigned short family) + { + struct net *net = dev_net(skb->dev); + +- if (xfrm_default_allow(net, XFRM_POLICY_FWD)) +- return !net->xfrm.policy_count[XFRM_POLICY_OUT] || +- (skb_dst(skb)->flags & DST_NOXFRM) || +- __xfrm_route_forward(skb, family); +- else +- return (skb_dst(skb)->flags & DST_NOXFRM) || +- __xfrm_route_forward(skb, family); ++ if (!net->xfrm.policy_count[XFRM_POLICY_OUT] && ++ net->xfrm.policy_default[XFRM_POLICY_OUT] == XFRM_USERPOLICY_ACCEPT) ++ return true; ++ ++ return (skb_dst(skb)->flags & DST_NOXFRM) || ++ __xfrm_route_forward(skb, family); + } + + static inline int xfrm4_route_forward(struct sk_buff *skb) +@@ -1185,6 +1190,8 @@ int __xfrm_sk_clone_policy(struct sock *sk, const struct sock *osk); + + static inline int xfrm_sk_clone_policy(struct sock *sk, const struct sock *osk) + { ++ if (!sk_fullsock(osk)) ++ return 0; + sk->sk_policy[0] = NULL; + sk->sk_policy[1] = NULL; + if (unlikely(osk->sk_policy[0] || osk->sk_policy[1])) +@@ -1562,7 +1569,6 @@ void xfrm_sad_getinfo(struct net *net, struct xfrmk_sadinfo *si); + void xfrm_spd_getinfo(struct net *net, struct xfrmk_spdinfo *si); + u32 xfrm_replay_seqhi(struct xfrm_state *x, __be32 net_seq); + int xfrm_init_replay(struct xfrm_state *x); +-u32 __xfrm_state_mtu(struct xfrm_state *x, int mtu); + u32 xfrm_state_mtu(struct xfrm_state *x, int mtu); + int __xfrm_init_state(struct xfrm_state *x, bool init_replay, bool offload); + int xfrm_init_state(struct xfrm_state *x); +@@ -1675,14 +1681,15 @@ int km_migrate(const struct xfrm_selector *sel, u8 dir, u8 type, + const struct xfrm_migrate *m, int num_bundles, + const struct xfrm_kmaddress *k, + const struct xfrm_encap_tmpl *encap); +-struct xfrm_state *xfrm_migrate_state_find(struct xfrm_migrate *m, struct net *net); ++struct xfrm_state *xfrm_migrate_state_find(struct xfrm_migrate *m, struct net *net, ++ u32 if_id); + struct xfrm_state *xfrm_state_migrate(struct xfrm_state *x, + struct xfrm_migrate *m, + struct xfrm_encap_tmpl *encap); + int xfrm_migrate(const struct xfrm_selector *sel, u8 dir, u8 type, + struct xfrm_migrate *m, int num_bundles, + struct xfrm_kmaddress *k, struct net *net, +- struct xfrm_encap_tmpl *encap); ++ struct xfrm_encap_tmpl *encap, u32 if_id); + #endif + + int km_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, __be16 sport); +diff --git a/include/net/xsk_buff_pool.h b/include/net/xsk_buff_pool.h +index 7a9a23e7a604a..7517f4faf6b32 100644 +--- a/include/net/xsk_buff_pool.h 
++++ b/include/net/xsk_buff_pool.h +@@ -60,6 +60,7 @@ struct xsk_buff_pool { + */ + dma_addr_t *dma_pages; + struct xdp_buff_xsk *heads; ++ struct xdp_desc *tx_descs; + u64 chunk_mask; + u64 addrs_cnt; + u32 free_list_cnt; +@@ -86,8 +87,9 @@ struct xsk_buff_pool *xp_create_and_assign_umem(struct xdp_sock *xs, + struct xdp_umem *umem); + int xp_assign_dev(struct xsk_buff_pool *pool, struct net_device *dev, + u16 queue_id, u16 flags); +-int xp_assign_dev_shared(struct xsk_buff_pool *pool, struct xdp_umem *umem, ++int xp_assign_dev_shared(struct xsk_buff_pool *pool, struct xdp_sock *umem_xs, + struct net_device *dev, u16 queue_id); ++int xp_alloc_tx_descs(struct xsk_buff_pool *pool, struct xdp_sock *xs); + void xp_destroy(struct xsk_buff_pool *pool); + void xp_release(struct xdp_buff_xsk *xskb); + void xp_get_pool(struct xsk_buff_pool *pool); +diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h +index 4b50d9a3018a6..4ba642fc8a19a 100644 +--- a/include/rdma/ib_verbs.h ++++ b/include/rdma/ib_verbs.h +@@ -4097,8 +4097,13 @@ static inline int ib_dma_map_sgtable_attrs(struct ib_device *dev, + enum dma_data_direction direction, + unsigned long dma_attrs) + { ++ int nents; ++ + if (ib_uses_virt_dma(dev)) { +- ib_dma_virt_map_sg(dev, sgt->sgl, sgt->orig_nents); ++ nents = ib_dma_virt_map_sg(dev, sgt->sgl, sgt->orig_nents); ++ if (!nents) ++ return -EIO; ++ sgt->nents = nents; + return 0; + } + return dma_map_sgtable(dev->dma_device, sgt, direction, dma_attrs); +diff --git a/include/rdma/rdma_netlink.h b/include/rdma/rdma_netlink.h +index 2758d9df71ee9..c2a79aeee113c 100644 +--- a/include/rdma/rdma_netlink.h ++++ b/include/rdma/rdma_netlink.h +@@ -30,7 +30,7 @@ enum rdma_nl_flags { + * constant as well and the compiler checks they are the same. + */ + #define MODULE_ALIAS_RDMA_NETLINK(_index, _val) \ +- static inline void __chk_##_index(void) \ ++ static inline void __maybe_unused __chk_##_index(void) \ + { \ + BUILD_BUG_ON(_index != _val); \ + } \ +diff --git a/include/scsi/libfcoe.h b/include/scsi/libfcoe.h +index fac8e89aed81d..310e0dbffda99 100644 +--- a/include/scsi/libfcoe.h ++++ b/include/scsi/libfcoe.h +@@ -249,7 +249,8 @@ int fcoe_ctlr_recv_flogi(struct fcoe_ctlr *, struct fc_lport *, + struct fc_frame *); + + /* libfcoe funcs */ +-u64 fcoe_wwn_from_mac(unsigned char mac[MAX_ADDR_LEN], unsigned int, unsigned int); ++u64 fcoe_wwn_from_mac(unsigned char mac[ETH_ALEN], unsigned int scheme, ++ unsigned int port); + int fcoe_libfc_config(struct fc_lport *, struct fcoe_ctlr *, + const struct libfc_function_template *, int init_fcp); + u32 fcoe_fc_crc(struct fc_frame *fp); +diff --git a/include/scsi/libiscsi.h b/include/scsi/libiscsi.h +index 4ee233e5a6ffa..c7ee5279e7fc9 100644 +--- a/include/scsi/libiscsi.h ++++ b/include/scsi/libiscsi.h +@@ -52,8 +52,10 @@ enum { + + #define ISID_SIZE 6 + +-/* Connection suspend "bit" */ +-#define ISCSI_SUSPEND_BIT 1 ++/* Connection flags */ ++#define ISCSI_CONN_FLAG_SUSPEND_TX 0 ++#define ISCSI_CONN_FLAG_SUSPEND_RX 1 ++#define ISCSI_CONN_FLAG_BOUND 2 + + #define ISCSI_ITT_MASK 0x1fff + #define ISCSI_TOTAL_CMDS_MAX 4096 +@@ -199,8 +201,9 @@ struct iscsi_conn { + struct list_head cmdqueue; /* data-path cmd queue */ + struct list_head requeue; /* tasks needing another run */ + struct work_struct xmitwork; /* per-conn. 
xmit workqueue */ +- unsigned long suspend_tx; /* suspend Tx */ +- unsigned long suspend_rx; /* suspend Rx */ ++ /* recv */ ++ struct work_struct recvwork; ++ unsigned long flags; /* ISCSI_CONN_FLAGs */ + + /* negotiated params */ + unsigned max_recv_dlength; /* initiator_max_recv_dsl*/ +@@ -399,7 +402,7 @@ extern int iscsi_host_add(struct Scsi_Host *shost, struct device *pdev); + extern struct Scsi_Host *iscsi_host_alloc(struct scsi_host_template *sht, + int dd_data_size, + bool xmit_can_sleep); +-extern void iscsi_host_remove(struct Scsi_Host *shost); ++extern void iscsi_host_remove(struct Scsi_Host *shost, bool is_shutdown); + extern void iscsi_host_free(struct Scsi_Host *shost); + extern int iscsi_target_alloc(struct scsi_target *starget); + extern int iscsi_host_get_max_scsi_cmds(struct Scsi_Host *shost, +@@ -411,6 +414,8 @@ extern int iscsi_host_get_max_scsi_cmds(struct Scsi_Host *shost, + extern struct iscsi_cls_session * + iscsi_session_setup(struct iscsi_transport *, struct Scsi_Host *shost, + uint16_t, int, int, uint32_t, unsigned int); ++void iscsi_session_remove(struct iscsi_cls_session *cls_session); ++void iscsi_session_free(struct iscsi_cls_session *cls_session); + extern void iscsi_session_teardown(struct iscsi_cls_session *); + extern void iscsi_session_recovery_timedout(struct iscsi_cls_session *); + extern int iscsi_set_param(struct iscsi_cls_conn *cls_conn, +@@ -440,8 +445,10 @@ extern int iscsi_conn_get_param(struct iscsi_cls_conn *cls_conn, + extern int iscsi_conn_get_addr_param(struct sockaddr_storage *addr, + enum iscsi_param param, char *buf); + extern void iscsi_suspend_tx(struct iscsi_conn *conn); ++extern void iscsi_suspend_rx(struct iscsi_conn *conn); + extern void iscsi_suspend_queue(struct iscsi_conn *conn); +-extern void iscsi_conn_queue_work(struct iscsi_conn *conn); ++extern void iscsi_conn_queue_xmit(struct iscsi_conn *conn); ++extern void iscsi_conn_queue_recv(struct iscsi_conn *conn); + + #define iscsi_conn_printk(prefix, _c, fmt, a...) 
\ + iscsi_cls_conn_printk(prefix, ((struct iscsi_conn *)_c)->cls_conn, \ +diff --git a/include/scsi/scsi_cmnd.h b/include/scsi/scsi_cmnd.h +index eaf04c9a1dfcb..685249233f2fe 100644 +--- a/include/scsi/scsi_cmnd.h ++++ b/include/scsi/scsi_cmnd.h +@@ -68,7 +68,7 @@ struct scsi_pointer { + struct scsi_cmnd { + struct scsi_request req; + struct scsi_device *device; +- struct list_head eh_entry; /* entry for the host eh_cmd_q */ ++ struct list_head eh_entry; /* entry for the host eh_abort_list/eh_cmd_q */ + struct delayed_work abort_work; + + struct rcu_head rcu; +@@ -211,7 +211,7 @@ static inline unsigned int scsi_get_resid(struct scsi_cmnd *cmd) + for_each_sg(scsi_sglist(cmd), sg, nseg, __i) + + static inline int scsi_sg_copy_from_buffer(struct scsi_cmnd *cmd, +- void *buf, int buflen) ++ const void *buf, int buflen) + { + return sg_copy_from_buffer(scsi_sglist(cmd), scsi_sg_count(cmd), + buf, buflen); +diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h +index b97e142a7ca92..3b3dbc37653da 100644 +--- a/include/scsi/scsi_device.h ++++ b/include/scsi/scsi_device.h +@@ -206,6 +206,7 @@ struct scsi_device { + unsigned rpm_autosuspend:1; /* Enable runtime autosuspend at device + * creation time */ + unsigned ignore_media_change:1; /* Ignore MEDIA CHANGE on resume */ ++ unsigned silence_suspend:1; /* Do not print runtime PM related messages */ + + bool offline_already; /* Device offline message logged */ + +diff --git a/include/scsi/scsi_host.h b/include/scsi/scsi_host.h +index 75363707b73f9..1a02e58eb4e44 100644 +--- a/include/scsi/scsi_host.h ++++ b/include/scsi/scsi_host.h +@@ -556,6 +556,7 @@ struct Scsi_Host { + + struct mutex scan_mutex;/* serialize scanning activity */ + ++ struct list_head eh_abort_list; + struct list_head eh_cmd_q; + struct task_struct * ehandler; /* Error recovery thread. */ + struct completion * eh_action; /* Wait for specific actions on the +diff --git a/include/scsi/scsi_transport_iscsi.h b/include/scsi/scsi_transport_iscsi.h +index c5d7810fd7926..304ccf1539285 100644 +--- a/include/scsi/scsi_transport_iscsi.h ++++ b/include/scsi/scsi_transport_iscsi.h +@@ -211,6 +211,8 @@ struct iscsi_cls_conn { + struct mutex ep_mutex; + struct iscsi_endpoint *ep; + ++ /* Used when accessing flags and queueing work. 
*/ ++ spinlock_t lock; + unsigned long flags; + struct work_struct cleanup_work; + +@@ -234,6 +236,14 @@ enum { + ISCSI_SESSION_FREE, + }; + ++enum { ++ ISCSI_SESSION_TARGET_UNBOUND, ++ ISCSI_SESSION_TARGET_ALLOCATED, ++ ISCSI_SESSION_TARGET_SCANNED, ++ ISCSI_SESSION_TARGET_UNBINDING, ++ ISCSI_SESSION_TARGET_MAX, ++}; ++ + #define ISCSI_MAX_TARGET -1 + + struct iscsi_cls_session { +@@ -260,6 +270,7 @@ struct iscsi_cls_session { + */ + pid_t creator; + int state; ++ int target_state; /* session target bind state */ + int sid; /* session id */ + void *dd_data; /* LLD private data */ + struct device dev; /* sysfs transport/container device */ +@@ -294,7 +305,7 @@ extern void iscsi_host_for_each_session(struct Scsi_Host *shost, + struct iscsi_endpoint { + void *dd_data; /* LLD private data */ + struct device dev; +- uint64_t id; ++ int id; + struct iscsi_cls_conn *conn; + }; + +@@ -439,6 +450,7 @@ extern struct iscsi_cls_session *iscsi_create_session(struct Scsi_Host *shost, + struct iscsi_transport *t, + int dd_size, + unsigned int target_id); ++extern void iscsi_force_destroy_session(struct iscsi_cls_session *session); + extern void iscsi_remove_session(struct iscsi_cls_session *session); + extern void iscsi_free_session(struct iscsi_cls_session *session); + extern struct iscsi_cls_conn *iscsi_create_conn(struct iscsi_cls_session *sess, +diff --git a/include/soc/at91/sama7-ddr.h b/include/soc/at91/sama7-ddr.h +index f6542584ca139..cac3f9cd25f9c 100644 +--- a/include/soc/at91/sama7-ddr.h ++++ b/include/soc/at91/sama7-ddr.h +@@ -11,8 +11,6 @@ + #ifndef __SAMA7_DDR_H__ + #define __SAMA7_DDR_H__ + +-#ifdef CONFIG_SOC_SAMA7 +- + /* DDR3PHY */ + #define DDR3PHY_PIR (0x04) /* DDR3PHY PHY Initialization Register */ + #define DDR3PHY_PIR_DLLBYP (1 << 17) /* DLL Bypass */ +@@ -28,7 +26,10 @@ + #define DDR3PHY_PGSR (0x0C) /* DDR3PHY PHY General Status Register */ + #define DDR3PHY_PGSR_IDONE (1 << 0) /* Initialization Done */ + +-#define DDR3PHY_ACIOCR (0x24) /* DDR3PHY AC I/O Configuration Register */ ++#define DDR3PHY_ACDLLCR (0x14) /* DDR3PHY AC DLL Control Register */ ++#define DDR3PHY_ACDLLCR_DLLSRST (1 << 30) /* DLL Soft Reset */ ++ ++#define DDR3PHY_ACIOCR (0x24) /* DDR3PHY AC I/O Configuration Register */ + #define DDR3PHY_ACIOCR_CSPDD_CS0 (1 << 18) /* CS#[0] Power Down Driver */ + #define DDR3PHY_ACIOCR_CKPDD_CK0 (1 << 8) /* CK[0] Power Down Driver */ + #define DDR3PHY_ACIORC_ACPDD (1 << 3) /* AC Power Down Driver */ +@@ -40,6 +41,14 @@ + #define DDR3PHY_DSGCR_ODTPDD_ODT0 (1 << 20) /* ODT[0] Power Down Driver */ + + #define DDR3PHY_ZQ0SR0 (0x188) /* ZQ status register 0 */ ++#define DDR3PHY_ZQ0SR0_PDO_OFF (0) /* Pull-down output impedance select offset */ ++#define DDR3PHY_ZQ0SR0_PUO_OFF (5) /* Pull-up output impedance select offset */ ++#define DDR3PHY_ZQ0SR0_PDODT_OFF (10) /* Pull-down on-die termination impedance select offset */ ++#define DDR3PHY_ZQ0SRO_PUODT_OFF (15) /* Pull-up on-die termination impedance select offset */ ++ ++#define DDR3PHY_DX0DLLCR (0x1CC) /* DDR3PHY DATX8 DLL Control Register */ ++#define DDR3PHY_DX1DLLCR (0x20C) /* DDR3PHY DATX8 DLL Control Register */ ++#define DDR3PHY_DXDLLCR_DLLDIS (1 << 31) /* DLL Disable */ + + /* UDDRC */ + #define UDDRC_STAT (0x04) /* UDDRC Operating Mode Status Register */ +@@ -75,6 +84,4 @@ + #define UDDRC_PCTRL_3 (0x6A0) /* UDDRC Port 3 Control Register */ + #define UDDRC_PCTRL_4 (0x750) /* UDDRC Port 4 Control Register */ + +-#endif /* CONFIG_SOC_SAMA7 */ +- + #endif /* __SAMA7_DDR_H__ */ +diff --git a/include/sound/control.h 
b/include/sound/control.h +index 985c51a8fb748..a1fc7e0a47d95 100644 +--- a/include/sound/control.h ++++ b/include/sound/control.h +@@ -109,7 +109,7 @@ struct snd_ctl_file { + int preferred_subdevice[SND_CTL_SUBDEV_ITEMS]; + wait_queue_head_t change_sleep; + spinlock_t read_lock; +- struct fasync_struct *fasync; ++ struct snd_fasync *fasync; + int subscribed; /* read interface is activated */ + struct list_head events; /* waiting events for read */ + }; +diff --git a/include/sound/core.h b/include/sound/core.h +index b7e9b58d3c788..39cee40ac22e0 100644 +--- a/include/sound/core.h ++++ b/include/sound/core.h +@@ -284,6 +284,7 @@ int snd_card_disconnect(struct snd_card *card); + void snd_card_disconnect_sync(struct snd_card *card); + int snd_card_free(struct snd_card *card); + int snd_card_free_when_closed(struct snd_card *card); ++int snd_card_free_on_error(struct device *dev, int ret); + void snd_card_set_id(struct snd_card *card, const char *id); + int snd_card_register(struct snd_card *card); + int snd_card_info_init(void); +@@ -500,4 +501,12 @@ snd_pci_quirk_lookup_id(u16 vendor, u16 device, + } + #endif + ++/* async signal helpers */ ++struct snd_fasync; ++ ++int snd_fasync_helper(int fd, struct file *file, int on, ++ struct snd_fasync **fasyncp); ++void snd_kill_fasync(struct snd_fasync *fasync, int signal, int poll); ++void snd_fasync_free(struct snd_fasync *fasync); ++ + #endif /* __SOUND_CORE_H */ +diff --git a/include/sound/hda_codec.h b/include/sound/hda_codec.h +index 0e45963bb767f..82d9daa178517 100644 +--- a/include/sound/hda_codec.h ++++ b/include/sound/hda_codec.h +@@ -8,7 +8,7 @@ + #ifndef __SOUND_HDA_CODEC_H + #define __SOUND_HDA_CODEC_H + +-#include <linux/kref.h> ++#include <linux/refcount.h> + #include <linux/mod_devicetable.h> + #include <sound/info.h> + #include <sound/control.h> +@@ -166,8 +166,8 @@ struct hda_pcm { + bool own_chmap; /* codec driver provides own channel maps */ + /* private: */ + struct hda_codec *codec; +- struct kref kref; + struct list_head list; ++ unsigned int disconnected:1; + }; + + /* codec information */ +@@ -187,6 +187,8 @@ struct hda_codec { + + /* PCM to create, set by patch_ops.build_pcms callback */ + struct list_head pcm_list_head; ++ refcount_t pcm_ref; ++ wait_queue_head_t remove_sleep; + + /* codec specific info */ + void *spec; +@@ -420,7 +422,7 @@ void snd_hda_codec_cleanup_for_unbind(struct hda_codec *codec); + + static inline void snd_hda_codec_pcm_get(struct hda_pcm *pcm) + { +- kref_get(&pcm->kref); ++ refcount_inc(&pcm->codec->pcm_ref); + } + void snd_hda_codec_pcm_put(struct hda_pcm *pcm); + +diff --git a/include/sound/hdaudio.h b/include/sound/hdaudio.h +index 22af68b014262..658fccdc8660f 100644 +--- a/include/sound/hdaudio.h ++++ b/include/sound/hdaudio.h +@@ -558,6 +558,8 @@ int snd_hdac_stream_set_params(struct hdac_stream *azx_dev, + void snd_hdac_stream_start(struct hdac_stream *azx_dev, bool fresh_start); + void snd_hdac_stream_clear(struct hdac_stream *azx_dev); + void snd_hdac_stream_stop(struct hdac_stream *azx_dev); ++void snd_hdac_stop_streams(struct hdac_bus *bus); ++void snd_hdac_stop_streams_and_chip(struct hdac_bus *bus); + void snd_hdac_stream_reset(struct hdac_stream *azx_dev); + void snd_hdac_stream_sync_trigger(struct hdac_stream *azx_dev, bool set, + unsigned int streams, unsigned int reg); +diff --git a/include/sound/hdaudio_ext.h b/include/sound/hdaudio_ext.h +index 375581634143c..56ea5cde5e63a 100644 +--- a/include/sound/hdaudio_ext.h ++++ b/include/sound/hdaudio_ext.h +@@ -88,9 +88,10 @@ struct 
hdac_ext_stream *snd_hdac_ext_stream_assign(struct hdac_bus *bus, + struct snd_pcm_substream *substream, + int type); + void snd_hdac_ext_stream_release(struct hdac_ext_stream *azx_dev, int type); ++void snd_hdac_ext_stream_decouple_locked(struct hdac_bus *bus, ++ struct hdac_ext_stream *azx_dev, bool decouple); + void snd_hdac_ext_stream_decouple(struct hdac_bus *bus, + struct hdac_ext_stream *azx_dev, bool decouple); +-void snd_hdac_ext_stop_streams(struct hdac_bus *bus); + + int snd_hdac_ext_stream_set_spib(struct hdac_bus *bus, + struct hdac_ext_stream *stream, u32 value); +diff --git a/include/sound/jack.h b/include/sound/jack.h +index 1181f536557eb..1ed90e2109e9b 100644 +--- a/include/sound/jack.h ++++ b/include/sound/jack.h +@@ -62,6 +62,7 @@ struct snd_jack { + const char *id; + #ifdef CONFIG_SND_JACK_INPUT_DEV + struct input_dev *input_dev; ++ struct mutex input_dev_lock; + int registered; + int type; + char name[100]; +diff --git a/include/sound/pcm.h b/include/sound/pcm.h +index 33451f8ff755b..181df0452ae2e 100644 +--- a/include/sound/pcm.h ++++ b/include/sound/pcm.h +@@ -106,24 +106,24 @@ struct snd_pcm_ops { + #define SNDRV_PCM_POS_XRUN ((snd_pcm_uframes_t)-1) + + /* If you change this don't forget to change rates[] table in pcm_native.c */ +-#define SNDRV_PCM_RATE_5512 (1<<0) /* 5512Hz */ +-#define SNDRV_PCM_RATE_8000 (1<<1) /* 8000Hz */ +-#define SNDRV_PCM_RATE_11025 (1<<2) /* 11025Hz */ +-#define SNDRV_PCM_RATE_16000 (1<<3) /* 16000Hz */ +-#define SNDRV_PCM_RATE_22050 (1<<4) /* 22050Hz */ +-#define SNDRV_PCM_RATE_32000 (1<<5) /* 32000Hz */ +-#define SNDRV_PCM_RATE_44100 (1<<6) /* 44100Hz */ +-#define SNDRV_PCM_RATE_48000 (1<<7) /* 48000Hz */ +-#define SNDRV_PCM_RATE_64000 (1<<8) /* 64000Hz */ +-#define SNDRV_PCM_RATE_88200 (1<<9) /* 88200Hz */ +-#define SNDRV_PCM_RATE_96000 (1<<10) /* 96000Hz */ +-#define SNDRV_PCM_RATE_176400 (1<<11) /* 176400Hz */ +-#define SNDRV_PCM_RATE_192000 (1<<12) /* 192000Hz */ +-#define SNDRV_PCM_RATE_352800 (1<<13) /* 352800Hz */ +-#define SNDRV_PCM_RATE_384000 (1<<14) /* 384000Hz */ +- +-#define SNDRV_PCM_RATE_CONTINUOUS (1<<30) /* continuous range */ +-#define SNDRV_PCM_RATE_KNOT (1<<31) /* supports more non-continuos rates */ ++#define SNDRV_PCM_RATE_5512 (1U<<0) /* 5512Hz */ ++#define SNDRV_PCM_RATE_8000 (1U<<1) /* 8000Hz */ ++#define SNDRV_PCM_RATE_11025 (1U<<2) /* 11025Hz */ ++#define SNDRV_PCM_RATE_16000 (1U<<3) /* 16000Hz */ ++#define SNDRV_PCM_RATE_22050 (1U<<4) /* 22050Hz */ ++#define SNDRV_PCM_RATE_32000 (1U<<5) /* 32000Hz */ ++#define SNDRV_PCM_RATE_44100 (1U<<6) /* 44100Hz */ ++#define SNDRV_PCM_RATE_48000 (1U<<7) /* 48000Hz */ ++#define SNDRV_PCM_RATE_64000 (1U<<8) /* 64000Hz */ ++#define SNDRV_PCM_RATE_88200 (1U<<9) /* 88200Hz */ ++#define SNDRV_PCM_RATE_96000 (1U<<10) /* 96000Hz */ ++#define SNDRV_PCM_RATE_176400 (1U<<11) /* 176400Hz */ ++#define SNDRV_PCM_RATE_192000 (1U<<12) /* 192000Hz */ ++#define SNDRV_PCM_RATE_352800 (1U<<13) /* 352800Hz */ ++#define SNDRV_PCM_RATE_384000 (1U<<14) /* 384000Hz */ ++ ++#define SNDRV_PCM_RATE_CONTINUOUS (1U<<30) /* continuous range */ ++#define SNDRV_PCM_RATE_KNOT (1U<<31) /* supports more non-continuos rates */ + + #define SNDRV_PCM_RATE_8000_44100 (SNDRV_PCM_RATE_8000|SNDRV_PCM_RATE_11025|\ + SNDRV_PCM_RATE_16000|SNDRV_PCM_RATE_22050|\ +@@ -398,6 +398,8 @@ struct snd_pcm_runtime { + wait_queue_head_t tsleep; /* transfer sleep */ + struct fasync_struct *fasync; + bool stop_operating; /* sync_stop will be called */ ++ struct mutex buffer_mutex; /* protect for buffer changes */ ++ atomic_t 
buffer_accessing; /* >0: in r/w operation, <0: blocked */ + + /* -- private section -- */ + void *private_data; +diff --git a/include/sound/soc-dai.h b/include/sound/soc-dai.h +index 0dcb361a98bb3..ef3bb1bcea4e0 100644 +--- a/include/sound/soc-dai.h ++++ b/include/sound/soc-dai.h +@@ -295,9 +295,9 @@ struct snd_soc_dai_ops { + unsigned int *rx_num, unsigned int *rx_slot); + int (*set_tristate)(struct snd_soc_dai *dai, int tristate); + +- int (*set_sdw_stream)(struct snd_soc_dai *dai, +- void *stream, int direction); +- void *(*get_sdw_stream)(struct snd_soc_dai *dai, int direction); ++ int (*set_stream)(struct snd_soc_dai *dai, ++ void *stream, int direction); ++ void *(*get_stream)(struct snd_soc_dai *dai, int direction); + + /* + * DAI digital mute - optional. +@@ -515,42 +515,42 @@ static inline void *snd_soc_dai_get_drvdata(struct snd_soc_dai *dai) + } + + /** +- * snd_soc_dai_set_sdw_stream() - Configures a DAI for SDW stream operation ++ * snd_soc_dai_set_stream() - Configures a DAI for stream operation + * @dai: DAI +- * @stream: STREAM ++ * @stream: STREAM (opaque structure depending on DAI type) + * @direction: Stream direction(Playback/Capture) +- * SoundWire subsystem doesn't have a notion of direction and we reuse ++ * Some subsystems, such as SoundWire, don't have a notion of direction and we reuse + * the ASoC stream direction to configure sink/source ports. + * Playback maps to source ports and Capture for sink ports. + * + * This should be invoked with NULL to clear the stream set previously. + * Returns 0 on success, a negative error code otherwise. + */ +-static inline int snd_soc_dai_set_sdw_stream(struct snd_soc_dai *dai, +- void *stream, int direction) ++static inline int snd_soc_dai_set_stream(struct snd_soc_dai *dai, ++ void *stream, int direction) + { +- if (dai->driver->ops->set_sdw_stream) +- return dai->driver->ops->set_sdw_stream(dai, stream, direction); ++ if (dai->driver->ops->set_stream) ++ return dai->driver->ops->set_stream(dai, stream, direction); + else + return -ENOTSUPP; + } + + /** +- * snd_soc_dai_get_sdw_stream() - Retrieves SDW stream from DAI ++ * snd_soc_dai_get_stream() - Retrieves stream from DAI + * @dai: DAI + * @direction: Stream direction(Playback/Capture) + * + * This routine only retrieves that was previously configured +- * with snd_soc_dai_get_sdw_stream() ++ * with snd_soc_dai_get_stream() + * + * Returns pointer to stream or an ERR_PTR value, e.g. 
+ * ERR_PTR(-ENOTSUPP) if callback is not supported; + */ +-static inline void *snd_soc_dai_get_sdw_stream(struct snd_soc_dai *dai, +- int direction) ++static inline void *snd_soc_dai_get_stream(struct snd_soc_dai *dai, ++ int direction) + { +- if (dai->driver->ops->get_sdw_stream) +- return dai->driver->ops->get_sdw_stream(dai, direction); ++ if (dai->driver->ops->get_stream) ++ return dai->driver->ops->get_stream(dai, direction); + else + return ERR_PTR(-ENOTSUPP); + } +diff --git a/include/sound/soc-topology.h b/include/sound/soc-topology.h +index 4afd667e124c2..3e8a85e1e8094 100644 +--- a/include/sound/soc-topology.h ++++ b/include/sound/soc-topology.h +@@ -188,8 +188,7 @@ int snd_soc_tplg_widget_bind_event(struct snd_soc_dapm_widget *w, + + #else + +-static inline int snd_soc_tplg_component_remove(struct snd_soc_component *comp, +- u32 index) ++static inline int snd_soc_tplg_component_remove(struct snd_soc_component *comp) + { + return 0; + } +diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h +index fb11c7693b257..2121a323fd6c3 100644 +--- a/include/target/target_core_base.h ++++ b/include/target/target_core_base.h +@@ -812,8 +812,9 @@ struct se_device { + atomic_long_t read_bytes; + atomic_long_t write_bytes; + /* Active commands on this virtual SE device */ +- atomic_t simple_cmds; +- atomic_t dev_ordered_sync; ++ atomic_t non_ordered; ++ bool ordered_sync_in_progress; ++ atomic_t delayed_cmd_count; + atomic_t dev_qf_count; + u32 export_count; + spinlock_t delayed_cmd_lock; +@@ -834,6 +835,7 @@ struct se_device { + struct list_head dev_sep_list; + struct list_head dev_tmr_list; + struct work_struct qf_work_queue; ++ struct work_struct delayed_cmd_work; + struct list_head delayed_cmd_list; + struct list_head qf_cmd_list; + /* Pointer to associated SE HBA */ +diff --git a/include/trace/bpf_probe.h b/include/trace/bpf_probe.h +index a23be89119aa5..04939b2d2f192 100644 +--- a/include/trace/bpf_probe.h ++++ b/include/trace/bpf_probe.h +@@ -21,6 +21,22 @@ + #undef __get_bitmask + #define __get_bitmask(field) (char *)__get_dynamic_array(field) + ++#undef __get_rel_dynamic_array ++#define __get_rel_dynamic_array(field) \ ++ ((void *)(&__entry->__rel_loc_##field) + \ ++ sizeof(__entry->__rel_loc_##field) + \ ++ (__entry->__rel_loc_##field & 0xffff)) ++ ++#undef __get_rel_dynamic_array_len ++#define __get_rel_dynamic_array_len(field) \ ++ ((__entry->__rel_loc_##field >> 16) & 0xffff) ++ ++#undef __get_rel_str ++#define __get_rel_str(field) ((char *)__get_rel_dynamic_array(field)) ++ ++#undef __get_rel_bitmask ++#define __get_rel_bitmask(field) (char *)__get_rel_dynamic_array(field) ++ + #undef __perf_count + #define __perf_count(c) (c) + +diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h +index 8f58fd95efc74..9271b5dfae4c4 100644 +--- a/include/trace/events/btrfs.h ++++ b/include/trace/events/btrfs.h +@@ -96,7 +96,7 @@ struct btrfs_space_info; + EM( FLUSH_DELALLOC_WAIT, "FLUSH_DELALLOC_WAIT") \ + EM( FLUSH_DELALLOC_FULL, "FLUSH_DELALLOC_FULL") \ + EM( FLUSH_DELAYED_REFS_NR, "FLUSH_DELAYED_REFS_NR") \ +- EM( FLUSH_DELAYED_REFS, "FLUSH_ELAYED_REFS") \ ++ EM( FLUSH_DELAYED_REFS, "FLUSH_DELAYED_REFS") \ + EM( ALLOC_CHUNK, "ALLOC_CHUNK") \ + EM( ALLOC_CHUNK_FORCE, "ALLOC_CHUNK_FORCE") \ + EM( RUN_DELAYED_IPUTS, "RUN_DELAYED_IPUTS") \ +diff --git a/include/trace/events/cgroup.h b/include/trace/events/cgroup.h +index 7f42a3de59e6b..dd7d7c9efecdf 100644 +--- a/include/trace/events/cgroup.h ++++ b/include/trace/events/cgroup.h +@@ -59,8 +59,8 @@ 
DECLARE_EVENT_CLASS(cgroup, + + TP_STRUCT__entry( + __field( int, root ) +- __field( int, id ) + __field( int, level ) ++ __field( u64, id ) + __string( path, path ) + ), + +@@ -71,7 +71,7 @@ DECLARE_EVENT_CLASS(cgroup, + __assign_str(path, path); + ), + +- TP_printk("root=%d id=%d level=%d path=%s", ++ TP_printk("root=%d id=%llu level=%d path=%s", + __entry->root, __entry->id, __entry->level, __get_str(path)) + ); + +@@ -126,8 +126,8 @@ DECLARE_EVENT_CLASS(cgroup_migrate, + + TP_STRUCT__entry( + __field( int, dst_root ) +- __field( int, dst_id ) + __field( int, dst_level ) ++ __field( u64, dst_id ) + __field( int, pid ) + __string( dst_path, path ) + __string( comm, task->comm ) +@@ -142,7 +142,7 @@ DECLARE_EVENT_CLASS(cgroup_migrate, + __assign_str(comm, task->comm); + ), + +- TP_printk("dst_root=%d dst_id=%d dst_level=%d dst_path=%s pid=%d comm=%s", ++ TP_printk("dst_root=%d dst_id=%llu dst_level=%d dst_path=%s pid=%d comm=%s", + __entry->dst_root, __entry->dst_id, __entry->dst_level, + __get_str(dst_path), __entry->pid, __get_str(comm)) + ); +@@ -171,8 +171,8 @@ DECLARE_EVENT_CLASS(cgroup_event, + + TP_STRUCT__entry( + __field( int, root ) +- __field( int, id ) + __field( int, level ) ++ __field( u64, id ) + __string( path, path ) + __field( int, val ) + ), +@@ -185,7 +185,7 @@ DECLARE_EVENT_CLASS(cgroup_event, + __entry->val = val; + ), + +- TP_printk("root=%d id=%d level=%d path=%s val=%d", ++ TP_printk("root=%d id=%llu level=%d path=%s val=%d", + __entry->root, __entry->id, __entry->level, __get_str(path), + __entry->val) + ); +diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h +index 0ea36b2b0662a..c649c7fcb9afb 100644 +--- a/include/trace/events/ext4.h ++++ b/include/trace/events/ext4.h +@@ -95,6 +95,18 @@ TRACE_DEFINE_ENUM(ES_REFERENCED_B); + { FALLOC_FL_COLLAPSE_RANGE, "COLLAPSE_RANGE"}, \ + { FALLOC_FL_ZERO_RANGE, "ZERO_RANGE"}) + ++TRACE_DEFINE_ENUM(EXT4_FC_REASON_XATTR); ++TRACE_DEFINE_ENUM(EXT4_FC_REASON_CROSS_RENAME); ++TRACE_DEFINE_ENUM(EXT4_FC_REASON_JOURNAL_FLAG_CHANGE); ++TRACE_DEFINE_ENUM(EXT4_FC_REASON_NOMEM); ++TRACE_DEFINE_ENUM(EXT4_FC_REASON_SWAP_BOOT); ++TRACE_DEFINE_ENUM(EXT4_FC_REASON_RESIZE); ++TRACE_DEFINE_ENUM(EXT4_FC_REASON_RENAME_DIR); ++TRACE_DEFINE_ENUM(EXT4_FC_REASON_FALLOC_RANGE); ++TRACE_DEFINE_ENUM(EXT4_FC_REASON_INODE_JOURNAL_DATA); ++TRACE_DEFINE_ENUM(EXT4_FC_REASON_ENCRYPTED_FILENAME); ++TRACE_DEFINE_ENUM(EXT4_FC_REASON_MAX); ++ + #define show_fc_reason(reason) \ + __print_symbolic(reason, \ + { EXT4_FC_REASON_XATTR, "XATTR"}, \ +@@ -105,7 +117,8 @@ TRACE_DEFINE_ENUM(ES_REFERENCED_B); + { EXT4_FC_REASON_RESIZE, "RESIZE"}, \ + { EXT4_FC_REASON_RENAME_DIR, "RENAME_DIR"}, \ + { EXT4_FC_REASON_FALLOC_RANGE, "FALLOC_RANGE"}, \ +- { EXT4_FC_REASON_INODE_JOURNAL_DATA, "INODE_JOURNAL_DATA"}) ++ { EXT4_FC_REASON_INODE_JOURNAL_DATA, "INODE_JOURNAL_DATA"}, \ ++ { EXT4_FC_REASON_ENCRYPTED_FILENAME, "ENCRYPTED_FILENAME"}) + + TRACE_EVENT(ext4_other_inode_update_time, + TP_PROTO(struct inode *inode, ino_t orig_ino), +@@ -2723,41 +2736,51 @@ TRACE_EVENT(ext4_fc_commit_stop, + + #define FC_REASON_NAME_STAT(reason) \ + show_fc_reason(reason), \ +- __entry->sbi->s_fc_stats.fc_ineligible_reason_count[reason] ++ __entry->fc_ineligible_rc[reason] + + TRACE_EVENT(ext4_fc_stats, +- TP_PROTO(struct super_block *sb), +- +- TP_ARGS(sb), ++ TP_PROTO(struct super_block *sb), + +- TP_STRUCT__entry( +- __field(dev_t, dev) +- __field(struct ext4_sb_info *, sbi) +- __field(int, count) +- ), ++ TP_ARGS(sb), + +- TP_fast_assign( +- __entry->dev = sb->s_dev; +- 
__entry->sbi = EXT4_SB(sb); +- ), ++ TP_STRUCT__entry( ++ __field(dev_t, dev) ++ __array(unsigned int, fc_ineligible_rc, EXT4_FC_REASON_MAX) ++ __field(unsigned long, fc_commits) ++ __field(unsigned long, fc_ineligible_commits) ++ __field(unsigned long, fc_numblks) ++ ), + +- TP_printk("dev %d:%d fc ineligible reasons:\n" +- "%s:%d, %s:%d, %s:%d, %s:%d, %s:%d, %s:%d, %s:%d, %s:%d, %s:%d; " +- "num_commits:%ld, ineligible: %ld, numblks: %ld", +- MAJOR(__entry->dev), MINOR(__entry->dev), +- FC_REASON_NAME_STAT(EXT4_FC_REASON_XATTR), +- FC_REASON_NAME_STAT(EXT4_FC_REASON_CROSS_RENAME), +- FC_REASON_NAME_STAT(EXT4_FC_REASON_JOURNAL_FLAG_CHANGE), +- FC_REASON_NAME_STAT(EXT4_FC_REASON_NOMEM), +- FC_REASON_NAME_STAT(EXT4_FC_REASON_SWAP_BOOT), +- FC_REASON_NAME_STAT(EXT4_FC_REASON_RESIZE), +- FC_REASON_NAME_STAT(EXT4_FC_REASON_RENAME_DIR), +- FC_REASON_NAME_STAT(EXT4_FC_REASON_FALLOC_RANGE), +- FC_REASON_NAME_STAT(EXT4_FC_REASON_INODE_JOURNAL_DATA), +- __entry->sbi->s_fc_stats.fc_num_commits, +- __entry->sbi->s_fc_stats.fc_ineligible_commits, +- __entry->sbi->s_fc_stats.fc_numblks) ++ TP_fast_assign( ++ int i; + ++ __entry->dev = sb->s_dev; ++ for (i = 0; i < EXT4_FC_REASON_MAX; i++) { ++ __entry->fc_ineligible_rc[i] = ++ EXT4_SB(sb)->s_fc_stats.fc_ineligible_reason_count[i]; ++ } ++ __entry->fc_commits = EXT4_SB(sb)->s_fc_stats.fc_num_commits; ++ __entry->fc_ineligible_commits = ++ EXT4_SB(sb)->s_fc_stats.fc_ineligible_commits; ++ __entry->fc_numblks = EXT4_SB(sb)->s_fc_stats.fc_numblks; ++ ), ++ ++ TP_printk("dev %d,%d fc ineligible reasons:\n" ++ "%s:%u, %s:%u, %s:%u, %s:%u, %s:%u, %s:%u, %s:%u, %s:%u, %s:%u, %s:%u" ++ "num_commits:%lu, ineligible: %lu, numblks: %lu", ++ MAJOR(__entry->dev), MINOR(__entry->dev), ++ FC_REASON_NAME_STAT(EXT4_FC_REASON_XATTR), ++ FC_REASON_NAME_STAT(EXT4_FC_REASON_CROSS_RENAME), ++ FC_REASON_NAME_STAT(EXT4_FC_REASON_JOURNAL_FLAG_CHANGE), ++ FC_REASON_NAME_STAT(EXT4_FC_REASON_NOMEM), ++ FC_REASON_NAME_STAT(EXT4_FC_REASON_SWAP_BOOT), ++ FC_REASON_NAME_STAT(EXT4_FC_REASON_RESIZE), ++ FC_REASON_NAME_STAT(EXT4_FC_REASON_RENAME_DIR), ++ FC_REASON_NAME_STAT(EXT4_FC_REASON_FALLOC_RANGE), ++ FC_REASON_NAME_STAT(EXT4_FC_REASON_INODE_JOURNAL_DATA), ++ FC_REASON_NAME_STAT(EXT4_FC_REASON_ENCRYPTED_FILENAME), ++ __entry->fc_commits, __entry->fc_ineligible_commits, ++ __entry->fc_numblks) + ); + + #define DEFINE_TRACE_DENTRY_EVENT(__type) \ +diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h +index 4e881d91c8744..4cb055af1ec0b 100644 +--- a/include/trace/events/f2fs.h ++++ b/include/trace/events/f2fs.h +@@ -807,20 +807,20 @@ TRACE_EVENT(f2fs_lookup_start, + TP_STRUCT__entry( + __field(dev_t, dev) + __field(ino_t, ino) +- __field(const char *, name) ++ __string(name, dentry->d_name.name) + __field(unsigned int, flags) + ), + + TP_fast_assign( + __entry->dev = dir->i_sb->s_dev; + __entry->ino = dir->i_ino; +- __entry->name = dentry->d_name.name; ++ __assign_str(name, dentry->d_name.name); + __entry->flags = flags; + ), + + TP_printk("dev = (%d,%d), pino = %lu, name:%s, flags:%u", + show_dev_ino(__entry), +- __entry->name, ++ __get_str(name), + __entry->flags) + ); + +@@ -834,7 +834,7 @@ TRACE_EVENT(f2fs_lookup_end, + TP_STRUCT__entry( + __field(dev_t, dev) + __field(ino_t, ino) +- __field(const char *, name) ++ __string(name, dentry->d_name.name) + __field(nid_t, cino) + __field(int, err) + ), +@@ -842,14 +842,14 @@ TRACE_EVENT(f2fs_lookup_end, + TP_fast_assign( + __entry->dev = dir->i_sb->s_dev; + __entry->ino = dir->i_ino; +- __entry->name = 
dentry->d_name.name; ++ __assign_str(name, dentry->d_name.name); + __entry->cino = ino; + __entry->err = err; + ), + + TP_printk("dev = (%d,%d), pino = %lu, name:%s, ino:%u, err:%d", + show_dev_ino(__entry), +- __entry->name, ++ __get_str(name), + __entry->cino, + __entry->err) + ); +diff --git a/include/trace/events/ib_mad.h b/include/trace/events/ib_mad.h +index 59363a083ecb9..d92691c78cff6 100644 +--- a/include/trace/events/ib_mad.h ++++ b/include/trace/events/ib_mad.h +@@ -49,7 +49,6 @@ DECLARE_EVENT_CLASS(ib_mad_send_template, + __field(int, retries_left) + __field(int, max_retries) + __field(int, retry) +- __field(u16, pkey) + ), + + TP_fast_assign( +@@ -89,7 +88,7 @@ DECLARE_EVENT_CLASS(ib_mad_send_template, + "hdr : base_ver 0x%x class 0x%x class_ver 0x%x " \ + "method 0x%x status 0x%x class_specific 0x%x tid 0x%llx " \ + "attr_id 0x%x attr_mod 0x%x => dlid 0x%08x sl %d "\ +- "pkey 0x%x rpqn 0x%x rqpkey 0x%x", ++ "rpqn 0x%x rqpkey 0x%x", + __entry->dev_index, __entry->port_num, __entry->qp_num, + __entry->agent_priv, be64_to_cpu(__entry->wrtid), + __entry->retries_left, __entry->max_retries, +@@ -100,7 +99,7 @@ DECLARE_EVENT_CLASS(ib_mad_send_template, + be16_to_cpu(__entry->class_specific), + be64_to_cpu(__entry->tid), be16_to_cpu(__entry->attr_id), + be32_to_cpu(__entry->attr_mod), +- be32_to_cpu(__entry->dlid), __entry->sl, __entry->pkey, ++ be32_to_cpu(__entry->dlid), __entry->sl, + __entry->rqpn, __entry->rqkey + ) + ); +@@ -204,7 +203,6 @@ TRACE_EVENT(ib_mad_recv_done_handler, + __field(u16, wc_status) + __field(u32, slid) + __field(u32, dev_index) +- __field(u16, pkey) + ), + + TP_fast_assign( +@@ -224,9 +222,6 @@ TRACE_EVENT(ib_mad_recv_done_handler, + __entry->slid = wc->slid; + __entry->src_qp = wc->src_qp; + __entry->sl = wc->sl; +- ib_query_pkey(qp_info->port_priv->device, +- qp_info->port_priv->port_num, +- wc->pkey_index, &__entry->pkey); + __entry->wc_status = wc->status; + ), + +@@ -234,7 +229,7 @@ TRACE_EVENT(ib_mad_recv_done_handler, + "base_ver 0x%02x class 0x%02x class_ver 0x%02x " \ + "method 0x%02x status 0x%04x class_specific 0x%04x " \ + "tid 0x%016llx attr_id 0x%04x attr_mod 0x%08x " \ +- "slid 0x%08x src QP%d, sl %d pkey 0x%04x", ++ "slid 0x%08x src QP%d, sl %d", + __entry->dev_index, __entry->port_num, __entry->qp_num, + __entry->wc_status, + __entry->length, +@@ -244,7 +239,7 @@ TRACE_EVENT(ib_mad_recv_done_handler, + be16_to_cpu(__entry->class_specific), + be64_to_cpu(__entry->tid), be16_to_cpu(__entry->attr_id), + be32_to_cpu(__entry->attr_mod), +- __entry->slid, __entry->src_qp, __entry->sl, __entry->pkey ++ __entry->slid, __entry->src_qp, __entry->sl + ) + ); + +diff --git a/include/trace/events/jbd2.h b/include/trace/events/jbd2.h +index a4dfe005983d3..29414288ea3e0 100644 +--- a/include/trace/events/jbd2.h ++++ b/include/trace/events/jbd2.h +@@ -40,7 +40,7 @@ DECLARE_EVENT_CLASS(jbd2_commit, + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( char, sync_commit ) +- __field( int, transaction ) ++ __field( tid_t, transaction ) + ), + + TP_fast_assign( +@@ -49,7 +49,7 @@ DECLARE_EVENT_CLASS(jbd2_commit, + __entry->transaction = commit_transaction->t_tid; + ), + +- TP_printk("dev %d,%d transaction %d sync %d", ++ TP_printk("dev %d,%d transaction %u sync %d", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->transaction, __entry->sync_commit) + ); +@@ -97,8 +97,8 @@ TRACE_EVENT(jbd2_end_commit, + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( char, sync_commit ) +- __field( int, transaction ) +- __field( int, head ) ++ __field( tid_t, 
transaction ) ++ __field( tid_t, head ) + ), + + TP_fast_assign( +@@ -108,7 +108,7 @@ TRACE_EVENT(jbd2_end_commit, + __entry->head = journal->j_tail_sequence; + ), + +- TP_printk("dev %d,%d transaction %d sync %d head %d", ++ TP_printk("dev %d,%d transaction %u sync %d head %u", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->transaction, __entry->sync_commit, __entry->head) + ); +@@ -134,14 +134,14 @@ TRACE_EVENT(jbd2_submit_inode_data, + ); + + DECLARE_EVENT_CLASS(jbd2_handle_start_class, +- TP_PROTO(dev_t dev, unsigned long tid, unsigned int type, ++ TP_PROTO(dev_t dev, tid_t tid, unsigned int type, + unsigned int line_no, int requested_blocks), + + TP_ARGS(dev, tid, type, line_no, requested_blocks), + + TP_STRUCT__entry( + __field( dev_t, dev ) +- __field( unsigned long, tid ) ++ __field( tid_t, tid ) + __field( unsigned int, type ) + __field( unsigned int, line_no ) + __field( int, requested_blocks) +@@ -155,28 +155,28 @@ DECLARE_EVENT_CLASS(jbd2_handle_start_class, + __entry->requested_blocks = requested_blocks; + ), + +- TP_printk("dev %d,%d tid %lu type %u line_no %u " ++ TP_printk("dev %d,%d tid %u type %u line_no %u " + "requested_blocks %d", + MAJOR(__entry->dev), MINOR(__entry->dev), __entry->tid, + __entry->type, __entry->line_no, __entry->requested_blocks) + ); + + DEFINE_EVENT(jbd2_handle_start_class, jbd2_handle_start, +- TP_PROTO(dev_t dev, unsigned long tid, unsigned int type, ++ TP_PROTO(dev_t dev, tid_t tid, unsigned int type, + unsigned int line_no, int requested_blocks), + + TP_ARGS(dev, tid, type, line_no, requested_blocks) + ); + + DEFINE_EVENT(jbd2_handle_start_class, jbd2_handle_restart, +- TP_PROTO(dev_t dev, unsigned long tid, unsigned int type, ++ TP_PROTO(dev_t dev, tid_t tid, unsigned int type, + unsigned int line_no, int requested_blocks), + + TP_ARGS(dev, tid, type, line_no, requested_blocks) + ); + + TRACE_EVENT(jbd2_handle_extend, +- TP_PROTO(dev_t dev, unsigned long tid, unsigned int type, ++ TP_PROTO(dev_t dev, tid_t tid, unsigned int type, + unsigned int line_no, int buffer_credits, + int requested_blocks), + +@@ -184,7 +184,7 @@ TRACE_EVENT(jbd2_handle_extend, + + TP_STRUCT__entry( + __field( dev_t, dev ) +- __field( unsigned long, tid ) ++ __field( tid_t, tid ) + __field( unsigned int, type ) + __field( unsigned int, line_no ) + __field( int, buffer_credits ) +@@ -200,7 +200,7 @@ TRACE_EVENT(jbd2_handle_extend, + __entry->requested_blocks = requested_blocks; + ), + +- TP_printk("dev %d,%d tid %lu type %u line_no %u " ++ TP_printk("dev %d,%d tid %u type %u line_no %u " + "buffer_credits %d requested_blocks %d", + MAJOR(__entry->dev), MINOR(__entry->dev), __entry->tid, + __entry->type, __entry->line_no, __entry->buffer_credits, +@@ -208,7 +208,7 @@ TRACE_EVENT(jbd2_handle_extend, + ); + + TRACE_EVENT(jbd2_handle_stats, +- TP_PROTO(dev_t dev, unsigned long tid, unsigned int type, ++ TP_PROTO(dev_t dev, tid_t tid, unsigned int type, + unsigned int line_no, int interval, int sync, + int requested_blocks, int dirtied_blocks), + +@@ -217,7 +217,7 @@ TRACE_EVENT(jbd2_handle_stats, + + TP_STRUCT__entry( + __field( dev_t, dev ) +- __field( unsigned long, tid ) ++ __field( tid_t, tid ) + __field( unsigned int, type ) + __field( unsigned int, line_no ) + __field( int, interval ) +@@ -237,7 +237,7 @@ TRACE_EVENT(jbd2_handle_stats, + __entry->dirtied_blocks = dirtied_blocks; + ), + +- TP_printk("dev %d,%d tid %lu type %u line_no %u interval %d " ++ TP_printk("dev %d,%d tid %u type %u line_no %u interval %d " + "sync %d requested_blocks %d dirtied_blocks 
%d", + MAJOR(__entry->dev), MINOR(__entry->dev), __entry->tid, + __entry->type, __entry->line_no, __entry->interval, +@@ -246,14 +246,14 @@ TRACE_EVENT(jbd2_handle_stats, + ); + + TRACE_EVENT(jbd2_run_stats, +- TP_PROTO(dev_t dev, unsigned long tid, ++ TP_PROTO(dev_t dev, tid_t tid, + struct transaction_run_stats_s *stats), + + TP_ARGS(dev, tid, stats), + + TP_STRUCT__entry( + __field( dev_t, dev ) +- __field( unsigned long, tid ) ++ __field( tid_t, tid ) + __field( unsigned long, wait ) + __field( unsigned long, request_delay ) + __field( unsigned long, running ) +@@ -279,7 +279,7 @@ TRACE_EVENT(jbd2_run_stats, + __entry->blocks_logged = stats->rs_blocks_logged; + ), + +- TP_printk("dev %d,%d tid %lu wait %u request_delay %u running %u " ++ TP_printk("dev %d,%d tid %u wait %u request_delay %u running %u " + "locked %u flushing %u logging %u handle_count %u " + "blocks %u blocks_logged %u", + MAJOR(__entry->dev), MINOR(__entry->dev), __entry->tid, +@@ -294,14 +294,14 @@ TRACE_EVENT(jbd2_run_stats, + ); + + TRACE_EVENT(jbd2_checkpoint_stats, +- TP_PROTO(dev_t dev, unsigned long tid, ++ TP_PROTO(dev_t dev, tid_t tid, + struct transaction_chp_stats_s *stats), + + TP_ARGS(dev, tid, stats), + + TP_STRUCT__entry( + __field( dev_t, dev ) +- __field( unsigned long, tid ) ++ __field( tid_t, tid ) + __field( unsigned long, chp_time ) + __field( __u32, forced_to_close ) + __field( __u32, written ) +@@ -317,7 +317,7 @@ TRACE_EVENT(jbd2_checkpoint_stats, + __entry->dropped = stats->cs_dropped; + ), + +- TP_printk("dev %d,%d tid %lu chp_time %u forced_to_close %u " ++ TP_printk("dev %d,%d tid %u chp_time %u forced_to_close %u " + "written %u dropped %u", + MAJOR(__entry->dev), MINOR(__entry->dev), __entry->tid, + jiffies_to_msecs(__entry->chp_time), +diff --git a/include/trace/events/libata.h b/include/trace/events/libata.h +index ab69434e2329e..72e785a903b65 100644 +--- a/include/trace/events/libata.h ++++ b/include/trace/events/libata.h +@@ -249,6 +249,7 @@ DECLARE_EVENT_CLASS(ata_qc_complete_template, + __entry->hob_feature = qc->result_tf.hob_feature; + __entry->nsect = qc->result_tf.nsect; + __entry->hob_nsect = qc->result_tf.hob_nsect; ++ __entry->flags = qc->flags; + ), + + TP_printk("ata_port=%u ata_dev=%u tag=%d flags=%s status=%s " \ +diff --git a/include/trace/events/random.h b/include/trace/events/random.h +deleted file mode 100644 +index 3d7b432ca5f31..0000000000000 +--- a/include/trace/events/random.h ++++ /dev/null +@@ -1,247 +0,0 @@ +-/* SPDX-License-Identifier: GPL-2.0 */ +-#undef TRACE_SYSTEM +-#define TRACE_SYSTEM random +- +-#if !defined(_TRACE_RANDOM_H) || defined(TRACE_HEADER_MULTI_READ) +-#define _TRACE_RANDOM_H +- +-#include <linux/writeback.h> +-#include <linux/tracepoint.h> +- +-TRACE_EVENT(add_device_randomness, +- TP_PROTO(int bytes, unsigned long IP), +- +- TP_ARGS(bytes, IP), +- +- TP_STRUCT__entry( +- __field( int, bytes ) +- __field(unsigned long, IP ) +- ), +- +- TP_fast_assign( +- __entry->bytes = bytes; +- __entry->IP = IP; +- ), +- +- TP_printk("bytes %d caller %pS", +- __entry->bytes, (void *)__entry->IP) +-); +- +-DECLARE_EVENT_CLASS(random__mix_pool_bytes, +- TP_PROTO(const char *pool_name, int bytes, unsigned long IP), +- +- TP_ARGS(pool_name, bytes, IP), +- +- TP_STRUCT__entry( +- __field( const char *, pool_name ) +- __field( int, bytes ) +- __field(unsigned long, IP ) +- ), +- +- TP_fast_assign( +- __entry->pool_name = pool_name; +- __entry->bytes = bytes; +- __entry->IP = IP; +- ), +- +- TP_printk("%s pool: bytes %d caller %pS", +- __entry->pool_name, 
__entry->bytes, (void *)__entry->IP) +-); +- +-DEFINE_EVENT(random__mix_pool_bytes, mix_pool_bytes, +- TP_PROTO(const char *pool_name, int bytes, unsigned long IP), +- +- TP_ARGS(pool_name, bytes, IP) +-); +- +-DEFINE_EVENT(random__mix_pool_bytes, mix_pool_bytes_nolock, +- TP_PROTO(const char *pool_name, int bytes, unsigned long IP), +- +- TP_ARGS(pool_name, bytes, IP) +-); +- +-TRACE_EVENT(credit_entropy_bits, +- TP_PROTO(const char *pool_name, int bits, int entropy_count, +- unsigned long IP), +- +- TP_ARGS(pool_name, bits, entropy_count, IP), +- +- TP_STRUCT__entry( +- __field( const char *, pool_name ) +- __field( int, bits ) +- __field( int, entropy_count ) +- __field(unsigned long, IP ) +- ), +- +- TP_fast_assign( +- __entry->pool_name = pool_name; +- __entry->bits = bits; +- __entry->entropy_count = entropy_count; +- __entry->IP = IP; +- ), +- +- TP_printk("%s pool: bits %d entropy_count %d caller %pS", +- __entry->pool_name, __entry->bits, +- __entry->entropy_count, (void *)__entry->IP) +-); +- +-TRACE_EVENT(debit_entropy, +- TP_PROTO(const char *pool_name, int debit_bits), +- +- TP_ARGS(pool_name, debit_bits), +- +- TP_STRUCT__entry( +- __field( const char *, pool_name ) +- __field( int, debit_bits ) +- ), +- +- TP_fast_assign( +- __entry->pool_name = pool_name; +- __entry->debit_bits = debit_bits; +- ), +- +- TP_printk("%s: debit_bits %d", __entry->pool_name, +- __entry->debit_bits) +-); +- +-TRACE_EVENT(add_input_randomness, +- TP_PROTO(int input_bits), +- +- TP_ARGS(input_bits), +- +- TP_STRUCT__entry( +- __field( int, input_bits ) +- ), +- +- TP_fast_assign( +- __entry->input_bits = input_bits; +- ), +- +- TP_printk("input_pool_bits %d", __entry->input_bits) +-); +- +-TRACE_EVENT(add_disk_randomness, +- TP_PROTO(dev_t dev, int input_bits), +- +- TP_ARGS(dev, input_bits), +- +- TP_STRUCT__entry( +- __field( dev_t, dev ) +- __field( int, input_bits ) +- ), +- +- TP_fast_assign( +- __entry->dev = dev; +- __entry->input_bits = input_bits; +- ), +- +- TP_printk("dev %d,%d input_pool_bits %d", MAJOR(__entry->dev), +- MINOR(__entry->dev), __entry->input_bits) +-); +- +-DECLARE_EVENT_CLASS(random__get_random_bytes, +- TP_PROTO(int nbytes, unsigned long IP), +- +- TP_ARGS(nbytes, IP), +- +- TP_STRUCT__entry( +- __field( int, nbytes ) +- __field(unsigned long, IP ) +- ), +- +- TP_fast_assign( +- __entry->nbytes = nbytes; +- __entry->IP = IP; +- ), +- +- TP_printk("nbytes %d caller %pS", __entry->nbytes, (void *)__entry->IP) +-); +- +-DEFINE_EVENT(random__get_random_bytes, get_random_bytes, +- TP_PROTO(int nbytes, unsigned long IP), +- +- TP_ARGS(nbytes, IP) +-); +- +-DEFINE_EVENT(random__get_random_bytes, get_random_bytes_arch, +- TP_PROTO(int nbytes, unsigned long IP), +- +- TP_ARGS(nbytes, IP) +-); +- +-DECLARE_EVENT_CLASS(random__extract_entropy, +- TP_PROTO(const char *pool_name, int nbytes, int entropy_count, +- unsigned long IP), +- +- TP_ARGS(pool_name, nbytes, entropy_count, IP), +- +- TP_STRUCT__entry( +- __field( const char *, pool_name ) +- __field( int, nbytes ) +- __field( int, entropy_count ) +- __field(unsigned long, IP ) +- ), +- +- TP_fast_assign( +- __entry->pool_name = pool_name; +- __entry->nbytes = nbytes; +- __entry->entropy_count = entropy_count; +- __entry->IP = IP; +- ), +- +- TP_printk("%s pool: nbytes %d entropy_count %d caller %pS", +- __entry->pool_name, __entry->nbytes, __entry->entropy_count, +- (void *)__entry->IP) +-); +- +- +-DEFINE_EVENT(random__extract_entropy, extract_entropy, +- TP_PROTO(const char *pool_name, int nbytes, int entropy_count, +- 
unsigned long IP), +- +- TP_ARGS(pool_name, nbytes, entropy_count, IP) +-); +- +-TRACE_EVENT(urandom_read, +- TP_PROTO(int got_bits, int pool_left, int input_left), +- +- TP_ARGS(got_bits, pool_left, input_left), +- +- TP_STRUCT__entry( +- __field( int, got_bits ) +- __field( int, pool_left ) +- __field( int, input_left ) +- ), +- +- TP_fast_assign( +- __entry->got_bits = got_bits; +- __entry->pool_left = pool_left; +- __entry->input_left = input_left; +- ), +- +- TP_printk("got_bits %d nonblocking_pool_entropy_left %d " +- "input_entropy_left %d", __entry->got_bits, +- __entry->pool_left, __entry->input_left) +-); +- +-TRACE_EVENT(prandom_u32, +- +- TP_PROTO(unsigned int ret), +- +- TP_ARGS(ret), +- +- TP_STRUCT__entry( +- __field( unsigned int, ret) +- ), +- +- TP_fast_assign( +- __entry->ret = ret; +- ), +- +- TP_printk("ret=%u" , __entry->ret) +-); +- +-#endif /* _TRACE_RANDOM_H */ +- +-/* This part must be outside protection */ +-#include <trace/define_trace.h> +diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h +index e70c90116edae..221856f2d295c 100644 +--- a/include/trace/events/rxrpc.h ++++ b/include/trace/events/rxrpc.h +@@ -83,12 +83,15 @@ enum rxrpc_call_trace { + rxrpc_call_error, + rxrpc_call_got, + rxrpc_call_got_kernel, ++ rxrpc_call_got_timer, + rxrpc_call_got_userid, + rxrpc_call_new_client, + rxrpc_call_new_service, + rxrpc_call_put, + rxrpc_call_put_kernel, + rxrpc_call_put_noqueue, ++ rxrpc_call_put_notimer, ++ rxrpc_call_put_timer, + rxrpc_call_put_userid, + rxrpc_call_queued, + rxrpc_call_queued_ref, +@@ -278,12 +281,15 @@ enum rxrpc_tx_point { + EM(rxrpc_call_error, "*E*") \ + EM(rxrpc_call_got, "GOT") \ + EM(rxrpc_call_got_kernel, "Gke") \ ++ EM(rxrpc_call_got_timer, "GTM") \ + EM(rxrpc_call_got_userid, "Gus") \ + EM(rxrpc_call_new_client, "NWc") \ + EM(rxrpc_call_new_service, "NWs") \ + EM(rxrpc_call_put, "PUT") \ + EM(rxrpc_call_put_kernel, "Pke") \ +- EM(rxrpc_call_put_noqueue, "PNQ") \ ++ EM(rxrpc_call_put_noqueue, "PnQ") \ ++ EM(rxrpc_call_put_notimer, "PnT") \ ++ EM(rxrpc_call_put_timer, "PTM") \ + EM(rxrpc_call_put_userid, "Pus") \ + EM(rxrpc_call_queued, "QUE") \ + EM(rxrpc_call_queued_ref, "QUR") \ +@@ -577,7 +583,7 @@ TRACE_EVENT(rxrpc_client, + TP_fast_assign( + __entry->conn = conn ? conn->debug_id : 0; + __entry->channel = channel; +- __entry->usage = conn ? atomic_read(&conn->usage) : -2; ++ __entry->usage = conn ? refcount_read(&conn->ref) : -2; + __entry->op = op; + __entry->cid = conn ? 
conn->proto.cid : 0; + ), +@@ -1503,7 +1509,7 @@ TRACE_EVENT(rxrpc_call_reset, + __entry->call_serial = call->rx_serial; + __entry->conn_serial = call->conn->hi_serial; + __entry->tx_seq = call->tx_hard_ack; +- __entry->rx_seq = call->ackr_seen; ++ __entry->rx_seq = call->rx_hard_ack; + ), + + TP_printk("c=%08x %08x:%08x r=%08x/%08x tx=%08x rx=%08x", +diff --git a/include/trace/events/skb.h b/include/trace/events/skb.h +index 9e92f22eb086c..485a1d3034a4b 100644 +--- a/include/trace/events/skb.h ++++ b/include/trace/events/skb.h +@@ -9,29 +9,63 @@ + #include <linux/netdevice.h> + #include <linux/tracepoint.h> + ++#define TRACE_SKB_DROP_REASON \ ++ EM(SKB_DROP_REASON_NOT_SPECIFIED, NOT_SPECIFIED) \ ++ EM(SKB_DROP_REASON_NO_SOCKET, NO_SOCKET) \ ++ EM(SKB_DROP_REASON_PKT_TOO_SMALL, PKT_TOO_SMALL) \ ++ EM(SKB_DROP_REASON_TCP_CSUM, TCP_CSUM) \ ++ EM(SKB_DROP_REASON_SOCKET_FILTER, SOCKET_FILTER) \ ++ EM(SKB_DROP_REASON_UDP_CSUM, UDP_CSUM) \ ++ EM(SKB_DROP_REASON_NETFILTER_DROP, NETFILTER_DROP) \ ++ EM(SKB_DROP_REASON_OTHERHOST, OTHERHOST) \ ++ EM(SKB_DROP_REASON_IP_CSUM, IP_CSUM) \ ++ EM(SKB_DROP_REASON_IP_INHDR, IP_INHDR) \ ++ EM(SKB_DROP_REASON_IP_RPFILTER, IP_RPFILTER) \ ++ EM(SKB_DROP_REASON_UNICAST_IN_L2_MULTICAST, \ ++ UNICAST_IN_L2_MULTICAST) \ ++ EMe(SKB_DROP_REASON_MAX, MAX) ++ ++#undef EM ++#undef EMe ++ ++#define EM(a, b) TRACE_DEFINE_ENUM(a); ++#define EMe(a, b) TRACE_DEFINE_ENUM(a); ++ ++TRACE_SKB_DROP_REASON ++ ++#undef EM ++#undef EMe ++#define EM(a, b) { a, #b }, ++#define EMe(a, b) { a, #b } ++ + /* + * Tracepoint for free an sk_buff: + */ + TRACE_EVENT(kfree_skb, + +- TP_PROTO(struct sk_buff *skb, void *location), ++ TP_PROTO(struct sk_buff *skb, void *location, ++ enum skb_drop_reason reason), + +- TP_ARGS(skb, location), ++ TP_ARGS(skb, location, reason), + + TP_STRUCT__entry( +- __field( void *, skbaddr ) +- __field( void *, location ) +- __field( unsigned short, protocol ) ++ __field(void *, skbaddr) ++ __field(void *, location) ++ __field(unsigned short, protocol) ++ __field(enum skb_drop_reason, reason) + ), + + TP_fast_assign( + __entry->skbaddr = skb; + __entry->location = location; + __entry->protocol = ntohs(skb->protocol); ++ __entry->reason = reason; + ), + +- TP_printk("skbaddr=%p protocol=%u location=%p", +- __entry->skbaddr, __entry->protocol, __entry->location) ++ TP_printk("skbaddr=%p protocol=%u location=%p reason: %s", ++ __entry->skbaddr, __entry->protocol, __entry->location, ++ __print_symbolic(__entry->reason, ++ TRACE_SKB_DROP_REASON)) + ); + + TRACE_EVENT(consume_skb, +diff --git a/include/trace/events/sock.h b/include/trace/events/sock.h +index 12c315782766a..777ee6cbe9330 100644 +--- a/include/trace/events/sock.h ++++ b/include/trace/events/sock.h +@@ -98,7 +98,7 @@ TRACE_EVENT(sock_exceed_buf_limit, + + TP_STRUCT__entry( + __array(char, name, 32) +- __field(long *, sysctl_mem) ++ __array(long, sysctl_mem, 3) + __field(long, allocated) + __field(int, sysctl_rmem) + __field(int, rmem_alloc) +@@ -110,7 +110,9 @@ TRACE_EVENT(sock_exceed_buf_limit, + + TP_fast_assign( + strncpy(__entry->name, prot->name, 32); +- __entry->sysctl_mem = prot->sysctl_mem; ++ __entry->sysctl_mem[0] = READ_ONCE(prot->sysctl_mem[0]); ++ __entry->sysctl_mem[1] = READ_ONCE(prot->sysctl_mem[1]); ++ __entry->sysctl_mem[2] = READ_ONCE(prot->sysctl_mem[2]); + __entry->allocated = allocated; + __entry->sysctl_rmem = sk_get_rmem0(sk, prot); + __entry->rmem_alloc = atomic_read(&sk->sk_rmem_alloc); +diff --git a/include/trace/events/spmi.h b/include/trace/events/spmi.h +index 
8b60efe18ba68..a6819fd85cdf4 100644 +--- a/include/trace/events/spmi.h ++++ b/include/trace/events/spmi.h +@@ -21,15 +21,15 @@ TRACE_EVENT(spmi_write_begin, + __field ( u8, sid ) + __field ( u16, addr ) + __field ( u8, len ) +- __dynamic_array ( u8, buf, len + 1 ) ++ __dynamic_array ( u8, buf, len ) + ), + + TP_fast_assign( + __entry->opcode = opcode; + __entry->sid = sid; + __entry->addr = addr; +- __entry->len = len + 1; +- memcpy(__get_dynamic_array(buf), buf, len + 1); ++ __entry->len = len; ++ memcpy(__get_dynamic_array(buf), buf, len); + ), + + TP_printk("opc=%d sid=%02d addr=0x%04x len=%d buf=0x[%*phD]", +@@ -92,7 +92,7 @@ TRACE_EVENT(spmi_read_end, + __field ( u16, addr ) + __field ( int, ret ) + __field ( u8, len ) +- __dynamic_array ( u8, buf, len + 1 ) ++ __dynamic_array ( u8, buf, len ) + ), + + TP_fast_assign( +@@ -100,8 +100,8 @@ TRACE_EVENT(spmi_read_end, + __entry->sid = sid; + __entry->addr = addr; + __entry->ret = ret; +- __entry->len = len + 1; +- memcpy(__get_dynamic_array(buf), buf, len + 1); ++ __entry->len = len; ++ memcpy(__get_dynamic_array(buf), buf, len); + ), + + TP_printk("opc=%d sid=%02d addr=0x%04x ret=%d len=%02d buf=0x[%*phD]", +diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h +index 2d04eb96d4183..2a598fb45bf4f 100644 +--- a/include/trace/events/sunrpc.h ++++ b/include/trace/events/sunrpc.h +@@ -925,18 +925,19 @@ TRACE_EVENT(rpc_socket_nospace, + + #define rpc_show_xprt_state(x) \ + __print_flags(x, "|", \ +- { (1UL << XPRT_LOCKED), "LOCKED"}, \ +- { (1UL << XPRT_CONNECTED), "CONNECTED"}, \ +- { (1UL << XPRT_CONNECTING), "CONNECTING"}, \ +- { (1UL << XPRT_CLOSE_WAIT), "CLOSE_WAIT"}, \ +- { (1UL << XPRT_BOUND), "BOUND"}, \ +- { (1UL << XPRT_BINDING), "BINDING"}, \ +- { (1UL << XPRT_CLOSING), "CLOSING"}, \ +- { (1UL << XPRT_OFFLINE), "OFFLINE"}, \ +- { (1UL << XPRT_REMOVE), "REMOVE"}, \ +- { (1UL << XPRT_CONGESTED), "CONGESTED"}, \ +- { (1UL << XPRT_CWND_WAIT), "CWND_WAIT"}, \ +- { (1UL << XPRT_WRITE_SPACE), "WRITE_SPACE"}) ++ { BIT(XPRT_LOCKED), "LOCKED" }, \ ++ { BIT(XPRT_CONNECTED), "CONNECTED" }, \ ++ { BIT(XPRT_CONNECTING), "CONNECTING" }, \ ++ { BIT(XPRT_CLOSE_WAIT), "CLOSE_WAIT" }, \ ++ { BIT(XPRT_BOUND), "BOUND" }, \ ++ { BIT(XPRT_BINDING), "BINDING" }, \ ++ { BIT(XPRT_CLOSING), "CLOSING" }, \ ++ { BIT(XPRT_OFFLINE), "OFFLINE" }, \ ++ { BIT(XPRT_REMOVE), "REMOVE" }, \ ++ { BIT(XPRT_CONGESTED), "CONGESTED" }, \ ++ { BIT(XPRT_CWND_WAIT), "CWND_WAIT" }, \ ++ { BIT(XPRT_WRITE_SPACE), "WRITE_SPACE" }, \ ++ { BIT(XPRT_SND_IS_COOKIE), "SND_IS_COOKIE" }) + + DECLARE_EVENT_CLASS(rpc_xprt_lifetime_class, + TP_PROTO( +@@ -975,7 +976,6 @@ DEFINE_RPC_XPRT_LIFETIME_EVENT(connect); + DEFINE_RPC_XPRT_LIFETIME_EVENT(disconnect_auto); + DEFINE_RPC_XPRT_LIFETIME_EVENT(disconnect_done); + DEFINE_RPC_XPRT_LIFETIME_EVENT(disconnect_force); +-DEFINE_RPC_XPRT_LIFETIME_EVENT(disconnect_cleanup); + DEFINE_RPC_XPRT_LIFETIME_EVENT(destroy); + + DECLARE_EVENT_CLASS(rpc_xprt_event, +@@ -1133,8 +1133,11 @@ DECLARE_EVENT_CLASS(xprt_writelock_event, + __entry->task_id = -1; + __entry->client_id = -1; + } +- __entry->snd_task_id = xprt->snd_task ? +- xprt->snd_task->tk_pid : -1; ++ if (xprt->snd_task && ++ !test_bit(XPRT_SND_IS_COOKIE, &xprt->state)) ++ __entry->snd_task_id = xprt->snd_task->tk_pid; ++ else ++ __entry->snd_task_id = -1; + ), + + TP_printk("task:%u@%u snd_task:%u", +@@ -1178,8 +1181,12 @@ DECLARE_EVENT_CLASS(xprt_cong_event, + __entry->task_id = -1; + __entry->client_id = -1; + } +- __entry->snd_task_id = xprt->snd_task ? 
+- xprt->snd_task->tk_pid : -1; ++ if (xprt->snd_task && ++ !test_bit(XPRT_SND_IS_COOKIE, &xprt->state)) ++ __entry->snd_task_id = xprt->snd_task->tk_pid; ++ else ++ __entry->snd_task_id = -1; ++ + __entry->cong = xprt->cong; + __entry->cwnd = xprt->cwnd; + __entry->wait = test_bit(XPRT_CWND_WAIT, &xprt->state); +@@ -1719,10 +1726,11 @@ TRACE_EVENT(svc_xprt_create_err, + const char *program, + const char *protocol, + struct sockaddr *sap, ++ size_t salen, + const struct svc_xprt *xprt + ), + +- TP_ARGS(program, protocol, sap, xprt), ++ TP_ARGS(program, protocol, sap, salen, xprt), + + TP_STRUCT__entry( + __field(long, error) +@@ -1735,7 +1743,7 @@ TRACE_EVENT(svc_xprt_create_err, + __entry->error = PTR_ERR(xprt); + __assign_str(program, program); + __assign_str(protocol, protocol); +- memcpy(__entry->addr, sap, sizeof(__entry->addr)); ++ memcpy(__entry->addr, sap, min(salen, sizeof(__entry->addr))); + ), + + TP_printk("addr=%pISpc program=%s protocol=%s error=%ld", +@@ -1915,17 +1923,18 @@ DECLARE_EVENT_CLASS(svc_deferred_event, + TP_STRUCT__entry( + __field(const void *, dr) + __field(u32, xid) +- __string(addr, dr->xprt->xpt_remotebuf) ++ __array(__u8, addr, INET6_ADDRSTRLEN + 10) + ), + + TP_fast_assign( + __entry->dr = dr; + __entry->xid = be32_to_cpu(*(__be32 *)(dr->args + + (dr->xprt_hlen>>2))); +- __assign_str(addr, dr->xprt->xpt_remotebuf); ++ snprintf(__entry->addr, sizeof(__entry->addr) - 1, ++ "%pISpc", (struct sockaddr *)&dr->addr); + ), + +- TP_printk("addr=%s dr=%p xid=0x%08x", __get_str(addr), __entry->dr, ++ TP_printk("addr=%s dr=%p xid=0x%08x", __entry->addr, __entry->dr, + __entry->xid) + ); + +@@ -2103,17 +2112,17 @@ DECLARE_EVENT_CLASS(svcsock_accept_class, + TP_STRUCT__entry( + __field(long, status) + __string(service, service) +- __array(unsigned char, addr, sizeof(struct sockaddr_in6)) ++ __field(unsigned int, netns_ino) + ), + + TP_fast_assign( + __entry->status = status; + __assign_str(service, service); +- memcpy(__entry->addr, &xprt->xpt_local, sizeof(__entry->addr)); ++ __entry->netns_ino = xprt->xpt_net->ns.inum; + ), + +- TP_printk("listener=%pISpc service=%s status=%ld", +- __entry->addr, __get_str(service), __entry->status ++ TP_printk("addr=listener service=%s status=%ld", ++ __get_str(service), __entry->status + ) + ); + +diff --git a/include/trace/events/tcp.h b/include/trace/events/tcp.h +index 521059d8dc0a6..edcd6369de102 100644 +--- a/include/trace/events/tcp.h ++++ b/include/trace/events/tcp.h +@@ -279,7 +279,7 @@ TRACE_EVENT(tcp_probe, + __entry->data_len = skb->len - __tcp_hdrlen(th); + __entry->snd_nxt = tp->snd_nxt; + __entry->snd_una = tp->snd_una; +- __entry->snd_cwnd = tp->snd_cwnd; ++ __entry->snd_cwnd = tcp_snd_cwnd(tp); + __entry->snd_wnd = tp->snd_wnd; + __entry->rcv_wnd = tp->rcv_wnd; + __entry->ssthresh = tcp_current_ssthresh(sk); +diff --git a/include/trace/events/vmscan.h b/include/trace/events/vmscan.h +index 88faf2400ec25..b2eeeb0800126 100644 +--- a/include/trace/events/vmscan.h ++++ b/include/trace/events/vmscan.h +@@ -283,7 +283,7 @@ TRACE_EVENT(mm_vmscan_lru_isolate, + __field(unsigned long, nr_scanned) + __field(unsigned long, nr_skipped) + __field(unsigned long, nr_taken) +- __field(isolate_mode_t, isolate_mode) ++ __field(unsigned int, isolate_mode) + __field(int, lru) + ), + +@@ -294,7 +294,7 @@ TRACE_EVENT(mm_vmscan_lru_isolate, + __entry->nr_scanned = nr_scanned; + __entry->nr_skipped = nr_skipped; + __entry->nr_taken = nr_taken; +- __entry->isolate_mode = isolate_mode; ++ __entry->isolate_mode = (__force unsigned 
int)isolate_mode; + __entry->lru = lru; + ), + +diff --git a/include/trace/perf.h b/include/trace/perf.h +index dbc6c74defc38..5d48c46a30083 100644 +--- a/include/trace/perf.h ++++ b/include/trace/perf.h +@@ -21,6 +21,23 @@ + #undef __get_bitmask + #define __get_bitmask(field) (char *)__get_dynamic_array(field) + ++#undef __get_rel_dynamic_array ++#define __get_rel_dynamic_array(field) \ ++ ((void *)__entry + \ ++ offsetof(typeof(*__entry), __rel_loc_##field) + \ ++ sizeof(__entry->__rel_loc_##field) + \ ++ (__entry->__rel_loc_##field & 0xffff)) ++ ++#undef __get_rel_dynamic_array_len ++#define __get_rel_dynamic_array_len(field) \ ++ ((__entry->__rel_loc_##field >> 16) & 0xffff) ++ ++#undef __get_rel_str ++#define __get_rel_str(field) ((char *)__get_rel_dynamic_array(field)) ++ ++#undef __get_rel_bitmask ++#define __get_rel_bitmask(field) (char *)__get_rel_dynamic_array(field) ++ + #undef __perf_count + #define __perf_count(c) (__count = (c)) + +diff --git a/include/trace/trace_events.h b/include/trace/trace_events.h +index 08810a4638805..7f0b91dfb532d 100644 +--- a/include/trace/trace_events.h ++++ b/include/trace/trace_events.h +@@ -108,6 +108,18 @@ TRACE_MAKE_SYSTEM_STR(); + #undef __bitmask + #define __bitmask(item, nr_bits) __dynamic_array(char, item, -1) + ++#undef __rel_dynamic_array ++#define __rel_dynamic_array(type, item, len) u32 __rel_loc_##item; ++ ++#undef __rel_string ++#define __rel_string(item, src) __rel_dynamic_array(char, item, -1) ++ ++#undef __rel_string_len ++#define __rel_string_len(item, src, len) __rel_dynamic_array(char, item, -1) ++ ++#undef __rel_bitmask ++#define __rel_bitmask(item, nr_bits) __rel_dynamic_array(char, item, -1) ++ + #undef TP_STRUCT__entry + #define TP_STRUCT__entry(args...) args + +@@ -116,7 +128,7 @@ TRACE_MAKE_SYSTEM_STR(); + struct trace_event_raw_##name { \ + struct trace_entry ent; \ + tstruct \ +- char __data[0]; \ ++ char __data[]; \ + }; \ + \ + static struct trace_event_class event_class_##name; +@@ -200,11 +212,23 @@ TRACE_MAKE_SYSTEM_STR(); + #undef __string + #define __string(item, src) __dynamic_array(char, item, -1) + ++#undef __bitmask ++#define __bitmask(item, nr_bits) __dynamic_array(unsigned long, item, -1) ++ + #undef __string_len + #define __string_len(item, src, len) __dynamic_array(char, item, -1) + +-#undef __bitmask +-#define __bitmask(item, nr_bits) __dynamic_array(unsigned long, item, -1) ++#undef __rel_dynamic_array ++#define __rel_dynamic_array(type, item, len) u32 item; ++ ++#undef __rel_string ++#define __rel_string(item, src) __rel_dynamic_array(char, item, -1) ++ ++#undef __rel_string_len ++#define __rel_string_len(item, src, len) __rel_dynamic_array(char, item, -1) ++ ++#undef __rel_bitmask ++#define __rel_bitmask(item, nr_bits) __rel_dynamic_array(unsigned long, item, -1) + + #undef DECLARE_EVENT_CLASS + #define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) \ +@@ -293,6 +317,20 @@ TRACE_MAKE_SYSTEM_STR(); + #undef __get_str + #define __get_str(field) ((char *)__get_dynamic_array(field)) + ++#undef __get_rel_dynamic_array ++#define __get_rel_dynamic_array(field) \ ++ ((void *)__entry + \ ++ offsetof(typeof(*__entry), __rel_loc_##field) + \ ++ sizeof(__entry->__rel_loc_##field) + \ ++ (__entry->__rel_loc_##field & 0xffff)) ++ ++#undef __get_rel_dynamic_array_len ++#define __get_rel_dynamic_array_len(field) \ ++ ((__entry->__rel_loc_##field >> 16) & 0xffff) ++ ++#undef __get_rel_str ++#define __get_rel_str(field) ((char *)__get_rel_dynamic_array(field)) ++ + #undef __get_bitmask + #define 
__get_bitmask(field) \ + ({ \ +@@ -302,6 +340,15 @@ TRACE_MAKE_SYSTEM_STR(); + trace_print_bitmask_seq(p, __bitmask, __bitmask_size); \ + }) + ++#undef __get_rel_bitmask ++#define __get_rel_bitmask(field) \ ++ ({ \ ++ void *__bitmask = __get_rel_dynamic_array(field); \ ++ unsigned int __bitmask_size; \ ++ __bitmask_size = __get_rel_dynamic_array_len(field); \ ++ trace_print_bitmask_seq(p, __bitmask, __bitmask_size); \ ++ }) ++ + #undef __print_flags + #define __print_flags(flag, delim, flag_array...) \ + ({ \ +@@ -432,16 +479,18 @@ static struct trace_event_functions trace_event_type_funcs_##call = { \ + + #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) + ++#define ALIGN_STRUCTFIELD(type) ((int)(__alignof__(struct {type b;}))) ++ + #undef __field_ext + #define __field_ext(_type, _item, _filter_type) { \ + .type = #_type, .name = #_item, \ +- .size = sizeof(_type), .align = __alignof__(_type), \ ++ .size = sizeof(_type), .align = ALIGN_STRUCTFIELD(_type), \ + .is_signed = is_signed_type(_type), .filter_type = _filter_type }, + + #undef __field_struct_ext + #define __field_struct_ext(_type, _item, _filter_type) { \ + .type = #_type, .name = #_item, \ +- .size = sizeof(_type), .align = __alignof__(_type), \ ++ .size = sizeof(_type), .align = ALIGN_STRUCTFIELD(_type), \ + 0, .filter_type = _filter_type }, + + #undef __field +@@ -453,7 +502,7 @@ static struct trace_event_functions trace_event_type_funcs_##call = { \ + #undef __array + #define __array(_type, _item, _len) { \ + .type = #_type"["__stringify(_len)"]", .name = #_item, \ +- .size = sizeof(_type[_len]), .align = __alignof__(_type), \ ++ .size = sizeof(_type[_len]), .align = ALIGN_STRUCTFIELD(_type), \ + .is_signed = is_signed_type(_type), .filter_type = FILTER_OTHER }, + + #undef __dynamic_array +@@ -471,6 +520,21 @@ static struct trace_event_functions trace_event_type_funcs_##call = { \ + #undef __bitmask + #define __bitmask(item, nr_bits) __dynamic_array(unsigned long, item, -1) + ++#undef __rel_dynamic_array ++#define __rel_dynamic_array(_type, _item, _len) { \ ++ .type = "__rel_loc " #_type "[]", .name = #_item, \ ++ .size = 4, .align = 4, \ ++ .is_signed = is_signed_type(_type), .filter_type = FILTER_OTHER }, ++ ++#undef __rel_string ++#define __rel_string(item, src) __rel_dynamic_array(char, item, -1) ++ ++#undef __rel_string_len ++#define __rel_string_len(item, src, len) __rel_dynamic_array(char, item, -1) ++ ++#undef __rel_bitmask ++#define __rel_bitmask(item, nr_bits) __rel_dynamic_array(unsigned long, item, -1) ++ + #undef DECLARE_EVENT_CLASS + #define DECLARE_EVENT_CLASS(call, proto, args, tstruct, func, print) \ + static struct trace_event_fields trace_event_fields_##call[] = { \ +@@ -519,6 +583,22 @@ static struct trace_event_fields trace_event_fields_##call[] = { \ + #undef __string_len + #define __string_len(item, src, len) __dynamic_array(char, item, (len) + 1) + ++#undef __rel_dynamic_array ++#define __rel_dynamic_array(type, item, len) \ ++ __item_length = (len) * sizeof(type); \ ++ __data_offsets->item = __data_size + \ ++ offsetof(typeof(*entry), __data) - \ ++ offsetof(typeof(*entry), __rel_loc_##item) - \ ++ sizeof(u32); \ ++ __data_offsets->item |= __item_length << 16; \ ++ __data_size += __item_length; ++ ++#undef __rel_string ++#define __rel_string(item, src) __rel_dynamic_array(char, item, \ ++ strlen((src) ? 
(const char *)(src) : "(null)") + 1) ++ ++#undef __rel_string_len ++#define __rel_string_len(item, src, len) __rel_dynamic_array(char, item, (len) + 1) + /* + * __bitmask_size_in_bytes_raw is the number of bytes needed to hold + * num_possible_cpus(). +@@ -542,6 +622,10 @@ static struct trace_event_fields trace_event_fields_##call[] = { \ + #define __bitmask(item, nr_bits) __dynamic_array(unsigned long, item, \ + __bitmask_size_in_longs(nr_bits)) + ++#undef __rel_bitmask ++#define __rel_bitmask(item, nr_bits) __rel_dynamic_array(unsigned long, item, \ ++ __bitmask_size_in_longs(nr_bits)) ++ + #undef DECLARE_EVENT_CLASS + #define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) \ + static inline notrace int trace_event_get_offsets_##call( \ +@@ -706,6 +790,37 @@ static inline notrace int trace_event_get_offsets_##call( \ + #define __assign_bitmask(dst, src, nr_bits) \ + memcpy(__get_bitmask(dst), (src), __bitmask_size_in_bytes(nr_bits)) + ++#undef __rel_dynamic_array ++#define __rel_dynamic_array(type, item, len) \ ++ __entry->__rel_loc_##item = __data_offsets.item; ++ ++#undef __rel_string ++#define __rel_string(item, src) __rel_dynamic_array(char, item, -1) ++ ++#undef __rel_string_len ++#define __rel_string_len(item, src, len) __rel_dynamic_array(char, item, -1) ++ ++#undef __assign_rel_str ++#define __assign_rel_str(dst, src) \ ++ strcpy(__get_rel_str(dst), (src) ? (const char *)(src) : "(null)"); ++ ++#undef __assign_rel_str_len ++#define __assign_rel_str_len(dst, src, len) \ ++ do { \ ++ memcpy(__get_rel_str(dst), (src), (len)); \ ++ __get_rel_str(dst)[len] = '\0'; \ ++ } while (0) ++ ++#undef __rel_bitmask ++#define __rel_bitmask(item, nr_bits) __rel_dynamic_array(unsigned long, item, -1) ++ ++#undef __get_rel_bitmask ++#define __get_rel_bitmask(field) (char *)__get_rel_dynamic_array(field) ++ ++#undef __assign_rel_bitmask ++#define __assign_rel_bitmask(dst, src, nr_bits) \ ++ memcpy(__get_rel_bitmask(dst), (src), __bitmask_size_in_bytes(nr_bits)) ++ + #undef TP_fast_assign + #define TP_fast_assign(args...) 
args + +@@ -770,6 +885,10 @@ static inline void ftrace_test_probe_##call(void) \ + #undef __get_dynamic_array_len + #undef __get_str + #undef __get_bitmask ++#undef __get_rel_dynamic_array ++#undef __get_rel_dynamic_array_len ++#undef __get_rel_str ++#undef __get_rel_bitmask + #undef __print_array + #undef __print_hex_dump + +diff --git a/include/uapi/asm-generic/poll.h b/include/uapi/asm-generic/poll.h +index 41b509f410bf9..f9c520ce4bf4e 100644 +--- a/include/uapi/asm-generic/poll.h ++++ b/include/uapi/asm-generic/poll.h +@@ -29,7 +29,7 @@ + #define POLLRDHUP 0x2000 + #endif + +-#define POLLFREE (__force __poll_t)0x4000 /* currently only for epoll */ ++#define POLLFREE (__force __poll_t)0x4000 + + #define POLL_BUSY_LOOP (__force __poll_t)0x8000 + +diff --git a/include/uapi/asm-generic/siginfo.h b/include/uapi/asm-generic/siginfo.h +index 3ba180f550d7c..ffbe4cec9f32d 100644 +--- a/include/uapi/asm-generic/siginfo.h ++++ b/include/uapi/asm-generic/siginfo.h +@@ -99,6 +99,7 @@ union __sifields { + struct { + unsigned long _data; + __u32 _type; ++ __u32 _flags; + } _perf; + }; + } _sigfault; +@@ -164,6 +165,7 @@ typedef struct siginfo { + #define si_pkey _sifields._sigfault._addr_pkey._pkey + #define si_perf_data _sifields._sigfault._perf._data + #define si_perf_type _sifields._sigfault._perf._type ++#define si_perf_flags _sifields._sigfault._perf._flags + #define si_band _sifields._sigpoll._band + #define si_fd _sifields._sigpoll._fd + #define si_call_addr _sifields._sigsys._call_addr +@@ -270,6 +272,11 @@ typedef struct siginfo { + * that are of the form: ((PTRACE_EVENT_XXX << 8) | SIGTRAP) + */ + ++/* ++ * Flags for si_perf_flags if SIGTRAP si_code is TRAP_PERF. ++ */ ++#define TRAP_PERF_FLAG_ASYNC (1u << 0) ++ + /* + * SIGCHLD si_codes + */ +diff --git a/include/uapi/asm-generic/signal-defs.h b/include/uapi/asm-generic/signal-defs.h +index fe929e7b77ca1..7572f2f46ee89 100644 +--- a/include/uapi/asm-generic/signal-defs.h ++++ b/include/uapi/asm-generic/signal-defs.h +@@ -45,6 +45,7 @@ + #define SA_UNSUPPORTED 0x00000400 + #define SA_EXPOSE_TAGBITS 0x00000800 + /* 0x00010000 used on mips */ ++/* 0x00800000 used for internal SA_IMMUTABLE */ + /* 0x01000000 used on x86 */ + /* 0x02000000 used on x86 */ + /* +diff --git a/include/uapi/drm/drm_fourcc.h b/include/uapi/drm/drm_fourcc.h +index 9f4bb4a6f358c..a50e4646bd6de 100644 +--- a/include/uapi/drm/drm_fourcc.h ++++ b/include/uapi/drm/drm_fourcc.h +@@ -308,6 +308,13 @@ extern "C" { + */ + #define DRM_FORMAT_P016 fourcc_code('P', '0', '1', '6') /* 2x2 subsampled Cr:Cb plane 16 bits per channel */ + ++/* 2 plane YCbCr420. ++ * 3 10 bit components and 2 padding bits packed into 4 bytes. ++ * index 0 = Y plane, [31:0] x:Y2:Y1:Y0 2:10:10:10 little endian ++ * index 1 = Cr:Cb plane, [63:0] x:Cr2:Cb2:Cr1:x:Cb1:Cr0:Cb0 [2:10:10:10:2:10:10:10] little endian ++ */ ++#define DRM_FORMAT_P030 fourcc_code('P', '0', '3', '0') /* 2x2 subsampled Cr:Cb plane 10 bits per channel packed */ ++ + /* 3 plane non-subsampled (444) YCbCr + * 16 bits per component, but only 10 bits are used and 6 bits are padded + * index 0: Y plane, [15:0] Y:x [10:6] little endian +@@ -842,6 +849,10 @@ drm_fourcc_canonicalize_nvidia_format_mod(__u64 modifier) + * and UV. Some SAND-using hardware stores UV in a separate tiled + * image from Y to reduce the column height, which is not supported + * with these modifiers. 
++ * ++ * The DRM_FORMAT_MOD_BROADCOM_SAND128_COL_HEIGHT modifier is also ++ * supported for DRM_FORMAT_P030 where the columns remain as 128 bytes ++ * wide, but as this is a 10 bpp format that translates to 96 pixels. + */ + + #define DRM_FORMAT_MOD_BROADCOM_SAND32_COL_HEIGHT(v) \ +@@ -1352,11 +1363,11 @@ drm_fourcc_canonicalize_nvidia_format_mod(__u64 modifier) + #define AMD_FMT_MOD_PIPE_MASK 0x7 + + #define AMD_FMT_MOD_SET(field, value) \ +- ((uint64_t)(value) << AMD_FMT_MOD_##field##_SHIFT) ++ ((__u64)(value) << AMD_FMT_MOD_##field##_SHIFT) + #define AMD_FMT_MOD_GET(field, value) \ + (((value) >> AMD_FMT_MOD_##field##_SHIFT) & AMD_FMT_MOD_##field##_MASK) + #define AMD_FMT_MOD_CLEAR(field) \ +- (~((uint64_t)AMD_FMT_MOD_##field##_MASK << AMD_FMT_MOD_##field##_SHIFT)) ++ (~((__u64)AMD_FMT_MOD_##field##_MASK << AMD_FMT_MOD_##field##_SHIFT)) + + #if defined(__cplusplus) + } +diff --git a/include/uapi/linux/audit.h b/include/uapi/linux/audit.h +index daa481729e9ba..27799acd0e5e0 100644 +--- a/include/uapi/linux/audit.h ++++ b/include/uapi/linux/audit.h +@@ -182,7 +182,7 @@ + #define AUDIT_MAX_KEY_LEN 256 + #define AUDIT_BITMASK_SIZE 64 + #define AUDIT_WORD(nr) ((__u32)((nr)/32)) +-#define AUDIT_BIT(nr) (1 << ((nr) - AUDIT_WORD(nr)*32)) ++#define AUDIT_BIT(nr) (1U << ((nr) - AUDIT_WORD(nr)*32)) + + #define AUDIT_SYSCALL_CLASSES 16 + #define AUDIT_CLASS_DIR_WRITE 0 +diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h +index 791f31dd0abee..a887e582f0e78 100644 +--- a/include/uapi/linux/bpf.h ++++ b/include/uapi/linux/bpf.h +@@ -2276,8 +2276,8 @@ union bpf_attr { + * Return + * The return value depends on the result of the test, and can be: + * +- * * 0, if current task belongs to the cgroup2. +- * * 1, if current task does not belong to the cgroup2. ++ * * 1, if current task belongs to the cgroup2. ++ * * 0, if current task does not belong to the cgroup2. + * * A negative error code, if an error occurred. + * + * long bpf_skb_change_tail(struct sk_buff *skb, u32 len, u64 flags) +@@ -2965,8 +2965,8 @@ union bpf_attr { + * + * # sysctl kernel.perf_event_max_stack=<new value> + * Return +- * A non-negative value equal to or less than *size* on success, +- * or a negative error in case of failure. ++ * The non-negative copied *buf* length equal to or less than ++ * *size* on success, or a negative error in case of failure. + * + * long bpf_skb_load_bytes_relative(const void *skb, u32 offset, void *to, u32 len, u32 start_header) + * Description +@@ -4269,8 +4269,8 @@ union bpf_attr { + * + * # sysctl kernel.perf_event_max_stack=<new value> + * Return +- * A non-negative value equal to or less than *size* on success, +- * or a negative error in case of failure. ++ * The non-negative copied *buf* length equal to or less than ++ * *size* on success, or a negative error in case of failure. 
+ * + * long bpf_load_hdr_opt(struct bpf_sock_ops *skops, void *searchby_res, u32 len, u64 flags) + * Description +@@ -5347,7 +5347,8 @@ struct bpf_sock { + __u32 src_ip4; + __u32 src_ip6[4]; + __u32 src_port; /* host byte order */ +- __u32 dst_port; /* network byte order */ ++ __be16 dst_port; /* network byte order */ ++ __u16 :16; /* zero padding */ + __u32 dst_ip4; + __u32 dst_ip6[4]; + __u32 state; +@@ -6222,7 +6223,8 @@ struct bpf_sk_lookup { + __u32 protocol; /* IP protocol (IPPROTO_TCP, IPPROTO_UDP) */ + __u32 remote_ip4; /* Network byte order */ + __u32 remote_ip6[4]; /* Network byte order */ +- __u32 remote_port; /* Network byte order */ ++ __be16 remote_port; /* Network byte order */ ++ __u16 :16; /* Zero padding */ + __u32 local_ip4; /* Network byte order */ + __u32 local_ip6[4]; /* Network byte order */ + __u32 local_port; /* Host byte order */ +diff --git a/include/uapi/linux/btrfs_tree.h b/include/uapi/linux/btrfs_tree.h +index e1c4c732aabac..5416f1f1a77a8 100644 +--- a/include/uapi/linux/btrfs_tree.h ++++ b/include/uapi/linux/btrfs_tree.h +@@ -146,7 +146,9 @@ + + /* + * dir items are the name -> inode pointers in a directory. There is one +- * for every name in a directory. ++ * for every name in a directory. BTRFS_DIR_LOG_ITEM_KEY is no longer used ++ * but it's still defined here for documentation purposes and to help avoid ++ * having its numerical value reused in the future. + */ + #define BTRFS_DIR_LOG_ITEM_KEY 60 + #define BTRFS_DIR_LOG_INDEX_KEY 72 +diff --git a/include/uapi/linux/byteorder/big_endian.h b/include/uapi/linux/byteorder/big_endian.h +index 2199adc6a6c20..80aa5c41a7636 100644 +--- a/include/uapi/linux/byteorder/big_endian.h ++++ b/include/uapi/linux/byteorder/big_endian.h +@@ -9,6 +9,7 @@ + #define __BIG_ENDIAN_BITFIELD + #endif + ++#include <linux/stddef.h> + #include <linux/types.h> + #include <linux/swab.h> + +diff --git a/include/uapi/linux/byteorder/little_endian.h b/include/uapi/linux/byteorder/little_endian.h +index 601c904fd5cd9..cd98982e7523e 100644 +--- a/include/uapi/linux/byteorder/little_endian.h ++++ b/include/uapi/linux/byteorder/little_endian.h +@@ -9,6 +9,7 @@ + #define __LITTLE_ENDIAN_BITFIELD + #endif + ++#include <linux/stddef.h> + #include <linux/types.h> + #include <linux/swab.h> + +diff --git a/include/uapi/linux/can/error.h b/include/uapi/linux/can/error.h +index 34633283de641..a1000cb630632 100644 +--- a/include/uapi/linux/can/error.h ++++ b/include/uapi/linux/can/error.h +@@ -120,6 +120,9 @@ + #define CAN_ERR_TRX_CANL_SHORT_TO_GND 0x70 /* 0111 0000 */ + #define CAN_ERR_TRX_CANL_SHORT_TO_CANH 0x80 /* 1000 0000 */ + +-/* controller specific additional information / data[5..7] */ ++/* data[5] is reserved (do not use) */ ++ ++/* TX error counter / data[6] */ ++/* RX error counter / data[7] */ + + #endif /* _UAPI_CAN_ERROR_H */ +diff --git a/include/uapi/linux/can/isotp.h b/include/uapi/linux/can/isotp.h +index c55935b64ccc8..590f8aea2b6d2 100644 +--- a/include/uapi/linux/can/isotp.h ++++ b/include/uapi/linux/can/isotp.h +@@ -137,20 +137,16 @@ struct can_isotp_ll_options { + #define CAN_ISOTP_WAIT_TX_DONE 0x400 /* wait for tx completion */ + #define CAN_ISOTP_SF_BROADCAST 0x800 /* 1-to-N functional addressing */ + +-/* default values */ ++/* protocol machine default values */ + + #define CAN_ISOTP_DEFAULT_FLAGS 0 + #define CAN_ISOTP_DEFAULT_EXT_ADDRESS 0x00 + #define CAN_ISOTP_DEFAULT_PAD_CONTENT 0xCC /* prevent bit-stuffing */ +-#define CAN_ISOTP_DEFAULT_FRAME_TXTIME 0 ++#define CAN_ISOTP_DEFAULT_FRAME_TXTIME 50000 /* 50 micro 
seconds */ + #define CAN_ISOTP_DEFAULT_RECV_BS 0 + #define CAN_ISOTP_DEFAULT_RECV_STMIN 0x00 + #define CAN_ISOTP_DEFAULT_RECV_WFTMAX 0 + +-#define CAN_ISOTP_DEFAULT_LL_MTU CAN_MTU +-#define CAN_ISOTP_DEFAULT_LL_TX_DL CAN_MAX_DLEN +-#define CAN_ISOTP_DEFAULT_LL_TX_FLAGS 0 +- + /* + * Remark on CAN_ISOTP_DEFAULT_RECV_* values: + * +@@ -162,4 +158,24 @@ struct can_isotp_ll_options { + * consistency and copied directly into the flow control (FC) frame. + */ + ++/* link layer default values => make use of Classical CAN frames */ ++ ++#define CAN_ISOTP_DEFAULT_LL_MTU CAN_MTU ++#define CAN_ISOTP_DEFAULT_LL_TX_DL CAN_MAX_DLEN ++#define CAN_ISOTP_DEFAULT_LL_TX_FLAGS 0 ++ ++/* ++ * The CAN_ISOTP_DEFAULT_FRAME_TXTIME has become a non-zero value as ++ * it only makes sense for isotp implementation tests to run without ++ * a N_As value. As user space applications usually do not set the ++ * frame_txtime element of struct can_isotp_options the new in-kernel ++ * default is very likely overwritten with zero when the sockopt() ++ * CAN_ISOTP_OPTS is invoked. ++ * To make sure that a N_As value of zero is only set intentional the ++ * value '0' is now interpreted as 'do not change the current value'. ++ * When a frame_txtime of zero is required for testing purposes this ++ * CAN_ISOTP_FRAME_TXTIME_ZERO u32 value has to be set in frame_txtime. ++ */ ++#define CAN_ISOTP_FRAME_TXTIME_ZERO 0xFFFFFFFF ++ + #endif /* !_UAPI_CAN_ISOTP_H */ +diff --git a/include/uapi/linux/capability.h b/include/uapi/linux/capability.h +index 463d1ba2232ac..3d61a0ae055d4 100644 +--- a/include/uapi/linux/capability.h ++++ b/include/uapi/linux/capability.h +@@ -426,7 +426,7 @@ struct vfs_ns_cap_data { + */ + + #define CAP_TO_INDEX(x) ((x) >> 5) /* 1 << 5 == bits in __u32 */ +-#define CAP_TO_MASK(x) (1 << ((x) & 31)) /* mask for indexed __u32 */ ++#define CAP_TO_MASK(x) (1U << ((x) & 31)) /* mask for indexed __u32 */ + + + #endif /* _UAPI_LINUX_CAPABILITY_H */ +diff --git a/include/uapi/linux/cyclades.h b/include/uapi/linux/cyclades.h +new file mode 100644 +index 0000000000000..6225c5aebe06a +--- /dev/null ++++ b/include/uapi/linux/cyclades.h +@@ -0,0 +1,35 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++ ++#ifndef _UAPI_LINUX_CYCLADES_H ++#define _UAPI_LINUX_CYCLADES_H ++ ++#warning "Support for features provided by this header has been removed" ++#warning "Please consider updating your code" ++ ++struct cyclades_monitor { ++ unsigned long int_count; ++ unsigned long char_count; ++ unsigned long char_max; ++ unsigned long char_last; ++}; ++ ++#define CYGETMON 0x435901 ++#define CYGETTHRESH 0x435902 ++#define CYSETTHRESH 0x435903 ++#define CYGETDEFTHRESH 0x435904 ++#define CYSETDEFTHRESH 0x435905 ++#define CYGETTIMEOUT 0x435906 ++#define CYSETTIMEOUT 0x435907 ++#define CYGETDEFTIMEOUT 0x435908 ++#define CYSETDEFTIMEOUT 0x435909 ++#define CYSETRFLOW 0x43590a ++#define CYGETRFLOW 0x43590b ++#define CYSETRTSDTR_INV 0x43590c ++#define CYGETRTSDTR_INV 0x43590d ++#define CYZSETPOLLCYCLE 0x43590e ++#define CYZGETPOLLCYCLE 0x43590f ++#define CYGETCD1400VER 0x435910 ++#define CYSETWAIT 0x435912 ++#define CYGETWAIT 0x435913 ++ ++#endif /* _UAPI_LINUX_CYCLADES_H */ +diff --git a/include/uapi/linux/dma-buf.h b/include/uapi/linux/dma-buf.h +index 8e4a2ca0bcbf7..b1523cb8ab307 100644 +--- a/include/uapi/linux/dma-buf.h ++++ b/include/uapi/linux/dma-buf.h +@@ -92,7 +92,7 @@ struct dma_buf_sync { + * between them in actual uapi, they're just different numbers. 
+ */ + #define DMA_BUF_SET_NAME _IOW(DMA_BUF_BASE, 1, const char *) +-#define DMA_BUF_SET_NAME_A _IOW(DMA_BUF_BASE, 1, u32) +-#define DMA_BUF_SET_NAME_B _IOW(DMA_BUF_BASE, 1, u64) ++#define DMA_BUF_SET_NAME_A _IOW(DMA_BUF_BASE, 1, __u32) ++#define DMA_BUF_SET_NAME_B _IOW(DMA_BUF_BASE, 1, __u64) + #endif -+ if (res.attr_bitmask[0] & FATTR4_WORD0_FS_LOCATIONS) -+ server->caps |= NFS_CAP_FS_LOCATIONS; - if (!(res.attr_bitmask[0] & FATTR4_WORD0_FILEID)) - server->fattr_valid &= ~NFS_ATTR_FATTR_FILEID; - if (!(res.attr_bitmask[1] & FATTR4_WORD1_MODE)) -@@ -3949,6 +3962,60 @@ int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle) - return err; +diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h +index 5545f1ca9237c..f7204bdfe8db1 100644 +--- a/include/uapi/linux/ethtool_netlink.h ++++ b/include/uapi/linux/ethtool_netlink.h +@@ -407,7 +407,9 @@ enum { + ETHTOOL_A_PAUSE_STAT_TX_FRAMES, + ETHTOOL_A_PAUSE_STAT_RX_FRAMES, + +- /* add new constants above here */ ++ /* add new constants above here ++ * adjust ETHTOOL_PAUSE_STAT_CNT if adding non-stats! ++ */ + __ETHTOOL_A_PAUSE_STAT_CNT, + ETHTOOL_A_PAUSE_STAT_MAX = (__ETHTOOL_A_PAUSE_STAT_CNT - 1) + }; +diff --git a/include/uapi/linux/eventpoll.h b/include/uapi/linux/eventpoll.h +index 8a3432d0f0dcb..e687658843b1c 100644 +--- a/include/uapi/linux/eventpoll.h ++++ b/include/uapi/linux/eventpoll.h +@@ -41,6 +41,12 @@ + #define EPOLLMSG (__force __poll_t)0x00000400 + #define EPOLLRDHUP (__force __poll_t)0x00002000 + ++/* ++ * Internal flag - wakeup generated by io_uring, used to detect recursion back ++ * into the io_uring poll handler. ++ */ ++#define EPOLL_URING_WAKE ((__force __poll_t)(1U << 27)) ++ + /* Set exclusive wakeup mode for the target file descriptor */ + #define EPOLLEXCLUSIVE ((__force __poll_t)(1U << 28)) + +diff --git a/include/uapi/linux/idxd.h b/include/uapi/linux/idxd.h +index c750eac09fc9c..f7c01709cb0ff 100644 +--- a/include/uapi/linux/idxd.h ++++ b/include/uapi/linux/idxd.h +@@ -272,7 +272,7 @@ struct dsa_completion_record { + }; + + uint32_t delta_rec_size; +- uint32_t crc_val; ++ uint64_t crc_val; + + /* DIF check & strip */ + struct { +diff --git a/include/uapi/linux/input-event-codes.h b/include/uapi/linux/input-event-codes.h +index 225ec87d4f228..7989d9483ea75 100644 +--- a/include/uapi/linux/input-event-codes.h ++++ b/include/uapi/linux/input-event-codes.h +@@ -278,7 +278,8 @@ + #define KEY_PAUSECD 201 + #define KEY_PROG3 202 + #define KEY_PROG4 203 +-#define KEY_DASHBOARD 204 /* AL Dashboard */ ++#define KEY_ALL_APPLICATIONS 204 /* AC Desktop Show All Applications */ ++#define KEY_DASHBOARD KEY_ALL_APPLICATIONS + #define KEY_SUSPEND 205 + #define KEY_CLOSE 206 /* AC Close */ + #define KEY_PLAY 207 +@@ -612,6 +613,7 @@ + #define KEY_ASSISTANT 0x247 /* AL Context-aware desktop assistant */ + #define KEY_KBD_LAYOUT_NEXT 0x248 /* AC Next Keyboard Layout Select */ + #define KEY_EMOJI_PICKER 0x249 /* Show/hide emoji picker (HUTRR101) */ ++#define KEY_DICTATE 0x24a /* Start or Stop Voice Dictation Session (HUTRR99) */ + + #define KEY_BRIGHTNESS_MIN 0x250 /* Set Brightness to Minimum */ + #define KEY_BRIGHTNESS_MAX 0x251 /* Set Brightness to Maximum */ +diff --git a/include/uapi/linux/ip.h b/include/uapi/linux/ip.h +index e42d13b55cf3a..860bbf6bf29cb 100644 +--- a/include/uapi/linux/ip.h ++++ b/include/uapi/linux/ip.h +@@ -18,6 +18,7 @@ + #ifndef _UAPI_LINUX_IP_H + #define _UAPI_LINUX_IP_H + #include <linux/types.h> ++#include <linux/stddef.h> + #include 
<asm/byteorder.h> + + #define IPTOS_TOS_MASK 0x1E +@@ -100,8 +101,10 @@ struct iphdr { + __u8 ttl; + __u8 protocol; + __sum16 check; +- __be32 saddr; +- __be32 daddr; ++ __struct_group(/* no tag */, addrs, /* no attrs */, ++ __be32 saddr; ++ __be32 daddr; ++ ); + /*The options start here. */ + }; + +diff --git a/include/uapi/linux/ipv6.h b/include/uapi/linux/ipv6.h +index b243a53fa985b..39c6add59a1a6 100644 +--- a/include/uapi/linux/ipv6.h ++++ b/include/uapi/linux/ipv6.h +@@ -4,6 +4,7 @@ + + #include <linux/libc-compat.h> + #include <linux/types.h> ++#include <linux/stddef.h> + #include <linux/in6.h> + #include <asm/byteorder.h> + +@@ -130,8 +131,10 @@ struct ipv6hdr { + __u8 nexthdr; + __u8 hop_limit; + +- struct in6_addr saddr; +- struct in6_addr daddr; ++ __struct_group(/* no tag */, addrs, /* no attrs */, ++ struct in6_addr saddr; ++ struct in6_addr daddr; ++ ); + }; + + +diff --git a/include/uapi/linux/landlock.h b/include/uapi/linux/landlock.h +index b3d952067f59c..21c8d58283c9e 100644 +--- a/include/uapi/linux/landlock.h ++++ b/include/uapi/linux/landlock.h +@@ -33,7 +33,9 @@ struct landlock_ruleset_attr { + * - %LANDLOCK_CREATE_RULESET_VERSION: Get the highest supported Landlock ABI + * version. + */ ++/* clang-format off */ + #define LANDLOCK_CREATE_RULESET_VERSION (1U << 0) ++/* clang-format on */ + + /** + * enum landlock_rule_type - Landlock rule type +@@ -60,8 +62,9 @@ struct landlock_path_beneath_attr { + */ + __u64 allowed_access; + /** +- * @parent_fd: File descriptor, open with ``O_PATH``, which identifies +- * the parent directory of a file hierarchy, or just a file. ++ * @parent_fd: File descriptor, preferably opened with ``O_PATH``, ++ * which identifies the parent directory of a file hierarchy, or just a ++ * file. + */ + __s32 parent_fd; + /* +@@ -120,6 +123,7 @@ struct landlock_path_beneath_attr { + * :manpage:`access(2)`. + * Future Landlock evolutions will enable to restrict them. + */ ++/* clang-format off */ + #define LANDLOCK_ACCESS_FS_EXECUTE (1ULL << 0) + #define LANDLOCK_ACCESS_FS_WRITE_FILE (1ULL << 1) + #define LANDLOCK_ACCESS_FS_READ_FILE (1ULL << 2) +@@ -133,5 +137,6 @@ struct landlock_path_beneath_attr { + #define LANDLOCK_ACCESS_FS_MAKE_FIFO (1ULL << 10) + #define LANDLOCK_ACCESS_FS_MAKE_BLOCK (1ULL << 11) + #define LANDLOCK_ACCESS_FS_MAKE_SYM (1ULL << 12) ++/* clang-format on */ + + #endif /* _UAPI_LINUX_LANDLOCK_H */ +diff --git a/include/uapi/linux/mptcp.h b/include/uapi/linux/mptcp.h +index f66038b9551fa..80c40194e2977 100644 +--- a/include/uapi/linux/mptcp.h ++++ b/include/uapi/linux/mptcp.h +@@ -129,19 +129,21 @@ struct mptcp_info { + * MPTCP_EVENT_REMOVED: token, rem_id + * An address has been lost by the peer. + * +- * MPTCP_EVENT_SUB_ESTABLISHED: token, family, saddr4 | saddr6, +- * daddr4 | daddr6, sport, dport, backup, +- * if_idx [, error] ++ * MPTCP_EVENT_SUB_ESTABLISHED: token, family, loc_id, rem_id, ++ * saddr4 | saddr6, daddr4 | daddr6, sport, ++ * dport, backup, if_idx [, error] + * A new subflow has been established. 'error' should not be set. + * +- * MPTCP_EVENT_SUB_CLOSED: token, family, saddr4 | saddr6, daddr4 | daddr6, +- * sport, dport, backup, if_idx [, error] ++ * MPTCP_EVENT_SUB_CLOSED: token, family, loc_id, rem_id, saddr4 | saddr6, ++ * daddr4 | daddr6, sport, dport, backup, if_idx ++ * [, error] + * A subflow has been closed. An error (copy of sk_err) could be set if an + * error has been detected for this subflow. 
+ * +- * MPTCP_EVENT_SUB_PRIORITY: token, family, saddr4 | saddr6, daddr4 | daddr6, +- * sport, dport, backup, if_idx [, error] +- * The priority of a subflow has changed. 'error' should not be set. ++ * MPTCP_EVENT_SUB_PRIORITY: token, family, loc_id, rem_id, saddr4 | saddr6, ++ * daddr4 | daddr6, sport, dport, backup, if_idx ++ * [, error] ++ * The priority of a subflow has changed. 'error' should not be set. + */ + enum mptcp_event_type { + MPTCP_EVENT_UNSPEC = 0, +diff --git a/include/uapi/linux/netfilter/nf_conntrack_common.h b/include/uapi/linux/netfilter/nf_conntrack_common.h +index 4b3395082d15c..26071021e986f 100644 +--- a/include/uapi/linux/netfilter/nf_conntrack_common.h ++++ b/include/uapi/linux/netfilter/nf_conntrack_common.h +@@ -106,7 +106,7 @@ enum ip_conntrack_status { + IPS_NAT_CLASH = IPS_UNTRACKED, + #endif + +- /* Conntrack got a helper explicitly attached via CT target. */ ++ /* Conntrack got a helper explicitly attached (ruleset, ctnetlink). */ + IPS_HELPER_BIT = 13, + IPS_HELPER = (1 << IPS_HELPER_BIT), + +diff --git a/include/uapi/linux/netfilter/nf_conntrack_sctp.h b/include/uapi/linux/netfilter/nf_conntrack_sctp.h +index edc6ddab0de6a..2d6f80d75ae74 100644 +--- a/include/uapi/linux/netfilter/nf_conntrack_sctp.h ++++ b/include/uapi/linux/netfilter/nf_conntrack_sctp.h +@@ -15,7 +15,7 @@ enum sctp_conntrack { + SCTP_CONNTRACK_SHUTDOWN_RECD, + SCTP_CONNTRACK_SHUTDOWN_ACK_SENT, + SCTP_CONNTRACK_HEARTBEAT_SENT, +- SCTP_CONNTRACK_HEARTBEAT_ACKED, ++ SCTP_CONNTRACK_HEARTBEAT_ACKED, /* no longer used */ + SCTP_CONNTRACK_MAX + }; + +diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h +index e94d1fa554cb2..07871c8a06014 100644 +--- a/include/uapi/linux/netfilter/nf_tables.h ++++ b/include/uapi/linux/netfilter/nf_tables.h +@@ -753,11 +753,13 @@ enum nft_dynset_attributes { + * @NFT_PAYLOAD_LL_HEADER: link layer header + * @NFT_PAYLOAD_NETWORK_HEADER: network header + * @NFT_PAYLOAD_TRANSPORT_HEADER: transport header ++ * @NFT_PAYLOAD_INNER_HEADER: inner header / payload + */ + enum nft_payload_bases { + NFT_PAYLOAD_LL_HEADER, + NFT_PAYLOAD_NETWORK_HEADER, + NFT_PAYLOAD_TRANSPORT_HEADER, ++ NFT_PAYLOAD_INNER_HEADER, + }; + + /** +diff --git a/include/uapi/linux/netfilter/nfnetlink_cttimeout.h b/include/uapi/linux/netfilter/nfnetlink_cttimeout.h +index 6b20fb22717b2..aa805e6d4e284 100644 +--- a/include/uapi/linux/netfilter/nfnetlink_cttimeout.h ++++ b/include/uapi/linux/netfilter/nfnetlink_cttimeout.h +@@ -94,7 +94,7 @@ enum ctattr_timeout_sctp { + CTA_TIMEOUT_SCTP_SHUTDOWN_RECD, + CTA_TIMEOUT_SCTP_SHUTDOWN_ACK_SENT, + CTA_TIMEOUT_SCTP_HEARTBEAT_SENT, +- CTA_TIMEOUT_SCTP_HEARTBEAT_ACKED, ++ CTA_TIMEOUT_SCTP_HEARTBEAT_ACKED, /* no longer used */ + __CTA_TIMEOUT_SCTP_MAX + }; + #define CTA_TIMEOUT_SCTP_MAX (__CTA_TIMEOUT_SCTP_MAX - 1) +diff --git a/include/uapi/linux/netfilter/xt_IDLETIMER.h b/include/uapi/linux/netfilter/xt_IDLETIMER.h +index 49ddcdc61c094..7bfb31a66fc9b 100644 +--- a/include/uapi/linux/netfilter/xt_IDLETIMER.h ++++ b/include/uapi/linux/netfilter/xt_IDLETIMER.h +@@ -1,6 +1,5 @@ ++/* SPDX-License-Identifier: GPL-2.0-only WITH Linux-syscall-note */ + /* +- * linux/include/linux/netfilter/xt_IDLETIMER.h +- * + * Header file for Xtables timer target module. 
+ * + * Copyright (C) 2004, 2010 Nokia Corporation +@@ -10,20 +9,6 @@ + * by Luciano Coelho <luciano.coelho@nokia.com> + * + * Contact: Luciano Coelho <luciano.coelho@nokia.com> +- * +- * This program is free software; you can redistribute it and/or +- * modify it under the terms of the GNU General Public License +- * version 2 as published by the Free Software Foundation. +- * +- * This program is distributed in the hope that it will be useful, but +- * WITHOUT ANY WARRANTY; without even the implied warranty of +- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +- * General Public License for more details. +- * +- * You should have received a copy of the GNU General Public License +- * along with this program; if not, write to the Free Software +- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +- * 02110-1301 USA + */ + + #ifndef _XT_IDLETIMER_H +diff --git a/include/uapi/linux/nfc.h b/include/uapi/linux/nfc.h +index f6e3c8c9c7449..4fa4e979e948a 100644 +--- a/include/uapi/linux/nfc.h ++++ b/include/uapi/linux/nfc.h +@@ -263,7 +263,7 @@ enum nfc_sdp_attr { + #define NFC_SE_ENABLED 0x1 + + struct sockaddr_nfc { +- sa_family_t sa_family; ++ __kernel_sa_family_t sa_family; + __u32 dev_idx; + __u32 target_idx; + __u32 nfc_protocol; +@@ -271,14 +271,14 @@ struct sockaddr_nfc { + + #define NFC_LLCP_MAX_SERVICE_NAME 63 + struct sockaddr_nfc_llcp { +- sa_family_t sa_family; ++ __kernel_sa_family_t sa_family; + __u32 dev_idx; + __u32 target_idx; + __u32 nfc_protocol; + __u8 dsap; /* Destination SAP, if known */ + __u8 ssap; /* Source SAP to be bound to */ + char service_name[NFC_LLCP_MAX_SERVICE_NAME]; /* Service name URI */; +- size_t service_name_len; ++ __kernel_size_t service_name_len; + }; + + /* NFC socket protocols */ +diff --git a/include/uapi/linux/omap3isp.h b/include/uapi/linux/omap3isp.h +index 87b55755f4ffe..d9db7ad438908 100644 +--- a/include/uapi/linux/omap3isp.h ++++ b/include/uapi/linux/omap3isp.h +@@ -162,6 +162,7 @@ struct omap3isp_h3a_aewb_config { + * struct omap3isp_stat_data - Statistic data sent to or received from user + * @ts: Timestamp of returned framestats. + * @buf: Pointer to pass to user. ++ * @buf_size: Size of buffer. + * @frame_number: Frame number of requested stats. + * @cur_frame: Current frame number being processed. + * @config_counter: Number of the configuration associated with the data. +@@ -176,10 +177,12 @@ struct omap3isp_stat_data { + struct timeval ts; + #endif + void __user *buf; +- __u32 buf_size; +- __u16 frame_number; +- __u16 cur_frame; +- __u16 config_counter; ++ __struct_group(/* no tag */, frame, /* no attrs */, ++ __u32 buf_size; ++ __u16 frame_number; ++ __u16 cur_frame; ++ __u16 config_counter; ++ ); + }; + + #ifdef __KERNEL__ +@@ -189,10 +192,12 @@ struct omap3isp_stat_data_time32 { + __s32 tv_usec; + } ts; + __u32 buf; +- __u32 buf_size; +- __u16 frame_number; +- __u16 cur_frame; +- __u16 config_counter; ++ __struct_group(/* no tag */, frame, /* no attrs */, ++ __u32 buf_size; ++ __u16 frame_number; ++ __u16 cur_frame; ++ __u16 config_counter; ++ ); + }; + #endif + +diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h +index e709ae8235e7f..ff6ccbc6efe96 100644 +--- a/include/uapi/linux/pci_regs.h ++++ b/include/uapi/linux/pci_regs.h +@@ -504,6 +504,12 @@ + #define PCI_EXP_DEVCTL_URRE 0x0008 /* Unsupported Request Reporting En. 
*/ + #define PCI_EXP_DEVCTL_RELAX_EN 0x0010 /* Enable relaxed ordering */ + #define PCI_EXP_DEVCTL_PAYLOAD 0x00e0 /* Max_Payload_Size */ ++#define PCI_EXP_DEVCTL_PAYLOAD_128B 0x0000 /* 128 Bytes */ ++#define PCI_EXP_DEVCTL_PAYLOAD_256B 0x0020 /* 256 Bytes */ ++#define PCI_EXP_DEVCTL_PAYLOAD_512B 0x0040 /* 512 Bytes */ ++#define PCI_EXP_DEVCTL_PAYLOAD_1024B 0x0060 /* 1024 Bytes */ ++#define PCI_EXP_DEVCTL_PAYLOAD_2048B 0x0080 /* 2048 Bytes */ ++#define PCI_EXP_DEVCTL_PAYLOAD_4096B 0x00a0 /* 4096 Bytes */ + #define PCI_EXP_DEVCTL_EXT_TAG 0x0100 /* Extended Tag Field Enable */ + #define PCI_EXP_DEVCTL_PHANTOM 0x0200 /* Phantom Functions Enable */ + #define PCI_EXP_DEVCTL_AUX_PME 0x0400 /* Auxiliary Power PM Enable */ +diff --git a/include/uapi/linux/rfkill.h b/include/uapi/linux/rfkill.h +index 9b77cfc42efa3..db6c8588c1d0c 100644 +--- a/include/uapi/linux/rfkill.h ++++ b/include/uapi/linux/rfkill.h +@@ -159,8 +159,16 @@ struct rfkill_event_ext { + * old behaviour for all userspace, unless it explicitly opts in to the + * rules outlined here by using the new &struct rfkill_event_ext. + * +- * Userspace using &struct rfkill_event_ext must adhere to the following +- * rules ++ * Additionally, some other userspace (bluez, g-s-d) was reading with a ++ * large size but as streaming reads rather than message-based, or with ++ * too strict checks for the returned size. So eventually, we completely ++ * reverted this, and extended messages need to be opted in to by using ++ * an ioctl: ++ * ++ * ioctl(fd, RFKILL_IOCTL_MAX_SIZE, sizeof(struct rfkill_event_ext)); ++ * ++ * Userspace using &struct rfkill_event_ext and the ioctl must adhere to ++ * the following rules: + * + * 1. accept short writes, optionally using them to detect that it's + * running on an older kernel; +@@ -175,6 +183,8 @@ struct rfkill_event_ext { + #define RFKILL_IOC_MAGIC 'R' + #define RFKILL_IOC_NOINPUT 1 + #define RFKILL_IOCTL_NOINPUT _IO(RFKILL_IOC_MAGIC, RFKILL_IOC_NOINPUT) ++#define RFKILL_IOC_MAX_SIZE 2 ++#define RFKILL_IOCTL_MAX_SIZE _IOW(RFKILL_IOC_MAGIC, RFKILL_IOC_MAX_SIZE, __u32) + + /* and that's all userspace gets */ + +diff --git a/include/uapi/linux/rseq.h b/include/uapi/linux/rseq.h +index 9a402fdb60e97..77ee207623a9b 100644 +--- a/include/uapi/linux/rseq.h ++++ b/include/uapi/linux/rseq.h +@@ -105,23 +105,11 @@ struct rseq { + * Read and set by the kernel. Set by user-space with single-copy + * atomicity semantics. This field should only be updated by the + * thread which registered this data structure. Aligned on 64-bit. ++ * ++ * 32-bit architectures should update the low order bits of the ++ * rseq_cs field, leaving the high order bits initialized to 0. + */ +- union { +- __u64 ptr64; +-#ifdef __LP64__ +- __u64 ptr; +-#else +- struct { +-#if (defined(__BYTE_ORDER) && (__BYTE_ORDER == __BIG_ENDIAN)) || defined(__BIG_ENDIAN) +- __u32 padding; /* Initialized to zero. */ +- __u32 ptr32; +-#else /* LITTLE */ +- __u32 ptr32; +- __u32 padding; /* Initialized to zero. */ +-#endif /* ENDIAN */ +- } ptr; +-#endif +- } rseq_cs; ++ __u64 rseq_cs; + + /* + * Restartable sequences flags field. 
+diff --git a/include/uapi/linux/serial_core.h b/include/uapi/linux/serial_core.h +index c4042dcfdc0c3..8885e69178bd7 100644 +--- a/include/uapi/linux/serial_core.h ++++ b/include/uapi/linux/serial_core.h +@@ -68,6 +68,9 @@ + /* NVIDIA Tegra Combined UART */ + #define PORT_TEGRA_TCU 41 + ++/* ASPEED AST2x00 virtual UART */ ++#define PORT_ASPEED_VUART 42 ++ + /* Intel EG20 */ + #define PORT_PCH_8LINE 44 + #define PORT_PCH_2LINE 45 +diff --git a/include/uapi/linux/stddef.h b/include/uapi/linux/stddef.h +index ee8220f8dcf5f..7837ba4fe7289 100644 +--- a/include/uapi/linux/stddef.h ++++ b/include/uapi/linux/stddef.h +@@ -1,6 +1,47 @@ + /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++#ifndef _UAPI_LINUX_STDDEF_H ++#define _UAPI_LINUX_STDDEF_H ++ + #include <linux/compiler_types.h> + + #ifndef __always_inline + #define __always_inline inline + #endif ++ ++/** ++ * __struct_group() - Create a mirrored named and anonyomous struct ++ * ++ * @TAG: The tag name for the named sub-struct (usually empty) ++ * @NAME: The identifier name of the mirrored sub-struct ++ * @ATTRS: Any struct attributes (usually empty) ++ * @MEMBERS: The member declarations for the mirrored structs ++ * ++ * Used to create an anonymous union of two structs with identical layout ++ * and size: one anonymous and one named. The former's members can be used ++ * normally without sub-struct naming, and the latter can be used to ++ * reason about the start, end, and size of the group of struct members. ++ * The named struct can also be explicitly tagged for layer reuse, as well ++ * as both having struct attributes appended. ++ */ ++#define __struct_group(TAG, NAME, ATTRS, MEMBERS...) \ ++ union { \ ++ struct { MEMBERS } ATTRS; \ ++ struct TAG { MEMBERS } ATTRS NAME; \ ++ } ++ ++/** ++ * __DECLARE_FLEX_ARRAY() - Declare a flexible array usable in a union ++ * ++ * @TYPE: The type of each flexible array element ++ * @NAME: The name of the flexible array member ++ * ++ * In order to have a flexible array member in a union or alone in a ++ * struct, it needs to be wrapped in an anonymous struct with at least 1 ++ * named member, but that member can be empty. ++ */ ++#define __DECLARE_FLEX_ARRAY(TYPE, NAME) \ ++ struct { \ ++ struct { } __empty_ ## NAME; \ ++ TYPE NAME[]; \ ++ } ++#endif +diff --git a/include/uapi/linux/swab.h b/include/uapi/linux/swab.h +index 7272f85d6d6ab..3736f2fe15418 100644 +--- a/include/uapi/linux/swab.h ++++ b/include/uapi/linux/swab.h +@@ -3,7 +3,7 @@ + #define _UAPI_LINUX_SWAB_H + + #include <linux/types.h> +-#include <linux/compiler.h> ++#include <linux/stddef.h> + #include <asm/bitsperlong.h> + #include <asm/swab.h> + +diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h +index 9260791b8438f..61c5011dfc13d 100644 +--- a/include/uapi/linux/videodev2.h ++++ b/include/uapi/linux/videodev2.h +@@ -1560,7 +1560,8 @@ struct v4l2_bt_timings { + ((bt)->width + V4L2_DV_BT_BLANKING_WIDTH(bt)) + #define V4L2_DV_BT_BLANKING_HEIGHT(bt) \ + ((bt)->vfrontporch + (bt)->vsync + (bt)->vbackporch + \ +- (bt)->il_vfrontporch + (bt)->il_vsync + (bt)->il_vbackporch) ++ ((bt)->interlaced ? 
\ ++ ((bt)->il_vfrontporch + (bt)->il_vsync + (bt)->il_vbackporch) : 0)) + #define V4L2_DV_BT_FRAME_HEIGHT(bt) \ + ((bt)->height + V4L2_DV_BT_BLANKING_HEIGHT(bt)) + +diff --git a/include/uapi/linux/virtio_ids.h b/include/uapi/linux/virtio_ids.h +index 80d76b75bccd9..7aa2eb7662050 100644 +--- a/include/uapi/linux/virtio_ids.h ++++ b/include/uapi/linux/virtio_ids.h +@@ -73,12 +73,12 @@ + * Virtio Transitional IDs + */ + +-#define VIRTIO_TRANS_ID_NET 1000 /* transitional virtio net */ +-#define VIRTIO_TRANS_ID_BLOCK 1001 /* transitional virtio block */ +-#define VIRTIO_TRANS_ID_BALLOON 1002 /* transitional virtio balloon */ +-#define VIRTIO_TRANS_ID_CONSOLE 1003 /* transitional virtio console */ +-#define VIRTIO_TRANS_ID_SCSI 1004 /* transitional virtio SCSI */ +-#define VIRTIO_TRANS_ID_RNG 1005 /* transitional virtio rng */ +-#define VIRTIO_TRANS_ID_9P 1009 /* transitional virtio 9p console */ ++#define VIRTIO_TRANS_ID_NET 0x1000 /* transitional virtio net */ ++#define VIRTIO_TRANS_ID_BLOCK 0x1001 /* transitional virtio block */ ++#define VIRTIO_TRANS_ID_BALLOON 0x1002 /* transitional virtio balloon */ ++#define VIRTIO_TRANS_ID_CONSOLE 0x1003 /* transitional virtio console */ ++#define VIRTIO_TRANS_ID_SCSI 0x1004 /* transitional virtio SCSI */ ++#define VIRTIO_TRANS_ID_RNG 0x1005 /* transitional virtio rng */ ++#define VIRTIO_TRANS_ID_9P 0x1009 /* transitional virtio 9p console */ + + #endif /* _LINUX_VIRTIO_IDS_H */ +diff --git a/include/uapi/linux/xfrm.h b/include/uapi/linux/xfrm.h +index eda0426ec4c2b..65e13a099b1a0 100644 +--- a/include/uapi/linux/xfrm.h ++++ b/include/uapi/linux/xfrm.h +@@ -313,6 +313,7 @@ enum xfrm_attr_type_t { + XFRMA_SET_MARK, /* __u32 */ + XFRMA_SET_MARK_MASK, /* __u32 */ + XFRMA_IF_ID, /* __u32 */ ++ XFRMA_MTIMER_THRESH, /* __u32 in seconds for input SA */ + __XFRMA_MAX + + #define XFRMA_OUTPUT_MARK XFRMA_SET_MARK /* Compatibility */ +@@ -510,6 +511,12 @@ struct xfrm_user_offload { + int ifindex; + __u8 flags; + }; ++/* This flag was exposed without any kernel code that supporting it. ++ * Unfortunately, strongswan has the code that uses sets this flag, ++ * which makes impossible to reuse this bit. ++ * ++ * So leave it here to make sure that it won't be reused by mistake. 
++ */ + #define XFRM_OFFLOAD_IPV6 1 + #define XFRM_OFFLOAD_INBOUND 2 + +diff --git a/include/uapi/rdma/mlx5-abi.h b/include/uapi/rdma/mlx5-abi.h +index 86be4a92b67bf..a96b7d2770e15 100644 +--- a/include/uapi/rdma/mlx5-abi.h ++++ b/include/uapi/rdma/mlx5-abi.h +@@ -104,6 +104,7 @@ enum mlx5_ib_alloc_ucontext_resp_mask { + MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_ECE = 1UL << 2, + MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_SQD2RTS = 1UL << 3, + MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_REAL_TIME_TS = 1UL << 4, ++ MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_MKEY_UPDATE_TAG = 1UL << 5, + }; + + enum mlx5_user_cmds_supp_uhw { +diff --git a/include/uapi/sound/asequencer.h b/include/uapi/sound/asequencer.h +index a75e14edc957e..dbd60f48b4b01 100644 +--- a/include/uapi/sound/asequencer.h ++++ b/include/uapi/sound/asequencer.h +@@ -344,10 +344,10 @@ typedef int __bitwise snd_seq_client_type_t; + #define KERNEL_CLIENT ((__force snd_seq_client_type_t) 2) + + /* event filter flags */ +-#define SNDRV_SEQ_FILTER_BROADCAST (1<<0) /* accept broadcast messages */ +-#define SNDRV_SEQ_FILTER_MULTICAST (1<<1) /* accept multicast messages */ +-#define SNDRV_SEQ_FILTER_BOUNCE (1<<2) /* accept bounce event in error */ +-#define SNDRV_SEQ_FILTER_USE_EVENT (1<<31) /* use event filter */ ++#define SNDRV_SEQ_FILTER_BROADCAST (1U<<0) /* accept broadcast messages */ ++#define SNDRV_SEQ_FILTER_MULTICAST (1U<<1) /* accept multicast messages */ ++#define SNDRV_SEQ_FILTER_BOUNCE (1U<<2) /* accept bounce event in error */ ++#define SNDRV_SEQ_FILTER_USE_EVENT (1U<<31) /* use event filter */ + + struct snd_seq_client_info { + int client; /* client number to inquire */ +diff --git a/include/uapi/sound/asound.h b/include/uapi/sound/asound.h +index 5859ca0a1439b..93e40f91bd49a 100644 +--- a/include/uapi/sound/asound.h ++++ b/include/uapi/sound/asound.h +@@ -56,8 +56,10 @@ + * * + ****************************************************************************/ + ++#define AES_IEC958_STATUS_SIZE 24 ++ + struct snd_aes_iec958 { +- unsigned char status[24]; /* AES/IEC958 channel status bits */ ++ unsigned char status[AES_IEC958_STATUS_SIZE]; /* AES/IEC958 channel status bits */ + unsigned char subcode[147]; /* AES/IEC958 subcode bits */ + unsigned char pad; /* nothing */ + unsigned char dig_subframe[4]; /* AES/IEC958 subframe bits */ +diff --git a/include/video/of_display_timing.h b/include/video/of_display_timing.h +index e1126a74882a5..eff166fdd81b9 100644 +--- a/include/video/of_display_timing.h ++++ b/include/video/of_display_timing.h +@@ -8,6 +8,8 @@ + #ifndef __LINUX_OF_DISPLAY_TIMING_H + #define __LINUX_OF_DISPLAY_TIMING_H + ++#include <linux/errno.h> ++ + struct device_node; + struct display_timing; + struct display_timings; +diff --git a/include/xen/events.h b/include/xen/events.h +index c204262d9fc24..344081e71584b 100644 +--- a/include/xen/events.h ++++ b/include/xen/events.h +@@ -17,6 +17,7 @@ struct xenbus_device; + unsigned xen_evtchn_nr_channels(void); + + int bind_evtchn_to_irq(evtchn_port_t evtchn); ++int bind_evtchn_to_irq_lateeoi(evtchn_port_t evtchn); + int bind_evtchn_to_irqhandler(evtchn_port_t evtchn, + irq_handler_t handler, + unsigned long irqflags, const char *devname, +diff --git a/include/xen/grant_table.h b/include/xen/grant_table.h +index cb854df031ce0..c9fea9389ebec 100644 +--- a/include/xen/grant_table.h ++++ b/include/xen/grant_table.h +@@ -104,17 +104,32 @@ int gnttab_end_foreign_access_ref(grant_ref_t ref, int readonly); + * access has been ended, free the given page too. 
Access will be ended + * immediately iff the grant entry is not in use, otherwise it will happen + * some time later. page may be 0, in which case no freeing will occur. ++ * Note that the granted page might still be accessed (read or write) by the ++ * other side after gnttab_end_foreign_access() returns, so even if page was ++ * specified as 0 it is not allowed to just reuse the page for other ++ * purposes immediately. gnttab_end_foreign_access() will take an additional ++ * reference to the granted page in this case, which is dropped only after ++ * the grant is no longer in use. ++ * This requires that multi page allocations for areas subject to ++ * gnttab_end_foreign_access() are done via alloc_pages_exact() (and freeing ++ * via free_pages_exact()) in order to avoid high order pages. + */ + void gnttab_end_foreign_access(grant_ref_t ref, int readonly, + unsigned long page); + ++/* ++ * End access through the given grant reference, iff the grant entry is ++ * no longer in use. In case of success ending foreign access, the ++ * grant reference is deallocated. ++ * Return 1 if the grant entry was freed, 0 if it is still in use. ++ */ ++int gnttab_try_end_foreign_access(grant_ref_t ref); ++ + int gnttab_grant_foreign_transfer(domid_t domid, unsigned long pfn); + + unsigned long gnttab_end_foreign_transfer_ref(grant_ref_t ref); + unsigned long gnttab_end_foreign_transfer(grant_ref_t ref); + +-int gnttab_query_foreign_access(grant_ref_t ref); +- + /* + * operations on reserved batches of grant references + */ +diff --git a/init/Kconfig b/init/Kconfig +index 11f8a845f259d..dafc3ba6fa7a1 100644 +--- a/init/Kconfig ++++ b/init/Kconfig +@@ -77,6 +77,11 @@ config CC_HAS_ASM_GOTO_OUTPUT + depends on CC_HAS_ASM_GOTO + def_bool $(success,echo 'int foo(int x) { asm goto ("": "=r"(x) ::: bar); return x; bar: return 0; }' | $(CC) -x c - -c -o /dev/null) + ++config CC_HAS_ASM_GOTO_TIED_OUTPUT ++ depends on CC_HAS_ASM_GOTO_OUTPUT ++ # Detect buggy gcc and clang, fixed in gcc-11 clang-14. 
++ def_bool $(success,echo 'int foo(int *x) { asm goto (".long (%l[bar]) - .": "+m"(*x) ::: bar); return *x; bar: return 0; }' | $CC -x c - -c -o /dev/null) ++ + config TOOLS_SUPPORT_RELR + def_bool $(success,env "CC=$(CC)" "LD=$(LD)" "NM=$(NM)" "OBJCOPY=$(OBJCOPY)" $(srctree)/scripts/tools-support-relr.sh) + +@@ -86,6 +91,10 @@ config CC_HAS_ASM_INLINE + config CC_HAS_NO_PROFILE_FN_ATTR + def_bool $(success,echo '__attribute__((no_profile_instrument_function)) int x();' | $(CC) -x c - -c -o /dev/null -Werror) + ++config PAHOLE_VERSION ++ int ++ default $(shell,$(srctree)/scripts/pahole-version.sh $(PAHOLE)) ++ + config CONSTRUCTORS + bool + +diff --git a/init/main.c b/init/main.c +index 3c4054a955458..649d9e4201a80 100644 +--- a/init/main.c ++++ b/init/main.c +@@ -100,6 +100,8 @@ + #include <linux/kcsan.h> + #include <linux/init_syscalls.h> + #include <linux/stackdepot.h> ++#include <linux/randomize_kstack.h> ++#include <net/net_namespace.h> + + #include <asm/io.h> + #include <asm/bugs.h> +@@ -924,7 +926,9 @@ static void __init print_unknown_bootoptions(void) + for (p = &envp_init[2]; *p; p++) + end += sprintf(end, " %s", *p); + +- pr_notice("Unknown command line parameters:%s\n", unknown_options); ++ /* Start at unknown_options[1] to skip the initial space */ ++ pr_notice("Unknown kernel command line parameters \"%s\", will be passed to user space.\n", ++ &unknown_options[1]); + memblock_free_ptr(unknown_options, len); } -+static int _nfs4_discover_trunking(struct nfs_server *server, -+ struct nfs_fh *fhandle) +@@ -1038,21 +1042,18 @@ asmlinkage __visible void __init __no_sanitize_address start_kernel(void) + softirq_init(); + timekeeping_init(); + kfence_init(); ++ time_init(); + + /* + * For best initial stack canary entropy, prepare it after: + * - setup_arch() for any UEFI RNG entropy and boot cmdline access +- * - timekeeping_init() for ktime entropy used in rand_initialize() +- * - rand_initialize() to get any arch-specific entropy like RDRAND +- * - add_latent_entropy() to get any latent entropy +- * - adding command line entropy ++ * - timekeeping_init() for ktime entropy used in random_init() ++ * - time_init() for making random_get_entropy() work on some platforms ++ * - random_init() to initialize the RNG from from early entropy sources + */ +- rand_initialize(); +- add_latent_entropy(); +- add_device_randomness(command_line, strlen(command_line)); ++ random_init(command_line); + boot_init_stack_canary(); + +- time_init(); + perf_event_init(); + profile_init(); + call_function_init(); +@@ -1120,6 +1121,7 @@ asmlinkage __visible void __init __no_sanitize_address start_kernel(void) + key_init(); + security_init(); + dbg_late_init(); ++ net_ns_init(); + vfs_caches_init(); + pagecache_init(); + signals_init(); +@@ -1194,7 +1196,7 @@ static int __init initcall_blacklist(char *str) + } + } while (str_entry); + +- return 0; ++ return 1; + } + + static bool __init_or_module initcall_blacklisted(initcall_t fn) +@@ -1456,7 +1458,9 @@ static noinline void __init kernel_init_freeable(void); + bool rodata_enabled __ro_after_init = true; + static int __init set_debug_rodata(char *str) + { +- return strtobool(str, &rodata_enabled); ++ if (strtobool(str, &rodata_enabled)) ++ pr_warn("Invalid option string for rodata: '%s'\n", str); ++ return 1; + } + __setup("rodata=", set_debug_rodata); + #endif +diff --git a/io_uring/Makefile b/io_uring/Makefile +new file mode 100644 +index 0000000000000..3680425df9478 +--- /dev/null ++++ b/io_uring/Makefile +@@ -0,0 +1,6 @@ ++# SPDX-License-Identifier: 
GPL-2.0 ++# ++# Makefile for io_uring ++ ++obj-$(CONFIG_IO_URING) += io_uring.o ++obj-$(CONFIG_IO_WQ) += io-wq.o +diff --git a/io_uring/io-wq.c b/io_uring/io-wq.c +new file mode 100644 +index 0000000000000..81485c1a9879e +--- /dev/null ++++ b/io_uring/io-wq.c +@@ -0,0 +1,1404 @@ ++// SPDX-License-Identifier: GPL-2.0 ++/* ++ * Basic worker thread pool for io_uring ++ * ++ * Copyright (C) 2019 Jens Axboe ++ * ++ */ ++#include <linux/kernel.h> ++#include <linux/init.h> ++#include <linux/errno.h> ++#include <linux/sched/signal.h> ++#include <linux/percpu.h> ++#include <linux/slab.h> ++#include <linux/rculist_nulls.h> ++#include <linux/cpu.h> ++#include <linux/tracehook.h> ++#include <uapi/linux/io_uring.h> ++ ++#include "io-wq.h" ++ ++#define WORKER_IDLE_TIMEOUT (5 * HZ) ++ ++enum { ++ IO_WORKER_F_UP = 1, /* up and active */ ++ IO_WORKER_F_RUNNING = 2, /* account as running */ ++ IO_WORKER_F_FREE = 4, /* worker on free list */ ++ IO_WORKER_F_BOUND = 8, /* is doing bounded work */ ++}; ++ ++enum { ++ IO_WQ_BIT_EXIT = 0, /* wq exiting */ ++}; ++ ++enum { ++ IO_ACCT_STALLED_BIT = 0, /* stalled on hash */ ++}; ++ ++/* ++ * One for each thread in a wqe pool ++ */ ++struct io_worker { ++ refcount_t ref; ++ unsigned flags; ++ struct hlist_nulls_node nulls_node; ++ struct list_head all_list; ++ struct task_struct *task; ++ struct io_wqe *wqe; ++ ++ struct io_wq_work *cur_work; ++ spinlock_t lock; ++ ++ struct completion ref_done; ++ ++ unsigned long create_state; ++ struct callback_head create_work; ++ int create_index; ++ ++ union { ++ struct rcu_head rcu; ++ struct work_struct work; ++ }; ++}; ++ ++#if BITS_PER_LONG == 64 ++#define IO_WQ_HASH_ORDER 6 ++#else ++#define IO_WQ_HASH_ORDER 5 ++#endif ++ ++#define IO_WQ_NR_HASH_BUCKETS (1u << IO_WQ_HASH_ORDER) ++ ++struct io_wqe_acct { ++ unsigned nr_workers; ++ unsigned max_workers; ++ int index; ++ atomic_t nr_running; ++ struct io_wq_work_list work_list; ++ unsigned long flags; ++}; ++ ++enum { ++ IO_WQ_ACCT_BOUND, ++ IO_WQ_ACCT_UNBOUND, ++ IO_WQ_ACCT_NR, ++}; ++ ++/* ++ * Per-node worker thread pool ++ */ ++struct io_wqe { ++ raw_spinlock_t lock; ++ struct io_wqe_acct acct[2]; ++ ++ int node; ++ ++ struct hlist_nulls_head free_list; ++ struct list_head all_list; ++ ++ struct wait_queue_entry wait; ++ ++ struct io_wq *wq; ++ struct io_wq_work *hash_tail[IO_WQ_NR_HASH_BUCKETS]; ++ ++ cpumask_var_t cpu_mask; ++}; ++ ++/* ++ * Per io_wq state ++ */ ++struct io_wq { ++ unsigned long state; ++ ++ free_work_fn *free_work; ++ io_wq_work_fn *do_work; ++ ++ struct io_wq_hash *hash; ++ ++ atomic_t worker_refs; ++ struct completion worker_done; ++ ++ struct hlist_node cpuhp_node; ++ ++ struct task_struct *task; ++ ++ struct io_wqe *wqes[]; ++}; ++ ++static enum cpuhp_state io_wq_online; ++ ++struct io_cb_cancel_data { ++ work_cancel_fn *fn; ++ void *data; ++ int nr_running; ++ int nr_pending; ++ bool cancel_all; ++}; ++ ++static bool create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index); ++static void io_wqe_dec_running(struct io_worker *worker); ++static bool io_acct_cancel_pending_work(struct io_wqe *wqe, ++ struct io_wqe_acct *acct, ++ struct io_cb_cancel_data *match); ++static void create_worker_cb(struct callback_head *cb); ++static void io_wq_cancel_tw_create(struct io_wq *wq); ++ ++static bool io_worker_get(struct io_worker *worker) ++{ ++ return refcount_inc_not_zero(&worker->ref); ++} ++ ++static void io_worker_release(struct io_worker *worker) ++{ ++ if (refcount_dec_and_test(&worker->ref)) ++ complete(&worker->ref_done); ++} ++ ++static 
inline struct io_wqe_acct *io_get_acct(struct io_wqe *wqe, bool bound) ++{ ++ return &wqe->acct[bound ? IO_WQ_ACCT_BOUND : IO_WQ_ACCT_UNBOUND]; ++} ++ ++static inline struct io_wqe_acct *io_work_get_acct(struct io_wqe *wqe, ++ struct io_wq_work *work) ++{ ++ return io_get_acct(wqe, !(work->flags & IO_WQ_WORK_UNBOUND)); ++} ++ ++static inline struct io_wqe_acct *io_wqe_get_acct(struct io_worker *worker) ++{ ++ return io_get_acct(worker->wqe, worker->flags & IO_WORKER_F_BOUND); ++} ++ ++static void io_worker_ref_put(struct io_wq *wq) ++{ ++ if (atomic_dec_and_test(&wq->worker_refs)) ++ complete(&wq->worker_done); ++} ++ ++static void io_worker_cancel_cb(struct io_worker *worker) ++{ ++ struct io_wqe_acct *acct = io_wqe_get_acct(worker); ++ struct io_wqe *wqe = worker->wqe; ++ struct io_wq *wq = wqe->wq; ++ ++ atomic_dec(&acct->nr_running); ++ raw_spin_lock(&worker->wqe->lock); ++ acct->nr_workers--; ++ raw_spin_unlock(&worker->wqe->lock); ++ io_worker_ref_put(wq); ++ clear_bit_unlock(0, &worker->create_state); ++ io_worker_release(worker); ++} ++ ++static bool io_task_worker_match(struct callback_head *cb, void *data) ++{ ++ struct io_worker *worker; ++ ++ if (cb->func != create_worker_cb) ++ return false; ++ worker = container_of(cb, struct io_worker, create_work); ++ return worker == data; ++} ++ ++static void io_worker_exit(struct io_worker *worker) ++{ ++ struct io_wqe *wqe = worker->wqe; ++ struct io_wq *wq = wqe->wq; ++ ++ while (1) { ++ struct callback_head *cb = task_work_cancel_match(wq->task, ++ io_task_worker_match, worker); ++ ++ if (!cb) ++ break; ++ io_worker_cancel_cb(worker); ++ } ++ ++ if (refcount_dec_and_test(&worker->ref)) ++ complete(&worker->ref_done); ++ wait_for_completion(&worker->ref_done); ++ ++ raw_spin_lock(&wqe->lock); ++ if (worker->flags & IO_WORKER_F_FREE) ++ hlist_nulls_del_rcu(&worker->nulls_node); ++ list_del_rcu(&worker->all_list); ++ preempt_disable(); ++ io_wqe_dec_running(worker); ++ worker->flags = 0; ++ current->flags &= ~PF_IO_WORKER; ++ preempt_enable(); ++ raw_spin_unlock(&wqe->lock); ++ ++ kfree_rcu(worker, rcu); ++ io_worker_ref_put(wqe->wq); ++ do_exit(0); ++} ++ ++static inline bool io_acct_run_queue(struct io_wqe_acct *acct) ++{ ++ if (!wq_list_empty(&acct->work_list) && ++ !test_bit(IO_ACCT_STALLED_BIT, &acct->flags)) ++ return true; ++ return false; ++} ++ ++/* ++ * Check head of free list for an available worker. If one isn't available, ++ * caller must create one. ++ */ ++static bool io_wqe_activate_free_worker(struct io_wqe *wqe, ++ struct io_wqe_acct *acct) ++ __must_hold(RCU) ++{ ++ struct hlist_nulls_node *n; ++ struct io_worker *worker; ++ ++ /* ++ * Iterate free_list and see if we can find an idle worker to ++ * activate. If a given worker is on the free_list but in the process ++ * of exiting, keep trying. ++ */ ++ hlist_nulls_for_each_entry_rcu(worker, n, &wqe->free_list, nulls_node) { ++ if (!io_worker_get(worker)) ++ continue; ++ if (io_wqe_get_acct(worker) != acct) { ++ io_worker_release(worker); ++ continue; ++ } ++ if (wake_up_process(worker->task)) { ++ io_worker_release(worker); ++ return true; ++ } ++ io_worker_release(worker); ++ } ++ ++ return false; ++} ++ ++/* ++ * We need a worker. If we find a free one, we're good. If not, and we're ++ * below the max number of workers, create one. ++ */ ++static bool io_wqe_create_worker(struct io_wqe *wqe, struct io_wqe_acct *acct) ++{ ++ /* ++ * Most likely an attempt to queue unbounded work on an io_wq that ++ * wasn't setup with any unbounded workers. 
++ */ ++ if (unlikely(!acct->max_workers)) ++ pr_warn_once("io-wq is not configured for unbound workers"); ++ ++ raw_spin_lock(&wqe->lock); ++ if (acct->nr_workers >= acct->max_workers) { ++ raw_spin_unlock(&wqe->lock); ++ return true; ++ } ++ acct->nr_workers++; ++ raw_spin_unlock(&wqe->lock); ++ atomic_inc(&acct->nr_running); ++ atomic_inc(&wqe->wq->worker_refs); ++ return create_io_worker(wqe->wq, wqe, acct->index); ++} ++ ++static void io_wqe_inc_running(struct io_worker *worker) ++{ ++ struct io_wqe_acct *acct = io_wqe_get_acct(worker); ++ ++ atomic_inc(&acct->nr_running); ++} ++ ++static void create_worker_cb(struct callback_head *cb) ++{ ++ struct io_worker *worker; ++ struct io_wq *wq; ++ struct io_wqe *wqe; ++ struct io_wqe_acct *acct; ++ bool do_create = false; ++ ++ worker = container_of(cb, struct io_worker, create_work); ++ wqe = worker->wqe; ++ wq = wqe->wq; ++ acct = &wqe->acct[worker->create_index]; ++ raw_spin_lock(&wqe->lock); ++ if (acct->nr_workers < acct->max_workers) { ++ acct->nr_workers++; ++ do_create = true; ++ } ++ raw_spin_unlock(&wqe->lock); ++ if (do_create) { ++ create_io_worker(wq, wqe, worker->create_index); ++ } else { ++ atomic_dec(&acct->nr_running); ++ io_worker_ref_put(wq); ++ } ++ clear_bit_unlock(0, &worker->create_state); ++ io_worker_release(worker); ++} ++ ++static bool io_queue_worker_create(struct io_worker *worker, ++ struct io_wqe_acct *acct, ++ task_work_func_t func) ++{ ++ struct io_wqe *wqe = worker->wqe; ++ struct io_wq *wq = wqe->wq; ++ ++ /* raced with exit, just ignore create call */ ++ if (test_bit(IO_WQ_BIT_EXIT, &wq->state)) ++ goto fail; ++ if (!io_worker_get(worker)) ++ goto fail; ++ /* ++ * create_state manages ownership of create_work/index. We should ++ * only need one entry per worker, as the worker going to sleep ++ * will trigger the condition, and waking will clear it once it ++ * runs the task_work. ++ */ ++ if (test_bit(0, &worker->create_state) || ++ test_and_set_bit_lock(0, &worker->create_state)) ++ goto fail_release; ++ ++ atomic_inc(&wq->worker_refs); ++ init_task_work(&worker->create_work, func); ++ worker->create_index = acct->index; ++ if (!task_work_add(wq->task, &worker->create_work, TWA_SIGNAL)) { ++ /* ++ * EXIT may have been set after checking it above, check after ++ * adding the task_work and remove any creation item if it is ++ * now set. wq exit does that too, but we can have added this ++ * work item after we canceled in io_wq_exit_workers(). ++ */ ++ if (test_bit(IO_WQ_BIT_EXIT, &wq->state)) ++ io_wq_cancel_tw_create(wq); ++ io_worker_ref_put(wq); ++ return true; ++ } ++ io_worker_ref_put(wq); ++ clear_bit_unlock(0, &worker->create_state); ++fail_release: ++ io_worker_release(worker); ++fail: ++ atomic_dec(&acct->nr_running); ++ io_worker_ref_put(wq); ++ return false; ++} ++ ++static void io_wqe_dec_running(struct io_worker *worker) ++ __must_hold(wqe->lock) ++{ ++ struct io_wqe_acct *acct = io_wqe_get_acct(worker); ++ struct io_wqe *wqe = worker->wqe; ++ ++ if (!(worker->flags & IO_WORKER_F_UP)) ++ return; ++ ++ if (atomic_dec_and_test(&acct->nr_running) && io_acct_run_queue(acct)) { ++ atomic_inc(&acct->nr_running); ++ atomic_inc(&wqe->wq->worker_refs); ++ raw_spin_unlock(&wqe->lock); ++ io_queue_worker_create(worker, acct, create_worker_cb); ++ raw_spin_lock(&wqe->lock); ++ } ++} ++ ++/* ++ * Worker will start processing some work. 
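++ * It must no longer be findable by io_wqe_activate_free_worker().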
Move it to the busy list, if ++ * it's currently on the freelist ++ */ ++static void __io_worker_busy(struct io_wqe *wqe, struct io_worker *worker, ++ struct io_wq_work *work) ++ __must_hold(wqe->lock) ++{ ++ if (worker->flags & IO_WORKER_F_FREE) { ++ worker->flags &= ~IO_WORKER_F_FREE; ++ hlist_nulls_del_init_rcu(&worker->nulls_node); ++ } ++} ++ ++/* ++ * No work, worker going to sleep. Move to freelist, and unuse mm if we ++ * have one attached. Dropping the mm may potentially sleep, so we drop ++ * the lock in that case and return success. Since the caller has to ++ * retry the loop in that case (we changed task state), we don't regrab ++ * the lock if we return success. ++ */ ++static void __io_worker_idle(struct io_wqe *wqe, struct io_worker *worker) ++ __must_hold(wqe->lock) ++{ ++ if (!(worker->flags & IO_WORKER_F_FREE)) { ++ worker->flags |= IO_WORKER_F_FREE; ++ hlist_nulls_add_head_rcu(&worker->nulls_node, &wqe->free_list); ++ } ++} ++ ++static inline unsigned int io_get_work_hash(struct io_wq_work *work) ++{ ++ return work->flags >> IO_WQ_HASH_SHIFT; ++} ++ ++static bool io_wait_on_hash(struct io_wqe *wqe, unsigned int hash) ++{ ++ struct io_wq *wq = wqe->wq; ++ bool ret = false; ++ ++ spin_lock_irq(&wq->hash->wait.lock); ++ if (list_empty(&wqe->wait.entry)) { ++ __add_wait_queue(&wq->hash->wait, &wqe->wait); ++ if (!test_bit(hash, &wq->hash->map)) { ++ __set_current_state(TASK_RUNNING); ++ list_del_init(&wqe->wait.entry); ++ ret = true; ++ } ++ } ++ spin_unlock_irq(&wq->hash->wait.lock); ++ return ret; ++} ++ ++static struct io_wq_work *io_get_next_work(struct io_wqe_acct *acct, ++ struct io_worker *worker) ++ __must_hold(wqe->lock) ++{ ++ struct io_wq_work_node *node, *prev; ++ struct io_wq_work *work, *tail; ++ unsigned int stall_hash = -1U; ++ struct io_wqe *wqe = worker->wqe; ++ ++ wq_list_for_each(node, prev, &acct->work_list) { ++ unsigned int hash; ++ ++ work = container_of(node, struct io_wq_work, list); ++ ++ /* not hashed, can run anytime */ ++ if (!io_wq_is_hashed(work)) { ++ wq_list_del(&acct->work_list, node, prev); ++ return work; ++ } ++ ++ hash = io_get_work_hash(work); ++ /* all items with this hash lie in [work, tail] */ ++ tail = wqe->hash_tail[hash]; ++ ++ /* hashed, can run if not already running */ ++ if (!test_and_set_bit(hash, &wqe->wq->hash->map)) { ++ wqe->hash_tail[hash] = NULL; ++ wq_list_cut(&acct->work_list, &tail->list, prev); ++ return work; ++ } ++ if (stall_hash == -1U) ++ stall_hash = hash; ++ /* fast forward to a next hash, for-each will fix up @prev */ ++ node = &tail->list; ++ } ++ ++ if (stall_hash != -1U) { ++ bool unstalled; ++ ++ /* ++ * Set this before dropping the lock to avoid racing with new ++ * work being added and clearing the stalled bit. 
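++ * (io_wqe_enqueue() clears the bit again right after inserting new
++ * work, and both updates happen under wqe->lock.)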
++ */ ++ set_bit(IO_ACCT_STALLED_BIT, &acct->flags); ++ raw_spin_unlock(&wqe->lock); ++ unstalled = io_wait_on_hash(wqe, stall_hash); ++ raw_spin_lock(&wqe->lock); ++ if (unstalled) { ++ clear_bit(IO_ACCT_STALLED_BIT, &acct->flags); ++ if (wq_has_sleeper(&wqe->wq->hash->wait)) ++ wake_up(&wqe->wq->hash->wait); ++ } ++ } ++ ++ return NULL; ++} ++ ++static bool io_flush_signals(void) ++{ ++ if (test_thread_flag(TIF_NOTIFY_SIGNAL) || current->task_works) { ++ __set_current_state(TASK_RUNNING); ++ tracehook_notify_signal(); ++ return true; ++ } ++ return false; ++} ++ ++static void io_assign_current_work(struct io_worker *worker, ++ struct io_wq_work *work) ++{ ++ if (work) { ++ io_flush_signals(); ++ cond_resched(); ++ } ++ ++ spin_lock(&worker->lock); ++ worker->cur_work = work; ++ spin_unlock(&worker->lock); ++} ++ ++static void io_wqe_enqueue(struct io_wqe *wqe, struct io_wq_work *work); ++ ++static void io_worker_handle_work(struct io_worker *worker) ++ __releases(wqe->lock) ++{ ++ struct io_wqe_acct *acct = io_wqe_get_acct(worker); ++ struct io_wqe *wqe = worker->wqe; ++ struct io_wq *wq = wqe->wq; ++ bool do_kill = test_bit(IO_WQ_BIT_EXIT, &wq->state); ++ ++ do { ++ struct io_wq_work *work; ++get_next: ++ /* ++ * If we got some work, mark us as busy. If we didn't, but ++ * the list isn't empty, it means we stalled on hashed work. ++ * Mark us stalled so we don't keep looking for work when we ++ * can't make progress, any work completion or insertion will ++ * clear the stalled flag. ++ */ ++ work = io_get_next_work(acct, worker); ++ if (work) ++ __io_worker_busy(wqe, worker, work); ++ ++ raw_spin_unlock(&wqe->lock); ++ if (!work) ++ break; ++ io_assign_current_work(worker, work); ++ __set_current_state(TASK_RUNNING); ++ ++ /* handle a whole dependent link */ ++ do { ++ struct io_wq_work *next_hashed, *linked; ++ unsigned int hash = io_get_work_hash(work); ++ ++ next_hashed = wq_next_work(work); ++ ++ if (unlikely(do_kill) && (work->flags & IO_WQ_WORK_UNBOUND)) ++ work->flags |= IO_WQ_WORK_CANCEL; ++ wq->do_work(work); ++ io_assign_current_work(worker, NULL); ++ ++ linked = wq->free_work(work); ++ work = next_hashed; ++ if (!work && linked && !io_wq_is_hashed(linked)) { ++ work = linked; ++ linked = NULL; ++ } ++ io_assign_current_work(worker, work); ++ if (linked) ++ io_wqe_enqueue(wqe, linked); ++ ++ if (hash != -1U && !next_hashed) { ++ /* serialize hash clear with wake_up() */ ++ spin_lock_irq(&wq->hash->wait.lock); ++ clear_bit(hash, &wq->hash->map); ++ clear_bit(IO_ACCT_STALLED_BIT, &acct->flags); ++ spin_unlock_irq(&wq->hash->wait.lock); ++ if (wq_has_sleeper(&wq->hash->wait)) ++ wake_up(&wq->hash->wait); ++ raw_spin_lock(&wqe->lock); ++ /* skip unnecessary unlock-lock wqe->lock */ ++ if (!work) ++ goto get_next; ++ raw_spin_unlock(&wqe->lock); ++ } ++ } while (work); ++ ++ raw_spin_lock(&wqe->lock); ++ } while (1); ++} ++ ++static int io_wqe_worker(void *data) ++{ ++ struct io_worker *worker = data; ++ struct io_wqe_acct *acct = io_wqe_get_acct(worker); ++ struct io_wqe *wqe = worker->wqe; ++ struct io_wq *wq = wqe->wq; ++ bool last_timeout = false; ++ char buf[TASK_COMM_LEN]; ++ ++ worker->flags |= (IO_WORKER_F_UP | IO_WORKER_F_RUNNING); ++ ++ snprintf(buf, sizeof(buf), "iou-wrk-%d", wq->task->pid); ++ set_task_comm(current, buf); ++ ++ while (!test_bit(IO_WQ_BIT_EXIT, &wq->state)) { ++ long ret; ++ ++ set_current_state(TASK_INTERRUPTIBLE); ++loop: ++ raw_spin_lock(&wqe->lock); ++ if (io_acct_run_queue(acct)) { ++ io_worker_handle_work(worker); ++ goto loop; ++ } ++ /* timed 
out, exit unless we're the last worker */ ++ if (last_timeout && acct->nr_workers > 1) { ++ acct->nr_workers--; ++ raw_spin_unlock(&wqe->lock); ++ __set_current_state(TASK_RUNNING); ++ break; ++ } ++ last_timeout = false; ++ __io_worker_idle(wqe, worker); ++ raw_spin_unlock(&wqe->lock); ++ if (io_flush_signals()) ++ continue; ++ ret = schedule_timeout(WORKER_IDLE_TIMEOUT); ++ if (signal_pending(current)) { ++ struct ksignal ksig; ++ ++ if (!get_signal(&ksig)) ++ continue; ++ break; ++ } ++ last_timeout = !ret; ++ } ++ ++ if (test_bit(IO_WQ_BIT_EXIT, &wq->state)) { ++ raw_spin_lock(&wqe->lock); ++ io_worker_handle_work(worker); ++ } ++ ++ io_worker_exit(worker); ++ return 0; ++} ++ ++/* ++ * Called when a worker is scheduled in. Mark us as currently running. ++ */ ++void io_wq_worker_running(struct task_struct *tsk) +{ -+ struct nfs4_fs_locations *locations = NULL; -+ struct page *page; -+ const struct cred *cred; -+ struct nfs_client *clp = server->nfs_client; -+ const struct nfs4_state_maintenance_ops *ops = -+ clp->cl_mvops->state_renewal_ops; -+ int status = -ENOMEM; ++ struct io_worker *worker = tsk->pf_io_worker; + -+ cred = ops->get_state_renewal_cred(clp); -+ if (cred == NULL) { -+ cred = nfs4_get_clid_cred(clp); -+ if (cred == NULL) -+ return -ENOKEY; ++ if (!worker) ++ return; ++ if (!(worker->flags & IO_WORKER_F_UP)) ++ return; ++ if (worker->flags & IO_WORKER_F_RUNNING) ++ return; ++ worker->flags |= IO_WORKER_F_RUNNING; ++ io_wqe_inc_running(worker); ++} ++ ++/* ++ * Called when worker is going to sleep. If there are no workers currently ++ * running and we have work pending, wake up a free one or create a new one. ++ */ ++void io_wq_worker_sleeping(struct task_struct *tsk) ++{ ++ struct io_worker *worker = tsk->pf_io_worker; ++ ++ if (!worker) ++ return; ++ if (!(worker->flags & IO_WORKER_F_UP)) ++ return; ++ if (!(worker->flags & IO_WORKER_F_RUNNING)) ++ return; ++ ++ worker->flags &= ~IO_WORKER_F_RUNNING; ++ ++ raw_spin_lock(&worker->wqe->lock); ++ io_wqe_dec_running(worker); ++ raw_spin_unlock(&worker->wqe->lock); ++} ++ ++static void io_init_new_worker(struct io_wqe *wqe, struct io_worker *worker, ++ struct task_struct *tsk) ++{ ++ tsk->pf_io_worker = worker; ++ worker->task = tsk; ++ set_cpus_allowed_ptr(tsk, wqe->cpu_mask); ++ tsk->flags |= PF_NO_SETAFFINITY; ++ ++ raw_spin_lock(&wqe->lock); ++ hlist_nulls_add_head_rcu(&worker->nulls_node, &wqe->free_list); ++ list_add_tail_rcu(&worker->all_list, &wqe->all_list); ++ worker->flags |= IO_WORKER_F_FREE; ++ raw_spin_unlock(&wqe->lock); ++ wake_up_new_task(tsk); ++} ++ ++static bool io_wq_work_match_all(struct io_wq_work *work, void *data) ++{ ++ return true; ++} ++ ++static inline bool io_should_retry_thread(long err) ++{ ++ /* ++ * Prevent perpetual task_work retry, if the task (or its group) is ++ * exiting. 
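++ * Otherwise only transient errors (-EAGAIN and the signal-restart
++ * family below) are worth another attempt.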
++ */ ++ if (fatal_signal_pending(current)) ++ return false; ++ ++ switch (err) { ++ case -EAGAIN: ++ case -ERESTARTSYS: ++ case -ERESTARTNOINTR: ++ case -ERESTARTNOHAND: ++ return true; ++ default: ++ return false; + } ++} + -+ page = alloc_page(GFP_KERNEL); -+ locations = kmalloc(sizeof(struct nfs4_fs_locations), GFP_KERNEL); -+ if (page == NULL || locations == NULL) -+ goto out; ++static void create_worker_cont(struct callback_head *cb) ++{ ++ struct io_worker *worker; ++ struct task_struct *tsk; ++ struct io_wqe *wqe; + -+ status = nfs4_proc_get_locations(server, fhandle, locations, page, -+ cred); -+ if (status) -+ goto out; -+out: -+ if (page) -+ __free_page(page); -+ kfree(locations); -+ return status; ++ worker = container_of(cb, struct io_worker, create_work); ++ clear_bit_unlock(0, &worker->create_state); ++ wqe = worker->wqe; ++ tsk = create_io_thread(io_wqe_worker, worker, wqe->node); ++ if (!IS_ERR(tsk)) { ++ io_init_new_worker(wqe, worker, tsk); ++ io_worker_release(worker); ++ return; ++ } else if (!io_should_retry_thread(PTR_ERR(tsk))) { ++ struct io_wqe_acct *acct = io_wqe_get_acct(worker); ++ ++ atomic_dec(&acct->nr_running); ++ raw_spin_lock(&wqe->lock); ++ acct->nr_workers--; ++ if (!acct->nr_workers) { ++ struct io_cb_cancel_data match = { ++ .fn = io_wq_work_match_all, ++ .cancel_all = true, ++ }; ++ ++ while (io_acct_cancel_pending_work(wqe, acct, &match)) ++ raw_spin_lock(&wqe->lock); ++ } ++ raw_spin_unlock(&wqe->lock); ++ io_worker_ref_put(wqe->wq); ++ kfree(worker); ++ return; ++ } ++ ++ /* re-create attempts grab a new worker ref, drop the existing one */ ++ io_worker_release(worker); ++ schedule_work(&worker->work); +} + -+static int nfs4_discover_trunking(struct nfs_server *server, -+ struct nfs_fh *fhandle) ++static void io_workqueue_create(struct work_struct *work) +{ -+ struct nfs4_exception exception = { -+ .interruptible = true, -+ }; -+ struct nfs_client *clp = server->nfs_client; -+ int err = 0; ++ struct io_worker *worker = container_of(work, struct io_worker, work); ++ struct io_wqe_acct *acct = io_wqe_get_acct(worker); ++ ++ if (!io_queue_worker_create(worker, acct, create_worker_cont)) ++ kfree(worker); ++} ++ ++static bool create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index) ++{ ++ struct io_wqe_acct *acct = &wqe->acct[index]; ++ struct io_worker *worker; ++ struct task_struct *tsk; ++ ++ __set_current_state(TASK_RUNNING); ++ ++ worker = kzalloc_node(sizeof(*worker), GFP_KERNEL, wqe->node); ++ if (!worker) { ++fail: ++ atomic_dec(&acct->nr_running); ++ raw_spin_lock(&wqe->lock); ++ acct->nr_workers--; ++ raw_spin_unlock(&wqe->lock); ++ io_worker_ref_put(wq); ++ return false; ++ } ++ ++ refcount_set(&worker->ref, 1); ++ worker->wqe = wqe; ++ spin_lock_init(&worker->lock); ++ init_completion(&worker->ref_done); ++ ++ if (index == IO_WQ_ACCT_BOUND) ++ worker->flags |= IO_WORKER_F_BOUND; ++ ++ tsk = create_io_thread(io_wqe_worker, worker, wqe->node); ++ if (!IS_ERR(tsk)) { ++ io_init_new_worker(wqe, worker, tsk); ++ } else if (!io_should_retry_thread(PTR_ERR(tsk))) { ++ kfree(worker); ++ goto fail; ++ } else { ++ INIT_WORK(&worker->work, io_workqueue_create); ++ schedule_work(&worker->work); ++ } ++ ++ return true; ++} ++ ++/* ++ * Iterate the passed in list and call the specific function for each ++ * worker that isn't exiting ++ */ ++static bool io_wq_for_each_worker(struct io_wqe *wqe, ++ bool (*func)(struct io_worker *, void *), ++ void *data) ++{ ++ struct io_worker *worker; ++ bool ret = false; ++ ++ list_for_each_entry_rcu(worker, 
&wqe->all_list, all_list) { ++ if (io_worker_get(worker)) { ++ /* no task if node is/was offline */ ++ if (worker->task) ++ ret = func(worker, data); ++ io_worker_release(worker); ++ if (ret) ++ break; ++ } ++ } ++ ++ return ret; ++} ++ ++static bool io_wq_worker_wake(struct io_worker *worker, void *data) ++{ ++ set_notify_signal(worker->task); ++ wake_up_process(worker->task); ++ return false; ++} ++ ++static void io_run_cancel(struct io_wq_work *work, struct io_wqe *wqe) ++{ ++ struct io_wq *wq = wqe->wq; + -+ if (!nfs4_has_session(clp)) -+ goto out; + do { -+ err = nfs4_handle_exception(server, -+ _nfs4_discover_trunking(server, fhandle), -+ &exception); -+ } while (exception.retry); -+out: -+ return err; ++ work->flags |= IO_WQ_WORK_CANCEL; ++ wq->do_work(work); ++ work = wq->free_work(work); ++ } while (work); +} + - static int _nfs4_lookup_root(struct nfs_server *server, struct nfs_fh *fhandle, - struct nfs_fsinfo *info) - { -@@ -4317,7 +4384,7 @@ static int _nfs4_proc_lookup(struct rpc_clnt *clnt, struct inode *dir, - }; - unsigned short task_flags = 0; - -- if (server->nfs_client->cl_minorversion) -+ if (nfs_server_capable(dir, NFS_CAP_MOVEABLE)) - task_flags = RPC_TASK_MOVEABLE; - - /* Is this is an attribute revalidation, subject to softreval? */ -@@ -5836,7 +5903,7 @@ static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t bu - buflen = server->rsize; - - npages = DIV_ROUND_UP(buflen, PAGE_SIZE) + 1; -- pages = kmalloc_array(npages, sizeof(struct page *), GFP_NOFS); -+ pages = kmalloc_array(npages, sizeof(struct page *), GFP_KERNEL); - if (!pages) - return -ENOMEM; - -@@ -6502,7 +6569,9 @@ static void nfs4_delegreturn_release(void *calldata) - pnfs_roc_release(&data->lr.arg, &data->lr.res, - data->res.lr_ret); - if (inode) { -- nfs_post_op_update_inode_force_wcc(inode, &data->fattr); -+ nfs4_fattr_set_prechange(&data->fattr, -+ inode_peek_iversion_raw(inode)); -+ nfs_refresh_inode(inode, &data->fattr); - nfs_iput_and_deactive(inode); - } - kfree(calldata); -@@ -6551,11 +6620,14 @@ static int _nfs4_proc_delegreturn(struct inode *inode, const struct cred *cred, - .rpc_client = server->client, - .rpc_message = &msg, - .callback_ops = &nfs4_delegreturn_ops, -- .flags = RPC_TASK_ASYNC | RPC_TASK_TIMEOUT | RPC_TASK_MOVEABLE, -+ .flags = RPC_TASK_ASYNC | RPC_TASK_TIMEOUT, - }; - int status = 0; - -- data = kzalloc(sizeof(*data), GFP_NOFS); -+ if (nfs_server_capable(inode, NFS_CAP_MOVEABLE)) -+ task_setup_data.flags |= RPC_TASK_MOVEABLE; ++static void io_wqe_insert_work(struct io_wqe *wqe, struct io_wq_work *work) ++{ ++ struct io_wqe_acct *acct = io_work_get_acct(wqe, work); ++ unsigned int hash; ++ struct io_wq_work *tail; + -+ data = kzalloc(sizeof(*data), GFP_KERNEL); - if (data == NULL) - return -ENOMEM; - -@@ -6744,7 +6816,7 @@ static struct nfs4_unlockdata *nfs4_alloc_unlockdata(struct file_lock *fl, - struct nfs4_state *state = lsp->ls_state; - struct inode *inode = state->inode; - -- p = kzalloc(sizeof(*p), GFP_NOFS); -+ p = kzalloc(sizeof(*p), GFP_KERNEL); - if (p == NULL) - return NULL; - p->arg.fh = NFS_FH(inode); -@@ -6869,10 +6941,8 @@ static struct rpc_task *nfs4_do_unlck(struct file_lock *fl, - .workqueue = nfsiod_workqueue, - .flags = RPC_TASK_ASYNC, - }; -- struct nfs_client *client = -- NFS_SERVER(lsp->ls_state->inode)->nfs_client; - -- if (client->cl_minorversion) -+ if (nfs_server_capable(lsp->ls_state->inode, NFS_CAP_MOVEABLE)) - task_setup_data.flags |= RPC_TASK_MOVEABLE; - - nfs4_state_protect(NFS_SERVER(lsp->ls_state->inode)->nfs_client, 
-@@ -7148,15 +7218,13 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *f - .flags = RPC_TASK_ASYNC | RPC_TASK_CRED_NOREF, - }; - int ret; -- struct nfs_client *client = NFS_SERVER(state->inode)->nfs_client; - -- if (client->cl_minorversion) -+ if (nfs_server_capable(state->inode, NFS_CAP_MOVEABLE)) - task_setup_data.flags |= RPC_TASK_MOVEABLE; - - dprintk("%s: begin!\n", __func__); - data = nfs4_alloc_lockdata(fl, nfs_file_open_context(fl->fl_file), -- fl->fl_u.nfs4_fl.owner, -- recovery_type == NFS_LOCK_NEW ? GFP_KERNEL : GFP_NOFS); -+ fl->fl_u.nfs4_fl.owner, GFP_KERNEL); - if (data == NULL) - return -ENOMEM; - if (IS_SETLKW(cmd)) -@@ -7579,7 +7647,7 @@ nfs4_release_lockowner(struct nfs_server *server, struct nfs4_lock_state *lsp) - if (server->nfs_client->cl_mvops->minor_version != 0) - return; - -- data = kmalloc(sizeof(*data), GFP_NOFS); -+ data = kmalloc(sizeof(*data), GFP_KERNEL); - if (!data) - return; - data->lsp = lsp; -@@ -7676,7 +7744,7 @@ static int nfs4_xattr_set_nfs4_user(const struct xattr_handler *handler, - const char *key, const void *buf, - size_t buflen, int flags) - { -- struct nfs_access_entry cache; -+ u32 mask; - int ret; - - if (!nfs_server_capable(inode, NFS_CAP_XATTR)) -@@ -7691,8 +7759,8 @@ static int nfs4_xattr_set_nfs4_user(const struct xattr_handler *handler, - * do a cached access check for the XA* flags to possibly avoid - * doing an RPC and getting EACCES back. - */ -- if (!nfs_access_get_cached(inode, current_cred(), &cache, true)) { -- if (!(cache.mask & NFS_ACCESS_XAWRITE)) -+ if (!nfs_access_get_cached(inode, current_cred(), &mask, true)) { -+ if (!(mask & NFS_ACCESS_XAWRITE)) - return -EACCES; - } - -@@ -7713,14 +7781,14 @@ static int nfs4_xattr_get_nfs4_user(const struct xattr_handler *handler, - struct dentry *unused, struct inode *inode, - const char *key, void *buf, size_t buflen) - { -- struct nfs_access_entry cache; -+ u32 mask; - ssize_t ret; - - if (!nfs_server_capable(inode, NFS_CAP_XATTR)) - return -EOPNOTSUPP; - -- if (!nfs_access_get_cached(inode, current_cred(), &cache, true)) { -- if (!(cache.mask & NFS_ACCESS_XAREAD)) -+ if (!nfs_access_get_cached(inode, current_cred(), &mask, true)) { -+ if (!(mask & NFS_ACCESS_XAREAD)) - return -EACCES; - } - -@@ -7745,13 +7813,13 @@ nfs4_listxattr_nfs4_user(struct inode *inode, char *list, size_t list_len) - ssize_t ret, size; - char *buf; - size_t buflen; -- struct nfs_access_entry cache; -+ u32 mask; - - if (!nfs_server_capable(inode, NFS_CAP_XATTR)) - return 0; - -- if (!nfs_access_get_cached(inode, current_cred(), &cache, true)) { -- if (!(cache.mask & NFS_ACCESS_XALIST)) -+ if (!nfs_access_get_cached(inode, current_cred(), &mask, true)) { -+ if (!(mask & NFS_ACCESS_XALIST)) - return 0; - } - -@@ -7883,18 +7951,18 @@ int nfs4_proc_fs_locations(struct rpc_clnt *client, struct inode *dir, - * appended to this compound to identify the client ID which is - * performing recovery. 
- */ --static int _nfs40_proc_get_locations(struct inode *inode, -+static int _nfs40_proc_get_locations(struct nfs_server *server, -+ struct nfs_fh *fhandle, - struct nfs4_fs_locations *locations, - struct page *page, const struct cred *cred) - { -- struct nfs_server *server = NFS_SERVER(inode); - struct rpc_clnt *clnt = server->client; - u32 bitmask[2] = { - [0] = FATTR4_WORD0_FSID | FATTR4_WORD0_FS_LOCATIONS, - }; - struct nfs4_fs_locations_arg args = { - .clientid = server->nfs_client->cl_clientid, -- .fh = NFS_FH(inode), -+ .fh = fhandle, - .page = page, - .bitmask = bitmask, - .migration = 1, /* skip LOOKUP */ -@@ -7940,17 +8008,17 @@ static int _nfs40_proc_get_locations(struct inode *inode, - * When the client supports GETATTR(fs_locations_info), it can - * be plumbed in here. - */ --static int _nfs41_proc_get_locations(struct inode *inode, -+static int _nfs41_proc_get_locations(struct nfs_server *server, -+ struct nfs_fh *fhandle, - struct nfs4_fs_locations *locations, - struct page *page, const struct cred *cred) - { -- struct nfs_server *server = NFS_SERVER(inode); - struct rpc_clnt *clnt = server->client; - u32 bitmask[2] = { - [0] = FATTR4_WORD0_FSID | FATTR4_WORD0_FS_LOCATIONS, - }; - struct nfs4_fs_locations_arg args = { -- .fh = NFS_FH(inode), -+ .fh = fhandle, - .page = page, - .bitmask = bitmask, - .migration = 1, /* skip LOOKUP */ -@@ -7999,11 +8067,11 @@ static int _nfs41_proc_get_locations(struct inode *inode, - * -NFS4ERR_LEASE_MOVED is returned if the server still has leases - * from this client that require migration recovery. - */ --int nfs4_proc_get_locations(struct inode *inode, -+int nfs4_proc_get_locations(struct nfs_server *server, -+ struct nfs_fh *fhandle, - struct nfs4_fs_locations *locations, - struct page *page, const struct cred *cred) - { -- struct nfs_server *server = NFS_SERVER(inode); - struct nfs_client *clp = server->nfs_client; - const struct nfs4_mig_recovery_ops *ops = - clp->cl_mvops->mig_recovery_ops; -@@ -8016,10 +8084,11 @@ int nfs4_proc_get_locations(struct inode *inode, - (unsigned long long)server->fsid.major, - (unsigned long long)server->fsid.minor, - clp->cl_hostname); -- nfs_display_fhandle(NFS_FH(inode), __func__); -+ nfs_display_fhandle(fhandle, __func__); - - do { -- status = ops->get_locations(inode, locations, page, cred); -+ status = ops->get_locations(server, fhandle, locations, page, -+ cred); - if (status != -NFS4ERR_DELAY) - break; - nfs4_handle_exception(server, status, &exception); -@@ -8284,6 +8353,7 @@ nfs4_bind_one_conn_to_session_done(struct rpc_task *task, void *calldata) - case -NFS4ERR_DEADSESSION: - nfs4_schedule_session_recovery(clp->cl_session, - task->tk_status); ++ if (!io_wq_is_hashed(work)) { ++append: ++ wq_list_add_tail(&work->list, &acct->work_list); + return; - } - if (args->dir == NFS4_CDFC4_FORE_OR_BOTH && - res->dir != NFS4_CDFS4_BOTH) { -@@ -9254,7 +9324,7 @@ static struct rpc_task *_nfs41_proc_sequence(struct nfs_client *clp, - goto out_err; - - ret = ERR_PTR(-ENOMEM); -- calldata = kzalloc(sizeof(*calldata), GFP_NOFS); -+ calldata = kzalloc(sizeof(*calldata), GFP_KERNEL); - if (calldata == NULL) - goto out_put_clp; - nfs4_init_sequence(&calldata->args, &calldata->res, 0, is_privileged); -@@ -9339,6 +9409,9 @@ static int nfs41_reclaim_complete_handle_errors(struct rpc_task *task, struct nf - rpc_delay(task, NFS4_POLL_RETRY_MAX); - fallthrough; - case -NFS4ERR_RETRY_UNCACHED_REP: -+ case -EACCES: -+ dprintk("%s: failed to reclaim complete error %d for server %s, retrying\n", -+ __func__, 
task->tk_status, clp->cl_hostname); - return -EAGAIN; - case -NFS4ERR_BADSESSION: - case -NFS4ERR_DEADSESSION: -@@ -10198,7 +10271,7 @@ static int nfs41_free_stateid(struct nfs_server *server, - &task_setup.rpc_client, &msg); - - dprintk("NFS call free_stateid %p\n", stateid); -- data = kmalloc(sizeof(*data), GFP_NOFS); -+ data = kmalloc(sizeof(*data), GFP_KERNEL); - if (!data) - return -ENOMEM; - data->server = server; -@@ -10347,7 +10420,8 @@ static const struct nfs4_minor_version_ops nfs_v4_1_minor_ops = { - | NFS_CAP_POSIX_LOCK - | NFS_CAP_STATEID_NFSV41 - | NFS_CAP_ATOMIC_OPEN_V1 -- | NFS_CAP_LGOPEN, -+ | NFS_CAP_LGOPEN -+ | NFS_CAP_MOVEABLE, - .init_client = nfs41_init_client, - .shutdown_client = nfs41_shutdown_client, - .match_stateid = nfs41_match_stateid, -@@ -10382,7 +10456,8 @@ static const struct nfs4_minor_version_ops nfs_v4_2_minor_ops = { - | NFS_CAP_LAYOUTSTATS - | NFS_CAP_CLONE - | NFS_CAP_LAYOUTERROR -- | NFS_CAP_READ_PLUS, -+ | NFS_CAP_READ_PLUS -+ | NFS_CAP_MOVEABLE, - .init_client = nfs41_init_client, - .shutdown_client = nfs41_shutdown_client, - .match_stateid = nfs41_match_stateid, -@@ -10513,6 +10588,7 @@ const struct nfs_rpc_ops nfs_v4_clientops = { - .free_client = nfs4_free_client, - .create_server = nfs4_create_server, - .clone_server = nfs_clone_server, -+ .discover_trunking = nfs4_discover_trunking, - }; - - static const struct xattr_handler nfs4_xattr_nfs4_acl_handler = { -diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c -index f22818a80c2c7..83c88b54d712a 100644 ---- a/fs/nfs/nfs4state.c -+++ b/fs/nfs/nfs4state.c -@@ -49,6 +49,7 @@ - #include <linux/workqueue.h> - #include <linux/bitops.h> - #include <linux/jiffies.h> ++ } ++ ++ hash = io_get_work_hash(work); ++ tail = wqe->hash_tail[hash]; ++ wqe->hash_tail[hash] = work; ++ if (!tail) ++ goto append; ++ ++ wq_list_add_after(&work->list, &tail->list, &acct->work_list); ++} ++ ++static bool io_wq_work_match_item(struct io_wq_work *work, void *data) ++{ ++ return work == data; ++} ++ ++static void io_wqe_enqueue(struct io_wqe *wqe, struct io_wq_work *work) ++{ ++ struct io_wqe_acct *acct = io_work_get_acct(wqe, work); ++ unsigned work_flags = work->flags; ++ bool do_create; ++ ++ /* ++ * If io-wq is exiting for this task, or if the request has explicitly ++ * been marked as one that should not get executed, cancel it here. 
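++ * io_run_cancel() tags the work (and any dependent link) with
++ * IO_WQ_WORK_CANCEL and still runs it through do_work/free_work.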
++ */ ++ if (test_bit(IO_WQ_BIT_EXIT, &wqe->wq->state) || ++ (work->flags & IO_WQ_WORK_CANCEL)) { ++ io_run_cancel(work, wqe); ++ return; ++ } ++ ++ raw_spin_lock(&wqe->lock); ++ io_wqe_insert_work(wqe, work); ++ clear_bit(IO_ACCT_STALLED_BIT, &acct->flags); ++ ++ rcu_read_lock(); ++ do_create = !io_wqe_activate_free_worker(wqe, acct); ++ rcu_read_unlock(); ++ ++ raw_spin_unlock(&wqe->lock); ++ ++ if (do_create && ((work_flags & IO_WQ_WORK_CONCURRENT) || ++ !atomic_read(&acct->nr_running))) { ++ bool did_create; ++ ++ did_create = io_wqe_create_worker(wqe, acct); ++ if (likely(did_create)) ++ return; ++ ++ raw_spin_lock(&wqe->lock); ++ /* fatal condition, failed to create the first worker */ ++ if (!acct->nr_workers) { ++ struct io_cb_cancel_data match = { ++ .fn = io_wq_work_match_item, ++ .data = work, ++ .cancel_all = false, ++ }; ++ ++ if (io_acct_cancel_pending_work(wqe, acct, &match)) ++ raw_spin_lock(&wqe->lock); ++ } ++ raw_spin_unlock(&wqe->lock); ++ } ++} ++ ++void io_wq_enqueue(struct io_wq *wq, struct io_wq_work *work) ++{ ++ struct io_wqe *wqe = wq->wqes[numa_node_id()]; ++ ++ io_wqe_enqueue(wqe, work); ++} ++ ++/* ++ * Work items that hash to the same value will not be done in parallel. ++ * Used to limit concurrent writes, generally hashed by inode. ++ */ ++void io_wq_hash_work(struct io_wq_work *work, void *val) ++{ ++ unsigned int bit; ++ ++ bit = hash_ptr(val, IO_WQ_HASH_ORDER); ++ work->flags |= (IO_WQ_WORK_HASHED | (bit << IO_WQ_HASH_SHIFT)); ++} ++ ++static bool io_wq_worker_cancel(struct io_worker *worker, void *data) ++{ ++ struct io_cb_cancel_data *match = data; ++ ++ /* ++ * Hold the lock to avoid ->cur_work going out of scope, caller ++ * may dereference the passed in work. ++ */ ++ spin_lock(&worker->lock); ++ if (worker->cur_work && ++ match->fn(worker->cur_work, match->data)) { ++ set_notify_signal(worker->task); ++ match->nr_running++; ++ } ++ spin_unlock(&worker->lock); ++ ++ return match->nr_running && !match->cancel_all; ++} ++ ++static inline void io_wqe_remove_pending(struct io_wqe *wqe, ++ struct io_wq_work *work, ++ struct io_wq_work_node *prev) ++{ ++ struct io_wqe_acct *acct = io_work_get_acct(wqe, work); ++ unsigned int hash = io_get_work_hash(work); ++ struct io_wq_work *prev_work = NULL; ++ ++ if (io_wq_is_hashed(work) && work == wqe->hash_tail[hash]) { ++ if (prev) ++ prev_work = container_of(prev, struct io_wq_work, list); ++ if (prev_work && io_get_work_hash(prev_work) == hash) ++ wqe->hash_tail[hash] = prev_work; ++ else ++ wqe->hash_tail[hash] = NULL; ++ } ++ wq_list_del(&acct->work_list, &work->list, prev); ++} ++ ++static bool io_acct_cancel_pending_work(struct io_wqe *wqe, ++ struct io_wqe_acct *acct, ++ struct io_cb_cancel_data *match) ++ __releases(wqe->lock) ++{ ++ struct io_wq_work_node *node, *prev; ++ struct io_wq_work *work; ++ ++ wq_list_for_each(node, prev, &acct->work_list) { ++ work = container_of(node, struct io_wq_work, list); ++ if (!match->fn(work, match->data)) ++ continue; ++ io_wqe_remove_pending(wqe, work, prev); ++ raw_spin_unlock(&wqe->lock); ++ io_run_cancel(work, wqe); ++ match->nr_pending++; ++ /* not safe to continue after unlock */ ++ return true; ++ } ++ ++ return false; ++} ++ ++static void io_wqe_cancel_pending_work(struct io_wqe *wqe, ++ struct io_cb_cancel_data *match) ++{ ++ int i; ++retry: ++ raw_spin_lock(&wqe->lock); ++ for (i = 0; i < IO_WQ_ACCT_NR; i++) { ++ struct io_wqe_acct *acct = io_get_acct(wqe, i == 0); ++ ++ if (io_acct_cancel_pending_work(wqe, acct, match)) { ++ if (match->cancel_all) ++ goto 
retry; ++ return; ++ } ++ } ++ raw_spin_unlock(&wqe->lock); ++} ++ ++static void io_wqe_cancel_running_work(struct io_wqe *wqe, ++ struct io_cb_cancel_data *match) ++{ ++ rcu_read_lock(); ++ io_wq_for_each_worker(wqe, io_wq_worker_cancel, match); ++ rcu_read_unlock(); ++} ++ ++enum io_wq_cancel io_wq_cancel_cb(struct io_wq *wq, work_cancel_fn *cancel, ++ void *data, bool cancel_all) ++{ ++ struct io_cb_cancel_data match = { ++ .fn = cancel, ++ .data = data, ++ .cancel_all = cancel_all, ++ }; ++ int node; ++ ++ /* ++ * First check pending list, if we're lucky we can just remove it ++ * from there. CANCEL_OK means that the work is returned as-new, ++ * no completion will be posted for it. ++ */ ++ for_each_node(node) { ++ struct io_wqe *wqe = wq->wqes[node]; ++ ++ io_wqe_cancel_pending_work(wqe, &match); ++ if (match.nr_pending && !match.cancel_all) ++ return IO_WQ_CANCEL_OK; ++ } ++ ++ /* ++ * Now check if a free (going busy) or busy worker has the work ++ * currently running. If we find it there, we'll return CANCEL_RUNNING ++ * as an indication that we attempt to signal cancellation. The ++ * completion will run normally in this case. ++ */ ++ for_each_node(node) { ++ struct io_wqe *wqe = wq->wqes[node]; ++ ++ io_wqe_cancel_running_work(wqe, &match); ++ if (match.nr_running && !match.cancel_all) ++ return IO_WQ_CANCEL_RUNNING; ++ } ++ ++ if (match.nr_running) ++ return IO_WQ_CANCEL_RUNNING; ++ if (match.nr_pending) ++ return IO_WQ_CANCEL_OK; ++ return IO_WQ_CANCEL_NOTFOUND; ++} ++ ++static int io_wqe_hash_wake(struct wait_queue_entry *wait, unsigned mode, ++ int sync, void *key) ++{ ++ struct io_wqe *wqe = container_of(wait, struct io_wqe, wait); ++ int i; ++ ++ list_del_init(&wait->entry); ++ ++ rcu_read_lock(); ++ for (i = 0; i < IO_WQ_ACCT_NR; i++) { ++ struct io_wqe_acct *acct = &wqe->acct[i]; ++ ++ if (test_and_clear_bit(IO_ACCT_STALLED_BIT, &acct->flags)) ++ io_wqe_activate_free_worker(wqe, acct); ++ } ++ rcu_read_unlock(); ++ return 1; ++} ++ ++struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data) ++{ ++ int ret, node, i; ++ struct io_wq *wq; ++ ++ if (WARN_ON_ONCE(!data->free_work || !data->do_work)) ++ return ERR_PTR(-EINVAL); ++ if (WARN_ON_ONCE(!bounded)) ++ return ERR_PTR(-EINVAL); ++ ++ wq = kzalloc(struct_size(wq, wqes, nr_node_ids), GFP_KERNEL); ++ if (!wq) ++ return ERR_PTR(-ENOMEM); ++ ret = cpuhp_state_add_instance_nocalls(io_wq_online, &wq->cpuhp_node); ++ if (ret) ++ goto err_wq; ++ ++ refcount_inc(&data->hash->refs); ++ wq->hash = data->hash; ++ wq->free_work = data->free_work; ++ wq->do_work = data->do_work; ++ ++ ret = -ENOMEM; ++ for_each_node(node) { ++ struct io_wqe *wqe; ++ int alloc_node = node; ++ ++ if (!node_online(alloc_node)) ++ alloc_node = NUMA_NO_NODE; ++ wqe = kzalloc_node(sizeof(struct io_wqe), GFP_KERNEL, alloc_node); ++ if (!wqe) ++ goto err; ++ wq->wqes[node] = wqe; ++ if (!alloc_cpumask_var(&wqe->cpu_mask, GFP_KERNEL)) ++ goto err; ++ cpumask_copy(wqe->cpu_mask, cpumask_of_node(node)); ++ wqe->node = alloc_node; ++ wqe->acct[IO_WQ_ACCT_BOUND].max_workers = bounded; ++ wqe->acct[IO_WQ_ACCT_UNBOUND].max_workers = ++ task_rlimit(current, RLIMIT_NPROC); ++ INIT_LIST_HEAD(&wqe->wait.entry); ++ wqe->wait.func = io_wqe_hash_wake; ++ for (i = 0; i < IO_WQ_ACCT_NR; i++) { ++ struct io_wqe_acct *acct = &wqe->acct[i]; ++ ++ acct->index = i; ++ atomic_set(&acct->nr_running, 0); ++ INIT_WQ_LIST(&acct->work_list); ++ } ++ wqe->wq = wq; ++ raw_spin_lock_init(&wqe->lock); ++ INIT_HLIST_NULLS_HEAD(&wqe->free_list, 0); ++ 
INIT_LIST_HEAD(&wqe->all_list); ++ } ++ ++ wq->task = get_task_struct(data->task); ++ atomic_set(&wq->worker_refs, 1); ++ init_completion(&wq->worker_done); ++ return wq; ++err: ++ io_wq_put_hash(data->hash); ++ cpuhp_state_remove_instance_nocalls(io_wq_online, &wq->cpuhp_node); ++ for_each_node(node) { ++ if (!wq->wqes[node]) ++ continue; ++ free_cpumask_var(wq->wqes[node]->cpu_mask); ++ kfree(wq->wqes[node]); ++ } ++err_wq: ++ kfree(wq); ++ return ERR_PTR(ret); ++} ++ ++static bool io_task_work_match(struct callback_head *cb, void *data) ++{ ++ struct io_worker *worker; ++ ++ if (cb->func != create_worker_cb && cb->func != create_worker_cont) ++ return false; ++ worker = container_of(cb, struct io_worker, create_work); ++ return worker->wqe->wq == data; ++} ++ ++void io_wq_exit_start(struct io_wq *wq) ++{ ++ set_bit(IO_WQ_BIT_EXIT, &wq->state); ++} ++ ++static void io_wq_cancel_tw_create(struct io_wq *wq) ++{ ++ struct callback_head *cb; ++ ++ while ((cb = task_work_cancel_match(wq->task, io_task_work_match, wq)) != NULL) { ++ struct io_worker *worker; ++ ++ worker = container_of(cb, struct io_worker, create_work); ++ io_worker_cancel_cb(worker); ++ /* ++ * Only the worker continuation helper has worker allocated and ++ * hence needs freeing. ++ */ ++ if (cb->func == create_worker_cont) ++ kfree(worker); ++ } ++} ++ ++static void io_wq_exit_workers(struct io_wq *wq) ++{ ++ int node; ++ ++ if (!wq->task) ++ return; ++ ++ io_wq_cancel_tw_create(wq); ++ ++ rcu_read_lock(); ++ for_each_node(node) { ++ struct io_wqe *wqe = wq->wqes[node]; ++ ++ io_wq_for_each_worker(wqe, io_wq_worker_wake, NULL); ++ } ++ rcu_read_unlock(); ++ io_worker_ref_put(wq); ++ wait_for_completion(&wq->worker_done); ++ ++ for_each_node(node) { ++ spin_lock_irq(&wq->hash->wait.lock); ++ list_del_init(&wq->wqes[node]->wait.entry); ++ spin_unlock_irq(&wq->hash->wait.lock); ++ } ++ put_task_struct(wq->task); ++ wq->task = NULL; ++} ++ ++static void io_wq_destroy(struct io_wq *wq) ++{ ++ int node; ++ ++ cpuhp_state_remove_instance_nocalls(io_wq_online, &wq->cpuhp_node); ++ ++ for_each_node(node) { ++ struct io_wqe *wqe = wq->wqes[node]; ++ struct io_cb_cancel_data match = { ++ .fn = io_wq_work_match_all, ++ .cancel_all = true, ++ }; ++ io_wqe_cancel_pending_work(wqe, &match); ++ free_cpumask_var(wqe->cpu_mask); ++ kfree(wqe); ++ } ++ io_wq_put_hash(wq->hash); ++ kfree(wq); ++} ++ ++void io_wq_put_and_exit(struct io_wq *wq) ++{ ++ WARN_ON_ONCE(!test_bit(IO_WQ_BIT_EXIT, &wq->state)); ++ ++ io_wq_exit_workers(wq); ++ io_wq_destroy(wq); ++} ++ ++struct online_data { ++ unsigned int cpu; ++ bool online; ++}; ++ ++static bool io_wq_worker_affinity(struct io_worker *worker, void *data) ++{ ++ struct online_data *od = data; ++ ++ if (od->online) ++ cpumask_set_cpu(od->cpu, worker->wqe->cpu_mask); ++ else ++ cpumask_clear_cpu(od->cpu, worker->wqe->cpu_mask); ++ return false; ++} ++ ++static int __io_wq_cpu_online(struct io_wq *wq, unsigned int cpu, bool online) ++{ ++ struct online_data od = { ++ .cpu = cpu, ++ .online = online ++ }; ++ int i; ++ ++ rcu_read_lock(); ++ for_each_node(i) ++ io_wq_for_each_worker(wq->wqes[i], io_wq_worker_affinity, &od); ++ rcu_read_unlock(); ++ return 0; ++} ++ ++static int io_wq_cpu_online(unsigned int cpu, struct hlist_node *node) ++{ ++ struct io_wq *wq = hlist_entry_safe(node, struct io_wq, cpuhp_node); ++ ++ return __io_wq_cpu_online(wq, cpu, true); ++} ++ ++static int io_wq_cpu_offline(unsigned int cpu, struct hlist_node *node) ++{ ++ struct io_wq *wq = hlist_entry_safe(node, struct io_wq, 
cpuhp_node); ++ ++ return __io_wq_cpu_online(wq, cpu, false); ++} ++ ++int io_wq_cpu_affinity(struct io_wq *wq, cpumask_var_t mask) ++{ ++ int i; ++ ++ rcu_read_lock(); ++ for_each_node(i) { ++ struct io_wqe *wqe = wq->wqes[i]; ++ ++ if (mask) ++ cpumask_copy(wqe->cpu_mask, mask); ++ else ++ cpumask_copy(wqe->cpu_mask, cpumask_of_node(i)); ++ } ++ rcu_read_unlock(); ++ return 0; ++} ++ ++/* ++ * Set max number of unbounded workers, returns old value. If new_count is 0, ++ * then just return the old value. ++ */ ++int io_wq_max_workers(struct io_wq *wq, int *new_count) ++{ ++ int prev[IO_WQ_ACCT_NR]; ++ bool first_node = true; ++ int i, node; ++ ++ BUILD_BUG_ON((int) IO_WQ_ACCT_BOUND != (int) IO_WQ_BOUND); ++ BUILD_BUG_ON((int) IO_WQ_ACCT_UNBOUND != (int) IO_WQ_UNBOUND); ++ BUILD_BUG_ON((int) IO_WQ_ACCT_NR != 2); ++ ++ for (i = 0; i < 2; i++) { ++ if (new_count[i] > task_rlimit(current, RLIMIT_NPROC)) ++ new_count[i] = task_rlimit(current, RLIMIT_NPROC); ++ } ++ ++ for (i = 0; i < IO_WQ_ACCT_NR; i++) ++ prev[i] = 0; ++ ++ rcu_read_lock(); ++ for_each_node(node) { ++ struct io_wqe *wqe = wq->wqes[node]; ++ struct io_wqe_acct *acct; ++ ++ raw_spin_lock(&wqe->lock); ++ for (i = 0; i < IO_WQ_ACCT_NR; i++) { ++ acct = &wqe->acct[i]; ++ if (first_node) ++ prev[i] = max_t(int, acct->max_workers, prev[i]); ++ if (new_count[i]) ++ acct->max_workers = new_count[i]; ++ } ++ raw_spin_unlock(&wqe->lock); ++ first_node = false; ++ } ++ rcu_read_unlock(); ++ ++ for (i = 0; i < IO_WQ_ACCT_NR; i++) ++ new_count[i] = prev[i]; ++ ++ return 0; ++} ++ ++static __init int io_wq_init(void) ++{ ++ int ret; ++ ++ ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, "io-wq/online", ++ io_wq_cpu_online, io_wq_cpu_offline); ++ if (ret < 0) ++ return ret; ++ io_wq_online = ret; ++ return 0; ++} ++subsys_initcall(io_wq_init); +diff --git a/io_uring/io-wq.h b/io_uring/io-wq.h +new file mode 100644 +index 0000000000000..bf5c4c5337605 +--- /dev/null ++++ b/io_uring/io-wq.h +@@ -0,0 +1,160 @@ ++#ifndef INTERNAL_IO_WQ_H ++#define INTERNAL_IO_WQ_H ++ ++#include <linux/refcount.h> ++ ++struct io_wq; ++ ++enum { ++ IO_WQ_WORK_CANCEL = 1, ++ IO_WQ_WORK_HASHED = 2, ++ IO_WQ_WORK_UNBOUND = 4, ++ IO_WQ_WORK_CONCURRENT = 16, ++ ++ IO_WQ_HASH_SHIFT = 24, /* upper 8 bits are used for hash key */ ++}; ++ ++enum io_wq_cancel { ++ IO_WQ_CANCEL_OK, /* cancelled before started */ ++ IO_WQ_CANCEL_RUNNING, /* found, running, and attempted cancelled */ ++ IO_WQ_CANCEL_NOTFOUND, /* work not found */ ++}; ++ ++struct io_wq_work_node { ++ struct io_wq_work_node *next; ++}; ++ ++struct io_wq_work_list { ++ struct io_wq_work_node *first; ++ struct io_wq_work_node *last; ++}; ++ ++static inline void wq_list_add_after(struct io_wq_work_node *node, ++ struct io_wq_work_node *pos, ++ struct io_wq_work_list *list) ++{ ++ struct io_wq_work_node *next = pos->next; ++ ++ pos->next = node; ++ node->next = next; ++ if (!next) ++ list->last = node; ++} ++ ++static inline void wq_list_add_tail(struct io_wq_work_node *node, ++ struct io_wq_work_list *list) ++{ ++ node->next = NULL; ++ if (!list->first) { ++ list->last = node; ++ WRITE_ONCE(list->first, node); ++ } else { ++ list->last->next = node; ++ list->last = node; ++ } ++} ++ ++static inline void wq_list_cut(struct io_wq_work_list *list, ++ struct io_wq_work_node *last, ++ struct io_wq_work_node *prev) ++{ ++ /* first in the list, if prev==NULL */ ++ if (!prev) ++ WRITE_ONCE(list->first, last->next); ++ else ++ prev->next = last->next; ++ ++ if (last == list->last) ++ list->last = prev; ++ last->next = 
NULL; ++} ++ ++static inline void wq_list_del(struct io_wq_work_list *list, ++ struct io_wq_work_node *node, ++ struct io_wq_work_node *prev) ++{ ++ wq_list_cut(list, node, prev); ++} ++ ++#define wq_list_for_each(pos, prv, head) \ ++ for (pos = (head)->first, prv = NULL; pos; prv = pos, pos = (pos)->next) ++ ++#define wq_list_empty(list) (READ_ONCE((list)->first) == NULL) ++#define INIT_WQ_LIST(list) do { \ ++ (list)->first = NULL; \ ++ (list)->last = NULL; \ ++} while (0) ++ ++struct io_wq_work { ++ struct io_wq_work_node list; ++ unsigned flags; ++}; ++ ++static inline struct io_wq_work *wq_next_work(struct io_wq_work *work) ++{ ++ if (!work->list.next) ++ return NULL; ++ ++ return container_of(work->list.next, struct io_wq_work, list); ++} ++ ++typedef struct io_wq_work *(free_work_fn)(struct io_wq_work *); ++typedef void (io_wq_work_fn)(struct io_wq_work *); ++ ++struct io_wq_hash { ++ refcount_t refs; ++ unsigned long map; ++ struct wait_queue_head wait; ++}; ++ ++static inline void io_wq_put_hash(struct io_wq_hash *hash) ++{ ++ if (refcount_dec_and_test(&hash->refs)) ++ kfree(hash); ++} ++ ++struct io_wq_data { ++ struct io_wq_hash *hash; ++ struct task_struct *task; ++ io_wq_work_fn *do_work; ++ free_work_fn *free_work; ++}; ++ ++struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data); ++void io_wq_exit_start(struct io_wq *wq); ++void io_wq_put_and_exit(struct io_wq *wq); ++ ++void io_wq_enqueue(struct io_wq *wq, struct io_wq_work *work); ++void io_wq_hash_work(struct io_wq_work *work, void *val); ++ ++int io_wq_cpu_affinity(struct io_wq *wq, cpumask_var_t mask); ++int io_wq_max_workers(struct io_wq *wq, int *new_count); ++ ++static inline bool io_wq_is_hashed(struct io_wq_work *work) ++{ ++ return work->flags & IO_WQ_WORK_HASHED; ++} ++ ++typedef bool (work_cancel_fn)(struct io_wq_work *, void *); ++ ++enum io_wq_cancel io_wq_cancel_cb(struct io_wq *wq, work_cancel_fn *cancel, ++ void *data, bool cancel_all); ++ ++#if defined(CONFIG_IO_WQ) ++extern void io_wq_worker_sleeping(struct task_struct *); ++extern void io_wq_worker_running(struct task_struct *); ++#else ++static inline void io_wq_worker_sleeping(struct task_struct *tsk) ++{ ++} ++static inline void io_wq_worker_running(struct task_struct *tsk) ++{ ++} ++#endif ++ ++static inline bool io_wq_current_is_worker(void) ++{ ++ return in_task() && (current->flags & PF_IO_WORKER) && ++ current->pf_io_worker; ++} ++#endif +diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c +new file mode 100644 +index 0000000000000..51d6fbe17f7f3 +--- /dev/null ++++ b/io_uring/io_uring.c +@@ -0,0 +1,11261 @@ ++// SPDX-License-Identifier: GPL-2.0 ++/* ++ * Shared application/kernel submission and completion ring pairs, for ++ * supporting fast/efficient IO. ++ * ++ * A note on the read/write ordering memory barriers that are matched between ++ * the application and kernel side. ++ * ++ * After the application reads the CQ ring tail, it must use an ++ * appropriate smp_rmb() to pair with the smp_wmb() the kernel uses ++ * before writing the tail (using smp_load_acquire to read the tail will ++ * do). It also needs a smp_mb() before updating CQ head (ordering the ++ * entry load(s) with the head store), pairing with an implicit barrier ++ * through a control-dependency in io_get_cqe (smp_store_release to ++ * store head will do). Failure to do so could lead to reading invalid ++ * CQ entries. 
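++ * As a sketch (liburing-style field names assumed, they are not
++ * part of this file), a CQ consumer on the application side
++ * therefore looks like:
++ *
++ *	unsigned head = *cq->khead;
++ *	while (head != smp_load_acquire(cq->ktail)) {
++ *		struct io_uring_cqe *cqe = &cq->cqes[head & *cq->kring_mask];
++ *		... handle cqe ...
++ *		head++;
++ *	}
++ *	smp_store_release(cq->khead, head);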
++ * ++ * Likewise, the application must use an appropriate smp_wmb() before ++ * writing the SQ tail (ordering SQ entry stores with the tail store), ++ * which pairs with smp_load_acquire in io_get_sqring (smp_store_release ++ * to store the tail will do). And it needs a barrier ordering the SQ ++ * head load before writing new SQ entries (smp_load_acquire to read ++ * head will do). ++ * ++ * When using the SQ poll thread (IORING_SETUP_SQPOLL), the application ++ * needs to check the SQ flags for IORING_SQ_NEED_WAKEUP *after* ++ * updating the SQ tail; a full memory barrier smp_mb() is needed ++ * between. ++ * ++ * Also see the examples in the liburing library: ++ * ++ * git://git.kernel.dk/liburing ++ * ++ * io_uring also uses READ/WRITE_ONCE() for _any_ store or load that happens ++ * from data shared between the kernel and application. This is done both ++ * for ordering purposes, but also to ensure that once a value is loaded from ++ * data that the application could potentially modify, it remains stable. ++ * ++ * Copyright (C) 2018-2019 Jens Axboe ++ * Copyright (c) 2018-2019 Christoph Hellwig ++ */ ++#include <linux/kernel.h> ++#include <linux/init.h> ++#include <linux/errno.h> ++#include <linux/syscalls.h> ++#include <linux/compat.h> ++#include <net/compat.h> ++#include <linux/refcount.h> ++#include <linux/uio.h> ++#include <linux/bits.h> ++ ++#include <linux/sched/signal.h> ++#include <linux/fs.h> ++#include <linux/file.h> ++#include <linux/fdtable.h> ++#include <linux/mm.h> ++#include <linux/mman.h> ++#include <linux/percpu.h> ++#include <linux/slab.h> ++#include <linux/blkdev.h> ++#include <linux/bvec.h> ++#include <linux/net.h> ++#include <net/sock.h> ++#include <net/af_unix.h> ++#include <net/scm.h> ++#include <linux/anon_inodes.h> +#include <linux/sched/mm.h> - - #include <linux/sunrpc/clnt.h> - -@@ -820,7 +821,7 @@ static void __nfs4_close(struct nfs4_state *state, - - void nfs4_close_state(struct nfs4_state *state, fmode_t fmode) - { -- __nfs4_close(state, fmode, GFP_NOFS, 0); -+ __nfs4_close(state, fmode, GFP_KERNEL, 0); - } - - void nfs4_close_sync(struct nfs4_state *state, fmode_t fmode) -@@ -2097,7 +2098,8 @@ static int nfs4_try_migration(struct nfs_server *server, const struct cred *cred - } - - inode = d_inode(server->super->s_root); -- result = nfs4_proc_get_locations(inode, locations, page, cred); -+ result = nfs4_proc_get_locations(server, NFS_FH(inode), locations, -+ page, cred); - if (result) { - dprintk("<-- %s: failed to retrieve fs_locations: %d\n", - __func__, result); -@@ -2105,6 +2107,9 @@ static int nfs4_try_migration(struct nfs_server *server, const struct cred *cred - } - - result = -NFS4ERR_NXIO; -+ if (!locations->nlocations) -+ goto out; ++#include <linux/uaccess.h> ++#include <linux/nospec.h> ++#include <linux/sizes.h> ++#include <linux/hugetlb.h> ++#include <linux/highmem.h> ++#include <linux/namei.h> ++#include <linux/fsnotify.h> ++#include <linux/fadvise.h> ++#include <linux/eventpoll.h> ++#include <linux/splice.h> ++#include <linux/task_work.h> ++#include <linux/pagemap.h> ++#include <linux/io_uring.h> ++#include <linux/tracehook.h> ++ ++#define CREATE_TRACE_POINTS ++#include <trace/events/io_uring.h> ++ ++#include <uapi/linux/io_uring.h> ++ ++#include "../fs/internal.h" ++#include "io-wq.h" ++ ++#define IORING_MAX_ENTRIES 32768 ++#define IORING_MAX_CQ_ENTRIES (2 * IORING_MAX_ENTRIES) ++#define IORING_SQPOLL_CAP_ENTRIES_VALUE 8 ++ ++/* only define max */ ++#define IORING_MAX_FIXED_FILES (1U << 15) ++#define IORING_MAX_RESTRICTIONS 
(IORING_RESTRICTION_LAST + \ ++ IORING_REGISTER_LAST + IORING_OP_LAST) ++ ++#define IO_RSRC_TAG_TABLE_SHIFT (PAGE_SHIFT - 3) ++#define IO_RSRC_TAG_TABLE_MAX (1U << IO_RSRC_TAG_TABLE_SHIFT) ++#define IO_RSRC_TAG_TABLE_MASK (IO_RSRC_TAG_TABLE_MAX - 1) ++ ++#define IORING_MAX_REG_BUFFERS (1U << 14) ++ ++#define SQE_VALID_FLAGS (IOSQE_FIXED_FILE|IOSQE_IO_DRAIN|IOSQE_IO_LINK| \ ++ IOSQE_IO_HARDLINK | IOSQE_ASYNC | \ ++ IOSQE_BUFFER_SELECT) ++#define IO_REQ_CLEAN_FLAGS (REQ_F_BUFFER_SELECTED | REQ_F_NEED_CLEANUP | \ ++ REQ_F_POLLED | REQ_F_INFLIGHT | REQ_F_CREDS) ++ ++#define IO_TCTX_REFS_CACHE_NR (1U << 10) ++ ++struct io_uring { ++ u32 head ____cacheline_aligned_in_smp; ++ u32 tail ____cacheline_aligned_in_smp; ++}; ++ ++/* ++ * This data is shared with the application through the mmap at offsets ++ * IORING_OFF_SQ_RING and IORING_OFF_CQ_RING. ++ * ++ * The offsets to the member fields are published through struct ++ * io_sqring_offsets when calling io_uring_setup. ++ */ ++struct io_rings { ++ /* ++ * Head and tail offsets into the ring; the offsets need to be ++ * masked to get valid indices. ++ * ++ * The kernel controls head of the sq ring and the tail of the cq ring, ++ * and the application controls tail of the sq ring and the head of the ++ * cq ring. ++ */ ++ struct io_uring sq, cq; ++ /* ++ * Bitmasks to apply to head and tail offsets (constant, equals ++ * ring_entries - 1) ++ */ ++ u32 sq_ring_mask, cq_ring_mask; ++ /* Ring sizes (constant, power of 2) */ ++ u32 sq_ring_entries, cq_ring_entries; ++ /* ++ * Number of invalid entries dropped by the kernel due to ++ * invalid index stored in array ++ * ++ * Written by the kernel, shouldn't be modified by the ++ * application (i.e. get number of "new events" by comparing to ++ * cached value). ++ * ++ * After a new SQ head value was read by the application this ++ * counter includes all submissions that were dropped reaching ++ * the new SQ head (and possibly more). ++ */ ++ u32 sq_dropped; ++ /* ++ * Runtime SQ flags ++ * ++ * Written by the kernel, shouldn't be modified by the ++ * application. ++ * ++ * The application needs a full memory barrier before checking ++ * for IORING_SQ_NEED_WAKEUP after updating the sq tail. ++ */ ++ u32 sq_flags; ++ /* ++ * Runtime CQ flags ++ * ++ * Written by the application, shouldn't be modified by the ++ * kernel. ++ */ ++ u32 cq_flags; ++ /* ++ * Number of completion events lost because the queue was full; ++ * this should be avoided by the application by making sure ++ * there are not more requests pending than there is space in ++ * the completion queue. ++ * ++ * Written by the kernel, shouldn't be modified by the ++ * application (i.e. get number of "new events" by comparing to ++ * cached value). ++ * ++ * As completion events come in out of order this counter is not ++ * ordered with any other data. ++ */ ++ u32 cq_overflow; ++ /* ++ * Ring buffer of completion events. ++ * ++ * The kernel writes completion events fresh every time they are ++ * produced, so the application is allowed to modify pending ++ * entries. 
++ */ ++ struct io_uring_cqe cqes[] ____cacheline_aligned_in_smp; ++}; ++ ++enum io_uring_cmd_flags { ++ IO_URING_F_NONBLOCK = 1, ++ IO_URING_F_COMPLETE_DEFER = 2, ++}; ++ ++struct io_mapped_ubuf { ++ u64 ubuf; ++ u64 ubuf_end; ++ unsigned int nr_bvecs; ++ unsigned long acct_pages; ++ struct bio_vec bvec[]; ++}; ++ ++struct io_ring_ctx; ++ ++struct io_overflow_cqe { ++ struct io_uring_cqe cqe; ++ struct list_head list; ++}; ++ ++struct io_fixed_file { ++ /* file * with additional FFS_* flags */ ++ unsigned long file_ptr; ++}; ++ ++struct io_rsrc_put { ++ struct list_head list; ++ u64 tag; ++ union { ++ void *rsrc; ++ struct file *file; ++ struct io_mapped_ubuf *buf; ++ }; ++}; ++ ++struct io_file_table { ++ struct io_fixed_file *files; ++}; ++ ++struct io_rsrc_node { ++ struct percpu_ref refs; ++ struct list_head node; ++ struct list_head rsrc_list; ++ struct io_rsrc_data *rsrc_data; ++ struct llist_node llist; ++ bool done; ++}; ++ ++typedef void (rsrc_put_fn)(struct io_ring_ctx *ctx, struct io_rsrc_put *prsrc); ++ ++struct io_rsrc_data { ++ struct io_ring_ctx *ctx; ++ ++ u64 **tags; ++ unsigned int nr; ++ rsrc_put_fn *do_put; ++ atomic_t refs; ++ struct completion done; ++ bool quiesce; ++}; ++ ++struct io_buffer { ++ struct list_head list; ++ __u64 addr; ++ __u32 len; ++ __u16 bid; ++}; ++ ++struct io_restriction { ++ DECLARE_BITMAP(register_op, IORING_REGISTER_LAST); ++ DECLARE_BITMAP(sqe_op, IORING_OP_LAST); ++ u8 sqe_flags_allowed; ++ u8 sqe_flags_required; ++ bool registered; ++}; ++ ++enum { ++ IO_SQ_THREAD_SHOULD_STOP = 0, ++ IO_SQ_THREAD_SHOULD_PARK, ++}; ++ ++struct io_sq_data { ++ refcount_t refs; ++ atomic_t park_pending; ++ struct mutex lock; ++ ++ /* ctx's that are using this sqd */ ++ struct list_head ctx_list; ++ ++ struct task_struct *thread; ++ struct wait_queue_head wait; ++ ++ unsigned sq_thread_idle; ++ int sq_cpu; ++ pid_t task_pid; ++ pid_t task_tgid; ++ ++ unsigned long state; ++ struct completion exited; ++}; ++ ++#define IO_COMPL_BATCH 32 ++#define IO_REQ_CACHE_SIZE 32 ++#define IO_REQ_ALLOC_BATCH 8 ++ ++struct io_submit_link { ++ struct io_kiocb *head; ++ struct io_kiocb *last; ++}; ++ ++struct io_submit_state { ++ struct blk_plug plug; ++ struct io_submit_link link; ++ ++ /* ++ * io_kiocb alloc cache ++ */ ++ void *reqs[IO_REQ_CACHE_SIZE]; ++ unsigned int free_reqs; ++ ++ bool plug_started; ++ ++ /* ++ * Batch completion logic ++ */ ++ struct io_kiocb *compl_reqs[IO_COMPL_BATCH]; ++ unsigned int compl_nr; ++ /* inline/task_work completion list, under ->uring_lock */ ++ struct list_head free_list; ++ ++ unsigned int ios_left; ++}; ++ ++struct io_ring_ctx { ++ /* const or read-mostly hot data */ ++ struct { ++ struct percpu_ref refs; ++ ++ struct io_rings *rings; ++ unsigned int flags; ++ unsigned int compat: 1; ++ unsigned int drain_next: 1; ++ unsigned int eventfd_async: 1; ++ unsigned int restricted: 1; ++ unsigned int off_timeout_used: 1; ++ unsigned int drain_active: 1; ++ } ____cacheline_aligned_in_smp; ++ ++ /* submission data */ ++ struct { ++ struct mutex uring_lock; ++ ++ /* ++ * Ring buffer of indices into array of io_uring_sqe, which is ++ * mmapped by the application using the IORING_OFF_SQES offset. ++ * ++ * This indirection could e.g. be used to assign fixed ++ * io_uring_sqe entries to operations and only submit them to ++ * the queue when needed. ++ * ++ * The kernel modifies neither the indices array nor the entries ++ * array. 
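++ * As a sketch, submitting the sqe stored in slot N while the ring
++ * tail is T comes down to:
++ *
++ *	sq_array[T & sq_ring_mask] = N;
++ *	smp_store_release(&rings->sq.tail, T + 1);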
++ */ ++ u32 *sq_array; ++ struct io_uring_sqe *sq_sqes; ++ unsigned cached_sq_head; ++ unsigned sq_entries; ++ struct list_head defer_list; ++ ++ /* ++ * Fixed resources fast path, should be accessed only under ++ * uring_lock, and updated through io_uring_register(2) ++ */ ++ struct io_rsrc_node *rsrc_node; ++ struct io_file_table file_table; ++ unsigned nr_user_files; ++ unsigned nr_user_bufs; ++ struct io_mapped_ubuf **user_bufs; ++ ++ struct io_submit_state submit_state; ++ struct list_head timeout_list; ++ struct list_head ltimeout_list; ++ struct list_head cq_overflow_list; ++ struct xarray io_buffers; ++ struct xarray personalities; ++ u32 pers_next; ++ unsigned sq_thread_idle; ++ } ____cacheline_aligned_in_smp; ++ ++ /* IRQ completion list, under ->completion_lock */ ++ struct list_head locked_free_list; ++ unsigned int locked_free_nr; ++ ++ const struct cred *sq_creds; /* cred used for __io_sq_thread() */ ++ struct io_sq_data *sq_data; /* if using sq thread polling */ ++ ++ struct wait_queue_head sqo_sq_wait; ++ struct list_head sqd_list; ++ ++ unsigned long check_cq_overflow; ++ ++ struct { ++ unsigned cached_cq_tail; ++ unsigned cq_entries; ++ struct eventfd_ctx *cq_ev_fd; ++ struct wait_queue_head poll_wait; ++ struct wait_queue_head cq_wait; ++ unsigned cq_extra; ++ atomic_t cq_timeouts; ++ unsigned cq_last_tm_flush; ++ } ____cacheline_aligned_in_smp; ++ ++ struct { ++ spinlock_t completion_lock; ++ ++ spinlock_t timeout_lock; ++ ++ /* ++ * ->iopoll_list is protected by the ctx->uring_lock for ++ * io_uring instances that don't use IORING_SETUP_SQPOLL. ++ * For SQPOLL, only the single threaded io_sq_thread() will ++ * manipulate the list, hence no extra locking is needed there. ++ */ ++ struct list_head iopoll_list; ++ struct hlist_head *cancel_hash; ++ unsigned cancel_hash_bits; ++ bool poll_multi_queue; ++ } ____cacheline_aligned_in_smp; ++ ++ struct io_restriction restrictions; ++ ++ /* slow path rsrc auxilary data, used by update/register */ ++ struct { ++ struct io_rsrc_node *rsrc_backup_node; ++ struct io_mapped_ubuf *dummy_ubuf; ++ struct io_rsrc_data *file_data; ++ struct io_rsrc_data *buf_data; ++ ++ struct delayed_work rsrc_put_work; ++ struct llist_head rsrc_put_llist; ++ struct list_head rsrc_ref_list; ++ spinlock_t rsrc_ref_lock; ++ }; ++ ++ /* Keep this last, we don't need it for the fast path */ ++ struct { ++ #if defined(CONFIG_UNIX) ++ struct socket *ring_sock; ++ #endif ++ /* hashed buffered write serialization */ ++ struct io_wq_hash *hash_map; ++ ++ /* Only used for accounting purposes */ ++ struct user_struct *user; ++ struct mm_struct *mm_account; ++ ++ /* ctx exit and cancelation */ ++ struct llist_head fallback_llist; ++ struct delayed_work fallback_work; ++ struct work_struct exit_work; ++ struct list_head tctx_list; ++ struct completion ref_comp; ++ u32 iowq_limits[2]; ++ bool iowq_limits_set; ++ }; ++}; ++ ++struct io_uring_task { ++ /* submission side */ ++ int cached_refs; ++ struct xarray xa; ++ struct wait_queue_head wait; ++ const struct io_ring_ctx *last; ++ struct io_wq *io_wq; ++ struct percpu_counter inflight; ++ atomic_t inflight_tracked; ++ atomic_t in_idle; ++ ++ spinlock_t task_lock; ++ struct io_wq_work_list task_list; ++ struct callback_head task_work; ++ bool task_running; ++}; ++ ++/* ++ * First field must be the file pointer in all the ++ * iocb unions! 
See also 'struct kiocb' in <linux/fs.h> ++ */ ++struct io_poll_iocb { ++ struct file *file; ++ struct wait_queue_head *head; ++ __poll_t events; ++ struct wait_queue_entry wait; ++}; ++ ++struct io_poll_update { ++ struct file *file; ++ u64 old_user_data; ++ u64 new_user_data; ++ __poll_t events; ++ bool update_events; ++ bool update_user_data; ++}; ++ ++struct io_close { ++ struct file *file; ++ int fd; ++ u32 file_slot; ++}; ++ ++struct io_timeout_data { ++ struct io_kiocb *req; ++ struct hrtimer timer; ++ struct timespec64 ts; ++ enum hrtimer_mode mode; ++ u32 flags; ++}; ++ ++struct io_accept { ++ struct file *file; ++ struct sockaddr __user *addr; ++ int __user *addr_len; ++ int flags; ++ u32 file_slot; ++ unsigned long nofile; ++}; ++ ++struct io_sync { ++ struct file *file; ++ loff_t len; ++ loff_t off; ++ int flags; ++ int mode; ++}; ++ ++struct io_cancel { ++ struct file *file; ++ u64 addr; ++}; ++ ++struct io_timeout { ++ struct file *file; ++ u32 off; ++ u32 target_seq; ++ struct list_head list; ++ /* head of the link, used by linked timeouts only */ ++ struct io_kiocb *head; ++ /* for linked completions */ ++ struct io_kiocb *prev; ++}; ++ ++struct io_timeout_rem { ++ struct file *file; ++ u64 addr; ++ ++ /* timeout update */ ++ struct timespec64 ts; ++ u32 flags; ++ bool ltimeout; ++}; ++ ++struct io_rw { ++ /* NOTE: kiocb has the file as the first member, so don't do it here */ ++ struct kiocb kiocb; ++ u64 addr; ++ u64 len; ++}; ++ ++struct io_connect { ++ struct file *file; ++ struct sockaddr __user *addr; ++ int addr_len; ++}; ++ ++struct io_sr_msg { ++ struct file *file; ++ union { ++ struct compat_msghdr __user *umsg_compat; ++ struct user_msghdr __user *umsg; ++ void __user *buf; ++ }; ++ int msg_flags; ++ int bgid; ++ size_t len; ++ size_t done_io; ++ struct io_buffer *kbuf; ++}; ++ ++struct io_open { ++ struct file *file; ++ int dfd; ++ u32 file_slot; ++ struct filename *filename; ++ struct open_how how; ++ unsigned long nofile; ++}; ++ ++struct io_rsrc_update { ++ struct file *file; ++ u64 arg; ++ u32 nr_args; ++ u32 offset; ++}; ++ ++struct io_fadvise { ++ struct file *file; ++ u64 offset; ++ u32 len; ++ u32 advice; ++}; ++ ++struct io_madvise { ++ struct file *file; ++ u64 addr; ++ u32 len; ++ u32 advice; ++}; ++ ++struct io_epoll { ++ struct file *file; ++ int epfd; ++ int op; ++ int fd; ++ struct epoll_event event; ++}; ++ ++struct io_splice { ++ struct file *file_out; ++ loff_t off_out; ++ loff_t off_in; ++ u64 len; ++ int splice_fd_in; ++ unsigned int flags; ++}; ++ ++struct io_provide_buf { ++ struct file *file; ++ __u64 addr; ++ __u32 len; ++ __u32 bgid; ++ __u16 nbufs; ++ __u16 bid; ++}; ++ ++struct io_statx { ++ struct file *file; ++ int dfd; ++ unsigned int mask; ++ unsigned int flags; ++ const char __user *filename; ++ struct statx __user *buffer; ++}; ++ ++struct io_shutdown { ++ struct file *file; ++ int how; ++}; ++ ++struct io_rename { ++ struct file *file; ++ int old_dfd; ++ int new_dfd; ++ struct filename *oldpath; ++ struct filename *newpath; ++ int flags; ++}; ++ ++struct io_unlink { ++ struct file *file; ++ int dfd; ++ int flags; ++ struct filename *filename; ++}; ++ ++struct io_mkdir { ++ struct file *file; ++ int dfd; ++ umode_t mode; ++ struct filename *filename; ++}; ++ ++struct io_symlink { ++ struct file *file; ++ int new_dfd; ++ struct filename *oldpath; ++ struct filename *newpath; ++}; ++ ++struct io_hardlink { ++ struct file *file; ++ int old_dfd; ++ int new_dfd; ++ struct filename *oldpath; ++ struct filename *newpath; ++ int flags; 
++}; ++ ++struct io_completion { ++ struct file *file; ++ u32 cflags; ++}; ++ ++struct io_async_connect { ++ struct sockaddr_storage address; ++}; ++ ++struct io_async_msghdr { ++ struct iovec fast_iov[UIO_FASTIOV]; ++ /* points to an allocated iov, if NULL we use fast_iov instead */ ++ struct iovec *free_iov; ++ struct sockaddr __user *uaddr; ++ struct msghdr msg; ++ struct sockaddr_storage addr; ++}; ++ ++struct io_async_rw { ++ struct iovec fast_iov[UIO_FASTIOV]; ++ const struct iovec *free_iovec; ++ struct iov_iter iter; ++ struct iov_iter_state iter_state; ++ size_t bytes_done; ++ struct wait_page_queue wpq; ++}; ++ ++enum { ++ REQ_F_FIXED_FILE_BIT = IOSQE_FIXED_FILE_BIT, ++ REQ_F_IO_DRAIN_BIT = IOSQE_IO_DRAIN_BIT, ++ REQ_F_LINK_BIT = IOSQE_IO_LINK_BIT, ++ REQ_F_HARDLINK_BIT = IOSQE_IO_HARDLINK_BIT, ++ REQ_F_FORCE_ASYNC_BIT = IOSQE_ASYNC_BIT, ++ REQ_F_BUFFER_SELECT_BIT = IOSQE_BUFFER_SELECT_BIT, ++ ++ /* first byte is taken by user flags, shift it to not overlap */ ++ REQ_F_FAIL_BIT = 8, ++ REQ_F_INFLIGHT_BIT, ++ REQ_F_CUR_POS_BIT, ++ REQ_F_NOWAIT_BIT, ++ REQ_F_LINK_TIMEOUT_BIT, ++ REQ_F_NEED_CLEANUP_BIT, ++ REQ_F_POLLED_BIT, ++ REQ_F_BUFFER_SELECTED_BIT, ++ REQ_F_COMPLETE_INLINE_BIT, ++ REQ_F_REISSUE_BIT, ++ REQ_F_CREDS_BIT, ++ REQ_F_REFCOUNT_BIT, ++ REQ_F_ARM_LTIMEOUT_BIT, ++ REQ_F_PARTIAL_IO_BIT, ++ /* keep async read/write and isreg together and in order */ ++ REQ_F_NOWAIT_READ_BIT, ++ REQ_F_NOWAIT_WRITE_BIT, ++ REQ_F_ISREG_BIT, ++ ++ /* not a real bit, just to check we're not overflowing the space */ ++ __REQ_F_LAST_BIT, ++}; ++ ++enum { ++ /* ctx owns file */ ++ REQ_F_FIXED_FILE = BIT(REQ_F_FIXED_FILE_BIT), ++ /* drain existing IO first */ ++ REQ_F_IO_DRAIN = BIT(REQ_F_IO_DRAIN_BIT), ++ /* linked sqes */ ++ REQ_F_LINK = BIT(REQ_F_LINK_BIT), ++ /* doesn't sever on completion < 0 */ ++ REQ_F_HARDLINK = BIT(REQ_F_HARDLINK_BIT), ++ /* IOSQE_ASYNC */ ++ REQ_F_FORCE_ASYNC = BIT(REQ_F_FORCE_ASYNC_BIT), ++ /* IOSQE_BUFFER_SELECT */ ++ REQ_F_BUFFER_SELECT = BIT(REQ_F_BUFFER_SELECT_BIT), ++ ++ /* fail rest of links */ ++ REQ_F_FAIL = BIT(REQ_F_FAIL_BIT), ++ /* on inflight list, should be cancelled and waited on exit reliably */ ++ REQ_F_INFLIGHT = BIT(REQ_F_INFLIGHT_BIT), ++ /* read/write uses file position */ ++ REQ_F_CUR_POS = BIT(REQ_F_CUR_POS_BIT), ++ /* must not punt to workers */ ++ REQ_F_NOWAIT = BIT(REQ_F_NOWAIT_BIT), ++ /* has or had linked timeout */ ++ REQ_F_LINK_TIMEOUT = BIT(REQ_F_LINK_TIMEOUT_BIT), ++ /* needs cleanup */ ++ REQ_F_NEED_CLEANUP = BIT(REQ_F_NEED_CLEANUP_BIT), ++ /* already went through poll handler */ ++ REQ_F_POLLED = BIT(REQ_F_POLLED_BIT), ++ /* buffer already selected */ ++ REQ_F_BUFFER_SELECTED = BIT(REQ_F_BUFFER_SELECTED_BIT), ++ /* completion is deferred through io_comp_state */ ++ REQ_F_COMPLETE_INLINE = BIT(REQ_F_COMPLETE_INLINE_BIT), ++ /* caller should reissue async */ ++ REQ_F_REISSUE = BIT(REQ_F_REISSUE_BIT), ++ /* supports async reads */ ++ REQ_F_NOWAIT_READ = BIT(REQ_F_NOWAIT_READ_BIT), ++ /* supports async writes */ ++ REQ_F_NOWAIT_WRITE = BIT(REQ_F_NOWAIT_WRITE_BIT), ++ /* regular file */ ++ REQ_F_ISREG = BIT(REQ_F_ISREG_BIT), ++ /* has creds assigned */ ++ REQ_F_CREDS = BIT(REQ_F_CREDS_BIT), ++ /* skip refcounting if not set */ ++ REQ_F_REFCOUNT = BIT(REQ_F_REFCOUNT_BIT), ++ /* there is a linked timeout that has to be armed */ ++ REQ_F_ARM_LTIMEOUT = BIT(REQ_F_ARM_LTIMEOUT_BIT), ++ /* request has already done partial IO */ ++ REQ_F_PARTIAL_IO = BIT(REQ_F_PARTIAL_IO_BIT), ++}; ++ ++struct async_poll { ++ struct io_poll_iocb poll; ++ struct 
io_poll_iocb *double_poll; ++}; ++ ++typedef void (*io_req_tw_func_t)(struct io_kiocb *req, bool *locked); ++ ++struct io_task_work { ++ union { ++ struct io_wq_work_node node; ++ struct llist_node fallback_node; ++ }; ++ io_req_tw_func_t func; ++}; + - if (!(locations->fattr.valid & NFS_ATTR_FATTR_V4_LOCATIONS)) { - dprintk("<-- %s: No fs_locations data, migration skipped\n", - __func__); -@@ -2555,9 +2560,17 @@ static void nfs4_layoutreturn_any_run(struct nfs_client *clp) - - static void nfs4_state_manager(struct nfs_client *clp) - { -+ unsigned int memflags; - int status = 0; - const char *section = "", *section_sep = ""; - -+ /* -+ * State recovery can deadlock if the direct reclaim code tries -+ * start NFS writeback. So ensure memory allocations are all -+ * GFP_NOFS. -+ */ -+ memflags = memalloc_nofs_save(); ++enum { ++ IORING_RSRC_FILE = 0, ++ IORING_RSRC_BUFFER = 1, ++}; + - /* Ensure exclusive access to NFSv4 state */ - do { - trace_nfs4_state_mgr(clp); -@@ -2652,6 +2665,7 @@ static void nfs4_state_manager(struct nfs_client *clp) - clear_bit(NFS4CLNT_RECLAIM_NOGRACE, &clp->cl_state); - } - -+ memalloc_nofs_restore(memflags); - nfs4_end_drain_session(clp); - nfs4_clear_state_manager_bit(clp); - -@@ -2669,6 +2683,7 @@ static void nfs4_state_manager(struct nfs_client *clp) - return; - if (test_and_set_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state) != 0) - return; -+ memflags = memalloc_nofs_save(); - } while (refcount_read(&clp->cl_count) > 1 && !signalled()); - goto out_drain; - -@@ -2681,6 +2696,7 @@ out_error: - clp->cl_hostname, -status); - ssleep(1); - out_drain: -+ memalloc_nofs_restore(memflags); - nfs4_end_drain_session(clp); - nfs4_clear_state_manager_bit(clp); - } -diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c -index a8cff19c6f00c..2a1bf0a72d5bf 100644 ---- a/fs/nfs/nfs4xdr.c -+++ b/fs/nfs/nfs4xdr.c -@@ -3693,8 +3693,6 @@ static int decode_attr_fs_locations(struct xdr_stream *xdr, uint32_t *bitmap, st - if (unlikely(!p)) - goto out_eio; - n = be32_to_cpup(p); -- if (n <= 0) -- goto out_eio; - for (res->nlocations = 0; res->nlocations < n; res->nlocations++) { - u32 m; - struct nfs4_fs_location *loc; -@@ -4197,10 +4195,11 @@ static int decode_attr_security_label(struct xdr_stream *xdr, uint32_t *bitmap, - } else - printk(KERN_WARNING "%s: label too long (%u)!\n", - __func__, len); -+ if (label && label->label) -+ dprintk("%s: label=%.*s, len=%d, PI=%d, LFS=%d\n", -+ __func__, label->len, (char *)label->label, -+ label->len, label->pi, label->lfs); - } -- if (label && label->label) -- dprintk("%s: label=%s, len=%d, PI=%d, LFS=%d\n", __func__, -- (char *)label->label, label->len, label->pi, label->lfs); - return status; - } - -diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c -index cc232d1f16f2f..fdecf729fa92b 100644 ---- a/fs/nfs/pagelist.c -+++ b/fs/nfs/pagelist.c -@@ -90,10 +90,10 @@ void nfs_set_pgio_error(struct nfs_pgio_header *hdr, int error, loff_t pos) - } - } - --static inline struct nfs_page * --nfs_page_alloc(void) -+static inline struct nfs_page *nfs_page_alloc(void) - { -- struct nfs_page *p = kmem_cache_zalloc(nfs_page_cachep, GFP_KERNEL); -+ struct nfs_page *p = -+ kmem_cache_zalloc(nfs_page_cachep, nfs_io_gfp_mask()); - if (p) - INIT_LIST_HEAD(&p->wb_list); - return p; -@@ -773,6 +773,9 @@ int nfs_initiate_pgio(struct rpc_clnt *clnt, struct nfs_pgio_header *hdr, - .flags = RPC_TASK_ASYNC | flags, - }; - -+ if (nfs_server_capable(hdr->inode, NFS_CAP_MOVEABLE)) -+ task_setup_data.flags |= RPC_TASK_MOVEABLE; ++/* ++ * NOTE! 
Each of the iocb union members has the file pointer ++ * as the first entry in their struct definition. So you can ++ * access the file pointer through any of the sub-structs, ++ * or directly as just 'ki_filp' in this struct. ++ */ ++struct io_kiocb { ++ union { ++ struct file *file; ++ struct io_rw rw; ++ struct io_poll_iocb poll; ++ struct io_poll_update poll_update; ++ struct io_accept accept; ++ struct io_sync sync; ++ struct io_cancel cancel; ++ struct io_timeout timeout; ++ struct io_timeout_rem timeout_rem; ++ struct io_connect connect; ++ struct io_sr_msg sr_msg; ++ struct io_open open; ++ struct io_close close; ++ struct io_rsrc_update rsrc_update; ++ struct io_fadvise fadvise; ++ struct io_madvise madvise; ++ struct io_epoll epoll; ++ struct io_splice splice; ++ struct io_provide_buf pbuf; ++ struct io_statx statx; ++ struct io_shutdown shutdown; ++ struct io_rename rename; ++ struct io_unlink unlink; ++ struct io_mkdir mkdir; ++ struct io_symlink symlink; ++ struct io_hardlink hardlink; ++ /* use only after cleaning per-op data, see io_clean_op() */ ++ struct io_completion compl; ++ }; + - hdr->rw_ops->rw_initiate(hdr, &msg, rpc_ops, &task_setup_data, how); - - dprintk("NFS: initiated pgio call " -@@ -901,7 +904,7 @@ int nfs_generic_pgio(struct nfs_pageio_descriptor *desc, - struct nfs_commit_info cinfo; - struct nfs_page_array *pg_array = &hdr->page_array; - unsigned int pagecount, pageused; -- gfp_t gfp_flags = GFP_KERNEL; -+ gfp_t gfp_flags = nfs_io_gfp_mask(); - - pagecount = nfs_page_array_len(mirror->pg_base, mirror->pg_count); - pg_array->npages = pagecount; -@@ -988,7 +991,7 @@ nfs_pageio_alloc_mirrors(struct nfs_pageio_descriptor *desc, - desc->pg_mirrors_dynamic = NULL; - if (mirror_count == 1) - return desc->pg_mirrors_static; -- ret = kmalloc_array(mirror_count, sizeof(*ret), GFP_KERNEL); -+ ret = kmalloc_array(mirror_count, sizeof(*ret), nfs_io_gfp_mask()); - if (ret != NULL) { - for (i = 0; i < mirror_count; i++) - nfs_pageio_mirror_init(&ret[i], desc->pg_bsize); -@@ -1227,6 +1230,7 @@ static int nfs_do_recoalesce(struct nfs_pageio_descriptor *desc) - - do { - list_splice_init(&mirror->pg_list, &head); -+ mirror->pg_recoalesce = 0; - - while (!list_empty(&head)) { - struct nfs_page *req; -diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c -index 7c9090a28e5c3..7217f3eeb0692 100644 ---- a/fs/nfs/pnfs.c -+++ b/fs/nfs/pnfs.c -@@ -92,6 +92,17 @@ find_pnfs_driver(u32 id) - return local; - } - -+const struct pnfs_layoutdriver_type *pnfs_find_layoutdriver(u32 id) ++ /* opcode allocated if it needs to store data for async defer */ ++ void *async_data; ++ u8 opcode; ++ /* polled IO has completed */ ++ u8 iopoll_completed; ++ ++ u16 buf_index; ++ u32 result; ++ ++ struct io_ring_ctx *ctx; ++ unsigned int flags; ++ atomic_t refs; ++ struct task_struct *task; ++ u64 user_data; ++ ++ struct io_kiocb *link; ++ struct percpu_ref *fixed_rsrc_refs; ++ ++ /* used with ctx->iopoll_list with reads/writes */ ++ struct list_head inflight_entry; ++ struct io_task_work io_task_work; ++ /* for polled requests, i.e. 
IORING_OP_POLL_ADD and async armed poll */ ++ struct hlist_node hash_node; ++ struct async_poll *apoll; ++ struct io_wq_work work; ++ const struct cred *creds; ++ ++ /* store used ubuf, so we can prevent reloading */ ++ struct io_mapped_ubuf *imu; ++ /* stores selected buf, valid IFF REQ_F_BUFFER_SELECTED is set */ ++ struct io_buffer *kbuf; ++ atomic_t poll_refs; ++}; ++ ++struct io_tctx_node { ++ struct list_head ctx_node; ++ struct task_struct *task; ++ struct io_ring_ctx *ctx; ++}; ++ ++struct io_defer_entry { ++ struct list_head list; ++ struct io_kiocb *req; ++ u32 seq; ++}; ++ ++struct io_op_def { ++ /* needs req->file assigned */ ++ unsigned needs_file : 1; ++ /* hash wq insertion if file is a regular file */ ++ unsigned hash_reg_file : 1; ++ /* unbound wq insertion if file is a non-regular file */ ++ unsigned unbound_nonreg_file : 1; ++ /* opcode is not supported by this kernel */ ++ unsigned not_supported : 1; ++ /* set if opcode supports polled "wait" */ ++ unsigned pollin : 1; ++ unsigned pollout : 1; ++ /* op supports buffer selection */ ++ unsigned buffer_select : 1; ++ /* do prep async if is going to be punted */ ++ unsigned needs_async_setup : 1; ++ /* should block plug */ ++ unsigned plug : 1; ++ /* size of async data needed, if any */ ++ unsigned short async_size; ++}; ++ ++static const struct io_op_def io_op_defs[] = { ++ [IORING_OP_NOP] = {}, ++ [IORING_OP_READV] = { ++ .needs_file = 1, ++ .unbound_nonreg_file = 1, ++ .pollin = 1, ++ .buffer_select = 1, ++ .needs_async_setup = 1, ++ .plug = 1, ++ .async_size = sizeof(struct io_async_rw), ++ }, ++ [IORING_OP_WRITEV] = { ++ .needs_file = 1, ++ .hash_reg_file = 1, ++ .unbound_nonreg_file = 1, ++ .pollout = 1, ++ .needs_async_setup = 1, ++ .plug = 1, ++ .async_size = sizeof(struct io_async_rw), ++ }, ++ [IORING_OP_FSYNC] = { ++ .needs_file = 1, ++ }, ++ [IORING_OP_READ_FIXED] = { ++ .needs_file = 1, ++ .unbound_nonreg_file = 1, ++ .pollin = 1, ++ .plug = 1, ++ .async_size = sizeof(struct io_async_rw), ++ }, ++ [IORING_OP_WRITE_FIXED] = { ++ .needs_file = 1, ++ .hash_reg_file = 1, ++ .unbound_nonreg_file = 1, ++ .pollout = 1, ++ .plug = 1, ++ .async_size = sizeof(struct io_async_rw), ++ }, ++ [IORING_OP_POLL_ADD] = { ++ .needs_file = 1, ++ .unbound_nonreg_file = 1, ++ }, ++ [IORING_OP_POLL_REMOVE] = {}, ++ [IORING_OP_SYNC_FILE_RANGE] = { ++ .needs_file = 1, ++ }, ++ [IORING_OP_SENDMSG] = { ++ .needs_file = 1, ++ .unbound_nonreg_file = 1, ++ .pollout = 1, ++ .needs_async_setup = 1, ++ .async_size = sizeof(struct io_async_msghdr), ++ }, ++ [IORING_OP_RECVMSG] = { ++ .needs_file = 1, ++ .unbound_nonreg_file = 1, ++ .pollin = 1, ++ .buffer_select = 1, ++ .needs_async_setup = 1, ++ .async_size = sizeof(struct io_async_msghdr), ++ }, ++ [IORING_OP_TIMEOUT] = { ++ .async_size = sizeof(struct io_timeout_data), ++ }, ++ [IORING_OP_TIMEOUT_REMOVE] = { ++ /* used by timeout updates' prep() */ ++ }, ++ [IORING_OP_ACCEPT] = { ++ .needs_file = 1, ++ .unbound_nonreg_file = 1, ++ .pollin = 1, ++ }, ++ [IORING_OP_ASYNC_CANCEL] = {}, ++ [IORING_OP_LINK_TIMEOUT] = { ++ .async_size = sizeof(struct io_timeout_data), ++ }, ++ [IORING_OP_CONNECT] = { ++ .needs_file = 1, ++ .unbound_nonreg_file = 1, ++ .pollout = 1, ++ .needs_async_setup = 1, ++ .async_size = sizeof(struct io_async_connect), ++ }, ++ [IORING_OP_FALLOCATE] = { ++ .needs_file = 1, ++ }, ++ [IORING_OP_OPENAT] = {}, ++ [IORING_OP_CLOSE] = {}, ++ [IORING_OP_FILES_UPDATE] = {}, ++ [IORING_OP_STATX] = {}, ++ [IORING_OP_READ] = { ++ .needs_file = 1, ++ .unbound_nonreg_file = 1, ++ .pollin 
= 1, ++ .buffer_select = 1, ++ .plug = 1, ++ .async_size = sizeof(struct io_async_rw), ++ }, ++ [IORING_OP_WRITE] = { ++ .needs_file = 1, ++ .hash_reg_file = 1, ++ .unbound_nonreg_file = 1, ++ .pollout = 1, ++ .plug = 1, ++ .async_size = sizeof(struct io_async_rw), ++ }, ++ [IORING_OP_FADVISE] = { ++ .needs_file = 1, ++ }, ++ [IORING_OP_MADVISE] = {}, ++ [IORING_OP_SEND] = { ++ .needs_file = 1, ++ .unbound_nonreg_file = 1, ++ .pollout = 1, ++ }, ++ [IORING_OP_RECV] = { ++ .needs_file = 1, ++ .unbound_nonreg_file = 1, ++ .pollin = 1, ++ .buffer_select = 1, ++ }, ++ [IORING_OP_OPENAT2] = { ++ }, ++ [IORING_OP_EPOLL_CTL] = { ++ .unbound_nonreg_file = 1, ++ }, ++ [IORING_OP_SPLICE] = { ++ .needs_file = 1, ++ .hash_reg_file = 1, ++ .unbound_nonreg_file = 1, ++ }, ++ [IORING_OP_PROVIDE_BUFFERS] = {}, ++ [IORING_OP_REMOVE_BUFFERS] = {}, ++ [IORING_OP_TEE] = { ++ .needs_file = 1, ++ .hash_reg_file = 1, ++ .unbound_nonreg_file = 1, ++ }, ++ [IORING_OP_SHUTDOWN] = { ++ .needs_file = 1, ++ }, ++ [IORING_OP_RENAMEAT] = {}, ++ [IORING_OP_UNLINKAT] = {}, ++ [IORING_OP_MKDIRAT] = {}, ++ [IORING_OP_SYMLINKAT] = {}, ++ [IORING_OP_LINKAT] = {}, ++}; ++ ++/* requests with any of those set should undergo io_disarm_next() */ ++#define IO_DISARM_MASK (REQ_F_ARM_LTIMEOUT | REQ_F_LINK_TIMEOUT | REQ_F_FAIL) ++ ++static bool io_disarm_next(struct io_kiocb *req); ++static void io_uring_del_tctx_node(unsigned long index); ++static void io_uring_try_cancel_requests(struct io_ring_ctx *ctx, ++ struct task_struct *task, ++ bool cancel_all); ++static void io_uring_cancel_generic(bool cancel_all, struct io_sq_data *sqd); ++ ++static void io_fill_cqe_req(struct io_kiocb *req, s32 res, u32 cflags); ++ ++static void io_put_req(struct io_kiocb *req); ++static void io_put_req_deferred(struct io_kiocb *req); ++static void io_dismantle_req(struct io_kiocb *req); ++static void io_queue_linked_timeout(struct io_kiocb *req); ++static int __io_register_rsrc_update(struct io_ring_ctx *ctx, unsigned type, ++ struct io_uring_rsrc_update2 *up, ++ unsigned nr_args); ++static void io_clean_op(struct io_kiocb *req); ++static struct file *io_file_get(struct io_ring_ctx *ctx, ++ struct io_kiocb *req, int fd, bool fixed, ++ unsigned int issue_flags); ++static void __io_queue_sqe(struct io_kiocb *req); ++static void io_rsrc_put_work(struct work_struct *work); ++ ++static void io_req_task_queue(struct io_kiocb *req); ++static void io_submit_flush_completions(struct io_ring_ctx *ctx); ++static int io_req_prep_async(struct io_kiocb *req); ++ ++static int io_install_fixed_file(struct io_kiocb *req, struct file *file, ++ unsigned int issue_flags, u32 slot_index); ++static int io_close_fixed(struct io_kiocb *req, unsigned int issue_flags); ++ ++static enum hrtimer_restart io_link_timeout_fn(struct hrtimer *timer); ++ ++static struct kmem_cache *req_cachep; ++ ++static const struct file_operations io_uring_fops; ++ ++struct sock *io_uring_get_socket(struct file *file) +{ -+ return find_pnfs_driver(id); ++#if defined(CONFIG_UNIX) ++ if (file->f_op == &io_uring_fops) { ++ struct io_ring_ctx *ctx = file->private_data; ++ ++ return ctx->ring_sock->sk; ++ } ++#endif ++ return NULL; +} ++EXPORT_SYMBOL(io_uring_get_socket); + -+void pnfs_put_layoutdriver(const struct pnfs_layoutdriver_type *ld) ++static inline void io_tw_lock(struct io_ring_ctx *ctx, bool *locked) +{ -+ if (ld) -+ module_put(ld->owner); ++ if (!*locked) { ++ mutex_lock(&ctx->uring_lock); ++ *locked = true; ++ } +} + - void - unset_pnfs_layoutdriver(struct nfs_server *nfss) - { -@@ -458,6 
+469,7 @@ pnfs_mark_layout_stateid_invalid(struct pnfs_layout_hdr *lo, - pnfs_clear_lseg_state(lseg, lseg_list); - pnfs_clear_layoutreturn_info(lo); - pnfs_free_returned_lsegs(lo, lseg_list, &range, 0); -+ set_bit(NFS_LAYOUT_DRAIN, &lo->plh_flags); - if (test_bit(NFS_LAYOUT_RETURN, &lo->plh_flags) && - !test_and_set_bit(NFS_LAYOUT_RETURN_LOCK, &lo->plh_flags)) - pnfs_clear_layoutreturn_waitbit(lo); -@@ -1233,7 +1245,7 @@ pnfs_send_layoutreturn(struct pnfs_layout_hdr *lo, - int status = 0; - - *pcred = NULL; -- lrp = kzalloc(sizeof(*lrp), GFP_NOFS); -+ lrp = kzalloc(sizeof(*lrp), GFP_KERNEL); - if (unlikely(lrp == NULL)) { - status = -ENOMEM; - spin_lock(&ino->i_lock); -@@ -1906,8 +1918,9 @@ static void nfs_layoutget_begin(struct pnfs_layout_hdr *lo) - - static void nfs_layoutget_end(struct pnfs_layout_hdr *lo) - { -- if (atomic_dec_and_test(&lo->plh_outstanding)) -- wake_up_var(&lo->plh_outstanding); -+ if (atomic_dec_and_test(&lo->plh_outstanding) && -+ test_and_clear_bit(NFS_LAYOUT_DRAIN, &lo->plh_flags)) -+ wake_up_bit(&lo->plh_flags, NFS_LAYOUT_DRAIN); - } - - static bool pnfs_is_first_layoutget(struct pnfs_layout_hdr *lo) -@@ -1989,6 +2002,7 @@ lookup_again: - lo = pnfs_find_alloc_layout(ino, ctx, gfp_flags); - if (lo == NULL) { - spin_unlock(&ino->i_lock); -+ lseg = ERR_PTR(-ENOMEM); - trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg, - PNFS_UPDATE_LAYOUT_NOMEM); - goto out; -@@ -2013,11 +2027,11 @@ lookup_again: - * If the layout segment list is empty, but there are outstanding - * layoutget calls, then they might be subject to a layoutrecall. - */ -- if ((list_empty(&lo->plh_segs) || !pnfs_layout_is_valid(lo)) && -+ if (test_bit(NFS_LAYOUT_DRAIN, &lo->plh_flags) && - atomic_read(&lo->plh_outstanding) != 0) { - spin_unlock(&ino->i_lock); -- lseg = ERR_PTR(wait_var_event_killable(&lo->plh_outstanding, -- !atomic_read(&lo->plh_outstanding))); -+ lseg = ERR_PTR(wait_on_bit(&lo->plh_flags, NFS_LAYOUT_DRAIN, -+ TASK_KILLABLE)); - if (IS_ERR(lseg)) - goto out_put_layout_hdr; - pnfs_put_layout_hdr(lo); -@@ -2117,6 +2131,7 @@ lookup_again: - - lgp = pnfs_alloc_init_layoutget_args(ino, ctx, &stateid, &arg, gfp_flags); - if (!lgp) { -+ lseg = ERR_PTR(-ENOMEM); - trace_pnfs_update_layout(ino, pos, count, iomode, lo, NULL, - PNFS_UPDATE_LAYOUT_NOMEM); - nfs_layoutget_end(lo); -@@ -2139,6 +2154,12 @@ lookup_again: - case -ERECALLCONFLICT: - case -EAGAIN: - break; -+ case -ENODATA: -+ /* The server returned NFS4ERR_LAYOUTUNAVAILABLE */ -+ pnfs_layout_set_fail_bit( -+ lo, pnfs_iomode_to_fail_bit(iomode)); -+ lseg = NULL; -+ goto out_put_layout_hdr; - default: - if (!nfs_error_is_fatal(PTR_ERR(lseg))) { - pnfs_layout_clear_fail_bit(lo, pnfs_iomode_to_fail_bit(iomode)); -@@ -2394,7 +2415,8 @@ pnfs_layout_process(struct nfs4_layoutget *lgp) - goto out_forget; - } - -- if (!pnfs_layout_is_valid(lo) && !pnfs_is_first_layoutget(lo)) -+ if (test_bit(NFS_LAYOUT_DRAIN, &lo->plh_flags) && -+ !pnfs_is_first_layoutget(lo)) - goto out_forget; - - if (nfs4_stateid_match_other(&lo->plh_stateid, &res->stateid)) { -@@ -3250,7 +3272,7 @@ struct nfs4_threshold *pnfs_mdsthreshold_alloc(void) - { - struct nfs4_threshold *thp; - -- thp = kzalloc(sizeof(*thp), GFP_NOFS); -+ thp = kzalloc(sizeof(*thp), GFP_KERNEL); - if (!thp) { - dprintk("%s mdsthreshold allocation failed\n", __func__); - return NULL; -diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h -index d810ae674f4e8..3307361c79560 100644 ---- a/fs/nfs/pnfs.h -+++ b/fs/nfs/pnfs.h -@@ -109,6 +109,7 @@ enum { - NFS_LAYOUT_FIRST_LAYOUTGET, /* Serialize first 
layoutget */ - NFS_LAYOUT_INODE_FREEING, /* The inode is being freed */ - NFS_LAYOUT_HASHED, /* The layout visible */ -+ NFS_LAYOUT_DRAIN, - }; - - enum layoutdriver_policy_flags { -@@ -238,6 +239,8 @@ struct pnfs_devicelist { - - extern int pnfs_register_layoutdriver(struct pnfs_layoutdriver_type *); - extern void pnfs_unregister_layoutdriver(struct pnfs_layoutdriver_type *); -+extern const struct pnfs_layoutdriver_type *pnfs_find_layoutdriver(u32 id); -+extern void pnfs_put_layoutdriver(const struct pnfs_layoutdriver_type *ld); - - /* nfs4proc.c */ - extern size_t max_response_pages(struct nfs_server *server); -@@ -517,7 +520,7 @@ pnfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg, - { - struct pnfs_ds_commit_info *fl_cinfo = cinfo->ds; - -- if (!lseg || !fl_cinfo->ops->mark_request_commit) -+ if (!lseg || !fl_cinfo->ops || !fl_cinfo->ops->mark_request_commit) - return false; - fl_cinfo->ops->mark_request_commit(req, lseg, cinfo, ds_commit_idx); - return true; -diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c -index cf19914fec817..657c242a18ff1 100644 ---- a/fs/nfs/pnfs_nfs.c -+++ b/fs/nfs/pnfs_nfs.c -@@ -419,7 +419,7 @@ static struct nfs_commit_data * - pnfs_bucket_fetch_commitdata(struct pnfs_commit_bucket *bucket, - struct nfs_commit_info *cinfo) - { -- struct nfs_commit_data *data = nfs_commitdata_alloc(false); -+ struct nfs_commit_data *data = nfs_commitdata_alloc(); - - if (!data) - return NULL; -@@ -468,7 +468,6 @@ pnfs_bucket_alloc_ds_commits(struct list_head *list, - goto out_error; - data->ds_commit_index = i; - list_add_tail(&data->list, list); -- atomic_inc(&cinfo->mds->rpcs_out); - nreq++; - } - mutex_unlock(&NFS_I(cinfo->inode)->commit_mutex); -@@ -516,11 +515,14 @@ pnfs_generic_commit_pagelist(struct inode *inode, struct list_head *mds_pages, - unsigned int nreq = 0; - - if (!list_empty(mds_pages)) { -- data = nfs_commitdata_alloc(true); -+ data = nfs_commitdata_alloc(); -+ if (!data) { -+ nfs_retry_commit(mds_pages, NULL, cinfo, -1); -+ return -ENOMEM; -+ } - data->ds_commit_index = -1; - list_splice_init(mds_pages, &data->pages); - list_add_tail(&data->list, &list); -- atomic_inc(&cinfo->mds->rpcs_out); - nreq++; - } - -@@ -895,7 +897,7 @@ static int _nfs4_pnfs_v3_ds_connect(struct nfs_server *mds_srv, - } - - smp_wmb(); -- ds->ds_clp = clp; -+ WRITE_ONCE(ds->ds_clp, clp); - dprintk("%s [new] addr: %s\n", __func__, ds->ds_remotestr); - out: - return status; -@@ -973,7 +975,7 @@ static int _nfs4_pnfs_v4_ds_connect(struct nfs_server *mds_srv, - } - - smp_wmb(); -- ds->ds_clp = clp; -+ WRITE_ONCE(ds->ds_clp, clp); - dprintk("%s [new] addr: %s\n", __func__, ds->ds_remotestr); - out: - return status; -diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c -index ea19dbf123014..a5b0bdcb53963 100644 ---- a/fs/nfs/proc.c -+++ b/fs/nfs/proc.c -@@ -91,7 +91,8 @@ nfs_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle, - info->dtpref = fsinfo.tsize; - info->maxfilesize = 0x7FFFFFFF; - info->lease_time = 0; -- info->change_attr_type = NFS4_CHANGE_TYPE_IS_TIME_METADATA; -+ info->change_attr_type = NFS4_CHANGE_TYPE_IS_UNDEFINED; -+ info->xattr_support = 0; - return 0; - } - -diff --git a/fs/nfs/super.c b/fs/nfs/super.c -index e65c83494c052..a847011f36c96 100644 ---- a/fs/nfs/super.c -+++ b/fs/nfs/super.c -@@ -1046,22 +1046,31 @@ static void nfs_fill_super(struct super_block *sb, struct nfs_fs_context *ctx) - if (ctx->bsize) - sb->s_blocksize = nfs_block_size(ctx->bsize, &sb->s_blocksize_bits); - -- if (server->nfs_client->rpc_ops->version != 2) { -- /* 
The VFS shouldn't apply the umask to mode bits. We will do -- * so ourselves when necessary. -+ switch (server->nfs_client->rpc_ops->version) { -+ case 2: -+ sb->s_time_gran = 1000; -+ sb->s_time_min = 0; -+ sb->s_time_max = U32_MAX; -+ break; -+ case 3: -+ /* -+ * The VFS shouldn't apply the umask to mode bits. -+ * We will do so ourselves when necessary. - */ - sb->s_flags |= SB_POSIXACL; - sb->s_time_gran = 1; -- sb->s_export_op = &nfs_export_ops; -- } else -- sb->s_time_gran = 1000; -- -- if (server->nfs_client->rpc_ops->version != 4) { - sb->s_time_min = 0; - sb->s_time_max = U32_MAX; -- } else { -+ sb->s_export_op = &nfs_export_ops; -+ break; -+ case 4: -+ sb->s_flags |= SB_POSIXACL; -+ sb->s_time_gran = 1; - sb->s_time_min = S64_MIN; - sb->s_time_max = S64_MAX; -+ if (server->caps & NFS_CAP_ATOMIC_OPEN_V1) -+ sb->s_export_op = &nfs_export_ops; -+ break; - } - - sb->s_magic = NFS_SUPER_MAGIC; -diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c -index 5fa11e1aca4c2..d5ccf095b2a7d 100644 ---- a/fs/nfs/unlink.c -+++ b/fs/nfs/unlink.c -@@ -102,6 +102,10 @@ static void nfs_do_call_unlink(struct inode *inode, struct nfs_unlinkdata *data) - }; - struct rpc_task *task; - struct inode *dir = d_inode(data->dentry->d_parent); ++#define io_for_each_link(pos, head) \ ++ for (pos = (head); pos; pos = pos->link) + -+ if (nfs_server_capable(inode, NFS_CAP_MOVEABLE)) -+ task_setup_data.flags |= RPC_TASK_MOVEABLE; ++/* ++ * Shamelessly stolen from the mm implementation of page reference checking, ++ * see commit f958d7b528b1 for details. ++ */ ++#define req_ref_zero_or_close_to_overflow(req) \ ++ ((unsigned int) atomic_read(&(req->refs)) + 127u <= 127u) + - nfs_sb_active(dir->i_sb); - data->args.fh = NFS_FH(dir); - nfs_fattr_init(data->res.dir_attr); -@@ -344,6 +348,10 @@ nfs_async_rename(struct inode *old_dir, struct inode *new_dir, - .flags = RPC_TASK_ASYNC | RPC_TASK_CRED_NOREF, - }; - -+ if (nfs_server_capable(old_dir, NFS_CAP_MOVEABLE) && -+ nfs_server_capable(new_dir, NFS_CAP_MOVEABLE)) -+ task_setup_data.flags |= RPC_TASK_MOVEABLE; ++static inline bool req_ref_inc_not_zero(struct io_kiocb *req) ++{ ++ WARN_ON_ONCE(!(req->flags & REQ_F_REFCOUNT)); ++ return atomic_inc_not_zero(&req->refs); ++} + - data = kzalloc(sizeof(*data), GFP_KERNEL); - if (data == NULL) - return ERR_PTR(-ENOMEM); -diff --git a/fs/nfs/write.c b/fs/nfs/write.c -index eae9bf1140417..be70874bc3292 100644 ---- a/fs/nfs/write.c -+++ b/fs/nfs/write.c -@@ -70,27 +70,17 @@ static mempool_t *nfs_wdata_mempool; - static struct kmem_cache *nfs_cdata_cachep; - static mempool_t *nfs_commit_mempool; - --struct nfs_commit_data *nfs_commitdata_alloc(bool never_fail) -+struct nfs_commit_data *nfs_commitdata_alloc(void) - { - struct nfs_commit_data *p; - -- if (never_fail) -- p = mempool_alloc(nfs_commit_mempool, GFP_NOIO); -- else { -- /* It is OK to do some reclaim, not no safe to wait -- * for anything to be returned to the pool. -- * mempool_alloc() cannot handle that particular combination, -- * so we need two separate attempts. 
-- */ -+ p = kmem_cache_zalloc(nfs_cdata_cachep, nfs_io_gfp_mask()); -+ if (!p) { - p = mempool_alloc(nfs_commit_mempool, GFP_NOWAIT); -- if (!p) -- p = kmem_cache_alloc(nfs_cdata_cachep, GFP_NOIO | -- __GFP_NOWARN | __GFP_NORETRY); - if (!p) - return NULL; -+ memset(p, 0, sizeof(*p)); - } -- -- memset(p, 0, sizeof(*p)); - INIT_LIST_HEAD(&p->pages); - return p; - } -@@ -104,9 +94,15 @@ EXPORT_SYMBOL_GPL(nfs_commit_free); - - static struct nfs_pgio_header *nfs_writehdr_alloc(void) - { -- struct nfs_pgio_header *p = mempool_alloc(nfs_wdata_mempool, GFP_KERNEL); -+ struct nfs_pgio_header *p; - -- memset(p, 0, sizeof(*p)); -+ p = kmem_cache_zalloc(nfs_wdata_cachep, nfs_io_gfp_mask()); -+ if (!p) { -+ p = mempool_alloc(nfs_wdata_mempool, GFP_NOWAIT); -+ if (!p) -+ return NULL; -+ memset(p, 0, sizeof(*p)); ++static inline bool req_ref_put_and_test(struct io_kiocb *req) ++{ ++ if (likely(!(req->flags & REQ_F_REFCOUNT))) ++ return true; ++ ++ WARN_ON_ONCE(req_ref_zero_or_close_to_overflow(req)); ++ return atomic_dec_and_test(&req->refs); ++} ++ ++static inline void req_ref_get(struct io_kiocb *req) ++{ ++ WARN_ON_ONCE(!(req->flags & REQ_F_REFCOUNT)); ++ WARN_ON_ONCE(req_ref_zero_or_close_to_overflow(req)); ++ atomic_inc(&req->refs); ++} ++ ++static inline void __io_req_set_refcount(struct io_kiocb *req, int nr) ++{ ++ if (!(req->flags & REQ_F_REFCOUNT)) { ++ req->flags |= REQ_F_REFCOUNT; ++ atomic_set(&req->refs, nr); + } - p->rw_mode = FMODE_WRITE; - return p; - } -@@ -314,7 +310,10 @@ static void nfs_mapping_set_error(struct page *page, int error) - struct address_space *mapping = page_file_mapping(page); - - SetPageError(page); -- mapping_set_error(mapping, error); -+ filemap_set_wb_err(mapping, error); -+ if (mapping->host) -+ errseq_set(&mapping->host->i_sb->s_wb_err, -+ error == -ENOSPC ? -ENOSPC : -EIO); - nfs_set_pageerror(mapping); - } - -@@ -602,8 +601,9 @@ static void nfs_write_error(struct nfs_page *req, int error) - * Find an associated nfs write request, and prepare to flush it out - * May return an error if the user signalled nfs_wait_on_request(). 
- */ --static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio, -- struct page *page) -+static int nfs_page_async_flush(struct page *page, -+ struct writeback_control *wbc, -+ struct nfs_pageio_descriptor *pgio) - { - struct nfs_page *req; - int ret = 0; -@@ -629,11 +629,11 @@ static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio, - /* - * Remove the problematic req upon fatal errors on the server - */ -- if (nfs_error_is_fatal(ret)) { -- if (nfs_error_is_fatal_on_server(ret)) -- goto out_launder; -- } else -- ret = -EAGAIN; -+ if (nfs_error_is_fatal_on_server(ret)) -+ goto out_launder; -+ if (wbc->sync_mode == WB_SYNC_NONE) -+ ret = AOP_WRITEPAGE_ACTIVATE; -+ redirty_page_for_writepage(wbc, page); - nfs_redirty_request(req); - pgio->pg_error = 0; - } else -@@ -649,15 +649,8 @@ out_launder: - static int nfs_do_writepage(struct page *page, struct writeback_control *wbc, - struct nfs_pageio_descriptor *pgio) - { -- int ret; -- - nfs_pageio_cond_complete(pgio, page_index(page)); -- ret = nfs_page_async_flush(pgio, page); -- if (ret == -EAGAIN) { -- redirty_page_for_writepage(wbc, page); -- ret = AOP_WRITEPAGE_ACTIVATE; -- } -- return ret; -+ return nfs_page_async_flush(page, wbc, pgio); - } - - /* -@@ -676,11 +669,7 @@ static int nfs_writepage_locked(struct page *page, - err = nfs_do_writepage(page, wbc, &pgio); - pgio.pg_error = 0; - nfs_pageio_complete(&pgio); -- if (err < 0) -- return err; -- if (nfs_error_is_fatal(pgio.pg_error)) -- return pgio.pg_error; -- return 0; -+ return err; - } - - int nfs_writepage(struct page *page, struct writeback_control *wbc) -@@ -728,19 +717,19 @@ int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc) - priority = wb_priority(wbc); - } - -- nfs_pageio_init_write(&pgio, inode, priority, false, -- &nfs_async_write_completion_ops); -- pgio.pg_io_completion = ioc; -- err = write_cache_pages(mapping, wbc, nfs_writepages_callback, &pgio); -- pgio.pg_error = 0; -- nfs_pageio_complete(&pgio); -+ do { -+ nfs_pageio_init_write(&pgio, inode, priority, false, -+ &nfs_async_write_completion_ops); -+ pgio.pg_io_completion = ioc; -+ err = write_cache_pages(mapping, wbc, nfs_writepages_callback, -+ &pgio); -+ pgio.pg_error = 0; -+ nfs_pageio_complete(&pgio); -+ } while (err < 0 && !nfs_error_is_fatal(err)); - nfs_io_completion_put(ioc); - - if (err < 0) - goto out_err; -- err = pgio.pg_error; -- if (nfs_error_is_fatal(err)) -- goto out_err; - return 0; - out_err: - return err; -@@ -1038,25 +1027,11 @@ nfs_scan_commit_list(struct list_head *src, struct list_head *dst, - struct nfs_page *req, *tmp; - int ret = 0; - --restart: - list_for_each_entry_safe(req, tmp, src, wb_list) { - kref_get(&req->wb_kref); - if (!nfs_lock_request(req)) { -- int status; -- -- /* Prevent deadlock with nfs_lock_and_join_requests */ -- if (!list_empty(dst)) { -- nfs_release_request(req); -- continue; -- } -- /* Ensure we make progress to prevent livelock */ -- mutex_unlock(&NFS_I(cinfo->inode)->commit_mutex); -- status = nfs_wait_on_request(req); - nfs_release_request(req); -- mutex_lock(&NFS_I(cinfo->inode)->commit_mutex); -- if (status < 0) -- break; -- goto restart; -+ continue; - } - nfs_request_remove_commit_list(req, cinfo); - clear_bit(PG_COMMIT_TO_DS, &req->wb_flags); -@@ -1419,10 +1394,12 @@ static void nfs_initiate_write(struct nfs_pgio_header *hdr, - */ - static void nfs_redirty_request(struct nfs_page *req) - { -+ struct nfs_inode *nfsi = NFS_I(page_file_mapping(req->wb_page)->host); ++} + - /* Bump the transmission count */ - 
req->wb_nio++; - nfs_mark_request_dirty(req); -- set_bit(NFS_CONTEXT_RESEND_WRITES, &nfs_req_openctx(req)->flags); -+ atomic_long_inc(&nfsi->redirtied_pages); - nfs_end_page_writeback(req); - nfs_release_request(req); - } -@@ -1434,7 +1411,7 @@ static void nfs_async_write_error(struct list_head *head, int error) - while (!list_empty(head)) { - req = nfs_list_entry(head->next); - nfs_list_remove_request(req); -- if (nfs_error_is_fatal(error)) -+ if (nfs_error_is_fatal_on_server(error)) - nfs_write_error(req, error); - else - nfs_redirty_request(req); -@@ -1671,10 +1648,13 @@ static void nfs_commit_begin(struct nfs_mds_commit_info *cinfo) - atomic_inc(&cinfo->rpcs_out); - } - --static void nfs_commit_end(struct nfs_mds_commit_info *cinfo) -+bool nfs_commit_end(struct nfs_mds_commit_info *cinfo) - { -- if (atomic_dec_and_test(&cinfo->rpcs_out)) -+ if (atomic_dec_and_test(&cinfo->rpcs_out)) { - wake_up_var(&cinfo->rpcs_out); ++static inline void io_req_set_refcount(struct io_kiocb *req) ++{ ++ __io_req_set_refcount(req, 1); ++} ++ ++static inline void io_req_set_rsrc_node(struct io_kiocb *req) ++{ ++ struct io_ring_ctx *ctx = req->ctx; ++ ++ if (!req->fixed_rsrc_refs) { ++ req->fixed_rsrc_refs = &ctx->rsrc_node->refs; ++ percpu_ref_get(req->fixed_rsrc_refs); ++ } ++} ++ ++static void io_refs_resurrect(struct percpu_ref *ref, struct completion *compl) ++{ ++ bool got = percpu_ref_tryget(ref); ++ ++ /* already at zero, wait for ->release() */ ++ if (!got) ++ wait_for_completion(compl); ++ percpu_ref_resurrect(ref); ++ if (got) ++ percpu_ref_put(ref); ++} ++ ++static bool io_match_task(struct io_kiocb *head, struct task_struct *task, ++ bool cancel_all) ++ __must_hold(&req->ctx->timeout_lock) ++{ ++ struct io_kiocb *req; ++ ++ if (task && head->task != task) ++ return false; ++ if (cancel_all) + return true; ++ ++ io_for_each_link(req, head) { ++ if (req->flags & REQ_F_INFLIGHT) ++ return true; + } + return false; - } - - void nfs_commitdata_release(struct nfs_commit_data *data) -@@ -1706,6 +1686,10 @@ int nfs_initiate_commit(struct rpc_clnt *clnt, struct nfs_commit_data *data, - .flags = RPC_TASK_ASYNC | flags, - .priority = priority, - }; ++} + -+ if (nfs_server_capable(data->inode, NFS_CAP_MOVEABLE)) -+ task_setup_data.flags |= RPC_TASK_MOVEABLE; ++static bool io_match_linked(struct io_kiocb *head) ++{ ++ struct io_kiocb *req; + - /* Set up the initial task struct. */ - nfs_ops->commit_setup(data, &msg, &task_setup_data.rpc_client); - trace_nfs_initiate_commit(data); -@@ -1774,6 +1758,7 @@ void nfs_init_commit(struct nfs_commit_data *data, - data->res.fattr = &data->fattr; - data->res.verf = &data->verf; - nfs_fattr_init(&data->fattr); -+ nfs_commit_begin(cinfo->mds); - } - EXPORT_SYMBOL_GPL(nfs_init_commit); - -@@ -1816,11 +1801,14 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how, - if (list_empty(head)) - return 0; - -- data = nfs_commitdata_alloc(true); -+ data = nfs_commitdata_alloc(); -+ if (!data) { -+ nfs_retry_commit(head, NULL, cinfo, -1); -+ return -ENOMEM; ++ io_for_each_link(req, head) { ++ if (req->flags & REQ_F_INFLIGHT) ++ return true; + } - - /* Set up the argument struct */ - nfs_init_commit(data, head, NULL, cinfo); -- atomic_inc(&cinfo->mds->rpcs_out); - if (NFS_SERVER(inode)->nfs_client->cl_minorversion) - task_flags = RPC_TASK_MOVEABLE; - return nfs_initiate_commit(NFS_CLIENT(inode), data, NFS_PROTO(inode), -@@ -1884,7 +1872,7 @@ static void nfs_commit_release_pages(struct nfs_commit_data *data) - /* We have a mismatch. 
Write the page again */ - dprintk_cont(" mismatch\n"); - nfs_mark_request_dirty(req); -- set_bit(NFS_CONTEXT_RESEND_WRITES, &nfs_req_openctx(req)->flags); -+ atomic_long_inc(&NFS_I(data->inode)->redirtied_pages); - next: - nfs_unlock_and_release_request(req); - /* Latency breaker */ -@@ -1936,6 +1924,7 @@ static int __nfs_commit_inode(struct inode *inode, int how, - int may_wait = how & FLUSH_SYNC; - int ret, nscan; - -+ how &= ~FLUSH_SYNC; - nfs_init_cinfo_from_inode(&cinfo, inode); - nfs_commit_begin(cinfo.mds); - for (;;) { -diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c -index 9421dae227374..668c7527b17e8 100644 ---- a/fs/nfsd/export.c -+++ b/fs/nfsd/export.c -@@ -427,7 +427,7 @@ static int check_export(struct path *path, int *flags, unsigned char *uuid) - return -EINVAL; - } - -- if (mnt_user_ns(path->mnt) != &init_user_ns) { -+ if (is_idmapped_mnt(path->mnt)) { - dprintk("exp_export: export of idmapped mounts not yet supported.\n"); - return -EINVAL; - } -diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c -index be3c1aad50ea3..1e8c31ed6c7c4 100644 ---- a/fs/nfsd/filecache.c -+++ b/fs/nfsd/filecache.c -@@ -187,14 +187,7 @@ nfsd_file_alloc(struct inode *inode, unsigned int may, unsigned int hashval, - nf->nf_hashval = hashval; - refcount_set(&nf->nf_ref, 1); - nf->nf_may = may & NFSD_FILE_MAY_MASK; -- if (may & NFSD_MAY_NOT_BREAK_LEASE) { -- if (may & NFSD_MAY_WRITE) -- __set_bit(NFSD_FILE_BREAK_WRITE, &nf->nf_flags); -- if (may & NFSD_MAY_READ) -- __set_bit(NFSD_FILE_BREAK_READ, &nf->nf_flags); -- } - nf->nf_mark = NULL; -- init_rwsem(&nf->nf_rwsem); - trace_nfsd_file_alloc(nf); - } - return nf; -@@ -641,7 +634,7 @@ nfsd_file_cache_init(void) - if (!nfsd_filecache_wq) - goto out; - -- nfsd_file_hashtbl = kcalloc(NFSD_FILE_HASH_SIZE, -+ nfsd_file_hashtbl = kvcalloc(NFSD_FILE_HASH_SIZE, - sizeof(*nfsd_file_hashtbl), GFP_KERNEL); - if (!nfsd_file_hashtbl) { - pr_err("nfsd: unable to allocate nfsd_file_hashtbl\n"); -@@ -709,7 +702,7 @@ out_err: - nfsd_file_slab = NULL; - kmem_cache_destroy(nfsd_file_mark_slab); - nfsd_file_mark_slab = NULL; -- kfree(nfsd_file_hashtbl); -+ kvfree(nfsd_file_hashtbl); - nfsd_file_hashtbl = NULL; - destroy_workqueue(nfsd_filecache_wq); - nfsd_filecache_wq = NULL; -@@ -855,7 +848,7 @@ nfsd_file_cache_shutdown(void) - fsnotify_wait_marks_destroyed(); - kmem_cache_destroy(nfsd_file_mark_slab); - nfsd_file_mark_slab = NULL; -- kfree(nfsd_file_hashtbl); -+ kvfree(nfsd_file_hashtbl); - nfsd_file_hashtbl = NULL; - destroy_workqueue(nfsd_filecache_wq); - nfsd_filecache_wq = NULL; -@@ -991,21 +984,7 @@ wait_for_construction: - - this_cpu_inc(nfsd_file_cache_hits); - -- if (!(may_flags & NFSD_MAY_NOT_BREAK_LEASE)) { -- bool write = (may_flags & NFSD_MAY_WRITE); -- -- if (test_bit(NFSD_FILE_BREAK_READ, &nf->nf_flags) || -- (test_bit(NFSD_FILE_BREAK_WRITE, &nf->nf_flags) && write)) { -- status = nfserrno(nfsd_open_break_lease( -- file_inode(nf->nf_file), may_flags)); -- if (status == nfs_ok) { -- clear_bit(NFSD_FILE_BREAK_READ, &nf->nf_flags); -- if (write) -- clear_bit(NFSD_FILE_BREAK_WRITE, -- &nf->nf_flags); -- } -- } -- } -+ status = nfserrno(nfsd_open_break_lease(file_inode(nf->nf_file), may_flags)); - out: - if (status == nfs_ok) { - *pnf = nf; -diff --git a/fs/nfsd/filecache.h b/fs/nfsd/filecache.h -index 7872df5a0fe3a..63104be2865c5 100644 ---- a/fs/nfsd/filecache.h -+++ b/fs/nfsd/filecache.h -@@ -37,16 +37,13 @@ struct nfsd_file { - struct net *nf_net; - #define NFSD_FILE_HASHED (0) - #define NFSD_FILE_PENDING (1) --#define NFSD_FILE_BREAK_READ (2) 
--#define NFSD_FILE_BREAK_WRITE (3) --#define NFSD_FILE_REFERENCED (4) -+#define NFSD_FILE_REFERENCED (2) - unsigned long nf_flags; - struct inode *nf_inode; - unsigned int nf_hashval; - refcount_t nf_ref; - unsigned char nf_may; - struct nfsd_file_mark *nf_mark; -- struct rw_semaphore nf_rwsem; - }; - - int nfsd_file_cache_init(void); -diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c -index 17715a6c7a409..eaf785aec0708 100644 ---- a/fs/nfsd/nfs3proc.c -+++ b/fs/nfsd/nfs3proc.c -@@ -146,17 +146,21 @@ nfsd3_proc_read(struct svc_rqst *rqstp) - { - struct nfsd3_readargs *argp = rqstp->rq_argp; - struct nfsd3_readres *resp = rqstp->rq_resp; -- u32 max_blocksize = svc_max_payload(rqstp); - unsigned int len; - int v; - -- argp->count = min_t(u32, argp->count, max_blocksize); -- - dprintk("nfsd: READ(3) %s %lu bytes at %Lu\n", - SVCFH_fmt(&argp->fh), - (unsigned long) argp->count, - (unsigned long long) argp->offset); - -+ argp->count = min_t(u32, argp->count, svc_max_payload(rqstp)); -+ argp->count = min_t(u32, argp->count, rqstp->rq_res.buflen); -+ if (argp->offset > (u64)OFFSET_MAX) -+ argp->offset = (u64)OFFSET_MAX; -+ if (argp->offset + argp->count > (u64)OFFSET_MAX) -+ argp->count = (u64)OFFSET_MAX - argp->offset; ++ return false; ++} ++ ++/* ++ * As io_match_task() but protected against racing with linked timeouts. ++ * User must not hold timeout_lock. ++ */ ++static bool io_match_task_safe(struct io_kiocb *head, struct task_struct *task, ++ bool cancel_all) ++{ ++ bool matched; ++ ++ if (task && head->task != task) ++ return false; ++ if (cancel_all) ++ return true; ++ ++ if (head->flags & REQ_F_LINK_TIMEOUT) { ++ struct io_ring_ctx *ctx = head->ctx; ++ ++ /* protect against races with linked timeouts */ ++ spin_lock_irq(&ctx->timeout_lock); ++ matched = io_match_linked(head); ++ spin_unlock_irq(&ctx->timeout_lock); ++ } else { ++ matched = io_match_linked(head); ++ } ++ return matched; ++} ++ ++static inline void req_set_fail(struct io_kiocb *req) ++{ ++ req->flags |= REQ_F_FAIL; ++} ++ ++static inline void req_fail_link_node(struct io_kiocb *req, int res) ++{ ++ req_set_fail(req); ++ req->result = res; ++} ++ ++static void io_ring_ctx_ref_free(struct percpu_ref *ref) ++{ ++ struct io_ring_ctx *ctx = container_of(ref, struct io_ring_ctx, refs); ++ ++ complete(&ctx->ref_comp); ++} ++ ++static inline bool io_is_timeout_noseq(struct io_kiocb *req) ++{ ++ return !req->timeout.off; ++} ++ ++static void io_fallback_req_func(struct work_struct *work) ++{ ++ struct io_ring_ctx *ctx = container_of(work, struct io_ring_ctx, ++ fallback_work.work); ++ struct llist_node *node = llist_del_all(&ctx->fallback_llist); ++ struct io_kiocb *req, *tmp; ++ bool locked = false; ++ ++ percpu_ref_get(&ctx->refs); ++ llist_for_each_entry_safe(req, tmp, node, io_task_work.fallback_node) ++ req->io_task_work.func(req, &locked); ++ ++ if (locked) { ++ if (ctx->submit_state.compl_nr) ++ io_submit_flush_completions(ctx); ++ mutex_unlock(&ctx->uring_lock); ++ } ++ percpu_ref_put(&ctx->refs); ++ ++} ++ ++static struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p) ++{ ++ struct io_ring_ctx *ctx; ++ int hash_bits; ++ ++ ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); ++ if (!ctx) ++ return NULL; ++ ++ /* ++ * Use 5 bits less than the max cq entries, that should give us around ++ * 32 entries per hash list if totally full and uniformly spread. 
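++ *
++ * E.g. with p->cq_entries == 4096: ilog2(4096) == 12, hash_bits
++ * becomes 7, and the table gets 1U << 7 == 128 hash lists, so a
++ * completely full CQ averages 4096 / 128 == 32 requests per list,
++ * as estimated above.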
++ */ ++ hash_bits = ilog2(p->cq_entries); ++ hash_bits -= 5; ++ if (hash_bits <= 0) ++ hash_bits = 1; ++ ctx->cancel_hash_bits = hash_bits; ++ ctx->cancel_hash = kmalloc((1U << hash_bits) * sizeof(struct hlist_head), ++ GFP_KERNEL); ++ if (!ctx->cancel_hash) ++ goto err; ++ __hash_init(ctx->cancel_hash, 1U << hash_bits); ++ ++ ctx->dummy_ubuf = kzalloc(sizeof(*ctx->dummy_ubuf), GFP_KERNEL); ++ if (!ctx->dummy_ubuf) ++ goto err; ++ /* set invalid range, so io_import_fixed() fails meeting it */ ++ ctx->dummy_ubuf->ubuf = -1UL; ++ ++ if (percpu_ref_init(&ctx->refs, io_ring_ctx_ref_free, ++ PERCPU_REF_ALLOW_REINIT, GFP_KERNEL)) ++ goto err; ++ ++ ctx->flags = p->flags; ++ init_waitqueue_head(&ctx->sqo_sq_wait); ++ INIT_LIST_HEAD(&ctx->sqd_list); ++ init_waitqueue_head(&ctx->poll_wait); ++ INIT_LIST_HEAD(&ctx->cq_overflow_list); ++ init_completion(&ctx->ref_comp); ++ xa_init_flags(&ctx->io_buffers, XA_FLAGS_ALLOC1); ++ xa_init_flags(&ctx->personalities, XA_FLAGS_ALLOC1); ++ mutex_init(&ctx->uring_lock); ++ init_waitqueue_head(&ctx->cq_wait); ++ spin_lock_init(&ctx->completion_lock); ++ spin_lock_init(&ctx->timeout_lock); ++ INIT_LIST_HEAD(&ctx->iopoll_list); ++ INIT_LIST_HEAD(&ctx->defer_list); ++ INIT_LIST_HEAD(&ctx->timeout_list); ++ INIT_LIST_HEAD(&ctx->ltimeout_list); ++ spin_lock_init(&ctx->rsrc_ref_lock); ++ INIT_LIST_HEAD(&ctx->rsrc_ref_list); ++ INIT_DELAYED_WORK(&ctx->rsrc_put_work, io_rsrc_put_work); ++ init_llist_head(&ctx->rsrc_put_llist); ++ INIT_LIST_HEAD(&ctx->tctx_list); ++ INIT_LIST_HEAD(&ctx->submit_state.free_list); ++ INIT_LIST_HEAD(&ctx->locked_free_list); ++ INIT_DELAYED_WORK(&ctx->fallback_work, io_fallback_req_func); ++ return ctx; ++err: ++ kfree(ctx->dummy_ubuf); ++ kfree(ctx->cancel_hash); ++ kfree(ctx); ++ return NULL; ++} ++ ++static void io_account_cq_overflow(struct io_ring_ctx *ctx) ++{ ++ struct io_rings *r = ctx->rings; ++ ++ WRITE_ONCE(r->cq_overflow, READ_ONCE(r->cq_overflow) + 1); ++ ctx->cq_extra--; ++} ++ ++static bool req_need_defer(struct io_kiocb *req, u32 seq) ++{ ++ if (unlikely(req->flags & REQ_F_IO_DRAIN)) { ++ struct io_ring_ctx *ctx = req->ctx; ++ ++ return seq + READ_ONCE(ctx->cq_extra) != ctx->cached_cq_tail; ++ } ++ ++ return false; ++} ++ ++#define FFS_ASYNC_READ 0x1UL ++#define FFS_ASYNC_WRITE 0x2UL ++#ifdef CONFIG_64BIT ++#define FFS_ISREG 0x4UL ++#else ++#define FFS_ISREG 0x0UL ++#endif ++#define FFS_MASK ~(FFS_ASYNC_READ|FFS_ASYNC_WRITE|FFS_ISREG) ++ ++static inline bool io_req_ffs_set(struct io_kiocb *req) ++{ ++ return IS_ENABLED(CONFIG_64BIT) && (req->flags & REQ_F_FIXED_FILE); ++} ++ ++static void io_req_track_inflight(struct io_kiocb *req) ++{ ++ if (!(req->flags & REQ_F_INFLIGHT)) { ++ req->flags |= REQ_F_INFLIGHT; ++ atomic_inc(&req->task->io_uring->inflight_tracked); ++ } ++} ++ ++static struct io_kiocb *__io_prep_linked_timeout(struct io_kiocb *req) ++{ ++ if (WARN_ON_ONCE(!req->link)) ++ return NULL; ++ ++ req->flags &= ~REQ_F_ARM_LTIMEOUT; ++ req->flags |= REQ_F_LINK_TIMEOUT; ++ ++ /* linked timeouts should have two refs once prep'ed */ ++ io_req_set_refcount(req); ++ __io_req_set_refcount(req->link, 2); ++ return req->link; ++} ++ ++static inline struct io_kiocb *io_prep_linked_timeout(struct io_kiocb *req) ++{ ++ if (likely(!(req->flags & REQ_F_ARM_LTIMEOUT))) ++ return NULL; ++ return __io_prep_linked_timeout(req); ++} ++ ++static void io_prep_async_work(struct io_kiocb *req) ++{ ++ const struct io_op_def *def = &io_op_defs[req->opcode]; ++ struct io_ring_ctx *ctx = req->ctx; ++ ++ if (!(req->flags & REQ_F_CREDS)) { ++ 
req->flags |= REQ_F_CREDS; ++ req->creds = get_current_cred(); ++ } ++ ++ req->work.list.next = NULL; ++ req->work.flags = 0; ++ if (req->flags & REQ_F_FORCE_ASYNC) ++ req->work.flags |= IO_WQ_WORK_CONCURRENT; ++ ++ if (req->flags & REQ_F_ISREG) { ++ if (def->hash_reg_file || (ctx->flags & IORING_SETUP_IOPOLL)) ++ io_wq_hash_work(&req->work, file_inode(req->file)); ++ } else if (!req->file || !S_ISBLK(file_inode(req->file)->i_mode)) { ++ if (def->unbound_nonreg_file) ++ req->work.flags |= IO_WQ_WORK_UNBOUND; ++ } ++} ++ ++static void io_prep_async_link(struct io_kiocb *req) ++{ ++ struct io_kiocb *cur; ++ ++ if (req->flags & REQ_F_LINK_TIMEOUT) { ++ struct io_ring_ctx *ctx = req->ctx; ++ ++ spin_lock_irq(&ctx->timeout_lock); ++ io_for_each_link(cur, req) ++ io_prep_async_work(cur); ++ spin_unlock_irq(&ctx->timeout_lock); ++ } else { ++ io_for_each_link(cur, req) ++ io_prep_async_work(cur); ++ } ++} ++ ++static void io_queue_async_work(struct io_kiocb *req, bool *locked) ++{ ++ struct io_ring_ctx *ctx = req->ctx; ++ struct io_kiocb *link = io_prep_linked_timeout(req); ++ struct io_uring_task *tctx = req->task->io_uring; + - v = 0; - len = argp->count; - resp->pages = rqstp->rq_next_page; -@@ -199,19 +203,19 @@ nfsd3_proc_write(struct svc_rqst *rqstp) - (unsigned long long) argp->offset, - argp->stable? " stable" : ""); - -+ resp->status = nfserr_fbig; -+ if (argp->offset > (u64)OFFSET_MAX || -+ argp->offset + argp->len > (u64)OFFSET_MAX) -+ return rpc_success; ++ /* must not take the lock, NULL it as a precaution */ ++ locked = NULL; + - fh_copy(&resp->fh, &argp->fh); - resp->committed = argp->stable; -- nvecs = svc_fill_write_vector(rqstp, rqstp->rq_arg.pages, -- &argp->first, cnt); -- if (!nvecs) { -- resp->status = nfserr_io; -- goto out; -- } -+ nvecs = svc_fill_write_vector(rqstp, &argp->payload); ++ BUG_ON(!tctx); ++ BUG_ON(!tctx->io_wq); + - resp->status = nfsd_write(rqstp, &resp->fh, argp->offset, - rqstp->rq_vec, nvecs, &cnt, - resp->committed, resp->verf); - resp->count = cnt; --out: - return rpc_success; - } - -@@ -439,22 +443,20 @@ nfsd3_proc_link(struct svc_rqst *rqstp) - - static void nfsd3_init_dirlist_pages(struct svc_rqst *rqstp, - struct nfsd3_readdirres *resp, -- int count) -+ u32 count) - { - struct xdr_buf *buf = &resp->dirlist; - struct xdr_stream *xdr = &resp->xdr; -- -- count = min_t(u32, count, svc_max_payload(rqstp)); -+ unsigned int sendbuf = min_t(unsigned int, rqstp->rq_res.buflen, -+ svc_max_payload(rqstp)); - - memset(buf, 0, sizeof(*buf)); - - /* Reserve room for the NULL ptr & eof flag (-2 words) */ -- buf->buflen = count - XDR_UNIT * 2; -+ buf->buflen = clamp(count, (u32)(XDR_UNIT * 2), sendbuf); -+ buf->buflen -= XDR_UNIT * 2; - buf->pages = rqstp->rq_next_page; -- while (count > 0) { -- rqstp->rq_next_page++; -- count -= PAGE_SIZE; -- } -+ rqstp->rq_next_page += (buf->buflen + PAGE_SIZE - 1) >> PAGE_SHIFT; - - /* This is xdr_init_encode(), but it assumes that - * the head kvec has already been consumed. 
*/ -@@ -463,7 +465,7 @@ static void nfsd3_init_dirlist_pages(struct svc_rqst *rqstp, - xdr->page_ptr = buf->pages; - xdr->iov = NULL; - xdr->p = page_address(*buf->pages); -- xdr->end = xdr->p + (PAGE_SIZE >> 2); -+ xdr->end = (void *)xdr->p + min_t(u32, buf->buflen, PAGE_SIZE); - xdr->rqst = NULL; - } - -@@ -659,15 +661,9 @@ nfsd3_proc_commit(struct svc_rqst *rqstp) - argp->count, - (unsigned long long) argp->offset); - -- if (argp->offset > NFS_OFFSET_MAX) { -- resp->status = nfserr_inval; -- goto out; -- } -- - fh_copy(&resp->fh, &argp->fh); - resp->status = nfsd_commit(rqstp, &resp->fh, argp->offset, - argp->count, resp->verf); --out: - return rpc_success; - } - -diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c -index 0a5ebc52e6a9c..48d4f99b7f901 100644 ---- a/fs/nfsd/nfs3xdr.c -+++ b/fs/nfsd/nfs3xdr.c -@@ -254,7 +254,7 @@ svcxdr_decode_sattr3(struct svc_rqst *rqstp, struct xdr_stream *xdr, - if (xdr_stream_decode_u64(xdr, &newsize) < 0) - return false; - iap->ia_valid |= ATTR_SIZE; -- iap->ia_size = min_t(u64, newsize, NFS_OFFSET_MAX); -+ iap->ia_size = newsize; - } - if (xdr_stream_decode_u32(xdr, &set_it) < 0) - return false; -@@ -487,11 +487,6 @@ neither: - return true; - } - --static bool fs_supports_change_attribute(struct super_block *sb) --{ -- return sb->s_flags & SB_I_VERSION || sb->s_export_op->fetch_iversion; --} -- - /* - * Fill in the pre_op attr for the wcc data - */ -@@ -500,26 +495,24 @@ void fill_pre_wcc(struct svc_fh *fhp) - struct inode *inode; - struct kstat stat; - bool v4 = (fhp->fh_maxsize == NFS4_FHSIZE); -+ __be32 err; - - if (fhp->fh_no_wcc || fhp->fh_pre_saved) - return; - inode = d_inode(fhp->fh_dentry); -- if (fs_supports_change_attribute(inode->i_sb) || !v4) { -- __be32 err = fh_getattr(fhp, &stat); -- -- if (err) { -- /* Grab the times from inode anyway */ -- stat.mtime = inode->i_mtime; -- stat.ctime = inode->i_ctime; -- stat.size = inode->i_size; -- } -- fhp->fh_pre_mtime = stat.mtime; -- fhp->fh_pre_ctime = stat.ctime; -- fhp->fh_pre_size = stat.size; -+ err = fh_getattr(fhp, &stat); -+ if (err) { -+ /* Grab the times from inode anyway */ -+ stat.mtime = inode->i_mtime; -+ stat.ctime = inode->i_ctime; -+ stat.size = inode->i_size; - } - if (v4) - fhp->fh_pre_change = nfsd4_change_attribute(&stat, inode); - -+ fhp->fh_pre_mtime = stat.mtime; -+ fhp->fh_pre_ctime = stat.ctime; -+ fhp->fh_pre_size = stat.size; - fhp->fh_pre_saved = true; - } - -@@ -530,6 +523,7 @@ void fill_post_wcc(struct svc_fh *fhp) - { - bool v4 = (fhp->fh_maxsize == NFS4_FHSIZE); - struct inode *inode = d_inode(fhp->fh_dentry); -+ __be32 err; - - if (fhp->fh_no_wcc) - return; -@@ -537,16 +531,12 @@ void fill_post_wcc(struct svc_fh *fhp) - if (fhp->fh_post_saved) - printk("nfsd: inode locked twice during operation.\n"); - -- fhp->fh_post_saved = true; -- -- if (fs_supports_change_attribute(inode->i_sb) || !v4) { -- __be32 err = fh_getattr(fhp, &fhp->fh_post_attr); -- -- if (err) { -- fhp->fh_post_saved = false; -- fhp->fh_post_attr.ctime = inode->i_ctime; -- } -- } -+ err = fh_getattr(fhp, &fhp->fh_post_attr); -+ if (err) { -+ fhp->fh_post_saved = false; -+ fhp->fh_post_attr.ctime = inode->i_ctime; -+ } else -+ fhp->fh_post_saved = true; - if (v4) - fhp->fh_post_change = - nfsd4_change_attribute(&fhp->fh_post_attr, inode); -@@ -621,9 +611,6 @@ nfs3svc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p) - struct xdr_stream *xdr = &rqstp->rq_arg_stream; - struct nfsd3_writeargs *args = rqstp->rq_argp; - u32 max_blocksize = svc_max_payload(rqstp); -- struct kvec *head = 
rqstp->rq_arg.head; -- struct kvec *tail = rqstp->rq_arg.tail; -- size_t remaining; - - if (!svcxdr_decode_nfs_fh3(xdr, &args->fh)) - return 0; -@@ -641,17 +628,12 @@ nfs3svc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p) - /* request sanity */ - if (args->count != args->len) - return 0; -- remaining = head->iov_len + rqstp->rq_arg.page_len + tail->iov_len; -- remaining -= xdr_stream_pos(xdr); -- if (remaining < xdr_align_size(args->len)) -- return 0; - if (args->count > max_blocksize) { - args->count = max_blocksize; - args->len = max_blocksize; - } -- -- args->first.iov_base = xdr->p; -- args->first.iov_len = head->iov_len - xdr_stream_pos(xdr); -+ if (!xdr_stream_subsegment(xdr, &args->payload, args->count)) -+ return 0; - - return 1; - } -diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c -index 486c5dba4b650..09dd70f791581 100644 ---- a/fs/nfsd/nfs4proc.c -+++ b/fs/nfsd/nfs4proc.c -@@ -782,12 +782,16 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, - __be32 status; - - read->rd_nf = NULL; -- if (read->rd_offset >= OFFSET_MAX) -- return nfserr_inval; - - trace_nfsd_read_start(rqstp, &cstate->current_fh, - read->rd_offset, read->rd_length); - -+ read->rd_length = min_t(u32, read->rd_length, svc_max_payload(rqstp)); -+ if (read->rd_offset > (u64)OFFSET_MAX) -+ read->rd_offset = (u64)OFFSET_MAX; -+ if (read->rd_offset + read->rd_length > (u64)OFFSET_MAX) -+ read->rd_length = (u64)OFFSET_MAX - read->rd_offset; ++ /* init ->work of the whole link before punting */ ++ io_prep_async_link(req); + - /* - * If we do a zero copy read, then a client will see read data - * that reflects the state of the file *after* performing the -@@ -1018,8 +1022,9 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, - unsigned long cnt; - int nvecs; - -- if (write->wr_offset >= OFFSET_MAX) -- return nfserr_inval; -+ if (write->wr_offset > (u64)OFFSET_MAX || -+ write->wr_offset + write->wr_buflen > (u64)OFFSET_MAX) -+ return nfserr_fbig; - - cnt = write->wr_buflen; - trace_nfsd_write_start(rqstp, &cstate->current_fh, -@@ -1033,8 +1038,7 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, - - write->wr_how_written = write->wr_stable_how; - -- nvecs = svc_fill_write_vector(rqstp, write->wr_payload.pages, -- write->wr_payload.head, write->wr_buflen); -+ nvecs = svc_fill_write_vector(rqstp, &write->wr_payload); - WARN_ON_ONCE(nvecs > ARRAY_SIZE(rqstp->rq_vec)); - - status = nfsd_vfs_write(rqstp, &cstate->current_fh, nf, -@@ -1511,6 +1515,9 @@ static void nfsd4_init_copy_res(struct nfsd4_copy *copy, bool sync) - - static ssize_t _nfsd_copy_file_range(struct nfsd4_copy *copy) - { -+ struct file *dst = copy->nf_dst->nf_file; -+ struct file *src = copy->nf_src->nf_file; -+ errseq_t since; - ssize_t bytes_copied = 0; - u64 bytes_total = copy->cp_count; - u64 src_pos = copy->cp_src_pos; -@@ -1523,9 +1530,8 @@ static ssize_t _nfsd_copy_file_range(struct nfsd4_copy *copy) - do { - if (kthread_should_stop()) - break; -- bytes_copied = nfsd_copy_file_range(copy->nf_src->nf_file, -- src_pos, copy->nf_dst->nf_file, dst_pos, -- bytes_total); -+ bytes_copied = nfsd_copy_file_range(src, src_pos, dst, dst_pos, -+ bytes_total); - if (bytes_copied <= 0) - break; - bytes_total -= bytes_copied; -@@ -1535,11 +1541,11 @@ static ssize_t _nfsd_copy_file_range(struct nfsd4_copy *copy) - } while (bytes_total > 0 && !copy->cp_synchronous); - /* for a non-zero asynchronous copy do a commit of data */ - if (!copy->cp_synchronous && copy->cp_res.wr_bytes_written > 0) 
{ -- down_write(&copy->nf_dst->nf_rwsem); -- status = vfs_fsync_range(copy->nf_dst->nf_file, -- copy->cp_dst_pos, -+ since = READ_ONCE(dst->f_wb_err); -+ status = vfs_fsync_range(dst, copy->cp_dst_pos, - copy->cp_res.wr_bytes_written, 0); -- up_write(&copy->nf_dst->nf_rwsem); -+ if (!status) -+ status = filemap_check_wb_err(dst->f_mapping, since); - if (!status) - copy->committed = true; - } -@@ -2487,9 +2493,6 @@ nfsd4_proc_compound(struct svc_rqst *rqstp) - status = nfserr_minor_vers_mismatch; - if (nfsd_minorversion(nn, args->minorversion, NFSD_TEST) <= 0) - goto out; -- status = nfserr_resource; -- if (args->opcnt > NFSD_MAX_OPS_PER_COMPOUND) -- goto out; - - status = nfs41_check_op_ordering(args); - if (status) { -@@ -2502,10 +2505,20 @@ nfsd4_proc_compound(struct svc_rqst *rqstp) - - rqstp->rq_lease_breaker = (void **)&cstate->clp; - -- trace_nfsd_compound(rqstp, args->opcnt); -+ trace_nfsd_compound(rqstp, args->client_opcnt); - while (!status && resp->opcnt < args->opcnt) { - op = &args->ops[resp->opcnt++]; - -+ if (unlikely(resp->opcnt == NFSD_MAX_OPS_PER_COMPOUND)) { -+ /* If there are still more operations to process, -+ * stop here and report NFS4ERR_RESOURCE. */ -+ if (cstate->minorversion == 0 && -+ args->client_opcnt > resp->opcnt) { -+ op->status = nfserr_resource; -+ goto encode_op; -+ } -+ } ++ /* ++ * Not expected to happen, but if we do have a bug where this _can_ ++ * happen, catch it here and ensure the request is marked as ++ * canceled. That will make io-wq go through the usual work cancel ++ * procedure rather than attempt to run this request (or create a new ++ * worker for it). ++ */ ++ if (WARN_ON_ONCE(!same_thread_group(req->task, current))) ++ req->work.flags |= IO_WQ_WORK_CANCEL; + - /* - * The XDR decode routines may have pre-set op->status; - * for example, if there is a miscellaneous XDR error -@@ -2581,8 +2594,8 @@ encode_op: - status = op->status; - } - -- trace_nfsd_compound_status(args->opcnt, resp->opcnt, status, -- nfsd4_op_name(op->opnum)); -+ trace_nfsd_compound_status(args->client_opcnt, resp->opcnt, -+ status, nfsd4_op_name(op->opnum)); - - nfsd4_cstate_clear_replay(cstate); - nfsd4_increment_op_stats(op->opnum); -diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c -index 6fedc49726bf7..8f24485e0f04f 100644 ---- a/fs/nfsd/nfs4recover.c -+++ b/fs/nfsd/nfs4recover.c -@@ -815,8 +815,10 @@ __cld_pipe_inprogress_downcall(const struct cld_msg_v2 __user *cmsg, - princhash.data = memdup_user( - &ci->cc_princhash.cp_data, - princhashlen); -- if (IS_ERR_OR_NULL(princhash.data)) -+ if (IS_ERR_OR_NULL(princhash.data)) { -+ kfree(name.data); - return -EFAULT; -+ } - princhash.len = princhashlen; - } else - princhash.len = 0; -@@ -2156,6 +2158,7 @@ static struct notifier_block nfsd4_cld_block = { - int - register_cld_notifier(void) - { -+ WARN_ON(!nfsd_net_id); - return rpc_pipefs_notifier_register(&nfsd4_cld_block); - } - -diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c -index 3f4027a5de883..7b763f146b621 100644 ---- a/fs/nfsd/nfs4state.c -+++ b/fs/nfsd/nfs4state.c -@@ -961,6 +961,7 @@ static struct nfs4_ol_stateid * nfs4_alloc_open_stateid(struct nfs4_client *clp) - - static void nfs4_free_deleg(struct nfs4_stid *stid) - { -+ WARN_ON(!list_empty(&stid->sc_cp_list)); - kmem_cache_free(deleg_slab, stid); - atomic_long_dec(&num_delegations); - } -@@ -1207,6 +1208,11 @@ hash_delegation_locked(struct nfs4_delegation *dp, struct nfs4_file *fp) - return 0; - } - -+static bool delegation_hashed(struct nfs4_delegation *dp) ++ trace_io_uring_queue_async_work(ctx, 
io_wq_is_hashed(&req->work), req, ++ &req->work, req->flags); ++ io_wq_enqueue(tctx->io_wq, &req->work); ++ if (link) ++ io_queue_linked_timeout(link); ++} ++ ++static void io_kill_timeout(struct io_kiocb *req, int status) ++ __must_hold(&req->ctx->completion_lock) ++ __must_hold(&req->ctx->timeout_lock) +{ -+ return !(list_empty(&dp->dl_perfile)); ++ struct io_timeout_data *io = req->async_data; ++ ++ if (hrtimer_try_to_cancel(&io->timer) != -1) { ++ if (status) ++ req_set_fail(req); ++ atomic_set(&req->ctx->cq_timeouts, ++ atomic_read(&req->ctx->cq_timeouts) + 1); ++ list_del_init(&req->timeout.list); ++ io_fill_cqe_req(req, status, 0); ++ io_put_req_deferred(req); ++ } +} + - static bool - unhash_delegation_locked(struct nfs4_delegation *dp) - { -@@ -1214,7 +1220,7 @@ unhash_delegation_locked(struct nfs4_delegation *dp) - - lockdep_assert_held(&state_lock); - -- if (list_empty(&dp->dl_perfile)) -+ if (!delegation_hashed(dp)) - return false; - - dp->dl_stid.sc_type = NFS4_CLOSED_DELEG_STID; -@@ -1369,6 +1375,7 @@ static void nfs4_free_ol_stateid(struct nfs4_stid *stid) - release_all_access(stp); - if (stp->st_stateowner) - nfs4_put_stateowner(stp->st_stateowner); -+ WARN_ON(!list_empty(&stid->sc_cp_list)); - kmem_cache_free(stateid_slab, stid); - } - -@@ -4107,8 +4114,10 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp, - status = nfserr_clid_inuse; - if (client_has_state(old) - && !same_creds(&unconf->cl_cred, -- &old->cl_cred)) -+ &old->cl_cred)) { -+ old = NULL; - goto out; -+ } - status = mark_client_expired_locked(old); - if (status) { - old = NULL; -@@ -4598,7 +4607,7 @@ static void nfsd4_cb_recall_prepare(struct nfsd4_callback *cb) - * queued for a lease break. Don't queue it again. - */ - spin_lock(&state_lock); -- if (dp->dl_time == 0) { -+ if (delegation_hashed(dp) && dp->dl_time == 0) { - dp->dl_time = ktime_get_boottime_seconds(); - list_add_tail(&dp->dl_recall_lru, &nn->del_recall_lru); - } -@@ -4686,6 +4695,14 @@ nfsd_break_deleg_cb(struct file_lock *fl) - return ret; - } - -+/** -+ * nfsd_breaker_owns_lease - Check if lease conflict was resolved -+ * @fl: Lock state to check -+ * -+ * Return values: -+ * %true: Lease conflict was resolved -+ * %false: Lease conflict was not resolved. -+ */ - static bool nfsd_breaker_owns_lease(struct file_lock *fl) - { - struct nfs4_delegation *dl = fl->fl_owner; -@@ -4693,11 +4710,11 @@ static bool nfsd_breaker_owns_lease(struct file_lock *fl) - struct nfs4_client *clp; - - if (!i_am_nfsd()) -- return NULL; ++static void io_queue_deferred(struct io_ring_ctx *ctx) ++{ ++ while (!list_empty(&ctx->defer_list)) { ++ struct io_defer_entry *de = list_first_entry(&ctx->defer_list, ++ struct io_defer_entry, list); ++ ++ if (req_need_defer(de->req, de->seq)) ++ break; ++ list_del_init(&de->list); ++ io_req_task_queue(de->req); ++ kfree(de); ++ } ++} ++ ++static void io_flush_timeouts(struct io_ring_ctx *ctx) ++ __must_hold(&ctx->completion_lock) ++{ ++ u32 seq = ctx->cached_cq_tail - atomic_read(&ctx->cq_timeouts); ++ struct io_kiocb *req, *tmp; ++ ++ spin_lock_irq(&ctx->timeout_lock); ++ list_for_each_entry_safe(req, tmp, &ctx->timeout_list, timeout.list) { ++ u32 events_needed, events_got; ++ ++ if (io_is_timeout_noseq(req)) ++ break; ++ ++ /* ++ * Since seq can easily wrap around over time, subtract ++ * the last seq at which timeouts were flushed before comparing. 
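The wraparound-safe comparison this comment describes can be checked in isolation. A minimal sketch with unsigned 32-bit arithmetic (the three sequence values are arbitrary, chosen to straddle the wrap):

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
        uint32_t last_flush = 0xfffffff0u; /* ctx->cq_last_tm_flush */
        uint32_t target     = 0x00000010u; /* req->timeout.target_seq, post-wrap */
        uint32_t seq        = 0x00000020u; /* cached_cq_tail - cq_timeouts */

        /* Both differences are taken modulo 2^32, so the wrap cancels out. */
        uint32_t events_needed = target - last_flush; /* 0x20 */
        uint32_t events_got    = seq - last_flush;    /* 0x30 */

        assert(events_got >= events_needed); /* this timeout has fired */
        return 0;
    }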
++ * Assuming not more than 2^31-1 events have happened since, ++ * these subtractions won't have wrapped, so we can check if ++ * target is in [last_seq, current_seq] by comparing the two. ++ */ ++ events_needed = req->timeout.target_seq - ctx->cq_last_tm_flush; ++ events_got = seq - ctx->cq_last_tm_flush; ++ if (events_got < events_needed) ++ break; ++ ++ io_kill_timeout(req, 0); ++ } ++ ctx->cq_last_tm_flush = seq; ++ spin_unlock_irq(&ctx->timeout_lock); ++} ++ ++static void __io_commit_cqring_flush(struct io_ring_ctx *ctx) ++{ ++ if (ctx->off_timeout_used) ++ io_flush_timeouts(ctx); ++ if (ctx->drain_active) ++ io_queue_deferred(ctx); ++} ++ ++static inline void io_commit_cqring(struct io_ring_ctx *ctx) ++{ ++ if (unlikely(ctx->off_timeout_used || ctx->drain_active)) ++ __io_commit_cqring_flush(ctx); ++ /* order cqe stores with ring update */ ++ smp_store_release(&ctx->rings->cq.tail, ctx->cached_cq_tail); ++} ++ ++static inline bool io_sqring_full(struct io_ring_ctx *ctx) ++{ ++ struct io_rings *r = ctx->rings; ++ ++ return READ_ONCE(r->sq.tail) - ctx->cached_sq_head == ctx->sq_entries; ++} ++ ++static inline unsigned int __io_cqring_events(struct io_ring_ctx *ctx) ++{ ++ return ctx->cached_cq_tail - READ_ONCE(ctx->rings->cq.head); ++} ++ ++static inline struct io_uring_cqe *io_get_cqe(struct io_ring_ctx *ctx) ++{ ++ struct io_rings *rings = ctx->rings; ++ unsigned tail, mask = ctx->cq_entries - 1; ++ ++ /* ++ * writes to the cq entry need to come after reading head; the ++ * control dependency is enough as we're using WRITE_ONCE to ++ * fill the cq entry ++ */ ++ if (__io_cqring_events(ctx) == ctx->cq_entries) ++ return NULL; ++ ++ tail = ctx->cached_cq_tail++; ++ return &rings->cqes[tail & mask]; ++} ++ ++static inline bool io_should_trigger_evfd(struct io_ring_ctx *ctx) ++{ ++ if (likely(!ctx->cq_ev_fd)) + return false; - rqst = kthread_data(current); - /* Note rq_prog == NFS_ACL_PROGRAM is also possible: */ - if (rqst->rq_prog != NFS_PROGRAM || rqst->rq_vers < 4) -- return NULL; ++ if (READ_ONCE(ctx->rings->cq_flags) & IORING_CQ_EVENTFD_DISABLED) + return false; - clp = *(rqst->rq_lease_breaker); - return dl->dl_stid.sc_client == clp; - } -@@ -6035,7 +6052,11 @@ nfs4_preprocess_stateid_op(struct svc_rqst *rqstp, - *nfp = NULL; - - if (ZERO_STATEID(stateid) || ONE_STATEID(stateid)) { -- status = check_special_stateids(net, fhp, stateid, flags); -+ if (cstid) -+ status = nfserr_bad_stateid; -+ else -+ status = check_special_stateids(net, fhp, stateid, -+ flags); - goto done; - } - -@@ -6370,6 +6391,7 @@ static void nfsd4_close_open_stateid(struct nfs4_ol_stateid *s) - struct nfs4_client *clp = s->st_stid.sc_client; - bool unhashed; - LIST_HEAD(reaplist); -+ struct nfs4_ol_stateid *stp; - - spin_lock(&clp->cl_lock); - unhashed = unhash_open_stateid(s, &reaplist); -@@ -6378,6 +6400,8 @@ static void nfsd4_close_open_stateid(struct nfs4_ol_stateid *s) - if (unhashed) - put_ol_stateid_locked(s, &reaplist); - spin_unlock(&clp->cl_lock); -+ list_for_each_entry(stp, &reaplist, st_locks) -+ nfs4_free_cpntf_statelist(clp->net, &stp->st_stid); - free_ol_stateid_reaplist(&reaplist); - } else { - spin_unlock(&clp->cl_lock); -@@ -7280,16 +7304,12 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp, - if (sop->so_is_open_owner || !same_owner_str(sop, owner)) - continue; - -- /* see if there are still any locks associated with it */ -- lo = lockowner(sop); -- list_for_each_entry(stp, &sop->so_stateids, st_perstateowner) { -- if (check_for_locks(stp->st_stid.sc_file, lo)) { -- status = 
nfserr_locks_held; -- spin_unlock(&clp->cl_lock); -- return status; -- } -+ if (atomic_read(&sop->so_count) != 1) { -+ spin_unlock(&clp->cl_lock); -+ return nfserr_locks_held; - } - -+ lo = lockowner(sop); - nfs4_get_stateowner(sop); - break; - } -diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c -index cf030ebe28275..e0409f6cdfd5f 100644 ---- a/fs/nfsd/nfs4xdr.c -+++ b/fs/nfsd/nfs4xdr.c -@@ -288,11 +288,8 @@ nfsd4_decode_bitmap4(struct nfsd4_compoundargs *argp, u32 *bmval, u32 bmlen) - p = xdr_inline_decode(argp->xdr, count << 2); - if (!p) - return nfserr_bad_xdr; -- i = 0; -- while (i < count) -- bmval[i++] = be32_to_cpup(p++); -- while (i < bmlen) -- bmval[i++] = 0; -+ for (i = 0; i < bmlen; i++) -+ bmval[i] = (i < count) ? be32_to_cpup(p++) : 0; - - return nfs_ok; - } -@@ -2352,16 +2349,11 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp) - - if (xdr_stream_decode_u32(argp->xdr, &argp->minorversion) < 0) - return 0; -- if (xdr_stream_decode_u32(argp->xdr, &argp->opcnt) < 0) -+ if (xdr_stream_decode_u32(argp->xdr, &argp->client_opcnt) < 0) - return 0; - -- /* -- * NFS4ERR_RESOURCE is a more helpful error than GARBAGE_ARGS -- * here, so we return success at the xdr level so that -- * nfsd4_proc can handle this is an NFS-level error. -- */ -- if (argp->opcnt > NFSD_MAX_OPS_PER_COMPOUND) -- return 1; -+ argp->opcnt = min_t(u32, argp->client_opcnt, -+ NFSD_MAX_OPS_PER_COMPOUND); - - if (argp->opcnt > ARRAY_SIZE(argp->iops)) { - argp->ops = kzalloc(argp->opcnt * sizeof(*argp->ops), GFP_KERNEL); -@@ -3996,14 +3988,12 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, - if (resp->xdr->buf->page_len && - test_bit(RQ_SPLICE_OK, &resp->rqstp->rq_flags)) { - WARN_ON_ONCE(1); -- return nfserr_resource; -+ return nfserr_serverfault; - } - xdr_commit_encode(xdr); - -- maxcount = svc_max_payload(resp->rqstp); -- maxcount = min_t(unsigned long, maxcount, -+ maxcount = min_t(unsigned long, read->rd_length, - (xdr->buf->buflen - xdr->buf->len)); -- maxcount = min_t(unsigned long, maxcount, read->rd_length); - - if (file->f_op->splice_read && - test_bit(RQ_SPLICE_OK, &resp->rqstp->rq_flags)) -@@ -4840,10 +4830,8 @@ nfsd4_encode_read_plus(struct nfsd4_compoundres *resp, __be32 nfserr, - return nfserr_resource; - xdr_commit_encode(xdr); - -- maxcount = svc_max_payload(resp->rqstp); -- maxcount = min_t(unsigned long, maxcount, -+ maxcount = min_t(unsigned long, read->rd_length, - (xdr->buf->buflen - xdr->buf->len)); -- maxcount = min_t(unsigned long, maxcount, read->rd_length); - count = maxcount; - - eof = read->rd_offset >= i_size_read(file_inode(file)); -diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c -index 96cdf77925f33..830bb8493c7fd 100644 ---- a/fs/nfsd/nfscache.c -+++ b/fs/nfsd/nfscache.c -@@ -212,7 +212,6 @@ void nfsd_reply_cache_shutdown(struct nfsd_net *nn) - struct svc_cacherep *rp; - unsigned int i; - -- nfsd_reply_cache_stats_destroy(nn); - unregister_shrinker(&nn->nfsd_reply_cache_shrinker); - - for (i = 0; i < nn->drc_hashsize; i++) { -@@ -223,6 +222,7 @@ void nfsd_reply_cache_shutdown(struct nfsd_net *nn) - rp, nn); - } - } -+ nfsd_reply_cache_stats_destroy(nn); - - kvfree(nn->drc_hashtbl); - nn->drc_hashtbl = NULL; -diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c -index 070e5dd03e26f..cb73c12925629 100644 ---- a/fs/nfsd/nfsctl.c -+++ b/fs/nfsd/nfsctl.c -@@ -1249,7 +1249,8 @@ static void nfsdfs_remove_file(struct inode *dir, struct dentry *dentry) - clear_ncl(d_inode(dentry)); - dget(dentry); - ret = simple_unlink(dir, dentry); -- d_delete(dentry); -+ 
d_drop(dentry); -+ fsnotify_unlink(dir, dentry); - dput(dentry); - WARN_ON_ONCE(ret); - } -@@ -1340,8 +1341,8 @@ void nfsd_client_rmdir(struct dentry *dentry) - dget(dentry); - ret = simple_rmdir(dir, dentry); - WARN_ON_ONCE(ret); -+ d_drop(dentry); - fsnotify_rmdir(dir, dentry); -- d_delete(dentry); - dput(dentry); - inode_unlock(dir); - } -@@ -1521,12 +1522,9 @@ static int __init init_nfsd(void) - int retval; - printk(KERN_INFO "Installing knfsd (copyright (C) 1996 okir@monad.swb.de).\n"); - -- retval = register_cld_notifier(); -- if (retval) -- return retval; - retval = nfsd4_init_slabs(); - if (retval) -- goto out_unregister_notifier; -+ return retval; - retval = nfsd4_init_pnfs(); - if (retval) - goto out_free_slabs; -@@ -1545,9 +1543,14 @@ static int __init init_nfsd(void) - goto out_free_exports; - retval = register_pernet_subsys(&nfsd_net_ops); - if (retval < 0) -+ goto out_free_filesystem; -+ retval = register_cld_notifier(); -+ if (retval) - goto out_free_all; - return 0; - out_free_all: -+ unregister_pernet_subsys(&nfsd_net_ops); -+out_free_filesystem: - unregister_filesystem(&nfsd_fs_type); - out_free_exports: - remove_proc_entry("fs/nfs/exports", NULL); -@@ -1561,13 +1564,12 @@ out_free_pnfs: - nfsd4_exit_pnfs(); - out_free_slabs: - nfsd4_free_slabs(); --out_unregister_notifier: -- unregister_cld_notifier(); - return retval; - } - - static void __exit exit_nfsd(void) - { -+ unregister_cld_notifier(); - unregister_pernet_subsys(&nfsd_net_ops); - nfsd_drc_slab_free(); - remove_proc_entry("fs/nfs/exports", NULL); -@@ -1577,7 +1579,6 @@ static void __exit exit_nfsd(void) - nfsd4_free_slabs(); - nfsd4_exit_pnfs(); - unregister_filesystem(&nfsd_fs_type); -- unregister_cld_notifier(); - } - - MODULE_AUTHOR("Olaf Kirch <okir@monad.swb.de>"); -diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c -index 90fcd6178823b..b009da1dcbb50 100644 ---- a/fs/nfsd/nfsproc.c -+++ b/fs/nfsd/nfsproc.c -@@ -182,6 +182,7 @@ nfsd_proc_read(struct svc_rqst *rqstp) - argp->count, argp->offset); - - argp->count = min_t(u32, argp->count, NFSSVC_MAXBLKSIZE_V2); -+ argp->count = min_t(u32, argp->count, rqstp->rq_res.buflen); - - v = 0; - len = argp->count; -@@ -230,16 +231,11 @@ nfsd_proc_write(struct svc_rqst *rqstp) - unsigned long cnt = argp->len; - unsigned int nvecs; - -- dprintk("nfsd: WRITE %s %d bytes at %d\n", -+ dprintk("nfsd: WRITE %s %u bytes at %d\n", - SVCFH_fmt(&argp->fh), - argp->len, argp->offset); - -- nvecs = svc_fill_write_vector(rqstp, rqstp->rq_arg.pages, -- &argp->first, cnt); -- if (!nvecs) { -- resp->status = nfserr_io; -- goto out; -- } -+ nvecs = svc_fill_write_vector(rqstp, &argp->payload); - - resp->status = nfsd_write(rqstp, fh_copy(&resp->fh, &argp->fh), - argp->offset, rqstp->rq_vec, nvecs, -@@ -248,7 +244,6 @@ nfsd_proc_write(struct svc_rqst *rqstp) - resp->status = fh_getattr(&resp->fh, &resp->stat); - else if (resp->status == nfserr_jukebox) - return rpc_drop_reply; --out: - return rpc_success; - } - -@@ -557,17 +552,16 @@ nfsd_proc_rmdir(struct svc_rqst *rqstp) - - static void nfsd_init_dirlist_pages(struct svc_rqst *rqstp, - struct nfsd_readdirres *resp, -- int count) -+ u32 count) - { - struct xdr_buf *buf = &resp->dirlist; - struct xdr_stream *xdr = &resp->xdr; - -- count = min_t(u32, count, PAGE_SIZE); -- - memset(buf, 0, sizeof(*buf)); - - /* Reserve room for the NULL ptr & eof flag (-2 words) */ -- buf->buflen = count - sizeof(__be32) * 2; -+ buf->buflen = clamp(count, (u32)(XDR_UNIT * 2), (u32)PAGE_SIZE); -+ buf->buflen -= XDR_UNIT * 2; - buf->pages = 
rqstp->rq_next_page; - rqstp->rq_next_page++; - -@@ -578,7 +572,7 @@ static void nfsd_init_dirlist_pages(struct svc_rqst *rqstp, - xdr->page_ptr = buf->pages; - xdr->iov = NULL; - xdr->p = page_address(*buf->pages); -- xdr->end = xdr->p + (PAGE_SIZE >> 2); -+ xdr->end = (void *)xdr->p + min_t(u32, buf->buflen, PAGE_SIZE); - xdr->rqst = NULL; - } - -diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c -index a06c05fe3b421..26a42f87c2409 100644 ---- a/fs/nfsd/nfsxdr.c -+++ b/fs/nfsd/nfsxdr.c -@@ -325,10 +325,7 @@ nfssvc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p) - { - struct xdr_stream *xdr = &rqstp->rq_arg_stream; - struct nfsd_writeargs *args = rqstp->rq_argp; -- struct kvec *head = rqstp->rq_arg.head; -- struct kvec *tail = rqstp->rq_arg.tail; - u32 beginoffset, totalcount; -- size_t remaining; - - if (!svcxdr_decode_fhandle(xdr, &args->fh)) - return 0; -@@ -346,12 +343,8 @@ nfssvc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p) - return 0; - if (args->len > NFSSVC_MAXBLKSIZE_V2) - return 0; -- remaining = head->iov_len + rqstp->rq_arg.page_len + tail->iov_len; -- remaining -= xdr_stream_pos(xdr); -- if (remaining < xdr_align_size(args->len)) -+ if (!xdr_stream_subsegment(xdr, &args->payload, args->len)) - return 0; -- args->first.iov_base = xdr->p; -- args->first.iov_len = head->iov_len - xdr_stream_pos(xdr); - - return 1; - } -diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h -index 538520957a815..0fc1fa6f28e0b 100644 ---- a/fs/nfsd/trace.h -+++ b/fs/nfsd/trace.h -@@ -319,14 +319,14 @@ TRACE_EVENT(nfsd_export_update, - DECLARE_EVENT_CLASS(nfsd_io_class, - TP_PROTO(struct svc_rqst *rqstp, - struct svc_fh *fhp, -- loff_t offset, -- unsigned long len), -+ u64 offset, -+ u32 len), - TP_ARGS(rqstp, fhp, offset, len), - TP_STRUCT__entry( - __field(u32, xid) - __field(u32, fh_hash) -- __field(loff_t, offset) -- __field(unsigned long, len) -+ __field(u64, offset) -+ __field(u32, len) - ), - TP_fast_assign( - __entry->xid = be32_to_cpu(rqstp->rq_xid); -@@ -334,7 +334,7 @@ DECLARE_EVENT_CLASS(nfsd_io_class, - __entry->offset = offset; - __entry->len = len; - ), -- TP_printk("xid=0x%08x fh_hash=0x%08x offset=%lld len=%lu", -+ TP_printk("xid=0x%08x fh_hash=0x%08x offset=%llu len=%u", - __entry->xid, __entry->fh_hash, - __entry->offset, __entry->len) - ) -@@ -343,8 +343,8 @@ DECLARE_EVENT_CLASS(nfsd_io_class, - DEFINE_EVENT(nfsd_io_class, nfsd_##name, \ - TP_PROTO(struct svc_rqst *rqstp, \ - struct svc_fh *fhp, \ -- loff_t offset, \ -- unsigned long len), \ -+ u64 offset, \ -+ u32 len), \ - TP_ARGS(rqstp, fhp, offset, len)) - - DEFINE_NFSD_IO_EVENT(read_start); -@@ -636,18 +636,10 @@ DEFINE_CLID_EVENT(confirmed_r); - /* - * from fs/nfsd/filecache.h - */ --TRACE_DEFINE_ENUM(NFSD_FILE_HASHED); --TRACE_DEFINE_ENUM(NFSD_FILE_PENDING); --TRACE_DEFINE_ENUM(NFSD_FILE_BREAK_READ); --TRACE_DEFINE_ENUM(NFSD_FILE_BREAK_WRITE); --TRACE_DEFINE_ENUM(NFSD_FILE_REFERENCED); -- - #define show_nf_flags(val) \ - __print_flags(val, "|", \ - { 1 << NFSD_FILE_HASHED, "HASHED" }, \ - { 1 << NFSD_FILE_PENDING, "PENDING" }, \ -- { 1 << NFSD_FILE_BREAK_READ, "BREAK_READ" }, \ -- { 1 << NFSD_FILE_BREAK_WRITE, "BREAK_WRITE" }, \ - { 1 << NFSD_FILE_REFERENCED, "REFERENCED"}) - - DECLARE_EVENT_CLASS(nfsd_file_class, -diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c -index 738d564ca4ce3..abfbb6953e89a 100644 ---- a/fs/nfsd/vfs.c -+++ b/fs/nfsd/vfs.c -@@ -433,6 +433,10 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap, - .ia_size = iap->ia_size, - }; - -+ host_err = -EFBIG; -+ if (iap->ia_size < 0) -+ 
goto out_unlock; ++ return !ctx->eventfd_async || io_wq_current_is_worker(); ++} + - host_err = notify_change(&init_user_ns, dentry, &size_attr, NULL); - if (host_err) - goto out_unlock; -@@ -521,10 +525,11 @@ __be32 nfsd4_clone_file_range(struct nfsd_file *nf_src, u64 src_pos, - { - struct file *src = nf_src->nf_file; - struct file *dst = nf_dst->nf_file; -+ errseq_t since; - loff_t cloned; - __be32 ret = 0; - -- down_write(&nf_dst->nf_rwsem); -+ since = READ_ONCE(dst->f_wb_err); - cloned = vfs_clone_file_range(src, src_pos, dst, dst_pos, count, 0); - if (cloned < 0) { - ret = nfserrno(cloned); -@@ -538,6 +543,8 @@ __be32 nfsd4_clone_file_range(struct nfsd_file *nf_src, u64 src_pos, - loff_t dst_end = count ? dst_pos + count - 1 : LLONG_MAX; - int status = vfs_fsync_range(dst, dst_pos, dst_end, 0); - -+ if (!status) -+ status = filemap_check_wb_err(dst->f_mapping, since); - if (!status) - status = commit_inode_metadata(file_inode(src)); - if (status < 0) { -@@ -547,13 +554,13 @@ __be32 nfsd4_clone_file_range(struct nfsd_file *nf_src, u64 src_pos, - } - } - out_err: -- up_write(&nf_dst->nf_rwsem); - return ret; - } - - ssize_t nfsd_copy_file_range(struct file *src, u64 src_pos, struct file *dst, - u64 dst_pos, u64 count) - { -+ ssize_t ret; - - /* - * Limit copy to 4MB to prevent indefinitely blocking an nfsd -@@ -564,7 +571,12 @@ ssize_t nfsd_copy_file_range(struct file *src, u64 src_pos, struct file *dst, - * limit like this and pipeline multiple COPY requests. - */ - count = min_t(u64, count, 1 << 22); -- return vfs_copy_file_range(src, src_pos, dst, dst_pos, count, 0); -+ ret = vfs_copy_file_range(src, src_pos, dst, dst_pos, count, 0); ++/* ++ * This should only get called when at least one event has been posted. ++ * Some applications rely on the eventfd notification count only changing ++ * IFF a new CQE has been added to the CQ ring. There's no depedency on ++ * 1:1 relationship between how many times this function is called (and ++ * hence the eventfd count) and number of CQEs posted to the CQ ring. ++ */ ++static void io_cqring_ev_posted(struct io_ring_ctx *ctx) ++{ ++ /* ++ * wake_up_all() may seem excessive, but io_wake_function() and ++ * io_should_wake() handle the termination of the loop and only ++ * wake as many waiters as we need to. 
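Several of the nfsd hunks above drop the per-file rwsem in favour of the errseq_t sample-and-check protocol: record f_wb_err before issuing the I/O, then ask whether any writeback error was raised since. A userspace toy of that protocol (the two helpers below are simplified stand-ins for the kernel's errseq API, not its real implementation):

    #include <stdio.h>

    typedef int errseq_t;

    static errseq_t errseq_sample(const errseq_t *eseq) { return *eseq; }
    static int errseq_check(const errseq_t *eseq, errseq_t since)
    {
        return *eseq == since ? 0 : -5; /* -EIO */
    }

    int main(void)
    {
        errseq_t wb_err = 0;  /* plays the role of file->f_wb_err */

        errseq_t since = errseq_sample(&wb_err); /* before the write/clone */
        wb_err = 1;                              /* writeback fails meanwhile */

        /* after vfs_fsync_range(): pick up errors hit since our sample */
        if (errseq_check(&wb_err, since))
            printf("report the writeback error to the client\n");
        return 0;
    }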
++ */ ++ if (wq_has_sleeper(&ctx->cq_wait)) ++ __wake_up(&ctx->cq_wait, TASK_NORMAL, 0, ++ poll_to_key(EPOLL_URING_WAKE | EPOLLIN)); ++ if (ctx->sq_data && waitqueue_active(&ctx->sq_data->wait)) ++ wake_up(&ctx->sq_data->wait); ++ if (io_should_trigger_evfd(ctx)) ++ eventfd_signal_mask(ctx->cq_ev_fd, 1, EPOLL_URING_WAKE); ++ if (waitqueue_active(&ctx->poll_wait)) ++ __wake_up(&ctx->poll_wait, TASK_INTERRUPTIBLE, 0, ++ poll_to_key(EPOLL_URING_WAKE | EPOLLIN)); ++} + -+ if (ret == -EOPNOTSUPP || ret == -EXDEV) -+ ret = generic_copy_file_range(src, src_pos, dst, dst_pos, -+ count, 0); -+ return ret; - } - - __be32 nfsd4_vfs_fallocate(struct svc_rqst *rqstp, struct svc_fh *fhp, -@@ -950,6 +962,7 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_file *nf, - struct super_block *sb = file_inode(file)->i_sb; - struct svc_export *exp; - struct iov_iter iter; -+ errseq_t since; - __be32 nfserr; - int host_err; - int use_wgather; -@@ -987,21 +1000,22 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_file *nf, - flags |= RWF_SYNC; - - iov_iter_kvec(&iter, WRITE, vec, vlen, *cnt); -+ since = READ_ONCE(file->f_wb_err); - if (flags & RWF_SYNC) { -- down_write(&nf->nf_rwsem); -+ if (verf) -+ nfsd_copy_boot_verifier(verf, -+ net_generic(SVC_NET(rqstp), -+ nfsd_net_id)); - host_err = vfs_iter_write(file, &iter, &pos, flags); - if (host_err < 0) - nfsd_reset_boot_verifier(net_generic(SVC_NET(rqstp), - nfsd_net_id)); -- up_write(&nf->nf_rwsem); - } else { -- down_read(&nf->nf_rwsem); - if (verf) - nfsd_copy_boot_verifier(verf, - net_generic(SVC_NET(rqstp), - nfsd_net_id)); - host_err = vfs_iter_write(file, &iter, &pos, flags); -- up_read(&nf->nf_rwsem); - } - if (host_err < 0) { - nfsd_reset_boot_verifier(net_generic(SVC_NET(rqstp), -@@ -1011,6 +1025,9 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_file *nf, - *cnt = host_err; - nfsd_stats_io_write_add(exp, *cnt); - fsnotify_modify(file); -+ host_err = filemap_check_wb_err(file->f_mapping, since); -+ if (host_err < 0) -+ goto out_nfserr; - - if (stable && use_wgather) { - host_err = wait_for_concurrent_writes(file); -@@ -1091,71 +1108,77 @@ out: - } - - #ifdef CONFIG_NFSD_V3 --static int --nfsd_filemap_write_and_wait_range(struct nfsd_file *nf, loff_t offset, -- loff_t end) --{ -- struct address_space *mapping = nf->nf_file->f_mapping; -- int ret = filemap_fdatawrite_range(mapping, offset, end); -- -- if (ret) -- return ret; -- filemap_fdatawait_range_keep_errors(mapping, offset, end); -- return 0; --} -- --/* -- * Commit all pending writes to stable storage. -+/** -+ * nfsd_commit - Commit pending writes to stable storage -+ * @rqstp: RPC request being processed -+ * @fhp: NFS filehandle -+ * @offset: raw offset from beginning of file -+ * @count: raw count of bytes to sync -+ * @verf: filled in with the server's current write verifier - * -- * Note: we only guarantee that data that lies within the range specified -- * by the 'offset' and 'count' parameters will be synced. -+ * Note: we guarantee that data that lies within the range specified -+ * by the 'offset' and 'count' parameters will be synced. The server -+ * is permitted to sync data that lies outside this range at the -+ * same time. - * - * Unfortunately we cannot lock the file to make sure we return full WCC - * data to the client, as locking happens lower down in the filesystem. -+ * -+ * Return values: -+ * An nfsstat value in network byte order. 
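The clamping that follows converts the client's (offset, count) pair into a (start, end) byte range that never exceeds the filesystem's s_maxbytes. A standalone sketch of that conversion (maxbytes and the request values are made up; loff_t is modelled as int64_t):

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
        uint64_t maxbytes = 1ULL << 40;                   /* assumed s_maxbytes */
        uint64_t offset = (1ULL << 40) - 8, count = 4096; /* client range */
        int64_t start = 0, end = INT64_MAX;

        if (offset < maxbytes) {
            start = (int64_t)offset;
            if (count && offset + count - 1 < maxbytes)
                end = (int64_t)(offset + count - 1);
        }
        /* the tail past maxbytes is clamped away: sync [start, LLONG_MAX] */
        printf("sync [%lld, %lld]\n", (long long)start, (long long)end);
        return 0;
    }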
- */ - __be32 --nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp, -- loff_t offset, unsigned long count, __be32 *verf) -+nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp, u64 offset, -+ u32 count, __be32 *verf) - { -+ u64 maxbytes; -+ loff_t start, end; -+ struct nfsd_net *nn; - struct nfsd_file *nf; -- loff_t end = LLONG_MAX; -- __be32 err = nfserr_inval; -- -- if (offset < 0) -- goto out; -- if (count != 0) { -- end = offset + (loff_t)count - 1; -- if (end < offset) -- goto out; -- } -+ __be32 err; - - err = nfsd_file_acquire(rqstp, fhp, - NFSD_MAY_WRITE|NFSD_MAY_NOT_BREAK_LEASE, &nf); - if (err) - goto out; ++static void io_cqring_ev_posted_iopoll(struct io_ring_ctx *ctx) ++{ ++ /* see waitqueue_active() comment */ ++ smp_mb(); ++ ++ if (ctx->flags & IORING_SETUP_SQPOLL) { ++ if (waitqueue_active(&ctx->cq_wait)) ++ __wake_up(&ctx->cq_wait, TASK_NORMAL, 0, ++ poll_to_key(EPOLL_URING_WAKE | EPOLLIN)); ++ } ++ if (io_should_trigger_evfd(ctx)) ++ eventfd_signal_mask(ctx->cq_ev_fd, 1, EPOLL_URING_WAKE); ++ if (waitqueue_active(&ctx->poll_wait)) ++ __wake_up(&ctx->poll_wait, TASK_INTERRUPTIBLE, 0, ++ poll_to_key(EPOLL_URING_WAKE | EPOLLIN)); ++} ++ ++/* Returns true if there are no backlogged entries after the flush */ ++static bool __io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force) ++{ ++ bool all_flushed, posted; ++ ++ if (!force && __io_cqring_events(ctx) == ctx->cq_entries) ++ return false; ++ ++ posted = false; ++ spin_lock(&ctx->completion_lock); ++ while (!list_empty(&ctx->cq_overflow_list)) { ++ struct io_uring_cqe *cqe = io_get_cqe(ctx); ++ struct io_overflow_cqe *ocqe; ++ ++ if (!cqe && !force) ++ break; ++ ocqe = list_first_entry(&ctx->cq_overflow_list, ++ struct io_overflow_cqe, list); ++ if (cqe) ++ memcpy(cqe, &ocqe->cqe, sizeof(*cqe)); ++ else ++ io_account_cq_overflow(ctx); ++ ++ posted = true; ++ list_del(&ocqe->list); ++ kfree(ocqe); ++ } ++ ++ all_flushed = list_empty(&ctx->cq_overflow_list); ++ if (all_flushed) { ++ clear_bit(0, &ctx->check_cq_overflow); ++ WRITE_ONCE(ctx->rings->sq_flags, ++ ctx->rings->sq_flags & ~IORING_SQ_CQ_OVERFLOW); ++ } ++ ++ if (posted) ++ io_commit_cqring(ctx); ++ spin_unlock(&ctx->completion_lock); ++ if (posted) ++ io_cqring_ev_posted(ctx); ++ return all_flushed; ++} ++ ++static bool io_cqring_overflow_flush(struct io_ring_ctx *ctx) ++{ ++ bool ret = true; ++ ++ if (test_bit(0, &ctx->check_cq_overflow)) { ++ /* iopoll syncs against uring_lock, not completion_lock */ ++ if (ctx->flags & IORING_SETUP_IOPOLL) ++ mutex_lock(&ctx->uring_lock); ++ ret = __io_cqring_overflow_flush(ctx, false); ++ if (ctx->flags & IORING_SETUP_IOPOLL) ++ mutex_unlock(&ctx->uring_lock); ++ } ++ ++ return ret; ++} ++ ++/* must to be called somewhat shortly after putting a request */ ++static inline void io_put_task(struct task_struct *task, int nr) ++{ ++ struct io_uring_task *tctx = task->io_uring; ++ ++ if (likely(task == current)) { ++ tctx->cached_refs += nr; ++ } else { ++ percpu_counter_sub(&tctx->inflight, nr); ++ if (unlikely(atomic_read(&tctx->in_idle))) ++ wake_up(&tctx->wait); ++ put_task_struct_many(task, nr); ++ } ++} ++ ++static void io_task_refs_refill(struct io_uring_task *tctx) ++{ ++ unsigned int refill = -tctx->cached_refs + IO_TCTX_REFS_CACHE_NR; ++ ++ percpu_counter_add(&tctx->inflight, refill); ++ refcount_add(refill, ¤t->usage); ++ tctx->cached_refs += refill; ++} ++ ++static inline void io_get_task_refs(int nr) ++{ ++ struct io_uring_task *tctx = current->io_uring; ++ ++ tctx->cached_refs -= nr; ++ if 
(unlikely(tctx->cached_refs < 0)) ++ io_task_refs_refill(tctx); ++} ++ ++static __cold void io_uring_drop_tctx_refs(struct task_struct *task) ++{ ++ struct io_uring_task *tctx = task->io_uring; ++ unsigned int refs = tctx->cached_refs; ++ ++ if (refs) { ++ tctx->cached_refs = 0; ++ percpu_counter_sub(&tctx->inflight, refs); ++ put_task_struct_many(task, refs); ++ } ++} ++ ++static bool io_cqring_event_overflow(struct io_ring_ctx *ctx, u64 user_data, ++ s32 res, u32 cflags) ++{ ++ struct io_overflow_cqe *ocqe; ++ ++ ocqe = kmalloc(sizeof(*ocqe), GFP_ATOMIC | __GFP_ACCOUNT); ++ if (!ocqe) { ++ /* ++ * If we're in ring overflow flush mode, or in task cancel mode, ++ * or cannot allocate an overflow entry, then we need to drop it ++ * on the floor. ++ */ ++ io_account_cq_overflow(ctx); ++ return false; ++ } ++ if (list_empty(&ctx->cq_overflow_list)) { ++ set_bit(0, &ctx->check_cq_overflow); ++ WRITE_ONCE(ctx->rings->sq_flags, ++ ctx->rings->sq_flags | IORING_SQ_CQ_OVERFLOW); ++ ++ } ++ ocqe->cqe.user_data = user_data; ++ ocqe->cqe.res = res; ++ ocqe->cqe.flags = cflags; ++ list_add_tail(&ocqe->list, &ctx->cq_overflow_list); ++ return true; ++} ++ ++static inline bool __io_fill_cqe(struct io_ring_ctx *ctx, u64 user_data, ++ s32 res, u32 cflags) ++{ ++ struct io_uring_cqe *cqe; ++ ++ trace_io_uring_complete(ctx, user_data, res, cflags); + + /* -+ * Convert the client-provided (offset, count) range to a -+ * (start, end) range. If the client-provided range falls -+ * outside the maximum file size of the underlying FS, -+ * clamp the sync range appropriately. ++ * If we can't get a cq entry, userspace overflowed the ++ * submission (by quite a lot). Increment the overflow count in ++ * the ring. + */ -+ start = 0; -+ end = LLONG_MAX; -+ maxbytes = (u64)fhp->fh_dentry->d_sb->s_maxbytes; -+ if (offset < maxbytes) { -+ start = offset; -+ if (count && (offset + count - 1 < maxbytes)) -+ end = offset + count - 1; ++ cqe = io_get_cqe(ctx); ++ if (likely(cqe)) { ++ WRITE_ONCE(cqe->user_data, user_data); ++ WRITE_ONCE(cqe->res, res); ++ WRITE_ONCE(cqe->flags, cflags); ++ return true; + } ++ return io_cqring_event_overflow(ctx, user_data, res, cflags); ++} + -+ nn = net_generic(nf->nf_net, nfsd_net_id); - if (EX_ISSYNC(fhp->fh_export)) { -- int err2 = nfsd_filemap_write_and_wait_range(nf, offset, end); -+ errseq_t since = READ_ONCE(nf->nf_file->f_wb_err); -+ int err2; - -- down_write(&nf->nf_rwsem); -- if (!err2) -- err2 = vfs_fsync_range(nf->nf_file, offset, end, 0); -+ err2 = vfs_fsync_range(nf->nf_file, start, end, 0); - switch (err2) { - case 0: -- nfsd_copy_boot_verifier(verf, net_generic(nf->nf_net, -- nfsd_net_id)); -+ nfsd_copy_boot_verifier(verf, nn); -+ err2 = filemap_check_wb_err(nf->nf_file->f_mapping, -+ since); -+ err = nfserrno(err2); - break; - case -EINVAL: - err = nfserr_notsupp; - break; - default: -+ nfsd_reset_boot_verifier(nn); - err = nfserrno(err2); -- nfsd_reset_boot_verifier(net_generic(nf->nf_net, -- nfsd_net_id)); - } -- up_write(&nf->nf_rwsem); - } else -- nfsd_copy_boot_verifier(verf, net_generic(nf->nf_net, -- nfsd_net_id)); -+ nfsd_copy_boot_verifier(verf, nn); - - nfsd_file_put(nf); - out: -diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h -index b21b76e6b9a87..3cf5a8a13da50 100644 ---- a/fs/nfsd/vfs.h -+++ b/fs/nfsd/vfs.h -@@ -73,8 +73,8 @@ __be32 do_nfsd_create(struct svc_rqst *, struct svc_fh *, - char *name, int len, struct iattr *attrs, - struct svc_fh *res, int createmode, - u32 *verifier, bool *truncp, bool *created); --__be32 nfsd_commit(struct svc_rqst *, struct svc_fh *, 
-- loff_t, unsigned long, __be32 *verf); -+__be32 nfsd_commit(struct svc_rqst *rqst, struct svc_fh *fhp, -+ u64 offset, u32 count, __be32 *verf); - #endif /* CONFIG_NFSD_V3 */ - #ifdef CONFIG_NFSD_V4 - __be32 nfsd_getxattr(struct svc_rqst *rqstp, struct svc_fh *fhp, -diff --git a/fs/nfsd/xdr.h b/fs/nfsd/xdr.h -index f45b4bc93f527..863a35f24910a 100644 ---- a/fs/nfsd/xdr.h -+++ b/fs/nfsd/xdr.h -@@ -32,8 +32,8 @@ struct nfsd_readargs { - struct nfsd_writeargs { - svc_fh fh; - __u32 offset; -- int len; -- struct kvec first; -+ __u32 len; -+ struct xdr_buf payload; - }; - - struct nfsd_createargs { -diff --git a/fs/nfsd/xdr3.h b/fs/nfsd/xdr3.h -index 933008382bbeb..712c117300cb7 100644 ---- a/fs/nfsd/xdr3.h -+++ b/fs/nfsd/xdr3.h -@@ -40,7 +40,7 @@ struct nfsd3_writeargs { - __u32 count; - int stable; - __u32 len; -- struct kvec first; -+ struct xdr_buf payload; - }; - - struct nfsd3_createargs { -diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h -index 3e4052e3bd50e..45257666a6888 100644 ---- a/fs/nfsd/xdr4.h -+++ b/fs/nfsd/xdr4.h -@@ -688,9 +688,10 @@ struct nfsd4_compoundargs { - struct svcxdr_tmpbuf *to_free; - struct svc_rqst *rqstp; - -- u32 taglen; - char * tag; -+ u32 taglen; - u32 minorversion; -+ u32 client_opcnt; - u32 opcnt; - struct nfsd4_op *ops; - struct nfsd4_op iops[8]; -diff --git a/fs/nilfs2/btnode.c b/fs/nilfs2/btnode.c -index 4391fd3abd8f8..e00e184b12615 100644 ---- a/fs/nilfs2/btnode.c -+++ b/fs/nilfs2/btnode.c -@@ -20,6 +20,23 @@ - #include "page.h" - #include "btnode.h" - ++static noinline void io_fill_cqe_req(struct io_kiocb *req, s32 res, u32 cflags) ++{ ++ __io_fill_cqe(req->ctx, req->user_data, res, cflags); ++} + -+/** -+ * nilfs_init_btnc_inode - initialize B-tree node cache inode -+ * @btnc_inode: inode to be initialized -+ * -+ * nilfs_init_btnc_inode() sets up an inode for B-tree node cache. -+ */ -+void nilfs_init_btnc_inode(struct inode *btnc_inode) ++static noinline bool io_fill_cqe_aux(struct io_ring_ctx *ctx, u64 user_data, ++ s32 res, u32 cflags) +{ -+ struct nilfs_inode_info *ii = NILFS_I(btnc_inode); ++ ctx->cq_extra++; ++ return __io_fill_cqe(ctx, user_data, res, cflags); ++} + -+ btnc_inode->i_mode = S_IFREG; -+ ii->i_flags = 0; -+ memset(&ii->i_bmap_data, 0, sizeof(struct nilfs_bmap)); -+ mapping_set_gfp_mask(btnc_inode->i_mapping, GFP_NOFS); ++static void io_req_complete_post(struct io_kiocb *req, s32 res, ++ u32 cflags) ++{ ++ struct io_ring_ctx *ctx = req->ctx; ++ ++ spin_lock(&ctx->completion_lock); ++ __io_fill_cqe(ctx, req->user_data, res, cflags); ++ /* ++ * If we're the last reference to this request, add to our locked ++ * free_list cache. 
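The two-level request cache sketched in this comment parks completed requests on a list under completion_lock and lets the submission side splice them over in one batch. A toy model of that flow (a pthread mutex stands in for the kernel lock, and plain counters for the real request lists):

    #include <pthread.h>
    #include <stdio.h>

    #define IO_COMPL_BATCH 32

    static pthread_mutex_t completion_lock = PTHREAD_MUTEX_INITIALIZER;
    static int locked_free_nr; /* parked by the completion side */
    static int free_reqs;      /* submission side, no lock needed */

    static void complete_one(void)
    {
        pthread_mutex_lock(&completion_lock);
        locked_free_nr++;
        pthread_mutex_unlock(&completion_lock);
    }

    static void flush_cached_reqs(void)
    {
        if (locked_free_nr > IO_COMPL_BATCH) { /* deliberately racy peek */
            pthread_mutex_lock(&completion_lock);
            free_reqs += locked_free_nr;       /* the splice, under the lock */
            locked_free_nr = 0;
            pthread_mutex_unlock(&completion_lock);
        }
    }

    int main(void)
    {
        for (int i = 0; i < 40; i++)
            complete_one();
        flush_cached_reqs();
        printf("submission cache holds %d requests\n", free_reqs);
        return 0;
    }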
++ */ ++ if (req_ref_put_and_test(req)) { ++ if (req->flags & (REQ_F_LINK | REQ_F_HARDLINK)) { ++ if (req->flags & IO_DISARM_MASK) ++ io_disarm_next(req); ++ if (req->link) { ++ io_req_task_queue(req->link); ++ req->link = NULL; ++ } ++ } ++ io_dismantle_req(req); ++ io_put_task(req->task, 1); ++ list_add(&req->inflight_entry, &ctx->locked_free_list); ++ ctx->locked_free_nr++; ++ } else { ++ if (!percpu_ref_tryget(&ctx->refs)) ++ req = NULL; ++ } ++ io_commit_cqring(ctx); ++ spin_unlock(&ctx->completion_lock); ++ ++ if (req) { ++ io_cqring_ev_posted(ctx); ++ percpu_ref_put(&ctx->refs); ++ } +} + - void nilfs_btnode_cache_clear(struct address_space *btnc) - { - invalidate_mapping_pages(btnc, 0, -1); -@@ -29,7 +46,7 @@ void nilfs_btnode_cache_clear(struct address_space *btnc) - struct buffer_head * - nilfs_btnode_create_block(struct address_space *btnc, __u64 blocknr) - { -- struct inode *inode = NILFS_BTNC_I(btnc); -+ struct inode *inode = btnc->host; - struct buffer_head *bh; - - bh = nilfs_grab_buffer(inode, btnc, blocknr, BIT(BH_NILFS_Node)); -@@ -57,7 +74,7 @@ int nilfs_btnode_submit_block(struct address_space *btnc, __u64 blocknr, - struct buffer_head **pbh, sector_t *submit_ptr) - { - struct buffer_head *bh; -- struct inode *inode = NILFS_BTNC_I(btnc); -+ struct inode *inode = btnc->host; - struct page *page; - int err; - -@@ -157,7 +174,7 @@ int nilfs_btnode_prepare_change_key(struct address_space *btnc, - struct nilfs_btnode_chkey_ctxt *ctxt) - { - struct buffer_head *obh, *nbh; -- struct inode *inode = NILFS_BTNC_I(btnc); -+ struct inode *inode = btnc->host; - __u64 oldkey = ctxt->oldkey, newkey = ctxt->newkey; - int err; - -diff --git a/fs/nilfs2/btnode.h b/fs/nilfs2/btnode.h -index 0f88dbc9bcb3e..05ab64d354dc9 100644 ---- a/fs/nilfs2/btnode.h -+++ b/fs/nilfs2/btnode.h -@@ -30,6 +30,7 @@ struct nilfs_btnode_chkey_ctxt { - struct buffer_head *newbh; - }; - -+void nilfs_init_btnc_inode(struct inode *btnc_inode); - void nilfs_btnode_cache_clear(struct address_space *); - struct buffer_head *nilfs_btnode_create_block(struct address_space *btnc, - __u64 blocknr); -diff --git a/fs/nilfs2/btree.c b/fs/nilfs2/btree.c -index ab9ec073330f1..2301b57ca17ff 100644 ---- a/fs/nilfs2/btree.c -+++ b/fs/nilfs2/btree.c -@@ -58,7 +58,8 @@ static void nilfs_btree_free_path(struct nilfs_btree_path *path) - static int nilfs_btree_get_new_block(const struct nilfs_bmap *btree, - __u64 ptr, struct buffer_head **bhp) - { -- struct address_space *btnc = &NILFS_BMAP_I(btree)->i_btnode_cache; -+ struct inode *btnc_inode = NILFS_BMAP_I(btree)->i_assoc_inode; -+ struct address_space *btnc = btnc_inode->i_mapping; - struct buffer_head *bh; - - bh = nilfs_btnode_create_block(btnc, ptr); -@@ -470,7 +471,8 @@ static int __nilfs_btree_get_block(const struct nilfs_bmap *btree, __u64 ptr, - struct buffer_head **bhp, - const struct nilfs_btree_readahead_info *ra) - { -- struct address_space *btnc = &NILFS_BMAP_I(btree)->i_btnode_cache; -+ struct inode *btnc_inode = NILFS_BMAP_I(btree)->i_assoc_inode; -+ struct address_space *btnc = btnc_inode->i_mapping; - struct buffer_head *bh, *ra_bh; - sector_t submit_ptr = 0; - int ret; -@@ -1741,6 +1743,10 @@ nilfs_btree_prepare_convert_and_insert(struct nilfs_bmap *btree, __u64 key, - dat = nilfs_bmap_get_dat(btree); - } - -+ ret = nilfs_attach_btree_node_cache(&NILFS_BMAP_I(btree)->vfs_inode); -+ if (ret < 0) -+ return ret; ++static inline bool io_req_needs_clean(struct io_kiocb *req) ++{ ++ return req->flags & IO_REQ_CLEAN_FLAGS; ++} + - ret = 
nilfs_bmap_prepare_alloc_ptr(btree, dreq, dat); - if (ret < 0) - return ret; -@@ -1913,7 +1919,7 @@ static int nilfs_btree_prepare_update_v(struct nilfs_bmap *btree, - path[level].bp_ctxt.newkey = path[level].bp_newreq.bpr_ptr; - path[level].bp_ctxt.bh = path[level].bp_bh; - ret = nilfs_btnode_prepare_change_key( -- &NILFS_BMAP_I(btree)->i_btnode_cache, -+ NILFS_BMAP_I(btree)->i_assoc_inode->i_mapping, - &path[level].bp_ctxt); - if (ret < 0) { - nilfs_dat_abort_update(dat, -@@ -1939,7 +1945,7 @@ static void nilfs_btree_commit_update_v(struct nilfs_bmap *btree, - - if (buffer_nilfs_node(path[level].bp_bh)) { - nilfs_btnode_commit_change_key( -- &NILFS_BMAP_I(btree)->i_btnode_cache, -+ NILFS_BMAP_I(btree)->i_assoc_inode->i_mapping, - &path[level].bp_ctxt); - path[level].bp_bh = path[level].bp_ctxt.bh; - } -@@ -1958,7 +1964,7 @@ static void nilfs_btree_abort_update_v(struct nilfs_bmap *btree, - &path[level].bp_newreq.bpr_req); - if (buffer_nilfs_node(path[level].bp_bh)) - nilfs_btnode_abort_change_key( -- &NILFS_BMAP_I(btree)->i_btnode_cache, -+ NILFS_BMAP_I(btree)->i_assoc_inode->i_mapping, - &path[level].bp_ctxt); - } - -@@ -2134,7 +2140,8 @@ static void nilfs_btree_add_dirty_buffer(struct nilfs_bmap *btree, - static void nilfs_btree_lookup_dirty_buffers(struct nilfs_bmap *btree, - struct list_head *listp) - { -- struct address_space *btcache = &NILFS_BMAP_I(btree)->i_btnode_cache; -+ struct inode *btnc_inode = NILFS_BMAP_I(btree)->i_assoc_inode; -+ struct address_space *btcache = btnc_inode->i_mapping; - struct list_head lists[NILFS_BTREE_LEVEL_MAX]; - struct pagevec pvec; - struct buffer_head *bh, *head; -@@ -2188,12 +2195,12 @@ static int nilfs_btree_assign_p(struct nilfs_bmap *btree, - path[level].bp_ctxt.newkey = blocknr; - path[level].bp_ctxt.bh = *bh; - ret = nilfs_btnode_prepare_change_key( -- &NILFS_BMAP_I(btree)->i_btnode_cache, -+ NILFS_BMAP_I(btree)->i_assoc_inode->i_mapping, - &path[level].bp_ctxt); - if (ret < 0) - return ret; - nilfs_btnode_commit_change_key( -- &NILFS_BMAP_I(btree)->i_btnode_cache, -+ NILFS_BMAP_I(btree)->i_assoc_inode->i_mapping, - &path[level].bp_ctxt); - *bh = path[level].bp_ctxt.bh; - } -@@ -2398,6 +2405,10 @@ int nilfs_btree_init(struct nilfs_bmap *bmap) - - if (nilfs_btree_root_broken(nilfs_btree_get_root(bmap), bmap->b_inode)) - ret = -EIO; ++static inline void io_req_complete_state(struct io_kiocb *req, s32 res, ++ u32 cflags) ++{ ++ if (io_req_needs_clean(req)) ++ io_clean_op(req); ++ req->result = res; ++ req->compl.cflags = cflags; ++ req->flags |= REQ_F_COMPLETE_INLINE; ++} ++ ++static inline void __io_req_complete(struct io_kiocb *req, unsigned issue_flags, ++ s32 res, u32 cflags) ++{ ++ if (issue_flags & IO_URING_F_COMPLETE_DEFER) ++ io_req_complete_state(req, res, cflags); + else -+ ret = nilfs_attach_btree_node_cache( -+ &NILFS_BMAP_I(bmap)->vfs_inode); ++ io_req_complete_post(req, res, cflags); ++} + - return ret; - } - -diff --git a/fs/nilfs2/dat.c b/fs/nilfs2/dat.c -index 8bccdf1158fce..1a3d183027b9e 100644 ---- a/fs/nilfs2/dat.c -+++ b/fs/nilfs2/dat.c -@@ -497,7 +497,9 @@ int nilfs_dat_read(struct super_block *sb, size_t entry_size, - di = NILFS_DAT_I(dat); - lockdep_set_class(&di->mi.mi_sem, &dat_lock_key); - nilfs_palloc_setup_cache(dat, &di->palloc_cache); -- nilfs_mdt_setup_shadow_map(dat, &di->shadow); -+ err = nilfs_mdt_setup_shadow_map(dat, &di->shadow); -+ if (err) -+ goto failed; - - err = nilfs_read_inode_common(dat, raw_inode); - if (err) -diff --git a/fs/nilfs2/gcinode.c b/fs/nilfs2/gcinode.c -index 
4483204968568..aadea660c66c9 100644 ---- a/fs/nilfs2/gcinode.c -+++ b/fs/nilfs2/gcinode.c -@@ -126,9 +126,10 @@ int nilfs_gccache_submit_read_data(struct inode *inode, sector_t blkoff, - int nilfs_gccache_submit_read_node(struct inode *inode, sector_t pbn, - __u64 vbn, struct buffer_head **out_bh) - { -+ struct inode *btnc_inode = NILFS_I(inode)->i_assoc_inode; - int ret; - -- ret = nilfs_btnode_submit_block(&NILFS_I(inode)->i_btnode_cache, -+ ret = nilfs_btnode_submit_block(btnc_inode->i_mapping, - vbn ? : pbn, pbn, REQ_OP_READ, 0, - out_bh, &pbn); - if (ret == -EEXIST) /* internal code (cache hit) */ -@@ -170,7 +171,7 @@ int nilfs_init_gcinode(struct inode *inode) - ii->i_flags = 0; - nilfs_bmap_init_gc(ii->i_bmap); - -- return 0; -+ return nilfs_attach_btree_node_cache(inode); - } - - /** -@@ -185,7 +186,7 @@ void nilfs_remove_all_gcinodes(struct the_nilfs *nilfs) - ii = list_first_entry(head, struct nilfs_inode_info, i_dirty); - list_del_init(&ii->i_dirty); - truncate_inode_pages(&ii->vfs_inode.i_data, 0); -- nilfs_btnode_cache_clear(&ii->i_btnode_cache); -+ nilfs_btnode_cache_clear(ii->i_assoc_inode->i_mapping); - iput(&ii->vfs_inode); - } - } -diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c -index 2e8eb263cf0f6..f4e74fac2c51f 100644 ---- a/fs/nilfs2/inode.c -+++ b/fs/nilfs2/inode.c -@@ -29,12 +29,16 @@ - * @cno: checkpoint number - * @root: pointer on NILFS root object (mounted checkpoint) - * @for_gc: inode for GC flag -+ * @for_btnc: inode for B-tree node cache flag -+ * @for_shadow: inode for shadowed page cache flag - */ - struct nilfs_iget_args { - u64 ino; - __u64 cno; - struct nilfs_root *root; -- int for_gc; -+ bool for_gc; -+ bool for_btnc; -+ bool for_shadow; - }; - - static int nilfs_iget_test(struct inode *inode, void *opaque); -@@ -314,7 +318,8 @@ static int nilfs_insert_inode_locked(struct inode *inode, - unsigned long ino) - { - struct nilfs_iget_args args = { -- .ino = ino, .root = root, .cno = 0, .for_gc = 0 -+ .ino = ino, .root = root, .cno = 0, .for_gc = false, -+ .for_btnc = false, .for_shadow = false - }; - - return insert_inode_locked4(inode, ino, nilfs_iget_test, &args); -@@ -327,6 +332,7 @@ struct inode *nilfs_new_inode(struct inode *dir, umode_t mode) - struct inode *inode; - struct nilfs_inode_info *ii; - struct nilfs_root *root; -+ struct buffer_head *bh; - int err = -ENOMEM; - ino_t ino; - -@@ -342,11 +348,25 @@ struct inode *nilfs_new_inode(struct inode *dir, umode_t mode) - ii->i_state = BIT(NILFS_I_NEW); - ii->i_root = root; - -- err = nilfs_ifile_create_inode(root->ifile, &ino, &ii->i_bh); -+ err = nilfs_ifile_create_inode(root->ifile, &ino, &bh); - if (unlikely(err)) - goto failed_ifile_create_inode; - /* reference count of i_bh inherits from nilfs_mdt_read_block() */ - -+ if (unlikely(ino < NILFS_USER_INO)) { -+ nilfs_warn(sb, -+ "inode bitmap is inconsistent for reserved inodes"); -+ do { -+ brelse(bh); -+ err = nilfs_ifile_create_inode(root->ifile, &ino, &bh); -+ if (unlikely(err)) -+ goto failed_ifile_create_inode; -+ } while (ino < NILFS_USER_INO); ++static inline void io_req_complete(struct io_kiocb *req, s32 res) ++{ ++ __io_req_complete(req, 0, res, 0); ++} + -+ nilfs_info(sb, "repaired inode bitmap for reserved inodes"); ++static void io_req_complete_failed(struct io_kiocb *req, s32 res) ++{ ++ req_set_fail(req); ++ io_req_complete_post(req, res, 0); ++} ++ ++static void io_req_complete_fail_submit(struct io_kiocb *req) ++{ ++ /* ++ * We don't submit, fail them all, for that replace hardlinks with ++ * normal links. 
Extra REQ_F_LINK is tolerated. ++ */ ++ req->flags &= ~REQ_F_HARDLINK; ++ req->flags |= REQ_F_LINK; ++ io_req_complete_failed(req, req->result); ++} ++ ++/* ++ * Don't initialise the fields below on every allocation, but do that in ++ * advance and keep them valid across allocations. ++ */ ++static void io_preinit_req(struct io_kiocb *req, struct io_ring_ctx *ctx) ++{ ++ req->ctx = ctx; ++ req->link = NULL; ++ req->async_data = NULL; ++ /* not necessary, but safer to zero */ ++ req->result = 0; ++} ++ ++static void io_flush_cached_locked_reqs(struct io_ring_ctx *ctx, ++ struct io_submit_state *state) ++{ ++ spin_lock(&ctx->completion_lock); ++ list_splice_init(&ctx->locked_free_list, &state->free_list); ++ ctx->locked_free_nr = 0; ++ spin_unlock(&ctx->completion_lock); ++} ++ ++/* Returns true IFF there are requests in the cache */ ++static bool io_flush_cached_reqs(struct io_ring_ctx *ctx) ++{ ++ struct io_submit_state *state = &ctx->submit_state; ++ int nr; ++ ++ /* ++ * If we have more than a batch's worth of requests in our IRQ side ++ * locked cache, grab the lock and move them over to our submission ++ * side cache. ++ */ ++ if (READ_ONCE(ctx->locked_free_nr) > IO_COMPL_BATCH) ++ io_flush_cached_locked_reqs(ctx, state); ++ ++ nr = state->free_reqs; ++ while (!list_empty(&state->free_list)) { ++ struct io_kiocb *req = list_first_entry(&state->free_list, ++ struct io_kiocb, inflight_entry); ++ ++ list_del(&req->inflight_entry); ++ state->reqs[nr++] = req; ++ if (nr == ARRAY_SIZE(state->reqs)) ++ break; + } -+ ii->i_bh = bh; + - atomic64_inc(&root->inodes_count); - inode_init_owner(&init_user_ns, inode, dir, mode); - inode->i_ino = ino; -@@ -439,6 +459,8 @@ int nilfs_read_inode_common(struct inode *inode, - inode->i_atime.tv_nsec = le32_to_cpu(raw_inode->i_mtime_nsec); - inode->i_ctime.tv_nsec = le32_to_cpu(raw_inode->i_ctime_nsec); - inode->i_mtime.tv_nsec = le32_to_cpu(raw_inode->i_mtime_nsec); -+ if (nilfs_is_metadata_file_inode(inode) && !S_ISREG(inode->i_mode)) -+ return -EIO; /* this inode is for metadata and corrupted */ - if (inode->i_nlink == 0) - return -ESTALE; /* this inode is deleted */ - -@@ -527,6 +549,19 @@ static int nilfs_iget_test(struct inode *inode, void *opaque) - return 0; - - ii = NILFS_I(inode); -+ if (test_bit(NILFS_I_BTNC, &ii->i_state)) { -+ if (!args->for_btnc) -+ return 0; -+ } else if (args->for_btnc) { -+ return 0; ++ state->free_reqs = nr; ++ return nr != 0; ++} ++ ++/* ++ * A request might get retired back into the request caches even before opcode ++ * handlers and io_issue_sqe() are done with it, e.g. inline completion path. ++ * Because of that, io_alloc_req() should be called only under ->uring_lock ++ * and with extra caution to not get a request that is still worked on. ++ */ ++static struct io_kiocb *io_alloc_req(struct io_ring_ctx *ctx) ++ __must_hold(&ctx->uring_lock) ++{ ++ struct io_submit_state *state = &ctx->submit_state; ++ gfp_t gfp = GFP_KERNEL | __GFP_NOWARN; ++ int ret, i; ++ ++ BUILD_BUG_ON(ARRAY_SIZE(state->reqs) < IO_REQ_ALLOC_BATCH); ++ ++ if (likely(state->free_reqs || io_flush_cached_reqs(ctx))) ++ goto got_req; ++ ++ ret = kmem_cache_alloc_bulk(req_cachep, gfp, IO_REQ_ALLOC_BATCH, ++ state->reqs); ++ ++ /* ++ * Bulk alloc is all-or-nothing. If we fail to get a batch, ++ * retry single alloc to be on the safe side. 
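The all-or-nothing batch allocation with a single-object fallback reads, in isolation, like this sketch (malloc stands in for kmem_cache_alloc_bulk/kmem_cache_alloc, and the batch is forced to fail so the fallback path runs):

    #include <stdio.h>
    #include <stdlib.h>

    #define IO_REQ_ALLOC_BATCH 8

    static int alloc_bulk(void **out, int want)
    {
        (void)out; (void)want;
        return 0; /* model the "all or nothing" batch failing outright */
    }

    int main(void)
    {
        void *reqs[IO_REQ_ALLOC_BATCH];
        int got = alloc_bulk(reqs, IO_REQ_ALLOC_BATCH);

        if (got <= 0) { /* batch failed: retry a single allocation */
            reqs[0] = malloc(64);
            if (!reqs[0])
                return 1;
            got = 1;
        }
        printf("allocated %d request(s)\n", got);
        free(reqs[0]);
        return 0;
    }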
++ */ ++ if (unlikely(ret <= 0)) { ++ state->reqs[0] = kmem_cache_alloc(req_cachep, gfp); ++ if (!state->reqs[0]) ++ return NULL; ++ ret = 1; + } -+ if (test_bit(NILFS_I_SHADOW, &ii->i_state)) { -+ if (!args->for_shadow) -+ return 0; -+ } else if (args->for_shadow) { -+ return 0; ++ ++ for (i = 0; i < ret; i++) ++ io_preinit_req(state->reqs[i], ctx); ++ state->free_reqs = ret; ++got_req: ++ state->free_reqs--; ++ return state->reqs[state->free_reqs]; ++} ++ ++static inline void io_put_file(struct file *file) ++{ ++ if (file) ++ fput(file); ++} ++ ++static void io_dismantle_req(struct io_kiocb *req) ++{ ++ unsigned int flags = req->flags; ++ ++ if (io_req_needs_clean(req)) ++ io_clean_op(req); ++ if (!(flags & REQ_F_FIXED_FILE)) ++ io_put_file(req->file); ++ if (req->fixed_rsrc_refs) ++ percpu_ref_put(req->fixed_rsrc_refs); ++ if (req->async_data) { ++ kfree(req->async_data); ++ req->async_data = NULL; + } ++} + - if (!test_bit(NILFS_I_GCINODE, &ii->i_state)) - return !args->for_gc; - -@@ -538,15 +573,17 @@ static int nilfs_iget_set(struct inode *inode, void *opaque) - struct nilfs_iget_args *args = opaque; - - inode->i_ino = args->ino; -- if (args->for_gc) { -+ NILFS_I(inode)->i_cno = args->cno; -+ NILFS_I(inode)->i_root = args->root; -+ if (args->root && args->ino == NILFS_ROOT_INO) -+ nilfs_get_root(args->root); ++static void __io_free_req(struct io_kiocb *req) ++{ ++ struct io_ring_ctx *ctx = req->ctx; + -+ if (args->for_gc) - NILFS_I(inode)->i_state = BIT(NILFS_I_GCINODE); -- NILFS_I(inode)->i_cno = args->cno; -- NILFS_I(inode)->i_root = NULL; -- } else { -- if (args->root && args->ino == NILFS_ROOT_INO) -- nilfs_get_root(args->root); -- NILFS_I(inode)->i_root = args->root; -- } -+ if (args->for_btnc) -+ NILFS_I(inode)->i_state |= BIT(NILFS_I_BTNC); -+ if (args->for_shadow) -+ NILFS_I(inode)->i_state |= BIT(NILFS_I_SHADOW); - return 0; - } - -@@ -554,7 +591,8 @@ struct inode *nilfs_ilookup(struct super_block *sb, struct nilfs_root *root, - unsigned long ino) - { - struct nilfs_iget_args args = { -- .ino = ino, .root = root, .cno = 0, .for_gc = 0 -+ .ino = ino, .root = root, .cno = 0, .for_gc = false, -+ .for_btnc = false, .for_shadow = false - }; - - return ilookup5(sb, ino, nilfs_iget_test, &args); -@@ -564,7 +602,8 @@ struct inode *nilfs_iget_locked(struct super_block *sb, struct nilfs_root *root, - unsigned long ino) - { - struct nilfs_iget_args args = { -- .ino = ino, .root = root, .cno = 0, .for_gc = 0 -+ .ino = ino, .root = root, .cno = 0, .for_gc = false, -+ .for_btnc = false, .for_shadow = false - }; - - return iget5_locked(sb, ino, nilfs_iget_test, nilfs_iget_set, &args); -@@ -595,7 +634,8 @@ struct inode *nilfs_iget_for_gc(struct super_block *sb, unsigned long ino, - __u64 cno) - { - struct nilfs_iget_args args = { -- .ino = ino, .root = NULL, .cno = cno, .for_gc = 1 -+ .ino = ino, .root = NULL, .cno = cno, .for_gc = true, -+ .for_btnc = false, .for_shadow = false - }; - struct inode *inode; - int err; -@@ -615,6 +655,113 @@ struct inode *nilfs_iget_for_gc(struct super_block *sb, unsigned long ino, - return inode; - } - -+/** -+ * nilfs_attach_btree_node_cache - attach a B-tree node cache to the inode -+ * @inode: inode object -+ * -+ * nilfs_attach_btree_node_cache() attaches a B-tree node cache to @inode, -+ * or does nothing if the inode already has it. This function allocates -+ * an additional inode to maintain page cache of B-tree nodes one-on-one. -+ * -+ * Return Value: On success, 0 is returned. On errors, one of the following -+ * negative error code is returned. 
-+ * -+ * %-ENOMEM - Insufficient memory available. -+ */ -+int nilfs_attach_btree_node_cache(struct inode *inode) ++ io_dismantle_req(req); ++ io_put_task(req->task, 1); ++ ++ spin_lock(&ctx->completion_lock); ++ list_add(&req->inflight_entry, &ctx->locked_free_list); ++ ctx->locked_free_nr++; ++ spin_unlock(&ctx->completion_lock); ++ ++ percpu_ref_put(&ctx->refs); ++} ++ ++static inline void io_remove_next_linked(struct io_kiocb *req) +{ -+ struct nilfs_inode_info *ii = NILFS_I(inode); -+ struct inode *btnc_inode; -+ struct nilfs_iget_args args; ++ struct io_kiocb *nxt = req->link; + -+ if (ii->i_assoc_inode) -+ return 0; ++ req->link = nxt->link; ++ nxt->link = NULL; ++} + -+ args.ino = inode->i_ino; -+ args.root = ii->i_root; -+ args.cno = ii->i_cno; -+ args.for_gc = test_bit(NILFS_I_GCINODE, &ii->i_state) != 0; -+ args.for_btnc = true; -+ args.for_shadow = test_bit(NILFS_I_SHADOW, &ii->i_state) != 0; ++static bool io_kill_linked_timeout(struct io_kiocb *req) ++ __must_hold(&req->ctx->completion_lock) ++ __must_hold(&req->ctx->timeout_lock) ++{ ++ struct io_kiocb *link = req->link; + -+ btnc_inode = iget5_locked(inode->i_sb, inode->i_ino, nilfs_iget_test, -+ nilfs_iget_set, &args); -+ if (unlikely(!btnc_inode)) -+ return -ENOMEM; -+ if (btnc_inode->i_state & I_NEW) { -+ nilfs_init_btnc_inode(btnc_inode); -+ unlock_new_inode(btnc_inode); ++ if (link && link->opcode == IORING_OP_LINK_TIMEOUT) { ++ struct io_timeout_data *io = link->async_data; ++ ++ io_remove_next_linked(req); ++ link->timeout.head = NULL; ++ if (hrtimer_try_to_cancel(&io->timer) != -1) { ++ list_del(&link->timeout.list); ++ io_fill_cqe_req(link, -ECANCELED, 0); ++ io_put_req_deferred(link); ++ return true; ++ } + } -+ NILFS_I(btnc_inode)->i_assoc_inode = inode; -+ NILFS_I(btnc_inode)->i_bmap = ii->i_bmap; -+ ii->i_assoc_inode = btnc_inode; ++ return false; ++} + -+ return 0; ++static void io_fail_links(struct io_kiocb *req) ++ __must_hold(&req->ctx->completion_lock) ++{ ++ struct io_kiocb *nxt, *link = req->link; ++ ++ req->link = NULL; ++ while (link) { ++ long res = -ECANCELED; ++ ++ if (link->flags & REQ_F_FAIL) ++ res = link->result; ++ ++ nxt = link->link; ++ link->link = NULL; ++ ++ trace_io_uring_fail_link(req, link); ++ io_fill_cqe_req(link, res, 0); ++ io_put_req_deferred(link); ++ link = nxt; ++ } +} + -+/** -+ * nilfs_detach_btree_node_cache - detach the B-tree node cache from the inode -+ * @inode: inode object -+ * -+ * nilfs_detach_btree_node_cache() detaches the B-tree node cache and its -+ * holder inode bound to @inode, or does nothing if @inode doesn't have it. 
-+ */ -+void nilfs_detach_btree_node_cache(struct inode *inode) ++static bool io_disarm_next(struct io_kiocb *req) ++ __must_hold(&req->ctx->completion_lock) +{ -+ struct nilfs_inode_info *ii = NILFS_I(inode); -+ struct inode *btnc_inode = ii->i_assoc_inode; ++ bool posted = false; + -+ if (btnc_inode) { -+ NILFS_I(btnc_inode)->i_assoc_inode = NULL; -+ ii->i_assoc_inode = NULL; -+ iput(btnc_inode); ++ if (req->flags & REQ_F_ARM_LTIMEOUT) { ++ struct io_kiocb *link = req->link; ++ ++ req->flags &= ~REQ_F_ARM_LTIMEOUT; ++ if (link && link->opcode == IORING_OP_LINK_TIMEOUT) { ++ io_remove_next_linked(req); ++ io_fill_cqe_req(link, -ECANCELED, 0); ++ io_put_req_deferred(link); ++ posted = true; ++ } ++ } else if (req->flags & REQ_F_LINK_TIMEOUT) { ++ struct io_ring_ctx *ctx = req->ctx; ++ ++ spin_lock_irq(&ctx->timeout_lock); ++ posted = io_kill_linked_timeout(req); ++ spin_unlock_irq(&ctx->timeout_lock); ++ } ++ if (unlikely((req->flags & REQ_F_FAIL) && ++ !(req->flags & REQ_F_HARDLINK))) { ++ posted |= (req->link != NULL); ++ io_fail_links(req); + } ++ return posted; +} + -+/** -+ * nilfs_iget_for_shadow - obtain inode for shadow mapping -+ * @inode: inode object that uses shadow mapping -+ * -+ * nilfs_iget_for_shadow() allocates a pair of inodes that holds page -+ * caches for shadow mapping. The page cache for data pages is set up -+ * in one inode and the one for b-tree node pages is set up in the -+ * other inode, which is attached to the former inode. -+ * -+ * Return Value: On success, a pointer to the inode for data pages is -+ * returned. On errors, one of the following negative error code is returned -+ * in a pointer type. -+ * -+ * %-ENOMEM - Insufficient memory available. -+ */ -+struct inode *nilfs_iget_for_shadow(struct inode *inode) ++static struct io_kiocb *__io_req_find_next(struct io_kiocb *req) +{ -+ struct nilfs_iget_args args = { -+ .ino = inode->i_ino, .root = NULL, .cno = 0, .for_gc = false, -+ .for_btnc = false, .for_shadow = true -+ }; -+ struct inode *s_inode; -+ int err; ++ struct io_kiocb *nxt; + -+ s_inode = iget5_locked(inode->i_sb, inode->i_ino, nilfs_iget_test, -+ nilfs_iget_set, &args); -+ if (unlikely(!s_inode)) -+ return ERR_PTR(-ENOMEM); -+ if (!(s_inode->i_state & I_NEW)) -+ return inode; ++ /* ++ * If LINK is set, we have dependent requests in this chain. If we ++ * didn't fail this request, queue the first one up, moving any other ++ * dependencies to the next request. In case of failure, fail the rest ++ * of the chain. 
++ */ ++ if (req->flags & IO_DISARM_MASK) { ++ struct io_ring_ctx *ctx = req->ctx; ++ bool posted; + -+ NILFS_I(s_inode)->i_flags = 0; -+ memset(NILFS_I(s_inode)->i_bmap, 0, sizeof(struct nilfs_bmap)); -+ mapping_set_gfp_mask(s_inode->i_mapping, GFP_NOFS); ++ spin_lock(&ctx->completion_lock); ++ posted = io_disarm_next(req); ++ if (posted) ++ io_commit_cqring(req->ctx); ++ spin_unlock(&ctx->completion_lock); ++ if (posted) ++ io_cqring_ev_posted(ctx); ++ } ++ nxt = req->link; ++ req->link = NULL; ++ return nxt; ++} + -+ err = nilfs_attach_btree_node_cache(s_inode); -+ if (unlikely(err)) { -+ iget_failed(s_inode); -+ return ERR_PTR(err); ++static inline struct io_kiocb *io_req_find_next(struct io_kiocb *req) ++{ ++ if (likely(!(req->flags & (REQ_F_LINK|REQ_F_HARDLINK)))) ++ return NULL; ++ return __io_req_find_next(req); ++} ++ ++static void ctx_flush_and_put(struct io_ring_ctx *ctx, bool *locked) ++{ ++ if (!ctx) ++ return; ++ if (*locked) { ++ if (ctx->submit_state.compl_nr) ++ io_submit_flush_completions(ctx); ++ mutex_unlock(&ctx->uring_lock); ++ *locked = false; + } -+ unlock_new_inode(s_inode); -+ return s_inode; ++ percpu_ref_put(&ctx->refs); +} + - void nilfs_write_inode_common(struct inode *inode, - struct nilfs_inode *raw_inode, int has_bmap) - { -@@ -762,7 +909,8 @@ static void nilfs_clear_inode(struct inode *inode) - if (test_bit(NILFS_I_BMAP, &ii->i_state)) - nilfs_bmap_clear(ii->i_bmap); - -- nilfs_btnode_cache_clear(&ii->i_btnode_cache); -+ if (!test_bit(NILFS_I_BTNC, &ii->i_state)) -+ nilfs_detach_btree_node_cache(inode); - - if (ii->i_root && inode->i_ino == NILFS_ROOT_INO) - nilfs_put_root(ii->i_root); -diff --git a/fs/nilfs2/mdt.c b/fs/nilfs2/mdt.c -index 97769fe4d5885..131b5add32eeb 100644 ---- a/fs/nilfs2/mdt.c -+++ b/fs/nilfs2/mdt.c -@@ -470,9 +470,18 @@ int nilfs_mdt_init(struct inode *inode, gfp_t gfp_mask, size_t objsz) - void nilfs_mdt_clear(struct inode *inode) - { - struct nilfs_mdt_info *mdi = NILFS_MDT(inode); -+ struct nilfs_shadow_map *shadow = mdi->mi_shadow; - - if (mdi->mi_palloc_cache) - nilfs_palloc_destroy_cache(inode); ++static void tctx_task_work(struct callback_head *cb) ++{ ++ bool locked = false; ++ struct io_ring_ctx *ctx = NULL; ++ struct io_uring_task *tctx = container_of(cb, struct io_uring_task, ++ task_work); + -+ if (shadow) { -+ struct inode *s_inode = shadow->inode; ++ while (1) { ++ struct io_wq_work_node *node; ++ ++ if (!tctx->task_list.first && locked && ctx->submit_state.compl_nr) ++ io_submit_flush_completions(ctx); ++ ++ spin_lock_irq(&tctx->task_lock); ++ node = tctx->task_list.first; ++ INIT_WQ_LIST(&tctx->task_list); ++ if (!node) ++ tctx->task_running = false; ++ spin_unlock_irq(&tctx->task_lock); ++ if (!node) ++ break; + -+ shadow->inode = NULL; -+ iput(s_inode); -+ mdi->mi_shadow = NULL; ++ do { ++ struct io_wq_work_node *next = node->next; ++ struct io_kiocb *req = container_of(node, struct io_kiocb, ++ io_task_work.node); ++ ++ if (req->ctx != ctx) { ++ ctx_flush_and_put(ctx, &locked); ++ ctx = req->ctx; ++ /* if not contended, grab and improve batching */ ++ locked = mutex_trylock(&ctx->uring_lock); ++ percpu_ref_get(&ctx->refs); ++ } ++ req->io_task_work.func(req, &locked); ++ node = next; ++ } while (node); ++ ++ cond_resched(); + } - } - - /** -@@ -506,12 +515,15 @@ int nilfs_mdt_setup_shadow_map(struct inode *inode, - struct nilfs_shadow_map *shadow) - { - struct nilfs_mdt_info *mi = NILFS_MDT(inode); -+ struct inode *s_inode; - - INIT_LIST_HEAD(&shadow->frozen_buffers); -- 
address_space_init_once(&shadow->frozen_data); -- nilfs_mapping_init(&shadow->frozen_data, inode); -- address_space_init_once(&shadow->frozen_btnodes); -- nilfs_mapping_init(&shadow->frozen_btnodes, inode); + -+ s_inode = nilfs_iget_for_shadow(inode); -+ if (IS_ERR(s_inode)) -+ return PTR_ERR(s_inode); ++ ctx_flush_and_put(ctx, &locked); + -+ shadow->inode = s_inode; - mi->mi_shadow = shadow; - return 0; - } -@@ -525,14 +537,15 @@ int nilfs_mdt_save_to_shadow_map(struct inode *inode) - struct nilfs_mdt_info *mi = NILFS_MDT(inode); - struct nilfs_inode_info *ii = NILFS_I(inode); - struct nilfs_shadow_map *shadow = mi->mi_shadow; -+ struct inode *s_inode = shadow->inode; - int ret; - -- ret = nilfs_copy_dirty_pages(&shadow->frozen_data, inode->i_mapping); -+ ret = nilfs_copy_dirty_pages(s_inode->i_mapping, inode->i_mapping); - if (ret) - goto out; - -- ret = nilfs_copy_dirty_pages(&shadow->frozen_btnodes, -- &ii->i_btnode_cache); -+ ret = nilfs_copy_dirty_pages(NILFS_I(s_inode)->i_assoc_inode->i_mapping, -+ ii->i_assoc_inode->i_mapping); - if (ret) - goto out; - -@@ -548,7 +561,7 @@ int nilfs_mdt_freeze_buffer(struct inode *inode, struct buffer_head *bh) - struct page *page; - int blkbits = inode->i_blkbits; - -- page = grab_cache_page(&shadow->frozen_data, bh->b_page->index); -+ page = grab_cache_page(shadow->inode->i_mapping, bh->b_page->index); - if (!page) - return -ENOMEM; - -@@ -580,7 +593,7 @@ nilfs_mdt_get_frozen_buffer(struct inode *inode, struct buffer_head *bh) - struct page *page; - int n; - -- page = find_lock_page(&shadow->frozen_data, bh->b_page->index); -+ page = find_lock_page(shadow->inode->i_mapping, bh->b_page->index); - if (page) { - if (page_has_buffers(page)) { - n = bh_offset(bh) >> inode->i_blkbits; -@@ -621,10 +634,11 @@ void nilfs_mdt_restore_from_shadow_map(struct inode *inode) - nilfs_palloc_clear_cache(inode); - - nilfs_clear_dirty_pages(inode->i_mapping, true); -- nilfs_copy_back_pages(inode->i_mapping, &shadow->frozen_data); -+ nilfs_copy_back_pages(inode->i_mapping, shadow->inode->i_mapping); - -- nilfs_clear_dirty_pages(&ii->i_btnode_cache, true); -- nilfs_copy_back_pages(&ii->i_btnode_cache, &shadow->frozen_btnodes); -+ nilfs_clear_dirty_pages(ii->i_assoc_inode->i_mapping, true); -+ nilfs_copy_back_pages(ii->i_assoc_inode->i_mapping, -+ NILFS_I(shadow->inode)->i_assoc_inode->i_mapping); - - nilfs_bmap_restore(ii->i_bmap, &shadow->bmap_store); - -@@ -639,10 +653,11 @@ void nilfs_mdt_clear_shadow_map(struct inode *inode) - { - struct nilfs_mdt_info *mi = NILFS_MDT(inode); - struct nilfs_shadow_map *shadow = mi->mi_shadow; -+ struct inode *shadow_btnc_inode = NILFS_I(shadow->inode)->i_assoc_inode; - - down_write(&mi->mi_sem); - nilfs_release_frozen_buffers(shadow); -- truncate_inode_pages(&shadow->frozen_data, 0); -- truncate_inode_pages(&shadow->frozen_btnodes, 0); -+ truncate_inode_pages(shadow->inode->i_mapping, 0); -+ truncate_inode_pages(shadow_btnc_inode->i_mapping, 0); - up_write(&mi->mi_sem); - } -diff --git a/fs/nilfs2/mdt.h b/fs/nilfs2/mdt.h -index e77aea4bb921c..9d8ac0d27c16e 100644 ---- a/fs/nilfs2/mdt.h -+++ b/fs/nilfs2/mdt.h -@@ -18,14 +18,12 @@ - /** - * struct nilfs_shadow_map - shadow mapping of meta data file - * @bmap_store: shadow copy of bmap state -- * @frozen_data: shadowed dirty data pages -- * @frozen_btnodes: shadowed dirty b-tree nodes' pages -+ * @inode: holder of page caches used in shadow mapping - * @frozen_buffers: list of frozen buffers - */ - struct nilfs_shadow_map { - struct nilfs_bmap_store bmap_store; -- struct 
address_space frozen_data; -- struct address_space frozen_btnodes; -+ struct inode *inode; - struct list_head frozen_buffers; - }; - -diff --git a/fs/nilfs2/nilfs.h b/fs/nilfs2/nilfs.h -index 60b21b6eeac06..aceb8aadca148 100644 ---- a/fs/nilfs2/nilfs.h -+++ b/fs/nilfs2/nilfs.h -@@ -28,7 +28,7 @@ - * @i_xattr: <TODO> - * @i_dir_start_lookup: page index of last successful search - * @i_cno: checkpoint number for GC inode -- * @i_btnode_cache: cached pages of b-tree nodes -+ * @i_assoc_inode: associated inode (B-tree node cache holder or back pointer) - * @i_dirty: list for connecting dirty files - * @xattr_sem: semaphore for extended attributes processing - * @i_bh: buffer contains disk inode -@@ -43,7 +43,7 @@ struct nilfs_inode_info { - __u64 i_xattr; /* sector_t ??? */ - __u32 i_dir_start_lookup; - __u64 i_cno; /* check point number for GC inode */ -- struct address_space i_btnode_cache; -+ struct inode *i_assoc_inode; - struct list_head i_dirty; /* List for connecting dirty files */ - - #ifdef CONFIG_NILFS_XATTR -@@ -75,13 +75,6 @@ NILFS_BMAP_I(const struct nilfs_bmap *bmap) - return container_of(bmap, struct nilfs_inode_info, i_bmap_data); - } - --static inline struct inode *NILFS_BTNC_I(struct address_space *btnc) --{ -- struct nilfs_inode_info *ii = -- container_of(btnc, struct nilfs_inode_info, i_btnode_cache); -- return &ii->vfs_inode; --} -- - /* - * Dynamic state flags of NILFS on-memory inode (i_state) - */ -@@ -98,6 +91,8 @@ enum { - NILFS_I_INODE_SYNC, /* dsync is not allowed for inode */ - NILFS_I_BMAP, /* has bmap and btnode_cache */ - NILFS_I_GCINODE, /* inode for GC, on memory only */ -+ NILFS_I_BTNC, /* inode for btree node cache */ -+ NILFS_I_SHADOW, /* inode for shadowed page cache */ - }; - - /* -@@ -203,6 +198,9 @@ static inline int nilfs_acl_chmod(struct inode *inode) - - static inline int nilfs_init_acl(struct inode *inode, struct inode *dir) - { -+ if (S_ISLNK(inode->i_mode)) -+ return 0; ++ /* relaxed read is enough as only the task itself sets ->in_idle */ ++ if (unlikely(atomic_read(&tctx->in_idle))) ++ io_uring_drop_tctx_refs(current); ++} + - inode->i_mode &= ~current_umask(); - return 0; - } -@@ -267,6 +265,9 @@ struct inode *nilfs_iget(struct super_block *sb, struct nilfs_root *root, - unsigned long ino); - extern struct inode *nilfs_iget_for_gc(struct super_block *sb, - unsigned long ino, __u64 cno); -+int nilfs_attach_btree_node_cache(struct inode *inode); -+void nilfs_detach_btree_node_cache(struct inode *inode); -+struct inode *nilfs_iget_for_shadow(struct inode *inode); - extern void nilfs_update_inode(struct inode *, struct buffer_head *, int); - extern void nilfs_truncate(struct inode *); - extern void nilfs_evict_inode(struct inode *); -diff --git a/fs/nilfs2/page.c b/fs/nilfs2/page.c -index 171fb5cd427fd..d1a148f0cae33 100644 ---- a/fs/nilfs2/page.c -+++ b/fs/nilfs2/page.c -@@ -448,10 +448,9 @@ void nilfs_mapping_init(struct address_space *mapping, struct inode *inode) - /* - * NILFS2 needs clear_page_dirty() in the following two cases: - * -- * 1) For B-tree node pages and data pages of the dat/gcdat, NILFS2 clears -- * page dirty flags when it copies back pages from the shadow cache -- * (gcdat->{i_mapping,i_btnode_cache}) to its original cache -- * (dat->{i_mapping,i_btnode_cache}). -+ * 1) For B-tree node pages and data pages of DAT file, NILFS2 clears dirty -+ * flag of pages when it copies back pages from shadow cache to the -+ * original cache. 
- * - * 2) Some B-tree operations like insertion or deletion may dispose buffers - * in dirty state, and this needs to cancel the dirty state of their pages. -diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c -index 686c8ee7b29ce..96c5cab5c8ae5 100644 ---- a/fs/nilfs2/segment.c -+++ b/fs/nilfs2/segment.c -@@ -733,15 +733,18 @@ static void nilfs_lookup_dirty_node_buffers(struct inode *inode, - struct list_head *listp) - { - struct nilfs_inode_info *ii = NILFS_I(inode); -- struct address_space *mapping = &ii->i_btnode_cache; -+ struct inode *btnc_inode = ii->i_assoc_inode; - struct pagevec pvec; - struct buffer_head *bh, *head; - unsigned int i; - pgoff_t index = 0; - -+ if (!btnc_inode) ++static void io_req_task_work_add(struct io_kiocb *req) ++{ ++ struct task_struct *tsk = req->task; ++ struct io_uring_task *tctx = tsk->io_uring; ++ enum task_work_notify_mode notify; ++ struct io_wq_work_node *node; ++ unsigned long flags; ++ bool running; ++ ++ WARN_ON_ONCE(!tctx); ++ ++ spin_lock_irqsave(&tctx->task_lock, flags); ++ wq_list_add_tail(&req->io_task_work.node, &tctx->task_list); ++ running = tctx->task_running; ++ if (!running) ++ tctx->task_running = true; ++ spin_unlock_irqrestore(&tctx->task_lock, flags); ++ ++ /* task_work already pending, we're done */ ++ if (running) ++ return; ++ ++ /* ++ * SQPOLL kernel thread doesn't need notification, just a wakeup. For ++ * all other cases, use TWA_SIGNAL unconditionally to ensure we're ++ * processing task_work. There's no reliable way to tell if TWA_RESUME ++ * will do the job. ++ */ ++ notify = (req->ctx->flags & IORING_SETUP_SQPOLL) ? TWA_NONE : TWA_SIGNAL; ++ if (!task_work_add(tsk, &tctx->task_work, notify)) { ++ wake_up_process(tsk); + return; ++ } ++ ++ spin_lock_irqsave(&tctx->task_lock, flags); ++ tctx->task_running = false; ++ node = tctx->task_list.first; ++ INIT_WQ_LIST(&tctx->task_list); ++ spin_unlock_irqrestore(&tctx->task_lock, flags); ++ ++ while (node) { ++ req = container_of(node, struct io_kiocb, io_task_work.node); ++ node = node->next; ++ if (llist_add(&req->io_task_work.fallback_node, ++ &req->ctx->fallback_llist)) ++ schedule_delayed_work(&req->ctx->fallback_work, 1); ++ } ++} ++ ++static void io_req_task_cancel(struct io_kiocb *req, bool *locked) ++{ ++ struct io_ring_ctx *ctx = req->ctx; ++ ++ /* not needed for normal modes, but SQPOLL depends on it */ ++ io_tw_lock(ctx, locked); ++ io_req_complete_failed(req, req->result); ++} ++ ++static void io_req_task_submit(struct io_kiocb *req, bool *locked) ++{ ++ struct io_ring_ctx *ctx = req->ctx; ++ ++ io_tw_lock(ctx, locked); ++ /* req->task == current here, checking PF_EXITING is safe */ ++ if (likely(!(req->task->flags & PF_EXITING))) ++ __io_queue_sqe(req); ++ else ++ io_req_complete_failed(req, -EFAULT); ++} ++ ++static void io_req_task_queue_fail(struct io_kiocb *req, int ret) ++{ ++ req->result = ret; ++ req->io_task_work.func = io_req_task_cancel; ++ io_req_task_work_add(req); ++} ++ ++static void io_req_task_queue(struct io_kiocb *req) ++{ ++ req->io_task_work.func = io_req_task_submit; ++ io_req_task_work_add(req); ++} + - pagevec_init(&pvec); - -- while (pagevec_lookup_tag(&pvec, mapping, &index, -+ while (pagevec_lookup_tag(&pvec, btnc_inode->i_mapping, &index, - PAGECACHE_TAG_DIRTY)) { - for (i = 0; i < pagevec_count(&pvec); i++) { - bh = head = page_buffers(pvec.pages[i]); -@@ -872,9 +875,11 @@ static int nilfs_segctor_create_checkpoint(struct nilfs_sc_info *sci) - nilfs_mdt_mark_dirty(nilfs->ns_cpfile); - nilfs_cpfile_put_checkpoint( - nilfs->ns_cpfile, 
nilfs->ns_cno, bh_cp); -- } else -- WARN_ON(err == -EINVAL || err == -ENOENT); -- -+ } else if (err == -EINVAL || err == -ENOENT) { -+ nilfs_error(sci->sc_super, -+ "checkpoint creation failed due to metadata corruption."); -+ err = -EIO; ++static void io_req_task_queue_reissue(struct io_kiocb *req) ++{ ++ req->io_task_work.func = io_queue_async_work; ++ io_req_task_work_add(req); ++} ++ ++static inline void io_queue_next(struct io_kiocb *req) ++{ ++ struct io_kiocb *nxt = io_req_find_next(req); ++ ++ if (nxt) ++ io_req_task_queue(nxt); ++} ++ ++static void io_free_req(struct io_kiocb *req) ++{ ++ io_queue_next(req); ++ __io_free_req(req); ++} ++ ++static void io_free_req_work(struct io_kiocb *req, bool *locked) ++{ ++ io_free_req(req); ++} ++ ++struct req_batch { ++ struct task_struct *task; ++ int task_refs; ++ int ctx_refs; ++}; ++ ++static inline void io_init_req_batch(struct req_batch *rb) ++{ ++ rb->task_refs = 0; ++ rb->ctx_refs = 0; ++ rb->task = NULL; ++} ++ ++static void io_req_free_batch_finish(struct io_ring_ctx *ctx, ++ struct req_batch *rb) ++{ ++ if (rb->ctx_refs) ++ percpu_ref_put_many(&ctx->refs, rb->ctx_refs); ++ if (rb->task) ++ io_put_task(rb->task, rb->task_refs); ++} ++ ++static void io_req_free_batch(struct req_batch *rb, struct io_kiocb *req, ++ struct io_submit_state *state) ++{ ++ io_queue_next(req); ++ io_dismantle_req(req); ++ ++ if (req->task != rb->task) { ++ if (rb->task) ++ io_put_task(rb->task, rb->task_refs); ++ rb->task = req->task; ++ rb->task_refs = 0; + } - return err; - } - -@@ -888,7 +893,11 @@ static int nilfs_segctor_fill_in_checkpoint(struct nilfs_sc_info *sci) - err = nilfs_cpfile_get_checkpoint(nilfs->ns_cpfile, nilfs->ns_cno, 0, - &raw_cp, &bh_cp); - if (unlikely(err)) { -- WARN_ON(err == -EINVAL || err == -ENOENT); -+ if (err == -EINVAL || err == -ENOENT) { -+ nilfs_error(sci->sc_super, -+ "checkpoint finalization failed due to metadata corruption."); -+ err = -EIO; -+ } - goto failed_ibh; - } - raw_cp->cp_snapshot_list.ssl_next = 0; -@@ -2410,7 +2419,7 @@ nilfs_remove_written_gcinodes(struct the_nilfs *nilfs, struct list_head *head) - continue; - list_del_init(&ii->i_dirty); - truncate_inode_pages(&ii->vfs_inode.i_data, 0); -- nilfs_btnode_cache_clear(&ii->i_btnode_cache); -+ nilfs_btnode_cache_clear(ii->i_assoc_inode->i_mapping); - iput(&ii->vfs_inode); - } - } -@@ -2783,10 +2792,9 @@ int nilfs_attach_log_writer(struct super_block *sb, struct nilfs_root *root) - inode_attach_wb(nilfs->ns_bdev->bd_inode, NULL); - - err = nilfs_segctor_start_thread(nilfs->ns_writer); -- if (err) { -- kfree(nilfs->ns_writer); -- nilfs->ns_writer = NULL; -- } -+ if (unlikely(err)) -+ nilfs_detach_log_writer(sb); ++ rb->task_refs++; ++ rb->ctx_refs++; + - return err; - } - -diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c -index f6b2d280aab5a..2883ab625f61f 100644 ---- a/fs/nilfs2/super.c -+++ b/fs/nilfs2/super.c -@@ -157,7 +157,8 @@ struct inode *nilfs_alloc_inode(struct super_block *sb) - ii->i_bh = NULL; - ii->i_state = 0; - ii->i_cno = 0; -- nilfs_mapping_init(&ii->i_btnode_cache, &ii->vfs_inode); -+ ii->i_assoc_inode = NULL; -+ ii->i_bmap = &ii->i_bmap_data; - return &ii->vfs_inode; - } - -@@ -1377,8 +1378,6 @@ static void nilfs_inode_init_once(void *obj) - #ifdef CONFIG_NILFS_XATTR - init_rwsem(&ii->xattr_sem); - #endif -- address_space_init_once(&ii->i_btnode_cache); -- ii->i_bmap = &ii->i_bmap_data; - inode_init_once(&ii->vfs_inode); - } - -diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c -index 
6facdf476255d..84ec851211d91 100644 ---- a/fs/notify/fanotify/fanotify_user.c -+++ b/fs/notify/fanotify/fanotify_user.c -@@ -611,9 +611,6 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group, - if (fanotify_is_perm_event(event->mask)) - FANOTIFY_PERM(event)->fd = fd; - -- if (f) -- fd_install(fd, f); -- - if (info_mode) { - ret = copy_info_records_to_user(event, info, info_mode, pidfd, - buf, count); -@@ -621,6 +618,9 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group, - goto out_close_fd; - } - -+ if (f) -+ fd_install(fd, f); ++ if (state->free_reqs != ARRAY_SIZE(state->reqs)) ++ state->reqs[state->free_reqs++] = req; ++ else ++ list_add(&req->inflight_entry, &state->free_list); ++} ++ ++static void io_submit_flush_completions(struct io_ring_ctx *ctx) ++ __must_hold(&ctx->uring_lock) ++{ ++ struct io_submit_state *state = &ctx->submit_state; ++ int i, nr = state->compl_nr; ++ struct req_batch rb; ++ ++ spin_lock(&ctx->completion_lock); ++ for (i = 0; i < nr; i++) { ++ struct io_kiocb *req = state->compl_reqs[i]; ++ ++ __io_fill_cqe(ctx, req->user_data, req->result, ++ req->compl.cflags); ++ } ++ io_commit_cqring(ctx); ++ spin_unlock(&ctx->completion_lock); ++ io_cqring_ev_posted(ctx); ++ ++ io_init_req_batch(&rb); ++ for (i = 0; i < nr; i++) { ++ struct io_kiocb *req = state->compl_reqs[i]; ++ ++ if (req_ref_put_and_test(req)) ++ io_req_free_batch(&rb, req, &ctx->submit_state); ++ } ++ ++ io_req_free_batch_finish(ctx, &rb); ++ state->compl_nr = 0; ++} + - return metadata.event_len; - - out_close_fd: -diff --git a/fs/notify/fdinfo.c b/fs/notify/fdinfo.c -index 57f0d5d9f934e..3451708fd035c 100644 ---- a/fs/notify/fdinfo.c -+++ b/fs/notify/fdinfo.c -@@ -83,16 +83,9 @@ static void inotify_fdinfo(struct seq_file *m, struct fsnotify_mark *mark) - inode_mark = container_of(mark, struct inotify_inode_mark, fsn_mark); - inode = igrab(fsnotify_conn_inode(mark->connector)); - if (inode) { -- /* -- * IN_ALL_EVENTS represents all of the mask bits -- * that we expose to userspace. There is at -- * least one bit (FS_EVENT_ON_CHILD) which is -- * used only internally to the kernel. -- */ -- u32 mask = mark->mask & IN_ALL_EVENTS; -- seq_printf(m, "inotify wd:%x ino:%lx sdev:%x mask:%x ignored_mask:%x ", -+ seq_printf(m, "inotify wd:%x ino:%lx sdev:%x mask:%x ignored_mask:0 ", - inode_mark->wd, inode->i_ino, inode->i_sb->s_dev, -- mask, mark->ignored_mask); -+ inotify_mark_user_mask(mark)); - show_mark_fhandle(m, inode); - seq_putc(m, '\n'); - iput(inode); -diff --git a/fs/notify/inotify/inotify.h b/fs/notify/inotify/inotify.h -index 2007e37119160..8f00151eb731f 100644 ---- a/fs/notify/inotify/inotify.h -+++ b/fs/notify/inotify/inotify.h -@@ -22,6 +22,18 @@ static inline struct inotify_event_info *INOTIFY_E(struct fsnotify_event *fse) - return container_of(fse, struct inotify_event_info, fse); - } - +/* -+ * INOTIFY_USER_FLAGS represents all of the mask bits that we expose to -+ * userspace. There is at least one bit (FS_EVENT_ON_CHILD) which is -+ * used only internally to the kernel. ++ * Drop reference to request, return next in chain (if there is one) if this ++ * was the last reference to this request. 
+ */ -+#define INOTIFY_USER_MASK (IN_ALL_EVENTS | IN_ONESHOT | IN_EXCL_UNLINK) ++static inline struct io_kiocb *io_put_req_find_next(struct io_kiocb *req) ++{ ++ struct io_kiocb *nxt = NULL; + -+static inline __u32 inotify_mark_user_mask(struct fsnotify_mark *fsn_mark) ++ if (req_ref_put_and_test(req)) { ++ nxt = io_req_find_next(req); ++ __io_free_req(req); ++ } ++ return nxt; ++} ++ ++static inline void io_put_req(struct io_kiocb *req) +{ -+ return fsn_mark->mask & INOTIFY_USER_MASK; ++ if (req_ref_put_and_test(req)) ++ io_free_req(req); +} + - extern void inotify_ignored_and_remove_idr(struct fsnotify_mark *fsn_mark, - struct fsnotify_group *group); - extern int inotify_handle_inode_event(struct fsnotify_mark *inode_mark, -diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c -index 62051247f6d21..9fb7701d2f8a0 100644 ---- a/fs/notify/inotify/inotify_user.c -+++ b/fs/notify/inotify/inotify_user.c -@@ -102,7 +102,7 @@ static inline __u32 inotify_arg_to_mask(struct inode *inode, u32 arg) - mask |= FS_EVENT_ON_CHILD; - - /* mask off the flags used to open the fd */ -- mask |= (arg & (IN_ALL_EVENTS | IN_ONESHOT | IN_EXCL_UNLINK)); -+ mask |= (arg & INOTIFY_USER_MASK); - - return mask; - } -diff --git a/fs/notify/mark.c b/fs/notify/mark.c -index fa1d99101f895..bea106fac0901 100644 ---- a/fs/notify/mark.c -+++ b/fs/notify/mark.c -@@ -452,7 +452,7 @@ void fsnotify_free_mark(struct fsnotify_mark *mark) - void fsnotify_destroy_mark(struct fsnotify_mark *mark, - struct fsnotify_group *group) - { -- mutex_lock_nested(&group->mark_mutex, SINGLE_DEPTH_NESTING); -+ mutex_lock(&group->mark_mutex); - fsnotify_detach_mark(mark); - mutex_unlock(&group->mark_mutex); - fsnotify_free_mark(mark); -@@ -767,7 +767,7 @@ void fsnotify_clear_marks_by_group(struct fsnotify_group *group, - * move marks to free to to_free list in one go and then free marks in - * to_free list one by one. - */ -- mutex_lock_nested(&group->mark_mutex, SINGLE_DEPTH_NESTING); -+ mutex_lock(&group->mark_mutex); - list_for_each_entry_safe(mark, lmark, &group->marks_list, g_list) { - if ((1U << mark->connector->type) & type_mask) - list_move(&mark->g_list, &to_free); -@@ -776,7 +776,7 @@ void fsnotify_clear_marks_by_group(struct fsnotify_group *group, - - clear: - while (1) { -- mutex_lock_nested(&group->mark_mutex, SINGLE_DEPTH_NESTING); -+ mutex_lock(&group->mark_mutex); - if (list_empty(head)) { - mutex_unlock(&group->mark_mutex); - break; -diff --git a/fs/ntfs/attrib.c b/fs/ntfs/attrib.c -index d563abc3e1364..914e991731300 100644 ---- a/fs/ntfs/attrib.c -+++ b/fs/ntfs/attrib.c -@@ -592,8 +592,12 @@ static int ntfs_attr_find(const ATTR_TYPE type, const ntfschar *name, - a = (ATTR_RECORD*)((u8*)ctx->attr + - le32_to_cpu(ctx->attr->length)); - for (;; a = (ATTR_RECORD*)((u8*)a + le32_to_cpu(a->length))) { -- if ((u8*)a < (u8*)ctx->mrec || (u8*)a > (u8*)ctx->mrec + -- le32_to_cpu(ctx->mrec->bytes_allocated)) -+ u8 *mrec_end = (u8 *)ctx->mrec + -+ le32_to_cpu(ctx->mrec->bytes_allocated); -+ u8 *name_end = (u8 *)a + le16_to_cpu(a->name_offset) + -+ a->name_length * sizeof(ntfschar); -+ if ((u8*)a < (u8*)ctx->mrec || (u8*)a > mrec_end || -+ name_end > mrec_end) - break; - ctx->attr = a; - if (unlikely(le32_to_cpu(a->type) > le32_to_cpu(type) || -diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c -index ab4f3362466d0..a43adeacd930c 100644 ---- a/fs/ntfs/file.c -+++ b/fs/ntfs/file.c -@@ -1829,7 +1829,7 @@ again: - * pages being swapped out between us bringing them into memory - * and doing the actual copying. 
- */ -- if (unlikely(iov_iter_fault_in_readable(i, bytes))) { -+ if (unlikely(fault_in_iov_iter_readable(i, bytes))) { - status = -EFAULT; - break; - } -diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c -index 4474adb393ca8..517b71c73aa96 100644 ---- a/fs/ntfs/inode.c -+++ b/fs/ntfs/inode.c -@@ -1881,6 +1881,10 @@ int ntfs_read_inode_mount(struct inode *vi) - } - /* Now allocate memory for the attribute list. */ - ni->attr_list_size = (u32)ntfs_attr_size(a); -+ if (!ni->attr_list_size) { -+ ntfs_error(sb, "Attr_list_size is zero"); -+ goto put_err_out; ++static inline void io_put_req_deferred(struct io_kiocb *req) ++{ ++ if (req_ref_put_and_test(req)) { ++ req->io_task_work.func = io_free_req_work; ++ io_req_task_work_add(req); ++ } ++} ++ ++static unsigned io_cqring_events(struct io_ring_ctx *ctx) ++{ ++ /* See comment at the top of this file */ ++ smp_rmb(); ++ return __io_cqring_events(ctx); ++} ++ ++static inline unsigned int io_sqring_entries(struct io_ring_ctx *ctx) ++{ ++ struct io_rings *rings = ctx->rings; ++ ++ /* make sure SQ entry isn't read before tail */ ++ return smp_load_acquire(&rings->sq.tail) - ctx->cached_sq_head; ++} ++ ++static unsigned int io_put_kbuf(struct io_kiocb *req, struct io_buffer *kbuf) ++{ ++ unsigned int cflags; ++ ++ cflags = kbuf->bid << IORING_CQE_BUFFER_SHIFT; ++ cflags |= IORING_CQE_F_BUFFER; ++ req->flags &= ~REQ_F_BUFFER_SELECTED; ++ kfree(kbuf); ++ return cflags; ++} ++ ++static inline unsigned int io_put_rw_kbuf(struct io_kiocb *req) ++{ ++ struct io_buffer *kbuf; ++ ++ if (likely(!(req->flags & REQ_F_BUFFER_SELECTED))) ++ return 0; ++ kbuf = (struct io_buffer *) (unsigned long) req->rw.addr; ++ return io_put_kbuf(req, kbuf); ++} ++ ++static inline bool io_run_task_work(void) ++{ ++ if (test_thread_flag(TIF_NOTIFY_SIGNAL) || current->task_works) { ++ __set_current_state(TASK_RUNNING); ++ tracehook_notify_signal(); ++ return true; ++ } ++ ++ return false; ++} ++ ++/* ++ * Find and free completed poll iocbs ++ */ ++static void io_iopoll_complete(struct io_ring_ctx *ctx, unsigned int *nr_events, ++ struct list_head *done) ++{ ++ struct req_batch rb; ++ struct io_kiocb *req; ++ ++ /* order with ->result store in io_complete_rw_iopoll() */ ++ smp_rmb(); ++ ++ io_init_req_batch(&rb); ++ while (!list_empty(done)) { ++ struct io_uring_cqe *cqe; ++ unsigned cflags; ++ ++ req = list_first_entry(done, struct io_kiocb, inflight_entry); ++ list_del(&req->inflight_entry); ++ cflags = io_put_rw_kbuf(req); ++ (*nr_events)++; ++ ++ cqe = io_get_cqe(ctx); ++ if (cqe) { ++ WRITE_ONCE(cqe->user_data, req->user_data); ++ WRITE_ONCE(cqe->res, req->result); ++ WRITE_ONCE(cqe->flags, cflags); ++ } else { ++ spin_lock(&ctx->completion_lock); ++ io_cqring_event_overflow(ctx, req->user_data, ++ req->result, cflags); ++ spin_unlock(&ctx->completion_lock); + } - ni->attr_list = ntfs_malloc_nofs(ni->attr_list_size); - if (!ni->attr_list) { - ntfs_error(sb, "Not enough memory to allocate buffer " -diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c -index 0d7e948cb29c9..7f69422d5191d 100644 ---- a/fs/ntfs/super.c -+++ b/fs/ntfs/super.c -@@ -2092,7 +2092,8 @@ get_ctx_vol_failed: - // TODO: Initialize security. - /* Get the extended system files' directory inode. 
*/ - vol->extend_ino = ntfs_iget(sb, FILE_Extend); -- if (IS_ERR(vol->extend_ino) || is_bad_inode(vol->extend_ino)) { -+ if (IS_ERR(vol->extend_ino) || is_bad_inode(vol->extend_ino) || -+ !S_ISDIR(vol->extend_ino->i_mode)) { - if (!IS_ERR(vol->extend_ino)) - iput(vol->extend_ino); - ntfs_error(sb, "Failed to load $Extend."); -diff --git a/fs/ntfs3/file.c b/fs/ntfs3/file.c -index 43b1451bff539..7a678a5b1ca5f 100644 ---- a/fs/ntfs3/file.c -+++ b/fs/ntfs3/file.c -@@ -494,7 +494,7 @@ static int ntfs_truncate(struct inode *inode, loff_t new_size) - - down_write(&ni->file.run_lock); - err = attr_set_size(ni, ATTR_DATA, NULL, 0, &ni->file.run, new_size, -- &new_valid, true, NULL); -+ &new_valid, ni->mi.sbi->options->prealloc, NULL); - up_write(&ni->file.run_lock); - - if (new_valid < ni->i_valid) -@@ -661,7 +661,13 @@ static long ntfs_fallocate(struct file *file, int mode, loff_t vbo, loff_t len) - /* - * Normal file: Allocate clusters, do not change 'valid' size. - */ -- err = ntfs_set_size(inode, max(end, i_size)); -+ loff_t new_size = max(end, i_size); + -+ err = inode_newsize_ok(inode, new_size); -+ if (err) -+ goto out; ++ if (req_ref_put_and_test(req)) ++ io_req_free_batch(&rb, req, &ctx->submit_state); ++ } + -+ err = ntfs_set_size(inode, new_size); - if (err) - goto out; - -@@ -761,7 +767,7 @@ int ntfs3_setattr(struct user_namespace *mnt_userns, struct dentry *dentry, - } - inode_dio_wait(inode); - -- if (attr->ia_size < oldsize) -+ if (attr->ia_size <= oldsize) - err = ntfs_truncate(inode, attr->ia_size); - else if (attr->ia_size > oldsize) - err = ntfs_extend(inode, attr->ia_size, 0, NULL); -@@ -989,7 +995,7 @@ static ssize_t ntfs_compress_write(struct kiocb *iocb, struct iov_iter *from) - frame_vbo = pos & ~(frame_size - 1); - index = frame_vbo >> PAGE_SHIFT; - -- if (unlikely(iov_iter_fault_in_readable(from, bytes))) { -+ if (unlikely(fault_in_iov_iter_readable(from, bytes))) { - err = -EFAULT; - goto out; - } -diff --git a/fs/ntfs3/frecord.c b/fs/ntfs3/frecord.c -index 6f47a9c17f896..18842998c8fa3 100644 ---- a/fs/ntfs3/frecord.c -+++ b/fs/ntfs3/frecord.c -@@ -1964,10 +1964,8 @@ int ni_fiemap(struct ntfs_inode *ni, struct fiemap_extent_info *fieinfo, - - vcn += clen; - -- if (vbo + bytes >= end) { -+ if (vbo + bytes >= end) - bytes = end - vbo; -- flags |= FIEMAP_EXTENT_LAST; -- } - - if (vbo + bytes <= valid) { - ; -@@ -1977,6 +1975,9 @@ int ni_fiemap(struct ntfs_inode *ni, struct fiemap_extent_info *fieinfo, - /* vbo < valid && valid < vbo + bytes */ - u64 dlen = valid - vbo; - -+ if (vbo + dlen >= end) -+ flags |= FIEMAP_EXTENT_LAST; ++ io_commit_cqring(ctx); ++ io_cqring_ev_posted_iopoll(ctx); ++ io_req_free_batch_finish(ctx, &rb); ++} + - err = fiemap_fill_next_extent(fieinfo, vbo, lbo, dlen, - flags); - if (err < 0) -@@ -1995,6 +1996,9 @@ int ni_fiemap(struct ntfs_inode *ni, struct fiemap_extent_info *fieinfo, - flags |= FIEMAP_EXTENT_UNWRITTEN; - } - -+ if (vbo + bytes >= end) -+ flags |= FIEMAP_EXTENT_LAST; ++static int io_do_iopoll(struct io_ring_ctx *ctx, unsigned int *nr_events, ++ long min) ++{ ++ struct io_kiocb *req, *tmp; ++ LIST_HEAD(done); ++ bool spin; + - err = fiemap_fill_next_extent(fieinfo, vbo, lbo, bytes, flags); - if (err < 0) - break; -diff --git a/fs/ntfs3/fslog.c b/fs/ntfs3/fslog.c -index 06492f088d602..614513460b8e0 100644 ---- a/fs/ntfs3/fslog.c -+++ b/fs/ntfs3/fslog.c -@@ -1185,8 +1185,6 @@ static int log_read_rst(struct ntfs_log *log, u32 l_size, bool first, - if (!r_page) - return -ENOMEM; - -- memset(info, 0, sizeof(struct restart_info)); -- - /* 
Determine which restart area we are looking for. */ - if (first) { - vbo = 0; -@@ -3791,10 +3789,11 @@ int log_replay(struct ntfs_inode *ni, bool *initialized) - if (!log) - return -ENOMEM; - -+ memset(&rst_info, 0, sizeof(struct restart_info)); ++ /* ++ * Only spin for completions if we don't have multiple devices hanging ++ * off our complete list, and we're under the requested amount. ++ */ ++ spin = !ctx->poll_multi_queue && *nr_events < min; + - log->ni = ni; - log->l_size = l_size; - log->one_page_buf = kmalloc(page_size, GFP_NOFS); -- - if (!log->one_page_buf) { - err = -ENOMEM; - goto out; -@@ -3842,6 +3841,7 @@ int log_replay(struct ntfs_inode *ni, bool *initialized) - if (rst_info.vbo) - goto check_restart_area; - -+ memset(&rst_info2, 0, sizeof(struct restart_info)); - err = log_read_rst(log, l_size, false, &rst_info2); - - /* Determine which restart area to use. */ -@@ -4085,8 +4085,10 @@ process_log: - if (client == LFS_NO_CLIENT_LE) { - /* Insert "NTFS" client LogFile. */ - client = ra->client_idx[0]; -- if (client == LFS_NO_CLIENT_LE) -- return -EINVAL; -+ if (client == LFS_NO_CLIENT_LE) { -+ err = -EINVAL; -+ goto out; -+ } - - t16 = le16_to_cpu(client); - cr = ca + t16; -@@ -5055,7 +5057,7 @@ undo_action_next: - goto add_allocated_vcns; - - vcn = le64_to_cpu(lrh->target_vcn); -- vcn &= ~(log->clst_per_page - 1); -+ vcn &= ~(u64)(log->clst_per_page - 1); - - add_allocated_vcns: - for (i = 0, vcn = le64_to_cpu(lrh->target_vcn), -diff --git a/fs/ntfs3/fsntfs.c b/fs/ntfs3/fsntfs.c -index 4de9acb169689..24b57c3cc625c 100644 ---- a/fs/ntfs3/fsntfs.c -+++ b/fs/ntfs3/fsntfs.c -@@ -831,10 +831,15 @@ int ntfs_update_mftmirr(struct ntfs_sb_info *sbi, int wait) - { - int err; - struct super_block *sb = sbi->sb; -- u32 blocksize = sb->s_blocksize; -+ u32 blocksize; - sector_t block1, block2; - u32 bytes; - -+ if (!sb) -+ return -EINVAL; ++ list_for_each_entry_safe(req, tmp, &ctx->iopoll_list, inflight_entry) { ++ struct kiocb *kiocb = &req->rw.kiocb; ++ int ret; + -+ blocksize = sb->s_blocksize; ++ /* ++ * Move completed and retryable entries to our local lists. ++ * If we find a request that requires polling, break out ++ * and complete those lists first, if we have entries there. ++ */ ++ if (READ_ONCE(req->iopoll_completed)) { ++ list_move_tail(&req->inflight_entry, &done); ++ continue; ++ } ++ if (!list_empty(&done)) ++ break; + - if (!(sbi->flags & NTFS_FLAGS_MFTMIRR)) - return 0; - -diff --git a/fs/ntfs3/index.c b/fs/ntfs3/index.c -index 6f81e3a49abfb..76ebea253fa25 100644 ---- a/fs/ntfs3/index.c -+++ b/fs/ntfs3/index.c -@@ -1994,7 +1994,7 @@ static int indx_free_children(struct ntfs_index *indx, struct ntfs_inode *ni, - const struct NTFS_DE *e, bool trim) - { - int err; -- struct indx_node *n; -+ struct indx_node *n = NULL; - struct INDEX_HDR *hdr; - CLST vbn = de_get_vbn(e); - size_t i; -diff --git a/fs/ntfs3/inode.c b/fs/ntfs3/inode.c -index 859951d785cb2..64b4a3c29878c 100644 ---- a/fs/ntfs3/inode.c -+++ b/fs/ntfs3/inode.c -@@ -430,6 +430,7 @@ end_enum: - } else if (fname && fname->home.low == cpu_to_le32(MFT_REC_EXTEND) && - fname->home.seq == cpu_to_le16(MFT_REC_EXTEND)) { - /* Records in $Extend are not a files or general directories. 
*/ -+ inode->i_op = &ntfs_file_inode_operations; - } else { - err = -EINVAL; - goto out; -@@ -757,6 +758,7 @@ static ssize_t ntfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter) - loff_t vbo = iocb->ki_pos; - loff_t end; - int wr = iov_iter_rw(iter) & WRITE; -+ size_t iter_count = iov_iter_count(iter); - loff_t valid; - ssize_t ret; - -@@ -770,10 +772,13 @@ static ssize_t ntfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter) - wr ? ntfs_get_block_direct_IO_W - : ntfs_get_block_direct_IO_R); - -- if (ret <= 0) -+ if (ret > 0) -+ end = vbo + ret; -+ else if (wr && ret == -EIOCBQUEUED) -+ end = vbo + iter_count; -+ else - goto out; - -- end = vbo + ret; - valid = ni->i_valid; - if (wr) { - if (end > valid && !S_ISBLK(inode->i_mode)) { -@@ -1937,8 +1942,6 @@ const struct inode_operations ntfs_link_inode_operations = { - .setattr = ntfs3_setattr, - .listxattr = ntfs_listxattr, - .permission = ntfs_permission, -- .get_acl = ntfs_get_acl, -- .set_acl = ntfs_set_acl, - }; - - const struct address_space_operations ntfs_aops = { -diff --git a/fs/ntfs3/super.c b/fs/ntfs3/super.c -index d41d76979e121..f3b88c7e35f73 100644 ---- a/fs/ntfs3/super.c -+++ b/fs/ntfs3/super.c -@@ -30,6 +30,7 @@ - #include <linux/fs_context.h> - #include <linux/fs_parser.h> - #include <linux/log2.h> -+#include <linux/minmax.h> - #include <linux/module.h> - #include <linux/nls.h> - #include <linux/seq_file.h> -@@ -390,7 +391,7 @@ static int ntfs_fs_reconfigure(struct fs_context *fc) - return -EINVAL; - } - -- memcpy(sbi->options, new_opts, sizeof(*new_opts)); -+ swap(sbi->options, fc->fs_private); - - return 0; - } -@@ -668,9 +669,11 @@ static u32 format_size_gb(const u64 bytes, u32 *mb) - - static u32 true_sectors_per_clst(const struct NTFS_BOOT *boot) - { -- return boot->sectors_per_clusters <= 0x80 -- ? boot->sectors_per_clusters -- : (1u << (0 - boot->sectors_per_clusters)); -+ if (boot->sectors_per_clusters <= 0x80) -+ return boot->sectors_per_clusters; -+ if (boot->sectors_per_clusters >= 0xf4) /* limit shift to 2MB max */ -+ return 1U << (0 - boot->sectors_per_clusters); -+ return -EINVAL; - } - - /* -@@ -713,6 +716,8 @@ static int ntfs_init_from_boot(struct super_block *sb, u32 sector_size, - - /* cluster size: 512, 1K, 2K, 4K, ... 2M */ - sct_per_clst = true_sectors_per_clst(boot); -+ if ((int)sct_per_clst < 0) ++ ret = kiocb->ki_filp->f_op->iopoll(kiocb, spin); ++ if (unlikely(ret < 0)) ++ return ret; ++ else if (ret) ++ spin = false; ++ ++ /* iopoll may have completed current req */ ++ if (READ_ONCE(req->iopoll_completed)) ++ list_move_tail(&req->inflight_entry, &done); ++ } ++ ++ if (!list_empty(&done)) ++ io_iopoll_complete(ctx, nr_events, &done); ++ ++ return 0; ++} ++ ++/* ++ * We can't just wait for polled events to come to us, we have to actively ++ * find and complete them. ++ */ ++static void io_iopoll_try_reap_events(struct io_ring_ctx *ctx) ++{ ++ if (!(ctx->flags & IORING_SETUP_IOPOLL)) ++ return; ++ ++ mutex_lock(&ctx->uring_lock); ++ while (!list_empty(&ctx->iopoll_list)) { ++ unsigned int nr_events = 0; ++ ++ io_do_iopoll(ctx, &nr_events, 0); ++ ++ /* let it sleep and repeat later if can't complete a request */ ++ if (nr_events == 0) ++ break; ++ /* ++ * Ensure we allow local-to-the-cpu processing to take place, ++ * in this case we need to ensure that we reap all events. ++ * Also let task_work, etc. 
to progress by releasing the mutex ++ */ ++ if (need_resched()) { ++ mutex_unlock(&ctx->uring_lock); ++ cond_resched(); ++ mutex_lock(&ctx->uring_lock); ++ } ++ } ++ mutex_unlock(&ctx->uring_lock); ++} ++ ++static int io_iopoll_check(struct io_ring_ctx *ctx, long min) ++{ ++ unsigned int nr_events = 0; ++ int ret = 0; ++ ++ /* ++ * We disallow the app entering submit/complete with polling, but we ++ * still need to lock the ring to prevent racing with polled issue ++ * that got punted to a workqueue. ++ */ ++ mutex_lock(&ctx->uring_lock); ++ /* ++ * Don't enter poll loop if we already have events pending. ++ * If we do, we can potentially be spinning for commands that ++ * already triggered a CQE (eg in error). ++ */ ++ if (test_bit(0, &ctx->check_cq_overflow)) ++ __io_cqring_overflow_flush(ctx, false); ++ if (io_cqring_events(ctx)) + goto out; - if (!is_power_of_2(sct_per_clst)) - goto out; - -@@ -897,6 +902,8 @@ static int ntfs_fill_super(struct super_block *sb, struct fs_context *fc) - ref.high = 0; - - sbi->sb = sb; -+ sbi->options = fc->fs_private; -+ fc->fs_private = NULL; - sb->s_flags |= SB_NODIRATIME; - sb->s_magic = 0x7366746e; // "ntfs" - sb->s_op = &ntfs_sops; -@@ -1260,8 +1267,6 @@ load_root: - goto put_inode_out; - } - -- fc->fs_private = NULL; -- - return 0; - - put_inode_out: -@@ -1414,7 +1419,6 @@ static int ntfs_init_fs_context(struct fs_context *fc) - mutex_init(&sbi->compress.mtx_lzx); - #endif - -- sbi->options = opts; - fc->s_fs_info = sbi; - ok: - fc->fs_private = opts; -diff --git a/fs/ntfs3/xattr.c b/fs/ntfs3/xattr.c -index afd0ddad826ff..eb799a5cdfade 100644 ---- a/fs/ntfs3/xattr.c -+++ b/fs/ntfs3/xattr.c -@@ -112,13 +112,13 @@ static int ntfs_read_ea(struct ntfs_inode *ni, struct EA_FULL **ea, - return -ENOMEM; - - if (!size) { -- ; -+ /* EA info persists, but xattr is empty. Looks like EA problem. */ - } else if (attr_ea->non_res) { - struct runs_tree run; - - run_init(&run); - -- err = attr_load_runs(attr_ea, ni, &run, NULL); -+ err = attr_load_runs_range(ni, ATTR_EA, NULL, 0, &run, 0, size); - if (!err) - err = ntfs_read_run_nb(sbi, &run, 0, ea_p, size, NULL); - run_close(&run); -@@ -443,6 +443,11 @@ update_ea: - /* Delete xattr, ATTR_EA */ - ni_remove_attr_le(ni, attr, mi, le); - } else if (attr->non_res) { -+ err = attr_load_runs_range(ni, ATTR_EA, NULL, 0, &ea_run, 0, -+ size); -+ if (err) -+ goto out; ++ do { ++ /* ++ * If a submit got punted to a workqueue, we can have the ++ * application entering polling for a command before it gets ++ * issued. That app will hold the uring_lock for the duration ++ * of the poll right here, so we need to take a breather every ++ * now and then to ensure that the issue has a chance to add ++ * the poll to the issued list. Otherwise we can spin here ++ * forever, while the workqueue is stuck trying to acquire the ++ * very same mutex. ++ */ ++ if (list_empty(&ctx->iopoll_list)) { ++ u32 tail = ctx->cached_cq_tail; + - err = ntfs_sb_write_run(sbi, &ea_run, 0, ea_all, size, 0); - if (err) - goto out; -@@ -476,8 +481,7 @@ out: - } - - #ifdef CONFIG_NTFS3_FS_POSIX_ACL --static struct posix_acl *ntfs_get_acl_ex(struct user_namespace *mnt_userns, -- struct inode *inode, int type, -+static struct posix_acl *ntfs_get_acl_ex(struct inode *inode, int type, - int locked) - { - struct ntfs_inode *ni = ntfs_i(inode); -@@ -512,7 +516,7 @@ static struct posix_acl *ntfs_get_acl_ex(struct user_namespace *mnt_userns, - - /* Translate extended attribute to acl. 
*/ - if (err >= 0) { -- acl = posix_acl_from_xattr(mnt_userns, buf, err); -+ acl = posix_acl_from_xattr(&init_user_ns, buf, err); - } else if (err == -ENODATA) { - acl = NULL; - } else { -@@ -535,37 +539,32 @@ struct posix_acl *ntfs_get_acl(struct inode *inode, int type, bool rcu) - if (rcu) - return ERR_PTR(-ECHILD); - -- /* TODO: init_user_ns? */ -- return ntfs_get_acl_ex(&init_user_ns, inode, type, 0); -+ return ntfs_get_acl_ex(inode, type, 0); - } - - static noinline int ntfs_set_acl_ex(struct user_namespace *mnt_userns, - struct inode *inode, struct posix_acl *acl, -- int type) -+ int type, bool init_acl) - { - const char *name; - size_t size, name_len; -- void *value = NULL; -- int err = 0; -+ void *value; -+ int err; - int flags; -+ umode_t mode; - - if (S_ISLNK(inode->i_mode)) - return -EOPNOTSUPP; - -+ mode = inode->i_mode; - switch (type) { - case ACL_TYPE_ACCESS: -- if (acl) { -- umode_t mode = inode->i_mode; -- -+ /* Do not change i_mode if we are in init_acl */ -+ if (acl && !init_acl) { - err = posix_acl_update_mode(mnt_userns, inode, &mode, - &acl); - if (err) -- goto out; -- -- if (inode->i_mode != mode) { -- inode->i_mode = mode; -- mark_inode_dirty(inode); -- } -+ return err; - } - name = XATTR_NAME_POSIX_ACL_ACCESS; - name_len = sizeof(XATTR_NAME_POSIX_ACL_ACCESS) - 1; -@@ -592,7 +591,7 @@ static noinline int ntfs_set_acl_ex(struct user_namespace *mnt_userns, - value = kmalloc(size, GFP_NOFS); - if (!value) - return -ENOMEM; -- err = posix_acl_to_xattr(mnt_userns, acl, value, size); -+ err = posix_acl_to_xattr(&init_user_ns, acl, value, size); - if (err < 0) - goto out; - flags = 0; -@@ -601,8 +600,13 @@ static noinline int ntfs_set_acl_ex(struct user_namespace *mnt_userns, - err = ntfs_set_ea(inode, name, name_len, value, size, flags); - if (err == -ENODATA && !size) - err = 0; /* Removing non existed xattr. */ -- if (!err) -+ if (!err) { - set_cached_acl(inode, type, acl); -+ if (inode->i_mode != mode) { -+ inode->i_mode = mode; -+ mark_inode_dirty(inode); ++ mutex_unlock(&ctx->uring_lock); ++ io_run_task_work(); ++ mutex_lock(&ctx->uring_lock); ++ ++ /* some requests don't go through iopoll_list */ ++ if (tail != ctx->cached_cq_tail || ++ list_empty(&ctx->iopoll_list)) ++ break; + } ++ ret = io_do_iopoll(ctx, &nr_events, min); ++ } while (!ret && nr_events < min && !need_resched()); ++out: ++ mutex_unlock(&ctx->uring_lock); ++ return ret; ++} ++ ++static void kiocb_end_write(struct io_kiocb *req) ++{ ++ /* ++ * Tell lockdep we inherited freeze protection from submission ++ * thread. 
++ */ ++ if (req->flags & REQ_F_ISREG) { ++ struct super_block *sb = file_inode(req->file)->i_sb; ++ ++ __sb_writers_acquired(sb, SB_FREEZE_WRITE); ++ sb_end_write(sb); + } - - out: - kfree(value); -@@ -616,7 +620,7 @@ out: - int ntfs_set_acl(struct user_namespace *mnt_userns, struct inode *inode, - struct posix_acl *acl, int type) - { -- return ntfs_set_acl_ex(mnt_userns, inode, acl, type); -+ return ntfs_set_acl_ex(mnt_userns, inode, acl, type, false); - } - - /* -@@ -636,7 +640,7 @@ int ntfs_init_acl(struct user_namespace *mnt_userns, struct inode *inode, - - if (default_acl) { - err = ntfs_set_acl_ex(mnt_userns, inode, default_acl, -- ACL_TYPE_DEFAULT); -+ ACL_TYPE_DEFAULT, true); - posix_acl_release(default_acl); - } else { - inode->i_default_acl = NULL; -@@ -647,7 +651,7 @@ int ntfs_init_acl(struct user_namespace *mnt_userns, struct inode *inode, - else { - if (!err) - err = ntfs_set_acl_ex(mnt_userns, inode, acl, -- ACL_TYPE_ACCESS); -+ ACL_TYPE_ACCESS, true); - posix_acl_release(acl); - } - -@@ -901,6 +905,9 @@ set_new_fa: - err = ntfs_set_ea(inode, name, name_len, value, size, flags); - - out: -+ inode->i_ctime = current_time(inode); -+ mark_inode_dirty(inode); ++} + - return err; - } - -@@ -981,7 +988,7 @@ static bool ntfs_xattr_user_list(struct dentry *dentry) - } - - // clang-format off --static const struct xattr_handler ntfs_xattr_handler = { -+static const struct xattr_handler ntfs_other_xattr_handler = { - .prefix = "", - .get = ntfs_getxattr, - .set = ntfs_setxattr, -@@ -989,7 +996,11 @@ static const struct xattr_handler ntfs_xattr_handler = { - }; - - const struct xattr_handler *ntfs_xattr_handlers[] = { -- &ntfs_xattr_handler, -+#ifdef CONFIG_NTFS3_FS_POSIX_ACL -+ &posix_acl_access_xattr_handler, -+ &posix_acl_default_xattr_handler, ++#ifdef CONFIG_BLOCK ++static bool io_resubmit_prep(struct io_kiocb *req) ++{ ++ struct io_async_rw *rw = req->async_data; ++ ++ if (!rw) ++ return !io_req_prep_async(req); ++ iov_iter_restore(&rw->iter, &rw->iter_state); ++ return true; ++} ++ ++static bool io_rw_should_reissue(struct io_kiocb *req) ++{ ++ umode_t mode = file_inode(req->file)->i_mode; ++ struct io_ring_ctx *ctx = req->ctx; ++ ++ if (!S_ISBLK(mode) && !S_ISREG(mode)) ++ return false; ++ if ((req->flags & REQ_F_NOWAIT) || (io_wq_current_is_worker() && ++ !(ctx->flags & IORING_SETUP_IOPOLL))) ++ return false; ++ /* ++ * If ref is dying, we might be running poll reap from the exit work. ++ * Don't attempt to reissue from that path, just let it fail with ++ * -EAGAIN. ++ */ ++ if (percpu_ref_is_dying(&ctx->refs)) ++ return false; ++ /* ++ * Play it safe and assume not safe to re-import and reissue if we're ++ * not in the original thread group (or in task context). ++ */ ++ if (!same_thread_group(req->task, current) || !in_task()) ++ return false; ++ return true; ++} ++#else ++static bool io_resubmit_prep(struct io_kiocb *req) ++{ ++ return false; ++} ++static bool io_rw_should_reissue(struct io_kiocb *req) ++{ ++ return false; ++} +#endif -+ &ntfs_other_xattr_handler, - NULL, - }; - // clang-format on -diff --git a/fs/ocfs2/dlmfs/userdlm.c b/fs/ocfs2/dlmfs/userdlm.c -index 29f183a15798e..c1d67c806e1d3 100644 ---- a/fs/ocfs2/dlmfs/userdlm.c -+++ b/fs/ocfs2/dlmfs/userdlm.c -@@ -433,6 +433,11 @@ again: - } - - spin_lock(&lockres->l_lock); -+ if (lockres->l_flags & USER_LOCK_IN_TEARDOWN) { -+ spin_unlock(&lockres->l_lock); -+ status = -EAGAIN; -+ goto bail; ++ ++/* ++ * Trigger the notifications after having done some IO, and finish the write ++ * accounting, if any. 
++ */ ++static void io_req_io_end(struct io_kiocb *req) ++{ ++ struct io_rw *rw = &req->rw; ++ ++ if (rw->kiocb.ki_flags & IOCB_WRITE) { ++ kiocb_end_write(req); ++ fsnotify_modify(req->file); ++ } else { ++ fsnotify_access(req->file); + } - - /* We only compare against the currently granted level - * here. If the lock is blocked waiting on a downconvert, -@@ -595,7 +600,7 @@ int user_dlm_destroy_lock(struct user_lock_res *lockres) - spin_lock(&lockres->l_lock); - if (lockres->l_flags & USER_LOCK_IN_TEARDOWN) { - spin_unlock(&lockres->l_lock); -- return 0; -+ goto bail; - } - - lockres->l_flags |= USER_LOCK_IN_TEARDOWN; -@@ -609,12 +614,17 @@ int user_dlm_destroy_lock(struct user_lock_res *lockres) - } - - if (lockres->l_ro_holders || lockres->l_ex_holders) { -+ lockres->l_flags &= ~USER_LOCK_IN_TEARDOWN; - spin_unlock(&lockres->l_lock); - goto bail; - } - - status = 0; - if (!(lockres->l_flags & USER_LOCK_ATTACHED)) { -+ /* -+ * lock is never requested, leave USER_LOCK_IN_TEARDOWN set -+ * to avoid new lock request coming in. -+ */ - spin_unlock(&lockres->l_lock); - goto bail; - } -@@ -625,6 +635,10 @@ int user_dlm_destroy_lock(struct user_lock_res *lockres) - - status = ocfs2_dlm_unlock(conn, &lockres->l_lksb, DLM_LKF_VALBLK); - if (status) { -+ spin_lock(&lockres->l_lock); -+ lockres->l_flags &= ~USER_LOCK_IN_TEARDOWN; -+ lockres->l_flags &= ~USER_LOCK_BUSY; -+ spin_unlock(&lockres->l_lock); - user_log_dlm_error("ocfs2_dlm_unlock", status, lockres); - goto bail; - } -diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c -index 54d7843c02114..fc5f780fa2355 100644 ---- a/fs/ocfs2/file.c -+++ b/fs/ocfs2/file.c -@@ -476,10 +476,11 @@ int ocfs2_truncate_file(struct inode *inode, - * greater than page size, so we have to truncate them - * anyway. - */ -- unmap_mapping_range(inode->i_mapping, new_i_size + PAGE_SIZE - 1, 0, 1); -- truncate_inode_pages(inode->i_mapping, new_i_size); - - if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) { -+ unmap_mapping_range(inode->i_mapping, -+ new_i_size + PAGE_SIZE - 1, 0, 1); -+ truncate_inode_pages(inode->i_mapping, new_i_size); - status = ocfs2_truncate_inline(inode, di_bh, new_i_size, - i_size_read(inode), 1); - if (status) -@@ -498,6 +499,9 @@ int ocfs2_truncate_file(struct inode *inode, - goto bail_unlock_sem; - } - -+ unmap_mapping_range(inode->i_mapping, new_i_size + PAGE_SIZE - 1, 0, 1); -+ truncate_inode_pages(inode->i_mapping, new_i_size); ++} + - status = ocfs2_commit_truncate(osb, inode, di_bh); - if (status < 0) { - mlog_errno(status); -diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c -index 2c46ff6ba4ea2..11807034dd483 100644 ---- a/fs/ocfs2/namei.c -+++ b/fs/ocfs2/namei.c -@@ -231,6 +231,7 @@ static int ocfs2_mknod(struct user_namespace *mnt_userns, - handle_t *handle = NULL; - struct ocfs2_super *osb; - struct ocfs2_dinode *dirfe; -+ struct ocfs2_dinode *fe = NULL; - struct buffer_head *new_fe_bh = NULL; - struct inode *inode = NULL; - struct ocfs2_alloc_context *inode_ac = NULL; -@@ -381,6 +382,7 @@ static int ocfs2_mknod(struct user_namespace *mnt_userns, - goto leave; - } - -+ fe = (struct ocfs2_dinode *) new_fe_bh->b_data; - if (S_ISDIR(mode)) { - status = ocfs2_fill_new_dir(osb, handle, dir, inode, - new_fe_bh, data_ac, meta_ac); -@@ -453,8 +455,11 @@ roll_back: - leave: - if (status < 0 && did_quota_inode) - dquot_free_inode(inode); -- if (handle) -+ if (handle) { -+ if (status < 0 && fe) -+ ocfs2_set_links_count(fe, 0); - ocfs2_commit_trans(osb, handle); ++static bool __io_complete_rw_common(struct io_kiocb *req, long res) ++{ ++ 
if (res != req->result) { ++ if ((res == -EAGAIN || res == -EOPNOTSUPP) && ++ io_rw_should_reissue(req)) { ++ /* ++ * Reissue will start accounting again, finish the ++ * current cycle. ++ */ ++ io_req_io_end(req); ++ req->flags |= REQ_F_REISSUE; ++ return true; ++ } ++ req_set_fail(req); ++ req->result = res; + } - - ocfs2_inode_unlock(dir, 1); - if (did_block_signals) -@@ -631,18 +636,9 @@ static int ocfs2_mknod_locked(struct ocfs2_super *osb, - return status; - } - -- status = __ocfs2_mknod_locked(dir, inode, dev, new_fe_bh, -+ return __ocfs2_mknod_locked(dir, inode, dev, new_fe_bh, - parent_fe_bh, handle, inode_ac, - fe_blkno, suballoc_loc, suballoc_bit); -- if (status < 0) { -- u64 bg_blkno = ocfs2_which_suballoc_group(fe_blkno, suballoc_bit); -- int tmp = ocfs2_free_suballoc_bits(handle, inode_ac->ac_inode, -- inode_ac->ac_bh, suballoc_bit, bg_blkno, 1); -- if (tmp) -- mlog_errno(tmp); -- } -- -- return status; - } - - static int ocfs2_mkdir(struct user_namespace *mnt_userns, -@@ -2027,8 +2023,11 @@ bail: - ocfs2_clusters_to_bytes(osb->sb, 1)); - if (status < 0 && did_quota_inode) - dquot_free_inode(inode); -- if (handle) -+ if (handle) { -+ if (status < 0 && fe) -+ ocfs2_set_links_count(fe, 0); - ocfs2_commit_trans(osb, handle); ++ return false; ++} ++ ++static inline int io_fixup_rw_res(struct io_kiocb *req, long res) ++{ ++ struct io_async_rw *io = req->async_data; ++ ++ /* add previously done IO, if any */ ++ if (io && io->bytes_done > 0) { ++ if (res < 0) ++ res = io->bytes_done; ++ else ++ res += io->bytes_done; ++ } ++ return res; ++} ++ ++static void io_req_task_complete(struct io_kiocb *req, bool *locked) ++{ ++ unsigned int cflags = io_put_rw_kbuf(req); ++ int res = req->result; ++ ++ if (*locked) { ++ struct io_ring_ctx *ctx = req->ctx; ++ struct io_submit_state *state = &ctx->submit_state; ++ ++ io_req_complete_state(req, res, cflags); ++ state->compl_reqs[state->compl_nr++] = req; ++ if (state->compl_nr == ARRAY_SIZE(state->compl_reqs)) ++ io_submit_flush_completions(ctx); ++ } else { ++ io_req_complete_post(req, res, cflags); ++ } ++} ++ ++static void io_req_rw_complete(struct io_kiocb *req, bool *locked) ++{ ++ io_req_io_end(req); ++ io_req_task_complete(req, locked); ++} ++ ++static void io_complete_rw(struct kiocb *kiocb, long res, long res2) ++{ ++ struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw.kiocb); ++ ++ if (__io_complete_rw_common(req, res)) ++ return; ++ req->result = io_fixup_rw_res(req, res); ++ req->io_task_work.func = io_req_rw_complete; ++ io_req_task_work_add(req); ++} ++ ++static void io_complete_rw_iopoll(struct kiocb *kiocb, long res, long res2) ++{ ++ struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw.kiocb); ++ ++ if (kiocb->ki_flags & IOCB_WRITE) ++ kiocb_end_write(req); ++ if (unlikely(res != req->result)) { ++ if (res == -EAGAIN && io_rw_should_reissue(req)) { ++ req->flags |= REQ_F_REISSUE; ++ return; ++ } + } - - ocfs2_inode_unlock(dir, 1); - if (did_block_signals) -diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h -index bb62cc2e0211b..cf21aecdf5476 100644 ---- a/fs/ocfs2/ocfs2.h -+++ b/fs/ocfs2/ocfs2.h -@@ -277,7 +277,6 @@ enum ocfs2_mount_options - OCFS2_MOUNT_JOURNAL_ASYNC_COMMIT = 1 << 15, /* Journal Async Commit */ - OCFS2_MOUNT_ERRORS_CONT = 1 << 16, /* Return EIO to the calling process on error */ - OCFS2_MOUNT_ERRORS_ROFS = 1 << 17, /* Change filesystem to read-only on error */ -- OCFS2_MOUNT_NOCLUSTER = 1 << 18, /* No cluster aware filesystem mount */ - }; - - #define OCFS2_OSB_SOFT_RO 0x0001 -@@ -673,8 
+672,7 @@ static inline int ocfs2_cluster_o2cb_global_heartbeat(struct ocfs2_super *osb) - - static inline int ocfs2_mount_local(struct ocfs2_super *osb) - { -- return ((osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_LOCAL_MOUNT) -- || (osb->s_mount_opt & OCFS2_MOUNT_NOCLUSTER)); -+ return (osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_LOCAL_MOUNT); - } - - static inline int ocfs2_uses_extended_slot_map(struct ocfs2_super *osb) -diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c -index f033de733adb3..effe92c7d6937 100644 ---- a/fs/ocfs2/quota_global.c -+++ b/fs/ocfs2/quota_global.c -@@ -337,7 +337,6 @@ void ocfs2_unlock_global_qf(struct ocfs2_mem_dqinfo *oinfo, int ex) - /* Read information header from global quota file */ - int ocfs2_global_read_info(struct super_block *sb, int type) - { -- struct inode *gqinode = NULL; - unsigned int ino[OCFS2_MAXQUOTAS] = { USER_QUOTA_SYSTEM_INODE, - GROUP_QUOTA_SYSTEM_INODE }; - struct ocfs2_global_disk_dqinfo dinfo; -@@ -346,29 +345,31 @@ int ocfs2_global_read_info(struct super_block *sb, int type) - u64 pcount; - int status; - -+ oinfo->dqi_gi.dqi_sb = sb; -+ oinfo->dqi_gi.dqi_type = type; -+ ocfs2_qinfo_lock_res_init(&oinfo->dqi_gqlock, oinfo); -+ oinfo->dqi_gi.dqi_entry_size = sizeof(struct ocfs2_global_disk_dqblk); -+ oinfo->dqi_gi.dqi_ops = &ocfs2_global_ops; -+ oinfo->dqi_gqi_bh = NULL; -+ oinfo->dqi_gqi_count = 0; + - /* Read global header */ -- gqinode = ocfs2_get_system_file_inode(OCFS2_SB(sb), ino[type], -+ oinfo->dqi_gqinode = ocfs2_get_system_file_inode(OCFS2_SB(sb), ino[type], - OCFS2_INVALID_SLOT); -- if (!gqinode) { -+ if (!oinfo->dqi_gqinode) { - mlog(ML_ERROR, "failed to get global quota inode (type=%d)\n", - type); - status = -EINVAL; - goto out_err; - } -- oinfo->dqi_gi.dqi_sb = sb; -- oinfo->dqi_gi.dqi_type = type; -- oinfo->dqi_gi.dqi_entry_size = sizeof(struct ocfs2_global_disk_dqblk); -- oinfo->dqi_gi.dqi_ops = &ocfs2_global_ops; -- oinfo->dqi_gqi_bh = NULL; -- oinfo->dqi_gqi_count = 0; -- oinfo->dqi_gqinode = gqinode; ++ WRITE_ONCE(req->result, res); ++ /* order with io_iopoll_complete() checking ->result */ ++ smp_wmb(); ++ WRITE_ONCE(req->iopoll_completed, 1); ++} + - status = ocfs2_lock_global_qf(oinfo, 0); - if (status < 0) { - mlog_errno(status); - goto out_err; - } - -- status = ocfs2_extent_map_get_blocks(gqinode, 0, &oinfo->dqi_giblk, -+ status = ocfs2_extent_map_get_blocks(oinfo->dqi_gqinode, 0, &oinfo->dqi_giblk, - &pcount, NULL); - if (status < 0) - goto out_unlock; -diff --git a/fs/ocfs2/quota_local.c b/fs/ocfs2/quota_local.c -index 0e4b16d4c037f..b1a8b046f4c22 100644 ---- a/fs/ocfs2/quota_local.c -+++ b/fs/ocfs2/quota_local.c -@@ -702,8 +702,6 @@ static int ocfs2_local_read_info(struct super_block *sb, int type) - info->dqi_priv = oinfo; - oinfo->dqi_type = type; - INIT_LIST_HEAD(&oinfo->dqi_chunk); -- oinfo->dqi_gqinode = NULL; -- ocfs2_qinfo_lock_res_init(&oinfo->dqi_gqlock, oinfo); - oinfo->dqi_rec = NULL; - oinfo->dqi_lqi_bh = NULL; - oinfo->dqi_libh = NULL; -diff --git a/fs/ocfs2/slot_map.c b/fs/ocfs2/slot_map.c -index 0b0ae3ebb0cf5..da7718cef735e 100644 ---- a/fs/ocfs2/slot_map.c -+++ b/fs/ocfs2/slot_map.c -@@ -252,16 +252,14 @@ static int __ocfs2_find_empty_slot(struct ocfs2_slot_info *si, - int i, ret = -ENOSPC; - - if ((preferred >= 0) && (preferred < si->si_num_slots)) { -- if (!si->si_slots[preferred].sl_valid || -- !si->si_slots[preferred].sl_node_num) { -+ if (!si->si_slots[preferred].sl_valid) { - ret = preferred; - goto out; - } - } - - for(i = 0; i < si->si_num_slots; i++) { -- if 
(!si->si_slots[i].sl_valid || -- !si->si_slots[i].sl_node_num) { -+ if (!si->si_slots[i].sl_valid) { - ret = i; - break; - } -@@ -456,30 +454,24 @@ int ocfs2_find_slot(struct ocfs2_super *osb) - spin_lock(&osb->osb_lock); - ocfs2_update_slot_info(si); - -- if (ocfs2_mount_local(osb)) -- /* use slot 0 directly in local mode */ -- slot = 0; -- else { -- /* search for ourselves first and take the slot if it already -- * exists. Perhaps we need to mark this in a variable for our -- * own journal recovery? Possibly not, though we certainly -- * need to warn to the user */ -- slot = __ocfs2_node_num_to_slot(si, osb->node_num); -+ /* search for ourselves first and take the slot if it already -+ * exists. Perhaps we need to mark this in a variable for our -+ * own journal recovery? Possibly not, though we certainly -+ * need to warn to the user */ -+ slot = __ocfs2_node_num_to_slot(si, osb->node_num); -+ if (slot < 0) { -+ /* if no slot yet, then just take 1st available -+ * one. */ -+ slot = __ocfs2_find_empty_slot(si, osb->preferred_slot); - if (slot < 0) { -- /* if no slot yet, then just take 1st available -- * one. */ -- slot = __ocfs2_find_empty_slot(si, osb->preferred_slot); -- if (slot < 0) { -- spin_unlock(&osb->osb_lock); -- mlog(ML_ERROR, "no free slots available!\n"); -- status = -EINVAL; -- goto bail; -- } -- } else -- printk(KERN_INFO "ocfs2: Slot %d on device (%s) was " -- "already allocated to this node!\n", -- slot, osb->dev_str); -- } -+ spin_unlock(&osb->osb_lock); -+ mlog(ML_ERROR, "no free slots available!\n"); -+ status = -EINVAL; -+ goto bail; ++/* ++ * After the iocb has been issued, it's safe to be found on the poll list. ++ * Adding the kiocb to the list AFTER submission ensures that we don't ++ * find it from a io_do_iopoll() thread before the issuer is done ++ * accessing the kiocb cookie. ++ */ ++static void io_iopoll_req_issued(struct io_kiocb *req) ++{ ++ struct io_ring_ctx *ctx = req->ctx; ++ const bool in_async = io_wq_current_is_worker(); ++ ++ /* workqueue context doesn't hold uring_lock, grab it now */ ++ if (unlikely(in_async)) ++ mutex_lock(&ctx->uring_lock); ++ ++ /* ++ * Track whether we have multiple files in our lists. This will impact ++ * how we do polling eventually, not spinning if we're on potentially ++ * different devices. 
++ */ ++ if (list_empty(&ctx->iopoll_list)) { ++ ctx->poll_multi_queue = false; ++ } else if (!ctx->poll_multi_queue) { ++ struct io_kiocb *list_req; ++ unsigned int queue_num0, queue_num1; ++ ++ list_req = list_first_entry(&ctx->iopoll_list, struct io_kiocb, ++ inflight_entry); ++ ++ if (list_req->file != req->file) { ++ ctx->poll_multi_queue = true; ++ } else { ++ queue_num0 = blk_qc_t_to_queue_num(list_req->rw.kiocb.ki_cookie); ++ queue_num1 = blk_qc_t_to_queue_num(req->rw.kiocb.ki_cookie); ++ if (queue_num0 != queue_num1) ++ ctx->poll_multi_queue = true; + } -+ } else -+ printk(KERN_INFO "ocfs2: Slot %d on device (%s) was already " -+ "allocated to this node!\n", slot, osb->dev_str); - - ocfs2_set_slot(si, slot, osb->node_num); - osb->slot_num = slot; -diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c -index 481017e1dac5a..166c8918c825a 100644 ---- a/fs/ocfs2/suballoc.c -+++ b/fs/ocfs2/suballoc.c -@@ -1251,26 +1251,23 @@ static int ocfs2_test_bg_bit_allocatable(struct buffer_head *bg_bh, - { - struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) bg_bh->b_data; - struct journal_head *jh; -- int ret = 1; -+ int ret; - - if (ocfs2_test_bit(nr, (unsigned long *)bg->bg_bitmap)) - return 0; - -- if (!buffer_jbd(bg_bh)) -+ jh = jbd2_journal_grab_journal_head(bg_bh); -+ if (!jh) - return 1; - -- jbd_lock_bh_journal_head(bg_bh); -- if (buffer_jbd(bg_bh)) { -- jh = bh2jh(bg_bh); -- spin_lock(&jh->b_state_lock); -- bg = (struct ocfs2_group_desc *) jh->b_committed_data; -- if (bg) -- ret = !ocfs2_test_bit(nr, (unsigned long *)bg->bg_bitmap); -- else -- ret = 1; -- spin_unlock(&jh->b_state_lock); -- } -- jbd_unlock_bh_journal_head(bg_bh); -+ spin_lock(&jh->b_state_lock); -+ bg = (struct ocfs2_group_desc *) jh->b_committed_data; -+ if (bg) -+ ret = !ocfs2_test_bit(nr, (unsigned long *)bg->bg_bitmap); ++ } ++ ++ /* ++ * For fast devices, IO may have already completed. If it has, add ++ * it to the front so we find it first. 
++ */ ++ if (READ_ONCE(req->iopoll_completed)) ++ list_add(&req->inflight_entry, &ctx->iopoll_list); + else -+ ret = 1; -+ spin_unlock(&jh->b_state_lock); -+ jbd2_journal_put_journal_head(jh); - - return ret; - } -diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c -index 5c914ce9b3ac9..a03f0cabff0bf 100644 ---- a/fs/ocfs2/super.c -+++ b/fs/ocfs2/super.c -@@ -173,7 +173,6 @@ enum { - Opt_dir_resv_level, - Opt_journal_async_commit, - Opt_err_cont, -- Opt_nocluster, - Opt_err, - }; - -@@ -207,7 +206,6 @@ static const match_table_t tokens = { - {Opt_dir_resv_level, "dir_resv_level=%u"}, - {Opt_journal_async_commit, "journal_async_commit"}, - {Opt_err_cont, "errors=continue"}, -- {Opt_nocluster, "nocluster"}, - {Opt_err, NULL} - }; - -@@ -619,13 +617,6 @@ static int ocfs2_remount(struct super_block *sb, int *flags, char *data) - goto out; - } - -- tmp = OCFS2_MOUNT_NOCLUSTER; -- if ((osb->s_mount_opt & tmp) != (parsed_options.mount_opt & tmp)) { -- ret = -EINVAL; -- mlog(ML_ERROR, "Cannot change nocluster option on remount\n"); -- goto out; -- } -- - tmp = OCFS2_MOUNT_HB_LOCAL | OCFS2_MOUNT_HB_GLOBAL | - OCFS2_MOUNT_HB_NONE; - if ((osb->s_mount_opt & tmp) != (parsed_options.mount_opt & tmp)) { -@@ -866,7 +857,6 @@ static int ocfs2_verify_userspace_stack(struct ocfs2_super *osb, - } - - if (ocfs2_userspace_stack(osb) && -- !(osb->s_mount_opt & OCFS2_MOUNT_NOCLUSTER) && - strncmp(osb->osb_cluster_stack, mopt->cluster_stack, - OCFS2_STACK_LABEL_LEN)) { - mlog(ML_ERROR, -@@ -1106,17 +1096,6 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent) - goto read_super_error; - } - -- root = d_make_root(inode); -- if (!root) { -- status = -ENOMEM; -- mlog_errno(status); -- goto read_super_error; -- } -- -- sb->s_root = root; -- -- ocfs2_complete_mount_recovery(osb); -- - osb->osb_dev_kset = kset_create_and_add(sb->s_id, NULL, - &ocfs2_kset->kobj); - if (!osb->osb_dev_kset) { -@@ -1134,6 +1113,17 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent) - goto read_super_error; - } - -+ root = d_make_root(inode); -+ if (!root) { -+ status = -ENOMEM; -+ mlog_errno(status); -+ goto read_super_error; ++ list_add_tail(&req->inflight_entry, &ctx->iopoll_list); ++ ++ if (unlikely(in_async)) { ++ /* ++ * If IORING_SETUP_SQPOLL is enabled, sqes are either handle ++ * in sq thread task context or in io worker task context. If ++ * current task context is sq thread, we don't need to check ++ * whether should wake up sq thread. ++ */ ++ if ((ctx->flags & IORING_SETUP_SQPOLL) && ++ wq_has_sleeper(&ctx->sq_data->wait)) ++ wake_up(&ctx->sq_data->wait); ++ ++ mutex_unlock(&ctx->uring_lock); + } ++} + -+ sb->s_root = root; ++static bool io_bdev_nowait(struct block_device *bdev) ++{ ++ return !bdev || blk_queue_nowait(bdev_get_queue(bdev)); ++} + -+ ocfs2_complete_mount_recovery(osb); ++/* ++ * If we tracked the file through the SCM inflight mechanism, we could support ++ * any file. For now, just ensure that anything potentially problematic is done ++ * inline. ++ */ ++static bool __io_file_supports_nowait(struct file *file, int rw) ++{ ++ umode_t mode = file_inode(file)->i_mode; + - if (ocfs2_mount_local(osb)) - snprintf(nodestr, sizeof(nodestr), "local"); - else -@@ -1145,11 +1135,6 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent) - osb->s_mount_opt & OCFS2_MOUNT_DATA_WRITEBACK ? 
"writeback" : - "ordered"); - -- if ((osb->s_mount_opt & OCFS2_MOUNT_NOCLUSTER) && -- !(osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_LOCAL_MOUNT)) -- printk(KERN_NOTICE "ocfs2: The shared device (%s) is mounted " -- "without cluster aware mode.\n", osb->dev_str); -- - atomic_set(&osb->vol_state, VOLUME_MOUNTED); - wake_up(&osb->osb_mount_event); - -@@ -1456,9 +1441,6 @@ static int ocfs2_parse_options(struct super_block *sb, - case Opt_journal_async_commit: - mopt->mount_opt |= OCFS2_MOUNT_JOURNAL_ASYNC_COMMIT; - break; -- case Opt_nocluster: -- mopt->mount_opt |= OCFS2_MOUNT_NOCLUSTER; -- break; - default: - mlog(ML_ERROR, - "Unrecognized mount option \"%s\" " -@@ -1570,9 +1552,6 @@ static int ocfs2_show_options(struct seq_file *s, struct dentry *root) - if (opts & OCFS2_MOUNT_JOURNAL_ASYNC_COMMIT) - seq_printf(s, ",journal_async_commit"); - -- if (opts & OCFS2_MOUNT_NOCLUSTER) -- seq_printf(s, ",nocluster"); -- - return 0; - } - -diff --git a/fs/open.c b/fs/open.c -index daa324606a41f..5e322f188e839 100644 ---- a/fs/open.c -+++ b/fs/open.c -@@ -32,6 +32,7 @@ - #include <linux/ima.h> - #include <linux/dnotify.h> - #include <linux/compat.h> -+#include <linux/mnt_idmapping.h> - - #include "internal.h" - -@@ -640,7 +641,7 @@ SYSCALL_DEFINE2(chmod, const char __user *, filename, umode_t, mode) - - int chown_common(const struct path *path, uid_t user, gid_t group) - { -- struct user_namespace *mnt_userns; -+ struct user_namespace *mnt_userns, *fs_userns; - struct inode *inode = path->dentry->d_inode; - struct inode *delegated_inode = NULL; - int error; -@@ -652,8 +653,9 @@ int chown_common(const struct path *path, uid_t user, gid_t group) - gid = make_kgid(current_user_ns(), group); - - mnt_userns = mnt_user_ns(path->mnt); -- uid = kuid_from_mnt(mnt_userns, uid); -- gid = kgid_from_mnt(mnt_userns, gid); -+ fs_userns = i_user_ns(inode); -+ uid = mapped_kuid_user(mnt_userns, fs_userns, uid); -+ gid = mapped_kgid_user(mnt_userns, fs_userns, gid); - - retry_deleg: - newattrs.ia_valid = ATTR_CTIME; -@@ -784,7 +786,9 @@ static int do_dentry_open(struct file *f, - return 0; - } - -- if (f->f_mode & FMODE_WRITE && !special_file(inode->i_mode)) { -+ if ((f->f_mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ) { -+ i_readcount_inc(inode); -+ } else if (f->f_mode & FMODE_WRITE && !special_file(inode->i_mode)) { - error = get_write_access(inode); - if (unlikely(error)) - goto cleanup_file; -@@ -824,8 +828,6 @@ static int do_dentry_open(struct file *f, - goto cleanup_all; - } - f->f_mode |= FMODE_OPENED; -- if ((f->f_mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ) -- i_readcount_inc(inode); - if ((f->f_mode & FMODE_READ) && - likely(f->f_op->read || f->f_op->read_iter)) - f->f_mode |= FMODE_CAN_READ; -@@ -856,8 +858,20 @@ static int do_dentry_open(struct file *f, - * of THPs into the page cache will fail. 
- */ - smp_mb(); -- if (filemap_nr_thps(inode->i_mapping)) -- truncate_pagecache(inode, 0); -+ if (filemap_nr_thps(inode->i_mapping)) { -+ struct address_space *mapping = inode->i_mapping; ++ if (S_ISBLK(mode)) { ++ if (IS_ENABLED(CONFIG_BLOCK) && ++ io_bdev_nowait(I_BDEV(file->f_mapping->host))) ++ return true; ++ return false; ++ } ++ if (S_ISSOCK(mode)) ++ return true; ++ if (S_ISREG(mode)) { ++ if (IS_ENABLED(CONFIG_BLOCK) && ++ io_bdev_nowait(file->f_inode->i_sb->s_bdev) && ++ file->f_op != &io_uring_fops) ++ return true; ++ return false; ++ } + -+ filemap_invalidate_lock(inode->i_mapping); ++ /* any ->read/write should understand O_NONBLOCK */ ++ if (file->f_flags & O_NONBLOCK) ++ return true; ++ ++ if (!(file->f_mode & FMODE_NOWAIT)) ++ return false; ++ ++ if (rw == READ) ++ return file->f_op->read_iter != NULL; ++ ++ return file->f_op->write_iter != NULL; ++} ++ ++static bool io_file_supports_nowait(struct io_kiocb *req, int rw) ++{ ++ if (rw == READ && (req->flags & REQ_F_NOWAIT_READ)) ++ return true; ++ else if (rw == WRITE && (req->flags & REQ_F_NOWAIT_WRITE)) ++ return true; ++ ++ return __io_file_supports_nowait(req->file, rw); ++} ++ ++static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe, ++ int rw) ++{ ++ struct io_ring_ctx *ctx = req->ctx; ++ struct kiocb *kiocb = &req->rw.kiocb; ++ struct file *file = req->file; ++ unsigned ioprio; ++ int ret; ++ ++ if (!io_req_ffs_set(req) && S_ISREG(file_inode(file)->i_mode)) ++ req->flags |= REQ_F_ISREG; ++ ++ kiocb->ki_pos = READ_ONCE(sqe->off); ++ kiocb->ki_hint = ki_hint_validate(file_write_hint(kiocb->ki_filp)); ++ kiocb->ki_flags = iocb_flags(kiocb->ki_filp); ++ ret = kiocb_set_rw_flags(kiocb, READ_ONCE(sqe->rw_flags)); ++ if (unlikely(ret)) ++ return ret; ++ ++ /* ++ * If the file is marked O_NONBLOCK, still allow retry for it if it ++ * supports async. Otherwise it's impossible to use O_NONBLOCK files ++ * reliably. If not, or it IOCB_NOWAIT is set, don't retry. 
++ */ ++ if ((kiocb->ki_flags & IOCB_NOWAIT) || ++ ((file->f_flags & O_NONBLOCK) && !io_file_supports_nowait(req, rw))) ++ req->flags |= REQ_F_NOWAIT; ++ ++ ioprio = READ_ONCE(sqe->ioprio); ++ if (ioprio) { ++ ret = ioprio_check_cap(ioprio); ++ if (ret) ++ return ret; ++ ++ kiocb->ki_ioprio = ioprio; ++ } else ++ kiocb->ki_ioprio = get_current_ioprio(); ++ ++ if (ctx->flags & IORING_SETUP_IOPOLL) { ++ if (!(kiocb->ki_flags & IOCB_DIRECT) || ++ !kiocb->ki_filp->f_op->iopoll) ++ return -EOPNOTSUPP; ++ ++ kiocb->ki_flags |= IOCB_HIPRI | IOCB_ALLOC_CACHE; ++ kiocb->ki_complete = io_complete_rw_iopoll; ++ req->iopoll_completed = 0; ++ } else { ++ if (kiocb->ki_flags & IOCB_HIPRI) ++ return -EINVAL; ++ kiocb->ki_complete = io_complete_rw; ++ } ++ ++ /* used for fixed read/write too - just read unconditionally */ ++ req->buf_index = READ_ONCE(sqe->buf_index); ++ req->imu = NULL; ++ ++ if (req->opcode == IORING_OP_READ_FIXED || ++ req->opcode == IORING_OP_WRITE_FIXED) { ++ struct io_ring_ctx *ctx = req->ctx; ++ u16 index; ++ ++ if (unlikely(req->buf_index >= ctx->nr_user_bufs)) ++ return -EFAULT; ++ index = array_index_nospec(req->buf_index, ctx->nr_user_bufs); ++ req->imu = ctx->user_bufs[index]; ++ io_req_set_rsrc_node(req); ++ } ++ ++ req->rw.addr = READ_ONCE(sqe->addr); ++ req->rw.len = READ_ONCE(sqe->len); ++ return 0; ++} ++ ++static inline void io_rw_done(struct kiocb *kiocb, ssize_t ret) ++{ ++ switch (ret) { ++ case -EIOCBQUEUED: ++ break; ++ case -ERESTARTSYS: ++ case -ERESTARTNOINTR: ++ case -ERESTARTNOHAND: ++ case -ERESTART_RESTARTBLOCK: ++ /* ++ * We can't just restart the syscall, since previously ++ * submitted sqes may already be in progress. Just fail this ++ * IO with EINTR. ++ */ ++ ret = -EINTR; ++ fallthrough; ++ default: ++ kiocb->ki_complete(kiocb, ret, 0); ++ } ++} ++ ++static inline loff_t *io_kiocb_update_pos(struct io_kiocb *req) ++{ ++ struct kiocb *kiocb = &req->rw.kiocb; ++ ++ if (kiocb->ki_pos != -1) ++ return &kiocb->ki_pos; ++ ++ if (!(req->file->f_mode & FMODE_STREAM)) { ++ req->flags |= REQ_F_CUR_POS; ++ kiocb->ki_pos = req->file->f_pos; ++ return &kiocb->ki_pos; ++ } ++ ++ kiocb->ki_pos = 0; ++ return NULL; ++} ++ ++static void kiocb_done(struct kiocb *kiocb, ssize_t ret, ++ unsigned int issue_flags) ++{ ++ struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw.kiocb); ++ ++ if (req->flags & REQ_F_CUR_POS) ++ req->file->f_pos = kiocb->ki_pos; ++ if (ret >= 0 && (kiocb->ki_complete == io_complete_rw)) { ++ if (!__io_complete_rw_common(req, ret)) { + /* -+ * unmap_mapping_range just need to be called once -+ * here, because the private pages is not need to be -+ * unmapped mapping (e.g. data segment of dynamic -+ * shared libraries here). ++ * Safe to call io_end from here as we're inline ++ * from the submission path. 
+ */ -+ unmap_mapping_range(mapping, 0, 0, 0); -+ truncate_inode_pages(mapping, 0); -+ filemap_invalidate_unlock(inode->i_mapping); ++ io_req_io_end(req); ++ __io_req_complete(req, issue_flags, ++ io_fixup_rw_res(req, ret), ++ io_put_rw_kbuf(req)); + } - } - - return 0; -@@ -866,10 +880,7 @@ cleanup_all: - if (WARN_ON_ONCE(error > 0)) - error = -EINVAL; - fops_put(f->f_op); -- if (f->f_mode & FMODE_WRITER) { -- put_write_access(inode); -- __mnt_drop_write(f->f_path.mnt); -- } -+ put_file_access(f); - cleanup_file: - path_put(&f->f_path); - f->f_path.mnt = NULL; -diff --git a/fs/orangefs/dcache.c b/fs/orangefs/dcache.c -index fe484cf93e5cd..8bbe9486e3a62 100644 ---- a/fs/orangefs/dcache.c -+++ b/fs/orangefs/dcache.c -@@ -26,8 +26,10 @@ static int orangefs_revalidate_lookup(struct dentry *dentry) - gossip_debug(GOSSIP_DCACHE_DEBUG, "%s: attempting lookup.\n", __func__); - - new_op = op_alloc(ORANGEFS_VFS_OP_LOOKUP); -- if (!new_op) -+ if (!new_op) { -+ ret = -ENOMEM; - goto out_put_parent; ++ } else { ++ io_rw_done(kiocb, ret); + } - - new_op->upcall.req.lookup.sym_follow = ORANGEFS_LOOKUP_LINK_NO_FOLLOW; - new_op->upcall.req.lookup.parent_refn = parent->refn; -diff --git a/fs/orangefs/orangefs-bufmap.c b/fs/orangefs/orangefs-bufmap.c -index 538e839590ef5..b501dc07f9222 100644 ---- a/fs/orangefs/orangefs-bufmap.c -+++ b/fs/orangefs/orangefs-bufmap.c -@@ -176,7 +176,7 @@ orangefs_bufmap_free(struct orangefs_bufmap *bufmap) - { - kfree(bufmap->page_array); - kfree(bufmap->desc_array); -- kfree(bufmap->buffer_index_array); -+ bitmap_free(bufmap->buffer_index_array); - kfree(bufmap); - } - -@@ -226,8 +226,7 @@ orangefs_bufmap_alloc(struct ORANGEFS_dev_map_desc *user_desc) - bufmap->desc_size = user_desc->size; - bufmap->desc_shift = ilog2(bufmap->desc_size); - -- bufmap->buffer_index_array = -- kzalloc(DIV_ROUND_UP(bufmap->desc_count, BITS_PER_LONG), GFP_KERNEL); -+ bufmap->buffer_index_array = bitmap_zalloc(bufmap->desc_count, GFP_KERNEL); - if (!bufmap->buffer_index_array) - goto out_free_bufmap; - -@@ -250,7 +249,7 @@ orangefs_bufmap_alloc(struct ORANGEFS_dev_map_desc *user_desc) - out_free_desc_array: - kfree(bufmap->desc_array); - out_free_index_array: -- kfree(bufmap->buffer_index_array); -+ bitmap_free(bufmap->buffer_index_array); - out_free_bufmap: - kfree(bufmap); - out: -diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c -index 4e7d5bfa2949f..e040970408d4f 100644 ---- a/fs/overlayfs/copy_up.c -+++ b/fs/overlayfs/copy_up.c -@@ -140,12 +140,14 @@ static int ovl_copy_fileattr(struct inode *inode, struct path *old, - int err; - - err = ovl_real_fileattr_get(old, &oldfa); -- if (err) -- return err; -- -- err = ovl_real_fileattr_get(new, &newfa); -- if (err) -+ if (err) { -+ /* Ntfs-3g returns -EINVAL for "no fileattr support" */ -+ if (err == -ENOTTY || err == -EINVAL) -+ return 0; -+ pr_warn("failed to retrieve lower fileattr (%pd2, err=%i)\n", -+ old->dentry, err); - return err; ++ ++ if (req->flags & REQ_F_REISSUE) { ++ req->flags &= ~REQ_F_REISSUE; ++ if (io_resubmit_prep(req)) { ++ io_req_task_queue_reissue(req); ++ } else { ++ unsigned int cflags = io_put_rw_kbuf(req); ++ struct io_ring_ctx *ctx = req->ctx; ++ ++ ret = io_fixup_rw_res(req, ret); ++ req_set_fail(req); ++ if (!(issue_flags & IO_URING_F_NONBLOCK)) { ++ mutex_lock(&ctx->uring_lock); ++ __io_req_complete(req, issue_flags, ret, cflags); ++ mutex_unlock(&ctx->uring_lock); ++ } else { ++ __io_req_complete(req, issue_flags, ret, cflags); ++ } ++ } + } - - /* - * We cannot set immutable and append-only flags on upper 
inode, -@@ -155,10 +157,31 @@ static int ovl_copy_fileattr(struct inode *inode, struct path *old, - */ - if (oldfa.flags & OVL_PROT_FS_FLAGS_MASK) { - err = ovl_set_protattr(inode, new->dentry, &oldfa); -- if (err) -+ if (err == -EPERM) -+ pr_warn_once("copying fileattr: no xattr on upper\n"); -+ else if (err) - return err; - } - -+ /* Don't bother copying flags if none are set */ -+ if (!(oldfa.flags & OVL_COPY_FS_FLAGS_MASK)) -+ return 0; ++} + -+ err = ovl_real_fileattr_get(new, &newfa); -+ if (err) { ++static int __io_import_fixed(struct io_kiocb *req, int rw, struct iov_iter *iter, ++ struct io_mapped_ubuf *imu) ++{ ++ size_t len = req->rw.len; ++ u64 buf_end, buf_addr = req->rw.addr; ++ size_t offset; ++ ++ if (unlikely(check_add_overflow(buf_addr, (u64)len, &buf_end))) ++ return -EFAULT; ++ /* not inside the mapped region */ ++ if (unlikely(buf_addr < imu->ubuf || buf_end > imu->ubuf_end)) ++ return -EFAULT; ++ ++ /* ++ * May not be a start of buffer, set size appropriately ++ * and advance us to the beginning. ++ */ ++ offset = buf_addr - imu->ubuf; ++ iov_iter_bvec(iter, rw, imu->bvec, imu->nr_bvecs, offset + len); ++ ++ if (offset) { + /* -+ * Returning an error if upper doesn't support fileattr will -+ * result in a regression, so revert to the old behavior. ++ * Don't use iov_iter_advance() here, as it's really slow for ++ * using the latter parts of a big fixed buffer - it iterates ++ * over each segment manually. We can cheat a bit here, because ++ * we know that: ++ * ++ * 1) it's a BVEC iter, we set it up ++ * 2) all bvecs are PAGE_SIZE in size, except potentially the ++ * first and last bvec ++ * ++ * So just find our index, and adjust the iterator afterwards. ++ * If the offset is within the first bvec (or the whole first ++ * bvec, just use iov_iter_advance(). This makes it easier ++ * since we can just skip the first segment, which may not ++ * be PAGE_SIZE aligned. 
+ */ -+ if (err == -ENOTTY || err == -EINVAL) { -+ pr_warn_once("copying fileattr: no support on upper\n"); -+ return 0; ++ const struct bio_vec *bvec = imu->bvec; ++ ++ if (offset <= bvec->bv_len) { ++ iov_iter_advance(iter, offset); ++ } else { ++ unsigned long seg_skip; ++ ++ /* skip first vec */ ++ offset -= bvec->bv_len; ++ seg_skip = 1 + (offset >> PAGE_SHIFT); ++ ++ iter->bvec = bvec + seg_skip; ++ iter->nr_segs -= seg_skip; ++ iter->count -= bvec->bv_len + offset; ++ iter->iov_offset = offset & ~PAGE_MASK; + } -+ pr_warn("failed to retrieve upper fileattr (%pd2, err=%i)\n", -+ new->dentry, err); -+ return err; + } + - BUILD_BUG_ON(OVL_COPY_FS_FLAGS_MASK & ~FS_COMMON_FL); - newfa.flags &= ~OVL_COPY_FS_FLAGS_MASK; - newfa.flags |= (oldfa.flags & OVL_COPY_FS_FLAGS_MASK); -diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c -index 93c7c267de934..f18490813170a 100644 ---- a/fs/overlayfs/dir.c -+++ b/fs/overlayfs/dir.c -@@ -137,8 +137,7 @@ kill_whiteout: - goto out; - } - --static int ovl_mkdir_real(struct inode *dir, struct dentry **newdentry, -- umode_t mode) -+int ovl_mkdir_real(struct inode *dir, struct dentry **newdentry, umode_t mode) - { - int err; - struct dentry *d, *dentry = *newdentry; -diff --git a/fs/overlayfs/export.c b/fs/overlayfs/export.c -index ebde05c9cf62e..dbb944b5f81e5 100644 ---- a/fs/overlayfs/export.c -+++ b/fs/overlayfs/export.c -@@ -259,7 +259,7 @@ static int ovl_encode_fh(struct inode *inode, u32 *fid, int *max_len, - return FILEID_INVALID; - - dentry = d_find_any_alias(inode); -- if (WARN_ON(!dentry)) -+ if (!dentry) - return FILEID_INVALID; - - bytes = ovl_dentry_to_fid(ofs, dentry, fid, buflen); -diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c -index c88ac571593dc..44fea16751f1d 100644 ---- a/fs/overlayfs/file.c -+++ b/fs/overlayfs/file.c -@@ -17,6 +17,7 @@ - - struct ovl_aio_req { - struct kiocb iocb; -+ refcount_t ref; - struct kiocb *orig_iocb; - struct fd fd; - }; -@@ -252,6 +253,14 @@ static rwf_t ovl_iocb_to_rwf(int ifl) - return flags; - } - -+static inline void ovl_aio_put(struct ovl_aio_req *aio_req) ++ return 0; ++} ++ ++static int io_import_fixed(struct io_kiocb *req, int rw, struct iov_iter *iter) +{ -+ if (refcount_dec_and_test(&aio_req->ref)) { -+ fdput(aio_req->fd); -+ kmem_cache_free(ovl_aio_request_cachep, aio_req); -+ } ++ if (WARN_ON_ONCE(!req->imu)) ++ return -EFAULT; ++ return __io_import_fixed(req, rw, iter, req->imu); +} + - static void ovl_aio_cleanup_handler(struct ovl_aio_req *aio_req) - { - struct kiocb *iocb = &aio_req->iocb; -@@ -268,8 +277,7 @@ static void ovl_aio_cleanup_handler(struct ovl_aio_req *aio_req) - } - - orig_iocb->ki_pos = iocb->ki_pos; -- fdput(aio_req->fd); -- kmem_cache_free(ovl_aio_request_cachep, aio_req); -+ ovl_aio_put(aio_req); - } - - static void ovl_aio_rw_complete(struct kiocb *iocb, long res, long res2) -@@ -319,7 +327,9 @@ static ssize_t ovl_read_iter(struct kiocb *iocb, struct iov_iter *iter) - aio_req->orig_iocb = iocb; - kiocb_clone(&aio_req->iocb, iocb, real.file); - aio_req->iocb.ki_complete = ovl_aio_rw_complete; -+ refcount_set(&aio_req->ref, 2); - ret = vfs_iocb_iter_read(real.file, &aio_req->iocb, iter); -+ ovl_aio_put(aio_req); - if (ret != -EIOCBQUEUED) - ovl_aio_cleanup_handler(aio_req); - } -@@ -390,7 +400,9 @@ static ssize_t ovl_write_iter(struct kiocb *iocb, struct iov_iter *iter) - kiocb_clone(&aio_req->iocb, iocb, real.file); - aio_req->iocb.ki_flags = ifl; - aio_req->iocb.ki_complete = ovl_aio_rw_complete; -+ refcount_set(&aio_req->ref, 2); - ret = 
vfs_iocb_iter_write(real.file, &aio_req->iocb, iter); -+ ovl_aio_put(aio_req); - if (ret != -EIOCBQUEUED) - ovl_aio_cleanup_handler(aio_req); - } -diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c -index 832b17589733a..1f36158c7dbe2 100644 ---- a/fs/overlayfs/inode.c -+++ b/fs/overlayfs/inode.c -@@ -610,7 +610,10 @@ int ovl_real_fileattr_get(struct path *realpath, struct fileattr *fa) - if (err) - return err; - -- return vfs_fileattr_get(realpath->dentry, fa); -+ err = vfs_fileattr_get(realpath->dentry, fa); -+ if (err == -ENOIOCTLCMD) -+ err = -ENOTTY; -+ return err; - } - - int ovl_fileattr_get(struct dentry *dentry, struct fileattr *fa) -diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h -index 3894f33479552..2cd5741c873b6 100644 ---- a/fs/overlayfs/overlayfs.h -+++ b/fs/overlayfs/overlayfs.h -@@ -570,6 +570,7 @@ struct ovl_cattr { - - #define OVL_CATTR(m) (&(struct ovl_cattr) { .mode = (m) }) - -+int ovl_mkdir_real(struct inode *dir, struct dentry **newdentry, umode_t mode); - struct dentry *ovl_create_real(struct inode *dir, struct dentry *newdentry, - struct ovl_cattr *attr); - int ovl_cleanup(struct inode *dir, struct dentry *dentry); -diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c -index 178daa5e82c9d..9837aaf9caf18 100644 ---- a/fs/overlayfs/super.c -+++ b/fs/overlayfs/super.c -@@ -787,10 +787,14 @@ retry: - goto retry; - } - -- work = ovl_create_real(dir, work, OVL_CATTR(attr.ia_mode)); -- err = PTR_ERR(work); -- if (IS_ERR(work)) -- goto out_err; -+ err = ovl_mkdir_real(dir, &work, attr.ia_mode); -+ if (err) -+ goto out_dput; ++static void io_ring_submit_unlock(struct io_ring_ctx *ctx, bool needs_lock) ++{ ++ if (needs_lock) ++ mutex_unlock(&ctx->uring_lock); ++} + -+ /* Weird filesystem returning with hashed negative (kernfs)? */ -+ err = -EINVAL; -+ if (d_really_is_negative(work)) -+ goto out_dput; - - /* - * Try to remove POSIX ACL xattrs from workdir. We are good if: -@@ -869,7 +873,7 @@ static int ovl_mount_dir_noesc(const char *name, struct path *path) - pr_err("filesystem on '%s' not supported\n", name); - goto out_put; - } -- if (mnt_user_ns(path->mnt) != &init_user_ns) { -+ if (is_idmapped_mnt(path->mnt)) { - pr_err("idmapped layers are currently not supported\n"); - goto out_put; - } -@@ -1409,11 +1413,12 @@ static int ovl_make_workdir(struct super_block *sb, struct ovl_fs *ofs, - */ - err = ovl_do_setxattr(ofs, ofs->workdir, OVL_XATTR_OPAQUE, "0", 1); - if (err) { -+ pr_warn("failed to set xattr on upper\n"); - ofs->noxattr = true; - if (ofs->config.index || ofs->config.metacopy) { - ofs->config.index = false; - ofs->config.metacopy = false; -- pr_warn("upper fs does not support xattr, falling back to index=off,metacopy=off.\n"); -+ pr_warn("...falling back to index=off,metacopy=off.\n"); - } - /* - * xattr support is required for persistent st_ino. 
-@@ -1421,8 +1426,10 @@ static int ovl_make_workdir(struct super_block *sb, struct ovl_fs *ofs, - */ - if (ofs->config.xino == OVL_XINO_AUTO) { - ofs->config.xino = OVL_XINO_OFF; -- pr_warn("upper fs does not support xattr, falling back to xino=off.\n"); -+ pr_warn("...falling back to xino=off.\n"); - } -+ if (err == -EPERM && !ofs->config.userxattr) -+ pr_info("try mounting with 'userxattr' option\n"); - err = 0; - } else { - ovl_do_removexattr(ofs, ofs->workdir, OVL_XATTR_OPAQUE); -diff --git a/fs/pipe.c b/fs/pipe.c -index 6d4342bad9f15..e08f0fe55584b 100644 ---- a/fs/pipe.c -+++ b/fs/pipe.c -@@ -252,7 +252,8 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to) - */ - was_full = pipe_full(pipe->head, pipe->tail, pipe->max_usage); - for (;;) { -- unsigned int head = pipe->head; -+ /* Read ->head with a barrier vs post_one_notification() */ -+ unsigned int head = smp_load_acquire(&pipe->head); - unsigned int tail = pipe->tail; - unsigned int mask = pipe->ring_size - 1; - -@@ -651,7 +652,7 @@ pipe_poll(struct file *filp, poll_table *wait) - unsigned int head, tail; - - /* Epoll has some historical nasty semantics, this enables them */ -- pipe->poll_usage = 1; -+ WRITE_ONCE(pipe->poll_usage, true); - - /* - * Reading pipe state only -- no need for acquiring the semaphore. -@@ -830,10 +831,8 @@ void free_pipe_info(struct pipe_inode_info *pipe) - int i; - - #ifdef CONFIG_WATCH_QUEUE -- if (pipe->watch_queue) { -+ if (pipe->watch_queue) - watch_queue_clear(pipe->watch_queue); -- put_watch_queue(pipe->watch_queue); -- } - #endif - - (void) account_pipe_buffers(pipe->user, pipe->nr_accounted, 0); -@@ -843,6 +842,10 @@ void free_pipe_info(struct pipe_inode_info *pipe) - if (buf->ops) - pipe_buf_release(pipe, buf); - } -+#ifdef CONFIG_WATCH_QUEUE -+ if (pipe->watch_queue) -+ put_watch_queue(pipe->watch_queue); -+#endif - if (pipe->tmp_page) - __free_page(pipe->tmp_page); - kfree(pipe->bufs); -@@ -1241,30 +1244,33 @@ unsigned int round_pipe_size(unsigned long size) - - /* - * Resize the pipe ring to a number of slots. -+ * -+ * Note the pipe can be reduced in capacity, but only if the current -+ * occupancy doesn't exceed nr_slots; if it does, EBUSY will be -+ * returned instead. - */ - int pipe_resize_ring(struct pipe_inode_info *pipe, unsigned int nr_slots) - { - struct pipe_buffer *bufs; - unsigned int head, tail, mask, n; - -- /* -- * We can shrink the pipe, if arg is greater than the ring occupancy. -- * Since we don't expect a lot of shrink+grow operations, just free and -- * allocate again like we would do for growing. If the pipe currently -- * contains more buffers than arg, then return busy. -- */ -- mask = pipe->ring_size - 1; -- head = pipe->head; -- tail = pipe->tail; -- n = pipe_occupancy(pipe->head, pipe->tail); -- if (nr_slots < n) -- return -EBUSY; -- - bufs = kcalloc(nr_slots, sizeof(*bufs), - GFP_KERNEL_ACCOUNT | __GFP_NOWARN); - if (unlikely(!bufs)) - return -ENOMEM; - -+ spin_lock_irq(&pipe->rd_wait.lock); -+ mask = pipe->ring_size - 1; -+ head = pipe->head; -+ tail = pipe->tail; ++static void io_ring_submit_lock(struct io_ring_ctx *ctx, bool needs_lock) ++{ ++ /* ++ * "Normal" inline submissions always hold the uring_lock, since we ++ * grab it from the system call. Same is true for the SQPOLL offload. ++ * The only exception is when we've detached the request and issue it ++ * from an async worker thread, grab the lock for that case. 
++ */ ++ if (needs_lock) ++ mutex_lock(&ctx->uring_lock); ++} + -+ n = pipe_occupancy(head, tail); -+ if (nr_slots < n) { -+ spin_unlock_irq(&pipe->rd_wait.lock); -+ kfree(bufs); -+ return -EBUSY; ++static struct io_buffer *io_buffer_select(struct io_kiocb *req, size_t *len, ++ int bgid, struct io_buffer *kbuf, ++ bool needs_lock) ++{ ++ struct io_buffer *head; ++ ++ if (req->flags & REQ_F_BUFFER_SELECTED) ++ return kbuf; ++ ++ io_ring_submit_lock(req->ctx, needs_lock); ++ ++ lockdep_assert_held(&req->ctx->uring_lock); ++ ++ head = xa_load(&req->ctx->io_buffers, bgid); ++ if (head) { ++ if (!list_empty(&head->list)) { ++ kbuf = list_last_entry(&head->list, struct io_buffer, ++ list); ++ list_del(&kbuf->list); ++ } else { ++ kbuf = head; ++ xa_erase(&req->ctx->io_buffers, bgid); ++ } ++ if (*len > kbuf->len) ++ *len = kbuf->len; ++ } else { ++ kbuf = ERR_PTR(-ENOBUFS); + } + - /* - * The pipe array wraps around, so just start the new one at zero - * and adjust the indices. -@@ -1296,6 +1302,8 @@ int pipe_resize_ring(struct pipe_inode_info *pipe, unsigned int nr_slots) - pipe->tail = tail; - pipe->head = head; - -+ spin_unlock_irq(&pipe->rd_wait.lock); ++ io_ring_submit_unlock(req->ctx, needs_lock); + - /* This might have made more room for writers */ - wake_up_interruptible(&pipe->wr_wait); - return 0; -diff --git a/fs/posix_acl.c b/fs/posix_acl.c -index f5c25f580dd92..ceb1e3b868577 100644 ---- a/fs/posix_acl.c -+++ b/fs/posix_acl.c -@@ -23,6 +23,7 @@ - #include <linux/export.h> - #include <linux/user_namespace.h> - #include <linux/namei.h> -+#include <linux/mnt_idmapping.h> - - static struct posix_acl **acl_by_type(struct inode *inode, int type) - { -@@ -375,7 +376,9 @@ posix_acl_permission(struct user_namespace *mnt_userns, struct inode *inode, - goto check_perm; - break; - case ACL_USER: -- uid = kuid_into_mnt(mnt_userns, pa->e_uid); -+ uid = mapped_kuid_fs(mnt_userns, -+ i_user_ns(inode), -+ pa->e_uid); - if (uid_eq(uid, current_fsuid())) - goto mask; - break; -@@ -388,7 +391,9 @@ posix_acl_permission(struct user_namespace *mnt_userns, struct inode *inode, - } - break; - case ACL_GROUP: -- gid = kgid_into_mnt(mnt_userns, pa->e_gid); -+ gid = mapped_kgid_fs(mnt_userns, -+ i_user_ns(inode), -+ pa->e_gid); - if (in_group_p(gid)) { - found = 1; - if ((pa->e_perm & want) == want) -@@ -735,17 +740,17 @@ static void posix_acl_fix_xattr_userns( - case ACL_USER: - uid = make_kuid(from, le32_to_cpu(entry->e_id)); - if (from_user) -- uid = kuid_from_mnt(mnt_userns, uid); -+ uid = mapped_kuid_user(mnt_userns, &init_user_ns, uid); - else -- uid = kuid_into_mnt(mnt_userns, uid); -+ uid = mapped_kuid_fs(mnt_userns, &init_user_ns, uid); - entry->e_id = cpu_to_le32(from_kuid(to, uid)); - break; - case ACL_GROUP: - gid = make_kgid(from, le32_to_cpu(entry->e_id)); - if (from_user) -- gid = kgid_from_mnt(mnt_userns, gid); -+ gid = mapped_kgid_user(mnt_userns, &init_user_ns, gid); - else -- gid = kgid_into_mnt(mnt_userns, gid); -+ gid = mapped_kgid_fs(mnt_userns, &init_user_ns, gid); - entry->e_id = cpu_to_le32(from_kgid(to, gid)); - break; - default: -@@ -755,9 +760,14 @@ static void posix_acl_fix_xattr_userns( - } - - void posix_acl_fix_xattr_from_user(struct user_namespace *mnt_userns, -+ struct inode *inode, - void *value, size_t size) - { - struct user_namespace *user_ns = current_user_ns(); ++ return kbuf; ++} + -+ /* Leave ids untouched on non-idmapped mounts. 
*/ -+ if (no_idmapping(mnt_userns, i_user_ns(inode))) -+ mnt_userns = &init_user_ns; - if ((user_ns == &init_user_ns) && (mnt_userns == &init_user_ns)) - return; - posix_acl_fix_xattr_userns(&init_user_ns, user_ns, mnt_userns, value, -@@ -765,9 +775,14 @@ void posix_acl_fix_xattr_from_user(struct user_namespace *mnt_userns, - } - - void posix_acl_fix_xattr_to_user(struct user_namespace *mnt_userns, -+ struct inode *inode, - void *value, size_t size) - { - struct user_namespace *user_ns = current_user_ns(); ++static void __user *io_rw_buffer_select(struct io_kiocb *req, size_t *len, ++ bool needs_lock) ++{ ++ struct io_buffer *kbuf; ++ u16 bgid; + -+ /* Leave ids untouched on non-idmapped mounts. */ -+ if (no_idmapping(mnt_userns, i_user_ns(inode))) -+ mnt_userns = &init_user_ns; - if ((user_ns == &init_user_ns) && (mnt_userns == &init_user_ns)) - return; - posix_acl_fix_xattr_userns(user_ns, &init_user_ns, mnt_userns, value, -diff --git a/fs/proc/base.c b/fs/proc/base.c -index 533d5836eb9a4..300d53ee7040c 100644 ---- a/fs/proc/base.c -+++ b/fs/proc/base.c -@@ -67,6 +67,7 @@ - #include <linux/mm.h> - #include <linux/swap.h> - #include <linux/rcupdate.h> -+#include <linux/kallsyms.h> - #include <linux/stacktrace.h> - #include <linux/resource.h> - #include <linux/module.h> -@@ -386,17 +387,19 @@ static int proc_pid_wchan(struct seq_file *m, struct pid_namespace *ns, - struct pid *pid, struct task_struct *task) - { - unsigned long wchan; -+ char symname[KSYM_NAME_LEN]; - -- if (ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS)) -- wchan = get_wchan(task); -- else -- wchan = 0; -+ if (!ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS)) -+ goto print0; - -- if (wchan) -- seq_printf(m, "%ps", (void *) wchan); -- else -- seq_putc(m, '0'); -+ wchan = get_wchan(task); -+ if (wchan && !lookup_symbol_name(wchan, symname)) { -+ seq_puts(m, symname); ++ kbuf = (struct io_buffer *) (unsigned long) req->rw.addr; ++ bgid = req->buf_index; ++ kbuf = io_buffer_select(req, len, bgid, kbuf, needs_lock); ++ if (IS_ERR(kbuf)) ++ return kbuf; ++ req->rw.addr = (u64) (unsigned long) kbuf; ++ req->flags |= REQ_F_BUFFER_SELECTED; ++ return u64_to_user_ptr(kbuf->addr); ++} ++ ++#ifdef CONFIG_COMPAT ++static ssize_t io_compat_import(struct io_kiocb *req, struct iovec *iov, ++ bool needs_lock) ++{ ++ struct compat_iovec __user *uiov; ++ compat_ssize_t clen; ++ void __user *buf; ++ ssize_t len; ++ ++ uiov = u64_to_user_ptr(req->rw.addr); ++ if (!access_ok(uiov, sizeof(*uiov))) ++ return -EFAULT; ++ if (__get_user(clen, &uiov->iov_len)) ++ return -EFAULT; ++ if (clen < 0) ++ return -EINVAL; ++ ++ len = clen; ++ buf = io_rw_buffer_select(req, &len, needs_lock); ++ if (IS_ERR(buf)) ++ return PTR_ERR(buf); ++ iov[0].iov_base = buf; ++ iov[0].iov_len = (compat_size_t) len; ++ return 0; ++} ++#endif ++ ++static ssize_t __io_iov_buffer_select(struct io_kiocb *req, struct iovec *iov, ++ bool needs_lock) ++{ ++ struct iovec __user *uiov = u64_to_user_ptr(req->rw.addr); ++ void __user *buf; ++ ssize_t len; ++ ++ if (copy_from_user(iov, uiov, sizeof(*uiov))) ++ return -EFAULT; ++ ++ len = iov[0].iov_len; ++ if (len < 0) ++ return -EINVAL; ++ buf = io_rw_buffer_select(req, &len, needs_lock); ++ if (IS_ERR(buf)) ++ return PTR_ERR(buf); ++ iov[0].iov_base = buf; ++ iov[0].iov_len = len; ++ return 0; ++} ++ ++static ssize_t io_iov_buffer_select(struct io_kiocb *req, struct iovec *iov, ++ bool needs_lock) ++{ ++ if (req->flags & REQ_F_BUFFER_SELECTED) { ++ struct io_buffer *kbuf; ++ ++ kbuf = (struct io_buffer *) (unsigned long) 
req->rw.addr; ++ iov[0].iov_base = u64_to_user_ptr(kbuf->addr); ++ iov[0].iov_len = kbuf->len; + return 0; + } - -+print0: -+ seq_putc(m, '0'); - return 0; - } - #endif /* CONFIG_KALLSYMS */ -@@ -1883,7 +1886,7 @@ void proc_pid_evict_inode(struct proc_inode *ei) - put_pid(pid); - } - --struct inode *proc_pid_make_inode(struct super_block * sb, -+struct inode *proc_pid_make_inode(struct super_block *sb, - struct task_struct *task, umode_t mode) - { - struct inode * inode; -@@ -1912,11 +1915,6 @@ struct inode *proc_pid_make_inode(struct super_block * sb, - - /* Let the pid remember us for quick removal */ - ei->pid = pid; -- if (S_ISDIR(mode)) { -- spin_lock(&pid->lock); -- hlist_add_head_rcu(&ei->sibling_inodes, &pid->inodes); -- spin_unlock(&pid->lock); -- } - - task_dump_owner(task, 0, &inode->i_uid, &inode->i_gid); - security_task_to_inode(task, inode); -@@ -1929,6 +1927,39 @@ out_unlock: - return NULL; - } - ++ if (req->rw.len != 1) ++ return -EINVAL; ++ ++#ifdef CONFIG_COMPAT ++ if (req->ctx->compat) ++ return io_compat_import(req, iov, needs_lock); ++#endif ++ ++ return __io_iov_buffer_select(req, iov, needs_lock); ++} ++ ++static int io_import_iovec(int rw, struct io_kiocb *req, struct iovec **iovec, ++ struct iov_iter *iter, bool needs_lock) ++{ ++ void __user *buf = u64_to_user_ptr(req->rw.addr); ++ size_t sqe_len = req->rw.len; ++ u8 opcode = req->opcode; ++ ssize_t ret; ++ ++ if (opcode == IORING_OP_READ_FIXED || opcode == IORING_OP_WRITE_FIXED) { ++ *iovec = NULL; ++ return io_import_fixed(req, rw, iter); ++ } ++ ++ /* buffer index only valid with fixed read/write, or buffer select */ ++ if (req->buf_index && !(req->flags & REQ_F_BUFFER_SELECT)) ++ return -EINVAL; ++ ++ if (opcode == IORING_OP_READ || opcode == IORING_OP_WRITE) { ++ if (req->flags & REQ_F_BUFFER_SELECT) { ++ buf = io_rw_buffer_select(req, &sqe_len, needs_lock); ++ if (IS_ERR(buf)) ++ return PTR_ERR(buf); ++ req->rw.len = sqe_len; ++ } ++ ++ ret = import_single_range(rw, buf, sqe_len, *iovec, iter); ++ *iovec = NULL; ++ return ret; ++ } ++ ++ if (req->flags & REQ_F_BUFFER_SELECT) { ++ ret = io_iov_buffer_select(req, *iovec, needs_lock); ++ if (!ret) ++ iov_iter_init(iter, rw, *iovec, 1, (*iovec)->iov_len); ++ *iovec = NULL; ++ return ret; ++ } ++ ++ return __import_iovec(rw, buf, sqe_len, UIO_FASTIOV, iovec, iter, ++ req->ctx->compat); ++} ++ ++static inline loff_t *io_kiocb_ppos(struct kiocb *kiocb) ++{ ++ return (kiocb->ki_filp->f_mode & FMODE_STREAM) ? NULL : &kiocb->ki_pos; ++} ++ +/* -+ * Generating an inode and adding it into @pid->inodes, so that task will -+ * invalidate inode's dentry before being released. -+ * -+ * This helper is used for creating dir-type entries under '/proc' and -+ * '/proc/<tgid>/task'. Other entries(eg. fd, stat) under '/proc/<tgid>' -+ * can be released by invalidating '/proc/<tgid>' dentry. -+ * In theory, dentries under '/proc/<tgid>/task' can also be released by -+ * invalidating '/proc/<tgid>' dentry, we reserve it to handle single -+ * thread exiting situation: Any one of threads should invalidate its -+ * '/proc/<tgid>/task/<pid>' dentry before released. ++ * For files that don't have ->read_iter() and ->write_iter(), handle them ++ * by looping over ->read() or ->write() manually. 
+ */ -+static struct inode *proc_pid_make_base_inode(struct super_block *sb, -+ struct task_struct *task, umode_t mode) ++static ssize_t loop_rw_iter(int rw, struct io_kiocb *req, struct iov_iter *iter) +{ -+ struct inode *inode; -+ struct proc_inode *ei; -+ struct pid *pid; ++ struct kiocb *kiocb = &req->rw.kiocb; ++ struct file *file = req->file; ++ ssize_t ret = 0; ++ loff_t *ppos; + -+ inode = proc_pid_make_inode(sb, task, mode); -+ if (!inode) -+ return NULL; ++ /* ++ * Don't support polled IO through this interface, and we can't ++ * support non-blocking either. For the latter, this just causes ++ * the kiocb to be handled from an async context. ++ */ ++ if (kiocb->ki_flags & IOCB_HIPRI) ++ return -EOPNOTSUPP; ++ if (kiocb->ki_flags & IOCB_NOWAIT) ++ return -EAGAIN; + -+ /* Let proc_flush_pid find this directory inode */ -+ ei = PROC_I(inode); -+ pid = ei->pid; -+ spin_lock(&pid->lock); -+ hlist_add_head_rcu(&ei->sibling_inodes, &pid->inodes); -+ spin_unlock(&pid->lock); ++ ppos = io_kiocb_ppos(kiocb); + -+ return inode; ++ while (iov_iter_count(iter)) { ++ struct iovec iovec; ++ ssize_t nr; ++ ++ if (!iov_iter_is_bvec(iter)) { ++ iovec = iov_iter_iovec(iter); ++ } else { ++ iovec.iov_base = u64_to_user_ptr(req->rw.addr); ++ iovec.iov_len = req->rw.len; ++ } ++ ++ if (rw == READ) { ++ nr = file->f_op->read(file, iovec.iov_base, ++ iovec.iov_len, ppos); ++ } else { ++ nr = file->f_op->write(file, iovec.iov_base, ++ iovec.iov_len, ppos); ++ } ++ ++ if (nr < 0) { ++ if (!ret) ++ ret = nr; ++ break; ++ } ++ ret += nr; ++ if (!iov_iter_is_bvec(iter)) { ++ iov_iter_advance(iter, nr); ++ } else { ++ req->rw.addr += nr; ++ req->rw.len -= nr; ++ if (!req->rw.len) ++ break; ++ } ++ if (nr != iovec.iov_len) ++ break; ++ } ++ ++ return ret; +} + - int pid_getattr(struct user_namespace *mnt_userns, const struct path *path, - struct kstat *stat, u32 request_mask, unsigned int query_flags) - { -@@ -3346,7 +3377,8 @@ static struct dentry *proc_pid_instantiate(struct dentry * dentry, - { - struct inode *inode; - -- inode = proc_pid_make_inode(dentry->d_sb, task, S_IFDIR | S_IRUGO | S_IXUGO); -+ inode = proc_pid_make_base_inode(dentry->d_sb, task, -+ S_IFDIR | S_IRUGO | S_IXUGO); - if (!inode) - return ERR_PTR(-ENOENT); - -@@ -3645,7 +3677,8 @@ static struct dentry *proc_task_instantiate(struct dentry *dentry, - struct task_struct *task, const void *ptr) - { - struct inode *inode; -- inode = proc_pid_make_inode(dentry->d_sb, task, S_IFDIR | S_IRUGO | S_IXUGO); -+ inode = proc_pid_make_base_inode(dentry->d_sb, task, -+ S_IFDIR | S_IRUGO | S_IXUGO); - if (!inode) - return ERR_PTR(-ENOENT); - -diff --git a/fs/proc/bootconfig.c b/fs/proc/bootconfig.c -index 6d8d4bf208377..2e244ada1f970 100644 ---- a/fs/proc/bootconfig.c -+++ b/fs/proc/bootconfig.c -@@ -32,6 +32,8 @@ static int __init copy_xbc_key_value_list(char *dst, size_t size) - int ret = 0; - - key = kzalloc(XBC_KEYLEN_MAX, GFP_KERNEL); -+ if (!key) -+ return -ENOMEM; - - xbc_for_each_key_value(leaf, val) { - ret = xbc_node_compose_key(leaf, key, XBC_KEYLEN_MAX); -diff --git a/fs/proc/fd.c b/fs/proc/fd.c -index 172c86270b312..913bef0d2a36c 100644 ---- a/fs/proc/fd.c -+++ b/fs/proc/fd.c -@@ -72,7 +72,7 @@ out: - return 0; - } - --static int seq_fdinfo_open(struct inode *inode, struct file *file) -+static int proc_fdinfo_access_allowed(struct inode *inode) - { - bool allowed = false; - struct task_struct *task = get_proc_task(inode); -@@ -86,6 +86,16 @@ static int seq_fdinfo_open(struct inode *inode, struct file *file) - if (!allowed) - return -EACCES; 
- -+ return 0; ++static void io_req_map_rw(struct io_kiocb *req, const struct iovec *iovec, ++ const struct iovec *fast_iov, struct iov_iter *iter) ++{ ++ struct io_async_rw *rw = req->async_data; ++ ++ memcpy(&rw->iter, iter, sizeof(*iter)); ++ rw->free_iovec = iovec; ++ rw->bytes_done = 0; ++ /* can only be fixed buffers, no need to do anything */ ++ if (iov_iter_is_bvec(iter)) ++ return; ++ if (!iovec) { ++ unsigned iov_off = 0; ++ ++ rw->iter.iov = rw->fast_iov; ++ if (iter->iov != fast_iov) { ++ iov_off = iter->iov - fast_iov; ++ rw->iter.iov += iov_off; ++ } ++ if (rw->fast_iov != fast_iov) ++ memcpy(rw->fast_iov + iov_off, fast_iov + iov_off, ++ sizeof(struct iovec) * iter->nr_segs); ++ } else { ++ req->flags |= REQ_F_NEED_CLEANUP; ++ } +} + -+static int seq_fdinfo_open(struct inode *inode, struct file *file) ++static inline int io_alloc_async_data(struct io_kiocb *req) +{ -+ int ret = proc_fdinfo_access_allowed(inode); ++ WARN_ON_ONCE(!io_op_defs[req->opcode].async_size); ++ req->async_data = kmalloc(io_op_defs[req->opcode].async_size, GFP_KERNEL); ++ return req->async_data == NULL; ++} + -+ if (ret) -+ return ret; ++static int io_setup_async_rw(struct io_kiocb *req, const struct iovec *iovec, ++ const struct iovec *fast_iov, ++ struct iov_iter *iter, bool force) ++{ ++ if (!force && !io_op_defs[req->opcode].needs_async_setup) ++ return 0; ++ if (!req->async_data) { ++ struct io_async_rw *iorw; + - return single_open(file, seq_show, inode); - } - -@@ -348,12 +358,23 @@ static int proc_readfdinfo(struct file *file, struct dir_context *ctx) - proc_fdinfo_instantiate); - } - -+static int proc_open_fdinfo(struct inode *inode, struct file *file) ++ if (io_alloc_async_data(req)) { ++ kfree(iovec); ++ return -ENOMEM; ++ } ++ ++ io_req_map_rw(req, iovec, fast_iov, iter); ++ iorw = req->async_data; ++ /* we've copied and mapped the iter, ensure state is saved */ ++ iov_iter_save_state(&iorw->iter, &iorw->iter_state); ++ } ++ return 0; ++} ++ ++static inline int io_rw_prep_async(struct io_kiocb *req, int rw) +{ -+ int ret = proc_fdinfo_access_allowed(inode); ++ struct io_async_rw *iorw = req->async_data; ++ struct iovec *iov = iorw->fast_iov; ++ int ret; + -+ if (ret) ++ ret = io_import_iovec(rw, req, &iov, &iorw->iter, false); ++ if (unlikely(ret < 0)) + return ret; + ++ iorw->bytes_done = 0; ++ iorw->free_iovec = iov; ++ if (iov) ++ req->flags |= REQ_F_NEED_CLEANUP; ++ iov_iter_save_state(&iorw->iter, &iorw->iter_state); + return 0; +} + - const struct inode_operations proc_fdinfo_inode_operations = { - .lookup = proc_lookupfdinfo, - .setattr = proc_setattr, - }; - - const struct file_operations proc_fdinfo_operations = { -+ .open = proc_open_fdinfo, - .read = generic_read_dir, - .iterate_shared = proc_readfdinfo, - .llseek = generic_file_llseek, -diff --git a/fs/proc/generic.c b/fs/proc/generic.c -index 5b78739e60e40..d32f69aaaa36f 100644 ---- a/fs/proc/generic.c -+++ b/fs/proc/generic.c -@@ -448,6 +448,9 @@ static struct proc_dir_entry *__proc_create(struct proc_dir_entry **parent, - proc_set_user(ent, (*parent)->uid, (*parent)->gid); - - ent->proc_dops = &proc_misc_dentry_ops; -+ /* Revalidate everything under /proc/${pid}/net */ -+ if ((*parent)->proc_dops == &proc_net_dentry_ops) -+ pde_force_lookup(ent); - - out: - return ent; -diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c -index 15c2e55d2ed2c..123e3c9d8674b 100644 ---- a/fs/proc/proc_net.c -+++ b/fs/proc/proc_net.c -@@ -363,6 +363,9 @@ static __net_init int proc_net_ns_init(struct net *net) - - proc_set_user(netd, uid, gid); 
- -+ /* Seed dentry revalidation for /proc/${pid}/net */ -+ pde_force_lookup(netd); ++static int io_read_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) ++{ ++ if (unlikely(!(req->file->f_mode & FMODE_READ))) ++ return -EBADF; ++ return io_prep_rw(req, sqe, READ); ++} + - err = -EEXIST; - net_statd = proc_net_mkdir(net, "stat", netd); - if (!net_statd) -diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c -index 5d66faecd4ef0..013fc5931bc37 100644 ---- a/fs/proc/proc_sysctl.c -+++ b/fs/proc/proc_sysctl.c -@@ -25,7 +25,7 @@ static const struct file_operations proc_sys_dir_file_operations; - static const struct inode_operations proc_sys_dir_operations; - - /* shared constants to be used in various sysctls */ --const int sysctl_vals[] = { 0, 1, INT_MAX }; -+const int sysctl_vals[] = { -1, 0, 1, 2, 4, 100, 200, 1000, 3000, INT_MAX }; - EXPORT_SYMBOL(sysctl_vals); - - /* Support for permanently empty directories */ -diff --git a/fs/proc/stat.c b/fs/proc/stat.c -index 6561a06ef9059..4fb8729a68d4e 100644 ---- a/fs/proc/stat.c -+++ b/fs/proc/stat.c -@@ -24,7 +24,7 @@ - - #ifdef arch_idle_time - --static u64 get_idle_time(struct kernel_cpustat *kcs, int cpu) -+u64 get_idle_time(struct kernel_cpustat *kcs, int cpu) - { - u64 idle; - -@@ -46,7 +46,7 @@ static u64 get_iowait_time(struct kernel_cpustat *kcs, int cpu) - - #else - --static u64 get_idle_time(struct kernel_cpustat *kcs, int cpu) -+u64 get_idle_time(struct kernel_cpustat *kcs, int cpu) - { - u64 idle, idle_usecs = -1ULL; - -diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c -index cf25be3e03212..c3b76746cce85 100644 ---- a/fs/proc/task_mmu.c -+++ b/fs/proc/task_mmu.c -@@ -430,7 +430,8 @@ static void smaps_page_accumulate(struct mem_size_stats *mss, - } - - static void smaps_account(struct mem_size_stats *mss, struct page *page, -- bool compound, bool young, bool dirty, bool locked) -+ bool compound, bool young, bool dirty, bool locked, -+ bool migration) - { - int i, nr = compound ? compound_nr(page) : 1; - unsigned long size = nr * PAGE_SIZE; -@@ -457,8 +458,15 @@ static void smaps_account(struct mem_size_stats *mss, struct page *page, - * page_count(page) == 1 guarantees the page is mapped exactly once. - * If any subpage of the compound page mapped with PTE it would elevate - * page_count(). -+ * -+ * The page_mapcount() is called to get a snapshot of the mapcount. -+ * Without holding the page lock this snapshot can be slightly wrong as -+ * we cannot always read the mapcount atomically. It is not safe to -+ * call page_mapcount() even with PTL held if the page is not mapped, -+ * especially for migration entries. Treat regular migration entries -+ * as mapcount == 1. 
- */ -- if (page_count(page) == 1) { -+ if ((page_count(page) == 1) || migration) { - smaps_page_accumulate(mss, page, size, size << PSS_SHIFT, dirty, - locked, true); - return; -@@ -495,9 +503,12 @@ static void smaps_pte_entry(pte_t *pte, unsigned long addr, - struct vm_area_struct *vma = walk->vma; - bool locked = !!(vma->vm_flags & VM_LOCKED); - struct page *page = NULL; -+ bool migration = false, young = false, dirty = false; - - if (pte_present(*pte)) { - page = vm_normal_page(vma, addr, *pte); -+ young = pte_young(*pte); -+ dirty = pte_dirty(*pte); - } else if (is_swap_pte(*pte)) { - swp_entry_t swpent = pte_to_swp_entry(*pte); - -@@ -514,8 +525,11 @@ static void smaps_pte_entry(pte_t *pte, unsigned long addr, - } else { - mss->swap_pss += (u64)PAGE_SIZE << PSS_SHIFT; - } -- } else if (is_pfn_swap_entry(swpent)) -+ } else if (is_pfn_swap_entry(swpent)) { -+ if (is_migration_entry(swpent)) -+ migration = true; - page = pfn_swap_entry_to_page(swpent); -+ } - } else if (unlikely(IS_ENABLED(CONFIG_SHMEM) && mss->check_shmem_swap - && pte_none(*pte))) { - page = xa_load(&vma->vm_file->f_mapping->i_pages, -@@ -528,7 +542,7 @@ static void smaps_pte_entry(pte_t *pte, unsigned long addr, - if (!page) - return; - -- smaps_account(mss, page, false, pte_young(*pte), pte_dirty(*pte), locked); -+ smaps_account(mss, page, false, young, dirty, locked, migration); - } - - #ifdef CONFIG_TRANSPARENT_HUGEPAGE -@@ -539,6 +553,7 @@ static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr, - struct vm_area_struct *vma = walk->vma; - bool locked = !!(vma->vm_flags & VM_LOCKED); - struct page *page = NULL; -+ bool migration = false; - - if (pmd_present(*pmd)) { - /* FOLL_DUMP will return -EFAULT on huge zero page */ -@@ -546,8 +561,10 @@ static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr, - } else if (unlikely(thp_migration_supported() && is_swap_pmd(*pmd))) { - swp_entry_t entry = pmd_to_swp_entry(*pmd); - -- if (is_migration_entry(entry)) -+ if (is_migration_entry(entry)) { -+ migration = true; - page = pfn_swap_entry_to_page(entry); -+ } - } - if (IS_ERR_OR_NULL(page)) - return; -@@ -559,7 +576,9 @@ static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr, - /* pass */; - else - mss->file_thp += HPAGE_PMD_SIZE; -- smaps_account(mss, page, true, pmd_young(*pmd), pmd_dirty(*pmd), locked); ++/* ++ * This is our waitqueue callback handler, registered through lock_page_async() ++ * when we initially tried to do the IO with the iocb armed our waitqueue. ++ * This gets called when the page is unlocked, and we generally expect that to ++ * happen when the page IO is completed and the page is now uptodate. This will ++ * queue a task_work based retry of the operation, attempting to copy the data ++ * again. If the latter fails because the page was NOT uptodate, then we will ++ * do a thread based blocking retry of the operation. That's the unexpected ++ * slow path. ++ */ ++static int io_async_buf_func(struct wait_queue_entry *wait, unsigned mode, ++ int sync, void *arg) ++{ ++ struct wait_page_queue *wpq; ++ struct io_kiocb *req = wait->private; ++ struct wait_page_key *key = arg; + -+ smaps_account(mss, page, true, pmd_young(*pmd), pmd_dirty(*pmd), -+ locked, migration); - } - #else - static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr, -@@ -932,7 +951,7 @@ static int show_smaps_rollup(struct seq_file *m, void *v) - vma = vma->vm_next; - } - -- show_vma_header_prefix(m, priv->mm->mmap->vm_start, -+ show_vma_header_prefix(m, priv->mm->mmap ? 
priv->mm->mmap->vm_start : 0, - last_vma_end, 0, 0, 0, 0); - seq_pad(m, ' '); - seq_puts(m, "[rollup]\n"); -@@ -1363,6 +1382,7 @@ static pagemap_entry_t pte_to_pagemap_entry(struct pagemapread *pm, - { - u64 frame = 0, flags = 0; - struct page *page = NULL; -+ bool migration = false; - - if (pte_present(pte)) { - if (pm->show_pfn) -@@ -1384,13 +1404,14 @@ static pagemap_entry_t pte_to_pagemap_entry(struct pagemapread *pm, - frame = swp_type(entry) | - (swp_offset(entry) << MAX_SWAPFILES_SHIFT); - flags |= PM_SWAP; -+ migration = is_migration_entry(entry); - if (is_pfn_swap_entry(entry)) - page = pfn_swap_entry_to_page(entry); - } - - if (page && !PageAnon(page)) - flags |= PM_FILE; -- if (page && page_mapcount(page) == 1) -+ if (page && !migration && page_mapcount(page) == 1) - flags |= PM_MMAP_EXCLUSIVE; - if (vma->vm_flags & VM_SOFTDIRTY) - flags |= PM_SOFT_DIRTY; -@@ -1406,8 +1427,9 @@ static int pagemap_pmd_range(pmd_t *pmdp, unsigned long addr, unsigned long end, - spinlock_t *ptl; - pte_t *pte, *orig_pte; - int err = 0; -- - #ifdef CONFIG_TRANSPARENT_HUGEPAGE -+ bool migration = false; ++ wpq = container_of(wait, struct wait_page_queue, wait); + - ptl = pmd_trans_huge_lock(pmdp, vma); - if (ptl) { - u64 flags = 0, frame = 0; -@@ -1446,11 +1468,12 @@ static int pagemap_pmd_range(pmd_t *pmdp, unsigned long addr, unsigned long end, - if (pmd_swp_uffd_wp(pmd)) - flags |= PM_UFFD_WP; - VM_BUG_ON(!is_pmd_migration_entry(pmd)); -+ migration = is_migration_entry(entry); - page = pfn_swap_entry_to_page(entry); - } - #endif - -- if (page && page_mapcount(page) == 1) -+ if (page && !migration && page_mapcount(page) == 1) - flags |= PM_MMAP_EXCLUSIVE; - - for (; addr != end; addr += PAGE_SIZE) { -@@ -1560,7 +1583,8 @@ static const struct mm_walk_ops pagemap_ops = { - * Bits 5-54 swap offset if swapped - * Bit 55 pte is soft-dirty (see Documentation/admin-guide/mm/soft-dirty.rst) - * Bit 56 page exclusively mapped -- * Bits 57-60 zero -+ * Bit 57 pte is uffd-wp write-protected -+ * Bits 58-60 zero - * Bit 61 page is file-page or shared-anon - * Bit 62 page swapped - * Bit 63 page present -diff --git a/fs/proc/uptime.c b/fs/proc/uptime.c -index 5a1b228964fb7..deb99bc9b7e6b 100644 ---- a/fs/proc/uptime.c -+++ b/fs/proc/uptime.c -@@ -12,18 +12,22 @@ static int uptime_proc_show(struct seq_file *m, void *v) - { - struct timespec64 uptime; - struct timespec64 idle; -- u64 nsec; -+ u64 idle_nsec; - u32 rem; - int i; - -- nsec = 0; -- for_each_possible_cpu(i) -- nsec += (__force u64) kcpustat_cpu(i).cpustat[CPUTIME_IDLE]; -+ idle_nsec = 0; -+ for_each_possible_cpu(i) { -+ struct kernel_cpustat kcs; ++ if (!wake_page_match(wpq, key)) ++ return 0; + -+ kcpustat_cpu_fetch(&kcs, i); -+ idle_nsec += get_idle_time(&kcs, i); ++ req->rw.kiocb.ki_flags &= ~IOCB_WAITQ; ++ list_del_init(&wait->entry); ++ io_req_task_queue(req); ++ return 1; ++} ++ ++/* ++ * This controls whether a given IO request should be armed for async page ++ * based retry. If we return false here, the request is handed to the async ++ * worker threads for retry. If we're doing buffered reads on a regular file, ++ * we prepare a private wait_page_queue entry and retry the operation. This ++ * will either succeed because the page is now uptodate and unlocked, or it ++ * will register a callback when the page is unlocked at IO completion. Through ++ * that callback, io_uring uses task_work to setup a retry of the operation. ++ * That retry will attempt the buffered read again. 
The retry will generally ++ * succeed, or in rare cases where it fails, we then fall back to using the ++ * async worker threads for a blocking retry. ++ */ ++static bool io_rw_should_retry(struct io_kiocb *req) ++{ ++ struct io_async_rw *rw = req->async_data; ++ struct wait_page_queue *wait = &rw->wpq; ++ struct kiocb *kiocb = &req->rw.kiocb; ++ ++ /* never retry for NOWAIT, we just complete with -EAGAIN */ ++ if (req->flags & REQ_F_NOWAIT) ++ return false; ++ ++ /* Only for buffered IO */ ++ if (kiocb->ki_flags & (IOCB_DIRECT | IOCB_HIPRI)) ++ return false; ++ ++ /* ++ * just use poll if we can, and don't attempt if the fs doesn't ++ * support callback based unlocks ++ */ ++ if (file_can_poll(req->file) || !(req->file->f_mode & FMODE_BUF_RASYNC)) ++ return false; ++ ++ wait->wait.func = io_async_buf_func; ++ wait->wait.private = req; ++ wait->wait.flags = 0; ++ INIT_LIST_HEAD(&wait->wait.entry); ++ kiocb->ki_flags |= IOCB_WAITQ; ++ kiocb->ki_flags &= ~IOCB_NOWAIT; ++ kiocb->ki_waitq = wait; ++ return true; ++} ++ ++static inline int io_iter_do_read(struct io_kiocb *req, struct iov_iter *iter) ++{ ++ if (req->file->f_op->read_iter) ++ return call_read_iter(req->file, &req->rw.kiocb, iter); ++ else if (req->file->f_op->read) ++ return loop_rw_iter(READ, req, iter); ++ else ++ return -EINVAL; ++} ++ ++static bool need_read_all(struct io_kiocb *req) ++{ ++ return req->flags & REQ_F_ISREG || ++ S_ISBLK(file_inode(req->file)->i_mode); ++} ++ ++static int io_read(struct io_kiocb *req, unsigned int issue_flags) ++{ ++ struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs; ++ struct kiocb *kiocb = &req->rw.kiocb; ++ struct iov_iter __iter, *iter = &__iter; ++ struct io_async_rw *rw = req->async_data; ++ bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK; ++ struct iov_iter_state __state, *state; ++ ssize_t ret, ret2; ++ loff_t *ppos; ++ ++ if (rw) { ++ iter = &rw->iter; ++ state = &rw->iter_state; ++ /* ++ * We come here from an earlier attempt, restore our state to ++ * match in case it doesn't. It's cheap enough that we don't ++ * need to make this conditional. 
++ */ ++ iov_iter_restore(iter, state); ++ iovec = NULL; ++ } else { ++ ret = io_import_iovec(READ, req, &iovec, iter, !force_nonblock); ++ if (ret < 0) ++ return ret; ++ state = &__state; ++ iov_iter_save_state(iter, state); ++ } ++ req->result = iov_iter_count(iter); ++ ++ /* Ensure we clear previously set non-block flag */ ++ if (!force_nonblock) ++ kiocb->ki_flags &= ~IOCB_NOWAIT; ++ else ++ kiocb->ki_flags |= IOCB_NOWAIT; ++ ++ /* If the file doesn't support async, just async punt */ ++ if (force_nonblock && !io_file_supports_nowait(req, READ)) { ++ ret = io_setup_async_rw(req, iovec, inline_vecs, iter, true); ++ return ret ?: -EAGAIN; ++ } ++ ++ ppos = io_kiocb_update_pos(req); ++ ++ ret = rw_verify_area(READ, req->file, ppos, req->result); ++ if (unlikely(ret)) { ++ kfree(iovec); ++ return ret; ++ } ++ ++ ret = io_iter_do_read(req, iter); ++ ++ if (ret == -EAGAIN || (req->flags & REQ_F_REISSUE)) { ++ req->flags &= ~REQ_F_REISSUE; ++ /* IOPOLL retry should happen for io-wq threads */ ++ if (!force_nonblock && !(req->ctx->flags & IORING_SETUP_IOPOLL)) ++ goto done; ++ /* no retry on NONBLOCK nor RWF_NOWAIT */ ++ if (req->flags & REQ_F_NOWAIT) ++ goto done; ++ ret = 0; ++ } else if (ret == -EIOCBQUEUED) { ++ goto out_free; ++ } else if (ret <= 0 || ret == req->result || !force_nonblock || ++ (req->flags & REQ_F_NOWAIT) || !need_read_all(req)) { ++ /* read all, failed, already did sync or don't want to retry */ ++ goto done; + } - - ktime_get_boottime_ts64(&uptime); - timens_add_boottime(&uptime); - -- idle.tv_sec = div_u64_rem(nsec, NSEC_PER_SEC, &rem); -+ idle.tv_sec = div_u64_rem(idle_nsec, NSEC_PER_SEC, &rem); - idle.tv_nsec = rem; - seq_printf(m, "%lu.%02lu %lu.%02lu\n", - (unsigned long) uptime.tv_sec, -diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c -index 9a15334da2086..e5730986758fa 100644 ---- a/fs/proc/vmcore.c -+++ b/fs/proc/vmcore.c -@@ -124,9 +124,13 @@ ssize_t read_from_oldmem(char *buf, size_t count, - nr_bytes = count; - - /* If pfn is not ram, return zeros for sparse dump files */ -- if (pfn_is_ram(pfn) == 0) -- memset(buf, 0, nr_bytes); -- else { -+ if (pfn_is_ram(pfn) == 0) { -+ tmp = 0; -+ if (!userbuf) -+ memset(buf, 0, nr_bytes); -+ else if (clear_user(buf, nr_bytes)) -+ tmp = -EFAULT; -+ } else { - if (encrypted) - tmp = copy_oldmem_page_encrypted(pfn, buf, - nr_bytes, -@@ -135,10 +139,10 @@ ssize_t read_from_oldmem(char *buf, size_t count, - else - tmp = copy_oldmem_page(pfn, buf, nr_bytes, - offset, userbuf); -- -- if (tmp < 0) -- return tmp; - } -+ if (tmp < 0) -+ return tmp; + - *ppos += nr_bytes; - count -= nr_bytes; - buf += nr_bytes; -diff --git a/fs/proc_namespace.c b/fs/proc_namespace.c -index 392ef5162655b..49650e54d2f88 100644 ---- a/fs/proc_namespace.c -+++ b/fs/proc_namespace.c -@@ -80,7 +80,7 @@ static void show_mnt_opts(struct seq_file *m, struct vfsmount *mnt) - seq_puts(m, fs_infop->str); - } - -- if (mnt_user_ns(mnt) != &init_user_ns) -+ if (is_idmapped_mnt(mnt)) - seq_puts(m, ",idmapped"); - } - -diff --git a/fs/pstore/Kconfig b/fs/pstore/Kconfig -index 328da35da3908..8adabde685f13 100644 ---- a/fs/pstore/Kconfig -+++ b/fs/pstore/Kconfig -@@ -173,7 +173,6 @@ config PSTORE_BLK - tristate "Log panic/oops to a block device" - depends on PSTORE - depends on BLOCK -- depends on BROKEN - select PSTORE_ZONE - default n - help -diff --git a/fs/pstore/blk.c b/fs/pstore/blk.c -index 04ce58c939a0b..6093088de49fd 100644 ---- a/fs/pstore/blk.c -+++ b/fs/pstore/blk.c -@@ -311,7 +311,7 @@ static int __init __best_effort_init(void) - if (ret) - 
kfree(best_effort_dev); - else -- pr_info("attached %s (%zu) (no dedicated panic_write!)\n", -+ pr_info("attached %s (%lu) (no dedicated panic_write!)\n", - blkdev, best_effort_dev->zone.total_size); - - return ret; -diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c -index b9614db48b1de..ad96ba97d8f97 100644 ---- a/fs/pstore/platform.c -+++ b/fs/pstore/platform.c -@@ -143,21 +143,22 @@ static void pstore_timer_kick(void) - mod_timer(&pstore_timer, jiffies + msecs_to_jiffies(pstore_update_ms)); - } - --/* -- * Should pstore_dump() wait for a concurrent pstore_dump()? If -- * not, the current pstore_dump() will report a failure to dump -- * and return. -- */ --static bool pstore_cannot_wait(enum kmsg_dump_reason reason) -+static bool pstore_cannot_block_path(enum kmsg_dump_reason reason) - { -- /* In NMI path, pstore shouldn't block regardless of reason. */ + /* -+ * In case of NMI path, pstore shouldn't be blocked -+ * regardless of reason. ++ * Don't depend on the iter state matching what was consumed, or being ++ * untouched in case of error. Restore it and we'll advance it ++ * manually if we need to. + */ - if (in_nmi()) - return true; - - switch (reason) { - /* In panic case, other cpus are stopped by smp_send_stop(). */ - case KMSG_DUMP_PANIC: -- /* Emergency restart shouldn't be blocked. */ ++ iov_iter_restore(iter, state); ++ ++ ret2 = io_setup_async_rw(req, iovec, inline_vecs, iter, true); ++ if (ret2) ++ return ret2; ++ ++ iovec = NULL; ++ rw = req->async_data; + /* -+ * Emergency restart shouldn't be blocked by spinning on -+ * pstore_info::buf_lock. ++ * Now use our persistent iterator and state, if we aren't already. ++ * We've restored and mapped the iter to match. + */ - case KMSG_DUMP_EMERG: - return true; - default: -@@ -389,21 +390,19 @@ static void pstore_dump(struct kmsg_dumper *dumper, - unsigned long total = 0; - const char *why; - unsigned int part = 1; -+ unsigned long flags = 0; - int ret; - - why = kmsg_dump_reason_str(reason); - -- if (down_trylock(&psinfo->buf_lock)) { -- /* Failed to acquire lock: give up if we cannot wait. */ -- if (pstore_cannot_wait(reason)) { -- pr_err("dump skipped in %s path: may corrupt error record\n", -- in_nmi() ? "NMI" : why); -- return; -- } -- if (down_interruptible(&psinfo->buf_lock)) { -- pr_err("could not grab semaphore?!\n"); -+ if (pstore_cannot_block_path(reason)) { -+ if (!spin_trylock_irqsave(&psinfo->buf_lock, flags)) { -+ pr_err("dump skipped in %s path because of concurrent dump\n", -+ in_nmi() ? "NMI" : why); - return; - } ++ if (iter != &rw->iter) { ++ iter = &rw->iter; ++ state = &rw->iter_state; ++ } ++ ++ do { ++ /* ++ * We end up here because of a partial read, either from ++ * above or inside this loop. Advance the iter by the bytes ++ * that were consumed. ++ */ ++ iov_iter_advance(iter, ret); ++ if (!iov_iter_count(iter)) ++ break; ++ rw->bytes_done += ret; ++ iov_iter_save_state(iter, state); ++ ++ /* if we can retry, do so with the callbacks armed */ ++ if (!io_rw_should_retry(req)) { ++ kiocb->ki_flags &= ~IOCB_WAITQ; ++ return -EAGAIN; ++ } ++ ++ req->result = iov_iter_count(iter); ++ /* ++ * Now retry read with the IOCB_WAITQ parts set in the iocb. If ++ * we get -EIOCBQUEUED, then we'll get a notification when the ++ * desired page gets unlocked. We can also get a partial read ++ * here, and if we do, then just retry at the new offset. ++ */ ++ ret = io_iter_do_read(req, iter); ++ if (ret == -EIOCBQUEUED) ++ return 0; ++ /* we got some bytes, but not all. retry. 
*/ ++ kiocb->ki_flags &= ~IOCB_WAITQ; ++ iov_iter_restore(iter, state); ++ } while (ret > 0); ++done: ++ kiocb_done(kiocb, ret, issue_flags); ++out_free: ++ /* it's faster to check here then delegate to kfree */ ++ if (iovec) ++ kfree(iovec); ++ return 0; ++} ++ ++static int io_write_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) ++{ ++ if (unlikely(!(req->file->f_mode & FMODE_WRITE))) ++ return -EBADF; ++ return io_prep_rw(req, sqe, WRITE); ++} ++ ++static int io_write(struct io_kiocb *req, unsigned int issue_flags) ++{ ++ struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs; ++ struct kiocb *kiocb = &req->rw.kiocb; ++ struct iov_iter __iter, *iter = &__iter; ++ struct io_async_rw *rw = req->async_data; ++ bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK; ++ struct iov_iter_state __state, *state; ++ ssize_t ret, ret2; ++ loff_t *ppos; ++ ++ if (rw) { ++ iter = &rw->iter; ++ state = &rw->iter_state; ++ iov_iter_restore(iter, state); ++ iovec = NULL; + } else { -+ spin_lock_irqsave(&psinfo->buf_lock, flags); - } - - kmsg_dump_rewind(&iter); -@@ -467,8 +466,7 @@ static void pstore_dump(struct kmsg_dumper *dumper, - total += record.size; - part++; - } -- -- up(&psinfo->buf_lock); -+ spin_unlock_irqrestore(&psinfo->buf_lock, flags); - } - - static struct kmsg_dumper pstore_dumper = { -@@ -594,7 +592,7 @@ int pstore_register(struct pstore_info *psi) - psi->write_user = pstore_write_user_compat; - psinfo = psi; - mutex_init(&psinfo->read_mutex); -- sema_init(&psinfo->buf_lock, 1); -+ spin_lock_init(&psinfo->buf_lock); - - if (psi->flags & PSTORE_FLAGS_DMESG) - allocate_buf_for_compression(); -diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c -index 22d904bde6ab9..09d1307959d08 100644 ---- a/fs/quota/dquot.c -+++ b/fs/quota/dquot.c -@@ -79,6 +79,7 @@ - #include <linux/capability.h> - #include <linux/quotaops.h> - #include <linux/blkdev.h> -+#include <linux/sched/mm.h> - #include "../internal.h" /* ugh */ - - #include <linux/uaccess.h> -@@ -425,9 +426,11 @@ EXPORT_SYMBOL(mark_info_dirty); - int dquot_acquire(struct dquot *dquot) - { - int ret = 0, ret2 = 0; -+ unsigned int memalloc; - struct quota_info *dqopt = sb_dqopt(dquot->dq_sb); - - mutex_lock(&dquot->dq_lock); -+ memalloc = memalloc_nofs_save(); - if (!test_bit(DQ_READ_B, &dquot->dq_flags)) { - ret = dqopt->ops[dquot->dq_id.type]->read_dqblk(dquot); - if (ret < 0) -@@ -458,6 +461,7 @@ int dquot_acquire(struct dquot *dquot) - smp_mb__before_atomic(); - set_bit(DQ_ACTIVE_B, &dquot->dq_flags); - out_iolock: -+ memalloc_nofs_restore(memalloc); - mutex_unlock(&dquot->dq_lock); - return ret; - } -@@ -469,9 +473,11 @@ EXPORT_SYMBOL(dquot_acquire); - int dquot_commit(struct dquot *dquot) - { - int ret = 0; -+ unsigned int memalloc; - struct quota_info *dqopt = sb_dqopt(dquot->dq_sb); - - mutex_lock(&dquot->dq_lock); -+ memalloc = memalloc_nofs_save(); - if (!clear_dquot_dirty(dquot)) - goto out_lock; - /* Inactive dquot can be only if there was error during read/init -@@ -481,6 +487,7 @@ int dquot_commit(struct dquot *dquot) - else - ret = -EIO; - out_lock: -+ memalloc_nofs_restore(memalloc); - mutex_unlock(&dquot->dq_lock); - return ret; - } -@@ -492,9 +499,11 @@ EXPORT_SYMBOL(dquot_commit); - int dquot_release(struct dquot *dquot) - { - int ret = 0, ret2 = 0; -+ unsigned int memalloc; - struct quota_info *dqopt = sb_dqopt(dquot->dq_sb); - - mutex_lock(&dquot->dq_lock); -+ memalloc = memalloc_nofs_save(); - /* Check whether we are not racing with some other dqget() */ - if (dquot_is_busy(dquot)) - goto out_dqlock; -@@ 
-510,6 +519,7 @@ int dquot_release(struct dquot *dquot) - } - clear_bit(DQ_ACTIVE_B, &dquot->dq_flags); - out_dqlock: -+ memalloc_nofs_restore(memalloc); - mutex_unlock(&dquot->dq_lock); - return ret; - } -@@ -690,9 +700,14 @@ int dquot_quota_sync(struct super_block *sb, int type) - /* This is not very clever (and fast) but currently I don't know about - * any other simple way of getting quota data to disk and we must get - * them there for userspace to be visible... */ -- if (sb->s_op->sync_fs) -- sb->s_op->sync_fs(sb, 1); -- sync_blockdev(sb->s_bdev); -+ if (sb->s_op->sync_fs) { -+ ret = sb->s_op->sync_fs(sb, 1); -+ if (ret) ++ ret = io_import_iovec(WRITE, req, &iovec, iter, !force_nonblock); ++ if (ret < 0) + return ret; ++ state = &__state; ++ iov_iter_save_state(iter, state); + } -+ ret = sync_blockdev(sb->s_bdev); -+ if (ret) ++ req->result = iov_iter_count(iter); ++ ++ /* Ensure we clear previously set non-block flag */ ++ if (!force_nonblock) ++ kiocb->ki_flags &= ~IOCB_NOWAIT; ++ else ++ kiocb->ki_flags |= IOCB_NOWAIT; ++ ++ /* If the file doesn't support async, just async punt */ ++ if (force_nonblock && !io_file_supports_nowait(req, WRITE)) ++ goto copy_iov; ++ ++ /* file path doesn't support NOWAIT for non-direct_IO */ ++ if (force_nonblock && !(kiocb->ki_flags & IOCB_DIRECT) && ++ (req->flags & REQ_F_ISREG)) ++ goto copy_iov; ++ ++ ppos = io_kiocb_update_pos(req); ++ ++ ret = rw_verify_area(WRITE, req->file, ppos, req->result); ++ if (unlikely(ret)) ++ goto out_free; ++ ++ /* ++ * Open-code file_start_write here to grab freeze protection, ++ * which will be released by another thread in ++ * io_complete_rw(). Fool lockdep by telling it the lock got ++ * released so that it doesn't complain about the held lock when ++ * we return to userspace. ++ */ ++ if (req->flags & REQ_F_ISREG) { ++ sb_start_write(file_inode(req->file)->i_sb); ++ __sb_writers_release(file_inode(req->file)->i_sb, ++ SB_FREEZE_WRITE); ++ } ++ kiocb->ki_flags |= IOCB_WRITE; ++ ++ if (req->file->f_op->write_iter) ++ ret2 = call_write_iter(req->file, kiocb, iter); ++ else if (req->file->f_op->write) ++ ret2 = loop_rw_iter(WRITE, req, iter); ++ else ++ ret2 = -EINVAL; ++ ++ if (req->flags & REQ_F_REISSUE) { ++ req->flags &= ~REQ_F_REISSUE; ++ ret2 = -EAGAIN; ++ } ++ ++ /* ++ * Raw bdev writes will return -EOPNOTSUPP for IOCB_NOWAIT. Just ++ * retry them without IOCB_NOWAIT. 
++ */ ++ if (ret2 == -EOPNOTSUPP && (kiocb->ki_flags & IOCB_NOWAIT)) ++ ret2 = -EAGAIN; ++ /* no retry on NONBLOCK nor RWF_NOWAIT */ ++ if (ret2 == -EAGAIN && (req->flags & REQ_F_NOWAIT)) ++ goto done; ++ if (!force_nonblock || ret2 != -EAGAIN) { ++ /* IOPOLL retry should happen for io-wq threads */ ++ if ((req->ctx->flags & IORING_SETUP_IOPOLL) && ret2 == -EAGAIN) ++ goto copy_iov; ++done: ++ kiocb_done(kiocb, ret2, issue_flags); ++ } else { ++copy_iov: ++ iov_iter_restore(iter, state); ++ ret = io_setup_async_rw(req, iovec, inline_vecs, iter, false); ++ if (!ret) { ++ if (kiocb->ki_flags & IOCB_WRITE) ++ kiocb_end_write(req); ++ return -EAGAIN; ++ } + return ret; - - /* - * Now when everything is written we can discard the pagecache so -diff --git a/fs/quota/quota_tree.c b/fs/quota/quota_tree.c -index d3e995e1046fb..7e65d67de9f33 100644 ---- a/fs/quota/quota_tree.c -+++ b/fs/quota/quota_tree.c -@@ -71,6 +71,35 @@ static ssize_t write_blk(struct qtree_mem_dqinfo *info, uint blk, char *buf) - return ret; - } - -+static inline int do_check_range(struct super_block *sb, const char *val_name, -+ uint val, uint min_val, uint max_val) ++ } ++out_free: ++ /* it's reportedly faster than delegating the null check to kfree() */ ++ if (iovec) ++ kfree(iovec); ++ return ret; ++} ++ ++static int io_renameat_prep(struct io_kiocb *req, ++ const struct io_uring_sqe *sqe) ++{ ++ struct io_rename *ren = &req->rename; ++ const char __user *oldf, *newf; ++ ++ if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) ++ return -EINVAL; ++ if (sqe->ioprio || sqe->buf_index || sqe->splice_fd_in) ++ return -EINVAL; ++ if (unlikely(req->flags & REQ_F_FIXED_FILE)) ++ return -EBADF; ++ ++ ren->old_dfd = READ_ONCE(sqe->fd); ++ oldf = u64_to_user_ptr(READ_ONCE(sqe->addr)); ++ newf = u64_to_user_ptr(READ_ONCE(sqe->addr2)); ++ ren->new_dfd = READ_ONCE(sqe->len); ++ ren->flags = READ_ONCE(sqe->rename_flags); ++ ++ ren->oldpath = getname(oldf); ++ if (IS_ERR(ren->oldpath)) ++ return PTR_ERR(ren->oldpath); ++ ++ ren->newpath = getname(newf); ++ if (IS_ERR(ren->newpath)) { ++ putname(ren->oldpath); ++ return PTR_ERR(ren->newpath); ++ } ++ ++ req->flags |= REQ_F_NEED_CLEANUP; ++ return 0; ++} ++ ++static int io_renameat(struct io_kiocb *req, unsigned int issue_flags) ++{ ++ struct io_rename *ren = &req->rename; ++ int ret; ++ ++ if (issue_flags & IO_URING_F_NONBLOCK) ++ return -EAGAIN; ++ ++ ret = do_renameat2(ren->old_dfd, ren->oldpath, ren->new_dfd, ++ ren->newpath, ren->flags); ++ ++ req->flags &= ~REQ_F_NEED_CLEANUP; ++ if (ret < 0) ++ req_set_fail(req); ++ io_req_complete(req, ret); ++ return 0; ++} ++ ++static int io_unlinkat_prep(struct io_kiocb *req, ++ const struct io_uring_sqe *sqe) ++{ ++ struct io_unlink *un = &req->unlink; ++ const char __user *fname; ++ ++ if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) ++ return -EINVAL; ++ if (sqe->ioprio || sqe->off || sqe->len || sqe->buf_index || ++ sqe->splice_fd_in) ++ return -EINVAL; ++ if (unlikely(req->flags & REQ_F_FIXED_FILE)) ++ return -EBADF; ++ ++ un->dfd = READ_ONCE(sqe->fd); ++ ++ un->flags = READ_ONCE(sqe->unlink_flags); ++ if (un->flags & ~AT_REMOVEDIR) ++ return -EINVAL; ++ ++ fname = u64_to_user_ptr(READ_ONCE(sqe->addr)); ++ un->filename = getname(fname); ++ if (IS_ERR(un->filename)) ++ return PTR_ERR(un->filename); ++ ++ req->flags |= REQ_F_NEED_CLEANUP; ++ return 0; ++} ++ ++static int io_unlinkat(struct io_kiocb *req, unsigned int issue_flags) ++{ ++ struct io_unlink *un = &req->unlink; ++ int ret; ++ ++ if (issue_flags & IO_URING_F_NONBLOCK) ++ return 
-EAGAIN; ++ ++ if (un->flags & AT_REMOVEDIR) ++ ret = do_rmdir(un->dfd, un->filename); ++ else ++ ret = do_unlinkat(un->dfd, un->filename); ++ ++ req->flags &= ~REQ_F_NEED_CLEANUP; ++ if (ret < 0) ++ req_set_fail(req); ++ io_req_complete(req, ret); ++ return 0; ++} ++ ++static int io_mkdirat_prep(struct io_kiocb *req, ++ const struct io_uring_sqe *sqe) ++{ ++ struct io_mkdir *mkd = &req->mkdir; ++ const char __user *fname; ++ ++ if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) ++ return -EINVAL; ++ if (sqe->ioprio || sqe->off || sqe->rw_flags || sqe->buf_index || ++ sqe->splice_fd_in) ++ return -EINVAL; ++ if (unlikely(req->flags & REQ_F_FIXED_FILE)) ++ return -EBADF; ++ ++ mkd->dfd = READ_ONCE(sqe->fd); ++ mkd->mode = READ_ONCE(sqe->len); ++ ++ fname = u64_to_user_ptr(READ_ONCE(sqe->addr)); ++ mkd->filename = getname(fname); ++ if (IS_ERR(mkd->filename)) ++ return PTR_ERR(mkd->filename); ++ ++ req->flags |= REQ_F_NEED_CLEANUP; ++ return 0; ++} ++ ++static int io_mkdirat(struct io_kiocb *req, int issue_flags) ++{ ++ struct io_mkdir *mkd = &req->mkdir; ++ int ret; ++ ++ if (issue_flags & IO_URING_F_NONBLOCK) ++ return -EAGAIN; ++ ++ ret = do_mkdirat(mkd->dfd, mkd->filename, mkd->mode); ++ ++ req->flags &= ~REQ_F_NEED_CLEANUP; ++ if (ret < 0) ++ req_set_fail(req); ++ io_req_complete(req, ret); ++ return 0; ++} ++ ++static int io_symlinkat_prep(struct io_kiocb *req, ++ const struct io_uring_sqe *sqe) ++{ ++ struct io_symlink *sl = &req->symlink; ++ const char __user *oldpath, *newpath; ++ ++ if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) ++ return -EINVAL; ++ if (sqe->ioprio || sqe->len || sqe->rw_flags || sqe->buf_index || ++ sqe->splice_fd_in) ++ return -EINVAL; ++ if (unlikely(req->flags & REQ_F_FIXED_FILE)) ++ return -EBADF; ++ ++ sl->new_dfd = READ_ONCE(sqe->fd); ++ oldpath = u64_to_user_ptr(READ_ONCE(sqe->addr)); ++ newpath = u64_to_user_ptr(READ_ONCE(sqe->addr2)); ++ ++ sl->oldpath = getname(oldpath); ++ if (IS_ERR(sl->oldpath)) ++ return PTR_ERR(sl->oldpath); ++ ++ sl->newpath = getname(newpath); ++ if (IS_ERR(sl->newpath)) { ++ putname(sl->oldpath); ++ return PTR_ERR(sl->newpath); ++ } ++ ++ req->flags |= REQ_F_NEED_CLEANUP; ++ return 0; ++} ++ ++static int io_symlinkat(struct io_kiocb *req, int issue_flags) ++{ ++ struct io_symlink *sl = &req->symlink; ++ int ret; ++ ++ if (issue_flags & IO_URING_F_NONBLOCK) ++ return -EAGAIN; ++ ++ ret = do_symlinkat(sl->oldpath, sl->new_dfd, sl->newpath); ++ ++ req->flags &= ~REQ_F_NEED_CLEANUP; ++ if (ret < 0) ++ req_set_fail(req); ++ io_req_complete(req, ret); ++ return 0; ++} ++ ++static int io_linkat_prep(struct io_kiocb *req, ++ const struct io_uring_sqe *sqe) ++{ ++ struct io_hardlink *lnk = &req->hardlink; ++ const char __user *oldf, *newf; ++ ++ if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) ++ return -EINVAL; ++ if (sqe->ioprio || sqe->rw_flags || sqe->buf_index || sqe->splice_fd_in) ++ return -EINVAL; ++ if (unlikely(req->flags & REQ_F_FIXED_FILE)) ++ return -EBADF; ++ ++ lnk->old_dfd = READ_ONCE(sqe->fd); ++ lnk->new_dfd = READ_ONCE(sqe->len); ++ oldf = u64_to_user_ptr(READ_ONCE(sqe->addr)); ++ newf = u64_to_user_ptr(READ_ONCE(sqe->addr2)); ++ lnk->flags = READ_ONCE(sqe->hardlink_flags); ++ ++ lnk->oldpath = getname(oldf); ++ if (IS_ERR(lnk->oldpath)) ++ return PTR_ERR(lnk->oldpath); ++ ++ lnk->newpath = getname(newf); ++ if (IS_ERR(lnk->newpath)) { ++ putname(lnk->oldpath); ++ return PTR_ERR(lnk->newpath); ++ } ++ ++ req->flags |= REQ_F_NEED_CLEANUP; ++ return 0; ++} ++ ++static int io_linkat(struct io_kiocb *req, int 
issue_flags) ++{ ++ struct io_hardlink *lnk = &req->hardlink; ++ int ret; ++ ++ if (issue_flags & IO_URING_F_NONBLOCK) ++ return -EAGAIN; ++ ++ ret = do_linkat(lnk->old_dfd, lnk->oldpath, lnk->new_dfd, ++ lnk->newpath, lnk->flags); ++ ++ req->flags &= ~REQ_F_NEED_CLEANUP; ++ if (ret < 0) ++ req_set_fail(req); ++ io_req_complete(req, ret); ++ return 0; ++} ++ ++static int io_shutdown_prep(struct io_kiocb *req, ++ const struct io_uring_sqe *sqe) ++{ ++#if defined(CONFIG_NET) ++ if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) ++ return -EINVAL; ++ if (unlikely(sqe->ioprio || sqe->off || sqe->addr || sqe->rw_flags || ++ sqe->buf_index || sqe->splice_fd_in)) ++ return -EINVAL; ++ ++ req->shutdown.how = READ_ONCE(sqe->len); ++ return 0; ++#else ++ return -EOPNOTSUPP; ++#endif ++} ++ ++static int io_shutdown(struct io_kiocb *req, unsigned int issue_flags) ++{ ++#if defined(CONFIG_NET) ++ struct socket *sock; ++ int ret; ++ ++ if (issue_flags & IO_URING_F_NONBLOCK) ++ return -EAGAIN; ++ ++ sock = sock_from_file(req->file); ++ if (unlikely(!sock)) ++ return -ENOTSOCK; ++ ++ ret = __sys_shutdown_sock(sock, req->shutdown.how); ++ if (ret < 0) ++ req_set_fail(req); ++ io_req_complete(req, ret); ++ return 0; ++#else ++ return -EOPNOTSUPP; ++#endif ++} ++ ++static int __io_splice_prep(struct io_kiocb *req, ++ const struct io_uring_sqe *sqe) ++{ ++ struct io_splice *sp = &req->splice; ++ unsigned int valid_flags = SPLICE_F_FD_IN_FIXED | SPLICE_F_ALL; ++ ++ if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) ++ return -EINVAL; ++ ++ sp->len = READ_ONCE(sqe->len); ++ sp->flags = READ_ONCE(sqe->splice_flags); ++ if (unlikely(sp->flags & ~valid_flags)) ++ return -EINVAL; ++ sp->splice_fd_in = READ_ONCE(sqe->splice_fd_in); ++ return 0; ++} ++ ++static int io_tee_prep(struct io_kiocb *req, ++ const struct io_uring_sqe *sqe) ++{ ++ if (READ_ONCE(sqe->splice_off_in) || READ_ONCE(sqe->off)) ++ return -EINVAL; ++ return __io_splice_prep(req, sqe); ++} ++ ++static int io_tee(struct io_kiocb *req, unsigned int issue_flags) ++{ ++ struct io_splice *sp = &req->splice; ++ struct file *out = sp->file_out; ++ unsigned int flags = sp->flags & ~SPLICE_F_FD_IN_FIXED; ++ struct file *in; ++ long ret = 0; ++ ++ if (issue_flags & IO_URING_F_NONBLOCK) ++ return -EAGAIN; ++ ++ in = io_file_get(req->ctx, req, sp->splice_fd_in, ++ (sp->flags & SPLICE_F_FD_IN_FIXED), issue_flags); ++ if (!in) { ++ ret = -EBADF; ++ goto done; ++ } ++ ++ if (sp->len) ++ ret = do_tee(in, out, sp->len, flags); ++ ++ if (!(sp->flags & SPLICE_F_FD_IN_FIXED)) ++ io_put_file(in); ++done: ++ if (ret != sp->len) ++ req_set_fail(req); ++ io_req_complete(req, ret); ++ return 0; ++} ++ ++static int io_splice_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) ++{ ++ struct io_splice *sp = &req->splice; ++ ++ sp->off_in = READ_ONCE(sqe->splice_off_in); ++ sp->off_out = READ_ONCE(sqe->off); ++ return __io_splice_prep(req, sqe); ++} ++ ++static int io_splice(struct io_kiocb *req, unsigned int issue_flags) ++{ ++ struct io_splice *sp = &req->splice; ++ struct file *out = sp->file_out; ++ unsigned int flags = sp->flags & ~SPLICE_F_FD_IN_FIXED; ++ loff_t *poff_in, *poff_out; ++ struct file *in; ++ long ret = 0; ++ ++ if (issue_flags & IO_URING_F_NONBLOCK) ++ return -EAGAIN; ++ ++ in = io_file_get(req->ctx, req, sp->splice_fd_in, ++ (sp->flags & SPLICE_F_FD_IN_FIXED), issue_flags); ++ if (!in) { ++ ret = -EBADF; ++ goto done; ++ } ++ ++ poff_in = (sp->off_in == -1) ? NULL : &sp->off_in; ++ poff_out = (sp->off_out == -1) ? 
NULL : &sp->off_out; ++ ++ if (sp->len) ++ ret = do_splice(in, poff_in, out, poff_out, sp->len, flags); ++ ++ if (!(sp->flags & SPLICE_F_FD_IN_FIXED)) ++ io_put_file(in); ++done: ++ if (ret != sp->len) ++ req_set_fail(req); ++ io_req_complete(req, ret); ++ return 0; ++} ++ ++/* ++ * IORING_OP_NOP just posts a completion event, nothing else. ++ */ ++static int io_nop(struct io_kiocb *req, unsigned int issue_flags) ++{ ++ struct io_ring_ctx *ctx = req->ctx; ++ ++ if (unlikely(ctx->flags & IORING_SETUP_IOPOLL)) ++ return -EINVAL; ++ ++ __io_req_complete(req, issue_flags, 0, 0); ++ return 0; ++} ++ ++static int io_fsync_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) +{ -+ if (val < min_val || val > max_val) { -+ quota_error(sb, "Getting %s %u out of range %u-%u", -+ val_name, val, min_val, max_val); -+ return -EUCLEAN; ++ struct io_ring_ctx *ctx = req->ctx; ++ ++ if (unlikely(ctx->flags & IORING_SETUP_IOPOLL)) ++ return -EINVAL; ++ if (unlikely(sqe->addr || sqe->ioprio || sqe->buf_index || ++ sqe->splice_fd_in)) ++ return -EINVAL; ++ ++ req->sync.flags = READ_ONCE(sqe->fsync_flags); ++ if (unlikely(req->sync.flags & ~IORING_FSYNC_DATASYNC)) ++ return -EINVAL; ++ ++ req->sync.off = READ_ONCE(sqe->off); ++ req->sync.len = READ_ONCE(sqe->len); ++ return 0; ++} ++ ++static int io_fsync(struct io_kiocb *req, unsigned int issue_flags) ++{ ++ loff_t end = req->sync.off + req->sync.len; ++ int ret; ++ ++ /* fsync always requires a blocking context */ ++ if (issue_flags & IO_URING_F_NONBLOCK) ++ return -EAGAIN; ++ ++ ret = vfs_fsync_range(req->file, req->sync.off, ++ end > 0 ? end : LLONG_MAX, ++ req->sync.flags & IORING_FSYNC_DATASYNC); ++ if (ret < 0) ++ req_set_fail(req); ++ io_req_complete(req, ret); ++ return 0; ++} ++ ++static int io_fallocate_prep(struct io_kiocb *req, ++ const struct io_uring_sqe *sqe) ++{ ++ if (sqe->ioprio || sqe->buf_index || sqe->rw_flags || ++ sqe->splice_fd_in) ++ return -EINVAL; ++ if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) ++ return -EINVAL; ++ ++ req->sync.off = READ_ONCE(sqe->off); ++ req->sync.len = READ_ONCE(sqe->addr); ++ req->sync.mode = READ_ONCE(sqe->len); ++ return 0; ++} ++ ++static int io_fallocate(struct io_kiocb *req, unsigned int issue_flags) ++{ ++ int ret; ++ ++ /* fallocate always requiring blocking context */ ++ if (issue_flags & IO_URING_F_NONBLOCK) ++ return -EAGAIN; ++ ret = vfs_fallocate(req->file, req->sync.mode, req->sync.off, ++ req->sync.len); ++ if (ret < 0) ++ req_set_fail(req); ++ else ++ fsnotify_modify(req->file); ++ io_req_complete(req, ret); ++ return 0; ++} ++ ++static int __io_openat_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) ++{ ++ const char __user *fname; ++ int ret; ++ ++ if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) ++ return -EINVAL; ++ if (unlikely(sqe->ioprio || sqe->buf_index)) ++ return -EINVAL; ++ if (unlikely(req->flags & REQ_F_FIXED_FILE)) ++ return -EBADF; ++ ++ /* open.how should be already initialised */ ++ if (!(req->open.how.flags & O_PATH) && force_o_largefile()) ++ req->open.how.flags |= O_LARGEFILE; ++ ++ req->open.dfd = READ_ONCE(sqe->fd); ++ fname = u64_to_user_ptr(READ_ONCE(sqe->addr)); ++ req->open.filename = getname(fname); ++ if (IS_ERR(req->open.filename)) { ++ ret = PTR_ERR(req->open.filename); ++ req->open.filename = NULL; ++ return ret; + } + ++ req->open.file_slot = READ_ONCE(sqe->file_index); ++ if (req->open.file_slot && (req->open.how.flags & O_CLOEXEC)) ++ return -EINVAL; ++ ++ req->open.nofile = rlimit(RLIMIT_NOFILE); ++ req->flags |= REQ_F_NEED_CLEANUP; 
+ return 0; +} + -+static int check_dquot_block_header(struct qtree_mem_dqinfo *info, -+ struct qt_disk_dqdbheader *dh) ++static int io_openat_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) +{ -+ int err = 0; ++ u64 mode = READ_ONCE(sqe->len); ++ u64 flags = READ_ONCE(sqe->open_flags); + -+ err = do_check_range(info->dqi_sb, "dqdh_next_free", -+ le32_to_cpu(dh->dqdh_next_free), 0, -+ info->dqi_blocks - 1); -+ if (err) -+ return err; -+ err = do_check_range(info->dqi_sb, "dqdh_prev_free", -+ le32_to_cpu(dh->dqdh_prev_free), 0, -+ info->dqi_blocks - 1); ++ req->open.how = build_open_how(flags, mode); ++ return __io_openat_prep(req, sqe); ++} + -+ return err; ++static int io_openat2_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) ++{ ++ struct open_how __user *how; ++ size_t len; ++ int ret; ++ ++ how = u64_to_user_ptr(READ_ONCE(sqe->addr2)); ++ len = READ_ONCE(sqe->len); ++ if (len < OPEN_HOW_SIZE_VER0) ++ return -EINVAL; ++ ++ ret = copy_struct_from_user(&req->open.how, sizeof(req->open.how), how, ++ len); ++ if (ret) ++ return ret; ++ ++ return __io_openat_prep(req, sqe); +} + - /* Remove empty block from list and return it */ - static int get_free_dqblk(struct qtree_mem_dqinfo *info) - { -@@ -85,6 +114,9 @@ static int get_free_dqblk(struct qtree_mem_dqinfo *info) - ret = read_blk(info, blk, buf); - if (ret < 0) - goto out_buf; -+ ret = check_dquot_block_header(info, dh); -+ if (ret) -+ goto out_buf; - info->dqi_free_blk = le32_to_cpu(dh->dqdh_next_free); - } - else { -@@ -232,6 +264,9 @@ static uint find_free_dqentry(struct qtree_mem_dqinfo *info, - *err = read_blk(info, blk, buf); - if (*err < 0) - goto out_buf; -+ *err = check_dquot_block_header(info, dh); -+ if (*err) -+ goto out_buf; - } else { - blk = get_free_dqblk(info); - if ((int)blk < 0) { -@@ -414,6 +449,7 @@ static int free_dqentry(struct qtree_mem_dqinfo *info, struct dquot *dquot, - quota_error(dquot->dq_sb, "Quota structure has offset to " - "other block (%u) than it should (%u)", blk, - (uint)(dquot->dq_off >> info->dqi_blocksize_bits)); -+ ret = -EIO; - goto out_buf; - } - ret = read_blk(info, blk, buf); -@@ -423,6 +459,9 @@ static int free_dqentry(struct qtree_mem_dqinfo *info, struct dquot *dquot, - goto out_buf; - } - dh = (struct qt_disk_dqdbheader *)buf; -+ ret = check_dquot_block_header(info, dh); ++static int io_openat2(struct io_kiocb *req, unsigned int issue_flags) ++{ ++ struct open_flags op; ++ struct file *file; ++ bool resolve_nonblock, nonblock_set; ++ bool fixed = !!req->open.file_slot; ++ int ret; ++ ++ ret = build_open_flags(&req->open.how, &op); + if (ret) -+ goto out_buf; - le16_add_cpu(&dh->dqdh_entries, -1); - if (!le16_to_cpu(dh->dqdh_entries)) { /* Block got free? 
*/ - ret = remove_free_dqentry(info, buf, blk); -@@ -479,6 +518,13 @@ static int remove_tree(struct qtree_mem_dqinfo *info, struct dquot *dquot, - goto out_buf; - } - newblk = le32_to_cpu(ref[get_index(info, dquot->dq_id, depth)]); -+ if (newblk < QT_TREEOFF || newblk >= info->dqi_blocks) { -+ quota_error(dquot->dq_sb, "Getting block too big (%u >= %u)", -+ newblk, info->dqi_blocks); -+ ret = -EUCLEAN; -+ goto out_buf; ++ goto err; ++ nonblock_set = op.open_flag & O_NONBLOCK; ++ resolve_nonblock = req->open.how.resolve & RESOLVE_CACHED; ++ if (issue_flags & IO_URING_F_NONBLOCK) { ++ /* ++ * Don't bother trying for O_TRUNC, O_CREAT, or O_TMPFILE open, ++ * it'll always -EAGAIN ++ */ ++ if (req->open.how.flags & (O_TRUNC | O_CREAT | O_TMPFILE)) ++ return -EAGAIN; ++ op.lookup_flags |= LOOKUP_CACHED; ++ op.open_flag |= O_NONBLOCK; + } + - if (depth == info->dqi_qtree_depth - 1) { - ret = free_dqentry(info, dquot, newblk); - newblk = 0; -@@ -578,6 +624,13 @@ static loff_t find_tree_dqentry(struct qtree_mem_dqinfo *info, - blk = le32_to_cpu(ref[get_index(info, dquot->dq_id, depth)]); - if (!blk) /* No reference? */ - goto out_buf; -+ if (blk < QT_TREEOFF || blk >= info->dqi_blocks) { -+ quota_error(dquot->dq_sb, "Getting block too big (%u >= %u)", -+ blk, info->dqi_blocks); -+ ret = -EUCLEAN; -+ goto out_buf; ++ if (!fixed) { ++ ret = __get_unused_fd_flags(req->open.how.flags, req->open.nofile); ++ if (ret < 0) ++ goto err; + } + - if (depth < info->dqi_qtree_depth - 1) - ret = find_tree_dqentry(info, dquot, blk, depth+1); - else -diff --git a/fs/read_write.c b/fs/read_write.c -index af057c57bdc64..8d3ec975514d0 100644 ---- a/fs/read_write.c -+++ b/fs/read_write.c -@@ -1250,6 +1250,9 @@ static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos, - count, fl); - file_end_write(out.file); - } else { -+ if (out.file->f_flags & O_NONBLOCK) -+ fl |= SPLICE_F_NONBLOCK; -+ - retval = splice_file_to_pipe(in.file, opipe, &pos, count, fl); - } - -@@ -1384,28 +1387,6 @@ ssize_t generic_copy_file_range(struct file *file_in, loff_t pos_in, - } - EXPORT_SYMBOL(generic_copy_file_range); - --static ssize_t do_copy_file_range(struct file *file_in, loff_t pos_in, -- struct file *file_out, loff_t pos_out, -- size_t len, unsigned int flags) --{ -- /* -- * Although we now allow filesystems to handle cross sb copy, passing -- * a file of the wrong filesystem type to filesystem driver can result -- * in an attempt to dereference the wrong type of ->private_data, so -- * avoid doing that until we really have a good reason. NFS defines -- * several different file_system_type structures, but they all end up -- * using the same ->copy_file_range() function pointer. -- */ -- if (file_out->f_op->copy_file_range && -- file_out->f_op->copy_file_range == file_in->f_op->copy_file_range) -- return file_out->f_op->copy_file_range(file_in, pos_in, -- file_out, pos_out, -- len, flags); -- -- return generic_copy_file_range(file_in, pos_in, file_out, pos_out, len, -- flags); --} -- - /* - * Performs necessary checks before doing a file copy - * -@@ -1427,6 +1408,24 @@ static int generic_copy_file_checks(struct file *file_in, loff_t pos_in, - if (ret) - return ret; - -+ /* -+ * We allow some filesystems to handle cross sb copy, but passing -+ * a file of the wrong filesystem type to filesystem driver can result -+ * in an attempt to dereference the wrong type of ->private_data, so -+ * avoid doing that until we really have a good reason. 
-+ * -+ * nfs and cifs define several different file_system_type structures -+ * and several different sets of file_operations, but they all end up -+ * using the same ->copy_file_range() function pointer. -+ */ -+ if (file_out->f_op->copy_file_range) { -+ if (file_in->f_op->copy_file_range != -+ file_out->f_op->copy_file_range) -+ return -EXDEV; -+ } else if (file_inode(file_in)->i_sb != file_inode(file_out)->i_sb) { -+ return -EXDEV; -+ } ++ file = do_filp_open(req->open.dfd, req->open.filename, &op); ++ if (IS_ERR(file)) { ++ /* ++ * We could hang on to this 'fd' on retrying, but seems like ++ * marginal gain for something that is now known to be a slower ++ * path. So just put it, and we'll get a new one when we retry. ++ */ ++ if (!fixed) ++ put_unused_fd(ret); + - /* Don't touch certain kinds of inodes */ - if (IS_IMMUTABLE(inode_out)) - return -EPERM; -@@ -1492,26 +1491,41 @@ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in, - file_start_write(file_out); - - /* -- * Try cloning first, this is supported by more file systems, and -- * more efficient if both clone and copy are supported (e.g. NFS). -+ * Cloning is supported by more file systems, so we implement copy on -+ * same sb using clone, but for filesystems where both clone and copy -+ * are supported (e.g. nfs,cifs), we only call the copy method. - */ -+ if (file_out->f_op->copy_file_range) { -+ ret = file_out->f_op->copy_file_range(file_in, pos_in, -+ file_out, pos_out, -+ len, flags); -+ goto done; ++ ret = PTR_ERR(file); ++ /* only retry if RESOLVE_CACHED wasn't already set by application */ ++ if (ret == -EAGAIN && ++ (!resolve_nonblock && (issue_flags & IO_URING_F_NONBLOCK))) ++ return -EAGAIN; ++ goto err; + } + - if (file_in->f_op->remap_file_range && - file_inode(file_in)->i_sb == file_inode(file_out)->i_sb) { -- loff_t cloned; -- -- cloned = file_in->f_op->remap_file_range(file_in, pos_in, -+ ret = file_in->f_op->remap_file_range(file_in, pos_in, - file_out, pos_out, - min_t(loff_t, MAX_RW_COUNT, len), - REMAP_FILE_CAN_SHORTEN); -- if (cloned > 0) { -- ret = cloned; -+ if (ret > 0) - goto done; -- } - } - -- ret = do_copy_file_range(file_in, pos_in, file_out, pos_out, len, -- flags); -- WARN_ON_ONCE(ret == -EOPNOTSUPP); -+ /* -+ * We can get here for same sb copy of filesystems that do not implement -+ * ->copy_file_range() in case filesystem does not support clone or in -+ * case filesystem supports clone but rejected the clone request (e.g. -+ * because it was not block aligned). -+ * -+ * In both cases, fall back to kernel copy so we are able to maintain a -+ * consistent story about which filesystems support copy_file_range() -+ * and which filesystems do not, that will allow userspace tools to -+ * make consistent desicions w.r.t using copy_file_range(). -+ */ -+ ret = generic_copy_file_range(file_in, pos_in, file_out, pos_out, len, -+ flags); ++ if ((issue_flags & IO_URING_F_NONBLOCK) && !nonblock_set) ++ file->f_flags &= ~O_NONBLOCK; ++ fsnotify_open(file); + - done: - if (ret > 0) { - fsnotify_access(file_in); -diff --git a/fs/remap_range.c b/fs/remap_range.c -index 6d4a9beaa0974..e69bafb96f093 100644 ---- a/fs/remap_range.c -+++ b/fs/remap_range.c -@@ -71,7 +71,8 @@ static int generic_remap_checks(struct file *file_in, loff_t pos_in, - * Otherwise, make sure the count is also block-aligned, having - * already confirmed the starting offsets' block alignment. 
- */ -- if (pos_in + count == size_in) { -+ if (pos_in + count == size_in && -+ (!(remap_flags & REMAP_FILE_DEDUP) || pos_out + count == size_out)) { - bcount = ALIGN(size_in, bs) - pos_in; - } else { - if (!IS_ALIGNED(count, bs)) -diff --git a/fs/select.c b/fs/select.c -index 945896d0ac9e7..5edffee1162c2 100644 ---- a/fs/select.c -+++ b/fs/select.c -@@ -458,9 +458,11 @@ get_max: - return max; - } - --#define POLLIN_SET (EPOLLRDNORM | EPOLLRDBAND | EPOLLIN | EPOLLHUP | EPOLLERR) --#define POLLOUT_SET (EPOLLWRBAND | EPOLLWRNORM | EPOLLOUT | EPOLLERR) --#define POLLEX_SET (EPOLLPRI) -+#define POLLIN_SET (EPOLLRDNORM | EPOLLRDBAND | EPOLLIN | EPOLLHUP | EPOLLERR |\ -+ EPOLLNVAL) -+#define POLLOUT_SET (EPOLLWRBAND | EPOLLWRNORM | EPOLLOUT | EPOLLERR |\ -+ EPOLLNVAL) -+#define POLLEX_SET (EPOLLPRI | EPOLLNVAL) - - static inline void wait_key_set(poll_table *wait, unsigned long in, - unsigned long out, unsigned long bit, -@@ -527,6 +529,7 @@ static int do_select(int n, fd_set_bits *fds, struct timespec64 *end_time) - break; - if (!(bit & all_bits)) - continue; -+ mask = EPOLLNVAL; - f = fdget(i); - if (f.file) { - wait_key_set(wait, in, out, bit, -@@ -534,34 +537,34 @@ static int do_select(int n, fd_set_bits *fds, struct timespec64 *end_time) - mask = vfs_poll(f.file, wait); - - fdput(f); -- if ((mask & POLLIN_SET) && (in & bit)) { -- res_in |= bit; -- retval++; -- wait->_qproc = NULL; -- } -- if ((mask & POLLOUT_SET) && (out & bit)) { -- res_out |= bit; -- retval++; -- wait->_qproc = NULL; -- } -- if ((mask & POLLEX_SET) && (ex & bit)) { -- res_ex |= bit; -- retval++; -- wait->_qproc = NULL; -- } -- /* got something, stop busy polling */ -- if (retval) { -- can_busy_loop = false; -- busy_flag = 0; -- -- /* -- * only remember a returned -- * POLL_BUSY_LOOP if we asked for it -- */ -- } else if (busy_flag & mask) -- can_busy_loop = true; -- - } -+ if ((mask & POLLIN_SET) && (in & bit)) { -+ res_in |= bit; -+ retval++; -+ wait->_qproc = NULL; -+ } -+ if ((mask & POLLOUT_SET) && (out & bit)) { -+ res_out |= bit; -+ retval++; -+ wait->_qproc = NULL; -+ } -+ if ((mask & POLLEX_SET) && (ex & bit)) { -+ res_ex |= bit; -+ retval++; -+ wait->_qproc = NULL; -+ } -+ /* got something, stop busy polling */ -+ if (retval) { -+ can_busy_loop = false; -+ busy_flag = 0; ++ if (!fixed) ++ fd_install(ret, file); ++ else ++ ret = io_install_fixed_file(req, file, issue_flags, ++ req->open.file_slot - 1); ++err: ++ putname(req->open.filename); ++ req->flags &= ~REQ_F_NEED_CLEANUP; ++ if (ret < 0) ++ req_set_fail(req); ++ __io_req_complete(req, issue_flags, ret, 0); ++ return 0; ++} + -+ /* -+ * only remember a returned -+ * POLL_BUSY_LOOP if we asked for it -+ */ -+ } else if (busy_flag & mask) -+ can_busy_loop = true; ++static int io_openat(struct io_kiocb *req, unsigned int issue_flags) ++{ ++ return io_openat2(req, issue_flags); ++} + - } - if (res_in) - *rinp = res_in; -diff --git a/fs/seq_file.c b/fs/seq_file.c -index 4a2cda04d3e29..b17ee4c4f618a 100644 ---- a/fs/seq_file.c -+++ b/fs/seq_file.c -@@ -947,6 +947,38 @@ struct list_head *seq_list_next(void *v, struct list_head *head, loff_t *ppos) - } - EXPORT_SYMBOL(seq_list_next); - -+struct list_head *seq_list_start_rcu(struct list_head *head, loff_t pos) ++static int io_remove_buffers_prep(struct io_kiocb *req, ++ const struct io_uring_sqe *sqe) +{ -+ struct list_head *lh; ++ struct io_provide_buf *p = &req->pbuf; ++ u64 tmp; + -+ list_for_each_rcu(lh, head) -+ if (pos-- == 0) -+ return lh; ++ if (sqe->ioprio || sqe->rw_flags || sqe->addr || sqe->len || sqe->off 
|| ++ sqe->splice_fd_in) ++ return -EINVAL; + -+ return NULL; ++ tmp = READ_ONCE(sqe->fd); ++ if (!tmp || tmp > USHRT_MAX) ++ return -EINVAL; ++ ++ memset(p, 0, sizeof(*p)); ++ p->nbufs = tmp; ++ p->bgid = READ_ONCE(sqe->buf_group); ++ return 0; +} -+EXPORT_SYMBOL(seq_list_start_rcu); + -+struct list_head *seq_list_start_head_rcu(struct list_head *head, loff_t pos) ++static int __io_remove_buffers(struct io_ring_ctx *ctx, struct io_buffer *buf, ++ int bgid, unsigned nbufs) +{ -+ if (!pos) -+ return head; ++ unsigned i = 0; + -+ return seq_list_start_rcu(head, pos - 1); ++ /* shouldn't happen */ ++ if (!nbufs) ++ return 0; ++ ++ /* the head kbuf is the list itself */ ++ while (!list_empty(&buf->list)) { ++ struct io_buffer *nxt; ++ ++ nxt = list_first_entry(&buf->list, struct io_buffer, list); ++ list_del(&nxt->list); ++ kfree(nxt); ++ if (++i == nbufs) ++ return i; ++ cond_resched(); ++ } ++ i++; ++ kfree(buf); ++ xa_erase(&ctx->io_buffers, bgid); ++ ++ return i; +} -+EXPORT_SYMBOL(seq_list_start_head_rcu); + -+struct list_head *seq_list_next_rcu(void *v, struct list_head *head, -+ loff_t *ppos) ++static int io_remove_buffers(struct io_kiocb *req, unsigned int issue_flags) +{ -+ struct list_head *lh; ++ struct io_provide_buf *p = &req->pbuf; ++ struct io_ring_ctx *ctx = req->ctx; ++ struct io_buffer *head; ++ int ret = 0; ++ bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK; + -+ lh = list_next_rcu((struct list_head *)v); -+ ++*ppos; -+ return lh == head ? NULL : lh; ++ io_ring_submit_lock(ctx, !force_nonblock); ++ ++ lockdep_assert_held(&ctx->uring_lock); ++ ++ ret = -ENOENT; ++ head = xa_load(&ctx->io_buffers, p->bgid); ++ if (head) ++ ret = __io_remove_buffers(ctx, head, p->bgid, p->nbufs); ++ if (ret < 0) ++ req_set_fail(req); ++ ++ /* complete before unlock, IOPOLL may need the lock */ ++ __io_req_complete(req, issue_flags, ret, 0); ++ io_ring_submit_unlock(ctx, !force_nonblock); ++ return 0; +} -+EXPORT_SYMBOL(seq_list_next_rcu); + - /** - * seq_hlist_start - start an iteration of a hlist - * @head: the head of the hlist -diff --git a/fs/signalfd.c b/fs/signalfd.c -index 040e1cf905282..65ce0e72e7b95 100644 ---- a/fs/signalfd.c -+++ b/fs/signalfd.c -@@ -35,17 +35,7 @@ - - void signalfd_cleanup(struct sighand_struct *sighand) - { -- wait_queue_head_t *wqh = &sighand->signalfd_wqh; -- /* -- * The lockless check can race with remove_wait_queue() in progress, -- * but in this case its caller should run under rcu_read_lock() and -- * sighand_cachep is SLAB_TYPESAFE_BY_RCU, we can safely return. 
-- */ -- if (likely(!waitqueue_active(wqh))) -- return; -- -- /* wait_queue_entry_t->func(POLLFREE) should do remove_wait_queue() */ -- wake_up_poll(wqh, EPOLLHUP | POLLFREE); -+ wake_up_pollfree(&sighand->signalfd_wqh); - } - - struct signalfd_ctx { -diff --git a/fs/smbfs_common/cifs_arc4.c b/fs/smbfs_common/cifs_arc4.c -index 85ba15a60b13b..043e4cb839fa2 100644 ---- a/fs/smbfs_common/cifs_arc4.c -+++ b/fs/smbfs_common/cifs_arc4.c -@@ -72,16 +72,3 @@ void cifs_arc4_crypt(struct arc4_ctx *ctx, u8 *out, const u8 *in, unsigned int l - ctx->y = y; - } - EXPORT_SYMBOL_GPL(cifs_arc4_crypt); -- --static int __init --init_smbfs_common(void) --{ -- return 0; --} --static void __init --exit_smbfs_common(void) --{ --} -- --module_init(init_smbfs_common) --module_exit(exit_smbfs_common) -diff --git a/fs/stat.c b/fs/stat.c -index 28d2020ba1f42..246d138ec0669 100644 ---- a/fs/stat.c -+++ b/fs/stat.c -@@ -334,9 +334,6 @@ SYSCALL_DEFINE2(fstat, unsigned int, fd, struct __old_kernel_stat __user *, stat - # define choose_32_64(a,b) b - #endif - --#define valid_dev(x) choose_32_64(old_valid_dev(x),true) --#define encode_dev(x) choose_32_64(old_encode_dev,new_encode_dev)(x) -- - #ifndef INIT_STRUCT_STAT_PADDING - # define INIT_STRUCT_STAT_PADDING(st) memset(&st, 0, sizeof(st)) - #endif -@@ -345,7 +342,9 @@ static int cp_new_stat(struct kstat *stat, struct stat __user *statbuf) - { - struct stat tmp; - -- if (!valid_dev(stat->dev) || !valid_dev(stat->rdev)) -+ if (sizeof(tmp.st_dev) < 4 && !old_valid_dev(stat->dev)) ++static int io_provide_buffers_prep(struct io_kiocb *req, ++ const struct io_uring_sqe *sqe) ++{ ++ unsigned long size, tmp_check; ++ struct io_provide_buf *p = &req->pbuf; ++ u64 tmp; ++ ++ if (sqe->ioprio || sqe->rw_flags || sqe->splice_fd_in) ++ return -EINVAL; ++ ++ tmp = READ_ONCE(sqe->fd); ++ if (!tmp || tmp > USHRT_MAX) ++ return -E2BIG; ++ p->nbufs = tmp; ++ p->addr = READ_ONCE(sqe->addr); ++ p->len = READ_ONCE(sqe->len); ++ ++ if (check_mul_overflow((unsigned long)p->len, (unsigned long)p->nbufs, ++ &size)) + return -EOVERFLOW; -+ if (sizeof(tmp.st_rdev) < 4 && !old_valid_dev(stat->rdev)) - return -EOVERFLOW; - #if BITS_PER_LONG == 32 - if (stat->size > MAX_NON_LFS) -@@ -353,7 +352,7 @@ static int cp_new_stat(struct kstat *stat, struct stat __user *statbuf) - #endif - - INIT_STRUCT_STAT_PADDING(tmp); -- tmp.st_dev = encode_dev(stat->dev); -+ tmp.st_dev = new_encode_dev(stat->dev); - tmp.st_ino = stat->ino; - if (sizeof(tmp.st_ino) < sizeof(stat->ino) && tmp.st_ino != stat->ino) - return -EOVERFLOW; -@@ -363,7 +362,7 @@ static int cp_new_stat(struct kstat *stat, struct stat __user *statbuf) - return -EOVERFLOW; - SET_UID(tmp.st_uid, from_kuid_munged(current_user_ns(), stat->uid)); - SET_GID(tmp.st_gid, from_kgid_munged(current_user_ns(), stat->gid)); -- tmp.st_rdev = encode_dev(stat->rdev); -+ tmp.st_rdev = new_encode_dev(stat->rdev); - tmp.st_size = stat->size; - tmp.st_atime = stat->atime.tv_sec; - tmp.st_mtime = stat->mtime.tv_sec; -@@ -644,11 +643,13 @@ static int cp_compat_stat(struct kstat *stat, struct compat_stat __user *ubuf) - { - struct compat_stat tmp; - -- if (!old_valid_dev(stat->dev) || !old_valid_dev(stat->rdev)) -+ if (sizeof(tmp.st_dev) < 4 && !old_valid_dev(stat->dev)) ++ if (check_add_overflow((unsigned long)p->addr, size, &tmp_check)) + return -EOVERFLOW; -+ if (sizeof(tmp.st_rdev) < 4 && !old_valid_dev(stat->rdev)) - return -EOVERFLOW; - - memset(&tmp, 0, sizeof(tmp)); -- tmp.st_dev = old_encode_dev(stat->dev); -+ tmp.st_dev = new_encode_dev(stat->dev); - tmp.st_ino 
= stat->ino; - if (sizeof(tmp.st_ino) < sizeof(stat->ino) && tmp.st_ino != stat->ino) - return -EOVERFLOW; -@@ -658,7 +659,7 @@ static int cp_compat_stat(struct kstat *stat, struct compat_stat __user *ubuf) - return -EOVERFLOW; - SET_UID(tmp.st_uid, from_kuid_munged(current_user_ns(), stat->uid)); - SET_GID(tmp.st_gid, from_kgid_munged(current_user_ns(), stat->gid)); -- tmp.st_rdev = old_encode_dev(stat->rdev); -+ tmp.st_rdev = new_encode_dev(stat->rdev); - if ((u64) stat->size > MAX_NON_LFS) - return -EOVERFLOW; - tmp.st_size = stat->size; -diff --git a/fs/super.c b/fs/super.c -index bcef3a6f4c4b5..87379bb1f7a30 100644 ---- a/fs/super.c -+++ b/fs/super.c -@@ -1421,8 +1421,8 @@ struct dentry *mount_nodev(struct file_system_type *fs_type, - } - EXPORT_SYMBOL(mount_nodev); - --static int reconfigure_single(struct super_block *s, -- int flags, void *data) -+int reconfigure_single(struct super_block *s, -+ int flags, void *data) - { - struct fs_context *fc; - int ret; -@@ -1616,11 +1616,9 @@ static void lockdep_sb_freeze_acquire(struct super_block *sb) - percpu_rwsem_acquire(sb->s_writers.rw_sem + level, 0, _THIS_IP_); - } - --static void sb_freeze_unlock(struct super_block *sb) -+static void sb_freeze_unlock(struct super_block *sb, int level) - { -- int level; -- -- for (level = SB_FREEZE_LEVELS - 1; level >= 0; level--) -+ for (level--; level >= 0; level--) - percpu_up_write(sb->s_writers.rw_sem + level); - } - -@@ -1691,7 +1689,14 @@ int freeze_super(struct super_block *sb) - sb_wait_write(sb, SB_FREEZE_PAGEFAULT); - - /* All writers are done so after syncing there won't be dirty data */ -- sync_filesystem(sb); -+ ret = sync_filesystem(sb); -+ if (ret) { -+ sb->s_writers.frozen = SB_UNFROZEN; -+ sb_freeze_unlock(sb, SB_FREEZE_PAGEFAULT); -+ wake_up(&sb->s_writers.wait_unfrozen); -+ deactivate_locked_super(sb); -+ return ret; ++ ++ size = (unsigned long)p->len * p->nbufs; ++ if (!access_ok(u64_to_user_ptr(p->addr), size)) ++ return -EFAULT; ++ ++ p->bgid = READ_ONCE(sqe->buf_group); ++ tmp = READ_ONCE(sqe->off); ++ if (tmp > USHRT_MAX) ++ return -E2BIG; ++ p->bid = tmp; ++ return 0; ++} ++ ++static int io_add_buffers(struct io_provide_buf *pbuf, struct io_buffer **head) ++{ ++ struct io_buffer *buf; ++ u64 addr = pbuf->addr; ++ int i, bid = pbuf->bid; ++ ++ for (i = 0; i < pbuf->nbufs; i++) { ++ buf = kmalloc(sizeof(*buf), GFP_KERNEL_ACCOUNT); ++ if (!buf) ++ break; ++ ++ buf->addr = addr; ++ buf->len = min_t(__u32, pbuf->len, MAX_RW_COUNT); ++ buf->bid = bid; ++ addr += pbuf->len; ++ bid++; ++ if (!*head) { ++ INIT_LIST_HEAD(&buf->list); ++ *head = buf; ++ } else { ++ list_add_tail(&buf->list, &(*head)->list); ++ } ++ cond_resched(); + } - - /* Now wait for internal filesystem counter */ - sb->s_writers.frozen = SB_FREEZE_FS; -@@ -1703,7 +1708,7 @@ int freeze_super(struct super_block *sb) - printk(KERN_ERR - "VFS:Filesystem freeze failed\n"); - sb->s_writers.frozen = SB_UNFROZEN; -- sb_freeze_unlock(sb); -+ sb_freeze_unlock(sb, SB_FREEZE_FS); - wake_up(&sb->s_writers.wait_unfrozen); - deactivate_locked_super(sb); - return ret; -@@ -1748,7 +1753,7 @@ static int thaw_super_locked(struct super_block *sb) - } - - sb->s_writers.frozen = SB_UNFROZEN; -- sb_freeze_unlock(sb); -+ sb_freeze_unlock(sb, SB_FREEZE_FS); - out: - wake_up(&sb->s_writers.wait_unfrozen); - deactivate_locked_super(sb); -diff --git a/fs/sync.c b/fs/sync.c -index 1373a610dc784..c7690016453e4 100644 ---- a/fs/sync.c -+++ b/fs/sync.c -@@ -3,6 +3,7 @@ - * High-level sync()-related operations - */ - -+#include <linux/blkdev.h> - 
#include <linux/kernel.h> - #include <linux/file.h> - #include <linux/fs.h> -@@ -21,25 +22,6 @@ - #define VALID_FLAGS (SYNC_FILE_RANGE_WAIT_BEFORE|SYNC_FILE_RANGE_WRITE| \ - SYNC_FILE_RANGE_WAIT_AFTER) - --/* -- * Do the filesystem syncing work. For simple filesystems -- * writeback_inodes_sb(sb) just dirties buffers with inodes so we have to -- * submit IO for these buffers via __sync_blockdev(). This also speeds up the -- * wait == 1 case since in that case write_inode() functions do -- * sync_dirty_buffer() and thus effectively write one block at a time. -- */ --static int __sync_filesystem(struct super_block *sb, int wait) --{ -- if (wait) -- sync_inodes_sb(sb); -- else -- writeback_inodes_sb(sb, WB_REASON_SYNC); -- -- if (sb->s_op->sync_fs) -- sb->s_op->sync_fs(sb, wait); -- return __sync_blockdev(sb->s_bdev, wait); --} -- - /* - * Write out and wait upon all dirty data associated with this - * superblock. Filesystem data as well as the underlying block -@@ -47,7 +29,7 @@ static int __sync_filesystem(struct super_block *sb, int wait) - */ - int sync_filesystem(struct super_block *sb) - { -- int ret; ++ ++ return i ? i : -ENOMEM; ++} ++ ++static int io_provide_buffers(struct io_kiocb *req, unsigned int issue_flags) ++{ ++ struct io_provide_buf *p = &req->pbuf; ++ struct io_ring_ctx *ctx = req->ctx; ++ struct io_buffer *head, *list; + int ret = 0; - - /* - * We need to be protected against the filesystem going from -@@ -61,10 +43,31 @@ int sync_filesystem(struct super_block *sb) - if (sb_rdonly(sb)) - return 0; - -- ret = __sync_filesystem(sb, 0); -- if (ret < 0) -+ /* -+ * Do the filesystem syncing work. For simple filesystems -+ * writeback_inodes_sb(sb) just dirties buffers with inodes so we have -+ * to submit I/O for these buffers via sync_blockdev(). This also -+ * speeds up the wait == 1 case since in that case write_inode() -+ * methods call sync_dirty_buffer() and thus effectively write one block -+ * at a time. 
-+ */ -+ writeback_inodes_sb(sb, WB_REASON_SYNC); -+ if (sb->s_op->sync_fs) { -+ ret = sb->s_op->sync_fs(sb, 0); -+ if (ret) -+ return ret; ++ bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK; ++ ++ io_ring_submit_lock(ctx, !force_nonblock); ++ ++ lockdep_assert_held(&ctx->uring_lock); ++ ++ list = head = xa_load(&ctx->io_buffers, p->bgid); ++ ++ ret = io_add_buffers(p, &head); ++ if (ret >= 0 && !list) { ++ ret = xa_insert(&ctx->io_buffers, p->bgid, head, ++ GFP_KERNEL_ACCOUNT); ++ if (ret < 0) ++ __io_remove_buffers(ctx, head, p->bgid, -1U); + } -+ ret = sync_blockdev_nowait(sb->s_bdev); -+ if (ret) - return ret; -- return __sync_filesystem(sb, 1); ++ if (ret < 0) ++ req_set_fail(req); ++ /* complete before unlock, IOPOLL may need the lock */ ++ __io_req_complete(req, issue_flags, ret, 0); ++ io_ring_submit_unlock(ctx, !force_nonblock); ++ return 0; ++} + -+ sync_inodes_sb(sb); -+ if (sb->s_op->sync_fs) { -+ ret = sb->s_op->sync_fs(sb, 1); -+ if (ret) -+ return ret; ++static int io_epoll_ctl_prep(struct io_kiocb *req, ++ const struct io_uring_sqe *sqe) ++{ ++#if defined(CONFIG_EPOLL) ++ if (sqe->ioprio || sqe->buf_index || sqe->splice_fd_in) ++ return -EINVAL; ++ if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) ++ return -EINVAL; ++ ++ req->epoll.epfd = READ_ONCE(sqe->fd); ++ req->epoll.op = READ_ONCE(sqe->len); ++ req->epoll.fd = READ_ONCE(sqe->off); ++ ++ if (ep_op_has_event(req->epoll.op)) { ++ struct epoll_event __user *ev; ++ ++ ev = u64_to_user_ptr(READ_ONCE(sqe->addr)); ++ if (copy_from_user(&req->epoll.event, ev, sizeof(*ev))) ++ return -EFAULT; + } -+ return sync_blockdev(sb->s_bdev); - } - EXPORT_SYMBOL(sync_filesystem); - -@@ -81,21 +84,6 @@ static void sync_fs_one_sb(struct super_block *sb, void *arg) - sb->s_op->sync_fs(sb, *(int *)arg); - } - --static void fdatawrite_one_bdev(struct block_device *bdev, void *arg) --{ -- filemap_fdatawrite(bdev->bd_inode->i_mapping); --} -- --static void fdatawait_one_bdev(struct block_device *bdev, void *arg) --{ -- /* -- * We keep the error status of individual mapping so that -- * applications can catch the writeback error using fsync(2). -- * See filemap_fdatawait_keep_errors() for details. -- */ -- filemap_fdatawait_keep_errors(bdev->bd_inode->i_mapping); --} -- - /* - * Sync everything. We start by waking flusher threads so that most of - * writeback runs on all devices in parallel. Then we sync all inodes reliably -@@ -114,8 +102,8 @@ void ksys_sync(void) - iterate_supers(sync_inodes_one_sb, NULL); - iterate_supers(sync_fs_one_sb, &nowait); - iterate_supers(sync_fs_one_sb, &wait); -- iterate_bdevs(fdatawrite_one_bdev, NULL); -- iterate_bdevs(fdatawait_one_bdev, NULL); -+ sync_bdevs(false); -+ sync_bdevs(true); - if (unlikely(laptop_mode)) - laptop_sync_completion(); - } -@@ -136,10 +124,10 @@ static void do_sync_work(struct work_struct *work) - */ - iterate_supers(sync_inodes_one_sb, &nowait); - iterate_supers(sync_fs_one_sb, &nowait); -- iterate_bdevs(fdatawrite_one_bdev, NULL); -+ sync_bdevs(false); - iterate_supers(sync_inodes_one_sb, &nowait); - iterate_supers(sync_fs_one_sb, &nowait); -- iterate_bdevs(fdatawrite_one_bdev, NULL); -+ sync_bdevs(false); - printk("Emergency Sync complete\n"); - kfree(work); - } -diff --git a/fs/tracefs/inode.c b/fs/tracefs/inode.c -index 1261e8b41edb4..066e8344934de 100644 ---- a/fs/tracefs/inode.c -+++ b/fs/tracefs/inode.c -@@ -141,6 +141,8 @@ struct tracefs_mount_opts { - kuid_t uid; - kgid_t gid; - umode_t mode; -+ /* Opt_* bitfield. 
*/ -+ unsigned int opts; - }; - - enum { -@@ -161,6 +163,77 @@ struct tracefs_fs_info { - struct tracefs_mount_opts mount_opts; - }; - -+static void change_gid(struct dentry *dentry, kgid_t gid) ++ ++ return 0; ++#else ++ return -EOPNOTSUPP; ++#endif ++} ++ ++static int io_epoll_ctl(struct io_kiocb *req, unsigned int issue_flags) +{ -+ if (!dentry->d_inode) -+ return; -+ dentry->d_inode->i_gid = gid; ++#if defined(CONFIG_EPOLL) ++ struct io_epoll *ie = &req->epoll; ++ int ret; ++ bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK; ++ ++ ret = do_epoll_ctl(ie->epfd, ie->op, ie->fd, &ie->event, force_nonblock); ++ if (force_nonblock && ret == -EAGAIN) ++ return -EAGAIN; ++ ++ if (ret < 0) ++ req_set_fail(req); ++ __io_req_complete(req, issue_flags, ret, 0); ++ return 0; ++#else ++ return -EOPNOTSUPP; ++#endif +} + -+/* -+ * Taken from d_walk, but without he need for handling renames. -+ * Nothing can be renamed while walking the list, as tracefs -+ * does not support renames. This is only called when mounting -+ * or remounting the file system, to set all the files to -+ * the given gid. -+ */ -+static void set_gid(struct dentry *parent, kgid_t gid) ++static int io_madvise_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) +{ -+ struct dentry *this_parent; -+ struct list_head *next; ++#if defined(CONFIG_ADVISE_SYSCALLS) && defined(CONFIG_MMU) ++ if (sqe->ioprio || sqe->buf_index || sqe->off || sqe->splice_fd_in) ++ return -EINVAL; ++ if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) ++ return -EINVAL; + -+ this_parent = parent; -+ spin_lock(&this_parent->d_lock); ++ req->madvise.addr = READ_ONCE(sqe->addr); ++ req->madvise.len = READ_ONCE(sqe->len); ++ req->madvise.advice = READ_ONCE(sqe->fadvise_advice); ++ return 0; ++#else ++ return -EOPNOTSUPP; ++#endif ++} + -+ change_gid(this_parent, gid); -+repeat: -+ next = this_parent->d_subdirs.next; -+resume: -+ while (next != &this_parent->d_subdirs) { -+ struct list_head *tmp = next; -+ struct dentry *dentry = list_entry(tmp, struct dentry, d_child); -+ next = tmp->next; ++static int io_madvise(struct io_kiocb *req, unsigned int issue_flags) ++{ ++#if defined(CONFIG_ADVISE_SYSCALLS) && defined(CONFIG_MMU) ++ struct io_madvise *ma = &req->madvise; ++ int ret; + -+ spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); ++ if (issue_flags & IO_URING_F_NONBLOCK) ++ return -EAGAIN; + -+ change_gid(dentry, gid); ++ ret = do_madvise(current->mm, ma->addr, ma->len, ma->advice); ++ if (ret < 0) ++ req_set_fail(req); ++ io_req_complete(req, ret); ++ return 0; ++#else ++ return -EOPNOTSUPP; ++#endif ++} + -+ if (!list_empty(&dentry->d_subdirs)) { -+ spin_unlock(&this_parent->d_lock); -+ spin_release(&dentry->d_lock.dep_map, _RET_IP_); -+ this_parent = dentry; -+ spin_acquire(&this_parent->d_lock.dep_map, 0, 1, _RET_IP_); -+ goto repeat; ++static int io_fadvise_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) ++{ ++ if (sqe->ioprio || sqe->buf_index || sqe->addr || sqe->splice_fd_in) ++ return -EINVAL; ++ if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) ++ return -EINVAL; ++ ++ req->fadvise.offset = READ_ONCE(sqe->off); ++ req->fadvise.len = READ_ONCE(sqe->len); ++ req->fadvise.advice = READ_ONCE(sqe->fadvise_advice); ++ return 0; ++} ++ ++static int io_fadvise(struct io_kiocb *req, unsigned int issue_flags) ++{ ++ struct io_fadvise *fa = &req->fadvise; ++ int ret; ++ ++ if (issue_flags & IO_URING_F_NONBLOCK) { ++ switch (fa->advice) { ++ case POSIX_FADV_NORMAL: ++ case POSIX_FADV_RANDOM: ++ case POSIX_FADV_SEQUENTIAL: ++ 
break; ++ default: ++ return -EAGAIN; + } -+ spin_unlock(&dentry->d_lock); + } -+ /* -+ * All done at this level ... ascend and resume the search. -+ */ -+ rcu_read_lock(); -+ascend: -+ if (this_parent != parent) { -+ struct dentry *child = this_parent; -+ this_parent = child->d_parent; + -+ spin_unlock(&child->d_lock); -+ spin_lock(&this_parent->d_lock); ++ ret = vfs_fadvise(req->file, fa->offset, fa->len, fa->advice); ++ if (ret < 0) ++ req_set_fail(req); ++ __io_req_complete(req, issue_flags, ret, 0); ++ return 0; ++} + -+ /* go into the first sibling still alive */ -+ do { -+ next = child->d_child.next; -+ if (next == &this_parent->d_subdirs) -+ goto ascend; -+ child = list_entry(next, struct dentry, d_child); -+ } while (unlikely(child->d_flags & DCACHE_DENTRY_KILLED)); -+ rcu_read_unlock(); -+ goto resume; ++static int io_statx_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) ++{ ++ if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) ++ return -EINVAL; ++ if (sqe->ioprio || sqe->buf_index || sqe->splice_fd_in) ++ return -EINVAL; ++ if (req->flags & REQ_F_FIXED_FILE) ++ return -EBADF; ++ ++ req->statx.dfd = READ_ONCE(sqe->fd); ++ req->statx.mask = READ_ONCE(sqe->len); ++ req->statx.filename = u64_to_user_ptr(READ_ONCE(sqe->addr)); ++ req->statx.buffer = u64_to_user_ptr(READ_ONCE(sqe->addr2)); ++ req->statx.flags = READ_ONCE(sqe->statx_flags); ++ ++ return 0; ++} ++ ++static int io_statx(struct io_kiocb *req, unsigned int issue_flags) ++{ ++ struct io_statx *ctx = &req->statx; ++ int ret; ++ ++ if (issue_flags & IO_URING_F_NONBLOCK) ++ return -EAGAIN; ++ ++ ret = do_statx(ctx->dfd, ctx->filename, ctx->flags, ctx->mask, ++ ctx->buffer); ++ ++ if (ret < 0) ++ req_set_fail(req); ++ io_req_complete(req, ret); ++ return 0; ++} ++ ++static int io_close_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) ++{ ++ if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) ++ return -EINVAL; ++ if (sqe->ioprio || sqe->off || sqe->addr || sqe->len || ++ sqe->rw_flags || sqe->buf_index) ++ return -EINVAL; ++ if (req->flags & REQ_F_FIXED_FILE) ++ return -EBADF; ++ ++ req->close.fd = READ_ONCE(sqe->fd); ++ req->close.file_slot = READ_ONCE(sqe->file_index); ++ if (req->close.file_slot && req->close.fd) ++ return -EINVAL; ++ ++ return 0; ++} ++ ++static int io_close(struct io_kiocb *req, unsigned int issue_flags) ++{ ++ struct files_struct *files = current->files; ++ struct io_close *close = &req->close; ++ struct fdtable *fdt; ++ struct file *file = NULL; ++ int ret = -EBADF; ++ ++ if (req->close.file_slot) { ++ ret = io_close_fixed(req, issue_flags); ++ goto err; + } -+ rcu_read_unlock(); -+ spin_unlock(&this_parent->d_lock); -+ return; ++ ++ spin_lock(&files->file_lock); ++ fdt = files_fdtable(files); ++ if (close->fd >= fdt->max_fds) { ++ spin_unlock(&files->file_lock); ++ goto err; ++ } ++ file = fdt->fd[close->fd]; ++ if (!file || file->f_op == &io_uring_fops) { ++ spin_unlock(&files->file_lock); ++ file = NULL; ++ goto err; ++ } ++ ++ /* if the file has a flush method, be safe and punt to async */ ++ if (file->f_op->flush && (issue_flags & IO_URING_F_NONBLOCK)) { ++ spin_unlock(&files->file_lock); ++ return -EAGAIN; ++ } ++ ++ ret = __close_fd_get_file(close->fd, &file); ++ spin_unlock(&files->file_lock); ++ if (ret < 0) { ++ if (ret == -ENOENT) ++ ret = -EBADF; ++ goto err; ++ } ++ ++ /* No ->flush() or already async, safely close from here */ ++ ret = filp_close(file, current->files); ++err: ++ if (ret < 0) ++ req_set_fail(req); ++ if (file) ++ fput(file); ++ __io_req_complete(req, 
issue_flags, ret, 0); ++ return 0; +} + - static int tracefs_parse_options(char *data, struct tracefs_mount_opts *opts) - { - substring_t args[MAX_OPT_ARGS]; -@@ -170,6 +243,7 @@ static int tracefs_parse_options(char *data, struct tracefs_mount_opts *opts) - kgid_t gid; - char *p; - -+ opts->opts = 0; - opts->mode = TRACEFS_DEFAULT_MODE; - - while ((p = strsep(&data, ",")) != NULL) { -@@ -204,22 +278,36 @@ static int tracefs_parse_options(char *data, struct tracefs_mount_opts *opts) - * but traditionally tracefs has ignored all mount options - */ - } ++static int io_sfr_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) ++{ ++ struct io_ring_ctx *ctx = req->ctx; + -+ opts->opts |= BIT(token); - } - - return 0; - } - --static int tracefs_apply_options(struct super_block *sb) -+static int tracefs_apply_options(struct super_block *sb, bool remount) - { - struct tracefs_fs_info *fsi = sb->s_fs_info; - struct inode *inode = sb->s_root->d_inode; - struct tracefs_mount_opts *opts = &fsi->mount_opts; - -- inode->i_mode &= ~S_IALLUGO; -- inode->i_mode |= opts->mode; -+ /* -+ * On remount, only reset mode/uid/gid if they were provided as mount -+ * options. -+ */ ++ if (unlikely(ctx->flags & IORING_SETUP_IOPOLL)) ++ return -EINVAL; ++ if (unlikely(sqe->addr || sqe->ioprio || sqe->buf_index || ++ sqe->splice_fd_in)) ++ return -EINVAL; ++ ++ req->sync.off = READ_ONCE(sqe->off); ++ req->sync.len = READ_ONCE(sqe->len); ++ req->sync.flags = READ_ONCE(sqe->sync_range_flags); ++ return 0; ++} ++ ++static int io_sync_file_range(struct io_kiocb *req, unsigned int issue_flags) ++{ ++ int ret; ++ ++ /* sync_file_range always requires a blocking context */ ++ if (issue_flags & IO_URING_F_NONBLOCK) ++ return -EAGAIN; ++ ++ ret = sync_file_range(req->file, req->sync.off, req->sync.len, ++ req->sync.flags); ++ if (ret < 0) ++ req_set_fail(req); ++ io_req_complete(req, ret); ++ return 0; ++} ++ ++#if defined(CONFIG_NET) ++static bool io_net_retry(struct socket *sock, int flags) ++{ ++ if (!(flags & MSG_WAITALL)) ++ return false; ++ return sock->type == SOCK_STREAM || sock->type == SOCK_SEQPACKET; ++} ++ ++static int io_setup_async_msg(struct io_kiocb *req, ++ struct io_async_msghdr *kmsg) ++{ ++ struct io_async_msghdr *async_msg = req->async_data; ++ ++ if (async_msg) ++ return -EAGAIN; ++ if (io_alloc_async_data(req)) { ++ kfree(kmsg->free_iov); ++ return -ENOMEM; ++ } ++ async_msg = req->async_data; ++ req->flags |= REQ_F_NEED_CLEANUP; ++ memcpy(async_msg, kmsg, sizeof(*kmsg)); ++ if (async_msg->msg.msg_name) ++ async_msg->msg.msg_name = &async_msg->addr; ++ /* if were using fast_iov, set it to the new one */ ++ if (!kmsg->free_iov) { ++ size_t fast_idx = kmsg->msg.msg_iter.iov - kmsg->fast_iov; ++ async_msg->msg.msg_iter.iov = &async_msg->fast_iov[fast_idx]; ++ } ++ ++ return -EAGAIN; ++} ++ ++static int io_sendmsg_copy_hdr(struct io_kiocb *req, ++ struct io_async_msghdr *iomsg) ++{ ++ iomsg->msg.msg_name = &iomsg->addr; ++ iomsg->free_iov = iomsg->fast_iov; ++ return sendmsg_copy_msghdr(&iomsg->msg, req->sr_msg.umsg, ++ req->sr_msg.msg_flags, &iomsg->free_iov); ++} ++ ++static int io_sendmsg_prep_async(struct io_kiocb *req) ++{ ++ int ret; ++ ++ ret = io_sendmsg_copy_hdr(req, req->async_data); ++ if (!ret) ++ req->flags |= REQ_F_NEED_CLEANUP; ++ return ret; ++} ++ ++static int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) ++{ ++ struct io_sr_msg *sr = &req->sr_msg; ++ ++ if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) ++ return -EINVAL; ++ if (unlikely(sqe->addr2 || 
sqe->file_index)) ++ return -EINVAL; ++ if (unlikely(sqe->addr2 || sqe->file_index || sqe->ioprio)) ++ return -EINVAL; ++ ++ sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr)); ++ sr->len = READ_ONCE(sqe->len); ++ sr->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL; ++ if (sr->msg_flags & MSG_DONTWAIT) ++ req->flags |= REQ_F_NOWAIT; ++ ++#ifdef CONFIG_COMPAT ++ if (req->ctx->compat) ++ sr->msg_flags |= MSG_CMSG_COMPAT; ++#endif ++ sr->done_io = 0; ++ return 0; ++} + -+ if (!remount || opts->opts & BIT(Opt_mode)) { -+ inode->i_mode &= ~S_IALLUGO; -+ inode->i_mode |= opts->mode; ++static int io_sendmsg(struct io_kiocb *req, unsigned int issue_flags) ++{ ++ struct io_async_msghdr iomsg, *kmsg; ++ struct io_sr_msg *sr = &req->sr_msg; ++ struct socket *sock; ++ unsigned flags; ++ int min_ret = 0; ++ int ret; ++ ++ sock = sock_from_file(req->file); ++ if (unlikely(!sock)) ++ return -ENOTSOCK; ++ ++ kmsg = req->async_data; ++ if (!kmsg) { ++ ret = io_sendmsg_copy_hdr(req, &iomsg); ++ if (ret) ++ return ret; ++ kmsg = &iomsg; ++ } ++ ++ flags = req->sr_msg.msg_flags; ++ if (issue_flags & IO_URING_F_NONBLOCK) ++ flags |= MSG_DONTWAIT; ++ if (flags & MSG_WAITALL) ++ min_ret = iov_iter_count(&kmsg->msg.msg_iter); ++ ++ ret = __sys_sendmsg_sock(sock, &kmsg->msg, flags); ++ ++ if (ret < min_ret) { ++ if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK)) ++ return io_setup_async_msg(req, kmsg); ++ if (ret == -ERESTARTSYS) ++ ret = -EINTR; ++ if (ret > 0 && io_net_retry(sock, flags)) { ++ sr->done_io += ret; ++ req->flags |= REQ_F_PARTIAL_IO; ++ return io_setup_async_msg(req, kmsg); ++ } ++ req_set_fail(req); + } - -- inode->i_uid = opts->uid; -- inode->i_gid = opts->gid; -+ if (!remount || opts->opts & BIT(Opt_uid)) -+ inode->i_uid = opts->uid; ++ /* fast path, check for non-NULL to avoid function call */ ++ if (kmsg->free_iov) ++ kfree(kmsg->free_iov); ++ req->flags &= ~REQ_F_NEED_CLEANUP; ++ if (ret >= 0) ++ ret += sr->done_io; ++ else if (sr->done_io) ++ ret = sr->done_io; ++ __io_req_complete(req, issue_flags, ret, 0); ++ return 0; ++} + -+ if (!remount || opts->opts & BIT(Opt_gid)) { -+ /* Set all the group ids to the mount option */ -+ set_gid(sb->s_root, opts->gid); ++static int io_send(struct io_kiocb *req, unsigned int issue_flags) ++{ ++ struct io_sr_msg *sr = &req->sr_msg; ++ struct msghdr msg; ++ struct iovec iov; ++ struct socket *sock; ++ unsigned flags; ++ int min_ret = 0; ++ int ret; ++ ++ sock = sock_from_file(req->file); ++ if (unlikely(!sock)) ++ return -ENOTSOCK; ++ ++ ret = import_single_range(WRITE, sr->buf, sr->len, &iov, &msg.msg_iter); ++ if (unlikely(ret)) ++ return ret; ++ ++ msg.msg_name = NULL; ++ msg.msg_control = NULL; ++ msg.msg_controllen = 0; ++ msg.msg_namelen = 0; ++ ++ flags = req->sr_msg.msg_flags; ++ if (issue_flags & IO_URING_F_NONBLOCK) ++ flags |= MSG_DONTWAIT; ++ if (flags & MSG_WAITALL) ++ min_ret = iov_iter_count(&msg.msg_iter); ++ ++ msg.msg_flags = flags; ++ ret = sock_sendmsg(sock, &msg); ++ if (ret < min_ret) { ++ if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK)) ++ return -EAGAIN; ++ if (ret == -ERESTARTSYS) ++ ret = -EINTR; ++ if (ret > 0 && io_net_retry(sock, flags)) { ++ sr->len -= ret; ++ sr->buf += ret; ++ sr->done_io += ret; ++ req->flags |= REQ_F_PARTIAL_IO; ++ return -EAGAIN; ++ } ++ req_set_fail(req); + } - - return 0; - } -@@ -234,7 +322,7 @@ static int tracefs_remount(struct super_block *sb, int *flags, char *data) - if (err) - goto fail; - -- tracefs_apply_options(sb); -+ tracefs_apply_options(sb, true); - - fail: - return 
err; -@@ -286,7 +374,7 @@ static int trace_fill_super(struct super_block *sb, void *data, int silent) - - sb->s_op = &tracefs_super_operations; - -- tracefs_apply_options(sb); -+ tracefs_apply_options(sb, false); - - return 0; - -@@ -414,6 +502,8 @@ struct dentry *tracefs_create_file(const char *name, umode_t mode, - inode->i_mode = mode; - inode->i_fop = fops ? fops : &tracefs_file_operations; - inode->i_private = data; -+ inode->i_uid = d_inode(dentry->d_parent)->i_uid; -+ inode->i_gid = d_inode(dentry->d_parent)->i_gid; - d_instantiate(dentry, inode); - fsnotify_create(dentry->d_parent->d_inode, dentry); - return end_creating(dentry); -@@ -432,9 +522,12 @@ static struct dentry *__create_dir(const char *name, struct dentry *parent, - if (unlikely(!inode)) - return failed_creating(dentry); - -- inode->i_mode = S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO; -+ /* Do not set bits for OTH */ -+ inode->i_mode = S_IFDIR | S_IRWXU | S_IRUSR| S_IRGRP | S_IXUSR | S_IXGRP; - inode->i_op = ops; - inode->i_fop = &simple_dir_operations; -+ inode->i_uid = d_inode(dentry->d_parent)->i_uid; -+ inode->i_gid = d_inode(dentry->d_parent)->i_gid; - - /* directory inodes start off with i_nlink == 2 (for "." entry) */ - inc_nlink(inode); -diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c -index 7c61d0ec0159e..79e371bc15e1e 100644 ---- a/fs/ubifs/dir.c -+++ b/fs/ubifs/dir.c -@@ -349,20 +349,97 @@ out_budg: - return err; - } - --static int do_tmpfile(struct inode *dir, struct dentry *dentry, -- umode_t mode, struct inode **whiteout) -+static struct inode *create_whiteout(struct inode *dir, struct dentry *dentry) ++ if (ret >= 0) ++ ret += sr->done_io; ++ else if (sr->done_io) ++ ret = sr->done_io; ++ __io_req_complete(req, issue_flags, ret, 0); ++ return 0; ++} ++ ++static int __io_recvmsg_copy_hdr(struct io_kiocb *req, ++ struct io_async_msghdr *iomsg) +{ -+ int err; -+ umode_t mode = S_IFCHR | WHITEOUT_MODE; -+ struct inode *inode; -+ struct ubifs_info *c = dir->i_sb->s_fs_info; -+ struct fscrypt_name nm; ++ struct io_sr_msg *sr = &req->sr_msg; ++ struct iovec __user *uiov; ++ size_t iov_len; ++ int ret; + -+ /* -+ * Create an inode('nlink = 1') for whiteout without updating journal, -+ * let ubifs_jnl_rename() store it on flash to complete rename whiteout -+ * atomically. 
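
(Aside, not from this series: whiteout inodes are what RENAME_WHITEOUT leaves behind in place of the renamed name; the create_whiteout() helper here builds that inode up front so the journal can commit the rename atomically. From userspace the operation looks like the sketch below; the paths are made up, and the call only succeeds on a filesystem that supports whiteouts:

	#define _GNU_SOURCE
	#include <fcntl.h>
	#include <stdio.h>
	#include <unistd.h>
	#include <sys/syscall.h>

	#ifndef RENAME_WHITEOUT
	#define RENAME_WHITEOUT (1 << 2)	/* from <linux/fs.h> */
	#endif

	int main(void)
	{
		/* "old" is renamed to "new"; a whiteout replaces "old" */
		if (syscall(SYS_renameat2, AT_FDCWD, "old",
			    AT_FDCWD, "new", RENAME_WHITEOUT) < 0) {
			perror("renameat2(RENAME_WHITEOUT)");
			return 1;
		}
		return 0;
	}

End aside.)
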
-+ */ ++ ret = __copy_msghdr_from_user(&iomsg->msg, sr->umsg, ++ &iomsg->uaddr, &uiov, &iov_len); ++ if (ret) ++ return ret; + -+ dbg_gen("dent '%pd', mode %#hx in dir ino %lu", -+ dentry, mode, dir->i_ino); ++ if (req->flags & REQ_F_BUFFER_SELECT) { ++ if (iov_len > 1) ++ return -EINVAL; ++ if (copy_from_user(iomsg->fast_iov, uiov, sizeof(*uiov))) ++ return -EFAULT; ++ sr->len = iomsg->fast_iov[0].iov_len; ++ iomsg->free_iov = NULL; ++ } else { ++ iomsg->free_iov = iomsg->fast_iov; ++ ret = __import_iovec(READ, uiov, iov_len, UIO_FASTIOV, ++ &iomsg->free_iov, &iomsg->msg.msg_iter, ++ false); ++ if (ret > 0) ++ ret = 0; ++ } + -+ err = fscrypt_setup_filename(dir, &dentry->d_name, 0, &nm); -+ if (err) -+ return ERR_PTR(err); ++ return ret; ++} + -+ inode = ubifs_new_inode(c, dir, mode); -+ if (IS_ERR(inode)) { -+ err = PTR_ERR(inode); -+ goto out_free; ++#ifdef CONFIG_COMPAT ++static int __io_compat_recvmsg_copy_hdr(struct io_kiocb *req, ++ struct io_async_msghdr *iomsg) ++{ ++ struct io_sr_msg *sr = &req->sr_msg; ++ struct compat_iovec __user *uiov; ++ compat_uptr_t ptr; ++ compat_size_t len; ++ int ret; ++ ++ ret = __get_compat_msghdr(&iomsg->msg, sr->umsg_compat, &iomsg->uaddr, ++ &ptr, &len); ++ if (ret) ++ return ret; ++ ++ uiov = compat_ptr(ptr); ++ if (req->flags & REQ_F_BUFFER_SELECT) { ++ compat_ssize_t clen; ++ ++ if (len > 1) ++ return -EINVAL; ++ if (!access_ok(uiov, sizeof(*uiov))) ++ return -EFAULT; ++ if (__get_user(clen, &uiov->iov_len)) ++ return -EFAULT; ++ if (clen < 0) ++ return -EINVAL; ++ sr->len = clen; ++ iomsg->free_iov = NULL; ++ } else { ++ iomsg->free_iov = iomsg->fast_iov; ++ ret = __import_iovec(READ, (struct iovec __user *)uiov, len, ++ UIO_FASTIOV, &iomsg->free_iov, ++ &iomsg->msg.msg_iter, true); ++ if (ret < 0) ++ return ret; + } + -+ init_special_inode(inode, inode->i_mode, WHITEOUT_DEV); -+ ubifs_assert(c, inode->i_op == &ubifs_file_inode_operations); ++ return 0; ++} ++#endif + -+ err = ubifs_init_security(dir, inode, &dentry->d_name); -+ if (err) -+ goto out_inode; ++static int io_recvmsg_copy_hdr(struct io_kiocb *req, ++ struct io_async_msghdr *iomsg) ++{ ++ iomsg->msg.msg_name = &iomsg->addr; + -+ /* The dir size is updated by do_rename. */ -+ insert_inode_hash(inode); ++#ifdef CONFIG_COMPAT ++ if (req->ctx->compat) ++ return __io_compat_recvmsg_copy_hdr(req, iomsg); ++#endif + -+ return inode; ++ return __io_recvmsg_copy_hdr(req, iomsg); ++} + -+out_inode: -+ make_bad_inode(inode); -+ iput(inode); -+out_free: -+ fscrypt_free_filename(&nm); -+ ubifs_err(c, "cannot create whiteout file, error %d", err); -+ return ERR_PTR(err); ++static struct io_buffer *io_recv_buffer_select(struct io_kiocb *req, ++ bool needs_lock) ++{ ++ struct io_sr_msg *sr = &req->sr_msg; ++ struct io_buffer *kbuf; ++ ++ kbuf = io_buffer_select(req, &sr->len, sr->bgid, sr->kbuf, needs_lock); ++ if (IS_ERR(kbuf)) ++ return kbuf; ++ ++ sr->kbuf = kbuf; ++ req->flags |= REQ_F_BUFFER_SELECTED; ++ return kbuf; +} + -+/** -+ * lock_2_inodes - a wrapper for locking two UBIFS inodes. -+ * @inode1: first inode -+ * @inode2: second inode -+ * -+ * We do not implement any tricks to guarantee strict lock ordering, because -+ * VFS has already done it for us on the @i_mutex. So this is just a simple -+ * wrapper function. 
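
(Aside, not from this series: the comment above relies on the VFS already imposing a lock order on the two i_mutex locks. Where no such external guarantee exists, the usual way to make a two-lock helper deadlock-safe is to impose a total order yourself. An illustrative pthread sketch of that general technique, not UBIFS code; build with -pthread:

	#include <pthread.h>
	#include <stdint.h>

	static void lock_2(pthread_mutex_t *m1, pthread_mutex_t *m2)
	{
		/* order the pair by address so concurrent callers can
		 * never acquire the two locks in opposite orders */
		if ((uintptr_t)m1 > (uintptr_t)m2) {
			pthread_mutex_t *tmp = m1;
			m1 = m2;
			m2 = tmp;
		}
		pthread_mutex_lock(m1);
		pthread_mutex_lock(m2);
	}

	static void unlock_2(pthread_mutex_t *m1, pthread_mutex_t *m2)
	{
		/* unlock order is irrelevant for deadlock avoidance */
		pthread_mutex_unlock(m2);
		pthread_mutex_unlock(m1);
	}

	int main(void)
	{
		static pthread_mutex_t a = PTHREAD_MUTEX_INITIALIZER;
		static pthread_mutex_t b = PTHREAD_MUTEX_INITIALIZER;

		lock_2(&a, &b);
		unlock_2(&a, &b);
		return 0;
	}

End aside.)
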
-+ */ -+static void lock_2_inodes(struct inode *inode1, struct inode *inode2) ++static inline unsigned int io_put_recv_kbuf(struct io_kiocb *req) +{ -+ mutex_lock_nested(&ubifs_inode(inode1)->ui_mutex, WB_MUTEX_1); -+ mutex_lock_nested(&ubifs_inode(inode2)->ui_mutex, WB_MUTEX_2); ++ return io_put_kbuf(req, req->sr_msg.kbuf); +} + -+/** -+ * unlock_2_inodes - a wrapper for unlocking two UBIFS inodes. -+ * @inode1: first inode -+ * @inode2: second inode -+ */ -+static void unlock_2_inodes(struct inode *inode1, struct inode *inode2) ++static int io_recvmsg_prep_async(struct io_kiocb *req) +{ -+ mutex_unlock(&ubifs_inode(inode2)->ui_mutex); -+ mutex_unlock(&ubifs_inode(inode1)->ui_mutex); ++ int ret; ++ ++ ret = io_recvmsg_copy_hdr(req, req->async_data); ++ if (!ret) ++ req->flags |= REQ_F_NEED_CLEANUP; ++ return ret; +} + -+static int ubifs_tmpfile(struct user_namespace *mnt_userns, struct inode *dir, -+ struct dentry *dentry, umode_t mode) - { - struct inode *inode; - struct ubifs_info *c = dir->i_sb->s_fs_info; -- struct ubifs_budget_req req = { .new_ino = 1, .new_dent = 1}; -+ struct ubifs_budget_req req = { .new_ino = 1, .new_dent = 1, -+ .dirtied_ino = 1}; - struct ubifs_budget_req ino_req = { .dirtied_ino = 1 }; -- struct ubifs_inode *ui, *dir_ui = ubifs_inode(dir); -+ struct ubifs_inode *ui; - int err, instantiated = 0; - struct fscrypt_name nm; - - /* -- * Budget request settings: new dirty inode, new direntry, -- * budget for dirtied inode will be released via writeback. -+ * Budget request settings: new inode, new direntry, changing the -+ * parent directory inode. -+ * Allocate budget separately for new dirtied inode, the budget will -+ * be released via writeback. - */ - - dbg_gen("dent '%pd', mode %#hx in dir ino %lu", -@@ -392,42 +469,30 @@ static int do_tmpfile(struct inode *dir, struct dentry *dentry, - } - ui = ubifs_inode(inode); - -- if (whiteout) { -- init_special_inode(inode, inode->i_mode, WHITEOUT_DEV); -- ubifs_assert(c, inode->i_op == &ubifs_file_inode_operations); -- } -- - err = ubifs_init_security(dir, inode, &dentry->d_name); - if (err) - goto out_inode; - - mutex_lock(&ui->ui_mutex); - insert_inode_hash(inode); -- -- if (whiteout) { -- mark_inode_dirty(inode); -- drop_nlink(inode); -- *whiteout = inode; -- } else { -- d_tmpfile(dentry, inode); -- } -+ d_tmpfile(dentry, inode); - ubifs_assert(c, ui->dirty); - - instantiated = 1; - mutex_unlock(&ui->ui_mutex); - -- mutex_lock(&dir_ui->ui_mutex); -+ lock_2_inodes(dir, inode); - err = ubifs_jnl_update(c, dir, &nm, inode, 1, 0); - if (err) - goto out_cancel; -- mutex_unlock(&dir_ui->ui_mutex); -+ unlock_2_inodes(dir, inode); - - ubifs_release_budget(c, &req); - - return 0; - - out_cancel: -- mutex_unlock(&dir_ui->ui_mutex); -+ unlock_2_inodes(dir, inode); - out_inode: - make_bad_inode(inode); - if (!instantiated) -@@ -441,12 +506,6 @@ out_budg: - return err; - } - --static int ubifs_tmpfile(struct user_namespace *mnt_userns, struct inode *dir, -- struct dentry *dentry, umode_t mode) --{ -- return do_tmpfile(dir, dentry, mode, NULL); --} -- - /** - * vfs_dent_type - get VFS directory entry type. - * @type: UBIFS directory entry type -@@ -660,32 +719,6 @@ static int ubifs_dir_release(struct inode *dir, struct file *file) - return 0; - } - --/** -- * lock_2_inodes - a wrapper for locking two UBIFS inodes. -- * @inode1: first inode -- * @inode2: second inode -- * -- * We do not implement any tricks to guarantee strict lock ordering, because -- * VFS has already done it for us on the @i_mutex. 
So this is just a simple -- * wrapper function. -- */ --static void lock_2_inodes(struct inode *inode1, struct inode *inode2) --{ -- mutex_lock_nested(&ubifs_inode(inode1)->ui_mutex, WB_MUTEX_1); -- mutex_lock_nested(&ubifs_inode(inode2)->ui_mutex, WB_MUTEX_2); --} -- --/** -- * unlock_2_inodes - a wrapper for unlocking two UBIFS inodes. -- * @inode1: first inode -- * @inode2: second inode -- */ --static void unlock_2_inodes(struct inode *inode1, struct inode *inode2) --{ -- mutex_unlock(&ubifs_inode(inode2)->ui_mutex); -- mutex_unlock(&ubifs_inode(inode1)->ui_mutex); --} -- - static int ubifs_link(struct dentry *old_dentry, struct inode *dir, - struct dentry *dentry) - { -@@ -949,7 +982,8 @@ static int ubifs_mkdir(struct user_namespace *mnt_userns, struct inode *dir, - struct ubifs_inode *dir_ui = ubifs_inode(dir); - struct ubifs_info *c = dir->i_sb->s_fs_info; - int err, sz_change; -- struct ubifs_budget_req req = { .new_ino = 1, .new_dent = 1 }; -+ struct ubifs_budget_req req = { .new_ino = 1, .new_dent = 1, -+ .dirtied_ino = 1}; - struct fscrypt_name nm; - - /* -@@ -1264,17 +1298,19 @@ static int do_rename(struct inode *old_dir, struct dentry *old_dentry, - .dirtied_ino = 3 }; - struct ubifs_budget_req ino_req = { .dirtied_ino = 1, - .dirtied_ino_d = ALIGN(old_inode_ui->data_len, 8) }; -+ struct ubifs_budget_req wht_req; - struct timespec64 time; - unsigned int saved_nlink; - struct fscrypt_name old_nm, new_nm; - - /* -- * Budget request settings: deletion direntry, new direntry, removing -- * the old inode, and changing old and new parent directory inodes. -+ * Budget request settings: -+ * req: deletion direntry, new direntry, removing the old inode, -+ * and changing old and new parent directory inodes. -+ * -+ * wht_req: new whiteout inode for RENAME_WHITEOUT. - * -- * However, this operation also marks the target inode as dirty and -- * does not write it, so we allocate budget for the target inode -- * separately. -+ * ino_req: marks the target inode as dirty and does not write it. - */ - - dbg_gen("dent '%pd' ino %lu in dir ino %lu to dent '%pd' in dir ino %lu flags 0x%x", -@@ -1331,20 +1367,44 @@ static int do_rename(struct inode *old_dir, struct dentry *old_dentry, - goto out_release; - } - -- err = do_tmpfile(old_dir, old_dentry, S_IFCHR | WHITEOUT_MODE, &whiteout); -- if (err) { -+ /* -+ * The whiteout inode without dentry is pinned in memory, -+ * umount won't happen during rename process because we -+ * got parent dentry. -+ */ -+ whiteout = create_whiteout(old_dir, old_dentry); -+ if (IS_ERR(whiteout)) { -+ err = PTR_ERR(whiteout); - kfree(dev); - goto out_release; - } - -- spin_lock(&whiteout->i_lock); -- whiteout->i_state |= I_LINKABLE; -- spin_unlock(&whiteout->i_lock); -- - whiteout_ui = ubifs_inode(whiteout); - whiteout_ui->data = dev; - whiteout_ui->data_len = ubifs_encode_dev(dev, MKDEV(0, 0)); - ubifs_assert(c, !whiteout_ui->dirty); ++static int io_recvmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) ++{ ++ struct io_sr_msg *sr = &req->sr_msg; + -+ memset(&wht_req, 0, sizeof(struct ubifs_budget_req)); -+ wht_req.new_ino = 1; -+ wht_req.new_ino_d = ALIGN(whiteout_ui->data_len, 8); -+ /* -+ * To avoid deadlock between space budget (holds ui_mutex and -+ * waits wb work) and writeback work(waits ui_mutex), do space -+ * budget before ubifs inodes locked. -+ */ -+ err = ubifs_budget_space(c, &wht_req); -+ if (err) { -+ /* -+ * Whiteout inode can not be written on flash by -+ * ubifs_jnl_write_inode(), because it's neither -+ * dirty nor zero-nlink. 
-+ */ -+ iput(whiteout); -+ goto out_release; -+ } ++ if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) ++ return -EINVAL; ++ if (unlikely(sqe->addr2 || sqe->file_index)) ++ return -EINVAL; ++ if (unlikely(sqe->addr2 || sqe->file_index || sqe->ioprio)) ++ return -EINVAL; + -+ /* Add the old_dentry size to the old_dir size. */ -+ old_sz -= CALC_DENT_SIZE(fname_len(&old_nm)); - } - - lock_4_inodes(old_dir, new_dir, new_inode, whiteout); -@@ -1416,29 +1476,11 @@ static int do_rename(struct inode *old_dir, struct dentry *old_dentry, - sync = IS_DIRSYNC(old_dir) || IS_DIRSYNC(new_dir); - if (unlink && IS_SYNC(new_inode)) - sync = 1; -- } -- -- if (whiteout) { -- struct ubifs_budget_req wht_req = { .dirtied_ino = 1, -- .dirtied_ino_d = \ -- ALIGN(ubifs_inode(whiteout)->data_len, 8) }; -- -- err = ubifs_budget_space(c, &wht_req); -- if (err) { -- kfree(whiteout_ui->data); -- whiteout_ui->data_len = 0; -- iput(whiteout); -- goto out_release; -- } -- -- inc_nlink(whiteout); -- mark_inode_dirty(whiteout); -- -- spin_lock(&whiteout->i_lock); -- whiteout->i_state &= ~I_LINKABLE; -- spin_unlock(&whiteout->i_lock); -- -- iput(whiteout); -+ /* -+ * S_SYNC flag of whiteout inherits from the old_dir, and we -+ * have already checked the old dir inode. So there is no need -+ * to check whiteout. -+ */ - } - - err = ubifs_jnl_rename(c, old_dir, old_inode, &old_nm, new_dir, -@@ -1449,6 +1491,11 @@ static int do_rename(struct inode *old_dir, struct dentry *old_dentry, - unlock_4_inodes(old_dir, new_dir, new_inode, whiteout); - ubifs_release_budget(c, &req); - -+ if (whiteout) { -+ ubifs_release_budget(c, &wht_req); -+ iput(whiteout); -+ } ++ sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr)); ++ sr->len = READ_ONCE(sqe->len); ++ sr->bgid = READ_ONCE(sqe->buf_group); ++ sr->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL; ++ if (sr->msg_flags & MSG_DONTWAIT) ++ req->flags |= REQ_F_NOWAIT; + - mutex_lock(&old_inode_ui->ui_mutex); - release = old_inode_ui->dirty; - mark_inode_dirty_sync(old_inode); -@@ -1457,11 +1504,16 @@ static int do_rename(struct inode *old_dir, struct dentry *old_dentry, - if (release) - ubifs_release_budget(c, &ino_req); - if (IS_SYNC(old_inode)) -- err = old_inode->i_sb->s_op->write_inode(old_inode, NULL); -+ /* -+ * Rename finished here. Although old inode cannot be updated -+ * on flash, old ctime is not a big problem, don't return err -+ * code to userspace. 
-+ */ -+ old_inode->i_sb->s_op->write_inode(old_inode, NULL); - - fscrypt_free_filename(&old_nm); - fscrypt_free_filename(&new_nm); -- return err; ++#ifdef CONFIG_COMPAT ++ if (req->ctx->compat) ++ sr->msg_flags |= MSG_CMSG_COMPAT; ++#endif ++ sr->done_io = 0; + return 0; - - out_cancel: - if (unlink) { -@@ -1482,11 +1534,11 @@ out_cancel: - inc_nlink(old_dir); - } - } -+ unlock_4_inodes(old_dir, new_dir, new_inode, whiteout); - if (whiteout) { -- drop_nlink(whiteout); -+ ubifs_release_budget(c, &wht_req); - iput(whiteout); - } -- unlock_4_inodes(old_dir, new_dir, new_inode, whiteout); - out_release: - ubifs_release_budget(c, &ino_req); - ubifs_release_budget(c, &req); -diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c -index 5cfa28cd00cdc..6b45a037a0471 100644 ---- a/fs/ubifs/file.c -+++ b/fs/ubifs/file.c -@@ -570,7 +570,7 @@ static int ubifs_write_end(struct file *file, struct address_space *mapping, - } - - if (!PagePrivate(page)) { -- SetPagePrivate(page); -+ attach_page_private(page, (void *)1); - atomic_long_inc(&c->dirty_pg_cnt); - __set_page_dirty_nobuffers(page); - } -@@ -947,7 +947,7 @@ static int do_writepage(struct page *page, int len) - release_existing_page_budget(c); - - atomic_long_dec(&c->dirty_pg_cnt); -- ClearPagePrivate(page); -+ detach_page_private(page); - ClearPageChecked(page); - - kunmap(page); -@@ -1304,7 +1304,7 @@ static void ubifs_invalidatepage(struct page *page, unsigned int offset, - release_existing_page_budget(c); - - atomic_long_dec(&c->dirty_pg_cnt); -- ClearPagePrivate(page); -+ detach_page_private(page); - ClearPageChecked(page); - } - -@@ -1471,8 +1471,8 @@ static int ubifs_migrate_page(struct address_space *mapping, - return rc; - - if (PagePrivate(page)) { -- ClearPagePrivate(page); -- SetPagePrivate(newpage); -+ detach_page_private(page); -+ attach_page_private(newpage, (void *)1); - } - - if (mode != MIGRATE_SYNC_NO_COPY) -@@ -1496,7 +1496,7 @@ static int ubifs_releasepage(struct page *page, gfp_t unused_gfp_flags) - return 0; - ubifs_assert(c, PagePrivate(page)); - ubifs_assert(c, 0); -- ClearPagePrivate(page); -+ detach_page_private(page); - ClearPageChecked(page); - return 1; - } -@@ -1567,7 +1567,7 @@ static vm_fault_t ubifs_vm_page_mkwrite(struct vm_fault *vmf) - else { - if (!PageChecked(page)) - ubifs_convert_page_budget(c); -- SetPagePrivate(page); -+ attach_page_private(page, (void *)1); - atomic_long_inc(&c->dirty_pg_cnt); - __set_page_dirty_nobuffers(page); - } -diff --git a/fs/ubifs/io.c b/fs/ubifs/io.c -index 00b61dba62b70..b019dd6f7fa06 100644 ---- a/fs/ubifs/io.c -+++ b/fs/ubifs/io.c -@@ -833,16 +833,42 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len) - */ - n = aligned_len >> c->max_write_shift; - if (n) { -- n <<= c->max_write_shift; -+ int m = n - 1; ++} + - dbg_io("write %d bytes to LEB %d:%d", n, wbuf->lnum, - wbuf->offs); -- err = ubifs_leb_write(c, wbuf->lnum, buf + written, -- wbuf->offs, n); ++static int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags) ++{ ++ struct io_async_msghdr iomsg, *kmsg; ++ struct io_sr_msg *sr = &req->sr_msg; ++ struct socket *sock; ++ struct io_buffer *kbuf; ++ unsigned flags; ++ int min_ret = 0; ++ int ret, cflags = 0; ++ bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK; + -+ if (m) { -+ /* '(n-1)<<c->max_write_shift < len' is always true. 
*/ -+ m <<= c->max_write_shift; -+ err = ubifs_leb_write(c, wbuf->lnum, buf + written, -+ wbuf->offs, m); -+ if (err) -+ goto out; -+ wbuf->offs += m; -+ aligned_len -= m; -+ len -= m; -+ written += m; -+ } ++ sock = sock_from_file(req->file); ++ if (unlikely(!sock)) ++ return -ENOTSOCK; + -+ /* -+ * The non-written len of buf may be less than 'n' because -+ * parameter 'len' is not 8 bytes aligned, so here we read -+ * min(len, n) bytes from buf. -+ */ -+ n = 1 << c->max_write_shift; -+ memcpy(wbuf->buf, buf + written, min(len, n)); -+ if (n > len) { -+ ubifs_assert(c, n - len < 8); -+ ubifs_pad(c, wbuf->buf + len, n - len); ++ kmsg = req->async_data; ++ if (!kmsg) { ++ ret = io_recvmsg_copy_hdr(req, &iomsg); ++ if (ret) ++ return ret; ++ kmsg = &iomsg; ++ } ++ ++ if (req->flags & REQ_F_BUFFER_SELECT) { ++ kbuf = io_recv_buffer_select(req, !force_nonblock); ++ if (IS_ERR(kbuf)) ++ return PTR_ERR(kbuf); ++ kmsg->fast_iov[0].iov_base = u64_to_user_ptr(kbuf->addr); ++ kmsg->fast_iov[0].iov_len = req->sr_msg.len; ++ iov_iter_init(&kmsg->msg.msg_iter, READ, kmsg->fast_iov, ++ 1, req->sr_msg.len); ++ } ++ ++ flags = req->sr_msg.msg_flags; ++ if (force_nonblock) ++ flags |= MSG_DONTWAIT; ++ if (flags & MSG_WAITALL) ++ min_ret = iov_iter_count(&kmsg->msg.msg_iter); ++ ++ ret = __sys_recvmsg_sock(sock, &kmsg->msg, req->sr_msg.umsg, ++ kmsg->uaddr, flags); ++ if (ret < min_ret) { ++ if (ret == -EAGAIN && force_nonblock) ++ return io_setup_async_msg(req, kmsg); ++ if (ret == -ERESTARTSYS) ++ ret = -EINTR; ++ if (ret > 0 && io_net_retry(sock, flags)) { ++ sr->done_io += ret; ++ req->flags |= REQ_F_PARTIAL_IO; ++ return io_setup_async_msg(req, kmsg); + } ++ req_set_fail(req); ++ } else if ((flags & MSG_WAITALL) && (kmsg->msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC))) { ++ req_set_fail(req); ++ } + -+ err = ubifs_leb_write(c, wbuf->lnum, wbuf->buf, wbuf->offs, n); - if (err) - goto out; - wbuf->offs += n; - aligned_len -= n; -- len -= n; -+ len -= min(len, n); - written += n; - } - -diff --git a/fs/ubifs/ioctl.c b/fs/ubifs/ioctl.c -index c6a8634877803..71bcebe45f9c5 100644 ---- a/fs/ubifs/ioctl.c -+++ b/fs/ubifs/ioctl.c -@@ -108,7 +108,7 @@ static int setflags(struct inode *inode, int flags) - struct ubifs_inode *ui = ubifs_inode(inode); - struct ubifs_info *c = inode->i_sb->s_fs_info; - struct ubifs_budget_req req = { .dirtied_ino = 1, -- .dirtied_ino_d = ui->data_len }; -+ .dirtied_ino_d = ALIGN(ui->data_len, 8) }; - - err = ubifs_budget_space(c, &req); - if (err) -diff --git a/fs/ubifs/journal.c b/fs/ubifs/journal.c -index 8ea680dba61e3..75dab0ae3939d 100644 ---- a/fs/ubifs/journal.c -+++ b/fs/ubifs/journal.c -@@ -1207,9 +1207,9 @@ out_free: - * @sync: non-zero if the write-buffer has to be synchronized - * - * This function implements the re-name operation which may involve writing up -- * to 4 inodes and 2 directory entries. It marks the written inodes as clean -- * and returns zero on success. In case of failure, a negative error code is -- * returned. -+ * to 4 inodes(new inode, whiteout inode, old and new parent directory inodes) -+ * and 2 directory entries. It marks the written inodes as clean and returns -+ * zero on success. In case of failure, a negative error code is returned. 
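
(Aside, not from this series: the journal buffer sizing in this function simply sums the 8-byte-aligned lengths of every node to be written, and the change below extends that sum with the whiteout inode length (wlen). A toy computation with made-up node sizes, using an ALIGN macro equivalent to the kernel's for power-of-two boundaries:

	#include <stdio.h>

	/* round x up to the power-of-two boundary a */
	#define ALIGN(x, a) (((x) + (a) - 1) & ~((a) - 1))

	int main(void)
	{
		/* illustrative node sizes only, not taken from UBIFS */
		int dlen1 = 45, dlen2 = 37, ilen = 160, wlen = 168, plen = 160;
		int len = ALIGN(dlen1, 8) + ALIGN(dlen2, 8) + ALIGN(ilen, 8) +
			  ALIGN(wlen, 8) + ALIGN(plen, 8);

		printf("journal buffer needed: %d bytes\n", len);
		return 0;
	}

End aside.)
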
- */ - int ubifs_jnl_rename(struct ubifs_info *c, const struct inode *old_dir, - const struct inode *old_inode, -@@ -1222,14 +1222,15 @@ int ubifs_jnl_rename(struct ubifs_info *c, const struct inode *old_dir, - void *p; - union ubifs_key key; - struct ubifs_dent_node *dent, *dent2; -- int err, dlen1, dlen2, ilen, lnum, offs, len, orphan_added = 0; -+ int err, dlen1, dlen2, ilen, wlen, lnum, offs, len, orphan_added = 0; - int aligned_dlen1, aligned_dlen2, plen = UBIFS_INO_NODE_SZ; - int last_reference = !!(new_inode && new_inode->i_nlink == 0); - int move = (old_dir != new_dir); -- struct ubifs_inode *new_ui; -+ struct ubifs_inode *new_ui, *whiteout_ui; - u8 hash_old_dir[UBIFS_HASH_ARR_SZ]; - u8 hash_new_dir[UBIFS_HASH_ARR_SZ]; - u8 hash_new_inode[UBIFS_HASH_ARR_SZ]; -+ u8 hash_whiteout_inode[UBIFS_HASH_ARR_SZ]; - u8 hash_dent1[UBIFS_HASH_ARR_SZ]; - u8 hash_dent2[UBIFS_HASH_ARR_SZ]; - -@@ -1249,9 +1250,20 @@ int ubifs_jnl_rename(struct ubifs_info *c, const struct inode *old_dir, - } else - ilen = 0; - -+ if (whiteout) { -+ whiteout_ui = ubifs_inode(whiteout); -+ ubifs_assert(c, mutex_is_locked(&whiteout_ui->ui_mutex)); -+ ubifs_assert(c, whiteout->i_nlink == 1); -+ ubifs_assert(c, !whiteout_ui->dirty); -+ wlen = UBIFS_INO_NODE_SZ; -+ wlen += whiteout_ui->data_len; -+ } else -+ wlen = 0; ++ if (req->flags & REQ_F_BUFFER_SELECTED) ++ cflags = io_put_recv_kbuf(req); ++ /* fast path, check for non-NULL to avoid function call */ ++ if (kmsg->free_iov) ++ kfree(kmsg->free_iov); ++ req->flags &= ~REQ_F_NEED_CLEANUP; ++ if (ret >= 0) ++ ret += sr->done_io; ++ else if (sr->done_io) ++ ret = sr->done_io; ++ __io_req_complete(req, issue_flags, ret, cflags); ++ return 0; ++} + - aligned_dlen1 = ALIGN(dlen1, 8); - aligned_dlen2 = ALIGN(dlen2, 8); -- len = aligned_dlen1 + aligned_dlen2 + ALIGN(ilen, 8) + ALIGN(plen, 8); -+ len = aligned_dlen1 + aligned_dlen2 + ALIGN(ilen, 8) + -+ ALIGN(wlen, 8) + ALIGN(plen, 8); - if (move) - len += plen; - -@@ -1313,6 +1325,15 @@ int ubifs_jnl_rename(struct ubifs_info *c, const struct inode *old_dir, - p += ALIGN(ilen, 8); - } - -+ if (whiteout) { -+ pack_inode(c, p, whiteout, 0); -+ err = ubifs_node_calc_hash(c, p, hash_whiteout_inode); -+ if (err) -+ goto out_release; ++static int io_recv(struct io_kiocb *req, unsigned int issue_flags) ++{ ++ struct io_buffer *kbuf; ++ struct io_sr_msg *sr = &req->sr_msg; ++ struct msghdr msg; ++ void __user *buf = sr->buf; ++ struct socket *sock; ++ struct iovec iov; ++ unsigned flags; ++ int min_ret = 0; ++ int ret, cflags = 0; ++ bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK; + -+ p += ALIGN(wlen, 8); ++ sock = sock_from_file(req->file); ++ if (unlikely(!sock)) ++ return -ENOTSOCK; ++ ++ if (req->flags & REQ_F_BUFFER_SELECT) { ++ kbuf = io_recv_buffer_select(req, !force_nonblock); ++ if (IS_ERR(kbuf)) ++ return PTR_ERR(kbuf); ++ buf = u64_to_user_ptr(kbuf->addr); + } + - if (!move) { - pack_inode(c, p, old_dir, 1); - err = ubifs_node_calc_hash(c, p, hash_old_dir); -@@ -1352,6 +1373,9 @@ int ubifs_jnl_rename(struct ubifs_info *c, const struct inode *old_dir, - if (new_inode) - ubifs_wbuf_add_ino_nolock(&c->jheads[BASEHD].wbuf, - new_inode->i_ino); -+ if (whiteout) -+ ubifs_wbuf_add_ino_nolock(&c->jheads[BASEHD].wbuf, -+ whiteout->i_ino); - } - release_head(c, BASEHD); - -@@ -1368,8 +1392,6 @@ int ubifs_jnl_rename(struct ubifs_info *c, const struct inode *old_dir, - err = ubifs_tnc_add_nm(c, &key, lnum, offs, dlen2, hash_dent2, old_nm); - if (err) - goto out_ro; -- -- ubifs_delete_orphan(c, whiteout->i_ino); - } else { - 
err = ubifs_add_dirt(c, lnum, dlen2); - if (err) -@@ -1390,6 +1412,15 @@ int ubifs_jnl_rename(struct ubifs_info *c, const struct inode *old_dir, - offs += ALIGN(ilen, 8); - } - -+ if (whiteout) { -+ ino_key_init(c, &key, whiteout->i_ino); -+ err = ubifs_tnc_add(c, &key, lnum, offs, wlen, -+ hash_whiteout_inode); -+ if (err) -+ goto out_ro; -+ offs += ALIGN(wlen, 8); ++ ret = import_single_range(READ, buf, sr->len, &iov, &msg.msg_iter); ++ if (unlikely(ret)) ++ goto out_free; ++ ++ msg.msg_name = NULL; ++ msg.msg_control = NULL; ++ msg.msg_controllen = 0; ++ msg.msg_namelen = 0; ++ msg.msg_iocb = NULL; ++ msg.msg_flags = 0; ++ ++ flags = req->sr_msg.msg_flags; ++ if (force_nonblock) ++ flags |= MSG_DONTWAIT; ++ if (flags & MSG_WAITALL) ++ min_ret = iov_iter_count(&msg.msg_iter); ++ ++ ret = sock_recvmsg(sock, &msg, flags); ++ if (ret < min_ret) { ++ if (ret == -EAGAIN && force_nonblock) ++ return -EAGAIN; ++ if (ret == -ERESTARTSYS) ++ ret = -EINTR; ++ if (ret > 0 && io_net_retry(sock, flags)) { ++ sr->len -= ret; ++ sr->buf += ret; ++ sr->done_io += ret; ++ req->flags |= REQ_F_PARTIAL_IO; ++ return -EAGAIN; ++ } ++ req_set_fail(req); ++ } else if ((flags & MSG_WAITALL) && (msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC))) { ++out_free: ++ req_set_fail(req); + } ++ if (req->flags & REQ_F_BUFFER_SELECTED) ++ cflags = io_put_recv_kbuf(req); ++ if (ret >= 0) ++ ret += sr->done_io; ++ else if (sr->done_io) ++ ret = sr->done_io; ++ __io_req_complete(req, issue_flags, ret, cflags); ++ return 0; ++} + - ino_key_init(c, &key, old_dir->i_ino); - err = ubifs_tnc_add(c, &key, lnum, offs, plen, hash_old_dir); - if (err) -@@ -1410,6 +1441,11 @@ int ubifs_jnl_rename(struct ubifs_info *c, const struct inode *old_dir, - new_ui->synced_i_size = new_ui->ui_size; - spin_unlock(&new_ui->ui_lock); - } -+ /* -+ * No need to mark whiteout inode clean. -+ * Whiteout doesn't have non-zero size, no need to update -+ * synced_i_size for whiteout_ui. -+ */ - mark_inode_clean(c, ubifs_inode(old_dir)); - if (move) - mark_inode_clean(c, ubifs_inode(new_dir)); -diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c -index f0fb25727d961..eb05038b71911 100644 ---- a/fs/ubifs/super.c -+++ b/fs/ubifs/super.c -@@ -1853,7 +1853,6 @@ out: - kthread_stop(c->bgt); - c->bgt = NULL; - } -- free_wbufs(c); - kfree(c->write_reserve_buf); - c->write_reserve_buf = NULL; - vfree(c->ileb_buf); -diff --git a/fs/udf/dir.c b/fs/udf/dir.c -index 70abdfad2df17..42e3e551fa4c3 100644 ---- a/fs/udf/dir.c -+++ b/fs/udf/dir.c -@@ -31,6 +31,7 @@ - #include <linux/mm.h> - #include <linux/slab.h> - #include <linux/bio.h> -+#include <linux/iversion.h> - - #include "udf_i.h" - #include "udf_sb.h" -@@ -43,7 +44,7 @@ static int udf_readdir(struct file *file, struct dir_context *ctx) - struct fileIdentDesc *fi = NULL; - struct fileIdentDesc cfi; - udf_pblk_t block, iblock; -- loff_t nf_pos; -+ loff_t nf_pos, emit_pos = 0; - int flen; - unsigned char *fname = NULL, *copy_name = NULL; - unsigned char *nameptr; -@@ -57,6 +58,7 @@ static int udf_readdir(struct file *file, struct dir_context *ctx) - int i, num, ret = 0; - struct extent_position epos = { NULL, 0, {0, 0} }; - struct super_block *sb = dir->i_sb; -+ bool pos_valid = false; - - if (ctx->pos == 0) { - if (!dir_emit_dot(file, ctx)) -@@ -67,6 +69,21 @@ static int udf_readdir(struct file *file, struct dir_context *ctx) - if (nf_pos >= size) - goto out; - -+ /* -+ * Something changed since last readdir (either lseek was called or dir -+ * changed)? 
We need to verify the position correctly points at the -+ * beginning of some dir entry so that the directory parsing code does -+ * not get confused. Since UDF does not have any reliable way of -+ * identifying beginning of dir entry (names are under user control), -+ * we need to scan the directory from the beginning. -+ */ -+ if (!inode_eq_iversion(dir, file->f_version)) { -+ emit_pos = nf_pos; -+ nf_pos = 0; ++static int io_accept_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) ++{ ++ struct io_accept *accept = &req->accept; ++ ++ if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) ++ return -EINVAL; ++ if (sqe->ioprio || sqe->len || sqe->buf_index) ++ return -EINVAL; ++ ++ accept->addr = u64_to_user_ptr(READ_ONCE(sqe->addr)); ++ accept->addr_len = u64_to_user_ptr(READ_ONCE(sqe->addr2)); ++ accept->flags = READ_ONCE(sqe->accept_flags); ++ accept->nofile = rlimit(RLIMIT_NOFILE); ++ ++ accept->file_slot = READ_ONCE(sqe->file_index); ++ if (accept->file_slot && (accept->flags & SOCK_CLOEXEC)) ++ return -EINVAL; ++ if (accept->flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK)) ++ return -EINVAL; ++ if (SOCK_NONBLOCK != O_NONBLOCK && (accept->flags & SOCK_NONBLOCK)) ++ accept->flags = (accept->flags & ~SOCK_NONBLOCK) | O_NONBLOCK; ++ return 0; ++} ++ ++static int io_accept(struct io_kiocb *req, unsigned int issue_flags) ++{ ++ struct io_accept *accept = &req->accept; ++ bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK; ++ unsigned int file_flags = force_nonblock ? O_NONBLOCK : 0; ++ bool fixed = !!accept->file_slot; ++ struct file *file; ++ int ret, fd; ++ ++ if (!fixed) { ++ fd = __get_unused_fd_flags(accept->flags, accept->nofile); ++ if (unlikely(fd < 0)) ++ return fd; ++ } ++ file = do_accept(req->file, file_flags, accept->addr, accept->addr_len, ++ accept->flags); ++ if (IS_ERR(file)) { ++ if (!fixed) ++ put_unused_fd(fd); ++ ret = PTR_ERR(file); ++ /* safe to retry */ ++ req->flags |= REQ_F_PARTIAL_IO; ++ if (ret == -EAGAIN && force_nonblock) ++ return -EAGAIN; ++ if (ret == -ERESTARTSYS) ++ ret = -EINTR; ++ req_set_fail(req); ++ } else if (!fixed) { ++ fd_install(fd, file); ++ ret = fd; + } else { -+ pos_valid = true; ++ ret = io_install_fixed_file(req, file, issue_flags, ++ accept->file_slot - 1); + } ++ __io_req_complete(req, issue_flags, ret, 0); ++ return 0; ++} + - fname = kmalloc(UDF_NAME_LEN, GFP_NOFS); - if (!fname) { - ret = -ENOMEM; -@@ -122,13 +139,21 @@ static int udf_readdir(struct file *file, struct dir_context *ctx) - - while (nf_pos < size) { - struct kernel_lb_addr tloc; -+ loff_t cur_pos = nf_pos; - -- ctx->pos = (nf_pos >> 2) + 1; -+ /* Update file position only if we got past the current one */ -+ if (nf_pos >= emit_pos) { -+ ctx->pos = (nf_pos >> 2) + 1; -+ pos_valid = true; -+ } - - fi = udf_fileident_read(dir, &nf_pos, &fibh, &cfi, &epos, &eloc, - &elen, &offset); - if (!fi) - goto out; -+ /* Still not at offset where user asked us to read from? 
*/ -+ if (cur_pos < emit_pos) -+ continue; - - liu = le16_to_cpu(cfi.lengthOfImpUse); - lfi = cfi.lengthFileIdent; -@@ -186,8 +211,11 @@ static int udf_readdir(struct file *file, struct dir_context *ctx) - } /* end while */ - - ctx->pos = (nf_pos >> 2) + 1; -+ pos_valid = true; - - out: -+ if (pos_valid) -+ file->f_version = inode_query_iversion(dir); - if (fibh.sbh != fibh.ebh) - brelse(fibh.ebh); - brelse(fibh.sbh); -diff --git a/fs/udf/ialloc.c b/fs/udf/ialloc.c -index 2ecf0e87660e3..b5d611cee749c 100644 ---- a/fs/udf/ialloc.c -+++ b/fs/udf/ialloc.c -@@ -77,6 +77,7 @@ struct inode *udf_new_inode(struct inode *dir, umode_t mode) - GFP_KERNEL); - } - if (!iinfo->i_data) { -+ make_bad_inode(inode); - iput(inode); - return ERR_PTR(-ENOMEM); - } -@@ -86,6 +87,7 @@ struct inode *udf_new_inode(struct inode *dir, umode_t mode) - dinfo->i_location.partitionReferenceNum, - start, &err); - if (err) { -+ make_bad_inode(inode); - iput(inode); - return ERR_PTR(err); - } -diff --git a/fs/udf/inode.c b/fs/udf/inode.c -index 1d6b7a50736ba..ea8f6cd01f501 100644 ---- a/fs/udf/inode.c -+++ b/fs/udf/inode.c -@@ -258,10 +258,6 @@ int udf_expand_file_adinicb(struct inode *inode) - char *kaddr; - struct udf_inode_info *iinfo = UDF_I(inode); - int err; -- struct writeback_control udf_wbc = { -- .sync_mode = WB_SYNC_NONE, -- .nr_to_write = 1, -- }; - - WARN_ON_ONCE(!inode_is_locked(inode)); - if (!iinfo->i_lenAlloc) { -@@ -305,8 +301,10 @@ int udf_expand_file_adinicb(struct inode *inode) - iinfo->i_alloc_type = ICBTAG_FLAG_AD_LONG; - /* from now on we have normal address_space methods */ - inode->i_data.a_ops = &udf_aops; -+ set_page_dirty(page); -+ unlock_page(page); - up_write(&iinfo->i_data_sem); -- err = inode->i_data.a_ops->writepage(page, &udf_wbc); -+ err = filemap_fdatawrite(inode->i_mapping); - if (err) { - /* Restore everything back so that we don't lose data... 
*/ - lock_page(page); -@@ -317,6 +315,7 @@ int udf_expand_file_adinicb(struct inode *inode) - unlock_page(page); - iinfo->i_alloc_type = ICBTAG_FLAG_AD_IN_ICB; - inode->i_data.a_ops = &udf_adinicb_aops; -+ iinfo->i_lenAlloc = inode->i_size; - up_write(&iinfo->i_data_sem); - } - put_page(page); -diff --git a/fs/udf/namei.c b/fs/udf/namei.c -index caeef08efed23..b3d5f97f16cdb 100644 ---- a/fs/udf/namei.c -+++ b/fs/udf/namei.c -@@ -30,6 +30,7 @@ - #include <linux/sched.h> - #include <linux/crc-itu-t.h> - #include <linux/exportfs.h> -+#include <linux/iversion.h> - - static inline int udf_match(int len1, const unsigned char *name1, int len2, - const unsigned char *name2) -@@ -74,11 +75,11 @@ int udf_write_fi(struct inode *inode, struct fileIdentDesc *cfi, - - if (fileident) { - if (adinicb || (offset + lfi < 0)) { -- memcpy(udf_get_fi_ident(sfi), fileident, lfi); -+ memcpy(sfi->impUse + liu, fileident, lfi); - } else if (offset >= 0) { - memcpy(fibh->ebh->b_data + offset, fileident, lfi); - } else { -- memcpy(udf_get_fi_ident(sfi), fileident, -offset); -+ memcpy(sfi->impUse + liu, fileident, -offset); - memcpy(fibh->ebh->b_data, fileident - offset, - lfi + offset); - } -@@ -87,11 +88,11 @@ int udf_write_fi(struct inode *inode, struct fileIdentDesc *cfi, - offset += lfi; - - if (adinicb || (offset + padlen < 0)) { -- memset(udf_get_fi_ident(sfi) + lfi, 0x00, padlen); -+ memset(sfi->impUse + liu + lfi, 0x00, padlen); - } else if (offset >= 0) { - memset(fibh->ebh->b_data + offset, 0x00, padlen); - } else { -- memset(udf_get_fi_ident(sfi) + lfi, 0x00, -offset); -+ memset(sfi->impUse + liu + lfi, 0x00, -offset); - memset(fibh->ebh->b_data, 0x00, padlen + offset); - } - -@@ -134,6 +135,8 @@ int udf_write_fi(struct inode *inode, struct fileIdentDesc *cfi, - mark_buffer_dirty_inode(fibh->ebh, inode); - mark_buffer_dirty_inode(fibh->sbh, inode); - } -+ inode_inc_iversion(inode); ++static int io_connect_prep_async(struct io_kiocb *req) ++{ ++ struct io_async_connect *io = req->async_data; ++ struct io_connect *conn = &req->connect; + - return 0; - } - -diff --git a/fs/udf/super.c b/fs/udf/super.c -index b2d7c57d06881..aa2f6093d3f6f 100644 ---- a/fs/udf/super.c -+++ b/fs/udf/super.c -@@ -57,6 +57,7 @@ - #include <linux/crc-itu-t.h> - #include <linux/log2.h> - #include <asm/byteorder.h> -+#include <linux/iversion.h> - - #include "udf_sb.h" - #include "udf_i.h" -@@ -149,6 +150,7 @@ static struct inode *udf_alloc_inode(struct super_block *sb) - init_rwsem(&ei->i_data_sem); - ei->cached_extent.lstart = -1; - spin_lock_init(&ei->i_extent_cache_lock); -+ inode_set_iversion(&ei->vfs_inode, 1); - - return &ei->vfs_inode; - } -diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c -index 22bf14ab2d163..b56e8e31d967f 100644 ---- a/fs/userfaultfd.c -+++ b/fs/userfaultfd.c -@@ -982,7 +982,7 @@ static int resolve_userfault_fork(struct userfaultfd_ctx *new, - int fd; - - fd = anon_inode_getfd_secure("[userfaultfd]", &userfaultfd_fops, new, -- O_RDWR | (new->flags & UFFD_SHARED_FCNTL_FLAGS), inode); -+ O_RDONLY | (new->flags & UFFD_SHARED_FCNTL_FLAGS), inode); - if (fd < 0) - return fd; - -@@ -2097,7 +2097,7 @@ SYSCALL_DEFINE1(userfaultfd, int, flags) - mmgrab(ctx->mm); - - fd = anon_inode_getfd_secure("[userfaultfd]", &userfaultfd_fops, ctx, -- O_RDWR | (flags & UFFD_SHARED_FCNTL_FLAGS), NULL); -+ O_RDONLY | (flags & UFFD_SHARED_FCNTL_FLAGS), NULL); - if (fd < 0) { - mmdrop(ctx->mm); - kmem_cache_free(userfaultfd_ctx_cachep, ctx); -diff --git a/fs/xattr.c b/fs/xattr.c -index 5c8c5175b385c..7117cb2538640 100644 ---- 
a/fs/xattr.c -+++ b/fs/xattr.c -@@ -25,6 +25,8 @@ - - #include <linux/uaccess.h> - -+#include "internal.h" ++ return move_addr_to_kernel(conn->addr, conn->addr_len, &io->address); ++} ++ ++static int io_connect_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) ++{ ++ struct io_connect *conn = &req->connect; ++ ++ if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) ++ return -EINVAL; ++ if (sqe->ioprio || sqe->len || sqe->buf_index || sqe->rw_flags || ++ sqe->splice_fd_in) ++ return -EINVAL; ++ ++ conn->addr = u64_to_user_ptr(READ_ONCE(sqe->addr)); ++ conn->addr_len = READ_ONCE(sqe->addr2); ++ return 0; ++} ++ ++static int io_connect(struct io_kiocb *req, unsigned int issue_flags) ++{ ++ struct io_async_connect __io, *io; ++ unsigned file_flags; ++ int ret; ++ bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK; ++ ++ if (req->async_data) { ++ io = req->async_data; ++ } else { ++ ret = move_addr_to_kernel(req->connect.addr, ++ req->connect.addr_len, ++ &__io.address); ++ if (ret) ++ goto out; ++ io = &__io; ++ } ++ ++ file_flags = force_nonblock ? O_NONBLOCK : 0; ++ ++ ret = __sys_connect_file(req->file, &io->address, ++ req->connect.addr_len, file_flags); ++ if ((ret == -EAGAIN || ret == -EINPROGRESS) && force_nonblock) { ++ if (req->async_data) ++ return -EAGAIN; ++ if (io_alloc_async_data(req)) { ++ ret = -ENOMEM; ++ goto out; ++ } ++ memcpy(req->async_data, &__io, sizeof(__io)); ++ return -EAGAIN; ++ } ++ if (ret == -ERESTARTSYS) ++ ret = -EINTR; ++out: ++ if (ret < 0) ++ req_set_fail(req); ++ __io_req_complete(req, issue_flags, ret, 0); ++ return 0; ++} ++#else /* !CONFIG_NET */ ++#define IO_NETOP_FN(op) \ ++static int io_##op(struct io_kiocb *req, unsigned int issue_flags) \ ++{ \ ++ return -EOPNOTSUPP; \ ++} ++ ++#define IO_NETOP_PREP(op) \ ++IO_NETOP_FN(op) \ ++static int io_##op##_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) \ ++{ \ ++ return -EOPNOTSUPP; \ ++} \ ++ ++#define IO_NETOP_PREP_ASYNC(op) \ ++IO_NETOP_PREP(op) \ ++static int io_##op##_prep_async(struct io_kiocb *req) \ ++{ \ ++ return -EOPNOTSUPP; \ ++} ++ ++IO_NETOP_PREP_ASYNC(sendmsg); ++IO_NETOP_PREP_ASYNC(recvmsg); ++IO_NETOP_PREP_ASYNC(connect); ++IO_NETOP_PREP(accept); ++IO_NETOP_FN(send); ++IO_NETOP_FN(recv); ++#endif /* CONFIG_NET */ ++ ++struct io_poll_table { ++ struct poll_table_struct pt; ++ struct io_kiocb *req; ++ int nr_entries; ++ int error; ++}; + - static const char * - strcmp_prefix(const char *a, const char *a_prefix) - { -@@ -539,43 +541,76 @@ EXPORT_SYMBOL_GPL(vfs_removexattr); - /* - * Extended attribute SET operations - */ --static long --setxattr(struct user_namespace *mnt_userns, struct dentry *d, -- const char __user *name, const void __user *value, size_t size, -- int flags) ++#define IO_POLL_CANCEL_FLAG BIT(31) ++#define IO_POLL_RETRY_FLAG BIT(30) ++#define IO_POLL_REF_MASK GENMASK(29, 0) + -+int setxattr_copy(const char __user *name, struct xattr_ctx *ctx) - { - int error; -- void *kvalue = NULL; -- char kname[XATTR_NAME_MAX + 1]; - -- if (flags & ~(XATTR_CREATE|XATTR_REPLACE)) -+ if (ctx->flags & ~(XATTR_CREATE|XATTR_REPLACE)) - return -EINVAL; - -- error = strncpy_from_user(kname, name, sizeof(kname)); -- if (error == 0 || error == sizeof(kname)) -- error = -ERANGE; -+ error = strncpy_from_user(ctx->kname->name, name, -+ sizeof(ctx->kname->name)); -+ if (error == 0 || error == sizeof(ctx->kname->name)) -+ return -ERANGE; - if (error < 0) - return error; - -- if (size) { -- if (size > XATTR_SIZE_MAX) -+ error = 0; -+ if (ctx->size) { -+ if (ctx->size > 
XATTR_SIZE_MAX) - return -E2BIG; -- kvalue = kvmalloc(size, GFP_KERNEL); -- if (!kvalue) -- return -ENOMEM; -- if (copy_from_user(kvalue, value, size)) { -- error = -EFAULT; -- goto out; ++/* ++ * We usually have 1-2 refs taken, 128 is more than enough and we want to ++ * maximise the margin between this amount and the moment when it overflows. ++ */ ++#define IO_POLL_REF_BIAS 128 + -+ ctx->kvalue = vmemdup_user(ctx->cvalue, ctx->size); -+ if (IS_ERR(ctx->kvalue)) { -+ error = PTR_ERR(ctx->kvalue); -+ ctx->kvalue = NULL; - } -- if ((strcmp(kname, XATTR_NAME_POSIX_ACL_ACCESS) == 0) || -- (strcmp(kname, XATTR_NAME_POSIX_ACL_DEFAULT) == 0)) -- posix_acl_fix_xattr_from_user(mnt_userns, kvalue, size); - } - -- error = vfs_setxattr(mnt_userns, d, kname, kvalue, size, flags); --out: -- kvfree(kvalue); -+ return error; ++static bool io_poll_get_ownership_slowpath(struct io_kiocb *req) ++{ ++ int v; ++ ++ /* ++ * poll_refs are already elevated and we don't have much hope for ++ * grabbing the ownership. Instead of incrementing set a retry flag ++ * to notify the loop that there might have been some change. ++ */ ++ v = atomic_fetch_or(IO_POLL_RETRY_FLAG, &req->poll_refs); ++ if (v & IO_POLL_REF_MASK) ++ return false; ++ return !(atomic_fetch_inc(&req->poll_refs) & IO_POLL_REF_MASK); +} + -+static void setxattr_convert(struct user_namespace *mnt_userns, -+ struct dentry *d, struct xattr_ctx *ctx) ++/* ++ * If refs part of ->poll_refs (see IO_POLL_REF_MASK) is 0, it's free. We can ++ * bump it and acquire ownership. It's disallowed to modify requests while not ++ * owning it, that prevents from races for enqueueing task_work's and b/w ++ * arming poll and wakeups. ++ */ ++static inline bool io_poll_get_ownership(struct io_kiocb *req) +{ -+ if (ctx->size && -+ ((strcmp(ctx->kname->name, XATTR_NAME_POSIX_ACL_ACCESS) == 0) || -+ (strcmp(ctx->kname->name, XATTR_NAME_POSIX_ACL_DEFAULT) == 0))) -+ posix_acl_fix_xattr_from_user(mnt_userns, d_inode(d), -+ ctx->kvalue, ctx->size); ++ if (unlikely(atomic_read(&req->poll_refs) >= IO_POLL_REF_BIAS)) ++ return io_poll_get_ownership_slowpath(req); ++ return !(atomic_fetch_inc(&req->poll_refs) & IO_POLL_REF_MASK); +} + -+int do_setxattr(struct user_namespace *mnt_userns, struct dentry *dentry, -+ struct xattr_ctx *ctx) ++static void io_poll_mark_cancelled(struct io_kiocb *req) +{ -+ setxattr_convert(mnt_userns, dentry, ctx); -+ return vfs_setxattr(mnt_userns, dentry, ctx->kname->name, -+ ctx->kvalue, ctx->size, ctx->flags); ++ atomic_or(IO_POLL_CANCEL_FLAG, &req->poll_refs); +} + -+static long -+setxattr(struct user_namespace *mnt_userns, struct dentry *d, -+ const char __user *name, const void __user *value, size_t size, -+ int flags) ++static struct io_poll_iocb *io_poll_get_double(struct io_kiocb *req) +{ -+ struct xattr_name kname; -+ struct xattr_ctx ctx = { -+ .cvalue = value, -+ .kvalue = NULL, -+ .size = size, -+ .kname = &kname, -+ .flags = flags, -+ }; -+ int error; ++ /* pure poll stashes this in ->async_data, poll driven retry elsewhere */ ++ if (req->opcode == IORING_OP_POLL_ADD) ++ return req->async_data; ++ return req->apoll->double_poll; ++} + -+ error = setxattr_copy(name, &ctx); -+ if (error) -+ return error; ++static struct io_poll_iocb *io_poll_get_single(struct io_kiocb *req) ++{ ++ if (req->opcode == IORING_OP_POLL_ADD) ++ return &req->poll; ++ return &req->apoll->poll; ++} + -+ error = do_setxattr(mnt_userns, d, &ctx); - -+ kvfree(ctx.kvalue); - return error; - } - -@@ -667,7 +702,8 @@ getxattr(struct user_namespace *mnt_userns, struct dentry 
*d, - if (error > 0) { - if ((strcmp(kname, XATTR_NAME_POSIX_ACL_ACCESS) == 0) || - (strcmp(kname, XATTR_NAME_POSIX_ACL_DEFAULT) == 0)) -- posix_acl_fix_xattr_to_user(mnt_userns, kvalue, error); -+ posix_acl_fix_xattr_to_user(mnt_userns, d_inode(d), -+ kvalue, error); - if (size && copy_to_user(value, kvalue, error)) - error = -EFAULT; - } else if (error == -ERANGE && size >= XATTR_SIZE_MAX) { -diff --git a/fs/xfs/libxfs/xfs_ag.h b/fs/xfs/libxfs/xfs_ag.h -index 4c6f9045baca0..3f597cad2c333 100644 ---- a/fs/xfs/libxfs/xfs_ag.h -+++ b/fs/xfs/libxfs/xfs_ag.h -@@ -116,23 +116,29 @@ void xfs_perag_put(struct xfs_perag *pag); - - /* - * Perag iteration APIs -- * -- * XXX: for_each_perag_range() usage really needs an iterator to clean up when -- * we terminate at end_agno because we may have taken a reference to the perag -- * beyond end_agno. Right now callers have to be careful to catch and clean that -- * up themselves. This is not necessary for the callers of for_each_perag() and -- * for_each_perag_from() because they terminate at sb_agcount where there are -- * no perag structures in tree beyond end_agno. - */ --#define for_each_perag_range(mp, next_agno, end_agno, pag) \ -- for ((pag) = xfs_perag_get((mp), (next_agno)); \ -- (pag) != NULL && (next_agno) <= (end_agno); \ -- (next_agno) = (pag)->pag_agno + 1, \ -- xfs_perag_put(pag), \ -- (pag) = xfs_perag_get((mp), (next_agno))) -+static inline struct xfs_perag * -+xfs_perag_next( -+ struct xfs_perag *pag, -+ xfs_agnumber_t *agno, -+ xfs_agnumber_t end_agno) ++static void io_poll_req_insert(struct io_kiocb *req) +{ -+ struct xfs_mount *mp = pag->pag_mount; ++ struct io_ring_ctx *ctx = req->ctx; ++ struct hlist_head *list; + -+ *agno = pag->pag_agno + 1; -+ xfs_perag_put(pag); -+ if (*agno > end_agno) -+ return NULL; -+ return xfs_perag_get(mp, *agno); ++ list = &ctx->cancel_hash[hash_long(req->user_data, ctx->cancel_hash_bits)]; ++ hlist_add_head(&req->hash_node, list); +} + -+#define for_each_perag_range(mp, agno, end_agno, pag) \ -+ for ((pag) = xfs_perag_get((mp), (agno)); \ -+ (pag) != NULL; \ -+ (pag) = xfs_perag_next((pag), &(agno), (end_agno))) - --#define for_each_perag_from(mp, next_agno, pag) \ -- for_each_perag_range((mp), (next_agno), (mp)->m_sb.sb_agcount, (pag)) -+#define for_each_perag_from(mp, agno, pag) \ -+ for_each_perag_range((mp), (agno), (mp)->m_sb.sb_agcount - 1, (pag)) - - - #define for_each_perag(mp, agno, pag) \ -diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c -index fbc9d816882ce..23523b802539e 100644 ---- a/fs/xfs/libxfs/xfs_attr.c -+++ b/fs/xfs/libxfs/xfs_attr.c -@@ -1077,21 +1077,18 @@ xfs_attr_node_hasname( - - state = xfs_da_state_alloc(args); - if (statep != NULL) -- *statep = NULL; -+ *statep = state; - - /* - * Search to see if name exists, and get back a pointer to it. 
- */ - error = xfs_da3_node_lookup_int(state, &retval); -- if (error) { -- xfs_da_state_free(state); -- return error; -- } -+ if (error) -+ retval = error; - -- if (statep != NULL) -- *statep = state; -- else -+ if (!statep) - xfs_da_state_free(state); ++static void io_init_poll_iocb(struct io_poll_iocb *poll, __poll_t events, ++ wait_queue_func_t wake_func) ++{ ++ poll->head = NULL; ++#define IO_POLL_UNMASK (EPOLLERR|EPOLLHUP|EPOLLNVAL|EPOLLRDHUP) ++ /* mask in events that we always want/need */ ++ poll->events = events | IO_POLL_UNMASK; ++ INIT_LIST_HEAD(&poll->wait.entry); ++ init_waitqueue_func_entry(&poll->wait, wake_func); ++} ++ ++static inline void io_poll_remove_entry(struct io_poll_iocb *poll) ++{ ++ struct wait_queue_head *head = smp_load_acquire(&poll->head); ++ ++ if (head) { ++ spin_lock_irq(&head->lock); ++ list_del_init(&poll->wait.entry); ++ poll->head = NULL; ++ spin_unlock_irq(&head->lock); ++ } ++} ++ ++static void io_poll_remove_entries(struct io_kiocb *req) ++{ ++ struct io_poll_iocb *poll = io_poll_get_single(req); ++ struct io_poll_iocb *poll_double = io_poll_get_double(req); + - return retval; - } - -@@ -1112,7 +1109,7 @@ xfs_attr_node_addname_find_attr( - */ - retval = xfs_attr_node_hasname(args, &dac->da_state); - if (retval != -ENOATTR && retval != -EEXIST) -- return retval; -+ goto error; - - if (retval == -ENOATTR && (args->attr_flags & XATTR_REPLACE)) - goto error; -@@ -1337,7 +1334,7 @@ int xfs_attr_node_removename_setup( - - error = xfs_attr_node_hasname(args, state); - if (error != -EEXIST) -- return error; -+ goto out; - error = 0; - - ASSERT((*state)->path.blk[(*state)->path.active - 1].bp != NULL); -diff --git a/fs/xfs/libxfs/xfs_btree_staging.c b/fs/xfs/libxfs/xfs_btree_staging.c -index ac9e80152b5cf..89c8a1498df1d 100644 ---- a/fs/xfs/libxfs/xfs_btree_staging.c -+++ b/fs/xfs/libxfs/xfs_btree_staging.c -@@ -662,7 +662,7 @@ xfs_btree_bload_compute_geometry( - xfs_btree_bload_ensure_slack(cur, &bbl->node_slack, 1); - - bbl->nr_records = nr_this_level = nr_records; -- for (cur->bc_nlevels = 1; cur->bc_nlevels < XFS_BTREE_MAXLEVELS;) { -+ for (cur->bc_nlevels = 1; cur->bc_nlevels <= XFS_BTREE_MAXLEVELS;) { - uint64_t level_blocks; - uint64_t dontcare64; - unsigned int level = cur->bc_nlevels - 1; -@@ -724,7 +724,7 @@ xfs_btree_bload_compute_geometry( - nr_this_level = level_blocks; - } - -- if (cur->bc_nlevels == XFS_BTREE_MAXLEVELS) -+ if (cur->bc_nlevels > XFS_BTREE_MAXLEVELS) - return -EOVERFLOW; - - bbl->btree_height = cur->bc_nlevels; -diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c -index 3932b4ebf9037..f84d3fbb9d3da 100644 ---- a/fs/xfs/libxfs/xfs_inode_buf.c -+++ b/fs/xfs/libxfs/xfs_inode_buf.c -@@ -337,19 +337,36 @@ xfs_dinode_verify_fork( - int whichfork) - { - uint32_t di_nextents = XFS_DFORK_NEXTENTS(dip, whichfork); -+ mode_t mode = be16_to_cpu(dip->di_mode); -+ uint32_t fork_size = XFS_DFORK_SIZE(dip, mp, whichfork); -+ uint32_t fork_format = XFS_DFORK_FORMAT(dip, whichfork); - -- switch (XFS_DFORK_FORMAT(dip, whichfork)) { + /* -+ * For fork types that can contain local data, check that the fork -+ * format matches the size of local data contained within the fork. ++ * While we hold the waitqueue lock and the waitqueue is nonempty, ++ * wake_up_pollfree() will wait for us. However, taking the waitqueue ++ * lock in the first place can race with the waitqueue being freed. 
+ * -+ * For all types, check that when the size says the should be in extent -+ * or btree format, the inode isn't claiming it is in local format. ++ * We solve this as eventpoll does: by taking advantage of the fact that ++ * all users of wake_up_pollfree() will RCU-delay the actual free. If ++ * we enter rcu_read_lock() and see that the pointer to the queue is ++ * non-NULL, we can then lock it without the memory being freed out from ++ * under us. ++ * ++ * Keep holding rcu_read_lock() as long as we hold the queue lock, in ++ * case the caller deletes the entry from the queue, leaving it empty. ++ * In that case, only RCU prevents the queue memory from being freed. + */ -+ if (whichfork == XFS_DATA_FORK) { -+ if (S_ISDIR(mode) || S_ISLNK(mode)) { -+ if (be64_to_cpu(dip->di_size) <= fork_size && -+ fork_format != XFS_DINODE_FMT_LOCAL) -+ return __this_address; ++ rcu_read_lock(); ++ io_poll_remove_entry(poll); ++ if (poll_double) ++ io_poll_remove_entry(poll_double); ++ rcu_read_unlock(); ++} ++ ++/* ++ * All poll tw should go through this. Checks for poll events, manages ++ * references, does rewait, etc. ++ * ++ * Returns a negative error on failure. >0 when no action require, which is ++ * either spurious wakeup or multishot CQE is served. 0 when it's done with ++ * the request, then the mask is stored in req->result. ++ */ ++static int io_poll_check_events(struct io_kiocb *req) ++{ ++ struct io_ring_ctx *ctx = req->ctx; ++ struct io_poll_iocb *poll = io_poll_get_single(req); ++ int v; ++ ++ /* req->task == current here, checking PF_EXITING is safe */ ++ if (unlikely(req->task->flags & PF_EXITING)) ++ io_poll_mark_cancelled(req); ++ ++ do { ++ v = atomic_read(&req->poll_refs); ++ ++ /* tw handler should be the owner, and so have some references */ ++ if (WARN_ON_ONCE(!(v & IO_POLL_REF_MASK))) ++ return 0; ++ if (v & IO_POLL_CANCEL_FLAG) ++ return -ECANCELED; ++ /* ++ * cqe.res contains only events of the first wake up ++ * and all others are be lost. Redo vfs_poll() to get ++ * up to date state. ++ */ ++ if ((v & IO_POLL_REF_MASK) != 1) ++ req->result = 0; ++ if (v & IO_POLL_RETRY_FLAG) { ++ req->result = 0; ++ /* ++ * We won't find new events that came in between ++ * vfs_poll and the ref put unless we clear the ++ * flag in advance. ++ */ ++ atomic_andnot(IO_POLL_RETRY_FLAG, &req->poll_refs); ++ v &= ~IO_POLL_RETRY_FLAG; + } + -+ if (be64_to_cpu(dip->di_size) > fork_size && -+ fork_format == XFS_DINODE_FMT_LOCAL) -+ return __this_address; ++ if (!req->result) { ++ struct poll_table_struct pt = { ._key = poll->events }; ++ ++ req->result = vfs_poll(req->file, &pt) & poll->events; ++ } ++ ++ /* multishot, just fill an CQE and proceed */ ++ if (req->result && !(poll->events & EPOLLONESHOT)) { ++ __poll_t mask = mangle_poll(req->result & poll->events); ++ bool filled; ++ ++ spin_lock(&ctx->completion_lock); ++ filled = io_fill_cqe_aux(ctx, req->user_data, mask, ++ IORING_CQE_F_MORE); ++ io_commit_cqring(ctx); ++ spin_unlock(&ctx->completion_lock); ++ if (unlikely(!filled)) ++ return -ECANCELED; ++ io_cqring_ev_posted(ctx); ++ } else if (req->result) { ++ return 0; ++ } ++ ++ /* force the next iteration to vfs_poll() */ ++ req->result = 0; ++ ++ /* ++ * Release all references, retry if someone tried to restart ++ * task_work while we were executing it. 
++ */ ++ } while (atomic_sub_return(v & IO_POLL_REF_MASK, &req->poll_refs) & ++ IO_POLL_REF_MASK); ++ ++ return 1; ++} ++ ++static void io_poll_task_func(struct io_kiocb *req, bool *locked) ++{ ++ struct io_ring_ctx *ctx = req->ctx; ++ int ret; ++ ++ ret = io_poll_check_events(req); ++ if (ret > 0) ++ return; ++ ++ if (!ret) { ++ req->result = mangle_poll(req->result & req->poll.events); ++ } else { ++ req->result = ret; ++ req_set_fail(req); + } + -+ switch (fork_format) { - case XFS_DINODE_FMT_LOCAL: - /* -- * no local regular files yet -+ * No local regular files yet. - */ -- if (whichfork == XFS_DATA_FORK) { -- if (S_ISREG(be16_to_cpu(dip->di_mode))) -- return __this_address; -- if (be64_to_cpu(dip->di_size) > -- XFS_DFORK_SIZE(dip, mp, whichfork)) -- return __this_address; -- } -+ if (S_ISREG(mode) && whichfork == XFS_DATA_FORK) -+ return __this_address; - if (di_nextents) - return __this_address; - break; -diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c -index 34fc6148032a3..c8c15c3c31471 100644 ---- a/fs/xfs/xfs_aops.c -+++ b/fs/xfs/xfs_aops.c -@@ -82,6 +82,7 @@ xfs_end_ioend( - struct iomap_ioend *ioend) - { - struct xfs_inode *ip = XFS_I(ioend->io_inode); -+ struct xfs_mount *mp = ip->i_mount; - xfs_off_t offset = ioend->io_offset; - size_t size = ioend->io_size; - unsigned int nofs_flag; -@@ -97,18 +98,26 @@ xfs_end_ioend( - /* - * Just clean up the in-memory structures if the fs has been shut down. - */ -- if (xfs_is_shutdown(ip->i_mount)) { -+ if (xfs_is_shutdown(mp)) { - error = -EIO; - goto done; - } - - /* -- * Clean up any COW blocks on an I/O error. -+ * Clean up all COW blocks and underlying data fork delalloc blocks on -+ * I/O error. The delalloc punch is required because this ioend was -+ * mapped to blocks in the COW fork and the associated pages are no -+ * longer dirty. If we don't remove delalloc blocks here, they become -+ * stale and can corrupt free space accounting on unmount. 
- */ - error = blk_status_to_errno(ioend->io_bio->bi_status); - if (unlikely(error)) { -- if (ioend->io_flags & IOMAP_F_SHARED) -+ if (ioend->io_flags & IOMAP_F_SHARED) { - xfs_reflink_cancel_cow_range(ip, offset, size, true); -+ xfs_bmap_punch_delalloc_range(ip, -+ XFS_B_TO_FSBT(mp, offset), -+ XFS_B_TO_FSB(mp, size)); ++ io_poll_remove_entries(req); ++ spin_lock(&ctx->completion_lock); ++ hash_del(&req->hash_node); ++ spin_unlock(&ctx->completion_lock); ++ io_req_complete_post(req, req->result, 0); ++} ++ ++static void io_apoll_task_func(struct io_kiocb *req, bool *locked) ++{ ++ struct io_ring_ctx *ctx = req->ctx; ++ int ret; ++ ++ ret = io_poll_check_events(req); ++ if (ret > 0) ++ return; ++ ++ io_poll_remove_entries(req); ++ spin_lock(&ctx->completion_lock); ++ hash_del(&req->hash_node); ++ spin_unlock(&ctx->completion_lock); ++ ++ if (!ret) ++ io_req_task_submit(req, locked); ++ else ++ io_req_complete_failed(req, ret); ++} ++ ++static void __io_poll_execute(struct io_kiocb *req, int mask) ++{ ++ req->result = mask; ++ if (req->opcode == IORING_OP_POLL_ADD) ++ req->io_task_work.func = io_poll_task_func; ++ else ++ req->io_task_work.func = io_apoll_task_func; ++ ++ trace_io_uring_task_add(req->ctx, req->opcode, req->user_data, mask); ++ io_req_task_work_add(req); ++} ++ ++static inline void io_poll_execute(struct io_kiocb *req, int res) ++{ ++ if (io_poll_get_ownership(req)) ++ __io_poll_execute(req, res); ++} ++ ++static void io_poll_cancel_req(struct io_kiocb *req) ++{ ++ io_poll_mark_cancelled(req); ++ /* kick tw, which should complete the request */ ++ io_poll_execute(req, 0); ++} ++ ++static int io_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync, ++ void *key) ++{ ++ struct io_kiocb *req = wait->private; ++ struct io_poll_iocb *poll = container_of(wait, struct io_poll_iocb, ++ wait); ++ __poll_t mask = key_to_poll(key); ++ ++ if (unlikely(mask & POLLFREE)) { ++ io_poll_mark_cancelled(req); ++ /* we have to kick tw in case it's not already */ ++ io_poll_execute(req, 0); ++ ++ /* ++ * If the waitqueue is being freed early but someone is already ++ * holds ownership over it, we have to tear down the request as ++ * best we can. That means immediately removing the request from ++ * its waitqueue and preventing all further accesses to the ++ * waitqueue via the request. ++ */ ++ list_del_init(&poll->wait.entry); ++ ++ /* ++ * Careful: this *must* be the last step, since as soon ++ * as req->head is NULL'ed out, the request can be ++ * completed and freed, since aio_poll_complete_work() ++ * will no longer need to take the waitqueue lock. ++ */ ++ smp_store_release(&poll->head, NULL); ++ return 1; ++ } ++ ++ /* for instances that support it check for an event match first */ ++ if (mask && !(mask & poll->events)) ++ return 0; ++ ++ if (io_poll_get_ownership(req)) { ++ /* ++ * If we trigger a multishot poll off our own wakeup path, ++ * disable multishot as there is a circular dependency between ++ * CQ posting and triggering the event. ++ */ ++ if (mask & EPOLL_URING_WAKE) ++ poll->events |= EPOLLONESHOT; ++ ++ __io_poll_execute(req, mask); ++ } ++ return 1; ++} ++ ++static void __io_queue_proc(struct io_poll_iocb *poll, struct io_poll_table *pt, ++ struct wait_queue_head *head, ++ struct io_poll_iocb **poll_ptr) ++{ ++ struct io_kiocb *req = pt->req; ++ ++ /* ++ * The file being polled uses multiple waitqueues for poll handling ++ * (e.g. one for read, one for write). Setup a separate io_poll_iocb ++ * if this happens. 
++ */ ++ if (unlikely(pt->nr_entries)) { ++ struct io_poll_iocb *first = poll; ++ ++ /* double add on the same waitqueue head, ignore */ ++ if (first->head == head) ++ return; ++ /* already have a 2nd entry, fail a third attempt */ ++ if (*poll_ptr) { ++ if ((*poll_ptr)->head == head) ++ return; ++ pt->error = -EINVAL; ++ return; + } - goto done; - } - -diff --git a/fs/xfs/xfs_bio_io.c b/fs/xfs/xfs_bio_io.c -index 667e297f59b16..17f36db2f7928 100644 ---- a/fs/xfs/xfs_bio_io.c -+++ b/fs/xfs/xfs_bio_io.c -@@ -9,41 +9,6 @@ static inline unsigned int bio_max_vecs(unsigned int count) - return bio_max_segs(howmany(count, PAGE_SIZE)); - } - --static void --xfs_flush_bdev_async_endio( -- struct bio *bio) --{ -- complete(bio->bi_private); --} -- --/* -- * Submit a request for an async cache flush to run. If the request queue does -- * not require flush operations, just skip it altogether. If the caller needs -- * to wait for the flush completion at a later point in time, they must supply a -- * valid completion. This will be signalled when the flush completes. The -- * caller never sees the bio that is issued here. -- */ --void --xfs_flush_bdev_async( -- struct bio *bio, -- struct block_device *bdev, -- struct completion *done) --{ -- struct request_queue *q = bdev->bd_disk->queue; -- -- if (!test_bit(QUEUE_FLAG_WC, &q->queue_flags)) { -- complete(done); -- return; -- } -- -- bio_init(bio, NULL, 0); -- bio_set_dev(bio, bdev); -- bio->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH | REQ_SYNC; -- bio->bi_private = done; -- bio->bi_end_io = xfs_flush_bdev_async_endio; -- -- submit_bio(bio); --} - int - xfs_rw_bdev( - struct block_device *bdev, -diff --git a/fs/xfs/xfs_buf_item_recover.c b/fs/xfs/xfs_buf_item_recover.c -index a476c7ef5d533..991fbf1eb5640 100644 ---- a/fs/xfs/xfs_buf_item_recover.c -+++ b/fs/xfs/xfs_buf_item_recover.c -@@ -816,7 +816,7 @@ xlog_recover_get_buf_lsn( - } - - if (lsn != (xfs_lsn_t)-1) { -- if (!uuid_equal(&mp->m_sb.sb_uuid, uuid)) -+ if (!uuid_equal(&mp->m_sb.sb_meta_uuid, uuid)) - goto recover_immediately; - return lsn; - } -diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c -index 3f8a0713573ad..a4b8caa2c601d 100644 ---- a/fs/xfs/xfs_extfree_item.c -+++ b/fs/xfs/xfs_extfree_item.c -@@ -482,7 +482,7 @@ xfs_extent_free_finish_item( - free->xefi_startblock, - free->xefi_blockcount, - &free->xefi_oinfo, free->xefi_skip_discard); -- kmem_free(free); -+ kmem_cache_free(xfs_bmap_free_item_zone, free); - return error; - } - -@@ -502,7 +502,7 @@ xfs_extent_free_cancel_item( - struct xfs_extent_free_item *free; - - free = container_of(item, struct xfs_extent_free_item, xefi_list); -- kmem_free(free); -+ kmem_cache_free(xfs_bmap_free_item_zone, free); - } - - const struct xfs_defer_op_type xfs_extent_free_defer_type = { -@@ -564,7 +564,7 @@ xfs_agfl_free_finish_item( - extp->ext_len = free->xefi_blockcount; - efdp->efd_next_extent++; - -- kmem_free(free); -+ kmem_cache_free(xfs_bmap_free_item_zone, free); - return error; - } - -diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c -index 7aa943edfc02f..240eb932c014b 100644 ---- a/fs/xfs/xfs_file.c -+++ b/fs/xfs/xfs_file.c -@@ -259,7 +259,7 @@ xfs_file_dio_read( - ret = xfs_ilock_iocb(iocb, XFS_IOLOCK_SHARED); - if (ret) - return ret; -- ret = iomap_dio_rw(iocb, to, &xfs_read_iomap_ops, NULL, 0); -+ ret = iomap_dio_rw(iocb, to, &xfs_read_iomap_ops, NULL, 0, 0); - xfs_iunlock(ip, XFS_IOLOCK_SHARED); - - return ret; -@@ -569,7 +569,7 @@ xfs_file_dio_write_aligned( - } - trace_xfs_file_direct_write(iocb, from); - ret = 
iomap_dio_rw(iocb, from, &xfs_direct_write_iomap_ops, -- &xfs_dio_write_ops, 0); -+ &xfs_dio_write_ops, 0, 0); - out_unlock: - if (iolock) - xfs_iunlock(ip, iolock); -@@ -647,7 +647,7 @@ retry_exclusive: - - trace_xfs_file_direct_write(iocb, from); - ret = iomap_dio_rw(iocb, from, &xfs_direct_write_iomap_ops, -- &xfs_dio_write_ops, flags); -+ &xfs_dio_write_ops, flags, 0); - - /* - * Retry unaligned I/O with exclusive blocking semantics if the DIO -diff --git a/fs/xfs/xfs_filestream.c b/fs/xfs/xfs_filestream.c -index 6a3ce0f6dc9e9..be9bcf8a1f991 100644 ---- a/fs/xfs/xfs_filestream.c -+++ b/fs/xfs/xfs_filestream.c -@@ -128,11 +128,12 @@ xfs_filestream_pick_ag( - if (!pag->pagf_init) { - err = xfs_alloc_pagf_init(mp, NULL, ag, trylock); - if (err) { -- xfs_perag_put(pag); -- if (err != -EAGAIN) -+ if (err != -EAGAIN) { -+ xfs_perag_put(pag); - return err; -+ } - /* Couldn't lock the AGF, skip this AG. */ -- continue; -+ goto next_ag; - } - } - -diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c -index 33e26690a8c4f..5b5b68affe66d 100644 ---- a/fs/xfs/xfs_fsops.c -+++ b/fs/xfs/xfs_fsops.c -@@ -430,46 +430,36 @@ xfs_reserve_blocks( - * If the request is larger than the current reservation, reserve the - * blocks before we update the reserve counters. Sample m_fdblocks and - * perform a partial reservation if the request exceeds free space. -+ * -+ * The code below estimates how many blocks it can request from -+ * fdblocks to stash in the reserve pool. This is a classic TOCTOU -+ * race since fdblocks updates are not always coordinated via -+ * m_sb_lock. Set the reserve size even if there's not enough free -+ * space to fill it because mod_fdblocks will refill an undersized -+ * reserve when it can. - */ -- error = -ENOSPC; -- do { -- free = percpu_counter_sum(&mp->m_fdblocks) - -- mp->m_alloc_set_aside; -- if (free <= 0) -- break; -- -- delta = request - mp->m_resblks; -- lcounter = free - delta; -- if (lcounter < 0) -- /* We can't satisfy the request, just get what we can */ -- fdblks_delta = free; -- else -- fdblks_delta = delta; -- -+ free = percpu_counter_sum(&mp->m_fdblocks) - -+ xfs_fdblocks_unavailable(mp); -+ delta = request - mp->m_resblks; -+ mp->m_resblks = request; -+ if (delta > 0 && free > 0) { - /* - * We'll either succeed in getting space from the free block -- * count or we'll get an ENOSPC. If we get a ENOSPC, it means -- * things changed while we were calculating fdblks_delta and so -- * we should try again to see if there is anything left to -- * reserve. -+ * count or we'll get an ENOSPC. Don't set the reserved flag -+ * here - we don't want to reserve the extra reserve blocks -+ * from the reserve. - * -- * Don't set the reserved flag here - we don't want to reserve -- * the extra reserve blocks from the reserve..... -+ * The desired reserve size can change after we drop the lock. -+ * Use mod_fdblocks to put the space into the reserve or into -+ * fdblocks as appropriate. - */ -+ fdblks_delta = min(free, delta); - spin_unlock(&mp->m_sb_lock); - error = xfs_mod_fdblocks(mp, -fdblks_delta, 0); -+ if (!error) -+ xfs_mod_fdblocks(mp, fdblks_delta, 0); - spin_lock(&mp->m_sb_lock); -- } while (error == -ENOSPC); -- -- /* -- * Update the reserve counters if blocks have been successfully -- * allocated. 
-- */ -- if (!error && fdblks_delta) { -- mp->m_resblks += fdblks_delta; -- mp->m_resblks_avail += fdblks_delta; - } -- - out: - if (outval) { - outval->resblks = mp->m_resblks; -diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c -index f2210d927481b..5e44d7bbd8fca 100644 ---- a/fs/xfs/xfs_icache.c -+++ b/fs/xfs/xfs_icache.c -@@ -1872,28 +1872,20 @@ xfs_inodegc_worker( - } - - /* -- * Force all currently queued inode inactivation work to run immediately, and -- * wait for the work to finish. Two pass - queue all the work first pass, wait -- * for it in a second pass. -+ * Force all currently queued inode inactivation work to run immediately and -+ * wait for the work to finish. - */ - void - xfs_inodegc_flush( - struct xfs_mount *mp) - { -- struct xfs_inodegc *gc; -- int cpu; -- - if (!xfs_is_inodegc_enabled(mp)) - return; - - trace_xfs_inodegc_flush(mp, __return_address); - - xfs_inodegc_queue_all(mp); -- -- for_each_online_cpu(cpu) { -- gc = per_cpu_ptr(mp->m_inodegc, cpu); -- flush_work(&gc->work); -- } -+ flush_workqueue(mp->m_inodegc_wq); - } - - /* -@@ -1904,18 +1896,12 @@ void - xfs_inodegc_stop( - struct xfs_mount *mp) - { -- struct xfs_inodegc *gc; -- int cpu; -- - if (!xfs_clear_inodegc_enabled(mp)) - return; - - xfs_inodegc_queue_all(mp); -+ drain_workqueue(mp->m_inodegc_wq); - -- for_each_online_cpu(cpu) { -- gc = per_cpu_ptr(mp->m_inodegc, cpu); -- cancel_work_sync(&gc->work); -- } - trace_xfs_inodegc_stop(mp, __return_address); - } - -diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c -index a4f6f034fb813..b2ea853182141 100644 ---- a/fs/xfs/xfs_inode.c -+++ b/fs/xfs/xfs_inode.c -@@ -994,8 +994,8 @@ xfs_create( - /* - * Make sure that we have allocated dquot(s) on disk. - */ -- error = xfs_qm_vop_dqalloc(dp, mapped_fsuid(mnt_userns), -- mapped_fsgid(mnt_userns), prid, -+ error = xfs_qm_vop_dqalloc(dp, mapped_fsuid(mnt_userns, &init_user_ns), -+ mapped_fsgid(mnt_userns, &init_user_ns), prid, - XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, - &udqp, &gdqp, &pdqp); - if (error) -@@ -1148,8 +1148,8 @@ xfs_create_tmpfile( - /* - * Make sure that we have allocated dquot(s) on disk. 
- */ -- error = xfs_qm_vop_dqalloc(dp, mapped_fsuid(mnt_userns), -- mapped_fsgid(mnt_userns), prid, -+ error = xfs_qm_vop_dqalloc(dp, mapped_fsuid(mnt_userns, &init_user_ns), -+ mapped_fsgid(mnt_userns, &init_user_ns), prid, - XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, - &udqp, &gdqp, &pdqp); - if (error) -@@ -1223,7 +1223,7 @@ xfs_link( - { - xfs_mount_t *mp = tdp->i_mount; - xfs_trans_t *tp; -- int error; -+ int error, nospace_error = 0; - int resblks; - - trace_xfs_link(tdp, target_name); -@@ -1242,19 +1242,11 @@ xfs_link( - goto std_return; - - resblks = XFS_LINK_SPACE_RES(mp, target_name->len); -- error = xfs_trans_alloc(mp, &M_RES(mp)->tr_link, resblks, 0, 0, &tp); -- if (error == -ENOSPC) { -- resblks = 0; -- error = xfs_trans_alloc(mp, &M_RES(mp)->tr_link, 0, 0, 0, &tp); -- } -+ error = xfs_trans_alloc_dir(tdp, &M_RES(mp)->tr_link, sip, &resblks, -+ &tp, &nospace_error); - if (error) - goto std_return; - -- xfs_lock_two_inodes(sip, XFS_ILOCK_EXCL, tdp, XFS_ILOCK_EXCL); -- -- xfs_trans_ijoin(tp, sip, XFS_ILOCK_EXCL); -- xfs_trans_ijoin(tp, tdp, XFS_ILOCK_EXCL); -- - error = xfs_iext_count_may_overflow(tdp, XFS_DATA_FORK, - XFS_IEXT_DIR_MANIP_CNT(mp)); - if (error) -@@ -1312,6 +1304,8 @@ xfs_link( - error_return: - xfs_trans_cancel(tp); - std_return: -+ if (error == -ENOSPC && nospace_error) -+ error = nospace_error; - return error; - } - -@@ -2605,14 +2599,13 @@ xfs_ifree_cluster( - } - - /* -- * This is called to return an inode to the inode free list. -- * The inode should already be truncated to 0 length and have -- * no pages associated with it. This routine also assumes that -- * the inode is already a part of the transaction. -+ * This is called to return an inode to the inode free list. The inode should -+ * already be truncated to 0 length and have no pages associated with it. This -+ * routine also assumes that the inode is already a part of the transaction. - * -- * The on-disk copy of the inode will have been added to the list -- * of unlinked inodes in the AGI. We need to remove the inode from -- * that list atomically with respect to freeing it here. -+ * The on-disk copy of the inode will have been added to the list of unlinked -+ * inodes in the AGI. We need to remove the inode from that list atomically with -+ * respect to freeing it here. - */ - int - xfs_ifree( -@@ -2634,13 +2627,16 @@ xfs_ifree( - pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino)); - - /* -- * Pull the on-disk inode from the AGI unlinked list. -+ * Free the inode first so that we guarantee that the AGI lock is going -+ * to be taken before we remove the inode from the unlinked list. This -+ * makes the AGI lock -> unlinked list modification order the same as -+ * used in O_TMPFILE creation. - */ -- error = xfs_iunlink_remove(tp, pag, ip); -+ error = xfs_difree(tp, pag, ip->i_ino, &xic); - if (error) - goto out; - -- error = xfs_difree(tp, pag, ip->i_ino, &xic); -+ error = xfs_iunlink_remove(tp, pag, ip); - if (error) - goto out; - -@@ -2761,6 +2757,7 @@ xfs_remove( - xfs_mount_t *mp = dp->i_mount; - xfs_trans_t *tp = NULL; - int is_dir = S_ISDIR(VFS_I(ip)->i_mode); -+ int dontcare; - int error = 0; - uint resblks; - -@@ -2778,31 +2775,24 @@ xfs_remove( - goto std_return; - - /* -- * We try to get the real space reservation first, -- * allowing for directory btree deletion(s) implying -- * possible bmap insert(s). 
If we can't get the space -- * reservation then we use 0 instead, and avoid the bmap -- * btree insert(s) in the directory code by, if the bmap -- * insert tries to happen, instead trimming the LAST -- * block from the directory. -+ * We try to get the real space reservation first, allowing for -+ * directory btree deletion(s) implying possible bmap insert(s). If we -+ * can't get the space reservation then we use 0 instead, and avoid the -+ * bmap btree insert(s) in the directory code by, if the bmap insert -+ * tries to happen, instead trimming the LAST block from the directory. -+ * -+ * Ignore EDQUOT and ENOSPC being returned via nospace_error because -+ * the directory code can handle a reservationless update and we don't -+ * want to prevent a user from trying to free space by deleting things. - */ - resblks = XFS_REMOVE_SPACE_RES(mp); -- error = xfs_trans_alloc(mp, &M_RES(mp)->tr_remove, resblks, 0, 0, &tp); -- if (error == -ENOSPC) { -- resblks = 0; -- error = xfs_trans_alloc(mp, &M_RES(mp)->tr_remove, 0, 0, 0, -- &tp); -- } -+ error = xfs_trans_alloc_dir(dp, &M_RES(mp)->tr_remove, ip, &resblks, -+ &tp, &dontcare); - if (error) { - ASSERT(error != -ENOSPC); - goto std_return; - } - -- xfs_lock_two_inodes(dp, XFS_ILOCK_EXCL, ip, XFS_ILOCK_EXCL); -- -- xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL); -- xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); -- - /* - * If we're removing a directory perform some additional validation. - */ -@@ -3115,7 +3105,8 @@ xfs_rename( - bool new_parent = (src_dp != target_dp); - bool src_is_directory = S_ISDIR(VFS_I(src_ip)->i_mode); - int spaceres; -- int error; -+ bool retried = false; -+ int error, nospace_error = 0; - - trace_xfs_rename(src_dp, target_dp, src_name, target_name); - -@@ -3128,7 +3119,6 @@ xfs_rename( - * appropriately. 
- */ - if (flags & RENAME_WHITEOUT) { -- ASSERT(!(flags & (RENAME_NOREPLACE | RENAME_EXCHANGE))); - error = xfs_rename_alloc_whiteout(mnt_userns, target_dp, &wip); - if (error) - return error; -@@ -3140,9 +3130,12 @@ xfs_rename( - xfs_sort_for_rename(src_dp, target_dp, src_ip, target_ip, wip, - inodes, &num_inodes); - -+retry: -+ nospace_error = 0; - spaceres = XFS_RENAME_SPACE_RES(mp, target_name->len); - error = xfs_trans_alloc(mp, &M_RES(mp)->tr_rename, spaceres, 0, 0, &tp); - if (error == -ENOSPC) { -+ nospace_error = error; - spaceres = 0; - error = xfs_trans_alloc(mp, &M_RES(mp)->tr_rename, 0, 0, 0, - &tp); -@@ -3196,6 +3189,31 @@ xfs_rename( - target_dp, target_name, target_ip, - spaceres); - ++ ++ poll = kmalloc(sizeof(*poll), GFP_ATOMIC); ++ if (!poll) { ++ pt->error = -ENOMEM; ++ return; ++ } ++ io_init_poll_iocb(poll, first->events, first->wait.func); ++ *poll_ptr = poll; ++ } ++ ++ pt->nr_entries++; ++ poll->head = head; ++ poll->wait.private = req; ++ ++ if (poll->events & EPOLLEXCLUSIVE) ++ add_wait_queue_exclusive(head, &poll->wait); ++ else ++ add_wait_queue(head, &poll->wait); ++} ++ ++static void io_poll_queue_proc(struct file *file, struct wait_queue_head *head, ++ struct poll_table_struct *p) ++{ ++ struct io_poll_table *pt = container_of(p, struct io_poll_table, pt); ++ ++ __io_queue_proc(&pt->req->poll, pt, head, ++ (struct io_poll_iocb **) &pt->req->async_data); ++} ++ ++static int __io_arm_poll_handler(struct io_kiocb *req, ++ struct io_poll_iocb *poll, ++ struct io_poll_table *ipt, __poll_t mask) ++{ ++ struct io_ring_ctx *ctx = req->ctx; ++ ++ INIT_HLIST_NODE(&req->hash_node); ++ io_init_poll_iocb(poll, mask, io_poll_wake); ++ poll->file = req->file; ++ poll->wait.private = req; ++ ++ ipt->pt._key = mask; ++ ipt->req = req; ++ ipt->error = 0; ++ ipt->nr_entries = 0; ++ + /* -+ * Try to reserve quota to handle an expansion of the target directory. -+ * We'll allow the rename to continue in reservationless mode if we hit -+ * a space usage constraint. If we trigger reservationless mode, save -+ * the errno if there isn't any free space in the target directory. ++ * Take the ownership to delay any tw execution up until we're done ++ * with poll arming. see io_poll_get_ownership(). + */ -+ if (spaceres != 0) { -+ error = xfs_trans_reserve_quota_nblks(tp, target_dp, spaceres, -+ 0, false); -+ if (error == -EDQUOT || error == -ENOSPC) { -+ if (!retried) { -+ xfs_trans_cancel(tp); -+ xfs_blockgc_free_quota(target_dp, 0); -+ retried = true; -+ goto retry; ++ atomic_set(&req->poll_refs, 1); ++ mask = vfs_poll(req->file, &ipt->pt) & poll->events; ++ ++ if (mask && (poll->events & EPOLLONESHOT)) { ++ io_poll_remove_entries(req); ++ /* no one else has access to the req, forget about the ref */ ++ return mask; ++ } ++ if (!mask && unlikely(ipt->error || !ipt->nr_entries)) { ++ io_poll_remove_entries(req); ++ if (!ipt->error) ++ ipt->error = -EINVAL; ++ return 0; ++ } ++ ++ spin_lock(&ctx->completion_lock); ++ io_poll_req_insert(req); ++ spin_unlock(&ctx->completion_lock); ++ ++ if (mask) { ++ /* can't multishot if failed, just queue the event we've got */ ++ if (unlikely(ipt->error || !ipt->nr_entries)) { ++ poll->events |= EPOLLONESHOT; ++ ipt->error = 0; ++ } ++ __io_poll_execute(req, mask); ++ return 0; ++ } ++ ++ /* ++ * Try to release ownership. If we see a change of state, e.g. ++ * poll was waken up, queue up a tw, it'll deal with it. 
++ */ ++ if (atomic_cmpxchg(&req->poll_refs, 1, 0) != 1) ++ __io_poll_execute(req, 0); ++ return 0; ++} ++ ++static void io_async_queue_proc(struct file *file, struct wait_queue_head *head, ++ struct poll_table_struct *p) ++{ ++ struct io_poll_table *pt = container_of(p, struct io_poll_table, pt); ++ struct async_poll *apoll = pt->req->apoll; ++ ++ __io_queue_proc(&apoll->poll, pt, head, &apoll->double_poll); ++} ++ ++enum { ++ IO_APOLL_OK, ++ IO_APOLL_ABORTED, ++ IO_APOLL_READY ++}; ++ ++static int io_arm_poll_handler(struct io_kiocb *req) ++{ ++ const struct io_op_def *def = &io_op_defs[req->opcode]; ++ struct io_ring_ctx *ctx = req->ctx; ++ struct async_poll *apoll; ++ struct io_poll_table ipt; ++ __poll_t mask = EPOLLONESHOT | POLLERR | POLLPRI; ++ int ret; ++ ++ if (!req->file || !file_can_poll(req->file)) ++ return IO_APOLL_ABORTED; ++ if ((req->flags & (REQ_F_POLLED|REQ_F_PARTIAL_IO)) == REQ_F_POLLED) ++ return IO_APOLL_ABORTED; ++ if (!def->pollin && !def->pollout) ++ return IO_APOLL_ABORTED; ++ ++ if (def->pollin) { ++ mask |= POLLIN | POLLRDNORM; ++ ++ /* If reading from MSG_ERRQUEUE using recvmsg, ignore POLLIN */ ++ if ((req->opcode == IORING_OP_RECVMSG) && ++ (req->sr_msg.msg_flags & MSG_ERRQUEUE)) ++ mask &= ~POLLIN; ++ } else { ++ mask |= POLLOUT | POLLWRNORM; ++ } ++ ++ if (req->flags & REQ_F_POLLED) { ++ apoll = req->apoll; ++ kfree(apoll->double_poll); ++ } else { ++ apoll = kmalloc(sizeof(*apoll), GFP_ATOMIC); ++ } ++ if (unlikely(!apoll)) ++ return IO_APOLL_ABORTED; ++ apoll->double_poll = NULL; ++ req->apoll = apoll; ++ req->flags |= REQ_F_POLLED; ++ ipt.pt._qproc = io_async_queue_proc; ++ ++ ret = __io_arm_poll_handler(req, &apoll->poll, &ipt, mask); ++ if (ret || ipt.error) ++ return ret ? IO_APOLL_READY : IO_APOLL_ABORTED; ++ ++ trace_io_uring_poll_arm(ctx, req, req->opcode, req->user_data, ++ mask, apoll->poll.events); ++ return IO_APOLL_OK; ++} ++ ++/* ++ * Returns true if we found and killed one or more poll requests ++ */ ++static bool io_poll_remove_all(struct io_ring_ctx *ctx, struct task_struct *tsk, ++ bool cancel_all) ++{ ++ struct hlist_node *tmp; ++ struct io_kiocb *req; ++ bool found = false; ++ int i; ++ ++ spin_lock(&ctx->completion_lock); ++ for (i = 0; i < (1U << ctx->cancel_hash_bits); i++) { ++ struct hlist_head *list; ++ ++ list = &ctx->cancel_hash[i]; ++ hlist_for_each_entry_safe(req, tmp, list, hash_node) { ++ if (io_match_task_safe(req, tsk, cancel_all)) { ++ hlist_del_init(&req->hash_node); ++ io_poll_cancel_req(req); ++ found = true; + } ++ } ++ } ++ spin_unlock(&ctx->completion_lock); ++ return found; ++} + -+ nospace_error = error; -+ spaceres = 0; -+ error = 0; ++static struct io_kiocb *io_poll_find(struct io_ring_ctx *ctx, __u64 sqe_addr, ++ bool poll_only) ++ __must_hold(&ctx->completion_lock) ++{ ++ struct hlist_head *list; ++ struct io_kiocb *req; ++ ++ list = &ctx->cancel_hash[hash_long(sqe_addr, ctx->cancel_hash_bits)]; ++ hlist_for_each_entry(req, list, hash_node) { ++ if (sqe_addr != req->user_data) ++ continue; ++ if (poll_only && req->opcode != IORING_OP_POLL_ADD) ++ continue; ++ return req; ++ } ++ return NULL; ++} ++ ++static bool io_poll_disarm(struct io_kiocb *req) ++ __must_hold(&ctx->completion_lock) ++{ ++ if (!io_poll_get_ownership(req)) ++ return false; ++ io_poll_remove_entries(req); ++ hash_del(&req->hash_node); ++ return true; ++} ++ ++static int io_poll_cancel(struct io_ring_ctx *ctx, __u64 sqe_addr, ++ bool poll_only) ++ __must_hold(&ctx->completion_lock) ++{ ++ struct io_kiocb *req = io_poll_find(ctx, sqe_addr, 
poll_only); ++ ++ if (!req) ++ return -ENOENT; ++ io_poll_cancel_req(req); ++ return 0; ++} ++ ++static __poll_t io_poll_parse_events(const struct io_uring_sqe *sqe, ++ unsigned int flags) ++{ ++ u32 events; ++ ++ events = READ_ONCE(sqe->poll32_events); ++#ifdef __BIG_ENDIAN ++ events = swahw32(events); ++#endif ++ if (!(flags & IORING_POLL_ADD_MULTI)) ++ events |= EPOLLONESHOT; ++ return demangle_poll(events) | (events & (EPOLLEXCLUSIVE|EPOLLONESHOT)); ++} ++ ++static int io_poll_update_prep(struct io_kiocb *req, ++ const struct io_uring_sqe *sqe) ++{ ++ struct io_poll_update *upd = &req->poll_update; ++ u32 flags; ++ ++ if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) ++ return -EINVAL; ++ if (sqe->ioprio || sqe->buf_index || sqe->splice_fd_in) ++ return -EINVAL; ++ flags = READ_ONCE(sqe->len); ++ if (flags & ~(IORING_POLL_UPDATE_EVENTS | IORING_POLL_UPDATE_USER_DATA | ++ IORING_POLL_ADD_MULTI)) ++ return -EINVAL; ++ /* meaningless without update */ ++ if (flags == IORING_POLL_ADD_MULTI) ++ return -EINVAL; ++ ++ upd->old_user_data = READ_ONCE(sqe->addr); ++ upd->update_events = flags & IORING_POLL_UPDATE_EVENTS; ++ upd->update_user_data = flags & IORING_POLL_UPDATE_USER_DATA; ++ ++ upd->new_user_data = READ_ONCE(sqe->off); ++ if (!upd->update_user_data && upd->new_user_data) ++ return -EINVAL; ++ if (upd->update_events) ++ upd->events = io_poll_parse_events(sqe, flags); ++ else if (sqe->poll32_events) ++ return -EINVAL; ++ ++ return 0; ++} ++ ++static int io_poll_add_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) ++{ ++ struct io_poll_iocb *poll = &req->poll; ++ u32 flags; ++ ++ if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) ++ return -EINVAL; ++ if (sqe->ioprio || sqe->buf_index || sqe->off || sqe->addr) ++ return -EINVAL; ++ flags = READ_ONCE(sqe->len); ++ if (flags & ~IORING_POLL_ADD_MULTI) ++ return -EINVAL; ++ ++ io_req_set_refcount(req); ++ poll->events = io_poll_parse_events(sqe, flags); ++ return 0; ++} ++ ++static int io_poll_add(struct io_kiocb *req, unsigned int issue_flags) ++{ ++ struct io_poll_iocb *poll = &req->poll; ++ struct io_poll_table ipt; ++ int ret; ++ ++ ipt.pt._qproc = io_poll_queue_proc; ++ ++ ret = __io_arm_poll_handler(req, &req->poll, &ipt, poll->events); ++ if (!ret && ipt.error) ++ req_set_fail(req); ++ ret = ret ?: ipt.error; ++ if (ret) ++ __io_req_complete(req, issue_flags, ret, 0); ++ return 0; ++} ++ ++static int io_poll_update(struct io_kiocb *req, unsigned int issue_flags) ++{ ++ struct io_ring_ctx *ctx = req->ctx; ++ struct io_kiocb *preq; ++ int ret2, ret = 0; ++ ++ spin_lock(&ctx->completion_lock); ++ preq = io_poll_find(ctx, req->poll_update.old_user_data, true); ++ if (!preq || !io_poll_disarm(preq)) { ++ spin_unlock(&ctx->completion_lock); ++ ret = preq ? 
-EALREADY : -ENOENT; ++ goto out; ++ } ++ spin_unlock(&ctx->completion_lock); ++ ++ if (req->poll_update.update_events || req->poll_update.update_user_data) { ++ /* only mask one event flags, keep behavior flags */ ++ if (req->poll_update.update_events) { ++ preq->poll.events &= ~0xffff; ++ preq->poll.events |= req->poll_update.events & 0xffff; ++ preq->poll.events |= IO_POLL_UNMASK; + } -+ if (error) -+ goto out_trans_cancel; ++ if (req->poll_update.update_user_data) ++ preq->user_data = req->poll_update.new_user_data; ++ ++ ret2 = io_poll_add(preq, issue_flags); ++ /* successfully updated, don't complete poll request */ ++ if (!ret2) ++ goto out; + } ++ req_set_fail(preq); ++ io_req_complete(preq, -ECANCELED); ++out: ++ if (ret < 0) ++ req_set_fail(req); ++ /* complete update request, we're done with it */ ++ io_req_complete(req, ret); ++ return 0; ++} + - /* - * Check for expected errors before we dirty the transaction - * so we can return an error without a transaction abort. -@@ -3442,6 +3460,8 @@ out_trans_cancel: - out_release_wip: - if (wip) - xfs_irele(wip); -+ if (error == -ENOSPC && nospace_error) -+ error = nospace_error; - return error; - } - -diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c -index 0c795dc093efa..bcc3c18c8080b 100644 ---- a/fs/xfs/xfs_ioctl.c -+++ b/fs/xfs/xfs_ioctl.c -@@ -372,7 +372,7 @@ int - xfs_ioc_attr_list( - struct xfs_inode *dp, - void __user *ubuf, -- int bufsize, -+ size_t bufsize, - int flags, - struct xfs_attrlist_cursor __user *ucursor) - { -@@ -687,7 +687,8 @@ xfs_ioc_space( - - if (bf->l_start > XFS_ISIZE(ip)) { - error = xfs_alloc_file_space(ip, XFS_ISIZE(ip), -- bf->l_start - XFS_ISIZE(ip), 0); -+ bf->l_start - XFS_ISIZE(ip), -+ XFS_BMAPI_PREALLOC); - if (error) - goto out_unlock; - } -@@ -1544,7 +1545,7 @@ xfs_ioc_getbmap( - - if (bmx.bmv_count < 2) - return -EINVAL; -- if (bmx.bmv_count > ULONG_MAX / recsize) -+ if (bmx.bmv_count >= INT_MAX / recsize) - return -ENOMEM; - - buf = kvzalloc(bmx.bmv_count * sizeof(*buf), GFP_KERNEL); -diff --git a/fs/xfs/xfs_ioctl.h b/fs/xfs/xfs_ioctl.h -index 28453a6d44618..845d3bcab74b4 100644 ---- a/fs/xfs/xfs_ioctl.h -+++ b/fs/xfs/xfs_ioctl.h -@@ -38,8 +38,9 @@ xfs_readlink_by_handle( - int xfs_ioc_attrmulti_one(struct file *parfilp, struct inode *inode, - uint32_t opcode, void __user *uname, void __user *value, - uint32_t *len, uint32_t flags); --int xfs_ioc_attr_list(struct xfs_inode *dp, void __user *ubuf, int bufsize, -- int flags, struct xfs_attrlist_cursor __user *ucursor); -+int xfs_ioc_attr_list(struct xfs_inode *dp, void __user *ubuf, -+ size_t bufsize, int flags, -+ struct xfs_attrlist_cursor __user *ucursor); - - extern struct dentry * - xfs_handle_to_dentry( -diff --git a/fs/xfs/xfs_linux.h b/fs/xfs/xfs_linux.h -index c174262a074e3..cb9105d667db4 100644 ---- a/fs/xfs/xfs_linux.h -+++ b/fs/xfs/xfs_linux.h -@@ -61,6 +61,7 @@ typedef __u32 xfs_nlink_t; - #include <linux/ratelimit.h> - #include <linux/rhashtable.h> - #include <linux/xattr.h> -+#include <linux/mnt_idmapping.h> - - #include <asm/page.h> - #include <asm/div64.h> -@@ -196,8 +197,6 @@ static inline uint64_t howmany_64(uint64_t x, uint32_t y) - - int xfs_rw_bdev(struct block_device *bdev, sector_t sector, unsigned int count, - char *data, unsigned int op); --void xfs_flush_bdev_async(struct bio *bio, struct block_device *bdev, -- struct completion *done); - - #define ASSERT_ALWAYS(expr) \ - (likely(expr) ? 
(void)0 : assfail(NULL, #expr, __FILE__, __LINE__)) -diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c -index f6cd2d4aa770d..0fb7d05ca308d 100644 ---- a/fs/xfs/xfs_log.c -+++ b/fs/xfs/xfs_log.c -@@ -487,7 +487,10 @@ out_error: - * Run all the pending iclog callbacks and wake log force waiters and iclog - * space waiters so they can process the newly set shutdown state. We really - * don't care what order we process callbacks here because the log is shut down -- * and so state cannot change on disk anymore. -+ * and so state cannot change on disk anymore. However, we cannot wake waiters -+ * until the callbacks have been processed because we may be in unmount and -+ * we must ensure that all AIL operations the callbacks perform have completed -+ * before we tear down the AIL. - * - * We avoid processing actively referenced iclogs so that we don't run callbacks - * while the iclog owner might still be preparing the iclog for IO submssion. -@@ -501,7 +504,6 @@ xlog_state_shutdown_callbacks( - struct xlog_in_core *iclog; - LIST_HEAD(cb_list); - -- spin_lock(&log->l_icloglock); - iclog = log->l_iclog; - do { - if (atomic_read(&iclog->ic_refcnt)) { -@@ -509,26 +511,22 @@ xlog_state_shutdown_callbacks( - continue; - } - list_splice_init(&iclog->ic_callbacks, &cb_list); -+ spin_unlock(&log->l_icloglock); ++static void io_req_task_timeout(struct io_kiocb *req, bool *locked) ++{ ++ req_set_fail(req); ++ io_req_complete_post(req, -ETIME, 0); ++} + -+ xlog_cil_process_committed(&cb_list); ++static enum hrtimer_restart io_timeout_fn(struct hrtimer *timer) ++{ ++ struct io_timeout_data *data = container_of(timer, ++ struct io_timeout_data, timer); ++ struct io_kiocb *req = data->req; ++ struct io_ring_ctx *ctx = req->ctx; ++ unsigned long flags; + -+ spin_lock(&log->l_icloglock); - wake_up_all(&iclog->ic_write_wait); - wake_up_all(&iclog->ic_force_wait); - } while ((iclog = iclog->ic_next) != log->l_iclog); - - wake_up_all(&log->l_flush_wait); -- spin_unlock(&log->l_icloglock); -- -- xlog_cil_process_committed(&cb_list); - } - - /* - * Flush iclog to disk if this is the last reference to the given iclog and the - * it is in the WANT_SYNC state. - * -- * If the caller passes in a non-zero @old_tail_lsn and the current log tail -- * does not match, there may be metadata on disk that must be persisted before -- * this iclog is written. To satisfy that requirement, set the -- * XLOG_ICL_NEED_FLUSH flag as a condition for writing this iclog with the new -- * log tail value. -- * - * If XLOG_ICL_NEED_FUA is already set on the iclog, we need to ensure that the - * log tail is updated correctly. NEED_FUA indicates that the iclog will be - * written to stable storage, and implies that a commit record is contained -@@ -545,12 +543,10 @@ xlog_state_shutdown_callbacks( - * always capture the tail lsn on the iclog on the first NEED_FUA release - * regardless of the number of active reference counts on this iclog. - */ -- - int - xlog_state_release_iclog( - struct xlog *log, -- struct xlog_in_core *iclog, -- xfs_lsn_t old_tail_lsn) -+ struct xlog_in_core *iclog) - { - xfs_lsn_t tail_lsn; - bool last_ref; -@@ -561,18 +557,14 @@ xlog_state_release_iclog( - /* - * Grabbing the current log tail needs to be atomic w.r.t. the writing - * of the tail LSN into the iclog so we guarantee that the log tail does -- * not move between deciding if a cache flush is required and writing -- * the LSN into the iclog below. 
-+ * not move between the first time we know that the iclog needs to be -+ * made stable and when we eventually submit it. - */ -- if (old_tail_lsn || iclog->ic_state == XLOG_STATE_WANT_SYNC) { -+ if ((iclog->ic_state == XLOG_STATE_WANT_SYNC || -+ (iclog->ic_flags & XLOG_ICL_NEED_FUA)) && -+ !iclog->ic_header.h_tail_lsn) { - tail_lsn = xlog_assign_tail_lsn(log->l_mp); -- -- if (old_tail_lsn && tail_lsn != old_tail_lsn) -- iclog->ic_flags |= XLOG_ICL_NEED_FLUSH; -- -- if ((iclog->ic_flags & XLOG_ICL_NEED_FUA) && -- !iclog->ic_header.h_tail_lsn) -- iclog->ic_header.h_tail_lsn = cpu_to_be64(tail_lsn); -+ iclog->ic_header.h_tail_lsn = cpu_to_be64(tail_lsn); - } - - last_ref = atomic_dec_and_test(&iclog->ic_refcnt); -@@ -583,11 +575,8 @@ xlog_state_release_iclog( - * pending iclog callbacks that were waiting on the release of - * this iclog. - */ -- if (last_ref) { -- spin_unlock(&log->l_icloglock); -+ if (last_ref) - xlog_state_shutdown_callbacks(log); -- spin_lock(&log->l_icloglock); -- } - return -EIO; - } - -@@ -600,8 +589,6 @@ xlog_state_release_iclog( - } - - iclog->ic_state = XLOG_STATE_SYNCING; -- if (!iclog->ic_header.h_tail_lsn) -- iclog->ic_header.h_tail_lsn = cpu_to_be64(tail_lsn); - xlog_verify_tail_lsn(log, iclog); - trace_xlog_iclog_syncing(iclog, _RET_IP_); - -@@ -874,7 +861,7 @@ xlog_force_iclog( - iclog->ic_flags |= XLOG_ICL_NEED_FLUSH | XLOG_ICL_NEED_FUA; - if (iclog->ic_state == XLOG_STATE_ACTIVE) - xlog_state_switch_iclogs(iclog->ic_log, iclog, 0); -- return xlog_state_release_iclog(iclog->ic_log, iclog, 0); -+ return xlog_state_release_iclog(iclog->ic_log, iclog); - } - - /* -@@ -2412,7 +2399,7 @@ xlog_write_copy_finish( - ASSERT(iclog->ic_state == XLOG_STATE_WANT_SYNC || - xlog_is_shutdown(log)); - release_iclog: -- error = xlog_state_release_iclog(log, iclog, 0); -+ error = xlog_state_release_iclog(log, iclog); - spin_unlock(&log->l_icloglock); - return error; - } -@@ -2629,7 +2616,7 @@ next_lv: - - spin_lock(&log->l_icloglock); - xlog_state_finish_copy(log, iclog, record_cnt, data_cnt); -- error = xlog_state_release_iclog(log, iclog, 0); -+ error = xlog_state_release_iclog(log, iclog); - spin_unlock(&log->l_icloglock); - - return error; -@@ -3053,7 +3040,7 @@ restart: - * reference to the iclog. - */ - if (!atomic_add_unless(&iclog->ic_refcnt, -1, 1)) -- error = xlog_state_release_iclog(log, iclog, 0); -+ error = xlog_state_release_iclog(log, iclog); - spin_unlock(&log->l_icloglock); - if (error) - return error; -@@ -3904,7 +3891,10 @@ xlog_force_shutdown( - wake_up_all(&log->l_cilp->xc_start_wait); - wake_up_all(&log->l_cilp->xc_commit_wait); - spin_unlock(&log->l_cilp->xc_push_lock); ++ spin_lock_irqsave(&ctx->timeout_lock, flags); ++ list_del_init(&req->timeout.list); ++ atomic_set(&req->ctx->cq_timeouts, ++ atomic_read(&req->ctx->cq_timeouts) + 1); ++ spin_unlock_irqrestore(&ctx->timeout_lock, flags); + -+ spin_lock(&log->l_icloglock); - xlog_state_shutdown_callbacks(log); -+ spin_unlock(&log->l_icloglock); - - return log_error; - } -diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c -index 6c93c8ada6f35..eafe30843ff0f 100644 ---- a/fs/xfs/xfs_log_cil.c -+++ b/fs/xfs/xfs_log_cil.c -@@ -681,11 +681,21 @@ xlog_cil_set_ctx_write_state( - * The LSN we need to pass to the log items on transaction - * commit is the LSN reported by the first log vector write, not - * the commit lsn. If we use the commit record lsn then we can -- * move the tail beyond the grant write head. -+ * move the grant write head beyond the tail LSN and overwrite -+ * it. 
- */ - ctx->start_lsn = lsn; - wake_up_all(&cil->xc_start_wait); - spin_unlock(&cil->xc_push_lock); ++ req->io_task_work.func = io_req_task_timeout; ++ io_req_task_work_add(req); ++ return HRTIMER_NORESTART; ++} + -+ /* -+ * Make sure the metadata we are about to overwrite in the log -+ * has been flushed to stable storage before this iclog is -+ * issued. -+ */ -+ spin_lock(&cil->xc_log->l_icloglock); -+ iclog->ic_flags |= XLOG_ICL_NEED_FLUSH; -+ spin_unlock(&cil->xc_log->l_icloglock); - return; - } - -@@ -864,10 +874,7 @@ xlog_cil_push_work( - struct xfs_trans_header thdr; - struct xfs_log_iovec lhdr; - struct xfs_log_vec lvhdr = { NULL }; -- xfs_lsn_t preflush_tail_lsn; - xfs_csn_t push_seq; -- struct bio bio; -- DECLARE_COMPLETION_ONSTACK(bdev_flush); - bool push_commit_stable; - - new_ctx = xlog_cil_ctx_alloc(); -@@ -937,23 +944,6 @@ xlog_cil_push_work( - list_add(&ctx->committing, &cil->xc_committing); - spin_unlock(&cil->xc_push_lock); - -- /* -- * The CIL is stable at this point - nothing new will be added to it -- * because we hold the flush lock exclusively. Hence we can now issue -- * a cache flush to ensure all the completed metadata in the journal we -- * are about to overwrite is on stable storage. -- * -- * Because we are issuing this cache flush before we've written the -- * tail lsn to the iclog, we can have metadata IO completions move the -- * tail forwards between the completion of this flush and the iclog -- * being written. In this case, we need to re-issue the cache flush -- * before the iclog write. To detect whether the log tail moves, sample -- * the tail LSN *before* we issue the flush. -- */ -- preflush_tail_lsn = atomic64_read(&log->l_tail_lsn); -- xfs_flush_bdev_async(&bio, log->l_mp->m_ddev_targp->bt_bdev, -- &bdev_flush); -- - /* - * Pull all the log vectors off the items in the CIL, and remove the - * items from the CIL. We don't need the CIL lock here because it's only -@@ -1030,12 +1020,6 @@ xlog_cil_push_work( - lvhdr.lv_iovecp = &lhdr; - lvhdr.lv_next = ctx->lv_chain; - -- /* -- * Before we format and submit the first iclog, we have to ensure that -- * the metadata writeback ordering cache flush is complete. -- */ -- wait_for_completion(&bdev_flush); -- - error = xlog_cil_write_chain(ctx, &lvhdr); - if (error) - goto out_abort_free_ticket; -@@ -1094,7 +1078,7 @@ xlog_cil_push_work( - if (push_commit_stable && - ctx->commit_iclog->ic_state == XLOG_STATE_ACTIVE) - xlog_state_switch_iclogs(log, ctx->commit_iclog, 0); -- xlog_state_release_iclog(log, ctx->commit_iclog, preflush_tail_lsn); -+ xlog_state_release_iclog(log, ctx->commit_iclog); - - /* Not safe to reference ctx now! */ - -@@ -1115,7 +1099,7 @@ out_abort_free_ticket: - return; - } - spin_lock(&log->l_icloglock); -- xlog_state_release_iclog(log, ctx->commit_iclog, 0); -+ xlog_state_release_iclog(log, ctx->commit_iclog); - /* Not safe to reference ctx now! */ - spin_unlock(&log->l_icloglock); - } -@@ -1442,9 +1426,9 @@ out_shutdown: - */ - bool - xfs_log_item_in_current_chkpt( -- struct xfs_log_item *lip) -+ struct xfs_log_item *lip) - { -- struct xfs_cil_ctx *ctx = lip->li_mountp->m_log->l_cilp->xc_ctx; -+ struct xfs_cil *cil = lip->li_mountp->m_log->l_cilp; - - if (list_empty(&lip->li_cil)) - return false; -@@ -1454,7 +1438,7 @@ xfs_log_item_in_current_chkpt( - * first checkpoint it is written to. Hence if it is different to the - * current sequence, we're in a new checkpoint. 
- */ -- return lip->li_seq == ctx->sequence; -+ return lip->li_seq == READ_ONCE(cil->xc_current_sequence); - } - - /* -diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h -index 844fbeec3545a..f3d68ca39f45c 100644 ---- a/fs/xfs/xfs_log_priv.h -+++ b/fs/xfs/xfs_log_priv.h -@@ -524,8 +524,7 @@ void xfs_log_ticket_regrant(struct xlog *log, struct xlog_ticket *ticket); - - void xlog_state_switch_iclogs(struct xlog *log, struct xlog_in_core *iclog, - int eventual_size); --int xlog_state_release_iclog(struct xlog *log, struct xlog_in_core *iclog, -- xfs_lsn_t log_tail_lsn); -+int xlog_state_release_iclog(struct xlog *log, struct xlog_in_core *iclog); - - /* - * When we crack an atomic LSN, we sample it first so that the value will not -diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c -index 10562ecbd9eac..581aeb288b32b 100644 ---- a/fs/xfs/xfs_log_recover.c -+++ b/fs/xfs/xfs_log_recover.c -@@ -27,7 +27,7 @@ - #include "xfs_buf_item.h" - #include "xfs_ag.h" - #include "xfs_quota.h" -- -+#include "xfs_reflink.h" - - #define BLK_AVG(blk1, blk2) ((blk1+blk2) >> 1) - -@@ -3502,6 +3502,28 @@ xlog_recover_finish( - - xlog_recover_process_iunlinks(log); - xlog_recover_check_summary(log); ++static struct io_kiocb *io_timeout_extract(struct io_ring_ctx *ctx, ++ __u64 user_data) ++ __must_hold(&ctx->timeout_lock) ++{ ++ struct io_timeout_data *io; ++ struct io_kiocb *req; ++ bool found = false; + -+ /* -+ * Recover any CoW staging blocks that are still referenced by the -+ * ondisk refcount metadata. During mount there cannot be any live -+ * staging extents as we have not permitted any user modifications. -+ * Therefore, it is safe to free them all right now, even on a -+ * read-only mount. -+ */ -+ error = xfs_reflink_recover_cow(log->l_mp); -+ if (error) { -+ xfs_alert(log->l_mp, -+ "Failed to recover leftover CoW staging extents, err %d.", -+ error); -+ /* -+ * If we get an error here, make sure the log is shut down -+ * but return zero so that any log items committed since the -+ * end of intents processing can be pushed through the CIL -+ * and AIL. 
-+ */ -+ xfs_force_shutdown(log->l_mp, SHUTDOWN_LOG_IO_ERROR); ++ list_for_each_entry(req, &ctx->timeout_list, timeout.list) { ++ found = user_data == req->user_data; ++ if (found) ++ break; + } ++ if (!found) ++ return ERR_PTR(-ENOENT); ++ ++ io = req->async_data; ++ if (hrtimer_try_to_cancel(&io->timer) == -1) ++ return ERR_PTR(-EALREADY); ++ list_del_init(&req->timeout.list); ++ return req; ++} ++ ++static int io_timeout_cancel(struct io_ring_ctx *ctx, __u64 user_data) ++ __must_hold(&ctx->completion_lock) ++ __must_hold(&ctx->timeout_lock) ++{ ++ struct io_kiocb *req = io_timeout_extract(ctx, user_data); ++ ++ if (IS_ERR(req)) ++ return PTR_ERR(req); ++ ++ req_set_fail(req); ++ io_fill_cqe_req(req, -ECANCELED, 0); ++ io_put_req_deferred(req); ++ return 0; ++} ++ ++static clockid_t io_timeout_get_clock(struct io_timeout_data *data) ++{ ++ switch (data->flags & IORING_TIMEOUT_CLOCK_MASK) { ++ case IORING_TIMEOUT_BOOTTIME: ++ return CLOCK_BOOTTIME; ++ case IORING_TIMEOUT_REALTIME: ++ return CLOCK_REALTIME; ++ default: ++ /* can't happen, vetted at prep time */ ++ WARN_ON_ONCE(1); ++ fallthrough; ++ case 0: ++ return CLOCK_MONOTONIC; ++ } ++} ++ ++static int io_linked_timeout_update(struct io_ring_ctx *ctx, __u64 user_data, ++ struct timespec64 *ts, enum hrtimer_mode mode) ++ __must_hold(&ctx->timeout_lock) ++{ ++ struct io_timeout_data *io; ++ struct io_kiocb *req; ++ bool found = false; ++ ++ list_for_each_entry(req, &ctx->ltimeout_list, timeout.list) { ++ found = user_data == req->user_data; ++ if (found) ++ break; ++ } ++ if (!found) ++ return -ENOENT; ++ ++ io = req->async_data; ++ if (hrtimer_try_to_cancel(&io->timer) == -1) ++ return -EALREADY; ++ hrtimer_init(&io->timer, io_timeout_get_clock(io), mode); ++ io->timer.function = io_link_timeout_fn; ++ hrtimer_start(&io->timer, timespec64_to_ktime(*ts), mode); ++ return 0; ++} ++ ++static int io_timeout_update(struct io_ring_ctx *ctx, __u64 user_data, ++ struct timespec64 *ts, enum hrtimer_mode mode) ++ __must_hold(&ctx->timeout_lock) ++{ ++ struct io_kiocb *req = io_timeout_extract(ctx, user_data); ++ struct io_timeout_data *data; ++ ++ if (IS_ERR(req)) ++ return PTR_ERR(req); ++ ++ req->timeout.off = 0; /* noseq */ ++ data = req->async_data; ++ list_add_tail(&req->timeout.list, &ctx->timeout_list); ++ hrtimer_init(&data->timer, io_timeout_get_clock(data), mode); ++ data->timer.function = io_timeout_fn; ++ hrtimer_start(&data->timer, timespec64_to_ktime(*ts), mode); ++ return 0; ++} ++ ++static int io_timeout_remove_prep(struct io_kiocb *req, ++ const struct io_uring_sqe *sqe) ++{ ++ struct io_timeout_rem *tr = &req->timeout_rem; ++ ++ if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) ++ return -EINVAL; ++ if (unlikely(req->flags & (REQ_F_FIXED_FILE | REQ_F_BUFFER_SELECT))) ++ return -EINVAL; ++ if (sqe->ioprio || sqe->buf_index || sqe->len || sqe->splice_fd_in) ++ return -EINVAL; ++ ++ tr->ltimeout = false; ++ tr->addr = READ_ONCE(sqe->addr); ++ tr->flags = READ_ONCE(sqe->timeout_flags); ++ if (tr->flags & IORING_TIMEOUT_UPDATE_MASK) { ++ if (hweight32(tr->flags & IORING_TIMEOUT_CLOCK_MASK) > 1) ++ return -EINVAL; ++ if (tr->flags & IORING_LINK_TIMEOUT_UPDATE) ++ tr->ltimeout = true; ++ if (tr->flags & ~(IORING_TIMEOUT_UPDATE_MASK|IORING_TIMEOUT_ABS)) ++ return -EINVAL; ++ if (get_timespec64(&tr->ts, u64_to_user_ptr(sqe->addr2))) ++ return -EFAULT; ++ } else if (tr->flags) { ++ /* timeout removal doesn't support flags */ ++ return -EINVAL; ++ } ++ ++ return 0; ++} ++ ++static inline enum hrtimer_mode 
io_translate_timeout_mode(unsigned int flags) ++{ ++ return (flags & IORING_TIMEOUT_ABS) ? HRTIMER_MODE_ABS ++ : HRTIMER_MODE_REL; ++} + - return 0; - } - -diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c -index 06dac09eddbd8..76056de83971c 100644 ---- a/fs/xfs/xfs_mount.c -+++ b/fs/xfs/xfs_mount.c -@@ -922,15 +922,6 @@ xfs_mountfs( - xfs_warn(mp, - "Unable to allocate reserve blocks. Continuing without reserve pool."); - -- /* Recover any CoW blocks that never got remapped. */ -- error = xfs_reflink_recover_cow(mp); -- if (error) { -- xfs_err(mp, -- "Error %d recovering leftover CoW allocations.", error); -- xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); -- goto out_quota; -- } -- - /* Reserve AG blocks for future btree expansion. */ - error = xfs_fs_reserve_ag_blocks(mp); - if (error && error != -ENOSPC) -@@ -941,7 +932,6 @@ xfs_mountfs( - - out_agresv: - xfs_fs_unreserve_ag_blocks(mp); -- out_quota: - xfs_qm_unmount_quotas(mp); - out_rtunmount: - xfs_rtunmount_inodes(mp); -@@ -1142,7 +1132,7 @@ xfs_mod_fdblocks( - * problems (i.e. transaction abort, pagecache discards, etc.) than - * slightly premature -ENOSPC. - */ -- set_aside = mp->m_alloc_set_aside + atomic64_read(&mp->m_allocbt_blks); -+ set_aside = xfs_fdblocks_unavailable(mp); - percpu_counter_add_batch(&mp->m_fdblocks, delta, batch); - if (__percpu_counter_compare(&mp->m_fdblocks, set_aside, - XFS_FDBLOCKS_BATCH) >= 0) { -diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h -index e091f3b3fa158..86564295fce6d 100644 ---- a/fs/xfs/xfs_mount.h -+++ b/fs/xfs/xfs_mount.h -@@ -478,6 +478,21 @@ extern void xfs_unmountfs(xfs_mount_t *); - */ - #define XFS_FDBLOCKS_BATCH 1024 - +/* -+ * Estimate the amount of free space that is not available to userspace and is -+ * not explicitly reserved from the incore fdblocks. This includes: -+ * -+ * - The minimum number of blocks needed to support splitting a bmap btree -+ * - The blocks currently in use by the freespace btrees because they record -+ * the actual blocks that will fill per-AG metadata space reservations ++ * Remove or update an existing timeout command + */ -+static inline uint64_t -+xfs_fdblocks_unavailable( -+ struct xfs_mount *mp) ++static int io_timeout_remove(struct io_kiocb *req, unsigned int issue_flags) +{ -+ return mp->m_alloc_set_aside + atomic64_read(&mp->m_allocbt_blks); ++ struct io_timeout_rem *tr = &req->timeout_rem; ++ struct io_ring_ctx *ctx = req->ctx; ++ int ret; ++ ++ if (!(req->timeout_rem.flags & IORING_TIMEOUT_UPDATE)) { ++ spin_lock(&ctx->completion_lock); ++ spin_lock_irq(&ctx->timeout_lock); ++ ret = io_timeout_cancel(ctx, tr->addr); ++ spin_unlock_irq(&ctx->timeout_lock); ++ spin_unlock(&ctx->completion_lock); ++ } else { ++ enum hrtimer_mode mode = io_translate_timeout_mode(tr->flags); ++ ++ spin_lock_irq(&ctx->timeout_lock); ++ if (tr->ltimeout) ++ ret = io_linked_timeout_update(ctx, tr->addr, &tr->ts, mode); ++ else ++ ret = io_timeout_update(ctx, tr->addr, &tr->ts, mode); ++ spin_unlock_irq(&ctx->timeout_lock); ++ } ++ ++ if (ret < 0) ++ req_set_fail(req); ++ io_req_complete_post(req, ret, 0); ++ return 0; +} + - extern int xfs_mod_fdblocks(struct xfs_mount *mp, int64_t delta, - bool reserved); - extern int xfs_mod_frextents(struct xfs_mount *mp, int64_t delta); -diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c -index 76355f2934884..36832e4bc803c 100644 ---- a/fs/xfs/xfs_reflink.c -+++ b/fs/xfs/xfs_reflink.c -@@ -749,7 +749,10 @@ xfs_reflink_end_cow( - } - - /* -- * Free leftover CoW reservations that didn't get cleaned out. 
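io_timeout_remove() above also routes IORING_TIMEOUT_UPDATE requests to io_timeout_update()/io_linked_timeout_update(), which re-arm an existing timer instead of cancelling it. Note that io_timeout_remove_prep() only tolerates IORING_TIMEOUT_ABS next to the update flags, so the clock chosen when the timeout was first armed is kept. A hedged sketch, again leaning on liburing helper names that are not part of this patch:

    #include <liburing.h>

    /* Arm a 60s timeout on CLOCK_BOOTTIME (clock flags are accepted by
     * the IORING_OP_TIMEOUT prep path), then shrink it to 5s in place. */
    static int arm_then_shorten(struct io_uring *ring)
    {
        struct __kernel_timespec ts = { .tv_sec = 60 };
        struct io_uring_sqe *sqe = io_uring_get_sqe(ring);

        if (!sqe)
            return -1;
        io_uring_prep_timeout(sqe, &ts, 0, IORING_TIMEOUT_BOOTTIME);
        sqe->user_data = 7;
        io_uring_submit(ring);

        ts.tv_sec = 5;
        sqe = io_uring_get_sqe(ring);
        if (!sqe)
            return -1;
        /* liburing ORs in IORING_TIMEOUT_UPDATE for us; passing a clock
         * flag here would be rejected with -EINVAL by the prep code. */
        io_uring_prep_timeout_update(sqe, &ts, 7, 0);
        return io_uring_submit(ring);
    }
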
-+ * Free all CoW staging blocks that are still referenced by the ondisk refcount -+ * metadata. The ondisk metadata does not track which inode created the -+ * staging extent, so callers must ensure that there are no cached inodes with -+ * live CoW staging extents. - */ - int - xfs_reflink_recover_cow( -diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c -index c4e0cd1c1c8ca..df1d6be61bfa3 100644 ---- a/fs/xfs/xfs_super.c -+++ b/fs/xfs/xfs_super.c -@@ -642,7 +642,7 @@ xfs_fs_destroy_inode( - static void - xfs_fs_dirty_inode( - struct inode *inode, -- int flag) -+ int flags) - { - struct xfs_inode *ip = XFS_I(inode); - struct xfs_mount *mp = ip->i_mount; -@@ -650,7 +650,13 @@ xfs_fs_dirty_inode( - - if (!(inode->i_sb->s_flags & SB_LAZYTIME)) - return; -- if (flag != I_DIRTY_SYNC || !(inode->i_state & I_DIRTY_TIME)) ++static int io_timeout_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe, ++ bool is_timeout_link) ++{ ++ struct io_timeout_data *data; ++ unsigned flags; ++ u32 off = READ_ONCE(sqe->off); ++ ++ if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) ++ return -EINVAL; ++ if (sqe->ioprio || sqe->buf_index || sqe->len != 1 || ++ sqe->splice_fd_in) ++ return -EINVAL; ++ if (off && is_timeout_link) ++ return -EINVAL; ++ flags = READ_ONCE(sqe->timeout_flags); ++ if (flags & ~(IORING_TIMEOUT_ABS | IORING_TIMEOUT_CLOCK_MASK)) ++ return -EINVAL; ++ /* more than one clock specified is invalid, obviously */ ++ if (hweight32(flags & IORING_TIMEOUT_CLOCK_MASK) > 1) ++ return -EINVAL; ++ ++ INIT_LIST_HEAD(&req->timeout.list); ++ req->timeout.off = off; ++ if (unlikely(off && !req->ctx->off_timeout_used)) ++ req->ctx->off_timeout_used = true; ++ ++ if (!req->async_data && io_alloc_async_data(req)) ++ return -ENOMEM; ++ ++ data = req->async_data; ++ data->req = req; ++ data->flags = flags; ++ ++ if (get_timespec64(&data->ts, u64_to_user_ptr(sqe->addr))) ++ return -EFAULT; ++ ++ INIT_LIST_HEAD(&req->timeout.list); ++ data->mode = io_translate_timeout_mode(flags); ++ hrtimer_init(&data->timer, io_timeout_get_clock(data), data->mode); ++ ++ if (is_timeout_link) { ++ struct io_submit_link *link = &req->ctx->submit_state.link; ++ ++ if (!link->head) ++ return -EINVAL; ++ if (link->last->opcode == IORING_OP_LINK_TIMEOUT) ++ return -EINVAL; ++ req->timeout.head = link->last; ++ link->last->flags |= REQ_F_ARM_LTIMEOUT; ++ } ++ return 0; ++} ++ ++static int io_timeout(struct io_kiocb *req, unsigned int issue_flags) ++{ ++ struct io_ring_ctx *ctx = req->ctx; ++ struct io_timeout_data *data = req->async_data; ++ struct list_head *entry; ++ u32 tail, off = req->timeout.off; ++ ++ spin_lock_irq(&ctx->timeout_lock); + + /* -+ * Only do the timestamp update if the inode is dirty (I_DIRTY_SYNC) -+ * and has dirty timestamp (I_DIRTY_TIME). I_DIRTY_TIME can be passed -+ * in flags possibly together with I_DIRTY_SYNC. ++ * sqe->off holds how many events that need to occur for this ++ * timeout event to be satisfied. If it isn't set, then this is ++ * a pure timeout request, sequence isn't used. 
+ */ -+ if ((flags & ~I_DIRTY_TIME) != I_DIRTY_SYNC || !(flags & I_DIRTY_TIME)) - return; - - if (xfs_trans_alloc(mp, &M_RES(mp)->tr_fsyncts, 0, 0, 0, &tp)) -@@ -729,6 +735,7 @@ xfs_fs_sync_fs( - int wait) - { - struct xfs_mount *mp = XFS_M(sb); -+ int error; - - trace_xfs_fs_sync_fs(mp, __return_address); - -@@ -738,7 +745,10 @@ xfs_fs_sync_fs( - if (!wait) - return 0; - -- xfs_log_force(mp, XFS_LOG_SYNC); -+ error = xfs_log_force(mp, XFS_LOG_SYNC); -+ if (error) -+ return error; ++ if (io_is_timeout_noseq(req)) { ++ entry = ctx->timeout_list.prev; ++ goto add; ++ } + - if (laptop_mode) { - /* - * The disk must be active because we're syncing. -@@ -1738,15 +1748,6 @@ xfs_remount_rw( - */ - xfs_restore_resvblks(mp); - xfs_log_work_queue(mp); -- -- /* Recover any CoW blocks that never got remapped. */ -- error = xfs_reflink_recover_cow(mp); -- if (error) { -- xfs_err(mp, -- "Error %d recovering leftover CoW allocations.", error); -- xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); -- return error; -- } - xfs_blockgc_start(mp); - - /* Create the per-AG metadata reservation pool .*/ -@@ -1764,7 +1765,15 @@ static int - xfs_remount_ro( - struct xfs_mount *mp) - { -- int error; -+ struct xfs_icwalk icw = { -+ .icw_flags = XFS_ICWALK_FLAG_SYNC, -+ }; -+ int error; ++ tail = ctx->cached_cq_tail - atomic_read(&ctx->cq_timeouts); ++ req->timeout.target_seq = tail + off; ++ ++ /* Update the last seq here in case io_flush_timeouts() hasn't. ++ * This is safe because ->completion_lock is held, and submissions ++ * and completions are never mixed in the same ->completion_lock section. ++ */ ++ ctx->cq_last_tm_flush = tail; + -+ /* Flush all the dirty data to disk. */ -+ error = sync_filesystem(mp->m_super); -+ if (error) -+ return error; - - /* - * Cancel background eofb scanning so it cannot race with the final -@@ -1772,8 +1781,13 @@ xfs_remount_ro( - */ - xfs_blockgc_stop(mp); - -- /* Get rid of any leftover CoW reservations... */ -- error = xfs_blockgc_free_space(mp, NULL); + /* -+ * Clear out all remaining COW staging extents and speculative post-EOF -+ * preallocations so that we don't leave inodes requiring inactivation -+ * cleanups during reclaim on a read-only mount. We must process every -+ * cached inode, so this requires a synchronous cache scan. ++ * Insertion sort, ensuring the first entry in the list is always ++ * the one we need first. + */ -+ error = xfs_blockgc_free_space(mp, &icw); - if (error) { - xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); - return error; -@@ -1839,8 +1853,6 @@ xfs_fs_reconfigure( - if (error) - return error; - -- sync_filesystem(mp->m_super); -- - /* inode32 -> inode64 */ - if (xfs_has_small_inums(mp) && !xfs_has_small_inums(new_mp)) { - mp->m_features &= ~XFS_FEAT_SMALL_INUMS; -diff --git a/fs/xfs/xfs_symlink.c b/fs/xfs/xfs_symlink.c -index fc2c6a4046471..a31d2e5d03214 100644 ---- a/fs/xfs/xfs_symlink.c -+++ b/fs/xfs/xfs_symlink.c -@@ -184,8 +184,8 @@ xfs_symlink( - /* - * Make sure that we have allocated dquot(s) on disk. 
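The sorted-insertion logic that closes io_timeout() above is what makes counted timeouts work: sqe->off is the number of completions after which the timeout fires, folded into req->timeout.target_seq. A sketch of the userspace view, with liburing names assumed as before:

    #include <liburing.h>

    /* Get a wakeup once 4 requests complete, or after 1 second,
     * whichever happens first: count maps to sqe->off above. */
    static int submit_batch_with_barrier(struct io_uring *ring)
    {
        struct __kernel_timespec ts = { .tv_sec = 1 };
        struct io_uring_sqe *sqe;

        for (int i = 0; i < 4; i++) {
            sqe = io_uring_get_sqe(ring);
            io_uring_prep_nop(sqe);
        }
        sqe = io_uring_get_sqe(ring);
        io_uring_prep_timeout(sqe, &ts, 4, 0);
        /* The timeout CQE reports 0 when satisfied by the completion
         * count and -ETIME when the timer expires first. */
        return io_uring_submit(ring);
    }
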
- */ -- error = xfs_qm_vop_dqalloc(dp, mapped_fsuid(mnt_userns), -- mapped_fsgid(mnt_userns), prid, -+ error = xfs_qm_vop_dqalloc(dp, mapped_fsuid(mnt_userns, &init_user_ns), -+ mapped_fsgid(mnt_userns, &init_user_ns), prid, - XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, - &udqp, &gdqp, &pdqp); - if (error) -diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c -index 67dec11e34c7e..95c183072e7a2 100644 ---- a/fs/xfs/xfs_trans.c -+++ b/fs/xfs/xfs_trans.c -@@ -1201,3 +1201,89 @@ out_cancel: - xfs_trans_cancel(tp); - return error; - } ++ list_for_each_prev(entry, &ctx->timeout_list) { ++ struct io_kiocb *nxt = list_entry(entry, struct io_kiocb, ++ timeout.list); ++ ++ if (io_is_timeout_noseq(nxt)) ++ continue; ++ /* nxt.seq is behind @tail, otherwise would've been completed */ ++ if (off >= nxt->timeout.target_seq - tail) ++ break; ++ } ++add: ++ list_add(&req->timeout.list, entry); ++ data->timer.function = io_timeout_fn; ++ hrtimer_start(&data->timer, timespec64_to_ktime(data->ts), data->mode); ++ spin_unlock_irq(&ctx->timeout_lock); ++ return 0; ++} ++ ++struct io_cancel_data { ++ struct io_ring_ctx *ctx; ++ u64 user_data; ++}; ++ ++static bool io_cancel_cb(struct io_wq_work *work, void *data) ++{ ++ struct io_kiocb *req = container_of(work, struct io_kiocb, work); ++ struct io_cancel_data *cd = data; ++ ++ return req->ctx == cd->ctx && req->user_data == cd->user_data; ++} ++ ++static int io_async_cancel_one(struct io_uring_task *tctx, u64 user_data, ++ struct io_ring_ctx *ctx) ++{ ++ struct io_cancel_data data = { .ctx = ctx, .user_data = user_data, }; ++ enum io_wq_cancel cancel_ret; ++ int ret = 0; ++ ++ if (!tctx || !tctx->io_wq) ++ return -ENOENT; ++ ++ cancel_ret = io_wq_cancel_cb(tctx->io_wq, io_cancel_cb, &data, false); ++ switch (cancel_ret) { ++ case IO_WQ_CANCEL_OK: ++ ret = 0; ++ break; ++ case IO_WQ_CANCEL_RUNNING: ++ ret = -EALREADY; ++ break; ++ case IO_WQ_CANCEL_NOTFOUND: ++ ret = -ENOENT; ++ break; ++ } ++ ++ return ret; ++} ++ ++static int io_try_cancel_userdata(struct io_kiocb *req, u64 sqe_addr) ++{ ++ struct io_ring_ctx *ctx = req->ctx; ++ int ret; ++ ++ WARN_ON_ONCE(!io_wq_current_is_worker() && req->task != current); ++ ++ ret = io_async_cancel_one(req->task->io_uring, sqe_addr, ctx); ++ if (ret != -ENOENT) ++ return ret; ++ ++ spin_lock(&ctx->completion_lock); ++ spin_lock_irq(&ctx->timeout_lock); ++ ret = io_timeout_cancel(ctx, sqe_addr); ++ spin_unlock_irq(&ctx->timeout_lock); ++ if (ret != -ENOENT) ++ goto out; ++ ret = io_poll_cancel(ctx, sqe_addr, false); ++out: ++ spin_unlock(&ctx->completion_lock); ++ return ret; ++} ++ ++static int io_async_cancel_prep(struct io_kiocb *req, ++ const struct io_uring_sqe *sqe) ++{ ++ if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) ++ return -EINVAL; ++ if (unlikely(req->flags & (REQ_F_FIXED_FILE | REQ_F_BUFFER_SELECT))) ++ return -EINVAL; ++ if (sqe->ioprio || sqe->off || sqe->len || sqe->cancel_flags || ++ sqe->splice_fd_in) ++ return -EINVAL; ++ ++ req->cancel.addr = READ_ONCE(sqe->addr); ++ return 0; ++} ++ ++static int io_async_cancel(struct io_kiocb *req, unsigned int issue_flags) ++{ ++ struct io_ring_ctx *ctx = req->ctx; ++ u64 sqe_addr = req->cancel.addr; ++ struct io_tctx_node *node; ++ int ret; ++ ++ ret = io_try_cancel_userdata(req, sqe_addr); ++ if (ret != -ENOENT) ++ goto done; ++ ++ /* slow path, try all io-wq's */ ++ io_ring_submit_lock(ctx, !(issue_flags & IO_URING_F_NONBLOCK)); ++ ret = -ENOENT; ++ list_for_each_entry(node, &ctx->tctx_list, ctx_node) { ++ struct io_uring_task *tctx = node->task->io_uring; ++ ++ 
ret = io_async_cancel_one(tctx, req->cancel.addr, ctx); ++ if (ret != -ENOENT) ++ break; ++ } ++ io_ring_submit_unlock(ctx, !(issue_flags & IO_URING_F_NONBLOCK)); ++done: ++ if (ret < 0) ++ req_set_fail(req); ++ io_req_complete_post(req, ret, 0); ++ return 0; ++} ++ ++static int io_rsrc_update_prep(struct io_kiocb *req, ++ const struct io_uring_sqe *sqe) ++{ ++ if (unlikely(req->flags & (REQ_F_FIXED_FILE | REQ_F_BUFFER_SELECT))) ++ return -EINVAL; ++ if (sqe->ioprio || sqe->rw_flags || sqe->splice_fd_in) ++ return -EINVAL; ++ ++ req->rsrc_update.offset = READ_ONCE(sqe->off); ++ req->rsrc_update.nr_args = READ_ONCE(sqe->len); ++ if (!req->rsrc_update.nr_args) ++ return -EINVAL; ++ req->rsrc_update.arg = READ_ONCE(sqe->addr); ++ return 0; ++} ++ ++static int io_files_update(struct io_kiocb *req, unsigned int issue_flags) ++{ ++ struct io_ring_ctx *ctx = req->ctx; ++ struct io_uring_rsrc_update2 up; ++ int ret; + -+/* -+ * Allocate an transaction, lock and join the directory and child inodes to it, -+ * and reserve quota for a directory update. If there isn't sufficient space, -+ * @dblocks will be set to zero for a reservationless directory update and -+ * @nospace_error will be set to a negative errno describing the space -+ * constraint we hit. -+ * -+ * The caller must ensure that the on-disk dquots attached to this inode have -+ * already been allocated and initialized. The ILOCKs will be dropped when the -+ * transaction is committed or cancelled. -+ */ -+int -+xfs_trans_alloc_dir( -+ struct xfs_inode *dp, -+ struct xfs_trans_res *resv, -+ struct xfs_inode *ip, -+ unsigned int *dblocks, -+ struct xfs_trans **tpp, -+ int *nospace_error) ++ up.offset = req->rsrc_update.offset; ++ up.data = req->rsrc_update.arg; ++ up.nr = 0; ++ up.tags = 0; ++ up.resv = 0; ++ up.resv2 = 0; ++ ++ io_ring_submit_lock(ctx, !(issue_flags & IO_URING_F_NONBLOCK)); ++ ret = __io_register_rsrc_update(ctx, IORING_RSRC_FILE, ++ &up, req->rsrc_update.nr_args); ++ io_ring_submit_unlock(ctx, !(issue_flags & IO_URING_F_NONBLOCK)); ++ ++ if (ret < 0) ++ req_set_fail(req); ++ __io_req_complete(req, issue_flags, ret, 0); ++ return 0; ++} ++ ++static int io_req_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) +{ -+ struct xfs_trans *tp; -+ struct xfs_mount *mp = ip->i_mount; -+ unsigned int resblks; -+ bool retried = false; -+ int error; ++ switch (req->opcode) { ++ case IORING_OP_NOP: ++ return 0; ++ case IORING_OP_READV: ++ case IORING_OP_READ_FIXED: ++ case IORING_OP_READ: ++ return io_read_prep(req, sqe); ++ case IORING_OP_WRITEV: ++ case IORING_OP_WRITE_FIXED: ++ case IORING_OP_WRITE: ++ return io_write_prep(req, sqe); ++ case IORING_OP_POLL_ADD: ++ return io_poll_add_prep(req, sqe); ++ case IORING_OP_POLL_REMOVE: ++ return io_poll_update_prep(req, sqe); ++ case IORING_OP_FSYNC: ++ return io_fsync_prep(req, sqe); ++ case IORING_OP_SYNC_FILE_RANGE: ++ return io_sfr_prep(req, sqe); ++ case IORING_OP_SENDMSG: ++ case IORING_OP_SEND: ++ return io_sendmsg_prep(req, sqe); ++ case IORING_OP_RECVMSG: ++ case IORING_OP_RECV: ++ return io_recvmsg_prep(req, sqe); ++ case IORING_OP_CONNECT: ++ return io_connect_prep(req, sqe); ++ case IORING_OP_TIMEOUT: ++ return io_timeout_prep(req, sqe, false); ++ case IORING_OP_TIMEOUT_REMOVE: ++ return io_timeout_remove_prep(req, sqe); ++ case IORING_OP_ASYNC_CANCEL: ++ return io_async_cancel_prep(req, sqe); ++ case IORING_OP_LINK_TIMEOUT: ++ return io_timeout_prep(req, sqe, true); ++ case IORING_OP_ACCEPT: ++ return io_accept_prep(req, sqe); ++ case IORING_OP_FALLOCATE: ++ return 
io_fallocate_prep(req, sqe); ++ case IORING_OP_OPENAT: ++ return io_openat_prep(req, sqe); ++ case IORING_OP_CLOSE: ++ return io_close_prep(req, sqe); ++ case IORING_OP_FILES_UPDATE: ++ return io_rsrc_update_prep(req, sqe); ++ case IORING_OP_STATX: ++ return io_statx_prep(req, sqe); ++ case IORING_OP_FADVISE: ++ return io_fadvise_prep(req, sqe); ++ case IORING_OP_MADVISE: ++ return io_madvise_prep(req, sqe); ++ case IORING_OP_OPENAT2: ++ return io_openat2_prep(req, sqe); ++ case IORING_OP_EPOLL_CTL: ++ return io_epoll_ctl_prep(req, sqe); ++ case IORING_OP_SPLICE: ++ return io_splice_prep(req, sqe); ++ case IORING_OP_PROVIDE_BUFFERS: ++ return io_provide_buffers_prep(req, sqe); ++ case IORING_OP_REMOVE_BUFFERS: ++ return io_remove_buffers_prep(req, sqe); ++ case IORING_OP_TEE: ++ return io_tee_prep(req, sqe); ++ case IORING_OP_SHUTDOWN: ++ return io_shutdown_prep(req, sqe); ++ case IORING_OP_RENAMEAT: ++ return io_renameat_prep(req, sqe); ++ case IORING_OP_UNLINKAT: ++ return io_unlinkat_prep(req, sqe); ++ case IORING_OP_MKDIRAT: ++ return io_mkdirat_prep(req, sqe); ++ case IORING_OP_SYMLINKAT: ++ return io_symlinkat_prep(req, sqe); ++ case IORING_OP_LINKAT: ++ return io_linkat_prep(req, sqe); ++ } ++ ++ printk_once(KERN_WARNING "io_uring: unhandled opcode %d\n", ++ req->opcode); ++ return -EINVAL; ++} + -+retry: -+ *nospace_error = 0; -+ resblks = *dblocks; -+ error = xfs_trans_alloc(mp, resv, resblks, 0, 0, &tp); -+ if (error == -ENOSPC) { -+ *nospace_error = error; -+ resblks = 0; -+ error = xfs_trans_alloc(mp, resv, resblks, 0, 0, &tp); ++static int io_req_prep_async(struct io_kiocb *req) ++{ ++ if (!io_op_defs[req->opcode].needs_async_setup) ++ return 0; ++ if (WARN_ON_ONCE(req->async_data)) ++ return -EFAULT; ++ if (io_alloc_async_data(req)) ++ return -EAGAIN; ++ ++ switch (req->opcode) { ++ case IORING_OP_READV: ++ return io_rw_prep_async(req, READ); ++ case IORING_OP_WRITEV: ++ return io_rw_prep_async(req, WRITE); ++ case IORING_OP_SENDMSG: ++ return io_sendmsg_prep_async(req); ++ case IORING_OP_RECVMSG: ++ return io_recvmsg_prep_async(req); ++ case IORING_OP_CONNECT: ++ return io_connect_prep_async(req); + } -+ if (error) -+ return error; ++ printk_once(KERN_WARNING "io_uring: prep_async() bad opcode %d\n", ++ req->opcode); ++ return -EFAULT; ++} + -+ xfs_lock_two_inodes(dp, XFS_ILOCK_EXCL, ip, XFS_ILOCK_EXCL); ++static u32 io_get_sequence(struct io_kiocb *req) ++{ ++ u32 seq = req->ctx->cached_sq_head; + -+ xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL); -+ xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); ++ /* need original cached_sq_head, but it was increased for each req */ ++ io_for_each_link(req, req) ++ seq--; ++ return seq; ++} + -+ error = xfs_qm_dqattach_locked(dp, false); -+ if (error) { -+ /* Caller should have allocated the dquots! */ -+ ASSERT(error != -ENOENT); -+ goto out_cancel; ++static bool io_drain_req(struct io_kiocb *req) ++{ ++ struct io_kiocb *pos; ++ struct io_ring_ctx *ctx = req->ctx; ++ struct io_defer_entry *de; ++ int ret; ++ u32 seq; ++ ++ if (req->flags & REQ_F_FAIL) { ++ io_req_complete_fail_submit(req); ++ return true; + } + -+ error = xfs_qm_dqattach_locked(ip, false); -+ if (error) { -+ /* Caller should have allocated the dquots! */ -+ ASSERT(error != -ENOENT); -+ goto out_cancel; ++ /* ++ * If we need to drain a request in the middle of a link, drain the ++ * head request and the next request/link after the current link. ++ * Considering sequential execution of links, IOSQE_IO_DRAIN will be ++ * maintained for every request of our link. 
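io_drain_req() implements the IOSQE_IO_DRAIN flag: the request is parked on ctx->defer_list until every request submitted before it has completed, and the drain property is propagated along links as the comment above describes. From userspace that is a one-flag affair; a short sketch with liburing (helper names assumed, not provided by this patch):

    #include <liburing.h>

    /* This fsync will not start until all previously submitted
     * requests have completed, courtesy of io_drain_req() above. */
    static int drained_fsync(struct io_uring *ring, int fd)
    {
        struct io_uring_sqe *sqe = io_uring_get_sqe(ring);

        if (!sqe)
            return -1;
        io_uring_prep_fsync(sqe, fd, 0);
        io_uring_sqe_set_flags(sqe, IOSQE_IO_DRAIN);
        return io_uring_submit(ring);
    }
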
++ */ ++ if (ctx->drain_next) { ++ req->flags |= REQ_F_IO_DRAIN; ++ ctx->drain_next = false; ++ } ++ /* not interested in head, start from the first linked */ ++ io_for_each_link(pos, req->link) { ++ if (pos->flags & REQ_F_IO_DRAIN) { ++ ctx->drain_next = true; ++ req->flags |= REQ_F_IO_DRAIN; ++ break; ++ } + } + -+ if (resblks == 0) -+ goto done; ++ /* Still need defer if there is pending req in defer list. */ ++ spin_lock(&ctx->completion_lock); ++ if (likely(list_empty_careful(&ctx->defer_list) && ++ !(req->flags & REQ_F_IO_DRAIN))) { ++ spin_unlock(&ctx->completion_lock); ++ ctx->drain_active = false; ++ return false; ++ } ++ spin_unlock(&ctx->completion_lock); + -+ error = xfs_trans_reserve_quota_nblks(tp, dp, resblks, 0, false); -+ if (error == -EDQUOT || error == -ENOSPC) { -+ if (!retried) { -+ xfs_trans_cancel(tp); -+ xfs_blockgc_free_quota(dp, 0); -+ retried = true; -+ goto retry; ++ seq = io_get_sequence(req); ++ /* Still a chance to pass the sequence check */ ++ if (!req_need_defer(req, seq) && list_empty_careful(&ctx->defer_list)) ++ return false; ++ ++ ret = io_req_prep_async(req); ++ if (ret) ++ goto fail; ++ io_prep_async_link(req); ++ de = kmalloc(sizeof(*de), GFP_KERNEL); ++ if (!de) { ++ ret = -ENOMEM; ++fail: ++ io_req_complete_failed(req, ret); ++ return true; ++ } ++ ++ spin_lock(&ctx->completion_lock); ++ if (!req_need_defer(req, seq) && list_empty(&ctx->defer_list)) { ++ spin_unlock(&ctx->completion_lock); ++ kfree(de); ++ io_queue_async_work(req, NULL); ++ return true; ++ } ++ ++ trace_io_uring_defer(ctx, req, req->user_data); ++ de->req = req; ++ de->seq = seq; ++ list_add_tail(&de->list, &ctx->defer_list); ++ spin_unlock(&ctx->completion_lock); ++ return true; ++} ++ ++static void io_clean_op(struct io_kiocb *req) ++{ ++ if (req->flags & REQ_F_BUFFER_SELECTED) { ++ switch (req->opcode) { ++ case IORING_OP_READV: ++ case IORING_OP_READ_FIXED: ++ case IORING_OP_READ: ++ kfree((void *)(unsigned long)req->rw.addr); ++ break; ++ case IORING_OP_RECVMSG: ++ case IORING_OP_RECV: ++ kfree(req->sr_msg.kbuf); ++ break; + } ++ } + -+ *nospace_error = error; -+ resblks = 0; -+ error = 0; ++ if (req->flags & REQ_F_NEED_CLEANUP) { ++ switch (req->opcode) { ++ case IORING_OP_READV: ++ case IORING_OP_READ_FIXED: ++ case IORING_OP_READ: ++ case IORING_OP_WRITEV: ++ case IORING_OP_WRITE_FIXED: ++ case IORING_OP_WRITE: { ++ struct io_async_rw *io = req->async_data; ++ ++ kfree(io->free_iovec); ++ break; ++ } ++ case IORING_OP_RECVMSG: ++ case IORING_OP_SENDMSG: { ++ struct io_async_msghdr *io = req->async_data; ++ ++ kfree(io->free_iov); ++ break; ++ } ++ case IORING_OP_OPENAT: ++ case IORING_OP_OPENAT2: ++ if (req->open.filename) ++ putname(req->open.filename); ++ break; ++ case IORING_OP_RENAMEAT: ++ putname(req->rename.oldpath); ++ putname(req->rename.newpath); ++ break; ++ case IORING_OP_UNLINKAT: ++ putname(req->unlink.filename); ++ break; ++ case IORING_OP_MKDIRAT: ++ putname(req->mkdir.filename); ++ break; ++ case IORING_OP_SYMLINKAT: ++ putname(req->symlink.oldpath); ++ putname(req->symlink.newpath); ++ break; ++ case IORING_OP_LINKAT: ++ putname(req->hardlink.oldpath); ++ putname(req->hardlink.newpath); ++ break; ++ } + } -+ if (error) -+ goto out_cancel; ++ if ((req->flags & REQ_F_POLLED) && req->apoll) { ++ kfree(req->apoll->double_poll); ++ kfree(req->apoll); ++ req->apoll = NULL; ++ } ++ if (req->flags & REQ_F_INFLIGHT) { ++ struct io_uring_task *tctx = req->task->io_uring; + -+done: -+ *tpp = tp; -+ *dblocks = resblks; -+ return 0; ++ 
atomic_dec(&tctx->inflight_tracked); ++ } ++ if (req->flags & REQ_F_CREDS) ++ put_cred(req->creds); + -+out_cancel: -+ xfs_trans_cancel(tp); -+ return error; ++ req->flags &= ~IO_REQ_CLEAN_FLAGS; +} -diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h -index 50da47f23a077..faba74d4c7026 100644 ---- a/fs/xfs/xfs_trans.h -+++ b/fs/xfs/xfs_trans.h -@@ -265,6 +265,9 @@ int xfs_trans_alloc_icreate(struct xfs_mount *mp, struct xfs_trans_res *resv, - int xfs_trans_alloc_ichange(struct xfs_inode *ip, struct xfs_dquot *udqp, - struct xfs_dquot *gdqp, struct xfs_dquot *pdqp, bool force, - struct xfs_trans **tpp); -+int xfs_trans_alloc_dir(struct xfs_inode *dp, struct xfs_trans_res *resv, -+ struct xfs_inode *ip, unsigned int *dblocks, -+ struct xfs_trans **tpp, int *nospace_error); - - static inline void - xfs_trans_set_context( -diff --git a/fs/xfs/xfs_trans_dquot.c b/fs/xfs/xfs_trans_dquot.c -index 3872ce6714119..955c457e585a3 100644 ---- a/fs/xfs/xfs_trans_dquot.c -+++ b/fs/xfs/xfs_trans_dquot.c -@@ -603,7 +603,6 @@ xfs_dqresv_check( - return QUOTA_NL_ISOFTLONGWARN; - } - -- res->warnings++; - return QUOTA_NL_ISOFTWARN; - } - -diff --git a/fs/zonefs/super.c b/fs/zonefs/super.c -index ddc346a9df9ba..f8feaed0b54d3 100644 ---- a/fs/zonefs/super.c -+++ b/fs/zonefs/super.c -@@ -35,6 +35,17 @@ static inline int zonefs_zone_mgmt(struct inode *inode, - - lockdep_assert_held(&zi->i_truncate_mutex); - -+ /* -+ * With ZNS drives, closing an explicitly open zone that has not been -+ * written will change the zone state to "closed", that is, the zone -+ * will remain active. Since this can then cause failure of explicit -+ * open operation on other zones if the drive active zone resources -+ * are exceeded, make sure that the zone does not remain active by -+ * resetting it. -+ */ -+ if (op == REQ_OP_ZONE_CLOSE && !zi->i_wpoffset) -+ op = REQ_OP_ZONE_RESET; + - trace_zonefs_zone_mgmt(inode, op); - ret = blkdev_zone_mgmt(inode->i_sb->s_bdev, op, zi->i_zsector, - zi->i_zone_size >> SECTOR_SHIFT, GFP_NOFS); -@@ -61,15 +72,51 @@ static inline void zonefs_i_size_write(struct inode *inode, loff_t isize) - zi->i_flags &= ~ZONEFS_ZONE_OPEN; - } - --static int zonefs_iomap_begin(struct inode *inode, loff_t offset, loff_t length, -- unsigned int flags, struct iomap *iomap, -- struct iomap *srcmap) -+static int zonefs_read_iomap_begin(struct inode *inode, loff_t offset, -+ loff_t length, unsigned int flags, -+ struct iomap *iomap, struct iomap *srcmap) ++static int io_issue_sqe(struct io_kiocb *req, unsigned int issue_flags) +{ -+ struct zonefs_inode_info *zi = ZONEFS_I(inode); -+ struct super_block *sb = inode->i_sb; -+ loff_t isize; ++ struct io_ring_ctx *ctx = req->ctx; ++ const struct cred *creds = NULL; ++ int ret; + -+ /* -+ * All blocks are always mapped below EOF. If reading past EOF, -+ * act as if there is a hole up to the file maximum size. 
-+ */ -+ mutex_lock(&zi->i_truncate_mutex); -+ iomap->bdev = inode->i_sb->s_bdev; -+ iomap->offset = ALIGN_DOWN(offset, sb->s_blocksize); -+ isize = i_size_read(inode); -+ if (iomap->offset >= isize) { -+ iomap->type = IOMAP_HOLE; -+ iomap->addr = IOMAP_NULL_ADDR; -+ iomap->length = length; -+ } else { -+ iomap->type = IOMAP_MAPPED; -+ iomap->addr = (zi->i_zsector << SECTOR_SHIFT) + iomap->offset; -+ iomap->length = isize - iomap->offset; ++ if ((req->flags & REQ_F_CREDS) && req->creds != current_cred()) ++ creds = override_creds(req->creds); ++ ++ switch (req->opcode) { ++ case IORING_OP_NOP: ++ ret = io_nop(req, issue_flags); ++ break; ++ case IORING_OP_READV: ++ case IORING_OP_READ_FIXED: ++ case IORING_OP_READ: ++ ret = io_read(req, issue_flags); ++ break; ++ case IORING_OP_WRITEV: ++ case IORING_OP_WRITE_FIXED: ++ case IORING_OP_WRITE: ++ ret = io_write(req, issue_flags); ++ break; ++ case IORING_OP_FSYNC: ++ ret = io_fsync(req, issue_flags); ++ break; ++ case IORING_OP_POLL_ADD: ++ ret = io_poll_add(req, issue_flags); ++ break; ++ case IORING_OP_POLL_REMOVE: ++ ret = io_poll_update(req, issue_flags); ++ break; ++ case IORING_OP_SYNC_FILE_RANGE: ++ ret = io_sync_file_range(req, issue_flags); ++ break; ++ case IORING_OP_SENDMSG: ++ ret = io_sendmsg(req, issue_flags); ++ break; ++ case IORING_OP_SEND: ++ ret = io_send(req, issue_flags); ++ break; ++ case IORING_OP_RECVMSG: ++ ret = io_recvmsg(req, issue_flags); ++ break; ++ case IORING_OP_RECV: ++ ret = io_recv(req, issue_flags); ++ break; ++ case IORING_OP_TIMEOUT: ++ ret = io_timeout(req, issue_flags); ++ break; ++ case IORING_OP_TIMEOUT_REMOVE: ++ ret = io_timeout_remove(req, issue_flags); ++ break; ++ case IORING_OP_ACCEPT: ++ ret = io_accept(req, issue_flags); ++ break; ++ case IORING_OP_CONNECT: ++ ret = io_connect(req, issue_flags); ++ break; ++ case IORING_OP_ASYNC_CANCEL: ++ ret = io_async_cancel(req, issue_flags); ++ break; ++ case IORING_OP_FALLOCATE: ++ ret = io_fallocate(req, issue_flags); ++ break; ++ case IORING_OP_OPENAT: ++ ret = io_openat(req, issue_flags); ++ break; ++ case IORING_OP_CLOSE: ++ ret = io_close(req, issue_flags); ++ break; ++ case IORING_OP_FILES_UPDATE: ++ ret = io_files_update(req, issue_flags); ++ break; ++ case IORING_OP_STATX: ++ ret = io_statx(req, issue_flags); ++ break; ++ case IORING_OP_FADVISE: ++ ret = io_fadvise(req, issue_flags); ++ break; ++ case IORING_OP_MADVISE: ++ ret = io_madvise(req, issue_flags); ++ break; ++ case IORING_OP_OPENAT2: ++ ret = io_openat2(req, issue_flags); ++ break; ++ case IORING_OP_EPOLL_CTL: ++ ret = io_epoll_ctl(req, issue_flags); ++ break; ++ case IORING_OP_SPLICE: ++ ret = io_splice(req, issue_flags); ++ break; ++ case IORING_OP_PROVIDE_BUFFERS: ++ ret = io_provide_buffers(req, issue_flags); ++ break; ++ case IORING_OP_REMOVE_BUFFERS: ++ ret = io_remove_buffers(req, issue_flags); ++ break; ++ case IORING_OP_TEE: ++ ret = io_tee(req, issue_flags); ++ break; ++ case IORING_OP_SHUTDOWN: ++ ret = io_shutdown(req, issue_flags); ++ break; ++ case IORING_OP_RENAMEAT: ++ ret = io_renameat(req, issue_flags); ++ break; ++ case IORING_OP_UNLINKAT: ++ ret = io_unlinkat(req, issue_flags); ++ break; ++ case IORING_OP_MKDIRAT: ++ ret = io_mkdirat(req, issue_flags); ++ break; ++ case IORING_OP_SYMLINKAT: ++ ret = io_symlinkat(req, issue_flags); ++ break; ++ case IORING_OP_LINKAT: ++ ret = io_linkat(req, issue_flags); ++ break; ++ default: ++ ret = -EINVAL; ++ break; + } -+ mutex_unlock(&zi->i_truncate_mutex); + -+ trace_zonefs_iomap_begin(inode, iomap); ++ if (creds) ++ 
revert_creds(creds); ++ if (ret) ++ return ret; ++ /* If the op doesn't have a file, we're not polling for it */ ++ if ((ctx->flags & IORING_SETUP_IOPOLL) && req->file) ++ io_iopoll_req_issued(req); + + return 0; +} + -+static const struct iomap_ops zonefs_read_iomap_ops = { -+ .iomap_begin = zonefs_read_iomap_begin, -+}; ++static struct io_wq_work *io_wq_free_work(struct io_wq_work *work) ++{ ++ struct io_kiocb *req = container_of(work, struct io_kiocb, work); + -+static int zonefs_write_iomap_begin(struct inode *inode, loff_t offset, -+ loff_t length, unsigned int flags, -+ struct iomap *iomap, struct iomap *srcmap) - { - struct zonefs_inode_info *zi = ZONEFS_I(inode); - struct super_block *sb = inode->i_sb; - loff_t isize; - -- /* All I/Os should always be within the file maximum size */ -+ /* All write I/Os should always be within the file maximum size */ - if (WARN_ON_ONCE(offset + length > zi->i_max_size)) - return -EIO; - -@@ -79,7 +126,7 @@ static int zonefs_iomap_begin(struct inode *inode, loff_t offset, loff_t length, - * operation. - */ - if (WARN_ON_ONCE(zi->i_ztype == ZONEFS_ZTYPE_SEQ && -- (flags & IOMAP_WRITE) && !(flags & IOMAP_DIRECT))) -+ !(flags & IOMAP_DIRECT))) - return -EIO; - - /* -@@ -88,47 +135,44 @@ static int zonefs_iomap_begin(struct inode *inode, loff_t offset, loff_t length, - * write pointer) and unwriten beyond. - */ - mutex_lock(&zi->i_truncate_mutex); -+ iomap->bdev = inode->i_sb->s_bdev; -+ iomap->offset = ALIGN_DOWN(offset, sb->s_blocksize); -+ iomap->addr = (zi->i_zsector << SECTOR_SHIFT) + iomap->offset; - isize = i_size_read(inode); -- if (offset >= isize) -+ if (iomap->offset >= isize) { - iomap->type = IOMAP_UNWRITTEN; -- else -+ iomap->length = zi->i_max_size - iomap->offset; ++ req = io_put_req_find_next(req); ++ return req ? &req->work : NULL; ++} ++ ++static void io_wq_submit_work(struct io_wq_work *work) ++{ ++ struct io_kiocb *req = container_of(work, struct io_kiocb, work); ++ struct io_kiocb *timeout; ++ int ret = 0; ++ ++ /* one will be dropped by ->io_free_work() after returning to io-wq */ ++ if (!(req->flags & REQ_F_REFCOUNT)) ++ __io_req_set_refcount(req, 2); ++ else ++ req_ref_get(req); ++ ++ timeout = io_prep_linked_timeout(req); ++ if (timeout) ++ io_queue_linked_timeout(timeout); ++ ++ /* either cancelled or io-wq is dying, so don't touch tctx->iowq */ ++ if (work->flags & IO_WQ_WORK_CANCEL) ++ ret = -ECANCELED; ++ ++ if (!ret) { ++ do { ++ ret = io_issue_sqe(req, 0); ++ /* ++ * We can get EAGAIN for polled IO even though we're ++ * forcing a sync submission from here, since we can't ++ * wait for request slots on the block side. 
++ */ ++ if (ret != -EAGAIN || !(req->ctx->flags & IORING_SETUP_IOPOLL)) ++ break; ++ cond_resched(); ++ } while (1); ++ } ++ ++ /* avoid locking problems by failing it from a clean context */ ++ if (ret) ++ io_req_task_queue_fail(req, ret); ++} ++ ++static inline struct io_fixed_file *io_fixed_file_slot(struct io_file_table *table, ++ unsigned i) ++{ ++ return &table->files[i]; ++} ++ ++static inline struct file *io_file_from_index(struct io_ring_ctx *ctx, ++ int index) ++{ ++ struct io_fixed_file *slot = io_fixed_file_slot(&ctx->file_table, index); ++ ++ return (struct file *) (slot->file_ptr & FFS_MASK); ++} ++ ++static void io_fixed_file_set(struct io_fixed_file *file_slot, struct file *file) ++{ ++ unsigned long file_ptr = (unsigned long) file; ++ ++ if (__io_file_supports_nowait(file, READ)) ++ file_ptr |= FFS_ASYNC_READ; ++ if (__io_file_supports_nowait(file, WRITE)) ++ file_ptr |= FFS_ASYNC_WRITE; ++ if (S_ISREG(file_inode(file)->i_mode)) ++ file_ptr |= FFS_ISREG; ++ file_slot->file_ptr = file_ptr; ++} ++ ++static inline struct file *io_file_get_fixed(struct io_ring_ctx *ctx, ++ struct io_kiocb *req, int fd, ++ unsigned int issue_flags) ++{ ++ struct file *file = NULL; ++ unsigned long file_ptr; ++ ++ io_ring_submit_lock(ctx, !(issue_flags & IO_URING_F_NONBLOCK)); ++ ++ if (unlikely((unsigned int)fd >= ctx->nr_user_files)) ++ goto out; ++ fd = array_index_nospec(fd, ctx->nr_user_files); ++ file_ptr = io_fixed_file_slot(&ctx->file_table, fd)->file_ptr; ++ file = (struct file *) (file_ptr & FFS_MASK); ++ file_ptr &= ~FFS_MASK; ++ /* mask in overlapping REQ_F and FFS bits */ ++ req->flags |= (file_ptr << REQ_F_NOWAIT_READ_BIT); ++ io_req_set_rsrc_node(req); ++out: ++ io_ring_submit_unlock(ctx, !(issue_flags & IO_URING_F_NONBLOCK)); ++ return file; ++} ++ ++static struct file *io_file_get_normal(struct io_ring_ctx *ctx, ++ struct io_kiocb *req, int fd) ++{ ++ struct file *file = fget(fd); ++ ++ trace_io_uring_file_get(ctx, fd); ++ ++ /* we don't allow fixed io_uring files */ ++ if (file && unlikely(file->f_op == &io_uring_fops)) ++ io_req_track_inflight(req); ++ return file; ++} ++ ++static inline struct file *io_file_get(struct io_ring_ctx *ctx, ++ struct io_kiocb *req, int fd, bool fixed, ++ unsigned int issue_flags) ++{ ++ if (fixed) ++ return io_file_get_fixed(ctx, req, fd, issue_flags); ++ else ++ return io_file_get_normal(ctx, req, fd); ++} ++ ++static void io_req_task_link_timeout(struct io_kiocb *req, bool *locked) ++{ ++ struct io_kiocb *prev = req->timeout.prev; ++ int ret = -ENOENT; ++ ++ if (prev) { ++ if (!(req->task->flags & PF_EXITING)) ++ ret = io_try_cancel_userdata(req, prev->user_data); ++ io_req_complete_post(req, ret ?: -ETIME, 0); ++ io_put_req(prev); + } else { - iomap->type = IOMAP_MAPPED; -- if (flags & IOMAP_WRITE) -- length = zi->i_max_size - offset; -- else -- length = min(length, isize - offset); -+ iomap->length = isize - iomap->offset; ++ io_req_complete_post(req, -ETIME, 0); + } - mutex_unlock(&zi->i_truncate_mutex); - -- iomap->offset = ALIGN_DOWN(offset, sb->s_blocksize); -- iomap->length = ALIGN(offset + length, sb->s_blocksize) - iomap->offset; -- iomap->bdev = inode->i_sb->s_bdev; -- iomap->addr = (zi->i_zsector << SECTOR_SHIFT) + iomap->offset; -- - trace_zonefs_iomap_begin(inode, iomap); - - return 0; - } - --static const struct iomap_ops zonefs_iomap_ops = { -- .iomap_begin = zonefs_iomap_begin, -+static const struct iomap_ops zonefs_write_iomap_ops = { -+ .iomap_begin = zonefs_write_iomap_begin, - }; - - static int zonefs_readpage(struct file 
*unused, struct page *page) - { -- return iomap_readpage(page, &zonefs_iomap_ops); -+ return iomap_readpage(page, &zonefs_read_iomap_ops); - } - - static void zonefs_readahead(struct readahead_control *rac) - { -- iomap_readahead(rac, &zonefs_iomap_ops); -+ iomap_readahead(rac, &zonefs_read_iomap_ops); - } - - /* - * Map blocks for page writeback. This is used only on conventional zone files, - * which implies that the page range can only be within the fixed inode size. - */ --static int zonefs_map_blocks(struct iomap_writepage_ctx *wpc, -- struct inode *inode, loff_t offset) -+static int zonefs_write_map_blocks(struct iomap_writepage_ctx *wpc, -+ struct inode *inode, loff_t offset) - { - struct zonefs_inode_info *zi = ZONEFS_I(inode); - -@@ -142,12 +186,12 @@ static int zonefs_map_blocks(struct iomap_writepage_ctx *wpc, - offset < wpc->iomap.offset + wpc->iomap.length) - return 0; - -- return zonefs_iomap_begin(inode, offset, zi->i_max_size - offset, -- IOMAP_WRITE, &wpc->iomap, NULL); -+ return zonefs_write_iomap_begin(inode, offset, zi->i_max_size - offset, -+ IOMAP_WRITE, &wpc->iomap, NULL); - } - - static const struct iomap_writeback_ops zonefs_writeback_ops = { -- .map_blocks = zonefs_map_blocks, -+ .map_blocks = zonefs_write_map_blocks, - }; - - static int zonefs_writepage(struct page *page, struct writeback_control *wbc) -@@ -177,7 +221,8 @@ static int zonefs_swap_activate(struct swap_info_struct *sis, - return -EINVAL; - } - -- return iomap_swapfile_activate(sis, swap_file, span, &zonefs_iomap_ops); -+ return iomap_swapfile_activate(sis, swap_file, span, -+ &zonefs_read_iomap_ops); - } - - static const struct address_space_operations zonefs_file_aops = { -@@ -596,7 +641,7 @@ static vm_fault_t zonefs_filemap_page_mkwrite(struct vm_fault *vmf) - - /* Serialize against truncates */ - filemap_invalidate_lock_shared(inode->i_mapping); -- ret = iomap_page_mkwrite(vmf, &zonefs_iomap_ops); -+ ret = iomap_page_mkwrite(vmf, &zonefs_write_iomap_ops); - filemap_invalidate_unlock_shared(inode->i_mapping); - - sb_end_pagefault(inode->i_sb); -@@ -678,13 +723,12 @@ static ssize_t zonefs_file_dio_append(struct kiocb *iocb, struct iov_iter *from) - struct inode *inode = file_inode(iocb->ki_filp); - struct zonefs_inode_info *zi = ZONEFS_I(inode); - struct block_device *bdev = inode->i_sb->s_bdev; -- unsigned int max; -+ unsigned int max = bdev_max_zone_append_sectors(bdev); - struct bio *bio; - ssize_t size; - int nr_pages; - ssize_t ret; - -- max = queue_max_zone_append_sectors(bdev_get_queue(bdev)); - max = ALIGN_DOWN(max << SECTOR_SHIFT, inode->i_sb->s_blocksize); - iov_iter_truncate(from, max); - -@@ -851,8 +895,8 @@ static ssize_t zonefs_file_dio_write(struct kiocb *iocb, struct iov_iter *from) - if (append) - ret = zonefs_file_dio_append(iocb, from); - else -- ret = iomap_dio_rw(iocb, from, &zonefs_iomap_ops, -- &zonefs_write_dio_ops, 0); -+ ret = iomap_dio_rw(iocb, from, &zonefs_write_iomap_ops, -+ &zonefs_write_dio_ops, 0, 0); - if (zi->i_ztype == ZONEFS_ZTYPE_SEQ && - (ret > 0 || ret == -EIOCBQUEUED)) { - if (ret > 0) -@@ -893,7 +937,7 @@ static ssize_t zonefs_file_buffered_write(struct kiocb *iocb, - if (ret <= 0) - goto inode_unlock; - -- ret = iomap_file_buffered_write(iocb, from, &zonefs_iomap_ops); -+ ret = iomap_file_buffered_write(iocb, from, &zonefs_write_iomap_ops); - if (ret > 0) - iocb->ki_pos += ret; - else if (ret == -EIO) -@@ -986,8 +1030,8 @@ static ssize_t zonefs_file_read_iter(struct kiocb *iocb, struct iov_iter *to) - goto inode_unlock; - } - file_accessed(iocb->ki_filp); 
-- ret = iomap_dio_rw(iocb, to, &zonefs_iomap_ops, -- &zonefs_read_dio_ops, 0); -+ ret = iomap_dio_rw(iocb, to, &zonefs_read_iomap_ops, -+ &zonefs_read_dio_ops, 0, 0); - } else { - ret = generic_file_read_iter(iocb, to); - if (ret == -EIO) -@@ -1144,6 +1188,7 @@ static struct inode *zonefs_alloc_inode(struct super_block *sb) - inode_init_once(&zi->i_vnode); - mutex_init(&zi->i_truncate_mutex); - zi->i_wr_refcnt = 0; -+ zi->i_flags = 0; - - return &zi->i_vnode; - } -@@ -1295,12 +1340,13 @@ static void zonefs_init_dir_inode(struct inode *parent, struct inode *inode, - inc_nlink(parent); - } - --static void zonefs_init_file_inode(struct inode *inode, struct blk_zone *zone, -- enum zonefs_ztype type) -+static int zonefs_init_file_inode(struct inode *inode, struct blk_zone *zone, -+ enum zonefs_ztype type) - { - struct super_block *sb = inode->i_sb; - struct zonefs_sb_info *sbi = ZONEFS_SB(sb); - struct zonefs_inode_info *zi = ZONEFS_I(inode); -+ int ret = 0; - - inode->i_ino = zone->start >> sbi->s_zone_sectors_shift; - inode->i_mode = S_IFREG | sbi->s_perm; -@@ -1325,6 +1371,22 @@ static void zonefs_init_file_inode(struct inode *inode, struct blk_zone *zone, - sb->s_maxbytes = max(zi->i_max_size, sb->s_maxbytes); - sbi->s_blocks += zi->i_max_size >> sb->s_blocksize_bits; - sbi->s_used_blocks += zi->i_wpoffset >> sb->s_blocksize_bits; ++} ++ ++static enum hrtimer_restart io_link_timeout_fn(struct hrtimer *timer) ++{ ++ struct io_timeout_data *data = container_of(timer, ++ struct io_timeout_data, timer); ++ struct io_kiocb *prev, *req = data->req; ++ struct io_ring_ctx *ctx = req->ctx; ++ unsigned long flags; ++ ++ spin_lock_irqsave(&ctx->timeout_lock, flags); ++ prev = req->timeout.head; ++ req->timeout.head = NULL; + + /* -+ * For sequential zones, make sure that any open zone is closed first -+ * to ensure that the initial number of open zones is 0, in sync with -+ * the open zone accounting done when the mount option -+ * ZONEFS_MNTOPT_EXPLICIT_OPEN is used. ++ * We don't expect the list to be empty, that will only happen if we ++ * race with the completion of the linked work. 
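io_req_task_link_timeout() and io_link_timeout_fn() here are the two halves of IORING_OP_LINK_TIMEOUT: when the timer fires, the linked request is cancelled through io_try_cancel_userdata() and the timeout itself completes with -ETIME (or the cancellation error). A sketch of bounding a read with a 100ms deadline, liburing names assumed:

    #include <liburing.h>

    static int read_with_deadline(struct io_uring *ring, int fd,
                                  void *buf, unsigned len)
    {
        struct __kernel_timespec ts = { .tv_nsec = 100 * 1000 * 1000 };
        struct io_uring_sqe *sqe = io_uring_get_sqe(ring);

        if (!sqe)
            return -1;
        io_uring_prep_read(sqe, fd, buf, len, 0);
        /* Link the next SQE to this one. */
        io_uring_sqe_set_flags(sqe, IOSQE_IO_LINK);

        sqe = io_uring_get_sqe(ring);
        if (!sqe)
            return -1;
        io_uring_prep_link_timeout(sqe, &ts, 0);
        return io_uring_submit(ring);
    }

If the timer wins, the read completes with -ECANCELED and the timeout with -ETIME; if the read wins, the timeout is reaped with -ECANCELED.
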
+ */ -+ if (type == ZONEFS_ZTYPE_SEQ && -+ (zone->cond == BLK_ZONE_COND_IMP_OPEN || -+ zone->cond == BLK_ZONE_COND_EXP_OPEN)) { -+ mutex_lock(&zi->i_truncate_mutex); -+ ret = zonefs_zone_mgmt(inode, REQ_OP_ZONE_CLOSE); -+ mutex_unlock(&zi->i_truncate_mutex); ++ if (prev) { ++ io_remove_next_linked(prev); ++ if (!req_ref_inc_not_zero(prev)) ++ prev = NULL; + } ++ list_del(&req->timeout.list); ++ req->timeout.prev = prev; ++ spin_unlock_irqrestore(&ctx->timeout_lock, flags); + -+ return ret; - } - - static struct dentry *zonefs_create_inode(struct dentry *parent, -@@ -1334,6 +1396,7 @@ static struct dentry *zonefs_create_inode(struct dentry *parent, - struct inode *dir = d_inode(parent); - struct dentry *dentry; - struct inode *inode; ++ req->io_task_work.func = io_req_task_link_timeout; ++ io_req_task_work_add(req); ++ return HRTIMER_NORESTART; ++} ++ ++static void io_queue_linked_timeout(struct io_kiocb *req) ++{ ++ struct io_ring_ctx *ctx = req->ctx; ++ ++ spin_lock_irq(&ctx->timeout_lock); ++ /* ++ * If the back reference is NULL, then our linked request finished ++ * before we got a chance to setup the timer ++ */ ++ if (req->timeout.head) { ++ struct io_timeout_data *data = req->async_data; ++ ++ data->timer.function = io_link_timeout_fn; ++ hrtimer_start(&data->timer, timespec64_to_ktime(data->ts), ++ data->mode); ++ list_add_tail(&req->timeout.list, &ctx->ltimeout_list); ++ } ++ spin_unlock_irq(&ctx->timeout_lock); ++ /* drop submission reference */ ++ io_put_req(req); ++} ++ ++static void __io_queue_sqe(struct io_kiocb *req) ++ __must_hold(&req->ctx->uring_lock) ++{ ++ struct io_kiocb *linked_timeout; + int ret; - - dentry = d_alloc_name(parent, name); - if (!dentry) -@@ -1344,10 +1407,16 @@ static struct dentry *zonefs_create_inode(struct dentry *parent, - goto dput; - - inode->i_ctime = inode->i_mtime = inode->i_atime = dir->i_ctime; -- if (zone) -- zonefs_init_file_inode(inode, zone, type); -- else -+ if (zone) { -+ ret = zonefs_init_file_inode(inode, zone, type); -+ if (ret) { -+ iput(inode); -+ goto dput; ++ ++issue_sqe: ++ ret = io_issue_sqe(req, IO_URING_F_NONBLOCK|IO_URING_F_COMPLETE_DEFER); ++ ++ /* ++ * We async punt it if the file wasn't marked NOWAIT, or if the file ++ * doesn't support non-blocking read/write attempts ++ */ ++ if (likely(!ret)) { ++ if (req->flags & REQ_F_COMPLETE_INLINE) { ++ struct io_ring_ctx *ctx = req->ctx; ++ struct io_submit_state *state = &ctx->submit_state; ++ ++ state->compl_reqs[state->compl_nr++] = req; ++ if (state->compl_nr == ARRAY_SIZE(state->compl_reqs)) ++ io_submit_flush_completions(ctx); ++ return; ++ } ++ ++ linked_timeout = io_prep_linked_timeout(req); ++ if (linked_timeout) ++ io_queue_linked_timeout(linked_timeout); ++ } else if (ret == -EAGAIN && !(req->flags & REQ_F_NOWAIT)) { ++ linked_timeout = io_prep_linked_timeout(req); ++ ++ switch (io_arm_poll_handler(req)) { ++ case IO_APOLL_READY: ++ if (linked_timeout) ++ io_queue_linked_timeout(linked_timeout); ++ goto issue_sqe; ++ case IO_APOLL_ABORTED: ++ /* ++ * Queued up for async execution, worker will release ++ * submit reference when the iocb is actually submitted. 
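Stepping back to the fixed-file helpers a few hunks up (io_fixed_file_set(), io_file_get_fixed()): they let a request address a pre-registered file table by index, skipping the per-request fget()/fput() that io_file_get_normal() performs. A sketch of the userspace side, with the usual caveat that the helpers are liburing's rather than part of this patch:

    #include <liburing.h>

    /* Register fd in slot 0 once, then read through the slot with
     * IOSQE_FIXED_FILE so the lookup hits io_file_get_fixed() above. */
    static int read_via_fixed_slot(struct io_uring *ring, int fd,
                                   void *buf, unsigned len)
    {
        struct io_uring_sqe *sqe;

        if (io_uring_register_files(ring, &fd, 1) < 0)
            return -1;

        sqe = io_uring_get_sqe(ring);
        if (!sqe)
            return -1;
        io_uring_prep_read(sqe, 0 /* slot index, not an fd */, buf, len, 0);
        io_uring_sqe_set_flags(sqe, IOSQE_FIXED_FILE);
        return io_uring_submit(ring);
    }
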
++ */ ++ io_queue_async_work(req, NULL); ++ break; + } ++ ++ if (linked_timeout) ++ io_queue_linked_timeout(linked_timeout); + } else { - zonefs_init_dir_inode(dir, inode, type); ++ io_req_complete_failed(req, ret); + } ++} + - d_add(dentry, inode); - dir->i_size++; - -@@ -1658,11 +1727,6 @@ static int zonefs_fill_super(struct super_block *sb, void *data, int silent) - sbi->s_mount_opts = ZONEFS_MNTOPT_ERRORS_RO; - sbi->s_max_open_zones = bdev_max_open_zones(sb->s_bdev); - atomic_set(&sbi->s_open_zones, 0); -- if (!sbi->s_max_open_zones && -- sbi->s_mount_opts & ZONEFS_MNTOPT_EXPLICIT_OPEN) { -- zonefs_info(sb, "No open zones limit. Ignoring explicit_open mount option\n"); -- sbi->s_mount_opts &= ~ZONEFS_MNTOPT_EXPLICIT_OPEN; -- } - - ret = zonefs_read_super(sb); - if (ret) -@@ -1681,6 +1745,12 @@ static int zonefs_fill_super(struct super_block *sb, void *data, int silent) - zonefs_info(sb, "Mounting %u zones", - blkdev_nr_zones(sb->s_bdev->bd_disk)); - -+ if (!sbi->s_max_open_zones && -+ sbi->s_mount_opts & ZONEFS_MNTOPT_EXPLICIT_OPEN) { -+ zonefs_info(sb, "No open zones limit. Ignoring explicit_open mount option\n"); -+ sbi->s_mount_opts &= ~ZONEFS_MNTOPT_EXPLICIT_OPEN; -+ } ++static inline void io_queue_sqe(struct io_kiocb *req) ++ __must_hold(&req->ctx->uring_lock) ++{ ++ if (unlikely(req->ctx->drain_active) && io_drain_req(req)) ++ return; + - /* Create root directory inode */ - ret = -ENOMEM; - inode = new_inode(sb); -@@ -1787,5 +1857,6 @@ static void __exit zonefs_exit(void) - MODULE_AUTHOR("Damien Le Moal"); - MODULE_DESCRIPTION("Zone file system for zoned block devices"); - MODULE_LICENSE("GPL"); -+MODULE_ALIAS_FS("zonefs"); - module_init(zonefs_init); - module_exit(zonefs_exit); -diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h -index 13d93371790ec..e9c7d7b270e73 100644 ---- a/include/acpi/acpi_bus.h -+++ b/include/acpi/acpi_bus.h -@@ -613,9 +613,10 @@ int acpi_enable_wakeup_device_power(struct acpi_device *dev, int state); - int acpi_disable_wakeup_device_power(struct acpi_device *dev); - - #ifdef CONFIG_X86 --bool acpi_device_always_present(struct acpi_device *adev); -+bool acpi_device_override_status(struct acpi_device *adev, unsigned long long *status); - #else --static inline bool acpi_device_always_present(struct acpi_device *adev) -+static inline bool acpi_device_override_status(struct acpi_device *adev, -+ unsigned long long *status) - { - return false; - } -diff --git a/include/acpi/actypes.h b/include/acpi/actypes.h -index 92c71dfce0d5d..cefbb7ad253e0 100644 ---- a/include/acpi/actypes.h -+++ b/include/acpi/actypes.h -@@ -536,8 +536,14 @@ typedef u64 acpi_integer; - * Can be used with access_width of struct acpi_generic_address and access_size of - * struct acpi_resource_generic_register. 
- */ --#define ACPI_ACCESS_BIT_WIDTH(size) (1 << ((size) + 2)) --#define ACPI_ACCESS_BYTE_WIDTH(size) (1 << ((size) - 1)) -+#define ACPI_ACCESS_BIT_SHIFT 2 -+#define ACPI_ACCESS_BYTE_SHIFT -1 -+#define ACPI_ACCESS_BIT_MAX (31 - ACPI_ACCESS_BIT_SHIFT) -+#define ACPI_ACCESS_BYTE_MAX (31 - ACPI_ACCESS_BYTE_SHIFT) -+#define ACPI_ACCESS_BIT_DEFAULT (8 - ACPI_ACCESS_BIT_SHIFT) -+#define ACPI_ACCESS_BYTE_DEFAULT (8 - ACPI_ACCESS_BYTE_SHIFT) -+#define ACPI_ACCESS_BIT_WIDTH(size) (1 << ((size) + ACPI_ACCESS_BIT_SHIFT)) -+#define ACPI_ACCESS_BYTE_WIDTH(size) (1 << ((size) + ACPI_ACCESS_BYTE_SHIFT)) - - /******************************************************************************* - * -diff --git a/include/acpi/apei.h b/include/acpi/apei.h -index 680f80960c3dc..a6ac2e8b72da8 100644 ---- a/include/acpi/apei.h -+++ b/include/acpi/apei.h -@@ -27,14 +27,16 @@ extern int hest_disable; - extern int erst_disable; - #ifdef CONFIG_ACPI_APEI_GHES - extern bool ghes_disable; -+void __init ghes_init(void); - #else - #define ghes_disable 1 -+static inline void ghes_init(void) { } - #endif - - #ifdef CONFIG_ACPI_APEI - void __init acpi_hest_init(void); - #else --static inline void acpi_hest_init(void) { return; } -+static inline void acpi_hest_init(void) { } - #endif - - typedef int (*apei_hest_func_t)(struct acpi_hest_header *hest_hdr, void *data); -diff --git a/include/acpi/cppc_acpi.h b/include/acpi/cppc_acpi.h -index bc159a9b4a733..6b14414b9ec12 100644 ---- a/include/acpi/cppc_acpi.h -+++ b/include/acpi/cppc_acpi.h -@@ -17,7 +17,7 @@ - #include <acpi/pcc.h> - #include <acpi/processor.h> - --/* Support CPPCv2 and CPPCv3 */ -+/* CPPCv2 and CPPCv3 support */ - #define CPPC_V2_REV 2 - #define CPPC_V3_REV 3 - #define CPPC_V2_NUM_ENT 21 -diff --git a/include/asm-generic/bitops/atomic.h b/include/asm-generic/bitops/atomic.h -index 3096f086b5a32..71ab4ba9c25d1 100644 ---- a/include/asm-generic/bitops/atomic.h -+++ b/include/asm-generic/bitops/atomic.h -@@ -39,9 +39,6 @@ arch_test_and_set_bit(unsigned int nr, volatile unsigned long *p) - unsigned long mask = BIT_MASK(nr); - - p += BIT_WORD(nr); -- if (READ_ONCE(*p) & mask) -- return 1; -- - old = arch_atomic_long_fetch_or(mask, (atomic_long_t *)p); - return !!(old & mask); - } -@@ -53,9 +50,6 @@ arch_test_and_clear_bit(unsigned int nr, volatile unsigned long *p) - unsigned long mask = BIT_MASK(nr); - - p += BIT_WORD(nr); -- if (!(READ_ONCE(*p) & mask)) -- return 0; -- - old = arch_atomic_long_fetch_andnot(mask, (atomic_long_t *)p); - return !!(old & mask); - } -diff --git a/include/asm-generic/bitops/find.h b/include/asm-generic/bitops/find.h -index 0d132ee2a2913..835f959a25f25 100644 ---- a/include/asm-generic/bitops/find.h -+++ b/include/asm-generic/bitops/find.h -@@ -97,6 +97,7 @@ unsigned long find_next_zero_bit(const unsigned long *addr, unsigned long size, - - #ifdef CONFIG_GENERIC_FIND_FIRST_BIT - -+#ifndef find_first_bit - /** - * find_first_bit - find the first set bit in a memory region - * @addr: The address to start the search at -@@ -116,7 +117,9 @@ unsigned long find_first_bit(const unsigned long *addr, unsigned long size) - - return _find_first_bit(addr, size); - } -+#endif - -+#ifndef find_first_zero_bit - /** - * find_first_zero_bit - find the first cleared bit in a memory region - * @addr: The address to start the search at -@@ -136,6 +139,8 @@ unsigned long find_first_zero_bit(const unsigned long *addr, unsigned long size) - - return _find_first_zero_bit(addr, size); - } -+#endif ++ if (likely(!(req->flags & (REQ_F_FORCE_ASYNC | REQ_F_FAIL)))) { ++ 
__io_queue_sqe(req); ++ } else if (req->flags & REQ_F_FAIL) { ++ io_req_complete_fail_submit(req); ++ } else { ++ int ret = io_req_prep_async(req); + - #else /* CONFIG_GENERIC_FIND_FIRST_BIT */ - - #ifndef find_first_bit -diff --git a/include/asm-generic/io.h b/include/asm-generic/io.h -index 7ce93aaf69f8d..98954dda57344 100644 ---- a/include/asm-generic/io.h -+++ b/include/asm-generic/io.h -@@ -1125,9 +1125,7 @@ static inline void memcpy_toio(volatile void __iomem *addr, const void *buffer, - } - #endif - --#ifndef CONFIG_GENERIC_DEVMEM_IS_ALLOWED - extern int devmem_is_allowed(unsigned long pfn); --#endif - - #endif /* __KERNEL__ */ - -diff --git a/include/asm-generic/sections.h b/include/asm-generic/sections.h -index d16302d3eb597..72f1e2a8c1670 100644 ---- a/include/asm-generic/sections.h -+++ b/include/asm-generic/sections.h -@@ -114,7 +114,7 @@ static inline bool memory_contains(void *begin, void *end, void *virt, - /** - * memory_intersects - checks if the region occupied by an object intersects - * with another memory region -- * @begin: virtual address of the beginning of the memory regien -+ * @begin: virtual address of the beginning of the memory region - * @end: virtual address of the end of the memory region - * @virt: virtual address of the memory object - * @size: size of the memory object -@@ -127,7 +127,10 @@ static inline bool memory_intersects(void *begin, void *end, void *virt, - { - void *vend = virt + size; - -- return (virt >= begin && virt < end) || (vend >= begin && vend < end); -+ if (virt < end && vend > begin) ++ if (unlikely(ret)) ++ io_req_complete_failed(req, ret); ++ else ++ io_queue_async_work(req, NULL); ++ } ++} ++ ++/* ++ * Check SQE restrictions (opcode and flags). ++ * ++ * Returns 'true' if SQE is allowed, 'false' otherwise. ++ */ ++static inline bool io_check_restriction(struct io_ring_ctx *ctx, ++ struct io_kiocb *req, ++ unsigned int sqe_flags) ++{ ++ if (likely(!ctx->restricted)) + return true; + -+ return false; - } - - /** -diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h -index 2c68a545ffa7d..71942a1c642d4 100644 ---- a/include/asm-generic/tlb.h -+++ b/include/asm-generic/tlb.h -@@ -565,10 +565,14 @@ static inline void tlb_flush_p4d_range(struct mmu_gather *tlb, - #define tlb_remove_huge_tlb_entry(h, tlb, ptep, address) \ - do { \ - unsigned long _sz = huge_page_size(h); \ -- if (_sz == PMD_SIZE) \ -- tlb_flush_pmd_range(tlb, address, _sz); \ -- else if (_sz == PUD_SIZE) \ -+ if (_sz >= P4D_SIZE) \ -+ tlb_flush_p4d_range(tlb, address, _sz); \ -+ else if (_sz >= PUD_SIZE) \ - tlb_flush_pud_range(tlb, address, _sz); \ -+ else if (_sz >= PMD_SIZE) \ -+ tlb_flush_pmd_range(tlb, address, _sz); \ -+ else \ -+ tlb_flush_pte_range(tlb, address, _sz); \ - __tlb_remove_tlb_entry(tlb, ptep, address); \ - } while (0) - -diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h -index f2984af2b85bd..9eac202fbcfdf 100644 ---- a/include/asm-generic/vmlinux.lds.h -+++ b/include/asm-generic/vmlinux.lds.h -@@ -549,10 +549,9 @@ - */ - #ifdef CONFIG_CFI_CLANG - #define TEXT_CFI_JT \ -- . = ALIGN(PMD_SIZE); \ -+ ALIGN_FUNCTION(); \ - __cfi_jt_start = .; \ - *(.text..L.cfi.jumptable .text..L.cfi.jumptable.*) \ -- . 
= ALIGN(PMD_SIZE); \ - __cfi_jt_end = .; - #else - #define TEXT_CFI_JT -diff --git a/include/crypto/blake2s.h b/include/crypto/blake2s.h -index bc3fb59442ce5..4e30e1799e614 100644 ---- a/include/crypto/blake2s.h -+++ b/include/crypto/blake2s.h -@@ -101,7 +101,4 @@ static inline void blake2s(u8 *out, const u8 *in, const u8 *key, - blake2s_final(&state, out); - } - --void blake2s256_hmac(u8 *out, const u8 *in, const u8 *key, const size_t inlen, -- const size_t keylen); -- - #endif /* _CRYPTO_BLAKE2S_H */ -diff --git a/include/crypto/chacha.h b/include/crypto/chacha.h -index dabaee6987186..b3ea73b819443 100644 ---- a/include/crypto/chacha.h -+++ b/include/crypto/chacha.h -@@ -47,12 +47,19 @@ static inline void hchacha_block(const u32 *state, u32 *out, int nrounds) - hchacha_block_generic(state, out, nrounds); - } - -+enum chacha_constants { /* expand 32-byte k */ -+ CHACHA_CONSTANT_EXPA = 0x61707865U, -+ CHACHA_CONSTANT_ND_3 = 0x3320646eU, -+ CHACHA_CONSTANT_2_BY = 0x79622d32U, -+ CHACHA_CONSTANT_TE_K = 0x6b206574U -+}; ++ if (!test_bit(req->opcode, ctx->restrictions.sqe_op)) ++ return false; + - static inline void chacha_init_consts(u32 *state) - { -- state[0] = 0x61707865; /* "expa" */ -- state[1] = 0x3320646e; /* "nd 3" */ -- state[2] = 0x79622d32; /* "2-by" */ -- state[3] = 0x6b206574; /* "te k" */ -+ state[0] = CHACHA_CONSTANT_EXPA; -+ state[1] = CHACHA_CONSTANT_ND_3; -+ state[2] = CHACHA_CONSTANT_2_BY; -+ state[3] = CHACHA_CONSTANT_TE_K; - } - - void chacha_init_arch(u32 *state, const u32 *key, const u8 *iv); -diff --git a/include/crypto/drbg.h b/include/crypto/drbg.h -index c4165126937e4..a6c3b8e7deb64 100644 ---- a/include/crypto/drbg.h -+++ b/include/crypto/drbg.h -@@ -105,6 +105,12 @@ struct drbg_test_data { - struct drbg_string *testentropy; /* TEST PARAMETER: test entropy */ - }; - -+enum drbg_seed_state { -+ DRBG_SEED_STATE_UNSEEDED, -+ DRBG_SEED_STATE_PARTIAL, /* Seeded with !rng_is_initialized() */ -+ DRBG_SEED_STATE_FULL, -+}; ++ if ((sqe_flags & ctx->restrictions.sqe_flags_required) != ++ ctx->restrictions.sqe_flags_required) ++ return false; + - struct drbg_state { - struct mutex drbg_mutex; /* lock around DRBG */ - unsigned char *V; /* internal state 10.1.1.1 1a) */ -@@ -127,16 +133,14 @@ struct drbg_state { - struct crypto_wait ctr_wait; /* CTR mode async wait obj */ - struct scatterlist sg_in, sg_out; /* CTR mode SGLs */ - -- bool seeded; /* DRBG fully seeded? */ -+ enum drbg_seed_state seeded; /* DRBG fully seeded? */ - bool pr; /* Prediction resistance enabled? */ - bool fips_primed; /* Continuous test primed? 
*/ - unsigned char *prev; /* FIPS 140-2 continuous test value */ -- struct work_struct seed_work; /* asynchronous seeding support */ - struct crypto_rng *jent; - const struct drbg_state_ops *d_ops; - const struct drbg_core *core; - struct drbg_string test_data; -- struct random_ready_callback random_ready; - }; - - static inline __u8 drbg_statelen(struct drbg_state *drbg) -diff --git a/include/crypto/internal/blake2s.h b/include/crypto/internal/blake2s.h -index 8e50d487500f2..506d56530ca93 100644 ---- a/include/crypto/internal/blake2s.h -+++ b/include/crypto/internal/blake2s.h -@@ -8,112 +8,14 @@ - #define _CRYPTO_INTERNAL_BLAKE2S_H - - #include <crypto/blake2s.h> --#include <crypto/internal/hash.h> - #include <linux/string.h> - --void blake2s_compress_generic(struct blake2s_state *state,const u8 *block, -+void blake2s_compress_generic(struct blake2s_state *state, const u8 *block, - size_t nblocks, const u32 inc); - --void blake2s_compress_arch(struct blake2s_state *state,const u8 *block, -- size_t nblocks, const u32 inc); -+void blake2s_compress(struct blake2s_state *state, const u8 *block, -+ size_t nblocks, const u32 inc); - - bool blake2s_selftest(void); - --static inline void blake2s_set_lastblock(struct blake2s_state *state) --{ -- state->f[0] = -1; --} -- --typedef void (*blake2s_compress_t)(struct blake2s_state *state, -- const u8 *block, size_t nblocks, u32 inc); -- --/* Helper functions for BLAKE2s shared by the library and shash APIs */ -- --static inline void __blake2s_update(struct blake2s_state *state, -- const u8 *in, size_t inlen, -- blake2s_compress_t compress) --{ -- const size_t fill = BLAKE2S_BLOCK_SIZE - state->buflen; -- -- if (unlikely(!inlen)) -- return; -- if (inlen > fill) { -- memcpy(state->buf + state->buflen, in, fill); -- (*compress)(state, state->buf, 1, BLAKE2S_BLOCK_SIZE); -- state->buflen = 0; -- in += fill; -- inlen -= fill; -- } -- if (inlen > BLAKE2S_BLOCK_SIZE) { -- const size_t nblocks = DIV_ROUND_UP(inlen, BLAKE2S_BLOCK_SIZE); -- /* Hash one less (full) block than strictly possible */ -- (*compress)(state, in, nblocks - 1, BLAKE2S_BLOCK_SIZE); -- in += BLAKE2S_BLOCK_SIZE * (nblocks - 1); -- inlen -= BLAKE2S_BLOCK_SIZE * (nblocks - 1); -- } -- memcpy(state->buf + state->buflen, in, inlen); -- state->buflen += inlen; --} -- --static inline void __blake2s_final(struct blake2s_state *state, u8 *out, -- blake2s_compress_t compress) --{ -- blake2s_set_lastblock(state); -- memset(state->buf + state->buflen, 0, -- BLAKE2S_BLOCK_SIZE - state->buflen); /* Padding */ -- (*compress)(state, state->buf, 1, state->buflen); -- cpu_to_le32_array(state->h, ARRAY_SIZE(state->h)); -- memcpy(out, state->h, state->outlen); --} -- --/* Helper functions for shash implementations of BLAKE2s */ -- --struct blake2s_tfm_ctx { -- u8 key[BLAKE2S_KEY_SIZE]; -- unsigned int keylen; --}; -- --static inline int crypto_blake2s_setkey(struct crypto_shash *tfm, -- const u8 *key, unsigned int keylen) --{ -- struct blake2s_tfm_ctx *tctx = crypto_shash_ctx(tfm); -- -- if (keylen == 0 || keylen > BLAKE2S_KEY_SIZE) -- return -EINVAL; -- -- memcpy(tctx->key, key, keylen); -- tctx->keylen = keylen; -- -- return 0; --} -- --static inline int crypto_blake2s_init(struct shash_desc *desc) --{ -- const struct blake2s_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm); -- struct blake2s_state *state = shash_desc_ctx(desc); -- unsigned int outlen = crypto_shash_digestsize(desc->tfm); -- -- __blake2s_init(state, outlen, tctx->key, tctx->keylen); -- return 0; --} -- --static inline int 
crypto_blake2s_update(struct shash_desc *desc, -- const u8 *in, unsigned int inlen, -- blake2s_compress_t compress) --{ -- struct blake2s_state *state = shash_desc_ctx(desc); -- -- __blake2s_update(state, in, inlen, compress); -- return 0; --} -- --static inline int crypto_blake2s_final(struct shash_desc *desc, u8 *out, -- blake2s_compress_t compress) --{ -- struct blake2s_state *state = shash_desc_ctx(desc); -- -- __blake2s_final(state, out, compress); -- return 0; --} -- - #endif /* _CRYPTO_INTERNAL_BLAKE2S_H */ -diff --git a/include/drm/drm_bridge.h b/include/drm/drm_bridge.h -index 46bdfa48c4134..1648ce265cba0 100644 ---- a/include/drm/drm_bridge.h -+++ b/include/drm/drm_bridge.h -@@ -914,4 +914,17 @@ struct drm_bridge *devm_drm_panel_bridge_add_typed(struct device *dev, - struct drm_connector *drm_panel_bridge_connector(struct drm_bridge *bridge); - #endif - -+#if defined(CONFIG_OF) && defined(CONFIG_DRM_PANEL_BRIDGE) -+struct drm_bridge *devm_drm_of_get_bridge(struct device *dev, struct device_node *node, -+ u32 port, u32 endpoint); -+#else -+static inline struct drm_bridge *devm_drm_of_get_bridge(struct device *dev, -+ struct device_node *node, -+ u32 port, -+ u32 endpoint) ++ if (sqe_flags & ~(ctx->restrictions.sqe_flags_allowed | ++ ctx->restrictions.sqe_flags_required)) ++ return false; ++ ++ return true; ++} ++ ++static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req, ++ const struct io_uring_sqe *sqe) ++ __must_hold(&ctx->uring_lock) +{ -+ return ERR_PTR(-ENODEV); ++ struct io_submit_state *state; ++ unsigned int sqe_flags; ++ int personality, ret = 0; ++ ++ /* req is partially pre-initialised, see io_preinit_req() */ ++ req->opcode = READ_ONCE(sqe->opcode); ++ /* same numerical values with corresponding REQ_F_*, safe to copy */ ++ req->flags = sqe_flags = READ_ONCE(sqe->flags); ++ req->user_data = READ_ONCE(sqe->user_data); ++ req->file = NULL; ++ req->fixed_rsrc_refs = NULL; ++ req->task = current; ++ ++ /* enforce forwards compatibility on users */ ++ if (unlikely(sqe_flags & ~SQE_VALID_FLAGS)) ++ return -EINVAL; ++ if (unlikely(req->opcode >= IORING_OP_LAST)) ++ return -EINVAL; ++ if (!io_check_restriction(ctx, req, sqe_flags)) ++ return -EACCES; ++ ++ if ((sqe_flags & IOSQE_BUFFER_SELECT) && ++ !io_op_defs[req->opcode].buffer_select) ++ return -EOPNOTSUPP; ++ if (unlikely(sqe_flags & IOSQE_IO_DRAIN)) ++ ctx->drain_active = true; ++ ++ personality = READ_ONCE(sqe->personality); ++ if (personality) { ++ req->creds = xa_load(&ctx->personalities, personality); ++ if (!req->creds) ++ return -EINVAL; ++ get_cred(req->creds); ++ req->flags |= REQ_F_CREDS; ++ } ++ state = &ctx->submit_state; ++ ++ /* ++ * Plug now if we have more than 1 IO left after this, and the target ++ * is potentially a read/write to block based storage. 
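++ * Plugging lets the block layer hold back the queued bios and
++ * submit them as one batch when the plug is released, so a burst
++ * of storage SQEs avoids per-request submission overhead.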
++ */ ++ if (!state->plug_started && state->ios_left > 1 && ++ io_op_defs[req->opcode].plug) { ++ blk_start_plug(&state->plug); ++ state->plug_started = true; ++ } ++ ++ if (io_op_defs[req->opcode].needs_file) { ++ req->file = io_file_get(ctx, req, READ_ONCE(sqe->fd), ++ (sqe_flags & IOSQE_FIXED_FILE), ++ IO_URING_F_NONBLOCK); ++ if (unlikely(!req->file)) ++ ret = -EBADF; ++ } ++ ++ state->ios_left--; ++ return ret; +} -+#endif + - #endif -diff --git a/include/drm/drm_connector.h b/include/drm/drm_connector.h -index 1647960c9e506..1f43d7c6724aa 100644 ---- a/include/drm/drm_connector.h -+++ b/include/drm/drm_connector.h -@@ -566,10 +566,16 @@ struct drm_display_info { - bool rgb_quant_range_selectable; - - /** -- * @edid_hdmi_dc_modes: Mask of supported hdmi deep color modes. Even -- * more stuff redundant with @bus_formats. -+ * @edid_hdmi_rgb444_dc_modes: Mask of supported hdmi deep color modes -+ * in RGB 4:4:4. Even more stuff redundant with @bus_formats. - */ -- u8 edid_hdmi_dc_modes; -+ u8 edid_hdmi_rgb444_dc_modes; ++static int io_submit_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req, ++ const struct io_uring_sqe *sqe) ++ __must_hold(&ctx->uring_lock) ++{ ++ struct io_submit_link *link = &ctx->submit_state.link; ++ int ret; + -+ /** -+ * @edid_hdmi_ycbcr444_dc_modes: Mask of supported hdmi deep color -+ * modes in YCbCr 4:4:4. Even more stuff redundant with @bus_formats. ++ ret = io_init_req(ctx, req, sqe); ++ if (unlikely(ret)) { ++fail_req: ++ /* fail even hard links since we don't submit */ ++ if (link->head) { ++ /* ++ * we can judge a link req is failed or cancelled by if ++ * REQ_F_FAIL is set, but the head is an exception since ++ * it may be set REQ_F_FAIL because of other req's failure ++ * so let's leverage req->result to distinguish if a head ++ * is set REQ_F_FAIL because of its failure or other req's ++ * failure so that we can set the correct ret code for it. ++ * init result here to avoid affecting the normal path. ++ */ ++ if (!(link->head->flags & REQ_F_FAIL)) ++ req_fail_link_node(link->head, -ECANCELED); ++ } else if (!(req->flags & (REQ_F_LINK | REQ_F_HARDLINK))) { ++ /* ++ * the current req is a normal req, we should return ++ * error and thus break the submittion loop. ++ */ ++ io_req_complete_failed(req, ret); ++ return ret; ++ } ++ req_fail_link_node(req, ret); ++ } else { ++ ret = io_req_prep(req, sqe); ++ if (unlikely(ret)) ++ goto fail_req; ++ } ++ ++ /* don't need @sqe from now on */ ++ trace_io_uring_submit_sqe(ctx, req, req->opcode, req->user_data, ++ req->flags, true, ++ ctx->flags & IORING_SETUP_SQPOLL); ++ ++ /* ++ * If we already have a head request, queue this one for async ++ * submittal once the head completes. If we don't have a head but ++ * IOSQE_IO_LINK is set in the sqe, start a new head. This one will be ++ * submitted sync once the chain is complete. If none of those ++ * conditions are true (normal request), then just queue it. + */ -+ u8 edid_hdmi_ycbcr444_dc_modes; - - /** - * @cea_rev: CEA revision of the HDMI sink. 
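++ * (In other words: a linked chain is accumulated request by
++ * request here, and only enters io_queue_sqe() once its final,
++ * unlinked request has been seen.)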
-diff --git a/include/drm/drm_dp_helper.h b/include/drm/drm_dp_helper.h -index 1d5b3dbb6e563..dfb46915015b1 100644 ---- a/include/drm/drm_dp_helper.h -+++ b/include/drm/drm_dp_helper.h -@@ -455,7 +455,7 @@ struct drm_panel; - # define DP_FEC_BIT_ERROR_COUNT_CAP (1 << 3) - - /* DP-HDMI2.1 PCON DSC ENCODER SUPPORT */ --#define DP_PCON_DSC_ENCODER_CAP_SIZE 0xC /* 0x9E - 0x92 */ -+#define DP_PCON_DSC_ENCODER_CAP_SIZE 0xD /* 0x92 through 0x9E */ - #define DP_PCON_DSC_ENCODER 0x092 - # define DP_PCON_DSC_ENCODER_SUPPORTED (1 << 0) - # define DP_PCON_DSC_PPS_ENC_OVERRIDE (1 << 1) -diff --git a/include/drm/drm_edid.h b/include/drm/drm_edid.h -index deccfd39e6db2..c24559f5329dd 100644 ---- a/include/drm/drm_edid.h -+++ b/include/drm/drm_edid.h -@@ -121,7 +121,7 @@ struct detailed_data_monitor_range { - u8 supported_scalings; - u8 preferred_refresh; - } __attribute__((packed)) cvt; -- } formula; -+ } __attribute__((packed)) formula; - } __attribute__((packed)); - - struct detailed_data_wpindex { -@@ -154,7 +154,7 @@ struct detailed_non_pixel { - struct detailed_data_wpindex color; - struct std_timing timings[6]; - struct cvt_timing cvt[4]; -- } data; -+ } __attribute__((packed)) data; - } __attribute__((packed)); - - #define EDID_DETAIL_EST_TIMINGS 0xf7 -@@ -172,7 +172,7 @@ struct detailed_timing { - union { - struct detailed_pixel_timing pixel_data; - struct detailed_non_pixel other_data; -- } data; -+ } __attribute__((packed)) data; - } __attribute__((packed)); - - #define DRM_EDID_INPUT_SERRATION_VSYNC (1 << 0) -diff --git a/include/drm/drm_gem_shmem_helper.h b/include/drm/drm_gem_shmem_helper.h -index 434328d8a0d90..311d66c9cf4b1 100644 ---- a/include/drm/drm_gem_shmem_helper.h -+++ b/include/drm/drm_gem_shmem_helper.h -@@ -107,16 +107,17 @@ struct drm_gem_shmem_object { - container_of(obj, struct drm_gem_shmem_object, base) - - struct drm_gem_shmem_object *drm_gem_shmem_create(struct drm_device *dev, size_t size); --void drm_gem_shmem_free_object(struct drm_gem_object *obj); -+void drm_gem_shmem_free(struct drm_gem_shmem_object *shmem); - - int drm_gem_shmem_get_pages(struct drm_gem_shmem_object *shmem); - void drm_gem_shmem_put_pages(struct drm_gem_shmem_object *shmem); --int drm_gem_shmem_pin(struct drm_gem_object *obj); --void drm_gem_shmem_unpin(struct drm_gem_object *obj); --int drm_gem_shmem_vmap(struct drm_gem_object *obj, struct dma_buf_map *map); --void drm_gem_shmem_vunmap(struct drm_gem_object *obj, struct dma_buf_map *map); -+int drm_gem_shmem_pin(struct drm_gem_shmem_object *shmem); -+void drm_gem_shmem_unpin(struct drm_gem_shmem_object *shmem); -+int drm_gem_shmem_vmap(struct drm_gem_shmem_object *shmem, struct dma_buf_map *map); -+void drm_gem_shmem_vunmap(struct drm_gem_shmem_object *shmem, struct dma_buf_map *map); -+int drm_gem_shmem_mmap(struct drm_gem_shmem_object *shmem, struct vm_area_struct *vma); - --int drm_gem_shmem_madvise(struct drm_gem_object *obj, int madv); -+int drm_gem_shmem_madvise(struct drm_gem_shmem_object *shmem, int madv); - - static inline bool drm_gem_shmem_is_purgeable(struct drm_gem_shmem_object *shmem) - { -@@ -125,29 +126,156 @@ static inline bool drm_gem_shmem_is_purgeable(struct drm_gem_shmem_object *shmem - !shmem->base.dma_buf && !shmem->base.import_attach; - } - --void drm_gem_shmem_purge_locked(struct drm_gem_object *obj); --bool drm_gem_shmem_purge(struct drm_gem_object *obj); -+void drm_gem_shmem_purge_locked(struct drm_gem_shmem_object *shmem); -+bool drm_gem_shmem_purge(struct drm_gem_shmem_object *shmem); - --struct drm_gem_shmem_object * 
--drm_gem_shmem_create_with_handle(struct drm_file *file_priv, -- struct drm_device *dev, size_t size, -- uint32_t *handle); -+struct sg_table *drm_gem_shmem_get_sg_table(struct drm_gem_shmem_object *shmem); -+struct sg_table *drm_gem_shmem_get_pages_sgt(struct drm_gem_shmem_object *shmem); - --int drm_gem_shmem_dumb_create(struct drm_file *file, struct drm_device *dev, -- struct drm_mode_create_dumb *args); -+void drm_gem_shmem_print_info(const struct drm_gem_shmem_object *shmem, -+ struct drm_printer *p, unsigned int indent); ++ if (link->head) { ++ struct io_kiocb *head = link->head; ++ ++ if (!(req->flags & REQ_F_FAIL)) { ++ ret = io_req_prep_async(req); ++ if (unlikely(ret)) { ++ req_fail_link_node(req, ret); ++ if (!(head->flags & REQ_F_FAIL)) ++ req_fail_link_node(head, -ECANCELED); ++ } ++ } ++ trace_io_uring_link(ctx, req, head); ++ link->last->link = req; ++ link->last = req; ++ ++ /* last request of a link, enqueue the link */ ++ if (!(req->flags & (REQ_F_LINK | REQ_F_HARDLINK))) { ++ link->head = NULL; ++ io_queue_sqe(head); ++ } ++ } else { ++ if (req->flags & (REQ_F_LINK | REQ_F_HARDLINK)) { ++ link->head = req; ++ link->last = req; ++ } else { ++ io_queue_sqe(req); ++ } ++ } ++ ++ return 0; ++} + +/* -+ * GEM object functions ++ * Batched submission is done, ensure local IO is flushed out. + */ ++static void io_submit_state_end(struct io_submit_state *state, ++ struct io_ring_ctx *ctx) ++{ ++ if (state->link.head) ++ io_queue_sqe(state->link.head); ++ if (state->compl_nr) ++ io_submit_flush_completions(ctx); ++ if (state->plug_started) ++ blk_finish_plug(&state->plug); ++} + -+/** -+ * drm_gem_shmem_object_free - GEM object function for drm_gem_shmem_free() -+ * @obj: GEM object to free -+ * -+ * This function wraps drm_gem_shmem_free(). Drivers that employ the shmem helpers -+ * should use it as their &drm_gem_object_funcs.free handler. ++/* ++ * Start submission side cache. + */ -+static inline void drm_gem_shmem_object_free(struct drm_gem_object *obj) ++static void io_submit_state_start(struct io_submit_state *state, ++ unsigned int max_ios) +{ -+ struct drm_gem_shmem_object *shmem = to_drm_gem_shmem_obj(obj); ++ state->plug_started = false; ++ state->ios_left = max_ios; ++ /* set only head, no need to init link_last in advance */ ++ state->link.head = NULL; ++} + -+ drm_gem_shmem_free(shmem); ++static void io_commit_sqring(struct io_ring_ctx *ctx) ++{ ++ struct io_rings *rings = ctx->rings; ++ ++ /* ++ * Ensure any loads from the SQEs are done at this point, ++ * since once we write the new head, the application could ++ * write new data to them. ++ */ ++ smp_store_release(&rings->sq.head, ctx->cached_sq_head); +} + -+/** -+ * drm_gem_shmem_object_print_info() - Print &drm_gem_shmem_object info for debugfs -+ * @p: DRM printer -+ * @indent: Tab indentation level -+ * @obj: GEM object -+ * -+ * This function wraps drm_gem_shmem_print_info(). Drivers that employ the shmem helpers should -+ * use this function as their &drm_gem_object_funcs.print_info handler. ++/* ++ * Fetch an sqe, if one is available. Note this returns a pointer to memory ++ * that is mapped by userspace. This means that care needs to be taken to ++ * ensure that reads are stable, as we cannot rely on userspace always ++ * being a good citizen. If members of the sqe are validated and then later ++ * used, it's important that those reads are done through READ_ONCE() to ++ * prevent a re-load down the line. 
+ */ -+static inline void drm_gem_shmem_object_print_info(struct drm_printer *p, unsigned int indent, -+ const struct drm_gem_object *obj) ++static const struct io_uring_sqe *io_get_sqe(struct io_ring_ctx *ctx) +{ -+ const struct drm_gem_shmem_object *shmem = to_drm_gem_shmem_obj(obj); ++ unsigned head, mask = ctx->sq_entries - 1; ++ unsigned sq_idx = ctx->cached_sq_head++ & mask; + -+ drm_gem_shmem_print_info(shmem, p, indent); ++ /* ++ * The cached sq head (or cq tail) serves two purposes: ++ * ++ * 1) allows us to batch the cost of updating the user visible ++ * head updates. ++ * 2) allows the kernel side to track the head on its own, even ++ * though the application is the one updating it. ++ */ ++ head = READ_ONCE(ctx->sq_array[sq_idx]); ++ if (likely(head < ctx->sq_entries)) ++ return &ctx->sq_sqes[head]; ++ ++ /* drop invalid entries */ ++ ctx->cq_extra--; ++ WRITE_ONCE(ctx->rings->sq_dropped, ++ READ_ONCE(ctx->rings->sq_dropped) + 1); ++ return NULL; +} + -+/** -+ * drm_gem_shmem_object_pin - GEM object function for drm_gem_shmem_pin() -+ * @obj: GEM object -+ * -+ * This function wraps drm_gem_shmem_pin(). Drivers that employ the shmem helpers should -+ * use it as their &drm_gem_object_funcs.pin handler. -+ */ -+static inline int drm_gem_shmem_object_pin(struct drm_gem_object *obj) ++static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr) ++ __must_hold(&ctx->uring_lock) +{ -+ struct drm_gem_shmem_object *shmem = to_drm_gem_shmem_obj(obj); ++ int submitted = 0; + -+ return drm_gem_shmem_pin(shmem); ++ /* make sure SQ entry isn't read before tail */ ++ nr = min3(nr, ctx->sq_entries, io_sqring_entries(ctx)); ++ if (!percpu_ref_tryget_many(&ctx->refs, nr)) ++ return -EAGAIN; ++ io_get_task_refs(nr); ++ ++ io_submit_state_start(&ctx->submit_state, nr); ++ while (submitted < nr) { ++ const struct io_uring_sqe *sqe; ++ struct io_kiocb *req; ++ ++ req = io_alloc_req(ctx); ++ if (unlikely(!req)) { ++ if (!submitted) ++ submitted = -EAGAIN; ++ break; ++ } ++ sqe = io_get_sqe(ctx); ++ if (unlikely(!sqe)) { ++ list_add(&req->inflight_entry, &ctx->submit_state.free_list); ++ break; ++ } ++ /* will complete beyond this point, count as submitted */ ++ submitted++; ++ if (io_submit_sqe(ctx, req, sqe)) ++ break; ++ } ++ ++ if (unlikely(submitted != nr)) { ++ int ref_used = (submitted == -EAGAIN) ? 0 : submitted; ++ int unused = nr - ref_used; ++ ++ current->io_uring->cached_refs += unused; ++ percpu_ref_put_many(&ctx->refs, unused); ++ } ++ ++ io_submit_state_end(&ctx->submit_state, ctx); ++ /* Commit SQ ring head once we've consumed and submitted all SQEs */ ++ io_commit_sqring(ctx); ++ ++ return submitted; +} + -+/** -+ * drm_gem_shmem_object_unpin - GEM object function for drm_gem_shmem_unpin() -+ * @obj: GEM object -+ * -+ * This function wraps drm_gem_shmem_unpin(). Drivers that employ the shmem helpers should -+ * use it as their &drm_gem_object_funcs.unpin handler. 
-+ */ -+static inline void drm_gem_shmem_object_unpin(struct drm_gem_object *obj) ++static inline bool io_sqd_events_pending(struct io_sq_data *sqd) +{ -+ struct drm_gem_shmem_object *shmem = to_drm_gem_shmem_obj(obj); ++ return READ_ONCE(sqd->state); ++} + -+ drm_gem_shmem_unpin(shmem); ++static inline void io_ring_set_wakeup_flag(struct io_ring_ctx *ctx) ++{ ++ /* Tell userspace we may need a wakeup call */ ++ spin_lock(&ctx->completion_lock); ++ WRITE_ONCE(ctx->rings->sq_flags, ++ ctx->rings->sq_flags | IORING_SQ_NEED_WAKEUP); ++ spin_unlock(&ctx->completion_lock); +} - --int drm_gem_shmem_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma); -+/** -+ * drm_gem_shmem_object_get_sg_table - GEM object function for drm_gem_shmem_get_sg_table() -+ * @obj: GEM object -+ * -+ * This function wraps drm_gem_shmem_get_sg_table(). Drivers that employ the shmem helpers should -+ * use it as their &drm_gem_object_funcs.get_sg_table handler. -+ * -+ * Returns: -+ * A pointer to the scatter/gather table of pinned pages or NULL on failure. -+ */ -+static inline struct sg_table *drm_gem_shmem_object_get_sg_table(struct drm_gem_object *obj) ++ ++static inline void io_ring_clear_wakeup_flag(struct io_ring_ctx *ctx) +{ -+ struct drm_gem_shmem_object *shmem = to_drm_gem_shmem_obj(obj); ++ spin_lock(&ctx->completion_lock); ++ WRITE_ONCE(ctx->rings->sq_flags, ++ ctx->rings->sq_flags & ~IORING_SQ_NEED_WAKEUP); ++ spin_unlock(&ctx->completion_lock); ++} + -+ return drm_gem_shmem_get_sg_table(shmem); ++static int __io_sq_thread(struct io_ring_ctx *ctx, bool cap_entries) ++{ ++ unsigned int to_submit; ++ int ret = 0; ++ ++ to_submit = io_sqring_entries(ctx); ++ /* if we're handling multiple rings, cap submit size for fairness */ ++ if (cap_entries && to_submit > IORING_SQPOLL_CAP_ENTRIES_VALUE) ++ to_submit = IORING_SQPOLL_CAP_ENTRIES_VALUE; ++ ++ if (!list_empty(&ctx->iopoll_list) || to_submit) { ++ unsigned nr_events = 0; ++ const struct cred *creds = NULL; ++ ++ if (ctx->sq_creds != current_cred()) ++ creds = override_creds(ctx->sq_creds); ++ ++ mutex_lock(&ctx->uring_lock); ++ if (!list_empty(&ctx->iopoll_list)) ++ io_do_iopoll(ctx, &nr_events, 0); ++ ++ /* ++ * Don't submit if refs are dying, good for io_uring_register(), ++ * but also it is relied upon by io_ring_exit_work() ++ */ ++ if (to_submit && likely(!percpu_ref_is_dying(&ctx->refs)) && ++ !(ctx->flags & IORING_SETUP_R_DISABLED)) ++ ret = io_submit_sqes(ctx, to_submit); ++ mutex_unlock(&ctx->uring_lock); ++ ++ if (to_submit && wq_has_sleeper(&ctx->sqo_sq_wait)) ++ wake_up(&ctx->sqo_sq_wait); ++ if (creds) ++ revert_creds(creds); ++ } ++ ++ return ret; +} + -+/* -+ * drm_gem_shmem_object_vmap - GEM object function for drm_gem_shmem_vmap() -+ * @obj: GEM object -+ * @map: Returns the kernel virtual address of the SHMEM GEM object's backing store. -+ * -+ * This function wraps drm_gem_shmem_vmap(). Drivers that employ the shmem helpers should -+ * use it as their &drm_gem_object_funcs.vmap handler. -+ * -+ * Returns: -+ * 0 on success or a negative error code on failure. 
-+ */ -+static inline int drm_gem_shmem_object_vmap(struct drm_gem_object *obj, struct dma_buf_map *map) ++static void io_sqd_update_thread_idle(struct io_sq_data *sqd) +{ -+ struct drm_gem_shmem_object *shmem = to_drm_gem_shmem_obj(obj); ++ struct io_ring_ctx *ctx; ++ unsigned sq_thread_idle = 0; + -+ return drm_gem_shmem_vmap(shmem, map); ++ list_for_each_entry(ctx, &sqd->ctx_list, sqd_list) ++ sq_thread_idle = max(sq_thread_idle, ctx->sq_thread_idle); ++ sqd->sq_thread_idle = sq_thread_idle; ++} ++ ++static bool io_sqd_handle_event(struct io_sq_data *sqd) ++{ ++ bool did_sig = false; ++ struct ksignal ksig; ++ ++ if (test_bit(IO_SQ_THREAD_SHOULD_PARK, &sqd->state) || ++ signal_pending(current)) { ++ mutex_unlock(&sqd->lock); ++ if (signal_pending(current)) ++ did_sig = get_signal(&ksig); ++ cond_resched(); ++ mutex_lock(&sqd->lock); ++ } ++ return did_sig || test_bit(IO_SQ_THREAD_SHOULD_STOP, &sqd->state); ++} ++ ++static int io_sq_thread(void *data) ++{ ++ struct io_sq_data *sqd = data; ++ struct io_ring_ctx *ctx; ++ unsigned long timeout = 0; ++ char buf[TASK_COMM_LEN]; ++ DEFINE_WAIT(wait); ++ ++ snprintf(buf, sizeof(buf), "iou-sqp-%d", sqd->task_pid); ++ set_task_comm(current, buf); ++ ++ if (sqd->sq_cpu != -1) ++ set_cpus_allowed_ptr(current, cpumask_of(sqd->sq_cpu)); ++ else ++ set_cpus_allowed_ptr(current, cpu_online_mask); ++ current->flags |= PF_NO_SETAFFINITY; ++ ++ mutex_lock(&sqd->lock); ++ while (1) { ++ bool cap_entries, sqt_spin = false; ++ ++ if (io_sqd_events_pending(sqd) || signal_pending(current)) { ++ if (io_sqd_handle_event(sqd)) ++ break; ++ timeout = jiffies + sqd->sq_thread_idle; ++ } ++ ++ cap_entries = !list_is_singular(&sqd->ctx_list); ++ list_for_each_entry(ctx, &sqd->ctx_list, sqd_list) { ++ int ret = __io_sq_thread(ctx, cap_entries); ++ ++ if (!sqt_spin && (ret > 0 || !list_empty(&ctx->iopoll_list))) ++ sqt_spin = true; ++ } ++ if (io_run_task_work()) ++ sqt_spin = true; ++ ++ if (sqt_spin || !time_after(jiffies, timeout)) { ++ cond_resched(); ++ if (sqt_spin) ++ timeout = jiffies + sqd->sq_thread_idle; ++ continue; ++ } ++ ++ prepare_to_wait(&sqd->wait, &wait, TASK_INTERRUPTIBLE); ++ if (!io_sqd_events_pending(sqd) && !current->task_works) { ++ bool needs_sched = true; ++ ++ list_for_each_entry(ctx, &sqd->ctx_list, sqd_list) { ++ io_ring_set_wakeup_flag(ctx); ++ ++ if ((ctx->flags & IORING_SETUP_IOPOLL) && ++ !list_empty_careful(&ctx->iopoll_list)) { ++ needs_sched = false; ++ break; ++ } ++ if (io_sqring_entries(ctx)) { ++ needs_sched = false; ++ break; ++ } ++ } ++ ++ if (needs_sched) { ++ mutex_unlock(&sqd->lock); ++ schedule(); ++ mutex_lock(&sqd->lock); ++ } ++ list_for_each_entry(ctx, &sqd->ctx_list, sqd_list) ++ io_ring_clear_wakeup_flag(ctx); ++ } ++ ++ finish_wait(&sqd->wait, &wait); ++ timeout = jiffies + sqd->sq_thread_idle; ++ } ++ ++ io_uring_cancel_generic(true, sqd); ++ sqd->thread = NULL; ++ list_for_each_entry(ctx, &sqd->ctx_list, sqd_list) ++ io_ring_set_wakeup_flag(ctx); ++ io_run_task_work(); ++ mutex_unlock(&sqd->lock); ++ ++ complete(&sqd->exited); ++ do_exit(0); ++} ++ ++struct io_wait_queue { ++ struct wait_queue_entry wq; ++ struct io_ring_ctx *ctx; ++ unsigned cq_tail; ++ unsigned nr_timeouts; ++}; ++ ++static inline bool io_should_wake(struct io_wait_queue *iowq) ++{ ++ struct io_ring_ctx *ctx = iowq->ctx; ++ int dist = ctx->cached_cq_tail - (int) iowq->cq_tail; ++ ++ /* ++ * Wake up if we have enough events, or if a timeout occurred since we ++ * started waiting. 
For timeouts, we always want to return to userspace, ++ * regardless of event count. ++ */ ++ return dist >= 0 || atomic_read(&ctx->cq_timeouts) != iowq->nr_timeouts; ++} ++ ++static int io_wake_function(struct wait_queue_entry *curr, unsigned int mode, ++ int wake_flags, void *key) ++{ ++ struct io_wait_queue *iowq = container_of(curr, struct io_wait_queue, ++ wq); ++ ++ /* ++ * Cannot safely flush overflowed CQEs from here, ensure we wake up ++ * the task, and the next invocation will do it. ++ */ ++ if (io_should_wake(iowq) || test_bit(0, &iowq->ctx->check_cq_overflow)) ++ return autoremove_wake_function(curr, mode, wake_flags, key); ++ return -1; ++} ++ ++static int io_run_task_work_sig(void) ++{ ++ if (io_run_task_work()) ++ return 1; ++ if (!signal_pending(current)) ++ return 0; ++ if (test_thread_flag(TIF_NOTIFY_SIGNAL)) ++ return -ERESTARTSYS; ++ return -EINTR; ++} ++ ++/* when returns >0, the caller should retry */ ++static inline int io_cqring_wait_schedule(struct io_ring_ctx *ctx, ++ struct io_wait_queue *iowq, ++ ktime_t *timeout) ++{ ++ int ret; ++ ++ /* make sure we run task_work before checking for signals */ ++ ret = io_run_task_work_sig(); ++ if (ret || io_should_wake(iowq)) ++ return ret; ++ /* let the caller flush overflows, retry */ ++ if (test_bit(0, &ctx->check_cq_overflow)) ++ return 1; ++ ++ if (!schedule_hrtimeout(timeout, HRTIMER_MODE_ABS)) ++ return -ETIME; ++ return 1; +} + +/* -+ * drm_gem_shmem_object_vunmap - GEM object function for drm_gem_shmem_vunmap() -+ * @obj: GEM object -+ * @map: Kernel virtual address where the SHMEM GEM object was mapped -+ * -+ * This function wraps drm_gem_shmem_vunmap(). Drivers that employ the shmem helpers should -+ * use it as their &drm_gem_object_funcs.vunmap handler. ++ * Wait until events become available, if we don't already have some. The ++ * application must reap them itself, as they reside on the shared cq ring. 
+ */ -+static inline void drm_gem_shmem_object_vunmap(struct drm_gem_object *obj, struct dma_buf_map *map) ++static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, ++ const sigset_t __user *sig, size_t sigsz, ++ struct __kernel_timespec __user *uts) +{ -+ struct drm_gem_shmem_object *shmem = to_drm_gem_shmem_obj(obj); ++ struct io_wait_queue iowq; ++ struct io_rings *rings = ctx->rings; ++ ktime_t timeout = KTIME_MAX; ++ int ret; + -+ drm_gem_shmem_vunmap(shmem, map); ++ do { ++ io_cqring_overflow_flush(ctx); ++ if (io_cqring_events(ctx) >= min_events) ++ return 0; ++ if (!io_run_task_work()) ++ break; ++ } while (1); ++ ++ if (uts) { ++ struct timespec64 ts; ++ ++ if (get_timespec64(&ts, uts)) ++ return -EFAULT; ++ timeout = ktime_add_ns(timespec64_to_ktime(ts), ktime_get_ns()); ++ } ++ ++ if (sig) { ++#ifdef CONFIG_COMPAT ++ if (in_compat_syscall()) ++ ret = set_compat_user_sigmask((const compat_sigset_t __user *)sig, ++ sigsz); ++ else ++#endif ++ ret = set_user_sigmask(sig, sigsz); ++ ++ if (ret) ++ return ret; ++ } ++ ++ init_waitqueue_func_entry(&iowq.wq, io_wake_function); ++ iowq.wq.private = current; ++ INIT_LIST_HEAD(&iowq.wq.entry); ++ iowq.ctx = ctx; ++ iowq.nr_timeouts = atomic_read(&ctx->cq_timeouts); ++ iowq.cq_tail = READ_ONCE(ctx->rings->cq.head) + min_events; ++ ++ trace_io_uring_cqring_wait(ctx, min_events); ++ do { ++ /* if we can't even flush overflow, don't wait for more */ ++ if (!io_cqring_overflow_flush(ctx)) { ++ ret = -EBUSY; ++ break; ++ } ++ prepare_to_wait_exclusive(&ctx->cq_wait, &iowq.wq, ++ TASK_INTERRUPTIBLE); ++ ret = io_cqring_wait_schedule(ctx, &iowq, &timeout); ++ finish_wait(&ctx->cq_wait, &iowq.wq); ++ cond_resched(); ++ } while (ret > 0); ++ ++ restore_saved_sigmask_unless(ret == -EINTR); ++ ++ return READ_ONCE(rings->cq.head) == READ_ONCE(rings->cq.tail) ? ret : 0; +} + -+/** -+ * drm_gem_shmem_object_mmap - GEM object function for drm_gem_shmem_mmap() -+ * @obj: GEM object -+ * @vma: VMA for the area to be mapped -+ * -+ * This function wraps drm_gem_shmem_mmap(). Drivers that employ the shmem helpers should -+ * use it as their &drm_gem_object_funcs.mmap handler. -+ * -+ * Returns: -+ * 0 on success or a negative error code on failure. 
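++/*
++ * The tag tables used by the rsrc (file/buffer) registration code are
++ * allocated below in page-sized chunks instead of one flat array, so
++ * registering a very large set never requires a high-order allocation.
++ */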
++static void io_free_page_table(void **table, size_t size) ++{ ++ unsigned i, nr_tables = DIV_ROUND_UP(size, PAGE_SIZE); ++ ++ for (i = 0; i < nr_tables; i++) ++ kfree(table[i]); ++ kfree(table); ++} ++ ++static void **io_alloc_page_table(size_t size) ++{ ++ unsigned i, nr_tables = DIV_ROUND_UP(size, PAGE_SIZE); ++ size_t init_size = size; ++ void **table; ++ ++ table = kcalloc(nr_tables, sizeof(*table), GFP_KERNEL_ACCOUNT); ++ if (!table) ++ return NULL; ++ ++ for (i = 0; i < nr_tables; i++) { ++ unsigned int this_size = min_t(size_t, size, PAGE_SIZE); ++ ++ table[i] = kzalloc(this_size, GFP_KERNEL_ACCOUNT); ++ if (!table[i]) { ++ io_free_page_table(table, init_size); ++ return NULL; ++ } ++ size -= this_size; ++ } ++ return table; ++} ++ ++static void io_rsrc_node_destroy(struct io_rsrc_node *ref_node) ++{ ++ percpu_ref_exit(&ref_node->refs); ++ kfree(ref_node); ++} ++ ++static void io_rsrc_node_ref_zero(struct percpu_ref *ref) ++{ ++ struct io_rsrc_node *node = container_of(ref, struct io_rsrc_node, refs); ++ struct io_ring_ctx *ctx = node->rsrc_data->ctx; ++ unsigned long flags; ++ bool first_add = false; ++ unsigned long delay = HZ; ++ ++ spin_lock_irqsave(&ctx->rsrc_ref_lock, flags); ++ node->done = true; ++ ++ /* if we are mid-quiesce then do not delay */ ++ if (node->rsrc_data->quiesce) ++ delay = 0; ++ ++ while (!list_empty(&ctx->rsrc_ref_list)) { ++ node = list_first_entry(&ctx->rsrc_ref_list, ++ struct io_rsrc_node, node); ++ /* recycle ref nodes in order */ ++ if (!node->done) ++ break; ++ list_del(&node->node); ++ first_add |= llist_add(&node->llist, &ctx->rsrc_put_llist); ++ } ++ spin_unlock_irqrestore(&ctx->rsrc_ref_lock, flags); ++ ++ if (first_add) ++ mod_delayed_work(system_wq, &ctx->rsrc_put_work, delay); ++} ++ ++static struct io_rsrc_node *io_rsrc_node_alloc(struct io_ring_ctx *ctx) ++{ ++ struct io_rsrc_node *ref_node; ++ ++ ref_node = kzalloc(sizeof(*ref_node), GFP_KERNEL); ++ if (!ref_node) ++ return NULL; ++ ++ if (percpu_ref_init(&ref_node->refs, io_rsrc_node_ref_zero, ++ 0, GFP_KERNEL)) { ++ kfree(ref_node); ++ return NULL; ++ } ++ INIT_LIST_HEAD(&ref_node->node); ++ INIT_LIST_HEAD(&ref_node->rsrc_list); ++ ref_node->done = false; ++ return ref_node; ++} ++ ++static void io_rsrc_node_switch(struct io_ring_ctx *ctx, ++ struct io_rsrc_data *data_to_kill) ++{ ++ WARN_ON_ONCE(!ctx->rsrc_backup_node); ++ WARN_ON_ONCE(data_to_kill && !ctx->rsrc_node); ++ ++ if (data_to_kill) { ++ struct io_rsrc_node *rsrc_node = ctx->rsrc_node; ++ ++ rsrc_node->rsrc_data = data_to_kill; ++ spin_lock_irq(&ctx->rsrc_ref_lock); ++ list_add_tail(&rsrc_node->node, &ctx->rsrc_ref_list); ++ spin_unlock_irq(&ctx->rsrc_ref_lock); ++ ++ atomic_inc(&data_to_kill->refs); ++ percpu_ref_kill(&rsrc_node->refs); ++ ctx->rsrc_node = NULL; ++ } ++ ++ if (!ctx->rsrc_node) { ++ ctx->rsrc_node = ctx->rsrc_backup_node; ++ ctx->rsrc_backup_node = NULL; ++ } ++} ++ ++static int io_rsrc_node_switch_start(struct io_ring_ctx *ctx) ++{ ++ if (ctx->rsrc_backup_node) ++ return 0; ++ ctx->rsrc_backup_node = io_rsrc_node_alloc(ctx); ++ return ctx->rsrc_backup_node ? 
0 : -ENOMEM; ++} ++ ++static int io_rsrc_ref_quiesce(struct io_rsrc_data *data, struct io_ring_ctx *ctx) ++{ ++ int ret; ++ ++ /* As we may drop ->uring_lock, other task may have started quiesce */ ++ if (data->quiesce) ++ return -ENXIO; ++ ++ data->quiesce = true; ++ do { ++ ret = io_rsrc_node_switch_start(ctx); ++ if (ret) ++ break; ++ io_rsrc_node_switch(ctx, data); ++ ++ /* kill initial ref, already quiesced if zero */ ++ if (atomic_dec_and_test(&data->refs)) ++ break; ++ mutex_unlock(&ctx->uring_lock); ++ flush_delayed_work(&ctx->rsrc_put_work); ++ ret = wait_for_completion_interruptible(&data->done); ++ if (!ret) { ++ mutex_lock(&ctx->uring_lock); ++ if (atomic_read(&data->refs) > 0) { ++ /* ++ * it has been revived by another thread while ++ * we were unlocked ++ */ ++ mutex_unlock(&ctx->uring_lock); ++ } else { ++ break; ++ } ++ } ++ ++ atomic_inc(&data->refs); ++ /* wait for all works potentially completing data->done */ ++ flush_delayed_work(&ctx->rsrc_put_work); ++ reinit_completion(&data->done); ++ ++ ret = io_run_task_work_sig(); ++ mutex_lock(&ctx->uring_lock); ++ } while (ret >= 0); ++ data->quiesce = false; ++ ++ return ret; ++} ++ ++static u64 *io_get_tag_slot(struct io_rsrc_data *data, unsigned int idx) ++{ ++ unsigned int off = idx & IO_RSRC_TAG_TABLE_MASK; ++ unsigned int table_idx = idx >> IO_RSRC_TAG_TABLE_SHIFT; ++ ++ return &data->tags[table_idx][off]; ++} ++ ++static void io_rsrc_data_free(struct io_rsrc_data *data) ++{ ++ size_t size = data->nr * sizeof(data->tags[0][0]); ++ ++ if (data->tags) ++ io_free_page_table((void **)data->tags, size); ++ kfree(data); ++} ++ ++static int io_rsrc_data_alloc(struct io_ring_ctx *ctx, rsrc_put_fn *do_put, ++ u64 __user *utags, unsigned nr, ++ struct io_rsrc_data **pdata) ++{ ++ struct io_rsrc_data *data; ++ int ret = -ENOMEM; ++ unsigned i; ++ ++ data = kzalloc(sizeof(*data), GFP_KERNEL); ++ if (!data) ++ return -ENOMEM; ++ data->tags = (u64 **)io_alloc_page_table(nr * sizeof(data->tags[0][0])); ++ if (!data->tags) { ++ kfree(data); ++ return -ENOMEM; ++ } ++ ++ data->nr = nr; ++ data->ctx = ctx; ++ data->do_put = do_put; ++ if (utags) { ++ ret = -EFAULT; ++ for (i = 0; i < nr; i++) { ++ u64 *tag_slot = io_get_tag_slot(data, i); ++ ++ if (copy_from_user(tag_slot, &utags[i], ++ sizeof(*tag_slot))) ++ goto fail; ++ } ++ } ++ ++ atomic_set(&data->refs, 1); ++ init_completion(&data->done); ++ *pdata = data; ++ return 0; ++fail: ++ io_rsrc_data_free(data); ++ return ret; ++} ++ ++static bool io_alloc_file_tables(struct io_file_table *table, unsigned nr_files) ++{ ++ table->files = kvcalloc(nr_files, sizeof(table->files[0]), ++ GFP_KERNEL_ACCOUNT); ++ return !!table->files; ++} ++ ++static void io_free_file_tables(struct io_file_table *table) ++{ ++ kvfree(table->files); ++ table->files = NULL; ++} ++ ++static void __io_sqe_files_unregister(struct io_ring_ctx *ctx) ++{ ++#if defined(CONFIG_UNIX) ++ if (ctx->ring_sock) { ++ struct sock *sock = ctx->ring_sock->sk; ++ struct sk_buff *skb; ++ ++ while ((skb = skb_dequeue(&sock->sk_receive_queue)) != NULL) ++ kfree_skb(skb); ++ } ++#else ++ int i; ++ ++ for (i = 0; i < ctx->nr_user_files; i++) { ++ struct file *file; ++ ++ file = io_file_from_index(ctx, i); ++ if (file) ++ fput(file); ++ } ++#endif ++ io_free_file_tables(&ctx->file_table); ++ io_rsrc_data_free(ctx->file_data); ++ ctx->file_data = NULL; ++ ctx->nr_user_files = 0; ++} ++ ++static int io_sqe_files_unregister(struct io_ring_ctx *ctx) ++{ ++ unsigned nr = ctx->nr_user_files; ++ int ret; ++ ++ if (!ctx->file_data) ++ return 
-ENXIO; ++ ++ /* ++ * Quiesce may unlock ->uring_lock, and while it's not held ++ * prevent new requests using the table. ++ */ ++ ctx->nr_user_files = 0; ++ ret = io_rsrc_ref_quiesce(ctx->file_data, ctx); ++ ctx->nr_user_files = nr; ++ if (!ret) ++ __io_sqe_files_unregister(ctx); ++ return ret; ++} ++ ++static void io_sq_thread_unpark(struct io_sq_data *sqd) ++ __releases(&sqd->lock) ++{ ++ WARN_ON_ONCE(sqd->thread == current); ++ ++ /* ++ * Do the dance but not conditional clear_bit() because it'd race with ++ * other threads incrementing park_pending and setting the bit. ++ */ ++ clear_bit(IO_SQ_THREAD_SHOULD_PARK, &sqd->state); ++ if (atomic_dec_return(&sqd->park_pending)) ++ set_bit(IO_SQ_THREAD_SHOULD_PARK, &sqd->state); ++ mutex_unlock(&sqd->lock); ++} ++ ++static void io_sq_thread_park(struct io_sq_data *sqd) ++ __acquires(&sqd->lock) ++{ ++ WARN_ON_ONCE(sqd->thread == current); ++ ++ atomic_inc(&sqd->park_pending); ++ set_bit(IO_SQ_THREAD_SHOULD_PARK, &sqd->state); ++ mutex_lock(&sqd->lock); ++ if (sqd->thread) ++ wake_up_process(sqd->thread); ++} ++ ++static void io_sq_thread_stop(struct io_sq_data *sqd) ++{ ++ WARN_ON_ONCE(sqd->thread == current); ++ WARN_ON_ONCE(test_bit(IO_SQ_THREAD_SHOULD_STOP, &sqd->state)); ++ ++ set_bit(IO_SQ_THREAD_SHOULD_STOP, &sqd->state); ++ mutex_lock(&sqd->lock); ++ if (sqd->thread) ++ wake_up_process(sqd->thread); ++ mutex_unlock(&sqd->lock); ++ wait_for_completion(&sqd->exited); ++} ++ ++static void io_put_sq_data(struct io_sq_data *sqd) ++{ ++ if (refcount_dec_and_test(&sqd->refs)) { ++ WARN_ON_ONCE(atomic_read(&sqd->park_pending)); ++ ++ io_sq_thread_stop(sqd); ++ kfree(sqd); ++ } ++} ++ ++static void io_sq_thread_finish(struct io_ring_ctx *ctx) ++{ ++ struct io_sq_data *sqd = ctx->sq_data; ++ ++ if (sqd) { ++ io_sq_thread_park(sqd); ++ list_del_init(&ctx->sqd_list); ++ io_sqd_update_thread_idle(sqd); ++ io_sq_thread_unpark(sqd); ++ ++ io_put_sq_data(sqd); ++ ctx->sq_data = NULL; ++ } ++} ++ ++static struct io_sq_data *io_attach_sq_data(struct io_uring_params *p) ++{ ++ struct io_ring_ctx *ctx_attach; ++ struct io_sq_data *sqd; ++ struct fd f; ++ ++ f = fdget(p->wq_fd); ++ if (!f.file) ++ return ERR_PTR(-ENXIO); ++ if (f.file->f_op != &io_uring_fops) { ++ fdput(f); ++ return ERR_PTR(-EINVAL); ++ } ++ ++ ctx_attach = f.file->private_data; ++ sqd = ctx_attach->sq_data; ++ if (!sqd) { ++ fdput(f); ++ return ERR_PTR(-EINVAL); ++ } ++ if (sqd->task_tgid != current->tgid) { ++ fdput(f); ++ return ERR_PTR(-EPERM); ++ } ++ ++ refcount_inc(&sqd->refs); ++ fdput(f); ++ return sqd; ++} ++ ++static struct io_sq_data *io_get_sq_data(struct io_uring_params *p, ++ bool *attached) ++{ ++ struct io_sq_data *sqd; ++ ++ *attached = false; ++ if (p->flags & IORING_SETUP_ATTACH_WQ) { ++ sqd = io_attach_sq_data(p); ++ if (!IS_ERR(sqd)) { ++ *attached = true; ++ return sqd; ++ } ++ /* fall through for EPERM case, setup new sqd/task */ ++ if (PTR_ERR(sqd) != -EPERM) ++ return sqd; ++ } ++ ++ sqd = kzalloc(sizeof(*sqd), GFP_KERNEL); ++ if (!sqd) ++ return ERR_PTR(-ENOMEM); ++ ++ atomic_set(&sqd->park_pending, 0); ++ refcount_set(&sqd->refs, 1); ++ INIT_LIST_HEAD(&sqd->ctx_list); ++ mutex_init(&sqd->lock); ++ init_waitqueue_head(&sqd->wait); ++ init_completion(&sqd->exited); ++ return sqd; ++} ++ ++#if defined(CONFIG_UNIX) ++/* ++ * Ensure the UNIX gc is aware of our file set, so we are certain that ++ * the io_uring can be safely unregistered on process exit, even if we have ++ * loops in the file referencing. 
+ */ -+static inline int drm_gem_shmem_object_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma) ++static int __io_sqe_files_scm(struct io_ring_ctx *ctx, int nr, int offset) +{ -+ struct drm_gem_shmem_object *shmem = to_drm_gem_shmem_obj(obj); ++ struct sock *sk = ctx->ring_sock->sk; ++ struct scm_fp_list *fpl; ++ struct sk_buff *skb; ++ int i, nr_files; + -+ return drm_gem_shmem_mmap(shmem, vma); ++ fpl = kzalloc(sizeof(*fpl), GFP_KERNEL); ++ if (!fpl) ++ return -ENOMEM; ++ ++ skb = alloc_skb(0, GFP_KERNEL); ++ if (!skb) { ++ kfree(fpl); ++ return -ENOMEM; ++ } ++ ++ skb->sk = sk; ++ skb->scm_io_uring = 1; ++ ++ nr_files = 0; ++ fpl->user = get_uid(current_user()); ++ for (i = 0; i < nr; i++) { ++ struct file *file = io_file_from_index(ctx, i + offset); ++ ++ if (!file) ++ continue; ++ fpl->fp[nr_files] = get_file(file); ++ unix_inflight(fpl->user, fpl->fp[nr_files]); ++ nr_files++; ++ } ++ ++ if (nr_files) { ++ fpl->max = SCM_MAX_FD; ++ fpl->count = nr_files; ++ UNIXCB(skb).fp = fpl; ++ skb->destructor = unix_destruct_scm; ++ refcount_add(skb->truesize, &sk->sk_wmem_alloc); ++ skb_queue_head(&sk->sk_receive_queue, skb); ++ ++ for (i = 0; i < nr; i++) { ++ struct file *file = io_file_from_index(ctx, i + offset); ++ ++ if (file) ++ fput(file); ++ } ++ } else { ++ kfree_skb(skb); ++ free_uid(fpl->user); ++ kfree(fpl); ++ } ++ ++ return 0; +} - --void drm_gem_shmem_print_info(struct drm_printer *p, unsigned int indent, -- const struct drm_gem_object *obj); ++ +/* -+ * Driver ops ++ * If UNIX sockets are enabled, fd passing can cause a reference cycle which ++ * causes regular reference counting to break down. We rely on the UNIX ++ * garbage collection to take care of this problem for us. + */ - --struct sg_table *drm_gem_shmem_get_sg_table(struct drm_gem_object *obj); - struct drm_gem_object * - drm_gem_shmem_prime_import_sg_table(struct drm_device *dev, - struct dma_buf_attachment *attach, - struct sg_table *sgt); -- --struct sg_table *drm_gem_shmem_get_pages_sgt(struct drm_gem_object *obj); -+int drm_gem_shmem_dumb_create(struct drm_file *file, struct drm_device *dev, -+ struct drm_mode_create_dumb *args); - - /** - * DRM_GEM_SHMEM_DRIVER_OPS - Default shmem GEM operations -diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h -index f681bbdbc6982..36f7eb9d06639 100644 ---- a/include/drm/ttm/ttm_bo_api.h -+++ b/include/drm/ttm/ttm_bo_api.h -@@ -594,8 +594,7 @@ vm_fault_t ttm_bo_vm_reserve(struct ttm_buffer_object *bo, - - vm_fault_t ttm_bo_vm_fault_reserved(struct vm_fault *vmf, - pgprot_t prot, -- pgoff_t num_prefault, -- pgoff_t fault_page_size); -+ pgoff_t num_prefault); - - vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf); - -diff --git a/include/dt-bindings/clock/qcom,gcc-msm8939.h b/include/dt-bindings/clock/qcom,gcc-msm8939.h -index 0634467c4ce5a..2d545ed0d35ab 100644 ---- a/include/dt-bindings/clock/qcom,gcc-msm8939.h -+++ b/include/dt-bindings/clock/qcom,gcc-msm8939.h -@@ -192,6 +192,7 @@ - #define GCC_VENUS0_CORE0_VCODEC0_CLK 183 - #define GCC_VENUS0_CORE1_VCODEC0_CLK 184 - #define GCC_OXILI_TIMER_CLK 185 -+#define SYSTEM_MM_NOC_BFDCD_CLK_SRC 186 - - /* Indexes for GDSCs */ - #define BIMC_GDSC 0 -diff --git a/include/linux/acpi.h b/include/linux/acpi.h -index 974d497a897dc..6224b1e32681c 100644 ---- a/include/linux/acpi.h -+++ b/include/linux/acpi.h -@@ -976,6 +976,15 @@ static inline int acpi_get_local_address(acpi_handle handle, u32 *addr) - return -ENODEV; - } - -+static inline int acpi_register_wakeup_handler(int wake_irq, -+ bool (*wakeup)(void 
*context), void *context) ++static int io_sqe_files_scm(struct io_ring_ctx *ctx) +{ -+ return -ENXIO; ++ unsigned left, total; ++ int ret = 0; ++ ++ total = 0; ++ left = ctx->nr_user_files; ++ while (left) { ++ unsigned this_files = min_t(unsigned, left, SCM_MAX_FD); ++ ++ ret = __io_sqe_files_scm(ctx, this_files, total); ++ if (ret) ++ break; ++ left -= this_files; ++ total += this_files; ++ } ++ ++ if (!ret) ++ return 0; ++ ++ while (total < ctx->nr_user_files) { ++ struct file *file = io_file_from_index(ctx, total); ++ ++ if (file) ++ fput(file); ++ total++; ++ } ++ ++ return ret; +} ++#else ++static int io_sqe_files_scm(struct io_ring_ctx *ctx) ++{ ++ return 0; ++} ++#endif + -+static inline void acpi_unregister_wakeup_handler( -+ bool (*wakeup)(void *context), void *context) { } ++static void io_rsrc_file_put(struct io_ring_ctx *ctx, struct io_rsrc_put *prsrc) ++{ ++ struct file *file = prsrc->file; ++#if defined(CONFIG_UNIX) ++ struct sock *sock = ctx->ring_sock->sk; ++ struct sk_buff_head list, *head = &sock->sk_receive_queue; ++ struct sk_buff *skb; ++ int i; + - #endif /* !CONFIG_ACPI */ - - #ifdef CONFIG_ACPI_HOTPLUG_IOAPIC -diff --git a/include/linux/acpi_viot.h b/include/linux/acpi_viot.h -index 1eb8ee5b0e5fe..a5a1224315637 100644 ---- a/include/linux/acpi_viot.h -+++ b/include/linux/acpi_viot.h -@@ -6,9 +6,11 @@ - #include <linux/acpi.h> - - #ifdef CONFIG_ACPI_VIOT -+void __init acpi_viot_early_init(void); - void __init acpi_viot_init(void); - int viot_iommu_configure(struct device *dev); - #else -+static inline void acpi_viot_early_init(void) {} - static inline void acpi_viot_init(void) {} - static inline int viot_iommu_configure(struct device *dev) - { -diff --git a/include/linux/arm-smccc.h b/include/linux/arm-smccc.h -index 63ccb52521902..220c8c60e021a 100644 ---- a/include/linux/arm-smccc.h -+++ b/include/linux/arm-smccc.h -@@ -92,6 +92,11 @@ - ARM_SMCCC_SMC_32, \ - 0, 0x7fff) - -+#define ARM_SMCCC_ARCH_WORKAROUND_3 \ -+ ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \ -+ ARM_SMCCC_SMC_32, \ -+ 0, 0x3fff) ++ __skb_queue_head_init(&list); + - #define ARM_SMCCC_VENDOR_HYP_CALL_UID_FUNC_ID \ - ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \ - ARM_SMCCC_SMC_32, \ -diff --git a/include/linux/arm_sdei.h b/include/linux/arm_sdei.h -index 0a241c5c911d8..14dc461b0e829 100644 ---- a/include/linux/arm_sdei.h -+++ b/include/linux/arm_sdei.h -@@ -46,9 +46,11 @@ int sdei_unregister_ghes(struct ghes *ghes); - /* For use by arch code when CPU hotplug notifiers are not appropriate. 
*/ - int sdei_mask_local_cpu(void); - int sdei_unmask_local_cpu(void); -+void __init sdei_init(void); - #else - static inline int sdei_mask_local_cpu(void) { return 0; } - static inline int sdei_unmask_local_cpu(void) { return 0; } -+static inline void sdei_init(void) { } - #endif /* CONFIG_ARM_SDE_INTERFACE */ - - -diff --git a/include/linux/ata.h b/include/linux/ata.h -index 1b44f40c7700b..3b1ad57d0e017 100644 ---- a/include/linux/ata.h -+++ b/include/linux/ata.h -@@ -565,6 +565,18 @@ struct ata_bmdma_prd { - ((((id)[ATA_ID_SATA_CAPABILITY] != 0x0000) && \ - ((id)[ATA_ID_SATA_CAPABILITY] != 0xffff)) && \ - ((id)[ATA_ID_FEATURE_SUPP] & (1 << 2))) -+#define ata_id_has_devslp(id) \ -+ ((((id)[ATA_ID_SATA_CAPABILITY] != 0x0000) && \ -+ ((id)[ATA_ID_SATA_CAPABILITY] != 0xffff)) && \ -+ ((id)[ATA_ID_FEATURE_SUPP] & (1 << 8))) -+#define ata_id_has_ncq_autosense(id) \ -+ ((((id)[ATA_ID_SATA_CAPABILITY] != 0x0000) && \ -+ ((id)[ATA_ID_SATA_CAPABILITY] != 0xffff)) && \ -+ ((id)[ATA_ID_FEATURE_SUPP] & (1 << 7))) -+#define ata_id_has_dipm(id) \ -+ ((((id)[ATA_ID_SATA_CAPABILITY] != 0x0000) && \ -+ ((id)[ATA_ID_SATA_CAPABILITY] != 0xffff)) && \ -+ ((id)[ATA_ID_FEATURE_SUPP] & (1 << 3))) - #define ata_id_iordy_disable(id) ((id)[ATA_ID_CAPABILITY] & (1 << 10)) - #define ata_id_has_iordy(id) ((id)[ATA_ID_CAPABILITY] & (1 << 11)) - #define ata_id_u32(id,n) \ -@@ -577,9 +589,6 @@ struct ata_bmdma_prd { - - #define ata_id_cdb_intr(id) (((id)[ATA_ID_CONFIG] & 0x60) == 0x20) - #define ata_id_has_da(id) ((id)[ATA_ID_SATA_CAPABILITY_2] & (1 << 4)) --#define ata_id_has_devslp(id) ((id)[ATA_ID_FEATURE_SUPP] & (1 << 8)) --#define ata_id_has_ncq_autosense(id) \ -- ((id)[ATA_ID_FEATURE_SUPP] & (1 << 7)) - - static inline bool ata_id_has_hipm(const u16 *id) - { -@@ -591,17 +600,6 @@ static inline bool ata_id_has_hipm(const u16 *id) - return val & (1 << 9); - } - --static inline bool ata_id_has_dipm(const u16 *id) --{ -- u16 val = id[ATA_ID_FEATURE_SUPP]; -- -- if (val == 0 || val == 0xffff) -- return false; -- -- return val & (1 << 3); --} -- -- - static inline bool ata_id_has_fua(const u16 *id) - { - if ((id[ATA_ID_CFSSE] & 0xC000) != 0x4000) -@@ -770,16 +768,21 @@ static inline bool ata_id_has_read_log_dma_ext(const u16 *id) - - static inline bool ata_id_has_sense_reporting(const u16 *id) - { -- if (!(id[ATA_ID_CFS_ENABLE_2] & (1 << 15))) -+ if (!(id[ATA_ID_CFS_ENABLE_2] & BIT(15))) -+ return false; -+ if ((id[ATA_ID_COMMAND_SET_3] & (BIT(15) | BIT(14))) != BIT(14)) - return false; -- return id[ATA_ID_COMMAND_SET_3] & (1 << 6); -+ return id[ATA_ID_COMMAND_SET_3] & BIT(6); - } - - static inline bool ata_id_sense_reporting_enabled(const u16 *id) - { -- if (!(id[ATA_ID_CFS_ENABLE_2] & (1 << 15))) -+ if (!ata_id_has_sense_reporting(id)) -+ return false; -+ /* ata_id_has_sense_reporting() == true, word 86 must have bit 15 set */ -+ if ((id[ATA_ID_COMMAND_SET_4] & (BIT(15) | BIT(14))) != BIT(14)) - return false; -- return id[ATA_ID_COMMAND_SET_4] & (1 << 6); -+ return id[ATA_ID_COMMAND_SET_4] & BIT(6); - } - - /** -diff --git a/include/linux/atomic/atomic-arch-fallback.h b/include/linux/atomic/atomic-arch-fallback.h -index a3dba31df01e9..6db58d1808665 100644 ---- a/include/linux/atomic/atomic-arch-fallback.h -+++ b/include/linux/atomic/atomic-arch-fallback.h -@@ -151,7 +151,16 @@ - static __always_inline int - arch_atomic_read_acquire(const atomic_t *v) - { -- return smp_load_acquire(&(v)->counter); ++ /* ++ * Find the skb that holds this file in its SCM_RIGHTS. 
When found, ++ * remove this entry and rearrange the file array. ++ */ ++ skb = skb_dequeue(head); ++ while (skb) { ++ struct scm_fp_list *fp; ++ ++ fp = UNIXCB(skb).fp; ++ for (i = 0; i < fp->count; i++) { ++ int left; ++ ++ if (fp->fp[i] != file) ++ continue; ++ ++ unix_notinflight(fp->user, fp->fp[i]); ++ left = fp->count - 1 - i; ++ if (left) { ++ memmove(&fp->fp[i], &fp->fp[i + 1], ++ left * sizeof(struct file *)); ++ } ++ fp->count--; ++ if (!fp->count) { ++ kfree_skb(skb); ++ skb = NULL; ++ } else { ++ __skb_queue_tail(&list, skb); ++ } ++ fput(file); ++ file = NULL; ++ break; ++ } ++ ++ if (!file) ++ break; ++ ++ __skb_queue_tail(&list, skb); ++ ++ skb = skb_dequeue(head); ++ } ++ ++ if (skb_peek(&list)) { ++ spin_lock_irq(&head->lock); ++ while ((skb = __skb_dequeue(&list)) != NULL) ++ __skb_queue_tail(head, skb); ++ spin_unlock_irq(&head->lock); ++ } ++#else ++ fput(file); ++#endif ++} ++ ++static void __io_rsrc_put_work(struct io_rsrc_node *ref_node) ++{ ++ struct io_rsrc_data *rsrc_data = ref_node->rsrc_data; ++ struct io_ring_ctx *ctx = rsrc_data->ctx; ++ struct io_rsrc_put *prsrc, *tmp; ++ ++ list_for_each_entry_safe(prsrc, tmp, &ref_node->rsrc_list, list) { ++ list_del(&prsrc->list); ++ ++ if (prsrc->tag) { ++ bool lock_ring = ctx->flags & IORING_SETUP_IOPOLL; ++ ++ io_ring_submit_lock(ctx, lock_ring); ++ spin_lock(&ctx->completion_lock); ++ io_fill_cqe_aux(ctx, prsrc->tag, 0, 0); ++ io_commit_cqring(ctx); ++ spin_unlock(&ctx->completion_lock); ++ io_cqring_ev_posted(ctx); ++ io_ring_submit_unlock(ctx, lock_ring); ++ } ++ ++ rsrc_data->do_put(ctx, prsrc); ++ kfree(prsrc); ++ } ++ ++ io_rsrc_node_destroy(ref_node); ++ if (atomic_dec_and_test(&rsrc_data->refs)) ++ complete(&rsrc_data->done); ++} ++ ++static void io_rsrc_put_work(struct work_struct *work) ++{ ++ struct io_ring_ctx *ctx; ++ struct llist_node *node; ++ ++ ctx = container_of(work, struct io_ring_ctx, rsrc_put_work.work); ++ node = llist_del_all(&ctx->rsrc_put_llist); ++ ++ while (node) { ++ struct io_rsrc_node *ref_node; ++ struct llist_node *next = node->next; ++ ++ ref_node = llist_entry(node, struct io_rsrc_node, llist); ++ __io_rsrc_put_work(ref_node); ++ node = next; ++ } ++} ++ ++static int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg, ++ unsigned nr_args, u64 __user *tags) ++{ ++ __s32 __user *fds = (__s32 __user *) arg; ++ struct file *file; ++ int fd, ret; ++ unsigned i; ++ ++ if (ctx->file_data) ++ return -EBUSY; ++ if (!nr_args) ++ return -EINVAL; ++ if (nr_args > IORING_MAX_FIXED_FILES) ++ return -EMFILE; ++ if (nr_args > rlimit(RLIMIT_NOFILE)) ++ return -EMFILE; ++ ret = io_rsrc_node_switch_start(ctx); ++ if (ret) ++ return ret; ++ ret = io_rsrc_data_alloc(ctx, io_rsrc_file_put, tags, nr_args, ++ &ctx->file_data); ++ if (ret) ++ return ret; ++ ++ ret = -ENOMEM; ++ if (!io_alloc_file_tables(&ctx->file_table, nr_args)) ++ goto out_free; ++ ++ for (i = 0; i < nr_args; i++, ctx->nr_user_files++) { ++ if (copy_from_user(&fd, &fds[i], sizeof(fd))) { ++ ret = -EFAULT; ++ goto out_fput; ++ } ++ /* allow sparse sets */ ++ if (fd == -1) { ++ ret = -EINVAL; ++ if (unlikely(*io_get_tag_slot(ctx->file_data, i))) ++ goto out_fput; ++ continue; ++ } ++ ++ file = fget(fd); ++ ret = -EBADF; ++ if (unlikely(!file)) ++ goto out_fput; ++ ++ /* ++ * Don't allow io_uring instances to be registered. If UNIX ++ * isn't enabled, then this causes a reference cycle and this ++ * instance can never get freed. 
If UNIX is enabled we'll ++ * handle it just fine, but there's still no point in allowing ++ * a ring fd as it doesn't support regular read/write anyway. ++ */ ++ if (file->f_op == &io_uring_fops) { ++ fput(file); ++ goto out_fput; ++ } ++ io_fixed_file_set(io_fixed_file_slot(&ctx->file_table, i), file); ++ } ++ ++ ret = io_sqe_files_scm(ctx); ++ if (ret) { ++ __io_sqe_files_unregister(ctx); ++ return ret; ++ } ++ ++ io_rsrc_node_switch(ctx, NULL); ++ return ret; ++out_fput: ++ for (i = 0; i < ctx->nr_user_files; i++) { ++ file = io_file_from_index(ctx, i); ++ if (file) ++ fput(file); ++ } ++ io_free_file_tables(&ctx->file_table); ++ ctx->nr_user_files = 0; ++out_free: ++ io_rsrc_data_free(ctx->file_data); ++ ctx->file_data = NULL; ++ return ret; ++} ++ ++static int io_sqe_file_register(struct io_ring_ctx *ctx, struct file *file, ++ int index) ++{ ++#if defined(CONFIG_UNIX) ++ struct sock *sock = ctx->ring_sock->sk; ++ struct sk_buff_head *head = &sock->sk_receive_queue; ++ struct sk_buff *skb; ++ ++ /* ++ * See if we can merge this file into an existing skb SCM_RIGHTS ++ * file set. If there's no room, fall back to allocating a new skb ++ * and filling it in. ++ */ ++ spin_lock_irq(&head->lock); ++ skb = skb_peek(head); ++ if (skb) { ++ struct scm_fp_list *fpl = UNIXCB(skb).fp; ++ ++ if (fpl->count < SCM_MAX_FD) { ++ __skb_unlink(skb, head); ++ spin_unlock_irq(&head->lock); ++ fpl->fp[fpl->count] = get_file(file); ++ unix_inflight(fpl->user, fpl->fp[fpl->count]); ++ fpl->count++; ++ spin_lock_irq(&head->lock); ++ __skb_queue_head(head, skb); ++ } else { ++ skb = NULL; ++ } ++ } ++ spin_unlock_irq(&head->lock); ++ ++ if (skb) { ++ fput(file); ++ return 0; ++ } ++ ++ return __io_sqe_files_scm(ctx, 1, index); ++#else ++ return 0; ++#endif ++} ++ ++static int io_queue_rsrc_removal(struct io_rsrc_data *data, unsigned idx, ++ struct io_rsrc_node *node, void *rsrc) ++{ ++ u64 *tag_slot = io_get_tag_slot(data, idx); ++ struct io_rsrc_put *prsrc; ++ ++ prsrc = kzalloc(sizeof(*prsrc), GFP_KERNEL); ++ if (!prsrc) ++ return -ENOMEM; ++ ++ prsrc->tag = *tag_slot; ++ *tag_slot = 0; ++ prsrc->rsrc = rsrc; ++ list_add(&prsrc->list, &node->rsrc_list); ++ return 0; ++} ++ ++static int io_install_fixed_file(struct io_kiocb *req, struct file *file, ++ unsigned int issue_flags, u32 slot_index) ++{ ++ struct io_ring_ctx *ctx = req->ctx; ++ bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK; ++ bool needs_switch = false; ++ struct io_fixed_file *file_slot; ++ int ret = -EBADF; ++ ++ io_ring_submit_lock(ctx, !force_nonblock); ++ if (file->f_op == &io_uring_fops) ++ goto err; ++ ret = -ENXIO; ++ if (!ctx->file_data) ++ goto err; ++ ret = -EINVAL; ++ if (slot_index >= ctx->nr_user_files) ++ goto err; ++ ++ slot_index = array_index_nospec(slot_index, ctx->nr_user_files); ++ file_slot = io_fixed_file_slot(&ctx->file_table, slot_index); ++ ++ if (file_slot->file_ptr) { ++ struct file *old_file; ++ ++ ret = io_rsrc_node_switch_start(ctx); ++ if (ret) ++ goto err; ++ ++ old_file = (struct file *)(file_slot->file_ptr & FFS_MASK); ++ ret = io_queue_rsrc_removal(ctx->file_data, slot_index, ++ ctx->rsrc_node, old_file); ++ if (ret) ++ goto err; ++ file_slot->file_ptr = 0; ++ needs_switch = true; ++ } ++ ++ *io_get_tag_slot(ctx->file_data, slot_index) = 0; ++ io_fixed_file_set(file_slot, file); ++ ret = io_sqe_file_register(ctx, file, slot_index); ++ if (ret) { ++ file_slot->file_ptr = 0; ++ goto err; ++ } ++ ++ ret = 0; ++err: ++ if (needs_switch) ++ io_rsrc_node_switch(ctx, ctx->file_data); ++ 
io_ring_submit_unlock(ctx, !force_nonblock); ++ if (ret) ++ fput(file); ++ return ret; ++} ++ ++static int io_close_fixed(struct io_kiocb *req, unsigned int issue_flags) ++{ ++ unsigned int offset = req->close.file_slot - 1; ++ struct io_ring_ctx *ctx = req->ctx; ++ struct io_fixed_file *file_slot; ++ struct file *file; ++ int ret; ++ ++ io_ring_submit_lock(ctx, !(issue_flags & IO_URING_F_NONBLOCK)); ++ ret = -ENXIO; ++ if (unlikely(!ctx->file_data)) ++ goto out; ++ ret = -EINVAL; ++ if (offset >= ctx->nr_user_files) ++ goto out; ++ ret = io_rsrc_node_switch_start(ctx); ++ if (ret) ++ goto out; ++ ++ offset = array_index_nospec(offset, ctx->nr_user_files); ++ file_slot = io_fixed_file_slot(&ctx->file_table, offset); ++ ret = -EBADF; ++ if (!file_slot->file_ptr) ++ goto out; ++ ++ file = (struct file *)(file_slot->file_ptr & FFS_MASK); ++ ret = io_queue_rsrc_removal(ctx->file_data, offset, ctx->rsrc_node, file); ++ if (ret) ++ goto out; ++ ++ file_slot->file_ptr = 0; ++ io_rsrc_node_switch(ctx, ctx->file_data); ++ ret = 0; ++out: ++ io_ring_submit_unlock(ctx, !(issue_flags & IO_URING_F_NONBLOCK)); ++ return ret; ++} ++ ++static int __io_sqe_files_update(struct io_ring_ctx *ctx, ++ struct io_uring_rsrc_update2 *up, ++ unsigned nr_args) ++{ ++ u64 __user *tags = u64_to_user_ptr(up->tags); ++ __s32 __user *fds = u64_to_user_ptr(up->data); ++ struct io_rsrc_data *data = ctx->file_data; ++ struct io_fixed_file *file_slot; ++ struct file *file; ++ int fd, i, err = 0; ++ unsigned int done; ++ bool needs_switch = false; ++ ++ if (!ctx->file_data) ++ return -ENXIO; ++ if (up->offset + nr_args > ctx->nr_user_files) ++ return -EINVAL; ++ ++ for (done = 0; done < nr_args; done++) { ++ u64 tag = 0; ++ ++ if ((tags && copy_from_user(&tag, &tags[done], sizeof(tag))) || ++ copy_from_user(&fd, &fds[done], sizeof(fd))) { ++ err = -EFAULT; ++ break; ++ } ++ if ((fd == IORING_REGISTER_FILES_SKIP || fd == -1) && tag) { ++ err = -EINVAL; ++ break; ++ } ++ if (fd == IORING_REGISTER_FILES_SKIP) ++ continue; ++ ++ i = array_index_nospec(up->offset + done, ctx->nr_user_files); ++ file_slot = io_fixed_file_slot(&ctx->file_table, i); ++ ++ if (file_slot->file_ptr) { ++ file = (struct file *)(file_slot->file_ptr & FFS_MASK); ++ err = io_queue_rsrc_removal(data, i, ctx->rsrc_node, file); ++ if (err) ++ break; ++ file_slot->file_ptr = 0; ++ needs_switch = true; ++ } ++ if (fd != -1) { ++ file = fget(fd); ++ if (!file) { ++ err = -EBADF; ++ break; ++ } ++ /* ++ * Don't allow io_uring instances to be registered. If ++ * UNIX isn't enabled, then this causes a reference ++ * cycle and this instance can never get freed. If UNIX ++ * is enabled we'll handle it just fine, but there's ++ * still no point in allowing a ring fd as it doesn't ++ * support regular read/write anyway. ++ */ ++ if (file->f_op == &io_uring_fops) { ++ fput(file); ++ err = -EBADF; ++ break; ++ } ++ *io_get_tag_slot(data, i) = tag; ++ io_fixed_file_set(file_slot, file); ++ err = io_sqe_file_register(ctx, file, i); ++ if (err) { ++ file_slot->file_ptr = 0; ++ fput(file); ++ break; ++ } ++ } ++ } ++ ++ if (needs_switch) ++ io_rsrc_node_switch(ctx, data); ++ return done ? 
done : err; ++} ++ ++static struct io_wq *io_init_wq_offload(struct io_ring_ctx *ctx, ++ struct task_struct *task) ++{ ++ struct io_wq_hash *hash; ++ struct io_wq_data data; ++ unsigned int concurrency; ++ ++ mutex_lock(&ctx->uring_lock); ++ hash = ctx->hash_map; ++ if (!hash) { ++ hash = kzalloc(sizeof(*hash), GFP_KERNEL); ++ if (!hash) { ++ mutex_unlock(&ctx->uring_lock); ++ return ERR_PTR(-ENOMEM); ++ } ++ refcount_set(&hash->refs, 1); ++ init_waitqueue_head(&hash->wait); ++ ctx->hash_map = hash; ++ } ++ mutex_unlock(&ctx->uring_lock); ++ ++ data.hash = hash; ++ data.task = task; ++ data.free_work = io_wq_free_work; ++ data.do_work = io_wq_submit_work; ++ ++ /* Do QD, or 4 * CPUS, whatever is smallest */ ++ concurrency = min(ctx->sq_entries, 4 * num_online_cpus()); ++ ++ return io_wq_create(concurrency, &data); ++} ++ ++static int io_uring_alloc_task_context(struct task_struct *task, ++ struct io_ring_ctx *ctx) ++{ ++ struct io_uring_task *tctx; + int ret; + -+ if (__native_word(atomic_t)) { -+ ret = smp_load_acquire(&(v)->counter); -+ } else { -+ ret = arch_atomic_read(v); -+ __atomic_acquire_fence(); ++ tctx = kzalloc(sizeof(*tctx), GFP_KERNEL); ++ if (unlikely(!tctx)) ++ return -ENOMEM; ++ ++ ret = percpu_counter_init(&tctx->inflight, 0, GFP_KERNEL); ++ if (unlikely(ret)) { ++ kfree(tctx); ++ return ret; + } + -+ return ret; - } - #define arch_atomic_read_acquire arch_atomic_read_acquire - #endif -@@ -160,7 +169,12 @@ arch_atomic_read_acquire(const atomic_t *v) - static __always_inline void - arch_atomic_set_release(atomic_t *v, int i) - { -- smp_store_release(&(v)->counter, i); -+ if (__native_word(atomic_t)) { -+ smp_store_release(&(v)->counter, i); -+ } else { -+ __atomic_release_fence(); -+ arch_atomic_set(v, i); ++ tctx->io_wq = io_init_wq_offload(ctx, task); ++ if (IS_ERR(tctx->io_wq)) { ++ ret = PTR_ERR(tctx->io_wq); ++ percpu_counter_destroy(&tctx->inflight); ++ kfree(tctx); ++ return ret; + } - } - #define arch_atomic_set_release arch_atomic_set_release - #endif -@@ -1258,7 +1272,16 @@ arch_atomic_dec_if_positive(atomic_t *v) - static __always_inline s64 - arch_atomic64_read_acquire(const atomic64_t *v) - { -- return smp_load_acquire(&(v)->counter); -+ s64 ret; + -+ if (__native_word(atomic64_t)) { -+ ret = smp_load_acquire(&(v)->counter); -+ } else { -+ ret = arch_atomic64_read(v); -+ __atomic_acquire_fence(); ++ xa_init(&tctx->xa); ++ init_waitqueue_head(&tctx->wait); ++ atomic_set(&tctx->in_idle, 0); ++ atomic_set(&tctx->inflight_tracked, 0); ++ task->io_uring = tctx; ++ spin_lock_init(&tctx->task_lock); ++ INIT_WQ_LIST(&tctx->task_list); ++ init_task_work(&tctx->task_work, tctx_task_work); ++ return 0; ++} ++ ++void __io_uring_free(struct task_struct *tsk) ++{ ++ struct io_uring_task *tctx = tsk->io_uring; ++ ++ WARN_ON_ONCE(!xa_empty(&tctx->xa)); ++ WARN_ON_ONCE(tctx->io_wq); ++ WARN_ON_ONCE(tctx->cached_refs); ++ ++ percpu_counter_destroy(&tctx->inflight); ++ kfree(tctx); ++ tsk->io_uring = NULL; ++} ++ ++static int io_sq_offload_create(struct io_ring_ctx *ctx, ++ struct io_uring_params *p) ++{ ++ int ret; ++ ++ /* Retain compatibility with failing for an invalid attach attempt */ ++ if ((ctx->flags & (IORING_SETUP_ATTACH_WQ | IORING_SETUP_SQPOLL)) == ++ IORING_SETUP_ATTACH_WQ) { ++ struct fd f; ++ ++ f = fdget(p->wq_fd); ++ if (!f.file) ++ return -ENXIO; ++ if (f.file->f_op != &io_uring_fops) { ++ fdput(f); ++ return -EINVAL; ++ } ++ fdput(f); + } ++ if (ctx->flags & IORING_SETUP_SQPOLL) { ++ struct task_struct *tsk; ++ struct io_sq_data *sqd; ++ bool attached; + -+ 
return ret; - } - #define arch_atomic64_read_acquire arch_atomic64_read_acquire - #endif -@@ -1267,7 +1290,12 @@ arch_atomic64_read_acquire(const atomic64_t *v) - static __always_inline void - arch_atomic64_set_release(atomic64_t *v, s64 i) - { -- smp_store_release(&(v)->counter, i); -+ if (__native_word(atomic64_t)) { -+ smp_store_release(&(v)->counter, i); -+ } else { -+ __atomic_release_fence(); -+ arch_atomic64_set(v, i); ++ sqd = io_get_sq_data(p, &attached); ++ if (IS_ERR(sqd)) { ++ ret = PTR_ERR(sqd); ++ goto err; ++ } ++ ++ ctx->sq_creds = get_current_cred(); ++ ctx->sq_data = sqd; ++ ctx->sq_thread_idle = msecs_to_jiffies(p->sq_thread_idle); ++ if (!ctx->sq_thread_idle) ++ ctx->sq_thread_idle = HZ; ++ ++ io_sq_thread_park(sqd); ++ list_add(&ctx->sqd_list, &sqd->ctx_list); ++ io_sqd_update_thread_idle(sqd); ++ /* don't attach to a dying SQPOLL thread, would be racy */ ++ ret = (attached && !sqd->thread) ? -ENXIO : 0; ++ io_sq_thread_unpark(sqd); ++ ++ if (ret < 0) ++ goto err; ++ if (attached) ++ return 0; ++ ++ if (p->flags & IORING_SETUP_SQ_AFF) { ++ int cpu = p->sq_thread_cpu; ++ ++ ret = -EINVAL; ++ if (cpu >= nr_cpu_ids || !cpu_online(cpu)) ++ goto err_sqpoll; ++ sqd->sq_cpu = cpu; ++ } else { ++ sqd->sq_cpu = -1; ++ } ++ ++ sqd->task_pid = current->pid; ++ sqd->task_tgid = current->tgid; ++ tsk = create_io_thread(io_sq_thread, sqd, NUMA_NO_NODE); ++ if (IS_ERR(tsk)) { ++ ret = PTR_ERR(tsk); ++ goto err_sqpoll; ++ } ++ ++ sqd->thread = tsk; ++ ret = io_uring_alloc_task_context(tsk, ctx); ++ wake_up_new_task(tsk); ++ if (ret) ++ goto err; ++ } else if (p->flags & IORING_SETUP_SQ_AFF) { ++ /* Can't have SQ_AFF without SQPOLL */ ++ ret = -EINVAL; ++ goto err; + } - } - #define arch_atomic64_set_release arch_atomic64_set_release - #endif -@@ -2358,4 +2386,4 @@ arch_atomic64_dec_if_positive(atomic64_t *v) - #endif - - #endif /* _LINUX_ATOMIC_FALLBACK_H */ --// cca554917d7ea73d5e3e7397dd70c484cad9b2c4 -+// 8e2cc06bc0d2c0967d2f8424762bd48555ee40ae -diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h -index ac7f231b88258..eed9a98eae0d0 100644 ---- a/include/linux/backing-dev.h -+++ b/include/linux/backing-dev.h -@@ -121,6 +121,8 @@ int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ratio); - - extern struct backing_dev_info noop_backing_dev_info; - -+int bdi_init(struct backing_dev_info *bdi); + - /** - * writeback_in_progress - determine whether there is writeback in progress - * @wb: bdi_writeback of interest -diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h -index 049cf9421d831..f821b72433613 100644 ---- a/include/linux/binfmts.h -+++ b/include/linux/binfmts.h -@@ -87,6 +87,9 @@ struct coredump_params { - loff_t written; - loff_t pos; - loff_t to_skip; -+ int vma_count; -+ size_t vma_data_size; -+ struct core_vma_metadata *vma_meta; - }; - - /* -diff --git a/include/linux/bitfield.h b/include/linux/bitfield.h -index 4e035aca6f7e6..6093fa6db2600 100644 ---- a/include/linux/bitfield.h -+++ b/include/linux/bitfield.h -@@ -41,6 +41,22 @@ - - #define __bf_shf(x) (__builtin_ffsll(x) - 1) - -+#define __scalar_type_to_unsigned_cases(type) \ -+ unsigned type: (unsigned type)0, \ -+ signed type: (unsigned type)0 ++ return 0; ++err_sqpoll: ++ complete(&ctx->sq_data->exited); ++err: ++ io_sq_thread_finish(ctx); ++ return ret; ++} + -+#define __unsigned_scalar_typeof(x) typeof( \ -+ _Generic((x), \ -+ char: (unsigned char)0, \ -+ __scalar_type_to_unsigned_cases(char), \ -+ __scalar_type_to_unsigned_cases(short), \ -+ 
__scalar_type_to_unsigned_cases(int), \ -+ __scalar_type_to_unsigned_cases(long), \ -+ __scalar_type_to_unsigned_cases(long long), \ -+ default: (x))) ++static inline void __io_unaccount_mem(struct user_struct *user, ++ unsigned long nr_pages) ++{ ++ atomic_long_sub(nr_pages, &user->locked_vm); ++} + -+#define __bf_cast_unsigned(type, x) ((__unsigned_scalar_typeof(type))(x)) ++static inline int __io_account_mem(struct user_struct *user, ++ unsigned long nr_pages) ++{ ++ unsigned long page_limit, cur_pages, new_pages; + - #define __BF_FIELD_CHECK(_mask, _reg, _val, _pfx) \ - ({ \ - BUILD_BUG_ON_MSG(!__builtin_constant_p(_mask), \ -@@ -49,7 +65,8 @@ - BUILD_BUG_ON_MSG(__builtin_constant_p(_val) ? \ - ~((_mask) >> __bf_shf(_mask)) & (_val) : 0, \ - _pfx "value too large for the field"); \ -- BUILD_BUG_ON_MSG((_mask) > (typeof(_reg))~0ull, \ -+ BUILD_BUG_ON_MSG(__bf_cast_unsigned(_mask, _mask) > \ -+ __bf_cast_unsigned(_reg, ~0ull), \ - _pfx "type of reg too small for mask"); \ - __BUILD_BUG_ON_NOT_POWER_OF_2((_mask) + \ - (1ULL << __bf_shf(_mask))); \ -diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h -index b4de2010fba55..bc5c04d711bbc 100644 ---- a/include/linux/blk-cgroup.h -+++ b/include/linux/blk-cgroup.h -@@ -24,6 +24,7 @@ - #include <linux/atomic.h> - #include <linux/kthread.h> - #include <linux/fs.h> -+#include <linux/blk-mq.h> - - /* percpu_counter batch for blkg_[rw]stats, per-cpu drift doesn't matter */ - #define BLKG_STAT_CPU_BATCH (INT_MAX / 2) -@@ -604,6 +605,21 @@ static inline void blkcg_clear_delay(struct blkcg_gq *blkg) - atomic_dec(&blkg->blkcg->css.cgroup->congestion_count); - } - -+/** -+ * blk_cgroup_mergeable - Determine whether to allow or disallow merges -+ * @rq: request to merge into -+ * @bio: bio to merge -+ * -+ * @bio and @rq should belong to the same cgroup and their issue_as_root should -+ * match. The latter is necessary as we don't want to throttle e.g. a metadata -+ * update because it happens to be next to a regular IO. 
-+ */ -+static inline bool blk_cgroup_mergeable(struct request *rq, struct bio *bio) ++ /* Don't allow more pages than we can safely lock */ ++ page_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; ++ ++ do { ++ cur_pages = atomic_long_read(&user->locked_vm); ++ new_pages = cur_pages + nr_pages; ++ if (new_pages > page_limit) ++ return -ENOMEM; ++ } while (atomic_long_cmpxchg(&user->locked_vm, cur_pages, ++ new_pages) != cur_pages); ++ ++ return 0; ++} ++ ++static void io_unaccount_mem(struct io_ring_ctx *ctx, unsigned long nr_pages) +{ -+ return rq->bio->bi_blkg == bio->bi_blkg && -+ bio_issue_as_root_blkg(rq->bio) == bio_issue_as_root_blkg(bio); ++ if (ctx->user) ++ __io_unaccount_mem(ctx->user, nr_pages); ++ ++ if (ctx->mm_account) ++ atomic64_sub(nr_pages, &ctx->mm_account->pinned_vm); +} + - void blk_cgroup_bio_start(struct bio *bio); - void blkcg_add_delay(struct blkcg_gq *blkg, u64 now, u64 delta); - void blkcg_schedule_throttle(struct request_queue *q, bool use_memdelay); -@@ -659,6 +675,7 @@ static inline void blkg_put(struct blkcg_gq *blkg) { } - static inline bool blkcg_punt_bio_submit(struct bio *bio) { return false; } - static inline void blkcg_bio_issue_init(struct bio *bio) { } - static inline void blk_cgroup_bio_start(struct bio *bio) { } -+static inline bool blk_cgroup_mergeable(struct request *rq, struct bio *bio) { return true; } - - #define blk_queue_for_each_rl(rl, q) \ - for ((rl) = &(q)->root_rl; (rl); (rl) = NULL) -diff --git a/include/linux/blk-pm.h b/include/linux/blk-pm.h -index b80c65aba2493..2580e05a8ab67 100644 ---- a/include/linux/blk-pm.h -+++ b/include/linux/blk-pm.h -@@ -14,7 +14,7 @@ extern void blk_pm_runtime_init(struct request_queue *q, struct device *dev); - extern int blk_pre_runtime_suspend(struct request_queue *q); - extern void blk_post_runtime_suspend(struct request_queue *q, int err); - extern void blk_pre_runtime_resume(struct request_queue *q); --extern void blk_post_runtime_resume(struct request_queue *q, int err); -+extern void blk_post_runtime_resume(struct request_queue *q); - extern void blk_set_runtime_active(struct request_queue *q); - #else - static inline void blk_pm_runtime_init(struct request_queue *q, -diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h -index be622b5a21ed5..36ce3d0fb9f3b 100644 ---- a/include/linux/blk_types.h -+++ b/include/linux/blk_types.h -@@ -215,9 +215,8 @@ static inline void bio_issue_init(struct bio_issue *issue, - struct bio { - struct bio *bi_next; /* request queue link */ - struct block_device *bi_bdev; -- unsigned int bi_opf; /* bottom bits req flags, -- * top bits REQ_OP. Use -- * accessors. -+ unsigned int bi_opf; /* bottom bits REQ_OP, top bits -+ * req_flags. - */ - unsigned short bi_flags; /* BIO_* below */ - unsigned short bi_ioprio; -@@ -295,7 +294,8 @@ enum { - BIO_TRACE_COMPLETION, /* bio_endio() should trace the final completion - * of this bio. 
*/ - BIO_CGROUP_ACCT, /* has been accounted to a cgroup */ -- BIO_TRACKED, /* set if bio goes through the rq_qos path */ -+ BIO_QOS_THROTTLED, /* bio went through rq_qos throttle path */ -+ BIO_QOS_MERGED, /* but went through rq_qos merge path */ - BIO_REMAPPED, - BIO_ZONE_WRITE_LOCKED, /* Owns a zoned device zone write lock */ - BIO_PERCPU_CACHE, /* can participate in per-cpu alloc cache */ -diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h -index 12b9dbcc980ee..67344dfe07a7c 100644 ---- a/include/linux/blkdev.h -+++ b/include/linux/blkdev.h -@@ -235,6 +235,14 @@ struct request { - void *end_io_data; - }; - -+static inline int blk_validate_block_size(unsigned int bsize) ++static int io_account_mem(struct io_ring_ctx *ctx, unsigned long nr_pages) +{ -+ if (bsize < 512 || bsize > PAGE_SIZE || !is_power_of_2(bsize)) -+ return -EINVAL; ++ int ret; ++ ++ if (ctx->user) { ++ ret = __io_account_mem(ctx->user, nr_pages); ++ if (ret) ++ return ret; ++ } ++ ++ if (ctx->mm_account) ++ atomic64_add(nr_pages, &ctx->mm_account->pinned_vm); + + return 0; +} + - static inline bool blk_op_is_passthrough(unsigned int op) - { - op &= REQ_OP_MASK; -@@ -253,8 +261,6 @@ static inline unsigned short req_get_ioprio(struct request *req) - - #include <linux/elevator.h> - --struct blk_queue_ctx; -- - struct bio_vec; - - enum blk_eh_timer_return { -@@ -1176,7 +1182,8 @@ extern void blk_dump_rq_flags(struct request *, char *); - - bool __must_check blk_get_queue(struct request_queue *); - extern void blk_put_queue(struct request_queue *); --extern void blk_set_queue_dying(struct request_queue *); ++static void io_mem_free(void *ptr) ++{ ++ struct page *page; + -+void blk_mark_disk_dead(struct gendisk *disk); - - #ifdef CONFIG_BLOCK - /* -@@ -1198,8 +1205,6 @@ struct blk_plug { - bool multiple_queues; - bool nowait; - }; --#define BLK_MAX_REQUEST_COUNT 16 --#define BLK_PLUG_FLUSH_SIZE (128 * 1024) - - struct blk_plug_cb; - typedef void (*blk_plug_cb_fn)(struct blk_plug_cb *, bool); -@@ -1382,6 +1387,17 @@ static inline unsigned int queue_max_zone_append_sectors(const struct request_qu - return min(l->max_zone_append_sectors, l->max_sectors); - } - -+static inline unsigned int -+bdev_max_zone_append_sectors(struct block_device *bdev) ++ if (!ptr) ++ return; ++ ++ page = virt_to_head_page(ptr); ++ if (put_page_testzero(page)) ++ free_compound_page(page); ++} ++ ++static void *io_mem_alloc(size_t size) +{ -+ return queue_max_zone_append_sectors(bdev_get_queue(bdev)); ++ gfp_t gfp = GFP_KERNEL_ACCOUNT | __GFP_ZERO | __GFP_NOWARN | __GFP_COMP; ++ ++ return (void *) __get_free_pages(gfp, get_order(size)); +} + -+static inline unsigned int bdev_max_segments(struct block_device *bdev) ++static unsigned long rings_size(unsigned sq_entries, unsigned cq_entries, ++ size_t *sq_offset) +{ -+ return queue_max_segments(bdev_get_queue(bdev)); ++ struct io_rings *rings; ++ size_t off, sq_array_size; ++ ++ off = struct_size(rings, cqes, cq_entries); ++ if (off == SIZE_MAX) ++ return SIZE_MAX; ++ ++#ifdef CONFIG_SMP ++ off = ALIGN(off, SMP_CACHE_BYTES); ++ if (off == 0) ++ return SIZE_MAX; ++#endif ++ ++ if (sq_offset) ++ *sq_offset = off; ++ ++ sq_array_size = array_size(sizeof(u32), sq_entries); ++ if (sq_array_size == SIZE_MAX) ++ return SIZE_MAX; ++ ++ if (check_add_overflow(off, sq_array_size, &off)) ++ return SIZE_MAX; ++ ++ return off; +} + - static inline unsigned queue_logical_block_size(const struct request_queue *q) - { - int retval = 512; -@@ -1941,6 +1957,7 @@ unsigned long disk_start_io_acct(struct gendisk 
*disk, unsigned int sectors, - void disk_end_io_acct(struct gendisk *disk, unsigned int op, - unsigned long start_time); - -+void bio_start_io_acct_time(struct bio *bio, unsigned long start_time); - unsigned long bio_start_io_acct(struct bio *bio); - void bio_end_io_acct_remapped(struct bio *bio, unsigned long start_time, - struct block_device *orig_bdev); -@@ -1991,6 +2008,8 @@ int truncate_bdev_range(struct block_device *bdev, fmode_t mode, loff_t lstart, - #ifdef CONFIG_BLOCK - void invalidate_bdev(struct block_device *bdev); - int sync_blockdev(struct block_device *bdev); -+int sync_blockdev_nowait(struct block_device *bdev); -+void sync_bdevs(bool wait); - #else - static inline void invalidate_bdev(struct block_device *bdev) - { -@@ -1999,6 +2018,13 @@ static inline int sync_blockdev(struct block_device *bdev) - { - return 0; - } -+static inline int sync_blockdev_nowait(struct block_device *bdev) ++static void io_buffer_unmap(struct io_ring_ctx *ctx, struct io_mapped_ubuf **slot) +{ -+ return 0; ++ struct io_mapped_ubuf *imu = *slot; ++ unsigned int i; ++ ++ if (imu != ctx->dummy_ubuf) { ++ for (i = 0; i < imu->nr_bvecs; i++) ++ unpin_user_page(imu->bvec[i].bv_page); ++ if (imu->acct_pages) ++ io_unaccount_mem(ctx, imu->acct_pages); ++ kvfree(imu); ++ } ++ *slot = NULL; +} -+static inline void sync_bdevs(bool wait) ++ ++static void io_rsrc_buf_put(struct io_ring_ctx *ctx, struct io_rsrc_put *prsrc) +{ ++ io_buffer_unmap(ctx, &prsrc->buf); ++ prsrc->buf = NULL; +} - #endif - int fsync_bdev(struct block_device *bdev); - -diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h -index 2746fd8042162..3536ab432b30c 100644 ---- a/include/linux/bpf-cgroup.h -+++ b/include/linux/bpf-cgroup.h -@@ -517,6 +517,7 @@ static inline int bpf_percpu_cgroup_storage_update(struct bpf_map *map, - - #define cgroup_bpf_enabled(atype) (0) - #define BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, atype, t_ctx) ({ 0; }) -+#define BPF_CGROUP_RUN_SA_PROG(sk, uaddr, atype) ({ 0; }) - #define BPF_CGROUP_PRE_CONNECT_ENABLED(sk) (0) - #define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk,skb) ({ 0; }) - #define BPF_CGROUP_RUN_PROG_INET_EGRESS(sk,skb) ({ 0; }) -diff --git a/include/linux/bpf.h b/include/linux/bpf.h -index 3db6f6c95489e..818cd594e9229 100644 ---- a/include/linux/bpf.h -+++ b/include/linux/bpf.h -@@ -190,7 +190,7 @@ struct bpf_map { - atomic64_t usercnt; - struct work_struct work; - struct mutex freeze_mutex; -- u64 writecnt; /* writable mmap cnt; protected by freeze_mutex */ -+ atomic64_t writecnt; - }; - - static inline bool map_value_has_spin_lock(const struct bpf_map *map) -@@ -206,11 +206,9 @@ static inline bool map_value_has_timer(const struct bpf_map *map) - static inline void check_and_init_map_value(struct bpf_map *map, void *dst) - { - if (unlikely(map_value_has_spin_lock(map))) -- *(struct bpf_spin_lock *)(dst + map->spin_lock_off) = -- (struct bpf_spin_lock){}; -+ memset(dst + map->spin_lock_off, 0, sizeof(struct bpf_spin_lock)); - if (unlikely(map_value_has_timer(map))) -- *(struct bpf_timer *)(dst + map->timer_off) = -- (struct bpf_timer){}; -+ memset(dst + map->timer_off, 0, sizeof(struct bpf_timer)); - } - - /* copy everything but bpf_spin_lock and bpf_timer. There could be one of each. 
*/ -@@ -221,7 +219,8 @@ static inline void copy_map_value(struct bpf_map *map, void *dst, void *src) - if (unlikely(map_value_has_spin_lock(map))) { - s_off = map->spin_lock_off; - s_sz = sizeof(struct bpf_spin_lock); -- } else if (unlikely(map_value_has_timer(map))) { ++ ++static void __io_sqe_buffers_unregister(struct io_ring_ctx *ctx) ++{ ++ unsigned int i; ++ ++ for (i = 0; i < ctx->nr_user_bufs; i++) ++ io_buffer_unmap(ctx, &ctx->user_bufs[i]); ++ kfree(ctx->user_bufs); ++ io_rsrc_data_free(ctx->buf_data); ++ ctx->user_bufs = NULL; ++ ctx->buf_data = NULL; ++ ctx->nr_user_bufs = 0; ++} ++ ++static int io_sqe_buffers_unregister(struct io_ring_ctx *ctx) ++{ ++ unsigned nr = ctx->nr_user_bufs; ++ int ret; ++ ++ if (!ctx->buf_data) ++ return -ENXIO; ++ ++ /* ++ * Quiesce may unlock ->uring_lock, and while it's not held ++ * prevent new requests using the table. ++ */ ++ ctx->nr_user_bufs = 0; ++ ret = io_rsrc_ref_quiesce(ctx->buf_data, ctx); ++ ctx->nr_user_bufs = nr; ++ if (!ret) ++ __io_sqe_buffers_unregister(ctx); ++ return ret; ++} ++ ++static int io_copy_iov(struct io_ring_ctx *ctx, struct iovec *dst, ++ void __user *arg, unsigned index) ++{ ++ struct iovec __user *src; ++ ++#ifdef CONFIG_COMPAT ++ if (ctx->compat) { ++ struct compat_iovec __user *ciovs; ++ struct compat_iovec ciov; ++ ++ ciovs = (struct compat_iovec __user *) arg; ++ if (copy_from_user(&ciov, &ciovs[index], sizeof(ciov))) ++ return -EFAULT; ++ ++ dst->iov_base = u64_to_user_ptr((u64)ciov.iov_base); ++ dst->iov_len = ciov.iov_len; ++ return 0; + } -+ if (unlikely(map_value_has_timer(map))) { - t_off = map->timer_off; - t_sz = sizeof(struct bpf_timer); - } -@@ -294,6 +293,34 @@ bool bpf_map_meta_equal(const struct bpf_map *meta0, - - extern const struct bpf_map_ops bpf_map_offload_ops; - -+/* bpf_type_flag contains a set of flags that are applicable to the values of -+ * arg_type, ret_type and reg_type. For example, a pointer value may be null, -+ * or a memory is read-only. We classify types into two categories: base types -+ * and extended types. Extended types are base types combined with a type flag. ++#endif ++ src = (struct iovec __user *) arg; ++ if (copy_from_user(dst, &src[index], sizeof(*dst))) ++ return -EFAULT; ++ return 0; ++} ++ ++/* ++ * Not super efficient, but this is just a registration time. And we do cache ++ * the last compound head, so generally we'll only do a full search if we don't ++ * match that one. + * -+ * Currently there are no more than 32 base types in arg_type, ret_type and -+ * reg_types. ++ * We check if the given compound head page has already been accounted, to ++ * avoid double accounting it. This allows us to account the full size of the ++ * page, not just the constituent pages of a huge page. + */ -+#define BPF_BASE_TYPE_BITS 8 ++static bool headpage_already_acct(struct io_ring_ctx *ctx, struct page **pages, ++ int nr_pages, struct page *hpage) ++{ ++ int i, j; + -+enum bpf_type_flag { -+ /* PTR may be NULL. */ -+ PTR_MAYBE_NULL = BIT(0 + BPF_BASE_TYPE_BITS), ++ /* check current page array */ ++ for (i = 0; i < nr_pages; i++) { ++ if (!PageCompound(pages[i])) ++ continue; ++ if (compound_head(pages[i]) == hpage) ++ return true; ++ } + -+ /* MEM is read-only. When applied on bpf_arg, it indicates the arg is -+ * compatible with both mutable and immutable memory. 
-+ */ -+ MEM_RDONLY = BIT(1 + BPF_BASE_TYPE_BITS), ++ /* check previously registered pages */ ++ for (i = 0; i < ctx->nr_user_bufs; i++) { ++ struct io_mapped_ubuf *imu = ctx->user_bufs[i]; + -+ __BPF_TYPE_LAST_FLAG = MEM_RDONLY, -+}; ++ for (j = 0; j < imu->nr_bvecs; j++) { ++ if (!PageCompound(imu->bvec[j].bv_page)) ++ continue; ++ if (compound_head(imu->bvec[j].bv_page) == hpage) ++ return true; ++ } ++ } + -+/* Max number of base types. */ -+#define BPF_BASE_TYPE_LIMIT (1UL << BPF_BASE_TYPE_BITS) ++ return false; ++} + -+/* Max number of all types. */ -+#define BPF_TYPE_LIMIT (__BPF_TYPE_LAST_FLAG | (__BPF_TYPE_LAST_FLAG - 1)) ++static int io_buffer_account_pin(struct io_ring_ctx *ctx, struct page **pages, ++ int nr_pages, struct io_mapped_ubuf *imu, ++ struct page **last_hpage) ++{ ++ int i, ret; + - /* function argument constraints */ - enum bpf_arg_type { - ARG_DONTCARE = 0, /* unused argument in helper function */ -@@ -305,13 +332,11 @@ enum bpf_arg_type { - ARG_PTR_TO_MAP_KEY, /* pointer to stack used as map key */ - ARG_PTR_TO_MAP_VALUE, /* pointer to stack used as map value */ - ARG_PTR_TO_UNINIT_MAP_VALUE, /* pointer to valid memory used to store a map value */ -- ARG_PTR_TO_MAP_VALUE_OR_NULL, /* pointer to stack used as map value or NULL */ - - /* the following constraints used to prototype bpf_memcmp() and other - * functions that access data on eBPF program stack - */ - ARG_PTR_TO_MEM, /* pointer to valid memory (stack, packet, map value) */ -- ARG_PTR_TO_MEM_OR_NULL, /* pointer to valid memory or NULL */ - ARG_PTR_TO_UNINIT_MEM, /* pointer to memory does not need to be initialized, - * helper function must fill all bytes or clear - * them in error case. -@@ -321,42 +346,65 @@ enum bpf_arg_type { - ARG_CONST_SIZE_OR_ZERO, /* number of bytes accessed from memory or 0 */ - - ARG_PTR_TO_CTX, /* pointer to context */ -- ARG_PTR_TO_CTX_OR_NULL, /* pointer to context or NULL */ - ARG_ANYTHING, /* any (initialized) argument is ok */ - ARG_PTR_TO_SPIN_LOCK, /* pointer to bpf_spin_lock */ - ARG_PTR_TO_SOCK_COMMON, /* pointer to sock_common */ - ARG_PTR_TO_INT, /* pointer to int */ - ARG_PTR_TO_LONG, /* pointer to long */ - ARG_PTR_TO_SOCKET, /* pointer to bpf_sock (fullsock) */ -- ARG_PTR_TO_SOCKET_OR_NULL, /* pointer to bpf_sock (fullsock) or NULL */ - ARG_PTR_TO_BTF_ID, /* pointer to in-kernel struct */ - ARG_PTR_TO_ALLOC_MEM, /* pointer to dynamically allocated memory */ -- ARG_PTR_TO_ALLOC_MEM_OR_NULL, /* pointer to dynamically allocated memory or NULL */ - ARG_CONST_ALLOC_SIZE_OR_ZERO, /* number of allocated bytes requested */ - ARG_PTR_TO_BTF_ID_SOCK_COMMON, /* pointer to in-kernel sock_common or bpf-mirrored bpf_sock */ - ARG_PTR_TO_PERCPU_BTF_ID, /* pointer to in-kernel percpu type */ - ARG_PTR_TO_FUNC, /* pointer to a bpf program function */ -- ARG_PTR_TO_STACK_OR_NULL, /* pointer to stack or NULL */ -+ ARG_PTR_TO_STACK, /* pointer to stack */ - ARG_PTR_TO_CONST_STR, /* pointer to a null terminated read-only string */ - ARG_PTR_TO_TIMER, /* pointer to bpf_timer */ - __BPF_ARG_TYPE_MAX, ++ imu->acct_pages = 0; ++ for (i = 0; i < nr_pages; i++) { ++ if (!PageCompound(pages[i])) { ++ imu->acct_pages++; ++ } else { ++ struct page *hpage; + -+ /* Extended arg_types. 
*/ -+ ARG_PTR_TO_MAP_VALUE_OR_NULL = PTR_MAYBE_NULL | ARG_PTR_TO_MAP_VALUE, -+ ARG_PTR_TO_MEM_OR_NULL = PTR_MAYBE_NULL | ARG_PTR_TO_MEM, -+ ARG_PTR_TO_CTX_OR_NULL = PTR_MAYBE_NULL | ARG_PTR_TO_CTX, -+ ARG_PTR_TO_SOCKET_OR_NULL = PTR_MAYBE_NULL | ARG_PTR_TO_SOCKET, -+ ARG_PTR_TO_ALLOC_MEM_OR_NULL = PTR_MAYBE_NULL | ARG_PTR_TO_ALLOC_MEM, -+ ARG_PTR_TO_STACK_OR_NULL = PTR_MAYBE_NULL | ARG_PTR_TO_STACK, ++ hpage = compound_head(pages[i]); ++ if (hpage == *last_hpage) ++ continue; ++ *last_hpage = hpage; ++ if (headpage_already_acct(ctx, pages, i, hpage)) ++ continue; ++ imu->acct_pages += page_size(hpage) >> PAGE_SHIFT; ++ } ++ } + -+ /* This must be the last entry. Its purpose is to ensure the enum is -+ * wide enough to hold the higher bits reserved for bpf_type_flag. -+ */ -+ __BPF_ARG_TYPE_LIMIT = BPF_TYPE_LIMIT, - }; -+static_assert(__BPF_ARG_TYPE_MAX <= BPF_BASE_TYPE_LIMIT); - - /* type of values returned from helper functions */ - enum bpf_return_type { - RET_INTEGER, /* function returns integer */ - RET_VOID, /* function doesn't return anything */ - RET_PTR_TO_MAP_VALUE, /* returns a pointer to map elem value */ -- RET_PTR_TO_MAP_VALUE_OR_NULL, /* returns a pointer to map elem value or NULL */ -- RET_PTR_TO_SOCKET_OR_NULL, /* returns a pointer to a socket or NULL */ -- RET_PTR_TO_TCP_SOCK_OR_NULL, /* returns a pointer to a tcp_sock or NULL */ -- RET_PTR_TO_SOCK_COMMON_OR_NULL, /* returns a pointer to a sock_common or NULL */ -- RET_PTR_TO_ALLOC_MEM_OR_NULL, /* returns a pointer to dynamically allocated memory or NULL */ -- RET_PTR_TO_BTF_ID_OR_NULL, /* returns a pointer to a btf_id or NULL */ -- RET_PTR_TO_MEM_OR_BTF_ID_OR_NULL, /* returns a pointer to a valid memory or a btf_id or NULL */ -+ RET_PTR_TO_SOCKET, /* returns a pointer to a socket */ -+ RET_PTR_TO_TCP_SOCK, /* returns a pointer to a tcp_sock */ -+ RET_PTR_TO_SOCK_COMMON, /* returns a pointer to a sock_common */ -+ RET_PTR_TO_ALLOC_MEM, /* returns a pointer to dynamically allocated memory */ - RET_PTR_TO_MEM_OR_BTF_ID, /* returns a pointer to a valid memory or a btf_id */ - RET_PTR_TO_BTF_ID, /* returns a pointer to a btf_id */ -+ __BPF_RET_TYPE_MAX, ++ if (!imu->acct_pages) ++ return 0; + -+ /* Extended ret_types. */ -+ RET_PTR_TO_MAP_VALUE_OR_NULL = PTR_MAYBE_NULL | RET_PTR_TO_MAP_VALUE, -+ RET_PTR_TO_SOCKET_OR_NULL = PTR_MAYBE_NULL | RET_PTR_TO_SOCKET, -+ RET_PTR_TO_TCP_SOCK_OR_NULL = PTR_MAYBE_NULL | RET_PTR_TO_TCP_SOCK, -+ RET_PTR_TO_SOCK_COMMON_OR_NULL = PTR_MAYBE_NULL | RET_PTR_TO_SOCK_COMMON, -+ RET_PTR_TO_ALLOC_MEM_OR_NULL = PTR_MAYBE_NULL | RET_PTR_TO_ALLOC_MEM, -+ RET_PTR_TO_BTF_ID_OR_NULL = PTR_MAYBE_NULL | RET_PTR_TO_BTF_ID, ++ ret = io_account_mem(ctx, imu->acct_pages); ++ if (ret) ++ imu->acct_pages = 0; ++ return ret; ++} + -+ /* This must be the last entry. Its purpose is to ensure the enum is -+ * wide enough to hold the higher bits reserved for bpf_type_flag. 
-+ */ -+ __BPF_RET_TYPE_LIMIT = BPF_TYPE_LIMIT, - }; -+static_assert(__BPF_RET_TYPE_MAX <= BPF_BASE_TYPE_LIMIT); - - /* eBPF function prototype used by verifier to allow BPF_CALLs from eBPF programs - * to in-kernel helper functions and for adjusting imm32 field in BPF_CALL -@@ -418,18 +466,15 @@ enum bpf_reg_type { - PTR_TO_CTX, /* reg points to bpf_context */ - CONST_PTR_TO_MAP, /* reg points to struct bpf_map */ - PTR_TO_MAP_VALUE, /* reg points to map element value */ -- PTR_TO_MAP_VALUE_OR_NULL,/* points to map elem value or NULL */ -+ PTR_TO_MAP_KEY, /* reg points to a map element key */ - PTR_TO_STACK, /* reg == frame_pointer + offset */ - PTR_TO_PACKET_META, /* skb->data - meta_len */ - PTR_TO_PACKET, /* reg points to skb->data */ - PTR_TO_PACKET_END, /* skb->data + headlen */ - PTR_TO_FLOW_KEYS, /* reg points to bpf_flow_keys */ - PTR_TO_SOCKET, /* reg points to struct bpf_sock */ -- PTR_TO_SOCKET_OR_NULL, /* reg points to struct bpf_sock or NULL */ - PTR_TO_SOCK_COMMON, /* reg points to sock_common */ -- PTR_TO_SOCK_COMMON_OR_NULL, /* reg points to sock_common or NULL */ - PTR_TO_TCP_SOCK, /* reg points to struct tcp_sock */ -- PTR_TO_TCP_SOCK_OR_NULL, /* reg points to struct tcp_sock or NULL */ - PTR_TO_TP_BUFFER, /* reg points to a writable raw tp's buffer */ - PTR_TO_XDP_SOCK, /* reg points to struct xdp_sock */ - /* PTR_TO_BTF_ID points to a kernel struct that does not need -@@ -447,18 +492,25 @@ enum bpf_reg_type { - * been checked for null. Used primarily to inform the verifier - * an explicit null check is required for this struct. - */ -- PTR_TO_BTF_ID_OR_NULL, - PTR_TO_MEM, /* reg points to valid memory region */ -- PTR_TO_MEM_OR_NULL, /* reg points to valid memory region or NULL */ -- PTR_TO_RDONLY_BUF, /* reg points to a readonly buffer */ -- PTR_TO_RDONLY_BUF_OR_NULL, /* reg points to a readonly buffer or NULL */ -- PTR_TO_RDWR_BUF, /* reg points to a read/write buffer */ -- PTR_TO_RDWR_BUF_OR_NULL, /* reg points to a read/write buffer or NULL */ -+ PTR_TO_BUF, /* reg points to a read/write buffer */ - PTR_TO_PERCPU_BTF_ID, /* reg points to a percpu kernel variable */ - PTR_TO_FUNC, /* reg points to a bpf program function */ -- PTR_TO_MAP_KEY, /* reg points to a map element key */ - __BPF_REG_TYPE_MAX, ++static int io_sqe_buffer_register(struct io_ring_ctx *ctx, struct iovec *iov, ++ struct io_mapped_ubuf **pimu, ++ struct page **last_hpage) ++{ ++ struct io_mapped_ubuf *imu = NULL; ++ struct vm_area_struct **vmas = NULL; ++ struct page **pages = NULL; ++ unsigned long off, start, end, ubuf; ++ size_t size; ++ int ret, pret, nr_pages, i; + -+ /* Extended reg_types. */ -+ PTR_TO_MAP_VALUE_OR_NULL = PTR_MAYBE_NULL | PTR_TO_MAP_VALUE, -+ PTR_TO_SOCKET_OR_NULL = PTR_MAYBE_NULL | PTR_TO_SOCKET, -+ PTR_TO_SOCK_COMMON_OR_NULL = PTR_MAYBE_NULL | PTR_TO_SOCK_COMMON, -+ PTR_TO_TCP_SOCK_OR_NULL = PTR_MAYBE_NULL | PTR_TO_TCP_SOCK, -+ PTR_TO_BTF_ID_OR_NULL = PTR_MAYBE_NULL | PTR_TO_BTF_ID, ++ if (!iov->iov_base) { ++ *pimu = ctx->dummy_ubuf; ++ return 0; ++ } + -+ /* This must be the last entry. Its purpose is to ensure the enum is -+ * wide enough to hold the higher bits reserved for bpf_type_flag. 
++ ubuf = (unsigned long) iov->iov_base; ++ end = (ubuf + iov->iov_len + PAGE_SIZE - 1) >> PAGE_SHIFT; ++ start = ubuf >> PAGE_SHIFT; ++ nr_pages = end - start; ++ ++ *pimu = NULL; ++ ret = -ENOMEM; ++ ++ pages = kvmalloc_array(nr_pages, sizeof(struct page *), GFP_KERNEL); ++ if (!pages) ++ goto done; ++ ++ vmas = kvmalloc_array(nr_pages, sizeof(struct vm_area_struct *), ++ GFP_KERNEL); ++ if (!vmas) ++ goto done; ++ ++ imu = kvmalloc(struct_size(imu, bvec, nr_pages), GFP_KERNEL); ++ if (!imu) ++ goto done; ++ ++ ret = 0; ++ mmap_read_lock(current->mm); ++ pret = pin_user_pages(ubuf, nr_pages, FOLL_WRITE | FOLL_LONGTERM, ++ pages, vmas); ++ if (pret == nr_pages) { ++ /* don't support file backed memory */ ++ for (i = 0; i < nr_pages; i++) { ++ struct vm_area_struct *vma = vmas[i]; ++ ++ if (vma_is_shmem(vma)) ++ continue; ++ if (vma->vm_file && ++ !is_file_hugepages(vma->vm_file)) { ++ ret = -EOPNOTSUPP; ++ break; ++ } ++ } ++ } else { ++ ret = pret < 0 ? pret : -EFAULT; ++ } ++ mmap_read_unlock(current->mm); ++ if (ret) { ++ /* ++ * if we did partial map, or found file backed vmas, ++ * release any pages we did get ++ */ ++ if (pret > 0) ++ unpin_user_pages(pages, pret); ++ goto done; ++ } ++ ++ ret = io_buffer_account_pin(ctx, pages, pret, imu, last_hpage); ++ if (ret) { ++ unpin_user_pages(pages, pret); ++ goto done; ++ } ++ ++ off = ubuf & ~PAGE_MASK; ++ size = iov->iov_len; ++ for (i = 0; i < nr_pages; i++) { ++ size_t vec_len; ++ ++ vec_len = min_t(size_t, size, PAGE_SIZE - off); ++ imu->bvec[i].bv_page = pages[i]; ++ imu->bvec[i].bv_len = vec_len; ++ imu->bvec[i].bv_offset = off; ++ off = 0; ++ size -= vec_len; ++ } ++ /* store original address for later verification */ ++ imu->ubuf = ubuf; ++ imu->ubuf_end = ubuf + iov->iov_len; ++ imu->nr_bvecs = nr_pages; ++ *pimu = imu; ++ ret = 0; ++done: ++ if (ret) ++ kvfree(imu); ++ kvfree(pages); ++ kvfree(vmas); ++ return ret; ++} ++ ++static int io_buffers_map_alloc(struct io_ring_ctx *ctx, unsigned int nr_args) ++{ ++ ctx->user_bufs = kcalloc(nr_args, sizeof(*ctx->user_bufs), GFP_KERNEL); ++ return ctx->user_bufs ? 0 : -ENOMEM; ++} ++ ++static int io_buffer_validate(struct iovec *iov) ++{ ++ unsigned long tmp, acct_len = iov->iov_len + (PAGE_SIZE - 1); ++ ++ /* ++ * Don't impose further limits on the size and buffer ++ * constraints here, we'll -EINVAL later when IO is ++ * submitted if they are wrong. + */ -+ __BPF_REG_TYPE_LIMIT = BPF_TYPE_LIMIT, - }; -+static_assert(__BPF_REG_TYPE_MAX <= BPF_BASE_TYPE_LIMIT); - - /* The information passed from prog-specific *_is_valid_access - * back to the verifier. -@@ -481,6 +533,12 @@ bpf_ctx_record_field_size(struct bpf_insn_access_aux *aux, u32 size) - aux->ctx_field_size = size; - } - -+static inline bool bpf_pseudo_func(const struct bpf_insn *insn) ++ if (!iov->iov_base) ++ return iov->iov_len ? 
-EFAULT : 0; ++ if (!iov->iov_len) ++ return -EFAULT; ++ ++ /* arbitrary limit, but we need something */ ++ if (iov->iov_len > SZ_1G) ++ return -EFAULT; ++ ++ if (check_add_overflow((unsigned long)iov->iov_base, acct_len, &tmp)) ++ return -EOVERFLOW; ++ ++ return 0; ++} ++ ++static int io_sqe_buffers_register(struct io_ring_ctx *ctx, void __user *arg, ++ unsigned int nr_args, u64 __user *tags) +{ -+ return insn->code == (BPF_LD | BPF_IMM | BPF_DW) && -+ insn->src_reg == BPF_PSEUDO_FUNC; ++ struct page *last_hpage = NULL; ++ struct io_rsrc_data *data; ++ int i, ret; ++ struct iovec iov; ++ ++ if (ctx->user_bufs) ++ return -EBUSY; ++ if (!nr_args || nr_args > IORING_MAX_REG_BUFFERS) ++ return -EINVAL; ++ ret = io_rsrc_node_switch_start(ctx); ++ if (ret) ++ return ret; ++ ret = io_rsrc_data_alloc(ctx, io_rsrc_buf_put, tags, nr_args, &data); ++ if (ret) ++ return ret; ++ ret = io_buffers_map_alloc(ctx, nr_args); ++ if (ret) { ++ io_rsrc_data_free(data); ++ return ret; ++ } ++ ++ for (i = 0; i < nr_args; i++, ctx->nr_user_bufs++) { ++ ret = io_copy_iov(ctx, &iov, arg, i); ++ if (ret) ++ break; ++ ret = io_buffer_validate(&iov); ++ if (ret) ++ break; ++ if (!iov.iov_base && *io_get_tag_slot(data, i)) { ++ ret = -EINVAL; ++ break; ++ } ++ ++ ret = io_sqe_buffer_register(ctx, &iov, &ctx->user_bufs[i], ++ &last_hpage); ++ if (ret) ++ break; ++ } ++ ++ WARN_ON_ONCE(ctx->buf_data); ++ ++ ctx->buf_data = data; ++ if (ret) ++ __io_sqe_buffers_unregister(ctx); ++ else ++ io_rsrc_node_switch(ctx, NULL); ++ return ret; +} + - struct bpf_prog_ops { - int (*test_run)(struct bpf_prog *prog, const union bpf_attr *kattr, - union bpf_attr __user *uattr); -@@ -723,6 +781,7 @@ int bpf_trampoline_unlink_prog(struct bpf_prog *prog, struct bpf_trampoline *tr) - struct bpf_trampoline *bpf_trampoline_get(u64 key, - struct bpf_attach_target_info *tgt_info); - void bpf_trampoline_put(struct bpf_trampoline *tr); -+int arch_prepare_bpf_dispatcher(void *image, s64 *funcs, int num_funcs); - #define BPF_DISPATCHER_INIT(_name) { \ - .mutex = __MUTEX_INITIALIZER(_name.mutex), \ - .func = &_name##_func, \ -@@ -1320,28 +1379,16 @@ extern struct mutex bpf_stats_enabled_mutex; - * kprobes, tracepoints) to prevent deadlocks on map operations as any of - * these events can happen inside a region which holds a map bucket lock - * and can deadlock on it. -- * -- * Use the preemption safe inc/dec variants on RT because migrate disable -- * is preemptible on RT and preemption in the middle of the RMW operation -- * might lead to inconsistent state. Use the raw variants for non RT -- * kernels as migrate_disable() maps to preempt_disable() so the slightly -- * more expensive save operation can be avoided. 
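
Looking back at io_buffer_validate() above: the check_add_overflow() call rejects a base/length pair whose sum wraps the address space, since a wrapped end address would defeat every later bounds check. The same guard in stand-alone C, using the GCC/Clang builtin that the kernel helper wraps (a sketch, not the kernel code):

    #include <stdint.h>
    #include <stdio.h>

    /* Reject a (base, len) range whose end address would wrap. */
    static int range_ok(uintptr_t base, size_t len)
    {
        uintptr_t end;

        /* __builtin_add_overflow returns nonzero if the sum overflowed */
        return !__builtin_add_overflow(base, len, &end);
    }

    int main(void)
    {
        printf("%d\n", range_ok(0x1000, 4096));    /* 1: fits */
        printf("%d\n", range_ok(UINTPTR_MAX, 16)); /* 0: wraps */
        return 0;
    }
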
- */ - static inline void bpf_disable_instrumentation(void) - { - migrate_disable(); -- if (IS_ENABLED(CONFIG_PREEMPT_RT)) -- this_cpu_inc(bpf_prog_active); -- else -- __this_cpu_inc(bpf_prog_active); -+ this_cpu_inc(bpf_prog_active); - } - - static inline void bpf_enable_instrumentation(void) - { -- if (IS_ENABLED(CONFIG_PREEMPT_RT)) -- this_cpu_dec(bpf_prog_active); -- else -- __this_cpu_dec(bpf_prog_active); -+ this_cpu_dec(bpf_prog_active); - migrate_enable(); - } - -@@ -1387,6 +1434,7 @@ void bpf_map_put(struct bpf_map *map); - void *bpf_map_area_alloc(u64 size, int numa_node); - void *bpf_map_area_mmapable_alloc(u64 size, int numa_node); - void bpf_map_area_free(void *base); -+bool bpf_map_write_active(const struct bpf_map *map); - void bpf_map_init_from_attr(struct bpf_map *map, union bpf_attr *attr); - int generic_map_lookup_batch(struct bpf_map *map, - const union bpf_attr *attr, -@@ -1677,6 +1725,12 @@ bool bpf_prog_has_kfunc_call(const struct bpf_prog *prog); - const struct btf_func_model * - bpf_jit_find_kfunc_model(const struct bpf_prog *prog, - const struct bpf_insn *insn); ++static int __io_sqe_buffers_update(struct io_ring_ctx *ctx, ++ struct io_uring_rsrc_update2 *up, ++ unsigned int nr_args) ++{ ++ u64 __user *tags = u64_to_user_ptr(up->tags); ++ struct iovec iov, __user *iovs = u64_to_user_ptr(up->data); ++ struct page *last_hpage = NULL; ++ bool needs_switch = false; ++ __u32 done; ++ int i, err; + -+static inline bool unprivileged_ebpf_enabled(void) ++ if (!ctx->buf_data) ++ return -ENXIO; ++ if (up->offset + nr_args > ctx->nr_user_bufs) ++ return -EINVAL; ++ ++ for (done = 0; done < nr_args; done++) { ++ struct io_mapped_ubuf *imu; ++ int offset = up->offset + done; ++ u64 tag = 0; ++ ++ err = io_copy_iov(ctx, &iov, iovs, done); ++ if (err) ++ break; ++ if (tags && copy_from_user(&tag, &tags[done], sizeof(tag))) { ++ err = -EFAULT; ++ break; ++ } ++ err = io_buffer_validate(&iov); ++ if (err) ++ break; ++ if (!iov.iov_base && tag) { ++ err = -EINVAL; ++ break; ++ } ++ err = io_sqe_buffer_register(ctx, &iov, &imu, &last_hpage); ++ if (err) ++ break; ++ ++ i = array_index_nospec(offset, ctx->nr_user_bufs); ++ if (ctx->user_bufs[i] != ctx->dummy_ubuf) { ++ err = io_queue_rsrc_removal(ctx->buf_data, i, ++ ctx->rsrc_node, ctx->user_bufs[i]); ++ if (unlikely(err)) { ++ io_buffer_unmap(ctx, &imu); ++ break; ++ } ++ ctx->user_bufs[i] = NULL; ++ needs_switch = true; ++ } ++ ++ ctx->user_bufs[i] = imu; ++ *io_get_tag_slot(ctx->buf_data, offset) = tag; ++ } ++ ++ if (needs_switch) ++ io_rsrc_node_switch(ctx, ctx->buf_data); ++ return done ? 
done : err; ++} ++ ++static int io_eventfd_register(struct io_ring_ctx *ctx, void __user *arg) +{ -+ return !sysctl_unprivileged_bpf_disabled; ++ __s32 __user *fds = arg; ++ int fd; ++ ++ if (ctx->cq_ev_fd) ++ return -EBUSY; ++ ++ if (copy_from_user(&fd, fds, sizeof(*fds))) ++ return -EFAULT; ++ ++ ctx->cq_ev_fd = eventfd_ctx_fdget(fd); ++ if (IS_ERR(ctx->cq_ev_fd)) { ++ int ret = PTR_ERR(ctx->cq_ev_fd); ++ ++ ctx->cq_ev_fd = NULL; ++ return ret; ++ } ++ ++ return 0; +} + - #else /* !CONFIG_BPF_SYSCALL */ - static inline struct bpf_prog *bpf_prog_get(u32 ufd) - { -@@ -1895,6 +1949,12 @@ bpf_jit_find_kfunc_model(const struct bpf_prog *prog, - { - return NULL; - } ++static int io_eventfd_unregister(struct io_ring_ctx *ctx) ++{ ++ if (ctx->cq_ev_fd) { ++ eventfd_ctx_put(ctx->cq_ev_fd); ++ ctx->cq_ev_fd = NULL; ++ return 0; ++ } + -+static inline bool unprivileged_ebpf_enabled(void) ++ return -ENXIO; ++} ++ ++static void io_destroy_buffers(struct io_ring_ctx *ctx) +{ -+ return false; ++ struct io_buffer *buf; ++ unsigned long index; ++ ++ xa_for_each(&ctx->io_buffers, index, buf) ++ __io_remove_buffers(ctx, buf, index, -1U); +} + - #endif /* CONFIG_BPF_SYSCALL */ - - void __bpf_free_used_btfs(struct bpf_prog_aux *aux, -@@ -1937,6 +1997,8 @@ void bpf_offload_dev_netdev_unregister(struct bpf_offload_dev *offdev, - struct net_device *netdev); - bool bpf_offload_dev_match(struct bpf_prog *prog, struct net_device *netdev); - -+void unpriv_ebpf_notify(int new_state); ++static void io_req_cache_free(struct list_head *list) ++{ ++ struct io_kiocb *req, *nxt; + - #if defined(CONFIG_NET) && defined(CONFIG_BPF_SYSCALL) - int bpf_prog_offload_init(struct bpf_prog *prog, union bpf_attr *attr); - -diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h -index 5424124dbe365..5625e19ae95b4 100644 ---- a/include/linux/bpf_verifier.h -+++ b/include/linux/bpf_verifier.h -@@ -18,6 +18,8 @@ - * that converting umax_value to int cannot overflow. - */ - #define BPF_MAX_VAR_SIZ (1 << 29) -+/* size of type_str_buf in bpf_verifier. */ -+#define TYPE_STR_BUF_LEN 64 - - /* Liveness marks, used for registers and spilled-regs (in stack slots). - * Read marks propagate upwards until they find a write mark; they record that -@@ -190,6 +192,17 @@ struct bpf_reference_state { - * is used purely to inform the user of a reference leak. - */ - int insn_idx; -+ /* There can be a case like: -+ * main (frame 0) -+ * cb (frame 1) -+ * func (frame 3) -+ * cb (frame 4) -+ * Hence for frame 4, if callback_ref just stored boolean, it would be -+ * impossible to distinguish nested callback refs. Hence store the -+ * frameno and compare that to callback_ref in check_reference_leak when -+ * exiting a callback function. 
++ list_for_each_entry_safe(req, nxt, list, inflight_entry) {
++ list_del(&req->inflight_entry);
++ kmem_cache_free(req_cachep, req);
++ }
++}
++
++static void io_req_caches_free(struct io_ring_ctx *ctx)
++{
++ struct io_submit_state *state = &ctx->submit_state;
++
++ mutex_lock(&ctx->uring_lock);
++
++ if (state->free_reqs) {
++ kmem_cache_free_bulk(req_cachep, state->free_reqs, state->reqs);
++ state->free_reqs = 0;
++ }
++
++ io_flush_cached_locked_reqs(ctx, state);
++ io_req_cache_free(&state->free_list);
++ mutex_unlock(&ctx->uring_lock);
++}
++
++static void io_wait_rsrc_data(struct io_rsrc_data *data)
++{
++ if (data && !atomic_dec_and_test(&data->refs))
++ wait_for_completion(&data->done);
++}
++
++static void io_ring_ctx_free(struct io_ring_ctx *ctx)
++{
++ io_sq_thread_finish(ctx);
++
++ /* __io_rsrc_put_work() may need uring_lock to progress, wait w/o it */
++ io_wait_rsrc_data(ctx->buf_data);
++ io_wait_rsrc_data(ctx->file_data);
++
++ mutex_lock(&ctx->uring_lock);
++ if (ctx->buf_data)
++ __io_sqe_buffers_unregister(ctx);
++ if (ctx->file_data)
++ __io_sqe_files_unregister(ctx);
++ if (ctx->rings)
++ __io_cqring_overflow_flush(ctx, true);
++ mutex_unlock(&ctx->uring_lock);
++ io_eventfd_unregister(ctx);
++ io_destroy_buffers(ctx);
++ if (ctx->sq_creds)
++ put_cred(ctx->sq_creds);
++
++ /* there are no registered resources left, nobody uses it */
++ if (ctx->rsrc_node)
++ io_rsrc_node_destroy(ctx->rsrc_node);
++ if (ctx->rsrc_backup_node)
++ io_rsrc_node_destroy(ctx->rsrc_backup_node);
++ flush_delayed_work(&ctx->rsrc_put_work);
++
++ WARN_ON_ONCE(!list_empty(&ctx->rsrc_ref_list));
++ WARN_ON_ONCE(!llist_empty(&ctx->rsrc_put_llist));
++
++#if defined(CONFIG_UNIX)
++ if (ctx->ring_sock) {
++ ctx->ring_sock->file = NULL; /* so that iput() is called */
++ sock_release(ctx->ring_sock);
++ }
++#endif
++ WARN_ON_ONCE(!list_empty(&ctx->ltimeout_list));
++
++ if (ctx->mm_account) {
++ mmdrop(ctx->mm_account);
++ ctx->mm_account = NULL;
++ }
++
++ io_mem_free(ctx->rings);
++ io_mem_free(ctx->sq_sqes);
++
++ percpu_ref_exit(&ctx->refs);
++ free_uid(ctx->user);
++ io_req_caches_free(ctx);
++ if (ctx->hash_map)
++ io_wq_put_hash(ctx->hash_map);
++ kfree(ctx->cancel_hash);
++ kfree(ctx->dummy_ubuf);
++ kfree(ctx);
++}
++
++static __poll_t io_uring_poll(struct file *file, poll_table *wait)
++{
++ struct io_ring_ctx *ctx = file->private_data;
++ __poll_t mask = 0;
++
++ poll_wait(file, &ctx->poll_wait, wait);
++ /*
++ * synchronizes with barrier from wq_has_sleeper call in
++ * io_commit_cqring
+ */
++ smp_rmb();
++ if (!io_sqring_full(ctx))
++ mask |= EPOLLOUT | EPOLLWRNORM;
++
++ /*
++ * Don't flush cqring overflow list here, just do a simple check.
++ * Otherwise there could possibly be an ABBA deadlock:
++ *      CPU0                    CPU1
++ *      ----                    ----
++ * lock(&ctx->uring_lock);
++ *                              lock(&ep->mtx);
++ *                              lock(&ctx->uring_lock);
++ * lock(&ep->mtx);
++ *
++ * Users may get EPOLLIN meanwhile seeing nothing in cqring, this
++ * pushes them to do the flush.
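
The ABBA pattern that comment warns about is the classic two-lock inversion: one side takes uring_lock then ep->mtx, the other takes them in the opposite order. Reduced to a self-contained pthread sketch (illustrative only, not part of the patch; mutexes a and b stand in for ctx->uring_lock and ep->mtx):

    /* build: cc abba.c -lpthread */
    #include <pthread.h>

    static pthread_mutex_t a = PTHREAD_MUTEX_INITIALIZER; /* "uring_lock" */
    static pthread_mutex_t b = PTHREAD_MUTEX_INITIALIZER; /* "ep->mtx"    */

    static void *cpu0(void *arg)
    {
        pthread_mutex_lock(&a);   /* CPU0: lock(a)... */
        pthread_mutex_lock(&b);   /* ...then lock(b): may block forever */
        pthread_mutex_unlock(&b);
        pthread_mutex_unlock(&a);
        return arg;
    }

    static void *cpu1(void *arg)
    {
        pthread_mutex_lock(&b);   /* CPU1: lock(b)... */
        pthread_mutex_lock(&a);   /* ...then lock(a): completes the cycle */
        pthread_mutex_unlock(&a);
        pthread_mutex_unlock(&b);
        return arg;
    }

    int main(void)
    {
        pthread_t t0, t1;

        pthread_create(&t0, NULL, cpu0, NULL);
        pthread_create(&t1, NULL, cpu1, NULL);
        pthread_join(t0, NULL);   /* with unlucky timing, never returns */
        pthread_join(t1, NULL);
        return 0;
    }

With unlucky scheduling each thread ends up holding one mutex and waiting on the other forever, which is exactly why io_uring_poll() only peeks at the overflow flag instead of taking uring_lock to flush.
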
++ */ ++ if (io_cqring_events(ctx) || test_bit(0, &ctx->check_cq_overflow)) ++ mask |= EPOLLIN | EPOLLRDNORM; ++ ++ return mask; ++} ++ ++static int io_unregister_personality(struct io_ring_ctx *ctx, unsigned id) +{ -+ return log->len_total >= 128 && log->len_total <= UINT_MAX >> 2 && -+ log->level && log->ubuf && !(log->level & ~BPF_LOG_MASK); ++ const struct cred *creds; ++ ++ creds = xa_erase(&ctx->personalities, id); ++ if (creds) { ++ put_cred(creds); ++ return 0; ++ } ++ ++ return -EINVAL; +} + - #define BPF_MAX_SUBPROGS 256 - - struct bpf_subprog_info { -@@ -467,6 +487,8 @@ struct bpf_verifier_env { - /* longest register parentage chain walked for liveness marking */ - u32 longest_mark_read_walk; - bpfptr_t fd_array; -+ /* buffer used in reg_type_str() to generate reg_type string */ -+ char type_str_buf[TYPE_STR_BUF_LEN]; - }; - - __printf(2, 0) void bpf_verifier_vlog(struct bpf_verifier_log *log, -@@ -528,4 +550,18 @@ int bpf_check_attach_target(struct bpf_verifier_log *log, - u32 btf_id, - struct bpf_attach_target_info *tgt_info); - -+#define BPF_BASE_TYPE_MASK GENMASK(BPF_BASE_TYPE_BITS - 1, 0) ++struct io_tctx_exit { ++ struct callback_head task_work; ++ struct completion completion; ++ struct io_ring_ctx *ctx; ++}; + -+/* extract base type from bpf_{arg, return, reg}_type. */ -+static inline u32 base_type(u32 type) ++static void io_tctx_exit_cb(struct callback_head *cb) +{ -+ return type & BPF_BASE_TYPE_MASK; ++ struct io_uring_task *tctx = current->io_uring; ++ struct io_tctx_exit *work; ++ ++ work = container_of(cb, struct io_tctx_exit, task_work); ++ /* ++ * When @in_idle, we're in cancellation and it's racy to remove the ++ * node. It'll be removed by the end of cancellation, just ignore it. ++ * tctx can be NULL if the queueing of this task_work raced with ++ * work cancelation off the exec path. ++ */ ++ if (tctx && !atomic_read(&tctx->in_idle)) ++ io_uring_del_tctx_node((unsigned long)work->ctx); ++ complete(&work->completion); +} + -+/* extract flags from an extended type. See bpf_type_flag in bpf.h. 
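
base_type() and type_flag(), defined next, decompose one combined u32: the low BPF_BASE_TYPE_BITS carry the base enum, everything above carries flags such as PTR_MAYBE_NULL. A free-standing sketch of the same encode/decode scheme (the numeric values below are invented for illustration):

    #include <stdint.h>
    #include <stdio.h>

    #define BASE_TYPE_BITS 8
    #define BASE_TYPE_MASK ((1u << BASE_TYPE_BITS) - 1)   /* GENMASK(7, 0) */

    #define PTR_TO_SOCKET  5u                             /* example base type */
    #define PTR_MAYBE_NULL (1u << BASE_TYPE_BITS)         /* first flag bit    */

    static uint32_t base_type(uint32_t t) { return t & BASE_TYPE_MASK;  }
    static uint32_t type_flag(uint32_t t) { return t & ~BASE_TYPE_MASK; }

    int main(void)
    {
        /* compose a flagged type, then split it back apart */
        uint32_t t = PTR_TO_SOCKET | PTR_MAYBE_NULL;

        printf("base=%u maybe_null=%d\n",
               base_type(t), !!(type_flag(t) & PTR_MAYBE_NULL));
        return 0;
    }
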
*/ -+static inline u32 type_flag(u32 type) ++static bool io_cancel_ctx_cb(struct io_wq_work *work, void *data) +{ -+ return type & ~BPF_BASE_TYPE_MASK; ++ struct io_kiocb *req = container_of(work, struct io_kiocb, work); ++ ++ return req->ctx == data; +} + - #endif /* _LINUX_BPF_VERIFIER_H */ -diff --git a/include/linux/bpfptr.h b/include/linux/bpfptr.h -index 546e27fc6d462..ee28d2b0a3091 100644 ---- a/include/linux/bpfptr.h -+++ b/include/linux/bpfptr.h -@@ -48,7 +48,9 @@ static inline void bpfptr_add(bpfptr_t *bpfptr, size_t val) - static inline int copy_from_bpfptr_offset(void *dst, bpfptr_t src, - size_t offset, size_t size) - { -- return copy_from_sockptr_offset(dst, (sockptr_t) src, offset, size); -+ if (!bpfptr_is_kernel(src)) -+ return copy_from_user(dst, src.user + offset, size); -+ return copy_from_kernel_nofault(dst, src.kernel + offset, size); - } - - static inline int copy_from_bpfptr(void *dst, bpfptr_t src, size_t size) -@@ -77,7 +79,9 @@ static inline void *kvmemdup_bpfptr(bpfptr_t src, size_t len) - - static inline long strncpy_from_bpfptr(char *dst, bpfptr_t src, size_t count) - { -- return strncpy_from_sockptr(dst, (sockptr_t) src, count); -+ if (bpfptr_is_kernel(src)) -+ return strncpy_from_kernel_nofault(dst, src.kernel, count); -+ return strncpy_from_user(dst, src.user, count); - } - - #endif /* _LINUX_BPFPTR_H */ -diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h -index 36f33685c8c00..25b4263d66d70 100644 ---- a/include/linux/buffer_head.h -+++ b/include/linux/buffer_head.h -@@ -117,7 +117,6 @@ static __always_inline int test_clear_buffer_##name(struct buffer_head *bh) \ - * of the form "mark_buffer_foo()". These are higher-level functions which - * do something in addition to setting a b_state bit. - */ --BUFFER_FNS(Uptodate, uptodate) - BUFFER_FNS(Dirty, dirty) - TAS_BUFFER_FNS(Dirty, dirty) - BUFFER_FNS(Lock, locked) -@@ -135,6 +134,41 @@ BUFFER_FNS(Meta, meta) - BUFFER_FNS(Prio, prio) - BUFFER_FNS(Defer_Completion, defer_completion) - -+static __always_inline void set_buffer_uptodate(struct buffer_head *bh) ++static void io_ring_exit_work(struct work_struct *work) +{ ++ struct io_ring_ctx *ctx = container_of(work, struct io_ring_ctx, exit_work); ++ unsigned long timeout = jiffies + HZ * 60 * 5; ++ unsigned long interval = HZ / 20; ++ struct io_tctx_exit exit; ++ struct io_tctx_node *node; ++ int ret; ++ + /* -+ * If somebody else already set this uptodate, they will -+ * have done the memory barrier, and a reader will thus -+ * see *some* valid buffer state. -+ * -+ * Any other serialization (with IO errors or whatever that -+ * might clear the bit) has to come from other state (eg BH_Lock). ++ * If we're doing polled IO and end up having requests being ++ * submitted async (out-of-line), then completions can come in while ++ * we're waiting for refs to drop. We need to reap these manually, ++ * as nobody else will be looking for them. 
+ */ -+ if (test_bit(BH_Uptodate, &bh->b_state)) -+ return; ++ do { ++ io_uring_try_cancel_requests(ctx, NULL, true); ++ if (ctx->sq_data) { ++ struct io_sq_data *sqd = ctx->sq_data; ++ struct task_struct *tsk; ++ ++ io_sq_thread_park(sqd); ++ tsk = sqd->thread; ++ if (tsk && tsk->io_uring && tsk->io_uring->io_wq) ++ io_wq_cancel_cb(tsk->io_uring->io_wq, ++ io_cancel_ctx_cb, ctx, true); ++ io_sq_thread_unpark(sqd); ++ } ++ ++ if (WARN_ON_ONCE(time_after(jiffies, timeout))) { ++ /* there is little hope left, don't run it too often */ ++ interval = HZ * 60; ++ } ++ } while (!wait_for_completion_timeout(&ctx->ref_comp, interval)); + ++ init_completion(&exit.completion); ++ init_task_work(&exit.task_work, io_tctx_exit_cb); ++ exit.ctx = ctx; + /* -+ * make it consistent with folio_mark_uptodate -+ * pairs with smp_load_acquire in buffer_uptodate ++ * Some may use context even when all refs and requests have been put, ++ * and they are free to do so while still holding uring_lock or ++ * completion_lock, see io_req_task_submit(). Apart from other work, ++ * this lock/unlock section also waits them to finish. + */ -+ smp_mb__before_atomic(); -+ set_bit(BH_Uptodate, &bh->b_state); ++ mutex_lock(&ctx->uring_lock); ++ while (!list_empty(&ctx->tctx_list)) { ++ WARN_ON_ONCE(time_after(jiffies, timeout)); ++ ++ node = list_first_entry(&ctx->tctx_list, struct io_tctx_node, ++ ctx_node); ++ /* don't spin on a single task if cancellation failed */ ++ list_rotate_left(&ctx->tctx_list); ++ ret = task_work_add(node->task, &exit.task_work, TWA_SIGNAL); ++ if (WARN_ON_ONCE(ret)) ++ continue; ++ wake_up_process(node->task); ++ ++ mutex_unlock(&ctx->uring_lock); ++ wait_for_completion(&exit.completion); ++ mutex_lock(&ctx->uring_lock); ++ } ++ mutex_unlock(&ctx->uring_lock); ++ spin_lock(&ctx->completion_lock); ++ spin_unlock(&ctx->completion_lock); ++ ++ io_ring_ctx_free(ctx); +} + -+static __always_inline void clear_buffer_uptodate(struct buffer_head *bh) ++/* Returns true if we found and killed one or more timeouts */ ++static bool io_kill_timeouts(struct io_ring_ctx *ctx, struct task_struct *tsk, ++ bool cancel_all) +{ -+ clear_bit(BH_Uptodate, &bh->b_state); ++ struct io_kiocb *req, *tmp; ++ int canceled = 0; ++ ++ spin_lock(&ctx->completion_lock); ++ spin_lock_irq(&ctx->timeout_lock); ++ list_for_each_entry_safe(req, tmp, &ctx->timeout_list, timeout.list) { ++ if (io_match_task(req, tsk, cancel_all)) { ++ io_kill_timeout(req, -ECANCELED); ++ canceled++; ++ } ++ } ++ spin_unlock_irq(&ctx->timeout_lock); ++ if (canceled != 0) ++ io_commit_cqring(ctx); ++ spin_unlock(&ctx->completion_lock); ++ if (canceled != 0) ++ io_cqring_ev_posted(ctx); ++ return canceled != 0; +} + -+static __always_inline int buffer_uptodate(const struct buffer_head *bh) ++static void io_ring_ctx_wait_and_kill(struct io_ring_ctx *ctx) +{ -+ /* -+ * make it consistent with folio_test_uptodate -+ * pairs with smp_mb__before_atomic in set_buffer_uptodate -+ */ -+ return (smp_load_acquire(&bh->b_state) & (1UL << BH_Uptodate)) != 0; -+} ++ unsigned long index; ++ struct creds *creds; + - #define bh_offset(bh) ((unsigned long)(bh)->b_data & ~PAGE_MASK) - - /* If we *know* page->private refers to buffer_heads */ -diff --git a/include/linux/cc_platform.h b/include/linux/cc_platform.h -new file mode 100644 -index 0000000000000..a075b70b9a70c ---- /dev/null -+++ b/include/linux/cc_platform.h -@@ -0,0 +1,88 @@ -+/* SPDX-License-Identifier: GPL-2.0-only */ -+/* -+ * Confidential Computing Platform Capability checks -+ * -+ * Copyright (C) 2021 
Advanced Micro Devices, Inc. -+ * -+ * Author: Tom Lendacky <thomas.lendacky@amd.com> -+ */ ++ mutex_lock(&ctx->uring_lock); ++ percpu_ref_kill(&ctx->refs); ++ if (ctx->rings) ++ __io_cqring_overflow_flush(ctx, true); ++ xa_for_each(&ctx->personalities, index, creds) ++ io_unregister_personality(ctx, index); ++ mutex_unlock(&ctx->uring_lock); + -+#ifndef _LINUX_CC_PLATFORM_H -+#define _LINUX_CC_PLATFORM_H ++ io_kill_timeouts(ctx, NULL, true); ++ io_poll_remove_all(ctx, NULL, true); + -+#include <linux/types.h> -+#include <linux/stddef.h> ++ /* if we failed setting up the ctx, we might not have any rings */ ++ io_iopoll_try_reap_events(ctx); + -+/** -+ * enum cc_attr - Confidential computing attributes -+ * -+ * These attributes represent confidential computing features that are -+ * currently active. -+ */ -+enum cc_attr { -+ /** -+ * @CC_ATTR_MEM_ENCRYPT: Memory encryption is active -+ * -+ * The platform/OS is running with active memory encryption. This -+ * includes running either as a bare-metal system or a hypervisor -+ * and actively using memory encryption or as a guest/virtual machine -+ * and actively using memory encryption. -+ * -+ * Examples include SME, SEV and SEV-ES. -+ */ -+ CC_ATTR_MEM_ENCRYPT, ++ /* drop cached put refs after potentially doing completions */ ++ if (current->io_uring) ++ io_uring_drop_tctx_refs(current); + -+ /** -+ * @CC_ATTR_HOST_MEM_ENCRYPT: Host memory encryption is active -+ * -+ * The platform/OS is running as a bare-metal system or a hypervisor -+ * and actively using memory encryption. -+ * -+ * Examples include SME. ++ INIT_WORK(&ctx->exit_work, io_ring_exit_work); ++ /* ++ * Use system_unbound_wq to avoid spawning tons of event kworkers ++ * if we're exiting a ton of rings at the same time. It just adds ++ * noise and overhead, there's no discernable change in runtime ++ * over using system_wq. + */ -+ CC_ATTR_HOST_MEM_ENCRYPT, ++ queue_work(system_unbound_wq, &ctx->exit_work); ++} + -+ /** -+ * @CC_ATTR_GUEST_MEM_ENCRYPT: Guest memory encryption is active -+ * -+ * The platform/OS is running as a guest/virtual machine and actively -+ * using memory encryption. -+ * -+ * Examples include SEV and SEV-ES. -+ */ -+ CC_ATTR_GUEST_MEM_ENCRYPT, ++static int io_uring_release(struct inode *inode, struct file *file) ++{ ++ struct io_ring_ctx *ctx = file->private_data; + -+ /** -+ * @CC_ATTR_GUEST_STATE_ENCRYPT: Guest state encryption is active -+ * -+ * The platform/OS is running as a guest/virtual machine and actively -+ * using memory encryption and register state encryption. -+ * -+ * Examples include SEV-ES. -+ */ -+ CC_ATTR_GUEST_STATE_ENCRYPT, ++ file->private_data = NULL; ++ io_ring_ctx_wait_and_kill(ctx); ++ return 0; ++} ++ ++struct io_task_cancel { ++ struct task_struct *task; ++ bool all; +}; + -+#ifdef CONFIG_ARCH_HAS_CC_PLATFORM ++static bool io_cancel_task_cb(struct io_wq_work *work, void *data) ++{ ++ struct io_kiocb *req = container_of(work, struct io_kiocb, work); ++ struct io_task_cancel *cancel = data; + -+/** -+ * cc_platform_has() - Checks if the specified cc_attr attribute is active -+ * @attr: Confidential computing attribute to check -+ * -+ * The cc_platform_has() function will return an indicator as to whether the -+ * specified Confidential Computing attribute is currently active. 
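
For readers following the new cc_platform.h above: call sites are meant to
branch on cc_platform_has() rather than on vendor-specific feature flags.
A compilable sketch of such a guard, using the header's own
!CONFIG_ARCH_HAS_CC_PLATFORM no-op fallback as the stand-in implementation:

    #include <stdbool.h>
    #include <stdio.h>

    enum cc_attr {
        CC_ATTR_MEM_ENCRYPT,
        CC_ATTR_HOST_MEM_ENCRYPT,
        CC_ATTR_GUEST_MEM_ENCRYPT,
        CC_ATTR_GUEST_STATE_ENCRYPT,
    };

    /* Stand-in mirroring the stub in the header when the arch lacks
     * CONFIG_ARCH_HAS_CC_PLATFORM: every attribute reads as inactive. */
    static bool cc_platform_has(enum cc_attr attr)
    {
        (void)attr;
        return false;
    }

    int main(void)
    {
        /* e.g. only take an encrypted-guest (SEV/SEV-ES) path when active */
        if (cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT))
            puts("guest memory encryption active");
        else
            puts("no guest memory encryption");
        return 0;
    }
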
-+ * -+ * Context: Any context -+ * Return: -+ * * TRUE - Specified Confidential Computing attribute is active -+ * * FALSE - Specified Confidential Computing attribute is not active -+ */ -+bool cc_platform_has(enum cc_attr attr); ++ return io_match_task_safe(req, cancel->task, cancel->all); ++} + -+#else /* !CONFIG_ARCH_HAS_CC_PLATFORM */ ++static bool io_cancel_defer_files(struct io_ring_ctx *ctx, ++ struct task_struct *task, bool cancel_all) ++{ ++ struct io_defer_entry *de; ++ LIST_HEAD(list); + -+static inline bool cc_platform_has(enum cc_attr attr) { return false; } ++ spin_lock(&ctx->completion_lock); ++ list_for_each_entry_reverse(de, &ctx->defer_list, list) { ++ if (io_match_task_safe(de->req, task, cancel_all)) { ++ list_cut_position(&list, &ctx->defer_list, &de->list); ++ break; ++ } ++ } ++ spin_unlock(&ctx->completion_lock); ++ if (list_empty(&list)) ++ return false; + -+#endif /* CONFIG_ARCH_HAS_CC_PLATFORM */ ++ while (!list_empty(&list)) { ++ de = list_first_entry(&list, struct io_defer_entry, list); ++ list_del_init(&de->list); ++ io_req_complete_failed(de->req, -ECANCELED); ++ kfree(de); ++ } ++ return true; ++} + -+#endif /* _LINUX_CC_PLATFORM_H */ -diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h -index 83fa08a065071..787fff5ec7f58 100644 ---- a/include/linux/ceph/osd_client.h -+++ b/include/linux/ceph/osd_client.h -@@ -287,6 +287,9 @@ struct ceph_osd_linger_request { - rados_watcherrcb_t errcb; - void *data; - -+ struct ceph_pagelist *request_pl; -+ struct page **notify_id_pages; ++static bool io_uring_try_cancel_iowq(struct io_ring_ctx *ctx) ++{ ++ struct io_tctx_node *node; ++ enum io_wq_cancel cret; ++ bool ret = false; + - struct page ***preply_pages; - size_t *preply_len; - }; -diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h -index db2e147e069fe..cd8b8bd5ec4d5 100644 ---- a/include/linux/cgroup-defs.h -+++ b/include/linux/cgroup-defs.h -@@ -264,7 +264,8 @@ struct css_set { - * List of csets participating in the on-going migration either as - * source or destination. Protected by cgroup_mutex. - */ -- struct list_head mg_preload_node; -+ struct list_head mg_src_preload_node; -+ struct list_head mg_dst_preload_node; - struct list_head mg_node; - - /* -diff --git a/include/linux/compat.h b/include/linux/compat.h -index 1c758b0e03598..01fddf72a81f0 100644 ---- a/include/linux/compat.h -+++ b/include/linux/compat.h -@@ -235,6 +235,7 @@ typedef struct compat_siginfo { - struct { - compat_ulong_t _data; - u32 _type; -+ u32 _flags; - } _perf; - }; - } _sigfault; -diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h -index bd2b881c6b63a..b9d5f9c373a09 100644 ---- a/include/linux/compiler-gcc.h -+++ b/include/linux/compiler-gcc.h -@@ -144,3 +144,11 @@ - #else - #define __diag_GCC_8(s) - #endif ++ mutex_lock(&ctx->uring_lock); ++ list_for_each_entry(node, &ctx->tctx_list, ctx_node) { ++ struct io_uring_task *tctx = node->task->io_uring; + -+/* -+ * Prior to 9.1, -Wno-alloc-size-larger-than (and therefore the "alloc_size" -+ * attribute) do not work, and must be disabled. 
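
Context for that GCC < 9.1 guard: __alloc_size__ exists so the compiler can
track how large an allocation is through a wrapper function. A small
illustration of what the attribute buys (my_alloc is hypothetical, not a
name from the patch):

    #include <stdlib.h>
    #include <string.h>

    /* alloc_size(1) tells GCC/Clang the result points to n bytes. */
    __attribute__((malloc, alloc_size(1)))
    static void *my_alloc(size_t n)
    {
        return malloc(n);
    }

    int main(void)
    {
        char *p = my_alloc(4);

        if (!p)
            return 1;
        /* With the allocation size visible, __builtin_object_size(p, 0)
         * evaluates to 4, so fortified builds (-O2 -D_FORTIFY_SOURCE=2)
         * can flag this 6-byte copy; without alloc_size it goes unseen. */
        memcpy(p, "hello", 6);
        free(p);
        return 0;
    }
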
-+ */ -+#if GCC_VERSION < 90100 -+#undef __alloc_size__ -+#endif -diff --git a/include/linux/compiler.h b/include/linux/compiler.h -index 3d5af56337bdb..0f7fd205ab7ea 100644 ---- a/include/linux/compiler.h -+++ b/include/linux/compiler.h -@@ -117,40 +117,29 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val, - */ - #define __stringify_label(n) #n - --#define __annotate_reachable(c) ({ \ -- asm volatile(__stringify_label(c) ":\n\t" \ -- ".pushsection .discard.reachable\n\t" \ -- ".long " __stringify_label(c) "b - .\n\t" \ -- ".popsection\n\t"); \ --}) --#define annotate_reachable() __annotate_reachable(__COUNTER__) -- - #define __annotate_unreachable(c) ({ \ - asm volatile(__stringify_label(c) ":\n\t" \ - ".pushsection .discard.unreachable\n\t" \ - ".long " __stringify_label(c) "b - .\n\t" \ -- ".popsection\n\t"); \ -+ ".popsection\n\t" : : "i" (c)); \ - }) - #define annotate_unreachable() __annotate_unreachable(__COUNTER__) - --#define ASM_UNREACHABLE \ -- "999:\n\t" \ -- ".pushsection .discard.unreachable\n\t" \ -- ".long 999b - .\n\t" \ -+#define ASM_REACHABLE \ -+ "998:\n\t" \ -+ ".pushsection .discard.reachable\n\t" \ -+ ".long 998b - .\n\t" \ - ".popsection\n\t" - - /* Annotate a C jump table to allow objtool to follow the code flow */ - #define __annotate_jump_table __section(".rodata..c_jump_table") - - #else --#define annotate_reachable() - #define annotate_unreachable() -+# define ASM_REACHABLE - #define __annotate_jump_table - #endif - --#ifndef ASM_UNREACHABLE --# define ASM_UNREACHABLE --#endif - #ifndef unreachable - # define unreachable() do { \ - annotate_unreachable(); \ -diff --git a/include/linux/compiler_attributes.h b/include/linux/compiler_attributes.h -index e6ec634039658..3de06a8fae73b 100644 ---- a/include/linux/compiler_attributes.h -+++ b/include/linux/compiler_attributes.h -@@ -33,6 +33,15 @@ - #define __aligned(x) __attribute__((__aligned__(x))) - #define __aligned_largest __attribute__((__aligned__)) - -+/* -+ * Note: do not use this directly. Instead, use __alloc_size() since it is conditionally -+ * available and includes other attributes. -+ * -+ * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-alloc_005fsize-function-attribute -+ * clang: https://clang.llvm.org/docs/AttributeReference.html#alloc-size -+ */ -+#define __alloc_size__(x, ...) __attribute__((__alloc_size__(x, ## __VA_ARGS__))) ++ /* ++ * io_wq will stay alive while we hold uring_lock, because it's ++ * killed after ctx nodes, which requires to take the lock. 
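
That lifetime comment is the whole trick behind io_uring_try_cancel_iowq():
anything reachable while holding the registry lock cannot be freed, because
teardown must take the same lock first to unlink itself. The same
discipline, reduced to a standalone pthread sketch (all names hypothetical):

    #include <pthread.h>
    #include <stdlib.h>

    struct worker {
        struct worker *next;
    };

    static pthread_mutex_t registry_lock = PTHREAD_MUTEX_INITIALIZER;
    static struct worker *workers;

    /* Iteration holds registry_lock, so no worker is freed underneath it. */
    static void for_each_worker(void (*fn)(struct worker *))
    {
        pthread_mutex_lock(&registry_lock);
        for (struct worker *w = workers; w; w = w->next)
            fn(w);
        pthread_mutex_unlock(&registry_lock);
    }

    /* Teardown unlinks under the same lock before freeing, just as io_wq
     * is only destroyed after its node leaves ctx->tctx_list under
     * uring_lock. */
    static void destroy_worker(struct worker *victim)
    {
        pthread_mutex_lock(&registry_lock);
        for (struct worker **pw = &workers; *pw; pw = &(*pw)->next) {
            if (*pw == victim) {
                *pw = victim->next;
                break;
            }
        }
        pthread_mutex_unlock(&registry_lock);
        free(victim);
    }

    static void touch(struct worker *w)
    {
        (void)w;
    }

    int main(void)
    {
        struct worker *w = calloc(1, sizeof(*w));

        if (!w)
            return 1;
        w->next = workers;
        workers = w;
        for_each_worker(touch);
        destroy_worker(w);
        return 0;
    }
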
++ */ ++ if (!tctx || !tctx->io_wq) ++ continue; ++ cret = io_wq_cancel_cb(tctx->io_wq, io_cancel_ctx_cb, ctx, true); ++ ret |= (cret != IO_WQ_CANCEL_NOTFOUND); ++ } ++ mutex_unlock(&ctx->uring_lock); + - /* - * Note: users of __always_inline currently do not write "inline" themselves, - * which seems to be required by gcc to apply the attribute according -@@ -153,6 +162,7 @@ - - /* - * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-malloc-function-attribute -+ * clang: https://clang.llvm.org/docs/AttributeReference.html#malloc - */ - #define __malloc __attribute__((__malloc__)) - -diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h -index b6ff83a714ca9..4f2203c4a2574 100644 ---- a/include/linux/compiler_types.h -+++ b/include/linux/compiler_types.h -@@ -250,6 +250,18 @@ struct ftrace_likely_data { - # define __cficanonical - #endif - -+/* -+ * Any place that could be marked with the "alloc_size" attribute is also -+ * a place to be marked with the "malloc" attribute. Do this as part of the -+ * __alloc_size macro to avoid redundant attributes and to avoid missing a -+ * __malloc marking. -+ */ -+#ifdef __alloc_size__ -+# define __alloc_size(x, ...) __alloc_size__(x, ## __VA_ARGS__) __malloc -+#else -+# define __alloc_size(x, ...) __malloc -+#endif ++ return ret; ++} + - #ifndef asm_volatile_goto - #define asm_volatile_goto(x...) asm goto(x) - #endif -diff --git a/include/linux/console.h b/include/linux/console.h -index 20874db50bc8a..a97f277cfdfa3 100644 ---- a/include/linux/console.h -+++ b/include/linux/console.h -@@ -149,6 +149,8 @@ struct console { - short flags; - short index; - int cflag; -+ uint ispeed; -+ uint ospeed; - void *data; - struct console *next; - }; -diff --git a/include/linux/coredump.h b/include/linux/coredump.h -index 78fcd776b185a..4b95e46d215f1 100644 ---- a/include/linux/coredump.h -+++ b/include/linux/coredump.h -@@ -12,6 +12,8 @@ struct core_vma_metadata { - unsigned long start, end; - unsigned long flags; - unsigned long dump_size; -+ unsigned long pgoff; -+ struct file *file; - }; - - extern int core_uses_pid; -@@ -29,9 +31,6 @@ extern int dump_emit(struct coredump_params *cprm, const void *addr, int nr); - extern int dump_align(struct coredump_params *cprm, int align); - int dump_user_range(struct coredump_params *cprm, unsigned long start, - unsigned long len); --int dump_vma_snapshot(struct coredump_params *cprm, int *vma_count, -- struct core_vma_metadata **vma_meta, -- size_t *vma_data_size_ptr); - extern void do_coredump(const kernel_siginfo_t *siginfo); - #else - static inline void do_coredump(const kernel_siginfo_t *siginfo) {} -diff --git a/include/linux/cpu.h b/include/linux/cpu.h -index 9cf51e41e6972..6102a21a01d9a 100644 ---- a/include/linux/cpu.h -+++ b/include/linux/cpu.h -@@ -65,6 +65,11 @@ extern ssize_t cpu_show_tsx_async_abort(struct device *dev, - extern ssize_t cpu_show_itlb_multihit(struct device *dev, - struct device_attribute *attr, char *buf); - extern ssize_t cpu_show_srbds(struct device *dev, struct device_attribute *attr, char *buf); -+extern ssize_t cpu_show_mmio_stale_data(struct device *dev, -+ struct device_attribute *attr, -+ char *buf); -+extern ssize_t cpu_show_retbleed(struct device *dev, -+ struct device_attribute *attr, char *buf); - - extern __printf(4, 5) - struct device *cpu_device_create(struct device *parent, void *drvdata, -diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h -index ff88bb3e44fca..66a1f495f01a6 100644 ---- a/include/linux/cpufreq.h -+++ 
b/include/linux/cpufreq.h -@@ -1041,7 +1041,7 @@ static inline int of_perf_domain_get_sharing_cpumask(int pcpu, const char *list_ - if (cpu == pcpu) - continue; - -- ret = parse_perf_domain(pcpu, list_name, cell_name); -+ ret = parse_perf_domain(cpu, list_name, cell_name); - if (ret < 0) - continue; - -diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h -index 991911048857a..c88ccc48877d6 100644 ---- a/include/linux/cpuhotplug.h -+++ b/include/linux/cpuhotplug.h -@@ -99,6 +99,7 @@ enum cpuhp_state { - CPUHP_LUSTRE_CFS_DEAD, - CPUHP_AP_ARM_CACHE_B15_RAC_DEAD, - CPUHP_PADATA_DEAD, -+ CPUHP_RANDOM_PREPARE, - CPUHP_WORKQUEUE_PREP, - CPUHP_POWER_NUMA_PREPARE, - CPUHP_HRTIMERS_PREPARE, -@@ -238,6 +239,7 @@ enum cpuhp_state { - CPUHP_AP_PERF_CSKY_ONLINE, - CPUHP_AP_WATCHDOG_ONLINE, - CPUHP_AP_WORKQUEUE_ONLINE, -+ CPUHP_AP_RANDOM_ONLINE, - CPUHP_AP_RCUTREE_ONLINE, - CPUHP_AP_BASE_CACHEINFO_ONLINE, - CPUHP_AP_ONLINE_DYN, -diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h -index 1e7399fc69c0a..b3c230dea0713 100644 ---- a/include/linux/cpumask.h -+++ b/include/linux/cpumask.h -@@ -1045,4 +1045,23 @@ cpumap_print_list_to_buf(char *buf, const struct cpumask *mask, - [0] = 1UL \ - } } - -+/* -+ * Provide a valid theoretical max size for cpumap and cpulist sysfs files -+ * to avoid breaking userspace which may allocate a buffer based on the size -+ * reported by e.g. fstat. -+ * -+ * for cpumap NR_CPUS * 9/32 - 1 should be an exact length. -+ * -+ * For cpulist 7 is (ceil(log10(NR_CPUS)) + 1) allowing for NR_CPUS to be up -+ * to 2 orders of magnitude larger than 8192. And then we divide by 2 to -+ * cover a worst-case of every other cpu being on one of two nodes for a -+ * very large NR_CPUS. -+ * -+ * Use PAGE_SIZE as a minimum for smaller configurations while avoiding -+ * unsigned comparison to -1. -+ */ -+#define CPUMAP_FILE_MAX_BYTES (((NR_CPUS * 9)/32 > PAGE_SIZE) \ -+ ? (NR_CPUS * 9)/32 - 1 : PAGE_SIZE) -+#define CPULIST_FILE_MAX_BYTES (((NR_CPUS * 7)/2 > PAGE_SIZE) ? (NR_CPUS * 7)/2 : PAGE_SIZE) ++static void io_uring_try_cancel_requests(struct io_ring_ctx *ctx, ++ struct task_struct *task, ++ bool cancel_all) ++{ ++ struct io_task_cancel cancel = { .task = task, .all = cancel_all, }; ++ struct io_uring_task *tctx = task ? task->io_uring : NULL; + - #endif /* __LINUX_CPUMASK_H */ -diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h -index c869f1e73d755..f60674692d365 100644 ---- a/include/linux/debugfs.h -+++ b/include/linux/debugfs.h -@@ -91,6 +91,8 @@ struct dentry *debugfs_create_automount(const char *name, - void debugfs_remove(struct dentry *dentry); - #define debugfs_remove_recursive debugfs_remove - -+void debugfs_lookup_and_remove(const char *name, struct dentry *parent); ++ while (1) { ++ enum io_wq_cancel cret; ++ bool ret = false; + - const struct file_operations *debugfs_real_fops(const struct file *filp); - - int debugfs_file_get(struct dentry *dentry); -@@ -225,6 +227,10 @@ static inline void debugfs_remove(struct dentry *dentry) - static inline void debugfs_remove_recursive(struct dentry *dentry) - { } - -+static inline void debugfs_lookup_and_remove(const char *name, -+ struct dentry *parent) -+{ } ++ if (!task) { ++ ret |= io_uring_try_cancel_iowq(ctx); ++ } else if (tctx && tctx->io_wq) { ++ /* ++ * Cancels requests of all rings, not only @ctx, but ++ * it's fine as the task is in exit/exec. 
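
All the cancellation helpers in these hunks share one shape: io_wq_cancel_cb()
takes a predicate (io_cancel_ctx_cb matches a ring, io_cancel_task_cb a task)
and kills whatever that predicate accepts. The same contract as a runnable
toy (hypothetical names):

    #include <stdbool.h>
    #include <stddef.h>

    struct work {
        void *owner;
        bool cancelled;
    };

    typedef bool (*work_match_fn)(struct work *w, void *data);

    /* Mirrors io_wq_cancel_cb(wq, match, data, cancel_all=true): walk the
     * queue and cancel every item the predicate accepts. */
    static int cancel_matching(struct work *q, size_t n,
                               work_match_fn match, void *data)
    {
        int found = 0;

        for (size_t i = 0; i < n; i++) {
            if (!q[i].cancelled && match(&q[i], data)) {
                q[i].cancelled = true;
                found++;
            }
        }
        return found;
    }

    /* Analogous to io_cancel_ctx_cb: match work by its owner pointer. */
    static bool match_owner(struct work *w, void *data)
    {
        return w->owner == data;
    }

    int main(void)
    {
        int ring_a, ring_b;
        struct work q[3] = {
            { &ring_a, false }, { &ring_b, false }, { &ring_a, false },
        };

        return cancel_matching(q, 3, match_owner, &ring_a) == 2 ? 0 : 1;
    }
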
++ */ ++ cret = io_wq_cancel_cb(tctx->io_wq, io_cancel_task_cb, ++ &cancel, true); ++ ret |= (cret != IO_WQ_CANCEL_NOTFOUND); ++ } + - const struct file_operations *debugfs_real_fops(const struct file *filp); - - static inline int debugfs_file_get(struct dentry *dentry) -diff --git a/include/linux/delay.h b/include/linux/delay.h -index 1d0e2ce6b6d9f..e8607992c68a5 100644 ---- a/include/linux/delay.h -+++ b/include/linux/delay.h -@@ -20,6 +20,7 @@ - */ - - #include <linux/kernel.h> -+#include <linux/sched.h> - - extern unsigned long loops_per_jiffy; - -@@ -58,7 +59,18 @@ void calibrate_delay(void); - void __attribute__((weak)) calibration_delay_done(void); - void msleep(unsigned int msecs); - unsigned long msleep_interruptible(unsigned int msecs); --void usleep_range(unsigned long min, unsigned long max); -+void usleep_range_state(unsigned long min, unsigned long max, -+ unsigned int state); ++ /* SQPOLL thread does its own polling */ ++ if ((!(ctx->flags & IORING_SETUP_SQPOLL) && cancel_all) || ++ (ctx->sq_data && ctx->sq_data->thread == current)) { ++ while (!list_empty_careful(&ctx->iopoll_list)) { ++ io_iopoll_try_reap_events(ctx); ++ ret = true; ++ } ++ } + -+static inline void usleep_range(unsigned long min, unsigned long max) -+{ -+ usleep_range_state(min, max, TASK_UNINTERRUPTIBLE); ++ ret |= io_cancel_defer_files(ctx, task, cancel_all); ++ ret |= io_poll_remove_all(ctx, task, cancel_all); ++ ret |= io_kill_timeouts(ctx, task, cancel_all); ++ if (task) ++ ret |= io_run_task_work(); ++ if (!ret) ++ break; ++ cond_resched(); ++ } +} + -+static inline void usleep_idle_range(unsigned long min, unsigned long max) ++static int __io_uring_add_tctx_node(struct io_ring_ctx *ctx) +{ -+ usleep_range_state(min, max, TASK_IDLE); -+} - - static inline void ssleep(unsigned int seconds) - { -diff --git a/include/linux/dim.h b/include/linux/dim.h -index b698266d00356..6c5733981563e 100644 ---- a/include/linux/dim.h -+++ b/include/linux/dim.h -@@ -21,7 +21,7 @@ - * We consider 10% difference as significant. - */ - #define IS_SIGNIFICANT_DIFF(val, ref) \ -- (((100UL * abs((val) - (ref))) / (ref)) > 10) -+ ((ref) && (((100UL * abs((val) - (ref))) / (ref)) > 10)) - - /* - * Calculate the gap between two values. 
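
A note on the dim.h hunk quoted above: it is a plain divide-by-zero guard,
making a ref of 0 short-circuit before the division instead of faulting.
A quick standalone restatement of the fixed macro and its behaviour:

    #include <assert.h>
    #include <stdlib.h>

    /* The fixed macro: (ref) && ... short-circuits when ref == 0. */
    #define IS_SIGNIFICANT_DIFF(val, ref) \
        ((ref) && (((100UL * abs((val) - (ref))) / (ref)) > 10))

    int main(void)
    {
        assert(!IS_SIGNIFICANT_DIFF(5, 0));     /* was: division by zero */
        assert(IS_SIGNIFICANT_DIFF(120, 100));  /* 20% > 10%: significant */
        assert(!IS_SIGNIFICANT_DIFF(105, 100)); /* 5% is just noise */
        return 0;
    }
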
-diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h -index 8b32b4bdd5908..3ad636a13b8e9 100644 ---- a/include/linux/dma-buf.h -+++ b/include/linux/dma-buf.h -@@ -433,7 +433,7 @@ struct dma_buf { - wait_queue_head_t *poll; - - __poll_t active; -- } cb_excl, cb_shared; -+ } cb_in, cb_out; - #ifdef CONFIG_DMABUF_SYSFS_STATS - /** - * @sysfs_entry: -diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h -index e5c2c9e71bf10..9000f3ffce8b3 100644 ---- a/include/linux/dmaengine.h -+++ b/include/linux/dmaengine.h -@@ -944,10 +944,8 @@ struct dma_device { - void (*device_issue_pending)(struct dma_chan *chan); - void (*device_release)(struct dma_device *dev); - /* debugfs support */ --#ifdef CONFIG_DEBUG_FS - void (*dbg_summary_show)(struct seq_file *s, struct dma_device *dev); - struct dentry *dbg_dev_root; --#endif - }; - - static inline int dmaengine_slave_config(struct dma_chan *chan, -diff --git a/include/linux/dsa/ocelot.h b/include/linux/dsa/ocelot.h -index 8ae999f587c48..289064b51fa9a 100644 ---- a/include/linux/dsa/ocelot.h -+++ b/include/linux/dsa/ocelot.h -@@ -12,6 +12,7 @@ - struct ocelot_skb_cb { - struct sk_buff *clone; - unsigned int ptp_class; /* valid only for clones */ -+ u32 tstamp_lo; - u8 ptp_cmd; - u8 ts_id; - }; -diff --git a/include/linux/dynamic_debug.h b/include/linux/dynamic_debug.h -index dce631e678dd6..8d9eec5f6d8bb 100644 ---- a/include/linux/dynamic_debug.h -+++ b/include/linux/dynamic_debug.h -@@ -55,9 +55,6 @@ struct _ddebug { - - #if defined(CONFIG_DYNAMIC_DEBUG_CORE) - --/* exported for module authors to exercise >control */ --int dynamic_debug_exec_queries(const char *query, const char *modname); -- - int ddebug_add_module(struct _ddebug *tab, unsigned int n, - const char *modname); - extern int ddebug_remove_module(const char *mod_name); -@@ -201,7 +198,7 @@ static inline int ddebug_remove_module(const char *mod) - static inline int ddebug_dyndbg_module_param_cb(char *param, char *val, - const char *modname) - { -- if (strstr(param, "dyndbg")) { -+ if (!strcmp(param, "dyndbg")) { - /* avoid pr_warn(), which wants pr_fmt() fully defined */ - printk(KERN_WARNING "dyndbg param is supported only in " - "CONFIG_DYNAMIC_DEBUG builds\n"); -@@ -221,12 +218,6 @@ static inline int ddebug_dyndbg_module_param_cb(char *param, char *val, - rowsize, groupsize, buf, len, ascii); \ - } while (0) - --static inline int dynamic_debug_exec_queries(const char *query, const char *modname) --{ -- pr_warn("kernel not built with CONFIG_DYNAMIC_DEBUG_CORE\n"); -- return 0; --} -- - #endif /* !CONFIG_DYNAMIC_DEBUG_CORE */ - - #endif -diff --git a/include/linux/efi.h b/include/linux/efi.h -index 6b5d36babfcc4..d34e8a7ed4d5c 100644 ---- a/include/linux/efi.h -+++ b/include/linux/efi.h -@@ -167,6 +167,8 @@ struct capsule_info { - size_t page_bytes_remain; - }; - -+int efi_capsule_setup_info(struct capsule_info *cap_info, void *kbuff, -+ size_t hdr_bytes); - int __efi_capsule_setup_info(struct capsule_info *cap_info); - - /* -@@ -1282,4 +1284,10 @@ static inline struct efi_mokvar_table_entry *efi_mokvar_entry_find( - } - #endif - -+#ifdef CONFIG_SYSFB -+extern void efifb_setup_from_dmi(struct screen_info *si, const char *opt); -+#else -+static inline void efifb_setup_from_dmi(struct screen_info *si, const char *opt) { } -+#endif ++ struct io_uring_task *tctx = current->io_uring; ++ struct io_tctx_node *node; ++ int ret; + - #endif /* _LINUX_EFI_H */ -diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h -index c58d504514854..7f28fa702bb72 100644 ---- 
a/include/linux/etherdevice.h -+++ b/include/linux/etherdevice.h -@@ -127,7 +127,7 @@ static inline bool is_multicast_ether_addr(const u8 *addr) - #endif - } - --static inline bool is_multicast_ether_addr_64bits(const u8 addr[6+2]) -+static inline bool is_multicast_ether_addr_64bits(const u8 *addr) - { - #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && BITS_PER_LONG == 64 - #ifdef __BIG_ENDIAN -@@ -364,8 +364,7 @@ static inline bool ether_addr_equal(const u8 *addr1, const u8 *addr2) - * Please note that alignment of addr1 & addr2 are only guaranteed to be 16 bits. - */ - --static inline bool ether_addr_equal_64bits(const u8 addr1[6+2], -- const u8 addr2[6+2]) -+static inline bool ether_addr_equal_64bits(const u8 *addr1, const u8 *addr2) - { - #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && BITS_PER_LONG == 64 - u64 fold = (*(const u64 *)addr1) ^ (*(const u64 *)addr2); -diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h -index 849524b55d89a..3fad741df53ef 100644 ---- a/include/linux/ethtool.h -+++ b/include/linux/ethtool.h -@@ -94,7 +94,7 @@ struct ethtool_link_ext_state_info { - enum ethtool_link_ext_substate_link_logical_mismatch link_logical_mismatch; - enum ethtool_link_ext_substate_bad_signal_integrity bad_signal_integrity; - enum ethtool_link_ext_substate_cable_issue cable_issue; -- u8 __link_ext_substate; -+ u32 __link_ext_substate; - }; - }; - -diff --git a/include/linux/ethtool_netlink.h b/include/linux/ethtool_netlink.h -index 1e7bf78cb3829..aba348d58ff61 100644 ---- a/include/linux/ethtool_netlink.h -+++ b/include/linux/ethtool_netlink.h -@@ -10,6 +10,9 @@ - #define __ETHTOOL_LINK_MODE_MASK_NWORDS \ - DIV_ROUND_UP(__ETHTOOL_LINK_MODE_MASK_NBITS, 32) - -+#define ETHTOOL_PAUSE_STAT_CNT (__ETHTOOL_A_PAUSE_STAT_CNT - \ -+ ETHTOOL_A_PAUSE_STAT_TX_FRAMES) ++ if (unlikely(!tctx)) { ++ ret = io_uring_alloc_task_context(current, ctx); ++ if (unlikely(ret)) ++ return ret; + - enum ethtool_multicast_groups { - ETHNL_MCGRP_MONITOR, - }; -diff --git a/include/linux/eventfd.h b/include/linux/eventfd.h -index 305d5f19093b9..30eb30d6909b0 100644 ---- a/include/linux/eventfd.h -+++ b/include/linux/eventfd.h -@@ -46,7 +46,7 @@ void eventfd_ctx_do_read(struct eventfd_ctx *ctx, __u64 *cnt); - - static inline bool eventfd_signal_allowed(void) - { -- return !current->in_eventfd_signal; -+ return !current->in_eventfd; - } - - #else /* CONFIG_EVENTFD */ -diff --git a/include/linux/export.h b/include/linux/export.h -index 27d848712b90b..5910ccb66ca2d 100644 ---- a/include/linux/export.h -+++ b/include/linux/export.h -@@ -2,6 +2,8 @@ - #ifndef _LINUX_EXPORT_H - #define _LINUX_EXPORT_H - -+#include <linux/stringify.h> ++ tctx = current->io_uring; ++ if (ctx->iowq_limits_set) { ++ unsigned int limits[2] = { ctx->iowq_limits[0], ++ ctx->iowq_limits[1], }; + - /* - * Export symbols from the kernel to modules. 
Forked from module.h - * to reduce the amount of pointless cruft we feed to gcc when only -@@ -154,7 +156,6 @@ struct kernel_symbol { - #endif /* CONFIG_MODULES */ - - #ifdef DEFAULT_SYMBOL_NAMESPACE --#include <linux/stringify.h> - #define _EXPORT_SYMBOL(sym, sec) __EXPORT_SYMBOL(sym, sec, __stringify(DEFAULT_SYMBOL_NAMESPACE)) - #else - #define _EXPORT_SYMBOL(sym, sec) __EXPORT_SYMBOL(sym, sec, "") -@@ -162,8 +163,8 @@ struct kernel_symbol { - - #define EXPORT_SYMBOL(sym) _EXPORT_SYMBOL(sym, "") - #define EXPORT_SYMBOL_GPL(sym) _EXPORT_SYMBOL(sym, "_gpl") --#define EXPORT_SYMBOL_NS(sym, ns) __EXPORT_SYMBOL(sym, "", #ns) --#define EXPORT_SYMBOL_NS_GPL(sym, ns) __EXPORT_SYMBOL(sym, "_gpl", #ns) -+#define EXPORT_SYMBOL_NS(sym, ns) __EXPORT_SYMBOL(sym, "", __stringify(ns)) -+#define EXPORT_SYMBOL_NS_GPL(sym, ns) __EXPORT_SYMBOL(sym, "_gpl", __stringify(ns)) - - #endif /* !__ASSEMBLY__ */ - -diff --git a/include/linux/extcon.h b/include/linux/extcon.h -index 0c19010da77fa..685401d94d398 100644 ---- a/include/linux/extcon.h -+++ b/include/linux/extcon.h -@@ -296,7 +296,7 @@ static inline void devm_extcon_unregister_notifier_all(struct device *dev, - - static inline struct extcon_dev *extcon_get_extcon_dev(const char *extcon_name) - { -- return ERR_PTR(-ENODEV); -+ return NULL; - } - - static inline struct extcon_dev *extcon_find_edev_by_node(struct device_node *node) -diff --git a/include/linux/fb.h b/include/linux/fb.h -index 5950f8f5dc74d..3d7306c9a7065 100644 ---- a/include/linux/fb.h -+++ b/include/linux/fb.h -@@ -502,6 +502,7 @@ struct fb_info { - } *apertures; - - bool skip_vt_switch; /* no VT switch on suspend/resume required */ -+ bool forced_out; /* set when being removed by another driver */ - }; - - static inline struct apertures_struct *alloc_apertures(unsigned int max_num) { -@@ -610,6 +611,7 @@ extern int remove_conflicting_pci_framebuffers(struct pci_dev *pdev, - const char *name); - extern int remove_conflicting_framebuffers(struct apertures_struct *a, - const char *name, bool primary); -+extern bool is_firmware_framebuffer(struct apertures_struct *a); - extern int fb_prepare_logo(struct fb_info *fb_info, int rotate); - extern int fb_show_logo(struct fb_info *fb_info, int rotate); - extern char* fb_get_buffer_offset(struct fb_info *info, struct fb_pixmap *buf, u32 size); -diff --git a/include/linux/fbcon.h b/include/linux/fbcon.h -index ff5596dd30f85..2382dec6d6ab8 100644 ---- a/include/linux/fbcon.h -+++ b/include/linux/fbcon.h -@@ -15,6 +15,8 @@ void fbcon_new_modelist(struct fb_info *info); - void fbcon_get_requirement(struct fb_info *info, - struct fb_blit_caps *caps); - void fbcon_fb_blanked(struct fb_info *info, int blank); -+int fbcon_modechange_possible(struct fb_info *info, -+ struct fb_var_screeninfo *var); - void fbcon_update_vcs(struct fb_info *info, bool all); - void fbcon_remap_all(struct fb_info *info); - int fbcon_set_con2fb_map_ioctl(void __user *argp); -@@ -33,6 +35,8 @@ static inline void fbcon_new_modelist(struct fb_info *info) {} - static inline void fbcon_get_requirement(struct fb_info *info, - struct fb_blit_caps *caps) {} - static inline void fbcon_fb_blanked(struct fb_info *info, int blank) {} -+static inline int fbcon_modechange_possible(struct fb_info *info, -+ struct fb_var_screeninfo *var) { return 0; } - static inline void fbcon_update_vcs(struct fb_info *info, bool all) {} - static inline void fbcon_remap_all(struct fb_info *info) {} - static inline int fbcon_set_con2fb_map_ioctl(void __user *argp) { return 0; } -diff --git 
a/include/linux/filter.h b/include/linux/filter.h -index ef03ff34234d8..a9956b681f090 100644 ---- a/include/linux/filter.h -+++ b/include/linux/filter.h -@@ -554,9 +554,9 @@ struct bpf_binary_header { - }; - - struct bpf_prog_stats { -- u64 cnt; -- u64 nsecs; -- u64 misses; -+ u64_stats_t cnt; -+ u64_stats_t nsecs; -+ u64_stats_t misses; - struct u64_stats_sync syncp; - } __aligned(2 * sizeof(u64)); - -@@ -613,13 +613,14 @@ static __always_inline u32 __bpf_prog_run(const struct bpf_prog *prog, - if (static_branch_unlikely(&bpf_stats_enabled_key)) { - struct bpf_prog_stats *stats; - u64 start = sched_clock(); -+ unsigned long flags; - - ret = dfunc(ctx, prog->insnsi, prog->bpf_func); - stats = this_cpu_ptr(prog->stats); -- u64_stats_update_begin(&stats->syncp); -- stats->cnt++; -- stats->nsecs += sched_clock() - start; -- u64_stats_update_end(&stats->syncp); -+ flags = u64_stats_update_begin_irqsave(&stats->syncp); -+ u64_stats_inc(&stats->cnt); -+ u64_stats_add(&stats->nsecs, sched_clock() - start); -+ u64_stats_update_end_irqrestore(&stats->syncp, flags); - } else { - ret = dfunc(ctx, prog->insnsi, prog->bpf_func); - } -@@ -638,9 +639,6 @@ static __always_inline u32 bpf_prog_run(const struct bpf_prog *prog, const void - * This uses migrate_disable/enable() explicitly to document that the - * invocation of a BPF program does not require reentrancy protection - * against a BPF program which is invoked from a preempting task. -- * -- * For non RT enabled kernels migrate_disable/enable() maps to -- * preempt_disable/enable(), i.e. it disables also preemption. - */ - static inline u32 bpf_prog_run_pin_on_cpu(const struct bpf_prog *prog, - const void *ctx) -diff --git a/include/linux/fortify-string.h b/include/linux/fortify-string.h -index c1be37437e778..0c70febd03e95 100644 ---- a/include/linux/fortify-string.h -+++ b/include/linux/fortify-string.h -@@ -280,7 +280,10 @@ __FORTIFY_INLINE char *strcpy(char *p, const char *q) - if (p_size == (size_t)-1 && q_size == (size_t)-1) - return __underlying_strcpy(p, q); - size = strlen(q) + 1; -- /* test here to use the more stringent object size */ -+ /* Compile-time check for const size overflow. */ -+ if (__builtin_constant_p(size) && p_size < size) -+ __write_overflow(); -+ /* Run-time check for dynamic size overflow. 
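
The fortify-string.h hunk just above splits the overflow check in two: a
compile-time __write_overflow() when the needed size is a constant, on top
of the existing runtime fortify_panic(). In userspace terms it is the
difference between the following being rejected at build time and aborting
at runtime (illustrative only; build with -O2 -D_FORTIFY_SOURCE=2 to see
the diagnostic):

    #include <string.h>

    int main(void)
    {
        char dst[4];

        /* strlen("overflow") + 1 == 9 is a compile-time constant and the
         * destination size (4) is known, so a fortified build can reject
         * the copy while compiling instead of panicking when it runs. */
        strcpy(dst, "overflow");
        return dst[0];
    }
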
*/ - if (p_size < size) - fortify_panic(__func__); - memcpy(p, q, size); -diff --git a/include/linux/fs.h b/include/linux/fs.h -index e7a633353fd20..fd4c450dc6128 100644 ---- a/include/linux/fs.h -+++ b/include/linux/fs.h -@@ -41,6 +41,7 @@ - #include <linux/stddef.h> - #include <linux/mount.h> - #include <linux/cred.h> -+#include <linux/mnt_idmapping.h> - - #include <asm/byteorder.h> - #include <uapi/linux/fs.h> -@@ -1601,6 +1602,11 @@ struct super_block { - struct list_head s_inodes_wb; /* writeback inodes */ - } __randomize_layout; - -+static inline struct user_namespace *i_user_ns(const struct inode *inode) -+{ -+ return inode->i_sb->s_user_ns; ++ ret = io_wq_max_workers(tctx->io_wq, limits); ++ if (ret) ++ return ret; ++ } ++ } ++ if (!xa_load(&tctx->xa, (unsigned long)ctx)) { ++ node = kmalloc(sizeof(*node), GFP_KERNEL); ++ if (!node) ++ return -ENOMEM; ++ node->ctx = ctx; ++ node->task = current; ++ ++ ret = xa_err(xa_store(&tctx->xa, (unsigned long)ctx, ++ node, GFP_KERNEL)); ++ if (ret) { ++ kfree(node); ++ return ret; ++ } ++ ++ mutex_lock(&ctx->uring_lock); ++ list_add(&node->ctx_node, &ctx->tctx_list); ++ mutex_unlock(&ctx->uring_lock); ++ } ++ tctx->last = ctx; ++ return 0; +} + - /* Helper functions so that in most cases filesystems will - * not need to deal directly with kuid_t and kgid_t and can - * instead deal with the raw numeric values that are stored -@@ -1608,50 +1614,22 @@ struct super_block { - */ - static inline uid_t i_uid_read(const struct inode *inode) - { -- return from_kuid(inode->i_sb->s_user_ns, inode->i_uid); -+ return from_kuid(i_user_ns(inode), inode->i_uid); - } - - static inline gid_t i_gid_read(const struct inode *inode) - { -- return from_kgid(inode->i_sb->s_user_ns, inode->i_gid); -+ return from_kgid(i_user_ns(inode), inode->i_gid); - } - - static inline void i_uid_write(struct inode *inode, uid_t uid) - { -- inode->i_uid = make_kuid(inode->i_sb->s_user_ns, uid); -+ inode->i_uid = make_kuid(i_user_ns(inode), uid); - } - - static inline void i_gid_write(struct inode *inode, gid_t gid) - { -- inode->i_gid = make_kgid(inode->i_sb->s_user_ns, gid); --} -- --/** -- * kuid_into_mnt - map a kuid down into a mnt_userns -- * @mnt_userns: user namespace of the relevant mount -- * @kuid: kuid to be mapped -- * -- * Return: @kuid mapped according to @mnt_userns. -- * If @kuid has no mapping INVALID_UID is returned. -- */ --static inline kuid_t kuid_into_mnt(struct user_namespace *mnt_userns, -- kuid_t kuid) --{ -- return make_kuid(mnt_userns, __kuid_val(kuid)); --} -- --/** -- * kgid_into_mnt - map a kgid down into a mnt_userns -- * @mnt_userns: user namespace of the relevant mount -- * @kgid: kgid to be mapped -- * -- * Return: @kgid mapped according to @mnt_userns. -- * If @kgid has no mapping INVALID_GID is returned. 
-- */ --static inline kgid_t kgid_into_mnt(struct user_namespace *mnt_userns, -- kgid_t kgid) --{ -- return make_kgid(mnt_userns, __kgid_val(kgid)); -+ inode->i_gid = make_kgid(i_user_ns(inode), gid); - } - - /** -@@ -1665,7 +1643,7 @@ static inline kgid_t kgid_into_mnt(struct user_namespace *mnt_userns, - static inline kuid_t i_uid_into_mnt(struct user_namespace *mnt_userns, - const struct inode *inode) - { -- return kuid_into_mnt(mnt_userns, inode->i_uid); -+ return mapped_kuid_fs(mnt_userns, i_user_ns(inode), inode->i_uid); - } - - /** -@@ -1679,69 +1657,7 @@ static inline kuid_t i_uid_into_mnt(struct user_namespace *mnt_userns, - static inline kgid_t i_gid_into_mnt(struct user_namespace *mnt_userns, - const struct inode *inode) - { -- return kgid_into_mnt(mnt_userns, inode->i_gid); --} -- --/** -- * kuid_from_mnt - map a kuid up into a mnt_userns -- * @mnt_userns: user namespace of the relevant mount -- * @kuid: kuid to be mapped -- * -- * Return: @kuid mapped up according to @mnt_userns. -- * If @kuid has no mapping INVALID_UID is returned. -- */ --static inline kuid_t kuid_from_mnt(struct user_namespace *mnt_userns, -- kuid_t kuid) --{ -- return KUIDT_INIT(from_kuid(mnt_userns, kuid)); --} -- --/** -- * kgid_from_mnt - map a kgid up into a mnt_userns -- * @mnt_userns: user namespace of the relevant mount -- * @kgid: kgid to be mapped -- * -- * Return: @kgid mapped up according to @mnt_userns. -- * If @kgid has no mapping INVALID_GID is returned. -- */ --static inline kgid_t kgid_from_mnt(struct user_namespace *mnt_userns, -- kgid_t kgid) --{ -- return KGIDT_INIT(from_kgid(mnt_userns, kgid)); --} -- --/** -- * mapped_fsuid - return caller's fsuid mapped up into a mnt_userns -- * @mnt_userns: user namespace of the relevant mount -- * -- * Use this helper to initialize a new vfs or filesystem object based on -- * the caller's fsuid. A common example is initializing the i_uid field of -- * a newly allocated inode triggered by a creation event such as mkdir or -- * O_CREAT. Other examples include the allocation of quotas for a specific -- * user. -- * -- * Return: the caller's current fsuid mapped up according to @mnt_userns. -- */ --static inline kuid_t mapped_fsuid(struct user_namespace *mnt_userns) --{ -- return kuid_from_mnt(mnt_userns, current_fsuid()); --} -- --/** -- * mapped_fsgid - return caller's fsgid mapped up into a mnt_userns -- * @mnt_userns: user namespace of the relevant mount -- * -- * Use this helper to initialize a new vfs or filesystem object based on -- * the caller's fsgid. A common example is initializing the i_gid field of -- * a newly allocated inode triggered by a creation event such as mkdir or -- * O_CREAT. Other examples include the allocation of quotas for a specific -- * user. -- * -- * Return: the caller's current fsgid mapped up according to @mnt_userns. 
-- */ --static inline kgid_t mapped_fsgid(struct user_namespace *mnt_userns) --{ -- return kgid_from_mnt(mnt_userns, current_fsgid()); -+ return mapped_kgid_fs(mnt_userns, i_user_ns(inode), inode->i_gid); - } - - /** -@@ -1755,7 +1671,7 @@ static inline kgid_t mapped_fsgid(struct user_namespace *mnt_userns) - static inline void inode_fsuid_set(struct inode *inode, - struct user_namespace *mnt_userns) - { -- inode->i_uid = mapped_fsuid(mnt_userns); -+ inode->i_uid = mapped_fsuid(mnt_userns, i_user_ns(inode)); - } - - /** -@@ -1769,7 +1685,7 @@ static inline void inode_fsuid_set(struct inode *inode, - static inline void inode_fsgid_set(struct inode *inode, - struct user_namespace *mnt_userns) - { -- inode->i_gid = mapped_fsgid(mnt_userns); -+ inode->i_gid = mapped_fsgid(mnt_userns, i_user_ns(inode)); - } - - /** -@@ -1786,10 +1702,18 @@ static inline void inode_fsgid_set(struct inode *inode, - static inline bool fsuidgid_has_mapping(struct super_block *sb, - struct user_namespace *mnt_userns) - { -- struct user_namespace *s_user_ns = sb->s_user_ns; -+ struct user_namespace *fs_userns = sb->s_user_ns; -+ kuid_t kuid; -+ kgid_t kgid; - -- return kuid_has_mapping(s_user_ns, mapped_fsuid(mnt_userns)) && -- kgid_has_mapping(s_user_ns, mapped_fsgid(mnt_userns)); -+ kuid = mapped_fsuid(mnt_userns, fs_userns); -+ if (!uid_valid(kuid)) -+ return false; -+ kgid = mapped_fsgid(mnt_userns, fs_userns); -+ if (!gid_valid(kgid)) -+ return false; -+ return kuid_has_mapping(fs_userns, kuid) && -+ kgid_has_mapping(fs_userns, kgid); - } - - extern struct timespec64 current_time(struct inode *inode); -@@ -2364,13 +2288,14 @@ static inline void kiocb_clone(struct kiocb *kiocb, struct kiocb *kiocb_src, - * don't have to write inode on fdatasync() when only - * e.g. the timestamps have changed. - * I_DIRTY_PAGES Inode has dirty pages. Inode itself may be clean. -- * I_DIRTY_TIME The inode itself only has dirty timestamps, and the -+ * I_DIRTY_TIME The inode itself has dirty timestamps, and the - * lazytime mount option is enabled. We keep track of this - * separately from I_DIRTY_SYNC in order to implement - * lazytime. This gets cleared if I_DIRTY_INODE -- * (I_DIRTY_SYNC and/or I_DIRTY_DATASYNC) gets set. I.e. -- * either I_DIRTY_TIME *or* I_DIRTY_INODE can be set in -- * i_state, but not both. I_DIRTY_PAGES may still be set. -+ * (I_DIRTY_SYNC and/or I_DIRTY_DATASYNC) gets set. But -+ * I_DIRTY_TIME can still be set if I_DIRTY_SYNC is already -+ * in place because writeback might already be in progress -+ * and we don't want to lose the time update - * I_NEW Serves as both a mutex and completion notification. - * New inodes set I_NEW. If two processes both create - * the same inode, one of them will release its inode and -@@ -2498,6 +2423,8 @@ enum file_time_flags { - - extern bool atime_needs_update(const struct path *, struct inode *); - extern void touch_atime(const struct path *); -+int inode_update_time(struct inode *inode, struct timespec64 *time, int flags); ++/* ++ * Note that this task has used io_uring. We use it for cancelation purposes. 
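
The tctx node helpers here are what make per-task cancellation possible:
the first submission from a task registers it with the ring, and tctx->last
caches the most recently used ring so repeat calls skip the xarray lookup.
A raw-syscall sketch of the userspace side (error handling elided):

    #include <linux/io_uring.h>
    #include <string.h>
    #include <sys/syscall.h>
    #include <unistd.h>

    int main(void)
    {
        struct io_uring_params p;
        int fd;

        memset(&p, 0, sizeof(p));
        fd = syscall(__NR_io_uring_setup, 4, &p);
        if (fd < 0)
            return 1;

        /* First submitting io_uring_enter() from this task: the kernel
         * allocates current->io_uring and hangs a node off ctx->tctx_list
         * (io_uring_add_tctx_node); later calls hit the tctx->last fast
         * path. Nothing is actually queued here, so submitted == 0. */
        syscall(__NR_io_uring_enter, fd, 1, 0, 0, NULL, 0);

        close(fd); /* final ref: io_uring_release() -> wait-and-kill path */
        return 0;
    }
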
++ */ ++static inline int io_uring_add_tctx_node(struct io_ring_ctx *ctx) ++{ ++ struct io_uring_task *tctx = current->io_uring; + - static inline void file_accessed(struct file *file) - { - if (!(file->f_flags & O_NOATIME)) -@@ -2724,6 +2651,21 @@ static inline struct user_namespace *file_mnt_user_ns(struct file *file) - { - return mnt_user_ns(file->f_path.mnt); - } ++ if (likely(tctx && tctx->last == ctx)) ++ return 0; ++ return __io_uring_add_tctx_node(ctx); ++} + -+/** -+ * is_idmapped_mnt - check whether a mount is mapped -+ * @mnt: the mount to check -+ * -+ * If @mnt has an idmapping attached different from the -+ * filesystem's idmapping then @mnt is mapped. -+ * -+ * Return: true if mount is mapped, false if not. ++/* ++ * Remove this io_uring_file -> task mapping. + */ -+static inline bool is_idmapped_mnt(const struct vfsmount *mnt) ++static void io_uring_del_tctx_node(unsigned long index) +{ -+ return mnt_user_ns(mnt) != mnt->mnt_sb->s_user_ns; ++ struct io_uring_task *tctx = current->io_uring; ++ struct io_tctx_node *node; ++ ++ if (!tctx) ++ return; ++ node = xa_erase(&tctx->xa, index); ++ if (!node) ++ return; ++ ++ WARN_ON_ONCE(current != node->task); ++ WARN_ON_ONCE(list_empty(&node->ctx_node)); ++ ++ mutex_lock(&node->ctx->uring_lock); ++ list_del(&node->ctx_node); ++ mutex_unlock(&node->ctx->uring_lock); ++ ++ if (tctx->last == node->ctx) ++ tctx->last = NULL; ++ kfree(node); ++} ++ ++static void io_uring_clean_tctx(struct io_uring_task *tctx) ++{ ++ struct io_wq *wq = tctx->io_wq; ++ struct io_tctx_node *node; ++ unsigned long index; ++ ++ xa_for_each(&tctx->xa, index, node) { ++ io_uring_del_tctx_node(index); ++ cond_resched(); ++ } ++ if (wq) { ++ /* ++ * Must be after io_uring_del_task_file() (removes nodes under ++ * uring_lock) to avoid race with io_uring_try_cancel_iowq(). ++ */ ++ io_wq_put_and_exit(wq); ++ tctx->io_wq = NULL; ++ } ++} ++ ++static s64 tctx_inflight(struct io_uring_task *tctx, bool tracked) ++{ ++ if (tracked) ++ return atomic_read(&tctx->inflight_tracked); ++ return percpu_counter_sum(&tctx->inflight); +} + - extern long vfs_truncate(const struct path *, loff_t); - int do_truncate(struct user_namespace *, struct dentry *, loff_t start, - unsigned int time_attrs, struct file *filp); -diff --git a/include/linux/fs_context.h b/include/linux/fs_context.h -index 6b54982fc5f37..13fa6f3df8e46 100644 ---- a/include/linux/fs_context.h -+++ b/include/linux/fs_context.h -@@ -142,6 +142,8 @@ extern void put_fs_context(struct fs_context *fc); - extern int vfs_parse_fs_param_source(struct fs_context *fc, - struct fs_parameter *param); - extern void fc_drop_locked(struct fs_context *fc); -+int reconfigure_single(struct super_block *s, -+ int flags, void *data); - - /* - * sget() wrappers to be called from the ->get_tree() op. 
-diff --git a/include/linux/fscache.h b/include/linux/fscache.h -index a4dab59986137..3b2282c157f79 100644 ---- a/include/linux/fscache.h -+++ b/include/linux/fscache.h -@@ -167,7 +167,7 @@ struct fscache_cookie { - - static inline bool fscache_cookie_enabled(struct fscache_cookie *cookie) - { -- return test_bit(FSCACHE_COOKIE_ENABLED, &cookie->flags); -+ return fscache_cookie_valid(cookie) && test_bit(FSCACHE_COOKIE_ENABLED, &cookie->flags); - } - - /* -diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h -index 12d3a7d308ab9..a9477c14fad5c 100644 ---- a/include/linux/fsnotify.h -+++ b/include/linux/fsnotify.h -@@ -212,6 +212,42 @@ static inline void fsnotify_link(struct inode *dir, struct inode *inode, - fsnotify_name(dir, FS_CREATE, inode, &new_dentry->d_name, 0); - } - +/* -+ * fsnotify_delete - @dentry was unlinked and unhashed -+ * -+ * Caller must make sure that dentry->d_name is stable. -+ * -+ * Note: unlike fsnotify_unlink(), we have to pass also the unlinked inode -+ * as this may be called after d_delete() and old_dentry may be negative. ++ * Find any io_uring ctx that this task has registered or done IO on, and cancel ++ * requests. @sqd should be not-null IFF it's an SQPOLL thread cancellation. + */ -+static inline void fsnotify_delete(struct inode *dir, struct inode *inode, -+ struct dentry *dentry) ++static void io_uring_cancel_generic(bool cancel_all, struct io_sq_data *sqd) +{ -+ __u32 mask = FS_DELETE; ++ struct io_uring_task *tctx = current->io_uring; ++ struct io_ring_ctx *ctx; ++ s64 inflight; ++ DEFINE_WAIT(wait); + -+ if (S_ISDIR(inode->i_mode)) -+ mask |= FS_ISDIR; ++ WARN_ON_ONCE(sqd && sqd->thread != current); + -+ fsnotify_name(dir, mask, inode, &dentry->d_name, 0); ++ if (!current->io_uring) ++ return; ++ if (tctx->io_wq) ++ io_wq_exit_start(tctx->io_wq); ++ ++ atomic_inc(&tctx->in_idle); ++ do { ++ io_uring_drop_tctx_refs(current); ++ /* read completions before cancelations */ ++ inflight = tctx_inflight(tctx, !cancel_all); ++ if (!inflight) ++ break; ++ ++ if (!sqd) { ++ struct io_tctx_node *node; ++ unsigned long index; ++ ++ xa_for_each(&tctx->xa, index, node) { ++ /* sqpoll task will cancel all its requests */ ++ if (node->ctx->sq_data) ++ continue; ++ io_uring_try_cancel_requests(node->ctx, current, ++ cancel_all); ++ } ++ } else { ++ list_for_each_entry(ctx, &sqd->ctx_list, sqd_list) ++ io_uring_try_cancel_requests(ctx, current, ++ cancel_all); ++ } ++ ++ prepare_to_wait(&tctx->wait, &wait, TASK_INTERRUPTIBLE); ++ io_run_task_work(); ++ io_uring_drop_tctx_refs(current); ++ ++ /* ++ * If we've seen completions, retry without waiting. This ++ * avoids a race where a completion comes in before we did ++ * prepare_to_wait(). ++ */ ++ if (inflight == tctx_inflight(tctx, !cancel_all)) ++ schedule(); ++ finish_wait(&tctx->wait, &wait); ++ } while (1); ++ ++ io_uring_clean_tctx(tctx); ++ if (cancel_all) { ++ /* ++ * We shouldn't run task_works after cancel, so just leave ++ * ->in_idle set for normal exit. ++ */ ++ atomic_dec(&tctx->in_idle); ++ /* for exec all current's requests should be gone, kill tctx */ ++ __io_uring_free(current); ++ } +} + -+/** -+ * d_delete_notify - delete a dentry and call fsnotify_delete() -+ * @dentry: The dentry to delete -+ * -+ * This helper is used to guaranty that the unlinked inode cannot be found -+ * by lookup of this name after fsnotify_delete() event has been delivered. 
-+ */ -+static inline void d_delete_notify(struct inode *dir, struct dentry *dentry) ++void __io_uring_cancel(bool cancel_all) +{ -+ struct inode *inode = d_inode(dentry); ++ io_uring_cancel_generic(cancel_all, NULL); ++} + -+ ihold(inode); -+ d_delete(dentry); -+ fsnotify_delete(dir, inode, dentry); -+ iput(inode); ++static void *io_uring_validate_mmap_request(struct file *file, ++ loff_t pgoff, size_t sz) ++{ ++ struct io_ring_ctx *ctx = file->private_data; ++ loff_t offset = pgoff << PAGE_SHIFT; ++ struct page *page; ++ void *ptr; ++ ++ switch (offset) { ++ case IORING_OFF_SQ_RING: ++ case IORING_OFF_CQ_RING: ++ ptr = ctx->rings; ++ break; ++ case IORING_OFF_SQES: ++ ptr = ctx->sq_sqes; ++ break; ++ default: ++ return ERR_PTR(-EINVAL); ++ } ++ ++ page = virt_to_head_page(ptr); ++ if (sz > page_size(page)) ++ return ERR_PTR(-EINVAL); ++ ++ return ptr; +} + - /* - * fsnotify_unlink - 'name' was unlinked - * -@@ -219,10 +255,10 @@ static inline void fsnotify_link(struct inode *dir, struct inode *inode, - */ - static inline void fsnotify_unlink(struct inode *dir, struct dentry *dentry) - { -- /* Expected to be called before d_delete() */ -- WARN_ON_ONCE(d_is_negative(dentry)); -+ if (WARN_ON_ONCE(d_is_negative(dentry))) -+ return; - -- fsnotify_dirent(dir, dentry, FS_DELETE); -+ fsnotify_delete(dir, d_inode(dentry), dentry); - } - - /* -@@ -242,10 +278,10 @@ static inline void fsnotify_mkdir(struct inode *inode, struct dentry *dentry) - */ - static inline void fsnotify_rmdir(struct inode *dir, struct dentry *dentry) - { -- /* Expected to be called before d_delete() */ -- WARN_ON_ONCE(d_is_negative(dentry)); -+ if (WARN_ON_ONCE(d_is_negative(dentry))) -+ return; - -- fsnotify_dirent(dir, dentry, FS_DELETE | FS_ISDIR); -+ fsnotify_delete(dir, d_inode(dentry), dentry); - } - - /* -diff --git a/include/linux/fwnode.h b/include/linux/fwnode.h -index 9f4ad719bfe3f..2d68606fb725d 100644 ---- a/include/linux/fwnode.h -+++ b/include/linux/fwnode.h -@@ -147,12 +147,12 @@ struct fwnode_operations { - int (*add_links)(struct fwnode_handle *fwnode); - }; - --#define fwnode_has_op(fwnode, op) \ -- ((fwnode) && (fwnode)->ops && (fwnode)->ops->op) -+#define fwnode_has_op(fwnode, op) \ -+ (!IS_ERR_OR_NULL(fwnode) && (fwnode)->ops && (fwnode)->ops->op) ++#ifdef CONFIG_MMU + - #define fwnode_call_int_op(fwnode, op, ...) \ -- (fwnode ? (fwnode_has_op(fwnode, op) ? \ -- (fwnode)->ops->op(fwnode, ## __VA_ARGS__) : -ENXIO) : \ -- -EINVAL) -+ (fwnode_has_op(fwnode, op) ? \ -+ (fwnode)->ops->op(fwnode, ## __VA_ARGS__) : (IS_ERR_OR_NULL(fwnode) ? -EINVAL : -ENXIO)) - - #define fwnode_call_bool_op(fwnode, op, ...) \ - (fwnode_has_op(fwnode, op) ? 
\ -diff --git a/include/linux/genhd.h b/include/linux/genhd.h -index 0f5315c2b5a34..0b48a0cf42624 100644 ---- a/include/linux/genhd.h -+++ b/include/linux/genhd.h -@@ -12,12 +12,10 @@ - - #include <linux/types.h> - #include <linux/kdev_t.h> --#include <linux/rcupdate.h> --#include <linux/slab.h> --#include <linux/percpu-refcount.h> - #include <linux/uuid.h> - #include <linux/blk_types.h> --#include <asm/local.h> -+#include <linux/device.h> -+#include <linux/xarray.h> - - extern const struct device_type disk_type; - extern struct device_type part_type; -@@ -26,14 +24,6 @@ extern struct class block_class; - #define DISK_MAX_PARTS 256 - #define DISK_NAME_LEN 32 - --#include <linux/major.h> --#include <linux/device.h> --#include <linux/smp.h> --#include <linux/string.h> --#include <linux/fs.h> --#include <linux/workqueue.h> --#include <linux/xarray.h> -- - #define PARTITION_META_INFO_VOLNAMELTH 64 - /* - * Enough for the string representation of any kind of UUID plus NULL. -diff --git a/include/linux/goldfish.h b/include/linux/goldfish.h -index 12be1601fd845..bcc17f95b9066 100644 ---- a/include/linux/goldfish.h -+++ b/include/linux/goldfish.h -@@ -8,14 +8,21 @@ - - /* Helpers for Goldfish virtual platform */ - -+#ifndef gf_ioread32 -+#define gf_ioread32 ioread32 ++static int io_uring_mmap(struct file *file, struct vm_area_struct *vma) ++{ ++ size_t sz = vma->vm_end - vma->vm_start; ++ unsigned long pfn; ++ void *ptr; ++ ++ ptr = io_uring_validate_mmap_request(file, vma->vm_pgoff, sz); ++ if (IS_ERR(ptr)) ++ return PTR_ERR(ptr); ++ ++ pfn = virt_to_phys(ptr) >> PAGE_SHIFT; ++ return remap_pfn_range(vma, vma->vm_start, pfn, sz, vma->vm_page_prot); ++} ++ ++#else /* !CONFIG_MMU */ ++ ++static int io_uring_mmap(struct file *file, struct vm_area_struct *vma) ++{ ++ return vma->vm_flags & (VM_SHARED | VM_MAYSHARE) ? 0 : -EINVAL; ++} ++ ++static unsigned int io_uring_nommu_mmap_capabilities(struct file *file) ++{ ++ return NOMMU_MAP_DIRECT | NOMMU_MAP_READ | NOMMU_MAP_WRITE; ++} ++ ++static unsigned long io_uring_nommu_get_unmapped_area(struct file *file, ++ unsigned long addr, unsigned long len, ++ unsigned long pgoff, unsigned long flags) ++{ ++ void *ptr; ++ ++ ptr = io_uring_validate_mmap_request(file, pgoff, len); ++ if (IS_ERR(ptr)) ++ return PTR_ERR(ptr); ++ ++ return (unsigned long) ptr; ++} ++ ++#endif /* !CONFIG_MMU */ ++ ++static int io_sqpoll_wait_sq(struct io_ring_ctx *ctx) ++{ ++ DEFINE_WAIT(wait); ++ ++ do { ++ if (!io_sqring_full(ctx)) ++ break; ++ prepare_to_wait(&ctx->sqo_sq_wait, &wait, TASK_INTERRUPTIBLE); ++ ++ if (!io_sqring_full(ctx)) ++ break; ++ schedule(); ++ } while (!signal_pending(current)); ++ ++ finish_wait(&ctx->sqo_sq_wait, &wait); ++ return 0; ++} ++ ++static int io_get_ext_arg(unsigned flags, const void __user *argp, size_t *argsz, ++ struct __kernel_timespec __user **ts, ++ const sigset_t __user **sig) ++{ ++ struct io_uring_getevents_arg arg; ++ ++ /* ++ * If EXT_ARG isn't set, then we have no timespec and the argp pointer ++ * is just a pointer to the sigset_t. ++ */ ++ if (!(flags & IORING_ENTER_EXT_ARG)) { ++ *sig = (const sigset_t __user *) argp; ++ *ts = NULL; ++ return 0; ++ } ++ ++ /* ++ * EXT_ARG is set - ensure we agree on the size of it and copy in our ++ * timespec and sigset_t pointers if good. 
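
Decoding that: with IORING_ENTER_EXT_ARG set, the last two io_uring_enter()
arguments stop meaning "sigset and its size" and instead describe a
struct io_uring_getevents_arg, which is how a single wait can carry both a
signal mask and a timeout. A minimal raw-syscall caller, assuming a 5.11+
UAPI header (IORING_FEAT_EXT_ARG kernels):

    #include <linux/io_uring.h>
    #include <linux/time_types.h>
    #include <stdint.h>
    #include <string.h>
    #include <sys/syscall.h>
    #include <unistd.h>

    /* Wait up to 10ms for one CQE via the extended-argument form. */
    static long wait_cqe_timeout(int ring_fd)
    {
        struct __kernel_timespec ts = { .tv_sec = 0, .tv_nsec = 10000000 };
        struct io_uring_getevents_arg arg;

        memset(&arg, 0, sizeof(arg));   /* a nonzero arg.pad is -EINVAL */
        arg.sigmask = 0;                /* no signal mask change */
        arg.ts = (uint64_t)(uintptr_t)&ts;

        /* With EXT_ARG, argp/argsz describe this struct instead of a
         * bare sigset_t, exactly as io_get_ext_arg() decodes it. */
        return syscall(__NR_io_uring_enter, ring_fd, 0, 1,
                       IORING_ENTER_GETEVENTS | IORING_ENTER_EXT_ARG,
                       &arg, sizeof(arg));
    }

    int main(void)
    {
        struct io_uring_params p;
        int fd;

        memset(&p, 0, sizeof(p));
        fd = syscall(__NR_io_uring_setup, 4, &p);
        if (fd < 0)
            return 1;
        wait_cqe_timeout(fd);           /* times out: the ring is idle */
        close(fd);
        return 0;
    }
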
++ */ ++ if (*argsz != sizeof(arg)) ++ return -EINVAL; ++ if (copy_from_user(&arg, argp, sizeof(arg))) ++ return -EFAULT; ++ if (arg.pad) ++ return -EINVAL; ++ *sig = u64_to_user_ptr(arg.sigmask); ++ *argsz = arg.sigmask_sz; ++ *ts = u64_to_user_ptr(arg.ts); ++ return 0; ++} ++ ++SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit, ++ u32, min_complete, u32, flags, const void __user *, argp, ++ size_t, argsz) ++{ ++ struct io_ring_ctx *ctx; ++ int submitted = 0; ++ struct fd f; ++ long ret; ++ ++ io_run_task_work(); ++ ++ if (unlikely(flags & ~(IORING_ENTER_GETEVENTS | IORING_ENTER_SQ_WAKEUP | ++ IORING_ENTER_SQ_WAIT | IORING_ENTER_EXT_ARG))) ++ return -EINVAL; ++ ++ f = fdget(fd); ++ if (unlikely(!f.file)) ++ return -EBADF; ++ ++ ret = -EOPNOTSUPP; ++ if (unlikely(f.file->f_op != &io_uring_fops)) ++ goto out_fput; ++ ++ ret = -ENXIO; ++ ctx = f.file->private_data; ++ if (unlikely(!percpu_ref_tryget(&ctx->refs))) ++ goto out_fput; ++ ++ ret = -EBADFD; ++ if (unlikely(ctx->flags & IORING_SETUP_R_DISABLED)) ++ goto out; ++ ++ /* ++ * For SQ polling, the thread will do all submissions and completions. ++ * Just return the requested submit count, and wake the thread if ++ * we were asked to. ++ */ ++ ret = 0; ++ if (ctx->flags & IORING_SETUP_SQPOLL) { ++ io_cqring_overflow_flush(ctx); ++ ++ if (unlikely(ctx->sq_data->thread == NULL)) { ++ ret = -EOWNERDEAD; ++ goto out; ++ } ++ if (flags & IORING_ENTER_SQ_WAKEUP) ++ wake_up(&ctx->sq_data->wait); ++ if (flags & IORING_ENTER_SQ_WAIT) { ++ ret = io_sqpoll_wait_sq(ctx); ++ if (ret) ++ goto out; ++ } ++ submitted = to_submit; ++ } else if (to_submit) { ++ ret = io_uring_add_tctx_node(ctx); ++ if (unlikely(ret)) ++ goto out; ++ mutex_lock(&ctx->uring_lock); ++ submitted = io_submit_sqes(ctx, to_submit); ++ mutex_unlock(&ctx->uring_lock); ++ ++ if (submitted != to_submit) ++ goto out; ++ } ++ if (flags & IORING_ENTER_GETEVENTS) { ++ const sigset_t __user *sig; ++ struct __kernel_timespec __user *ts; ++ ++ ret = io_get_ext_arg(flags, argp, &argsz, &ts, &sig); ++ if (unlikely(ret)) ++ goto out; ++ ++ min_complete = min(min_complete, ctx->cq_entries); ++ ++ /* ++ * When SETUP_IOPOLL and SETUP_SQPOLL are both enabled, user ++ * space applications don't need to do io completion events ++ * polling again, they can rely on io_sq_thread to do polling ++ * work, which can reduce cpu usage and uring_lock contention. ++ */ ++ if (ctx->flags & IORING_SETUP_IOPOLL && ++ !(ctx->flags & IORING_SETUP_SQPOLL)) { ++ ret = io_iopoll_check(ctx, min_complete); ++ } else { ++ ret = io_cqring_wait(ctx, min_complete, sig, argsz, ts); ++ } ++ } ++ ++out: ++ percpu_ref_put(&ctx->refs); ++out_fput: ++ fdput(f); ++ return submitted ? 
submitted : ret; ++} ++ ++#ifdef CONFIG_PROC_FS ++static int io_uring_show_cred(struct seq_file *m, unsigned int id, ++ const struct cred *cred) ++{ ++ struct user_namespace *uns = seq_user_ns(m); ++ struct group_info *gi; ++ kernel_cap_t cap; ++ unsigned __capi; ++ int g; ++ ++ seq_printf(m, "%5d\n", id); ++ seq_put_decimal_ull(m, "\tUid:\t", from_kuid_munged(uns, cred->uid)); ++ seq_put_decimal_ull(m, "\t\t", from_kuid_munged(uns, cred->euid)); ++ seq_put_decimal_ull(m, "\t\t", from_kuid_munged(uns, cred->suid)); ++ seq_put_decimal_ull(m, "\t\t", from_kuid_munged(uns, cred->fsuid)); ++ seq_put_decimal_ull(m, "\n\tGid:\t", from_kgid_munged(uns, cred->gid)); ++ seq_put_decimal_ull(m, "\t\t", from_kgid_munged(uns, cred->egid)); ++ seq_put_decimal_ull(m, "\t\t", from_kgid_munged(uns, cred->sgid)); ++ seq_put_decimal_ull(m, "\t\t", from_kgid_munged(uns, cred->fsgid)); ++ seq_puts(m, "\n\tGroups:\t"); ++ gi = cred->group_info; ++ for (g = 0; g < gi->ngroups; g++) { ++ seq_put_decimal_ull(m, g ? " " : "", ++ from_kgid_munged(uns, gi->gid[g])); ++ } ++ seq_puts(m, "\n\tCapEff:\t"); ++ cap = cred->cap_effective; ++ CAP_FOR_EACH_U32(__capi) ++ seq_put_hex_ll(m, NULL, cap.cap[CAP_LAST_U32 - __capi], 8); ++ seq_putc(m, '\n'); ++ return 0; ++} ++ ++static void __io_uring_show_fdinfo(struct io_ring_ctx *ctx, struct seq_file *m) ++{ ++ struct io_sq_data *sq = NULL; ++ bool has_lock; ++ int i; ++ ++ /* ++ * Avoid ABBA deadlock between the seq lock and the io_uring mutex, ++ * since fdinfo case grabs it in the opposite direction of normal use ++ * cases. If we fail to get the lock, we just don't iterate any ++ * structures that could be going away outside the io_uring mutex. ++ */ ++ has_lock = mutex_trylock(&ctx->uring_lock); ++ ++ if (has_lock && (ctx->flags & IORING_SETUP_SQPOLL)) { ++ sq = ctx->sq_data; ++ if (!sq->thread) ++ sq = NULL; ++ } ++ ++ seq_printf(m, "SqThread:\t%d\n", sq ? task_pid_nr(sq->thread) : -1); ++ seq_printf(m, "SqThreadCpu:\t%d\n", sq ? 
task_cpu(sq->thread) : -1); ++ seq_printf(m, "UserFiles:\t%u\n", ctx->nr_user_files); ++ for (i = 0; has_lock && i < ctx->nr_user_files; i++) { ++ struct file *f = io_file_from_index(ctx, i); ++ ++ if (f) ++ seq_printf(m, "%5u: %s\n", i, file_dentry(f)->d_iname); ++ else ++ seq_printf(m, "%5u: <none>\n", i); ++ } ++ seq_printf(m, "UserBufs:\t%u\n", ctx->nr_user_bufs); ++ for (i = 0; has_lock && i < ctx->nr_user_bufs; i++) { ++ struct io_mapped_ubuf *buf = ctx->user_bufs[i]; ++ unsigned int len = buf->ubuf_end - buf->ubuf; ++ ++ seq_printf(m, "%5u: 0x%llx/%u\n", i, buf->ubuf, len); ++ } ++ if (has_lock && !xa_empty(&ctx->personalities)) { ++ unsigned long index; ++ const struct cred *cred; ++ ++ seq_printf(m, "Personalities:\n"); ++ xa_for_each(&ctx->personalities, index, cred) ++ io_uring_show_cred(m, index, cred); ++ } ++ seq_printf(m, "PollList:\n"); ++ spin_lock(&ctx->completion_lock); ++ for (i = 0; i < (1U << ctx->cancel_hash_bits); i++) { ++ struct hlist_head *list = &ctx->cancel_hash[i]; ++ struct io_kiocb *req; ++ ++ hlist_for_each_entry(req, list, hash_node) ++ seq_printf(m, " op=%d, task_works=%d\n", req->opcode, ++ req->task->task_works != NULL); ++ } ++ spin_unlock(&ctx->completion_lock); ++ if (has_lock) ++ mutex_unlock(&ctx->uring_lock); ++} ++ ++static void io_uring_show_fdinfo(struct seq_file *m, struct file *f) ++{ ++ struct io_ring_ctx *ctx = f->private_data; ++ ++ if (percpu_ref_tryget(&ctx->refs)) { ++ __io_uring_show_fdinfo(ctx, m); ++ percpu_ref_put(&ctx->refs); ++ } ++} +#endif -+#ifndef gf_iowrite32 -+#define gf_iowrite32 iowrite32 ++ ++static const struct file_operations io_uring_fops = { ++ .release = io_uring_release, ++ .mmap = io_uring_mmap, ++#ifndef CONFIG_MMU ++ .get_unmapped_area = io_uring_nommu_get_unmapped_area, ++ .mmap_capabilities = io_uring_nommu_mmap_capabilities, ++#endif ++ .poll = io_uring_poll, ++#ifdef CONFIG_PROC_FS ++ .show_fdinfo = io_uring_show_fdinfo, +#endif ++}; + - static inline void gf_write_ptr(const void *ptr, void __iomem *portl, - void __iomem *porth) - { - const unsigned long addr = (unsigned long)ptr; - -- __raw_writel(lower_32_bits(addr), portl); -+ gf_iowrite32(lower_32_bits(addr), portl); - #ifdef CONFIG_64BIT -- __raw_writel(upper_32_bits(addr), porth); -+ gf_iowrite32(upper_32_bits(addr), porth); - #endif - } - -@@ -23,9 +30,9 @@ static inline void gf_write_dma_addr(const dma_addr_t addr, - void __iomem *portl, - void __iomem *porth) - { -- __raw_writel(lower_32_bits(addr), portl); -+ gf_iowrite32(lower_32_bits(addr), portl); - #ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT -- __raw_writel(upper_32_bits(addr), porth); -+ gf_iowrite32(upper_32_bits(addr), porth); - #endif - } - -diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h -index a0f9901dcae60..ad479db8f0aac 100644 ---- a/include/linux/gpio/driver.h -+++ b/include/linux/gpio/driver.h -@@ -224,6 +224,15 @@ struct gpio_irq_chip { - unsigned long *valid_mask, - unsigned int ngpios); - -+ /** -+ * @initialized: -+ * -+ * Flag to track GPIO chip irq member's initialization. -+ * This flag will make sure GPIO chip irq members are not used -+ * before they are initialized. 
++static int io_allocate_scq_urings(struct io_ring_ctx *ctx, ++ struct io_uring_params *p) ++{ ++ struct io_rings *rings; ++ size_t size, sq_array_offset; ++ ++ /* make sure these are sane, as we already accounted them */ ++ ctx->sq_entries = p->sq_entries; ++ ctx->cq_entries = p->cq_entries; ++ ++ size = rings_size(p->sq_entries, p->cq_entries, &sq_array_offset); ++ if (size == SIZE_MAX) ++ return -EOVERFLOW; ++ ++ rings = io_mem_alloc(size); ++ if (!rings) ++ return -ENOMEM; ++ ++ ctx->rings = rings; ++ ctx->sq_array = (u32 *)((char *)rings + sq_array_offset); ++ rings->sq_ring_mask = p->sq_entries - 1; ++ rings->cq_ring_mask = p->cq_entries - 1; ++ rings->sq_ring_entries = p->sq_entries; ++ rings->cq_ring_entries = p->cq_entries; ++ ++ size = array_size(sizeof(struct io_uring_sqe), p->sq_entries); ++ if (size == SIZE_MAX) { ++ io_mem_free(ctx->rings); ++ ctx->rings = NULL; ++ return -EOVERFLOW; ++ } ++ ++ ctx->sq_sqes = io_mem_alloc(size); ++ if (!ctx->sq_sqes) { ++ io_mem_free(ctx->rings); ++ ctx->rings = NULL; ++ return -ENOMEM; ++ } ++ ++ return 0; ++} ++ ++static int io_uring_install_fd(struct io_ring_ctx *ctx, struct file *file) ++{ ++ int ret, fd; ++ ++ fd = get_unused_fd_flags(O_RDWR | O_CLOEXEC); ++ if (fd < 0) ++ return fd; ++ ++ ret = io_uring_add_tctx_node(ctx); ++ if (ret) { ++ put_unused_fd(fd); ++ return ret; ++ } ++ fd_install(fd, file); ++ return fd; ++} ++ ++/* ++ * Allocate an anonymous fd, this is what constitutes the application ++ * visible backing of an io_uring instance. The application mmaps this ++ * fd to gain access to the SQ/CQ ring details. If UNIX sockets are enabled, ++ * we have to tie this fd to a socket for file garbage collection purposes. ++ */ ++static struct file *io_uring_get_file(struct io_ring_ctx *ctx) ++{ ++ struct file *file; ++#if defined(CONFIG_UNIX) ++ int ret; ++ ++ ret = sock_create_kern(&init_net, PF_UNIX, SOCK_RAW, IPPROTO_IP, ++ &ctx->ring_sock); ++ if (ret) ++ return ERR_PTR(ret); ++#endif ++ ++ file = anon_inode_getfile("[io_uring]", &io_uring_fops, ctx, ++ O_RDWR | O_CLOEXEC); ++#if defined(CONFIG_UNIX) ++ if (IS_ERR(file)) { ++ sock_release(ctx->ring_sock); ++ ctx->ring_sock = NULL; ++ } else { ++ ctx->ring_sock->file = file; ++ } ++#endif ++ return file; ++} ++ ++static int io_uring_create(unsigned entries, struct io_uring_params *p, ++ struct io_uring_params __user *params) ++{ ++ struct io_ring_ctx *ctx; ++ struct file *file; ++ int ret; ++ ++ if (!entries) ++ return -EINVAL; ++ if (entries > IORING_MAX_ENTRIES) { ++ if (!(p->flags & IORING_SETUP_CLAMP)) ++ return -EINVAL; ++ entries = IORING_MAX_ENTRIES; ++ } ++ ++ /* ++ * Use twice as many entries for the CQ ring. It's possible for the ++ * application to drive a higher depth than the size of the SQ ring, ++ * since the sqes are only used at submission time. This allows for ++ * some flexibility in overcommitting a bit. If the application has ++ * set IORING_SETUP_CQSIZE, it will have passed in the desired number ++ * of CQ ring entries manually. + */ -+ bool initialized; ++ p->sq_entries = roundup_pow_of_two(entries); ++ if (p->flags & IORING_SETUP_CQSIZE) { ++ /* ++ * If IORING_SETUP_CQSIZE is set, we do the same roundup ++ * to a power-of-two, if it isn't already. We do NOT impose ++ * any cq vs sq ring sizing. 
++ */ ++ if (!p->cq_entries) ++ return -EINVAL; ++ if (p->cq_entries > IORING_MAX_CQ_ENTRIES) { ++ if (!(p->flags & IORING_SETUP_CLAMP)) ++ return -EINVAL; ++ p->cq_entries = IORING_MAX_CQ_ENTRIES; ++ } ++ p->cq_entries = roundup_pow_of_two(p->cq_entries); ++ if (p->cq_entries < p->sq_entries) ++ return -EINVAL; ++ } else { ++ p->cq_entries = 2 * p->sq_entries; ++ } + - /** - * @valid_mask: - * -@@ -472,6 +481,18 @@ struct gpio_chip { - */ - int (*of_xlate)(struct gpio_chip *gc, - const struct of_phandle_args *gpiospec, u32 *flags); ++ ctx = io_ring_ctx_alloc(p); ++ if (!ctx) ++ return -ENOMEM; ++ ctx->compat = in_compat_syscall(); ++ if (!capable(CAP_IPC_LOCK)) ++ ctx->user = get_uid(current_user()); + -+ /** -+ * @of_gpio_ranges_fallback: -+ * -+ * Optional hook for the case that no gpio-ranges property is defined -+ * within the device tree node "np" (usually DT before introduction -+ * of gpio-ranges). So this callback is helpful to provide the -+ * necessary backward compatibility for the pin ranges. ++ /* ++ * This is just grabbed for accounting purposes. When a process exits, ++ * the mm is exited and dropped before the files, hence we need to hang ++ * on to this mm purely for the purposes of being able to unaccount ++ * memory (locked/pinned vm). It's not used for anything else. + */ -+ int (*of_gpio_ranges_fallback)(struct gpio_chip *gc, -+ struct device_node *np); ++ mmgrab(current->mm); ++ ctx->mm_account = current->mm; + - #endif /* CONFIG_OF_GPIO */ - }; - -diff --git a/include/linux/hid.h b/include/linux/hid.h -index 9e067f937dbc2..26742ca14609a 100644 ---- a/include/linux/hid.h -+++ b/include/linux/hid.h -@@ -349,6 +349,8 @@ struct hid_item { - /* BIT(9) reserved for backward compatibility, was NO_INIT_INPUT_REPORTS */ - #define HID_QUIRK_ALWAYS_POLL BIT(10) - #define HID_QUIRK_INPUT_PER_APP BIT(11) -+#define HID_QUIRK_X_INVERT BIT(12) -+#define HID_QUIRK_Y_INVERT BIT(13) - #define HID_QUIRK_SKIP_OUTPUT_REPORTS BIT(16) - #define HID_QUIRK_SKIP_OUTPUT_REPORT_ID BIT(17) - #define HID_QUIRK_NO_OUTPUT_REPORTS_ON_INTR_EP BIT(18) -@@ -840,6 +842,11 @@ static inline bool hid_is_using_ll_driver(struct hid_device *hdev, - return hdev->ll_driver == driver; - } - -+static inline bool hid_is_usb(struct hid_device *hdev) ++ ret = io_allocate_scq_urings(ctx, p); ++ if (ret) ++ goto err; ++ ++ ret = io_sq_offload_create(ctx, p); ++ if (ret) ++ goto err; ++ /* always set a rsrc node */ ++ ret = io_rsrc_node_switch_start(ctx); ++ if (ret) ++ goto err; ++ io_rsrc_node_switch(ctx, NULL); ++ ++ memset(&p->sq_off, 0, sizeof(p->sq_off)); ++ p->sq_off.head = offsetof(struct io_rings, sq.head); ++ p->sq_off.tail = offsetof(struct io_rings, sq.tail); ++ p->sq_off.ring_mask = offsetof(struct io_rings, sq_ring_mask); ++ p->sq_off.ring_entries = offsetof(struct io_rings, sq_ring_entries); ++ p->sq_off.flags = offsetof(struct io_rings, sq_flags); ++ p->sq_off.dropped = offsetof(struct io_rings, sq_dropped); ++ p->sq_off.array = (char *)ctx->sq_array - (char *)ctx->rings; ++ ++ memset(&p->cq_off, 0, sizeof(p->cq_off)); ++ p->cq_off.head = offsetof(struct io_rings, cq.head); ++ p->cq_off.tail = offsetof(struct io_rings, cq.tail); ++ p->cq_off.ring_mask = offsetof(struct io_rings, cq_ring_mask); ++ p->cq_off.ring_entries = offsetof(struct io_rings, cq_ring_entries); ++ p->cq_off.overflow = offsetof(struct io_rings, cq_overflow); ++ p->cq_off.cqes = offsetof(struct io_rings, cqes); ++ p->cq_off.flags = offsetof(struct io_rings, cq_flags); ++ ++ p->features = IORING_FEAT_SINGLE_MMAP | IORING_FEAT_NODROP | 
++ IORING_FEAT_SUBMIT_STABLE | IORING_FEAT_RW_CUR_POS | ++ IORING_FEAT_CUR_PERSONALITY | IORING_FEAT_FAST_POLL | ++ IORING_FEAT_POLL_32BITS | IORING_FEAT_SQPOLL_NONFIXED | ++ IORING_FEAT_EXT_ARG | IORING_FEAT_NATIVE_WORKERS | ++ IORING_FEAT_RSRC_TAGS; ++ ++ if (copy_to_user(params, p, sizeof(*p))) { ++ ret = -EFAULT; ++ goto err; ++ } ++ ++ file = io_uring_get_file(ctx); ++ if (IS_ERR(file)) { ++ ret = PTR_ERR(file); ++ goto err; ++ } ++ ++ /* ++ * Install ring fd as the very last thing, so we don't risk someone ++ * having closed it before we finish setup ++ */ ++ ret = io_uring_install_fd(ctx, file); ++ if (ret < 0) { ++ /* fput will clean it up */ ++ fput(file); ++ return ret; ++ } ++ ++ trace_io_uring_create(ret, ctx, p->sq_entries, p->cq_entries, p->flags); ++ return ret; ++err: ++ io_ring_ctx_wait_and_kill(ctx); ++ return ret; ++} ++ ++/* ++ * Sets up an aio uring context, and returns the fd. Applications asks for a ++ * ring size, we return the actual sq/cq ring sizes (among other things) in the ++ * params structure passed in. ++ */ ++static long io_uring_setup(u32 entries, struct io_uring_params __user *params) +{ -+ return hid_is_using_ll_driver(hdev, &usb_hid_driver); ++ struct io_uring_params p; ++ int i; ++ ++ if (copy_from_user(&p, params, sizeof(p))) ++ return -EFAULT; ++ for (i = 0; i < ARRAY_SIZE(p.resv); i++) { ++ if (p.resv[i]) ++ return -EINVAL; ++ } ++ ++ if (p.flags & ~(IORING_SETUP_IOPOLL | IORING_SETUP_SQPOLL | ++ IORING_SETUP_SQ_AFF | IORING_SETUP_CQSIZE | ++ IORING_SETUP_CLAMP | IORING_SETUP_ATTACH_WQ | ++ IORING_SETUP_R_DISABLED)) ++ return -EINVAL; ++ ++ return io_uring_create(entries, &p, params); +} + - #define PM_HINT_FULLON 1<<5 - #define PM_HINT_NORMAL 1<<1 - -diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h -index 1faebe1cd0ed5..22c1d935e22dd 100644 ---- a/include/linux/hugetlb.h -+++ b/include/linux/hugetlb.h -@@ -167,6 +167,7 @@ long hugetlb_unreserve_pages(struct inode *inode, long start, long end, - long freed); - bool isolate_huge_page(struct page *page, struct list_head *list); - int get_hwpoison_huge_page(struct page *page, bool *hugetlb); -+int get_huge_page_for_hwpoison(unsigned long pfn, int flags); - void putback_active_hugepage(struct page *page); - void move_hugetlb_state(struct page *oldpage, struct page *newpage, int reason); - void free_huge_page(struct page *page); -@@ -362,6 +363,11 @@ static inline int get_hwpoison_huge_page(struct page *page, bool *hugetlb) - return 0; - } - -+static inline int get_huge_page_for_hwpoison(unsigned long pfn, int flags) ++SYSCALL_DEFINE2(io_uring_setup, u32, entries, ++ struct io_uring_params __user *, params) ++{ ++ return io_uring_setup(entries, params); ++} ++ ++static int io_probe(struct io_ring_ctx *ctx, void __user *arg, unsigned nr_args) ++{ ++ struct io_uring_probe *p; ++ size_t size; ++ int i, ret; ++ ++ size = struct_size(p, ops, nr_args); ++ if (size == SIZE_MAX) ++ return -EOVERFLOW; ++ p = kzalloc(size, GFP_KERNEL); ++ if (!p) ++ return -ENOMEM; ++ ++ ret = -EFAULT; ++ if (copy_from_user(p, arg, size)) ++ goto out; ++ ret = -EINVAL; ++ if (memchr_inv(p, 0, size)) ++ goto out; ++ ++ p->last_op = IORING_OP_LAST - 1; ++ if (nr_args > IORING_OP_LAST) ++ nr_args = IORING_OP_LAST; ++ ++ for (i = 0; i < nr_args; i++) { ++ p->ops[i].op = i; ++ if (!io_op_defs[i].not_supported) ++ p->ops[i].flags = IO_URING_OP_SUPPORTED; ++ } ++ p->ops_len = i; ++ ++ ret = 0; ++ if (copy_to_user(arg, p, size)) ++ ret = -EFAULT; ++out: ++ kfree(p); ++ return ret; ++} ++ ++static int 
io_register_personality(struct io_ring_ctx *ctx) ++{ ++ const struct cred *creds; ++ u32 id; ++ int ret; ++ ++ creds = get_current_cred(); ++ ++ ret = xa_alloc_cyclic(&ctx->personalities, &id, (void *)creds, ++ XA_LIMIT(0, USHRT_MAX), &ctx->pers_next, GFP_KERNEL); ++ if (ret < 0) { ++ put_cred(creds); ++ return ret; ++ } ++ return id; ++} ++ ++static int io_register_restrictions(struct io_ring_ctx *ctx, void __user *arg, ++ unsigned int nr_args) ++{ ++ struct io_uring_restriction *res; ++ size_t size; ++ int i, ret; ++ ++ /* Restrictions allowed only if rings started disabled */ ++ if (!(ctx->flags & IORING_SETUP_R_DISABLED)) ++ return -EBADFD; ++ ++ /* We allow only a single restrictions registration */ ++ if (ctx->restrictions.registered) ++ return -EBUSY; ++ ++ if (!arg || nr_args > IORING_MAX_RESTRICTIONS) ++ return -EINVAL; ++ ++ size = array_size(nr_args, sizeof(*res)); ++ if (size == SIZE_MAX) ++ return -EOVERFLOW; ++ ++ res = memdup_user(arg, size); ++ if (IS_ERR(res)) ++ return PTR_ERR(res); ++ ++ ret = 0; ++ ++ for (i = 0; i < nr_args; i++) { ++ switch (res[i].opcode) { ++ case IORING_RESTRICTION_REGISTER_OP: ++ if (res[i].register_op >= IORING_REGISTER_LAST) { ++ ret = -EINVAL; ++ goto out; ++ } ++ ++ __set_bit(res[i].register_op, ++ ctx->restrictions.register_op); ++ break; ++ case IORING_RESTRICTION_SQE_OP: ++ if (res[i].sqe_op >= IORING_OP_LAST) { ++ ret = -EINVAL; ++ goto out; ++ } ++ ++ __set_bit(res[i].sqe_op, ctx->restrictions.sqe_op); ++ break; ++ case IORING_RESTRICTION_SQE_FLAGS_ALLOWED: ++ ctx->restrictions.sqe_flags_allowed = res[i].sqe_flags; ++ break; ++ case IORING_RESTRICTION_SQE_FLAGS_REQUIRED: ++ ctx->restrictions.sqe_flags_required = res[i].sqe_flags; ++ break; ++ default: ++ ret = -EINVAL; ++ goto out; ++ } ++ } ++ ++out: ++ /* Reset all restrictions if an error happened */ ++ if (ret != 0) ++ memset(&ctx->restrictions, 0, sizeof(ctx->restrictions)); ++ else ++ ctx->restrictions.registered = true; ++ ++ kfree(res); ++ return ret; ++} ++ ++static int io_register_enable_rings(struct io_ring_ctx *ctx) +{ ++ if (!(ctx->flags & IORING_SETUP_R_DISABLED)) ++ return -EBADFD; ++ ++ if (ctx->restrictions.registered) ++ ctx->restricted = 1; ++ ++ ctx->flags &= ~IORING_SETUP_R_DISABLED; ++ if (ctx->sq_data && wq_has_sleeper(&ctx->sq_data->wait)) ++ wake_up(&ctx->sq_data->wait); + return 0; +} + - static inline void putback_active_hugepage(struct page *page) - { - } -diff --git a/include/linux/hw_random.h b/include/linux/hw_random.h -index 8e6dd908da216..aa1d4da03538b 100644 ---- a/include/linux/hw_random.h -+++ b/include/linux/hw_random.h -@@ -60,7 +60,5 @@ extern int devm_hwrng_register(struct device *dev, struct hwrng *rng); - /** Unregister a Hardware Random Number Generator driver. */ - extern void hwrng_unregister(struct hwrng *rng); - extern void devm_hwrng_unregister(struct device *dve, struct hwrng *rng); --/** Feed random bits into the pool. 
*/ --extern void add_hwgenerator_randomness(const char *buffer, size_t count, size_t entropy); - - #endif /* LINUX_HWRANDOM_H_ */ -diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h -index 694264503119d..00ed7c17698d1 100644 ---- a/include/linux/ieee80211.h -+++ b/include/linux/ieee80211.h -@@ -1023,6 +1023,8 @@ struct ieee80211_tpc_report_ie { - #define IEEE80211_ADDBA_EXT_FRAG_LEVEL_MASK GENMASK(2, 1) - #define IEEE80211_ADDBA_EXT_FRAG_LEVEL_SHIFT 1 - #define IEEE80211_ADDBA_EXT_NO_FRAG BIT(0) -+#define IEEE80211_ADDBA_EXT_BUF_SIZE_MASK GENMASK(7, 5) -+#define IEEE80211_ADDBA_EXT_BUF_SIZE_SHIFT 10 - - struct ieee80211_addba_ext_ie { - u8 data; -@@ -1697,10 +1699,12 @@ struct ieee80211_ht_operation { - * A-MPDU buffer sizes - * According to HT size varies from 8 to 64 frames - * HE adds the ability to have up to 256 frames. -+ * EHT adds the ability to have up to 1K frames. - */ - #define IEEE80211_MIN_AMPDU_BUF 0x8 - #define IEEE80211_MAX_AMPDU_BUF_HT 0x40 --#define IEEE80211_MAX_AMPDU_BUF 0x100 -+#define IEEE80211_MAX_AMPDU_BUF_HE 0x100 -+#define IEEE80211_MAX_AMPDU_BUF_EHT 0x400 - - - /* Spatial Multiplexing Power Save Modes (for capability) */ -diff --git a/include/linux/if_arp.h b/include/linux/if_arp.h -index b712217f70304..1ed52441972f9 100644 ---- a/include/linux/if_arp.h -+++ b/include/linux/if_arp.h -@@ -52,6 +52,7 @@ static inline bool dev_is_mac_header_xmit(const struct net_device *dev) - case ARPHRD_VOID: - case ARPHRD_NONE: - case ARPHRD_RAWIP: -+ case ARPHRD_PIMREG: - return false; - default: - return true; -diff --git a/include/linux/iio/common/cros_ec_sensors_core.h b/include/linux/iio/common/cros_ec_sensors_core.h -index c582e1a142320..7b5dbd7499957 100644 ---- a/include/linux/iio/common/cros_ec_sensors_core.h -+++ b/include/linux/iio/common/cros_ec_sensors_core.h -@@ -95,8 +95,11 @@ int cros_ec_sensors_read_cmd(struct iio_dev *indio_dev, unsigned long scan_mask, - struct platform_device; - int cros_ec_sensors_core_init(struct platform_device *pdev, - struct iio_dev *indio_dev, bool physical_device, -- cros_ec_sensors_capture_t trigger_capture, -- cros_ec_sensorhub_push_data_cb_t push_data); -+ cros_ec_sensors_capture_t trigger_capture); ++static int __io_register_rsrc_update(struct io_ring_ctx *ctx, unsigned type, ++ struct io_uring_rsrc_update2 *up, ++ unsigned nr_args) ++{ ++ __u32 tmp; ++ int err; + -+int cros_ec_sensors_core_register(struct device *dev, -+ struct iio_dev *indio_dev, -+ cros_ec_sensorhub_push_data_cb_t push_data); - - irqreturn_t cros_ec_sensors_capture(int irq, void *p); - int cros_ec_sensors_push_data(struct iio_dev *indio_dev, -diff --git a/include/linux/iio/common/st_sensors.h b/include/linux/iio/common/st_sensors.h -index 8bdbaf3f3796b..69f4a1f6b536d 100644 ---- a/include/linux/iio/common/st_sensors.h -+++ b/include/linux/iio/common/st_sensors.h -@@ -238,6 +238,7 @@ struct st_sensor_settings { - * @hw_irq_trigger: if we're using the hardware interrupt on the sensor. - * @hw_timestamp: Latest timestamp from the interrupt handler, when in use. - * @buffer_data: Data used by buffer part. 
-+ * @odr_lock: Local lock for preventing concurrent ODR accesses/changes - */ - struct st_sensor_data { - struct device *dev; -@@ -263,6 +264,8 @@ struct st_sensor_data { - s64 hw_timestamp; - - char buffer_data[ST_SENSORS_MAX_BUFFER_SIZE] ____cacheline_aligned; ++ if (check_add_overflow(up->offset, nr_args, &tmp)) ++ return -EOVERFLOW; ++ err = io_rsrc_node_switch_start(ctx); ++ if (err) ++ return err; + -+ struct mutex odr_lock; - }; ++ switch (type) { ++ case IORING_RSRC_FILE: ++ return __io_sqe_files_update(ctx, up, nr_args); ++ case IORING_RSRC_BUFFER: ++ return __io_sqe_buffers_update(ctx, up, nr_args); ++ } ++ return -EINVAL; ++} ++ ++static int io_register_files_update(struct io_ring_ctx *ctx, void __user *arg, ++ unsigned nr_args) ++{ ++ struct io_uring_rsrc_update2 up; ++ ++ if (!nr_args) ++ return -EINVAL; ++ memset(&up, 0, sizeof(up)); ++ if (copy_from_user(&up, arg, sizeof(struct io_uring_rsrc_update))) ++ return -EFAULT; ++ if (up.resv || up.resv2) ++ return -EINVAL; ++ return __io_register_rsrc_update(ctx, IORING_RSRC_FILE, &up, nr_args); ++} ++ ++static int io_register_rsrc_update(struct io_ring_ctx *ctx, void __user *arg, ++ unsigned size, unsigned type) ++{ ++ struct io_uring_rsrc_update2 up; ++ ++ if (size != sizeof(up)) ++ return -EINVAL; ++ if (copy_from_user(&up, arg, sizeof(up))) ++ return -EFAULT; ++ if (!up.nr || up.resv || up.resv2) ++ return -EINVAL; ++ return __io_register_rsrc_update(ctx, type, &up, up.nr); ++} ++ ++static int io_register_rsrc(struct io_ring_ctx *ctx, void __user *arg, ++ unsigned int size, unsigned int type) ++{ ++ struct io_uring_rsrc_register rr; ++ ++ /* keep it extendible */ ++ if (size != sizeof(rr)) ++ return -EINVAL; ++ ++ memset(&rr, 0, sizeof(rr)); ++ if (copy_from_user(&rr, arg, size)) ++ return -EFAULT; ++ if (!rr.nr || rr.resv || rr.resv2) ++ return -EINVAL; ++ ++ switch (type) { ++ case IORING_RSRC_FILE: ++ return io_sqe_files_register(ctx, u64_to_user_ptr(rr.data), ++ rr.nr, u64_to_user_ptr(rr.tags)); ++ case IORING_RSRC_BUFFER: ++ return io_sqe_buffers_register(ctx, u64_to_user_ptr(rr.data), ++ rr.nr, u64_to_user_ptr(rr.tags)); ++ } ++ return -EINVAL; ++} ++ ++static int io_register_iowq_aff(struct io_ring_ctx *ctx, void __user *arg, ++ unsigned len) ++{ ++ struct io_uring_task *tctx = current->io_uring; ++ cpumask_var_t new_mask; ++ int ret; ++ ++ if (!tctx || !tctx->io_wq) ++ return -EINVAL; ++ ++ if (!alloc_cpumask_var(&new_mask, GFP_KERNEL)) ++ return -ENOMEM; ++ ++ cpumask_clear(new_mask); ++ if (len > cpumask_size()) ++ len = cpumask_size(); ++ ++ if (in_compat_syscall()) { ++ ret = compat_get_bitmap(cpumask_bits(new_mask), ++ (const compat_ulong_t __user *)arg, ++ len * 8 /* CHAR_BIT */); ++ } else { ++ ret = copy_from_user(new_mask, arg, len); ++ } ++ ++ if (ret) { ++ free_cpumask_var(new_mask); ++ return -EFAULT; ++ } ++ ++ ret = io_wq_cpu_affinity(tctx->io_wq, new_mask); ++ free_cpumask_var(new_mask); ++ return ret; ++} ++ ++static int io_unregister_iowq_aff(struct io_ring_ctx *ctx) ++{ ++ struct io_uring_task *tctx = current->io_uring; ++ ++ if (!tctx || !tctx->io_wq) ++ return -EINVAL; ++ ++ return io_wq_cpu_affinity(tctx->io_wq, NULL); ++} ++ ++static int io_register_iowq_max_workers(struct io_ring_ctx *ctx, ++ void __user *arg) ++ __must_hold(&ctx->uring_lock) ++{ ++ struct io_tctx_node *node; ++ struct io_uring_task *tctx = NULL; ++ struct io_sq_data *sqd = NULL; ++ __u32 new_count[2]; ++ int i, ret; ++ ++ if (copy_from_user(new_count, arg, sizeof(new_count))) ++ return -EFAULT; ++ for (i = 0; i < 
ARRAY_SIZE(new_count); i++) ++ if (new_count[i] > INT_MAX) ++ return -EINVAL; ++ ++ if (ctx->flags & IORING_SETUP_SQPOLL) { ++ sqd = ctx->sq_data; ++ if (sqd) { ++ /* ++ * Observe the correct sqd->lock -> ctx->uring_lock ++ * ordering. Fine to drop uring_lock here, we hold ++ * a ref to the ctx. ++ */ ++ refcount_inc(&sqd->refs); ++ mutex_unlock(&ctx->uring_lock); ++ mutex_lock(&sqd->lock); ++ mutex_lock(&ctx->uring_lock); ++ if (sqd->thread) ++ tctx = sqd->thread->io_uring; ++ } ++ } else { ++ tctx = current->io_uring; ++ } ++ ++ BUILD_BUG_ON(sizeof(new_count) != sizeof(ctx->iowq_limits)); ++ ++ for (i = 0; i < ARRAY_SIZE(new_count); i++) ++ if (new_count[i]) ++ ctx->iowq_limits[i] = new_count[i]; ++ ctx->iowq_limits_set = true; ++ ++ ret = -EINVAL; ++ if (tctx && tctx->io_wq) { ++ ret = io_wq_max_workers(tctx->io_wq, new_count); ++ if (ret) ++ goto err; ++ } else { ++ memset(new_count, 0, sizeof(new_count)); ++ } ++ ++ if (sqd) { ++ mutex_unlock(&sqd->lock); ++ io_put_sq_data(sqd); ++ } ++ ++ if (copy_to_user(arg, new_count, sizeof(new_count))) ++ return -EFAULT; ++ ++ /* that's it for SQPOLL, only the SQPOLL task creates requests */ ++ if (sqd) ++ return 0; ++ ++ /* now propagate the restriction to all registered users */ ++ list_for_each_entry(node, &ctx->tctx_list, ctx_node) { ++ struct io_uring_task *tctx = node->task->io_uring; ++ ++ if (WARN_ON_ONCE(!tctx->io_wq)) ++ continue; ++ ++ for (i = 0; i < ARRAY_SIZE(new_count); i++) ++ new_count[i] = ctx->iowq_limits[i]; ++ /* ignore errors, it always returns zero anyway */ ++ (void)io_wq_max_workers(tctx->io_wq, new_count); ++ } ++ return 0; ++err: ++ if (sqd) { ++ mutex_unlock(&sqd->lock); ++ io_put_sq_data(sqd); ++ } ++ return ret; ++} ++ ++static bool io_register_op_must_quiesce(int op) ++{ ++ switch (op) { ++ case IORING_REGISTER_BUFFERS: ++ case IORING_UNREGISTER_BUFFERS: ++ case IORING_REGISTER_FILES: ++ case IORING_UNREGISTER_FILES: ++ case IORING_REGISTER_FILES_UPDATE: ++ case IORING_REGISTER_PROBE: ++ case IORING_REGISTER_PERSONALITY: ++ case IORING_UNREGISTER_PERSONALITY: ++ case IORING_REGISTER_FILES2: ++ case IORING_REGISTER_FILES_UPDATE2: ++ case IORING_REGISTER_BUFFERS2: ++ case IORING_REGISTER_BUFFERS_UPDATE: ++ case IORING_REGISTER_IOWQ_AFF: ++ case IORING_UNREGISTER_IOWQ_AFF: ++ case IORING_REGISTER_IOWQ_MAX_WORKERS: ++ return false; ++ default: ++ return true; ++ } ++} ++ ++static int io_ctx_quiesce(struct io_ring_ctx *ctx) ++{ ++ long ret; ++ ++ percpu_ref_kill(&ctx->refs); ++ ++ /* ++ * Drop uring mutex before waiting for references to exit. If another ++ * thread is currently inside io_uring_enter() it might need to grab the ++ * uring_lock to make progress. If we hold it here across the drain ++ * wait, then we can deadlock. It's safe to drop the mutex here, since ++ * no new references will come in after we've killed the percpu ref. ++ */ ++ mutex_unlock(&ctx->uring_lock); ++ do { ++ ret = wait_for_completion_interruptible(&ctx->ref_comp); ++ if (!ret) ++ break; ++ ret = io_run_task_work_sig(); ++ } while (ret >= 0); ++ mutex_lock(&ctx->uring_lock); ++ ++ if (ret) ++ io_refs_resurrect(&ctx->refs, &ctx->ref_comp); ++ return ret; ++} ++ ++static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode, ++ void __user *arg, unsigned nr_args) ++ __releases(ctx->uring_lock) ++ __acquires(ctx->uring_lock) ++{ ++ int ret; ++ ++ /* ++ * We're inside the ring mutex, if the ref is already dying, then ++ * someone else killed the ctx or is already going through ++ * io_uring_register(). 
++ */ ++ if (percpu_ref_is_dying(&ctx->refs)) ++ return -ENXIO; ++ ++ if (ctx->restricted) { ++ opcode = array_index_nospec(opcode, IORING_REGISTER_LAST); ++ if (!test_bit(opcode, ctx->restrictions.register_op)) ++ return -EACCES; ++ } ++ ++ if (io_register_op_must_quiesce(opcode)) { ++ ret = io_ctx_quiesce(ctx); ++ if (ret) ++ return ret; ++ } ++ ++ switch (opcode) { ++ case IORING_REGISTER_BUFFERS: ++ ret = io_sqe_buffers_register(ctx, arg, nr_args, NULL); ++ break; ++ case IORING_UNREGISTER_BUFFERS: ++ ret = -EINVAL; ++ if (arg || nr_args) ++ break; ++ ret = io_sqe_buffers_unregister(ctx); ++ break; ++ case IORING_REGISTER_FILES: ++ ret = io_sqe_files_register(ctx, arg, nr_args, NULL); ++ break; ++ case IORING_UNREGISTER_FILES: ++ ret = -EINVAL; ++ if (arg || nr_args) ++ break; ++ ret = io_sqe_files_unregister(ctx); ++ break; ++ case IORING_REGISTER_FILES_UPDATE: ++ ret = io_register_files_update(ctx, arg, nr_args); ++ break; ++ case IORING_REGISTER_EVENTFD: ++ case IORING_REGISTER_EVENTFD_ASYNC: ++ ret = -EINVAL; ++ if (nr_args != 1) ++ break; ++ ret = io_eventfd_register(ctx, arg); ++ if (ret) ++ break; ++ if (opcode == IORING_REGISTER_EVENTFD_ASYNC) ++ ctx->eventfd_async = 1; ++ else ++ ctx->eventfd_async = 0; ++ break; ++ case IORING_UNREGISTER_EVENTFD: ++ ret = -EINVAL; ++ if (arg || nr_args) ++ break; ++ ret = io_eventfd_unregister(ctx); ++ break; ++ case IORING_REGISTER_PROBE: ++ ret = -EINVAL; ++ if (!arg || nr_args > 256) ++ break; ++ ret = io_probe(ctx, arg, nr_args); ++ break; ++ case IORING_REGISTER_PERSONALITY: ++ ret = -EINVAL; ++ if (arg || nr_args) ++ break; ++ ret = io_register_personality(ctx); ++ break; ++ case IORING_UNREGISTER_PERSONALITY: ++ ret = -EINVAL; ++ if (arg) ++ break; ++ ret = io_unregister_personality(ctx, nr_args); ++ break; ++ case IORING_REGISTER_ENABLE_RINGS: ++ ret = -EINVAL; ++ if (arg || nr_args) ++ break; ++ ret = io_register_enable_rings(ctx); ++ break; ++ case IORING_REGISTER_RESTRICTIONS: ++ ret = io_register_restrictions(ctx, arg, nr_args); ++ break; ++ case IORING_REGISTER_FILES2: ++ ret = io_register_rsrc(ctx, arg, nr_args, IORING_RSRC_FILE); ++ break; ++ case IORING_REGISTER_FILES_UPDATE2: ++ ret = io_register_rsrc_update(ctx, arg, nr_args, ++ IORING_RSRC_FILE); ++ break; ++ case IORING_REGISTER_BUFFERS2: ++ ret = io_register_rsrc(ctx, arg, nr_args, IORING_RSRC_BUFFER); ++ break; ++ case IORING_REGISTER_BUFFERS_UPDATE: ++ ret = io_register_rsrc_update(ctx, arg, nr_args, ++ IORING_RSRC_BUFFER); ++ break; ++ case IORING_REGISTER_IOWQ_AFF: ++ ret = -EINVAL; ++ if (!arg || !nr_args) ++ break; ++ ret = io_register_iowq_aff(ctx, arg, nr_args); ++ break; ++ case IORING_UNREGISTER_IOWQ_AFF: ++ ret = -EINVAL; ++ if (arg || nr_args) ++ break; ++ ret = io_unregister_iowq_aff(ctx); ++ break; ++ case IORING_REGISTER_IOWQ_MAX_WORKERS: ++ ret = -EINVAL; ++ if (!arg || nr_args != 2) ++ break; ++ ret = io_register_iowq_max_workers(ctx, arg); ++ break; ++ default: ++ ret = -EINVAL; ++ break; ++ } ++ ++ if (io_register_op_must_quiesce(opcode)) { ++ /* bring the ctx back to life */ ++ percpu_ref_reinit(&ctx->refs); ++ reinit_completion(&ctx->ref_comp); ++ } ++ return ret; ++} ++ ++SYSCALL_DEFINE4(io_uring_register, unsigned int, fd, unsigned int, opcode, ++ void __user *, arg, unsigned int, nr_args) ++{ ++ struct io_ring_ctx *ctx; ++ long ret = -EBADF; ++ struct fd f; ++ ++ if (opcode >= IORING_REGISTER_LAST) ++ return -EINVAL; ++ ++ f = fdget(fd); ++ if (!f.file) ++ return -EBADF; ++ ++ ret = -EOPNOTSUPP; ++ if (f.file->f_op != &io_uring_fops) ++ goto 
out_fput; ++ ++ ctx = f.file->private_data; ++ ++ io_run_task_work(); ++ ++ mutex_lock(&ctx->uring_lock); ++ ret = __io_uring_register(ctx, opcode, arg, nr_args); ++ mutex_unlock(&ctx->uring_lock); ++ trace_io_uring_register(ctx, opcode, ctx->nr_user_files, ctx->nr_user_bufs, ++ ctx->cq_ev_fd != NULL, ret); ++out_fput: ++ fdput(f); ++ return ret; ++} ++ ++static int __init io_uring_init(void) ++{ ++#define __BUILD_BUG_VERIFY_ELEMENT(stype, eoffset, etype, ename) do { \ ++ BUILD_BUG_ON(offsetof(stype, ename) != eoffset); \ ++ BUILD_BUG_ON(sizeof(etype) != sizeof_field(stype, ename)); \ ++} while (0) ++ ++#define BUILD_BUG_SQE_ELEM(eoffset, etype, ename) \ ++ __BUILD_BUG_VERIFY_ELEMENT(struct io_uring_sqe, eoffset, etype, ename) ++ BUILD_BUG_ON(sizeof(struct io_uring_sqe) != 64); ++ BUILD_BUG_SQE_ELEM(0, __u8, opcode); ++ BUILD_BUG_SQE_ELEM(1, __u8, flags); ++ BUILD_BUG_SQE_ELEM(2, __u16, ioprio); ++ BUILD_BUG_SQE_ELEM(4, __s32, fd); ++ BUILD_BUG_SQE_ELEM(8, __u64, off); ++ BUILD_BUG_SQE_ELEM(8, __u64, addr2); ++ BUILD_BUG_SQE_ELEM(16, __u64, addr); ++ BUILD_BUG_SQE_ELEM(16, __u64, splice_off_in); ++ BUILD_BUG_SQE_ELEM(24, __u32, len); ++ BUILD_BUG_SQE_ELEM(28, __kernel_rwf_t, rw_flags); ++ BUILD_BUG_SQE_ELEM(28, /* compat */ int, rw_flags); ++ BUILD_BUG_SQE_ELEM(28, /* compat */ __u32, rw_flags); ++ BUILD_BUG_SQE_ELEM(28, __u32, fsync_flags); ++ BUILD_BUG_SQE_ELEM(28, /* compat */ __u16, poll_events); ++ BUILD_BUG_SQE_ELEM(28, __u32, poll32_events); ++ BUILD_BUG_SQE_ELEM(28, __u32, sync_range_flags); ++ BUILD_BUG_SQE_ELEM(28, __u32, msg_flags); ++ BUILD_BUG_SQE_ELEM(28, __u32, timeout_flags); ++ BUILD_BUG_SQE_ELEM(28, __u32, accept_flags); ++ BUILD_BUG_SQE_ELEM(28, __u32, cancel_flags); ++ BUILD_BUG_SQE_ELEM(28, __u32, open_flags); ++ BUILD_BUG_SQE_ELEM(28, __u32, statx_flags); ++ BUILD_BUG_SQE_ELEM(28, __u32, fadvise_advice); ++ BUILD_BUG_SQE_ELEM(28, __u32, splice_flags); ++ BUILD_BUG_SQE_ELEM(32, __u64, user_data); ++ BUILD_BUG_SQE_ELEM(40, __u16, buf_index); ++ BUILD_BUG_SQE_ELEM(40, __u16, buf_group); ++ BUILD_BUG_SQE_ELEM(42, __u16, personality); ++ BUILD_BUG_SQE_ELEM(44, __s32, splice_fd_in); ++ BUILD_BUG_SQE_ELEM(44, __u32, file_index); ++ ++ BUILD_BUG_ON(sizeof(struct io_uring_files_update) != ++ sizeof(struct io_uring_rsrc_update)); ++ BUILD_BUG_ON(sizeof(struct io_uring_rsrc_update) > ++ sizeof(struct io_uring_rsrc_update2)); ++ ++ /* ->buf_index is u16 */ ++ BUILD_BUG_ON(IORING_MAX_REG_BUFFERS >= (1u << 16)); ++ ++ /* should fit into one byte */ ++ BUILD_BUG_ON(SQE_VALID_FLAGS >= (1 << 8)); ++ ++ BUILD_BUG_ON(ARRAY_SIZE(io_op_defs) != IORING_OP_LAST); ++ BUILD_BUG_ON(__REQ_F_LAST_BIT > 8 * sizeof(int)); ++ ++ req_cachep = KMEM_CACHE(io_kiocb, SLAB_HWCACHE_ALIGN | SLAB_PANIC | ++ SLAB_ACCOUNT); ++ return 0; ++}; ++__initcall(io_uring_init); +diff --git a/ipc/mqueue.c b/ipc/mqueue.c +index 5becca9be867c..089c34d0732cf 100644 +--- a/ipc/mqueue.c ++++ b/ipc/mqueue.c +@@ -45,6 +45,7 @@ - #ifdef CONFIG_IIO_BUFFER -diff --git a/include/linux/iio/trigger.h b/include/linux/iio/trigger.h -index 096f68dd2e0ca..4c69b144677b1 100644 ---- a/include/linux/iio/trigger.h -+++ b/include/linux/iio/trigger.h -@@ -55,6 +55,7 @@ struct iio_trigger_ops { - * @attached_own_device:[INTERN] if we are using our own device as trigger, - * i.e. if we registered a poll function to the same - * device as the one providing the trigger. -+ * @reenable_work: [INTERN] work item used to ensure reenable can sleep. 
- **/ - struct iio_trigger { - const struct iio_trigger_ops *ops; -@@ -74,6 +75,7 @@ struct iio_trigger { - unsigned long pool[BITS_TO_LONGS(CONFIG_IIO_CONSUMERS_PER_TRIGGER)]; - struct mutex pool_lock; - bool attached_own_device; -+ struct work_struct reenable_work; + struct mqueue_fs_context { + struct ipc_namespace *ipc_ns; ++ bool newns; /* Set if newly created ipc namespace */ }; + #define MQUEUE_MAGIC 0x19800202 +@@ -427,6 +428,14 @@ static int mqueue_get_tree(struct fs_context *fc) + { + struct mqueue_fs_context *ctx = fc->fs_private; -diff --git a/include/linux/instrumentation.h b/include/linux/instrumentation.h -index fa2cd8c63dcc9..24359b4a96053 100644 ---- a/include/linux/instrumentation.h -+++ b/include/linux/instrumentation.h -@@ -11,7 +11,7 @@ - asm volatile(__stringify(c) ": nop\n\t" \ - ".pushsection .discard.instr_begin\n\t" \ - ".long " __stringify(c) "b - .\n\t" \ -- ".popsection\n\t"); \ -+ ".popsection\n\t" : : "i" (c)); \ - }) - #define instrumentation_begin() __instrumentation_begin(__COUNTER__) ++ /* ++ * With a newly created ipc namespace, we don't need to do a search ++ * for an ipc namespace match, but we still need to set s_fs_info. ++ */ ++ if (ctx->newns) { ++ fc->s_fs_info = ctx->ipc_ns; ++ return get_tree_nodev(fc, mqueue_fill_super); ++ } + return get_tree_keyed(fc, mqueue_fill_super, ctx->ipc_ns); + } -@@ -50,7 +50,7 @@ - asm volatile(__stringify(c) ": nop\n\t" \ - ".pushsection .discard.instr_end\n\t" \ - ".long " __stringify(c) "b - .\n\t" \ -- ".popsection\n\t"); \ -+ ".popsection\n\t" : : "i" (c)); \ - }) - #define instrumentation_end() __instrumentation_end(__COUNTER__) - #else -diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h -index 05a65eb155f76..81da7107e3bd0 100644 ---- a/include/linux/intel-iommu.h -+++ b/include/linux/intel-iommu.h -@@ -196,7 +196,6 @@ - #define ecap_dis(e) (((e) >> 27) & 0x1) - #define ecap_nest(e) (((e) >> 26) & 0x1) - #define ecap_mts(e) (((e) >> 25) & 0x1) --#define ecap_ecs(e) (((e) >> 24) & 0x1) - #define ecap_iotlb_offset(e) ((((e) >> 8) & 0x3ff) * 16) - #define ecap_max_iotlb_offset(e) (ecap_iotlb_offset(e) + 16) - #define ecap_coherent(e) ((e) & 0x1) -@@ -264,7 +263,6 @@ - #define DMA_GSTS_CFIS (((u32)1) << 23) +@@ -454,6 +463,10 @@ static int mqueue_init_fs_context(struct fs_context *fc) + return 0; + } - /* DMA_RTADDR_REG */ --#define DMA_RTADDR_RTT (((u64)1) << 11) - #define DMA_RTADDR_SMT (((u64)1) << 10) ++/* ++ * mq_init_ns() is currently the only caller of mq_create_mount(). ++ * So the ns parameter is always a newly created ipc namespace. 
++ */ + static struct vfsmount *mq_create_mount(struct ipc_namespace *ns) + { + struct mqueue_fs_context *ctx; +@@ -465,6 +478,7 @@ static struct vfsmount *mq_create_mount(struct ipc_namespace *ns) + return ERR_CAST(fc); + + ctx = fc->fs_private; ++ ctx->newns = true; + put_ipc_ns(ctx->ipc_ns); + ctx->ipc_ns = get_ipc_ns(ns); + put_user_ns(fc->user_ns); +diff --git a/ipc/sem.c b/ipc/sem.c +index 6693daf4fe112..c1f3ca244a698 100644 +--- a/ipc/sem.c ++++ b/ipc/sem.c +@@ -1964,6 +1964,7 @@ static struct sem_undo *find_alloc_undo(struct ipc_namespace *ns, int semid) + */ + un = lookup_undo(ulp, semid); + if (un) { ++ spin_unlock(&ulp->lock); + kvfree(new); + goto success; + } +@@ -1976,9 +1977,8 @@ static struct sem_undo *find_alloc_undo(struct ipc_namespace *ns, int semid) + ipc_assert_locked_object(&sma->sem_perm); + list_add(&new->list_id, &sma->list_id); + un = new; +- +-success: + spin_unlock(&ulp->lock); ++success: + sem_unlock(sma, -1); + out: + return un; +@@ -2182,14 +2182,15 @@ long __do_semtimedop(int semid, struct sembuf *sops, + * scenarios where we were awakened externally, during the + * window between wake_q_add() and wake_up_q(). + */ ++ rcu_read_lock(); + error = READ_ONCE(queue.status); + if (error != -EINTR) { + /* see SEM_BARRIER_2 for purpose/pairing */ + smp_acquire__after_ctrl_dep(); ++ rcu_read_unlock(); + goto out; + } - /* CCMD_REG */ -@@ -594,6 +592,7 @@ struct intel_iommu { - #ifdef CONFIG_INTEL_IOMMU - unsigned long *domain_ids; /* bitmap of domains */ - struct dmar_domain ***domains; /* ptr to domains */ -+ unsigned long *copied_tables; /* bitmap of copied tables */ - spinlock_t lock; /* protect context, domain ids */ - struct root_entry *root_entry; /* virtual address */ +- rcu_read_lock(); + locknum = sem_lock(sma, sops, nsops); -@@ -713,6 +712,11 @@ static inline int first_pte_in_page(struct dma_pte *pte) - return !((unsigned long)pte & ~VTD_PAGE_MASK); - } + if (!ipc_valid_object(&sma->sem_perm)) +diff --git a/ipc/shm.c b/ipc/shm.c +index ab749be6d8b71..048eb183b24b9 100644 +--- a/ipc/shm.c ++++ b/ipc/shm.c +@@ -62,9 +62,18 @@ struct shmid_kernel /* private to the kernel */ + struct pid *shm_lprid; + struct ucounts *mlock_ucounts; -+static inline bool context_present(struct context_entry *context) -+{ -+ return (context->lo & 1); -+} +- /* The task created the shm object. NULL if the task is dead. */ ++ /* ++ * The task created the shm object, for ++ * task_lock(shp->shm_creator) ++ */ + struct task_struct *shm_creator; +- struct list_head shm_clist; /* list by creator */ + - extern struct dmar_drhd_unit * dmar_find_matched_drhd_unit(struct pci_dev *dev); - extern int dmar_find_matched_atsr_unit(struct pci_dev *dev); ++ /* ++ * List by creator. task_lock(->shm_creator) required for read/write. ++ * If list_empty(), then the creator is dead already. 
++ */ ++ struct list_head shm_clist; ++ struct ipc_namespace *ns; + } __randomize_layout; -@@ -806,7 +810,6 @@ static inline void intel_iommu_debugfs_init(void) {} - #endif /* CONFIG_INTEL_IOMMU_DEBUGFS */ + /* shm_mode upper byte flags */ +@@ -115,6 +124,7 @@ static void do_shm_rmid(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp) + struct shmid_kernel *shp; - extern const struct attribute_group *intel_iommu_groups[]; --bool context_present(struct context_entry *context); - struct context_entry *iommu_context_addr(struct intel_iommu *iommu, u8 bus, - u8 devfn, int alloc); + shp = container_of(ipcp, struct shmid_kernel, shm_perm); ++ WARN_ON(ns != shp->ns); -diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h -index 86af6f0a00a2a..ca98aeadcc804 100644 ---- a/include/linux/io-pgtable.h -+++ b/include/linux/io-pgtable.h -@@ -74,17 +74,22 @@ struct io_pgtable_cfg { - * to support up to 35 bits PA where the bit32, bit33 and bit34 are - * encoded in the bit9, bit4 and bit5 of the PTE respectively. - * -+ * IO_PGTABLE_QUIRK_ARM_MTK_TTBR_EXT: (ARM v7s format) MediaTek IOMMUs -+ * extend the translation table base support up to 35 bits PA, the -+ * encoding format is same with IO_PGTABLE_QUIRK_ARM_MTK_EXT. -+ * - * IO_PGTABLE_QUIRK_ARM_TTBR1: (ARM LPAE format) Configure the table - * for use in the upper half of a split address space. - * - * IO_PGTABLE_QUIRK_ARM_OUTER_WBWA: Override the outer-cacheability - * attributes set in the TCR for a non-coherent page-table walker. - */ -- #define IO_PGTABLE_QUIRK_ARM_NS BIT(0) -- #define IO_PGTABLE_QUIRK_NO_PERMS BIT(1) -- #define IO_PGTABLE_QUIRK_ARM_MTK_EXT BIT(3) -- #define IO_PGTABLE_QUIRK_ARM_TTBR1 BIT(5) -- #define IO_PGTABLE_QUIRK_ARM_OUTER_WBWA BIT(6) -+ #define IO_PGTABLE_QUIRK_ARM_NS BIT(0) -+ #define IO_PGTABLE_QUIRK_NO_PERMS BIT(1) -+ #define IO_PGTABLE_QUIRK_ARM_MTK_EXT BIT(3) -+ #define IO_PGTABLE_QUIRK_ARM_MTK_TTBR_EXT BIT(4) -+ #define IO_PGTABLE_QUIRK_ARM_TTBR1 BIT(5) -+ #define IO_PGTABLE_QUIRK_ARM_OUTER_WBWA BIT(6) - unsigned long quirks; - unsigned long pgsize_bitmap; - unsigned int ias; -diff --git a/include/linux/iomap.h b/include/linux/iomap.h -index 24f8489583ca7..829f2325ecbab 100644 ---- a/include/linux/iomap.h -+++ b/include/linux/iomap.h -@@ -330,12 +330,19 @@ struct iomap_dio_ops { - */ - #define IOMAP_DIO_OVERWRITE_ONLY (1 << 1) + if (shp->shm_nattch) { + shp->shm_perm.mode |= SHM_DEST; +@@ -225,10 +235,43 @@ static void shm_rcu_free(struct rcu_head *head) + kfree(shp); + } +-static inline void shm_rmid(struct ipc_namespace *ns, struct shmid_kernel *s) +/* -+ * When a page fault occurs, return a partial synchronous result and allow -+ * the caller to retry the rest of the operation after dealing with the page -+ * fault. ++ * It has to be called with shp locked. 
++ * It must be called before ipc_rmid() + */ -+#define IOMAP_DIO_PARTIAL (1 << 2) ++static inline void shm_clist_rm(struct shmid_kernel *shp) + { +- list_del(&s->shm_clist); +- ipc_rmid(&shm_ids(ns), &s->shm_perm); ++ struct task_struct *creator; + - ssize_t iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, - const struct iomap_ops *ops, const struct iomap_dio_ops *dops, -- unsigned int dio_flags); -+ unsigned int dio_flags, size_t done_before); - struct iomap_dio *__iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, - const struct iomap_ops *ops, const struct iomap_dio_ops *dops, -- unsigned int dio_flags); -+ unsigned int dio_flags, size_t done_before); - ssize_t iomap_dio_complete(struct iomap_dio *dio); - int iomap_dio_iopoll(struct kiocb *kiocb, bool spin); - -diff --git a/include/linux/ioport.h b/include/linux/ioport.h -index 8359c50f99884..ec5f71f7135b0 100644 ---- a/include/linux/ioport.h -+++ b/include/linux/ioport.h -@@ -262,6 +262,8 @@ resource_union(struct resource *r1, struct resource *r2, struct resource *r) - #define request_muxed_region(start,n,name) __request_region(&ioport_resource, (start), (n), (name), IORESOURCE_MUXED) - #define __request_mem_region(start,n,name, excl) __request_region(&iomem_resource, (start), (n), (name), excl) - #define request_mem_region(start,n,name) __request_region(&iomem_resource, (start), (n), (name), 0) -+#define request_mem_region_muxed(start, n, name) \ -+ __request_region(&iomem_resource, (start), (n), (name), IORESOURCE_MUXED) - #define request_mem_region_exclusive(start,n,name) \ - __request_region(&iomem_resource, (start), (n), (name), IORESOURCE_EXCLUSIVE) - #define rename_region(region, newname) do { (region)->name = (newname); } while (0) -diff --git a/include/linux/ioprio.h b/include/linux/ioprio.h -index 3f53bc27a19bf..3d088a88f8320 100644 ---- a/include/linux/ioprio.h -+++ b/include/linux/ioprio.h -@@ -11,7 +11,7 @@ - /* - * Default IO priority. - */ --#define IOPRIO_DEFAULT IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, IOPRIO_BE_NORM) -+#define IOPRIO_DEFAULT IOPRIO_PRIO_VALUE(IOPRIO_CLASS_NONE, 0) - - /* - * Check that a priority value has a valid class. -diff --git a/include/linux/iova.h b/include/linux/iova.h -index 71d8a2de66354..6b6cc104e300d 100644 ---- a/include/linux/iova.h -+++ b/include/linux/iova.h -@@ -133,7 +133,7 @@ static inline unsigned long iova_pfn(struct iova_domain *iovad, dma_addr_t iova) - return iova >> iova_shift(iovad); - } - --#if IS_ENABLED(CONFIG_IOMMU_IOVA) -+#if IS_REACHABLE(CONFIG_IOMMU_IOVA) - int iova_cache_get(void); - void iova_cache_put(void); - -diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h -index 05e22770af517..b75395ec8d521 100644 ---- a/include/linux/ipc_namespace.h -+++ b/include/linux/ipc_namespace.h -@@ -131,6 +131,16 @@ static inline struct ipc_namespace *get_ipc_ns(struct ipc_namespace *ns) - return ns; - } - -+static inline struct ipc_namespace *get_ipc_ns_not_zero(struct ipc_namespace *ns) -+{ -+ if (ns) { -+ if (refcount_inc_not_zero(&ns->ns.count)) -+ return ns; -+ } ++ /* ensure that shm_creator does not disappear */ ++ rcu_read_lock(); + -+ return NULL; -+} ++ /* ++ * A concurrent exit_shm may do a list_del_init() as well. ++ * Just do nothing if exit_shm already did the work ++ */ ++ if (!list_empty(&shp->shm_clist)) { ++ /* ++ * shp->shm_creator is guaranteed to be valid *only* ++ * if shp->shm_clist is not empty. 
++ */ ++ creator = shp->shm_creator; + - extern void put_ipc_ns(struct ipc_namespace *ns); - #else - static inline struct ipc_namespace *copy_ipcs(unsigned long flags, -@@ -147,6 +157,11 @@ static inline struct ipc_namespace *get_ipc_ns(struct ipc_namespace *ns) - return ns; - } - -+static inline struct ipc_namespace *get_ipc_ns_not_zero(struct ipc_namespace *ns) -+{ -+ return ns; ++ task_lock(creator); ++ /* ++ * list_del_init() is a nop if the entry was already removed ++ * from the list. ++ */ ++ list_del_init(&shp->shm_clist); ++ task_unlock(creator); ++ } ++ rcu_read_unlock(); +} + - static inline void put_ipc_ns(struct ipc_namespace *ns) - { ++static inline void shm_rmid(struct shmid_kernel *s) ++{ ++ shm_clist_rm(s); ++ ipc_rmid(&shm_ids(s->ns), &s->shm_perm); } -diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h -index ef4a69865737c..d1f3864307959 100644 ---- a/include/linux/ipv6.h -+++ b/include/linux/ipv6.h -@@ -51,7 +51,7 @@ struct ipv6_devconf { - __s32 use_optimistic; - #endif - #ifdef CONFIG_IPV6_MROUTE -- __s32 mc_forwarding; -+ atomic_t mc_forwarding; - #endif - __s32 disable_ipv6; - __s32 drop_unicast_in_l2_multicast; -@@ -132,6 +132,7 @@ struct inet6_skb_parm { - __u16 dsthao; - #endif - __u16 frag_max_size; -+ __u16 srhoff; - - #define IP6SKB_XFRM_TRANSFORMED 1 - #define IP6SKB_FORWARDED 2 -@@ -141,6 +142,7 @@ struct inet6_skb_parm { - #define IP6SKB_HOPBYHOP 32 - #define IP6SKB_L3SLAVE 64 - #define IP6SKB_JUMBOGRAM 128 -+#define IP6SKB_SEG6 256 - }; - - #if defined(CONFIG_NET_L3_MASTER_DEV) -@@ -282,7 +284,6 @@ struct ipv6_pinfo { - __be32 rcv_flowinfo; - - __u32 dst_cookie; -- __u32 rx_dst_cookie; - - struct ipv6_mc_socklist __rcu *ipv6_mc_list; - struct ipv6_ac_socklist *ipv6_ac_list; -diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h -index 600c10da321a7..747f40e0c3260 100644 ---- a/include/linux/irqflags.h -+++ b/include/linux/irqflags.h -@@ -20,13 +20,13 @@ - #ifdef CONFIG_PROVE_LOCKING - extern void lockdep_softirqs_on(unsigned long ip); - extern void lockdep_softirqs_off(unsigned long ip); -- extern void lockdep_hardirqs_on_prepare(unsigned long ip); -+ extern void lockdep_hardirqs_on_prepare(void); - extern void lockdep_hardirqs_on(unsigned long ip); - extern void lockdep_hardirqs_off(unsigned long ip); - #else - static inline void lockdep_softirqs_on(unsigned long ip) { } - static inline void lockdep_softirqs_off(unsigned long ip) { } -- static inline void lockdep_hardirqs_on_prepare(unsigned long ip) { } -+ static inline void lockdep_hardirqs_on_prepare(void) { } - static inline void lockdep_hardirqs_on(unsigned long ip) { } - static inline void lockdep_hardirqs_off(unsigned long ip) { } - #endif -diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h -index fd933c45281af..d63b8106796e2 100644 ---- a/include/linux/jbd2.h -+++ b/include/linux/jbd2.h -@@ -1295,7 +1295,7 @@ struct journal_s - * Clean-up after fast commit or full commit. JBD2 calls this function - * after every commit operation. 
- */ -- void (*j_fc_cleanup_callback)(struct journal_s *journal, int); -+ void (*j_fc_cleanup_callback)(struct journal_s *journal, int full, tid_t tid); - /** - * @j_fc_replay_callback: -diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h -index 48b9b2a82767d..019e55c13248b 100644 ---- a/include/linux/jump_label.h -+++ b/include/linux/jump_label.h -@@ -261,9 +261,9 @@ extern void static_key_disable_cpuslocked(struct static_key *key); - #include <linux/atomic.h> - #include <linux/bug.h> --static inline int static_key_count(struct static_key *key) -+static __always_inline int static_key_count(struct static_key *key) +@@ -283,7 +326,7 @@ static void shm_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp) + shm_file = shp->shm_file; + shp->shm_file = NULL; + ns->shm_tot -= (shp->shm_segsz + PAGE_SIZE - 1) >> PAGE_SHIFT; +- shm_rmid(ns, shp); ++ shm_rmid(shp); + shm_unlock(shp); + if (!is_file_hugepages(shm_file)) + shmem_lock(shm_file, 0, shp->mlock_ucounts); +@@ -306,10 +349,10 @@ static void shm_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp) + * + * 2) sysctl kernel.shm_rmid_forced is set to 1. + */ +-static bool shm_may_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp) ++static bool shm_may_destroy(struct shmid_kernel *shp) { -- return atomic_read(&key->enabled); -+ return arch_atomic_read(&key->enabled); + return (shp->shm_nattch == 0) && +- (ns->shm_rmid_forced || ++ (shp->ns->shm_rmid_forced || + (shp->shm_perm.mode & SHM_DEST)); } - static __always_inline void jump_label_init(void) -diff --git a/include/linux/kasan.h b/include/linux/kasan.h -index dd874a1ee862a..f407e937241af 100644 ---- a/include/linux/kasan.h -+++ b/include/linux/kasan.h -@@ -461,12 +461,12 @@ static inline void kasan_release_vmalloc(unsigned long start, - * allocations with real shadow memory. With KASAN vmalloc, the special - * case is unnecessary, as the work is handled in the generic case. - */ --int kasan_module_alloc(void *addr, size_t size); -+int kasan_module_alloc(void *addr, size_t size, gfp_t gfp_mask); - void kasan_free_shadow(const struct vm_struct *vm); +@@ -340,7 +383,7 @@ static void shm_close(struct vm_area_struct *vma) + ipc_update_pid(&shp->shm_lprid, task_tgid(current)); + shp->shm_dtim = ktime_get_real_seconds(); + shp->shm_nattch--; +- if (shm_may_destroy(ns, shp)) ++ if (shm_may_destroy(shp)) + shm_destroy(ns, shp); + else + shm_unlock(shp); +@@ -361,10 +404,10 @@ static int shm_try_destroy_orphaned(int id, void *p, void *data) + * + * As shp->* are changed under rwsem, it's safe to skip shp locking. 
+ */ +- if (shp->shm_creator != NULL) ++ if (!list_empty(&shp->shm_clist)) + return 0; - #else /* (CONFIG_KASAN_GENERIC || CONFIG_KASAN_SW_TAGS) && !CONFIG_KASAN_VMALLOC */ +- if (shm_may_destroy(ns, shp)) { ++ if (shm_may_destroy(shp)) { + shm_lock_by_ptr(shp); + shm_destroy(ns, shp); + } +@@ -382,48 +425,97 @@ void shm_destroy_orphaned(struct ipc_namespace *ns) + /* Locking assumes this will only be called with task == current */ + void exit_shm(struct task_struct *task) + { +- struct ipc_namespace *ns = task->nsproxy->ipc_ns; +- struct shmid_kernel *shp, *n; ++ for (;;) { ++ struct shmid_kernel *shp; ++ struct ipc_namespace *ns; --static inline int kasan_module_alloc(void *addr, size_t size) { return 0; } -+static inline int kasan_module_alloc(void *addr, size_t size, gfp_t gfp_mask) { return 0; } - static inline void kasan_free_shadow(const struct vm_struct *vm) {} +- if (list_empty(&task->sysvshm.shm_clist)) +- return; ++ task_lock(task); ++ ++ if (list_empty(&task->sysvshm.shm_clist)) { ++ task_unlock(task); ++ break; ++ } ++ ++ shp = list_first_entry(&task->sysvshm.shm_clist, struct shmid_kernel, ++ shm_clist); - #endif /* (CONFIG_KASAN_GENERIC || CONFIG_KASAN_SW_TAGS) && !CONFIG_KASAN_VMALLOC */ -diff --git a/include/linux/kernel.h b/include/linux/kernel.h -index 2776423a587e4..f56cd8879a594 100644 ---- a/include/linux/kernel.h -+++ b/include/linux/kernel.h -@@ -277,7 +277,7 @@ static inline char *hex_byte_pack_upper(char *buf, u8 byte) - return buf; - } +- /* +- * If kernel.shm_rmid_forced is not set then only keep track of +- * which shmids are orphaned, so that a later set of the sysctl +- * can clean them up. +- */ +- if (!ns->shm_rmid_forced) { +- down_read(&shm_ids(ns).rwsem); +- list_for_each_entry(shp, &task->sysvshm.shm_clist, shm_clist) +- shp->shm_creator = NULL; + /* +- * Only under read lock but we are only called on current +- * so no entry on the list will be shared. ++ * 1) Get pointer to the ipc namespace. It is worth to say ++ * that this pointer is guaranteed to be valid because ++ * shp lifetime is always shorter than namespace lifetime ++ * in which shp lives. ++ * We taken task_lock it means that shp won't be freed. + */ +- list_del(&task->sysvshm.shm_clist); +- up_read(&shm_ids(ns).rwsem); +- return; +- } ++ ns = shp->ns; --extern int hex_to_bin(char ch); -+extern int hex_to_bin(unsigned char ch); - extern int __must_check hex2bin(u8 *dst, const char *src, size_t count); - extern char *bin2hex(char *dst, const void *src, size_t count); +- /* +- * Destroy all already created segments, that were not yet mapped, +- * and mark any mapped as orphan to cover the sysctl toggling. +- * Destroy is skipped if shm_may_destroy() returns false. +- */ +- down_write(&shm_ids(ns).rwsem); +- list_for_each_entry_safe(shp, n, &task->sysvshm.shm_clist, shm_clist) { +- shp->shm_creator = NULL; ++ /* ++ * 2) If kernel.shm_rmid_forced is not set then only keep track of ++ * which shmids are orphaned, so that a later set of the sysctl ++ * can clean them up. 
++ */ ++ if (!ns->shm_rmid_forced) ++ goto unlink_continue; -diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h -index 44ae1a7eb9e39..69ae6b2784645 100644 ---- a/include/linux/kernel_stat.h -+++ b/include/linux/kernel_stat.h -@@ -102,6 +102,7 @@ extern void account_system_index_time(struct task_struct *, u64, - enum cpu_usage_stat); - extern void account_steal_time(u64); - extern void account_idle_time(u64); -+extern u64 get_idle_time(struct kernel_cpustat *kcs, int cpu); +- if (shm_may_destroy(ns, shp)) { +- shm_lock_by_ptr(shp); +- shm_destroy(ns, shp); ++ /* ++ * 3) get a reference to the namespace. ++ * The refcount could be already 0. If it is 0, then ++ * the shm objects will be free by free_ipc_work(). ++ */ ++ ns = get_ipc_ns_not_zero(ns); ++ if (!ns) { ++unlink_continue: ++ list_del_init(&shp->shm_clist); ++ task_unlock(task); ++ continue; + } +- } - #ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE - static inline void account_process_tick(struct task_struct *tsk, int user) -diff --git a/include/linux/kexec.h b/include/linux/kexec.h -index 0c994ae37729e..cf042d41c87b9 100644 ---- a/include/linux/kexec.h -+++ b/include/linux/kexec.h -@@ -187,14 +187,6 @@ void *kexec_purgatory_get_symbol_addr(struct kimage *image, const char *name); - int arch_kexec_kernel_image_probe(struct kimage *image, void *buf, - unsigned long buf_len); - void *arch_kexec_kernel_image_load(struct kimage *image); --int arch_kexec_apply_relocations_add(struct purgatory_info *pi, -- Elf_Shdr *section, -- const Elf_Shdr *relsec, -- const Elf_Shdr *symtab); --int arch_kexec_apply_relocations(struct purgatory_info *pi, -- Elf_Shdr *section, -- const Elf_Shdr *relsec, -- const Elf_Shdr *symtab); - int arch_kimage_file_post_load_cleanup(struct kimage *image); - #ifdef CONFIG_KEXEC_SIG - int arch_kexec_kernel_verify_sig(struct kimage *image, void *buf, -@@ -223,6 +215,44 @@ extern int crash_exclude_mem_range(struct crash_mem *mem, - unsigned long long mend); - extern int crash_prepare_elf64_headers(struct crash_mem *mem, int kernel_map, - void **addr, unsigned long *sz); +- /* Remove the list head from any segments still attached. */ +- list_del(&task->sysvshm.shm_clist); +- up_write(&shm_ids(ns).rwsem); ++ /* ++ * 4) get a reference to shp. ++ * This cannot fail: shm_clist_rm() is called before ++ * ipc_rmid(), thus the refcount cannot be 0. ++ */ ++ WARN_ON(!ipc_rcu_getref(&shp->shm_perm)); + -+#ifndef arch_kexec_apply_relocations_add -+/* -+ * arch_kexec_apply_relocations_add - apply relocations of type RELA -+ * @pi: Purgatory to be relocated. -+ * @section: Section relocations applying to. -+ * @relsec: Section containing RELAs. -+ * @symtab: Corresponding symtab. -+ * -+ * Return: 0 on success, negative errno on error. -+ */ -+static inline int -+arch_kexec_apply_relocations_add(struct purgatory_info *pi, Elf_Shdr *section, -+ const Elf_Shdr *relsec, const Elf_Shdr *symtab) -+{ -+ pr_err("RELA relocation unsupported.\n"); -+ return -ENOEXEC; -+} -+#endif ++ /* ++ * 5) unlink the shm segment from the list of segments ++ * created by current. ++ * This must be done last. After unlinking, ++ * only the refcounts obtained above prevent IPC_RMID ++ * from destroying the segment or the namespace. ++ */ ++ list_del_init(&shp->shm_clist); + -+#ifndef arch_kexec_apply_relocations -+/* -+ * arch_kexec_apply_relocations - apply relocations of type REL -+ * @pi: Purgatory to be relocated. -+ * @section: Section relocations applying to. -+ * @relsec: Section containing RELs. -+ * @symtab: Corresponding symtab. 
-+ * -+ * Return: 0 on success, negative errno on error. -+ */ -+static inline int -+arch_kexec_apply_relocations(struct purgatory_info *pi, Elf_Shdr *section, -+ const Elf_Shdr *relsec, const Elf_Shdr *symtab) -+{ -+ pr_err("REL relocation unsupported.\n"); -+ return -ENOEXEC; -+} -+#endif - #endif /* CONFIG_KEXEC_FILE */ ++ task_unlock(task); ++ ++ /* ++ * 6) we have all references ++ * Thus lock & if needed destroy shp. ++ */ ++ down_write(&shm_ids(ns).rwsem); ++ shm_lock_by_ptr(shp); ++ /* ++ * rcu_read_lock was implicitly taken in shm_lock_by_ptr, it's ++ * safe to call ipc_rcu_putref here ++ */ ++ ipc_rcu_putref(&shp->shm_perm, shm_rcu_free); ++ ++ if (ipc_valid_object(&shp->shm_perm)) { ++ if (shm_may_destroy(shp)) ++ shm_destroy(ns, shp); ++ else ++ shm_unlock(shp); ++ } else { ++ /* ++ * Someone else deleted the shp from namespace ++ * idr/kht while we have waited. ++ * Just unlock and continue. ++ */ ++ shm_unlock(shp); ++ } ++ ++ up_write(&shm_ids(ns).rwsem); ++ put_ipc_ns(ns); /* paired with get_ipc_ns_not_zero */ ++ } + } - #ifdef CONFIG_KEXEC_ELF -@@ -422,6 +452,12 @@ static inline int kexec_crash_loaded(void) { return 0; } - #define kexec_in_progress false - #endif /* CONFIG_KEXEC_CORE */ + static vm_fault_t shm_fault(struct vm_fault *vmf) +@@ -680,7 +772,11 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params) + if (error < 0) + goto no_id; -+#ifdef CONFIG_KEXEC_SIG -+void set_kexec_sig_enforced(void); -+#else -+static inline void set_kexec_sig_enforced(void) {} -+#endif ++ shp->ns = ns; + - #endif /* !defined(__ASSEBMLY__) */ ++ task_lock(current); + list_add(&shp->shm_clist, ¤t->sysvshm.shm_clist); ++ task_unlock(current); - #endif /* LINUX_KEXEC_H */ -diff --git a/include/linux/kfence.h b/include/linux/kfence.h -index 3fe6dd8a18c19..3c75209a545e1 100644 ---- a/include/linux/kfence.h -+++ b/include/linux/kfence.h -@@ -14,6 +14,9 @@ + /* + * shmid gets reported as "inode#" in /proc/pid/maps. 
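
The shm_clist rework above moves segment lifetime tracking onto the creator task: membership on a task's shm_clist now implies shm_creator is still alive, the list is protected by task_lock(shm_creator) instead of the ipc rwsem, and shm_clist_rm() must run before ipc_rmid() so that exit_shm() can still take a reference via ipc_rcu_getref(). The user-visible effect can be sketched as below; this demo is not part of the patch and assumes sysctl kernel.shm_rmid_forced has been set to 1, in which case an unattached segment is reaped by exit_shm() when its creator exits:

    #include <stdio.h>
    #include <sys/ipc.h>
    #include <sys/shm.h>
    #include <sys/wait.h>
    #include <unistd.h>

    int main(void)
    {
        int pipefd[2], shmid, status;
        struct shmid_ds ds;

        if (pipe(pipefd) != 0)
            return 1;
        if (fork() == 0) {
            /* child: create a segment, report its id, exit without attaching */
            shmid = shmget(IPC_PRIVATE, 4096, IPC_CREAT | 0600);
            write(pipefd[1], &shmid, sizeof(shmid));
            _exit(0);
        }
        read(pipefd[0], &shmid, sizeof(shmid));
        wait(&status);

        /*
         * With shm_rmid_forced=1 this IPC_STAT fails (EINVAL/EIDRM) because
         * exit_shm() destroyed the orphaned segment; with the default of 0
         * the segment lingers and the call succeeds.
         */
        if (shmctl(shmid, IPC_STAT, &ds) == 0)
            printf("segment %d survived creator exit\n", shmid);
        else
            perror("shmctl");
        return 0;
    }
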
+@@ -1573,7 +1669,8 @@ out_nattch: + down_write(&shm_ids(ns).rwsem); + shp = shm_lock(ns, shmid); + shp->shm_nattch--; +- if (shm_may_destroy(ns, shp)) ++ ++ if (shm_may_destroy(shp)) + shm_destroy(ns, shp); + else + shm_unlock(shp); +diff --git a/ipc/util.c b/ipc/util.c +index d48d8cfa1f3fa..fa2d86ef3fb80 100644 +--- a/ipc/util.c ++++ b/ipc/util.c +@@ -447,8 +447,8 @@ static int ipcget_public(struct ipc_namespace *ns, struct ipc_ids *ids, + static void ipc_kht_remove(struct ipc_ids *ids, struct kern_ipc_perm *ipcp) + { + if (ipcp->key != IPC_PRIVATE) +- rhashtable_remove_fast(&ids->key_ht, &ipcp->khtnode, +- ipc_kht_params); ++ WARN_ON_ONCE(rhashtable_remove_fast(&ids->key_ht, &ipcp->khtnode, ++ ipc_kht_params)); + } - #ifdef CONFIG_KFENCE + /** +@@ -498,7 +498,7 @@ void ipc_rmid(struct ipc_ids *ids, struct kern_ipc_perm *ipcp) + { + int idx = ipcid_to_idx(ipcp->id); -+#include <linux/atomic.h> -+#include <linux/static_key.h> -+ - /* - * We allocate an even number of pages, as it simplifies calculations to map - * address to metadata indices; effectively, the very first page serves as an -@@ -22,13 +25,8 @@ - #define KFENCE_POOL_SIZE ((CONFIG_KFENCE_NUM_OBJECTS + 1) * 2 * PAGE_SIZE) - extern char *__kfence_pool; +- idr_remove(&ids->ipcs_idr, idx); ++ WARN_ON_ONCE(idr_remove(&ids->ipcs_idr, idx) != ipcp); + ipc_kht_remove(ids, ipcp); + ids->in_use--; + ipcp->deleted = true; +diff --git a/kernel/Makefile b/kernel/Makefile +index 4df609be42d07..599cb926449a6 100644 +--- a/kernel/Makefile ++++ b/kernel/Makefile +@@ -59,7 +59,7 @@ obj-$(CONFIG_FREEZER) += freezer.o + obj-$(CONFIG_PROFILING) += profile.o + obj-$(CONFIG_STACKTRACE) += stacktrace.o + obj-y += time/ +-obj-$(CONFIG_FUTEX) += futex.o ++obj-$(CONFIG_FUTEX) += futex/ + obj-$(CONFIG_GENERIC_ISA_DMA) += dma.o + obj-$(CONFIG_SMP) += smp.o + ifneq ($(CONFIG_SMP),y) +@@ -113,7 +113,8 @@ obj-$(CONFIG_CPU_PM) += cpu_pm.o + obj-$(CONFIG_BPF) += bpf/ + obj-$(CONFIG_KCSAN) += kcsan/ + obj-$(CONFIG_SHADOW_CALL_STACK) += scs.o +-obj-$(CONFIG_HAVE_STATIC_CALL_INLINE) += static_call.o ++obj-$(CONFIG_HAVE_STATIC_CALL) += static_call.o ++obj-$(CONFIG_HAVE_STATIC_CALL_INLINE) += static_call_inline.o + obj-$(CONFIG_CFI_CLANG) += cfi.o --#ifdef CONFIG_KFENCE_STATIC_KEYS --#include <linux/static_key.h> - DECLARE_STATIC_KEY_FALSE(kfence_allocation_key); --#else --#include <linux/atomic.h> - extern atomic_t kfence_allocation_gate; --#endif + obj-$(CONFIG_PERF_EVENTS) += events/ +diff --git a/kernel/acct.c b/kernel/acct.c +index 23a7ab8e6cbc8..2b5cc63eb295b 100644 +--- a/kernel/acct.c ++++ b/kernel/acct.c +@@ -331,6 +331,8 @@ static comp_t encode_comp_t(unsigned long value) + exp++; + } + ++ if (exp > (((comp_t) ~0U) >> MANTSIZE)) ++ return (comp_t) ~0U; + /* + * Clean it up and polish it off. 
+ */ +diff --git a/kernel/async.c b/kernel/async.c +index b8d7a663497f9..b2c4ba5686ee4 100644 +--- a/kernel/async.c ++++ b/kernel/async.c +@@ -205,9 +205,6 @@ async_cookie_t async_schedule_node_domain(async_func_t func, void *data, + atomic_inc(&entry_count); + spin_unlock_irqrestore(&async_lock, flags); + +- /* mark that this task has queued an async job, used by module init */ +- current->flags |= PF_USED_ASYNC; +- + /* schedule for execution */ + queue_work_node(node, system_unbound_wq, &entry->work); +diff --git a/kernel/audit.c b/kernel/audit.c +index 121d37e700a62..94ded5de91317 100644 +--- a/kernel/audit.c ++++ b/kernel/audit.c +@@ -541,20 +541,22 @@ static void kauditd_printk_skb(struct sk_buff *skb) /** - * is_kfence_address() - check if an address belongs to KFENCE pool -@@ -116,13 +114,16 @@ void *__kfence_alloc(struct kmem_cache *s, size_t size, gfp_t flags); + * kauditd_rehold_skb - Handle a audit record send failure in the hold queue + * @skb: audit record ++ * @error: error code (unused) + * + * Description: + * This should only be used by the kauditd_thread when it fails to flush the + * hold queue. */ - static __always_inline void *kfence_alloc(struct kmem_cache *s, size_t size, gfp_t flags) +-static void kauditd_rehold_skb(struct sk_buff *skb) ++static void kauditd_rehold_skb(struct sk_buff *skb, __always_unused int error) { --#ifdef CONFIG_KFENCE_STATIC_KEYS -- if (static_branch_unlikely(&kfence_allocation_key)) -+#if defined(CONFIG_KFENCE_STATIC_KEYS) || CONFIG_KFENCE_SAMPLE_INTERVAL == 0 -+ if (!static_branch_unlikely(&kfence_allocation_key)) -+ return NULL; - #else -- if (unlikely(!atomic_read(&kfence_allocation_gate))) -+ if (!static_branch_likely(&kfence_allocation_key)) -+ return NULL; - #endif -- return __kfence_alloc(s, size, flags); -- return NULL; -+ if (likely(atomic_read(&kfence_allocation_gate))) -+ return NULL; -+ return __kfence_alloc(s, size, flags); +- /* put the record back in the queue at the same place */ +- skb_queue_head(&audit_hold_queue, skb); ++ /* put the record back in the queue */ ++ skb_queue_tail(&audit_hold_queue, skb); } /** -@@ -201,6 +202,22 @@ static __always_inline __must_check bool kfence_free(void *addr) + * kauditd_hold_skb - Queue an audit record, waiting for auditd + * @skb: audit record ++ * @error: error code + * + * Description: + * Queue the audit record, waiting for an instance of auditd. When this +@@ -564,19 +566,31 @@ static void kauditd_rehold_skb(struct sk_buff *skb) + * and queue it, if we have room. If we want to hold on to the record, but we + * don't have room, record a record lost message. */ - bool __must_check kfence_handle_page_fault(unsigned long addr, bool is_write, struct pt_regs *regs); +-static void kauditd_hold_skb(struct sk_buff *skb) ++static void kauditd_hold_skb(struct sk_buff *skb, int error) + { + /* at this point it is uncertain if we will ever send this to auditd so + * try to send the message via printk before we go any further */ + kauditd_printk_skb(skb); -+#ifdef CONFIG_PRINTK -+struct kmem_obj_info; -+/** -+ * __kfence_obj_info() - fill kmem_obj_info struct -+ * @kpp: kmem_obj_info to be filled -+ * @object: the object -+ * -+ * Return: -+ * * false - not a KFENCE object -+ * * true - a KFENCE object, filled @kpp -+ * -+ * Copies information to @kpp for KFENCE objects. -+ */ -+bool __kfence_obj_info(struct kmem_obj_info *kpp, void *object, struct page *page); -+#endif + /* can we just silently drop the message? 
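Both audit queues are now bounded by audit_backlog_limit, with records dropped and accounted once the limit is hit. A minimal sketch of that policy over a generic singly-linked queue (names and types are stand-ins, not the kernel API):

#include <stdio.h>

#define BACKLOG_LIMIT 3

struct rec { int id; struct rec *next; };

static struct rec *head, *tail;
static unsigned int qlen, lost;

/* enqueue at the tail unless the limit is reached; 0 = queued, -1 = dropped */
static int queue_tail_bounded(struct rec *r)
{
	if (BACKLOG_LIMIT && qlen >= BACKLOG_LIMIT) {
		lost++;		/* analogous to audit_log_lost() */
		return -1;
	}
	r->next = NULL;
	if (tail) tail->next = r; else head = r;
	tail = r;
	qlen++;
	return 0;
}

int main(void)
{
	struct rec r[5] = { {1}, {2}, {3}, {4}, {5} };
	for (int i = 0; i < 5; i++)
		if (queue_tail_bounded(&r[i]))
			printf("dropped record %d\n", r[i].id);
	printf("queued=%u lost=%u\n", qlen, lost);
	return 0;
}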
*/ +- if (!audit_default) { +- kfree_skb(skb); +- return; ++ if (!audit_default) ++ goto drop; + - #else /* CONFIG_KFENCE */ ++ /* the hold queue is only for when the daemon goes away completely, ++ * not -EAGAIN failures; if we are in a -EAGAIN state requeue the ++ * record on the retry queue unless it's full, in which case drop it ++ */ ++ if (error == -EAGAIN) { ++ if (!audit_backlog_limit || ++ skb_queue_len(&audit_retry_queue) < audit_backlog_limit) { ++ skb_queue_tail(&audit_retry_queue, skb); ++ return; ++ } ++ audit_log_lost("kauditd retry queue overflow"); ++ goto drop; + } - static inline bool is_kfence_address(const void *addr) { return false; } -@@ -218,6 +235,14 @@ static inline bool __must_check kfence_handle_page_fault(unsigned long addr, boo - return false; - } +- /* if we have room, queue the message */ ++ /* if we have room in the hold queue, queue the message */ + if (!audit_backlog_limit || + skb_queue_len(&audit_hold_queue) < audit_backlog_limit) { + skb_queue_tail(&audit_hold_queue, skb); +@@ -585,24 +599,32 @@ static void kauditd_hold_skb(struct sk_buff *skb) -+#ifdef CONFIG_PRINTK -+struct kmem_obj_info; -+static inline bool __kfence_obj_info(struct kmem_obj_info *kpp, void *object, struct page *page) -+{ -+ return false; -+} -+#endif -+ - #endif + /* we have no other options - drop the message */ + audit_log_lost("kauditd hold queue overflow"); ++drop: + kfree_skb(skb); + } - #endif /* _LINUX_KFENCE_H */ -diff --git a/include/linux/kfifo.h b/include/linux/kfifo.h -index 86249476b57f4..0b35a41440ff1 100644 ---- a/include/linux/kfifo.h -+++ b/include/linux/kfifo.h -@@ -688,7 +688,7 @@ __kfifo_uint_must_check_helper( \ - * writer, you don't need extra locking to use these macro. + /** + * kauditd_retry_skb - Queue an audit record, attempt to send again to auditd + * @skb: audit record ++ * @error: error code (unused) + * + * Description: + * Not as serious as kauditd_hold_skb() as we still have a connected auditd, + * but for some reason we are having problems sending it audit records so + * queue the given record and attempt to resend. 
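The reworked sender retries a failing record in place, up to a limit, rather than requeueing it at the head of the queue. The control flow in isolation (send_once() and err_hook() are stand-ins; -11 stands for -EAGAIN here):

#include <stdio.h>

#define RETRY_LIMIT 5

/* stand-in transport: fails twice in total, then always succeeds */
static int send_once(int rec)
{
	static int failures;
	(void)rec;
	return (failures++ < 2) ? -11 : 0;
}

static void err_hook(int rec, int error)
{
	printf("record %d failed with error %d\n", rec, error);
}

int main(void)
{
	int failed = 0;

	for (int rec = 0; rec < 3; rec++) {
retry:
		if (send_once(rec) < 0) {
			if (++failed >= RETRY_LIMIT) {
				err_hook(rec, -11);
				continue;	/* keep draining the queue */
			}
			goto retry;		/* same record, try again */
		}
		failed = 0;			/* a success resets the counter */
	}
	return 0;
}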
*/ - #define kfifo_to_user(fifo, to, len, copied) \ --__kfifo_uint_must_check_helper( \ -+__kfifo_int_must_check_helper( \ - ({ \ - typeof((fifo) + 1) __tmp = (fifo); \ - void __user *__to = (to); \ -diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h -index e4f3bfe087570..ef8c7accbc689 100644 ---- a/include/linux/kprobes.h -+++ b/include/linux/kprobes.h -@@ -154,6 +154,8 @@ struct kretprobe { - struct kretprobe_holder *rph; - }; - -+#define KRETPROBE_MAX_DATA_SIZE 4096 +-static void kauditd_retry_skb(struct sk_buff *skb) ++static void kauditd_retry_skb(struct sk_buff *skb, __always_unused int error) + { +- /* NOTE: because records should only live in the retry queue for a +- * short period of time, before either being sent or moved to the hold +- * queue, we don't currently enforce a limit on this queue */ +- skb_queue_tail(&audit_retry_queue, skb); ++ if (!audit_backlog_limit || ++ skb_queue_len(&audit_retry_queue) < audit_backlog_limit) { ++ skb_queue_tail(&audit_retry_queue, skb); ++ return; ++ } + - struct kretprobe_instance { - union { - struct freelist_node freelist; -diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h -index 0f18df7fe8749..7e2423ffaf593 100644 ---- a/include/linux/kvm_host.h -+++ b/include/linux/kvm_host.h -@@ -15,6 +15,8 @@ - #include <linux/minmax.h> - #include <linux/mm.h> - #include <linux/mmu_notifier.h> -+#include <linux/ftrace.h> -+#include <linux/instrumentation.h> - #include <linux/preempt.h> - #include <linux/msi.h> - #include <linux/slab.h> -@@ -363,8 +365,11 @@ struct kvm_vcpu { - int last_used_slot; - }; ++ /* we have to drop the record, send it via printk as a last effort */ ++ kauditd_printk_skb(skb); ++ audit_log_lost("kauditd retry queue overflow"); ++ kfree_skb(skb); + } --/* must be called with irqs disabled */ --static __always_inline void guest_enter_irqoff(void) -+/* -+ * Start accounting time towards a guest. -+ * Must be called before entering guest context. -+ */ -+static __always_inline void guest_timing_enter_irqoff(void) + /** +@@ -640,7 +662,7 @@ static void auditd_reset(const struct auditd_connection *ac) + /* flush the retry queue to the hold queue, but don't touch the main + * queue since we need to process that normally for multicast */ + while ((skb = skb_dequeue(&audit_retry_queue))) +- kauditd_hold_skb(skb); ++ kauditd_hold_skb(skb, -ECONNREFUSED); + } + + /** +@@ -714,16 +736,18 @@ static int kauditd_send_queue(struct sock *sk, u32 portid, + struct sk_buff_head *queue, + unsigned int retry_limit, + void (*skb_hook)(struct sk_buff *skb), +- void (*err_hook)(struct sk_buff *skb)) ++ void (*err_hook)(struct sk_buff *skb, int error)) { - /* - * This is running in ioctl context so its safe to assume that it's the -@@ -373,7 +378,18 @@ static __always_inline void guest_enter_irqoff(void) - instrumentation_begin(); - vtime_account_guest_enter(); - instrumentation_end(); -+} + int rc = 0; +- struct sk_buff *skb; +- static unsigned int failed = 0; ++ struct sk_buff *skb = NULL; ++ struct sk_buff *skb_tail; ++ unsigned int failed = 0; -+/* -+ * Enter guest context and enter an RCU extended quiescent state. -+ * -+ * Between guest_context_enter_irqoff() and guest_context_exit_irqoff() it is -+ * unsafe to use any code which may directly or indirectly use RCU, tracing -+ * (including IRQ flag tracing), or lockdep. All code in this period must be -+ * non-instrumentable. 
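The refactor below splits guest entry and exit into separate timing and context/state steps that architecture code must pair in a fixed order. A no-op sketch of the contract spelled out in the comments (printf stubs only; the real helpers run with IRQs disabled and are non-instrumentable):

#include <stdio.h>

static void guest_timing_enter(void) { printf("1. start guest time accounting\n"); }
static void guest_state_enter(void)  { printf("2. RCU/lockdep: enter guest\n"); }
static void run_guest(void)          { printf("3. ...guest runs...\n"); }
static void guest_state_exit(void)   { printf("4. RCU/lockdep: exit guest\n"); }
static void guest_timing_exit(void)  { printf("5. stop guest time accounting\n"); }

int main(void)
{
	/* timing is outermost, state innermost: inverting the nesting would
	 * account guest time incorrectly and touch RCU from a quiescent state */
	guest_timing_enter();
	guest_state_enter();
	run_guest();
	guest_state_exit();
	guest_timing_exit();
	return 0;
}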
-+ */ -+static __always_inline void guest_context_enter_irqoff(void) -+{ - /* - * KVM does not hold any references to rcu protected data when it - * switches CPU into a guest mode. In fact switching to a guest mode -@@ -389,16 +405,79 @@ static __always_inline void guest_enter_irqoff(void) + /* NOTE: kauditd_thread takes care of all our locking, we just use + * the netlink info passed to us (e.g. sk and portid) */ + +- while ((skb = skb_dequeue(queue))) { ++ skb_tail = skb_peek_tail(queue); ++ while ((skb != skb_tail) && (skb = skb_dequeue(queue))) { + /* call the skb_hook for each skb we touch */ + if (skb_hook) + (*skb_hook)(skb); +@@ -731,36 +755,34 @@ static int kauditd_send_queue(struct sock *sk, u32 portid, + /* can we send to anyone via unicast? */ + if (!sk) { + if (err_hook) +- (*err_hook)(skb); ++ (*err_hook)(skb, -ECONNREFUSED); + continue; + } + ++retry: + /* grab an extra skb reference in case of error */ + skb_get(skb); + rc = netlink_unicast(sk, skb, portid, 0); + if (rc < 0) { +- /* fatal failure for our queue flush attempt? */ ++ /* send failed - try a few times unless fatal error */ + if (++failed >= retry_limit || + rc == -ECONNREFUSED || rc == -EPERM) { +- /* yes - error processing for the queue */ + sk = NULL; + if (err_hook) +- (*err_hook)(skb); +- if (!skb_hook) +- goto out; +- /* keep processing with the skb_hook */ ++ (*err_hook)(skb, rc); ++ if (rc == -EAGAIN) ++ rc = 0; ++ /* continue to drain the queue */ + continue; + } else +- /* no - requeue to preserve ordering */ +- skb_queue_head(queue, skb); ++ goto retry; + } else { +- /* it worked - drop the extra reference and continue */ ++ /* skb sent - drop the extra reference and continue */ + consume_skb(skb); + failed = 0; + } } + +-out: + return (rc >= 0 ? 0 : rc); } --static __always_inline void guest_exit_irqoff(void) -+/* -+ * Deprecated. Architectures should move to guest_timing_enter_irqoff() and -+ * guest_state_enter_irqoff(). -+ */ -+static __always_inline void guest_enter_irqoff(void) -+{ -+ guest_timing_enter_irqoff(); -+ guest_context_enter_irqoff(); -+} +@@ -1542,6 +1564,20 @@ static void audit_receive(struct sk_buff *skb) + nlh = nlmsg_next(nlh, &len); + } + audit_ctl_unlock(); + -+/** -+ * guest_state_enter_irqoff - Fixup state when entering a guest -+ * -+ * Entry to a guest will enable interrupts, but the kernel state is interrupts -+ * disabled when this is invoked. Also tell RCU about it. -+ * -+ * 1) Trace interrupts on state -+ * 2) Invoke context tracking if enabled to adjust RCU state -+ * 3) Tell lockdep that interrupts are enabled -+ * -+ * Invoked from architecture specific code before entering a guest. -+ * Must be called with interrupts disabled and the caller must be -+ * non-instrumentable. -+ * The caller has to invoke guest_timing_enter_irqoff() before this. -+ * -+ * Note: this is analogous to exit_to_user_mode(). -+ */ -+static __always_inline void guest_state_enter_irqoff(void) -+{ -+ instrumentation_begin(); -+ trace_hardirqs_on_prepare(); -+ lockdep_hardirqs_on_prepare(); -+ instrumentation_end(); ++ /* can't block with the ctrl lock, so penalize the sender now */ ++ if (audit_backlog_limit && ++ (skb_queue_len(&audit_queue) > audit_backlog_limit)) { ++ DECLARE_WAITQUEUE(wait, current); + -+ guest_context_enter_irqoff(); -+ lockdep_hardirqs_on(CALLER_ADDR0); -+} ++ /* wake kauditd to try and flush the queue */ ++ wake_up_interruptible(&kauditd_wait); + -+/* -+ * Exit guest context and exit an RCU extended quiescent state. 
-+ * -+ * Between guest_context_enter_irqoff() and guest_context_exit_irqoff() it is -+ * unsafe to use any code which may directly or indirectly use RCU, tracing -+ * (including IRQ flag tracing), or lockdep. All code in this period must be -+ * non-instrumentable. -+ */ -+static __always_inline void guest_context_exit_irqoff(void) - { - context_tracking_guest_exit(); -+} ++ add_wait_queue_exclusive(&audit_backlog_wait, &wait); ++ set_current_state(TASK_UNINTERRUPTIBLE); ++ schedule_timeout(audit_backlog_wait_time); ++ remove_wait_queue(&audit_backlog_wait, &wait); ++ } + } -+/* -+ * Stop accounting time towards a guest. -+ * Must be called after exiting guest context. -+ */ -+static __always_inline void guest_timing_exit_irqoff(void) -+{ - instrumentation_begin(); - /* Flush the guest cputime we spent on the guest */ - vtime_account_guest_exit(); - instrumentation_end(); + /* Log information about who is connecting to the audit multicast socket */ +@@ -1609,7 +1645,8 @@ static int __net_init audit_net_init(struct net *net) + audit_panic("cannot initialize netlink socket in namespace"); + return -ENOMEM; + } +- aunet->sk->sk_sndtimeo = MAX_SCHEDULE_TIMEOUT; ++ /* limit the timeout in case auditd is blocked/stopped */ ++ aunet->sk->sk_sndtimeo = HZ / 10; + + return 0; } +@@ -1825,7 +1862,9 @@ struct audit_buffer *audit_log_start(struct audit_context *ctx, gfp_t gfp_mask, + * task_tgid_vnr() since auditd_pid is set in audit_receive_msg() + * using a PID anchored in the caller's namespace + * 2. generator holding the audit_cmd_mutex - we don't want to block +- * while holding the mutex */ ++ * while holding the mutex, although we do penalize the sender ++ * later in audit_receive() when it is safe to block ++ */ + if (!(auditd_test_task(current) || audit_ctl_owner_current())) { + long stime = audit_backlog_wait_time; -+/* -+ * Deprecated. Architectures should move to guest_state_exit_irqoff() and -+ * guest_timing_exit_irqoff(). -+ */ -+static __always_inline void guest_exit_irqoff(void) -+{ -+ guest_context_exit_irqoff(); -+ guest_timing_exit_irqoff(); -+} -+ - static inline void guest_exit(void) - { - unsigned long flags; -@@ -408,6 +487,33 @@ static inline void guest_exit(void) - local_irq_restore(flags); +diff --git a/kernel/audit.h b/kernel/audit.h +index d6a2c899a8dbf..b2ef4c0d3ec03 100644 +--- a/kernel/audit.h ++++ b/kernel/audit.h +@@ -194,6 +194,10 @@ struct audit_context { + struct { + char *name; + } module; ++ struct { ++ struct audit_ntp_data ntp_data; ++ struct timespec64 tk_injoffset; ++ } time; + }; + int fds[2]; + struct audit_proctitle proctitle; +diff --git a/kernel/audit_fsnotify.c b/kernel/audit_fsnotify.c +index 60739d5e3373f..c428312938e95 100644 +--- a/kernel/audit_fsnotify.c ++++ b/kernel/audit_fsnotify.c +@@ -102,6 +102,7 @@ struct audit_fsnotify_mark *audit_alloc_mark(struct audit_krule *krule, char *pa + + ret = fsnotify_add_inode_mark(&audit_mark->mark, inode, true); + if (ret < 0) { ++ audit_mark->path = NULL; + fsnotify_put_mark(&audit_mark->mark); + audit_mark = ERR_PTR(ret); + } +diff --git a/kernel/auditsc.c b/kernel/auditsc.c +index b1cb1dbf7417f..2f036bab3c28f 100644 +--- a/kernel/auditsc.c ++++ b/kernel/auditsc.c +@@ -1219,6 +1219,53 @@ static void audit_log_fcaps(struct audit_buffer *ab, struct audit_names *name) + from_kuid(&init_user_ns, name->fcap.rootid)); } -+/** -+ * guest_state_exit_irqoff - Establish state when returning from guest mode -+ * -+ * Entry from a guest disables interrupts, but guest mode is traced as -+ * interrupts enabled. 
Also with NO_HZ_FULL RCU might be idle. -+ * -+ * 1) Tell lockdep that interrupts are disabled -+ * 2) Invoke context tracking if enabled to reactivate RCU -+ * 3) Trace interrupts off state -+ * -+ * Invoked from architecture specific code after exiting a guest. -+ * Must be invoked with interrupts disabled and the caller must be -+ * non-instrumentable. -+ * The caller has to invoke guest_timing_exit_irqoff() after this. -+ * -+ * Note: this is analogous to enter_from_user_mode(). -+ */ -+static __always_inline void guest_state_exit_irqoff(void) ++static void audit_log_time(struct audit_context *context, struct audit_buffer **ab) +{ -+ lockdep_hardirqs_off(CALLER_ADDR0); -+ guest_context_exit_irqoff(); ++ const struct audit_ntp_data *ntp = &context->time.ntp_data; ++ const struct timespec64 *tk = &context->time.tk_injoffset; ++ static const char * const ntp_name[] = { ++ "offset", ++ "freq", ++ "status", ++ "tai", ++ "tick", ++ "adjust", ++ }; ++ int type; + -+ instrumentation_begin(); -+ trace_hardirqs_off_finish(); -+ instrumentation_end(); ++ if (context->type == AUDIT_TIME_ADJNTPVAL) { ++ for (type = 0; type < AUDIT_NTP_NVALS; type++) { ++ if (ntp->vals[type].newval != ntp->vals[type].oldval) { ++ if (!*ab) { ++ *ab = audit_log_start(context, ++ GFP_KERNEL, ++ AUDIT_TIME_ADJNTPVAL); ++ if (!*ab) ++ return; ++ } ++ audit_log_format(*ab, "op=%s old=%lli new=%lli", ++ ntp_name[type], ++ ntp->vals[type].oldval, ++ ntp->vals[type].newval); ++ audit_log_end(*ab); ++ *ab = NULL; ++ } ++ } ++ } ++ if (tk->tv_sec != 0 || tk->tv_nsec != 0) { ++ if (!*ab) { ++ *ab = audit_log_start(context, GFP_KERNEL, ++ AUDIT_TIME_INJOFFSET); ++ if (!*ab) ++ return; ++ } ++ audit_log_format(*ab, "sec=%lli nsec=%li", ++ (long long)tk->tv_sec, tk->tv_nsec); ++ audit_log_end(*ab); ++ *ab = NULL; ++ } +} + - static inline int kvm_vcpu_exiting_guest_mode(struct kvm_vcpu *vcpu) + static void show_special(struct audit_context *context, int *call_panic) { - /* -@@ -1018,6 +1124,8 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm, - struct kvm_enable_cap *cap); - long kvm_arch_vm_ioctl(struct file *filp, - unsigned int ioctl, unsigned long arg); -+long kvm_arch_vm_compat_ioctl(struct file *filp, unsigned int ioctl, -+ unsigned long arg); + struct audit_buffer *ab; +@@ -1327,6 +1374,11 @@ static void show_special(struct audit_context *context, int *call_panic) + audit_log_format(ab, "(null)"); - int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu); - int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu); -@@ -1127,7 +1235,7 @@ static inline void kvm_arch_end_assignment(struct kvm *kvm) - { + break; ++ case AUDIT_TIME_ADJNTPVAL: ++ case AUDIT_TIME_INJOFFSET: ++ /* this call deviates from the rest, eating the buffer */ ++ audit_log_time(context, &ab); ++ break; + } + audit_log_end(ab); } +@@ -2564,31 +2616,26 @@ void __audit_fanotify(unsigned int response) --static inline bool kvm_arch_has_assigned_device(struct kvm *kvm) -+static __always_inline bool kvm_arch_has_assigned_device(struct kvm *kvm) + void __audit_tk_injoffset(struct timespec64 offset) { - return false; - } -@@ -1806,6 +1914,8 @@ static inline long kvm_arch_vcpu_async_ioctl(struct file *filp, - void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm, - unsigned long start, unsigned long end); +- audit_log(audit_context(), GFP_KERNEL, AUDIT_TIME_INJOFFSET, +- "sec=%lli nsec=%li", +- (long long)offset.tv_sec, offset.tv_nsec); +-} +- +-static void audit_log_ntp_val(const struct audit_ntp_data *ad, +- const char 
*op, enum audit_ntp_type type) +-{ +- const struct audit_ntp_val *val = &ad->vals[type]; +- +- if (val->newval == val->oldval) +- return; ++ struct audit_context *context = audit_context(); -+void kvm_arch_guest_memory_reclaimed(struct kvm *kvm); -+ - #ifdef CONFIG_HAVE_KVM_VCPU_RUN_PID_CHANGE - int kvm_arch_vcpu_run_pid_change(struct kvm_vcpu *vcpu); - #else -diff --git a/include/linux/libata.h b/include/linux/libata.h -index c0c64f03e1074..a64e12605d31d 100644 ---- a/include/linux/libata.h -+++ b/include/linux/libata.h -@@ -394,7 +394,7 @@ enum { - /* This should match the actual table size of - * ata_eh_cmd_timeout_table in libata-eh.c. - */ -- ATA_EH_CMD_TIMEOUT_TABLE_SIZE = 6, -+ ATA_EH_CMD_TIMEOUT_TABLE_SIZE = 7, +- audit_log(audit_context(), GFP_KERNEL, AUDIT_TIME_ADJNTPVAL, +- "op=%s old=%lli new=%lli", op, val->oldval, val->newval); ++ /* only set type if not already set by NTP */ ++ if (!context->type) ++ context->type = AUDIT_TIME_INJOFFSET; ++ memcpy(&context->time.tk_injoffset, &offset, sizeof(offset)); + } - /* Horkage types. May be set by libata or controller on drives - (some horkage may be drive/controller pair dependent */ -diff --git a/include/linux/list.h b/include/linux/list.h -index f2af4b4aa4e9a..d206ae93c06da 100644 ---- a/include/linux/list.h -+++ b/include/linux/list.h -@@ -33,7 +33,7 @@ - static inline void INIT_LIST_HEAD(struct list_head *list) + void __audit_ntp_log(const struct audit_ntp_data *ad) { - WRITE_ONCE(list->next, list); -- list->prev = list; -+ WRITE_ONCE(list->prev, list); +- audit_log_ntp_val(ad, "offset", AUDIT_NTP_OFFSET); +- audit_log_ntp_val(ad, "freq", AUDIT_NTP_FREQ); +- audit_log_ntp_val(ad, "status", AUDIT_NTP_STATUS); +- audit_log_ntp_val(ad, "tai", AUDIT_NTP_TAI); +- audit_log_ntp_val(ad, "tick", AUDIT_NTP_TICK); +- audit_log_ntp_val(ad, "adjust", AUDIT_NTP_ADJUST); ++ struct audit_context *context = audit_context(); ++ int type; ++ ++ for (type = 0; type < AUDIT_NTP_NVALS; type++) ++ if (ad->vals[type].newval != ad->vals[type].oldval) { ++ /* unconditionally set type, overwriting TK */ ++ context->type = AUDIT_TIME_ADJNTPVAL; ++ memcpy(&context->time.ntp_data, ad, sizeof(*ad)); ++ break; ++ } } - #ifdef CONFIG_DEBUG_LIST -@@ -256,8 +256,7 @@ static inline void list_bulk_move_tail(struct list_head *head, - * @list: the entry to test - * @head: the head of the list - */ --static inline int list_is_first(const struct list_head *list, -- const struct list_head *head) -+static inline int list_is_first(const struct list_head *list, const struct list_head *head) - { - return list->prev == head; + void __audit_log_nfcfg(const char *name, u8 af, unsigned int nentries, +diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c +index 447def5405444..88014cd31b28a 100644 +--- a/kernel/bpf/arraymap.c ++++ b/kernel/bpf/arraymap.c +@@ -620,6 +620,11 @@ static int bpf_iter_init_array_map(void *priv_data, + seq_info->percpu_value_buf = value_buf; + } + ++ /* bpf_iter_attach_map() acquires a map uref, and the uref may be ++ * released before or in the middle of iterating map elements, so ++ * acquire an extra map uref for iterator. 
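With this rework the NTP values are captured into the audit context and only emitted later, one record per field whose value actually changed. The comparison loop on its own (types simplified for the sketch):

#include <stdio.h>

enum { NTP_OFFSET, NTP_FREQ, NTP_STATUS, NTP_TAI, NTP_TICK, NTP_ADJUST, NTP_NVALS };

static const char * const ntp_name[NTP_NVALS] = {
	"offset", "freq", "status", "tai", "tick", "adjust",
};

struct ntp_val { long long oldval, newval; };

int main(void)
{
	struct ntp_val vals[NTP_NVALS] = {
		[NTP_OFFSET] = { 100, 250 },	/* changed: gets a record */
		[NTP_STATUS] = { 1, 1 },	/* unchanged: skipped */
		[NTP_TICK]   = { 10000, 9999 },	/* changed: gets a record */
	};

	for (int type = 0; type < NTP_NVALS; type++)
		if (vals[type].newval != vals[type].oldval)
			printf("op=%s old=%lli new=%lli\n", ntp_name[type],
			       vals[type].oldval, vals[type].newval);
	return 0;
}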
++ */ ++ bpf_map_inc_with_uref(map); + seq_info->map = map; + return 0; } -@@ -267,12 +266,21 @@ static inline int list_is_first(const struct list_head *list, - * @list: the entry to test - * @head: the head of the list - */ --static inline int list_is_last(const struct list_head *list, -- const struct list_head *head) -+static inline int list_is_last(const struct list_head *list, const struct list_head *head) +@@ -628,6 +633,7 @@ static void bpf_iter_fini_array_map(void *priv_data) { - return list->next == head; + struct bpf_iter_seq_array_map_info *seq_info = priv_data; + ++ bpf_map_put_with_uref(seq_info->map); + kfree(seq_info->percpu_value_buf); } -+/** -+ * list_is_head - tests whether @list is the list @head -+ * @list: the entry to test -+ * @head: the head of the list -+ */ -+static inline int list_is_head(const struct list_head *list, const struct list_head *head) -+{ -+ return list == head; -+} -+ - /** - * list_empty - tests whether a list is empty - * @head: the list to test. -@@ -296,7 +304,7 @@ static inline int list_empty(const struct list_head *head) - static inline void list_del_init_careful(struct list_head *entry) +diff --git a/kernel/bpf/bpf_local_storage.c b/kernel/bpf/bpf_local_storage.c +index b305270b7a4bd..6c2d39a3d5581 100644 +--- a/kernel/bpf/bpf_local_storage.c ++++ b/kernel/bpf/bpf_local_storage.c +@@ -71,7 +71,7 @@ bpf_selem_alloc(struct bpf_local_storage_map *smap, void *owner, + GFP_ATOMIC | __GFP_NOWARN); + if (selem) { + if (value) +- memcpy(SDATA(selem)->data, value, smap->map.value_size); ++ copy_map_value(&smap->map, SDATA(selem)->data, value); + return selem; + } + +@@ -506,11 +506,11 @@ void bpf_local_storage_map_free(struct bpf_local_storage_map *smap, + struct bpf_local_storage_elem, map_node))) { + if (busy_counter) { + migrate_disable(); +- __this_cpu_inc(*busy_counter); ++ this_cpu_inc(*busy_counter); + } + bpf_selem_unlink(selem); + if (busy_counter) { +- __this_cpu_dec(*busy_counter); ++ this_cpu_dec(*busy_counter); + migrate_enable(); + } + cond_resched_rcu(); +diff --git a/kernel/bpf/bpf_task_storage.c b/kernel/bpf/bpf_task_storage.c +index ebfa8bc908923..6b7bfce239158 100644 +--- a/kernel/bpf/bpf_task_storage.c ++++ b/kernel/bpf/bpf_task_storage.c +@@ -25,20 +25,20 @@ static DEFINE_PER_CPU(int, bpf_task_storage_busy); + static void bpf_task_storage_lock(void) { - __list_del_entry(entry); -- entry->prev = entry; -+ WRITE_ONCE(entry->prev, entry); - smp_store_release(&entry->next, entry); + migrate_disable(); +- __this_cpu_inc(bpf_task_storage_busy); ++ this_cpu_inc(bpf_task_storage_busy); } -@@ -316,7 +324,7 @@ static inline void list_del_init_careful(struct list_head *entry) - static inline int list_empty_careful(const struct list_head *head) + static void bpf_task_storage_unlock(void) { - struct list_head *next = smp_load_acquire(&head->next); -- return (next == head) && (next == head->prev); -+ return list_is_head(next, head) && (next == READ_ONCE(head->prev)); +- __this_cpu_dec(bpf_task_storage_busy); ++ this_cpu_dec(bpf_task_storage_busy); + migrate_enable(); } - /** -@@ -391,10 +399,9 @@ static inline void list_cut_position(struct list_head *list, + static bool bpf_task_storage_trylock(void) { - if (list_empty(head)) - return; -- if (list_is_singular(head) && -- (head->next != entry && head != entry)) -+ if (list_is_singular(head) && !list_is_head(entry, head) && (entry != head->next)) - return; -- if (entry == head) -+ if (list_is_head(entry, head)) - INIT_LIST_HEAD(list); - else - __list_cut_position(list, head, entry); -@@ -568,7 
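One of the btf.c fixes in this range widens last_member_type_id from u16 to u32: the narrow field silently truncated large BTF type ids, so a member could resolve to the wrong type. The failure mode in isolation:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint32_t type_id = 70000;	/* a valid BTF type id above 65535 */

	uint16_t truncated = type_id;	/* the old u16 field */
	uint32_t widened   = type_id;	/* the fixed u32 field */

	/* 70000 % 65536 == 4464: with the narrow field the member quietly
	 * points at an unrelated type */
	printf("u16: %u, u32: %u\n", truncated, widened);
	return 0;
}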
+575,17 @@ static inline void list_splice_tail_init(struct list_head *list, - * @head: the head for your list. - */ - #define list_for_each(pos, head) \ -- for (pos = (head)->next; pos != (head); pos = pos->next) -+ for (pos = (head)->next; !list_is_head(pos, (head)); pos = pos->next) + migrate_disable(); +- if (unlikely(__this_cpu_inc_return(bpf_task_storage_busy) != 1)) { +- __this_cpu_dec(bpf_task_storage_busy); ++ if (unlikely(this_cpu_inc_return(bpf_task_storage_busy) != 1)) { ++ this_cpu_dec(bpf_task_storage_busy); + migrate_enable(); + return false; + } +diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c +index dfe61df4f974d..0c2fa93bd8d27 100644 +--- a/kernel/bpf/btf.c ++++ b/kernel/bpf/btf.c +@@ -2983,7 +2983,7 @@ static int btf_struct_resolve(struct btf_verifier_env *env, + if (v->next_member) { + const struct btf_type *last_member_type; + const struct btf_member *last_member; +- u16 last_member_type_id; ++ u32 last_member_type_id; + + last_member = btf_type_member(v->t) + v->next_member - 1; + last_member_type_id = last_member->type; +@@ -3864,6 +3864,11 @@ static int btf_func_proto_check(struct btf_verifier_env *env, + break; + } + ++ if (btf_type_is_resolve_source_only(arg_type)) { ++ btf_verifier_log_type(env, t, "Invalid arg#%u", i + 1); ++ return -EINVAL; ++ } + -+/** -+ * list_for_each_rcu - Iterate over a list in an RCU-safe fashion -+ * @pos: the &struct list_head to use as a loop cursor. -+ * @head: the head for your list. -+ */ -+#define list_for_each_rcu(pos, head) \ -+ for (pos = rcu_dereference((head)->next); \ -+ !list_is_head(pos, (head)); \ -+ pos = rcu_dereference(pos->next)) + if (args[i].name_off && + (!btf_name_offset_valid(btf, args[i].name_off) || + !btf_name_valid_identifier(btf, args[i].name_off))) { +@@ -4332,8 +4337,7 @@ static struct btf *btf_parse(bpfptr_t btf_data, u32 btf_data_size, + log->len_total = log_size; - /** - * list_for_each_continue - continue iteration over a list -@@ -578,7 +595,7 @@ static inline void list_splice_tail_init(struct list_head *list, - * Continue to iterate over a list, continuing after the current position. - */ - #define list_for_each_continue(pos, head) \ -- for (pos = pos->next; pos != (head); pos = pos->next) -+ for (pos = pos->next; !list_is_head(pos, (head)); pos = pos->next) + /* log attributes have to be sane */ +- if (log->len_total < 128 || log->len_total > UINT_MAX >> 8 || +- !log->level || !log->ubuf) { ++ if (!bpf_verifier_log_attr_valid(log)) { + err = -EINVAL; + goto errout; + } +@@ -4801,10 +4805,12 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type, + /* check for PTR_TO_RDONLY_BUF_OR_NULL or PTR_TO_RDWR_BUF_OR_NULL */ + for (i = 0; i < prog->aux->ctx_arg_info_size; i++) { + const struct bpf_ctx_arg_aux *ctx_arg_info = &prog->aux->ctx_arg_info[i]; ++ u32 type, flag; - /** - * list_for_each_prev - iterate over a list backwards -@@ -586,7 +603,7 @@ static inline void list_splice_tail_init(struct list_head *list, - * @head: the head for your list. 
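The new list_is_head() helper lets every iteration macro spell its stop condition the same way. A userspace rendition of the pattern (minimal stand-ins for the kernel's list types, enough to compile and run):

#include <stdio.h>
#include <stddef.h>

struct list_head { struct list_head *next, *prev; };

static void INIT_LIST_HEAD(struct list_head *l) { l->next = l->prev = l; }

static void list_add_tail(struct list_head *n, struct list_head *head)
{
	n->prev = head->prev; n->next = head;
	head->prev->next = n; head->prev = n;
}

/* the helper the patch introduces: is @list the sentinel @head itself? */
static int list_is_head(const struct list_head *list, const struct list_head *head)
{
	return list == head;
}

#define list_for_each(pos, head) \
	for (pos = (head)->next; !list_is_head(pos, (head)); pos = pos->next)

struct item { int v; struct list_head node; };

int main(void)
{
	struct list_head head;
	struct item a = { 1 }, b = { 2 }, c = { 3 };
	struct list_head *pos;

	INIT_LIST_HEAD(&head);
	list_add_tail(&a.node, &head);
	list_add_tail(&b.node, &head);
	list_add_tail(&c.node, &head);

	list_for_each(pos, &head) {
		/* container_of, hand-rolled for the sketch */
		struct item *it = (struct item *)((char *)pos - offsetof(struct item, node));
		printf("%d\n", it->v);
	}
	return 0;
}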
- */ - #define list_for_each_prev(pos, head) \ -- for (pos = (head)->prev; pos != (head); pos = pos->prev) -+ for (pos = (head)->prev; !list_is_head(pos, (head)); pos = pos->prev) +- if (ctx_arg_info->offset == off && +- (ctx_arg_info->reg_type == PTR_TO_RDONLY_BUF_OR_NULL || +- ctx_arg_info->reg_type == PTR_TO_RDWR_BUF_OR_NULL)) { ++ type = base_type(ctx_arg_info->reg_type); ++ flag = type_flag(ctx_arg_info->reg_type); ++ if (ctx_arg_info->offset == off && type == PTR_TO_BUF && ++ (flag & PTR_MAYBE_NULL)) { + info->reg_type = ctx_arg_info->reg_type; + return true; + } +@@ -5440,6 +5446,8 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env, + struct bpf_reg_state *regs, + bool ptr_to_mem_ok) + { ++ enum bpf_prog_type prog_type = env->prog->type == BPF_PROG_TYPE_EXT ? ++ env->prog->aux->dst_prog->type : env->prog->type; + struct bpf_verifier_log *log = &env->log; + const char *func_name, *ref_tname; + const struct btf_type *t, *ref_t; +@@ -5509,9 +5517,9 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env, + if (reg->type == PTR_TO_BTF_ID) { + reg_btf = reg->btf; + reg_ref_id = reg->btf_id; +- } else if (reg2btf_ids[reg->type]) { ++ } else if (reg2btf_ids[base_type(reg->type)]) { + reg_btf = btf_vmlinux; +- reg_ref_id = *reg2btf_ids[reg->type]; ++ reg_ref_id = *reg2btf_ids[base_type(reg->type)]; + } else { + bpf_log(log, "kernel function %s args#%d expected pointer to %s %s but R%d is not a pointer to btf_id\n", + func_name, i, +@@ -5532,8 +5540,7 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env, + reg_ref_tname); + return -EINVAL; + } +- } else if (btf_get_prog_ctx_type(log, btf, t, +- env->prog->type, i)) { ++ } else if (btf_get_prog_ctx_type(log, btf, t, prog_type, i)) { + /* If function expects ctx type in BTF check that caller + * is passing PTR_TO_CTX. + */ +@@ -5718,7 +5725,7 @@ int btf_prepare_func_args(struct bpf_verifier_env *env, int subprog, + return -EINVAL; + } - /** - * list_for_each_safe - iterate over a list safe against removal of list entry -@@ -595,8 +612,9 @@ static inline void list_splice_tail_init(struct list_head *list, - * @head: the head for your list. 
- */ - #define list_for_each_safe(pos, n, head) \ -- for (pos = (head)->next, n = pos->next; pos != (head); \ -- pos = n, n = pos->next) -+ for (pos = (head)->next, n = pos->next; \ -+ !list_is_head(pos, (head)); \ -+ pos = n, n = pos->next) +- reg->type = PTR_TO_MEM_OR_NULL; ++ reg->type = PTR_TO_MEM | PTR_MAYBE_NULL; + reg->id = ++env->id_gen; - /** - * list_for_each_prev_safe - iterate over a list backwards safe against removal of list entry -@@ -606,7 +624,7 @@ static inline void list_splice_tail_init(struct list_head *list, - */ - #define list_for_each_prev_safe(pos, n, head) \ - for (pos = (head)->prev, n = pos->prev; \ -- pos != (head); \ -+ !list_is_head(pos, (head)); \ - pos = n, n = pos->prev) + continue; +@@ -6007,12 +6014,17 @@ bool btf_id_set_contains(const struct btf_id_set *set, u32 id) + return bsearch(&id, set->ids, set->cnt, sizeof(u32), btf_id_cmp_func) != NULL; + } - /** -diff --git a/include/linux/lockd/xdr.h b/include/linux/lockd/xdr.h -index a98309c0121cb..bed63156b0521 100644 ---- a/include/linux/lockd/xdr.h -+++ b/include/linux/lockd/xdr.h -@@ -41,6 +41,8 @@ struct nlm_lock { - struct nfs_fh fh; - struct xdr_netobj oh; - u32 svid; -+ u64 lock_start; -+ u64 lock_len; - struct file_lock fl; ++enum { ++ BTF_MODULE_F_LIVE = (1 << 0), ++}; ++ + #ifdef CONFIG_DEBUG_INFO_BTF_MODULES + struct btf_module { + struct list_head list; + struct module *module; + struct btf *btf; + struct bin_attribute *sysfs_attr; ++ int flags; }; -diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h -index 9fe165beb0f9e..aa0ecfc6cdb4b 100644 ---- a/include/linux/lockdep.h -+++ b/include/linux/lockdep.h -@@ -192,7 +192,7 @@ static inline void - lockdep_init_map_waits(struct lockdep_map *lock, const char *name, - struct lock_class_key *key, int subclass, u8 inner, u8 outer) - { -- lockdep_init_map_type(lock, name, key, subclass, inner, LD_WAIT_INV, LD_LOCK_NORMAL); -+ lockdep_init_map_type(lock, name, key, subclass, inner, outer, LD_LOCK_NORMAL); - } + static LIST_HEAD(btf_modules); +@@ -6038,7 +6050,8 @@ static int btf_module_notify(struct notifier_block *nb, unsigned long op, + int err = 0; - static inline void -@@ -215,24 +215,28 @@ static inline void lockdep_init_map(struct lockdep_map *lock, const char *name, - * or they are too narrow (they suffer from a false class-split): - */ - #define lockdep_set_class(lock, key) \ -- lockdep_init_map_waits(&(lock)->dep_map, #key, key, 0, \ -- (lock)->dep_map.wait_type_inner, \ -- (lock)->dep_map.wait_type_outer) -+ lockdep_init_map_type(&(lock)->dep_map, #key, key, 0, \ -+ (lock)->dep_map.wait_type_inner, \ -+ (lock)->dep_map.wait_type_outer, \ -+ (lock)->dep_map.lock_type) + if (mod->btf_data_size == 0 || +- (op != MODULE_STATE_COMING && op != MODULE_STATE_GOING)) ++ (op != MODULE_STATE_COMING && op != MODULE_STATE_LIVE && ++ op != MODULE_STATE_GOING)) + goto out; - #define lockdep_set_class_and_name(lock, key, name) \ -- lockdep_init_map_waits(&(lock)->dep_map, name, key, 0, \ -- (lock)->dep_map.wait_type_inner, \ -- (lock)->dep_map.wait_type_outer) -+ lockdep_init_map_type(&(lock)->dep_map, name, key, 0, \ -+ (lock)->dep_map.wait_type_inner, \ -+ (lock)->dep_map.wait_type_outer, \ -+ (lock)->dep_map.lock_type) + switch (op) { +@@ -6095,6 +6108,17 @@ static int btf_module_notify(struct notifier_block *nb, unsigned long op, + btf_mod->sysfs_attr = attr; + } - #define lockdep_set_class_and_subclass(lock, key, sub) \ -- lockdep_init_map_waits(&(lock)->dep_map, #key, key, sub,\ -- (lock)->dep_map.wait_type_inner, \ -- 
(lock)->dep_map.wait_type_outer) -+ lockdep_init_map_type(&(lock)->dep_map, #key, key, sub, \ -+ (lock)->dep_map.wait_type_inner, \ -+ (lock)->dep_map.wait_type_outer, \ -+ (lock)->dep_map.lock_type) ++ break; ++ case MODULE_STATE_LIVE: ++ mutex_lock(&btf_module_mutex); ++ list_for_each_entry_safe(btf_mod, tmp, &btf_modules, list) { ++ if (btf_mod->module != module) ++ continue; ++ ++ btf_mod->flags |= BTF_MODULE_F_LIVE; ++ break; ++ } ++ mutex_unlock(&btf_module_mutex); + break; + case MODULE_STATE_GOING: + mutex_lock(&btf_module_mutex); +@@ -6141,7 +6165,12 @@ struct module *btf_try_get_module(const struct btf *btf) + if (btf_mod->btf != btf) + continue; - #define lockdep_set_subclass(lock, sub) \ -- lockdep_init_map_waits(&(lock)->dep_map, #lock, (lock)->dep_map.key, sub,\ -- (lock)->dep_map.wait_type_inner, \ -- (lock)->dep_map.wait_type_outer) -+ lockdep_init_map_type(&(lock)->dep_map, #lock, (lock)->dep_map.key, sub,\ -+ (lock)->dep_map.wait_type_inner, \ -+ (lock)->dep_map.wait_type_outer, \ -+ (lock)->dep_map.lock_type) +- if (try_module_get(btf_mod->module)) ++ /* We must only consider module whose __init routine has ++ * finished, hence we must check for BTF_MODULE_F_LIVE flag, ++ * which is set from the notifier callback for ++ * MODULE_STATE_LIVE. ++ */ ++ if ((btf_mod->flags & BTF_MODULE_F_LIVE) && try_module_get(btf_mod->module)) + res = btf_mod->module; - #define lockdep_set_novalidate_class(lock) \ - lockdep_set_class_and_name(lock, &__lockdep_no_validate__, #lock) -diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h -index 2adeea44c0d53..61590c1f2d333 100644 ---- a/include/linux/lsm_hook_defs.h -+++ b/include/linux/lsm_hook_defs.h -@@ -26,13 +26,13 @@ - * #undef LSM_HOOK - * }; - */ --LSM_HOOK(int, 0, binder_set_context_mgr, struct task_struct *mgr) --LSM_HOOK(int, 0, binder_transaction, struct task_struct *from, -- struct task_struct *to) --LSM_HOOK(int, 0, binder_transfer_binder, struct task_struct *from, -- struct task_struct *to) --LSM_HOOK(int, 0, binder_transfer_file, struct task_struct *from, -- struct task_struct *to, struct file *file) -+LSM_HOOK(int, 0, binder_set_context_mgr, const struct cred *mgr) -+LSM_HOOK(int, 0, binder_transaction, const struct cred *from, -+ const struct cred *to) -+LSM_HOOK(int, 0, binder_transfer_binder, const struct cred *from, -+ const struct cred *to) -+LSM_HOOK(int, 0, binder_transfer_file, const struct cred *from, -+ const struct cred *to, struct file *file) - LSM_HOOK(int, 0, ptrace_access_check, struct task_struct *child, - unsigned int mode) - LSM_HOOK(int, 0, ptrace_traceme, struct task_struct *parent) -diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h -index 5c4c5c0602cb7..59024618554e2 100644 ---- a/include/linux/lsm_hooks.h -+++ b/include/linux/lsm_hooks.h -@@ -1313,22 +1313,22 @@ - * - * @binder_set_context_mgr: - * Check whether @mgr is allowed to be the binder context manager. -- * @mgr contains the task_struct for the task being registered. -+ * @mgr contains the struct cred for the current binder process. - * Return 0 if permission is granted. - * @binder_transaction: - * Check whether @from is allowed to invoke a binder transaction call - * to @to. -- * @from contains the task_struct for the sending task. -- * @to contains the task_struct for the receiving task. -+ * @from contains the struct cred for the sending process. -+ * @to contains the struct cred for the receiving process. 
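The binder hooks above move from task_struct to cred so checks run against the stable credentials captured when the binder fd was opened, not against a task that may be exiting or have changed identity. A toy dispatch of that shape (every type here is local to the example, not the kernel's):

#include <stdio.h>

struct cred { unsigned int uid; };

/* the hook receives the *credentials*, not the tasks themselves */
struct security_ops {
	int (*binder_transaction)(const struct cred *from, const struct cred *to);
};

static int deny_cross_uid(const struct cred *from, const struct cred *to)
{
	return from->uid == to->uid ? 0 : -1;	/* -1 standing in for -EPERM */
}

static struct security_ops ops = { .binder_transaction = deny_cross_uid };

int main(void)
{
	struct cred sender = { 1000 }, receiver = { 0 };

	if (ops.binder_transaction(&sender, &receiver))
		printf("transaction denied\n");
	return 0;
}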
- * @binder_transfer_binder: - * Check whether @from is allowed to transfer a binder reference to @to. -- * @from contains the task_struct for the sending task. -- * @to contains the task_struct for the receiving task. -+ * @from contains the struct cred for the sending process. -+ * @to contains the struct cred for the receiving process. - * @binder_transfer_file: - * Check whether @from is allowed to transfer @file to @to. -- * @from contains the task_struct for the sending task. -+ * @from contains the struct cred for the sending process. - * @file contains the struct file being transferred. -- * @to contains the task_struct for the receiving task. -+ * @to contains the struct cred for the receiving process. - * - * @ptrace_access_check: - * Check permission before allowing the current process to trace the -diff --git a/include/linux/mailbox_controller.h b/include/linux/mailbox_controller.h -index 36d6ce673503c..6fee33cb52f58 100644 ---- a/include/linux/mailbox_controller.h -+++ b/include/linux/mailbox_controller.h -@@ -83,6 +83,7 @@ struct mbox_controller { - const struct of_phandle_args *sp); - /* Internal to API */ - struct hrtimer poll_hrt; -+ spinlock_t poll_hrt_lock; - struct list_head node; - }; + break; +@@ -6208,7 +6237,7 @@ const struct bpf_func_proto bpf_btf_find_by_name_kind_proto = { + .func = bpf_btf_find_by_name_kind, + .gpl_only = false, + .ret_type = RET_INTEGER, +- .arg1_type = ARG_PTR_TO_MEM, ++ .arg1_type = ARG_PTR_TO_MEM | MEM_RDONLY, + .arg2_type = ARG_CONST_SIZE, + .arg3_type = ARG_ANYTHING, + .arg4_type = ARG_ANYTHING, +diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c +index 03145d45e3d5b..eb3e787a3a977 100644 +--- a/kernel/bpf/cgroup.c ++++ b/kernel/bpf/cgroup.c +@@ -667,6 +667,62 @@ static struct bpf_prog_list *find_detach_entry(struct list_head *progs, + return ERR_PTR(-ENOENT); + } -diff --git a/include/linux/mbcache.h b/include/linux/mbcache.h -index 20f1e3ff60130..8eca7f25c4320 100644 ---- a/include/linux/mbcache.h -+++ b/include/linux/mbcache.h -@@ -30,15 +30,23 @@ void mb_cache_destroy(struct mb_cache *cache); - int mb_cache_entry_create(struct mb_cache *cache, gfp_t mask, u32 key, - u64 value, bool reusable); - void __mb_cache_entry_free(struct mb_cache_entry *entry); -+void mb_cache_entry_wait_unused(struct mb_cache_entry *entry); - static inline int mb_cache_entry_put(struct mb_cache *cache, - struct mb_cache_entry *entry) - { -- if (!atomic_dec_and_test(&entry->e_refcnt)) -+ unsigned int cnt = atomic_dec_return(&entry->e_refcnt); ++/** ++ * purge_effective_progs() - After compute_effective_progs fails to alloc new ++ * cgrp->bpf.inactive table we can recover by ++ * recomputing the array in place. 
++ * ++ * @cgrp: The cgroup which descendants to travers ++ * @prog: A program to detach or NULL ++ * @link: A link to detach or NULL ++ * @atype: Type of detach operation ++ */ ++static void purge_effective_progs(struct cgroup *cgrp, struct bpf_prog *prog, ++ struct bpf_cgroup_link *link, ++ enum cgroup_bpf_attach_type atype) ++{ ++ struct cgroup_subsys_state *css; ++ struct bpf_prog_array *progs; ++ struct bpf_prog_list *pl; ++ struct list_head *head; ++ struct cgroup *cg; ++ int pos; + -+ if (cnt > 0) { -+ if (cnt <= 3) -+ wake_up_var(&entry->e_refcnt); - return 0; ++ /* recompute effective prog array in place */ ++ css_for_each_descendant_pre(css, &cgrp->self) { ++ struct cgroup *desc = container_of(css, struct cgroup, self); ++ ++ if (percpu_ref_is_zero(&desc->bpf.refcnt)) ++ continue; ++ ++ /* find position of link or prog in effective progs array */ ++ for (pos = 0, cg = desc; cg; cg = cgroup_parent(cg)) { ++ if (pos && !(cg->bpf.flags[atype] & BPF_F_ALLOW_MULTI)) ++ continue; ++ ++ head = &cg->bpf.progs[atype]; ++ list_for_each_entry(pl, head, node) { ++ if (!prog_list_prog(pl)) ++ continue; ++ if (pl->prog == prog && pl->link == link) ++ goto found; ++ pos++; ++ } ++ } ++ ++ /* no link or prog match, skip the cgroup of this layer */ ++ continue; ++found: ++ progs = rcu_dereference_protected( ++ desc->bpf.effective[atype], ++ lockdep_is_held(&cgroup_mutex)); ++ ++ /* Remove the program from the array */ ++ WARN_ONCE(bpf_prog_array_delete_safe_at(progs, pos), ++ "Failed to purge a prog from array at index %d", pos); + } - __mb_cache_entry_free(entry); - return 1; - } - -+struct mb_cache_entry *mb_cache_entry_delete_or_get(struct mb_cache *cache, -+ u32 key, u64 value); - void mb_cache_entry_delete(struct mb_cache *cache, u32 key, u64 value); - struct mb_cache_entry *mb_cache_entry_get(struct mb_cache *cache, u32 key, - u64 value); -diff --git a/include/linux/mc146818rtc.h b/include/linux/mc146818rtc.h -index 0661af17a7584..3038124c61154 100644 ---- a/include/linux/mc146818rtc.h -+++ b/include/linux/mc146818rtc.h -@@ -86,6 +86,8 @@ struct cmos_rtc_board_info { - /* 2 values for divider stage reset, others for "testing purposes only" */ - # define RTC_DIV_RESET1 0x60 - # define RTC_DIV_RESET2 0x70 -+ /* In AMD BKDG bit 5 and 6 are reserved, bit 4 is for select dv0 bank */ -+# define RTC_AMD_BANK_SELECT 0x10 - /* Periodic intr. / Square wave rate select. 0=none, 1=32.8kHz,... 
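purge_effective_progs() recovers from the allocation failure by locating the program's position in each descendant's effective array and deleting it in place. Position-based removal in miniature (plain arrays, not the kernel's bpf_prog_array):

#include <stdio.h>

/* remove the element at @pos, shifting the tail left; -1 if out of range */
static int array_delete_at(int *arr, int *cnt, int pos)
{
	if (pos < 0 || pos >= *cnt)
		return -1;
	for (int i = pos; i < *cnt - 1; i++)
		arr[i] = arr[i + 1];
	(*cnt)--;
	return 0;
}

int main(void)
{
	int progs[] = { 10, 20, 30, 40 };
	int cnt = 4;

	array_delete_at(progs, &cnt, 1);	/* drop the prog at index 1 */
	for (int i = 0; i < cnt; i++)
		printf("%d ", progs[i]);
	printf("\n");
	return 0;
}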
15=2Hz */ - # define RTC_RATE_SELECT 0x0F ++} ++ + /** + * __cgroup_bpf_detach() - Detach the program or link from a cgroup, and + * propagate the change to descendants +@@ -686,7 +742,6 @@ int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog, + struct bpf_prog_list *pl; + struct list_head *progs; + u32 flags; +- int err; -@@ -123,7 +125,8 @@ struct cmos_rtc_board_info { - #define RTC_IO_EXTENT_USED RTC_IO_EXTENT - #endif /* ARCH_RTC_LOCATION */ + atype = to_cgroup_bpf_attach_type(type); + if (atype < 0) +@@ -708,9 +763,12 @@ int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog, + pl->prog = NULL; + pl->link = NULL; --unsigned int mc146818_get_time(struct rtc_time *time); -+bool mc146818_does_rtc_work(void); -+int mc146818_get_time(struct rtc_time *time); - int mc146818_set_time(struct rtc_time *time); +- err = update_effective_progs(cgrp, atype); +- if (err) +- goto cleanup; ++ if (update_effective_progs(cgrp, atype)) { ++ /* if update effective array failed replace the prog with a dummy prog*/ ++ pl->prog = old_prog; ++ pl->link = link; ++ purge_effective_progs(cgrp, old_prog, link, atype); ++ } - #endif /* _MC146818RTC_H */ -diff --git a/include/linux/memblock.h b/include/linux/memblock.h -index 34de69b3b8bad..5df38332e4139 100644 ---- a/include/linux/memblock.h -+++ b/include/linux/memblock.h -@@ -388,8 +388,8 @@ phys_addr_t memblock_alloc_range_nid(phys_addr_t size, - phys_addr_t end, int nid, bool exact_nid); - phys_addr_t memblock_phys_alloc_try_nid(phys_addr_t size, phys_addr_t align, int nid); + /* now can actually delete it from this cgroup list */ + list_del(&pl->node); +@@ -722,12 +780,6 @@ int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog, + bpf_prog_put(old_prog); + static_branch_dec(&cgroup_bpf_enabled_key[atype]); + return 0; +- +-cleanup: +- /* restore back prog or link */ +- pl->prog = old_prog; +- pl->link = link; +- return err; + } --static inline phys_addr_t memblock_phys_alloc(phys_addr_t size, -- phys_addr_t align) -+static __always_inline phys_addr_t memblock_phys_alloc(phys_addr_t size, -+ phys_addr_t align) - { - return memblock_phys_alloc_range(size, align, 0, - MEMBLOCK_ALLOC_ACCESSIBLE); -diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h -index 3096c9a0ee014..4f189b17dafcc 100644 ---- a/include/linux/memcontrol.h -+++ b/include/linux/memcontrol.h -@@ -223,7 +223,7 @@ struct obj_cgroup { - struct mem_cgroup *memcg; - atomic_t nr_charged_bytes; - union { -- struct list_head list; -+ struct list_head list; /* protected by objcg_lock */ - struct rcu_head rcu; - }; + /* Must be called with cgroup_mutex held to avoid races. 
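The memcg readers above clamp transiently negative sums to zero: per-CPU deltas are folded in no particular order, so a reader can observe a sum below zero even though the true counter never was. The effect in isolation:

#include <stdio.h>

int main(void)
{
	/* per-CPU deltas for one counter; CPU 1's +2 is not yet visible */
	long percpu_deltas[4] = { -3, 0, 1, 0 };
	long x = 0;

	for (int cpu = 0; cpu < 4; cpu++)
		x += percpu_deltas[cpu];

	printf("raw sum: %ld\n", x);	/* -2: a transient artifact */

	/* what the fix does under CONFIG_SMP: never report a negative count */
	if (x < 0)
		x = 0;
	printf("reported: %ld\n", x);
	return 0;
}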
*/ +@@ -1753,7 +1805,7 @@ static const struct bpf_func_proto bpf_sysctl_set_new_value_proto = { + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, +- .arg2_type = ARG_PTR_TO_MEM, ++ .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY, + .arg3_type = ARG_CONST_SIZE, }; -@@ -320,7 +320,8 @@ struct mem_cgroup { - int kmemcg_id; - enum memcg_kmem_state kmem_state; - struct obj_cgroup __rcu *objcg; -- struct list_head objcg_list; /* list of inherited objcgs */ -+ /* list of inherited objcgs, protected by objcg_lock */ -+ struct list_head objcg_list; - #endif - - MEMCG_PADDING(_pad2_); -@@ -965,19 +966,30 @@ static inline void mod_memcg_state(struct mem_cgroup *memcg, - static inline unsigned long memcg_page_state(struct mem_cgroup *memcg, int idx) - { -- return READ_ONCE(memcg->vmstats.state[idx]); -+ long x = READ_ONCE(memcg->vmstats.state[idx]); -+#ifdef CONFIG_SMP -+ if (x < 0) -+ x = 0; -+#endif -+ return x; - } +@@ -1773,6 +1825,8 @@ sysctl_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) + return &bpf_sysctl_get_new_value_proto; + case BPF_FUNC_sysctl_set_new_value: + return &bpf_sysctl_set_new_value_proto; ++ case BPF_FUNC_ktime_get_coarse_ns: ++ return &bpf_ktime_get_coarse_ns_proto; + default: + return cgroup_base_func_proto(func_id, prog); + } +diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c +index 6e3ae90ad107a..cea0d1296599c 100644 +--- a/kernel/bpf/core.c ++++ b/kernel/bpf/core.c +@@ -32,6 +32,7 @@ + #include <linux/perf_event.h> + #include <linux/extable.h> + #include <linux/log2.h> ++#include <linux/nospec.h> - static inline unsigned long lruvec_page_state(struct lruvec *lruvec, - enum node_stat_item idx) + #include <asm/barrier.h> + #include <asm/unaligned.h> +@@ -66,11 +67,13 @@ void *bpf_internal_load_pointer_neg_helper(const struct sk_buff *skb, int k, uns { - struct mem_cgroup_per_node *pn; -+ long x; - - if (mem_cgroup_disabled()) - return node_page_state(lruvec_pgdat(lruvec), idx); + u8 *ptr = NULL; - pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec); -- return READ_ONCE(pn->lruvec_stats.state[idx]); -+ x = READ_ONCE(pn->lruvec_stats.state[idx]); -+#ifdef CONFIG_SMP -+ if (x < 0) -+ x = 0; -+#endif -+ return x; - } +- if (k >= SKF_NET_OFF) ++ if (k >= SKF_NET_OFF) { + ptr = skb_network_header(skb) + k - SKF_NET_OFF; +- else if (k >= SKF_LL_OFF) ++ } else if (k >= SKF_LL_OFF) { ++ if (unlikely(!skb_mac_header_was_set(skb))) ++ return NULL; + ptr = skb_mac_header(skb) + k - SKF_LL_OFF; +- ++ } + if (ptr >= skb->head && ptr + size <= skb_tail_pointer(skb)) + return ptr; - static inline unsigned long lruvec_page_state_local(struct lruvec *lruvec, -@@ -1001,6 +1013,7 @@ static inline unsigned long lruvec_page_state_local(struct lruvec *lruvec, - } +@@ -389,6 +392,13 @@ static int bpf_adj_branches(struct bpf_prog *prog, u32 pos, s32 end_old, + i = end_new; + insn = prog->insnsi + end_old; + } ++ if (bpf_pseudo_func(insn)) { ++ ret = bpf_adj_delta_to_imm(insn, pos, end_old, ++ end_new, i, probe_pass); ++ if (ret) ++ return ret; ++ continue; ++ } + code = insn->code; + if ((BPF_CLASS(code) != BPF_JMP && + BPF_CLASS(code) != BPF_JMP32) || +@@ -1639,9 +1649,7 @@ out: + * reuse preexisting logic from Spectre v1 mitigation that + * happens to produce the required code on x86 for v4 as well. 
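Several hunks in this range replace dedicated register types (PTR_TO_MEM_OR_NULL, PTR_TO_RDONLY_BUF_OR_NULL, ...) with a base type plus composable flags kept in the high bits, split back out via base_type()/type_flag(). The scheme reduced to its arithmetic (constants here are illustrative, not the kernel's actual values):

#include <stdio.h>

/* low bits: base type; high bits: orthogonal modifiers */
enum { PTR_TO_MEM = 1, PTR_TO_BUF = 2, BASE_TYPE_MASK = 0xff };
#define PTR_MAYBE_NULL	(1u << 8)
#define MEM_RDONLY	(1u << 9)

static unsigned int base_type(unsigned int t) { return t & BASE_TYPE_MASK; }
static unsigned int type_flag(unsigned int t) { return t & ~BASE_TYPE_MASK; }

int main(void)
{
	unsigned int t = PTR_TO_BUF | PTR_MAYBE_NULL | MEM_RDONLY;

	printf("base=%u maybe_null=%d rdonly=%d\n",
	       base_type(t),
	       !!(type_flag(t) & PTR_MAYBE_NULL),
	       !!(type_flag(t) & MEM_RDONLY));
	return 0;
}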
+ */ +-#ifdef CONFIG_X86 + barrier_nospec(); +-#endif + CONT; + #define LDST(SIZEOP, SIZE) \ + STX_MEM_##SIZEOP: \ +@@ -1652,6 +1660,11 @@ out: + CONT; \ + LDX_MEM_##SIZEOP: \ + DST = *(SIZE *)(unsigned long) (SRC + insn->off); \ ++ CONT; \ ++ LDX_PROBE_MEM_##SIZEOP: \ ++ bpf_probe_read_kernel(&DST, sizeof(SIZE), \ ++ (const void *)(long) (SRC + insn->off)); \ ++ DST = *((SIZE *)&DST); \ + CONT; - void mem_cgroup_flush_stats(void); -+void mem_cgroup_flush_stats_delayed(void); + LDST(B, u8) +@@ -1659,15 +1672,6 @@ out: + LDST(W, u32) + LDST(DW, u64) + #undef LDST +-#define LDX_PROBE(SIZEOP, SIZE) \ +- LDX_PROBE_MEM_##SIZEOP: \ +- bpf_probe_read_kernel(&DST, SIZE, (const void *)(long) (SRC + insn->off)); \ +- CONT; +- LDX_PROBE(B, 1) +- LDX_PROBE(H, 2) +- LDX_PROBE(W, 4) +- LDX_PROBE(DW, 8) +-#undef LDX_PROBE - void __mod_memcg_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx, - int val); -@@ -1421,6 +1434,10 @@ static inline void mem_cgroup_flush_stats(void) + #define ATOMIC_ALU_OP(BOP, KOP) \ + case BOP: \ +diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c +index 32471ba027086..e7f45a966e6b5 100644 +--- a/kernel/bpf/hashtab.c ++++ b/kernel/bpf/hashtab.c +@@ -161,17 +161,25 @@ static inline int htab_lock_bucket(const struct bpf_htab *htab, + unsigned long *pflags) { - } + unsigned long flags; ++ bool use_raw_lock; -+static inline void mem_cgroup_flush_stats_delayed(void) -+{ -+} -+ - static inline void __mod_memcg_lruvec_state(struct lruvec *lruvec, - enum node_stat_item idx, int val) - { -diff --git a/include/linux/memregion.h b/include/linux/memregion.h -index e11595256cac0..c04c4fd2e2091 100644 ---- a/include/linux/memregion.h -+++ b/include/linux/memregion.h -@@ -16,7 +16,7 @@ static inline int memregion_alloc(gfp_t gfp) - { - return -ENOMEM; - } --void memregion_free(int id) -+static inline void memregion_free(int id) - { - } - #endif -diff --git a/include/linux/memremap.h b/include/linux/memremap.h -index c0e9d35889e8d..a8bc588fe7aa8 100644 ---- a/include/linux/memremap.h -+++ b/include/linux/memremap.h -@@ -72,16 +72,6 @@ struct dev_pagemap_ops { - */ - void (*page_free)(struct page *page); + hash = hash & HASHTAB_MAP_LOCK_MASK; -- /* -- * Transition the refcount in struct dev_pagemap to the dead state. -- */ -- void (*kill)(struct dev_pagemap *pgmap); -- -- /* -- * Wait for refcount in struct dev_pagemap to be idle and reap it. -- */ -- void (*cleanup)(struct dev_pagemap *pgmap); -- - /* - * Used for private (un-addressable) device memory only. Must migrate - * the page back to a CPU accessible page. 
-@@ -95,8 +85,7 @@ struct dev_pagemap_ops { - * struct dev_pagemap - metadata for ZONE_DEVICE mappings - * @altmap: pre-allocated/reserved memory for vmemmap allocations - * @ref: reference count that pins the devm_memremap_pages() mapping -- * @internal_ref: internal reference if @ref is not provided by the caller -- * @done: completion for @internal_ref -+ * @done: completion for @ref - * @type: memory type: see MEMORY_* in memory_hotplug.h - * @flags: PGMAP_* flags to specify defailed behavior - * @ops: method table -@@ -109,8 +98,7 @@ struct dev_pagemap_ops { - */ - struct dev_pagemap { - struct vmem_altmap altmap; -- struct percpu_ref *ref; -- struct percpu_ref internal_ref; -+ struct percpu_ref ref; - struct completion done; - enum memory_type type; - unsigned int flags; -@@ -191,7 +179,7 @@ static inline unsigned long memremap_compat_align(void) - static inline void put_dev_pagemap(struct dev_pagemap *pgmap) +- migrate_disable(); ++ use_raw_lock = htab_use_raw_lock(htab); ++ if (use_raw_lock) ++ preempt_disable(); ++ else ++ migrate_disable(); + if (unlikely(__this_cpu_inc_return(*(htab->map_locked[hash])) != 1)) { + __this_cpu_dec(*(htab->map_locked[hash])); +- migrate_enable(); ++ if (use_raw_lock) ++ preempt_enable(); ++ else ++ migrate_enable(); + return -EBUSY; + } + +- if (htab_use_raw_lock(htab)) ++ if (use_raw_lock) + raw_spin_lock_irqsave(&b->raw_lock, flags); + else + spin_lock_irqsave(&b->lock, flags); +@@ -184,13 +192,18 @@ static inline void htab_unlock_bucket(const struct bpf_htab *htab, + struct bucket *b, u32 hash, + unsigned long flags) { - if (pgmap) -- percpu_ref_put(pgmap->ref); -+ percpu_ref_put(&pgmap->ref); ++ bool use_raw_lock = htab_use_raw_lock(htab); ++ + hash = hash & HASHTAB_MAP_LOCK_MASK; +- if (htab_use_raw_lock(htab)) ++ if (use_raw_lock) + raw_spin_unlock_irqrestore(&b->raw_lock, flags); + else + spin_unlock_irqrestore(&b->lock, flags); + __this_cpu_dec(*(htab->map_locked[hash])); +- migrate_enable(); ++ if (use_raw_lock) ++ preempt_enable(); ++ else ++ migrate_enable(); } - #endif /* _LINUX_MEMREMAP_H_ */ -diff --git a/include/linux/mfd/t7l66xb.h b/include/linux/mfd/t7l66xb.h -index 69632c1b07bd8..ae3e7a5c5219b 100644 ---- a/include/linux/mfd/t7l66xb.h -+++ b/include/linux/mfd/t7l66xb.h -@@ -12,7 +12,6 @@ - - struct t7l66xb_platform_data { - int (*enable)(struct platform_device *dev); -- int (*disable)(struct platform_device *dev); - int (*suspend)(struct platform_device *dev); - int (*resume)(struct platform_device *dev); - -diff --git a/include/linux/mhi.h b/include/linux/mhi.h -index 7239858790353..a5cc4cdf9cc86 100644 ---- a/include/linux/mhi.h -+++ b/include/linux/mhi.h -@@ -663,6 +663,19 @@ int mhi_pm_suspend(struct mhi_controller *mhi_cntrl); - */ - int mhi_pm_resume(struct mhi_controller *mhi_cntrl); + static bool htab_lru_map_delete_node(void *arg, struct bpf_lru_node *node); +@@ -291,12 +304,8 @@ static struct htab_elem *prealloc_lru_pop(struct bpf_htab *htab, void *key, + struct htab_elem *l; -+/** -+ * mhi_pm_resume_force - Force resume MHI from suspended state -+ * @mhi_cntrl: MHI controller -+ * -+ * Resume the device irrespective of its MHI state. As per the MHI spec, devices -+ * has to be in M3 state during resume. But some devices seem to be in a -+ * different MHI state other than M3 but they continue working fine if allowed. -+ * This API is intented to be used for such devices. 
-+ * -+ * Return: 0 if the resume succeeds, a negative error code otherwise -+ */ -+int mhi_pm_resume_force(struct mhi_controller *mhi_cntrl); -+ - /** - * mhi_download_rddm_image - Download ramdump image from device for - * debugging purpose. -diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h -index f17d2101af7a0..4c678de4608dd 100644 ---- a/include/linux/mlx5/driver.h -+++ b/include/linux/mlx5/driver.h -@@ -759,6 +759,7 @@ struct mlx5_core_dev { - enum mlx5_device_state state; - /* sync interface state */ - struct mutex intf_state_mutex; -+ struct lock_class_key lock_key; - unsigned long intf_state; - struct mlx5_priv priv; - struct mlx5_profile profile; -diff --git a/include/linux/mlx5/eswitch.h b/include/linux/mlx5/eswitch.h -index 4ab5c1fc1270d..a09ed4c8361b6 100644 ---- a/include/linux/mlx5/eswitch.h -+++ b/include/linux/mlx5/eswitch.h -@@ -136,13 +136,13 @@ u32 mlx5_eswitch_get_vport_metadata_for_set(struct mlx5_eswitch *esw, - ESW_TUN_OPTS_SLOW_TABLE_GOTO_VPORT) - #define ESW_TUN_SLOW_TABLE_GOTO_VPORT_MARK ESW_TUN_OPTS_MASK + if (node) { +- u32 key_size = htab->map.key_size; +- + l = container_of(node, struct htab_elem, lru_node); +- memcpy(l->key, key, key_size); +- check_and_init_map_value(&htab->map, +- l->key + round_up(key_size, 8)); ++ memcpy(l->key, key, htab->map.key_size); + return l; + } --u8 mlx5_eswitch_mode(struct mlx5_core_dev *dev); -+u8 mlx5_eswitch_mode(const struct mlx5_core_dev *dev); - u16 mlx5_eswitch_get_total_vports(const struct mlx5_core_dev *dev); - struct mlx5_core_dev *mlx5_eswitch_get_core_dev(struct mlx5_eswitch *esw); +@@ -1662,8 +1671,11 @@ again_nocopy: + /* do not grab the lock unless need it (bucket_cnt > 0). */ + if (locked) { + ret = htab_lock_bucket(htab, b, batch, &flags); +- if (ret) +- goto next_batch; ++ if (ret) { ++ rcu_read_unlock(); ++ bpf_enable_instrumentation(); ++ goto after_loop; ++ } + } - #else /* CONFIG_MLX5_ESWITCH */ + bucket_cnt = 0; +@@ -2023,6 +2035,7 @@ static int bpf_iter_init_hash_map(void *priv_data, + seq_info->percpu_value_buf = value_buf; + } --static inline u8 mlx5_eswitch_mode(struct mlx5_core_dev *dev) -+static inline u8 mlx5_eswitch_mode(const struct mlx5_core_dev *dev) ++ bpf_map_inc_with_uref(map); + seq_info->map = map; + seq_info->htab = container_of(map, struct bpf_htab, map); + return 0; +@@ -2032,6 +2045,7 @@ static void bpf_iter_fini_hash_map(void *priv_data) { - return MLX5_ESWITCH_NONE; + struct bpf_iter_seq_hash_map_info *seq_info = priv_data; + ++ bpf_map_put_with_uref(seq_info->map); + kfree(seq_info->percpu_value_buf); } -diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h -index 993204a6c1a13..cd9d1c95129e3 100644 ---- a/include/linux/mlx5/mlx5_ifc.h -+++ b/include/linux/mlx5/mlx5_ifc.h -@@ -3309,8 +3309,8 @@ enum { - }; - enum { -- MLX5_TIRC_LRO_ENABLE_MASK_IPV4_LRO = 0x1, -- MLX5_TIRC_LRO_ENABLE_MASK_IPV6_LRO = 0x2, -+ MLX5_TIRC_PACKET_MERGE_MASK_IPV4_LRO = BIT(0), -+ MLX5_TIRC_PACKET_MERGE_MASK_IPV6_LRO = BIT(1), +diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c +index 9aabf84afd4b2..a711ffe238932 100644 +--- a/kernel/bpf/helpers.c ++++ b/kernel/bpf/helpers.c +@@ -530,7 +530,7 @@ const struct bpf_func_proto bpf_strtol_proto = { + .func = bpf_strtol, + .gpl_only = false, + .ret_type = RET_INTEGER, +- .arg1_type = ARG_PTR_TO_MEM, ++ .arg1_type = ARG_PTR_TO_MEM | MEM_RDONLY, + .arg2_type = ARG_CONST_SIZE, + .arg3_type = ARG_ANYTHING, + .arg4_type = ARG_PTR_TO_LONG, +@@ -558,7 +558,7 @@ const struct bpf_func_proto bpf_strtoul_proto = { + .func = 
bpf_strtoul, + .gpl_only = false, + .ret_type = RET_INTEGER, +- .arg1_type = ARG_PTR_TO_MEM, ++ .arg1_type = ARG_PTR_TO_MEM | MEM_RDONLY, + .arg2_type = ARG_CONST_SIZE, + .arg3_type = ARG_ANYTHING, + .arg4_type = ARG_PTR_TO_LONG, +@@ -630,7 +630,7 @@ const struct bpf_func_proto bpf_event_output_data_proto = { + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_CONST_MAP_PTR, + .arg3_type = ARG_ANYTHING, +- .arg4_type = ARG_PTR_TO_MEM, ++ .arg4_type = ARG_PTR_TO_MEM | MEM_RDONLY, + .arg5_type = ARG_CONST_SIZE_OR_ZERO, }; - enum { -@@ -3335,7 +3335,7 @@ struct mlx5_ifc_tirc_bits { +@@ -667,7 +667,7 @@ BPF_CALL_2(bpf_per_cpu_ptr, const void *, ptr, u32, cpu) + const struct bpf_func_proto bpf_per_cpu_ptr_proto = { + .func = bpf_per_cpu_ptr, + .gpl_only = false, +- .ret_type = RET_PTR_TO_MEM_OR_BTF_ID_OR_NULL, ++ .ret_type = RET_PTR_TO_MEM_OR_BTF_ID | PTR_MAYBE_NULL | MEM_RDONLY, + .arg1_type = ARG_PTR_TO_PERCPU_BTF_ID, + .arg2_type = ARG_ANYTHING, + }; +@@ -680,7 +680,7 @@ BPF_CALL_1(bpf_this_cpu_ptr, const void *, percpu_ptr) + const struct bpf_func_proto bpf_this_cpu_ptr_proto = { + .func = bpf_this_cpu_ptr, + .gpl_only = false, +- .ret_type = RET_PTR_TO_MEM_OR_BTF_ID, ++ .ret_type = RET_PTR_TO_MEM_OR_BTF_ID | MEM_RDONLY, + .arg1_type = ARG_PTR_TO_PERCPU_BTF_ID, + }; - u8 reserved_at_80[0x4]; - u8 lro_timeout_period_usecs[0x10]; -- u8 lro_enable_mask[0x4]; -+ u8 packet_merge_mask[0x4]; - u8 lro_max_ip_payload_size[0x8]; +@@ -1013,7 +1013,7 @@ const struct bpf_func_proto bpf_snprintf_proto = { + .arg1_type = ARG_PTR_TO_MEM_OR_NULL, + .arg2_type = ARG_CONST_SIZE_OR_ZERO, + .arg3_type = ARG_PTR_TO_CONST_STR, +- .arg4_type = ARG_PTR_TO_MEM_OR_NULL, ++ .arg4_type = ARG_PTR_TO_MEM | PTR_MAYBE_NULL | MEM_RDONLY, + .arg5_type = ARG_CONST_SIZE_OR_ZERO, + }; - u8 reserved_at_a0[0x40]; -@@ -5028,12 +5028,11 @@ struct mlx5_ifc_query_qp_out_bits { +@@ -1367,8 +1367,6 @@ bpf_base_func_proto(enum bpf_func_id func_id) + return &bpf_ktime_get_ns_proto; + case BPF_FUNC_ktime_get_boot_ns: + return &bpf_ktime_get_boot_ns_proto; +- case BPF_FUNC_ktime_get_coarse_ns: +- return &bpf_ktime_get_coarse_ns_proto; + case BPF_FUNC_ringbuf_output: + return &bpf_ringbuf_output_proto; + case BPF_FUNC_ringbuf_reserve: +diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c +index 80da1db47c686..5a8d9f7467bf4 100644 +--- a/kernel/bpf/inode.c ++++ b/kernel/bpf/inode.c +@@ -648,12 +648,22 @@ static int bpf_parse_param(struct fs_context *fc, struct fs_parameter *param) + int opt; - u8 syndrome[0x20]; + opt = fs_parse(fc, bpf_fs_parameters, param, &result); +- if (opt < 0) ++ if (opt < 0) { + /* We might like to report bad mount options here, but + * traditionally we've ignored all mount options, so we'd + * better continue to ignore non-existing options for bpf. + */ +- return opt == -ENOPARAM ? 
0 : opt; ++ if (opt == -ENOPARAM) { ++ opt = vfs_parse_fs_param_source(fc, param); ++ if (opt != -ENOPARAM) ++ return opt; ++ ++ return 0; ++ } ++ ++ if (opt < 0) ++ return opt; ++ } -- u8 reserved_at_40[0x20]; -- u8 ece[0x20]; -+ u8 reserved_at_40[0x40]; + switch (opt) { + case OPT_MODE: +diff --git a/kernel/bpf/map_iter.c b/kernel/bpf/map_iter.c +index 6a9542af4212a..b0fa190b09790 100644 +--- a/kernel/bpf/map_iter.c ++++ b/kernel/bpf/map_iter.c +@@ -174,9 +174,9 @@ static const struct bpf_iter_reg bpf_map_elem_reg_info = { + .ctx_arg_info_size = 2, + .ctx_arg_info = { + { offsetof(struct bpf_iter__bpf_map_elem, key), +- PTR_TO_RDONLY_BUF_OR_NULL }, ++ PTR_TO_BUF | PTR_MAYBE_NULL | MEM_RDONLY }, + { offsetof(struct bpf_iter__bpf_map_elem, value), +- PTR_TO_RDWR_BUF_OR_NULL }, ++ PTR_TO_BUF | PTR_MAYBE_NULL }, + }, + }; - u8 opt_param_mask[0x20]; +diff --git a/kernel/bpf/offload.c b/kernel/bpf/offload.c +index bd09290e36484..fcdd28224f532 100644 +--- a/kernel/bpf/offload.c ++++ b/kernel/bpf/offload.c +@@ -216,9 +216,6 @@ static void __bpf_prog_offload_destroy(struct bpf_prog *prog) + if (offload->dev_state) + offload->offdev->ops->destroy(prog); + +- /* Make sure BPF_PROG_GET_NEXT_ID can't find this dead program */ +- bpf_prog_free_id(prog, true); +- + list_del_init(&offload->offloads); + kfree(offload); + prog->aux->offload = NULL; +diff --git a/kernel/bpf/percpu_freelist.c b/kernel/bpf/percpu_freelist.c +index 3d897de890612..bbab8bb4b2fda 100644 +--- a/kernel/bpf/percpu_freelist.c ++++ b/kernel/bpf/percpu_freelist.c +@@ -102,22 +102,21 @@ void pcpu_freelist_populate(struct pcpu_freelist *s, void *buf, u32 elem_size, + u32 nr_elems) + { + struct pcpu_freelist_head *head; +- int i, cpu, pcpu_entries; ++ unsigned int cpu, cpu_idx, i, j, n, m; + +- pcpu_entries = nr_elems / num_possible_cpus() + 1; +- i = 0; ++ n = nr_elems / num_possible_cpus(); ++ m = nr_elems % num_possible_cpus(); -- u8 reserved_at_a0[0x20]; -+ u8 ece[0x20]; ++ cpu_idx = 0; + for_each_possible_cpu(cpu) { +-again: + head = per_cpu_ptr(s->freelist, cpu); +- /* No locking required as this is not visible yet. */ +- pcpu_freelist_push_node(head, buf); +- i++; +- buf += elem_size; +- if (i == nr_elems) +- break; +- if (i % pcpu_entries) +- goto again; ++ j = n + (cpu_idx < m ? 1 : 0); ++ for (i = 0; i < j; i++) { ++ /* No locking required as this is not visible yet. 
*/ ++ pcpu_freelist_push_node(head, buf); ++ buf += elem_size; ++ } ++ cpu_idx++; + } + } - struct mlx5_ifc_qpc_bits qpc; +diff --git a/kernel/bpf/ringbuf.c b/kernel/bpf/ringbuf.c +index 9e0c10c6892ad..710ba9de12ce4 100644 +--- a/kernel/bpf/ringbuf.c ++++ b/kernel/bpf/ringbuf.c +@@ -104,7 +104,7 @@ static struct bpf_ringbuf *bpf_ringbuf_area_alloc(size_t data_sz, int numa_node) + } -@@ -6369,7 +6368,7 @@ struct mlx5_ifc_modify_tir_bitmask_bits { - u8 reserved_at_3c[0x1]; - u8 hash[0x1]; - u8 reserved_at_3e[0x1]; -- u8 lro[0x1]; -+ u8 packet_merge[0x1]; + rb = vmap(pages, nr_meta_pages + 2 * nr_data_pages, +- VM_ALLOC | VM_USERMAP, PAGE_KERNEL); ++ VM_MAP | VM_USERMAP, PAGE_KERNEL); + if (rb) { + kmemleak_not_leak(pages); + rb->pages = pages; +@@ -444,7 +444,7 @@ const struct bpf_func_proto bpf_ringbuf_output_proto = { + .func = bpf_ringbuf_output, + .ret_type = RET_INTEGER, + .arg1_type = ARG_CONST_MAP_PTR, +- .arg2_type = ARG_PTR_TO_MEM, ++ .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY, + .arg3_type = ARG_CONST_SIZE_OR_ZERO, + .arg4_type = ARG_ANYTHING, }; +diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c +index 6e75bbee39f0b..7efae3af62017 100644 +--- a/kernel/bpf/stackmap.c ++++ b/kernel/bpf/stackmap.c +@@ -119,7 +119,6 @@ static struct bpf_map *stack_map_alloc(union bpf_attr *attr) + return ERR_PTR(-E2BIG); - struct mlx5_ifc_modify_tir_out_bits { -@@ -9508,8 +9507,8 @@ struct mlx5_ifc_bufferx_reg_bits { - u8 reserved_at_0[0x6]; - u8 lossy[0x1]; - u8 epsb[0x1]; -- u8 reserved_at_8[0xc]; -- u8 size[0xc]; -+ u8 reserved_at_8[0x8]; -+ u8 size[0x10]; - - u8 xoff_threshold[0x10]; - u8 xon_threshold[0x10]; -diff --git a/include/linux/mm.h b/include/linux/mm.h -index 73a52aba448f9..e4e1817bb3b89 100644 ---- a/include/linux/mm.h -+++ b/include/linux/mm.h -@@ -1511,11 +1511,18 @@ static inline u8 page_kasan_tag(const struct page *page) - - static inline void page_kasan_tag_set(struct page *page, u8 tag) - { -- if (kasan_enabled()) { -- tag ^= 0xff; -- page->flags &= ~(KASAN_TAG_MASK << KASAN_TAG_PGSHIFT); -- page->flags |= (tag & KASAN_TAG_MASK) << KASAN_TAG_PGSHIFT; -- } -+ unsigned long old_flags, flags; -+ -+ if (!kasan_enabled()) -+ return; -+ -+ tag ^= 0xff; -+ old_flags = READ_ONCE(page->flags); -+ do { -+ flags = old_flags; -+ flags &= ~(KASAN_TAG_MASK << KASAN_TAG_PGSHIFT); -+ flags |= (tag & KASAN_TAG_MASK) << KASAN_TAG_PGSHIFT; -+ } while (unlikely(!try_cmpxchg(&page->flags, &old_flags, flags))); + cost = n_buckets * sizeof(struct stack_map_bucket *) + sizeof(*smap); +- cost += n_buckets * (value_size + sizeof(struct stack_map_bucket)); + smap = bpf_map_area_alloc(cost, bpf_map_attr_numa_node(attr)); + if (!smap) + return ERR_PTR(-ENOMEM); +@@ -219,7 +218,7 @@ static void stack_map_get_build_id_offset(struct bpf_stack_build_id *id_offs, } - static inline void page_kasan_tag_reset(struct page *page) -@@ -2600,6 +2607,7 @@ extern int install_special_mapping(struct mm_struct *mm, - unsigned long flags, struct page **pages); - - unsigned long randomize_stack_top(unsigned long stack_top); -+unsigned long randomize_page(unsigned long start, unsigned long range); + static struct perf_callchain_entry * +-get_callchain_entry_for_task(struct task_struct *task, u32 init_nr) ++get_callchain_entry_for_task(struct task_struct *task, u32 max_depth) + { + #ifdef CONFIG_STACKTRACE + struct perf_callchain_entry *entry; +@@ -230,9 +229,8 @@ get_callchain_entry_for_task(struct task_struct *task, u32 init_nr) + if (!entry) + return NULL; - extern unsigned long get_unmapped_area(struct file *, unsigned 
long, unsigned long, unsigned long, unsigned long); +- entry->nr = init_nr + +- stack_trace_save_tsk(task, (unsigned long *)(entry->ip + init_nr), +- sysctl_perf_event_max_stack - init_nr, 0); ++ entry->nr = stack_trace_save_tsk(task, (unsigned long *)entry->ip, ++ max_depth, 0); -@@ -2851,7 +2859,8 @@ struct page *follow_page(struct vm_area_struct *vma, unsigned long address, - #define FOLL_FORCE 0x10 /* get_user_pages read/write w/o permission */ - #define FOLL_NOWAIT 0x20 /* if a disk transfer is needed, start the IO - * and return without waiting upon it */ --#define FOLL_POPULATE 0x40 /* fault in page */ -+#define FOLL_POPULATE 0x40 /* fault in pages (with FOLL_MLOCK) */ -+#define FOLL_NOFAULT 0x80 /* do not fault in pages */ - #define FOLL_HWPOISON 0x100 /* check page is hwpoisoned */ - #define FOLL_NUMA 0x200 /* force NUMA hinting page fault */ - #define FOLL_MIGRATION 0x400 /* wait for page to replace migration entry */ -@@ -3123,6 +3132,14 @@ extern int sysctl_memory_failure_recovery; - extern void shake_page(struct page *p); - extern atomic_long_t num_poisoned_pages __read_mostly; - extern int soft_offline_page(unsigned long pfn, int flags); -+#ifdef CONFIG_MEMORY_FAILURE -+extern int __get_huge_page_for_hwpoison(unsigned long pfn, int flags); -+#else -+static inline int __get_huge_page_for_hwpoison(unsigned long pfn, int flags) -+{ -+ return 0; -+} -+#endif + /* stack_trace_save_tsk() works on unsigned long array, while + * perf_callchain_entry uses u64 array. For 32-bit systems, it is +@@ -244,7 +242,7 @@ get_callchain_entry_for_task(struct task_struct *task, u32 init_nr) + int i; + /* copy data from the end to avoid using extra buffer */ +- for (i = entry->nr - 1; i >= (int)init_nr; i--) ++ for (i = entry->nr - 1; i >= 0; i--) + to[i] = (u64)(from[i]); + } - /* -diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h -index 37f9758751020..12c7f2d3e2107 100644 ---- a/include/linux/mmc/card.h -+++ b/include/linux/mmc/card.h -@@ -292,6 +292,7 @@ struct mmc_card { - #define MMC_QUIRK_BROKEN_IRQ_POLLING (1<<11) /* Polling SDIO_CCCR_INTx could create a fake interrupt */ - #define MMC_QUIRK_TRIM_BROKEN (1<<12) /* Skip trim */ - #define MMC_QUIRK_BROKEN_HPI (1<<13) /* Disable broken HPI support */ -+#define MMC_QUIRK_BROKEN_SD_DISCARD (1<<14) /* Disable broken SD discard support */ +@@ -261,27 +259,19 @@ static long __bpf_get_stackid(struct bpf_map *map, + { + struct bpf_stack_map *smap = container_of(map, struct bpf_stack_map, map); + struct stack_map_bucket *bucket, *new_bucket, *old_bucket; +- u32 max_depth = map->value_size / stack_map_data_size(map); +- /* stack_map_alloc() checks that max_depth <= sysctl_perf_event_max_stack */ +- u32 init_nr = sysctl_perf_event_max_stack - max_depth; + u32 skip = flags & BPF_F_SKIP_FIELD_MASK; + u32 hash, id, trace_nr, trace_len; + bool user = flags & BPF_F_USER_STACK; + u64 *ips; + bool hash_matches; - bool reenable_cmdq; /* Re-enable Command Queue */ +- /* get_perf_callchain() guarantees that trace->nr >= init_nr +- * and trace-nr <= sysctl_perf_event_max_stack, so trace_nr <= max_depth +- */ +- trace_nr = trace->nr - init_nr; +- +- if (trace_nr <= skip) ++ if (trace->nr <= skip) + /* skipping more than usable stack trace */ + return -EFAULT; -diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h -index 6a1d79d84675a..6ba1002165302 100644 ---- a/include/linux/mmzone.h -+++ b/include/linux/mmzone.h -@@ -1031,6 +1031,15 @@ static inline int is_highmem_idx(enum zone_type idx) - #endif - } +- trace_nr -= skip; ++ trace_nr = 
trace->nr - skip; + trace_len = trace_nr * sizeof(u64); +- ips = trace->ip + skip + init_nr; ++ ips = trace->ip + skip; + hash = jhash2((u32 *)ips, trace_len / sizeof(u32), 0); + id = hash & (smap->n_buckets - 1); + bucket = READ_ONCE(smap->buckets[id]); +@@ -338,8 +328,7 @@ BPF_CALL_3(bpf_get_stackid, struct pt_regs *, regs, struct bpf_map *, map, + u64, flags) + { + u32 max_depth = map->value_size / stack_map_data_size(map); +- /* stack_map_alloc() checks that max_depth <= sysctl_perf_event_max_stack */ +- u32 init_nr = sysctl_perf_event_max_stack - max_depth; ++ u32 skip = flags & BPF_F_SKIP_FIELD_MASK; + bool user = flags & BPF_F_USER_STACK; + struct perf_callchain_entry *trace; + bool kernel = !user; +@@ -348,8 +337,12 @@ BPF_CALL_3(bpf_get_stackid, struct pt_regs *, regs, struct bpf_map *, map, + BPF_F_FAST_STACK_CMP | BPF_F_REUSE_STACKID))) + return -EINVAL; -+#ifdef CONFIG_ZONE_DMA -+bool has_managed_dma(void); -+#else -+static inline bool has_managed_dma(void) -+{ -+ return false; -+} -+#endif +- trace = get_perf_callchain(regs, init_nr, kernel, user, +- sysctl_perf_event_max_stack, false, false); ++ max_depth += skip; ++ if (max_depth > sysctl_perf_event_max_stack) ++ max_depth = sysctl_perf_event_max_stack; + - /** - * is_highmem - helper function to quickly check if a struct zone is a - * highmem zone or not. This is an attempt to keep references -@@ -1342,13 +1351,16 @@ static inline unsigned long *section_to_usemap(struct mem_section *ms) ++ trace = get_perf_callchain(regs, 0, kernel, user, max_depth, ++ false, false); - static inline struct mem_section *__nr_to_section(unsigned long nr) + if (unlikely(!trace)) + /* couldn't fetch the stack trace */ +@@ -440,7 +433,7 @@ static long __bpf_get_stack(struct pt_regs *regs, struct task_struct *task, + struct perf_callchain_entry *trace_in, + void *buf, u32 size, u64 flags) { -+ unsigned long root = SECTION_NR_TO_ROOT(nr); -+ -+ if (unlikely(root >= NR_SECTION_ROOTS)) -+ return NULL; +- u32 init_nr, trace_nr, copy_len, elem_size, num_elem; ++ u32 trace_nr, copy_len, elem_size, num_elem, max_depth; + bool user_build_id = flags & BPF_F_USER_BUILD_ID; + u32 skip = flags & BPF_F_SKIP_FIELD_MASK; + bool user = flags & BPF_F_USER_STACK; +@@ -465,30 +458,28 @@ static long __bpf_get_stack(struct pt_regs *regs, struct task_struct *task, + goto err_fault; + + num_elem = size / elem_size; +- if (sysctl_perf_event_max_stack < num_elem) +- init_nr = 0; +- else +- init_nr = sysctl_perf_event_max_stack - num_elem; ++ max_depth = num_elem + skip; ++ if (sysctl_perf_event_max_stack < max_depth) ++ max_depth = sysctl_perf_event_max_stack; + + if (trace_in) + trace = trace_in; + else if (kernel && task) +- trace = get_callchain_entry_for_task(task, init_nr); ++ trace = get_callchain_entry_for_task(task, max_depth); + else +- trace = get_perf_callchain(regs, init_nr, kernel, user, +- sysctl_perf_event_max_stack, ++ trace = get_perf_callchain(regs, 0, kernel, user, max_depth, + false, false); + if (unlikely(!trace)) + goto err_fault; + +- trace_nr = trace->nr - init_nr; +- if (trace_nr < skip) ++ if (trace->nr < skip) + goto err_fault; + +- trace_nr -= skip; ++ trace_nr = trace->nr - skip; + trace_nr = (trace_nr <= num_elem) ? 
trace_nr : num_elem; + copy_len = trace_nr * elem_size; +- ips = trace->ip + skip + init_nr; + - #ifdef CONFIG_SPARSEMEM_EXTREME -- if (!mem_section) -+ if (!mem_section || !mem_section[root]) - return NULL; - #endif -- if (!mem_section[SECTION_NR_TO_ROOT(nr)]) -- return NULL; -- return &mem_section[SECTION_NR_TO_ROOT(nr)][nr & SECTION_ROOT_MASK]; -+ return &mem_section[root][nr & SECTION_ROOT_MASK]; ++ ips = trace->ip + skip; + if (user && user_build_id) + stack_map_get_build_id_offset(buf, ips, trace_nr, user); + else +@@ -525,13 +516,14 @@ BPF_CALL_4(bpf_get_task_stack, struct task_struct *, task, void *, buf, + u32, size, u64, flags) + { + struct pt_regs *regs; +- long res; ++ long res = -EINVAL; + + if (!try_get_task_stack(task)) + return -EFAULT; + + regs = task_pt_regs(task); +- res = __bpf_get_stack(regs, task, NULL, buf, size, flags); ++ if (regs) ++ res = __bpf_get_stack(regs, task, NULL, buf, size, flags); + put_task_stack(task); + + return res; +diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c +index 1cad6979a0d0f..ad41b8230780b 100644 +--- a/kernel/bpf/syscall.c ++++ b/kernel/bpf/syscall.c +@@ -132,6 +132,21 @@ static struct bpf_map *find_and_alloc_map(union bpf_attr *attr) + return map; } - extern size_t mem_section_usage_size(void); -diff --git a/include/linux/mnt_idmapping.h b/include/linux/mnt_idmapping.h -new file mode 100644 -index 0000000000000..ee5a217de2a88 ---- /dev/null -+++ b/include/linux/mnt_idmapping.h -@@ -0,0 +1,234 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _LINUX_MNT_IDMAPPING_H -+#define _LINUX_MNT_IDMAPPING_H -+ -+#include <linux/types.h> -+#include <linux/uidgid.h> -+ -+struct user_namespace; -+/* -+ * Carries the initial idmapping of 0:0:4294967295 which is an identity -+ * mapping. This means that {g,u}id 0 is mapped to {g,u}id 0, {g,u}id 1 is -+ * mapped to {g,u}id 1, [...], {g,u}id 1000 to {g,u}id 1000, [...]. -+ */ -+extern struct user_namespace init_user_ns; -+ -+/** -+ * initial_idmapping - check whether this is the initial mapping -+ * @ns: idmapping to check -+ * -+ * Check whether this is the initial mapping, mapping 0 to 0, 1 to 1, -+ * [...], 1000 to 1000 [...]. -+ * -+ * Return: true if this is the initial mapping, false if not. -+ */ -+static inline bool initial_idmapping(const struct user_namespace *ns) -+{ -+ return ns == &init_user_ns; -+} -+ -+/** -+ * no_idmapping - check whether we can skip remapping a kuid/gid -+ * @mnt_userns: the mount's idmapping -+ * @fs_userns: the filesystem's idmapping -+ * -+ * This function can be used to check whether a remapping between two -+ * idmappings is required. -+ * An idmapped mount is a mount that has an idmapping attached to it that -+ * is different from the filsystem's idmapping and the initial idmapping. -+ * If the initial mapping is used or the idmapping of the mount and the -+ * filesystem are identical no remapping is required. -+ * -+ * Return: true if remapping can be skipped, false if not. -+ */ -+static inline bool no_idmapping(const struct user_namespace *mnt_userns, -+ const struct user_namespace *fs_userns) -+{ -+ return initial_idmapping(mnt_userns) || mnt_userns == fs_userns; -+} -+ -+/** -+ * mapped_kuid_fs - map a filesystem kuid into a mnt_userns -+ * @mnt_userns: the mount's idmapping -+ * @fs_userns: the filesystem's idmapping -+ * @kuid : kuid to be mapped -+ * -+ * Take a @kuid and remap it from @fs_userns into @mnt_userns. Use this -+ * function when preparing a @kuid to be reported to userspace. 
-+ * -+ * If no_idmapping() determines that this is not an idmapped mount we can -+ * simply return @kuid unchanged. -+ * If initial_idmapping() tells us that the filesystem is not mounted with an -+ * idmapping we know the value of @kuid won't change when calling -+ * from_kuid() so we can simply retrieve the value via __kuid_val() -+ * directly. -+ * -+ * Return: @kuid mapped according to @mnt_userns. -+ * If @kuid has no mapping in either @mnt_userns or @fs_userns INVALID_UID is -+ * returned. -+ */ -+static inline kuid_t mapped_kuid_fs(struct user_namespace *mnt_userns, -+ struct user_namespace *fs_userns, -+ kuid_t kuid) -+{ -+ uid_t uid; -+ -+ if (no_idmapping(mnt_userns, fs_userns)) -+ return kuid; -+ if (initial_idmapping(fs_userns)) -+ uid = __kuid_val(kuid); -+ else -+ uid = from_kuid(fs_userns, kuid); -+ if (uid == (uid_t)-1) -+ return INVALID_UID; -+ return make_kuid(mnt_userns, uid); -+} -+ -+/** -+ * mapped_kgid_fs - map a filesystem kgid into a mnt_userns -+ * @mnt_userns: the mount's idmapping -+ * @fs_userns: the filesystem's idmapping -+ * @kgid : kgid to be mapped -+ * -+ * Take a @kgid and remap it from @fs_userns into @mnt_userns. Use this -+ * function when preparing a @kgid to be reported to userspace. -+ * -+ * If no_idmapping() determines that this is not an idmapped mount we can -+ * simply return @kgid unchanged. -+ * If initial_idmapping() tells us that the filesystem is not mounted with an -+ * idmapping we know the value of @kgid won't change when calling -+ * from_kgid() so we can simply retrieve the value via __kgid_val() -+ * directly. -+ * -+ * Return: @kgid mapped according to @mnt_userns. -+ * If @kgid has no mapping in either @mnt_userns or @fs_userns INVALID_GID is -+ * returned. -+ */ -+static inline kgid_t mapped_kgid_fs(struct user_namespace *mnt_userns, -+ struct user_namespace *fs_userns, -+ kgid_t kgid) -+{ -+ gid_t gid; -+ -+ if (no_idmapping(mnt_userns, fs_userns)) -+ return kgid; -+ if (initial_idmapping(fs_userns)) -+ gid = __kgid_val(kgid); -+ else -+ gid = from_kgid(fs_userns, kgid); -+ if (gid == (gid_t)-1) -+ return INVALID_GID; -+ return make_kgid(mnt_userns, gid); -+} -+ -+/** -+ * mapped_kuid_user - map a user kuid into a mnt_userns -+ * @mnt_userns: the mount's idmapping -+ * @fs_userns: the filesystem's idmapping -+ * @kuid : kuid to be mapped -+ * -+ * Use the idmapping of @mnt_userns to remap a @kuid into @fs_userns. Use this -+ * function when preparing a @kuid to be written to disk or inode. -+ * -+ * If no_idmapping() determines that this is not an idmapped mount we can -+ * simply return @kuid unchanged. -+ * If initial_idmapping() tells us that the filesystem is not mounted with an -+ * idmapping we know the value of @kuid won't change when calling -+ * make_kuid() so we can simply retrieve the value via KUIDT_INIT() -+ * directly. -+ * -+ * Return: @kuid mapped according to @mnt_userns. -+ * If @kuid has no mapping in either @mnt_userns or @fs_userns INVALID_UID is -+ * returned. 
-+ */ -+static inline kuid_t mapped_kuid_user(struct user_namespace *mnt_userns, -+ struct user_namespace *fs_userns, -+ kuid_t kuid) -+{ -+ uid_t uid; -+ -+ if (no_idmapping(mnt_userns, fs_userns)) -+ return kuid; -+ uid = from_kuid(mnt_userns, kuid); -+ if (uid == (uid_t)-1) -+ return INVALID_UID; -+ if (initial_idmapping(fs_userns)) -+ return KUIDT_INIT(uid); -+ return make_kuid(fs_userns, uid); -+} -+ -+/** -+ * mapped_kgid_user - map a user kgid into a mnt_userns -+ * @mnt_userns: the mount's idmapping -+ * @fs_userns: the filesystem's idmapping -+ * @kgid : kgid to be mapped -+ * -+ * Use the idmapping of @mnt_userns to remap a @kgid into @fs_userns. Use this -+ * function when preparing a @kgid to be written to disk or inode. -+ * -+ * If no_idmapping() determines that this is not an idmapped mount we can -+ * simply return @kgid unchanged. -+ * If initial_idmapping() tells us that the filesystem is not mounted with an -+ * idmapping we know the value of @kgid won't change when calling -+ * make_kgid() so we can simply retrieve the value via KGIDT_INIT() -+ * directly. -+ * -+ * Return: @kgid mapped according to @mnt_userns. -+ * If @kgid has no mapping in either @mnt_userns or @fs_userns INVALID_GID is -+ * returned. -+ */ -+static inline kgid_t mapped_kgid_user(struct user_namespace *mnt_userns, -+ struct user_namespace *fs_userns, -+ kgid_t kgid) ++static void bpf_map_write_active_inc(struct bpf_map *map) +{ -+ gid_t gid; -+ -+ if (no_idmapping(mnt_userns, fs_userns)) -+ return kgid; -+ gid = from_kgid(mnt_userns, kgid); -+ if (gid == (gid_t)-1) -+ return INVALID_GID; -+ if (initial_idmapping(fs_userns)) -+ return KGIDT_INIT(gid); -+ return make_kgid(fs_userns, gid); ++ atomic64_inc(&map->writecnt); +} + -+/** -+ * mapped_fsuid - return caller's fsuid mapped up into a mnt_userns -+ * @mnt_userns: the mount's idmapping -+ * @fs_userns: the filesystem's idmapping -+ * -+ * Use this helper to initialize a new vfs or filesystem object based on -+ * the caller's fsuid. A common example is initializing the i_uid field of -+ * a newly allocated inode triggered by a creation event such as mkdir or -+ * O_CREAT. Other examples include the allocation of quotas for a specific -+ * user. -+ * -+ * Return: the caller's current fsuid mapped up according to @mnt_userns. -+ */ -+static inline kuid_t mapped_fsuid(struct user_namespace *mnt_userns, -+ struct user_namespace *fs_userns) ++static void bpf_map_write_active_dec(struct bpf_map *map) +{ -+ return mapped_kuid_user(mnt_userns, fs_userns, current_fsuid()); ++ atomic64_dec(&map->writecnt); +} + -+/** -+ * mapped_fsgid - return caller's fsgid mapped up into a mnt_userns -+ * @mnt_userns: the mount's idmapping -+ * @fs_userns: the filesystem's idmapping -+ * -+ * Use this helper to initialize a new vfs or filesystem object based on -+ * the caller's fsgid. A common example is initializing the i_gid field of -+ * a newly allocated inode triggered by a creation event such as mkdir or -+ * O_CREAT. Other examples include the allocation of quotas for a specific -+ * user. -+ * -+ * Return: the caller's current fsgid mapped up according to @mnt_userns. 
-+ */ -+static inline kgid_t mapped_fsgid(struct user_namespace *mnt_userns, -+ struct user_namespace *fs_userns) ++bool bpf_map_write_active(const struct bpf_map *map) +{ -+ return mapped_kgid_user(mnt_userns, fs_userns, current_fsgid()); ++ return atomic64_read(&map->writecnt) != 0; +} + -+#endif /* _LINUX_MNT_IDMAPPING_H */ -diff --git a/include/linux/msi.h b/include/linux/msi.h -index 49cf6eb222e76..e616f94c7c585 100644 ---- a/include/linux/msi.h -+++ b/include/linux/msi.h -@@ -148,7 +148,7 @@ struct msi_desc { - u8 is_msix : 1; - u8 multiple : 3; - u8 multi_cap : 3; -- u8 maskbit : 1; -+ u8 can_mask : 1; - u8 is_64 : 1; - u8 is_virtual : 1; - u16 entry_nr; -diff --git a/include/linux/mtd/cfi.h b/include/linux/mtd/cfi.h -index fd1ecb8211060..d88bb56c18e2e 100644 ---- a/include/linux/mtd/cfi.h -+++ b/include/linux/mtd/cfi.h -@@ -286,6 +286,7 @@ struct cfi_private { - map_word sector_erase_cmd; - unsigned long chipshift; /* Because they're of the same type */ - const char *im_name; /* inter_module name for cmdset_setup */ -+ unsigned long quirks; - struct flchip chips[]; /* per-chip data structure for each chip */ - }; + static u32 bpf_map_value_size(const struct bpf_map *map) + { + if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH || +@@ -596,11 +611,8 @@ static void bpf_map_mmap_open(struct vm_area_struct *vma) + { + struct bpf_map *map = vma->vm_file->private_data; -diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h -index 88227044fc86c..8a2c60235ebb8 100644 ---- a/include/linux/mtd/mtd.h -+++ b/include/linux/mtd/mtd.h -@@ -394,10 +394,8 @@ struct mtd_info { - /* List of partitions attached to this MTD device */ - struct list_head partitions; +- if (vma->vm_flags & VM_MAYWRITE) { +- mutex_lock(&map->freeze_mutex); +- map->writecnt++; +- mutex_unlock(&map->freeze_mutex); +- } ++ if (vma->vm_flags & VM_MAYWRITE) ++ bpf_map_write_active_inc(map); + } -- union { -- struct mtd_part part; -- struct mtd_master master; -- }; -+ struct mtd_part part; -+ struct mtd_master master; - }; + /* called for all unmapped memory region (including initial) */ +@@ -608,11 +620,8 @@ static void bpf_map_mmap_close(struct vm_area_struct *vma) + { + struct bpf_map *map = vma->vm_file->private_data; - static inline struct mtd_info *mtd_get_master(struct mtd_info *mtd) -diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h -index b2f9dd3cbd695..dcf90144d70b7 100644 ---- a/include/linux/mtd/rawnand.h -+++ b/include/linux/mtd/rawnand.h -@@ -1240,6 +1240,7 @@ struct nand_secure_region { - * @lock: Lock protecting the suspended field. Also used to serialize accesses - * to the NAND device - * @suspended: Set to 1 when the device is suspended, 0 when it's not -+ * @resume_wq: wait queue to sleep if rawnand is in suspended state. - * @cur_cs: Currently selected target. -1 means no target selected, otherwise we - * should always have cur_cs >= 0 && cur_cs < nanddev_ntargets(). 
- * NAND Controller drivers should not modify this value, but they're -@@ -1294,6 +1295,7 @@ struct nand_chip { - /* Internals */ - struct mutex lock; - unsigned int suspended : 1; -+ wait_queue_head_t resume_wq; - int cur_cs; - int read_retries; - struct nand_secure_region *secure_regions; -@@ -1539,6 +1541,8 @@ int nand_read_data_op(struct nand_chip *chip, void *buf, unsigned int len, - bool force_8bit, bool check_only); - int nand_write_data_op(struct nand_chip *chip, const void *buf, - unsigned int len, bool force_8bit); -+int nand_read_page_hwecc_oob_first(struct nand_chip *chip, uint8_t *buf, -+ int oob_required, int page); +- if (vma->vm_flags & VM_MAYWRITE) { +- mutex_lock(&map->freeze_mutex); +- map->writecnt--; +- mutex_unlock(&map->freeze_mutex); +- } ++ if (vma->vm_flags & VM_MAYWRITE) ++ bpf_map_write_active_dec(map); + } - /* Scan and identify a NAND device */ - int nand_scan_with_ids(struct nand_chip *chip, unsigned int max_chips, -diff --git a/include/linux/namei.h b/include/linux/namei.h -index e89329bb3134e..caeb08a98536c 100644 ---- a/include/linux/namei.h -+++ b/include/linux/namei.h -@@ -69,6 +69,12 @@ extern struct dentry *lookup_one_len(const char *, struct dentry *, int); - extern struct dentry *lookup_one_len_unlocked(const char *, struct dentry *, int); - extern struct dentry *lookup_positive_unlocked(const char *, struct dentry *, int); - struct dentry *lookup_one(struct user_namespace *, const char *, struct dentry *, int); -+struct dentry *lookup_one_unlocked(struct user_namespace *mnt_userns, -+ const char *name, struct dentry *base, -+ int len); -+struct dentry *lookup_one_positive_unlocked(struct user_namespace *mnt_userns, -+ const char *name, -+ struct dentry *base, int len); + static const struct vm_operations_struct bpf_map_default_vmops = { +@@ -663,7 +672,7 @@ static int bpf_map_mmap(struct file *filp, struct vm_area_struct *vma) + goto out; + + if (vma->vm_flags & VM_MAYWRITE) +- map->writecnt++; ++ bpf_map_write_active_inc(map); + out: + mutex_unlock(&map->freeze_mutex); + return err; +@@ -1122,6 +1131,7 @@ static int map_update_elem(union bpf_attr *attr, bpfptr_t uattr) + map = __bpf_map_get(f); + if (IS_ERR(map)) + return PTR_ERR(map); ++ bpf_map_write_active_inc(map); + if (!(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) { + err = -EPERM; + goto err_put; +@@ -1157,6 +1167,7 @@ free_value: + free_key: + kvfree(key); + err_put: ++ bpf_map_write_active_dec(map); + fdput(f); + return err; + } +@@ -1179,6 +1190,7 @@ static int map_delete_elem(union bpf_attr *attr) + map = __bpf_map_get(f); + if (IS_ERR(map)) + return PTR_ERR(map); ++ bpf_map_write_active_inc(map); + if (!(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) { + err = -EPERM; + goto err_put; +@@ -1209,6 +1221,7 @@ static int map_delete_elem(union bpf_attr *attr) + out: + kvfree(key); + err_put: ++ bpf_map_write_active_dec(map); + fdput(f); + return err; + } +@@ -1324,6 +1337,7 @@ int generic_map_delete_batch(struct bpf_map *map, + maybe_wait_bpf_programs(map); + if (err) + break; ++ cond_resched(); + } + if (copy_to_user(&uattr->batch.count, &cp, sizeof(cp))) + err = -EFAULT; +@@ -1381,6 +1395,7 @@ int generic_map_update_batch(struct bpf_map *map, + + if (err) + break; ++ cond_resched(); + } + + if (copy_to_user(&uattr->batch.count, &cp, sizeof(cp))) +@@ -1478,6 +1493,7 @@ int generic_map_lookup_batch(struct bpf_map *map, + swap(prev_key, key); + retry = MAP_LOOKUP_RETRIES; + cp++; ++ cond_resched(); + } + + if (err == -EFAULT) +@@ -1516,6 +1532,7 @@ static int map_lookup_and_delete_elem(union 
bpf_attr *attr) + map = __bpf_map_get(f); + if (IS_ERR(map)) + return PTR_ERR(map); ++ bpf_map_write_active_inc(map); + if (!(map_get_sys_perms(map, f) & FMODE_CAN_READ) || + !(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) { + err = -EPERM; +@@ -1580,6 +1597,7 @@ free_value: + free_key: + kvfree(key); + err_put: ++ bpf_map_write_active_dec(map); + fdput(f); + return err; + } +@@ -1607,8 +1625,7 @@ static int map_freeze(const union bpf_attr *attr) + } + + mutex_lock(&map->freeze_mutex); +- +- if (map->writecnt) { ++ if (bpf_map_write_active(map)) { + err = -EBUSY; + goto err_put; + } +@@ -1678,7 +1695,7 @@ static void bpf_audit_prog(const struct bpf_prog *prog, unsigned int op) + return; + if (audit_enabled == AUDIT_OFF) + return; +- if (op == BPF_AUDIT_LOAD) ++ if (!in_irq() && !irqs_disabled()) + ctx = audit_context(); + ab = audit_log_start(ctx, GFP_ATOMIC, AUDIT_BPF); + if (unlikely(!ab)) +@@ -1773,6 +1790,7 @@ static void bpf_prog_put_deferred(struct work_struct *work) + prog = aux->prog; + perf_event_bpf_event(prog, PERF_BPF_EVENT_PROG_UNLOAD, 0); + bpf_audit_prog(prog, BPF_AUDIT_UNLOAD); ++ bpf_prog_free_id(prog, true); + __bpf_prog_put_noref(prog, true); + } + +@@ -1781,9 +1799,6 @@ static void __bpf_prog_put(struct bpf_prog *prog, bool do_idr_lock) + struct bpf_prog_aux *aux = prog->aux; + + if (atomic64_dec_and_test(&aux->refcnt)) { +- /* bpf_prog_free_id() must be called first */ +- bpf_prog_free_id(prog, do_idr_lock); +- + if (in_irq() || irqs_disabled()) { + INIT_WORK(&aux->work, bpf_prog_put_deferred); + schedule_work(&aux->work); +@@ -1807,8 +1822,14 @@ static int bpf_prog_release(struct inode *inode, struct file *filp) + return 0; + } + ++struct bpf_prog_kstats { ++ u64 nsecs; ++ u64 cnt; ++ u64 misses; ++}; ++ + static void bpf_prog_get_stats(const struct bpf_prog *prog, +- struct bpf_prog_stats *stats) ++ struct bpf_prog_kstats *stats) + { + u64 nsecs = 0, cnt = 0, misses = 0; + int cpu; +@@ -1821,9 +1842,9 @@ static void bpf_prog_get_stats(const struct bpf_prog *prog, + st = per_cpu_ptr(prog->stats, cpu); + do { + start = u64_stats_fetch_begin_irq(&st->syncp); +- tnsecs = st->nsecs; +- tcnt = st->cnt; +- tmisses = st->misses; ++ tnsecs = u64_stats_read(&st->nsecs); ++ tcnt = u64_stats_read(&st->cnt); ++ tmisses = u64_stats_read(&st->misses); + } while (u64_stats_fetch_retry_irq(&st->syncp, start)); + nsecs += tnsecs; + cnt += tcnt; +@@ -1839,7 +1860,7 @@ static void bpf_prog_show_fdinfo(struct seq_file *m, struct file *filp) + { + const struct bpf_prog *prog = filp->private_data; + char prog_tag[sizeof(prog->tag) * 2 + 1] = { }; +- struct bpf_prog_stats stats; ++ struct bpf_prog_kstats stats; - extern int follow_down_one(struct path *); - extern int follow_down(struct path *); -diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h -index 2c6b9e4162254..7c2d77d75a888 100644 ---- a/include/linux/netdev_features.h -+++ b/include/linux/netdev_features.h -@@ -169,7 +169,7 @@ enum { - #define NETIF_F_HW_HSR_FWD __NETIF_F(HW_HSR_FWD) - #define NETIF_F_HW_HSR_DUP __NETIF_F(HW_HSR_DUP) + bpf_prog_get_stats(prog, &stats); + bin2hex(prog_tag, prog->tag, sizeof(prog->tag)); +@@ -3578,7 +3599,7 @@ static int bpf_prog_get_info_by_fd(struct file *file, + struct bpf_prog_info __user *uinfo = u64_to_user_ptr(attr->info.info); + struct bpf_prog_info info; + u32 info_len = attr->info.info_len; +- struct bpf_prog_stats stats; ++ struct bpf_prog_kstats stats; + char __user *uinsns; + u32 ulen; + int err; +@@ -4077,7 +4098,9 @@ static int bpf_task_fd_query(const union 
bpf_attr *attr, + if (attr->task_fd_query.flags != 0) + return -EINVAL; --/* Finds the next feature with the highest number of the range of start till 0. -+/* Finds the next feature with the highest number of the range of start-1 till 0. - */ - static inline int find_next_netdev_feature(u64 feature, unsigned long start) ++ rcu_read_lock(); + task = get_pid_task(find_vpid(pid), PIDTYPE_PID); ++ rcu_read_unlock(); + if (!task) + return -ENOENT; + +@@ -4143,6 +4166,9 @@ static int bpf_map_do_batch(const union bpf_attr *attr, + union bpf_attr __user *uattr, + int cmd) { -@@ -188,7 +188,7 @@ static inline int find_next_netdev_feature(u64 feature, unsigned long start) - for ((bit) = find_next_netdev_feature((mask_addr), \ - NETDEV_FEATURE_COUNT); \ - (bit) >= 0; \ -- (bit) = find_next_netdev_feature((mask_addr), (bit) - 1)) -+ (bit) = find_next_netdev_feature((mask_addr), (bit))) ++ bool has_read = cmd == BPF_MAP_LOOKUP_BATCH || ++ cmd == BPF_MAP_LOOKUP_AND_DELETE_BATCH; ++ bool has_write = cmd != BPF_MAP_LOOKUP_BATCH; + struct bpf_map *map; + int err, ufd; + struct fd f; +@@ -4155,16 +4181,13 @@ static int bpf_map_do_batch(const union bpf_attr *attr, + map = __bpf_map_get(f); + if (IS_ERR(map)) + return PTR_ERR(map); +- +- if ((cmd == BPF_MAP_LOOKUP_BATCH || +- cmd == BPF_MAP_LOOKUP_AND_DELETE_BATCH) && +- !(map_get_sys_perms(map, f) & FMODE_CAN_READ)) { ++ if (has_write) ++ bpf_map_write_active_inc(map); ++ if (has_read && !(map_get_sys_perms(map, f) & FMODE_CAN_READ)) { + err = -EPERM; + goto err_put; + } +- +- if (cmd != BPF_MAP_LOOKUP_BATCH && +- !(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) { ++ if (has_write && !(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) { + err = -EPERM; + goto err_put; + } +@@ -4177,8 +4200,9 @@ static int bpf_map_do_batch(const union bpf_attr *attr, + BPF_DO_BATCH(map->ops->map_update_batch); + else + BPF_DO_BATCH(map->ops->map_delete_batch); +- + err_put: ++ if (has_write) ++ bpf_map_write_active_dec(map); + fdput(f); + return err; + } +@@ -4729,7 +4753,7 @@ static const struct bpf_func_proto bpf_sys_bpf_proto = { + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_ANYTHING, +- .arg2_type = ARG_PTR_TO_MEM, ++ .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY, + .arg3_type = ARG_CONST_SIZE, + }; - /* Features valid for ethtool to change */ - /* = all defined minus driver/device-class-related */ -diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h -index d79163208dfdb..3b97438afe3e2 100644 ---- a/include/linux/netdevice.h -+++ b/include/linux/netdevice.h -@@ -626,9 +626,23 @@ extern int sysctl_devconf_inherit_init_net; - */ - static inline bool net_has_fallback_tunnels(const struct net *net) +@@ -4761,7 +4785,7 @@ syscall_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) { -- return !IS_ENABLED(CONFIG_SYSCTL) || -- !sysctl_fb_tunnels_only_for_init_net || -- (net == &init_net && sysctl_fb_tunnels_only_for_init_net == 1); -+#if IS_ENABLED(CONFIG_SYSCTL) -+ int fb_tunnels_only_for_init_net = READ_ONCE(sysctl_fb_tunnels_only_for_init_net); + switch (func_id) { + case BPF_FUNC_sys_bpf: +- return &bpf_sys_bpf_proto; ++ return !perfmon_capable() ? 
NULL : &bpf_sys_bpf_proto; + case BPF_FUNC_btf_find_by_name_kind: + return &bpf_btf_find_by_name_kind_proto; + case BPF_FUNC_sys_close: +diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c +index fe1e857324e66..4fa75791b45e2 100644 +--- a/kernel/bpf/trampoline.c ++++ b/kernel/bpf/trampoline.c +@@ -414,7 +414,7 @@ int bpf_trampoline_link_prog(struct bpf_prog *prog, struct bpf_trampoline *tr) + { + enum bpf_tramp_prog_type kind; + int err = 0; +- int cnt; ++ int cnt = 0, i; + + kind = bpf_attach_type_to_tramp(prog); + mutex_lock(&tr->mutex); +@@ -425,7 +425,10 @@ int bpf_trampoline_link_prog(struct bpf_prog *prog, struct bpf_trampoline *tr) + err = -EBUSY; + goto out; + } +- cnt = tr->progs_cnt[BPF_TRAMP_FENTRY] + tr->progs_cnt[BPF_TRAMP_FEXIT]; + -+ return !fb_tunnels_only_for_init_net || -+ (net_eq(net, &init_net) && fb_tunnels_only_for_init_net == 1); -+#else -+ return true; -+#endif -+} ++ for (i = 0; i < BPF_TRAMP_MAX; i++) ++ cnt += tr->progs_cnt[i]; + -+static inline int net_inherit_devconf(void) -+{ -+#if IS_ENABLED(CONFIG_SYSCTL) -+ return READ_ONCE(sysctl_devconf_inherit_init_net); -+#else -+ return 0; -+#endif - } + if (kind == BPF_TRAMP_REPLACE) { + /* Cannot attach extension if fentry/fexit are in use. */ + if (cnt) { +@@ -503,16 +506,19 @@ out: - static inline int netdev_queue_numa_node_read(const struct netdev_queue *q) -@@ -887,7 +901,7 @@ struct net_device_path_stack { + void bpf_trampoline_put(struct bpf_trampoline *tr) + { ++ int i; ++ + if (!tr) + return; + mutex_lock(&trampoline_mutex); + if (!refcount_dec_and_test(&tr->refcnt)) + goto out; + WARN_ON_ONCE(mutex_is_locked(&tr->mutex)); +- if (WARN_ON_ONCE(!hlist_empty(&tr->progs_hlist[BPF_TRAMP_FENTRY]))) +- goto out; +- if (WARN_ON_ONCE(!hlist_empty(&tr->progs_hlist[BPF_TRAMP_FEXIT]))) +- goto out; ++ ++ for (i = 0; i < BPF_TRAMP_MAX; i++) ++ if (WARN_ON_ONCE(!hlist_empty(&tr->progs_hlist[i]))) ++ goto out; ++ + /* This code will be executed even when the last bpf_tramp_image + * is alive. All progs are detached from the trampoline and the + * trampoline image is patched with jmp into epilogue to skip +@@ -541,11 +547,12 @@ static u64 notrace bpf_prog_start_time(void) + static void notrace inc_misses_counter(struct bpf_prog *prog) + { + struct bpf_prog_stats *stats; ++ unsigned int flags; - struct net_device_path_ctx { - const struct net_device *dev; -- const u8 *daddr; -+ u8 daddr[ETH_ALEN]; + stats = this_cpu_ptr(prog->stats); +- u64_stats_update_begin(&stats->syncp); +- stats->misses++; +- u64_stats_update_end(&stats->syncp); ++ flags = u64_stats_update_begin_irqsave(&stats->syncp); ++ u64_stats_inc(&stats->misses); ++ u64_stats_update_end_irqrestore(&stats->syncp, flags); + } - int num_vlans; - struct { -@@ -1645,7 +1659,7 @@ enum netdev_priv_flags { - IFF_FAILOVER_SLAVE = 1<<28, - IFF_L3MDEV_RX_HANDLER = 1<<29, - IFF_LIVE_RENAME_OK = 1<<30, -- IFF_TX_SKB_NO_LINEAR = 1<<31, -+ IFF_TX_SKB_NO_LINEAR = BIT_ULL(31), - }; + /* The logic is similar to bpf_prog_run(), but with an explicit +@@ -585,11 +592,13 @@ static void notrace update_prog_stats(struct bpf_prog *prog, + * Hence check that 'start' is valid. 
+ */ + start > NO_START_TIME) { ++ unsigned long flags; ++ + stats = this_cpu_ptr(prog->stats); +- u64_stats_update_begin(&stats->syncp); +- stats->cnt++; +- stats->nsecs += sched_clock() - start; +- u64_stats_update_end(&stats->syncp); ++ flags = u64_stats_update_begin_irqsave(&stats->syncp); ++ u64_stats_inc(&stats->cnt); ++ u64_stats_add(&stats->nsecs, sched_clock() - start); ++ u64_stats_update_end_irqrestore(&stats->syncp, flags); + } + } - #define IFF_802_1Q_VLAN IFF_802_1Q_VLAN -@@ -2149,7 +2163,7 @@ struct net_device { - struct netdev_queue *_tx ____cacheline_aligned_in_smp; - unsigned int num_tx_queues; - unsigned int real_num_tx_queues; -- struct Qdisc *qdisc; -+ struct Qdisc __rcu *qdisc; - unsigned int tx_queue_len; - spinlock_t tx_global_lock; +diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c +index e76b559179054..1c95d97e7aa53 100644 +--- a/kernel/bpf/verifier.c ++++ b/kernel/bpf/verifier.c +@@ -240,12 +240,6 @@ static bool bpf_pseudo_kfunc_call(const struct bpf_insn *insn) + insn->src_reg == BPF_PSEUDO_KFUNC_CALL; + } -@@ -2636,6 +2650,7 @@ struct packet_type { - struct net_device *); - bool (*id_match)(struct packet_type *ptype, - struct sock *sk); -+ struct net *af_packet_net; - void *af_packet_priv; - struct list_head list; - }; -@@ -4403,7 +4418,8 @@ static inline u32 netif_msg_init(int debug_value, int default_msg_enable_bits) - static inline void __netif_tx_lock(struct netdev_queue *txq, int cpu) - { - spin_lock(&txq->_xmit_lock); -- txq->xmit_lock_owner = cpu; -+ /* Pairs with READ_ONCE() in __dev_queue_xmit() */ -+ WRITE_ONCE(txq->xmit_lock_owner, cpu); +-static bool bpf_pseudo_func(const struct bpf_insn *insn) +-{ +- return insn->code == (BPF_LD | BPF_IMM | BPF_DW) && +- insn->src_reg == BPF_PSEUDO_FUNC; +-} +- + struct bpf_call_arg_meta { + struct bpf_map *map_ptr; + bool raw_mode; +@@ -445,18 +439,6 @@ static bool reg_type_not_null(enum bpf_reg_type type) + type == PTR_TO_SOCK_COMMON; } - static inline bool __netif_tx_acquire(struct netdev_queue *txq) -@@ -4420,26 +4436,32 @@ static inline void __netif_tx_release(struct netdev_queue *txq) - static inline void __netif_tx_lock_bh(struct netdev_queue *txq) +-static bool reg_type_may_be_null(enum bpf_reg_type type) +-{ +- return type == PTR_TO_MAP_VALUE_OR_NULL || +- type == PTR_TO_SOCKET_OR_NULL || +- type == PTR_TO_SOCK_COMMON_OR_NULL || +- type == PTR_TO_TCP_SOCK_OR_NULL || +- type == PTR_TO_BTF_ID_OR_NULL || +- type == PTR_TO_MEM_OR_NULL || +- type == PTR_TO_RDONLY_BUF_OR_NULL || +- type == PTR_TO_RDWR_BUF_OR_NULL; +-} +- + static bool reg_may_point_to_spin_lock(const struct bpf_reg_state *reg) { - spin_lock_bh(&txq->_xmit_lock); -- txq->xmit_lock_owner = smp_processor_id(); -+ /* Pairs with READ_ONCE() in __dev_queue_xmit() */ -+ WRITE_ONCE(txq->xmit_lock_owner, smp_processor_id()); - } + return reg->type == PTR_TO_MAP_VALUE && +@@ -465,12 +447,14 @@ static bool reg_may_point_to_spin_lock(const struct bpf_reg_state *reg) - static inline bool __netif_tx_trylock(struct netdev_queue *txq) + static bool reg_type_may_be_refcounted_or_null(enum bpf_reg_type type) { - bool ok = spin_trylock(&txq->_xmit_lock); -- if (likely(ok)) -- txq->xmit_lock_owner = smp_processor_id(); +- return type == PTR_TO_SOCKET || +- type == PTR_TO_SOCKET_OR_NULL || +- type == PTR_TO_TCP_SOCK || +- type == PTR_TO_TCP_SOCK_OR_NULL || +- type == PTR_TO_MEM || +- type == PTR_TO_MEM_OR_NULL; ++ return base_type(type) == PTR_TO_SOCKET || ++ base_type(type) == PTR_TO_TCP_SOCK || ++ base_type(type) == PTR_TO_MEM; ++} + -+ if (likely(ok)) { 
-+ /* Pairs with READ_ONCE() in __dev_queue_xmit() */ -+ WRITE_ONCE(txq->xmit_lock_owner, smp_processor_id()); -+ } - return ok; ++static bool type_is_rdonly_mem(u32 type) ++{ ++ return type & MEM_RDONLY; } - static inline void __netif_tx_unlock(struct netdev_queue *txq) - { -- txq->xmit_lock_owner = -1; -+ /* Pairs with READ_ONCE() in __dev_queue_xmit() */ -+ WRITE_ONCE(txq->xmit_lock_owner, -1); - spin_unlock(&txq->_xmit_lock); + static bool arg_type_may_be_refcounted(enum bpf_arg_type type) +@@ -478,14 +462,9 @@ static bool arg_type_may_be_refcounted(enum bpf_arg_type type) + return type == ARG_PTR_TO_SOCK_COMMON; } - static inline void __netif_tx_unlock_bh(struct netdev_queue *txq) +-static bool arg_type_may_be_null(enum bpf_arg_type type) ++static bool type_may_be_null(u32 type) { -- txq->xmit_lock_owner = -1; -+ /* Pairs with READ_ONCE() in __dev_queue_xmit() */ -+ WRITE_ONCE(txq->xmit_lock_owner, -1); - spin_unlock_bh(&txq->_xmit_lock); +- return type == ARG_PTR_TO_MAP_VALUE_OR_NULL || +- type == ARG_PTR_TO_MEM_OR_NULL || +- type == ARG_PTR_TO_CTX_OR_NULL || +- type == ARG_PTR_TO_SOCKET_OR_NULL || +- type == ARG_PTR_TO_ALLOC_MEM_OR_NULL || +- type == ARG_PTR_TO_STACK_OR_NULL; ++ return type & PTR_MAYBE_NULL; } -diff --git a/include/linux/netfilter/nf_conntrack_common.h b/include/linux/netfilter/nf_conntrack_common.h -index 700ea077ce2d6..2770db2fa080d 100644 ---- a/include/linux/netfilter/nf_conntrack_common.h -+++ b/include/linux/netfilter/nf_conntrack_common.h -@@ -2,7 +2,7 @@ - #ifndef _NF_CONNTRACK_COMMON_H - #define _NF_CONNTRACK_COMMON_H - --#include <linux/atomic.h> -+#include <linux/refcount.h> - #include <uapi/linux/netfilter/nf_conntrack_common.h> + /* Determine whether the function releases some resources allocated by another +@@ -545,39 +524,54 @@ static bool is_cmpxchg_insn(const struct bpf_insn *insn) + insn->imm == BPF_CMPXCHG; + } - struct ip_conntrack_stat { -@@ -25,19 +25,21 @@ struct ip_conntrack_stat { - #define NFCT_PTRMASK ~(NFCT_INFOMASK) +-/* string representation of 'enum bpf_reg_type' */ +-static const char * const reg_type_str[] = { +- [NOT_INIT] = "?", +- [SCALAR_VALUE] = "inv", +- [PTR_TO_CTX] = "ctx", +- [CONST_PTR_TO_MAP] = "map_ptr", +- [PTR_TO_MAP_VALUE] = "map_value", +- [PTR_TO_MAP_VALUE_OR_NULL] = "map_value_or_null", +- [PTR_TO_STACK] = "fp", +- [PTR_TO_PACKET] = "pkt", +- [PTR_TO_PACKET_META] = "pkt_meta", +- [PTR_TO_PACKET_END] = "pkt_end", +- [PTR_TO_FLOW_KEYS] = "flow_keys", +- [PTR_TO_SOCKET] = "sock", +- [PTR_TO_SOCKET_OR_NULL] = "sock_or_null", +- [PTR_TO_SOCK_COMMON] = "sock_common", +- [PTR_TO_SOCK_COMMON_OR_NULL] = "sock_common_or_null", +- [PTR_TO_TCP_SOCK] = "tcp_sock", +- [PTR_TO_TCP_SOCK_OR_NULL] = "tcp_sock_or_null", +- [PTR_TO_TP_BUFFER] = "tp_buffer", +- [PTR_TO_XDP_SOCK] = "xdp_sock", +- [PTR_TO_BTF_ID] = "ptr_", +- [PTR_TO_BTF_ID_OR_NULL] = "ptr_or_null_", +- [PTR_TO_PERCPU_BTF_ID] = "percpu_ptr_", +- [PTR_TO_MEM] = "mem", +- [PTR_TO_MEM_OR_NULL] = "mem_or_null", +- [PTR_TO_RDONLY_BUF] = "rdonly_buf", +- [PTR_TO_RDONLY_BUF_OR_NULL] = "rdonly_buf_or_null", +- [PTR_TO_RDWR_BUF] = "rdwr_buf", +- [PTR_TO_RDWR_BUF_OR_NULL] = "rdwr_buf_or_null", +- [PTR_TO_FUNC] = "func", +- [PTR_TO_MAP_KEY] = "map_key", +-}; ++/* string representation of 'enum bpf_reg_type' ++ * ++ * Note that reg_type_str() can not appear more than once in a single verbose() ++ * statement. 
++ */ ++static const char *reg_type_str(struct bpf_verifier_env *env, ++ enum bpf_reg_type type) ++{ ++ char postfix[16] = {0}, prefix[16] = {0}; ++ static const char * const str[] = { ++ [NOT_INIT] = "?", ++ [SCALAR_VALUE] = "inv", ++ [PTR_TO_CTX] = "ctx", ++ [CONST_PTR_TO_MAP] = "map_ptr", ++ [PTR_TO_MAP_VALUE] = "map_value", ++ [PTR_TO_STACK] = "fp", ++ [PTR_TO_PACKET] = "pkt", ++ [PTR_TO_PACKET_META] = "pkt_meta", ++ [PTR_TO_PACKET_END] = "pkt_end", ++ [PTR_TO_FLOW_KEYS] = "flow_keys", ++ [PTR_TO_SOCKET] = "sock", ++ [PTR_TO_SOCK_COMMON] = "sock_common", ++ [PTR_TO_TCP_SOCK] = "tcp_sock", ++ [PTR_TO_TP_BUFFER] = "tp_buffer", ++ [PTR_TO_XDP_SOCK] = "xdp_sock", ++ [PTR_TO_BTF_ID] = "ptr_", ++ [PTR_TO_PERCPU_BTF_ID] = "percpu_ptr_", ++ [PTR_TO_MEM] = "mem", ++ [PTR_TO_BUF] = "buf", ++ [PTR_TO_FUNC] = "func", ++ [PTR_TO_MAP_KEY] = "map_key", ++ }; ++ ++ if (type & PTR_MAYBE_NULL) { ++ if (base_type(type) == PTR_TO_BTF_ID || ++ base_type(type) == PTR_TO_PERCPU_BTF_ID) ++ strncpy(postfix, "or_null_", 16); ++ else ++ strncpy(postfix, "_or_null", 16); ++ } ++ ++ if (type & MEM_RDONLY) ++ strncpy(prefix, "rdonly_", 16); ++ ++ snprintf(env->type_str_buf, TYPE_STR_BUF_LEN, "%s%s%s", ++ prefix, str[base_type(type)], postfix); ++ return env->type_str_buf; ++} - struct nf_conntrack { -- atomic_t use; -+ refcount_t use; - }; + static char slot_type_char[] = { + [STACK_INVALID] = '?', +@@ -612,6 +606,20 @@ static const char *kernel_type_name(const struct btf* btf, u32 id) + return btf_name_by_offset(btf, btf_type_by_id(btf, id)->name_off); + } - void nf_conntrack_destroy(struct nf_conntrack *nfct); ++/* The reg state of a pointer or a bounded scalar was saved when ++ * it was spilled to the stack. ++ */ ++static bool is_spilled_reg(const struct bpf_stack_state *stack) ++{ ++ return stack->slot_type[BPF_REG_SIZE - 1] == STACK_SPILL; ++} + -+/* like nf_ct_put, but without module dependency on nf_conntrack */ - static inline void nf_conntrack_put(struct nf_conntrack *nfct) ++static void scrub_spilled_slot(u8 *stype) ++{ ++ if (*stype != STACK_INVALID) ++ *stype = STACK_MISC; ++} ++ + static void print_verifier_state(struct bpf_verifier_env *env, + const struct bpf_func_state *state) { -- if (nfct && atomic_dec_and_test(&nfct->use)) -+ if (nfct && refcount_dec_and_test(&nfct->use)) - nf_conntrack_destroy(nfct); - } - static inline void nf_conntrack_get(struct nf_conntrack *nfct) +@@ -628,7 +636,7 @@ static void print_verifier_state(struct bpf_verifier_env *env, + continue; + verbose(env, " R%d", i); + print_liveness(env, reg->live); +- verbose(env, "=%s", reg_type_str[t]); ++ verbose(env, "=%s", reg_type_str(env, t)); + if (t == SCALAR_VALUE && reg->precise) + verbose(env, "P"); + if ((t == SCALAR_VALUE || t == PTR_TO_STACK) && +@@ -636,9 +644,8 @@ static void print_verifier_state(struct bpf_verifier_env *env, + /* reg->off should be 0 for SCALAR_VALUE */ + verbose(env, "%lld", reg->var_off.value + reg->off); + } else { +- if (t == PTR_TO_BTF_ID || +- t == PTR_TO_BTF_ID_OR_NULL || +- t == PTR_TO_PERCPU_BTF_ID) ++ if (base_type(t) == PTR_TO_BTF_ID || ++ base_type(t) == PTR_TO_PERCPU_BTF_ID) + verbose(env, "%s", kernel_type_name(reg->btf, reg->btf_id)); + verbose(env, "(id=%d", reg->id); + if (reg_type_may_be_refcounted_or_null(t)) +@@ -647,10 +654,9 @@ static void print_verifier_state(struct bpf_verifier_env *env, + verbose(env, ",off=%d", reg->off); + if (type_is_pkt_pointer(t)) + verbose(env, ",r=%d", reg->range); +- else if (t == CONST_PTR_TO_MAP || +- t == PTR_TO_MAP_KEY || +- t == PTR_TO_MAP_VALUE || 
+- t == PTR_TO_MAP_VALUE_OR_NULL) ++ else if (base_type(t) == CONST_PTR_TO_MAP || ++ base_type(t) == PTR_TO_MAP_KEY || ++ base_type(t) == PTR_TO_MAP_VALUE) + verbose(env, ",ks=%d,vs=%d", + reg->map_ptr->key_size, + reg->map_ptr->value_size); +@@ -717,10 +723,10 @@ static void print_verifier_state(struct bpf_verifier_env *env, + continue; + verbose(env, " fp%d", (-i - 1) * BPF_REG_SIZE); + print_liveness(env, state->stack[i].spilled_ptr.live); +- if (state->stack[i].slot_type[0] == STACK_SPILL) { ++ if (is_spilled_reg(&state->stack[i])) { + reg = &state->stack[i].spilled_ptr; + t = reg->type; +- verbose(env, "=%s", reg_type_str[t]); ++ verbose(env, "=%s", reg_type_str(env, t)); + if (t == SCALAR_VALUE && reg->precise) + verbose(env, "P"); + if (t == SCALAR_VALUE && tnum_is_const(reg->var_off)) +@@ -778,12 +784,17 @@ out: + */ + static void *realloc_array(void *arr, size_t old_n, size_t new_n, size_t size) { - if (nfct) -- atomic_inc(&nfct->use); -+ refcount_inc(&nfct->use); - } - - #endif /* _NF_CONNTRACK_COMMON_H */ -diff --git a/include/linux/netfilter_bridge/ebtables.h b/include/linux/netfilter_bridge/ebtables.h -index 10a01978bc0d3..bde9db771ae41 100644 ---- a/include/linux/netfilter_bridge/ebtables.h -+++ b/include/linux/netfilter_bridge/ebtables.h -@@ -94,10 +94,6 @@ struct ebt_table { - struct ebt_replace_kernel *table; - unsigned int valid_hooks; - rwlock_t lock; -- /* e.g. could be the table explicitly only allows certain -- * matches, targets, ... 0 == let it in */ -- int (*check)(const struct ebt_table_info *info, -- unsigned int valid_hooks); - /* the data used by the kernel */ - struct ebt_table_info *private; - struct nf_hook_ops *ops; -diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h -index b9a8b925db430..5ddc30405f7f4 100644 ---- a/include/linux/nfs_fs.h -+++ b/include/linux/nfs_fs.h -@@ -103,6 +103,7 @@ struct nfs_open_dir_context { - __be32 verf[NFS_DIR_VERIFIER_SIZE]; - __u64 dir_cookie; - __u64 dup_cookie; -+ pgoff_t page_index; - signed char duped; - }; ++ void *new_arr; ++ + if (!new_n || old_n == new_n) + goto out; -@@ -154,36 +155,40 @@ struct nfs_inode { - unsigned long attrtimeo_timestamp; +- arr = krealloc_array(arr, new_n, size, GFP_KERNEL); +- if (!arr) ++ new_arr = krealloc_array(arr, new_n, size, GFP_KERNEL); ++ if (!new_arr) { ++ kfree(arr); + return NULL; ++ } ++ arr = new_arr; - unsigned long attr_gencount; -- /* "Generation counter" for the attribute cache. This is -- * bumped whenever we update the metadata on the -- * server. -- */ -- unsigned long cache_change_attribute; + if (new_n > old_n) + memset(arr + old_n * size, 0, (new_n - old_n) * size); +@@ -859,6 +870,7 @@ static int acquire_reference_state(struct bpf_verifier_env *env, int insn_idx) + id = ++env->id_gen; + state->refs[new_ofs].id = id; + state->refs[new_ofs].insn_idx = insn_idx; ++ state->refs[new_ofs].callback_ref = state->in_callback_fn ? 
state->frameno : 0; - struct rb_root access_cache; - struct list_head access_cache_entry_lru; - struct list_head access_cache_inode_lru; + return id; + } +@@ -871,6 +883,9 @@ static int release_reference_state(struct bpf_func_state *state, int ptr_id) + last_idx = state->acquired_refs - 1; + for (i = 0; i < state->acquired_refs; i++) { + if (state->refs[i].id == ptr_id) { ++ /* Cannot release caller references in callbacks */ ++ if (state->in_callback_fn && state->refs[i].callback_ref != state->frameno) ++ return -EINVAL; + if (last_idx && i != last_idx) + memcpy(&state->refs[i], &state->refs[last_idx], + sizeof(*state->refs)); +@@ -1133,8 +1148,7 @@ static void mark_reg_known_zero(struct bpf_verifier_env *env, -- /* -- * This is the cookie verifier used for NFSv3 readdir -- * operations -- */ -- __be32 cookieverf[NFS_DIR_VERIFIER_SIZE]; -- -- atomic_long_t nrequests; -- struct nfs_mds_commit_info commit_info; -+ union { -+ /* Directory */ -+ struct { -+ /* "Generation counter" for the attribute cache. -+ * This is bumped whenever we update the metadata -+ * on the server. -+ */ -+ unsigned long cache_change_attribute; -+ /* -+ * This is the cookie verifier used for NFSv3 readdir -+ * operations -+ */ -+ __be32 cookieverf[NFS_DIR_VERIFIER_SIZE]; -+ /* Readers: in-flight sillydelete RPC calls */ -+ /* Writers: rmdir */ -+ struct rw_semaphore rmdir_sem; -+ }; -+ /* Regular file */ -+ struct { -+ atomic_long_t nrequests; -+ atomic_long_t redirtied_pages; -+ struct nfs_mds_commit_info commit_info; -+ struct mutex commit_mutex; -+ }; -+ }; + static void mark_ptr_not_null_reg(struct bpf_reg_state *reg) + { +- switch (reg->type) { +- case PTR_TO_MAP_VALUE_OR_NULL: { ++ if (base_type(reg->type) == PTR_TO_MAP_VALUE) { + const struct bpf_map *map = reg->map_ptr; - /* Open contexts for shared mmap writes */ - struct list_head open_files; + if (map->inner_map_meta) { +@@ -1143,7 +1157,8 @@ static void mark_ptr_not_null_reg(struct bpf_reg_state *reg) + /* transfer reg's id which is unique for every map_lookup_elem + * as UID of the inner map. 
+ */ +- reg->map_uid = reg->id; ++ if (map_value_has_timer(map->inner_map_meta)) ++ reg->map_uid = reg->id; + } else if (map->map_type == BPF_MAP_TYPE_XSKMAP) { + reg->type = PTR_TO_XDP_SOCK; + } else if (map->map_type == BPF_MAP_TYPE_SOCKMAP || +@@ -1152,32 +1167,10 @@ static void mark_ptr_not_null_reg(struct bpf_reg_state *reg) + } else { + reg->type = PTR_TO_MAP_VALUE; + } +- break; +- } +- case PTR_TO_SOCKET_OR_NULL: +- reg->type = PTR_TO_SOCKET; +- break; +- case PTR_TO_SOCK_COMMON_OR_NULL: +- reg->type = PTR_TO_SOCK_COMMON; +- break; +- case PTR_TO_TCP_SOCK_OR_NULL: +- reg->type = PTR_TO_TCP_SOCK; +- break; +- case PTR_TO_BTF_ID_OR_NULL: +- reg->type = PTR_TO_BTF_ID; +- break; +- case PTR_TO_MEM_OR_NULL: +- reg->type = PTR_TO_MEM; +- break; +- case PTR_TO_RDONLY_BUF_OR_NULL: +- reg->type = PTR_TO_RDONLY_BUF; +- break; +- case PTR_TO_RDWR_BUF_OR_NULL: +- reg->type = PTR_TO_RDWR_BUF; +- break; +- default: +- WARN_ONCE(1, "unknown nullable register type"); ++ return; + } ++ ++ reg->type &= ~PTR_MAYBE_NULL; + } -- /* Readers: in-flight sillydelete RPC calls */ -- /* Writers: rmdir */ -- struct rw_semaphore rmdir_sem; -- struct mutex commit_mutex; -- -- /* track last access to cached pages */ -- unsigned long page_index; -- - #if IS_ENABLED(CONFIG_NFS_V4) - struct nfs4_cached_acl *nfs4_acl; - /* NFSv4 state */ -@@ -272,6 +277,7 @@ struct nfs4_copy_state { - #define NFS_INO_INVALIDATING (3) /* inode is being invalidated */ - #define NFS_INO_FSCACHE (5) /* inode can be cached by FS-Cache */ - #define NFS_INO_FSCACHE_LOCK (6) /* FS-Cache cookie management lock */ -+#define NFS_INO_FORCE_READDIR (7) /* force readdirplus */ - #define NFS_INO_LAYOUTCOMMIT (9) /* layoutcommit required */ - #define NFS_INO_LAYOUTCOMMITTING (10) /* layoutcommit inflight */ - #define NFS_INO_LAYOUTSTATS (11) /* layoutstats inflight */ -@@ -421,9 +427,22 @@ extern void nfs_fattr_set_barrier(struct nfs_fattr *fattr); - extern unsigned long nfs_inc_attr_generation_counter(void); + static bool reg_is_pkt_pointer(const struct bpf_reg_state *reg) +@@ -1357,22 +1350,43 @@ static void __reg_bound_offset(struct bpf_reg_state *reg) + reg->var_off = tnum_or(tnum_clear_subreg(var64_off), var32_off); + } - extern struct nfs_fattr *nfs_alloc_fattr(void); -+extern struct nfs_fattr *nfs_alloc_fattr_with_label(struct nfs_server *server); ++static void reg_bounds_sync(struct bpf_reg_state *reg) ++{ ++ /* We might have learned new bounds from the var_off. */ ++ __update_reg_bounds(reg); ++ /* We might have learned something about the sign bit. */ ++ __reg_deduce_bounds(reg); ++ /* We might have learned some bits from the bounds. */ ++ __reg_bound_offset(reg); ++ /* Intersecting with the old var_off might have improved our bounds ++ * slightly, e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc), ++ * then new var_off is (0; 0x7f...fc) which improves our umax. 
++ */ ++ __update_reg_bounds(reg); ++} + -+static inline void nfs4_label_free(struct nfs4_label *label) ++static bool __reg32_bound_s64(s32 a) +{ -+#ifdef CONFIG_NFS_V4_SECURITY_LABEL -+ if (label) { -+ kfree(label->label); -+ kfree(label); -+ } -+#endif ++ return a >= 0 && a <= S32_MAX; +} - - static inline void nfs_free_fattr(const struct nfs_fattr *fattr) ++ + static void __reg_assign_32_into_64(struct bpf_reg_state *reg) { -+ if (fattr) -+ nfs4_label_free(fattr->label); - kfree(fattr); + reg->umin_value = reg->u32_min_value; + reg->umax_value = reg->u32_max_value; +- /* Attempt to pull 32-bit signed bounds into 64-bit bounds +- * but must be positive otherwise set to worse case bounds +- * and refine later from tnum. ++ ++ /* Attempt to pull 32-bit signed bounds into 64-bit bounds but must ++ * be positive otherwise set to worse case bounds and refine later ++ * from tnum. + */ +- if (reg->s32_min_value >= 0 && reg->s32_max_value >= 0) +- reg->smax_value = reg->s32_max_value; +- else +- reg->smax_value = U32_MAX; +- if (reg->s32_min_value >= 0) ++ if (__reg32_bound_s64(reg->s32_min_value) && ++ __reg32_bound_s64(reg->s32_max_value)) { + reg->smin_value = reg->s32_min_value; +- else ++ reg->smax_value = reg->s32_max_value; ++ } else { + reg->smin_value = 0; ++ reg->smax_value = U32_MAX; ++ } } -@@ -494,10 +513,10 @@ static inline const struct cred *nfs_file_cred(struct file *file) - * linux/fs/nfs/direct.c - */ - extern ssize_t nfs_direct_IO(struct kiocb *, struct iov_iter *); --extern ssize_t nfs_file_direct_read(struct kiocb *iocb, -- struct iov_iter *iter); --extern ssize_t nfs_file_direct_write(struct kiocb *iocb, -- struct iov_iter *iter); -+ssize_t nfs_file_direct_read(struct kiocb *iocb, -+ struct iov_iter *iter, bool swap); -+ssize_t nfs_file_direct_write(struct kiocb *iocb, -+ struct iov_iter *iter, bool swap); - - /* - * linux/fs/nfs/dir.c -@@ -517,8 +536,8 @@ extern int nfs_instantiate(struct dentry *dentry, struct nfs_fh *fh, - struct nfs_fattr *fattr, struct nfs4_label *label); - extern int nfs_may_open(struct inode *inode, const struct cred *cred, int openflags); - extern void nfs_access_zap_cache(struct inode *inode); --extern int nfs_access_get_cached(struct inode *inode, const struct cred *cred, struct nfs_access_entry *res, -- bool may_block); -+extern int nfs_access_get_cached(struct inode *inode, const struct cred *cred, -+ u32 *mask, bool may_block); + static void __reg_combine_32_into_64(struct bpf_reg_state *reg) +@@ -1392,32 +1406,23 @@ static void __reg_combine_32_into_64(struct bpf_reg_state *reg) + * so they do not impact tnum bounds calculation. + */ + __mark_reg64_unbounded(reg); +- __update_reg_bounds(reg); + } +- +- /* Intersecting with the old var_off might have improved our bounds +- * slightly. e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc), +- * then new var_off is (0; 0x7f...fc) which improves our umax. 
+- */ +- __reg_deduce_bounds(reg); +- __reg_bound_offset(reg); +- __update_reg_bounds(reg); ++ reg_bounds_sync(reg); + } - /* - * linux/fs/nfs/symlink.c -@@ -567,13 +586,16 @@ extern int nfs_wb_all(struct inode *inode); - extern int nfs_wb_page(struct inode *inode, struct page *page); - extern int nfs_wb_page_cancel(struct inode *inode, struct page* page); - extern int nfs_commit_inode(struct inode *, int); --extern struct nfs_commit_data *nfs_commitdata_alloc(bool never_fail); -+extern struct nfs_commit_data *nfs_commitdata_alloc(void); - extern void nfs_commit_free(struct nfs_commit_data *data); -+bool nfs_commit_end(struct nfs_mds_commit_info *cinfo); + static bool __reg64_bound_s32(s64 a) + { +- return a > S32_MIN && a < S32_MAX; ++ return a >= S32_MIN && a <= S32_MAX; + } - static inline int - nfs_have_writebacks(struct inode *inode) + static bool __reg64_bound_u32(u64 a) { -- return atomic_long_read(&NFS_I(inode)->nrequests) != 0; -+ if (S_ISREG(inode->i_mode)) -+ return atomic_long_read(&NFS_I(inode)->nrequests) != 0; -+ return 0; +- return a > U32_MIN && a < U32_MAX; ++ return a >= U32_MIN && a <= U32_MAX; } - /* -diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h -index 2a9acbfe00f0f..da9ef0ab9b4b6 100644 ---- a/include/linux/nfs_fs_sb.h -+++ b/include/linux/nfs_fs_sb.h -@@ -287,5 +287,6 @@ struct nfs_server { - #define NFS_CAP_COPY_NOTIFY (1U << 27) - #define NFS_CAP_XATTR (1U << 28) - #define NFS_CAP_READ_PLUS (1U << 29) + static void __reg_combine_64_into_32(struct bpf_reg_state *reg) + { + __mark_reg32_unbounded(reg); - -+#define NFS_CAP_FS_LOCATIONS (1U << 30) -+#define NFS_CAP_MOVEABLE (1U << 31) - #endif -diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h -index e9698b6278a52..ecd74cc347974 100644 ---- a/include/linux/nfs_xdr.h -+++ b/include/linux/nfs_xdr.h -@@ -1805,6 +1805,7 @@ struct nfs_rpc_ops { - struct nfs_server *(*create_server)(struct fs_context *); - struct nfs_server *(*clone_server)(struct nfs_server *, struct nfs_fh *, - struct nfs_fattr *, rpc_authflavor_t); -+ int (*discover_trunking)(struct nfs_server *, struct nfs_fh *); - }; + if (__reg64_bound_s32(reg->smin_value) && __reg64_bound_s32(reg->smax_value)) { + reg->s32_min_value = (s32)reg->smin_value; + reg->s32_max_value = (s32)reg->smax_value; +@@ -1426,14 +1431,7 @@ static void __reg_combine_64_into_32(struct bpf_reg_state *reg) + reg->u32_min_value = (u32)reg->umin_value; + reg->u32_max_value = (u32)reg->umax_value; + } +- +- /* Intersecting with the old var_off might have improved our bounds +- * slightly. e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc), +- * then new var_off is (0; 0x7f...fc) which improves our umax. +- */ +- __reg_deduce_bounds(reg); +- __reg_bound_offset(reg); +- __update_reg_bounds(reg); ++ reg_bounds_sync(reg); + } - /* -diff --git a/include/linux/nmi.h b/include/linux/nmi.h -index 750c7f395ca90..f700ff2df074e 100644 ---- a/include/linux/nmi.h -+++ b/include/linux/nmi.h -@@ -122,6 +122,8 @@ int watchdog_nmi_probe(void); - int watchdog_nmi_enable(unsigned int cpu); - void watchdog_nmi_disable(unsigned int cpu); + /* Mark a register as having a completely unknown (scalar) value. */ +@@ -1807,16 +1805,10 @@ static int add_subprog_and_kfunc(struct bpf_verifier_env *env) + return -EPERM; + } -+void lockup_detector_reconfigure(void); -+ - /** - * touch_nmi_watchdog - restart NMI watchdog timeout. 
- *
-diff --git a/include/linux/nodemask.h b/include/linux/nodemask.h
-index 567c3ddba2c42..0f233b76c9cec 100644
---- a/include/linux/nodemask.h
-+++ b/include/linux/nodemask.h
-@@ -42,11 +42,11 @@
- * void nodes_shift_right(dst, src, n) Shift right
- * void nodes_shift_left(dst, src, n) Shift left
- *
-- * int first_node(mask) Number lowest set bit, or MAX_NUMNODES
-- * int next_node(node, mask) Next node past 'node', or MAX_NUMNODES
-- * int next_node_in(node, mask) Next node past 'node', or wrap to first,
-+ * unsigned int first_node(mask) Number lowest set bit, or MAX_NUMNODES
-+ * unsigned int next_node(node, mask) Next node past 'node', or MAX_NUMNODES
-+ * unsigned int next_node_in(node, mask) Next node past 'node', or wrap to first,
- * or MAX_NUMNODES
-- * int first_unset_node(mask) First node not set in mask, or
-+ * unsigned int first_unset_node(mask) First node not set in mask, or
- * MAX_NUMNODES
- *
- * nodemask_t nodemask_of_node(node) Return nodemask with bit 'node' set
-@@ -153,7 +153,7 @@ static inline void __nodes_clear(nodemask_t *dstp, unsigned int nbits)
-
- #define node_test_and_set(node, nodemask) \
- __node_test_and_set((node), &(nodemask))
--static inline int __node_test_and_set(int node, nodemask_t *addr)
-+static inline bool __node_test_and_set(int node, nodemask_t *addr)
- {
- return test_and_set_bit(node, addr->bits);
- }
+ if (__reg64_bound_s32(reg->smin_value) && __reg64_bound_s32(reg->smax_value)) {
+ reg->s32_min_value = (s32)reg->smin_value;
+ reg->s32_max_value = (s32)reg->smax_value;
+@@ -1426,14 +1431,7 @@ static void __reg_combine_64_into_32(struct bpf_reg_state *reg)
+ reg->u32_min_value = (u32)reg->umin_value;
+ reg->u32_max_value = (u32)reg->umax_value;
+ }
+-
+- /* Intersecting with the old var_off might have improved our bounds
+- * slightly. e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc),
+- * then new var_off is (0; 0x7f...fc) which improves our umax.
+- */
+- __reg_deduce_bounds(reg);
+- __reg_bound_offset(reg);
+- __update_reg_bounds(reg);
++ reg_bounds_sync(reg);
+ }
+
+ /* Mark a register as having a completely unknown (scalar) value. */
+@@ -1807,16 +1805,10 @@ static int add_subprog_and_kfunc(struct bpf_verifier_env *env)
+ return -EPERM;
+ }
+
+- if (bpf_pseudo_func(insn)) {
+- ret = add_subprog(env, i + insn->imm + 1);
+- if (ret >= 0)
+- /* remember subprog */
+- insn[1].imm = ret;
+- } else if (bpf_pseudo_call(insn)) {
++ if (bpf_pseudo_func(insn) || bpf_pseudo_call(insn))
+ ret = add_subprog(env, i + insn->imm + 1);
+- } else {
++ else
+ ret = add_kfunc_call(env, insn->imm);
+- }
+
+ if (ret < 0)
+ return ret;
+@@ -1899,7 +1891,7 @@ static int mark_reg_read(struct bpf_verifier_env *env,
+ break;
+ if (parent->live & REG_LIVE_DONE) {
+ verbose(env, "verifier BUG type %s var_off %lld off %d\n",
+- reg_type_str[parent->type],
++ reg_type_str(env, parent->type),
+ parent->var_off.value, parent->off);
+ return -EFAULT;
+ }
+@@ -2232,8 +2224,6 @@ static int backtrack_insn(struct bpf_verifier_env *env, int idx,
+ */
+ if (insn->src_reg != BPF_REG_FP)
+ return 0;
+- if (BPF_SIZE(insn->code) != BPF_DW)
+- return 0;
+
+ /* dreg = *(u64 *)[fp - off] was a fill from the stack.
+ * that [fp - off] slot contains scalar that needs to be
+@@ -2256,8 +2246,6 @@ static int backtrack_insn(struct bpf_verifier_env *env, int idx,
+ /* scalars can only be spilled into stack */
+ if (insn->dst_reg != BPF_REG_FP)
+ return 0;
+- if (BPF_SIZE(insn->code) != BPF_DW)
+- return 0;
+ spi = (-insn->off - 1) / BPF_REG_SIZE;
+ if (spi >= 64) {
+ verbose(env, "BUG spi %d\n", spi);
+@@ -2273,6 +2261,12 @@ static int backtrack_insn(struct bpf_verifier_env *env, int idx,
+ if (opcode == BPF_CALL) {
+ if (insn->src_reg == BPF_PSEUDO_CALL)
+ return -ENOTSUPP;
++ /* kfunc with imm==0 is invalid and fixup_kfunc_call will
++ * catch this error later. Make backtracking conservative
++ * with ENOTSUPP.
++ */ ++ if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL && insn->imm == 0) ++ return -ENOTSUPP; + /* regular helper call sets R0 */ + *reg_mask &= ~1; + if (*reg_mask & 0x3f) { +@@ -2373,7 +2367,7 @@ static void mark_all_scalars_precise(struct bpf_verifier_env *env, + reg->precise = true; + } + for (j = 0; j < func->allocated_stack / BPF_REG_SIZE; j++) { +- if (func->stack[j].slot_type[0] != STACK_SPILL) ++ if (!is_spilled_reg(&func->stack[j])) + continue; + reg = &func->stack[j].spilled_ptr; + if (reg->type != SCALAR_VALUE) +@@ -2383,7 +2377,7 @@ static void mark_all_scalars_precise(struct bpf_verifier_env *env, + } } -@@ -200,7 +200,7 @@ static inline void __nodes_complement(nodemask_t *dstp, - #define nodes_equal(src1, src2) \ - __nodes_equal(&(src1), &(src2), MAX_NUMNODES) --static inline int __nodes_equal(const nodemask_t *src1p, -+static inline bool __nodes_equal(const nodemask_t *src1p, - const nodemask_t *src2p, unsigned int nbits) +-static int __mark_chain_precision(struct bpf_verifier_env *env, int regno, ++static int __mark_chain_precision(struct bpf_verifier_env *env, int frame, int regno, + int spi) { - return bitmap_equal(src1p->bits, src2p->bits, nbits); -@@ -208,7 +208,7 @@ static inline int __nodes_equal(const nodemask_t *src1p, + struct bpf_verifier_state *st = env->cur_state; +@@ -2400,7 +2394,7 @@ static int __mark_chain_precision(struct bpf_verifier_env *env, int regno, + if (!env->bpf_capable) + return 0; - #define nodes_intersects(src1, src2) \ - __nodes_intersects(&(src1), &(src2), MAX_NUMNODES) --static inline int __nodes_intersects(const nodemask_t *src1p, -+static inline bool __nodes_intersects(const nodemask_t *src1p, - const nodemask_t *src2p, unsigned int nbits) - { - return bitmap_intersects(src1p->bits, src2p->bits, nbits); -@@ -216,20 +216,20 @@ static inline int __nodes_intersects(const nodemask_t *src1p, +- func = st->frame[st->curframe]; ++ func = st->frame[frame]; + if (regno >= 0) { + reg = &func->regs[regno]; + if (reg->type != SCALAR_VALUE) { +@@ -2415,7 +2409,7 @@ static int __mark_chain_precision(struct bpf_verifier_env *env, int regno, + } - #define nodes_subset(src1, src2) \ - __nodes_subset(&(src1), &(src2), MAX_NUMNODES) --static inline int __nodes_subset(const nodemask_t *src1p, -+static inline bool __nodes_subset(const nodemask_t *src1p, - const nodemask_t *src2p, unsigned int nbits) - { - return bitmap_subset(src1p->bits, src2p->bits, nbits); - } + while (spi >= 0) { +- if (func->stack[spi].slot_type[0] != STACK_SPILL) { ++ if (!is_spilled_reg(&func->stack[spi])) { + stack_mask = 0; + break; + } +@@ -2481,7 +2475,7 @@ static int __mark_chain_precision(struct bpf_verifier_env *env, int regno, + break; - #define nodes_empty(src) __nodes_empty(&(src), MAX_NUMNODES) --static inline int __nodes_empty(const nodemask_t *srcp, unsigned int nbits) -+static inline bool __nodes_empty(const nodemask_t *srcp, unsigned int nbits) + new_marks = false; +- func = st->frame[st->curframe]; ++ func = st->frame[frame]; + bitmap_from_u64(mask, reg_mask); + for_each_set_bit(i, mask, 32) { + reg = &func->regs[i]; +@@ -2514,7 +2508,7 @@ static int __mark_chain_precision(struct bpf_verifier_env *env, int regno, + return 0; + } + +- if (func->stack[i].slot_type[0] != STACK_SPILL) { ++ if (!is_spilled_reg(&func->stack[i])) { + stack_mask &= ~(1ull << i); + continue; + } +@@ -2547,19 +2541,23 @@ static int __mark_chain_precision(struct bpf_verifier_env *env, int regno, + + static int mark_chain_precision(struct bpf_verifier_env *env, int regno) { - return 
bitmap_empty(srcp->bits, nbits); +- return __mark_chain_precision(env, regno, -1); ++ return __mark_chain_precision(env, env->cur_state->curframe, regno, -1); ++} ++ ++static int mark_chain_precision_frame(struct bpf_verifier_env *env, int frame, int regno) ++{ ++ return __mark_chain_precision(env, frame, regno, -1); } - #define nodes_full(nodemask) __nodes_full(&(nodemask), MAX_NUMNODES) --static inline int __nodes_full(const nodemask_t *srcp, unsigned int nbits) -+static inline bool __nodes_full(const nodemask_t *srcp, unsigned int nbits) +-static int mark_chain_precision_stack(struct bpf_verifier_env *env, int spi) ++static int mark_chain_precision_stack_frame(struct bpf_verifier_env *env, int frame, int spi) { - return bitmap_full(srcp->bits, nbits); +- return __mark_chain_precision(env, -1, spi); ++ return __mark_chain_precision(env, frame, -1, spi); } -@@ -260,15 +260,15 @@ static inline void __nodes_shift_left(nodemask_t *dstp, - > MAX_NUMNODES, then the silly min_ts could be dropped. */ - #define first_node(src) __first_node(&(src)) --static inline int __first_node(const nodemask_t *srcp) -+static inline unsigned int __first_node(const nodemask_t *srcp) + static bool is_spillable_regtype(enum bpf_reg_type type) { -- return min_t(int, MAX_NUMNODES, find_first_bit(srcp->bits, MAX_NUMNODES)); -+ return min_t(unsigned int, MAX_NUMNODES, find_first_bit(srcp->bits, MAX_NUMNODES)); +- switch (type) { ++ switch (base_type(type)) { + case PTR_TO_MAP_VALUE: +- case PTR_TO_MAP_VALUE_OR_NULL: + case PTR_TO_STACK: + case PTR_TO_CTX: + case PTR_TO_PACKET: +@@ -2568,21 +2566,13 @@ static bool is_spillable_regtype(enum bpf_reg_type type) + case PTR_TO_FLOW_KEYS: + case CONST_PTR_TO_MAP: + case PTR_TO_SOCKET: +- case PTR_TO_SOCKET_OR_NULL: + case PTR_TO_SOCK_COMMON: +- case PTR_TO_SOCK_COMMON_OR_NULL: + case PTR_TO_TCP_SOCK: +- case PTR_TO_TCP_SOCK_OR_NULL: + case PTR_TO_XDP_SOCK: + case PTR_TO_BTF_ID: +- case PTR_TO_BTF_ID_OR_NULL: +- case PTR_TO_RDONLY_BUF: +- case PTR_TO_RDONLY_BUF_OR_NULL: +- case PTR_TO_RDWR_BUF: +- case PTR_TO_RDWR_BUF_OR_NULL: ++ case PTR_TO_BUF: + case PTR_TO_PERCPU_BTF_ID: + case PTR_TO_MEM: +- case PTR_TO_MEM_OR_NULL: + case PTR_TO_FUNC: + case PTR_TO_MAP_KEY: + return true; +@@ -2625,16 +2615,33 @@ static bool __is_pointer_value(bool allow_ptr_leaks, + return reg->type != SCALAR_VALUE; } - #define next_node(n, src) __next_node((n), &(src)) --static inline int __next_node(int n, const nodemask_t *srcp) -+static inline unsigned int __next_node(int n, const nodemask_t *srcp) ++/* Copy src state preserving dst->parent and dst->live fields */ ++static void copy_register_state(struct bpf_reg_state *dst, const struct bpf_reg_state *src) ++{ ++ struct bpf_reg_state *parent = dst->parent; ++ enum bpf_reg_liveness live = dst->live; ++ ++ *dst = *src; ++ dst->parent = parent; ++ dst->live = live; ++} ++ + static void save_register_state(struct bpf_func_state *state, +- int spi, struct bpf_reg_state *reg) ++ int spi, struct bpf_reg_state *reg, ++ int size) { -- return min_t(int,MAX_NUMNODES,find_next_bit(srcp->bits, MAX_NUMNODES, n+1)); -+ return min_t(unsigned int, MAX_NUMNODES, find_next_bit(srcp->bits, MAX_NUMNODES, n+1)); + int i; + +- state->stack[spi].spilled_ptr = *reg; +- state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN; ++ copy_register_state(&state->stack[spi].spilled_ptr, reg); ++ if (size == BPF_REG_SIZE) ++ state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN; ++ ++ for (i = BPF_REG_SIZE; i > BPF_REG_SIZE - size; i--) ++ state->stack[spi].slot_type[i - 1] = 
STACK_SPILL;
+
+- for (i = 0; i < BPF_REG_SIZE; i++)
+- state->stack[spi].slot_type[i] = STACK_SPILL;
++ /* size < 8 bytes spill */
++ for (; i; i--)
++ scrub_spilled_slot(&state->stack[spi].slot_type[i - 1]);
+ }
+
+ /* check_stack_{read,write}_fixed_off functions track spill/fill of registers,
+@@ -2671,7 +2678,9 @@ static int check_stack_write_fixed_off(struct bpf_verifier_env *env,
+ bool sanitize = reg && is_spillable_regtype(reg->type);
+
+ for (i = 0; i < size; i++) {
+- if (state->stack[spi].slot_type[i] == STACK_INVALID) {
++ u8 type = state->stack[spi].slot_type[i];
++
++ if (type != STACK_MISC && type != STACK_ZERO) {
+ sanitize = true;
+ break;
+ }
+@@ -2681,7 +2690,7 @@ static int check_stack_write_fixed_off(struct bpf_verifier_env *env,
+ env->insn_aux_data[insn_idx].sanitize_stack_spill = true;
+ }
+
+- if (reg && size == BPF_REG_SIZE && register_is_bounded(reg) &&
++ if (reg && !(off % BPF_REG_SIZE) && register_is_bounded(reg) &&
+ !register_is_null(reg) && env->bpf_capable) {
+ if (dst_reg != BPF_REG_FP) {
+ /* The backtracking logic can only recognize explicit
+@@ -2694,7 +2703,7 @@ static int check_stack_write_fixed_off(struct bpf_verifier_env *env,
+ if (err)
+ return err;
+ }
+- save_register_state(state, spi, reg);
++ save_register_state(state, spi, reg, size);
+ } else if (reg && is_spillable_regtype(reg->type)) {
+ /* register containing pointer is being spilled into stack */
+ if (size != BPF_REG_SIZE) {
+@@ -2706,16 +2715,16 @@ static int check_stack_write_fixed_off(struct bpf_verifier_env *env,
+ verbose(env, "cannot spill pointers to stack into stack frame of the caller\n");
+ return -EINVAL;
+ }
+- save_register_state(state, spi, reg);
++ save_register_state(state, spi, reg, size);
+ } else {
+ u8 type = STACK_MISC;
+
+ /* regular write of data into stack destroys any spilled ptr */
+ state->stack[spi].spilled_ptr.type = NOT_INIT;
+ /* Mark slots as STACK_MISC if they belonged to spilled ptr. */
+- if (state->stack[spi].slot_type[0] == STACK_SPILL)
++ if (is_spilled_reg(&state->stack[spi]))
+ for (i = 0; i < BPF_REG_SIZE; i++)
+- state->stack[spi].slot_type[i] = STACK_MISC;
++ scrub_spilled_slot(&state->stack[spi].slot_type[i]);
+
+ /* only mark the slot as written if all 8 bytes were written
+ * otherwise read propagation may incorrectly stop too soon
+@@ -2803,14 +2812,17 @@ static int check_stack_write_var_off(struct bpf_verifier_env *env,
+ spi = slot / BPF_REG_SIZE;
+ stype = &state->stack[spi].slot_type[slot % BPF_REG_SIZE];
+
+- if (!env->allow_ptr_leaks
+- && *stype != NOT_INIT
+- && *stype != SCALAR_VALUE) {
+- /* Reject the write if there are spilled pointers in
+- * range. If we didn't reject here, the ptr status
+- * would be erased below (even though not all slots are
+- * actually overwritten), possibly opening the door to
+- * leaks.
++ if (!env->allow_ptr_leaks && *stype != STACK_MISC && *stype != STACK_ZERO) {
++ /* Reject the write if the range we may write to has not
++ * been initialized beforehand. If we didn't reject
++ * here, the ptr status would be erased below (even
++ * though not all slots are actually overwritten),
++ * possibly opening the door to leaks.
++ * ++ * We do however catch STACK_INVALID case below, and ++ * only allow reading possibly uninitialized memory ++ * later for CAP_PERFMON, as the write may not happen to ++ * that slot. + */ + verbose(env, "spilled ptr in range of var-offset stack write; insn %d, ptr off: %d", + insn_idx, i); +@@ -2918,35 +2930,56 @@ static int check_stack_read_fixed_off(struct bpf_verifier_env *env, + struct bpf_func_state *state = vstate->frame[vstate->curframe]; + int i, slot = -off - 1, spi = slot / BPF_REG_SIZE; + struct bpf_reg_state *reg; +- u8 *stype; ++ u8 *stype, type; + + stype = reg_state->stack[spi].slot_type; + reg = ®_state->stack[spi].spilled_ptr; + +- if (stype[0] == STACK_SPILL) { +- if (size != BPF_REG_SIZE) { ++ if (is_spilled_reg(®_state->stack[spi])) { ++ u8 spill_size = 1; ++ ++ for (i = BPF_REG_SIZE - 1; i > 0 && stype[i - 1] == STACK_SPILL; i--) ++ spill_size++; ++ ++ if (size != BPF_REG_SIZE || spill_size != BPF_REG_SIZE) { + if (reg->type != SCALAR_VALUE) { + verbose_linfo(env, env->insn_idx, "; "); + verbose(env, "invalid size of register fill\n"); + return -EACCES; + } +- if (dst_regno >= 0) { ++ ++ mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64); ++ if (dst_regno < 0) ++ return 0; ++ ++ if (!(off % BPF_REG_SIZE) && size == spill_size) { ++ /* The earlier check_reg_arg() has decided the ++ * subreg_def for this insn. Save it first. ++ */ ++ s32 subreg_def = state->regs[dst_regno].subreg_def; ++ ++ copy_register_state(&state->regs[dst_regno], reg); ++ state->regs[dst_regno].subreg_def = subreg_def; ++ } else { ++ for (i = 0; i < size; i++) { ++ type = stype[(slot - i) % BPF_REG_SIZE]; ++ if (type == STACK_SPILL) ++ continue; ++ if (type == STACK_MISC) ++ continue; ++ verbose(env, "invalid read from stack off %d+%d size %d\n", ++ off, i, size); ++ return -EACCES; ++ } + mark_reg_unknown(env, state->regs, dst_regno); +- state->regs[dst_regno].live |= REG_LIVE_WRITTEN; + } +- mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64); ++ state->regs[dst_regno].live |= REG_LIVE_WRITTEN; + return 0; + } +- for (i = 1; i < BPF_REG_SIZE; i++) { +- if (stype[(slot - i) % BPF_REG_SIZE] != STACK_SPILL) { +- verbose(env, "corrupted spill memory\n"); +- return -EACCES; +- } +- } + + if (dst_regno >= 0) { + /* restore register state from stack */ +- state->regs[dst_regno] = *reg; ++ copy_register_state(&state->regs[dst_regno], reg); + /* mark reg as written since spilled pointer state likely + * has its liveness marks cleared by is_state_visited() + * which resets stack/reg liveness for state transitions +@@ -2965,8 +2998,6 @@ static int check_stack_read_fixed_off(struct bpf_verifier_env *env, + } + mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64); + } else { +- u8 type; +- + for (i = 0; i < size; i++) { + type = stype[(slot - i) % BPF_REG_SIZE]; + if (type == STACK_MISC) +@@ -3398,7 +3429,7 @@ static int check_ctx_access(struct bpf_verifier_env *env, int insn_idx, int off, + */ + *reg_type = info.reg_type; + +- if (*reg_type == PTR_TO_BTF_ID || *reg_type == PTR_TO_BTF_ID_OR_NULL) { ++ if (base_type(*reg_type) == PTR_TO_BTF_ID) { + *btf = info.btf; + *btf_id = info.btf_id; + } else { +@@ -3466,7 +3497,7 @@ static int check_sock_access(struct bpf_verifier_env *env, int insn_idx, + } + + verbose(env, "R%d invalid %s access off=%d size=%d\n", +- regno, reg_type_str[reg->type], off, size); ++ regno, reg_type_str(env, reg->type), off, size); - static inline void init_nodemask_of_node(nodemask_t *mask, int node) - { -@@ -296,9 +296,9 @@ static inline void init_nodemask_of_node(nodemask_t 
*mask, int node) - }) + return -EACCES; + } +@@ -3884,7 +3915,22 @@ static void coerce_reg_to_size(struct bpf_reg_state *reg, int size) - #define first_unset_node(mask) __first_unset_node(&(mask)) --static inline int __first_unset_node(const nodemask_t *maskp) -+static inline unsigned int __first_unset_node(const nodemask_t *maskp) + static bool bpf_map_is_rdonly(const struct bpf_map *map) { -- return min_t(int,MAX_NUMNODES, -+ return min_t(unsigned int, MAX_NUMNODES, - find_first_zero_bit(maskp->bits, MAX_NUMNODES)); +- return (map->map_flags & BPF_F_RDONLY_PROG) && map->frozen; ++ /* A map is considered read-only if the following condition are true: ++ * ++ * 1) BPF program side cannot change any of the map content. The ++ * BPF_F_RDONLY_PROG flag is throughout the lifetime of a map ++ * and was set at map creation time. ++ * 2) The map value(s) have been initialized from user space by a ++ * loader and then "frozen", such that no new map update/delete ++ * operations from syscall side are possible for the rest of ++ * the map's lifetime from that point onwards. ++ * 3) Any parallel/pending map update/delete operations from syscall ++ * side have been completed. Only after that point, it's safe to ++ * assume that map value(s) are immutable. ++ */ ++ return (map->map_flags & BPF_F_RDONLY_PROG) && ++ READ_ONCE(map->frozen) && ++ !bpf_map_write_active(map); } -@@ -375,14 +375,13 @@ static inline void __nodes_fold(nodemask_t *dstp, const nodemask_t *origp, - } + static int bpf_map_direct_read(struct bpf_map *map, int off, int size, u64 *val) +@@ -4178,15 +4224,30 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn + mark_reg_unknown(env, regs, value_regno); + } + } +- } else if (reg->type == PTR_TO_MEM) { ++ } else if (base_type(reg->type) == PTR_TO_MEM) { ++ bool rdonly_mem = type_is_rdonly_mem(reg->type); ++ ++ if (type_may_be_null(reg->type)) { ++ verbose(env, "R%d invalid mem access '%s'\n", regno, ++ reg_type_str(env, reg->type)); ++ return -EACCES; ++ } ++ ++ if (t == BPF_WRITE && rdonly_mem) { ++ verbose(env, "R%d cannot write into %s\n", ++ regno, reg_type_str(env, reg->type)); ++ return -EACCES; ++ } ++ + if (t == BPF_WRITE && value_regno >= 0 && + is_pointer_value(env, value_regno)) { + verbose(env, "R%d leaks addr into mem\n", value_regno); + return -EACCES; + } ++ + err = check_mem_region_access(env, regno, off, size, + reg->mem_size, false); +- if (!err && t == BPF_READ && value_regno >= 0) ++ if (!err && value_regno >= 0 && (t == BPF_READ || rdonly_mem)) + mark_reg_unknown(env, regs, value_regno); + } else if (reg->type == PTR_TO_CTX) { + enum bpf_reg_type reg_type = SCALAR_VALUE; +@@ -4216,7 +4277,7 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn + } else { + mark_reg_known_zero(env, regs, + value_regno); +- if (reg_type_may_be_null(reg_type)) ++ if (type_may_be_null(reg_type)) + regs[value_regno].id = ++env->id_gen; + /* A load of ctx field could have different + * actual load size with the one encoded in the +@@ -4224,8 +4285,7 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn + * a sub-register. 
+ */ + regs[value_regno].subreg_def = DEF_NOT_SUBREG; +- if (reg_type == PTR_TO_BTF_ID || +- reg_type == PTR_TO_BTF_ID_OR_NULL) { ++ if (base_type(reg_type) == PTR_TO_BTF_ID) { + regs[value_regno].btf = btf; + regs[value_regno].btf_id = btf_id; + } +@@ -4278,7 +4338,7 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn + } else if (type_is_sk_pointer(reg->type)) { + if (t == BPF_WRITE) { + verbose(env, "R%d cannot write into %s\n", +- regno, reg_type_str[reg->type]); ++ regno, reg_type_str(env, reg->type)); + return -EACCES; + } + err = check_sock_access(env, insn_idx, regno, off, size, t); +@@ -4294,26 +4354,32 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn + } else if (reg->type == CONST_PTR_TO_MAP) { + err = check_ptr_to_map_access(env, regs, regno, off, size, t, + value_regno); +- } else if (reg->type == PTR_TO_RDONLY_BUF) { +- if (t == BPF_WRITE) { +- verbose(env, "R%d cannot write into %s\n", +- regno, reg_type_str[reg->type]); +- return -EACCES; ++ } else if (base_type(reg->type) == PTR_TO_BUF) { ++ bool rdonly_mem = type_is_rdonly_mem(reg->type); ++ const char *buf_info; ++ u32 *max_access; ++ ++ if (rdonly_mem) { ++ if (t == BPF_WRITE) { ++ verbose(env, "R%d cannot write into %s\n", ++ regno, reg_type_str(env, reg->type)); ++ return -EACCES; ++ } ++ buf_info = "rdonly"; ++ max_access = &env->prog->aux->max_rdonly_access; ++ } else { ++ buf_info = "rdwr"; ++ max_access = &env->prog->aux->max_rdwr_access; + } ++ + err = check_buffer_access(env, reg, regno, off, size, false, +- "rdonly", +- &env->prog->aux->max_rdonly_access); +- if (!err && value_regno >= 0) +- mark_reg_unknown(env, regs, value_regno); +- } else if (reg->type == PTR_TO_RDWR_BUF) { +- err = check_buffer_access(env, reg, regno, off, size, false, +- "rdwr", +- &env->prog->aux->max_rdwr_access); +- if (!err && t == BPF_READ && value_regno >= 0) ++ buf_info, max_access); ++ ++ if (!err && value_regno >= 0 && (rdonly_mem || t == BPF_READ)) + mark_reg_unknown(env, regs, value_regno); + } else { + verbose(env, "R%d invalid mem access '%s'\n", regno, +- reg_type_str[reg->type]); ++ reg_type_str(env, reg->type)); + return -EACCES; + } - #if MAX_NUMNODES > 1 --#define for_each_node_mask(node, mask) \ -- for ((node) = first_node(mask); \ -- (node) < MAX_NUMNODES; \ -- (node) = next_node((node), (mask))) -+#define for_each_node_mask(node, mask) \ -+ for ((node) = first_node(mask); \ -+ (node >= 0) && (node) < MAX_NUMNODES; \ -+ (node) = next_node((node), (mask))) - #else /* MAX_NUMNODES == 1 */ --#define for_each_node_mask(node, mask) \ -- if (!nodes_empty(mask)) \ -- for ((node) = 0; (node) < 1; (node)++) -+#define for_each_node_mask(node, mask) \ -+ for ((node) = 0; (node) < 1 && !nodes_empty(mask); (node)++) - #endif /* MAX_NUMNODES */ +@@ -4364,9 +4430,16 @@ static int check_atomic(struct bpf_verifier_env *env, int insn_idx, struct bpf_i - /* -@@ -436,11 +435,11 @@ static inline int num_node_state(enum node_states state) + if (insn->imm == BPF_CMPXCHG) { + /* Check comparison of R0 with memory location */ +- err = check_reg_arg(env, BPF_REG_0, SRC_OP); ++ const u32 aux_reg = BPF_REG_0; ++ ++ err = check_reg_arg(env, aux_reg, SRC_OP); + if (err) + return err; ++ ++ if (is_pointer_value(env, aux_reg)) { ++ verbose(env, "R%d leaks addr into mem\n", aux_reg); ++ return -EACCES; ++ } + } - #define first_online_node first_node(node_states[N_ONLINE]) - #define first_memory_node first_node(node_states[N_MEMORY]) --static inline int next_online_node(int nid) 
-+static inline unsigned int next_online_node(int nid) - { - return next_node(nid, node_states[N_ONLINE]); - } --static inline int next_memory_node(int nid) -+static inline unsigned int next_memory_node(int nid) + if (is_pointer_value(env, insn->src_reg)) { +@@ -4380,7 +4453,7 @@ static int check_atomic(struct bpf_verifier_env *env, int insn_idx, struct bpf_i + is_sk_reg(env, insn->dst_reg)) { + verbose(env, "BPF_ATOMIC stores into R%d %s is not allowed\n", + insn->dst_reg, +- reg_type_str[reg_state(env, insn->dst_reg)->type]); ++ reg_type_str(env, reg_state(env, insn->dst_reg)->type)); + return -EACCES; + } + +@@ -4401,13 +4474,19 @@ static int check_atomic(struct bpf_verifier_env *env, int insn_idx, struct bpf_i + load_reg = -1; + } + +- /* check whether we can read the memory */ ++ /* Check whether we can read the memory, with second call for fetch ++ * case to simulate the register fill. ++ */ + err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off, +- BPF_SIZE(insn->code), BPF_READ, load_reg, true); ++ BPF_SIZE(insn->code), BPF_READ, -1, true); ++ if (!err && load_reg >= 0) ++ err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off, ++ BPF_SIZE(insn->code), BPF_READ, load_reg, ++ true); + if (err) + return err; + +- /* check whether we can write into the same memory */ ++ /* Check whether we can write into the same memory. */ + err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off, + BPF_SIZE(insn->code), BPF_WRITE, -1, true); + if (err) +@@ -4514,17 +4593,17 @@ static int check_stack_range_initialized( + goto mark; + } + +- if (state->stack[spi].slot_type[0] == STACK_SPILL && ++ if (is_spilled_reg(&state->stack[spi]) && + state->stack[spi].spilled_ptr.type == PTR_TO_BTF_ID) + goto mark; + +- if (state->stack[spi].slot_type[0] == STACK_SPILL && ++ if (is_spilled_reg(&state->stack[spi]) && + (state->stack[spi].spilled_ptr.type == SCALAR_VALUE || + env->allow_ptr_leaks)) { + if (clobber) { + __mark_reg_unknown(env, &state->stack[spi].spilled_ptr); + for (j = 0; j < BPF_REG_SIZE; j++) +- state->stack[spi].slot_type[j] = STACK_MISC; ++ scrub_spilled_slot(&state->stack[spi].slot_type[j]); + } + goto mark; + } +@@ -4557,13 +4636,20 @@ static int check_helper_mem_access(struct bpf_verifier_env *env, int regno, + struct bpf_call_arg_meta *meta) { - return next_node(nid, node_states[N_MEMORY]); - } -diff --git a/include/linux/nvme.h b/include/linux/nvme.h -index b7c4c4130b65e..039f59ee8f435 100644 ---- a/include/linux/nvme.h -+++ b/include/linux/nvme.h -@@ -322,6 +322,7 @@ enum { - NVME_CTRL_ONCS_TIMESTAMP = 1 << 6, - NVME_CTRL_VWC_PRESENT = 1 << 0, - NVME_CTRL_OACS_SEC_SUPP = 1 << 0, -+ NVME_CTRL_OACS_NS_MNGT_SUPP = 1 << 3, - NVME_CTRL_OACS_DIRECTIVES = 1 << 5, - NVME_CTRL_OACS_DBBUF_SUPP = 1 << 8, - NVME_CTRL_LPA_CMD_EFFECTS_LOG = 1 << 1, -diff --git a/include/linux/nvmem-provider.h b/include/linux/nvmem-provider.h -index 104505e9028f7..87932bdb25d7b 100644 ---- a/include/linux/nvmem-provider.h -+++ b/include/linux/nvmem-provider.h -@@ -66,7 +66,8 @@ struct nvmem_keepout { - * @word_size: Minimum read/write access granularity. - * @stride: Minimum read/write access stride. - * @priv: User context passed to read/write callbacks. -- * @wp-gpio: Write protect pin -+ * @wp-gpio: Write protect pin -+ * @ignore_wp: Write Protect pin is managed by the provider. - * - * Note: A default "nvmem<id>" name will be assigned to the device if - * no name is specified in its configuration. 
In such case "<id>" is -@@ -88,6 +89,7 @@ struct nvmem_config { - enum nvmem_type type; - bool read_only; - bool root_only; -+ bool ignore_wp; - struct device_node *of_node; - bool no_of_node; - nvmem_reg_read_t reg_read; -diff --git a/include/linux/objtool.h b/include/linux/objtool.h -index 7e72d975cb761..a2042c4186864 100644 ---- a/include/linux/objtool.h -+++ b/include/linux/objtool.h -@@ -32,11 +32,16 @@ struct unwind_hint { - * - * UNWIND_HINT_FUNC: Generate the unwind metadata of a callable function. - * Useful for code which doesn't have an ELF function annotation. -+ * -+ * UNWIND_HINT_ENTRY: machine entry without stack, SYSCALL/SYSENTER etc. - */ - #define UNWIND_HINT_TYPE_CALL 0 - #define UNWIND_HINT_TYPE_REGS 1 - #define UNWIND_HINT_TYPE_REGS_PARTIAL 2 - #define UNWIND_HINT_TYPE_FUNC 3 -+#define UNWIND_HINT_TYPE_ENTRY 4 -+#define UNWIND_HINT_TYPE_SAVE 5 -+#define UNWIND_HINT_TYPE_RESTORE 6 + struct bpf_reg_state *regs = cur_regs(env), *reg = ®s[regno]; ++ const char *buf_info; ++ u32 *max_access; - #ifdef CONFIG_STACK_VALIDATION +- switch (reg->type) { ++ switch (base_type(reg->type)) { + case PTR_TO_PACKET: + case PTR_TO_PACKET_META: + return check_packet_access(env, regno, reg->off, access_size, + zero_size_allowed); + case PTR_TO_MAP_KEY: ++ if (meta && meta->raw_mode) { ++ verbose(env, "R%d cannot write into %s\n", regno, ++ reg_type_str(env, reg->type)); ++ return -EACCES; ++ } + return check_mem_region_access(env, regno, reg->off, access_size, + reg->map_ptr->key_size, false); + case PTR_TO_MAP_VALUE: +@@ -4574,21 +4660,33 @@ static int check_helper_mem_access(struct bpf_verifier_env *env, int regno, + return check_map_access(env, regno, reg->off, access_size, + zero_size_allowed); + case PTR_TO_MEM: ++ if (type_is_rdonly_mem(reg->type)) { ++ if (meta && meta->raw_mode) { ++ verbose(env, "R%d cannot write into %s\n", regno, ++ reg_type_str(env, reg->type)); ++ return -EACCES; ++ } ++ } + return check_mem_region_access(env, regno, reg->off, + access_size, reg->mem_size, + zero_size_allowed); +- case PTR_TO_RDONLY_BUF: +- if (meta && meta->raw_mode) +- return -EACCES; +- return check_buffer_access(env, reg, regno, reg->off, +- access_size, zero_size_allowed, +- "rdonly", +- &env->prog->aux->max_rdonly_access); +- case PTR_TO_RDWR_BUF: ++ case PTR_TO_BUF: ++ if (type_is_rdonly_mem(reg->type)) { ++ if (meta && meta->raw_mode) { ++ verbose(env, "R%d cannot write into %s\n", regno, ++ reg_type_str(env, reg->type)); ++ return -EACCES; ++ } ++ ++ buf_info = "rdonly"; ++ max_access = &env->prog->aux->max_rdonly_access; ++ } else { ++ buf_info = "rdwr"; ++ max_access = &env->prog->aux->max_rdwr_access; ++ } + return check_buffer_access(env, reg, regno, reg->off, + access_size, zero_size_allowed, +- "rdwr", +- &env->prog->aux->max_rdwr_access); ++ buf_info, max_access); + case PTR_TO_STACK: + return check_stack_range_initialized( + env, +@@ -4600,9 +4698,9 @@ static int check_helper_mem_access(struct bpf_verifier_env *env, int regno, + register_is_null(reg)) + return 0; -@@ -99,7 +104,7 @@ struct unwind_hint { - * the debuginfo as necessary. It will also warn if it sees any - * inconsistencies. 
- */ --.macro UNWIND_HINT sp_reg:req sp_offset=0 type:req end=0 -+.macro UNWIND_HINT type:req sp_reg=0 sp_offset=0 end=0 - .Lunwind_hint_ip_\@: - .pushsection .discard.unwind_hints - /* struct unwind_hint */ -@@ -129,7 +134,7 @@ struct unwind_hint { - #define STACK_FRAME_NON_STANDARD(func) - #else - #define ANNOTATE_INTRA_FUNCTION_CALL --.macro UNWIND_HINT sp_reg:req sp_offset=0 type:req end=0 -+.macro UNWIND_HINT type:req sp_reg=0 sp_offset=0 end=0 - .endm - .macro STACK_FRAME_NON_STANDARD func:req - .endm -diff --git a/include/linux/of_device.h b/include/linux/of_device.h -index 1d7992a02e36e..1a803e4335d30 100644 ---- a/include/linux/of_device.h -+++ b/include/linux/of_device.h -@@ -101,8 +101,9 @@ static inline struct device_node *of_cpu_device_node_get(int cpu) +- verbose(env, "R%d type=%s expected=%s\n", regno, +- reg_type_str[reg->type], +- reg_type_str[PTR_TO_STACK]); ++ verbose(env, "R%d type=%s ", regno, ++ reg_type_str(env, reg->type)); ++ verbose(env, "expected=%s\n", reg_type_str(env, PTR_TO_STACK)); + return -EACCES; + } } +@@ -4613,7 +4711,7 @@ int check_mem_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg, + if (register_is_null(reg)) + return 0; - static inline int of_dma_configure_id(struct device *dev, -- struct device_node *np, -- bool force_dma) -+ struct device_node *np, -+ bool force_dma, -+ const u32 *id) +- if (reg_type_may_be_null(reg->type)) { ++ if (type_may_be_null(reg->type)) { + /* Assuming that the register contains a value check if the memory + * access is safe. Temporarily save and restore the register's state as + * the conversion shouldn't be visible to a caller. +@@ -4761,9 +4859,8 @@ static int process_timer_func(struct bpf_verifier_env *env, int regno, + + static bool arg_type_is_mem_ptr(enum bpf_arg_type type) { - return 0; +- return type == ARG_PTR_TO_MEM || +- type == ARG_PTR_TO_MEM_OR_NULL || +- type == ARG_PTR_TO_UNINIT_MEM; ++ return base_type(type) == ARG_PTR_TO_MEM || ++ base_type(type) == ARG_PTR_TO_UNINIT_MEM; } -diff --git a/include/linux/of_fdt.h b/include/linux/of_fdt.h -index cf6a65b94d40e..6508b97dbf1d2 100644 ---- a/include/linux/of_fdt.h -+++ b/include/linux/of_fdt.h -@@ -62,6 +62,7 @@ extern int early_init_dt_scan_chosen(unsigned long node, const char *uname, - int depth, void *data); - extern int early_init_dt_scan_memory(unsigned long node, const char *uname, - int depth, void *data); -+extern void early_init_dt_check_for_usable_mem_range(void); - extern int early_init_dt_scan_chosen_stdout(void); - extern void early_init_fdt_scan_reserved_mem(void); - extern void early_init_fdt_reserve_self(void); -@@ -87,6 +88,7 @@ extern void unflatten_and_copy_device_tree(void); - extern void early_init_devtree(void *); - extern void early_get_first_memblock_info(void *, phys_addr_t *); - #else /* CONFIG_OF_EARLY_FLATTREE */ -+static inline void early_init_dt_check_for_usable_mem_range(void) {} - static inline int early_init_dt_scan_chosen_stdout(void) { return -ENODEV; } - static inline void early_init_fdt_scan_reserved_mem(void) {} - static inline void early_init_fdt_reserve_self(void) {} -diff --git a/include/linux/of_net.h b/include/linux/of_net.h -index daef3b0d9270d..55460ecfa50ad 100644 ---- a/include/linux/of_net.h -+++ b/include/linux/of_net.h -@@ -8,7 +8,7 @@ - - #include <linux/phy.h> --#ifdef CONFIG_OF_NET -+#if defined(CONFIG_OF) && defined(CONFIG_NET) - #include <linux/of.h> + static bool arg_type_is_mem_size(enum bpf_arg_type type) +@@ -4865,8 +4962,7 @@ static const struct bpf_reg_types mem_types = { + PTR_TO_MAP_KEY, 
+ PTR_TO_MAP_VALUE, + PTR_TO_MEM, +- PTR_TO_RDONLY_BUF, +- PTR_TO_RDWR_BUF, ++ PTR_TO_BUF, + }, + }; - struct net_device; -diff --git a/include/linux/once.h b/include/linux/once.h -index d361fb14ac3a2..1528625087b69 100644 ---- a/include/linux/once.h -+++ b/include/linux/once.h -@@ -5,10 +5,18 @@ - #include <linux/types.h> - #include <linux/jump_label.h> +@@ -4897,31 +4993,26 @@ static const struct bpf_reg_types *compatible_reg_types[__BPF_ARG_TYPE_MAX] = { + [ARG_PTR_TO_MAP_KEY] = &map_key_value_types, + [ARG_PTR_TO_MAP_VALUE] = &map_key_value_types, + [ARG_PTR_TO_UNINIT_MAP_VALUE] = &map_key_value_types, +- [ARG_PTR_TO_MAP_VALUE_OR_NULL] = &map_key_value_types, + [ARG_CONST_SIZE] = &scalar_types, + [ARG_CONST_SIZE_OR_ZERO] = &scalar_types, + [ARG_CONST_ALLOC_SIZE_OR_ZERO] = &scalar_types, + [ARG_CONST_MAP_PTR] = &const_map_ptr_types, + [ARG_PTR_TO_CTX] = &context_types, +- [ARG_PTR_TO_CTX_OR_NULL] = &context_types, + [ARG_PTR_TO_SOCK_COMMON] = &sock_types, + #ifdef CONFIG_NET + [ARG_PTR_TO_BTF_ID_SOCK_COMMON] = &btf_id_sock_common_types, + #endif + [ARG_PTR_TO_SOCKET] = &fullsock_types, +- [ARG_PTR_TO_SOCKET_OR_NULL] = &fullsock_types, + [ARG_PTR_TO_BTF_ID] = &btf_ptr_types, + [ARG_PTR_TO_SPIN_LOCK] = &spin_lock_types, + [ARG_PTR_TO_MEM] = &mem_types, +- [ARG_PTR_TO_MEM_OR_NULL] = &mem_types, + [ARG_PTR_TO_UNINIT_MEM] = &mem_types, + [ARG_PTR_TO_ALLOC_MEM] = &alloc_mem_types, +- [ARG_PTR_TO_ALLOC_MEM_OR_NULL] = &alloc_mem_types, + [ARG_PTR_TO_INT] = &int_ptr_types, + [ARG_PTR_TO_LONG] = &int_ptr_types, + [ARG_PTR_TO_PERCPU_BTF_ID] = &percpu_btf_ptr_types, + [ARG_PTR_TO_FUNC] = &func_ptr_types, +- [ARG_PTR_TO_STACK_OR_NULL] = &stack_ptr_types, ++ [ARG_PTR_TO_STACK] = &stack_ptr_types, + [ARG_PTR_TO_CONST_STR] = &const_str_ptr_types, + [ARG_PTR_TO_TIMER] = &timer_types, + }; +@@ -4935,12 +5026,27 @@ static int check_reg_type(struct bpf_verifier_env *env, u32 regno, + const struct bpf_reg_types *compatible; + int i, j; -+/* Helpers used from arbitrary contexts. -+ * Hard irqs are blocked, be cautious. -+ */ - bool __do_once_start(bool *done, unsigned long *flags); - void __do_once_done(bool *done, struct static_key_true *once_key, - unsigned long *flags, struct module *mod); +- compatible = compatible_reg_types[arg_type]; ++ compatible = compatible_reg_types[base_type(arg_type)]; + if (!compatible) { + verbose(env, "verifier internal error: unsupported arg type %d\n", arg_type); + return -EFAULT; + } -+/* Variant for process contexts only. */ -+bool __do_once_slow_start(bool *done); -+void __do_once_slow_done(bool *done, struct static_key_true *once_key, -+ struct module *mod); ++ /* ARG_PTR_TO_MEM + RDONLY is compatible with PTR_TO_MEM and PTR_TO_MEM + RDONLY, ++ * but ARG_PTR_TO_MEM is compatible only with PTR_TO_MEM and NOT with PTR_TO_MEM + RDONLY ++ * ++ * Same for MAYBE_NULL: ++ * ++ * ARG_PTR_TO_MEM + MAYBE_NULL is compatible with PTR_TO_MEM and PTR_TO_MEM + MAYBE_NULL, ++ * but ARG_PTR_TO_MEM is compatible only with PTR_TO_MEM but NOT with PTR_TO_MEM + MAYBE_NULL ++ * ++ * Therefore we fold these flags depending on the arg_type before comparison. ++ */ ++ if (arg_type & MEM_RDONLY) ++ type &= ~MEM_RDONLY; ++ if (arg_type & PTR_MAYBE_NULL) ++ type &= ~PTR_MAYBE_NULL; + - /* Call a function exactly once. The idea of DO_ONCE() is to perform - * a function call such as initialization of random seeds, etc, only - * once, where DO_ONCE() can live in the fast-path. 
After @func has -@@ -52,9 +60,29 @@ void __do_once_done(bool *done, struct static_key_true *once_key, - ___ret; \ - }) + for (i = 0; i < ARRAY_SIZE(compatible->types); i++) { + expected = compatible->types[i]; + if (expected == NOT_INIT) +@@ -4950,14 +5056,14 @@ static int check_reg_type(struct bpf_verifier_env *env, u32 regno, + goto found; + } -+/* Variant of DO_ONCE() for process/sleepable contexts. */ -+#define DO_ONCE_SLOW(func, ...) \ -+ ({ \ -+ bool ___ret = false; \ -+ static bool __section(".data.once") ___done = false; \ -+ static DEFINE_STATIC_KEY_TRUE(___once_key); \ -+ if (static_branch_unlikely(&___once_key)) { \ -+ ___ret = __do_once_slow_start(&___done); \ -+ if (unlikely(___ret)) { \ -+ func(__VA_ARGS__); \ -+ __do_once_slow_done(&___done, &___once_key, \ -+ THIS_MODULE); \ -+ } \ -+ } \ -+ ___ret; \ -+ }) -+ - #define get_random_once(buf, nbytes) \ - DO_ONCE(get_random_bytes, (buf), (nbytes)) - #define get_random_once_wait(buf, nbytes) \ - DO_ONCE(get_random_bytes_wait, (buf), (nbytes)) \ +- verbose(env, "R%d type=%s expected=", regno, reg_type_str[type]); ++ verbose(env, "R%d type=%s expected=", regno, reg_type_str(env, reg->type)); + for (j = 0; j + 1 < i; j++) +- verbose(env, "%s, ", reg_type_str[compatible->types[j]]); +- verbose(env, "%s\n", reg_type_str[compatible->types[j]]); ++ verbose(env, "%s, ", reg_type_str(env, compatible->types[j])); ++ verbose(env, "%s\n", reg_type_str(env, compatible->types[j])); + return -EACCES; -+#define get_random_slow_once(buf, nbytes) \ -+ DO_ONCE_SLOW(get_random_bytes, (buf), (nbytes)) -+ - #endif /* _LINUX_ONCE_H */ -diff --git a/include/linux/once_lite.h b/include/linux/once_lite.h -index 861e606b820fa..b7bce4983638f 100644 ---- a/include/linux/once_lite.h -+++ b/include/linux/once_lite.h -@@ -9,15 +9,27 @@ - */ - #define DO_ONCE_LITE(func, ...) \ - DO_ONCE_LITE_IF(true, func, ##__VA_ARGS__) --#define DO_ONCE_LITE_IF(condition, func, ...) \ -+ -+#define __ONCE_LITE_IF(condition) \ - ({ \ - static bool __section(".data.once") __already_done; \ -- bool __ret_do_once = !!(condition); \ -+ bool __ret_cond = !!(condition); \ -+ bool __ret_once = false; \ - \ -- if (unlikely(__ret_do_once && !__already_done)) { \ -+ if (unlikely(__ret_cond && !__already_done)) { \ - __already_done = true; \ -- func(__VA_ARGS__); \ -+ __ret_once = true; \ - } \ -+ unlikely(__ret_once); \ -+ }) -+ -+#define DO_ONCE_LITE_IF(condition, func, ...) 
\ -+ ({ \ -+ bool __ret_do_once = !!(condition); \ -+ \ -+ if (__ONCE_LITE_IF(__ret_do_once)) \ -+ func(__VA_ARGS__); \ -+ \ - unlikely(__ret_do_once); \ - }) + found: +- if (type == PTR_TO_BTF_ID) { ++ if (reg->type == PTR_TO_BTF_ID) { + if (!arg_btf_id) { + if (!compatible->btf_id) { + verbose(env, "verifier internal error: missing arg compatible BTF ID\n"); +@@ -5016,15 +5122,14 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 arg, + return -EACCES; + } -diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h -index 62db6b0176b95..2f7dd14083d94 100644 ---- a/include/linux/pagemap.h -+++ b/include/linux/pagemap.h -@@ -733,61 +733,11 @@ int wait_on_page_private_2_killable(struct page *page); - extern void add_page_wait_queue(struct page *page, wait_queue_entry_t *waiter); +- if (arg_type == ARG_PTR_TO_MAP_VALUE || +- arg_type == ARG_PTR_TO_UNINIT_MAP_VALUE || +- arg_type == ARG_PTR_TO_MAP_VALUE_OR_NULL) { ++ if (base_type(arg_type) == ARG_PTR_TO_MAP_VALUE || ++ base_type(arg_type) == ARG_PTR_TO_UNINIT_MAP_VALUE) { + err = resolve_map_arg_type(env, meta, &arg_type); + if (err) + return err; + } - /* -- * Fault everything in given userspace address range in. -+ * Fault in userspace address range. +- if (register_is_null(reg) && arg_type_may_be_null(arg_type)) ++ if (register_is_null(reg) && type_may_be_null(arg_type)) + /* A NULL register has a SCALAR_VALUE type, so skip + * type checking. + */ +@@ -5093,10 +5198,11 @@ skip_type_check: + err = check_helper_mem_access(env, regno, + meta->map_ptr->key_size, false, + NULL); +- } else if (arg_type == ARG_PTR_TO_MAP_VALUE || +- (arg_type == ARG_PTR_TO_MAP_VALUE_OR_NULL && +- !register_is_null(reg)) || +- arg_type == ARG_PTR_TO_UNINIT_MAP_VALUE) { ++ } else if (base_type(arg_type) == ARG_PTR_TO_MAP_VALUE || ++ base_type(arg_type) == ARG_PTR_TO_UNINIT_MAP_VALUE) { ++ if (type_may_be_null(arg_type) && register_is_null(reg)) ++ return 0; ++ + /* bpf_map_xxx(..., map_ptr, ..., value) call: + * check [value, value + map->value_size) validity + */ +@@ -5590,31 +5696,15 @@ static int check_func_proto(const struct bpf_func_proto *fn, int func_id) + /* Packet data might have moved, any old PTR_TO_PACKET[_META,_END] + * are now invalid, so turn them into unknown SCALAR_VALUE. */ --static inline int fault_in_pages_writeable(char __user *uaddr, size_t size) --{ -- char __user *end = uaddr + size - 1; -- -- if (unlikely(size == 0)) -- return 0; -- -- if (unlikely(uaddr > end)) -- return -EFAULT; -- /* -- * Writing zeroes into userspace here is OK, because we know that if -- * the zero gets there, we'll be overwriting it. -- */ -- do { -- if (unlikely(__put_user(0, uaddr) != 0)) -- return -EFAULT; -- uaddr += PAGE_SIZE; -- } while (uaddr <= end); -- -- /* Check whether the range spilled into the next page. 
*/ -- if (((unsigned long)uaddr & PAGE_MASK) == -- ((unsigned long)end & PAGE_MASK)) -- return __put_user(0, end); +-static void __clear_all_pkt_pointers(struct bpf_verifier_env *env, +- struct bpf_func_state *state) ++static void clear_all_pkt_pointers(struct bpf_verifier_env *env) + { +- struct bpf_reg_state *regs = state->regs, *reg; +- int i; - -- return 0; +- for (i = 0; i < MAX_BPF_REG; i++) +- if (reg_is_pkt_pointer_any(®s[i])) +- mark_reg_unknown(env, regs, i); ++ struct bpf_func_state *state; ++ struct bpf_reg_state *reg; + +- bpf_for_each_spilled_reg(i, state, reg) { +- if (!reg) +- continue; ++ bpf_for_each_reg_in_vstate(env->cur_state, state, reg, ({ + if (reg_is_pkt_pointer_any(reg)) + __mark_reg_unknown(env, reg); +- } -} - --static inline int fault_in_pages_readable(const char __user *uaddr, size_t size) +-static void clear_all_pkt_pointers(struct bpf_verifier_env *env) -{ -- volatile char c; -- const char __user *end = uaddr + size - 1; -- -- if (unlikely(size == 0)) -- return 0; +- struct bpf_verifier_state *vstate = env->cur_state; +- int i; - -- if (unlikely(uaddr > end)) -- return -EFAULT; +- for (i = 0; i <= vstate->curframe; i++) +- __clear_all_pkt_pointers(env, vstate->frame[i]); ++ })); + } + + enum { +@@ -5643,41 +5733,28 @@ static void mark_pkt_end(struct bpf_verifier_state *vstate, int regn, bool range + reg->range = AT_PKT_END; + } + +-static void release_reg_references(struct bpf_verifier_env *env, +- struct bpf_func_state *state, +- int ref_obj_id) +-{ +- struct bpf_reg_state *regs = state->regs, *reg; +- int i; - -- do { -- if (unlikely(__get_user(c, uaddr) != 0)) -- return -EFAULT; -- uaddr += PAGE_SIZE; -- } while (uaddr <= end); +- for (i = 0; i < MAX_BPF_REG; i++) +- if (regs[i].ref_obj_id == ref_obj_id) +- mark_reg_unknown(env, regs, i); - -- /* Check whether the range spilled into the next page. */ -- if (((unsigned long)uaddr & PAGE_MASK) == -- ((unsigned long)end & PAGE_MASK)) { -- return __get_user(c, end); +- bpf_for_each_spilled_reg(i, state, reg) { +- if (!reg) +- continue; +- if (reg->ref_obj_id == ref_obj_id) +- __mark_reg_unknown(env, reg); - } -- -- (void)c; -- return 0; -} -+size_t fault_in_writeable(char __user *uaddr, size_t size); -+size_t fault_in_safe_writeable(const char __user *uaddr, size_t size); -+size_t fault_in_readable(const char __user *uaddr, size_t size); +- + /* The pointer with the specified id has released its reference to kernel + * resources. Identify all copies of the same pointer and clear the reference. 
+ */ + static int release_reference(struct bpf_verifier_env *env, + int ref_obj_id) + { +- struct bpf_verifier_state *vstate = env->cur_state; ++ struct bpf_func_state *state; ++ struct bpf_reg_state *reg; + int err; +- int i; - int add_to_page_cache_locked(struct page *page, struct address_space *mapping, - pgoff_t index, gfp_t gfp_mask); -diff --git a/include/linux/part_stat.h b/include/linux/part_stat.h -index d2558121d48c0..6f7949b2fd8dc 100644 ---- a/include/linux/part_stat.h -+++ b/include/linux/part_stat.h -@@ -3,6 +3,7 @@ - #define _LINUX_PART_STAT_H + err = release_reference_state(cur_func(env), ref_obj_id); + if (err) + return err; - #include <linux/genhd.h> -+#include <asm/local.h> +- for (i = 0; i <= vstate->curframe; i++) +- release_reg_references(env, vstate->frame[i], ref_obj_id); ++ bpf_for_each_reg_in_vstate(env->cur_state, state, reg, ({ ++ if (reg->ref_obj_id == ref_obj_id) { ++ if (!env->allow_ptr_leaks) ++ __mark_reg_not_init(env, reg); ++ else ++ __mark_reg_unknown(env, reg); ++ } ++ })); - struct disk_stats { - u64 nsecs[NR_STAT_GROUPS]; -diff --git a/include/linux/pci.h b/include/linux/pci.h -index cd8aa6fce2041..9d6e75222868f 100644 ---- a/include/linux/pci.h -+++ b/include/linux/pci.h -@@ -233,6 +233,8 @@ enum pci_dev_flags { - PCI_DEV_FLAGS_NO_FLR_RESET = (__force pci_dev_flags_t) (1 << 10), - /* Don't use Relaxed Ordering for TLPs directed at this device */ - PCI_DEV_FLAGS_NO_RELAXED_ORDERING = (__force pci_dev_flags_t) (1 << 11), -+ /* Device does honor MSI masking despite saying otherwise */ -+ PCI_DEV_FLAGS_HAS_MSI_MASKING = (__force pci_dev_flags_t) (1 << 12), - }; + return 0; + } +@@ -5750,6 +5827,7 @@ static int __check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn + } - enum pci_irq_reroute_variant { -@@ -654,6 +656,7 @@ struct pci_bus { - struct bin_attribute *legacy_io; /* Legacy I/O for this bus */ - struct bin_attribute *legacy_mem; /* Legacy mem */ - unsigned int is_added:1; -+ unsigned int unsafe_warn:1; /* warned about RW1C config write */ - }; + if (insn->code == (BPF_JMP | BPF_CALL) && ++ insn->src_reg == 0 && + insn->imm == BPF_FUNC_timer_set_callback) { + struct bpf_verifier_state *async_cb; - #define to_pci_bus(n) container_of(n, struct pci_bus, dev) -diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h -index 011f2f1ea5bb5..04f44a4694a2e 100644 ---- a/include/linux/pci_ids.h -+++ b/include/linux/pci_ids.h -@@ -60,6 +60,8 @@ - #define PCI_CLASS_BRIDGE_EISA 0x0602 - #define PCI_CLASS_BRIDGE_MC 0x0603 - #define PCI_CLASS_BRIDGE_PCI 0x0604 -+#define PCI_CLASS_BRIDGE_PCI_NORMAL 0x060400 -+#define PCI_CLASS_BRIDGE_PCI_SUBTRACTIVE 0x060401 - #define PCI_CLASS_BRIDGE_PCMCIA 0x0605 - #define PCI_CLASS_BRIDGE_NUBUS 0x0606 - #define PCI_CLASS_BRIDGE_CARDBUS 0x0607 -diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h -index 9b60bb89d86ab..6cce33e7e7acc 100644 ---- a/include/linux/perf_event.h -+++ b/include/linux/perf_event.h -@@ -680,18 +680,6 @@ struct perf_event { - u64 total_time_running; - u64 tstamp; +@@ -5792,11 +5870,11 @@ static int __check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn + /* Transfer references to the callee */ + err = copy_reference_state(callee, caller); + if (err) +- return err; ++ goto err_out; -- /* -- * timestamp shadows the actual context timing but it can -- * be safely used in NMI interrupt context. 
It reflects the -- * context time as it was when the event was last scheduled in, -- * or when ctx_sched_in failed to schedule the event because we -- * run out of PMC. -- * -- * ctx_time already accounts for ctx->timestamp. Therefore to -- * compute ctx_time for a sample, simply add perf_clock(). -- */ -- u64 shadow_ctx_time; -- - struct perf_event_attr attr; - u16 header_size; - u16 id_header_size; -@@ -838,6 +826,7 @@ struct perf_event_context { - */ - u64 time; - u64 timestamp; -+ u64 timeoffset; + err = set_callee_state_cb(env, caller, callee, *insn_idx); + if (err) +- return err; ++ goto err_out; - /* - * These fields let us detect when two contexts have both -@@ -920,6 +909,8 @@ struct bpf_perf_event_data_kern { - struct perf_cgroup_info { - u64 time; - u64 timestamp; -+ u64 timeoffset; -+ int active; - }; + clear_caller_saved_regs(env, caller->regs); - struct perf_cgroup { -@@ -1239,7 +1230,18 @@ extern void perf_event_bpf_event(struct bpf_prog *prog, - enum perf_bpf_event_type type, - u16 flags); +@@ -5813,6 +5891,11 @@ static int __check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn + print_verifier_state(env, callee); + } + return 0; ++ ++err_out: ++ free_func_state(callee); ++ state->frame[state->curframe + 1] = NULL; ++ return err; + } --extern struct perf_guest_info_callbacks *perf_guest_cbs; -+extern struct perf_guest_info_callbacks __rcu *perf_guest_cbs; -+static inline struct perf_guest_info_callbacks *perf_get_guest_cbs(void) -+{ -+ /* -+ * Callbacks are RCU-protected and must be READ_ONCE to avoid reloading -+ * the callbacks between a !NULL check and dereferences, to ensure -+ * pending stores/changes to the callback pointers are visible before a -+ * non-NULL perf_guest_cbs is visible to readers, and to prevent a -+ * module from unloading callbacks while readers are active. + int map_set_for_each_callback_args(struct bpf_verifier_env *env, +@@ -5950,8 +6033,7 @@ static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx) + return -EINVAL; + } + +- state->curframe--; +- caller = state->frame[state->curframe]; ++ caller = state->frame[state->curframe - 1]; + if (callee->in_callback_fn) { + /* enforce R0 return value range [0, 1]. */ + struct tnum range = tnum_range(0, 1); +@@ -5969,10 +6051,17 @@ static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx) + caller->regs[BPF_REG_0] = *r0; + } + +- /* Transfer references to the caller */ +- err = copy_reference_state(caller, callee); +- if (err) +- return err; ++ /* callback_fn frame should have released its own additions to parent's ++ * reference state at this point, or check_reference_leak would ++ * complain, hence it must be the same as the caller. There is no need ++ * to copy it back. 
+ */ -+ return rcu_dereference(perf_guest_cbs); -+} - extern int perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *callbacks); - extern int perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks *callbacks); ++ if (!callee->in_callback_fn) { ++ /* Transfer references to the caller */ ++ err = copy_reference_state(caller, callee); ++ if (err) ++ return err; ++ } -diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h -index e24d2c992b112..d468efcf48f45 100644 ---- a/include/linux/pgtable.h -+++ b/include/linux/pgtable.h -@@ -62,6 +62,7 @@ static inline unsigned long pte_index(unsigned long address) - { - return (address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1); + *insn_idx = callee->callsite + 1; + if (env->log.level & BPF_LOG_LEVEL) { +@@ -5983,7 +6072,7 @@ static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx) + } + /* clear everything in the callee */ + free_func_state(callee); +- state->frame[state->curframe + 1] = NULL; ++ state->frame[state->curframe--] = NULL; + return 0; } -+#define pte_index pte_index - #ifndef pmd_index - static inline unsigned long pmd_index(unsigned long address) -diff --git a/include/linux/phy.h b/include/linux/phy.h -index 736e1d1a47c40..946ccec178588 100644 ---- a/include/linux/phy.h -+++ b/include/linux/phy.h -@@ -536,6 +536,10 @@ struct macsec_ops; - * @mdix: Current crossover - * @mdix_ctrl: User setting of crossover - * @interrupts: Flag interrupts have been enabled -+ * @irq_suspended: Flag indicating PHY is suspended and therefore interrupt -+ * handling shall be postponed until PHY has resumed -+ * @irq_rerun: Flag indicating interrupts occurred while PHY was suspended, -+ * requiring a rerun of the interrupt handler after resume - * @interface: enum phy_interface_t value - * @skb: Netlink message for cable diagnostics - * @nest: Netlink nest used for cable diagnostics -@@ -590,6 +594,8 @@ struct phy_device { +@@ -6005,9 +6094,7 @@ static void do_refine_retval_range(struct bpf_reg_state *regs, int ret_type, + ret_reg->s32_max_value = meta->msize_max_value; + ret_reg->smin_value = -MAX_ERRNO; + ret_reg->s32_min_value = -MAX_ERRNO; +- __reg_deduce_bounds(ret_reg); +- __reg_bound_offset(ret_reg); +- __update_reg_bounds(ret_reg); ++ reg_bounds_sync(ret_reg); + } - /* Interrupts are enabled */ - unsigned interrupts:1; -+ unsigned irq_suspended:1; -+ unsigned irq_rerun:1; + static int +@@ -6062,8 +6149,7 @@ record_func_key(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta, + struct bpf_insn_aux_data *aux = &env->insn_aux_data[insn_idx]; + struct bpf_reg_state *regs = cur_regs(env), *reg; + struct bpf_map *map = meta->map_ptr; +- struct tnum range; +- u64 val; ++ u64 val, max; + int err; - enum phy_state state; + if (func_id != BPF_FUNC_tail_call) +@@ -6073,10 +6159,11 @@ record_func_key(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta, + return -EINVAL; + } -diff --git a/include/linux/phylink.h b/include/linux/phylink.h -index 237291196ce28..b306159c1fada 100644 ---- a/include/linux/phylink.h -+++ b/include/linux/phylink.h -@@ -64,6 +64,7 @@ enum phylink_op_type { - * @pcs_poll: MAC PCS cannot provide link change interrupt - * @poll_fixed_state: if true, starts link_poll, - * if MAC link is at %MLO_AN_FIXED mode. -+ * @mac_managed_pm: if true, indicate the MAC driver is responsible for PHY PM. - * @ovr_an_inband: if true, override PCS to MLO_AN_INBAND - * @get_fixed_state: callback to execute to determine the fixed link state, - * if MAC link is at %MLO_AN_FIXED mode. 
-@@ -73,6 +74,7 @@ struct phylink_config {
 enum phylink_op_type type;
 bool pcs_poll;
 bool poll_fixed_state;
+ bool mac_managed_pm;
 bool ovr_an_inband;
 void (*get_fixed_state)(struct phylink_config *config,
 struct phylink_link_state *state);
-diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h
-index fc5642431b923..18dcca51829e2 100644
---- a/include/linux/pipe_fs_i.h
-+++ b/include/linux/pipe_fs_i.h
-@@ -71,7 +71,7 @@ struct pipe_inode_info {
 unsigned int files;
 unsigned int r_counter;
 unsigned int w_counter;
-- unsigned int poll_usage;
-+ bool poll_usage;
 struct page *tmp_page;
 struct fasync_struct *fasync_readers;
 struct fasync_struct *fasync_writers;
-@@ -229,6 +229,15 @@ static inline bool pipe_buf_try_steal(struct pipe_inode_info *pipe,
 return buf->ops->try_steal(pipe, buf);
 }

+static inline void pipe_discard_from(struct pipe_inode_info *pipe,
+ unsigned int old_head)
+{
+ unsigned int mask = pipe->ring_size - 1;
+
+ while (pipe->head > old_head)
+ pipe_buf_release(pipe, &pipe->bufs[--pipe->head & mask]);
+}
+
 /* Differs from PIPE_BUF in that PIPE_SIZE is the length of the actual
 memory allocation, whereas PIPE_BUF makes atomicity guarantees. */
 #define PIPE_SIZE PAGE_SIZE
+- range = tnum_range(0, map->max_entries - 1);
+ reg = &regs[BPF_REG_3];
++ val = reg->var_off.value;
++ max = map->max_entries;

+- if (!register_is_const(reg) || !tnum_in(range, reg->var_off)) {
++ if (!(register_is_const(reg) && val < max)) {
+ bpf_map_key_store(aux, BPF_MAP_KEY_POISON);
+ return 0;
+ }
+@@ -6084,8 +6171,6 @@ record_func_key(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta,
+ err = mark_chain_precision(env, BPF_REG_3);
+ if (err)
+ return err;
+-
+- val = reg->var_off.value;
+ if (bpf_map_key_unseen(aux))
+ bpf_map_key_store(aux, val);
+ else if (!bpf_map_key_poisoned(aux) &&
+@@ -6097,13 +6182,20 @@ record_func_key(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta,
 static int check_reference_leak(struct bpf_verifier_env *env)
 {
 struct bpf_func_state *state = cur_func(env);
++ bool refs_lingering = false;
 int i;

++ if (state->frameno && !state->in_callback_fn)
++ return 0;
+
 for (i = 0; i < state->acquired_refs; i++) {
++ if (state->in_callback_fn && state->refs[i].callback_ref != state->frameno)
++ continue;
 verbose(env, "Unreleased reference id=%d alloc_insn=%d\n",
 state->refs[i].id, state->refs[i].insn_idx);
++ refs_lingering = true;
 }
+- return state->acquired_refs ? -EINVAL : 0;
++ return refs_lingering ?
-EINVAL : 0; + } + + static int check_bpf_snprintf_call(struct bpf_verifier_env *env, +@@ -6170,6 +6262,8 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn + int *insn_idx_p) + { + const struct bpf_func_proto *fn = NULL; ++ enum bpf_return_type ret_type; ++ enum bpf_type_flag ret_flag; + struct bpf_reg_state *regs; + struct bpf_call_arg_meta meta; + int insn_idx = *insn_idx_p; +@@ -6303,13 +6397,14 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn + regs[BPF_REG_0].subreg_def = DEF_NOT_SUBREG; + + /* update return register (already marked as written above) */ +- if (fn->ret_type == RET_INTEGER) { ++ ret_type = fn->ret_type; ++ ret_flag = type_flag(fn->ret_type); ++ if (ret_type == RET_INTEGER) { + /* sets type to SCALAR_VALUE */ + mark_reg_unknown(env, regs, BPF_REG_0); +- } else if (fn->ret_type == RET_VOID) { ++ } else if (ret_type == RET_VOID) { + regs[BPF_REG_0].type = NOT_INIT; +- } else if (fn->ret_type == RET_PTR_TO_MAP_VALUE_OR_NULL || +- fn->ret_type == RET_PTR_TO_MAP_VALUE) { ++ } else if (base_type(ret_type) == RET_PTR_TO_MAP_VALUE) { + /* There is no offset yet applied, variable or fixed */ + mark_reg_known_zero(env, regs, BPF_REG_0); + /* remember map_ptr, so that check_map_access() +@@ -6323,28 +6418,25 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn + } + regs[BPF_REG_0].map_ptr = meta.map_ptr; + regs[BPF_REG_0].map_uid = meta.map_uid; +- if (fn->ret_type == RET_PTR_TO_MAP_VALUE) { +- regs[BPF_REG_0].type = PTR_TO_MAP_VALUE; +- if (map_value_has_spin_lock(meta.map_ptr)) +- regs[BPF_REG_0].id = ++env->id_gen; +- } else { +- regs[BPF_REG_0].type = PTR_TO_MAP_VALUE_OR_NULL; ++ regs[BPF_REG_0].type = PTR_TO_MAP_VALUE | ret_flag; ++ if (!type_may_be_null(ret_type) && ++ map_value_has_spin_lock(meta.map_ptr)) { ++ regs[BPF_REG_0].id = ++env->id_gen; + } +- } else if (fn->ret_type == RET_PTR_TO_SOCKET_OR_NULL) { ++ } else if (base_type(ret_type) == RET_PTR_TO_SOCKET) { + mark_reg_known_zero(env, regs, BPF_REG_0); +- regs[BPF_REG_0].type = PTR_TO_SOCKET_OR_NULL; +- } else if (fn->ret_type == RET_PTR_TO_SOCK_COMMON_OR_NULL) { ++ regs[BPF_REG_0].type = PTR_TO_SOCKET | ret_flag; ++ } else if (base_type(ret_type) == RET_PTR_TO_SOCK_COMMON) { + mark_reg_known_zero(env, regs, BPF_REG_0); +- regs[BPF_REG_0].type = PTR_TO_SOCK_COMMON_OR_NULL; +- } else if (fn->ret_type == RET_PTR_TO_TCP_SOCK_OR_NULL) { ++ regs[BPF_REG_0].type = PTR_TO_SOCK_COMMON | ret_flag; ++ } else if (base_type(ret_type) == RET_PTR_TO_TCP_SOCK) { + mark_reg_known_zero(env, regs, BPF_REG_0); +- regs[BPF_REG_0].type = PTR_TO_TCP_SOCK_OR_NULL; +- } else if (fn->ret_type == RET_PTR_TO_ALLOC_MEM_OR_NULL) { ++ regs[BPF_REG_0].type = PTR_TO_TCP_SOCK | ret_flag; ++ } else if (base_type(ret_type) == RET_PTR_TO_ALLOC_MEM) { + mark_reg_known_zero(env, regs, BPF_REG_0); +- regs[BPF_REG_0].type = PTR_TO_MEM_OR_NULL; ++ regs[BPF_REG_0].type = PTR_TO_MEM | ret_flag; + regs[BPF_REG_0].mem_size = meta.mem_size; +- } else if (fn->ret_type == RET_PTR_TO_MEM_OR_BTF_ID_OR_NULL || +- fn->ret_type == RET_PTR_TO_MEM_OR_BTF_ID) { ++ } else if (base_type(ret_type) == RET_PTR_TO_MEM_OR_BTF_ID) { + const struct btf_type *t; -+int cros_ec_cmd_xfer(struct cros_ec_device *ec_dev, -+ struct cros_ec_command *msg); + mark_reg_known_zero(env, regs, BPF_REG_0); +@@ -6362,29 +6454,30 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn + tname, PTR_ERR(ret)); + return -EINVAL; + } +- regs[BPF_REG_0].type = +- fn->ret_type 
== RET_PTR_TO_MEM_OR_BTF_ID ? +- PTR_TO_MEM : PTR_TO_MEM_OR_NULL; ++ regs[BPF_REG_0].type = PTR_TO_MEM | ret_flag; + regs[BPF_REG_0].mem_size = tsize; + } else { +- regs[BPF_REG_0].type = +- fn->ret_type == RET_PTR_TO_MEM_OR_BTF_ID ? +- PTR_TO_BTF_ID : PTR_TO_BTF_ID_OR_NULL; ++ /* MEM_RDONLY may be carried from ret_flag, but it ++ * doesn't apply on PTR_TO_BTF_ID. Fold it, otherwise ++ * it will confuse the check of PTR_TO_BTF_ID in ++ * check_mem_access(). ++ */ ++ ret_flag &= ~MEM_RDONLY; + - int cros_ec_cmd_xfer_status(struct cros_ec_device *ec_dev, - struct cros_ec_command *msg); - -diff --git a/include/linux/platform_data/ti-sysc.h b/include/linux/platform_data/ti-sysc.h -index 9837fb011f2fb..989aa30c598dc 100644 ---- a/include/linux/platform_data/ti-sysc.h -+++ b/include/linux/platform_data/ti-sysc.h -@@ -50,6 +50,7 @@ struct sysc_regbits { - s8 emufree_shift; - }; ++ regs[BPF_REG_0].type = PTR_TO_BTF_ID | ret_flag; + regs[BPF_REG_0].btf = meta.ret_btf; + regs[BPF_REG_0].btf_id = meta.ret_btf_id; + } +- } else if (fn->ret_type == RET_PTR_TO_BTF_ID_OR_NULL || +- fn->ret_type == RET_PTR_TO_BTF_ID) { ++ } else if (base_type(ret_type) == RET_PTR_TO_BTF_ID) { + int ret_btf_id; -+#define SYSC_QUIRK_REINIT_ON_CTX_LOST BIT(28) - #define SYSC_QUIRK_REINIT_ON_RESUME BIT(27) - #define SYSC_QUIRK_GPMC_DEBUG BIT(26) - #define SYSC_MODULE_QUIRK_ENA_RESETDONE BIT(25) -diff --git a/include/linux/platform_data/x86/pmc_atom.h b/include/linux/platform_data/x86/pmc_atom.h -index 022bcea9edec5..99a9b09dc839d 100644 ---- a/include/linux/platform_data/x86/pmc_atom.h -+++ b/include/linux/platform_data/x86/pmc_atom.h -@@ -7,6 +7,8 @@ - #ifndef PMC_ATOM_H - #define PMC_ATOM_H + mark_reg_known_zero(env, regs, BPF_REG_0); +- regs[BPF_REG_0].type = fn->ret_type == RET_PTR_TO_BTF_ID ? 
+- PTR_TO_BTF_ID : +- PTR_TO_BTF_ID_OR_NULL; ++ regs[BPF_REG_0].type = PTR_TO_BTF_ID | ret_flag; + ret_btf_id = *fn->ret_btf_id; + if (ret_btf_id == 0) { +- verbose(env, "invalid return type %d of func %s#%d\n", +- fn->ret_type, func_id_name(func_id), func_id); ++ verbose(env, "invalid return type %u of func %s#%d\n", ++ base_type(ret_type), func_id_name(func_id), ++ func_id); + return -EINVAL; + } + /* current BPF helper definitions are only coming from +@@ -6393,12 +6486,12 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn + regs[BPF_REG_0].btf = btf_vmlinux; + regs[BPF_REG_0].btf_id = ret_btf_id; + } else { +- verbose(env, "unknown return type %d of func %s#%d\n", +- fn->ret_type, func_id_name(func_id), func_id); ++ verbose(env, "unknown return type %u of func %s#%d\n", ++ base_type(ret_type), func_id_name(func_id), func_id); + return -EINVAL; + } -+#include <linux/bits.h> -+ - /* ValleyView Power Control Unit PCI Device ID */ - #define PCI_DEVICE_ID_VLV_PMC 0x0F1C - /* CherryTrail Power Control Unit PCI Device ID */ -@@ -139,9 +141,9 @@ - #define ACPI_MMIO_REG_LEN 0x100 +- if (reg_type_may_be_null(regs[BPF_REG_0].type)) ++ if (type_may_be_null(regs[BPF_REG_0].type)) + regs[BPF_REG_0].id = ++env->id_gen; - #define PM1_CNT 0x4 --#define SLEEP_TYPE_MASK 0xFFFFECFF -+#define SLEEP_TYPE_MASK GENMASK(12, 10) - #define SLEEP_TYPE_S5 0x1C00 --#define SLEEP_ENABLE 0x2000 -+#define SLEEP_ENABLE BIT(13) + if (is_ptr_cast_function(func_id)) { +@@ -6597,25 +6690,25 @@ static bool check_reg_sane_offset(struct bpf_verifier_env *env, - extern int pmc_atom_read(int offset, u32 *value); - extern int pmc_atom_write(int offset, u32 value); -diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h -index 222da43b7096d..90eaff8b78fc9 100644 ---- a/include/linux/pm_runtime.h -+++ b/include/linux/pm_runtime.h -@@ -58,6 +58,7 @@ extern void pm_runtime_get_suppliers(struct device *dev); - extern void pm_runtime_put_suppliers(struct device *dev); - extern void pm_runtime_new_link(struct device *dev); - extern void pm_runtime_drop_link(struct device_link *link); -+extern void pm_runtime_release_supplier(struct device_link *link); + if (known && (val >= BPF_MAX_VAR_OFF || val <= -BPF_MAX_VAR_OFF)) { + verbose(env, "math between %s pointer and %lld is not allowed\n", +- reg_type_str[type], val); ++ reg_type_str(env, type), val); + return false; + } - extern int devm_pm_runtime_enable(struct device *dev); + if (reg->off >= BPF_MAX_VAR_OFF || reg->off <= -BPF_MAX_VAR_OFF) { + verbose(env, "%s pointer offset %d is not allowed\n", +- reg_type_str[type], reg->off); ++ reg_type_str(env, type), reg->off); + return false; + } -@@ -129,7 +130,7 @@ static inline bool pm_runtime_suspended(struct device *dev) - * pm_runtime_active - Check whether or not a device is runtime-active. - * @dev: Target device. - * -- * Return %true if runtime PM is enabled for @dev and its runtime PM status is -+ * Return %true if runtime PM is disabled for @dev or its runtime PM status is - * %RPM_ACTIVE, or %false otherwise. 
- * - * Note that the return value of this function can only be trusted if it is -@@ -283,6 +284,7 @@ static inline void pm_runtime_get_suppliers(struct device *dev) {} - static inline void pm_runtime_put_suppliers(struct device *dev) {} - static inline void pm_runtime_new_link(struct device *dev) {} - static inline void pm_runtime_drop_link(struct device_link *link) {} -+static inline void pm_runtime_release_supplier(struct device_link *link) {} + if (smin == S64_MIN) { + verbose(env, "math between %s pointer and register with unbounded min value is not allowed\n", +- reg_type_str[type]); ++ reg_type_str(env, type)); + return false; + } - #endif /* !CONFIG_PM */ + if (smin >= BPF_MAX_VAR_OFF || smin <= -BPF_MAX_VAR_OFF) { + verbose(env, "value %lld makes %s pointer be out of bounds\n", +- smin, reg_type_str[type]); ++ smin, reg_type_str(env, type)); + return false; + } -diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h -index 00fef0064355f..5bbcd280bfd26 100644 ---- a/include/linux/posix-timers.h -+++ b/include/linux/posix-timers.h -@@ -184,8 +184,10 @@ static inline void posix_cputimers_group_init(struct posix_cputimers *pct, - #endif +@@ -6818,7 +6911,7 @@ do_sim: + */ + if (!ptr_is_dst_reg) { + tmp = *dst_reg; +- *dst_reg = *ptr_reg; ++ copy_register_state(dst_reg, ptr_reg); + } + ret = sanitize_speculative_path(env, NULL, env->insn_idx + 1, + env->insn_idx); +@@ -6992,11 +7085,13 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, + return -EACCES; + } - #ifdef CONFIG_POSIX_CPU_TIMERS_TASK_WORK -+void clear_posix_cputimers_work(struct task_struct *p); - void posix_cputimers_init_work(void); - #else -+static inline void clear_posix_cputimers_work(struct task_struct *p) { } - static inline void posix_cputimers_init_work(void) { } - #endif +- switch (ptr_reg->type) { +- case PTR_TO_MAP_VALUE_OR_NULL: ++ if (ptr_reg->type & PTR_MAYBE_NULL) { + verbose(env, "R%d pointer arithmetic on %s prohibited, null-check it first\n", +- dst, reg_type_str[ptr_reg->type]); ++ dst, reg_type_str(env, ptr_reg->type)); + return -EACCES; ++ } ++ ++ switch (base_type(ptr_reg->type)) { + case CONST_PTR_TO_MAP: + /* smin_val represents the known value */ + if (known && smin_val == 0 && opcode == BPF_ADD) +@@ -7004,16 +7099,16 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, + fallthrough; + case PTR_TO_PACKET_END: + case PTR_TO_SOCKET: +- case PTR_TO_SOCKET_OR_NULL: + case PTR_TO_SOCK_COMMON: +- case PTR_TO_SOCK_COMMON_OR_NULL: + case PTR_TO_TCP_SOCK: +- case PTR_TO_TCP_SOCK_OR_NULL: + case PTR_TO_XDP_SOCK: ++reject: + verbose(env, "R%d pointer arithmetic on %s prohibited\n", +- dst, reg_type_str[ptr_reg->type]); ++ dst, reg_type_str(env, ptr_reg->type)); + return -EACCES; + default: ++ if (type_may_be_null(ptr_reg->type)) ++ goto reject; + break; + } -diff --git a/include/linux/posix_acl_xattr.h b/include/linux/posix_acl_xattr.h -index 060e8d2031814..1766e1de69560 100644 ---- a/include/linux/posix_acl_xattr.h -+++ b/include/linux/posix_acl_xattr.h -@@ -34,15 +34,19 @@ posix_acl_xattr_count(size_t size) +@@ -7164,11 +7259,7 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, - #ifdef CONFIG_FS_POSIX_ACL - void posix_acl_fix_xattr_from_user(struct user_namespace *mnt_userns, -+ struct inode *inode, - void *value, size_t size); - void posix_acl_fix_xattr_to_user(struct user_namespace *mnt_userns, -+ struct inode *inode, - void *value, size_t size); - #else - static inline void posix_acl_fix_xattr_from_user(struct user_namespace *mnt_userns, 
-+ struct inode *inode, - void *value, size_t size) - { - } - static inline void posix_acl_fix_xattr_to_user(struct user_namespace *mnt_userns, -+ struct inode *inode, - void *value, size_t size) - { + if (!check_reg_sane_offset(env, dst_reg, ptr_reg->type)) + return -EINVAL; +- +- __update_reg_bounds(dst_reg); +- __reg_deduce_bounds(dst_reg); +- __reg_bound_offset(dst_reg); +- ++ reg_bounds_sync(dst_reg); + if (sanitize_check_bounds(env, insn, dst_reg) < 0) + return -EACCES; + if (sanitize_needed(opcode)) { +@@ -7906,10 +7997,7 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env, + /* ALU32 ops are zero extended into 64bit register */ + if (alu32) + zext_32_to_64(dst_reg); +- +- __update_reg_bounds(dst_reg); +- __reg_deduce_bounds(dst_reg); +- __reg_bound_offset(dst_reg); ++ reg_bounds_sync(dst_reg); + return 0; } -diff --git a/include/linux/prandom.h b/include/linux/prandom.h -index 056d31317e499..a4aadd2dc153e 100644 ---- a/include/linux/prandom.h -+++ b/include/linux/prandom.h -@@ -10,6 +10,7 @@ - - #include <linux/types.h> - #include <linux/percpu.h> -+#include <linux/siphash.h> - - u32 prandom_u32(void); - void prandom_bytes(void *buf, size_t nbytes); -@@ -27,15 +28,10 @@ DECLARE_PER_CPU(unsigned long, net_rand_noise); - * The core SipHash round function. Each line can be executed in - * parallel given enough CPU resources. - */ --#define PRND_SIPROUND(v0, v1, v2, v3) ( \ -- v0 += v1, v1 = rol64(v1, 13), v2 += v3, v3 = rol64(v3, 16), \ -- v1 ^= v0, v0 = rol64(v0, 32), v3 ^= v2, \ -- v0 += v3, v3 = rol64(v3, 21), v2 += v1, v1 = rol64(v1, 17), \ -- v3 ^= v0, v1 ^= v2, v2 = rol64(v2, 32) \ --) -+#define PRND_SIPROUND(v0, v1, v2, v3) SIPHASH_PERMUTATION(v0, v1, v2, v3) - --#define PRND_K0 (0x736f6d6570736575 ^ 0x6c7967656e657261) --#define PRND_K1 (0x646f72616e646f6d ^ 0x7465646279746573) -+#define PRND_K0 (SIPHASH_CONST_0 ^ SIPHASH_CONST_2) -+#define PRND_K1 (SIPHASH_CONST_1 ^ SIPHASH_CONST_3) - - #elif BITS_PER_LONG == 32 - /* -@@ -43,14 +39,9 @@ DECLARE_PER_CPU(unsigned long, net_rand_noise); - * This is weaker, but 32-bit machines are not used for high-traffic - * applications, so there is less output for an attacker to analyze. 
- */ --#define PRND_SIPROUND(v0, v1, v2, v3) ( \ -- v0 += v1, v1 = rol32(v1, 5), v2 += v3, v3 = rol32(v3, 8), \ -- v1 ^= v0, v0 = rol32(v0, 16), v3 ^= v2, \ -- v0 += v3, v3 = rol32(v3, 7), v2 += v1, v1 = rol32(v1, 13), \ -- v3 ^= v0, v1 ^= v2, v2 = rol32(v2, 16) \ --) --#define PRND_K0 0x6c796765 --#define PRND_K1 0x74656462 -+#define PRND_SIPROUND(v0, v1, v2, v3) HSIPHASH_PERMUTATION(v0, v1, v2, v3) -+#define PRND_K0 (HSIPHASH_CONST_0 ^ HSIPHASH_CONST_2) -+#define PRND_K1 (HSIPHASH_CONST_1 ^ HSIPHASH_CONST_3) - #else - #error Unsupported BITS_PER_LONG -diff --git a/include/linux/printk.h b/include/linux/printk.h -index 85b656f82d752..9497f6b983399 100644 ---- a/include/linux/printk.h -+++ b/include/linux/printk.h -@@ -198,6 +198,7 @@ void dump_stack_print_info(const char *log_lvl); - void show_regs_print_info(const char *log_lvl); - extern asmlinkage void dump_stack_lvl(const char *log_lvl) __cold; - extern asmlinkage void dump_stack(void) __cold; -+void printk_trigger_flush(void); - #else - static inline __printf(1, 0) - int vprintk(const char *s, va_list args) -@@ -274,6 +275,9 @@ static inline void dump_stack_lvl(const char *log_lvl) - static inline void dump_stack(void) - { +@@ -7969,6 +8057,11 @@ static int adjust_reg_min_max_vals(struct bpf_verifier_env *env, + return err; + return adjust_ptr_min_max_vals(env, insn, + dst_reg, src_reg); ++ } else if (dst_reg->precise) { ++ /* if dst_reg is precise, src_reg should be precise as well */ ++ err = mark_chain_precision(env, insn->src_reg); ++ if (err) ++ return err; + } + } else { + /* Pretend the src is a reg with a known value, since we only +@@ -8074,7 +8167,7 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn) + * to propagate min/max range. + */ + src_reg->id = ++env->id_gen; +- *dst_reg = *src_reg; ++ copy_register_state(dst_reg, src_reg); + dst_reg->live |= REG_LIVE_WRITTEN; + dst_reg->subreg_def = DEF_NOT_SUBREG; + } else { +@@ -8085,7 +8178,7 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn) + insn->src_reg); + return -EACCES; + } else if (src_reg->type == SCALAR_VALUE) { +- *dst_reg = *src_reg; ++ copy_register_state(dst_reg, src_reg); + /* Make sure ID is cleared otherwise + * dst_reg min/max could be incorrectly + * propagated into src_reg by find_equal_scalars() +@@ -8098,6 +8191,7 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn) + insn->dst_reg); + } + zext_32_to_64(dst_reg); ++ reg_bounds_sync(dst_reg); + } + } else { + /* case: R = imm +@@ -8169,34 +8263,14 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn) + return 0; } -+static inline void printk_trigger_flush(void) -+{ -+} - #endif - - #ifdef CONFIG_SMP -diff --git a/include/linux/psi.h b/include/linux/psi.h -index 65eb1476ac705..57823b30c2d3d 100644 ---- a/include/linux/psi.h -+++ b/include/linux/psi.h -@@ -24,18 +24,17 @@ void psi_memstall_enter(unsigned long *flags); - void psi_memstall_leave(unsigned long *flags); - int psi_show(struct seq_file *s, struct psi_group *group, enum psi_res res); +-static void __find_good_pkt_pointers(struct bpf_func_state *state, +- struct bpf_reg_state *dst_reg, +- enum bpf_reg_type type, int new_range) +-{ +- struct bpf_reg_state *reg; +- int i; - --#ifdef CONFIG_CGROUPS --int psi_cgroup_alloc(struct cgroup *cgrp); --void psi_cgroup_free(struct cgroup *cgrp); --void cgroup_move_task(struct task_struct *p, struct css_set *to); +- for (i = 0; i < MAX_BPF_REG; i++) { +- reg = &state->regs[i]; +- if (reg->type == type && 
reg->id == dst_reg->id) +- /* keep the maximum range already checked */ +- reg->range = max(reg->range, new_range); +- } - - struct psi_trigger *psi_trigger_create(struct psi_group *group, - char *buf, size_t nbytes, enum psi_res res); --void psi_trigger_replace(void **trigger_ptr, struct psi_trigger *t); -+void psi_trigger_destroy(struct psi_trigger *t); - - __poll_t psi_trigger_poll(void **trigger_ptr, struct file *file, - poll_table *wait); -+ -+#ifdef CONFIG_CGROUPS -+int psi_cgroup_alloc(struct cgroup *cgrp); -+void psi_cgroup_free(struct cgroup *cgrp); -+void cgroup_move_task(struct task_struct *p, struct css_set *to); - #endif +- bpf_for_each_spilled_reg(i, state, reg) { +- if (!reg) +- continue; +- if (reg->type == type && reg->id == dst_reg->id) +- reg->range = max(reg->range, new_range); +- } +-} +- + static void find_good_pkt_pointers(struct bpf_verifier_state *vstate, + struct bpf_reg_state *dst_reg, + enum bpf_reg_type type, + bool range_right_open) + { +- int new_range, i; ++ struct bpf_func_state *state; ++ struct bpf_reg_state *reg; ++ int new_range; - #else /* CONFIG_PSI */ -diff --git a/include/linux/psi_types.h b/include/linux/psi_types.h -index 0a23300d49af7..6f190002a2022 100644 ---- a/include/linux/psi_types.h -+++ b/include/linux/psi_types.h -@@ -21,7 +21,17 @@ enum psi_task_count { - * don't have to special case any state tracking for it. - */ - NR_ONCPU, -- NR_PSI_TASK_COUNTS = 4, -+ /* -+ * For IO and CPU stalls the presence of running/oncpu tasks -+ * in the domain means a partial rather than a full stall. -+ * For memory it's not so simple because of page reclaimers: -+ * they are running/oncpu while representing a stall. To tell -+ * whether a domain has productivity left or not, we need to -+ * distinguish between regular running (i.e. productive) -+ * threads and memstall ones. -+ */ -+ NR_MEMSTALL_RUNNING, -+ NR_PSI_TASK_COUNTS = 5, - }; + if (dst_reg->off < 0 || + (dst_reg->off == 0 && range_right_open)) +@@ -8212,7 +8286,7 @@ static void find_good_pkt_pointers(struct bpf_verifier_state *vstate, - /* Task state bitmasks */ -@@ -29,6 +39,7 @@ enum psi_task_count { - #define TSK_MEMSTALL (1 << NR_MEMSTALL) - #define TSK_RUNNING (1 << NR_RUNNING) - #define TSK_ONCPU (1 << NR_ONCPU) -+#define TSK_MEMSTALL_RUNNING (1 << NR_MEMSTALL_RUNNING) + new_range = dst_reg->off; + if (range_right_open) +- new_range--; ++ new_range++; - /* Resources that workloads could be stalled on */ - enum psi_res { -@@ -129,9 +140,6 @@ struct psi_trigger { - * events to one per window + /* Examples for register markings: + * +@@ -8261,9 +8335,11 @@ static void find_good_pkt_pointers(struct bpf_verifier_state *vstate, + * the range won't allow anything. + * dst_reg->off is known < MAX_PACKET_OFF, therefore it fits in a u16. 
*/ - u64 last_event_time; -- -- /* Refcounting to prevent premature destruction */ -- struct kref refcount; - }; - - struct psi_group { -diff --git a/include/linux/pstore.h b/include/linux/pstore.h -index eb93a54cff31f..e97a8188f0fd8 100644 ---- a/include/linux/pstore.h -+++ b/include/linux/pstore.h -@@ -14,7 +14,7 @@ - #include <linux/errno.h> - #include <linux/kmsg_dump.h> - #include <linux/mutex.h> --#include <linux/semaphore.h> -+#include <linux/spinlock.h> - #include <linux/time.h> - #include <linux/types.h> +- for (i = 0; i <= vstate->curframe; i++) +- __find_good_pkt_pointers(vstate->frame[i], dst_reg, type, +- new_range); ++ bpf_for_each_reg_in_vstate(vstate, state, reg, ({ ++ if (reg->type == type && reg->id == dst_reg->id) ++ /* keep the maximum range already checked */ ++ reg->range = max(reg->range, new_range); ++ })); + } + + static int is_branch32_taken(struct bpf_reg_state *reg, u32 val, u8 opcode) +@@ -8535,26 +8611,33 @@ static void reg_set_min_max(struct bpf_reg_state *true_reg, + return; -@@ -87,7 +87,7 @@ struct pstore_record { - * @owner: module which is responsible for this backend driver - * @name: name of the backend driver - * -- * @buf_lock: semaphore to serialize access to @buf -+ * @buf_lock: spinlock to serialize access to @buf - * @buf: preallocated crash dump buffer - * @bufsize: size of @buf available for crash dump bytes (must match - * smallest number of bytes available for writing to a -@@ -178,7 +178,7 @@ struct pstore_info { - struct module *owner; - const char *name; + switch (opcode) { ++ /* JEQ/JNE comparison doesn't change the register equivalence. ++ * ++ * r1 = r2; ++ * if (r1 == 42) goto label; ++ * ... ++ * label: // here both r1 and r2 are known to be 42. ++ * ++ * Hence when marking register as known preserve it's ID. ++ */ + case BPF_JEQ: ++ if (is_jmp32) { ++ __mark_reg32_known(true_reg, val32); ++ true_32off = tnum_subreg(true_reg->var_off); ++ } else { ++ ___mark_reg_known(true_reg, val); ++ true_64off = true_reg->var_off; ++ } ++ break; + case BPF_JNE: +- { +- struct bpf_reg_state *reg = +- opcode == BPF_JEQ ? true_reg : false_reg; +- +- /* JEQ/JNE comparison doesn't change the register equivalence. +- * r1 = r2; +- * if (r1 == 42) goto label; +- * ... +- * label: // here both r1 and r2 are known to be 42. +- * +- * Hence when marking register as known preserve it's ID. +- */ +- if (is_jmp32) +- __mark_reg32_known(reg, val32); +- else +- ___mark_reg_known(reg, val); ++ if (is_jmp32) { ++ __mark_reg32_known(false_reg, val32); ++ false_32off = tnum_subreg(false_reg->var_off); ++ } else { ++ ___mark_reg_known(false_reg, val); ++ false_64off = false_reg->var_off; ++ } + break; +- } + case BPF_JSET: + if (is_jmp32) { + false_32off = tnum_and(false_32off, tnum_const(~val32)); +@@ -8693,21 +8776,8 @@ static void __reg_combine_min_max(struct bpf_reg_state *src_reg, + dst_reg->smax_value); + src_reg->var_off = dst_reg->var_off = tnum_intersect(src_reg->var_off, + dst_reg->var_off); +- /* We might have learned new bounds from the var_off. */ +- __update_reg_bounds(src_reg); +- __update_reg_bounds(dst_reg); +- /* We might have learned something about the sign bit. */ +- __reg_deduce_bounds(src_reg); +- __reg_deduce_bounds(dst_reg); +- /* We might have learned some bits from the bounds. */ +- __reg_bound_offset(src_reg); +- __reg_bound_offset(dst_reg); +- /* Intersecting with the old var_off might have improved our bounds +- * slightly. e.g. 
if umax was 0x7f...f and var_off was (0; 0xf...fc), +- * then new var_off is (0; 0x7f...fc) which improves our umax. +- */ +- __update_reg_bounds(src_reg); +- __update_reg_bounds(dst_reg); ++ reg_bounds_sync(src_reg); ++ reg_bounds_sync(dst_reg); + } -- struct semaphore buf_lock; -+ spinlock_t buf_lock; - char *buf; - size_t bufsize; + static void reg_combine_min_max(struct bpf_reg_state *true_src, +@@ -8730,17 +8800,17 @@ static void mark_ptr_or_null_reg(struct bpf_func_state *state, + struct bpf_reg_state *reg, u32 id, + bool is_null) + { +- if (reg_type_may_be_null(reg->type) && reg->id == id && ++ if (type_may_be_null(reg->type) && reg->id == id && + !WARN_ON_ONCE(!reg->id)) { +- /* Old offset (both fixed and variable parts) should +- * have been known-zero, because we don't allow pointer +- * arithmetic on pointers that might be NULL. +- */ + if (WARN_ON_ONCE(reg->smin_value || reg->smax_value || + !tnum_equals_const(reg->var_off, 0) || + reg->off)) { +- __mark_reg_known_zero(reg); +- reg->off = 0; ++ /* Old offset (both fixed and variable parts) should ++ * have been known-zero, because we don't allow pointer ++ * arithmetic on pointers that might be NULL. If we ++ * see this happening, don't convert the register. ++ */ ++ return; + } + if (is_null) { + reg->type = SCALAR_VALUE; +@@ -8758,7 +8828,7 @@ static void mark_ptr_or_null_reg(struct bpf_func_state *state, -diff --git a/include/linux/ptp_classify.h b/include/linux/ptp_classify.h -index ae04968a3a472..7a526b52bd748 100644 ---- a/include/linux/ptp_classify.h -+++ b/include/linux/ptp_classify.h -@@ -42,6 +42,9 @@ - #define OFF_PTP_SOURCE_UUID 22 /* PTPv1 only */ - #define OFF_PTP_SEQUENCE_ID 30 + if (!reg_may_point_to_spin_lock(reg)) { + /* For not-NULL ptr, reg->ref_obj_id will be reset +- * in release_reg_references(). ++ * in release_reference(). + * + * reg->id is still used by spin_lock ptr. Other + * than spin_lock ptr type, reg->id can be reset. +@@ -8768,22 +8838,6 @@ static void mark_ptr_or_null_reg(struct bpf_func_state *state, + } + } -+/* PTP header flag fields */ -+#define PTP_FLAG_TWOSTEP BIT(1) -+ - /* Below defines should actually be removed at some point in time. */ - #define IP6_HLEN 40 - #define UDP_HLEN 8 -diff --git a/include/linux/ptp_clock_kernel.h b/include/linux/ptp_clock_kernel.h -index 2e5565067355b..554454cb86931 100644 ---- a/include/linux/ptp_clock_kernel.h -+++ b/include/linux/ptp_clock_kernel.h -@@ -351,15 +351,17 @@ int ptp_get_vclocks_index(int pclock_index, int **vclock_index); - * - * @hwtstamps: skb_shared_hwtstamps structure pointer - * @vclock_index: phc index of ptp vclock. -+ * -+ * Returns converted timestamp, or 0 on error. +-static void __mark_ptr_or_null_regs(struct bpf_func_state *state, u32 id, +- bool is_null) +-{ +- struct bpf_reg_state *reg; +- int i; +- +- for (i = 0; i < MAX_BPF_REG; i++) +- mark_ptr_or_null_reg(state, &state->regs[i], id, is_null); +- +- bpf_for_each_spilled_reg(i, state, reg) { +- if (!reg) +- continue; +- mark_ptr_or_null_reg(state, reg, id, is_null); +- } +-} +- + /* The logic is similar to find_good_pkt_pointers(), both could eventually + * be folded together at some point. 
*/ --void ptp_convert_timestamp(struct skb_shared_hwtstamps *hwtstamps, -- int vclock_index); -+ktime_t ptp_convert_timestamp(const struct skb_shared_hwtstamps *hwtstamps, -+ int vclock_index); - #else - static inline int ptp_get_vclocks_index(int pclock_index, int **vclock_index) - { return 0; } --static inline void ptp_convert_timestamp(struct skb_shared_hwtstamps *hwtstamps, -- int vclock_index) --{ } -+static inline ktime_t ptp_convert_timestamp(const struct skb_shared_hwtstamps *hwtstamps, -+ int vclock_index) -+{ return 0; } - - #endif - -diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h -index b5ebf6c012924..d695c43fd740d 100644 ---- a/include/linux/ptrace.h -+++ b/include/linux/ptrace.h -@@ -30,7 +30,6 @@ extern int ptrace_access_vm(struct task_struct *tsk, unsigned long addr, +@@ -8791,10 +8845,9 @@ static void mark_ptr_or_null_regs(struct bpf_verifier_state *vstate, u32 regno, + bool is_null) + { + struct bpf_func_state *state = vstate->frame[vstate->curframe]; +- struct bpf_reg_state *regs = state->regs; ++ struct bpf_reg_state *regs = state->regs, *reg; + u32 ref_obj_id = regs[regno].ref_obj_id; + u32 id = regs[regno].id; +- int i; - #define PT_SEIZED 0x00010000 /* SEIZE used, enable new behavior */ - #define PT_PTRACED 0x00000001 --#define PT_DTRACE 0x00000002 /* delayed trace (used on m68k, i386) */ + if (ref_obj_id && ref_obj_id == id && is_null) + /* regs[regno] is in the " == NULL" branch. +@@ -8803,8 +8856,9 @@ static void mark_ptr_or_null_regs(struct bpf_verifier_state *vstate, u32 regno, + */ + WARN_ON_ONCE(release_reference_state(state, id)); - #define PT_OPT_FLAG_SHIFT 3 - /* PT_TRACE_* event enable flags */ -@@ -47,12 +46,6 @@ extern int ptrace_access_vm(struct task_struct *tsk, unsigned long addr, - #define PT_EXITKILL (PTRACE_O_EXITKILL << PT_OPT_FLAG_SHIFT) - #define PT_SUSPEND_SECCOMP (PTRACE_O_SUSPEND_SECCOMP << PT_OPT_FLAG_SHIFT) +- for (i = 0; i <= vstate->curframe; i++) +- __mark_ptr_or_null_regs(vstate->frame[i], id, is_null); ++ bpf_for_each_reg_in_vstate(vstate, state, reg, ({ ++ mark_ptr_or_null_reg(state, reg, id, is_null); ++ })); + } --/* single stepping state bits (used on ARM and PA-RISC) */ --#define PT_SINGLESTEP_BIT 31 --#define PT_SINGLESTEP (1<<PT_SINGLESTEP_BIT) --#define PT_BLOCKSTEP_BIT 30 --#define PT_BLOCKSTEP (1<<PT_BLOCKSTEP_BIT) + static bool try_match_pkt_pointers(const struct bpf_insn *insn, +@@ -8917,23 +8971,11 @@ static void find_equal_scalars(struct bpf_verifier_state *vstate, + { + struct bpf_func_state *state; + struct bpf_reg_state *reg; +- int i, j; - - extern long arch_ptrace(struct task_struct *child, long request, - unsigned long addr, unsigned long data); - extern int ptrace_readdata(struct task_struct *tsk, unsigned long src, char __user *dst, int len); -diff --git a/include/linux/qed/qed_eth_if.h b/include/linux/qed/qed_eth_if.h -index 812a4d7511633..4df0bf0a0864e 100644 ---- a/include/linux/qed/qed_eth_if.h -+++ b/include/linux/qed/qed_eth_if.h -@@ -145,12 +145,6 @@ struct qed_filter_mcast_params { - unsigned char mac[64][ETH_ALEN]; - }; +- for (i = 0; i <= vstate->curframe; i++) { +- state = vstate->frame[i]; +- for (j = 0; j < MAX_BPF_REG; j++) { +- reg = &state->regs[j]; +- if (reg->type == SCALAR_VALUE && reg->id == known_reg->id) +- *reg = *known_reg; +- } --union qed_filter_type_params { -- enum qed_filter_rx_mode_type accept_flags; -- struct qed_filter_ucast_params ucast; -- struct qed_filter_mcast_params mcast; --}; -- - enum qed_filter_type { - QED_FILTER_TYPE_UCAST, - QED_FILTER_TYPE_MCAST, -@@ 
-158,11 +152,6 @@ enum qed_filter_type { - QED_MAX_FILTER_TYPES, - }; +- bpf_for_each_spilled_reg(j, state, reg) { +- if (!reg) +- continue; +- if (reg->type == SCALAR_VALUE && reg->id == known_reg->id) +- *reg = *known_reg; +- } +- } ++ bpf_for_each_reg_in_vstate(vstate, state, reg, ({ ++ if (reg->type == SCALAR_VALUE && reg->id == known_reg->id) ++ copy_register_state(reg, known_reg); ++ })); + } --struct qed_filter_params { -- enum qed_filter_type type; -- union qed_filter_type_params filter; --}; -- - struct qed_tunn_params { - u16 vxlan_port; - u8 update_vxlan_port; -@@ -314,8 +303,14 @@ struct qed_eth_ops { + static int check_cond_jmp_op(struct bpf_verifier_env *env, +@@ -9108,7 +9150,7 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env, + */ + if (!is_jmp32 && BPF_SRC(insn->code) == BPF_K && + insn->imm == 0 && (opcode == BPF_JEQ || opcode == BPF_JNE) && +- reg_type_may_be_null(dst_reg->type)) { ++ type_may_be_null(dst_reg->type)) { + /* Mark all identical registers in each branch as either + * safe or unknown depending R == 0 or R != 0 conditional. + */ +@@ -9159,11 +9201,15 @@ static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn) + return 0; + } - int (*q_tx_stop)(struct qed_dev *cdev, u8 rss_id, void *handle); +- if (insn->src_reg == BPF_PSEUDO_BTF_ID) { +- mark_reg_known_zero(env, regs, insn->dst_reg); ++ /* All special src_reg cases are listed below. From this point onwards ++ * we either succeed and assign a corresponding dst_reg->type after ++ * zeroing the offset, or fail and reject the program. ++ */ ++ mark_reg_known_zero(env, regs, insn->dst_reg); -- int (*filter_config)(struct qed_dev *cdev, -- struct qed_filter_params *params); -+ int (*filter_config_rx_mode)(struct qed_dev *cdev, -+ enum qed_filter_rx_mode_type type); -+ -+ int (*filter_config_ucast)(struct qed_dev *cdev, -+ struct qed_filter_ucast_params *params); -+ -+ int (*filter_config_mcast)(struct qed_dev *cdev, -+ struct qed_filter_mcast_params *params); ++ if (insn->src_reg == BPF_PSEUDO_BTF_ID) { + dst_reg->type = aux->btf_var.reg_type; +- switch (dst_reg->type) { ++ switch (base_type(dst_reg->type)) { + case PTR_TO_MEM: + dst_reg->mem_size = aux->btf_var.mem_size; + break; +@@ -9181,7 +9227,8 @@ static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn) - int (*fastpath_stop)(struct qed_dev *cdev); + if (insn->src_reg == BPF_PSEUDO_FUNC) { + struct bpf_prog_aux *aux = env->prog->aux; +- u32 subprogno = insn[1].imm; ++ u32 subprogno = find_subprog(env, ++ env->insn_idx + insn->imm + 1); -diff --git a/include/linux/random.h b/include/linux/random.h -index f45b8be3e3c4e..3feafab498ad9 100644 ---- a/include/linux/random.h -+++ b/include/linux/random.h -@@ -1,9 +1,5 @@ - /* SPDX-License-Identifier: GPL-2.0 */ --/* -- * include/linux/random.h -- * -- * Include file for the random number generator. 
-- */ -+ - #ifndef _LINUX_RANDOM_H - #define _LINUX_RANDOM_H + if (!aux->func_info) { + verbose(env, "missing btf func_info\n"); +@@ -9198,7 +9245,6 @@ static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn) + } -@@ -14,41 +10,26 @@ + map = env->used_maps[aux->map_index]; +- mark_reg_known_zero(env, regs, insn->dst_reg); + dst_reg->map_ptr = map; - #include <uapi/linux/random.h> + if (insn->src_reg == BPF_PSEUDO_MAP_VALUE || +@@ -9361,7 +9407,7 @@ static int check_return_code(struct bpf_verifier_env *env) + /* enforce return zero from async callbacks like timer */ + if (reg->type != SCALAR_VALUE) { + verbose(env, "In async callback the register R0 is not a known value (%s)\n", +- reg_type_str[reg->type]); ++ reg_type_str(env, reg->type)); + return -EINVAL; + } --struct random_ready_callback { -- struct list_head list; -- void (*func)(struct random_ready_callback *rdy); -- struct module *owner; --}; -+struct notifier_block; +@@ -9375,7 +9421,7 @@ static int check_return_code(struct bpf_verifier_env *env) + if (is_subprog) { + if (reg->type != SCALAR_VALUE) { + verbose(env, "At subprogram exit the register R0 is not a scalar value (%s)\n", +- reg_type_str[reg->type]); ++ reg_type_str(env, reg->type)); + return -EINVAL; + } + return 0; +@@ -9439,7 +9485,7 @@ static int check_return_code(struct bpf_verifier_env *env) --extern void add_device_randomness(const void *, unsigned int); --extern void add_bootloader_randomness(const void *, unsigned int); -+void add_device_randomness(const void *buf, size_t len); -+void __init add_bootloader_randomness(const void *buf, size_t len); -+void add_input_randomness(unsigned int type, unsigned int code, -+ unsigned int value) __latent_entropy; -+void add_interrupt_randomness(int irq) __latent_entropy; -+void add_hwgenerator_randomness(const void *buf, size_t len, size_t entropy); + if (reg->type != SCALAR_VALUE) { + verbose(env, "At program exit the register R0 is not a known value (%s)\n", +- reg_type_str[reg->type]); ++ reg_type_str(env, reg->type)); + return -EINVAL; + } - #if defined(LATENT_ENTROPY_PLUGIN) && !defined(__CHECKER__) - static inline void add_latent_entropy(void) +@@ -10220,7 +10266,7 @@ static bool regsafe(struct bpf_verifier_env *env, struct bpf_reg_state *rold, + return true; + if (rcur->type == NOT_INIT) + return false; +- switch (rold->type) { ++ switch (base_type(rold->type)) { + case SCALAR_VALUE: + if (env->explore_alu_limits) + return false; +@@ -10242,6 +10288,22 @@ static bool regsafe(struct bpf_verifier_env *env, struct bpf_reg_state *rold, + } + case PTR_TO_MAP_KEY: + case PTR_TO_MAP_VALUE: ++ /* a PTR_TO_MAP_VALUE could be safe to use as a ++ * PTR_TO_MAP_VALUE_OR_NULL into the same map. ++ * However, if the old PTR_TO_MAP_VALUE_OR_NULL then got NULL- ++ * checked, doing so could have affected others with the same ++ * id, and we can't check for that because we lost the id when ++ * we converted to a PTR_TO_MAP_VALUE. ++ */ ++ if (type_may_be_null(rold->type)) { ++ if (!type_may_be_null(rcur->type)) ++ return false; ++ if (memcmp(rold, rcur, offsetof(struct bpf_reg_state, id))) ++ return false; ++ /* Check our ids match any regs they're supposed to */ ++ return check_ids(rold->id, rcur->id, idmap); ++ } ++ + /* If the new min/max/var_off satisfy the old ones and + * everything else matches, we are OK. 
+ * 'id' is not compared, since it's only used for maps with +@@ -10253,20 +10315,6 @@ static bool regsafe(struct bpf_verifier_env *env, struct bpf_reg_state *rold, + return memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)) == 0 && + range_within(rold, rcur) && + tnum_in(rold->var_off, rcur->var_off); +- case PTR_TO_MAP_VALUE_OR_NULL: +- /* a PTR_TO_MAP_VALUE could be safe to use as a +- * PTR_TO_MAP_VALUE_OR_NULL into the same map. +- * However, if the old PTR_TO_MAP_VALUE_OR_NULL then got NULL- +- * checked, doing so could have affected others with the same +- * id, and we can't check for that because we lost the id when +- * we converted to a PTR_TO_MAP_VALUE. +- */ +- if (rcur->type != PTR_TO_MAP_VALUE_OR_NULL) +- return false; +- if (memcmp(rold, rcur, offsetof(struct bpf_reg_state, id))) +- return false; +- /* Check our ids match any regs they're supposed to */ +- return check_ids(rold->id, rcur->id, idmap); + case PTR_TO_PACKET_META: + case PTR_TO_PACKET: + if (rcur->type != rold->type) +@@ -10295,11 +10343,8 @@ static bool regsafe(struct bpf_verifier_env *env, struct bpf_reg_state *rold, + case PTR_TO_PACKET_END: + case PTR_TO_FLOW_KEYS: + case PTR_TO_SOCKET: +- case PTR_TO_SOCKET_OR_NULL: + case PTR_TO_SOCK_COMMON: +- case PTR_TO_SOCK_COMMON_OR_NULL: + case PTR_TO_TCP_SOCK: +- case PTR_TO_TCP_SOCK_OR_NULL: + case PTR_TO_XDP_SOCK: + /* Only valid matches are exact, which memcmp() above + * would have accepted +@@ -10356,9 +10401,9 @@ static bool stacksafe(struct bpf_verifier_env *env, struct bpf_func_state *old, + * return false to continue verification of this path + */ + return false; +- if (i % BPF_REG_SIZE) ++ if (i % BPF_REG_SIZE != BPF_REG_SIZE - 1) + continue; +- if (old->stack[spi].slot_type[0] != STACK_SPILL) ++ if (!is_spilled_reg(&old->stack[spi])) + continue; + if (!regsafe(env, &old->stack[spi].spilled_ptr, + &cur->stack[spi].spilled_ptr, idmap)) +@@ -10549,34 +10594,36 @@ static int propagate_precision(struct bpf_verifier_env *env, { -- add_device_randomness((const void *)&latent_entropy, -- sizeof(latent_entropy)); -+ add_device_randomness((const void *)&latent_entropy, sizeof(latent_entropy)); - } - #else --static inline void add_latent_entropy(void) {} --#endif -- --extern void add_input_randomness(unsigned int type, unsigned int code, -- unsigned int value) __latent_entropy; --extern void add_interrupt_randomness(int irq, int irq_flags) __latent_entropy; -- --extern void get_random_bytes(void *buf, int nbytes); --extern int wait_for_random_bytes(void); --extern int __init rand_initialize(void); --extern bool rng_is_initialized(void); --extern int add_random_ready_callback(struct random_ready_callback *rdy); --extern void del_random_ready_callback(struct random_ready_callback *rdy); --extern int __must_check get_random_bytes_arch(void *buf, int nbytes); -- --#ifndef MODULE --extern const struct file_operations random_fops, urandom_fops; -+static inline void add_latent_entropy(void) { } - #endif + struct bpf_reg_state *state_reg; + struct bpf_func_state *state; +- int i, err = 0; ++ int i, err = 0, fr; -+void get_random_bytes(void *buf, size_t len); -+size_t __must_check get_random_bytes_arch(void *buf, size_t len); - u32 get_random_u32(void); - u64 get_random_u64(void); - static inline unsigned int get_random_int(void) -@@ -80,36 +61,38 @@ static inline unsigned long get_random_long(void) +- state = old->frame[old->curframe]; +- state_reg = state->regs; +- for (i = 0; i < BPF_REG_FP; i++, state_reg++) { +- if (state_reg->type != SCALAR_VALUE || +- 
!state_reg->precise) +- continue; +- if (env->log.level & BPF_LOG_LEVEL2) +- verbose(env, "propagating r%d\n", i); +- err = mark_chain_precision(env, i); +- if (err < 0) +- return err; +- } ++ for (fr = old->curframe; fr >= 0; fr--) { ++ state = old->frame[fr]; ++ state_reg = state->regs; ++ for (i = 0; i < BPF_REG_FP; i++, state_reg++) { ++ if (state_reg->type != SCALAR_VALUE || ++ !state_reg->precise) ++ continue; ++ if (env->log.level & BPF_LOG_LEVEL2) ++ verbose(env, "frame %d: propagating r%d\n", i, fr); ++ err = mark_chain_precision_frame(env, fr, i); ++ if (err < 0) ++ return err; ++ } - static inline unsigned long get_random_canary(void) - { -- unsigned long val = get_random_long(); -- -- return val & CANARY_MASK; -+ return get_random_long() & CANARY_MASK; +- for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) { +- if (state->stack[i].slot_type[0] != STACK_SPILL) +- continue; +- state_reg = &state->stack[i].spilled_ptr; +- if (state_reg->type != SCALAR_VALUE || +- !state_reg->precise) +- continue; +- if (env->log.level & BPF_LOG_LEVEL2) +- verbose(env, "propagating fp%d\n", +- (-i - 1) * BPF_REG_SIZE); +- err = mark_chain_precision_stack(env, i); +- if (err < 0) +- return err; ++ for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) { ++ if (!is_spilled_reg(&state->stack[i])) ++ continue; ++ state_reg = &state->stack[i].spilled_ptr; ++ if (state_reg->type != SCALAR_VALUE || ++ !state_reg->precise) ++ continue; ++ if (env->log.level & BPF_LOG_LEVEL2) ++ verbose(env, "frame %d: propagating fp%d\n", ++ (-i - 1) * BPF_REG_SIZE, fr); ++ err = mark_chain_precision_stack_frame(env, fr, i); ++ if (err < 0) ++ return err; ++ } + } + return 0; } - -+int __init random_init(const char *command_line); -+bool rng_is_initialized(void); -+int wait_for_random_bytes(void); -+int register_random_ready_notifier(struct notifier_block *nb); -+int unregister_random_ready_notifier(struct notifier_block *nb); -+ - /* Calls wait_for_random_bytes() and then calls get_random_bytes(buf, nbytes). - * Returns the result of the call to wait_for_random_bytes. */ --static inline int get_random_bytes_wait(void *buf, int nbytes) -+static inline int get_random_bytes_wait(void *buf, size_t nbytes) +@@ -10825,17 +10872,13 @@ next: + /* Return true if it's OK to have the same insn return a different type. 
*/ + static bool reg_type_mismatch_ok(enum bpf_reg_type type) { - int ret = wait_for_random_bytes(); - get_random_bytes(buf, nbytes); - return ret; - } - --#define declare_get_random_var_wait(var) \ -- static inline int get_random_ ## var ## _wait(var *out) { \ -+#define declare_get_random_var_wait(name, ret_type) \ -+ static inline int get_random_ ## name ## _wait(ret_type *out) { \ - int ret = wait_for_random_bytes(); \ - if (unlikely(ret)) \ - return ret; \ -- *out = get_random_ ## var(); \ -+ *out = get_random_ ## name(); \ - return 0; \ - } --declare_get_random_var_wait(u32) --declare_get_random_var_wait(u64) --declare_get_random_var_wait(int) --declare_get_random_var_wait(long) -+declare_get_random_var_wait(u32, u32) -+declare_get_random_var_wait(u64, u32) -+declare_get_random_var_wait(int, unsigned int) -+declare_get_random_var_wait(long, unsigned long) - #undef declare_get_random_var - --unsigned long randomize_page(unsigned long start, unsigned long range); -- - /* - * This is designed to be standalone for just prandom - * users, but for now we include it from <linux/random.h> -@@ -120,22 +103,10 @@ unsigned long randomize_page(unsigned long start, unsigned long range); - #ifdef CONFIG_ARCH_RANDOM - # include <asm/archrandom.h> - #else --static inline bool __must_check arch_get_random_long(unsigned long *v) --{ -- return false; --} --static inline bool __must_check arch_get_random_int(unsigned int *v) --{ -- return false; --} --static inline bool __must_check arch_get_random_seed_long(unsigned long *v) --{ -- return false; --} --static inline bool __must_check arch_get_random_seed_int(unsigned int *v) --{ -- return false; --} -+static inline bool __must_check arch_get_random_long(unsigned long *v) { return false; } -+static inline bool __must_check arch_get_random_int(unsigned int *v) { return false; } -+static inline bool __must_check arch_get_random_seed_long(unsigned long *v) { return false; } -+static inline bool __must_check arch_get_random_seed_int(unsigned int *v) { return false; } - #endif +- switch (type) { ++ switch (base_type(type)) { + case PTR_TO_CTX: + case PTR_TO_SOCKET: +- case PTR_TO_SOCKET_OR_NULL: + case PTR_TO_SOCK_COMMON: +- case PTR_TO_SOCK_COMMON_OR_NULL: + case PTR_TO_TCP_SOCK: +- case PTR_TO_TCP_SOCK_OR_NULL: + case PTR_TO_XDP_SOCK: + case PTR_TO_BTF_ID: +- case PTR_TO_BTF_ID_OR_NULL: + return false; + default: + return true; +@@ -11059,7 +11102,7 @@ static int do_check(struct bpf_verifier_env *env) + if (is_ctx_reg(env, insn->dst_reg)) { + verbose(env, "BPF_ST stores into R%d %s is not allowed\n", + insn->dst_reg, +- reg_type_str[reg_state(env, insn->dst_reg)->type]); ++ reg_type_str(env, reg_state(env, insn->dst_reg)->type)); + return -EACCES; + } - /* -@@ -158,4 +129,13 @@ static inline bool __init arch_get_random_long_early(unsigned long *v) - } - #endif +@@ -11128,6 +11171,16 @@ static int do_check(struct bpf_verifier_env *env) + return -EINVAL; + } -+#ifdef CONFIG_SMP -+int random_prepare_cpu(unsigned int cpu); -+int random_online_cpu(unsigned int cpu); -+#endif -+ -+#ifndef MODULE -+extern const struct file_operations random_fops, urandom_fops; -+#endif -+ - #endif /* _LINUX_RANDOM_H */ -diff --git a/include/linux/randomize_kstack.h b/include/linux/randomize_kstack.h -index bebc911161b6f..d373f1bcbf7ca 100644 ---- a/include/linux/randomize_kstack.h -+++ b/include/linux/randomize_kstack.h -@@ -16,8 +16,20 @@ DECLARE_PER_CPU(u32, kstack_offset); - * alignment. 
Also, since this use is being explicitly masked to a max of - * 10 bits, stack-clash style attacks are unlikely. For more details see - * "VLAs" in Documentation/process/deprecated.rst -+ * -+ * The normal __builtin_alloca() is initialized with INIT_STACK_ALL (currently -+ * only with Clang and not GCC). Initializing the unused area on each syscall -+ * entry is expensive, and generating an implicit call to memset() may also be -+ * problematic (such as in noinstr functions). Therefore, if the compiler -+ * supports it (which it should if it initializes allocas), always use the -+ * "uninitialized" variant of the builtin. - */ --void *__builtin_alloca(size_t size); -+#if __has_builtin(__builtin_alloca_uninitialized) -+#define __kstack_alloca __builtin_alloca_uninitialized -+#else -+#define __kstack_alloca __builtin_alloca -+#endif ++ /* We must do check_reference_leak here before ++ * prepare_func_exit to handle the case when ++ * state->curframe > 0, it may be a callback ++ * function, for which reference_state must ++ * match caller reference state when it exits. ++ */ ++ err = check_reference_leak(env); ++ if (err) ++ return err; + - /* - * Use, at most, 10 bits of entropy. We explicitly cap this to keep the - * "VLA" from being unbounded (see above). 10 bits leaves enough room for -@@ -36,7 +48,7 @@ void *__builtin_alloca(size_t size); - if (static_branch_maybe(CONFIG_RANDOMIZE_KSTACK_OFFSET_DEFAULT, \ - &randomize_kstack_offset)) { \ - u32 offset = raw_cpu_read(kstack_offset); \ -- u8 *ptr = __builtin_alloca(KSTACK_OFFSET_MAX(offset)); \ -+ u8 *ptr = __kstack_alloca(KSTACK_OFFSET_MAX(offset)); \ - /* Keep allocation even after "ptr" loses scope. */ \ - asm volatile("" :: "r"(ptr) : "memory"); \ - } \ -diff --git a/include/linux/ratelimit_types.h b/include/linux/ratelimit_types.h -index b676aa419eef8..f0e535f199bef 100644 ---- a/include/linux/ratelimit_types.h -+++ b/include/linux/ratelimit_types.h -@@ -23,12 +23,16 @@ struct ratelimit_state { - unsigned long flags; - }; + if (state->curframe) { + /* exit from nested function */ + err = prepare_func_exit(env, &env->insn_idx); +@@ -11137,10 +11190,6 @@ static int do_check(struct bpf_verifier_env *env) + continue; + } --#define RATELIMIT_STATE_INIT(name, interval_init, burst_init) { \ -- .lock = __RAW_SPIN_LOCK_UNLOCKED(name.lock), \ -- .interval = interval_init, \ -- .burst = burst_init, \ -+#define RATELIMIT_STATE_INIT_FLAGS(name, interval_init, burst_init, flags_init) { \ -+ .lock = __RAW_SPIN_LOCK_UNLOCKED(name.lock), \ -+ .interval = interval_init, \ -+ .burst = burst_init, \ -+ .flags = flags_init, \ +- err = check_reference_leak(env); +- if (err) +- return err; +- + err = check_return_code(env); + if (err) + return err; +@@ -11310,7 +11359,7 @@ static int check_pseudo_btf_id(struct bpf_verifier_env *env, + err = -EINVAL; + goto err_put; + } +- aux->btf_var.reg_type = PTR_TO_MEM; ++ aux->btf_var.reg_type = PTR_TO_MEM | MEM_RDONLY; + aux->btf_var.mem_size = tsize; + } else { + aux->btf_var.reg_type = PTR_TO_BTF_ID; +@@ -11435,6 +11484,13 @@ static int check_map_prog_compatibility(struct bpf_verifier_env *env, + } } -+#define RATELIMIT_STATE_INIT(name, interval_init, burst_init) \ -+ RATELIMIT_STATE_INIT_FLAGS(name, interval_init, burst_init, 0) ++ if (map_value_has_timer(map)) { ++ if (is_tracing_prog_type(prog_type)) { ++ verbose(env, "tracing progs cannot use bpf_timer yet\n"); ++ return -EINVAL; ++ } ++ } + - #define RATELIMIT_STATE_INIT_DISABLED \ - RATELIMIT_STATE_INIT(ratelimit_state, 0, DEFAULT_RATELIMIT_BURST) + if 
((bpf_prog_is_dev_bound(prog->aux) || bpf_map_is_dev_bound(map)) && + !bpf_offload_prog_map_match(prog, map)) { + verbose(env, "offload device mismatch between prog and map\n"); +@@ -12122,6 +12178,10 @@ static int opt_subreg_zext_lo32_rnd_hi32(struct bpf_verifier_env *env, + if (!bpf_jit_needs_zext() && !is_cmpxchg_insn(&insn)) + continue; -diff --git a/include/linux/reset.h b/include/linux/reset.h -index db0e6115a2f6a..7bb5837375289 100644 ---- a/include/linux/reset.h -+++ b/include/linux/reset.h -@@ -711,7 +711,7 @@ static inline int __must_check - devm_reset_control_bulk_get_optional_exclusive(struct device *dev, int num_rstcs, - struct reset_control_bulk_data *rstcs) - { -- return __devm_reset_control_bulk_get(dev, num_rstcs, rstcs, true, false, true); -+ return __devm_reset_control_bulk_get(dev, num_rstcs, rstcs, false, true, true); - } ++ /* Zero-extension is done by the caller. */ ++ if (bpf_pseudo_kfunc_call(&insn)) ++ continue; ++ + if (WARN_ON(load_reg == -1)) { + verbose(env, "verifier bug. zext_dst is set, but no reg is defined\n"); + return -EFAULT; +@@ -12350,14 +12410,9 @@ static int jit_subprogs(struct bpf_verifier_env *env) + return 0; - /** -diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h -index dac53fd3afea3..2504df9a0453e 100644 ---- a/include/linux/ring_buffer.h -+++ b/include/linux/ring_buffer.h -@@ -101,7 +101,7 @@ __ring_buffer_alloc(unsigned long size, unsigned flags, struct lock_class_key *k - int ring_buffer_wait(struct trace_buffer *buffer, int cpu, int full); - __poll_t ring_buffer_poll_wait(struct trace_buffer *buffer, int cpu, - struct file *filp, poll_table *poll_table); -- -+void ring_buffer_wake_waiters(struct trace_buffer *buffer, int cpu); + for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) { +- if (bpf_pseudo_func(insn)) { +- env->insn_aux_data[i].call_imm = insn->imm; +- /* subprog is encoded in insn[1].imm */ ++ if (!bpf_pseudo_func(insn) && !bpf_pseudo_call(insn)) + continue; +- } - #define RING_BUFFER_ALL_CPUS -1 +- if (!bpf_pseudo_call(insn)) +- continue; + /* Upon error here we cannot fall back to interpreter but + * need a hard reject of the program. Thus -EFAULT is + * propagated in any case. +@@ -12378,6 +12433,12 @@ static int jit_subprogs(struct bpf_verifier_env *env) + env->insn_aux_data[i].call_imm = insn->imm; + /* point imm to __bpf_call_base+1 from JITs point of view */ + insn->imm = 1; ++ if (bpf_pseudo_func(insn)) ++ /* jit (e.g. x86_64) may emit fewer instructions ++ * if it learns a u32 imm is the same as a u64 imm. ++ * Force a non zero here. ++ */ ++ insn[1].imm = 1; + } -diff --git a/include/linux/rmap.h b/include/linux/rmap.h -index c976cc6de2574..c29d9c13378b3 100644 ---- a/include/linux/rmap.h -+++ b/include/linux/rmap.h -@@ -39,12 +39,15 @@ struct anon_vma { - atomic_t refcount; + err = bpf_prog_alloc_jited_linfo(prog); +@@ -12413,6 +12474,7 @@ static int jit_subprogs(struct bpf_verifier_env *env) + /* Below members will be freed only at prog->aux */ + func[i]->aux->btf = prog->aux->btf; + func[i]->aux->func_info = prog->aux->func_info; ++ func[i]->aux->func_info_cnt = prog->aux->func_info_cnt; + func[i]->aux->poke_tab = prog->aux->poke_tab; + func[i]->aux->size_poke_tab = prog->aux->size_poke_tab; - /* -- * Count of child anon_vmas and VMAs which points to this anon_vma. -+ * Count of child anon_vmas. Equals to the count of all anon_vmas that -+ * have ->parent pointing to this one, including itself. 
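Editor's sketch (not patch content): the rmap.h hunk above replaces the old single "degree" counter with two independent counts. Their definitions can be modelled in a toy userspace program; the field names follow the patch, everything else is illustrative:

    #include <stdio.h>

    struct anon_vma {
        struct anon_vma *parent;
        unsigned long num_children;    /* anon_vmas whose ->parent is this one */
        unsigned long num_active_vmas; /* VMAs whose ->anon_vma is this one */
    };

    int main(void)
    {
        struct anon_vma root = { 0 }, child = { 0 };

        root.parent = &root;     /* a root anon_vma points to itself... */
        root.num_children++;     /* ...so it counts among its own children */

        child.parent = &root;    /* a child created at fork keeps root as parent */
        root.num_children++;

        child.num_active_vmas++; /* one VMA maps its pages through the child */

        printf("root:  %lu children, %lu active VMAs\n",
               root.num_children, root.num_active_vmas);
        printf("child: %lu children, %lu active VMAs\n",
               child.num_children, child.num_active_vmas);
        return 0;
    }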
- * - * This counter is used for making decision about reusing anon_vma - * instead of forking new one. See comments in function anon_vma_clone. - */ -- unsigned degree; -+ unsigned long num_children; -+ /* Count of VMAs whose ->anon_vma pointer points to this object. */ -+ unsigned long num_active_vmas; +@@ -12425,9 +12487,6 @@ static int jit_subprogs(struct bpf_verifier_env *env) + poke->aux = func[i]->aux; + } - struct anon_vma *parent; /* Parent of this anon_vma */ +- /* Use bpf_prog_F_tag to indicate functions in stack traces. +- * Long term would need debug info to populate names +- */ + func[i]->aux->name[0] = 'F'; + func[i]->aux->stack_depth = env->subprog_info[i].stack_depth; + func[i]->jit_requested = 1; +@@ -12461,7 +12520,7 @@ static int jit_subprogs(struct bpf_verifier_env *env) + insn = func[i]->insnsi; + for (j = 0; j < func[i]->len; j++, insn++) { + if (bpf_pseudo_func(insn)) { +- subprog = insn[1].imm; ++ subprog = insn->off; + insn[0].imm = (u32)(long)func[subprog]->bpf_func; + insn[1].imm = ((u64)(long)func[subprog]->bpf_func) >> 32; + continue; +@@ -12513,7 +12572,8 @@ static int jit_subprogs(struct bpf_verifier_env *env) + for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) { + if (bpf_pseudo_func(insn)) { + insn[0].imm = env->insn_aux_data[i].call_imm; +- insn[1].imm = find_subprog(env, i + insn[0].imm + 1); ++ insn[1].imm = insn->off; ++ insn->off = 0; + continue; + } + if (!bpf_pseudo_call(insn)) +@@ -13125,7 +13185,7 @@ static int do_check_common(struct bpf_verifier_env *env, int subprog) + mark_reg_known_zero(env, regs, i); + else if (regs[i].type == SCALAR_VALUE) + mark_reg_unknown(env, regs, i); +- else if (regs[i].type == PTR_TO_MEM_OR_NULL) { ++ else if (base_type(regs[i].type) == PTR_TO_MEM) { + const u32 mem_size = regs[i].mem_size; -diff --git a/include/linux/rpmsg.h b/include/linux/rpmsg.h -index d97dcd049f18f..a8dcf8a9ae885 100644 ---- a/include/linux/rpmsg.h -+++ b/include/linux/rpmsg.h -@@ -231,7 +231,7 @@ static inline struct rpmsg_endpoint *rpmsg_create_ept(struct rpmsg_device *rpdev - /* This shouldn't be possible */ - WARN_ON(1); + mark_reg_known_zero(env, regs, i); +@@ -13713,11 +13773,11 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr) + log->ubuf = (char __user *) (unsigned long) attr->log_buf; + log->len_total = attr->log_size; -- return ERR_PTR(-ENXIO); -+ return NULL; - } +- ret = -EINVAL; + /* log attributes have to be sane */ +- if (log->len_total < 128 || log->len_total > UINT_MAX >> 2 || +- !log->level || !log->ubuf || log->level & ~BPF_LOG_MASK) ++ if (!bpf_verifier_log_attr_valid(log)) { ++ ret = -EINVAL; + goto err_unlock; ++ } + } - static inline int rpmsg_send(struct rpmsg_endpoint *ept, void *data, int len) -diff --git a/include/linux/rtsx_usb.h b/include/linux/rtsx_usb.h -index 159729cffd8e1..3247ed8e9ff0f 100644 ---- a/include/linux/rtsx_usb.h -+++ b/include/linux/rtsx_usb.h -@@ -54,8 +54,6 @@ struct rtsx_ucr { - struct usb_device *pusb_dev; - struct usb_interface *pusb_intf; - struct usb_sg_request current_sg; -- unsigned char *iobuf; -- dma_addr_t iobuf_dma; + if (IS_ERR(btf_vmlinux)) { +diff --git a/kernel/cfi.c b/kernel/cfi.c +index 9594cfd1cf2cf..08102d19ec15a 100644 +--- a/kernel/cfi.c ++++ b/kernel/cfi.c +@@ -281,6 +281,8 @@ static inline cfi_check_fn find_module_check_fn(unsigned long ptr) + static inline cfi_check_fn find_check_fn(unsigned long ptr) + { + cfi_check_fn fn = NULL; ++ unsigned long flags; ++ bool rcu_idle; - struct timer_list sg_timer; - struct mutex dev_mutex; -diff 
--git a/include/linux/sched.h b/include/linux/sched.h -index c1a927ddec646..e418935f8db6a 100644 ---- a/include/linux/sched.h -+++ b/include/linux/sched.h -@@ -933,7 +933,7 @@ struct task_struct { - #endif - #ifdef CONFIG_EVENTFD - /* Recursion prevention for eventfd_signal() */ -- unsigned in_eventfd_signal:1; -+ unsigned in_eventfd:1; - #endif + if (is_kernel_text(ptr)) + return __cfi_check; +@@ -290,13 +292,21 @@ static inline cfi_check_fn find_check_fn(unsigned long ptr) + * the shadow and __module_address use RCU, so we need to wake it + * up if necessary. + */ +- RCU_NONIDLE({ +- if (IS_ENABLED(CONFIG_CFI_CLANG_SHADOW)) +- fn = find_shadow_check_fn(ptr); ++ rcu_idle = !rcu_is_watching(); ++ if (rcu_idle) { ++ local_irq_save(flags); ++ rcu_irq_enter(); ++ } ++ ++ if (IS_ENABLED(CONFIG_CFI_CLANG_SHADOW)) ++ fn = find_shadow_check_fn(ptr); ++ if (!fn) ++ fn = find_module_check_fn(ptr); - unsigned long atomic_flags; /* Flags requiring atomic access. */ -@@ -1436,6 +1436,7 @@ struct task_struct { - int pagefault_disabled; - #ifdef CONFIG_MMU - struct task_struct *oom_reaper_list; -+ struct timer_list oom_reaper_timer; - #endif - #ifdef CONFIG_VMAP_STACK - struct vm_struct *stack_vm_area; -@@ -1626,6 +1627,14 @@ static inline unsigned int task_state_index(struct task_struct *tsk) - if (tsk_state == TASK_IDLE) - state = TASK_REPORT_IDLE; +- if (!fn) +- fn = find_module_check_fn(ptr); +- }); ++ if (rcu_idle) { ++ rcu_irq_exit(); ++ local_irq_restore(flags); ++ } -+ /* -+ * We're lying here, but rather than expose a completely new task state -+ * to userspace, we can make this appear as if the task has gone through -+ * a regular rt_mutex_lock() call. -+ */ -+ if (tsk_state == TASK_RTLOCK_WAIT) -+ state = TASK_UNINTERRUPTIBLE; -+ - return fls(state); + return fn; } - -@@ -1675,7 +1684,6 @@ extern struct pid *cad_pid; - #define PF_MEMALLOC 0x00000800 /* Allocating memory */ - #define PF_NPROC_EXCEEDED 0x00001000 /* set_user() noticed that RLIMIT_NPROC was exceeded */ - #define PF_USED_MATH 0x00002000 /* If unset the fpu must be initialized before use */ --#define PF_USED_ASYNC 0x00004000 /* Used async_schedule*(), used by module init */ - #define PF_NOFREEZE 0x00008000 /* This thread should not be frozen */ - #define PF_FROZEN 0x00010000 /* Frozen for system suspend */ - #define PF_KSWAPD 0x00020000 /* I am kswapd */ -@@ -1789,7 +1797,7 @@ current_restore_flags(unsigned long orig_flags, unsigned long flags) +diff --git a/kernel/cgroup/cgroup-internal.h b/kernel/cgroup/cgroup-internal.h +index bfbeabc17a9df..d8fcc139ac05d 100644 +--- a/kernel/cgroup/cgroup-internal.h ++++ b/kernel/cgroup/cgroup-internal.h +@@ -65,6 +65,25 @@ static inline struct cgroup_fs_context *cgroup_fc2context(struct fs_context *fc) + return container_of(kfc, struct cgroup_fs_context, kfc); } - extern int cpuset_cpumask_can_shrink(const struct cpumask *cur, const struct cpumask *trial); --extern int task_can_attach(struct task_struct *p, const struct cpumask *cs_cpus_allowed); -+extern int task_can_attach(struct task_struct *p, const struct cpumask *cs_effective_cpus); - #ifdef CONFIG_SMP - extern void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask); - extern int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask); -diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h -index 5561486fddef7..95fb7aaaec8de 100644 ---- a/include/linux/sched/mm.h -+++ b/include/linux/sched/mm.h -@@ -106,6 +106,14 @@ static inline void mm_update_next_owner(struct mm_struct *mm) - 
#endif /* CONFIG_MEMCG */ - - #ifdef CONFIG_MMU -+#ifndef arch_get_mmap_end -+#define arch_get_mmap_end(addr) (TASK_SIZE) -+#endif ++struct cgroup_pidlist; + -+#ifndef arch_get_mmap_base -+#define arch_get_mmap_base(addr, base) (base) -+#endif ++struct cgroup_file_ctx { ++ struct cgroup_namespace *ns; + - extern void arch_pick_mmap_layout(struct mm_struct *mm, - struct rlimit *rlim_stack); - extern unsigned long -diff --git a/include/linux/sched/rt.h b/include/linux/sched/rt.h -index e5af028c08b49..994c25640e156 100644 ---- a/include/linux/sched/rt.h -+++ b/include/linux/sched/rt.h -@@ -39,20 +39,12 @@ static inline struct task_struct *rt_mutex_get_top_task(struct task_struct *p) - } - extern void rt_mutex_setprio(struct task_struct *p, struct task_struct *pi_task); - extern void rt_mutex_adjust_pi(struct task_struct *p); --static inline bool tsk_is_pi_blocked(struct task_struct *tsk) --{ -- return tsk->pi_blocked_on != NULL; --} - #else - static inline struct task_struct *rt_mutex_get_top_task(struct task_struct *task) - { - return NULL; - } - # define rt_mutex_adjust_pi(p) do { } while (0) --static inline bool tsk_is_pi_blocked(struct task_struct *tsk) --{ -- return false; --} - #endif - - extern void normalize_rt_tasks(void); -diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h -index e5f4ce622ee61..5f0e8403e8ceb 100644 ---- a/include/linux/sched/signal.h -+++ b/include/linux/sched/signal.h -@@ -318,7 +318,7 @@ int send_sig_mceerr(int code, void __user *, short, struct task_struct *); - - int force_sig_bnderr(void __user *addr, void __user *lower, void __user *upper); - int force_sig_pkuerr(void __user *addr, u32 pkey); --int force_sig_perf(void __user *addr, u32 type, u64 sig_data); -+int send_sig_perf(void __user *addr, u32 type, u64 sig_data); ++ struct { ++ void *trigger; ++ } psi; ++ ++ struct { ++ bool started; ++ struct css_task_iter iter; ++ } procs; ++ ++ struct { ++ struct cgroup_pidlist *pidlist; ++ } procs1; ++}; ++ + /* + * A cgroup can be associated with multiple css_sets as different tasks may + * belong to different cgroups on different hierarchies. 
In the other +@@ -150,7 +169,6 @@ extern struct mutex cgroup_mutex; + extern spinlock_t css_set_lock; + extern struct cgroup_subsys *cgroup_subsys[]; + extern struct list_head cgroup_roots; +-extern struct file_system_type cgroup_fs_type; + + /* iterate across the hierarchies */ + #define for_each_root(root) \ +diff --git a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c +index 35b9203283447..ee8b3d80f19ee 100644 +--- a/kernel/cgroup/cgroup-v1.c ++++ b/kernel/cgroup/cgroup-v1.c +@@ -59,6 +59,7 @@ int cgroup_attach_task_all(struct task_struct *from, struct task_struct *tsk) + int retval = 0; - int force_sig_ptrace_errno_trap(int errno, void __user *addr); - int force_sig_fault_trapno(int sig, int code, void __user *addr, int trapno); -@@ -338,6 +338,8 @@ extern int kill_pid(struct pid *pid, int sig, int priv); - extern __must_check bool do_notify_parent(struct task_struct *, int); - extern void __wake_up_parent(struct task_struct *p, struct task_struct *parent); - extern void force_sig(int); -+extern void force_fatal_sig(int); -+extern void force_exit_sig(int); - extern int send_sig(int, struct task_struct *, int); - extern int zap_other_threads(struct task_struct *p); - extern struct sigqueue *sigqueue_alloc(void); -diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h -index ef02be869cf28..caae8e045160d 100644 ---- a/include/linux/sched/task.h -+++ b/include/linux/sched/task.h -@@ -54,6 +54,7 @@ extern asmlinkage void schedule_tail(struct task_struct *prev); - extern void init_idle(struct task_struct *idle, int cpu); + mutex_lock(&cgroup_mutex); ++ cpus_read_lock(); + percpu_down_write(&cgroup_threadgroup_rwsem); + for_each_root(root) { + struct cgroup *from_cgrp; +@@ -75,6 +76,7 @@ int cgroup_attach_task_all(struct task_struct *from, struct task_struct *tsk) + break; + } + percpu_up_write(&cgroup_threadgroup_rwsem); ++ cpus_read_unlock(); + mutex_unlock(&cgroup_mutex); - extern int sched_fork(unsigned long clone_flags, struct task_struct *p); -+extern void sched_cgroup_fork(struct task_struct *p, struct kernel_clone_args *kargs); - extern void sched_post_fork(struct task_struct *p); - extern void sched_dead(struct task_struct *p); + return retval; +@@ -397,6 +399,7 @@ static void *cgroup_pidlist_start(struct seq_file *s, loff_t *pos) + * next pid to display, if any + */ + struct kernfs_open_file *of = s->private; ++ struct cgroup_file_ctx *ctx = of->priv; + struct cgroup *cgrp = seq_css(s)->cgroup; + struct cgroup_pidlist *l; + enum cgroup_filetype type = seq_cft(s)->private; +@@ -406,25 +409,24 @@ static void *cgroup_pidlist_start(struct seq_file *s, loff_t *pos) + mutex_lock(&cgrp->pidlist_mutex); -@@ -80,7 +81,7 @@ static inline void exit_thread(struct task_struct *tsk) - extern void do_group_exit(int); + /* +- * !NULL @of->priv indicates that this isn't the first start() +- * after open. If the matching pidlist is around, we can use that. +- * Look for it. Note that @of->priv can't be used directly. It +- * could already have been destroyed. ++ * !NULL @ctx->procs1.pidlist indicates that this isn't the first ++ * start() after open. If the matching pidlist is around, we can use ++ * that. Look for it. Note that @ctx->procs1.pidlist can't be used ++ * directly. It could already have been destroyed. 
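Editor's sketch (not patch content): the rule this comment spells out — treat the cached pointer as a hint and re-find the object before trusting it — reduces to the following shape, taken from the hunk below with the locking and error paths elided:

    /* The pidlist may have been freed by its delayed destroy worker since
     * the last start(), so a non-NULL cache only says "look it up again"
     * under cgrp->pidlist_mutex... */
    if (ctx->procs1.pidlist)
        ctx->procs1.pidlist = cgroup_pidlist_find(cgrp, type);

    /* ...and a fresh pid array is built only when nothing was found. */
    if (!ctx->procs1.pidlist)
        ret = pidlist_array_load(cgrp, type, &ctx->procs1.pidlist);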
+ */ +- if (of->priv) +- of->priv = cgroup_pidlist_find(cgrp, type); ++ if (ctx->procs1.pidlist) ++ ctx->procs1.pidlist = cgroup_pidlist_find(cgrp, type); - extern void exit_files(struct task_struct *); --extern void exit_itimers(struct signal_struct *); -+extern void exit_itimers(struct task_struct *); + /* + * Either this is the first start() after open or the matching + * pidlist has been destroyed inbetween. Create a new one. + */ +- if (!of->priv) { +- ret = pidlist_array_load(cgrp, type, +- (struct cgroup_pidlist **)&of->priv); ++ if (!ctx->procs1.pidlist) { ++ ret = pidlist_array_load(cgrp, type, &ctx->procs1.pidlist); + if (ret) + return ERR_PTR(ret); + } +- l = of->priv; ++ l = ctx->procs1.pidlist; - extern pid_t kernel_clone(struct kernel_clone_args *kargs); - struct task_struct *create_io_thread(int (*fn)(void *), void *arg, int node); -@@ -157,7 +158,7 @@ static inline struct vm_struct *task_stack_vm_area(const struct task_struct *t) - * Protects ->fs, ->files, ->mm, ->group_info, ->comm, keyring - * subscriptions and synchronises with wait4(). Also used in procfs. Also - * pins the final release of task.io_context. Also protects ->cpuset and -- * ->cgroup.subsys[]. And ->vfork_done. -+ * ->cgroup.subsys[]. And ->vfork_done. And ->sysvshm.shm_clist. - * - * Nests both inside and outside of read_lock(&tasklist_lock). - * It must not be nested with write_lock_irq(&tasklist_lock), -diff --git a/include/linux/sched/task_stack.h b/include/linux/sched/task_stack.h -index 2413427e439c7..1009b6b5ce403 100644 ---- a/include/linux/sched/task_stack.h -+++ b/include/linux/sched/task_stack.h -@@ -16,7 +16,7 @@ - * try_get_task_stack() instead. task_stack_page will return a pointer - * that could get freed out from under you. - */ --static inline void *task_stack_page(const struct task_struct *task) -+static __always_inline void *task_stack_page(const struct task_struct *task) + if (pid) { + int end = l->length; +@@ -452,7 +454,8 @@ static void *cgroup_pidlist_start(struct seq_file *s, loff_t *pos) + static void cgroup_pidlist_stop(struct seq_file *s, void *v) { - return task->stack; - } -@@ -25,7 +25,11 @@ static inline void *task_stack_page(const struct task_struct *task) + struct kernfs_open_file *of = s->private; +- struct cgroup_pidlist *l = of->priv; ++ struct cgroup_file_ctx *ctx = of->priv; ++ struct cgroup_pidlist *l = ctx->procs1.pidlist; - static inline unsigned long *end_of_stack(const struct task_struct *task) + if (l) + mod_delayed_work(cgroup_pidlist_destroy_wq, &l->destroy_dwork, +@@ -463,7 +466,8 @@ static void cgroup_pidlist_stop(struct seq_file *s, void *v) + static void *cgroup_pidlist_next(struct seq_file *s, void *v, loff_t *pos) { -+#ifdef CONFIG_STACK_GROWSUP -+ return (unsigned long *)((unsigned long)task->stack + THREAD_SIZE) - 1; -+#else - return task->stack; -+#endif - } - - #elif !defined(__HAVE_THREAD_FUNCTIONS) -diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h -index 8f0f778b7c911..63a04a65e3106 100644 ---- a/include/linux/sched/topology.h -+++ b/include/linux/sched/topology.h -@@ -74,6 +74,7 @@ struct sched_domain_shared { - atomic_t ref; - atomic_t nr_busy_cpus; - int has_idle_cores; -+ int nr_idle_scan; - }; - - struct sched_domain { -diff --git a/include/linux/scmi_protocol.h b/include/linux/scmi_protocol.h -index 80e781c51ddc1..d22f62203ee36 100644 ---- a/include/linux/scmi_protocol.h -+++ b/include/linux/scmi_protocol.h -@@ -74,7 +74,7 @@ struct scmi_protocol_handle; - struct scmi_clk_proto_ops { - int (*count_get)(const struct 
scmi_protocol_handle *ph); + struct kernfs_open_file *of = s->private; +- struct cgroup_pidlist *l = of->priv; ++ struct cgroup_file_ctx *ctx = of->priv; ++ struct cgroup_pidlist *l = ctx->procs1.pidlist; + pid_t *p = v; + pid_t *end = l->list + l->length; + /* +@@ -507,10 +511,11 @@ static ssize_t __cgroup1_procs_write(struct kernfs_open_file *of, + goto out_unlock; -- const struct scmi_clock_info *(*info_get) -+ const struct scmi_clock_info __must_check *(*info_get) - (const struct scmi_protocol_handle *ph, u32 clk_id); - int (*rate_get)(const struct scmi_protocol_handle *ph, u32 clk_id, - u64 *rate); -@@ -452,7 +452,7 @@ enum scmi_sensor_class { - */ - struct scmi_sensor_proto_ops { - int (*count_get)(const struct scmi_protocol_handle *ph); -- const struct scmi_sensor_info *(*info_get) -+ const struct scmi_sensor_info __must_check *(*info_get) - (const struct scmi_protocol_handle *ph, u32 sensor_id); - int (*trip_point_config)(const struct scmi_protocol_handle *ph, - u32 sensor_id, u8 trip_id, u64 trip_value); -diff --git a/include/linux/security.h b/include/linux/security.h -index 5b7288521300b..da184e7b361f4 100644 ---- a/include/linux/security.h -+++ b/include/linux/security.h -@@ -121,10 +121,12 @@ enum lockdown_reason { - LOCKDOWN_DEBUGFS, - LOCKDOWN_XMON_WR, - LOCKDOWN_BPF_WRITE_USER, -+ LOCKDOWN_DBG_WRITE_KERNEL, - LOCKDOWN_INTEGRITY_MAX, - LOCKDOWN_KCORE, - LOCKDOWN_KPROBES, - LOCKDOWN_BPF_READ_KERNEL, -+ LOCKDOWN_DBG_READ_KERNEL, - LOCKDOWN_PERF, - LOCKDOWN_TRACEFS, - LOCKDOWN_XMON_RW, -@@ -258,13 +260,13 @@ extern int security_init(void); - extern int early_security_init(void); + /* +- * Even if we're attaching all tasks in the thread group, we only +- * need to check permissions on one of them. ++ * Even if we're attaching all tasks in the thread group, we only need ++ * to check permissions on one of them. Check permissions using the ++ * credentials from file open to protect against inherited fd attacks. 
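Editor's sketch (not patch content): the change this comment justifies is a single line just below; both forms side by side for contrast:

    /* Before: authorize against whoever performs the write(2). A more
     * privileged task tricked into writing to an inherited fd would pass
     * the check with its own credentials. */
    cred = current_cred();

    /* After: authorize against the credentials captured at open(2) time,
     * so only the opener's privileges count. */
    cred = of->file->f_cred;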
+ */ +- cred = current_cred(); ++ cred = of->file->f_cred; + tcred = get_task_cred(task); + if (!uid_eq(cred->euid, GLOBAL_ROOT_UID) && + !uid_eq(cred->euid, tcred->uid) && +@@ -546,9 +551,19 @@ static ssize_t cgroup_release_agent_write(struct kernfs_open_file *of, + char *buf, size_t nbytes, loff_t off) + { + struct cgroup *cgrp; ++ struct cgroup_file_ctx *ctx; - /* Security operations */ --int security_binder_set_context_mgr(struct task_struct *mgr); --int security_binder_transaction(struct task_struct *from, -- struct task_struct *to); --int security_binder_transfer_binder(struct task_struct *from, -- struct task_struct *to); --int security_binder_transfer_file(struct task_struct *from, -- struct task_struct *to, struct file *file); -+int security_binder_set_context_mgr(const struct cred *mgr); -+int security_binder_transaction(const struct cred *from, -+ const struct cred *to); -+int security_binder_transfer_binder(const struct cred *from, -+ const struct cred *to); -+int security_binder_transfer_file(const struct cred *from, -+ const struct cred *to, struct file *file); - int security_ptrace_access_check(struct task_struct *child, unsigned int mode); - int security_ptrace_traceme(struct task_struct *parent); - int security_capget(struct task_struct *target, -@@ -508,25 +510,25 @@ static inline int early_security_init(void) - return 0; - } + BUILD_BUG_ON(sizeof(cgrp->root->release_agent_path) < PATH_MAX); --static inline int security_binder_set_context_mgr(struct task_struct *mgr) -+static inline int security_binder_set_context_mgr(const struct cred *mgr) - { - return 0; - } ++ /* ++ * Release agent gets called with all capabilities, ++ * require capabilities to set release agent. ++ */ ++ ctx = of->priv; ++ if ((ctx->ns->user_ns != &init_user_ns) || ++ !file_ns_capable(of->file, &init_user_ns, CAP_SYS_ADMIN)) ++ return -EPERM; ++ + cgrp = cgroup_kn_lock_live(of->kn, false); + if (!cgrp) + return -ENODEV; +@@ -960,6 +975,12 @@ int cgroup1_parse_param(struct fs_context *fc, struct fs_parameter *param) + /* Specifying two release agents is forbidden */ + if (ctx->release_agent) + return invalfc(fc, "release_agent respecified"); ++ /* ++ * Release agent gets called with all capabilities, ++ * require capabilities to set release agent. 
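Editor's sketch (not patch content): a commented copy of the gate this comment introduces (the check itself follows just below):

    /* The release agent is later invoked with full capabilities, so
     * setting it is equivalent to running code as root: the writer must
     * be in the initial user namespace, and the file must have been
     * opened by a task that held CAP_SYS_ADMIN there. */
    if ((ctx->ns->user_ns != &init_user_ns) ||
        !file_ns_capable(of->file, &init_user_ns, CAP_SYS_ADMIN))
        return -EPERM;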
++ */ ++ if ((fc->user_ns != &init_user_ns) || !capable(CAP_SYS_ADMIN)) ++ return invalfc(fc, "Setting release_agent not allowed"); + ctx->release_agent = param->string; + param->string = NULL; + break; +diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c +index ea08f01d0111a..a92990f070d12 100644 +--- a/kernel/cgroup/cgroup.c ++++ b/kernel/cgroup/cgroup.c +@@ -764,7 +764,8 @@ struct css_set init_css_set = { + .task_iters = LIST_HEAD_INIT(init_css_set.task_iters), + .threaded_csets = LIST_HEAD_INIT(init_css_set.threaded_csets), + .cgrp_links = LIST_HEAD_INIT(init_css_set.cgrp_links), +- .mg_preload_node = LIST_HEAD_INIT(init_css_set.mg_preload_node), ++ .mg_src_preload_node = LIST_HEAD_INIT(init_css_set.mg_src_preload_node), ++ .mg_dst_preload_node = LIST_HEAD_INIT(init_css_set.mg_dst_preload_node), + .mg_node = LIST_HEAD_INIT(init_css_set.mg_node), --static inline int security_binder_transaction(struct task_struct *from, -- struct task_struct *to) -+static inline int security_binder_transaction(const struct cred *from, -+ const struct cred *to) - { - return 0; - } + /* +@@ -1239,7 +1240,8 @@ static struct css_set *find_css_set(struct css_set *old_cset, + INIT_LIST_HEAD(&cset->threaded_csets); + INIT_HLIST_NODE(&cset->hlist); + INIT_LIST_HEAD(&cset->cgrp_links); +- INIT_LIST_HEAD(&cset->mg_preload_node); ++ INIT_LIST_HEAD(&cset->mg_src_preload_node); ++ INIT_LIST_HEAD(&cset->mg_dst_preload_node); + INIT_LIST_HEAD(&cset->mg_node); --static inline int security_binder_transfer_binder(struct task_struct *from, -- struct task_struct *to) -+static inline int security_binder_transfer_binder(const struct cred *from, -+ const struct cred *to) - { - return 0; - } + /* Copy the set of subsystem state objects generated in +@@ -1740,6 +1742,7 @@ int rebind_subsystems(struct cgroup_root *dst_root, u16 ss_mask) + struct cgroup *dcgrp = &dst_root->cgrp; + struct cgroup_subsys *ss; + int ssid, i, ret; ++ u16 dfl_disable_ss_mask = 0; --static inline int security_binder_transfer_file(struct task_struct *from, -- struct task_struct *to, -+static inline int security_binder_transfer_file(const struct cred *from, -+ const struct cred *to, - struct file *file) - { - return 0; -@@ -1041,6 +1043,11 @@ static inline void security_transfer_creds(struct cred *new, - { - } + lockdep_assert_held(&cgroup_mutex); -+static inline void security_cred_getsecid(const struct cred *c, u32 *secid) -+{ -+ *secid = 0; -+} +@@ -1756,8 +1759,28 @@ int rebind_subsystems(struct cgroup_root *dst_root, u16 ss_mask) + /* can't move between two non-dummy roots either */ + if (ss->root != &cgrp_dfl_root && dst_root != &cgrp_dfl_root) + return -EBUSY; + - static inline int security_kernel_act_as(struct cred *cred, u32 secid) - { - return 0; -diff --git a/include/linux/seq_file.h b/include/linux/seq_file.h -index dd99569595fd3..0b429111f85e4 100644 ---- a/include/linux/seq_file.h -+++ b/include/linux/seq_file.h -@@ -194,7 +194,7 @@ static const struct file_operations __name ## _fops = { \ - #define DEFINE_PROC_SHOW_ATTRIBUTE(__name) \ - static int __name ## _open(struct inode *inode, struct file *file) \ - { \ -- return single_open(file, __name ## _show, inode->i_private); \ -+ return single_open(file, __name ## _show, PDE_DATA(inode)); \ - } \ - \ - static const struct proc_ops __name ## _proc_ops = { \ -@@ -261,6 +261,10 @@ extern struct list_head *seq_list_start_head(struct list_head *head, - extern struct list_head *seq_list_next(void *v, struct list_head *head, - loff_t *ppos); ++ /* ++ * Collect ssid's that need to be disabled 
from default ++ * hierarchy. ++ */ ++ if (ss->root == &cgrp_dfl_root) ++ dfl_disable_ss_mask |= 1 << ssid; ++ + } while_each_subsys_mask(); -+extern struct list_head *seq_list_start_rcu(struct list_head *head, loff_t pos); -+extern struct list_head *seq_list_start_head_rcu(struct list_head *head, loff_t pos); -+extern struct list_head *seq_list_next_rcu(void *v, struct list_head *head, loff_t *ppos); ++ if (dfl_disable_ss_mask) { ++ struct cgroup *scgrp = &cgrp_dfl_root.cgrp; + - /* - * Helpers for iteration over hlist_head-s in seq_files - */ -diff --git a/include/linux/serial_8250.h b/include/linux/serial_8250.h -index 5db211f43b29e..68abc6bdd8914 100644 ---- a/include/linux/serial_8250.h -+++ b/include/linux/serial_8250.h -@@ -74,6 +74,7 @@ struct uart_8250_port; - struct uart_8250_ops { - int (*setup_irq)(struct uart_8250_port *); - void (*release_irq)(struct uart_8250_port *); -+ void (*setup_timer)(struct uart_8250_port *); - }; ++ /* ++ * Controllers from default hierarchy that need to be rebound ++ * are all disabled together in one go. ++ */ ++ cgrp_dfl_root.subsys_mask &= ~dfl_disable_ss_mask; ++ WARN_ON(cgroup_apply_control(scgrp)); ++ cgroup_finalize_control(scgrp, 0); ++ } ++ + do_each_subsys_mask(ss, ssid, ss_mask) { + struct cgroup_root *src_root = ss->root; + struct cgroup *scgrp = &src_root->cgrp; +@@ -1766,10 +1789,12 @@ int rebind_subsystems(struct cgroup_root *dst_root, u16 ss_mask) - struct uart_8250_em485 { -diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h -index c58cc142d23f4..86fa187f6d65e 100644 ---- a/include/linux/serial_core.h -+++ b/include/linux/serial_core.h -@@ -100,7 +100,7 @@ struct uart_icount { - __u32 buf_overrun; - }; + WARN_ON(!css || cgroup_css(dcgrp, ss)); --typedef unsigned int __bitwise upf_t; -+typedef u64 __bitwise upf_t; - typedef unsigned int __bitwise upstat_t; +- /* disable from the source */ +- src_root->subsys_mask &= ~(1 << ssid); +- WARN_ON(cgroup_apply_control(scgrp)); +- cgroup_finalize_control(scgrp, 0); ++ if (src_root != &cgrp_dfl_root) { ++ /* disable from the source */ ++ src_root->subsys_mask &= ~(1 << ssid); ++ WARN_ON(cgroup_apply_control(scgrp)); ++ cgroup_finalize_control(scgrp, 0); ++ } - struct uart_port { -@@ -207,6 +207,7 @@ struct uart_port { - #define UPF_FIXED_PORT ((__force upf_t) (1 << 29)) - #define UPF_DEAD ((__force upf_t) (1 << 30)) - #define UPF_IOREMAP ((__force upf_t) (1 << 31)) -+#define UPF_FULL_PROBE ((__force upf_t) (1ULL << 32)) + /* rebind */ + RCU_INIT_POINTER(scgrp->subsys[ssid], NULL); +@@ -1785,6 +1810,7 @@ int rebind_subsystems(struct cgroup_root *dst_root, u16 ss_mask) - #define __UPF_CHANGE_MASK 0x17fff - #define UPF_CHANGE_MASK ((__force upf_t) __UPF_CHANGE_MASK) -@@ -300,6 +301,23 @@ struct uart_state { - /* number of characters left in xmit buffer before we ask for more */ - #define WAKEUP_CHARS 256 + if (ss->css_rstat_flush) { + list_del_rcu(&css->rstat_css_node); ++ synchronize_rcu(); + list_add_rcu(&css->rstat_css_node, + &dcgrp->rstat_css_list); + } +@@ -2319,6 +2345,47 @@ int task_cgroup_path(struct task_struct *task, char *buf, size_t buflen) + } + EXPORT_SYMBOL_GPL(task_cgroup_path); +/** -+ * uart_xmit_advance - Advance xmit buffer and account Tx'ed chars -+ * @up: uart_port structure describing the port -+ * @chars: number of characters sent ++ * cgroup_attach_lock - Lock for ->attach() ++ * @lock_threadgroup: whether to down_write cgroup_threadgroup_rwsem + * -+ * This function advances the tail of circular xmit buffer by the number of -+ * @chars transmitted and 
handles accounting of transmitted bytes (into -+ * @up's icount.tx). ++ * cgroup migration sometimes needs to stabilize threadgroups against forks and ++ * exits by write-locking cgroup_threadgroup_rwsem. However, some ->attach() ++ * implementations (e.g. cpuset), also need to disable CPU hotplug. ++ * Unfortunately, letting ->attach() operations acquire cpus_read_lock() can ++ * lead to deadlocks. ++ * ++ * Bringing up a CPU may involve creating and destroying tasks which requires ++ * read-locking threadgroup_rwsem, so threadgroup_rwsem nests inside ++ * cpus_read_lock(). If we call an ->attach() which acquires the cpus lock while ++ * write-locking threadgroup_rwsem, the locking order is reversed and we end up ++ * waiting for an on-going CPU hotplug operation which in turn is waiting for ++ * the threadgroup_rwsem to be released to create new tasks. For more details: ++ * ++ * http://lkml.kernel.org/r/20220711174629.uehfmqegcwn2lqzu@wubuntu ++ * ++ * Resolve the situation by always acquiring cpus_read_lock() before optionally ++ * write-locking cgroup_threadgroup_rwsem. This allows ->attach() to assume that ++ * CPU hotplug is disabled on entry. + */ -+static inline void uart_xmit_advance(struct uart_port *up, unsigned int chars) ++static void cgroup_attach_lock(bool lock_threadgroup) +{ -+ struct circ_buf *xmit = &up->state->xmit; -+ -+ xmit->tail = (xmit->tail + chars) & (UART_XMIT_SIZE - 1); -+ up->icount.tx += chars; ++ cpus_read_lock(); ++ if (lock_threadgroup) ++ percpu_down_write(&cgroup_threadgroup_rwsem); +} + - struct module; - struct tty_driver; - -@@ -388,6 +406,11 @@ static const bool earlycon_acpi_spcr_enable EARLYCON_USED_OR_UNUSED; - static inline int setup_earlycon(char *buf) { return 0; } - #endif - -+static inline bool uart_console_enabled(struct uart_port *port) ++/** ++ * cgroup_attach_unlock - Undo cgroup_attach_lock() ++ * @lock_threadgroup: whether to up_write cgroup_threadgroup_rwsem ++ */ ++static void cgroup_attach_unlock(bool lock_threadgroup) +{ -+ return uart_console(port) && (port->cons->flags & CON_ENABLED); ++ if (lock_threadgroup) ++ percpu_up_write(&cgroup_threadgroup_rwsem); ++ cpus_read_unlock(); +} + - struct uart_port *uart_get_console(struct uart_port *ports, int nr, - struct console *c); - int uart_parse_earlycon(char *p, unsigned char *iotype, resource_size_t *addr, -@@ -458,6 +481,8 @@ extern void uart_handle_cts_change(struct uart_port *uport, - extern void uart_insert_char(struct uart_port *port, unsigned int status, - unsigned int overrun, unsigned int ch, unsigned int flag); + /** + * cgroup_migrate_add_task - add a migration target task to a migration context + * @task: target task +@@ -2573,21 +2640,27 @@ int cgroup_migrate_vet_dst(struct cgroup *dst_cgrp) + */ + void cgroup_migrate_finish(struct cgroup_mgctx *mgctx) + { +- LIST_HEAD(preloaded); + struct css_set *cset, *tmp_cset; -+void uart_xchar_out(struct uart_port *uport, int offset); -+ - #ifdef CONFIG_MAGIC_SYSRQ_SERIAL - #define SYSRQ_TIMEOUT (HZ * 5) + lockdep_assert_held(&cgroup_mutex); -diff --git a/include/linux/signal_types.h b/include/linux/signal_types.h -index 34cb28b8f16ca..a70b2bdbf4d96 100644 ---- a/include/linux/signal_types.h -+++ b/include/linux/signal_types.h -@@ -70,6 +70,9 @@ struct ksignal { - int sig; - }; + spin_lock_irq(&css_set_lock); -+/* Used to kill the race between sigaction and forced signals */ -+#define SA_IMMUTABLE 0x00800000 -+ - #ifndef __ARCH_UAPI_SA_FLAGS - #ifdef SA_RESTORER - #define __ARCH_UAPI_SA_FLAGS SA_RESTORER -diff --git 
a/include/linux/siphash.h b/include/linux/siphash.h -index bf21591a9e5e6..0bb5ecd507bef 100644 ---- a/include/linux/siphash.h -+++ b/include/linux/siphash.h -@@ -27,9 +27,7 @@ static inline bool siphash_key_is_zero(const siphash_key_t *key) - } +- list_splice_tail_init(&mgctx->preloaded_src_csets, &preloaded); +- list_splice_tail_init(&mgctx->preloaded_dst_csets, &preloaded); ++ list_for_each_entry_safe(cset, tmp_cset, &mgctx->preloaded_src_csets, ++ mg_src_preload_node) { ++ cset->mg_src_cgrp = NULL; ++ cset->mg_dst_cgrp = NULL; ++ cset->mg_dst_cset = NULL; ++ list_del_init(&cset->mg_src_preload_node); ++ put_css_set_locked(cset); ++ } - u64 __siphash_aligned(const void *data, size_t len, const siphash_key_t *key); --#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS - u64 __siphash_unaligned(const void *data, size_t len, const siphash_key_t *key); --#endif +- list_for_each_entry_safe(cset, tmp_cset, &preloaded, mg_preload_node) { ++ list_for_each_entry_safe(cset, tmp_cset, &mgctx->preloaded_dst_csets, ++ mg_dst_preload_node) { + cset->mg_src_cgrp = NULL; + cset->mg_dst_cgrp = NULL; + cset->mg_dst_cset = NULL; +- list_del_init(&cset->mg_preload_node); ++ list_del_init(&cset->mg_dst_preload_node); + put_css_set_locked(cset); + } - u64 siphash_1u64(const u64 a, const siphash_key_t *key); - u64 siphash_2u64(const u64 a, const u64 b, const siphash_key_t *key); -@@ -82,10 +80,9 @@ static inline u64 ___siphash_aligned(const __le64 *data, size_t len, - static inline u64 siphash(const void *data, size_t len, - const siphash_key_t *key) - { --#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS -- if (!IS_ALIGNED((unsigned long)data, SIPHASH_ALIGNMENT)) -+ if (IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) || -+ !IS_ALIGNED((unsigned long)data, SIPHASH_ALIGNMENT)) - return __siphash_unaligned(data, len, key); --#endif - return ___siphash_aligned(data, len, key); - } +@@ -2629,7 +2702,7 @@ void cgroup_migrate_add_src(struct css_set *src_cset, -@@ -96,10 +93,8 @@ typedef struct { + src_cgrp = cset_cgroup_from_root(src_cset, dst_cgrp->root); - u32 __hsiphash_aligned(const void *data, size_t len, - const hsiphash_key_t *key); --#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS - u32 __hsiphash_unaligned(const void *data, size_t len, - const hsiphash_key_t *key); --#endif +- if (!list_empty(&src_cset->mg_preload_node)) ++ if (!list_empty(&src_cset->mg_src_preload_node)) + return; - u32 hsiphash_1u32(const u32 a, const hsiphash_key_t *key); - u32 hsiphash_2u32(const u32 a, const u32 b, const hsiphash_key_t *key); -@@ -135,11 +130,38 @@ static inline u32 ___hsiphash_aligned(const __le32 *data, size_t len, - static inline u32 hsiphash(const void *data, size_t len, - const hsiphash_key_t *key) - { --#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS -- if (!IS_ALIGNED((unsigned long)data, HSIPHASH_ALIGNMENT)) -+ if (IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) || -+ !IS_ALIGNED((unsigned long)data, HSIPHASH_ALIGNMENT)) - return __hsiphash_unaligned(data, len, key); --#endif - return ___hsiphash_aligned(data, len, key); + WARN_ON(src_cset->mg_src_cgrp); +@@ -2640,7 +2713,7 @@ void cgroup_migrate_add_src(struct css_set *src_cset, + src_cset->mg_src_cgrp = src_cgrp; + src_cset->mg_dst_cgrp = dst_cgrp; + get_css_set(src_cset); +- list_add_tail(&src_cset->mg_preload_node, &mgctx->preloaded_src_csets); ++ list_add_tail(&src_cset->mg_src_preload_node, &mgctx->preloaded_src_csets); } -+/* -+ * These macros expose the raw SipHash and HalfSipHash permutations. -+ * Do not use them directly! 
If you think you have a use for them, -+ * be sure to CC the maintainer of this file explaining why. -+ */ -+ -+#define SIPHASH_PERMUTATION(a, b, c, d) ( \ -+ (a) += (b), (b) = rol64((b), 13), (b) ^= (a), (a) = rol64((a), 32), \ -+ (c) += (d), (d) = rol64((d), 16), (d) ^= (c), \ -+ (a) += (d), (d) = rol64((d), 21), (d) ^= (a), \ -+ (c) += (b), (b) = rol64((b), 17), (b) ^= (c), (c) = rol64((c), 32)) -+ -+#define SIPHASH_CONST_0 0x736f6d6570736575ULL -+#define SIPHASH_CONST_1 0x646f72616e646f6dULL -+#define SIPHASH_CONST_2 0x6c7967656e657261ULL -+#define SIPHASH_CONST_3 0x7465646279746573ULL -+ -+#define HSIPHASH_PERMUTATION(a, b, c, d) ( \ -+ (a) += (b), (b) = rol32((b), 5), (b) ^= (a), (a) = rol32((a), 16), \ -+ (c) += (d), (d) = rol32((d), 8), (d) ^= (c), \ -+ (a) += (d), (d) = rol32((d), 7), (d) ^= (a), \ -+ (c) += (b), (b) = rol32((b), 13), (b) ^= (c), (c) = rol32((c), 16)) -+ -+#define HSIPHASH_CONST_0 0U -+#define HSIPHASH_CONST_1 0U -+#define HSIPHASH_CONST_2 0x6c796765U -+#define HSIPHASH_CONST_3 0x74656462U -+ - #endif /* _LINUX_SIPHASH_H */ -diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h -index 841e2f0f5240b..19e595cab23ac 100644 ---- a/include/linux/skbuff.h -+++ b/include/linux/skbuff.h -@@ -286,7 +286,10 @@ struct nf_bridge_info { - struct tc_skb_ext { - __u32 chain; - __u16 mru; -- bool post_ct; -+ __u16 zone; -+ u8 post_ct:1; -+ u8 post_ct_snat:1; -+ u8 post_ct_dnat:1; - }; - #endif - -@@ -301,6 +304,41 @@ struct sk_buff_head { + /** +@@ -2665,7 +2738,7 @@ int cgroup_migrate_prepare_dst(struct cgroup_mgctx *mgctx) - struct sk_buff; + /* look up the dst cset for each src cset and link it to src */ + list_for_each_entry_safe(src_cset, tmp_cset, &mgctx->preloaded_src_csets, +- mg_preload_node) { ++ mg_src_preload_node) { + struct css_set *dst_cset; + struct cgroup_subsys *ss; + int ssid; +@@ -2684,7 +2757,7 @@ int cgroup_migrate_prepare_dst(struct cgroup_mgctx *mgctx) + if (src_cset == dst_cset) { + src_cset->mg_src_cgrp = NULL; + src_cset->mg_dst_cgrp = NULL; +- list_del_init(&src_cset->mg_preload_node); ++ list_del_init(&src_cset->mg_src_preload_node); + put_css_set(src_cset); + put_css_set(dst_cset); + continue; +@@ -2692,8 +2765,8 @@ int cgroup_migrate_prepare_dst(struct cgroup_mgctx *mgctx) -+/* The reason of skb drop, which is used in kfree_skb_reason(). -+ * en...maybe they should be splited by group? -+ * -+ * Each item here should also be in 'TRACE_SKB_DROP_REASON', which is -+ * used to translate the reason to string. -+ */ -+enum skb_drop_reason { -+ SKB_DROP_REASON_NOT_SPECIFIED, /* drop reason is not specified */ -+ SKB_DROP_REASON_NO_SOCKET, /* socket not found */ -+ SKB_DROP_REASON_PKT_TOO_SMALL, /* packet size is too small */ -+ SKB_DROP_REASON_TCP_CSUM, /* TCP checksum error */ -+ SKB_DROP_REASON_SOCKET_FILTER, /* dropped by socket filter */ -+ SKB_DROP_REASON_UDP_CSUM, /* UDP checksum error */ -+ SKB_DROP_REASON_NETFILTER_DROP, /* dropped by netfilter */ -+ SKB_DROP_REASON_OTHERHOST, /* packet don't belong to current -+ * host (interface is in promisc -+ * mode) -+ */ -+ SKB_DROP_REASON_IP_CSUM, /* IP checksum error */ -+ SKB_DROP_REASON_IP_INHDR, /* there is something wrong with -+ * IP header (see -+ * IPSTATS_MIB_INHDRERRORS) -+ */ -+ SKB_DROP_REASON_IP_RPFILTER, /* IP rpfilter validate failed. -+ * see the document for rp_filter -+ * in ip-sysctl.rst for more -+ * information -+ */ -+ SKB_DROP_REASON_UNICAST_IN_L2_MULTICAST, /* destination address of L2 -+ * is multicast, but L3 is -+ * unicast. 
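Editor's sketch (not patch content): the SIPHASH_PERMUTATION macro and the four constants exported earlier in this hunk can be exercised standalone. A minimal userspace round, with rol64 written out since the kernel helper is unavailable here:

    #include <stdint.h>
    #include <stdio.h>

    #define rol64(x, b) (((x) << (b)) | ((x) >> (64 - (b))))

    /* Same shape as the macro added to <linux/siphash.h> above. */
    #define SIPHASH_PERMUTATION(a, b, c, d) ( \
        (a) += (b), (b) = rol64((b), 13), (b) ^= (a), (a) = rol64((a), 32), \
        (c) += (d), (d) = rol64((d), 16), (d) ^= (c), \
        (a) += (d), (d) = rol64((d), 21), (d) ^= (a), \
        (c) += (b), (b) = rol64((b), 17), (b) ^= (c), (c) = rol64((c), 32))

    int main(void)
    {
        /* v0..v3 seeded with SIPHASH_CONST_0..3 from the patch. */
        uint64_t v0 = 0x736f6d6570736575ULL, v1 = 0x646f72616e646f6dULL;
        uint64_t v2 = 0x6c7967656e657261ULL, v3 = 0x7465646279746573ULL;

        SIPHASH_PERMUTATION(v0, v1, v2, v3); /* one SipRound */
        printf("%016llx %016llx %016llx %016llx\n",
               (unsigned long long)v0, (unsigned long long)v1,
               (unsigned long long)v2, (unsigned long long)v3);
        return 0;
    }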
-+ */ -+ SKB_DROP_REASON_MAX, -+}; -+ - /* To allow 64K frame to be packed as single skb without frag_list we - * require 64K/PAGE_SIZE pages plus 1 additional page to allow for - * buffers which do not start on a page boundary. -@@ -687,6 +725,7 @@ typedef unsigned char *sk_buff_data_t; - * @csum_level: indicates the number of consecutive checksums found in - * the packet minus one that have been verified as - * CHECKSUM_UNNECESSARY (max 3) -+ * @scm_io_uring: SKB holds io_uring registered files - * @dst_pending_confirm: need to confirm neighbour - * @decrypted: Decrypted SKB - * @slow_gro: state present at GRO time, slower prepare step required -@@ -872,6 +911,7 @@ struct sk_buff { - __u8 decrypted:1; - #endif - __u8 slow_gro:1; -+ __u8 scm_io_uring:1; + src_cset->mg_dst_cset = dst_cset; - #ifdef CONFIG_NET_SCHED - __u16 tc_index; /* traffic control index */ -@@ -1071,8 +1111,18 @@ static inline bool skb_unref(struct sk_buff *skb) - return true; +- if (list_empty(&dst_cset->mg_preload_node)) +- list_add_tail(&dst_cset->mg_preload_node, ++ if (list_empty(&dst_cset->mg_dst_preload_node)) ++ list_add_tail(&dst_cset->mg_dst_preload_node, + &mgctx->preloaded_dst_csets); + else + put_css_set(dst_cset); +@@ -2789,8 +2862,7 @@ int cgroup_attach_task(struct cgroup *dst_cgrp, struct task_struct *leader, } -+void kfree_skb_reason(struct sk_buff *skb, enum skb_drop_reason reason); -+ -+/** -+ * kfree_skb - free an sk_buff with 'NOT_SPECIFIED' reason -+ * @skb: buffer to free -+ */ -+static inline void kfree_skb(struct sk_buff *skb) -+{ -+ kfree_skb_reason(skb, SKB_DROP_REASON_NOT_SPECIFIED); -+} -+ - void skb_release_head_state(struct sk_buff *skb); --void kfree_skb(struct sk_buff *skb); - void kfree_skb_list(struct sk_buff *segs); - void skb_dump(const char *level, const struct sk_buff *skb, bool full_pkt); - void skb_tx_error(struct sk_buff *skb); -@@ -1370,7 +1420,7 @@ skb_flow_dissect_ct(const struct sk_buff *skb, - struct flow_dissector *flow_dissector, - void *target_container, - u16 *ctinfo_map, size_t mapsize, -- bool post_ct); -+ bool post_ct, u16 zone); - void - skb_flow_dissect_tunnel_info(const struct sk_buff *skb, - struct flow_dissector *flow_dissector, -@@ -1433,6 +1483,11 @@ static inline unsigned int skb_end_offset(const struct sk_buff *skb) + struct task_struct *cgroup_procs_write_start(char *buf, bool threadgroup, +- bool *locked) +- __acquires(&cgroup_threadgroup_rwsem) ++ bool *threadgroup_locked) { - return skb->end; + struct task_struct *tsk; + pid_t pid; +@@ -2807,12 +2879,8 @@ struct task_struct *cgroup_procs_write_start(char *buf, bool threadgroup, + * Therefore, we can skip the global lock. 
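Editor's sketch (not patch content): this comment pairs with the cgroup_attach_lock()/cgroup_attach_unlock() helpers added earlier in the patch. Reduced to its shape, the caller does:

    /* A task writing "0" to move itself as a single thread needs no
     * threadgroup stability, so only pid/threadgroup writers pay for the
     * rwsem; the CPU hotplug read lock is always taken, always first. */
    bool threadgroup_locked = pid || threadgroup;

    cgroup_attach_lock(threadgroup_locked);
    /* ... look up the target task and migrate it ... */
    cgroup_attach_unlock(threadgroup_locked);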
+ */ + lockdep_assert_held(&cgroup_mutex); +- if (pid || threadgroup) { +- percpu_down_write(&cgroup_threadgroup_rwsem); +- *locked = true; +- } else { +- *locked = false; +- } ++ *threadgroup_locked = pid || threadgroup; ++ cgroup_attach_lock(*threadgroup_locked); + + rcu_read_lock(); + if (pid) { +@@ -2843,17 +2911,14 @@ struct task_struct *cgroup_procs_write_start(char *buf, bool threadgroup, + goto out_unlock_rcu; + + out_unlock_threadgroup: +- if (*locked) { +- percpu_up_write(&cgroup_threadgroup_rwsem); +- *locked = false; +- } ++ cgroup_attach_unlock(*threadgroup_locked); ++ *threadgroup_locked = false; + out_unlock_rcu: + rcu_read_unlock(); + return tsk; } -+ -+static inline void skb_set_end_offset(struct sk_buff *skb, unsigned int offset) -+{ -+ skb->end = offset; -+} - #else - static inline unsigned char *skb_end_pointer(const struct sk_buff *skb) - { -@@ -1443,6 +1498,11 @@ static inline unsigned int skb_end_offset(const struct sk_buff *skb) + +-void cgroup_procs_write_finish(struct task_struct *task, bool locked) +- __releases(&cgroup_threadgroup_rwsem) ++void cgroup_procs_write_finish(struct task_struct *task, bool threadgroup_locked) { - return skb->end - skb->head; - } + struct cgroup_subsys *ss; + int ssid; +@@ -2861,8 +2926,8 @@ void cgroup_procs_write_finish(struct task_struct *task, bool locked) + /* release reference from cgroup_procs_write_start() */ + put_task_struct(task); + +- if (locked) +- percpu_up_write(&cgroup_threadgroup_rwsem); ++ cgroup_attach_unlock(threadgroup_locked); + -+static inline void skb_set_end_offset(struct sk_buff *skb, unsigned int offset) -+{ -+ skb->end = skb->head + offset; -+} - #endif + for_each_subsys(ss, ssid) + if (ss->post_attach) + ss->post_attach(); +@@ -2917,12 +2982,11 @@ static int cgroup_update_dfl_csses(struct cgroup *cgrp) + struct cgroup_subsys_state *d_css; + struct cgroup *dsct; + struct css_set *src_cset; ++ bool has_tasks; + int ret; - /* Internal */ -@@ -1671,6 +1731,22 @@ static inline int skb_unclone(struct sk_buff *skb, gfp_t pri) - return 0; - } + lockdep_assert_held(&cgroup_mutex); -+/* This variant of skb_unclone() makes sure skb->truesize -+ * and skb_end_offset() are not changed, whenever a new skb->head is needed. -+ * -+ * Indeed there is no guarantee that ksize(kmalloc(X)) == ksize(kmalloc(X)) -+ * when various debugging features are in place. -+ */ -+int __skb_unclone_keeptruesize(struct sk_buff *skb, gfp_t pri); -+static inline int skb_unclone_keeptruesize(struct sk_buff *skb, gfp_t pri) -+{ -+ might_sleep_if(gfpflags_allow_blocking(pri)); -+ -+ if (skb_cloned(skb)) -+ return __skb_unclone_keeptruesize(skb, pri); -+ return 0; -+} -+ - /** - * skb_header_cloned - is the header a clone - * @skb: buffer to check -@@ -2158,6 +2234,22 @@ static inline unsigned int skb_pagelen(const struct sk_buff *skb) - return skb_headlen(skb) + __skb_pagelen(skb); - } +- percpu_down_write(&cgroup_threadgroup_rwsem); +- + /* look up all csses currently attached to @cgrp's subtree */ + spin_lock_irq(&css_set_lock); + cgroup_for_each_live_descendant_pre(dsct, d_css, cgrp) { +@@ -2933,13 +2997,23 @@ static int cgroup_update_dfl_csses(struct cgroup *cgrp) + } + spin_unlock_irq(&css_set_lock); -+static inline void __skb_fill_page_desc_noacc(struct skb_shared_info *shinfo, -+ int i, struct page *page, -+ int off, int size) -+{ -+ skb_frag_t *frag = &shinfo->frags[i]; -+ + /* -+ * Propagate page pfmemalloc to the skb if we can. 
The problem is -+ * that not all callers have unique ownership of the page but rely -+ * on page_is_pfmemalloc doing the right thing(tm). ++ * We need to write-lock threadgroup_rwsem while migrating tasks. ++ * However, if there are no source csets for @cgrp, changing its ++ * controllers isn't gonna produce any task migrations and the ++ * write-locking can be skipped safely. + */ -+ frag->bv_page = page; -+ frag->bv_offset = off; -+ skb_frag_size_set(frag, size); -+} ++ has_tasks = !list_empty(&mgctx.preloaded_src_csets); ++ cgroup_attach_lock(has_tasks); + - /** - * __skb_fill_page_desc - initialise a paged fragment in an skb - * @skb: buffer containing fragment to be initialised -@@ -2174,17 +2266,7 @@ static inline unsigned int skb_pagelen(const struct sk_buff *skb) - static inline void __skb_fill_page_desc(struct sk_buff *skb, int i, - struct page *page, int off, int size) - { -- skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; -- -- /* -- * Propagate page pfmemalloc to the skb if we can. The problem is -- * that not all callers have unique ownership of the page but rely -- * on page_is_pfmemalloc doing the right thing(tm). -- */ -- frag->bv_page = page; -- frag->bv_offset = off; -- skb_frag_size_set(frag, size); -- -+ __skb_fill_page_desc_noacc(skb_shinfo(skb), i, page, off, size); - page = compound_head(page); - if (page_is_pfmemalloc(page)) - skb->pfmemalloc = true; -@@ -2211,6 +2293,27 @@ static inline void skb_fill_page_desc(struct sk_buff *skb, int i, - skb_shinfo(skb)->nr_frags = i + 1; - } + /* NULL dst indicates self on default hierarchy */ + ret = cgroup_migrate_prepare_dst(&mgctx); + if (ret) + goto out_finish; -+/** -+ * skb_fill_page_desc_noacc - initialise a paged fragment in an skb -+ * @skb: buffer containing fragment to be initialised -+ * @i: paged fragment index to initialise -+ * @page: the page to use for this fragment -+ * @off: the offset to the data with @page -+ * @size: the length of the data -+ * -+ * Variant of skb_fill_page_desc() which does not deal with -+ * pfmemalloc, if page is not owned by us. 
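Editor's sketch (not patch content): the two fragment-fill variants in this hunk differ only in the accounting step this comment describes; side by side:

    /* Page owned by the caller: fill the fragment and let a pfmemalloc
     * page mark the whole skb, preserving the memory-pressure signal. */
    skb_fill_page_desc(skb, i, page, off, size);

    /* Page not owned by the caller: fill the fragment only, so that
     * page_is_pfmemalloc() on a shared page cannot taint skb->pfmemalloc. */
    skb_fill_page_desc_noacc(skb, i, page, off, size);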
-+ */ -+static inline void skb_fill_page_desc_noacc(struct sk_buff *skb, int i, -+ struct page *page, int off, -+ int size) -+{ -+ struct skb_shared_info *shinfo = skb_shinfo(skb); -+ -+ __skb_fill_page_desc_noacc(shinfo, i, page, off, size); -+ shinfo->nr_frags = i + 1; -+} -+ - void skb_add_rx_frag(struct sk_buff *skb, int i, struct page *page, int off, - int size, unsigned int truesize); + spin_lock_irq(&css_set_lock); +- list_for_each_entry(src_cset, &mgctx.preloaded_src_csets, mg_preload_node) { ++ list_for_each_entry(src_cset, &mgctx.preloaded_src_csets, ++ mg_src_preload_node) { + struct task_struct *task, *ntask; -@@ -2254,6 +2357,14 @@ static inline void skb_set_tail_pointer(struct sk_buff *skb, const int offset) + /* all tasks in src_csets need to be migrated */ +@@ -2951,7 +3025,7 @@ static int cgroup_update_dfl_csses(struct cgroup *cgrp) + ret = cgroup_migrate_execute(&mgctx); + out_finish: + cgroup_migrate_finish(&mgctx); +- percpu_up_write(&cgroup_threadgroup_rwsem); ++ cgroup_attach_unlock(has_tasks); + return ret; + } - #endif /* NET_SKBUFF_DATA_USES_OFFSET */ +@@ -3607,6 +3681,7 @@ static int cgroup_cpu_pressure_show(struct seq_file *seq, void *v) + static ssize_t cgroup_pressure_write(struct kernfs_open_file *of, char *buf, + size_t nbytes, enum psi_res res) + { ++ struct cgroup_file_ctx *ctx = of->priv; + struct psi_trigger *new; + struct cgroup *cgrp; + struct psi_group *psi; +@@ -3618,6 +3693,12 @@ static ssize_t cgroup_pressure_write(struct kernfs_open_file *of, char *buf, + cgroup_get(cgrp); + cgroup_kn_unlock(of->kn); -+static inline void skb_assert_len(struct sk_buff *skb) -+{ -+#ifdef CONFIG_DEBUG_NET -+ if (WARN_ONCE(!skb->len, "%s\n", __func__)) -+ DO_ONCE_LITE(skb_dump, KERN_ERR, skb, false); -+#endif /* CONFIG_DEBUG_NET */ -+} ++ /* Allow only one trigger per file descriptor */ ++ if (ctx->psi.trigger) { ++ cgroup_put(cgrp); ++ return -EBUSY; ++ } + - /* - * Add data to an sk_buff - */ -diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h -index 1ce9a9eb223b6..0c742cdf413c0 100644 ---- a/include/linux/skmsg.h -+++ b/include/linux/skmsg.h -@@ -283,7 +283,8 @@ static inline void sk_msg_sg_copy_clear(struct sk_msg *msg, u32 start) + psi = cgroup_ino(cgrp) == 1 ? 
&psi_system : &cgrp->psi; + new = psi_trigger_create(psi, buf, nbytes, res); + if (IS_ERR(new)) { +@@ -3625,8 +3706,7 @@ static ssize_t cgroup_pressure_write(struct kernfs_open_file *of, char *buf, + return PTR_ERR(new); + } - static inline struct sk_psock *sk_psock(const struct sock *sk) - { -- return rcu_dereference_sk_user_data(sk); -+ return __rcu_dereference_sk_user_data_with_flags(sk, -+ SK_USER_DATA_PSOCK); - } +- psi_trigger_replace(&of->priv, new); +- ++ smp_store_release(&ctx->psi.trigger, new); + cgroup_put(cgrp); - static inline void sk_psock_set_state(struct sk_psock *psock, -@@ -310,21 +311,16 @@ static inline void sock_drop(struct sock *sk, struct sk_buff *skb) - kfree_skb(skb); + return nbytes; +@@ -3656,12 +3736,16 @@ static ssize_t cgroup_cpu_pressure_write(struct kernfs_open_file *of, + static __poll_t cgroup_pressure_poll(struct kernfs_open_file *of, + poll_table *pt) + { +- return psi_trigger_poll(&of->priv, of->file, pt); ++ struct cgroup_file_ctx *ctx = of->priv; ++ ++ return psi_trigger_poll(&ctx->psi.trigger, of->file, pt); } --static inline void drop_sk_msg(struct sk_psock *psock, struct sk_msg *msg) --{ -- if (msg->skb) -- sock_drop(psock->sk, msg->skb); -- kfree(msg); --} -- - static inline void sk_psock_queue_msg(struct sk_psock *psock, - struct sk_msg *msg) + static void cgroup_pressure_release(struct kernfs_open_file *of) { - spin_lock_bh(&psock->ingress_lock); - if (sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED)) - list_add_tail(&msg->list, &psock->ingress_msg); -- else -- drop_sk_msg(psock, msg); -+ else { -+ sk_msg_free(psock->sk, msg); -+ kfree(msg); -+ } - spin_unlock_bh(&psock->ingress_lock); +- psi_trigger_replace(&of->priv, NULL); ++ struct cgroup_file_ctx *ctx = of->priv; ++ ++ psi_trigger_destroy(ctx->psi.trigger); } -@@ -509,8 +505,22 @@ static inline bool sk_psock_strp_enabled(struct sk_psock *psock) - - #if IS_ENABLED(CONFIG_NET_SOCK_MSG) + bool cgroup_psi_enabled(void) +@@ -3788,24 +3872,43 @@ static ssize_t cgroup_kill_write(struct kernfs_open_file *of, char *buf, + static int cgroup_file_open(struct kernfs_open_file *of) + { + struct cftype *cft = of_cft(of); ++ struct cgroup_file_ctx *ctx; ++ int ret; --/* We only have one bit so far. */ --#define BPF_F_PTR_MASK ~(BPF_F_INGRESS) -+#define BPF_F_STRPARSER (1UL << 1) -+ -+/* We only have two bits so far. */ -+#define BPF_F_PTR_MASK ~(BPF_F_INGRESS | BPF_F_STRPARSER) +- if (cft->open) +- return cft->open(of); +- return 0; ++ ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); ++ if (!ctx) ++ return -ENOMEM; + -+static inline bool skb_bpf_strparser(const struct sk_buff *skb) -+{ -+ unsigned long sk_redir = skb->_sk_redir; ++ ctx->ns = current->nsproxy->cgroup_ns; ++ get_cgroup_ns(ctx->ns); ++ of->priv = ctx; + -+ return sk_redir & BPF_F_STRPARSER; -+} ++ if (!cft->open) ++ return 0; + -+static inline void skb_bpf_set_strparser(struct sk_buff *skb) -+{ -+ skb->_sk_redir |= BPF_F_STRPARSER; -+} ++ ret = cft->open(of); ++ if (ret) { ++ put_cgroup_ns(ctx->ns); ++ kfree(ctx); ++ } ++ return ret; + } - static inline bool skb_bpf_ingress(const struct sk_buff *skb) + static void cgroup_file_release(struct kernfs_open_file *of) { -diff --git a/include/linux/soc/qcom/qcom_aoss.h b/include/linux/soc/qcom/qcom_aoss.h -new file mode 100644 -index 0000000000000..3c2a82e606f81 ---- /dev/null -+++ b/include/linux/soc/qcom/qcom_aoss.h -@@ -0,0 +1,38 @@ -+/* SPDX-License-Identifier: GPL-2.0-only */ -+/* -+ * Copyright (c) 2021, The Linux Foundation. All rights reserved. 
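Editor's sketch (not patch content): the one-trigger-per-file-descriptor rule enforced above (the new -EBUSY path) matches the documented PSI userspace contract: write one trigger per open fd, poll for POLLPRI, and let releasing the fd destroy the trigger. A minimal consumer — the cgroup path "demo" is a placeholder:

    #include <fcntl.h>
    #include <poll.h>
    #include <stdio.h>
    #include <string.h>
    #include <unistd.h>

    int main(void)
    {
        const char trig[] = "some 150000 1000000"; /* 150ms stall per 1s window */
        struct pollfd pfd;
        int fd = open("/sys/fs/cgroup/demo/memory.pressure", O_RDWR);

        if (fd < 0 || write(fd, trig, strlen(trig) + 1) < 0) {
            perror("psi trigger");
            return 1;
        }
        pfd.fd = fd;
        pfd.events = POLLPRI; /* PSI notifications arrive as POLLPRI */
        if (poll(&pfd, 1, 10000) > 0 && (pfd.revents & POLLPRI))
            puts("memory pressure threshold crossed");
        close(fd); /* this also destroys the trigger */
        return 0;
    }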
-+ */ -+ -+#ifndef __QCOM_AOSS_H__ -+#define __QCOM_AOSS_H__ -+ -+#include <linux/err.h> -+#include <linux/device.h> -+ -+struct qmp; -+ -+#if IS_ENABLED(CONFIG_QCOM_AOSS_QMP) -+ -+int qmp_send(struct qmp *qmp, const void *data, size_t len); -+struct qmp *qmp_get(struct device *dev); -+void qmp_put(struct qmp *qmp); -+ -+#else -+ -+static inline int qmp_send(struct qmp *qmp, const void *data, size_t len) -+{ -+ return -ENODEV; -+} -+ -+static inline struct qmp *qmp_get(struct device *dev) -+{ -+ return ERR_PTR(-ENODEV); -+} -+ -+static inline void qmp_put(struct qmp *qmp) -+{ -+} -+ -+#endif -+ -+#endif -diff --git a/include/linux/soc/ti/ti_sci_protocol.h b/include/linux/soc/ti/ti_sci_protocol.h -index 0aad7009b50e6..bd0d11af76c5e 100644 ---- a/include/linux/soc/ti/ti_sci_protocol.h -+++ b/include/linux/soc/ti/ti_sci_protocol.h -@@ -645,7 +645,7 @@ devm_ti_sci_get_of_resource(const struct ti_sci_handle *handle, + struct cftype *cft = of_cft(of); ++ struct cgroup_file_ctx *ctx = of->priv; - static inline struct ti_sci_resource * - devm_ti_sci_get_resource(const struct ti_sci_handle *handle, struct device *dev, -- u32 dev_id, u32 sub_type); -+ u32 dev_id, u32 sub_type) + if (cft->release) + cft->release(of); ++ put_cgroup_ns(ctx->ns); ++ kfree(ctx); + } + + static ssize_t cgroup_file_write(struct kernfs_open_file *of, char *buf, + size_t nbytes, loff_t off) { - return ERR_PTR(-EINVAL); +- struct cgroup_namespace *ns = current->nsproxy->cgroup_ns; ++ struct cgroup_file_ctx *ctx = of->priv; + struct cgroup *cgrp = of->kn->parent->priv; + struct cftype *cft = of_cft(of); + struct cgroup_subsys_state *css; +@@ -3822,7 +3925,7 @@ static ssize_t cgroup_file_write(struct kernfs_open_file *of, char *buf, + */ + if ((cgrp->root->flags & CGRP_ROOT_NS_DELEGATE) && + !(cft->flags & CFTYPE_NS_DELEGATABLE) && +- ns != &init_cgroup_ns && ns->root_cset->dfl_cgrp == cgrp) ++ ctx->ns != &init_cgroup_ns && ctx->ns->root_cset->dfl_cgrp == cgrp) + return -EPERM; + + if (cft->write) +@@ -4728,21 +4831,21 @@ void css_task_iter_end(struct css_task_iter *it) + + static void cgroup_procs_release(struct kernfs_open_file *of) + { +- if (of->priv) { +- css_task_iter_end(of->priv); +- kfree(of->priv); +- } ++ struct cgroup_file_ctx *ctx = of->priv; ++ ++ if (ctx->procs.started) ++ css_task_iter_end(&ctx->procs.iter); } -diff --git a/include/linux/soundwire/sdw.h b/include/linux/soundwire/sdw.h -index 76ce3f3ac0f22..bf6f0decb3f6d 100644 ---- a/include/linux/soundwire/sdw.h -+++ b/include/linux/soundwire/sdw.h -@@ -646,9 +646,6 @@ struct sdw_slave_ops { - * @dev_num: Current Device Number, values can be 0 or dev_num_sticky - * @dev_num_sticky: one-time static Device Number assigned by Bus - * @probed: boolean tracking driver state -- * @probe_complete: completion utility to control potential races -- * on startup between driver probe/initialization and SoundWire -- * Slave state changes/implementation-defined interrupts - * @enumeration_complete: completion utility to control potential races - * on startup between device enumeration and read/write access to the - * Slave device -@@ -663,6 +660,7 @@ struct sdw_slave_ops { - * for a Slave happens for the first time after enumeration - * @is_mockup_device: status flag used to squelch errors in the command/control - * protocol for SoundWire mockup devices -+ * @sdw_dev_lock: mutex used to protect callbacks/remove races - */ - struct sdw_slave { - struct sdw_slave_id id; -@@ -680,12 +678,12 @@ struct sdw_slave { - u16 dev_num; - u16 dev_num_sticky; - bool probed; -- struct 
completion probe_complete; - struct completion enumeration_complete; - struct completion initialization_complete; - u32 unattach_request; - bool first_interrupt_done; - bool is_mockup_device; -+ struct mutex sdw_dev_lock; /* protect callbacks/remove races */ - }; - #define dev_to_sdw_dev(_dev) container_of(_dev, struct sdw_slave, dev) -diff --git a/include/linux/stackdepot.h b/include/linux/stackdepot.h -index 6bb4bc1a5f545..22919a94ca19d 100644 ---- a/include/linux/stackdepot.h -+++ b/include/linux/stackdepot.h -@@ -19,8 +19,6 @@ depot_stack_handle_t stack_depot_save(unsigned long *entries, - unsigned int stack_depot_fetch(depot_stack_handle_t handle, - unsigned long **entries); + static void *cgroup_procs_next(struct seq_file *s, void *v, loff_t *pos) + { + struct kernfs_open_file *of = s->private; +- struct css_task_iter *it = of->priv; ++ struct cgroup_file_ctx *ctx = of->priv; --unsigned int filter_irq_stacks(unsigned long *entries, unsigned int nr_entries); -- - #ifdef CONFIG_STACKDEPOT - int stack_depot_init(void); - #else -diff --git a/include/linux/stacktrace.h b/include/linux/stacktrace.h -index 9edecb494e9e2..bef158815e83d 100644 ---- a/include/linux/stacktrace.h -+++ b/include/linux/stacktrace.h -@@ -21,6 +21,7 @@ unsigned int stack_trace_save_tsk(struct task_struct *task, - unsigned int stack_trace_save_regs(struct pt_regs *regs, unsigned long *store, - unsigned int size, unsigned int skipnr); - unsigned int stack_trace_save_user(unsigned long *store, unsigned int size); -+unsigned int filter_irq_stacks(unsigned long *entries, unsigned int nr_entries); + if (pos) + (*pos)++; - /* Internal interfaces. Do not use in generic code */ - #ifdef CONFIG_ARCH_STACKWALK -diff --git a/include/linux/static_call.h b/include/linux/static_call.h -index 3e56a9751c062..fcc5b48989b3c 100644 ---- a/include/linux/static_call.h -+++ b/include/linux/static_call.h -@@ -248,10 +248,7 @@ static inline int static_call_text_reserved(void *start, void *end) - return 0; +- return css_task_iter_next(it); ++ return css_task_iter_next(&ctx->procs.iter); } --static inline long __static_call_return0(void) --{ -- return 0; --} -+extern long __static_call_return0(void); + static void *__cgroup_procs_start(struct seq_file *s, loff_t *pos, +@@ -4750,21 +4853,18 @@ static void *__cgroup_procs_start(struct seq_file *s, loff_t *pos, + { + struct kernfs_open_file *of = s->private; + struct cgroup *cgrp = seq_css(s)->cgroup; +- struct css_task_iter *it = of->priv; ++ struct cgroup_file_ctx *ctx = of->priv; ++ struct css_task_iter *it = &ctx->procs.iter; - #define EXPORT_STATIC_CALL(name) \ - EXPORT_SYMBOL(STATIC_CALL_KEY(name)); \ -diff --git a/include/linux/stddef.h b/include/linux/stddef.h -index 998a4ba28eba4..31fdbb784c24e 100644 ---- a/include/linux/stddef.h -+++ b/include/linux/stddef.h -@@ -36,4 +36,65 @@ enum { - #define offsetofend(TYPE, MEMBER) \ - (offsetof(TYPE, MEMBER) + sizeof_field(TYPE, MEMBER)) + /* + * When a seq_file is seeked, it's always traversed sequentially + * from position 0, so we can simply keep iterating on !0 *pos. 
+ */ +- if (!it) { ++ if (!ctx->procs.started) { + if (WARN_ON_ONCE((*pos))) + return ERR_PTR(-EINVAL); +- +- it = kzalloc(sizeof(*it), GFP_KERNEL); +- if (!it) +- return ERR_PTR(-ENOMEM); +- of->priv = it; + css_task_iter_start(&cgrp->self, iter_flags, it); ++ ctx->procs.started = true; + } else if (!(*pos)) { + css_task_iter_end(it); + css_task_iter_start(&cgrp->self, iter_flags, it); +@@ -4815,9 +4915,9 @@ static int cgroup_may_write(const struct cgroup *cgrp, struct super_block *sb) -+/** -+ * struct_group() - Wrap a set of declarations in a mirrored struct -+ * -+ * @NAME: The identifier name of the mirrored sub-struct -+ * @MEMBERS: The member declarations for the mirrored structs -+ * -+ * Used to create an anonymous union of two structs with identical -+ * layout and size: one anonymous and one named. The former can be -+ * used normally without sub-struct naming, and the latter can be -+ * used to reason about the start, end, and size of the group of -+ * struct members. -+ */ -+#define struct_group(NAME, MEMBERS...) \ -+ __struct_group(/* no tag */, NAME, /* no attrs */, MEMBERS) -+ -+/** -+ * struct_group_attr() - Create a struct_group() with trailing attributes -+ * -+ * @NAME: The identifier name of the mirrored sub-struct -+ * @ATTRS: Any struct attributes to apply -+ * @MEMBERS: The member declarations for the mirrored structs -+ * -+ * Used to create an anonymous union of two structs with identical -+ * layout and size: one anonymous and one named. The former can be -+ * used normally without sub-struct naming, and the latter can be -+ * used to reason about the start, end, and size of the group of -+ * struct members. Includes structure attributes argument. -+ */ -+#define struct_group_attr(NAME, ATTRS, MEMBERS...) \ -+ __struct_group(/* no tag */, NAME, ATTRS, MEMBERS) -+ -+/** -+ * struct_group_tagged() - Create a struct_group with a reusable tag -+ * -+ * @TAG: The tag name for the named sub-struct -+ * @NAME: The identifier name of the mirrored sub-struct -+ * @MEMBERS: The member declarations for the mirrored structs -+ * -+ * Used to create an anonymous union of two structs with identical -+ * layout and size: one anonymous and one named. The former can be -+ * used normally without sub-struct naming, and the latter can be -+ * used to reason about the start, end, and size of the group of -+ * struct members. Includes struct tag argument for the named copy, -+ * so the specified layout can be reused later. -+ */ -+#define struct_group_tagged(TAG, NAME, MEMBERS...) \ -+ __struct_group(TAG, NAME, /* no attrs */, MEMBERS) -+ -+/** -+ * DECLARE_FLEX_ARRAY() - Declare a flexible array usable in a union -+ * -+ * @TYPE: The type of each flexible array element -+ * @NAME: The name of the flexible array member -+ * -+ * In order to have a flexible array member in a union or alone in a -+ * struct, it needs to be wrapped in an anonymous struct with at least 1 -+ * named member, but that member can be empty. 
-+ */ -+#define DECLARE_FLEX_ARRAY(TYPE, NAME) \ -+ __DECLARE_FLEX_ARRAY(TYPE, NAME) -+ - #endif -diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h -index a6f03b36fc4f7..48d015ed21752 100644 ---- a/include/linux/stmmac.h -+++ b/include/linux/stmmac.h -@@ -233,6 +233,7 @@ struct plat_stmmacenet_data { - int (*clks_config)(void *priv, bool enabled); - int (*crosststamp)(ktime_t *device, struct system_counterval_t *system, - void *ctx); -+ void (*dump_debug_regs)(void *priv); - void *bsp_priv; - struct clk *stmmac_clk; - struct clk *pclk; -@@ -268,5 +269,6 @@ struct plat_stmmacenet_data { - int msi_rx_base_vec; - int msi_tx_base_vec; - bool use_phy_wol; -+ bool sph_disable; - }; - #endif -diff --git a/include/linux/string.h b/include/linux/string.h -index 5e96d656be7ae..d68097b4f600b 100644 ---- a/include/linux/string.h -+++ b/include/linux/string.h -@@ -262,23 +262,8 @@ void __write_overflow(void) __compiletime_error("detected write beyond size of o - #include <linux/fortify-string.h> - #endif + static int cgroup_procs_write_permission(struct cgroup *src_cgrp, + struct cgroup *dst_cgrp, +- struct super_block *sb) ++ struct super_block *sb, ++ struct cgroup_namespace *ns) + { +- struct cgroup_namespace *ns = current->nsproxy->cgroup_ns; + struct cgroup *com_cgrp = src_cgrp; + int ret; --/** -- * memcpy_and_pad - Copy one buffer to another with padding -- * @dest: Where to copy to -- * @dest_len: The destination buffer size -- * @src: Where to copy from -- * @count: The number of bytes to copy -- * @pad: Character to use for padding if space is left in destination. -- */ --static inline void memcpy_and_pad(void *dest, size_t dest_len, -- const void *src, size_t count, int pad) --{ -- if (dest_len > count) { -- memcpy(dest, src, count); -- memset(dest + count, pad, dest_len - count); -- } else -- memcpy(dest, src, dest_len); --} -+void memcpy_and_pad(void *dest, size_t dest_len, const void *src, size_t count, -+ int pad); +@@ -4846,11 +4946,12 @@ static int cgroup_procs_write_permission(struct cgroup *src_cgrp, - /** - * str_has_prefix - Test if a string has a given prefix -diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h -index a4661646adc9c..9fcf5ffc4f9ad 100644 ---- a/include/linux/sunrpc/clnt.h -+++ b/include/linux/sunrpc/clnt.h -@@ -159,6 +159,7 @@ struct rpc_add_xprt_test { - #define RPC_CLNT_CREATE_NO_RETRANS_TIMEOUT (1UL << 9) - #define RPC_CLNT_CREATE_SOFTERR (1UL << 10) - #define RPC_CLNT_CREATE_REUSEPORT (1UL << 11) -+#define RPC_CLNT_CREATE_CONNECTED (1UL << 12) + static int cgroup_attach_permissions(struct cgroup *src_cgrp, + struct cgroup *dst_cgrp, +- struct super_block *sb, bool threadgroup) ++ struct super_block *sb, bool threadgroup, ++ struct cgroup_namespace *ns) + { + int ret = 0; - struct rpc_clnt *rpc_create(struct rpc_create_args *args); - struct rpc_clnt *rpc_bind_new_program(struct rpc_clnt *, -diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h -index 064c96157d1f0..045f34add206f 100644 ---- a/include/linux/sunrpc/svc.h -+++ b/include/linux/sunrpc/svc.h -@@ -384,6 +384,7 @@ struct svc_deferred_req { - size_t addrlen; - struct sockaddr_storage daddr; /* where reply must come from */ - size_t daddrlen; -+ void *xprt_ctxt; - struct cache_deferred_req handle; - size_t xprt_hlen; - int argslen; -@@ -532,8 +533,7 @@ int svc_encode_result_payload(struct svc_rqst *rqstp, - unsigned int offset, - unsigned int length); - unsigned int svc_fill_write_vector(struct svc_rqst *rqstp, -- struct page **pages, -- struct kvec *first, 
size_t total); -+ struct xdr_buf *payload); - char *svc_fill_symlink_pathname(struct svc_rqst *rqstp, - struct kvec *first, void *p, - size_t total); -@@ -566,16 +566,27 @@ static inline void svc_reserve_auth(struct svc_rqst *rqstp, int space) - } +- ret = cgroup_procs_write_permission(src_cgrp, dst_cgrp, sb); ++ ret = cgroup_procs_write_permission(src_cgrp, dst_cgrp, sb, ns); + if (ret) + return ret; - /** -- * svcxdr_init_decode - Prepare an xdr_stream for svc Call decoding -+ * svcxdr_init_decode - Prepare an xdr_stream for Call decoding - * @rqstp: controlling server RPC transaction context - * -+ * This function currently assumes the RPC header in rq_arg has -+ * already been decoded. Upon return, xdr->p points to the -+ * location of the upper layer header. - */ - static inline void svcxdr_init_decode(struct svc_rqst *rqstp) +@@ -4867,16 +4968,18 @@ static int cgroup_attach_permissions(struct cgroup *src_cgrp, + static ssize_t __cgroup_procs_write(struct kernfs_open_file *of, char *buf, + bool threadgroup) { - struct xdr_stream *xdr = &rqstp->rq_arg_stream; -- struct kvec *argv = rqstp->rq_arg.head; -+ struct xdr_buf *buf = &rqstp->rq_arg; -+ struct kvec *argv = buf->head; ++ struct cgroup_file_ctx *ctx = of->priv; + struct cgroup *src_cgrp, *dst_cgrp; + struct task_struct *task; ++ const struct cred *saved_cred; + ssize_t ret; +- bool locked; ++ bool threadgroup_locked; -- xdr_init_decode(xdr, &rqstp->rq_arg, argv->iov_base, NULL); + dst_cgrp = cgroup_kn_lock_live(of->kn, false); + if (!dst_cgrp) + return -ENODEV; + +- task = cgroup_procs_write_start(buf, threadgroup, &locked); ++ task = cgroup_procs_write_start(buf, threadgroup, &threadgroup_locked); + ret = PTR_ERR_OR_ZERO(task); + if (ret) + goto out_unlock; +@@ -4886,16 +4989,23 @@ static ssize_t __cgroup_procs_write(struct kernfs_open_file *of, char *buf, + src_cgrp = task_cgroup_from_root(task, &cgrp_dfl_root); + spin_unlock_irq(&css_set_lock); + +- /* process and thread migrations follow same delegation rule */ + /* -+ * svc_getnl() and friends do not keep the xdr_buf's ::len -+ * field up to date. Refresh that field before initializing -+ * the argument decoding stream. ++ * Process and thread migrations follow same delegation rule. Check ++ * permissions using the credentials from file open to protect against ++ * inherited fd attacks. 
+ */ -+ buf->len = buf->head->iov_len + buf->page_len + buf->tail->iov_len; ++ saved_cred = override_creds(of->file->f_cred); + ret = cgroup_attach_permissions(src_cgrp, dst_cgrp, +- of->file->f_path.dentry->d_sb, threadgroup); ++ of->file->f_path.dentry->d_sb, ++ threadgroup, ctx->ns); ++ revert_creds(saved_cred); + if (ret) + goto out_finish; + + ret = cgroup_attach_task(dst_cgrp, task, threadgroup); + + out_finish: +- cgroup_procs_write_finish(task, locked); ++ cgroup_procs_write_finish(task, threadgroup_locked); + out_unlock: + cgroup_kn_unlock(of->kn); + +@@ -5911,17 +6021,23 @@ struct cgroup *cgroup_get_from_id(u64 id) + struct kernfs_node *kn; + struct cgroup *cgrp = NULL; + +- mutex_lock(&cgroup_mutex); + kn = kernfs_find_and_get_node_by_id(cgrp_dfl_root.kf_root, id); + if (!kn) +- goto out_unlock; ++ goto out; + -+ xdr_init_decode(xdr, buf, argv->iov_base, NULL); - xdr_set_scratch_page(xdr, rqstp->rq_scratch_page); - } ++ if (kernfs_type(kn) != KERNFS_DIR) ++ goto put; -@@ -598,7 +609,7 @@ static inline void svcxdr_init_encode(struct svc_rqst *rqstp) - xdr->end = resv->iov_base + PAGE_SIZE - rqstp->rq_auth_slack; - buf->len = resv->iov_len; - xdr->page_ptr = buf->pages - 1; -- buf->buflen = PAGE_SIZE * (1 + rqstp->rq_page_end - buf->pages); -+ buf->buflen = PAGE_SIZE * (rqstp->rq_page_end - buf->pages); - buf->buflen -= rqstp->rq_auth_slack; - xdr->rqst = NULL; +- cgrp = kn->priv; +- if (cgroup_is_dead(cgrp) || !cgroup_tryget(cgrp)) ++ rcu_read_lock(); ++ ++ cgrp = rcu_dereference(*(void __rcu __force **)&kn->priv); ++ if (cgrp && !cgroup_tryget(cgrp)) + cgrp = NULL; ++ ++ rcu_read_unlock(); ++put: + kernfs_put(kn); +-out_unlock: +- mutex_unlock(&cgroup_mutex); ++out: + return cgrp; } -diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h -index b519609af1d02..3a2c714d6b629 100644 ---- a/include/linux/sunrpc/xdr.h -+++ b/include/linux/sunrpc/xdr.h -@@ -405,8 +405,8 @@ static inline int xdr_stream_encode_item_absent(struct xdr_stream *xdr) + EXPORT_SYMBOL_GPL(cgroup_get_from_id); +@@ -6104,7 +6220,8 @@ static int cgroup_css_set_fork(struct kernel_clone_args *kargs) + goto err; + + ret = cgroup_attach_permissions(cset->dfl_cgrp, dst_cgrp, sb, +- !(kargs->flags & CLONE_THREAD)); ++ !(kargs->flags & CLONE_THREAD), ++ current->nsproxy->cgroup_ns); + if (ret) + goto err; + +@@ -6474,30 +6591,38 @@ struct cgroup_subsys_state *css_from_id(int id, struct cgroup_subsys *ss) + * + * Find the cgroup at @path on the default hierarchy, increment its + * reference count and return it. Returns pointer to the found cgroup on +- * success, ERR_PTR(-ENOENT) if @path doesn't exist and ERR_PTR(-ENOTDIR) +- * if @path points to a non-directory. ++ * success, ERR_PTR(-ENOENT) if @path doesn't exist or if the cgroup has already ++ * been released and ERR_PTR(-ENOTDIR) if @path points to a non-directory. */ - static inline __be32 *xdr_encode_bool(__be32 *p, u32 n) + struct cgroup *cgroup_get_from_path(const char *path) { -- *p = n ? xdr_one : xdr_zero; -- return p++; -+ *p++ = n ? 
xdr_one : xdr_zero; -+ return p; - } - - /** -@@ -731,6 +731,8 @@ xdr_stream_decode_uint32_array(struct xdr_stream *xdr, + struct kernfs_node *kn; +- struct cgroup *cgrp; ++ struct cgroup *cgrp = ERR_PTR(-ENOENT); ++ struct cgroup *root_cgrp; - if (unlikely(xdr_stream_decode_u32(xdr, &len) < 0)) - return -EBADMSG; -+ if (len > SIZE_MAX / sizeof(*p)) -+ return -EBADMSG; - p = xdr_inline_decode(xdr, len * sizeof(*p)); - if (unlikely(!p)) - return -EBADMSG; -diff --git a/include/linux/sunrpc/xprtsock.h b/include/linux/sunrpc/xprtsock.h -index 8c2a712cb2420..689062afdd610 100644 ---- a/include/linux/sunrpc/xprtsock.h -+++ b/include/linux/sunrpc/xprtsock.h -@@ -89,5 +89,6 @@ struct sock_xprt { - #define XPRT_SOCK_WAKE_WRITE (5) - #define XPRT_SOCK_WAKE_PENDING (6) - #define XPRT_SOCK_WAKE_DISCONNECT (7) -+#define XPRT_SOCK_CONNECT_SENT (8) +- mutex_lock(&cgroup_mutex); ++ spin_lock_irq(&css_set_lock); ++ root_cgrp = current_cgns_cgroup_from_root(&cgrp_dfl_root); ++ kn = kernfs_walk_and_get(root_cgrp->kn, path); ++ spin_unlock_irq(&css_set_lock); ++ if (!kn) ++ goto out; - #endif /* _LINUX_SUNRPC_XPRTSOCK_H */ -diff --git a/include/linux/surface_aggregator/controller.h b/include/linux/surface_aggregator/controller.h -index 068e1982ad371..74bfdffaf7b0e 100644 ---- a/include/linux/surface_aggregator/controller.h -+++ b/include/linux/surface_aggregator/controller.h -@@ -792,8 +792,8 @@ enum ssam_event_mask { - #define SSAM_EVENT_REGISTRY_KIP \ - SSAM_EVENT_REGISTRY(SSAM_SSH_TC_KIP, 0x02, 0x27, 0x28) +- kn = kernfs_walk_and_get(cgrp_dfl_root.cgrp.kn, path); +- if (kn) { +- if (kernfs_type(kn) == KERNFS_DIR) { +- cgrp = kn->priv; +- cgroup_get_live(cgrp); +- } else { +- cgrp = ERR_PTR(-ENOTDIR); +- } +- kernfs_put(kn); +- } else { +- cgrp = ERR_PTR(-ENOENT); ++ if (kernfs_type(kn) != KERNFS_DIR) { ++ cgrp = ERR_PTR(-ENOTDIR); ++ goto out_kernfs; + } --#define SSAM_EVENT_REGISTRY_REG \ -- SSAM_EVENT_REGISTRY(SSAM_SSH_TC_REG, 0x02, 0x01, 0x02) -+#define SSAM_EVENT_REGISTRY_REG(tid)\ -+ SSAM_EVENT_REGISTRY(SSAM_SSH_TC_REG, tid, 0x01, 0x02) +- mutex_unlock(&cgroup_mutex); ++ rcu_read_lock(); ++ ++ cgrp = rcu_dereference(*(void __rcu __force **)&kn->priv); ++ if (!cgrp || !cgroup_tryget(cgrp)) ++ cgrp = ERR_PTR(-ENOENT); ++ ++ rcu_read_unlock(); ++ ++out_kernfs: ++ kernfs_put(kn); ++out: + return cgrp; + } + EXPORT_SYMBOL_GPL(cgroup_get_from_path); +diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c +index 2a9695ccb65f5..428820bf141d1 100644 +--- a/kernel/cgroup/cpuset.c ++++ b/kernel/cgroup/cpuset.c +@@ -33,6 +33,7 @@ + #include <linux/interrupt.h> + #include <linux/kernel.h> + #include <linux/kmod.h> ++#include <linux/kthread.h> + #include <linux/list.h> + #include <linux/mempolicy.h> + #include <linux/mm.h> +@@ -1087,10 +1088,18 @@ static void update_tasks_cpumask(struct cpuset *cs) + { + struct css_task_iter it; + struct task_struct *task; ++ bool top_cs = cs == &top_cpuset; - /** - * enum ssam_event_notifier_flags - Flags for event notifiers. 
-diff --git a/include/linux/suspend.h b/include/linux/suspend.h -index 8af13ba60c7e4..4bcd65679cee0 100644 ---- a/include/linux/suspend.h -+++ b/include/linux/suspend.h -@@ -430,15 +430,7 @@ struct platform_hibernation_ops { + css_task_iter_start(&cs->css, 0, &it); +- while ((task = css_task_iter_next(&it))) ++ while ((task = css_task_iter_next(&it))) { ++ /* ++ * Percpu kthreads in top_cpuset are ignored ++ */ ++ if (top_cs && (task->flags & PF_KTHREAD) && ++ kthread_is_per_cpu(task)) ++ continue; + set_cpus_allowed_ptr(task, cs->effective_cpus); ++ } + css_task_iter_end(&it); + } - #ifdef CONFIG_HIBERNATION - /* kernel/power/snapshot.c */ --extern void __register_nosave_region(unsigned long b, unsigned long e, int km); --static inline void __init register_nosave_region(unsigned long b, unsigned long e) --{ -- __register_nosave_region(b, e, 0); --} --static inline void __init register_nosave_region_late(unsigned long b, unsigned long e) --{ -- __register_nosave_region(b, e, 1); --} -+extern void register_nosave_region(unsigned long b, unsigned long e); - extern int swsusp_page_is_forbidden(struct page *); - extern void swsusp_set_page_free(struct page *); - extern void swsusp_unset_page_free(struct page *); -@@ -457,7 +449,6 @@ int pfn_is_nosave(unsigned long pfn); - int hibernate_quiet_exec(int (*func)(void *data), void *data); - #else /* CONFIG_HIBERNATION */ - static inline void register_nosave_region(unsigned long b, unsigned long e) {} --static inline void register_nosave_region_late(unsigned long b, unsigned long e) {} - static inline int swsusp_page_is_forbidden(struct page *p) { return 0; } - static inline void swsusp_set_page_free(struct page *p) {} - static inline void swsusp_unset_page_free(struct page *p) {} -@@ -505,14 +496,14 @@ extern void ksys_sync_helper(void); +@@ -1512,10 +1521,15 @@ static void update_sibling_cpumasks(struct cpuset *parent, struct cpuset *cs, + struct cpuset *sibling; + struct cgroup_subsys_state *pos_css; - /* drivers/base/power/wakeup.c */ - extern bool events_check_enabled; --extern unsigned int pm_wakeup_irq; - extern suspend_state_t pm_suspend_target_state; ++ percpu_rwsem_assert_held(&cpuset_rwsem); ++ + /* + * Check all its siblings and call update_cpumasks_hier() + * if their use_parent_ecpus flag is set in order for them + * to use the right effective_cpus value. ++ * ++ * The update_cpumasks_hier() function may sleep. So we have to ++ * release the RCU read lock before calling it. 
+ */ + rcu_read_lock(); + cpuset_for_each_child(sibling, pos_css, parent) { +@@ -1523,8 +1537,13 @@ static void update_sibling_cpumasks(struct cpuset *parent, struct cpuset *cs, + continue; + if (!sibling->use_parent_ecpus) + continue; ++ if (!css_tryget_online(&sibling->css)) ++ continue; - extern bool pm_wakeup_pending(void); - extern void pm_system_wakeup(void); - extern void pm_system_cancel_wakeup(void); --extern void pm_wakeup_clear(bool reset); -+extern void pm_wakeup_clear(unsigned int irq_number); - extern void pm_system_irq_wakeup(unsigned int irq_number); -+extern unsigned int pm_wakeup_irq(void); - extern bool pm_get_wakeup_count(unsigned int *count, bool block); - extern bool pm_save_wakeup_count(unsigned int count); - extern void pm_wakep_autosleep_enabled(bool set); -diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h -index b0cb2a9973f49..569272871375c 100644 ---- a/include/linux/swiotlb.h -+++ b/include/linux/swiotlb.h -@@ -45,7 +45,8 @@ extern void __init swiotlb_update_mem_attributes(void); ++ rcu_read_unlock(); + update_cpumasks_hier(sibling, tmp); ++ rcu_read_lock(); ++ css_put(&sibling->css); + } + rcu_read_unlock(); + } +@@ -1597,8 +1616,7 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs, + * Make sure that subparts_cpus is a subset of cpus_allowed. + */ + if (cs->nr_subparts_cpus) { +- cpumask_andnot(cs->subparts_cpus, cs->subparts_cpus, +- cs->cpus_allowed); ++ cpumask_and(cs->subparts_cpus, cs->subparts_cpus, cs->cpus_allowed); + cs->nr_subparts_cpus = cpumask_weight(cs->subparts_cpus); + } + spin_unlock_irq(&callback_lock); +@@ -2043,12 +2061,7 @@ static int update_prstate(struct cpuset *cs, int new_prs) + update_flag(CS_CPU_EXCLUSIVE, cs, 0); + } - phys_addr_t swiotlb_tbl_map_single(struct device *hwdev, phys_addr_t phys, - size_t mapping_size, size_t alloc_size, -- enum dma_data_direction dir, unsigned long attrs); -+ unsigned int alloc_aligned_mask, enum dma_data_direction dir, -+ unsigned long attrs); +- /* +- * Update cpumask of parent's tasks except when it is the top +- * cpuset as some system daemons cannot be mapped to other CPUs. 
+- */ +- if (parent != &top_cpuset) +- update_tasks_cpumask(parent); ++ update_tasks_cpumask(parent); - extern void swiotlb_tbl_unmap_single(struct device *hwdev, - phys_addr_t tlb_addr, -diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h -index 1fa2b69c6fc3d..fa372b4c23132 100644 ---- a/include/linux/sysctl.h -+++ b/include/linux/sysctl.h -@@ -38,9 +38,16 @@ struct ctl_table_header; - struct ctl_dir; + if (parent->child_ecpus_count) + update_sibling_cpumasks(parent, cs, &tmpmask); +@@ -2190,7 +2203,7 @@ static int cpuset_can_attach(struct cgroup_taskset *tset) + goto out_unlock; - /* Keep the same order as in fs/proc/proc_sysctl.c */ --#define SYSCTL_ZERO ((void *)&sysctl_vals[0]) --#define SYSCTL_ONE ((void *)&sysctl_vals[1]) --#define SYSCTL_INT_MAX ((void *)&sysctl_vals[2]) -+#define SYSCTL_NEG_ONE ((void *)&sysctl_vals[0]) -+#define SYSCTL_ZERO ((void *)&sysctl_vals[1]) -+#define SYSCTL_ONE ((void *)&sysctl_vals[2]) -+#define SYSCTL_TWO ((void *)&sysctl_vals[3]) -+#define SYSCTL_FOUR ((void *)&sysctl_vals[4]) -+#define SYSCTL_ONE_HUNDRED ((void *)&sysctl_vals[5]) -+#define SYSCTL_TWO_HUNDRED ((void *)&sysctl_vals[6]) -+#define SYSCTL_ONE_THOUSAND ((void *)&sysctl_vals[7]) -+#define SYSCTL_THREE_THOUSAND ((void *)&sysctl_vals[8]) -+#define SYSCTL_INT_MAX ((void *)&sysctl_vals[9]) + cgroup_taskset_for_each(task, css, tset) { +- ret = task_can_attach(task, cs->cpus_allowed); ++ ret = task_can_attach(task, cs->effective_cpus); + if (ret) + goto out_unlock; + ret = security_task_setscheduler(task); +@@ -2240,6 +2253,7 @@ static void cpuset_attach(struct cgroup_taskset *tset) + cgroup_taskset_first(tset, &css); + cs = css_cs(css); - extern const int sysctl_vals[]; ++ lockdep_assert_cpus_held(); /* see cgroup_attach_lock() */ + percpu_down_write(&cpuset_rwsem); -diff --git a/include/linux/sysfb.h b/include/linux/sysfb.h -index b0dcfa26d07bd..8ba8b5be55675 100644 ---- a/include/linux/sysfb.h -+++ b/include/linux/sysfb.h -@@ -55,6 +55,18 @@ struct efifb_dmi_info { - int flags; - }; + guarantee_online_mems(cs, &cpuset_attach_nodemask_to); +@@ -3336,8 +3350,11 @@ static struct notifier_block cpuset_track_online_nodes_nb = { + */ + void __init cpuset_init_smp(void) + { +- cpumask_copy(top_cpuset.cpus_allowed, cpu_active_mask); +- top_cpuset.mems_allowed = node_states[N_MEMORY]; ++ /* ++ * cpus_allowd/mems_allowed set to v2 values in the initial ++ * cpuset_bind() call will be reset to v1 values in another ++ * cpuset_bind() call when v1 cpuset is mounted. 
++ */ + top_cpuset.old_mems_allowed = top_cpuset.mems_allowed; -+#ifdef CONFIG_SYSFB -+ -+void sysfb_disable(void); -+ -+#else /* CONFIG_SYSFB */ -+ -+static inline void sysfb_disable(void) -+{ -+} -+ -+#endif /* CONFIG_SYSFB */ -+ - #ifdef CONFIG_EFI + cpumask_copy(top_cpuset.effective_cpus, cpu_active_mask); +diff --git a/kernel/cgroup/rstat.c b/kernel/cgroup/rstat.c +index b264ab5652ba9..1486768f23185 100644 +--- a/kernel/cgroup/rstat.c ++++ b/kernel/cgroup/rstat.c +@@ -433,8 +433,6 @@ static void root_cgroup_cputime(struct task_cputime *cputime) + cputime->sum_exec_runtime += user; + cputime->sum_exec_runtime += sys; + cputime->sum_exec_runtime += cpustat[CPUTIME_STEAL]; +- cputime->sum_exec_runtime += cpustat[CPUTIME_GUEST]; +- cputime->sum_exec_runtime += cpustat[CPUTIME_GUEST_NICE]; + } + } - extern struct efifb_dmi_info efifb_dmi_list[]; -@@ -72,8 +84,8 @@ static inline void sysfb_apply_efi_quirks(struct platform_device *pd) +diff --git a/kernel/cpu.c b/kernel/cpu.c +index 192e43a874076..393114c10c285 100644 +--- a/kernel/cpu.c ++++ b/kernel/cpu.c +@@ -31,8 +31,10 @@ + #include <linux/smpboot.h> + #include <linux/relay.h> + #include <linux/slab.h> ++#include <linux/scs.h> + #include <linux/percpu-rwsem.h> + #include <linux/cpuset.h> ++#include <linux/random.h> - bool sysfb_parse_mode(const struct screen_info *si, - struct simplefb_platform_data *mode); --int sysfb_create_simplefb(const struct screen_info *si, -- const struct simplefb_platform_data *mode); -+struct platform_device *sysfb_create_simplefb(const struct screen_info *si, -+ const struct simplefb_platform_data *mode); + #include <trace/events/power.h> + #define CREATE_TRACE_POINTS +@@ -69,7 +71,6 @@ struct cpuhp_cpu_state { + bool rollback; + bool single; + bool bringup; +- int cpu; + struct hlist_node *node; + struct hlist_node *last; + enum cpuhp_state cb_state; +@@ -473,7 +474,7 @@ static inline bool cpu_smt_allowed(unsigned int cpu) { return true; } + #endif - #else /* CONFIG_SYSFB_SIMPLE */ + static inline enum cpuhp_state +-cpuhp_set_state(struct cpuhp_cpu_state *st, enum cpuhp_state target) ++cpuhp_set_state(int cpu, struct cpuhp_cpu_state *st, enum cpuhp_state target) + { + enum cpuhp_state prev_state = st->state; + bool bringup = st->state < target; +@@ -484,14 +485,15 @@ cpuhp_set_state(struct cpuhp_cpu_state *st, enum cpuhp_state target) + st->target = target; + st->single = false; + st->bringup = bringup; +- if (cpu_dying(st->cpu) != !bringup) +- set_cpu_dying(st->cpu, !bringup); ++ if (cpu_dying(cpu) != !bringup) ++ set_cpu_dying(cpu, !bringup); -@@ -83,10 +95,10 @@ static inline bool sysfb_parse_mode(const struct screen_info *si, - return false; + return prev_state; } --static inline int sysfb_create_simplefb(const struct screen_info *si, -- const struct simplefb_platform_data *mode) -+static inline struct platform_device *sysfb_create_simplefb(const struct screen_info *si, -+ const struct simplefb_platform_data *mode) + static inline void +-cpuhp_reset_state(struct cpuhp_cpu_state *st, enum cpuhp_state prev_state) ++cpuhp_reset_state(int cpu, struct cpuhp_cpu_state *st, ++ enum cpuhp_state prev_state) { -- return -EINVAL; -+ return ERR_PTR(-EINVAL); + bool bringup = !st->bringup; + +@@ -518,8 +520,8 @@ cpuhp_reset_state(struct cpuhp_cpu_state *st, enum cpuhp_state prev_state) + } + + st->bringup = bringup; +- if (cpu_dying(st->cpu) != !bringup) +- set_cpu_dying(st->cpu, !bringup); ++ if (cpu_dying(cpu) != !bringup) ++ set_cpu_dying(cpu, !bringup); } - #endif /* CONFIG_SYSFB_SIMPLE */ -diff --git 
a/include/linux/tcp.h b/include/linux/tcp.h -index 48d8a363319e5..a7ebadf83c681 100644 ---- a/include/linux/tcp.h -+++ b/include/linux/tcp.h -@@ -265,7 +265,7 @@ struct tcp_sock { - u32 packets_out; /* Packets which are "in flight" */ - u32 retrans_out; /* Retransmitted packets out */ - u32 max_packets_out; /* max packets_out in last window */ -- u32 max_packets_seq; /* right edge of max_packets_out flight */ -+ u32 cwnd_usage_seq; /* right edge of cwnd usage tracking flight */ + /* Regular hotplug invocation of the AP hotplug thread */ +@@ -539,15 +541,16 @@ static void __cpuhp_kick_ap(struct cpuhp_cpu_state *st) + wait_for_ap_thread(st, st->bringup); + } - u16 urg_data; /* Saved octet of OOB data and control flags */ - u8 ecn_flags; /* ECN status bits. */ -diff --git a/include/linux/tee_drv.h b/include/linux/tee_drv.h -index 3ebfea0781f10..38b701b7af4cf 100644 ---- a/include/linux/tee_drv.h -+++ b/include/linux/tee_drv.h -@@ -195,7 +195,7 @@ int tee_session_calc_client_uuid(uuid_t *uuid, u32 connection_method, - * @offset: offset of buffer in user space - * @pages: locked pages from userspace - * @num_pages: number of locked pages -- * @dmabuf: dmabuf used to for exporting to user space -+ * @refcount: reference counter - * @flags: defined by TEE_SHM_* in tee_drv.h - * @id: unique id of a shared memory object on this device - * -@@ -210,7 +210,7 @@ struct tee_shm { - unsigned int offset; - struct page **pages; - size_t num_pages; -- struct dma_buf *dmabuf; -+ refcount_t refcount; - u32 flags; - int id; - }; -@@ -582,4 +582,18 @@ struct tee_client_driver { - #define to_tee_client_driver(d) \ - container_of(d, struct tee_client_driver, driver) +-static int cpuhp_kick_ap(struct cpuhp_cpu_state *st, enum cpuhp_state target) ++static int cpuhp_kick_ap(int cpu, struct cpuhp_cpu_state *st, ++ enum cpuhp_state target) + { + enum cpuhp_state prev_state; + int ret; -+/** -+ * teedev_open() - Open a struct tee_device -+ * @teedev: Device to open -+ * -+ * @return a pointer to struct tee_context on success or an ERR_PTR on failure. -+ */ -+struct tee_context *teedev_open(struct tee_device *teedev); -+ -+/** -+ * teedev_close_context() - closes a struct tee_context -+ * @ctx: The struct tee_context to close -+ */ -+void teedev_close_context(struct tee_context *ctx); -+ - #endif /*__TEE_DRV_H*/ -diff --git a/include/linux/timex.h b/include/linux/timex.h -index 059b18eb1f1fa..3871b06bd302c 100644 ---- a/include/linux/timex.h -+++ b/include/linux/timex.h -@@ -62,6 +62,8 @@ - #include <linux/types.h> - #include <linux/param.h> +- prev_state = cpuhp_set_state(st, target); ++ prev_state = cpuhp_set_state(cpu, st, target); + __cpuhp_kick_ap(st); + if ((ret = st->result)) { +- cpuhp_reset_state(st, prev_state); ++ cpuhp_reset_state(cpu, st, prev_state); + __cpuhp_kick_ap(st); + } -+unsigned long random_get_entropy_fallback(void); -+ - #include <asm/timex.h> +@@ -579,7 +582,7 @@ static int bringup_wait_for_ap(unsigned int cpu) + if (st->target <= CPUHP_AP_ONLINE_IDLE) + return 0; - #ifndef random_get_entropy -@@ -74,8 +76,14 @@ - * - * By default we use get_cycles() for this purpose, but individual - * architectures may override this in their asm/timex.h header file. -+ * If a given arch does not have get_cycles(), then we fallback to -+ * using random_get_entropy_fallback(). 
- */ --#define random_get_entropy() get_cycles() -+#ifdef get_cycles -+#define random_get_entropy() ((unsigned long)get_cycles()) -+#else -+#define random_get_entropy() random_get_entropy_fallback() -+#endif - #endif +- return cpuhp_kick_ap(st, st->target); ++ return cpuhp_kick_ap(cpu, st, st->target); + } - /* -diff --git a/include/linux/torture.h b/include/linux/torture.h -index 0910c5803f35a..24f58e50a94b8 100644 ---- a/include/linux/torture.h -+++ b/include/linux/torture.h -@@ -47,6 +47,14 @@ do { \ - } while (0) - void verbose_torout_sleep(void); + static int bringup_cpu(unsigned int cpu) +@@ -587,6 +590,12 @@ static int bringup_cpu(unsigned int cpu) + struct task_struct *idle = idle_thread_get(cpu); + int ret; -+#define torture_init_error(firsterr) \ -+({ \ -+ int ___firsterr = (firsterr); \ -+ \ -+ WARN_ONCE(!IS_MODULE(CONFIG_RCU_TORTURE_TEST) && ___firsterr < 0, "Torture-test initialization failed with error code %d\n", ___firsterr); \ -+ ___firsterr < 0; \ -+}) ++ /* ++ * Reset stale stack state from the last time this CPU was online. ++ */ ++ scs_task_reset(idle); ++ kasan_unpoison_task_stack(idle); + - /* Definitions for online/offline exerciser. */ - #ifdef CONFIG_HOTPLUG_CPU - int torture_num_online_cpus(void); -diff --git a/include/linux/tpm.h b/include/linux/tpm.h -index aa11fe323c56b..12d827734686d 100644 ---- a/include/linux/tpm.h -+++ b/include/linux/tpm.h -@@ -269,6 +269,7 @@ enum tpm2_cc_attrs { - #define TPM_VID_INTEL 0x8086 - #define TPM_VID_WINBOND 0x1050 - #define TPM_VID_STM 0x104A -+#define TPM_VID_ATML 0x1114 - - enum tpm_chip_flags { - TPM_CHIP_FLAG_TPM2 = BIT(1), -diff --git a/include/linux/tpm_eventlog.h b/include/linux/tpm_eventlog.h -index 739ba9a03ec16..20c0ff54b7a0d 100644 ---- a/include/linux/tpm_eventlog.h -+++ b/include/linux/tpm_eventlog.h -@@ -157,7 +157,7 @@ struct tcg_algorithm_info { - * Return: size of the event on success, 0 on failure - */ + /* + * Some architectures have to walk the irq descriptors to + * setup the vector space for the cpu which comes online. 
+@@ -653,21 +662,51 @@ static bool cpuhp_next_state(bool bringup, + return true; + } --static inline int __calc_tpm2_event_size(struct tcg_pcr_event2_head *event, -+static __always_inline int __calc_tpm2_event_size(struct tcg_pcr_event2_head *event, - struct tcg_pcr_event *event_header, - bool do_mapping) +-static int cpuhp_invoke_callback_range(bool bringup, +- unsigned int cpu, +- struct cpuhp_cpu_state *st, +- enum cpuhp_state target) ++static int __cpuhp_invoke_callback_range(bool bringup, ++ unsigned int cpu, ++ struct cpuhp_cpu_state *st, ++ enum cpuhp_state target, ++ bool nofail) { -diff --git a/include/linux/trace.h b/include/linux/trace.h -index bf169612ffe12..b5e16e438448f 100644 ---- a/include/linux/trace.h -+++ b/include/linux/trace.h -@@ -2,8 +2,6 @@ - #ifndef _LINUX_TRACE_H - #define _LINUX_TRACE_H - --#ifdef CONFIG_TRACING -- - #define TRACE_EXPORT_FUNCTION BIT(0) - #define TRACE_EXPORT_EVENT BIT(1) - #define TRACE_EXPORT_MARKER BIT(2) -@@ -28,6 +26,8 @@ struct trace_export { - int flags; - }; + enum cpuhp_state state; +- int err = 0; ++ int ret = 0; -+#ifdef CONFIG_TRACING + while (cpuhp_next_state(bringup, &state, st, target)) { ++ int err; + - int register_ftrace_export(struct trace_export *export); - int unregister_ftrace_export(struct trace_export *export); - -@@ -48,6 +48,38 @@ void osnoise_arch_unregister(void); - void osnoise_trace_irq_entry(int id); - void osnoise_trace_irq_exit(int id, const char *desc); + err = cpuhp_invoke_callback(cpu, state, bringup, NULL, NULL); +- if (err) ++ if (!err) ++ continue; ++ ++ if (nofail) { ++ pr_warn("CPU %u %s state %s (%d) failed (%d)\n", ++ cpu, bringup ? "UP" : "DOWN", ++ cpuhp_get_step(st->state)->name, ++ st->state, err); ++ ret = -1; ++ } else { ++ ret = err; + break; ++ } + } -+#else /* CONFIG_TRACING */ -+static inline int register_ftrace_export(struct trace_export *export) -+{ -+ return -EINVAL; -+} -+static inline int unregister_ftrace_export(struct trace_export *export) -+{ -+ return 0; -+} -+static inline void trace_printk_init_buffers(void) -+{ -+} -+static inline int trace_array_printk(struct trace_array *tr, unsigned long ip, -+ const char *fmt, ...) 
-+{ -+ return 0; -+} -+static inline int trace_array_init_printk(struct trace_array *tr) -+{ -+ return -EINVAL; -+} -+static inline void trace_array_put(struct trace_array *tr) -+{ +- return err; ++ return ret; +} -+static inline struct trace_array *trace_array_get_by_name(const char *name) ++ ++static inline int cpuhp_invoke_callback_range(bool bringup, ++ unsigned int cpu, ++ struct cpuhp_cpu_state *st, ++ enum cpuhp_state target) +{ -+ return NULL; ++ return __cpuhp_invoke_callback_range(bringup, cpu, st, target, false); +} -+static inline int trace_array_destroy(struct trace_array *tr) ++ ++static inline void cpuhp_invoke_callback_range_nofail(bool bringup, ++ unsigned int cpu, ++ struct cpuhp_cpu_state *st, ++ enum cpuhp_state target) +{ -+ return 0; -+} - #endif /* CONFIG_TRACING */ ++ __cpuhp_invoke_callback_range(bringup, cpu, st, target, true); + } - #endif /* _LINUX_TRACE_H */ -diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h -index 3e475eeb5a995..ff137179e0c30 100644 ---- a/include/linux/trace_events.h -+++ b/include/linux/trace_events.h -@@ -91,6 +91,7 @@ struct trace_iterator { - unsigned int temp_size; - char *fmt; /* modified format holder */ - unsigned int fmt_size; -+ long wait_index; + static inline bool can_rollback_cpu(struct cpuhp_cpu_state *st) +@@ -696,7 +735,7 @@ static int cpuhp_up_callbacks(unsigned int cpu, struct cpuhp_cpu_state *st, + ret, cpu, cpuhp_get_step(st->state)->name, + st->state); - /* trace_seq for __print_flags() and __print_symbolic() etc. */ - struct trace_seq tmp_seq; -@@ -673,7 +674,7 @@ struct trace_event_file { +- cpuhp_reset_state(st, prev_state); ++ cpuhp_reset_state(cpu, st, prev_state); + if (can_rollback_cpu(st)) + WARN_ON(cpuhp_invoke_callback_range(false, cpu, st, + prev_state)); +@@ -713,7 +752,6 @@ static void cpuhp_create(unsigned int cpu) - #define PERF_MAX_TRACE_SIZE 2048 + init_completion(&st->done_up); + init_completion(&st->done_down); +- st->cpu = cpu; + } --#define MAX_FILTER_STR_VAL 256 /* Should handle KSYM_SYMBOL_LEN */ -+#define MAX_FILTER_STR_VAL 256U /* Should handle KSYM_SYMBOL_LEN */ + static int cpuhp_should_run(unsigned int cpu) +@@ -867,7 +905,7 @@ static int cpuhp_kick_ap_work(unsigned int cpu) + cpuhp_lock_release(true); - enum event_trigger_type { - ETT_NONE = (0), -diff --git a/include/linux/tty_flip.h b/include/linux/tty_flip.h -index 32284992b31a0..1fb727b7b969a 100644 ---- a/include/linux/tty_flip.h -+++ b/include/linux/tty_flip.h -@@ -17,7 +17,6 @@ extern int tty_insert_flip_string_fixed_flag(struct tty_port *port, - extern int tty_prepare_flip_string(struct tty_port *port, - unsigned char **chars, size_t size); - extern void tty_flip_buffer_push(struct tty_port *port); --void tty_schedule_flip(struct tty_port *port); - int __tty_insert_flip_char(struct tty_port *port, unsigned char ch, char flag); + trace_cpuhp_enter(cpu, st->target, prev_state, cpuhp_kick_ap_work); +- ret = cpuhp_kick_ap(st, st->target); ++ ret = cpuhp_kick_ap(cpu, st, st->target); + trace_cpuhp_exit(cpu, st->state, prev_state, ret); - static inline int tty_insert_flip_char(struct tty_port *port, -diff --git a/include/linux/uacce.h b/include/linux/uacce.h -index 48e319f402751..9ce88c28b0a87 100644 ---- a/include/linux/uacce.h -+++ b/include/linux/uacce.h -@@ -70,6 +70,7 @@ enum uacce_q_state { - * @wait: wait queue head - * @list: index into uacce queues list - * @qfrs: pointer of qfr regions -+ * @mutex: protects queue state - * @state: queue state machine - * @pasid: pasid associated to the mm - * @handle: 
iommu_sva handle returned by iommu_sva_bind_device() -@@ -80,6 +81,7 @@ struct uacce_queue { - wait_queue_head_t wait; - struct list_head list; - struct uacce_qfile_region *qfrs[UACCE_MAX_REGION]; -+ struct mutex mutex; - enum uacce_q_state state; - u32 pasid; - struct iommu_sva *handle; -@@ -97,9 +99,9 @@ struct uacce_queue { - * @dev_id: id of the uacce device - * @cdev: cdev of the uacce - * @dev: dev of the uacce -+ * @mutex: protects uacce operation - * @priv: private pointer of the uacce - * @queues: list of queues -- * @queues_lock: lock for queues list - * @inode: core vfs - */ - struct uacce_device { -@@ -113,9 +115,9 @@ struct uacce_device { - u32 dev_id; - struct cdev *cdev; - struct device dev; -+ struct mutex mutex; - void *priv; - struct list_head queues; -- struct mutex queues_lock; - struct inode *inode; - }; + return ret; +@@ -986,7 +1024,6 @@ static int take_cpu_down(void *_param) + struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state); + enum cpuhp_state target = max((int)st->target, CPUHP_AP_OFFLINE); + int err, cpu = smp_processor_id(); +- int ret; -diff --git a/include/linux/udp.h b/include/linux/udp.h -index ae66dadd85434..0727276e7538c 100644 ---- a/include/linux/udp.h -+++ b/include/linux/udp.h -@@ -75,6 +75,7 @@ struct udp_sock { - * For encapsulation sockets. + /* Ensure this CPU doesn't handle any more interrupts. */ + err = __cpu_disable(); +@@ -999,13 +1036,10 @@ static int take_cpu_down(void *_param) */ - int (*encap_rcv)(struct sock *sk, struct sk_buff *skb); -+ void (*encap_err_rcv)(struct sock *sk, struct sk_buff *skb, unsigned int udp_offset); - int (*encap_err_lookup)(struct sock *sk, struct sk_buff *skb); - void (*encap_destroy)(struct sock *sk); - -diff --git a/include/linux/uio.h b/include/linux/uio.h -index 207101a9c5c32..6350354f97e90 100644 ---- a/include/linux/uio.h -+++ b/include/linux/uio.h -@@ -35,6 +35,7 @@ struct iov_iter_state { - - struct iov_iter { - u8 iter_type; -+ bool nofault; - bool data_source; - size_t iov_offset; - size_t count; -@@ -133,7 +134,8 @@ size_t copy_page_from_iter_atomic(struct page *page, unsigned offset, - size_t bytes, struct iov_iter *i); - void iov_iter_advance(struct iov_iter *i, size_t bytes); - void iov_iter_revert(struct iov_iter *i, size_t bytes); --int iov_iter_fault_in_readable(const struct iov_iter *i, size_t bytes); -+size_t fault_in_iov_iter_readable(const struct iov_iter *i, size_t bytes); -+size_t fault_in_iov_iter_writeable(const struct iov_iter *i, size_t bytes); - size_t iov_iter_single_seg_count(const struct iov_iter *i); - size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes, - struct iov_iter *i); -diff --git a/include/linux/usb.h b/include/linux/usb.h -index 7ccaa76a9a968..da1329b85329b 100644 ---- a/include/linux/usb.h -+++ b/include/linux/usb.h -@@ -575,6 +575,7 @@ struct usb3_lpm_parameters { - * @devaddr: device address, XHCI: assigned by HW, others: same as devnum - * @can_submit: URBs may be submitted - * @persist_enabled: USB_PERSIST enabled for this device -+ * @reset_in_progress: the device is being reset - * @have_langid: whether string_langid is valid - * @authorized: policy has said we can use it; - * (user space) policy determines if we authorize this device to be -@@ -661,6 +662,7 @@ struct usb_device { - - unsigned can_submit:1; - unsigned persist_enabled:1; -+ unsigned reset_in_progress:1; - unsigned have_langid:1; - unsigned authorized:1; - unsigned authenticated:1; -diff --git a/include/linux/usb/hcd.h b/include/linux/usb/hcd.h -index 
2c1fc9212cf28..98d1921f02b1e 100644 ---- a/include/linux/usb/hcd.h -+++ b/include/linux/usb/hcd.h -@@ -66,6 +66,7 @@ - - struct giveback_urb_bh { - bool running; -+ bool high_prio; - spinlock_t lock; - struct list_head head; - struct tasklet_struct bh; -diff --git a/include/linux/usb/role.h b/include/linux/usb/role.h -index 031f148ab3734..b5deafd91f67b 100644 ---- a/include/linux/usb/role.h -+++ b/include/linux/usb/role.h -@@ -91,6 +91,12 @@ fwnode_usb_role_switch_get(struct fwnode_handle *node) - - static inline void usb_role_switch_put(struct usb_role_switch *sw) { } + WARN_ON(st->state != (CPUHP_TEARDOWN_CPU - 1)); -+static inline struct usb_role_switch * -+usb_role_switch_find_by_fwnode(const struct fwnode_handle *fwnode) -+{ -+ return NULL; -+} -+ - static inline struct usb_role_switch * - usb_role_switch_register(struct device *parent, - const struct usb_role_switch_desc *desc) -diff --git a/include/linux/usb/typec_dp.h b/include/linux/usb/typec_dp.h -index cfb916cccd316..8d09c2f0a9b80 100644 ---- a/include/linux/usb/typec_dp.h -+++ b/include/linux/usb/typec_dp.h -@@ -73,6 +73,11 @@ enum { - #define DP_CAP_USB BIT(7) - #define DP_CAP_DFP_D_PIN_ASSIGN(_cap_) (((_cap_) & GENMASK(15, 8)) >> 8) - #define DP_CAP_UFP_D_PIN_ASSIGN(_cap_) (((_cap_) & GENMASK(23, 16)) >> 16) -+/* Get pin assignment taking plug & receptacle into consideration */ -+#define DP_CAP_PIN_ASSIGN_UFP_D(_cap_) ((_cap_ & DP_CAP_RECEPTACLE) ? \ -+ DP_CAP_UFP_D_PIN_ASSIGN(_cap_) : DP_CAP_DFP_D_PIN_ASSIGN(_cap_)) -+#define DP_CAP_PIN_ASSIGN_DFP_D(_cap_) ((_cap_ & DP_CAP_RECEPTACLE) ? \ -+ DP_CAP_DFP_D_PIN_ASSIGN(_cap_) : DP_CAP_UFP_D_PIN_ASSIGN(_cap_)) +- /* Invoke the former CPU_DYING callbacks */ +- ret = cpuhp_invoke_callback_range(false, cpu, st, target); +- + /* +- * DYING must not fail! ++ * Invoke the former CPU_DYING callbacks. DYING must not fail! 
+ */ +- WARN_ON_ONCE(ret); ++ cpuhp_invoke_callback_range_nofail(false, cpu, st, target); - /* DisplayPort Status Update VDO bits */ - #define DP_STATUS_CONNECTION(_status_) ((_status_) & 3) -diff --git a/include/linux/vfio_pci_core.h b/include/linux/vfio_pci_core.h -index ef9a44b6cf5d5..6e5db4edc3359 100644 ---- a/include/linux/vfio_pci_core.h -+++ b/include/linux/vfio_pci_core.h -@@ -133,6 +133,8 @@ struct vfio_pci_core_device { - struct mutex ioeventfds_lock; - struct list_head ioeventfds_list; - struct vfio_pci_vf_token *vf_token; -+ struct list_head sriov_pfs_item; -+ struct vfio_pci_core_device *sriov_pf_core_dev; - struct notifier_block nb; - struct mutex vma_lock; - struct list_head vma_list; -@@ -159,8 +161,17 @@ extern ssize_t vfio_pci_config_rw(struct vfio_pci_core_device *vdev, - extern ssize_t vfio_pci_bar_rw(struct vfio_pci_core_device *vdev, char __user *buf, - size_t count, loff_t *ppos, bool iswrite); + /* Give up timekeeping duties */ + tick_handover_do_timer(); +@@ -1099,7 +1133,7 @@ static int cpuhp_down_callbacks(unsigned int cpu, struct cpuhp_cpu_state *st, + ret, cpu, cpuhp_get_step(st->state)->name, + st->state); -+#ifdef CONFIG_VFIO_PCI_VGA - extern ssize_t vfio_pci_vga_rw(struct vfio_pci_core_device *vdev, char __user *buf, - size_t count, loff_t *ppos, bool iswrite); -+#else -+static inline ssize_t vfio_pci_vga_rw(struct vfio_pci_core_device *vdev, -+ char __user *buf, size_t count, -+ loff_t *ppos, bool iswrite) -+{ -+ return -EINVAL; -+} -+#endif +- cpuhp_reset_state(st, prev_state); ++ cpuhp_reset_state(cpu, st, prev_state); - extern long vfio_pci_ioeventfd(struct vfio_pci_core_device *vdev, loff_t offset, - uint64_t data, int count, int fd); -diff --git a/include/linux/virtio.h b/include/linux/virtio.h -index 41edbc01ffa40..1af8d65d4c8f7 100644 ---- a/include/linux/virtio.h -+++ b/include/linux/virtio.h -@@ -133,7 +133,6 @@ bool is_virtio_device(struct device *dev); - void virtio_break_device(struct virtio_device *dev); + if (st->state < prev_state) + WARN_ON(cpuhp_invoke_callback_range(true, cpu, st, +@@ -1126,7 +1160,7 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen, - void virtio_config_changed(struct virtio_device *dev); --int virtio_finalize_features(struct virtio_device *dev); - #ifdef CONFIG_PM_SLEEP - int virtio_device_freeze(struct virtio_device *dev); - int virtio_device_restore(struct virtio_device *dev); -diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h -index 8519b3ae5d52e..b341dd62aa4da 100644 ---- a/include/linux/virtio_config.h -+++ b/include/linux/virtio_config.h -@@ -62,8 +62,9 @@ struct virtio_shm_region { - * Returns the first 64 feature bits (all we currently need). - * @finalize_features: confirm what device features we'll be using. - * vdev: the virtio_device -- * This gives the final feature bits for the device: it can change -+ * This sends the driver feature bits to the device: it can change - * the dev->feature bits if it wants. -+ * Note: despite the name this can be called any number of times. 
- * Returns 0 on success or error status - * @bus_name: return the bus name associated with the device (optional) - * vdev: the virtio_device -diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h -index b465f8f3e554f..a960de68ac69e 100644 ---- a/include/linux/virtio_net.h -+++ b/include/linux/virtio_net.h -@@ -7,9 +7,27 @@ - #include <uapi/linux/udp.h> - #include <uapi/linux/virtio_net.h> + cpuhp_tasks_frozen = tasks_frozen; -+static inline bool virtio_net_hdr_match_proto(__be16 protocol, __u8 gso_type) -+{ -+ switch (gso_type & ~VIRTIO_NET_HDR_GSO_ECN) { -+ case VIRTIO_NET_HDR_GSO_TCPV4: -+ return protocol == cpu_to_be16(ETH_P_IP); -+ case VIRTIO_NET_HDR_GSO_TCPV6: -+ return protocol == cpu_to_be16(ETH_P_IPV6); -+ case VIRTIO_NET_HDR_GSO_UDP: -+ return protocol == cpu_to_be16(ETH_P_IP) || -+ protocol == cpu_to_be16(ETH_P_IPV6); -+ default: -+ return false; -+ } -+} -+ - static inline int virtio_net_hdr_set_proto(struct sk_buff *skb, - const struct virtio_net_hdr *hdr) +- prev_state = cpuhp_set_state(st, target); ++ prev_state = cpuhp_set_state(cpu, st, target); + /* + * If the current CPU state is in the range of the AP hotplug thread, + * then we need to kick the thread. +@@ -1157,7 +1191,7 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen, + ret = cpuhp_down_callbacks(cpu, st, target); + if (ret && st->state < prev_state) { + if (st->state == CPUHP_TEARDOWN_CPU) { +- cpuhp_reset_state(st, prev_state); ++ cpuhp_reset_state(cpu, st, prev_state); + __cpuhp_kick_ap(st); + } else { + WARN(1, "DEAD callback error for CPU%d", cpu); +@@ -1277,16 +1311,14 @@ void notify_cpu_starting(unsigned int cpu) { -+ if (skb->protocol) -+ return 0; -+ - switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) { - case VIRTIO_NET_HDR_GSO_TCPV4: - case VIRTIO_NET_HDR_GSO_UDP: -@@ -88,9 +106,12 @@ static inline int virtio_net_hdr_to_skb(struct sk_buff *skb, - if (!skb->protocol) { - __be16 protocol = dev_parse_header_protocol(skb); + struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu); + enum cpuhp_state target = min((int)st->target, CPUHP_AP_ONLINE); +- int ret; -- virtio_net_hdr_set_proto(skb, hdr); -- if (protocol && protocol != skb->protocol) -+ if (!protocol) -+ virtio_net_hdr_set_proto(skb, hdr); -+ else if (!virtio_net_hdr_match_proto(protocol, hdr->gso_type)) - return -EINVAL; -+ else -+ skb->protocol = protocol; - } - retry: - if (!skb_flow_dissect_flow_keys_basic(NULL, skb, &keys, -@@ -120,10 +141,15 @@ retry: + rcu_cpu_starting(cpu); /* Enables RCU usage on this CPU. */ + cpumask_set_cpu(cpu, &cpus_booted_once_mask); +- ret = cpuhp_invoke_callback_range(true, cpu, st, target); - if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) { - u16 gso_size = __virtio16_to_cpu(little_endian, hdr->gso_size); -+ unsigned int nh_off = p_off; - struct skb_shared_info *shinfo = skb_shinfo(skb); + /* + * STARTING must not fail! + */ +- WARN_ON_ONCE(ret); ++ cpuhp_invoke_callback_range_nofail(true, cpu, st, target); + } -+ /* UFO may not include transport header in gso_size. */ -+ if (gso_type & SKB_GSO_UDP) -+ nh_off -= thlen; -+ - /* Too small packets are not really GSO ones. 
*/ -- if (skb->len - p_off > gso_size) { -+ if (skb->len - nh_off > gso_size) { - shinfo->gso_size = gso_size; - shinfo->gso_type = gso_type; + /* +@@ -1344,7 +1376,7 @@ static int _cpu_up(unsigned int cpu, int tasks_frozen, enum cpuhp_state target) -diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h -index 671d402c3778f..5535be1012a28 100644 ---- a/include/linux/vmalloc.h -+++ b/include/linux/vmalloc.h -@@ -28,6 +28,13 @@ struct notifier_block; /* in notifier.h */ - #define VM_MAP_PUT_PAGES 0x00000200 /* put pages and free array in vfree */ - #define VM_NO_HUGE_VMAP 0x00000400 /* force PAGE_SIZE pte mapping */ + cpuhp_tasks_frozen = tasks_frozen; -+#if (defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)) && \ -+ !defined(CONFIG_KASAN_VMALLOC) -+#define VM_DEFER_KMEMLEAK 0x00000800 /* defer kmemleak object creation */ -+#else -+#define VM_DEFER_KMEMLEAK 0 -+#endif -+ - /* - * VM_KASAN is used slightly differently depending on CONFIG_KASAN_VMALLOC. - * -@@ -152,6 +159,11 @@ void *__vmalloc_node(unsigned long size, unsigned long align, gfp_t gfp_mask, - int node, const void *caller); - void *vmalloc_no_huge(unsigned long size); +- cpuhp_set_state(st, target); ++ cpuhp_set_state(cpu, st, target); + /* + * If the current CPU state is in the range of the AP hotplug thread, + * then we need to kick the thread once more. +@@ -1652,6 +1684,11 @@ static struct cpuhp_step cpuhp_hp_states[] = { + .startup.single = perf_event_init_cpu, + .teardown.single = perf_event_exit_cpu, + }, ++ [CPUHP_RANDOM_PREPARE] = { ++ .name = "random:prepare", ++ .startup.single = random_prepare_cpu, ++ .teardown.single = NULL, ++ }, + [CPUHP_WORKQUEUE_PREP] = { + .name = "workqueue:prepare", + .startup.single = workqueue_prepare_cpu, +@@ -1775,6 +1812,11 @@ static struct cpuhp_step cpuhp_hp_states[] = { + .startup.single = workqueue_online_cpu, + .teardown.single = workqueue_offline_cpu, + }, ++ [CPUHP_AP_RANDOM_ONLINE] = { ++ .name = "random:online", ++ .startup.single = random_online_cpu, ++ .teardown.single = NULL, ++ }, + [CPUHP_AP_RCUTREE_ONLINE] = { + .name = "RCU/tree:online", + .startup.single = rcutree_online_cpu, +@@ -2297,8 +2339,10 @@ static ssize_t target_store(struct device *dev, struct device_attribute *attr, -+extern void *__vmalloc_array(size_t n, size_t size, gfp_t flags) __alloc_size(1, 2); -+extern void *vmalloc_array(size_t n, size_t size) __alloc_size(1, 2); -+extern void *__vcalloc(size_t n, size_t size, gfp_t flags) __alloc_size(1, 2); -+extern void *vcalloc(size_t n, size_t size) __alloc_size(1, 2); -+ - extern void vfree(const void *addr); - extern void vfree_atomic(const void *addr); + if (st->state < target) + ret = cpu_up(dev->id, target); +- else ++ else if (st->state > target) + ret = cpu_down(dev->id, target); ++ else if (WARN_ON(st->target != target)) ++ st->target = target; + out: + unlock_device_hotplug(); + return ret ? 
ret : count; +diff --git a/kernel/crash_core.c b/kernel/crash_core.c +index eb53f5ec62c90..256cf6db573cd 100644 +--- a/kernel/crash_core.c ++++ b/kernel/crash_core.c +@@ -6,6 +6,7 @@ -diff --git a/include/linux/wait.h b/include/linux/wait.h -index 93dab0e9580f8..21044562aab74 100644 ---- a/include/linux/wait.h -+++ b/include/linux/wait.h -@@ -217,6 +217,7 @@ void __wake_up_sync_key(struct wait_queue_head *wq_head, unsigned int mode, void - void __wake_up_locked_sync_key(struct wait_queue_head *wq_head, unsigned int mode, void *key); - void __wake_up_locked(struct wait_queue_head *wq_head, unsigned int mode, int nr); - void __wake_up_sync(struct wait_queue_head *wq_head, unsigned int mode); -+void __wake_up_pollfree(struct wait_queue_head *wq_head); + #include <linux/buildid.h> + #include <linux/crash_core.h> ++#include <linux/init.h> + #include <linux/utsname.h> + #include <linux/vmalloc.h> - #define wake_up(x) __wake_up(x, TASK_NORMAL, 1, NULL) - #define wake_up_nr(x, nr) __wake_up(x, TASK_NORMAL, nr, NULL) -@@ -245,6 +246,31 @@ void __wake_up_sync(struct wait_queue_head *wq_head, unsigned int mode); - #define wake_up_interruptible_sync_poll_locked(x, m) \ - __wake_up_locked_sync_key((x), TASK_INTERRUPTIBLE, poll_to_key(m)) +@@ -295,6 +296,16 @@ int __init parse_crashkernel_low(char *cmdline, + "crashkernel=", suffix_tbl[SUFFIX_LOW]); + } -+/** -+ * wake_up_pollfree - signal that a polled waitqueue is going away -+ * @wq_head: the wait queue head -+ * -+ * In the very rare cases where a ->poll() implementation uses a waitqueue whose -+ * lifetime is tied to a task rather than to the 'struct file' being polled, -+ * this function must be called before the waitqueue is freed so that -+ * non-blocking polls (e.g. epoll) are notified that the queue is going away. -+ * -+ * The caller must also RCU-delay the freeing of the wait_queue_head, e.g. via -+ * an explicit synchronize_rcu() or call_rcu(), or via SLAB_TYPESAFE_BY_RCU. ++/* ++ * Add a dummy early_param handler to mark crashkernel= as a known command line ++ * parameter and suppress incorrect warnings in init/main.c. + */ -+static inline void wake_up_pollfree(struct wait_queue_head *wq_head) ++static int __init parse_crashkernel_dummy(char *arg) +{ -+ /* -+ * For performance reasons, we don't always take the queue lock here. -+ * Therefore, we might race with someone removing the last entry from -+ * the queue, and proceed while they still hold the queue lock. -+ * However, rcu_read_lock() is required to be held in such cases, so we -+ * can safely proceed with an RCU-delayed free. 
-+ */ -+ if (waitqueue_active(wq_head)) -+ __wake_up_pollfree(wq_head); ++ return 0; +} ++early_param("crashkernel", parse_crashkernel_dummy); + - #define ___wait_cond_timeout(condition) \ - ({ \ - bool __cond = (condition); \ -@@ -518,10 +544,11 @@ do { \ - \ - hrtimer_init_sleeper_on_stack(&__t, CLOCK_MONOTONIC, \ - HRTIMER_MODE_REL); \ -- if ((timeout) != KTIME_MAX) \ -- hrtimer_start_range_ns(&__t.timer, timeout, \ -- current->timer_slack_ns, \ -- HRTIMER_MODE_REL); \ -+ if ((timeout) != KTIME_MAX) { \ -+ hrtimer_set_expires_range_ns(&__t.timer, timeout, \ -+ current->timer_slack_ns); \ -+ hrtimer_sleeper_start_expires(&__t, HRTIMER_MODE_REL); \ -+ } \ - \ - __ret = ___wait_event(wq_head, condition, state, 0, 0, \ - if (!__t.task) { \ -diff --git a/include/linux/watch_queue.h b/include/linux/watch_queue.h -index c994d1b2cdbaa..3b9a40ae8bdba 100644 ---- a/include/linux/watch_queue.h -+++ b/include/linux/watch_queue.h -@@ -28,7 +28,8 @@ struct watch_type_filter { - struct watch_filter { - union { - struct rcu_head rcu; -- unsigned long type_filter[2]; /* Bitmask of accepted types */ -+ /* Bitmask of accepted types */ -+ DECLARE_BITMAP(type_filter, WATCH_TYPE__NR); - }; - u32 nr_filters; /* Number of filters */ - struct watch_type_filter filters[]; -diff --git a/include/media/cec.h b/include/media/cec.h -index 208c9613c07eb..77346f757036d 100644 ---- a/include/media/cec.h -+++ b/include/media/cec.h -@@ -26,13 +26,17 @@ - * @dev: cec device - * @cdev: cec character device - * @minor: device node minor number -+ * @lock: lock to serialize open/release and registration - * @registered: the device was correctly registered - * @unregistered: the device was unregistered -+ * @lock_fhs: lock to control access to @fhs - * @fhs: the list of open filehandles (cec_fh) -- * @lock: lock to control access to this structure - * - * This structure represents a cec-related device node. - * -+ * To add or remove filehandles from @fhs the @lock must be taken first, -+ * followed by @lock_fhs. It is safe to access @fhs if either lock is held. -+ * - * The @parent is a physical device. It must be set by core or device drivers - * before registering the node. - */ -@@ -43,10 +47,13 @@ struct cec_devnode { + Elf_Word *append_elf_note(Elf_Word *buf, char *name, unsigned int type, + void *data, size_t data_len) + { +diff --git a/kernel/cred.c b/kernel/cred.c +index 1ae0b4948a5a8..933155c969227 100644 +--- a/kernel/cred.c ++++ b/kernel/cred.c +@@ -665,26 +665,20 @@ EXPORT_SYMBOL(cred_fscmp); - /* device info */ - int minor; -+ /* serialize open/release and registration */ -+ struct mutex lock; - bool registered; - bool unregistered; -+ /* protect access to fhs */ -+ struct mutex lock_fhs; - struct list_head fhs; -- struct mutex lock; - }; + int set_cred_ucounts(struct cred *new) + { +- struct task_struct *task = current; +- const struct cred *old = task->real_cred; + struct ucounts *new_ucounts, *old_ucounts = new->ucounts; - struct cec_adapter; -diff --git a/include/media/videobuf2-core.h b/include/media/videobuf2-core.h -index 12955cb460d23..3b5986cee0739 100644 ---- a/include/media/videobuf2-core.h -+++ b/include/media/videobuf2-core.h -@@ -46,6 +46,7 @@ enum vb2_memory { +- if (new->user == old->user && new->user_ns == old->user_ns) +- return 0; +- + /* + * This optimization is needed because alloc_ucounts() uses locks + * for table lookups. 
+ */
+- if (old_ucounts && old_ucounts->ns == new->user_ns && uid_eq(old_ucounts->uid, new->euid))
++ if (old_ucounts->ns == new->user_ns && uid_eq(old_ucounts->uid, new->uid))
+ return 0;
+
+- if (!(new_ucounts = alloc_ucounts(new->user_ns, new->euid)))
++ if (!(new_ucounts = alloc_ucounts(new->user_ns, new->uid)))
+ return -EAGAIN;
+
+ new->ucounts = new_ucounts;
+- if (old_ucounts)
+- put_ucounts(old_ucounts);
++ put_ucounts(old_ucounts);
+
+ return 0;
+ }
+diff --git a/kernel/debug/debug_core.c b/kernel/debug/debug_core.c
+index da06a5553835b..7beceb447211d 100644
+--- a/kernel/debug/debug_core.c
++++ b/kernel/debug/debug_core.c
+@@ -53,6 +53,7 @@
+ #include <linux/vmacache.h>
+ #include <linux/rcupdate.h>
+ #include <linux/irq.h>
++#include <linux/security.h>
+
+ #include <asm/cacheflush.h>
+ #include <asm/byteorder.h>
+@@ -752,6 +753,29 @@ cpu_master_loop:
+ continue;
+ kgdb_connected = 0;
+ } else {
++ /*
++ * This is a brutal way to interfere with the debugger
++ * and prevent gdb being used to poke at kernel memory.
++ * This could cause trouble if lockdown is applied when
++ * there is already an active gdb session. For now the
++ * answer is simply "don't do that". Typically lockdown
++ * *will* be applied before the debug core gets started
++ * so only developers using kgdb for fairly advanced
++ * early kernel debug can be bitten by this. Hopefully
++ * they are sophisticated enough to take care of
++ * themselves, especially with help from the lockdown
++ * message printed on the console!
++ */
++ if (security_locked_down(LOCKDOWN_DBG_WRITE_KERNEL)) {
++ if (IS_ENABLED(CONFIG_KGDB_KDB)) {
++ /* Switch back to kdb if possible... */
++ dbg_kdb_mode = 1;
++ continue;
++ } else {
++ /* ... 
otherwise just bail */
++ break;
++ }
++ }
+ error = gdb_serial_stub(ks);
+ }
+
+diff --git a/kernel/debug/kdb/kdb_bt.c b/kernel/debug/kdb/kdb_bt.c
+index 1f9f0e47aedaa..10b454554ab03 100644
+--- a/kernel/debug/kdb/kdb_bt.c
++++ b/kernel/debug/kdb/kdb_bt.c
+@@ -46,7 +46,7 @@ static void kdb_show_stack(struct task_struct *p, void *addr)
+ * btp <pid> Kernel stack for <pid>
+ * btt <address-expression> Kernel stack for task structure at
+ * <address-expression>
+- * bta [DRSTCZEUIMA] All useful processes, optionally
++ * bta [<state_chars>|A] All useful processes, optionally
+ * filtered by state
+ * btc [<cpu>] The current process on one cpu,
+ * default is all cpus
+@@ -74,7 +74,7 @@ static void kdb_show_stack(struct task_struct *p, void *addr)
+ */
+
+ static int
+-kdb_bt1(struct task_struct *p, unsigned long mask, bool btaprompt)
++kdb_bt1(struct task_struct *p, const char *mask, bool btaprompt)
+ {
+ char ch;
+
+@@ -120,7 +120,7 @@ kdb_bt_cpu(unsigned long cpu)
+ return;
+ }
+
+- kdb_bt1(kdb_tsk, ~0UL, false);
++ kdb_bt1(kdb_tsk, "A", false);
+ }
+
+ int
+@@ -138,8 +138,8 @@ kdb_bt(int argc, const char **argv)
+ if (strcmp(argv[0], "bta") == 0) {
+ struct task_struct *g, *p;
+ unsigned long cpu;
+- unsigned long mask = kdb_task_state_string(argc ? argv[1] :
+- NULL);
++ const char *mask = argc ? 
argv[1] : kdbgetenv("PS"); ++ + if (argc == 0) + kdb_ps_suppressed(); + /* Run the active tasks first */ +@@ -167,7 +167,7 @@ kdb_bt(int argc, const char **argv) + return diag; + p = find_task_by_pid_ns(pid, &init_pid_ns); + if (p) +- return kdb_bt1(p, ~0UL, false); ++ return kdb_bt1(p, "A", false); + kdb_printf("No process with pid == %ld found\n", pid); + return 0; + } else if (strcmp(argv[0], "btt") == 0) { +@@ -176,7 +176,7 @@ kdb_bt(int argc, const char **argv) + diag = kdbgetularg((char *)argv[1], &addr); + if (diag) + return diag; +- return kdb_bt1((struct task_struct *)addr, ~0UL, false); ++ return kdb_bt1((struct task_struct *)addr, "A", false); + } else if (strcmp(argv[0], "btc") == 0) { + unsigned long cpu = ~0; + if (argc > 1) +@@ -212,7 +212,7 @@ kdb_bt(int argc, const char **argv) + kdb_show_stack(kdb_current_task, (void *)addr); + return 0; + } else { +- return kdb_bt1(kdb_current_task, ~0UL, false); ++ return kdb_bt1(kdb_current_task, "A", false); + } + } - enum p9_debug_flags { -- P9_DEBUG_ERROR = (1<<0), -- P9_DEBUG_9P = (1<<2), -+ P9_DEBUG_ERROR = (1<<0), -+ P9_DEBUG_9P = (1<<2), - P9_DEBUG_VFS = (1<<3), - P9_DEBUG_CONV = (1<<4), - P9_DEBUG_MUX = (1<<5), - P9_DEBUG_TRANS = (1<<6), -- P9_DEBUG_SLABS = (1<<7), -+ P9_DEBUG_SLABS = (1<<7), - P9_DEBUG_FCALL = (1<<8), - P9_DEBUG_FID = (1<<9), - P9_DEBUG_PKT = (1<<10), -@@ -317,8 +317,8 @@ enum p9_qid_t { - }; +diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c +index fa6deda894a17..ead4da9471270 100644 +--- a/kernel/debug/kdb/kdb_main.c ++++ b/kernel/debug/kdb/kdb_main.c +@@ -45,6 +45,7 @@ + #include <linux/proc_fs.h> + #include <linux/uaccess.h> + #include <linux/slab.h> ++#include <linux/security.h> + #include "kdb_private.h" - /* 9P Magic Numbers */ --#define P9_NOTAG (u16)(~0) --#define P9_NOFID (u32)(~0) -+#define P9_NOTAG ((u16)(~0)) -+#define P9_NOFID ((u32)(~0)) - #define P9_MAXWELEM 16 + #undef MODULE_PARAM_PREFIX +@@ -166,10 +167,62 @@ struct task_struct *kdb_curr_task(int cpu) + } - /* Minimal header size: size[4] type[1] tag[2] */ -diff --git a/include/net/9p/client.h b/include/net/9p/client.h -index e1c308d8d288e..7060de84c5593 100644 ---- a/include/net/9p/client.h -+++ b/include/net/9p/client.h -@@ -23,7 +23,7 @@ - * @p9_proto_2000L: 9P2000.L extension + /* +- * Check whether the flags of the current command and the permissions +- * of the kdb console has allow a command to be run. ++ * Update the permissions flags (kdb_cmd_enabled) to match the ++ * current lockdown state. ++ * ++ * Within this function the calls to security_locked_down() are "lazy". We ++ * avoid calling them if the current value of kdb_cmd_enabled already excludes ++ * flags that might be subject to lockdown. Additionally we deliberately check ++ * the lockdown flags independently (even though read lockdown implies write ++ * lockdown) since that results in both simpler code and clearer messages to ++ * the user on first-time debugger entry. ++ * ++ * The permission masks during a read+write lockdown permits the following ++ * flags: INSPECT, SIGNAL, REBOOT (and ALWAYS_SAFE). ++ * ++ * The INSPECT commands are not blocked during lockdown because they are ++ * not arbitrary memory reads. INSPECT covers the backtrace family (sometimes ++ * forcing them to have no arguments) and lsmod. These commands do expose ++ * some kernel state but do not allow the developer seated at the console to ++ * choose what state is reported. 
SIGNAL and REBOOT should not be controversial, ++ * given these are allowed for root during lockdown already. ++ */ ++static void kdb_check_for_lockdown(void) ++{ ++ const int write_flags = KDB_ENABLE_MEM_WRITE | ++ KDB_ENABLE_REG_WRITE | ++ KDB_ENABLE_FLOW_CTRL; ++ const int read_flags = KDB_ENABLE_MEM_READ | ++ KDB_ENABLE_REG_READ; ++ ++ bool need_to_lockdown_write = false; ++ bool need_to_lockdown_read = false; ++ ++ if (kdb_cmd_enabled & (KDB_ENABLE_ALL | write_flags)) ++ need_to_lockdown_write = ++ security_locked_down(LOCKDOWN_DBG_WRITE_KERNEL); ++ ++ if (kdb_cmd_enabled & (KDB_ENABLE_ALL | read_flags)) ++ need_to_lockdown_read = ++ security_locked_down(LOCKDOWN_DBG_READ_KERNEL); ++ ++ /* De-compose KDB_ENABLE_ALL if required */ ++ if (need_to_lockdown_write || need_to_lockdown_read) ++ if (kdb_cmd_enabled & KDB_ENABLE_ALL) ++ kdb_cmd_enabled = KDB_ENABLE_MASK & ~KDB_ENABLE_ALL; ++ ++ if (need_to_lockdown_write) ++ kdb_cmd_enabled &= ~write_flags; ++ ++ if (need_to_lockdown_read) ++ kdb_cmd_enabled &= ~read_flags; ++} ++ ++/* ++ * Check whether the flags of the current command, the permissions of the kdb ++ * console and the lockdown state allow a command to be run. */ +-static inline bool kdb_check_flags(kdb_cmdflags_t flags, int permissions, ++static bool kdb_check_flags(kdb_cmdflags_t flags, int permissions, + bool no_args) + { + /* permissions comes from userspace so needs massaging slightly */ +@@ -1180,6 +1233,9 @@ static int kdb_local(kdb_reason_t reason, int error, struct pt_regs *regs, + kdb_curr_task(raw_smp_processor_id()); --enum p9_proto_versions{ -+enum p9_proto_versions { - p9_proto_legacy, - p9_proto_2000u, - p9_proto_2000L, -@@ -78,7 +78,7 @@ enum p9_req_status_t { - struct p9_req_t { - int status; - int t_err; -- struct kref refcount; -+ refcount_t refcount; - wait_queue_head_t wq; - struct p9_fcall tc; - struct p9_fcall rc; -@@ -219,36 +219,40 @@ struct p9_stat_dotl *p9_client_getattr_dotl(struct p9_fid *fid, - u64 request_mask); - - int p9_client_mknod_dotl(struct p9_fid *oldfid, const char *name, int mode, -- dev_t rdev, kgid_t gid, struct p9_qid *); -+ dev_t rdev, kgid_t gid, struct p9_qid *qid); - int p9_client_mkdir_dotl(struct p9_fid *fid, const char *name, int mode, -- kgid_t gid, struct p9_qid *); -+ kgid_t gid, struct p9_qid *qid); - int p9_client_lock_dotl(struct p9_fid *fid, struct p9_flock *flock, u8 *status); - int p9_client_getlock_dotl(struct p9_fid *fid, struct p9_getlock *fl); - void p9_fcall_fini(struct p9_fcall *fc); --struct p9_req_t *p9_tag_lookup(struct p9_client *, u16); -+struct p9_req_t *p9_tag_lookup(struct p9_client *c, u16 tag); - - static inline void p9_req_get(struct p9_req_t *r) + KDB_DEBUG_STATE("kdb_local 1", reason); ++ ++ kdb_check_for_lockdown(); ++ + kdb_go_count = 0; + if (reason == KDB_REASON_DEBUG) { + /* special case below */ +@@ -2203,8 +2259,8 @@ static void kdb_cpu_status(void) + state = 'D'; /* cpu is online but unresponsive */ + } else { + state = ' '; /* cpu is responding to kdb */ +- if (kdb_task_state_char(KDB_TSK(i)) == 'I') +- state = 'I'; /* idle task */ ++ if (kdb_task_state_char(KDB_TSK(i)) == '-') ++ state = '-'; /* idle task */ + } + if (state != prev_state) { + if (prev_state != '?') { +@@ -2271,37 +2327,30 @@ static int kdb_cpu(int argc, const char **argv) + void kdb_ps_suppressed(void) { -- kref_get(&r->refcount); -+ refcount_inc(&r->refcount); + int idle = 0, daemon = 0; +- unsigned long mask_I = kdb_task_state_string("I"), +- mask_M = kdb_task_state_string("M"); + unsigned long cpu; + const struct 
task_struct *p, *g; + for_each_online_cpu(cpu) { + p = kdb_curr_task(cpu); +- if (kdb_task_state(p, mask_I)) ++ if (kdb_task_state(p, "-")) + ++idle; + } + for_each_process_thread(g, p) { +- if (kdb_task_state(p, mask_M)) ++ if (kdb_task_state(p, "ims")) + ++daemon; + } + if (idle || daemon) { + if (idle) +- kdb_printf("%d idle process%s (state I)%s\n", ++ kdb_printf("%d idle process%s (state -)%s\n", + idle, idle == 1 ? "" : "es", + daemon ? " and " : ""); + if (daemon) +- kdb_printf("%d sleeping system daemon (state M) " ++ kdb_printf("%d sleeping system daemon (state [ims]) " + "process%s", daemon, + daemon == 1 ? "" : "es"); + kdb_printf(" suppressed,\nuse 'ps A' to see all.\n"); + } } - static inline int p9_req_try_get(struct p9_req_t *r) +-/* +- * kdb_ps - This function implements the 'ps' command which shows a +- * list of the active processes. +- * ps [DRSTCZEUIMA] All processes, optionally filtered by state +- */ + void kdb_ps1(const struct task_struct *p) { -- return kref_get_unless_zero(&r->refcount); -+ return refcount_inc_not_zero(&r->refcount); + int cpu; +@@ -2330,17 +2379,25 @@ void kdb_ps1(const struct task_struct *p) + } } --int p9_req_put(struct p9_req_t *r); -+int p9_req_put(struct p9_client *c, struct p9_req_t *r); ++/* ++ * kdb_ps - This function implements the 'ps' command which shows a ++ * list of the active processes. ++ * ++ * ps [<state_chars>] Show processes, optionally selecting only those whose ++ * state character is found in <state_chars>. ++ */ + static int kdb_ps(int argc, const char **argv) + { + struct task_struct *g, *p; +- unsigned long mask, cpu; ++ const char *mask; ++ unsigned long cpu; - void p9_client_cb(struct p9_client *c, struct p9_req_t *req, int status); + if (argc == 0) + kdb_ps_suppressed(); + kdb_printf("%-*s Pid Parent [*] cpu State %-*s Command\n", + (int)(2*sizeof(void *))+2, "Task Addr", + (int)(2*sizeof(void *))+2, "Thread"); +- mask = kdb_task_state_string(argc ? argv[1] : NULL); ++ mask = argc ? 
argv[1] : kdbgetenv("PS"); + /* Run the active tasks first */ + for_each_online_cpu(cpu) { + if (KDB_FLAG(CMD_INTERRUPT)) +@@ -2742,8 +2799,8 @@ static kdbtab_t maintab[] = { + }, + { .name = "bta", + .func = kdb_bt, +- .usage = "[D|R|S|T|C|Z|E|U|I|M|A]", +- .help = "Backtrace all processes matching state flag", ++ .usage = "[<state_chars>|A]", ++ .help = "Backtrace all processes whose state matches", + .flags = KDB_ENABLE_INSPECT, + }, + { .name = "btc", +@@ -2797,7 +2854,7 @@ static kdbtab_t maintab[] = { + }, + { .name = "ps", + .func = kdb_ps, +- .usage = "[<flags>|A]", ++ .usage = "[<state_chars>|A]", + .help = "Display active task list", + .flags = KDB_ENABLE_INSPECT, + }, +diff --git a/kernel/debug/kdb/kdb_private.h b/kernel/debug/kdb/kdb_private.h +index 629590084a0dc..0d2f9feea0a46 100644 +--- a/kernel/debug/kdb/kdb_private.h ++++ b/kernel/debug/kdb/kdb_private.h +@@ -190,10 +190,8 @@ extern char kdb_grep_string[]; + extern int kdb_grep_leading; + extern int kdb_grep_trailing; + extern char *kdb_cmds[]; +-extern unsigned long kdb_task_state_string(const char *); + extern char kdb_task_state_char (const struct task_struct *); +-extern unsigned long kdb_task_state(const struct task_struct *p, +- unsigned long mask); ++extern bool kdb_task_state(const struct task_struct *p, const char *mask); + extern void kdb_ps_suppressed(void); + extern void kdb_ps1(const struct task_struct *p); + extern void kdb_send_sig(struct task_struct *p, int sig); +diff --git a/kernel/debug/kdb/kdb_support.c b/kernel/debug/kdb/kdb_support.c +index 7507d9a8dc6ac..85cb51c4a17e6 100644 +--- a/kernel/debug/kdb/kdb_support.c ++++ b/kernel/debug/kdb/kdb_support.c +@@ -24,6 +24,7 @@ + #include <linux/uaccess.h> + #include <linux/kdb.h> + #include <linux/slab.h> ++#include <linux/ctype.h> + #include "kdb_private.h" --int p9_parse_header(struct p9_fcall *, int32_t *, int8_t *, int16_t *, int); --int p9stat_read(struct p9_client *, char *, int, struct p9_wstat *); --void p9stat_free(struct p9_wstat *); -+int p9_parse_header(struct p9_fcall *pdu, int32_t *size, int8_t *type, -+ int16_t *tag, int rewind); -+int p9stat_read(struct p9_client *clnt, char *buf, int len, -+ struct p9_wstat *st); -+void p9stat_free(struct p9_wstat *stbuf); + /* +@@ -290,7 +291,7 @@ int kdb_getarea_size(void *res, unsigned long addr, size_t size) + */ + int kdb_putarea_size(unsigned long addr, void *res, size_t size) + { +- int ret = copy_from_kernel_nofault((char *)addr, (char *)res, size); ++ int ret = copy_to_kernel_nofault((char *)addr, (char *)res, size); + if (ret) { + if (!KDB_STATE(SUPPRESS)) { + kdb_func_printf("Bad address 0x%lx\n", addr); +@@ -473,82 +474,7 @@ int kdb_putword(unsigned long addr, unsigned long word, size_t size) + return diag; + } - int p9_is_proto_dotu(struct p9_client *clnt); - int p9_is_proto_dotl(struct p9_client *clnt); --struct p9_fid *p9_client_xattrwalk(struct p9_fid *, const char *, u64 *); --int p9_client_xattrcreate(struct p9_fid *, const char *, u64, int); -+struct p9_fid *p9_client_xattrwalk(struct p9_fid *file_fid, -+ const char *attr_name, u64 *attr_size); -+int p9_client_xattrcreate(struct p9_fid *fid, const char *name, -+ u64 attr_size, int flags); - int p9_client_readlink(struct p9_fid *fid, char **target); +-/* +- * kdb_task_state_string - Convert a string containing any of the +- * letters DRSTCZEUIMA to a mask for the process state field and +- * return the value. If no argument is supplied, return the mask +- * that corresponds to environment variable PS, DRSTCZEU by +- * default. 
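
Looking back at the kdb_check_for_lockdown() hunk above: the KDB_ENABLE_ALL shorthand has to be expanded into concrete rights before the read or write families can be masked off individually. A self-contained sketch of that bit manipulation, with invented flag values (the real KDB_ENABLE_* constants differ):

    #include <stdio.h>

    #define ENABLE_MEM_READ  0x01
    #define ENABLE_MEM_WRITE 0x02
    #define ENABLE_REG_READ  0x04
    #define ENABLE_REG_WRITE 0x08
    #define ENABLE_ALL       0x10 /* shorthand for "everything" */
    #define ENABLE_MASK      0x1f

    int main(void)
    {
        int enabled = ENABLE_ALL;

        /* De-compose ALL into individual rights, then drop the writes. */
        if (enabled & ENABLE_ALL)
            enabled = ENABLE_MASK & ~ENABLE_ALL;
        enabled &= ~(ENABLE_MEM_WRITE | ENABLE_REG_WRITE);
        printf("remaining rights: %#x\n", enabled); /* prints 0x5 */
        return 0;
    }
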
+- * Inputs: +- * s String to convert +- * Returns: +- * Mask for process state. +- * Notes: +- * The mask folds data from several sources into a single long value, so +- * be careful not to overlap the bits. TASK_* bits are in the LSB, +- * special cases like UNRUNNABLE are in the MSB. As of 2.6.10-rc1 there +- * is no overlap between TASK_* and EXIT_* but that may not always be +- * true, so EXIT_* bits are shifted left 16 bits before being stored in +- * the mask. +- */ +- +-/* unrunnable is < 0 */ +-#define UNRUNNABLE (1UL << (8*sizeof(unsigned long) - 1)) +-#define RUNNING (1UL << (8*sizeof(unsigned long) - 2)) +-#define IDLE (1UL << (8*sizeof(unsigned long) - 3)) +-#define DAEMON (1UL << (8*sizeof(unsigned long) - 4)) - int p9_client_init(void); -diff --git a/include/net/9p/transport.h b/include/net/9p/transport.h -index 3eb4261b29588..7215976116257 100644 ---- a/include/net/9p/transport.h -+++ b/include/net/9p/transport.h -@@ -40,14 +40,16 @@ struct p9_trans_module { - int maxsize; /* max message size of transport */ - int def; /* this transport should be default */ - struct module *owner; -- int (*create)(struct p9_client *, const char *, char *); -- void (*close) (struct p9_client *); -- int (*request) (struct p9_client *, struct p9_req_t *req); -- int (*cancel) (struct p9_client *, struct p9_req_t *req); -- int (*cancelled)(struct p9_client *, struct p9_req_t *req); -- int (*zc_request)(struct p9_client *, struct p9_req_t *, -- struct iov_iter *, struct iov_iter *, int , int, int); -- int (*show_options)(struct seq_file *, struct p9_client *); -+ int (*create)(struct p9_client *client, -+ const char *devname, char *args); -+ void (*close)(struct p9_client *client); -+ int (*request)(struct p9_client *client, struct p9_req_t *req); -+ int (*cancel)(struct p9_client *client, struct p9_req_t *req); -+ int (*cancelled)(struct p9_client *client, struct p9_req_t *req); -+ int (*zc_request)(struct p9_client *client, struct p9_req_t *req, -+ struct iov_iter *uidata, struct iov_iter *uodata, -+ int inlen, int outlen, int in_hdr_len); -+ int (*show_options)(struct seq_file *m, struct p9_client *client); - }; +-unsigned long kdb_task_state_string(const char *s) +-{ +- long res = 0; +- if (!s) { +- s = kdbgetenv("PS"); +- if (!s) +- s = "DRSTCZEU"; /* default value for ps */ +- } +- while (*s) { +- switch (*s) { +- case 'D': +- res |= TASK_UNINTERRUPTIBLE; +- break; +- case 'R': +- res |= RUNNING; +- break; +- case 'S': +- res |= TASK_INTERRUPTIBLE; +- break; +- case 'T': +- res |= TASK_STOPPED; +- break; +- case 'C': +- res |= TASK_TRACED; +- break; +- case 'Z': +- res |= EXIT_ZOMBIE << 16; +- break; +- case 'E': +- res |= EXIT_DEAD << 16; +- break; +- case 'U': +- res |= UNRUNNABLE; +- break; +- case 'I': +- res |= IDLE; +- break; +- case 'M': +- res |= DAEMON; +- break; +- case 'A': +- res = ~0UL; +- break; +- default: +- kdb_func_printf("unknown flag '%c' ignored\n", *s); +- break; +- } +- ++s; +- } +- return res; +-} - void v9fs_register_trans(struct p9_trans_module *m); -diff --git a/include/net/addrconf.h b/include/net/addrconf.h -index 78ea3e332688f..53627afab1044 100644 ---- a/include/net/addrconf.h -+++ b/include/net/addrconf.h -@@ -6,6 +6,8 @@ - #define RTR_SOLICITATION_INTERVAL (4*HZ) - #define RTR_SOLICITATION_MAX_INTERVAL (3600*HZ) /* 1 hour */ + /* + * kdb_task_state_char - Return the character that represents the task state. 
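
The DRSTCZEUIMA bitmask machinery being deleted here gives way to plain state characters tested with strchr(), as the reworked kdb_task_state() below shows. A hedged userspace restatement of the new matching rule (it mirrors the patch; it is not copied from it):

    #include <stdbool.h>
    #include <stdio.h>
    #include <string.h>

    static bool state_matches(char state, const char *mask)
    {
        /* No mask: hide idle ('-') and sleeping daemons ('i', 'm', 's'). */
        if (!mask || mask[0] == '\0')
            return !strchr("-ims", state);
        /* 'A' is the documented match-everything wildcard. */
        if (strchr(mask, 'A'))
            return true;
        return strchr(mask, state) != NULL;
    }

    int main(void)
    {
        /* prints "1 0 1": running shown, idle hidden, idle shown with A */
        printf("%d %d %d\n", state_matches('R', NULL),
               state_matches('-', NULL), state_matches('-', "A"));
        return 0;
    }
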
+@@ -559,7 +485,6 @@ unsigned long kdb_task_state_string(const char *s) + */ + char kdb_task_state_char (const struct task_struct *p) + { +- unsigned int p_state; + unsigned long tmp; + char state; + int cpu; +@@ -568,25 +493,18 @@ char kdb_task_state_char (const struct task_struct *p) + copy_from_kernel_nofault(&tmp, (char *)p, sizeof(unsigned long))) + return 'E'; -+#define MIN_VALID_LIFETIME (2*3600) /* 2 hours */ +- cpu = kdb_process_cpu(p); +- p_state = READ_ONCE(p->__state); +- state = (p_state == 0) ? 'R' : +- (p_state < 0) ? 'U' : +- (p_state & TASK_UNINTERRUPTIBLE) ? 'D' : +- (p_state & TASK_STOPPED) ? 'T' : +- (p_state & TASK_TRACED) ? 'C' : +- (p->exit_state & EXIT_ZOMBIE) ? 'Z' : +- (p->exit_state & EXIT_DEAD) ? 'E' : +- (p_state & TASK_INTERRUPTIBLE) ? 'S' : '?'; ++ state = task_state_to_char((struct task_struct *) p); + - #define TEMP_VALID_LIFETIME (7*86400) - #define TEMP_PREFERRED_LIFETIME (86400) - #define REGEN_MAX_RETRY (3) -@@ -107,8 +109,6 @@ struct inet6_ifaddr *ipv6_get_ifaddr(struct net *net, - int ipv6_dev_get_saddr(struct net *net, const struct net_device *dev, - const struct in6_addr *daddr, unsigned int srcprefs, - struct in6_addr *saddr); --int __ipv6_get_lladdr(struct inet6_dev *idev, struct in6_addr *addr, -- u32 banned_flags); - int ipv6_get_lladdr(struct net_device *dev, struct in6_addr *addr, - u32 banned_flags); - bool inet_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2, -@@ -403,6 +403,9 @@ static inline bool ip6_ignore_linkdown(const struct net_device *dev) + if (is_idle_task(p)) { + /* Idle task. Is it really idle, apart from the kdb + * interrupt? */ ++ cpu = kdb_process_cpu(p); + if (!kdb_task_has_cpu(p) || kgdb_info[cpu].irq_depth == 1) { + if (cpu != kdb_initial_cpu) +- state = 'I'; /* idle task */ ++ state = '-'; /* idle task */ + } +- } else if (!p->mm && state == 'S') { +- state = 'M'; /* sleeping system daemon */ ++ } else if (!p->mm && strchr("IMS", state)) { ++ state = tolower(state); /* sleeping system daemon */ + } + return state; + } +@@ -596,14 +514,28 @@ char kdb_task_state_char (const struct task_struct *p) + * given by the mask. + * Inputs: + * p struct task for the process +- * mask mask from kdb_task_state_string to select processes ++ * mask set of characters used to select processes; both NULL ++ * and the empty string mean adopt a default filter, which ++ * is to suppress sleeping system daemons and the idle tasks + * Returns: + * True if the process matches at least one criteria defined by the mask. + */ +-unsigned long kdb_task_state(const struct task_struct *p, unsigned long mask) ++bool kdb_task_state(const struct task_struct *p, const char *mask) { - const struct inet6_dev *idev = __in6_dev_get(dev); - -+ if (unlikely(!idev)) +- char state[] = { kdb_task_state_char(p), '\0' }; +- return (mask & kdb_task_state_string(state)) != 0; ++ char state = kdb_task_state_char(p); ++ ++ /* If there is no mask, then we will filter code that runs when the ++ * scheduler is idling and any system daemons that are currently ++ * sleeping. 
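
The rewritten kdb_task_state_char() above now defers to the scheduler's own task_state_to_char() and only post-processes two cases: idle tasks become '-' and sleeping kernel daemons are lowercased. A hedged sketch of the lowercasing rule in isolation (daemon_state is an invented name; tolower comes from <ctype.h> as in the patch):

    #include <ctype.h>
    #include <stdio.h>
    #include <string.h>

    /* Kernel threads (no mm) in the I/M/S states are shown lowercased. */
    static char daemon_state(char state, int has_mm)
    {
        if (!has_mm && strchr("IMS", state))
            return tolower(state);
        return state;
    }

    int main(void)
    {
        printf("%c %c\n", daemon_state('S', 1), daemon_state('S', 0)); /* S s */
        return 0;
    }
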
++ */ ++ if (!mask || mask[0] == '\0') ++ return !strchr("-ims", state); ++ ++ /* A is a special case that matches all states */ ++ if (strchr(mask, 'A')) + return true; + - return !!idev->cnf.ignore_routes_with_linkdown; ++ return strchr(mask, state); } -diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h -index ab207677e0a8b..f742e50207fbd 100644 ---- a/include/net/af_vsock.h -+++ b/include/net/af_vsock.h -@@ -205,7 +205,8 @@ struct sock *vsock_find_bound_socket(struct sockaddr_vm *addr); - struct sock *vsock_find_connected_socket(struct sockaddr_vm *src, - struct sockaddr_vm *dst); - void vsock_remove_sock(struct vsock_sock *vsk); --void vsock_for_each_connected_socket(void (*fn)(struct sock *sk)); -+void vsock_for_each_connected_socket(struct vsock_transport *transport, -+ void (*fn)(struct sock *sk)); - int vsock_assign_transport(struct vsock_sock *vsk, struct vsock_sock *psk); - bool vsock_find_cid(unsigned int cid); + /* Maintain a small stack of kdb_flags to allow recursion without disturbing +diff --git a/kernel/dma/debug.c b/kernel/dma/debug.c +index 7a14ca29c3778..2caafd13f8aac 100644 +--- a/kernel/dma/debug.c ++++ b/kernel/dma/debug.c +@@ -448,7 +448,7 @@ void debug_dma_dump_mappings(struct device *dev) + * other hand, consumes a single dma_debug_entry, but inserts 'nents' + * entries into the tree. + */ +-static RADIX_TREE(dma_active_cacheline, GFP_NOWAIT); ++static RADIX_TREE(dma_active_cacheline, GFP_ATOMIC); + static DEFINE_SPINLOCK(radix_lock); + #define ACTIVE_CACHELINE_MAX_OVERLAP ((1 << RADIX_TREE_MAX_TAGS) - 1) + #define CACHELINE_PER_PAGE_SHIFT (PAGE_SHIFT - L1_CACHE_SHIFT) +@@ -564,7 +564,7 @@ static void add_dma_entry(struct dma_debug_entry *entry, unsigned long attrs) -diff --git a/include/net/arp.h b/include/net/arp.h -index 4950191f6b2bf..4a23a97195f33 100644 ---- a/include/net/arp.h -+++ b/include/net/arp.h -@@ -71,6 +71,7 @@ void arp_send(int type, int ptype, __be32 dest_ip, - const unsigned char *src_hw, const unsigned char *th); - int arp_mc_map(__be32 addr, u8 *haddr, struct net_device *dev, int dir); - void arp_ifdown(struct net_device *dev); -+int arp_invalidate(struct net_device *dev, __be32 ip, bool force); + rc = active_cacheline_insert(entry); + if (rc == -ENOMEM) { +- pr_err("cacheline tracking ENOMEM, dma-debug disabled\n"); ++ pr_err_once("cacheline tracking ENOMEM, dma-debug disabled\n"); + global_disable = true; + } else if (rc == -EEXIST && !(attrs & DMA_ATTR_SKIP_CPU_SYNC)) { + err_printk(entry->dev, entry, +@@ -927,7 +927,7 @@ static __init int dma_debug_cmdline(char *str) + global_disable = true; + } - struct sk_buff *arp_create(int type, int ptype, __be32 dest_ip, - struct net_device *dev, __be32 src_ip, -diff --git a/include/net/ax25.h b/include/net/ax25.h -index 8b7eb46ad72d8..aadff553e4b73 100644 ---- a/include/net/ax25.h -+++ b/include/net/ax25.h -@@ -236,6 +236,7 @@ typedef struct ax25_dev { - #if defined(CONFIG_AX25_DAMA_SLAVE) || defined(CONFIG_AX25_DAMA_MASTER) - ax25_dama_info dama; - #endif -+ refcount_t refcount; - } ax25_dev; +- return 0; ++ return 1; + } - typedef struct ax25_cb { -@@ -290,6 +291,17 @@ static __inline__ void ax25_cb_put(ax25_cb *ax25) - } + static __init int dma_debug_entries_cmdline(char *str) +@@ -936,7 +936,7 @@ static __init int dma_debug_entries_cmdline(char *str) + return -EINVAL; + if (!get_option(&str, &nr_prealloc_entries)) + nr_prealloc_entries = PREALLOC_DMA_DEBUG_ENTRIES; +- return 0; ++ return 1; } -+static inline void ax25_dev_hold(ax25_dev *ax25_dev) + __setup("dma_debug=", 
dma_debug_cmdline); +diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c +index 4c6c5e0635e34..ed5dd9e023241 100644 +--- a/kernel/dma/direct.c ++++ b/kernel/dma/direct.c +@@ -75,6 +75,25 @@ static bool dma_coherent_ok(struct device *dev, phys_addr_t phys, size_t size) + min_not_zero(dev->coherent_dma_mask, dev->bus_dma_limit); + } + ++static int dma_set_decrypted(struct device *dev, void *vaddr, size_t size) +{ -+ refcount_inc(&ax25_dev->refcount); ++ if (!force_dma_unencrypted(dev)) ++ return 0; ++ return set_memory_decrypted((unsigned long)vaddr, PFN_UP(size)); +} + -+static inline void ax25_dev_put(ax25_dev *ax25_dev) ++static int dma_set_encrypted(struct device *dev, void *vaddr, size_t size) +{ -+ if (refcount_dec_and_test(&ax25_dev->refcount)) { -+ kfree(ax25_dev); -+ } ++ int ret; ++ ++ if (!force_dma_unencrypted(dev)) ++ return 0; ++ ret = set_memory_encrypted((unsigned long)vaddr, PFN_UP(size)); ++ if (ret) ++ pr_warn_ratelimited("leaking DMA memory that can't be re-encrypted\n"); ++ return ret; +} - static inline __be16 ax25_type_trans(struct sk_buff *skb, struct net_device *dev) - { - skb->dev = dev; -diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h -index 9125effbf4483..355835639ae58 100644 ---- a/include/net/bluetooth/bluetooth.h -+++ b/include/net/bluetooth/bluetooth.h -@@ -180,19 +180,21 @@ void bt_err_ratelimited(const char *fmt, ...); - #define BT_DBG(fmt, ...) pr_debug(fmt "\n", ##__VA_ARGS__) - #endif - -+#define bt_dev_name(hdev) ((hdev) ? (hdev)->name : "null") + - #define bt_dev_info(hdev, fmt, ...) \ -- BT_INFO("%s: " fmt, (hdev)->name, ##__VA_ARGS__) -+ BT_INFO("%s: " fmt, bt_dev_name(hdev), ##__VA_ARGS__) - #define bt_dev_warn(hdev, fmt, ...) \ -- BT_WARN("%s: " fmt, (hdev)->name, ##__VA_ARGS__) -+ BT_WARN("%s: " fmt, bt_dev_name(hdev), ##__VA_ARGS__) - #define bt_dev_err(hdev, fmt, ...) \ -- BT_ERR("%s: " fmt, (hdev)->name, ##__VA_ARGS__) -+ BT_ERR("%s: " fmt, bt_dev_name(hdev), ##__VA_ARGS__) - #define bt_dev_dbg(hdev, fmt, ...) \ -- BT_DBG("%s: " fmt, (hdev)->name, ##__VA_ARGS__) -+ BT_DBG("%s: " fmt, bt_dev_name(hdev), ##__VA_ARGS__) + static void __dma_direct_free_pages(struct device *dev, struct page *page, + size_t size) + { +@@ -85,7 +104,7 @@ static void __dma_direct_free_pages(struct device *dev, struct page *page, + } - #define bt_dev_warn_ratelimited(hdev, fmt, ...) \ -- bt_warn_ratelimited("%s: " fmt, (hdev)->name, ##__VA_ARGS__) -+ bt_warn_ratelimited("%s: " fmt, bt_dev_name(hdev), ##__VA_ARGS__) - #define bt_dev_err_ratelimited(hdev, fmt, ...) 
\ -- bt_err_ratelimited("%s: " fmt, (hdev)->name, ##__VA_ARGS__) -+ bt_err_ratelimited("%s: " fmt, bt_dev_name(hdev), ##__VA_ARGS__) + static struct page *__dma_direct_alloc_pages(struct device *dev, size_t size, +- gfp_t gfp) ++ gfp_t gfp, bool allow_highmem) + { + int node = dev_to_node(dev); + struct page *page = NULL; +@@ -106,9 +125,12 @@ static struct page *__dma_direct_alloc_pages(struct device *dev, size_t size, + } - /* Connection and socket states */ - enum { -@@ -420,6 +422,71 @@ out: - return NULL; + page = dma_alloc_contiguous(dev, size, gfp); +- if (page && !dma_coherent_ok(dev, page_to_phys(page), size)) { +- dma_free_contiguous(dev, page, size); +- page = NULL; ++ if (page) { ++ if (!dma_coherent_ok(dev, page_to_phys(page), size) || ++ (!allow_highmem && PageHighMem(page))) { ++ dma_free_contiguous(dev, page, size); ++ page = NULL; ++ } + } + again: + if (!page) +@@ -149,29 +171,37 @@ static void *dma_direct_alloc_from_pool(struct device *dev, size_t size, + return ret; } -+/* Shall not be called with lock_sock held */ -+static inline struct sk_buff *bt_skb_sendmsg(struct sock *sk, -+ struct msghdr *msg, -+ size_t len, size_t mtu, -+ size_t headroom, size_t tailroom) -+{ -+ struct sk_buff *skb; -+ size_t size = min_t(size_t, len, mtu); -+ int err; -+ -+ skb = bt_skb_send_alloc(sk, size + headroom + tailroom, -+ msg->msg_flags & MSG_DONTWAIT, &err); -+ if (!skb) -+ return ERR_PTR(err); -+ -+ skb_reserve(skb, headroom); -+ skb_tailroom_reserve(skb, mtu, tailroom); -+ -+ if (!copy_from_iter_full(skb_put(skb, size), size, &msg->msg_iter)) { -+ kfree_skb(skb); -+ return ERR_PTR(-EFAULT); -+ } -+ -+ skb->priority = sk->sk_priority; -+ -+ return skb; -+} -+ -+/* Similar to bt_skb_sendmsg but can split the msg into multiple fragments -+ * accourding to the MTU. -+ */ -+static inline struct sk_buff *bt_skb_sendmmsg(struct sock *sk, -+ struct msghdr *msg, -+ size_t len, size_t mtu, -+ size_t headroom, size_t tailroom) ++static void *dma_direct_alloc_no_mapping(struct device *dev, size_t size, ++ dma_addr_t *dma_handle, gfp_t gfp) +{ -+ struct sk_buff *skb, **frag; -+ -+ skb = bt_skb_sendmsg(sk, msg, len, mtu, headroom, tailroom); -+ if (IS_ERR_OR_NULL(skb)) -+ return skb; -+ -+ len -= skb->len; -+ if (!len) -+ return skb; -+ -+ /* Add remaining data over MTU as continuation fragments */ -+ frag = &skb_shinfo(skb)->frag_list; -+ while (len) { -+ struct sk_buff *tmp; -+ -+ tmp = bt_skb_sendmsg(sk, msg, len, mtu, headroom, tailroom); -+ if (IS_ERR(tmp)) { -+ return skb; -+ } ++ struct page *page; + -+ len -= tmp->len; ++ page = __dma_direct_alloc_pages(dev, size, gfp & ~__GFP_ZERO, true); ++ if (!page) ++ return NULL; + -+ *frag = tmp; -+ frag = &(*frag)->next; -+ } ++ /* remove any dirty cache lines on the kernel alias */ ++ if (!PageHighMem(page)) ++ arch_dma_prep_coherent(page, size); + -+ return skb; ++ /* return the page pointer as the opaque cookie */ ++ *dma_handle = phys_to_dma_direct(dev, page_to_phys(page)); ++ return page; +} + - int bt_to_errno(u16 code); + void *dma_direct_alloc(struct device *dev, size_t size, + dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs) + { + struct page *page; + void *ret; +- int err; - void hci_sock_set_flag(struct sock *sk, int nr); -diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h -index b80415011dcd5..9ce46cb8564d6 100644 ---- a/include/net/bluetooth/hci.h -+++ b/include/net/bluetooth/hci.h -@@ -246,6 +246,15 @@ enum { - * HCI after resume. 
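
One note on the bt_skb_sendmmsg() helper above before moving on: everything beyond the first MTU-sized skb is chained on skb_shinfo(skb)->frag_list. A hedged kernel-style sketch of how a consumer might total up such a chain (bt_chain_len is an invented name; it assumes <linux/skbuff.h>):

    static size_t bt_chain_len(struct sk_buff *skb)
    {
        struct sk_buff *frag;
        size_t len = skb->len;

        /* Continuation fragments are linked through ->next. */
        for (frag = skb_shinfo(skb)->frag_list; frag; frag = frag->next)
            len += frag->len;
        return len;
    }
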
- */ - HCI_QUIRK_NO_SUSPEND_NOTIFIER, -+ -+ /* -+ * When this quirk is set, LE tx power is not queried on startup -+ * and the min/max tx power values default to HCI_TX_POWER_INVALID. -+ * -+ * This quirk can be set before hci_register_dev is called or -+ * during the hdev->setup vendor callback. -+ */ -+ HCI_QUIRK_BROKEN_READ_TRANSMIT_POWER, - }; + size = PAGE_ALIGN(size); + if (attrs & DMA_ATTR_NO_WARN) + gfp |= __GFP_NOWARN; - /* HCI device flags */ -diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h -index a7360c8c72f82..3da5cfcf84c1d 100644 ---- a/include/net/bluetooth/hci_core.h -+++ b/include/net/bluetooth/hci_core.h -@@ -35,6 +35,9 @@ - /* HCI priority */ - #define HCI_PRIO_MAX 7 + if ((attrs & DMA_ATTR_NO_KERNEL_MAPPING) && +- !force_dma_unencrypted(dev) && !is_swiotlb_for_alloc(dev)) { +- page = __dma_direct_alloc_pages(dev, size, gfp & ~__GFP_ZERO); +- if (!page) +- return NULL; +- /* remove any dirty cache lines on the kernel alias */ +- if (!PageHighMem(page)) +- arch_dma_prep_coherent(page, size); +- *dma_handle = phys_to_dma_direct(dev, page_to_phys(page)); +- /* return the page pointer as the opaque cookie */ +- return page; +- } ++ !force_dma_unencrypted(dev) && !is_swiotlb_for_alloc(dev)) ++ return dma_direct_alloc_no_mapping(dev, size, dma_handle, gfp); -+/* HCI maximum id value */ -+#define HCI_MAX_ID 10000 -+ - /* HCI Core structures */ - struct inquiry_data { - bdaddr_t bdaddr; -diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h -index 3c4f550e5a8b7..2f766e3437ce2 100644 ---- a/include/net/bluetooth/l2cap.h -+++ b/include/net/bluetooth/l2cap.h -@@ -847,6 +847,7 @@ enum { - }; + if (!IS_ENABLED(CONFIG_ARCH_HAS_DMA_SET_UNCACHED) && + !IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) && +@@ -200,7 +230,7 @@ void *dma_direct_alloc(struct device *dev, size_t size, + return dma_direct_alloc_from_pool(dev, size, dma_handle, gfp); - void l2cap_chan_hold(struct l2cap_chan *c); -+struct l2cap_chan *l2cap_chan_hold_unless_zero(struct l2cap_chan *c); - void l2cap_chan_put(struct l2cap_chan *c); + /* we always manually zero the memory once we are done */ +- page = __dma_direct_alloc_pages(dev, size, gfp & ~__GFP_ZERO); ++ page = __dma_direct_alloc_pages(dev, size, gfp & ~__GFP_ZERO, true); + if (!page) + return NULL; - static inline void l2cap_chan_lock(struct l2cap_chan *chan) -diff --git a/include/net/bond_3ad.h b/include/net/bond_3ad.h -index 38785d48baff9..f2273bd5a4c58 100644 ---- a/include/net/bond_3ad.h -+++ b/include/net/bond_3ad.h -@@ -15,8 +15,6 @@ - #define PKT_TYPE_LACPDU cpu_to_be16(ETH_P_SLOW) - #define AD_TIMER_INTERVAL 100 /*msec*/ +@@ -216,12 +246,6 @@ void *dma_direct_alloc(struct device *dev, size_t size, + __builtin_return_address(0)); + if (!ret) + goto out_free_pages; +- if (force_dma_unencrypted(dev)) { +- err = set_memory_decrypted((unsigned long)ret, +- 1 << get_order(size)); +- if (err) +- goto out_free_pages; +- } + memset(ret, 0, size); + goto done; + } +@@ -238,13 +262,8 @@ void *dma_direct_alloc(struct device *dev, size_t size, + } --#define MULTICAST_LACPDU_ADDR {0x01, 0x80, 0xC2, 0x00, 0x00, 0x02} + ret = page_address(page); +- if (force_dma_unencrypted(dev)) { +- err = set_memory_decrypted((unsigned long)ret, +- 1 << get_order(size)); +- if (err) +- goto out_free_pages; +- } - - #define AD_LACP_SLOW 0 - #define AD_LACP_FAST 1 ++ if (dma_set_decrypted(dev, ret, size)) ++ goto out_free_pages; + memset(ret, 0, size); -@@ -262,7 +260,7 @@ struct ad_system { - struct ad_bond_info { - struct ad_system system; /* 
802.3ad system structure */ - struct bond_3ad_stats stats; -- u32 agg_select_timer; /* Timer to select aggregator after all adapter's hand shakes */ -+ atomic_t agg_select_timer; /* Timer to select aggregator after all adapter's hand shakes */ - u16 aggregator_identifier; - }; + if (IS_ENABLED(CONFIG_ARCH_HAS_DMA_SET_UNCACHED) && +@@ -259,13 +278,8 @@ done: + return ret; -diff --git a/include/net/bond_alb.h b/include/net/bond_alb.h -index f6af76c87a6c3..191c36afa1f4a 100644 ---- a/include/net/bond_alb.h -+++ b/include/net/bond_alb.h -@@ -126,7 +126,7 @@ struct tlb_slave_info { - struct alb_bond_info { - struct tlb_client_info *tx_hashtbl; /* Dynamically allocated */ - u32 unbalanced_load; -- int tx_rebalance_counter; -+ atomic_t tx_rebalance_counter; - int lp_counter; - /* -------- rlb parameters -------- */ - int rlb_enabled; -diff --git a/include/net/bonding.h b/include/net/bonding.h -index 15e083e18f75f..8c18c6b01634c 100644 ---- a/include/net/bonding.h -+++ b/include/net/bonding.h -@@ -757,6 +757,9 @@ extern struct rtnl_link_ops bond_link_ops; - /* exported from bond_sysfs_slave.c */ - extern const struct sysfs_ops slave_sysfs_ops; + out_encrypt_pages: +- if (force_dma_unencrypted(dev)) { +- err = set_memory_encrypted((unsigned long)page_address(page), +- 1 << get_order(size)); +- /* If memory cannot be re-encrypted, it must be leaked */ +- if (err) +- return NULL; +- } ++ if (dma_set_encrypted(dev, page_address(page), size)) ++ return NULL; + out_free_pages: + __dma_direct_free_pages(dev, page, size); + return NULL; +@@ -304,13 +318,14 @@ void dma_direct_free(struct device *dev, size_t size, + dma_free_from_pool(dev, cpu_addr, PAGE_ALIGN(size))) + return; -+/* exported from bond_3ad.c */ -+extern const u8 lacpdu_mcast_addr[]; -+ - static inline netdev_tx_t bond_tx_drop(struct net_device *dev, struct sk_buff *skb) - { - atomic_long_inc(&dev->tx_dropped); -diff --git a/include/net/busy_poll.h b/include/net/busy_poll.h -index 40296ed976a97..3459a04a3d61c 100644 ---- a/include/net/busy_poll.h -+++ b/include/net/busy_poll.h -@@ -33,7 +33,7 @@ extern unsigned int sysctl_net_busy_poll __read_mostly; +- if (force_dma_unencrypted(dev)) +- set_memory_encrypted((unsigned long)cpu_addr, 1 << page_order); +- +- if (IS_ENABLED(CONFIG_DMA_REMAP) && is_vmalloc_addr(cpu_addr)) ++ if (IS_ENABLED(CONFIG_DMA_REMAP) && is_vmalloc_addr(cpu_addr)) { + vunmap(cpu_addr); +- else if (IS_ENABLED(CONFIG_ARCH_HAS_DMA_CLEAR_UNCACHED)) +- arch_dma_clear_uncached(cpu_addr, size); ++ } else { ++ if (IS_ENABLED(CONFIG_ARCH_HAS_DMA_CLEAR_UNCACHED)) ++ arch_dma_clear_uncached(cpu_addr, size); ++ if (dma_set_encrypted(dev, cpu_addr, size)) ++ return; ++ } - static inline bool net_busy_loop_on(void) - { -- return sysctl_net_busy_poll; -+ return READ_ONCE(sysctl_net_busy_poll); + __dma_direct_free_pages(dev, dma_direct_to_page(dev, dma_addr), size); } +@@ -326,26 +341,13 @@ struct page *dma_direct_alloc_pages(struct device *dev, size_t size, + !is_swiotlb_for_alloc(dev)) + return dma_direct_alloc_from_pool(dev, size, dma_handle, gfp); - static inline bool sk_can_busy_loop(const struct sock *sk) -diff --git a/include/net/checksum.h b/include/net/checksum.h -index 5b96d5bd6e545..d3b5d368a0caa 100644 ---- a/include/net/checksum.h -+++ b/include/net/checksum.h -@@ -22,7 +22,7 @@ - #include <asm/checksum.h> - - #ifndef _HAVE_ARCH_COPY_AND_CSUM_FROM_USER --static inline -+static __always_inline - __wsum csum_and_copy_from_user (const void __user *src, void *dst, - int len) - { -@@ -33,7 +33,7 @@ __wsum csum_and_copy_from_user 
(const void __user *src, void *dst, - #endif - - #ifndef HAVE_CSUM_COPY_USER --static __inline__ __wsum csum_and_copy_to_user -+static __always_inline __wsum csum_and_copy_to_user - (const void *src, void __user *dst, int len) - { - __wsum sum = csum_partial(src, len, ~0U); -@@ -45,7 +45,7 @@ static __inline__ __wsum csum_and_copy_to_user - #endif +- page = __dma_direct_alloc_pages(dev, size, gfp); ++ page = __dma_direct_alloc_pages(dev, size, gfp, false); + if (!page) + return NULL; +- if (PageHighMem(page)) { +- /* +- * Depending on the cma= arguments and per-arch setup +- * dma_alloc_contiguous could return highmem pages. +- * Without remapping there is no way to return them here, +- * so log an error and fail. +- */ +- dev_info(dev, "Rejecting highmem page from CMA.\n"); +- goto out_free_pages; +- } - #ifndef _HAVE_ARCH_CSUM_AND_COPY --static inline __wsum -+static __always_inline __wsum - csum_partial_copy_nocheck(const void *src, void *dst, int len) + ret = page_address(page); +- if (force_dma_unencrypted(dev)) { +- if (set_memory_decrypted((unsigned long)ret, +- 1 << get_order(size))) +- goto out_free_pages; +- } ++ if (dma_set_decrypted(dev, ret, size)) ++ goto out_free_pages; + memset(ret, 0, size); + *dma_handle = phys_to_dma_direct(dev, page_to_phys(page)); + return page; +@@ -358,7 +360,6 @@ void dma_direct_free_pages(struct device *dev, size_t size, + struct page *page, dma_addr_t dma_addr, + enum dma_data_direction dir) { - memcpy(dst, src, len); -@@ -54,7 +54,7 @@ csum_partial_copy_nocheck(const void *src, void *dst, int len) - #endif +- unsigned int page_order = get_order(size); + void *vaddr = page_address(page); - #ifndef HAVE_ARCH_CSUM_ADD --static inline __wsum csum_add(__wsum csum, __wsum addend) -+static __always_inline __wsum csum_add(__wsum csum, __wsum addend) - { - u32 res = (__force u32)csum; - res += (__force u32)addend; -@@ -62,12 +62,12 @@ static inline __wsum csum_add(__wsum csum, __wsum addend) - } - #endif + /* If cpu_addr is not from an atomic pool, dma_free_from_pool() fails */ +@@ -366,9 +367,8 @@ void dma_direct_free_pages(struct device *dev, size_t size, + dma_free_from_pool(dev, vaddr, size)) + return; --static inline __wsum csum_sub(__wsum csum, __wsum addend) -+static __always_inline __wsum csum_sub(__wsum csum, __wsum addend) - { - return csum_add(csum, ~addend); +- if (force_dma_unencrypted(dev)) +- set_memory_encrypted((unsigned long)vaddr, 1 << page_order); +- ++ if (dma_set_encrypted(dev, vaddr, size)) ++ return; + __dma_direct_free_pages(dev, page, size); } --static inline __sum16 csum16_add(__sum16 csum, __be16 addend) -+static __always_inline __sum16 csum16_add(__sum16 csum, __be16 addend) - { - u16 res = (__force u16)csum; - -@@ -75,12 +75,12 @@ static inline __sum16 csum16_add(__sum16 csum, __be16 addend) - return (__force __sum16)(res + (res < (__force u16)addend)); - } +diff --git a/kernel/dma/direct.h b/kernel/dma/direct.h +index 4632b0f4f72eb..8a6cd53dbe8ce 100644 +--- a/kernel/dma/direct.h ++++ b/kernel/dma/direct.h +@@ -114,6 +114,7 @@ static inline void dma_direct_unmap_page(struct device *dev, dma_addr_t addr, + dma_direct_sync_single_for_cpu(dev, addr, size, dir); --static inline __sum16 csum16_sub(__sum16 csum, __be16 addend) -+static __always_inline __sum16 csum16_sub(__sum16 csum, __be16 addend) - { - return csum16_add(csum, ~addend); + if (unlikely(is_swiotlb_buffer(dev, phys))) +- swiotlb_tbl_unmap_single(dev, phys, size, dir, attrs); ++ swiotlb_tbl_unmap_single(dev, phys, size, dir, ++ attrs | DMA_ATTR_SKIP_CPU_SYNC); } + 
#endif /* _KERNEL_DMA_DIRECT_H */ +diff --git a/kernel/dma/mapping.c b/kernel/dma/mapping.c +index 8349a9f2c3453..9478eccd1c8e6 100644 +--- a/kernel/dma/mapping.c ++++ b/kernel/dma/mapping.c +@@ -296,10 +296,6 @@ dma_addr_t dma_map_resource(struct device *dev, phys_addr_t phys_addr, + if (WARN_ON_ONCE(!dev->dma_mask)) + return DMA_MAPPING_ERROR; --static inline __wsum csum_shift(__wsum sum, int offset) -+static __always_inline __wsum csum_shift(__wsum sum, int offset) - { - /* rotate sum to align it with a 16b boundary */ - if (offset & 1) -@@ -88,42 +88,43 @@ static inline __wsum csum_shift(__wsum sum, int offset) - return sum; +- /* Don't allow RAM to be mapped */ +- if (WARN_ON_ONCE(pfn_valid(PHYS_PFN(phys_addr)))) +- return DMA_MAPPING_ERROR; +- + if (dma_map_direct(dev, ops)) + addr = dma_direct_map_resource(dev, phys_addr, size, dir, attrs); + else if (ops->map_resource) +diff --git a/kernel/dma/pool.c b/kernel/dma/pool.c +index 5f84e6cdb78ea..4d40dcce7604b 100644 +--- a/kernel/dma/pool.c ++++ b/kernel/dma/pool.c +@@ -203,7 +203,7 @@ static int __init dma_atomic_pool_init(void) + GFP_KERNEL); + if (!atomic_pool_kernel) + ret = -ENOMEM; +- if (IS_ENABLED(CONFIG_ZONE_DMA)) { ++ if (has_managed_dma()) { + atomic_pool_dma = __dma_atomic_pool_init(atomic_pool_size, + GFP_KERNEL | GFP_DMA); + if (!atomic_pool_dma) +@@ -226,7 +226,7 @@ static inline struct gen_pool *dma_guess_pool(struct gen_pool *prev, gfp_t gfp) + if (prev == NULL) { + if (IS_ENABLED(CONFIG_ZONE_DMA32) && (gfp & GFP_DMA32)) + return atomic_pool_dma32; +- if (IS_ENABLED(CONFIG_ZONE_DMA) && (gfp & GFP_DMA)) ++ if (atomic_pool_dma && (gfp & GFP_DMA)) + return atomic_pool_dma; + return atomic_pool_kernel; + } +diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c +index 87c40517e8227..a9849670bdb54 100644 +--- a/kernel/dma/swiotlb.c ++++ b/kernel/dma/swiotlb.c +@@ -435,7 +435,10 @@ static void swiotlb_bounce(struct device *dev, phys_addr_t tlb_addr, size_t size + } } --static inline __wsum -+static __always_inline __wsum - csum_block_add(__wsum csum, __wsum csum2, int offset) - { - return csum_add(csum, csum_shift(csum2, offset)); - } +-#define slot_addr(start, idx) ((start) + ((idx) << IO_TLB_SHIFT)) ++static inline phys_addr_t slot_addr(phys_addr_t start, phys_addr_t idx) ++{ ++ return start + (idx << IO_TLB_SHIFT); ++} --static inline __wsum -+static __always_inline __wsum - csum_block_add_ext(__wsum csum, __wsum csum2, int offset, int len) + /* + * Carefully handle integer overflow which can occur when boundary_mask == ~0UL. +@@ -459,7 +462,7 @@ static unsigned int wrap_index(struct io_tlb_mem *mem, unsigned int index) + * allocate a buffer from that IO TLB pool. 
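
Before the function itself: the hunk below threads a new alloc_align_mask through the slot search, and the stride over candidate slots has to honour it as well. A standalone restatement of the stride arithmetic, assuming the usual constants (IO_TLB_SHIFT is 11 upstream; a PAGE_SHIFT of 12 is typical):

    #include <stddef.h>

    #define IO_TLB_SHIFT 11 /* 2 KiB swiotlb slots */
    #define PAGE_SHIFT 12
    #define PAGE_SIZE (1UL << PAGE_SHIFT)

    static unsigned long max_ul(unsigned long a, unsigned long b)
    {
        return a > b ? a : b;
    }

    static unsigned long slot_stride(unsigned long iotlb_align_mask,
                                     unsigned long alloc_align_mask,
                                     size_t alloc_size)
    {
        unsigned long stride = (iotlb_align_mask >> IO_TLB_SHIFT) + 1;

        if (alloc_size >= PAGE_SIZE)
            stride = max_ul(stride, stride << (PAGE_SHIFT - IO_TLB_SHIFT));
        /* the new requirement: at least the allocation's alignment */
        return max_ul(stride, (alloc_align_mask >> IO_TLB_SHIFT) + 1);
    }
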
+ */
+ static int swiotlb_find_slots(struct device *dev, phys_addr_t orig_addr,
+- size_t alloc_size)
++ size_t alloc_size, unsigned int alloc_align_mask)
+ {
+ struct io_tlb_mem *mem = dev->dma_io_tlb_mem;
+ unsigned long boundary_mask = dma_get_seg_boundary(dev);
+@@ -483,6 +486,7 @@ static int swiotlb_find_slots(struct device *dev, phys_addr_t orig_addr,
+ stride = (iotlb_align_mask >> IO_TLB_SHIFT) + 1;
+ if (alloc_size >= PAGE_SIZE)
+ stride = max(stride, stride << (PAGE_SHIFT - IO_TLB_SHIFT));
++ stride = max(stride, (alloc_align_mask >> IO_TLB_SHIFT) + 1);
+
+ spin_lock_irqsave(&mem->lock, flags);
+ if (unlikely(nslots > mem->nslabs - mem->used))
+@@ -541,7 +545,8 @@ found:
+
+ phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr,
+ size_t mapping_size, size_t alloc_size,
+- enum dma_data_direction dir, unsigned long attrs)
++ unsigned int alloc_align_mask, enum dma_data_direction dir,
++ unsigned long attrs)
+ {
+ struct io_tlb_mem *mem = dev->dma_io_tlb_mem;
+ unsigned int offset = swiotlb_align_offset(dev, orig_addr);
+@@ -549,7 +554,7 @@ phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr,
+ int index;
+ phys_addr_t tlb_addr;
+
+- if (!mem)
++ if (!mem || !mem->nslabs)
+ panic("Can not allocate SWIOTLB buffer earlier and can't now provide you with the DMA bounce buffer");
+
+ if (mem_encrypt_active())
+@@ -561,7 +566,8 @@ phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr,
+ return (phys_addr_t)DMA_MAPPING_ERROR;
+ }
+
+- index = swiotlb_find_slots(dev, orig_addr, alloc_size + offset);
++ index = swiotlb_find_slots(dev, orig_addr,
++ alloc_size + offset, alloc_align_mask);
+ if (index == -1) {
+ if (!(attrs & DMA_ATTR_NO_WARN))
+ dev_warn_ratelimited(dev,
+@@ -578,9 +584,14 @@ phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr,
+ for (i = 0; i < nr_slots(alloc_size + offset); i++)
+ mem->slots[index + i].orig_addr = slot_addr(orig_addr, i);
+ tlb_addr = slot_addr(mem->start, index) + offset;
+- if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
+- (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL))
+- swiotlb_bounce(dev, tlb_addr, mapping_size, DMA_TO_DEVICE);
++ /*
++ * When dir == DMA_FROM_DEVICE we could omit the copy from the orig
++ * to the tlb buffer, if we knew for sure the device will
++ * overwrite the entire current content. But we don't. Thus
++ * unconditional bounce may prevent leaking swiotlb content (i.e.
++ * kernel memory) to user-space. 
++ */
+ swiotlb_bounce(dev, tlb_addr, mapping_size, DMA_TO_DEVICE);
+ return tlb_addr;
+ }
+
+@@ -675,7 +686,7 @@ dma_addr_t swiotlb_map(struct device *dev, phys_addr_t paddr, size_t size,
+ trace_swiotlb_bounced(dev, phys_to_dma(dev, paddr), size,
+ swiotlb_force);
+
+- swiotlb_addr = swiotlb_tbl_map_single(dev, paddr, size, size, dir,
++ swiotlb_addr = swiotlb_tbl_map_single(dev, paddr, size, size, 0, dir,
+ attrs);
+ if (swiotlb_addr == (phys_addr_t)DMA_MAPPING_ERROR)
+ return DMA_MAPPING_ERROR;
+@@ -698,7 +709,18 @@ dma_addr_t swiotlb_map(struct device *dev, phys_addr_t paddr, size_t size,
+
+ size_t swiotlb_max_mapping_size(struct device *dev)
+ {
+- return ((size_t)IO_TLB_SIZE) * IO_TLB_SEGSIZE;
++ int min_align_mask = dma_get_min_align_mask(dev);
++ int min_align = 0;
++
++ /*
++ * swiotlb_find_slots() skips slots according to
++ * min align mask. This affects max mapping size.
++ * Take it into account here.
++ */
++ if (min_align_mask)
++ min_align = roundup(min_align_mask, IO_TLB_SIZE);
++
++ return ((size_t)IO_TLB_SIZE) * IO_TLB_SEGSIZE - min_align;
+ }
+
+ bool is_swiotlb_active(struct device *dev)
+@@ -759,7 +781,7 @@ struct page *swiotlb_alloc(struct device *dev, size_t size)
+ if (!mem)
+ return NULL;
+
+- index = swiotlb_find_slots(dev, 0, size);
++ index = swiotlb_find_slots(dev, 0, size, 0);
+ if (index == -1)
+ return NULL;
+
+diff --git a/kernel/entry/common.c b/kernel/entry/common.c
+index d5a61d565ad5d..998bdb7b8bf7f 100644
+--- a/kernel/entry/common.c
++++ b/kernel/entry/common.c
+@@ -124,7 +124,7 @@ static __always_inline void __exit_to_user_mode(void) 
remcsum_unadjust(__sum16 *psum, __wsum delta) -+static __always_inline void remcsum_unadjust(__sum16 *psum, __wsum delta) - { - *psum = csum_fold(csum_sub(delta, (__force __wsum)*psum)); - } -diff --git a/include/net/dst_cache.h b/include/net/dst_cache.h -index 67634675e9197..df6622a5fe98f 100644 ---- a/include/net/dst_cache.h -+++ b/include/net/dst_cache.h -@@ -79,6 +79,17 @@ static inline void dst_cache_reset(struct dst_cache *dst_cache) - dst_cache->reset_ts = jiffies; - } + user_enter_irqoff(); +@@ -412,7 +412,7 @@ noinstr void irqentry_exit(struct pt_regs *regs, irqentry_state_t state) + instrumentation_begin(); + /* Tell the tracer that IRET will enable interrupts */ + trace_hardirqs_on_prepare(); +- lockdep_hardirqs_on_prepare(CALLER_ADDR0); ++ lockdep_hardirqs_on_prepare(); + instrumentation_end(); + rcu_irq_exit(); + lockdep_hardirqs_on(CALLER_ADDR0); +@@ -465,7 +465,7 @@ void noinstr irqentry_nmi_exit(struct pt_regs *regs, irqentry_state_t irq_state) + ftrace_nmi_exit(); + if (irq_state.lockdep) { + trace_hardirqs_on_prepare(); +- lockdep_hardirqs_on_prepare(CALLER_ADDR0); ++ lockdep_hardirqs_on_prepare(); + } + instrumentation_end(); -+/** -+ * dst_cache_reset_now - invalidate the cache contents immediately -+ * @dst_cache: the cache -+ * -+ * The caller must be sure there are no concurrent users, as this frees -+ * all dst_cache users immediately, rather than waiting for the next -+ * per-cpu usage like dst_cache_reset does. Most callers should use the -+ * higher speed lazily-freed dst_cache_reset function instead. -+ */ -+void dst_cache_reset_now(struct dst_cache *dst_cache); -+ - /** - * dst_cache_init - initialize the cache, allocating the required storage - * @dst_cache: the cache -diff --git a/include/net/dst_metadata.h b/include/net/dst_metadata.h -index 14efa0ded75dd..adab27ba1ecbf 100644 ---- a/include/net/dst_metadata.h -+++ b/include/net/dst_metadata.h -@@ -123,8 +123,20 @@ static inline struct metadata_dst *tun_dst_unclone(struct sk_buff *skb) +diff --git a/kernel/entry/syscall_user_dispatch.c b/kernel/entry/syscall_user_dispatch.c +index c240302f56e23..0b6379adff6bd 100644 +--- a/kernel/entry/syscall_user_dispatch.c ++++ b/kernel/entry/syscall_user_dispatch.c +@@ -47,14 +47,18 @@ bool syscall_user_dispatch(struct pt_regs *regs) + * access_ok() is performed once, at prctl time, when + * the selector is loaded by userspace. 
+ */ +- if (unlikely(__get_user(state, sd->selector))) +- do_exit(SIGSEGV); ++ if (unlikely(__get_user(state, sd->selector))) { ++ force_exit_sig(SIGSEGV); ++ return true; ++ } - memcpy(&new_md->u.tun_info, &md_dst->u.tun_info, - sizeof(struct ip_tunnel_info) + md_size); -+#ifdef CONFIG_DST_CACHE -+ /* Unclone the dst cache if there is one */ -+ if (new_md->u.tun_info.dst_cache.cache) { -+ int ret; -+ -+ ret = dst_cache_init(&new_md->u.tun_info.dst_cache, GFP_ATOMIC); -+ if (ret) { -+ metadata_dst_free(new_md); -+ return ERR_PTR(ret); + if (likely(state == SYSCALL_DISPATCH_FILTER_ALLOW)) + return false; + +- if (state != SYSCALL_DISPATCH_FILTER_BLOCK) +- do_exit(SIGSYS); ++ if (state != SYSCALL_DISPATCH_FILTER_BLOCK) { ++ force_exit_sig(SIGSYS); ++ return true; + } -+ } -+#endif -+ - skb_dst_drop(skb); -- dst_hold(&new_md->dst); - skb_dst_set(skb, &new_md->dst); - return new_md; - } -diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h -index 4b10676c69d19..bd07484ab9dd5 100644 ---- a/include/net/fib_rules.h -+++ b/include/net/fib_rules.h -@@ -69,7 +69,7 @@ struct fib_rules_ops { - int (*action)(struct fib_rule *, - struct flowi *, int, - struct fib_lookup_arg *); -- bool (*suppress)(struct fib_rule *, -+ bool (*suppress)(struct fib_rule *, int, - struct fib_lookup_arg *); - int (*match)(struct fib_rule *, - struct flowi *, int); -@@ -218,7 +218,9 @@ INDIRECT_CALLABLE_DECLARE(int fib4_rule_action(struct fib_rule *rule, - struct fib_lookup_arg *arg)); + } - INDIRECT_CALLABLE_DECLARE(bool fib6_rule_suppress(struct fib_rule *rule, -+ int flags, - struct fib_lookup_arg *arg)); - INDIRECT_CALLABLE_DECLARE(bool fib4_rule_suppress(struct fib_rule *rule, -+ int flags, - struct fib_lookup_arg *arg)); - #endif -diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h -index ffd386ea0dbb3..c8d1c5e187e4b 100644 ---- a/include/net/flow_dissector.h -+++ b/include/net/flow_dissector.h -@@ -59,6 +59,8 @@ struct flow_dissector_key_vlan { - __be16 vlan_tci; - }; - __be16 vlan_tpid; -+ __be16 vlan_eth_type; -+ u16 padding; - }; + sd->on_dispatch = true; +diff --git a/kernel/events/core.c b/kernel/events/core.c +index f23ca260307f0..d8795036202ac 100644 +--- a/kernel/events/core.c ++++ b/kernel/events/core.c +@@ -54,6 +54,7 @@ + #include <linux/highmem.h> + #include <linux/pgtable.h> + #include <linux/buildid.h> ++#include <linux/task_work.h> - struct flow_dissector_mpls_lse { -diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h -index 3961461d9c8bc..7a2b0223a02c7 100644 ---- a/include/net/flow_offload.h -+++ b/include/net/flow_offload.h -@@ -575,5 +575,6 @@ int flow_indr_dev_setup_offload(struct net_device *dev, struct Qdisc *sch, - enum tc_setup_type type, void *data, - struct flow_block_offload *bo, - void (*cleanup)(struct flow_block_cb *block_cb)); -+bool flow_indr_dev_exists(void); + #include "internal.h" - #endif /* _NET_FLOW_OFFLOAD_H */ -diff --git a/include/net/ieee802154_netdev.h b/include/net/ieee802154_netdev.h -index d0d188c3294bd..03b64bf876a46 100644 ---- a/include/net/ieee802154_netdev.h -+++ b/include/net/ieee802154_netdev.h -@@ -15,6 +15,22 @@ - #ifndef IEEE802154_NETDEVICE_H - #define IEEE802154_NETDEVICE_H +@@ -674,6 +675,23 @@ perf_event_set_state(struct perf_event *event, enum perf_event_state state) + WRITE_ONCE(event->state, state); + } -+#define IEEE802154_REQUIRED_SIZE(struct_type, member) \ -+ (offsetof(typeof(struct_type), member) + \ -+ sizeof(((typeof(struct_type) *)(NULL))->member)) -+ -+#define IEEE802154_ADDR_OFFSET \ -+ 
offsetof(typeof(struct sockaddr_ieee802154), addr) -+ -+#define IEEE802154_MIN_NAMELEN (IEEE802154_ADDR_OFFSET + \ -+ IEEE802154_REQUIRED_SIZE(struct ieee802154_addr_sa, addr_type)) ++/* ++ * UP store-release, load-acquire ++ */ + -+#define IEEE802154_NAMELEN_SHORT (IEEE802154_ADDR_OFFSET + \ -+ IEEE802154_REQUIRED_SIZE(struct ieee802154_addr_sa, short_addr)) ++#define __store_release(ptr, val) \ ++do { \ ++ barrier(); \ ++ WRITE_ONCE(*(ptr), (val)); \ ++} while (0) + -+#define IEEE802154_NAMELEN_LONG (IEEE802154_ADDR_OFFSET + \ -+ IEEE802154_REQUIRED_SIZE(struct ieee802154_addr_sa, hwaddr)) ++#define __load_acquire(ptr) \ ++({ \ ++ __unqual_scalar_typeof(*(ptr)) ___p = READ_ONCE(*(ptr)); \ ++ barrier(); \ ++ ___p; \ ++}) + - #include <net/af_ieee802154.h> - #include <linux/netdevice.h> - #include <linux/skbuff.h> -@@ -165,6 +181,33 @@ static inline void ieee802154_devaddr_to_raw(void *raw, __le64 addr) - memcpy(raw, &temp, IEEE802154_ADDR_LEN); + #ifdef CONFIG_CGROUP_PERF + + static inline bool +@@ -719,34 +737,51 @@ static inline u64 perf_cgroup_event_time(struct perf_event *event) + return t->time; } -+static inline int -+ieee802154_sockaddr_check_size(struct sockaddr_ieee802154 *daddr, int len) -+{ -+ struct ieee802154_addr_sa *sa; -+ int ret = 0; -+ -+ sa = &daddr->addr; -+ if (len < IEEE802154_MIN_NAMELEN) -+ return -EINVAL; -+ switch (sa->addr_type) { -+ case IEEE802154_ADDR_NONE: -+ break; -+ case IEEE802154_ADDR_SHORT: -+ if (len < IEEE802154_NAMELEN_SHORT) -+ ret = -EINVAL; -+ break; -+ case IEEE802154_ADDR_LONG: -+ if (len < IEEE802154_NAMELEN_LONG) -+ ret = -EINVAL; -+ break; -+ default: -+ ret = -EINVAL; -+ break; -+ } -+ return ret; -+} -+ - static inline void ieee802154_addr_from_sa(struct ieee802154_addr *a, - const struct ieee802154_addr_sa *sa) +-static inline void __update_cgrp_time(struct perf_cgroup *cgrp) ++static inline u64 perf_cgroup_event_time_now(struct perf_event *event, u64 now) { -diff --git a/include/net/if_inet6.h b/include/net/if_inet6.h -index 653e7d0f65cb7..8ec0878a90a7a 100644 ---- a/include/net/if_inet6.h -+++ b/include/net/if_inet6.h -@@ -64,6 +64,14 @@ struct inet6_ifaddr { +- struct perf_cgroup_info *info; +- u64 now; +- +- now = perf_clock(); ++ struct perf_cgroup_info *t; - struct hlist_node addr_lst; - struct list_head if_list; +- info = this_cpu_ptr(cgrp->info); ++ t = per_cpu_ptr(event->cgrp->info, event->cpu); ++ if (!__load_acquire(&t->active)) ++ return t->time; ++ now += READ_ONCE(t->timeoffset); ++ return now; ++} + +- info->time += now - info->timestamp; ++static inline void __update_cgrp_time(struct perf_cgroup_info *info, u64 now, bool adv) ++{ ++ if (adv) ++ info->time += now - info->timestamp; + info->timestamp = now; + /* -+ * Used to safely traverse idev->addr_list in process context -+ * if the idev->lock needed to protect idev->addr_list cannot be held. -+ * In that case, add the items to this list temporarily and iterate -+ * without holding idev->lock. -+ * See addrconf_ifdown and dev_forward_change. 
++ * see update_context_time() + */ -+ struct list_head if_list_aux; - - struct list_head tmp_list; - struct inet6_ifaddr *ifpub; -diff --git a/include/net/inet6_hashtables.h b/include/net/inet6_hashtables.h -index 81b9659530368..56f1286583d3c 100644 ---- a/include/net/inet6_hashtables.h -+++ b/include/net/inet6_hashtables.h -@@ -103,15 +103,24 @@ struct sock *inet6_lookup(struct net *net, struct inet_hashinfo *hashinfo, - const int dif); ++ WRITE_ONCE(info->timeoffset, info->time - info->timestamp); + } - int inet6_hash(struct sock *sk); --#endif /* IS_ENABLED(CONFIG_IPV6) */ +-static inline void update_cgrp_time_from_cpuctx(struct perf_cpu_context *cpuctx) ++static inline void update_cgrp_time_from_cpuctx(struct perf_cpu_context *cpuctx, bool final) + { + struct perf_cgroup *cgrp = cpuctx->cgrp; + struct cgroup_subsys_state *css; ++ struct perf_cgroup_info *info; --#define INET6_MATCH(__sk, __net, __saddr, __daddr, __ports, __dif, __sdif) \ -- (((__sk)->sk_portpair == (__ports)) && \ -- ((__sk)->sk_family == AF_INET6) && \ -- ipv6_addr_equal(&(__sk)->sk_v6_daddr, (__saddr)) && \ -- ipv6_addr_equal(&(__sk)->sk_v6_rcv_saddr, (__daddr)) && \ -- (((__sk)->sk_bound_dev_if == (__dif)) || \ -- ((__sk)->sk_bound_dev_if == (__sdif))) && \ -- net_eq(sock_net(__sk), (__net))) -+static inline bool inet6_match(struct net *net, const struct sock *sk, -+ const struct in6_addr *saddr, -+ const struct in6_addr *daddr, -+ const __portpair ports, -+ const int dif, const int sdif) -+{ -+ if (!net_eq(sock_net(sk), net) || -+ sk->sk_family != AF_INET6 || -+ sk->sk_portpair != ports || -+ !ipv6_addr_equal(&sk->sk_v6_daddr, saddr) || -+ !ipv6_addr_equal(&sk->sk_v6_rcv_saddr, daddr)) -+ return false; + if (cgrp) { ++ u64 now = perf_clock(); + -+ /* READ_ONCE() paired with WRITE_ONCE() in sock_bindtoindex_locked() */ -+ return inet_sk_bound_dev_eq(net, READ_ONCE(sk->sk_bound_dev_if), dif, -+ sdif); -+} -+#endif /* IS_ENABLED(CONFIG_IPV6) */ + for (css = &cgrp->css; css; css = css->parent) { + cgrp = container_of(css, struct perf_cgroup, css); +- __update_cgrp_time(cgrp); ++ info = this_cpu_ptr(cgrp->info); ++ ++ __update_cgrp_time(info, now, true); ++ if (final) ++ __store_release(&info->active, 0); + } + } + } - #endif /* _INET6_HASHTABLES_H */ -diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h -index b06c2d02ec84e..695ed45841f06 100644 ---- a/include/net/inet_connection_sock.h -+++ b/include/net/inet_connection_sock.h -@@ -289,7 +289,7 @@ static inline void inet_csk_prepare_for_destroy_sock(struct sock *sk) + static inline void update_cgrp_time_from_event(struct perf_event *event) { - /* The below has to be done to allow calling inet_csk_destroy_sock */ - sock_set_flag(sk, SOCK_DEAD); -- percpu_counter_inc(sk->sk_prot->orphan_count); -+ this_cpu_inc(*sk->sk_prot->orphan_count); ++ struct perf_cgroup_info *info; + struct perf_cgroup *cgrp; + + /* +@@ -760,8 +795,10 @@ static inline void update_cgrp_time_from_event(struct perf_event *event) + /* + * Do not update time when cgroup is not active + */ +- if (cgroup_is_descendant(cgrp->css.cgroup, event->cgrp->css.cgroup)) +- __update_cgrp_time(event->cgrp); ++ if (cgroup_is_descendant(cgrp->css.cgroup, event->cgrp->css.cgroup)) { ++ info = this_cpu_ptr(event->cgrp->info); ++ __update_cgrp_time(info, perf_clock(), true); ++ } } - void inet_csk_destroy_sock(struct sock *sk); -@@ -315,7 +315,7 @@ void inet_csk_update_fastreuse(struct inet_bind_bucket *tb, + static inline void +@@ -785,7 +822,8 @@ perf_cgroup_set_timestamp(struct 
task_struct *task, + for (css = &cgrp->css; css; css = css->parent) { + cgrp = container_of(css, struct perf_cgroup, css); + info = this_cpu_ptr(cgrp->info); +- info->timestamp = ctx->timestamp; ++ __update_cgrp_time(info, ctx->timestamp, false); ++ __store_release(&info->active, 1); + } + } - struct dst_entry *inet_csk_update_pmtu(struct sock *sk, u32 mtu); +@@ -802,7 +840,7 @@ static DEFINE_PER_CPU(struct list_head, cgrp_cpuctx_list); + */ + static void perf_cgroup_switch(struct task_struct *task, int mode) + { +- struct perf_cpu_context *cpuctx; ++ struct perf_cpu_context *cpuctx, *tmp; + struct list_head *list; + unsigned long flags; --#define TCP_PINGPONG_THRESH 3 -+#define TCP_PINGPONG_THRESH 1 +@@ -813,7 +851,7 @@ static void perf_cgroup_switch(struct task_struct *task, int mode) + local_irq_save(flags); - static inline void inet_csk_enter_pingpong_mode(struct sock *sk) - { -@@ -332,14 +332,6 @@ static inline bool inet_csk_in_pingpong_mode(struct sock *sk) - return inet_csk(sk)->icsk_ack.pingpong >= TCP_PINGPONG_THRESH; + list = this_cpu_ptr(&cgrp_cpuctx_list); +- list_for_each_entry(cpuctx, list, cgrp_cpuctx_entry) { ++ list_for_each_entry_safe(cpuctx, tmp, list, cgrp_cpuctx_entry) { + WARN_ON_ONCE(cpuctx->ctx.nr_cgroups == 0); + + perf_ctx_lock(cpuctx, cpuctx->task_ctx); +@@ -981,14 +1019,6 @@ out: + return ret; } --static inline void inet_csk_inc_pingpong_cnt(struct sock *sk) +-static inline void +-perf_cgroup_set_shadow_time(struct perf_event *event, u64 now) -{ -- struct inet_connection_sock *icsk = inet_csk(sk); -- -- if (icsk->icsk_ack.pingpong < U8_MAX) -- icsk->icsk_ack.pingpong++; +- struct perf_cgroup_info *t; +- t = per_cpu_ptr(event->cgrp->info, event->cpu); +- event->shadow_ctx_time = now - t->timestamp; -} - - static inline bool inet_csk_has_ulp(struct sock *sk) + static inline void + perf_cgroup_event_enable(struct perf_event *event, struct perf_event_context *ctx) { - return inet_sk(sk)->is_icsk && !!inet_csk(sk)->icsk_ulp_ops; -diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h -index 48cc5795ceda6..63540be0fc34a 100644 ---- a/include/net/inet_frag.h -+++ b/include/net/inet_frag.h -@@ -117,8 +117,15 @@ int fqdir_init(struct fqdir **fqdirp, struct inet_frags *f, struct net *net); +@@ -1066,7 +1096,8 @@ static inline void update_cgrp_time_from_event(struct perf_event *event) + { + } - static inline void fqdir_pre_exit(struct fqdir *fqdir) +-static inline void update_cgrp_time_from_cpuctx(struct perf_cpu_context *cpuctx) ++static inline void update_cgrp_time_from_cpuctx(struct perf_cpu_context *cpuctx, ++ bool final) { -- fqdir->high_thresh = 0; /* prevent creation of new frags */ -- fqdir->dead = true; -+ /* Prevent creation of new frags. -+ * Pairs with READ_ONCE() in inet_frag_find(). -+ */ -+ WRITE_ONCE(fqdir->high_thresh, 0); -+ -+ /* Pairs with READ_ONCE() in inet_frag_kill(), ip_expire() -+ * and ip6frag_expire_frag_queue(). 
-+ */ -+ WRITE_ONCE(fqdir->dead, true); } - void fqdir_exit(struct fqdir *fqdir); -diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h -index f72ec113ae568..53c22b64e9724 100644 ---- a/include/net/inet_hashtables.h -+++ b/include/net/inet_hashtables.h -@@ -203,17 +203,6 @@ static inline void inet_ehash_locks_free(struct inet_hashinfo *hashinfo) - hashinfo->ehash_locks = NULL; +@@ -1098,12 +1129,12 @@ perf_cgroup_switch(struct task_struct *task, struct task_struct *next) + { } --static inline bool inet_sk_bound_dev_eq(struct net *net, int bound_dev_if, -- int dif, int sdif) --{ --#if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV) -- return inet_bound_dev_eq(!!net->ipv4.sysctl_tcp_l3mdev_accept, -- bound_dev_if, dif, sdif); --#else -- return inet_bound_dev_eq(true, bound_dev_if, dif, sdif); --#endif --} -- - struct inet_bind_bucket * - inet_bind_bucket_create(struct kmem_cache *cachep, struct net *net, - struct inet_bind_hashbucket *head, -@@ -295,7 +284,6 @@ static inline struct sock *inet_lookup_listener(struct net *net, - ((__force __portpair)(((__u32)(__dport) << 16) | (__force __u32)(__be16)(__sport))) - #endif +-static inline void +-perf_cgroup_set_shadow_time(struct perf_event *event, u64 now) ++static inline u64 perf_cgroup_event_time(struct perf_event *event) + { ++ return 0; + } --#if (BITS_PER_LONG == 64) - #ifdef __BIG_ENDIAN - #define INET_ADDR_COOKIE(__name, __saddr, __daddr) \ - const __addrpair __name = (__force __addrpair) ( \ -@@ -307,24 +295,20 @@ static inline struct sock *inet_lookup_listener(struct net *net, - (((__force __u64)(__be32)(__daddr)) << 32) | \ - ((__force __u64)(__be32)(__saddr))) - #endif /* __BIG_ENDIAN */ --#define INET_MATCH(__sk, __net, __cookie, __saddr, __daddr, __ports, __dif, __sdif) \ -- (((__sk)->sk_portpair == (__ports)) && \ -- ((__sk)->sk_addrpair == (__cookie)) && \ -- (((__sk)->sk_bound_dev_if == (__dif)) || \ -- ((__sk)->sk_bound_dev_if == (__sdif))) && \ -- net_eq(sock_net(__sk), (__net))) --#else /* 32-bit arch */ --#define INET_ADDR_COOKIE(__name, __saddr, __daddr) \ -- const int __name __deprecated __attribute__((unused)) +-static inline u64 perf_cgroup_event_time(struct perf_event *event) ++static inline u64 perf_cgroup_event_time_now(struct perf_event *event, u64 now) + { + return 0; + } +@@ -1525,22 +1556,59 @@ static void perf_unpin_context(struct perf_event_context *ctx) + /* + * Update the record of the current time in a context. 
+ */ +-static void update_context_time(struct perf_event_context *ctx) ++static void __update_context_time(struct perf_event_context *ctx, bool adv) + { + u64 now = perf_clock(); --#define INET_MATCH(__sk, __net, __cookie, __saddr, __daddr, __ports, __dif, __sdif) \ -- (((__sk)->sk_portpair == (__ports)) && \ -- ((__sk)->sk_daddr == (__saddr)) && \ -- ((__sk)->sk_rcv_saddr == (__daddr)) && \ -- (((__sk)->sk_bound_dev_if == (__dif)) || \ -- ((__sk)->sk_bound_dev_if == (__sdif))) && \ -- net_eq(sock_net(__sk), (__net))) --#endif /* 64-bit arch */ -+static inline bool INET_MATCH(struct net *net, const struct sock *sk, -+ const __addrpair cookie, const __portpair ports, -+ int dif, int sdif) -+{ -+ if (!net_eq(sock_net(sk), net) || -+ sk->sk_portpair != ports || -+ sk->sk_addrpair != cookie) -+ return false; +- ctx->time += now - ctx->timestamp; ++ if (adv) ++ ctx->time += now - ctx->timestamp; + ctx->timestamp = now; + -+ /* READ_ONCE() paired with WRITE_ONCE() in sock_bindtoindex_locked() */ -+ return inet_sk_bound_dev_eq(net, READ_ONCE(sk->sk_bound_dev_if), dif, -+ sdif); ++ /* ++ * The above: time' = time + (now - timestamp), can be re-arranged ++ * into: time` = now + (time - timestamp), which gives a single value ++ * offset to compute future time without locks on. ++ * ++ * See perf_event_time_now(), which can be used from NMI context where ++ * it's (obviously) not possible to acquire ctx->lock in order to read ++ * both the above values in a consistent manner. ++ */ ++ WRITE_ONCE(ctx->timeoffset, ctx->time - ctx->timestamp); +} - - /* Sockets in TCP_CLOSE state are _always_ taken out of the hash, so we need - * not check it for lookups anymore, thanks Alexey. -DaveM -@@ -425,7 +409,7 @@ static inline void sk_rcv_saddr_set(struct sock *sk, __be32 addr) ++ ++static void update_context_time(struct perf_event_context *ctx) ++{ ++ __update_context_time(ctx, true); } - int __inet_hash_connect(struct inet_timewait_death_row *death_row, -- struct sock *sk, u32 port_offset, -+ struct sock *sk, u64 port_offset, - int (*check_established)(struct inet_timewait_death_row *, - struct sock *, __u16, - struct inet_timewait_sock **)); -diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h -index 89163ef8cf4be..2c2b41ea7f81d 100644 ---- a/include/net/inet_sock.h -+++ b/include/net/inet_sock.h -@@ -107,7 +107,8 @@ static inline struct inet_request_sock *inet_rsk(const struct request_sock *sk) - - static inline u32 inet_request_mark(const struct sock *sk, struct sk_buff *skb) - { -- if (!sk->sk_mark && sock_net(sk)->ipv4.sysctl_tcp_fwmark_accept) -+ if (!sk->sk_mark && -+ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fwmark_accept)) - return skb->mark; - - return sk->sk_mark; -@@ -116,14 +117,15 @@ static inline u32 inet_request_mark(const struct sock *sk, struct sk_buff *skb) - static inline int inet_request_bound_dev_if(const struct sock *sk, - struct sk_buff *skb) + static u64 perf_event_time(struct perf_event *event) { -+ int bound_dev_if = READ_ONCE(sk->sk_bound_dev_if); - #ifdef CONFIG_NET_L3_MASTER_DEV - struct net *net = sock_net(sk); + struct perf_event_context *ctx = event->ctx; -- if (!sk->sk_bound_dev_if && net->ipv4.sysctl_tcp_l3mdev_accept) -+ if (!bound_dev_if && READ_ONCE(net->ipv4.sysctl_tcp_l3mdev_accept)) - return l3mdev_master_ifindex_by_index(net, skb->skb_iif); - #endif ++ if (unlikely(!ctx)) ++ return 0; ++ + if (is_cgroup_event(event)) + return perf_cgroup_event_time(event); -- return sk->sk_bound_dev_if; -+ return bound_dev_if; +- return ctx ? 
ctx->time : 0; ++ return ctx->time; ++} ++ ++static u64 perf_event_time_now(struct perf_event *event, u64 now) ++{ ++ struct perf_event_context *ctx = event->ctx; ++ ++ if (unlikely(!ctx)) ++ return 0; ++ ++ if (is_cgroup_event(event)) ++ return perf_cgroup_event_time_now(event, now); ++ ++ if (!(__load_acquire(&ctx->is_active) & EVENT_TIME)) ++ return ctx->time; ++ ++ now += READ_ONCE(ctx->timeoffset); ++ return now; } - static inline int inet_sk_bound_l3mdev(const struct sock *sk) -@@ -131,7 +133,7 @@ static inline int inet_sk_bound_l3mdev(const struct sock *sk) - #ifdef CONFIG_NET_L3_MASTER_DEV - struct net *net = sock_net(sk); + static enum event_type_t get_event_type(struct perf_event *event) +@@ -2285,11 +2353,27 @@ event_sched_out(struct perf_event *event, + event->pmu->del(event, 0); + event->oncpu = -1; + +- if (READ_ONCE(event->pending_disable) >= 0) { +- WRITE_ONCE(event->pending_disable, -1); ++ if (event->pending_disable) { ++ event->pending_disable = 0; + perf_cgroup_event_disable(event, ctx); + state = PERF_EVENT_STATE_OFF; + } ++ ++ if (event->pending_sigtrap) { ++ bool dec = true; ++ ++ event->pending_sigtrap = 0; ++ if (state != PERF_EVENT_STATE_OFF && ++ !event->pending_work) { ++ event->pending_work = 1; ++ dec = false; ++ WARN_ON_ONCE(!atomic_long_inc_not_zero(&event->refcount)); ++ task_work_add(current, &event->pending_task, TWA_RESUME); ++ } ++ if (dec) ++ local_dec(&event->ctx->nr_pending); ++ } ++ + perf_event_set_state(event, state); -- if (!net->ipv4.sysctl_tcp_l3mdev_accept) -+ if (!READ_ONCE(net->ipv4.sysctl_tcp_l3mdev_accept)) - return l3mdev_master_ifindex_by_index(net, - sk->sk_bound_dev_if); - #endif -@@ -147,6 +149,17 @@ static inline bool inet_bound_dev_eq(bool l3mdev_accept, int bound_dev_if, - return bound_dev_if == dif || bound_dev_if == sdif; - } + if (!is_software_event(event)) +@@ -2329,6 +2413,7 @@ group_sched_out(struct perf_event *group_event, -+static inline bool inet_sk_bound_dev_eq(struct net *net, int bound_dev_if, -+ int dif, int sdif) -+{ -+#if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV) -+ return inet_bound_dev_eq(!!READ_ONCE(net->ipv4.sysctl_tcp_l3mdev_accept), -+ bound_dev_if, dif, sdif); -+#else -+ return inet_bound_dev_eq(true, bound_dev_if, dif, sdif); -+#endif -+} -+ - struct inet_cork { - unsigned int flags; - __be32 addr; -@@ -207,11 +220,10 @@ struct inet_sock { - __be32 inet_saddr; - __s16 uc_ttl; - __u16 cmsg_flags; -+ struct ip_options_rcu __rcu *inet_opt; - __be16 inet_sport; - __u16 inet_id; + #define DETACH_GROUP 0x01UL + #define DETACH_CHILD 0x02UL ++#define DETACH_DEAD 0x04UL -- struct ip_options_rcu __rcu *inet_opt; -- int rx_dst_ifindex; - __u8 tos; - __u8 min_ttl; - __u8 mc_ttl; -@@ -253,6 +265,11 @@ struct inet_sock { - #define IP_CMSG_CHECKSUM BIT(7) - #define IP_CMSG_RECVFRAGSIZE BIT(8) + /* + * Cross CPU call to remove a performance event +@@ -2346,17 +2431,28 @@ __perf_remove_from_context(struct perf_event *event, -+static inline bool sk_is_inet(struct sock *sk) -+{ -+ return sk->sk_family == AF_INET || sk->sk_family == AF_INET6; -+} + if (ctx->is_active & EVENT_TIME) { + update_context_time(ctx); +- update_cgrp_time_from_cpuctx(cpuctx); ++ update_cgrp_time_from_cpuctx(cpuctx, false); + } + ++ /* ++ * Ensure event_sched_out() switches to OFF, at the very least ++ * this avoids raising perf_pending_task() at this time. 
++ */ ++ if (flags & DETACH_DEAD) ++ event->pending_disable = 1; + event_sched_out(event, cpuctx, ctx); + if (flags & DETACH_GROUP) + perf_group_detach(event); + if (flags & DETACH_CHILD) + perf_child_detach(event); + list_del_event(event, ctx); ++ if (flags & DETACH_DEAD) ++ event->state = PERF_EVENT_STATE_DEAD; + + if (!ctx->nr_events && ctx->is_active) { ++ if (ctx == &cpuctx->ctx) ++ update_cgrp_time_from_cpuctx(cpuctx, true); + - /** - * sk_to_full_sk - Access to a full socket - * @sk: pointer to a socket -@@ -369,7 +386,7 @@ static inline bool inet_get_convert_csum(struct sock *sk) - static inline bool inet_can_nonlocal_bind(struct net *net, - struct inet_sock *inet) + ctx->is_active = 0; + ctx->rotate_necessary = 0; + if (ctx->task) { +@@ -2388,7 +2484,11 @@ static void perf_remove_from_context(struct perf_event *event, unsigned long fla + * event_function_call() user. + */ + raw_spin_lock_irq(&ctx->lock); +- if (!ctx->is_active) { ++ /* ++ * Cgroup events are per-cpu events, and must IPI because of ++ * cgrp_cpuctx_list. ++ */ ++ if (!ctx->is_active && !is_cgroup_event(event)) { + __perf_remove_from_context(event, __get_cpu_context(ctx), + ctx, (void *)flags); + raw_spin_unlock_irq(&ctx->lock); +@@ -2434,7 +2534,7 @@ static void __perf_event_disable(struct perf_event *event, + * hold the top-level event's child_mutex, so any descendant that + * goes to exit will block in perf_event_exit_event(). + * +- * When called from perf_pending_event it's OK because event->ctx ++ * When called from perf_pending_irq it's OK because event->ctx + * is the current context on this CPU and preemption is disabled, + * hence we can't get into perf_event_task_sched_out for this context. + */ +@@ -2473,43 +2573,8 @@ EXPORT_SYMBOL_GPL(perf_event_disable); + + void perf_event_disable_inatomic(struct perf_event *event) { -- return net->ipv4.sysctl_ip_nonlocal_bind || -+ return READ_ONCE(net->ipv4.sysctl_ip_nonlocal_bind) || - inet->freebind || inet->transparent; +- WRITE_ONCE(event->pending_disable, smp_processor_id()); +- /* can fail, see perf_pending_event_disable() */ +- irq_work_queue(&event->pending); +-} +- +-static void perf_set_shadow_time(struct perf_event *event, +- struct perf_event_context *ctx) +-{ +- /* +- * use the correct time source for the time snapshot +- * +- * We could get by without this by leveraging the +- * fact that to get to this function, the caller +- * has most likely already called update_context_time() +- * and update_cgrp_time_xx() and thus both timestamp +- * are identical (or very close). Given that tstamp is, +- * already adjusted for cgroup, we could say that: +- * tstamp - ctx->timestamp +- * is equivalent to +- * tstamp - cgrp->timestamp. +- * +- * Then, in perf_output_read(), the calculation would +- * work with no changes because: +- * - event is guaranteed scheduled in +- * - no scheduled out in between +- * - thus the timestamp would be the same +- * +- * But this is a bit hairy. +- * +- * So instead, we have an explicit cgroup call to remain +- * within the time source all along. We believe it +- * is cleaner and simpler to understand. 
+- */ +- if (is_cgroup_event(event)) +- perf_cgroup_set_shadow_time(event, event->tstamp); +- else +- event->shadow_ctx_time = event->tstamp - ctx->timestamp; ++ event->pending_disable = 1; ++ irq_work_queue(&event->pending_irq); } -diff --git a/include/net/ip.h b/include/net/ip.h -index 9192444f2964e..8462ced0c21ec 100644 ---- a/include/net/ip.h -+++ b/include/net/ip.h -@@ -55,6 +55,7 @@ struct inet_skb_parm { - #define IPSKB_DOREDIRECT BIT(5) - #define IPSKB_FRAG_PMTU BIT(6) - #define IPSKB_L3SLAVE BIT(7) -+#define IPSKB_NOPOLICY BIT(8) - - u16 frag_max_size; - }; -@@ -351,7 +352,7 @@ static inline bool sysctl_dev_name_is_allowed(const char *name) - - static inline bool inet_port_requires_bind_service(struct net *net, unsigned short port) - { -- return port < net->ipv4.sysctl_ip_prot_sock; -+ return port < READ_ONCE(net->ipv4.sysctl_ip_prot_sock); - } + #define MAX_INTERRUPTS (~0ULL) +@@ -2552,8 +2617,6 @@ event_sched_in(struct perf_event *event, - #else -@@ -378,7 +379,7 @@ void ipfrag_init(void); - void ip_static_sysctl_init(void); + perf_pmu_disable(event->pmu); - #define IP4_REPLY_MARK(net, mark) \ -- ((net)->ipv4.sysctl_fwmark_reflect ? (mark) : 0) -+ (READ_ONCE((net)->ipv4.sysctl_fwmark_reflect) ? (mark) : 0) +- perf_set_shadow_time(event, ctx); +- + perf_log_itrace_start(event); - static inline bool ip_is_fragment(const struct iphdr *iph) - { -@@ -440,7 +441,7 @@ static inline unsigned int ip_dst_mtu_maybe_forward(const struct dst_entry *dst, - struct net *net = dev_net(dst->dev); - unsigned int mtu; + if (event->pmu->add(event, PERF_EF_START)) { +@@ -2857,11 +2920,14 @@ perf_install_in_context(struct perf_event_context *ctx, + * perf_event_attr::disabled events will not run and can be initialized + * without IPI. Except when this is the first event for the context, in + * that case we need the magic of the IPI to set ctx->is_active. ++ * Similarly, cgroup events for the context also needs the IPI to ++ * manipulate the cgrp_cpuctx_list. + * + * The IOC_ENABLE that is sure to follow the creation of a disabled + * event will issue the IPI and reprogram the hardware. + */ +- if (__perf_effective_state(event) == PERF_EVENT_STATE_OFF && ctx->nr_events) { ++ if (__perf_effective_state(event) == PERF_EVENT_STATE_OFF && ++ ctx->nr_events && !is_cgroup_event(event)) { + raw_spin_lock_irq(&ctx->lock); + if (ctx->task == TASK_TOMBSTONE) { + raw_spin_unlock_irq(&ctx->lock); +@@ -3193,6 +3259,15 @@ static int perf_event_modify_breakpoint(struct perf_event *bp, + return err; + } -- if (net->ipv4.sysctl_ip_fwd_use_pmtu || -+ if (READ_ONCE(net->ipv4.sysctl_ip_fwd_use_pmtu) || - ip_mtu_locked(dst) || - !forwarding) { - mtu = rt->rt_pmtu; -@@ -520,19 +521,18 @@ static inline void ip_select_ident_segs(struct net *net, struct sk_buff *skb, ++/* ++ * Copy event-type-independent attributes that may be modified. ++ */ ++static void perf_event_modify_copy_attr(struct perf_event_attr *to, ++ const struct perf_event_attr *from) ++{ ++ to->sig_data = from->sig_data; ++} ++ + static int perf_event_modify_attr(struct perf_event *event, + struct perf_event_attr *attr) { - struct iphdr *iph = ip_hdr(skb); +@@ -3215,10 +3290,17 @@ static int perf_event_modify_attr(struct perf_event *event, + WARN_ON_ONCE(event->ctx->parent_ctx); -+ /* We had many attacks based on IPID, use the private -+ * generator as much as we can. 
+ mutex_lock(&event->child_mutex); ++ /* ++ * Event-type-independent attributes must be copied before event-type ++ * modification, which will validate that final attributes match the ++ * source attributes after all relevant attributes have been copied. + */ -+ if (sk && inet_sk(sk)->inet_daddr) { -+ iph->id = htons(inet_sk(sk)->inet_id); -+ inet_sk(sk)->inet_id += segs; -+ return; ++ perf_event_modify_copy_attr(&event->attr, attr); + err = func(event, attr); + if (err) + goto out; + list_for_each_entry(child, &event->child_list, child_list) { ++ perf_event_modify_copy_attr(&child->attr, attr); + err = func(child, attr); + if (err) + goto out; +@@ -3247,16 +3329,6 @@ static void ctx_sched_out(struct perf_event_context *ctx, + return; + } + +- ctx->is_active &= ~event_type; +- if (!(ctx->is_active & EVENT_ALL)) +- ctx->is_active = 0; +- +- if (ctx->task) { +- WARN_ON_ONCE(cpuctx->task_ctx != ctx); +- if (!ctx->is_active) +- cpuctx->task_ctx = NULL; +- } +- + /* + * Always update time if it was set; not only when it changes. + * Otherwise we can 'forget' to update time for any but the last +@@ -3270,7 +3342,22 @@ static void ctx_sched_out(struct perf_event_context *ctx, + if (is_active & EVENT_TIME) { + /* update (and stop) ctx time */ + update_context_time(ctx); +- update_cgrp_time_from_cpuctx(cpuctx); ++ update_cgrp_time_from_cpuctx(cpuctx, ctx == &cpuctx->ctx); ++ /* ++ * CPU-release for the below ->is_active store, ++ * see __load_acquire() in perf_event_time_now() ++ */ ++ barrier(); + } - if ((iph->frag_off & htons(IP_DF)) && !skb->ignore_df) { -- /* This is only to work around buggy Windows95/2000 -- * VJ compression implementations. If the ID field -- * does not change, they drop every other packet in -- * a TCP stream using header compression. -- */ -- if (sk && inet_sk(sk)->inet_daddr) { -- iph->id = htons(inet_sk(sk)->inet_id); -- inet_sk(sk)->inet_id += segs; -- } else { -- iph->id = 0; -- } -+ iph->id = 0; - } else { -+ /* Unfortunately we need the big hammer to get a suitable IPID */ - __ip_select_ident(net, iph, segs); ++ ++ ctx->is_active &= ~event_type; ++ if (!(ctx->is_active & EVENT_ALL)) ++ ctx->is_active = 0; ++ ++ if (ctx->task) { ++ WARN_ON_ONCE(cpuctx->task_ctx != ctx); ++ if (!ctx->is_active) ++ cpuctx->task_ctx = NULL; } - } -diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h -index c412dde4d67dc..bbb27639f2933 100644 ---- a/include/net/ip6_fib.h -+++ b/include/net/ip6_fib.h -@@ -189,14 +189,16 @@ struct fib6_info { - u32 fib6_metric; - u8 fib6_protocol; - u8 fib6_type; + + is_active ^= ctx->is_active; /* changed bits */ +@@ -3444,11 +3531,23 @@ static void perf_event_context_sched_out(struct task_struct *task, int ctxn, + raw_spin_lock_nested(&next_ctx->lock, SINGLE_DEPTH_NESTING); + if (context_equiv(ctx, next_ctx)) { + ++ perf_pmu_disable(pmu); + -+ u8 offload; -+ u8 trap; -+ u8 offload_failed; ++ /* PMIs are disabled; ctx->nr_pending is stable. */ ++ if (local_read(&ctx->nr_pending) || ++ local_read(&next_ctx->nr_pending)) { ++ /* ++ * Must not swap out ctx when there's pending ++ * events that rely on the ctx->task relation. 
++ */ ++ raw_spin_unlock(&next_ctx->lock); ++ rcu_read_unlock(); ++ goto inside_switch; ++ } + - u8 should_flush:1, - dst_nocount:1, - dst_nopolicy:1, - fib6_destroying:1, -- offload:1, -- trap:1, -- offload_failed:1, -- unused:1; -+ unused:4; + WRITE_ONCE(ctx->task, next); + WRITE_ONCE(next_ctx->task, task); - struct rcu_head rcu; - struct nexthop *nh; -@@ -281,7 +283,7 @@ static inline bool fib6_get_cookie_safe(const struct fib6_info *f6i, - fn = rcu_dereference(f6i->fib6_node); +- perf_pmu_disable(pmu); +- + if (cpuctx->sched_cb_usage && pmu->sched_task) + pmu->sched_task(ctx, false); - if (fn) { -- *cookie = fn->fn_sernum; -+ *cookie = READ_ONCE(fn->fn_sernum); - /* pairs with smp_wmb() in __fib6_update_sernum_upto_root() */ - smp_rmb(); - status = true; -@@ -485,6 +487,7 @@ int fib6_nh_init(struct net *net, struct fib6_nh *fib6_nh, - struct fib6_config *cfg, gfp_t gfp_flags, - struct netlink_ext_ack *extack); - void fib6_nh_release(struct fib6_nh *fib6_nh); -+void fib6_nh_release_dsts(struct fib6_nh *fib6_nh); +@@ -3489,6 +3588,7 @@ unlock: + raw_spin_lock(&ctx->lock); + perf_pmu_disable(pmu); - int call_fib6_entry_notifiers(struct net *net, - enum fib_event_type event_type, -diff --git a/include/net/ip6_tunnel.h b/include/net/ip6_tunnel.h -index 028eaea1c8544..42d50856fcf24 100644 ---- a/include/net/ip6_tunnel.h -+++ b/include/net/ip6_tunnel.h -@@ -57,7 +57,7 @@ struct ip6_tnl { ++inside_switch: + if (cpuctx->sched_cb_usage && pmu->sched_task) + pmu->sched_task(ctx, false); + task_ctx_sched_out(cpuctx, ctx, EVENT_ALL); +@@ -3707,13 +3807,19 @@ static noinline int visit_groups_merge(struct perf_cpu_context *cpuctx, + return 0; + } - /* These fields used only by GRE */ - __u32 i_seqno; /* The last seen seqno */ -- __u32 o_seqno; /* The last output seqno */ -+ atomic_t o_seqno; /* The last output seqno */ - int hlen; /* tun_hlen + encap_hlen */ - int tun_hlen; /* Precalculated header length */ - int encap_hlen; /* Encap header length (FOU,GUE) */ -diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h -index ab5348e57db1a..3417ba2d27ad6 100644 ---- a/include/net/ip_fib.h -+++ b/include/net/ip_fib.h -@@ -438,7 +438,7 @@ int fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, - #ifdef CONFIG_IP_ROUTE_CLASSID - static inline int fib_num_tclassid_users(struct net *net) ++/* ++ * Because the userpage is strictly per-event (there is no concept of context, ++ * so there cannot be a context indirection), every userpage must be updated ++ * when context time starts :-( ++ * ++ * IOW, we must not miss EVENT_TIME edges. 
++ */ + static inline bool event_update_userpage(struct perf_event *event) { -- return net->ipv4.fib_num_tclassid_users; -+ return atomic_read(&net->ipv4.fib_num_tclassid_users); - } - #else - static inline int fib_num_tclassid_users(struct net *net) -diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h -index bc3b13ec93c9d..37d5d4968e20a 100644 ---- a/include/net/ip_tunnels.h -+++ b/include/net/ip_tunnels.h -@@ -113,7 +113,7 @@ struct ip_tunnel { + if (likely(!atomic_read(&event->mmap_count))) + return false; - /* These four fields used only by GRE */ - u32 i_seqno; /* The last seen seqno */ -- u32 o_seqno; /* The last output seqno */ -+ atomic_t o_seqno; /* The last output seqno */ - int tun_hlen; /* Precalculated header length */ + perf_event_update_time(event); +- perf_set_shadow_time(event, event->ctx); + perf_event_update_userpage(event); - /* These four fields used only by ERSPAN */ -diff --git a/include/net/ipv6.h b/include/net/ipv6.h -index f2d0ecc257bb2..359540dfc0339 100644 ---- a/include/net/ipv6.h -+++ b/include/net/ipv6.h -@@ -391,17 +391,20 @@ static inline void txopt_put(struct ipv6_txoptions *opt) - kfree_rcu(opt, rcu); - } + return true; +@@ -3797,13 +3903,23 @@ ctx_sched_in(struct perf_event_context *ctx, + struct task_struct *task) + { + int is_active = ctx->is_active; +- u64 now; -+#if IS_ENABLED(CONFIG_IPV6) - struct ip6_flowlabel *__fl6_sock_lookup(struct sock *sk, __be32 label); + lockdep_assert_held(&ctx->lock); - extern struct static_key_false_deferred ipv6_flowlabel_exclusive; - static inline struct ip6_flowlabel *fl6_sock_lookup(struct sock *sk, - __be32 label) - { -- if (static_branch_unlikely(&ipv6_flowlabel_exclusive.key)) -+ if (static_branch_unlikely(&ipv6_flowlabel_exclusive.key) && -+ READ_ONCE(sock_net(sk)->ipv6.flowlabel_has_excl)) - return __fl6_sock_lookup(sk, label) ? : ERR_PTR(-ENOENT); + if (likely(!ctx->nr_events)) + return; - return NULL; ++ if (is_active ^ EVENT_TIME) { ++ /* start ctx time */ ++ __update_context_time(ctx, false); ++ perf_cgroup_set_timestamp(task, ctx); ++ /* ++ * CPU-release for the below ->is_active store, ++ * see __load_acquire() in perf_event_time_now() ++ */ ++ barrier(); ++ } ++ + ctx->is_active |= (event_type | EVENT_TIME); + if (ctx->task) { + if (!is_active) +@@ -3814,13 +3930,6 @@ ctx_sched_in(struct perf_event_context *ctx, + + is_active ^= ctx->is_active; /* changed bits */ + +- if (is_active & EVENT_TIME) { +- /* start ctx time */ +- now = perf_clock(); +- ctx->timestamp = now; +- perf_cgroup_set_timestamp(task, ctx); +- } +- + /* + * First go through the list and put on any pinned groups + * in order to give them the best chance of going on. 
+@@ -4414,6 +4523,18 @@ static inline u64 perf_event_count(struct perf_event *event) + return local64_read(&event->count) + atomic64_read(&event->child_count); } -+#endif - struct ipv6_txoptions *fl6_merge_options(struct ipv6_txoptions *opt_space, - struct ip6_flowlabel *fl, -diff --git a/include/net/ipv6_frag.h b/include/net/ipv6_frag.h -index 851029ecff13c..0a4779175a523 100644 ---- a/include/net/ipv6_frag.h -+++ b/include/net/ipv6_frag.h -@@ -67,7 +67,8 @@ ip6frag_expire_frag_queue(struct net *net, struct frag_queue *fq) - struct sk_buff *head; ++static void calc_timer_values(struct perf_event *event, ++ u64 *now, ++ u64 *enabled, ++ u64 *running) ++{ ++ u64 ctx_time; ++ ++ *now = perf_clock(); ++ ctx_time = perf_event_time_now(event, *now); ++ __perf_update_times(event, ctx_time, enabled, running); ++} ++ + /* + * NMI-safe method to read a local event, that is an event that + * is: +@@ -4473,10 +4594,9 @@ int perf_event_read_local(struct perf_event *event, u64 *value, - rcu_read_lock(); -- if (fq->q.fqdir->dead) -+ /* Paired with the WRITE_ONCE() in fqdir_pre_exit(). */ -+ if (READ_ONCE(fq->q.fqdir->dead)) - goto out_rcu_unlock; - spin_lock(&fq->q.lock); + *value = local64_read(&event->count); + if (enabled || running) { +- u64 now = event->shadow_ctx_time + perf_clock(); +- u64 __enabled, __running; ++ u64 __enabled, __running, __now;; -diff --git a/include/net/ipv6_stubs.h b/include/net/ipv6_stubs.h -index afbce90c44808..45e0339be6fa4 100644 ---- a/include/net/ipv6_stubs.h -+++ b/include/net/ipv6_stubs.h -@@ -47,6 +47,7 @@ struct ipv6_stub { - struct fib6_config *cfg, gfp_t gfp_flags, - struct netlink_ext_ack *extack); - void (*fib6_nh_release)(struct fib6_nh *fib6_nh); -+ void (*fib6_nh_release_dsts)(struct fib6_nh *fib6_nh); - void (*fib6_update_sernum)(struct net *net, struct fib6_info *rt); - int (*ip6_del_rt)(struct net *net, struct fib6_info *rt, bool skip_notify); - void (*fib6_rt_update)(struct net *net, struct fib6_info *rt, -diff --git a/include/net/llc.h b/include/net/llc.h -index df282d9b40170..9c10b121b49b0 100644 ---- a/include/net/llc.h -+++ b/include/net/llc.h -@@ -72,7 +72,9 @@ struct llc_sap { - static inline - struct hlist_head *llc_sk_dev_hash(struct llc_sap *sap, int ifindex) +- __perf_update_times(event, now, &__enabled, &__running); ++ calc_timer_values(event, &__now, &__enabled, &__running); + if (enabled) + *enabled = __enabled; + if (running) +@@ -4948,7 +5068,7 @@ static void perf_addr_filters_splice(struct perf_event *event, + + static void _free_event(struct perf_event *event) { -- return &sap->sk_dev_hash[ifindex % LLC_SK_DEV_HASH_ENTRIES]; -+ u32 bucket = hash_32(ifindex, LLC_SK_DEV_HASH_BITS); -+ -+ return &sap->sk_dev_hash[bucket]; - } +- irq_work_sync(&event->pending); ++ irq_work_sync(&event->pending_irq); - static inline -diff --git a/include/net/ndisc.h b/include/net/ndisc.h -index 38e4094960cee..e97ef508664f4 100644 ---- a/include/net/ndisc.h -+++ b/include/net/ndisc.h -@@ -487,9 +487,9 @@ int igmp6_late_init(void); - void igmp6_cleanup(void); - void igmp6_late_cleanup(void); + unaccount_event(event); --int igmp6_event_query(struct sk_buff *skb); -+void igmp6_event_query(struct sk_buff *skb); +@@ -5102,9 +5222,7 @@ int perf_event_release_kernel(struct perf_event *event) --int igmp6_event_report(struct sk_buff *skb); -+void igmp6_event_report(struct sk_buff *skb); + ctx = perf_event_ctx_lock(event); + WARN_ON_ONCE(ctx->parent_ctx); +- perf_remove_from_context(event, DETACH_GROUP); +- raw_spin_lock_irq(&ctx->lock); + /* + * Mark this event as 
STATE_DEAD, there is no external reference to it + * anymore. +@@ -5116,8 +5234,7 @@ int perf_event_release_kernel(struct perf_event *event) + * Thus this guarantees that we will in fact observe and kill _ALL_ + * child events. + */ +- event->state = PERF_EVENT_STATE_DEAD; +- raw_spin_unlock_irq(&ctx->lock); ++ perf_remove_from_context(event, DETACH_GROUP|DETACH_DEAD); - #ifdef CONFIG_SYSCTL -diff --git a/include/net/neighbour.h b/include/net/neighbour.h -index 22ced1381ede5..d5767e25509cc 100644 ---- a/include/net/neighbour.h -+++ b/include/net/neighbour.h -@@ -253,6 +253,7 @@ static inline void *neighbour_priv(const struct neighbour *n) - #define NEIGH_UPDATE_F_OVERRIDE 0x00000001 - #define NEIGH_UPDATE_F_WEAK_OVERRIDE 0x00000002 - #define NEIGH_UPDATE_F_OVERRIDE_ISROUTER 0x00000004 -+#define NEIGH_UPDATE_F_USE 0x10000000 - #define NEIGH_UPDATE_F_EXT_LEARNED 0x20000000 - #define NEIGH_UPDATE_F_ISROUTER 0x40000000 - #define NEIGH_UPDATE_F_ADMIN 0x80000000 -@@ -504,10 +505,15 @@ static inline int neigh_output(struct neighbour *n, struct sk_buff *skb, - { - const struct hh_cache *hh = &n->hh; + perf_event_ctx_unlock(event, ctx); -- if ((n->nud_state & NUD_CONNECTED) && hh->hh_len && !skip_cache) -+ /* n->nud_state and hh->hh_len could be changed under us. -+ * neigh_hh_output() is taking care of the race later. -+ */ -+ if (!skip_cache && -+ (READ_ONCE(n->nud_state) & NUD_CONNECTED) && -+ READ_ONCE(hh->hh_len)) - return neigh_hh_output(hh, skb); -- else -- return n->output(n, skb); -+ -+ return n->output(n, skb); +@@ -5798,18 +5915,6 @@ static int perf_event_index(struct perf_event *event) + return event->pmu->event_idx(event); } - static inline struct neighbour * -diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h -index bb5fa59140321..2ba326f9e004d 100644 ---- a/include/net/net_namespace.h -+++ b/include/net/net_namespace.h -@@ -479,4 +479,10 @@ static inline void fnhe_genid_bump(struct net *net) - atomic_inc(&net->fnhe_genid); - } +-static void calc_timer_values(struct perf_event *event, +- u64 *now, +- u64 *enabled, +- u64 *running) +-{ +- u64 ctx_time; +- +- *now = perf_clock(); +- ctx_time = event->shadow_ctx_time + *now; +- __perf_update_times(event, ctx_time, enabled, running); +-} +- + static void perf_event_init_userpage(struct perf_event *event) + { + struct perf_event_mmap_page *userpg; +@@ -5934,6 +6039,8 @@ static void ring_buffer_attach(struct perf_event *event, + struct perf_buffer *old_rb = NULL; + unsigned long flags; -+#ifdef CONFIG_NET -+void net_ns_init(void); -+#else -+static inline void net_ns_init(void) {} -+#endif ++ WARN_ON_ONCE(event->parent); + - #endif /* __NET_NET_NAMESPACE_H */ -diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h -index cc663c68ddc4b..34c266502a50e 100644 ---- a/include/net/netfilter/nf_conntrack.h -+++ b/include/net/netfilter/nf_conntrack.h -@@ -76,6 +76,8 @@ struct nf_conn { - * Hint, SKB address this struct and refcnt via skb->_nfct and - * helpers nf_conntrack_get() and nf_conntrack_put(). - * Helper nf_ct_put() equals nf_conntrack_put() by dec refcnt, -+ * except that the latter uses internal indirection and does not -+ * result in a conntrack module dependency. - * beware nf_ct_get() is different and don't inc refcnt. 
- */ - struct nf_conntrack ct_general; -@@ -169,11 +171,13 @@ nf_ct_get(const struct sk_buff *skb, enum ip_conntrack_info *ctinfo) - return (struct nf_conn *)(nfct & NFCT_PTRMASK); - } + if (event->rb) { + /* + * Should be impossible, we set this when removing +@@ -5991,6 +6098,9 @@ static void ring_buffer_wakeup(struct perf_event *event) + { + struct perf_buffer *rb; -+void nf_ct_destroy(struct nf_conntrack *nfct); ++ if (event->parent) ++ event = event->parent; + - /* decrement reference count on a conntrack */ - static inline void nf_ct_put(struct nf_conn *ct) + rcu_read_lock(); + rb = rcu_dereference(event->rb); + if (rb) { +@@ -6004,6 +6114,9 @@ struct perf_buffer *ring_buffer_get(struct perf_event *event) { -- WARN_ON(!ct); -- nf_conntrack_put(&ct->ct_general); -+ if (ct && refcount_dec_and_test(&ct->ct_general.use)) -+ nf_ct_destroy(&ct->ct_general); - } + struct perf_buffer *rb; - /* Protocol module loading */ -@@ -276,14 +280,14 @@ static inline bool nf_is_loopback_packet(const struct sk_buff *skb) - /* jiffies until ct expires, 0 if already expired */ - static inline unsigned long nf_ct_expires(const struct nf_conn *ct) - { -- s32 timeout = ct->timeout - nfct_time_stamp; -+ s32 timeout = READ_ONCE(ct->timeout) - nfct_time_stamp; ++ if (event->parent) ++ event = event->parent; ++ + rcu_read_lock(); + rb = rcu_dereference(event->rb); + if (rb) { +@@ -6270,17 +6383,17 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma) + again: + mutex_lock(&event->mmap_mutex); + if (event->rb) { +- if (event->rb->nr_pages != nr_pages) { ++ if (data_page_nr(event->rb) != nr_pages) { + ret = -EINVAL; + goto unlock; + } - return timeout > 0 ? timeout : 0; - } + if (!atomic_inc_not_zero(&event->rb->mmap_count)) { + /* +- * Raced against perf_mmap_close() through +- * perf_event_set_output(). Try again, hope for better +- * luck. ++ * Raced against perf_mmap_close(); remove the ++ * event and try again. + */ ++ ring_buffer_attach(event, NULL); + mutex_unlock(&event->mmap_mutex); + goto again; + } +@@ -6349,7 +6462,6 @@ accounting: + ring_buffer_attach(event, rb); - static inline bool nf_ct_is_expired(const struct nf_conn *ct) - { -- return (__s32)(ct->timeout - nfct_time_stamp) <= 0; -+ return (__s32)(READ_ONCE(ct->timeout) - nfct_time_stamp) <= 0; + perf_event_update_time(event); +- perf_set_shadow_time(event, event->ctx); + perf_event_init_userpage(event); + perf_event_update_userpage(event); + } else { +@@ -6447,32 +6559,43 @@ static void perf_sigtrap(struct perf_event *event) + return; + + /* +- * perf_pending_event() can race with the task exiting. ++ * Both perf_pending_task() and perf_pending_irq() can race with the ++ * task exiting. + */ + if (current->flags & PF_EXITING) + return; + +- force_sig_perf((void __user *)event->pending_addr, +- event->attr.type, event->attr.sig_data); ++ send_sig_perf((void __user *)event->pending_addr, ++ event->attr.type, event->attr.sig_data); } - /* use after obtaining a reference count */ -@@ -302,7 +306,7 @@ static inline bool nf_ct_should_gc(const struct nf_conn *ct) - static inline void nf_ct_offload_timeout(struct nf_conn *ct) +-static void perf_pending_event_disable(struct perf_event *event) ++/* ++ * Deliver the pending work in-event-context or follow the context. 
++ */ ++static void __perf_pending_irq(struct perf_event *event) { - if (nf_ct_expires(ct) < NF_CT_DAY / 2) -- ct->timeout = nfct_time_stamp + NF_CT_DAY; -+ WRITE_ONCE(ct->timeout, nfct_time_stamp + NF_CT_DAY); - } +- int cpu = READ_ONCE(event->pending_disable); ++ int cpu = READ_ONCE(event->oncpu); - struct kernel_param; -diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h -index 13807ea94cd2b..2d524782f53b7 100644 ---- a/include/net/netfilter/nf_conntrack_core.h -+++ b/include/net/netfilter/nf_conntrack_core.h -@@ -58,8 +58,13 @@ static inline int nf_conntrack_confirm(struct sk_buff *skb) - int ret = NF_ACCEPT; ++ /* ++ * If the event isn't running; we done. event_sched_out() will have ++ * taken care of things. ++ */ + if (cpu < 0) + return; - if (ct) { -- if (!nf_ct_is_confirmed(ct)) -+ if (!nf_ct_is_confirmed(ct)) { - ret = __nf_conntrack_confirm(skb); -+ -+ if (ret == NF_ACCEPT) -+ ct = (struct nf_conn *)skb_nfct(skb); ++ /* ++ * Yay, we hit home and are in the context of the event. ++ */ + if (cpu == smp_processor_id()) { +- WRITE_ONCE(event->pending_disable, -1); +- +- if (event->attr.sigtrap) { ++ if (event->pending_sigtrap) { ++ event->pending_sigtrap = 0; + perf_sigtrap(event); +- atomic_set_release(&event->event_limit, 1); /* rearm event */ +- return; ++ local_dec(&event->ctx->nr_pending); + } -+ - if (likely(ret == NF_ACCEPT)) - nf_ct_deliver_cached_events(ct); ++ if (event->pending_disable) { ++ event->pending_disable = 0; ++ perf_event_disable_local(event); + } +- +- perf_event_disable_local(event); + return; } -diff --git a/include/net/netfilter/nf_conntrack_helper.h b/include/net/netfilter/nf_conntrack_helper.h -index 37f0fbefb060f..9939c366f720d 100644 ---- a/include/net/netfilter/nf_conntrack_helper.h -+++ b/include/net/netfilter/nf_conntrack_helper.h -@@ -177,4 +177,5 @@ void nf_nat_helper_unregister(struct nf_conntrack_nat_helper *nat); - int nf_nat_helper_try_module_get(const char *name, u16 l3num, - u8 protonum); - void nf_nat_helper_put(struct nf_conntrack_helper *helper); -+void nf_ct_set_auto_assign_helper_warned(struct net *net); - #endif /*_NF_CONNTRACK_HELPER_H*/ -diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h -index a3647fadf1ccb..aaa518e777e9e 100644 ---- a/include/net/netfilter/nf_flow_table.h -+++ b/include/net/netfilter/nf_flow_table.h -@@ -10,6 +10,8 @@ - #include <linux/netfilter/nf_conntrack_tuple_common.h> - #include <net/flow_offload.h> - #include <net/dst.h> -+#include <linux/if_pppox.h> -+#include <linux/ppp_defs.h> - struct nf_flowtable; - struct nf_flow_rule; -@@ -264,6 +266,7 @@ void flow_offload_refresh(struct nf_flowtable *flow_table, +@@ -6492,52 +6615,88 @@ static void perf_pending_event_disable(struct perf_event *event) + * irq_work_queue(); // FAILS + * + * irq_work_run() +- * perf_pending_event() ++ * perf_pending_irq() + * + * But the event runs on CPU-B and wants disabling there. 
+ */ +- irq_work_queue_on(&event->pending, cpu); ++ irq_work_queue_on(&event->pending_irq, cpu); + } - struct flow_offload_tuple_rhash *flow_offload_lookup(struct nf_flowtable *flow_table, - struct flow_offload_tuple *tuple); -+void nf_flow_table_gc_run(struct nf_flowtable *flow_table); - void nf_flow_table_gc_cleanup(struct nf_flowtable *flowtable, - struct net_device *dev); - void nf_flow_table_cleanup(struct net_device *dev); -@@ -300,6 +303,8 @@ void nf_flow_offload_stats(struct nf_flowtable *flowtable, - struct flow_offload *flow); +-static void perf_pending_event(struct irq_work *entry) ++static void perf_pending_irq(struct irq_work *entry) + { +- struct perf_event *event = container_of(entry, struct perf_event, pending); ++ struct perf_event *event = container_of(entry, struct perf_event, pending_irq); + int rctx; - void nf_flow_table_offload_flush(struct nf_flowtable *flowtable); -+void nf_flow_table_offload_flush_cleanup(struct nf_flowtable *flowtable); +- rctx = perf_swevent_get_recursion_context(); + /* + * If we 'fail' here, that's OK, it means recursion is already disabled + * and we won't recurse 'further'. + */ ++ rctx = perf_swevent_get_recursion_context(); + +- perf_pending_event_disable(event); +- ++ /* ++ * The wakeup isn't bound to the context of the event -- it can happen ++ * irrespective of where the event is. ++ */ + if (event->pending_wakeup) { + event->pending_wakeup = 0; + perf_event_wakeup(event); + } + ++ __perf_pending_irq(event); + - int nf_flow_table_offload_setup(struct nf_flowtable *flowtable, - struct net_device *dev, - enum flow_block_command cmd); -@@ -313,4 +318,20 @@ int nf_flow_rule_route_ipv6(struct net *net, const struct flow_offload *flow, - int nf_flow_table_offload_init(void); - void nf_flow_table_offload_exit(void); + if (rctx >= 0) + perf_swevent_put_recursion_context(rctx); + } -+static inline __be16 nf_flow_pppoe_proto(const struct sk_buff *skb) +-/* ++static void perf_pending_task(struct callback_head *head) +{ -+ __be16 proto; ++ struct perf_event *event = container_of(head, struct perf_event, pending_task); ++ int rctx; + -+ proto = *((__be16 *)(skb_mac_header(skb) + ETH_HLEN + -+ sizeof(struct pppoe_hdr))); -+ switch (proto) { -+ case htons(PPP_IP): -+ return htons(ETH_P_IP); -+ case htons(PPP_IPV6): -+ return htons(ETH_P_IPV6); ++ /* ++ * If we 'fail' here, that's OK, it means recursion is already disabled ++ * and we won't recurse 'further'. ++ */ ++ preempt_disable_notrace(); ++ rctx = perf_swevent_get_recursion_context(); ++ ++ if (event->pending_work) { ++ event->pending_work = 0; ++ perf_sigtrap(event); ++ local_dec(&event->ctx->nr_pending); + } + -+ return 0; ++ if (rctx >= 0) ++ perf_swevent_put_recursion_context(rctx); ++ preempt_enable_notrace(); ++ ++ put_event(event); +} + - #endif /* _NF_FLOW_TABLE_H */ -diff --git a/include/net/netfilter/nf_queue.h b/include/net/netfilter/nf_queue.h -index 9eed51e920e87..980daa6e1e3aa 100644 ---- a/include/net/netfilter/nf_queue.h -+++ b/include/net/netfilter/nf_queue.h -@@ -37,7 +37,7 @@ void nf_register_queue_handler(const struct nf_queue_handler *qh); - void nf_unregister_queue_handler(void); - void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict); ++/* + * We assume there is only KVM supporting the callbacks. + * Later on, we might change it to a list if there is + * another virtualization implementation supporting the callbacks. 
+ */ +-struct perf_guest_info_callbacks *perf_guest_cbs; ++struct perf_guest_info_callbacks __rcu *perf_guest_cbs; --void nf_queue_entry_get_refs(struct nf_queue_entry *entry); -+bool nf_queue_entry_get_refs(struct nf_queue_entry *entry); - void nf_queue_entry_free(struct nf_queue_entry *entry); + int perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *cbs) + { +- perf_guest_cbs = cbs; ++ if (WARN_ON_ONCE(rcu_access_pointer(perf_guest_cbs))) ++ return -EBUSY; ++ ++ rcu_assign_pointer(perf_guest_cbs, cbs); + return 0; + } + EXPORT_SYMBOL_GPL(perf_register_guest_info_callbacks); - static inline void init_hashrandom(u32 *jhash_initval) -diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h -index a16171c5fd9eb..53746494eb846 100644 ---- a/include/net/netfilter/nf_tables.h -+++ b/include/net/netfilter/nf_tables.h -@@ -21,13 +21,19 @@ struct module; + int perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks *cbs) + { +- perf_guest_cbs = NULL; ++ if (WARN_ON_ONCE(rcu_access_pointer(perf_guest_cbs) != cbs)) ++ return -EINVAL; ++ ++ rcu_assign_pointer(perf_guest_cbs, NULL); ++ synchronize_rcu(); + return 0; + } + EXPORT_SYMBOL_GPL(perf_unregister_guest_info_callbacks); +@@ -6696,7 +6855,7 @@ static unsigned long perf_prepare_sample_aux(struct perf_event *event, + if (WARN_ON_ONCE(READ_ONCE(sampler->oncpu) != smp_processor_id())) + goto out; - #define NFT_JUMP_STACK_SIZE 16 +- rb = ring_buffer_get(sampler->parent ? sampler->parent : sampler); ++ rb = ring_buffer_get(sampler); + if (!rb) + goto out; -+enum { -+ NFT_PKTINFO_L4PROTO = (1 << 0), -+ NFT_PKTINFO_INNER = (1 << 1), -+}; -+ - struct nft_pktinfo { - struct sk_buff *skb; - const struct nf_hook_state *state; -- bool tprot_set; -+ u8 flags; - u8 tprot; - u16 fragoff; - unsigned int thoff; -+ unsigned int inneroff; - }; +@@ -6762,7 +6921,7 @@ static void perf_aux_sample_output(struct perf_event *event, + if (WARN_ON_ONCE(!sampler || !data->aux_size)) + return; - static inline struct sock *nft_sk(const struct nft_pktinfo *pkt) -@@ -75,7 +81,7 @@ static inline void nft_set_pktinfo(struct nft_pktinfo *pkt, +- rb = ring_buffer_get(sampler->parent ? sampler->parent : sampler); ++ rb = ring_buffer_get(sampler); + if (!rb) + return; - static inline void nft_set_pktinfo_unspec(struct nft_pktinfo *pkt) +@@ -7154,7 +7313,6 @@ void perf_output_sample(struct perf_output_handle *handle, + static u64 perf_virt_to_phys(u64 virt) { -- pkt->tprot_set = false; -+ pkt->flags = 0; - pkt->tprot = 0; - pkt->thoff = 0; - pkt->fragoff = 0; -@@ -187,13 +193,18 @@ struct nft_ctx { - bool report; - }; + u64 phys_addr = 0; +- struct page *p = NULL; -+enum nft_data_desc_flags { -+ NFT_DATA_DESC_SETELEM = (1 << 0), -+}; + if (!virt) + return 0; +@@ -7173,14 +7331,15 @@ static u64 perf_virt_to_phys(u64 virt) + * If failed, leave phys_addr as 0. 
+ */ + if (current->mm != NULL) { ++ struct page *p; + - struct nft_data_desc { - enum nft_data_types type; -+ unsigned int size; - unsigned int len; -+ unsigned int flags; - }; + pagefault_disable(); +- if (get_user_page_fast_only(virt, 0, &p)) ++ if (get_user_page_fast_only(virt, 0, &p)) { + phys_addr = page_to_phys(p) + virt % PAGE_SIZE; ++ put_page(p); ++ } + pagefault_enable(); + } +- +- if (p) +- put_page(p); + } --int nft_data_init(const struct nft_ctx *ctx, -- struct nft_data *data, unsigned int size, -+int nft_data_init(const struct nft_ctx *ctx, struct nft_data *data, - struct nft_data_desc *desc, const struct nlattr *nla); - void nft_data_hold(const struct nft_data *data, enum nft_data_types type); - void nft_data_release(const struct nft_data *data, enum nft_data_types type); -@@ -636,18 +647,22 @@ static inline void nft_set_ext_prepare(struct nft_set_ext_tmpl *tmpl) - tmpl->len = sizeof(struct nft_set_ext); + return phys_addr; +@@ -8927,7 +9086,7 @@ static void perf_event_bpf_emit_ksymbols(struct bpf_prog *prog, + PERF_RECORD_KSYMBOL_TYPE_BPF, + (u64)(unsigned long)subprog->bpf_func, + subprog->jited_len, unregister, +- prog->aux->ksym.name); ++ subprog->aux->ksym.name); + } + } } +@@ -9145,8 +9304,8 @@ int perf_event_account_interrupt(struct perf_event *event) + */ --static inline void nft_set_ext_add_length(struct nft_set_ext_tmpl *tmpl, u8 id, -- unsigned int len) -+static inline int nft_set_ext_add_length(struct nft_set_ext_tmpl *tmpl, u8 id, -+ unsigned int len) + static int __perf_event_overflow(struct perf_event *event, +- int throttle, struct perf_sample_data *data, +- struct pt_regs *regs) ++ int throttle, struct perf_sample_data *data, ++ struct pt_regs *regs) { - tmpl->len = ALIGN(tmpl->len, nft_set_ext_types[id].align); -- BUG_ON(tmpl->len > U8_MAX); -+ if (tmpl->len > U8_MAX) -+ return -EINVAL; + int events = atomic_read(&event->event_limit); + int ret = 0; +@@ -9169,24 +9328,49 @@ static int __perf_event_overflow(struct perf_event *event, + if (events && atomic_dec_and_test(&event->event_limit)) { + ret = 1; + event->pending_kill = POLL_HUP; +- event->pending_addr = data->addr; +- + perf_event_disable_inatomic(event); + } + ++ if (event->attr.sigtrap) { ++ unsigned int pending_id = 1; + - tmpl->offset[id] = tmpl->len; - tmpl->len += nft_set_ext_types[id].len + len; ++ if (regs) ++ pending_id = hash32_ptr((void *)instruction_pointer(regs)) ?: 1; ++ if (!event->pending_sigtrap) { ++ event->pending_sigtrap = pending_id; ++ local_inc(&event->ctx->nr_pending); ++ } else if (event->attr.exclude_kernel) { ++ /* ++ * Should not be able to return to user space without ++ * consuming pending_sigtrap; with exceptions: ++ * ++ * 1. Where !exclude_kernel, events can overflow again ++ * in the kernel without returning to user space. ++ * ++ * 2. Events that can overflow again before the IRQ- ++ * work without user space progress (e.g. hrtimer). ++ * To approximate progress (with false negatives), ++ * check 32-bit hash of the current IP. 
++ */ ++ WARN_ON_ONCE(event->pending_sigtrap != pending_id); ++ } ++ event->pending_addr = data->addr; ++ irq_work_queue(&event->pending_irq); ++ } + -+ return 0; + READ_ONCE(event->overflow_handler)(event, data, regs); + + if (*perf_event_fasync(event) && event->pending_kill) { + event->pending_wakeup = 1; +- irq_work_queue(&event->pending); ++ irq_work_queue(&event->pending_irq); + } + + return ret; } --static inline void nft_set_ext_add(struct nft_set_ext_tmpl *tmpl, u8 id) -+static inline int nft_set_ext_add(struct nft_set_ext_tmpl *tmpl, u8 id) + int perf_event_overflow(struct perf_event *event, +- struct perf_sample_data *data, +- struct pt_regs *regs) ++ struct perf_sample_data *data, ++ struct pt_regs *regs) { -- nft_set_ext_add_length(tmpl, id, 0); -+ return nft_set_ext_add_length(tmpl, id, 0); + return __perf_event_overflow(event, 1, data, regs); } +@@ -9729,6 +9913,9 @@ void perf_tp_event(u16 event_type, u64 count, void *record, int entry_size, + continue; + if (event->attr.config != entry->type) + continue; ++ /* Cannot deliver synchronous signal to other task. */ ++ if (event->attr.sigtrap) ++ continue; + if (perf_tp_event_match(event, &data, regs)) + perf_swevent_event(event, count, &data, regs); + } +@@ -10443,8 +10630,11 @@ perf_event_parse_addr_filter(struct perf_event *event, char *fstr, + } - static inline void nft_set_ext_init(struct nft_set_ext *ext, -@@ -883,9 +898,9 @@ struct nft_expr_ops { - int (*offload)(struct nft_offload_ctx *ctx, - struct nft_flow_rule *flow, - const struct nft_expr *expr); -+ bool (*offload_action)(const struct nft_expr *expr); - void (*offload_stats)(struct nft_expr *expr, - const struct flow_stats *stats); -- u32 offload_flags; - const struct nft_expr_type *type; - void *data; - }; -@@ -1053,7 +1068,6 @@ struct nft_stats { + /* ready to consume more filters */ ++ kfree(filename); ++ filename = NULL; + state = IF_STATE_ACTION; + filter = NULL; ++ kernel = 0; + } + } - struct nft_hook { - struct list_head list; -- bool inactive; - struct nf_hook_ops ops; - struct rcu_head rcu; - }; -@@ -1586,6 +1600,7 @@ struct nftables_pernet { - struct list_head module_list; - struct list_head notify_list; - struct mutex commit_mutex; -+ u64 table_handle; - unsigned int base_seq; - u8 validate_state; - }; -diff --git a/include/net/netfilter/nf_tables_core.h b/include/net/netfilter/nf_tables_core.h -index 0fa5a6d98a00b..9dfa11d4224d2 100644 ---- a/include/net/netfilter/nf_tables_core.h -+++ b/include/net/netfilter/nf_tables_core.h -@@ -40,6 +40,14 @@ struct nft_cmp_fast_expr { - bool inv; - }; +@@ -10985,13 +11175,15 @@ static int pmu_dev_alloc(struct pmu *pmu) -+struct nft_cmp16_fast_expr { -+ struct nft_data data; -+ struct nft_data mask; -+ u8 sreg; -+ u8 len; -+ bool inv; -+}; + pmu->dev->groups = pmu->attr_groups; + device_initialize(pmu->dev); +- ret = dev_set_name(pmu->dev, "%s", pmu->name); +- if (ret) +- goto free_dev; + + dev_set_drvdata(pmu->dev, pmu); + pmu->dev->bus = &pmu_bus; + pmu->dev->release = pmu_dev_release; + - struct nft_immediate_expr { - struct nft_data data; - u8 dreg; -@@ -57,6 +65,7 @@ static inline u32 nft_cmp_fast_mask(unsigned int len) - } ++ ret = dev_set_name(pmu->dev, "%s", pmu->name); ++ if (ret) ++ goto free_dev; ++ + ret = device_add(pmu->dev); + if (ret) + goto free_dev; +@@ -11486,8 +11678,8 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu, - extern const struct nft_expr_ops nft_cmp_fast_ops; -+extern const struct nft_expr_ops nft_cmp16_fast_ops; - struct nft_payload { - enum nft_payload_bases base:8; -diff 
--git a/include/net/netfilter/nf_tables_ipv4.h b/include/net/netfilter/nf_tables_ipv4.h -index eb4c094cd54d2..c4a6147b0ef8c 100644 ---- a/include/net/netfilter/nf_tables_ipv4.h -+++ b/include/net/netfilter/nf_tables_ipv4.h -@@ -10,7 +10,7 @@ static inline void nft_set_pktinfo_ipv4(struct nft_pktinfo *pkt) - struct iphdr *ip; + init_waitqueue_head(&event->waitq); +- event->pending_disable = -1; +- init_irq_work(&event->pending, perf_pending_event); ++ init_irq_work(&event->pending_irq, perf_pending_irq); ++ init_task_work(&event->pending_task, perf_pending_task); - ip = ip_hdr(pkt->skb); -- pkt->tprot_set = true; -+ pkt->flags = NFT_PKTINFO_L4PROTO; - pkt->tprot = ip->protocol; - pkt->thoff = ip_hdrlen(pkt->skb); - pkt->fragoff = ntohs(ip->frag_off) & IP_OFFSET; -@@ -36,7 +36,7 @@ static inline int __nft_set_pktinfo_ipv4_validate(struct nft_pktinfo *pkt) - else if (len < thoff) - return -1; + mutex_init(&event->mmap_mutex); + raw_spin_lock_init(&event->addr_filters.lock); +@@ -11506,8 +11698,8 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu, -- pkt->tprot_set = true; -+ pkt->flags = NFT_PKTINFO_L4PROTO; - pkt->tprot = iph->protocol; - pkt->thoff = thoff; - pkt->fragoff = ntohs(iph->frag_off) & IP_OFFSET; -@@ -71,7 +71,7 @@ static inline int nft_set_pktinfo_ipv4_ingress(struct nft_pktinfo *pkt) - goto inhdr_error; - } + event->state = PERF_EVENT_STATE_INACTIVE; -- pkt->tprot_set = true; -+ pkt->flags = NFT_PKTINFO_L4PROTO; - pkt->tprot = iph->protocol; - pkt->thoff = thoff; - pkt->fragoff = ntohs(iph->frag_off) & IP_OFFSET; -@@ -82,4 +82,5 @@ inhdr_error: - __IP_INC_STATS(nft_net(pkt), IPSTATS_MIB_INHDRERRORS); - return -1; +- if (event->attr.sigtrap) +- atomic_set(&event->event_limit, 1); ++ if (parent_event) ++ event->event_caps = parent_event->event_caps; + + if (task) { + event->attach_state = PERF_ATTACH_TASK; +@@ -11799,14 +11991,25 @@ err_size: + goto out; } -+ - #endif -diff --git a/include/net/netfilter/nf_tables_ipv6.h b/include/net/netfilter/nf_tables_ipv6.h -index 7595e02b00ba0..ec7eaeaf4f04c 100644 ---- a/include/net/netfilter/nf_tables_ipv6.h -+++ b/include/net/netfilter/nf_tables_ipv6.h -@@ -18,7 +18,7 @@ static inline void nft_set_pktinfo_ipv6(struct nft_pktinfo *pkt) - return; - } -- pkt->tprot_set = true; -+ pkt->flags = NFT_PKTINFO_L4PROTO; - pkt->tprot = protohdr; - pkt->thoff = thoff; - pkt->fragoff = frag_off; -@@ -50,7 +50,7 @@ static inline int __nft_set_pktinfo_ipv6_validate(struct nft_pktinfo *pkt) - if (protohdr < 0) - return -1; ++static void mutex_lock_double(struct mutex *a, struct mutex *b) ++{ ++ if (b < a) ++ swap(a, b); ++ ++ mutex_lock(a); ++ mutex_lock_nested(b, SINGLE_DEPTH_NESTING); ++} ++ + static int + perf_event_set_output(struct perf_event *event, struct perf_event *output_event) + { + struct perf_buffer *rb = NULL; + int ret = -EINVAL; -- pkt->tprot_set = true; -+ pkt->flags = NFT_PKTINFO_L4PROTO; - pkt->tprot = protohdr; - pkt->thoff = thoff; - pkt->fragoff = frag_off; -@@ -96,7 +96,7 @@ static inline int nft_set_pktinfo_ipv6_ingress(struct nft_pktinfo *pkt) - if (protohdr < 0) - goto inhdr_error; +- if (!output_event) ++ if (!output_event) { ++ mutex_lock(&event->mmap_mutex); + goto set; ++ } -- pkt->tprot_set = true; -+ pkt->flags = NFT_PKTINFO_L4PROTO; - pkt->tprot = protohdr; - pkt->thoff = thoff; - pkt->fragoff = frag_off; -diff --git a/include/net/netfilter/nf_tables_offload.h b/include/net/netfilter/nf_tables_offload.h -index f9d95ff82df83..3568b6a2f5f0f 100644 ---- a/include/net/netfilter/nf_tables_offload.h -+++ 
b/include/net/netfilter/nf_tables_offload.h -@@ -67,8 +67,6 @@ struct nft_flow_rule { - struct flow_rule *rule; - }; + /* don't allow circular references */ + if (event == output_event) +@@ -11844,8 +12047,15 @@ perf_event_set_output(struct perf_event *event, struct perf_event *output_event) + event->pmu != output_event->pmu) + goto out; --#define NFT_OFFLOAD_F_ACTION (1 << 0) -- - void nft_flow_rule_set_addr_type(struct nft_flow_rule *flow, - enum flow_dissector_key_id addr_type); ++ /* ++ * Hold both mmap_mutex to serialize against perf_mmap_close(). Since ++ * output_event is already on rb->event_list, and the list iteration ++ * restarts after every removal, it is guaranteed this new event is ++ * observed *OR* if output_event is already removed, it's guaranteed we ++ * observe !rb->mmap_count. ++ */ ++ mutex_lock_double(&event->mmap_mutex, &output_event->mmap_mutex); + set: +- mutex_lock(&event->mmap_mutex); + /* Can't redirect output if we've got an active mmap() */ + if (atomic_read(&event->mmap_count)) + goto unlock; +@@ -11855,6 +12065,12 @@ set: + rb = ring_buffer_get(output_event); + if (!rb) + goto unlock; ++ ++ /* did we race against perf_mmap_close() */ ++ if (!atomic_read(&rb->mmap_count)) { ++ ring_buffer_put(rb); ++ goto unlock; ++ } + } -@@ -94,7 +92,7 @@ int nft_flow_rule_offload_commit(struct net *net); - NFT_OFFLOAD_MATCH(__key, __base, __field, __len, __reg) \ - memset(&(__reg)->mask, 0xff, (__reg)->len); + ring_buffer_attach(event, rb); +@@ -11862,20 +12078,13 @@ set: + ret = 0; + unlock: + mutex_unlock(&event->mmap_mutex); ++ if (output_event) ++ mutex_unlock(&output_event->mmap_mutex); --int nft_chain_offload_priority(struct nft_base_chain *basechain); -+bool nft_chain_offload_support(const struct nft_base_chain *basechain); + out: + return ret; + } - int nft_offload_init(void); - void nft_offload_exit(void); -diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h -index 2f65701a43c95..d60a10cfc3823 100644 ---- a/include/net/netns/ipv4.h -+++ b/include/net/netns/ipv4.h -@@ -65,7 +65,7 @@ struct netns_ipv4 { - bool fib_has_custom_local_routes; - bool fib_offload_disabled; - #ifdef CONFIG_IP_ROUTE_CLASSID -- int fib_num_tclassid_users; -+ atomic_t fib_num_tclassid_users; - #endif - struct hlist_head *fib_table_hash; - struct sock *fibnl; -@@ -74,7 +74,6 @@ struct netns_ipv4 { - struct sock *mc_autojoin_sk; +-static void mutex_lock_double(struct mutex *a, struct mutex *b) +-{ +- if (b < a) +- swap(a, b); +- +- mutex_lock(a); +- mutex_lock_nested(b, SINGLE_DEPTH_NESTING); +-} +- + static int perf_event_set_clock(struct perf_event *event, clockid_t clk_id) + { + bool nmi_safe = false; +@@ -12006,12 +12215,12 @@ SYSCALL_DEFINE5(perf_event_open, + if (flags & ~PERF_FLAG_ALL) + return -EINVAL; - struct inet_peer_base *peers; -- struct sock * __percpu *tcp_sk; - struct fqdir *fqdir; +- /* Do we allow access to perf_event_open(2) ? 
*/ +- err = security_perf_event_open(&attr, PERF_SECURITY_OPEN); ++ err = perf_copy_attr(attr_uptr, &attr); + if (err) + return err; - u8 sysctl_icmp_echo_ignore_all; -diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h -index a4b5503803165..ff82983b7ab41 100644 ---- a/include/net/netns/ipv6.h -+++ b/include/net/netns/ipv6.h -@@ -75,11 +75,12 @@ struct netns_ipv6 { - struct list_head fib6_walkers; - rwlock_t fib6_walker_lock; - spinlock_t fib6_gc_lock; -- unsigned int ip6_rt_gc_expire; -- unsigned long ip6_rt_last_gc; -+ atomic_t ip6_rt_gc_expire; -+ unsigned long ip6_rt_last_gc; -+ unsigned char flowlabel_has_excl; - #ifdef CONFIG_IPV6_MULTIPLE_TABLES -- unsigned int fib6_rules_require_fldissect; - bool fib6_has_custom_rules; -+ unsigned int fib6_rules_require_fldissect; - #ifdef CONFIG_IPV6_SUBTREES - unsigned int fib6_routes_require_src; - #endif -diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h -index 947733a639a6f..bd7c3be4af5d7 100644 ---- a/include/net/netns/xfrm.h -+++ b/include/net/netns/xfrm.h -@@ -66,11 +66,7 @@ struct netns_xfrm { - int sysctl_larval_drop; - u32 sysctl_acq_expires; +- err = perf_copy_attr(attr_uptr, &attr); ++ /* Do we allow access to perf_event_open(2) ? */ ++ err = security_perf_event_open(&attr, PERF_SECURITY_OPEN); + if (err) + return err; -- u8 policy_default; --#define XFRM_POL_DEFAULT_IN 1 --#define XFRM_POL_DEFAULT_OUT 2 --#define XFRM_POL_DEFAULT_FWD 4 --#define XFRM_POL_DEFAULT_MASK 7 -+ u8 policy_default[XFRM_POLICY_MAX]; +@@ -12190,6 +12399,9 @@ SYSCALL_DEFINE5(perf_event_open, + * Do not allow to attach to a group in a different task + * or CPU context. If we're moving SW events, we'll fix + * this up later, so allow that. ++ * ++ * Racy, not holding group_leader->ctx->mutex, see comment with ++ * perf_event_ctx_lock(). + */ + if (!move_group && group_leader->ctx != ctx) + goto err_context; +@@ -12255,6 +12467,7 @@ SYSCALL_DEFINE5(perf_event_open, + } else { + perf_event_ctx_unlock(group_leader, gctx); + move_group = 0; ++ goto not_move_group; + } + } - #ifdef CONFIG_SYSCTL - struct ctl_table_header *sysctl_hdr; -diff --git a/include/net/nfc/nci_core.h b/include/net/nfc/nci_core.h -index a964daedc17b6..ea8595651c384 100644 ---- a/include/net/nfc/nci_core.h -+++ b/include/net/nfc/nci_core.h -@@ -30,6 +30,7 @@ enum nci_flag { - NCI_UP, - NCI_DATA_EXCHANGE, - NCI_DATA_EXCHANGE_TO, -+ NCI_UNREG, - }; +@@ -12271,7 +12484,17 @@ SYSCALL_DEFINE5(perf_event_open, + } + } else { + mutex_lock(&ctx->mutex); ++ ++ /* ++ * Now that we hold ctx->lock, (re)validate group_leader->ctx == ctx, ++ * see the group_leader && !move_group test earlier. 
++ */ ++ if (group_leader && group_leader->ctx != ctx) { ++ err = -EINVAL; ++ goto err_locked; ++ } + } ++not_move_group: - /* NCI device states */ -diff --git a/include/net/nl802154.h b/include/net/nl802154.h -index ddcee128f5d9a..145acb8f25095 100644 ---- a/include/net/nl802154.h -+++ b/include/net/nl802154.h -@@ -19,6 +19,8 @@ - * - */ + if (ctx->task == TASK_TOMBSTONE) { + err = -ESRCH; +diff --git a/kernel/events/internal.h b/kernel/events/internal.h +index 228801e207886..aa23ffdaf819f 100644 +--- a/kernel/events/internal.h ++++ b/kernel/events/internal.h +@@ -116,6 +116,11 @@ static inline int page_order(struct perf_buffer *rb) + } + #endif -+#include <linux/types.h> ++static inline int data_page_nr(struct perf_buffer *rb) ++{ ++ return rb->nr_pages << page_order(rb); ++} + - #define NL802154_GENL_NAME "nl802154" + static inline unsigned long perf_data_size(struct perf_buffer *rb) + { + return rb->nr_pages << (PAGE_SHIFT + page_order(rb)); +diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c +index 52868716ec358..f40da32f5e753 100644 +--- a/kernel/events/ring_buffer.c ++++ b/kernel/events/ring_buffer.c +@@ -22,7 +22,7 @@ static void perf_output_wakeup(struct perf_output_handle *handle) + atomic_set(&handle->rb->poll, EPOLLIN); - enum nl802154_commands { -@@ -150,10 +152,9 @@ enum nl802154_attrs { - }; + handle->event->pending_wakeup = 1; +- irq_work_queue(&handle->event->pending); ++ irq_work_queue(&handle->event->pending_irq); + } - enum nl802154_iftype { -- /* for backwards compatibility TODO */ -- NL802154_IFTYPE_UNSPEC = -1, -+ NL802154_IFTYPE_UNSPEC = (~(__u32)0), + /* +@@ -859,11 +859,6 @@ void rb_free(struct perf_buffer *rb) + } -- NL802154_IFTYPE_NODE, -+ NL802154_IFTYPE_NODE = 0, - NL802154_IFTYPE_MONITOR, - NL802154_IFTYPE_COORD, + #else +-static int data_page_nr(struct perf_buffer *rb) +-{ +- return rb->nr_pages << page_order(rb); +-} +- + static struct page * + __perf_mmap_to_page(struct perf_buffer *rb, unsigned long pgoff) + { +diff --git a/kernel/exit.c b/kernel/exit.c +index 91a43e57a32eb..80efdfda6662b 100644 +--- a/kernel/exit.c ++++ b/kernel/exit.c +@@ -64,11 +64,58 @@ + #include <linux/rcuwait.h> + #include <linux/compat.h> + #include <linux/io_uring.h> ++#include <linux/sysfs.h> -diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h -index bf79f3a890af2..9e7b21c0b3a6d 100644 ---- a/include/net/pkt_sched.h -+++ b/include/net/pkt_sched.h -@@ -193,4 +193,22 @@ static inline void skb_txtime_consumed(struct sk_buff *skb) - skb->tstamp = ktime_set(0, 0); - } + #include <linux/uaccess.h> + #include <asm/unistd.h> + #include <asm/mmu_context.h> -+struct tc_skb_cb { -+ struct qdisc_skb_cb qdisc_cb; ++/* ++ * The default value should be high enough to not crash a system that randomly ++ * crashes its kernel from time to time, but low enough to at least not permit ++ * overflowing 32-bit refcounts or the ldsem writer count. 
++ */ ++static unsigned int oops_limit = 10000; + -+ u16 mru; -+ u8 post_ct:1; -+ u8 post_ct_snat:1; -+ u8 post_ct_dnat:1; -+ u16 zone; /* Only valid if post_ct = true */ ++#ifdef CONFIG_SYSCTL ++static struct ctl_table kern_exit_table[] = { ++ { ++ .procname = "oops_limit", ++ .data = &oops_limit, ++ .maxlen = sizeof(oops_limit), ++ .mode = 0644, ++ .proc_handler = proc_douintvec, ++ }, ++ { } +}; + -+static inline struct tc_skb_cb *tc_skb_cb(const struct sk_buff *skb) ++static __init int kernel_exit_sysctls_init(void) +{ -+ struct tc_skb_cb *cb = (struct tc_skb_cb *)skb->cb; ++ register_sysctl_init("kernel", kern_exit_table); ++ return 0; ++} ++late_initcall(kernel_exit_sysctls_init); ++#endif + -+ BUILD_BUG_ON(sizeof(*cb) > sizeof_field(struct sk_buff, cb)); -+ return cb; ++static atomic_t oops_count = ATOMIC_INIT(0); ++ ++#ifdef CONFIG_SYSFS ++static ssize_t oops_count_show(struct kobject *kobj, struct kobj_attribute *attr, ++ char *page) ++{ ++ return sysfs_emit(page, "%d\n", atomic_read(&oops_count)); +} + - #endif -diff --git a/include/net/raw.h b/include/net/raw.h -index 8ad8df5948536..c51a635671a73 100644 ---- a/include/net/raw.h -+++ b/include/net/raw.h -@@ -75,7 +75,7 @@ static inline bool raw_sk_bound_dev_eq(struct net *net, int bound_dev_if, - int dif, int sdif) ++static struct kobj_attribute oops_count_attr = __ATTR_RO(oops_count); ++ ++static __init int kernel_exit_sysfs_init(void) ++{ ++ sysfs_add_file_to_group(kernel_kobj, &oops_count_attr.attr, NULL); ++ return 0; ++} ++late_initcall(kernel_exit_sysfs_init); ++#endif ++ + static void __unhash_process(struct task_struct *p, bool group_dead) { - #if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV) -- return inet_bound_dev_eq(!!net->ipv4.sysctl_raw_l3mdev_accept, -+ return inet_bound_dev_eq(READ_ONCE(net->ipv4.sysctl_raw_l3mdev_accept), - bound_dev_if, dif, sdif); - #else - return inet_bound_dev_eq(true, bound_dev_if, dif, sdif); -diff --git a/include/net/route.h b/include/net/route.h -index 2e6c0e153e3a5..30610101ea14f 100644 ---- a/include/net/route.h -+++ b/include/net/route.h -@@ -360,7 +360,7 @@ static inline int ip4_dst_hoplimit(const struct dst_entry *dst) - struct net *net = dev_net(dst->dev); + nr_threads--; +@@ -796,7 +843,7 @@ void __noreturn do_exit(long code) - if (hoplimit == 0) -- hoplimit = net->ipv4.sysctl_ip_default_ttl; -+ hoplimit = READ_ONCE(net->ipv4.sysctl_ip_default_ttl); - return hoplimit; + #ifdef CONFIG_POSIX_TIMERS + hrtimer_cancel(&tsk->signal->real_timer); +- exit_itimers(tsk->signal); ++ exit_itimers(tsk); + #endif + if (tsk->mm) + setmax_mm_hiwater_rss(&tsk->signal->maxrss, tsk->mm); +@@ -877,6 +924,31 @@ void __noreturn do_exit(long code) } + EXPORT_SYMBOL_GPL(do_exit); -@@ -369,7 +369,7 @@ static inline struct neighbour *ip_neigh_gw4(struct net_device *dev, ++void __noreturn make_task_dead(int signr) ++{ ++ /* ++ * Take the task off the cpu after something catastrophic has ++ * happened. ++ */ ++ unsigned int limit; ++ ++ /* ++ * Every time the system oopses, if the oops happens while a reference ++ * to an object was held, the reference leaks. ++ * If the oops doesn't also leak memory, repeated oopsing can cause ++ * reference counters to wrap around (if they're not using refcount_t). ++ * This means that repeated oopsing can make unexploitable-looking bugs ++ * exploitable through repeated oopsing. ++ * To make sure this can't happen, place an upper bound on how often the ++ * kernel may oops without panic(). 
++ */ ++ limit = READ_ONCE(oops_limit); ++ if (atomic_inc_return(&oops_count) >= limit && limit) ++ panic("Oopsed too often (kernel.oops_limit is %d)", limit); ++ ++ do_exit(signr); ++} ++ + void complete_and_exit(struct completion *comp, long code) { - struct neighbour *neigh; + if (comp) +diff --git a/kernel/fork.c b/kernel/fork.c +index 38681ad44c76b..3fb7e9e6a7b97 100644 +--- a/kernel/fork.c ++++ b/kernel/fork.c +@@ -446,6 +446,9 @@ void put_task_stack(struct task_struct *tsk) -- neigh = __ipv4_neigh_lookup_noref(dev, daddr); -+ neigh = __ipv4_neigh_lookup_noref(dev, (__force u32)daddr); - if (unlikely(!neigh)) - neigh = __neigh_create(&arp_tbl, &daddr, dev, false); + void free_task(struct task_struct *tsk) + { ++#ifdef CONFIG_SECCOMP ++ WARN_ON_ONCE(tsk->seccomp.filter); ++#endif + release_user_cpus_ptr(tsk); + scs_release(tsk); -diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h -index c0069ac00e62d..891b44d80c982 100644 ---- a/include/net/sch_generic.h -+++ b/include/net/sch_generic.h -@@ -173,37 +173,17 @@ static inline bool qdisc_run_begin(struct Qdisc *qdisc) - if (spin_trylock(&qdisc->seqlock)) - return true; +@@ -1153,6 +1156,7 @@ void mmput_async(struct mm_struct *mm) + schedule_work(&mm->async_put_work); + } + } ++EXPORT_SYMBOL_GPL(mmput_async); + #endif -- /* Paired with smp_mb__after_atomic() to make sure -- * STATE_MISSED checking is synchronized with clearing -- * in pfifo_fast_dequeue(). -+ /* No need to insist if the MISSED flag was already set. -+ * Note that test_and_set_bit() also gives us memory ordering -+ * guarantees wrt potential earlier enqueue() and below -+ * spin_trylock(), both of which are necessary to prevent races - */ -- smp_mb__before_atomic(); + /** +@@ -2055,18 +2059,18 @@ static __latent_entropy struct task_struct *copy_process( + #ifdef CONFIG_PROVE_LOCKING + DEBUG_LOCKS_WARN_ON(!p->softirqs_enabled); + #endif ++ retval = copy_creds(p, clone_flags); ++ if (retval < 0) ++ goto bad_fork_free; ++ + retval = -EAGAIN; + if (is_ucounts_overlimit(task_ucounts(p), UCOUNT_RLIMIT_NPROC, rlimit(RLIMIT_NPROC))) { + if (p->real_cred->user != INIT_USER && + !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN)) +- goto bad_fork_free; ++ goto bad_fork_cleanup_count; + } + current->flags &= ~PF_NPROC_EXCEEDED; + +- retval = copy_creds(p, clone_flags); +- if (retval < 0) +- goto bad_fork_free; +- + /* + * If multiple threads are within copy_process(), then this check + * triggers too late. This doesn't hurt, the check is only there +@@ -2280,6 +2284,7 @@ static __latent_entropy struct task_struct *copy_process( + p->pdeath_signal = 0; + INIT_LIST_HEAD(&p->thread_group); + p->task_works = NULL; ++ clear_posix_cputimers_work(p); + + #ifdef CONFIG_KRETPROBES + p->kretprobe_instances.first = NULL; +@@ -2295,6 +2300,17 @@ static __latent_entropy struct task_struct *copy_process( + if (retval) + goto bad_fork_put_pidfd; + ++ /* ++ * Now that the cgroups are pinned, re-clone the parent cgroup and put ++ * the new task on the correct runqueue. All this *before* the task ++ * becomes visible. ++ * ++ * This isn't part of ->can_fork() because while the re-cloning is ++ * cgroup specific, it unconditionally needs to place the task on a ++ * runqueue. ++ */ ++ sched_cgroup_fork(p, args); ++ + /* + * From this point on we must avoid any synchronous user-space + * communication until we take the tasklist-lock. 
In particular, we do +@@ -2332,12 +2348,6 @@ static __latent_entropy struct task_struct *copy_process( + + spin_lock(¤t->sighand->siglock); + +- /* +- * Copy seccomp details explicitly here, in case they were changed +- * before holding sighand lock. +- */ +- copy_seccomp(p); +- + rseq_fork(p, clone_flags); + + /* Don't start children in a dying pid namespace */ +@@ -2352,9 +2362,13 @@ static __latent_entropy struct task_struct *copy_process( + goto bad_fork_cancel_cgroup; + } + +- /* past the last point of failure */ +- if (pidfile) +- fd_install(pidfd, pidfile); ++ /* No more failure paths after this point. */ ++ ++ /* ++ * Copy seccomp details explicitly here, in case they were changed ++ * before holding sighand lock. ++ */ ++ copy_seccomp(p); + + init_task_pid_links(p); + if (likely(p->pid)) { +@@ -2404,6 +2418,9 @@ static __latent_entropy struct task_struct *copy_process( + syscall_tracepoint_update(p); + write_unlock_irq(&tasklist_lock); + ++ if (pidfile) ++ fd_install(pidfd, pidfile); ++ + proc_fork_connector(p); + sched_post_fork(p); + cgroup_post_fork(p, args); +diff --git a/kernel/futex.c b/kernel/futex.c +deleted file mode 100644 +index c15ad276fd157..0000000000000 +--- a/kernel/futex.c ++++ /dev/null +@@ -1,4272 +0,0 @@ +-// SPDX-License-Identifier: GPL-2.0-or-later +-/* +- * Fast Userspace Mutexes (which I call "Futexes!"). +- * (C) Rusty Russell, IBM 2002 +- * +- * Generalized futexes, futex requeueing, misc fixes by Ingo Molnar +- * (C) Copyright 2003 Red Hat Inc, All Rights Reserved +- * +- * Removed page pinning, fix privately mapped COW pages and other cleanups +- * (C) Copyright 2003, 2004 Jamie Lokier +- * +- * Robust futex support started by Ingo Molnar +- * (C) Copyright 2006 Red Hat Inc, All Rights Reserved +- * Thanks to Thomas Gleixner for suggestions, analysis and fixes. +- * +- * PI-futex support started by Ingo Molnar and Thomas Gleixner +- * Copyright (C) 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com> +- * Copyright (C) 2006 Timesys Corp., Thomas Gleixner <tglx@timesys.com> +- * +- * PRIVATE futexes by Eric Dumazet +- * Copyright (C) 2007 Eric Dumazet <dada1@cosmosbay.com> +- * +- * Requeue-PI support by Darren Hart <dvhltc@us.ibm.com> +- * Copyright (C) IBM Corporation, 2009 +- * Thanks to Thomas Gleixner for conceptual design and careful reviews. +- * +- * Thanks to Ben LaHaise for yelling "hashed waitqueues" loudly +- * enough at me, Linus for the original (flawed) idea, Matthew +- * Kirkwood for proof-of-concept implementation. +- * +- * "The futexes are also cursed." +- * "But they come in a choice of three flavours!" +- */ +-#include <linux/compat.h> +-#include <linux/jhash.h> +-#include <linux/pagemap.h> +-#include <linux/syscalls.h> +-#include <linux/freezer.h> +-#include <linux/memblock.h> +-#include <linux/fault-inject.h> +-#include <linux/time_namespace.h> +- +-#include <asm/futex.h> +- +-#include "locking/rtmutex_common.h" +- +-/* +- * READ this before attempting to hack on futexes! +- * +- * Basic futex operation and ordering guarantees +- * ============================================= +- * +- * The waiter reads the futex value in user space and calls +- * futex_wait(). This function computes the hash bucket and acquires +- * the hash bucket lock. After that it reads the futex user space value +- * again and verifies that the data has not changed. If it has not changed +- * it enqueues itself into the hash bucket, releases the hash bucket lock +- * and schedules. 
+- * +- * The waker side modifies the user space value of the futex and calls +- * futex_wake(). This function computes the hash bucket and acquires the +- * hash bucket lock. Then it looks for waiters on that futex in the hash +- * bucket and wakes them. +- * +- * In futex wake up scenarios where no tasks are blocked on a futex, taking +- * the hb spinlock can be avoided and simply return. In order for this +- * optimization to work, ordering guarantees must exist so that the waiter +- * being added to the list is acknowledged when the list is concurrently being +- * checked by the waker, avoiding scenarios like the following: +- * +- * CPU 0 CPU 1 +- * val = *futex; +- * sys_futex(WAIT, futex, val); +- * futex_wait(futex, val); +- * uval = *futex; +- * *futex = newval; +- * sys_futex(WAKE, futex); +- * futex_wake(futex); +- * if (queue_empty()) +- * return; +- * if (uval == val) +- * lock(hash_bucket(futex)); +- * queue(); +- * unlock(hash_bucket(futex)); +- * schedule(); +- * +- * This would cause the waiter on CPU 0 to wait forever because it +- * missed the transition of the user space value from val to newval +- * and the waker did not find the waiter in the hash bucket queue. +- * +- * The correct serialization ensures that a waiter either observes +- * the changed user space value before blocking or is woken by a +- * concurrent waker: +- * +- * CPU 0 CPU 1 +- * val = *futex; +- * sys_futex(WAIT, futex, val); +- * futex_wait(futex, val); +- * +- * waiters++; (a) +- * smp_mb(); (A) <-- paired with -. +- * | +- * lock(hash_bucket(futex)); | +- * | +- * uval = *futex; | +- * | *futex = newval; +- * | sys_futex(WAKE, futex); +- * | futex_wake(futex); +- * | +- * `--------> smp_mb(); (B) +- * if (uval == val) +- * queue(); +- * unlock(hash_bucket(futex)); +- * schedule(); if (waiters) +- * lock(hash_bucket(futex)); +- * else wake_waiters(futex); +- * waiters--; (b) unlock(hash_bucket(futex)); +- * +- * Where (A) orders the waiters increment and the futex value read through +- * atomic operations (see hb_waiters_inc) and where (B) orders the write +- * to futex and the waiters read (see hb_waiters_pending()). +- * +- * This yields the following case (where X:=waiters, Y:=futex): +- * +- * X = Y = 0 +- * +- * w[X]=1 w[Y]=1 +- * MB MB +- * r[Y]=y r[X]=x +- * +- * Which guarantees that x==0 && y==0 is impossible; which translates back into +- * the guarantee that we cannot both miss the futex variable change and the +- * enqueue. +- * +- * Note that a new waiter is accounted for in (a) even when it is possible that +- * the wait call can return error, in which case we backtrack from it in (b). +- * Refer to the comment in queue_lock(). +- * +- * Similarly, in order to account for waiters being requeued on another +- * address we always increment the waiters for the destination bucket before +- * acquiring the lock. It then decrements them again after releasing it - +- * the code that actually moves the futex(es) between hash buckets (requeue_futex) +- * will do the additional required waiter count housekeeping. This is done for +- * double_lock_hb() and double_unlock_hb(), respectively. +- */ +- +-#ifdef CONFIG_HAVE_FUTEX_CMPXCHG +-#define futex_cmpxchg_enabled 1 +-#else +-static int __read_mostly futex_cmpxchg_enabled; +-#endif +- +-/* +- * Futex flags used to encode options to functions and preserve them across +- * restarts. +- */ +-#ifdef CONFIG_MMU +-# define FLAGS_SHARED 0x01 +-#else +-/* +- * NOMMU does not have per process address space. Let the compiler optimize +- * code away. 
+- */ +-# define FLAGS_SHARED 0x00 +-#endif +-#define FLAGS_CLOCKRT 0x02 +-#define FLAGS_HAS_TIMEOUT 0x04 +- +-/* +- * Priority Inheritance state: +- */ +-struct futex_pi_state { +- /* +- * list of 'owned' pi_state instances - these have to be +- * cleaned up in do_exit() if the task exits prematurely: +- */ +- struct list_head list; +- +- /* +- * The PI object: +- */ +- struct rt_mutex_base pi_mutex; +- +- struct task_struct *owner; +- refcount_t refcount; +- +- union futex_key key; +-} __randomize_layout; +- +-/** +- * struct futex_q - The hashed futex queue entry, one per waiting task +- * @list: priority-sorted list of tasks waiting on this futex +- * @task: the task waiting on the futex +- * @lock_ptr: the hash bucket lock +- * @key: the key the futex is hashed on +- * @pi_state: optional priority inheritance state +- * @rt_waiter: rt_waiter storage for use with requeue_pi +- * @requeue_pi_key: the requeue_pi target futex key +- * @bitset: bitset for the optional bitmasked wakeup +- * @requeue_state: State field for futex_requeue_pi() +- * @requeue_wait: RCU wait for futex_requeue_pi() (RT only) +- * +- * We use this hashed waitqueue, instead of a normal wait_queue_entry_t, so +- * we can wake only the relevant ones (hashed queues may be shared). +- * +- * A futex_q has a woken state, just like tasks have TASK_RUNNING. +- * It is considered woken when plist_node_empty(&q->list) || q->lock_ptr == 0. +- * The order of wakeup is always to make the first condition true, then +- * the second. +- * +- * PI futexes are typically woken before they are removed from the hash list via +- * the rt_mutex code. See unqueue_me_pi(). +- */ +-struct futex_q { +- struct plist_node list; +- +- struct task_struct *task; +- spinlock_t *lock_ptr; +- union futex_key key; +- struct futex_pi_state *pi_state; +- struct rt_mutex_waiter *rt_waiter; +- union futex_key *requeue_pi_key; +- u32 bitset; +- atomic_t requeue_state; +-#ifdef CONFIG_PREEMPT_RT +- struct rcuwait requeue_wait; +-#endif +-} __randomize_layout; +- +-/* +- * On PREEMPT_RT, the hash bucket lock is a 'sleeping' spinlock with an +- * underlying rtmutex. The task which is about to be requeued could have +- * just woken up (timeout, signal). After the wake up the task has to +- * acquire hash bucket lock, which is held by the requeue code. As a task +- * can only be blocked on _ONE_ rtmutex at a time, the proxy lock blocking +- * and the hash bucket lock blocking would collide and corrupt state. +- * +- * On !PREEMPT_RT this is not a problem and everything could be serialized +- * on hash bucket lock, but aside of having the benefit of common code, +- * this allows to avoid doing the requeue when the task is already on the +- * way out and taking the hash bucket lock of the original uaddr1 when the +- * requeue has been completed. +- * +- * The following state transitions are valid: +- * +- * On the waiter side: +- * Q_REQUEUE_PI_NONE -> Q_REQUEUE_PI_IGNORE +- * Q_REQUEUE_PI_IN_PROGRESS -> Q_REQUEUE_PI_WAIT +- * +- * On the requeue side: +- * Q_REQUEUE_PI_NONE -> Q_REQUEUE_PI_INPROGRESS +- * Q_REQUEUE_PI_IN_PROGRESS -> Q_REQUEUE_PI_DONE/LOCKED +- * Q_REQUEUE_PI_IN_PROGRESS -> Q_REQUEUE_PI_NONE (requeue failed) +- * Q_REQUEUE_PI_WAIT -> Q_REQUEUE_PI_DONE/LOCKED +- * Q_REQUEUE_PI_WAIT -> Q_REQUEUE_PI_IGNORE (requeue failed) +- * +- * The requeue side ignores a waiter with state Q_REQUEUE_PI_IGNORE as this +- * signals that the waiter is already on the way out. It also means that +- * the waiter is still on the 'wait' futex, i.e. uaddr1. 
+- * +- * The waiter side signals early wakeup to the requeue side either through +- * setting state to Q_REQUEUE_PI_IGNORE or to Q_REQUEUE_PI_WAIT depending +- * on the current state. In case of Q_REQUEUE_PI_IGNORE it can immediately +- * proceed to take the hash bucket lock of uaddr1. If it set state to WAIT, +- * which means the wakeup is interleaving with a requeue in progress it has +- * to wait for the requeue side to change the state. Either to DONE/LOCKED +- * or to IGNORE. DONE/LOCKED means the waiter q is now on the uaddr2 futex +- * and either blocked (DONE) or has acquired it (LOCKED). IGNORE is set by +- * the requeue side when the requeue attempt failed via deadlock detection +- * and therefore the waiter q is still on the uaddr1 futex. +- */ +-enum { +- Q_REQUEUE_PI_NONE = 0, +- Q_REQUEUE_PI_IGNORE, +- Q_REQUEUE_PI_IN_PROGRESS, +- Q_REQUEUE_PI_WAIT, +- Q_REQUEUE_PI_DONE, +- Q_REQUEUE_PI_LOCKED, +-}; +- +-static const struct futex_q futex_q_init = { +- /* list gets initialized in queue_me()*/ +- .key = FUTEX_KEY_INIT, +- .bitset = FUTEX_BITSET_MATCH_ANY, +- .requeue_state = ATOMIC_INIT(Q_REQUEUE_PI_NONE), +-}; +- +-/* +- * Hash buckets are shared by all the futex_keys that hash to the same +- * location. Each key may have multiple futex_q structures, one for each task +- * waiting on a futex. +- */ +-struct futex_hash_bucket { +- atomic_t waiters; +- spinlock_t lock; +- struct plist_head chain; +-} ____cacheline_aligned_in_smp; +- +-/* +- * The base of the bucket array and its size are always used together +- * (after initialization only in hash_futex()), so ensure that they +- * reside in the same cacheline. +- */ +-static struct { +- struct futex_hash_bucket *queues; +- unsigned long hashsize; +-} __futex_data __read_mostly __aligned(2*sizeof(long)); +-#define futex_queues (__futex_data.queues) +-#define futex_hashsize (__futex_data.hashsize) +- +- +-/* +- * Fault injections for futexes. +- */ +-#ifdef CONFIG_FAIL_FUTEX +- +-static struct { +- struct fault_attr attr; +- +- bool ignore_private; +-} fail_futex = { +- .attr = FAULT_ATTR_INITIALIZER, +- .ignore_private = false, +-}; +- +-static int __init setup_fail_futex(char *str) +-{ +- return setup_fault_attr(&fail_futex.attr, str); +-} +-__setup("fail_futex=", setup_fail_futex); +- +-static bool should_fail_futex(bool fshared) +-{ +- if (fail_futex.ignore_private && !fshared) +- return false; +- +- return should_fail(&fail_futex.attr, 1); +-} +- +-#ifdef CONFIG_FAULT_INJECTION_DEBUG_FS +- +-static int __init fail_futex_debugfs(void) +-{ +- umode_t mode = S_IFREG | S_IRUSR | S_IWUSR; +- struct dentry *dir; +- +- dir = fault_create_debugfs_attr("fail_futex", NULL, +- &fail_futex.attr); +- if (IS_ERR(dir)) +- return PTR_ERR(dir); +- +- debugfs_create_bool("ignore-private", mode, dir, +- &fail_futex.ignore_private); +- return 0; +-} +- +-late_initcall(fail_futex_debugfs); +- +-#endif /* CONFIG_FAULT_INJECTION_DEBUG_FS */ +- +-#else +-static inline bool should_fail_futex(bool fshared) +-{ +- return false; +-} +-#endif /* CONFIG_FAIL_FUTEX */ +- +-#ifdef CONFIG_COMPAT +-static void compat_exit_robust_list(struct task_struct *curr); +-#endif +- +-/* +- * Reflects a new waiter being added to the waitqueue. +- */ +-static inline void hb_waiters_inc(struct futex_hash_bucket *hb) +-{ +-#ifdef CONFIG_SMP +- atomic_inc(&hb->waiters); +- /* +- * Full barrier (A), see the ordering comment above. +- */ +- smp_mb__after_atomic(); +-#endif +-} +- +-/* +- * Reflects a waiter being removed from the waitqueue by wakeup +- * paths. 
+- */ +-static inline void hb_waiters_dec(struct futex_hash_bucket *hb) +-{ +-#ifdef CONFIG_SMP +- atomic_dec(&hb->waiters); +-#endif +-} +- +-static inline int hb_waiters_pending(struct futex_hash_bucket *hb) +-{ +-#ifdef CONFIG_SMP +- /* +- * Full barrier (B), see the ordering comment above. +- */ +- smp_mb(); +- return atomic_read(&hb->waiters); +-#else +- return 1; +-#endif +-} +- +-/** +- * hash_futex - Return the hash bucket in the global hash +- * @key: Pointer to the futex key for which the hash is calculated +- * +- * We hash on the keys returned from get_futex_key (see below) and return the +- * corresponding hash bucket in the global hash. +- */ +-static struct futex_hash_bucket *hash_futex(union futex_key *key) +-{ +- u32 hash = jhash2((u32 *)key, offsetof(typeof(*key), both.offset) / 4, +- key->both.offset); +- +- return &futex_queues[hash & (futex_hashsize - 1)]; +-} +- +- +-/** +- * match_futex - Check whether two futex keys are equal +- * @key1: Pointer to key1 +- * @key2: Pointer to key2 +- * +- * Return 1 if two futex_keys are equal, 0 otherwise. +- */ +-static inline int match_futex(union futex_key *key1, union futex_key *key2) +-{ +- return (key1 && key2 +- && key1->both.word == key2->both.word +- && key1->both.ptr == key2->both.ptr +- && key1->both.offset == key2->both.offset); +-} +- +-enum futex_access { +- FUTEX_READ, +- FUTEX_WRITE +-}; +- +-/** +- * futex_setup_timer - set up the sleeping hrtimer. +- * @time: ptr to the given timeout value +- * @timeout: the hrtimer_sleeper structure to be set up +- * @flags: futex flags +- * @range_ns: optional range in ns +- * +- * Return: Initialized hrtimer_sleeper structure or NULL if no timeout +- * value given +- */ +-static inline struct hrtimer_sleeper * +-futex_setup_timer(ktime_t *time, struct hrtimer_sleeper *timeout, +- int flags, u64 range_ns) +-{ +- if (!time) +- return NULL; +- +- hrtimer_init_sleeper_on_stack(timeout, (flags & FLAGS_CLOCKRT) ? +- CLOCK_REALTIME : CLOCK_MONOTONIC, +- HRTIMER_MODE_ABS); +- /* +- * If range_ns is 0, calling hrtimer_set_expires_range_ns() is +- * effectively the same as calling hrtimer_set_expires(). +- */ +- hrtimer_set_expires_range_ns(&timeout->timer, *time, range_ns); +- +- return timeout; +-} +- +-/* +- * Generate a machine wide unique identifier for this inode. +- * +- * This relies on u64 not wrapping in the life-time of the machine; which with +- * 1ns resolution means almost 585 years. +- * +- * This further relies on the fact that a well formed program will not unmap +- * the file while it has a (shared) futex waiting on it. This mapping will have +- * a file reference which pins the mount and inode. +- * +- * If for some reason an inode gets evicted and read back in again, it will get +- * a new sequence number and will _NOT_ match, even though it is the exact same +- * file. +- * +- * It is important that match_futex() will never have a false-positive, esp. +- * for PI futexes that can mess up the state. The above argues that false-negatives +- * are only possible for malformed programs. +- */ +-static u64 get_inode_sequence_number(struct inode *inode) +-{ +- static atomic64_t i_seq; +- u64 old; +- +- /* Does the inode already have a sequence number? 
*/ +- old = atomic64_read(&inode->i_sequence); +- if (likely(old)) +- return old; +- +- for (;;) { +- u64 new = atomic64_add_return(1, &i_seq); +- if (WARN_ON_ONCE(!new)) +- continue; +- +- old = atomic64_cmpxchg_relaxed(&inode->i_sequence, 0, new); +- if (old) +- return old; +- return new; +- } +-} +- +-/** +- * get_futex_key() - Get parameters which are the keys for a futex +- * @uaddr: virtual address of the futex +- * @fshared: false for a PROCESS_PRIVATE futex, true for PROCESS_SHARED +- * @key: address where result is stored. +- * @rw: mapping needs to be read/write (values: FUTEX_READ, +- * FUTEX_WRITE) +- * +- * Return: a negative error code or 0 +- * +- * The key words are stored in @key on success. +- * +- * For shared mappings (when @fshared), the key is: +- * +- * ( inode->i_sequence, page->index, offset_within_page ) +- * +- * [ also see get_inode_sequence_number() ] +- * +- * For private mappings (or when !@fshared), the key is: +- * +- * ( current->mm, address, 0 ) +- * +- * This allows (cross process, where applicable) identification of the futex +- * without keeping the page pinned for the duration of the FUTEX_WAIT. +- * +- * lock_page() might sleep, the caller should not hold a spinlock. +- */ +-static int get_futex_key(u32 __user *uaddr, bool fshared, union futex_key *key, +- enum futex_access rw) +-{ +- unsigned long address = (unsigned long)uaddr; +- struct mm_struct *mm = current->mm; +- struct page *page, *tail; +- struct address_space *mapping; +- int err, ro = 0; +- +- /* +- * The futex address must be "naturally" aligned. +- */ +- key->both.offset = address % PAGE_SIZE; +- if (unlikely((address % sizeof(u32)) != 0)) +- return -EINVAL; +- address -= key->both.offset; +- +- if (unlikely(!access_ok(uaddr, sizeof(u32)))) +- return -EFAULT; +- +- if (unlikely(should_fail_futex(fshared))) +- return -EFAULT; +- +- /* +- * PROCESS_PRIVATE futexes are fast. +- * As the mm cannot disappear under us and the 'key' only needs +- * virtual address, we dont even have to find the underlying vma. +- * Note : We do have to check 'uaddr' is a valid user address, +- * but access_ok() should be faster than find_vma() +- */ +- if (!fshared) { +- key->private.mm = mm; +- key->private.address = address; +- return 0; +- } +- +-again: +- /* Ignore any VERIFY_READ mapping (futex common case) */ +- if (unlikely(should_fail_futex(true))) +- return -EFAULT; +- +- err = get_user_pages_fast(address, 1, FOLL_WRITE, &page); +- /* +- * If write access is not required (eg. FUTEX_WAIT), try +- * and get read-only access. +- */ +- if (err == -EFAULT && rw == FUTEX_READ) { +- err = get_user_pages_fast(address, 1, 0, &page); +- ro = 1; +- } +- if (err < 0) +- return err; +- else +- err = 0; +- +- /* +- * The treatment of mapping from this point on is critical. The page +- * lock protects many things but in this context the page lock +- * stabilizes mapping, prevents inode freeing in the shared +- * file-backed region case and guards against movement to swap cache. +- * +- * Strictly speaking the page lock is not needed in all cases being +- * considered here and page lock forces unnecessarily serialization +- * From this point on, mapping will be re-verified if necessary and +- * page lock will be acquired only if it is unavoidable +- * +- * Mapping checks require the head page for any compound page so the +- * head page and mapping is looked up now. For anonymous pages, it +- * does not matter if the page splits in the future as the key is +- * based on the address. 
For filesystem-backed pages, the tail is +- * required as the index of the page determines the key. For +- * base pages, there is no tail page and tail == page. +- */ +- tail = page; +- page = compound_head(page); +- mapping = READ_ONCE(page->mapping); +- +- /* +- * If page->mapping is NULL, then it cannot be a PageAnon +- * page; but it might be the ZERO_PAGE or in the gate area or +- * in a special mapping (all cases which we are happy to fail); +- * or it may have been a good file page when get_user_pages_fast +- * found it, but truncated or holepunched or subjected to +- * invalidate_complete_page2 before we got the page lock (also +- * cases which we are happy to fail). And we hold a reference, +- * so refcount care in invalidate_complete_page's remove_mapping +- * prevents drop_caches from setting mapping to NULL beneath us. +- * +- * The case we do have to guard against is when memory pressure made +- * shmem_writepage move it from filecache to swapcache beneath us: +- * an unlikely race, but we do need to retry for page->mapping. +- */ +- if (unlikely(!mapping)) { +- int shmem_swizzled; +- +- /* +- * Page lock is required to identify which special case above +- * applies. If this is really a shmem page then the page lock +- * will prevent unexpected transitions. +- */ +- lock_page(page); +- shmem_swizzled = PageSwapCache(page) || page->mapping; +- unlock_page(page); +- put_page(page); +- +- if (shmem_swizzled) +- goto again; +- +- return -EFAULT; +- } +- +- /* +- * Private mappings are handled in a simple way. +- * +- * If the futex key is stored on an anonymous page, then the associated +- * object is the mm which is implicitly pinned by the calling process. +- * +- * NOTE: When userspace waits on a MAP_SHARED mapping, even if +- * it's a read-only handle, it's expected that futexes attach to +- * the object not the particular process. +- */ +- if (PageAnon(page)) { +- /* +- * A RO anonymous page will never change and thus doesn't make +- * sense for futex operations. +- */ +- if (unlikely(should_fail_futex(true)) || ro) { +- err = -EFAULT; +- goto out; +- } +- +- key->both.offset |= FUT_OFF_MMSHARED; /* ref taken on mm */ +- key->private.mm = mm; +- key->private.address = address; +- +- } else { +- struct inode *inode; +- +- /* +- * The associated futex object in this case is the inode and +- * the page->mapping must be traversed. Ordinarily this should +- * be stabilised under page lock but it's not strictly +- * necessary in this case as we just want to pin the inode, not +- * update the radix tree or anything like that. +- * +- * The RCU read lock is taken as the inode is finally freed +- * under RCU. If the mapping still matches expectations then the +- * mapping->host can be safely accessed as being a valid inode. +- */ +- rcu_read_lock(); +- +- if (READ_ONCE(page->mapping) != mapping) { +- rcu_read_unlock(); +- put_page(page); +- +- goto again; +- } +- +- inode = READ_ONCE(mapping->host); +- if (!inode) { +- rcu_read_unlock(); +- put_page(page); +- +- goto again; +- } +- +- key->both.offset |= FUT_OFF_INODE; /* inode-based key */ +- key->shared.i_seq = get_inode_sequence_number(inode); +- key->shared.pgoff = page_to_pgoff(tail); +- rcu_read_unlock(); +- } +- +-out: +- put_page(page); +- return err; +-} +- +-/** +- * fault_in_user_writeable() - Fault in user address and verify RW access +- * @uaddr: pointer to faulting user space address +- * +- * Slow path to fixup the fault we just took in the atomic write +- * access to @uaddr. 
+- * +- * We have no generic implementation of a non-destructive write to the +- * user address. We know that we faulted in the atomic pagefault +- * disabled section so we can as well avoid the #PF overhead by +- * calling get_user_pages() right away. +- */ +-static int fault_in_user_writeable(u32 __user *uaddr) +-{ +- struct mm_struct *mm = current->mm; +- int ret; +- +- mmap_read_lock(mm); +- ret = fixup_user_fault(mm, (unsigned long)uaddr, +- FAULT_FLAG_WRITE, NULL); +- mmap_read_unlock(mm); +- +- return ret < 0 ? ret : 0; +-} +- +-/** +- * futex_top_waiter() - Return the highest priority waiter on a futex +- * @hb: the hash bucket the futex_q's reside in +- * @key: the futex key (to distinguish it from other futex futex_q's) +- * +- * Must be called with the hb lock held. +- */ +-static struct futex_q *futex_top_waiter(struct futex_hash_bucket *hb, +- union futex_key *key) +-{ +- struct futex_q *this; +- +- plist_for_each_entry(this, &hb->chain, list) { +- if (match_futex(&this->key, key)) +- return this; +- } +- return NULL; +-} +- +-static int cmpxchg_futex_value_locked(u32 *curval, u32 __user *uaddr, +- u32 uval, u32 newval) +-{ +- int ret; +- +- pagefault_disable(); +- ret = futex_atomic_cmpxchg_inatomic(curval, uaddr, uval, newval); +- pagefault_enable(); +- +- return ret; +-} +- +-static int get_futex_value_locked(u32 *dest, u32 __user *from) +-{ +- int ret; +- +- pagefault_disable(); +- ret = __get_user(*dest, from); +- pagefault_enable(); +- +- return ret ? -EFAULT : 0; +-} +- +- +-/* +- * PI code: +- */ +-static int refill_pi_state_cache(void) +-{ +- struct futex_pi_state *pi_state; +- +- if (likely(current->pi_state_cache)) +- return 0; +- +- pi_state = kzalloc(sizeof(*pi_state), GFP_KERNEL); +- +- if (!pi_state) +- return -ENOMEM; +- +- INIT_LIST_HEAD(&pi_state->list); +- /* pi_mutex gets initialized later */ +- pi_state->owner = NULL; +- refcount_set(&pi_state->refcount, 1); +- pi_state->key = FUTEX_KEY_INIT; +- +- current->pi_state_cache = pi_state; +- +- return 0; +-} +- +-static struct futex_pi_state *alloc_pi_state(void) +-{ +- struct futex_pi_state *pi_state = current->pi_state_cache; +- +- WARN_ON(!pi_state); +- current->pi_state_cache = NULL; +- +- return pi_state; +-} +- +-static void pi_state_update_owner(struct futex_pi_state *pi_state, +- struct task_struct *new_owner) +-{ +- struct task_struct *old_owner = pi_state->owner; +- +- lockdep_assert_held(&pi_state->pi_mutex.wait_lock); +- +- if (old_owner) { +- raw_spin_lock(&old_owner->pi_lock); +- WARN_ON(list_empty(&pi_state->list)); +- list_del_init(&pi_state->list); +- raw_spin_unlock(&old_owner->pi_lock); +- } +- +- if (new_owner) { +- raw_spin_lock(&new_owner->pi_lock); +- WARN_ON(!list_empty(&pi_state->list)); +- list_add(&pi_state->list, &new_owner->pi_state_list); +- pi_state->owner = new_owner; +- raw_spin_unlock(&new_owner->pi_lock); +- } +-} +- +-static void get_pi_state(struct futex_pi_state *pi_state) +-{ +- WARN_ON_ONCE(!refcount_inc_not_zero(&pi_state->refcount)); +-} +- +-/* +- * Drops a reference to the pi_state object and frees or caches it +- * when the last reference is gone. 
+- */ +-static void put_pi_state(struct futex_pi_state *pi_state) +-{ +- if (!pi_state) +- return; +- +- if (!refcount_dec_and_test(&pi_state->refcount)) +- return; +- +- /* +- * If pi_state->owner is NULL, the owner is most probably dying +- * and has cleaned up the pi_state already +- */ +- if (pi_state->owner) { +- unsigned long flags; +- +- raw_spin_lock_irqsave(&pi_state->pi_mutex.wait_lock, flags); +- pi_state_update_owner(pi_state, NULL); +- rt_mutex_proxy_unlock(&pi_state->pi_mutex); +- raw_spin_unlock_irqrestore(&pi_state->pi_mutex.wait_lock, flags); +- } +- +- if (current->pi_state_cache) { +- kfree(pi_state); +- } else { +- /* +- * pi_state->list is already empty. +- * clear pi_state->owner. +- * refcount is at 0 - put it back to 1. +- */ +- pi_state->owner = NULL; +- refcount_set(&pi_state->refcount, 1); +- current->pi_state_cache = pi_state; +- } +-} +- +-#ifdef CONFIG_FUTEX_PI +- +-/* +- * This task is holding PI mutexes at exit time => bad. +- * Kernel cleans up PI-state, but userspace is likely hosed. +- * (Robust-futex cleanup is separate and might save the day for userspace.) +- */ +-static void exit_pi_state_list(struct task_struct *curr) +-{ +- struct list_head *next, *head = &curr->pi_state_list; +- struct futex_pi_state *pi_state; +- struct futex_hash_bucket *hb; +- union futex_key key = FUTEX_KEY_INIT; +- +- if (!futex_cmpxchg_enabled) +- return; +- /* +- * We are a ZOMBIE and nobody can enqueue itself on +- * pi_state_list anymore, but we have to be careful +- * versus waiters unqueueing themselves: +- */ +- raw_spin_lock_irq(&curr->pi_lock); +- while (!list_empty(head)) { +- next = head->next; +- pi_state = list_entry(next, struct futex_pi_state, list); +- key = pi_state->key; +- hb = hash_futex(&key); +- +- /* +- * We can race against put_pi_state() removing itself from the +- * list (a waiter going away). put_pi_state() will first +- * decrement the reference count and then modify the list, so +- * its possible to see the list entry but fail this reference +- * acquire. +- * +- * In that case; drop the locks to let put_pi_state() make +- * progress and retry the loop. +- */ +- if (!refcount_inc_not_zero(&pi_state->refcount)) { +- raw_spin_unlock_irq(&curr->pi_lock); +- cpu_relax(); +- raw_spin_lock_irq(&curr->pi_lock); +- continue; +- } +- raw_spin_unlock_irq(&curr->pi_lock); +- +- spin_lock(&hb->lock); +- raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock); +- raw_spin_lock(&curr->pi_lock); +- /* +- * We dropped the pi-lock, so re-check whether this +- * task still owns the PI-state: +- */ +- if (head->next != next) { +- /* retain curr->pi_lock for the loop invariant */ +- raw_spin_unlock(&pi_state->pi_mutex.wait_lock); +- spin_unlock(&hb->lock); +- put_pi_state(pi_state); +- continue; +- } +- +- WARN_ON(pi_state->owner != curr); +- WARN_ON(list_empty(&pi_state->list)); +- list_del_init(&pi_state->list); +- pi_state->owner = NULL; +- +- raw_spin_unlock(&curr->pi_lock); +- raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock); +- spin_unlock(&hb->lock); +- +- rt_mutex_futex_unlock(&pi_state->pi_mutex); +- put_pi_state(pi_state); +- +- raw_spin_lock_irq(&curr->pi_lock); +- } +- raw_spin_unlock_irq(&curr->pi_lock); +-} +-#else +-static inline void exit_pi_state_list(struct task_struct *curr) { } +-#endif +- +-/* +- * We need to check the following states: +- * +- * Waiter | pi_state | pi->owner | uTID | uODIED | ? 
+- * +- * [1] NULL | --- | --- | 0 | 0/1 | Valid +- * [2] NULL | --- | --- | >0 | 0/1 | Valid +- * +- * [3] Found | NULL | -- | Any | 0/1 | Invalid +- * +- * [4] Found | Found | NULL | 0 | 1 | Valid +- * [5] Found | Found | NULL | >0 | 1 | Invalid +- * +- * [6] Found | Found | task | 0 | 1 | Valid +- * +- * [7] Found | Found | NULL | Any | 0 | Invalid +- * +- * [8] Found | Found | task | ==taskTID | 0/1 | Valid +- * [9] Found | Found | task | 0 | 0 | Invalid +- * [10] Found | Found | task | !=taskTID | 0/1 | Invalid +- * +- * [1] Indicates that the kernel can acquire the futex atomically. We +- * came here due to a stale FUTEX_WAITERS/FUTEX_OWNER_DIED bit. +- * +- * [2] Valid, if TID does not belong to a kernel thread. If no matching +- * thread is found then it indicates that the owner TID has died. +- * +- * [3] Invalid. The waiter is queued on a non PI futex +- * +- * [4] Valid state after exit_robust_list(), which sets the user space +- * value to FUTEX_WAITERS | FUTEX_OWNER_DIED. +- * +- * [5] The user space value got manipulated between exit_robust_list() +- * and exit_pi_state_list() +- * +- * [6] Valid state after exit_pi_state_list() which sets the new owner in +- * the pi_state but cannot access the user space value. +- * +- * [7] pi_state->owner can only be NULL when the OWNER_DIED bit is set. +- * +- * [8] Owner and user space value match +- * +- * [9] There is no transient state which sets the user space TID to 0 +- * except exit_robust_list(), but this is indicated by the +- * FUTEX_OWNER_DIED bit. See [4] +- * +- * [10] There is no transient state which leaves owner and user space +- * TID out of sync. Except one error case where the kernel is denied +- * write access to the user address, see fixup_pi_state_owner(). +- * +- * +- * Serialization and lifetime rules: +- * +- * hb->lock: +- * +- * hb -> futex_q, relation +- * futex_q -> pi_state, relation +- * +- * (cannot be raw because hb can contain arbitrary amount +- * of futex_q's) +- * +- * pi_mutex->wait_lock: +- * +- * {uval, pi_state} +- * +- * (and pi_mutex 'obviously') +- * +- * p->pi_lock: +- * +- * p->pi_state_list -> pi_state->list, relation +- * pi_mutex->owner -> pi_state->owner, relation +- * +- * pi_state->refcount: +- * +- * pi_state lifetime +- * +- * +- * Lock order: +- * +- * hb->lock +- * pi_mutex->wait_lock +- * p->pi_lock +- * +- */ +- +-/* +- * Validate that the existing waiter has a pi_state and sanity check +- * the pi_state against the user space value. If correct, attach to +- * it. +- */ +-static int attach_to_pi_state(u32 __user *uaddr, u32 uval, +- struct futex_pi_state *pi_state, +- struct futex_pi_state **ps) +-{ +- pid_t pid = uval & FUTEX_TID_MASK; +- u32 uval2; +- int ret; +- +- /* +- * Userspace might have messed up non-PI and PI futexes [3] +- */ +- if (unlikely(!pi_state)) +- return -EINVAL; +- +- /* +- * We get here with hb->lock held, and having found a +- * futex_top_waiter(). This means that futex_lock_pi() of said futex_q +- * has dropped the hb->lock in between queue_me() and unqueue_me_pi(), +- * which in turn means that futex_lock_pi() still has a reference on +- * our pi_state. +- * +- * The waiter holding a reference on @pi_state also protects against +- * the unlocked put_pi_state() in futex_unlock_pi(), futex_lock_pi() +- * and futex_wait_requeue_pi() as it cannot go to 0 and consequently +- * free pi_state before we can take a reference ourselves. 
+- */ +- WARN_ON(!refcount_read(&pi_state->refcount)); +- +- /* +- * Now that we have a pi_state, we can acquire wait_lock +- * and do the state validation. +- */ +- raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock); +- +- /* +- * Since {uval, pi_state} is serialized by wait_lock, and our current +- * uval was read without holding it, it can have changed. Verify it +- * still is what we expect it to be, otherwise retry the entire +- * operation. +- */ +- if (get_futex_value_locked(&uval2, uaddr)) +- goto out_efault; +- +- if (uval != uval2) +- goto out_eagain; +- +- /* +- * Handle the owner died case: +- */ +- if (uval & FUTEX_OWNER_DIED) { +- /* +- * exit_pi_state_list sets owner to NULL and wakes the +- * topmost waiter. The task which acquires the +- * pi_state->rt_mutex will fixup owner. +- */ +- if (!pi_state->owner) { +- /* +- * No pi state owner, but the user space TID +- * is not 0. Inconsistent state. [5] +- */ +- if (pid) +- goto out_einval; +- /* +- * Take a ref on the state and return success. [4] +- */ +- goto out_attach; +- } +- +- /* +- * If TID is 0, then either the dying owner has not +- * yet executed exit_pi_state_list() or some waiter +- * acquired the rtmutex in the pi state, but did not +- * yet fixup the TID in user space. +- * +- * Take a ref on the state and return success. [6] +- */ +- if (!pid) +- goto out_attach; +- } else { +- /* +- * If the owner died bit is not set, then the pi_state +- * must have an owner. [7] +- */ +- if (!pi_state->owner) +- goto out_einval; +- } +- +- /* +- * Bail out if user space manipulated the futex value. If pi +- * state exists then the owner TID must be the same as the +- * user space TID. [9/10] +- */ +- if (pid != task_pid_vnr(pi_state->owner)) +- goto out_einval; +- +-out_attach: +- get_pi_state(pi_state); +- raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock); +- *ps = pi_state; +- return 0; +- +-out_einval: +- ret = -EINVAL; +- goto out_error; +- +-out_eagain: +- ret = -EAGAIN; +- goto out_error; +- +-out_efault: +- ret = -EFAULT; +- goto out_error; +- +-out_error: +- raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock); +- return ret; +-} +- +-/** +- * wait_for_owner_exiting - Block until the owner has exited +- * @ret: owner's current futex lock status +- * @exiting: Pointer to the exiting task +- * +- * Caller must hold a refcount on @exiting. +- */ +-static void wait_for_owner_exiting(int ret, struct task_struct *exiting) +-{ +- if (ret != -EBUSY) { +- WARN_ON_ONCE(exiting); +- return; +- } +- +- if (WARN_ON_ONCE(ret == -EBUSY && !exiting)) +- return; +- +- mutex_lock(&exiting->futex_exit_mutex); +- /* +- * No point in doing state checking here. If the waiter got here +- * while the task was in exec()->exec_futex_release() then it can +- * have any FUTEX_STATE_* value when the waiter has acquired the +- * mutex. OK, if running, EXITING or DEAD if it reached exit() +- * already. Highly unlikely and not a problem. Just one more round +- * through the futex maze. +- */ +- mutex_unlock(&exiting->futex_exit_mutex); +- +- put_task_struct(exiting); +-} +- +-static int handle_exit_race(u32 __user *uaddr, u32 uval, +- struct task_struct *tsk) +-{ +- u32 uval2; +- +- /* +- * If the futex exit state is not yet FUTEX_STATE_DEAD, tell the +- * caller that the alleged owner is busy. 
+- */ +- if (tsk && tsk->futex_state != FUTEX_STATE_DEAD) +- return -EBUSY; +- +- /* +- * Reread the user space value to handle the following situation: +- * +- * CPU0 CPU1 +- * +- * sys_exit() sys_futex() +- * do_exit() futex_lock_pi() +- * futex_lock_pi_atomic() +- * exit_signals(tsk) No waiters: +- * tsk->flags |= PF_EXITING; *uaddr == 0x00000PID +- * mm_release(tsk) Set waiter bit +- * exit_robust_list(tsk) { *uaddr = 0x80000PID; +- * Set owner died attach_to_pi_owner() { +- * *uaddr = 0xC0000000; tsk = get_task(PID); +- * } if (!tsk->flags & PF_EXITING) { +- * ... attach(); +- * tsk->futex_state = } else { +- * FUTEX_STATE_DEAD; if (tsk->futex_state != +- * FUTEX_STATE_DEAD) +- * return -EAGAIN; +- * return -ESRCH; <--- FAIL +- * } +- * +- * Returning ESRCH unconditionally is wrong here because the +- * user space value has been changed by the exiting task. +- * +- * The same logic applies to the case where the exiting task is +- * already gone. +- */ +- if (get_futex_value_locked(&uval2, uaddr)) +- return -EFAULT; +- +- /* If the user space value has changed, try again. */ +- if (uval2 != uval) +- return -EAGAIN; +- +- /* +- * The exiting task did not have a robust list, the robust list was +- * corrupted or the user space value in *uaddr is simply bogus. +- * Give up and tell user space. +- */ +- return -ESRCH; +-} +- +-static void __attach_to_pi_owner(struct task_struct *p, union futex_key *key, +- struct futex_pi_state **ps) +-{ +- /* +- * No existing pi state. First waiter. [2] +- * +- * This creates pi_state, we have hb->lock held, this means nothing can +- * observe this state, wait_lock is irrelevant. +- */ +- struct futex_pi_state *pi_state = alloc_pi_state(); +- +- /* +- * Initialize the pi_mutex in locked state and make @p +- * the owner of it: +- */ +- rt_mutex_init_proxy_locked(&pi_state->pi_mutex, p); +- +- /* Store the key for possible exit cleanups: */ +- pi_state->key = *key; +- +- WARN_ON(!list_empty(&pi_state->list)); +- list_add(&pi_state->list, &p->pi_state_list); +- /* +- * Assignment without holding pi_state->pi_mutex.wait_lock is safe +- * because there is no concurrency as the object is not published yet. +- */ +- pi_state->owner = p; +- +- *ps = pi_state; +-} +-/* +- * Lookup the task for the TID provided from user space and attach to +- * it after doing proper sanity checks. +- */ +-static int attach_to_pi_owner(u32 __user *uaddr, u32 uval, union futex_key *key, +- struct futex_pi_state **ps, +- struct task_struct **exiting) +-{ +- pid_t pid = uval & FUTEX_TID_MASK; +- struct task_struct *p; +- +- /* +- * We are the first waiter - try to look up the real owner and attach +- * the new pi_state to it, but bail out when TID = 0 [1] +- * +- * The !pid check is paranoid. None of the call sites should end up +- * with pid == 0, but better safe than sorry. Let the caller retry +- */ +- if (!pid) +- return -EAGAIN; +- p = find_get_task_by_vpid(pid); +- if (!p) +- return handle_exit_race(uaddr, uval, NULL); +- +- if (unlikely(p->flags & PF_KTHREAD)) { +- put_task_struct(p); +- return -EPERM; +- } +- +- /* +- * We need to look at the task state to figure out, whether the +- * task is exiting. To protect against the change of the task state +- * in futex_exit_release(), we do this protected by p->pi_lock: +- */ +- raw_spin_lock_irq(&p->pi_lock); +- if (unlikely(p->futex_state != FUTEX_STATE_OK)) { +- /* +- * The task is on the way out. 
When the futex state is +- * FUTEX_STATE_DEAD, we know that the task has finished +- * the cleanup: +- */ +- int ret = handle_exit_race(uaddr, uval, p); +- +- raw_spin_unlock_irq(&p->pi_lock); +- /* +- * If the owner task is between FUTEX_STATE_EXITING and +- * FUTEX_STATE_DEAD then store the task pointer and keep +- * the reference on the task struct. The calling code will +- * drop all locks, wait for the task to reach +- * FUTEX_STATE_DEAD and then drop the refcount. This is +- * required to prevent a live lock when the current task +- * preempted the exiting task between the two states. +- */ +- if (ret == -EBUSY) +- *exiting = p; +- else +- put_task_struct(p); +- return ret; +- } +- +- __attach_to_pi_owner(p, key, ps); +- raw_spin_unlock_irq(&p->pi_lock); +- +- put_task_struct(p); +- +- return 0; +-} +- +-static int lock_pi_update_atomic(u32 __user *uaddr, u32 uval, u32 newval) +-{ +- int err; +- u32 curval; +- +- if (unlikely(should_fail_futex(true))) +- return -EFAULT; +- +- err = cmpxchg_futex_value_locked(&curval, uaddr, uval, newval); +- if (unlikely(err)) +- return err; +- +- /* If user space value changed, let the caller retry */ +- return curval != uval ? -EAGAIN : 0; +-} +- +-/** +- * futex_lock_pi_atomic() - Atomic work required to acquire a pi aware futex +- * @uaddr: the pi futex user address +- * @hb: the pi futex hash bucket +- * @key: the futex key associated with uaddr and hb +- * @ps: the pi_state pointer where we store the result of the +- * lookup +- * @task: the task to perform the atomic lock work for. This will +- * be "current" except in the case of requeue pi. +- * @exiting: Pointer to store the task pointer of the owner task +- * which is in the middle of exiting +- * @set_waiters: force setting the FUTEX_WAITERS bit (1) or not (0) +- * +- * Return: +- * - 0 - ready to wait; +- * - 1 - acquired the lock; +- * - <0 - error +- * +- * The hb->lock must be held by the caller. +- * +- * @exiting is only set when the return value is -EBUSY. If so, this holds +- * a refcount on the exiting task on return and the caller needs to drop it +- * after waiting for the exit to complete. +- */ +-static int futex_lock_pi_atomic(u32 __user *uaddr, struct futex_hash_bucket *hb, +- union futex_key *key, +- struct futex_pi_state **ps, +- struct task_struct *task, +- struct task_struct **exiting, +- int set_waiters) +-{ +- u32 uval, newval, vpid = task_pid_vnr(task); +- struct futex_q *top_waiter; +- int ret; +- +- /* +- * Read the user space value first so we can validate a few +- * things before proceeding further. +- */ +- if (get_futex_value_locked(&uval, uaddr)) +- return -EFAULT; +- +- if (unlikely(should_fail_futex(true))) +- return -EFAULT; +- +- /* +- * Detect deadlocks. +- */ +- if ((unlikely((uval & FUTEX_TID_MASK) == vpid))) +- return -EDEADLK; +- +- if ((unlikely(should_fail_futex(true)))) +- return -EDEADLK; +- +- /* +- * Lookup existing state first. If it exists, try to attach to +- * its pi_state. +- */ +- top_waiter = futex_top_waiter(hb, key); +- if (top_waiter) +- return attach_to_pi_state(uaddr, uval, top_waiter->pi_state, ps); +- +- /* +- * No waiter and user TID is 0. We are here because the +- * waiters or the owner died bit is set or called from +- * requeue_cmp_pi or for whatever reason something took the +- * syscall. +- */ +- if (!(uval & FUTEX_TID_MASK)) { +- /* +- * We take over the futex. No other waiters and the user space +- * TID is 0. We preserve the owner died bit. 
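
Kernel-side, futex_lock_pi_atomic() is the slow-path half of a protocol whose fast path lives entirely in user space: an uncontended lock is a single compare-and-swap of 0 to the locker's TID, and an uncontended unlock is the reverse. A minimal sketch of that fast path (illustrative helpers, no error handling, FUTEX_PRIVATE_FLAG omitted):

#include <stdint.h>
#include <stdatomic.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/futex.h>

static void pi_lock(_Atomic uint32_t *f)
{
        uint32_t zero = 0;
        uint32_t tid = (uint32_t)syscall(SYS_gettid);

        /* Fast path: 0 -> TID. On contention the kernel runs
         * futex_lock_pi_atomic() as quoted above. */
        if (!atomic_compare_exchange_strong(f, &zero, tid))
                syscall(SYS_futex, f, FUTEX_LOCK_PI, 0, NULL, NULL, 0);
}

static void pi_unlock(_Atomic uint32_t *f)
{
        uint32_t tid = (uint32_t)syscall(SYS_gettid);

        /* Fast path: TID -> 0. Fails if FUTEX_WAITERS is set,
         * forcing the TID->0 transition into the kernel. */
        if (!atomic_compare_exchange_strong(f, &tid, 0))
                syscall(SYS_futex, f, FUTEX_UNLOCK_PI, 0, NULL, NULL, 0);
}

Only when the CAS fails does the kernel get involved, and only then do the WAITERS and OWNER_DIED bits handled above come into play.
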
+- */ +- newval = uval & FUTEX_OWNER_DIED; +- newval |= vpid; +- +- /* The futex requeue_pi code can enforce the waiters bit */ +- if (set_waiters) +- newval |= FUTEX_WAITERS; +- +- ret = lock_pi_update_atomic(uaddr, uval, newval); +- if (ret) +- return ret; +- +- /* +- * If the waiter bit was requested the caller also needs PI +- * state attached to the new owner of the user space futex. +- * +- * @task is guaranteed to be alive and it cannot be exiting +- * because it is either sleeping or waiting in +- * futex_requeue_pi_wakeup_sync(). +- * +- * No need to do the full attach_to_pi_owner() exercise +- * because @task is known and valid. +- */ +- if (set_waiters) { +- raw_spin_lock_irq(&task->pi_lock); +- __attach_to_pi_owner(task, key, ps); +- raw_spin_unlock_irq(&task->pi_lock); +- } +- return 1; +- } +- +- /* +- * First waiter. Set the waiters bit before attaching ourself to +- * the owner. If owner tries to unlock, it will be forced into +- * the kernel and blocked on hb->lock. +- */ +- newval = uval | FUTEX_WAITERS; +- ret = lock_pi_update_atomic(uaddr, uval, newval); +- if (ret) +- return ret; +- /* +- * If the update of the user space value succeeded, we try to +- * attach to the owner. If that fails, no harm done, we only +- * set the FUTEX_WAITERS bit in the user space variable. +- */ +- return attach_to_pi_owner(uaddr, newval, key, ps, exiting); +-} +- +-/** +- * __unqueue_futex() - Remove the futex_q from its futex_hash_bucket +- * @q: The futex_q to unqueue +- * +- * The q->lock_ptr must not be NULL and must be held by the caller. +- */ +-static void __unqueue_futex(struct futex_q *q) +-{ +- struct futex_hash_bucket *hb; +- +- if (WARN_ON_SMP(!q->lock_ptr) || WARN_ON(plist_node_empty(&q->list))) +- return; +- lockdep_assert_held(q->lock_ptr); +- +- hb = container_of(q->lock_ptr, struct futex_hash_bucket, lock); +- plist_del(&q->list, &hb->chain); +- hb_waiters_dec(hb); +-} +- +-/* +- * The hash bucket lock must be held when this is called. +- * Afterwards, the futex_q must not be accessed. Callers +- * must ensure to later call wake_up_q() for the actual +- * wakeups to occur. +- */ +-static void mark_wake_futex(struct wake_q_head *wake_q, struct futex_q *q) +-{ +- struct task_struct *p = q->task; +- +- if (WARN(q->pi_state || q->rt_waiter, "refusing to wake PI futex\n")) +- return; +- +- get_task_struct(p); +- __unqueue_futex(q); +- /* +- * The waiting task can free the futex_q as soon as q->lock_ptr = NULL +- * is written, without taking any locks. This is possible in the event +- * of a spurious wakeup, for example. A memory barrier is required here +- * to prevent the following store to lock_ptr from getting ahead of the +- * plist_del in __unqueue_futex(). +- */ +- smp_store_release(&q->lock_ptr, NULL); +- +- /* +- * Queue the task for later wakeup for after we've released +- * the hb->lock. +- */ +- wake_q_add_safe(wake_q, p); +-} +- +-/* +- * Caller must hold a reference on @pi_state. +- */ +-static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_pi_state *pi_state) +-{ +- struct rt_mutex_waiter *top_waiter; +- struct task_struct *new_owner; +- bool postunlock = false; +- DEFINE_RT_WAKE_Q(wqh); +- u32 curval, newval; +- int ret = 0; +- +- top_waiter = rt_mutex_top_waiter(&pi_state->pi_mutex); +- if (WARN_ON_ONCE(!top_waiter)) { +- /* +- * As per the comment in futex_unlock_pi() this should not happen. 
+- * +- * When this happens, give up our locks and try again, giving +- * the futex_lock_pi() instance time to complete, either by +- * waiting on the rtmutex or removing itself from the futex +- * queue. +- */ +- ret = -EAGAIN; +- goto out_unlock; +- } +- +- new_owner = top_waiter->task; +- +- /* +- * We pass it to the next owner. The WAITERS bit is always kept +- * enabled while there is PI state around. We cleanup the owner +- * died bit, because we are the owner. +- */ +- newval = FUTEX_WAITERS | task_pid_vnr(new_owner); +- +- if (unlikely(should_fail_futex(true))) { +- ret = -EFAULT; +- goto out_unlock; +- } +- +- ret = cmpxchg_futex_value_locked(&curval, uaddr, uval, newval); +- if (!ret && (curval != uval)) { +- /* +- * If a unconditional UNLOCK_PI operation (user space did not +- * try the TID->0 transition) raced with a waiter setting the +- * FUTEX_WAITERS flag between get_user() and locking the hash +- * bucket lock, retry the operation. +- */ +- if ((FUTEX_TID_MASK & curval) == uval) +- ret = -EAGAIN; +- else +- ret = -EINVAL; +- } +- +- if (!ret) { +- /* +- * This is a point of no return; once we modified the uval +- * there is no going back and subsequent operations must +- * not fail. +- */ +- pi_state_update_owner(pi_state, new_owner); +- postunlock = __rt_mutex_futex_unlock(&pi_state->pi_mutex, &wqh); +- } +- +-out_unlock: +- raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock); +- +- if (postunlock) +- rt_mutex_postunlock(&wqh); +- +- return ret; +-} +- +-/* +- * Express the locking dependencies for lockdep: +- */ +-static inline void +-double_lock_hb(struct futex_hash_bucket *hb1, struct futex_hash_bucket *hb2) +-{ +- if (hb1 <= hb2) { +- spin_lock(&hb1->lock); +- if (hb1 < hb2) +- spin_lock_nested(&hb2->lock, SINGLE_DEPTH_NESTING); +- } else { /* hb1 > hb2 */ +- spin_lock(&hb2->lock); +- spin_lock_nested(&hb1->lock, SINGLE_DEPTH_NESTING); +- } +-} +- +-static inline void +-double_unlock_hb(struct futex_hash_bucket *hb1, struct futex_hash_bucket *hb2) +-{ +- spin_unlock(&hb1->lock); +- if (hb1 != hb2) +- spin_unlock(&hb2->lock); +-} +- +-/* +- * Wake up waiters matching bitset queued on this futex (uaddr). 
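
The bitset is caller-chosen: FUTEX_WAIT_BITSET stores it with the waiter, and FUTEX_WAKE_BITSET wakes only waiters whose stored mask intersects the one passed in. A userspace sketch, assuming two notification "channels" multiplexed on one futex word (function names are illustrative):

#include <stdint.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/futex.h>

#define CHANNEL_A 0x1u
#define CHANNEL_B 0x2u

static long wait_on_channel_a(uint32_t *f, uint32_t expected)
{
        /* Blocks while *f == expected; only wakeups whose bitset
         * overlaps CHANNEL_A reach this waiter. */
        return syscall(SYS_futex, f, FUTEX_WAIT_BITSET, expected,
                       NULL, NULL, CHANNEL_A);
}

static long wake_channel_a(uint32_t *f)
{
        /* Wakes at most one waiter, skipping CHANNEL_B-only waiters
         * exactly as the bitset test in futex_wake() does. */
        return syscall(SYS_futex, f, FUTEX_WAKE_BITSET, 1,
                       NULL, NULL, CHANNEL_A);
}

A plain FUTEX_WAIT/FUTEX_WAKE is the same thing with the mask FUTEX_BITSET_MATCH_ANY (all bits set).
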
+- */ +-static int +-futex_wake(u32 __user *uaddr, unsigned int flags, int nr_wake, u32 bitset) +-{ +- struct futex_hash_bucket *hb; +- struct futex_q *this, *next; +- union futex_key key = FUTEX_KEY_INIT; +- int ret; +- DEFINE_WAKE_Q(wake_q); +- +- if (!bitset) +- return -EINVAL; +- +- ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &key, FUTEX_READ); +- if (unlikely(ret != 0)) +- return ret; +- +- hb = hash_futex(&key); +- +- /* Make sure we really have tasks to wakeup */ +- if (!hb_waiters_pending(hb)) +- return ret; +- +- spin_lock(&hb->lock); +- +- plist_for_each_entry_safe(this, next, &hb->chain, list) { +- if (match_futex (&this->key, &key)) { +- if (this->pi_state || this->rt_waiter) { +- ret = -EINVAL; +- break; +- } +- +- /* Check if one of the bits is set in both bitsets */ +- if (!(this->bitset & bitset)) +- continue; +- +- mark_wake_futex(&wake_q, this); +- if (++ret >= nr_wake) +- break; +- } +- } +- +- spin_unlock(&hb->lock); +- wake_up_q(&wake_q); +- return ret; +-} +- +-static int futex_atomic_op_inuser(unsigned int encoded_op, u32 __user *uaddr) +-{ +- unsigned int op = (encoded_op & 0x70000000) >> 28; +- unsigned int cmp = (encoded_op & 0x0f000000) >> 24; +- int oparg = sign_extend32((encoded_op & 0x00fff000) >> 12, 11); +- int cmparg = sign_extend32(encoded_op & 0x00000fff, 11); +- int oldval, ret; +- +- if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) { +- if (oparg < 0 || oparg > 31) { +- char comm[sizeof(current->comm)]; +- /* +- * kill this print and return -EINVAL when userspace +- * is sane again +- */ +- pr_info_ratelimited("futex_wake_op: %s tries to shift op by %d; fix this program\n", +- get_task_comm(comm, current), oparg); +- oparg &= 31; +- } +- oparg = 1 << oparg; +- } - -- /* If the MISSED flag is set, it means other thread has -- * set the MISSED flag before second spin_trylock(), so -- * we can return false here to avoid multi cpus doing -- * the set_bit() and second spin_trylock() concurrently. -- */ -- if (test_bit(__QDISC_STATE_MISSED, &qdisc->state)) -+ if (test_and_set_bit(__QDISC_STATE_MISSED, &qdisc->state)) - return false; - -- /* Set the MISSED flag before the second spin_trylock(), -- * if the second spin_trylock() return false, it means -- * other cpu holding the lock will do dequeuing for us -- * or it will see the MISSED flag set after releasing -- * lock and reschedule the net_tx_action() to do the -- * dequeuing. 
+- pagefault_disable(); +- ret = arch_futex_atomic_op_inuser(op, oparg, &oldval, uaddr); +- pagefault_enable(); +- if (ret) +- return ret; +- +- switch (cmp) { +- case FUTEX_OP_CMP_EQ: +- return oldval == cmparg; +- case FUTEX_OP_CMP_NE: +- return oldval != cmparg; +- case FUTEX_OP_CMP_LT: +- return oldval < cmparg; +- case FUTEX_OP_CMP_GE: +- return oldval >= cmparg; +- case FUTEX_OP_CMP_LE: +- return oldval <= cmparg; +- case FUTEX_OP_CMP_GT: +- return oldval > cmparg; +- default: +- return -ENOSYS; +- } +-} +- +-/* +- * Wake up all waiters hashed on the physical page that is mapped +- * to this virtual address: +- */ +-static int +-futex_wake_op(u32 __user *uaddr1, unsigned int flags, u32 __user *uaddr2, +- int nr_wake, int nr_wake2, int op) +-{ +- union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT; +- struct futex_hash_bucket *hb1, *hb2; +- struct futex_q *this, *next; +- int ret, op_ret; +- DEFINE_WAKE_Q(wake_q); +- +-retry: +- ret = get_futex_key(uaddr1, flags & FLAGS_SHARED, &key1, FUTEX_READ); +- if (unlikely(ret != 0)) +- return ret; +- ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2, FUTEX_WRITE); +- if (unlikely(ret != 0)) +- return ret; +- +- hb1 = hash_futex(&key1); +- hb2 = hash_futex(&key2); +- +-retry_private: +- double_lock_hb(hb1, hb2); +- op_ret = futex_atomic_op_inuser(op, uaddr2); +- if (unlikely(op_ret < 0)) { +- double_unlock_hb(hb1, hb2); +- +- if (!IS_ENABLED(CONFIG_MMU) || +- unlikely(op_ret != -EFAULT && op_ret != -EAGAIN)) { +- /* +- * we don't get EFAULT from MMU faults if we don't have +- * an MMU, but we might get them from range checking +- */ +- ret = op_ret; +- return ret; +- } +- +- if (op_ret == -EFAULT) { +- ret = fault_in_user_writeable(uaddr2); +- if (ret) +- return ret; +- } +- +- cond_resched(); +- if (!(flags & FLAGS_SHARED)) +- goto retry_private; +- goto retry; +- } +- +- plist_for_each_entry_safe(this, next, &hb1->chain, list) { +- if (match_futex (&this->key, &key1)) { +- if (this->pi_state || this->rt_waiter) { +- ret = -EINVAL; +- goto out_unlock; +- } +- mark_wake_futex(&wake_q, this); +- if (++ret >= nr_wake) +- break; +- } +- } +- +- if (op_ret > 0) { +- op_ret = 0; +- plist_for_each_entry_safe(this, next, &hb2->chain, list) { +- if (match_futex (&this->key, &key2)) { +- if (this->pi_state || this->rt_waiter) { +- ret = -EINVAL; +- goto out_unlock; +- } +- mark_wake_futex(&wake_q, this); +- if (++op_ret >= nr_wake2) +- break; +- } +- } +- ret += op_ret; +- } +- +-out_unlock: +- double_unlock_hb(hb1, hb2); +- wake_up_q(&wake_q); +- return ret; +-} +- +-/** +- * requeue_futex() - Requeue a futex_q from one hb to another +- * @q: the futex_q to requeue +- * @hb1: the source hash_bucket +- * @hb2: the target hash_bucket +- * @key2: the new key for the requeued futex_q +- */ +-static inline +-void requeue_futex(struct futex_q *q, struct futex_hash_bucket *hb1, +- struct futex_hash_bucket *hb2, union futex_key *key2) +-{ +- +- /* +- * If key1 and key2 hash to the same bucket, no need to +- * requeue. +- */ +- if (likely(&hb1->chain != &hb2->chain)) { +- plist_del(&q->list, &hb1->chain); +- hb_waiters_dec(hb1); +- hb_waiters_inc(hb2); +- plist_add(&q->list, &hb2->chain); +- q->lock_ptr = &hb2->lock; +- } +- q->key = *key2; +-} +- +-static inline bool futex_requeue_pi_prepare(struct futex_q *q, +- struct futex_pi_state *pi_state) +-{ +- int old, new; +- +- /* +- * Set state to Q_REQUEUE_PI_IN_PROGRESS unless an early wakeup has +- * already set Q_REQUEUE_PI_IGNORE to signal that requeue should +- * ignore the waiter. 
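
An aside on futex_wake_op() above: user space packs the operation into encoded_op with the uapi FUTEX_OP() macro. Expanded for clarity, the encoding that futex_atomic_op_inuser() unpacks is (sketch; the helper name is illustrative):

#include <stdint.h>

/* 4-bit op | 4-bit cmp | 12-bit oparg | 12-bit cmparg, matching the
 * shifts in futex_atomic_op_inuser(). Setting FUTEX_OP_OPARG_SHIFT
 * in 'op' (bit 31 once shifted) makes the kernel use (1 << oparg)
 * in place of oparg, which is what the range check above polices.
 */
static inline uint32_t futex_op_encode(uint32_t op, uint32_t cmp,
                                       uint32_t oparg, uint32_t cmparg)
{
        return ((op & 0xf) << 28) | ((cmp & 0xf) << 24) |
               ((oparg & 0xfff) << 12) | (cmparg & 0xfff);
}

Historically the main consumer was condvar broadcast: for example, FUTEX_OP(FUTEX_OP_SET, 0, FUTEX_OP_CMP_GT, 1) atomically sets *uaddr2 to 0 and additionally wakes uaddr2's waiters only if the old value there was greater than 1.
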
+- */ +- old = atomic_read_acquire(&q->requeue_state); +- do { +- if (old == Q_REQUEUE_PI_IGNORE) +- return false; +- +- /* +- * futex_proxy_trylock_atomic() might have set it to +- * IN_PROGRESS and a interleaved early wake to WAIT. +- * +- * It was considered to have an extra state for that +- * trylock, but that would just add more conditionals +- * all over the place for a dubious value. - */ -- set_bit(__QDISC_STATE_MISSED, &qdisc->state); +- if (old != Q_REQUEUE_PI_NONE) +- break; - -- /* spin_trylock() only has load-acquire semantic, so use -- * smp_mb__after_atomic() to ensure STATE_MISSED is set -- * before doing the second spin_trylock(). +- new = Q_REQUEUE_PI_IN_PROGRESS; +- } while (!atomic_try_cmpxchg(&q->requeue_state, &old, new)); +- +- q->pi_state = pi_state; +- return true; +-} +- +-static inline void futex_requeue_pi_complete(struct futex_q *q, int locked) +-{ +- int old, new; +- +- old = atomic_read_acquire(&q->requeue_state); +- do { +- if (old == Q_REQUEUE_PI_IGNORE) +- return; +- +- if (locked >= 0) { +- /* Requeue succeeded. Set DONE or LOCKED */ +- WARN_ON_ONCE(old != Q_REQUEUE_PI_IN_PROGRESS && +- old != Q_REQUEUE_PI_WAIT); +- new = Q_REQUEUE_PI_DONE + locked; +- } else if (old == Q_REQUEUE_PI_IN_PROGRESS) { +- /* Deadlock, no early wakeup interleave */ +- new = Q_REQUEUE_PI_NONE; +- } else { +- /* Deadlock, early wakeup interleave. */ +- WARN_ON_ONCE(old != Q_REQUEUE_PI_WAIT); +- new = Q_REQUEUE_PI_IGNORE; +- } +- } while (!atomic_try_cmpxchg(&q->requeue_state, &old, new)); +- +-#ifdef CONFIG_PREEMPT_RT +- /* If the waiter interleaved with the requeue let it know */ +- if (unlikely(old == Q_REQUEUE_PI_WAIT)) +- rcuwait_wake_up(&q->requeue_wait); +-#endif +-} +- +-static inline int futex_requeue_pi_wakeup_sync(struct futex_q *q) +-{ +- int old, new; +- +- old = atomic_read_acquire(&q->requeue_state); +- do { +- /* Is requeue done already? */ +- if (old >= Q_REQUEUE_PI_DONE) +- return old; +- +- /* +- * If not done, then tell the requeue code to either ignore +- * the waiter or to wake it up once the requeue is done. - */ -- smp_mb__after_atomic(); +- new = Q_REQUEUE_PI_WAIT; +- if (old == Q_REQUEUE_PI_NONE) +- new = Q_REQUEUE_PI_IGNORE; +- } while (!atomic_try_cmpxchg(&q->requeue_state, &old, new)); +- +- /* If the requeue was in progress, wait for it to complete */ +- if (old == Q_REQUEUE_PI_IN_PROGRESS) { +-#ifdef CONFIG_PREEMPT_RT +- rcuwait_wait_event(&q->requeue_wait, +- atomic_read(&q->requeue_state) != Q_REQUEUE_PI_WAIT, +- TASK_UNINTERRUPTIBLE); +-#else +- (void)atomic_cond_read_relaxed(&q->requeue_state, VAL != Q_REQUEUE_PI_WAIT); +-#endif +- } - -- /* Retry again in case other CPU may not see the new flag -- * after it releases the lock at the end of qdisc_run_end(). -+ /* Try to take the lock again to make sure that we will either -+ * grab it or the CPU that still has it will see MISSED set -+ * when testing it in qdisc_run_end() - */ - return spin_trylock(&qdisc->seqlock); - } else if (qdisc_is_running(qdisc)) { -@@ -222,6 +202,12 @@ static inline void qdisc_run_end(struct Qdisc *qdisc) - if (qdisc->flags & TCQ_F_NOLOCK) { - spin_unlock(&qdisc->seqlock); - -+ /* spin_unlock() only has store-release semantic. The unlock -+ * and test_bit() ordering is a store-load ordering, so a full -+ * memory barrier is needed here. 
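
In C11 terms, the barrier being introduced here closes a store->load reordering window that a release-only unlock leaves open; a seq_cst fence is the portable analogue of smp_mb(). A minimal sketch of the same pattern (variable and function names are illustrative):

#include <stdatomic.h>

static atomic_bool locked, missed;

static void unlock_then_check(void)
{
        /* Release orders everything before it, but a later load can
         * still be hoisted above this store by the CPU ... */
        atomic_store_explicit(&locked, 0, memory_order_release);

        /* ... so the store->load order needs a full fence, the C11
         * counterpart of the smp_mb() added above. */
        atomic_thread_fence(memory_order_seq_cst);

        if (atomic_load_explicit(&missed, memory_order_relaxed)) {
                /* reschedule, as __netif_schedule() does above */
        }
}
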
-+ */ -+ smp_mb(); -+ - if (unlikely(test_bit(__QDISC_STATE_MISSED, - &qdisc->state))) - __netif_schedule(qdisc); -@@ -308,6 +294,8 @@ struct Qdisc_ops { - struct netlink_ext_ack *extack); - void (*attach)(struct Qdisc *sch); - int (*change_tx_queue_len)(struct Qdisc *, unsigned int); -+ void (*change_real_num_tx)(struct Qdisc *sch, -+ unsigned int new_real_tx); - - int (*dump)(struct Qdisc *, struct sk_buff *); - int (*dump_stats)(struct Qdisc *, struct gnet_dump *); -@@ -438,8 +426,6 @@ struct qdisc_skb_cb { - }; - #define QDISC_CB_PRIV_LEN 20 - unsigned char data[QDISC_CB_PRIV_LEN]; -- u16 mru; -- bool post_ct; - }; - - typedef void tcf_chain_head_change_t(struct tcf_proto *tp_head, void *priv); -@@ -684,6 +670,8 @@ void qdisc_class_hash_grow(struct Qdisc *, struct Qdisc_class_hash *); - void qdisc_class_hash_destroy(struct Qdisc_class_hash *); - - int dev_qdisc_change_tx_queue_len(struct net_device *dev); -+void dev_qdisc_change_real_num_tx(struct net_device *dev, -+ unsigned int new_real_tx); - void dev_init_scheduler(struct net_device *dev); - void dev_shutdown(struct net_device *dev); - void dev_activate(struct net_device *dev); -@@ -1189,7 +1177,6 @@ static inline void __qdisc_reset_queue(struct qdisc_skb_head *qh) - static inline void qdisc_reset_queue(struct Qdisc *sch) - { - __qdisc_reset_queue(&sch->q); -- sch->qstats.backlog = 0; - } - - static inline struct Qdisc *qdisc_replace(struct Qdisc *sch, struct Qdisc *new, -@@ -1258,6 +1245,7 @@ struct psched_ratecfg { - u64 rate_bytes_ps; /* bytes per second */ - u32 mult; - u16 overhead; -+ u16 mpu; - u8 linklayer; - u8 shift; - }; -@@ -1267,6 +1255,9 @@ static inline u64 psched_l2t_ns(const struct psched_ratecfg *r, - { - len += r->overhead; - -+ if (len < r->mpu) -+ len = r->mpu; -+ - if (unlikely(r->linklayer == TC_LINKLAYER_ATM)) - return ((u64)(DIV_ROUND_UP(len,48)*53) * r->mult) >> r->shift; - -@@ -1289,6 +1280,7 @@ static inline void psched_ratecfg_getrate(struct tc_ratespec *res, - res->rate = min_t(u64, r->rate_bytes_ps, ~0U); - - res->overhead = r->overhead; -+ res->mpu = r->mpu; - res->linklayer = (r->linklayer & TC_LINKLAYER_MASK); - } - -diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h -index 69bab88ad66b1..3ae61ce2eabd0 100644 ---- a/include/net/sctp/sctp.h -+++ b/include/net/sctp/sctp.h -@@ -105,19 +105,18 @@ extern struct percpu_counter sctp_sockets_allocated; - int sctp_asconf_mgmt(struct sctp_sock *, struct sctp_sockaddr_entry *); - struct sk_buff *sctp_skb_recv_datagram(struct sock *, int, int, int *); - -+typedef int (*sctp_callback_t)(struct sctp_endpoint *, struct sctp_transport *, void *); - void sctp_transport_walk_start(struct rhashtable_iter *iter); - void sctp_transport_walk_stop(struct rhashtable_iter *iter); - struct sctp_transport *sctp_transport_get_next(struct net *net, - struct rhashtable_iter *iter); - struct sctp_transport *sctp_transport_get_idx(struct net *net, - struct rhashtable_iter *iter, int pos); --int sctp_transport_lookup_process(int (*cb)(struct sctp_transport *, void *), -- struct net *net, -+int sctp_transport_lookup_process(sctp_callback_t cb, struct net *net, - const union sctp_addr *laddr, - const union sctp_addr *paddr, void *p); --int sctp_for_each_transport(int (*cb)(struct sctp_transport *, void *), -- int (*cb_done)(struct sctp_transport *, void *), -- struct net *net, int *pos, void *p); -+int sctp_transport_traverse_process(sctp_callback_t cb, sctp_callback_t cb_done, -+ struct net *net, int *pos, void *p); - int sctp_for_each_endpoint(int (*cb)(struct 
sctp_endpoint *, void *), void *p); - int sctp_get_sctp_info(struct sock *sk, struct sctp_association *asoc, - struct sctp_info *info); -@@ -626,7 +625,8 @@ static inline __u32 sctp_min_frag_point(struct sctp_sock *sp, __u16 datasize) - - static inline int sctp_transport_pl_hlen(struct sctp_transport *t) - { -- return __sctp_mtu_payload(sctp_sk(t->asoc->base.sk), t, 0, 0); -+ return __sctp_mtu_payload(sctp_sk(t->asoc->base.sk), t, 0, 0) - -+ sizeof(struct sctphdr); - } - - static inline void sctp_transport_pl_reset(struct sctp_transport *t) -@@ -653,12 +653,10 @@ static inline void sctp_transport_pl_update(struct sctp_transport *t) - if (t->pl.state == SCTP_PL_DISABLED) - return; - -- if (del_timer(&t->probe_timer)) -- sctp_transport_put(t); +- /* +- * Requeue is now either prohibited or complete. Reread state +- * because during the wait above it might have changed. Nothing +- * will modify q->requeue_state after this point. +- */ +- return atomic_read(&q->requeue_state); +-} - - t->pl.state = SCTP_PL_BASE; - t->pl.pmtu = SCTP_BASE_PLPMTU; - t->pl.probe_size = SCTP_BASE_PLPMTU; -+ sctp_transport_reset_probe_timer(t); - } - - static inline bool sctp_transport_pl_enabled(struct sctp_transport *t) -diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h -index 651bba654d77d..8d2c3dd9f5953 100644 ---- a/include/net/sctp/structs.h -+++ b/include/net/sctp/structs.h -@@ -1365,6 +1365,7 @@ struct sctp_endpoint { - - u32 secid; - u32 peer_secid; -+ struct rcu_head rcu; - }; - - /* Recover the outter endpoint structure. */ -@@ -1380,7 +1381,7 @@ static inline struct sctp_endpoint *sctp_ep(struct sctp_ep_common *base) - struct sctp_endpoint *sctp_endpoint_new(struct sock *, gfp_t); - void sctp_endpoint_free(struct sctp_endpoint *); - void sctp_endpoint_put(struct sctp_endpoint *); --void sctp_endpoint_hold(struct sctp_endpoint *); -+int sctp_endpoint_hold(struct sctp_endpoint *ep); - void sctp_endpoint_add_asoc(struct sctp_endpoint *, struct sctp_association *); - struct sctp_association *sctp_endpoint_lookup_assoc( - const struct sctp_endpoint *ep, -diff --git a/include/net/secure_seq.h b/include/net/secure_seq.h -index d7d2495f83c27..dac91aa38c5af 100644 ---- a/include/net/secure_seq.h -+++ b/include/net/secure_seq.h -@@ -4,8 +4,8 @@ - - #include <linux/types.h> - --u32 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport); --u32 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr, -+u64 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport); -+u64 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr, - __be16 dport); - u32 secure_tcp_seq(__be32 saddr, __be32 daddr, - __be16 sport, __be16 dport); -diff --git a/include/net/seg6.h b/include/net/seg6.h -index 9d19c15e8545c..af668f17b3988 100644 ---- a/include/net/seg6.h -+++ b/include/net/seg6.h -@@ -58,9 +58,30 @@ extern int seg6_local_init(void); - extern void seg6_local_exit(void); - - extern bool seg6_validate_srh(struct ipv6_sr_hdr *srh, int len, bool reduced); -+extern struct ipv6_sr_hdr *seg6_get_srh(struct sk_buff *skb, int flags); -+extern void seg6_icmp_srh(struct sk_buff *skb, struct inet6_skb_parm *opt); - extern int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, - int proto); - extern int seg6_do_srh_inline(struct sk_buff *skb, struct ipv6_sr_hdr *osrh); - extern int seg6_lookup_nexthop(struct sk_buff *skb, struct in6_addr *nhaddr, - u32 tbl_id); -+ -+/* If the packet which invoked an ICMP error contains an SRH return -+ * the true destination 
address from within the SRH, otherwise use the -+ * destination address in the IP header. -+ */ -+static inline const struct in6_addr *seg6_get_daddr(struct sk_buff *skb, -+ struct inet6_skb_parm *opt) -+{ -+ struct ipv6_sr_hdr *srh; -+ -+ if (opt->flags & IP6SKB_SEG6) { -+ srh = (struct ipv6_sr_hdr *)(skb->data + opt->srhoff); -+ return &srh->segments[0]; -+ } -+ -+ return NULL; -+} -+ -+ - #endif -diff --git a/include/net/sock.h b/include/net/sock.h -index 463f390d90b3e..cb1a1bb64ed81 100644 ---- a/include/net/sock.h -+++ b/include/net/sock.h -@@ -161,9 +161,6 @@ typedef __u64 __bitwise __addrpair; - * for struct sock and struct inet_timewait_sock. - */ - struct sock_common { -- /* skc_daddr and skc_rcv_saddr must be grouped on a 8 bytes aligned -- * address on 64bit arches : cf INET_MATCH() +-/** +- * requeue_pi_wake_futex() - Wake a task that acquired the lock during requeue +- * @q: the futex_q +- * @key: the key of the requeue target futex +- * @hb: the hash_bucket of the requeue target futex +- * +- * During futex_requeue, with requeue_pi=1, it is possible to acquire the +- * target futex if it is uncontended or via a lock steal. +- * +- * 1) Set @q::key to the requeue target futex key so the waiter can detect +- * the wakeup on the right futex. +- * +- * 2) Dequeue @q from the hash bucket. +- * +- * 3) Set @q::rt_waiter to NULL so the woken up task can detect atomic lock +- * acquisition. +- * +- * 4) Set the q->lock_ptr to the requeue target hb->lock for the case that +- * the waiter has to fixup the pi state. +- * +- * 5) Complete the requeue state so the waiter can make progress. After +- * this point the waiter task can return from the syscall immediately in +- * case that the pi state does not have to be fixed up. +- * +- * 6) Wake the waiter task. +- * +- * Must be called with both q->lock_ptr and hb->lock held. +- */ +-static inline +-void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key, +- struct futex_hash_bucket *hb) +-{ +- q->key = *key; +- +- __unqueue_futex(q); +- +- WARN_ON(!q->rt_waiter); +- q->rt_waiter = NULL; +- +- q->lock_ptr = &hb->lock; +- +- /* Signal locked state to the waiter */ +- futex_requeue_pi_complete(q, 1); +- wake_up_state(q->task, TASK_NORMAL); +-} +- +-/** +- * futex_proxy_trylock_atomic() - Attempt an atomic lock for the top waiter +- * @pifutex: the user address of the to futex +- * @hb1: the from futex hash bucket, must be locked by the caller +- * @hb2: the to futex hash bucket, must be locked by the caller +- * @key1: the from futex key +- * @key2: the to futex key +- * @ps: address to store the pi_state pointer +- * @exiting: Pointer to store the task pointer of the owner task +- * which is in the middle of exiting +- * @set_waiters: force setting the FUTEX_WAITERS bit (1) or not (0) +- * +- * Try and get the lock on behalf of the top waiter if we can do it atomically. +- * Wake the top waiter if we succeed. If the caller specified set_waiters, +- * then direct futex_lock_pi_atomic() to force setting the FUTEX_WAITERS bit. +- * hb1 and hb2 must be held by the caller. +- * +- * @exiting is only set when the return value is -EBUSY. If so, this holds +- * a refcount on the exiting task on return and the caller needs to drop it +- * after waiting for the exit to complete. 
+- * +- * Return: +- * - 0 - failed to acquire the lock atomically; +- * - >0 - acquired the lock, return value is vpid of the top_waiter +- * - <0 - error +- */ +-static int +-futex_proxy_trylock_atomic(u32 __user *pifutex, struct futex_hash_bucket *hb1, +- struct futex_hash_bucket *hb2, union futex_key *key1, +- union futex_key *key2, struct futex_pi_state **ps, +- struct task_struct **exiting, int set_waiters) +-{ +- struct futex_q *top_waiter = NULL; +- u32 curval; +- int ret; +- +- if (get_futex_value_locked(&curval, pifutex)) +- return -EFAULT; +- +- if (unlikely(should_fail_futex(true))) +- return -EFAULT; +- +- /* +- * Find the top_waiter and determine if there are additional waiters. +- * If the caller intends to requeue more than 1 waiter to pifutex, +- * force futex_lock_pi_atomic() to set the FUTEX_WAITERS bit now, +- * as we have means to handle the possible fault. If not, don't set +- * the bit unnecessarily as it will force the subsequent unlock to enter +- * the kernel. - */ - union { - __addrpair skc_addrpair; - struct { -@@ -259,6 +256,8 @@ struct bpf_local_storage; - * @sk_rcvbuf: size of receive buffer in bytes - * @sk_wq: sock wait queue and async head - * @sk_rx_dst: receive input route used by early demux -+ * @sk_rx_dst_ifindex: ifindex for @sk_rx_dst -+ * @sk_rx_dst_cookie: cookie for @sk_rx_dst - * @sk_dst_cache: destination cache - * @sk_dst_pending_confirm: need to confirm neighbour - * @sk_policy: flow policy -@@ -430,7 +429,10 @@ struct sock { - #ifdef CONFIG_XFRM - struct xfrm_policy __rcu *sk_policy[2]; - #endif -- struct dst_entry *sk_rx_dst; -+ struct dst_entry __rcu *sk_rx_dst; -+ int sk_rx_dst_ifindex; -+ u32 sk_rx_dst_cookie; -+ - struct dst_entry __rcu *sk_dst_cache; - atomic_t sk_omem_alloc; - int sk_sndbuf; -@@ -501,7 +503,7 @@ struct sock { - u16 sk_tsflags; - int sk_bind_phc; - u8 sk_shutdown; -- u32 sk_tskey; -+ atomic_t sk_tskey; - atomic_t sk_zckey; - - u8 sk_clockid; -@@ -541,14 +543,26 @@ enum sk_pacing { - SK_PACING_FQ = 2, - }; - --/* Pointer stored in sk_user_data might not be suitable for copying -- * when cloning the socket. For instance, it can point to a reference -- * counted object. sk_user_data bottom bit is set if pointer must not -- * be copied. -+/* flag bits in sk_user_data -+ * -+ * - SK_USER_DATA_NOCOPY: Pointer stored in sk_user_data might -+ * not be suitable for copying when cloning the socket. For instance, -+ * it can point to a reference counted object. sk_user_data bottom -+ * bit is set if pointer must not be copied. -+ * -+ * - SK_USER_DATA_BPF: Mark whether sk_user_data field is -+ * managed/owned by a BPF reuseport array. This bit should be set -+ * when sk_user_data's sk is added to the bpf's reuseport_array. -+ * -+ * - SK_USER_DATA_PSOCK: Mark whether pointer stored in -+ * sk_user_data points to psock type. This bit should be set -+ * when sk_user_data is assigned to a psock object. 
- */ - #define SK_USER_DATA_NOCOPY 1UL --#define SK_USER_DATA_BPF 2UL /* Managed by BPF */ --#define SK_USER_DATA_PTRMASK ~(SK_USER_DATA_NOCOPY | SK_USER_DATA_BPF) -+#define SK_USER_DATA_BPF 2UL -+#define SK_USER_DATA_PSOCK 4UL -+#define SK_USER_DATA_PTRMASK ~(SK_USER_DATA_NOCOPY | SK_USER_DATA_BPF |\ -+ SK_USER_DATA_PSOCK) - - /** - * sk_user_data_is_nocopy - Test if sk_user_data pointer must not be copied -@@ -561,24 +575,40 @@ static inline bool sk_user_data_is_nocopy(const struct sock *sk) - - #define __sk_user_data(sk) ((*((void __rcu **)&(sk)->sk_user_data))) - -+/** -+ * __rcu_dereference_sk_user_data_with_flags - return the pointer -+ * only if argument flags all has been set in sk_user_data. Otherwise -+ * return NULL -+ * -+ * @sk: socket -+ * @flags: flag bits -+ */ -+static inline void * -+__rcu_dereference_sk_user_data_with_flags(const struct sock *sk, -+ uintptr_t flags) -+{ -+ uintptr_t sk_user_data = (uintptr_t)rcu_dereference(__sk_user_data(sk)); -+ -+ WARN_ON_ONCE(flags & SK_USER_DATA_PTRMASK); -+ -+ if ((sk_user_data & flags) == flags) -+ return (void *)(sk_user_data & SK_USER_DATA_PTRMASK); -+ return NULL; -+} -+ - #define rcu_dereference_sk_user_data(sk) \ -+ __rcu_dereference_sk_user_data_with_flags(sk, 0) -+#define __rcu_assign_sk_user_data_with_flags(sk, ptr, flags) \ - ({ \ -- void *__tmp = rcu_dereference(__sk_user_data((sk))); \ -- (void *)((uintptr_t)__tmp & SK_USER_DATA_PTRMASK); \ --}) --#define rcu_assign_sk_user_data(sk, ptr) \ --({ \ -- uintptr_t __tmp = (uintptr_t)(ptr); \ -- WARN_ON_ONCE(__tmp & ~SK_USER_DATA_PTRMASK); \ -- rcu_assign_pointer(__sk_user_data((sk)), __tmp); \ --}) --#define rcu_assign_sk_user_data_nocopy(sk, ptr) \ --({ \ -- uintptr_t __tmp = (uintptr_t)(ptr); \ -- WARN_ON_ONCE(__tmp & ~SK_USER_DATA_PTRMASK); \ -+ uintptr_t __tmp1 = (uintptr_t)(ptr), \ -+ __tmp2 = (uintptr_t)(flags); \ -+ WARN_ON_ONCE(__tmp1 & ~SK_USER_DATA_PTRMASK); \ -+ WARN_ON_ONCE(__tmp2 & SK_USER_DATA_PTRMASK); \ - rcu_assign_pointer(__sk_user_data((sk)), \ -- __tmp | SK_USER_DATA_NOCOPY); \ -+ __tmp1 | __tmp2); \ - }) -+#define rcu_assign_sk_user_data(sk, ptr) \ -+ __rcu_assign_sk_user_data_with_flags(sk, ptr, 0) - - /* - * SK_CAN_REUSE and SK_NO_REUSE on a socket mean that the socket is OK -@@ -1237,7 +1267,7 @@ struct proto { - unsigned int useroffset; /* Usercopy region offset */ - unsigned int usersize; /* Usercopy region size */ - -- struct percpu_counter *orphan_count; -+ unsigned int __percpu *orphan_count; - - struct request_sock_ops *rsk_prot; - struct timewait_sock_ops *twsk_prot; -@@ -1479,7 +1509,7 @@ void __sk_mem_reclaim(struct sock *sk, int amount); - /* sysctl_mem values are in pages, we convert them in SK_MEM_QUANTUM units */ - static inline long sk_prot_mem_limits(const struct sock *sk, int index) - { -- long val = sk->sk_prot->sysctl_mem[index]; -+ long val = READ_ONCE(sk->sk_prot->sysctl_mem[index]); - - #if PAGE_SIZE > SK_MEM_QUANTUM - val <<= PAGE_SHIFT - SK_MEM_QUANTUM_SHIFT; -@@ -1502,19 +1532,23 @@ static inline bool sk_has_account(struct sock *sk) - - static inline bool sk_wmem_schedule(struct sock *sk, int size) - { -+ int delta; -+ - if (!sk_has_account(sk)) - return true; -- return size <= sk->sk_forward_alloc || -- __sk_mem_schedule(sk, size, SK_MEM_SEND); -+ delta = size - sk->sk_forward_alloc; -+ return delta <= 0 || __sk_mem_schedule(sk, delta, SK_MEM_SEND); - } - - static inline bool - sk_rmem_schedule(struct sock *sk, struct sk_buff *skb, int size) - { -+ int delta; -+ - if (!sk_has_account(sk)) - return true; -- return size <= 
sk->sk_forward_alloc || -- __sk_mem_schedule(sk, size, SK_MEM_RECV) || -+ delta = size - sk->sk_forward_alloc; -+ return delta <= 0 || __sk_mem_schedule(sk, delta, SK_MEM_RECV) || - skb_pfmemalloc(skb); - } - -@@ -2400,19 +2434,22 @@ struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp, - * @sk: socket - * - * Use the per task page_frag instead of the per socket one for -- * optimization when we know that we're in the normal context and owns -+ * optimization when we know that we're in process context and own - * everything that's associated with %current. - * -- * gfpflags_allow_blocking() isn't enough here as direct reclaim may nest -- * inside other socket operations and end up recursing into sk_page_frag() -- * while it's already in use. -+ * Both direct reclaim and page faults can nest inside other -+ * socket operations and end up recursing into sk_page_frag() -+ * while it's already in use: explicitly avoid task page_frag -+ * usage if the caller is potentially doing any of them. -+ * This assumes that page fault handlers use the GFP_NOFS flags. - * - * Return: a per task page_frag if context allows that, - * otherwise a per socket one. - */ - static inline struct page_frag *sk_page_frag(struct sock *sk) - { -- if (gfpflags_normal_context(sk->sk_allocation)) -+ if ((sk->sk_allocation & (__GFP_DIRECT_RECLAIM | __GFP_MEMALLOC | __GFP_FS)) == -+ (__GFP_DIRECT_RECLAIM | __GFP_FS)) - return ¤t->task_frag; - - return &sk->sk_frag; -@@ -2590,7 +2627,7 @@ static inline void _sock_tx_timestamp(struct sock *sk, __u16 tsflags, - __sock_tx_timestamp(tsflags, tx_flags); - if (tsflags & SOF_TIMESTAMPING_OPT_ID && tskey && - tsflags & SOF_TIMESTAMPING_TX_RECORD_MASK) -- *tskey = sk->sk_tskey++; -+ *tskey = atomic_inc_return(&sk->sk_tskey) - 1; - } - if (unlikely(sock_flag(sk, SOCK_WIFI_STATUS))) - *tx_flags |= SKBTX_WIFI_STATUS; -@@ -2757,18 +2794,18 @@ static inline int sk_get_wmem0(const struct sock *sk, const struct proto *proto) - { - /* Does this proto have per netns sysctl_wmem ? */ - if (proto->sysctl_wmem_offset) -- return *(int *)((void *)sock_net(sk) + proto->sysctl_wmem_offset); -+ return READ_ONCE(*(int *)((void *)sock_net(sk) + proto->sysctl_wmem_offset)); - -- return *proto->sysctl_wmem; -+ return READ_ONCE(*proto->sysctl_wmem); - } - - static inline int sk_get_rmem0(const struct sock *sk, const struct proto *proto) - { - /* Does this proto have per netns sysctl_rmem ? 
*/ - if (proto->sysctl_rmem_offset) -- return *(int *)((void *)sock_net(sk) + proto->sysctl_rmem_offset); -+ return READ_ONCE(*(int *)((void *)sock_net(sk) + proto->sysctl_rmem_offset)); - -- return *proto->sysctl_rmem; -+ return READ_ONCE(*proto->sysctl_rmem); - } - - /* Default TCP Small queue budget is ~1 ms of data (1sec >> 10) -diff --git a/include/net/sock_reuseport.h b/include/net/sock_reuseport.h -index 473b0b0fa4abc..efc9085c68927 100644 ---- a/include/net/sock_reuseport.h -+++ b/include/net/sock_reuseport.h -@@ -43,21 +43,20 @@ struct sock *reuseport_migrate_sock(struct sock *sk, - extern int reuseport_attach_prog(struct sock *sk, struct bpf_prog *prog); - extern int reuseport_detach_prog(struct sock *sk); - --static inline bool reuseport_has_conns(struct sock *sk, bool set) -+static inline bool reuseport_has_conns(struct sock *sk) - { - struct sock_reuseport *reuse; - bool ret = false; - - rcu_read_lock(); - reuse = rcu_dereference(sk->sk_reuseport_cb); -- if (reuse) { -- if (set) -- reuse->has_conns = 1; -- ret = reuse->has_conns; +- top_waiter = futex_top_waiter(hb1, key1); +- +- /* There are no waiters, nothing for us to do. */ +- if (!top_waiter) +- return 0; +- +- /* +- * Ensure that this is a waiter sitting in futex_wait_requeue_pi() +- * and waiting on the 'waitqueue' futex which is always !PI. +- */ +- if (!top_waiter->rt_waiter || top_waiter->pi_state) +- return -EINVAL; +- +- /* Ensure we requeue to the expected futex. */ +- if (!match_futex(top_waiter->requeue_pi_key, key2)) +- return -EINVAL; +- +- /* Ensure that this does not race against an early wakeup */ +- if (!futex_requeue_pi_prepare(top_waiter, NULL)) +- return -EAGAIN; +- +- /* +- * Try to take the lock for top_waiter and set the FUTEX_WAITERS bit +- * in the contended case or if @set_waiters is true. +- * +- * In the contended case PI state is attached to the lock owner. If +- * the user space lock can be acquired then PI state is attached to +- * the new owner (@top_waiter->task) when @set_waiters is true. +- */ +- ret = futex_lock_pi_atomic(pifutex, hb2, key2, ps, top_waiter->task, +- exiting, set_waiters); +- if (ret == 1) { +- /* +- * Lock was acquired in user space and PI state was +- * attached to @top_waiter->task. That means state is fully +- * consistent and the waiter can return to user space +- * immediately after the wakeup. +- */ +- requeue_pi_wake_futex(top_waiter, key2, hb2); +- } else if (ret < 0) { +- /* Rewind top_waiter::requeue_state */ +- futex_requeue_pi_complete(top_waiter, ret); +- } else { +- /* +- * futex_lock_pi_atomic() did not acquire the user space +- * futex, but managed to establish the proxy lock and pi +- * state. top_waiter::requeue_state cannot be fixed up here +- * because the waiter is not enqueued on the rtmutex +- * yet. This is handled at the callsite depending on the +- * result of rt_mutex_start_proxy_lock() which is +- * guaranteed to be reached with this function returning 0. +- */ - } -+ if (reuse && reuse->has_conns) -+ ret = true; - rcu_read_unlock(); - - return ret; - } - -+void reuseport_has_conns_set(struct sock *sk); -+ - #endif /* _SOCK_REUSEPORT_H */ -diff --git a/include/net/strparser.h b/include/net/strparser.h -index 1d20b98493a10..732b7097d78e4 100644 ---- a/include/net/strparser.h -+++ b/include/net/strparser.h -@@ -54,10 +54,28 @@ struct strp_msg { - int offset; - }; - -+struct _strp_msg { -+ /* Internal cb structure. struct strp_msg must be first for passing -+ * to upper layer. 
-+ */ -+ struct strp_msg strp; -+ int accum_len; -+}; -+ -+struct sk_skb_cb { -+#define SK_SKB_CB_PRIV_LEN 20 -+ unsigned char data[SK_SKB_CB_PRIV_LEN]; -+ struct _strp_msg strp; -+ /* temp_reg is a temporary register used for bpf_convert_data_end_access -+ * when dst_reg == src_reg. -+ */ -+ u64 temp_reg; -+}; -+ - static inline struct strp_msg *strp_msg(struct sk_buff *skb) - { - return (struct strp_msg *)((void *)skb->cb + -- offsetof(struct qdisc_skb_cb, data)); -+ offsetof(struct sk_skb_cb, strp)); - } - - /* Structure for an attached lower socket */ -diff --git a/include/net/tc_act/tc_pedit.h b/include/net/tc_act/tc_pedit.h -index 748cf87a4d7ea..3e02709a1df65 100644 ---- a/include/net/tc_act/tc_pedit.h -+++ b/include/net/tc_act/tc_pedit.h -@@ -14,6 +14,7 @@ struct tcf_pedit { - struct tc_action common; - unsigned char tcfp_nkeys; - unsigned char tcfp_flags; -+ u32 tcfp_off_max_hint; - struct tc_pedit_key *tcfp_keys; - struct tcf_pedit_key_ex *tcfp_keys_ex; - }; -diff --git a/include/net/tcp.h b/include/net/tcp.h -index 60c384569e9cd..d2de3b7788a97 100644 ---- a/include/net/tcp.h -+++ b/include/net/tcp.h -@@ -48,7 +48,9 @@ - - extern struct inet_hashinfo tcp_hashinfo; - --extern struct percpu_counter tcp_orphan_count; -+DECLARE_PER_CPU(unsigned int, tcp_orphan_count); -+int tcp_orphan_count_sum(void); -+ - void tcp_time_wait(struct sock *sk, int state, int timeo); - - #define MAX_TCP_HEADER L1_CACHE_ALIGN(128 + MAX_HEADER) -@@ -290,19 +292,6 @@ static inline bool tcp_out_of_memory(struct sock *sk) - - void sk_forced_mem_schedule(struct sock *sk, int size); - --static inline bool tcp_too_many_orphans(struct sock *sk, int shift) +- return ret; +-} +- +-/** +- * futex_requeue() - Requeue waiters from uaddr1 to uaddr2 +- * @uaddr1: source futex user address +- * @flags: futex flags (FLAGS_SHARED, etc.) +- * @uaddr2: target futex user address +- * @nr_wake: number of waiters to wake (must be 1 for requeue_pi) +- * @nr_requeue: number of waiters to requeue (0-INT_MAX) +- * @cmpval: @uaddr1 expected value (or %NULL) +- * @requeue_pi: if we are attempting to requeue from a non-pi futex to a +- * pi futex (pi to pi requeue is not supported) +- * +- * Requeue waiters on uaddr1 to uaddr2. In the requeue_pi case, try to acquire +- * uaddr2 atomically on behalf of the top waiter. +- * +- * Return: +- * - >=0 - on success, the number of tasks requeued or woken; +- * - <0 - on error +- */ +-static int futex_requeue(u32 __user *uaddr1, unsigned int flags, +- u32 __user *uaddr2, int nr_wake, int nr_requeue, +- u32 *cmpval, int requeue_pi) -{ -- struct percpu_counter *ocp = sk->sk_prot->orphan_count; -- int orphans = percpu_counter_read_positive(ocp); +- union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT; +- int task_count = 0, ret; +- struct futex_pi_state *pi_state = NULL; +- struct futex_hash_bucket *hb1, *hb2; +- struct futex_q *this, *next; +- DEFINE_WAKE_Q(wake_q); +- +- if (nr_wake < 0 || nr_requeue < 0) +- return -EINVAL; - -- if (orphans << shift > sysctl_tcp_max_orphans) { -- orphans = percpu_counter_sum_positive(ocp); -- if (orphans << shift > sysctl_tcp_max_orphans) -- return true; +- /* +- * When PI not supported: return -ENOSYS if requeue_pi is true, +- * consequently the compiler knows requeue_pi is always false past +- * this point which will optimize away all the conditional code +- * further down. +- */ +- if (!IS_ENABLED(CONFIG_FUTEX_PI) && requeue_pi) +- return -ENOSYS; +- +- if (requeue_pi) { +- /* +- * Requeue PI only works on two distinct uaddrs. 
This +- * check is only valid for private futexes. See below. +- */ +- if (uaddr1 == uaddr2) +- return -EINVAL; +- +- /* +- * futex_requeue() allows the caller to define the number +- * of waiters to wake up via the @nr_wake argument. With +- * REQUEUE_PI, waking up more than one waiter is creating +- * more problems than it solves. Waking up a waiter makes +- * only sense if the PI futex @uaddr2 is uncontended as +- * this allows the requeue code to acquire the futex +- * @uaddr2 before waking the waiter. The waiter can then +- * return to user space without further action. A secondary +- * wakeup would just make the futex_wait_requeue_pi() +- * handling more complex, because that code would have to +- * look up pi_state and do more or less all the handling +- * which the requeue code has to do for the to be requeued +- * waiters. So restrict the number of waiters to wake to +- * one, and only wake it up when the PI futex is +- * uncontended. Otherwise requeue it and let the unlock of +- * the PI futex handle the wakeup. +- * +- * All REQUEUE_PI users, e.g. pthread_cond_signal() and +- * pthread_cond_broadcast() must use nr_wake=1. +- */ +- if (nr_wake != 1) +- return -EINVAL; +- +- /* +- * requeue_pi requires a pi_state, try to allocate it now +- * without any locks in case it fails. +- */ +- if (refill_pi_state_cache()) +- return -ENOMEM; - } -- return false; --} - - bool tcp_check_oom(struct sock *sk, int shift); - - -@@ -481,6 +470,7 @@ int __cookie_v4_check(const struct iphdr *iph, const struct tcphdr *th, - u32 cookie); - struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb); - struct request_sock *cookie_tcp_reqsk_alloc(const struct request_sock_ops *ops, -+ const struct tcp_request_sock_ops *af_ops, - struct sock *sk, struct sk_buff *skb); - #ifdef CONFIG_SYN_COOKIES - -@@ -581,6 +571,8 @@ __u32 cookie_v6_init_sequence(const struct sk_buff *skb, __u16 *mss); - #endif - /* tcp_output.c */ - -+void tcp_skb_entail(struct sock *sk, struct sk_buff *skb); -+void tcp_mark_push(struct tcp_sock *tp, struct sk_buff *skb); - void __tcp_push_pending_frames(struct sock *sk, unsigned int cur_mss, - int nonagle); - int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs); -@@ -619,6 +611,7 @@ void tcp_synack_rtt_meas(struct sock *sk, struct request_sock *req); - void tcp_reset(struct sock *sk, struct sk_buff *skb); - void tcp_skb_mark_lost_uncond_verify(struct tcp_sock *tp, struct sk_buff *skb); - void tcp_fin(struct sock *sk); -+void tcp_check_space(struct sock *sk); - - /* tcp_timer.c */ - void tcp_init_xmit_timers(struct sock *); -@@ -1037,6 +1030,7 @@ struct rate_sample { - int losses; /* number of packets marked lost upon ACK */ - u32 acked_sacked; /* number of packets newly (S)ACKed upon ACK */ - u32 prior_in_flight; /* in flight before this ACK */ -+ u32 last_end_seq; /* end_seq of most recently ACKed packet */ - bool is_app_limited; /* is sample from packet with bubble in pipe? */ - bool is_retrans; /* is sample from retransmission? */ - bool is_ack_delayed; /* is this (likely) a delayed ACK? */ -@@ -1159,6 +1153,11 @@ void tcp_rate_gen(struct sock *sk, u32 delivered, u32 lost, - bool is_sack_reneg, struct rate_sample *rs); - void tcp_rate_check_app_limited(struct sock *sk); - -+static inline bool tcp_skb_sent_after(u64 t1, u64 t2, u32 seq1, u32 seq2) -+{ -+ return t1 > t2 || (t1 == t2 && after(seq1, seq2)); -+} -+ - /* These functions determine how the current flow behaves in respect of SACK - * handling. 
SACK is negotiated with the peer, and therefore it can vary - * between different flows. -@@ -1202,9 +1201,20 @@ static inline unsigned int tcp_packets_in_flight(const struct tcp_sock *tp) - - #define TCP_INFINITE_SSTHRESH 0x7fffffff - -+static inline u32 tcp_snd_cwnd(const struct tcp_sock *tp) -+{ -+ return tp->snd_cwnd; -+} -+ -+static inline void tcp_snd_cwnd_set(struct tcp_sock *tp, u32 val) -+{ -+ WARN_ON_ONCE((int)val <= 0); -+ tp->snd_cwnd = val; -+} -+ - static inline bool tcp_in_slow_start(const struct tcp_sock *tp) - { -- return tp->snd_cwnd < tp->snd_ssthresh; -+ return tcp_snd_cwnd(tp) < tp->snd_ssthresh; - } - - static inline bool tcp_in_initial_slowstart(const struct tcp_sock *tp) -@@ -1230,8 +1240,8 @@ static inline __u32 tcp_current_ssthresh(const struct sock *sk) - return tp->snd_ssthresh; - else - return max(tp->snd_ssthresh, -- ((tp->snd_cwnd >> 1) + -- (tp->snd_cwnd >> 2))); -+ ((tcp_snd_cwnd(tp) >> 1) + -+ (tcp_snd_cwnd(tp) >> 2))); - } - - /* Use define here intentionally to get WARN_ON location shown at the caller */ -@@ -1271,11 +1281,14 @@ static inline bool tcp_is_cwnd_limited(const struct sock *sk) - { - const struct tcp_sock *tp = tcp_sk(sk); - -+ if (tp->is_cwnd_limited) -+ return true; -+ - /* If in slow start, ensure cwnd grows to twice what was ACKed. */ - if (tcp_in_slow_start(tp)) -- return tp->snd_cwnd < 2 * tp->max_packets_out; -+ return tcp_snd_cwnd(tp) < 2 * tp->max_packets_out; - -- return tp->is_cwnd_limited; -+ return false; - } - - /* BBR congestion control needs pacing. -@@ -1382,8 +1395,8 @@ static inline void tcp_slow_start_after_idle_check(struct sock *sk) - struct tcp_sock *tp = tcp_sk(sk); - s32 delta; - -- if (!sock_net(sk)->ipv4.sysctl_tcp_slow_start_after_idle || tp->packets_out || -- ca_ops->cong_control) -+ if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_slow_start_after_idle) || -+ tp->packets_out || ca_ops->cong_control) - return; - delta = tcp_jiffies32 - tp->lsndtime; - if (delta > inet_csk(sk)->icsk_rto) -@@ -1398,7 +1411,7 @@ void tcp_select_initial_window(const struct sock *sk, int __space, - - static inline int tcp_win_from_space(const struct sock *sk, int space) - { -- int tcp_adv_win_scale = sock_net(sk)->ipv4.sysctl_tcp_adv_win_scale; -+ int tcp_adv_win_scale = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_adv_win_scale); - - return tcp_adv_win_scale <= 0 ? - (space>>(-tcp_adv_win_scale)) : -@@ -1461,21 +1474,24 @@ static inline int keepalive_intvl_when(const struct tcp_sock *tp) - { - struct net *net = sock_net((struct sock *)tp); - -- return tp->keepalive_intvl ? : net->ipv4.sysctl_tcp_keepalive_intvl; -+ return tp->keepalive_intvl ? : -+ READ_ONCE(net->ipv4.sysctl_tcp_keepalive_intvl); - } - - static inline int keepalive_time_when(const struct tcp_sock *tp) - { - struct net *net = sock_net((struct sock *)tp); - -- return tp->keepalive_time ? : net->ipv4.sysctl_tcp_keepalive_time; -+ return tp->keepalive_time ? : -+ READ_ONCE(net->ipv4.sysctl_tcp_keepalive_time); - } - - static inline int keepalive_probes(const struct tcp_sock *tp) - { - struct net *net = sock_net((struct sock *)tp); - -- return tp->keepalive_probes ? : net->ipv4.sysctl_tcp_keepalive_probes; -+ return tp->keepalive_probes ? : -+ READ_ONCE(net->ipv4.sysctl_tcp_keepalive_probes); - } - - static inline u32 keepalive_time_elapsed(const struct tcp_sock *tp) -@@ -1488,7 +1504,8 @@ static inline u32 keepalive_time_elapsed(const struct tcp_sock *tp) - - static inline int tcp_fin_time(const struct sock *sk) - { -- int fin_timeout = tcp_sk(sk)->linger2 ? 
: sock_net(sk)->ipv4.sysctl_tcp_fin_timeout; -+ int fin_timeout = tcp_sk(sk)->linger2 ? : -+ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fin_timeout); - const int rto = inet_csk(sk)->icsk_rto; - - if (fin_timeout < (rto << 2) - (rto >> 1)) -@@ -1982,7 +1999,7 @@ void __tcp_v4_send_check(struct sk_buff *skb, __be32 saddr, __be32 daddr); - static inline u32 tcp_notsent_lowat(const struct tcp_sock *tp) - { - struct net *net = sock_net((struct sock *)tp); -- return tp->notsent_lowat ?: net->ipv4.sysctl_tcp_notsent_lowat; -+ return tp->notsent_lowat ?: READ_ONCE(net->ipv4.sysctl_tcp_notsent_lowat); - } - - bool tcp_stream_memory_free(const struct sock *sk, int wake); -diff --git a/include/net/tls.h b/include/net/tls.h -index 1fffb206f09f5..bf3d63a527885 100644 ---- a/include/net/tls.h -+++ b/include/net/tls.h -@@ -707,7 +707,7 @@ int tls_sw_fallback_init(struct sock *sk, - struct tls_crypto_info *crypto_info); - - #ifdef CONFIG_TLS_DEVICE --void tls_device_init(void); -+int tls_device_init(void); - void tls_device_cleanup(void); - void tls_device_sk_destruct(struct sock *sk); - int tls_set_device_offload(struct sock *sk, struct tls_context *ctx); -@@ -727,7 +727,7 @@ static inline bool tls_is_sk_rx_device_offloaded(struct sock *sk) - return tls_get_ctx(sk)->rx_conf == TLS_HW; - } - #else --static inline void tls_device_init(void) {} -+static inline int tls_device_init(void) { return 0; } - static inline void tls_device_cleanup(void) {} - - static inline int -diff --git a/include/net/udp.h b/include/net/udp.h -index 909ecf447e0fb..438b1b01a56ce 100644 ---- a/include/net/udp.h -+++ b/include/net/udp.h -@@ -262,7 +262,7 @@ static inline bool udp_sk_bound_dev_eq(struct net *net, int bound_dev_if, - int dif, int sdif) - { - #if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV) -- return inet_bound_dev_eq(!!net->ipv4.sysctl_udp_l3mdev_accept, -+ return inet_bound_dev_eq(!!READ_ONCE(net->ipv4.sysctl_udp_l3mdev_accept), - bound_dev_if, dif, sdif); - #else - return inet_bound_dev_eq(true, bound_dev_if, dif, sdif); -diff --git a/include/net/udp_tunnel.h b/include/net/udp_tunnel.h -index afc7ce713657b..72394f441dad8 100644 ---- a/include/net/udp_tunnel.h -+++ b/include/net/udp_tunnel.h -@@ -67,6 +67,9 @@ static inline int udp_sock_create(struct net *net, - typedef int (*udp_tunnel_encap_rcv_t)(struct sock *sk, struct sk_buff *skb); - typedef int (*udp_tunnel_encap_err_lookup_t)(struct sock *sk, - struct sk_buff *skb); -+typedef void (*udp_tunnel_encap_err_rcv_t)(struct sock *sk, -+ struct sk_buff *skb, -+ unsigned int udp_offset); - typedef void (*udp_tunnel_encap_destroy_t)(struct sock *sk); - typedef struct sk_buff *(*udp_tunnel_gro_receive_t)(struct sock *sk, - struct list_head *head, -@@ -80,6 +83,7 @@ struct udp_tunnel_sock_cfg { - __u8 encap_type; - udp_tunnel_encap_rcv_t encap_rcv; - udp_tunnel_encap_err_lookup_t encap_err_lookup; -+ udp_tunnel_encap_err_rcv_t encap_err_rcv; - udp_tunnel_encap_destroy_t encap_destroy; - udp_tunnel_gro_receive_t gro_receive; - udp_tunnel_gro_complete_t gro_complete; -diff --git a/include/net/xdp_sock_drv.h b/include/net/xdp_sock_drv.h -index 4e295541e3967..ffe13a10bc963 100644 ---- a/include/net/xdp_sock_drv.h -+++ b/include/net/xdp_sock_drv.h -@@ -13,7 +13,7 @@ - - void xsk_tx_completed(struct xsk_buff_pool *pool, u32 nb_entries); - bool xsk_tx_peek_desc(struct xsk_buff_pool *pool, struct xdp_desc *desc); --u32 xsk_tx_peek_release_desc_batch(struct xsk_buff_pool *pool, struct xdp_desc *desc, u32 max); -+u32 xsk_tx_peek_release_desc_batch(struct xsk_buff_pool *pool, u32 max); - void 
xsk_tx_release(struct xsk_buff_pool *pool); - struct xsk_buff_pool *xsk_get_pool_from_qid(struct net_device *dev, - u16 queue_id); -@@ -129,8 +129,7 @@ static inline bool xsk_tx_peek_desc(struct xsk_buff_pool *pool, - return false; - } - --static inline u32 xsk_tx_peek_release_desc_batch(struct xsk_buff_pool *pool, struct xdp_desc *desc, -- u32 max) -+static inline u32 xsk_tx_peek_release_desc_batch(struct xsk_buff_pool *pool, u32 max) - { - return 0; - } -diff --git a/include/net/xfrm.h b/include/net/xfrm.h -index 2308210793a01..73030094c6e6f 100644 ---- a/include/net/xfrm.h -+++ b/include/net/xfrm.h -@@ -200,6 +200,11 @@ struct xfrm_state { - struct xfrm_algo_aead *aead; - const char *geniv; - -+ /* mapping change rate limiting */ -+ __be16 new_mapping_sport; -+ u32 new_mapping; /* seconds */ -+ u32 mapping_maxage; /* seconds for input SA */ -+ - /* Data for encapsulator */ - struct xfrm_encap_tmpl *encap; - struct sock __rcu *encap_sk; -@@ -1075,24 +1080,29 @@ xfrm_state_addr_cmp(const struct xfrm_tmpl *tmpl, const struct xfrm_state *x, un - } - - #ifdef CONFIG_XFRM --static inline bool --xfrm_default_allow(struct net *net, int dir) +-retry: +- ret = get_futex_key(uaddr1, flags & FLAGS_SHARED, &key1, FUTEX_READ); +- if (unlikely(ret != 0)) +- return ret; +- ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2, +- requeue_pi ? FUTEX_WRITE : FUTEX_READ); +- if (unlikely(ret != 0)) +- return ret; +- +- /* +- * The check above which compares uaddrs is not sufficient for +- * shared futexes. We need to compare the keys: +- */ +- if (requeue_pi && match_futex(&key1, &key2)) +- return -EINVAL; +- +- hb1 = hash_futex(&key1); +- hb2 = hash_futex(&key2); +- +-retry_private: +- hb_waiters_inc(hb2); +- double_lock_hb(hb1, hb2); +- +- if (likely(cmpval != NULL)) { +- u32 curval; +- +- ret = get_futex_value_locked(&curval, uaddr1); +- +- if (unlikely(ret)) { +- double_unlock_hb(hb1, hb2); +- hb_waiters_dec(hb2); +- +- ret = get_user(curval, uaddr1); +- if (ret) +- return ret; +- +- if (!(flags & FLAGS_SHARED)) +- goto retry_private; +- +- goto retry; +- } +- if (curval != *cmpval) { +- ret = -EAGAIN; +- goto out_unlock; +- } +- } +- +- if (requeue_pi) { +- struct task_struct *exiting = NULL; +- +- /* +- * Attempt to acquire uaddr2 and wake the top waiter. If we +- * intend to requeue waiters, force setting the FUTEX_WAITERS +- * bit. We force this here where we are able to easily handle +- * faults rather in the requeue loop below. +- * +- * Updates topwaiter::requeue_state if a top waiter exists. +- */ +- ret = futex_proxy_trylock_atomic(uaddr2, hb1, hb2, &key1, +- &key2, &pi_state, +- &exiting, nr_requeue); +- +- /* +- * At this point the top_waiter has either taken uaddr2 or +- * is waiting on it. In both cases pi_state has been +- * established and an initial refcount on it. In case of an +- * error there's nothing. +- * +- * The top waiter's requeue_state is up to date: +- * +- * - If the lock was acquired atomically (ret == 1), then +- * the state is Q_REQUEUE_PI_LOCKED. +- * +- * The top waiter has been dequeued and woken up and can +- * return to user space immediately. The kernel/user +- * space state is consistent. In case that there must be +- * more waiters requeued the WAITERS bit in the user +- * space futex is set so the top waiter task has to go +- * into the syscall slowpath to unlock the futex. This +- * will block until this requeue operation has been +- * completed and the hash bucket locks have been +- * dropped. 
+- * +- * - If the trylock failed with an error (ret < 0) then +- * the state is either Q_REQUEUE_PI_NONE, i.e. "nothing +- * happened", or Q_REQUEUE_PI_IGNORE when there was an +- * interleaved early wakeup. +- * +- * - If the trylock did not succeed (ret == 0) then the +- * state is either Q_REQUEUE_PI_IN_PROGRESS or +- * Q_REQUEUE_PI_WAIT if an early wakeup interleaved. +- * This will be cleaned up in the loop below, which +- * cannot fail because futex_proxy_trylock_atomic() did +- * the same sanity checks for requeue_pi as the loop +- * below does. +- */ +- switch (ret) { +- case 0: +- /* We hold a reference on the pi state. */ +- break; +- +- case 1: +- /* +- * futex_proxy_trylock_atomic() acquired the user space +- * futex. Adjust task_count. +- */ +- task_count++; +- ret = 0; +- break; +- +- /* +- * If the above failed, then pi_state is NULL and +- * waiter::requeue_state is correct. +- */ +- case -EFAULT: +- double_unlock_hb(hb1, hb2); +- hb_waiters_dec(hb2); +- ret = fault_in_user_writeable(uaddr2); +- if (!ret) +- goto retry; +- return ret; +- case -EBUSY: +- case -EAGAIN: +- /* +- * Two reasons for this: +- * - EBUSY: Owner is exiting and we just wait for the +- * exit to complete. +- * - EAGAIN: The user space value changed. +- */ +- double_unlock_hb(hb1, hb2); +- hb_waiters_dec(hb2); +- /* +- * Handle the case where the owner is in the middle of +- * exiting. Wait for the exit to complete otherwise +- * this task might loop forever, aka. live lock. +- */ +- wait_for_owner_exiting(ret, exiting); +- cond_resched(); +- goto retry; +- default: +- goto out_unlock; +- } +- } +- +- plist_for_each_entry_safe(this, next, &hb1->chain, list) { +- if (task_count - nr_wake >= nr_requeue) +- break; +- +- if (!match_futex(&this->key, &key1)) +- continue; +- +- /* +- * FUTEX_WAIT_REQUEUE_PI and FUTEX_CMP_REQUEUE_PI should always +- * be paired with each other and no other futex ops. +- * +- * We should never be requeueing a futex_q with a pi_state, +- * which is awaiting a futex_unlock_pi(). +- */ +- if ((requeue_pi && !this->rt_waiter) || +- (!requeue_pi && this->rt_waiter) || +- this->pi_state) { +- ret = -EINVAL; +- break; +- } +- +- /* Plain futexes just wake or requeue and are done */ +- if (!requeue_pi) { +- if (++task_count <= nr_wake) +- mark_wake_futex(&wake_q, this); +- else +- requeue_futex(this, hb1, hb2, &key2); +- continue; +- } +- +- /* Ensure we requeue to the expected futex for requeue_pi. */ +- if (!match_futex(this->requeue_pi_key, &key2)) { +- ret = -EINVAL; +- break; +- } +- +- /* +- * Requeue nr_requeue waiters and possibly one more in the case +- * of requeue_pi if we couldn't acquire the lock atomically. +- * +- * Prepare the waiter to take the rt_mutex. Take a refcount +- * on the pi_state and store the pointer in the futex_q +- * object of the waiter. +- */ +- get_pi_state(pi_state); +- +- /* Don't requeue when the waiter is already on the way out. */ +- if (!futex_requeue_pi_prepare(this, pi_state)) { +- /* +- * Early woken waiter signaled that it is on the +- * way out. Drop the pi_state reference and try the +- * next waiter. @this->pi_state is still NULL. +- */ +- put_pi_state(pi_state); +- continue; +- } +- +- ret = rt_mutex_start_proxy_lock(&pi_state->pi_mutex, +- this->rt_waiter, +- this->task); +- +- if (ret == 1) { +- /* +- * We got the lock. We do neither drop the refcount +- * on pi_state nor clear this->pi_state because the +- * waiter needs the pi_state for cleaning up the +- * user space value. It will drop the refcount +- * after doing so. 
this::requeue_state is updated +- * in the wakeup as well. +- */ +- requeue_pi_wake_futex(this, &key2, hb2); +- task_count++; +- } else if (!ret) { +- /* Waiter is queued, move it to hb2 */ +- requeue_futex(this, hb1, hb2, &key2); +- futex_requeue_pi_complete(this, 0); +- task_count++; +- } else { +- /* +- * rt_mutex_start_proxy_lock() detected a potential +- * deadlock when we tried to queue that waiter. +- * Drop the pi_state reference which we took above +- * and remove the pointer to the state from the +- * waiters futex_q object. +- */ +- this->pi_state = NULL; +- put_pi_state(pi_state); +- futex_requeue_pi_complete(this, ret); +- /* +- * We stop queueing more waiters and let user space +- * deal with the mess. +- */ +- break; +- } +- } +- +- /* +- * We took an extra initial reference to the pi_state in +- * futex_proxy_trylock_atomic(). We need to drop it here again. +- */ +- put_pi_state(pi_state); +- +-out_unlock: +- double_unlock_hb(hb1, hb2); +- wake_up_q(&wake_q); +- hb_waiters_dec(hb2); +- return ret ? ret : task_count; +-} +- +-/* The key must be already stored in q->key. */ +-static inline struct futex_hash_bucket *queue_lock(struct futex_q *q) +- __acquires(&hb->lock) -{ -- u8 def = net->xfrm.policy_default; +- struct futex_hash_bucket *hb; - -- switch (dir) { -- case XFRM_POLICY_IN: -- return def & XFRM_POL_DEFAULT_IN ? false : true; -- case XFRM_POLICY_OUT: -- return def & XFRM_POL_DEFAULT_OUT ? false : true; -- case XFRM_POLICY_FWD: -- return def & XFRM_POL_DEFAULT_FWD ? false : true; +- hb = hash_futex(&q->key); +- +- /* +- * Increment the counter before taking the lock so that +- * a potential waker won't miss a to-be-slept task that is +- * waiting for the spinlock. This is safe as all queue_lock() +- * users end up calling queue_me(). Similarly, for housekeeping, +- * decrement the counter at queue_unlock() when some error has +- * occurred and we don't end up adding the task to the list. +- */ +- hb_waiters_inc(hb); /* implies smp_mb(); (A) */ +- +- q->lock_ptr = &hb->lock; +- +- spin_lock(&hb->lock); +- return hb; +-} +- +-static inline void +-queue_unlock(struct futex_hash_bucket *hb) +- __releases(&hb->lock) +-{ +- spin_unlock(&hb->lock); +- hb_waiters_dec(hb); +-} +- +-static inline void __queue_me(struct futex_q *q, struct futex_hash_bucket *hb) +-{ +- int prio; +- +- /* +- * The priority used to register this element is +- * - either the real thread-priority for the real-time threads +- * (i.e. threads with a priority lower than MAX_RT_PRIO) +- * - or MAX_RT_PRIO for non-RT threads. +- * Thus, all RT-threads are woken first in priority order, and +- * the others are woken last, in FIFO order. +- */ +- prio = min(current->normal_prio, MAX_RT_PRIO); +- +- plist_node_init(&q->list, prio); +- plist_add(&q->list, &hb->chain); +- q->task = current; +-} +- +-/** +- * queue_me() - Enqueue the futex_q on the futex_hash_bucket +- * @q: The futex_q to enqueue +- * @hb: The destination hash bucket +- * +- * The hb->lock must be held by the caller, and is released here. A call to +- * queue_me() is typically paired with exactly one call to unqueue_me(). The +- * exceptions involve the PI related operations, which may use unqueue_me_pi() +- * or nothing if the unqueue is done as part of the wake process and the unqueue +- * state is implicit in the state of woken task (see futex_wait_requeue_pi() for +- * an example). 
+- */ +-static inline void queue_me(struct futex_q *q, struct futex_hash_bucket *hb) +- __releases(&hb->lock) +-{ +- __queue_me(q, hb); +- spin_unlock(&hb->lock); +-} +- +-/** +- * unqueue_me() - Remove the futex_q from its futex_hash_bucket +- * @q: The futex_q to unqueue +- * +- * The q->lock_ptr must not be held by the caller. A call to unqueue_me() must +- * be paired with exactly one earlier call to queue_me(). +- * +- * Return: +- * - 1 - if the futex_q was still queued (and we removed unqueued it); +- * - 0 - if the futex_q was already removed by the waking thread +- */ +-static int unqueue_me(struct futex_q *q) +-{ +- spinlock_t *lock_ptr; +- int ret = 0; +- +- /* In the common case we don't take the spinlock, which is nice. */ +-retry: +- /* +- * q->lock_ptr can change between this read and the following spin_lock. +- * Use READ_ONCE to forbid the compiler from reloading q->lock_ptr and +- * optimizing lock_ptr out of the logic below. +- */ +- lock_ptr = READ_ONCE(q->lock_ptr); +- if (lock_ptr != NULL) { +- spin_lock(lock_ptr); +- /* +- * q->lock_ptr can change between reading it and +- * spin_lock(), causing us to take the wrong lock. This +- * corrects the race condition. +- * +- * Reasoning goes like this: if we have the wrong lock, +- * q->lock_ptr must have changed (maybe several times) +- * between reading it and the spin_lock(). It can +- * change again after the spin_lock() but only if it was +- * already changed before the spin_lock(). It cannot, +- * however, change back to the original value. Therefore +- * we can detect whether we acquired the correct lock. +- */ +- if (unlikely(lock_ptr != q->lock_ptr)) { +- spin_unlock(lock_ptr); +- goto retry; +- } +- __unqueue_futex(q); +- +- BUG_ON(q->pi_state); +- +- spin_unlock(lock_ptr); +- ret = 1; - } -+int __xfrm_policy_check(struct sock *, int dir, struct sk_buff *skb, -+ unsigned short family); -+ -+static inline bool __xfrm_check_nopolicy(struct net *net, struct sk_buff *skb, -+ int dir) -+{ -+ if (!net->xfrm.policy_count[dir] && !secpath_exists(skb)) -+ return net->xfrm.policy_default[dir] == XFRM_USERPOLICY_ACCEPT; -+ - return false; - } - --int __xfrm_policy_check(struct sock *, int dir, struct sk_buff *skb, -- unsigned short family); -+static inline bool __xfrm_check_dev_nopolicy(struct sk_buff *skb, -+ int dir, unsigned short family) -+{ -+ if (dir != XFRM_POLICY_OUT && family == AF_INET) { -+ /* same dst may be used for traffic originating from -+ * devices with different policy settings. 
-+ */ -+ return IPCB(skb)->flags & IPSKB_NOPOLICY; -+ } -+ return skb_dst(skb) && (skb_dst(skb)->flags & DST_NOPOLICY); -+} - - static inline int __xfrm_policy_check2(struct sock *sk, int dir, - struct sk_buff *skb, -@@ -1104,13 +1114,9 @@ static inline int __xfrm_policy_check2(struct sock *sk, int dir, - if (sk && sk->sk_policy[XFRM_POLICY_IN]) - return __xfrm_policy_check(sk, ndir, skb, family); - -- if (xfrm_default_allow(net, dir)) -- return (!net->xfrm.policy_count[dir] && !secpath_exists(skb)) || -- (skb_dst(skb) && (skb_dst(skb)->flags & DST_NOPOLICY)) || -- __xfrm_policy_check(sk, ndir, skb, family); -- else -- return (skb_dst(skb) && (skb_dst(skb)->flags & DST_NOPOLICY)) || -- __xfrm_policy_check(sk, ndir, skb, family); -+ return __xfrm_check_nopolicy(net, skb, dir) || -+ __xfrm_check_dev_nopolicy(skb, dir, family) || -+ __xfrm_policy_check(sk, ndir, skb, family); - } - - static inline int xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, unsigned short family) -@@ -1162,13 +1168,12 @@ static inline int xfrm_route_forward(struct sk_buff *skb, unsigned short family) - { - struct net *net = dev_net(skb->dev); - -- if (xfrm_default_allow(net, XFRM_POLICY_FWD)) -- return !net->xfrm.policy_count[XFRM_POLICY_OUT] || -- (skb_dst(skb)->flags & DST_NOXFRM) || -- __xfrm_route_forward(skb, family); -- else -- return (skb_dst(skb)->flags & DST_NOXFRM) || -- __xfrm_route_forward(skb, family); -+ if (!net->xfrm.policy_count[XFRM_POLICY_OUT] && -+ net->xfrm.policy_default[XFRM_POLICY_OUT] == XFRM_USERPOLICY_ACCEPT) -+ return true; -+ -+ return (skb_dst(skb)->flags & DST_NOXFRM) || -+ __xfrm_route_forward(skb, family); - } - - static inline int xfrm4_route_forward(struct sk_buff *skb) -@@ -1185,6 +1190,8 @@ int __xfrm_sk_clone_policy(struct sock *sk, const struct sock *osk); - - static inline int xfrm_sk_clone_policy(struct sock *sk, const struct sock *osk) - { -+ if (!sk_fullsock(osk)) -+ return 0; - sk->sk_policy[0] = NULL; - sk->sk_policy[1] = NULL; - if (unlikely(osk->sk_policy[0] || osk->sk_policy[1])) -@@ -1562,7 +1569,6 @@ void xfrm_sad_getinfo(struct net *net, struct xfrmk_sadinfo *si); - void xfrm_spd_getinfo(struct net *net, struct xfrmk_spdinfo *si); - u32 xfrm_replay_seqhi(struct xfrm_state *x, __be32 net_seq); - int xfrm_init_replay(struct xfrm_state *x); --u32 __xfrm_state_mtu(struct xfrm_state *x, int mtu); - u32 xfrm_state_mtu(struct xfrm_state *x, int mtu); - int __xfrm_init_state(struct xfrm_state *x, bool init_replay, bool offload); - int xfrm_init_state(struct xfrm_state *x); -@@ -1675,14 +1681,15 @@ int km_migrate(const struct xfrm_selector *sel, u8 dir, u8 type, - const struct xfrm_migrate *m, int num_bundles, - const struct xfrm_kmaddress *k, - const struct xfrm_encap_tmpl *encap); --struct xfrm_state *xfrm_migrate_state_find(struct xfrm_migrate *m, struct net *net); -+struct xfrm_state *xfrm_migrate_state_find(struct xfrm_migrate *m, struct net *net, -+ u32 if_id); - struct xfrm_state *xfrm_state_migrate(struct xfrm_state *x, - struct xfrm_migrate *m, - struct xfrm_encap_tmpl *encap); - int xfrm_migrate(const struct xfrm_selector *sel, u8 dir, u8 type, - struct xfrm_migrate *m, int num_bundles, - struct xfrm_kmaddress *k, struct net *net, -- struct xfrm_encap_tmpl *encap); -+ struct xfrm_encap_tmpl *encap, u32 if_id); - #endif - - int km_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, __be16 sport); -diff --git a/include/net/xsk_buff_pool.h b/include/net/xsk_buff_pool.h -index 7a9a23e7a604a..7517f4faf6b32 100644 ---- a/include/net/xsk_buff_pool.h 
-+++ b/include/net/xsk_buff_pool.h -@@ -60,6 +60,7 @@ struct xsk_buff_pool { - */ - dma_addr_t *dma_pages; - struct xdp_buff_xsk *heads; -+ struct xdp_desc *tx_descs; - u64 chunk_mask; - u64 addrs_cnt; - u32 free_list_cnt; -@@ -86,8 +87,9 @@ struct xsk_buff_pool *xp_create_and_assign_umem(struct xdp_sock *xs, - struct xdp_umem *umem); - int xp_assign_dev(struct xsk_buff_pool *pool, struct net_device *dev, - u16 queue_id, u16 flags); --int xp_assign_dev_shared(struct xsk_buff_pool *pool, struct xdp_umem *umem, -+int xp_assign_dev_shared(struct xsk_buff_pool *pool, struct xdp_sock *umem_xs, - struct net_device *dev, u16 queue_id); -+int xp_alloc_tx_descs(struct xsk_buff_pool *pool, struct xdp_sock *xs); - void xp_destroy(struct xsk_buff_pool *pool); - void xp_release(struct xdp_buff_xsk *xskb); - void xp_get_pool(struct xsk_buff_pool *pool); -diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h -index 4b50d9a3018a6..4ba642fc8a19a 100644 ---- a/include/rdma/ib_verbs.h -+++ b/include/rdma/ib_verbs.h -@@ -4097,8 +4097,13 @@ static inline int ib_dma_map_sgtable_attrs(struct ib_device *dev, - enum dma_data_direction direction, - unsigned long dma_attrs) - { -+ int nents; -+ - if (ib_uses_virt_dma(dev)) { -- ib_dma_virt_map_sg(dev, sgt->sgl, sgt->orig_nents); -+ nents = ib_dma_virt_map_sg(dev, sgt->sgl, sgt->orig_nents); -+ if (!nents) -+ return -EIO; -+ sgt->nents = nents; - return 0; - } - return dma_map_sgtable(dev->dma_device, sgt, direction, dma_attrs); -diff --git a/include/rdma/rdma_netlink.h b/include/rdma/rdma_netlink.h -index 2758d9df71ee9..c2a79aeee113c 100644 ---- a/include/rdma/rdma_netlink.h -+++ b/include/rdma/rdma_netlink.h -@@ -30,7 +30,7 @@ enum rdma_nl_flags { - * constant as well and the compiler checks they are the same. - */ - #define MODULE_ALIAS_RDMA_NETLINK(_index, _val) \ -- static inline void __chk_##_index(void) \ -+ static inline void __maybe_unused __chk_##_index(void) \ - { \ - BUILD_BUG_ON(_index != _val); \ - } \ -diff --git a/include/scsi/libfcoe.h b/include/scsi/libfcoe.h -index fac8e89aed81d..310e0dbffda99 100644 ---- a/include/scsi/libfcoe.h -+++ b/include/scsi/libfcoe.h -@@ -249,7 +249,8 @@ int fcoe_ctlr_recv_flogi(struct fcoe_ctlr *, struct fc_lport *, - struct fc_frame *); - - /* libfcoe funcs */ --u64 fcoe_wwn_from_mac(unsigned char mac[MAX_ADDR_LEN], unsigned int, unsigned int); -+u64 fcoe_wwn_from_mac(unsigned char mac[ETH_ALEN], unsigned int scheme, -+ unsigned int port); - int fcoe_libfc_config(struct fc_lport *, struct fcoe_ctlr *, - const struct libfc_function_template *, int init_fcp); - u32 fcoe_fc_crc(struct fc_frame *fp); -diff --git a/include/scsi/libiscsi.h b/include/scsi/libiscsi.h -index 4ee233e5a6ffa..5cf84228b51d1 100644 ---- a/include/scsi/libiscsi.h -+++ b/include/scsi/libiscsi.h -@@ -52,8 +52,10 @@ enum { - - #define ISID_SIZE 6 - --/* Connection suspend "bit" */ --#define ISCSI_SUSPEND_BIT 1 -+/* Connection flags */ -+#define ISCSI_CONN_FLAG_SUSPEND_TX 0 -+#define ISCSI_CONN_FLAG_SUSPEND_RX 1 -+#define ISCSI_CONN_FLAG_BOUND 2 - - #define ISCSI_ITT_MASK 0x1fff - #define ISCSI_TOTAL_CMDS_MAX 4096 -@@ -199,8 +201,9 @@ struct iscsi_conn { - struct list_head cmdqueue; /* data-path cmd queue */ - struct list_head requeue; /* tasks needing another run */ - struct work_struct xmitwork; /* per-conn. 
xmit workqueue */ -- unsigned long suspend_tx; /* suspend Tx */ -- unsigned long suspend_rx; /* suspend Rx */ -+ /* recv */ -+ struct work_struct recvwork; -+ unsigned long flags; /* ISCSI_CONN_FLAGs */ - - /* negotiated params */ - unsigned max_recv_dlength; /* initiator_max_recv_dsl*/ -@@ -399,7 +402,7 @@ extern int iscsi_host_add(struct Scsi_Host *shost, struct device *pdev); - extern struct Scsi_Host *iscsi_host_alloc(struct scsi_host_template *sht, - int dd_data_size, - bool xmit_can_sleep); --extern void iscsi_host_remove(struct Scsi_Host *shost); -+extern void iscsi_host_remove(struct Scsi_Host *shost, bool is_shutdown); - extern void iscsi_host_free(struct Scsi_Host *shost); - extern int iscsi_target_alloc(struct scsi_target *starget); - extern int iscsi_host_get_max_scsi_cmds(struct Scsi_Host *shost, -@@ -440,8 +443,10 @@ extern int iscsi_conn_get_param(struct iscsi_cls_conn *cls_conn, - extern int iscsi_conn_get_addr_param(struct sockaddr_storage *addr, - enum iscsi_param param, char *buf); - extern void iscsi_suspend_tx(struct iscsi_conn *conn); -+extern void iscsi_suspend_rx(struct iscsi_conn *conn); - extern void iscsi_suspend_queue(struct iscsi_conn *conn); --extern void iscsi_conn_queue_work(struct iscsi_conn *conn); -+extern void iscsi_conn_queue_xmit(struct iscsi_conn *conn); -+extern void iscsi_conn_queue_recv(struct iscsi_conn *conn); - - #define iscsi_conn_printk(prefix, _c, fmt, a...) \ - iscsi_cls_conn_printk(prefix, ((struct iscsi_conn *)_c)->cls_conn, \ -diff --git a/include/scsi/scsi_cmnd.h b/include/scsi/scsi_cmnd.h -index eaf04c9a1dfcb..685249233f2fe 100644 ---- a/include/scsi/scsi_cmnd.h -+++ b/include/scsi/scsi_cmnd.h -@@ -68,7 +68,7 @@ struct scsi_pointer { - struct scsi_cmnd { - struct scsi_request req; - struct scsi_device *device; -- struct list_head eh_entry; /* entry for the host eh_cmd_q */ -+ struct list_head eh_entry; /* entry for the host eh_abort_list/eh_cmd_q */ - struct delayed_work abort_work; - - struct rcu_head rcu; -@@ -211,7 +211,7 @@ static inline unsigned int scsi_get_resid(struct scsi_cmnd *cmd) - for_each_sg(scsi_sglist(cmd), sg, nseg, __i) - - static inline int scsi_sg_copy_from_buffer(struct scsi_cmnd *cmd, -- void *buf, int buflen) -+ const void *buf, int buflen) - { - return sg_copy_from_buffer(scsi_sglist(cmd), scsi_sg_count(cmd), - buf, buflen); -diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h -index b97e142a7ca92..3b3dbc37653da 100644 ---- a/include/scsi/scsi_device.h -+++ b/include/scsi/scsi_device.h -@@ -206,6 +206,7 @@ struct scsi_device { - unsigned rpm_autosuspend:1; /* Enable runtime autosuspend at device - * creation time */ - unsigned ignore_media_change:1; /* Ignore MEDIA CHANGE on resume */ -+ unsigned silence_suspend:1; /* Do not print runtime PM related messages */ - - bool offline_already; /* Device offline message logged */ - -diff --git a/include/scsi/scsi_host.h b/include/scsi/scsi_host.h -index 75363707b73f9..1a02e58eb4e44 100644 ---- a/include/scsi/scsi_host.h -+++ b/include/scsi/scsi_host.h -@@ -556,6 +556,7 @@ struct Scsi_Host { - - struct mutex scan_mutex;/* serialize scanning activity */ - -+ struct list_head eh_abort_list; - struct list_head eh_cmd_q; - struct task_struct * ehandler; /* Error recovery thread. 
*/ - struct completion * eh_action; /* Wait for specific actions on the -diff --git a/include/scsi/scsi_transport_iscsi.h b/include/scsi/scsi_transport_iscsi.h -index c5d7810fd7926..0f2f149ad916c 100644 ---- a/include/scsi/scsi_transport_iscsi.h -+++ b/include/scsi/scsi_transport_iscsi.h -@@ -211,6 +211,8 @@ struct iscsi_cls_conn { - struct mutex ep_mutex; - struct iscsi_endpoint *ep; - -+ /* Used when accessing flags and queueing work. */ -+ spinlock_t lock; - unsigned long flags; - struct work_struct cleanup_work; - -@@ -294,7 +296,7 @@ extern void iscsi_host_for_each_session(struct Scsi_Host *shost, - struct iscsi_endpoint { - void *dd_data; /* LLD private data */ - struct device dev; -- uint64_t id; -+ int id; - struct iscsi_cls_conn *conn; - }; - -@@ -439,6 +441,7 @@ extern struct iscsi_cls_session *iscsi_create_session(struct Scsi_Host *shost, - struct iscsi_transport *t, - int dd_size, - unsigned int target_id); -+extern void iscsi_force_destroy_session(struct iscsi_cls_session *session); - extern void iscsi_remove_session(struct iscsi_cls_session *session); - extern void iscsi_free_session(struct iscsi_cls_session *session); - extern struct iscsi_cls_conn *iscsi_create_conn(struct iscsi_cls_session *sess, -diff --git a/include/soc/at91/sama7-ddr.h b/include/soc/at91/sama7-ddr.h -index f6542584ca139..f203f34dba12e 100644 ---- a/include/soc/at91/sama7-ddr.h -+++ b/include/soc/at91/sama7-ddr.h -@@ -11,8 +11,6 @@ - #ifndef __SAMA7_DDR_H__ - #define __SAMA7_DDR_H__ - --#ifdef CONFIG_SOC_SAMA7 - - /* DDR3PHY */ - #define DDR3PHY_PIR (0x04) /* DDR3PHY PHY Initialization Register */ - #define DDR3PHY_PIR_DLLBYP (1 << 17) /* DLL Bypass */ -@@ -40,6 +38,14 @@ - #define DDR3PHY_DSGCR_ODTPDD_ODT0 (1 << 20) /* ODT[0] Power Down Driver */ - - #define DDR3PHY_ZQ0SR0 (0x188) /* ZQ status register 0 */ -+#define DDR3PHY_ZQ0SR0_PDO_OFF (0) /* Pull-down output impedance select offset */ -+#define DDR3PHY_ZQ0SR0_PUO_OFF (5) /* Pull-up output impedance select offset */ -+#define DDR3PHY_ZQ0SR0_PDODT_OFF (10) /* Pull-down on-die termination impedance select offset */ -+#define DDR3PHY_ZQ0SRO_PUODT_OFF (15) /* Pull-up on-die termination impedance select offset */ -+ -+#define DDR3PHY_DX0DLLCR (0x1CC) /* DDR3PHY DATX8 DLL Control Register */ -+#define DDR3PHY_DX1DLLCR (0x20C) /* DDR3PHY DATX8 DLL Control Register */ -+#define DDR3PHY_DXDLLCR_DLLDIS (1 << 31) /* DLL Disable */ - - /* UDDRC */ - #define UDDRC_STAT (0x04) /* UDDRC Operating Mode Status Register */ -@@ -75,6 +81,4 @@ - #define UDDRC_PCTRL_3 (0x6A0) /* UDDRC Port 3 Control Register */ - #define UDDRC_PCTRL_4 (0x750) /* UDDRC Port 4 Control Register */ - --#endif /* CONFIG_SOC_SAMA7 */ +- return ret; +-} - - #endif /* __SAMA7_DDR_H__ */ -diff --git a/include/sound/control.h b/include/sound/control.h -index 985c51a8fb748..a1fc7e0a47d95 100644 ---- a/include/sound/control.h -+++ b/include/sound/control.h -@@ -109,7 +109,7 @@ struct snd_ctl_file { - int preferred_subdevice[SND_CTL_SUBDEV_ITEMS]; - wait_queue_head_t change_sleep; - spinlock_t read_lock; -- struct fasync_struct *fasync; -+ struct snd_fasync *fasync; - int subscribed; /* read interface is activated */ - struct list_head events; /* waiting events for read */ - }; -diff --git a/include/sound/core.h b/include/sound/core.h -index b7e9b58d3c788..39cee40ac22e0 100644 ---- a/include/sound/core.h -+++ b/include/sound/core.h -@@ -284,6 +284,7 @@ int snd_card_disconnect(struct snd_card *card); - void snd_card_disconnect_sync(struct snd_card *card); - int snd_card_free(struct snd_card *card); - 
int snd_card_free_when_closed(struct snd_card *card); -+int snd_card_free_on_error(struct device *dev, int ret); - void snd_card_set_id(struct snd_card *card, const char *id); - int snd_card_register(struct snd_card *card); - int snd_card_info_init(void); -@@ -500,4 +501,12 @@ snd_pci_quirk_lookup_id(u16 vendor, u16 device, - } - #endif - -+/* async signal helpers */ -+struct snd_fasync; -+ -+int snd_fasync_helper(int fd, struct file *file, int on, -+ struct snd_fasync **fasyncp); -+void snd_kill_fasync(struct snd_fasync *fasync, int signal, int poll); -+void snd_fasync_free(struct snd_fasync *fasync); -+ - #endif /* __SOUND_CORE_H */ -diff --git a/include/sound/hda_codec.h b/include/sound/hda_codec.h -index 0e45963bb767f..82d9daa178517 100644 ---- a/include/sound/hda_codec.h -+++ b/include/sound/hda_codec.h -@@ -8,7 +8,7 @@ - #ifndef __SOUND_HDA_CODEC_H - #define __SOUND_HDA_CODEC_H - --#include <linux/kref.h> -+#include <linux/refcount.h> - #include <linux/mod_devicetable.h> - #include <sound/info.h> - #include <sound/control.h> -@@ -166,8 +166,8 @@ struct hda_pcm { - bool own_chmap; /* codec driver provides own channel maps */ - /* private: */ - struct hda_codec *codec; -- struct kref kref; - struct list_head list; -+ unsigned int disconnected:1; - }; - - /* codec information */ -@@ -187,6 +187,8 @@ struct hda_codec { - - /* PCM to create, set by patch_ops.build_pcms callback */ - struct list_head pcm_list_head; -+ refcount_t pcm_ref; -+ wait_queue_head_t remove_sleep; - - /* codec specific info */ - void *spec; -@@ -420,7 +422,7 @@ void snd_hda_codec_cleanup_for_unbind(struct hda_codec *codec); - - static inline void snd_hda_codec_pcm_get(struct hda_pcm *pcm) - { -- kref_get(&pcm->kref); -+ refcount_inc(&pcm->codec->pcm_ref); - } - void snd_hda_codec_pcm_put(struct hda_pcm *pcm); - -diff --git a/include/sound/hdaudio_ext.h b/include/sound/hdaudio_ext.h -index 375581634143c..d4e31ea16aba3 100644 ---- a/include/sound/hdaudio_ext.h -+++ b/include/sound/hdaudio_ext.h -@@ -88,6 +88,8 @@ struct hdac_ext_stream *snd_hdac_ext_stream_assign(struct hdac_bus *bus, - struct snd_pcm_substream *substream, - int type); - void snd_hdac_ext_stream_release(struct hdac_ext_stream *azx_dev, int type); -+void snd_hdac_ext_stream_decouple_locked(struct hdac_bus *bus, -+ struct hdac_ext_stream *azx_dev, bool decouple); - void snd_hdac_ext_stream_decouple(struct hdac_bus *bus, - struct hdac_ext_stream *azx_dev, bool decouple); - void snd_hdac_ext_stop_streams(struct hdac_bus *bus); -diff --git a/include/sound/jack.h b/include/sound/jack.h -index 1181f536557eb..1ed90e2109e9b 100644 ---- a/include/sound/jack.h -+++ b/include/sound/jack.h -@@ -62,6 +62,7 @@ struct snd_jack { - const char *id; - #ifdef CONFIG_SND_JACK_INPUT_DEV - struct input_dev *input_dev; -+ struct mutex input_dev_lock; - int registered; - int type; - char name[100]; -diff --git a/include/sound/pcm.h b/include/sound/pcm.h -index 33451f8ff755b..e08bf475d02d4 100644 ---- a/include/sound/pcm.h -+++ b/include/sound/pcm.h -@@ -398,6 +398,8 @@ struct snd_pcm_runtime { - wait_queue_head_t tsleep; /* transfer sleep */ - struct fasync_struct *fasync; - bool stop_operating; /* sync_stop will be called */ -+ struct mutex buffer_mutex; /* protect for buffer changes */ -+ atomic_t buffer_accessing; /* >0: in r/w operation, <0: blocked */ - - /* -- private section -- */ - void *private_data; -diff --git a/include/sound/soc-topology.h b/include/sound/soc-topology.h -index 4afd667e124c2..3e8a85e1e8094 100644 ---- a/include/sound/soc-topology.h -+++ 
b/include/sound/soc-topology.h -@@ -188,8 +188,7 @@ int snd_soc_tplg_widget_bind_event(struct snd_soc_dapm_widget *w, - - #else - --static inline int snd_soc_tplg_component_remove(struct snd_soc_component *comp, -- u32 index) -+static inline int snd_soc_tplg_component_remove(struct snd_soc_component *comp) - { - return 0; - } -diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h -index fb11c7693b257..2121a323fd6c3 100644 ---- a/include/target/target_core_base.h -+++ b/include/target/target_core_base.h -@@ -812,8 +812,9 @@ struct se_device { - atomic_long_t read_bytes; - atomic_long_t write_bytes; - /* Active commands on this virtual SE device */ -- atomic_t simple_cmds; -- atomic_t dev_ordered_sync; -+ atomic_t non_ordered; -+ bool ordered_sync_in_progress; -+ atomic_t delayed_cmd_count; - atomic_t dev_qf_count; - u32 export_count; - spinlock_t delayed_cmd_lock; -@@ -834,6 +835,7 @@ struct se_device { - struct list_head dev_sep_list; - struct list_head dev_tmr_list; - struct work_struct qf_work_queue; -+ struct work_struct delayed_cmd_work; - struct list_head delayed_cmd_list; - struct list_head qf_cmd_list; - /* Pointer to associated SE HBA */ -diff --git a/include/trace/bpf_probe.h b/include/trace/bpf_probe.h -index a23be89119aa5..04939b2d2f192 100644 ---- a/include/trace/bpf_probe.h -+++ b/include/trace/bpf_probe.h -@@ -21,6 +21,22 @@ - #undef __get_bitmask - #define __get_bitmask(field) (char *)__get_dynamic_array(field) - -+#undef __get_rel_dynamic_array -+#define __get_rel_dynamic_array(field) \ -+ ((void *)(&__entry->__rel_loc_##field) + \ -+ sizeof(__entry->__rel_loc_##field) + \ -+ (__entry->__rel_loc_##field & 0xffff)) -+ -+#undef __get_rel_dynamic_array_len -+#define __get_rel_dynamic_array_len(field) \ -+ ((__entry->__rel_loc_##field >> 16) & 0xffff) -+ -+#undef __get_rel_str -+#define __get_rel_str(field) ((char *)__get_rel_dynamic_array(field)) -+ -+#undef __get_rel_bitmask -+#define __get_rel_bitmask(field) (char *)__get_rel_dynamic_array(field) -+ - #undef __perf_count - #define __perf_count(c) (c) - -diff --git a/include/trace/events/cgroup.h b/include/trace/events/cgroup.h -index 7f42a3de59e6b..dd7d7c9efecdf 100644 ---- a/include/trace/events/cgroup.h -+++ b/include/trace/events/cgroup.h -@@ -59,8 +59,8 @@ DECLARE_EVENT_CLASS(cgroup, - - TP_STRUCT__entry( - __field( int, root ) -- __field( int, id ) - __field( int, level ) -+ __field( u64, id ) - __string( path, path ) - ), - -@@ -71,7 +71,7 @@ DECLARE_EVENT_CLASS(cgroup, - __assign_str(path, path); - ), - -- TP_printk("root=%d id=%d level=%d path=%s", -+ TP_printk("root=%d id=%llu level=%d path=%s", - __entry->root, __entry->id, __entry->level, __get_str(path)) - ); - -@@ -126,8 +126,8 @@ DECLARE_EVENT_CLASS(cgroup_migrate, - - TP_STRUCT__entry( - __field( int, dst_root ) -- __field( int, dst_id ) - __field( int, dst_level ) -+ __field( u64, dst_id ) - __field( int, pid ) - __string( dst_path, path ) - __string( comm, task->comm ) -@@ -142,7 +142,7 @@ DECLARE_EVENT_CLASS(cgroup_migrate, - __assign_str(comm, task->comm); - ), - -- TP_printk("dst_root=%d dst_id=%d dst_level=%d dst_path=%s pid=%d comm=%s", -+ TP_printk("dst_root=%d dst_id=%llu dst_level=%d dst_path=%s pid=%d comm=%s", - __entry->dst_root, __entry->dst_id, __entry->dst_level, - __get_str(dst_path), __entry->pid, __get_str(comm)) - ); -@@ -171,8 +171,8 @@ DECLARE_EVENT_CLASS(cgroup_event, - - TP_STRUCT__entry( - __field( int, root ) -- __field( int, id ) - __field( int, level ) -+ __field( u64, id ) - __string( path, path ) - 
__field( int, val ) - ), -@@ -185,7 +185,7 @@ DECLARE_EVENT_CLASS(cgroup_event, - __entry->val = val; - ), - -- TP_printk("root=%d id=%d level=%d path=%s val=%d", -+ TP_printk("root=%d id=%llu level=%d path=%s val=%d", - __entry->root, __entry->id, __entry->level, __get_str(path), - __entry->val) - ); -diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h -index 0ea36b2b0662a..61a64d1b2bb68 100644 ---- a/include/trace/events/ext4.h -+++ b/include/trace/events/ext4.h -@@ -95,6 +95,17 @@ TRACE_DEFINE_ENUM(ES_REFERENCED_B); - { FALLOC_FL_COLLAPSE_RANGE, "COLLAPSE_RANGE"}, \ - { FALLOC_FL_ZERO_RANGE, "ZERO_RANGE"}) - -+TRACE_DEFINE_ENUM(EXT4_FC_REASON_XATTR); -+TRACE_DEFINE_ENUM(EXT4_FC_REASON_CROSS_RENAME); -+TRACE_DEFINE_ENUM(EXT4_FC_REASON_JOURNAL_FLAG_CHANGE); -+TRACE_DEFINE_ENUM(EXT4_FC_REASON_NOMEM); -+TRACE_DEFINE_ENUM(EXT4_FC_REASON_SWAP_BOOT); -+TRACE_DEFINE_ENUM(EXT4_FC_REASON_RESIZE); -+TRACE_DEFINE_ENUM(EXT4_FC_REASON_RENAME_DIR); -+TRACE_DEFINE_ENUM(EXT4_FC_REASON_FALLOC_RANGE); -+TRACE_DEFINE_ENUM(EXT4_FC_REASON_INODE_JOURNAL_DATA); -+TRACE_DEFINE_ENUM(EXT4_FC_REASON_MAX); -+ - #define show_fc_reason(reason) \ - __print_symbolic(reason, \ - { EXT4_FC_REASON_XATTR, "XATTR"}, \ -@@ -2723,41 +2734,50 @@ TRACE_EVENT(ext4_fc_commit_stop, - - #define FC_REASON_NAME_STAT(reason) \ - show_fc_reason(reason), \ -- __entry->sbi->s_fc_stats.fc_ineligible_reason_count[reason] -+ __entry->fc_ineligible_rc[reason] - - TRACE_EVENT(ext4_fc_stats, -- TP_PROTO(struct super_block *sb), +-/* +- * PI futexes can not be requeued and must remove themselves from the +- * hash bucket. The hash bucket lock (i.e. lock_ptr) is held. +- */ +-static void unqueue_me_pi(struct futex_q *q) +-{ +- __unqueue_futex(q); - -- TP_ARGS(sb), -+ TP_PROTO(struct super_block *sb), - -- TP_STRUCT__entry( -- __field(dev_t, dev) -- __field(struct ext4_sb_info *, sbi) -- __field(int, count) -- ), -+ TP_ARGS(sb), - -- TP_fast_assign( -- __entry->dev = sb->s_dev; -- __entry->sbi = EXT4_SB(sb); -- ), -+ TP_STRUCT__entry( -+ __field(dev_t, dev) -+ __array(unsigned int, fc_ineligible_rc, EXT4_FC_REASON_MAX) -+ __field(unsigned long, fc_commits) -+ __field(unsigned long, fc_ineligible_commits) -+ __field(unsigned long, fc_numblks) -+ ), - -- TP_printk("dev %d:%d fc ineligible reasons:\n" -- "%s:%d, %s:%d, %s:%d, %s:%d, %s:%d, %s:%d, %s:%d, %s:%d, %s:%d; " -- "num_commits:%ld, ineligible: %ld, numblks: %ld", -- MAJOR(__entry->dev), MINOR(__entry->dev), -- FC_REASON_NAME_STAT(EXT4_FC_REASON_XATTR), -- FC_REASON_NAME_STAT(EXT4_FC_REASON_CROSS_RENAME), -- FC_REASON_NAME_STAT(EXT4_FC_REASON_JOURNAL_FLAG_CHANGE), -- FC_REASON_NAME_STAT(EXT4_FC_REASON_NOMEM), -- FC_REASON_NAME_STAT(EXT4_FC_REASON_SWAP_BOOT), -- FC_REASON_NAME_STAT(EXT4_FC_REASON_RESIZE), -- FC_REASON_NAME_STAT(EXT4_FC_REASON_RENAME_DIR), -- FC_REASON_NAME_STAT(EXT4_FC_REASON_FALLOC_RANGE), -- FC_REASON_NAME_STAT(EXT4_FC_REASON_INODE_JOURNAL_DATA), -- __entry->sbi->s_fc_stats.fc_num_commits, -- __entry->sbi->s_fc_stats.fc_ineligible_commits, -- __entry->sbi->s_fc_stats.fc_numblks) -+ TP_fast_assign( -+ int i; - -+ __entry->dev = sb->s_dev; -+ for (i = 0; i < EXT4_FC_REASON_MAX; i++) { -+ __entry->fc_ineligible_rc[i] = -+ EXT4_SB(sb)->s_fc_stats.fc_ineligible_reason_count[i]; -+ } -+ __entry->fc_commits = EXT4_SB(sb)->s_fc_stats.fc_num_commits; -+ __entry->fc_ineligible_commits = -+ EXT4_SB(sb)->s_fc_stats.fc_ineligible_commits; -+ __entry->fc_numblks = EXT4_SB(sb)->s_fc_stats.fc_numblks; -+ ), -+ -+ TP_printk("dev %d,%d fc ineligible reasons:\n" -+ "%s:%u, 
%s:%u, %s:%u, %s:%u, %s:%u, %s:%u, %s:%u, %s:%u, %s:%u " -+ "num_commits:%lu, ineligible: %lu, numblks: %lu", -+ MAJOR(__entry->dev), MINOR(__entry->dev), -+ FC_REASON_NAME_STAT(EXT4_FC_REASON_XATTR), -+ FC_REASON_NAME_STAT(EXT4_FC_REASON_CROSS_RENAME), -+ FC_REASON_NAME_STAT(EXT4_FC_REASON_JOURNAL_FLAG_CHANGE), -+ FC_REASON_NAME_STAT(EXT4_FC_REASON_NOMEM), -+ FC_REASON_NAME_STAT(EXT4_FC_REASON_SWAP_BOOT), -+ FC_REASON_NAME_STAT(EXT4_FC_REASON_RESIZE), -+ FC_REASON_NAME_STAT(EXT4_FC_REASON_RENAME_DIR), -+ FC_REASON_NAME_STAT(EXT4_FC_REASON_FALLOC_RANGE), -+ FC_REASON_NAME_STAT(EXT4_FC_REASON_INODE_JOURNAL_DATA), -+ __entry->fc_commits, __entry->fc_ineligible_commits, -+ __entry->fc_numblks) - ); - - #define DEFINE_TRACE_DENTRY_EVENT(__type) \ -diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h -index 4e881d91c8744..4cb055af1ec0b 100644 ---- a/include/trace/events/f2fs.h -+++ b/include/trace/events/f2fs.h -@@ -807,20 +807,20 @@ TRACE_EVENT(f2fs_lookup_start, - TP_STRUCT__entry( - __field(dev_t, dev) - __field(ino_t, ino) -- __field(const char *, name) -+ __string(name, dentry->d_name.name) - __field(unsigned int, flags) - ), - - TP_fast_assign( - __entry->dev = dir->i_sb->s_dev; - __entry->ino = dir->i_ino; -- __entry->name = dentry->d_name.name; -+ __assign_str(name, dentry->d_name.name); - __entry->flags = flags; - ), - - TP_printk("dev = (%d,%d), pino = %lu, name:%s, flags:%u", - show_dev_ino(__entry), -- __entry->name, -+ __get_str(name), - __entry->flags) - ); - -@@ -834,7 +834,7 @@ TRACE_EVENT(f2fs_lookup_end, - TP_STRUCT__entry( - __field(dev_t, dev) - __field(ino_t, ino) -- __field(const char *, name) -+ __string(name, dentry->d_name.name) - __field(nid_t, cino) - __field(int, err) - ), -@@ -842,14 +842,14 @@ TRACE_EVENT(f2fs_lookup_end, - TP_fast_assign( - __entry->dev = dir->i_sb->s_dev; - __entry->ino = dir->i_ino; -- __entry->name = dentry->d_name.name; -+ __assign_str(name, dentry->d_name.name); - __entry->cino = ino; - __entry->err = err; - ), - - TP_printk("dev = (%d,%d), pino = %lu, name:%s, ino:%u, err:%d", - show_dev_ino(__entry), -- __entry->name, -+ __get_str(name), - __entry->cino, - __entry->err) - ); -diff --git a/include/trace/events/libata.h b/include/trace/events/libata.h -index ab69434e2329e..72e785a903b65 100644 ---- a/include/trace/events/libata.h -+++ b/include/trace/events/libata.h -@@ -249,6 +249,7 @@ DECLARE_EVENT_CLASS(ata_qc_complete_template, - __entry->hob_feature = qc->result_tf.hob_feature; - __entry->nsect = qc->result_tf.nsect; - __entry->hob_nsect = qc->result_tf.hob_nsect; -+ __entry->flags = qc->flags; - ), - - TP_printk("ata_port=%u ata_dev=%u tag=%d flags=%s status=%s " \ -diff --git a/include/trace/events/random.h b/include/trace/events/random.h -deleted file mode 100644 -index 3d7b432ca5f31..0000000000000 ---- a/include/trace/events/random.h -+++ /dev/null -@@ -1,247 +0,0 @@ --/* SPDX-License-Identifier: GPL-2.0 */ --#undef TRACE_SYSTEM --#define TRACE_SYSTEM random +- BUG_ON(!q->pi_state); +- put_pi_state(q->pi_state); +- q->pi_state = NULL; +-} - --#if !defined(_TRACE_RANDOM_H) || defined(TRACE_HEADER_MULTI_READ) --#define _TRACE_RANDOM_H +-static int __fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q, +- struct task_struct *argowner) +-{ +- struct futex_pi_state *pi_state = q->pi_state; +- struct task_struct *oldowner, *newowner; +- u32 uval, curval, newval, newtid; +- int err = 0; - --#include <linux/writeback.h> --#include <linux/tracepoint.h> +- oldowner = pi_state->owner; - 
--TRACE_EVENT(add_device_randomness, -- TP_PROTO(int bytes, unsigned long IP), +- /* +- * We are here because either: +- * +- * - we stole the lock and pi_state->owner needs updating to reflect +- * that (@argowner == current), +- * +- * or: +- * +- * - someone stole our lock and we need to fix things to point to the +- * new owner (@argowner == NULL). +- * +- * Either way, we have to replace the TID in the user space variable. +- * This must be atomic as we have to preserve the owner died bit here. +- * +- * Note: We write the user space value _before_ changing the pi_state +- * because we can fault here. Imagine swapped out pages or a fork +- * that marked all the anonymous memory readonly for cow. +- * +- * Modifying pi_state _before_ the user space value would leave the +- * pi_state in an inconsistent state when we fault here, because we +- * need to drop the locks to handle the fault. This might be observed +- * in the PID checks when attaching to PI state . +- */ +-retry: +- if (!argowner) { +- if (oldowner != current) { +- /* +- * We raced against a concurrent self; things are +- * already fixed up. Nothing to do. +- */ +- return 0; +- } - -- TP_ARGS(bytes, IP), +- if (__rt_mutex_futex_trylock(&pi_state->pi_mutex)) { +- /* We got the lock. pi_state is correct. Tell caller. */ +- return 1; +- } - -- TP_STRUCT__entry( -- __field( int, bytes ) -- __field(unsigned long, IP ) -- ), +- /* +- * The trylock just failed, so either there is an owner or +- * there is a higher priority waiter than this one. +- */ +- newowner = rt_mutex_owner(&pi_state->pi_mutex); +- /* +- * If the higher priority waiter has not yet taken over the +- * rtmutex then newowner is NULL. We can't return here with +- * that state because it's inconsistent vs. the user space +- * state. So drop the locks and try again. It's a valid +- * situation and not any different from the other retry +- * conditions. +- */ +- if (unlikely(!newowner)) { +- err = -EAGAIN; +- goto handle_err; +- } +- } else { +- WARN_ON_ONCE(argowner != current); +- if (oldowner == current) { +- /* +- * We raced against a concurrent self; things are +- * already fixed up. Nothing to do. +- */ +- return 1; +- } +- newowner = argowner; +- } - -- TP_fast_assign( -- __entry->bytes = bytes; -- __entry->IP = IP; -- ), +- newtid = task_pid_vnr(newowner) | FUTEX_WAITERS; +- /* Owner died? */ +- if (!pi_state->owner) +- newtid |= FUTEX_OWNER_DIED; - -- TP_printk("bytes %d caller %pS", -- __entry->bytes, (void *)__entry->IP) --); +- err = get_futex_value_locked(&uval, uaddr); +- if (err) +- goto handle_err; - --DECLARE_EVENT_CLASS(random__mix_pool_bytes, -- TP_PROTO(const char *pool_name, int bytes, unsigned long IP), +- for (;;) { +- newval = (uval & FUTEX_OWNER_DIED) | newtid; - -- TP_ARGS(pool_name, bytes, IP), +- err = cmpxchg_futex_value_locked(&curval, uaddr, uval, newval); +- if (err) +- goto handle_err; - -- TP_STRUCT__entry( -- __field( const char *, pool_name ) -- __field( int, bytes ) -- __field(unsigned long, IP ) -- ), +- if (curval == uval) +- break; +- uval = curval; +- } - -- TP_fast_assign( -- __entry->pool_name = pool_name; -- __entry->bytes = bytes; -- __entry->IP = IP; -- ), +- /* +- * We fixed up user space. Now we need to fix the pi_state +- * itself. 
+- */ +- pi_state_update_owner(pi_state, newowner); - -- TP_printk("%s pool: bytes %d caller %pS", -- __entry->pool_name, __entry->bytes, (void *)__entry->IP) --); +- return argowner == current; - --DEFINE_EVENT(random__mix_pool_bytes, mix_pool_bytes, -- TP_PROTO(const char *pool_name, int bytes, unsigned long IP), +- /* +- * In order to reschedule or handle a page fault, we need to drop the +- * locks here. In the case of a fault, this gives the other task +- * (either the highest priority waiter itself or the task which stole +- * the rtmutex) the chance to try the fixup of the pi_state. So once we +- * are back from handling the fault we need to check the pi_state after +- * reacquiring the locks and before trying to do another fixup. When +- * the fixup has been done already we simply return. +- * +- * Note: we hold both hb->lock and pi_mutex->wait_lock. We can safely +- * drop hb->lock since the caller owns the hb -> futex_q relation. +- * Dropping the pi_mutex->wait_lock requires the state revalidate. +- */ +-handle_err: +- raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock); +- spin_unlock(q->lock_ptr); - -- TP_ARGS(pool_name, bytes, IP) --); +- switch (err) { +- case -EFAULT: +- err = fault_in_user_writeable(uaddr); +- break; - --DEFINE_EVENT(random__mix_pool_bytes, mix_pool_bytes_nolock, -- TP_PROTO(const char *pool_name, int bytes, unsigned long IP), +- case -EAGAIN: +- cond_resched(); +- err = 0; +- break; - -- TP_ARGS(pool_name, bytes, IP) --); +- default: +- WARN_ON_ONCE(1); +- break; +- } - --TRACE_EVENT(credit_entropy_bits, -- TP_PROTO(const char *pool_name, int bits, int entropy_count, -- unsigned long IP), +- spin_lock(q->lock_ptr); +- raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock); - -- TP_ARGS(pool_name, bits, entropy_count, IP), +- /* +- * Check if someone else fixed it for us: +- */ +- if (pi_state->owner != oldowner) +- return argowner == current; - -- TP_STRUCT__entry( -- __field( const char *, pool_name ) -- __field( int, bits ) -- __field( int, entropy_count ) -- __field(unsigned long, IP ) -- ), +- /* Retry if err was -EAGAIN or the fault in succeeded */ +- if (!err) +- goto retry; - -- TP_fast_assign( -- __entry->pool_name = pool_name; -- __entry->bits = bits; -- __entry->entropy_count = entropy_count; -- __entry->IP = IP; -- ), +- /* +- * fault_in_user_writeable() failed so user state is immutable. At +- * best we can make the kernel state consistent but user state will +- * be most likely hosed and any subsequent unlock operation will be +- * rejected due to PI futex rule [10]. +- * +- * Ensure that the rtmutex owner is also the pi_state owner despite +- * the user space value claiming something different. There is no +- * point in unlocking the rtmutex if current is the owner as it +- * would need to wait until the next waiter has taken the rtmutex +- * to guarantee consistent state. Keep it simple. Userspace asked +- * for this wreckaged state. +- * +- * The rtmutex has an owner - either current or some other +- * task. See the EAGAIN loop above. 
+- */ +- pi_state_update_owner(pi_state, rt_mutex_owner(&pi_state->pi_mutex)); - -- TP_printk("%s pool: bits %d entropy_count %d caller %pS", -- __entry->pool_name, __entry->bits, -- __entry->entropy_count, (void *)__entry->IP) --); +- return err; +-} - --TRACE_EVENT(debit_entropy, -- TP_PROTO(const char *pool_name, int debit_bits), +-static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q, +- struct task_struct *argowner) +-{ +- struct futex_pi_state *pi_state = q->pi_state; +- int ret; - -- TP_ARGS(pool_name, debit_bits), +- lockdep_assert_held(q->lock_ptr); - -- TP_STRUCT__entry( -- __field( const char *, pool_name ) -- __field( int, debit_bits ) -- ), +- raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock); +- ret = __fixup_pi_state_owner(uaddr, q, argowner); +- raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock); +- return ret; +-} +- +-static long futex_wait_restart(struct restart_block *restart); +- +-/** +- * fixup_owner() - Post lock pi_state and corner case management +- * @uaddr: user address of the futex +- * @q: futex_q (contains pi_state and access to the rt_mutex) +- * @locked: if the attempt to take the rt_mutex succeeded (1) or not (0) +- * +- * After attempting to lock an rt_mutex, this function is called to cleanup +- * the pi_state owner as well as handle race conditions that may allow us to +- * acquire the lock. Must be called with the hb lock held. +- * +- * Return: +- * - 1 - success, lock taken; +- * - 0 - success, lock not taken; +- * - <0 - on error (-EFAULT) +- */ +-static int fixup_owner(u32 __user *uaddr, struct futex_q *q, int locked) +-{ +- if (locked) { +- /* +- * Got the lock. We might not be the anticipated owner if we +- * did a lock-steal - fix up the PI-state in that case: +- * +- * Speculative pi_state->owner read (we don't hold wait_lock); +- * since we own the lock pi_state->owner == current is the +- * stable state, anything else needs more attention. +- */ +- if (q->pi_state->owner != current) +- return fixup_pi_state_owner(uaddr, q, current); +- return 1; +- } +- +- /* +- * If we didn't get the lock; check if anybody stole it from us. In +- * that case, we need to fix up the uval to point to them instead of +- * us, otherwise bad things happen. [10] +- * +- * Another speculative read; pi_state->owner == current is unstable +- * but needs our attention. +- */ +- if (q->pi_state->owner == current) +- return fixup_pi_state_owner(uaddr, q, NULL); +- +- /* +- * Paranoia check. If we did not take the lock, then we should not be +- * the owner of the rt_mutex. Warn and establish consistent state. +- */ +- if (WARN_ON_ONCE(rt_mutex_owner(&q->pi_state->pi_mutex) == current)) +- return fixup_pi_state_owner(uaddr, q, current); +- +- return 0; +-} +- +-/** +- * futex_wait_queue_me() - queue_me() and wait for wakeup, timeout, or signal +- * @hb: the futex hash bucket, must be locked by the caller +- * @q: the futex_q to queue up on +- * @timeout: the prepared hrtimer_sleeper, or null for no timeout +- */ +-static void futex_wait_queue_me(struct futex_hash_bucket *hb, struct futex_q *q, +- struct hrtimer_sleeper *timeout) +-{ +- /* +- * The task state is guaranteed to be set before another task can +- * wake it. set_current_state() is implemented using smp_store_mb() and +- * queue_me() calls spin_unlock() upon completion, both serializing +- * access to the hash list and forcing another memory barrier. 
+- */ +- set_current_state(TASK_INTERRUPTIBLE); +- queue_me(q, hb); - -- TP_fast_assign( -- __entry->pool_name = pool_name; -- __entry->debit_bits = debit_bits; -- ), +- /* Arm the timer */ +- if (timeout) +- hrtimer_sleeper_start_expires(timeout, HRTIMER_MODE_ABS); - -- TP_printk("%s: debit_bits %d", __entry->pool_name, -- __entry->debit_bits) --); +- /* +- * If we have been removed from the hash list, then another task +- * has tried to wake us, and we can skip the call to schedule(). +- */ +- if (likely(!plist_node_empty(&q->list))) { +- /* +- * If the timer has already expired, current will already be +- * flagged for rescheduling. Only call schedule if there +- * is no timeout, or if it has yet to expire. +- */ +- if (!timeout || timeout->task) +- freezable_schedule(); +- } +- __set_current_state(TASK_RUNNING); +-} - --TRACE_EVENT(add_input_randomness, -- TP_PROTO(int input_bits), +-/** +- * futex_wait_setup() - Prepare to wait on a futex +- * @uaddr: the futex userspace address +- * @val: the expected value +- * @flags: futex flags (FLAGS_SHARED, etc.) +- * @q: the associated futex_q +- * @hb: storage for hash_bucket pointer to be returned to caller +- * +- * Setup the futex_q and locate the hash_bucket. Get the futex value and +- * compare it with the expected value. Handle atomic faults internally. +- * Return with the hb lock held on success, and unlocked on failure. +- * +- * Return: +- * - 0 - uaddr contains val and hb has been locked; +- * - <1 - -EFAULT or -EWOULDBLOCK (uaddr does not contain val) and hb is unlocked +- */ +-static int futex_wait_setup(u32 __user *uaddr, u32 val, unsigned int flags, +- struct futex_q *q, struct futex_hash_bucket **hb) +-{ +- u32 uval; +- int ret; - -- TP_ARGS(input_bits), +- /* +- * Access the page AFTER the hash-bucket is locked. +- * Order is important: +- * +- * Userspace waiter: val = var; if (cond(val)) futex_wait(&var, val); +- * Userspace waker: if (cond(var)) { var = new; futex_wake(&var); } +- * +- * The basic logical guarantee of a futex is that it blocks ONLY +- * if cond(var) is known to be true at the time of blocking, for +- * any cond. If we locked the hash-bucket after testing *uaddr, that +- * would open a race condition where we could block indefinitely with +- * cond(var) false, which would violate the guarantee. +- * +- * On the other hand, we insert q and release the hash-bucket only +- * after testing *uaddr. This guarantees that futex_wait() will NOT +- * absorb a wakeup if *uaddr does not match the desired values +- * while the syscall executes. 
+- */ +-retry: +- ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &q->key, FUTEX_READ); +- if (unlikely(ret != 0)) +- return ret; - -- TP_STRUCT__entry( -- __field( int, input_bits ) -- ), +-retry_private: +- *hb = queue_lock(q); - -- TP_fast_assign( -- __entry->input_bits = input_bits; -- ), +- ret = get_futex_value_locked(&uval, uaddr); - -- TP_printk("input_pool_bits %d", __entry->input_bits) --); +- if (ret) { +- queue_unlock(*hb); - --TRACE_EVENT(add_disk_randomness, -- TP_PROTO(dev_t dev, int input_bits), +- ret = get_user(uval, uaddr); +- if (ret) +- return ret; - -- TP_ARGS(dev, input_bits), +- if (!(flags & FLAGS_SHARED)) +- goto retry_private; - -- TP_STRUCT__entry( -- __field( dev_t, dev ) -- __field( int, input_bits ) -- ), +- goto retry; +- } - -- TP_fast_assign( -- __entry->dev = dev; -- __entry->input_bits = input_bits; -- ), +- if (uval != val) { +- queue_unlock(*hb); +- ret = -EWOULDBLOCK; +- } - -- TP_printk("dev %d,%d input_pool_bits %d", MAJOR(__entry->dev), -- MINOR(__entry->dev), __entry->input_bits) --); +- return ret; +-} - --DECLARE_EVENT_CLASS(random__get_random_bytes, -- TP_PROTO(int nbytes, unsigned long IP), +-static int futex_wait(u32 __user *uaddr, unsigned int flags, u32 val, +- ktime_t *abs_time, u32 bitset) +-{ +- struct hrtimer_sleeper timeout, *to; +- struct restart_block *restart; +- struct futex_hash_bucket *hb; +- struct futex_q q = futex_q_init; +- int ret; - -- TP_ARGS(nbytes, IP), +- if (!bitset) +- return -EINVAL; +- q.bitset = bitset; - -- TP_STRUCT__entry( -- __field( int, nbytes ) -- __field(unsigned long, IP ) -- ), +- to = futex_setup_timer(abs_time, &timeout, flags, +- current->timer_slack_ns); +-retry: +- /* +- * Prepare to wait on uaddr. On success, it holds hb->lock and q +- * is initialized. +- */ +- ret = futex_wait_setup(uaddr, val, flags, &q, &hb); +- if (ret) +- goto out; - -- TP_fast_assign( -- __entry->nbytes = nbytes; -- __entry->IP = IP; -- ), +- /* queue_me and wait for wakeup, timeout, or a signal. */ +- futex_wait_queue_me(hb, &q, to); - -- TP_printk("nbytes %d caller %pS", __entry->nbytes, (void *)__entry->IP) --); +- /* If we were woken (and unqueued), we succeeded, whatever. */ +- ret = 0; +- if (!unqueue_me(&q)) +- goto out; +- ret = -ETIMEDOUT; +- if (to && !to->task) +- goto out; - --DEFINE_EVENT(random__get_random_bytes, get_random_bytes, -- TP_PROTO(int nbytes, unsigned long IP), +- /* +- * We expect signal_pending(current), but we might be the +- * victim of a spurious wakeup as well. 
+- */ +- if (!signal_pending(current)) +- goto retry; - -- TP_ARGS(nbytes, IP) --); +- ret = -ERESTARTSYS; +- if (!abs_time) +- goto out; - --DEFINE_EVENT(random__get_random_bytes, get_random_bytes_arch, -- TP_PROTO(int nbytes, unsigned long IP), +- restart = ¤t->restart_block; +- restart->futex.uaddr = uaddr; +- restart->futex.val = val; +- restart->futex.time = *abs_time; +- restart->futex.bitset = bitset; +- restart->futex.flags = flags | FLAGS_HAS_TIMEOUT; - -- TP_ARGS(nbytes, IP) --); +- ret = set_restart_fn(restart, futex_wait_restart); - --DECLARE_EVENT_CLASS(random__extract_entropy, -- TP_PROTO(const char *pool_name, int nbytes, int entropy_count, -- unsigned long IP), +-out: +- if (to) { +- hrtimer_cancel(&to->timer); +- destroy_hrtimer_on_stack(&to->timer); +- } +- return ret; +-} - -- TP_ARGS(pool_name, nbytes, entropy_count, IP), - -- TP_STRUCT__entry( -- __field( const char *, pool_name ) -- __field( int, nbytes ) -- __field( int, entropy_count ) -- __field(unsigned long, IP ) -- ), +-static long futex_wait_restart(struct restart_block *restart) +-{ +- u32 __user *uaddr = restart->futex.uaddr; +- ktime_t t, *tp = NULL; - -- TP_fast_assign( -- __entry->pool_name = pool_name; -- __entry->nbytes = nbytes; -- __entry->entropy_count = entropy_count; -- __entry->IP = IP; -- ), +- if (restart->futex.flags & FLAGS_HAS_TIMEOUT) { +- t = restart->futex.time; +- tp = &t; +- } +- restart->fn = do_no_restart_syscall; - -- TP_printk("%s pool: nbytes %d entropy_count %d caller %pS", -- __entry->pool_name, __entry->nbytes, __entry->entropy_count, -- (void *)__entry->IP) --); +- return (long)futex_wait(uaddr, restart->futex.flags, +- restart->futex.val, tp, restart->futex.bitset); +-} - - --DEFINE_EVENT(random__extract_entropy, extract_entropy, -- TP_PROTO(const char *pool_name, int nbytes, int entropy_count, -- unsigned long IP), +-/* +- * Userspace tried a 0 -> TID atomic transition of the futex value +- * and failed. The kernel side here does the whole locking operation: +- * if there are waiters then it will block as a consequence of relying +- * on rt-mutexes, it does PI, etc. (Due to races the kernel might see +- * a 0 value of the futex too.). +- * +- * Also serves as futex trylock_pi()'ing, and due semantics. +- */ +-static int futex_lock_pi(u32 __user *uaddr, unsigned int flags, +- ktime_t *time, int trylock) +-{ +- struct hrtimer_sleeper timeout, *to; +- struct task_struct *exiting = NULL; +- struct rt_mutex_waiter rt_waiter; +- struct futex_hash_bucket *hb; +- struct futex_q q = futex_q_init; +- int res, ret; +- +- if (!IS_ENABLED(CONFIG_FUTEX_PI)) +- return -ENOSYS; - -- TP_ARGS(pool_name, nbytes, entropy_count, IP) --); +- if (refill_pi_state_cache()) +- return -ENOMEM; - --TRACE_EVENT(urandom_read, -- TP_PROTO(int got_bits, int pool_left, int input_left), +- to = futex_setup_timer(time, &timeout, flags, 0); - -- TP_ARGS(got_bits, pool_left, input_left), +-retry: +- ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &q.key, FUTEX_WRITE); +- if (unlikely(ret != 0)) +- goto out; - -- TP_STRUCT__entry( -- __field( int, got_bits ) -- __field( int, pool_left ) -- __field( int, input_left ) -- ), +-retry_private: +- hb = queue_lock(&q); - -- TP_fast_assign( -- __entry->got_bits = got_bits; -- __entry->pool_left = pool_left; -- __entry->input_left = input_left; -- ), +- ret = futex_lock_pi_atomic(uaddr, hb, &q.key, &q.pi_state, current, +- &exiting, 0); +- if (unlikely(ret)) { +- /* +- * Atomic work succeeded and we got the lock, +- * or failed. Either way, we do _not_ block. 
+- */
+- switch (ret) {
+- case 1:
+- /* We got the lock. */
+- ret = 0;
+- goto out_unlock_put_key;
+- case -EFAULT:
+- goto uaddr_faulted;
+- case -EBUSY:
+- case -EAGAIN:
+- /*
+- * Two reasons for this:
+- * - EBUSY: Task is exiting and we just wait for the
+- * exit to complete.
+- * - EAGAIN: The user space value changed.
+- */
+- queue_unlock(hb);
+- /*
+- * Handle the case where the owner is in the middle of
+- * exiting. Wait for the exit to complete otherwise
+- * this task might loop forever, aka. live lock.
+- */
+- wait_for_owner_exiting(ret, exiting);
+- cond_resched();
+- goto retry;
+- default:
+- goto out_unlock_put_key;
+- }
+- }
-
-- TP_printk("got_bits %d nonblocking_pool_entropy_left %d "
-- "input_entropy_left %d", __entry->got_bits,
-- __entry->pool_left, __entry->input_left)
--);
+- WARN_ON(!q.pi_state);
-
--TRACE_EVENT(prandom_u32,
+- /*
+- * Only actually queue now that the atomic ops are done:
+- */
+- __queue_me(&q, hb);
-
-- TP_PROTO(unsigned int ret),
+- if (trylock) {
+- ret = rt_mutex_futex_trylock(&q.pi_state->pi_mutex);
+- /* Fixup the trylock return value: */
+- ret = ret ? 0 : -EWOULDBLOCK;
+- goto no_block;
+- }
-
-- TP_ARGS(ret),
+- rt_mutex_init_waiter(&rt_waiter);
-
-- TP_STRUCT__entry(
-- __field( unsigned int, ret)
-- ),
+- /*
+- * On PREEMPT_RT_FULL, when hb->lock becomes an rt_mutex, we must not
+- * hold it while doing rt_mutex_start_proxy(), because then it will
+- * include hb->lock in the blocking chain, even though we'll not in
+- * fact hold it while blocking. This will lead it to report -EDEADLK
+- * and BUG when futex_unlock_pi() interleaves with this.
+- *
+- * Therefore acquire wait_lock while holding hb->lock, but drop the
+- * latter before calling __rt_mutex_start_proxy_lock(). This
+- * interleaves with futex_unlock_pi() -- which does a similar lock
+- * handoff -- such that the latter can observe the futex_q::pi_state
+- * before __rt_mutex_start_proxy_lock() is done.
+- */
+- raw_spin_lock_irq(&q.pi_state->pi_mutex.wait_lock);
+- spin_unlock(q.lock_ptr);
+- /*
+- * __rt_mutex_start_proxy_lock() unconditionally enqueues the @rt_waiter
+- * such that futex_unlock_pi() is guaranteed to observe the waiter when
+- * it sees the futex_q::pi_state.
+- */ +- ret = __rt_mutex_start_proxy_lock(&q.pi_state->pi_mutex, &rt_waiter, current); +- raw_spin_unlock_irq(&q.pi_state->pi_mutex.wait_lock); - -- TP_fast_assign( -- __entry->ret = ret; -- ), +- if (ret) { +- if (ret == 1) +- ret = 0; +- goto cleanup; +- } - -- TP_printk("ret=%u" , __entry->ret) --); +- if (unlikely(to)) +- hrtimer_sleeper_start_expires(to, HRTIMER_MODE_ABS); - --#endif /* _TRACE_RANDOM_H */ +- ret = rt_mutex_wait_proxy_lock(&q.pi_state->pi_mutex, to, &rt_waiter); - --/* This part must be outside protection */ --#include <trace/define_trace.h> -diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h -index e70c90116edae..1c714336b8635 100644 ---- a/include/trace/events/rxrpc.h -+++ b/include/trace/events/rxrpc.h -@@ -83,12 +83,15 @@ enum rxrpc_call_trace { - rxrpc_call_error, - rxrpc_call_got, - rxrpc_call_got_kernel, -+ rxrpc_call_got_timer, - rxrpc_call_got_userid, - rxrpc_call_new_client, - rxrpc_call_new_service, - rxrpc_call_put, - rxrpc_call_put_kernel, - rxrpc_call_put_noqueue, -+ rxrpc_call_put_notimer, -+ rxrpc_call_put_timer, - rxrpc_call_put_userid, - rxrpc_call_queued, - rxrpc_call_queued_ref, -@@ -278,12 +281,15 @@ enum rxrpc_tx_point { - EM(rxrpc_call_error, "*E*") \ - EM(rxrpc_call_got, "GOT") \ - EM(rxrpc_call_got_kernel, "Gke") \ -+ EM(rxrpc_call_got_timer, "GTM") \ - EM(rxrpc_call_got_userid, "Gus") \ - EM(rxrpc_call_new_client, "NWc") \ - EM(rxrpc_call_new_service, "NWs") \ - EM(rxrpc_call_put, "PUT") \ - EM(rxrpc_call_put_kernel, "Pke") \ -- EM(rxrpc_call_put_noqueue, "PNQ") \ -+ EM(rxrpc_call_put_noqueue, "PnQ") \ -+ EM(rxrpc_call_put_notimer, "PnT") \ -+ EM(rxrpc_call_put_timer, "PTM") \ - EM(rxrpc_call_put_userid, "Pus") \ - EM(rxrpc_call_queued, "QUE") \ - EM(rxrpc_call_queued_ref, "QUR") \ -@@ -1503,7 +1509,7 @@ TRACE_EVENT(rxrpc_call_reset, - __entry->call_serial = call->rx_serial; - __entry->conn_serial = call->conn->hi_serial; - __entry->tx_seq = call->tx_hard_ack; -- __entry->rx_seq = call->ackr_seen; -+ __entry->rx_seq = call->rx_hard_ack; - ), - - TP_printk("c=%08x %08x:%08x r=%08x/%08x tx=%08x rx=%08x", -diff --git a/include/trace/events/skb.h b/include/trace/events/skb.h -index 9e92f22eb086c..485a1d3034a4b 100644 ---- a/include/trace/events/skb.h -+++ b/include/trace/events/skb.h -@@ -9,29 +9,63 @@ - #include <linux/netdevice.h> - #include <linux/tracepoint.h> - -+#define TRACE_SKB_DROP_REASON \ -+ EM(SKB_DROP_REASON_NOT_SPECIFIED, NOT_SPECIFIED) \ -+ EM(SKB_DROP_REASON_NO_SOCKET, NO_SOCKET) \ -+ EM(SKB_DROP_REASON_PKT_TOO_SMALL, PKT_TOO_SMALL) \ -+ EM(SKB_DROP_REASON_TCP_CSUM, TCP_CSUM) \ -+ EM(SKB_DROP_REASON_SOCKET_FILTER, SOCKET_FILTER) \ -+ EM(SKB_DROP_REASON_UDP_CSUM, UDP_CSUM) \ -+ EM(SKB_DROP_REASON_NETFILTER_DROP, NETFILTER_DROP) \ -+ EM(SKB_DROP_REASON_OTHERHOST, OTHERHOST) \ -+ EM(SKB_DROP_REASON_IP_CSUM, IP_CSUM) \ -+ EM(SKB_DROP_REASON_IP_INHDR, IP_INHDR) \ -+ EM(SKB_DROP_REASON_IP_RPFILTER, IP_RPFILTER) \ -+ EM(SKB_DROP_REASON_UNICAST_IN_L2_MULTICAST, \ -+ UNICAST_IN_L2_MULTICAST) \ -+ EMe(SKB_DROP_REASON_MAX, MAX) -+ -+#undef EM -+#undef EMe -+ -+#define EM(a, b) TRACE_DEFINE_ENUM(a); -+#define EMe(a, b) TRACE_DEFINE_ENUM(a); -+ -+TRACE_SKB_DROP_REASON -+ -+#undef EM -+#undef EMe -+#define EM(a, b) { a, #b }, -+#define EMe(a, b) { a, #b } -+ - /* - * Tracepoint for free an sk_buff: - */ - TRACE_EVENT(kfree_skb, - -- TP_PROTO(struct sk_buff *skb, void *location), -+ TP_PROTO(struct sk_buff *skb, void *location, -+ enum skb_drop_reason reason), - -- TP_ARGS(skb, location), -+ TP_ARGS(skb, location, reason), 
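A note on the mechanism in the skb hunk above: the TRACE_SKB_DROP_REASON list is expanded twice with different definitions of EM()/EMe() -- once into TRACE_DEFINE_ENUM() declarations and once into { value, "name" } pairs consumed by __print_symbolic(). A minimal standalone sketch of the same X-macro trick follows; it is illustrative only, the names in it are invented and it is not part of the patch:

    #include <stdio.h>

    /* One list, expanded twice -- the same trick the tracepoint uses. */
    #define DROP_REASONS \
            EM(NOT_SPECIFIED) \
            EM(NO_SOCKET) \
            EMe(PKT_TOO_SMALL)

    /* First expansion: enum values. */
    #define EM(r)  DROP_##r,
    #define EMe(r) DROP_##r
    enum drop_reason { DROP_REASONS };
    #undef EM
    #undef EMe

    /* Second expansion: value -> name table. */
    #define EM(r)  [DROP_##r] = #r,
    #define EMe(r) [DROP_##r] = #r
    static const char *drop_name[] = { DROP_REASONS };
    #undef EM
    #undef EMe

    int main(void)
    {
            printf("%s\n", drop_name[DROP_NO_SOCKET]); /* prints NO_SOCKET */
            return 0;
    }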
- - TP_STRUCT__entry( -- __field( void *, skbaddr ) -- __field( void *, location ) -- __field( unsigned short, protocol ) -+ __field(void *, skbaddr) -+ __field(void *, location) -+ __field(unsigned short, protocol) -+ __field(enum skb_drop_reason, reason) - ), - - TP_fast_assign( - __entry->skbaddr = skb; - __entry->location = location; - __entry->protocol = ntohs(skb->protocol); -+ __entry->reason = reason; - ), - -- TP_printk("skbaddr=%p protocol=%u location=%p", -- __entry->skbaddr, __entry->protocol, __entry->location) -+ TP_printk("skbaddr=%p protocol=%u location=%p reason: %s", -+ __entry->skbaddr, __entry->protocol, __entry->location, -+ __print_symbolic(__entry->reason, -+ TRACE_SKB_DROP_REASON)) - ); - - TRACE_EVENT(consume_skb, -diff --git a/include/trace/events/sock.h b/include/trace/events/sock.h -index 12c315782766a..777ee6cbe9330 100644 ---- a/include/trace/events/sock.h -+++ b/include/trace/events/sock.h -@@ -98,7 +98,7 @@ TRACE_EVENT(sock_exceed_buf_limit, - - TP_STRUCT__entry( - __array(char, name, 32) -- __field(long *, sysctl_mem) -+ __array(long, sysctl_mem, 3) - __field(long, allocated) - __field(int, sysctl_rmem) - __field(int, rmem_alloc) -@@ -110,7 +110,9 @@ TRACE_EVENT(sock_exceed_buf_limit, - - TP_fast_assign( - strncpy(__entry->name, prot->name, 32); -- __entry->sysctl_mem = prot->sysctl_mem; -+ __entry->sysctl_mem[0] = READ_ONCE(prot->sysctl_mem[0]); -+ __entry->sysctl_mem[1] = READ_ONCE(prot->sysctl_mem[1]); -+ __entry->sysctl_mem[2] = READ_ONCE(prot->sysctl_mem[2]); - __entry->allocated = allocated; - __entry->sysctl_rmem = sk_get_rmem0(sk, prot); - __entry->rmem_alloc = atomic_read(&sk->sk_rmem_alloc); -diff --git a/include/trace/events/spmi.h b/include/trace/events/spmi.h -index 8b60efe18ba68..a6819fd85cdf4 100644 ---- a/include/trace/events/spmi.h -+++ b/include/trace/events/spmi.h -@@ -21,15 +21,15 @@ TRACE_EVENT(spmi_write_begin, - __field ( u8, sid ) - __field ( u16, addr ) - __field ( u8, len ) -- __dynamic_array ( u8, buf, len + 1 ) -+ __dynamic_array ( u8, buf, len ) - ), - - TP_fast_assign( - __entry->opcode = opcode; - __entry->sid = sid; - __entry->addr = addr; -- __entry->len = len + 1; -- memcpy(__get_dynamic_array(buf), buf, len + 1); -+ __entry->len = len; -+ memcpy(__get_dynamic_array(buf), buf, len); - ), - - TP_printk("opc=%d sid=%02d addr=0x%04x len=%d buf=0x[%*phD]", -@@ -92,7 +92,7 @@ TRACE_EVENT(spmi_read_end, - __field ( u16, addr ) - __field ( int, ret ) - __field ( u8, len ) -- __dynamic_array ( u8, buf, len + 1 ) -+ __dynamic_array ( u8, buf, len ) - ), - - TP_fast_assign( -@@ -100,8 +100,8 @@ TRACE_EVENT(spmi_read_end, - __entry->sid = sid; - __entry->addr = addr; - __entry->ret = ret; -- __entry->len = len + 1; -- memcpy(__get_dynamic_array(buf), buf, len + 1); -+ __entry->len = len; -+ memcpy(__get_dynamic_array(buf), buf, len); - ), - - TP_printk("opc=%d sid=%02d addr=0x%04x ret=%d len=%02d buf=0x[%*phD]", -diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h -index 2d04eb96d4183..2a598fb45bf4f 100644 ---- a/include/trace/events/sunrpc.h -+++ b/include/trace/events/sunrpc.h -@@ -925,18 +925,19 @@ TRACE_EVENT(rpc_socket_nospace, - - #define rpc_show_xprt_state(x) \ - __print_flags(x, "|", \ -- { (1UL << XPRT_LOCKED), "LOCKED"}, \ -- { (1UL << XPRT_CONNECTED), "CONNECTED"}, \ -- { (1UL << XPRT_CONNECTING), "CONNECTING"}, \ -- { (1UL << XPRT_CLOSE_WAIT), "CLOSE_WAIT"}, \ -- { (1UL << XPRT_BOUND), "BOUND"}, \ -- { (1UL << XPRT_BINDING), "BINDING"}, \ -- { (1UL << XPRT_CLOSING), "CLOSING"}, \ -- { (1UL << 
XPRT_OFFLINE), "OFFLINE"}, \ -- { (1UL << XPRT_REMOVE), "REMOVE"}, \ -- { (1UL << XPRT_CONGESTED), "CONGESTED"}, \ -- { (1UL << XPRT_CWND_WAIT), "CWND_WAIT"}, \ -- { (1UL << XPRT_WRITE_SPACE), "WRITE_SPACE"}) -+ { BIT(XPRT_LOCKED), "LOCKED" }, \ -+ { BIT(XPRT_CONNECTED), "CONNECTED" }, \ -+ { BIT(XPRT_CONNECTING), "CONNECTING" }, \ -+ { BIT(XPRT_CLOSE_WAIT), "CLOSE_WAIT" }, \ -+ { BIT(XPRT_BOUND), "BOUND" }, \ -+ { BIT(XPRT_BINDING), "BINDING" }, \ -+ { BIT(XPRT_CLOSING), "CLOSING" }, \ -+ { BIT(XPRT_OFFLINE), "OFFLINE" }, \ -+ { BIT(XPRT_REMOVE), "REMOVE" }, \ -+ { BIT(XPRT_CONGESTED), "CONGESTED" }, \ -+ { BIT(XPRT_CWND_WAIT), "CWND_WAIT" }, \ -+ { BIT(XPRT_WRITE_SPACE), "WRITE_SPACE" }, \ -+ { BIT(XPRT_SND_IS_COOKIE), "SND_IS_COOKIE" }) - - DECLARE_EVENT_CLASS(rpc_xprt_lifetime_class, - TP_PROTO( -@@ -975,7 +976,6 @@ DEFINE_RPC_XPRT_LIFETIME_EVENT(connect); - DEFINE_RPC_XPRT_LIFETIME_EVENT(disconnect_auto); - DEFINE_RPC_XPRT_LIFETIME_EVENT(disconnect_done); - DEFINE_RPC_XPRT_LIFETIME_EVENT(disconnect_force); --DEFINE_RPC_XPRT_LIFETIME_EVENT(disconnect_cleanup); - DEFINE_RPC_XPRT_LIFETIME_EVENT(destroy); - - DECLARE_EVENT_CLASS(rpc_xprt_event, -@@ -1133,8 +1133,11 @@ DECLARE_EVENT_CLASS(xprt_writelock_event, - __entry->task_id = -1; - __entry->client_id = -1; - } -- __entry->snd_task_id = xprt->snd_task ? -- xprt->snd_task->tk_pid : -1; -+ if (xprt->snd_task && -+ !test_bit(XPRT_SND_IS_COOKIE, &xprt->state)) -+ __entry->snd_task_id = xprt->snd_task->tk_pid; -+ else -+ __entry->snd_task_id = -1; - ), - - TP_printk("task:%u@%u snd_task:%u", -@@ -1178,8 +1181,12 @@ DECLARE_EVENT_CLASS(xprt_cong_event, - __entry->task_id = -1; - __entry->client_id = -1; - } -- __entry->snd_task_id = xprt->snd_task ? -- xprt->snd_task->tk_pid : -1; -+ if (xprt->snd_task && -+ !test_bit(XPRT_SND_IS_COOKIE, &xprt->state)) -+ __entry->snd_task_id = xprt->snd_task->tk_pid; -+ else -+ __entry->snd_task_id = -1; -+ - __entry->cong = xprt->cong; - __entry->cwnd = xprt->cwnd; - __entry->wait = test_bit(XPRT_CWND_WAIT, &xprt->state); -@@ -1719,10 +1726,11 @@ TRACE_EVENT(svc_xprt_create_err, - const char *program, - const char *protocol, - struct sockaddr *sap, -+ size_t salen, - const struct svc_xprt *xprt - ), - -- TP_ARGS(program, protocol, sap, xprt), -+ TP_ARGS(program, protocol, sap, salen, xprt), - - TP_STRUCT__entry( - __field(long, error) -@@ -1735,7 +1743,7 @@ TRACE_EVENT(svc_xprt_create_err, - __entry->error = PTR_ERR(xprt); - __assign_str(program, program); - __assign_str(protocol, protocol); -- memcpy(__entry->addr, sap, sizeof(__entry->addr)); -+ memcpy(__entry->addr, sap, min(salen, sizeof(__entry->addr))); - ), - - TP_printk("addr=%pISpc program=%s protocol=%s error=%ld", -@@ -1915,17 +1923,18 @@ DECLARE_EVENT_CLASS(svc_deferred_event, - TP_STRUCT__entry( - __field(const void *, dr) - __field(u32, xid) -- __string(addr, dr->xprt->xpt_remotebuf) -+ __array(__u8, addr, INET6_ADDRSTRLEN + 10) - ), - - TP_fast_assign( - __entry->dr = dr; - __entry->xid = be32_to_cpu(*(__be32 *)(dr->args + - (dr->xprt_hlen>>2))); -- __assign_str(addr, dr->xprt->xpt_remotebuf); -+ snprintf(__entry->addr, sizeof(__entry->addr) - 1, -+ "%pISpc", (struct sockaddr *)&dr->addr); - ), - -- TP_printk("addr=%s dr=%p xid=0x%08x", __get_str(addr), __entry->dr, -+ TP_printk("addr=%s dr=%p xid=0x%08x", __entry->addr, __entry->dr, - __entry->xid) - ); - -@@ -2103,17 +2112,17 @@ DECLARE_EVENT_CLASS(svcsock_accept_class, - TP_STRUCT__entry( - __field(long, status) - __string(service, service) -- __array(unsigned char, addr, sizeof(struct 
sockaddr_in6)) -+ __field(unsigned int, netns_ino) - ), - - TP_fast_assign( - __entry->status = status; - __assign_str(service, service); -- memcpy(__entry->addr, &xprt->xpt_local, sizeof(__entry->addr)); -+ __entry->netns_ino = xprt->xpt_net->ns.inum; - ), - -- TP_printk("listener=%pISpc service=%s status=%ld", -- __entry->addr, __get_str(service), __entry->status -+ TP_printk("addr=listener service=%s status=%ld", -+ __get_str(service), __entry->status - ) - ); - -diff --git a/include/trace/events/tcp.h b/include/trace/events/tcp.h -index 521059d8dc0a6..edcd6369de102 100644 ---- a/include/trace/events/tcp.h -+++ b/include/trace/events/tcp.h -@@ -279,7 +279,7 @@ TRACE_EVENT(tcp_probe, - __entry->data_len = skb->len - __tcp_hdrlen(th); - __entry->snd_nxt = tp->snd_nxt; - __entry->snd_una = tp->snd_una; -- __entry->snd_cwnd = tp->snd_cwnd; -+ __entry->snd_cwnd = tcp_snd_cwnd(tp); - __entry->snd_wnd = tp->snd_wnd; - __entry->rcv_wnd = tp->rcv_wnd; - __entry->ssthresh = tcp_current_ssthresh(sk); -diff --git a/include/trace/events/vmscan.h b/include/trace/events/vmscan.h -index 88faf2400ec25..b2eeeb0800126 100644 ---- a/include/trace/events/vmscan.h -+++ b/include/trace/events/vmscan.h -@@ -283,7 +283,7 @@ TRACE_EVENT(mm_vmscan_lru_isolate, - __field(unsigned long, nr_scanned) - __field(unsigned long, nr_skipped) - __field(unsigned long, nr_taken) -- __field(isolate_mode_t, isolate_mode) -+ __field(unsigned int, isolate_mode) - __field(int, lru) - ), - -@@ -294,7 +294,7 @@ TRACE_EVENT(mm_vmscan_lru_isolate, - __entry->nr_scanned = nr_scanned; - __entry->nr_skipped = nr_skipped; - __entry->nr_taken = nr_taken; -- __entry->isolate_mode = isolate_mode; -+ __entry->isolate_mode = (__force unsigned int)isolate_mode; - __entry->lru = lru; - ), - -diff --git a/include/trace/perf.h b/include/trace/perf.h -index dbc6c74defc38..5d48c46a30083 100644 ---- a/include/trace/perf.h -+++ b/include/trace/perf.h -@@ -21,6 +21,23 @@ - #undef __get_bitmask - #define __get_bitmask(field) (char *)__get_dynamic_array(field) - -+#undef __get_rel_dynamic_array -+#define __get_rel_dynamic_array(field) \ -+ ((void *)__entry + \ -+ offsetof(typeof(*__entry), __rel_loc_##field) + \ -+ sizeof(__entry->__rel_loc_##field) + \ -+ (__entry->__rel_loc_##field & 0xffff)) -+ -+#undef __get_rel_dynamic_array_len -+#define __get_rel_dynamic_array_len(field) \ -+ ((__entry->__rel_loc_##field >> 16) & 0xffff) -+ -+#undef __get_rel_str -+#define __get_rel_str(field) ((char *)__get_rel_dynamic_array(field)) -+ -+#undef __get_rel_bitmask -+#define __get_rel_bitmask(field) (char *)__get_rel_dynamic_array(field) -+ - #undef __perf_count - #define __perf_count(c) (__count = (c)) - -diff --git a/include/trace/trace_events.h b/include/trace/trace_events.h -index 08810a4638805..a77b690709cc1 100644 ---- a/include/trace/trace_events.h -+++ b/include/trace/trace_events.h -@@ -108,6 +108,18 @@ TRACE_MAKE_SYSTEM_STR(); - #undef __bitmask - #define __bitmask(item, nr_bits) __dynamic_array(char, item, -1) - -+#undef __rel_dynamic_array -+#define __rel_dynamic_array(type, item, len) u32 __rel_loc_##item; -+ -+#undef __rel_string -+#define __rel_string(item, src) __rel_dynamic_array(char, item, -1) -+ -+#undef __rel_string_len -+#define __rel_string_len(item, src, len) __rel_dynamic_array(char, item, -1) -+ -+#undef __rel_bitmask -+#define __rel_bitmask(item, nr_bits) __rel_dynamic_array(char, item, -1) -+ - #undef TP_STRUCT__entry - #define TP_STRUCT__entry(args...) 
args - -@@ -116,7 +128,7 @@ TRACE_MAKE_SYSTEM_STR(); - struct trace_event_raw_##name { \ - struct trace_entry ent; \ - tstruct \ -- char __data[0]; \ -+ char __data[]; \ - }; \ - \ - static struct trace_event_class event_class_##name; -@@ -200,11 +212,23 @@ TRACE_MAKE_SYSTEM_STR(); - #undef __string - #define __string(item, src) __dynamic_array(char, item, -1) - -+#undef __bitmask -+#define __bitmask(item, nr_bits) __dynamic_array(unsigned long, item, -1) -+ - #undef __string_len - #define __string_len(item, src, len) __dynamic_array(char, item, -1) - --#undef __bitmask --#define __bitmask(item, nr_bits) __dynamic_array(unsigned long, item, -1) -+#undef __rel_dynamic_array -+#define __rel_dynamic_array(type, item, len) u32 item; -+ -+#undef __rel_string -+#define __rel_string(item, src) __rel_dynamic_array(char, item, -1) -+ -+#undef __rel_string_len -+#define __rel_string_len(item, src, len) __rel_dynamic_array(char, item, -1) -+ -+#undef __rel_bitmask -+#define __rel_bitmask(item, nr_bits) __rel_dynamic_array(unsigned long, item, -1) - - #undef DECLARE_EVENT_CLASS - #define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) \ -@@ -293,6 +317,20 @@ TRACE_MAKE_SYSTEM_STR(); - #undef __get_str - #define __get_str(field) ((char *)__get_dynamic_array(field)) - -+#undef __get_rel_dynamic_array -+#define __get_rel_dynamic_array(field) \ -+ ((void *)__entry + \ -+ offsetof(typeof(*__entry), __rel_loc_##field) + \ -+ sizeof(__entry->__rel_loc_##field) + \ -+ (__entry->__rel_loc_##field & 0xffff)) -+ -+#undef __get_rel_dynamic_array_len -+#define __get_rel_dynamic_array_len(field) \ -+ ((__entry->__rel_loc_##field >> 16) & 0xffff) -+ -+#undef __get_rel_str -+#define __get_rel_str(field) ((char *)__get_rel_dynamic_array(field)) -+ - #undef __get_bitmask - #define __get_bitmask(field) \ - ({ \ -@@ -302,6 +340,15 @@ TRACE_MAKE_SYSTEM_STR(); - trace_print_bitmask_seq(p, __bitmask, __bitmask_size); \ - }) - -+#undef __get_rel_bitmask -+#define __get_rel_bitmask(field) \ -+ ({ \ -+ void *__bitmask = __get_rel_dynamic_array(field); \ -+ unsigned int __bitmask_size; \ -+ __bitmask_size = __get_rel_dynamic_array_len(field); \ -+ trace_print_bitmask_seq(p, __bitmask, __bitmask_size); \ -+ }) -+ - #undef __print_flags - #define __print_flags(flag, delim, flag_array...) 
\ - ({ \ -@@ -432,16 +479,18 @@ static struct trace_event_functions trace_event_type_funcs_##call = { \ - - #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) - -+#define ALIGN_STRUCTFIELD(type) ((int)(offsetof(struct {char a; type b;}, b))) -+ - #undef __field_ext - #define __field_ext(_type, _item, _filter_type) { \ - .type = #_type, .name = #_item, \ -- .size = sizeof(_type), .align = __alignof__(_type), \ -+ .size = sizeof(_type), .align = ALIGN_STRUCTFIELD(_type), \ - .is_signed = is_signed_type(_type), .filter_type = _filter_type }, - - #undef __field_struct_ext - #define __field_struct_ext(_type, _item, _filter_type) { \ - .type = #_type, .name = #_item, \ -- .size = sizeof(_type), .align = __alignof__(_type), \ -+ .size = sizeof(_type), .align = ALIGN_STRUCTFIELD(_type), \ - 0, .filter_type = _filter_type }, - - #undef __field -@@ -453,7 +502,7 @@ static struct trace_event_functions trace_event_type_funcs_##call = { \ - #undef __array - #define __array(_type, _item, _len) { \ - .type = #_type"["__stringify(_len)"]", .name = #_item, \ -- .size = sizeof(_type[_len]), .align = __alignof__(_type), \ -+ .size = sizeof(_type[_len]), .align = ALIGN_STRUCTFIELD(_type), \ - .is_signed = is_signed_type(_type), .filter_type = FILTER_OTHER }, - - #undef __dynamic_array -@@ -471,6 +520,21 @@ static struct trace_event_functions trace_event_type_funcs_##call = { \ - #undef __bitmask - #define __bitmask(item, nr_bits) __dynamic_array(unsigned long, item, -1) - -+#undef __rel_dynamic_array -+#define __rel_dynamic_array(_type, _item, _len) { \ -+ .type = "__rel_loc " #_type "[]", .name = #_item, \ -+ .size = 4, .align = 4, \ -+ .is_signed = is_signed_type(_type), .filter_type = FILTER_OTHER }, -+ -+#undef __rel_string -+#define __rel_string(item, src) __rel_dynamic_array(char, item, -1) -+ -+#undef __rel_string_len -+#define __rel_string_len(item, src, len) __rel_dynamic_array(char, item, -1) -+ -+#undef __rel_bitmask -+#define __rel_bitmask(item, nr_bits) __rel_dynamic_array(unsigned long, item, -1) -+ - #undef DECLARE_EVENT_CLASS - #define DECLARE_EVENT_CLASS(call, proto, args, tstruct, func, print) \ - static struct trace_event_fields trace_event_fields_##call[] = { \ -@@ -519,6 +583,22 @@ static struct trace_event_fields trace_event_fields_##call[] = { \ - #undef __string_len - #define __string_len(item, src, len) __dynamic_array(char, item, (len) + 1) - -+#undef __rel_dynamic_array -+#define __rel_dynamic_array(type, item, len) \ -+ __item_length = (len) * sizeof(type); \ -+ __data_offsets->item = __data_size + \ -+ offsetof(typeof(*entry), __data) - \ -+ offsetof(typeof(*entry), __rel_loc_##item) - \ -+ sizeof(u32); \ -+ __data_offsets->item |= __item_length << 16; \ -+ __data_size += __item_length; -+ -+#undef __rel_string -+#define __rel_string(item, src) __rel_dynamic_array(char, item, \ -+ strlen((src) ? (const char *)(src) : "(null)") + 1) -+ -+#undef __rel_string_len -+#define __rel_string_len(item, src, len) __rel_dynamic_array(char, item, (len) + 1) - /* - * __bitmask_size_in_bytes_raw is the number of bytes needed to hold - * num_possible_cpus(). 
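For reference, the __rel_loc word read by the __get_rel_dynamic_array*() macros above packs the data length into the high 16 bits and the offset (relative to the end of the field itself) into the low 16 bits -- that is what the `& 0xffff` and `>> 16` arithmetic implements. A standalone sketch of that packing, assuming nothing beyond the arithmetic shown and not part of the patch:

    #include <assert.h>
    #include <stdint.h>

    /* Pack/unpack a __rel_loc-style u32: length in the high 16 bits,
     * offset relative to the end of the field in the low 16 bits. */
    static uint32_t rel_loc_pack(uint16_t off, uint16_t len)
    {
            return ((uint32_t)len << 16) | off;
    }

    static uint16_t rel_loc_off(uint32_t loc) { return loc & 0xffff; }
    static uint16_t rel_loc_len(uint32_t loc) { return (loc >> 16) & 0xffff; }

    int main(void)
    {
            uint32_t loc = rel_loc_pack(8, 32);

            assert(rel_loc_off(loc) == 8);
            assert(rel_loc_len(loc) == 32);
            return 0;
    }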
-@@ -542,6 +622,10 @@ static struct trace_event_fields trace_event_fields_##call[] = { \ - #define __bitmask(item, nr_bits) __dynamic_array(unsigned long, item, \ - __bitmask_size_in_longs(nr_bits)) - -+#undef __rel_bitmask -+#define __rel_bitmask(item, nr_bits) __rel_dynamic_array(unsigned long, item, \ -+ __bitmask_size_in_longs(nr_bits)) -+ - #undef DECLARE_EVENT_CLASS - #define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) \ - static inline notrace int trace_event_get_offsets_##call( \ -@@ -706,6 +790,37 @@ static inline notrace int trace_event_get_offsets_##call( \ - #define __assign_bitmask(dst, src, nr_bits) \ - memcpy(__get_bitmask(dst), (src), __bitmask_size_in_bytes(nr_bits)) - -+#undef __rel_dynamic_array -+#define __rel_dynamic_array(type, item, len) \ -+ __entry->__rel_loc_##item = __data_offsets.item; -+ -+#undef __rel_string -+#define __rel_string(item, src) __rel_dynamic_array(char, item, -1) -+ -+#undef __rel_string_len -+#define __rel_string_len(item, src, len) __rel_dynamic_array(char, item, -1) -+ -+#undef __assign_rel_str -+#define __assign_rel_str(dst, src) \ -+ strcpy(__get_rel_str(dst), (src) ? (const char *)(src) : "(null)"); -+ -+#undef __assign_rel_str_len -+#define __assign_rel_str_len(dst, src, len) \ -+ do { \ -+ memcpy(__get_rel_str(dst), (src), (len)); \ -+ __get_rel_str(dst)[len] = '\0'; \ -+ } while (0) -+ -+#undef __rel_bitmask -+#define __rel_bitmask(item, nr_bits) __rel_dynamic_array(unsigned long, item, -1) -+ -+#undef __get_rel_bitmask -+#define __get_rel_bitmask(field) (char *)__get_rel_dynamic_array(field) -+ -+#undef __assign_rel_bitmask -+#define __assign_rel_bitmask(dst, src, nr_bits) \ -+ memcpy(__get_rel_bitmask(dst), (src), __bitmask_size_in_bytes(nr_bits)) -+ - #undef TP_fast_assign - #define TP_fast_assign(args...) args - -@@ -770,6 +885,10 @@ static inline void ftrace_test_probe_##call(void) \ - #undef __get_dynamic_array_len - #undef __get_str - #undef __get_bitmask -+#undef __get_rel_dynamic_array -+#undef __get_rel_dynamic_array_len -+#undef __get_rel_str -+#undef __get_rel_bitmask - #undef __print_array - #undef __print_hex_dump - -diff --git a/include/uapi/asm-generic/poll.h b/include/uapi/asm-generic/poll.h -index 41b509f410bf9..f9c520ce4bf4e 100644 ---- a/include/uapi/asm-generic/poll.h -+++ b/include/uapi/asm-generic/poll.h -@@ -29,7 +29,7 @@ - #define POLLRDHUP 0x2000 - #endif - --#define POLLFREE (__force __poll_t)0x4000 /* currently only for epoll */ -+#define POLLFREE (__force __poll_t)0x4000 - - #define POLL_BUSY_LOOP (__force __poll_t)0x8000 - -diff --git a/include/uapi/asm-generic/siginfo.h b/include/uapi/asm-generic/siginfo.h -index 3ba180f550d7c..ffbe4cec9f32d 100644 ---- a/include/uapi/asm-generic/siginfo.h -+++ b/include/uapi/asm-generic/siginfo.h -@@ -99,6 +99,7 @@ union __sifields { - struct { - unsigned long _data; - __u32 _type; -+ __u32 _flags; - } _perf; - }; - } _sigfault; -@@ -164,6 +165,7 @@ typedef struct siginfo { - #define si_pkey _sifields._sigfault._addr_pkey._pkey - #define si_perf_data _sifields._sigfault._perf._data - #define si_perf_type _sifields._sigfault._perf._type -+#define si_perf_flags _sifields._sigfault._perf._flags - #define si_band _sifields._sigpoll._band - #define si_fd _sifields._sigpoll._fd - #define si_call_addr _sifields._sigsys._call_addr -@@ -270,6 +272,11 @@ typedef struct siginfo { - * that are of the form: ((PTRACE_EVENT_XXX << 8) | SIGTRAP) - */ - -+/* -+ * Flags for si_perf_flags if SIGTRAP si_code is TRAP_PERF. 
-+ */ -+#define TRAP_PERF_FLAG_ASYNC (1u << 0) -+ - /* - * SIGCHLD si_codes - */ -diff --git a/include/uapi/asm-generic/signal-defs.h b/include/uapi/asm-generic/signal-defs.h -index fe929e7b77ca1..7572f2f46ee89 100644 ---- a/include/uapi/asm-generic/signal-defs.h -+++ b/include/uapi/asm-generic/signal-defs.h -@@ -45,6 +45,7 @@ - #define SA_UNSUPPORTED 0x00000400 - #define SA_EXPOSE_TAGBITS 0x00000800 - /* 0x00010000 used on mips */ -+/* 0x00800000 used for internal SA_IMMUTABLE */ - /* 0x01000000 used on x86 */ - /* 0x02000000 used on x86 */ - /* -diff --git a/include/uapi/drm/drm_fourcc.h b/include/uapi/drm/drm_fourcc.h -index 9f4bb4a6f358c..808c73c52820f 100644 ---- a/include/uapi/drm/drm_fourcc.h -+++ b/include/uapi/drm/drm_fourcc.h -@@ -1352,11 +1352,11 @@ drm_fourcc_canonicalize_nvidia_format_mod(__u64 modifier) - #define AMD_FMT_MOD_PIPE_MASK 0x7 - - #define AMD_FMT_MOD_SET(field, value) \ -- ((uint64_t)(value) << AMD_FMT_MOD_##field##_SHIFT) -+ ((__u64)(value) << AMD_FMT_MOD_##field##_SHIFT) - #define AMD_FMT_MOD_GET(field, value) \ - (((value) >> AMD_FMT_MOD_##field##_SHIFT) & AMD_FMT_MOD_##field##_MASK) - #define AMD_FMT_MOD_CLEAR(field) \ -- (~((uint64_t)AMD_FMT_MOD_##field##_MASK << AMD_FMT_MOD_##field##_SHIFT)) -+ (~((__u64)AMD_FMT_MOD_##field##_MASK << AMD_FMT_MOD_##field##_SHIFT)) - - #if defined(__cplusplus) - } -diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h -index 791f31dd0abee..a887e582f0e78 100644 ---- a/include/uapi/linux/bpf.h -+++ b/include/uapi/linux/bpf.h -@@ -2276,8 +2276,8 @@ union bpf_attr { - * Return - * The return value depends on the result of the test, and can be: - * -- * * 0, if current task belongs to the cgroup2. -- * * 1, if current task does not belong to the cgroup2. -+ * * 1, if current task belongs to the cgroup2. -+ * * 0, if current task does not belong to the cgroup2. - * * A negative error code, if an error occurred. - * - * long bpf_skb_change_tail(struct sk_buff *skb, u32 len, u64 flags) -@@ -2965,8 +2965,8 @@ union bpf_attr { - * - * # sysctl kernel.perf_event_max_stack=<new value> - * Return -- * A non-negative value equal to or less than *size* on success, -- * or a negative error in case of failure. -+ * The non-negative copied *buf* length equal to or less than -+ * *size* on success, or a negative error in case of failure. - * - * long bpf_skb_load_bytes_relative(const void *skb, u32 offset, void *to, u32 len, u32 start_header) - * Description -@@ -4269,8 +4269,8 @@ union bpf_attr { - * - * # sysctl kernel.perf_event_max_stack=<new value> - * Return -- * A non-negative value equal to or less than *size* on success, -- * or a negative error in case of failure. -+ * The non-negative copied *buf* length equal to or less than -+ * *size* on success, or a negative error in case of failure. 
- * - * long bpf_load_hdr_opt(struct bpf_sock_ops *skops, void *searchby_res, u32 len, u64 flags) - * Description -@@ -5347,7 +5347,8 @@ struct bpf_sock { - __u32 src_ip4; - __u32 src_ip6[4]; - __u32 src_port; /* host byte order */ -- __u32 dst_port; /* network byte order */ -+ __be16 dst_port; /* network byte order */ -+ __u16 :16; /* zero padding */ - __u32 dst_ip4; - __u32 dst_ip6[4]; - __u32 state; -@@ -6222,7 +6223,8 @@ struct bpf_sk_lookup { - __u32 protocol; /* IP protocol (IPPROTO_TCP, IPPROTO_UDP) */ - __u32 remote_ip4; /* Network byte order */ - __u32 remote_ip6[4]; /* Network byte order */ -- __u32 remote_port; /* Network byte order */ -+ __be16 remote_port; /* Network byte order */ -+ __u16 :16; /* Zero padding */ - __u32 local_ip4; /* Network byte order */ - __u32 local_ip6[4]; /* Network byte order */ - __u32 local_port; /* Host byte order */ -diff --git a/include/uapi/linux/btrfs_tree.h b/include/uapi/linux/btrfs_tree.h -index e1c4c732aabac..5416f1f1a77a8 100644 ---- a/include/uapi/linux/btrfs_tree.h -+++ b/include/uapi/linux/btrfs_tree.h -@@ -146,7 +146,9 @@ - - /* - * dir items are the name -> inode pointers in a directory. There is one -- * for every name in a directory. -+ * for every name in a directory. BTRFS_DIR_LOG_ITEM_KEY is no longer used -+ * but it's still defined here for documentation purposes and to help avoid -+ * having its numerical value reused in the future. - */ - #define BTRFS_DIR_LOG_ITEM_KEY 60 - #define BTRFS_DIR_LOG_INDEX_KEY 72 -diff --git a/include/uapi/linux/byteorder/big_endian.h b/include/uapi/linux/byteorder/big_endian.h -index 2199adc6a6c20..80aa5c41a7636 100644 ---- a/include/uapi/linux/byteorder/big_endian.h -+++ b/include/uapi/linux/byteorder/big_endian.h -@@ -9,6 +9,7 @@ - #define __BIG_ENDIAN_BITFIELD - #endif - -+#include <linux/stddef.h> - #include <linux/types.h> - #include <linux/swab.h> - -diff --git a/include/uapi/linux/byteorder/little_endian.h b/include/uapi/linux/byteorder/little_endian.h -index 601c904fd5cd9..cd98982e7523e 100644 ---- a/include/uapi/linux/byteorder/little_endian.h -+++ b/include/uapi/linux/byteorder/little_endian.h -@@ -9,6 +9,7 @@ - #define __LITTLE_ENDIAN_BITFIELD - #endif - -+#include <linux/stddef.h> - #include <linux/types.h> - #include <linux/swab.h> - -diff --git a/include/uapi/linux/can/error.h b/include/uapi/linux/can/error.h -index 34633283de641..a1000cb630632 100644 ---- a/include/uapi/linux/can/error.h -+++ b/include/uapi/linux/can/error.h -@@ -120,6 +120,9 @@ - #define CAN_ERR_TRX_CANL_SHORT_TO_GND 0x70 /* 0111 0000 */ - #define CAN_ERR_TRX_CANL_SHORT_TO_CANH 0x80 /* 1000 0000 */ - --/* controller specific additional information / data[5..7] */ -+/* data[5] is reserved (do not use) */ -+ -+/* TX error counter / data[6] */ -+/* RX error counter / data[7] */ - - #endif /* _UAPI_CAN_ERROR_H */ -diff --git a/include/uapi/linux/can/isotp.h b/include/uapi/linux/can/isotp.h -index c55935b64ccc8..590f8aea2b6d2 100644 ---- a/include/uapi/linux/can/isotp.h -+++ b/include/uapi/linux/can/isotp.h -@@ -137,20 +137,16 @@ struct can_isotp_ll_options { - #define CAN_ISOTP_WAIT_TX_DONE 0x400 /* wait for tx completion */ - #define CAN_ISOTP_SF_BROADCAST 0x800 /* 1-to-N functional addressing */ - --/* default values */ -+/* protocol machine default values */ - - #define CAN_ISOTP_DEFAULT_FLAGS 0 - #define CAN_ISOTP_DEFAULT_EXT_ADDRESS 0x00 - #define CAN_ISOTP_DEFAULT_PAD_CONTENT 0xCC /* prevent bit-stuffing */ --#define CAN_ISOTP_DEFAULT_FRAME_TXTIME 0 -+#define CAN_ISOTP_DEFAULT_FRAME_TXTIME 50000 /* 50 micro 
seconds */
- #define CAN_ISOTP_DEFAULT_RECV_BS 0
- #define CAN_ISOTP_DEFAULT_RECV_STMIN 0x00
- #define CAN_ISOTP_DEFAULT_RECV_WFTMAX 0
-
--#define CAN_ISOTP_DEFAULT_LL_MTU CAN_MTU
--#define CAN_ISOTP_DEFAULT_LL_TX_DL CAN_MAX_DLEN
--#define CAN_ISOTP_DEFAULT_LL_TX_FLAGS 0
+-cleanup:
+- spin_lock(q.lock_ptr);
+- /*
+- * If we failed to acquire the lock (deadlock/signal/timeout), we must
+- * first acquire the hb->lock before removing the lock from the
+- * rt_mutex waitqueue, such that we can keep the hb and rt_mutex wait
+- * lists consistent.
+- *
+- * In particular; it is important that futex_unlock_pi() can not
+- * observe this inconsistency.
+- */
+- if (ret && !rt_mutex_cleanup_proxy_lock(&q.pi_state->pi_mutex, &rt_waiter))
+- ret = 0;
+-
+-no_block:
+- /*
+- * Fixup the pi_state owner and possibly acquire the lock if we
+- * haven't already.
+- */
+- res = fixup_owner(uaddr, &q, !ret);
+- /*
+- * If fixup_owner() returned an error, propagate that. If it acquired
+- * the lock, clear our -ETIMEDOUT or -EINTR.
+- */
+- if (res)
+- ret = (res < 0) ? res : 0;
-
- /*
- * Remark on CAN_ISOTP_DEFAULT_RECV_* values:
- *
-@@ -162,4 +158,24 @@ struct can_isotp_ll_options {
- * consistency and copied directly into the flow control (FC) frame.
- */
-
-+/* link layer default values => make use of Classical CAN frames */
-+
-+#define CAN_ISOTP_DEFAULT_LL_MTU CAN_MTU
-+#define CAN_ISOTP_DEFAULT_LL_TX_DL CAN_MAX_DLEN
-+#define CAN_ISOTP_DEFAULT_LL_TX_FLAGS 0
-+
-+/*
-+ * The CAN_ISOTP_DEFAULT_FRAME_TXTIME has become a non-zero value as
-+ * it only makes sense for isotp implementation tests to run without
-+ * a N_As value. As user space applications usually do not set the
-+ * frame_txtime element of struct can_isotp_options the new in-kernel
-+ * default is very likely overwritten with zero when the sockopt()
-+ * CAN_ISOTP_OPTS is invoked.
-+ * To make sure that a N_As value of zero is only set intentionally the
-+ * value '0' is now interpreted as 'do not change the current value'.
-+ * When a frame_txtime of zero is required for testing purposes this
-+ * CAN_ISOTP_FRAME_TXTIME_ZERO u32 value has to be set in frame_txtime.
-+ */ -+#define CAN_ISOTP_FRAME_TXTIME_ZERO 0xFFFFFFFF -+ - #endif /* !_UAPI_CAN_ISOTP_H */ -diff --git a/include/uapi/linux/cyclades.h b/include/uapi/linux/cyclades.h -new file mode 100644 -index 0000000000000..6225c5aebe06a ---- /dev/null -+++ b/include/uapi/linux/cyclades.h -@@ -0,0 +1,35 @@ -+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -+ -+#ifndef _UAPI_LINUX_CYCLADES_H -+#define _UAPI_LINUX_CYCLADES_H -+ -+#warning "Support for features provided by this header has been removed" -+#warning "Please consider updating your code" -+ -+struct cyclades_monitor { -+ unsigned long int_count; -+ unsigned long char_count; -+ unsigned long char_max; -+ unsigned long char_last; -+}; -+ -+#define CYGETMON 0x435901 -+#define CYGETTHRESH 0x435902 -+#define CYSETTHRESH 0x435903 -+#define CYGETDEFTHRESH 0x435904 -+#define CYSETDEFTHRESH 0x435905 -+#define CYGETTIMEOUT 0x435906 -+#define CYSETTIMEOUT 0x435907 -+#define CYGETDEFTIMEOUT 0x435908 -+#define CYSETDEFTIMEOUT 0x435909 -+#define CYSETRFLOW 0x43590a -+#define CYGETRFLOW 0x43590b -+#define CYSETRTSDTR_INV 0x43590c -+#define CYGETRTSDTR_INV 0x43590d -+#define CYZSETPOLLCYCLE 0x43590e -+#define CYZGETPOLLCYCLE 0x43590f -+#define CYGETCD1400VER 0x435910 -+#define CYSETWAIT 0x435912 -+#define CYGETWAIT 0x435913 -+ -+#endif /* _UAPI_LINUX_CYCLADES_H */ -diff --git a/include/uapi/linux/dma-buf.h b/include/uapi/linux/dma-buf.h -index 8e4a2ca0bcbf7..b1523cb8ab307 100644 ---- a/include/uapi/linux/dma-buf.h -+++ b/include/uapi/linux/dma-buf.h -@@ -92,7 +92,7 @@ struct dma_buf_sync { - * between them in actual uapi, they're just different numbers. - */ - #define DMA_BUF_SET_NAME _IOW(DMA_BUF_BASE, 1, const char *) --#define DMA_BUF_SET_NAME_A _IOW(DMA_BUF_BASE, 1, u32) --#define DMA_BUF_SET_NAME_B _IOW(DMA_BUF_BASE, 1, u64) -+#define DMA_BUF_SET_NAME_A _IOW(DMA_BUF_BASE, 1, __u32) -+#define DMA_BUF_SET_NAME_B _IOW(DMA_BUF_BASE, 1, __u64) - - #endif -diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h -index 5545f1ca9237c..f7204bdfe8db1 100644 ---- a/include/uapi/linux/ethtool_netlink.h -+++ b/include/uapi/linux/ethtool_netlink.h -@@ -407,7 +407,9 @@ enum { - ETHTOOL_A_PAUSE_STAT_TX_FRAMES, - ETHTOOL_A_PAUSE_STAT_RX_FRAMES, - -- /* add new constants above here */ -+ /* add new constants above here -+ * adjust ETHTOOL_PAUSE_STAT_CNT if adding non-stats! 
-+ */ - __ETHTOOL_A_PAUSE_STAT_CNT, - ETHTOOL_A_PAUSE_STAT_MAX = (__ETHTOOL_A_PAUSE_STAT_CNT - 1) - }; -diff --git a/include/uapi/linux/input-event-codes.h b/include/uapi/linux/input-event-codes.h -index 225ec87d4f228..7989d9483ea75 100644 ---- a/include/uapi/linux/input-event-codes.h -+++ b/include/uapi/linux/input-event-codes.h -@@ -278,7 +278,8 @@ - #define KEY_PAUSECD 201 - #define KEY_PROG3 202 - #define KEY_PROG4 203 --#define KEY_DASHBOARD 204 /* AL Dashboard */ -+#define KEY_ALL_APPLICATIONS 204 /* AC Desktop Show All Applications */ -+#define KEY_DASHBOARD KEY_ALL_APPLICATIONS - #define KEY_SUSPEND 205 - #define KEY_CLOSE 206 /* AC Close */ - #define KEY_PLAY 207 -@@ -612,6 +613,7 @@ - #define KEY_ASSISTANT 0x247 /* AL Context-aware desktop assistant */ - #define KEY_KBD_LAYOUT_NEXT 0x248 /* AC Next Keyboard Layout Select */ - #define KEY_EMOJI_PICKER 0x249 /* Show/hide emoji picker (HUTRR101) */ -+#define KEY_DICTATE 0x24a /* Start or Stop Voice Dictation Session (HUTRR99) */ - - #define KEY_BRIGHTNESS_MIN 0x250 /* Set Brightness to Minimum */ - #define KEY_BRIGHTNESS_MAX 0x251 /* Set Brightness to Maximum */ -diff --git a/include/uapi/linux/landlock.h b/include/uapi/linux/landlock.h -index b3d952067f59c..21c8d58283c9e 100644 ---- a/include/uapi/linux/landlock.h -+++ b/include/uapi/linux/landlock.h -@@ -33,7 +33,9 @@ struct landlock_ruleset_attr { - * - %LANDLOCK_CREATE_RULESET_VERSION: Get the highest supported Landlock ABI - * version. - */ -+/* clang-format off */ - #define LANDLOCK_CREATE_RULESET_VERSION (1U << 0) -+/* clang-format on */ - - /** - * enum landlock_rule_type - Landlock rule type -@@ -60,8 +62,9 @@ struct landlock_path_beneath_attr { - */ - __u64 allowed_access; - /** -- * @parent_fd: File descriptor, open with ``O_PATH``, which identifies -- * the parent directory of a file hierarchy, or just a file. -+ * @parent_fd: File descriptor, preferably opened with ``O_PATH``, -+ * which identifies the parent directory of a file hierarchy, or just a -+ * file. - */ - __s32 parent_fd; - /* -@@ -120,6 +123,7 @@ struct landlock_path_beneath_attr { - * :manpage:`access(2)`. - * Future Landlock evolutions will enable to restrict them. - */ -+/* clang-format off */ - #define LANDLOCK_ACCESS_FS_EXECUTE (1ULL << 0) - #define LANDLOCK_ACCESS_FS_WRITE_FILE (1ULL << 1) - #define LANDLOCK_ACCESS_FS_READ_FILE (1ULL << 2) -@@ -133,5 +137,6 @@ struct landlock_path_beneath_attr { - #define LANDLOCK_ACCESS_FS_MAKE_FIFO (1ULL << 10) - #define LANDLOCK_ACCESS_FS_MAKE_BLOCK (1ULL << 11) - #define LANDLOCK_ACCESS_FS_MAKE_SYM (1ULL << 12) -+/* clang-format on */ - - #endif /* _UAPI_LINUX_LANDLOCK_H */ -diff --git a/include/uapi/linux/mptcp.h b/include/uapi/linux/mptcp.h -index f66038b9551fa..80c40194e2977 100644 ---- a/include/uapi/linux/mptcp.h -+++ b/include/uapi/linux/mptcp.h -@@ -129,19 +129,21 @@ struct mptcp_info { - * MPTCP_EVENT_REMOVED: token, rem_id - * An address has been lost by the peer. - * -- * MPTCP_EVENT_SUB_ESTABLISHED: token, family, saddr4 | saddr6, -- * daddr4 | daddr6, sport, dport, backup, -- * if_idx [, error] -+ * MPTCP_EVENT_SUB_ESTABLISHED: token, family, loc_id, rem_id, -+ * saddr4 | saddr6, daddr4 | daddr6, sport, -+ * dport, backup, if_idx [, error] - * A new subflow has been established. 'error' should not be set. 
- * -- * MPTCP_EVENT_SUB_CLOSED: token, family, saddr4 | saddr6, daddr4 | daddr6, -- * sport, dport, backup, if_idx [, error] -+ * MPTCP_EVENT_SUB_CLOSED: token, family, loc_id, rem_id, saddr4 | saddr6, -+ * daddr4 | daddr6, sport, dport, backup, if_idx -+ * [, error] - * A subflow has been closed. An error (copy of sk_err) could be set if an - * error has been detected for this subflow. - * -- * MPTCP_EVENT_SUB_PRIORITY: token, family, saddr4 | saddr6, daddr4 | daddr6, -- * sport, dport, backup, if_idx [, error] -- * The priority of a subflow has changed. 'error' should not be set. -+ * MPTCP_EVENT_SUB_PRIORITY: token, family, loc_id, rem_id, saddr4 | saddr6, -+ * daddr4 | daddr6, sport, dport, backup, if_idx -+ * [, error] -+ * The priority of a subflow has changed. 'error' should not be set. - */ - enum mptcp_event_type { - MPTCP_EVENT_UNSPEC = 0, -diff --git a/include/uapi/linux/netfilter/nf_conntrack_common.h b/include/uapi/linux/netfilter/nf_conntrack_common.h -index 4b3395082d15c..26071021e986f 100644 ---- a/include/uapi/linux/netfilter/nf_conntrack_common.h -+++ b/include/uapi/linux/netfilter/nf_conntrack_common.h -@@ -106,7 +106,7 @@ enum ip_conntrack_status { - IPS_NAT_CLASH = IPS_UNTRACKED, - #endif - -- /* Conntrack got a helper explicitly attached via CT target. */ -+ /* Conntrack got a helper explicitly attached (ruleset, ctnetlink). */ - IPS_HELPER_BIT = 13, - IPS_HELPER = (1 << IPS_HELPER_BIT), - -diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h -index e94d1fa554cb2..07871c8a06014 100644 ---- a/include/uapi/linux/netfilter/nf_tables.h -+++ b/include/uapi/linux/netfilter/nf_tables.h -@@ -753,11 +753,13 @@ enum nft_dynset_attributes { - * @NFT_PAYLOAD_LL_HEADER: link layer header - * @NFT_PAYLOAD_NETWORK_HEADER: network header - * @NFT_PAYLOAD_TRANSPORT_HEADER: transport header -+ * @NFT_PAYLOAD_INNER_HEADER: inner header / payload - */ - enum nft_payload_bases { - NFT_PAYLOAD_LL_HEADER, - NFT_PAYLOAD_NETWORK_HEADER, - NFT_PAYLOAD_TRANSPORT_HEADER, -+ NFT_PAYLOAD_INNER_HEADER, - }; - - /** -diff --git a/include/uapi/linux/netfilter/xt_IDLETIMER.h b/include/uapi/linux/netfilter/xt_IDLETIMER.h -index 49ddcdc61c094..7bfb31a66fc9b 100644 ---- a/include/uapi/linux/netfilter/xt_IDLETIMER.h -+++ b/include/uapi/linux/netfilter/xt_IDLETIMER.h -@@ -1,6 +1,5 @@ -+/* SPDX-License-Identifier: GPL-2.0-only WITH Linux-syscall-note */ - /* -- * linux/include/linux/netfilter/xt_IDLETIMER.h +- unqueue_me_pi(&q); +- spin_unlock(q.lock_ptr); +- goto out; +- +-out_unlock_put_key: +- queue_unlock(hb); +- +-out: +- if (to) { +- hrtimer_cancel(&to->timer); +- destroy_hrtimer_on_stack(&to->timer); +- } +- return ret != -EINTR ? ret : -ERESTARTNOINTR; +- +-uaddr_faulted: +- queue_unlock(hb); +- +- ret = fault_in_user_writeable(uaddr); +- if (ret) +- goto out; +- +- if (!(flags & FLAGS_SHARED)) +- goto retry_private; +- +- goto retry; +-} +- +-/* +- * Userspace attempted a TID -> 0 atomic transition, and failed. +- * This is the in-kernel slowpath: we look up the PI state (if any), +- * and do the rt-mutex unlock. 
+- */
+-static int futex_unlock_pi(u32 __user *uaddr, unsigned int flags)
+-{
+- u32 curval, uval, vpid = task_pid_vnr(current);
+- union futex_key key = FUTEX_KEY_INIT;
+- struct futex_hash_bucket *hb;
+- struct futex_q *top_waiter;
+- int ret;
+-
+- if (!IS_ENABLED(CONFIG_FUTEX_PI))
+- return -ENOSYS;
+-
+-retry:
+- if (get_user(uval, uaddr))
+- return -EFAULT;
+- /*
+- * We release only a lock we actually own:
+- */
+- if ((uval & FUTEX_TID_MASK) != vpid)
+- return -EPERM;
+-
+- ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &key, FUTEX_WRITE);
+- if (ret)
+- return ret;
+-
+- hb = hash_futex(&key);
+- spin_lock(&hb->lock);
+-
+- /*
+- * Check waiters first. We do not trust user space values at
+- * all and we at least want to know if user space fiddled
+- * with the futex value instead of blindly unlocking.
+- */
+- top_waiter = futex_top_waiter(hb, &key);
+- if (top_waiter) {
+- struct futex_pi_state *pi_state = top_waiter->pi_state;
+-
+- ret = -EINVAL;
+- if (!pi_state)
+- goto out_unlock;
+-
+- /*
+- * If current does not own the pi_state then the futex is
+- * inconsistent and user space fiddled with the futex value.
+- */
+- if (pi_state->owner != current)
+- goto out_unlock;
+-
+- get_pi_state(pi_state);
+- /*
+- * By taking wait_lock while still holding hb->lock, we ensure
+- * there is no point where we hold neither; and therefore
+- * wake_futex_pi() must observe a state consistent with what we
+- * observed.
+- *
+- * In particular; this forces __rt_mutex_start_proxy() to
+- * complete such that we're guaranteed to observe the
+- * rt_waiter. Also see the WARN in wake_futex_pi().
+- */
+- raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);
+- spin_unlock(&hb->lock);
+-
+- /* drops pi_state->pi_mutex.wait_lock */
+- ret = wake_futex_pi(uaddr, uval, pi_state);
+-
+- put_pi_state(pi_state);
+-
+- /*
+- * Success, we're done! No tricky corner cases.
+- */
+- if (!ret)
+- return ret;
+- /*
+- * The atomic access to the futex value generated a
+- * pagefault, so retry the user-access and the wakeup:
+- */
+- if (ret == -EFAULT)
+- goto pi_faulted;
+- /*
+- * An unconditional UNLOCK_PI op raced against a waiter
+- * setting the FUTEX_WAITERS bit. Try again.
+- */
+- if (ret == -EAGAIN)
+- goto pi_retry;
+- /*
+- * wake_futex_pi has detected invalid state. Tell user
+- * space.
+- */
+- return ret;
+- }
+-
+- /*
+- * We have no kernel internal state, i.e. no waiters in the
+- * kernel. Waiters which are about to queue themselves are stuck
+- * on hb->lock. So we can safely ignore them. We do neither
+- * preserve the WAITERS bit nor the OWNER_DIED one. We are the
+- * owner.
+- */
+- if ((ret = cmpxchg_futex_value_locked(&curval, uaddr, uval, 0))) {
+- spin_unlock(&hb->lock);
+- switch (ret) {
+- case -EFAULT:
+- goto pi_faulted;
+-
+- case -EAGAIN:
+- goto pi_retry;
+-
+- default:
+- WARN_ON_ONCE(1);
+- return ret;
+- }
+- }
+-
+- /*
+- * If uval has changed, let user space handle it.
+- */
+- ret = (curval == uval) ? 0 : -EAGAIN;
+-
+-out_unlock:
+- spin_unlock(&hb->lock);
+- return ret;
+-
+-pi_retry:
+- cond_resched();
+- goto retry;
+-
+-pi_faulted:
+-
+- ret = fault_in_user_writeable(uaddr);
+- if (!ret)
+- goto retry;
+-
+- return ret;
+-}
+-
+-/**
+- * handle_early_requeue_pi_wakeup() - Handle early wakeup on the initial futex
+- * @hb: the hash_bucket futex_q was originally enqueued on
+- * @q: the futex_q woken while waiting to be requeued
+- * @timeout: the timeout associated with the wait (NULL if none)
- *
- * Header file for Xtables timer target module.
- * - * Copyright (C) 2004, 2010 Nokia Corporation -@@ -10,20 +9,6 @@ - * by Luciano Coelho <luciano.coelho@nokia.com> - * - * Contact: Luciano Coelho <luciano.coelho@nokia.com> +- * Determine the cause for the early wakeup. - * -- * This program is free software; you can redistribute it and/or -- * modify it under the terms of the GNU General Public License -- * version 2 as published by the Free Software Foundation. +- * Return: +- * -EWOULDBLOCK or -ETIMEDOUT or -ERESTARTNOINTR +- */ +-static inline +-int handle_early_requeue_pi_wakeup(struct futex_hash_bucket *hb, +- struct futex_q *q, +- struct hrtimer_sleeper *timeout) +-{ +- int ret; +- +- /* +- * With the hb lock held, we avoid races while we process the wakeup. +- * We only need to hold hb (and not hb2) to ensure atomicity as the +- * wakeup code can't change q.key from uaddr to uaddr2 if we hold hb. +- * It can't be requeued from uaddr2 to something else since we don't +- * support a PI aware source futex for requeue. +- */ +- WARN_ON_ONCE(&hb->lock != q->lock_ptr); +- +- /* +- * We were woken prior to requeue by a timeout or a signal. +- * Unqueue the futex_q and determine which it was. +- */ +- plist_del(&q->list, &hb->chain); +- hb_waiters_dec(hb); +- +- /* Handle spurious wakeups gracefully */ +- ret = -EWOULDBLOCK; +- if (timeout && !timeout->task) +- ret = -ETIMEDOUT; +- else if (signal_pending(current)) +- ret = -ERESTARTNOINTR; +- return ret; +-} +- +-/** +- * futex_wait_requeue_pi() - Wait on uaddr and take uaddr2 +- * @uaddr: the futex we initially wait on (non-pi) +- * @flags: futex flags (FLAGS_SHARED, FLAGS_CLOCKRT, etc.), they must be +- * the same type, no requeueing from private to shared, etc. +- * @val: the expected value of uaddr +- * @abs_time: absolute timeout +- * @bitset: 32 bit wakeup bitset set by userspace, defaults to all +- * @uaddr2: the pi futex we will take prior to returning to user-space - * -- * This program is distributed in the hope that it will be useful, but -- * WITHOUT ANY WARRANTY; without even the implied warranty of -- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -- * General Public License for more details. +- * The caller will wait on uaddr and will be requeued by futex_requeue() to +- * uaddr2 which must be PI aware and unique from uaddr. Normal wakeup will wake +- * on uaddr2 and complete the acquisition of the rt_mutex prior to returning to +- * userspace. This ensures the rt_mutex maintains an owner when it has waiters; +- * without one, the pi logic would not know which task to boost/deboost, if +- * there was a need to. 
- * -- * You should have received a copy of the GNU General Public License -- * along with this program; if not, write to the Free Software -- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA -- * 02110-1301 USA - */ - - #ifndef _XT_IDLETIMER_H -diff --git a/include/uapi/linux/nfc.h b/include/uapi/linux/nfc.h -index f6e3c8c9c7449..4fa4e979e948a 100644 ---- a/include/uapi/linux/nfc.h -+++ b/include/uapi/linux/nfc.h -@@ -263,7 +263,7 @@ enum nfc_sdp_attr { - #define NFC_SE_ENABLED 0x1 - - struct sockaddr_nfc { -- sa_family_t sa_family; -+ __kernel_sa_family_t sa_family; - __u32 dev_idx; - __u32 target_idx; - __u32 nfc_protocol; -@@ -271,14 +271,14 @@ struct sockaddr_nfc { - - #define NFC_LLCP_MAX_SERVICE_NAME 63 - struct sockaddr_nfc_llcp { -- sa_family_t sa_family; -+ __kernel_sa_family_t sa_family; - __u32 dev_idx; - __u32 target_idx; - __u32 nfc_protocol; - __u8 dsap; /* Destination SAP, if known */ - __u8 ssap; /* Source SAP to be bound to */ - char service_name[NFC_LLCP_MAX_SERVICE_NAME]; /* Service name URI */; -- size_t service_name_len; -+ __kernel_size_t service_name_len; - }; - - /* NFC socket protocols */ -diff --git a/include/uapi/linux/omap3isp.h b/include/uapi/linux/omap3isp.h -index 87b55755f4ffe..d9db7ad438908 100644 ---- a/include/uapi/linux/omap3isp.h -+++ b/include/uapi/linux/omap3isp.h -@@ -162,6 +162,7 @@ struct omap3isp_h3a_aewb_config { - * struct omap3isp_stat_data - Statistic data sent to or received from user - * @ts: Timestamp of returned framestats. - * @buf: Pointer to pass to user. -+ * @buf_size: Size of buffer. - * @frame_number: Frame number of requested stats. - * @cur_frame: Current frame number being processed. - * @config_counter: Number of the configuration associated with the data. -@@ -176,10 +177,12 @@ struct omap3isp_stat_data { - struct timeval ts; - #endif - void __user *buf; -- __u32 buf_size; -- __u16 frame_number; -- __u16 cur_frame; -- __u16 config_counter; -+ __struct_group(/* no tag */, frame, /* no attrs */, -+ __u32 buf_size; -+ __u16 frame_number; -+ __u16 cur_frame; -+ __u16 config_counter; -+ ); - }; - - #ifdef __KERNEL__ -@@ -189,10 +192,12 @@ struct omap3isp_stat_data_time32 { - __s32 tv_usec; - } ts; - __u32 buf; -- __u32 buf_size; -- __u16 frame_number; -- __u16 cur_frame; -- __u16 config_counter; -+ __struct_group(/* no tag */, frame, /* no attrs */, -+ __u32 buf_size; -+ __u16 frame_number; -+ __u16 cur_frame; -+ __u16 config_counter; -+ ); - }; - #endif - -diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h -index e709ae8235e7f..ff6ccbc6efe96 100644 ---- a/include/uapi/linux/pci_regs.h -+++ b/include/uapi/linux/pci_regs.h -@@ -504,6 +504,12 @@ - #define PCI_EXP_DEVCTL_URRE 0x0008 /* Unsupported Request Reporting En. 
*/ - #define PCI_EXP_DEVCTL_RELAX_EN 0x0010 /* Enable relaxed ordering */ - #define PCI_EXP_DEVCTL_PAYLOAD 0x00e0 /* Max_Payload_Size */ -+#define PCI_EXP_DEVCTL_PAYLOAD_128B 0x0000 /* 128 Bytes */ -+#define PCI_EXP_DEVCTL_PAYLOAD_256B 0x0020 /* 256 Bytes */ -+#define PCI_EXP_DEVCTL_PAYLOAD_512B 0x0040 /* 512 Bytes */ -+#define PCI_EXP_DEVCTL_PAYLOAD_1024B 0x0060 /* 1024 Bytes */ -+#define PCI_EXP_DEVCTL_PAYLOAD_2048B 0x0080 /* 2048 Bytes */ -+#define PCI_EXP_DEVCTL_PAYLOAD_4096B 0x00a0 /* 4096 Bytes */ - #define PCI_EXP_DEVCTL_EXT_TAG 0x0100 /* Extended Tag Field Enable */ - #define PCI_EXP_DEVCTL_PHANTOM 0x0200 /* Phantom Functions Enable */ - #define PCI_EXP_DEVCTL_AUX_PME 0x0400 /* Auxiliary Power PM Enable */ -diff --git a/include/uapi/linux/rfkill.h b/include/uapi/linux/rfkill.h -index 9b77cfc42efa3..db6c8588c1d0c 100644 ---- a/include/uapi/linux/rfkill.h -+++ b/include/uapi/linux/rfkill.h -@@ -159,8 +159,16 @@ struct rfkill_event_ext { - * old behaviour for all userspace, unless it explicitly opts in to the - * rules outlined here by using the new &struct rfkill_event_ext. - * -- * Userspace using &struct rfkill_event_ext must adhere to the following -- * rules -+ * Additionally, some other userspace (bluez, g-s-d) was reading with a -+ * large size but as streaming reads rather than message-based, or with -+ * too strict checks for the returned size. So eventually, we completely -+ * reverted this, and extended messages need to be opted in to by using -+ * an ioctl: -+ * -+ * ioctl(fd, RFKILL_IOCTL_MAX_SIZE, sizeof(struct rfkill_event_ext)); -+ * -+ * Userspace using &struct rfkill_event_ext and the ioctl must adhere to -+ * the following rules: - * - * 1. accept short writes, optionally using them to detect that it's - * running on an older kernel; -@@ -175,6 +183,8 @@ struct rfkill_event_ext { - #define RFKILL_IOC_MAGIC 'R' - #define RFKILL_IOC_NOINPUT 1 - #define RFKILL_IOCTL_NOINPUT _IO(RFKILL_IOC_MAGIC, RFKILL_IOC_NOINPUT) -+#define RFKILL_IOC_MAX_SIZE 2 -+#define RFKILL_IOCTL_MAX_SIZE _IOW(RFKILL_IOC_MAGIC, RFKILL_IOC_MAX_SIZE, __u32) - - /* and that's all userspace gets */ - -diff --git a/include/uapi/linux/rseq.h b/include/uapi/linux/rseq.h -index 9a402fdb60e97..77ee207623a9b 100644 ---- a/include/uapi/linux/rseq.h -+++ b/include/uapi/linux/rseq.h -@@ -105,23 +105,11 @@ struct rseq { - * Read and set by the kernel. Set by user-space with single-copy - * atomicity semantics. This field should only be updated by the - * thread which registered this data structure. Aligned on 64-bit. -+ * -+ * 32-bit architectures should update the low order bits of the -+ * rseq_cs field, leaving the high order bits initialized to 0. - */ -- union { -- __u64 ptr64; --#ifdef __LP64__ -- __u64 ptr; --#else -- struct { --#if (defined(__BYTE_ORDER) && (__BYTE_ORDER == __BIG_ENDIAN)) || defined(__BIG_ENDIAN) -- __u32 padding; /* Initialized to zero. */ -- __u32 ptr32; --#else /* LITTLE */ -- __u32 ptr32; -- __u32 padding; /* Initialized to zero. */ --#endif /* ENDIAN */ -- } ptr; --#endif -- } rseq_cs; -+ __u64 rseq_cs; - - /* - * Restartable sequences flags field. 
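The rseq hunk above collapses the old ptr32/padding union into a plain __u64, so a 32-bit process simply stores its pointer into the low-order bits and leaves the high bits zero. A standalone sketch of that update rule follows; the struct name is invented for illustration and it is not part of the patch:

    #include <stdint.h>
    #include <stdio.h>

    struct rseq_like {
            uint64_t rseq_cs; /* 64-bit on all architectures */
    };

    int main(void)
    {
            struct rseq_like r = { 0 };
            uintptr_t cs = 0x1000; /* stand-in for a critical-section descriptor address */

            /* Plain assignment zero-extends on 32-bit targets, which is the
             * "high order bits initialized to 0" rule from the comment. */
            r.rseq_cs = (uint64_t)cs;
            printf("0x%llx\n", (unsigned long long)r.rseq_cs);
            return 0;
    }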
-diff --git a/include/uapi/linux/serial_core.h b/include/uapi/linux/serial_core.h
-index c4042dcfdc0c3..8885e69178bd7 100644
---- a/include/uapi/linux/serial_core.h
-+++ b/include/uapi/linux/serial_core.h
-@@ -68,6 +68,9 @@
- /* NVIDIA Tegra Combined UART */
- #define PORT_TEGRA_TCU 41
-
-+/* ASPEED AST2x00 virtual UART */
-+#define PORT_ASPEED_VUART 42
-+
- /* Intel EG20 */
- #define PORT_PCH_8LINE 44
- #define PORT_PCH_2LINE 45
-diff --git a/include/uapi/linux/stddef.h b/include/uapi/linux/stddef.h
-index ee8220f8dcf5f..7837ba4fe7289 100644
---- a/include/uapi/linux/stddef.h
-+++ b/include/uapi/linux/stddef.h
-@@ -1,6 +1,47 @@
- /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-+#ifndef _UAPI_LINUX_STDDEF_H
-+#define _UAPI_LINUX_STDDEF_H
-+
- #include <linux/compiler_types.h>
-
- #ifndef __always_inline
- #define __always_inline inline
- #endif
-+
-+/**
-+ * __struct_group() - Create a mirrored named and anonymous struct
-+ *
-+ * @TAG: The tag name for the named sub-struct (usually empty)
-+ * @NAME: The identifier name of the mirrored sub-struct
-+ * @ATTRS: Any struct attributes (usually empty)
-+ * @MEMBERS: The member declarations for the mirrored structs
-+ *
-+ * Used to create an anonymous union of two structs with identical layout
-+ * and size: one anonymous and one named. The former's members can be used
-+ * normally without sub-struct naming, and the latter can be used to
-+ * reason about the start, end, and size of the group of struct members.
-+ * The named struct can also be explicitly tagged for layer reuse, as well
-+ * as both having struct attributes appended.
-+ */
-+#define __struct_group(TAG, NAME, ATTRS, MEMBERS...) \
-+ union { \
-+ struct { MEMBERS } ATTRS; \
-+ struct TAG { MEMBERS } ATTRS NAME; \
-+ }
-+
-+/**
-+ * __DECLARE_FLEX_ARRAY() - Declare a flexible array usable in a union
-+ *
-+ * @TYPE: The type of each flexible array element
-+ * @NAME: The name of the flexible array member
-+ *
-+ * In order to have a flexible array member in a union or alone in a
-+ * struct, it needs to be wrapped in an anonymous struct with at least 1
-+ * named member, but that member can be empty.
-+ */
-+#define __DECLARE_FLEX_ARRAY(TYPE, NAME) \
-+ struct { \
-+ struct { } __empty_ ## NAME; \
-+ TYPE NAME[]; \
-+ }
-+#endif
-diff --git a/include/uapi/linux/virtio_ids.h b/include/uapi/linux/virtio_ids.h
-index 80d76b75bccd9..7aa2eb7662050 100644
---- a/include/uapi/linux/virtio_ids.h
-+++ b/include/uapi/linux/virtio_ids.h
-@@ -73,12 +73,12 @@
- * Virtio Transitional IDs
- */
-
--#define VIRTIO_TRANS_ID_NET 1000 /* transitional virtio net */
--#define VIRTIO_TRANS_ID_BLOCK 1001 /* transitional virtio block */
--#define VIRTIO_TRANS_ID_BALLOON 1002 /* transitional virtio balloon */
--#define VIRTIO_TRANS_ID_CONSOLE 1003 /* transitional virtio console */
--#define VIRTIO_TRANS_ID_SCSI 1004 /* transitional virtio SCSI */
--#define VIRTIO_TRANS_ID_RNG 1005 /* transitional virtio rng */
--#define VIRTIO_TRANS_ID_9P 1009 /* transitional virtio 9p console */
-+#define VIRTIO_TRANS_ID_NET 0x1000 /* transitional virtio net */
-+#define VIRTIO_TRANS_ID_BLOCK 0x1001 /* transitional virtio block */
-+#define VIRTIO_TRANS_ID_BALLOON 0x1002 /* transitional virtio balloon */
-+#define VIRTIO_TRANS_ID_CONSOLE 0x1003 /* transitional virtio console */
-+#define VIRTIO_TRANS_ID_SCSI 0x1004 /* transitional virtio SCSI */
-+#define VIRTIO_TRANS_ID_RNG 0x1005 /* transitional virtio rng */
-+#define VIRTIO_TRANS_ID_9P 0x1009 /* transitional virtio 9p console */
-
- #endif /* _LINUX_VIRTIO_IDS_H */
-diff --git a/include/uapi/linux/xfrm.h b/include/uapi/linux/xfrm.h
-index eda0426ec4c2b..65e13a099b1a0 100644
---- a/include/uapi/linux/xfrm.h
-+++ b/include/uapi/linux/xfrm.h
-@@ -313,6 +313,7 @@ enum xfrm_attr_type_t {
- XFRMA_SET_MARK, /* __u32 */
- XFRMA_SET_MARK_MASK, /* __u32 */
- XFRMA_IF_ID, /* __u32 */
-+ XFRMA_MTIMER_THRESH, /* __u32 in seconds for input SA */
- __XFRMA_MAX
-
- #define XFRMA_OUTPUT_MARK XFRMA_SET_MARK /* Compatibility */
-@@ -510,6 +511,12 @@ struct xfrm_user_offload {
- int ifindex;
- __u8 flags;
- };
-+/* This flag was exposed without any kernel code that supports it.
-+ * Unfortunately, strongswan has code that sets this flag,
-+ * which makes it impossible to reuse this bit.
-+ *
-+ * So leave it here to make sure that it won't be reused by mistake.
-+ */ - #define XFRM_OFFLOAD_IPV6 1 - #define XFRM_OFFLOAD_INBOUND 2 - -diff --git a/include/uapi/rdma/mlx5-abi.h b/include/uapi/rdma/mlx5-abi.h -index 86be4a92b67bf..a96b7d2770e15 100644 ---- a/include/uapi/rdma/mlx5-abi.h -+++ b/include/uapi/rdma/mlx5-abi.h -@@ -104,6 +104,7 @@ enum mlx5_ib_alloc_ucontext_resp_mask { - MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_ECE = 1UL << 2, - MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_SQD2RTS = 1UL << 3, - MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_REAL_TIME_TS = 1UL << 4, -+ MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_MKEY_UPDATE_TAG = 1UL << 5, - }; - - enum mlx5_user_cmds_supp_uhw { -diff --git a/include/uapi/sound/asound.h b/include/uapi/sound/asound.h -index 5859ca0a1439b..93e40f91bd49a 100644 ---- a/include/uapi/sound/asound.h -+++ b/include/uapi/sound/asound.h -@@ -56,8 +56,10 @@ - * * - ****************************************************************************/ - -+#define AES_IEC958_STATUS_SIZE 24 -+ - struct snd_aes_iec958 { -- unsigned char status[24]; /* AES/IEC958 channel status bits */ -+ unsigned char status[AES_IEC958_STATUS_SIZE]; /* AES/IEC958 channel status bits */ - unsigned char subcode[147]; /* AES/IEC958 subcode bits */ - unsigned char pad; /* nothing */ - unsigned char dig_subframe[4]; /* AES/IEC958 subframe bits */ -diff --git a/include/video/of_display_timing.h b/include/video/of_display_timing.h -index e1126a74882a5..eff166fdd81b9 100644 ---- a/include/video/of_display_timing.h -+++ b/include/video/of_display_timing.h -@@ -8,6 +8,8 @@ - #ifndef __LINUX_OF_DISPLAY_TIMING_H - #define __LINUX_OF_DISPLAY_TIMING_H - -+#include <linux/errno.h> -+ - struct device_node; - struct display_timing; - struct display_timings; -diff --git a/include/xen/events.h b/include/xen/events.h -index c204262d9fc24..344081e71584b 100644 ---- a/include/xen/events.h -+++ b/include/xen/events.h -@@ -17,6 +17,7 @@ struct xenbus_device; - unsigned xen_evtchn_nr_channels(void); - - int bind_evtchn_to_irq(evtchn_port_t evtchn); -+int bind_evtchn_to_irq_lateeoi(evtchn_port_t evtchn); - int bind_evtchn_to_irqhandler(evtchn_port_t evtchn, - irq_handler_t handler, - unsigned long irqflags, const char *devname, -diff --git a/include/xen/grant_table.h b/include/xen/grant_table.h -index cb854df031ce0..c9fea9389ebec 100644 ---- a/include/xen/grant_table.h -+++ b/include/xen/grant_table.h -@@ -104,17 +104,32 @@ int gnttab_end_foreign_access_ref(grant_ref_t ref, int readonly); - * access has been ended, free the given page too. Access will be ended - * immediately iff the grant entry is not in use, otherwise it will happen - * some time later. page may be 0, in which case no freeing will occur. -+ * Note that the granted page might still be accessed (read or write) by the -+ * other side after gnttab_end_foreign_access() returns, so even if page was -+ * specified as 0 it is not allowed to just reuse the page for other -+ * purposes immediately. gnttab_end_foreign_access() will take an additional -+ * reference to the granted page in this case, which is dropped only after -+ * the grant is no longer in use. -+ * This requires that multi page allocations for areas subject to -+ * gnttab_end_foreign_access() are done via alloc_pages_exact() (and freeing -+ * via free_pages_exact()) in order to avoid high order pages. - */ - void gnttab_end_foreign_access(grant_ref_t ref, int readonly, - unsigned long page); - -+/* -+ * End access through the given grant reference, iff the grant entry is -+ * no longer in use. In case of success ending foreign access, the -+ * grant reference is deallocated. 
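A schematic teardown path shows how the two grant calls documented above fit together. The prototypes are copied from the header, grant_ref_t and free_pages_exact() are stubbed so the sketch stands alone as one translation unit, and example_free_ring() itself is hypothetical, not a real frontend:

    #include <stddef.h>

    typedef unsigned int grant_ref_t;

    int  gnttab_try_end_foreign_access(grant_ref_t ref);
    void gnttab_end_foreign_access(grant_ref_t ref, int readonly,
                                   unsigned long page);
    void free_pages_exact(void *virt, size_t size);

    static void example_free_ring(grant_ref_t ref, void *ring)
    {
        if (gnttab_try_end_foreign_access(ref)) {
            /* grant entry was unused: it is deallocated now and the
             * backend can no longer reach the page, so free it here */
            free_pages_exact(ring, 4096 /* one page */);
        } else {
            /* grant still in use: hand the page to the core, which
             * keeps a reference and frees it when access really ends */
            gnttab_end_foreign_access(ref, 0, (unsigned long)ring);
        }
    }

The point of the new comments is visible in the else branch: after gnttab_end_foreign_access() returns, the other side may still touch the page, so the caller must never reuse it directly, and multi-page areas must come from alloc_pages_exact() so the deferred free can work page by page.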
-+ * Return 1 if the grant entry was freed, 0 if it is still in use. -+ */ -+int gnttab_try_end_foreign_access(grant_ref_t ref); -+ - int gnttab_grant_foreign_transfer(domid_t domid, unsigned long pfn); - - unsigned long gnttab_end_foreign_transfer_ref(grant_ref_t ref); - unsigned long gnttab_end_foreign_transfer(grant_ref_t ref); - --int gnttab_query_foreign_access(grant_ref_t ref); +- * We call schedule in futex_wait_queue_me() when we enqueue and return there +- * via the following-- +- * 1) wakeup on uaddr2 after an atomic lock acquisition by futex_requeue() +- * 2) wakeup on uaddr2 after a requeue +- * 3) signal +- * 4) timeout +- * +- * If 3, cleanup and return -ERESTARTNOINTR. +- * +- * If 2, we may then block on trying to take the rt_mutex and return via: +- * 5) successful lock +- * 6) signal +- * 7) timeout +- * 8) other lock acquisition failure +- * +- * If 6, return -EWOULDBLOCK (restarting the syscall would do the same). +- * +- * If 4 or 7, we cleanup and return with -ETIMEDOUT. +- * +- * Return: +- * - 0 - On success; +- * - <0 - On error +- */ +-static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, +- u32 val, ktime_t *abs_time, u32 bitset, +- u32 __user *uaddr2) +-{ +- struct hrtimer_sleeper timeout, *to; +- struct rt_mutex_waiter rt_waiter; +- struct futex_hash_bucket *hb; +- union futex_key key2 = FUTEX_KEY_INIT; +- struct futex_q q = futex_q_init; +- struct rt_mutex_base *pi_mutex; +- int res, ret; +- +- if (!IS_ENABLED(CONFIG_FUTEX_PI)) +- return -ENOSYS; +- +- if (uaddr == uaddr2) +- return -EINVAL; +- +- if (!bitset) +- return -EINVAL; +- +- to = futex_setup_timer(abs_time, &timeout, flags, +- current->timer_slack_ns); +- +- /* +- * The waiter is allocated on our stack, manipulated by the requeue +- * code while we sleep on uaddr. +- */ +- rt_mutex_init_waiter(&rt_waiter); +- +- ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2, FUTEX_WRITE); +- if (unlikely(ret != 0)) +- goto out; +- +- q.bitset = bitset; +- q.rt_waiter = &rt_waiter; +- q.requeue_pi_key = &key2; +- +- /* +- * Prepare to wait on uaddr. On success, it holds hb->lock and q +- * is initialized. +- */ +- ret = futex_wait_setup(uaddr, val, flags, &q, &hb); +- if (ret) +- goto out; +- +- /* +- * The check above which compares uaddrs is not sufficient for +- * shared futexes. We need to compare the keys: +- */ +- if (match_futex(&q.key, &key2)) { +- queue_unlock(hb); +- ret = -EINVAL; +- goto out; +- } +- +- /* Queue the futex_q, drop the hb lock, wait for wakeup. */ +- futex_wait_queue_me(hb, &q, to); +- +- switch (futex_requeue_pi_wakeup_sync(&q)) { +- case Q_REQUEUE_PI_IGNORE: +- /* The waiter is still on uaddr1 */ +- spin_lock(&hb->lock); +- ret = handle_early_requeue_pi_wakeup(hb, &q, to); +- spin_unlock(&hb->lock); +- break; +- +- case Q_REQUEUE_PI_LOCKED: +- /* The requeue acquired the lock */ +- if (q.pi_state && (q.pi_state->owner != current)) { +- spin_lock(q.lock_ptr); +- ret = fixup_owner(uaddr2, &q, true); +- /* +- * Drop the reference to the pi state which the +- * requeue_pi() code acquired for us. +- */ +- put_pi_state(q.pi_state); +- spin_unlock(q.lock_ptr); +- /* +- * Adjust the return value. It's either -EFAULT or +- * success (1) but the caller expects 0 for success. +- */ +- ret = ret < 0 ? ret : 0; +- } +- break; +- +- case Q_REQUEUE_PI_DONE: +- /* Requeue completed. 
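The requeue-PI comment above is written from the kernel's side; the user-space half of the handshake can be sketched with the raw futex(2) multiplexer. This is schematic only: a real condition variable needs the surrounding value protocol, retry loops, and error handling, and the two wrappers here are invented names:

    #include <limits.h>
    #include <linux/futex.h>
    #include <stdint.h>
    #include <sys/syscall.h>
    #include <unistd.h>

    /* waiter: block on the condvar futex; the kernel moves us onto the
     * PI mutex futex when we are signalled or requeued */
    static long cond_wait_pi(uint32_t *cond, uint32_t seen, uint32_t *mutex)
    {
        return syscall(SYS_futex, cond, FUTEX_WAIT_REQUEUE_PI, seen,
                       NULL /* no timeout */, mutex, 0);
    }

    /* signaller: wake exactly one waiter (required for requeue-PI) and
     * requeue the rest onto the PI mutex, acquiring it atomically on a
     * waiter's behalf when it is uncontended */
    static long cond_signal_pi(uint32_t *cond, uint32_t *mutex)
    {
        return syscall(SYS_futex, cond, FUTEX_CMP_REQUEUE_PI,
                       1 /* nr_wake */, (void *)(long)INT_MAX /* nr_requeue */,
                       mutex, *cond /* expected value at cond */);
    }

Note how this matches the function's checks: uaddr and uaddr2 must differ, and a waiter woken directly on uaddr2 after an atomic lock acquisition corresponds to the Q_REQUEUE_PI_LOCKED case in the switch.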
Current is 'pi_blocked_on' the rtmutex */ +- pi_mutex = &q.pi_state->pi_mutex; +- ret = rt_mutex_wait_proxy_lock(pi_mutex, to, &rt_waiter); +- +- /* Current is not longer pi_blocked_on */ +- spin_lock(q.lock_ptr); +- if (ret && !rt_mutex_cleanup_proxy_lock(pi_mutex, &rt_waiter)) +- ret = 0; +- +- debug_rt_mutex_free_waiter(&rt_waiter); +- /* +- * Fixup the pi_state owner and possibly acquire the lock if we +- * haven't already. +- */ +- res = fixup_owner(uaddr2, &q, !ret); +- /* +- * If fixup_owner() returned an error, propagate that. If it +- * acquired the lock, clear -ETIMEDOUT or -EINTR. +- */ +- if (res) +- ret = (res < 0) ? res : 0; +- +- unqueue_me_pi(&q); +- spin_unlock(q.lock_ptr); +- +- if (ret == -EINTR) { +- /* +- * We've already been requeued, but cannot restart +- * by calling futex_lock_pi() directly. We could +- * restart this syscall, but it would detect that +- * the user space "val" changed and return +- * -EWOULDBLOCK. Save the overhead of the restart +- * and return -EWOULDBLOCK directly. +- */ +- ret = -EWOULDBLOCK; +- } +- break; +- default: +- BUG(); +- } +- +-out: +- if (to) { +- hrtimer_cancel(&to->timer); +- destroy_hrtimer_on_stack(&to->timer); +- } +- return ret; +-} +- +-/* +- * Support for robust futexes: the kernel cleans up held futexes at +- * thread exit time. +- * +- * Implementation: user-space maintains a per-thread list of locks it +- * is holding. Upon do_exit(), the kernel carefully walks this list, +- * and marks all locks that are owned by this thread with the +- * FUTEX_OWNER_DIED bit, and wakes up a waiter (if any). The list is +- * always manipulated with the lock held, so the list is private and +- * per-thread. Userspace also maintains a per-thread 'list_op_pending' +- * field, to allow the kernel to clean up if the thread dies after +- * acquiring the lock, but just before it could have added itself to +- * the list. There can only be one such pending lock. +- */ +- +-/** +- * sys_set_robust_list() - Set the robust-futex list head of a task +- * @head: pointer to the list-head +- * @len: length of the list-head, as userspace expects +- */ +-SYSCALL_DEFINE2(set_robust_list, struct robust_list_head __user *, head, +- size_t, len) +-{ +- if (!futex_cmpxchg_enabled) +- return -ENOSYS; +- /* +- * The kernel knows only one size for now: +- */ +- if (unlikely(len != sizeof(*head))) +- return -EINVAL; +- +- current->robust_list = head; - - /* - * operations on reserved batches of grant references - */ -diff --git a/init/Kconfig b/init/Kconfig -index 11f8a845f259d..d19ed66aba3ba 100644 ---- a/init/Kconfig -+++ b/init/Kconfig -@@ -77,6 +77,11 @@ config CC_HAS_ASM_GOTO_OUTPUT - depends on CC_HAS_ASM_GOTO - def_bool $(success,echo 'int foo(int x) { asm goto ("": "=r"(x) ::: bar); return x; bar: return 0; }' | $(CC) -x c - -c -o /dev/null) - -+config CC_HAS_ASM_GOTO_TIED_OUTPUT -+ depends on CC_HAS_ASM_GOTO_OUTPUT -+ # Detect buggy gcc and clang, fixed in gcc-11 clang-14. 
-+ def_bool $(success,echo 'int foo(int *x) { asm goto (".long (%l[bar]) - .\n": "+m"(*x) ::: bar); return *x; bar: return 0; }' | $CC -x c - -c -o /dev/null) -+ - config TOOLS_SUPPORT_RELR - def_bool $(success,env "CC=$(CC)" "LD=$(LD)" "NM=$(NM)" "OBJCOPY=$(OBJCOPY)" $(srctree)/scripts/tools-support-relr.sh) - -diff --git a/init/main.c b/init/main.c -index 3c4054a955458..649d9e4201a80 100644 ---- a/init/main.c -+++ b/init/main.c -@@ -100,6 +100,8 @@ - #include <linux/kcsan.h> - #include <linux/init_syscalls.h> - #include <linux/stackdepot.h> -+#include <linux/randomize_kstack.h> -+#include <net/net_namespace.h> - - #include <asm/io.h> - #include <asm/bugs.h> -@@ -924,7 +926,9 @@ static void __init print_unknown_bootoptions(void) - for (p = &envp_init[2]; *p; p++) - end += sprintf(end, " %s", *p); - -- pr_notice("Unknown command line parameters:%s\n", unknown_options); -+ /* Start at unknown_options[1] to skip the initial space */ -+ pr_notice("Unknown kernel command line parameters \"%s\", will be passed to user space.\n", -+ &unknown_options[1]); - memblock_free_ptr(unknown_options, len); - } - -@@ -1038,21 +1042,18 @@ asmlinkage __visible void __init __no_sanitize_address start_kernel(void) - softirq_init(); - timekeeping_init(); - kfence_init(); -+ time_init(); - - /* - * For best initial stack canary entropy, prepare it after: - * - setup_arch() for any UEFI RNG entropy and boot cmdline access -- * - timekeeping_init() for ktime entropy used in rand_initialize() -- * - rand_initialize() to get any arch-specific entropy like RDRAND -- * - add_latent_entropy() to get any latent entropy -- * - adding command line entropy -+ * - timekeeping_init() for ktime entropy used in random_init() -+ * - time_init() for making random_get_entropy() work on some platforms -+ * - random_init() to initialize the RNG from from early entropy sources - */ -- rand_initialize(); -- add_latent_entropy(); -- add_device_randomness(command_line, strlen(command_line)); -+ random_init(command_line); - boot_init_stack_canary(); - -- time_init(); - perf_event_init(); - profile_init(); - call_function_init(); -@@ -1120,6 +1121,7 @@ asmlinkage __visible void __init __no_sanitize_address start_kernel(void) - key_init(); - security_init(); - dbg_late_init(); -+ net_ns_init(); - vfs_caches_init(); - pagecache_init(); - signals_init(); -@@ -1194,7 +1196,7 @@ static int __init initcall_blacklist(char *str) - } - } while (str_entry); - - return 0; -+ return 1; - } - - static bool __init_or_module initcall_blacklisted(initcall_t fn) -@@ -1456,7 +1458,9 @@ static noinline void __init kernel_init_freeable(void); - bool rodata_enabled __ro_after_init = true; - static int __init set_debug_rodata(char *str) - { -- return strtobool(str, &rodata_enabled); -+ if (strtobool(str, &rodata_enabled)) -+ pr_warn("Invalid option string for rodata: '%s'\n", str); -+ return 1; - } - __setup("rodata=", set_debug_rodata); - #endif -diff --git a/ipc/mqueue.c b/ipc/mqueue.c -index 5becca9be867c..089c34d0732cf 100644 ---- a/ipc/mqueue.c -+++ b/ipc/mqueue.c -@@ -45,6 +45,7 @@ - - struct mqueue_fs_context { - struct ipc_namespace *ipc_ns; -+ bool newns; /* Set if newly created ipc namespace */ - }; - - #define MQUEUE_MAGIC 0x19800202 -@@ -427,6 +428,14 @@ static int mqueue_get_tree(struct fs_context *fc) - { - struct mqueue_fs_context *ctx = fc->fs_private; - -+ /* -+ * With a newly created ipc namespace, we don't need to do a search -+ * for an ipc namespace match, but we still need to set s_fs_info. 
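The CC_HAS_ASM_GOTO_TIED_OUTPUT probe added to init/Kconfig above can be reproduced stand-alone to check a toolchain by hand. This mirrors the def_bool one-liner; it is a compile/assemble test only, since the .long directive plants data in the text section, so foo() is deliberately never called:

    /* builds only where "asm goto" accepts a tied ("+m") output:
     * per the comment above, gcc >= 11 or clang >= 14 */
    int foo(int *x)
    {
        asm goto (".long (%l[bar]) - .\n"
                  : "+m" (*x) : : : bar);
        return *x;
    bar:
        return 0;
    }

    int main(void)
    {
        (void)foo;   /* reference it, never execute it */
        return 0;
    }

A buggy compiler either rejects the tied output outright or miscompiles the label reference, which is why the Kconfig symbol gates on a successful compile rather than a version check.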
-+ */ -+ if (ctx->newns) { -+ fc->s_fs_info = ctx->ipc_ns; -+ return get_tree_nodev(fc, mqueue_fill_super); -+ } - return get_tree_keyed(fc, mqueue_fill_super, ctx->ipc_ns); - } - -@@ -454,6 +463,10 @@ static int mqueue_init_fs_context(struct fs_context *fc) - return 0; - } - -+/* -+ * mq_init_ns() is currently the only caller of mq_create_mount(). -+ * So the ns parameter is always a newly created ipc namespace. -+ */ - static struct vfsmount *mq_create_mount(struct ipc_namespace *ns) - { - struct mqueue_fs_context *ctx; -@@ -465,6 +478,7 @@ static struct vfsmount *mq_create_mount(struct ipc_namespace *ns) - return ERR_CAST(fc); - - ctx = fc->fs_private; -+ ctx->newns = true; - put_ipc_ns(ctx->ipc_ns); - ctx->ipc_ns = get_ipc_ns(ns); - put_user_ns(fc->user_ns); -diff --git a/ipc/sem.c b/ipc/sem.c -index 6693daf4fe112..0dbdb98fdf2d9 100644 ---- a/ipc/sem.c -+++ b/ipc/sem.c -@@ -1964,6 +1964,7 @@ static struct sem_undo *find_alloc_undo(struct ipc_namespace *ns, int semid) - */ - un = lookup_undo(ulp, semid); - if (un) { -+ spin_unlock(&ulp->lock); - kvfree(new); - goto success; - } -@@ -1976,9 +1977,8 @@ static struct sem_undo *find_alloc_undo(struct ipc_namespace *ns, int semid) - ipc_assert_locked_object(&sma->sem_perm); - list_add(&new->list_id, &sma->list_id); - un = new; +-} +- +-/** +- * sys_get_robust_list() - Get the robust-futex list head of a task +- * @pid: pid of the process [zero for current task] +- * @head_ptr: pointer to a list-head pointer, the kernel fills it in +- * @len_ptr: pointer to a length field, the kernel fills in the header size +- */ +-SYSCALL_DEFINE3(get_robust_list, int, pid, +- struct robust_list_head __user * __user *, head_ptr, +- size_t __user *, len_ptr) +-{ +- struct robust_list_head __user *head; +- unsigned long ret; +- struct task_struct *p; +- +- if (!futex_cmpxchg_enabled) +- return -ENOSYS; +- +- rcu_read_lock(); +- +- ret = -ESRCH; +- if (!pid) +- p = current; +- else { +- p = find_task_by_vpid(pid); +- if (!p) +- goto err_unlock; +- } +- +- ret = -EPERM; +- if (!ptrace_may_access(p, PTRACE_MODE_READ_REALCREDS)) +- goto err_unlock; +- +- head = p->robust_list; +- rcu_read_unlock(); +- +- if (put_user(sizeof(*head), len_ptr)) +- return -EFAULT; +- return put_user(head, head_ptr); +- +-err_unlock: +- rcu_read_unlock(); +- +- return ret; +-} +- +-/* Constants for the pending_op argument of handle_futex_death */ +-#define HANDLE_DEATH_PENDING true +-#define HANDLE_DEATH_LIST false +- +-/* +- * Process a futex-list entry, check whether it's owned by the +- * dying task, and do notification if so: +- */ +-static int handle_futex_death(u32 __user *uaddr, struct task_struct *curr, +- bool pi, bool pending_op) +-{ +- u32 uval, nval, mval; +- int err; +- +- /* Futex address must be 32bit aligned */ +- if ((((unsigned long)uaddr) % sizeof(*uaddr)) != 0) +- return -1; +- +-retry: +- if (get_user(uval, uaddr)) +- return -1; +- +- /* +- * Special case for regular (non PI) futexes. The unlock path in +- * user space has two race scenarios: +- * +- * 1. The unlock path releases the user space futex value and +- * before it can execute the futex() syscall to wake up +- * waiters it is killed. +- * +- * 2. A woken up waiter is killed before it can acquire the +- * futex in user space. +- * +- * In both cases the TID validation below prevents a wakeup of +- * potential waiters which can cause these waiters to block +- * forever. 
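The registration side of the robust-list machinery above is ordinarily hidden inside glibc, which installs one list head per thread at startup. A minimal sketch of doing it by hand follows; note that it replaces glibc's registration for the lifetime of the demo, which is harmless here only because the list stays empty:

    #define _GNU_SOURCE
    #include <linux/futex.h>
    #include <stdio.h>
    #include <sys/syscall.h>
    #include <unistd.h>

    int main(void)
    {
        static struct robust_list_head head = {
            .list = { &head.list },   /* empty: head points at itself */
            .futex_offset = 0,
            .list_op_pending = NULL,
        };

        if (syscall(SYS_set_robust_list, &head, sizeof(head)) != 0)
            perror("set_robust_list");

        struct robust_list_head *cur = NULL;
        size_t len = 0;
        if (syscall(SYS_get_robust_list, 0 /* self */, &cur, &len) == 0)
            printf("robust list head %p, len %zu\n", (void *)cur, len);
        return 0;
    }

On exit the kernel walks whatever list is registered, which is why sys_set_robust_list() insists on an exact sizeof(*head) and why the walker above is so defensive about user-space pointers.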
+- * +- * In both cases the following conditions are met: +- * +- * 1) task->robust_list->list_op_pending != NULL +- * @pending_op == true +- * 2) User space futex value == 0 +- * 3) Regular futex: @pi == false +- * +- * If these conditions are met, it is safe to attempt waking up a +- * potential waiter without touching the user space futex value and +- * trying to set the OWNER_DIED bit. The user space futex value is +- * uncontended and the rest of the user space mutex state is +- * consistent, so a woken waiter will just take over the +- * uncontended futex. Setting the OWNER_DIED bit would create +- * inconsistent state and malfunction of the user space owner died +- * handling. +- */ +- if (pending_op && !pi && !uval) { +- futex_wake(uaddr, 1, 1, FUTEX_BITSET_MATCH_ANY); +- return 0; +- } +- +- if ((uval & FUTEX_TID_MASK) != task_pid_vnr(curr)) +- return 0; +- +- /* +- * Ok, this dying thread is truly holding a futex +- * of interest. Set the OWNER_DIED bit atomically +- * via cmpxchg, and if the value had FUTEX_WAITERS +- * set, wake up a waiter (if any). (We have to do a +- * futex_wake() even if OWNER_DIED is already set - +- * to handle the rare but possible case of recursive +- * thread-death.) The rest of the cleanup is done in +- * userspace. +- */ +- mval = (uval & FUTEX_WAITERS) | FUTEX_OWNER_DIED; +- +- /* +- * We are not holding a lock here, but we want to have +- * the pagefault_disable/enable() protection because +- * we want to handle the fault gracefully. If the +- * access fails we try to fault in the futex with R/W +- * verification via get_user_pages. get_user() above +- * does not guarantee R/W access. If that fails we +- * give up and leave the futex locked. +- */ +- if ((err = cmpxchg_futex_value_locked(&nval, uaddr, uval, mval))) { +- switch (err) { +- case -EFAULT: +- if (fault_in_user_writeable(uaddr)) +- return -1; +- goto retry; +- +- case -EAGAIN: +- cond_resched(); +- goto retry; +- +- default: +- WARN_ON_ONCE(1); +- return err; +- } +- } +- +- if (nval != uval) +- goto retry; +- +- /* +- * Wake robust non-PI futexes here. The wakeup of +- * PI futexes happens in exit_pi_state(): +- */ +- if (!pi && (uval & FUTEX_WAITERS)) +- futex_wake(uaddr, 1, 1, FUTEX_BITSET_MATCH_ANY); +- +- return 0; +-} - --success: - spin_unlock(&ulp->lock); -+success: - sem_unlock(sma, -1); - out: - return un; -diff --git a/ipc/shm.c b/ipc/shm.c -index ab749be6d8b71..048eb183b24b9 100644 ---- a/ipc/shm.c -+++ b/ipc/shm.c -@@ -62,9 +62,18 @@ struct shmid_kernel /* private to the kernel */ - struct pid *shm_lprid; - struct ucounts *mlock_ucounts; - -- /* The task created the shm object. NULL if the task is dead. */ -+ /* -+ * The task created the shm object, for -+ * task_lock(shp->shm_creator) -+ */ - struct task_struct *shm_creator; -- struct list_head shm_clist; /* list by creator */ -+ -+ /* -+ * List by creator. task_lock(->shm_creator) required for read/write. -+ * If list_empty(), then the creator is dead already. 
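Seen from user space, the shm_clist bookkeeping being reworked above is what lets kernel.shm_rmid_forced=1 reap a segment whose creator never calls IPC_RMID. A small demonstration, meant to be run with that sysctl enabled:

    #include <stdio.h>
    #include <sys/ipc.h>
    #include <sys/shm.h>

    int main(void)
    {
        int id = shmget(IPC_PRIVATE, 4096, IPC_CREAT | 0600);

        if (id < 0) {
            perror("shmget");
            return 1;
        }
        printf("segment %d created; exiting without IPC_RMID\n", id);
        /* with kernel.shm_rmid_forced=1, exit_shm() finds this segment
         * on the creator's shm_clist and destroys it at process exit */
        return 0;
    }

With the sysctl at 0 the segment instead lingers (visible in ipcs) until removed explicitly, which is the "only keep track of which shmids are orphaned" case the rewritten exit_shm() handles.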
-+ */ -+ struct list_head shm_clist; -+ struct ipc_namespace *ns; - } __randomize_layout; - - /* shm_mode upper byte flags */ -@@ -115,6 +124,7 @@ static void do_shm_rmid(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp) - struct shmid_kernel *shp; - - shp = container_of(ipcp, struct shmid_kernel, shm_perm); -+ WARN_ON(ns != shp->ns); - - if (shp->shm_nattch) { - shp->shm_perm.mode |= SHM_DEST; -@@ -225,10 +235,43 @@ static void shm_rcu_free(struct rcu_head *head) - kfree(shp); - } - --static inline void shm_rmid(struct ipc_namespace *ns, struct shmid_kernel *s) -+/* -+ * It has to be called with shp locked. -+ * It must be called before ipc_rmid() -+ */ -+static inline void shm_clist_rm(struct shmid_kernel *shp) - { -- list_del(&s->shm_clist); -- ipc_rmid(&shm_ids(ns), &s->shm_perm); -+ struct task_struct *creator; -+ -+ /* ensure that shm_creator does not disappear */ -+ rcu_read_lock(); -+ -+ /* -+ * A concurrent exit_shm may do a list_del_init() as well. -+ * Just do nothing if exit_shm already did the work -+ */ -+ if (!list_empty(&shp->shm_clist)) { -+ /* -+ * shp->shm_creator is guaranteed to be valid *only* -+ * if shp->shm_clist is not empty. -+ */ -+ creator = shp->shm_creator; -+ -+ task_lock(creator); -+ /* -+ * list_del_init() is a nop if the entry was already removed -+ * from the list. -+ */ -+ list_del_init(&shp->shm_clist); -+ task_unlock(creator); -+ } -+ rcu_read_unlock(); -+} -+ -+static inline void shm_rmid(struct shmid_kernel *s) -+{ -+ shm_clist_rm(s); -+ ipc_rmid(&shm_ids(s->ns), &s->shm_perm); - } - - -@@ -283,7 +326,7 @@ static void shm_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp) - shm_file = shp->shm_file; - shp->shm_file = NULL; - ns->shm_tot -= (shp->shm_segsz + PAGE_SIZE - 1) >> PAGE_SHIFT; -- shm_rmid(ns, shp); -+ shm_rmid(shp); - shm_unlock(shp); - if (!is_file_hugepages(shm_file)) - shmem_lock(shm_file, 0, shp->mlock_ucounts); -@@ -306,10 +349,10 @@ static void shm_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp) - * - * 2) sysctl kernel.shm_rmid_forced is set to 1. - */ --static bool shm_may_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp) -+static bool shm_may_destroy(struct shmid_kernel *shp) - { - return (shp->shm_nattch == 0) && -- (ns->shm_rmid_forced || -+ (shp->ns->shm_rmid_forced || - (shp->shm_perm.mode & SHM_DEST)); - } - -@@ -340,7 +383,7 @@ static void shm_close(struct vm_area_struct *vma) - ipc_update_pid(&shp->shm_lprid, task_tgid(current)); - shp->shm_dtim = ktime_get_real_seconds(); - shp->shm_nattch--; -- if (shm_may_destroy(ns, shp)) -+ if (shm_may_destroy(shp)) - shm_destroy(ns, shp); - else - shm_unlock(shp); -@@ -361,10 +404,10 @@ static int shm_try_destroy_orphaned(int id, void *p, void *data) - * - * As shp->* are changed under rwsem, it's safe to skip shp locking. - */ -- if (shp->shm_creator != NULL) -+ if (!list_empty(&shp->shm_clist)) - return 0; - -- if (shm_may_destroy(ns, shp)) { -+ if (shm_may_destroy(shp)) { - shm_lock_by_ptr(shp); - shm_destroy(ns, shp); - } -@@ -382,48 +425,97 @@ void shm_destroy_orphaned(struct ipc_namespace *ns) - /* Locking assumes this will only be called with task == current */ - void exit_shm(struct task_struct *task) - { -- struct ipc_namespace *ns = task->nsproxy->ipc_ns; -- struct shmid_kernel *shp, *n; -+ for (;;) { -+ struct shmid_kernel *shp; -+ struct ipc_namespace *ns; - -- if (list_empty(&task->sysvshm.shm_clist)) +-/* +- * Fetch a robust-list pointer. 
Bit 0 signals PI futexes: +- */ +-static inline int fetch_robust_entry(struct robust_list __user **entry, +- struct robust_list __user * __user *head, +- unsigned int *pi) +-{ +- unsigned long uentry; +- +- if (get_user(uentry, (unsigned long __user *)head)) +- return -EFAULT; +- +- *entry = (void __user *)(uentry & ~1UL); +- *pi = uentry & 1; +- +- return 0; +-} +- +-/* +- * Walk curr->robust_list (very carefully, it's a userspace list!) +- * and mark any locks found there dead, and notify any waiters. +- * +- * We silently return on any sign of list-walking problem. +- */ +-static void exit_robust_list(struct task_struct *curr) +-{ +- struct robust_list_head __user *head = curr->robust_list; +- struct robust_list __user *entry, *next_entry, *pending; +- unsigned int limit = ROBUST_LIST_LIMIT, pi, pip; +- unsigned int next_pi; +- unsigned long futex_offset; +- int rc; +- +- if (!futex_cmpxchg_enabled) - return; -+ task_lock(task); -+ -+ if (list_empty(&task->sysvshm.shm_clist)) { -+ task_unlock(task); -+ break; -+ } -+ -+ shp = list_first_entry(&task->sysvshm.shm_clist, struct shmid_kernel, -+ shm_clist); - +- - /* -- * If kernel.shm_rmid_forced is not set then only keep track of -- * which shmids are orphaned, so that a later set of the sysctl -- * can clean them up. +- * Fetch the list head (which was registered earlier, via +- * sys_set_robust_list()): - */ -- if (!ns->shm_rmid_forced) { -- down_read(&shm_ids(ns).rwsem); -- list_for_each_entry(shp, &task->sysvshm.shm_clist, shm_clist) -- shp->shm_creator = NULL; - /* -- * Only under read lock but we are only called on current -- * so no entry on the list will be shared. -+ * 1) Get pointer to the ipc namespace. It is worth to say -+ * that this pointer is guaranteed to be valid because -+ * shp lifetime is always shorter than namespace lifetime -+ * in which shp lives. -+ * We taken task_lock it means that shp won't be freed. 
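The encoding that fetch_robust_entry() above decodes is produced entirely by user space. A sketch with an invented mutex layout shows the futex_offset arithmetic and the PI bit; only the rules are the kernel's, the names are not:

    #include <linux/futex.h>
    #include <stddef.h>
    #include <stdint.h>

    struct demo_mutex {
        struct robust_list node;   /* linked into the thread's robust list */
        uint32_t futex;            /* owner TID | FUTEX_WAITERS | ... */
    };

    /* what the walker adds to each node address to find the futex word */
    static const long demo_futex_offset =
        offsetof(struct demo_mutex, futex) -
        offsetof(struct demo_mutex, node);

    /* PI-ness rides in bit 0 of the stored node pointer, exactly what
     * fetch_robust_entry() masks off and reports separately */
    static inline struct robust_list *demo_encode_pi(struct demo_mutex *m)
    {
        return (struct robust_list *)((uintptr_t)&m->node | 1UL);
    }

Because one futex_offset is registered per list head, every lock type linked into that list must keep its futex word at the same distance from its list node, which is how glibc lays out pthread_mutex_t.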
- */ -- list_del(&task->sysvshm.shm_clist); -- up_read(&shm_ids(ns).rwsem); +- if (fetch_robust_entry(&entry, &head->list.next, &pi)) +- return; +- /* +- * Fetch the relative futex offset: +- */ +- if (get_user(futex_offset, &head->futex_offset)) +- return; +- /* +- * Fetch any possibly pending lock-add first, and handle it +- * if it exists: +- */ +- if (fetch_robust_entry(&pending, &head->list_op_pending, &pip)) - return; +- +- next_entry = NULL; /* avoid warning with gcc */ +- while (entry != &head->list) { +- /* +- * Fetch the next entry in the list before calling +- * handle_futex_death: +- */ +- rc = fetch_robust_entry(&next_entry, &entry->next, &next_pi); +- /* +- * A pending lock might already be on the list, so +- * don't process it twice: +- */ +- if (entry != pending) { +- if (handle_futex_death((void __user *)entry + futex_offset, +- curr, pi, HANDLE_DEATH_LIST)) +- return; +- } +- if (rc) +- return; +- entry = next_entry; +- pi = next_pi; +- /* +- * Avoid excessively long or circular lists: +- */ +- if (!--limit) +- break; +- +- cond_resched(); - } -+ ns = shp->ns; - +- +- if (pending) { +- handle_futex_death((void __user *)pending + futex_offset, +- curr, pip, HANDLE_DEATH_PENDING); +- } +-} +- +-static void futex_cleanup(struct task_struct *tsk) +-{ +- if (unlikely(tsk->robust_list)) { +- exit_robust_list(tsk); +- tsk->robust_list = NULL; +- } +- +-#ifdef CONFIG_COMPAT +- if (unlikely(tsk->compat_robust_list)) { +- compat_exit_robust_list(tsk); +- tsk->compat_robust_list = NULL; +- } +-#endif +- +- if (unlikely(!list_empty(&tsk->pi_state_list))) +- exit_pi_state_list(tsk); +-} +- +-/** +- * futex_exit_recursive - Set the tasks futex state to FUTEX_STATE_DEAD +- * @tsk: task to set the state on +- * +- * Set the futex exit state of the task lockless. The futex waiter code +- * observes that state when a task is exiting and loops until the task has +- * actually finished the futex cleanup. The worst case for this is that the +- * waiter runs through the wait loop until the state becomes visible. +- * +- * This is called from the recursive fault handling path in do_exit(). +- * +- * This is best effort. Either the futex exit code has run already or +- * not. If the OWNER_DIED bit has been set on the futex then the waiter can +- * take it over. If not, the problem is pushed back to user space. If the +- * futex exit code did not run yet, then an already queued waiter might +- * block forever, but there is nothing which can be done about that. +- */ +-void futex_exit_recursive(struct task_struct *tsk) +-{ +- /* If the state is FUTEX_STATE_EXITING then futex_exit_mutex is held */ +- if (tsk->futex_state == FUTEX_STATE_EXITING) +- mutex_unlock(&tsk->futex_exit_mutex); +- tsk->futex_state = FUTEX_STATE_DEAD; +-} +- +-static void futex_cleanup_begin(struct task_struct *tsk) +-{ - /* -- * Destroy all already created segments, that were not yet mapped, -- * and mark any mapped as orphan to cover the sysctl toggling. -- * Destroy is skipped if shm_may_destroy() returns false. +- * Prevent various race issues against a concurrent incoming waiter +- * including live locks by forcing the waiter to block on +- * tsk->futex_exit_mutex when it observes FUTEX_STATE_EXITING in +- * attach_to_pi_owner(). 
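The FUTEX_STATE_EXITING handshake described above reduces to a small lock pattern that can be modelled in user space. This pthread model is illustrative only: initialisation is elided, all names are invented, and it compresses the kernel's pi_lock/exit_mutex interplay to its shape:

    #include <pthread.h>

    enum exit_state { STATE_OK, STATE_EXITING, STATE_DEAD };

    struct task_model {
        pthread_mutex_t exit_mutex;
        pthread_spinlock_t pi_lock;
        enum exit_state state;
    };

    static void cleanup_begin(struct task_model *t)
    {
        pthread_mutex_lock(&t->exit_mutex);
        pthread_spin_lock(&t->pi_lock);
        t->state = STATE_EXITING;   /* now visible to attach paths */
        pthread_spin_unlock(&t->pi_lock);
    }

    static void cleanup_end(struct task_model *t, enum exit_state s)
    {
        t->state = s;               /* lockless store, like the kernel's */
        pthread_mutex_unlock(&t->exit_mutex);  /* unblock observers */
    }

An observer that reads STATE_EXITING blocks on exit_mutex and so cannot race the cleanup; at worst it loops once more before the final state becomes visible, which is the "best effort" wording in futex_exit_recursive().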
- */ -- down_write(&shm_ids(ns).rwsem); -- list_for_each_entry_safe(shp, n, &task->sysvshm.shm_clist, shm_clist) { -- shp->shm_creator = NULL; -+ /* -+ * 2) If kernel.shm_rmid_forced is not set then only keep track of -+ * which shmids are orphaned, so that a later set of the sysctl -+ * can clean them up. -+ */ -+ if (!ns->shm_rmid_forced) -+ goto unlink_continue; - -- if (shm_may_destroy(ns, shp)) { -- shm_lock_by_ptr(shp); -- shm_destroy(ns, shp); -+ /* -+ * 3) get a reference to the namespace. -+ * The refcount could be already 0. If it is 0, then -+ * the shm objects will be free by free_ipc_work(). -+ */ -+ ns = get_ipc_ns_not_zero(ns); -+ if (!ns) { -+unlink_continue: -+ list_del_init(&shp->shm_clist); -+ task_unlock(task); -+ continue; - } +- mutex_lock(&tsk->futex_exit_mutex); +- +- /* +- * Switch the state to FUTEX_STATE_EXITING under tsk->pi_lock. +- * +- * This ensures that all subsequent checks of tsk->futex_state in +- * attach_to_pi_owner() must observe FUTEX_STATE_EXITING with +- * tsk->pi_lock held. +- * +- * It guarantees also that a pi_state which was queued right before +- * the state change under tsk->pi_lock by a concurrent waiter must +- * be observed in exit_pi_state_list(). +- */ +- raw_spin_lock_irq(&tsk->pi_lock); +- tsk->futex_state = FUTEX_STATE_EXITING; +- raw_spin_unlock_irq(&tsk->pi_lock); +-} +- +-static void futex_cleanup_end(struct task_struct *tsk, int state) +-{ +- /* +- * Lockless store. The only side effect is that an observer might +- * take another loop until it becomes visible. +- */ +- tsk->futex_state = state; +- /* +- * Drop the exit protection. This unblocks waiters which observed +- * FUTEX_STATE_EXITING to reevaluate the state. +- */ +- mutex_unlock(&tsk->futex_exit_mutex); +-} +- +-void futex_exec_release(struct task_struct *tsk) +-{ +- /* +- * The state handling is done for consistency, but in the case of +- * exec() there is no way to prevent further damage as the PID stays +- * the same. But for the unlikely and arguably buggy case that a +- * futex is held on exec(), this provides at least as much state +- * consistency protection which is possible. +- */ +- futex_cleanup_begin(tsk); +- futex_cleanup(tsk); +- /* +- * Reset the state to FUTEX_STATE_OK. The task is alive and about +- * exec a new binary. +- */ +- futex_cleanup_end(tsk, FUTEX_STATE_OK); +-} +- +-void futex_exit_release(struct task_struct *tsk) +-{ +- futex_cleanup_begin(tsk); +- futex_cleanup(tsk); +- futex_cleanup_end(tsk, FUTEX_STATE_DEAD); +-} +- +-long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout, +- u32 __user *uaddr2, u32 val2, u32 val3) +-{ +- int cmd = op & FUTEX_CMD_MASK; +- unsigned int flags = 0; +- +- if (!(op & FUTEX_PRIVATE_FLAG)) +- flags |= FLAGS_SHARED; +- +- if (op & FUTEX_CLOCK_REALTIME) { +- flags |= FLAGS_CLOCKRT; +- if (cmd != FUTEX_WAIT_BITSET && cmd != FUTEX_WAIT_REQUEUE_PI && +- cmd != FUTEX_LOCK_PI2) +- return -ENOSYS; - } - -- /* Remove the list head from any segments still attached. */ -- list_del(&task->sysvshm.shm_clist); -- up_write(&shm_ids(ns).rwsem); -+ /* -+ * 4) get a reference to shp. -+ * This cannot fail: shm_clist_rm() is called before -+ * ipc_rmid(), thus the refcount cannot be 0. -+ */ -+ WARN_ON(!ipc_rcu_getref(&shp->shm_perm)); -+ -+ /* -+ * 5) unlink the shm segment from the list of segments -+ * created by current. -+ * This must be done last. After unlinking, -+ * only the refcounts obtained above prevent IPC_RMID -+ * from destroying the segment or the namespace. 
-+ */ -+ list_del_init(&shp->shm_clist); -+ -+ task_unlock(task); -+ -+ /* -+ * 6) we have all references -+ * Thus lock & if needed destroy shp. -+ */ -+ down_write(&shm_ids(ns).rwsem); -+ shm_lock_by_ptr(shp); -+ /* -+ * rcu_read_lock was implicitly taken in shm_lock_by_ptr, it's -+ * safe to call ipc_rcu_putref here -+ */ -+ ipc_rcu_putref(&shp->shm_perm, shm_rcu_free); -+ -+ if (ipc_valid_object(&shp->shm_perm)) { -+ if (shm_may_destroy(shp)) -+ shm_destroy(ns, shp); -+ else -+ shm_unlock(shp); -+ } else { -+ /* -+ * Someone else deleted the shp from namespace -+ * idr/kht while we have waited. -+ * Just unlock and continue. -+ */ -+ shm_unlock(shp); -+ } -+ -+ up_write(&shm_ids(ns).rwsem); -+ put_ipc_ns(ns); /* paired with get_ipc_ns_not_zero */ -+ } - } - - static vm_fault_t shm_fault(struct vm_fault *vmf) -@@ -680,7 +772,11 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params) - if (error < 0) - goto no_id; - -+ shp->ns = ns; -+ -+ task_lock(current); - list_add(&shp->shm_clist, ¤t->sysvshm.shm_clist); -+ task_unlock(current); - - /* - * shmid gets reported as "inode#" in /proc/pid/maps. -@@ -1573,7 +1669,8 @@ out_nattch: - down_write(&shm_ids(ns).rwsem); - shp = shm_lock(ns, shmid); - shp->shm_nattch--; -- if (shm_may_destroy(ns, shp)) -+ -+ if (shm_may_destroy(shp)) - shm_destroy(ns, shp); - else - shm_unlock(shp); -diff --git a/ipc/util.c b/ipc/util.c -index d48d8cfa1f3fa..fa2d86ef3fb80 100644 ---- a/ipc/util.c -+++ b/ipc/util.c -@@ -447,8 +447,8 @@ static int ipcget_public(struct ipc_namespace *ns, struct ipc_ids *ids, - static void ipc_kht_remove(struct ipc_ids *ids, struct kern_ipc_perm *ipcp) - { - if (ipcp->key != IPC_PRIVATE) -- rhashtable_remove_fast(&ids->key_ht, &ipcp->khtnode, -- ipc_kht_params); -+ WARN_ON_ONCE(rhashtable_remove_fast(&ids->key_ht, &ipcp->khtnode, -+ ipc_kht_params)); - } - - /** -@@ -498,7 +498,7 @@ void ipc_rmid(struct ipc_ids *ids, struct kern_ipc_perm *ipcp) - { - int idx = ipcid_to_idx(ipcp->id); - -- idr_remove(&ids->ipcs_idr, idx); -+ WARN_ON_ONCE(idr_remove(&ids->ipcs_idr, idx) != ipcp); - ipc_kht_remove(ids, ipcp); - ids->in_use--; - ipcp->deleted = true; -diff --git a/kernel/Makefile b/kernel/Makefile -index 4df609be42d07..0e119c52a2cd6 100644 ---- a/kernel/Makefile -+++ b/kernel/Makefile -@@ -113,7 +113,8 @@ obj-$(CONFIG_CPU_PM) += cpu_pm.o - obj-$(CONFIG_BPF) += bpf/ - obj-$(CONFIG_KCSAN) += kcsan/ - obj-$(CONFIG_SHADOW_CALL_STACK) += scs.o --obj-$(CONFIG_HAVE_STATIC_CALL_INLINE) += static_call.o -+obj-$(CONFIG_HAVE_STATIC_CALL) += static_call.o -+obj-$(CONFIG_HAVE_STATIC_CALL_INLINE) += static_call_inline.o - obj-$(CONFIG_CFI_CLANG) += cfi.o - - obj-$(CONFIG_PERF_EVENTS) += events/ -diff --git a/kernel/async.c b/kernel/async.c -index b8d7a663497f9..b2c4ba5686ee4 100644 ---- a/kernel/async.c -+++ b/kernel/async.c -@@ -205,9 +205,6 @@ async_cookie_t async_schedule_node_domain(async_func_t func, void *data, - atomic_inc(&entry_count); - spin_unlock_irqrestore(&async_lock, flags); - -- /* mark that this task has queued an async job, used by module init */ -- current->flags |= PF_USED_ASYNC; - - /* schedule for execution */ - queue_work_node(node, system_unbound_wq, &entry->work); - -diff --git a/kernel/audit.c b/kernel/audit.c -index 121d37e700a62..94ded5de91317 100644 ---- a/kernel/audit.c -+++ b/kernel/audit.c -@@ -541,20 +541,22 @@ static void kauditd_printk_skb(struct sk_buff *skb) - /** - * kauditd_rehold_skb - Handle a audit record send failure in the hold queue - * @skb: audit record -+ * @error: error code (unused) - 
* - * Description: - * This should only be used by the kauditd_thread when it fails to flush the - * hold queue. - */ --static void kauditd_rehold_skb(struct sk_buff *skb) -+static void kauditd_rehold_skb(struct sk_buff *skb, __always_unused int error) - { -- /* put the record back in the queue at the same place */ -- skb_queue_head(&audit_hold_queue, skb); -+ /* put the record back in the queue */ -+ skb_queue_tail(&audit_hold_queue, skb); - } - - /** - * kauditd_hold_skb - Queue an audit record, waiting for auditd - * @skb: audit record -+ * @error: error code - * - * Description: - * Queue the audit record, waiting for an instance of auditd. When this -@@ -564,19 +566,31 @@ static void kauditd_rehold_skb(struct sk_buff *skb) - * and queue it, if we have room. If we want to hold on to the record, but we - * don't have room, record a record lost message. - */ --static void kauditd_hold_skb(struct sk_buff *skb) -+static void kauditd_hold_skb(struct sk_buff *skb, int error) - { - /* at this point it is uncertain if we will ever send this to auditd so - * try to send the message via printk before we go any further */ - kauditd_printk_skb(skb); - - /* can we just silently drop the message? */ -- if (!audit_default) { -- kfree_skb(skb); -- return; -+ if (!audit_default) -+ goto drop; -+ -+ /* the hold queue is only for when the daemon goes away completely, -+ * not -EAGAIN failures; if we are in a -EAGAIN state requeue the -+ * record on the retry queue unless it's full, in which case drop it -+ */ -+ if (error == -EAGAIN) { -+ if (!audit_backlog_limit || -+ skb_queue_len(&audit_retry_queue) < audit_backlog_limit) { -+ skb_queue_tail(&audit_retry_queue, skb); -+ return; -+ } -+ audit_log_lost("kauditd retry queue overflow"); -+ goto drop; - } - -- /* if we have room, queue the message */ -+ /* if we have room in the hold queue, queue the message */ - if (!audit_backlog_limit || - skb_queue_len(&audit_hold_queue) < audit_backlog_limit) { - skb_queue_tail(&audit_hold_queue, skb); -@@ -585,24 +599,32 @@ static void kauditd_hold_skb(struct sk_buff *skb) - - /* we have no other options - drop the message */ - audit_log_lost("kauditd hold queue overflow"); -+drop: - kfree_skb(skb); - } - - /** - * kauditd_retry_skb - Queue an audit record, attempt to send again to auditd - * @skb: audit record -+ * @error: error code (unused) - * - * Description: - * Not as serious as kauditd_hold_skb() as we still have a connected auditd, - * but for some reason we are having problems sending it audit records so - * queue the given record and attempt to resend. 
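The queueing policy these audit changes converge on (bounded by audit_backlog_limit, zero meaning unlimited, overflow counted as lost and dropped) can be shown in miniature. Everything below is a toy stand-in, not kernel code:

    #include <stdio.h>
    #include <stdlib.h>

    #define BACKLOG_LIMIT 2   /* stand-in for audit_backlog_limit */

    struct rec { struct rec *next; };

    static struct rec *head, *tail;
    static unsigned int queued, lost;

    static void retry_enqueue(struct rec *r)
    {
        if (!BACKLOG_LIMIT || queued < BACKLOG_LIMIT) {
            r->next = NULL;
            if (tail)
                tail->next = r;
            else
                head = r;
            tail = r;
            queued++;
            return;
        }
        lost++;   /* the audit_log_lost() equivalent */
        fprintf(stderr, "retry queue overflow, %u lost\n", lost);
        free(r);
    }

    int main(void)
    {
        for (int i = 0; i < 4; i++)
            retry_enqueue(malloc(sizeof(struct rec)));
        printf("queued=%u lost=%u\n", queued, lost);
        return 0;
    }

The kernel version additionally falls back to printk for the dropped record, so the data is not silently gone, just no longer guaranteed delivery to auditd.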
- */ --static void kauditd_retry_skb(struct sk_buff *skb) -+static void kauditd_retry_skb(struct sk_buff *skb, __always_unused int error) - { -- /* NOTE: because records should only live in the retry queue for a -- * short period of time, before either being sent or moved to the hold -- * queue, we don't currently enforce a limit on this queue */ -- skb_queue_tail(&audit_retry_queue, skb); -+ if (!audit_backlog_limit || -+ skb_queue_len(&audit_retry_queue) < audit_backlog_limit) { -+ skb_queue_tail(&audit_retry_queue, skb); -+ return; -+ } -+ -+ /* we have to drop the record, send it via printk as a last effort */ -+ kauditd_printk_skb(skb); -+ audit_log_lost("kauditd retry queue overflow"); -+ kfree_skb(skb); - } - - /** -@@ -640,7 +662,7 @@ static void auditd_reset(const struct auditd_connection *ac) - /* flush the retry queue to the hold queue, but don't touch the main - * queue since we need to process that normally for multicast */ - while ((skb = skb_dequeue(&audit_retry_queue))) -- kauditd_hold_skb(skb); -+ kauditd_hold_skb(skb, -ECONNREFUSED); - } - - /** -@@ -714,16 +736,18 @@ static int kauditd_send_queue(struct sock *sk, u32 portid, - struct sk_buff_head *queue, - unsigned int retry_limit, - void (*skb_hook)(struct sk_buff *skb), -- void (*err_hook)(struct sk_buff *skb)) -+ void (*err_hook)(struct sk_buff *skb, int error)) - { - int rc = 0; -- struct sk_buff *skb; -- static unsigned int failed = 0; -+ struct sk_buff *skb = NULL; -+ struct sk_buff *skb_tail; -+ unsigned int failed = 0; - - /* NOTE: kauditd_thread takes care of all our locking, we just use - * the netlink info passed to us (e.g. sk and portid) */ - -- while ((skb = skb_dequeue(queue))) { -+ skb_tail = skb_peek_tail(queue); -+ while ((skb != skb_tail) && (skb = skb_dequeue(queue))) { - /* call the skb_hook for each skb we touch */ - if (skb_hook) - (*skb_hook)(skb); -@@ -731,36 +755,34 @@ static int kauditd_send_queue(struct sock *sk, u32 portid, - /* can we send to anyone via unicast? */ - if (!sk) { - if (err_hook) -- (*err_hook)(skb); -+ (*err_hook)(skb, -ECONNREFUSED); - continue; - } - -+retry: - /* grab an extra skb reference in case of error */ - skb_get(skb); - rc = netlink_unicast(sk, skb, portid, 0); - if (rc < 0) { -- /* fatal failure for our queue flush attempt? */ -+ /* send failed - try a few times unless fatal error */ - if (++failed >= retry_limit || - rc == -ECONNREFUSED || rc == -EPERM) { -- /* yes - error processing for the queue */ - sk = NULL; - if (err_hook) -- (*err_hook)(skb); -- if (!skb_hook) -- goto out; -- /* keep processing with the skb_hook */ -+ (*err_hook)(skb, rc); -+ if (rc == -EAGAIN) -+ rc = 0; -+ /* continue to drain the queue */ - continue; - } else -- /* no - requeue to preserve ordering */ -- skb_queue_head(queue, skb); -+ goto retry; - } else { -- /* it worked - drop the extra reference and continue */ -+ /* skb sent - drop the extra reference and continue */ - consume_skb(skb); - failed = 0; - } - } - --out: - return (rc >= 0 ? 
0 : rc); - } - -@@ -1542,6 +1564,20 @@ static void audit_receive(struct sk_buff *skb) - nlh = nlmsg_next(nlh, &len); - } - audit_ctl_unlock(); -+ -+ /* can't block with the ctrl lock, so penalize the sender now */ -+ if (audit_backlog_limit && -+ (skb_queue_len(&audit_queue) > audit_backlog_limit)) { -+ DECLARE_WAITQUEUE(wait, current); -+ -+ /* wake kauditd to try and flush the queue */ -+ wake_up_interruptible(&kauditd_wait); -+ -+ add_wait_queue_exclusive(&audit_backlog_wait, &wait); -+ set_current_state(TASK_UNINTERRUPTIBLE); -+ schedule_timeout(audit_backlog_wait_time); -+ remove_wait_queue(&audit_backlog_wait, &wait); -+ } - } - - /* Log information about who is connecting to the audit multicast socket */ -@@ -1609,7 +1645,8 @@ static int __net_init audit_net_init(struct net *net) - audit_panic("cannot initialize netlink socket in namespace"); - return -ENOMEM; - } -- aunet->sk->sk_sndtimeo = MAX_SCHEDULE_TIMEOUT; -+ /* limit the timeout in case auditd is blocked/stopped */ -+ aunet->sk->sk_sndtimeo = HZ / 10; - - return 0; - } -@@ -1825,7 +1862,9 @@ struct audit_buffer *audit_log_start(struct audit_context *ctx, gfp_t gfp_mask, - * task_tgid_vnr() since auditd_pid is set in audit_receive_msg() - * using a PID anchored in the caller's namespace - * 2. generator holding the audit_cmd_mutex - we don't want to block -- * while holding the mutex */ -+ * while holding the mutex, although we do penalize the sender -+ * later in audit_receive() when it is safe to block -+ */ - if (!(auditd_test_task(current) || audit_ctl_owner_current())) { - long stime = audit_backlog_wait_time; - -diff --git a/kernel/audit.h b/kernel/audit.h -index d6a2c899a8dbf..b2ef4c0d3ec03 100644 ---- a/kernel/audit.h -+++ b/kernel/audit.h -@@ -194,6 +194,10 @@ struct audit_context { - struct { - char *name; - } module; -+ struct { -+ struct audit_ntp_data ntp_data; -+ struct timespec64 tk_injoffset; -+ } time; - }; - int fds[2]; - struct audit_proctitle proctitle; -diff --git a/kernel/audit_fsnotify.c b/kernel/audit_fsnotify.c -index 60739d5e3373f..c428312938e95 100644 ---- a/kernel/audit_fsnotify.c -+++ b/kernel/audit_fsnotify.c -@@ -102,6 +102,7 @@ struct audit_fsnotify_mark *audit_alloc_mark(struct audit_krule *krule, char *pa - - ret = fsnotify_add_inode_mark(&audit_mark->mark, inode, true); - if (ret < 0) { -+ audit_mark->path = NULL; - fsnotify_put_mark(&audit_mark->mark); - audit_mark = ERR_PTR(ret); - } -diff --git a/kernel/auditsc.c b/kernel/auditsc.c -index b1cb1dbf7417f..2f036bab3c28f 100644 ---- a/kernel/auditsc.c -+++ b/kernel/auditsc.c -@@ -1219,6 +1219,53 @@ static void audit_log_fcaps(struct audit_buffer *ab, struct audit_names *name) - from_kuid(&init_user_ns, name->fcap.rootid)); - } - -+static void audit_log_time(struct audit_context *context, struct audit_buffer **ab) -+{ -+ const struct audit_ntp_data *ntp = &context->time.ntp_data; -+ const struct timespec64 *tk = &context->time.tk_injoffset; -+ static const char * const ntp_name[] = { -+ "offset", -+ "freq", -+ "status", -+ "tai", -+ "tick", -+ "adjust", -+ }; -+ int type; -+ -+ if (context->type == AUDIT_TIME_ADJNTPVAL) { -+ for (type = 0; type < AUDIT_NTP_NVALS; type++) { -+ if (ntp->vals[type].newval != ntp->vals[type].oldval) { -+ if (!*ab) { -+ *ab = audit_log_start(context, -+ GFP_KERNEL, -+ AUDIT_TIME_ADJNTPVAL); -+ if (!*ab) -+ return; -+ } -+ audit_log_format(*ab, "op=%s old=%lli new=%lli", -+ ntp_name[type], -+ ntp->vals[type].oldval, -+ ntp->vals[type].newval); -+ audit_log_end(*ab); -+ *ab = NULL; -+ } -+ } -+ } -+ if (tk->tv_sec != 
0 || tk->tv_nsec != 0) { -+ if (!*ab) { -+ *ab = audit_log_start(context, GFP_KERNEL, -+ AUDIT_TIME_INJOFFSET); -+ if (!*ab) -+ return; -+ } -+ audit_log_format(*ab, "sec=%lli nsec=%li", -+ (long long)tk->tv_sec, tk->tv_nsec); -+ audit_log_end(*ab); -+ *ab = NULL; -+ } -+} -+ - static void show_special(struct audit_context *context, int *call_panic) - { - struct audit_buffer *ab; -@@ -1327,6 +1374,11 @@ static void show_special(struct audit_context *context, int *call_panic) - audit_log_format(ab, "(null)"); - - break; -+ case AUDIT_TIME_ADJNTPVAL: -+ case AUDIT_TIME_INJOFFSET: -+ /* this call deviates from the rest, eating the buffer */ -+ audit_log_time(context, &ab); -+ break; - } - audit_log_end(ab); - } -@@ -2564,31 +2616,26 @@ void __audit_fanotify(unsigned int response) - - void __audit_tk_injoffset(struct timespec64 offset) - { -- audit_log(audit_context(), GFP_KERNEL, AUDIT_TIME_INJOFFSET, -- "sec=%lli nsec=%li", -- (long long)offset.tv_sec, offset.tv_nsec); +- switch (cmd) { +- case FUTEX_LOCK_PI: +- case FUTEX_LOCK_PI2: +- case FUTEX_UNLOCK_PI: +- case FUTEX_TRYLOCK_PI: +- case FUTEX_WAIT_REQUEUE_PI: +- case FUTEX_CMP_REQUEUE_PI: +- if (!futex_cmpxchg_enabled) +- return -ENOSYS; +- } +- +- switch (cmd) { +- case FUTEX_WAIT: +- val3 = FUTEX_BITSET_MATCH_ANY; +- fallthrough; +- case FUTEX_WAIT_BITSET: +- return futex_wait(uaddr, flags, val, timeout, val3); +- case FUTEX_WAKE: +- val3 = FUTEX_BITSET_MATCH_ANY; +- fallthrough; +- case FUTEX_WAKE_BITSET: +- return futex_wake(uaddr, flags, val, val3); +- case FUTEX_REQUEUE: +- return futex_requeue(uaddr, flags, uaddr2, val, val2, NULL, 0); +- case FUTEX_CMP_REQUEUE: +- return futex_requeue(uaddr, flags, uaddr2, val, val2, &val3, 0); +- case FUTEX_WAKE_OP: +- return futex_wake_op(uaddr, flags, uaddr2, val, val2, val3); +- case FUTEX_LOCK_PI: +- flags |= FLAGS_CLOCKRT; +- fallthrough; +- case FUTEX_LOCK_PI2: +- return futex_lock_pi(uaddr, flags, timeout, 0); +- case FUTEX_UNLOCK_PI: +- return futex_unlock_pi(uaddr, flags); +- case FUTEX_TRYLOCK_PI: +- return futex_lock_pi(uaddr, flags, NULL, 1); +- case FUTEX_WAIT_REQUEUE_PI: +- val3 = FUTEX_BITSET_MATCH_ANY; +- return futex_wait_requeue_pi(uaddr, flags, val, timeout, val3, +- uaddr2); +- case FUTEX_CMP_REQUEUE_PI: +- return futex_requeue(uaddr, flags, uaddr2, val, val2, &val3, 1); +- } +- return -ENOSYS; -} - --static void audit_log_ntp_val(const struct audit_ntp_data *ad, -- const char *op, enum audit_ntp_type type) +-static __always_inline bool futex_cmd_has_timeout(u32 cmd) -{ -- const struct audit_ntp_val *val = &ad->vals[type]; +- switch (cmd) { +- case FUTEX_WAIT: +- case FUTEX_LOCK_PI: +- case FUTEX_LOCK_PI2: +- case FUTEX_WAIT_BITSET: +- case FUTEX_WAIT_REQUEUE_PI: +- return true; +- } +- return false; +-} - -- if (val->newval == val->oldval) +-static __always_inline int +-futex_init_timeout(u32 cmd, u32 op, struct timespec64 *ts, ktime_t *t) +-{ +- if (!timespec64_valid(ts)) +- return -EINVAL; +- +- *t = timespec64_to_ktime(*ts); +- if (cmd == FUTEX_WAIT) +- *t = ktime_add_safe(ktime_get(), *t); +- else if (cmd != FUTEX_LOCK_PI && !(op & FUTEX_CLOCK_REALTIME)) +- *t = timens_ktime_to_host(CLOCK_MONOTONIC, *t); +- return 0; +-} +- +-SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val, +- const struct __kernel_timespec __user *, utime, +- u32 __user *, uaddr2, u32, val3) +-{ +- int ret, cmd = op & FUTEX_CMD_MASK; +- ktime_t t, *tp = NULL; +- struct timespec64 ts; +- +- if (utime && futex_cmd_has_timeout(cmd)) { +- if (unlikely(should_fail_futex(!(op & 
FUTEX_PRIVATE_FLAG)))) +- return -EFAULT; +- if (get_timespec64(&ts, utime)) +- return -EFAULT; +- ret = futex_init_timeout(cmd, op, &ts, &t); +- if (ret) +- return ret; +- tp = &t; +- } +- +- return do_futex(uaddr, op, val, tp, uaddr2, (unsigned long)utime, val3); +-} +- +-#ifdef CONFIG_COMPAT +-/* +- * Fetch a robust-list pointer. Bit 0 signals PI futexes: +- */ +-static inline int +-compat_fetch_robust_entry(compat_uptr_t *uentry, struct robust_list __user **entry, +- compat_uptr_t __user *head, unsigned int *pi) +-{ +- if (get_user(*uentry, head)) +- return -EFAULT; +- +- *entry = compat_ptr((*uentry) & ~1); +- *pi = (unsigned int)(*uentry) & 1; +- +- return 0; +-} +- +-static void __user *futex_uaddr(struct robust_list __user *entry, +- compat_long_t futex_offset) +-{ +- compat_uptr_t base = ptr_to_compat(entry); +- void __user *uaddr = compat_ptr(base + futex_offset); +- +- return uaddr; +-} +- +-/* +- * Walk curr->robust_list (very carefully, it's a userspace list!) +- * and mark any locks found there dead, and notify any waiters. +- * +- * We silently return on any sign of list-walking problem. +- */ +-static void compat_exit_robust_list(struct task_struct *curr) +-{ +- struct compat_robust_list_head __user *head = curr->compat_robust_list; +- struct robust_list __user *entry, *next_entry, *pending; +- unsigned int limit = ROBUST_LIST_LIMIT, pi, pip; +- unsigned int next_pi; +- compat_uptr_t uentry, next_uentry, upending; +- compat_long_t futex_offset; +- int rc; +- +- if (!futex_cmpxchg_enabled) - return; -+ struct audit_context *context = audit_context(); - -- audit_log(audit_context(), GFP_KERNEL, AUDIT_TIME_ADJNTPVAL, -- "op=%s old=%lli new=%lli", op, val->oldval, val->newval); -+ /* only set type if not already set by NTP */ -+ if (!context->type) -+ context->type = AUDIT_TIME_INJOFFSET; -+ memcpy(&context->time.tk_injoffset, &offset, sizeof(offset)); - } - - void __audit_ntp_log(const struct audit_ntp_data *ad) - { -- audit_log_ntp_val(ad, "offset", AUDIT_NTP_OFFSET); -- audit_log_ntp_val(ad, "freq", AUDIT_NTP_FREQ); -- audit_log_ntp_val(ad, "status", AUDIT_NTP_STATUS); -- audit_log_ntp_val(ad, "tai", AUDIT_NTP_TAI); -- audit_log_ntp_val(ad, "tick", AUDIT_NTP_TICK); -- audit_log_ntp_val(ad, "adjust", AUDIT_NTP_ADJUST); -+ struct audit_context *context = audit_context(); -+ int type; -+ -+ for (type = 0; type < AUDIT_NTP_NVALS; type++) -+ if (ad->vals[type].newval != ad->vals[type].oldval) { -+ /* unconditionally set type, overwriting TK */ -+ context->type = AUDIT_TIME_ADJNTPVAL; -+ memcpy(&context->time.ntp_data, ad, sizeof(*ad)); -+ break; -+ } - } - - void __audit_log_nfcfg(const char *name, u8 af, unsigned int nentries, -diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c -index 447def5405444..88014cd31b28a 100644 ---- a/kernel/bpf/arraymap.c -+++ b/kernel/bpf/arraymap.c -@@ -620,6 +620,11 @@ static int bpf_iter_init_array_map(void *priv_data, - seq_info->percpu_value_buf = value_buf; - } - -+ /* bpf_iter_attach_map() acquires a map uref, and the uref may be -+ * released before or in the middle of iterating map elements, so -+ * acquire an extra map uref for iterator. 
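The uref fix above is the classic pattern of an iterator pinning the object it walks. A toy refcount model of the same shape, with every name invented for illustration:

    #include <stdio.h>
    #include <stdlib.h>

    struct demo_map { int usercnt; };

    static void map_get(struct demo_map *m) { m->usercnt++; }

    static void map_put(struct demo_map *m)
    {
        if (--m->usercnt == 0) {
            printf("map released\n");
            free(m);
        }
    }

    /* iterator takes its own usage reference, as bpf_iter_init_array_map()
     * now does via bpf_map_inc_with_uref() */
    static struct demo_map *iter_attach(struct demo_map *m)
    {
        map_get(m);
        return m;
    }

    /* dropped in the fini path, mirroring bpf_map_put_with_uref() */
    static void iter_detach(struct demo_map *m) { map_put(m); }

    int main(void)
    {
        struct demo_map *m = malloc(sizeof(*m));
        m->usercnt = 1;                 /* creator's reference */
        struct demo_map *it = iter_attach(m);
        map_put(m);                     /* creator drops its ref mid-iteration */
        /* map is still alive here, held by the iterator alone */
        iter_detach(it);                /* now it really goes away */
        return 0;
    }

Without the extra reference, the creator's put would free the map while the iterator still dereferences it, which is precisely the use-after-free window the comment describes.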
-+ */ -+ bpf_map_inc_with_uref(map); - seq_info->map = map; - return 0; - } -@@ -628,6 +633,7 @@ static void bpf_iter_fini_array_map(void *priv_data) - { - struct bpf_iter_seq_array_map_info *seq_info = priv_data; - -+ bpf_map_put_with_uref(seq_info->map); - kfree(seq_info->percpu_value_buf); - } - -diff --git a/kernel/bpf/bpf_local_storage.c b/kernel/bpf/bpf_local_storage.c -index b305270b7a4bd..de4d741d99a3d 100644 ---- a/kernel/bpf/bpf_local_storage.c -+++ b/kernel/bpf/bpf_local_storage.c -@@ -506,11 +506,11 @@ void bpf_local_storage_map_free(struct bpf_local_storage_map *smap, - struct bpf_local_storage_elem, map_node))) { - if (busy_counter) { - migrate_disable(); -- __this_cpu_inc(*busy_counter); -+ this_cpu_inc(*busy_counter); - } - bpf_selem_unlink(selem); - if (busy_counter) { -- __this_cpu_dec(*busy_counter); -+ this_cpu_dec(*busy_counter); - migrate_enable(); - } - cond_resched_rcu(); -diff --git a/kernel/bpf/bpf_task_storage.c b/kernel/bpf/bpf_task_storage.c -index ebfa8bc908923..6b7bfce239158 100644 ---- a/kernel/bpf/bpf_task_storage.c -+++ b/kernel/bpf/bpf_task_storage.c -@@ -25,20 +25,20 @@ static DEFINE_PER_CPU(int, bpf_task_storage_busy); - static void bpf_task_storage_lock(void) - { - migrate_disable(); -- __this_cpu_inc(bpf_task_storage_busy); -+ this_cpu_inc(bpf_task_storage_busy); - } - - static void bpf_task_storage_unlock(void) - { -- __this_cpu_dec(bpf_task_storage_busy); -+ this_cpu_dec(bpf_task_storage_busy); - migrate_enable(); - } - - static bool bpf_task_storage_trylock(void) - { - migrate_disable(); -- if (unlikely(__this_cpu_inc_return(bpf_task_storage_busy) != 1)) { -- __this_cpu_dec(bpf_task_storage_busy); -+ if (unlikely(this_cpu_inc_return(bpf_task_storage_busy) != 1)) { -+ this_cpu_dec(bpf_task_storage_busy); - migrate_enable(); - return false; - } -diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c -index dfe61df4f974d..7cb13b9f69a66 100644 ---- a/kernel/bpf/btf.c -+++ b/kernel/bpf/btf.c -@@ -2983,7 +2983,7 @@ static int btf_struct_resolve(struct btf_verifier_env *env, - if (v->next_member) { - const struct btf_type *last_member_type; - const struct btf_member *last_member; -- u16 last_member_type_id; -+ u32 last_member_type_id; - - last_member = btf_type_member(v->t) + v->next_member - 1; - last_member_type_id = last_member->type; -@@ -4332,8 +4332,7 @@ static struct btf *btf_parse(bpfptr_t btf_data, u32 btf_data_size, - log->len_total = log_size; - - /* log attributes have to be sane */ -- if (log->len_total < 128 || log->len_total > UINT_MAX >> 8 || -- !log->level || !log->ubuf) { -+ if (!bpf_verifier_log_attr_valid(log)) { - err = -EINVAL; - goto errout; - } -@@ -4801,10 +4800,12 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type, - /* check for PTR_TO_RDONLY_BUF_OR_NULL or PTR_TO_RDWR_BUF_OR_NULL */ - for (i = 0; i < prog->aux->ctx_arg_info_size; i++) { - const struct bpf_ctx_arg_aux *ctx_arg_info = &prog->aux->ctx_arg_info[i]; -+ u32 type, flag; - -- if (ctx_arg_info->offset == off && -- (ctx_arg_info->reg_type == PTR_TO_RDONLY_BUF_OR_NULL || -- ctx_arg_info->reg_type == PTR_TO_RDWR_BUF_OR_NULL)) { -+ type = base_type(ctx_arg_info->reg_type); -+ flag = type_flag(ctx_arg_info->reg_type); -+ if (ctx_arg_info->offset == off && type == PTR_TO_BUF && -+ (flag & PTR_MAYBE_NULL)) { - info->reg_type = ctx_arg_info->reg_type; - return true; - } -@@ -5440,6 +5441,8 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env, - struct bpf_reg_state *regs, - bool ptr_to_mem_ok) - { -+ enum bpf_prog_type prog_type = env->prog->type == 
BPF_PROG_TYPE_EXT ? -+ env->prog->aux->dst_prog->type : env->prog->type; - struct bpf_verifier_log *log = &env->log; - const char *func_name, *ref_tname; - const struct btf_type *t, *ref_t; -@@ -5509,9 +5512,9 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env, - if (reg->type == PTR_TO_BTF_ID) { - reg_btf = reg->btf; - reg_ref_id = reg->btf_id; -- } else if (reg2btf_ids[reg->type]) { -+ } else if (reg2btf_ids[base_type(reg->type)]) { - reg_btf = btf_vmlinux; -- reg_ref_id = *reg2btf_ids[reg->type]; -+ reg_ref_id = *reg2btf_ids[base_type(reg->type)]; - } else { - bpf_log(log, "kernel function %s args#%d expected pointer to %s %s but R%d is not a pointer to btf_id\n", - func_name, i, -@@ -5532,8 +5535,7 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env, - reg_ref_tname); - return -EINVAL; - } -- } else if (btf_get_prog_ctx_type(log, btf, t, -- env->prog->type, i)) { -+ } else if (btf_get_prog_ctx_type(log, btf, t, prog_type, i)) { - /* If function expects ctx type in BTF check that caller - * is passing PTR_TO_CTX. - */ -@@ -5718,7 +5720,7 @@ int btf_prepare_func_args(struct bpf_verifier_env *env, int subprog, - return -EINVAL; - } - -- reg->type = PTR_TO_MEM_OR_NULL; -+ reg->type = PTR_TO_MEM | PTR_MAYBE_NULL; - reg->id = ++env->id_gen; - - continue; -@@ -6007,12 +6009,17 @@ bool btf_id_set_contains(const struct btf_id_set *set, u32 id) - return bsearch(&id, set->ids, set->cnt, sizeof(u32), btf_id_cmp_func) != NULL; - } - -+enum { -+ BTF_MODULE_F_LIVE = (1 << 0), -+}; +- +- /* +- * Fetch the list head (which was registered earlier, via +- * sys_set_robust_list()): +- */ +- if (compat_fetch_robust_entry(&uentry, &entry, &head->list.next, &pi)) +- return; +- /* +- * Fetch the relative futex offset: +- */ +- if (get_user(futex_offset, &head->futex_offset)) +- return; +- /* +- * Fetch any possibly pending lock-add first, and handle it +- * if it exists: +- */ +- if (compat_fetch_robust_entry(&upending, &pending, +- &head->list_op_pending, &pip)) +- return; +- +- next_entry = NULL; /* avoid warning with gcc */ +- while (entry != (struct robust_list __user *) &head->list) { +- /* +- * Fetch the next entry in the list before calling +- * handle_futex_death: +- */ +- rc = compat_fetch_robust_entry(&next_uentry, &next_entry, +- (compat_uptr_t __user *)&entry->next, &next_pi); +- /* +- * A pending lock might already be on the list, so +- * dont process it twice: +- */ +- if (entry != pending) { +- void __user *uaddr = futex_uaddr(entry, futex_offset); +- +- if (handle_futex_death(uaddr, curr, pi, +- HANDLE_DEATH_LIST)) +- return; +- } +- if (rc) +- return; +- uentry = next_uentry; +- entry = next_entry; +- pi = next_pi; +- /* +- * Avoid excessively long or circular lists: +- */ +- if (!--limit) +- break; +- +- cond_resched(); +- } +- if (pending) { +- void __user *uaddr = futex_uaddr(pending, futex_offset); +- +- handle_futex_death(uaddr, curr, pip, HANDLE_DEATH_PENDING); +- } +-} +- +-COMPAT_SYSCALL_DEFINE2(set_robust_list, +- struct compat_robust_list_head __user *, head, +- compat_size_t, len) +-{ +- if (!futex_cmpxchg_enabled) +- return -ENOSYS; +- +- if (unlikely(len != sizeof(*head))) +- return -EINVAL; +- +- current->compat_robust_list = head; +- +- return 0; +-} +- +-COMPAT_SYSCALL_DEFINE3(get_robust_list, int, pid, +- compat_uptr_t __user *, head_ptr, +- compat_size_t __user *, len_ptr) +-{ +- struct compat_robust_list_head __user *head; +- unsigned long ret; +- struct task_struct *p; +- +- if (!futex_cmpxchg_enabled) +- return -ENOSYS; +- +- 
rcu_read_lock(); +- +- ret = -ESRCH; +- if (!pid) +- p = current; +- else { +- p = find_task_by_vpid(pid); +- if (!p) +- goto err_unlock; +- } +- +- ret = -EPERM; +- if (!ptrace_may_access(p, PTRACE_MODE_READ_REALCREDS)) +- goto err_unlock; +- +- head = p->compat_robust_list; +- rcu_read_unlock(); +- +- if (put_user(sizeof(*head), len_ptr)) +- return -EFAULT; +- return put_user(ptr_to_compat(head), head_ptr); +- +-err_unlock: +- rcu_read_unlock(); +- +- return ret; +-} +-#endif /* CONFIG_COMPAT */ +- +-#ifdef CONFIG_COMPAT_32BIT_TIME +-SYSCALL_DEFINE6(futex_time32, u32 __user *, uaddr, int, op, u32, val, +- const struct old_timespec32 __user *, utime, u32 __user *, uaddr2, +- u32, val3) +-{ +- int ret, cmd = op & FUTEX_CMD_MASK; +- ktime_t t, *tp = NULL; +- struct timespec64 ts; +- +- if (utime && futex_cmd_has_timeout(cmd)) { +- if (get_old_timespec32(&ts, utime)) +- return -EFAULT; +- ret = futex_init_timeout(cmd, op, &ts, &t); +- if (ret) +- return ret; +- tp = &t; +- } +- +- return do_futex(uaddr, op, val, tp, uaddr2, (unsigned long)utime, val3); +-} +-#endif /* CONFIG_COMPAT_32BIT_TIME */ +- +-static void __init futex_detect_cmpxchg(void) +-{ +-#ifndef CONFIG_HAVE_FUTEX_CMPXCHG +- u32 curval; +- +- /* +- * This will fail and we want it. Some arch implementations do +- * runtime detection of the futex_atomic_cmpxchg_inatomic() +- * functionality. We want to know that before we call in any +- * of the complex code paths. Also we want to prevent +- * registration of robust lists in that case. NULL is +- * guaranteed to fault and we get -EFAULT on functional +- * implementation, the non-functional ones will return +- * -ENOSYS. +- */ +- if (cmpxchg_futex_value_locked(&curval, NULL, 0, 0) == -EFAULT) +- futex_cmpxchg_enabled = 1; +-#endif +-} +- +-static int __init futex_init(void) +-{ +- unsigned int futex_shift; +- unsigned long i; +- +-#if CONFIG_BASE_SMALL +- futex_hashsize = 16; +-#else +- futex_hashsize = roundup_pow_of_two(256 * num_possible_cpus()); +-#endif +- +- futex_queues = alloc_large_system_hash("futex", sizeof(*futex_queues), +- futex_hashsize, 0, +- futex_hashsize < 256 ? 
HASH_SMALL : 0, +- &futex_shift, NULL, +- futex_hashsize, futex_hashsize); +- futex_hashsize = 1UL << futex_shift; +- +- futex_detect_cmpxchg(); +- +- for (i = 0; i < futex_hashsize; i++) { +- atomic_set(&futex_queues[i].waiters, 0); +- plist_head_init(&futex_queues[i].chain); +- spin_lock_init(&futex_queues[i].lock); +- } +- +- return 0; +-} +-core_initcall(futex_init); +diff --git a/kernel/futex/Makefile b/kernel/futex/Makefile +new file mode 100644 +index 0000000000000..b89ba3fba3437 +--- /dev/null ++++ b/kernel/futex/Makefile +@@ -0,0 +1,3 @@ ++# SPDX-License-Identifier: GPL-2.0 + - #ifdef CONFIG_DEBUG_INFO_BTF_MODULES - struct btf_module { - struct list_head list; - struct module *module; - struct btf *btf; - struct bin_attribute *sysfs_attr; -+ int flags; - }; - - static LIST_HEAD(btf_modules); -@@ -6038,7 +6045,8 @@ static int btf_module_notify(struct notifier_block *nb, unsigned long op, - int err = 0; - - if (mod->btf_data_size == 0 || -- (op != MODULE_STATE_COMING && op != MODULE_STATE_GOING)) -+ (op != MODULE_STATE_COMING && op != MODULE_STATE_LIVE && -+ op != MODULE_STATE_GOING)) - goto out; - - switch (op) { -@@ -6095,6 +6103,17 @@ static int btf_module_notify(struct notifier_block *nb, unsigned long op, - btf_mod->sysfs_attr = attr; - } - -+ break; -+ case MODULE_STATE_LIVE: -+ mutex_lock(&btf_module_mutex); -+ list_for_each_entry_safe(btf_mod, tmp, &btf_modules, list) { -+ if (btf_mod->module != module) -+ continue; ++obj-y += core.o +diff --git a/kernel/futex/core.c b/kernel/futex/core.c +new file mode 100644 +index 0000000000000..764e73622b386 +--- /dev/null ++++ b/kernel/futex/core.c +@@ -0,0 +1,4280 @@ ++// SPDX-License-Identifier: GPL-2.0-or-later ++/* ++ * Fast Userspace Mutexes (which I call "Futexes!"). ++ * (C) Rusty Russell, IBM 2002 ++ * ++ * Generalized futexes, futex requeueing, misc fixes by Ingo Molnar ++ * (C) Copyright 2003 Red Hat Inc, All Rights Reserved ++ * ++ * Removed page pinning, fix privately mapped COW pages and other cleanups ++ * (C) Copyright 2003, 2004 Jamie Lokier ++ * ++ * Robust futex support started by Ingo Molnar ++ * (C) Copyright 2006 Red Hat Inc, All Rights Reserved ++ * Thanks to Thomas Gleixner for suggestions, analysis and fixes. ++ * ++ * PI-futex support started by Ingo Molnar and Thomas Gleixner ++ * Copyright (C) 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com> ++ * Copyright (C) 2006 Timesys Corp., Thomas Gleixner <tglx@timesys.com> ++ * ++ * PRIVATE futexes by Eric Dumazet ++ * Copyright (C) 2007 Eric Dumazet <dada1@cosmosbay.com> ++ * ++ * Requeue-PI support by Darren Hart <dvhltc@us.ibm.com> ++ * Copyright (C) IBM Corporation, 2009 ++ * Thanks to Thomas Gleixner for conceptual design and careful reviews. ++ * ++ * Thanks to Ben LaHaise for yelling "hashed waitqueues" loudly ++ * enough at me, Linus for the original (flawed) idea, Matthew ++ * Kirkwood for proof-of-concept implementation. ++ * ++ * "The futexes are also cursed." ++ * "But they come in a choice of three flavours!" 
++ */ ++#include <linux/compat.h> ++#include <linux/jhash.h> ++#include <linux/pagemap.h> ++#include <linux/syscalls.h> ++#include <linux/freezer.h> ++#include <linux/memblock.h> ++#include <linux/fault-inject.h> ++#include <linux/time_namespace.h> + -+ btf_mod->flags |= BTF_MODULE_F_LIVE; -+ break; -+ } -+ mutex_unlock(&btf_module_mutex); - break; - case MODULE_STATE_GOING: - mutex_lock(&btf_module_mutex); -@@ -6141,7 +6160,12 @@ struct module *btf_try_get_module(const struct btf *btf) - if (btf_mod->btf != btf) - continue; - -- if (try_module_get(btf_mod->module)) -+ /* We must only consider module whose __init routine has -+ * finished, hence we must check for BTF_MODULE_F_LIVE flag, -+ * which is set from the notifier callback for -+ * MODULE_STATE_LIVE. -+ */ -+ if ((btf_mod->flags & BTF_MODULE_F_LIVE) && try_module_get(btf_mod->module)) - res = btf_mod->module; - - break; -@@ -6208,7 +6232,7 @@ const struct bpf_func_proto bpf_btf_find_by_name_kind_proto = { - .func = bpf_btf_find_by_name_kind, - .gpl_only = false, - .ret_type = RET_INTEGER, -- .arg1_type = ARG_PTR_TO_MEM, -+ .arg1_type = ARG_PTR_TO_MEM | MEM_RDONLY, - .arg2_type = ARG_CONST_SIZE, - .arg3_type = ARG_ANYTHING, - .arg4_type = ARG_ANYTHING, -diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c -index 03145d45e3d5b..eb3e787a3a977 100644 ---- a/kernel/bpf/cgroup.c -+++ b/kernel/bpf/cgroup.c -@@ -667,6 +667,62 @@ static struct bpf_prog_list *find_detach_entry(struct list_head *progs, - return ERR_PTR(-ENOENT); - } - -+/** -+ * purge_effective_progs() - After compute_effective_progs fails to alloc new -+ * cgrp->bpf.inactive table we can recover by -+ * recomputing the array in place. ++#include <asm/futex.h> ++ ++#include "../locking/rtmutex_common.h" ++ ++/* ++ * READ this before attempting to hack on futexes! + * -+ * @cgrp: The cgroup which descendants to travers -+ * @prog: A program to detach or NULL -+ * @link: A link to detach or NULL -+ * @atype: Type of detach operation ++ * Basic futex operation and ordering guarantees ++ * ============================================= ++ * ++ * The waiter reads the futex value in user space and calls ++ * futex_wait(). This function computes the hash bucket and acquires ++ * the hash bucket lock. After that it reads the futex user space value ++ * again and verifies that the data has not changed. If it has not changed ++ * it enqueues itself into the hash bucket, releases the hash bucket lock ++ * and schedules. ++ * ++ * The waker side modifies the user space value of the futex and calls ++ * futex_wake(). This function computes the hash bucket and acquires the ++ * hash bucket lock. Then it looks for waiters on that futex in the hash ++ * bucket and wakes them. ++ * ++ * In futex wake up scenarios where no tasks are blocked on a futex, taking ++ * the hb spinlock can be avoided and simply return. 
In order for this ++ * optimization to work, ordering guarantees must exist so that the waiter ++ * being added to the list is acknowledged when the list is concurrently being ++ * checked by the waker, avoiding scenarios like the following: ++ * ++ * CPU 0 CPU 1 ++ * val = *futex; ++ * sys_futex(WAIT, futex, val); ++ * futex_wait(futex, val); ++ * uval = *futex; ++ * *futex = newval; ++ * sys_futex(WAKE, futex); ++ * futex_wake(futex); ++ * if (queue_empty()) ++ * return; ++ * if (uval == val) ++ * lock(hash_bucket(futex)); ++ * queue(); ++ * unlock(hash_bucket(futex)); ++ * schedule(); ++ * ++ * This would cause the waiter on CPU 0 to wait forever because it ++ * missed the transition of the user space value from val to newval ++ * and the waker did not find the waiter in the hash bucket queue. ++ * ++ * The correct serialization ensures that a waiter either observes ++ * the changed user space value before blocking or is woken by a ++ * concurrent waker: ++ * ++ * CPU 0 CPU 1 ++ * val = *futex; ++ * sys_futex(WAIT, futex, val); ++ * futex_wait(futex, val); ++ * ++ * waiters++; (a) ++ * smp_mb(); (A) <-- paired with -. ++ * | ++ * lock(hash_bucket(futex)); | ++ * | ++ * uval = *futex; | ++ * | *futex = newval; ++ * | sys_futex(WAKE, futex); ++ * | futex_wake(futex); ++ * | ++ * `--------> smp_mb(); (B) ++ * if (uval == val) ++ * queue(); ++ * unlock(hash_bucket(futex)); ++ * schedule(); if (waiters) ++ * lock(hash_bucket(futex)); ++ * else wake_waiters(futex); ++ * waiters--; (b) unlock(hash_bucket(futex)); ++ * ++ * Where (A) orders the waiters increment and the futex value read through ++ * atomic operations (see hb_waiters_inc) and where (B) orders the write ++ * to futex and the waiters read (see hb_waiters_pending()). ++ * ++ * This yields the following case (where X:=waiters, Y:=futex): ++ * ++ * X = Y = 0 ++ * ++ * w[X]=1 w[Y]=1 ++ * MB MB ++ * r[Y]=y r[X]=x ++ * ++ * Which guarantees that x==0 && y==0 is impossible; which translates back into ++ * the guarantee that we cannot both miss the futex variable change and the ++ * enqueue. ++ * ++ * Note that a new waiter is accounted for in (a) even when it is possible that ++ * the wait call can return error, in which case we backtrack from it in (b). ++ * Refer to the comment in queue_lock(). ++ * ++ * Similarly, in order to account for waiters being requeued on another ++ * address we always increment the waiters for the destination bucket before ++ * acquiring the lock. It then decrements them again after releasing it - ++ * the code that actually moves the futex(es) between hash buckets (requeue_futex) ++ * will do the additional required waiter count housekeeping. This is done for ++ * double_lock_hb() and double_unlock_hb(), respectively. + */ -+static void purge_effective_progs(struct cgroup *cgrp, struct bpf_prog *prog, -+ struct bpf_cgroup_link *link, -+ enum cgroup_bpf_attach_type atype) -+{ -+ struct cgroup_subsys_state *css; -+ struct bpf_prog_array *progs; -+ struct bpf_prog_list *pl; -+ struct list_head *head; -+ struct cgroup *cg; -+ int pos; + -+ /* recompute effective prog array in place */ -+ css_for_each_descendant_pre(css, &cgrp->self) { -+ struct cgroup *desc = container_of(css, struct cgroup, self); ++#ifdef CONFIG_HAVE_FUTEX_CMPXCHG ++#define futex_cmpxchg_enabled 1 ++#else ++static int __read_mostly futex_cmpxchg_enabled; ++#endif + -+ if (percpu_ref_is_zero(&desc->bpf.refcnt)) -+ continue; ++/* ++ * Futex flags used to encode options to functions and preserve them across ++ * restarts. 
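
As a standalone illustration of the store-buffering argument in the
ordering comment above (the X/Y litmus: with full barriers (A) and (B),
x == 0 && y == 0 is impossible), here is a minimal C11 sketch. It is an
illustration only, not part of the patch; sequentially consistent
fences stand in for the kernel's smp_mb(), and it builds with -pthread:

    #include <pthread.h>
    #include <stdatomic.h>
    #include <stdio.h>

    static atomic_int waiters, futex_word;  /* X and Y in the comment */
    static int r0, r1;

    static void *waiter_side(void *arg)
    {
        (void)arg;
        atomic_store_explicit(&waiters, 1, memory_order_relaxed);    /* w[X]=1 */
        atomic_thread_fence(memory_order_seq_cst);                   /* MB (A) */
        r0 = atomic_load_explicit(&futex_word, memory_order_relaxed); /* r[Y] */
        return NULL;
    }

    static void *waker_side(void *arg)
    {
        (void)arg;
        atomic_store_explicit(&futex_word, 1, memory_order_relaxed); /* w[Y]=1 */
        atomic_thread_fence(memory_order_seq_cst);                   /* MB (B) */
        r1 = atomic_load_explicit(&waiters, memory_order_relaxed);    /* r[X] */
        return NULL;
    }

    int main(void)
    {
        pthread_t a, b;

        pthread_create(&a, NULL, waiter_side, NULL);
        pthread_create(&b, NULL, waker_side, NULL);
        pthread_join(a, NULL);
        pthread_join(b, NULL);
        /* The fences forbid r0 == 0 && r1 == 0: at least one side must
         * observe the other's store, so a waiter cannot be missed. */
        printf("r0=%d r1=%d\n", r0, r1);
        return 0;
    }
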
++ */ ++#ifdef CONFIG_MMU ++# define FLAGS_SHARED 0x01 ++#else ++/* ++ * NOMMU does not have per process address space. Let the compiler optimize ++ * code away. ++ */ ++# define FLAGS_SHARED 0x00 ++#endif ++#define FLAGS_CLOCKRT 0x02 ++#define FLAGS_HAS_TIMEOUT 0x04 + -+ /* find position of link or prog in effective progs array */ -+ for (pos = 0, cg = desc; cg; cg = cgroup_parent(cg)) { -+ if (pos && !(cg->bpf.flags[atype] & BPF_F_ALLOW_MULTI)) -+ continue; ++/* ++ * Priority Inheritance state: ++ */ ++struct futex_pi_state { ++ /* ++ * list of 'owned' pi_state instances - these have to be ++ * cleaned up in do_exit() if the task exits prematurely: ++ */ ++ struct list_head list; + -+ head = &cg->bpf.progs[atype]; -+ list_for_each_entry(pl, head, node) { -+ if (!prog_list_prog(pl)) -+ continue; -+ if (pl->prog == prog && pl->link == link) -+ goto found; -+ pos++; -+ } -+ } ++ /* ++ * The PI object: ++ */ ++ struct rt_mutex_base pi_mutex; + -+ /* no link or prog match, skip the cgroup of this layer */ -+ continue; -+found: -+ progs = rcu_dereference_protected( -+ desc->bpf.effective[atype], -+ lockdep_is_held(&cgroup_mutex)); ++ struct task_struct *owner; ++ refcount_t refcount; + -+ /* Remove the program from the array */ -+ WARN_ONCE(bpf_prog_array_delete_safe_at(progs, pos), -+ "Failed to purge a prog from array at index %d", pos); -+ } -+} ++ union futex_key key; ++} __randomize_layout; + - /** - * __cgroup_bpf_detach() - Detach the program or link from a cgroup, and - * propagate the change to descendants -@@ -686,7 +742,6 @@ int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog, - struct bpf_prog_list *pl; - struct list_head *progs; - u32 flags; -- int err; - - atype = to_cgroup_bpf_attach_type(type); - if (atype < 0) -@@ -708,9 +763,12 @@ int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog, - pl->prog = NULL; - pl->link = NULL; - -- err = update_effective_progs(cgrp, atype); -- if (err) -- goto cleanup; -+ if (update_effective_progs(cgrp, atype)) { -+ /* if update effective array failed replace the prog with a dummy prog*/ -+ pl->prog = old_prog; -+ pl->link = link; -+ purge_effective_progs(cgrp, old_prog, link, atype); -+ } - - /* now can actually delete it from this cgroup list */ - list_del(&pl->node); -@@ -722,12 +780,6 @@ int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog, - bpf_prog_put(old_prog); - static_branch_dec(&cgroup_bpf_enabled_key[atype]); - return 0; -- --cleanup: -- /* restore back prog or link */ -- pl->prog = old_prog; -- pl->link = link; -- return err; - } - - /* Must be called with cgroup_mutex held to avoid races. 
*/ -@@ -1753,7 +1805,7 @@ static const struct bpf_func_proto bpf_sysctl_set_new_value_proto = { - .gpl_only = false, - .ret_type = RET_INTEGER, - .arg1_type = ARG_PTR_TO_CTX, -- .arg2_type = ARG_PTR_TO_MEM, -+ .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY, - .arg3_type = ARG_CONST_SIZE, - }; - -@@ -1773,6 +1825,8 @@ sysctl_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) - return &bpf_sysctl_get_new_value_proto; - case BPF_FUNC_sysctl_set_new_value: - return &bpf_sysctl_set_new_value_proto; -+ case BPF_FUNC_ktime_get_coarse_ns: -+ return &bpf_ktime_get_coarse_ns_proto; - default: - return cgroup_base_func_proto(func_id, prog); - } -diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c -index 6e3ae90ad107a..4ce500eac2ef2 100644 ---- a/kernel/bpf/core.c -+++ b/kernel/bpf/core.c -@@ -66,11 +66,13 @@ void *bpf_internal_load_pointer_neg_helper(const struct sk_buff *skb, int k, uns - { - u8 *ptr = NULL; - -- if (k >= SKF_NET_OFF) -+ if (k >= SKF_NET_OFF) { - ptr = skb_network_header(skb) + k - SKF_NET_OFF; -- else if (k >= SKF_LL_OFF) -+ } else if (k >= SKF_LL_OFF) { -+ if (unlikely(!skb_mac_header_was_set(skb))) -+ return NULL; - ptr = skb_mac_header(skb) + k - SKF_LL_OFF; -- -+ } - if (ptr >= skb->head && ptr + size <= skb_tail_pointer(skb)) - return ptr; - -@@ -389,6 +391,13 @@ static int bpf_adj_branches(struct bpf_prog *prog, u32 pos, s32 end_old, - i = end_new; - insn = prog->insnsi + end_old; - } -+ if (bpf_pseudo_func(insn)) { -+ ret = bpf_adj_delta_to_imm(insn, pos, end_old, -+ end_new, i, probe_pass); -+ if (ret) -+ return ret; -+ continue; -+ } - code = insn->code; - if ((BPF_CLASS(code) != BPF_JMP && - BPF_CLASS(code) != BPF_JMP32) || -@@ -1652,6 +1661,11 @@ out: - CONT; \ - LDX_MEM_##SIZEOP: \ - DST = *(SIZE *)(unsigned long) (SRC + insn->off); \ -+ CONT; \ -+ LDX_PROBE_MEM_##SIZEOP: \ -+ bpf_probe_read_kernel(&DST, sizeof(SIZE), \ -+ (const void *)(long) (SRC + insn->off)); \ -+ DST = *((SIZE *)&DST); \ - CONT; - - LDST(B, u8) -@@ -1659,15 +1673,6 @@ out: - LDST(W, u32) - LDST(DW, u64) - #undef LDST --#define LDX_PROBE(SIZEOP, SIZE) \ -- LDX_PROBE_MEM_##SIZEOP: \ -- bpf_probe_read_kernel(&DST, SIZE, (const void *)(long) (SRC + insn->off)); \ -- CONT; -- LDX_PROBE(B, 1) -- LDX_PROBE(H, 2) -- LDX_PROBE(W, 4) -- LDX_PROBE(DW, 8) --#undef LDX_PROBE - - #define ATOMIC_ALU_OP(BOP, KOP) \ - case BOP: \ -diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c -index 32471ba027086..e7f45a966e6b5 100644 ---- a/kernel/bpf/hashtab.c -+++ b/kernel/bpf/hashtab.c -@@ -161,17 +161,25 @@ static inline int htab_lock_bucket(const struct bpf_htab *htab, - unsigned long *pflags) - { - unsigned long flags; -+ bool use_raw_lock; - - hash = hash & HASHTAB_MAP_LOCK_MASK; - -- migrate_disable(); -+ use_raw_lock = htab_use_raw_lock(htab); -+ if (use_raw_lock) -+ preempt_disable(); -+ else -+ migrate_disable(); - if (unlikely(__this_cpu_inc_return(*(htab->map_locked[hash])) != 1)) { - __this_cpu_dec(*(htab->map_locked[hash])); -- migrate_enable(); -+ if (use_raw_lock) -+ preempt_enable(); -+ else -+ migrate_enable(); - return -EBUSY; - } - -- if (htab_use_raw_lock(htab)) -+ if (use_raw_lock) - raw_spin_lock_irqsave(&b->raw_lock, flags); - else - spin_lock_irqsave(&b->lock, flags); -@@ -184,13 +192,18 @@ static inline void htab_unlock_bucket(const struct bpf_htab *htab, - struct bucket *b, u32 hash, - unsigned long flags) - { -+ bool use_raw_lock = htab_use_raw_lock(htab); ++/** ++ * struct futex_q - The hashed futex queue entry, one per waiting task ++ * @list: priority-sorted list of tasks waiting 
on this futex ++ * @task: the task waiting on the futex ++ * @lock_ptr: the hash bucket lock ++ * @key: the key the futex is hashed on ++ * @pi_state: optional priority inheritance state ++ * @rt_waiter: rt_waiter storage for use with requeue_pi ++ * @requeue_pi_key: the requeue_pi target futex key ++ * @bitset: bitset for the optional bitmasked wakeup ++ * @requeue_state: State field for futex_requeue_pi() ++ * @requeue_wait: RCU wait for futex_requeue_pi() (RT only) ++ * ++ * We use this hashed waitqueue, instead of a normal wait_queue_entry_t, so ++ * we can wake only the relevant ones (hashed queues may be shared). ++ * ++ * A futex_q has a woken state, just like tasks have TASK_RUNNING. ++ * It is considered woken when plist_node_empty(&q->list) || q->lock_ptr == 0. ++ * The order of wakeup is always to make the first condition true, then ++ * the second. ++ * ++ * PI futexes are typically woken before they are removed from the hash list via ++ * the rt_mutex code. See unqueue_me_pi(). ++ */ ++struct futex_q { ++ struct plist_node list; ++ ++ struct task_struct *task; ++ spinlock_t *lock_ptr; ++ union futex_key key; ++ struct futex_pi_state *pi_state; ++ struct rt_mutex_waiter *rt_waiter; ++ union futex_key *requeue_pi_key; ++ u32 bitset; ++ atomic_t requeue_state; ++#ifdef CONFIG_PREEMPT_RT ++ struct rcuwait requeue_wait; ++#endif ++} __randomize_layout; + - hash = hash & HASHTAB_MAP_LOCK_MASK; -- if (htab_use_raw_lock(htab)) -+ if (use_raw_lock) - raw_spin_unlock_irqrestore(&b->raw_lock, flags); - else - spin_unlock_irqrestore(&b->lock, flags); - __this_cpu_dec(*(htab->map_locked[hash])); -- migrate_enable(); -+ if (use_raw_lock) -+ preempt_enable(); -+ else -+ migrate_enable(); - } - - static bool htab_lru_map_delete_node(void *arg, struct bpf_lru_node *node); -@@ -291,12 +304,8 @@ static struct htab_elem *prealloc_lru_pop(struct bpf_htab *htab, void *key, - struct htab_elem *l; - - if (node) { -- u32 key_size = htab->map.key_size; -- - l = container_of(node, struct htab_elem, lru_node); -- memcpy(l->key, key, key_size); -- check_and_init_map_value(&htab->map, -- l->key + round_up(key_size, 8)); -+ memcpy(l->key, key, htab->map.key_size); - return l; - } - -@@ -1662,8 +1671,11 @@ again_nocopy: - /* do not grab the lock unless need it (bucket_cnt > 0). 
*/ - if (locked) { - ret = htab_lock_bucket(htab, b, batch, &flags); -- if (ret) -- goto next_batch; -+ if (ret) { -+ rcu_read_unlock(); -+ bpf_enable_instrumentation(); -+ goto after_loop; -+ } - } - - bucket_cnt = 0; -@@ -2023,6 +2035,7 @@ static int bpf_iter_init_hash_map(void *priv_data, - seq_info->percpu_value_buf = value_buf; - } - -+ bpf_map_inc_with_uref(map); - seq_info->map = map; - seq_info->htab = container_of(map, struct bpf_htab, map); - return 0; -@@ -2032,6 +2045,7 @@ static void bpf_iter_fini_hash_map(void *priv_data) - { - struct bpf_iter_seq_hash_map_info *seq_info = priv_data; - -+ bpf_map_put_with_uref(seq_info->map); - kfree(seq_info->percpu_value_buf); - } - -diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c -index 9aabf84afd4b2..a711ffe238932 100644 ---- a/kernel/bpf/helpers.c -+++ b/kernel/bpf/helpers.c -@@ -530,7 +530,7 @@ const struct bpf_func_proto bpf_strtol_proto = { - .func = bpf_strtol, - .gpl_only = false, - .ret_type = RET_INTEGER, -- .arg1_type = ARG_PTR_TO_MEM, -+ .arg1_type = ARG_PTR_TO_MEM | MEM_RDONLY, - .arg2_type = ARG_CONST_SIZE, - .arg3_type = ARG_ANYTHING, - .arg4_type = ARG_PTR_TO_LONG, -@@ -558,7 +558,7 @@ const struct bpf_func_proto bpf_strtoul_proto = { - .func = bpf_strtoul, - .gpl_only = false, - .ret_type = RET_INTEGER, -- .arg1_type = ARG_PTR_TO_MEM, -+ .arg1_type = ARG_PTR_TO_MEM | MEM_RDONLY, - .arg2_type = ARG_CONST_SIZE, - .arg3_type = ARG_ANYTHING, - .arg4_type = ARG_PTR_TO_LONG, -@@ -630,7 +630,7 @@ const struct bpf_func_proto bpf_event_output_data_proto = { - .arg1_type = ARG_PTR_TO_CTX, - .arg2_type = ARG_CONST_MAP_PTR, - .arg3_type = ARG_ANYTHING, -- .arg4_type = ARG_PTR_TO_MEM, -+ .arg4_type = ARG_PTR_TO_MEM | MEM_RDONLY, - .arg5_type = ARG_CONST_SIZE_OR_ZERO, - }; - -@@ -667,7 +667,7 @@ BPF_CALL_2(bpf_per_cpu_ptr, const void *, ptr, u32, cpu) - const struct bpf_func_proto bpf_per_cpu_ptr_proto = { - .func = bpf_per_cpu_ptr, - .gpl_only = false, -- .ret_type = RET_PTR_TO_MEM_OR_BTF_ID_OR_NULL, -+ .ret_type = RET_PTR_TO_MEM_OR_BTF_ID | PTR_MAYBE_NULL | MEM_RDONLY, - .arg1_type = ARG_PTR_TO_PERCPU_BTF_ID, - .arg2_type = ARG_ANYTHING, - }; -@@ -680,7 +680,7 @@ BPF_CALL_1(bpf_this_cpu_ptr, const void *, percpu_ptr) - const struct bpf_func_proto bpf_this_cpu_ptr_proto = { - .func = bpf_this_cpu_ptr, - .gpl_only = false, -- .ret_type = RET_PTR_TO_MEM_OR_BTF_ID, -+ .ret_type = RET_PTR_TO_MEM_OR_BTF_ID | MEM_RDONLY, - .arg1_type = ARG_PTR_TO_PERCPU_BTF_ID, - }; - -@@ -1013,7 +1013,7 @@ const struct bpf_func_proto bpf_snprintf_proto = { - .arg1_type = ARG_PTR_TO_MEM_OR_NULL, - .arg2_type = ARG_CONST_SIZE_OR_ZERO, - .arg3_type = ARG_PTR_TO_CONST_STR, -- .arg4_type = ARG_PTR_TO_MEM_OR_NULL, -+ .arg4_type = ARG_PTR_TO_MEM | PTR_MAYBE_NULL | MEM_RDONLY, - .arg5_type = ARG_CONST_SIZE_OR_ZERO, - }; - -@@ -1367,8 +1367,6 @@ bpf_base_func_proto(enum bpf_func_id func_id) - return &bpf_ktime_get_ns_proto; - case BPF_FUNC_ktime_get_boot_ns: - return &bpf_ktime_get_boot_ns_proto; -- case BPF_FUNC_ktime_get_coarse_ns: -- return &bpf_ktime_get_coarse_ns_proto; - case BPF_FUNC_ringbuf_output: - return &bpf_ringbuf_output_proto; - case BPF_FUNC_ringbuf_reserve: -diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c -index 80da1db47c686..5a8d9f7467bf4 100644 ---- a/kernel/bpf/inode.c -+++ b/kernel/bpf/inode.c -@@ -648,12 +648,22 @@ static int bpf_parse_param(struct fs_context *fc, struct fs_parameter *param) - int opt; - - opt = fs_parse(fc, bpf_fs_parameters, param, &result); -- if (opt < 0) -+ if (opt < 0) { - /* We might like to report bad 
mount options here, but - * traditionally we've ignored all mount options, so we'd - * better continue to ignore non-existing options for bpf. - */ -- return opt == -ENOPARAM ? 0 : opt; -+ if (opt == -ENOPARAM) { -+ opt = vfs_parse_fs_param_source(fc, param); -+ if (opt != -ENOPARAM) -+ return opt; ++/* ++ * On PREEMPT_RT, the hash bucket lock is a 'sleeping' spinlock with an ++ * underlying rtmutex. The task which is about to be requeued could have ++ * just woken up (timeout, signal). After the wake up the task has to ++ * acquire hash bucket lock, which is held by the requeue code. As a task ++ * can only be blocked on _ONE_ rtmutex at a time, the proxy lock blocking ++ * and the hash bucket lock blocking would collide and corrupt state. ++ * ++ * On !PREEMPT_RT this is not a problem and everything could be serialized ++ * on hash bucket lock, but aside of having the benefit of common code, ++ * this allows to avoid doing the requeue when the task is already on the ++ * way out and taking the hash bucket lock of the original uaddr1 when the ++ * requeue has been completed. ++ * ++ * The following state transitions are valid: ++ * ++ * On the waiter side: ++ * Q_REQUEUE_PI_NONE -> Q_REQUEUE_PI_IGNORE ++ * Q_REQUEUE_PI_IN_PROGRESS -> Q_REQUEUE_PI_WAIT ++ * ++ * On the requeue side: ++ * Q_REQUEUE_PI_NONE -> Q_REQUEUE_PI_INPROGRESS ++ * Q_REQUEUE_PI_IN_PROGRESS -> Q_REQUEUE_PI_DONE/LOCKED ++ * Q_REQUEUE_PI_IN_PROGRESS -> Q_REQUEUE_PI_NONE (requeue failed) ++ * Q_REQUEUE_PI_WAIT -> Q_REQUEUE_PI_DONE/LOCKED ++ * Q_REQUEUE_PI_WAIT -> Q_REQUEUE_PI_IGNORE (requeue failed) ++ * ++ * The requeue side ignores a waiter with state Q_REQUEUE_PI_IGNORE as this ++ * signals that the waiter is already on the way out. It also means that ++ * the waiter is still on the 'wait' futex, i.e. uaddr1. ++ * ++ * The waiter side signals early wakeup to the requeue side either through ++ * setting state to Q_REQUEUE_PI_IGNORE or to Q_REQUEUE_PI_WAIT depending ++ * on the current state. In case of Q_REQUEUE_PI_IGNORE it can immediately ++ * proceed to take the hash bucket lock of uaddr1. If it set state to WAIT, ++ * which means the wakeup is interleaving with a requeue in progress it has ++ * to wait for the requeue side to change the state. Either to DONE/LOCKED ++ * or to IGNORE. DONE/LOCKED means the waiter q is now on the uaddr2 futex ++ * and either blocked (DONE) or has acquired it (LOCKED). IGNORE is set by ++ * the requeue side when the requeue attempt failed via deadlock detection ++ * and therefore the waiter q is still on the uaddr1 futex. 
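
The transition rules above are easy to misread in the code, so here is
a small self-contained C sketch (an illustration only, not part of the
patch) that encodes them as predicates, using the same state names the
patch defines just below:

    #include <stdbool.h>
    #include <stdio.h>

    enum {
        Q_REQUEUE_PI_NONE = 0,
        Q_REQUEUE_PI_IGNORE,
        Q_REQUEUE_PI_IN_PROGRESS,
        Q_REQUEUE_PI_WAIT,
        Q_REQUEUE_PI_DONE,
        Q_REQUEUE_PI_LOCKED,
    };

    /* Valid waiter-side transitions, per the comment above. */
    static bool waiter_transition_valid(int from, int to)
    {
        return (from == Q_REQUEUE_PI_NONE && to == Q_REQUEUE_PI_IGNORE) ||
               (from == Q_REQUEUE_PI_IN_PROGRESS && to == Q_REQUEUE_PI_WAIT);
    }

    /* Valid requeue-side transitions, per the comment above. */
    static bool requeue_transition_valid(int from, int to)
    {
        switch (from) {
        case Q_REQUEUE_PI_NONE:
            return to == Q_REQUEUE_PI_IN_PROGRESS;
        case Q_REQUEUE_PI_IN_PROGRESS:
            return to == Q_REQUEUE_PI_DONE || to == Q_REQUEUE_PI_LOCKED ||
                   to == Q_REQUEUE_PI_NONE;   /* requeue failed */
        case Q_REQUEUE_PI_WAIT:
            return to == Q_REQUEUE_PI_DONE || to == Q_REQUEUE_PI_LOCKED ||
                   to == Q_REQUEUE_PI_IGNORE; /* requeue failed */
        default:
            return false;
        }
    }

    int main(void)
    {
        printf("%d %d\n",
               requeue_transition_valid(Q_REQUEUE_PI_WAIT, Q_REQUEUE_PI_LOCKED),
               waiter_transition_valid(Q_REQUEUE_PI_DONE, Q_REQUEUE_PI_NONE));
        /* prints "1 0": the first move is allowed, the second is not */
        return 0;
    }
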
++ */ ++enum { ++ Q_REQUEUE_PI_NONE = 0, ++ Q_REQUEUE_PI_IGNORE, ++ Q_REQUEUE_PI_IN_PROGRESS, ++ Q_REQUEUE_PI_WAIT, ++ Q_REQUEUE_PI_DONE, ++ Q_REQUEUE_PI_LOCKED, ++}; + -+ return 0; -+ } ++static const struct futex_q futex_q_init = { ++ /* list gets initialized in queue_me()*/ ++ .key = FUTEX_KEY_INIT, ++ .bitset = FUTEX_BITSET_MATCH_ANY, ++ .requeue_state = ATOMIC_INIT(Q_REQUEUE_PI_NONE), ++}; + -+ if (opt < 0) -+ return opt; -+ } - - switch (opt) { - case OPT_MODE: -diff --git a/kernel/bpf/map_iter.c b/kernel/bpf/map_iter.c -index 6a9542af4212a..b0fa190b09790 100644 ---- a/kernel/bpf/map_iter.c -+++ b/kernel/bpf/map_iter.c -@@ -174,9 +174,9 @@ static const struct bpf_iter_reg bpf_map_elem_reg_info = { - .ctx_arg_info_size = 2, - .ctx_arg_info = { - { offsetof(struct bpf_iter__bpf_map_elem, key), -- PTR_TO_RDONLY_BUF_OR_NULL }, -+ PTR_TO_BUF | PTR_MAYBE_NULL | MEM_RDONLY }, - { offsetof(struct bpf_iter__bpf_map_elem, value), -- PTR_TO_RDWR_BUF_OR_NULL }, -+ PTR_TO_BUF | PTR_MAYBE_NULL }, - }, - }; - -diff --git a/kernel/bpf/ringbuf.c b/kernel/bpf/ringbuf.c -index 9e0c10c6892ad..710ba9de12ce4 100644 ---- a/kernel/bpf/ringbuf.c -+++ b/kernel/bpf/ringbuf.c -@@ -104,7 +104,7 @@ static struct bpf_ringbuf *bpf_ringbuf_area_alloc(size_t data_sz, int numa_node) - } - - rb = vmap(pages, nr_meta_pages + 2 * nr_data_pages, -- VM_ALLOC | VM_USERMAP, PAGE_KERNEL); -+ VM_MAP | VM_USERMAP, PAGE_KERNEL); - if (rb) { - kmemleak_not_leak(pages); - rb->pages = pages; -@@ -444,7 +444,7 @@ const struct bpf_func_proto bpf_ringbuf_output_proto = { - .func = bpf_ringbuf_output, - .ret_type = RET_INTEGER, - .arg1_type = ARG_CONST_MAP_PTR, -- .arg2_type = ARG_PTR_TO_MEM, -+ .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY, - .arg3_type = ARG_CONST_SIZE_OR_ZERO, - .arg4_type = ARG_ANYTHING, - }; -diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c -index 6e75bbee39f0b..7efae3af62017 100644 ---- a/kernel/bpf/stackmap.c -+++ b/kernel/bpf/stackmap.c -@@ -119,7 +119,6 @@ static struct bpf_map *stack_map_alloc(union bpf_attr *attr) - return ERR_PTR(-E2BIG); - - cost = n_buckets * sizeof(struct stack_map_bucket *) + sizeof(*smap); -- cost += n_buckets * (value_size + sizeof(struct stack_map_bucket)); - smap = bpf_map_area_alloc(cost, bpf_map_attr_numa_node(attr)); - if (!smap) - return ERR_PTR(-ENOMEM); -@@ -219,7 +218,7 @@ static void stack_map_get_build_id_offset(struct bpf_stack_build_id *id_offs, - } - - static struct perf_callchain_entry * --get_callchain_entry_for_task(struct task_struct *task, u32 init_nr) -+get_callchain_entry_for_task(struct task_struct *task, u32 max_depth) - { - #ifdef CONFIG_STACKTRACE - struct perf_callchain_entry *entry; -@@ -230,9 +229,8 @@ get_callchain_entry_for_task(struct task_struct *task, u32 init_nr) - if (!entry) - return NULL; - -- entry->nr = init_nr + -- stack_trace_save_tsk(task, (unsigned long *)(entry->ip + init_nr), -- sysctl_perf_event_max_stack - init_nr, 0); -+ entry->nr = stack_trace_save_tsk(task, (unsigned long *)entry->ip, -+ max_depth, 0); - - /* stack_trace_save_tsk() works on unsigned long array, while - * perf_callchain_entry uses u64 array. 
For 32-bit systems, it is -@@ -244,7 +242,7 @@ get_callchain_entry_for_task(struct task_struct *task, u32 init_nr) - int i; - - /* copy data from the end to avoid using extra buffer */ -- for (i = entry->nr - 1; i >= (int)init_nr; i--) -+ for (i = entry->nr - 1; i >= 0; i--) - to[i] = (u64)(from[i]); - } - -@@ -261,27 +259,19 @@ static long __bpf_get_stackid(struct bpf_map *map, - { - struct bpf_stack_map *smap = container_of(map, struct bpf_stack_map, map); - struct stack_map_bucket *bucket, *new_bucket, *old_bucket; -- u32 max_depth = map->value_size / stack_map_data_size(map); -- /* stack_map_alloc() checks that max_depth <= sysctl_perf_event_max_stack */ -- u32 init_nr = sysctl_perf_event_max_stack - max_depth; - u32 skip = flags & BPF_F_SKIP_FIELD_MASK; - u32 hash, id, trace_nr, trace_len; - bool user = flags & BPF_F_USER_STACK; - u64 *ips; - bool hash_matches; - -- /* get_perf_callchain() guarantees that trace->nr >= init_nr -- * and trace-nr <= sysctl_perf_event_max_stack, so trace_nr <= max_depth -- */ -- trace_nr = trace->nr - init_nr; -- -- if (trace_nr <= skip) -+ if (trace->nr <= skip) - /* skipping more than usable stack trace */ - return -EFAULT; - -- trace_nr -= skip; -+ trace_nr = trace->nr - skip; - trace_len = trace_nr * sizeof(u64); -- ips = trace->ip + skip + init_nr; -+ ips = trace->ip + skip; - hash = jhash2((u32 *)ips, trace_len / sizeof(u32), 0); - id = hash & (smap->n_buckets - 1); - bucket = READ_ONCE(smap->buckets[id]); -@@ -338,8 +328,7 @@ BPF_CALL_3(bpf_get_stackid, struct pt_regs *, regs, struct bpf_map *, map, - u64, flags) - { - u32 max_depth = map->value_size / stack_map_data_size(map); -- /* stack_map_alloc() checks that max_depth <= sysctl_perf_event_max_stack */ -- u32 init_nr = sysctl_perf_event_max_stack - max_depth; -+ u32 skip = flags & BPF_F_SKIP_FIELD_MASK; - bool user = flags & BPF_F_USER_STACK; - struct perf_callchain_entry *trace; - bool kernel = !user; -@@ -348,8 +337,12 @@ BPF_CALL_3(bpf_get_stackid, struct pt_regs *, regs, struct bpf_map *, map, - BPF_F_FAST_STACK_CMP | BPF_F_REUSE_STACKID))) - return -EINVAL; - -- trace = get_perf_callchain(regs, init_nr, kernel, user, -- sysctl_perf_event_max_stack, false, false); -+ max_depth += skip; -+ if (max_depth > sysctl_perf_event_max_stack) -+ max_depth = sysctl_perf_event_max_stack; ++/* ++ * Hash buckets are shared by all the futex_keys that hash to the same ++ * location. Each key may have multiple futex_q structures, one for each task ++ * waiting on a futex. 
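
To make the sharing concrete: two tasks waiting on the same key and a
third waiting on a different key can all land in one bucket, and lookup
walks the chain comparing keys, much like futex_top_waiter() further
below. A toy C sketch, illustration only (the real chain is a
priority-sorted plist and keys are union futex_key, not ints):

    #include <stdio.h>

    struct waiter {
        int key;               /* stand-in for union futex_key */
        struct waiter *next;   /* stand-in for the bucket plist */
    };

    static struct waiter *top_waiter(struct waiter *chain, int key)
    {
        for (struct waiter *w = chain; w; w = w->next)
            if (w->key == key) /* stand-in for match_futex() */
                return w;
        return NULL;
    }

    int main(void)
    {
        struct waiter w3 = { 7, NULL };
        struct waiter w2 = { 9, &w3 };  /* different key, same bucket */
        struct waiter w1 = { 7, &w2 };

        printf("found key 7 first: %s\n",
               top_waiter(&w1, 7) == &w1 ? "yes" : "no");
        return 0;
    }
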
++ */ ++struct futex_hash_bucket { ++ atomic_t waiters; ++ spinlock_t lock; ++ struct plist_head chain; ++} ____cacheline_aligned_in_smp; + -+ trace = get_perf_callchain(regs, 0, kernel, user, max_depth, -+ false, false); - - if (unlikely(!trace)) - /* couldn't fetch the stack trace */ -@@ -440,7 +433,7 @@ static long __bpf_get_stack(struct pt_regs *regs, struct task_struct *task, - struct perf_callchain_entry *trace_in, - void *buf, u32 size, u64 flags) - { -- u32 init_nr, trace_nr, copy_len, elem_size, num_elem; -+ u32 trace_nr, copy_len, elem_size, num_elem, max_depth; - bool user_build_id = flags & BPF_F_USER_BUILD_ID; - u32 skip = flags & BPF_F_SKIP_FIELD_MASK; - bool user = flags & BPF_F_USER_STACK; -@@ -465,30 +458,28 @@ static long __bpf_get_stack(struct pt_regs *regs, struct task_struct *task, - goto err_fault; - - num_elem = size / elem_size; -- if (sysctl_perf_event_max_stack < num_elem) -- init_nr = 0; -- else -- init_nr = sysctl_perf_event_max_stack - num_elem; -+ max_depth = num_elem + skip; -+ if (sysctl_perf_event_max_stack < max_depth) -+ max_depth = sysctl_perf_event_max_stack; - - if (trace_in) - trace = trace_in; - else if (kernel && task) -- trace = get_callchain_entry_for_task(task, init_nr); -+ trace = get_callchain_entry_for_task(task, max_depth); - else -- trace = get_perf_callchain(regs, init_nr, kernel, user, -- sysctl_perf_event_max_stack, -+ trace = get_perf_callchain(regs, 0, kernel, user, max_depth, - false, false); - if (unlikely(!trace)) - goto err_fault; - -- trace_nr = trace->nr - init_nr; -- if (trace_nr < skip) -+ if (trace->nr < skip) - goto err_fault; - -- trace_nr -= skip; -+ trace_nr = trace->nr - skip; - trace_nr = (trace_nr <= num_elem) ? trace_nr : num_elem; - copy_len = trace_nr * elem_size; -- ips = trace->ip + skip + init_nr; ++/* ++ * The base of the bucket array and its size are always used together ++ * (after initialization only in hash_futex()), so ensure that they ++ * reside in the same cacheline. ++ */ ++static struct { ++ struct futex_hash_bucket *queues; ++ unsigned long hashsize; ++} __futex_data __read_mostly __aligned(2*sizeof(long)); ++#define futex_queues (__futex_data.queues) ++#define futex_hashsize (__futex_data.hashsize) + -+ ips = trace->ip + skip; - if (user && user_build_id) - stack_map_get_build_id_offset(buf, ips, trace_nr, user); - else -@@ -525,13 +516,14 @@ BPF_CALL_4(bpf_get_task_stack, struct task_struct *, task, void *, buf, - u32, size, u64, flags) - { - struct pt_regs *regs; -- long res; -+ long res = -EINVAL; - - if (!try_get_task_stack(task)) - return -EFAULT; - - regs = task_pt_regs(task); -- res = __bpf_get_stack(regs, task, NULL, buf, size, flags); -+ if (regs) -+ res = __bpf_get_stack(regs, task, NULL, buf, size, flags); - put_task_stack(task); - - return res; -diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c -index 1cad6979a0d0f..aea9852f1c225 100644 ---- a/kernel/bpf/syscall.c -+++ b/kernel/bpf/syscall.c -@@ -132,6 +132,21 @@ static struct bpf_map *find_and_alloc_map(union bpf_attr *attr) - return map; - } - -+static void bpf_map_write_active_inc(struct bpf_map *map) ++ ++/* ++ * Fault injections for futexes. 
++ */ ++#ifdef CONFIG_FAIL_FUTEX ++ ++static struct { ++ struct fault_attr attr; ++ ++ bool ignore_private; ++} fail_futex = { ++ .attr = FAULT_ATTR_INITIALIZER, ++ .ignore_private = false, ++}; ++ ++static int __init setup_fail_futex(char *str) +{ -+ atomic64_inc(&map->writecnt); ++ return setup_fault_attr(&fail_futex.attr, str); +} ++__setup("fail_futex=", setup_fail_futex); + -+static void bpf_map_write_active_dec(struct bpf_map *map) ++static bool should_fail_futex(bool fshared) +{ -+ atomic64_dec(&map->writecnt); ++ if (fail_futex.ignore_private && !fshared) ++ return false; ++ ++ return should_fail(&fail_futex.attr, 1); +} + -+bool bpf_map_write_active(const struct bpf_map *map) ++#ifdef CONFIG_FAULT_INJECTION_DEBUG_FS ++ ++static int __init fail_futex_debugfs(void) +{ -+ return atomic64_read(&map->writecnt) != 0; ++ umode_t mode = S_IFREG | S_IRUSR | S_IWUSR; ++ struct dentry *dir; ++ ++ dir = fault_create_debugfs_attr("fail_futex", NULL, ++ &fail_futex.attr); ++ if (IS_ERR(dir)) ++ return PTR_ERR(dir); ++ ++ debugfs_create_bool("ignore-private", mode, dir, ++ &fail_futex.ignore_private); ++ return 0; +} + - static u32 bpf_map_value_size(const struct bpf_map *map) - { - if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH || -@@ -596,11 +611,8 @@ static void bpf_map_mmap_open(struct vm_area_struct *vma) - { - struct bpf_map *map = vma->vm_file->private_data; - -- if (vma->vm_flags & VM_MAYWRITE) { -- mutex_lock(&map->freeze_mutex); -- map->writecnt++; -- mutex_unlock(&map->freeze_mutex); -- } -+ if (vma->vm_flags & VM_MAYWRITE) -+ bpf_map_write_active_inc(map); - } - - /* called for all unmapped memory region (including initial) */ -@@ -608,11 +620,8 @@ static void bpf_map_mmap_close(struct vm_area_struct *vma) - { - struct bpf_map *map = vma->vm_file->private_data; - -- if (vma->vm_flags & VM_MAYWRITE) { -- mutex_lock(&map->freeze_mutex); -- map->writecnt--; -- mutex_unlock(&map->freeze_mutex); -- } -+ if (vma->vm_flags & VM_MAYWRITE) -+ bpf_map_write_active_dec(map); - } - - static const struct vm_operations_struct bpf_map_default_vmops = { -@@ -663,7 +672,7 @@ static int bpf_map_mmap(struct file *filp, struct vm_area_struct *vma) - goto out; - - if (vma->vm_flags & VM_MAYWRITE) -- map->writecnt++; -+ bpf_map_write_active_inc(map); - out: - mutex_unlock(&map->freeze_mutex); - return err; -@@ -1122,6 +1131,7 @@ static int map_update_elem(union bpf_attr *attr, bpfptr_t uattr) - map = __bpf_map_get(f); - if (IS_ERR(map)) - return PTR_ERR(map); -+ bpf_map_write_active_inc(map); - if (!(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) { - err = -EPERM; - goto err_put; -@@ -1157,6 +1167,7 @@ free_value: - free_key: - kvfree(key); - err_put: -+ bpf_map_write_active_dec(map); - fdput(f); - return err; - } -@@ -1179,6 +1190,7 @@ static int map_delete_elem(union bpf_attr *attr) - map = __bpf_map_get(f); - if (IS_ERR(map)) - return PTR_ERR(map); -+ bpf_map_write_active_inc(map); - if (!(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) { - err = -EPERM; - goto err_put; -@@ -1209,6 +1221,7 @@ static int map_delete_elem(union bpf_attr *attr) - out: - kvfree(key); - err_put: -+ bpf_map_write_active_dec(map); - fdput(f); - return err; - } -@@ -1324,6 +1337,7 @@ int generic_map_delete_batch(struct bpf_map *map, - maybe_wait_bpf_programs(map); - if (err) - break; -+ cond_resched(); - } - if (copy_to_user(&uattr->batch.count, &cp, sizeof(cp))) - err = -EFAULT; -@@ -1381,6 +1395,7 @@ int generic_map_update_batch(struct bpf_map *map, - - if (err) - break; -+ cond_resched(); - } - - if 
(copy_to_user(&uattr->batch.count, &cp, sizeof(cp))) -@@ -1478,6 +1493,7 @@ int generic_map_lookup_batch(struct bpf_map *map, - swap(prev_key, key); - retry = MAP_LOOKUP_RETRIES; - cp++; -+ cond_resched(); - } - - if (err == -EFAULT) -@@ -1516,6 +1532,7 @@ static int map_lookup_and_delete_elem(union bpf_attr *attr) - map = __bpf_map_get(f); - if (IS_ERR(map)) - return PTR_ERR(map); -+ bpf_map_write_active_inc(map); - if (!(map_get_sys_perms(map, f) & FMODE_CAN_READ) || - !(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) { - err = -EPERM; -@@ -1580,6 +1597,7 @@ free_value: - free_key: - kvfree(key); - err_put: -+ bpf_map_write_active_dec(map); - fdput(f); - return err; - } -@@ -1607,8 +1625,7 @@ static int map_freeze(const union bpf_attr *attr) - } - - mutex_lock(&map->freeze_mutex); -- -- if (map->writecnt) { -+ if (bpf_map_write_active(map)) { - err = -EBUSY; - goto err_put; - } -@@ -1807,8 +1824,14 @@ static int bpf_prog_release(struct inode *inode, struct file *filp) - return 0; - } - -+struct bpf_prog_kstats { -+ u64 nsecs; -+ u64 cnt; -+ u64 misses; -+}; ++late_initcall(fail_futex_debugfs); + - static void bpf_prog_get_stats(const struct bpf_prog *prog, -- struct bpf_prog_stats *stats) -+ struct bpf_prog_kstats *stats) - { - u64 nsecs = 0, cnt = 0, misses = 0; - int cpu; -@@ -1821,9 +1844,9 @@ static void bpf_prog_get_stats(const struct bpf_prog *prog, - st = per_cpu_ptr(prog->stats, cpu); - do { - start = u64_stats_fetch_begin_irq(&st->syncp); -- tnsecs = st->nsecs; -- tcnt = st->cnt; -- tmisses = st->misses; -+ tnsecs = u64_stats_read(&st->nsecs); -+ tcnt = u64_stats_read(&st->cnt); -+ tmisses = u64_stats_read(&st->misses); - } while (u64_stats_fetch_retry_irq(&st->syncp, start)); - nsecs += tnsecs; - cnt += tcnt; -@@ -1839,7 +1862,7 @@ static void bpf_prog_show_fdinfo(struct seq_file *m, struct file *filp) - { - const struct bpf_prog *prog = filp->private_data; - char prog_tag[sizeof(prog->tag) * 2 + 1] = { }; -- struct bpf_prog_stats stats; -+ struct bpf_prog_kstats stats; - - bpf_prog_get_stats(prog, &stats); - bin2hex(prog_tag, prog->tag, sizeof(prog->tag)); -@@ -3578,7 +3601,7 @@ static int bpf_prog_get_info_by_fd(struct file *file, - struct bpf_prog_info __user *uinfo = u64_to_user_ptr(attr->info.info); - struct bpf_prog_info info; - u32 info_len = attr->info.info_len; -- struct bpf_prog_stats stats; -+ struct bpf_prog_kstats stats; - char __user *uinsns; - u32 ulen; - int err; -@@ -4077,7 +4100,9 @@ static int bpf_task_fd_query(const union bpf_attr *attr, - if (attr->task_fd_query.flags != 0) - return -EINVAL; - -+ rcu_read_lock(); - task = get_pid_task(find_vpid(pid), PIDTYPE_PID); -+ rcu_read_unlock(); - if (!task) - return -ENOENT; - -@@ -4143,6 +4168,9 @@ static int bpf_map_do_batch(const union bpf_attr *attr, - union bpf_attr __user *uattr, - int cmd) - { -+ bool has_read = cmd == BPF_MAP_LOOKUP_BATCH || -+ cmd == BPF_MAP_LOOKUP_AND_DELETE_BATCH; -+ bool has_write = cmd != BPF_MAP_LOOKUP_BATCH; - struct bpf_map *map; - int err, ufd; - struct fd f; -@@ -4155,16 +4183,13 @@ static int bpf_map_do_batch(const union bpf_attr *attr, - map = __bpf_map_get(f); - if (IS_ERR(map)) - return PTR_ERR(map); -- -- if ((cmd == BPF_MAP_LOOKUP_BATCH || -- cmd == BPF_MAP_LOOKUP_AND_DELETE_BATCH) && -- !(map_get_sys_perms(map, f) & FMODE_CAN_READ)) { -+ if (has_write) -+ bpf_map_write_active_inc(map); -+ if (has_read && !(map_get_sys_perms(map, f) & FMODE_CAN_READ)) { - err = -EPERM; - goto err_put; - } -- -- if (cmd != BPF_MAP_LOOKUP_BATCH && -- !(map_get_sys_perms(map, f) & 
FMODE_CAN_WRITE)) { -+ if (has_write && !(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) { - err = -EPERM; - goto err_put; - } -@@ -4177,8 +4202,9 @@ static int bpf_map_do_batch(const union bpf_attr *attr, - BPF_DO_BATCH(map->ops->map_update_batch); - else - BPF_DO_BATCH(map->ops->map_delete_batch); -- - err_put: -+ if (has_write) -+ bpf_map_write_active_dec(map); - fdput(f); - return err; - } -@@ -4729,7 +4755,7 @@ static const struct bpf_func_proto bpf_sys_bpf_proto = { - .gpl_only = false, - .ret_type = RET_INTEGER, - .arg1_type = ARG_ANYTHING, -- .arg2_type = ARG_PTR_TO_MEM, -+ .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY, - .arg3_type = ARG_CONST_SIZE, - }; - -@@ -4761,7 +4787,7 @@ syscall_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) - { - switch (func_id) { - case BPF_FUNC_sys_bpf: -- return &bpf_sys_bpf_proto; -+ return !perfmon_capable() ? NULL : &bpf_sys_bpf_proto; - case BPF_FUNC_btf_find_by_name_kind: - return &bpf_btf_find_by_name_kind_proto; - case BPF_FUNC_sys_close: -diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c -index fe1e857324e66..4fa75791b45e2 100644 ---- a/kernel/bpf/trampoline.c -+++ b/kernel/bpf/trampoline.c -@@ -414,7 +414,7 @@ int bpf_trampoline_link_prog(struct bpf_prog *prog, struct bpf_trampoline *tr) - { - enum bpf_tramp_prog_type kind; - int err = 0; -- int cnt; -+ int cnt = 0, i; - - kind = bpf_attach_type_to_tramp(prog); - mutex_lock(&tr->mutex); -@@ -425,7 +425,10 @@ int bpf_trampoline_link_prog(struct bpf_prog *prog, struct bpf_trampoline *tr) - err = -EBUSY; - goto out; - } -- cnt = tr->progs_cnt[BPF_TRAMP_FENTRY] + tr->progs_cnt[BPF_TRAMP_FEXIT]; ++#endif /* CONFIG_FAULT_INJECTION_DEBUG_FS */ ++ ++#else ++static inline bool should_fail_futex(bool fshared) ++{ ++ return false; ++} ++#endif /* CONFIG_FAIL_FUTEX */ ++ ++#ifdef CONFIG_COMPAT ++static void compat_exit_robust_list(struct task_struct *curr); ++#endif ++ ++/* ++ * Reflects a new waiter being added to the waitqueue. ++ */ ++static inline void hb_waiters_inc(struct futex_hash_bucket *hb) ++{ ++#ifdef CONFIG_SMP ++ atomic_inc(&hb->waiters); ++ /* ++ * Full barrier (A), see the ordering comment above. ++ */ ++ smp_mb__after_atomic(); ++#endif ++} ++ ++/* ++ * Reflects a waiter being removed from the waitqueue by wakeup ++ * paths. ++ */ ++static inline void hb_waiters_dec(struct futex_hash_bucket *hb) ++{ ++#ifdef CONFIG_SMP ++ atomic_dec(&hb->waiters); ++#endif ++} + -+ for (i = 0; i < BPF_TRAMP_MAX; i++) -+ cnt += tr->progs_cnt[i]; ++static inline int hb_waiters_pending(struct futex_hash_bucket *hb) ++{ ++#ifdef CONFIG_SMP ++ /* ++ * Full barrier (B), see the ordering comment above. ++ */ ++ smp_mb(); ++ return atomic_read(&hb->waiters); ++#else ++ return 1; ++#endif ++} + - if (kind == BPF_TRAMP_REPLACE) { - /* Cannot attach extension if fentry/fexit are in use. */ - if (cnt) { -@@ -503,16 +506,19 @@ out: - - void bpf_trampoline_put(struct bpf_trampoline *tr) - { -+ int i; ++/** ++ * hash_futex - Return the hash bucket in the global hash ++ * @key: Pointer to the futex key for which the hash is calculated ++ * ++ * We hash on the keys returned from get_futex_key (see below) and return the ++ * corresponding hash bucket in the global hash. 
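
hash_futex() below indexes the bucket array with a mask rather than a
modulo; this works because the table size is rounded up to a power of
two (see the futex_init() code earlier in this patch). A minimal sketch
of just that idiom, illustration only (the constant and the hash value
are made up; jhash2() is the kernel's real hash function):

    #include <stdint.h>
    #include <stdio.h>

    #define HASHSIZE 256u             /* must be a power of two */

    static unsigned int bucket_index(uint32_t hash)
    {
        return hash & (HASHSIZE - 1); /* same as hash % HASHSIZE */
    }

    int main(void)
    {
        uint32_t h = 0xdeadbeefu;     /* stand-in for jhash2() output */

        printf("bucket %u of %u\n", bucket_index(h), HASHSIZE);
        return 0;
    }
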
++ */ ++static struct futex_hash_bucket *hash_futex(union futex_key *key) ++{ ++ u32 hash = jhash2((u32 *)key, offsetof(typeof(*key), both.offset) / 4, ++ key->both.offset); + - if (!tr) - return; - mutex_lock(&trampoline_mutex); - if (!refcount_dec_and_test(&tr->refcnt)) - goto out; - WARN_ON_ONCE(mutex_is_locked(&tr->mutex)); -- if (WARN_ON_ONCE(!hlist_empty(&tr->progs_hlist[BPF_TRAMP_FENTRY]))) -- goto out; -- if (WARN_ON_ONCE(!hlist_empty(&tr->progs_hlist[BPF_TRAMP_FEXIT]))) -- goto out; ++ return &futex_queues[hash & (futex_hashsize - 1)]; ++} + -+ for (i = 0; i < BPF_TRAMP_MAX; i++) -+ if (WARN_ON_ONCE(!hlist_empty(&tr->progs_hlist[i]))) ++ ++/** ++ * match_futex - Check whether two futex keys are equal ++ * @key1: Pointer to key1 ++ * @key2: Pointer to key2 ++ * ++ * Return 1 if two futex_keys are equal, 0 otherwise. ++ */ ++static inline int match_futex(union futex_key *key1, union futex_key *key2) ++{ ++ return (key1 && key2 ++ && key1->both.word == key2->both.word ++ && key1->both.ptr == key2->both.ptr ++ && key1->both.offset == key2->both.offset); ++} ++ ++enum futex_access { ++ FUTEX_READ, ++ FUTEX_WRITE ++}; ++ ++/** ++ * futex_setup_timer - set up the sleeping hrtimer. ++ * @time: ptr to the given timeout value ++ * @timeout: the hrtimer_sleeper structure to be set up ++ * @flags: futex flags ++ * @range_ns: optional range in ns ++ * ++ * Return: Initialized hrtimer_sleeper structure or NULL if no timeout ++ * value given ++ */ ++static inline struct hrtimer_sleeper * ++futex_setup_timer(ktime_t *time, struct hrtimer_sleeper *timeout, ++ int flags, u64 range_ns) ++{ ++ if (!time) ++ return NULL; ++ ++ hrtimer_init_sleeper_on_stack(timeout, (flags & FLAGS_CLOCKRT) ? ++ CLOCK_REALTIME : CLOCK_MONOTONIC, ++ HRTIMER_MODE_ABS); ++ /* ++ * If range_ns is 0, calling hrtimer_set_expires_range_ns() is ++ * effectively the same as calling hrtimer_set_expires(). ++ */ ++ hrtimer_set_expires_range_ns(&timeout->timer, *time, range_ns); ++ ++ return timeout; ++} ++ ++/* ++ * Generate a machine wide unique identifier for this inode. ++ * ++ * This relies on u64 not wrapping in the life-time of the machine; which with ++ * 1ns resolution means almost 585 years. ++ * ++ * This further relies on the fact that a well formed program will not unmap ++ * the file while it has a (shared) futex waiting on it. This mapping will have ++ * a file reference which pins the mount and inode. ++ * ++ * If for some reason an inode gets evicted and read back in again, it will get ++ * a new sequence number and will _NOT_ match, even though it is the exact same ++ * file. ++ * ++ * It is important that match_futex() will never have a false-positive, esp. ++ * for PI futexes that can mess up the state. The above argues that false-negatives ++ * are only possible for malformed programs. ++ */ ++static u64 get_inode_sequence_number(struct inode *inode) ++{ ++ static atomic64_t i_seq; ++ u64 old; ++ ++ /* Does the inode already have a sequence number? */ ++ old = atomic64_read(&inode->i_sequence); ++ if (likely(old)) ++ return old; ++ ++ for (;;) { ++ u64 new = atomic64_add_return(1, &i_seq); ++ if (WARN_ON_ONCE(!new)) ++ continue; ++ ++ old = atomic64_cmpxchg_relaxed(&inode->i_sequence, 0, new); ++ if (old) ++ return old; ++ return new; ++ } ++} ++ ++/** ++ * get_futex_key() - Get parameters which are the keys for a futex ++ * @uaddr: virtual address of the futex ++ * @fshared: false for a PROCESS_PRIVATE futex, true for PROCESS_SHARED ++ * @key: address where result is stored. 
++ * @rw: mapping needs to be read/write (values: FUTEX_READ, ++ * FUTEX_WRITE) ++ * ++ * Return: a negative error code or 0 ++ * ++ * The key words are stored in @key on success. ++ * ++ * For shared mappings (when @fshared), the key is: ++ * ++ * ( inode->i_sequence, page->index, offset_within_page ) ++ * ++ * [ also see get_inode_sequence_number() ] ++ * ++ * For private mappings (or when !@fshared), the key is: ++ * ++ * ( current->mm, address, 0 ) ++ * ++ * This allows (cross process, where applicable) identification of the futex ++ * without keeping the page pinned for the duration of the FUTEX_WAIT. ++ * ++ * lock_page() might sleep, the caller should not hold a spinlock. ++ */ ++static int get_futex_key(u32 __user *uaddr, bool fshared, union futex_key *key, ++ enum futex_access rw) ++{ ++ unsigned long address = (unsigned long)uaddr; ++ struct mm_struct *mm = current->mm; ++ struct page *page, *tail; ++ struct address_space *mapping; ++ int err, ro = 0; ++ ++ /* ++ * The futex address must be "naturally" aligned. ++ */ ++ key->both.offset = address % PAGE_SIZE; ++ if (unlikely((address % sizeof(u32)) != 0)) ++ return -EINVAL; ++ address -= key->both.offset; ++ ++ if (unlikely(!access_ok(uaddr, sizeof(u32)))) ++ return -EFAULT; ++ ++ if (unlikely(should_fail_futex(fshared))) ++ return -EFAULT; ++ ++ /* ++ * PROCESS_PRIVATE futexes are fast. ++ * As the mm cannot disappear under us and the 'key' only needs ++ * virtual address, we dont even have to find the underlying vma. ++ * Note : We do have to check 'uaddr' is a valid user address, ++ * but access_ok() should be faster than find_vma() ++ */ ++ if (!fshared) { ++ key->private.mm = mm; ++ key->private.address = address; ++ return 0; ++ } ++ ++again: ++ /* Ignore any VERIFY_READ mapping (futex common case) */ ++ if (unlikely(should_fail_futex(true))) ++ return -EFAULT; ++ ++ err = get_user_pages_fast(address, 1, FOLL_WRITE, &page); ++ /* ++ * If write access is not required (eg. FUTEX_WAIT), try ++ * and get read-only access. ++ */ ++ if (err == -EFAULT && rw == FUTEX_READ) { ++ err = get_user_pages_fast(address, 1, 0, &page); ++ ro = 1; ++ } ++ if (err < 0) ++ return err; ++ else ++ err = 0; ++ ++ /* ++ * The treatment of mapping from this point on is critical. The page ++ * lock protects many things but in this context the page lock ++ * stabilizes mapping, prevents inode freeing in the shared ++ * file-backed region case and guards against movement to swap cache. ++ * ++ * Strictly speaking the page lock is not needed in all cases being ++ * considered here and page lock forces unnecessarily serialization ++ * From this point on, mapping will be re-verified if necessary and ++ * page lock will be acquired only if it is unavoidable ++ * ++ * Mapping checks require the head page for any compound page so the ++ * head page and mapping is looked up now. For anonymous pages, it ++ * does not matter if the page splits in the future as the key is ++ * based on the address. For filesystem-backed pages, the tail is ++ * required as the index of the page determines the key. For ++ * base pages, there is no tail page and tail == page. 
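
For reference while reading the body below, the two key forms described
in the get_futex_key() kerneldoc above can be pictured as a plain
three-word tuple. This sketch is an illustration only; the real
union futex_key packs mm/inode pointers and flag bits, not two u64s:

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    struct key_sketch {
        uint64_t seq_or_mm;     /* inode->i_sequence, or the mm pointer */
        uint64_t index_or_addr; /* page->index, or the aligned address  */
        uint32_t offset;        /* offset within the page               */
    };

    /* All three words must match, as in match_futex(). */
    static bool keys_match(const struct key_sketch *a,
                           const struct key_sketch *b)
    {
        return a->seq_or_mm == b->seq_or_mm &&
               a->index_or_addr == b->index_or_addr &&
               a->offset == b->offset;
    }

    int main(void)
    {
        struct key_sketch k1 = { 42, 7, 128 };
        struct key_sketch k2 = { 42, 7, 128 };

        printf("%s\n", keys_match(&k1, &k2) ? "same futex" : "different");
        return 0;
    }
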
++ */ ++ tail = page; ++ page = compound_head(page); ++ mapping = READ_ONCE(page->mapping); ++ ++ /* ++ * If page->mapping is NULL, then it cannot be a PageAnon ++ * page; but it might be the ZERO_PAGE or in the gate area or ++ * in a special mapping (all cases which we are happy to fail); ++ * or it may have been a good file page when get_user_pages_fast ++ * found it, but truncated or holepunched or subjected to ++ * invalidate_complete_page2 before we got the page lock (also ++ * cases which we are happy to fail). And we hold a reference, ++ * so refcount care in invalidate_complete_page's remove_mapping ++ * prevents drop_caches from setting mapping to NULL beneath us. ++ * ++ * The case we do have to guard against is when memory pressure made ++ * shmem_writepage move it from filecache to swapcache beneath us: ++ * an unlikely race, but we do need to retry for page->mapping. ++ */ ++ if (unlikely(!mapping)) { ++ int shmem_swizzled; ++ ++ /* ++ * Page lock is required to identify which special case above ++ * applies. If this is really a shmem page then the page lock ++ * will prevent unexpected transitions. ++ */ ++ lock_page(page); ++ shmem_swizzled = PageSwapCache(page) || page->mapping; ++ unlock_page(page); ++ put_page(page); ++ ++ if (shmem_swizzled) ++ goto again; ++ ++ return -EFAULT; ++ } ++ ++ /* ++ * Private mappings are handled in a simple way. ++ * ++ * If the futex key is stored on an anonymous page, then the associated ++ * object is the mm which is implicitly pinned by the calling process. ++ * ++ * NOTE: When userspace waits on a MAP_SHARED mapping, even if ++ * it's a read-only handle, it's expected that futexes attach to ++ * the object not the particular process. ++ */ ++ if (PageAnon(page)) { ++ /* ++ * A RO anonymous page will never change and thus doesn't make ++ * sense for futex operations. ++ */ ++ if (unlikely(should_fail_futex(true)) || ro) { ++ err = -EFAULT; + goto out; ++ } + - /* This code will be executed even when the last bpf_tramp_image - * is alive. All progs are detached from the trampoline and the - * trampoline image is patched with jmp into epilogue to skip -@@ -541,11 +547,12 @@ static u64 notrace bpf_prog_start_time(void) - static void notrace inc_misses_counter(struct bpf_prog *prog) - { - struct bpf_prog_stats *stats; -+ unsigned int flags; - - stats = this_cpu_ptr(prog->stats); -- u64_stats_update_begin(&stats->syncp); -- stats->misses++; -- u64_stats_update_end(&stats->syncp); -+ flags = u64_stats_update_begin_irqsave(&stats->syncp); -+ u64_stats_inc(&stats->misses); -+ u64_stats_update_end_irqrestore(&stats->syncp, flags); - } - - /* The logic is similar to bpf_prog_run(), but with an explicit -@@ -585,11 +592,13 @@ static void notrace update_prog_stats(struct bpf_prog *prog, - * Hence check that 'start' is valid. 
- */ - start > NO_START_TIME) { -+ unsigned long flags; ++ key->both.offset |= FUT_OFF_MMSHARED; /* ref taken on mm */ ++ key->private.mm = mm; ++ key->private.address = address; + - stats = this_cpu_ptr(prog->stats); -- u64_stats_update_begin(&stats->syncp); -- stats->cnt++; -- stats->nsecs += sched_clock() - start; -- u64_stats_update_end(&stats->syncp); -+ flags = u64_stats_update_begin_irqsave(&stats->syncp); -+ u64_stats_inc(&stats->cnt); -+ u64_stats_add(&stats->nsecs, sched_clock() - start); -+ u64_stats_update_end_irqrestore(&stats->syncp, flags); - } - } - -diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c -index e76b559179054..c3a4158e838e7 100644 ---- a/kernel/bpf/verifier.c -+++ b/kernel/bpf/verifier.c -@@ -240,12 +240,6 @@ static bool bpf_pseudo_kfunc_call(const struct bpf_insn *insn) - insn->src_reg == BPF_PSEUDO_KFUNC_CALL; - } - --static bool bpf_pseudo_func(const struct bpf_insn *insn) --{ -- return insn->code == (BPF_LD | BPF_IMM | BPF_DW) && -- insn->src_reg == BPF_PSEUDO_FUNC; --} -- - struct bpf_call_arg_meta { - struct bpf_map *map_ptr; - bool raw_mode; -@@ -445,18 +439,6 @@ static bool reg_type_not_null(enum bpf_reg_type type) - type == PTR_TO_SOCK_COMMON; - } - --static bool reg_type_may_be_null(enum bpf_reg_type type) --{ -- return type == PTR_TO_MAP_VALUE_OR_NULL || -- type == PTR_TO_SOCKET_OR_NULL || -- type == PTR_TO_SOCK_COMMON_OR_NULL || -- type == PTR_TO_TCP_SOCK_OR_NULL || -- type == PTR_TO_BTF_ID_OR_NULL || -- type == PTR_TO_MEM_OR_NULL || -- type == PTR_TO_RDONLY_BUF_OR_NULL || -- type == PTR_TO_RDWR_BUF_OR_NULL; --} -- - static bool reg_may_point_to_spin_lock(const struct bpf_reg_state *reg) - { - return reg->type == PTR_TO_MAP_VALUE && -@@ -465,12 +447,14 @@ static bool reg_may_point_to_spin_lock(const struct bpf_reg_state *reg) - - static bool reg_type_may_be_refcounted_or_null(enum bpf_reg_type type) - { -- return type == PTR_TO_SOCKET || -- type == PTR_TO_SOCKET_OR_NULL || -- type == PTR_TO_TCP_SOCK || -- type == PTR_TO_TCP_SOCK_OR_NULL || -- type == PTR_TO_MEM || -- type == PTR_TO_MEM_OR_NULL; -+ return base_type(type) == PTR_TO_SOCKET || -+ base_type(type) == PTR_TO_TCP_SOCK || -+ base_type(type) == PTR_TO_MEM; ++ } else { ++ struct inode *inode; ++ ++ /* ++ * The associated futex object in this case is the inode and ++ * the page->mapping must be traversed. Ordinarily this should ++ * be stabilised under page lock but it's not strictly ++ * necessary in this case as we just want to pin the inode, not ++ * update the radix tree or anything like that. ++ * ++ * The RCU read lock is taken as the inode is finally freed ++ * under RCU. If the mapping still matches expectations then the ++ * mapping->host can be safely accessed as being a valid inode. ++ */ ++ rcu_read_lock(); ++ ++ if (READ_ONCE(page->mapping) != mapping) { ++ rcu_read_unlock(); ++ put_page(page); ++ ++ goto again; ++ } ++ ++ inode = READ_ONCE(mapping->host); ++ if (!inode) { ++ rcu_read_unlock(); ++ put_page(page); ++ ++ goto again; ++ } ++ ++ key->both.offset |= FUT_OFF_INODE; /* inode-based key */ ++ key->shared.i_seq = get_inode_sequence_number(inode); ++ key->shared.pgoff = page_to_pgoff(tail); ++ rcu_read_unlock(); ++ } ++ ++out: ++ put_page(page); ++ return err; +} + -+static bool type_is_rdonly_mem(u32 type) ++/** ++ * fault_in_user_writeable() - Fault in user address and verify RW access ++ * @uaddr: pointer to faulting user space address ++ * ++ * Slow path to fixup the fault we just took in the atomic write ++ * access to @uaddr. 
++ * ++ * We have no generic implementation of a non-destructive write to the ++ * user address. We know that we faulted in the atomic pagefault ++ * disabled section so we can as well avoid the #PF overhead by ++ * calling get_user_pages() right away. ++ */ ++static int fault_in_user_writeable(u32 __user *uaddr) +{ -+ return type & MEM_RDONLY; - } - - static bool arg_type_may_be_refcounted(enum bpf_arg_type type) -@@ -478,14 +462,9 @@ static bool arg_type_may_be_refcounted(enum bpf_arg_type type) - return type == ARG_PTR_TO_SOCK_COMMON; - } - --static bool arg_type_may_be_null(enum bpf_arg_type type) -+static bool type_may_be_null(u32 type) - { -- return type == ARG_PTR_TO_MAP_VALUE_OR_NULL || -- type == ARG_PTR_TO_MEM_OR_NULL || -- type == ARG_PTR_TO_CTX_OR_NULL || -- type == ARG_PTR_TO_SOCKET_OR_NULL || -- type == ARG_PTR_TO_ALLOC_MEM_OR_NULL || -- type == ARG_PTR_TO_STACK_OR_NULL; -+ return type & PTR_MAYBE_NULL; - } - - /* Determine whether the function releases some resources allocated by another -@@ -545,39 +524,54 @@ static bool is_cmpxchg_insn(const struct bpf_insn *insn) - insn->imm == BPF_CMPXCHG; - } - --/* string representation of 'enum bpf_reg_type' */ --static const char * const reg_type_str[] = { -- [NOT_INIT] = "?", -- [SCALAR_VALUE] = "inv", -- [PTR_TO_CTX] = "ctx", -- [CONST_PTR_TO_MAP] = "map_ptr", -- [PTR_TO_MAP_VALUE] = "map_value", -- [PTR_TO_MAP_VALUE_OR_NULL] = "map_value_or_null", -- [PTR_TO_STACK] = "fp", -- [PTR_TO_PACKET] = "pkt", -- [PTR_TO_PACKET_META] = "pkt_meta", -- [PTR_TO_PACKET_END] = "pkt_end", -- [PTR_TO_FLOW_KEYS] = "flow_keys", -- [PTR_TO_SOCKET] = "sock", -- [PTR_TO_SOCKET_OR_NULL] = "sock_or_null", -- [PTR_TO_SOCK_COMMON] = "sock_common", -- [PTR_TO_SOCK_COMMON_OR_NULL] = "sock_common_or_null", -- [PTR_TO_TCP_SOCK] = "tcp_sock", -- [PTR_TO_TCP_SOCK_OR_NULL] = "tcp_sock_or_null", -- [PTR_TO_TP_BUFFER] = "tp_buffer", -- [PTR_TO_XDP_SOCK] = "xdp_sock", -- [PTR_TO_BTF_ID] = "ptr_", -- [PTR_TO_BTF_ID_OR_NULL] = "ptr_or_null_", -- [PTR_TO_PERCPU_BTF_ID] = "percpu_ptr_", -- [PTR_TO_MEM] = "mem", -- [PTR_TO_MEM_OR_NULL] = "mem_or_null", -- [PTR_TO_RDONLY_BUF] = "rdonly_buf", -- [PTR_TO_RDONLY_BUF_OR_NULL] = "rdonly_buf_or_null", -- [PTR_TO_RDWR_BUF] = "rdwr_buf", -- [PTR_TO_RDWR_BUF_OR_NULL] = "rdwr_buf_or_null", -- [PTR_TO_FUNC] = "func", -- [PTR_TO_MAP_KEY] = "map_key", --}; -+/* string representation of 'enum bpf_reg_type' ++ struct mm_struct *mm = current->mm; ++ int ret; ++ ++ mmap_read_lock(mm); ++ ret = fixup_user_fault(mm, (unsigned long)uaddr, ++ FAULT_FLAG_WRITE, NULL); ++ mmap_read_unlock(mm); ++ ++ return ret < 0 ? ret : 0; ++} ++ ++/** ++ * futex_top_waiter() - Return the highest priority waiter on a futex ++ * @hb: the hash bucket the futex_q's reside in ++ * @key: the futex key (to distinguish it from other futex futex_q's) + * -+ * Note that reg_type_str() can not appear more than once in a single verbose() -+ * statement. ++ * Must be called with the hb lock held. 
+ */ -+static const char *reg_type_str(struct bpf_verifier_env *env, -+ enum bpf_reg_type type) ++static struct futex_q *futex_top_waiter(struct futex_hash_bucket *hb, ++ union futex_key *key) +{ -+ char postfix[16] = {0}, prefix[16] = {0}; -+ static const char * const str[] = { -+ [NOT_INIT] = "?", -+ [SCALAR_VALUE] = "inv", -+ [PTR_TO_CTX] = "ctx", -+ [CONST_PTR_TO_MAP] = "map_ptr", -+ [PTR_TO_MAP_VALUE] = "map_value", -+ [PTR_TO_STACK] = "fp", -+ [PTR_TO_PACKET] = "pkt", -+ [PTR_TO_PACKET_META] = "pkt_meta", -+ [PTR_TO_PACKET_END] = "pkt_end", -+ [PTR_TO_FLOW_KEYS] = "flow_keys", -+ [PTR_TO_SOCKET] = "sock", -+ [PTR_TO_SOCK_COMMON] = "sock_common", -+ [PTR_TO_TCP_SOCK] = "tcp_sock", -+ [PTR_TO_TP_BUFFER] = "tp_buffer", -+ [PTR_TO_XDP_SOCK] = "xdp_sock", -+ [PTR_TO_BTF_ID] = "ptr_", -+ [PTR_TO_PERCPU_BTF_ID] = "percpu_ptr_", -+ [PTR_TO_MEM] = "mem", -+ [PTR_TO_BUF] = "buf", -+ [PTR_TO_FUNC] = "func", -+ [PTR_TO_MAP_KEY] = "map_key", -+ }; ++ struct futex_q *this; + -+ if (type & PTR_MAYBE_NULL) { -+ if (base_type(type) == PTR_TO_BTF_ID || -+ base_type(type) == PTR_TO_PERCPU_BTF_ID) -+ strncpy(postfix, "or_null_", 16); -+ else -+ strncpy(postfix, "_or_null", 16); ++ plist_for_each_entry(this, &hb->chain, list) { ++ if (match_futex(&this->key, key)) ++ return this; + } ++ return NULL; ++} + -+ if (type & MEM_RDONLY) -+ strncpy(prefix, "rdonly_", 16); ++static int cmpxchg_futex_value_locked(u32 *curval, u32 __user *uaddr, ++ u32 uval, u32 newval) ++{ ++ int ret; + -+ snprintf(env->type_str_buf, TYPE_STR_BUF_LEN, "%s%s%s", -+ prefix, str[base_type(type)], postfix); -+ return env->type_str_buf; ++ pagefault_disable(); ++ ret = futex_atomic_cmpxchg_inatomic(curval, uaddr, uval, newval); ++ pagefault_enable(); ++ ++ return ret; +} - - static char slot_type_char[] = { - [STACK_INVALID] = '?', -@@ -628,7 +622,7 @@ static void print_verifier_state(struct bpf_verifier_env *env, - continue; - verbose(env, " R%d", i); - print_liveness(env, reg->live); -- verbose(env, "=%s", reg_type_str[t]); -+ verbose(env, "=%s", reg_type_str(env, t)); - if (t == SCALAR_VALUE && reg->precise) - verbose(env, "P"); - if ((t == SCALAR_VALUE || t == PTR_TO_STACK) && -@@ -636,9 +630,8 @@ static void print_verifier_state(struct bpf_verifier_env *env, - /* reg->off should be 0 for SCALAR_VALUE */ - verbose(env, "%lld", reg->var_off.value + reg->off); - } else { -- if (t == PTR_TO_BTF_ID || -- t == PTR_TO_BTF_ID_OR_NULL || -- t == PTR_TO_PERCPU_BTF_ID) -+ if (base_type(t) == PTR_TO_BTF_ID || -+ base_type(t) == PTR_TO_PERCPU_BTF_ID) - verbose(env, "%s", kernel_type_name(reg->btf, reg->btf_id)); - verbose(env, "(id=%d", reg->id); - if (reg_type_may_be_refcounted_or_null(t)) -@@ -647,10 +640,9 @@ static void print_verifier_state(struct bpf_verifier_env *env, - verbose(env, ",off=%d", reg->off); - if (type_is_pkt_pointer(t)) - verbose(env, ",r=%d", reg->range); -- else if (t == CONST_PTR_TO_MAP || -- t == PTR_TO_MAP_KEY || -- t == PTR_TO_MAP_VALUE || -- t == PTR_TO_MAP_VALUE_OR_NULL) -+ else if (base_type(t) == CONST_PTR_TO_MAP || -+ base_type(t) == PTR_TO_MAP_KEY || -+ base_type(t) == PTR_TO_MAP_VALUE) - verbose(env, ",ks=%d,vs=%d", - reg->map_ptr->key_size, - reg->map_ptr->value_size); -@@ -720,7 +712,7 @@ static void print_verifier_state(struct bpf_verifier_env *env, - if (state->stack[i].slot_type[0] == STACK_SPILL) { - reg = &state->stack[i].spilled_ptr; - t = reg->type; -- verbose(env, "=%s", reg_type_str[t]); -+ verbose(env, "=%s", reg_type_str(env, t)); - if (t == SCALAR_VALUE && reg->precise) - verbose(env, "P"); - if 
(t == SCALAR_VALUE && tnum_is_const(reg->var_off)) -@@ -859,6 +851,7 @@ static int acquire_reference_state(struct bpf_verifier_env *env, int insn_idx) - id = ++env->id_gen; - state->refs[new_ofs].id = id; - state->refs[new_ofs].insn_idx = insn_idx; -+ state->refs[new_ofs].callback_ref = state->in_callback_fn ? state->frameno : 0; - - return id; - } -@@ -871,6 +864,9 @@ static int release_reference_state(struct bpf_func_state *state, int ptr_id) - last_idx = state->acquired_refs - 1; - for (i = 0; i < state->acquired_refs; i++) { - if (state->refs[i].id == ptr_id) { -+ /* Cannot release caller references in callbacks */ -+ if (state->in_callback_fn && state->refs[i].callback_ref != state->frameno) -+ return -EINVAL; - if (last_idx && i != last_idx) - memcpy(&state->refs[i], &state->refs[last_idx], - sizeof(*state->refs)); -@@ -1133,8 +1129,7 @@ static void mark_reg_known_zero(struct bpf_verifier_env *env, - - static void mark_ptr_not_null_reg(struct bpf_reg_state *reg) - { -- switch (reg->type) { -- case PTR_TO_MAP_VALUE_OR_NULL: { -+ if (base_type(reg->type) == PTR_TO_MAP_VALUE) { - const struct bpf_map *map = reg->map_ptr; - - if (map->inner_map_meta) { -@@ -1143,7 +1138,8 @@ static void mark_ptr_not_null_reg(struct bpf_reg_state *reg) - /* transfer reg's id which is unique for every map_lookup_elem - * as UID of the inner map. - */ -- reg->map_uid = reg->id; -+ if (map_value_has_timer(map->inner_map_meta)) -+ reg->map_uid = reg->id; - } else if (map->map_type == BPF_MAP_TYPE_XSKMAP) { - reg->type = PTR_TO_XDP_SOCK; - } else if (map->map_type == BPF_MAP_TYPE_SOCKMAP || -@@ -1152,32 +1148,10 @@ static void mark_ptr_not_null_reg(struct bpf_reg_state *reg) - } else { - reg->type = PTR_TO_MAP_VALUE; - } -- break; -- } -- case PTR_TO_SOCKET_OR_NULL: -- reg->type = PTR_TO_SOCKET; -- break; -- case PTR_TO_SOCK_COMMON_OR_NULL: -- reg->type = PTR_TO_SOCK_COMMON; -- break; -- case PTR_TO_TCP_SOCK_OR_NULL: -- reg->type = PTR_TO_TCP_SOCK; -- break; -- case PTR_TO_BTF_ID_OR_NULL: -- reg->type = PTR_TO_BTF_ID; -- break; -- case PTR_TO_MEM_OR_NULL: -- reg->type = PTR_TO_MEM; -- break; -- case PTR_TO_RDONLY_BUF_OR_NULL: -- reg->type = PTR_TO_RDONLY_BUF; -- break; -- case PTR_TO_RDWR_BUF_OR_NULL: -- reg->type = PTR_TO_RDWR_BUF; -- break; -- default: -- WARN_ONCE(1, "unknown nullable register type"); -+ return; - } + -+ reg->type &= ~PTR_MAYBE_NULL; - } - - static bool reg_is_pkt_pointer(const struct bpf_reg_state *reg) -@@ -1357,22 +1331,43 @@ static void __reg_bound_offset(struct bpf_reg_state *reg) - reg->var_off = tnum_or(tnum_clear_subreg(var64_off), var32_off); - } - -+static void reg_bounds_sync(struct bpf_reg_state *reg) ++static int get_futex_value_locked(u32 *dest, u32 __user *from) +{ -+ /* We might have learned new bounds from the var_off. */ -+ __update_reg_bounds(reg); -+ /* We might have learned something about the sign bit. */ -+ __reg_deduce_bounds(reg); -+ /* We might have learned some bits from the bounds. */ -+ __reg_bound_offset(reg); -+ /* Intersecting with the old var_off might have improved our bounds -+ * slightly, e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc), -+ * then new var_off is (0; 0x7f...fc) which improves our umax. -+ */ -+ __update_reg_bounds(reg); ++ int ret; ++ ++ pagefault_disable(); ++ ret = __get_user(*dest, from); ++ pagefault_enable(); ++ ++ return ret ? 
-EFAULT : 0; +} + -+static bool __reg32_bound_s64(s32 a) ++ ++/* ++ * PI code: ++ */ ++static int refill_pi_state_cache(void) +{ -+ return a >= 0 && a <= S32_MAX; ++ struct futex_pi_state *pi_state; ++ ++ if (likely(current->pi_state_cache)) ++ return 0; ++ ++ pi_state = kzalloc(sizeof(*pi_state), GFP_KERNEL); ++ ++ if (!pi_state) ++ return -ENOMEM; ++ ++ INIT_LIST_HEAD(&pi_state->list); ++ /* pi_mutex gets initialized later */ ++ pi_state->owner = NULL; ++ refcount_set(&pi_state->refcount, 1); ++ pi_state->key = FUTEX_KEY_INIT; ++ ++ current->pi_state_cache = pi_state; ++ ++ return 0; +} + - static void __reg_assign_32_into_64(struct bpf_reg_state *reg) - { - reg->umin_value = reg->u32_min_value; - reg->umax_value = reg->u32_max_value; -- /* Attempt to pull 32-bit signed bounds into 64-bit bounds -- * but must be positive otherwise set to worse case bounds -- * and refine later from tnum. ++static struct futex_pi_state *alloc_pi_state(void) ++{ ++ struct futex_pi_state *pi_state = current->pi_state_cache; + -+ /* Attempt to pull 32-bit signed bounds into 64-bit bounds but must -+ * be positive otherwise set to worse case bounds and refine later -+ * from tnum. - */ -- if (reg->s32_min_value >= 0 && reg->s32_max_value >= 0) -- reg->smax_value = reg->s32_max_value; -- else -- reg->smax_value = U32_MAX; -- if (reg->s32_min_value >= 0) -+ if (__reg32_bound_s64(reg->s32_min_value) && -+ __reg32_bound_s64(reg->s32_max_value)) { - reg->smin_value = reg->s32_min_value; -- else -+ reg->smax_value = reg->s32_max_value; ++ WARN_ON(!pi_state); ++ current->pi_state_cache = NULL; ++ ++ return pi_state; ++} ++ ++static void pi_state_update_owner(struct futex_pi_state *pi_state, ++ struct task_struct *new_owner) ++{ ++ struct task_struct *old_owner = pi_state->owner; ++ ++ lockdep_assert_held(&pi_state->pi_mutex.wait_lock); ++ ++ if (old_owner) { ++ raw_spin_lock(&old_owner->pi_lock); ++ WARN_ON(list_empty(&pi_state->list)); ++ list_del_init(&pi_state->list); ++ raw_spin_unlock(&old_owner->pi_lock); ++ } ++ ++ if (new_owner) { ++ raw_spin_lock(&new_owner->pi_lock); ++ WARN_ON(!list_empty(&pi_state->list)); ++ list_add(&pi_state->list, &new_owner->pi_state_list); ++ pi_state->owner = new_owner; ++ raw_spin_unlock(&new_owner->pi_lock); ++ } ++} ++ ++static void get_pi_state(struct futex_pi_state *pi_state) ++{ ++ WARN_ON_ONCE(!refcount_inc_not_zero(&pi_state->refcount)); ++} ++ ++/* ++ * Drops a reference to the pi_state object and frees or caches it ++ * when the last reference is gone. ++ */ ++static void put_pi_state(struct futex_pi_state *pi_state) ++{ ++ if (!pi_state) ++ return; ++ ++ if (!refcount_dec_and_test(&pi_state->refcount)) ++ return; ++ ++ /* ++ * If pi_state->owner is NULL, the owner is most probably dying ++ * and has cleaned up the pi_state already ++ */ ++ if (pi_state->owner) { ++ unsigned long flags; ++ ++ raw_spin_lock_irqsave(&pi_state->pi_mutex.wait_lock, flags); ++ pi_state_update_owner(pi_state, NULL); ++ rt_mutex_proxy_unlock(&pi_state->pi_mutex); ++ raw_spin_unlock_irqrestore(&pi_state->pi_mutex.wait_lock, flags); ++ } ++ ++ if (current->pi_state_cache) { ++ kfree(pi_state); + } else { - reg->smin_value = 0; -+ reg->smax_value = U32_MAX; ++ /* ++ * pi_state->list is already empty. ++ * clear pi_state->owner. ++ * refcount is at 0 - put it back to 1. 
++ */ ++ pi_state->owner = NULL; ++ refcount_set(&pi_state->refcount, 1); ++ current->pi_state_cache = pi_state; + } - } - - static void __reg_combine_32_into_64(struct bpf_reg_state *reg) -@@ -1392,32 +1387,23 @@ static void __reg_combine_32_into_64(struct bpf_reg_state *reg) - * so they do not impact tnum bounds calculation. - */ - __mark_reg64_unbounded(reg); -- __update_reg_bounds(reg); - } -- -- /* Intersecting with the old var_off might have improved our bounds -- * slightly. e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc), -- * then new var_off is (0; 0x7f...fc) which improves our umax. -- */ -- __reg_deduce_bounds(reg); -- __reg_bound_offset(reg); -- __update_reg_bounds(reg); -+ reg_bounds_sync(reg); - } - - static bool __reg64_bound_s32(s64 a) - { -- return a > S32_MIN && a < S32_MAX; -+ return a >= S32_MIN && a <= S32_MAX; - } - - static bool __reg64_bound_u32(u64 a) - { -- return a > U32_MIN && a < U32_MAX; -+ return a >= U32_MIN && a <= U32_MAX; - } - - static void __reg_combine_64_into_32(struct bpf_reg_state *reg) - { - __mark_reg32_unbounded(reg); -- - if (__reg64_bound_s32(reg->smin_value) && __reg64_bound_s32(reg->smax_value)) { - reg->s32_min_value = (s32)reg->smin_value; - reg->s32_max_value = (s32)reg->smax_value; -@@ -1426,14 +1412,7 @@ static void __reg_combine_64_into_32(struct bpf_reg_state *reg) - reg->u32_min_value = (u32)reg->umin_value; - reg->u32_max_value = (u32)reg->umax_value; - } -- -- /* Intersecting with the old var_off might have improved our bounds -- * slightly. e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc), -- * then new var_off is (0; 0x7f...fc) which improves our umax. -- */ -- __reg_deduce_bounds(reg); -- __reg_bound_offset(reg); -- __update_reg_bounds(reg); -+ reg_bounds_sync(reg); - } - - /* Mark a register as having a completely unknown (scalar) value. 
*/ -@@ -1807,16 +1786,10 @@ static int add_subprog_and_kfunc(struct bpf_verifier_env *env) - return -EPERM; - } - -- if (bpf_pseudo_func(insn)) { -- ret = add_subprog(env, i + insn->imm + 1); -- if (ret >= 0) -- /* remember subprog */ -- insn[1].imm = ret; -- } else if (bpf_pseudo_call(insn)) { -+ if (bpf_pseudo_func(insn) || bpf_pseudo_call(insn)) - ret = add_subprog(env, i + insn->imm + 1); -- } else { -+ else - ret = add_kfunc_call(env, insn->imm); -- } - - if (ret < 0) - return ret; -@@ -1899,7 +1872,7 @@ static int mark_reg_read(struct bpf_verifier_env *env, - break; - if (parent->live & REG_LIVE_DONE) { - verbose(env, "verifier BUG type %s var_off %lld off %d\n", -- reg_type_str[parent->type], -+ reg_type_str(env, parent->type), - parent->var_off.value, parent->off); - return -EFAULT; - } -@@ -2557,9 +2530,8 @@ static int mark_chain_precision_stack(struct bpf_verifier_env *env, int spi) - - static bool is_spillable_regtype(enum bpf_reg_type type) - { -- switch (type) { -+ switch (base_type(type)) { - case PTR_TO_MAP_VALUE: -- case PTR_TO_MAP_VALUE_OR_NULL: - case PTR_TO_STACK: - case PTR_TO_CTX: - case PTR_TO_PACKET: -@@ -2568,21 +2540,13 @@ static bool is_spillable_regtype(enum bpf_reg_type type) - case PTR_TO_FLOW_KEYS: - case CONST_PTR_TO_MAP: - case PTR_TO_SOCKET: -- case PTR_TO_SOCKET_OR_NULL: - case PTR_TO_SOCK_COMMON: -- case PTR_TO_SOCK_COMMON_OR_NULL: - case PTR_TO_TCP_SOCK: -- case PTR_TO_TCP_SOCK_OR_NULL: - case PTR_TO_XDP_SOCK: - case PTR_TO_BTF_ID: -- case PTR_TO_BTF_ID_OR_NULL: -- case PTR_TO_RDONLY_BUF: -- case PTR_TO_RDONLY_BUF_OR_NULL: -- case PTR_TO_RDWR_BUF: -- case PTR_TO_RDWR_BUF_OR_NULL: -+ case PTR_TO_BUF: - case PTR_TO_PERCPU_BTF_ID: - case PTR_TO_MEM: -- case PTR_TO_MEM_OR_NULL: - case PTR_TO_FUNC: - case PTR_TO_MAP_KEY: - return true; -@@ -3398,7 +3362,7 @@ static int check_ctx_access(struct bpf_verifier_env *env, int insn_idx, int off, - */ - *reg_type = info.reg_type; - -- if (*reg_type == PTR_TO_BTF_ID || *reg_type == PTR_TO_BTF_ID_OR_NULL) { -+ if (base_type(*reg_type) == PTR_TO_BTF_ID) { - *btf = info.btf; - *btf_id = info.btf_id; - } else { -@@ -3466,7 +3430,7 @@ static int check_sock_access(struct bpf_verifier_env *env, int insn_idx, - } - - verbose(env, "R%d invalid %s access off=%d size=%d\n", -- regno, reg_type_str[reg->type], off, size); -+ regno, reg_type_str(env, reg->type), off, size); - - return -EACCES; - } -@@ -3884,7 +3848,22 @@ static void coerce_reg_to_size(struct bpf_reg_state *reg, int size) - - static bool bpf_map_is_rdonly(const struct bpf_map *map) - { -- return (map->map_flags & BPF_F_RDONLY_PROG) && map->frozen; -+ /* A map is considered read-only if the following condition are true: -+ * -+ * 1) BPF program side cannot change any of the map content. The -+ * BPF_F_RDONLY_PROG flag is throughout the lifetime of a map -+ * and was set at map creation time. -+ * 2) The map value(s) have been initialized from user space by a -+ * loader and then "frozen", such that no new map update/delete -+ * operations from syscall side are possible for the rest of -+ * the map's lifetime from that point onwards. -+ * 3) Any parallel/pending map update/delete operations from syscall -+ * side have been completed. Only after that point, it's safe to -+ * assume that map value(s) are immutable. ++} ++ ++#ifdef CONFIG_FUTEX_PI ++ ++/* ++ * This task is holding PI mutexes at exit time => bad. ++ * Kernel cleans up PI-state, but userspace is likely hosed. ++ * (Robust-futex cleanup is separate and might save the day for userspace.) 
++ */ ++static void exit_pi_state_list(struct task_struct *curr) ++{ ++ struct list_head *next, *head = &curr->pi_state_list; ++ struct futex_pi_state *pi_state; ++ struct futex_hash_bucket *hb; ++ union futex_key key = FUTEX_KEY_INIT; ++ ++ if (!futex_cmpxchg_enabled) ++ return; ++ /* ++ * We are a ZOMBIE and nobody can enqueue itself on ++ * pi_state_list anymore, but we have to be careful ++ * versus waiters unqueueing themselves: + */ -+ return (map->map_flags & BPF_F_RDONLY_PROG) && -+ READ_ONCE(map->frozen) && -+ !bpf_map_write_active(map); - } - - static int bpf_map_direct_read(struct bpf_map *map, int off, int size, u64 *val) -@@ -4178,15 +4157,30 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn - mark_reg_unknown(env, regs, value_regno); - } - } -- } else if (reg->type == PTR_TO_MEM) { -+ } else if (base_type(reg->type) == PTR_TO_MEM) { -+ bool rdonly_mem = type_is_rdonly_mem(reg->type); ++ raw_spin_lock_irq(&curr->pi_lock); ++ while (!list_empty(head)) { ++ next = head->next; ++ pi_state = list_entry(next, struct futex_pi_state, list); ++ key = pi_state->key; ++ hb = hash_futex(&key); + -+ if (type_may_be_null(reg->type)) { -+ verbose(env, "R%d invalid mem access '%s'\n", regno, -+ reg_type_str(env, reg->type)); -+ return -EACCES; ++ /* ++ * We can race against put_pi_state() removing itself from the ++ * list (a waiter going away). put_pi_state() will first ++ * decrement the reference count and then modify the list, so ++ * its possible to see the list entry but fail this reference ++ * acquire. ++ * ++ * In that case; drop the locks to let put_pi_state() make ++ * progress and retry the loop. ++ */ ++ if (!refcount_inc_not_zero(&pi_state->refcount)) { ++ raw_spin_unlock_irq(&curr->pi_lock); ++ cpu_relax(); ++ raw_spin_lock_irq(&curr->pi_lock); ++ continue; + } ++ raw_spin_unlock_irq(&curr->pi_lock); + -+ if (t == BPF_WRITE && rdonly_mem) { -+ verbose(env, "R%d cannot write into %s\n", -+ regno, reg_type_str(env, reg->type)); -+ return -EACCES; ++ spin_lock(&hb->lock); ++ raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock); ++ raw_spin_lock(&curr->pi_lock); ++ /* ++ * We dropped the pi-lock, so re-check whether this ++ * task still owns the PI-state: ++ */ ++ if (head->next != next) { ++ /* retain curr->pi_lock for the loop invariant */ ++ raw_spin_unlock(&pi_state->pi_mutex.wait_lock); ++ spin_unlock(&hb->lock); ++ put_pi_state(pi_state); ++ continue; + } + - if (t == BPF_WRITE && value_regno >= 0 && - is_pointer_value(env, value_regno)) { - verbose(env, "R%d leaks addr into mem\n", value_regno); - return -EACCES; - } ++ WARN_ON(pi_state->owner != curr); ++ WARN_ON(list_empty(&pi_state->list)); ++ list_del_init(&pi_state->list); ++ pi_state->owner = NULL; + - err = check_mem_region_access(env, regno, off, size, - reg->mem_size, false); -- if (!err && t == BPF_READ && value_regno >= 0) -+ if (!err && value_regno >= 0 && (t == BPF_READ || rdonly_mem)) - mark_reg_unknown(env, regs, value_regno); - } else if (reg->type == PTR_TO_CTX) { - enum bpf_reg_type reg_type = SCALAR_VALUE; -@@ -4216,7 +4210,7 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn - } else { - mark_reg_known_zero(env, regs, - value_regno); -- if (reg_type_may_be_null(reg_type)) -+ if (type_may_be_null(reg_type)) - regs[value_regno].id = ++env->id_gen; - /* A load of ctx field could have different - * actual load size with the one encoded in the -@@ -4224,8 +4218,7 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn - 
* a sub-register. - */ - regs[value_regno].subreg_def = DEF_NOT_SUBREG; -- if (reg_type == PTR_TO_BTF_ID || -- reg_type == PTR_TO_BTF_ID_OR_NULL) { -+ if (base_type(reg_type) == PTR_TO_BTF_ID) { - regs[value_regno].btf = btf; - regs[value_regno].btf_id = btf_id; - } -@@ -4278,7 +4271,7 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn - } else if (type_is_sk_pointer(reg->type)) { - if (t == BPF_WRITE) { - verbose(env, "R%d cannot write into %s\n", -- regno, reg_type_str[reg->type]); -+ regno, reg_type_str(env, reg->type)); - return -EACCES; - } - err = check_sock_access(env, insn_idx, regno, off, size, t); -@@ -4294,26 +4287,32 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn - } else if (reg->type == CONST_PTR_TO_MAP) { - err = check_ptr_to_map_access(env, regs, regno, off, size, t, - value_regno); -- } else if (reg->type == PTR_TO_RDONLY_BUF) { -- if (t == BPF_WRITE) { -- verbose(env, "R%d cannot write into %s\n", -- regno, reg_type_str[reg->type]); -- return -EACCES; -+ } else if (base_type(reg->type) == PTR_TO_BUF) { -+ bool rdonly_mem = type_is_rdonly_mem(reg->type); -+ const char *buf_info; -+ u32 *max_access; ++ raw_spin_unlock(&curr->pi_lock); ++ raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock); ++ spin_unlock(&hb->lock); + -+ if (rdonly_mem) { -+ if (t == BPF_WRITE) { -+ verbose(env, "R%d cannot write into %s\n", -+ regno, reg_type_str(env, reg->type)); -+ return -EACCES; -+ } -+ buf_info = "rdonly"; -+ max_access = &env->prog->aux->max_rdonly_access; -+ } else { -+ buf_info = "rdwr"; -+ max_access = &env->prog->aux->max_rdwr_access; - } ++ rt_mutex_futex_unlock(&pi_state->pi_mutex); ++ put_pi_state(pi_state); + - err = check_buffer_access(env, reg, regno, off, size, false, -- "rdonly", -- &env->prog->aux->max_rdonly_access); -- if (!err && value_regno >= 0) -- mark_reg_unknown(env, regs, value_regno); -- } else if (reg->type == PTR_TO_RDWR_BUF) { -- err = check_buffer_access(env, reg, regno, off, size, false, -- "rdwr", -- &env->prog->aux->max_rdwr_access); -- if (!err && t == BPF_READ && value_regno >= 0) -+ buf_info, max_access); ++ raw_spin_lock_irq(&curr->pi_lock); ++ } ++ raw_spin_unlock_irq(&curr->pi_lock); ++} ++#else ++static inline void exit_pi_state_list(struct task_struct *curr) { } ++#endif + -+ if (!err && value_regno >= 0 && (rdonly_mem || t == BPF_READ)) - mark_reg_unknown(env, regs, value_regno); - } else { - verbose(env, "R%d invalid mem access '%s'\n", regno, -- reg_type_str[reg->type]); -+ reg_type_str(env, reg->type)); - return -EACCES; - } - -@@ -4364,9 +4363,16 @@ static int check_atomic(struct bpf_verifier_env *env, int insn_idx, struct bpf_i - - if (insn->imm == BPF_CMPXCHG) { - /* Check comparison of R0 with memory location */ -- err = check_reg_arg(env, BPF_REG_0, SRC_OP); -+ const u32 aux_reg = BPF_REG_0; ++/* ++ * We need to check the following states: ++ * ++ * Waiter | pi_state | pi->owner | uTID | uODIED | ? 
++ * ++ * [1] NULL | --- | --- | 0 | 0/1 | Valid ++ * [2] NULL | --- | --- | >0 | 0/1 | Valid ++ * ++ * [3] Found | NULL | -- | Any | 0/1 | Invalid ++ * ++ * [4] Found | Found | NULL | 0 | 1 | Valid ++ * [5] Found | Found | NULL | >0 | 1 | Invalid ++ * ++ * [6] Found | Found | task | 0 | 1 | Valid ++ * ++ * [7] Found | Found | NULL | Any | 0 | Invalid ++ * ++ * [8] Found | Found | task | ==taskTID | 0/1 | Valid ++ * [9] Found | Found | task | 0 | 0 | Invalid ++ * [10] Found | Found | task | !=taskTID | 0/1 | Invalid ++ * ++ * [1] Indicates that the kernel can acquire the futex atomically. We ++ * came here due to a stale FUTEX_WAITERS/FUTEX_OWNER_DIED bit. ++ * ++ * [2] Valid, if TID does not belong to a kernel thread. If no matching ++ * thread is found then it indicates that the owner TID has died. ++ * ++ * [3] Invalid. The waiter is queued on a non PI futex ++ * ++ * [4] Valid state after exit_robust_list(), which sets the user space ++ * value to FUTEX_WAITERS | FUTEX_OWNER_DIED. ++ * ++ * [5] The user space value got manipulated between exit_robust_list() ++ * and exit_pi_state_list() ++ * ++ * [6] Valid state after exit_pi_state_list() which sets the new owner in ++ * the pi_state but cannot access the user space value. ++ * ++ * [7] pi_state->owner can only be NULL when the OWNER_DIED bit is set. ++ * ++ * [8] Owner and user space value match ++ * ++ * [9] There is no transient state which sets the user space TID to 0 ++ * except exit_robust_list(), but this is indicated by the ++ * FUTEX_OWNER_DIED bit. See [4] ++ * ++ * [10] There is no transient state which leaves owner and user space ++ * TID out of sync. Except one error case where the kernel is denied ++ * write access to the user address, see fixup_pi_state_owner(). ++ * ++ * ++ * Serialization and lifetime rules: ++ * ++ * hb->lock: ++ * ++ * hb -> futex_q, relation ++ * futex_q -> pi_state, relation ++ * ++ * (cannot be raw because hb can contain arbitrary amount ++ * of futex_q's) ++ * ++ * pi_mutex->wait_lock: ++ * ++ * {uval, pi_state} ++ * ++ * (and pi_mutex 'obviously') ++ * ++ * p->pi_lock: ++ * ++ * p->pi_state_list -> pi_state->list, relation ++ * pi_mutex->owner -> pi_state->owner, relation ++ * ++ * pi_state->refcount: ++ * ++ * pi_state lifetime ++ * ++ * ++ * Lock order: ++ * ++ * hb->lock ++ * pi_mutex->wait_lock ++ * p->pi_lock ++ * ++ */ + -+ err = check_reg_arg(env, aux_reg, SRC_OP); - if (err) - return err; ++/* ++ * Validate that the existing waiter has a pi_state and sanity check ++ * the pi_state against the user space value. If correct, attach to ++ * it. 
++ */ ++static int attach_to_pi_state(u32 __user *uaddr, u32 uval, ++ struct futex_pi_state *pi_state, ++ struct futex_pi_state **ps) ++{ ++ pid_t pid = uval & FUTEX_TID_MASK; ++ u32 uval2; ++ int ret; + -+ if (is_pointer_value(env, aux_reg)) { -+ verbose(env, "R%d leaks addr into mem\n", aux_reg); -+ return -EACCES; -+ } - } - - if (is_pointer_value(env, insn->src_reg)) { -@@ -4380,7 +4386,7 @@ static int check_atomic(struct bpf_verifier_env *env, int insn_idx, struct bpf_i - is_sk_reg(env, insn->dst_reg)) { - verbose(env, "BPF_ATOMIC stores into R%d %s is not allowed\n", - insn->dst_reg, -- reg_type_str[reg_state(env, insn->dst_reg)->type]); -+ reg_type_str(env, reg_state(env, insn->dst_reg)->type)); - return -EACCES; - } - -@@ -4401,13 +4407,19 @@ static int check_atomic(struct bpf_verifier_env *env, int insn_idx, struct bpf_i - load_reg = -1; - } - -- /* check whether we can read the memory */ -+ /* Check whether we can read the memory, with second call for fetch -+ * case to simulate the register fill. ++ /* ++ * Userspace might have messed up non-PI and PI futexes [3] + */ - err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off, -- BPF_SIZE(insn->code), BPF_READ, load_reg, true); -+ BPF_SIZE(insn->code), BPF_READ, -1, true); -+ if (!err && load_reg >= 0) -+ err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off, -+ BPF_SIZE(insn->code), BPF_READ, load_reg, -+ true); - if (err) - return err; - -- /* check whether we can write into the same memory */ -+ /* Check whether we can write into the same memory. */ - err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off, - BPF_SIZE(insn->code), BPF_WRITE, -1, true); - if (err) -@@ -4557,13 +4569,20 @@ static int check_helper_mem_access(struct bpf_verifier_env *env, int regno, - struct bpf_call_arg_meta *meta) - { - struct bpf_reg_state *regs = cur_regs(env), *reg = ®s[regno]; -+ const char *buf_info; -+ u32 *max_access; - -- switch (reg->type) { -+ switch (base_type(reg->type)) { - case PTR_TO_PACKET: - case PTR_TO_PACKET_META: - return check_packet_access(env, regno, reg->off, access_size, - zero_size_allowed); - case PTR_TO_MAP_KEY: -+ if (meta && meta->raw_mode) { -+ verbose(env, "R%d cannot write into %s\n", regno, -+ reg_type_str(env, reg->type)); -+ return -EACCES; -+ } - return check_mem_region_access(env, regno, reg->off, access_size, - reg->map_ptr->key_size, false); - case PTR_TO_MAP_VALUE: -@@ -4574,21 +4593,33 @@ static int check_helper_mem_access(struct bpf_verifier_env *env, int regno, - return check_map_access(env, regno, reg->off, access_size, - zero_size_allowed); - case PTR_TO_MEM: -+ if (type_is_rdonly_mem(reg->type)) { -+ if (meta && meta->raw_mode) { -+ verbose(env, "R%d cannot write into %s\n", regno, -+ reg_type_str(env, reg->type)); -+ return -EACCES; -+ } ++ if (unlikely(!pi_state)) ++ return -EINVAL; ++ ++ /* ++ * We get here with hb->lock held, and having found a ++ * futex_top_waiter(). This means that futex_lock_pi() of said futex_q ++ * has dropped the hb->lock in between queue_me() and unqueue_me_pi(), ++ * which in turn means that futex_lock_pi() still has a reference on ++ * our pi_state. ++ * ++ * The waiter holding a reference on @pi_state also protects against ++ * the unlocked put_pi_state() in futex_unlock_pi(), futex_lock_pi() ++ * and futex_wait_requeue_pi() as it cannot go to 0 and consequently ++ * free pi_state before we can take a reference ourselves. 
++ */ ++ WARN_ON(!refcount_read(&pi_state->refcount)); ++ ++ /* ++ * Now that we have a pi_state, we can acquire wait_lock ++ * and do the state validation. ++ */ ++ raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock); ++ ++ /* ++ * Since {uval, pi_state} is serialized by wait_lock, and our current ++ * uval was read without holding it, it can have changed. Verify it ++ * still is what we expect it to be, otherwise retry the entire ++ * operation. ++ */ ++ if (get_futex_value_locked(&uval2, uaddr)) ++ goto out_efault; ++ ++ if (uval != uval2) ++ goto out_eagain; ++ ++ /* ++ * Handle the owner died case: ++ */ ++ if (uval & FUTEX_OWNER_DIED) { ++ /* ++ * exit_pi_state_list sets owner to NULL and wakes the ++ * topmost waiter. The task which acquires the ++ * pi_state->rt_mutex will fixup owner. ++ */ ++ if (!pi_state->owner) { ++ /* ++ * No pi state owner, but the user space TID ++ * is not 0. Inconsistent state. [5] ++ */ ++ if (pid) ++ goto out_einval; ++ /* ++ * Take a ref on the state and return success. [4] ++ */ ++ goto out_attach; + } - return check_mem_region_access(env, regno, reg->off, - access_size, reg->mem_size, - zero_size_allowed); -- case PTR_TO_RDONLY_BUF: -- if (meta && meta->raw_mode) -- return -EACCES; -- return check_buffer_access(env, reg, regno, reg->off, -- access_size, zero_size_allowed, -- "rdonly", -- &env->prog->aux->max_rdonly_access); -- case PTR_TO_RDWR_BUF: -+ case PTR_TO_BUF: -+ if (type_is_rdonly_mem(reg->type)) { -+ if (meta && meta->raw_mode) { -+ verbose(env, "R%d cannot write into %s\n", regno, -+ reg_type_str(env, reg->type)); -+ return -EACCES; -+ } + -+ buf_info = "rdonly"; -+ max_access = &env->prog->aux->max_rdonly_access; -+ } else { -+ buf_info = "rdwr"; -+ max_access = &env->prog->aux->max_rdwr_access; -+ } - return check_buffer_access(env, reg, regno, reg->off, - access_size, zero_size_allowed, -- "rdwr", -- &env->prog->aux->max_rdwr_access); -+ buf_info, max_access); - case PTR_TO_STACK: - return check_stack_range_initialized( - env, -@@ -4600,9 +4631,9 @@ static int check_helper_mem_access(struct bpf_verifier_env *env, int regno, - register_is_null(reg)) - return 0; - -- verbose(env, "R%d type=%s expected=%s\n", regno, -- reg_type_str[reg->type], -- reg_type_str[PTR_TO_STACK]); -+ verbose(env, "R%d type=%s ", regno, -+ reg_type_str(env, reg->type)); -+ verbose(env, "expected=%s\n", reg_type_str(env, PTR_TO_STACK)); - return -EACCES; - } - } -@@ -4613,7 +4644,7 @@ int check_mem_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg, - if (register_is_null(reg)) - return 0; - -- if (reg_type_may_be_null(reg->type)) { -+ if (type_may_be_null(reg->type)) { - /* Assuming that the register contains a value check if the memory - * access is safe. Temporarily save and restore the register's state as - * the conversion shouldn't be visible to a caller. 
-@@ -4761,9 +4792,8 @@ static int process_timer_func(struct bpf_verifier_env *env, int regno, - - static bool arg_type_is_mem_ptr(enum bpf_arg_type type) - { -- return type == ARG_PTR_TO_MEM || -- type == ARG_PTR_TO_MEM_OR_NULL || -- type == ARG_PTR_TO_UNINIT_MEM; -+ return base_type(type) == ARG_PTR_TO_MEM || -+ base_type(type) == ARG_PTR_TO_UNINIT_MEM; - } - - static bool arg_type_is_mem_size(enum bpf_arg_type type) -@@ -4865,8 +4895,7 @@ static const struct bpf_reg_types mem_types = { - PTR_TO_MAP_KEY, - PTR_TO_MAP_VALUE, - PTR_TO_MEM, -- PTR_TO_RDONLY_BUF, -- PTR_TO_RDWR_BUF, -+ PTR_TO_BUF, - }, - }; - -@@ -4897,31 +4926,26 @@ static const struct bpf_reg_types *compatible_reg_types[__BPF_ARG_TYPE_MAX] = { - [ARG_PTR_TO_MAP_KEY] = &map_key_value_types, - [ARG_PTR_TO_MAP_VALUE] = &map_key_value_types, - [ARG_PTR_TO_UNINIT_MAP_VALUE] = &map_key_value_types, -- [ARG_PTR_TO_MAP_VALUE_OR_NULL] = &map_key_value_types, - [ARG_CONST_SIZE] = &scalar_types, - [ARG_CONST_SIZE_OR_ZERO] = &scalar_types, - [ARG_CONST_ALLOC_SIZE_OR_ZERO] = &scalar_types, - [ARG_CONST_MAP_PTR] = &const_map_ptr_types, - [ARG_PTR_TO_CTX] = &context_types, -- [ARG_PTR_TO_CTX_OR_NULL] = &context_types, - [ARG_PTR_TO_SOCK_COMMON] = &sock_types, - #ifdef CONFIG_NET - [ARG_PTR_TO_BTF_ID_SOCK_COMMON] = &btf_id_sock_common_types, - #endif - [ARG_PTR_TO_SOCKET] = &fullsock_types, -- [ARG_PTR_TO_SOCKET_OR_NULL] = &fullsock_types, - [ARG_PTR_TO_BTF_ID] = &btf_ptr_types, - [ARG_PTR_TO_SPIN_LOCK] = &spin_lock_types, - [ARG_PTR_TO_MEM] = &mem_types, -- [ARG_PTR_TO_MEM_OR_NULL] = &mem_types, - [ARG_PTR_TO_UNINIT_MEM] = &mem_types, - [ARG_PTR_TO_ALLOC_MEM] = &alloc_mem_types, -- [ARG_PTR_TO_ALLOC_MEM_OR_NULL] = &alloc_mem_types, - [ARG_PTR_TO_INT] = &int_ptr_types, - [ARG_PTR_TO_LONG] = &int_ptr_types, - [ARG_PTR_TO_PERCPU_BTF_ID] = &percpu_btf_ptr_types, - [ARG_PTR_TO_FUNC] = &func_ptr_types, -- [ARG_PTR_TO_STACK_OR_NULL] = &stack_ptr_types, -+ [ARG_PTR_TO_STACK] = &stack_ptr_types, - [ARG_PTR_TO_CONST_STR] = &const_str_ptr_types, - [ARG_PTR_TO_TIMER] = &timer_types, - }; -@@ -4935,12 +4959,27 @@ static int check_reg_type(struct bpf_verifier_env *env, u32 regno, - const struct bpf_reg_types *compatible; - int i, j; - -- compatible = compatible_reg_types[arg_type]; -+ compatible = compatible_reg_types[base_type(arg_type)]; - if (!compatible) { - verbose(env, "verifier internal error: unsupported arg type %d\n", arg_type); - return -EFAULT; - } - -+ /* ARG_PTR_TO_MEM + RDONLY is compatible with PTR_TO_MEM and PTR_TO_MEM + RDONLY, -+ * but ARG_PTR_TO_MEM is compatible only with PTR_TO_MEM and NOT with PTR_TO_MEM + RDONLY ++ /* ++ * If TID is 0, then either the dying owner has not ++ * yet executed exit_pi_state_list() or some waiter ++ * acquired the rtmutex in the pi state, but did not ++ * yet fixup the TID in user space. ++ * ++ * Take a ref on the state and return success. [6] ++ */ ++ if (!pid) ++ goto out_attach; ++ } else { ++ /* ++ * If the owner died bit is not set, then the pi_state ++ * must have an owner. [7] ++ */ ++ if (!pi_state->owner) ++ goto out_einval; ++ } ++ ++ /* ++ * Bail out if user space manipulated the futex value. If pi ++ * state exists then the owner TID must be the same as the ++ * user space TID. 
[9/10] ++ */ ++ if (pid != task_pid_vnr(pi_state->owner)) ++ goto out_einval; ++ ++out_attach: ++ get_pi_state(pi_state); ++ raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock); ++ *ps = pi_state; ++ return 0; ++ ++out_einval: ++ ret = -EINVAL; ++ goto out_error; ++ ++out_eagain: ++ ret = -EAGAIN; ++ goto out_error; ++ ++out_efault: ++ ret = -EFAULT; ++ goto out_error; ++ ++out_error: ++ raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock); ++ return ret; ++} ++ ++/** ++ * wait_for_owner_exiting - Block until the owner has exited ++ * @ret: owner's current futex lock status ++ * @exiting: Pointer to the exiting task ++ * ++ * Caller must hold a refcount on @exiting. ++ */ ++static void wait_for_owner_exiting(int ret, struct task_struct *exiting) ++{ ++ if (ret != -EBUSY) { ++ WARN_ON_ONCE(exiting); ++ return; ++ } ++ ++ if (WARN_ON_ONCE(ret == -EBUSY && !exiting)) ++ return; ++ ++ mutex_lock(&exiting->futex_exit_mutex); ++ /* ++ * No point in doing state checking here. If the waiter got here ++ * while the task was in exec()->exec_futex_release() then it can ++ * have any FUTEX_STATE_* value when the waiter has acquired the ++ * mutex. OK, if running, EXITING or DEAD if it reached exit() ++ * already. Highly unlikely and not a problem. Just one more round ++ * through the futex maze. ++ */ ++ mutex_unlock(&exiting->futex_exit_mutex); ++ ++ put_task_struct(exiting); ++} ++ ++static int handle_exit_race(u32 __user *uaddr, u32 uval, ++ struct task_struct *tsk) ++{ ++ u32 uval2; ++ ++ /* ++ * If the futex exit state is not yet FUTEX_STATE_DEAD, tell the ++ * caller that the alleged owner is busy. ++ */ ++ if (tsk && tsk->futex_state != FUTEX_STATE_DEAD) ++ return -EBUSY; ++ ++ /* ++ * Reread the user space value to handle the following situation: + * -+ * Same for MAYBE_NULL: ++ * CPU0 CPU1 + * -+ * ARG_PTR_TO_MEM + MAYBE_NULL is compatible with PTR_TO_MEM and PTR_TO_MEM + MAYBE_NULL, -+ * but ARG_PTR_TO_MEM is compatible only with PTR_TO_MEM but NOT with PTR_TO_MEM + MAYBE_NULL ++ * sys_exit() sys_futex() ++ * do_exit() futex_lock_pi() ++ * futex_lock_pi_atomic() ++ * exit_signals(tsk) No waiters: ++ * tsk->flags |= PF_EXITING; *uaddr == 0x00000PID ++ * mm_release(tsk) Set waiter bit ++ * exit_robust_list(tsk) { *uaddr = 0x80000PID; ++ * Set owner died attach_to_pi_owner() { ++ * *uaddr = 0xC0000000; tsk = get_task(PID); ++ * } if (!tsk->flags & PF_EXITING) { ++ * ... attach(); ++ * tsk->futex_state = } else { ++ * FUTEX_STATE_DEAD; if (tsk->futex_state != ++ * FUTEX_STATE_DEAD) ++ * return -EAGAIN; ++ * return -ESRCH; <--- FAIL ++ * } + * -+ * Therefore we fold these flags depending on the arg_type before comparison. ++ * Returning ESRCH unconditionally is wrong here because the ++ * user space value has been changed by the exiting task. ++ * ++ * The same logic applies to the case where the exiting task is ++ * already gone. 
+ */ -+ if (arg_type & MEM_RDONLY) -+ type &= ~MEM_RDONLY; -+ if (arg_type & PTR_MAYBE_NULL) -+ type &= ~PTR_MAYBE_NULL; ++ if (get_futex_value_locked(&uval2, uaddr)) ++ return -EFAULT; + - for (i = 0; i < ARRAY_SIZE(compatible->types); i++) { - expected = compatible->types[i]; - if (expected == NOT_INIT) -@@ -4950,14 +4989,14 @@ static int check_reg_type(struct bpf_verifier_env *env, u32 regno, - goto found; - } - -- verbose(env, "R%d type=%s expected=", regno, reg_type_str[type]); -+ verbose(env, "R%d type=%s expected=", regno, reg_type_str(env, reg->type)); - for (j = 0; j + 1 < i; j++) -- verbose(env, "%s, ", reg_type_str[compatible->types[j]]); -- verbose(env, "%s\n", reg_type_str[compatible->types[j]]); -+ verbose(env, "%s, ", reg_type_str(env, compatible->types[j])); -+ verbose(env, "%s\n", reg_type_str(env, compatible->types[j])); - return -EACCES; - - found: -- if (type == PTR_TO_BTF_ID) { -+ if (reg->type == PTR_TO_BTF_ID) { - if (!arg_btf_id) { - if (!compatible->btf_id) { - verbose(env, "verifier internal error: missing arg compatible BTF ID\n"); -@@ -5016,15 +5055,14 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 arg, - return -EACCES; - } - -- if (arg_type == ARG_PTR_TO_MAP_VALUE || -- arg_type == ARG_PTR_TO_UNINIT_MAP_VALUE || -- arg_type == ARG_PTR_TO_MAP_VALUE_OR_NULL) { -+ if (base_type(arg_type) == ARG_PTR_TO_MAP_VALUE || -+ base_type(arg_type) == ARG_PTR_TO_UNINIT_MAP_VALUE) { - err = resolve_map_arg_type(env, meta, &arg_type); - if (err) - return err; - } - -- if (register_is_null(reg) && arg_type_may_be_null(arg_type)) -+ if (register_is_null(reg) && type_may_be_null(arg_type)) - /* A NULL register has a SCALAR_VALUE type, so skip - * type checking. - */ -@@ -5093,10 +5131,11 @@ skip_type_check: - err = check_helper_mem_access(env, regno, - meta->map_ptr->key_size, false, - NULL); -- } else if (arg_type == ARG_PTR_TO_MAP_VALUE || -- (arg_type == ARG_PTR_TO_MAP_VALUE_OR_NULL && -- !register_is_null(reg)) || -- arg_type == ARG_PTR_TO_UNINIT_MAP_VALUE) { -+ } else if (base_type(arg_type) == ARG_PTR_TO_MAP_VALUE || -+ base_type(arg_type) == ARG_PTR_TO_UNINIT_MAP_VALUE) { -+ if (type_may_be_null(arg_type) && register_is_null(reg)) -+ return 0; ++ /* If the user space value has changed, try again. */ ++ if (uval2 != uval) ++ return -EAGAIN; + - /* bpf_map_xxx(..., map_ptr, ..., value) call: - * check [value, value + map->value_size) validity - */ -@@ -5750,6 +5789,7 @@ static int __check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn - } - - if (insn->code == (BPF_JMP | BPF_CALL) && -+ insn->src_reg == 0 && - insn->imm == BPF_FUNC_timer_set_callback) { - struct bpf_verifier_state *async_cb; - -@@ -5969,10 +6009,17 @@ static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx) - caller->regs[BPF_REG_0] = *r0; - } - -- /* Transfer references to the caller */ -- err = copy_reference_state(caller, callee); -- if (err) -- return err; -+ /* callback_fn frame should have released its own additions to parent's -+ * reference state at this point, or check_reference_leak would -+ * complain, hence it must be the same as the caller. There is no need -+ * to copy it back. ++ /* ++ * The exiting task did not have a robust list, the robust list was ++ * corrupted or the user space value in *uaddr is simply bogus. ++ * Give up and tell user space. 
+ */ -+ if (!callee->in_callback_fn) { -+ /* Transfer references to the caller */ -+ err = copy_reference_state(caller, callee); -+ if (err) -+ return err; ++ return -ESRCH; ++} ++ ++static void __attach_to_pi_owner(struct task_struct *p, union futex_key *key, ++ struct futex_pi_state **ps) ++{ ++ /* ++ * No existing pi state. First waiter. [2] ++ * ++ * This creates pi_state, we have hb->lock held, this means nothing can ++ * observe this state, wait_lock is irrelevant. ++ */ ++ struct futex_pi_state *pi_state = alloc_pi_state(); ++ ++ /* ++ * Initialize the pi_mutex in locked state and make @p ++ * the owner of it: ++ */ ++ rt_mutex_init_proxy_locked(&pi_state->pi_mutex, p); ++ ++ /* Store the key for possible exit cleanups: */ ++ pi_state->key = *key; ++ ++ WARN_ON(!list_empty(&pi_state->list)); ++ list_add(&pi_state->list, &p->pi_state_list); ++ /* ++ * Assignment without holding pi_state->pi_mutex.wait_lock is safe ++ * because there is no concurrency as the object is not published yet. ++ */ ++ pi_state->owner = p; ++ ++ *ps = pi_state; ++} ++/* ++ * Lookup the task for the TID provided from user space and attach to ++ * it after doing proper sanity checks. ++ */ ++static int attach_to_pi_owner(u32 __user *uaddr, u32 uval, union futex_key *key, ++ struct futex_pi_state **ps, ++ struct task_struct **exiting) ++{ ++ pid_t pid = uval & FUTEX_TID_MASK; ++ struct task_struct *p; ++ ++ /* ++ * We are the first waiter - try to look up the real owner and attach ++ * the new pi_state to it, but bail out when TID = 0 [1] ++ * ++ * The !pid check is paranoid. None of the call sites should end up ++ * with pid == 0, but better safe than sorry. Let the caller retry ++ */ ++ if (!pid) ++ return -EAGAIN; ++ p = find_get_task_by_vpid(pid); ++ if (!p) ++ return handle_exit_race(uaddr, uval, NULL); ++ ++ if (unlikely(p->flags & PF_KTHREAD)) { ++ put_task_struct(p); ++ return -EPERM; + } - - *insn_idx = callee->callsite + 1; - if (env->log.level & BPF_LOG_LEVEL) { -@@ -6005,9 +6052,7 @@ static void do_refine_retval_range(struct bpf_reg_state *regs, int ret_type, - ret_reg->s32_max_value = meta->msize_max_value; - ret_reg->smin_value = -MAX_ERRNO; - ret_reg->s32_min_value = -MAX_ERRNO; -- __reg_deduce_bounds(ret_reg); -- __reg_bound_offset(ret_reg); -- __update_reg_bounds(ret_reg); -+ reg_bounds_sync(ret_reg); - } - - static int -@@ -6062,8 +6107,7 @@ record_func_key(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta, - struct bpf_insn_aux_data *aux = &env->insn_aux_data[insn_idx]; - struct bpf_reg_state *regs = cur_regs(env), *reg; - struct bpf_map *map = meta->map_ptr; -- struct tnum range; -- u64 val; -+ u64 val, max; - int err; - - if (func_id != BPF_FUNC_tail_call) -@@ -6073,10 +6117,11 @@ record_func_key(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta, - return -EINVAL; - } - -- range = tnum_range(0, map->max_entries - 1); - reg = ®s[BPF_REG_3]; -+ val = reg->var_off.value; -+ max = map->max_entries; - -- if (!register_is_const(reg) || !tnum_in(range, reg->var_off)) { -+ if (!(register_is_const(reg) && val < max)) { - bpf_map_key_store(aux, BPF_MAP_KEY_POISON); - return 0; - } -@@ -6084,8 +6129,6 @@ record_func_key(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta, - err = mark_chain_precision(env, BPF_REG_3); - if (err) - return err; -- -- val = reg->var_off.value; - if (bpf_map_key_unseen(aux)) - bpf_map_key_store(aux, val); - else if (!bpf_map_key_poisoned(aux) && -@@ -6097,13 +6140,20 @@ record_func_key(struct bpf_verifier_env *env, struct bpf_call_arg_meta 
*meta, - static int check_reference_leak(struct bpf_verifier_env *env) - { - struct bpf_func_state *state = cur_func(env); -+ bool refs_lingering = false; - int i; - -+ if (state->frameno && !state->in_callback_fn) -+ return 0; + - for (i = 0; i < state->acquired_refs; i++) { -+ if (state->in_callback_fn && state->refs[i].callback_ref != state->frameno) -+ continue; - verbose(env, "Unreleased reference id=%d alloc_insn=%d\n", - state->refs[i].id, state->refs[i].insn_idx); -+ refs_lingering = true; - } -- return state->acquired_refs ? -EINVAL : 0; -+ return refs_lingering ? -EINVAL : 0; - } - - static int check_bpf_snprintf_call(struct bpf_verifier_env *env, -@@ -6170,6 +6220,8 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn - int *insn_idx_p) - { - const struct bpf_func_proto *fn = NULL; -+ enum bpf_return_type ret_type; -+ enum bpf_type_flag ret_flag; - struct bpf_reg_state *regs; - struct bpf_call_arg_meta meta; - int insn_idx = *insn_idx_p; -@@ -6303,13 +6355,14 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn - regs[BPF_REG_0].subreg_def = DEF_NOT_SUBREG; - - /* update return register (already marked as written above) */ -- if (fn->ret_type == RET_INTEGER) { -+ ret_type = fn->ret_type; -+ ret_flag = type_flag(fn->ret_type); -+ if (ret_type == RET_INTEGER) { - /* sets type to SCALAR_VALUE */ - mark_reg_unknown(env, regs, BPF_REG_0); -- } else if (fn->ret_type == RET_VOID) { -+ } else if (ret_type == RET_VOID) { - regs[BPF_REG_0].type = NOT_INIT; -- } else if (fn->ret_type == RET_PTR_TO_MAP_VALUE_OR_NULL || -- fn->ret_type == RET_PTR_TO_MAP_VALUE) { -+ } else if (base_type(ret_type) == RET_PTR_TO_MAP_VALUE) { - /* There is no offset yet applied, variable or fixed */ - mark_reg_known_zero(env, regs, BPF_REG_0); - /* remember map_ptr, so that check_map_access() -@@ -6323,28 +6376,25 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn - } - regs[BPF_REG_0].map_ptr = meta.map_ptr; - regs[BPF_REG_0].map_uid = meta.map_uid; -- if (fn->ret_type == RET_PTR_TO_MAP_VALUE) { -- regs[BPF_REG_0].type = PTR_TO_MAP_VALUE; -- if (map_value_has_spin_lock(meta.map_ptr)) -- regs[BPF_REG_0].id = ++env->id_gen; -- } else { -- regs[BPF_REG_0].type = PTR_TO_MAP_VALUE_OR_NULL; -+ regs[BPF_REG_0].type = PTR_TO_MAP_VALUE | ret_flag; -+ if (!type_may_be_null(ret_type) && -+ map_value_has_spin_lock(meta.map_ptr)) { -+ regs[BPF_REG_0].id = ++env->id_gen; - } -- } else if (fn->ret_type == RET_PTR_TO_SOCKET_OR_NULL) { -+ } else if (base_type(ret_type) == RET_PTR_TO_SOCKET) { - mark_reg_known_zero(env, regs, BPF_REG_0); -- regs[BPF_REG_0].type = PTR_TO_SOCKET_OR_NULL; -- } else if (fn->ret_type == RET_PTR_TO_SOCK_COMMON_OR_NULL) { -+ regs[BPF_REG_0].type = PTR_TO_SOCKET | ret_flag; -+ } else if (base_type(ret_type) == RET_PTR_TO_SOCK_COMMON) { - mark_reg_known_zero(env, regs, BPF_REG_0); -- regs[BPF_REG_0].type = PTR_TO_SOCK_COMMON_OR_NULL; -- } else if (fn->ret_type == RET_PTR_TO_TCP_SOCK_OR_NULL) { -+ regs[BPF_REG_0].type = PTR_TO_SOCK_COMMON | ret_flag; -+ } else if (base_type(ret_type) == RET_PTR_TO_TCP_SOCK) { - mark_reg_known_zero(env, regs, BPF_REG_0); -- regs[BPF_REG_0].type = PTR_TO_TCP_SOCK_OR_NULL; -- } else if (fn->ret_type == RET_PTR_TO_ALLOC_MEM_OR_NULL) { -+ regs[BPF_REG_0].type = PTR_TO_TCP_SOCK | ret_flag; -+ } else if (base_type(ret_type) == RET_PTR_TO_ALLOC_MEM) { - mark_reg_known_zero(env, regs, BPF_REG_0); -- regs[BPF_REG_0].type = PTR_TO_MEM_OR_NULL; -+ regs[BPF_REG_0].type = PTR_TO_MEM | 
ret_flag; - regs[BPF_REG_0].mem_size = meta.mem_size; -- } else if (fn->ret_type == RET_PTR_TO_MEM_OR_BTF_ID_OR_NULL || -- fn->ret_type == RET_PTR_TO_MEM_OR_BTF_ID) { -+ } else if (base_type(ret_type) == RET_PTR_TO_MEM_OR_BTF_ID) { - const struct btf_type *t; - - mark_reg_known_zero(env, regs, BPF_REG_0); -@@ -6362,29 +6412,30 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn - tname, PTR_ERR(ret)); - return -EINVAL; - } -- regs[BPF_REG_0].type = -- fn->ret_type == RET_PTR_TO_MEM_OR_BTF_ID ? -- PTR_TO_MEM : PTR_TO_MEM_OR_NULL; -+ regs[BPF_REG_0].type = PTR_TO_MEM | ret_flag; - regs[BPF_REG_0].mem_size = tsize; - } else { -- regs[BPF_REG_0].type = -- fn->ret_type == RET_PTR_TO_MEM_OR_BTF_ID ? -- PTR_TO_BTF_ID : PTR_TO_BTF_ID_OR_NULL; -+ /* MEM_RDONLY may be carried from ret_flag, but it -+ * doesn't apply on PTR_TO_BTF_ID. Fold it, otherwise -+ * it will confuse the check of PTR_TO_BTF_ID in -+ * check_mem_access(). -+ */ -+ ret_flag &= ~MEM_RDONLY; ++ /* ++ * We need to look at the task state to figure out, whether the ++ * task is exiting. To protect against the change of the task state ++ * in futex_exit_release(), we do this protected by p->pi_lock: ++ */ ++ raw_spin_lock_irq(&p->pi_lock); ++ if (unlikely(p->futex_state != FUTEX_STATE_OK)) { ++ /* ++ * The task is on the way out. When the futex state is ++ * FUTEX_STATE_DEAD, we know that the task has finished ++ * the cleanup: ++ */ ++ int ret = handle_exit_race(uaddr, uval, p); + -+ regs[BPF_REG_0].type = PTR_TO_BTF_ID | ret_flag; - regs[BPF_REG_0].btf = meta.ret_btf; - regs[BPF_REG_0].btf_id = meta.ret_btf_id; - } -- } else if (fn->ret_type == RET_PTR_TO_BTF_ID_OR_NULL || -- fn->ret_type == RET_PTR_TO_BTF_ID) { -+ } else if (base_type(ret_type) == RET_PTR_TO_BTF_ID) { - int ret_btf_id; - - mark_reg_known_zero(env, regs, BPF_REG_0); -- regs[BPF_REG_0].type = fn->ret_type == RET_PTR_TO_BTF_ID ? 
-- PTR_TO_BTF_ID : -- PTR_TO_BTF_ID_OR_NULL; -+ regs[BPF_REG_0].type = PTR_TO_BTF_ID | ret_flag; - ret_btf_id = *fn->ret_btf_id; - if (ret_btf_id == 0) { -- verbose(env, "invalid return type %d of func %s#%d\n", -- fn->ret_type, func_id_name(func_id), func_id); -+ verbose(env, "invalid return type %u of func %s#%d\n", -+ base_type(ret_type), func_id_name(func_id), -+ func_id); - return -EINVAL; - } - /* current BPF helper definitions are only coming from -@@ -6393,12 +6444,12 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn - regs[BPF_REG_0].btf = btf_vmlinux; - regs[BPF_REG_0].btf_id = ret_btf_id; - } else { -- verbose(env, "unknown return type %d of func %s#%d\n", -- fn->ret_type, func_id_name(func_id), func_id); -+ verbose(env, "unknown return type %u of func %s#%d\n", -+ base_type(ret_type), func_id_name(func_id), func_id); - return -EINVAL; - } - -- if (reg_type_may_be_null(regs[BPF_REG_0].type)) -+ if (type_may_be_null(regs[BPF_REG_0].type)) - regs[BPF_REG_0].id = ++env->id_gen; - - if (is_ptr_cast_function(func_id)) { -@@ -6597,25 +6648,25 @@ static bool check_reg_sane_offset(struct bpf_verifier_env *env, - - if (known && (val >= BPF_MAX_VAR_OFF || val <= -BPF_MAX_VAR_OFF)) { - verbose(env, "math between %s pointer and %lld is not allowed\n", -- reg_type_str[type], val); -+ reg_type_str(env, type), val); - return false; - } - - if (reg->off >= BPF_MAX_VAR_OFF || reg->off <= -BPF_MAX_VAR_OFF) { - verbose(env, "%s pointer offset %d is not allowed\n", -- reg_type_str[type], reg->off); -+ reg_type_str(env, type), reg->off); - return false; - } - - if (smin == S64_MIN) { - verbose(env, "math between %s pointer and register with unbounded min value is not allowed\n", -- reg_type_str[type]); -+ reg_type_str(env, type)); - return false; - } - - if (smin >= BPF_MAX_VAR_OFF || smin <= -BPF_MAX_VAR_OFF) { - verbose(env, "value %lld makes %s pointer be out of bounds\n", -- smin, reg_type_str[type]); -+ smin, reg_type_str(env, type)); - return false; - } - -@@ -6992,11 +7043,13 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, - return -EACCES; - } - -- switch (ptr_reg->type) { -- case PTR_TO_MAP_VALUE_OR_NULL: -+ if (ptr_reg->type & PTR_MAYBE_NULL) { - verbose(env, "R%d pointer arithmetic on %s prohibited, null-check it first\n", -- dst, reg_type_str[ptr_reg->type]); -+ dst, reg_type_str(env, ptr_reg->type)); - return -EACCES; ++ raw_spin_unlock_irq(&p->pi_lock); ++ /* ++ * If the owner task is between FUTEX_STATE_EXITING and ++ * FUTEX_STATE_DEAD then store the task pointer and keep ++ * the reference on the task struct. The calling code will ++ * drop all locks, wait for the task to reach ++ * FUTEX_STATE_DEAD and then drop the refcount. This is ++ * required to prevent a live lock when the current task ++ * preempted the exiting task between the two states. 
++ */ ++ if (ret == -EBUSY) ++ *exiting = p; ++ else ++ put_task_struct(p); ++ return ret; + } + -+ switch (base_type(ptr_reg->type)) { - case CONST_PTR_TO_MAP: - /* smin_val represents the known value */ - if (known && smin_val == 0 && opcode == BPF_ADD) -@@ -7004,16 +7057,16 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, - fallthrough; - case PTR_TO_PACKET_END: - case PTR_TO_SOCKET: -- case PTR_TO_SOCKET_OR_NULL: - case PTR_TO_SOCK_COMMON: -- case PTR_TO_SOCK_COMMON_OR_NULL: - case PTR_TO_TCP_SOCK: -- case PTR_TO_TCP_SOCK_OR_NULL: - case PTR_TO_XDP_SOCK: -+reject: - verbose(env, "R%d pointer arithmetic on %s prohibited\n", -- dst, reg_type_str[ptr_reg->type]); -+ dst, reg_type_str(env, ptr_reg->type)); - return -EACCES; - default: -+ if (type_may_be_null(ptr_reg->type)) -+ goto reject; - break; - } - -@@ -7164,11 +7217,7 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, - - if (!check_reg_sane_offset(env, dst_reg, ptr_reg->type)) - return -EINVAL; -- -- __update_reg_bounds(dst_reg); -- __reg_deduce_bounds(dst_reg); -- __reg_bound_offset(dst_reg); -- -+ reg_bounds_sync(dst_reg); - if (sanitize_check_bounds(env, insn, dst_reg) < 0) - return -EACCES; - if (sanitize_needed(opcode)) { -@@ -7906,10 +7955,7 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env, - /* ALU32 ops are zero extended into 64bit register */ - if (alu32) - zext_32_to_64(dst_reg); -- -- __update_reg_bounds(dst_reg); -- __reg_deduce_bounds(dst_reg); -- __reg_bound_offset(dst_reg); -+ reg_bounds_sync(dst_reg); - return 0; - } - -@@ -8098,6 +8144,7 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn) - insn->dst_reg); - } - zext_32_to_64(dst_reg); -+ reg_bounds_sync(dst_reg); - } - } else { - /* case: R = imm -@@ -8212,7 +8259,7 @@ static void find_good_pkt_pointers(struct bpf_verifier_state *vstate, - - new_range = dst_reg->off; - if (range_right_open) -- new_range--; -+ new_range++; - - /* Examples for register markings: - * -@@ -8535,26 +8582,33 @@ static void reg_set_min_max(struct bpf_reg_state *true_reg, - return; - - switch (opcode) { -+ /* JEQ/JNE comparison doesn't change the register equivalence. -+ * -+ * r1 = r2; -+ * if (r1 == 42) goto label; -+ * ... -+ * label: // here both r1 and r2 are known to be 42. -+ * -+ * Hence when marking register as known preserve it's ID. ++ __attach_to_pi_owner(p, key, ps); ++ raw_spin_unlock_irq(&p->pi_lock); ++ ++ put_task_struct(p); ++ ++ return 0; ++} ++ ++static int lock_pi_update_atomic(u32 __user *uaddr, u32 uval, u32 newval) ++{ ++ int err; ++ u32 curval; ++ ++ if (unlikely(should_fail_futex(true))) ++ return -EFAULT; ++ ++ err = cmpxchg_futex_value_locked(&curval, uaddr, uval, newval); ++ if (unlikely(err)) ++ return err; ++ ++ /* If user space value changed, let the caller retry */ ++ return curval != uval ? -EAGAIN : 0; ++} ++ ++/** ++ * futex_lock_pi_atomic() - Atomic work required to acquire a pi aware futex ++ * @uaddr: the pi futex user address ++ * @hb: the pi futex hash bucket ++ * @key: the futex key associated with uaddr and hb ++ * @ps: the pi_state pointer where we store the result of the ++ * lookup ++ * @task: the task to perform the atomic lock work for. This will ++ * be "current" except in the case of requeue pi. 
++ * @exiting: Pointer to store the task pointer of the owner task ++ * which is in the middle of exiting ++ * @set_waiters: force setting the FUTEX_WAITERS bit (1) or not (0) ++ * ++ * Return: ++ * - 0 - ready to wait; ++ * - 1 - acquired the lock; ++ * - <0 - error ++ * ++ * The hb->lock must be held by the caller. ++ * ++ * @exiting is only set when the return value is -EBUSY. If so, this holds ++ * a refcount on the exiting task on return and the caller needs to drop it ++ * after waiting for the exit to complete. ++ */ ++static int futex_lock_pi_atomic(u32 __user *uaddr, struct futex_hash_bucket *hb, ++ union futex_key *key, ++ struct futex_pi_state **ps, ++ struct task_struct *task, ++ struct task_struct **exiting, ++ int set_waiters) ++{ ++ u32 uval, newval, vpid = task_pid_vnr(task); ++ struct futex_q *top_waiter; ++ int ret; ++ ++ /* ++ * Read the user space value first so we can validate a few ++ * things before proceeding further. + */ - case BPF_JEQ: -+ if (is_jmp32) { -+ __mark_reg32_known(true_reg, val32); -+ true_32off = tnum_subreg(true_reg->var_off); -+ } else { -+ ___mark_reg_known(true_reg, val); -+ true_64off = true_reg->var_off; -+ } -+ break; - case BPF_JNE: -- { -- struct bpf_reg_state *reg = -- opcode == BPF_JEQ ? true_reg : false_reg; -- -- /* JEQ/JNE comparison doesn't change the register equivalence. -- * r1 = r2; -- * if (r1 == 42) goto label; -- * ... -- * label: // here both r1 and r2 are known to be 42. -- * -- * Hence when marking register as known preserve it's ID. -- */ -- if (is_jmp32) -- __mark_reg32_known(reg, val32); -- else -- ___mark_reg_known(reg, val); -+ if (is_jmp32) { -+ __mark_reg32_known(false_reg, val32); -+ false_32off = tnum_subreg(false_reg->var_off); -+ } else { -+ ___mark_reg_known(false_reg, val); -+ false_64off = false_reg->var_off; -+ } - break; -- } - case BPF_JSET: - if (is_jmp32) { - false_32off = tnum_and(false_32off, tnum_const(~val32)); -@@ -8693,21 +8747,8 @@ static void __reg_combine_min_max(struct bpf_reg_state *src_reg, - dst_reg->smax_value); - src_reg->var_off = dst_reg->var_off = tnum_intersect(src_reg->var_off, - dst_reg->var_off); -- /* We might have learned new bounds from the var_off. */ -- __update_reg_bounds(src_reg); -- __update_reg_bounds(dst_reg); -- /* We might have learned something about the sign bit. */ -- __reg_deduce_bounds(src_reg); -- __reg_deduce_bounds(dst_reg); -- /* We might have learned some bits from the bounds. */ -- __reg_bound_offset(src_reg); -- __reg_bound_offset(dst_reg); -- /* Intersecting with the old var_off might have improved our bounds -- * slightly. e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc), -- * then new var_off is (0; 0x7f...fc) which improves our umax. -- */ -- __update_reg_bounds(src_reg); -- __update_reg_bounds(dst_reg); -+ reg_bounds_sync(src_reg); -+ reg_bounds_sync(dst_reg); - } - - static void reg_combine_min_max(struct bpf_reg_state *true_src, -@@ -8730,17 +8771,17 @@ static void mark_ptr_or_null_reg(struct bpf_func_state *state, - struct bpf_reg_state *reg, u32 id, - bool is_null) - { -- if (reg_type_may_be_null(reg->type) && reg->id == id && -+ if (type_may_be_null(reg->type) && reg->id == id && - !WARN_ON_ONCE(!reg->id)) { -- /* Old offset (both fixed and variable parts) should -- * have been known-zero, because we don't allow pointer -- * arithmetic on pointers that might be NULL. 
-- */ - if (WARN_ON_ONCE(reg->smin_value || reg->smax_value || - !tnum_equals_const(reg->var_off, 0) || - reg->off)) { -- __mark_reg_known_zero(reg); -- reg->off = 0; -+ /* Old offset (both fixed and variable parts) should -+ * have been known-zero, because we don't allow pointer -+ * arithmetic on pointers that might be NULL. If we -+ * see this happening, don't convert the register. -+ */ -+ return; - } - if (is_null) { - reg->type = SCALAR_VALUE; -@@ -9108,7 +9149,7 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env, - */ - if (!is_jmp32 && BPF_SRC(insn->code) == BPF_K && - insn->imm == 0 && (opcode == BPF_JEQ || opcode == BPF_JNE) && -- reg_type_may_be_null(dst_reg->type)) { -+ type_may_be_null(dst_reg->type)) { - /* Mark all identical registers in each branch as either - * safe or unknown depending R == 0 or R != 0 conditional. - */ -@@ -9159,11 +9200,15 @@ static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn) - return 0; - } - -- if (insn->src_reg == BPF_PSEUDO_BTF_ID) { -- mark_reg_known_zero(env, regs, insn->dst_reg); -+ /* All special src_reg cases are listed below. From this point onwards -+ * we either succeed and assign a corresponding dst_reg->type after -+ * zeroing the offset, or fail and reject the program. ++ if (get_futex_value_locked(&uval, uaddr)) ++ return -EFAULT; ++ ++ if (unlikely(should_fail_futex(true))) ++ return -EFAULT; ++ ++ /* ++ * Detect deadlocks. + */ -+ mark_reg_known_zero(env, regs, insn->dst_reg); - -+ if (insn->src_reg == BPF_PSEUDO_BTF_ID) { - dst_reg->type = aux->btf_var.reg_type; -- switch (dst_reg->type) { -+ switch (base_type(dst_reg->type)) { - case PTR_TO_MEM: - dst_reg->mem_size = aux->btf_var.mem_size; - break; -@@ -9181,7 +9226,8 @@ static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn) - - if (insn->src_reg == BPF_PSEUDO_FUNC) { - struct bpf_prog_aux *aux = env->prog->aux; -- u32 subprogno = insn[1].imm; -+ u32 subprogno = find_subprog(env, -+ env->insn_idx + insn->imm + 1); - - if (!aux->func_info) { - verbose(env, "missing btf func_info\n"); -@@ -9198,7 +9244,6 @@ static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn) - } - - map = env->used_maps[aux->map_index]; -- mark_reg_known_zero(env, regs, insn->dst_reg); - dst_reg->map_ptr = map; - - if (insn->src_reg == BPF_PSEUDO_MAP_VALUE || -@@ -9361,7 +9406,7 @@ static int check_return_code(struct bpf_verifier_env *env) - /* enforce return zero from async callbacks like timer */ - if (reg->type != SCALAR_VALUE) { - verbose(env, "In async callback the register R0 is not a known value (%s)\n", -- reg_type_str[reg->type]); -+ reg_type_str(env, reg->type)); - return -EINVAL; - } - -@@ -9375,7 +9420,7 @@ static int check_return_code(struct bpf_verifier_env *env) - if (is_subprog) { - if (reg->type != SCALAR_VALUE) { - verbose(env, "At subprogram exit the register R0 is not a scalar value (%s)\n", -- reg_type_str[reg->type]); -+ reg_type_str(env, reg->type)); - return -EINVAL; - } - return 0; -@@ -9439,7 +9484,7 @@ static int check_return_code(struct bpf_verifier_env *env) - - if (reg->type != SCALAR_VALUE) { - verbose(env, "At program exit the register R0 is not a known value (%s)\n", -- reg_type_str[reg->type]); -+ reg_type_str(env, reg->type)); - return -EINVAL; - } - -@@ -10220,7 +10265,7 @@ static bool regsafe(struct bpf_verifier_env *env, struct bpf_reg_state *rold, - return true; - if (rcur->type == NOT_INIT) - return false; -- switch (rold->type) { -+ switch (base_type(rold->type)) { - case SCALAR_VALUE: - if 
(env->explore_alu_limits) - return false; -@@ -10242,6 +10287,22 @@ static bool regsafe(struct bpf_verifier_env *env, struct bpf_reg_state *rold, - } - case PTR_TO_MAP_KEY: - case PTR_TO_MAP_VALUE: -+ /* a PTR_TO_MAP_VALUE could be safe to use as a -+ * PTR_TO_MAP_VALUE_OR_NULL into the same map. -+ * However, if the old PTR_TO_MAP_VALUE_OR_NULL then got NULL- -+ * checked, doing so could have affected others with the same -+ * id, and we can't check for that because we lost the id when -+ * we converted to a PTR_TO_MAP_VALUE. ++ if ((unlikely((uval & FUTEX_TID_MASK) == vpid))) ++ return -EDEADLK; ++ ++ if ((unlikely(should_fail_futex(true)))) ++ return -EDEADLK; ++ ++ /* ++ * Lookup existing state first. If it exists, try to attach to ++ * its pi_state. ++ */ ++ top_waiter = futex_top_waiter(hb, key); ++ if (top_waiter) ++ return attach_to_pi_state(uaddr, uval, top_waiter->pi_state, ps); ++ ++ /* ++ * No waiter and user TID is 0. We are here because the ++ * waiters or the owner died bit is set or called from ++ * requeue_cmp_pi or for whatever reason something took the ++ * syscall. ++ */ ++ if (!(uval & FUTEX_TID_MASK)) { ++ /* ++ * We take over the futex. No other waiters and the user space ++ * TID is 0. We preserve the owner died bit. + */ -+ if (type_may_be_null(rold->type)) { -+ if (!type_may_be_null(rcur->type)) -+ return false; -+ if (memcmp(rold, rcur, offsetof(struct bpf_reg_state, id))) -+ return false; -+ /* Check our ids match any regs they're supposed to */ -+ return check_ids(rold->id, rcur->id, idmap); -+ } ++ newval = uval & FUTEX_OWNER_DIED; ++ newval |= vpid; + - /* If the new min/max/var_off satisfy the old ones and - * everything else matches, we are OK. - * 'id' is not compared, since it's only used for maps with -@@ -10253,20 +10314,6 @@ static bool regsafe(struct bpf_verifier_env *env, struct bpf_reg_state *rold, - return memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)) == 0 && - range_within(rold, rcur) && - tnum_in(rold->var_off, rcur->var_off); -- case PTR_TO_MAP_VALUE_OR_NULL: -- /* a PTR_TO_MAP_VALUE could be safe to use as a -- * PTR_TO_MAP_VALUE_OR_NULL into the same map. -- * However, if the old PTR_TO_MAP_VALUE_OR_NULL then got NULL- -- * checked, doing so could have affected others with the same -- * id, and we can't check for that because we lost the id when -- * we converted to a PTR_TO_MAP_VALUE. -- */ -- if (rcur->type != PTR_TO_MAP_VALUE_OR_NULL) -- return false; -- if (memcmp(rold, rcur, offsetof(struct bpf_reg_state, id))) -- return false; -- /* Check our ids match any regs they're supposed to */ -- return check_ids(rold->id, rcur->id, idmap); - case PTR_TO_PACKET_META: - case PTR_TO_PACKET: - if (rcur->type != rold->type) -@@ -10295,11 +10342,8 @@ static bool regsafe(struct bpf_verifier_env *env, struct bpf_reg_state *rold, - case PTR_TO_PACKET_END: - case PTR_TO_FLOW_KEYS: - case PTR_TO_SOCKET: -- case PTR_TO_SOCKET_OR_NULL: - case PTR_TO_SOCK_COMMON: -- case PTR_TO_SOCK_COMMON_OR_NULL: - case PTR_TO_TCP_SOCK: -- case PTR_TO_TCP_SOCK_OR_NULL: - case PTR_TO_XDP_SOCK: - /* Only valid matches are exact, which memcmp() above - * would have accepted -@@ -10825,17 +10869,13 @@ next: - /* Return true if it's OK to have the same insn return a different type. 
*/ - static bool reg_type_mismatch_ok(enum bpf_reg_type type) - { -- switch (type) { -+ switch (base_type(type)) { - case PTR_TO_CTX: - case PTR_TO_SOCKET: -- case PTR_TO_SOCKET_OR_NULL: - case PTR_TO_SOCK_COMMON: -- case PTR_TO_SOCK_COMMON_OR_NULL: - case PTR_TO_TCP_SOCK: -- case PTR_TO_TCP_SOCK_OR_NULL: - case PTR_TO_XDP_SOCK: - case PTR_TO_BTF_ID: -- case PTR_TO_BTF_ID_OR_NULL: - return false; - default: - return true; -@@ -11059,7 +11099,7 @@ static int do_check(struct bpf_verifier_env *env) - if (is_ctx_reg(env, insn->dst_reg)) { - verbose(env, "BPF_ST stores into R%d %s is not allowed\n", - insn->dst_reg, -- reg_type_str[reg_state(env, insn->dst_reg)->type]); -+ reg_type_str(env, reg_state(env, insn->dst_reg)->type)); - return -EACCES; - } - -@@ -11128,6 +11168,16 @@ static int do_check(struct bpf_verifier_env *env) - return -EINVAL; - } - -+ /* We must do check_reference_leak here before -+ * prepare_func_exit to handle the case when -+ * state->curframe > 0, it may be a callback -+ * function, for which reference_state must -+ * match caller reference state when it exits. -+ */ -+ err = check_reference_leak(env); -+ if (err) -+ return err; ++ /* The futex requeue_pi code can enforce the waiters bit */ ++ if (set_waiters) ++ newval |= FUTEX_WAITERS; + - if (state->curframe) { - /* exit from nested function */ - err = prepare_func_exit(env, &env->insn_idx); -@@ -11137,10 +11187,6 @@ static int do_check(struct bpf_verifier_env *env) - continue; - } - -- err = check_reference_leak(env); -- if (err) -- return err; -- - err = check_return_code(env); - if (err) - return err; -@@ -11310,7 +11356,7 @@ static int check_pseudo_btf_id(struct bpf_verifier_env *env, - err = -EINVAL; - goto err_put; - } -- aux->btf_var.reg_type = PTR_TO_MEM; -+ aux->btf_var.reg_type = PTR_TO_MEM | MEM_RDONLY; - aux->btf_var.mem_size = tsize; - } else { - aux->btf_var.reg_type = PTR_TO_BTF_ID; -@@ -11435,6 +11481,13 @@ static int check_map_prog_compatibility(struct bpf_verifier_env *env, - } - } - -+ if (map_value_has_timer(map)) { -+ if (is_tracing_prog_type(prog_type)) { -+ verbose(env, "tracing progs cannot use bpf_timer yet\n"); -+ return -EINVAL; ++ ret = lock_pi_update_atomic(uaddr, uval, newval); ++ if (ret) ++ return ret; ++ ++ /* ++ * If the waiter bit was requested the caller also needs PI ++ * state attached to the new owner of the user space futex. ++ * ++ * @task is guaranteed to be alive and it cannot be exiting ++ * because it is either sleeping or waiting in ++ * futex_requeue_pi_wakeup_sync(). ++ * ++ * No need to do the full attach_to_pi_owner() exercise ++ * because @task is known and valid. ++ */ ++ if (set_waiters) { ++ raw_spin_lock_irq(&task->pi_lock); ++ __attach_to_pi_owner(task, key, ps); ++ raw_spin_unlock_irq(&task->pi_lock); + } ++ return 1; + } + - if ((bpf_prog_is_dev_bound(prog->aux) || bpf_map_is_dev_bound(map)) && - !bpf_offload_prog_map_match(prog, map)) { - verbose(env, "offload device mismatch between prog and map\n"); -@@ -12350,14 +12403,9 @@ static int jit_subprogs(struct bpf_verifier_env *env) - return 0; - - for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) { -- if (bpf_pseudo_func(insn)) { -- env->insn_aux_data[i].call_imm = insn->imm; -- /* subprog is encoded in insn[1].imm */ -+ if (!bpf_pseudo_func(insn) && !bpf_pseudo_call(insn)) - continue; -- } - -- if (!bpf_pseudo_call(insn)) -- continue; - /* Upon error here we cannot fall back to interpreter but - * need a hard reject of the program. Thus -EFAULT is - * propagated in any case. 
-@@ -12378,6 +12426,12 @@ static int jit_subprogs(struct bpf_verifier_env *env) - env->insn_aux_data[i].call_imm = insn->imm; - /* point imm to __bpf_call_base+1 from JITs point of view */ - insn->imm = 1; -+ if (bpf_pseudo_func(insn)) -+ /* jit (e.g. x86_64) may emit fewer instructions -+ * if it learns a u32 imm is the same as a u64 imm. -+ * Force a non zero here. -+ */ -+ insn[1].imm = 1; - } - - err = bpf_prog_alloc_jited_linfo(prog); -@@ -12413,6 +12467,7 @@ static int jit_subprogs(struct bpf_verifier_env *env) - /* Below members will be freed only at prog->aux */ - func[i]->aux->btf = prog->aux->btf; - func[i]->aux->func_info = prog->aux->func_info; -+ func[i]->aux->func_info_cnt = prog->aux->func_info_cnt; - func[i]->aux->poke_tab = prog->aux->poke_tab; - func[i]->aux->size_poke_tab = prog->aux->size_poke_tab; - -@@ -12425,9 +12480,6 @@ static int jit_subprogs(struct bpf_verifier_env *env) - poke->aux = func[i]->aux; - } - -- /* Use bpf_prog_F_tag to indicate functions in stack traces. -- * Long term would need debug info to populate names -- */ - func[i]->aux->name[0] = 'F'; - func[i]->aux->stack_depth = env->subprog_info[i].stack_depth; - func[i]->jit_requested = 1; -@@ -12461,7 +12513,7 @@ static int jit_subprogs(struct bpf_verifier_env *env) - insn = func[i]->insnsi; - for (j = 0; j < func[i]->len; j++, insn++) { - if (bpf_pseudo_func(insn)) { -- subprog = insn[1].imm; -+ subprog = insn->off; - insn[0].imm = (u32)(long)func[subprog]->bpf_func; - insn[1].imm = ((u64)(long)func[subprog]->bpf_func) >> 32; - continue; -@@ -12513,7 +12565,8 @@ static int jit_subprogs(struct bpf_verifier_env *env) - for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) { - if (bpf_pseudo_func(insn)) { - insn[0].imm = env->insn_aux_data[i].call_imm; -- insn[1].imm = find_subprog(env, i + insn[0].imm + 1); -+ insn[1].imm = insn->off; -+ insn->off = 0; - continue; - } - if (!bpf_pseudo_call(insn)) -@@ -13125,7 +13178,7 @@ static int do_check_common(struct bpf_verifier_env *env, int subprog) - mark_reg_known_zero(env, regs, i); - else if (regs[i].type == SCALAR_VALUE) - mark_reg_unknown(env, regs, i); -- else if (regs[i].type == PTR_TO_MEM_OR_NULL) { -+ else if (base_type(regs[i].type) == PTR_TO_MEM) { - const u32 mem_size = regs[i].mem_size; - - mark_reg_known_zero(env, regs, i); -@@ -13713,11 +13766,11 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr) - log->ubuf = (char __user *) (unsigned long) attr->log_buf; - log->len_total = attr->log_size; - -- ret = -EINVAL; - /* log attributes have to be sane */ -- if (log->len_total < 128 || log->len_total > UINT_MAX >> 2 || -- !log->level || !log->ubuf || log->level & ~BPF_LOG_MASK) -+ if (!bpf_verifier_log_attr_valid(log)) { ++ /* ++ * First waiter. Set the waiters bit before attaching ourself to ++ * the owner. If owner tries to unlock, it will be forced into ++ * the kernel and blocked on hb->lock. ++ */ ++ newval = uval | FUTEX_WAITERS; ++ ret = lock_pi_update_atomic(uaddr, uval, newval); ++ if (ret) ++ return ret; ++ /* ++ * If the update of the user space value succeeded, we try to ++ * attach to the owner. If that fails, no harm done, we only ++ * set the FUTEX_WAITERS bit in the user space variable. ++ */ ++ return attach_to_pi_owner(uaddr, newval, key, ps, exiting); ++} ++ ++/** ++ * __unqueue_futex() - Remove the futex_q from its futex_hash_bucket ++ * @q: The futex_q to unqueue ++ * ++ * The q->lock_ptr must not be NULL and must be held by the caller. 
++ */ ++static void __unqueue_futex(struct futex_q *q) ++{ ++ struct futex_hash_bucket *hb; ++ ++ if (WARN_ON_SMP(!q->lock_ptr) || WARN_ON(plist_node_empty(&q->list))) ++ return; ++ lockdep_assert_held(q->lock_ptr); ++ ++ hb = container_of(q->lock_ptr, struct futex_hash_bucket, lock); ++ plist_del(&q->list, &hb->chain); ++ hb_waiters_dec(hb); ++} ++ ++/* ++ * The hash bucket lock must be held when this is called. ++ * Afterwards, the futex_q must not be accessed. Callers ++ * must ensure to later call wake_up_q() for the actual ++ * wakeups to occur. ++ */ ++static void mark_wake_futex(struct wake_q_head *wake_q, struct futex_q *q) ++{ ++ struct task_struct *p = q->task; ++ ++ if (WARN(q->pi_state || q->rt_waiter, "refusing to wake PI futex\n")) ++ return; ++ ++ get_task_struct(p); ++ __unqueue_futex(q); ++ /* ++ * The waiting task can free the futex_q as soon as q->lock_ptr = NULL ++ * is written, without taking any locks. This is possible in the event ++ * of a spurious wakeup, for example. A memory barrier is required here ++ * to prevent the following store to lock_ptr from getting ahead of the ++ * plist_del in __unqueue_futex(). ++ */ ++ smp_store_release(&q->lock_ptr, NULL); ++ ++ /* ++ * Queue the task for later wakeup for after we've released ++ * the hb->lock. ++ */ ++ wake_q_add_safe(wake_q, p); ++} ++ ++/* ++ * Caller must hold a reference on @pi_state. ++ */ ++static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_pi_state *pi_state) ++{ ++ struct rt_mutex_waiter *top_waiter; ++ struct task_struct *new_owner; ++ bool postunlock = false; ++ DEFINE_RT_WAKE_Q(wqh); ++ u32 curval, newval; ++ int ret = 0; ++ ++ top_waiter = rt_mutex_top_waiter(&pi_state->pi_mutex); ++ if (WARN_ON_ONCE(!top_waiter)) { ++ /* ++ * As per the comment in futex_unlock_pi() this should not happen. ++ * ++ * When this happens, give up our locks and try again, giving ++ * the futex_lock_pi() instance time to complete, either by ++ * waiting on the rtmutex or removing itself from the futex ++ * queue. ++ */ ++ ret = -EAGAIN; ++ goto out_unlock; ++ } ++ ++ new_owner = top_waiter->task; ++ ++ /* ++ * We pass it to the next owner. The WAITERS bit is always kept ++ * enabled while there is PI state around. We cleanup the owner ++ * died bit, because we are the owner. ++ */ ++ newval = FUTEX_WAITERS | task_pid_vnr(new_owner); ++ ++ if (unlikely(should_fail_futex(true))) { ++ ret = -EFAULT; ++ goto out_unlock; ++ } ++ ++ ret = cmpxchg_futex_value_locked(&curval, uaddr, uval, newval); ++ if (!ret && (curval != uval)) { ++ /* ++ * If a unconditional UNLOCK_PI operation (user space did not ++ * try the TID->0 transition) raced with a waiter setting the ++ * FUTEX_WAITERS flag between get_user() and locking the hash ++ * bucket lock, retry the operation. ++ */ ++ if ((FUTEX_TID_MASK & curval) == uval) ++ ret = -EAGAIN; ++ else + ret = -EINVAL; - goto err_unlock; ++ } ++ ++ if (!ret) { ++ /* ++ * This is a point of no return; once we modified the uval ++ * there is no going back and subsequent operations must ++ * not fail. 
++ */ ++ pi_state_update_owner(pi_state, new_owner); ++ postunlock = __rt_mutex_futex_unlock(&pi_state->pi_mutex, &wqh); ++ } ++ ++out_unlock: ++ raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock); ++ ++ if (postunlock) ++ rt_mutex_postunlock(&wqh); ++ ++ return ret; ++} ++ ++/* ++ * Express the locking dependencies for lockdep: ++ */ ++static inline void ++double_lock_hb(struct futex_hash_bucket *hb1, struct futex_hash_bucket *hb2) ++{ ++ if (hb1 <= hb2) { ++ spin_lock(&hb1->lock); ++ if (hb1 < hb2) ++ spin_lock_nested(&hb2->lock, SINGLE_DEPTH_NESTING); ++ } else { /* hb1 > hb2 */ ++ spin_lock(&hb2->lock); ++ spin_lock_nested(&hb1->lock, SINGLE_DEPTH_NESTING); ++ } ++} ++ ++static inline void ++double_unlock_hb(struct futex_hash_bucket *hb1, struct futex_hash_bucket *hb2) ++{ ++ spin_unlock(&hb1->lock); ++ if (hb1 != hb2) ++ spin_unlock(&hb2->lock); ++} ++ ++/* ++ * Wake up waiters matching bitset queued on this futex (uaddr). ++ */ ++static int ++futex_wake(u32 __user *uaddr, unsigned int flags, int nr_wake, u32 bitset) ++{ ++ struct futex_hash_bucket *hb; ++ struct futex_q *this, *next; ++ union futex_key key = FUTEX_KEY_INIT; ++ int ret; ++ DEFINE_WAKE_Q(wake_q); ++ ++ if (!bitset) ++ return -EINVAL; ++ ++ ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &key, FUTEX_READ); ++ if (unlikely(ret != 0)) ++ return ret; ++ ++ hb = hash_futex(&key); ++ ++ /* Make sure we really have tasks to wakeup */ ++ if (!hb_waiters_pending(hb)) ++ return ret; ++ ++ spin_lock(&hb->lock); ++ ++ plist_for_each_entry_safe(this, next, &hb->chain, list) { ++ if (match_futex (&this->key, &key)) { ++ if (this->pi_state || this->rt_waiter) { ++ ret = -EINVAL; ++ break; ++ } ++ ++ /* Check if one of the bits is set in both bitsets */ ++ if (!(this->bitset & bitset)) ++ continue; ++ ++ mark_wake_futex(&wake_q, this); ++ if (++ret >= nr_wake) ++ break; ++ } ++ } ++ ++ spin_unlock(&hb->lock); ++ wake_up_q(&wake_q); ++ return ret; ++} ++ ++static int futex_atomic_op_inuser(unsigned int encoded_op, u32 __user *uaddr) ++{ ++ unsigned int op = (encoded_op & 0x70000000) >> 28; ++ unsigned int cmp = (encoded_op & 0x0f000000) >> 24; ++ int oparg = sign_extend32((encoded_op & 0x00fff000) >> 12, 11); ++ int cmparg = sign_extend32(encoded_op & 0x00000fff, 11); ++ int oldval, ret; ++ ++ if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) { ++ if (oparg < 0 || oparg > 31) { ++ char comm[sizeof(current->comm)]; ++ /* ++ * kill this print and return -EINVAL when userspace ++ * is sane again ++ */ ++ pr_info_ratelimited("futex_wake_op: %s tries to shift op by %d; fix this program\n", ++ get_task_comm(comm, current), oparg); ++ oparg &= 31; + } - } - - if (IS_ERR(btf_vmlinux)) { -diff --git a/kernel/cfi.c b/kernel/cfi.c -index 9594cfd1cf2cf..08102d19ec15a 100644 ---- a/kernel/cfi.c -+++ b/kernel/cfi.c -@@ -281,6 +281,8 @@ static inline cfi_check_fn find_module_check_fn(unsigned long ptr) - static inline cfi_check_fn find_check_fn(unsigned long ptr) - { - cfi_check_fn fn = NULL; -+ unsigned long flags; -+ bool rcu_idle; - - if (is_kernel_text(ptr)) - return __cfi_check; -@@ -290,13 +292,21 @@ static inline cfi_check_fn find_check_fn(unsigned long ptr) - * the shadow and __module_address use RCU, so we need to wake it - * up if necessary. 
- */ -- RCU_NONIDLE({ -- if (IS_ENABLED(CONFIG_CFI_CLANG_SHADOW)) -- fn = find_shadow_check_fn(ptr); -+ rcu_idle = !rcu_is_watching(); -+ if (rcu_idle) { -+ local_irq_save(flags); -+ rcu_irq_enter(); ++ oparg = 1 << oparg; + } + -+ if (IS_ENABLED(CONFIG_CFI_CLANG_SHADOW)) -+ fn = find_shadow_check_fn(ptr); -+ if (!fn) -+ fn = find_module_check_fn(ptr); - -- if (!fn) -- fn = find_module_check_fn(ptr); -- }); -+ if (rcu_idle) { -+ rcu_irq_exit(); -+ local_irq_restore(flags); ++ pagefault_disable(); ++ ret = arch_futex_atomic_op_inuser(op, oparg, &oldval, uaddr); ++ pagefault_enable(); ++ if (ret) ++ return ret; ++ ++ switch (cmp) { ++ case FUTEX_OP_CMP_EQ: ++ return oldval == cmparg; ++ case FUTEX_OP_CMP_NE: ++ return oldval != cmparg; ++ case FUTEX_OP_CMP_LT: ++ return oldval < cmparg; ++ case FUTEX_OP_CMP_GE: ++ return oldval >= cmparg; ++ case FUTEX_OP_CMP_LE: ++ return oldval <= cmparg; ++ case FUTEX_OP_CMP_GT: ++ return oldval > cmparg; ++ default: ++ return -ENOSYS; + } - - return fn; - } -diff --git a/kernel/cgroup/cgroup-internal.h b/kernel/cgroup/cgroup-internal.h -index bfbeabc17a9df..6e36e854b5124 100644 ---- a/kernel/cgroup/cgroup-internal.h -+++ b/kernel/cgroup/cgroup-internal.h -@@ -65,6 +65,25 @@ static inline struct cgroup_fs_context *cgroup_fc2context(struct fs_context *fc) - return container_of(kfc, struct cgroup_fs_context, kfc); - } - -+struct cgroup_pidlist; ++} + -+struct cgroup_file_ctx { -+ struct cgroup_namespace *ns; ++/* ++ * Wake up all waiters hashed on the physical page that is mapped ++ * to this virtual address: ++ */ ++static int ++futex_wake_op(u32 __user *uaddr1, unsigned int flags, u32 __user *uaddr2, ++ int nr_wake, int nr_wake2, int op) ++{ ++ union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT; ++ struct futex_hash_bucket *hb1, *hb2; ++ struct futex_q *this, *next; ++ int ret, op_ret; ++ DEFINE_WAKE_Q(wake_q); + -+ struct { -+ void *trigger; -+ } psi; ++retry: ++ ret = get_futex_key(uaddr1, flags & FLAGS_SHARED, &key1, FUTEX_READ); ++ if (unlikely(ret != 0)) ++ return ret; ++ ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2, FUTEX_WRITE); ++ if (unlikely(ret != 0)) ++ return ret; + -+ struct { -+ bool started; -+ struct css_task_iter iter; -+ } procs; ++ hb1 = hash_futex(&key1); ++ hb2 = hash_futex(&key2); + -+ struct { -+ struct cgroup_pidlist *pidlist; -+ } procs1; -+}; ++retry_private: ++ double_lock_hb(hb1, hb2); ++ op_ret = futex_atomic_op_inuser(op, uaddr2); ++ if (unlikely(op_ret < 0)) { ++ double_unlock_hb(hb1, hb2); ++ ++ if (!IS_ENABLED(CONFIG_MMU) || ++ unlikely(op_ret != -EFAULT && op_ret != -EAGAIN)) { ++ /* ++ * we don't get EFAULT from MMU faults if we don't have ++ * an MMU, but we might get them from range checking ++ */ ++ ret = op_ret; ++ return ret; ++ } ++ ++ if (op_ret == -EFAULT) { ++ ret = fault_in_user_writeable(uaddr2); ++ if (ret) ++ return ret; ++ } ++ ++ cond_resched(); ++ if (!(flags & FLAGS_SHARED)) ++ goto retry_private; ++ goto retry; ++ } ++ ++ plist_for_each_entry_safe(this, next, &hb1->chain, list) { ++ if (match_futex (&this->key, &key1)) { ++ if (this->pi_state || this->rt_waiter) { ++ ret = -EINVAL; ++ goto out_unlock; ++ } ++ mark_wake_futex(&wake_q, this); ++ if (++ret >= nr_wake) ++ break; ++ } ++ } ++ ++ if (op_ret > 0) { ++ op_ret = 0; ++ plist_for_each_entry_safe(this, next, &hb2->chain, list) { ++ if (match_futex (&this->key, &key2)) { ++ if (this->pi_state || this->rt_waiter) { ++ ret = -EINVAL; ++ goto out_unlock; ++ } ++ mark_wake_futex(&wake_q, this); ++ if (++op_ret >= nr_wake2) ++ 
break; ++ } ++ } ++ ret += op_ret; ++ } ++ ++out_unlock: ++ double_unlock_hb(hb1, hb2); ++ wake_up_q(&wake_q); ++ return ret; ++} ++ ++/** ++ * requeue_futex() - Requeue a futex_q from one hb to another ++ * @q: the futex_q to requeue ++ * @hb1: the source hash_bucket ++ * @hb2: the target hash_bucket ++ * @key2: the new key for the requeued futex_q ++ */ ++static inline ++void requeue_futex(struct futex_q *q, struct futex_hash_bucket *hb1, ++ struct futex_hash_bucket *hb2, union futex_key *key2) ++{ + - /* - * A cgroup can be associated with multiple css_sets as different tasks may - * belong to different cgroups on different hierarchies. In the other -diff --git a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c -index 35b9203283447..ee8b3d80f19ee 100644 ---- a/kernel/cgroup/cgroup-v1.c -+++ b/kernel/cgroup/cgroup-v1.c -@@ -59,6 +59,7 @@ int cgroup_attach_task_all(struct task_struct *from, struct task_struct *tsk) - int retval = 0; - - mutex_lock(&cgroup_mutex); -+ cpus_read_lock(); - percpu_down_write(&cgroup_threadgroup_rwsem); - for_each_root(root) { - struct cgroup *from_cgrp; -@@ -75,6 +76,7 @@ int cgroup_attach_task_all(struct task_struct *from, struct task_struct *tsk) - break; - } - percpu_up_write(&cgroup_threadgroup_rwsem); -+ cpus_read_unlock(); - mutex_unlock(&cgroup_mutex); - - return retval; -@@ -397,6 +399,7 @@ static void *cgroup_pidlist_start(struct seq_file *s, loff_t *pos) - * next pid to display, if any - */ - struct kernfs_open_file *of = s->private; -+ struct cgroup_file_ctx *ctx = of->priv; - struct cgroup *cgrp = seq_css(s)->cgroup; - struct cgroup_pidlist *l; - enum cgroup_filetype type = seq_cft(s)->private; -@@ -406,25 +409,24 @@ static void *cgroup_pidlist_start(struct seq_file *s, loff_t *pos) - mutex_lock(&cgrp->pidlist_mutex); - - /* -- * !NULL @of->priv indicates that this isn't the first start() -- * after open. If the matching pidlist is around, we can use that. -- * Look for it. Note that @of->priv can't be used directly. It -- * could already have been destroyed. -+ * !NULL @ctx->procs1.pidlist indicates that this isn't the first -+ * start() after open. If the matching pidlist is around, we can use -+ * that. Look for it. Note that @ctx->procs1.pidlist can't be used -+ * directly. It could already have been destroyed. - */ -- if (of->priv) -- of->priv = cgroup_pidlist_find(cgrp, type); -+ if (ctx->procs1.pidlist) -+ ctx->procs1.pidlist = cgroup_pidlist_find(cgrp, type); - - /* - * Either this is the first start() after open or the matching - * pidlist has been destroyed inbetween. Create a new one. 
- */ -- if (!of->priv) { -- ret = pidlist_array_load(cgrp, type, -- (struct cgroup_pidlist **)&of->priv); -+ if (!ctx->procs1.pidlist) { -+ ret = pidlist_array_load(cgrp, type, &ctx->procs1.pidlist); - if (ret) - return ERR_PTR(ret); - } -- l = of->priv; -+ l = ctx->procs1.pidlist; - - if (pid) { - int end = l->length; -@@ -452,7 +454,8 @@ static void *cgroup_pidlist_start(struct seq_file *s, loff_t *pos) - static void cgroup_pidlist_stop(struct seq_file *s, void *v) - { - struct kernfs_open_file *of = s->private; -- struct cgroup_pidlist *l = of->priv; -+ struct cgroup_file_ctx *ctx = of->priv; -+ struct cgroup_pidlist *l = ctx->procs1.pidlist; - - if (l) - mod_delayed_work(cgroup_pidlist_destroy_wq, &l->destroy_dwork, -@@ -463,7 +466,8 @@ static void cgroup_pidlist_stop(struct seq_file *s, void *v) - static void *cgroup_pidlist_next(struct seq_file *s, void *v, loff_t *pos) - { - struct kernfs_open_file *of = s->private; -- struct cgroup_pidlist *l = of->priv; -+ struct cgroup_file_ctx *ctx = of->priv; -+ struct cgroup_pidlist *l = ctx->procs1.pidlist; - pid_t *p = v; - pid_t *end = l->list + l->length; - /* -@@ -507,10 +511,11 @@ static ssize_t __cgroup1_procs_write(struct kernfs_open_file *of, - goto out_unlock; - - /* -- * Even if we're attaching all tasks in the thread group, we only -- * need to check permissions on one of them. -+ * Even if we're attaching all tasks in the thread group, we only need -+ * to check permissions on one of them. Check permissions using the -+ * credentials from file open to protect against inherited fd attacks. - */ -- cred = current_cred(); -+ cred = of->file->f_cred; - tcred = get_task_cred(task); - if (!uid_eq(cred->euid, GLOBAL_ROOT_UID) && - !uid_eq(cred->euid, tcred->uid) && -@@ -546,9 +551,19 @@ static ssize_t cgroup_release_agent_write(struct kernfs_open_file *of, - char *buf, size_t nbytes, loff_t off) - { - struct cgroup *cgrp; -+ struct cgroup_file_ctx *ctx; - - BUILD_BUG_ON(sizeof(cgrp->root->release_agent_path) < PATH_MAX); - + /* -+ * Release agent gets called with all capabilities, -+ * require capabilities to set release agent. ++ * If key1 and key2 hash to the same bucket, no need to ++ * requeue. + */ -+ ctx = of->priv; -+ if ((ctx->ns->user_ns != &init_user_ns) || -+ !file_ns_capable(of->file, &init_user_ns, CAP_SYS_ADMIN)) -+ return -EPERM; ++ if (likely(&hb1->chain != &hb2->chain)) { ++ plist_del(&q->list, &hb1->chain); ++ hb_waiters_dec(hb1); ++ hb_waiters_inc(hb2); ++ plist_add(&q->list, &hb2->chain); ++ q->lock_ptr = &hb2->lock; ++ } ++ q->key = *key2; ++} + - cgrp = cgroup_kn_lock_live(of->kn, false); - if (!cgrp) - return -ENODEV; -@@ -960,6 +975,12 @@ int cgroup1_parse_param(struct fs_context *fc, struct fs_parameter *param) - /* Specifying two release agents is forbidden */ - if (ctx->release_agent) - return invalfc(fc, "release_agent respecified"); -+ /* -+ * Release agent gets called with all capabilities, -+ * require capabilities to set release agent. 
-+ */ -+ if ((fc->user_ns != &init_user_ns) || !capable(CAP_SYS_ADMIN)) -+ return invalfc(fc, "Setting release_agent not allowed"); - ctx->release_agent = param->string; - param->string = NULL; - break; -diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c -index ea08f01d0111a..a92990f070d12 100644 ---- a/kernel/cgroup/cgroup.c -+++ b/kernel/cgroup/cgroup.c -@@ -764,7 +764,8 @@ struct css_set init_css_set = { - .task_iters = LIST_HEAD_INIT(init_css_set.task_iters), - .threaded_csets = LIST_HEAD_INIT(init_css_set.threaded_csets), - .cgrp_links = LIST_HEAD_INIT(init_css_set.cgrp_links), -- .mg_preload_node = LIST_HEAD_INIT(init_css_set.mg_preload_node), -+ .mg_src_preload_node = LIST_HEAD_INIT(init_css_set.mg_src_preload_node), -+ .mg_dst_preload_node = LIST_HEAD_INIT(init_css_set.mg_dst_preload_node), - .mg_node = LIST_HEAD_INIT(init_css_set.mg_node), - - /* -@@ -1239,7 +1240,8 @@ static struct css_set *find_css_set(struct css_set *old_cset, - INIT_LIST_HEAD(&cset->threaded_csets); - INIT_HLIST_NODE(&cset->hlist); - INIT_LIST_HEAD(&cset->cgrp_links); -- INIT_LIST_HEAD(&cset->mg_preload_node); -+ INIT_LIST_HEAD(&cset->mg_src_preload_node); -+ INIT_LIST_HEAD(&cset->mg_dst_preload_node); - INIT_LIST_HEAD(&cset->mg_node); - - /* Copy the set of subsystem state objects generated in -@@ -1740,6 +1742,7 @@ int rebind_subsystems(struct cgroup_root *dst_root, u16 ss_mask) - struct cgroup *dcgrp = &dst_root->cgrp; - struct cgroup_subsys *ss; - int ssid, i, ret; -+ u16 dfl_disable_ss_mask = 0; - - lockdep_assert_held(&cgroup_mutex); - -@@ -1756,8 +1759,28 @@ int rebind_subsystems(struct cgroup_root *dst_root, u16 ss_mask) - /* can't move between two non-dummy roots either */ - if (ss->root != &cgrp_dfl_root && dst_root != &cgrp_dfl_root) - return -EBUSY; ++static inline bool futex_requeue_pi_prepare(struct futex_q *q, ++ struct futex_pi_state *pi_state) ++{ ++ int old, new; ++ ++ /* ++ * Set state to Q_REQUEUE_PI_IN_PROGRESS unless an early wakeup has ++ * already set Q_REQUEUE_PI_IGNORE to signal that requeue should ++ * ignore the waiter. ++ */ ++ old = atomic_read_acquire(&q->requeue_state); ++ do { ++ if (old == Q_REQUEUE_PI_IGNORE) ++ return false; + + /* -+ * Collect ssid's that need to be disabled from default -+ * hierarchy. ++ * futex_proxy_trylock_atomic() might have set it to ++ * IN_PROGRESS and a interleaved early wake to WAIT. ++ * ++ * It was considered to have an extra state for that ++ * trylock, but that would just add more conditionals ++ * all over the place for a dubious value. + */ -+ if (ss->root == &cgrp_dfl_root) -+ dfl_disable_ss_mask |= 1 << ssid; ++ if (old != Q_REQUEUE_PI_NONE) ++ break; + - } while_each_subsys_mask(); - -+ if (dfl_disable_ss_mask) { -+ struct cgroup *scgrp = &cgrp_dfl_root.cgrp; ++ new = Q_REQUEUE_PI_IN_PROGRESS; ++ } while (!atomic_try_cmpxchg(&q->requeue_state, &old, new)); ++ ++ q->pi_state = pi_state; ++ return true; ++} ++ ++static inline void futex_requeue_pi_complete(struct futex_q *q, int locked) ++{ ++ int old, new; ++ ++ old = atomic_read_acquire(&q->requeue_state); ++ do { ++ if (old == Q_REQUEUE_PI_IGNORE) ++ return; ++ ++ if (locked >= 0) { ++ /* Requeue succeeded. Set DONE or LOCKED */ ++ WARN_ON_ONCE(old != Q_REQUEUE_PI_IN_PROGRESS && ++ old != Q_REQUEUE_PI_WAIT); ++ new = Q_REQUEUE_PI_DONE + locked; ++ } else if (old == Q_REQUEUE_PI_IN_PROGRESS) { ++ /* Deadlock, no early wakeup interleave */ ++ new = Q_REQUEUE_PI_NONE; ++ } else { ++ /* Deadlock, early wakeup interleave. 
*/ ++ WARN_ON_ONCE(old != Q_REQUEUE_PI_WAIT); ++ new = Q_REQUEUE_PI_IGNORE; ++ } ++ } while (!atomic_try_cmpxchg(&q->requeue_state, &old, new)); ++ ++#ifdef CONFIG_PREEMPT_RT ++ /* If the waiter interleaved with the requeue let it know */ ++ if (unlikely(old == Q_REQUEUE_PI_WAIT)) ++ rcuwait_wake_up(&q->requeue_wait); ++#endif ++} ++ ++static inline int futex_requeue_pi_wakeup_sync(struct futex_q *q) ++{ ++ int old, new; ++ ++ old = atomic_read_acquire(&q->requeue_state); ++ do { ++ /* Is requeue done already? */ ++ if (old >= Q_REQUEUE_PI_DONE) ++ return old; + + /* -+ * Controllers from default hierarchy that need to be rebound -+ * are all disabled together in one go. ++ * If not done, then tell the requeue code to either ignore ++ * the waiter or to wake it up once the requeue is done. + */ -+ cgrp_dfl_root.subsys_mask &= ~dfl_disable_ss_mask; -+ WARN_ON(cgroup_apply_control(scgrp)); -+ cgroup_finalize_control(scgrp, 0); ++ new = Q_REQUEUE_PI_WAIT; ++ if (old == Q_REQUEUE_PI_NONE) ++ new = Q_REQUEUE_PI_IGNORE; ++ } while (!atomic_try_cmpxchg(&q->requeue_state, &old, new)); ++ ++ /* If the requeue was in progress, wait for it to complete */ ++ if (old == Q_REQUEUE_PI_IN_PROGRESS) { ++#ifdef CONFIG_PREEMPT_RT ++ rcuwait_wait_event(&q->requeue_wait, ++ atomic_read(&q->requeue_state) != Q_REQUEUE_PI_WAIT, ++ TASK_UNINTERRUPTIBLE); ++#else ++ (void)atomic_cond_read_relaxed(&q->requeue_state, VAL != Q_REQUEUE_PI_WAIT); ++#endif + } + - do_each_subsys_mask(ss, ssid, ss_mask) { - struct cgroup_root *src_root = ss->root; - struct cgroup *scgrp = &src_root->cgrp; -@@ -1766,10 +1789,12 @@ int rebind_subsystems(struct cgroup_root *dst_root, u16 ss_mask) - - WARN_ON(!css || cgroup_css(dcgrp, ss)); - -- /* disable from the source */ -- src_root->subsys_mask &= ~(1 << ssid); -- WARN_ON(cgroup_apply_control(scgrp)); -- cgroup_finalize_control(scgrp, 0); -+ if (src_root != &cgrp_dfl_root) { -+ /* disable from the source */ -+ src_root->subsys_mask &= ~(1 << ssid); -+ WARN_ON(cgroup_apply_control(scgrp)); -+ cgroup_finalize_control(scgrp, 0); -+ } - - /* rebind */ - RCU_INIT_POINTER(scgrp->subsys[ssid], NULL); -@@ -1785,6 +1810,7 @@ int rebind_subsystems(struct cgroup_root *dst_root, u16 ss_mask) - - if (ss->css_rstat_flush) { - list_del_rcu(&css->rstat_css_node); -+ synchronize_rcu(); - list_add_rcu(&css->rstat_css_node, - &dcgrp->rstat_css_list); - } -@@ -2319,6 +2345,47 @@ int task_cgroup_path(struct task_struct *task, char *buf, size_t buflen) - } - EXPORT_SYMBOL_GPL(task_cgroup_path); - ++ /* ++ * Requeue is now either prohibited or complete. Reread state ++ * because during the wait above it might have changed. Nothing ++ * will modify q->requeue_state after this point. ++ */ ++ return atomic_read(&q->requeue_state); ++} ++ +/** -+ * cgroup_attach_lock - Lock for ->attach() -+ * @lock_threadgroup: whether to down_write cgroup_threadgroup_rwsem ++ * requeue_pi_wake_futex() - Wake a task that acquired the lock during requeue ++ * @q: the futex_q ++ * @key: the key of the requeue target futex ++ * @hb: the hash_bucket of the requeue target futex + * -+ * cgroup migration sometimes needs to stabilize threadgroups against forks and -+ * exits by write-locking cgroup_threadgroup_rwsem. However, some ->attach() -+ * implementations (e.g. cpuset), also need to disable CPU hotplug. -+ * Unfortunately, letting ->attach() operations acquire cpus_read_lock() can -+ * lead to deadlocks. 
++ * During futex_requeue, with requeue_pi=1, it is possible to acquire the ++ * target futex if it is uncontended or via a lock steal. + * -+ * Bringing up a CPU may involve creating and destroying tasks which requires -+ * read-locking threadgroup_rwsem, so threadgroup_rwsem nests inside -+ * cpus_read_lock(). If we call an ->attach() which acquires the cpus lock while -+ * write-locking threadgroup_rwsem, the locking order is reversed and we end up -+ * waiting for an on-going CPU hotplug operation which in turn is waiting for -+ * the threadgroup_rwsem to be released to create new tasks. For more details: ++ * 1) Set @q::key to the requeue target futex key so the waiter can detect ++ * the wakeup on the right futex. + * -+ * http://lkml.kernel.org/r/20220711174629.uehfmqegcwn2lqzu@wubuntu ++ * 2) Dequeue @q from the hash bucket. + * -+ * Resolve the situation by always acquiring cpus_read_lock() before optionally -+ * write-locking cgroup_threadgroup_rwsem. This allows ->attach() to assume that -+ * CPU hotplug is disabled on entry. ++ * 3) Set @q::rt_waiter to NULL so the woken up task can detect atomic lock ++ * acquisition. ++ * ++ * 4) Set the q->lock_ptr to the requeue target hb->lock for the case that ++ * the waiter has to fixup the pi state. ++ * ++ * 5) Complete the requeue state so the waiter can make progress. After ++ * this point the waiter task can return from the syscall immediately in ++ * case that the pi state does not have to be fixed up. ++ * ++ * 6) Wake the waiter task. ++ * ++ * Must be called with both q->lock_ptr and hb->lock held. + */ -+static void cgroup_attach_lock(bool lock_threadgroup) ++static inline ++void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key, ++ struct futex_hash_bucket *hb) +{ -+ cpus_read_lock(); -+ if (lock_threadgroup) -+ percpu_down_write(&cgroup_threadgroup_rwsem); ++ q->key = *key; ++ ++ __unqueue_futex(q); ++ ++ WARN_ON(!q->rt_waiter); ++ q->rt_waiter = NULL; ++ ++ q->lock_ptr = &hb->lock; ++ ++ /* Signal locked state to the waiter */ ++ futex_requeue_pi_complete(q, 1); ++ wake_up_state(q->task, TASK_NORMAL); +} + +/** -+ * cgroup_attach_unlock - Undo cgroup_attach_lock() -+ * @lock_threadgroup: whether to up_write cgroup_threadgroup_rwsem ++ * futex_proxy_trylock_atomic() - Attempt an atomic lock for the top waiter ++ * @pifutex: the user address of the to futex ++ * @hb1: the from futex hash bucket, must be locked by the caller ++ * @hb2: the to futex hash bucket, must be locked by the caller ++ * @key1: the from futex key ++ * @key2: the to futex key ++ * @ps: address to store the pi_state pointer ++ * @exiting: Pointer to store the task pointer of the owner task ++ * which is in the middle of exiting ++ * @set_waiters: force setting the FUTEX_WAITERS bit (1) or not (0) ++ * ++ * Try and get the lock on behalf of the top waiter if we can do it atomically. ++ * Wake the top waiter if we succeed. If the caller specified set_waiters, ++ * then direct futex_lock_pi_atomic() to force setting the FUTEX_WAITERS bit. ++ * hb1 and hb2 must be held by the caller. ++ * ++ * @exiting is only set when the return value is -EBUSY. If so, this holds ++ * a refcount on the exiting task on return and the caller needs to drop it ++ * after waiting for the exit to complete. 
++ * ++ * Return: ++ * - 0 - failed to acquire the lock atomically; ++ * - >0 - acquired the lock, return value is vpid of the top_waiter ++ * - <0 - error + */ -+static void cgroup_attach_unlock(bool lock_threadgroup) ++static int ++futex_proxy_trylock_atomic(u32 __user *pifutex, struct futex_hash_bucket *hb1, ++ struct futex_hash_bucket *hb2, union futex_key *key1, ++ union futex_key *key2, struct futex_pi_state **ps, ++ struct task_struct **exiting, int set_waiters) +{ -+ if (lock_threadgroup) -+ percpu_up_write(&cgroup_threadgroup_rwsem); -+ cpus_read_unlock(); ++ struct futex_q *top_waiter = NULL; ++ u32 curval; ++ int ret; ++ ++ if (get_futex_value_locked(&curval, pifutex)) ++ return -EFAULT; ++ ++ if (unlikely(should_fail_futex(true))) ++ return -EFAULT; ++ ++ /* ++ * Find the top_waiter and determine if there are additional waiters. ++ * If the caller intends to requeue more than 1 waiter to pifutex, ++ * force futex_lock_pi_atomic() to set the FUTEX_WAITERS bit now, ++ * as we have means to handle the possible fault. If not, don't set ++ * the bit unnecessarily as it will force the subsequent unlock to enter ++ * the kernel. ++ */ ++ top_waiter = futex_top_waiter(hb1, key1); ++ ++ /* There are no waiters, nothing for us to do. */ ++ if (!top_waiter) ++ return 0; ++ ++ /* ++ * Ensure that this is a waiter sitting in futex_wait_requeue_pi() ++ * and waiting on the 'waitqueue' futex which is always !PI. ++ */ ++ if (!top_waiter->rt_waiter || top_waiter->pi_state) ++ return -EINVAL; ++ ++ /* Ensure we requeue to the expected futex. */ ++ if (!match_futex(top_waiter->requeue_pi_key, key2)) ++ return -EINVAL; ++ ++ /* Ensure that this does not race against an early wakeup */ ++ if (!futex_requeue_pi_prepare(top_waiter, NULL)) ++ return -EAGAIN; ++ ++ /* ++ * Try to take the lock for top_waiter and set the FUTEX_WAITERS bit ++ * in the contended case or if @set_waiters is true. ++ * ++ * In the contended case PI state is attached to the lock owner. If ++ * the user space lock can be acquired then PI state is attached to ++ * the new owner (@top_waiter->task) when @set_waiters is true. ++ */ ++ ret = futex_lock_pi_atomic(pifutex, hb2, key2, ps, top_waiter->task, ++ exiting, set_waiters); ++ if (ret == 1) { ++ /* ++ * Lock was acquired in user space and PI state was ++ * attached to @top_waiter->task. That means state is fully ++ * consistent and the waiter can return to user space ++ * immediately after the wakeup. ++ */ ++ requeue_pi_wake_futex(top_waiter, key2, hb2); ++ } else if (ret < 0) { ++ /* Rewind top_waiter::requeue_state */ ++ futex_requeue_pi_complete(top_waiter, ret); ++ } else { ++ /* ++ * futex_lock_pi_atomic() did not acquire the user space ++ * futex, but managed to establish the proxy lock and pi ++ * state. top_waiter::requeue_state cannot be fixed up here ++ * because the waiter is not enqueued on the rtmutex ++ * yet. This is handled at the callsite depending on the ++ * result of rt_mutex_start_proxy_lock() which is ++ * guaranteed to be reached with this function returning 0. 
++ */ ++ } ++ return ret; +} + - /** - * cgroup_migrate_add_task - add a migration target task to a migration context - * @task: target task -@@ -2573,21 +2640,27 @@ int cgroup_migrate_vet_dst(struct cgroup *dst_cgrp) - */ - void cgroup_migrate_finish(struct cgroup_mgctx *mgctx) - { -- LIST_HEAD(preloaded); - struct css_set *cset, *tmp_cset; - - lockdep_assert_held(&cgroup_mutex); - - spin_lock_irq(&css_set_lock); - -- list_splice_tail_init(&mgctx->preloaded_src_csets, &preloaded); -- list_splice_tail_init(&mgctx->preloaded_dst_csets, &preloaded); -+ list_for_each_entry_safe(cset, tmp_cset, &mgctx->preloaded_src_csets, -+ mg_src_preload_node) { -+ cset->mg_src_cgrp = NULL; -+ cset->mg_dst_cgrp = NULL; -+ cset->mg_dst_cset = NULL; -+ list_del_init(&cset->mg_src_preload_node); -+ put_css_set_locked(cset); ++/** ++ * futex_requeue() - Requeue waiters from uaddr1 to uaddr2 ++ * @uaddr1: source futex user address ++ * @flags: futex flags (FLAGS_SHARED, etc.) ++ * @uaddr2: target futex user address ++ * @nr_wake: number of waiters to wake (must be 1 for requeue_pi) ++ * @nr_requeue: number of waiters to requeue (0-INT_MAX) ++ * @cmpval: @uaddr1 expected value (or %NULL) ++ * @requeue_pi: if we are attempting to requeue from a non-pi futex to a ++ * pi futex (pi to pi requeue is not supported) ++ * ++ * Requeue waiters on uaddr1 to uaddr2. In the requeue_pi case, try to acquire ++ * uaddr2 atomically on behalf of the top waiter. ++ * ++ * Return: ++ * - >=0 - on success, the number of tasks requeued or woken; ++ * - <0 - on error ++ */ ++static int futex_requeue(u32 __user *uaddr1, unsigned int flags, ++ u32 __user *uaddr2, int nr_wake, int nr_requeue, ++ u32 *cmpval, int requeue_pi) ++{ ++ union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT; ++ int task_count = 0, ret; ++ struct futex_pi_state *pi_state = NULL; ++ struct futex_hash_bucket *hb1, *hb2; ++ struct futex_q *this, *next; ++ DEFINE_WAKE_Q(wake_q); ++ ++ if (nr_wake < 0 || nr_requeue < 0) ++ return -EINVAL; ++ ++ /* ++ * When PI not supported: return -ENOSYS if requeue_pi is true, ++ * consequently the compiler knows requeue_pi is always false past ++ * this point which will optimize away all the conditional code ++ * further down. ++ */ ++ if (!IS_ENABLED(CONFIG_FUTEX_PI) && requeue_pi) ++ return -ENOSYS; ++ ++ if (requeue_pi) { ++ /* ++ * Requeue PI only works on two distinct uaddrs. This ++ * check is only valid for private futexes. See below. ++ */ ++ if (uaddr1 == uaddr2) ++ return -EINVAL; ++ ++ /* ++ * futex_requeue() allows the caller to define the number ++ * of waiters to wake up via the @nr_wake argument. With ++ * REQUEUE_PI, waking up more than one waiter is creating ++ * more problems than it solves. Waking up a waiter makes ++ * only sense if the PI futex @uaddr2 is uncontended as ++ * this allows the requeue code to acquire the futex ++ * @uaddr2 before waking the waiter. The waiter can then ++ * return to user space without further action. A secondary ++ * wakeup would just make the futex_wait_requeue_pi() ++ * handling more complex, because that code would have to ++ * look up pi_state and do more or less all the handling ++ * which the requeue code has to do for the to be requeued ++ * waiters. So restrict the number of waiters to wake to ++ * one, and only wake it up when the PI futex is ++ * uncontended. Otherwise requeue it and let the unlock of ++ * the PI futex handle the wakeup. ++ * ++ * All REQUEUE_PI users, e.g. pthread_cond_signal() and ++ * pthread_cond_broadcast() must use nr_wake=1. 
++ */ ++ if (nr_wake != 1) ++ return -EINVAL; ++ ++ /* ++ * requeue_pi requires a pi_state, try to allocate it now ++ * without any locks in case it fails. ++ */ ++ if (refill_pi_state_cache()) ++ return -ENOMEM; + } - -- list_for_each_entry_safe(cset, tmp_cset, &preloaded, mg_preload_node) { -+ list_for_each_entry_safe(cset, tmp_cset, &mgctx->preloaded_dst_csets, -+ mg_dst_preload_node) { - cset->mg_src_cgrp = NULL; - cset->mg_dst_cgrp = NULL; - cset->mg_dst_cset = NULL; -- list_del_init(&cset->mg_preload_node); -+ list_del_init(&cset->mg_dst_preload_node); - put_css_set_locked(cset); - } - -@@ -2629,7 +2702,7 @@ void cgroup_migrate_add_src(struct css_set *src_cset, - - src_cgrp = cset_cgroup_from_root(src_cset, dst_cgrp->root); - -- if (!list_empty(&src_cset->mg_preload_node)) -+ if (!list_empty(&src_cset->mg_src_preload_node)) - return; - - WARN_ON(src_cset->mg_src_cgrp); -@@ -2640,7 +2713,7 @@ void cgroup_migrate_add_src(struct css_set *src_cset, - src_cset->mg_src_cgrp = src_cgrp; - src_cset->mg_dst_cgrp = dst_cgrp; - get_css_set(src_cset); -- list_add_tail(&src_cset->mg_preload_node, &mgctx->preloaded_src_csets); -+ list_add_tail(&src_cset->mg_src_preload_node, &mgctx->preloaded_src_csets); - } - - /** -@@ -2665,7 +2738,7 @@ int cgroup_migrate_prepare_dst(struct cgroup_mgctx *mgctx) - - /* look up the dst cset for each src cset and link it to src */ - list_for_each_entry_safe(src_cset, tmp_cset, &mgctx->preloaded_src_csets, -- mg_preload_node) { -+ mg_src_preload_node) { - struct css_set *dst_cset; - struct cgroup_subsys *ss; - int ssid; -@@ -2684,7 +2757,7 @@ int cgroup_migrate_prepare_dst(struct cgroup_mgctx *mgctx) - if (src_cset == dst_cset) { - src_cset->mg_src_cgrp = NULL; - src_cset->mg_dst_cgrp = NULL; -- list_del_init(&src_cset->mg_preload_node); -+ list_del_init(&src_cset->mg_src_preload_node); - put_css_set(src_cset); - put_css_set(dst_cset); - continue; -@@ -2692,8 +2765,8 @@ int cgroup_migrate_prepare_dst(struct cgroup_mgctx *mgctx) - - src_cset->mg_dst_cset = dst_cset; - -- if (list_empty(&dst_cset->mg_preload_node)) -- list_add_tail(&dst_cset->mg_preload_node, -+ if (list_empty(&dst_cset->mg_dst_preload_node)) -+ list_add_tail(&dst_cset->mg_dst_preload_node, - &mgctx->preloaded_dst_csets); - else - put_css_set(dst_cset); -@@ -2789,8 +2862,7 @@ int cgroup_attach_task(struct cgroup *dst_cgrp, struct task_struct *leader, - } - - struct task_struct *cgroup_procs_write_start(char *buf, bool threadgroup, -- bool *locked) -- __acquires(&cgroup_threadgroup_rwsem) -+ bool *threadgroup_locked) - { - struct task_struct *tsk; - pid_t pid; -@@ -2807,12 +2879,8 @@ struct task_struct *cgroup_procs_write_start(char *buf, bool threadgroup, - * Therefore, we can skip the global lock. 
- */ - lockdep_assert_held(&cgroup_mutex); -- if (pid || threadgroup) { -- percpu_down_write(&cgroup_threadgroup_rwsem); -- *locked = true; -- } else { -- *locked = false; -- } -+ *threadgroup_locked = pid || threadgroup; -+ cgroup_attach_lock(*threadgroup_locked); - - rcu_read_lock(); - if (pid) { -@@ -2843,17 +2911,14 @@ struct task_struct *cgroup_procs_write_start(char *buf, bool threadgroup, - goto out_unlock_rcu; - - out_unlock_threadgroup: -- if (*locked) { -- percpu_up_write(&cgroup_threadgroup_rwsem); -- *locked = false; -- } -+ cgroup_attach_unlock(*threadgroup_locked); -+ *threadgroup_locked = false; - out_unlock_rcu: - rcu_read_unlock(); - return tsk; - } - --void cgroup_procs_write_finish(struct task_struct *task, bool locked) -- __releases(&cgroup_threadgroup_rwsem) -+void cgroup_procs_write_finish(struct task_struct *task, bool threadgroup_locked) - { - struct cgroup_subsys *ss; - int ssid; -@@ -2861,8 +2926,8 @@ void cgroup_procs_write_finish(struct task_struct *task, bool locked) - /* release reference from cgroup_procs_write_start() */ - put_task_struct(task); - -- if (locked) -- percpu_up_write(&cgroup_threadgroup_rwsem); -+ cgroup_attach_unlock(threadgroup_locked); + - for_each_subsys(ss, ssid) - if (ss->post_attach) - ss->post_attach(); -@@ -2917,12 +2982,11 @@ static int cgroup_update_dfl_csses(struct cgroup *cgrp) - struct cgroup_subsys_state *d_css; - struct cgroup *dsct; - struct css_set *src_cset; -+ bool has_tasks; - int ret; - - lockdep_assert_held(&cgroup_mutex); - -- percpu_down_write(&cgroup_threadgroup_rwsem); -- - /* look up all csses currently attached to @cgrp's subtree */ - spin_lock_irq(&css_set_lock); - cgroup_for_each_live_descendant_pre(dsct, d_css, cgrp) { -@@ -2933,13 +2997,23 @@ static int cgroup_update_dfl_csses(struct cgroup *cgrp) - } - spin_unlock_irq(&css_set_lock); - ++retry: ++ ret = get_futex_key(uaddr1, flags & FLAGS_SHARED, &key1, FUTEX_READ); ++ if (unlikely(ret != 0)) ++ return ret; ++ ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2, ++ requeue_pi ? FUTEX_WRITE : FUTEX_READ); ++ if (unlikely(ret != 0)) ++ return ret; ++ + /* -+ * We need to write-lock threadgroup_rwsem while migrating tasks. -+ * However, if there are no source csets for @cgrp, changing its -+ * controllers isn't gonna produce any task migrations and the -+ * write-locking can be skipped safely. ++ * The check above which compares uaddrs is not sufficient for ++ * shared futexes. 
We need to compare the keys: + */ -+ has_tasks = !list_empty(&mgctx.preloaded_src_csets); -+ cgroup_attach_lock(has_tasks); ++ if (requeue_pi && match_futex(&key1, &key2)) ++ return -EINVAL; + - /* NULL dst indicates self on default hierarchy */ - ret = cgroup_migrate_prepare_dst(&mgctx); - if (ret) - goto out_finish; - - spin_lock_irq(&css_set_lock); -- list_for_each_entry(src_cset, &mgctx.preloaded_src_csets, mg_preload_node) { -+ list_for_each_entry(src_cset, &mgctx.preloaded_src_csets, -+ mg_src_preload_node) { - struct task_struct *task, *ntask; - - /* all tasks in src_csets need to be migrated */ -@@ -2951,7 +3025,7 @@ static int cgroup_update_dfl_csses(struct cgroup *cgrp) - ret = cgroup_migrate_execute(&mgctx); - out_finish: - cgroup_migrate_finish(&mgctx); -- percpu_up_write(&cgroup_threadgroup_rwsem); -+ cgroup_attach_unlock(has_tasks); - return ret; - } - -@@ -3607,6 +3681,7 @@ static int cgroup_cpu_pressure_show(struct seq_file *seq, void *v) - static ssize_t cgroup_pressure_write(struct kernfs_open_file *of, char *buf, - size_t nbytes, enum psi_res res) - { -+ struct cgroup_file_ctx *ctx = of->priv; - struct psi_trigger *new; - struct cgroup *cgrp; - struct psi_group *psi; -@@ -3618,6 +3693,12 @@ static ssize_t cgroup_pressure_write(struct kernfs_open_file *of, char *buf, - cgroup_get(cgrp); - cgroup_kn_unlock(of->kn); - -+ /* Allow only one trigger per file descriptor */ -+ if (ctx->psi.trigger) { -+ cgroup_put(cgrp); -+ return -EBUSY; ++ hb1 = hash_futex(&key1); ++ hb2 = hash_futex(&key2); ++ ++retry_private: ++ hb_waiters_inc(hb2); ++ double_lock_hb(hb1, hb2); ++ ++ if (likely(cmpval != NULL)) { ++ u32 curval; ++ ++ ret = get_futex_value_locked(&curval, uaddr1); ++ ++ if (unlikely(ret)) { ++ double_unlock_hb(hb1, hb2); ++ hb_waiters_dec(hb2); ++ ++ ret = get_user(curval, uaddr1); ++ if (ret) ++ return ret; ++ ++ if (!(flags & FLAGS_SHARED)) ++ goto retry_private; ++ ++ goto retry; ++ } ++ if (curval != *cmpval) { ++ ret = -EAGAIN; ++ goto out_unlock; ++ } + } + - psi = cgroup_ino(cgrp) == 1 ? &psi_system : &cgrp->psi; - new = psi_trigger_create(psi, buf, nbytes, res); - if (IS_ERR(new)) { -@@ -3625,8 +3706,7 @@ static ssize_t cgroup_pressure_write(struct kernfs_open_file *of, char *buf, - return PTR_ERR(new); - } - -- psi_trigger_replace(&of->priv, new); -- -+ smp_store_release(&ctx->psi.trigger, new); - cgroup_put(cgrp); - - return nbytes; -@@ -3656,12 +3736,16 @@ static ssize_t cgroup_cpu_pressure_write(struct kernfs_open_file *of, - static __poll_t cgroup_pressure_poll(struct kernfs_open_file *of, - poll_table *pt) - { -- return psi_trigger_poll(&of->priv, of->file, pt); -+ struct cgroup_file_ctx *ctx = of->priv; ++ if (requeue_pi) { ++ struct task_struct *exiting = NULL; + -+ return psi_trigger_poll(&ctx->psi.trigger, of->file, pt); - } - - static void cgroup_pressure_release(struct kernfs_open_file *of) - { -- psi_trigger_replace(&of->priv, NULL); -+ struct cgroup_file_ctx *ctx = of->priv; ++ /* ++ * Attempt to acquire uaddr2 and wake the top waiter. If we ++ * intend to requeue waiters, force setting the FUTEX_WAITERS ++ * bit. We force this here where we are able to easily handle ++ * faults rather in the requeue loop below. ++ * ++ * Updates topwaiter::requeue_state if a top waiter exists. 
++ */ ++ ret = futex_proxy_trylock_atomic(uaddr2, hb1, hb2, &key1, ++ &key2, &pi_state, ++ &exiting, nr_requeue); + -+ psi_trigger_destroy(ctx->psi.trigger); - } - - bool cgroup_psi_enabled(void) -@@ -3788,24 +3872,43 @@ static ssize_t cgroup_kill_write(struct kernfs_open_file *of, char *buf, - static int cgroup_file_open(struct kernfs_open_file *of) - { - struct cftype *cft = of_cft(of); -+ struct cgroup_file_ctx *ctx; -+ int ret; - -- if (cft->open) -- return cft->open(of); -- return 0; -+ ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); -+ if (!ctx) -+ return -ENOMEM; ++ /* ++ * At this point the top_waiter has either taken uaddr2 or ++ * is waiting on it. In both cases pi_state has been ++ * established and an initial refcount on it. In case of an ++ * error there's nothing. ++ * ++ * The top waiter's requeue_state is up to date: ++ * ++ * - If the lock was acquired atomically (ret == 1), then ++ * the state is Q_REQUEUE_PI_LOCKED. ++ * ++ * The top waiter has been dequeued and woken up and can ++ * return to user space immediately. The kernel/user ++ * space state is consistent. In case that there must be ++ * more waiters requeued the WAITERS bit in the user ++ * space futex is set so the top waiter task has to go ++ * into the syscall slowpath to unlock the futex. This ++ * will block until this requeue operation has been ++ * completed and the hash bucket locks have been ++ * dropped. ++ * ++ * - If the trylock failed with an error (ret < 0) then ++ * the state is either Q_REQUEUE_PI_NONE, i.e. "nothing ++ * happened", or Q_REQUEUE_PI_IGNORE when there was an ++ * interleaved early wakeup. ++ * ++ * - If the trylock did not succeed (ret == 0) then the ++ * state is either Q_REQUEUE_PI_IN_PROGRESS or ++ * Q_REQUEUE_PI_WAIT if an early wakeup interleaved. ++ * This will be cleaned up in the loop below, which ++ * cannot fail because futex_proxy_trylock_atomic() did ++ * the same sanity checks for requeue_pi as the loop ++ * below does. ++ */ ++ switch (ret) { ++ case 0: ++ /* We hold a reference on the pi state. */ ++ break; + -+ ctx->ns = current->nsproxy->cgroup_ns; -+ get_cgroup_ns(ctx->ns); -+ of->priv = ctx; ++ case 1: ++ /* ++ * futex_proxy_trylock_atomic() acquired the user space ++ * futex. Adjust task_count. ++ */ ++ task_count++; ++ ret = 0; ++ break; + -+ if (!cft->open) -+ return 0; ++ /* ++ * If the above failed, then pi_state is NULL and ++ * waiter::requeue_state is correct. ++ */ ++ case -EFAULT: ++ double_unlock_hb(hb1, hb2); ++ hb_waiters_dec(hb2); ++ ret = fault_in_user_writeable(uaddr2); ++ if (!ret) ++ goto retry; ++ return ret; ++ case -EBUSY: ++ case -EAGAIN: ++ /* ++ * Two reasons for this: ++ * - EBUSY: Owner is exiting and we just wait for the ++ * exit to complete. ++ * - EAGAIN: The user space value changed. ++ */ ++ double_unlock_hb(hb1, hb2); ++ hb_waiters_dec(hb2); ++ /* ++ * Handle the case where the owner is in the middle of ++ * exiting. Wait for the exit to complete otherwise ++ * this task might loop forever, aka. live lock. ++ */ ++ wait_for_owner_exiting(ret, exiting); ++ cond_resched(); ++ goto retry; ++ default: ++ goto out_unlock; ++ } ++ } + -+ ret = cft->open(of); -+ if (ret) { -+ put_cgroup_ns(ctx->ns); -+ kfree(ctx); ++ plist_for_each_entry_safe(this, next, &hb1->chain, list) { ++ if (task_count - nr_wake >= nr_requeue) ++ break; ++ ++ if (!match_futex(&this->key, &key1)) ++ continue; ++ ++ /* ++ * FUTEX_WAIT_REQUEUE_PI and FUTEX_CMP_REQUEUE_PI should always ++ * be paired with each other and no other futex ops. 
++ * ++ * We should never be requeueing a futex_q with a pi_state, ++ * which is awaiting a futex_unlock_pi(). ++ */ ++ if ((requeue_pi && !this->rt_waiter) || ++ (!requeue_pi && this->rt_waiter) || ++ this->pi_state) { ++ ret = -EINVAL; ++ break; ++ } ++ ++ /* Plain futexes just wake or requeue and are done */ ++ if (!requeue_pi) { ++ if (++task_count <= nr_wake) ++ mark_wake_futex(&wake_q, this); ++ else ++ requeue_futex(this, hb1, hb2, &key2); ++ continue; ++ } ++ ++ /* Ensure we requeue to the expected futex for requeue_pi. */ ++ if (!match_futex(this->requeue_pi_key, &key2)) { ++ ret = -EINVAL; ++ break; ++ } ++ ++ /* ++ * Requeue nr_requeue waiters and possibly one more in the case ++ * of requeue_pi if we couldn't acquire the lock atomically. ++ * ++ * Prepare the waiter to take the rt_mutex. Take a refcount ++ * on the pi_state and store the pointer in the futex_q ++ * object of the waiter. ++ */ ++ get_pi_state(pi_state); ++ ++ /* Don't requeue when the waiter is already on the way out. */ ++ if (!futex_requeue_pi_prepare(this, pi_state)) { ++ /* ++ * Early woken waiter signaled that it is on the ++ * way out. Drop the pi_state reference and try the ++ * next waiter. @this->pi_state is still NULL. ++ */ ++ put_pi_state(pi_state); ++ continue; ++ } ++ ++ ret = rt_mutex_start_proxy_lock(&pi_state->pi_mutex, ++ this->rt_waiter, ++ this->task); ++ ++ if (ret == 1) { ++ /* ++ * We got the lock. We do neither drop the refcount ++ * on pi_state nor clear this->pi_state because the ++ * waiter needs the pi_state for cleaning up the ++ * user space value. It will drop the refcount ++ * after doing so. this::requeue_state is updated ++ * in the wakeup as well. ++ */ ++ requeue_pi_wake_futex(this, &key2, hb2); ++ task_count++; ++ } else if (!ret) { ++ /* Waiter is queued, move it to hb2 */ ++ requeue_futex(this, hb1, hb2, &key2); ++ futex_requeue_pi_complete(this, 0); ++ task_count++; ++ } else { ++ /* ++ * rt_mutex_start_proxy_lock() detected a potential ++ * deadlock when we tried to queue that waiter. ++ * Drop the pi_state reference which we took above ++ * and remove the pointer to the state from the ++ * waiters futex_q object. ++ */ ++ this->pi_state = NULL; ++ put_pi_state(pi_state); ++ futex_requeue_pi_complete(this, ret); ++ /* ++ * We stop queueing more waiters and let user space ++ * deal with the mess. 
++ */ ++ break; ++ } + } -+ return ret; - } - - static void cgroup_file_release(struct kernfs_open_file *of) - { - struct cftype *cft = of_cft(of); -+ struct cgroup_file_ctx *ctx = of->priv; - - if (cft->release) - cft->release(of); -+ put_cgroup_ns(ctx->ns); -+ kfree(ctx); - } - - static ssize_t cgroup_file_write(struct kernfs_open_file *of, char *buf, - size_t nbytes, loff_t off) - { -- struct cgroup_namespace *ns = current->nsproxy->cgroup_ns; -+ struct cgroup_file_ctx *ctx = of->priv; - struct cgroup *cgrp = of->kn->parent->priv; - struct cftype *cft = of_cft(of); - struct cgroup_subsys_state *css; -@@ -3822,7 +3925,7 @@ static ssize_t cgroup_file_write(struct kernfs_open_file *of, char *buf, - */ - if ((cgrp->root->flags & CGRP_ROOT_NS_DELEGATE) && - !(cft->flags & CFTYPE_NS_DELEGATABLE) && -- ns != &init_cgroup_ns && ns->root_cset->dfl_cgrp == cgrp) -+ ctx->ns != &init_cgroup_ns && ctx->ns->root_cset->dfl_cgrp == cgrp) - return -EPERM; - - if (cft->write) -@@ -4728,21 +4831,21 @@ void css_task_iter_end(struct css_task_iter *it) - - static void cgroup_procs_release(struct kernfs_open_file *of) - { -- if (of->priv) { -- css_task_iter_end(of->priv); -- kfree(of->priv); -- } -+ struct cgroup_file_ctx *ctx = of->priv; + -+ if (ctx->procs.started) -+ css_task_iter_end(&ctx->procs.iter); - } - - static void *cgroup_procs_next(struct seq_file *s, void *v, loff_t *pos) - { - struct kernfs_open_file *of = s->private; -- struct css_task_iter *it = of->priv; -+ struct cgroup_file_ctx *ctx = of->priv; - - if (pos) - (*pos)++; - -- return css_task_iter_next(it); -+ return css_task_iter_next(&ctx->procs.iter); - } - - static void *__cgroup_procs_start(struct seq_file *s, loff_t *pos, -@@ -4750,21 +4853,18 @@ static void *__cgroup_procs_start(struct seq_file *s, loff_t *pos, - { - struct kernfs_open_file *of = s->private; - struct cgroup *cgrp = seq_css(s)->cgroup; -- struct css_task_iter *it = of->priv; -+ struct cgroup_file_ctx *ctx = of->priv; -+ struct css_task_iter *it = &ctx->procs.iter; - - /* - * When a seq_file is seeked, it's always traversed sequentially - * from position 0, so we can simply keep iterating on !0 *pos. 
- */ -- if (!it) { -+ if (!ctx->procs.started) { - if (WARN_ON_ONCE((*pos))) - return ERR_PTR(-EINVAL); -- -- it = kzalloc(sizeof(*it), GFP_KERNEL); -- if (!it) -- return ERR_PTR(-ENOMEM); -- of->priv = it; - css_task_iter_start(&cgrp->self, iter_flags, it); -+ ctx->procs.started = true; - } else if (!(*pos)) { - css_task_iter_end(it); - css_task_iter_start(&cgrp->self, iter_flags, it); -@@ -4815,9 +4915,9 @@ static int cgroup_may_write(const struct cgroup *cgrp, struct super_block *sb) - - static int cgroup_procs_write_permission(struct cgroup *src_cgrp, - struct cgroup *dst_cgrp, -- struct super_block *sb) -+ struct super_block *sb, -+ struct cgroup_namespace *ns) - { -- struct cgroup_namespace *ns = current->nsproxy->cgroup_ns; - struct cgroup *com_cgrp = src_cgrp; - int ret; - -@@ -4846,11 +4946,12 @@ static int cgroup_procs_write_permission(struct cgroup *src_cgrp, - - static int cgroup_attach_permissions(struct cgroup *src_cgrp, - struct cgroup *dst_cgrp, -- struct super_block *sb, bool threadgroup) -+ struct super_block *sb, bool threadgroup, -+ struct cgroup_namespace *ns) - { - int ret = 0; - -- ret = cgroup_procs_write_permission(src_cgrp, dst_cgrp, sb); -+ ret = cgroup_procs_write_permission(src_cgrp, dst_cgrp, sb, ns); - if (ret) - return ret; - -@@ -4867,16 +4968,18 @@ static int cgroup_attach_permissions(struct cgroup *src_cgrp, - static ssize_t __cgroup_procs_write(struct kernfs_open_file *of, char *buf, - bool threadgroup) - { -+ struct cgroup_file_ctx *ctx = of->priv; - struct cgroup *src_cgrp, *dst_cgrp; - struct task_struct *task; -+ const struct cred *saved_cred; - ssize_t ret; -- bool locked; -+ bool threadgroup_locked; - - dst_cgrp = cgroup_kn_lock_live(of->kn, false); - if (!dst_cgrp) - return -ENODEV; - -- task = cgroup_procs_write_start(buf, threadgroup, &locked); -+ task = cgroup_procs_write_start(buf, threadgroup, &threadgroup_locked); - ret = PTR_ERR_OR_ZERO(task); - if (ret) - goto out_unlock; -@@ -4886,16 +4989,23 @@ static ssize_t __cgroup_procs_write(struct kernfs_open_file *of, char *buf, - src_cgrp = task_cgroup_from_root(task, &cgrp_dfl_root); - spin_unlock_irq(&css_set_lock); - -- /* process and thread migrations follow same delegation rule */ + /* -+ * Process and thread migrations follow same delegation rule. Check -+ * permissions using the credentials from file open to protect against -+ * inherited fd attacks. ++ * We took an extra initial reference to the pi_state in ++ * futex_proxy_trylock_atomic(). We need to drop it here again. + */ -+ saved_cred = override_creds(of->file->f_cred); - ret = cgroup_attach_permissions(src_cgrp, dst_cgrp, -- of->file->f_path.dentry->d_sb, threadgroup); -+ of->file->f_path.dentry->d_sb, -+ threadgroup, ctx->ns); -+ revert_creds(saved_cred); - if (ret) - goto out_finish; - - ret = cgroup_attach_task(dst_cgrp, task, threadgroup); - - out_finish: -- cgroup_procs_write_finish(task, locked); -+ cgroup_procs_write_finish(task, threadgroup_locked); - out_unlock: - cgroup_kn_unlock(of->kn); - -@@ -5911,17 +6021,23 @@ struct cgroup *cgroup_get_from_id(u64 id) - struct kernfs_node *kn; - struct cgroup *cgrp = NULL; - -- mutex_lock(&cgroup_mutex); - kn = kernfs_find_and_get_node_by_id(cgrp_dfl_root.kf_root, id); - if (!kn) -- goto out_unlock; -+ goto out; ++ put_pi_state(pi_state); ++ ++out_unlock: ++ double_unlock_hb(hb1, hb2); ++ wake_up_q(&wake_q); ++ hb_waiters_dec(hb2); ++ return ret ? ret : task_count; ++} ++ ++/* The key must be already stored in q->key. 
*/
++static inline struct futex_hash_bucket *queue_lock(struct futex_q *q)
++ __acquires(&hb->lock)
++{
++ struct futex_hash_bucket *hb;
++
++ hb = hash_futex(&q->key);
++
++ /*
++ * Increment the counter before taking the lock so that
++ * a potential waker won't miss a to-be-slept task that is
++ * waiting for the spinlock. This is safe as all queue_lock()
++ * users end up calling queue_me(). Similarly, for housekeeping,
++ * decrement the counter at queue_unlock() when some error has
++ * occurred and we don't end up adding the task to the list.
++ */
++ hb_waiters_inc(hb); /* implies smp_mb(); (A) */
++
++ q->lock_ptr = &hb->lock;
++
++ spin_lock(&hb->lock);
++ return hb;
++}
++
++static inline void
++queue_unlock(struct futex_hash_bucket *hb)
++ __releases(&hb->lock)
++{
++ spin_unlock(&hb->lock);
++ hb_waiters_dec(hb);
++}
++
++static inline void __queue_me(struct futex_q *q, struct futex_hash_bucket *hb)
++{
++ int prio;
++
++ /*
++ * The priority used to register this element is
++ * - either the real thread-priority for the real-time threads
++ * (i.e. threads with a priority lower than MAX_RT_PRIO)
++ * - or MAX_RT_PRIO for non-RT threads.
++ * Thus, all RT-threads are woken first in priority order, and
++ * the others are woken last, in FIFO order.
++ */
++ prio = min(current->normal_prio, MAX_RT_PRIO);
++
++ plist_node_init(&q->list, prio);
++ plist_add(&q->list, &hb->chain);
++ q->task = current;
++}
++
++/**
++ * queue_me() - Enqueue the futex_q on the futex_hash_bucket
++ * @q: The futex_q to enqueue
++ * @hb: The destination hash bucket
++ *
++ * The hb->lock must be held by the caller, and is released here. A call to
++ * queue_me() is typically paired with exactly one call to unqueue_me(). The
++ * exceptions involve the PI related operations, which may use unqueue_me_pi()
++ * or nothing if the unqueue is done as part of the wake process and the unqueue
++ * state is implicit in the state of the woken task (see
++ * futex_wait_requeue_pi() for an example).
++ */
++static inline void queue_me(struct futex_q *q, struct futex_hash_bucket *hb)
++ __releases(&hb->lock)
++{
++ __queue_me(q, hb);
++ spin_unlock(&hb->lock);
++}
++
++/**
++ * unqueue_me() - Remove the futex_q from its futex_hash_bucket
++ * @q: The futex_q to unqueue
++ *
++ * The q->lock_ptr must not be held by the caller. A call to unqueue_me() must
++ * be paired with exactly one earlier call to queue_me().
++ *
++ * Return:
++ * - 1 - if the futex_q was still queued (and we removed it);
++ * - 0 - if the futex_q was already removed by the waking thread
++ */
++static int unqueue_me(struct futex_q *q)
++{
++ spinlock_t *lock_ptr;
++ int ret = 0;
++
++ /* In the common case we don't take the spinlock, which is nice. */
++retry:
++ /*
++ * q->lock_ptr can change between this read and the following spin_lock.
++ * Use READ_ONCE to forbid the compiler from reloading q->lock_ptr and
++ * optimizing lock_ptr out of the logic below.
++ */
++ lock_ptr = READ_ONCE(q->lock_ptr);
++ if (lock_ptr != NULL) {
++ spin_lock(lock_ptr);
++ /*
++ * q->lock_ptr can change between reading it and
++ * spin_lock(), causing us to take the wrong lock. This
++ * corrects the race condition.
++ *
++ * Reasoning goes like this: if we have the wrong lock,
++ * q->lock_ptr must have changed (maybe several times)
++ * between reading it and the spin_lock(). It can
++ * change again after the spin_lock() but only if it was
++ * already changed before the spin_lock(). It cannot,
++ * however, change back to the original value.
Therefore ++ * we can detect whether we acquired the correct lock. ++ */ ++ if (unlikely(lock_ptr != q->lock_ptr)) { ++ spin_unlock(lock_ptr); ++ goto retry; ++ } ++ __unqueue_futex(q); ++ ++ BUG_ON(q->pi_state); ++ ++ spin_unlock(lock_ptr); ++ ret = 1; ++ } ++ ++ return ret; ++} ++ ++/* ++ * PI futexes can not be requeued and must remove themselves from the ++ * hash bucket. The hash bucket lock (i.e. lock_ptr) is held. ++ */ ++static void unqueue_me_pi(struct futex_q *q) ++{ ++ __unqueue_futex(q); + -+ if (kernfs_type(kn) != KERNFS_DIR) -+ goto put; - -- cgrp = kn->priv; -- if (cgroup_is_dead(cgrp) || !cgroup_tryget(cgrp)) -+ rcu_read_lock(); ++ BUG_ON(!q->pi_state); ++ put_pi_state(q->pi_state); ++ q->pi_state = NULL; ++} + -+ cgrp = rcu_dereference(*(void __rcu __force **)&kn->priv); -+ if (cgrp && !cgroup_tryget(cgrp)) - cgrp = NULL; ++static int __fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q, ++ struct task_struct *argowner) ++{ ++ struct futex_pi_state *pi_state = q->pi_state; ++ struct task_struct *oldowner, *newowner; ++ u32 uval, curval, newval, newtid; ++ int err = 0; + -+ rcu_read_unlock(); -+put: - kernfs_put(kn); --out_unlock: -- mutex_unlock(&cgroup_mutex); -+out: - return cgrp; - } - EXPORT_SYMBOL_GPL(cgroup_get_from_id); -@@ -6104,7 +6220,8 @@ static int cgroup_css_set_fork(struct kernel_clone_args *kargs) - goto err; - - ret = cgroup_attach_permissions(cset->dfl_cgrp, dst_cgrp, sb, -- !(kargs->flags & CLONE_THREAD)); -+ !(kargs->flags & CLONE_THREAD), -+ current->nsproxy->cgroup_ns); - if (ret) - goto err; - -@@ -6474,30 +6591,38 @@ struct cgroup_subsys_state *css_from_id(int id, struct cgroup_subsys *ss) - * - * Find the cgroup at @path on the default hierarchy, increment its - * reference count and return it. Returns pointer to the found cgroup on -- * success, ERR_PTR(-ENOENT) if @path doesn't exist and ERR_PTR(-ENOTDIR) -- * if @path points to a non-directory. -+ * success, ERR_PTR(-ENOENT) if @path doesn't exist or if the cgroup has already -+ * been released and ERR_PTR(-ENOTDIR) if @path points to a non-directory. - */ - struct cgroup *cgroup_get_from_path(const char *path) - { - struct kernfs_node *kn; -- struct cgroup *cgrp; -+ struct cgroup *cgrp = ERR_PTR(-ENOENT); -+ struct cgroup *root_cgrp; - -- mutex_lock(&cgroup_mutex); -+ spin_lock_irq(&css_set_lock); -+ root_cgrp = current_cgns_cgroup_from_root(&cgrp_dfl_root); -+ kn = kernfs_walk_and_get(root_cgrp->kn, path); -+ spin_unlock_irq(&css_set_lock); -+ if (!kn) -+ goto out; - -- kn = kernfs_walk_and_get(cgrp_dfl_root.cgrp.kn, path); -- if (kn) { -- if (kernfs_type(kn) == KERNFS_DIR) { -- cgrp = kn->priv; -- cgroup_get_live(cgrp); -- } else { -- cgrp = ERR_PTR(-ENOTDIR); -- } -- kernfs_put(kn); -- } else { -- cgrp = ERR_PTR(-ENOENT); -+ if (kernfs_type(kn) != KERNFS_DIR) { -+ cgrp = ERR_PTR(-ENOTDIR); -+ goto out_kernfs; - } - -- mutex_unlock(&cgroup_mutex); -+ rcu_read_lock(); ++ oldowner = pi_state->owner; + -+ cgrp = rcu_dereference(*(void __rcu __force **)&kn->priv); -+ if (!cgrp || !cgroup_tryget(cgrp)) -+ cgrp = ERR_PTR(-ENOENT); ++ /* ++ * We are here because either: ++ * ++ * - we stole the lock and pi_state->owner needs updating to reflect ++ * that (@argowner == current), ++ * ++ * or: ++ * ++ * - someone stole our lock and we need to fix things to point to the ++ * new owner (@argowner == NULL). ++ * ++ * Either way, we have to replace the TID in the user space variable. ++ * This must be atomic as we have to preserve the owner died bit here. 
++ * ++ * Note: We write the user space value _before_ changing the pi_state ++ * because we can fault here. Imagine swapped out pages or a fork ++ * that marked all the anonymous memory readonly for cow. ++ * ++ * Modifying pi_state _before_ the user space value would leave the ++ * pi_state in an inconsistent state when we fault here, because we ++ * need to drop the locks to handle the fault. This might be observed ++ * in the PID checks when attaching to PI state . ++ */ ++retry: ++ if (!argowner) { ++ if (oldowner != current) { ++ /* ++ * We raced against a concurrent self; things are ++ * already fixed up. Nothing to do. ++ */ ++ return 0; ++ } + -+ rcu_read_unlock(); ++ if (__rt_mutex_futex_trylock(&pi_state->pi_mutex)) { ++ /* We got the lock. pi_state is correct. Tell caller. */ ++ return 1; ++ } + -+out_kernfs: -+ kernfs_put(kn); -+out: - return cgrp; - } - EXPORT_SYMBOL_GPL(cgroup_get_from_path); -diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c -index 2a9695ccb65f5..428820bf141d1 100644 ---- a/kernel/cgroup/cpuset.c -+++ b/kernel/cgroup/cpuset.c -@@ -33,6 +33,7 @@ - #include <linux/interrupt.h> - #include <linux/kernel.h> - #include <linux/kmod.h> -+#include <linux/kthread.h> - #include <linux/list.h> - #include <linux/mempolicy.h> - #include <linux/mm.h> -@@ -1087,10 +1088,18 @@ static void update_tasks_cpumask(struct cpuset *cs) - { - struct css_task_iter it; - struct task_struct *task; -+ bool top_cs = cs == &top_cpuset; - - css_task_iter_start(&cs->css, 0, &it); -- while ((task = css_task_iter_next(&it))) -+ while ((task = css_task_iter_next(&it))) { + /* -+ * Percpu kthreads in top_cpuset are ignored ++ * The trylock just failed, so either there is an owner or ++ * there is a higher priority waiter than this one. + */ -+ if (top_cs && (task->flags & PF_KTHREAD) && -+ kthread_is_per_cpu(task)) -+ continue; - set_cpus_allowed_ptr(task, cs->effective_cpus); ++ newowner = rt_mutex_owner(&pi_state->pi_mutex); ++ /* ++ * If the higher priority waiter has not yet taken over the ++ * rtmutex then newowner is NULL. We can't return here with ++ * that state because it's inconsistent vs. the user space ++ * state. So drop the locks and try again. It's a valid ++ * situation and not any different from the other retry ++ * conditions. ++ */ ++ if (unlikely(!newowner)) { ++ err = -EAGAIN; ++ goto handle_err; ++ } ++ } else { ++ WARN_ON_ONCE(argowner != current); ++ if (oldowner == current) { ++ /* ++ * We raced against a concurrent self; things are ++ * already fixed up. Nothing to do. ++ */ ++ return 1; ++ } ++ newowner = argowner; + } - css_task_iter_end(&it); - } - -@@ -1512,10 +1521,15 @@ static void update_sibling_cpumasks(struct cpuset *parent, struct cpuset *cs, - struct cpuset *sibling; - struct cgroup_subsys_state *pos_css; - -+ percpu_rwsem_assert_held(&cpuset_rwsem); + - /* - * Check all its siblings and call update_cpumasks_hier() - * if their use_parent_ecpus flag is set in order for them - * to use the right effective_cpus value. ++ newtid = task_pid_vnr(newowner) | FUTEX_WAITERS; ++ /* Owner died? */ ++ if (!pi_state->owner) ++ newtid |= FUTEX_OWNER_DIED; ++ ++ err = get_futex_value_locked(&uval, uaddr); ++ if (err) ++ goto handle_err; ++ ++ for (;;) { ++ newval = (uval & FUTEX_OWNER_DIED) | newtid; ++ ++ err = cmpxchg_futex_value_locked(&curval, uaddr, uval, newval); ++ if (err) ++ goto handle_err; ++ ++ if (curval == uval) ++ break; ++ uval = curval; ++ } ++ ++ /* ++ * We fixed up user space. Now we need to fix the pi_state ++ * itself. 
++ */
++ pi_state_update_owner(pi_state, newowner);
++
++ return argowner == current;
++
++ /*
++ * In order to reschedule or handle a page fault, we need to drop the
++ * locks here. In the case of a fault, this gives the other task
++ * (either the highest priority waiter itself or the task which stole
++ * the rtmutex) the chance to try the fixup of the pi_state. So once we
++ * are back from handling the fault we need to check the pi_state after
++ * reacquiring the locks and before trying to do another fixup. When
++ * the fixup has been done already we simply return.
++ *
++ * Note: we hold both hb->lock and pi_mutex->wait_lock. We can safely
++ * drop hb->lock since the caller owns the hb -> futex_q relation.
++ * Dropping the pi_mutex->wait_lock requires the state to be revalidated.
++ */ ++handle_err: ++ raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock); ++ spin_unlock(q->lock_ptr); ++ ++ switch (err) { ++ case -EFAULT: ++ err = fault_in_user_writeable(uaddr); ++ break; ++ ++ case -EAGAIN: ++ cond_resched(); ++ err = 0; ++ break; ++ ++ default: ++ WARN_ON_ONCE(1); ++ break; ++ } ++ ++ spin_lock(q->lock_ptr); ++ raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock); ++ + /* -+ * cpus_allowd/mems_allowed set to v2 values in the initial -+ * cpuset_bind() call will be reset to v1 values in another -+ * cpuset_bind() call when v1 cpuset is mounted. ++ * Check if someone else fixed it for us: + */ - top_cpuset.old_mems_allowed = top_cpuset.mems_allowed; - - cpumask_copy(top_cpuset.effective_cpus, cpu_active_mask); -diff --git a/kernel/cgroup/rstat.c b/kernel/cgroup/rstat.c -index b264ab5652ba9..1486768f23185 100644 ---- a/kernel/cgroup/rstat.c -+++ b/kernel/cgroup/rstat.c -@@ -433,8 +433,6 @@ static void root_cgroup_cputime(struct task_cputime *cputime) - cputime->sum_exec_runtime += user; - cputime->sum_exec_runtime += sys; - cputime->sum_exec_runtime += cpustat[CPUTIME_STEAL]; -- cputime->sum_exec_runtime += cpustat[CPUTIME_GUEST]; -- cputime->sum_exec_runtime += cpustat[CPUTIME_GUEST_NICE]; - } - } - -diff --git a/kernel/cpu.c b/kernel/cpu.c -index 192e43a874076..da871eb075662 100644 ---- a/kernel/cpu.c -+++ b/kernel/cpu.c -@@ -31,8 +31,10 @@ - #include <linux/smpboot.h> - #include <linux/relay.h> - #include <linux/slab.h> -+#include <linux/scs.h> - #include <linux/percpu-rwsem.h> - #include <linux/cpuset.h> -+#include <linux/random.h> - - #include <trace/events/power.h> - #define CREATE_TRACE_POINTS -@@ -69,7 +71,6 @@ struct cpuhp_cpu_state { - bool rollback; - bool single; - bool bringup; -- int cpu; - struct hlist_node *node; - struct hlist_node *last; - enum cpuhp_state cb_state; -@@ -473,7 +474,7 @@ static inline bool cpu_smt_allowed(unsigned int cpu) { return true; } - #endif - - static inline enum cpuhp_state --cpuhp_set_state(struct cpuhp_cpu_state *st, enum cpuhp_state target) -+cpuhp_set_state(int cpu, struct cpuhp_cpu_state *st, enum cpuhp_state target) - { - enum cpuhp_state prev_state = st->state; - bool bringup = st->state < target; -@@ -484,14 +485,15 @@ cpuhp_set_state(struct cpuhp_cpu_state *st, enum cpuhp_state target) - st->target = target; - st->single = false; - st->bringup = bringup; -- if (cpu_dying(st->cpu) != !bringup) -- set_cpu_dying(st->cpu, !bringup); -+ if (cpu_dying(cpu) != !bringup) -+ set_cpu_dying(cpu, !bringup); - - return prev_state; - } - - static inline void --cpuhp_reset_state(struct cpuhp_cpu_state *st, enum cpuhp_state prev_state) -+cpuhp_reset_state(int cpu, struct cpuhp_cpu_state *st, -+ enum cpuhp_state prev_state) - { - bool bringup = !st->bringup; - -@@ -518,8 +520,8 @@ cpuhp_reset_state(struct cpuhp_cpu_state *st, enum cpuhp_state prev_state) - } - - st->bringup = bringup; -- if (cpu_dying(st->cpu) != !bringup) -- set_cpu_dying(st->cpu, !bringup); -+ if (cpu_dying(cpu) != !bringup) -+ set_cpu_dying(cpu, !bringup); - } - - /* Regular hotplug invocation of the AP hotplug thread */ -@@ -539,15 +541,16 @@ static void __cpuhp_kick_ap(struct cpuhp_cpu_state *st) - wait_for_ap_thread(st, st->bringup); - } - --static int cpuhp_kick_ap(struct cpuhp_cpu_state *st, enum cpuhp_state target) -+static int cpuhp_kick_ap(int cpu, struct cpuhp_cpu_state *st, -+ enum cpuhp_state target) - { - enum cpuhp_state prev_state; - int ret; - -- prev_state = cpuhp_set_state(st, target); -+ prev_state = cpuhp_set_state(cpu, st, target); - 
__cpuhp_kick_ap(st); - if ((ret = st->result)) { -- cpuhp_reset_state(st, prev_state); -+ cpuhp_reset_state(cpu, st, prev_state); - __cpuhp_kick_ap(st); - } - -@@ -579,7 +582,7 @@ static int bringup_wait_for_ap(unsigned int cpu) - if (st->target <= CPUHP_AP_ONLINE_IDLE) - return 0; - -- return cpuhp_kick_ap(st, st->target); -+ return cpuhp_kick_ap(cpu, st, st->target); - } - - static int bringup_cpu(unsigned int cpu) -@@ -587,6 +590,12 @@ static int bringup_cpu(unsigned int cpu) - struct task_struct *idle = idle_thread_get(cpu); - int ret; - ++ if (pi_state->owner != oldowner) ++ return argowner == current; ++ ++ /* Retry if err was -EAGAIN or the fault in succeeded */ ++ if (!err) ++ goto retry; ++ + /* -+ * Reset stale stack state from the last time this CPU was online. ++ * fault_in_user_writeable() failed so user state is immutable. At ++ * best we can make the kernel state consistent but user state will ++ * be most likely hosed and any subsequent unlock operation will be ++ * rejected due to PI futex rule [10]. ++ * ++ * Ensure that the rtmutex owner is also the pi_state owner despite ++ * the user space value claiming something different. There is no ++ * point in unlocking the rtmutex if current is the owner as it ++ * would need to wait until the next waiter has taken the rtmutex ++ * to guarantee consistent state. Keep it simple. Userspace asked ++ * for this wreckaged state. ++ * ++ * The rtmutex has an owner - either current or some other ++ * task. See the EAGAIN loop above. + */ -+ scs_task_reset(idle); -+ kasan_unpoison_task_stack(idle); ++ pi_state_update_owner(pi_state, rt_mutex_owner(&pi_state->pi_mutex)); + - /* - * Some architectures have to walk the irq descriptors to - * setup the vector space for the cpu which comes online. -@@ -696,7 +705,7 @@ static int cpuhp_up_callbacks(unsigned int cpu, struct cpuhp_cpu_state *st, - ret, cpu, cpuhp_get_step(st->state)->name, - st->state); - -- cpuhp_reset_state(st, prev_state); -+ cpuhp_reset_state(cpu, st, prev_state); - if (can_rollback_cpu(st)) - WARN_ON(cpuhp_invoke_callback_range(false, cpu, st, - prev_state)); -@@ -713,7 +722,6 @@ static void cpuhp_create(unsigned int cpu) - - init_completion(&st->done_up); - init_completion(&st->done_down); -- st->cpu = cpu; - } - - static int cpuhp_should_run(unsigned int cpu) -@@ -867,7 +875,7 @@ static int cpuhp_kick_ap_work(unsigned int cpu) - cpuhp_lock_release(true); - - trace_cpuhp_enter(cpu, st->target, prev_state, cpuhp_kick_ap_work); -- ret = cpuhp_kick_ap(st, st->target); -+ ret = cpuhp_kick_ap(cpu, st, st->target); - trace_cpuhp_exit(cpu, st->state, prev_state, ret); - - return ret; -@@ -1099,7 +1107,7 @@ static int cpuhp_down_callbacks(unsigned int cpu, struct cpuhp_cpu_state *st, - ret, cpu, cpuhp_get_step(st->state)->name, - st->state); - -- cpuhp_reset_state(st, prev_state); -+ cpuhp_reset_state(cpu, st, prev_state); - - if (st->state < prev_state) - WARN_ON(cpuhp_invoke_callback_range(true, cpu, st, -@@ -1126,7 +1134,7 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen, - - cpuhp_tasks_frozen = tasks_frozen; - -- prev_state = cpuhp_set_state(st, target); -+ prev_state = cpuhp_set_state(cpu, st, target); - /* - * If the current CPU state is in the range of the AP hotplug thread, - * then we need to kick the thread. 
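
The cpu-hotplug hunks quoted above thread the CPU number explicitly through
cpuhp_set_state()/cpuhp_reset_state() instead of reading it back from st->cpu.
For readers following along, here is a rough sketch of the teardown pattern the
surrounding comment describes (kick the per-CPU AP hotplug thread for the
states it owns, then finish from the control CPU). This is an illustration
only, with identifiers borrowed from kernel/cpu.c; it is not the exact
upstream function body:

    static int cpu_down_sketch(unsigned int cpu, enum cpuhp_state target)
    {
        struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
        enum cpuhp_state prev_state;
        int ret;

        /* Record direction and target; cpu is now passed explicitly. */
        prev_state = cpuhp_set_state(cpu, st, target);

        if (st->state > CPUHP_TEARDOWN_CPU) {
            /*
             * States above CPUHP_TEARDOWN_CPU belong to the AP
             * hotplug thread: kick it and wait for it to get there.
             */
            ret = cpuhp_kick_ap_work(cpu);
            if (ret) {
                /* Roll back using the same explicit-cpu convention. */
                cpuhp_reset_state(cpu, st, prev_state);
                return ret;
            }
        }

        /* The remaining teardown steps run from the control CPU. */
        return cpuhp_down_callbacks(cpu, st, target);
    }
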
-@@ -1157,7 +1165,7 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen, - ret = cpuhp_down_callbacks(cpu, st, target); - if (ret && st->state < prev_state) { - if (st->state == CPUHP_TEARDOWN_CPU) { -- cpuhp_reset_state(st, prev_state); -+ cpuhp_reset_state(cpu, st, prev_state); - __cpuhp_kick_ap(st); - } else { - WARN(1, "DEAD callback error for CPU%d", cpu); -@@ -1344,7 +1352,7 @@ static int _cpu_up(unsigned int cpu, int tasks_frozen, enum cpuhp_state target) - - cpuhp_tasks_frozen = tasks_frozen; - -- cpuhp_set_state(st, target); -+ cpuhp_set_state(cpu, st, target); - /* - * If the current CPU state is in the range of the AP hotplug thread, - * then we need to kick the thread once more. -@@ -1652,6 +1660,11 @@ static struct cpuhp_step cpuhp_hp_states[] = { - .startup.single = perf_event_init_cpu, - .teardown.single = perf_event_exit_cpu, - }, -+ [CPUHP_RANDOM_PREPARE] = { -+ .name = "random:prepare", -+ .startup.single = random_prepare_cpu, -+ .teardown.single = NULL, -+ }, - [CPUHP_WORKQUEUE_PREP] = { - .name = "workqueue:prepare", - .startup.single = workqueue_prepare_cpu, -@@ -1775,6 +1788,11 @@ static struct cpuhp_step cpuhp_hp_states[] = { - .startup.single = workqueue_online_cpu, - .teardown.single = workqueue_offline_cpu, - }, -+ [CPUHP_AP_RANDOM_ONLINE] = { -+ .name = "random:online", -+ .startup.single = random_online_cpu, -+ .teardown.single = NULL, -+ }, - [CPUHP_AP_RCUTREE_ONLINE] = { - .name = "RCU/tree:online", - .startup.single = rcutree_online_cpu, -diff --git a/kernel/crash_core.c b/kernel/crash_core.c -index eb53f5ec62c90..256cf6db573cd 100644 ---- a/kernel/crash_core.c -+++ b/kernel/crash_core.c -@@ -6,6 +6,7 @@ - - #include <linux/buildid.h> - #include <linux/crash_core.h> -+#include <linux/init.h> - #include <linux/utsname.h> - #include <linux/vmalloc.h> - -@@ -295,6 +296,16 @@ int __init parse_crashkernel_low(char *cmdline, - "crashkernel=", suffix_tbl[SUFFIX_LOW]); - } - -+/* -+ * Add a dummy early_param handler to mark crashkernel= as a known command line -+ * parameter and suppress incorrect warnings in init/main.c. -+ */ -+static int __init parse_crashkernel_dummy(char *arg) ++ return err; ++} ++ ++static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q, ++ struct task_struct *argowner) +{ -+ return 0; ++ struct futex_pi_state *pi_state = q->pi_state; ++ int ret; ++ ++ lockdep_assert_held(q->lock_ptr); ++ ++ raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock); ++ ret = __fixup_pi_state_owner(uaddr, q, argowner); ++ raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock); ++ return ret; +} -+early_param("crashkernel", parse_crashkernel_dummy); + - Elf_Word *append_elf_note(Elf_Word *buf, char *name, unsigned int type, - void *data, size_t data_len) - { -diff --git a/kernel/cred.c b/kernel/cred.c -index 1ae0b4948a5a8..933155c969227 100644 ---- a/kernel/cred.c -+++ b/kernel/cred.c -@@ -665,26 +665,20 @@ EXPORT_SYMBOL(cred_fscmp); - - int set_cred_ucounts(struct cred *new) - { -- struct task_struct *task = current; -- const struct cred *old = task->real_cred; - struct ucounts *new_ucounts, *old_ucounts = new->ucounts; - -- if (new->user == old->user && new->user_ns == old->user_ns) -- return 0; -- - /* - * This optimization is needed because alloc_ucounts() uses locks - * for table lookups. 
- */ -- if (old_ucounts && old_ucounts->ns == new->user_ns && uid_eq(old_ucounts->uid, new->euid)) -+ if (old_ucounts->ns == new->user_ns && uid_eq(old_ucounts->uid, new->uid)) - return 0; - -- if (!(new_ucounts = alloc_ucounts(new->user_ns, new->euid))) -+ if (!(new_ucounts = alloc_ucounts(new->user_ns, new->uid))) - return -EAGAIN; - - new->ucounts = new_ucounts; -- if (old_ucounts) -- put_ucounts(old_ucounts); -+ put_ucounts(old_ucounts); - - return 0; - } -diff --git a/kernel/debug/debug_core.c b/kernel/debug/debug_core.c -index da06a5553835b..7beceb447211d 100644 ---- a/kernel/debug/debug_core.c -+++ b/kernel/debug/debug_core.c -@@ -53,6 +53,7 @@ - #include <linux/vmacache.h> - #include <linux/rcupdate.h> - #include <linux/irq.h> -+#include <linux/security.h> - - #include <asm/cacheflush.h> - #include <asm/byteorder.h> -@@ -752,6 +753,29 @@ cpu_master_loop: - continue; - kgdb_connected = 0; - } else { -+ /* -+ * This is a brutal way to interfere with the debugger -+ * and prevent gdb being used to poke at kernel memory. -+ * This could cause trouble if lockdown is applied when -+ * there is already an active gdb session. For now the -+ * answer is simply "don't do that". Typically lockdown -+ * *will* be applied before the debug core gets started -+ * so only developers using kgdb for fairly advanced -+ * early kernel debug can be biten by this. Hopefully -+ * they are sophisticated enough to take care of -+ * themselves, especially with help from the lockdown -+ * message printed on the console! -+ */ -+ if (security_locked_down(LOCKDOWN_DBG_WRITE_KERNEL)) { -+ if (IS_ENABLED(CONFIG_KGDB_KDB)) { -+ /* Switch back to kdb if possible... */ -+ dbg_kdb_mode = 1; -+ continue; -+ } else { -+ /* ... otherwise just bail */ -+ break; -+ } -+ } - error = gdb_serial_stub(ks); - } - -diff --git a/kernel/debug/kdb/kdb_bt.c b/kernel/debug/kdb/kdb_bt.c -index 1f9f0e47aedaa..10b454554ab03 100644 ---- a/kernel/debug/kdb/kdb_bt.c -+++ b/kernel/debug/kdb/kdb_bt.c -@@ -46,7 +46,7 @@ static void kdb_show_stack(struct task_struct *p, void *addr) - * btp <pid> Kernel stack for <pid> - * btt <address-expression> Kernel stack for task structure at - * <address-expression> -- * bta [DRSTCZEUIMA] All useful processes, optionally -+ * bta [state_chars>|A] All useful processes, optionally - * filtered by state - * btc [<cpu>] The current process on one cpu, - * default is all cpus -@@ -74,7 +74,7 @@ static void kdb_show_stack(struct task_struct *p, void *addr) - */ - - static int --kdb_bt1(struct task_struct *p, unsigned long mask, bool btaprompt) -+kdb_bt1(struct task_struct *p, const char *mask, bool btaprompt) - { - char ch; - -@@ -120,7 +120,7 @@ kdb_bt_cpu(unsigned long cpu) - return; - } - -- kdb_bt1(kdb_tsk, ~0UL, false); -+ kdb_bt1(kdb_tsk, "A", false); - } - - int -@@ -138,8 +138,8 @@ kdb_bt(int argc, const char **argv) - if (strcmp(argv[0], "bta") == 0) { - struct task_struct *g, *p; - unsigned long cpu; -- unsigned long mask = kdb_task_state_string(argc ? argv[1] : -- NULL); -+ const char *mask = argc ? 
argv[1] : kdbgetenv("PS"); ++static long futex_wait_restart(struct restart_block *restart); + - if (argc == 0) - kdb_ps_suppressed(); - /* Run the active tasks first */ -@@ -167,7 +167,7 @@ kdb_bt(int argc, const char **argv) - return diag; - p = find_task_by_pid_ns(pid, &init_pid_ns); - if (p) -- return kdb_bt1(p, ~0UL, false); -+ return kdb_bt1(p, "A", false); - kdb_printf("No process with pid == %ld found\n", pid); - return 0; - } else if (strcmp(argv[0], "btt") == 0) { -@@ -176,7 +176,7 @@ kdb_bt(int argc, const char **argv) - diag = kdbgetularg((char *)argv[1], &addr); - if (diag) - return diag; -- return kdb_bt1((struct task_struct *)addr, ~0UL, false); -+ return kdb_bt1((struct task_struct *)addr, "A", false); - } else if (strcmp(argv[0], "btc") == 0) { - unsigned long cpu = ~0; - if (argc > 1) -@@ -212,7 +212,7 @@ kdb_bt(int argc, const char **argv) - kdb_show_stack(kdb_current_task, (void *)addr); - return 0; - } else { -- return kdb_bt1(kdb_current_task, ~0UL, false); -+ return kdb_bt1(kdb_current_task, "A", false); - } - } - -diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c -index fa6deda894a17..ead4da9471270 100644 ---- a/kernel/debug/kdb/kdb_main.c -+++ b/kernel/debug/kdb/kdb_main.c -@@ -45,6 +45,7 @@ - #include <linux/proc_fs.h> - #include <linux/uaccess.h> - #include <linux/slab.h> -+#include <linux/security.h> - #include "kdb_private.h" - - #undef MODULE_PARAM_PREFIX -@@ -166,10 +167,62 @@ struct task_struct *kdb_curr_task(int cpu) - } - - /* -- * Check whether the flags of the current command and the permissions -- * of the kdb console has allow a command to be run. -+ * Update the permissions flags (kdb_cmd_enabled) to match the -+ * current lockdown state. -+ * -+ * Within this function the calls to security_locked_down() are "lazy". We -+ * avoid calling them if the current value of kdb_cmd_enabled already excludes -+ * flags that might be subject to lockdown. Additionally we deliberately check -+ * the lockdown flags independently (even though read lockdown implies write -+ * lockdown) since that results in both simpler code and clearer messages to -+ * the user on first-time debugger entry. ++/** ++ * fixup_owner() - Post lock pi_state and corner case management ++ * @uaddr: user address of the futex ++ * @q: futex_q (contains pi_state and access to the rt_mutex) ++ * @locked: if the attempt to take the rt_mutex succeeded (1) or not (0) + * -+ * The permission masks during a read+write lockdown permits the following -+ * flags: INSPECT, SIGNAL, REBOOT (and ALWAYS_SAFE). ++ * After attempting to lock an rt_mutex, this function is called to cleanup ++ * the pi_state owner as well as handle race conditions that may allow us to ++ * acquire the lock. Must be called with the hb lock held. + * -+ * The INSPECT commands are not blocked during lockdown because they are -+ * not arbitrary memory reads. INSPECT covers the backtrace family (sometimes -+ * forcing them to have no arguments) and lsmod. These commands do expose -+ * some kernel state but do not allow the developer seated at the console to -+ * choose what state is reported. SIGNAL and REBOOT should not be controversial, -+ * given these are allowed for root during lockdown already. 
++ * Return: ++ * - 1 - success, lock taken; ++ * - 0 - success, lock not taken; ++ * - <0 - on error (-EFAULT) + */ -+static void kdb_check_for_lockdown(void) ++static int fixup_owner(u32 __user *uaddr, struct futex_q *q, int locked) +{ -+ const int write_flags = KDB_ENABLE_MEM_WRITE | -+ KDB_ENABLE_REG_WRITE | -+ KDB_ENABLE_FLOW_CTRL; -+ const int read_flags = KDB_ENABLE_MEM_READ | -+ KDB_ENABLE_REG_READ; ++ if (locked) { ++ /* ++ * Got the lock. We might not be the anticipated owner if we ++ * did a lock-steal - fix up the PI-state in that case: ++ * ++ * Speculative pi_state->owner read (we don't hold wait_lock); ++ * since we own the lock pi_state->owner == current is the ++ * stable state, anything else needs more attention. ++ */ ++ if (q->pi_state->owner != current) ++ return fixup_pi_state_owner(uaddr, q, current); ++ return 1; ++ } + -+ bool need_to_lockdown_write = false; -+ bool need_to_lockdown_read = false; ++ /* ++ * If we didn't get the lock; check if anybody stole it from us. In ++ * that case, we need to fix up the uval to point to them instead of ++ * us, otherwise bad things happen. [10] ++ * ++ * Another speculative read; pi_state->owner == current is unstable ++ * but needs our attention. ++ */ ++ if (q->pi_state->owner == current) ++ return fixup_pi_state_owner(uaddr, q, NULL); + -+ if (kdb_cmd_enabled & (KDB_ENABLE_ALL | write_flags)) -+ need_to_lockdown_write = -+ security_locked_down(LOCKDOWN_DBG_WRITE_KERNEL); ++ /* ++ * Paranoia check. If we did not take the lock, then we should not be ++ * the owner of the rt_mutex. Warn and establish consistent state. ++ */ ++ if (WARN_ON_ONCE(rt_mutex_owner(&q->pi_state->pi_mutex) == current)) ++ return fixup_pi_state_owner(uaddr, q, current); + -+ if (kdb_cmd_enabled & (KDB_ENABLE_ALL | read_flags)) -+ need_to_lockdown_read = -+ security_locked_down(LOCKDOWN_DBG_READ_KERNEL); ++ return 0; ++} + -+ /* De-compose KDB_ENABLE_ALL if required */ -+ if (need_to_lockdown_write || need_to_lockdown_read) -+ if (kdb_cmd_enabled & KDB_ENABLE_ALL) -+ kdb_cmd_enabled = KDB_ENABLE_MASK & ~KDB_ENABLE_ALL; ++/** ++ * futex_wait_queue_me() - queue_me() and wait for wakeup, timeout, or signal ++ * @hb: the futex hash bucket, must be locked by the caller ++ * @q: the futex_q to queue up on ++ * @timeout: the prepared hrtimer_sleeper, or null for no timeout ++ */ ++static void futex_wait_queue_me(struct futex_hash_bucket *hb, struct futex_q *q, ++ struct hrtimer_sleeper *timeout) ++{ ++ /* ++ * The task state is guaranteed to be set before another task can ++ * wake it. set_current_state() is implemented using smp_store_mb() and ++ * queue_me() calls spin_unlock() upon completion, both serializing ++ * access to the hash list and forcing another memory barrier. ++ */ ++ set_current_state(TASK_INTERRUPTIBLE); ++ queue_me(q, hb); + -+ if (need_to_lockdown_write) -+ kdb_cmd_enabled &= ~write_flags; ++ /* Arm the timer */ ++ if (timeout) ++ hrtimer_sleeper_start_expires(timeout, HRTIMER_MODE_ABS); + -+ if (need_to_lockdown_read) -+ kdb_cmd_enabled &= ~read_flags; ++ /* ++ * If we have been removed from the hash list, then another task ++ * has tried to wake us, and we can skip the call to schedule(). ++ */ ++ if (likely(!plist_node_empty(&q->list))) { ++ /* ++ * If the timer has already expired, current will already be ++ * flagged for rescheduling. Only call schedule if there ++ * is no timeout, or if it has yet to expire. 
++ */
++ if (!timeout || timeout->task)
++ freezable_schedule();
++ }
++ __set_current_state(TASK_RUNNING);
++}
++
++/**
++ * futex_wait_setup() - Prepare to wait on a futex
++ * @uaddr: the futex userspace address
++ * @val: the expected value
++ * @flags: futex flags (FLAGS_SHARED, etc.)
++ * @q: the associated futex_q
++ * @hb: storage for hash_bucket pointer to be returned to caller
++ *
++ * Set up the futex_q and locate the hash_bucket. Get the futex value and
++ * compare it with the expected value. Handle atomic faults internally.
++ * Return with the hb lock held on success, and unlocked on failure.
++ * ++ * Return: ++ * - 0 - uaddr contains val and hb has been locked; ++ * - <1 - -EFAULT or -EWOULDBLOCK (uaddr does not contain val) and hb is unlocked + */ - static int kdb_ps(int argc, const char **argv) - { - struct task_struct *g, *p; -- unsigned long mask, cpu; -+ const char *mask; -+ unsigned long cpu; - - if (argc == 0) - kdb_ps_suppressed(); - kdb_printf("%-*s Pid Parent [*] cpu State %-*s Command\n", - (int)(2*sizeof(void *))+2, "Task Addr", - (int)(2*sizeof(void *))+2, "Thread"); -- mask = kdb_task_state_string(argc ? argv[1] : NULL); -+ mask = argc ? argv[1] : kdbgetenv("PS"); - /* Run the active tasks first */ - for_each_online_cpu(cpu) { - if (KDB_FLAG(CMD_INTERRUPT)) -@@ -2742,8 +2799,8 @@ static kdbtab_t maintab[] = { - }, - { .name = "bta", - .func = kdb_bt, -- .usage = "[D|R|S|T|C|Z|E|U|I|M|A]", -- .help = "Backtrace all processes matching state flag", -+ .usage = "[<state_chars>|A]", -+ .help = "Backtrace all processes whose state matches", - .flags = KDB_ENABLE_INSPECT, - }, - { .name = "btc", -@@ -2797,7 +2854,7 @@ static kdbtab_t maintab[] = { - }, - { .name = "ps", - .func = kdb_ps, -- .usage = "[<flags>|A]", -+ .usage = "[<state_chars>|A]", - .help = "Display active task list", - .flags = KDB_ENABLE_INSPECT, - }, -diff --git a/kernel/debug/kdb/kdb_private.h b/kernel/debug/kdb/kdb_private.h -index 629590084a0dc..0d2f9feea0a46 100644 ---- a/kernel/debug/kdb/kdb_private.h -+++ b/kernel/debug/kdb/kdb_private.h -@@ -190,10 +190,8 @@ extern char kdb_grep_string[]; - extern int kdb_grep_leading; - extern int kdb_grep_trailing; - extern char *kdb_cmds[]; --extern unsigned long kdb_task_state_string(const char *); - extern char kdb_task_state_char (const struct task_struct *); --extern unsigned long kdb_task_state(const struct task_struct *p, -- unsigned long mask); -+extern bool kdb_task_state(const struct task_struct *p, const char *mask); - extern void kdb_ps_suppressed(void); - extern void kdb_ps1(const struct task_struct *p); - extern void kdb_send_sig(struct task_struct *p, int sig); -diff --git a/kernel/debug/kdb/kdb_support.c b/kernel/debug/kdb/kdb_support.c -index 7507d9a8dc6ac..85cb51c4a17e6 100644 ---- a/kernel/debug/kdb/kdb_support.c -+++ b/kernel/debug/kdb/kdb_support.c -@@ -24,6 +24,7 @@ - #include <linux/uaccess.h> - #include <linux/kdb.h> - #include <linux/slab.h> -+#include <linux/ctype.h> - #include "kdb_private.h" - - /* -@@ -290,7 +291,7 @@ int kdb_getarea_size(void *res, unsigned long addr, size_t size) - */ - int kdb_putarea_size(unsigned long addr, void *res, size_t size) - { -- int ret = copy_from_kernel_nofault((char *)addr, (char *)res, size); -+ int ret = copy_to_kernel_nofault((char *)addr, (char *)res, size); - if (ret) { - if (!KDB_STATE(SUPPRESS)) { - kdb_func_printf("Bad address 0x%lx\n", addr); -@@ -473,82 +474,7 @@ int kdb_putword(unsigned long addr, unsigned long word, size_t size) - return diag; - } - --/* -- * kdb_task_state_string - Convert a string containing any of the -- * letters DRSTCZEUIMA to a mask for the process state field and -- * return the value. If no argument is supplied, return the mask -- * that corresponds to environment variable PS, DRSTCZEU by -- * default. -- * Inputs: -- * s String to convert -- * Returns: -- * Mask for process state. -- * Notes: -- * The mask folds data from several sources into a single long value, so -- * be careful not to overlap the bits. TASK_* bits are in the LSB, -- * special cases like UNRUNNABLE are in the MSB. 
As of 2.6.10-rc1 there -- * is no overlap between TASK_* and EXIT_* but that may not always be -- * true, so EXIT_* bits are shifted left 16 bits before being stored in -- * the mask. -- */ -- --/* unrunnable is < 0 */ --#define UNRUNNABLE (1UL << (8*sizeof(unsigned long) - 1)) --#define RUNNING (1UL << (8*sizeof(unsigned long) - 2)) --#define IDLE (1UL << (8*sizeof(unsigned long) - 3)) --#define DAEMON (1UL << (8*sizeof(unsigned long) - 4)) - --unsigned long kdb_task_state_string(const char *s) --{ -- long res = 0; -- if (!s) { -- s = kdbgetenv("PS"); -- if (!s) -- s = "DRSTCZEU"; /* default value for ps */ -- } -- while (*s) { -- switch (*s) { -- case 'D': -- res |= TASK_UNINTERRUPTIBLE; -- break; -- case 'R': -- res |= RUNNING; -- break; -- case 'S': -- res |= TASK_INTERRUPTIBLE; -- break; -- case 'T': -- res |= TASK_STOPPED; -- break; -- case 'C': -- res |= TASK_TRACED; -- break; -- case 'Z': -- res |= EXIT_ZOMBIE << 16; -- break; -- case 'E': -- res |= EXIT_DEAD << 16; -- break; -- case 'U': -- res |= UNRUNNABLE; -- break; -- case 'I': -- res |= IDLE; -- break; -- case 'M': -- res |= DAEMON; -- break; -- case 'A': -- res = ~0UL; -- break; -- default: -- kdb_func_printf("unknown flag '%c' ignored\n", *s); -- break; -- } -- ++s; -- } -- return res; --} - - /* - * kdb_task_state_char - Return the character that represents the task state. -@@ -559,7 +485,6 @@ unsigned long kdb_task_state_string(const char *s) - */ - char kdb_task_state_char (const struct task_struct *p) - { -- unsigned int p_state; - unsigned long tmp; - char state; - int cpu; -@@ -568,25 +493,18 @@ char kdb_task_state_char (const struct task_struct *p) - copy_from_kernel_nofault(&tmp, (char *)p, sizeof(unsigned long))) - return 'E'; - -- cpu = kdb_process_cpu(p); -- p_state = READ_ONCE(p->__state); -- state = (p_state == 0) ? 'R' : -- (p_state < 0) ? 'U' : -- (p_state & TASK_UNINTERRUPTIBLE) ? 'D' : -- (p_state & TASK_STOPPED) ? 'T' : -- (p_state & TASK_TRACED) ? 'C' : -- (p->exit_state & EXIT_ZOMBIE) ? 'Z' : -- (p->exit_state & EXIT_DEAD) ? 'E' : -- (p_state & TASK_INTERRUPTIBLE) ? 'S' : '?'; -+ state = task_state_to_char((struct task_struct *) p); ++static int futex_wait_setup(u32 __user *uaddr, u32 val, unsigned int flags, ++ struct futex_q *q, struct futex_hash_bucket **hb) ++{ ++ u32 uval; ++ int ret; + - if (is_idle_task(p)) { - /* Idle task. Is it really idle, apart from the kdb - * interrupt? */ -+ cpu = kdb_process_cpu(p); - if (!kdb_task_has_cpu(p) || kgdb_info[cpu].irq_depth == 1) { - if (cpu != kdb_initial_cpu) -- state = 'I'; /* idle task */ -+ state = '-'; /* idle task */ - } -- } else if (!p->mm && state == 'S') { -- state = 'M'; /* sleeping system daemon */ -+ } else if (!p->mm && strchr("IMS", state)) { -+ state = tolower(state); /* sleeping system daemon */ - } - return state; - } -@@ -596,14 +514,28 @@ char kdb_task_state_char (const struct task_struct *p) - * given by the mask. - * Inputs: - * p struct task for the process -- * mask mask from kdb_task_state_string to select processes -+ * mask set of characters used to select processes; both NULL -+ * and the empty string mean adopt a default filter, which -+ * is to suppress sleeping system daemons and the idle tasks - * Returns: - * True if the process matches at least one criteria defined by the mask. 
- */ --unsigned long kdb_task_state(const struct task_struct *p, unsigned long mask) -+bool kdb_task_state(const struct task_struct *p, const char *mask) - { -- char state[] = { kdb_task_state_char(p), '\0' }; -- return (mask & kdb_task_state_string(state)) != 0; -+ char state = kdb_task_state_char(p); ++ /* ++ * Access the page AFTER the hash-bucket is locked. ++ * Order is important: ++ * ++ * Userspace waiter: val = var; if (cond(val)) futex_wait(&var, val); ++ * Userspace waker: if (cond(var)) { var = new; futex_wake(&var); } ++ * ++ * The basic logical guarantee of a futex is that it blocks ONLY ++ * if cond(var) is known to be true at the time of blocking, for ++ * any cond. If we locked the hash-bucket after testing *uaddr, that ++ * would open a race condition where we could block indefinitely with ++ * cond(var) false, which would violate the guarantee. ++ * ++ * On the other hand, we insert q and release the hash-bucket only ++ * after testing *uaddr. This guarantees that futex_wait() will NOT ++ * absorb a wakeup if *uaddr does not match the desired values ++ * while the syscall executes. ++ */ ++retry: ++ ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &q->key, FUTEX_READ); ++ if (unlikely(ret != 0)) ++ return ret; ++ ++retry_private: ++ *hb = queue_lock(q); ++ ++ ret = get_futex_value_locked(&uval, uaddr); ++ ++ if (ret) { ++ queue_unlock(*hb); ++ ++ ret = get_user(uval, uaddr); ++ if (ret) ++ return ret; ++ ++ if (!(flags & FLAGS_SHARED)) ++ goto retry_private; ++ ++ goto retry; ++ } ++ ++ if (uval != val) { ++ queue_unlock(*hb); ++ ret = -EWOULDBLOCK; ++ } ++ ++ return ret; ++} + -+ /* If there is no mask, then we will filter code that runs when the -+ * scheduler is idling and any system daemons that are currently -+ * sleeping. ++static int futex_wait(u32 __user *uaddr, unsigned int flags, u32 val, ++ ktime_t *abs_time, u32 bitset) ++{ ++ struct hrtimer_sleeper timeout, *to; ++ struct restart_block *restart; ++ struct futex_hash_bucket *hb; ++ struct futex_q q = futex_q_init; ++ int ret; ++ ++ if (!bitset) ++ return -EINVAL; ++ q.bitset = bitset; ++ ++ to = futex_setup_timer(abs_time, &timeout, flags, ++ current->timer_slack_ns); ++retry: ++ /* ++ * Prepare to wait on uaddr. On success, it holds hb->lock and q ++ * is initialized. + */ -+ if (!mask || mask[0] == '\0') -+ return !strchr("-ims", state); ++ ret = futex_wait_setup(uaddr, val, flags, &q, &hb); ++ if (ret) ++ goto out; + -+ /* A is a special case that matches all states */ -+ if (strchr(mask, 'A')) -+ return true; ++ /* queue_me and wait for wakeup, timeout, or a signal. */ ++ futex_wait_queue_me(hb, &q, to); + -+ return strchr(mask, state); - } - - /* Maintain a small stack of kdb_flags to allow recursion without disturbing -diff --git a/kernel/dma/debug.c b/kernel/dma/debug.c -index 7a14ca29c3778..2caafd13f8aac 100644 ---- a/kernel/dma/debug.c -+++ b/kernel/dma/debug.c -@@ -448,7 +448,7 @@ void debug_dma_dump_mappings(struct device *dev) - * other hand, consumes a single dma_debug_entry, but inserts 'nents' - * entries into the tree. 
- */ --static RADIX_TREE(dma_active_cacheline, GFP_NOWAIT); -+static RADIX_TREE(dma_active_cacheline, GFP_ATOMIC); - static DEFINE_SPINLOCK(radix_lock); - #define ACTIVE_CACHELINE_MAX_OVERLAP ((1 << RADIX_TREE_MAX_TAGS) - 1) - #define CACHELINE_PER_PAGE_SHIFT (PAGE_SHIFT - L1_CACHE_SHIFT) -@@ -564,7 +564,7 @@ static void add_dma_entry(struct dma_debug_entry *entry, unsigned long attrs) - - rc = active_cacheline_insert(entry); - if (rc == -ENOMEM) { -- pr_err("cacheline tracking ENOMEM, dma-debug disabled\n"); -+ pr_err_once("cacheline tracking ENOMEM, dma-debug disabled\n"); - global_disable = true; - } else if (rc == -EEXIST && !(attrs & DMA_ATTR_SKIP_CPU_SYNC)) { - err_printk(entry->dev, entry, -@@ -927,7 +927,7 @@ static __init int dma_debug_cmdline(char *str) - global_disable = true; - } - -- return 0; -+ return 1; - } - - static __init int dma_debug_entries_cmdline(char *str) -@@ -936,7 +936,7 @@ static __init int dma_debug_entries_cmdline(char *str) - return -EINVAL; - if (!get_option(&str, &nr_prealloc_entries)) - nr_prealloc_entries = PREALLOC_DMA_DEBUG_ENTRIES; -- return 0; -+ return 1; - } - - __setup("dma_debug=", dma_debug_cmdline); -diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c -index 4c6c5e0635e34..ed5dd9e023241 100644 ---- a/kernel/dma/direct.c -+++ b/kernel/dma/direct.c -@@ -75,6 +75,25 @@ static bool dma_coherent_ok(struct device *dev, phys_addr_t phys, size_t size) - min_not_zero(dev->coherent_dma_mask, dev->bus_dma_limit); - } - -+static int dma_set_decrypted(struct device *dev, void *vaddr, size_t size) ++ /* If we were woken (and unqueued), we succeeded, whatever. */ ++ ret = 0; ++ if (!unqueue_me(&q)) ++ goto out; ++ ret = -ETIMEDOUT; ++ if (to && !to->task) ++ goto out; ++ ++ /* ++ * We expect signal_pending(current), but we might be the ++ * victim of a spurious wakeup as well. ++ */ ++ if (!signal_pending(current)) ++ goto retry; ++ ++ ret = -ERESTARTSYS; ++ if (!abs_time) ++ goto out; ++ ++ restart = ¤t->restart_block; ++ restart->futex.uaddr = uaddr; ++ restart->futex.val = val; ++ restart->futex.time = *abs_time; ++ restart->futex.bitset = bitset; ++ restart->futex.flags = flags | FLAGS_HAS_TIMEOUT; ++ ++ ret = set_restart_fn(restart, futex_wait_restart); ++ ++out: ++ if (to) { ++ hrtimer_cancel(&to->timer); ++ destroy_hrtimer_on_stack(&to->timer); ++ } ++ return ret; ++} ++ ++ ++static long futex_wait_restart(struct restart_block *restart) +{ -+ if (!force_dma_unencrypted(dev)) -+ return 0; -+ return set_memory_decrypted((unsigned long)vaddr, PFN_UP(size)); ++ u32 __user *uaddr = restart->futex.uaddr; ++ ktime_t t, *tp = NULL; ++ ++ if (restart->futex.flags & FLAGS_HAS_TIMEOUT) { ++ t = restart->futex.time; ++ tp = &t; ++ } ++ restart->fn = do_no_restart_syscall; ++ ++ return (long)futex_wait(uaddr, restart->futex.flags, ++ restart->futex.val, tp, restart->futex.bitset); +} + -+static int dma_set_encrypted(struct device *dev, void *vaddr, size_t size) ++ ++/* ++ * Userspace tried a 0 -> TID atomic transition of the futex value ++ * and failed. The kernel side here does the whole locking operation: ++ * if there are waiters then it will block as a consequence of relying ++ * on rt-mutexes, it does PI, etc. (Due to races the kernel might see ++ * a 0 value of the futex too.). ++ * ++ * Also serves as futex trylock_pi()'ing, and due semantics. 
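
Seen from userspace, futex_wait() above folds all of that bookkeeping into a
few errno values. A hedged sketch of the caller side (wait_timed() is an
illustrative name; it simply reissues the full relative timeout after a
signal, which is the imprecision the kernel's restart block avoids by
carrying the deadline in restart->futex.time):

	#include <errno.h>
	#include <linux/futex.h>
	#include <stdatomic.h>
	#include <sys/syscall.h>
	#include <time.h>
	#include <unistd.h>

	/* Wait until *addr no longer holds 'val', or the timeout expires. */
	static int wait_timed(atomic_uint *addr, unsigned int val,
			      const struct timespec *rel)
	{
		while (atomic_load(addr) == val) {
			if (syscall(SYS_futex, addr, FUTEX_WAIT_PRIVATE,
				    val, rel, NULL, 0) == 0)
				continue;	/* woken, possibly spuriously */
			if (errno == ETIMEDOUT)
				return -ETIMEDOUT;
			/* EAGAIN: value changed before we slept; EINTR:
			 * interrupted by a signal. Re-check and retry. */
		}
		return 0;
	}
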
++ */ ++static int futex_lock_pi(u32 __user *uaddr, unsigned int flags, ++ ktime_t *time, int trylock) ++{ ++ struct hrtimer_sleeper timeout, *to; ++ struct task_struct *exiting = NULL; ++ struct rt_mutex_waiter rt_waiter; ++ struct futex_hash_bucket *hb; ++ struct futex_q q = futex_q_init; ++ int res, ret; ++ ++ if (!IS_ENABLED(CONFIG_FUTEX_PI)) ++ return -ENOSYS; ++ ++ if (refill_pi_state_cache()) ++ return -ENOMEM; ++ ++ to = futex_setup_timer(time, &timeout, flags, 0); ++ ++retry: ++ ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &q.key, FUTEX_WRITE); ++ if (unlikely(ret != 0)) ++ goto out; ++ ++retry_private: ++ hb = queue_lock(&q); ++ ++ ret = futex_lock_pi_atomic(uaddr, hb, &q.key, &q.pi_state, current, ++ &exiting, 0); ++ if (unlikely(ret)) { ++ /* ++ * Atomic work succeeded and we got the lock, ++ * or failed. Either way, we do _not_ block. ++ */ ++ switch (ret) { ++ case 1: ++ /* We got the lock. */ ++ ret = 0; ++ goto out_unlock_put_key; ++ case -EFAULT: ++ goto uaddr_faulted; ++ case -EBUSY: ++ case -EAGAIN: ++ /* ++ * Two reasons for this: ++ * - EBUSY: Task is exiting and we just wait for the ++ * exit to complete. ++ * - EAGAIN: The user space value changed. ++ */ ++ queue_unlock(hb); ++ /* ++ * Handle the case where the owner is in the middle of ++ * exiting. Wait for the exit to complete otherwise ++ * this task might loop forever, aka. live lock. ++ */ ++ wait_for_owner_exiting(ret, exiting); ++ cond_resched(); ++ goto retry; ++ default: ++ goto out_unlock_put_key; ++ } ++ } ++ ++ WARN_ON(!q.pi_state); ++ ++ /* ++ * Only actually queue now that the atomic ops are done: ++ */ ++ __queue_me(&q, hb); ++ ++ if (trylock) { ++ ret = rt_mutex_futex_trylock(&q.pi_state->pi_mutex); ++ /* Fixup the trylock return value: */ ++ ret = ret ? 0 : -EWOULDBLOCK; ++ goto no_block; ++ } ++ ++ rt_mutex_init_waiter(&rt_waiter); ++ ++ /* ++ * On PREEMPT_RT_FULL, when hb->lock becomes an rt_mutex, we must not ++ * hold it while doing rt_mutex_start_proxy(), because then it will ++ * include hb->lock in the blocking chain, even through we'll not in ++ * fact hold it while blocking. This will lead it to report -EDEADLK ++ * and BUG when futex_unlock_pi() interleaves with this. ++ * ++ * Therefore acquire wait_lock while holding hb->lock, but drop the ++ * latter before calling __rt_mutex_start_proxy_lock(). This ++ * interleaves with futex_unlock_pi() -- which does a similar lock ++ * handoff -- such that the latter can observe the futex_q::pi_state ++ * before __rt_mutex_start_proxy_lock() is done. ++ */ ++ raw_spin_lock_irq(&q.pi_state->pi_mutex.wait_lock); ++ spin_unlock(q.lock_ptr); ++ /* ++ * __rt_mutex_start_proxy_lock() unconditionally enqueues the @rt_waiter ++ * such that futex_unlock_pi() is guaranteed to observe the waiter when ++ * it sees the futex_q::pi_state. ++ */ ++ ret = __rt_mutex_start_proxy_lock(&q.pi_state->pi_mutex, &rt_waiter, current); ++ raw_spin_unlock_irq(&q.pi_state->pi_mutex.wait_lock); ++ ++ if (ret) { ++ if (ret == 1) ++ ret = 0; ++ goto cleanup; ++ } ++ ++ if (unlikely(to)) ++ hrtimer_sleeper_start_expires(to, HRTIMER_MODE_ABS); ++ ++ ret = rt_mutex_wait_proxy_lock(&q.pi_state->pi_mutex, to, &rt_waiter); ++ ++cleanup: ++ spin_lock(q.lock_ptr); ++ /* ++ * If we failed to acquire the lock (deadlock/signal/timeout), we must ++ * first acquire the hb->lock before removing the lock from the ++ * rt_mutex waitqueue, such that we can keep the hb and rt_mutex wait ++ * lists consistent. 
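
Userspace only enters futex_lock_pi() after its fast path fails. A sketch of
that split, assuming the usual PI word layout (0 when free, owner TID when
held); pi_lock() is an illustrative name:

	#include <linux/futex.h>
	#include <stdatomic.h>
	#include <stdio.h>
	#include <sys/syscall.h>
	#include <unistd.h>

	static void pi_lock(atomic_uint *word)
	{
		unsigned int expected = 0;
		unsigned int tid = (unsigned int)syscall(SYS_gettid);

		/* Fast path: the 0 -> TID transition, no kernel involved. */
		if (atomic_compare_exchange_strong(word, &expected, tid))
			return;

		/* Slow path: the kernel queues us on the rt_mutex and, once
		 * we own the lock, writes our TID into the word itself. */
		if (syscall(SYS_futex, word, FUTEX_LOCK_PI_PRIVATE, 0,
			    NULL, NULL, 0) != 0)
			perror("FUTEX_LOCK_PI");	/* EDEADLK, ESRCH, ... */
	}
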
++ * ++ * In particular; it is important that futex_unlock_pi() can not ++ * observe this inconsistency. ++ */ ++ if (ret && !rt_mutex_cleanup_proxy_lock(&q.pi_state->pi_mutex, &rt_waiter)) ++ ret = 0; ++ ++no_block: ++ /* ++ * Fixup the pi_state owner and possibly acquire the lock if we ++ * haven't already. ++ */ ++ res = fixup_owner(uaddr, &q, !ret); ++ /* ++ * If fixup_owner() returned an error, propagate that. If it acquired ++ * the lock, clear our -ETIMEDOUT or -EINTR. ++ */ ++ if (res) ++ ret = (res < 0) ? res : 0; ++ ++ unqueue_me_pi(&q); ++ spin_unlock(q.lock_ptr); ++ goto out; ++ ++out_unlock_put_key: ++ queue_unlock(hb); ++ ++out: ++ if (to) { ++ hrtimer_cancel(&to->timer); ++ destroy_hrtimer_on_stack(&to->timer); ++ } ++ return ret != -EINTR ? ret : -ERESTARTNOINTR; ++ ++uaddr_faulted: ++ queue_unlock(hb); ++ ++ ret = fault_in_user_writeable(uaddr); ++ if (ret) ++ goto out; ++ ++ if (!(flags & FLAGS_SHARED)) ++ goto retry_private; ++ ++ goto retry; ++} ++ ++/* ++ * Userspace attempted a TID -> 0 atomic transition, and failed. ++ * This is the in-kernel slowpath: we look up the PI state (if any), ++ * and do the rt-mutex unlock. ++ */ ++static int futex_unlock_pi(u32 __user *uaddr, unsigned int flags) +{ ++ u32 curval, uval, vpid = task_pid_vnr(current); ++ union futex_key key = FUTEX_KEY_INIT; ++ struct futex_hash_bucket *hb; ++ struct futex_q *top_waiter; + int ret; + -+ if (!force_dma_unencrypted(dev)) -+ return 0; -+ ret = set_memory_encrypted((unsigned long)vaddr, PFN_UP(size)); ++ if (!IS_ENABLED(CONFIG_FUTEX_PI)) ++ return -ENOSYS; ++ ++retry: ++ if (get_user(uval, uaddr)) ++ return -EFAULT; ++ /* ++ * We release only a lock we actually own: ++ */ ++ if ((uval & FUTEX_TID_MASK) != vpid) ++ return -EPERM; ++ ++ ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &key, FUTEX_WRITE); + if (ret) -+ pr_warn_ratelimited("leaking DMA memory that can't be re-encrypted\n"); ++ return ret; ++ ++ hb = hash_futex(&key); ++ spin_lock(&hb->lock); ++ ++ /* ++ * Check waiters first. We do not trust user space values at ++ * all and we at least want to know if user space fiddled ++ * with the futex value instead of blindly unlocking. ++ */ ++ top_waiter = futex_top_waiter(hb, &key); ++ if (top_waiter) { ++ struct futex_pi_state *pi_state = top_waiter->pi_state; ++ ++ ret = -EINVAL; ++ if (!pi_state) ++ goto out_unlock; ++ ++ /* ++ * If current does not own the pi_state then the futex is ++ * inconsistent and user space fiddled with the futex value. ++ */ ++ if (pi_state->owner != current) ++ goto out_unlock; ++ ++ get_pi_state(pi_state); ++ /* ++ * By taking wait_lock while still holding hb->lock, we ensure ++ * there is no point where we hold neither; and therefore ++ * wake_futex_pi() must observe a state consistent with what we ++ * observed. ++ * ++ * In particular; this forces __rt_mutex_start_proxy() to ++ * complete such that we're guaranteed to observe the ++ * rt_waiter. Also see the WARN in wake_futex_pi(). ++ */ ++ raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock); ++ spin_unlock(&hb->lock); ++ ++ /* drops pi_state->pi_mutex.wait_lock */ ++ ret = wake_futex_pi(uaddr, uval, pi_state); ++ ++ put_pi_state(pi_state); ++ ++ /* ++ * Success, we're done! No tricky corner cases. ++ */ ++ if (!ret) ++ return ret; ++ /* ++ * The atomic access to the futex value generated a ++ * pagefault, so retry the user-access and the wakeup: ++ */ ++ if (ret == -EFAULT) ++ goto pi_faulted; ++ /* ++ * A unconditional UNLOCK_PI op raced against a waiter ++ * setting the FUTEX_WAITERS bit. Try again. 
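
The unlock side is symmetric: only a failed TID -> 0 transition reaches
futex_unlock_pi(). A matching sketch with the same illustrative word layout:

	#include <linux/futex.h>
	#include <stdatomic.h>
	#include <sys/syscall.h>
	#include <unistd.h>

	static void pi_unlock(atomic_uint *word)
	{
		unsigned int expected = (unsigned int)syscall(SYS_gettid);

		/* Fast path: TID -> 0 succeeds only while no FUTEX_WAITERS
		 * bit is set, i.e. nobody is queued in the kernel. */
		if (atomic_compare_exchange_strong(word, &expected, 0))
			return;

		/* Waiters exist: the kernel must hand the lock to the top
		 * waiter under the rt_mutex wait_lock, as described above. */
		syscall(SYS_futex, word, FUTEX_UNLOCK_PI_PRIVATE, 0,
			NULL, NULL, 0);
	}
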
++		 */
++		if (ret == -EAGAIN)
++			goto pi_retry;
++		/*
++		 * wake_futex_pi has detected invalid state. Tell user
++		 * space.
++		 */
++		return ret;
++	}
++
++	/*
++	 * We have no kernel internal state, i.e. no waiters in the
++	 * kernel. Waiters which are about to queue themselves are stuck
++	 * on hb->lock. So we can safely ignore them. We do neither
++	 * preserve the WAITERS bit nor the OWNER_DIED one. We are the
++	 * owner.
++	 */
++	if ((ret = cmpxchg_futex_value_locked(&curval, uaddr, uval, 0))) {
++		spin_unlock(&hb->lock);
++		switch (ret) {
++		case -EFAULT:
++			goto pi_faulted;
++
++		case -EAGAIN:
++			goto pi_retry;
++
++		default:
++			WARN_ON_ONCE(1);
++			return ret;
++		}
++	}
++
++	/*
++	 * If uval has changed, let user space handle it.
++	 */
++	ret = (curval == uval) ? 0 : -EAGAIN;
++
++out_unlock:
++	spin_unlock(&hb->lock);
++	return ret;
++
++pi_retry:
++	cond_resched();
++	goto retry;
++
++pi_faulted:
++
++	ret = fault_in_user_writeable(uaddr);
++	if (!ret)
++		goto retry;
++
+	return ret;
+}
+
++/**
++ * handle_early_requeue_pi_wakeup() - Handle early wakeup on the initial futex
++ * @hb:		the hash_bucket futex_q was originally enqueued on
++ * @q:		the futex_q woken while waiting to be requeued
++ * @timeout:	the timeout associated with the wait (NULL if none)
++ *
++ * Determine the cause for the early wakeup.
++ *
++ * Return:
++ *  -EWOULDBLOCK or -ETIMEDOUT or -ERESTARTNOINTR
++ */
++static inline
++int handle_early_requeue_pi_wakeup(struct futex_hash_bucket *hb,
++				   struct futex_q *q,
++				   struct hrtimer_sleeper *timeout)
+{
++	int ret;
++
++	/*
++	 * With the hb lock held, we avoid races while we process the wakeup.
++	 * We only need to hold hb (and not hb2) to ensure atomicity as the
++	 * wakeup code can't change q.key from uaddr to uaddr2 if we hold hb.
++	 * It can't be requeued from uaddr2 to something else since we don't
++	 * support a PI aware source futex for requeue.
++	 */
++	WARN_ON_ONCE(&hb->lock != q->lock_ptr);
++
++	/*
++	 * We were woken prior to requeue by a timeout or a signal.
++	 * Unqueue the futex_q and determine which it was.
++ */ ++ plist_del(&q->list, &hb->chain); ++ hb_waiters_dec(hb); + -+ /* return the page pointer as the opaque cookie */ -+ *dma_handle = phys_to_dma_direct(dev, page_to_phys(page)); -+ return page; ++ /* Handle spurious wakeups gracefully */ ++ ret = -EWOULDBLOCK; ++ if (timeout && !timeout->task) ++ ret = -ETIMEDOUT; ++ else if (signal_pending(current)) ++ ret = -ERESTARTNOINTR; ++ return ret; +} + - void *dma_direct_alloc(struct device *dev, size_t size, - dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs) - { - struct page *page; - void *ret; -- int err; - - size = PAGE_ALIGN(size); - if (attrs & DMA_ATTR_NO_WARN) - gfp |= __GFP_NOWARN; - - if ((attrs & DMA_ATTR_NO_KERNEL_MAPPING) && -- !force_dma_unencrypted(dev) && !is_swiotlb_for_alloc(dev)) { -- page = __dma_direct_alloc_pages(dev, size, gfp & ~__GFP_ZERO); -- if (!page) -- return NULL; -- /* remove any dirty cache lines on the kernel alias */ -- if (!PageHighMem(page)) -- arch_dma_prep_coherent(page, size); -- *dma_handle = phys_to_dma_direct(dev, page_to_phys(page)); -- /* return the page pointer as the opaque cookie */ -- return page; -- } -+ !force_dma_unencrypted(dev) && !is_swiotlb_for_alloc(dev)) -+ return dma_direct_alloc_no_mapping(dev, size, dma_handle, gfp); - - if (!IS_ENABLED(CONFIG_ARCH_HAS_DMA_SET_UNCACHED) && - !IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) && -@@ -200,7 +230,7 @@ void *dma_direct_alloc(struct device *dev, size_t size, - return dma_direct_alloc_from_pool(dev, size, dma_handle, gfp); - - /* we always manually zero the memory once we are done */ -- page = __dma_direct_alloc_pages(dev, size, gfp & ~__GFP_ZERO); -+ page = __dma_direct_alloc_pages(dev, size, gfp & ~__GFP_ZERO, true); - if (!page) - return NULL; - -@@ -216,12 +246,6 @@ void *dma_direct_alloc(struct device *dev, size_t size, - __builtin_return_address(0)); - if (!ret) - goto out_free_pages; -- if (force_dma_unencrypted(dev)) { -- err = set_memory_decrypted((unsigned long)ret, -- 1 << get_order(size)); -- if (err) -- goto out_free_pages; -- } - memset(ret, 0, size); - goto done; - } -@@ -238,13 +262,8 @@ void *dma_direct_alloc(struct device *dev, size_t size, - } - - ret = page_address(page); -- if (force_dma_unencrypted(dev)) { -- err = set_memory_decrypted((unsigned long)ret, -- 1 << get_order(size)); -- if (err) -- goto out_free_pages; -- } -- -+ if (dma_set_decrypted(dev, ret, size)) -+ goto out_free_pages; - memset(ret, 0, size); - - if (IS_ENABLED(CONFIG_ARCH_HAS_DMA_SET_UNCACHED) && -@@ -259,13 +278,8 @@ done: - return ret; - - out_encrypt_pages: -- if (force_dma_unencrypted(dev)) { -- err = set_memory_encrypted((unsigned long)page_address(page), -- 1 << get_order(size)); -- /* If memory cannot be re-encrypted, it must be leaked */ -- if (err) -- return NULL; -- } -+ if (dma_set_encrypted(dev, page_address(page), size)) -+ return NULL; - out_free_pages: - __dma_direct_free_pages(dev, page, size); - return NULL; -@@ -304,13 +318,14 @@ void dma_direct_free(struct device *dev, size_t size, - dma_free_from_pool(dev, cpu_addr, PAGE_ALIGN(size))) - return; - -- if (force_dma_unencrypted(dev)) -- set_memory_encrypted((unsigned long)cpu_addr, 1 << page_order); -- -- if (IS_ENABLED(CONFIG_DMA_REMAP) && is_vmalloc_addr(cpu_addr)) -+ if (IS_ENABLED(CONFIG_DMA_REMAP) && is_vmalloc_addr(cpu_addr)) { - vunmap(cpu_addr); -- else if (IS_ENABLED(CONFIG_ARCH_HAS_DMA_CLEAR_UNCACHED)) -- arch_dma_clear_uncached(cpu_addr, size); -+ } else { -+ if (IS_ENABLED(CONFIG_ARCH_HAS_DMA_CLEAR_UNCACHED)) -+ arch_dma_clear_uncached(cpu_addr, size); -+ if 
(dma_set_encrypted(dev, cpu_addr, size)) -+ return; ++/** ++ * futex_wait_requeue_pi() - Wait on uaddr and take uaddr2 ++ * @uaddr: the futex we initially wait on (non-pi) ++ * @flags: futex flags (FLAGS_SHARED, FLAGS_CLOCKRT, etc.), they must be ++ * the same type, no requeueing from private to shared, etc. ++ * @val: the expected value of uaddr ++ * @abs_time: absolute timeout ++ * @bitset: 32 bit wakeup bitset set by userspace, defaults to all ++ * @uaddr2: the pi futex we will take prior to returning to user-space ++ * ++ * The caller will wait on uaddr and will be requeued by futex_requeue() to ++ * uaddr2 which must be PI aware and unique from uaddr. Normal wakeup will wake ++ * on uaddr2 and complete the acquisition of the rt_mutex prior to returning to ++ * userspace. This ensures the rt_mutex maintains an owner when it has waiters; ++ * without one, the pi logic would not know which task to boost/deboost, if ++ * there was a need to. ++ * ++ * We call schedule in futex_wait_queue_me() when we enqueue and return there ++ * via the following-- ++ * 1) wakeup on uaddr2 after an atomic lock acquisition by futex_requeue() ++ * 2) wakeup on uaddr2 after a requeue ++ * 3) signal ++ * 4) timeout ++ * ++ * If 3, cleanup and return -ERESTARTNOINTR. ++ * ++ * If 2, we may then block on trying to take the rt_mutex and return via: ++ * 5) successful lock ++ * 6) signal ++ * 7) timeout ++ * 8) other lock acquisition failure ++ * ++ * If 6, return -EWOULDBLOCK (restarting the syscall would do the same). ++ * ++ * If 4 or 7, we cleanup and return with -ETIMEDOUT. ++ * ++ * Return: ++ * - 0 - On success; ++ * - <0 - On error ++ */ ++static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, ++ u32 val, ktime_t *abs_time, u32 bitset, ++ u32 __user *uaddr2) ++{ ++ struct hrtimer_sleeper timeout, *to; ++ struct rt_mutex_waiter rt_waiter; ++ struct futex_hash_bucket *hb; ++ union futex_key key2 = FUTEX_KEY_INIT; ++ struct futex_q q = futex_q_init; ++ struct rt_mutex_base *pi_mutex; ++ int res, ret; ++ ++ if (!IS_ENABLED(CONFIG_FUTEX_PI)) ++ return -ENOSYS; ++ ++ if (uaddr == uaddr2) ++ return -EINVAL; ++ ++ if (!bitset) ++ return -EINVAL; ++ ++ to = futex_setup_timer(abs_time, &timeout, flags, ++ current->timer_slack_ns); ++ ++ /* ++ * The waiter is allocated on our stack, manipulated by the requeue ++ * code while we sleep on uaddr. ++ */ ++ rt_mutex_init_waiter(&rt_waiter); ++ ++ ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2, FUTEX_WRITE); ++ if (unlikely(ret != 0)) ++ goto out; ++ ++ q.bitset = bitset; ++ q.rt_waiter = &rt_waiter; ++ q.requeue_pi_key = &key2; ++ ++ /* ++ * Prepare to wait on uaddr. On success, it holds hb->lock and q ++ * is initialized. ++ */ ++ ret = futex_wait_setup(uaddr, val, flags, &q, &hb); ++ if (ret) ++ goto out; ++ ++ /* ++ * The check above which compares uaddrs is not sufficient for ++ * shared futexes. 
We need to compare the keys: ++ */ ++ if (match_futex(&q.key, &key2)) { ++ queue_unlock(hb); ++ ret = -EINVAL; ++ goto out; + } - - __dma_direct_free_pages(dev, dma_direct_to_page(dev, dma_addr), size); - } -@@ -326,26 +341,13 @@ struct page *dma_direct_alloc_pages(struct device *dev, size_t size, - !is_swiotlb_for_alloc(dev)) - return dma_direct_alloc_from_pool(dev, size, dma_handle, gfp); - -- page = __dma_direct_alloc_pages(dev, size, gfp); -+ page = __dma_direct_alloc_pages(dev, size, gfp, false); - if (!page) - return NULL; -- if (PageHighMem(page)) { -- /* -- * Depending on the cma= arguments and per-arch setup -- * dma_alloc_contiguous could return highmem pages. -- * Without remapping there is no way to return them here, -- * so log an error and fail. -- */ -- dev_info(dev, "Rejecting highmem page from CMA.\n"); -- goto out_free_pages; -- } - - ret = page_address(page); -- if (force_dma_unencrypted(dev)) { -- if (set_memory_decrypted((unsigned long)ret, -- 1 << get_order(size))) -- goto out_free_pages; -- } -+ if (dma_set_decrypted(dev, ret, size)) -+ goto out_free_pages; - memset(ret, 0, size); - *dma_handle = phys_to_dma_direct(dev, page_to_phys(page)); - return page; -@@ -358,7 +360,6 @@ void dma_direct_free_pages(struct device *dev, size_t size, - struct page *page, dma_addr_t dma_addr, - enum dma_data_direction dir) - { -- unsigned int page_order = get_order(size); - void *vaddr = page_address(page); - - /* If cpu_addr is not from an atomic pool, dma_free_from_pool() fails */ -@@ -366,9 +367,8 @@ void dma_direct_free_pages(struct device *dev, size_t size, - dma_free_from_pool(dev, vaddr, size)) - return; - -- if (force_dma_unencrypted(dev)) -- set_memory_encrypted((unsigned long)vaddr, 1 << page_order); -- -+ if (dma_set_encrypted(dev, vaddr, size)) -+ return; - __dma_direct_free_pages(dev, page, size); - } - -diff --git a/kernel/dma/direct.h b/kernel/dma/direct.h -index 4632b0f4f72eb..8a6cd53dbe8ce 100644 ---- a/kernel/dma/direct.h -+++ b/kernel/dma/direct.h -@@ -114,6 +114,7 @@ static inline void dma_direct_unmap_page(struct device *dev, dma_addr_t addr, - dma_direct_sync_single_for_cpu(dev, addr, size, dir); - - if (unlikely(is_swiotlb_buffer(dev, phys))) -- swiotlb_tbl_unmap_single(dev, phys, size, dir, attrs); -+ swiotlb_tbl_unmap_single(dev, phys, size, dir, -+ attrs | DMA_ATTR_SKIP_CPU_SYNC); - } - #endif /* _KERNEL_DMA_DIRECT_H */ -diff --git a/kernel/dma/mapping.c b/kernel/dma/mapping.c -index 8349a9f2c3453..9478eccd1c8e6 100644 ---- a/kernel/dma/mapping.c -+++ b/kernel/dma/mapping.c -@@ -296,10 +296,6 @@ dma_addr_t dma_map_resource(struct device *dev, phys_addr_t phys_addr, - if (WARN_ON_ONCE(!dev->dma_mask)) - return DMA_MAPPING_ERROR; - -- /* Don't allow RAM to be mapped */ -- if (WARN_ON_ONCE(pfn_valid(PHYS_PFN(phys_addr)))) -- return DMA_MAPPING_ERROR; -- - if (dma_map_direct(dev, ops)) - addr = dma_direct_map_resource(dev, phys_addr, size, dir, attrs); - else if (ops->map_resource) -diff --git a/kernel/dma/pool.c b/kernel/dma/pool.c -index 5f84e6cdb78ea..4d40dcce7604b 100644 ---- a/kernel/dma/pool.c -+++ b/kernel/dma/pool.c -@@ -203,7 +203,7 @@ static int __init dma_atomic_pool_init(void) - GFP_KERNEL); - if (!atomic_pool_kernel) - ret = -ENOMEM; -- if (IS_ENABLED(CONFIG_ZONE_DMA)) { -+ if (has_managed_dma()) { - atomic_pool_dma = __dma_atomic_pool_init(atomic_pool_size, - GFP_KERNEL | GFP_DMA); - if (!atomic_pool_dma) -@@ -226,7 +226,7 @@ static inline struct gen_pool *dma_guess_pool(struct gen_pool *prev, gfp_t gfp) - if (prev == NULL) { - if 
(IS_ENABLED(CONFIG_ZONE_DMA32) && (gfp & GFP_DMA32)) - return atomic_pool_dma32; -- if (IS_ENABLED(CONFIG_ZONE_DMA) && (gfp & GFP_DMA)) -+ if (atomic_pool_dma && (gfp & GFP_DMA)) - return atomic_pool_dma; - return atomic_pool_kernel; - } -diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c -index 87c40517e8227..a9849670bdb54 100644 ---- a/kernel/dma/swiotlb.c -+++ b/kernel/dma/swiotlb.c -@@ -435,7 +435,10 @@ static void swiotlb_bounce(struct device *dev, phys_addr_t tlb_addr, size_t size - } - } - --#define slot_addr(start, idx) ((start) + ((idx) << IO_TLB_SHIFT)) -+static inline phys_addr_t slot_addr(phys_addr_t start, phys_addr_t idx) ++ ++ /* Queue the futex_q, drop the hb lock, wait for wakeup. */ ++ futex_wait_queue_me(hb, &q, to); ++ ++ switch (futex_requeue_pi_wakeup_sync(&q)) { ++ case Q_REQUEUE_PI_IGNORE: ++ /* The waiter is still on uaddr1 */ ++ spin_lock(&hb->lock); ++ ret = handle_early_requeue_pi_wakeup(hb, &q, to); ++ spin_unlock(&hb->lock); ++ break; ++ ++ case Q_REQUEUE_PI_LOCKED: ++ /* The requeue acquired the lock */ ++ if (q.pi_state && (q.pi_state->owner != current)) { ++ spin_lock(q.lock_ptr); ++ ret = fixup_owner(uaddr2, &q, true); ++ /* ++ * Drop the reference to the pi state which the ++ * requeue_pi() code acquired for us. ++ */ ++ put_pi_state(q.pi_state); ++ spin_unlock(q.lock_ptr); ++ /* ++ * Adjust the return value. It's either -EFAULT or ++ * success (1) but the caller expects 0 for success. ++ */ ++ ret = ret < 0 ? ret : 0; ++ } ++ break; ++ ++ case Q_REQUEUE_PI_DONE: ++ /* Requeue completed. Current is 'pi_blocked_on' the rtmutex */ ++ pi_mutex = &q.pi_state->pi_mutex; ++ ret = rt_mutex_wait_proxy_lock(pi_mutex, to, &rt_waiter); ++ ++ /* Current is not longer pi_blocked_on */ ++ spin_lock(q.lock_ptr); ++ if (ret && !rt_mutex_cleanup_proxy_lock(pi_mutex, &rt_waiter)) ++ ret = 0; ++ ++ debug_rt_mutex_free_waiter(&rt_waiter); ++ /* ++ * Fixup the pi_state owner and possibly acquire the lock if we ++ * haven't already. ++ */ ++ res = fixup_owner(uaddr2, &q, !ret); ++ /* ++ * If fixup_owner() returned an error, propagate that. If it ++ * acquired the lock, clear -ETIMEDOUT or -EINTR. ++ */ ++ if (res) ++ ret = (res < 0) ? res : 0; ++ ++ unqueue_me_pi(&q); ++ spin_unlock(q.lock_ptr); ++ ++ if (ret == -EINTR) { ++ /* ++ * We've already been requeued, but cannot restart ++ * by calling futex_lock_pi() directly. We could ++ * restart this syscall, but it would detect that ++ * the user space "val" changed and return ++ * -EWOULDBLOCK. Save the overhead of the restart ++ * and return -EWOULDBLOCK directly. ++ */ ++ ret = -EWOULDBLOCK; ++ } ++ break; ++ default: ++ BUG(); ++ } ++ ++out: ++ if (to) { ++ hrtimer_cancel(&to->timer); ++ destroy_hrtimer_on_stack(&to->timer); ++ } ++ return ret; ++} ++ ++/* ++ * Support for robust futexes: the kernel cleans up held futexes at ++ * thread exit time. ++ * ++ * Implementation: user-space maintains a per-thread list of locks it ++ * is holding. Upon do_exit(), the kernel carefully walks this list, ++ * and marks all locks that are owned by this thread with the ++ * FUTEX_OWNER_DIED bit, and wakes up a waiter (if any). The list is ++ * always manipulated with the lock held, so the list is private and ++ * per-thread. Userspace also maintains a per-thread 'list_op_pending' ++ * field, to allow the kernel to clean up if the thread dies after ++ * acquiring the lock, but just before it could have added itself to ++ * the list. There can only be one such pending lock. 
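
The userspace half of this contract is the list head registration. A sketch
(normally glibc owns this head; register_robust_list() is an illustrative
name):

	#include <linux/futex.h>	/* struct robust_list_head */
	#include <sys/syscall.h>
	#include <unistd.h>

	static __thread struct robust_list_head head;

	static long register_robust_list(void)
	{
		head.list.next = &head.list;	/* empty list: self-pointing */
		head.futex_offset = 0;	/* offset of the lock word within an
					 * entry; see the entry sketch below */
		head.list_op_pending = NULL;
		return syscall(SYS_set_robust_list, &head, sizeof(head));
	}
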
++ */ ++ ++/** ++ * sys_set_robust_list() - Set the robust-futex list head of a task ++ * @head: pointer to the list-head ++ * @len: length of the list-head, as userspace expects ++ */ ++SYSCALL_DEFINE2(set_robust_list, struct robust_list_head __user *, head, ++ size_t, len) +{ -+ return start + (idx << IO_TLB_SHIFT); ++ if (!futex_cmpxchg_enabled) ++ return -ENOSYS; ++ /* ++ * The kernel knows only one size for now: ++ */ ++ if (unlikely(len != sizeof(*head))) ++ return -EINVAL; ++ ++ current->robust_list = head; ++ ++ return 0; +} - - /* - * Carefully handle integer overflow which can occur when boundary_mask == ~0UL. -@@ -459,7 +462,7 @@ static unsigned int wrap_index(struct io_tlb_mem *mem, unsigned int index) - * allocate a buffer from that IO TLB pool. - */ - static int swiotlb_find_slots(struct device *dev, phys_addr_t orig_addr, -- size_t alloc_size) -+ size_t alloc_size, unsigned int alloc_align_mask) - { - struct io_tlb_mem *mem = dev->dma_io_tlb_mem; - unsigned long boundary_mask = dma_get_seg_boundary(dev); -@@ -483,6 +486,7 @@ static int swiotlb_find_slots(struct device *dev, phys_addr_t orig_addr, - stride = (iotlb_align_mask >> IO_TLB_SHIFT) + 1; - if (alloc_size >= PAGE_SIZE) - stride = max(stride, stride << (PAGE_SHIFT - IO_TLB_SHIFT)); -+ stride = max(stride, (alloc_align_mask >> IO_TLB_SHIFT) + 1); - - spin_lock_irqsave(&mem->lock, flags); - if (unlikely(nslots > mem->nslabs - mem->used)) -@@ -541,7 +545,8 @@ found: - - phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr, - size_t mapping_size, size_t alloc_size, -- enum dma_data_direction dir, unsigned long attrs) -+ unsigned int alloc_align_mask, enum dma_data_direction dir, -+ unsigned long attrs) - { - struct io_tlb_mem *mem = dev->dma_io_tlb_mem; - unsigned int offset = swiotlb_align_offset(dev, orig_addr); -@@ -549,7 +554,7 @@ phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr, - int index; - phys_addr_t tlb_addr; - -- if (!mem) -+ if (!mem || !mem->nslabs) - panic("Can not allocate SWIOTLB buffer earlier and can't now provide you with the DMA bounce buffer"); - - if (mem_encrypt_active()) -@@ -561,7 +566,8 @@ phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr, - return (phys_addr_t)DMA_MAPPING_ERROR; - } - -- index = swiotlb_find_slots(dev, orig_addr, alloc_size + offset); -+ index = swiotlb_find_slots(dev, orig_addr, -+ alloc_size + offset, alloc_align_mask); - if (index == -1) { - if (!(attrs & DMA_ATTR_NO_WARN)) - dev_warn_ratelimited(dev, -@@ -578,9 +584,14 @@ phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr, - for (i = 0; i < nr_slots(alloc_size + offset); i++) - mem->slots[index + i].orig_addr = slot_addr(orig_addr, i); - tlb_addr = slot_addr(mem->start, index) + offset; -- if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) && -- (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL)) -- swiotlb_bounce(dev, tlb_addr, mapping_size, DMA_TO_DEVICE); ++ ++/** ++ * sys_get_robust_list() - Get the robust-futex list head of a task ++ * @pid: pid of the process [zero for current task] ++ * @head_ptr: pointer to a list-head pointer, the kernel fills it in ++ * @len_ptr: pointer to a length field, the kernel fills in the header size ++ */ ++SYSCALL_DEFINE3(get_robust_list, int, pid, ++ struct robust_list_head __user * __user *, head_ptr, ++ size_t __user *, len_ptr) ++{ ++ struct robust_list_head __user *head; ++ unsigned long ret; ++ struct task_struct *p; ++ ++ if (!futex_cmpxchg_enabled) ++ return -ENOSYS; ++ ++ rcu_read_lock(); 
++ ++ ret = -ESRCH; ++ if (!pid) ++ p = current; ++ else { ++ p = find_task_by_vpid(pid); ++ if (!p) ++ goto err_unlock; ++ } ++ ++ ret = -EPERM; ++ if (!ptrace_may_access(p, PTRACE_MODE_READ_REALCREDS)) ++ goto err_unlock; ++ ++ head = p->robust_list; ++ rcu_read_unlock(); ++ ++ if (put_user(sizeof(*head), len_ptr)) ++ return -EFAULT; ++ return put_user(head, head_ptr); ++ ++err_unlock: ++ rcu_read_unlock(); ++ ++ return ret; ++} ++ ++/* Constants for the pending_op argument of handle_futex_death */ ++#define HANDLE_DEATH_PENDING true ++#define HANDLE_DEATH_LIST false ++ ++/* ++ * Process a futex-list entry, check whether it's owned by the ++ * dying task, and do notification if so: ++ */ ++static int handle_futex_death(u32 __user *uaddr, struct task_struct *curr, ++ bool pi, bool pending_op) ++{ ++ u32 uval, nval, mval; ++ pid_t owner; ++ int err; ++ ++ /* Futex address must be 32bit aligned */ ++ if ((((unsigned long)uaddr) % sizeof(*uaddr)) != 0) ++ return -1; ++ ++retry: ++ if (get_user(uval, uaddr)) ++ return -1; ++ + /* -+ * When dir == DMA_FROM_DEVICE we could omit the copy from the orig -+ * to the tlb buffer, if we knew for sure the device will -+ * overwirte the entire current content. But we don't. Thus -+ * unconditional bounce may prevent leaking swiotlb content (i.e. -+ * kernel memory) to user-space. ++ * Special case for regular (non PI) futexes. The unlock path in ++ * user space has two race scenarios: ++ * ++ * 1. The unlock path releases the user space futex value and ++ * before it can execute the futex() syscall to wake up ++ * waiters it is killed. ++ * ++ * 2. A woken up waiter is killed before it can acquire the ++ * futex in user space. ++ * ++ * In the second case, the wake up notification could be generated ++ * by the unlock path in user space after setting the futex value ++ * to zero or by the kernel after setting the OWNER_DIED bit below. ++ * ++ * In both cases the TID validation below prevents a wakeup of ++ * potential waiters which can cause these waiters to block ++ * forever. ++ * ++ * In both cases the following conditions are met: ++ * ++ * 1) task->robust_list->list_op_pending != NULL ++ * @pending_op == true ++ * 2) The owner part of user space futex value == 0 ++ * 3) Regular futex: @pi == false ++ * ++ * If these conditions are met, it is safe to attempt waking up a ++ * potential waiter without touching the user space futex value and ++ * trying to set the OWNER_DIED bit. If the futex value is zero, ++ * the rest of the user space mutex state is consistent, so a woken ++ * waiter will just take over the uncontended futex. Setting the ++ * OWNER_DIED bit would create inconsistent state and malfunction ++ * of the user space owner died handling. Otherwise, the OWNER_DIED ++ * bit is already set, and the woken waiter is expected to deal with ++ * this. 
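
Through the pthreads API all of this surfaces as a single error code. A
sketch of the recovery protocol that the TID validation above makes safe
(error checking elided):

	#include <errno.h>
	#include <pthread.h>

	static pthread_mutex_t m;

	static void robust_init(void)
	{
		pthread_mutexattr_t a;

		pthread_mutexattr_init(&a);
		pthread_mutexattr_setrobust(&a, PTHREAD_MUTEX_ROBUST);
		pthread_mutex_init(&m, &a);
		pthread_mutexattr_destroy(&a);
	}

	static void robust_lock(void)
	{
		if (pthread_mutex_lock(&m) == EOWNERDEAD) {
			/* The previous owner died holding the lock; the exit
			 * path set FUTEX_OWNER_DIED and woke us. Repair the
			 * protected data, then mark the mutex usable again. */
			pthread_mutex_consistent(&m);
		}
	}
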
+ */ -+ swiotlb_bounce(dev, tlb_addr, mapping_size, DMA_TO_DEVICE); - return tlb_addr; - } - -@@ -675,7 +686,7 @@ dma_addr_t swiotlb_map(struct device *dev, phys_addr_t paddr, size_t size, - trace_swiotlb_bounced(dev, phys_to_dma(dev, paddr), size, - swiotlb_force); - -- swiotlb_addr = swiotlb_tbl_map_single(dev, paddr, size, size, dir, -+ swiotlb_addr = swiotlb_tbl_map_single(dev, paddr, size, size, 0, dir, - attrs); - if (swiotlb_addr == (phys_addr_t)DMA_MAPPING_ERROR) - return DMA_MAPPING_ERROR; -@@ -698,7 +709,18 @@ dma_addr_t swiotlb_map(struct device *dev, phys_addr_t paddr, size_t size, - - size_t swiotlb_max_mapping_size(struct device *dev) - { -- return ((size_t)IO_TLB_SIZE) * IO_TLB_SEGSIZE; -+ int min_align_mask = dma_get_min_align_mask(dev); -+ int min_align = 0; ++ owner = uval & FUTEX_TID_MASK; ++ ++ if (pending_op && !pi && !owner) { ++ futex_wake(uaddr, 1, 1, FUTEX_BITSET_MATCH_ANY); ++ return 0; ++ } ++ ++ if (owner != task_pid_vnr(curr)) ++ return 0; + + /* -+ * swiotlb_find_slots() skips slots according to -+ * min align mask. This affects max mapping size. -+ * Take it into acount here. ++ * Ok, this dying thread is truly holding a futex ++ * of interest. Set the OWNER_DIED bit atomically ++ * via cmpxchg, and if the value had FUTEX_WAITERS ++ * set, wake up a waiter (if any). (We have to do a ++ * futex_wake() even if OWNER_DIED is already set - ++ * to handle the rare but possible case of recursive ++ * thread-death.) The rest of the cleanup is done in ++ * userspace. + */ -+ if (min_align_mask) -+ min_align = roundup(min_align_mask, IO_TLB_SIZE); ++ mval = (uval & FUTEX_WAITERS) | FUTEX_OWNER_DIED; + -+ return ((size_t)IO_TLB_SIZE) * IO_TLB_SEGSIZE - min_align; - } - - bool is_swiotlb_active(struct device *dev) -@@ -759,7 +781,7 @@ struct page *swiotlb_alloc(struct device *dev, size_t size) - if (!mem) - return NULL; - -- index = swiotlb_find_slots(dev, 0, size); -+ index = swiotlb_find_slots(dev, 0, size, 0); - if (index == -1) - return NULL; - -diff --git a/kernel/entry/common.c b/kernel/entry/common.c -index d5a61d565ad5d..998bdb7b8bf7f 100644 ---- a/kernel/entry/common.c -+++ b/kernel/entry/common.c -@@ -124,7 +124,7 @@ static __always_inline void __exit_to_user_mode(void) - { - instrumentation_begin(); - trace_hardirqs_on_prepare(); -- lockdep_hardirqs_on_prepare(CALLER_ADDR0); -+ lockdep_hardirqs_on_prepare(); - instrumentation_end(); - - user_enter_irqoff(); -@@ -412,7 +412,7 @@ noinstr void irqentry_exit(struct pt_regs *regs, irqentry_state_t state) - instrumentation_begin(); - /* Tell the tracer that IRET will enable interrupts */ - trace_hardirqs_on_prepare(); -- lockdep_hardirqs_on_prepare(CALLER_ADDR0); -+ lockdep_hardirqs_on_prepare(); - instrumentation_end(); - rcu_irq_exit(); - lockdep_hardirqs_on(CALLER_ADDR0); -@@ -465,7 +465,7 @@ void noinstr irqentry_nmi_exit(struct pt_regs *regs, irqentry_state_t irq_state) - ftrace_nmi_exit(); - if (irq_state.lockdep) { - trace_hardirqs_on_prepare(); -- lockdep_hardirqs_on_prepare(CALLER_ADDR0); -+ lockdep_hardirqs_on_prepare(); - } - instrumentation_end(); - -diff --git a/kernel/entry/syscall_user_dispatch.c b/kernel/entry/syscall_user_dispatch.c -index c240302f56e23..0b6379adff6bd 100644 ---- a/kernel/entry/syscall_user_dispatch.c -+++ b/kernel/entry/syscall_user_dispatch.c -@@ -47,14 +47,18 @@ bool syscall_user_dispatch(struct pt_regs *regs) - * access_ok() is performed once, at prctl time, when - * the selector is loaded by userspace. 
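
For context, the userspace setup whose failure modes this code handles:
enabling Syscall User Dispatch with a per-thread selector byte (a sketch;
sud_selector and enable_sud() are illustrative names):

	#include <linux/prctl.h>
	#include <sys/prctl.h>

	static volatile char sud_selector = SYSCALL_DISPATCH_FILTER_ALLOW;

	/* Syscalls issued outside [start, start + len) raise SIGSYS whenever
	 * the selector byte reads SYSCALL_DISPATCH_FILTER_BLOCK. The kernel
	 * dereferences the byte on every syscall, so an unmapped selector is
	 * fatal; the change above delivers that as a forced SIGSEGV instead
	 * of calling do_exit() directly. */
	static int enable_sud(void *start, unsigned long len)
	{
		return prctl(PR_SET_SYSCALL_USER_DISPATCH, PR_SYS_DISPATCH_ON,
			     start, len, &sud_selector);
	}
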
- */ -- if (unlikely(__get_user(state, sd->selector))) -- do_exit(SIGSEGV); -+ if (unlikely(__get_user(state, sd->selector))) { -+ force_exit_sig(SIGSEGV); -+ return true; -+ } - - if (likely(state == SYSCALL_DISPATCH_FILTER_ALLOW)) - return false; - -- if (state != SYSCALL_DISPATCH_FILTER_BLOCK) -- do_exit(SIGSYS); -+ if (state != SYSCALL_DISPATCH_FILTER_BLOCK) { -+ force_exit_sig(SIGSYS); -+ return true; ++ /* ++ * We are not holding a lock here, but we want to have ++ * the pagefault_disable/enable() protection because ++ * we want to handle the fault gracefully. If the ++ * access fails we try to fault in the futex with R/W ++ * verification via get_user_pages. get_user() above ++ * does not guarantee R/W access. If that fails we ++ * give up and leave the futex locked. ++ */ ++ if ((err = cmpxchg_futex_value_locked(&nval, uaddr, uval, mval))) { ++ switch (err) { ++ case -EFAULT: ++ if (fault_in_user_writeable(uaddr)) ++ return -1; ++ goto retry; ++ ++ case -EAGAIN: ++ cond_resched(); ++ goto retry; ++ ++ default: ++ WARN_ON_ONCE(1); ++ return err; + } - } - - sd->on_dispatch = true; -diff --git a/kernel/events/core.c b/kernel/events/core.c -index f23ca260307f0..c6c7a4d805733 100644 ---- a/kernel/events/core.c -+++ b/kernel/events/core.c -@@ -674,6 +674,23 @@ perf_event_set_state(struct perf_event *event, enum perf_event_state state) - WRITE_ONCE(event->state, state); - } - ++ } ++ ++ if (nval != uval) ++ goto retry; ++ ++ /* ++ * Wake robust non-PI futexes here. The wakeup of ++ * PI futexes happens in exit_pi_state(): ++ */ ++ if (!pi && (uval & FUTEX_WAITERS)) ++ futex_wake(uaddr, 1, 1, FUTEX_BITSET_MATCH_ANY); ++ ++ return 0; ++} ++ +/* -+ * UP store-release, load-acquire ++ * Fetch a robust-list pointer. Bit 0 signals PI futexes: + */ ++static inline int fetch_robust_entry(struct robust_list __user **entry, ++ struct robust_list __user * __user *head, ++ unsigned int *pi) ++{ ++ unsigned long uentry; + -+#define __store_release(ptr, val) \ -+do { \ -+ barrier(); \ -+ WRITE_ONCE(*(ptr), (val)); \ -+} while (0) ++ if (get_user(uentry, (unsigned long __user *)head)) ++ return -EFAULT; + -+#define __load_acquire(ptr) \ -+({ \ -+ __unqual_scalar_typeof(*(ptr)) ___p = READ_ONCE(*(ptr)); \ -+ barrier(); \ -+ ___p; \ -+}) ++ *entry = (void __user *)(uentry & ~1UL); ++ *pi = uentry & 1; + - #ifdef CONFIG_CGROUP_PERF - - static inline bool -@@ -719,34 +736,51 @@ static inline u64 perf_cgroup_event_time(struct perf_event *event) - return t->time; - } - --static inline void __update_cgrp_time(struct perf_cgroup *cgrp) -+static inline u64 perf_cgroup_event_time_now(struct perf_event *event, u64 now) - { -- struct perf_cgroup_info *info; -- u64 now; -- -- now = perf_clock(); -+ struct perf_cgroup_info *t; - -- info = this_cpu_ptr(cgrp->info); -+ t = per_cpu_ptr(event->cgrp->info, event->cpu); -+ if (!__load_acquire(&t->active)) -+ return t->time; -+ now += READ_ONCE(t->timeoffset); -+ return now; ++ return 0; +} - -- info->time += now - info->timestamp; -+static inline void __update_cgrp_time(struct perf_cgroup_info *info, u64 now, bool adv) ++ ++/* ++ * Walk curr->robust_list (very carefully, it's a userspace list!) ++ * and mark any locks found there dead, and notify any waiters. ++ * ++ * We silently return on any sign of list-walking problem. 
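
The walker below assumes a matching enqueue discipline on the user side.
A simplified sketch of one entry and of the list_op_pending window (layout
and names are illustrative; real implementations order these stores with
barriers):

	#include <linux/futex.h>
	#include <stdatomic.h>
	#include <stddef.h>

	struct robust_lock {
		struct robust_list list;	/* links into the thread's list */
		atomic_uint word;		/* 0 free, else owner TID */
	};
	/* head.futex_offset must equal offsetof(struct robust_lock, word). */

	extern __thread struct robust_list_head head;	/* registered earlier */

	static void robust_enqueue(struct robust_lock *l)
	{
		/* Announce first: if this thread dies after acquiring the
		 * word but before linking it in, the kernel still finds the
		 * lock through list_op_pending. */
		head.list_op_pending = &l->list;
		/* ... acquire l->word here (0 -> TID cmpxchg) ... */
		l->list.next = head.list.next;
		head.list.next = &l->list;
		head.list_op_pending = NULL;
	}
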
++ */ ++static void exit_robust_list(struct task_struct *curr) +{ -+ if (adv) -+ info->time += now - info->timestamp; - info->timestamp = now; ++ struct robust_list_head __user *head = curr->robust_list; ++ struct robust_list __user *entry, *next_entry, *pending; ++ unsigned int limit = ROBUST_LIST_LIMIT, pi, pip; ++ unsigned int next_pi; ++ unsigned long futex_offset; ++ int rc; ++ ++ if (!futex_cmpxchg_enabled) ++ return; ++ + /* -+ * see update_context_time() ++ * Fetch the list head (which was registered earlier, via ++ * sys_set_robust_list()): + */ -+ WRITE_ONCE(info->timeoffset, info->time - info->timestamp); - } - --static inline void update_cgrp_time_from_cpuctx(struct perf_cpu_context *cpuctx) -+static inline void update_cgrp_time_from_cpuctx(struct perf_cpu_context *cpuctx, bool final) - { - struct perf_cgroup *cgrp = cpuctx->cgrp; - struct cgroup_subsys_state *css; -+ struct perf_cgroup_info *info; - - if (cgrp) { -+ u64 now = perf_clock(); ++ if (fetch_robust_entry(&entry, &head->list.next, &pi)) ++ return; ++ /* ++ * Fetch the relative futex offset: ++ */ ++ if (get_user(futex_offset, &head->futex_offset)) ++ return; ++ /* ++ * Fetch any possibly pending lock-add first, and handle it ++ * if it exists: ++ */ ++ if (fetch_robust_entry(&pending, &head->list_op_pending, &pip)) ++ return; + - for (css = &cgrp->css; css; css = css->parent) { - cgrp = container_of(css, struct perf_cgroup, css); -- __update_cgrp_time(cgrp); -+ info = this_cpu_ptr(cgrp->info); ++ next_entry = NULL; /* avoid warning with gcc */ ++ while (entry != &head->list) { ++ /* ++ * Fetch the next entry in the list before calling ++ * handle_futex_death: ++ */ ++ rc = fetch_robust_entry(&next_entry, &entry->next, &next_pi); ++ /* ++ * A pending lock might already be on the list, so ++ * don't process it twice: ++ */ ++ if (entry != pending) { ++ if (handle_futex_death((void __user *)entry + futex_offset, ++ curr, pi, HANDLE_DEATH_LIST)) ++ return; ++ } ++ if (rc) ++ return; ++ entry = next_entry; ++ pi = next_pi; ++ /* ++ * Avoid excessively long or circular lists: ++ */ ++ if (!--limit) ++ break; + -+ __update_cgrp_time(info, now, true); -+ if (final) -+ __store_release(&info->active, 0); - } - } - } - - static inline void update_cgrp_time_from_event(struct perf_event *event) - { -+ struct perf_cgroup_info *info; - struct perf_cgroup *cgrp; - - /* -@@ -760,8 +794,10 @@ static inline void update_cgrp_time_from_event(struct perf_event *event) - /* - * Do not update time when cgroup is not active - */ -- if (cgroup_is_descendant(cgrp->css.cgroup, event->cgrp->css.cgroup)) -- __update_cgrp_time(event->cgrp); -+ if (cgroup_is_descendant(cgrp->css.cgroup, event->cgrp->css.cgroup)) { -+ info = this_cpu_ptr(event->cgrp->info); -+ __update_cgrp_time(info, perf_clock(), true); ++ cond_resched(); + } - } - - static inline void -@@ -785,7 +821,8 @@ perf_cgroup_set_timestamp(struct task_struct *task, - for (css = &cgrp->css; css; css = css->parent) { - cgrp = container_of(css, struct perf_cgroup, css); - info = this_cpu_ptr(cgrp->info); -- info->timestamp = ctx->timestamp; -+ __update_cgrp_time(info, ctx->timestamp, false); -+ __store_release(&info->active, 1); - } - } - -@@ -802,7 +839,7 @@ static DEFINE_PER_CPU(struct list_head, cgrp_cpuctx_list); - */ - static void perf_cgroup_switch(struct task_struct *task, int mode) - { -- struct perf_cpu_context *cpuctx; -+ struct perf_cpu_context *cpuctx, *tmp; - struct list_head *list; - unsigned long flags; - -@@ -813,7 +850,7 @@ static void perf_cgroup_switch(struct task_struct 
*task, int mode) - local_irq_save(flags); - - list = this_cpu_ptr(&cgrp_cpuctx_list); -- list_for_each_entry(cpuctx, list, cgrp_cpuctx_entry) { -+ list_for_each_entry_safe(cpuctx, tmp, list, cgrp_cpuctx_entry) { - WARN_ON_ONCE(cpuctx->ctx.nr_cgroups == 0); - - perf_ctx_lock(cpuctx, cpuctx->task_ctx); -@@ -981,14 +1018,6 @@ out: - return ret; - } - --static inline void --perf_cgroup_set_shadow_time(struct perf_event *event, u64 now) --{ -- struct perf_cgroup_info *t; -- t = per_cpu_ptr(event->cgrp->info, event->cpu); -- event->shadow_ctx_time = now - t->timestamp; --} -- - static inline void - perf_cgroup_event_enable(struct perf_event *event, struct perf_event_context *ctx) - { -@@ -1066,7 +1095,8 @@ static inline void update_cgrp_time_from_event(struct perf_event *event) - { - } - --static inline void update_cgrp_time_from_cpuctx(struct perf_cpu_context *cpuctx) -+static inline void update_cgrp_time_from_cpuctx(struct perf_cpu_context *cpuctx, -+ bool final) - { - } - -@@ -1098,12 +1128,12 @@ perf_cgroup_switch(struct task_struct *task, struct task_struct *next) - { - } - --static inline void --perf_cgroup_set_shadow_time(struct perf_event *event, u64 now) -+static inline u64 perf_cgroup_event_time(struct perf_event *event) - { -+ return 0; - } - --static inline u64 perf_cgroup_event_time(struct perf_event *event) -+static inline u64 perf_cgroup_event_time_now(struct perf_event *event, u64 now) - { - return 0; - } -@@ -1525,22 +1555,59 @@ static void perf_unpin_context(struct perf_event_context *ctx) - /* - * Update the record of the current time in a context. - */ --static void update_context_time(struct perf_event_context *ctx) -+static void __update_context_time(struct perf_event_context *ctx, bool adv) - { - u64 now = perf_clock(); - -- ctx->time += now - ctx->timestamp; -+ if (adv) -+ ctx->time += now - ctx->timestamp; - ctx->timestamp = now; + ++ if (pending) { ++ handle_futex_death((void __user *)pending + futex_offset, ++ curr, pip, HANDLE_DEATH_PENDING); ++ } ++} ++ ++static void futex_cleanup(struct task_struct *tsk) ++{ ++ if (unlikely(tsk->robust_list)) { ++ exit_robust_list(tsk); ++ tsk->robust_list = NULL; ++ } ++ ++#ifdef CONFIG_COMPAT ++ if (unlikely(tsk->compat_robust_list)) { ++ compat_exit_robust_list(tsk); ++ tsk->compat_robust_list = NULL; ++ } ++#endif ++ ++ if (unlikely(!list_empty(&tsk->pi_state_list))) ++ exit_pi_state_list(tsk); ++} ++ ++/** ++ * futex_exit_recursive - Set the tasks futex state to FUTEX_STATE_DEAD ++ * @tsk: task to set the state on ++ * ++ * Set the futex exit state of the task lockless. The futex waiter code ++ * observes that state when a task is exiting and loops until the task has ++ * actually finished the futex cleanup. The worst case for this is that the ++ * waiter runs through the wait loop until the state becomes visible. ++ * ++ * This is called from the recursive fault handling path in do_exit(). ++ * ++ * This is best effort. Either the futex exit code has run already or ++ * not. If the OWNER_DIED bit has been set on the futex then the waiter can ++ * take it over. If not, the problem is pushed back to user space. If the ++ * futex exit code did not run yet, then an already queued waiter might ++ * block forever, but there is nothing which can be done about that. 
++ */
++void futex_exit_recursive(struct task_struct *tsk)
++{
++	/* If the state is FUTEX_STATE_EXITING then futex_exit_mutex is held */
++	if (tsk->futex_state == FUTEX_STATE_EXITING)
++		mutex_unlock(&tsk->futex_exit_mutex);
++	tsk->futex_state = FUTEX_STATE_DEAD;
++}
++
++static void futex_cleanup_begin(struct task_struct *tsk)
++{
+	/*
++	 * Prevent various race issues against a concurrent incoming waiter
++	 * including live locks by forcing the waiter to block on
++	 * tsk->futex_exit_mutex when it observes FUTEX_STATE_EXITING in
++	 * attach_to_pi_owner().
++	 */
++	mutex_lock(&tsk->futex_exit_mutex);
++
++	/*
++	 * Switch the state to FUTEX_STATE_EXITING under tsk->pi_lock.
+	 *
++	 * This ensures that all subsequent checks of tsk->futex_state in
++	 * attach_to_pi_owner() must observe FUTEX_STATE_EXITING with
++	 * tsk->pi_lock held.
++	 *
++	 * It also guarantees that a pi_state which was queued right before
++	 * the state change under tsk->pi_lock by a concurrent waiter must
++	 * be observed in exit_pi_state_list().
++	 */
++	raw_spin_lock_irq(&tsk->pi_lock);
++	tsk->futex_state = FUTEX_STATE_EXITING;
++	raw_spin_unlock_irq(&tsk->pi_lock);
++}
++
++static void futex_cleanup_end(struct task_struct *tsk, int state)
++{
++	/*
++	 * Lockless store. The only side effect is that an observer might
++	 * take another loop until it becomes visible.
++	 */
++	tsk->futex_state = state;
++	/*
++	 * Drop the exit protection. This unblocks waiters which observed
++	 * FUTEX_STATE_EXITING to reevaluate the state.
++	 */
++	mutex_unlock(&tsk->futex_exit_mutex);
++}
++
++void futex_exec_release(struct task_struct *tsk)
++{
++	/*
++	 * The state handling is done for consistency, but in the case of
++	 * exec() there is no way to prevent further damage as the PID stays
++	 * the same. But for the unlikely and arguably buggy case that a
++	 * futex is held on exec(), this provides at least as much state
++	 * consistency protection as is possible.
++	 */
++	futex_cleanup_begin(tsk);
++	futex_cleanup(tsk);
++	/*
++	 * Reset the state to FUTEX_STATE_OK. The task is alive and about
++	 * to exec a new binary.
++ */ ++ futex_cleanup_end(tsk, FUTEX_STATE_OK); +} + -+static u64 perf_event_time_now(struct perf_event *event, u64 now) ++void futex_exit_release(struct task_struct *tsk) +{ -+ struct perf_event_context *ctx = event->ctx; ++ futex_cleanup_begin(tsk); ++ futex_cleanup(tsk); ++ futex_cleanup_end(tsk, FUTEX_STATE_DEAD); ++} + -+ if (unlikely(!ctx)) -+ return 0; ++long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout, ++ u32 __user *uaddr2, u32 val2, u32 val3) ++{ ++ int cmd = op & FUTEX_CMD_MASK; ++ unsigned int flags = 0; + -+ if (is_cgroup_event(event)) -+ return perf_cgroup_event_time_now(event, now); ++ if (!(op & FUTEX_PRIVATE_FLAG)) ++ flags |= FLAGS_SHARED; + -+ if (!(__load_acquire(&ctx->is_active) & EVENT_TIME)) -+ return ctx->time; ++ if (op & FUTEX_CLOCK_REALTIME) { ++ flags |= FLAGS_CLOCKRT; ++ if (cmd != FUTEX_WAIT_BITSET && cmd != FUTEX_WAIT_REQUEUE_PI && ++ cmd != FUTEX_LOCK_PI2) ++ return -ENOSYS; ++ } + -+ now += READ_ONCE(ctx->timeoffset); -+ return now; - } - - static enum event_type_t get_event_type(struct perf_event *event) -@@ -2346,7 +2413,7 @@ __perf_remove_from_context(struct perf_event *event, - - if (ctx->is_active & EVENT_TIME) { - update_context_time(ctx); -- update_cgrp_time_from_cpuctx(cpuctx); -+ update_cgrp_time_from_cpuctx(cpuctx, false); - } - - event_sched_out(event, cpuctx, ctx); -@@ -2357,6 +2424,9 @@ __perf_remove_from_context(struct perf_event *event, - list_del_event(event, ctx); - - if (!ctx->nr_events && ctx->is_active) { -+ if (ctx == &cpuctx->ctx) -+ update_cgrp_time_from_cpuctx(cpuctx, true); ++ switch (cmd) { ++ case FUTEX_LOCK_PI: ++ case FUTEX_LOCK_PI2: ++ case FUTEX_UNLOCK_PI: ++ case FUTEX_TRYLOCK_PI: ++ case FUTEX_WAIT_REQUEUE_PI: ++ case FUTEX_CMP_REQUEUE_PI: ++ if (!futex_cmpxchg_enabled) ++ return -ENOSYS; ++ } + - ctx->is_active = 0; - ctx->rotate_necessary = 0; - if (ctx->task) { -@@ -2388,7 +2458,11 @@ static void perf_remove_from_context(struct perf_event *event, unsigned long fla - * event_function_call() user. - */ - raw_spin_lock_irq(&ctx->lock); -- if (!ctx->is_active) { -+ /* -+ * Cgroup events are per-cpu events, and must IPI because of -+ * cgrp_cpuctx_list. -+ */ -+ if (!ctx->is_active && !is_cgroup_event(event)) { - __perf_remove_from_context(event, __get_cpu_context(ctx), - ctx, (void *)flags); - raw_spin_unlock_irq(&ctx->lock); -@@ -2478,40 +2552,6 @@ void perf_event_disable_inatomic(struct perf_event *event) - irq_work_queue(&event->pending); - } - --static void perf_set_shadow_time(struct perf_event *event, -- struct perf_event_context *ctx) --{ -- /* -- * use the correct time source for the time snapshot -- * -- * We could get by without this by leveraging the -- * fact that to get to this function, the caller -- * has most likely already called update_context_time() -- * and update_cgrp_time_xx() and thus both timestamp -- * are identical (or very close). Given that tstamp is, -- * already adjusted for cgroup, we could say that: -- * tstamp - ctx->timestamp -- * is equivalent to -- * tstamp - cgrp->timestamp. -- * -- * Then, in perf_output_read(), the calculation would -- * work with no changes because: -- * - event is guaranteed scheduled in -- * - no scheduled out in between -- * - thus the timestamp would be the same -- * -- * But this is a bit hairy. -- * -- * So instead, we have an explicit cgroup call to remain -- * within the time source all along. We believe it -- * is cleaner and simpler to understand. 
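
Back in the futex code, futex_init_timeout() above is why the two wait
flavours take different timespecs: FUTEX_WAIT gets a relative timeout
(converted with ktime_add_safe(ktime_get(), t)), FUTEX_WAIT_BITSET an
absolute deadline. A sketch of both calls (wrapper names are illustrative):

	#include <linux/futex.h>
	#include <sys/syscall.h>
	#include <time.h>
	#include <unistd.h>

	/* Relative timeout, measured from now. */
	static long wait_rel(unsigned int *addr, unsigned int val,
			     const struct timespec *rel)
	{
		return syscall(SYS_futex, addr, FUTEX_WAIT_PRIVATE, val,
			       rel, NULL, 0);
	}

	/* Absolute CLOCK_MONOTONIC deadline (CLOCK_REALTIME if the op also
	 * carries FUTEX_CLOCK_REALTIME). */
	static long wait_abs(unsigned int *addr, unsigned int val,
			     const struct timespec *deadline)
	{
		return syscall(SYS_futex, addr, FUTEX_WAIT_BITSET_PRIVATE,
			       val, deadline, NULL, FUTEX_BITSET_MATCH_ANY);
	}
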
-- */ -- if (is_cgroup_event(event)) -- perf_cgroup_set_shadow_time(event, event->tstamp); -- else -- event->shadow_ctx_time = event->tstamp - ctx->timestamp; --} -- - #define MAX_INTERRUPTS (~0ULL) - - static void perf_log_throttle(struct perf_event *event, int enable); -@@ -2552,8 +2592,6 @@ event_sched_in(struct perf_event *event, - - perf_pmu_disable(event->pmu); - -- perf_set_shadow_time(event, ctx); -- - perf_log_itrace_start(event); - - if (event->pmu->add(event, PERF_EF_START)) { -@@ -2857,11 +2895,14 @@ perf_install_in_context(struct perf_event_context *ctx, - * perf_event_attr::disabled events will not run and can be initialized - * without IPI. Except when this is the first event for the context, in - * that case we need the magic of the IPI to set ctx->is_active. -+ * Similarly, cgroup events for the context also needs the IPI to -+ * manipulate the cgrp_cpuctx_list. - * - * The IOC_ENABLE that is sure to follow the creation of a disabled - * event will issue the IPI and reprogram the hardware. - */ -- if (__perf_effective_state(event) == PERF_EVENT_STATE_OFF && ctx->nr_events) { -+ if (__perf_effective_state(event) == PERF_EVENT_STATE_OFF && -+ ctx->nr_events && !is_cgroup_event(event)) { - raw_spin_lock_irq(&ctx->lock); - if (ctx->task == TASK_TOMBSTONE) { - raw_spin_unlock_irq(&ctx->lock); -@@ -3193,6 +3234,15 @@ static int perf_event_modify_breakpoint(struct perf_event *bp, - return err; - } - ++ switch (cmd) { ++ case FUTEX_WAIT: ++ val3 = FUTEX_BITSET_MATCH_ANY; ++ fallthrough; ++ case FUTEX_WAIT_BITSET: ++ return futex_wait(uaddr, flags, val, timeout, val3); ++ case FUTEX_WAKE: ++ val3 = FUTEX_BITSET_MATCH_ANY; ++ fallthrough; ++ case FUTEX_WAKE_BITSET: ++ return futex_wake(uaddr, flags, val, val3); ++ case FUTEX_REQUEUE: ++ return futex_requeue(uaddr, flags, uaddr2, val, val2, NULL, 0); ++ case FUTEX_CMP_REQUEUE: ++ return futex_requeue(uaddr, flags, uaddr2, val, val2, &val3, 0); ++ case FUTEX_WAKE_OP: ++ return futex_wake_op(uaddr, flags, uaddr2, val, val2, val3); ++ case FUTEX_LOCK_PI: ++ flags |= FLAGS_CLOCKRT; ++ fallthrough; ++ case FUTEX_LOCK_PI2: ++ return futex_lock_pi(uaddr, flags, timeout, 0); ++ case FUTEX_UNLOCK_PI: ++ return futex_unlock_pi(uaddr, flags); ++ case FUTEX_TRYLOCK_PI: ++ return futex_lock_pi(uaddr, flags, NULL, 1); ++ case FUTEX_WAIT_REQUEUE_PI: ++ val3 = FUTEX_BITSET_MATCH_ANY; ++ return futex_wait_requeue_pi(uaddr, flags, val, timeout, val3, ++ uaddr2); ++ case FUTEX_CMP_REQUEUE_PI: ++ return futex_requeue(uaddr, flags, uaddr2, val, val2, &val3, 1); ++ } ++ return -ENOSYS; ++} ++ ++static __always_inline bool futex_cmd_has_timeout(u32 cmd) ++{ ++ switch (cmd) { ++ case FUTEX_WAIT: ++ case FUTEX_LOCK_PI: ++ case FUTEX_LOCK_PI2: ++ case FUTEX_WAIT_BITSET: ++ case FUTEX_WAIT_REQUEUE_PI: ++ return true; ++ } ++ return false; ++} ++ ++static __always_inline int ++futex_init_timeout(u32 cmd, u32 op, struct timespec64 *ts, ktime_t *t) ++{ ++ if (!timespec64_valid(ts)) ++ return -EINVAL; ++ ++ *t = timespec64_to_ktime(*ts); ++ if (cmd == FUTEX_WAIT) ++ *t = ktime_add_safe(ktime_get(), *t); ++ else if (cmd != FUTEX_LOCK_PI && !(op & FUTEX_CLOCK_REALTIME)) ++ *t = timens_ktime_to_host(CLOCK_MONOTONIC, *t); ++ return 0; ++} ++ ++SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val, ++ const struct __kernel_timespec __user *, utime, ++ u32 __user *, uaddr2, u32, val3) ++{ ++ int ret, cmd = op & FUTEX_CMD_MASK; ++ ktime_t t, *tp = NULL; ++ struct timespec64 ts; ++ ++ if (utime && futex_cmd_has_timeout(cmd)) { ++ if 
(unlikely(should_fail_futex(!(op & FUTEX_PRIVATE_FLAG)))) ++ return -EFAULT; ++ if (get_timespec64(&ts, utime)) ++ return -EFAULT; ++ ret = futex_init_timeout(cmd, op, &ts, &t); ++ if (ret) ++ return ret; ++ tp = &t; ++ } ++ ++ return do_futex(uaddr, op, val, tp, uaddr2, (unsigned long)utime, val3); ++} ++ ++#ifdef CONFIG_COMPAT +/* -+ * Copy event-type-independent attributes that may be modified. ++ * Fetch a robust-list pointer. Bit 0 signals PI futexes: + */ -+static void perf_event_modify_copy_attr(struct perf_event_attr *to, -+ const struct perf_event_attr *from) ++static inline int ++compat_fetch_robust_entry(compat_uptr_t *uentry, struct robust_list __user **entry, ++ compat_uptr_t __user *head, unsigned int *pi) +{ -+ to->sig_data = from->sig_data; ++ if (get_user(*uentry, head)) ++ return -EFAULT; ++ ++ *entry = compat_ptr((*uentry) & ~1); ++ *pi = (unsigned int)(*uentry) & 1; ++ ++ return 0; +} + - static int perf_event_modify_attr(struct perf_event *event, - struct perf_event_attr *attr) - { -@@ -3215,10 +3265,17 @@ static int perf_event_modify_attr(struct perf_event *event, - WARN_ON_ONCE(event->ctx->parent_ctx); - - mutex_lock(&event->child_mutex); -+ /* -+ * Event-type-independent attributes must be copied before event-type -+ * modification, which will validate that final attributes match the -+ * source attributes after all relevant attributes have been copied. -+ */ -+ perf_event_modify_copy_attr(&event->attr, attr); - err = func(event, attr); - if (err) - goto out; - list_for_each_entry(child, &event->child_list, child_list) { -+ perf_event_modify_copy_attr(&child->attr, attr); - err = func(child, attr); - if (err) - goto out; -@@ -3247,16 +3304,6 @@ static void ctx_sched_out(struct perf_event_context *ctx, - return; - } - -- ctx->is_active &= ~event_type; -- if (!(ctx->is_active & EVENT_ALL)) -- ctx->is_active = 0; -- -- if (ctx->task) { -- WARN_ON_ONCE(cpuctx->task_ctx != ctx); -- if (!ctx->is_active) -- cpuctx->task_ctx = NULL; -- } -- - /* - * Always update time if it was set; not only when it changes. - * Otherwise we can 'forget' to update time for any but the last -@@ -3270,7 +3317,22 @@ static void ctx_sched_out(struct perf_event_context *ctx, - if (is_active & EVENT_TIME) { - /* update (and stop) ctx time */ - update_context_time(ctx); -- update_cgrp_time_from_cpuctx(cpuctx); -+ update_cgrp_time_from_cpuctx(cpuctx, ctx == &cpuctx->ctx); -+ /* -+ * CPU-release for the below ->is_active store, -+ * see __load_acquire() in perf_event_time_now() -+ */ -+ barrier(); -+ } ++static void __user *futex_uaddr(struct robust_list __user *entry, ++ compat_long_t futex_offset) ++{ ++ compat_uptr_t base = ptr_to_compat(entry); ++ void __user *uaddr = compat_ptr(base + futex_offset); + -+ ctx->is_active &= ~event_type; -+ if (!(ctx->is_active & EVENT_ALL)) -+ ctx->is_active = 0; ++ return uaddr; ++} + -+ if (ctx->task) { -+ WARN_ON_ONCE(cpuctx->task_ctx != ctx); -+ if (!ctx->is_active) -+ cpuctx->task_ctx = NULL; - } - - is_active ^= ctx->is_active; /* changed bits */ -@@ -3707,13 +3769,19 @@ static noinline int visit_groups_merge(struct perf_cpu_context *cpuctx, - return 0; - } - +/* -+ * Because the userpage is strictly per-event (there is no concept of context, -+ * so there cannot be a context indirection), every userpage must be updated -+ * when context time starts :-( ++ * Walk curr->robust_list (very carefully, it's a userspace list!) ++ * and mark any locks found there dead, and notify any waiters. + * -+ * IOW, we must not miss EVENT_TIME edges. 
++ * We silently return on any sign of list-walking problem. + */ - static inline bool event_update_userpage(struct perf_event *event) - { - if (likely(!atomic_read(&event->mmap_count))) - return false; - - perf_event_update_time(event); -- perf_set_shadow_time(event, event->ctx); - perf_event_update_userpage(event); - - return true; -@@ -3797,13 +3865,23 @@ ctx_sched_in(struct perf_event_context *ctx, - struct task_struct *task) - { - int is_active = ctx->is_active; -- u64 now; - - lockdep_assert_held(&ctx->lock); - - if (likely(!ctx->nr_events)) - return; - -+ if (is_active ^ EVENT_TIME) { -+ /* start ctx time */ -+ __update_context_time(ctx, false); -+ perf_cgroup_set_timestamp(task, ctx); ++static void compat_exit_robust_list(struct task_struct *curr) ++{ ++ struct compat_robust_list_head __user *head = curr->compat_robust_list; ++ struct robust_list __user *entry, *next_entry, *pending; ++ unsigned int limit = ROBUST_LIST_LIMIT, pi, pip; ++ unsigned int next_pi; ++ compat_uptr_t uentry, next_uentry, upending; ++ compat_long_t futex_offset; ++ int rc; ++ ++ if (!futex_cmpxchg_enabled) ++ return; ++ ++ /* ++ * Fetch the list head (which was registered earlier, via ++ * sys_set_robust_list()): ++ */ ++ if (compat_fetch_robust_entry(&uentry, &entry, &head->list.next, &pi)) ++ return; ++ /* ++ * Fetch the relative futex offset: ++ */ ++ if (get_user(futex_offset, &head->futex_offset)) ++ return; ++ /* ++ * Fetch any possibly pending lock-add first, and handle it ++ * if it exists: ++ */ ++ if (compat_fetch_robust_entry(&upending, &pending, ++ &head->list_op_pending, &pip)) ++ return; ++ ++ next_entry = NULL; /* avoid warning with gcc */ ++ while (entry != (struct robust_list __user *) &head->list) { + /* -+ * CPU-release for the below ->is_active store, -+ * see __load_acquire() in perf_event_time_now() ++ * Fetch the next entry in the list before calling ++ * handle_futex_death: + */ -+ barrier(); ++ rc = compat_fetch_robust_entry(&next_uentry, &next_entry, ++ (compat_uptr_t __user *)&entry->next, &next_pi); ++ /* ++ * A pending lock might already be on the list, so ++ * dont process it twice: ++ */ ++ if (entry != pending) { ++ void __user *uaddr = futex_uaddr(entry, futex_offset); ++ ++ if (handle_futex_death(uaddr, curr, pi, ++ HANDLE_DEATH_LIST)) ++ return; ++ } ++ if (rc) ++ return; ++ uentry = next_uentry; ++ entry = next_entry; ++ pi = next_pi; ++ /* ++ * Avoid excessively long or circular lists: ++ */ ++ if (!--limit) ++ break; ++ ++ cond_resched(); + } ++ if (pending) { ++ void __user *uaddr = futex_uaddr(pending, futex_offset); + - ctx->is_active |= (event_type | EVENT_TIME); - if (ctx->task) { - if (!is_active) -@@ -3814,13 +3892,6 @@ ctx_sched_in(struct perf_event_context *ctx, - - is_active ^= ctx->is_active; /* changed bits */ - -- if (is_active & EVENT_TIME) { -- /* start ctx time */ -- now = perf_clock(); -- ctx->timestamp = now; -- perf_cgroup_set_timestamp(task, ctx); -- } -- - /* - * First go through the list and put on any pinned groups - * in order to give them the best chance of going on. 
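The ctx_sched_out()/ctx_sched_in() hunks above pair a barrier() before the ->is_active store with the __load_acquire(&ctx->is_active) read in perf_event_time_now(), so a reader that observes EVENT_TIME set (or cleared) also observes the matching ctx->time/ctx->timeoffset values. Below is a minimal user-space sketch of that publish/consume pattern using C11 atomics; the names (clock_ctx, ctx_start_time, ctx_stop_time, ctx_time_now) are invented for illustration, and the C11 release/acquire primitives merely stand in for the kernel's own barrier()/__load_acquire(), so treat it as an analogy rather than the actual perf implementation.

#include <inttypes.h>
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

struct clock_ctx {
	uint64_t time;		/* frozen timestamp, valid when !active   */
	uint64_t timeoffset;	/* offset added to "now" while active     */
	atomic_int active;	/* publication flag (EVENT_TIME analogue) */
};

/* Writer, "sched in": fill in the payload first, then publish it with a
 * release store so any reader that sees active == 1 also sees the payload. */
static void ctx_start_time(struct clock_ctx *ctx, uint64_t now)
{
	ctx->timeoffset = ctx->time - now;	/* unsigned wraparound is fine */
	atomic_store_explicit(&ctx->active, 1, memory_order_release);
}

/* Writer, "sched out": freeze the clock, then clear the flag with release
 * ordering so readers seeing active == 0 read the frozen value. */
static void ctx_stop_time(struct clock_ctx *ctx, uint64_t now)
{
	ctx->time = now + ctx->timeoffset;
	atomic_store_explicit(&ctx->active, 0, memory_order_release);
}

/* Reader: the acquire load orders the payload reads after the flag check,
 * mirroring __load_acquire(&ctx->is_active) in the patch. A kernel-style
 * implementation would also use READ_ONCE() on timeoffset, since the
 * running clock can be updated concurrently; this sketch only shows the
 * flag ordering. */
static uint64_t ctx_time_now(struct clock_ctx *ctx, uint64_t now)
{
	if (!atomic_load_explicit(&ctx->active, memory_order_acquire))
		return ctx->time;		/* stopped: frozen value */
	return now + ctx->timeoffset;		/* running: extrapolate  */
}

int main(void)
{
	struct clock_ctx ctx = { .time = 0, .timeoffset = 0 };

	atomic_init(&ctx.active, 0);
	ctx_start_time(&ctx, 100);			/* clock runs from now = 100 */
	printf("%" PRIu64 "\n", ctx_time_now(&ctx, 150));	/* prints 50 */
	ctx_stop_time(&ctx, 160);
	printf("%" PRIu64 "\n", ctx_time_now(&ctx, 500));	/* prints 60 */
	return 0;
}

Putting the acquire cost on a single flag load keeps the common read path cheap, which is presumably why the patch reorders the EVENT_TIME update ahead of the ->is_active store instead of taking a lock on the read side.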
-@@ -4414,6 +4485,18 @@ static inline u64 perf_event_count(struct perf_event *event) - return local64_read(&event->count) + atomic64_read(&event->child_count); - } - -+static void calc_timer_values(struct perf_event *event, -+ u64 *now, -+ u64 *enabled, -+ u64 *running) ++ handle_futex_death(uaddr, curr, pip, HANDLE_DEATH_PENDING); ++ } ++} ++ ++COMPAT_SYSCALL_DEFINE2(set_robust_list, ++ struct compat_robust_list_head __user *, head, ++ compat_size_t, len) +{ -+ u64 ctx_time; ++ if (!futex_cmpxchg_enabled) ++ return -ENOSYS; + -+ *now = perf_clock(); -+ ctx_time = perf_event_time_now(event, *now); -+ __perf_update_times(event, ctx_time, enabled, running); ++ if (unlikely(len != sizeof(*head))) ++ return -EINVAL; ++ ++ current->compat_robust_list = head; ++ ++ return 0; +} + - /* - * NMI-safe method to read a local event, that is an event that - * is: -@@ -4473,10 +4556,9 @@ int perf_event_read_local(struct perf_event *event, u64 *value, - - *value = local64_read(&event->count); - if (enabled || running) { -- u64 now = event->shadow_ctx_time + perf_clock(); -- u64 __enabled, __running; -+ u64 __enabled, __running, __now;; - -- __perf_update_times(event, now, &__enabled, &__running); -+ calc_timer_values(event, &__now, &__enabled, &__running); - if (enabled) - *enabled = __enabled; - if (running) -@@ -5798,18 +5880,6 @@ static int perf_event_index(struct perf_event *event) - return event->pmu->event_idx(event); - } - --static void calc_timer_values(struct perf_event *event, -- u64 *now, -- u64 *enabled, -- u64 *running) --{ -- u64 ctx_time; -- -- *now = perf_clock(); -- ctx_time = event->shadow_ctx_time + *now; -- __perf_update_times(event, ctx_time, enabled, running); --} -- - static void perf_event_init_userpage(struct perf_event *event) - { - struct perf_event_mmap_page *userpg; -@@ -5934,6 +6004,8 @@ static void ring_buffer_attach(struct perf_event *event, - struct perf_buffer *old_rb = NULL; - unsigned long flags; - -+ WARN_ON_ONCE(event->parent); ++COMPAT_SYSCALL_DEFINE3(get_robust_list, int, pid, ++ compat_uptr_t __user *, head_ptr, ++ compat_size_t __user *, len_ptr) ++{ ++ struct compat_robust_list_head __user *head; ++ unsigned long ret; ++ struct task_struct *p; + - if (event->rb) { - /* - * Should be impossible, we set this when removing -@@ -5991,6 +6063,9 @@ static void ring_buffer_wakeup(struct perf_event *event) - { - struct perf_buffer *rb; - -+ if (event->parent) -+ event = event->parent; ++ if (!futex_cmpxchg_enabled) ++ return -ENOSYS; + - rcu_read_lock(); - rb = rcu_dereference(event->rb); - if (rb) { -@@ -6004,6 +6079,9 @@ struct perf_buffer *ring_buffer_get(struct perf_event *event) - { - struct perf_buffer *rb; - -+ if (event->parent) -+ event = event->parent; ++ rcu_read_lock(); + - rcu_read_lock(); - rb = rcu_dereference(event->rb); - if (rb) { -@@ -6270,17 +6348,17 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma) - again: - mutex_lock(&event->mmap_mutex); - if (event->rb) { -- if (event->rb->nr_pages != nr_pages) { -+ if (data_page_nr(event->rb) != nr_pages) { - ret = -EINVAL; - goto unlock; - } - - if (!atomic_inc_not_zero(&event->rb->mmap_count)) { - /* -- * Raced against perf_mmap_close() through -- * perf_event_set_output(). Try again, hope for better -- * luck. -+ * Raced against perf_mmap_close(); remove the -+ * event and try again. 
- */ -+ ring_buffer_attach(event, NULL); - mutex_unlock(&event->mmap_mutex); - goto again; - } -@@ -6349,7 +6427,6 @@ accounting: - ring_buffer_attach(event, rb); - - perf_event_update_time(event); -- perf_set_shadow_time(event, event->ctx); - perf_event_init_userpage(event); - perf_event_update_userpage(event); - } else { -@@ -6452,8 +6529,8 @@ static void perf_sigtrap(struct perf_event *event) - if (current->flags & PF_EXITING) - return; - -- force_sig_perf((void __user *)event->pending_addr, -- event->attr.type, event->attr.sig_data); -+ send_sig_perf((void __user *)event->pending_addr, -+ event->attr.type, event->attr.sig_data); - } - - static void perf_pending_event_disable(struct perf_event *event) -@@ -6526,18 +6603,25 @@ static void perf_pending_event(struct irq_work *entry) - * Later on, we might change it to a list if there is - * another virtualization implementation supporting the callbacks. - */ --struct perf_guest_info_callbacks *perf_guest_cbs; -+struct perf_guest_info_callbacks __rcu *perf_guest_cbs; - - int perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *cbs) - { -- perf_guest_cbs = cbs; -+ if (WARN_ON_ONCE(rcu_access_pointer(perf_guest_cbs))) -+ return -EBUSY; ++ ret = -ESRCH; ++ if (!pid) ++ p = current; ++ else { ++ p = find_task_by_vpid(pid); ++ if (!p) ++ goto err_unlock; ++ } + -+ rcu_assign_pointer(perf_guest_cbs, cbs); - return 0; - } - EXPORT_SYMBOL_GPL(perf_register_guest_info_callbacks); - - int perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks *cbs) - { -- perf_guest_cbs = NULL; -+ if (WARN_ON_ONCE(rcu_access_pointer(perf_guest_cbs) != cbs)) -+ return -EINVAL; ++ ret = -EPERM; ++ if (!ptrace_may_access(p, PTRACE_MODE_READ_REALCREDS)) ++ goto err_unlock; + -+ rcu_assign_pointer(perf_guest_cbs, NULL); -+ synchronize_rcu(); - return 0; - } - EXPORT_SYMBOL_GPL(perf_unregister_guest_info_callbacks); -@@ -6696,7 +6780,7 @@ static unsigned long perf_prepare_sample_aux(struct perf_event *event, - if (WARN_ON_ONCE(READ_ONCE(sampler->oncpu) != smp_processor_id())) - goto out; - -- rb = ring_buffer_get(sampler->parent ? sampler->parent : sampler); -+ rb = ring_buffer_get(sampler); - if (!rb) - goto out; - -@@ -6762,7 +6846,7 @@ static void perf_aux_sample_output(struct perf_event *event, - if (WARN_ON_ONCE(!sampler || !data->aux_size)) - return; - -- rb = ring_buffer_get(sampler->parent ? sampler->parent : sampler); -+ rb = ring_buffer_get(sampler); - if (!rb) - return; - -@@ -7154,7 +7238,6 @@ void perf_output_sample(struct perf_output_handle *handle, - static u64 perf_virt_to_phys(u64 virt) - { - u64 phys_addr = 0; -- struct page *p = NULL; - - if (!virt) - return 0; -@@ -7173,14 +7256,15 @@ static u64 perf_virt_to_phys(u64 virt) - * If failed, leave phys_addr as 0. - */ - if (current->mm != NULL) { -+ struct page *p; ++ head = p->compat_robust_list; ++ rcu_read_unlock(); + - pagefault_disable(); -- if (get_user_page_fast_only(virt, 0, &p)) -+ if (get_user_page_fast_only(virt, 0, &p)) { - phys_addr = page_to_phys(p) + virt % PAGE_SIZE; -+ put_page(p); -+ } - pagefault_enable(); - } -- -- if (p) -- put_page(p); - } - - return phys_addr; -@@ -9729,6 +9813,9 @@ void perf_tp_event(u16 event_type, u64 count, void *record, int entry_size, - continue; - if (event->attr.config != entry->type) - continue; -+ /* Cannot deliver synchronous signal to other task. 
*/ -+ if (event->attr.sigtrap) -+ continue; - if (perf_tp_event_match(event, &data, regs)) - perf_swevent_event(event, count, &data, regs); - } -@@ -10443,8 +10530,11 @@ perf_event_parse_addr_filter(struct perf_event *event, char *fstr, - } - - /* ready to consume more filters */ -+ kfree(filename); -+ filename = NULL; - state = IF_STATE_ACTION; - filter = NULL; -+ kernel = 0; - } - } - -@@ -11506,6 +11596,9 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu, - - event->state = PERF_EVENT_STATE_INACTIVE; - -+ if (parent_event) -+ event->event_caps = parent_event->event_caps; ++ if (put_user(sizeof(*head), len_ptr)) ++ return -EFAULT; ++ return put_user(ptr_to_compat(head), head_ptr); + - if (event->attr.sigtrap) - atomic_set(&event->event_limit, 1); - -@@ -11799,14 +11892,25 @@ err_size: - goto out; - } - -+static void mutex_lock_double(struct mutex *a, struct mutex *b) -+{ -+ if (b < a) -+ swap(a, b); ++err_unlock: ++ rcu_read_unlock(); + -+ mutex_lock(a); -+ mutex_lock_nested(b, SINGLE_DEPTH_NESTING); ++ return ret; +} ++#endif /* CONFIG_COMPAT */ + - static int - perf_event_set_output(struct perf_event *event, struct perf_event *output_event) - { - struct perf_buffer *rb = NULL; - int ret = -EINVAL; - -- if (!output_event) -+ if (!output_event) { -+ mutex_lock(&event->mmap_mutex); - goto set; ++#ifdef CONFIG_COMPAT_32BIT_TIME ++SYSCALL_DEFINE6(futex_time32, u32 __user *, uaddr, int, op, u32, val, ++ const struct old_timespec32 __user *, utime, u32 __user *, uaddr2, ++ u32, val3) ++{ ++ int ret, cmd = op & FUTEX_CMD_MASK; ++ ktime_t t, *tp = NULL; ++ struct timespec64 ts; ++ ++ if (utime && futex_cmd_has_timeout(cmd)) { ++ if (get_old_timespec32(&ts, utime)) ++ return -EFAULT; ++ ret = futex_init_timeout(cmd, op, &ts, &t); ++ if (ret) ++ return ret; ++ tp = &t; + } - - /* don't allow circular references */ - if (event == output_event) -@@ -11844,8 +11948,15 @@ perf_event_set_output(struct perf_event *event, struct perf_event *output_event) - event->pmu != output_event->pmu) - goto out; - -+ /* -+ * Hold both mmap_mutex to serialize against perf_mmap_close(). Since -+ * output_event is already on rb->event_list, and the list iteration -+ * restarts after every removal, it is guaranteed this new event is -+ * observed *OR* if output_event is already removed, it's guaranteed we -+ * observe !rb->mmap_count. -+ */ -+ mutex_lock_double(&event->mmap_mutex, &output_event->mmap_mutex); - set: -- mutex_lock(&event->mmap_mutex); - /* Can't redirect output if we've got an active mmap() */ - if (atomic_read(&event->mmap_count)) - goto unlock; -@@ -11855,6 +11966,12 @@ set: - rb = ring_buffer_get(output_event); - if (!rb) - goto unlock; + -+ /* did we race against perf_mmap_close() */ -+ if (!atomic_read(&rb->mmap_count)) { -+ ring_buffer_put(rb); -+ goto unlock; -+ } - } - - ring_buffer_attach(event, rb); -@@ -11862,20 +11979,13 @@ set: - ret = 0; - unlock: - mutex_unlock(&event->mmap_mutex); -+ if (output_event) -+ mutex_unlock(&output_event->mmap_mutex); - - out: - return ret; - } - --static void mutex_lock_double(struct mutex *a, struct mutex *b) --{ -- if (b < a) -- swap(a, b); -- -- mutex_lock(a); -- mutex_lock_nested(b, SINGLE_DEPTH_NESTING); --} -- - static int perf_event_set_clock(struct perf_event *event, clockid_t clk_id) - { - bool nmi_safe = false; -@@ -12190,6 +12300,9 @@ SYSCALL_DEFINE5(perf_event_open, - * Do not allow to attach to a group in a different task - * or CPU context. If we're moving SW events, we'll fix - * this up later, so allow that. 
-+ * -+ * Racy, not holding group_leader->ctx->mutex, see comment with -+ * perf_event_ctx_lock(). - */ - if (!move_group && group_leader->ctx != ctx) - goto err_context; -@@ -12255,6 +12368,7 @@ SYSCALL_DEFINE5(perf_event_open, - } else { - perf_event_ctx_unlock(group_leader, gctx); - move_group = 0; -+ goto not_move_group; - } - } - -@@ -12271,7 +12385,17 @@ SYSCALL_DEFINE5(perf_event_open, - } - } else { - mutex_lock(&ctx->mutex); ++ return do_futex(uaddr, op, val, tp, uaddr2, (unsigned long)utime, val3); ++} ++#endif /* CONFIG_COMPAT_32BIT_TIME */ + -+ /* -+ * Now that we hold ctx->lock, (re)validate group_leader->ctx == ctx, -+ * see the group_leader && !move_group test earlier. -+ */ -+ if (group_leader && group_leader->ctx != ctx) { -+ err = -EINVAL; -+ goto err_locked; -+ } - } -+not_move_group: - - if (ctx->task == TASK_TOMBSTONE) { - err = -ESRCH; -diff --git a/kernel/events/internal.h b/kernel/events/internal.h -index 228801e207886..aa23ffdaf819f 100644 ---- a/kernel/events/internal.h -+++ b/kernel/events/internal.h -@@ -116,6 +116,11 @@ static inline int page_order(struct perf_buffer *rb) - } - #endif - -+static inline int data_page_nr(struct perf_buffer *rb) ++static void __init futex_detect_cmpxchg(void) +{ -+ return rb->nr_pages << page_order(rb); ++#ifndef CONFIG_HAVE_FUTEX_CMPXCHG ++ u32 curval; ++ ++ /* ++ * This will fail and we want it. Some arch implementations do ++ * runtime detection of the futex_atomic_cmpxchg_inatomic() ++ * functionality. We want to know that before we call in any ++ * of the complex code paths. Also we want to prevent ++ * registration of robust lists in that case. NULL is ++ * guaranteed to fault and we get -EFAULT on functional ++ * implementation, the non-functional ones will return ++ * -ENOSYS. ++ */ ++ if (cmpxchg_futex_value_locked(&curval, NULL, 0, 0) == -EFAULT) ++ futex_cmpxchg_enabled = 1; ++#endif +} + - static inline unsigned long perf_data_size(struct perf_buffer *rb) - { - return rb->nr_pages << (PAGE_SHIFT + page_order(rb)); -diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c -index 52868716ec358..fb35b926024ca 100644 ---- a/kernel/events/ring_buffer.c -+++ b/kernel/events/ring_buffer.c -@@ -859,11 +859,6 @@ void rb_free(struct perf_buffer *rb) - } - - #else --static int data_page_nr(struct perf_buffer *rb) --{ -- return rb->nr_pages << page_order(rb); --} -- - static struct page * - __perf_mmap_to_page(struct perf_buffer *rb, unsigned long pgoff) - { -diff --git a/kernel/exit.c b/kernel/exit.c -index 91a43e57a32eb..aefe7445508db 100644 ---- a/kernel/exit.c -+++ b/kernel/exit.c -@@ -796,7 +796,7 @@ void __noreturn do_exit(long code) - - #ifdef CONFIG_POSIX_TIMERS - hrtimer_cancel(&tsk->signal->real_timer); -- exit_itimers(tsk->signal); -+ exit_itimers(tsk); - #endif - if (tsk->mm) - setmax_mm_hiwater_rss(&tsk->signal->maxrss, tsk->mm); -diff --git a/kernel/fork.c b/kernel/fork.c -index 38681ad44c76b..908ba3c93893f 100644 ---- a/kernel/fork.c -+++ b/kernel/fork.c -@@ -1153,6 +1153,7 @@ void mmput_async(struct mm_struct *mm) - schedule_work(&mm->async_put_work); - } - } -+EXPORT_SYMBOL_GPL(mmput_async); - #endif - - /** -@@ -2055,18 +2056,18 @@ static __latent_entropy struct task_struct *copy_process( - #ifdef CONFIG_PROVE_LOCKING - DEBUG_LOCKS_WARN_ON(!p->softirqs_enabled); - #endif -+ retval = copy_creds(p, clone_flags); -+ if (retval < 0) -+ goto bad_fork_free; ++static int __init futex_init(void) ++{ ++ unsigned int futex_shift; ++ unsigned long i; + - retval = -EAGAIN; - if 
(is_ucounts_overlimit(task_ucounts(p), UCOUNT_RLIMIT_NPROC, rlimit(RLIMIT_NPROC))) { - if (p->real_cred->user != INIT_USER && - !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN)) -- goto bad_fork_free; -+ goto bad_fork_cleanup_count; - } - current->flags &= ~PF_NPROC_EXCEEDED; - -- retval = copy_creds(p, clone_flags); -- if (retval < 0) -- goto bad_fork_free; -- - /* - * If multiple threads are within copy_process(), then this check - * triggers too late. This doesn't hurt, the check is only there -@@ -2280,6 +2281,7 @@ static __latent_entropy struct task_struct *copy_process( - p->pdeath_signal = 0; - INIT_LIST_HEAD(&p->thread_group); - p->task_works = NULL; -+ clear_posix_cputimers_work(p); - - #ifdef CONFIG_KRETPROBES - p->kretprobe_instances.first = NULL; -@@ -2295,6 +2297,17 @@ static __latent_entropy struct task_struct *copy_process( - if (retval) - goto bad_fork_put_pidfd; ++#if CONFIG_BASE_SMALL ++ futex_hashsize = 16; ++#else ++ futex_hashsize = roundup_pow_of_two(256 * num_possible_cpus()); ++#endif ++ ++ futex_queues = alloc_large_system_hash("futex", sizeof(*futex_queues), ++ futex_hashsize, 0, ++ futex_hashsize < 256 ? HASH_SMALL : 0, ++ &futex_shift, NULL, ++ futex_hashsize, futex_hashsize); ++ futex_hashsize = 1UL << futex_shift; ++ ++ futex_detect_cmpxchg(); ++ ++ for (i = 0; i < futex_hashsize; i++) { ++ atomic_set(&futex_queues[i].waiters, 0); ++ plist_head_init(&futex_queues[i].chain); ++ spin_lock_init(&futex_queues[i].lock); ++ } ++ ++ return 0; ++} ++core_initcall(futex_init); +diff --git a/kernel/gcov/clang.c b/kernel/gcov/clang.c +index cbb0bed958abd..7670a811a5657 100644 +--- a/kernel/gcov/clang.c ++++ b/kernel/gcov/clang.c +@@ -280,6 +280,8 @@ void gcov_info_add(struct gcov_info *dst, struct gcov_info *src) -+ /* -+ * Now that the cgroups are pinned, re-clone the parent cgroup and put -+ * the new task on the correct runqueue. All this *before* the task -+ * becomes visible. -+ * -+ * This isn't part of ->can_fork() because while the re-cloning is -+ * cgroup specific, it unconditionally needs to place the task on a -+ * runqueue. -+ */ -+ sched_cgroup_fork(p, args); + for (i = 0; i < sfn_ptr->num_counters; i++) + dfn_ptr->counters[i] += sfn_ptr->counters[i]; + - /* - * From this point on we must avoid any synchronous user-space - * communication until we take the tasklist-lock. 
In particular, we do -@@ -2352,10 +2365,6 @@ static __latent_entropy struct task_struct *copy_process( - goto bad_fork_cancel_cgroup; ++ sfn_ptr = list_next_entry(sfn_ptr, head); } + } -- /* past the last point of failure */ -- if (pidfile) -- fd_install(pidfd, pidfile); -- - init_task_pid_links(p); - if (likely(p->pid)) { - ptrace_init_task(p, (clone_flags & CLONE_PTRACE) || trace); -@@ -2404,6 +2413,9 @@ static __latent_entropy struct task_struct *copy_process( - syscall_tracepoint_update(p); - write_unlock_irq(&tasklist_lock); - -+ if (pidfile) -+ fd_install(pidfd, pidfile); -+ - proc_fork_connector(p); - sched_post_fork(p); - cgroup_post_fork(p, args); diff --git a/kernel/gcov/gcc_4_7.c b/kernel/gcov/gcc_4_7.c -index 460c12b7dfea2..7971e989e425b 100644 +index 460c12b7dfea2..74a4ef1da9ad7 100644 --- a/kernel/gcov/gcc_4_7.c +++ b/kernel/gcov/gcc_4_7.c @@ -30,6 +30,13 @@ @@ -356874,7 +462455,26 @@ index 460c12b7dfea2..7971e989e425b 100644 static struct gcov_info *gcov_info_head; /** -@@ -383,12 +390,18 @@ size_t convert_to_gcda(char *buffer, struct gcov_info *info) +@@ -75,6 +82,7 @@ struct gcov_fn_info { + * @version: gcov version magic indicating the gcc version used for compilation + * @next: list head for a singly-linked list + * @stamp: uniquifying time stamp ++ * @checksum: unique object checksum + * @filename: name of the associated gcov data file + * @merge: merge functions (null for unused counter type) + * @n_functions: number of instrumented functions +@@ -87,6 +95,10 @@ struct gcov_info { + unsigned int version; + struct gcov_info *next; + unsigned int stamp; ++ /* Since GCC 12.1 a checksum field is added. */ ++#if (__GNUC__ >= 12) ++ unsigned int checksum; ++#endif + const char *filename; + void (*merge[GCOV_COUNTERS])(gcov_type *, unsigned int); + unsigned int n_functions; +@@ -383,12 +395,18 @@ size_t convert_to_gcda(char *buffer, struct gcov_info *info) pos += store_gcov_u32(buffer, pos, info->version); pos += store_gcov_u32(buffer, pos, info->stamp); @@ -356894,7 +462494,7 @@ index 460c12b7dfea2..7971e989e425b 100644 pos += store_gcov_u32(buffer, pos, fi_ptr->ident); pos += store_gcov_u32(buffer, pos, fi_ptr->lineno_checksum); pos += store_gcov_u32(buffer, pos, fi_ptr->cfg_checksum); -@@ -402,7 +415,8 @@ size_t convert_to_gcda(char *buffer, struct gcov_info *info) +@@ -402,7 +420,8 @@ size_t convert_to_gcda(char *buffer, struct gcov_info *info) /* Counter record. 
*/ pos += store_gcov_u32(buffer, pos, GCOV_TAG_FOR_COUNTER(ct_idx)); @@ -356960,7 +462560,7 @@ index 221d80c31e94c..fca637d4da1a7 100644 if (!irq_settings_no_debug(desc)) note_interrupt(desc, retval); diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h -index 54363527feea4..e58342ace11f2 100644 +index 54363527feea4..f1d83a8b44171 100644 --- a/kernel/irq/internals.h +++ b/kernel/irq/internals.h @@ -29,12 +29,14 @@ extern struct irqaction chained_action; @@ -356978,11 +462578,59 @@ index 54363527feea4..e58342ace11f2 100644 }; /* +@@ -50,6 +52,7 @@ enum { + * IRQS_PENDING - irq is pending and replayed later + * IRQS_SUSPENDED - irq is suspended + * IRQS_NMI - irq line is used to deliver NMIs ++ * IRQS_SYSFS - descriptor has been added to sysfs + */ + enum { + IRQS_AUTODETECT = 0x00000001, +@@ -62,6 +65,7 @@ enum { + IRQS_SUSPENDED = 0x00000800, + IRQS_TIMINGS = 0x00001000, + IRQS_NMI = 0x00002000, ++ IRQS_SYSFS = 0x00004000, + }; + + #include "debug.h" diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c -index 4e3c29bb603c3..21b3ac2a29d20 100644 +index 4e3c29bb603c3..7a45fd5932454 100644 --- a/kernel/irq/irqdesc.c +++ b/kernel/irq/irqdesc.c -@@ -407,6 +407,7 @@ static struct irq_desc *alloc_desc(int irq, int node, unsigned int flags, +@@ -288,22 +288,25 @@ static void irq_sysfs_add(int irq, struct irq_desc *desc) + if (irq_kobj_base) { + /* + * Continue even in case of failure as this is nothing +- * crucial. ++ * crucial and failures in the late irq_sysfs_init() ++ * cannot be rolled back. + */ + if (kobject_add(&desc->kobj, irq_kobj_base, "%d", irq)) + pr_warn("Failed to add kobject for irq %d\n", irq); ++ else ++ desc->istate |= IRQS_SYSFS; + } + } + + static void irq_sysfs_del(struct irq_desc *desc) + { + /* +- * If irq_sysfs_init() has not yet been invoked (early boot), then +- * irq_kobj_base is NULL and the descriptor was never added. +- * kobject_del() complains about a object with no parent, so make +- * it conditional. ++ * Only invoke kobject_del() when kobject_add() was successfully ++ * invoked for the descriptor. This covers both early boot, where ++ * sysfs is not initialized yet, and the case of a failed ++ * kobject_add() invocation. 
+ */ +- if (irq_kobj_base) ++ if (desc->istate & IRQS_SYSFS) + kobject_del(&desc->kobj); + } + +@@ -407,6 +410,7 @@ static struct irq_desc *alloc_desc(int irq, int node, unsigned int flags, lockdep_set_class(&desc->lock, &irq_desc_lock_class); mutex_init(&desc->request_mutex); init_rcu_head(&desc->rcu); @@ -356990,7 +462638,7 @@ index 4e3c29bb603c3..21b3ac2a29d20 100644 desc_set_defaults(irq, desc, node, affinity, owner); irqd_set(&desc->irq_data, flags); -@@ -575,6 +576,7 @@ int __init early_irq_init(void) +@@ -575,6 +579,7 @@ int __init early_irq_init(void) raw_spin_lock_init(&desc[i].lock); lockdep_set_class(&desc[i].lock, &irq_desc_lock_class); mutex_init(&desc[i].request_mutex); @@ -356999,7 +462647,7 @@ index 4e3c29bb603c3..21b3ac2a29d20 100644 } return arch_early_irq_init(); diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c -index 4d8fc65cf38f4..035e3038c4de4 100644 +index 4d8fc65cf38f4..b1e6ca98d0af4 100644 --- a/kernel/irq/irqdomain.c +++ b/kernel/irq/irqdomain.c @@ -910,6 +910,8 @@ struct irq_desc *__irq_resolve_mapping(struct irq_domain *domain, @@ -357011,11 +462659,80 @@ index 4d8fc65cf38f4..035e3038c4de4 100644 } return desc; +@@ -1911,7 +1913,7 @@ static void debugfs_add_domain_dir(struct irq_domain *d) + + static void debugfs_remove_domain_dir(struct irq_domain *d) + { +- debugfs_remove(debugfs_lookup(d->name, domain_dir)); ++ debugfs_lookup_and_remove(d->name, domain_dir); + } + + void __init irq_domain_debugfs_init(struct dentry *root) diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c -index 27667e82ecc91..0c3c26fb054f7 100644 +index 27667e82ecc91..9862372e0f011 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c -@@ -1248,6 +1248,31 @@ static void irq_wake_secondary(struct irq_desc *desc, struct irqaction *action) +@@ -222,11 +222,16 @@ int irq_do_set_affinity(struct irq_data *data, const struct cpumask *mask, + { + struct irq_desc *desc = irq_data_to_desc(data); + struct irq_chip *chip = irq_data_get_irq_chip(data); ++ const struct cpumask *prog_mask; + int ret; + ++ static DEFINE_RAW_SPINLOCK(tmp_mask_lock); ++ static struct cpumask tmp_mask; ++ + if (!chip || !chip->irq_set_affinity) + return -EINVAL; + ++ raw_spin_lock(&tmp_mask_lock); + /* + * If this is a managed interrupt and housekeeping is enabled on + * it check whether the requested affinity mask intersects with +@@ -248,24 +253,34 @@ int irq_do_set_affinity(struct irq_data *data, const struct cpumask *mask, + */ + if (irqd_affinity_is_managed(data) && + housekeeping_enabled(HK_FLAG_MANAGED_IRQ)) { +- const struct cpumask *hk_mask, *prog_mask; +- +- static DEFINE_RAW_SPINLOCK(tmp_mask_lock); +- static struct cpumask tmp_mask; ++ const struct cpumask *hk_mask; + + hk_mask = housekeeping_cpumask(HK_FLAG_MANAGED_IRQ); + +- raw_spin_lock(&tmp_mask_lock); + cpumask_and(&tmp_mask, mask, hk_mask); + if (!cpumask_intersects(&tmp_mask, cpu_online_mask)) + prog_mask = mask; + else + prog_mask = &tmp_mask; +- ret = chip->irq_set_affinity(data, prog_mask, force); +- raw_spin_unlock(&tmp_mask_lock); + } else { +- ret = chip->irq_set_affinity(data, mask, force); ++ prog_mask = mask; + } ++ ++ /* ++ * Make sure we only provide online CPUs to the irqchip, ++ * unless we are being asked to force the affinity (in which ++ * case we do as we are told). 
++ */ ++ cpumask_and(&tmp_mask, prog_mask, cpu_online_mask); ++ if (!force && !cpumask_empty(&tmp_mask)) ++ ret = chip->irq_set_affinity(data, &tmp_mask, force); ++ else if (force) ++ ret = chip->irq_set_affinity(data, mask, force); ++ else ++ ret = -EINVAL; ++ ++ raw_spin_unlock(&tmp_mask_lock); ++ + switch (ret) { + case IRQ_SET_MASK_OK: + case IRQ_SET_MASK_OK_DONE: +@@ -1248,6 +1263,31 @@ static void irq_wake_secondary(struct irq_desc *desc, struct irqaction *action) raw_spin_unlock_irq(&desc->lock); } @@ -357047,7 +462764,7 @@ index 27667e82ecc91..0c3c26fb054f7 100644 /* * Interrupt handler thread */ -@@ -1259,6 +1284,8 @@ static int irq_thread(void *data) +@@ -1259,6 +1299,8 @@ static int irq_thread(void *data) irqreturn_t (*handler_fn)(struct irq_desc *desc, struct irqaction *action); @@ -357056,7 +462773,7 @@ index 27667e82ecc91..0c3c26fb054f7 100644 if (force_irqthreads() && test_bit(IRQTF_FORCED_THREAD, &action->thread_flags)) handler_fn = irq_forced_thread_fn; -@@ -1683,8 +1710,6 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new) +@@ -1683,8 +1725,6 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new) } if (!shared) { @@ -357065,7 +462782,7 @@ index 27667e82ecc91..0c3c26fb054f7 100644 /* Setup the type (level, edge polarity) if configured: */ if (new->flags & IRQF_TRIGGER_MASK) { ret = __irq_set_trigger(desc, -@@ -1780,14 +1805,8 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new) +@@ -1780,14 +1820,8 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new) irq_setup_timings(desc, new); @@ -357083,7 +462800,7 @@ index 27667e82ecc91..0c3c26fb054f7 100644 register_irq_proc(irq, desc); new->dir = NULL; diff --git a/kernel/irq/msi.c b/kernel/irq/msi.c -index 6a5ecee6e5674..7f350ae59c5fd 100644 +index 6a5ecee6e5674..d75586dc584f8 100644 --- a/kernel/irq/msi.c +++ b/kernel/irq/msi.c @@ -529,10 +529,10 @@ static bool msi_check_reservation_mode(struct irq_domain *domain, @@ -357099,6 +462816,135 @@ index 6a5ecee6e5674..7f350ae59c5fd 100644 } int __msi_domain_alloc_irqs(struct irq_domain *domain, struct device *dev, +@@ -596,6 +596,13 @@ int __msi_domain_alloc_irqs(struct irq_domain *domain, struct device *dev, + irqd_clr_can_reserve(irq_data); + if (domain->flags & IRQ_DOMAIN_MSI_NOMASK_QUIRK) + irqd_set_msi_nomask_quirk(irq_data); ++ if ((info->flags & MSI_FLAG_ACTIVATE_EARLY) && ++ irqd_affinity_is_managed(irq_data) && ++ !cpumask_intersects(irq_data_get_affinity_mask(irq_data), ++ cpu_online_mask)) { ++ irqd_set_managed_shutdown(irq_data); ++ continue; ++ } + } + ret = irq_domain_activate_irq(irq_data, can_reserve); + if (ret) +diff --git a/kernel/kcsan/core.c b/kernel/kcsan/core.c +index 76e67d1e02d48..526510b3791ed 100644 +--- a/kernel/kcsan/core.c ++++ b/kernel/kcsan/core.c +@@ -14,10 +14,12 @@ + #include <linux/init.h> + #include <linux/kernel.h> + #include <linux/list.h> ++#include <linux/minmax.h> + #include <linux/moduleparam.h> + #include <linux/percpu.h> + #include <linux/preempt.h> + #include <linux/sched.h> ++#include <linux/string.h> + #include <linux/uaccess.h> + + #include "encoding.h" +@@ -1060,3 +1062,51 @@ EXPORT_SYMBOL(__tsan_atomic_thread_fence); + void __tsan_atomic_signal_fence(int memorder); + void __tsan_atomic_signal_fence(int memorder) { } + EXPORT_SYMBOL(__tsan_atomic_signal_fence); ++ ++#ifdef __HAVE_ARCH_MEMSET ++void *__tsan_memset(void *s, int c, size_t count); ++noinline void *__tsan_memset(void *s, int c, size_t count) ++{ ++ /* ++ * Instead of not 
setting up watchpoints where accessed size is greater ++ * than MAX_ENCODABLE_SIZE, truncate checked size to MAX_ENCODABLE_SIZE. ++ */ ++ size_t check_len = min_t(size_t, count, MAX_ENCODABLE_SIZE); ++ ++ check_access(s, check_len, KCSAN_ACCESS_WRITE); ++ return memset(s, c, count); ++} ++#else ++void *__tsan_memset(void *s, int c, size_t count) __alias(memset); ++#endif ++EXPORT_SYMBOL(__tsan_memset); ++ ++#ifdef __HAVE_ARCH_MEMMOVE ++void *__tsan_memmove(void *dst, const void *src, size_t len); ++noinline void *__tsan_memmove(void *dst, const void *src, size_t len) ++{ ++ size_t check_len = min_t(size_t, len, MAX_ENCODABLE_SIZE); ++ ++ check_access(dst, check_len, KCSAN_ACCESS_WRITE); ++ check_access(src, check_len, 0); ++ return memmove(dst, src, len); ++} ++#else ++void *__tsan_memmove(void *dst, const void *src, size_t len) __alias(memmove); ++#endif ++EXPORT_SYMBOL(__tsan_memmove); ++ ++#ifdef __HAVE_ARCH_MEMCPY ++void *__tsan_memcpy(void *dst, const void *src, size_t len); ++noinline void *__tsan_memcpy(void *dst, const void *src, size_t len) ++{ ++ size_t check_len = min_t(size_t, len, MAX_ENCODABLE_SIZE); ++ ++ check_access(dst, check_len, KCSAN_ACCESS_WRITE); ++ check_access(src, check_len, 0); ++ return memcpy(dst, src, len); ++} ++#else ++void *__tsan_memcpy(void *dst, const void *src, size_t len) __alias(memcpy); ++#endif ++EXPORT_SYMBOL(__tsan_memcpy); +diff --git a/kernel/kcsan/kcsan_test.c b/kernel/kcsan/kcsan_test.c +index dc55fd5a36fcc..8b176aeab91b6 100644 +--- a/kernel/kcsan/kcsan_test.c ++++ b/kernel/kcsan/kcsan_test.c +@@ -151,7 +151,7 @@ static bool report_matches(const struct expect_report *r) + const bool is_assert = (r->access[0].type | r->access[1].type) & KCSAN_ACCESS_ASSERT; + bool ret = false; + unsigned long flags; +- typeof(observed.lines) expect; ++ typeof(*observed.lines) *expect; + const char *end; + char *cur; + int i; +@@ -160,6 +160,10 @@ static bool report_matches(const struct expect_report *r) + if (!report_available()) + return false; + ++ expect = kmalloc(sizeof(observed.lines), GFP_KERNEL); ++ if (WARN_ON(!expect)) ++ return false; ++ + /* Generate expected report contents. */ + + /* Title */ +@@ -243,6 +247,7 @@ static bool report_matches(const struct expect_report *r) + strstr(observed.lines[2], expect[1]))); + out: + spin_unlock_irqrestore(&observed.lock, flags); ++ kfree(expect); + return ret; + } + +diff --git a/kernel/kcsan/report.c b/kernel/kcsan/report.c +index 21137929d4283..b88d5d5f29e48 100644 +--- a/kernel/kcsan/report.c ++++ b/kernel/kcsan/report.c +@@ -432,8 +432,7 @@ static void print_report(enum kcsan_value_change value_change, + dump_stack_print_info(KERN_DEFAULT); + pr_err("==================================================================\n"); + +- if (panic_on_warn) +- panic("panic_on_warn set ...\n"); ++ check_panic_on_warn("KCSAN"); + } + + static void release_report(unsigned long *flags, struct other_info *other_info) diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c index 33400ff051a84..f7a4fd4d243f4 100644 --- a/kernel/kexec_file.c @@ -357170,10 +463016,85 @@ index 33400ff051a84..f7a4fd4d243f4 100644 return ret; } diff --git a/kernel/kprobes.c b/kernel/kprobes.c -index 790a573bbe00c..9df585b9467e4 100644 +index 790a573bbe00c..8818f3a89fef3 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c -@@ -1559,7 +1559,9 @@ static int check_kprobe_address_safe(struct kprobe *p, +@@ -18,6 +18,9 @@ + * <jkenisto@us.ibm.com> and Prasanna S Panchamukhi + * <prasanna@in.ibm.com> added function-return probes. 
+ */ ++ ++#define pr_fmt(fmt) "kprobes: " fmt ++ + #include <linux/kprobes.h> + #include <linux/hash.h> + #include <linux/init.h> +@@ -892,7 +895,7 @@ static void optimize_all_kprobes(void) + optimize_kprobe(p); + } + cpus_read_unlock(); +- printk(KERN_INFO "Kprobes globally optimized\n"); ++ pr_info("kprobe jump-optimization is enabled. All kprobes are optimized if possible.\n"); + out: + mutex_unlock(&kprobe_mutex); + } +@@ -925,7 +928,7 @@ static void unoptimize_all_kprobes(void) + + /* Wait for unoptimizing completion */ + wait_for_kprobe_optimizer(); +- printk(KERN_INFO "Kprobes globally unoptimized\n"); ++ pr_info("kprobe jump-optimization is disabled. All kprobes are based on software breakpoint.\n"); + } + + static DEFINE_MUTEX(kprobe_sysctl_mutex); +@@ -1003,7 +1006,7 @@ static int reuse_unused_kprobe(struct kprobe *ap) + * unregistered. + * Thus there should be no chance to reuse unused kprobe. + */ +- printk(KERN_ERR "Error: There should be no unused kprobe here.\n"); ++ WARN_ON_ONCE(1); + return -EINVAL; + } + +@@ -1049,18 +1052,13 @@ static int __arm_kprobe_ftrace(struct kprobe *p, struct ftrace_ops *ops, + int ret = 0; + + ret = ftrace_set_filter_ip(ops, (unsigned long)p->addr, 0, 0); +- if (ret) { +- pr_debug("Failed to arm kprobe-ftrace at %pS (%d)\n", +- p->addr, ret); ++ if (WARN_ONCE(ret < 0, "Failed to arm kprobe-ftrace at %pS (error %d)\n", p->addr, ret)) + return ret; +- } + + if (*cnt == 0) { + ret = register_ftrace_function(ops); +- if (ret) { +- pr_debug("Failed to init kprobe-ftrace (%d)\n", ret); ++ if (WARN(ret < 0, "Failed to register kprobe-ftrace (error %d)\n", ret)) + goto err_ftrace; +- } + } + + (*cnt)++; +@@ -1092,14 +1090,14 @@ static int __disarm_kprobe_ftrace(struct kprobe *p, struct ftrace_ops *ops, + + if (*cnt == 1) { + ret = unregister_ftrace_function(ops); +- if (WARN(ret < 0, "Failed to unregister kprobe-ftrace (%d)\n", ret)) ++ if (WARN(ret < 0, "Failed to unregister kprobe-ftrace (error %d)\n", ret)) + return ret; + } + + (*cnt)--; + + ret = ftrace_set_filter_ip(ops, (unsigned long)p->addr, 1, 0); +- WARN_ONCE(ret < 0, "Failed to disarm kprobe-ftrace at %pS (%d)\n", ++ WARN_ONCE(ret < 0, "Failed to disarm kprobe-ftrace at %pS (error %d)\n", + p->addr, ret); + return ret; + } +@@ -1559,7 +1557,9 @@ static int check_kprobe_address_safe(struct kprobe *p, preempt_disable(); /* Ensure it is not in reserved area nor out of text */ @@ -357184,7 +463105,7 @@ index 790a573bbe00c..9df585b9467e4 100644 within_kprobe_blacklist((unsigned long) p->addr) || jump_label_text_reserved(p->addr, p->addr) || static_call_text_reserved(p->addr, p->addr) || -@@ -1704,11 +1706,12 @@ static struct kprobe *__disable_kprobe(struct kprobe *p) +@@ -1704,11 +1704,12 @@ static struct kprobe *__disable_kprobe(struct kprobe *p) /* Try to disarm and disable this/parent probe */ if (p == orig_p || aggr_kprobe_disabled(orig_p)) { /* @@ -357201,7 +463122,31 @@ index 790a573bbe00c..9df585b9467e4 100644 ret = disarm_kprobe(orig_p, true); if (ret) { p->flags &= ~KPROBE_FLAG_DISABLED; -@@ -2006,6 +2009,9 @@ int register_kretprobe(struct kretprobe *rp) +@@ -1757,7 +1758,13 @@ static int __unregister_kprobe_top(struct kprobe *p) + if ((list_p != p) && (list_p->post_handler)) + goto noclean; + } +- ap->post_handler = NULL; ++ /* ++ * For the kprobe-on-ftrace case, we keep the ++ * post_handler setting to identify this aggrprobe ++ * armed with kprobe_ipmodify_ops. 
++ */ ++ if (!kprobe_ftrace(ap)) ++ ap->post_handler = NULL; + } + noclean: + /* +@@ -1885,7 +1892,7 @@ unsigned long __kretprobe_trampoline_handler(struct pt_regs *regs, + + node = node->next; + } +- pr_err("Oops! Kretprobe fails to find correct return address.\n"); ++ pr_err("kretprobe: Return address not found, not execute handler. Maybe there is a bug in the kernel.\n"); + BUG_ON(1); + + found: +@@ -2006,6 +2013,9 @@ int register_kretprobe(struct kretprobe *rp) } } @@ -357211,7 +463156,67 @@ index 790a573bbe00c..9df585b9467e4 100644 rp->kp.pre_handler = pre_handler_kretprobe; rp->kp.post_handler = NULL; -@@ -2809,13 +2815,12 @@ static const struct file_operations fops_kp = { +@@ -2202,8 +2212,11 @@ int enable_kprobe(struct kprobe *kp) + if (!kprobes_all_disarmed && kprobe_disabled(p)) { + p->flags &= ~KPROBE_FLAG_DISABLED; + ret = arm_kprobe(p); +- if (ret) ++ if (ret) { + p->flags |= KPROBE_FLAG_DISABLED; ++ if (p != kp) ++ kp->flags |= KPROBE_FLAG_DISABLED; ++ } + } + out: + mutex_unlock(&kprobe_mutex); +@@ -2214,8 +2227,7 @@ EXPORT_SYMBOL_GPL(enable_kprobe); + /* Caller must NOT call this in usual path. This is only for critical case */ + void dump_kprobe(struct kprobe *kp) + { +- pr_err("Dumping kprobe:\n"); +- pr_err("Name: %s\nOffset: %x\nAddress: %pS\n", ++ pr_err("Dump kprobe:\n.symbol_name = %s, .offset = %x, .addr = %pS\n", + kp->symbol_name, kp->offset, kp->addr); + } + NOKPROBE_SYMBOL(dump_kprobe); +@@ -2478,8 +2490,7 @@ static int __init init_kprobes(void) + err = populate_kprobe_blacklist(__start_kprobe_blacklist, + __stop_kprobe_blacklist); + if (err) { +- pr_err("kprobes: failed to populate blacklist: %d\n", err); +- pr_err("Please take care of using kprobes.\n"); ++ pr_err("Failed to populate blacklist (error %d), kprobes not restricted, be careful using them!\n", err); + } + + if (kretprobe_blacklist_size) { +@@ -2488,7 +2499,7 @@ static int __init init_kprobes(void) + kretprobe_blacklist[i].addr = + kprobe_lookup_name(kretprobe_blacklist[i].name, 0); + if (!kretprobe_blacklist[i].addr) +- printk("kretprobe: lookup failed: %s\n", ++ pr_err("Failed to lookup symbol '%s' for kretprobe blacklist. Maybe the target function is removed or renamed.\n", + kretprobe_blacklist[i].name); + } + } +@@ -2692,7 +2703,7 @@ static int arm_all_kprobes(void) + } + + if (errors) +- pr_warn("Kprobes globally enabled, but failed to arm %d out of %d probes\n", ++ pr_warn("Kprobes globally enabled, but failed to enable %d out of %d probes. Please check which kprobes are kept disabled via debugfs.\n", + errors, total); + else + pr_info("Kprobes globally enabled\n"); +@@ -2735,7 +2746,7 @@ static int disarm_all_kprobes(void) + } + + if (errors) +- pr_warn("Kprobes globally disabled, but failed to disarm %d out of %d probes\n", ++ pr_warn("Kprobes globally disabled, but failed to disable %d out of %d probes. 
Please check which kprobes are kept enabled via debugfs.\n", + errors, total); + else + pr_info("Kprobes globally disabled\n"); +@@ -2809,13 +2820,12 @@ static const struct file_operations fops_kp = { static int __init debugfs_kprobe_init(void) { struct dentry *dir; @@ -357657,10 +463662,22 @@ index b8d9a050c337a..15fdc7fa5c688 100644 return count; } diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c -index 6bb116c559b4a..ea5a701ab2408 100644 +index 6bb116c559b4a..fcd9ad3f7f2e5 100644 --- a/kernel/locking/rtmutex.c +++ b/kernel/locking/rtmutex.c -@@ -1373,7 +1373,7 @@ static bool rtmutex_spin_on_owner(struct rt_mutex_base *lock, +@@ -855,8 +855,9 @@ static int __sched rt_mutex_adjust_prio_chain(struct task_struct *task, + * then we need to wake the new top waiter up to try + * to get the lock. + */ +- if (prerequeue_top_waiter != rt_mutex_top_waiter(lock)) +- wake_up_state(waiter->task, waiter->wake_state); ++ top_waiter = rt_mutex_top_waiter(lock); ++ if (prerequeue_top_waiter != top_waiter) ++ wake_up_state(top_waiter->task, top_waiter->wake_state); + raw_spin_unlock_irq(&lock->wait_lock); + return 0; + } +@@ -1373,7 +1374,7 @@ static bool rtmutex_spin_on_owner(struct rt_mutex_base *lock, * - the VCPU on which owner runs is preempted */ if (!owner->on_cpu || need_resched() || @@ -358091,7 +464108,7 @@ index 000e8d5a28841..4cc73e6f8974b 100644 } diff --git a/kernel/module.c b/kernel/module.c -index 5c26a76e800b5..ef79f4dbda876 100644 +index 5c26a76e800b5..8a1766c69c6ec 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -2967,14 +2967,29 @@ static int elf_validity_check(struct load_info *info) @@ -358223,7 +464240,17 @@ index 5c26a76e800b5..ef79f4dbda876 100644 } #define COPY_CHUNK_SIZE (16*PAGE_SIZE) -@@ -3683,12 +3722,6 @@ static noinline int do_init_module(struct module *mod) +@@ -3626,7 +3665,8 @@ static bool finished_loading(const char *name) + sched_annotate_sleep(); + mutex_lock(&module_mutex); + mod = find_module_all(name, strlen(name), true); +- ret = !mod || mod->state == MODULE_STATE_LIVE; ++ ret = !mod || mod->state == MODULE_STATE_LIVE ++ || mod->state == MODULE_STATE_GOING; + mutex_unlock(&module_mutex); + + return ret; +@@ -3683,12 +3723,6 @@ static noinline int do_init_module(struct module *mod) } freeinit->module_init = mod->init_layout.base; @@ -358236,7 +464263,7 @@ index 5c26a76e800b5..ef79f4dbda876 100644 do_mod_ctors(mod); /* Start the module */ if (mod->init != NULL) -@@ -3714,22 +3747,13 @@ static noinline int do_init_module(struct module *mod) +@@ -3714,22 +3748,13 @@ static noinline int do_init_module(struct module *mod) /* * We need to finish all async code before the module init sequence @@ -358264,7 +464291,47 @@ index 5c26a76e800b5..ef79f4dbda876 100644 async_synchronize_full(); ftrace_free_mem(mod, mod->init_layout.base, mod->init_layout.base + -@@ -3940,10 +3964,8 @@ static int load_module(struct load_info *info, const char __user *uargs, +@@ -3811,20 +3836,35 @@ static int add_unformed_module(struct module *mod) + + mod->state = MODULE_STATE_UNFORMED; + +-again: + mutex_lock(&module_mutex); + old = find_module_all(mod->name, strlen(mod->name), true); + if (old != NULL) { +- if (old->state != MODULE_STATE_LIVE) { ++ if (old->state == MODULE_STATE_COMING ++ || old->state == MODULE_STATE_UNFORMED) { + /* Wait in case it fails to load. */ + mutex_unlock(&module_mutex); + err = wait_event_interruptible(module_wq, + finished_loading(mod->name)); + if (err) + goto out_unlocked; +- goto again; ++ ++ /* The module might have gone in the meantime. 
*/ ++ mutex_lock(&module_mutex); ++ old = find_module_all(mod->name, strlen(mod->name), ++ true); + } +- err = -EEXIST; ++ ++ /* ++ * We are here only when the same module was being loaded. Do ++ * not try to load it again right now. It prevents long delays ++ * caused by serialized module load failures. It might happen ++ * when more devices of the same type trigger load of ++ * a particular module. ++ */ ++ if (old && old->state == MODULE_STATE_LIVE) ++ err = -EEXIST; ++ else ++ err = -EBUSY; + goto out; + } + mod_update_bounds(mod); +@@ -3940,10 +3980,8 @@ static int load_module(struct load_info *info, const char __user *uargs, * sections. */ err = elf_validity_check(info); @@ -358276,6 +464343,192 @@ index 5c26a76e800b5..ef79f4dbda876 100644 /* * Everything checks out, so set up the section info +diff --git a/kernel/padata.c b/kernel/padata.c +index 18d3a5c699d84..c17f772cc315a 100644 +--- a/kernel/padata.c ++++ b/kernel/padata.c +@@ -207,14 +207,16 @@ int padata_do_parallel(struct padata_shell *ps, + pw = padata_work_alloc(); + spin_unlock(&padata_works_lock); + ++ if (!pw) { ++ /* Maximum works limit exceeded, run in the current task. */ ++ padata->parallel(padata); ++ } ++ + rcu_read_unlock_bh(); + + if (pw) { + padata_work_init(pw, padata_parallel_worker, padata, 0); + queue_work(pinst->parallel_wq, &pw->pw_work); +- } else { +- /* Maximum works limit exceeded, run in the current task. */ +- padata->parallel(padata); + } + + return 0; +@@ -388,13 +390,16 @@ void padata_do_serial(struct padata_priv *padata) + int hashed_cpu = padata_cpu_hash(pd, padata->seq_nr); + struct padata_list *reorder = per_cpu_ptr(pd->reorder_list, hashed_cpu); + struct padata_priv *cur; ++ struct list_head *pos; + + spin_lock(&reorder->lock); + /* Sort in ascending order of sequence number. */ +- list_for_each_entry_reverse(cur, &reorder->list, list) ++ list_for_each_prev(pos, &reorder->list) { ++ cur = list_entry(pos, struct padata_priv, list); + if (cur->seq_nr < padata->seq_nr) + break; +- list_add(&padata->list, &cur->list); ++ } ++ list_add(&padata->list, pos); + spin_unlock(&reorder->lock); + + /* +diff --git a/kernel/panic.c b/kernel/panic.c +index cefd7d82366fb..47933d4c769b6 100644 +--- a/kernel/panic.c ++++ b/kernel/panic.c +@@ -32,6 +32,7 @@ + #include <linux/bug.h> + #include <linux/ratelimit.h> + #include <linux/debugfs.h> ++#include <linux/sysfs.h> + #include <asm/sections.h> + + #define PANIC_TIMER_STEP 100 +@@ -42,7 +43,9 @@ + * Should we dump all CPUs backtraces in an oops event? + * Defaults to 0, can be changed via sysctl. 
+ */ +-unsigned int __read_mostly sysctl_oops_all_cpu_backtrace; ++static unsigned int __read_mostly sysctl_oops_all_cpu_backtrace; ++#else ++#define sysctl_oops_all_cpu_backtrace 0 + #endif /* CONFIG_SMP */ + + int panic_on_oops = CONFIG_PANIC_ON_OOPS_VALUE; +@@ -55,6 +58,7 @@ bool crash_kexec_post_notifiers; + int panic_on_warn __read_mostly; + unsigned long panic_on_taint; + bool panic_on_taint_nousertaint = false; ++static unsigned int warn_limit __read_mostly; + + int panic_timeout = CONFIG_PANIC_TIMEOUT; + EXPORT_SYMBOL_GPL(panic_timeout); +@@ -71,6 +75,56 @@ ATOMIC_NOTIFIER_HEAD(panic_notifier_list); + + EXPORT_SYMBOL(panic_notifier_list); + ++#ifdef CONFIG_SYSCTL ++static struct ctl_table kern_panic_table[] = { ++#ifdef CONFIG_SMP ++ { ++ .procname = "oops_all_cpu_backtrace", ++ .data = &sysctl_oops_all_cpu_backtrace, ++ .maxlen = sizeof(int), ++ .mode = 0644, ++ .proc_handler = proc_dointvec_minmax, ++ .extra1 = SYSCTL_ZERO, ++ .extra2 = SYSCTL_ONE, ++ }, ++#endif ++ { ++ .procname = "warn_limit", ++ .data = &warn_limit, ++ .maxlen = sizeof(warn_limit), ++ .mode = 0644, ++ .proc_handler = proc_douintvec, ++ }, ++ { } ++}; ++ ++static __init int kernel_panic_sysctls_init(void) ++{ ++ register_sysctl_init("kernel", kern_panic_table); ++ return 0; ++} ++late_initcall(kernel_panic_sysctls_init); ++#endif ++ ++static atomic_t warn_count = ATOMIC_INIT(0); ++ ++#ifdef CONFIG_SYSFS ++static ssize_t warn_count_show(struct kobject *kobj, struct kobj_attribute *attr, ++ char *page) ++{ ++ return sysfs_emit(page, "%d\n", atomic_read(&warn_count)); ++} ++ ++static struct kobj_attribute warn_count_attr = __ATTR_RO(warn_count); ++ ++static __init int kernel_panic_sysfs_init(void) ++{ ++ sysfs_add_file_to_group(kernel_kobj, &warn_count_attr.attr, NULL); ++ return 0; ++} ++late_initcall(kernel_panic_sysfs_init); ++#endif ++ + static long no_blink(int state) + { + return 0; +@@ -167,6 +221,19 @@ static void panic_print_sys_info(void) + ftrace_dump(DUMP_ALL); + } + ++void check_panic_on_warn(const char *origin) ++{ ++ unsigned int limit; ++ ++ if (panic_on_warn) ++ panic("%s: panic_on_warn set ...\n", origin); ++ ++ limit = READ_ONCE(warn_limit); ++ if (atomic_inc_return(&warn_count) >= limit && limit) ++ panic("%s: system warned too often (kernel.warn_limit is %d)", ++ origin, limit); ++} ++ + /** + * panic - halt the system + * @fmt: The text string to print +@@ -184,6 +251,16 @@ void panic(const char *fmt, ...) + int old_cpu, this_cpu; + bool _crash_kexec_post_notifiers = crash_kexec_post_notifiers; + ++ if (panic_on_warn) { ++ /* ++ * This thread may hit another WARN() in the panic path. ++ * Resetting this prevents additional WARN() from panicking the ++ * system on this thread. Other threads are blocked by the ++ * panic_mutex in panic(). ++ */ ++ panic_on_warn = 0; ++ } ++ + /* + * Disable local interrupts. This will prevent panic_smp_self_stop + * from deadlocking the first cpu that invokes the panic, since +@@ -592,16 +669,7 @@ void __warn(const char *file, int line, void *caller, unsigned taint, + if (regs) + show_regs(regs); + +- if (panic_on_warn) { +- /* +- * This thread may hit another WARN() in the panic path. +- * Resetting this prevents additional WARN() from panicking the +- * system on this thread. Other threads are blocked by the +- * panic_mutex in panic(). 
+- */ +- panic_on_warn = 0; +- panic("panic_on_warn set ...\n"); +- } ++ check_panic_on_warn("kernel"); + + if (!regs) + dump_stack(); diff --git a/kernel/power/energy_model.c b/kernel/power/energy_model.c index a332ccd829e24..97e62469a6b32 100644 --- a/kernel/power/energy_model.c @@ -358326,9 +464579,18 @@ index a332ccd829e24..97e62469a6b32 100644 pd->table = table; diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c -index 559acef3fddb8..d926852f81191 100644 +index 559acef3fddb8..9abc73d500fbf 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c +@@ -640,7 +640,7 @@ static void power_down(void) + int error; + + if (hibernation_mode == HIBERNATION_SUSPEND) { +- error = suspend_devices_and_enter(PM_SUSPEND_MEM); ++ error = suspend_devices_and_enter(mem_sleep_current); + if (error) { + hibernation_mode = hibernation_ops ? + HIBERNATION_PLATFORM : @@ -691,7 +691,7 @@ static int load_image_and_restore(void) goto Unlock; @@ -358395,7 +464657,7 @@ index 37401c99b7d7d..ee78a39463e63 100644 pm_freezing = true; error = try_to_freeze_tasks(true); diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c -index f7a9860782135..330d499376924 100644 +index f7a9860782135..475d630e650f1 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c @@ -978,8 +978,7 @@ static void memory_bm_recycle(struct memory_bitmap *bm) @@ -358433,6 +464695,17 @@ index f7a9860782135..330d499376924 100644 region->start_pfn = start_pfn; region->end_pfn = end_pfn; list_add_tail(®ion->list, &nosave_regions); +@@ -1726,8 +1719,8 @@ static unsigned long minimum_image_size(unsigned long saveable) + * /sys/power/reserved_size, respectively). To make this happen, we compute the + * total number of available page frames and allocate at least + * +- * ([page frames total] + PAGES_FOR_IO + [metadata pages]) / 2 +- * + 2 * DIV_ROUND_UP(reserved_size, PAGE_SIZE) ++ * ([page frames total] - PAGES_FOR_IO - [metadata pages]) / 2 ++ * - 2 * DIV_ROUND_UP(reserved_size, PAGE_SIZE) + * + * of them, which corresponds to the maximum size of a hibernation image. + * diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c index eb75f394a0590..13d905dd32675 100644 --- a/kernel/power/suspend.c @@ -359122,7 +465395,7 @@ index ab4215266ebee..d820ef615475b 100644 if (object_debug) rcu_test_debug_objects(); diff --git a/kernel/rcu/tasks.h b/kernel/rcu/tasks.h -index 806160c44b172..ae8396032b5da 100644 +index 806160c44b172..4bd07cc3c0eab 100644 --- a/kernel/rcu/tasks.h +++ b/kernel/rcu/tasks.h @@ -171,7 +171,7 @@ static void call_rcu_tasks_generic(struct rcu_head *rhp, rcu_callback_t func, @@ -359161,8 +465434,66 @@ index 806160c44b172..ae8396032b5da 100644 rtp->n_ipis += cpumask_weight(cpu_online_mask); schedule_on_each_cpu(rcu_tasks_be_rude); } +@@ -890,32 +892,24 @@ static void trc_read_check_handler(void *t_in) + + // If the task is no longer running on this CPU, leave. + if (unlikely(texp != t)) { +- if (WARN_ON_ONCE(atomic_dec_and_test(&trc_n_readers_need_end))) +- wake_up(&trc_wait); + goto reset_ipi; // Already on holdout list, so will check later. + } + + // If the task is not in a read-side critical section, and + // if this is the last reader, awaken the grace-period kthread. + if (likely(!READ_ONCE(t->trc_reader_nesting))) { +- if (WARN_ON_ONCE(atomic_dec_and_test(&trc_n_readers_need_end))) +- wake_up(&trc_wait); +- // Mark as checked after decrement to avoid false +- // positives on the above WARN_ON_ONCE(). 
+ WRITE_ONCE(t->trc_reader_checked, true); + goto reset_ipi; + } + // If we are racing with an rcu_read_unlock_trace(), try again later. +- if (unlikely(READ_ONCE(t->trc_reader_nesting) < 0)) { +- if (WARN_ON_ONCE(atomic_dec_and_test(&trc_n_readers_need_end))) +- wake_up(&trc_wait); ++ if (unlikely(READ_ONCE(t->trc_reader_nesting) < 0)) + goto reset_ipi; +- } + WRITE_ONCE(t->trc_reader_checked, true); + + // Get here if the task is in a read-side critical section. Set + // its state so that it will awaken the grace-period kthread upon + // exit from that critical section. ++ atomic_inc(&trc_n_readers_need_end); // One more to wait on. + WARN_ON_ONCE(READ_ONCE(t->trc_reader_special.b.need_qs)); + WRITE_ONCE(t->trc_reader_special.b.need_qs, true); + +@@ -1015,21 +1009,15 @@ static void trc_wait_for_one_reader(struct task_struct *t, + if (per_cpu(trc_ipi_to_cpu, cpu) || t->trc_ipi_to_cpu >= 0) + return; + +- atomic_inc(&trc_n_readers_need_end); + per_cpu(trc_ipi_to_cpu, cpu) = true; + t->trc_ipi_to_cpu = cpu; + rcu_tasks_trace.n_ipis++; +- if (smp_call_function_single(cpu, +- trc_read_check_handler, t, 0)) { ++ if (smp_call_function_single(cpu, trc_read_check_handler, t, 0)) { + // Just in case there is some other reason for + // failure than the target CPU being offline. + rcu_tasks_trace.n_ipis_fails++; + per_cpu(trc_ipi_to_cpu, cpu) = false; + t->trc_ipi_to_cpu = cpu; +- if (atomic_dec_and_test(&trc_n_readers_need_end)) { +- WARN_ON_ONCE(1); +- wake_up(&trc_wait); +- } + } + } + } diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c -index bce848e50512e..63f7ce228cc35 100644 +index bce848e50512e..cf101da389b00 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -327,7 +327,7 @@ static void rcu_dynticks_eqs_online(void) @@ -359252,6 +465583,15 @@ index bce848e50512e..63f7ce228cc35 100644 } else if (count < rdp->qlen_last_fqs_check - qhimark) rdp->qlen_last_fqs_check = count; +@@ -2668,7 +2674,7 @@ void rcu_force_quiescent_state(void) + struct rcu_node *rnp_old = NULL; + + /* Funnel through hierarchy to reduce memory contention. */ +- rnp = __this_cpu_read(rcu_data.mynode); ++ rnp = raw_cpu_read(rcu_data.mynode); + for (; rnp != NULL; rnp = rnp->parent) { + ret = (READ_ONCE(rcu_state.gp_flags) & RCU_GP_FLAG_FQS) || + !raw_spin_trylock(&rnp->fqslock); @@ -2898,10 +2904,10 @@ static void __call_rcu_core(struct rcu_data *rdp, struct rcu_head *head, } else { /* Give the grace period a kick. 
*/ @@ -359391,6 +465731,26 @@ index d070059163d70..f1a73a1f8472e 100644 { #ifdef CONFIG_TASKS_TRACE_RCU if (IS_ENABLED(CONFIG_TASKS_TRACE_RCU_READ_MB)) +diff --git a/kernel/relay.c b/kernel/relay.c +index d1a67fbb819d3..6825b84038776 100644 +--- a/kernel/relay.c ++++ b/kernel/relay.c +@@ -151,13 +151,13 @@ static struct rchan_buf *relay_create_buf(struct rchan *chan) + { + struct rchan_buf *buf; + +- if (chan->n_subbufs > KMALLOC_MAX_SIZE / sizeof(size_t *)) ++ if (chan->n_subbufs > KMALLOC_MAX_SIZE / sizeof(size_t)) + return NULL; + + buf = kzalloc(sizeof(struct rchan_buf), GFP_KERNEL); + if (!buf) + return NULL; +- buf->padding = kmalloc_array(chan->n_subbufs, sizeof(size_t *), ++ buf->padding = kmalloc_array(chan->n_subbufs, sizeof(size_t), + GFP_KERNEL); + if (!buf->padding) + goto free_buf; diff --git a/kernel/resource.c b/kernel/resource.c index ca9f5198a01ff..20e10e48f0523 100644 --- a/kernel/resource.c @@ -359515,9 +465875,18 @@ index 2067080bb2358..8629b37d118e7 100644 } diff --git a/kernel/sched/core.c b/kernel/sched/core.c -index f21714ea3db85..85be684687b08 100644 +index f21714ea3db85..c1458fa8beb3e 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c +@@ -21,7 +21,7 @@ + #include <asm/tlb.h> + + #include "../workqueue_internal.h" +-#include "../../fs/io-wq.h" ++#include "../../io_uring/io-wq.h" + #include "../smpboot.h" + + #include "pelt.h" @@ -36,6 +36,7 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(pelt_rt_tp); EXPORT_TRACEPOINT_SYMBOL_GPL(pelt_dl_tp); EXPORT_TRACEPOINT_SYMBOL_GPL(pelt_irq_tp); @@ -359549,7 +465918,55 @@ index f21714ea3db85..85be684687b08 100644 } static void __init init_uclamp(void) -@@ -3707,10 +3708,13 @@ out: +@@ -2500,14 +2501,43 @@ void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask) + int dup_user_cpus_ptr(struct task_struct *dst, struct task_struct *src, + int node) + { +- if (!src->user_cpus_ptr) ++ cpumask_t *user_mask; ++ unsigned long flags; ++ ++ /* ++ * Always clear dst->user_cpus_ptr first as their user_cpus_ptr's ++ * may differ by now due to racing. ++ */ ++ dst->user_cpus_ptr = NULL; ++ ++ /* ++ * This check is racy and losing the race is a valid situation. ++ * It is not worth the extra overhead of taking the pi_lock on ++ * every fork/clone. ++ */ ++ if (data_race(!src->user_cpus_ptr)) + return 0; + +- dst->user_cpus_ptr = kmalloc_node(cpumask_size(), GFP_KERNEL, node); +- if (!dst->user_cpus_ptr) ++ user_mask = kmalloc_node(cpumask_size(), GFP_KERNEL, node); ++ if (!user_mask) + return -ENOMEM; + +- cpumask_copy(dst->user_cpus_ptr, src->user_cpus_ptr); ++ /* ++ * Use pi_lock to protect content of user_cpus_ptr ++ * ++ * Though unlikely, user_cpus_ptr can be reset to NULL by a concurrent ++ * do_set_cpus_allowed(). 
++ */ ++ raw_spin_lock_irqsave(&src->pi_lock, flags); ++ if (src->user_cpus_ptr) { ++ swap(dst->user_cpus_ptr, user_mask); ++ cpumask_copy(dst->user_cpus_ptr, src->user_cpus_ptr); ++ } ++ raw_spin_unlock_irqrestore(&src->pi_lock, flags); ++ ++ if (unlikely(user_mask)) ++ kfree(user_mask); ++ + return 0; + } + +@@ -3707,10 +3737,13 @@ out: bool cpus_share_cache(int this_cpu, int that_cpu) { @@ -359564,7 +465981,7 @@ index f21714ea3db85..85be684687b08 100644 { /* * Do not complicate things with the async wake_list while the CPU is -@@ -3719,6 +3723,10 @@ static inline bool ttwu_queue_cond(int cpu, int wake_flags) +@@ -3719,6 +3752,10 @@ static inline bool ttwu_queue_cond(int cpu, int wake_flags) if (!cpu_active(cpu)) return false; @@ -359575,7 +465992,7 @@ index f21714ea3db85..85be684687b08 100644 /* * If the CPU does not share cache, then queue the task on the * remote rqs wakelist to avoid accessing remote data. -@@ -3726,13 +3734,21 @@ static inline bool ttwu_queue_cond(int cpu, int wake_flags) +@@ -3726,13 +3763,21 @@ static inline bool ttwu_queue_cond(int cpu, int wake_flags) if (!cpus_share_cache(smp_processor_id(), cpu)) return true; @@ -359602,7 +466019,7 @@ index f21714ea3db85..85be684687b08 100644 return true; return false; -@@ -3740,10 +3756,7 @@ static inline bool ttwu_queue_cond(int cpu, int wake_flags) +@@ -3740,10 +3785,7 @@ static inline bool ttwu_queue_cond(int cpu, int wake_flags) static bool ttwu_queue_wakelist(struct task_struct *p, int cpu, int wake_flags) { @@ -359614,7 +466031,7 @@ index f21714ea3db85..85be684687b08 100644 sched_clock_cpu(cpu); /* Sync clocks across CPUs */ __ttwu_queue_wakelist(p, cpu, wake_flags); return true; -@@ -4065,7 +4078,7 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags) +@@ -4065,7 +4107,7 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags) * scheduling. */ if (smp_load_acquire(&p->on_cpu) && @@ -359623,7 +466040,7 @@ index f21714ea3db85..85be684687b08 100644 goto unlock; /* -@@ -4328,8 +4341,6 @@ int sysctl_schedstats(struct ctl_table *table, int write, void *buffer, +@@ -4328,8 +4370,6 @@ int sysctl_schedstats(struct ctl_table *table, int write, void *buffer, */ int sched_fork(unsigned long clone_flags, struct task_struct *p) { @@ -359632,7 +466049,7 @@ index f21714ea3db85..85be684687b08 100644 __sched_fork(clone_flags, p); /* * We mark the process as NEW here. This guarantees that -@@ -4375,23 +4386,6 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p) +@@ -4375,23 +4415,6 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p) init_entity_runnable_average(&p->se); @@ -359656,7 +466073,7 @@ index f21714ea3db85..85be684687b08 100644 #ifdef CONFIG_SCHED_INFO if (likely(sched_info_on())) -@@ -4408,6 +4402,35 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p) +@@ -4408,6 +4431,35 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p) return 0; } @@ -359692,7 +466109,7 @@ index f21714ea3db85..85be684687b08 100644 void sched_post_fork(struct task_struct *p) { uclamp_post_fork(p); -@@ -4571,7 +4594,8 @@ static inline void prepare_task(struct task_struct *next) +@@ -4571,7 +4623,8 @@ static inline void prepare_task(struct task_struct *next) * Claim the task as running, we do this before switching to it * such that any running task will have this set. 
* @@ -359702,7 +466119,7 @@ index f21714ea3db85..85be684687b08 100644 */ WRITE_ONCE(next->on_cpu, 1); #endif -@@ -4616,25 +4640,55 @@ static void do_balance_callbacks(struct rq *rq, struct callback_head *head) +@@ -4616,25 +4669,55 @@ static void do_balance_callbacks(struct rq *rq, struct callback_head *head) static void balance_push(struct rq *rq); @@ -359761,7 +466178,17 @@ index f21714ea3db85..85be684687b08 100644 } static inline void balance_callbacks(struct rq *rq, struct callback_head *head) -@@ -5913,7 +5967,7 @@ static bool try_steal_cookie(int this, int that) +@@ -5477,8 +5560,7 @@ static noinline void __schedule_bug(struct task_struct *prev) + pr_err("Preemption disabled at:"); + print_ip_sym(KERN_ERR, preempt_disable_ip); + } +- if (panic_on_warn) +- panic("scheduling while atomic\n"); ++ check_panic_on_warn("scheduling while atomic"); + + dump_stack(); + add_taint(TAINT_WARN, LOCKDEP_STILL_OK); +@@ -5913,7 +5995,7 @@ static bool try_steal_cookie(int this, int that) if (p == src->core_pick || p == src->curr) goto next; @@ -359770,7 +466197,7 @@ index f21714ea3db85..85be684687b08 100644 goto next; if (p->core_occupation > dst->idle->core_occupation) -@@ -6335,8 +6389,12 @@ static inline void sched_submit_work(struct task_struct *tsk) +@@ -6335,8 +6417,12 @@ static inline void sched_submit_work(struct task_struct *tsk) preempt_enable_no_resched(); } @@ -359785,7 +466212,7 @@ index f21714ea3db85..85be684687b08 100644 /* * If we are going to sleep and we have plugged IO queued, -@@ -6656,11 +6714,11 @@ static int __init setup_preempt_mode(char *str) +@@ -6656,11 +6742,11 @@ static int __init setup_preempt_mode(char *str) int mode = sched_dynamic_mode(str); if (mode < 0) { pr_warn("Dynamic Preempt: unsupported mode: %s\n", str); @@ -359799,7 +466226,7 @@ index f21714ea3db85..85be684687b08 100644 } __setup("preempt=", setup_preempt_mode); -@@ -8195,9 +8253,7 @@ int __cond_resched_lock(spinlock_t *lock) +@@ -8195,9 +8281,7 @@ int __cond_resched_lock(spinlock_t *lock) if (spin_needbreak(lock) || resched) { spin_unlock(lock); @@ -359810,7 +466237,7 @@ index f21714ea3db85..85be684687b08 100644 cpu_relax(); ret = 1; spin_lock(lock); -@@ -8215,9 +8271,7 @@ int __cond_resched_rwlock_read(rwlock_t *lock) +@@ -8215,9 +8299,7 @@ int __cond_resched_rwlock_read(rwlock_t *lock) if (rwlock_needbreak(lock) || resched) { read_unlock(lock); @@ -359821,7 +466248,7 @@ index f21714ea3db85..85be684687b08 100644 cpu_relax(); ret = 1; read_lock(lock); -@@ -8235,9 +8289,7 @@ int __cond_resched_rwlock_write(rwlock_t *lock) +@@ -8235,9 +8317,7 @@ int __cond_resched_rwlock_write(rwlock_t *lock) if (rwlock_needbreak(lock) || resched) { write_unlock(lock); @@ -359832,7 +466259,7 @@ index f21714ea3db85..85be684687b08 100644 cpu_relax(); ret = 1; write_lock(lock); -@@ -8637,9 +8689,6 @@ void __init init_idle(struct task_struct *idle, int cpu) +@@ -8637,9 +8717,6 @@ void __init init_idle(struct task_struct *idle, int cpu) idle->flags |= PF_IDLE | PF_KTHREAD | PF_NO_SETAFFINITY; kthread_set_per_cpu(idle, cpu); @@ -359842,7 +466269,7 @@ index f21714ea3db85..85be684687b08 100644 #ifdef CONFIG_SMP /* * It's possible that init_idle() gets called multiple times on a task, -@@ -8702,7 +8751,7 @@ int cpuset_cpumask_can_shrink(const struct cpumask *cur, +@@ -8702,7 +8779,7 @@ int cpuset_cpumask_can_shrink(const struct cpumask *cur, } int task_can_attach(struct task_struct *p, @@ -359851,7 +466278,7 @@ index f21714ea3db85..85be684687b08 100644 { int ret = 0; -@@ -8721,8 +8770,13 @@ int task_can_attach(struct task_struct *p, 
+@@ -8721,8 +8798,13 @@ int task_can_attach(struct task_struct *p, } if (dl_task(p) && !cpumask_intersects(task_rq(p)->rd->span, @@ -359867,7 +466294,7 @@ index f21714ea3db85..85be684687b08 100644 out: return ret; -@@ -8795,7 +8849,6 @@ void idle_task_exit(void) +@@ -8795,7 +8877,6 @@ void idle_task_exit(void) finish_arch_post_lock_switch(); } @@ -359875,7 +466302,7 @@ index f21714ea3db85..85be684687b08 100644 /* finish_cpu(), as ran on the BP, will clean up the active_mm state */ } -@@ -9007,8 +9060,10 @@ static void cpuset_cpu_active(void) +@@ -9007,8 +9088,10 @@ static void cpuset_cpu_active(void) static int cpuset_cpu_inactive(unsigned int cpu) { if (!cpuhp_tasks_frozen) { @@ -359888,7 +466315,7 @@ index f21714ea3db85..85be684687b08 100644 cpuset_update_active_cpus(); } else { num_cpus_frozen++; -@@ -9716,6 +9771,22 @@ static void sched_free_group(struct task_group *tg) +@@ -9716,6 +9799,22 @@ static void sched_free_group(struct task_group *tg) kmem_cache_free(task_group_cache, tg); } @@ -359911,7 +466338,7 @@ index f21714ea3db85..85be684687b08 100644 /* allocate runqueue etc for a new task group */ struct task_group *sched_create_group(struct task_group *parent) { -@@ -9759,25 +9830,35 @@ void sched_online_group(struct task_group *tg, struct task_group *parent) +@@ -9759,25 +9858,35 @@ void sched_online_group(struct task_group *tg, struct task_group *parent) } /* rcu callback to free various structures associated with a task group */ @@ -359954,7 +466381,7 @@ index f21714ea3db85..85be684687b08 100644 spin_lock_irqsave(&task_group_lock, flags); list_del_rcu(&tg->list); list_del_rcu(&tg->siblings); -@@ -9896,7 +9977,7 @@ static void cpu_cgroup_css_released(struct cgroup_subsys_state *css) +@@ -9896,7 +10005,7 @@ static void cpu_cgroup_css_released(struct cgroup_subsys_state *css) { struct task_group *tg = css_tg(css); @@ -359963,7 +466390,7 @@ index f21714ea3db85..85be684687b08 100644 } static void cpu_cgroup_css_free(struct cgroup_subsys_state *css) -@@ -9906,7 +9987,7 @@ static void cpu_cgroup_css_free(struct cgroup_subsys_state *css) +@@ -9906,7 +10015,7 @@ static void cpu_cgroup_css_free(struct cgroup_subsys_state *css) /* * Relies on the RCU grace period between css_released() and this. */ @@ -360137,6 +466564,19 @@ index 893eece65bfda..cacc2076ad214 100644 rcu_read_unlock(); } +diff --git a/kernel/sched/cpudeadline.c b/kernel/sched/cpudeadline.c +index ceb03d76c0ccd..221ca10505738 100644 +--- a/kernel/sched/cpudeadline.c ++++ b/kernel/sched/cpudeadline.c +@@ -124,7 +124,7 @@ int cpudl_find(struct cpudl *cp, struct task_struct *p, + unsigned long cap, max_cap = 0; + int cpu, max_cpu = -1; + +- if (!static_branch_unlikely(&sched_asym_cpucapacity)) ++ if (!sched_asym_cpucap_active()) + return 1; + + /* Ensure the capacity of the CPUs fits the task. 
*/ diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c index e7af18857371e..7f6bb37d3a2f7 100644 --- a/kernel/sched/cpufreq_schedutil.c @@ -360167,9 +466607,18 @@ index 872e481d5098c..042a6dbce8f32 100644 } } diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c -index e94314633b39d..147b757d162b9 100644 +index e94314633b39d..2a2f32eaffccd 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c +@@ -112,7 +112,7 @@ static inline unsigned long __dl_bw_capacity(int i) + */ + static inline unsigned long dl_bw_capacity(int i) + { +- if (!static_branch_unlikely(&sched_asym_cpucapacity) && ++ if (!sched_asym_cpucap_active() && + capacity_orig_of(i) == SCHED_CAPACITY_SCALE) { + return dl_bw_cpus(i) << SCHED_CAPACITY_SHIFT; + } else { @@ -1561,7 +1561,10 @@ static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags) * the throttle. */ @@ -360182,6 +466631,15 @@ index e94314633b39d..147b757d162b9 100644 return; } +@@ -1700,7 +1703,7 @@ select_task_rq_dl(struct task_struct *p, int cpu, int flags) + * Take the capacity of the CPU into account to + * ensure it fits the requirement of the task. + */ +- if (static_branch_unlikely(&sched_asym_cpucapacity)) ++ if (sched_asym_cpucap_active()) + select_rq |= !dl_task_fits_capacity(p, cpu); + + if (select_rq) { @@ -1720,6 +1723,7 @@ out: static void migrate_task_rq_dl(struct task_struct *p, int new_cpu __maybe_unused) @@ -360350,7 +466808,7 @@ index 17a653b67006a..34c5ff3a0669b 100644 } diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c -index f6a05d9b54436..a853e4e9e3c36 100644 +index f6a05d9b54436..6648683cd9644 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -3422,7 +3422,6 @@ void set_task_rq_fair(struct sched_entity *se, @@ -360407,7 +466865,160 @@ index f6a05d9b54436..a853e4e9e3c36 100644 enqueue_load_avg(cfs_rq, se); cfs_rq->avg.util_avg += se->avg.util_avg; -@@ -4802,8 +4812,8 @@ static int tg_unthrottle_up(struct task_group *tg, void *data) +@@ -4110,14 +4120,140 @@ done: + trace_sched_util_est_se_tp(&p->se); + } + +-static inline int task_fits_capacity(struct task_struct *p, long capacity) ++static inline int util_fits_cpu(unsigned long util, ++ unsigned long uclamp_min, ++ unsigned long uclamp_max, ++ int cpu) + { +- return fits_capacity(uclamp_task_util(p), capacity); ++ unsigned long capacity_orig, capacity_orig_thermal; ++ unsigned long capacity = capacity_of(cpu); ++ bool fits, uclamp_max_fits; ++ ++ /* ++ * Check if the real util fits without any uclamp boost/cap applied. ++ */ ++ fits = fits_capacity(util, capacity); ++ ++ if (!uclamp_is_used()) ++ return fits; ++ ++ /* ++ * We must use capacity_orig_of() for comparing against uclamp_min and ++ * uclamp_max. We only care about capacity pressure (by using ++ * capacity_of()) for comparing against the real util. ++ * ++ * If a task is boosted to 1024 for example, we don't want a tiny ++ * pressure to skew the check whether it fits a CPU or not. ++ * ++ * Similarly if a task is capped to capacity_orig_of(little_cpu), it ++ * should fit a little cpu even if there's some pressure. ++ * ++ * Only exception is for thermal pressure since it has a direct impact ++ * on available OPP of the system. ++ * ++ * We honour it for uclamp_min only as a drop in performance level ++ * could result in not getting the requested minimum performance level. ++ * ++ * For uclamp_max, we can tolerate a drop in performance level as the ++ * goal is to cap the task. So it's okay if it's getting less. 
++ * ++ * In case of capacity inversion, which is not handled yet, we should ++ * honour the inverted capacity for both uclamp_min and uclamp_max all ++ * the time. ++ */ ++ capacity_orig = capacity_orig_of(cpu); ++ capacity_orig_thermal = capacity_orig - arch_scale_thermal_pressure(cpu); ++ ++ /* ++ * We want to force a task to fit a cpu as implied by uclamp_max. ++ * But we do have some corner cases to cater for.. ++ * ++ * ++ * C=z ++ * | ___ ++ * | C=y | | ++ * |_ _ _ _ _ _ _ _ _ ___ _ _ _ | _ | _ _ _ _ _ uclamp_max ++ * | C=x | | | | ++ * | ___ | | | | ++ * | | | | | | | (util somewhere in this region) ++ * | | | | | | | ++ * | | | | | | | ++ * +---------------------------------------- ++ * cpu0 cpu1 cpu2 ++ * ++ * In the above example if a task is capped to a specific performance ++ * point, y, then when: ++ * ++ * * util = 80% of x then it does not fit on cpu0 and should migrate ++ * to cpu1 ++ * * util = 80% of y then it is forced to fit on cpu1 to honour ++ * uclamp_max request. ++ * ++ * which is what we're enforcing here. A task always fits if ++ * uclamp_max <= capacity_orig. But when uclamp_max > capacity_orig, ++ * the normal upmigration rules should withhold still. ++ * ++ * Only exception is when we are on max capacity, then we need to be ++ * careful not to block overutilized state. This is so because: ++ * ++ * 1. There's no concept of capping at max_capacity! We can't go ++ * beyond this performance level anyway. ++ * 2. The system is being saturated when we're operating near ++ * max capacity, it doesn't make sense to block overutilized. ++ */ ++ uclamp_max_fits = (capacity_orig == SCHED_CAPACITY_SCALE) && (uclamp_max == SCHED_CAPACITY_SCALE); ++ uclamp_max_fits = !uclamp_max_fits && (uclamp_max <= capacity_orig); ++ fits = fits || uclamp_max_fits; ++ ++ /* ++ * ++ * C=z ++ * | ___ (region a, capped, util >= uclamp_max) ++ * | C=y | | ++ * |_ _ _ _ _ _ _ _ _ ___ _ _ _ | _ | _ _ _ _ _ uclamp_max ++ * | C=x | | | | ++ * | ___ | | | | (region b, uclamp_min <= util <= uclamp_max) ++ * |_ _ _|_ _|_ _ _ _| _ | _ _ _| _ | _ _ _ _ _ uclamp_min ++ * | | | | | | | ++ * | | | | | | | (region c, boosted, util < uclamp_min) ++ * +---------------------------------------- ++ * cpu0 cpu1 cpu2 ++ * ++ * a) If util > uclamp_max, then we're capped, we don't care about ++ * actual fitness value here. We only care if uclamp_max fits ++ * capacity without taking margin/pressure into account. ++ * See comment above. ++ * ++ * b) If uclamp_min <= util <= uclamp_max, then the normal ++ * fits_capacity() rules apply. Except we need to ensure that we ++ * enforce we remain within uclamp_max, see comment above. ++ * ++ * c) If util < uclamp_min, then we are boosted. Same as (b) but we ++ * need to take into account the boosted value fits the CPU without ++ * taking margin/pressure into account. ++ * ++ * Cases (a) and (b) are handled in the 'fits' variable already. We ++ * just need to consider an extra check for case (c) after ensuring we ++ * handle the case uclamp_min > uclamp_max. 
++ */ ++ uclamp_min = min(uclamp_min, uclamp_max); ++ if (util < uclamp_min && capacity_orig != SCHED_CAPACITY_SCALE) ++ fits = fits && (uclamp_min <= capacity_orig_thermal); ++ ++ return fits; ++} ++ ++static inline int task_fits_cpu(struct task_struct *p, int cpu) ++{ ++ unsigned long uclamp_min = uclamp_eff_value(p, UCLAMP_MIN); ++ unsigned long uclamp_max = uclamp_eff_value(p, UCLAMP_MAX); ++ unsigned long util = task_util_est(p); ++ return util_fits_cpu(util, uclamp_min, uclamp_max, cpu); + } + + static inline void update_misfit_status(struct task_struct *p, struct rq *rq) + { +- if (!static_branch_unlikely(&sched_asym_cpucapacity)) ++ if (!sched_asym_cpucap_active()) + return; + + if (!p || p->nr_cpus_allowed == 1) { +@@ -4125,7 +4261,7 @@ static inline void update_misfit_status(struct task_struct *p, struct rq *rq) + return; + } + +- if (task_fits_capacity(p, capacity_of(cpu_of(rq)))) { ++ if (task_fits_cpu(p, cpu_of(rq))) { + rq->misfit_task_load = 0; + return; + } +@@ -4802,8 +4938,8 @@ static int tg_unthrottle_up(struct task_group *tg, void *data) cfs_rq->throttle_count--; if (!cfs_rq->throttle_count) { @@ -360418,7 +467029,7 @@ index f6a05d9b54436..a853e4e9e3c36 100644 /* Add cfs_rq with load or one or more already running entities to the list */ if (!cfs_rq_is_decayed(cfs_rq) || cfs_rq->nr_running) -@@ -4820,7 +4830,7 @@ static int tg_throttle_down(struct task_group *tg, void *data) +@@ -4820,7 +4956,7 @@ static int tg_throttle_down(struct task_group *tg, void *data) /* group is entering throttled state, stop time */ if (!cfs_rq->throttle_count) { @@ -360427,7 +467038,7 @@ index f6a05d9b54436..a853e4e9e3c36 100644 list_del_leaf_cfs_rq(cfs_rq); } cfs_rq->throttle_count++; -@@ -5264,7 +5274,7 @@ static void sync_throttle(struct task_group *tg, int cpu) +@@ -5264,7 +5400,7 @@ static void sync_throttle(struct task_group *tg, int cpu) pcfs_rq = tg->parent->cfs_rq[cpu]; cfs_rq->throttle_count = pcfs_rq->throttle_count; @@ -360436,7 +467047,7 @@ index f6a05d9b54436..a853e4e9e3c36 100644 } /* conditionally throttle active cfs_rq's from put_prev_entity() */ -@@ -6270,6 +6280,7 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, bool +@@ -6270,6 +6406,7 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, bool { struct cpumask *cpus = this_cpu_cpumask_var_ptr(select_idle_mask); int i, cpu, idle_cpu = -1, nr = INT_MAX; @@ -360444,7 +467055,7 @@ index f6a05d9b54436..a853e4e9e3c36 100644 struct rq *this_rq = this_rq(); int this = smp_processor_id(); struct sched_domain *this_sd; -@@ -6309,6 +6320,17 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, bool +@@ -6309,6 +6446,17 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, bool time = cpu_clock(this); } @@ -360462,7 +467073,91 @@ index f6a05d9b54436..a853e4e9e3c36 100644 for_each_cpu_wrap(cpu, cpus, target + 1) { if (has_idle_core) { i = select_idle_core(p, cpu, cpus, &idle_cpu); -@@ -6429,8 +6451,10 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target) +@@ -6350,21 +6498,23 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, bool + static int + select_idle_capacity(struct task_struct *p, struct sched_domain *sd, int target) + { +- unsigned long task_util, best_cap = 0; ++ unsigned long task_util, util_min, util_max, best_cap = 0; + int cpu, best_cpu = -1; + struct cpumask *cpus; + + cpus = this_cpu_cpumask_var_ptr(select_idle_mask); + cpumask_and(cpus, sched_domain_span(sd), 
p->cpus_ptr); + +- task_util = uclamp_task_util(p); ++ task_util = task_util_est(p); ++ util_min = uclamp_eff_value(p, UCLAMP_MIN); ++ util_max = uclamp_eff_value(p, UCLAMP_MAX); + + for_each_cpu_wrap(cpu, cpus, target) { + unsigned long cpu_cap = capacity_of(cpu); + + if (!available_idle_cpu(cpu) && !sched_idle_cpu(cpu)) + continue; +- if (fits_capacity(task_util, cpu_cap)) ++ if (util_fits_cpu(task_util, util_min, util_max, cpu)) + return cpu; + + if (cpu_cap > best_cap) { +@@ -6376,10 +6526,13 @@ select_idle_capacity(struct task_struct *p, struct sched_domain *sd, int target) + return best_cpu; + } + +-static inline bool asym_fits_capacity(int task_util, int cpu) ++static inline bool asym_fits_cpu(unsigned long util, ++ unsigned long util_min, ++ unsigned long util_max, ++ int cpu) + { +- if (static_branch_unlikely(&sched_asym_cpucapacity)) +- return fits_capacity(task_util, capacity_of(cpu)); ++ if (sched_asym_cpucap_active()) ++ return util_fits_cpu(util, util_min, util_max, cpu); + + return true; + } +@@ -6391,16 +6544,18 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target) + { + bool has_idle_core = false; + struct sched_domain *sd; +- unsigned long task_util; ++ unsigned long task_util, util_min, util_max; + int i, recent_used_cpu; + + /* + * On asymmetric system, update task utilization because we will check + * that the task fits with cpu's capacity. + */ +- if (static_branch_unlikely(&sched_asym_cpucapacity)) { ++ if (sched_asym_cpucap_active()) { + sync_entity_load_avg(&p->se); +- task_util = uclamp_task_util(p); ++ task_util = task_util_est(p); ++ util_min = uclamp_eff_value(p, UCLAMP_MIN); ++ util_max = uclamp_eff_value(p, UCLAMP_MAX); + } + + /* +@@ -6409,7 +6564,7 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target) + lockdep_assert_irqs_disabled(); + + if ((available_idle_cpu(target) || sched_idle_cpu(target)) && +- asym_fits_capacity(task_util, target)) ++ asym_fits_cpu(task_util, util_min, util_max, target)) + return target; + + /* +@@ -6417,7 +6572,7 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target) + */ + if (prev != target && cpus_share_cache(prev, target) && + (available_idle_cpu(prev) || sched_idle_cpu(prev)) && +- asym_fits_capacity(task_util, prev)) ++ asym_fits_cpu(task_util, util_min, util_max, prev)) + return prev; + + /* +@@ -6429,8 +6584,10 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target) * pattern is IO completions. 
*/ if (is_per_cpu_kthread(current) && @@ -360470,11 +467165,72 @@ index f6a05d9b54436..a853e4e9e3c36 100644 prev == smp_processor_id() && - this_rq()->nr_running <= 1) { + this_rq()->nr_running <= 1 && -+ asym_fits_capacity(task_util, prev)) { ++ asym_fits_cpu(task_util, util_min, util_max, prev)) { return prev; } -@@ -8993,9 +9017,10 @@ static bool update_pick_idlest(struct sched_group *idlest, +@@ -6442,12 +6599,7 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target) + cpus_share_cache(recent_used_cpu, target) && + (available_idle_cpu(recent_used_cpu) || sched_idle_cpu(recent_used_cpu)) && + cpumask_test_cpu(p->recent_used_cpu, p->cpus_ptr) && +- asym_fits_capacity(task_util, recent_used_cpu)) { +- /* +- * Replace recent_used_cpu with prev as it is a potential +- * candidate for the next wake: +- */ +- p->recent_used_cpu = prev; ++ asym_fits_cpu(task_util, util_min, util_max, recent_used_cpu)) { + return recent_used_cpu; + } + +@@ -6455,7 +6607,7 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target) + * For asymmetric CPU capacity systems, our domain of interest is + * sd_asym_cpucapacity rather than sd_llc. + */ +- if (static_branch_unlikely(&sched_asym_cpucapacity)) { ++ if (sched_asym_cpucap_active()) { + sd = rcu_dereference(per_cpu(sd_asym_cpucapacity, target)); + /* + * On an asymmetric CPU capacity system where an exclusive +@@ -8009,7 +8161,7 @@ static int detach_tasks(struct lb_env *env) + + case migrate_misfit: + /* This is not a misfit task */ +- if (task_fits_capacity(p, capacity_of(env->src_cpu))) ++ if (task_fits_cpu(p, env->src_cpu)) + goto next; + + env->imbalance = 0; +@@ -8894,6 +9046,10 @@ static inline void update_sg_wakeup_stats(struct sched_domain *sd, + + memset(sgs, 0, sizeof(*sgs)); + ++ /* Assume that task can't fit any CPU of the group */ ++ if (sd->flags & SD_ASYM_CPUCAPACITY) ++ sgs->group_misfit_task_load = 1; ++ + for_each_cpu(i, sched_group_span(group)) { + struct rq *rq = cpu_rq(i); + unsigned int local; +@@ -8913,12 +9069,12 @@ static inline void update_sg_wakeup_stats(struct sched_domain *sd, + if (!nr_running && idle_cpu_without(i, p)) + sgs->idle_cpus++; + +- } ++ /* Check if task fits in the CPU */ ++ if (sd->flags & SD_ASYM_CPUCAPACITY && ++ sgs->group_misfit_task_load && ++ task_fits_cpu(p, i)) ++ sgs->group_misfit_task_load = 0; + +- /* Check if task fits in the group */ +- if (sd->flags & SD_ASYM_CPUCAPACITY && +- !task_fits_capacity(p, group->sgc->max_capacity)) { +- sgs->group_misfit_task_load = 1; + } + + sgs->group_capacity = group->sgc->capacity; +@@ -8993,9 +9149,10 @@ static bool update_pick_idlest(struct sched_group *idlest, * This is an approximation as the number of running tasks may not be * related to the number of busy CPUs due to sched_setaffinity. 
*/ @@ -360487,7 +467243,7 @@ index f6a05d9b54436..a853e4e9e3c36 100644 } /* -@@ -9129,12 +9154,13 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p, int this_cpu) +@@ -9129,12 +9286,13 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p, int this_cpu) return idlest; #endif /* @@ -360506,7 +467262,7 @@ index f6a05d9b54436..a853e4e9e3c36 100644 return NULL; } -@@ -9152,6 +9178,77 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p, int this_cpu) +@@ -9152,6 +9310,77 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p, int this_cpu) return idlest; } @@ -360584,7 +467340,7 @@ index f6a05d9b54436..a853e4e9e3c36 100644 /** * update_sd_lb_stats - Update sched_domain's statistics for load balancing. * @env: The load balancing environment. -@@ -9164,6 +9261,7 @@ static inline void update_sd_lb_stats(struct lb_env *env, struct sd_lb_stats *sd +@@ -9164,6 +9393,7 @@ static inline void update_sd_lb_stats(struct lb_env *env, struct sd_lb_stats *sd struct sched_group *sg = env->sd->groups; struct sg_lb_stats *local = &sds->local_stat; struct sg_lb_stats tmp_sgs; @@ -360592,7 +467348,7 @@ index f6a05d9b54436..a853e4e9e3c36 100644 int sg_status = 0; do { -@@ -9196,6 +9294,7 @@ next_group: +@@ -9196,6 +9426,7 @@ next_group: sds->total_load += sgs->group_load; sds->total_capacity += sgs->group_capacity; @@ -360600,7 +467356,7 @@ index f6a05d9b54436..a853e4e9e3c36 100644 sg = sg->next; } while (sg != env->sd->groups); -@@ -9221,6 +9320,8 @@ next_group: +@@ -9221,6 +9452,8 @@ next_group: WRITE_ONCE(rd->overutilized, SG_OVERUTILIZED); trace_sched_overutilized_tp(rd, SG_OVERUTILIZED); } @@ -360609,7 +467365,7 @@ index f6a05d9b54436..a853e4e9e3c36 100644 } #define NUMA_IMBALANCE_MIN 2 -@@ -9340,7 +9441,7 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s +@@ -9340,7 +9573,7 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s /* Consider allowing a small imbalance between NUMA groups */ if (env->sd->flags & SD_NUMA) { env->imbalance = adjust_numa_imbalance(env->imbalance, @@ -360618,7 +467374,7 @@ index f6a05d9b54436..a853e4e9e3c36 100644 } return; -@@ -11358,8 +11459,6 @@ void free_fair_sched_group(struct task_group *tg) +@@ -11358,8 +11591,6 @@ void free_fair_sched_group(struct task_group *tg) { int i; @@ -360627,7 +467383,7 @@ index f6a05d9b54436..a853e4e9e3c36 100644 for_each_possible_cpu(i) { if (tg->cfs_rq) kfree(tg->cfs_rq[i]); -@@ -11436,6 +11535,8 @@ void unregister_fair_sched_group(struct task_group *tg) +@@ -11436,6 +11667,8 @@ void unregister_fair_sched_group(struct task_group *tg) struct rq *rq; int cpu; @@ -360722,7 +467478,7 @@ index e06071bf3472c..4ff2ed4f8fa15 100644 #else static inline u64 cfs_rq_clock_pelt(struct cfs_rq *cfs_rq) diff --git a/kernel/sched/psi.c b/kernel/sched/psi.c -index 1652f2bb54b79..cad2a1b34ed04 100644 +index 1652f2bb54b79..fa88bf6ccce02 100644 --- a/kernel/sched/psi.c +++ b/kernel/sched/psi.c @@ -34,13 +34,19 @@ @@ -360914,7 +467670,7 @@ index 1652f2bb54b79..cad2a1b34ed04 100644 mutex_lock(&group->trigger_lock); -@@ -1180,15 +1155,19 @@ struct psi_trigger *psi_trigger_create(struct psi_group *group, +@@ -1180,20 +1155,25 @@ struct psi_trigger *psi_trigger_create(struct psi_group *group, return t; } @@ -360936,9 +467692,18 @@ index 1652f2bb54b79..cad2a1b34ed04 100644 + group = t->group; /* - * Wakeup waiters to stop polling. Can happen if cgroup is deleted - * from under a polling process. 
-@@ -1224,9 +1203,9 @@ static void psi_trigger_destroy(struct kref *ref) +- * Wakeup waiters to stop polling. Can happen if cgroup is deleted +- * from under a polling process. ++ * Wakeup waiters to stop polling and clear the queue to prevent it from ++ * being accessed later. Can happen if cgroup is deleted from under a ++ * polling process. + */ +- wake_up_interruptible(&t->event_wait); ++ wake_up_pollfree(&t->event_wait); + + mutex_lock(&group->trigger_lock); + +@@ -1224,9 +1204,9 @@ static void psi_trigger_destroy(struct kref *ref) mutex_unlock(&group->trigger_lock); /* @@ -360951,7 +467716,7 @@ index 1652f2bb54b79..cad2a1b34ed04 100644 */ synchronize_rcu(); /* -@@ -1243,18 +1222,6 @@ static void psi_trigger_destroy(struct kref *ref) +@@ -1243,18 +1223,6 @@ static void psi_trigger_destroy(struct kref *ref) kfree(t); } @@ -360970,7 +467735,7 @@ index 1652f2bb54b79..cad2a1b34ed04 100644 __poll_t psi_trigger_poll(void **trigger_ptr, struct file *file, poll_table *wait) { -@@ -1264,27 +1231,57 @@ __poll_t psi_trigger_poll(void **trigger_ptr, +@@ -1264,27 +1232,57 @@ __poll_t psi_trigger_poll(void **trigger_ptr, if (static_branch_likely(&psi_disabled)) return DEFAULT_POLLMASK | EPOLLERR | EPOLLPRI; @@ -361039,7 +467804,7 @@ index 1652f2bb54b79..cad2a1b34ed04 100644 static ssize_t psi_write(struct file *file, const char __user *user_buf, size_t nbytes, enum psi_res res) { -@@ -1305,14 +1302,24 @@ static ssize_t psi_write(struct file *file, const char __user *user_buf, +@@ -1305,14 +1303,24 @@ static ssize_t psi_write(struct file *file, const char __user *user_buf, buf[buf_size - 1] = '\0'; @@ -361069,7 +467834,7 @@ index 1652f2bb54b79..cad2a1b34ed04 100644 mutex_unlock(&seq->lock); return nbytes; -@@ -1347,7 +1354,7 @@ static int psi_fop_release(struct inode *inode, struct file *file) +@@ -1347,7 +1355,7 @@ static int psi_fop_release(struct inode *inode, struct file *file) { struct seq_file *seq = file->private_data; @@ -361078,14 +467843,14 @@ index 1652f2bb54b79..cad2a1b34ed04 100644 return single_release(inode, file); } -@@ -1389,3 +1396,5 @@ static int __init psi_proc_init(void) +@@ -1389,3 +1397,5 @@ static int __init psi_proc_init(void) return 0; } module_init(psi_proc_init); + +#endif /* CONFIG_PROC_FS */ diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c -index 3daf42a0f4623..f75dcd3537b84 100644 +index 3daf42a0f4623..add67f811e004 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -52,11 +52,8 @@ void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime) @@ -361155,6 +467920,15 @@ index 3daf42a0f4623..f75dcd3537b84 100644 static inline int on_rt_rq(struct sched_rt_entity *rt_se) { +@@ -462,7 +473,7 @@ static inline bool rt_task_fits_capacity(struct task_struct *p, int cpu) + unsigned int cpu_cap; + + /* Only heterogeneous systems can benefit from this check */ +- if (!static_branch_unlikely(&sched_asym_cpucapacity)) ++ if (!sched_asym_cpucap_active()) + return true; + + min_cap = uclamp_eff_value(p, UCLAMP_MIN); @@ -554,7 +565,7 @@ static void sched_rt_rq_dequeue(struct rt_rq *rt_rq) rt_se = rt_rq->tg->rt_se[cpu]; @@ -361257,6 +468031,15 @@ index 3daf42a0f4623..f75dcd3537b84 100644 } static void enqueue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flags) +@@ -1717,7 +1736,7 @@ static int find_lowest_rq(struct task_struct *task) + * If we're on asym system ensure we consider the different capacities + * of the CPUs when searching for the lowest_mask. 
+ */ +- if (static_branch_unlikely(&sched_asym_cpucapacity)) { ++ if (sched_asym_cpucap_active()) { + + ret = cpupri_find_fitness(&task_rq(task)->rd->cpupri, + task, lowest_mask, @@ -1885,6 +1904,16 @@ static int push_rt_task(struct rq *rq, bool pull) return 0; @@ -361324,7 +468107,7 @@ index 3daf42a0f4623..f75dcd3537b84 100644 int sched_rt_handler(struct ctl_table *table, int write, void *buffer, diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h -index 3d3e5793e1172..e499028982536 100644 +index 3d3e5793e1172..e1f46ed412bce 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -348,9 +348,8 @@ extern void __setparam_dl(struct task_struct *p, const struct sched_attr *attr); @@ -361366,6 +468149,45 @@ index 3d3e5793e1172..e499028982536 100644 int throttled; int throttle_count; struct list_head throttled_list; +@@ -1148,6 +1148,14 @@ static inline bool is_migration_disabled(struct task_struct *p) + #endif + } + ++DECLARE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues); ++ ++#define cpu_rq(cpu) (&per_cpu(runqueues, (cpu))) ++#define this_rq() this_cpu_ptr(&runqueues) ++#define task_rq(p) cpu_rq(task_cpu(p)) ++#define cpu_curr(cpu) (cpu_rq(cpu)->curr) ++#define raw_rq() raw_cpu_ptr(&runqueues) ++ + struct sched_group; + #ifdef CONFIG_SCHED_CORE + static inline struct cpumask *sched_group_span(struct sched_group *sg); +@@ -1235,7 +1243,7 @@ static inline bool sched_group_cookie_match(struct rq *rq, + return true; + + for_each_cpu_and(cpu, sched_group_span(group), p->cpus_ptr) { +- if (sched_core_cookie_match(rq, p)) ++ if (sched_core_cookie_match(cpu_rq(cpu), p)) + return true; + } + return false; +@@ -1361,14 +1369,6 @@ static inline void update_idle_core(struct rq *rq) + static inline void update_idle_core(struct rq *rq) { } + #endif + +-DECLARE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues); +- +-#define cpu_rq(cpu) (&per_cpu(runqueues, (cpu))) +-#define this_rq() this_cpu_ptr(&runqueues) +-#define task_rq(p) cpu_rq(task_cpu(p)) +-#define cpu_curr(cpu) (cpu_rq(cpu)->curr) +-#define raw_rq() raw_cpu_ptr(&runqueues) +- + #ifdef CONFIG_FAIR_GROUP_SCHED + static inline struct task_struct *task_of(struct sched_entity *se) + { @@ -1717,6 +1717,11 @@ queue_balance_callback(struct rq *rq, { lockdep_assert_rq_held(rq); @@ -361378,7 +468200,19 @@ index 3d3e5793e1172..e499028982536 100644 if (unlikely(head->next || rq->balance_callback == &balance_push_callback)) return; -@@ -2047,7 +2052,6 @@ static inline int task_on_rq_migrating(struct task_struct *p) +@@ -1783,6 +1788,11 @@ DECLARE_PER_CPU(struct sched_domain __rcu *, sd_asym_packing); + DECLARE_PER_CPU(struct sched_domain __rcu *, sd_asym_cpucapacity); + extern struct static_key_false sched_asym_cpucapacity; + ++static __always_inline bool sched_asym_cpucap_active(void) ++{ ++ return static_branch_unlikely(&sched_asym_cpucapacity); ++} ++ + struct sched_group_capacity { + atomic_t ref; + /* +@@ -2047,7 +2057,6 @@ static inline int task_on_rq_migrating(struct task_struct *p) #define WF_SYNC 0x10 /* Waker goes to sleep after wakeup */ #define WF_MIGRATED 0x20 /* Internal use, task got migrated */ @@ -361386,7 +468220,7 @@ index 3d3e5793e1172..e499028982536 100644 #ifdef CONFIG_SMP static_assert(WF_EXEC == SD_BALANCE_EXEC); -@@ -2488,6 +2492,24 @@ unsigned long arch_scale_freq_capacity(int cpu) +@@ -2488,6 +2497,24 @@ unsigned long arch_scale_freq_capacity(int cpu) } #endif @@ -361411,7 +468245,7 @@ index 3d3e5793e1172..e499028982536 100644 #ifdef CONFIG_SMP -@@ -2553,14 +2575,15 @@ static inline int _double_lock_balance(struct rq 
*this_rq, struct rq *busiest) +@@ -2553,14 +2580,15 @@ static inline int _double_lock_balance(struct rq *this_rq, struct rq *busiest) __acquires(busiest->lock) __acquires(this_rq->lock) { @@ -361431,7 +468265,7 @@ index 3d3e5793e1172..e499028982536 100644 return 0; } -@@ -2654,6 +2677,7 @@ static inline void double_rq_lock(struct rq *rq1, struct rq *rq2) +@@ -2654,6 +2682,7 @@ static inline void double_rq_lock(struct rq *rq1, struct rq *rq2) BUG_ON(rq1 != rq2); raw_spin_rq_lock(rq1); __acquire(rq2->lock); /* Fake it out ;) */ @@ -361439,6 +468273,22 @@ index 3d3e5793e1172..e499028982536 100644 } /* +@@ -2892,6 +2921,15 @@ static inline bool uclamp_is_used(void) + return static_branch_likely(&sched_uclamp_used); + } + #else /* CONFIG_UCLAMP_TASK */ ++static inline unsigned long uclamp_eff_value(struct task_struct *p, ++ enum uclamp_id clamp_id) ++{ ++ if (clamp_id == UCLAMP_MIN) ++ return 0; ++ ++ return SCHED_CAPACITY_SCALE; ++} ++ + static inline + unsigned long uclamp_rq_util_with(struct rq *rq, unsigned long util, + struct task_struct *p) diff --git a/kernel/sched/stats.h b/kernel/sched/stats.h index d8f8eb0c655ba..606a3982d13a5 100644 --- a/kernel/sched/stats.h @@ -363024,7 +469874,7 @@ index 0000000000000..dc5665b628140 + +#endif /* CONFIG_STATIC_CALL_SELFTEST */ diff --git a/kernel/sys.c b/kernel/sys.c -index 8fdac0d90504a..3e4e8930fafc6 100644 +index 8fdac0d90504a..2d2bc6396515e 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -472,6 +472,16 @@ static int set_user(struct cred *new) @@ -363085,6 +469935,15 @@ index 8fdac0d90504a..3e4e8930fafc6 100644 return commit_creds(new); error: +@@ -1567,6 +1575,8 @@ int do_prlimit(struct task_struct *tsk, unsigned int resource, + + if (resource >= RLIM_NLIMITS) + return -EINVAL; ++ resource = array_index_nospec(resource, RLIM_NLIMITS); ++ + if (new_rlim) { + if (new_rlim->rlim_cur > new_rlim->rlim_max) + return -EINVAL; diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c index f43d89d92860d..126380696f9c5 100644 --- a/kernel/sys_ni.c @@ -363098,7 +469957,7 @@ index f43d89d92860d..126380696f9c5 100644 /* mm/, CONFIG_MMU only */ COND_SYSCALL(swapon); diff --git a/kernel/sysctl.c b/kernel/sysctl.c -index 083be6af29d70..23c08bf3db58c 100644 +index 083be6af29d70..928798f89ca1d 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -113,15 +113,9 @@ @@ -363138,6 +469997,43 @@ index 083be6af29d70..23c08bf3db58c 100644 return ret; } #endif /* CONFIG_BPF_SYSCALL && CONFIG_SYSCTL */ +@@ -378,13 +379,14 @@ int proc_dostring(struct ctl_table *table, int write, + ppos); + } + +-static size_t proc_skip_spaces(char **buf) ++static void proc_skip_spaces(char **buf, size_t *size) + { +- size_t ret; +- char *tmp = skip_spaces(*buf); +- ret = tmp - *buf; +- *buf = tmp; +- return ret; ++ while (*size) { ++ if (!isspace(**buf)) ++ break; ++ (*size)--; ++ (*buf)++; ++ } + } + + static void proc_skip_char(char **buf, size_t *size, const char v) +@@ -453,13 +455,12 @@ static int proc_get_long(char **buf, size_t *size, + unsigned long *val, bool *neg, + const char *perm_tr, unsigned perm_tr_len, char *tr) + { +- int len; + char *p, tmp[TMPBUFLEN]; ++ ssize_t len = *size; + +- if (!*size) ++ if (len <= 0) + return -EINVAL; + +- len = *size; + if (len > TMPBUFLEN - 1) + len = TMPBUFLEN - 1; + @@ -559,14 +560,14 @@ static int do_proc_dointvec_conv(bool *negp, unsigned long *lvalp, if (*negp) { if (*lvalp > (unsigned long) INT_MAX + 1) @@ -363168,6 +470064,42 @@ index 083be6af29d70..23c08bf3db58c 100644 *lvalp = (unsigned long)val; } return 0; +@@ -632,7 +633,7 @@ static int 
__do_proc_dointvec(void *tbl_data, struct ctl_table *table, + bool neg; + + if (write) { +- left -= proc_skip_spaces(&p); ++ proc_skip_spaces(&p, &left); + + if (!left) + break; +@@ -659,7 +660,7 @@ static int __do_proc_dointvec(void *tbl_data, struct ctl_table *table, + if (!write && !first && left && !err) + proc_put_char(&buffer, &left, '\n'); + if (write && !err && left) +- left -= proc_skip_spaces(&p); ++ proc_skip_spaces(&p, &left); + if (write && first) + return err ? : -EINVAL; + *lenp -= left; +@@ -701,7 +702,7 @@ static int do_proc_douintvec_w(unsigned int *tbl_data, + if (left > PAGE_SIZE - 1) + left = PAGE_SIZE - 1; + +- left -= proc_skip_spaces(&p); ++ proc_skip_spaces(&p, &left); + if (!left) { + err = -EINVAL; + goto out_free; +@@ -721,7 +722,7 @@ static int do_proc_douintvec_w(unsigned int *tbl_data, + } + + if (!err && left) +- left -= proc_skip_spaces(&p); ++ proc_skip_spaces(&p, &left); + + out_free: + if (err) @@ -981,7 +982,7 @@ static int do_proc_dointvec_minmax_conv(bool *negp, unsigned long *lvalp, if ((param->min && *param->min > tmp) || (param->max && *param->max < tmp)) @@ -363202,6 +470134,15 @@ index 083be6af29d70..23c08bf3db58c 100644 return 0; } EXPORT_SYMBOL_GPL(proc_dou8vec_minmax); +@@ -1258,7 +1259,7 @@ static int __do_proc_doulongvec_minmax(void *data, struct ctl_table *table, + if (write) { + bool neg; + +- left -= proc_skip_spaces(&p); ++ proc_skip_spaces(&p, &left); + if (!left) + break; + @@ -1274,9 +1275,9 @@ static int __do_proc_doulongvec_minmax(void *data, struct ctl_table *table, err = -EINVAL; break; @@ -363214,6 +470155,15 @@ index 083be6af29d70..23c08bf3db58c 100644 if (!first) proc_put_char(&buffer, &left, '\t'); proc_put_long(&buffer, &left, val, false); +@@ -1286,7 +1287,7 @@ static int __do_proc_doulongvec_minmax(void *data, struct ctl_table *table, + if (!write && !first && left && !err) + proc_put_char(&buffer, &left, '\n'); + if (write && !err) +- left -= proc_skip_spaces(&p); ++ proc_skip_spaces(&p, &left); + if (write && first) + return err ? 
: -EINVAL; + *lenp -= left; @@ -1357,9 +1358,12 @@ static int do_proc_dointvec_jiffies_conv(bool *negp, unsigned long *lvalp, if (write) { if (*lvalp > INT_MAX / HZ) @@ -363261,7 +470211,25 @@ index 083be6af29d70..23c08bf3db58c 100644 .extra2 = SYSCTL_ONE, }, #endif -@@ -2304,7 +2308,7 @@ static struct ctl_table kern_table[] = { +@@ -2216,17 +2220,6 @@ static struct ctl_table kern_table[] = { + .proc_handler = proc_dointvec, + }, + #endif +-#ifdef CONFIG_SMP +- { +- .procname = "oops_all_cpu_backtrace", +- .data = &sysctl_oops_all_cpu_backtrace, +- .maxlen = sizeof(int), +- .mode = 0644, +- .proc_handler = proc_dointvec_minmax, +- .extra1 = SYSCTL_ZERO, +- .extra2 = SYSCTL_ONE, +- }, +-#endif /* CONFIG_SMP */ + { + .procname = "pid_max", + .data = &pid_max, +@@ -2304,7 +2297,7 @@ static struct ctl_table kern_table[] = { .mode = 0644, .proc_handler = proc_dointvec_minmax_sysadmin, .extra1 = SYSCTL_ZERO, @@ -363270,7 +470238,7 @@ index 083be6af29d70..23c08bf3db58c 100644 }, #endif { -@@ -2564,7 +2568,7 @@ static struct ctl_table kern_table[] = { +@@ -2564,7 +2557,7 @@ static struct ctl_table kern_table[] = { .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, @@ -363279,7 +470247,7 @@ index 083be6af29d70..23c08bf3db58c 100644 }, #endif #ifdef CONFIG_RT_MUTEXES -@@ -2626,7 +2630,7 @@ static struct ctl_table kern_table[] = { +@@ -2626,7 +2619,7 @@ static struct ctl_table kern_table[] = { .mode = 0644, .proc_handler = perf_cpu_time_max_percent_handler, .extra1 = SYSCTL_ZERO, @@ -363288,7 +470256,7 @@ index 083be6af29d70..23c08bf3db58c 100644 }, { .procname = "perf_event_max_stack", -@@ -2644,7 +2648,7 @@ static struct ctl_table kern_table[] = { +@@ -2644,7 +2637,7 @@ static struct ctl_table kern_table[] = { .mode = 0644, .proc_handler = perf_event_max_stack_handler, .extra1 = SYSCTL_ZERO, @@ -363297,7 +470265,7 @@ index 083be6af29d70..23c08bf3db58c 100644 }, #endif { -@@ -2675,7 +2679,7 @@ static struct ctl_table kern_table[] = { +@@ -2675,7 +2668,7 @@ static struct ctl_table kern_table[] = { .mode = 0644, .proc_handler = bpf_unpriv_handler, .extra1 = SYSCTL_ZERO, @@ -363306,7 +470274,7 @@ index 083be6af29d70..23c08bf3db58c 100644 }, { .procname = "bpf_stats_enabled", -@@ -2729,7 +2733,7 @@ static struct ctl_table vm_table[] = { +@@ -2729,7 +2722,7 @@ static struct ctl_table vm_table[] = { .mode = 0644, .proc_handler = overcommit_policy_handler, .extra1 = SYSCTL_ZERO, @@ -363315,7 +470283,7 @@ index 083be6af29d70..23c08bf3db58c 100644 }, { .procname = "panic_on_oom", -@@ -2738,7 +2742,7 @@ static struct ctl_table vm_table[] = { +@@ -2738,7 +2731,7 @@ static struct ctl_table vm_table[] = { .mode = 0644, .proc_handler = proc_dointvec_minmax, .extra1 = SYSCTL_ZERO, @@ -363324,7 +470292,7 @@ index 083be6af29d70..23c08bf3db58c 100644 }, { .procname = "oom_kill_allocating_task", -@@ -2783,7 +2787,7 @@ static struct ctl_table vm_table[] = { +@@ -2783,7 +2776,7 @@ static struct ctl_table vm_table[] = { .mode = 0644, .proc_handler = dirty_background_ratio_handler, .extra1 = SYSCTL_ZERO, @@ -363333,7 +470301,7 @@ index 083be6af29d70..23c08bf3db58c 100644 }, { .procname = "dirty_background_bytes", -@@ -2800,7 +2804,7 @@ static struct ctl_table vm_table[] = { +@@ -2800,7 +2793,7 @@ static struct ctl_table vm_table[] = { .mode = 0644, .proc_handler = dirty_ratio_handler, .extra1 = SYSCTL_ZERO, @@ -363342,13 +470310,13 @@ index 083be6af29d70..23c08bf3db58c 100644 }, { .procname = "dirty_bytes", -@@ -2840,8 +2844,19 @@ static struct ctl_table vm_table[] = { +@@ -2840,8 +2833,19 @@ 
static struct ctl_table vm_table[] = { .mode = 0644, .proc_handler = proc_dointvec_minmax, .extra1 = SYSCTL_ZERO, - .extra2 = &two_hundred, + .extra2 = SYSCTL_TWO_HUNDRED, - }, ++ }, +#ifdef CONFIG_NUMA + { + .procname = "numa_stat", @@ -363358,12 +470326,12 @@ index 083be6af29d70..23c08bf3db58c 100644 + .proc_handler = sysctl_vm_numa_stat_handler, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, -+ }, + }, +#endif #ifdef CONFIG_HUGETLB_PAGE { .procname = "nr_hugepages", -@@ -2858,15 +2873,6 @@ static struct ctl_table vm_table[] = { +@@ -2858,15 +2862,6 @@ static struct ctl_table vm_table[] = { .mode = 0644, .proc_handler = &hugetlb_mempolicy_sysctl_handler, }, @@ -363379,7 +470347,7 @@ index 083be6af29d70..23c08bf3db58c 100644 #endif { .procname = "hugetlb_shm_group", -@@ -2897,7 +2903,7 @@ static struct ctl_table vm_table[] = { +@@ -2897,7 +2892,7 @@ static struct ctl_table vm_table[] = { .mode = 0200, .proc_handler = drop_caches_sysctl_handler, .extra1 = SYSCTL_ONE, @@ -363388,7 +470356,7 @@ index 083be6af29d70..23c08bf3db58c 100644 }, #ifdef CONFIG_COMPACTION { -@@ -2914,7 +2920,7 @@ static struct ctl_table vm_table[] = { +@@ -2914,7 +2909,7 @@ static struct ctl_table vm_table[] = { .mode = 0644, .proc_handler = compaction_proactiveness_sysctl_handler, .extra1 = SYSCTL_ZERO, @@ -363397,7 +470365,7 @@ index 083be6af29d70..23c08bf3db58c 100644 }, { .procname = "extfrag_threshold", -@@ -2959,7 +2965,7 @@ static struct ctl_table vm_table[] = { +@@ -2959,7 +2954,7 @@ static struct ctl_table vm_table[] = { .mode = 0644, .proc_handler = watermark_scale_factor_sysctl_handler, .extra1 = SYSCTL_ONE, @@ -363406,7 +470374,7 @@ index 083be6af29d70..23c08bf3db58c 100644 }, { .procname = "percpu_pagelist_high_fraction", -@@ -3038,7 +3044,7 @@ static struct ctl_table vm_table[] = { +@@ -3038,7 +3033,7 @@ static struct ctl_table vm_table[] = { .mode = 0644, .proc_handler = sysctl_min_unmapped_ratio_sysctl_handler, .extra1 = SYSCTL_ZERO, @@ -363415,7 +470383,7 @@ index 083be6af29d70..23c08bf3db58c 100644 }, { .procname = "min_slab_ratio", -@@ -3047,7 +3053,7 @@ static struct ctl_table vm_table[] = { +@@ -3047,7 +3042,7 @@ static struct ctl_table vm_table[] = { .mode = 0644, .proc_handler = sysctl_min_slab_ratio_sysctl_handler, .extra1 = SYSCTL_ZERO, @@ -363424,7 +470392,7 @@ index 083be6af29d70..23c08bf3db58c 100644 }, #endif #ifdef CONFIG_SMP -@@ -3337,7 +3343,7 @@ static struct ctl_table fs_table[] = { +@@ -3337,7 +3332,7 @@ static struct ctl_table fs_table[] = { .mode = 0600, .proc_handler = proc_dointvec_minmax, .extra1 = SYSCTL_ZERO, @@ -363433,7 +470401,7 @@ index 083be6af29d70..23c08bf3db58c 100644 }, { .procname = "protected_regular", -@@ -3346,7 +3352,7 @@ static struct ctl_table fs_table[] = { +@@ -3346,7 +3341,7 @@ static struct ctl_table fs_table[] = { .mode = 0600, .proc_handler = proc_dointvec_minmax, .extra1 = SYSCTL_ZERO, @@ -363442,7 +470410,7 @@ index 083be6af29d70..23c08bf3db58c 100644 }, { .procname = "suid_dumpable", -@@ -3355,7 +3361,7 @@ static struct ctl_table fs_table[] = { +@@ -3355,7 +3350,7 @@ static struct ctl_table fs_table[] = { .mode = 0644, .proc_handler = proc_dointvec_minmax_coredump, .extra1 = SYSCTL_ZERO, @@ -363451,6 +470419,61 @@ index 083be6af29d70..23c08bf3db58c 100644 }, #if defined(CONFIG_BINFMT_MISC) || defined(CONFIG_BINFMT_MISC_MODULE) { +diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c +index 5897828b9d7ed..7e5dff602585d 100644 +--- a/kernel/time/alarmtimer.c ++++ b/kernel/time/alarmtimer.c +@@ -470,11 +470,35 @@ u64 alarm_forward(struct 
alarm *alarm, ktime_t now, ktime_t interval) + } + EXPORT_SYMBOL_GPL(alarm_forward); + +-u64 alarm_forward_now(struct alarm *alarm, ktime_t interval) ++static u64 __alarm_forward_now(struct alarm *alarm, ktime_t interval, bool throttle) + { + struct alarm_base *base = &alarm_bases[alarm->type]; ++ ktime_t now = base->get_ktime(); ++ ++ if (IS_ENABLED(CONFIG_HIGH_RES_TIMERS) && throttle) { ++ /* ++ * Same issue as with posix_timer_fn(). Timers which are ++ * periodic but the signal is ignored can starve the system ++ * with a very small interval. The real fix which was ++ * promised in the context of posix_timer_fn() never ++ * materialized, but someone should really work on it. ++ * ++ * To prevent DOS fake @now to be 1 jiffie out which keeps ++ * the overrun accounting correct but creates an ++ * inconsistency vs. timer_gettime(2). ++ */ ++ ktime_t kj = NSEC_PER_SEC / HZ; ++ ++ if (interval < kj) ++ now = ktime_add(now, kj); ++ } ++ ++ return alarm_forward(alarm, now, interval); ++} + +- return alarm_forward(alarm, base->get_ktime(), interval); ++u64 alarm_forward_now(struct alarm *alarm, ktime_t interval) ++{ ++ return __alarm_forward_now(alarm, interval, false); + } + EXPORT_SYMBOL_GPL(alarm_forward_now); + +@@ -551,9 +575,10 @@ static enum alarmtimer_restart alarm_handle_timer(struct alarm *alarm, + if (posix_timer_event(ptr, si_private) && ptr->it_interval) { + /* + * Handle ignored signals and rearm the timer. This will go +- * away once we handle ignored signals proper. ++ * away once we handle ignored signals proper. Ensure that ++ * small intervals cannot starve the system. + */ +- ptr->it_overrun += alarm_forward_now(alarm, ptr->it_interval); ++ ptr->it_overrun += __alarm_forward_now(alarm, ptr->it_interval, true); + ++ptr->it_requeue_pending; + ptr->it_active = 1; + result = ALARMTIMER_RESTART; diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index b8a14d2fb5ba6..bcad1a1e5dcf1 100644 --- a/kernel/time/clocksource.c @@ -363810,6 +470833,26 @@ index e3d2c23c413d4..9dd2a39cb3b00 100644 } -EXPORT_SYMBOL(usleep_range); +EXPORT_SYMBOL(usleep_range_state); +diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig +index 420ff4bc67fd7..4265d125d50f3 100644 +--- a/kernel/trace/Kconfig ++++ b/kernel/trace/Kconfig +@@ -328,6 +328,7 @@ config SCHED_TRACER + config HWLAT_TRACER + bool "Tracer to detect hardware latencies (like SMIs)" + select GENERIC_TRACER ++ select TRACER_MAX_TRACE + help + This tracer, when enabled will create one or more kernel threads, + depending on what the cpumask file is set to, which each thread +@@ -363,6 +364,7 @@ config HWLAT_TRACER + config OSNOISE_TRACER + bool "OS Noise tracer" + select GENERIC_TRACER ++ select TRACER_MAX_TRACE + help + In the context of high-performance computing (HPC), the Operating + System Noise (osnoise) refers to the interference experienced by an diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile index 6de5d4d631658..bedc5caceec70 100644 --- a/kernel/trace/Makefile @@ -363823,7 +470866,7 @@ index 6de5d4d631658..bedc5caceec70 100644 obj-$(CONFIG_PREEMPTIRQ_DELAY_TEST) += preemptirq_delay_test.o obj-$(CONFIG_SYNTH_EVENT_GEN_TEST) += synth_event_gen_test.o diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c -index fa91f398f28b7..eaa98e2b468fc 100644 +index fa91f398f28b7..16b0d3fa56e00 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -310,10 +310,20 @@ record_it: @@ -363889,7 +470932,17 @@ index fa91f398f28b7..eaa98e2b468fc 100644 sizeof(r), &r, blk_trace_request_get_cgid(rq)); 
rcu_read_unlock(); } -@@ -1615,7 +1625,7 @@ static int blk_trace_remove_queue(struct request_queue *q) +@@ -1537,7 +1547,8 @@ blk_trace_event_print_binary(struct trace_iterator *iter, int flags, + + static enum print_line_t blk_tracer_print_line(struct trace_iterator *iter) + { +- if (!(blk_tracer_flags.val & TRACE_BLK_OPT_CLASSIC)) ++ if ((iter->ent->type != TRACE_BLK) || ++ !(blk_tracer_flags.val & TRACE_BLK_OPT_CLASSIC)) + return TRACE_TYPE_UNHANDLED; + + return print_one_line(iter, true); +@@ -1615,7 +1626,7 @@ static int blk_trace_remove_queue(struct request_queue *q) put_probe_ref(); synchronize_rcu(); @@ -363898,7 +470951,7 @@ index fa91f398f28b7..eaa98e2b468fc 100644 return 0; } -@@ -1646,7 +1656,7 @@ static int blk_trace_setup_queue(struct request_queue *q, +@@ -1646,7 +1657,7 @@ static int blk_trace_setup_queue(struct request_queue *q, return 0; free_bt: @@ -363908,7 +470961,7 @@ index fa91f398f28b7..eaa98e2b468fc 100644 } diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c -index 8e2eb950aa829..c289010b0964e 100644 +index 8e2eb950aa829..b314e71a008ce 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -345,7 +345,7 @@ static const struct bpf_func_proto bpf_probe_write_user_proto = { @@ -363968,7 +471021,34 @@ index 8e2eb950aa829..c289010b0964e 100644 .arg5_type = ARG_CONST_SIZE_OR_ZERO, }; -@@ -958,7 +958,7 @@ const struct bpf_func_proto bpf_snprintf_btf_proto = { +@@ -776,6 +776,7 @@ static void do_bpf_send_signal(struct irq_work *entry) + + work = container_of(entry, struct send_signal_irq_work, irq_work); + group_send_sig_info(work->sig, SEND_SIG_PRIV, work->task, work->type); ++ put_task_struct(work->task); + } + + static int bpf_send_signal_common(u32 sig, enum pid_type type) +@@ -793,6 +794,9 @@ static int bpf_send_signal_common(u32 sig, enum pid_type type) + return -EPERM; + if (unlikely(!nmi_uaccess_okay())) + return -EPERM; ++ /* Task should not be pid=1 to avoid kernel panic. */ ++ if (unlikely(is_global_init(current))) ++ return -EPERM; + + if (irqs_disabled()) { + /* Do an early check on signal validity. Otherwise, +@@ -809,7 +813,7 @@ static int bpf_send_signal_common(u32 sig, enum pid_type type) + * to the irq_work. The current task may change when queued + * irq works get executed. 
+ */ +- work->task = current; ++ work->task = get_task_struct(current); + work->sig = sig; + work->type = type; + irq_work_queue(&work->irq_work); +@@ -958,7 +962,7 @@ const struct bpf_func_proto bpf_snprintf_btf_proto = { .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_MEM, .arg2_type = ARG_CONST_SIZE, @@ -363977,7 +471057,7 @@ index 8e2eb950aa829..c289010b0964e 100644 .arg4_type = ARG_CONST_SIZE, .arg5_type = ARG_ANYTHING, }; -@@ -1037,8 +1037,6 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) +@@ -1037,8 +1041,6 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_ktime_get_ns_proto; case BPF_FUNC_ktime_get_boot_ns: return &bpf_ktime_get_boot_ns_proto; @@ -363986,7 +471066,7 @@ index 8e2eb950aa829..c289010b0964e 100644 case BPF_FUNC_tail_call: return &bpf_tail_call_proto; case BPF_FUNC_get_current_pid_tgid: -@@ -1209,7 +1207,7 @@ static const struct bpf_func_proto bpf_perf_event_output_proto_tp = { +@@ -1209,7 +1211,7 @@ static const struct bpf_func_proto bpf_perf_event_output_proto_tp = { .arg1_type = ARG_PTR_TO_CTX, .arg2_type = ARG_CONST_MAP_PTR, .arg3_type = ARG_ANYTHING, @@ -363995,7 +471075,7 @@ index 8e2eb950aa829..c289010b0964e 100644 .arg5_type = ARG_CONST_SIZE_OR_ZERO, }; -@@ -1324,9 +1322,6 @@ static const struct bpf_func_proto bpf_perf_prog_read_value_proto = { +@@ -1324,9 +1326,6 @@ static const struct bpf_func_proto bpf_perf_prog_read_value_proto = { BPF_CALL_4(bpf_read_branch_records, struct bpf_perf_event_data_kern *, ctx, void *, buf, u32, size, u64, flags) { @@ -364005,7 +471085,7 @@ index 8e2eb950aa829..c289010b0964e 100644 static const u32 br_entry_size = sizeof(struct perf_branch_entry); struct perf_branch_stack *br_stack = ctx->data->br_stack; u32 to_copy; -@@ -1335,7 +1330,7 @@ BPF_CALL_4(bpf_read_branch_records, struct bpf_perf_event_data_kern *, ctx, +@@ -1335,7 +1334,7 @@ BPF_CALL_4(bpf_read_branch_records, struct bpf_perf_event_data_kern *, ctx, return -EINVAL; if (unlikely(!br_stack)) @@ -364014,7 +471094,7 @@ index 8e2eb950aa829..c289010b0964e 100644 if (flags & BPF_F_GET_BRANCH_RECORDS_SIZE) return br_stack->nr * br_entry_size; -@@ -1347,7 +1342,6 @@ BPF_CALL_4(bpf_read_branch_records, struct bpf_perf_event_data_kern *, ctx, +@@ -1347,7 +1346,6 @@ BPF_CALL_4(bpf_read_branch_records, struct bpf_perf_event_data_kern *, ctx, memcpy(buf, br_stack->entries, to_copy); return to_copy; @@ -364022,7 +471102,7 @@ index 8e2eb950aa829..c289010b0964e 100644 } static const struct bpf_func_proto bpf_read_branch_records_proto = { -@@ -1435,7 +1429,7 @@ static const struct bpf_func_proto bpf_perf_event_output_proto_raw_tp = { +@@ -1435,7 +1433,7 @@ static const struct bpf_func_proto bpf_perf_event_output_proto_raw_tp = { .arg1_type = ARG_PTR_TO_CTX, .arg2_type = ARG_CONST_MAP_PTR, .arg3_type = ARG_ANYTHING, @@ -364031,7 +471111,7 @@ index 8e2eb950aa829..c289010b0964e 100644 .arg5_type = ARG_CONST_SIZE_OR_ZERO, }; -@@ -1489,7 +1483,7 @@ static const struct bpf_func_proto bpf_get_stack_proto_raw_tp = { +@@ -1489,7 +1487,7 @@ static const struct bpf_func_proto bpf_get_stack_proto_raw_tp = { .gpl_only = true, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_CTX, @@ -364041,7 +471121,7 @@ index 8e2eb950aa829..c289010b0964e 100644 .arg4_type = ARG_ANYTHING, }; diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c -index feebf57c64588..37db5bfa8edc1 100644 +index feebf57c64588..731f25a409687 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -988,8 +988,9 @@ static __init void 
ftrace_profile_tracefs(struct dentry *d_tracer) @@ -364056,7 +471136,15 @@ index feebf57c64588..37db5bfa8edc1 100644 if (!entry) pr_warn("Could not create tracefs 'function_profile_enabled' entry\n"); } -@@ -2900,6 +2901,16 @@ int ftrace_startup(struct ftrace_ops *ops, int command) +@@ -1294,6 +1295,7 @@ static int ftrace_add_mod(struct trace_array *tr, + if (!ftrace_mod) + return -ENOMEM; + ++ INIT_LIST_HEAD(&ftrace_mod->list); + ftrace_mod->func = kstrdup(func, GFP_KERNEL); + ftrace_mod->module = kstrdup(module, GFP_KERNEL); + ftrace_mod->enable = enable; +@@ -2900,6 +2902,16 @@ int ftrace_startup(struct ftrace_ops *ops, int command) ftrace_startup_enable(command); @@ -364073,7 +471161,53 @@ index feebf57c64588..37db5bfa8edc1 100644 ops->flags &= ~FTRACE_OPS_FL_ADDING; return 0; -@@ -4419,7 +4430,7 @@ int ftrace_func_mapper_add_ip(struct ftrace_func_mapper *mapper, +@@ -2937,18 +2949,8 @@ int ftrace_shutdown(struct ftrace_ops *ops, int command) + command |= FTRACE_UPDATE_TRACE_FUNC; + } + +- if (!command || !ftrace_enabled) { +- /* +- * If these are dynamic or per_cpu ops, they still +- * need their data freed. Since, function tracing is +- * not currently active, we can just free them +- * without synchronizing all CPUs. +- */ +- if (ops->flags & FTRACE_OPS_FL_DYNAMIC) +- goto free_ops; +- +- return 0; +- } ++ if (!command || !ftrace_enabled) ++ goto out; + + /* + * If the ops uses a trampoline, then it needs to be +@@ -2985,6 +2987,7 @@ int ftrace_shutdown(struct ftrace_ops *ops, int command) + removed_ops = NULL; + ops->flags &= ~FTRACE_OPS_FL_REMOVING; + ++out: + /* + * Dynamic ops may be freed, we must make sure that all + * callers are done before leaving this function. +@@ -3012,7 +3015,6 @@ int ftrace_shutdown(struct ftrace_ops *ops, int command) + if (IS_ENABLED(CONFIG_PREEMPTION)) + synchronize_rcu_tasks(); + +- free_ops: + ftrace_trampoline_free(ops); + } + +@@ -3173,7 +3175,7 @@ static int ftrace_allocate_records(struct ftrace_page *pg, int count) + /* if we can't allocate this size, try something smaller */ + if (!order) + return -ENOMEM; +- order >>= 1; ++ order--; + goto again; + } + +@@ -4419,7 +4421,7 @@ int ftrace_func_mapper_add_ip(struct ftrace_func_mapper *mapper, * @ip: The instruction pointer address to remove the data from * * Returns the data if it is found, otherwise NULL. @@ -364082,7 +471216,7 @@ index feebf57c64588..37db5bfa8edc1 100644 * ftrace_func_mapper_find_ip(), then the return value may be meaningless, * if the data pointer was set to zero. 
*/ -@@ -5145,8 +5156,6 @@ int register_ftrace_direct(unsigned long ip, unsigned long addr) +@@ -5145,8 +5147,6 @@ int register_ftrace_direct(unsigned long ip, unsigned long addr) __add_hash_entry(direct_functions, entry); ret = ftrace_set_filter_ip(&direct_ops, ip, 0, 0); @@ -364091,7 +471225,7 @@ index feebf57c64588..37db5bfa8edc1 100644 if (!ret && !(direct_ops.flags & FTRACE_OPS_FL_ENABLED)) { ret = register_ftrace_function(&direct_ops); -@@ -5155,6 +5164,7 @@ int register_ftrace_direct(unsigned long ip, unsigned long addr) +@@ -5155,6 +5155,7 @@ int register_ftrace_direct(unsigned long ip, unsigned long addr) } if (ret) { @@ -364099,7 +471233,7 @@ index feebf57c64588..37db5bfa8edc1 100644 kfree(entry); if (!direct->count) { list_del_rcu(&direct->next); -@@ -5644,8 +5654,12 @@ int ftrace_regex_release(struct inode *inode, struct file *file) +@@ -5644,8 +5645,12 @@ int ftrace_regex_release(struct inode *inode, struct file *file) if (filter_hash) { orig_hash = &iter->ops->func_hash->filter_hash; @@ -364114,7 +471248,7 @@ index feebf57c64588..37db5bfa8edc1 100644 } else orig_hash = &iter->ops->func_hash->notrace_hash; -@@ -6109,10 +6123,10 @@ void ftrace_create_filter_files(struct ftrace_ops *ops, +@@ -6109,10 +6114,10 @@ void ftrace_create_filter_files(struct ftrace_ops *ops, struct dentry *parent) { @@ -364127,7 +471261,7 @@ index feebf57c64588..37db5bfa8edc1 100644 ops, &ftrace_notrace_fops); } -@@ -6139,19 +6153,19 @@ void ftrace_destroy_filter_files(struct ftrace_ops *ops) +@@ -6139,19 +6144,19 @@ void ftrace_destroy_filter_files(struct ftrace_ops *ops) static __init int ftrace_init_dyn_tracefs(struct dentry *d_tracer) { @@ -364151,7 +471285,16 @@ index feebf57c64588..37db5bfa8edc1 100644 NULL, &ftrace_graph_notrace_fops); #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ -@@ -7184,10 +7198,10 @@ static void clear_ftrace_pids(struct trace_array *tr, int type) +@@ -6866,7 +6871,7 @@ void __init ftrace_init(void) + } + + pr_info("ftrace: allocating %ld entries in %ld pages\n", +- count, count / ENTRIES_PER_PAGE + 1); ++ count, DIV_ROUND_UP(count, ENTRIES_PER_PAGE)); + + last_ftrace_enabled = ftrace_enabled = 1; + +@@ -7184,10 +7189,10 @@ static void clear_ftrace_pids(struct trace_array *tr, int type) synchronize_rcu(); if ((type & TRACE_PIDS) && pid_list) @@ -364164,7 +471307,7 @@ index feebf57c64588..37db5bfa8edc1 100644 } void ftrace_clear_pids(struct trace_array *tr) -@@ -7428,7 +7442,7 @@ pid_write(struct file *filp, const char __user *ubuf, +@@ -7428,7 +7433,7 @@ pid_write(struct file *filp, const char __user *ubuf, if (filtered_pids) { synchronize_rcu(); @@ -364173,7 +471316,7 @@ index feebf57c64588..37db5bfa8edc1 100644 } else if (pid_list && !other_pids) { /* Register a probe to set whether to ignore the tracing of a task */ register_trace_sched_switch(ftrace_filter_pid_sched_switch_probe, tr); -@@ -7494,10 +7508,10 @@ static const struct file_operations ftrace_no_pid_fops = { +@@ -7494,10 +7499,10 @@ static const struct file_operations ftrace_no_pid_fops = { void ftrace_init_tracefs(struct trace_array *tr, struct dentry *d_tracer) { @@ -364188,10 +471331,10 @@ index feebf57c64588..37db5bfa8edc1 100644 void __init ftrace_init_tracefs_toplevel(struct trace_array *tr, diff --git a/kernel/trace/kprobe_event_gen_test.c b/kernel/trace/kprobe_event_gen_test.c -index 18b0f1cbb947f..80e04a1e19772 100644 +index 18b0f1cbb947f..c736487fc0e48 100644 --- a/kernel/trace/kprobe_event_gen_test.c +++ b/kernel/trace/kprobe_event_gen_test.c -@@ -35,6 +35,45 @@ +@@ -35,6 +35,49 @@ static struct 
trace_event_file *gen_kprobe_test; static struct trace_event_file *gen_kretprobe_test; @@ -364233,11 +471376,15 @@ index 18b0f1cbb947f..80e04a1e19772 100644 +#define KPROBE_GEN_TEST_ARG3 NULL +#endif + ++static bool trace_event_file_is_valid(struct trace_event_file *input) ++{ ++ return input && !IS_ERR(input); ++} + /* * Test to make sure we can create a kprobe event, then add more * fields. -@@ -58,14 +97,14 @@ static int __init test_gen_kprobe_cmd(void) +@@ -58,23 +101,23 @@ static int __init test_gen_kprobe_cmd(void) * fields. */ ret = kprobe_event_gen_cmd_start(&cmd, "gen_kprobe_test", @@ -364246,16 +471393,45 @@ index 18b0f1cbb947f..80e04a1e19772 100644 + KPROBE_GEN_TEST_FUNC, + KPROBE_GEN_TEST_ARG0, KPROBE_GEN_TEST_ARG1); if (ret) - goto free; +- goto free; ++ goto out; /* Use kprobe_event_add_fields to add the rest of the fields */ - ret = kprobe_event_add_fields(&cmd, "flags=%cx", "mode=+4($stack)"); + ret = kprobe_event_add_fields(&cmd, KPROBE_GEN_TEST_ARG2, KPROBE_GEN_TEST_ARG3); if (ret) - goto free; +- goto free; ++ goto out; -@@ -128,7 +167,7 @@ static int __init test_gen_kretprobe_cmd(void) + /* + * This actually creates the event. + */ + ret = kprobe_event_gen_cmd_end(&cmd); + if (ret) +- goto free; ++ goto out; + + /* + * Now get the gen_kprobe_test event file. We need to prevent +@@ -97,13 +140,13 @@ static int __init test_gen_kprobe_cmd(void) + goto delete; + } + out: ++ kfree(buf); + return ret; + delete: ++ if (trace_event_file_is_valid(gen_kprobe_test)) ++ gen_kprobe_test = NULL; + /* We got an error after creating the event, delete it */ + ret = kprobe_event_delete("gen_kprobe_test"); +- free: +- kfree(buf); +- + goto out; + } + +@@ -128,17 +171,17 @@ static int __init test_gen_kretprobe_cmd(void) * Define the kretprobe event. */ ret = kretprobe_event_gen_cmd_start(&cmd, "gen_kretprobe_test", @@ -364263,16 +471439,96 @@ index 18b0f1cbb947f..80e04a1e19772 100644 + KPROBE_GEN_TEST_FUNC, "$retval"); if (ret) - goto free; -@@ -206,7 +245,7 @@ static void __exit kprobe_event_gen_test_exit(void) +- goto free; ++ goto out; + + /* + * This actually creates the event. + */ + ret = kretprobe_event_gen_cmd_end(&cmd); + if (ret) +- goto free; ++ goto out; + + /* + * Now get the gen_kretprobe_test event file. 
We need to +@@ -162,13 +205,13 @@ static int __init test_gen_kretprobe_cmd(void) + goto delete; + } + out: ++ kfree(buf); + return ret; + delete: ++ if (trace_event_file_is_valid(gen_kretprobe_test)) ++ gen_kretprobe_test = NULL; + /* We got an error after creating the event, delete it */ + ret = kprobe_event_delete("gen_kretprobe_test"); +- free: +- kfree(buf); +- + goto out; + } + +@@ -182,10 +225,12 @@ static int __init kprobe_event_gen_test_init(void) + + ret = test_gen_kretprobe_cmd(); + if (ret) { +- WARN_ON(trace_array_set_clr_event(gen_kretprobe_test->tr, +- "kprobes", +- "gen_kretprobe_test", false)); +- trace_put_event_file(gen_kretprobe_test); ++ if (trace_event_file_is_valid(gen_kretprobe_test)) { ++ WARN_ON(trace_array_set_clr_event(gen_kretprobe_test->tr, ++ "kprobes", ++ "gen_kretprobe_test", false)); ++ trace_put_event_file(gen_kretprobe_test); ++ } + WARN_ON(kprobe_event_delete("gen_kretprobe_test")); + } + +@@ -194,24 +239,30 @@ static int __init kprobe_event_gen_test_init(void) + + static void __exit kprobe_event_gen_test_exit(void) + { +- /* Disable the event or you can't remove it */ +- WARN_ON(trace_array_set_clr_event(gen_kprobe_test->tr, +- "kprobes", +- "gen_kprobe_test", false)); ++ if (trace_event_file_is_valid(gen_kprobe_test)) { ++ /* Disable the event or you can't remove it */ ++ WARN_ON(trace_array_set_clr_event(gen_kprobe_test->tr, ++ "kprobes", ++ "gen_kprobe_test", false)); ++ ++ /* Now give the file and instance back */ ++ trace_put_event_file(gen_kprobe_test); ++ } + +- /* Now give the file and instance back */ +- trace_put_event_file(gen_kprobe_test); + + /* Now unregister and free the event */ WARN_ON(kprobe_event_delete("gen_kprobe_test")); - /* Disable the event or you can't remove it */ +- /* Disable the event or you can't remove it */ - WARN_ON(trace_array_set_clr_event(gen_kprobe_test->tr, -+ WARN_ON(trace_array_set_clr_event(gen_kretprobe_test->tr, - "kprobes", - "gen_kretprobe_test", false)); +- "kprobes", +- "gen_kretprobe_test", false)); ++ if (trace_event_file_is_valid(gen_kretprobe_test)) { ++ /* Disable the event or you can't remove it */ ++ WARN_ON(trace_array_set_clr_event(gen_kretprobe_test->tr, ++ "kprobes", ++ "gen_kretprobe_test", false)); ++ ++ /* Now give the file and instance back */ ++ trace_put_event_file(gen_kretprobe_test); ++ } + +- /* Now give the file and instance back */ +- trace_put_event_file(gen_kretprobe_test); + /* Now unregister and free the event */ + WARN_ON(kprobe_event_delete("gen_kretprobe_test")); diff --git a/kernel/trace/pid_list.c b/kernel/trace/pid_list.c new file mode 100644 index 0000000000000..4483ef70b5626 @@ -364459,7 +471715,7 @@ index 0000000000000..80d0ecfe1536e + +#endif /* _TRACE_INTERNAL_PID_LIST_H */ diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c -index c5a3fbf19617e..90fc95e852322 100644 +index c5a3fbf19617e..ffc8696e67467 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -405,6 +405,7 @@ struct rb_irq_work { @@ -364470,7 +471726,56 @@ index c5a3fbf19617e..90fc95e852322 100644 bool waiters_pending; bool full_waiters_pending; bool wakeup_full; -@@ -881,12 +882,44 @@ static void rb_wake_up_waiters(struct irq_work *work) +@@ -509,6 +510,7 @@ struct ring_buffer_per_cpu { + local_t committing; + local_t commits; + local_t pages_touched; ++ local_t pages_lost; + local_t pages_read; + long last_pages_touch; + size_t shortest_full; +@@ -857,10 +859,18 @@ size_t ring_buffer_nr_pages(struct trace_buffer *buffer, int cpu) + size_t 
ring_buffer_nr_dirty_pages(struct trace_buffer *buffer, int cpu) + { + size_t read; ++ size_t lost; + size_t cnt; + + read = local_read(&buffer->buffers[cpu]->pages_read); ++ lost = local_read(&buffer->buffers[cpu]->pages_lost); + cnt = local_read(&buffer->buffers[cpu]->pages_touched); ++ ++ if (WARN_ON_ONCE(cnt < lost)) ++ return 0; ++ ++ cnt -= lost; ++ + /* The reader can read an empty page, but not more than that */ + if (cnt < read) { + WARN_ON_ONCE(read > cnt + 1); +@@ -870,6 +880,21 @@ size_t ring_buffer_nr_dirty_pages(struct trace_buffer *buffer, int cpu) + return cnt - read; + } + ++static __always_inline bool full_hit(struct trace_buffer *buffer, int cpu, int full) ++{ ++ struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu]; ++ size_t nr_pages; ++ size_t dirty; ++ ++ nr_pages = cpu_buffer->nr_pages; ++ if (!nr_pages || !full) ++ return true; ++ ++ dirty = ring_buffer_nr_dirty_pages(buffer, cpu); ++ ++ return (dirty * 100) > (full * nr_pages); ++} ++ + /* + * rb_wake_up_waiters - wake up tasks waiting for ring buffer input + * +@@ -881,12 +906,55 @@ static void rb_wake_up_waiters(struct irq_work *work) struct rb_irq_work *rbwork = container_of(work, struct rb_irq_work, work); wake_up_all(&rbwork->waiters); @@ -364494,6 +471799,9 @@ index c5a3fbf19617e..90fc95e852322 100644 + struct ring_buffer_per_cpu *cpu_buffer; + struct rb_irq_work *rbwork; + ++ if (!buffer) ++ return; ++ + if (cpu == RING_BUFFER_ALL_CPUS) { + + /* Wake up individual ones too. One level recursion */ @@ -364502,7 +471810,15 @@ index c5a3fbf19617e..90fc95e852322 100644 + + rbwork = &buffer->irq_work; + } else { ++ if (WARN_ON_ONCE(!buffer->buffers)) ++ return; ++ if (WARN_ON_ONCE(cpu >= nr_cpu_ids)) ++ return; ++ + cpu_buffer = buffer->buffers[cpu]; ++ /* The CPU buffer may not have been initialized yet */ ++ if (!cpu_buffer) ++ return; + rbwork = &cpu_buffer->irq_work; + } + @@ -364516,7 +471832,7 @@ index c5a3fbf19617e..90fc95e852322 100644 /** * ring_buffer_wait - wait for input to the ring buffer * @buffer: buffer to wait on -@@ -902,6 +935,7 @@ int ring_buffer_wait(struct trace_buffer *buffer, int cpu, int full) +@@ -902,6 +970,7 @@ int ring_buffer_wait(struct trace_buffer *buffer, int cpu, int full) struct ring_buffer_per_cpu *cpu_buffer; DEFINE_WAIT(wait); struct rb_irq_work *work; @@ -364524,7 +471840,7 @@ index c5a3fbf19617e..90fc95e852322 100644 int ret = 0; /* -@@ -920,6 +954,7 @@ int ring_buffer_wait(struct trace_buffer *buffer, int cpu, int full) +@@ -920,6 +989,7 @@ int ring_buffer_wait(struct trace_buffer *buffer, int cpu, int full) work = &cpu_buffer->irq_work; } @@ -364532,16 +471848,32 @@ index c5a3fbf19617e..90fc95e852322 100644 while (true) { if (full) -@@ -975,7 +1010,7 @@ int ring_buffer_wait(struct trace_buffer *buffer, int cpu, int full) - nr_pages = cpu_buffer->nr_pages; - dirty = ring_buffer_nr_dirty_pages(buffer, cpu); +@@ -964,26 +1034,29 @@ int ring_buffer_wait(struct trace_buffer *buffer, int cpu, int full) + !ring_buffer_empty_cpu(buffer, cpu)) { + unsigned long flags; + bool pagebusy; +- size_t nr_pages; +- size_t dirty; ++ bool done; + + if (!full) + break; + + raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags); + pagebusy = cpu_buffer->reader_page == cpu_buffer->commit_page; +- nr_pages = cpu_buffer->nr_pages; +- dirty = ring_buffer_nr_dirty_pages(buffer, cpu); ++ done = !pagebusy && full_hit(buffer, cpu, full); ++ if (!cpu_buffer->shortest_full || - cpu_buffer->shortest_full < full) + cpu_buffer->shortest_full > full) cpu_buffer->shortest_full = full; 
raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); - if (!pagebusy && -@@ -984,6 +1019,11 @@ int ring_buffer_wait(struct trace_buffer *buffer, int cpu, int full) +- if (!pagebusy && +- (!nr_pages || (dirty * 100) > full * nr_pages)) ++ if (done) + break; } schedule(); @@ -364553,7 +471885,89 @@ index c5a3fbf19617e..90fc95e852322 100644 } if (full) -@@ -2572,6 +2612,9 @@ rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer, +@@ -1000,6 +1073,7 @@ int ring_buffer_wait(struct trace_buffer *buffer, int cpu, int full) + * @cpu: the cpu buffer to wait on + * @filp: the file descriptor + * @poll_table: The poll descriptor ++ * @full: wait until the percentage of pages are available, if @cpu != RING_BUFFER_ALL_CPUS + * + * If @cpu == RING_BUFFER_ALL_CPUS then the task will wake up as soon + * as data is added to any of the @buffer's cpu buffers. Otherwise +@@ -1009,14 +1083,15 @@ int ring_buffer_wait(struct trace_buffer *buffer, int cpu, int full) + * zero otherwise. + */ + __poll_t ring_buffer_poll_wait(struct trace_buffer *buffer, int cpu, +- struct file *filp, poll_table *poll_table) ++ struct file *filp, poll_table *poll_table, int full) + { + struct ring_buffer_per_cpu *cpu_buffer; + struct rb_irq_work *work; + +- if (cpu == RING_BUFFER_ALL_CPUS) ++ if (cpu == RING_BUFFER_ALL_CPUS) { + work = &buffer->irq_work; +- else { ++ full = 0; ++ } else { + if (!cpumask_test_cpu(cpu, buffer->cpumask)) + return -EINVAL; + +@@ -1024,8 +1099,14 @@ __poll_t ring_buffer_poll_wait(struct trace_buffer *buffer, int cpu, + work = &cpu_buffer->irq_work; + } + +- poll_wait(filp, &work->waiters, poll_table); +- work->waiters_pending = true; ++ if (full) { ++ poll_wait(filp, &work->full_waiters, poll_table); ++ work->full_waiters_pending = true; ++ } else { ++ poll_wait(filp, &work->waiters, poll_table); ++ work->waiters_pending = true; ++ } ++ + /* + * There's a tight race between setting the waiters_pending and + * checking if the ring buffer is empty. Once the waiters_pending bit +@@ -1041,6 +1122,9 @@ __poll_t ring_buffer_poll_wait(struct trace_buffer *buffer, int cpu, + */ + smp_mb(); + ++ if (full) ++ return full_hit(buffer, cpu, full) ? 
EPOLLIN | EPOLLRDNORM : 0; ++ + if ((cpu == RING_BUFFER_ALL_CPUS && !ring_buffer_empty(buffer)) || + (cpu != RING_BUFFER_ALL_CPUS && !ring_buffer_empty_cpu(buffer, cpu))) + return EPOLLIN | EPOLLRDNORM; +@@ -1682,9 +1766,9 @@ static void rb_free_cpu_buffer(struct ring_buffer_per_cpu *cpu_buffer) + + free_buffer_page(cpu_buffer->reader_page); + +- rb_head_page_deactivate(cpu_buffer); +- + if (head) { ++ rb_head_page_deactivate(cpu_buffer); ++ + list_for_each_entry_safe(bpage, tmp, head, list) { + list_del_init(&bpage->list); + free_buffer_page(bpage); +@@ -1920,6 +2004,7 @@ rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned long nr_pages) + */ + local_add(page_entries, &cpu_buffer->overrun); + local_sub(BUF_PAGE_SIZE, &cpu_buffer->entries_bytes); ++ local_inc(&cpu_buffer->pages_lost); + } + + /* +@@ -2404,6 +2489,7 @@ rb_handle_head_page(struct ring_buffer_per_cpu *cpu_buffer, + */ + local_add(entries, &cpu_buffer->overrun); + local_sub(BUF_PAGE_SIZE, &cpu_buffer->entries_bytes); ++ local_inc(&cpu_buffer->pages_lost); + + /* + * The entries will be zeroed out when we move the +@@ -2572,6 +2658,9 @@ rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer, /* Mark the rest of the page with padding */ rb_event_set_padding(event); @@ -364563,7 +471977,7 @@ index c5a3fbf19617e..90fc95e852322 100644 /* Set the write back to the previous setting */ local_sub(length, &tail_page->write); return; -@@ -2583,6 +2626,9 @@ rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer, +@@ -2583,6 +2672,9 @@ rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer, /* time delta must be non zero */ event->time_delta = 1; @@ -364573,7 +471987,30 @@ index c5a3fbf19617e..90fc95e852322 100644 /* Set write to end of buffer */ length = (tail + length) - BUF_PAGE_SIZE; local_sub(length, &tail_page->write); -@@ -4547,6 +4593,33 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer) +@@ -3055,10 +3147,6 @@ static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer, + static __always_inline void + rb_wakeups(struct trace_buffer *buffer, struct ring_buffer_per_cpu *cpu_buffer) + { +- size_t nr_pages; +- size_t dirty; +- size_t full; +- + if (buffer->irq_work.waiters_pending) { + buffer->irq_work.waiters_pending = false; + /* irq_work_queue() supplies it's own memory barriers */ +@@ -3082,10 +3170,7 @@ rb_wakeups(struct trace_buffer *buffer, struct ring_buffer_per_cpu *cpu_buffer) + + cpu_buffer->last_pages_touch = local_read(&cpu_buffer->pages_touched); + +- full = cpu_buffer->shortest_full; +- nr_pages = cpu_buffer->nr_pages; +- dirty = ring_buffer_nr_dirty_pages(buffer, cpu_buffer->cpu); +- if (full && nr_pages && (dirty * 100) <= full * nr_pages) ++ if (!full_hit(buffer, cpu_buffer->cpu, cpu_buffer->shortest_full)) + return; + + cpu_buffer->irq_work.wakeup_full = true; +@@ -4547,6 +4632,33 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer) arch_spin_unlock(&cpu_buffer->lock); local_irq_restore(flags); @@ -364607,7 +472044,15 @@ index c5a3fbf19617e..90fc95e852322 100644 return reader; } -@@ -5233,6 +5306,9 @@ void ring_buffer_reset(struct trace_buffer *buffer) +@@ -5122,6 +5234,7 @@ rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer) + local_set(&cpu_buffer->committing, 0); + local_set(&cpu_buffer->commits, 0); + local_set(&cpu_buffer->pages_touched, 0); ++ local_set(&cpu_buffer->pages_lost, 0); + local_set(&cpu_buffer->pages_read, 0); + cpu_buffer->last_pages_touch = 0; + cpu_buffer->shortest_full = 0; +@@ -5233,6 +5346,9 @@ void ring_buffer_reset(struct trace_buffer *buffer) struct 
ring_buffer_per_cpu *cpu_buffer; int cpu; @@ -364617,7 +472062,7 @@ index c5a3fbf19617e..90fc95e852322 100644 for_each_buffer_cpu(buffer, cpu) { cpu_buffer = buffer->buffers[cpu]; -@@ -5251,6 +5327,8 @@ void ring_buffer_reset(struct trace_buffer *buffer) +@@ -5251,6 +5367,8 @@ void ring_buffer_reset(struct trace_buffer *buffer) atomic_dec(&cpu_buffer->record_disabled); atomic_dec(&cpu_buffer->resize_disabled); } @@ -364626,7 +472071,7 @@ index c5a3fbf19617e..90fc95e852322 100644 } EXPORT_SYMBOL_GPL(ring_buffer_reset); -@@ -5569,7 +5647,15 @@ int ring_buffer_read_page(struct trace_buffer *buffer, +@@ -5569,7 +5687,15 @@ int ring_buffer_read_page(struct trace_buffer *buffer, unsigned int pos = 0; unsigned int size; @@ -364643,8 +472088,50 @@ index c5a3fbf19617e..90fc95e852322 100644 goto out_unlock; if (len > (commit - read)) +diff --git a/kernel/trace/synth_event_gen_test.c b/kernel/trace/synth_event_gen_test.c +index 0b15e975d2c2c..8d77526892f45 100644 +--- a/kernel/trace/synth_event_gen_test.c ++++ b/kernel/trace/synth_event_gen_test.c +@@ -120,15 +120,13 @@ static int __init test_gen_synth_cmd(void) + + /* Now generate a gen_synth_test event */ + ret = synth_event_trace_array(gen_synth_test, vals, ARRAY_SIZE(vals)); +- out: ++ free: ++ kfree(buf); + return ret; + delete: + /* We got an error after creating the event, delete it */ + synth_event_delete("gen_synth_test"); +- free: +- kfree(buf); +- +- goto out; ++ goto free; + } + + /* +@@ -227,15 +225,13 @@ static int __init test_empty_synth_event(void) + + /* Now trace an empty_synth_test event */ + ret = synth_event_trace_array(empty_synth_test, vals, ARRAY_SIZE(vals)); +- out: ++ free: ++ kfree(buf); + return ret; + delete: + /* We got an error after creating the event, delete it */ + synth_event_delete("empty_synth_test"); +- free: +- kfree(buf); +- +- goto out; ++ goto free; + } + + static struct synth_field_desc create_synth_test_fields[] = { diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c -index bc677cd642240..24a5ea9a2cc04 100644 +index bc677cd642240..161ffc56afa3d 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -235,7 +235,7 @@ static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata; @@ -364903,7 +472390,15 @@ index bc677cd642240..24a5ea9a2cc04 100644 return ret; } -@@ -1492,10 +1480,12 @@ static int __init set_buf_size(char *str) +@@ -1421,6 +1409,7 @@ int tracing_snapshot_cond_disable(struct trace_array *tr) + return false; + } + EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable); ++#define free_snapshot(tr) do { } while (0) + #endif /* CONFIG_TRACER_SNAPSHOT */ + + void tracer_tracing_off(struct trace_array *tr) +@@ -1492,10 +1481,12 @@ static int __init set_buf_size(char *str) if (!str) return 0; buf_size = memparse(str, &str); @@ -364920,7 +472415,16 @@ index bc677cd642240..24a5ea9a2cc04 100644 return 1; } __setup("trace_buf_size=", set_buf_size); -@@ -1714,7 +1704,8 @@ static void trace_create_maxlat_file(struct trace_array *tr, +@@ -1689,6 +1680,8 @@ static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt) + } + + unsigned long __read_mostly tracing_thresh; ++ ++#ifdef CONFIG_TRACER_MAX_TRACE + static const struct file_operations tracing_max_lat_fops; + + #ifdef LATENCY_FS_NOTIFY +@@ -1714,7 +1707,8 @@ static void trace_create_maxlat_file(struct trace_array *tr, { INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn); init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq); @@ -364930,8 +472434,13 @@ index bc677cd642240..24a5ea9a2cc04 100644 d_tracer, 
&tr->max_latency, &tracing_max_lat_fops); } -@@ -1748,8 +1739,8 @@ void latency_fsnotify(struct trace_array *tr) - || defined(CONFIG_OSNOISE_TRACER) +@@ -1744,18 +1738,14 @@ void latency_fsnotify(struct trace_array *tr) + irq_work_queue(&tr->fsnotify_irqwork); + } + +-#elif defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER) \ +- || defined(CONFIG_OSNOISE_TRACER) ++#else /* !LATENCY_FS_NOTIFY */ #define trace_create_maxlat_file(tr, d_tracer) \ - trace_create_file("tracing_max_latency", 0644, d_tracer, \ @@ -364939,9 +472448,42 @@ index bc677cd642240..24a5ea9a2cc04 100644 + trace_create_file("tracing_max_latency", TRACE_MODE_WRITE, \ + d_tracer, &tr->max_latency, &tracing_max_lat_fops) - #else - #define trace_create_maxlat_file(tr, d_tracer) do { } while (0) -@@ -2202,6 +2193,11 @@ static size_t tgid_map_max; +-#else +-#define trace_create_maxlat_file(tr, d_tracer) do { } while (0) + #endif + +-#ifdef CONFIG_TRACER_MAX_TRACE + /* + * Copy the new maximum trace into the separate maximum-trace + * structure. (this way the maximum trace is permanently saved, +@@ -1830,14 +1820,15 @@ update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu, + ring_buffer_record_off(tr->max_buffer.buffer); + + #ifdef CONFIG_TRACER_SNAPSHOT +- if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data)) +- goto out_unlock; ++ if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data)) { ++ arch_spin_unlock(&tr->max_lock); ++ return; ++ } + #endif + swap(tr->array_buffer.buffer, tr->max_buffer.buffer); + + __update_max_tr(tr, tsk, cpu); + +- out_unlock: + arch_spin_unlock(&tr->max_lock); + } + +@@ -1884,6 +1875,7 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu) + __update_max_tr(tr, tsk, cpu); + arch_spin_unlock(&tr->max_lock); + } ++ + #endif /* CONFIG_TRACER_MAX_TRACE */ + + static int wait_on_pipe(struct trace_iterator *iter, int full) +@@ -2202,6 +2194,11 @@ static size_t tgid_map_max; #define SAVED_CMDLINES_DEFAULT 128 #define NO_CMDLINE_MAP UINT_MAX @@ -364953,7 +472495,7 @@ index bc677cd642240..24a5ea9a2cc04 100644 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED; struct saved_cmdlines_buffer { unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1]; -@@ -2414,7 +2410,11 @@ static int trace_save_cmdline(struct task_struct *tsk) +@@ -2414,7 +2411,11 @@ static int trace_save_cmdline(struct task_struct *tsk) * the lock, but we also don't want to spin * nor do we want to disable interrupts, * so if we miss here, then better luck next time. 
@@ -364965,7 +472507,7 @@ index bc677cd642240..24a5ea9a2cc04 100644 if (!arch_spin_trylock(&trace_cmdline_lock)) return 0; -@@ -2833,7 +2833,7 @@ trace_event_buffer_lock_reserve(struct trace_buffer **current_rb, +@@ -2833,7 +2834,7 @@ trace_event_buffer_lock_reserve(struct trace_buffer **current_rb, } EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve); @@ -364974,7 +472516,7 @@ index bc677cd642240..24a5ea9a2cc04 100644 static DEFINE_MUTEX(tracepoint_printk_mutex); static void output_printk(struct trace_event_buffer *fbuffer) -@@ -2861,14 +2861,14 @@ static void output_printk(struct trace_event_buffer *fbuffer) +@@ -2861,14 +2862,14 @@ static void output_printk(struct trace_event_buffer *fbuffer) event = &fbuffer->trace_file->event_call->event; @@ -364991,7 +472533,7 @@ index bc677cd642240..24a5ea9a2cc04 100644 } int tracepoint_printk_sysctl(struct ctl_table *table, int write, -@@ -3230,7 +3230,7 @@ struct trace_buffer_struct { +@@ -3230,7 +3231,7 @@ struct trace_buffer_struct { char buffer[4][TRACE_BUF_SIZE]; }; @@ -365000,7 +472542,7 @@ index bc677cd642240..24a5ea9a2cc04 100644 /* * This allows for lockless recording. If we're nested too deeply, then -@@ -3240,7 +3240,7 @@ static char *get_trace_buf(void) +@@ -3240,7 +3241,7 @@ static char *get_trace_buf(void) { struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer); @@ -365009,7 +472551,7 @@ index bc677cd642240..24a5ea9a2cc04 100644 return NULL; buffer->nesting++; -@@ -3259,7 +3259,7 @@ static void put_trace_buf(void) +@@ -3259,7 +3260,7 @@ static void put_trace_buf(void) static int alloc_percpu_trace_buffer(void) { @@ -365018,7 +472560,7 @@ index bc677cd642240..24a5ea9a2cc04 100644 if (trace_percpu_buffer) return 0; -@@ -3671,12 +3671,17 @@ static char *trace_iter_expand_format(struct trace_iterator *iter) +@@ -3671,12 +3672,17 @@ static char *trace_iter_expand_format(struct trace_iterator *iter) } /* Returns true if the string is safe to dereference from an event */ @@ -365037,7 +472579,7 @@ index bc677cd642240..24a5ea9a2cc04 100644 /* OK if part of the event data */ if ((addr >= (unsigned long)iter->ent) && (addr < (unsigned long)iter->ent + iter->ent_size)) -@@ -3835,6 +3840,18 @@ void trace_check_vprintf(struct trace_iterator *iter, const char *fmt, +@@ -3835,6 +3841,18 @@ void trace_check_vprintf(struct trace_iterator *iter, const char *fmt, iter->fmt[i] = '\0'; trace_seq_vprintf(&iter->seq, iter->fmt, ap); @@ -365056,7 +472598,7 @@ index bc677cd642240..24a5ea9a2cc04 100644 if (star) len = va_arg(ap, int); -@@ -3850,7 +3867,7 @@ void trace_check_vprintf(struct trace_iterator *iter, const char *fmt, +@@ -3850,7 +3868,7 @@ void trace_check_vprintf(struct trace_iterator *iter, const char *fmt, * instead. See samples/trace_events/trace-events-sample.h * for reference. 
*/ @@ -365065,7 +472607,7 @@ index bc677cd642240..24a5ea9a2cc04 100644 "fmt: '%s' current_buffer: '%s'", fmt, show_buffer(&iter->seq))) { int ret; -@@ -5859,9 +5876,11 @@ tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf, +@@ -5859,9 +5877,11 @@ tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf, char buf[64]; int r; @@ -365077,7 +472619,7 @@ index bc677cd642240..24a5ea9a2cc04 100644 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); } -@@ -5886,10 +5905,12 @@ static int tracing_resize_saved_cmdlines(unsigned int val) +@@ -5886,10 +5906,12 @@ static int tracing_resize_saved_cmdlines(unsigned int val) return -ENOMEM; } @@ -365090,7 +472632,7 @@ index bc677cd642240..24a5ea9a2cc04 100644 free_saved_cmdlines_buffer(savedcmd_temp); return 0; -@@ -6077,7 +6098,7 @@ trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start, +@@ -6077,7 +6099,7 @@ trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start, static void trace_create_eval_file(struct dentry *d_tracer) { @@ -365099,7 +472641,7 @@ index bc677cd642240..24a5ea9a2cc04 100644 NULL, &tracing_eval_map_fops); } -@@ -6296,12 +6317,18 @@ static void tracing_set_nop(struct trace_array *tr) +@@ -6296,12 +6318,18 @@ static void tracing_set_nop(struct trace_array *tr) tr->current_trace = &nop_trace; } @@ -365118,7 +472660,7 @@ index bc677cd642240..24a5ea9a2cc04 100644 create_trace_option_files(tr, t); } -@@ -6336,10 +6363,12 @@ int tracing_set_tracer(struct trace_array *tr, const char *buf) +@@ -6336,10 +6364,12 @@ int tracing_set_tracer(struct trace_array *tr, const char *buf) #ifdef CONFIG_TRACER_SNAPSHOT if (t->use_max_tr) { @@ -365131,7 +472673,7 @@ index bc677cd642240..24a5ea9a2cc04 100644 if (ret) goto out; } -@@ -6370,12 +6399,12 @@ int tracing_set_tracer(struct trace_array *tr, const char *buf) +@@ -6370,12 +6400,12 @@ int tracing_set_tracer(struct trace_array *tr, const char *buf) if (tr->current_trace->reset) tr->current_trace->reset(tr); @@ -365147,7 +472689,7 @@ index bc677cd642240..24a5ea9a2cc04 100644 if (had_max_tr && !t->use_max_tr) { /* * We need to make sure that the update_max_tr sees that -@@ -6387,14 +6416,14 @@ int tracing_set_tracer(struct trace_array *tr, const char *buf) +@@ -6387,14 +6417,14 @@ int tracing_set_tracer(struct trace_array *tr, const char *buf) synchronize_rcu(); free_snapshot(tr); } @@ -365165,7 +472707,55 @@ index bc677cd642240..24a5ea9a2cc04 100644 #endif if (t->init) { -@@ -7412,10 +7441,12 @@ tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt, +@@ -6507,7 +6537,7 @@ out: + return ret; + } + +-#if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER) ++#ifdef CONFIG_TRACER_MAX_TRACE + + static ssize_t + tracing_max_lat_read(struct file *filp, char __user *ubuf, +@@ -6601,6 +6631,7 @@ static int tracing_release_pipe(struct inode *inode, struct file *file) + mutex_unlock(&trace_types_lock); + + free_cpumask_var(iter->started); ++ kfree(iter->fmt); + mutex_destroy(&iter->mutex); + kfree(iter); + +@@ -6625,7 +6656,7 @@ trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_tabl + return EPOLLIN | EPOLLRDNORM; + else + return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file, +- filp, poll_table); ++ filp, poll_table, iter->tr->buffer_percent); + } + + static __poll_t +@@ -6733,7 +6764,20 @@ waitagain: + + ret = print_trace_line(iter); + if (ret == TRACE_TYPE_PARTIAL_LINE) { +- /* don't print partial lines */ ++ /* ++ * If one print_trace_line() fills entire 
trace_seq in one shot, ++ * trace_seq_to_user() will returns -EBUSY because save_len == 0, ++ * In this case, we need to consume it, otherwise, loop will peek ++ * this event next time, resulting in an infinite loop. ++ */ ++ if (save_len == 0) { ++ iter->seq.full = 0; ++ trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n"); ++ trace_consume(iter); ++ break; ++ } ++ ++ /* In other cases, don't print partial lines */ + iter->seq.seq.len = save_len; + break; + } +@@ -7412,10 +7456,12 @@ tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt, goto out; } @@ -365178,7 +472768,16 @@ index bc677cd642240..24a5ea9a2cc04 100644 if (ret) goto out; -@@ -7736,7 +7767,8 @@ static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr) +@@ -7528,7 +7574,7 @@ static const struct file_operations tracing_thresh_fops = { + .llseek = generic_file_llseek, + }; + +-#if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER) ++#ifdef CONFIG_TRACER_MAX_TRACE + static const struct file_operations tracing_max_lat_fops = { + .open = tracing_open_generic, + .read = tracing_max_lat_read, +@@ -7736,7 +7782,8 @@ static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr) err = kzalloc(sizeof(*err), GFP_KERNEL); if (!err) err = ERR_PTR(-ENOMEM); @@ -365188,7 +472787,7 @@ index bc677cd642240..24a5ea9a2cc04 100644 return err; } -@@ -8085,6 +8117,12 @@ static int tracing_buffers_release(struct inode *inode, struct file *file) +@@ -8085,6 +8132,12 @@ static int tracing_buffers_release(struct inode *inode, struct file *file) __trace_array_put(iter->tr); @@ -365201,7 +472800,7 @@ index bc677cd642240..24a5ea9a2cc04 100644 if (info->spare) ring_buffer_free_read_page(iter->array_buffer->buffer, info->spare_cpu, info->spare); -@@ -8238,6 +8276,8 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos, +@@ -8238,6 +8291,8 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos, /* did we read anything? 
*/ if (!spd.nr_pages) { @@ -365210,7 +472809,7 @@ index bc677cd642240..24a5ea9a2cc04 100644 if (ret) goto out; -@@ -8245,10 +8285,21 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos, +@@ -8245,10 +8300,21 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos, if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK)) goto out; @@ -365232,7 +472831,7 @@ index bc677cd642240..24a5ea9a2cc04 100644 goto again; } -@@ -8259,12 +8310,34 @@ out: +@@ -8259,12 +8325,34 @@ out: return ret; } @@ -365267,7 +472866,7 @@ index bc677cd642240..24a5ea9a2cc04 100644 .llseek = no_llseek, }; -@@ -8590,27 +8663,27 @@ tracing_init_tracefs_percpu(struct trace_array *tr, long cpu) +@@ -8590,27 +8678,27 @@ tracing_init_tracefs_percpu(struct trace_array *tr, long cpu) } /* per cpu trace_pipe */ @@ -365302,7 +472901,7 @@ index bc677cd642240..24a5ea9a2cc04 100644 tr, cpu, &snapshot_raw_fops); #endif } -@@ -8816,8 +8889,8 @@ create_trace_option_file(struct trace_array *tr, +@@ -8816,8 +8904,8 @@ create_trace_option_file(struct trace_array *tr, topt->opt = opt; topt->tr = tr; @@ -365313,7 +472912,7 @@ index bc677cd642240..24a5ea9a2cc04 100644 } -@@ -8892,7 +8965,7 @@ create_trace_option_core_file(struct trace_array *tr, +@@ -8892,7 +8980,7 @@ create_trace_option_core_file(struct trace_array *tr, if (!t_options) return NULL; @@ -365322,7 +472921,7 @@ index bc677cd642240..24a5ea9a2cc04 100644 (void *)&tr->trace_flags_index[index], &trace_options_core_fops); } -@@ -8953,6 +9026,8 @@ rb_simple_write(struct file *filp, const char __user *ubuf, +@@ -8953,6 +9041,8 @@ rb_simple_write(struct file *filp, const char __user *ubuf, tracer_tracing_off(tr); if (tr->current_trace->stop) tr->current_trace->stop(tr); @@ -365331,7 +472930,17 @@ index bc677cd642240..24a5ea9a2cc04 100644 } mutex_unlock(&trace_types_lock); } -@@ -9121,6 +9196,7 @@ static void __update_tracer_options(struct trace_array *tr) +@@ -8999,9 +9089,6 @@ buffer_percent_write(struct file *filp, const char __user *ubuf, + if (val > 100) + return -EINVAL; + +- if (!val) +- val = 1; +- + tr->buffer_percent = val; + + (*ppos)++; +@@ -9121,6 +9208,7 @@ static void __update_tracer_options(struct trace_array *tr) static void update_tracer_options(struct trace_array *tr) { mutex_lock(&trace_types_lock); @@ -365339,7 +472948,7 @@ index bc677cd642240..24a5ea9a2cc04 100644 __update_tracer_options(tr); mutex_unlock(&trace_types_lock); } -@@ -9417,28 +9493,28 @@ init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer) +@@ -9417,28 +9505,28 @@ init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer) struct trace_event_file *file; int cpu; @@ -365376,7 +472985,7 @@ index bc677cd642240..24a5ea9a2cc04 100644 tr, &tracing_total_entries_fops); trace_create_file("free_buffer", 0200, d_tracer, -@@ -9449,25 +9525,25 @@ init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer) +@@ -9449,40 +9537,42 @@ init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer) file = __find_event_file(tr, "ftrace", "print"); if (file && file->dir) @@ -365408,7 +473017,12 @@ index bc677cd642240..24a5ea9a2cc04 100644 tr, &buffer_percent_fops); create_trace_options_dir(tr); -@@ -9478,11 +9554,11 @@ init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer) + ++#ifdef CONFIG_TRACER_MAX_TRACE + trace_create_maxlat_file(tr, d_tracer); ++#endif + + if (ftrace_create_function_files(tr, d_tracer)) MEM_FAIL(1, "Could not allocate function filter files"); #ifdef CONFIG_TRACER_SNAPSHOT @@ -365422,7 +473036,7 @@ index 
bc677cd642240..24a5ea9a2cc04 100644 tr, &tracing_err_log_fops); for_each_tracing_cpu(cpu) -@@ -9675,19 +9751,19 @@ static __init int tracer_init_tracefs(void) +@@ -9675,19 +9765,19 @@ static __init int tracer_init_tracefs(void) init_tracer_tracefs(&global_trace, NULL); ftrace_init_tracefs_toplevel(&global_trace, NULL); @@ -365447,7 +473061,7 @@ index bc677cd642240..24a5ea9a2cc04 100644 NULL, &tracing_saved_tgids_fops); trace_eval_init(); -@@ -9699,7 +9775,7 @@ static __init int tracer_init_tracefs(void) +@@ -9699,7 +9789,7 @@ static __init int tracer_init_tracefs(void) #endif #ifdef CONFIG_DYNAMIC_FTRACE @@ -365456,7 +473070,7 @@ index bc677cd642240..24a5ea9a2cc04 100644 NULL, &tracing_dyn_info_fops); #endif -@@ -9799,6 +9875,12 @@ void trace_init_global_iter(struct trace_iterator *iter) +@@ -9799,6 +9889,12 @@ void trace_init_global_iter(struct trace_iterator *iter) /* Output in nanoseconds only if we are using a clock in nanoseconds. */ if (trace_clocks[iter->tr->clock_id].in_ns) iter->iter_flags |= TRACE_FILE_TIME_IN_NS; @@ -365469,7 +473083,7 @@ index bc677cd642240..24a5ea9a2cc04 100644 } void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) -@@ -9831,11 +9913,6 @@ void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) +@@ -9831,11 +9927,6 @@ void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) /* Simulate the iterator */ trace_init_global_iter(&iter); @@ -365481,8 +473095,17 @@ index bc677cd642240..24a5ea9a2cc04 100644 for_each_tracing_cpu(cpu) { atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled); +@@ -10123,6 +10214,8 @@ void __init early_trace_init(void) + static_key_enable(&tracepoint_printk_key.key); + } + tracer_alloc_buffers(); ++ ++ init_events(); + } + + void __init trace_init(void) diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h -index b7c0f8e160fb4..28ea6c0be4953 100644 +index b7c0f8e160fb4..66b6c8395fbc1 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -22,11 +22,16 @@ @@ -365531,7 +473154,32 @@ index b7c0f8e160fb4..28ea6c0be4953 100644 enum { TRACE_PIDS = BIT(0), -@@ -1357,14 +1369,26 @@ __event_trigger_test_discard(struct trace_event_file *file, +@@ -297,8 +309,7 @@ struct trace_array { + struct array_buffer max_buffer; + bool allocated_snapshot; + #endif +-#if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER) \ +- || defined(CONFIG_OSNOISE_TRACER) ++#ifdef CONFIG_TRACER_MAX_TRACE + unsigned long max_latency; + #ifdef CONFIG_FSNOTIFY + struct dentry *d_max_latency; +@@ -676,12 +687,11 @@ void update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu, + void *cond_data); + void update_max_tr_single(struct trace_array *tr, + struct task_struct *tsk, int cpu); +-#endif /* CONFIG_TRACER_MAX_TRACE */ + +-#if (defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER) \ +- || defined(CONFIG_OSNOISE_TRACER)) && defined(CONFIG_FSNOTIFY) ++#ifdef CONFIG_FSNOTIFY + #define LATENCY_FS_NOTIFY + #endif ++#endif /* CONFIG_TRACER_MAX_TRACE */ + + #ifdef LATENCY_FS_NOTIFY + void latency_fsnotify(struct trace_array *tr); +@@ -1357,14 +1367,26 @@ __event_trigger_test_discard(struct trace_event_file *file, if (eflags & EVENT_FILE_FL_TRIGGER_COND) *tt = event_triggers_call(file, buffer, entry, event); @@ -365564,6 +473212,48 @@ index b7c0f8e160fb4..28ea6c0be4953 100644 } /** +@@ -1478,6 +1500,7 @@ extern void trace_event_enable_cmd_record(bool enable); + extern void trace_event_enable_tgid_record(bool enable); + + extern int event_trace_init(void); ++extern int init_events(void); + extern int 
event_trace_add_tracer(struct dentry *parent, struct trace_array *tr); + extern int event_trace_del_tracer(struct trace_array *tr); + extern void __trace_early_add_events(struct trace_array *tr); +@@ -1917,17 +1940,30 @@ static __always_inline void trace_iterator_reset(struct trace_iterator *iter) + } + + /* Check the name is good for event/group/fields */ +-static inline bool is_good_name(const char *name) ++static inline bool __is_good_name(const char *name, bool hash_ok) + { +- if (!isalpha(*name) && *name != '_') ++ if (!isalpha(*name) && *name != '_' && (!hash_ok || *name != '-')) + return false; + while (*++name != '\0') { +- if (!isalpha(*name) && !isdigit(*name) && *name != '_') ++ if (!isalpha(*name) && !isdigit(*name) && *name != '_' && ++ (!hash_ok || *name != '-')) + return false; + } + return true; + } + ++/* Check the name is good for event/group/fields */ ++static inline bool is_good_name(const char *name) ++{ ++ return __is_good_name(name, false); ++} ++ ++/* Check the name is good for system */ ++static inline bool is_good_system_name(const char *name) ++{ ++ return __is_good_name(name, true); ++} ++ + /* Convert certain expected symbols into '_' when generating event names */ + static inline void sanitize_event_name(char *name) + { diff --git a/kernel/trace/trace_boot.c b/kernel/trace/trace_boot.c index 8d252f63cd784..778200dd8edea 100644 --- a/kernel/trace/trace_boot.c @@ -365596,10 +473286,26 @@ index 8d252f63cd784..778200dd8edea 100644 pr_err("Failed to apply hist trigger: %s\n", tmp); kfree(tmp); diff --git a/kernel/trace/trace_dynevent.c b/kernel/trace/trace_dynevent.c -index 1110112e55bd7..e34e8182ee4b5 100644 +index 1110112e55bd7..d4f7137233234 100644 --- a/kernel/trace/trace_dynevent.c +++ b/kernel/trace/trace_dynevent.c -@@ -262,7 +262,7 @@ static __init int init_dynamic_event(void) +@@ -118,6 +118,7 @@ int dyn_event_release(const char *raw_command, struct dyn_event_operations *type + if (ret) + break; + } ++ tracing_reset_all_online_cpus(); + mutex_unlock(&event_mutex); + out: + argv_free(argv); +@@ -214,6 +215,7 @@ int dyn_events_release_all(struct dyn_event_operations *type) + break; + } + out: ++ tracing_reset_all_online_cpus(); + mutex_unlock(&event_mutex); + + return ret; +@@ -262,7 +264,7 @@ static __init int init_dynamic_event(void) if (ret) return 0; @@ -365609,7 +473315,7 @@ index 1110112e55bd7..e34e8182ee4b5 100644 /* Event list interface */ diff --git a/kernel/trace/trace_eprobe.c b/kernel/trace/trace_eprobe.c -index 928867f527e70..399e6d0663489 100644 +index 928867f527e70..9806316af1279 100644 --- a/kernel/trace/trace_eprobe.c +++ b/kernel/trace/trace_eprobe.c @@ -16,6 +16,7 @@ @@ -365834,7 +473540,20 @@ index 928867f527e70..399e6d0663489 100644 } static nokprobe_inline int -@@ -849,6 +871,10 @@ static int trace_eprobe_tp_update_arg(struct trace_eprobe *ep, const char *argv[ +@@ -545,6 +567,12 @@ static void eprobe_trigger_func(struct event_trigger_data *data, + { + struct eprobe_data *edata = data->private_data; + ++ if (unlikely(!rec)) ++ return; ++ ++ if (unlikely(!rec)) ++ return; ++ + __eprobe_trace_func(edata, rec); + } + +@@ -849,6 +877,10 @@ static int trace_eprobe_tp_update_arg(struct trace_eprobe *ep, const char *argv[ if (ep->tp.args[i].code->op == FETCH_OP_TP_ARG) ret = trace_eprobe_tp_arg_update(ep, i); @@ -365909,7 +473628,7 @@ index 6aed10e2f7ce0..083f648e32650 100644 /* diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c -index 830b3b9940f4c..c84c94334a606 100644 +index 830b3b9940f4c..1aadc9a6487b5 100644 --- 
a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -40,6 +40,14 @@ static LIST_HEAD(ftrace_generic_fields); @@ -366144,7 +473863,32 @@ index 830b3b9940f4c..c84c94334a606 100644 file->event_call = call; file->tr = tr; atomic_set(&file->sm_ref, 0); -@@ -2840,6 +2938,7 @@ static void trace_module_add_events(struct module *mod) +@@ -2776,7 +2874,10 @@ static int probe_remove_event_call(struct trace_event_call *call) + * TRACE_REG_UNREGISTER. + */ + if (file->flags & EVENT_FILE_FL_ENABLED) +- return -EBUSY; ++ goto busy; ++ ++ if (file->flags & EVENT_FILE_FL_WAS_ENABLED) ++ tr->clear_trace = true; + /* + * The do_for_each_event_file_safe() is + * a double loop. After finding the call for this +@@ -2789,6 +2890,12 @@ static int probe_remove_event_call(struct trace_event_call *call) + __trace_remove_event_call(call); + + return 0; ++ busy: ++ /* No need to clear the trace now */ ++ list_for_each_entry(tr, &ftrace_trace_arrays, list) { ++ tr->clear_trace = false; ++ } ++ return -EBUSY; + } + + /* Remove an event_call */ +@@ -2840,6 +2947,7 @@ static void trace_module_add_events(struct module *mod) static void trace_module_remove_events(struct module *mod) { struct trace_event_call *call, *p; @@ -366152,7 +473896,7 @@ index 830b3b9940f4c..c84c94334a606 100644 down_write(&trace_event_sem); list_for_each_entry_safe(call, p, &ftrace_events, list) { -@@ -2848,6 +2947,14 @@ static void trace_module_remove_events(struct module *mod) +@@ -2848,6 +2956,14 @@ static void trace_module_remove_events(struct module *mod) if (call->module == mod) __trace_remove_event_call(call); } @@ -366167,7 +473911,7 @@ index 830b3b9940f4c..c84c94334a606 100644 up_write(&trace_event_sem); /* -@@ -3433,7 +3540,7 @@ create_event_toplevel_files(struct dentry *parent, struct trace_array *tr) +@@ -3433,7 +3549,7 @@ create_event_toplevel_files(struct dentry *parent, struct trace_array *tr) struct dentry *d_events; struct dentry *entry; @@ -366176,7 +473920,7 @@ index 830b3b9940f4c..c84c94334a606 100644 tr, &ftrace_set_event_fops); if (!entry) { pr_warn("Could not create tracefs 'set_event' entry\n"); -@@ -3446,7 +3553,7 @@ create_event_toplevel_files(struct dentry *parent, struct trace_array *tr) +@@ -3446,7 +3562,7 @@ create_event_toplevel_files(struct dentry *parent, struct trace_array *tr) return -ENOMEM; } @@ -366185,7 +473929,7 @@ index 830b3b9940f4c..c84c94334a606 100644 tr, &ftrace_tr_enable_fops); if (!entry) { pr_warn("Could not create tracefs 'enable' entry\n"); -@@ -3455,24 +3562,25 @@ create_event_toplevel_files(struct dentry *parent, struct trace_array *tr) +@@ -3455,24 +3571,25 @@ create_event_toplevel_files(struct dentry *parent, struct trace_array *tr) /* There are not as crucial, just warn if they are not created */ @@ -366216,7 +473960,7 @@ index 830b3b9940f4c..c84c94334a606 100644 ring_buffer_print_entry_header, &ftrace_show_header_fops); if (!entry) -@@ -3689,8 +3797,8 @@ __init int event_trace_init(void) +@@ -3689,8 +3806,8 @@ __init int event_trace_init(void) if (!tr) return -ENODEV; @@ -366396,10 +474140,28 @@ index c9124038b140f..06d6318ee5377 100644 enum { diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c -index f01e442716e2f..d5c7b9a37ed53 100644 +index f01e442716e2f..aaf779ee68a6a 100644 --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c -@@ -1733,9 +1733,10 @@ static struct hist_field *create_hist_field(struct hist_trigger_data *hist_data, +@@ -452,7 +452,7 @@ struct action_data { + * event param, and is passed to the synthetic 
event + * invocation. + */ +- unsigned int var_ref_idx[TRACING_MAP_VARS_MAX]; ++ unsigned int var_ref_idx[SYNTH_FIELDS_MAX]; + struct synth_event *synth_event; + bool use_trace_keyword; + char *synth_event_name; +@@ -1699,6 +1699,8 @@ static struct hist_field *create_hist_field(struct hist_trigger_data *hist_data, + hist_field->fn = flags & HIST_FIELD_FL_LOG2 ? hist_field_log2 : + hist_field_bucket; + hist_field->operands[0] = create_hist_field(hist_data, field, fl, NULL); ++ if (!hist_field->operands[0]) ++ goto free; + hist_field->size = hist_field->operands[0]->size; + hist_field->type = kstrdup_const(hist_field->operands[0]->type, GFP_KERNEL); + if (!hist_field->type) +@@ -1733,9 +1735,10 @@ static struct hist_field *create_hist_field(struct hist_trigger_data *hist_data, if (!hist_field->type) goto free; @@ -366412,7 +474174,7 @@ index f01e442716e2f..d5c7b9a37ed53 100644 hist_field->fn = hist_field_dynstring; else hist_field->fn = hist_field_pstring; -@@ -1837,8 +1838,11 @@ static int init_var_ref(struct hist_field *ref_field, +@@ -1837,8 +1840,11 @@ static int init_var_ref(struct hist_field *ref_field, return err; free: kfree(ref_field->system); @@ -366424,7 +474186,18 @@ index f01e442716e2f..d5c7b9a37ed53 100644 goto out; } -@@ -2048,9 +2052,9 @@ parse_field(struct hist_trigger_data *hist_data, struct trace_event_file *file, +@@ -1891,7 +1897,9 @@ static struct hist_field *create_var_ref(struct hist_trigger_data *hist_data, + return ref_field; + } + } +- ++ /* Sanity check to avoid out-of-bound write on 'hist_data->var_refs' */ ++ if (hist_data->n_var_refs >= TRACING_MAP_VARS_MAX) ++ return NULL; + ref_field = create_hist_field(var_field->hist_data, NULL, flags, NULL); + if (ref_field) { + if (init_var_ref(ref_field, var_field, system, event_name)) { +@@ -2048,9 +2056,9 @@ parse_field(struct hist_trigger_data *hist_data, struct trace_event_file *file, /* * For backward compatibility, if field_name * was "cpu", then we treat this the same as @@ -366436,7 +474209,7 @@ index f01e442716e2f..d5c7b9a37ed53 100644 *flags |= HIST_FIELD_FL_CPU; } else { hist_err(tr, HIST_ERR_FIELD_NOT_FOUND, -@@ -2219,6 +2223,8 @@ static struct hist_field *parse_unary(struct hist_trigger_data *hist_data, +@@ -2219,6 +2227,8 @@ static struct hist_field *parse_unary(struct hist_trigger_data *hist_data, (HIST_FIELD_FL_TIMESTAMP | HIST_FIELD_FL_TIMESTAMP_USECS); expr->fn = hist_field_unary_minus; expr->operands[0] = operand1; @@ -366445,7 +474218,7 @@ index f01e442716e2f..d5c7b9a37ed53 100644 expr->operator = FIELD_OP_UNARY_MINUS; expr->name = expr_str(expr, 0); expr->type = kstrdup_const(operand1->type, GFP_KERNEL); -@@ -2358,6 +2364,7 @@ static struct hist_field *parse_expr(struct hist_trigger_data *hist_data, +@@ -2358,6 +2368,7 @@ static struct hist_field *parse_expr(struct hist_trigger_data *hist_data, /* The operand sizes should be the same, so just pick one */ expr->size = operand1->size; @@ -366453,7 +474226,7 @@ index f01e442716e2f..d5c7b9a37ed53 100644 expr->operator = field_op; expr->name = expr_str(expr, 0); -@@ -2690,8 +2697,10 @@ static inline void __update_field_vars(struct tracing_map_elt *elt, +@@ -2690,8 +2701,10 @@ static inline void __update_field_vars(struct tracing_map_elt *elt, if (val->flags & HIST_FIELD_FL_STRING) { char *str = elt_data->field_var_str[j++]; char *val_str = (char *)(uintptr_t)var_val; @@ -366465,7 +474238,15 @@ index f01e442716e2f..d5c7b9a37ed53 100644 var_val = (u64)(uintptr_t)str; } tracing_map_set_var(elt, var_idx, var_val); -@@ -3416,7 +3425,7 @@ static int 
check_synth_field(struct synth_event *event, +@@ -3179,6 +3192,7 @@ static int parse_action_params(struct trace_array *tr, char *params, + while (params) { + if (data->n_params >= SYNTH_FIELDS_MAX) { + hist_err(tr, HIST_ERR_TOO_MANY_PARAMS, 0); ++ ret = -EINVAL; + goto out; + } + +@@ -3416,7 +3430,7 @@ static int check_synth_field(struct synth_event *event, if (strcmp(field->type, hist_field->type) != 0) { if (field->size != hist_field->size || @@ -366474,7 +474255,18 @@ index f01e442716e2f..d5c7b9a37ed53 100644 return -EINVAL; } -@@ -3578,6 +3587,7 @@ static int trace_action_create(struct hist_trigger_data *hist_data, +@@ -3515,6 +3529,10 @@ static int trace_action_create(struct hist_trigger_data *hist_data, + + lockdep_assert_held(&event_mutex); + ++ /* Sanity check to avoid out-of-bound write on 'data->var_ref_idx' */ ++ if (data->n_params > SYNTH_FIELDS_MAX) ++ return -EINVAL; ++ + if (data->use_trace_keyword) + synth_event_name = data->synth_event_name; + else +@@ -3578,6 +3596,7 @@ static int trace_action_create(struct hist_trigger_data *hist_data, var_ref_idx = find_var_ref_idx(hist_data, var_ref); if (WARN_ON(var_ref_idx < 0)) { @@ -366482,7 +474274,7 @@ index f01e442716e2f..d5c7b9a37ed53 100644 ret = var_ref_idx; goto err; } -@@ -4046,6 +4056,8 @@ static int parse_var_defs(struct hist_trigger_data *hist_data) +@@ -4046,6 +4065,8 @@ static int parse_var_defs(struct hist_trigger_data *hist_data) s = kstrdup(field_str, GFP_KERNEL); if (!s) { @@ -366491,7 +474283,7 @@ index f01e442716e2f..d5c7b9a37ed53 100644 ret = -ENOMEM; goto free; } -@@ -4471,7 +4483,7 @@ static int create_tracing_map_fields(struct hist_trigger_data *hist_data) +@@ -4471,7 +4492,7 @@ static int create_tracing_map_fields(struct hist_trigger_data *hist_data) if (hist_field->flags & HIST_FIELD_FL_STACKTRACE) cmp_fn = tracing_map_cmp_none; @@ -366500,7 +474292,7 @@ index f01e442716e2f..d5c7b9a37ed53 100644 cmp_fn = tracing_map_cmp_num(hist_field->size, hist_field->is_signed); else if (is_string_field(field)) -@@ -4578,6 +4590,7 @@ static void hist_trigger_elt_update(struct hist_trigger_data *hist_data, +@@ -4578,6 +4599,7 @@ static void hist_trigger_elt_update(struct hist_trigger_data *hist_data, if (hist_field->flags & HIST_FIELD_FL_STRING) { unsigned int str_start, var_str_idx, idx; char *str, *val_str; @@ -366508,7 +474300,7 @@ index f01e442716e2f..d5c7b9a37ed53 100644 str_start = hist_data->n_field_var_str + hist_data->n_save_var_str; -@@ -4586,7 +4599,9 @@ static void hist_trigger_elt_update(struct hist_trigger_data *hist_data, +@@ -4586,7 +4608,9 @@ static void hist_trigger_elt_update(struct hist_trigger_data *hist_data, str = elt_data->field_var_str[idx]; val_str = (char *)(uintptr_t)hist_val; @@ -366519,8 +474311,27 @@ index f01e442716e2f..d5c7b9a37ed53 100644 hist_val = (u64)(uintptr_t)str; } +@@ -4662,6 +4686,9 @@ static void event_hist_trigger(struct event_trigger_data *data, + void *key = NULL; + unsigned int i; + ++ if (unlikely(!rbe)) ++ return; ++ + memset(compound_key, 0, hist_data->key_size); + + for_each_hist_key_field(i, hist_data) { +@@ -5936,7 +5963,7 @@ enable: + /* Just return zero, not the number of registered triggers */ + ret = 0; + out: +- if (ret == 0) ++ if (ret == 0 && glob[0]) + hist_err_clear(); + + return ret; diff --git a/kernel/trace/trace_events_synth.c b/kernel/trace/trace_events_synth.c -index d54094b7a9d75..bc7dd0653b05f 100644 +index d54094b7a9d75..2fdf3fd591e18 100644 --- a/kernel/trace/trace_events_synth.c +++ b/kernel/trace/trace_events_synth.c @@ -17,6 +17,8 @@ @@ -366582,7 
+474393,34 @@ index d54094b7a9d75..bc7dd0653b05f 100644 fields_size += len; } -@@ -2053,6 +2064,13 @@ static int create_synth_event(const char *raw_command) +@@ -809,10 +820,9 @@ static int register_synth_event(struct synth_event *event) + } + + ret = set_synth_event_print_fmt(call); +- if (ret < 0) { ++ /* unregister_trace_event() will be called inside */ ++ if (ret < 0) + trace_remove_event_call(call); +- goto err; +- } + out: + return ret; + err: +@@ -1265,12 +1275,12 @@ static int __create_synth_event(const char *name, const char *raw_fields) + goto err; + } + +- fields[n_fields++] = field; + if (n_fields == SYNTH_FIELDS_MAX) { + synth_err(SYNTH_ERR_TOO_MANY_FIELDS, 0); + ret = -EINVAL; + goto err; + } ++ fields[n_fields++] = field; + + n_fields_this_loop++; + } +@@ -2053,6 +2063,13 @@ static int create_synth_event(const char *raw_command) last_cmd_set(raw_command); @@ -366596,7 +474434,7 @@ index d54094b7a9d75..bc7dd0653b05f 100644 p = strpbrk(raw_command, " \t"); if (!p) { synth_err(SYNTH_ERR_INVALID_CMD, 0); -@@ -2061,12 +2079,6 @@ static int create_synth_event(const char *raw_command) +@@ -2061,12 +2078,6 @@ static int create_synth_event(const char *raw_command) fields = skip_spaces(p); @@ -366609,7 +474447,7 @@ index d54094b7a9d75..bc7dd0653b05f 100644 /* This interface accepts group name prefix */ if (strchr(name, '/')) { len = str_has_prefix(name, SYNTH_SYSTEM "/"); -@@ -2227,8 +2239,8 @@ static __init int trace_events_synth_init(void) +@@ -2227,8 +2238,8 @@ static __init int trace_events_synth_init(void) if (err) goto err; @@ -366952,9 +474790,36 @@ index 3a64ba4bbad6f..e755e09805ab1 100644 if (!entry) pr_warn("Could not create tracefs 'kprobe_profile' entry\n"); diff --git a/kernel/trace/trace_osnoise.c b/kernel/trace/trace_osnoise.c -index ce053619f289e..6ef1164c04409 100644 +index ce053619f289e..90c4f70dc9fdf 100644 --- a/kernel/trace/trace_osnoise.c +++ b/kernel/trace/trace_osnoise.c +@@ -730,7 +730,7 @@ void osnoise_trace_irq_entry(int id) + void osnoise_trace_irq_exit(int id, const char *desc) + { + struct osnoise_variables *osn_var = this_cpu_osn_var(); +- int duration; ++ s64 duration; + + if (!osn_var->sampling) + return; +@@ -861,7 +861,7 @@ static void trace_softirq_entry_callback(void *data, unsigned int vec_nr) + static void trace_softirq_exit_callback(void *data, unsigned int vec_nr) + { + struct osnoise_variables *osn_var = this_cpu_osn_var(); +- int duration; ++ s64 duration; + + if (!osn_var->sampling) + return; +@@ -969,7 +969,7 @@ thread_entry(struct osnoise_variables *osn_var, struct task_struct *t) + static void + thread_exit(struct osnoise_variables *osn_var, struct task_struct *t) + { +- int duration; ++ s64 duration; + + if (!osn_var->sampling) + return; @@ -1195,6 +1195,26 @@ static int run_osnoise(void) osnoise_stop_tracing(); } @@ -367150,6 +475015,24 @@ index ce053619f289e..6ef1164c04409 100644 osnoise_busy = false; } +diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c +index c2ca40e8595be..6b4d3f3abdae2 100644 +--- a/kernel/trace/trace_output.c ++++ b/kernel/trace/trace_output.c +@@ -1569,7 +1569,7 @@ static struct trace_event *events[] __initdata = { + NULL + }; + +-__init static int init_events(void) ++__init int init_events(void) + { + struct trace_event *event; + int i, ret; +@@ -1587,4 +1587,3 @@ __init static int init_events(void) + + return 0; + } +-early_initcall(init_events); diff --git a/kernel/trace/trace_preemptirq.c b/kernel/trace/trace_preemptirq.c index f4938040c2286..1e130da1b742c 100644 --- 
a/kernel/trace/trace_preemptirq.c @@ -367196,9 +475079,18 @@ index 4b320fe7df704..29f6e95439b67 100644 return 0; diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c -index 3ed2a3f372972..2bbe4a7c6a2b6 100644 +index 3ed2a3f372972..cb8f9fe5669ad 100644 --- a/kernel/trace/trace_probe.c +++ b/kernel/trace/trace_probe.c +@@ -246,7 +246,7 @@ int traceprobe_parse_event_name(const char **pevent, const char **pgroup, + return -EINVAL; + } + strlcpy(buf, event, slash - event + 1); +- if (!is_good_name(buf)) { ++ if (!is_good_system_name(buf)) { + trace_probe_log_err(offset, BAD_GROUP_NAME); + return -EINVAL; + } @@ -279,7 +279,14 @@ static int parse_probe_vars(char *arg, const struct fetch_type *t, int ret = 0; int len; @@ -368240,7 +476132,7 @@ index 5e7165e6a346c..baa977e003b76 100644 tristate "CRC-CCITT functions" help diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug -index 2a9b6dcdac4ff..1699b21245586 100644 +index 2a9b6dcdac4ff..dbbd243c865f0 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -208,6 +208,11 @@ config DEBUG_BUGVERBOSE @@ -368263,16 +476155,40 @@ index 2a9b6dcdac4ff..1699b21245586 100644 help The implicit default version of DWARF debug info produced by a toolchain changes over time. -@@ -295,7 +301,7 @@ config DEBUG_INFO_DWARF4 +@@ -295,8 +301,8 @@ config DEBUG_INFO_DWARF4 config DEBUG_INFO_DWARF5 bool "Generate DWARF Version 5 debuginfo" - depends on !CC_IS_CLANG || (CC_IS_CLANG && (AS_IS_LLVM || (AS_IS_GNU && AS_VERSION >= 23502))) +- depends on !DEBUG_INFO_BTF + depends on !CC_IS_CLANG || AS_IS_LLVM || (AS_IS_GNU && AS_VERSION >= 23502 && AS_HAS_NON_CONST_LEB128) - depends on !DEBUG_INFO_BTF ++ depends on !DEBUG_INFO_BTF || PAHOLE_VERSION >= 121 help Generate DWARF v5 debug info. Requires binutils 2.35.2, gcc 5.0+ (gcc -@@ -414,7 +420,8 @@ config SECTION_MISMATCH_WARN_ONLY + 5.0+ accepts the -gdwarf-5 flag but only had partial support for some +@@ -322,7 +328,7 @@ config DEBUG_INFO_BTF + DWARF type info into equivalent deduplicated BTF type info. + + config PAHOLE_HAS_SPLIT_BTF +- def_bool $(success, test `$(PAHOLE) --version | sed -E 's/v([0-9]+)\.([0-9]+)/\1\2/'` -ge "119") ++ def_bool PAHOLE_VERSION >= 119 + + config DEBUG_INFO_BTF_MODULES + def_bool y +@@ -346,8 +352,10 @@ config FRAME_WARN + int "Warn for stack frames larger than" + range 0 8192 + default 2048 if GCC_PLUGIN_LATENT_ENTROPY +- default 1536 if (!64BIT && (PARISC || XTENSA)) +- default 1024 if (!64BIT && !PARISC) ++ default 2048 if PARISC ++ default 1536 if (!64BIT && XTENSA) ++ default 1280 if KASAN && !64BIT ++ default 1024 if !64BIT + default 2048 if 64BIT + help + Tell gcc to warn at build time for stack frames larger than this. +@@ -414,7 +422,8 @@ config SECTION_MISMATCH_WARN_ONLY If unsure, say Y. config DEBUG_FORCE_FUNCTION_ALIGN_64B @@ -368282,7 +476198,7 @@ index 2a9b6dcdac4ff..1699b21245586 100644 help There are cases that a commit from one domain changes the function address alignment of other domains, and cause magic performance -@@ -1558,8 +1565,7 @@ config WARN_ALL_UNSEEDED_RANDOM +@@ -1558,8 +1567,7 @@ config WARN_ALL_UNSEEDED_RANDOM so architecture maintainers really need to do what they can to get the CRNG seeded sooner after the system is booted. However, since users cannot do anything actionable to @@ -368292,6 +476208,22 @@ index 2a9b6dcdac4ff..1699b21245586 100644 Say Y here if you want to receive warnings for all uses of unseeded randomness. This will be of use primarily for +@@ -1866,8 +1874,14 @@ config NETDEV_NOTIFIER_ERROR_INJECT + If unsure, say N. 
+ + config FUNCTION_ERROR_INJECTION +- def_bool y ++ bool "Fault-injections of functions" + depends on HAVE_FUNCTION_ERROR_INJECTION && KPROBES ++ help ++ Add fault injections into various functions that are annotated with ++ ALLOW_ERROR_INJECTION() in the kernel. BPF may also modify the return ++ value of theses functions. This is useful to test error paths of code. ++ ++ If unsure, say N + + config FAULT_INJECTION + bool "Fault-injection framework" diff --git a/lib/Kconfig.kfence b/lib/Kconfig.kfence index e641add339475..912f252a41fc6 100644 --- a/lib/Kconfig.kfence @@ -368783,6 +476715,48 @@ index 633b59fed9db8..284e62576d0c6 100644 0xd6, 0x90, 0xe9, 0xfe, 0xcc, 0xe1, 0x3d, 0xb7, 0x16, 0xb6, 0x14, 0xc2, 0x28, 0xfb, 0x2c, 0x05, 0x2b, 0x67, 0x9a, 0x76, 0x2a, 0xbe, 0x04, 0xc3, +diff --git a/lib/debugobjects.c b/lib/debugobjects.c +index 6946f8e204e39..793c31b7e417d 100644 +--- a/lib/debugobjects.c ++++ b/lib/debugobjects.c +@@ -440,6 +440,7 @@ static int object_cpu_offline(unsigned int cpu) + struct debug_percpu_free *percpu_pool; + struct hlist_node *tmp; + struct debug_obj *obj; ++ unsigned long flags; + + /* Remote access is safe as the CPU is dead already */ + percpu_pool = per_cpu_ptr(&percpu_obj_pool, cpu); +@@ -447,6 +448,12 @@ static int object_cpu_offline(unsigned int cpu) + hlist_del(&obj->node); + kmem_cache_free(obj_cache, obj); + } ++ ++ raw_spin_lock_irqsave(&pool_lock, flags); ++ obj_pool_used -= percpu_pool->obj_free; ++ debug_objects_freed += percpu_pool->obj_free; ++ raw_spin_unlock_irqrestore(&pool_lock, flags); ++ + percpu_pool->obj_free = 0; + + return 0; +@@ -1321,6 +1328,8 @@ static int __init debug_objects_replace_static_objects(void) + hlist_add_head(&obj->node, &objects); + } + ++ debug_objects_allocated += i; ++ + /* + * debug_objects_mem_init() is now called early that only one CPU is up + * and interrupts have been disabled, so it is safe to replace the +@@ -1389,6 +1398,7 @@ void __init debug_objects_mem_init(void) + debug_objects_enabled = 0; + kmem_cache_destroy(obj_cache); + pr_warn("out of memory.\n"); ++ return; + } else + debug_objects_selftest(); + diff --git a/lib/decompress_unxz.c b/lib/decompress_unxz.c index a2f38e23004aa..f7a3dc13316a3 100644 --- a/lib/decompress_unxz.c @@ -368962,6 +476936,21 @@ index cb5abb42c16a2..2ca56c22a169e 100644 /* * File_ops->write method for <debugfs>/dynamic_debug/control. Gathers the * command text from userspace, parses and executes it. 
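The lib/dynamic_debug.c context above is the write handler for the <debugfs>/dynamic_debug/control file. As a rough illustration of the command text that handler parses (the file name below is only an example, taken from the kernel's dynamic-debug documentation), enabling the pr_debug() callsites in one source file looks like:

    # turn on pr_debug() output for every callsite in svcsock.c
    echo 'file svcsock.c +p' > /sys/kernel/debug/dynamic_debug/control

Each write is a match-spec (file, module, func, line, format) followed by a flags change such as +p to enable printing or -p to disable it again.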
+diff --git a/lib/fonts/fonts.c b/lib/fonts/fonts.c +index 5f4b07b56cd9c..9738664386088 100644 +--- a/lib/fonts/fonts.c ++++ b/lib/fonts/fonts.c +@@ -135,8 +135,8 @@ const struct font_desc *get_default_font(int xres, int yres, u32 font_w, + if (res > 20) + c += 20 - res; + +- if ((font_w & (1 << (f->width - 1))) && +- (font_h & (1 << (f->height - 1)))) ++ if ((font_w & (1U << (f->width - 1))) && ++ (font_h & (1U << (f->height - 1)))) + c += 1000; + + if (c > cc) { diff --git a/lib/hexdump.c b/lib/hexdump.c index 9301578f98e8c..06833d404398d 100644 --- a/lib/hexdump.c @@ -369474,6 +477463,18 @@ index 7ac845f65be56..133929e0ce8ff 100644 if (!block_transition) { /* * Serialize output: print all messages from the work +diff --git a/lib/lockref.c b/lib/lockref.c +index 5b34bbd3eba81..81ac5f3552428 100644 +--- a/lib/lockref.c ++++ b/lib/lockref.c +@@ -24,7 +24,6 @@ + } \ + if (!--retry) \ + break; \ +- cpu_relax(); \ + } \ + } while (0) + diff --git a/lib/logic_iomem.c b/lib/logic_iomem.c index 9bdfde0c0f86d..e7ea9b28d8db5 100644 --- a/lib/logic_iomem.c @@ -369778,6 +477779,34 @@ index 47bc59edd4ff9..54fcc01564d9d 100644 if (copy) { ctx->m = mpi_copy(m); +diff --git a/lib/nlattr.c b/lib/nlattr.c +index 86029ad5ead4f..73635bdb00620 100644 +--- a/lib/nlattr.c ++++ b/lib/nlattr.c +@@ -10,6 +10,7 @@ + #include <linux/kernel.h> + #include <linux/errno.h> + #include <linux/jiffies.h> ++#include <linux/nospec.h> + #include <linux/skbuff.h> + #include <linux/string.h> + #include <linux/types.h> +@@ -369,6 +370,7 @@ static int validate_nla(const struct nlattr *nla, int maxtype, + if (type <= 0 || type > maxtype) + return 0; + ++ type = array_index_nospec(type, maxtype + 1); + pt = &policy[type]; + + BUG_ON(pt->type > NLA_TYPE_MAX); +@@ -584,6 +586,7 @@ static int __nla_validate_parse(const struct nlattr *head, int len, int maxtype, + } + continue; + } ++ type = array_index_nospec(type, maxtype + 1); + if (policy) { + int err = validate_nla(nla, maxtype, policy, + validate, extack, depth); diff --git a/lib/nmi_backtrace.c b/lib/nmi_backtrace.c index f9e89001b52eb..199ab201d5019 100644 --- a/lib/nmi_backtrace.c @@ -369811,6 +477840,19 @@ index 3aa454c54c0de..e22647f5181b3 100644 if (ret == MAX_NUMNODES) ret = __first_node(srcp); +diff --git a/lib/notifier-error-inject.c b/lib/notifier-error-inject.c +index 21016b32d3131..2b24ea6c94979 100644 +--- a/lib/notifier-error-inject.c ++++ b/lib/notifier-error-inject.c +@@ -15,7 +15,7 @@ static int debugfs_errno_get(void *data, u64 *val) + return 0; + } + +-DEFINE_SIMPLE_ATTRIBUTE(fops_errno, debugfs_errno_get, debugfs_errno_set, ++DEFINE_SIMPLE_ATTRIBUTE_SIGNED(fops_errno, debugfs_errno_get, debugfs_errno_set, + "%lld\n"); + + static struct dentry *debugfs_create_errno(const char *name, umode_t mode, diff --git a/lib/once.c b/lib/once.c index 59149bf3bfb4a..351f66aad310a 100644 --- a/lib/once.c @@ -370440,6 +478482,18 @@ index 830a18ecffc88..84f5dd3b0fc7b 100644 if (ret == test->result) { pr_cont("%lld PASS", duration); pass_cnt++; +diff --git a/lib/test_firmware.c b/lib/test_firmware.c +index 1bccd6cd5f482..e68be7aba7d16 100644 +--- a/lib/test_firmware.c ++++ b/lib/test_firmware.c +@@ -1111,6 +1111,7 @@ static int __init test_firmware_init(void) + + rc = misc_register(&test_fw_misc_device); + if (rc) { ++ __test_firmware_config_free(); + kfree(test_fw_config); + pr_err("could not register misc device: %d\n", rc); + return rc; diff --git a/lib/test_hmm.c b/lib/test_hmm.c index c259842f6d443..a89cb4281c9dc 100644 --- a/lib/test_hmm.c @@ -370618,6 +478672,121 @@ 
index e4f706a404b3a..3ca717f113977 100644 *total_failures += fail; return 1; } +diff --git a/lib/test_overflow.c b/lib/test_overflow.c +index 7a4b6f6c5473c..7a5a5738d2d21 100644 +--- a/lib/test_overflow.c ++++ b/lib/test_overflow.c +@@ -588,12 +588,110 @@ static int __init test_overflow_allocation(void) + return err; + } + ++struct __test_flex_array { ++ unsigned long flags; ++ size_t count; ++ unsigned long data[]; ++}; ++ ++static int __init test_overflow_size_helpers(void) ++{ ++ struct __test_flex_array *obj; ++ int count = 0; ++ int err = 0; ++ int var; ++ ++#define check_one_size_helper(expected, func, args...) ({ \ ++ bool __failure = false; \ ++ size_t _r; \ ++ \ ++ _r = func(args); \ ++ if (_r != (expected)) { \ ++ pr_warn("expected " #func "(" #args ") " \ ++ "to return %zu but got %zu instead\n", \ ++ (size_t)(expected), _r); \ ++ __failure = true; \ ++ } \ ++ count++; \ ++ __failure; \ ++}) ++ ++ var = 4; ++ err |= check_one_size_helper(20, size_mul, var++, 5); ++ err |= check_one_size_helper(20, size_mul, 4, var++); ++ err |= check_one_size_helper(0, size_mul, 0, 3); ++ err |= check_one_size_helper(0, size_mul, 3, 0); ++ err |= check_one_size_helper(6, size_mul, 2, 3); ++ err |= check_one_size_helper(SIZE_MAX, size_mul, SIZE_MAX, 1); ++ err |= check_one_size_helper(SIZE_MAX, size_mul, SIZE_MAX, 3); ++ err |= check_one_size_helper(SIZE_MAX, size_mul, SIZE_MAX, -3); ++ ++ var = 4; ++ err |= check_one_size_helper(9, size_add, var++, 5); ++ err |= check_one_size_helper(9, size_add, 4, var++); ++ err |= check_one_size_helper(9, size_add, 9, 0); ++ err |= check_one_size_helper(9, size_add, 0, 9); ++ err |= check_one_size_helper(5, size_add, 2, 3); ++ err |= check_one_size_helper(SIZE_MAX, size_add, SIZE_MAX, 1); ++ err |= check_one_size_helper(SIZE_MAX, size_add, SIZE_MAX, 3); ++ err |= check_one_size_helper(SIZE_MAX, size_add, SIZE_MAX, -3); ++ ++ var = 4; ++ err |= check_one_size_helper(1, size_sub, var--, 3); ++ err |= check_one_size_helper(1, size_sub, 4, var--); ++ err |= check_one_size_helper(1, size_sub, 3, 2); ++ err |= check_one_size_helper(9, size_sub, 9, 0); ++ err |= check_one_size_helper(SIZE_MAX, size_sub, 9, -3); ++ err |= check_one_size_helper(SIZE_MAX, size_sub, 0, 9); ++ err |= check_one_size_helper(SIZE_MAX, size_sub, 2, 3); ++ err |= check_one_size_helper(SIZE_MAX, size_sub, SIZE_MAX, 0); ++ err |= check_one_size_helper(SIZE_MAX, size_sub, SIZE_MAX, 10); ++ err |= check_one_size_helper(SIZE_MAX, size_sub, 0, SIZE_MAX); ++ err |= check_one_size_helper(SIZE_MAX, size_sub, 14, SIZE_MAX); ++ err |= check_one_size_helper(SIZE_MAX - 2, size_sub, SIZE_MAX - 1, 1); ++ err |= check_one_size_helper(SIZE_MAX - 4, size_sub, SIZE_MAX - 1, 3); ++ err |= check_one_size_helper(1, size_sub, SIZE_MAX - 1, -3); ++ ++ var = 4; ++ err |= check_one_size_helper(4 * sizeof(*obj->data), ++ flex_array_size, obj, data, var++); ++ err |= check_one_size_helper(5 * sizeof(*obj->data), ++ flex_array_size, obj, data, var++); ++ err |= check_one_size_helper(0, flex_array_size, obj, data, 0); ++ err |= check_one_size_helper(sizeof(*obj->data), ++ flex_array_size, obj, data, 1); ++ err |= check_one_size_helper(7 * sizeof(*obj->data), ++ flex_array_size, obj, data, 7); ++ err |= check_one_size_helper(SIZE_MAX, ++ flex_array_size, obj, data, -1); ++ err |= check_one_size_helper(SIZE_MAX, ++ flex_array_size, obj, data, SIZE_MAX - 4); ++ ++ var = 4; ++ err |= check_one_size_helper(sizeof(*obj) + (4 * sizeof(*obj->data)), ++ struct_size, obj, data, var++); ++ err |= check_one_size_helper(sizeof(*obj) + 
(5 * sizeof(*obj->data)), ++ struct_size, obj, data, var++); ++ err |= check_one_size_helper(sizeof(*obj), struct_size, obj, data, 0); ++ err |= check_one_size_helper(sizeof(*obj) + sizeof(*obj->data), ++ struct_size, obj, data, 1); ++ err |= check_one_size_helper(SIZE_MAX, ++ struct_size, obj, data, -3); ++ err |= check_one_size_helper(SIZE_MAX, ++ struct_size, obj, data, SIZE_MAX - 3); ++ ++ pr_info("%d overflow size helper tests finished\n", count); ++ ++ return err; ++} ++ + static int __init test_module_init(void) + { + int err = 0; + + err |= test_overflow_calculation(); + err |= test_overflow_shift(); ++ err |= test_overflow_size_helpers(); + err |= test_overflow_allocation(); + + if (err) { diff --git a/lib/test_ubsan.c b/lib/test_ubsan.c index 7e7bbd0f3fd27..2062be1f2e80f 100644 --- a/lib/test_ubsan.c @@ -370708,6 +478877,65 @@ index 8b1c318189ce8..e77d4856442c3 100644 } check_create_range_3(); +diff --git a/lib/ubsan.c b/lib/ubsan.c +index bdc380ff5d5c7..60c7099857a05 100644 +--- a/lib/ubsan.c ++++ b/lib/ubsan.c +@@ -154,16 +154,7 @@ static void ubsan_epilogue(void) + + current->in_ubsan--; + +- if (panic_on_warn) { +- /* +- * This thread may hit another WARN() in the panic path. +- * Resetting this prevents additional WARN() from panicking the +- * system on this thread. Other threads are blocked by the +- * panic_mutex in panic(). +- */ +- panic_on_warn = 0; +- panic("panic_on_warn set ...\n"); +- } ++ check_panic_on_warn("UBSAN"); + } + + void __ubsan_handle_divrem_overflow(void *_data, void *lhs, void *rhs) +diff --git a/lib/usercopy.c b/lib/usercopy.c +index 7413dd300516e..7ee63df042d7e 100644 +--- a/lib/usercopy.c ++++ b/lib/usercopy.c +@@ -3,6 +3,7 @@ + #include <linux/fault-inject-usercopy.h> + #include <linux/instrumented.h> + #include <linux/uaccess.h> ++#include <linux/nospec.h> + + /* out-of-line parts */ + +@@ -12,6 +13,12 @@ unsigned long _copy_from_user(void *to, const void __user *from, unsigned long n + unsigned long res = n; + might_fault(); + if (!should_fail_usercopy() && likely(access_ok(from, n))) { ++ /* ++ * Ensure that bad access_ok() speculation will not ++ * lead to nasty side effects *after* the copy is ++ * finished: ++ */ ++ barrier_nospec(); + instrument_copy_from_user(to, from, n); + res = raw_copy_from_user(to, from, n); + } +diff --git a/lib/vdso/Makefile b/lib/vdso/Makefile +index c415a685d61bb..e814061d6aa01 100644 +--- a/lib/vdso/Makefile ++++ b/lib/vdso/Makefile +@@ -17,6 +17,6 @@ $(error ARCH_REL_TYPE_ABS is not set) + endif + + quiet_cmd_vdso_check = VDSOCHK $@ +- cmd_vdso_check = if $(OBJDUMP) -R $@ | egrep -h "$(ARCH_REL_TYPE_ABS)"; \ ++ cmd_vdso_check = if $(OBJDUMP) -R $@ | grep -E -h "$(ARCH_REL_TYPE_ABS)"; \ + then (echo >&2 "$@: dynamic relocations are not supported"; \ + rm -f $@; /bin/false); fi diff --git a/lib/vsprintf.c b/lib/vsprintf.c index d7ad44f2c8f57..a60f0bb2ea902 100644 --- a/lib/vsprintf.c @@ -371096,10 +479324,169 @@ index 995e15480937f..a972c3440c404 100644 page = pfn_to_page(pfn); break; diff --git a/mm/compaction.c b/mm/compaction.c -index bfc93da1c2c7c..48a2111ce437a 100644 +index bfc93da1c2c7c..89517ad5d6a0b 100644 --- a/mm/compaction.c +++ b/mm/compaction.c -@@ -1815,6 +1815,8 @@ static unsigned long fast_find_migrateblock(struct compact_control *cc) +@@ -779,7 +779,7 @@ static bool too_many_isolated(pg_data_t *pgdat) + * @cc: Compaction control structure. + * @low_pfn: The first PFN to isolate + * @end_pfn: The one-past-the-last PFN to isolate, within same pageblock +- * @isolate_mode: Isolation mode to be used. 
++ * @mode: Isolation mode to be used. + * + * Isolate all pages that can be migrated from the range specified by + * [low_pfn, end_pfn). The range is expected to be within same pageblock. +@@ -792,7 +792,7 @@ static bool too_many_isolated(pg_data_t *pgdat) + */ + static int + isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn, +- unsigned long end_pfn, isolate_mode_t isolate_mode) ++ unsigned long end_pfn, isolate_mode_t mode) + { + pg_data_t *pgdat = cc->zone->zone_pgdat; + unsigned long nr_scanned = 0, nr_isolated = 0; +@@ -800,6 +800,7 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn, + unsigned long flags = 0; + struct lruvec *locked = NULL; + struct page *page = NULL, *valid_page = NULL; ++ struct address_space *mapping; + unsigned long start_pfn = low_pfn; + bool skip_on_failure = false; + unsigned long next_skip_pfn = 0; +@@ -984,40 +985,76 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn, + locked = NULL; + } + +- if (!isolate_movable_page(page, isolate_mode)) ++ if (!isolate_movable_page(page, mode)) + goto isolate_success; + } + + goto isolate_fail; + } + ++ /* ++ * Be careful not to clear PageLRU until after we're ++ * sure the page is not being freed elsewhere -- the ++ * page release code relies on it. ++ */ ++ if (unlikely(!get_page_unless_zero(page))) ++ goto isolate_fail; ++ + /* + * Migration will fail if an anonymous page is pinned in memory, + * so avoid taking lru_lock and isolating it unnecessarily in an + * admittedly racy check. + */ +- if (!page_mapping(page) && +- page_count(page) > page_mapcount(page)) +- goto isolate_fail; ++ mapping = page_mapping(page); ++ if (!mapping && (page_count(page) - 1) > total_mapcount(page)) ++ goto isolate_fail_put; + + /* + * Only allow to migrate anonymous pages in GFP_NOFS context + * because those do not depend on fs locks. + */ +- if (!(cc->gfp_mask & __GFP_FS) && page_mapping(page)) +- goto isolate_fail; ++ if (!(cc->gfp_mask & __GFP_FS) && mapping) ++ goto isolate_fail_put; ++ ++ /* Only take pages on LRU: a check now makes later tests safe */ ++ if (!PageLRU(page)) ++ goto isolate_fail_put; ++ ++ /* Compaction might skip unevictable pages but CMA takes them */ ++ if (!(mode & ISOLATE_UNEVICTABLE) && PageUnevictable(page)) ++ goto isolate_fail_put; + + /* +- * Be careful not to clear PageLRU until after we're +- * sure the page is not being freed elsewhere -- the +- * page release code relies on it. ++ * To minimise LRU disruption, the caller can indicate with ++ * ISOLATE_ASYNC_MIGRATE that it only wants to isolate pages ++ * it will be able to migrate without blocking - clean pages ++ * for the most part. PageWriteback would require blocking. + */ +- if (unlikely(!get_page_unless_zero(page))) +- goto isolate_fail; +- +- if (!__isolate_lru_page_prepare(page, isolate_mode)) ++ if ((mode & ISOLATE_ASYNC_MIGRATE) && PageWriteback(page)) + goto isolate_fail_put; + ++ if ((mode & ISOLATE_ASYNC_MIGRATE) && PageDirty(page)) { ++ bool migrate_dirty; ++ ++ /* ++ * Only pages without mappings or that have a ++ * ->migratepage callback are possible to migrate ++ * without blocking. However, we can be racing with ++ * truncation so it's necessary to lock the page ++ * to stabilise the mapping as truncation holds ++ * the page lock until after the page is removed ++ * from the page cache. 
++ */ ++ if (!trylock_page(page)) ++ goto isolate_fail_put; ++ ++ mapping = page_mapping(page); ++ migrate_dirty = !mapping || mapping->a_ops->migratepage; ++ unlock_page(page); ++ if (!migrate_dirty) ++ goto isolate_fail_put; ++ } ++ + /* Try isolate the page */ + if (!TestClearPageLRU(page)) + goto isolate_fail_put; +@@ -1313,7 +1350,7 @@ move_freelist_tail(struct list_head *freelist, struct page *freepage) + } + + static void +-fast_isolate_around(struct compact_control *cc, unsigned long pfn, unsigned long nr_isolated) ++fast_isolate_around(struct compact_control *cc, unsigned long pfn) + { + unsigned long start_pfn, end_pfn; + struct page *page; +@@ -1334,21 +1371,13 @@ fast_isolate_around(struct compact_control *cc, unsigned long pfn, unsigned long + if (!page) + return; + +- /* Scan before */ +- if (start_pfn != pfn) { +- isolate_freepages_block(cc, &start_pfn, pfn, &cc->freepages, 1, false); +- if (cc->nr_freepages >= cc->nr_migratepages) +- return; +- } +- +- /* Scan after */ +- start_pfn = pfn + nr_isolated; +- if (start_pfn < end_pfn) +- isolate_freepages_block(cc, &start_pfn, end_pfn, &cc->freepages, 1, false); ++ isolate_freepages_block(cc, &start_pfn, end_pfn, &cc->freepages, 1, false); + + /* Skip this pageblock in the future as it's full or nearly full */ + if (cc->nr_freepages < cc->nr_migratepages) + set_pageblock_skip(page); ++ ++ return; + } + + /* Search orders in round-robin fashion */ +@@ -1524,7 +1553,7 @@ fast_isolate_freepages(struct compact_control *cc) + return cc->free_pfn; + + low_pfn = page_to_pfn(page); +- fast_isolate_around(cc, low_pfn, nr_isolated); ++ fast_isolate_around(cc, low_pfn); + return low_pfn; + } + +@@ -1815,6 +1844,8 @@ static unsigned long fast_find_migrateblock(struct compact_control *cc) update_fast_start_pfn(cc, free_pfn); pfn = pageblock_start_pfn(free_pfn); @@ -371148,7 +479535,7 @@ index 30e9211f494a7..7a4912d6e65f2 100644 if (ctx->primitive.check_accesses) max_nr_accesses = ctx->primitive.check_accesses(ctx); diff --git a/mm/damon/dbgfs.c b/mm/damon/dbgfs.c -index faee070977d80..e670fb6b11260 100644 +index faee070977d80..b039fd1f8a1db 100644 --- a/mm/damon/dbgfs.c +++ b/mm/damon/dbgfs.c @@ -32,7 +32,7 @@ static char *user_input_str(const char __user *buf, size_t count, loff_t *ppos) @@ -371214,16 +479601,27 @@ index faee070977d80..e670fb6b11260 100644 dbgfs_dirs[dbgfs_nr_ctxs] = new_dir; new_ctx = dbgfs_new_ctx(); -@@ -430,6 +443,7 @@ static int dbgfs_rm_context(char *name) +@@ -428,8 +441,10 @@ out: + static int dbgfs_rm_context(char *name) + { struct dentry *root, *dir, **new_dirs; ++ struct inode *inode; struct damon_ctx **new_ctxs; int i, j; + int ret = 0; if (damon_nr_running_ctxs()) return -EBUSY; -@@ -444,14 +458,16 @@ static int dbgfs_rm_context(char *name) +@@ -442,16 +457,24 @@ static int dbgfs_rm_context(char *name) + if (!dir) + return -ENOENT; ++ inode = d_inode(dir); ++ if (!S_ISDIR(inode->i_mode)) { ++ ret = -EINVAL; ++ goto out_dput; ++ } ++ new_dirs = kmalloc_array(dbgfs_nr_ctxs - 1, sizeof(*dbgfs_dirs), GFP_KERNEL); - if (!new_dirs) @@ -371243,7 +479641,7 @@ index faee070977d80..e670fb6b11260 100644 } for (i = 0, j = 0; i < dbgfs_nr_ctxs; i++) { -@@ -471,7 +487,13 @@ static int dbgfs_rm_context(char *name) +@@ -471,7 +494,13 @@ static int dbgfs_rm_context(char *name) dbgfs_ctxs = new_ctxs; dbgfs_nr_ctxs--; @@ -371258,7 +479656,7 @@ index faee070977d80..e670fb6b11260 100644 } static ssize_t dbgfs_rm_context_write(struct file *file, -@@ -538,12 +560,14 @@ static ssize_t dbgfs_monitor_on_write(struct file *file, +@@ 
-538,12 +567,14 @@ static ssize_t dbgfs_monitor_on_write(struct file *file, return -EINVAL; } @@ -371273,7 +479671,7 @@ index faee070977d80..e670fb6b11260 100644 if (err) ret = err; -@@ -596,15 +620,16 @@ static int __init __damon_dbgfs_init(void) +@@ -596,15 +627,16 @@ static int __init __damon_dbgfs_init(void) static int __init damon_dbgfs_init(void) { @@ -371293,7 +479691,7 @@ index faee070977d80..e670fb6b11260 100644 } dbgfs_nr_ctxs = 1; -@@ -615,6 +640,8 @@ static int __init damon_dbgfs_init(void) +@@ -615,6 +647,8 @@ static int __init damon_dbgfs_init(void) pr_err("%s: dbgfs init failed\n", __func__); } @@ -371344,7 +479742,7 @@ index 1403639302e48..718d0d3ad8c4e 100644 static void __init pte_savedwrite_tests(struct pgtable_debug_args *args) diff --git a/mm/filemap.c b/mm/filemap.c -index dae481293b5d9..dbc461703ff45 100644 +index dae481293b5d9..81e28722edfaf 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -90,7 +90,7 @@ @@ -371403,7 +479801,38 @@ index dae481293b5d9..dbc461703ff45 100644 continue; put_page: put_page(head); -@@ -3757,7 +3762,7 @@ again: +@@ -2533,18 +2538,19 @@ static int filemap_get_pages(struct kiocb *iocb, struct iov_iter *iter, + struct page *page; + int err = 0; + ++ /* "last_index" is the index of the page beyond the end of the read */ + last_index = DIV_ROUND_UP(iocb->ki_pos + iter->count, PAGE_SIZE); + retry: + if (fatal_signal_pending(current)) + return -EINTR; + +- filemap_get_read_batch(mapping, index, last_index, pvec); ++ filemap_get_read_batch(mapping, index, last_index - 1, pvec); + if (!pagevec_count(pvec)) { + if (iocb->ki_flags & IOCB_NOIO) + return -EAGAIN; + page_cache_sync_readahead(mapping, ra, filp, index, + last_index - index); +- filemap_get_read_batch(mapping, index, last_index, pvec); ++ filemap_get_read_batch(mapping, index, last_index - 1, pvec); + } + if (!pagevec_count(pvec)) { + if (iocb->ki_flags & (IOCB_NOWAIT | IOCB_WAITQ)) +@@ -3744,7 +3750,7 @@ ssize_t generic_perform_write(struct file *file, + unsigned long offset; /* Offset into pagecache page */ + unsigned long bytes; /* Bytes to write to page */ + size_t copied; /* Bytes copied from user */ +- void *fsdata; ++ void *fsdata = NULL; + + offset = (pos & (PAGE_SIZE - 1)); + bytes = min_t(unsigned long, PAGE_SIZE - offset, +@@ -3757,7 +3763,7 @@ again: * same page as we're writing to, without it being marked * up-to-date. */ @@ -371413,7 +479842,7 @@ index dae481293b5d9..dbc461703ff45 100644 break; } diff --git a/mm/gup.c b/mm/gup.c -index 886d6148d3d03..1a23cd0b4fba1 100644 +index 886d6148d3d03..0a1839b325747 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -124,8 +124,8 @@ static inline struct page *try_get_compound_head(struct page *page, int refs) @@ -371475,7 +479904,35 @@ index 886d6148d3d03..1a23cd0b4fba1 100644 return -EFAULT; if (flags & FOLL_TOUCH) { -@@ -918,6 +943,8 @@ static int faultin_page(struct vm_area_struct *vma, +@@ -484,6 +509,18 @@ static struct page *follow_page_pte(struct vm_area_struct *vma, + if (WARN_ON_ONCE((flags & (FOLL_PIN | FOLL_GET)) == + (FOLL_PIN | FOLL_GET))) + return ERR_PTR(-EINVAL); ++ ++ /* ++ * Considering PTE level hugetlb, like continuous-PTE hugetlb on ++ * ARM64 architecture. 
++ */ ++ if (is_vm_hugetlb_page(vma)) { ++ page = follow_huge_pmd_pte(vma, address, flags); ++ if (page) ++ return page; ++ return no_page_table(vma, flags); ++ } ++ + retry: + if (unlikely(pmd_bad(*pmd))) + return no_page_table(vma, flags); +@@ -627,7 +664,7 @@ static struct page *follow_pmd_mask(struct vm_area_struct *vma, + if (pmd_none(pmdval)) + return no_page_table(vma, flags); + if (pmd_huge(pmdval) && is_vm_hugetlb_page(vma)) { +- page = follow_huge_pmd(mm, address, pmd, flags); ++ page = follow_huge_pmd_pte(vma, address, flags); + if (page) + return page; + return no_page_table(vma, flags); +@@ -918,6 +955,8 @@ static int faultin_page(struct vm_area_struct *vma, /* mlock all present pages, but do not fault in new pages */ if ((*flags & (FOLL_POPULATE | FOLL_MLOCK)) == FOLL_MLOCK) return -ENOENT; @@ -371484,7 +479941,7 @@ index 886d6148d3d03..1a23cd0b4fba1 100644 if (*flags & FOLL_WRITE) fault_flags |= FAULT_FLAG_WRITE; if (*flags & FOLL_REMOTE) -@@ -1656,6 +1683,122 @@ finish_or_fault: +@@ -1656,6 +1695,122 @@ finish_or_fault: } #endif /* !CONFIG_MMU */ @@ -371607,7 +480064,16 @@ index 886d6148d3d03..1a23cd0b4fba1 100644 /** * get_dump_page() - pin user page in memory while writing it to core dump * @addr: user address -@@ -2123,8 +2266,28 @@ static void __maybe_unused undo_dev_pagemap(int *nr, int nr_start, +@@ -1722,7 +1877,7 @@ static long check_and_migrate_movable_pages(unsigned long nr_pages, + */ + if (!is_pinnable_page(head)) { + if (PageHuge(head)) { +- if (!isolate_huge_page(head, &movable_page_list)) ++ if (isolate_hugetlb(head, &movable_page_list)) + isolation_error_count++; + } else { + if (!PageLRU(head) && drain_allow) { +@@ -2123,8 +2278,28 @@ static void __maybe_unused undo_dev_pagemap(int *nr, int nr_start, } #ifdef CONFIG_ARCH_HAS_PTE_SPECIAL @@ -371638,7 +480104,7 @@ index 886d6148d3d03..1a23cd0b4fba1 100644 { struct dev_pagemap *pgmap = NULL; int nr_start = *nr, ret = 0; -@@ -2169,7 +2332,8 @@ static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end, +@@ -2169,7 +2344,8 @@ static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end, goto pte_unmap; } @@ -371648,7 +480114,7 @@ index 886d6148d3d03..1a23cd0b4fba1 100644 put_compound_head(head, 1, flags); goto pte_unmap; } -@@ -2214,8 +2378,9 @@ pte_unmap: +@@ -2214,8 +2390,9 @@ pte_unmap: * get_user_pages_fast_only implementation that can pin pages. Thus it's still * useful to have gup_huge_pmd even if we can't operate on ptes. 
*/ @@ -371660,7 +480126,7 @@ index 886d6148d3d03..1a23cd0b4fba1 100644 { return 0; } -@@ -2524,7 +2689,7 @@ static int gup_pmd_range(pud_t *pudp, pud_t pud, unsigned long addr, unsigned lo +@@ -2524,7 +2701,7 @@ static int gup_pmd_range(pud_t *pudp, pud_t pud, unsigned long addr, unsigned lo if (!gup_huge_pd(__hugepd(pmd_val(pmd)), addr, PMD_SHIFT, next, flags, pages, nr)) return 0; @@ -371669,7 +480135,16 @@ index 886d6148d3d03..1a23cd0b4fba1 100644 return 0; } while (pmdp++, addr = next, addr != end); -@@ -2708,7 +2873,7 @@ static int internal_get_user_pages_fast(unsigned long start, +@@ -2544,7 +2721,7 @@ static int gup_pud_range(p4d_t *p4dp, p4d_t p4d, unsigned long addr, unsigned lo + next = pud_addr_end(addr, end); + if (unlikely(!pud_present(pud))) + return 0; +- if (unlikely(pud_huge(pud))) { ++ if (unlikely(pud_huge(pud) || pud_devmap(pud))) { + if (!gup_huge_pud(pud, pudp, addr, next, flags, + pages, nr)) + return 0; +@@ -2708,7 +2885,7 @@ static int internal_get_user_pages_fast(unsigned long start, if (WARN_ON_ONCE(gup_flags & ~(FOLL_WRITE | FOLL_LONGTERM | FOLL_FORCE | FOLL_PIN | FOLL_GET | @@ -371917,10 +480392,38 @@ index c5142d237e482..07941a1540cbb 100644 next: put_page(page); diff --git a/mm/hugetlb.c b/mm/hugetlb.c -index 95dc7b83381f9..f5f8929fad515 100644 +index 95dc7b83381f9..2f5c1b2456ef2 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c -@@ -2813,11 +2813,11 @@ struct page *alloc_huge_page(struct vm_area_struct *vma, +@@ -82,6 +82,8 @@ struct mutex *hugetlb_fault_mutex_table ____cacheline_aligned_in_smp; + + /* Forward declaration */ + static int hugetlb_acct_memory(struct hstate *h, long delta); ++static void hugetlb_unshare_pmds(struct vm_area_struct *vma, ++ unsigned long start, unsigned long end); + + static inline bool subpool_is_free(struct hugepage_subpool *spool) + { +@@ -2654,8 +2656,7 @@ retry: + * Fail with -EBUSY if not possible. + */ + spin_unlock_irq(&hugetlb_lock); +- if (!isolate_huge_page(old_page, list)) +- ret = -EBUSY; ++ ret = isolate_hugetlb(old_page, list); + spin_lock_irq(&hugetlb_lock); + goto free_new; + } else if (!HPageFreed(old_page)) { +@@ -2731,7 +2732,7 @@ int isolate_or_dissolve_huge_page(struct page *page, struct list_head *list) + if (hstate_is_gigantic(h)) + return -ENOMEM; + +- if (page_count(head) && isolate_huge_page(head, list)) ++ if (page_count(head) && !isolate_hugetlb(head, list)) + ret = 0; + else if (!page_count(head)) + ret = alloc_and_dissolve_huge_page(h, head, list); +@@ -2813,11 +2814,11 @@ struct page *alloc_huge_page(struct vm_area_struct *vma, page = alloc_buddy_huge_page_with_mpol(h, vma, addr); if (!page) goto out_uncharge_cgroup; @@ -371933,7 +480436,33 @@ index 95dc7b83381f9..f5f8929fad515 100644 list_add(&page->lru, &h->hugepage_activelist); /* Fall through */ } -@@ -4439,6 +4439,7 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma, +@@ -4164,6 +4165,25 @@ static int hugetlb_vm_op_split(struct vm_area_struct *vma, unsigned long addr) + { + if (addr & ~(huge_page_mask(hstate_vma(vma)))) + return -EINVAL; ++ ++ /* ++ * PMD sharing is only possible for PUD_SIZE-aligned address ranges ++ * in HugeTLB VMAs. If we will lose PUD_SIZE alignment due to this ++ * split, unshare PMDs in the PUD_SIZE interval surrounding addr now. ++ */ ++ if (addr & ~PUD_MASK) { ++ /* ++ * hugetlb_vm_op_split is called right before we attempt to ++ * split the VMA. We will need to unshare PMDs in the old and ++ * new VMAs, so let's unshare before we split. 
++ */ ++ unsigned long floor = addr & PUD_MASK; ++ unsigned long ceil = floor + PUD_SIZE; ++ ++ if (floor >= vma->vm_start && ceil <= vma->vm_end) ++ hugetlb_unshare_pmds(vma, floor, ceil); ++ } ++ + return 0; + } + +@@ -4439,6 +4459,7 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma, struct hstate *h = hstate_vma(vma); unsigned long sz = huge_page_size(h); struct mmu_notifier_range range; @@ -371941,7 +480470,7 @@ index 95dc7b83381f9..f5f8929fad515 100644 WARN_ON(!is_vm_hugetlb_page(vma)); BUG_ON(start & ~huge_page_mask(h)); -@@ -4467,10 +4468,8 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma, +@@ -4467,10 +4488,8 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma, ptl = huge_pte_lock(h, mm, ptep); if (huge_pmd_unshare(mm, vma, &address, ptep)) { spin_unlock(ptl); @@ -371954,7 +480483,7 @@ index 95dc7b83381f9..f5f8929fad515 100644 continue; } -@@ -4527,6 +4526,22 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma, +@@ -4527,6 +4546,22 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma, } mmu_notifier_invalidate_range_end(&range); tlb_end_vma(tlb, vma); @@ -371977,7 +480506,7 @@ index 95dc7b83381f9..f5f8929fad515 100644 } void __unmap_hugepage_range_final(struct mmu_gather *tlb, -@@ -4829,7 +4844,6 @@ static inline vm_fault_t hugetlb_handle_userfault(struct vm_area_struct *vma, +@@ -4829,7 +4864,6 @@ static inline vm_fault_t hugetlb_handle_userfault(struct vm_area_struct *vma, unsigned long haddr, unsigned long reason) { @@ -371985,7 +480514,7 @@ index 95dc7b83381f9..f5f8929fad515 100644 u32 hash; struct vm_fault vmf = { .vma = vma, -@@ -4846,18 +4860,14 @@ static inline vm_fault_t hugetlb_handle_userfault(struct vm_area_struct *vma, +@@ -4846,18 +4880,14 @@ static inline vm_fault_t hugetlb_handle_userfault(struct vm_area_struct *vma, }; /* @@ -372008,7 +480537,7 @@ index 95dc7b83381f9..f5f8929fad515 100644 } static vm_fault_t hugetlb_no_page(struct mm_struct *mm, -@@ -4874,6 +4884,7 @@ static vm_fault_t hugetlb_no_page(struct mm_struct *mm, +@@ -4874,6 +4904,7 @@ static vm_fault_t hugetlb_no_page(struct mm_struct *mm, spinlock_t *ptl; unsigned long haddr = address & huge_page_mask(h); bool new_page, new_pagecache_page = false; @@ -372016,7 +480545,7 @@ index 95dc7b83381f9..f5f8929fad515 100644 /* * Currently, we are forced to kill the process in the event the -@@ -4883,7 +4894,7 @@ static vm_fault_t hugetlb_no_page(struct mm_struct *mm, +@@ -4883,7 +4914,7 @@ static vm_fault_t hugetlb_no_page(struct mm_struct *mm, if (is_vma_resv_set(vma, HPAGE_RESV_UNMAPPED)) { pr_warn_ratelimited("PID %d killed due to inadequate hugepage pool\n", current->pid); @@ -372025,7 +480554,7 @@ index 95dc7b83381f9..f5f8929fad515 100644 } /* -@@ -4900,12 +4911,10 @@ retry: +@@ -4900,12 +4931,10 @@ retry: page = find_lock_page(mapping, idx); if (!page) { /* Check for page in userfault range */ @@ -372040,7 +480569,7 @@ index 95dc7b83381f9..f5f8929fad515 100644 page = alloc_huge_page(vma, haddr, 0); if (IS_ERR(page)) { -@@ -4965,10 +4974,9 @@ retry: +@@ -4965,10 +4994,9 @@ retry: if (userfaultfd_minor(vma)) { unlock_page(page); put_page(page); @@ -372052,7 +480581,7 @@ index 95dc7b83381f9..f5f8929fad515 100644 } } -@@ -5019,6 +5027,8 @@ retry: +@@ -5019,6 +5047,8 @@ retry: unlock_page(page); out: @@ -372061,7 +480590,7 @@ index 95dc7b83381f9..f5f8929fad515 100644 return ret; backout: -@@ -5116,10 +5126,12 @@ vm_fault_t hugetlb_fault(struct mm_struct *mm, 
struct vm_area_struct *vma, +@@ -5116,10 +5146,12 @@ vm_fault_t hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma, mutex_lock(&hugetlb_fault_mutex_table[hash]); entry = huge_ptep_get(ptep); @@ -372078,7 +480607,7 @@ index 95dc7b83381f9..f5f8929fad515 100644 ret = 0; -@@ -5236,13 +5248,14 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm, +@@ -5236,13 +5268,14 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm, int ret = -ENOMEM; struct page *page; int writable; @@ -372094,7 +480623,7 @@ index 95dc7b83381f9..f5f8929fad515 100644 } else if (!*pagep) { /* If a page already exists, then it's UFFDIO_COPY for * a non-missing case. Return -EEXIST. -@@ -5298,6 +5311,7 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm, +@@ -5298,6 +5331,7 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm, page = alloc_huge_page(dst_vma, dst_addr, 0); if (IS_ERR(page)) { @@ -372102,7 +480631,7 @@ index 95dc7b83381f9..f5f8929fad515 100644 ret = -ENOMEM; *pagep = NULL; goto out; -@@ -5330,7 +5344,7 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm, +@@ -5330,12 +5364,16 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm, ret = huge_add_to_page_cache(page, mapping, idx); if (ret) goto out_release_nounlock; @@ -372111,7 +480640,16 @@ index 95dc7b83381f9..f5f8929fad515 100644 } ptl = huge_pte_lockptr(h, dst_mm, dst_pte); -@@ -5354,7 +5368,7 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm, + spin_lock(ptl); + ++ ret = -EIO; ++ if (PageHWPoison(page)) ++ goto out_release_unlock; ++ + /* + * Recheck the i_size after holding PT lock to make sure not + * to leave any page mapped (as page_mapped()) beyond the end +@@ -5354,7 +5392,7 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm, if (!huge_pte_none(huge_ptep_get(dst_pte))) goto out_release_unlock; @@ -372120,7 +480658,7 @@ index 95dc7b83381f9..f5f8929fad515 100644 page_dup_rmap(page, true); } else { ClearHPageRestoreReserve(page); -@@ -5394,7 +5408,7 @@ out_release_unlock: +@@ -5394,7 +5432,7 @@ out_release_unlock: if (vm_shared || is_continue) unlock_page(page); out_release_nounlock: @@ -372129,7 +480667,7 @@ index 95dc7b83381f9..f5f8929fad515 100644 restore_reserve_on_error(h, dst_vma, dst_addr, page); put_page(page); goto out; -@@ -6044,7 +6058,14 @@ int huge_pmd_unshare(struct mm_struct *mm, struct vm_area_struct *vma, +@@ -6044,7 +6082,14 @@ int huge_pmd_unshare(struct mm_struct *mm, struct vm_area_struct *vma, pud_clear(pud); put_page(virt_to_page(ptep)); mm_dec_nr_pmds(mm); @@ -372145,7 +480683,78 @@ index 95dc7b83381f9..f5f8929fad515 100644 return 1; } -@@ -6267,6 +6288,16 @@ int get_hwpoison_huge_page(struct page *page, bool *hugetlb) +@@ -6161,12 +6206,13 @@ follow_huge_pd(struct vm_area_struct *vma, + } + + struct page * __weak +-follow_huge_pmd(struct mm_struct *mm, unsigned long address, +- pmd_t *pmd, int flags) ++follow_huge_pmd_pte(struct vm_area_struct *vma, unsigned long address, int flags) + { ++ struct hstate *h = hstate_vma(vma); ++ struct mm_struct *mm = vma->vm_mm; + struct page *page = NULL; + spinlock_t *ptl; +- pte_t pte; ++ pte_t *ptep, pte; + + /* FOLL_GET and FOLL_PIN are mutually exclusive. */ + if (WARN_ON_ONCE((flags & (FOLL_PIN | FOLL_GET)) == +@@ -6174,17 +6220,15 @@ follow_huge_pmd(struct mm_struct *mm, unsigned long address, + return NULL; + + retry: +- ptl = pmd_lockptr(mm, pmd); +- spin_lock(ptl); +- /* +- * make sure that the address range covered by this pmd is not +- * unmapped from other threads. 
+- */ +- if (!pmd_huge(*pmd)) +- goto out; +- pte = huge_ptep_get((pte_t *)pmd); ++ ptep = huge_pte_offset(mm, address, huge_page_size(h)); ++ if (!ptep) ++ return NULL; ++ ++ ptl = huge_pte_lock(h, mm, ptep); ++ pte = huge_ptep_get(ptep); + if (pte_present(pte)) { +- page = pmd_page(*pmd) + ((address & ~PMD_MASK) >> PAGE_SHIFT); ++ page = pte_page(pte) + ++ ((address & ~huge_page_mask(h)) >> PAGE_SHIFT); + /* + * try_grab_page() should always succeed here, because: a) we + * hold the pmd (ptl) lock, and b) we've just checked that the +@@ -6200,7 +6244,7 @@ retry: + } else { + if (is_hugetlb_entry_migration(pte)) { + spin_unlock(ptl); +- __migration_entry_wait(mm, (pte_t *)pmd, ptl); ++ __migration_entry_wait(mm, ptep, ptl); + goto retry; + } + /* +@@ -6232,15 +6276,15 @@ follow_huge_pgd(struct mm_struct *mm, unsigned long address, pgd_t *pgd, int fla + return pte_page(*(pte_t *)pgd) + ((address & ~PGDIR_MASK) >> PAGE_SHIFT); + } + +-bool isolate_huge_page(struct page *page, struct list_head *list) ++int isolate_hugetlb(struct page *page, struct list_head *list) + { +- bool ret = true; ++ int ret = 0; + + spin_lock_irq(&hugetlb_lock); + if (!PageHeadHuge(page) || + !HPageMigratable(page) || + !get_page_unless_zero(page)) { +- ret = false; ++ ret = -EBUSY; + goto unlock; + } + ClearHPageMigratable(page); +@@ -6267,6 +6311,16 @@ int get_hwpoison_huge_page(struct page *page, bool *hugetlb) return ret; } @@ -372162,6 +480771,54 @@ index 95dc7b83381f9..f5f8929fad515 100644 void putback_active_hugepage(struct page *page) { spin_lock_irq(&hugetlb_lock); +@@ -6315,26 +6369,21 @@ void move_hugetlb_state(struct page *oldpage, struct page *newpage, int reason) + } + } + +-/* +- * This function will unconditionally remove all the shared pmd pgtable entries +- * within the specific vma for a hugetlbfs memory range. +- */ +-void hugetlb_unshare_all_pmds(struct vm_area_struct *vma) ++static void hugetlb_unshare_pmds(struct vm_area_struct *vma, ++ unsigned long start, ++ unsigned long end) + { + struct hstate *h = hstate_vma(vma); + unsigned long sz = huge_page_size(h); + struct mm_struct *mm = vma->vm_mm; + struct mmu_notifier_range range; +- unsigned long address, start, end; ++ unsigned long address; + spinlock_t *ptl; + pte_t *ptep; + + if (!(vma->vm_flags & VM_MAYSHARE)) + return; + +- start = ALIGN(vma->vm_start, PUD_SIZE); +- end = ALIGN_DOWN(vma->vm_end, PUD_SIZE); +- + if (start >= end) + return; + +@@ -6366,6 +6415,16 @@ void hugetlb_unshare_all_pmds(struct vm_area_struct *vma) + mmu_notifier_invalidate_range_end(&range); + } + ++/* ++ * This function will unconditionally remove all the shared pmd pgtable entries ++ * within the specific vma for a hugetlbfs memory range. 
++ */ ++void hugetlb_unshare_all_pmds(struct vm_area_struct *vma) ++{ ++ hugetlb_unshare_pmds(vma, ALIGN(vma->vm_start, PUD_SIZE), ++ ALIGN_DOWN(vma->vm_end, PUD_SIZE)); ++} ++ + #ifdef CONFIG_CMA + static bool cma_reserve_called __initdata; + diff --git a/mm/hwpoison-inject.c b/mm/hwpoison-inject.c index aff4d27ec2352..a1d6fc3c78b9c 100644 --- a/mm/hwpoison-inject.c @@ -372217,6 +480874,29 @@ index d8ccff4c1275e..1bd6a3f13467b 100644 qlist_move_cache(q, &to_free, cache); qlist_free_all(&to_free, cache); } +diff --git a/mm/kasan/report.c b/mm/kasan/report.c +index 884a950c70265..887af873733bc 100644 +--- a/mm/kasan/report.c ++++ b/mm/kasan/report.c +@@ -117,16 +117,8 @@ static void end_report(unsigned long *flags, unsigned long addr) + pr_err("==================================================================\n"); + add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE); + spin_unlock_irqrestore(&report_lock, *flags); +- if (panic_on_warn && !test_bit(KASAN_BIT_MULTI_SHOT, &kasan_flags)) { +- /* +- * This thread may hit another WARN() in the panic path. +- * Resetting this prevents additional WARN() from panicking the +- * system on this thread. Other threads are blocked by the +- * panic_mutex in panic(). +- */ +- panic_on_warn = 0; +- panic("panic_on_warn set ...\n"); +- } ++ if (!test_bit(KASAN_BIT_MULTI_SHOT, &kasan_flags)) ++ check_panic_on_warn("KASAN"); + if (kasan_arg_fault == KASAN_ARG_FAULT_PANIC) + panic("kasan.fault=panic set ...\n"); + kasan_enable_current(); diff --git a/mm/kasan/shadow.c b/mm/kasan/shadow.c index 8d95ee52d0194..dd79840e60964 100644 --- a/mm/kasan/shadow.c @@ -372701,10 +481381,17 @@ index c1f23c61e5f91..600f2e2431d6d 100644 enum kfence_error_type { KFENCE_ERROR_OOB, /* Detected a out-of-bounds access. */ diff --git a/mm/kfence/report.c b/mm/kfence/report.c -index f93a7b2a338be..37e140e7f201e 100644 +index f93a7b2a338be..cbd9456359b96 100644 --- a/mm/kfence/report.c +++ b/mm/kfence/report.c -@@ -273,3 +273,50 @@ void kfence_report_error(unsigned long address, bool is_write, struct pt_regs *r +@@ -267,9 +267,55 @@ void kfence_report_error(unsigned long address, bool is_write, struct pt_regs *r + + lockdep_on(); + +- if (panic_on_warn) +- panic("panic_on_warn set ...\n"); ++ check_panic_on_warn("KFENCE"); + /* We encountered a memory safety error, taint the kernel! 
*/ add_taint(TAINT_BAD_PAGE, LOCKDEP_STILL_OK); } @@ -372756,10 +481443,10 @@ index f93a7b2a338be..37e140e7f201e 100644 +} +#endif diff --git a/mm/khugepaged.c b/mm/khugepaged.c -index 8a8b3aa929370..dd069afd9cb9c 100644 +index 8a8b3aa929370..3afcb1466ec51 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c -@@ -1146,10 +1146,12 @@ static void collapse_huge_page(struct mm_struct *mm, +@@ -1146,14 +1146,17 @@ static void collapse_huge_page(struct mm_struct *mm, pmd_ptl = pmd_lock(mm, pmd); /* probably unnecessary */ /* @@ -372776,6 +481463,105 @@ index 8a8b3aa929370..dd069afd9cb9c 100644 */ _pmd = pmdp_collapse_flush(vma, address, pmd); spin_unlock(pmd_ptl); + mmu_notifier_invalidate_range_end(&range); ++ tlb_remove_table_sync_one(); + + spin_lock(pte_ptl); + isolated = __collapse_huge_page_isolate(vma, address, pte, +@@ -1440,6 +1443,7 @@ void collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr) + spinlock_t *ptl; + int count = 0; + int i; ++ struct mmu_notifier_range range; + + if (!vma || !vma->vm_file || + !range_in_vma(vma, haddr, haddr + HPAGE_PMD_SIZE)) +@@ -1466,6 +1470,19 @@ void collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr) + if (!pmd) + goto drop_hpage; + ++ /* ++ * We need to lock the mapping so that from here on, only GUP-fast and ++ * hardware page walks can access the parts of the page tables that ++ * we're operating on. ++ */ ++ i_mmap_lock_write(vma->vm_file->f_mapping); ++ ++ /* ++ * This spinlock should be unnecessary: Nobody else should be accessing ++ * the page tables under spinlock protection here, only ++ * lockless_pages_from_mm() and the hardware page walker can access page ++ * tables while all the high-level locks are held in write mode. ++ */ + start_pte = pte_offset_map_lock(mm, pmd, haddr, &ptl); + + /* step 1: check all mapped PTEs are to the right huge page */ +@@ -1512,12 +1529,23 @@ void collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr) + } + + /* step 4: collapse pmd */ +- ptl = pmd_lock(vma->vm_mm, pmd); ++ /* we make no change to anon, but protect concurrent anon page lookup */ ++ if (vma->anon_vma) ++ anon_vma_lock_write(vma->anon_vma); ++ ++ mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, NULL, mm, haddr, ++ haddr + HPAGE_PMD_SIZE); ++ mmu_notifier_invalidate_range_start(&range); + _pmd = pmdp_collapse_flush(vma, haddr, pmd); +- spin_unlock(ptl); + mm_dec_nr_ptes(mm); ++ tlb_remove_table_sync_one(); ++ mmu_notifier_invalidate_range_end(&range); + pte_free(mm, pmd_pgtable(_pmd)); + ++ if (vma->anon_vma) ++ anon_vma_unlock_write(vma->anon_vma); ++ i_mmap_unlock_write(vma->vm_file->f_mapping); ++ + drop_hpage: + unlock_page(hpage); + put_page(hpage); +@@ -1525,6 +1553,7 @@ drop_hpage: + + abort: + pte_unmap_unlock(start_pte, ptl); ++ i_mmap_unlock_write(vma->vm_file->f_mapping); + goto drop_hpage; + } + +@@ -1573,7 +1602,8 @@ static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff) + * An alternative would be drop the check, but check that page + * table is clear before calling pmdp_collapse_flush() under + * ptl. It has higher chance to recover THP for the VMA, but +- * has higher cost too. ++ * has higher cost too. It would also probably require locking ++ * the anon_vma. 
+ */ + if (vma->anon_vma) + continue; +@@ -1595,12 +1625,19 @@ static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff) + */ + if (mmap_write_trylock(mm)) { + if (!khugepaged_test_exit(mm)) { +- spinlock_t *ptl = pmd_lock(mm, pmd); ++ struct mmu_notifier_range range; ++ ++ mmu_notifier_range_init(&range, ++ MMU_NOTIFY_CLEAR, 0, ++ NULL, mm, addr, ++ addr + HPAGE_PMD_SIZE); ++ mmu_notifier_invalidate_range_start(&range); + /* assume page table is clear */ + _pmd = pmdp_collapse_flush(vma, addr, pmd); +- spin_unlock(ptl); + mm_dec_nr_ptes(mm); ++ tlb_remove_table_sync_one(); + pte_free(mm, pmd_pgtable(_pmd)); ++ mmu_notifier_invalidate_range_end(&range); + } + mmap_write_unlock(mm); + } else { diff --git a/mm/kmemleak.c b/mm/kmemleak.c index b57383c17cf60..b78861b8e0139 100644 --- a/mm/kmemleak.c @@ -372844,6 +481630,19 @@ index b57383c17cf60..b78861b8e0139 100644 continue; /* only scan if page is in use */ if (page_count(page) == 0) +diff --git a/mm/maccess.c b/mm/maccess.c +index d3f1a1f0b1c1a..ded4bfaba7f37 100644 +--- a/mm/maccess.c ++++ b/mm/maccess.c +@@ -99,7 +99,7 @@ long strncpy_from_kernel_nofault(char *dst, const void *unsafe_addr, long count) + return src - unsafe_addr; + Efault: + pagefault_enable(); +- dst[-1] = '\0'; ++ dst[0] = '\0'; + return -EFAULT; + } + #else /* HAVE_GET_KERNEL_NOFAULT */ diff --git a/mm/madvise.c b/mm/madvise.c index 0734db8d53a7a..6c099f8bb8e69 100644 --- a/mm/madvise.c @@ -372909,7 +481708,7 @@ index 5096500b26473..2b7397781c99a 100644 memblock_memory = NULL; diff --git a/mm/memcontrol.c b/mm/memcontrol.c -index 6da5020a8656d..971546bb99e04 100644 +index 6da5020a8656d..3d3364cd4ff19 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -103,11 +103,6 @@ static bool do_memsw_account(void) @@ -373187,7 +481986,50 @@ index 6da5020a8656d..971546bb99e04 100644 *pdirty = memcg_page_state(memcg, NR_FILE_DIRTY); *pwriteback = memcg_page_state(memcg, NR_WRITEBACK); -@@ -5341,21 +5394,6 @@ static void mem_cgroup_css_reset(struct cgroup_subsys_state *css) +@@ -4736,6 +4789,7 @@ static ssize_t memcg_write_event_control(struct kernfs_open_file *of, + unsigned int efd, cfd; + struct fd efile; + struct fd cfile; ++ struct dentry *cdentry; + const char *name; + char *endp; + int ret; +@@ -4786,6 +4840,16 @@ static ssize_t memcg_write_event_control(struct kernfs_open_file *of, + if (ret < 0) + goto out_put_cfile; + ++ /* ++ * The control file must be a regular cgroup1 file. As a regular cgroup ++ * file can't be renamed, it's safe to access its name afterwards. ++ */ ++ cdentry = cfile.file->f_path.dentry; ++ if (cdentry->d_sb->s_type != &cgroup_fs_type || !d_is_reg(cdentry)) { ++ ret = -EINVAL; ++ goto out_put_cfile; ++ } ++ + /* + * Determine the event callbacks and set them in @event. This used + * to be done via struct cftype but cgroup core no longer knows +@@ -4794,7 +4858,7 @@ static ssize_t memcg_write_event_control(struct kernfs_open_file *of, + * + * DO NOT ADD NEW FILES. + */ +- name = cfile.file->f_path.dentry->d_name.name; ++ name = cdentry->d_name.name; + + if (!strcmp(name, "memory.usage_in_bytes")) { + event->register_event = mem_cgroup_usage_register_event; +@@ -4818,7 +4882,7 @@ static ssize_t memcg_write_event_control(struct kernfs_open_file *of, + * automatically removed on cgroup destruction but the removal is + * asynchronous, so take an extra ref on @css. 
+ */ +- cfile_css = css_tryget_online_from_dir(cfile.file->f_path.dentry->d_parent, ++ cfile_css = css_tryget_online_from_dir(cdentry->d_parent, + &memory_cgrp_subsys); + ret = -EINVAL; + if (IS_ERR(cfile_css)) +@@ -5341,21 +5405,6 @@ static void mem_cgroup_css_reset(struct cgroup_subsys_state *css) memcg_wb_domain_size_changed(memcg); } @@ -373209,7 +482051,7 @@ index 6da5020a8656d..971546bb99e04 100644 static void mem_cgroup_css_rstat_flush(struct cgroup_subsys_state *css, int cpu) { struct mem_cgroup *memcg = mem_cgroup_from_css(css); -@@ -6373,7 +6411,7 @@ static int memory_numa_stat_show(struct seq_file *m, void *v) +@@ -6373,7 +6422,7 @@ static int memory_numa_stat_show(struct seq_file *m, void *v) int i; struct mem_cgroup *memcg = mem_cgroup_from_seq(m); @@ -373218,7 +482060,7 @@ index 6da5020a8656d..971546bb99e04 100644 for (i = 0; i < ARRAY_SIZE(memory_stats); i++) { int nid; -@@ -7077,7 +7115,7 @@ static int __init cgroup_memory(char *s) +@@ -7077,7 +7126,7 @@ static int __init cgroup_memory(char *s) if (!strcmp(token, "nokmem")) cgroup_memory_nokmem = true; } @@ -373309,10 +482151,18 @@ index 081dd33e6a61b..475d095dd7f53 100644 xas_pause(&xas); xas_unlock_irq(&xas); diff --git a/mm/memory-failure.c b/mm/memory-failure.c -index bdbbb32211a58..31db222b6deb8 100644 +index bdbbb32211a58..9f9dd968fbe3c 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c -@@ -700,13 +700,18 @@ static int kill_accessing_process(struct task_struct *p, unsigned long pfn, +@@ -57,6 +57,7 @@ + #include <linux/ratelimit.h> + #include <linux/page-isolation.h> + #include <linux/pagewalk.h> ++#include <linux/shmem_fs.h> + #include "internal.h" + #include "ras/ras_event.h" + +@@ -700,13 +701,18 @@ static int kill_accessing_process(struct task_struct *p, unsigned long pfn, }; priv.tk.tsk = p; @@ -373332,7 +482182,235 @@ index bdbbb32211a58..31db222b6deb8 100644 } static const char *action_name[] = { -@@ -1215,7 +1220,7 @@ try_again: +@@ -806,12 +812,44 @@ static int truncate_error_page(struct page *p, unsigned long pfn, + return ret; + } + ++struct page_state { ++ unsigned long mask; ++ unsigned long res; ++ enum mf_action_page_type type; ++ ++ /* Callback ->action() has to unlock the relevant page inside it. */ ++ int (*action)(struct page_state *ps, struct page *p); ++}; ++ ++/* ++ * Return true if page is still referenced by others, otherwise return ++ * false. ++ * ++ * The extra_pins is true when one extra refcount is expected. ++ */ ++static bool has_extra_refcount(struct page_state *ps, struct page *p, ++ bool extra_pins) ++{ ++ int count = page_count(p) - 1; ++ ++ if (extra_pins) ++ count -= 1; ++ ++ if (count > 0) { ++ pr_err("Memory failure: %#lx: %s still referenced by %d users\n", ++ page_to_pfn(p), action_page_types[ps->type], count); ++ return true; ++ } ++ ++ return false; ++} ++ + /* + * Error hit kernel page. + * Do nothing, try to be lucky and not touch this instead. For a few cases we + * could be more sophisticated. + */ +-static int me_kernel(struct page *p, unsigned long pfn) ++static int me_kernel(struct page_state *ps, struct page *p) + { + unlock_page(p); + return MF_IGNORED; +@@ -820,9 +858,9 @@ static int me_kernel(struct page *p, unsigned long pfn) + /* + * Page in unknown state. Do nothing. 
+ */ +-static int me_unknown(struct page *p, unsigned long pfn) ++static int me_unknown(struct page_state *ps, struct page *p) + { +- pr_err("Memory failure: %#lx: Unknown page state\n", pfn); ++ pr_err("Memory failure: %#lx: Unknown page state\n", page_to_pfn(p)); + unlock_page(p); + return MF_FAILED; + } +@@ -830,10 +868,11 @@ static int me_unknown(struct page *p, unsigned long pfn) + /* + * Clean (or cleaned) page cache page. + */ +-static int me_pagecache_clean(struct page *p, unsigned long pfn) ++static int me_pagecache_clean(struct page_state *ps, struct page *p) + { + int ret; + struct address_space *mapping; ++ bool extra_pins; + + delete_from_lru_cache(p); + +@@ -862,14 +901,24 @@ static int me_pagecache_clean(struct page *p, unsigned long pfn) + goto out; + } + ++ /* ++ * The shmem page is kept in page cache instead of truncating ++ * so is expected to have an extra refcount after error-handling. ++ */ ++ extra_pins = shmem_mapping(mapping); ++ + /* + * Truncation is a bit tricky. Enable it per file system for now. + * + * Open: to take i_rwsem or not for this? Right now we don't. + */ +- ret = truncate_error_page(p, pfn, mapping); ++ ret = truncate_error_page(p, page_to_pfn(p), mapping); ++ if (has_extra_refcount(ps, p, extra_pins)) ++ ret = MF_FAILED; ++ + out: + unlock_page(p); ++ + return ret; + } + +@@ -878,7 +927,7 @@ out: + * Issues: when the error hit a hole page the error is not properly + * propagated. + */ +-static int me_pagecache_dirty(struct page *p, unsigned long pfn) ++static int me_pagecache_dirty(struct page_state *ps, struct page *p) + { + struct address_space *mapping = page_mapping(p); + +@@ -922,7 +971,7 @@ static int me_pagecache_dirty(struct page *p, unsigned long pfn) + mapping_set_error(mapping, -EIO); + } + +- return me_pagecache_clean(p, pfn); ++ return me_pagecache_clean(ps, p); + } + + /* +@@ -944,9 +993,10 @@ static int me_pagecache_dirty(struct page *p, unsigned long pfn) + * Clean swap cache pages can be directly isolated. A later page fault will + * bring in the known good data from disk. + */ +-static int me_swapcache_dirty(struct page *p, unsigned long pfn) ++static int me_swapcache_dirty(struct page_state *ps, struct page *p) + { + int ret; ++ bool extra_pins = false; + + ClearPageDirty(p); + /* Trigger EIO in shmem: */ +@@ -954,10 +1004,17 @@ static int me_swapcache_dirty(struct page *p, unsigned long pfn) + + ret = delete_from_lru_cache(p) ? MF_FAILED : MF_DELAYED; + unlock_page(p); ++ ++ if (ret == MF_DELAYED) ++ extra_pins = true; ++ ++ if (has_extra_refcount(ps, p, extra_pins)) ++ ret = MF_FAILED; ++ + return ret; + } + +-static int me_swapcache_clean(struct page *p, unsigned long pfn) ++static int me_swapcache_clean(struct page_state *ps, struct page *p) + { + int ret; + +@@ -965,6 +1022,10 @@ static int me_swapcache_clean(struct page *p, unsigned long pfn) + + ret = delete_from_lru_cache(p) ? MF_FAILED : MF_RECOVERED; + unlock_page(p); ++ ++ if (has_extra_refcount(ps, p, false)) ++ ret = MF_FAILED; ++ + return ret; + } + +@@ -974,18 +1035,21 @@ static int me_swapcache_clean(struct page *p, unsigned long pfn) + * - Error on hugepage is contained in hugepage unit (not in raw page unit.) + * To narrow down kill region to one page, we need to break up pmd. 
+ */ +-static int me_huge_page(struct page *p, unsigned long pfn) ++static int me_huge_page(struct page_state *ps, struct page *p) + { + int res; + struct page *hpage = compound_head(p); + struct address_space *mapping; ++ bool extra_pins = false; + + if (!PageHuge(hpage)) + return MF_DELAYED; + + mapping = page_mapping(hpage); + if (mapping) { +- res = truncate_error_page(hpage, pfn, mapping); ++ res = truncate_error_page(hpage, page_to_pfn(p), mapping); ++ /* The page is kept in page cache. */ ++ extra_pins = true; + unlock_page(hpage); + } else { + res = MF_FAILED; +@@ -1003,6 +1067,9 @@ static int me_huge_page(struct page *p, unsigned long pfn) + } + } + ++ if (has_extra_refcount(ps, p, extra_pins)) ++ res = MF_FAILED; ++ + return res; + } + +@@ -1028,14 +1095,7 @@ static int me_huge_page(struct page *p, unsigned long pfn) + #define slab (1UL << PG_slab) + #define reserved (1UL << PG_reserved) + +-static struct page_state { +- unsigned long mask; +- unsigned long res; +- enum mf_action_page_type type; +- +- /* Callback ->action() has to unlock the relevant page inside it. */ +- int (*action)(struct page *p, unsigned long pfn); +-} error_states[] = { ++static struct page_state error_states[] = { + { reserved, reserved, MF_MSG_KERNEL, me_kernel }, + /* + * free pages are specially detected outside this table: +@@ -1095,19 +1155,10 @@ static int page_action(struct page_state *ps, struct page *p, + unsigned long pfn) + { + int result; +- int count; + + /* page p should be unlocked after returning from ps->action(). */ +- result = ps->action(p, pfn); ++ result = ps->action(ps, p); + +- count = page_count(p) - 1; +- if (ps->action == me_swapcache_dirty && result == MF_DELAYED) +- count--; +- if (count > 0) { +- pr_err("Memory failure: %#lx: %s still referenced by %d users\n", +- pfn, action_page_types[ps->type], count); +- result = MF_FAILED; +- } + action_result(pfn, ps->type, result); + + /* Could do more checks here if page looks ok */ +@@ -1215,7 +1266,7 @@ try_again: } out: if (ret == -EIO) @@ -373341,7 +482419,24 @@ index bdbbb32211a58..31db222b6deb8 100644 return ret; } -@@ -1416,64 +1421,115 @@ static int try_to_split_thp_page(struct page *page, const char *msg) +@@ -1400,14 +1451,11 @@ static int identify_page_state(unsigned long pfn, struct page *p, + static int try_to_split_thp_page(struct page *page, const char *msg) + { + lock_page(page); +- if (!PageAnon(page) || unlikely(split_huge_page(page))) { ++ if (unlikely(split_huge_page(page))) { + unsigned long pfn = page_to_pfn(page); + + unlock_page(page); +- if (!PageAnon(page)) +- pr_info("%s: %#lx: non anonymous thp\n", msg, pfn); +- else +- pr_info("%s: %#lx: thp split failed\n", msg, pfn); ++ pr_info("%s: %#lx: thp split failed\n", msg, pfn); + put_page(page); + return -EBUSY; + } +@@ -1416,64 +1464,115 @@ static int try_to_split_thp_page(struct page *page, const char *msg) return 0; } @@ -373500,7 +482595,7 @@ index bdbbb32211a58..31db222b6deb8 100644 /* * TODO: hwpoison for pud-sized hugetlb doesn't work right now, so * simply disable it. 
In order to make it work properly, we need -@@ -1500,6 +1556,12 @@ out: +@@ -1500,6 +1599,12 @@ out: unlock_page(head); return res; } @@ -373513,7 +482608,7 @@ index bdbbb32211a58..31db222b6deb8 100644 static int memory_failure_dev_pagemap(unsigned long pfn, int flags, struct dev_pagemap *pgmap) -@@ -1536,7 +1598,7 @@ static int memory_failure_dev_pagemap(unsigned long pfn, int flags, +@@ -1536,7 +1641,7 @@ static int memory_failure_dev_pagemap(unsigned long pfn, int flags, goto out; if (hwpoison_filter(page)) { @@ -373522,7 +482617,7 @@ index bdbbb32211a58..31db222b6deb8 100644 goto unlock; } -@@ -1587,6 +1649,8 @@ out: +@@ -1587,6 +1692,8 @@ out: return rc; } @@ -373531,7 +482626,7 @@ index bdbbb32211a58..31db222b6deb8 100644 /** * memory_failure - Handle memory failure of a page. * @pfn: Page Number of the corrupted page -@@ -1603,6 +1667,10 @@ out: +@@ -1603,6 +1710,10 @@ out: * * Must run in process context (e.g. a work queue) with interrupts * enabled and no spinlocks hold. @@ -373542,7 +482637,7 @@ index bdbbb32211a58..31db222b6deb8 100644 */ int memory_failure(unsigned long pfn, int flags) { -@@ -1613,7 +1681,7 @@ int memory_failure(unsigned long pfn, int flags) +@@ -1613,7 +1724,7 @@ int memory_failure(unsigned long pfn, int flags) int res = 0; unsigned long page_flags; bool retry = true; @@ -373551,7 +482646,7 @@ index bdbbb32211a58..31db222b6deb8 100644 if (!sysctl_memory_failure_recovery) panic("Memory failure on page %lx", pfn); -@@ -1634,10 +1702,9 @@ int memory_failure(unsigned long pfn, int flags) +@@ -1634,10 +1745,9 @@ int memory_failure(unsigned long pfn, int flags) mutex_lock(&mf_mutex); try_again: @@ -373564,7 +482659,7 @@ index bdbbb32211a58..31db222b6deb8 100644 if (TestSetPageHWPoison(p)) { pr_err("Memory failure: %#lx: already hardware poisoned\n", -@@ -1747,21 +1814,12 @@ try_again: +@@ -1747,21 +1857,12 @@ try_again: */ page_flags = p->flags; @@ -373587,7 +482682,7 @@ index bdbbb32211a58..31db222b6deb8 100644 goto unlock_mutex; } -@@ -1937,6 +1995,7 @@ int unpoison_memory(unsigned long pfn) +@@ -1937,6 +2038,7 @@ int unpoison_memory(unsigned long pfn) struct page *page; struct page *p; int freeit = 0; @@ -373595,7 +482690,7 @@ index bdbbb32211a58..31db222b6deb8 100644 unsigned long flags = 0; static DEFINE_RATELIMIT_STATE(unpoison_rs, DEFAULT_RATELIMIT_INTERVAL, DEFAULT_RATELIMIT_BURST); -@@ -1947,39 +2006,30 @@ int unpoison_memory(unsigned long pfn) +@@ -1947,39 +2049,30 @@ int unpoison_memory(unsigned long pfn) p = pfn_to_page(pfn); page = compound_head(p); @@ -373641,7 +482736,7 @@ index bdbbb32211a58..31db222b6deb8 100644 } if (!get_hwpoison_page(p, flags)) { -@@ -1987,29 +2037,23 @@ int unpoison_memory(unsigned long pfn) +@@ -1987,29 +2080,23 @@ int unpoison_memory(unsigned long pfn) num_poisoned_pages_dec(); unpoison_pr_info("Unpoison: Software-unpoisoned free page %#lx\n", pfn, &unpoison_rs); @@ -373675,7 +482770,16 @@ index bdbbb32211a58..31db222b6deb8 100644 } EXPORT_SYMBOL(unpoison_memory); -@@ -2190,9 +2234,12 @@ int soft_offline_page(unsigned long pfn, int flags) +@@ -2019,7 +2106,7 @@ static bool isolate_page(struct page *page, struct list_head *pagelist) + bool lru = PageLRU(page); + + if (PageHuge(page)) { +- isolated = isolate_huge_page(page, pagelist); ++ isolated = !isolate_hugetlb(page, pagelist); + } else { + if (lru) + isolated = !isolate_lru_page(page); +@@ -2190,9 +2277,12 @@ int soft_offline_page(unsigned long pfn, int flags) return -EIO; } @@ -373688,7 +482792,7 @@ index bdbbb32211a58..31db222b6deb8 100644 return 0; } -@@ -2206,9 +2253,12 
@@ retry: +@@ -2206,9 +2296,12 @@ retry: } else if (ret == 0) { if (soft_offline_free_page(page) && try_again) { try_again = false; @@ -373838,8 +482942,21 @@ index c52be6d6b6055..a4d0f744a458d 100644 cond_resched(); } return ret_val; +diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c +index 9fd0be32a281e..81f2a97c886c9 100644 +--- a/mm/memory_hotplug.c ++++ b/mm/memory_hotplug.c +@@ -1704,7 +1704,7 @@ do_migrate_range(unsigned long start_pfn, unsigned long end_pfn) + + if (PageHuge(page)) { + pfn = page_to_pfn(head) + compound_nr(head) - 1; +- isolate_huge_page(head, &source); ++ isolate_hugetlb(head, &source); + continue; + } else if (PageTransHuge(page)) + pfn = page_to_pfn(head) + thp_nr_pages(page) - 1; diff --git a/mm/mempolicy.c b/mm/mempolicy.c -index d12e0608fced2..4472be6f123db 100644 +index d12e0608fced2..818753635e427 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -347,7 +347,7 @@ static void mpol_rebind_preferred(struct mempolicy *pol, @@ -373851,7 +482968,19 @@ index d12e0608fced2..4472be6f123db 100644 return; if (!mpol_store_user_nodemask(pol) && nodes_equal(pol->w.cpuset_mems_allowed, *newmask)) -@@ -783,7 +783,6 @@ static int vma_replace_policy(struct vm_area_struct *vma, +@@ -603,8 +603,9 @@ static int queue_pages_hugetlb(pte_t *pte, unsigned long hmask, + + /* With MPOL_MF_MOVE, we migrate only unshared hugepage. */ + if (flags & (MPOL_MF_MOVE_ALL) || +- (flags & MPOL_MF_MOVE && page_mapcount(page) == 1)) { +- if (!isolate_huge_page(page, qp->pagelist) && ++ (flags & MPOL_MF_MOVE && page_mapcount(page) == 1 && ++ !hugetlb_pmd_shared(pte))) { ++ if (isolate_hugetlb(page, qp->pagelist) && + (flags & MPOL_MF_STRICT)) + /* + * Failed to isolate page but allow migrating pages +@@ -783,7 +784,6 @@ static int vma_replace_policy(struct vm_area_struct *vma, static int mbind_range(struct mm_struct *mm, unsigned long start, unsigned long end, struct mempolicy *new_pol) { @@ -373859,7 +482988,7 @@ index d12e0608fced2..4472be6f123db 100644 struct vm_area_struct *prev; struct vm_area_struct *vma; int err = 0; -@@ -798,8 +797,7 @@ static int mbind_range(struct mm_struct *mm, unsigned long start, +@@ -798,8 +798,7 @@ static int mbind_range(struct mm_struct *mm, unsigned long start, if (start > vma->vm_start) prev = vma; @@ -373869,7 +482998,7 @@ index d12e0608fced2..4472be6f123db 100644 vmstart = max(start, vma->vm_start); vmend = min(end, vma->vm_end); -@@ -813,10 +811,6 @@ static int mbind_range(struct mm_struct *mm, unsigned long start, +@@ -813,10 +812,6 @@ static int mbind_range(struct mm_struct *mm, unsigned long start, new_pol, vma->vm_userfaultfd_ctx); if (prev) { vma = prev; @@ -373880,7 +483009,7 @@ index d12e0608fced2..4472be6f123db 100644 goto replace; } if (vma->vm_start != vmstart) { -@@ -1395,7 +1389,7 @@ static int get_nodes(nodemask_t *nodes, const unsigned long __user *nmask, +@@ -1395,7 +1390,7 @@ static int get_nodes(nodemask_t *nodes, const unsigned long __user *nmask, unsigned long bits = min_t(unsigned long, maxnode, BITS_PER_LONG); unsigned long t; @@ -373889,7 +483018,7 @@ index d12e0608fced2..4472be6f123db 100644 return -EFAULT; if (maxnode - bits >= MAX_NUMNODES) { -@@ -2140,8 +2134,7 @@ struct page *alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma, +@@ -2140,8 +2135,7 @@ struct page *alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma, * memory with both reclaim and compact as well. 
*/ if (!page && (gfp & __GFP_DIRECT_RECLAIM)) @@ -373899,7 +483028,7 @@ index d12e0608fced2..4472be6f123db 100644 goto out; } -@@ -2568,6 +2561,7 @@ alloc_new: +@@ -2568,6 +2562,7 @@ alloc_new: mpol_new = kmem_cache_alloc(policy_cache, GFP_KERNEL); if (!mpol_new) goto err_out; @@ -373908,7 +483037,7 @@ index d12e0608fced2..4472be6f123db 100644 } diff --git a/mm/memremap.c b/mm/memremap.c -index ed593bf87109a..8d743cbc29642 100644 +index ed593bf87109a..1a7539502bbc0 100644 --- a/mm/memremap.c +++ b/mm/memremap.c @@ -112,30 +112,6 @@ static unsigned long pfn_next(unsigned long pfn) @@ -373990,7 +483119,15 @@ index ed593bf87109a..8d743cbc29642 100644 return 0; err_add_memory: -@@ -362,22 +338,11 @@ void *memremap_pages(struct dev_pagemap *pgmap, int nid) +@@ -351,6 +327,7 @@ void *memremap_pages(struct dev_pagemap *pgmap, int nid) + WARN(1, "File system DAX not supported\n"); + return ERR_PTR(-EINVAL); + } ++ params.pgprot = pgprot_decrypted(params.pgprot); + break; + case MEMORY_DEVICE_GENERIC: + break; +@@ -362,22 +339,11 @@ void *memremap_pages(struct dev_pagemap *pgmap, int nid) break; } @@ -374018,7 +483155,7 @@ index ed593bf87109a..8d743cbc29642 100644 devmap_managed_enable_get(pgmap); -@@ -486,7 +451,7 @@ struct dev_pagemap *get_dev_pagemap(unsigned long pfn, +@@ -486,7 +452,7 @@ struct dev_pagemap *get_dev_pagemap(unsigned long pfn, /* fall back to slow path lookup */ rcu_read_lock(); pgmap = xa_load(&pgmap_array, PHYS_PFN(phys)); @@ -374028,9 +483165,18 @@ index ed593bf87109a..8d743cbc29642 100644 rcu_read_unlock(); diff --git a/mm/migrate.c b/mm/migrate.c -index 1852d787e6ab6..7da052c6cf1ea 100644 +index 1852d787e6ab6..dd50b1cc089e0 100644 --- a/mm/migrate.c +++ b/mm/migrate.c +@@ -134,7 +134,7 @@ static void putback_movable_page(struct page *page) + * + * This function shall be used whenever the isolated pageset has been + * built from lru, balloon, hugetlbfs page. See isolate_migratepages_range() +- * and isolate_huge_page(). ++ * and isolate_hugetlb(). + */ + void putback_movable_pages(struct list_head *l) + { @@ -948,9 +948,12 @@ static int move_to_new_page(struct page *newpage, struct page *page, if (!PageMappingFlags(page)) page->mapping = NULL; @@ -374046,7 +483192,19 @@ index 1852d787e6ab6..7da052c6cf1ea 100644 } out: return rc; -@@ -2419,13 +2422,14 @@ next: +@@ -1719,8 +1722,9 @@ static int add_page_for_migration(struct mm_struct *mm, unsigned long addr, + + if (PageHuge(page)) { + if (PageHead(page)) { +- isolate_huge_page(page, pagelist); +- err = 1; ++ err = isolate_hugetlb(page, pagelist); ++ if (!err) ++ err = 1; + } + } else { + struct page *head; +@@ -2419,13 +2423,14 @@ next: migrate->dst[migrate->npages] = 0; migrate->src[migrate->npages++] = mpfn; } @@ -374188,6 +483346,28 @@ index 88dcc5c252255..a0a4eadc8779d 100644 free_pgtables(&tlb, vma, prev ? prev->vm_end : FIRST_USER_ADDRESS, next ? 
next->vm_start : USER_PGTABLES_CEILING); tlb_finish_mmu(&tlb); +diff --git a/mm/mmu_gather.c b/mm/mmu_gather.c +index 1b9837419bf9c..8be26c7ddb47f 100644 +--- a/mm/mmu_gather.c ++++ b/mm/mmu_gather.c +@@ -139,7 +139,7 @@ static void tlb_remove_table_smp_sync(void *arg) + /* Simply deliver the interrupt */ + } + +-static void tlb_remove_table_sync_one(void) ++void tlb_remove_table_sync_one(void) + { + /* + * This isn't an RCU grace period and hence the page-tables cannot be +@@ -163,8 +163,6 @@ static void tlb_remove_table_free(struct mmu_table_batch *batch) + + #else /* !CONFIG_MMU_GATHER_RCU_TABLE_FREE */ + +-static void tlb_remove_table_sync_one(void) { } +- + static void tlb_remove_table_free(struct mmu_table_batch *batch) + { + __tlb_remove_table_free(batch); diff --git a/mm/mmu_notifier.c b/mm/mmu_notifier.c index 459d195d2ff64..f45ff1b7626a6 100644 --- a/mm/mmu_notifier.c @@ -374430,7 +483610,7 @@ index 4812a17b288c5..8ca6617b2a723 100644 int test_clear_page_writeback(struct page *page) diff --git a/mm/page_alloc.c b/mm/page_alloc.c -index 23d3339ac4e8e..a71722b4e464b 100644 +index 23d3339ac4e8e..c929357fbefe2 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -3928,11 +3928,15 @@ static inline bool zone_watermark_fast(struct zone *z, unsigned int order, @@ -374562,7 +483742,21 @@ index 23d3339ac4e8e..a71722b4e464b 100644 goto failed_irq; break; } -@@ -5560,6 +5600,18 @@ refill: +@@ -5450,9 +5490,12 @@ EXPORT_SYMBOL(get_zeroed_page); + */ + void __free_pages(struct page *page, unsigned int order) + { ++ /* get PageHead before we drop reference */ ++ int head = PageHead(page); ++ + if (put_page_testzero(page)) + free_the_page(page, order); +- else if (!PageHead(page)) ++ else if (!head) + while (order-- > 0) + free_the_page(page + (1 << order), order); + } +@@ -5560,6 +5603,18 @@ refill: /* reset page count bias and offset to start of new frag */ nc->pagecnt_bias = PAGE_FRAG_CACHE_MAX_SIZE + 1; offset = size - fragsz; @@ -374581,7 +483775,7 @@ index 23d3339ac4e8e..a71722b4e464b 100644 } nc->pagecnt_bias--; -@@ -6090,7 +6142,7 @@ static int build_zonerefs_node(pg_data_t *pgdat, struct zoneref *zonerefs) +@@ -6090,7 +6145,7 @@ static int build_zonerefs_node(pg_data_t *pgdat, struct zoneref *zonerefs) do { zone_type--; zone = pgdat->node_zones + zone_type; @@ -374590,7 +483784,7 @@ index 23d3339ac4e8e..a71722b4e464b 100644 zoneref_set_zone(zone, &zonerefs[nr_zones++]); check_highest_zone(zone_type); } -@@ -6361,9 +6413,8 @@ static void __build_all_zonelists(void *data) +@@ -6361,9 +6416,8 @@ static void __build_all_zonelists(void *data) int nid; int __maybe_unused cpu; pg_data_t *self = data; @@ -374601,7 +483795,7 @@ index 23d3339ac4e8e..a71722b4e464b 100644 #ifdef CONFIG_NUMA memset(node_load, 0, sizeof(node_load)); -@@ -6396,7 +6447,7 @@ static void __build_all_zonelists(void *data) +@@ -6396,7 +6450,7 @@ static void __build_all_zonelists(void *data) #endif } @@ -374610,7 +483804,7 @@ index 23d3339ac4e8e..a71722b4e464b 100644 } static noinline void __init -@@ -7897,10 +7948,17 @@ restart: +@@ -7897,10 +7951,17 @@ restart: out2: /* Align start of ZONE_MOVABLE on all nids to MAX_ORDER_NR_PAGES */ @@ -374629,7 +483823,7 @@ index 23d3339ac4e8e..a71722b4e464b 100644 out: /* restore the node_state */ node_states[N_MEMORY] = saved_node_state; -@@ -8160,7 +8218,7 @@ void __init mem_init_print_info(void) +@@ -8160,7 +8221,7 @@ void __init mem_init_print_info(void) */ #define adj_init_size(start, end, size, pos, adj) \ do { \ @@ -374638,7 +483832,7 @@ index 23d3339ac4e8e..a71722b4e464b 
100644 size -= adj; \ } while (0) -@@ -9449,3 +9507,18 @@ bool take_page_off_buddy(struct page *page) +@@ -9449,3 +9510,18 @@ bool take_page_off_buddy(struct page *page) return ret; } #endif @@ -375081,7 +484275,7 @@ index 22b310adb53d9..d1986ce2e7c77 100644 /* prevent secretmem mappings from ever getting PROT_EXEC */ secretmem_mnt->mnt_flags |= MNT_NOEXEC; diff --git a/mm/shmem.c b/mm/shmem.c -index b5860f4a2738e..342d1bc728670 100644 +index b5860f4a2738e..0c37c5f0a903e 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -555,7 +555,7 @@ static unsigned long shmem_unused_huge_shrink(struct shmem_sb_info *sbinfo, @@ -375193,6 +484387,113 @@ index b5860f4a2738e..342d1bc728670 100644 } } else { page = *pagep; +@@ -2456,6 +2463,7 @@ shmem_write_begin(struct file *file, struct address_space *mapping, + struct inode *inode = mapping->host; + struct shmem_inode_info *info = SHMEM_I(inode); + pgoff_t index = pos >> PAGE_SHIFT; ++ int ret = 0; + + /* i_rwsem is held by caller */ + if (unlikely(info->seals & (F_SEAL_GROW | +@@ -2466,7 +2474,19 @@ shmem_write_begin(struct file *file, struct address_space *mapping, + return -EPERM; + } + +- return shmem_getpage(inode, index, pagep, SGP_WRITE); ++ ret = shmem_getpage(inode, index, pagep, SGP_WRITE); ++ ++ if (ret) ++ return ret; ++ ++ if (PageHWPoison(*pagep)) { ++ unlock_page(*pagep); ++ put_page(*pagep); ++ *pagep = NULL; ++ return -EIO; ++ } ++ ++ return 0; + } + + static int +@@ -2553,6 +2573,12 @@ static ssize_t shmem_file_read_iter(struct kiocb *iocb, struct iov_iter *to) + if (sgp == SGP_CACHE) + set_page_dirty(page); + unlock_page(page); ++ ++ if (PageHWPoison(page)) { ++ put_page(page); ++ error = -EIO; ++ break; ++ } + } + + /* +@@ -3114,7 +3140,8 @@ static const char *shmem_get_link(struct dentry *dentry, + page = find_get_page(inode->i_mapping, 0); + if (!page) + return ERR_PTR(-ECHILD); +- if (!PageUptodate(page)) { ++ if (PageHWPoison(page) || ++ !PageUptodate(page)) { + put_page(page); + return ERR_PTR(-ECHILD); + } +@@ -3122,6 +3149,13 @@ static const char *shmem_get_link(struct dentry *dentry, + error = shmem_getpage(inode, 0, &page, SGP_READ); + if (error) + return ERR_PTR(error); ++ if (!page) ++ return ERR_PTR(-ECHILD); ++ if (PageHWPoison(page)) { ++ unlock_page(page); ++ put_page(page); ++ return ERR_PTR(-ECHILD); ++ } + unlock_page(page); + } + set_delayed_call(done, shmem_put_link, page); +@@ -3772,6 +3806,13 @@ static void shmem_destroy_inodecache(void) + kmem_cache_destroy(shmem_inode_cachep); + } + ++/* Keep the page in page cache instead of truncating it */ ++static int shmem_error_remove_page(struct address_space *mapping, ++ struct page *page) ++{ ++ return 0; ++} ++ + const struct address_space_operations shmem_aops = { + .writepage = shmem_writepage, + .set_page_dirty = __set_page_dirty_no_writeback, +@@ -3782,7 +3823,7 @@ const struct address_space_operations shmem_aops = { + #ifdef CONFIG_MIGRATION + .migratepage = migrate_page, + #endif +- .error_remove_page = generic_error_remove_page, ++ .error_remove_page = shmem_error_remove_page, + }; + EXPORT_SYMBOL(shmem_aops); + +@@ -4190,9 +4231,14 @@ struct page *shmem_read_mapping_page_gfp(struct address_space *mapping, + error = shmem_getpage_gfp(inode, index, &page, SGP_CACHE, + gfp, NULL, NULL, NULL); + if (error) +- page = ERR_PTR(error); +- else +- unlock_page(page); ++ return ERR_PTR(error); ++ ++ unlock_page(page); ++ if (PageHWPoison(page)) { ++ put_page(page); ++ return ERR_PTR(-EIO); ++ } ++ + return page; + #else + /* diff --git a/mm/slab.c b/mm/slab.c index 
874b3f8fe80da..1bd283e98c58c 100644 --- a/mm/slab.c @@ -375430,6 +484731,18 @@ index bc7cee6b2ec54..122a37cbc081f 100644 } /* +diff --git a/mm/swapfile.c b/mm/swapfile.c +index 22d10f7138487..1551fb89769f6 100644 +--- a/mm/swapfile.c ++++ b/mm/swapfile.c +@@ -1093,6 +1093,7 @@ start_over: + goto check_out; + pr_debug("scan_swap_map of si %d failed to find offset\n", + si->type); ++ cond_resched(); + + spin_lock(&swap_avail_lock); + nextsi: diff --git a/mm/usercopy.c b/mm/usercopy.c index b3de3c4eefba7..540968b481e7e 100644 --- a/mm/usercopy.c @@ -375447,9 +484760,18 @@ index b3de3c4eefba7..540968b481e7e 100644 __setup("hardened_usercopy=", parse_hardened_usercopy); diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c -index 7a90084155343..3bbaf5f5353ed 100644 +index 7a90084155343..caa13abe0c56b 100644 --- a/mm/userfaultfd.c +++ b/mm/userfaultfd.c +@@ -63,7 +63,7 @@ int mfill_atomic_install_pte(struct mm_struct *dst_mm, pmd_t *dst_pmd, + pte_t _dst_pte, *dst_pte; + bool writable = dst_vma->vm_flags & VM_WRITE; + bool vm_shared = dst_vma->vm_flags & VM_SHARED; +- bool page_in_cache = page->mapping; ++ bool page_in_cache = page_mapping(page); + spinlock_t *ptl; + struct inode *inode; + pgoff_t offset, max_off; @@ -151,6 +151,8 @@ static int mcopy_atomic_pte(struct mm_struct *dst_mm, /* don't free the page */ goto out; @@ -375471,7 +484793,19 @@ index 7a90084155343..3bbaf5f5353ed 100644 if (ret) goto out; if (!page) { -@@ -621,6 +626,7 @@ retry: +@@ -233,6 +238,11 @@ static int mcontinue_atomic_pte(struct mm_struct *dst_mm, + goto out; + } + ++ if (PageHWPoison(page)) { ++ ret = -EIO; ++ goto out_release; ++ } ++ + ret = mfill_atomic_install_pte(dst_mm, dst_pmd, dst_vma, dst_addr, + page, false, wp_copy); + if (ret) +@@ -621,6 +631,7 @@ retry: err = -EFAULT; goto out; } @@ -375605,6 +484939,205 @@ index e8a807c781107..8375eecc55de5 100644 return addr; +diff --git a/mm/vmscan.c b/mm/vmscan.c +index 74296c2d1fed2..201acea818040 100644 +--- a/mm/vmscan.c ++++ b/mm/vmscan.c +@@ -1865,69 +1865,6 @@ unsigned int reclaim_clean_pages_from_list(struct zone *zone, + return nr_reclaimed; + } + +-/* +- * Attempt to remove the specified page from its LRU. Only take this page +- * if it is of the appropriate PageActive status. Pages which are being +- * freed elsewhere are also ignored. +- * +- * page: page to consider +- * mode: one of the LRU isolation modes defined above +- * +- * returns true on success, false on failure. +- */ +-bool __isolate_lru_page_prepare(struct page *page, isolate_mode_t mode) +-{ +- /* Only take pages on the LRU. */ +- if (!PageLRU(page)) +- return false; +- +- /* Compaction should not handle unevictable pages but CMA can do so */ +- if (PageUnevictable(page) && !(mode & ISOLATE_UNEVICTABLE)) +- return false; +- +- /* +- * To minimise LRU disruption, the caller can indicate that it only +- * wants to isolate pages it will be able to operate on without +- * blocking - clean pages for the most part. +- * +- * ISOLATE_ASYNC_MIGRATE is used to indicate that it only wants to pages +- * that it is possible to migrate without blocking +- */ +- if (mode & ISOLATE_ASYNC_MIGRATE) { +- /* All the caller can do on PageWriteback is block */ +- if (PageWriteback(page)) +- return false; +- +- if (PageDirty(page)) { +- struct address_space *mapping; +- bool migrate_dirty; +- +- /* +- * Only pages without mappings or that have a +- * ->migratepage callback are possible to migrate +- * without blocking. 
However, we can be racing with +- * truncation so it's necessary to lock the page +- * to stabilise the mapping as truncation holds +- * the page lock until after the page is removed +- * from the page cache. +- */ +- if (!trylock_page(page)) +- return false; +- +- mapping = page_mapping(page); +- migrate_dirty = !mapping || mapping->a_ops->migratepage; +- unlock_page(page); +- if (!migrate_dirty) +- return false; +- } +- } +- +- if ((mode & ISOLATE_UNMAPPED) && page_mapped(page)) +- return false; +- +- return true; +-} +- + /* + * Update LRU sizes after isolating pages. The LRU size updates must + * be complete before mem_cgroup_update_lru_size due to a sanity check. +@@ -1979,11 +1916,11 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan, + unsigned long skipped = 0; + unsigned long scan, total_scan, nr_pages; + LIST_HEAD(pages_skipped); +- isolate_mode_t mode = (sc->may_unmap ? 0 : ISOLATE_UNMAPPED); + + total_scan = 0; + scan = 0; + while (scan < nr_to_scan && !list_empty(src)) { ++ struct list_head *move_to = src; + struct page *page; + + page = lru_to_page(src); +@@ -1993,9 +1930,9 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan, + total_scan += nr_pages; + + if (page_zonenum(page) > sc->reclaim_idx) { +- list_move(&page->lru, &pages_skipped); + nr_skipped[page_zonenum(page)] += nr_pages; +- continue; ++ move_to = &pages_skipped; ++ goto move; + } + + /* +@@ -2003,37 +1940,34 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan, + * return with no isolated pages if the LRU mostly contains + * ineligible pages. This causes the VM to not reclaim any + * pages, triggering a premature OOM. +- * +- * Account all tail pages of THP. This would not cause +- * premature OOM since __isolate_lru_page() returns -EBUSY +- * only when the page is being freed somewhere else. ++ * Account all tail pages of THP. + */ + scan += nr_pages; +- if (!__isolate_lru_page_prepare(page, mode)) { +- /* It is being freed elsewhere */ +- list_move(&page->lru, src); +- continue; +- } ++ ++ if (!PageLRU(page)) ++ goto move; ++ if (!sc->may_unmap && page_mapped(page)) ++ goto move; ++ + /* + * Be careful not to clear PageLRU until after we're + * sure the page is not being freed elsewhere -- the + * page release code relies on it. + */ +- if (unlikely(!get_page_unless_zero(page))) { +- list_move(&page->lru, src); +- continue; +- } ++ if (unlikely(!get_page_unless_zero(page))) ++ goto move; + + if (!TestClearPageLRU(page)) { + /* Another thread is already isolating this page */ + put_page(page); +- list_move(&page->lru, src); +- continue; ++ goto move; + } + + nr_taken += nr_pages; + nr_zone_taken[page_zonenum(page)] += nr_pages; +- list_move(&page->lru, dst); ++ move_to = dst; ++move: ++ list_move(&page->lru, move_to); + } + + /* +@@ -2057,7 +1991,8 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan, + } + *nr_scanned = total_scan; + trace_mm_vmscan_lru_isolate(sc->reclaim_idx, sc->order, nr_to_scan, +- total_scan, skipped, nr_taken, mode, lru); ++ total_scan, skipped, nr_taken, ++ sc->may_unmap ? 
0 : ISOLATE_UNMAPPED, lru); + update_lru_sizes(lruvec, lru, nr_zone_taken); + return nr_taken; + } +@@ -2791,8 +2726,8 @@ static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc) + enum lru_list lru; + unsigned long nr_reclaimed = 0; + unsigned long nr_to_reclaim = sc->nr_to_reclaim; ++ bool proportional_reclaim; + struct blk_plug plug; +- bool scan_adjusted; + + get_scan_count(lruvec, sc, nr); + +@@ -2810,8 +2745,8 @@ static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc) + * abort proportional reclaim if either the file or anon lru has already + * dropped to zero at the first pass. + */ +- scan_adjusted = (!cgroup_reclaim(sc) && !current_is_kswapd() && +- sc->priority == DEF_PRIORITY); ++ proportional_reclaim = (!cgroup_reclaim(sc) && !current_is_kswapd() && ++ sc->priority == DEF_PRIORITY); + + blk_start_plug(&plug); + while (nr[LRU_INACTIVE_ANON] || nr[LRU_ACTIVE_FILE] || +@@ -2831,7 +2766,7 @@ static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc) + + cond_resched(); + +- if (nr_reclaimed < nr_to_reclaim || scan_adjusted) ++ if (nr_reclaimed < nr_to_reclaim || proportional_reclaim) + continue; + + /* +@@ -2882,8 +2817,6 @@ static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc) + nr_scanned = targets[lru] - nr[lru]; + nr[lru] = targets[lru] * (100 - percentage) / 100; + nr[lru] -= min(nr[lru], nr_scanned); +- +- scan_adjusted = true; + } + blk_finish_plug(&plug); + sc->nr_reclaimed += nr_reclaimed; diff --git a/mm/workingset.c b/mm/workingset.c index d5b81e4f4cbe8..880d882f3325f 100644 --- a/mm/workingset.c @@ -375682,6 +485215,57 @@ index 68e8831068f4b..439deb8decbcc 100644 if (atomic_long_read(&pool->isolated_pages) == 0 && pool->destroying) wake_up_all(&pool->migration_wait); } +diff --git a/net/802/mrp.c b/net/802/mrp.c +index 35e04cc5390c4..c10a432a5b435 100644 +--- a/net/802/mrp.c ++++ b/net/802/mrp.c +@@ -606,7 +606,10 @@ static void mrp_join_timer(struct timer_list *t) + spin_unlock(&app->lock); + + mrp_queue_xmit(app); +- mrp_join_timer_arm(app); ++ spin_lock(&app->lock); ++ if (likely(app->active)) ++ mrp_join_timer_arm(app); ++ spin_unlock(&app->lock); + } + + static void mrp_periodic_timer_arm(struct mrp_applicant *app) +@@ -620,11 +623,12 @@ static void mrp_periodic_timer(struct timer_list *t) + struct mrp_applicant *app = from_timer(app, t, periodic_timer); + + spin_lock(&app->lock); +- mrp_mad_event(app, MRP_EVENT_PERIODIC); +- mrp_pdu_queue(app); ++ if (likely(app->active)) { ++ mrp_mad_event(app, MRP_EVENT_PERIODIC); ++ mrp_pdu_queue(app); ++ mrp_periodic_timer_arm(app); ++ } + spin_unlock(&app->lock); +- +- mrp_periodic_timer_arm(app); + } + + static int mrp_pdu_parse_end_mark(struct sk_buff *skb, int *offset) +@@ -872,6 +876,7 @@ int mrp_init_applicant(struct net_device *dev, struct mrp_application *appl) + app->dev = dev; + app->app = appl; + app->mad = RB_ROOT; ++ app->active = true; + spin_lock_init(&app->lock); + skb_queue_head_init(&app->queue); + rcu_assign_pointer(dev->mrp_port->applicants[appl->type], app); +@@ -900,6 +905,9 @@ void mrp_uninit_applicant(struct net_device *dev, struct mrp_application *appl) + + RCU_INIT_POINTER(port->applicants[appl->type], NULL); + ++ spin_lock_bh(&app->lock); ++ app->active = false; ++ spin_unlock_bh(&app->lock); + /* Delete timer and generate a final TX event to flush out + * all pending messages before the applicant is gone. 
+ */ diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index 55275ef9a31a7..abaa5d96ded24 100644 --- a/net/8021q/vlan.c @@ -375758,7 +485342,7 @@ index 0c21d1fec8522..a54535cbcf4cf 100644 void vlan_setup(struct net_device *dev) diff --git a/net/9p/client.c b/net/9p/client.c -index 213f12ed76cd8..565aee6dfcc66 100644 +index 213f12ed76cd8..08e0c9990af06 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -32,10 +32,9 @@ @@ -375836,7 +485420,19 @@ index 213f12ed76cd8..565aee6dfcc66 100644 pr_info("Could not find request transport: %s\n", s); ret = -EINVAL; -@@ -311,7 +307,7 @@ p9_tag_alloc(struct p9_client *c, int8_t type, unsigned int max_size) +@@ -285,6 +281,11 @@ p9_tag_alloc(struct p9_client *c, int8_t type, unsigned int max_size) + p9pdu_reset(&req->rc); + req->t_err = 0; + req->status = REQ_STATUS_ALLOC; ++ /* refcount needs to be set to 0 before inserting into the idr ++ * so p9_tag_lookup does not accept a request that is not fully ++ * initialized. refcount_set to 2 below will mark request ready. ++ */ ++ refcount_set(&req->refcount, 0); + init_waitqueue_head(&req->wq); + INIT_LIST_HEAD(&req->req_list); + +@@ -311,7 +312,7 @@ p9_tag_alloc(struct p9_client *c, int8_t type, unsigned int max_size) * callback), so p9_client_cb eats the second ref there * as the pointer is duplicated directly by virtqueue_add_sgs() */ @@ -375845,7 +485441,7 @@ index 213f12ed76cd8..565aee6dfcc66 100644 return req; -@@ -347,7 +343,7 @@ again: +@@ -347,7 +348,7 @@ again: if (!p9_req_try_get(req)) goto again; if (req->tc.tag != tag) { @@ -375854,25 +485450,25 @@ index 213f12ed76cd8..565aee6dfcc66 100644 goto again; } } -@@ -373,20 +369,18 @@ static int p9_tag_remove(struct p9_client *c, struct p9_req_t *r) +@@ -373,20 +374,18 @@ static int p9_tag_remove(struct p9_client *c, struct p9_req_t *r) spin_lock_irqsave(&c->lock, flags); idr_remove(&c->reqs, tag); spin_unlock_irqrestore(&c->lock, flags); - return p9_req_put(r); -+ return p9_req_put(c, r); - } - +-} +- -static void p9_req_free(struct kref *ref) -+int p9_req_put(struct p9_client *c, struct p9_req_t *r) - { +-{ - struct p9_req_t *r = container_of(ref, struct p9_req_t, refcount); - p9_fcall_fini(&r->tc); - p9_fcall_fini(&r->rc); - kmem_cache_free(p9_req_cache, r); --} -- ++ return p9_req_put(c, r); + } + -int p9_req_put(struct p9_req_t *r) --{ ++int p9_req_put(struct p9_client *c, struct p9_req_t *r) + { - return kref_put(&r->refcount, p9_req_free); + if (refcount_dec_and_test(&r->refcount)) { + p9_fcall_fini(&r->tc); @@ -375884,7 +485480,7 @@ index 213f12ed76cd8..565aee6dfcc66 100644 } EXPORT_SYMBOL(p9_req_put); -@@ -423,8 +417,7 @@ void p9_client_cb(struct p9_client *c, struct p9_req_t *req, int status) +@@ -423,8 +422,7 @@ void p9_client_cb(struct p9_client *c, struct p9_req_t *req, int status) { p9_debug(P9_DEBUG_MUX, " tag %d\n", req->tc.tag); @@ -375894,7 +485490,7 @@ index 213f12ed76cd8..565aee6dfcc66 100644 * the status change is visible to another thread */ smp_wmb(); -@@ -432,7 +425,7 @@ void p9_client_cb(struct p9_client *c, struct p9_req_t *req, int status) +@@ -432,7 +430,7 @@ void p9_client_cb(struct p9_client *c, struct p9_req_t *req, int status) wake_up(&req->wq); p9_debug(P9_DEBUG_MUX, "wakeup: %d\n", req->tc.tag); @@ -375903,7 +485499,7 @@ index 213f12ed76cd8..565aee6dfcc66 100644 } EXPORT_SYMBOL(p9_client_cb); -@@ -446,12 +439,12 @@ EXPORT_SYMBOL(p9_client_cb); +@@ -446,12 +444,12 @@ EXPORT_SYMBOL(p9_client_cb); */ int @@ -375921,7 +485517,7 @@ index 213f12ed76cd8..565aee6dfcc66 100644 int offset = pdu->offset; int err; -@@ -499,7 
+492,7 @@ EXPORT_SYMBOL(p9_parse_header); +@@ -499,7 +497,7 @@ EXPORT_SYMBOL(p9_parse_header); static int p9_check_errors(struct p9_client *c, struct p9_req_t *req) { @@ -375930,7 +485526,7 @@ index 213f12ed76cd8..565aee6dfcc66 100644 int err; int ecode; -@@ -510,8 +503,7 @@ static int p9_check_errors(struct p9_client *c, struct p9_req_t *req) +@@ -510,8 +508,7 @@ static int p9_check_errors(struct p9_client *c, struct p9_req_t *req) req->rc.size); return -EIO; } @@ -375940,7 +485536,7 @@ index 213f12ed76cd8..565aee6dfcc66 100644 * This should be after check errors which poplulate pdu_fcall. */ trace_9p_protocol_dump(c, &req->rc); -@@ -524,6 +516,7 @@ static int p9_check_errors(struct p9_client *c, struct p9_req_t *req) +@@ -524,6 +521,7 @@ static int p9_check_errors(struct p9_client *c, struct p9_req_t *req) if (!p9_is_proto_dotl(c)) { char *ename; @@ -375948,7 +485544,7 @@ index 213f12ed76cd8..565aee6dfcc66 100644 err = p9pdu_readf(&req->rc, c->proto_version, "s?d", &ename, &ecode); if (err) -@@ -541,6 +534,8 @@ static int p9_check_errors(struct p9_client *c, struct p9_req_t *req) +@@ -541,6 +539,8 @@ static int p9_check_errors(struct p9_client *c, struct p9_req_t *req) kfree(ename); } else { err = p9pdu_readf(&req->rc, c->proto_version, "d", &ecode); @@ -375957,7 +485553,7 @@ index 213f12ed76cd8..565aee6dfcc66 100644 err = -ecode; p9_debug(P9_DEBUG_9P, "<<< RLERROR (%d)\n", -ecode); -@@ -572,12 +567,11 @@ static int p9_check_zc_errors(struct p9_client *c, struct p9_req_t *req, +@@ -572,12 +572,11 @@ static int p9_check_zc_errors(struct p9_client *c, struct p9_req_t *req, { int err; int ecode; @@ -375972,7 +485568,7 @@ index 213f12ed76cd8..565aee6dfcc66 100644 * This should be after parse_header which poplulate pdu_fcall. */ trace_9p_protocol_dump(c, &req->rc); -@@ -605,7 +599,7 @@ static int p9_check_zc_errors(struct p9_client *c, struct p9_req_t *req, +@@ -605,7 +604,7 @@ static int p9_check_zc_errors(struct p9_client *c, struct p9_req_t *req, if (len > inline_len) { /* We have error in external buffer */ if (!copy_from_iter_full(ename + inline_len, @@ -375981,7 +485577,7 @@ index 213f12ed76cd8..565aee6dfcc66 100644 err = -EFAULT; goto out_err; } -@@ -657,7 +651,7 @@ p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...); +@@ -657,7 +656,7 @@ p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...); static int p9_client_flush(struct p9_client *c, struct p9_req_t *oldreq) { struct p9_req_t *req; @@ -375990,7 +485586,7 @@ index 213f12ed76cd8..565aee6dfcc66 100644 int err; err = p9_parse_header(&oldreq->tc, NULL, NULL, &oldtag, 1); -@@ -670,8 +664,7 @@ static int p9_client_flush(struct p9_client *c, struct p9_req_t *oldreq) +@@ -670,8 +669,7 @@ static int p9_client_flush(struct p9_client *c, struct p9_req_t *oldreq) if (IS_ERR(req)) return PTR_ERR(req); @@ -376000,7 +485596,7 @@ index 213f12ed76cd8..565aee6dfcc66 100644 * remove it from the list */ if (oldreq->status == REQ_STATUS_SENT) { -@@ -697,7 +690,7 @@ static struct p9_req_t *p9_client_prepare_req(struct p9_client *c, +@@ -697,7 +695,7 @@ static struct p9_req_t *p9_client_prepare_req(struct p9_client *c, return ERR_PTR(-EIO); /* if status is begin_disconnected we allow only clunk request */ @@ -376009,7 +485605,7 @@ index 213f12ed76cd8..565aee6dfcc66 100644 return ERR_PTR(-EIO); req = p9_tag_alloc(c, type, req_size); -@@ -715,7 +708,7 @@ static struct p9_req_t *p9_client_prepare_req(struct p9_client *c, +@@ -715,7 +713,7 @@ static struct p9_req_t *p9_client_prepare_req(struct p9_client *c, reterr: 
p9_tag_remove(c, req); /* We have to put also the 2nd reference as it won't be used */ @@ -376018,7 +485614,7 @@ index 213f12ed76cd8..565aee6dfcc66 100644 return ERR_PTR(err); } -@@ -745,13 +738,14 @@ p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...) +@@ -745,13 +743,14 @@ p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...) if (signal_pending(current)) { sigpending = 1; clear_thread_flag(TIF_SIGPENDING); @@ -376035,7 +485631,7 @@ index 213f12ed76cd8..565aee6dfcc66 100644 if (err != -ERESTARTSYS && err != -EFAULT) c->status = Disconnected; goto recalc_sigpending; -@@ -760,14 +754,13 @@ again: +@@ -760,14 +759,13 @@ again: /* Wait for the response */ err = wait_event_killable(req->wq, req->status >= REQ_STATUS_RCVD); @@ -376053,7 +485649,7 @@ index 213f12ed76cd8..565aee6dfcc66 100644 sigpending = 1; clear_thread_flag(TIF_SIGPENDING); goto again; -@@ -777,7 +770,7 @@ again: +@@ -777,7 +775,7 @@ again: p9_debug(P9_DEBUG_ERROR, "req_status error %d\n", req->t_err); err = req->t_err; } @@ -376062,7 +485658,7 @@ index 213f12ed76cd8..565aee6dfcc66 100644 p9_debug(P9_DEBUG_MUX, "flushing\n"); sigpending = 1; clear_thread_flag(TIF_SIGPENDING); -@@ -832,8 +825,7 @@ static struct p9_req_t *p9_client_zc_rpc(struct p9_client *c, int8_t type, +@@ -832,8 +830,7 @@ static struct p9_req_t *p9_client_zc_rpc(struct p9_client *c, int8_t type, struct p9_req_t *req; va_start(ap, fmt); @@ -376072,7 +485668,7 @@ index 213f12ed76cd8..565aee6dfcc66 100644 * The actual content is passed in zero-copy fashion. */ req = p9_client_prepare_req(c, type, P9_ZC_HDR_SZ, fmt, ap); -@@ -844,8 +836,9 @@ static struct p9_req_t *p9_client_zc_rpc(struct p9_client *c, int8_t type, +@@ -844,8 +841,9 @@ static struct p9_req_t *p9_client_zc_rpc(struct p9_client *c, int8_t type, if (signal_pending(current)) { sigpending = 1; clear_thread_flag(TIF_SIGPENDING); @@ -376083,7 +485679,7 @@ index 213f12ed76cd8..565aee6dfcc66 100644 err = c->trans_mod->zc_request(c, req, uidata, uodata, inlen, olen, in_hdrlen); -@@ -859,7 +852,7 @@ static struct p9_req_t *p9_client_zc_rpc(struct p9_client *c, int8_t type, +@@ -859,7 +857,7 @@ static struct p9_req_t *p9_client_zc_rpc(struct p9_client *c, int8_t type, p9_debug(P9_DEBUG_ERROR, "req_status error %d\n", req->t_err); err = req->t_err; } @@ -376092,7 +485688,7 @@ index 213f12ed76cd8..565aee6dfcc66 100644 p9_debug(P9_DEBUG_MUX, "flushing\n"); sigpending = 1; clear_thread_flag(TIF_SIGPENDING); -@@ -895,16 +888,13 @@ static struct p9_fid *p9_fid_create(struct p9_client *clnt) +@@ -895,16 +893,13 @@ static struct p9_fid *p9_fid_create(struct p9_client *clnt) struct p9_fid *fid; p9_debug(P9_DEBUG_FID, "clnt %p\n", clnt); @@ -376110,7 +485706,7 @@ index 213f12ed76cd8..565aee6dfcc66 100644 refcount_set(&fid->count, 1); idr_preload(GFP_KERNEL); -@@ -947,15 +937,15 @@ static int p9_client_version(struct p9_client *c) +@@ -947,15 +942,15 @@ static int p9_client_version(struct p9_client *c) switch (c->proto_version) { case p9_proto_2000L: req = p9_client_rpc(c, P9_TVERSION, "ds", @@ -376129,7 +485725,7 @@ index 213f12ed76cd8..565aee6dfcc66 100644 break; default: return -EINVAL; -@@ -972,13 +962,13 @@ static int p9_client_version(struct p9_client *c) +@@ -972,13 +967,13 @@ static int p9_client_version(struct p9_client *c) } p9_debug(P9_DEBUG_9P, "<<< RVERSION msize %d %s\n", msize, version); @@ -376147,7 +485743,7 @@ index 213f12ed76cd8..565aee6dfcc66 100644 p9_debug(P9_DEBUG_ERROR, "server returned an unknown version: %s\n", version); err = -EREMOTEIO; -@@ -1008,7 +998,7 @@ 
struct p9_client *p9_client_create(const char *dev_name, char *options) +@@ -1008,7 +1003,7 @@ struct p9_client *p9_client_create(const char *dev_name, char *options) char *client_id; err = 0; @@ -376156,7 +485752,7 @@ index 213f12ed76cd8..565aee6dfcc66 100644 if (!clnt) return ERR_PTR(-ENOMEM); -@@ -1030,7 +1020,7 @@ struct p9_client *p9_client_create(const char *dev_name, char *options) +@@ -1030,7 +1025,7 @@ struct p9_client *p9_client_create(const char *dev_name, char *options) if (!clnt->trans_mod) clnt->trans_mod = v9fs_get_default_trans(); @@ -376165,7 +485761,7 @@ index 213f12ed76cd8..565aee6dfcc66 100644 err = -EPROTONOSUPPORT; p9_debug(P9_DEBUG_ERROR, "No transport defined or default transport\n"); -@@ -1118,14 +1108,14 @@ void p9_client_begin_disconnect(struct p9_client *clnt) +@@ -1118,14 +1113,14 @@ void p9_client_begin_disconnect(struct p9_client *clnt) EXPORT_SYMBOL(p9_client_begin_disconnect); struct p9_fid *p9_client_attach(struct p9_client *clnt, struct p9_fid *afid, @@ -376182,7 +485778,7 @@ index 213f12ed76cd8..565aee6dfcc66 100644 p9_debug(P9_DEBUG_9P, ">>> TATTACH afid %d uname %s aname %s\n", afid ? afid->fid : -1, uname, aname); fid = p9_fid_create(clnt); -@@ -1136,7 +1126,7 @@ struct p9_fid *p9_client_attach(struct p9_client *clnt, struct p9_fid *afid, +@@ -1136,7 +1131,7 @@ struct p9_fid *p9_client_attach(struct p9_client *clnt, struct p9_fid *afid, fid->uid = n_uname; req = p9_client_rpc(clnt, P9_TATTACH, "ddss?u", fid->fid, @@ -376191,7 +485787,7 @@ index 213f12ed76cd8..565aee6dfcc66 100644 if (IS_ERR(req)) { err = PTR_ERR(req); goto error; -@@ -1150,7 +1140,7 @@ struct p9_fid *p9_client_attach(struct p9_client *clnt, struct p9_fid *afid, +@@ -1150,7 +1145,7 @@ struct p9_fid *p9_client_attach(struct p9_client *clnt, struct p9_fid *afid, } p9_debug(P9_DEBUG_9P, "<<< RATTACH qid %x.%llx.%x\n", @@ -376200,7 +485796,7 @@ index 213f12ed76cd8..565aee6dfcc66 100644 memmove(&fid->qid, &qid, sizeof(struct p9_qid)); -@@ -1165,14 +1155,14 @@ error: +@@ -1165,14 +1160,14 @@ error: EXPORT_SYMBOL(p9_client_attach); struct p9_fid *p9_client_walk(struct p9_fid *oldfid, uint16_t nwname, @@ -376217,7 +485813,7 @@ index 213f12ed76cd8..565aee6dfcc66 100644 err = 0; wqids = NULL; -@@ -1185,14 +1175,14 @@ struct p9_fid *p9_client_walk(struct p9_fid *oldfid, uint16_t nwname, +@@ -1185,14 +1180,14 @@ struct p9_fid *p9_client_walk(struct p9_fid *oldfid, uint16_t nwname, } fid->uid = oldfid->uid; @@ -376235,7 +485831,7 @@ index 213f12ed76cd8..565aee6dfcc66 100644 if (IS_ERR(req)) { err = PTR_ERR(req); goto error; -@@ -1215,9 +1205,9 @@ struct p9_fid *p9_client_walk(struct p9_fid *oldfid, uint16_t nwname, +@@ -1215,9 +1210,9 @@ struct p9_fid *p9_client_walk(struct p9_fid *oldfid, uint16_t nwname, for (count = 0; count < nwqids; count++) p9_debug(P9_DEBUG_9P, "<<< [%d] %x.%llx.%x\n", @@ -376248,7 +485844,7 @@ index 213f12ed76cd8..565aee6dfcc66 100644 if (nwname) memmove(&fid->qid, &wqids[nwqids - 1], sizeof(struct p9_qid)); -@@ -1233,7 +1223,7 @@ clunk_fid: +@@ -1233,7 +1228,7 @@ clunk_fid: fid = NULL; error: @@ -376257,7 +485853,7 @@ index 213f12ed76cd8..565aee6dfcc66 100644 p9_fid_destroy(fid); return ERR_PTR(err); -@@ -1250,7 +1240,7 @@ int p9_client_open(struct p9_fid *fid, int mode) +@@ -1250,7 +1245,7 @@ int p9_client_open(struct p9_fid *fid, int mode) clnt = fid->clnt; p9_debug(P9_DEBUG_9P, ">>> %s fid %d mode %d\n", @@ -376266,7 +485862,7 @@ index 213f12ed76cd8..565aee6dfcc66 100644 err = 0; if (fid->mode != -1) -@@ -1272,8 +1262,8 @@ int p9_client_open(struct p9_fid *fid, int 
mode) +@@ -1272,8 +1267,8 @@ int p9_client_open(struct p9_fid *fid, int mode) } p9_debug(P9_DEBUG_9P, "<<< %s qid %x.%llx.%x iounit %x\n", @@ -376277,7 +485873,7 @@ index 213f12ed76cd8..565aee6dfcc66 100644 memmove(&fid->qid, &qid, sizeof(struct p9_qid)); fid->mode = mode; -@@ -1286,8 +1276,8 @@ error: +@@ -1286,8 +1281,8 @@ error: } EXPORT_SYMBOL(p9_client_open); @@ -376288,7 +485884,7 @@ index 213f12ed76cd8..565aee6dfcc66 100644 { int err = 0; struct p9_client *clnt; -@@ -1295,16 +1285,16 @@ int p9_client_create_dotl(struct p9_fid *ofid, const char *name, u32 flags, u32 +@@ -1295,16 +1290,16 @@ int p9_client_create_dotl(struct p9_fid *ofid, const char *name, u32 flags, u32 int iounit; p9_debug(P9_DEBUG_9P, @@ -376309,7 +485905,7 @@ index 213f12ed76cd8..565aee6dfcc66 100644 if (IS_ERR(req)) { err = PTR_ERR(req); goto error; -@@ -1317,9 +1307,7 @@ int p9_client_create_dotl(struct p9_fid *ofid, const char *name, u32 flags, u32 +@@ -1317,9 +1312,7 @@ int p9_client_create_dotl(struct p9_fid *ofid, const char *name, u32 flags, u32 } p9_debug(P9_DEBUG_9P, "<<< RLCREATE qid %x.%llx.%x iounit %x\n", @@ -376320,7 +485916,7 @@ index 213f12ed76cd8..565aee6dfcc66 100644 memmove(&ofid->qid, qid, sizeof(struct p9_qid)); ofid->mode = mode; -@@ -1342,7 +1330,7 @@ int p9_client_fcreate(struct p9_fid *fid, const char *name, u32 perm, int mode, +@@ -1342,7 +1335,7 @@ int p9_client_fcreate(struct p9_fid *fid, const char *name, u32 perm, int mode, int iounit; p9_debug(P9_DEBUG_9P, ">>> TCREATE fid %d name %s perm %d mode %d\n", @@ -376329,7 +485925,7 @@ index 213f12ed76cd8..565aee6dfcc66 100644 err = 0; clnt = fid->clnt; -@@ -1350,7 +1338,7 @@ int p9_client_fcreate(struct p9_fid *fid, const char *name, u32 perm, int mode, +@@ -1350,7 +1343,7 @@ int p9_client_fcreate(struct p9_fid *fid, const char *name, u32 perm, int mode, return -EINVAL; req = p9_client_rpc(clnt, P9_TCREATE, "dsdb?s", fid->fid, name, perm, @@ -376338,7 +485934,7 @@ index 213f12ed76cd8..565aee6dfcc66 100644 if (IS_ERR(req)) { err = PTR_ERR(req); goto error; -@@ -1363,9 +1351,7 @@ int p9_client_fcreate(struct p9_fid *fid, const char *name, u32 perm, int mode, +@@ -1363,9 +1356,7 @@ int p9_client_fcreate(struct p9_fid *fid, const char *name, u32 perm, int mode, } p9_debug(P9_DEBUG_9P, "<<< RCREATE qid %x.%llx.%x iounit %x\n", @@ -376349,7 +485945,7 @@ index 213f12ed76cd8..565aee6dfcc66 100644 memmove(&fid->qid, &qid, sizeof(struct p9_qid)); fid->mode = mode; -@@ -1379,18 +1365,18 @@ error: +@@ -1379,18 +1370,18 @@ error: EXPORT_SYMBOL(p9_client_fcreate); int p9_client_symlink(struct p9_fid *dfid, const char *name, @@ -376371,7 +485967,7 @@ index 213f12ed76cd8..565aee6dfcc66 100644 if (IS_ERR(req)) { err = PTR_ERR(req); goto error; -@@ -1403,7 +1389,7 @@ int p9_client_symlink(struct p9_fid *dfid, const char *name, +@@ -1403,7 +1394,7 @@ int p9_client_symlink(struct p9_fid *dfid, const char *name, } p9_debug(P9_DEBUG_9P, "<<< RSYMLINK qid %x.%llx.%x\n", @@ -376380,7 +485976,7 @@ index 213f12ed76cd8..565aee6dfcc66 100644 free_and_error: p9_tag_remove(clnt, req); -@@ -1418,10 +1404,10 @@ int p9_client_link(struct p9_fid *dfid, struct p9_fid *oldfid, const char *newna +@@ -1418,10 +1409,10 @@ int p9_client_link(struct p9_fid *dfid, struct p9_fid *oldfid, const char *newna struct p9_req_t *req; p9_debug(P9_DEBUG_9P, ">>> TLINK dfid %d oldfid %d newname %s\n", @@ -376393,7 +485989,7 @@ index 213f12ed76cd8..565aee6dfcc66 100644 if (IS_ERR(req)) return PTR_ERR(req); -@@ -1438,7 +1424,7 @@ int p9_client_fsync(struct p9_fid *fid, int datasync) +@@ -1438,7 
+1429,7 @@ int p9_client_fsync(struct p9_fid *fid, int datasync) struct p9_req_t *req; p9_debug(P9_DEBUG_9P, ">>> TFSYNC fid %d datasync:%d\n", @@ -376402,7 +485998,7 @@ index 213f12ed76cd8..565aee6dfcc66 100644 err = 0; clnt = fid->clnt; -@@ -1474,8 +1460,8 @@ int p9_client_clunk(struct p9_fid *fid) +@@ -1474,8 +1465,8 @@ int p9_client_clunk(struct p9_fid *fid) return 0; again: @@ -376413,7 +486009,7 @@ index 213f12ed76cd8..565aee6dfcc66 100644 err = 0; clnt = fid->clnt; -@@ -1489,16 +1475,16 @@ again: +@@ -1489,16 +1480,16 @@ again: p9_tag_remove(clnt, req); error: @@ -376433,7 +486029,7 @@ index 213f12ed76cd8..565aee6dfcc66 100644 return err; } EXPORT_SYMBOL(p9_client_clunk); -@@ -1538,7 +1524,7 @@ int p9_client_unlinkat(struct p9_fid *dfid, const char *name, int flags) +@@ -1538,7 +1529,7 @@ int p9_client_unlinkat(struct p9_fid *dfid, const char *name, int flags) struct p9_client *clnt; p9_debug(P9_DEBUG_9P, ">>> TUNLINKAT fid %d %s %d\n", @@ -376442,7 +486038,7 @@ index 213f12ed76cd8..565aee6dfcc66 100644 clnt = dfid->clnt; req = p9_client_rpc(clnt, P9_TUNLINKAT, "dsd", dfid->fid, name, flags); -@@ -1584,8 +1570,8 @@ p9_client_read_once(struct p9_fid *fid, u64 offset, struct iov_iter *to, +@@ -1584,8 +1575,8 @@ p9_client_read_once(struct p9_fid *fid, u64 offset, struct iov_iter *to, char *dataptr; *err = 0; @@ -376453,7 +486049,7 @@ index 213f12ed76cd8..565aee6dfcc66 100644 rsize = fid->iounit; if (!rsize || rsize > clnt->msize - P9_IOHDRSZ) -@@ -1651,13 +1637,13 @@ p9_client_write(struct p9_fid *fid, u64 offset, struct iov_iter *from, int *err) +@@ -1651,13 +1642,13 @@ p9_client_write(struct p9_fid *fid, u64 offset, struct iov_iter *from, int *err) *err = 0; p9_debug(P9_DEBUG_9P, ">>> TWRITE fid %d offset %llu count %zd\n", @@ -376470,7 +486066,7 @@ index 213f12ed76cd8..565aee6dfcc66 100644 rsize = clnt->msize - P9_IOHDRSZ; if (count < rsize) -@@ -1670,7 +1656,7 @@ p9_client_write(struct p9_fid *fid, u64 offset, struct iov_iter *from, int *err) +@@ -1670,7 +1661,7 @@ p9_client_write(struct p9_fid *fid, u64 offset, struct iov_iter *from, int *err) fid->fid, offset, rsize); } else { req = p9_client_rpc(clnt, P9_TWRITE, "dqV", fid->fid, @@ -376479,7 +486075,7 @@ index 213f12ed76cd8..565aee6dfcc66 100644 } if (IS_ERR(req)) { *err = PTR_ERR(req); -@@ -1703,12 +1689,13 @@ struct p9_wstat *p9_client_stat(struct p9_fid *fid) +@@ -1703,12 +1694,13 @@ struct p9_wstat *p9_client_stat(struct p9_fid *fid) { int err; struct p9_client *clnt; @@ -376494,7 +486090,7 @@ index 213f12ed76cd8..565aee6dfcc66 100644 if (!ret) return ERR_PTR(-ENOMEM); -@@ -1729,17 +1716,17 @@ struct p9_wstat *p9_client_stat(struct p9_fid *fid) +@@ -1729,17 +1721,17 @@ struct p9_wstat *p9_client_stat(struct p9_fid *fid) } p9_debug(P9_DEBUG_9P, @@ -376523,7 +486119,7 @@ index 213f12ed76cd8..565aee6dfcc66 100644 p9_tag_remove(clnt, req); return ret; -@@ -1751,17 +1738,17 @@ error: +@@ -1751,17 +1743,17 @@ error: EXPORT_SYMBOL(p9_client_stat); struct p9_stat_dotl *p9_client_getattr_dotl(struct p9_fid *fid, @@ -376545,7 +486141,7 @@ index 213f12ed76cd8..565aee6dfcc66 100644 if (!ret) return ERR_PTR(-ENOMEM); -@@ -1781,26 +1768,27 @@ struct p9_stat_dotl *p9_client_getattr_dotl(struct p9_fid *fid, +@@ -1781,26 +1773,27 @@ struct p9_stat_dotl *p9_client_getattr_dotl(struct p9_fid *fid, goto error; } @@ -376593,7 +486189,7 @@ index 213f12ed76cd8..565aee6dfcc66 100644 p9_tag_remove(clnt, req); return ret; -@@ -1819,7 +1807,7 @@ static int p9_client_statsize(struct p9_wstat *wst, int proto_version) +@@ -1819,7 +1812,7 @@ static int 
p9_client_statsize(struct p9_wstat *wst, int proto_version) /* size[2] type[2] dev[4] qid[13] */ /* mode[4] atime[4] mtime[4] length[8]*/ /* name[s] uid[s] gid[s] muid[s] */ @@ -376602,7 +486198,7 @@ index 213f12ed76cd8..565aee6dfcc66 100644 if (wst->name) ret += strlen(wst->name); -@@ -1830,9 +1818,10 @@ static int p9_client_statsize(struct p9_wstat *wst, int proto_version) +@@ -1830,9 +1823,10 @@ static int p9_client_statsize(struct p9_wstat *wst, int proto_version) if (wst->muid) ret += strlen(wst->muid); @@ -376616,7 +486212,7 @@ index 213f12ed76cd8..565aee6dfcc66 100644 if (wst->extension) ret += strlen(wst->extension); } -@@ -1849,21 +1838,23 @@ int p9_client_wstat(struct p9_fid *fid, struct p9_wstat *wst) +@@ -1849,21 +1843,23 @@ int p9_client_wstat(struct p9_fid *fid, struct p9_wstat *wst) err = 0; clnt = fid->clnt; wst->size = p9_client_statsize(wst, clnt->proto_version); @@ -376654,7 +486250,7 @@ index 213f12ed76cd8..565aee6dfcc66 100644 if (IS_ERR(req)) { err = PTR_ERR(req); goto error; -@@ -1886,15 +1877,15 @@ int p9_client_setattr(struct p9_fid *fid, struct p9_iattr_dotl *p9attr) +@@ -1886,15 +1882,15 @@ int p9_client_setattr(struct p9_fid *fid, struct p9_iattr_dotl *p9attr) err = 0; clnt = fid->clnt; p9_debug(P9_DEBUG_9P, ">>> TSETATTR fid %d\n", fid->fid); @@ -376679,7 +486275,7 @@ index 213f12ed76cd8..565aee6dfcc66 100644 req = p9_client_rpc(clnt, P9_TSETATTR, "dI", fid->fid, p9attr); -@@ -1935,12 +1926,10 @@ int p9_client_statfs(struct p9_fid *fid, struct p9_rstatfs *sb) +@@ -1935,12 +1931,10 @@ int p9_client_statfs(struct p9_fid *fid, struct p9_rstatfs *sb) goto error; } @@ -376696,7 +486292,7 @@ index 213f12ed76cd8..565aee6dfcc66 100644 p9_tag_remove(clnt, req); error: -@@ -1959,10 +1948,10 @@ int p9_client_rename(struct p9_fid *fid, +@@ -1959,10 +1953,10 @@ int p9_client_rename(struct p9_fid *fid, clnt = fid->clnt; p9_debug(P9_DEBUG_9P, ">>> TRENAME fid %d newdirfid %d name %s\n", @@ -376709,7 +486305,7 @@ index 213f12ed76cd8..565aee6dfcc66 100644 if (IS_ERR(req)) { err = PTR_ERR(req); goto error; -@@ -1986,9 +1975,9 @@ int p9_client_renameat(struct p9_fid *olddirfid, const char *old_name, +@@ -1986,9 +1980,9 @@ int p9_client_renameat(struct p9_fid *olddirfid, const char *old_name, err = 0; clnt = olddirfid->clnt; @@ -376722,7 +486318,7 @@ index 213f12ed76cd8..565aee6dfcc66 100644 req = p9_client_rpc(clnt, P9_TRENAMEAT, "dsds", olddirfid->fid, old_name, newdirfid->fid, new_name); -@@ -1998,7 +1987,7 @@ int p9_client_renameat(struct p9_fid *olddirfid, const char *old_name, +@@ -1998,7 +1992,7 @@ int p9_client_renameat(struct p9_fid *olddirfid, const char *old_name, } p9_debug(P9_DEBUG_9P, "<<< RRENAMEAT newdirfid %d new name %s\n", @@ -376731,7 +486327,7 @@ index 213f12ed76cd8..565aee6dfcc66 100644 p9_tag_remove(clnt, req); error: -@@ -2006,11 +1995,10 @@ error: +@@ -2006,11 +2000,10 @@ error: } EXPORT_SYMBOL(p9_client_renameat); @@ -376745,7 +486341,7 @@ index 213f12ed76cd8..565aee6dfcc66 100644 { int err; struct p9_req_t *req; -@@ -2025,11 +2013,11 @@ struct p9_fid *p9_client_xattrwalk(struct p9_fid *file_fid, +@@ -2025,11 +2018,11 @@ struct p9_fid *p9_client_xattrwalk(struct p9_fid *file_fid, goto error; } p9_debug(P9_DEBUG_9P, @@ -376760,7 +486356,7 @@ index 213f12ed76cd8..565aee6dfcc66 100644 if (IS_ERR(req)) { err = PTR_ERR(req); goto error; -@@ -2042,13 +2030,13 @@ struct p9_fid *p9_client_xattrwalk(struct p9_fid *file_fid, +@@ -2042,13 +2035,13 @@ struct p9_fid *p9_client_xattrwalk(struct p9_fid *file_fid, } p9_tag_remove(clnt, req); p9_debug(P9_DEBUG_9P, "<<< 
RXATTRWALK fid %d size %llu\n", @@ -376776,7 +486372,7 @@ index 213f12ed76cd8..565aee6dfcc66 100644 p9_fid_destroy(attr_fid); return ERR_PTR(err); -@@ -2056,19 +2044,19 @@ error: +@@ -2056,19 +2049,19 @@ error: EXPORT_SYMBOL_GPL(p9_client_xattrwalk); int p9_client_xattrcreate(struct p9_fid *fid, const char *name, @@ -376800,7 +486396,7 @@ index 213f12ed76cd8..565aee6dfcc66 100644 if (IS_ERR(req)) { err = PTR_ERR(req); goto error; -@@ -2092,13 +2080,13 @@ int p9_client_readdir(struct p9_fid *fid, char *data, u32 count, u64 offset) +@@ -2092,13 +2085,13 @@ int p9_client_readdir(struct p9_fid *fid, char *data, u32 count, u64 offset) iov_iter_kvec(&to, READ, &kv, 1, count); p9_debug(P9_DEBUG_9P, ">>> TREADDIR fid %d offset %llu count %d\n", @@ -376816,7 +486412,7 @@ index 213f12ed76cd8..565aee6dfcc66 100644 rsize = clnt->msize - P9_READDIRHDRSZ; if (count < rsize) -@@ -2106,8 +2094,7 @@ int p9_client_readdir(struct p9_fid *fid, char *data, u32 count, u64 offset) +@@ -2106,8 +2099,7 @@ int p9_client_readdir(struct p9_fid *fid, char *data, u32 count, u64 offset) /* Don't bother zerocopy for small IO (< 1024) */ if (clnt->trans_mod->zc_request && rsize > 1024) { @@ -376826,7 +486422,7 @@ index 213f12ed76cd8..565aee6dfcc66 100644 * PDU Header(7) + IO Size (4) */ req = p9_client_zc_rpc(clnt, P9_TREADDIR, &to, NULL, rsize, 0, -@@ -2148,7 +2135,7 @@ error: +@@ -2148,7 +2140,7 @@ error: EXPORT_SYMBOL(p9_client_readdir); int p9_client_mknod_dotl(struct p9_fid *fid, const char *name, int mode, @@ -376835,7 +486431,7 @@ index 213f12ed76cd8..565aee6dfcc66 100644 { int err; struct p9_client *clnt; -@@ -2156,10 +2143,11 @@ int p9_client_mknod_dotl(struct p9_fid *fid, const char *name, int mode, +@@ -2156,10 +2148,11 @@ int p9_client_mknod_dotl(struct p9_fid *fid, const char *name, int mode, err = 0; clnt = fid->clnt; @@ -376850,7 +486446,7 @@ index 213f12ed76cd8..565aee6dfcc66 100644 if (IS_ERR(req)) return PTR_ERR(req); -@@ -2168,18 +2156,17 @@ int p9_client_mknod_dotl(struct p9_fid *fid, const char *name, int mode, +@@ -2168,18 +2161,17 @@ int p9_client_mknod_dotl(struct p9_fid *fid, const char *name, int mode, trace_9p_protocol_dump(clnt, &req->rc); goto error; } @@ -376872,7 +486468,7 @@ index 213f12ed76cd8..565aee6dfcc66 100644 { int err; struct p9_client *clnt; -@@ -2189,8 +2176,8 @@ int p9_client_mkdir_dotl(struct p9_fid *fid, const char *name, int mode, +@@ -2189,8 +2181,8 @@ int p9_client_mkdir_dotl(struct p9_fid *fid, const char *name, int mode, clnt = fid->clnt; p9_debug(P9_DEBUG_9P, ">>> TMKDIR fid %d name %s mode %d gid %d\n", fid->fid, name, mode, from_kgid(&init_user_ns, gid)); @@ -376883,7 +486479,7 @@ index 213f12ed76cd8..565aee6dfcc66 100644 if (IS_ERR(req)) return PTR_ERR(req); -@@ -2200,12 +2187,11 @@ int p9_client_mkdir_dotl(struct p9_fid *fid, const char *name, int mode, +@@ -2200,12 +2192,11 @@ int p9_client_mkdir_dotl(struct p9_fid *fid, const char *name, int mode, goto error; } p9_debug(P9_DEBUG_9P, "<<< RMKDIR qid %x.%llx.%x\n", qid->type, @@ -376897,7 +486493,7 @@ index 213f12ed76cd8..565aee6dfcc66 100644 } EXPORT_SYMBOL(p9_client_mkdir_dotl); -@@ -2217,14 +2203,14 @@ int p9_client_lock_dotl(struct p9_fid *fid, struct p9_flock *flock, u8 *status) +@@ -2217,14 +2208,14 @@ int p9_client_lock_dotl(struct p9_fid *fid, struct p9_flock *flock, u8 *status) err = 0; clnt = fid->clnt; @@ -376918,7 +486514,7 @@ index 213f12ed76cd8..565aee6dfcc66 100644 if (IS_ERR(req)) return PTR_ERR(req); -@@ -2238,7 +2224,6 @@ int p9_client_lock_dotl(struct p9_fid *fid, struct p9_flock *flock, u8 *status) 
+@@ -2238,7 +2229,6 @@ int p9_client_lock_dotl(struct p9_fid *fid, struct p9_flock *flock, u8 *status) error: p9_tag_remove(clnt, req); return err; @@ -376926,7 +486522,7 @@ index 213f12ed76cd8..565aee6dfcc66 100644 } EXPORT_SYMBOL(p9_client_lock_dotl); -@@ -2250,12 +2235,14 @@ int p9_client_getlock_dotl(struct p9_fid *fid, struct p9_getlock *glock) +@@ -2250,12 +2240,14 @@ int p9_client_getlock_dotl(struct p9_fid *fid, struct p9_getlock *glock) err = 0; clnt = fid->clnt; @@ -376946,7 +486542,7 @@ index 213f12ed76cd8..565aee6dfcc66 100644 if (IS_ERR(req)) return PTR_ERR(req); -@@ -2267,9 +2254,10 @@ int p9_client_getlock_dotl(struct p9_fid *fid, struct p9_getlock *glock) +@@ -2267,9 +2259,10 @@ int p9_client_getlock_dotl(struct p9_fid *fid, struct p9_getlock *glock) trace_9p_protocol_dump(clnt, &req->rc); goto error; } @@ -377100,10 +486696,87 @@ index c43babb3f6354..65c094c321a29 100644 -void p9_release_pages(struct page **, int); +void p9_release_pages(struct page **pages, int nr_pages); diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c -index 007bbcc68010b..6fe3719c1fc61 100644 +index 007bbcc68010b..f359cfdc1858f 100644 --- a/net/9p/trans_fd.c +++ b/net/9p/trans_fd.c -@@ -345,6 +345,7 @@ static void p9_read_work(struct work_struct *work) +@@ -93,6 +93,7 @@ struct p9_poll_wait { + * @mux_list: list link for mux to manage multiple connections (?) + * @client: reference to client instance for this connection + * @err: error state ++ * @req_lock: lock protecting req_list and requests statuses + * @req_list: accounting for requests which have been sent + * @unsent_req_list: accounting for requests that haven't been sent + * @rreq: read request +@@ -116,11 +117,12 @@ struct p9_conn { + struct list_head mux_list; + struct p9_client *client; + int err; ++ spinlock_t req_lock; + struct list_head req_list; + struct list_head unsent_req_list; + struct p9_req_t *rreq; + struct p9_req_t *wreq; +- char tmp_buf[7]; ++ char tmp_buf[P9_HDRSZ]; + struct p9_fcall rc; + int wpos; + int wsize; +@@ -191,10 +193,10 @@ static void p9_conn_cancel(struct p9_conn *m, int err) + + p9_debug(P9_DEBUG_ERROR, "mux %p err %d\n", m, err); + +- spin_lock(&m->client->lock); ++ spin_lock(&m->req_lock); + + if (m->err) { +- spin_unlock(&m->client->lock); ++ spin_unlock(&m->req_lock); + return; + } + +@@ -202,11 +204,15 @@ static void p9_conn_cancel(struct p9_conn *m, int err) + + list_for_each_entry_safe(req, rtmp, &m->req_list, req_list) { + list_move(&req->req_list, &cancel_list); ++ req->status = REQ_STATUS_ERROR; + } + list_for_each_entry_safe(req, rtmp, &m->unsent_req_list, req_list) { + list_move(&req->req_list, &cancel_list); ++ req->status = REQ_STATUS_ERROR; + } + ++ spin_unlock(&m->req_lock); ++ + list_for_each_entry_safe(req, rtmp, &cancel_list, req_list) { + p9_debug(P9_DEBUG_ERROR, "call back req %p\n", req); + list_del(&req->req_list); +@@ -214,7 +220,6 @@ static void p9_conn_cancel(struct p9_conn *m, int err) + req->t_err = err; + p9_client_cb(m->client, req, REQ_STATUS_ERROR); + } +- spin_unlock(&m->client->lock); + } + + static __poll_t +@@ -290,7 +295,7 @@ static void p9_read_work(struct work_struct *work) + if (!m->rc.sdata) { + m->rc.sdata = m->tmp_buf; + m->rc.offset = 0; +- m->rc.capacity = 7; /* start by reading header */ ++ m->rc.capacity = P9_HDRSZ; /* start by reading header */ + } + + clear_bit(Rpending, &m->wsched); +@@ -313,7 +318,7 @@ static void p9_read_work(struct work_struct *work) + p9_debug(P9_DEBUG_TRANS, "got new header\n"); + + /* Header size */ +- m->rc.size = 7; ++ m->rc.size = 
P9_HDRSZ; + err = p9_parse_header(&m->rc, &m->rc.size, NULL, NULL, 0); + if (err) { + p9_debug(P9_DEBUG_ERROR, +@@ -345,6 +350,7 @@ static void p9_read_work(struct work_struct *work) p9_debug(P9_DEBUG_ERROR, "No recv fcall for tag %d (req %p), disconnecting!\n", m->rc.tag, m->rreq); @@ -377111,7 +486784,29 @@ index 007bbcc68010b..6fe3719c1fc61 100644 m->rreq = NULL; err = -EIO; goto error; -@@ -380,7 +381,7 @@ static void p9_read_work(struct work_struct *work) +@@ -360,7 +366,7 @@ static void p9_read_work(struct work_struct *work) + if ((m->rreq) && (m->rc.offset == m->rc.capacity)) { + p9_debug(P9_DEBUG_TRANS, "got new packet\n"); + m->rreq->rc.size = m->rc.offset; +- spin_lock(&m->client->lock); ++ spin_lock(&m->req_lock); + if (m->rreq->status == REQ_STATUS_SENT) { + list_del(&m->rreq->req_list); + p9_client_cb(m->client, m->rreq, REQ_STATUS_RCVD); +@@ -369,18 +375,18 @@ static void p9_read_work(struct work_struct *work) + p9_debug(P9_DEBUG_TRANS, + "Ignore replies associated with a cancelled request\n"); + } else { +- spin_unlock(&m->client->lock); ++ spin_unlock(&m->req_lock); + p9_debug(P9_DEBUG_ERROR, + "Request tag %d errored out while we were reading the reply\n", + m->rc.tag); + err = -EIO; + goto error; + } +- spin_unlock(&m->client->lock); ++ spin_unlock(&m->req_lock); m->rc.sdata = NULL; m->rc.offset = 0; m->rc.capacity = 0; @@ -377120,7 +486815,29 @@ index 007bbcc68010b..6fe3719c1fc61 100644 m->rreq = NULL; } -@@ -494,7 +495,7 @@ static void p9_write_work(struct work_struct *work) +@@ -454,10 +460,10 @@ static void p9_write_work(struct work_struct *work) + } + + if (!m->wsize) { +- spin_lock(&m->client->lock); ++ spin_lock(&m->req_lock); + if (list_empty(&m->unsent_req_list)) { + clear_bit(Wworksched, &m->wsched); +- spin_unlock(&m->client->lock); ++ spin_unlock(&m->req_lock); + return; + } + +@@ -472,7 +478,7 @@ static void p9_write_work(struct work_struct *work) + m->wpos = 0; + p9_req_get(req); + m->wreq = req; +- spin_unlock(&m->client->lock); ++ spin_unlock(&m->req_lock); + } + + p9_debug(P9_DEBUG_TRANS, "mux %p pos %d size %d\n", +@@ -494,7 +500,7 @@ static void p9_write_work(struct work_struct *work) m->wpos += err; if (m->wpos == m->wsize) { m->wpos = m->wsize = 0; @@ -377129,7 +486846,40 @@ index 007bbcc68010b..6fe3719c1fc61 100644 m->wreq = NULL; } -@@ -697,7 +698,7 @@ static int p9_fd_cancel(struct p9_client *client, struct p9_req_t *req) +@@ -589,6 +595,7 @@ static void p9_conn_create(struct p9_client *client) + INIT_LIST_HEAD(&m->mux_list); + m->client = client; + ++ spin_lock_init(&m->req_lock); + INIT_LIST_HEAD(&m->req_list); + INIT_LIST_HEAD(&m->unsent_req_list); + INIT_WORK(&m->rq, p9_read_work); +@@ -670,10 +677,10 @@ static int p9_fd_request(struct p9_client *client, struct p9_req_t *req) + if (m->err < 0) + return m->err; + +- spin_lock(&client->lock); ++ spin_lock(&m->req_lock); + req->status = REQ_STATUS_UNSENT; + list_add_tail(&req->req_list, &m->unsent_req_list); +- spin_unlock(&client->lock); ++ spin_unlock(&m->req_lock); + + if (test_and_clear_bit(Wpending, &m->wsched)) + n = EPOLLOUT; +@@ -688,33 +695,38 @@ static int p9_fd_request(struct p9_client *client, struct p9_req_t *req) + + static int p9_fd_cancel(struct p9_client *client, struct p9_req_t *req) + { ++ struct p9_trans_fd *ts = client->trans; ++ struct p9_conn *m = &ts->conn; + int ret = 1; + + p9_debug(P9_DEBUG_TRANS, "client %p req %p\n", client, req); + +- spin_lock(&client->lock); ++ spin_lock(&m->req_lock); + if (req->status == REQ_STATUS_UNSENT) { list_del(&req->req_list); req->status = 
REQ_STATUS_FLSHD; @@ -377137,17 +486887,70 @@ index 007bbcc68010b..6fe3719c1fc61 100644 + p9_req_put(client, req); ret = 0; } - spin_unlock(&client->lock); -@@ -724,7 +725,7 @@ static int p9_fd_cancelled(struct p9_client *client, struct p9_req_t *req) +- spin_unlock(&client->lock); ++ spin_unlock(&m->req_lock); + + return ret; + } + + static int p9_fd_cancelled(struct p9_client *client, struct p9_req_t *req) + { ++ struct p9_trans_fd *ts = client->trans; ++ struct p9_conn *m = &ts->conn; ++ + p9_debug(P9_DEBUG_TRANS, "client %p req %p\n", client, req); + +- spin_lock(&client->lock); ++ spin_lock(&m->req_lock); + /* Ignore cancelled request if message has been received + * before lock. + */ + if (req->status == REQ_STATUS_RCVD) { +- spin_unlock(&client->lock); ++ spin_unlock(&m->req_lock); + return 0; + } + +@@ -723,8 +735,9 @@ static int p9_fd_cancelled(struct p9_client *client, struct p9_req_t *req) + */ list_del(&req->req_list); req->status = REQ_STATUS_FLSHD; - spin_unlock(&client->lock); +- spin_unlock(&client->lock); - p9_req_put(req); ++ spin_unlock(&m->req_lock); ++ + p9_req_put(client, req); return 0; } -@@ -885,12 +886,12 @@ static void p9_conn_destroy(struct p9_conn *m) +@@ -822,11 +835,14 @@ static int p9_fd_open(struct p9_client *client, int rfd, int wfd) + goto out_free_ts; + if (!(ts->rd->f_mode & FMODE_READ)) + goto out_put_rd; ++ /* prevent workers from hanging on IO when fd is a pipe */ ++ ts->rd->f_flags |= O_NONBLOCK; + ts->wr = fget(wfd); + if (!ts->wr) + goto out_put_rd; + if (!(ts->wr->f_mode & FMODE_WRITE)) + goto out_put_wr; ++ ts->wr->f_flags |= O_NONBLOCK; + + client->trans = ts; + client->status = Connected; +@@ -848,8 +864,10 @@ static int p9_socket_open(struct p9_client *client, struct socket *csocket) + struct file *file; + + p = kzalloc(sizeof(struct p9_trans_fd), GFP_KERNEL); +- if (!p) ++ if (!p) { ++ sock_release(csocket); + return -ENOMEM; ++ } + + csocket->sk->sk_allocation = GFP_NOIO; + file = sock_alloc_file(csocket, 0, NULL); +@@ -885,12 +903,12 @@ static void p9_conn_destroy(struct p9_conn *m) p9_mux_poll_stop(m); cancel_work_sync(&m->rq); if (m->rreq) { @@ -377198,7 +487001,7 @@ index 490a4c9003395..d110df3cb4e1d 100644 return err; } diff --git a/net/9p/trans_xen.c b/net/9p/trans_xen.c -index 3ec1a51a6944e..427f6caefa29f 100644 +index 3ec1a51a6944e..4255f2a3bea48 100644 --- a/net/9p/trans_xen.c +++ b/net/9p/trans_xen.c @@ -186,7 +186,7 @@ again: @@ -377210,7 +487013,30 @@ index 3ec1a51a6944e..427f6caefa29f 100644 return 0; } -@@ -304,9 +304,9 @@ static void xen_9pfs_front_free(struct xen_9pfs_front_priv *priv) +@@ -231,6 +231,14 @@ static void p9_xen_response(struct work_struct *work) + continue; + } + ++ if (h.size > req->rc.capacity) { ++ dev_warn(&priv->dev->dev, ++ "requested packet size too big: %d for tag %d with capacity %zd\n", ++ h.size, h.tag, req->rc.capacity); ++ req->status = REQ_STATUS_ERROR; ++ goto recv_error; ++ } ++ + memcpy(&req->rc, &h, sizeof(h)); + req->rc.offset = 0; + +@@ -240,6 +248,7 @@ static void p9_xen_response(struct work_struct *work) + masked_prod, &masked_cons, + XEN_9PFS_RING_SIZE(ring)); + ++recv_error: + virt_mb(); + cons += h.size; + ring->intf->in_cons = cons; +@@ -304,9 +313,9 @@ static void xen_9pfs_front_free(struct xen_9pfs_front_priv *priv) ref = priv->rings[i].intf->ref[j]; gnttab_end_foreign_access(ref, 0, 0); } @@ -377223,7 +487049,7 @@ index 3ec1a51a6944e..427f6caefa29f 100644 } gnttab_end_foreign_access(priv->rings[i].ref, 0, 0); free_page((unsigned long)priv->rings[i].intf); -@@ -345,8 +345,8 @@ 
static int xen_9pfs_front_alloc_dataring(struct xenbus_device *dev, +@@ -345,8 +354,8 @@ static int xen_9pfs_front_alloc_dataring(struct xenbus_device *dev, if (ret < 0) goto out; ring->ref = ret; @@ -377234,7 +487060,7 @@ index 3ec1a51a6944e..427f6caefa29f 100644 if (!bytes) { ret = -ENOMEM; goto out; -@@ -377,9 +377,7 @@ out: +@@ -377,9 +386,7 @@ out: if (bytes) { for (i--; i >= 0; i--) gnttab_end_foreign_access(ring->intf->ref[i], 0, 0); @@ -378063,6 +487889,40 @@ index 0604b02795731..6ab28b509d4bb 100644 } else { if (batadv_dat_snoop_outgoing_arp_request(bat_priv, skb)) +diff --git a/net/bluetooth/6lowpan.c b/net/bluetooth/6lowpan.c +index fd164a248569c..580b0940f067a 100644 +--- a/net/bluetooth/6lowpan.c ++++ b/net/bluetooth/6lowpan.c +@@ -971,6 +971,7 @@ static int get_l2cap_conn(char *buf, bdaddr_t *addr, u8 *addr_type, + hci_dev_lock(hdev); + hcon = hci_conn_hash_lookup_le(hdev, addr, *addr_type); + hci_dev_unlock(hdev); ++ hci_dev_put(hdev); + + if (!hcon) + return -ENOENT; +diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c +index 1661979b6a6e8..ce744b14d1a98 100644 +--- a/net/bluetooth/af_bluetooth.c ++++ b/net/bluetooth/af_bluetooth.c +@@ -736,7 +736,7 @@ static int __init bt_init(void) + + err = bt_sysfs_init(); + if (err < 0) +- return err; ++ goto cleanup_led; + + err = sock_register(&bt_sock_family_ops); + if (err) +@@ -772,6 +772,8 @@ unregister_socket: + sock_unregister(PF_BLUETOOTH); + cleanup_sysfs: + bt_sysfs_cleanup(); ++cleanup_led: ++ bt_leds_cleanup(); + return err; + } + diff --git a/net/bluetooth/cmtp/core.c b/net/bluetooth/cmtp/core.c index 0a2d78e811cf5..83eb84e8e688f 100644 --- a/net/bluetooth/cmtp/core.c @@ -378093,7 +487953,7 @@ index 2b5059a56cdaa..7a7e92be1652c 100644 } diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c -index 8a47a3017d61d..396696241d17f 100644 +index 8a47a3017d61d..cabe8eb4c14f4 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -742,7 +742,8 @@ static int hci_init3_req(struct hci_request *req, unsigned long opt) @@ -378106,7 +487966,15 @@ index 8a47a3017d61d..396696241d17f 100644 /* Read LE Min/Max Tx Power*/ hci_req_add(req, HCI_OP_LE_READ_TRANSMIT_POWER, 0, NULL); -@@ -3906,10 +3907,10 @@ int hci_register_dev(struct hci_dev *hdev) +@@ -1631,6 +1632,7 @@ setup_failed: + hdev->flush(hdev); + + if (hdev->sent_cmd) { ++ cancel_delayed_work_sync(&hdev->cmd_timer); + kfree_skb(hdev->sent_cmd); + hdev->sent_cmd = NULL; + } +@@ -3906,10 +3908,10 @@ int hci_register_dev(struct hci_dev *hdev) */ switch (hdev->dev_type) { case HCI_PRIMARY: @@ -378119,7 +487987,7 @@ index 8a47a3017d61d..396696241d17f 100644 break; default: return -EINVAL; -@@ -3918,7 +3919,7 @@ int hci_register_dev(struct hci_dev *hdev) +@@ -3918,7 +3920,7 @@ int hci_register_dev(struct hci_dev *hdev) if (id < 0) return id; @@ -378128,7 +487996,17 @@ index 8a47a3017d61d..396696241d17f 100644 hdev->id = id; BT_DBG("%p name %s bus %d", hdev, hdev->name, hdev->bus); -@@ -3998,6 +3999,7 @@ int hci_register_dev(struct hci_dev *hdev) +@@ -3984,7 +3986,8 @@ int hci_register_dev(struct hci_dev *hdev) + hci_sock_dev_event(hdev, HCI_DEV_REG); + hci_dev_hold(hdev); + +- if (!test_bit(HCI_QUIRK_NO_SUSPEND_NOTIFIER, &hdev->quirks)) { ++ if (!hdev->suspend_notifier.notifier_call && ++ !test_bit(HCI_QUIRK_NO_SUSPEND_NOTIFIER, &hdev->quirks)) { + hdev->suspend_notifier.notifier_call = hci_suspend_notifier; + error = register_pm_notifier(&hdev->suspend_notifier); + if (error) +@@ -3998,6 +4001,7 @@ int hci_register_dev(struct hci_dev *hdev) 
return id; err_wqueue: @@ -378136,7 +488014,7 @@ index 8a47a3017d61d..396696241d17f 100644 destroy_workqueue(hdev->workqueue); destroy_workqueue(hdev->req_workqueue); err: -@@ -4081,6 +4083,7 @@ void hci_release_dev(struct hci_dev *hdev) +@@ -4081,6 +4085,7 @@ void hci_release_dev(struct hci_dev *hdev) hci_dev_unlock(hdev); ida_simple_remove(&hci_index_ida, hdev->id); @@ -378144,7 +488022,7 @@ index 8a47a3017d61d..396696241d17f 100644 kfree(hdev); } EXPORT_SYMBOL(hci_release_dev); -@@ -4670,15 +4673,27 @@ static inline int __get_blocks(struct hci_dev *hdev, struct sk_buff *skb) +@@ -4670,15 +4675,27 @@ static inline int __get_blocks(struct hci_dev *hdev, struct sk_buff *skb) return DIV_ROUND_UP(skb->len - HCI_ACL_HDR_SIZE, hdev->block_len); } @@ -378179,7 +488057,7 @@ index 8a47a3017d61d..396696241d17f 100644 } /* Schedule SCO */ -@@ -4736,7 +4751,7 @@ static void hci_sched_acl_pkt(struct hci_dev *hdev) +@@ -4736,7 +4753,7 @@ static void hci_sched_acl_pkt(struct hci_dev *hdev) struct sk_buff *skb; int quote; @@ -378188,7 +488066,7 @@ index 8a47a3017d61d..396696241d17f 100644 while (hdev->acl_cnt && (chan = hci_chan_sent(hdev, ACL_LINK, "e))) { -@@ -4779,8 +4794,6 @@ static void hci_sched_acl_blk(struct hci_dev *hdev) +@@ -4779,8 +4796,6 @@ static void hci_sched_acl_blk(struct hci_dev *hdev) int quote; u8 type; @@ -378197,7 +488075,7 @@ index 8a47a3017d61d..396696241d17f 100644 BT_DBG("%s", hdev->name); if (hdev->dev_type == HCI_AMP) -@@ -4788,6 +4801,8 @@ static void hci_sched_acl_blk(struct hci_dev *hdev) +@@ -4788,6 +4803,8 @@ static void hci_sched_acl_blk(struct hci_dev *hdev) else type = ACL_LINK; @@ -378206,7 +488084,7 @@ index 8a47a3017d61d..396696241d17f 100644 while (hdev->block_cnt > 0 && (chan = hci_chan_sent(hdev, type, "e))) { u32 priority = (skb_peek(&chan->data_q))->priority; -@@ -4861,7 +4876,7 @@ static void hci_sched_le(struct hci_dev *hdev) +@@ -4861,7 +4878,7 @@ static void hci_sched_le(struct hci_dev *hdev) cnt = hdev->le_pkts ? 
hdev->le_cnt : hdev->acl_cnt; @@ -378215,8 +488093,17 @@ index 8a47a3017d61d..396696241d17f 100644 tmp = cnt; while (cnt && (chan = hci_chan_sent(hdev, LE_LINK, "e))) { +@@ -5085,7 +5102,7 @@ void hci_req_cmd_complete(struct hci_dev *hdev, u16 opcode, u8 status, + *req_complete_skb = bt_cb(skb)->hci.req_complete_skb; + else + *req_complete = bt_cb(skb)->hci.req_complete; +- kfree_skb(skb); ++ dev_kfree_skb_irq(skb); + } + spin_unlock_irqrestore(&hdev->cmd_q.lock, flags); + } diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c -index 0bca035bf2dcc..2337e9275863e 100644 +index 0bca035bf2dcc..9f82fe0e62708 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -1325,8 +1325,10 @@ static void hci_cc_le_set_ext_adv_enable(struct hci_dev *hdev, @@ -378347,7 +488234,27 @@ index 0bca035bf2dcc..2337e9275863e 100644 } static u8 hci_to_mgmt_reason(u8 err) -@@ -5151,8 +5168,9 @@ static void hci_disconn_phylink_complete_evt(struct hci_dev *hdev, +@@ -4398,6 +4415,19 @@ static void hci_sync_conn_complete_evt(struct hci_dev *hdev, + struct hci_ev_sync_conn_complete *ev = (void *) skb->data; + struct hci_conn *conn; + ++ switch (ev->link_type) { ++ case SCO_LINK: ++ case ESCO_LINK: ++ break; ++ default: ++ /* As per Core 5.3 Vol 4 Part E 7.7.35 (p.2219), Link_Type ++ * for HCI_Synchronous_Connection_Complete is limited to ++ * either SCO or eSCO ++ */ ++ bt_dev_err(hdev, "Ignoring connect complete event for invalid link type"); ++ return; ++ } ++ + BT_DBG("%s status 0x%2.2x", hdev->name, ev->status); + + hci_dev_lock(hdev); +@@ -5151,8 +5181,9 @@ static void hci_disconn_phylink_complete_evt(struct hci_dev *hdev, hci_dev_lock(hdev); hcon = hci_conn_hash_lookup_handle(hdev, ev->phy_handle); @@ -378358,7 +488265,7 @@ index 0bca035bf2dcc..2337e9275863e 100644 hci_conn_del(hcon); } -@@ -5780,7 +5798,13 @@ static void hci_le_adv_report_evt(struct hci_dev *hdev, struct sk_buff *skb) +@@ -5780,7 +5811,13 @@ static void hci_le_adv_report_evt(struct hci_dev *hdev, struct sk_buff *skb) struct hci_ev_le_advertising_info *ev = ptr; s8 rssi; @@ -378426,7 +488333,7 @@ index 7827639ecf5c3..08542dfc2dc53 100644 } diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c -index 77ba68209dbd8..8f1a95b9d3207 100644 +index 77ba68209dbd8..e15fcf72a3428 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -61,6 +61,9 @@ static void l2cap_send_disconn_req(struct l2cap_chan *chan, int err); @@ -378551,6 +488458,15 @@ index 77ba68209dbd8..8f1a95b9d3207 100644 if (state && c->state != state) continue; +@@ -1960,7 +1990,7 @@ static struct l2cap_chan *l2cap_global_chan_by_psm(int state, __le16 psm, + if (link_type == LE_LINK && c->src_type == BDADDR_BREDR) + continue; + +- if (c->psm == psm) { ++ if (c->chan_type != L2CAP_CHAN_FIXED && c->psm == psm) { + int src_match, dst_match; + int src_any, dst_any; + @@ -1968,7 +1998,9 @@ static struct l2cap_chan *l2cap_global_chan_by_psm(int state, __le16 psm, src_match = !bacmp(&c->src, src); dst_match = !bacmp(&c->dst, dst); @@ -378582,7 +488498,17 @@ index 77ba68209dbd8..8f1a95b9d3207 100644 skb_queue_head_init(&chan->srej_q); err = l2cap_seq_list_init(&chan->srej_list, chan->tx_win); -@@ -4281,6 +4309,12 @@ static int l2cap_connect_create_rsp(struct l2cap_conn *conn, +@@ -3736,7 +3764,8 @@ done: + l2cap_add_conf_opt(&ptr, L2CAP_CONF_RFC, + sizeof(rfc), (unsigned long) &rfc, endptr - ptr); + +- if (test_bit(FLAG_EFS_ENABLE, &chan->flags)) { ++ if (remote_efs && ++ test_bit(FLAG_EFS_ENABLE, &chan->flags)) { + chan->remote_id 
= efs.id; + chan->remote_stype = efs.stype; + chan->remote_msdu = le16_to_cpu(efs.msdu); +@@ -4281,6 +4310,12 @@ static int l2cap_connect_create_rsp(struct l2cap_conn *conn, } } @@ -378595,7 +488521,7 @@ index 77ba68209dbd8..8f1a95b9d3207 100644 err = 0; l2cap_chan_lock(chan); -@@ -4310,6 +4344,7 @@ static int l2cap_connect_create_rsp(struct l2cap_conn *conn, +@@ -4310,6 +4345,7 @@ static int l2cap_connect_create_rsp(struct l2cap_conn *conn, } l2cap_chan_unlock(chan); @@ -378603,7 +488529,17 @@ index 77ba68209dbd8..8f1a95b9d3207 100644 unlock: mutex_unlock(&conn->chan_lock); -@@ -4463,6 +4498,7 @@ static inline int l2cap_config_req(struct l2cap_conn *conn, +@@ -4417,7 +4453,8 @@ static inline int l2cap_config_req(struct l2cap_conn *conn, + + chan->ident = cmd->ident; + l2cap_send_cmd(conn, cmd->ident, L2CAP_CONF_RSP, len, rsp); +- chan->num_conf_rsp++; ++ if (chan->num_conf_rsp < L2CAP_CONF_MAX_CONF_RSP) ++ chan->num_conf_rsp++; + + /* Reset config buffer. */ + chan->conf_len = 0; +@@ -4463,6 +4500,7 @@ static inline int l2cap_config_req(struct l2cap_conn *conn, unlock: l2cap_chan_unlock(chan); @@ -378611,7 +488547,7 @@ index 77ba68209dbd8..8f1a95b9d3207 100644 return err; } -@@ -4577,6 +4613,7 @@ static inline int l2cap_config_rsp(struct l2cap_conn *conn, +@@ -4577,6 +4615,7 @@ static inline int l2cap_config_rsp(struct l2cap_conn *conn, done: l2cap_chan_unlock(chan); @@ -378619,7 +488555,7 @@ index 77ba68209dbd8..8f1a95b9d3207 100644 return err; } -@@ -5304,6 +5341,7 @@ send_move_response: +@@ -5304,6 +5343,7 @@ send_move_response: l2cap_send_move_chan_rsp(chan, result); l2cap_chan_unlock(chan); @@ -378627,7 +488563,7 @@ index 77ba68209dbd8..8f1a95b9d3207 100644 return 0; } -@@ -5396,6 +5434,7 @@ static void l2cap_move_continue(struct l2cap_conn *conn, u16 icid, u16 result) +@@ -5396,6 +5436,7 @@ static void l2cap_move_continue(struct l2cap_conn *conn, u16 icid, u16 result) } l2cap_chan_unlock(chan); @@ -378635,7 +488571,7 @@ index 77ba68209dbd8..8f1a95b9d3207 100644 } static void l2cap_move_fail(struct l2cap_conn *conn, u8 ident, u16 icid, -@@ -5425,6 +5464,7 @@ static void l2cap_move_fail(struct l2cap_conn *conn, u8 ident, u16 icid, +@@ -5425,6 +5466,7 @@ static void l2cap_move_fail(struct l2cap_conn *conn, u8 ident, u16 icid, l2cap_send_move_chan_cfm(chan, L2CAP_MC_UNCONFIRMED); l2cap_chan_unlock(chan); @@ -378643,7 +488579,7 @@ index 77ba68209dbd8..8f1a95b9d3207 100644 } static int l2cap_move_channel_rsp(struct l2cap_conn *conn, -@@ -5488,6 +5528,7 @@ static int l2cap_move_channel_confirm(struct l2cap_conn *conn, +@@ -5488,6 +5530,7 @@ static int l2cap_move_channel_confirm(struct l2cap_conn *conn, l2cap_send_move_chan_cfm_rsp(conn, cmd->ident, icid); l2cap_chan_unlock(chan); @@ -378651,7 +488587,7 @@ index 77ba68209dbd8..8f1a95b9d3207 100644 return 0; } -@@ -5523,6 +5564,7 @@ static inline int l2cap_move_channel_confirm_rsp(struct l2cap_conn *conn, +@@ -5523,6 +5566,7 @@ static inline int l2cap_move_channel_confirm_rsp(struct l2cap_conn *conn, } l2cap_chan_unlock(chan); @@ -378659,7 +488595,27 @@ index 77ba68209dbd8..8f1a95b9d3207 100644 return 0; } -@@ -5895,12 +5937,11 @@ static inline int l2cap_le_credits(struct l2cap_conn *conn, +@@ -5771,6 +5815,19 @@ static int l2cap_le_connect_req(struct l2cap_conn *conn, + BT_DBG("psm 0x%2.2x scid 0x%4.4x mtu %u mps %u", __le16_to_cpu(psm), + scid, mtu, mps); + ++ /* BLUETOOTH CORE SPECIFICATION Version 5.3 | Vol 3, Part A ++ * page 1059: ++ * ++ * Valid range: 0x0001-0x00ff ++ * ++ * Table 4.15: L2CAP_LE_CREDIT_BASED_CONNECTION_REQ SPSM 
ranges ++ */ ++ if (!psm || __le16_to_cpu(psm) > L2CAP_PSM_LE_DYN_END) { ++ result = L2CAP_CR_LE_BAD_PSM; ++ chan = NULL; ++ goto response; ++ } ++ + /* Check if we have socket listening on psm */ + pchan = l2cap_global_chan_by_psm(BT_LISTEN, psm, &conn->hcon->src, + &conn->hcon->dst, LE_LINK); +@@ -5895,12 +5952,11 @@ static inline int l2cap_le_credits(struct l2cap_conn *conn, if (credits > max_credits) { BT_ERR("LE credits overflow"); l2cap_send_disconn_req(chan, ECONNRESET); @@ -378673,7 +488629,7 @@ index 77ba68209dbd8..8f1a95b9d3207 100644 } chan->tx_credits += credits; -@@ -5911,7 +5952,9 @@ static inline int l2cap_le_credits(struct l2cap_conn *conn, +@@ -5911,7 +5967,9 @@ static inline int l2cap_le_credits(struct l2cap_conn *conn, if (chan->tx_credits) chan->ops->resume(chan); @@ -378683,7 +488639,119 @@ index 77ba68209dbd8..8f1a95b9d3207 100644 return 0; } -@@ -7597,6 +7640,7 @@ drop: +@@ -5958,6 +6016,18 @@ static inline int l2cap_ecred_conn_req(struct l2cap_conn *conn, + + psm = req->psm; + ++ /* BLUETOOTH CORE SPECIFICATION Version 5.3 | Vol 3, Part A ++ * page 1059: ++ * ++ * Valid range: 0x0001-0x00ff ++ * ++ * Table 4.15: L2CAP_LE_CREDIT_BASED_CONNECTION_REQ SPSM ranges ++ */ ++ if (!psm || __le16_to_cpu(psm) > L2CAP_PSM_LE_DYN_END) { ++ result = L2CAP_CR_LE_BAD_PSM; ++ goto response; ++ } ++ + BT_DBG("psm 0x%2.2x mtu %u mps %u", __le16_to_cpu(psm), mtu, mps); + + memset(&pdu, 0, sizeof(pdu)); +@@ -6842,6 +6912,7 @@ static int l2cap_rx_state_recv(struct l2cap_chan *chan, + struct l2cap_ctrl *control, + struct sk_buff *skb, u8 event) + { ++ struct l2cap_ctrl local_control; + int err = 0; + bool skb_in_use = false; + +@@ -6866,15 +6937,32 @@ static int l2cap_rx_state_recv(struct l2cap_chan *chan, + chan->buffer_seq = chan->expected_tx_seq; + skb_in_use = true; + ++ /* l2cap_reassemble_sdu may free skb, hence invalidate ++ * control, so make a copy in advance to use it after ++ * l2cap_reassemble_sdu returns and to avoid the race ++ * condition, for example: ++ * ++ * The current thread calls: ++ * l2cap_reassemble_sdu ++ * chan->ops->recv == l2cap_sock_recv_cb ++ * __sock_queue_rcv_skb ++ * Another thread calls: ++ * bt_sock_recvmsg ++ * skb_recv_datagram ++ * skb_free_datagram ++ * Then the current thread tries to access control, but ++ * it was freed by skb_free_datagram. ++ */ ++ local_control = *control; + err = l2cap_reassemble_sdu(chan, skb, control); + if (err) + break; + +- if (control->final) { ++ if (local_control.final) { + if (!test_and_clear_bit(CONN_REJ_ACT, + &chan->conn_state)) { +- control->final = 0; +- l2cap_retransmit_all(chan, control); ++ local_control.final = 0; ++ l2cap_retransmit_all(chan, &local_control); + l2cap_ertm_send(chan); + } + } +@@ -7254,11 +7342,27 @@ static int l2cap_rx(struct l2cap_chan *chan, struct l2cap_ctrl *control, + static int l2cap_stream_rx(struct l2cap_chan *chan, struct l2cap_ctrl *control, + struct sk_buff *skb) + { ++ /* l2cap_reassemble_sdu may free skb, hence invalidate control, so store ++ * the txseq field in advance to use it after l2cap_reassemble_sdu ++ * returns and to avoid the race condition, for example: ++ * ++ * The current thread calls: ++ * l2cap_reassemble_sdu ++ * chan->ops->recv == l2cap_sock_recv_cb ++ * __sock_queue_rcv_skb ++ * Another thread calls: ++ * bt_sock_recvmsg ++ * skb_recv_datagram ++ * skb_free_datagram ++ * Then the current thread tries to access control, but it was freed by ++ * skb_free_datagram. 
++ */ ++ u16 txseq = control->txseq; ++ + BT_DBG("chan %p, control %p, skb %p, state %d", chan, control, skb, + chan->rx_state); + +- if (l2cap_classify_txseq(chan, control->txseq) == +- L2CAP_TXSEQ_EXPECTED) { ++ if (l2cap_classify_txseq(chan, txseq) == L2CAP_TXSEQ_EXPECTED) { + l2cap_pass_to_tx(chan, control); + + BT_DBG("buffer_seq %u->%u", chan->buffer_seq, +@@ -7281,8 +7385,8 @@ static int l2cap_stream_rx(struct l2cap_chan *chan, struct l2cap_ctrl *control, + } + } + +- chan->last_acked_seq = control->txseq; +- chan->expected_tx_seq = __next_seq(chan, control->txseq); ++ chan->last_acked_seq = txseq; ++ chan->expected_tx_seq = __next_seq(chan, txseq); + + return 0; + } +@@ -7538,6 +7642,7 @@ static void l2cap_data_channel(struct l2cap_conn *conn, u16 cid, + return; + } + ++ l2cap_chan_hold(chan); + l2cap_chan_lock(chan); + } else { + BT_DBG("unknown cid 0x%4.4x", cid); +@@ -7597,6 +7702,7 @@ drop: done: l2cap_chan_unlock(chan); @@ -378691,7 +488759,7 @@ index 77ba68209dbd8..8f1a95b9d3207 100644 } static void l2cap_conless_channel(struct l2cap_conn *conn, __le16 psm, -@@ -8085,7 +8129,7 @@ static struct l2cap_chan *l2cap_global_fixed_chan(struct l2cap_chan *c, +@@ -8085,7 +8191,7 @@ static struct l2cap_chan *l2cap_global_fixed_chan(struct l2cap_chan *c, if (src_type != c->src_type) continue; @@ -378700,6 +488768,27 @@ index 77ba68209dbd8..8f1a95b9d3207 100644 read_unlock(&chan_list_lock); return c; } +@@ -8382,9 +8488,8 @@ void l2cap_recv_acldata(struct hci_conn *hcon, struct sk_buff *skb, u16 flags) + * expected length. + */ + if (skb->len < L2CAP_LEN_SIZE) { +- if (l2cap_recv_frag(conn, skb, conn->mtu) < 0) +- goto drop; +- return; ++ l2cap_recv_frag(conn, skb, conn->mtu); ++ break; + } + + len = get_unaligned_le16(skb->data) + L2CAP_HDR_SIZE; +@@ -8428,7 +8533,7 @@ void l2cap_recv_acldata(struct hci_conn *hcon, struct sk_buff *skb, u16 flags) + + /* Header still could not be read just continue */ + if (conn->rx_skb->len < L2CAP_LEN_SIZE) +- return; ++ break; + } + + if (skb->len > conn->rx_len) { diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index c99d65ef13b1e..d2c6785205992 100644 --- a/net/bluetooth/l2cap_sock.c @@ -378845,7 +488934,7 @@ index c99d65ef13b1e..d2c6785205992 100644 if (l2cap_pi(sk)->rx_busy_skb) { kfree_skb(l2cap_pi(sk)->rx_busy_skb); diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c -index cea01e275f1ea..f09f0a78eb7be 100644 +index cea01e275f1ea..04000499f4a21 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -3806,7 +3806,7 @@ static const u8 rpa_resolution_uuid[16] = { @@ -379149,8 +489238,17 @@ index cea01e275f1ea..f09f0a78eb7be 100644 } return mgmt_cmd_status(sk, hdev ? hdev->id : MGMT_INDEX_NONE, +@@ -7935,7 +7971,7 @@ static int add_ext_adv_params(struct sock *sk, struct hci_dev *hdev, + * extra parameters we don't know about will be ignored in this request. 
+ */ + if (data_len < MGMT_ADD_EXT_ADV_PARAMS_MIN_SIZE) +- return mgmt_cmd_status(sk, hdev->id, MGMT_OP_ADD_ADVERTISING, ++ return mgmt_cmd_status(sk, hdev->id, MGMT_OP_ADD_EXT_ADV_PARAMS, + MGMT_STATUS_INVALID_PARAMS); + + flags = __le32_to_cpu(cp->flags); diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c -index f2bacb464ccf3..7324764384b67 100644 +index f2bacb464ccf3..8d6fce9005bdd 100644 --- a/net/bluetooth/rfcomm/core.c +++ b/net/bluetooth/rfcomm/core.c @@ -549,22 +549,58 @@ struct rfcomm_dlc *rfcomm_dlc_exists(bdaddr_t *src, bdaddr_t *dst, u8 channel) @@ -379204,7 +489302,7 @@ index f2bacb464ccf3..7324764384b67 100644 + + ret = rfcomm_dlc_send_frag(d, frag); + if (ret < 0) { -+ kfree_skb(frag); ++ dev_kfree_skb_irq(frag); + goto unlock; + } + @@ -379220,10 +489318,38 @@ index f2bacb464ccf3..7324764384b67 100644 return len; } diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c -index 2c95bb58f901a..21e24da4847f0 100644 +index 2c95bb58f901a..4397e14ff560f 100644 --- a/net/bluetooth/rfcomm/sock.c +++ b/net/bluetooth/rfcomm/sock.c -@@ -575,46 +575,20 @@ static int rfcomm_sock_sendmsg(struct socket *sock, struct msghdr *msg, +@@ -391,6 +391,7 @@ static int rfcomm_sock_connect(struct socket *sock, struct sockaddr *addr, int a + addr->sa_family != AF_BLUETOOTH) + return -EINVAL; + ++ sock_hold(sk); + lock_sock(sk); + + if (sk->sk_state != BT_OPEN && sk->sk_state != BT_BOUND) { +@@ -410,14 +411,18 @@ static int rfcomm_sock_connect(struct socket *sock, struct sockaddr *addr, int a + d->sec_level = rfcomm_pi(sk)->sec_level; + d->role_switch = rfcomm_pi(sk)->role_switch; + ++ /* Drop sock lock to avoid potential deadlock with the RFCOMM lock */ ++ release_sock(sk); + err = rfcomm_dlc_open(d, &rfcomm_pi(sk)->src, &sa->rc_bdaddr, + sa->rc_channel); +- if (!err) ++ lock_sock(sk); ++ if (!err && !sock_flag(sk, SOCK_ZAPPED)) + err = bt_sock_wait_state(sk, BT_CONNECTED, + sock_sndtimeo(sk, flags & O_NONBLOCK)); + + done: + release_sock(sk); ++ sock_put(sk); + return err; + } + +@@ -575,46 +580,20 @@ static int rfcomm_sock_sendmsg(struct socket *sock, struct msghdr *msg, lock_sock(sk); sent = bt_sock_wait_ready(sk, msg->msg_flags); @@ -379242,7 +489368,7 @@ index 2c95bb58f901a..21e24da4847f0 100644 - break; - } - skb_reserve(skb, RFCOMM_SKB_HEAD_RESERVE); - +- - err = memcpy_from_msg(skb_put(skb, size), msg, size); - if (err) { - kfree_skb(skb); @@ -379250,7 +489376,7 @@ index 2c95bb58f901a..21e24da4847f0 100644 - sent = err; - break; - } -- + - skb->priority = sk->sk_priority; + release_sock(sk); @@ -379280,7 +489406,7 @@ index 2c95bb58f901a..21e24da4847f0 100644 return sent; } -@@ -928,7 +902,10 @@ static int rfcomm_sock_shutdown(struct socket *sock, int how) +@@ -928,7 +907,10 @@ static int rfcomm_sock_shutdown(struct socket *sock, int how) lock_sock(sk); if (!sk->sk_shutdown) { sk->sk_shutdown = SHUTDOWN_MASK; @@ -379433,20 +489559,18 @@ index 98a8815865128..9a8814d4565a0 100644 } diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c -index b5f4ef35357c8..a9fb16b9c735a 100644 +index b5f4ef35357c8..11d254ce3581c 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c -@@ -469,6 +469,9 @@ static int convert___skb_to_skb(struct sk_buff *skb, struct __sk_buff *__skb) - { - struct qdisc_skb_cb *cb = (struct qdisc_skb_cb *)skb->cb; +@@ -259,6 +259,7 @@ static void *bpf_test_init(const union bpf_attr *kattr, u32 size, + if (user_size > size) + return ERR_PTR(-EMSGSIZE); -+ if (!skb->len) -+ return -EINVAL; -+ - if (!__skb) - return 0; - -@@ -954,7 +957,7 @@ int 
bpf_prog_test_run_sk_lookup(struct bpf_prog *prog, const union bpf_attr *kat ++ size = SKB_DATA_ALIGN(size); + data = kzalloc(size + headroom + tailroom, GFP_USER); + if (!data) + return ERR_PTR(-ENOMEM); +@@ -954,7 +955,7 @@ int bpf_prog_test_run_sk_lookup(struct bpf_prog *prog, const union bpf_attr *kat if (!range_is_zero(user_ctx, offsetofend(typeof(*user_ctx), local_port), sizeof(*user_ctx))) goto out; @@ -379455,7 +489579,7 @@ index b5f4ef35357c8..a9fb16b9c735a 100644 ret = -ERANGE; goto out; } -@@ -962,7 +965,7 @@ int bpf_prog_test_run_sk_lookup(struct bpf_prog *prog, const union bpf_attr *kat +@@ -962,7 +963,7 @@ int bpf_prog_test_run_sk_lookup(struct bpf_prog *prog, const union bpf_attr *kat ctx.family = (u16)user_ctx->family; ctx.protocol = (u16)user_ctx->protocol; ctx.dport = (u16)user_ctx->local_port; @@ -379577,7 +489701,7 @@ index f3d751105343c..db4f2641d1cd1 100644 * br_multicast_list_adjacent - Returns snooped multicast addresses * @dev: The bridge port adjacent to which to retrieve addresses diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c -index 8edfb98ae1d58..a718204c4bfdd 100644 +index 8edfb98ae1d58..f3c7cfba31e1b 100644 --- a/net/bridge/br_netfilter_hooks.c +++ b/net/bridge/br_netfilter_hooks.c @@ -384,6 +384,7 @@ static int br_nf_pre_routing_finish(struct net *net, struct sock *sk, struct sk_ @@ -379624,7 +489748,15 @@ index 8edfb98ae1d58..a718204c4bfdd 100644 data = this_cpu_ptr(&brnf_frag_data_storage); data->encap_size = nf_bridge_encap_header_len(skb); data->size = ETH_HLEN + data->encap_size; -@@ -1013,9 +1014,24 @@ int br_nf_hook_thresh(unsigned int hook, struct net *net, +@@ -870,6 +871,7 @@ static unsigned int ip_sabotage_in(void *priv, + if (nf_bridge && !nf_bridge->in_prerouting && + !netif_is_l3_master(skb->dev) && + !netif_is_l3_slave(skb->dev)) { ++ nf_bridge_info_free(skb); + state->okfn(state->net, state->sk, skb); + return NF_STOLEN; + } +@@ -1013,9 +1015,24 @@ int br_nf_hook_thresh(unsigned int hook, struct net *net, return okfn(net, sk, skb); ops = nf_hook_entries_get_hook_ops(e); @@ -379760,7 +489892,7 @@ index d9a89ddd03310..7b0c19772111c 100644 } diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c -index 19f65ab91a027..10e63ea6a13e1 100644 +index 19f65ab91a027..86441ff78a0f8 100644 --- a/net/bridge/br_vlan.c +++ b/net/bridge/br_vlan.c @@ -560,10 +560,10 @@ static bool __allowed_ingress(const struct net_bridge *br, @@ -379777,7 +489909,51 @@ index 19f65ab91a027..10e63ea6a13e1 100644 } } v = br_vlan_find(vg, *vid); -@@ -2105,7 +2105,8 @@ static int br_vlan_rtm_dump(struct sk_buff *skb, struct netlink_callback *cb) +@@ -904,6 +904,8 @@ int __br_vlan_set_proto(struct net_bridge *br, __be16 proto, + list_for_each_entry(p, &br->port_list, list) { + vg = nbp_vlan_group(p); + list_for_each_entry(vlan, &vg->vlan_list, vlist) { ++ if (vlan->priv_flags & BR_VLFLAG_ADDED_BY_SWITCHDEV) ++ continue; + err = vlan_vid_add(p->dev, proto, vlan->vid); + if (err) + goto err_filt; +@@ -918,8 +920,11 @@ int __br_vlan_set_proto(struct net_bridge *br, __be16 proto, + /* Delete VLANs for the old proto from the device filter. 
*/ + list_for_each_entry(p, &br->port_list, list) { + vg = nbp_vlan_group(p); +- list_for_each_entry(vlan, &vg->vlan_list, vlist) ++ list_for_each_entry(vlan, &vg->vlan_list, vlist) { ++ if (vlan->priv_flags & BR_VLFLAG_ADDED_BY_SWITCHDEV) ++ continue; + vlan_vid_del(p->dev, oldproto, vlan->vid); ++ } + } + + return 0; +@@ -928,13 +933,19 @@ err_filt: + attr.u.vlan_protocol = ntohs(oldproto); + switchdev_port_attr_set(br->dev, &attr, NULL); + +- list_for_each_entry_continue_reverse(vlan, &vg->vlan_list, vlist) ++ list_for_each_entry_continue_reverse(vlan, &vg->vlan_list, vlist) { ++ if (vlan->priv_flags & BR_VLFLAG_ADDED_BY_SWITCHDEV) ++ continue; + vlan_vid_del(p->dev, proto, vlan->vid); ++ } + + list_for_each_entry_continue_reverse(p, &br->port_list, list) { + vg = nbp_vlan_group(p); +- list_for_each_entry(vlan, &vg->vlan_list, vlist) ++ list_for_each_entry(vlan, &vg->vlan_list, vlist) { ++ if (vlan->priv_flags & BR_VLFLAG_ADDED_BY_SWITCHDEV) ++ continue; + vlan_vid_del(p->dev, proto, vlan->vid); ++ } + } + + return err; +@@ -2105,7 +2116,8 @@ static int br_vlan_rtm_dump(struct sk_buff *skb, struct netlink_callback *cb) goto out_err; } err = br_vlan_dump_dev(dev, skb, cb, dump_flags); @@ -379947,6 +490123,84 @@ index eba0efe64d05a..fbf858ddec352 100644 if (!nskb) return; +diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c +index e12fd3cad6194..997c4ebdce6f6 100644 +--- a/net/caif/caif_socket.c ++++ b/net/caif/caif_socket.c +@@ -1020,6 +1020,7 @@ static void caif_sock_destructor(struct sock *sk) + return; + } + sk_stream_kill_queues(&cf_sk->sk); ++ WARN_ON(sk->sk_forward_alloc); + caif_free_client(&cf_sk->layer); + } + +diff --git a/net/caif/cfctrl.c b/net/caif/cfctrl.c +index 2809cbd6b7f74..d8cb4b2a076b4 100644 +--- a/net/caif/cfctrl.c ++++ b/net/caif/cfctrl.c +@@ -269,11 +269,15 @@ int cfctrl_linkup_request(struct cflayer *layer, + default: + pr_warn("Request setup of bad link type = %d\n", + param->linktype); ++ cfpkt_destroy(pkt); + return -EINVAL; + } + req = kzalloc(sizeof(*req), GFP_KERNEL); +- if (!req) ++ if (!req) { ++ cfpkt_destroy(pkt); + return -ENOMEM; ++ } ++ + req->client_layer = user_layer; + req->cmd = CFCTRL_CMD_LINK_SETUP; + req->param = *param; +diff --git a/net/caif/chnl_net.c b/net/caif/chnl_net.c +index 414dc5671c45e..2de6b44deb2c4 100644 +--- a/net/caif/chnl_net.c ++++ b/net/caif/chnl_net.c +@@ -310,9 +310,6 @@ static int chnl_net_open(struct net_device *dev) + + if (result == 0) { + pr_debug("connect timeout\n"); +- caif_disconnect_client(dev_net(dev), &priv->chnl); +- priv->state = CAIF_DISCONNECTED; +- pr_debug("state disconnected\n"); + result = -ETIMEDOUT; + goto error; + } +diff --git a/net/can/af_can.c b/net/can/af_can.c +index cce2af10eb3ea..20d2dcb7c97ae 100644 +--- a/net/can/af_can.c ++++ b/net/can/af_can.c +@@ -451,7 +451,7 @@ int can_rx_register(struct net *net, struct net_device *dev, canid_t can_id, + + /* insert new receiver (dev,canid,mask) -> (func,data) */ + +- if (dev && dev->type != ARPHRD_CAN) ++ if (dev && (dev->type != ARPHRD_CAN || !can_get_ml_priv(dev))) + return -ENODEV; + + if (dev && !net_eq(net, dev_net(dev))) +@@ -680,7 +680,7 @@ static int can_rcv(struct sk_buff *skb, struct net_device *dev, + { + struct canfd_frame *cfd = (struct canfd_frame *)skb->data; + +- if (unlikely(dev->type != ARPHRD_CAN || skb->len != CAN_MTU)) { ++ if (unlikely(dev->type != ARPHRD_CAN || !can_get_ml_priv(dev) || skb->len != CAN_MTU)) { + pr_warn_once("PF_CAN: dropped non conform CAN skbuff: dev type %d, len %d\n", + dev->type, skb->len); + goto 
free_skb; +@@ -706,7 +706,7 @@ static int canfd_rcv(struct sk_buff *skb, struct net_device *dev, + { + struct canfd_frame *cfd = (struct canfd_frame *)skb->data; + +- if (unlikely(dev->type != ARPHRD_CAN || skb->len != CANFD_MTU)) { ++ if (unlikely(dev->type != ARPHRD_CAN || !can_get_ml_priv(dev) || skb->len != CANFD_MTU)) { + pr_warn_once("PF_CAN: dropped non conform CAN FD skbuff: dev type %d, len %d\n", + dev->type, skb->len); + goto free_skb; diff --git a/net/can/bcm.c b/net/can/bcm.c index 508f67de0b801..aab3a18f4a90f 100644 --- a/net/can/bcm.c @@ -380368,8 +490622,59 @@ index df6968b28bf41..26821487a0573 100644 spin_lock(&isotp_notifier_lock); list_add_tail(&so->notifier, &isotp_notifier_list); +diff --git a/net/can/j1939/address-claim.c b/net/can/j1939/address-claim.c +index f33c473279278..ca4ad6cdd5cbf 100644 +--- a/net/can/j1939/address-claim.c ++++ b/net/can/j1939/address-claim.c +@@ -165,6 +165,46 @@ static void j1939_ac_process(struct j1939_priv *priv, struct sk_buff *skb) + * leaving this function. + */ + ecu = j1939_ecu_get_by_name_locked(priv, name); ++ ++ if (ecu && ecu->addr == skcb->addr.sa) { ++ /* The ISO 11783-5 standard, in "4.5.2 - Address claim ++ * requirements", states: ++ * d) No CF shall begin, or resume, transmission on the ++ * network until 250 ms after it has successfully claimed ++ * an address except when responding to a request for ++ * address-claimed. ++ * ++ * But "Figure 6" and "Figure 7" in "4.5.4.2 - Address-claim ++ * prioritization" show that the CF begins the transmission ++ * after 250 ms from the first AC (address-claimed) message ++ * even if it sends another AC message during that time window ++ * to resolve the address contention with another CF. ++ * ++ * As stated in "4.4.2.3 - Address-claimed message": ++ * In order to successfully claim an address, the CF sending ++ * an address claimed message shall not receive a contending ++ * claim from another CF for at least 250 ms. ++ * ++ * As stated in "4.4.3.2 - NAME management (NM) message": ++ * 1) A commanding CF can ++ * d) request that a CF with a specified NAME transmit ++ * the address-claimed message with its current NAME. ++ * 2) A target CF shall ++ * d) send an address-claimed message in response to a ++ * request for a matching NAME ++ * ++ * Taking the above arguments into account, the 250 ms wait is ++ * requested only during network initialization. ++ * ++ * Do not restart the timer on AC message if both the NAME and ++ * the address match and so if the address has already been ++ * claimed (timer has expired) or the AC message has been sent ++ * to resolve the contention with another CF (timer is still ++ * running). 
++ */ ++ goto out_ecu_put; ++ } ++ + if (!ecu && j1939_address_is_unicast(skcb->addr.sa)) + ecu = j1939_ecu_create_locked(priv, name); + diff --git a/net/can/j1939/main.c b/net/can/j1939/main.c -index 9bc55ecb37f9f..8452b0fbb78c9 100644 +index 9bc55ecb37f9f..82671a882716f 100644 --- a/net/can/j1939/main.c +++ b/net/can/j1939/main.c @@ -75,6 +75,13 @@ static void j1939_can_recv(struct sk_buff *iskb, void *data) @@ -380386,6 +490691,16 @@ index 9bc55ecb37f9f..8452b0fbb78c9 100644 if (j1939_pgn_is_pdu1(skcb->addr.pgn)) { /* Type 1: with destination address */ skcb->addr.da = skcb->addr.pgn; +@@ -325,6 +332,9 @@ int j1939_send_one(struct j1939_priv *priv, struct sk_buff *skb) + /* re-claim the CAN_HDR from the SKB */ + cf = skb_push(skb, J1939_CAN_HDR); + ++ /* initialize header structure */ ++ memset(cf, 0, J1939_CAN_HDR); ++ + /* make it a full can frame again */ + skb_put(skb, J1939_CAN_FTR + (8 - dlc)); + diff --git a/net/can/j1939/socket.c b/net/can/j1939/socket.c index 6dff4510687a4..41016aff21c5e 100644 --- a/net/can/j1939/socket.c @@ -380403,7 +490718,7 @@ index 6dff4510687a4..41016aff21c5e 100644 goto activate_next; } else { diff --git a/net/can/j1939/transport.c b/net/can/j1939/transport.c -index 6c0a0ebdd024c..d7d86c944d76d 100644 +index 6c0a0ebdd024c..4177e96170703 100644 --- a/net/can/j1939/transport.c +++ b/net/can/j1939/transport.c @@ -260,6 +260,8 @@ static void __j1939_session_drop(struct j1939_session *session) @@ -380428,7 +490743,32 @@ index 6c0a0ebdd024c..d7d86c944d76d 100644 __j1939_session_drop(session); j1939_priv_put(session->priv); kfree(session); -@@ -2006,7 +2012,7 @@ struct j1939_session *j1939_tp_send(struct j1939_priv *priv, +@@ -336,10 +342,12 @@ static void j1939_session_skb_drop_old(struct j1939_session *session) + __skb_unlink(do_skb, &session->skb_queue); + /* drop ref taken in j1939_session_skb_queue() */ + skb_unref(do_skb); ++ spin_unlock_irqrestore(&session->skb_queue.lock, flags); + + kfree_skb(do_skb); ++ } else { ++ spin_unlock_irqrestore(&session->skb_queue.lock, flags); + } +- spin_unlock_irqrestore(&session->skb_queue.lock, flags); + } + + void j1939_session_skb_queue(struct j1939_session *session, +@@ -1084,10 +1092,6 @@ static bool j1939_session_deactivate(struct j1939_session *session) + bool active; + + j1939_session_list_lock(priv); +- /* This function should be called with a session ref-count of at +- * least 2. 
+- */ +- WARN_ON_ONCE(kref_read(&session->kref) < 2); + active = j1939_session_deactivate_locked(session); + j1939_session_list_unlock(priv); + +@@ -2006,7 +2010,7 @@ struct j1939_session *j1939_tp_send(struct j1939_priv *priv, /* set the end-packet for broadcast */ session->pkt.last = session->pkt.total; @@ -380437,7 +490777,7 @@ index 6c0a0ebdd024c..d7d86c944d76d 100644 session->tskey = skcb->tskey; return session; -@@ -2023,6 +2029,11 @@ static void j1939_tp_cmd_recv(struct j1939_priv *priv, struct sk_buff *skb) +@@ -2023,6 +2027,11 @@ static void j1939_tp_cmd_recv(struct j1939_priv *priv, struct sk_buff *skb) extd = J1939_ETP; fallthrough; case J1939_TP_CMD_BAM: @@ -380449,7 +490789,7 @@ index 6c0a0ebdd024c..d7d86c944d76d 100644 fallthrough; case J1939_TP_CMD_RTS: if (skcb->addr.type != extd) -@@ -2085,6 +2096,12 @@ static void j1939_tp_cmd_recv(struct j1939_priv *priv, struct sk_buff *skb) +@@ -2085,6 +2094,12 @@ static void j1939_tp_cmd_recv(struct j1939_priv *priv, struct sk_buff *skb) break; case J1939_ETP_CMD_ABORT: /* && J1939_TP_CMD_ABORT */ @@ -381035,7 +491375,7 @@ index 15ab9ffb27fe9..28e5f921dcaf4 100644 if (refs) page_ref_sub(last_head, refs); diff --git a/net/core/dev.c b/net/core/dev.c -index eb3a366bf212c..be51644e95dae 100644 +index eb3a366bf212c..24a80e960d2d9 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -365,12 +365,12 @@ static void list_netdevice(struct net_device *dev) @@ -381310,7 +491650,35 @@ index eb3a366bf212c..be51644e95dae 100644 netdev_wait_allrefs(dev); -@@ -11028,9 +11048,10 @@ void unregister_netdevice_many(struct list_head *head) +@@ -10620,24 +10640,16 @@ void netdev_run_todo(void) + void netdev_stats_to_stats64(struct rtnl_link_stats64 *stats64, + const struct net_device_stats *netdev_stats) + { +-#if BITS_PER_LONG == 64 +- BUILD_BUG_ON(sizeof(*stats64) < sizeof(*netdev_stats)); +- memcpy(stats64, netdev_stats, sizeof(*netdev_stats)); +- /* zero out counters that only exist in rtnl_link_stats64 */ +- memset((char *)stats64 + sizeof(*netdev_stats), 0, +- sizeof(*stats64) - sizeof(*netdev_stats)); +-#else +- size_t i, n = sizeof(*netdev_stats) / sizeof(unsigned long); +- const unsigned long *src = (const unsigned long *)netdev_stats; ++ size_t i, n = sizeof(*netdev_stats) / sizeof(atomic_long_t); ++ const atomic_long_t *src = (atomic_long_t *)netdev_stats; + u64 *dst = (u64 *)stats64; + + BUILD_BUG_ON(n > sizeof(*stats64) / sizeof(u64)); + for (i = 0; i < n; i++) +- dst[i] = src[i]; ++ dst[i] = (unsigned long)atomic_long_read(&src[i]); + /* zero out counters that only exist in rtnl_link_stats64 */ + memset((char *)stats64 + n * sizeof(u64), 0, + sizeof(*stats64) - n * sizeof(u64)); +-#endif + } + EXPORT_SYMBOL(netdev_stats_to_stats64); + +@@ -11028,9 +11040,10 @@ void unregister_netdevice_many(struct list_head *head) list_for_each_entry(dev, head, unreg_list) { /* And unlink it from device chain. 
*/ @@ -381323,7 +491691,7 @@ index eb3a366bf212c..be51644e95dae 100644 } flush_all_backlogs(); -@@ -11175,7 +11196,7 @@ int __dev_change_net_namespace(struct net_device *dev, struct net *net, +@@ -11175,7 +11188,7 @@ int __dev_change_net_namespace(struct net_device *dev, struct net *net, dev_close(dev); /* And unlink it from device chain */ @@ -381332,7 +491700,7 @@ index eb3a366bf212c..be51644e95dae 100644 synchronize_net(); -@@ -11367,8 +11388,7 @@ static int __net_init netdev_init(struct net *net) +@@ -11367,8 +11380,7 @@ static int __net_init netdev_init(struct net *net) BUILD_BUG_ON(GRO_HASH_BUCKETS > 8 * sizeof_field(struct napi_struct, gro_bitmask)); @@ -381510,7 +491878,7 @@ index 79df7cd9dbc16..1bb567a3b329c 100644 if (err != -EAGAIN) { diff --git a/net/core/filter.c b/net/core/filter.c -index 2e32cee2c4690..fb5b9dbf3bc08 100644 +index 2e32cee2c4690..519315a1acf3a 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -1213,10 +1213,11 @@ void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp) @@ -381584,7 +491952,34 @@ index 2e32cee2c4690..fb5b9dbf3bc08 100644 .arg4_type = ARG_CONST_SIZE_OR_ZERO, .arg5_type = ARG_ANYTHING, }; -@@ -2541,7 +2542,7 @@ static const struct bpf_func_proto bpf_redirect_neigh_proto = { +@@ -2122,8 +2123,17 @@ static int __bpf_redirect_no_mac(struct sk_buff *skb, struct net_device *dev, + { + unsigned int mlen = skb_network_offset(skb); + ++ if (unlikely(skb->len <= mlen)) { ++ kfree_skb(skb); ++ return -ERANGE; ++ } ++ + if (mlen) { + __skb_pull(skb, mlen); ++ if (unlikely(!skb->len)) { ++ kfree_skb(skb); ++ return -ERANGE; ++ } + + /* At ingress, the mac header has already been pulled once. + * At egress, skb_pospull_rcsum has to be done in case that +@@ -2143,7 +2153,7 @@ static int __bpf_redirect_common(struct sk_buff *skb, struct net_device *dev, + u32 flags) + { + /* Verify that a link layer header is carried */ +- if (unlikely(skb->mac_header >= skb->network_header)) { ++ if (unlikely(skb->mac_header >= skb->network_header || skb->len == 0)) { + kfree_skb(skb); + return -ERANGE; + } +@@ -2541,7 +2551,7 @@ static const struct bpf_func_proto bpf_redirect_neigh_proto = { .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_ANYTHING, @@ -381593,7 +491988,7 @@ index 2e32cee2c4690..fb5b9dbf3bc08 100644 .arg3_type = ARG_CONST_SIZE_OR_ZERO, .arg4_type = ARG_ANYTHING, }; -@@ -2711,6 +2712,9 @@ BPF_CALL_4(bpf_msg_push_data, struct sk_msg *, msg, u32, start, +@@ -2711,6 +2721,9 @@ BPF_CALL_4(bpf_msg_push_data, struct sk_msg *, msg, u32, start, if (unlikely(flags)) return -EINVAL; @@ -381603,7 +491998,28 @@ index 2e32cee2c4690..fb5b9dbf3bc08 100644 /* First find the starting scatterlist element */ i = msg->sg.start; do { -@@ -4174,7 +4178,7 @@ static const struct bpf_func_proto bpf_skb_event_output_proto = { +@@ -3169,15 +3182,18 @@ static int bpf_skb_generic_push(struct sk_buff *skb, u32 off, u32 len) + + static int bpf_skb_generic_pop(struct sk_buff *skb, u32 off, u32 len) + { ++ void *old_data; ++ + /* skb_ensure_writable() is not needed here, as we're + * already working on an uncloned skb. 
+ */ + if (unlikely(!pskb_may_pull(skb, off + len))) + return -ENOMEM; + +- skb_postpull_rcsum(skb, skb->data + off, len); +- memmove(skb->data + len, skb->data, off); ++ old_data = skb->data; + __skb_pull(skb, len); ++ skb_postpull_rcsum(skb, old_data + off, len); ++ memmove(skb->data, old_data, off); + + return 0; + } +@@ -4174,7 +4190,7 @@ static const struct bpf_func_proto bpf_skb_event_output_proto = { .arg1_type = ARG_PTR_TO_CTX, .arg2_type = ARG_CONST_MAP_PTR, .arg3_type = ARG_ANYTHING, @@ -381612,7 +492028,7 @@ index 2e32cee2c4690..fb5b9dbf3bc08 100644 .arg5_type = ARG_CONST_SIZE_OR_ZERO, }; -@@ -4188,7 +4192,7 @@ const struct bpf_func_proto bpf_skb_output_proto = { +@@ -4188,7 +4204,7 @@ const struct bpf_func_proto bpf_skb_output_proto = { .arg1_btf_id = &bpf_skb_output_btf_ids[0], .arg2_type = ARG_CONST_MAP_PTR, .arg3_type = ARG_ANYTHING, @@ -381621,7 +492037,7 @@ index 2e32cee2c4690..fb5b9dbf3bc08 100644 .arg5_type = ARG_CONST_SIZE_OR_ZERO, }; -@@ -4371,7 +4375,7 @@ static const struct bpf_func_proto bpf_skb_set_tunnel_key_proto = { +@@ -4371,7 +4387,7 @@ static const struct bpf_func_proto bpf_skb_set_tunnel_key_proto = { .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_CTX, @@ -381630,7 +492046,7 @@ index 2e32cee2c4690..fb5b9dbf3bc08 100644 .arg3_type = ARG_CONST_SIZE, .arg4_type = ARG_ANYTHING, }; -@@ -4397,7 +4401,7 @@ static const struct bpf_func_proto bpf_skb_set_tunnel_opt_proto = { +@@ -4397,7 +4413,7 @@ static const struct bpf_func_proto bpf_skb_set_tunnel_opt_proto = { .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_CTX, @@ -381639,7 +492055,7 @@ index 2e32cee2c4690..fb5b9dbf3bc08 100644 .arg3_type = ARG_CONST_SIZE, }; -@@ -4567,7 +4571,7 @@ static const struct bpf_func_proto bpf_xdp_event_output_proto = { +@@ -4567,7 +4583,7 @@ static const struct bpf_func_proto bpf_xdp_event_output_proto = { .arg1_type = ARG_PTR_TO_CTX, .arg2_type = ARG_CONST_MAP_PTR, .arg3_type = ARG_ANYTHING, @@ -381648,7 +492064,7 @@ index 2e32cee2c4690..fb5b9dbf3bc08 100644 .arg5_type = ARG_CONST_SIZE_OR_ZERO, }; -@@ -4581,7 +4585,7 @@ const struct bpf_func_proto bpf_xdp_output_proto = { +@@ -4581,7 +4597,7 @@ const struct bpf_func_proto bpf_xdp_output_proto = { .arg1_btf_id = &bpf_xdp_output_btf_ids[0], .arg2_type = ARG_CONST_MAP_PTR, .arg3_type = ARG_ANYTHING, @@ -381657,7 +492073,7 @@ index 2e32cee2c4690..fb5b9dbf3bc08 100644 .arg5_type = ARG_CONST_SIZE_OR_ZERO, }; -@@ -4741,13 +4745,15 @@ static int _bpf_setsockopt(struct sock *sk, int level, int optname, +@@ -4741,13 +4757,15 @@ static int _bpf_setsockopt(struct sock *sk, int level, int optname, /* Only some socketops are supported */ switch (optname) { case SO_RCVBUF: @@ -381675,7 +492091,7 @@ index 2e32cee2c4690..fb5b9dbf3bc08 100644 sk->sk_userlocks |= SOCK_SNDBUF_LOCK; WRITE_ONCE(sk->sk_sndbuf, max_t(int, val * 2, SOCK_MIN_SNDBUF)); -@@ -4880,7 +4886,7 @@ static int _bpf_setsockopt(struct sock *sk, int level, int optname, +@@ -4880,7 +4898,7 @@ static int _bpf_setsockopt(struct sock *sk, int level, int optname, if (val <= 0 || tp->data_segs_out > tp->syn_data) ret = -EINVAL; else @@ -381684,7 +492100,7 @@ index 2e32cee2c4690..fb5b9dbf3bc08 100644 break; case TCP_BPF_SNDCWND_CLAMP: if (val <= 0) { -@@ -5067,7 +5073,7 @@ const struct bpf_func_proto bpf_sk_setsockopt_proto = { +@@ -5067,7 +5085,7 @@ const struct bpf_func_proto bpf_sk_setsockopt_proto = { .arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON, .arg2_type = ARG_ANYTHING, .arg3_type = ARG_ANYTHING, @@ -381693,7 +492109,7 @@ index 
2e32cee2c4690..fb5b9dbf3bc08 100644 .arg5_type = ARG_CONST_SIZE, }; -@@ -5101,7 +5107,7 @@ static const struct bpf_func_proto bpf_sock_addr_setsockopt_proto = { +@@ -5101,7 +5119,7 @@ static const struct bpf_func_proto bpf_sock_addr_setsockopt_proto = { .arg1_type = ARG_PTR_TO_CTX, .arg2_type = ARG_ANYTHING, .arg3_type = ARG_ANYTHING, @@ -381702,7 +492118,7 @@ index 2e32cee2c4690..fb5b9dbf3bc08 100644 .arg5_type = ARG_CONST_SIZE, }; -@@ -5135,7 +5141,7 @@ static const struct bpf_func_proto bpf_sock_ops_setsockopt_proto = { +@@ -5135,7 +5153,7 @@ static const struct bpf_func_proto bpf_sock_ops_setsockopt_proto = { .arg1_type = ARG_PTR_TO_CTX, .arg2_type = ARG_ANYTHING, .arg3_type = ARG_ANYTHING, @@ -381711,7 +492127,7 @@ index 2e32cee2c4690..fb5b9dbf3bc08 100644 .arg5_type = ARG_CONST_SIZE, }; -@@ -5310,7 +5316,7 @@ static const struct bpf_func_proto bpf_bind_proto = { +@@ -5310,7 +5328,7 @@ static const struct bpf_func_proto bpf_bind_proto = { .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_CTX, @@ -381720,7 +492136,25 @@ index 2e32cee2c4690..fb5b9dbf3bc08 100644 .arg3_type = ARG_CONST_SIZE, }; -@@ -5846,7 +5852,6 @@ static int bpf_push_seg6_encap(struct sk_buff *skb, u32 type, void *hdr, u32 len +@@ -5488,7 +5506,7 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params, + neigh = __ipv6_neigh_lookup_noref_stub(dev, dst); + } + +- if (!neigh) ++ if (!neigh || !(neigh->nud_state & NUD_VALID)) + return BPF_FIB_LKUP_RET_NO_NEIGH; + + return bpf_fib_set_fwd_params(params, neigh, dev, mtu); +@@ -5603,7 +5621,7 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params, + * not needed here. + */ + neigh = __ipv6_neigh_lookup_noref_stub(dev, dst); +- if (!neigh) ++ if (!neigh || !(neigh->nud_state & NUD_VALID)) + return BPF_FIB_LKUP_RET_NO_NEIGH; + + return bpf_fib_set_fwd_params(params, neigh, dev, mtu); +@@ -5846,7 +5864,6 @@ static int bpf_push_seg6_encap(struct sk_buff *skb, u32 type, void *hdr, u32 len if (err) return err; @@ -381728,7 +492162,7 @@ index 2e32cee2c4690..fb5b9dbf3bc08 100644 skb_set_transport_header(skb, sizeof(struct ipv6hdr)); return seg6_lookup_nexthop(skb, NULL, 0); -@@ -5898,7 +5903,7 @@ static const struct bpf_func_proto bpf_lwt_in_push_encap_proto = { +@@ -5898,7 +5915,7 @@ static const struct bpf_func_proto bpf_lwt_in_push_encap_proto = { .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_CTX, .arg2_type = ARG_ANYTHING, @@ -381737,7 +492171,7 @@ index 2e32cee2c4690..fb5b9dbf3bc08 100644 .arg4_type = ARG_CONST_SIZE }; -@@ -5908,7 +5913,7 @@ static const struct bpf_func_proto bpf_lwt_xmit_push_encap_proto = { +@@ -5908,7 +5925,7 @@ static const struct bpf_func_proto bpf_lwt_xmit_push_encap_proto = { .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_CTX, .arg2_type = ARG_ANYTHING, @@ -381746,7 +492180,7 @@ index 2e32cee2c4690..fb5b9dbf3bc08 100644 .arg4_type = ARG_CONST_SIZE }; -@@ -5951,7 +5956,7 @@ static const struct bpf_func_proto bpf_lwt_seg6_store_bytes_proto = { +@@ -5951,7 +5968,7 @@ static const struct bpf_func_proto bpf_lwt_seg6_store_bytes_proto = { .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_CTX, .arg2_type = ARG_ANYTHING, @@ -381755,7 +492189,7 @@ index 2e32cee2c4690..fb5b9dbf3bc08 100644 .arg4_type = ARG_CONST_SIZE }; -@@ -6039,7 +6044,7 @@ static const struct bpf_func_proto bpf_lwt_seg6_action_proto = { +@@ -6039,7 +6056,7 @@ static const struct bpf_func_proto bpf_lwt_seg6_action_proto = { .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_CTX, .arg2_type = ARG_ANYTHING, @@ -381764,7 
+492198,7 @@ index 2e32cee2c4690..fb5b9dbf3bc08 100644 .arg4_type = ARG_CONST_SIZE }; -@@ -6204,10 +6209,21 @@ __bpf_sk_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len, +@@ -6204,10 +6221,21 @@ __bpf_sk_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len, ifindex, proto, netns_id, flags); if (sk) { @@ -381789,7 +492223,7 @@ index 2e32cee2c4690..fb5b9dbf3bc08 100644 } } -@@ -6241,10 +6257,21 @@ bpf_sk_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len, +@@ -6241,10 +6269,21 @@ bpf_sk_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len, flags); if (sk) { @@ -381814,7 +492248,7 @@ index 2e32cee2c4690..fb5b9dbf3bc08 100644 } } -@@ -6264,7 +6291,7 @@ static const struct bpf_func_proto bpf_skc_lookup_tcp_proto = { +@@ -6264,7 +6303,7 @@ static const struct bpf_func_proto bpf_skc_lookup_tcp_proto = { .pkt_access = true, .ret_type = RET_PTR_TO_SOCK_COMMON_OR_NULL, .arg1_type = ARG_PTR_TO_CTX, @@ -381823,7 +492257,7 @@ index 2e32cee2c4690..fb5b9dbf3bc08 100644 .arg3_type = ARG_CONST_SIZE, .arg4_type = ARG_ANYTHING, .arg5_type = ARG_ANYTHING, -@@ -6283,7 +6310,7 @@ static const struct bpf_func_proto bpf_sk_lookup_tcp_proto = { +@@ -6283,7 +6322,7 @@ static const struct bpf_func_proto bpf_sk_lookup_tcp_proto = { .pkt_access = true, .ret_type = RET_PTR_TO_SOCKET_OR_NULL, .arg1_type = ARG_PTR_TO_CTX, @@ -381832,7 +492266,7 @@ index 2e32cee2c4690..fb5b9dbf3bc08 100644 .arg3_type = ARG_CONST_SIZE, .arg4_type = ARG_ANYTHING, .arg5_type = ARG_ANYTHING, -@@ -6302,7 +6329,7 @@ static const struct bpf_func_proto bpf_sk_lookup_udp_proto = { +@@ -6302,7 +6341,7 @@ static const struct bpf_func_proto bpf_sk_lookup_udp_proto = { .pkt_access = true, .ret_type = RET_PTR_TO_SOCKET_OR_NULL, .arg1_type = ARG_PTR_TO_CTX, @@ -381841,7 +492275,7 @@ index 2e32cee2c4690..fb5b9dbf3bc08 100644 .arg3_type = ARG_CONST_SIZE, .arg4_type = ARG_ANYTHING, .arg5_type = ARG_ANYTHING, -@@ -6339,7 +6366,7 @@ static const struct bpf_func_proto bpf_xdp_sk_lookup_udp_proto = { +@@ -6339,7 +6378,7 @@ static const struct bpf_func_proto bpf_xdp_sk_lookup_udp_proto = { .pkt_access = true, .ret_type = RET_PTR_TO_SOCKET_OR_NULL, .arg1_type = ARG_PTR_TO_CTX, @@ -381850,7 +492284,7 @@ index 2e32cee2c4690..fb5b9dbf3bc08 100644 .arg3_type = ARG_CONST_SIZE, .arg4_type = ARG_ANYTHING, .arg5_type = ARG_ANYTHING, -@@ -6362,7 +6389,7 @@ static const struct bpf_func_proto bpf_xdp_skc_lookup_tcp_proto = { +@@ -6362,7 +6401,7 @@ static const struct bpf_func_proto bpf_xdp_skc_lookup_tcp_proto = { .pkt_access = true, .ret_type = RET_PTR_TO_SOCK_COMMON_OR_NULL, .arg1_type = ARG_PTR_TO_CTX, @@ -381859,7 +492293,7 @@ index 2e32cee2c4690..fb5b9dbf3bc08 100644 .arg3_type = ARG_CONST_SIZE, .arg4_type = ARG_ANYTHING, .arg5_type = ARG_ANYTHING, -@@ -6385,7 +6412,7 @@ static const struct bpf_func_proto bpf_xdp_sk_lookup_tcp_proto = { +@@ -6385,7 +6424,7 @@ static const struct bpf_func_proto bpf_xdp_sk_lookup_tcp_proto = { .pkt_access = true, .ret_type = RET_PTR_TO_SOCKET_OR_NULL, .arg1_type = ARG_PTR_TO_CTX, @@ -381868,7 +492302,7 @@ index 2e32cee2c4690..fb5b9dbf3bc08 100644 .arg3_type = ARG_CONST_SIZE, .arg4_type = ARG_ANYTHING, .arg5_type = ARG_ANYTHING, -@@ -6404,7 +6431,7 @@ static const struct bpf_func_proto bpf_sock_addr_skc_lookup_tcp_proto = { +@@ -6404,7 +6443,7 @@ static const struct bpf_func_proto bpf_sock_addr_skc_lookup_tcp_proto = { .gpl_only = false, .ret_type = RET_PTR_TO_SOCK_COMMON_OR_NULL, .arg1_type = ARG_PTR_TO_CTX, @@ -381877,7 +492311,7 @@ index 2e32cee2c4690..fb5b9dbf3bc08 100644 
.arg3_type = ARG_CONST_SIZE, .arg4_type = ARG_ANYTHING, .arg5_type = ARG_ANYTHING, -@@ -6423,7 +6450,7 @@ static const struct bpf_func_proto bpf_sock_addr_sk_lookup_tcp_proto = { +@@ -6423,7 +6462,7 @@ static const struct bpf_func_proto bpf_sock_addr_sk_lookup_tcp_proto = { .gpl_only = false, .ret_type = RET_PTR_TO_SOCKET_OR_NULL, .arg1_type = ARG_PTR_TO_CTX, @@ -381886,7 +492320,7 @@ index 2e32cee2c4690..fb5b9dbf3bc08 100644 .arg3_type = ARG_CONST_SIZE, .arg4_type = ARG_ANYTHING, .arg5_type = ARG_ANYTHING, -@@ -6442,7 +6469,7 @@ static const struct bpf_func_proto bpf_sock_addr_sk_lookup_udp_proto = { +@@ -6442,7 +6481,7 @@ static const struct bpf_func_proto bpf_sock_addr_sk_lookup_udp_proto = { .gpl_only = false, .ret_type = RET_PTR_TO_SOCKET_OR_NULL, .arg1_type = ARG_PTR_TO_CTX, @@ -381895,7 +492329,7 @@ index 2e32cee2c4690..fb5b9dbf3bc08 100644 .arg3_type = ARG_CONST_SIZE, .arg4_type = ARG_ANYTHING, .arg5_type = ARG_ANYTHING, -@@ -6708,30 +6735,39 @@ BPF_CALL_5(bpf_tcp_check_syncookie, struct sock *, sk, void *, iph, u32, iph_len +@@ -6708,30 +6747,39 @@ BPF_CALL_5(bpf_tcp_check_syncookie, struct sock *, sk, void *, iph, u32, iph_len if (sk->sk_protocol != IPPROTO_TCP || sk->sk_state != TCP_LISTEN) return -EINVAL; @@ -381940,7 +492374,7 @@ index 2e32cee2c4690..fb5b9dbf3bc08 100644 ret = __cookie_v6_check((struct ipv6hdr *)iph, th, cookie); break; #endif /* CONFIG_IPV6 */ -@@ -6755,9 +6791,9 @@ static const struct bpf_func_proto bpf_tcp_check_syncookie_proto = { +@@ -6755,9 +6803,9 @@ static const struct bpf_func_proto bpf_tcp_check_syncookie_proto = { .pkt_access = true, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON, @@ -381952,7 +492386,7 @@ index 2e32cee2c4690..fb5b9dbf3bc08 100644 .arg5_type = ARG_CONST_SIZE, }; -@@ -6774,7 +6810,7 @@ BPF_CALL_5(bpf_tcp_gen_syncookie, struct sock *, sk, void *, iph, u32, iph_len, +@@ -6774,7 +6822,7 @@ BPF_CALL_5(bpf_tcp_gen_syncookie, struct sock *, sk, void *, iph, u32, iph_len, if (sk->sk_protocol != IPPROTO_TCP || sk->sk_state != TCP_LISTEN) return -EINVAL; @@ -381961,7 +492395,7 @@ index 2e32cee2c4690..fb5b9dbf3bc08 100644 return -ENOENT; if (!th->syn || th->ack || th->fin || th->rst) -@@ -6824,9 +6860,9 @@ static const struct bpf_func_proto bpf_tcp_gen_syncookie_proto = { +@@ -6824,9 +6872,9 @@ static const struct bpf_func_proto bpf_tcp_gen_syncookie_proto = { .pkt_access = true, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON, @@ -381973,7 +492407,7 @@ index 2e32cee2c4690..fb5b9dbf3bc08 100644 .arg5_type = ARG_CONST_SIZE, }; -@@ -7055,7 +7091,7 @@ static const struct bpf_func_proto bpf_sock_ops_store_hdr_opt_proto = { +@@ -7055,7 +7103,7 @@ static const struct bpf_func_proto bpf_sock_ops_store_hdr_opt_proto = { .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_CTX, @@ -381982,7 +492416,7 @@ index 2e32cee2c4690..fb5b9dbf3bc08 100644 .arg3_type = ARG_CONST_SIZE, .arg4_type = ARG_ANYTHING, }; -@@ -7162,6 +7198,8 @@ sock_filter_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) +@@ -7162,6 +7210,8 @@ sock_filter_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) #endif case BPF_FUNC_sk_storage_get: return &bpf_sk_storage_get_cg_sock_proto; @@ -381991,7 +492425,7 @@ index 2e32cee2c4690..fb5b9dbf3bc08 100644 default: return bpf_base_func_proto(func_id); } -@@ -7959,6 +7997,7 @@ bool bpf_sock_is_valid_access(int off, int size, enum bpf_access_type type, +@@ -7959,6 +8009,7 @@ bool bpf_sock_is_valid_access(int off, int size, enum bpf_access_type type, struct 
bpf_insn_access_aux *info) { const int size_default = sizeof(__u32); @@ -381999,7 +492433,7 @@ index 2e32cee2c4690..fb5b9dbf3bc08 100644 if (off < 0 || off >= sizeof(struct bpf_sock)) return false; -@@ -7970,7 +8009,6 @@ bool bpf_sock_is_valid_access(int off, int size, enum bpf_access_type type, +@@ -7970,7 +8021,6 @@ bool bpf_sock_is_valid_access(int off, int size, enum bpf_access_type type, case offsetof(struct bpf_sock, family): case offsetof(struct bpf_sock, type): case offsetof(struct bpf_sock, protocol): @@ -382007,7 +492441,7 @@ index 2e32cee2c4690..fb5b9dbf3bc08 100644 case offsetof(struct bpf_sock, src_port): case offsetof(struct bpf_sock, rx_queue_mapping): case bpf_ctx_range(struct bpf_sock, src_ip4): -@@ -7979,6 +8017,14 @@ bool bpf_sock_is_valid_access(int off, int size, enum bpf_access_type type, +@@ -7979,6 +8029,14 @@ bool bpf_sock_is_valid_access(int off, int size, enum bpf_access_type type, case bpf_ctx_range_till(struct bpf_sock, dst_ip6[0], dst_ip6[3]): bpf_ctx_record_field_size(info, size_default); return bpf_ctx_narrow_access_ok(off, size, size_default); @@ -382022,7 +492456,7 @@ index 2e32cee2c4690..fb5b9dbf3bc08 100644 } return size == size_default; -@@ -8174,9 +8220,9 @@ void bpf_warn_invalid_xdp_action(u32 act) +@@ -8174,9 +8232,9 @@ void bpf_warn_invalid_xdp_action(u32 act) { const u32 act_max = XDP_REDIRECT; @@ -382035,7 +492469,7 @@ index 2e32cee2c4690..fb5b9dbf3bc08 100644 } EXPORT_SYMBOL_GPL(bpf_warn_invalid_xdp_action); -@@ -9735,22 +9781,46 @@ static u32 sock_ops_convert_ctx_access(enum bpf_access_type type, +@@ -9735,22 +9793,46 @@ static u32 sock_ops_convert_ctx_access(enum bpf_access_type type, static struct bpf_insn *bpf_convert_data_end_access(const struct bpf_insn *si, struct bpf_insn *insn) { @@ -382088,7 +492522,7 @@ index 2e32cee2c4690..fb5b9dbf3bc08 100644 return insn; } -@@ -9761,11 +9831,33 @@ static u32 sk_skb_convert_ctx_access(enum bpf_access_type type, +@@ -9761,11 +9843,33 @@ static u32 sk_skb_convert_ctx_access(enum bpf_access_type type, struct bpf_prog *prog, u32 *target_size) { struct bpf_insn *insn = insn_buf; @@ -382122,7 +492556,7 @@ index 2e32cee2c4690..fb5b9dbf3bc08 100644 default: return bpf_convert_ctx_access(type, si, insn_buf, prog, target_size); -@@ -10260,6 +10352,8 @@ sk_reuseport_func_proto(enum bpf_func_id func_id, +@@ -10260,6 +10364,8 @@ sk_reuseport_func_proto(enum bpf_func_id func_id, return &sk_reuseport_load_bytes_relative_proto; case BPF_FUNC_get_socket_cookie: return &bpf_get_socket_ptr_cookie_proto; @@ -382131,7 +492565,7 @@ index 2e32cee2c4690..fb5b9dbf3bc08 100644 default: return bpf_base_func_proto(func_id); } -@@ -10468,7 +10562,8 @@ static bool sk_lookup_is_valid_access(int off, int size, +@@ -10468,7 +10574,8 @@ static bool sk_lookup_is_valid_access(int off, int size, case bpf_ctx_range(struct bpf_sk_lookup, local_ip4): case bpf_ctx_range_till(struct bpf_sk_lookup, remote_ip6[0], remote_ip6[3]): case bpf_ctx_range_till(struct bpf_sk_lookup, local_ip6[0], local_ip6[3]): @@ -382141,7 +492575,7 @@ index 2e32cee2c4690..fb5b9dbf3bc08 100644 case bpf_ctx_range(struct bpf_sk_lookup, local_port): bpf_ctx_record_field_size(info, sizeof(__u32)); return bpf_ctx_narrow_access_ok(off, size, sizeof(__u32)); -@@ -10741,6 +10836,8 @@ bpf_sk_base_func_proto(enum bpf_func_id func_id) +@@ -10741,6 +10848,8 @@ bpf_sk_base_func_proto(enum bpf_func_id func_id) case BPF_FUNC_skc_to_udp6_sock: func = &bpf_skc_to_udp6_sock_proto; break; @@ -382151,7 +492585,7 @@ index 2e32cee2c4690..fb5b9dbf3bc08 100644 return 
bpf_base_func_proto(func_id); } diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c -index bac0184cf3de7..1c34e22665781 100644 +index bac0184cf3de7..1d230f041386b 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -238,7 +238,7 @@ void @@ -382171,6 +492605,15 @@ index bac0184cf3de7..1c34e22665781 100644 return; } +@@ -269,7 +270,7 @@ skb_flow_dissect_ct(const struct sk_buff *skb, + key->ct_zone = ct->zone.id; + #endif + #if IS_ENABLED(CONFIG_NF_CONNTRACK_MARK) +- key->ct_mark = ct->mark; ++ key->ct_mark = READ_ONCE(ct->mark); + #endif + + cl = nf_ct_labels_find(ct); @@ -1180,6 +1181,7 @@ proto_again: VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT; } @@ -382289,10 +492732,37 @@ index 2820aca2173a8..9ccd64e8a666a 100644 if (lwtunnel_valid_encap_type(encap_type, diff --git a/net/core/neighbour.c b/net/core/neighbour.c -index 2d5bc3a75faec..b3556c5c1c08e 100644 +index 2d5bc3a75faec..95f588b2fd159 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c -@@ -279,11 +279,26 @@ static int neigh_del_timer(struct neighbour *n) +@@ -241,7 +241,7 @@ static int neigh_forced_gc(struct neigh_table *tbl) + (n->nud_state == NUD_NOARP) || + (tbl->is_multicast && + tbl->is_multicast(n->primary_key)) || +- time_after(tref, n->updated)) ++ !time_in_range(n->updated, tref, jiffies)) + remove = true; + write_unlock(&n->lock); + +@@ -261,7 +261,17 @@ static int neigh_forced_gc(struct neigh_table *tbl) + + static void neigh_add_timer(struct neighbour *n, unsigned long when) + { ++ /* Use safe distance from the jiffies - LONG_MAX point while timer ++ * is running in DELAY/PROBE state but still show to user space ++ * large times in the past. ++ */ ++ unsigned long mint = jiffies - (LONG_MAX - 86400 * HZ); ++ + neigh_hold(n); ++ if (!time_in_range(n->confirmed, mint, jiffies)) ++ n->confirmed = mint; ++ if (time_before(n->used, n->confirmed)) ++ n->used = n->confirmed; + if (unlikely(mod_timer(&n->timer, when))) { + printk("NEIGH: BUG, double timer add, state is %x\n", + n->nud_state); +@@ -279,11 +289,26 @@ static int neigh_del_timer(struct neighbour *n) return 0; } @@ -382321,20 +492791,20 @@ index 2d5bc3a75faec..b3556c5c1c08e 100644 dev_put(skb->dev); kfree_skb(skb); } -@@ -357,9 +372,9 @@ static int __neigh_ifdown(struct neigh_table *tbl, struct net_device *dev, +@@ -357,9 +382,9 @@ static int __neigh_ifdown(struct neigh_table *tbl, struct net_device *dev, write_lock_bh(&tbl->lock); neigh_flush_dev(tbl, dev, skip_perm); pneigh_ifdown_and_unlock(tbl, dev); - - del_timer_sync(&tbl->proxy_timer); - pneigh_queue_purge(&tbl->proxy_queue); -+ pneigh_queue_purge(&tbl->proxy_queue, dev_net(dev)); ++ pneigh_queue_purge(&tbl->proxy_queue, dev ? 
dev_net(dev) : NULL); + if (skb_queue_empty_lockless(&tbl->proxy_queue)) + del_timer_sync(&tbl->proxy_timer); return 0; } -@@ -379,7 +394,7 @@ EXPORT_SYMBOL(neigh_ifdown); +@@ -379,7 +404,7 @@ EXPORT_SYMBOL(neigh_ifdown); static struct neighbour *neigh_alloc(struct neigh_table *tbl, struct net_device *dev, @@ -382343,7 +492813,7 @@ index 2d5bc3a75faec..b3556c5c1c08e 100644 { struct neighbour *n = NULL; unsigned long now = jiffies; -@@ -412,6 +427,7 @@ do_alloc: +@@ -412,6 +437,7 @@ do_alloc: n->updated = n->used = now; n->nud_state = NUD_NONE; n->output = neigh_blackhole; @@ -382351,7 +492821,7 @@ index 2d5bc3a75faec..b3556c5c1c08e 100644 seqlock_init(&n->hh.hh_lock); n->parms = neigh_parms_clone(&tbl->parms); timer_setup(&n->timer, neigh_timer_handler, 0); -@@ -575,19 +591,18 @@ struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net, +@@ -575,19 +601,18 @@ struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net, } EXPORT_SYMBOL(neigh_lookup_nodev); @@ -382379,7 +492849,7 @@ index 2d5bc3a75faec..b3556c5c1c08e 100644 if (!n) { rc = ERR_PTR(-ENOBUFS); goto out; -@@ -674,7 +689,7 @@ out_neigh_release: +@@ -674,7 +699,7 @@ out_neigh_release: struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey, struct net_device *dev, bool want_ref) { @@ -382388,7 +492858,7 @@ index 2d5bc3a75faec..b3556c5c1c08e 100644 } EXPORT_SYMBOL(__neigh_create); -@@ -733,11 +748,10 @@ struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl, +@@ -733,11 +758,10 @@ struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl, ASSERT_RTNL(); @@ -382401,7 +492871,24 @@ index 2d5bc3a75faec..b3556c5c1c08e 100644 write_pnet(&n->net, net); memcpy(n->key, pkey, key_len); n->dev = dev; -@@ -1217,7 +1231,7 @@ static void neigh_update_hhs(struct neighbour *neigh) +@@ -929,12 +953,14 @@ static void neigh_periodic_work(struct work_struct *work) + goto next_elt; + } + +- if (time_before(n->used, n->confirmed)) ++ if (time_before(n->used, n->confirmed) && ++ time_is_before_eq_jiffies(n->confirmed)) + n->used = n->confirmed; + + if (refcount_read(&n->refcnt) == 1 && + (state == NUD_FAILED || +- time_after(jiffies, n->used + NEIGH_VAR(n->parms, GC_STALETIME)))) { ++ !time_in_range_open(jiffies, n->used, ++ n->used + NEIGH_VAR(n->parms, GC_STALETIME)))) { + *np = n->next; + neigh_mark_dead(n); + write_unlock(&n->lock); +@@ -1217,7 +1243,7 @@ static void neigh_update_hhs(struct neighbour *neigh) lladdr instead of overriding it if it is different. NEIGH_UPDATE_F_ADMIN means that the change is administrative. @@ -382410,7 +492897,7 @@ index 2d5bc3a75faec..b3556c5c1c08e 100644 NEIGH_UPDATE_F_OVERRIDE_ISROUTER allows to override existing NTF_ROUTER flag. NEIGH_UPDATE_F_ISROUTER indicates if the neighbour is known as -@@ -1255,6 +1269,12 @@ static int __neigh_update(struct neighbour *neigh, const u8 *lladdr, +@@ -1255,6 +1281,12 @@ static int __neigh_update(struct neighbour *neigh, const u8 *lladdr, goto out; ext_learn_change = neigh_update_ext_learned(neigh, flags, ¬ify); @@ -382423,7 +492910,7 @@ index 2d5bc3a75faec..b3556c5c1c08e 100644 if (!(new & NUD_VALID)) { neigh_del_timer(neigh); -@@ -1730,7 +1750,7 @@ int neigh_table_clear(int index, struct neigh_table *tbl) +@@ -1730,7 +1762,7 @@ int neigh_table_clear(int index, struct neigh_table *tbl) /* It is not clean... 
Fix it to unload IPv6 module safely */ cancel_delayed_work_sync(&tbl->gc_work); del_timer_sync(&tbl->proxy_timer); @@ -382432,7 +492919,7 @@ index 2d5bc3a75faec..b3556c5c1c08e 100644 neigh_ifdown(tbl, NULL); if (atomic_read(&tbl->entries)) pr_crit("neighbour leakage\n"); -@@ -1942,7 +1962,9 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, +@@ -1942,7 +1974,9 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, exempt_from_gc = ndm->ndm_state & NUD_PERMANENT || ndm->ndm_flags & NTF_EXT_LEARNED; @@ -382443,7 +492930,7 @@ index 2d5bc3a75faec..b3556c5c1c08e 100644 if (IS_ERR(neigh)) { err = PTR_ERR(neigh); goto out; -@@ -1961,22 +1983,20 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, +@@ -1961,22 +1995,20 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, if (protocol) neigh->protocol = protocol; @@ -382702,7 +493189,7 @@ index b2e49eb7001d6..e9ea0695efb42 100644 { for (; dev; dev = dev->parent) { diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c -index a448a9b5bb2d6..9745cb6fdf516 100644 +index a448a9b5bb2d6..dcddc54d08409 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -44,13 +44,7 @@ EXPORT_SYMBOL_GPL(net_rwsem); @@ -382720,7 +493207,28 @@ index a448a9b5bb2d6..9745cb6fdf516 100644 EXPORT_SYMBOL(init_net); static bool init_net_initialized; -@@ -164,8 +158,10 @@ static void ops_exit_list(const struct pernet_operations *ops, +@@ -123,6 +117,7 @@ static int net_assign_generic(struct net *net, unsigned int id, void *data) + + static int ops_init(const struct pernet_operations *ops, struct net *net) + { ++ struct net_generic *ng; + int err = -ENOMEM; + void *data = NULL; + +@@ -141,6 +136,12 @@ static int ops_init(const struct pernet_operations *ops, struct net *net) + if (!err) + return 0; + ++ if (ops->id && ops->size) { ++ ng = rcu_dereference_protected(net->gen, ++ lockdep_is_held(&pernet_ops_rwsem)); ++ ng->ptr[*ops->id] = NULL; ++ } ++ + cleanup: + kfree(data); + +@@ -164,8 +165,10 @@ static void ops_exit_list(const struct pernet_operations *ops, { struct net *net; if (ops->exit) { @@ -382732,7 +493240,7 @@ index a448a9b5bb2d6..9745cb6fdf516 100644 } if (ops->exit_batch) ops->exit_batch(net_exit_list); -@@ -473,7 +469,9 @@ struct net *copy_net_ns(unsigned long flags, +@@ -473,7 +476,9 @@ struct net *copy_net_ns(unsigned long flags, if (rv < 0) { put_userns: @@ -382742,7 +493250,7 @@ index a448a9b5bb2d6..9745cb6fdf516 100644 put_user_ns(user_ns); net_free(net); dec_ucounts: -@@ -605,7 +603,9 @@ static void cleanup_net(struct work_struct *work) +@@ -605,7 +610,9 @@ static void cleanup_net(struct work_struct *work) list_for_each_entry_safe(net, tmp, &net_exit_list, exit_list) { list_del_init(&net->exit_list); dec_net_namespaces(net->ucounts); @@ -382752,7 +493260,7 @@ index a448a9b5bb2d6..9745cb6fdf516 100644 put_user_ns(net->user_ns); net_free(net); } -@@ -1075,7 +1075,7 @@ out: +@@ -1075,7 +1082,7 @@ out: rtnl_set_sk_err(net, RTNLGRP_NSID, err); } @@ -382761,7 +493269,7 @@ index a448a9b5bb2d6..9745cb6fdf516 100644 { struct net_generic *ng; -@@ -1096,6 +1096,9 @@ static int __init net_ns_init(void) +@@ -1096,6 +1103,9 @@ static int __init net_ns_init(void) rcu_assign_pointer(init_net.gen, ng); @@ -382771,7 +493279,7 @@ index a448a9b5bb2d6..9745cb6fdf516 100644 down_write(&pernet_ops_rwsem); if (setup_net(&init_net, &init_user_ns)) panic("Could not setup the initial network namespace"); -@@ -1110,12 +1113,8 @@ static int __init net_ns_init(void) +@@ -1110,12 +1120,8 @@ static int __init 
net_ns_init(void) RTNL_FLAG_DOIT_UNLOCKED); rtnl_register(PF_UNSPEC, RTM_GETNSID, rtnl_net_getid, rtnl_net_dumpid, RTNL_FLAG_DOIT_UNLOCKED); @@ -383109,7 +493617,7 @@ index b5bc680d47553..189eea1372d5d 100644 EXPORT_SYMBOL_GPL(secure_ipv4_port_ephemeral); #endif diff --git a/net/core/skbuff.c b/net/core/skbuff.c -index fe9358437380c..3c193e7d4bc67 100644 +index fe9358437380c..2d3f82b622366 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -203,7 +203,7 @@ static void __build_skb_around(struct sk_buff *skb, void *data, @@ -383212,7 +493720,17 @@ index fe9358437380c..3c193e7d4bc67 100644 /** * skb_expand_head - reallocate header of &sk_buff * @skb: buffer to reallocate -@@ -2254,7 +2287,7 @@ void *__pskb_pull_tail(struct sk_buff *skb, int delta) +@@ -2230,6 +2263,9 @@ void *__pskb_pull_tail(struct sk_buff *skb, int delta) + insp = list; + } else { + /* Eaten partially. */ ++ if (skb_is_gso(skb) && !list->head_frag && ++ skb_headlen(list)) ++ skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY; + + if (skb_shared(list)) { + /* Sucks! We need to fork list. :-( */ +@@ -2254,7 +2290,7 @@ void *__pskb_pull_tail(struct sk_buff *skb, int delta) /* Free pulled out fragments. */ while ((list = skb_shinfo(skb)->frag_list) != insp) { skb_shinfo(skb)->frag_list = list->next; @@ -383221,7 +493739,7 @@ index fe9358437380c..3c193e7d4bc67 100644 } /* And insert new clone at head. */ if (clone) { -@@ -3449,19 +3482,7 @@ EXPORT_SYMBOL(skb_split); +@@ -3449,19 +3485,7 @@ EXPORT_SYMBOL(skb_split); */ static int skb_prepare_for_shift(struct sk_buff *skb) { @@ -383242,7 +493760,16 @@ index fe9358437380c..3c193e7d4bc67 100644 } /** -@@ -3854,7 +3875,7 @@ struct sk_buff *skb_segment_list(struct sk_buff *skb, +@@ -3800,7 +3824,7 @@ int skb_append_pagefrags(struct sk_buff *skb, struct page *page, + skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], size); + } else if (i < MAX_SKB_FRAGS) { + get_page(page); +- skb_fill_page_desc(skb, i, page, offset, size); ++ skb_fill_page_desc_noacc(skb, i, page, offset, size); + } else { + return -EMSGSIZE; + } +@@ -3854,17 +3878,18 @@ struct sk_buff *skb_segment_list(struct sk_buff *skb, unsigned int delta_len = 0; struct sk_buff *tail = NULL; struct sk_buff *nskb, *tmp; @@ -383251,7 +493778,11 @@ index fe9358437380c..3c193e7d4bc67 100644 skb_push(skb, -skb_network_offset(skb) + offset); -@@ -3865,6 +3886,7 @@ struct sk_buff *skb_segment_list(struct sk_buff *skb, + skb_shinfo(skb)->frag_list = NULL; + +- do { ++ while (list_skb) { + nskb = list_skb; list_skb = list_skb->next; err = 0; @@ -383259,7 +493790,7 @@ index fe9358437380c..3c193e7d4bc67 100644 if (skb_shared(nskb)) { tmp = skb_clone(nskb, GFP_ATOMIC); if (tmp) { -@@ -3889,14 +3911,15 @@ struct sk_buff *skb_segment_list(struct sk_buff *skb, +@@ -3889,14 +3914,15 @@ struct sk_buff *skb_segment_list(struct sk_buff *skb, tail = nskb; delta_len += nskb->len; @@ -383276,7 +493807,60 @@ index fe9358437380c..3c193e7d4bc67 100644 skb_copy_from_linear_data_offset(skb, -tnl_hlen, nskb->data - tnl_hlen, offset + tnl_hlen); -@@ -4165,9 +4188,8 @@ normal: +@@ -3904,8 +3930,7 @@ struct sk_buff *skb_segment_list(struct sk_buff *skb, + if (skb_needs_linearize(nskb, features) && + __skb_linearize(nskb)) + goto err_linearize; +- +- } while (list_skb); ++ } + + skb->truesize = skb->truesize - delta_truesize; + skb->data_len = skb->data_len - delta_len; +@@ -3987,23 +4012,25 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb, + int i = 0; + int pos; + +- if (list_skb && !list_skb->head_frag && skb_headlen(list_skb) && +- 
(skb_shinfo(head_skb)->gso_type & SKB_GSO_DODGY)) { +- /* gso_size is untrusted, and we have a frag_list with a linear +- * non head_frag head. +- * +- * (we assume checking the first list_skb member suffices; +- * i.e if either of the list_skb members have non head_frag +- * head, then the first one has too). +- * +- * If head_skb's headlen does not fit requested gso_size, it +- * means that the frag_list members do NOT terminate on exact +- * gso_size boundaries. Hence we cannot perform skb_frag_t page +- * sharing. Therefore we must fallback to copying the frag_list +- * skbs; we do so by disabling SG. +- */ +- if (mss != GSO_BY_FRAGS && mss != skb_headlen(head_skb)) +- features &= ~NETIF_F_SG; ++ if ((skb_shinfo(head_skb)->gso_type & SKB_GSO_DODGY) && ++ mss != GSO_BY_FRAGS && mss != skb_headlen(head_skb)) { ++ struct sk_buff *check_skb; ++ ++ for (check_skb = list_skb; check_skb; check_skb = check_skb->next) { ++ if (skb_headlen(check_skb) && !check_skb->head_frag) { ++ /* gso_size is untrusted, and we have a frag_list with ++ * a linear non head_frag item. ++ * ++ * If head_skb's headlen does not fit requested gso_size, ++ * it means that the frag_list members do NOT terminate ++ * on exact gso_size boundaries. Hence we cannot perform ++ * skb_frag_t page sharing. Therefore we must fallback to ++ * copying the frag_list skbs; we do so by disabling SG. ++ */ ++ features &= ~NETIF_F_SG; ++ break; ++ } ++ } + } + + __skb_push(head_skb, doffset); +@@ -4165,9 +4192,8 @@ normal: SKB_GSO_CB(nskb)->csum_start = skb_headroom(nskb) + doffset; } else { @@ -383288,7 +493872,7 @@ index fe9358437380c..3c193e7d4bc67 100644 } continue; } -@@ -4856,7 +4878,7 @@ static void __skb_complete_tx_timestamp(struct sk_buff *skb, +@@ -4856,7 +4882,7 @@ static void __skb_complete_tx_timestamp(struct sk_buff *skb, serr->ee.ee_data = skb_shinfo(skb)->tskey; if (sk->sk_protocol == IPPROTO_TCP && sk->sk_type == SOCK_STREAM) @@ -383297,7 +493881,7 @@ index fe9358437380c..3c193e7d4bc67 100644 } err = sock_queue_err_skb(sk, skb); -@@ -4869,7 +4891,7 @@ static bool skb_may_tx_timestamp(struct sock *sk, bool tsonly) +@@ -4869,7 +4895,7 @@ static bool skb_may_tx_timestamp(struct sock *sk, bool tsonly) { bool ret; @@ -383306,7 +493890,7 @@ index fe9358437380c..3c193e7d4bc67 100644 return true; read_lock_bh(&sk->sk_callback_lock); -@@ -5371,11 +5393,18 @@ bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from, +@@ -5371,11 +5397,18 @@ bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from, if (skb_cloned(to)) return false; @@ -383329,7 +493913,7 @@ index fe9358437380c..3c193e7d4bc67 100644 return false; if (len <= skb_tailroom(to)) { -@@ -6171,11 +6200,7 @@ static int pskb_carve_inside_header(struct sk_buff *skb, const u32 off, +@@ -6171,11 +6204,7 @@ static int pskb_carve_inside_header(struct sk_buff *skb, const u32 off, skb->head = data; skb->data = data; skb->head_frag = 0; @@ -383342,7 +493926,7 @@ index fe9358437380c..3c193e7d4bc67 100644 skb_set_tail_pointer(skb, skb_headlen(skb)); skb_headers_offset_update(skb, 0); skb->cloned = 0; -@@ -6232,7 +6257,7 @@ static int pskb_carve_frag_list(struct sk_buff *skb, +@@ -6232,7 +6261,7 @@ static int pskb_carve_frag_list(struct sk_buff *skb, /* Free pulled out fragments. */ while ((list = shinfo->frag_list) != insp) { shinfo->frag_list = list->next; @@ -383351,7 +493935,7 @@ index fe9358437380c..3c193e7d4bc67 100644 } /* And insert new clone at head. 
*/ if (clone) { -@@ -6313,11 +6338,7 @@ static int pskb_carve_inside_nonlinear(struct sk_buff *skb, const u32 off, +@@ -6313,11 +6342,7 @@ static int pskb_carve_inside_nonlinear(struct sk_buff *skb, const u32 off, skb->head = data; skb->head_frag = 0; skb->data = data; @@ -383365,7 +493949,7 @@ index fe9358437380c..3c193e7d4bc67 100644 skb_headers_offset_update(skb, 0); skb->cloned = 0; diff --git a/net/core/skmsg.c b/net/core/skmsg.c -index a86ef7e844f8c..736d8b035a679 100644 +index a86ef7e844f8c..dc9b93d8f0d3e 100644 --- a/net/core/skmsg.c +++ b/net/core/skmsg.c @@ -27,6 +27,7 @@ int sk_msg_alloc(struct sock *sk, struct sk_msg *msg, int len, @@ -383563,7 +494147,15 @@ index a86ef7e844f8c..736d8b035a679 100644 if (sk->sk_user_data) { psock = ERR_PTR(-EBUSY); goto out; -@@ -707,7 +735,9 @@ struct sk_psock *sk_psock_init(struct sock *sk, int node) +@@ -692,6 +720,7 @@ struct sk_psock *sk_psock_init(struct sock *sk, int node) + psock->eval = __SK_NONE; + psock->sk_proto = prot; + psock->saved_unhash = prot->unhash; ++ psock->saved_destroy = prot->destroy; + psock->saved_close = prot->close; + psock->saved_write_space = sk->sk_write_space; + +@@ -707,7 +736,9 @@ struct sk_psock *sk_psock_init(struct sock *sk, int node) sk_psock_set_state(psock, SK_PSOCK_TX_ENABLED); refcount_set(&psock->refcnt, 1); @@ -383574,7 +494166,54 @@ index a86ef7e844f8c..736d8b035a679 100644 sock_hold(sk); out: -@@ -877,6 +907,7 @@ static int sk_psock_skb_redirect(struct sk_psock *from, struct sk_buff *skb) +@@ -766,16 +797,13 @@ static void sk_psock_link_destroy(struct sk_psock *psock) + } + } + +-void sk_psock_stop(struct sk_psock *psock, bool wait) ++void sk_psock_stop(struct sk_psock *psock) + { + spin_lock_bh(&psock->ingress_lock); + sk_psock_clear_state(psock, SK_PSOCK_TX_ENABLED); + sk_psock_cork_free(psock); + __sk_psock_zap_ingress(psock); + spin_unlock_bh(&psock->ingress_lock); +- +- if (wait) +- cancel_work_sync(&psock->work); + } + + static void sk_psock_done_strp(struct sk_psock *psock); +@@ -813,7 +841,7 @@ void sk_psock_drop(struct sock *sk, struct sk_psock *psock) + sk_psock_stop_verdict(sk, psock); + write_unlock_bh(&sk->sk_callback_lock); + +- sk_psock_stop(psock, false); ++ sk_psock_stop(psock); + + INIT_RCU_WORK(&psock->rwork, sk_psock_destroy); + queue_rcu_work(system_wq, &psock->rwork); +@@ -852,13 +880,16 @@ int sk_psock_msg_verdict(struct sock *sk, struct sk_psock *psock, + ret = sk_psock_map_verd(ret, msg->sk_redir); + psock->apply_bytes = msg->apply_bytes; + if (ret == __SK_REDIRECT) { +- if (psock->sk_redir) ++ if (psock->sk_redir) { + sock_put(psock->sk_redir); +- psock->sk_redir = msg->sk_redir; +- if (!psock->sk_redir) { ++ psock->sk_redir = NULL; ++ } ++ if (!msg->sk_redir) { + ret = __SK_DROP; + goto out; + } ++ psock->redir_ingress = sk_msg_to_ingress(msg); ++ psock->sk_redir = msg->sk_redir; + sock_hold(psock->sk_redir); + } + out: +@@ -877,6 +908,7 @@ static int sk_psock_skb_redirect(struct sk_psock *from, struct sk_buff *skb) * return code, but then didn't set a redirect interface. 
*/ if (unlikely(!sk_other)) { @@ -383582,7 +494221,7 @@ index a86ef7e844f8c..736d8b035a679 100644 sock_drop(from->sk, skb); return -EIO; } -@@ -944,6 +975,7 @@ static int sk_psock_verdict_apply(struct sk_psock *psock, struct sk_buff *skb, +@@ -944,6 +976,7 @@ static int sk_psock_verdict_apply(struct sk_psock *psock, struct sk_buff *skb, { struct sock *sk_other; int err = 0; @@ -383590,7 +494229,7 @@ index a86ef7e844f8c..736d8b035a679 100644 switch (verdict) { case __SK_PASS: -@@ -951,6 +983,7 @@ static int sk_psock_verdict_apply(struct sk_psock *psock, struct sk_buff *skb, +@@ -951,6 +984,7 @@ static int sk_psock_verdict_apply(struct sk_psock *psock, struct sk_buff *skb, sk_other = psock->sk; if (sock_flag(sk_other, SOCK_DEAD) || !sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED)) { @@ -383598,7 +494237,7 @@ index a86ef7e844f8c..736d8b035a679 100644 goto out_free; } -@@ -963,7 +996,15 @@ static int sk_psock_verdict_apply(struct sk_psock *psock, struct sk_buff *skb, +@@ -963,7 +997,15 @@ static int sk_psock_verdict_apply(struct sk_psock *psock, struct sk_buff *skb, * retrying later from workqueue. */ if (skb_queue_empty(&psock->ingress_skb)) { @@ -383615,7 +494254,7 @@ index a86ef7e844f8c..736d8b035a679 100644 } if (err < 0) { spin_lock_bh(&psock->ingress_lock); -@@ -1029,6 +1070,8 @@ static void sk_psock_strp_read(struct strparser *strp, struct sk_buff *skb) +@@ -1029,6 +1071,8 @@ static void sk_psock_strp_read(struct strparser *strp, struct sk_buff *skb) skb_dst_drop(skb); skb_bpf_redirect_clear(skb); ret = bpf_prog_run_pin_on_cpu(prog, skb); @@ -383624,7 +494263,7 @@ index a86ef7e844f8c..736d8b035a679 100644 ret = sk_psock_map_verd(ret, skb_bpf_redirect_fetch(skb)); skb->sk = NULL; } -@@ -1101,6 +1144,8 @@ void sk_psock_start_strp(struct sock *sk, struct sk_psock *psock) +@@ -1101,6 +1145,8 @@ void sk_psock_start_strp(struct sock *sk, struct sk_psock *psock) void sk_psock_stop_strp(struct sock *sk, struct sk_psock *psock) { @@ -383633,7 +494272,7 @@ index a86ef7e844f8c..736d8b035a679 100644 if (!psock->saved_data_ready) return; -@@ -1128,7 +1173,7 @@ static int sk_psock_verdict_recv(read_descriptor_t *desc, struct sk_buff *skb, +@@ -1128,7 +1174,7 @@ static int sk_psock_verdict_recv(read_descriptor_t *desc, struct sk_buff *skb, struct sk_psock *psock; struct bpf_prog *prog; int ret = __SK_DROP; @@ -383642,7 +494281,7 @@ index a86ef7e844f8c..736d8b035a679 100644 /* clone here so sk_eat_skb() in tcp_read_sock does not drop our data */ skb = skb_clone(skb, GFP_ATOMIC); -@@ -1189,6 +1234,9 @@ void sk_psock_start_verdict(struct sock *sk, struct sk_psock *psock) +@@ -1189,6 +1235,9 @@ void sk_psock_start_verdict(struct sock *sk, struct sk_psock *psock) void sk_psock_stop_verdict(struct sock *sk, struct sk_psock *psock) { @@ -383653,7 +494292,7 @@ index a86ef7e844f8c..736d8b035a679 100644 return; diff --git a/net/core/sock.c b/net/core/sock.c -index c1601f75ec4b3..9bcffe1d5332a 100644 +index c1601f75ec4b3..b7ac53e72d1ad 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -830,6 +830,8 @@ static int sock_timestamping_bind_phc(struct sock *sk, int phc_index) @@ -383695,6 +494334,15 @@ index c1601f75ec4b3..9bcffe1d5332a 100644 break; case SO_RCVBUFFORCE: +@@ -1300,7 +1302,7 @@ set_sndbuf: + break; + } + case SO_INCOMING_CPU: +- WRITE_ONCE(sk->sk_incoming_cpu, val); ++ reuseport_update_incoming_cpu(sk, val); + break; + + case SO_CNX_ADVICE: @@ -2043,8 +2045,10 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) newsk->sk_prot_creator = prot; @@ -383759,7 +494407,7 @@ index 
c1601f75ec4b3..9bcffe1d5332a 100644 sk->sk_max_pacing_rate = ~0UL; diff --git a/net/core/sock_map.c b/net/core/sock_map.c -index e252b8ec2b85e..795b3acfb9fd2 100644 +index e252b8ec2b85e..86b4e8909ad1e 100644 --- a/net/core/sock_map.c +++ b/net/core/sock_map.c @@ -167,8 +167,11 @@ static void sock_map_del_link(struct sock *sk, @@ -383834,7 +494482,21 @@ index e252b8ec2b85e..795b3acfb9fd2 100644 return ret; } -@@ -779,13 +789,22 @@ static int sock_map_init_seq_private(void *priv_data, +@@ -339,11 +349,13 @@ static void sock_map_free(struct bpf_map *map) + + sk = xchg(psk, NULL); + if (sk) { ++ sock_hold(sk); + lock_sock(sk); + rcu_read_lock(); + sock_map_unref(sk, psk); + rcu_read_unlock(); + release_sock(sk); ++ sock_put(sk); + } + } + +@@ -779,13 +791,22 @@ static int sock_map_init_seq_private(void *priv_data, { struct sock_map_seq_info *info = priv_data; @@ -383857,7 +494519,7 @@ index e252b8ec2b85e..795b3acfb9fd2 100644 .seq_priv_size = sizeof(struct sock_map_seq_info), }; -@@ -1366,18 +1385,27 @@ static const struct seq_operations sock_hash_seq_ops = { +@@ -1366,18 +1387,27 @@ static const struct seq_operations sock_hash_seq_ops = { }; static int sock_hash_init_seq_private(void *priv_data, @@ -383886,7 +494548,89 @@ index e252b8ec2b85e..795b3acfb9fd2 100644 .seq_priv_size = sizeof(struct sock_hash_seq_info), }; -@@ -1565,7 +1593,7 @@ static struct bpf_iter_reg sock_map_iter_reg = { +@@ -1484,18 +1514,43 @@ void sock_map_unhash(struct sock *sk) + psock = sk_psock(sk); + if (unlikely(!psock)) { + rcu_read_unlock(); +- if (sk->sk_prot->unhash) +- sk->sk_prot->unhash(sk); +- return; ++ saved_unhash = READ_ONCE(sk->sk_prot)->unhash; ++ } else { ++ saved_unhash = psock->saved_unhash; ++ sock_map_remove_links(sk, psock); ++ rcu_read_unlock(); + } +- +- saved_unhash = psock->saved_unhash; +- sock_map_remove_links(sk, psock); +- rcu_read_unlock(); +- saved_unhash(sk); ++ if (WARN_ON_ONCE(saved_unhash == sock_map_unhash)) ++ return; ++ if (saved_unhash) ++ saved_unhash(sk); + } + EXPORT_SYMBOL_GPL(sock_map_unhash); + ++void sock_map_destroy(struct sock *sk) ++{ ++ void (*saved_destroy)(struct sock *sk); ++ struct sk_psock *psock; ++ ++ rcu_read_lock(); ++ psock = sk_psock_get(sk); ++ if (unlikely(!psock)) { ++ rcu_read_unlock(); ++ saved_destroy = READ_ONCE(sk->sk_prot)->destroy; ++ } else { ++ saved_destroy = psock->saved_destroy; ++ sock_map_remove_links(sk, psock); ++ rcu_read_unlock(); ++ sk_psock_stop(psock); ++ sk_psock_put(sk, psock); ++ } ++ if (WARN_ON_ONCE(saved_destroy == sock_map_destroy)) ++ return; ++ if (saved_destroy) ++ saved_destroy(sk); ++} ++EXPORT_SYMBOL_GPL(sock_map_destroy); ++ + void sock_map_close(struct sock *sk, long timeout) + { + void (*saved_close)(struct sock *sk, long timeout); +@@ -1507,15 +1562,21 @@ void sock_map_close(struct sock *sk, long timeout) + if (unlikely(!psock)) { + rcu_read_unlock(); + release_sock(sk); +- return sk->sk_prot->close(sk, timeout); ++ saved_close = READ_ONCE(sk->sk_prot)->close; ++ } else { ++ saved_close = psock->saved_close; ++ sock_map_remove_links(sk, psock); ++ rcu_read_unlock(); ++ sk_psock_stop(psock); ++ release_sock(sk); ++ cancel_work_sync(&psock->work); ++ sk_psock_put(sk, psock); + } +- +- saved_close = psock->saved_close; +- sock_map_remove_links(sk, psock); +- rcu_read_unlock(); +- sk_psock_stop(psock, true); +- sk_psock_put(sk, psock); +- release_sock(sk); ++ /* Make sure we do not recurse. This is a bug. ++ * Leak the socket instead of crashing on a stack overflow. 
++ */ ++ if (WARN_ON_ONCE(saved_close == sock_map_close)) ++ return; + saved_close(sk, timeout); + } + EXPORT_SYMBOL_GPL(sock_map_close); +@@ -1565,7 +1626,7 @@ static struct bpf_iter_reg sock_map_iter_reg = { .ctx_arg_info_size = 2, .ctx_arg_info = { { offsetof(struct bpf_iter__sockmap, key), @@ -383896,10 +494640,10 @@ index e252b8ec2b85e..795b3acfb9fd2 100644 PTR_TO_BTF_ID_OR_NULL }, }, diff --git a/net/core/sock_reuseport.c b/net/core/sock_reuseport.c -index 3f00a28fe762a..fb90e1e00773b 100644 +index 3f00a28fe762a..5a165286e4d8e 100644 --- a/net/core/sock_reuseport.c +++ b/net/core/sock_reuseport.c -@@ -21,6 +21,22 @@ static DEFINE_IDA(reuseport_ida); +@@ -21,6 +21,86 @@ static DEFINE_IDA(reuseport_ida); static int reuseport_resurrect(struct sock *sk, struct sock_reuseport *old_reuse, struct sock_reuseport *reuse, bool bind_inany); @@ -383918,11 +494662,123 @@ index 3f00a28fe762a..fb90e1e00773b 100644 + spin_unlock_bh(&reuseport_lock); +} +EXPORT_SYMBOL(reuseport_has_conns_set); ++ ++static void __reuseport_get_incoming_cpu(struct sock_reuseport *reuse) ++{ ++ /* Paired with READ_ONCE() in reuseport_select_sock_by_hash(). */ ++ WRITE_ONCE(reuse->incoming_cpu, reuse->incoming_cpu + 1); ++} ++ ++static void __reuseport_put_incoming_cpu(struct sock_reuseport *reuse) ++{ ++ /* Paired with READ_ONCE() in reuseport_select_sock_by_hash(). */ ++ WRITE_ONCE(reuse->incoming_cpu, reuse->incoming_cpu - 1); ++} ++ ++static void reuseport_get_incoming_cpu(struct sock *sk, struct sock_reuseport *reuse) ++{ ++ if (sk->sk_incoming_cpu >= 0) ++ __reuseport_get_incoming_cpu(reuse); ++} ++ ++static void reuseport_put_incoming_cpu(struct sock *sk, struct sock_reuseport *reuse) ++{ ++ if (sk->sk_incoming_cpu >= 0) ++ __reuseport_put_incoming_cpu(reuse); ++} ++ ++void reuseport_update_incoming_cpu(struct sock *sk, int val) ++{ ++ struct sock_reuseport *reuse; ++ int old_sk_incoming_cpu; ++ ++ if (unlikely(!rcu_access_pointer(sk->sk_reuseport_cb))) { ++ /* Paired with REAE_ONCE() in sk_incoming_cpu_update() ++ * and compute_score(). ++ */ ++ WRITE_ONCE(sk->sk_incoming_cpu, val); ++ return; ++ } ++ ++ spin_lock_bh(&reuseport_lock); ++ ++ /* This must be done under reuseport_lock to avoid a race with ++ * reuseport_grow(), which accesses sk->sk_incoming_cpu without ++ * lock_sock() when detaching a shutdown()ed sk. ++ * ++ * Paired with READ_ONCE() in reuseport_select_sock_by_hash(). ++ */ ++ old_sk_incoming_cpu = sk->sk_incoming_cpu; ++ WRITE_ONCE(sk->sk_incoming_cpu, val); ++ ++ reuse = rcu_dereference_protected(sk->sk_reuseport_cb, ++ lockdep_is_held(&reuseport_lock)); ++ ++ /* reuseport_grow() has detached a closed sk. 
*/ ++ if (!reuse) ++ goto out; ++ ++ if (old_sk_incoming_cpu < 0 && val >= 0) ++ __reuseport_get_incoming_cpu(reuse); ++ else if (old_sk_incoming_cpu >= 0 && val < 0) ++ __reuseport_put_incoming_cpu(reuse); ++ ++out: ++ spin_unlock_bh(&reuseport_lock); ++} + static int reuseport_sock_index(struct sock *sk, const struct sock_reuseport *reuse, bool closed) -@@ -387,7 +403,7 @@ void reuseport_stop_listen_sock(struct sock *sk) +@@ -48,6 +128,7 @@ static void __reuseport_add_sock(struct sock *sk, + /* paired with smp_rmb() in reuseport_(select|migrate)_sock() */ + smp_wmb(); + reuse->num_socks++; ++ reuseport_get_incoming_cpu(sk, reuse); + } + + static bool __reuseport_detach_sock(struct sock *sk, +@@ -60,6 +141,7 @@ static bool __reuseport_detach_sock(struct sock *sk, + + reuse->socks[i] = reuse->socks[reuse->num_socks - 1]; + reuse->num_socks--; ++ reuseport_put_incoming_cpu(sk, reuse); + + return true; + } +@@ -70,6 +152,7 @@ static void __reuseport_add_closed_sock(struct sock *sk, + reuse->socks[reuse->max_socks - reuse->num_closed_socks - 1] = sk; + /* paired with READ_ONCE() in inet_csk_bind_conflict() */ + WRITE_ONCE(reuse->num_closed_socks, reuse->num_closed_socks + 1); ++ reuseport_get_incoming_cpu(sk, reuse); + } + + static bool __reuseport_detach_closed_sock(struct sock *sk, +@@ -83,6 +166,7 @@ static bool __reuseport_detach_closed_sock(struct sock *sk, + reuse->socks[i] = reuse->socks[reuse->max_socks - reuse->num_closed_socks]; + /* paired with READ_ONCE() in inet_csk_bind_conflict() */ + WRITE_ONCE(reuse->num_closed_socks, reuse->num_closed_socks - 1); ++ reuseport_put_incoming_cpu(sk, reuse); + + return true; + } +@@ -150,6 +234,7 @@ int reuseport_alloc(struct sock *sk, bool bind_inany) + reuse->bind_inany = bind_inany; + reuse->socks[0] = sk; + reuse->num_socks = 1; ++ reuseport_get_incoming_cpu(sk, reuse); + rcu_assign_pointer(sk->sk_reuseport_cb, reuse); + + out: +@@ -193,6 +278,7 @@ static struct sock_reuseport *reuseport_grow(struct sock_reuseport *reuse) + more_reuse->reuseport_id = reuse->reuseport_id; + more_reuse->bind_inany = reuse->bind_inany; + more_reuse->has_conns = reuse->has_conns; ++ more_reuse->incoming_cpu = reuse->incoming_cpu; + + memcpy(more_reuse->socks, reuse->socks, + reuse->num_socks * sizeof(struct sock *)); +@@ -387,7 +473,7 @@ void reuseport_stop_listen_sock(struct sock *sk) prog = rcu_dereference_protected(reuse->prog, lockdep_is_held(&reuseport_lock)); @@ -383931,7 +494787,45 @@ index 3f00a28fe762a..fb90e1e00773b 100644 (prog && prog->expected_attach_type == BPF_SK_REUSEPORT_SELECT_OR_MIGRATE)) { /* Migration capable, move sk from the listening section * to the closed section. -@@ -545,7 +561,7 @@ struct sock *reuseport_migrate_sock(struct sock *sk, +@@ -442,18 +528,32 @@ static struct sock *run_bpf_filter(struct sock_reuseport *reuse, u16 socks, + static struct sock *reuseport_select_sock_by_hash(struct sock_reuseport *reuse, + u32 hash, u16 num_socks) + { ++ struct sock *first_valid_sk = NULL; + int i, j; + + i = j = reciprocal_scale(hash, num_socks); +- while (reuse->socks[i]->sk_state == TCP_ESTABLISHED) { ++ do { ++ struct sock *sk = reuse->socks[i]; ++ ++ if (sk->sk_state != TCP_ESTABLISHED) { ++ /* Paired with WRITE_ONCE() in __reuseport_(get|put)_incoming_cpu(). */ ++ if (!READ_ONCE(reuse->incoming_cpu)) ++ return sk; ++ ++ /* Paired with WRITE_ONCE() in reuseport_update_incoming_cpu(). 
*/ ++ if (READ_ONCE(sk->sk_incoming_cpu) == raw_smp_processor_id()) ++ return sk; ++ ++ if (!first_valid_sk) ++ first_valid_sk = sk; ++ } ++ + i++; + if (i >= num_socks) + i = 0; +- if (i == j) +- return NULL; +- } ++ } while (i != j); + +- return reuse->socks[i]; ++ return first_valid_sk; + } + + /** +@@ -545,7 +645,7 @@ struct sock *reuseport_migrate_sock(struct sock *sk, hash = migrating_sk->sk_hash; prog = rcu_dereference(reuse->prog); if (!prog || prog->expected_attach_type != BPF_SK_REUSEPORT_SELECT_OR_MIGRATE) { @@ -383941,7 +494835,7 @@ index 3f00a28fe762a..fb90e1e00773b 100644 goto failure; } diff --git a/net/core/stream.c b/net/core/stream.c -index 4f1d4aa5fb38d..a61130504827a 100644 +index 4f1d4aa5fb38d..cd60746877b1e 100644 --- a/net/core/stream.c +++ b/net/core/stream.c @@ -159,7 +159,8 @@ int sk_stream_wait_memory(struct sock *sk, long *timeo_p) @@ -383954,16 +494848,28 @@ index 4f1d4aa5fb38d..a61130504827a 100644 return err; do_error: -@@ -195,9 +196,6 @@ void sk_stream_kill_queues(struct sock *sk) +@@ -195,8 +196,11 @@ void sk_stream_kill_queues(struct sock *sk) /* First the read buffer. */ __skb_queue_purge(&sk->sk_receive_queue); - /* Next, the error queue. */ - __skb_queue_purge(&sk->sk_error_queue); -- ++ /* Next, the error queue. ++ * We need to use queue lock, because other threads might ++ * add packets to the queue without socket lock being held. ++ */ ++ skb_queue_purge(&sk->sk_error_queue); + /* Next, the write queue. */ WARN_ON(!skb_queue_empty(&sk->sk_write_queue)); +@@ -205,7 +209,6 @@ void sk_stream_kill_queues(struct sock *sk) + sk_mem_reclaim(sk); + WARN_ON(sk->sk_wmem_queued); +- WARN_ON(sk->sk_forward_alloc); + + /* It is _impossible_ for the backlog to contain anything + * when we get here. All user references to this socket diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index 5f88526ad61cc..ed20cbdd19315 100644 --- a/net/core/sysctl_net_core.c @@ -384062,6 +494968,62 @@ index c5c1d2b8045e8..5183e627468d8 100644 void dccp_time_wait(struct sock *sk, int state, int timeo); +diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c +index 0ea29270d7e53..5bcfa1e9a941b 100644 +--- a/net/dccp/ipv4.c ++++ b/net/dccp/ipv4.c +@@ -137,6 +137,8 @@ failure: + * This unhashes the socket and releases the local port, if necessary. 
+ */ + dccp_set_state(sk, DCCP_CLOSED); ++ if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK)) ++ inet_reset_saddr(sk); + ip_rt_put(rt); + sk->sk_route_caps = 0; + inet->inet_dport = 0; +diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c +index fa663518fa0e4..a28536ad765b1 100644 +--- a/net/dccp/ipv6.c ++++ b/net/dccp/ipv6.c +@@ -551,11 +551,9 @@ static struct sock *dccp_v6_request_recv_sock(const struct sock *sk, + *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash), NULL); + /* Clone pktoptions received with SYN, if we own the req */ + if (*own_req && ireq->pktopts) { +- newnp->pktoptions = skb_clone(ireq->pktopts, GFP_ATOMIC); ++ newnp->pktoptions = skb_clone_and_charge_r(ireq->pktopts, newsk); + consume_skb(ireq->pktopts); + ireq->pktopts = NULL; +- if (newnp->pktoptions) +- skb_set_owner_r(newnp->pktoptions, newsk); + } + + return newsk; +@@ -615,7 +613,7 @@ static int dccp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) + --ANK (980728) + */ + if (np->rxopt.all) +- opt_skb = skb_clone(skb, GFP_ATOMIC); ++ opt_skb = skb_clone_and_charge_r(skb, sk); + + if (sk->sk_state == DCCP_OPEN) { /* Fast path */ + if (dccp_rcv_established(sk, skb, dccp_hdr(skb), skb->len)) +@@ -679,7 +677,6 @@ ipv6_pktoptions: + np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb)); + if (ipv6_opt_accepted(sk, opt_skb, + &DCCP_SKB_CB(opt_skb)->header.h6)) { +- skb_set_owner_r(opt_skb, sk); + memmove(IP6CB(opt_skb), + &DCCP_SKB_CB(opt_skb)->header.h6, + sizeof(struct inet6_skb_parm)); +@@ -967,6 +964,8 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr, + + late_failure: + dccp_set_state(sk, DCCP_CLOSED); ++ if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK)) ++ inet_reset_saddr(sk); + __sk_dst_reset(sk); + failure: + inet->inet_dport = 0; diff --git a/net/dccp/proto.c b/net/dccp/proto.c index abb5c596a8176..c4de716f4994a 100644 --- a/net/dccp/proto.c @@ -384158,10 +495120,41 @@ index dc92a67baea39..7d542eb461729 100644 /* Initialization of DECnet Session Control Port */ scp = DN_SK(sk); diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c -index e9911b18bdbfa..64a56db3de586 100644 +index e9911b18bdbfa..34763f575c308 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c -@@ -1341,6 +1341,7 @@ static int dsa_port_parse_of(struct dsa_port *dp, struct device_node *dn) +@@ -1253,9 +1253,9 @@ static enum dsa_tag_protocol dsa_get_tag_protocol(struct dsa_port *dp, + static int dsa_port_parse_cpu(struct dsa_port *dp, struct net_device *master, + const char *user_protocol) + { ++ const struct dsa_device_ops *tag_ops = NULL; + struct dsa_switch *ds = dp->ds; + struct dsa_switch_tree *dst = ds->dst; +- const struct dsa_device_ops *tag_ops; + enum dsa_tag_protocol default_proto; + + /* Find out which protocol the switch would prefer. 
*/ +@@ -1278,10 +1278,17 @@ static int dsa_port_parse_cpu(struct dsa_port *dp, struct net_device *master, + } + + tag_ops = dsa_find_tagger_by_name(user_protocol); +- } else { +- tag_ops = dsa_tag_driver_get(default_proto); ++ if (IS_ERR(tag_ops)) { ++ dev_warn(ds->dev, ++ "Failed to find a tagging driver for protocol %s, using default\n", ++ user_protocol); ++ tag_ops = NULL; ++ } + } + ++ if (!tag_ops) ++ tag_ops = dsa_tag_driver_get(default_proto); ++ + if (IS_ERR(tag_ops)) { + if (PTR_ERR(tag_ops) == -ENOPROTOOPT) + return -EPROBE_DEFER; +@@ -1341,6 +1348,7 @@ static int dsa_port_parse_of(struct dsa_port *dp, struct device_node *dn) const char *user_protocol; master = of_find_net_device_by_node(ethernet); @@ -384169,7 +495162,7 @@ index e9911b18bdbfa..64a56db3de586 100644 if (!master) return -EPROBE_DEFER; -@@ -1630,6 +1631,10 @@ void dsa_switch_shutdown(struct dsa_switch *ds) +@@ -1630,6 +1638,10 @@ void dsa_switch_shutdown(struct dsa_switch *ds) struct dsa_port *dp; mutex_lock(&dsa2_mutex); @@ -384180,7 +495173,7 @@ index e9911b18bdbfa..64a56db3de586 100644 rtnl_lock(); list_for_each_entry(dp, &ds->dst->ports, list) { -@@ -1664,6 +1669,7 @@ void dsa_switch_shutdown(struct dsa_switch *ds) +@@ -1664,6 +1676,7 @@ void dsa_switch_shutdown(struct dsa_switch *ds) unregister_netdevice_many(&unregister_list); rtnl_unlock(); @@ -384188,11 +495181,60 @@ index e9911b18bdbfa..64a56db3de586 100644 mutex_unlock(&dsa2_mutex); } EXPORT_SYMBOL_GPL(dsa_switch_shutdown); +diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h +index a5c9bc7b66c6e..e91265434354e 100644 +--- a/net/dsa/dsa_priv.h ++++ b/net/dsa/dsa_priv.h +@@ -198,6 +198,7 @@ static inline struct net_device *dsa_master_find_slave(struct net_device *dev, + } + + /* port.c */ ++bool dsa_port_supports_hwtstamp(struct dsa_port *dp, struct ifreq *ifr); + void dsa_port_set_tag_protocol(struct dsa_port *cpu_dp, + const struct dsa_device_ops *tag_ops); + int dsa_port_set_state(struct dsa_port *dp, u8 state, bool do_fast_age); +diff --git a/net/dsa/master.c b/net/dsa/master.c +index e8e19857621bd..69ec510abe83c 100644 +--- a/net/dsa/master.c ++++ b/net/dsa/master.c +@@ -204,8 +204,7 @@ static int dsa_master_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) + * switch in the tree that is PTP capable. + */ + list_for_each_entry(dp, &dst->ports, list) +- if (dp->ds->ops->port_hwtstamp_get || +- dp->ds->ops->port_hwtstamp_set) ++ if (dsa_port_supports_hwtstamp(dp, ifr)) + return -EBUSY; + break; + } diff --git a/net/dsa/port.c b/net/dsa/port.c -index 616330a16d319..a21015d6bd365 100644 +index 616330a16d319..31e8a7a8c3e60 100644 --- a/net/dsa/port.c +++ b/net/dsa/port.c -@@ -111,11 +111,14 @@ int dsa_port_set_state(struct dsa_port *dp, u8 state, bool do_fast_age) +@@ -75,6 +75,22 @@ static bool dsa_port_can_configure_learning(struct dsa_port *dp) + return !err; + } + ++bool dsa_port_supports_hwtstamp(struct dsa_port *dp, struct ifreq *ifr) ++{ ++ struct dsa_switch *ds = dp->ds; ++ int err; ++ ++ if (!ds->ops->port_hwtstamp_get || !ds->ops->port_hwtstamp_set) ++ return false; ++ ++ /* "See through" shim implementations of the "get" method. ++ * This will clobber the ifreq structure, but we will either return an ++ * error, or the master will overwrite it with proper values. 
++ */ ++ err = ds->ops->port_hwtstamp_get(ds, dp->index, ifr); ++ return err != -EOPNOTSUPP; ++} ++ + int dsa_port_set_state(struct dsa_port *dp, u8 state, bool do_fast_age) + { + struct dsa_switch *ds = dp->ds; +@@ -111,11 +127,14 @@ int dsa_port_set_state(struct dsa_port *dp, u8 state, bool do_fast_age) static void dsa_port_set_state_now(struct dsa_port *dp, u8 state, bool do_fast_age) { @@ -384209,7 +495251,7 @@ index 616330a16d319..a21015d6bd365 100644 } int dsa_port_enable_rt(struct dsa_port *dp, struct phy_device *phy) -@@ -1201,8 +1204,10 @@ int dsa_port_link_register_of(struct dsa_port *dp) +@@ -1201,8 +1220,10 @@ int dsa_port_link_register_of(struct dsa_port *dp) if (ds->ops->phylink_mac_link_down) ds->ops->phylink_mac_link_down(ds, port, MLO_AN_FIXED, PHY_INTERFACE_MODE_NA); @@ -384282,8 +495324,37 @@ index 6466d0539af9f..fb69f2f14234e 100644 return -EOPNOTSUPP; if (ds->index == info->sw_index) +diff --git a/net/dsa/tag_8021q.c b/net/dsa/tag_8021q.c +index f8f7b7c34e7da..e443088ab0f65 100644 +--- a/net/dsa/tag_8021q.c ++++ b/net/dsa/tag_8021q.c +@@ -529,6 +529,7 @@ static void dsa_tag_8021q_teardown(struct dsa_switch *ds) + int dsa_tag_8021q_register(struct dsa_switch *ds, __be16 proto) + { + struct dsa_8021q_context *ctx; ++ int err; + + ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); + if (!ctx) +@@ -541,7 +542,15 @@ int dsa_tag_8021q_register(struct dsa_switch *ds, __be16 proto) + + ds->tag_8021q_ctx = ctx; + +- return dsa_tag_8021q_setup(ds); ++ err = dsa_tag_8021q_setup(ds); ++ if (err) ++ goto err_free; ++ ++ return 0; ++ ++err_free: ++ kfree(ctx); ++ return err; + } + EXPORT_SYMBOL_GPL(dsa_tag_8021q_register); + diff --git a/net/dsa/tag_hellcreek.c b/net/dsa/tag_hellcreek.c -index f64b805303cd7..846588c0070a5 100644 +index f64b805303cd7..53a206d116850 100644 --- a/net/dsa/tag_hellcreek.c +++ b/net/dsa/tag_hellcreek.c @@ -21,6 +21,14 @@ static struct sk_buff *hellcreek_xmit(struct sk_buff *skb, @@ -384301,7 +495372,7 @@ index f64b805303cd7..846588c0070a5 100644 /* Tag encoding */ tag = skb_put(skb, HELLCREEK_TAG_LEN); *tag = BIT(dp->index); -@@ -37,7 +45,7 @@ static struct sk_buff *hellcreek_rcv(struct sk_buff *skb, +@@ -37,11 +45,12 @@ static struct sk_buff *hellcreek_rcv(struct sk_buff *skb, skb->dev = dsa_master_find_slave(dev, 0, port); if (!skb->dev) { @@ -384310,6 +495381,26 @@ index f64b805303cd7..846588c0070a5 100644 return NULL; } +- pskb_trim_rcsum(skb, skb->len - HELLCREEK_TAG_LEN); ++ if (pskb_trim_rcsum(skb, skb->len - HELLCREEK_TAG_LEN)) ++ return NULL; + + dsa_default_offload_fwd_mark(skb); + +diff --git a/net/dsa/tag_ksz.c b/net/dsa/tag_ksz.c +index fa1d60d13ad90..6795dd0174996 100644 +--- a/net/dsa/tag_ksz.c ++++ b/net/dsa/tag_ksz.c +@@ -22,7 +22,8 @@ static struct sk_buff *ksz_common_rcv(struct sk_buff *skb, + if (!skb->dev) + return NULL; + +- pskb_trim_rcsum(skb, skb->len - len); ++ if (pskb_trim_rcsum(skb, skb->len - len)) ++ return NULL; + + dsa_default_offload_fwd_mark(skb); + diff --git a/net/dsa/tag_lan9303.c b/net/dsa/tag_lan9303.c index cb548188f8134..98d7d7120bab2 100644 --- a/net/dsa/tag_lan9303.c @@ -384386,8 +495477,22 @@ index 605b51ca69210..6e0518aa3a4d2 100644 /* Ocelot switches copy frames unmodified to the CPU. 
However, it is * possible for the user to request a VLAN modification through +diff --git a/net/dsa/tag_sja1105.c b/net/dsa/tag_sja1105.c +index 2edede9ddac93..d43feadd5fa6b 100644 +--- a/net/dsa/tag_sja1105.c ++++ b/net/dsa/tag_sja1105.c +@@ -644,7 +644,8 @@ static struct sk_buff *sja1110_rcv_inband_control_extension(struct sk_buff *skb, + * padding and trailer we need to account for the fact that + * skb->data points to skb_mac_header(skb) + ETH_HLEN. + */ +- pskb_trim_rcsum(skb, start_of_padding - ETH_HLEN); ++ if (pskb_trim_rcsum(skb, start_of_padding - ETH_HLEN)) ++ return NULL; + /* Trap-to-host frame, no timestamp trailer */ + } else { + *source_port = SJA1110_RX_HEADER_SRC_PORT(rx_header); diff --git a/net/ethtool/eeprom.c b/net/ethtool/eeprom.c -index 7e6b37a54add3..1c94bb8ea03f2 100644 +index 7e6b37a54add3..49c0a2a77f02d 100644 --- a/net/ethtool/eeprom.c +++ b/net/ethtool/eeprom.c @@ -36,7 +36,7 @@ static int fallback_set_params(struct eeprom_req_info *request, @@ -384399,8 +495504,17 @@ index 7e6b37a54add3..1c94bb8ea03f2 100644 request->i2c_address == 0x51) offset += ETH_MODULE_EEPROM_PAGE_LEN * 2; +@@ -124,7 +124,7 @@ static int eeprom_prepare_data(const struct ethnl_req_info *req_base, + if (ret) + goto err_free; + +- ret = get_module_eeprom_by_page(dev, &page_data, info->extack); ++ ret = get_module_eeprom_by_page(dev, &page_data, info ? info->extack : NULL); + if (ret < 0) + goto err_ops; + diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c -index f2abc31528883..e4983f473a3c5 100644 +index f2abc31528883..939c63d6e74b7 100644 --- a/net/ethtool/ioctl.c +++ b/net/ethtool/ioctl.c @@ -1697,7 +1697,7 @@ static noinline_for_stack int ethtool_set_coalesce(struct net_device *dev, @@ -384412,6 +495526,26 @@ index f2abc31528883..e4983f473a3c5 100644 return -EOPNOTSUPP; ret = dev->ethtool_ops->get_coalesce(dev, &coalesce, &kernel_coalesce, +@@ -1988,7 +1988,8 @@ static int ethtool_phys_id(struct net_device *dev, void __user *useraddr) + } else { + /* Driver expects to be called at twice the frequency in rc */ + int n = rc * 2, interval = HZ / n; +- u64 count = n * id.data, i = 0; ++ u64 count = mul_u32_u32(n, id.data); ++ u64 i = 0; + + do { + rtnl_lock(); +@@ -2073,7 +2074,8 @@ static int ethtool_get_phy_stats(struct net_device *dev, void __user *useraddr) + return n_stats; + if (n_stats > S32_MAX / sizeof(u64)) + return -ENOMEM; +- WARN_ON_ONCE(!n_stats); ++ if (WARN_ON_ONCE(!n_stats)) ++ return -EOPNOTSUPP; + + if (copy_from_user(&stats, useraddr, sizeof(stats))) + return -EFAULT; diff --git a/net/ethtool/netlink.c b/net/ethtool/netlink.c index 1797a0a900195..b3729bdafb602 100644 --- a/net/ethtool/netlink.c @@ -384441,7 +495575,7 @@ index 9009f412151e7..ee1e5806bc93a 100644 } diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c -index 26c32407f0290..a1045c3d71b4f 100644 +index 26c32407f0290..7ce40b49c9560 100644 --- a/net/hsr/hsr_device.c +++ b/net/hsr/hsr_device.c @@ -30,13 +30,13 @@ static bool is_slave_up(struct net_device *dev) @@ -384461,6 +495595,99 @@ index 26c32407f0290..a1045c3d71b4f 100644 } } +@@ -219,7 +219,9 @@ static netdev_tx_t hsr_dev_xmit(struct sk_buff *skb, struct net_device *dev) + skb->dev = master->dev; + skb_reset_mac_header(skb); + skb_reset_mac_len(skb); ++ spin_lock_bh(&hsr->seqnr_lock); + hsr_forward_skb(skb, master); ++ spin_unlock_bh(&hsr->seqnr_lock); + } else { + atomic_long_inc(&dev->tx_dropped); + dev_kfree_skb_any(skb); +@@ -278,7 +280,6 @@ static void send_hsr_supervision_frame(struct hsr_port *master, + __u8 type = HSR_TLV_LIFE_CHECK; + 
struct hsr_sup_payload *hsr_sp; + struct hsr_sup_tag *hsr_stag; +- unsigned long irqflags; + struct sk_buff *skb; + + *interval = msecs_to_jiffies(HSR_LIFE_CHECK_INTERVAL); +@@ -299,7 +300,7 @@ static void send_hsr_supervision_frame(struct hsr_port *master, + set_hsr_stag_HSR_ver(hsr_stag, hsr->prot_version); + + /* From HSRv1 on we have separate supervision sequence numbers. */ +- spin_lock_irqsave(&master->hsr->seqnr_lock, irqflags); ++ spin_lock_bh(&hsr->seqnr_lock); + if (hsr->prot_version > 0) { + hsr_stag->sequence_nr = htons(hsr->sup_sequence_nr); + hsr->sup_sequence_nr++; +@@ -307,7 +308,6 @@ static void send_hsr_supervision_frame(struct hsr_port *master, + hsr_stag->sequence_nr = htons(hsr->sequence_nr); + hsr->sequence_nr++; + } +- spin_unlock_irqrestore(&master->hsr->seqnr_lock, irqflags); + + hsr_stag->HSR_TLV_type = type; + /* TODO: Why 12 in HSRv0? */ +@@ -318,11 +318,13 @@ static void send_hsr_supervision_frame(struct hsr_port *master, + hsr_sp = skb_put(skb, sizeof(struct hsr_sup_payload)); + ether_addr_copy(hsr_sp->macaddress_A, master->dev->dev_addr); + +- if (skb_put_padto(skb, ETH_ZLEN)) ++ if (skb_put_padto(skb, ETH_ZLEN)) { ++ spin_unlock_bh(&hsr->seqnr_lock); + return; ++ } + + hsr_forward_skb(skb, master); +- ++ spin_unlock_bh(&hsr->seqnr_lock); + return; + } + +@@ -332,7 +334,6 @@ static void send_prp_supervision_frame(struct hsr_port *master, + struct hsr_priv *hsr = master->hsr; + struct hsr_sup_payload *hsr_sp; + struct hsr_sup_tag *hsr_stag; +- unsigned long irqflags; + struct sk_buff *skb; + + skb = hsr_init_skb(master); +@@ -347,7 +348,7 @@ static void send_prp_supervision_frame(struct hsr_port *master, + set_hsr_stag_HSR_ver(hsr_stag, (hsr->prot_version ? 1 : 0)); + + /* From HSRv1 on we have separate supervision sequence numbers. 
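 	 * Both the sequence number update and the handoff to
 	 * hsr_forward_skb() happen under hsr->seqnr_lock.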
*/ +- spin_lock_irqsave(&master->hsr->seqnr_lock, irqflags); ++ spin_lock_bh(&hsr->seqnr_lock); + hsr_stag->sequence_nr = htons(hsr->sup_sequence_nr); + hsr->sup_sequence_nr++; + hsr_stag->HSR_TLV_type = PRP_TLV_LIFE_CHECK_DD; +@@ -358,13 +359,12 @@ static void send_prp_supervision_frame(struct hsr_port *master, + ether_addr_copy(hsr_sp->macaddress_A, master->dev->dev_addr); + + if (skb_put_padto(skb, ETH_ZLEN)) { +- spin_unlock_irqrestore(&master->hsr->seqnr_lock, irqflags); ++ spin_unlock_bh(&hsr->seqnr_lock); + return; + } + +- spin_unlock_irqrestore(&master->hsr->seqnr_lock, irqflags); +- + hsr_forward_skb(skb, master); ++ spin_unlock_bh(&hsr->seqnr_lock); + } + + /* Announce (supervision frame) timer function +@@ -444,7 +444,7 @@ void hsr_dev_setup(struct net_device *dev) + dev->header_ops = &hsr_header_ops; + dev->netdev_ops = &hsr_device_ops; + SET_NETDEV_DEVTYPE(dev, &hsr_type); +- dev->priv_flags |= IFF_NO_QUEUE; ++ dev->priv_flags |= IFF_NO_QUEUE | IFF_DISABLE_NETPOLL; + + dev->needs_free_netdev = true; + @@ -493,7 +493,7 @@ int hsr_dev_finalize(struct net_device *hsr_dev, struct net_device *slave[2], INIT_LIST_HEAD(&hsr->self_node_db); spin_lock_init(&hsr->list_lock); @@ -384471,7 +495698,7 @@ index 26c32407f0290..a1045c3d71b4f 100644 /* initialize protocol specific functions */ if (protocol_version == PRP_V1) { diff --git a/net/hsr/hsr_forward.c b/net/hsr/hsr_forward.c -index ceb8afb2a62f4..13f81c246f5f5 100644 +index ceb8afb2a62f4..35382ed686d1d 100644 --- a/net/hsr/hsr_forward.c +++ b/net/hsr/hsr_forward.c @@ -108,15 +108,15 @@ struct sk_buff *hsr_get_untagged_frame(struct hsr_frame_info *frame, @@ -384496,6 +495723,164 @@ index ceb8afb2a62f4..13f81c246f5f5 100644 } return skb_clone(frame->skb_std, GFP_ATOMIC); +@@ -309,17 +309,18 @@ static void hsr_deliver_master(struct sk_buff *skb, struct net_device *dev, + struct hsr_node *node_src) + { + bool was_multicast_frame; +- int res; ++ int res, recv_len; + + was_multicast_frame = (skb->pkt_type == PACKET_MULTICAST); + hsr_addr_subst_source(node_src, skb); + skb_pull(skb, ETH_HLEN); ++ recv_len = skb->len; + res = netif_rx(skb); + if (res == NET_RX_DROP) { + dev->stats.rx_dropped++; + } else { + dev->stats.rx_packets++; +- dev->stats.rx_bytes += skb->len; ++ dev->stats.rx_bytes += recv_len; + if (was_multicast_frame) + dev->stats.multicast++; + } +@@ -457,7 +458,6 @@ static void handle_std_frame(struct sk_buff *skb, + { + struct hsr_port *port = frame->port_rcv; + struct hsr_priv *hsr = port->hsr; +- unsigned long irqflags; + + frame->skb_hsr = NULL; + frame->skb_prp = NULL; +@@ -467,10 +467,9 @@ static void handle_std_frame(struct sk_buff *skb, + frame->is_from_san = true; + } else { + /* Sequence nr for the master node */ +- spin_lock_irqsave(&hsr->seqnr_lock, irqflags); ++ lockdep_assert_held(&hsr->seqnr_lock); + frame->sequence_nr = hsr->sequence_nr; + hsr->sequence_nr++; +- spin_unlock_irqrestore(&hsr->seqnr_lock, irqflags); + } + } + +@@ -571,11 +570,13 @@ void hsr_forward_skb(struct sk_buff *skb, struct hsr_port *port) + { + struct hsr_frame_info frame; + ++ rcu_read_lock(); + if (fill_frame_info(&frame, skb, port) < 0) + goto out_drop; + + hsr_register_frame_in(frame.node_src, port, frame.sequence_nr); + hsr_forward_do(&frame); ++ rcu_read_unlock(); + /* Gets called for ingress frames as well as egress from master port. + * So check and increment stats for master port only here. 
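 	 * The node table walk above runs under rcu_read_lock().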
+ */ +@@ -590,6 +591,7 @@ void hsr_forward_skb(struct sk_buff *skb, struct hsr_port *port) + return; + + out_drop: ++ rcu_read_unlock(); + port->dev->stats.tx_dropped++; + kfree_skb(skb); + } +diff --git a/net/hsr/hsr_framereg.c b/net/hsr/hsr_framereg.c +index e31949479305e..414bf4d3d3c92 100644 +--- a/net/hsr/hsr_framereg.c ++++ b/net/hsr/hsr_framereg.c +@@ -159,6 +159,7 @@ static struct hsr_node *hsr_add_node(struct hsr_priv *hsr, + return NULL; + + ether_addr_copy(new_node->macaddress_A, addr); ++ spin_lock_init(&new_node->seq_out_lock); + + /* We are only interested in time diffs here, so use current jiffies + * as initialization. (0 could trigger an spurious ring error warning). +@@ -313,6 +314,7 @@ void hsr_handle_sup_frame(struct hsr_frame_info *frame) + goto done; + + ether_addr_copy(node_real->macaddress_B, ethhdr->h_source); ++ spin_lock_bh(&node_real->seq_out_lock); + for (i = 0; i < HSR_PT_PORTS; i++) { + if (!node_curr->time_in_stale[i] && + time_after(node_curr->time_in[i], node_real->time_in[i])) { +@@ -323,12 +325,16 @@ void hsr_handle_sup_frame(struct hsr_frame_info *frame) + if (seq_nr_after(node_curr->seq_out[i], node_real->seq_out[i])) + node_real->seq_out[i] = node_curr->seq_out[i]; + } ++ spin_unlock_bh(&node_real->seq_out_lock); + node_real->addr_B_port = port_rcv->type; + + spin_lock_bh(&hsr->list_lock); +- list_del_rcu(&node_curr->mac_list); ++ if (!node_curr->removed) { ++ list_del_rcu(&node_curr->mac_list); ++ node_curr->removed = true; ++ kfree_rcu(node_curr, rcu_head); ++ } + spin_unlock_bh(&hsr->list_lock); +- kfree_rcu(node_curr, rcu_head); + + done: + /* PRP uses v0 header */ +@@ -416,13 +422,17 @@ void hsr_register_frame_in(struct hsr_node *node, struct hsr_port *port, + int hsr_register_frame_out(struct hsr_port *port, struct hsr_node *node, + u16 sequence_nr) + { ++ spin_lock_bh(&node->seq_out_lock); + if (seq_nr_before_or_eq(sequence_nr, node->seq_out[port->type]) && + time_is_after_jiffies(node->time_out[port->type] + +- msecs_to_jiffies(HSR_ENTRY_FORGET_TIME))) ++ msecs_to_jiffies(HSR_ENTRY_FORGET_TIME))) { ++ spin_unlock_bh(&node->seq_out_lock); + return 1; ++ } + + node->time_out[port->type] = jiffies; + node->seq_out[port->type] = sequence_nr; ++ spin_unlock_bh(&node->seq_out_lock); + return 0; + } + +@@ -502,9 +512,12 @@ void hsr_prune_nodes(struct timer_list *t) + if (time_is_before_jiffies(timestamp + + msecs_to_jiffies(HSR_NODE_FORGET_TIME))) { + hsr_nl_nodedown(hsr, node->macaddress_A); +- list_del_rcu(&node->mac_list); +- /* Note that we need to free this entry later: */ +- kfree_rcu(node, rcu_head); ++ if (!node->removed) { ++ list_del_rcu(&node->mac_list); ++ node->removed = true; ++ /* Note that we need to free this entry later: */ ++ kfree_rcu(node, rcu_head); ++ } + } + } + spin_unlock_bh(&hsr->list_lock); +diff --git a/net/hsr/hsr_framereg.h b/net/hsr/hsr_framereg.h +index d9628e7a5f051..48990166e4c4e 100644 +--- a/net/hsr/hsr_framereg.h ++++ b/net/hsr/hsr_framereg.h +@@ -69,6 +69,8 @@ void prp_update_san_info(struct hsr_node *node, bool is_sup); + + struct hsr_node { + struct list_head mac_list; ++ /* Protect R/W access to seq_out */ ++ spinlock_t seq_out_lock; + unsigned char macaddress_A[ETH_ALEN]; + unsigned char macaddress_B[ETH_ALEN]; + /* Local slave through which AddrB frames are received from this node */ +@@ -80,6 +82,7 @@ struct hsr_node { + bool san_a; + bool san_b; + u16 seq_out[HSR_PT_PORTS]; ++ bool removed; + struct rcu_head rcu_head; + }; + diff --git a/net/hsr/hsr_main.c b/net/hsr/hsr_main.c index 
f7e284f23b1f3..b099c31501509 100644 --- a/net/hsr/hsr_main.c @@ -384550,7 +495935,7 @@ index 277124f206e06..e0b072aecf0f3 100644 if (nla_put_u32(msg, NL802154_ATTR_IFINDEX, dev->ifindex)) goto nla_put_failure; diff --git a/net/ieee802154/socket.c b/net/ieee802154/socket.c -index 7bb9ef35c5707..d0aaa0346cb11 100644 +index 7bb9ef35c5707..c33f46c9b6b34 100644 --- a/net/ieee802154/socket.c +++ b/net/ieee802154/socket.c @@ -200,8 +200,9 @@ static int raw_bind(struct sock *sk, struct sockaddr *_uaddr, int len) @@ -384576,7 +495961,7 @@ index 7bb9ef35c5707..d0aaa0346cb11 100644 hlen = LL_RESERVED_SPACE(dev); tlen = dev->needed_tailroom; -@@ -493,7 +498,8 @@ static int dgram_bind(struct sock *sk, struct sockaddr *uaddr, int len) +@@ -493,11 +498,14 @@ static int dgram_bind(struct sock *sk, struct sockaddr *uaddr, int len) ro->bound = 0; @@ -384585,8 +495970,15 @@ index 7bb9ef35c5707..d0aaa0346cb11 100644 + if (err < 0) goto out; - if (addr->family != AF_IEEE802154) -@@ -564,8 +570,9 @@ static int dgram_connect(struct sock *sk, struct sockaddr *uaddr, +- if (addr->family != AF_IEEE802154) ++ if (addr->family != AF_IEEE802154) { ++ err = -EINVAL; + goto out; ++ } + + ieee802154_addr_from_sa(&haddr, &addr->addr); + dev = ieee802154_get_dev(sock_net(sk), &haddr); +@@ -564,8 +572,9 @@ static int dgram_connect(struct sock *sk, struct sockaddr *uaddr, struct dgram_sock *ro = dgram_sk(sk); int err = 0; @@ -384598,7 +495990,7 @@ index 7bb9ef35c5707..d0aaa0346cb11 100644 if (addr->family != AF_IEEE802154) return -EINVAL; -@@ -604,6 +611,7 @@ static int dgram_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) +@@ -604,6 +613,7 @@ static int dgram_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) struct ieee802154_mac_cb *cb; struct dgram_sock *ro = dgram_sk(sk); struct ieee802154_addr dst_addr; @@ -384606,7 +495998,7 @@ index 7bb9ef35c5707..d0aaa0346cb11 100644 int hlen, tlen; int err; -@@ -612,10 +620,20 @@ static int dgram_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) +@@ -612,10 +622,20 @@ static int dgram_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) return -EOPNOTSUPP; } @@ -384631,7 +496023,7 @@ index 7bb9ef35c5707..d0aaa0346cb11 100644 if (!ro->bound) dev = dev_getfirstbyhwtype(sock_net(sk), ARPHRD_IEEE802154); -@@ -651,16 +669,6 @@ static int dgram_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) +@@ -651,16 +671,6 @@ static int dgram_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) cb = mac_cb_init(skb); cb->type = IEEE802154_FC_TYPE_DATA; cb->ackreq = ro->want_ack; @@ -384648,8 +496040,29 @@ index 7bb9ef35c5707..d0aaa0346cb11 100644 cb->secen = ro->secen; cb->secen_override = ro->secen_override; cb->seclevel = ro->seclevel; +diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig +index 87983e70f03f3..23b06063e1a51 100644 +--- a/net/ipv4/Kconfig ++++ b/net/ipv4/Kconfig +@@ -403,6 +403,16 @@ config INET_IPCOMP + + If unsure, say Y. + ++config INET_TABLE_PERTURB_ORDER ++ int "INET: Source port perturbation table size (as power of 2)" if EXPERT ++ default 16 ++ help ++ Source port perturbation table size (as power of 2) for ++ RFC 6056 3.3.4. Algorithm 4: Double-Hash Port Selection Algorithm. ++ ++ The default is almost always what you want. ++ Only change this if you know what you are doing. 
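++	  Each table slot holds a 32-bit value, so the default order of
++	  16 costs 2^16 * 4 bytes = 256 KiB of kernel memory.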
++ + config INET_XFRM_TUNNEL + tristate + select INET_TUNNEL diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c -index 1d816a5fd3eb9..e4b2ced66261b 100644 +index 1d816a5fd3eb9..91710e5eedff0 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -158,7 +158,7 @@ void inet_sock_destruct(struct sock *sk) @@ -384746,7 +496159,34 @@ index 1d816a5fd3eb9..e4b2ced66261b 100644 if (IS_ERR_OR_NULL(segs)) goto out; -@@ -2002,6 +2007,10 @@ static int __init inet_init(void) +@@ -1723,24 +1728,14 @@ static const struct net_protocol igmp_protocol = { + }; + #endif + +-/* thinking of making this const? Don't. +- * early_demux can change based on sysctl. +- */ +-static struct net_protocol tcp_protocol = { +- .early_demux = tcp_v4_early_demux, +- .early_demux_handler = tcp_v4_early_demux, ++static const struct net_protocol tcp_protocol = { + .handler = tcp_v4_rcv, + .err_handler = tcp_v4_err, + .no_policy = 1, + .icmp_strict_tag_validation = 1, + }; + +-/* thinking of making this const? Don't. +- * early_demux can change based on sysctl. +- */ +-static struct net_protocol udp_protocol = { +- .early_demux = udp_v4_early_demux, +- .early_demux_handler = udp_v4_early_demux, ++static const struct net_protocol udp_protocol = { + .handler = udp_rcv, + .err_handler = udp_err, + .no_policy = 1, +@@ -2002,6 +1997,10 @@ static int __init inet_init(void) ip_init(); @@ -384757,7 +496197,7 @@ index 1d816a5fd3eb9..e4b2ced66261b 100644 /* Setup TCP slab cache for open requests. */ tcp_init(); -@@ -2032,12 +2041,6 @@ static int __init inet_init(void) +@@ -2032,12 +2031,6 @@ static int __init inet_init(void) if (init_inet_pernet_ops()) pr_crit("%s: Cannot init ipv4 inet pernet ops\n", __func__); @@ -384958,7 +496398,7 @@ index 851f542928a33..d747166bb291c 100644 esp.tfclen = padto - skb->len; } diff --git a/net/ipv4/esp4_offload.c b/net/ipv4/esp4_offload.c -index 8e4e9aa12130d..dad5d29a6a8db 100644 +index 8e4e9aa12130d..2ddba1e2cf228 100644 --- a/net/ipv4/esp4_offload.c +++ b/net/ipv4/esp4_offload.c @@ -159,6 +159,9 @@ static struct sk_buff *xfrm4_beet_gso_segment(struct xfrm_state *x, @@ -384971,8 +496411,18 @@ index 8e4e9aa12130d..dad5d29a6a8db 100644 __skb_pull(skb, skb_transport_offset(skb)); ops = rcu_dereference(inet_offloads[proto]); if (likely(ops && ops->callbacks.gso_segment)) +@@ -308,6 +311,9 @@ static int esp_xmit(struct xfrm_state *x, struct sk_buff *skb, netdev_features_ + xo->seq.low += skb_shinfo(skb)->gso_segs; + } + ++ if (xo->seq.low < seq) ++ xo->seq.hi++; ++ + esp.seqno = cpu_to_be64(seq + ((u64)xo->seq.hi << 32)); + + ip_hdr(skb)->tot_len = htons(skb->len); diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c -index 9fe13e4f5d08a..1452bb72b7d9c 100644 +index 9fe13e4f5d08a..75c88d4863276 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -389,7 +389,7 @@ static int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, @@ -384993,7 +496443,17 @@ index 9fe13e4f5d08a..1452bb72b7d9c 100644 } return ret; -@@ -1112,9 +1112,11 @@ void fib_add_ifaddr(struct in_ifaddr *ifa) +@@ -830,6 +830,9 @@ static int rtm_to_fib_config(struct net *net, struct sk_buff *skb, + return -EINVAL; + } + ++ if (!cfg->fc_table) ++ cfg->fc_table = RT_TABLE_MAIN; ++ + return 0; + errout: + return err; +@@ -1112,9 +1115,11 @@ void fib_add_ifaddr(struct in_ifaddr *ifa) return; /* Add broadcast address, if it is explicitly assigned. 
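 	 * (The limited broadcast address 255.255.255.255 never gets a
 	 * route of its own.)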
*/ @@ -385006,7 +496466,7 @@ index 9fe13e4f5d08a..1452bb72b7d9c 100644 if (!ipv4_is_zeronet(prefix) && !(ifa->ifa_flags & IFA_F_SECONDARY) && (prefix != addr || ifa->ifa_prefixlen < 32)) { -@@ -1128,6 +1130,7 @@ void fib_add_ifaddr(struct in_ifaddr *ifa) +@@ -1128,6 +1133,7 @@ void fib_add_ifaddr(struct in_ifaddr *ifa) if (ifa->ifa_prefixlen < 31) { fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix | ~mask, 32, prim, 0); @@ -385014,7 +496474,7 @@ index 9fe13e4f5d08a..1452bb72b7d9c 100644 } } } -@@ -1582,7 +1585,7 @@ static int __net_init fib_net_init(struct net *net) +@@ -1582,7 +1588,7 @@ static int __net_init fib_net_init(struct net *net) int error; #ifdef CONFIG_IP_ROUTE_CLASSID @@ -385072,18 +496532,19 @@ index ce54a30c2ef1e..d279cb8ac1584 100644 net->ipv4.fib_has_custom_rules = true; diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c -index 3364cb9c67e01..55de6fa83dea2 100644 +index 3364cb9c67e01..607a4f8161555 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c -@@ -29,6 +29,7 @@ +@@ -29,6 +29,8 @@ #include <linux/init.h> #include <linux/slab.h> #include <linux/netlink.h> +#include <linux/hash.h> ++#include <linux/nospec.h> #include <net/arp.h> #include <net/ip.h> -@@ -220,7 +221,7 @@ void fib_nh_release(struct net *net, struct fib_nh *fib_nh) +@@ -220,7 +222,7 @@ void fib_nh_release(struct net *net, struct fib_nh *fib_nh) { #ifdef CONFIG_IP_ROUTE_CLASSID if (fib_nh->nh_tclassid) @@ -385092,7 +496553,7 @@ index 3364cb9c67e01..55de6fa83dea2 100644 #endif fib_nh_common_release(&fib_nh->nh_common); } -@@ -249,7 +250,6 @@ void free_fib_info(struct fib_info *fi) +@@ -249,7 +251,6 @@ void free_fib_info(struct fib_info *fi) pr_warn("Freeing alive fib_info %p\n", fi); return; } @@ -385100,7 +496561,7 @@ index 3364cb9c67e01..55de6fa83dea2 100644 call_rcu(&fi->rcu, free_fib_info_rcu); } -@@ -260,6 +260,10 @@ void fib_release_info(struct fib_info *fi) +@@ -260,6 +261,10 @@ void fib_release_info(struct fib_info *fi) spin_lock_bh(&fib_info_lock); if (fi && refcount_dec_and_test(&fi->fib_treeref)) { hlist_del(&fi->fib_hash); @@ -385111,7 +496572,7 @@ index 3364cb9c67e01..55de6fa83dea2 100644 if (fi->fib_prefsrc) hlist_del(&fi->fib_lhash); if (fi->nh) { -@@ -316,11 +320,15 @@ static inline int nh_comp(struct fib_info *fi, struct fib_info *ofi) +@@ -316,11 +321,15 @@ static inline int nh_comp(struct fib_info *fi, struct fib_info *ofi) static inline unsigned int fib_devindex_hashfn(unsigned int val) { @@ -385131,7 +496592,15 @@ index 3364cb9c67e01..55de6fa83dea2 100644 } static unsigned int fib_info_hashfn_1(int init_val, u8 protocol, u8 scope, -@@ -430,12 +438,11 @@ int ip_fib_check_default(__be32 gw, struct net_device *dev) +@@ -413,6 +422,7 @@ static struct fib_info *fib_find_info(struct fib_info *nfi) + nfi->fib_prefsrc == fi->fib_prefsrc && + nfi->fib_priority == fi->fib_priority && + nfi->fib_type == fi->fib_type && ++ nfi->fib_tb_id == fi->fib_tb_id && + memcmp(nfi->fib_metrics, fi->fib_metrics, + sizeof(u32) * RTAX_MAX) == 0 && + !((nfi->fib_flags ^ fi->fib_flags) & ~RTNH_COMPARE_MASK) && +@@ -430,12 +440,11 @@ int ip_fib_check_default(__be32 gw, struct net_device *dev) { struct hlist_head *head; struct fib_nh *nh; @@ -385146,7 +496615,7 @@ index 3364cb9c67e01..55de6fa83dea2 100644 hlist_for_each_entry(nh, head, nh_hash) { if (nh->fib_nh_dev == dev && nh->fib_nh_gw4 == gw && -@@ -517,9 +524,9 @@ void rtmsg_fib(int event, __be32 key, struct fib_alias *fa, +@@ -517,9 +526,9 @@ void rtmsg_fib(int event, __be32 key, struct fib_alias *fa, fri.dst_len = dst_len; fri.tos = 
fa->fa_tos; fri.type = fa->fa_type; @@ -385159,7 +496628,7 @@ index 3364cb9c67e01..55de6fa83dea2 100644 err = fib_dump_info(skb, info->portid, seq, event, &fri, nlm_flags); if (err < 0) { /* -EMSGSIZE implies BUG in fib_nlmsg_size() */ -@@ -632,7 +639,7 @@ int fib_nh_init(struct net *net, struct fib_nh *nh, +@@ -632,7 +641,7 @@ int fib_nh_init(struct net *net, struct fib_nh *nh, #ifdef CONFIG_IP_ROUTE_CLASSID nh->nh_tclassid = cfg->fc_flow; if (nh->nh_tclassid) @@ -385168,7 +496637,7 @@ index 3364cb9c67e01..55de6fa83dea2 100644 #endif #ifdef CONFIG_IP_ROUTE_MULTIPATH nh->fib_nh_weight = nh_weight; -@@ -662,6 +669,19 @@ static int fib_count_nexthops(struct rtnexthop *rtnh, int remaining, +@@ -662,6 +671,19 @@ static int fib_count_nexthops(struct rtnexthop *rtnh, int remaining, return nhs; } @@ -385188,7 +496657,7 @@ index 3364cb9c67e01..55de6fa83dea2 100644 /* only called when fib_nh is integrated into fib_info */ static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh, int remaining, struct fib_config *cfg, -@@ -704,7 +724,11 @@ static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh, +@@ -704,7 +726,11 @@ static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh, return -EINVAL; } if (nla) { @@ -385201,7 +496670,7 @@ index 3364cb9c67e01..55de6fa83dea2 100644 if (fib_cfg.fc_gw4) fib_cfg.fc_gw_family = AF_INET; } else if (nlav) { -@@ -714,10 +738,18 @@ static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh, +@@ -714,10 +740,18 @@ static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh, } nla = nla_find(attrs, attrlen, RTA_FLOW); @@ -385221,22 +496690,25 @@ index 3364cb9c67e01..55de6fa83dea2 100644 nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE); if (nla) fib_cfg.fc_encap_type = nla_get_u16(nla); -@@ -855,8 +887,13 @@ int fib_nh_match(struct net *net, struct fib_config *cfg, struct fib_info *fi, +@@ -854,9 +888,16 @@ int fib_nh_match(struct net *net, struct fib_config *cfg, struct fib_info *fi, + return 1; } ++ if (fi->nh) { ++ if (cfg->fc_oif || cfg->fc_gw_family || cfg->fc_mp) ++ return 1; ++ return 0; ++ } ++ if (cfg->fc_oif || cfg->fc_gw_family) { - struct fib_nh *nh = fib_info_nh(fi, 0); + struct fib_nh *nh; -+ -+ /* cannot match on nexthop object attributes */ -+ if (fi->nh) -+ return 1; + nh = fib_info_nh(fi, 0); if (cfg->fc_encap) { if (fib_encap_match(net, cfg->fc_encap_type, cfg->fc_encap, nh, cfg, extack)) -@@ -902,6 +939,7 @@ int fib_nh_match(struct net *net, struct fib_config *cfg, struct fib_info *fi, +@@ -902,6 +943,7 @@ int fib_nh_match(struct net *net, struct fib_config *cfg, struct fib_info *fi, attrlen = rtnh_attrlen(rtnh); if (attrlen > 0) { struct nlattr *nla, *nlav, *attrs = rtnh_attrs(rtnh); @@ -385244,7 +496716,7 @@ index 3364cb9c67e01..55de6fa83dea2 100644 nla = nla_find(attrs, attrlen, RTA_GATEWAY); nlav = nla_find(attrs, attrlen, RTA_VIA); -@@ -912,12 +950,17 @@ int fib_nh_match(struct net *net, struct fib_config *cfg, struct fib_info *fi, +@@ -912,12 +954,17 @@ int fib_nh_match(struct net *net, struct fib_config *cfg, struct fib_info *fi, } if (nla) { @@ -385264,7 +496736,7 @@ index 3364cb9c67e01..55de6fa83dea2 100644 err = fib_gw_from_via(&cfg2, nlav, extack); if (err) -@@ -940,8 +983,14 @@ int fib_nh_match(struct net *net, struct fib_config *cfg, struct fib_info *fi, +@@ -940,8 +987,14 @@ int fib_nh_match(struct net *net, struct fib_config *cfg, struct fib_info *fi, #ifdef CONFIG_IP_ROUTE_CLASSID nla = nla_find(attrs, attrlen, RTA_FLOW); @@ -385281,7 +496753,15 @@ index 3364cb9c67e01..55de6fa83dea2 100644 
#endif } -@@ -1179,7 +1228,7 @@ static int fib_check_nh_nongw(struct net *net, struct fib_nh *nh, +@@ -968,6 +1021,7 @@ bool fib_metrics_match(struct fib_config *cfg, struct fib_info *fi) + if (type > RTAX_MAX) + return false; + ++ type = array_index_nospec(type, RTAX_MAX + 1); + if (type == RTAX_CC_ALGO) { + char tmp[TCP_CA_NAME_MAX]; + bool ecn_ca = false; +@@ -1179,7 +1233,7 @@ static int fib_check_nh_nongw(struct net *net, struct fib_nh *nh, nh->fib_nh_dev = in_dev->dev; dev_hold(nh->fib_nh_dev); @@ -385290,7 +496770,7 @@ index 3364cb9c67e01..55de6fa83dea2 100644 if (!netif_carrier_ok(nh->fib_nh_dev)) nh->fib_nh_flags |= RTNH_F_LINKDOWN; err = 0; -@@ -1393,7 +1442,9 @@ struct fib_info *fib_create_info(struct fib_config *cfg, +@@ -1393,7 +1447,9 @@ struct fib_info *fib_create_info(struct fib_config *cfg, #endif err = -ENOBUFS; @@ -385301,7 +496781,7 @@ index 3364cb9c67e01..55de6fa83dea2 100644 unsigned int new_size = fib_info_hash_size << 1; struct hlist_head *new_info_hash; struct hlist_head *new_laddrhash; -@@ -1425,7 +1476,6 @@ struct fib_info *fib_create_info(struct fib_config *cfg, +@@ -1425,7 +1481,6 @@ struct fib_info *fib_create_info(struct fib_config *cfg, return ERR_PTR(err); } @@ -385309,7 +496789,7 @@ index 3364cb9c67e01..55de6fa83dea2 100644 fi->fib_net = net; fi->fib_protocol = cfg->fc_protocol; fi->fib_scope = cfg->fc_scope; -@@ -1552,6 +1602,7 @@ link_it: +@@ -1552,6 +1607,7 @@ link_it: refcount_set(&fi->fib_treeref, 1); refcount_set(&fi->fib_clntref, 1); spin_lock_bh(&fib_info_lock); @@ -385317,7 +496797,7 @@ index 3364cb9c67e01..55de6fa83dea2 100644 hlist_add_head(&fi->fib_hash, &fib_info_hash[fib_info_hashfn(fi)]); if (fi->fib_prefsrc) { -@@ -1565,12 +1616,10 @@ link_it: +@@ -1565,12 +1621,10 @@ link_it: } else { change_nexthops(fi) { struct hlist_head *head; @@ -385331,7 +496811,7 @@ index 3364cb9c67e01..55de6fa83dea2 100644 hlist_add_head(&nexthop_nh->nh_hash, head); } endfor_nexthops(fi) } -@@ -1780,7 +1829,7 @@ int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event, +@@ -1780,7 +1834,7 @@ int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event, goto nla_put_failure; if (nexthop_is_blackhole(fi->nh)) rtm->rtm_type = RTN_BLACKHOLE; @@ -385340,7 +496820,7 @@ index 3364cb9c67e01..55de6fa83dea2 100644 goto offload; } -@@ -1922,8 +1971,7 @@ void fib_nhc_update_mtu(struct fib_nh_common *nhc, u32 new, u32 orig) +@@ -1922,8 +1976,7 @@ void fib_nhc_update_mtu(struct fib_nh_common *nhc, u32 new, u32 orig) void fib_sync_mtu(struct net_device *dev, u32 orig_mtu) { @@ -385350,7 +496830,7 @@ index 3364cb9c67e01..55de6fa83dea2 100644 struct fib_nh *nh; hlist_for_each_entry(nh, head, nh_hash) { -@@ -1942,12 +1990,11 @@ void fib_sync_mtu(struct net_device *dev, u32 orig_mtu) +@@ -1942,12 +1995,11 @@ void fib_sync_mtu(struct net_device *dev, u32 orig_mtu) */ int fib_sync_down_dev(struct net_device *dev, unsigned long event, bool force) { @@ -385366,7 +496846,7 @@ index 3364cb9c67e01..55de6fa83dea2 100644 if (force) scope = -1; -@@ -2092,7 +2139,6 @@ out: +@@ -2092,7 +2144,6 @@ out: int fib_sync_up(struct net_device *dev, unsigned char nh_flags) { struct fib_info *prev_fi; @@ -385374,7 +496854,7 @@ index 3364cb9c67e01..55de6fa83dea2 100644 struct hlist_head *head; struct fib_nh *nh; int ret; -@@ -2108,8 +2154,7 @@ int fib_sync_up(struct net_device *dev, unsigned char nh_flags) +@@ -2108,8 +2159,7 @@ int fib_sync_up(struct net_device *dev, unsigned char nh_flags) } prev_fi = NULL; @@ -385384,7 +496864,7 @@ index 3364cb9c67e01..55de6fa83dea2 100644 ret = 0; 
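 	/* Scan the nexthops hashed on this device. */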
hlist_for_each_entry(nh, head, nh_hash) { -@@ -2188,7 +2233,7 @@ void fib_select_multipath(struct fib_result *res, int hash) +@@ -2188,7 +2238,7 @@ void fib_select_multipath(struct fib_result *res, int hash) } change_nexthops(fi) { @@ -385394,7 +496874,7 @@ index 3364cb9c67e01..55de6fa83dea2 100644 continue; if (!first) { diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c -index 8060524f42566..19c6e7b93d3d8 100644 +index 8060524f42566..52f9f69f57b32 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -497,7 +497,7 @@ static void tnode_free(struct key_vector *tn) @@ -385450,7 +496930,19 @@ index 8060524f42566..19c6e7b93d3d8 100644 goto out; skb = nlmsg_new(fib_nlmsg_size(fa_match->fa_info), GFP_ATOMIC); -@@ -2297,9 +2304,9 @@ static int fn_trie_dump_leaf(struct key_vector *l, struct fib_table *tb, +@@ -1368,8 +1375,10 @@ int fib_table_insert(struct net *net, struct fib_table *tb, + + /* The alias was already inserted, so the node must exist. */ + l = l ? l : fib_find_node(t, &tp, key); +- if (WARN_ON_ONCE(!l)) ++ if (WARN_ON_ONCE(!l)) { ++ err = -ENOENT; + goto out_free_new_fa; ++ } + + if (fib_find_alias(&l->leaf, new_fa->fa_slen, 0, 0, tb->tb_id, true) == + new_fa) { +@@ -2297,9 +2306,9 @@ static int fn_trie_dump_leaf(struct key_vector *l, struct fib_table *tb, fri.dst_len = KEYLENGTH - fa->fa_slen; fri.tos = fa->fa_tos; fri.type = fa->fa_type; @@ -385758,10 +497250,29 @@ index d2e2b3d18c668..e07d10b2c4868 100644 err = 0; done: diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c -index f25d02ad4a8af..a53f9bf7886f0 100644 +index f25d02ad4a8af..29ec42c1f5d09 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c -@@ -259,7 +259,7 @@ next_port: +@@ -155,10 +155,14 @@ static int inet_csk_bind_conflict(const struct sock *sk, + */ + + sk_for_each_bound(sk2, &tb->owners) { +- if (sk != sk2 && +- (!sk->sk_bound_dev_if || +- !sk2->sk_bound_dev_if || +- sk->sk_bound_dev_if == sk2->sk_bound_dev_if)) { ++ int bound_dev_if2; ++ ++ if (sk == sk2) ++ continue; ++ bound_dev_if2 = READ_ONCE(sk2->sk_bound_dev_if); ++ if ((!sk->sk_bound_dev_if || ++ !bound_dev_if2 || ++ sk->sk_bound_dev_if == bound_dev_if2)) { + if (reuse && sk2->sk_reuse && + sk2->sk_state != TCP_LISTEN) { + if ((!relax || +@@ -259,7 +263,7 @@ next_port: goto other_half_scan; } @@ -385770,7 +497281,7 @@ index f25d02ad4a8af..a53f9bf7886f0 100644 /* We still have a chance to connect to different destinations */ relax = true; goto ports_exhausted; -@@ -721,7 +721,7 @@ static struct request_sock *inet_reqsk_clone(struct request_sock *req, +@@ -721,7 +725,7 @@ static struct request_sock *inet_reqsk_clone(struct request_sock *req, sk_node_init(&nreq_sk->sk_node); nreq_sk->sk_tx_queue_mapping = req_sk->sk_tx_queue_mapping; @@ -385779,7 +497290,7 @@ index f25d02ad4a8af..a53f9bf7886f0 100644 nreq_sk->sk_rx_queue_mapping = req_sk->sk_rx_queue_mapping; #endif nreq_sk->sk_incoming_cpu = req_sk->sk_incoming_cpu; -@@ -829,7 +829,8 @@ static void reqsk_timer_handler(struct timer_list *t) +@@ -829,7 +833,8 @@ static void reqsk_timer_handler(struct timer_list *t) icsk = inet_csk(sk_listener); net = sock_net(sk_listener); @@ -385789,7 +497300,7 @@ index f25d02ad4a8af..a53f9bf7886f0 100644 /* Normally all the openreqs are young and become mature * (i.e. converted to established socket) for first timeout. 
* If synack was not acknowledged for 1 second, it means -@@ -1015,7 +1016,7 @@ void inet_csk_destroy_sock(struct sock *sk) +@@ -1015,7 +1020,7 @@ void inet_csk_destroy_sock(struct sock *sk) sk_refcnt_debug_release(sk); @@ -385798,7 +497309,42 @@ index f25d02ad4a8af..a53f9bf7886f0 100644 sock_put(sk); } -@@ -1074,7 +1075,7 @@ static void inet_child_forget(struct sock *sk, struct request_sock *req, +@@ -1035,11 +1040,25 @@ void inet_csk_prepare_forced_close(struct sock *sk) + } + EXPORT_SYMBOL(inet_csk_prepare_forced_close); + ++static int inet_ulp_can_listen(const struct sock *sk) ++{ ++ const struct inet_connection_sock *icsk = inet_csk(sk); ++ ++ if (icsk->icsk_ulp_ops && !icsk->icsk_ulp_ops->clone) ++ return -EINVAL; ++ ++ return 0; ++} ++ + int inet_csk_listen_start(struct sock *sk, int backlog) + { + struct inet_connection_sock *icsk = inet_csk(sk); + struct inet_sock *inet = inet_sk(sk); +- int err = -EADDRINUSE; ++ int err; ++ ++ err = inet_ulp_can_listen(sk); ++ if (unlikely(err)) ++ return err; + + reqsk_queue_alloc(&icsk->icsk_accept_queue); + +@@ -1051,6 +1070,7 @@ int inet_csk_listen_start(struct sock *sk, int backlog) + * It is OK, because this socket enters to hash table only + * after validation is complete. + */ ++ err = -EADDRINUSE; + inet_sk_state_store(sk, TCP_LISTEN); + if (!sk->sk_prot->get_port(sk, inet->inet_num)) { + inet->inet_sport = htons(inet->inet_num); +@@ -1074,7 +1094,7 @@ static void inet_child_forget(struct sock *sk, struct request_sock *req, sock_orphan(child); @@ -385859,7 +497405,7 @@ index 05cd198d7a6ba..341096807100c 100644 rcu_read_lock(); diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c -index bfb522e513461..ce6a3873f89e3 100644 +index bfb522e513461..410b6b7998caf 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -410,13 +410,11 @@ begin: @@ -385917,7 +497463,38 @@ index bfb522e513461..ce6a3873f89e3 100644 &sk->sk_v6_daddr, &sk->sk_v6_rcv_saddr, ports, dif, sdif))) { -@@ -598,7 +593,7 @@ bool inet_ehash_nolisten(struct sock *sk, struct sock *osk, bool *found_dup_sk) +@@ -576,8 +571,20 @@ bool inet_ehash_insert(struct sock *sk, struct sock *osk, bool *found_dup_sk) + spin_lock(lock); + if (osk) { + WARN_ON_ONCE(sk->sk_hash != osk->sk_hash); +- ret = sk_nulls_del_node_init_rcu(osk); +- } else if (found_dup_sk) { ++ ret = sk_hashed(osk); ++ if (ret) { ++ /* Before deleting the node, we insert a new one to make ++ * sure that the look-up-sk process would not miss either ++ * of them and that at least one node would exist in ehash ++ * table all the time. Otherwise there's a tiny chance ++ * that lookup process could find nothing in ehash table. 
++ */ ++ __sk_nulls_add_node_tail_rcu(sk, list); ++ sk_nulls_del_node_init_rcu(osk); ++ } ++ goto unlock; ++ } ++ if (found_dup_sk) { + *found_dup_sk = inet_ehash_lookup_by_sk(sk, list); + if (*found_dup_sk) + ret = false; +@@ -586,6 +593,7 @@ bool inet_ehash_insert(struct sock *sk, struct sock *osk, bool *found_dup_sk) + if (ret) + __sk_nulls_add_node_rcu(sk, list); + ++unlock: + spin_unlock(lock); + + return ret; +@@ -598,7 +606,7 @@ bool inet_ehash_nolisten(struct sock *sk, struct sock *osk, bool *found_dup_sk) if (ok) { sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); } else { @@ -385926,7 +497503,7 @@ index bfb522e513461..ce6a3873f89e3 100644 inet_sk_set_state(sk, TCP_CLOSE); sock_set_flag(sk, SOCK_DEAD); inet_csk_destroy_sock(sk); -@@ -637,7 +632,9 @@ int __inet_hash(struct sock *sk, struct sock *osk) +@@ -637,7 +645,9 @@ int __inet_hash(struct sock *sk, struct sock *osk) int err = 0; if (sk->sk_state != TCP_LISTEN) { @@ -385936,7 +497513,7 @@ index bfb522e513461..ce6a3873f89e3 100644 return 0; } WARN_ON(!sk_unhashed(sk)); -@@ -669,45 +666,54 @@ int inet_hash(struct sock *sk) +@@ -669,45 +679,54 @@ int inet_hash(struct sock *sk) { int err = 0; @@ -386012,22 +497589,22 @@ index bfb522e513461..ce6a3873f89e3 100644 } EXPORT_SYMBOL_GPL(inet_unhash); -@@ -715,15 +721,17 @@ EXPORT_SYMBOL_GPL(inet_unhash); +@@ -715,15 +734,17 @@ EXPORT_SYMBOL_GPL(inet_unhash); * Note that we use 32bit integers (vs RFC 'short integers') * because 2^16 is not a multiple of num_ephemeral and this * property might be used by clever attacker. - * RFC claims using TABLE_LENGTH=10 buckets gives an improvement, - * we use 256 instead to really give more isolation and - * privacy, this only consumes 1 KB of kernel memory. ++ * + * RFC claims using TABLE_LENGTH=10 buckets gives an improvement, though -+ * attacks were since demonstrated, thus we use 65536 instead to really -+ * give more isolation and privacy, at the expense of 256kB of kernel -+ * memory. ++ * attacks were since demonstrated, thus we use 65536 by default instead ++ * to really give more isolation and privacy, at the expense of 256kB ++ * of kernel memory. */ -#define INET_TABLE_PERTURB_SHIFT 8 -static u32 table_perturb[1 << INET_TABLE_PERTURB_SHIFT]; -+#define INET_TABLE_PERTURB_SHIFT 16 -+#define INET_TABLE_PERTURB_SIZE (1 << INET_TABLE_PERTURB_SHIFT) ++#define INET_TABLE_PERTURB_SIZE (1 << CONFIG_INET_TABLE_PERTURB_ORDER) +static u32 *table_perturb; int __inet_hash_connect(struct inet_timewait_death_row *death_row, @@ -386036,7 +497613,7 @@ index bfb522e513461..ce6a3873f89e3 100644 int (*check_established)(struct inet_timewait_death_row *, struct sock *, __u16, struct inet_timewait_sock **)) { -@@ -763,10 +771,13 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row, +@@ -763,10 +784,13 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row, if (likely(remaining > 1)) remaining &= ~1U; @@ -386053,7 +497630,7 @@ index bfb522e513461..ce6a3873f89e3 100644 /* In first pass we try ports of @low parity. * inet_csk_get_port() does the opposite choice. 
*/ -@@ -820,11 +831,12 @@ next_port: +@@ -820,11 +844,12 @@ next_port: return -EADDRNOTAVAIL; ok: @@ -386070,7 +497647,7 @@ index bfb522e513461..ce6a3873f89e3 100644 WRITE_ONCE(table_perturb[index], READ_ONCE(table_perturb[index]) + i + 2); /* Head lock still held and bh's disabled */ -@@ -848,7 +860,7 @@ ok: +@@ -848,7 +873,7 @@ ok: int inet_hash_connect(struct inet_timewait_death_row *death_row, struct sock *sk) { @@ -386079,7 +497656,7 @@ index bfb522e513461..ce6a3873f89e3 100644 if (!inet_sk(sk)->inet_num) port_offset = inet_sk_port_offset(sk); -@@ -898,6 +910,14 @@ void __init inet_hashinfo2_init(struct inet_hashinfo *h, const char *name, +@@ -898,6 +923,14 @@ void __init inet_hashinfo2_init(struct inet_hashinfo *h, const char *name, low_limit, high_limit); init_hashinfo_lhash2(h); @@ -386094,6 +497671,33 @@ index bfb522e513461..ce6a3873f89e3 100644 } int inet_hashinfo2_init_mod(struct inet_hashinfo *h) +diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c +index 437afe392e667..fe6340c363b43 100644 +--- a/net/ipv4/inet_timewait_sock.c ++++ b/net/ipv4/inet_timewait_sock.c +@@ -81,10 +81,10 @@ void inet_twsk_put(struct inet_timewait_sock *tw) + } + EXPORT_SYMBOL_GPL(inet_twsk_put); + +-static void inet_twsk_add_node_rcu(struct inet_timewait_sock *tw, +- struct hlist_nulls_head *list) ++static void inet_twsk_add_node_tail_rcu(struct inet_timewait_sock *tw, ++ struct hlist_nulls_head *list) + { +- hlist_nulls_add_head_rcu(&tw->tw_node, list); ++ hlist_nulls_add_tail_rcu(&tw->tw_node, list); + } + + static void inet_twsk_add_bind_node(struct inet_timewait_sock *tw, +@@ -120,7 +120,7 @@ void inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk, + + spin_lock(lock); + +- inet_twsk_add_node_rcu(tw, &ehead->chain); ++ inet_twsk_add_node_tail_rcu(tw, &ehead->chain); + + /* Step 3: Remove SK from hash chain */ + if (__sk_nulls_del_node_init_rcu(sk)) diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index da21dfce24d73..e9fed83e9b3cc 100644 --- a/net/ipv4/inetpeer.c @@ -386151,7 +497755,7 @@ index cfeb8890f94ee..fad803d2d711e 100644 spin_lock(&qp->q.lock); diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c -index 0fe6c936dc54a..fc74a3e3b3e12 100644 +index 0fe6c936dc54a..454c4357a2979 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -459,14 +459,12 @@ static void __gre_xmit(struct sk_buff *skb, struct net_device *dev, @@ -386258,14 +497862,93 @@ index 0fe6c936dc54a..fc74a3e3b3e12 100644 } else { if (skb_cow_head(skb, dev->needed_headroom)) goto free_skb; +@@ -1495,24 +1498,6 @@ static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev) + struct ip_tunnel_parm *p = &t->parms; + __be16 o_flags = p->o_flags; + +- if (t->erspan_ver <= 2) { +- if (t->erspan_ver != 0 && !t->collect_md) +- o_flags |= TUNNEL_KEY; +- +- if (nla_put_u8(skb, IFLA_GRE_ERSPAN_VER, t->erspan_ver)) +- goto nla_put_failure; +- +- if (t->erspan_ver == 1) { +- if (nla_put_u32(skb, IFLA_GRE_ERSPAN_INDEX, t->index)) +- goto nla_put_failure; +- } else if (t->erspan_ver == 2) { +- if (nla_put_u8(skb, IFLA_GRE_ERSPAN_DIR, t->dir)) +- goto nla_put_failure; +- if (nla_put_u16(skb, IFLA_GRE_ERSPAN_HWID, t->hwid)) +- goto nla_put_failure; +- } +- } +- + if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) || + nla_put_be16(skb, IFLA_GRE_IFLAGS, + gre_tnl_flags_to_gre_flags(p->i_flags)) || +@@ -1553,6 +1538,34 @@ nla_put_failure: + return -EMSGSIZE; + } + ++static int erspan_fill_info(struct sk_buff *skb, const struct net_device *dev) ++{ ++ struct ip_tunnel *t = netdev_priv(dev); 
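++	/* ERSPAN attributes are emitted first; ipgre_fill_info() below
++	 * then adds the common GRE attributes.
++	 */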
++ ++ if (t->erspan_ver <= 2) { ++ if (t->erspan_ver != 0 && !t->collect_md) ++ t->parms.o_flags |= TUNNEL_KEY; ++ ++ if (nla_put_u8(skb, IFLA_GRE_ERSPAN_VER, t->erspan_ver)) ++ goto nla_put_failure; ++ ++ if (t->erspan_ver == 1) { ++ if (nla_put_u32(skb, IFLA_GRE_ERSPAN_INDEX, t->index)) ++ goto nla_put_failure; ++ } else if (t->erspan_ver == 2) { ++ if (nla_put_u8(skb, IFLA_GRE_ERSPAN_DIR, t->dir)) ++ goto nla_put_failure; ++ if (nla_put_u16(skb, IFLA_GRE_ERSPAN_HWID, t->hwid)) ++ goto nla_put_failure; ++ } ++ } ++ ++ return ipgre_fill_info(skb, dev); ++ ++nla_put_failure: ++ return -EMSGSIZE; ++} ++ + static void erspan_setup(struct net_device *dev) + { + struct ip_tunnel *t = netdev_priv(dev); +@@ -1631,7 +1644,7 @@ static struct rtnl_link_ops erspan_link_ops __read_mostly = { + .changelink = erspan_changelink, + .dellink = ip_tunnel_dellink, + .get_size = ipgre_get_size, +- .fill_info = ipgre_fill_info, ++ .fill_info = erspan_fill_info, + .get_link_net = ip_tunnel_get_link_net, + }; + diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c -index 3a025c0119718..d5222c0fa87cb 100644 +index 3a025c0119718..124bf8fdf924a 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c -@@ -318,8 +318,10 @@ static int ip_rcv_finish_core(struct net *net, struct sock *sk, +@@ -310,16 +310,17 @@ static bool ip_can_use_hint(const struct sk_buff *skb, const struct iphdr *iph, + ip_hdr(hint)->tos == iph->tos; + } + +-INDIRECT_CALLABLE_DECLARE(int udp_v4_early_demux(struct sk_buff *)); +-INDIRECT_CALLABLE_DECLARE(int tcp_v4_early_demux(struct sk_buff *)); ++int tcp_v4_early_demux(struct sk_buff *skb); ++int udp_v4_early_demux(struct sk_buff *skb); + static int ip_rcv_finish_core(struct net *net, struct sock *sk, + struct sk_buff *skb, struct net_device *dev, + const struct sk_buff *hint) { const struct iphdr *iph = ip_hdr(skb); - int (*edemux)(struct sk_buff *skb); +- int (*edemux)(struct sk_buff *skb); + int err, drop_reason; struct rtable *rt; - int err; @@ -386274,7 +497957,61 @@ index 3a025c0119718..d5222c0fa87cb 100644 if (ip_can_use_hint(skb, iph, hint)) { err = ip_route_use_hint(skb, iph->daddr, iph->saddr, iph->tos, -@@ -396,19 +398,23 @@ static int ip_rcv_finish_core(struct net *net, struct sock *sk, +@@ -328,21 +329,29 @@ static int ip_rcv_finish_core(struct net *net, struct sock *sk, + goto drop_error; + } + +- if (net->ipv4.sysctl_ip_early_demux && ++ if (READ_ONCE(net->ipv4.sysctl_ip_early_demux) && + !skb_dst(skb) && + !skb->sk && + !ip_is_fragment(iph)) { +- const struct net_protocol *ipprot; +- int protocol = iph->protocol; +- +- ipprot = rcu_dereference(inet_protos[protocol]); +- if (ipprot && (edemux = READ_ONCE(ipprot->early_demux))) { +- err = INDIRECT_CALL_2(edemux, tcp_v4_early_demux, +- udp_v4_early_demux, skb); +- if (unlikely(err)) +- goto drop_error; +- /* must reload iph, skb->head might have changed */ +- iph = ip_hdr(skb); ++ switch (iph->protocol) { ++ case IPPROTO_TCP: ++ if (READ_ONCE(net->ipv4.sysctl_tcp_early_demux)) { ++ tcp_v4_early_demux(skb); ++ ++ /* must reload iph, skb->head might have changed */ ++ iph = ip_hdr(skb); ++ } ++ break; ++ case IPPROTO_UDP: ++ if (READ_ONCE(net->ipv4.sysctl_udp_early_demux)) { ++ err = udp_v4_early_demux(skb); ++ if (unlikely(err)) ++ goto drop_error; ++ ++ /* must reload iph, skb->head might have changed */ ++ iph = ip_hdr(skb); ++ } ++ break; + } + } + +@@ -355,6 +364,11 @@ static int ip_rcv_finish_core(struct net *net, struct sock *sk, + iph->tos, dev); + if (unlikely(err)) + goto drop_error; ++ } else { ++ struct in_device 
*in_dev = __in_dev_get_rcu(dev); ++ ++ if (in_dev && IN_DEV_ORCONF(in_dev, NOPOLICY)) ++ IPCB(skb)->flags |= IPSKB_NOPOLICY; + } + + #ifdef CONFIG_IP_ROUTE_CLASSID +@@ -396,19 +410,23 @@ static int ip_rcv_finish_core(struct net *net, struct sock *sk, * so-called "hole-196" attack) so do it for both. */ if (in_dev && @@ -386301,7 +498038,7 @@ index 3a025c0119718..d5222c0fa87cb 100644 goto drop; } -@@ -436,13 +442,16 @@ static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb) +@@ -436,13 +454,16 @@ static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb) static struct sk_buff *ip_rcv_core(struct sk_buff *skb, struct net *net) { const struct iphdr *iph; @@ -386319,7 +498056,7 @@ index 3a025c0119718..d5222c0fa87cb 100644 __IP_UPD_PO_STATS(net, IPSTATS_MIB_IN, skb->len); -@@ -452,6 +461,7 @@ static struct sk_buff *ip_rcv_core(struct sk_buff *skb, struct net *net) +@@ -452,6 +473,7 @@ static struct sk_buff *ip_rcv_core(struct sk_buff *skb, struct net *net) goto out; } @@ -386327,7 +498064,7 @@ index 3a025c0119718..d5222c0fa87cb 100644 if (!pskb_may_pull(skb, sizeof(struct iphdr))) goto inhdr_error; -@@ -488,6 +498,7 @@ static struct sk_buff *ip_rcv_core(struct sk_buff *skb, struct net *net) +@@ -488,6 +510,7 @@ static struct sk_buff *ip_rcv_core(struct sk_buff *skb, struct net *net) len = ntohs(iph->tot_len); if (skb->len < len) { @@ -386335,7 +498072,7 @@ index 3a025c0119718..d5222c0fa87cb 100644 __IP_INC_STATS(net, IPSTATS_MIB_INTRUNCATEDPKTS); goto drop; } else if (len < (iph->ihl*4)) -@@ -516,11 +527,14 @@ static struct sk_buff *ip_rcv_core(struct sk_buff *skb, struct net *net) +@@ -516,11 +539,14 @@ static struct sk_buff *ip_rcv_core(struct sk_buff *skb, struct net *net) return skb; csum_error: @@ -386526,10 +498263,47 @@ index 2dda856ca2602..aea29d97f8dfa 100644 err1: fib_rules_unregister(ops); return err; +diff --git a/net/ipv4/metrics.c b/net/ipv4/metrics.c +index 25ea6ac44db95..6a1427916c7dc 100644 +--- a/net/ipv4/metrics.c ++++ b/net/ipv4/metrics.c +@@ -1,5 +1,6 @@ + // SPDX-License-Identifier: GPL-2.0-only + #include <linux/netlink.h> ++#include <linux/nospec.h> + #include <linux/rtnetlink.h> + #include <linux/types.h> + #include <net/ip.h> +@@ -28,6 +29,7 @@ static int ip_metrics_convert(struct net *net, struct nlattr *fc_mx, + return -EINVAL; + } + ++ type = array_index_nospec(type, RTAX_MAX + 1); + if (type == RTAX_CC_ALGO) { + char tmp[TCP_CA_NAME_MAX]; + diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c -index 8fd1aba8af31c..b518f20c9a244 100644 +index 8fd1aba8af31c..34737b1d6526c 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c +@@ -435,7 +435,7 @@ clusterip_tg(struct sk_buff *skb, const struct xt_action_param *par) + + switch (ctinfo) { + case IP_CT_NEW: +- ct->mark = hash; ++ WRITE_ONCE(ct->mark, hash); + break; + case IP_CT_RELATED: + case IP_CT_RELATED_REPLY: +@@ -452,7 +452,7 @@ clusterip_tg(struct sk_buff *skb, const struct xt_action_param *par) + #ifdef DEBUG + nf_ct_dump_tuple_ip(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); + #endif +- pr_debug("hash=%u ct_hash=%u ", hash, ct->mark); ++ pr_debug("hash=%u ct_hash=%u ", hash, READ_ONCE(ct->mark)); + if (!clusterip_responsible(cipinfo->config, hash)) { + pr_debug("not responsible\n"); + return NF_DROP; @@ -520,8 +520,11 @@ static int clusterip_tg_check(const struct xt_tgchk_param *par) if (IS_ERR(config)) return PTR_ERR(config); @@ -386580,7 +498354,7 @@ index 03df986217b7b..9e6f0f1275e2c 100644 
nft_fib_is_loopback(pkt->skb, nft_in(pkt))) { nft_fib_store_result(dest, priv, nft_in(pkt)); diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c -index 9e8100728d464..cc8f120149f6b 100644 +index 9e8100728d464..6cc7d347ec0ad 100644 --- a/net/ipv4/nexthop.c +++ b/net/ipv4/nexthop.c @@ -1857,7 +1857,7 @@ static void __remove_nexthop_fib(struct net *net, struct nexthop *nh) @@ -386649,6 +498423,15 @@ index 9e8100728d464..cc8f120149f6b 100644 nexthop_replace_notify(net, new_nh, &cfg->nlinfo); } +@@ -2513,7 +2535,7 @@ static int nh_create_ipv4(struct net *net, struct nexthop *nh, + if (!err) { + nh->nh_flags = fib_nh->fib_nh_flags; + fib_info_update_nhc_saddr(net, &fib_nh->nh_common, +- fib_nh->fib_nh_scope); ++ !fib_nh->fib_nh_scope ? 0 : fib_nh->fib_nh_scope - 1); + } else { + fib_nh_release(net, fib_nh); + } @@ -2544,11 +2566,15 @@ static int nh_create_ipv6(struct net *net, struct nexthop *nh, /* sets nh_dev if successful */ err = ipv6_stub->fib6_nh_init(net, fib6_nh, &fib6_cfg, GFP_KERNEL, @@ -386996,7 +498779,7 @@ index d6899ab5fb39b..ca59b61fd3a31 100644 } } diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c -index 33792cf55a793..9408392640250 100644 +index 33792cf55a793..3aab914eb1039 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -249,12 +249,12 @@ bool cookie_timestamp_decode(const struct net *net, @@ -387032,7 +498815,7 @@ index 33792cf55a793..9408392640250 100644 return true; return dst_feature(dst, RTAX_FEATURE_ECN); -@@ -283,6 +283,7 @@ bool cookie_ecn_ok(const struct tcp_options_received *tcp_opt, +@@ -283,22 +283,26 @@ bool cookie_ecn_ok(const struct tcp_options_received *tcp_opt, EXPORT_SYMBOL(cookie_ecn_ok); struct request_sock *cookie_tcp_reqsk_alloc(const struct request_sock_ops *ops, @@ -387040,7 +498823,19 @@ index 33792cf55a793..9408392640250 100644 struct sock *sk, struct sk_buff *skb) { -@@ -299,6 +300,10 @@ struct request_sock *cookie_tcp_reqsk_alloc(const struct request_sock_ops *ops, + struct tcp_request_sock *treq; + struct request_sock *req; + +-#ifdef CONFIG_MPTCP + if (sk_is_mptcp(sk)) +- ops = &mptcp_subflow_request_sock_ops; +-#endif ++ req = mptcp_subflow_reqsk_alloc(ops, sk, false); ++ else ++ req = inet_reqsk_alloc(ops, sk, false); + +- req = inet_reqsk_alloc(ops, sk, false); + if (!req) return NULL; treq = tcp_rsk(req); @@ -387051,7 +498846,7 @@ index 33792cf55a793..9408392640250 100644 treq->syn_tos = TCP_SKB_CB(skb)->ip_dsfield; #if IS_ENABLED(CONFIG_MPTCP) treq->is_mptcp = sk_is_mptcp(sk); -@@ -337,7 +342,8 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) +@@ -337,7 +341,8 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) struct flowi4 fl4; u32 tsoff = 0; @@ -387061,7 +498856,7 @@ index 33792cf55a793..9408392640250 100644 goto out; if (tcp_synq_no_recent_overflow(sk)) -@@ -366,7 +372,8 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) +@@ -366,7 +371,8 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) goto out; ret = NULL; @@ -387072,7 +498867,7 @@ index 33792cf55a793..9408392640250 100644 goto out; diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c -index 6f1e64d492328..a36728277e321 100644 +index 6f1e64d492328..495c58e442e2a 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -97,7 +97,7 @@ static int ipv4_local_port_range(struct ctl_table *table, int write, @@ -387102,7 +498897,69 @@ index 6f1e64d492328..a36728277e321 100644 } return ret; -@@ -639,6 +639,8 @@ static struct ctl_table ipv4_net_table[] = { 
+@@ -363,61 +363,6 @@ bad_key: + return ret; + } + +-static void proc_configure_early_demux(int enabled, int protocol) +-{ +- struct net_protocol *ipprot; +-#if IS_ENABLED(CONFIG_IPV6) +- struct inet6_protocol *ip6prot; +-#endif +- +- rcu_read_lock(); +- +- ipprot = rcu_dereference(inet_protos[protocol]); +- if (ipprot) +- ipprot->early_demux = enabled ? ipprot->early_demux_handler : +- NULL; +- +-#if IS_ENABLED(CONFIG_IPV6) +- ip6prot = rcu_dereference(inet6_protos[protocol]); +- if (ip6prot) +- ip6prot->early_demux = enabled ? ip6prot->early_demux_handler : +- NULL; +-#endif +- rcu_read_unlock(); +-} +- +-static int proc_tcp_early_demux(struct ctl_table *table, int write, +- void *buffer, size_t *lenp, loff_t *ppos) +-{ +- int ret = 0; +- +- ret = proc_dou8vec_minmax(table, write, buffer, lenp, ppos); +- +- if (write && !ret) { +- int enabled = init_net.ipv4.sysctl_tcp_early_demux; +- +- proc_configure_early_demux(enabled, IPPROTO_TCP); +- } +- +- return ret; +-} +- +-static int proc_udp_early_demux(struct ctl_table *table, int write, +- void *buffer, size_t *lenp, loff_t *ppos) +-{ +- int ret = 0; +- +- ret = proc_dou8vec_minmax(table, write, buffer, lenp, ppos); +- +- if (write && !ret) { +- int enabled = init_net.ipv4.sysctl_udp_early_demux; +- +- proc_configure_early_demux(enabled, IPPROTO_UDP); +- } +- +- return ret; +-} +- + static int proc_tfo_blackhole_detect_timeout(struct ctl_table *table, + int write, void *buffer, + size_t *lenp, loff_t *ppos) +@@ -639,6 +584,8 @@ static struct ctl_table ipv4_net_table[] = { .maxlen = sizeof(u8), .mode = 0644, .proc_handler = proc_dou8vec_minmax, @@ -387111,7 +498968,7 @@ index 6f1e64d492328..a36728277e321 100644 }, { .procname = "icmp_errors_use_inbound_ifaddr", -@@ -646,6 +648,8 @@ static struct ctl_table ipv4_net_table[] = { +@@ -646,6 +593,8 @@ static struct ctl_table ipv4_net_table[] = { .maxlen = sizeof(u8), .mode = 0644, .proc_handler = proc_dou8vec_minmax, @@ -387120,7 +498977,7 @@ index 6f1e64d492328..a36728277e321 100644 }, { .procname = "icmp_ratelimit", -@@ -685,6 +689,8 @@ static struct ctl_table ipv4_net_table[] = { +@@ -685,6 +634,8 @@ static struct ctl_table ipv4_net_table[] = { .maxlen = sizeof(u8), .mode = 0644, .proc_handler = proc_dou8vec_minmax, @@ -387129,7 +498986,7 @@ index 6f1e64d492328..a36728277e321 100644 }, { .procname = "tcp_ecn_fallback", -@@ -692,6 +698,8 @@ static struct ctl_table ipv4_net_table[] = { +@@ -692,6 +643,8 @@ static struct ctl_table ipv4_net_table[] = { .maxlen = sizeof(u8), .mode = 0644, .proc_handler = proc_dou8vec_minmax, @@ -387138,8 +498995,25 @@ index 6f1e64d492328..a36728277e321 100644 }, { .procname = "ip_dynaddr", +@@ -712,14 +665,14 @@ static struct ctl_table ipv4_net_table[] = { + .data = &init_net.ipv4.sysctl_udp_early_demux, + .maxlen = sizeof(u8), + .mode = 0644, +- .proc_handler = proc_udp_early_demux ++ .proc_handler = proc_dou8vec_minmax, + }, + { + .procname = "tcp_early_demux", + .data = &init_net.ipv4.sysctl_tcp_early_demux, + .maxlen = sizeof(u8), + .mode = 0644, +- .proc_handler = proc_tcp_early_demux ++ .proc_handler = proc_dou8vec_minmax, + }, + { + .procname = "nexthop_compat_mode", diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c -index f5c336f8b0c8e..5b4e170b6a346 100644 +index f5c336f8b0c8e..51f34560a9d63 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -287,8 +287,8 @@ enum { @@ -387153,7 +499027,7 @@ index f5c336f8b0c8e..5b4e170b6a346 100644 long sysctl_tcp_mem[3] __read_mostly; EXPORT_SYMBOL(sysctl_tcp_mem); -@@ -435,7 +435,7 @@ void tcp_init_sock(struct sock *sk) +@@ 
-435,10 +435,11 @@ void tcp_init_sock(struct sock *sk) * algorithms that we must have the following bandaid to talk * efficiently to them. -DaveM */ @@ -387162,7 +499036,11 @@ index f5c336f8b0c8e..5b4e170b6a346 100644 /* There's a bubble in the pipe until at least the first ACK. */ tp->app_limited = ~0U; -@@ -447,7 +447,7 @@ void tcp_init_sock(struct sock *sk) ++ tp->rate_app_limited = 1; + + /* See draft-stevens-tcpca-spec-01 for discussion of the + * initialization of these values. +@@ -447,7 +448,7 @@ void tcp_init_sock(struct sock *sk) tp->snd_cwnd_clamp = ~0; tp->mss_cache = TCP_MSS_DEFAULT; @@ -387171,7 +499049,7 @@ index f5c336f8b0c8e..5b4e170b6a346 100644 tcp_assign_congestion_control(sk); tp->tsoffset = 0; -@@ -458,8 +458,8 @@ void tcp_init_sock(struct sock *sk) +@@ -458,8 +459,8 @@ void tcp_init_sock(struct sock *sk) icsk->icsk_sync_mss = tcp_sync_mss; @@ -387182,7 +499060,7 @@ index f5c336f8b0c8e..5b4e170b6a346 100644 sk_sockets_allocated_inc(sk); sk->sk_route_forced_caps = NETIF_F_GSO; -@@ -644,7 +644,7 @@ int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg) +@@ -644,7 +645,7 @@ int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg) } EXPORT_SYMBOL(tcp_ioctl); @@ -387191,7 +499069,7 @@ index f5c336f8b0c8e..5b4e170b6a346 100644 { TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_PSH; tp->pushed_seq = tp->write_seq; -@@ -655,7 +655,7 @@ static inline bool forced_push(const struct tcp_sock *tp) +@@ -655,7 +656,7 @@ static inline bool forced_push(const struct tcp_sock *tp) return after(tp->write_seq, tp->pushed_seq + (tp->max_window >> 1)); } @@ -387200,7 +499078,7 @@ index f5c336f8b0c8e..5b4e170b6a346 100644 { struct tcp_sock *tp = tcp_sk(sk); struct tcp_skb_cb *tcb = TCP_SKB_CB(skb); -@@ -694,7 +694,7 @@ static bool tcp_should_autocork(struct sock *sk, struct sk_buff *skb, +@@ -694,7 +695,7 @@ static bool tcp_should_autocork(struct sock *sk, struct sk_buff *skb, int size_goal) { return skb->len < size_goal && @@ -387209,7 +499087,7 @@ index f5c336f8b0c8e..5b4e170b6a346 100644 !tcp_rtx_queue_empty(sk) && refcount_read(&sk->sk_wmem_alloc) > skb->truesize; } -@@ -952,7 +952,7 @@ int tcp_send_mss(struct sock *sk, int *size_goal, int flags) +@@ -952,7 +953,7 @@ int tcp_send_mss(struct sock *sk, int *size_goal, int flags) */ void tcp_remove_empty_skb(struct sock *sk, struct sk_buff *skb) { @@ -387218,7 +499096,7 @@ index f5c336f8b0c8e..5b4e170b6a346 100644 tcp_unlink_write_queue(skb, sk); if (tcp_write_queue_empty(sk)) tcp_chrono_stop(sk, TCP_CHRONO_BUSY); -@@ -982,7 +982,7 @@ new_segment: +@@ -982,7 +983,7 @@ new_segment: #ifdef CONFIG_TLS_DEVICE skb->decrypted = !!(flags & MSG_SENDPAGE_DECRYPTED); #endif @@ -387227,7 +499105,7 @@ index f5c336f8b0c8e..5b4e170b6a346 100644 copy = size_goal; } -@@ -991,7 +991,7 @@ new_segment: +@@ -991,7 +992,7 @@ new_segment: i = skb_shinfo(skb)->nr_frags; can_coalesce = skb_can_coalesce(skb, i, page, offset); @@ -387236,7 +499114,7 @@ index f5c336f8b0c8e..5b4e170b6a346 100644 tcp_mark_push(tp, skb); goto new_segment; } -@@ -1002,7 +1002,7 @@ new_segment: +@@ -1002,7 +1003,7 @@ new_segment: skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy); } else { get_page(page); @@ -387245,7 +499123,7 @@ index f5c336f8b0c8e..5b4e170b6a346 100644 } if (!(flags & MSG_NO_SHARED_FRAGS)) -@@ -1159,7 +1159,8 @@ static int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, +@@ -1159,7 +1160,8 @@ static int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, struct sockaddr *uaddr = msg->msg_name; int err, flags; @@ -387255,7 +499133,7 @@ index 
f5c336f8b0c8e..5b4e170b6a346 100644 (uaddr && msg->msg_namelen >= sizeof(uaddr->sa_family) && uaddr->sa_family == AF_UNSPEC)) return -EOPNOTSUPP; -@@ -1311,7 +1312,7 @@ new_segment: +@@ -1311,7 +1313,7 @@ new_segment: process_backlog++; skb->ip_summed = CHECKSUM_PARTIAL; @@ -387264,7 +499142,7 @@ index f5c336f8b0c8e..5b4e170b6a346 100644 copy = size_goal; /* All packets are restored as if they have -@@ -1343,7 +1344,7 @@ new_segment: +@@ -1343,7 +1345,7 @@ new_segment: if (!skb_can_coalesce(skb, i, pfrag->page, pfrag->offset)) { @@ -387273,7 +499151,7 @@ index f5c336f8b0c8e..5b4e170b6a346 100644 tcp_mark_push(tp, skb); goto new_segment; } -@@ -1663,11 +1664,13 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc, +@@ -1663,11 +1665,13 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc, if (!copied) copied = used; break; @@ -387291,7 +499169,7 @@ index f5c336f8b0c8e..5b4e170b6a346 100644 /* If recv_actor drops the lock (e.g. TCP splice * receive) the skb pointer might be invalid when * getting here: tcp_collapse might have deleted it -@@ -1719,7 +1722,7 @@ int tcp_set_rcvlowat(struct sock *sk, int val) +@@ -1719,7 +1723,7 @@ int tcp_set_rcvlowat(struct sock *sk, int val) if (sk->sk_userlocks & SOCK_RCVBUF_LOCK) cap = sk->sk_rcvbuf >> 1; else @@ -387300,7 +499178,7 @@ index f5c336f8b0c8e..5b4e170b6a346 100644 val = min(val, cap); WRITE_ONCE(sk->sk_rcvlowat, val ? : 1); -@@ -1776,6 +1779,9 @@ static skb_frag_t *skb_advance_to_frag(struct sk_buff *skb, u32 offset_skb, +@@ -1776,6 +1780,9 @@ static skb_frag_t *skb_advance_to_frag(struct sk_buff *skb, u32 offset_skb, { skb_frag_t *frag; @@ -387310,7 +499188,7 @@ index f5c336f8b0c8e..5b4e170b6a346 100644 offset_skb -= skb_headlen(skb); if ((int)offset_skb < 0 || skb_has_frag_list(skb)) return NULL; -@@ -2687,11 +2693,37 @@ void tcp_shutdown(struct sock *sk, int how) +@@ -2687,11 +2694,37 @@ void tcp_shutdown(struct sock *sk, int how) } EXPORT_SYMBOL(tcp_shutdown); @@ -387349,7 +499227,7 @@ index f5c336f8b0c8e..5b4e170b6a346 100644 out_of_socket_memory = tcp_out_of_memory(sk); if (too_many_orphans) -@@ -2800,7 +2832,7 @@ adjudge_to_death: +@@ -2800,7 +2833,7 @@ adjudge_to_death: /* remove backlog if any, without releasing ownership. */ __release_sock(sk); @@ -387358,7 +499236,7 @@ index f5c336f8b0c8e..5b4e170b6a346 100644 /* Have we already been destroyed by a softirq or backlog? */ if (state != TCP_CLOSE && sk->sk_state == TCP_CLOSE) -@@ -2991,8 +3023,10 @@ int tcp_disconnect(struct sock *sk, int flags) +@@ -2991,8 +3024,10 @@ int tcp_disconnect(struct sock *sk, int flags) icsk->icsk_rto_min = TCP_RTO_MIN; icsk->icsk_delack_max = TCP_DELACK_MAX; tp->snd_ssthresh = TCP_INFINITE_SSTHRESH; @@ -387370,7 +499248,7 @@ index f5c336f8b0c8e..5b4e170b6a346 100644 tp->window_clamp = 0; tp->delivered = 0; tp->delivered_ce = 0; -@@ -3011,8 +3045,7 @@ int tcp_disconnect(struct sock *sk, int flags) +@@ -3011,8 +3046,7 @@ int tcp_disconnect(struct sock *sk, int flags) icsk->icsk_ack.rcv_mss = TCP_MIN_MSS; memset(&tp->rx_opt, 0, sizeof(tp->rx_opt)); __sk_dst_reset(sk); @@ -387380,7 +499258,24 @@ index f5c336f8b0c8e..5b4e170b6a346 100644 tcp_saved_syn_free(tp); tp->compressed_ack = 0; tp->segs_in = 0; -@@ -3596,7 +3629,8 @@ static int do_tcp_setsockopt(struct sock *sk, int level, int optname, +@@ -3033,6 +3067,7 @@ int tcp_disconnect(struct sock *sk, int flags) + tp->last_oow_ack_time = 0; + /* There's a bubble in the pipe until at least the first ACK. 
*/ + tp->app_limited = ~0U; ++ tp->rate_app_limited = 1; + tp->rack.mstamp = 0; + tp->rack.advanced = 0; + tp->rack.reo_wnd_steps = 1; +@@ -3503,7 +3538,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level, int optname, + case TCP_REPAIR_OPTIONS: + if (!tp->repair) + err = -EINVAL; +- else if (sk->sk_state == TCP_ESTABLISHED) ++ else if (sk->sk_state == TCP_ESTABLISHED && !tp->bytes_sent) + err = tcp_repair_options_est(sk, optval, optlen); + else + err = -EPERM; +@@ -3596,7 +3631,8 @@ static int do_tcp_setsockopt(struct sock *sk, int level, int optname, case TCP_FASTOPEN_CONNECT: if (val > 1 || val < 0) { err = -EINVAL; @@ -387390,7 +499285,7 @@ index f5c336f8b0c8e..5b4e170b6a346 100644 if (sk->sk_state == TCP_CLOSE) tp->fastopen_connect = val; else -@@ -3703,7 +3737,7 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) +@@ -3703,7 +3739,7 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) info->tcpi_max_pacing_rate = rate64; info->tcpi_reordering = tp->reordering; @@ -387399,7 +499294,7 @@ index f5c336f8b0c8e..5b4e170b6a346 100644 if (info->tcpi_state == TCP_LISTEN) { /* listeners aliased fields : -@@ -3872,7 +3906,7 @@ struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk, +@@ -3872,7 +3908,7 @@ struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk, rate64 = tcp_compute_delivery_rate(tp); nla_put_u64_64bit(stats, TCP_NLA_DELIVERY_RATE, rate64, TCP_NLA_PAD); @@ -387408,7 +499303,7 @@ index f5c336f8b0c8e..5b4e170b6a346 100644 nla_put_u32(stats, TCP_NLA_REORDERING, tp->reordering); nla_put_u32(stats, TCP_NLA_MIN_RTT, tcp_min_rtt(tp)); -@@ -3944,12 +3978,13 @@ static int do_tcp_getsockopt(struct sock *sk, int level, +@@ -3944,12 +3980,13 @@ static int do_tcp_getsockopt(struct sock *sk, int level, val = keepalive_probes(tp); break; case TCP_SYNCNT: @@ -387424,7 +499319,7 @@ index f5c336f8b0c8e..5b4e170b6a346 100644 break; case TCP_DEFER_ACCEPT: val = retrans_to_secs(icsk->icsk_accept_queue.rskq_defer_accept, -@@ -4296,12 +4331,16 @@ static void __tcp_alloc_md5sig_pool(void) +@@ -4296,12 +4333,16 @@ static void __tcp_alloc_md5sig_pool(void) * to memory. 
See smp_rmb() in tcp_get_md5sig_pool() */ smp_wmb(); @@ -387443,7 +499338,7 @@ index f5c336f8b0c8e..5b4e170b6a346 100644 mutex_lock(&tcp_md5sig_mutex); if (!tcp_md5sig_pool_populated) { -@@ -4312,7 +4351,8 @@ bool tcp_alloc_md5sig_pool(void) +@@ -4312,7 +4353,8 @@ bool tcp_alloc_md5sig_pool(void) mutex_unlock(&tcp_md5sig_mutex); } @@ -387453,7 +499348,7 @@ index f5c336f8b0c8e..5b4e170b6a346 100644 } EXPORT_SYMBOL(tcp_alloc_md5sig_pool); -@@ -4328,7 +4368,8 @@ struct tcp_md5sig_pool *tcp_get_md5sig_pool(void) +@@ -4328,7 +4370,8 @@ struct tcp_md5sig_pool *tcp_get_md5sig_pool(void) { local_bh_disable(); @@ -387463,7 +499358,7 @@ index f5c336f8b0c8e..5b4e170b6a346 100644 /* coupled with smp_wmb() in __tcp_alloc_md5sig_pool() */ smp_rmb(); return this_cpu_ptr(&tcp_md5sig_pool); -@@ -4502,7 +4543,10 @@ void __init tcp_init(void) +@@ -4502,7 +4545,10 @@ void __init tcp_init(void) sizeof_field(struct sk_buff, cb)); percpu_counter_init(&tcp_sockets_allocated, 0, GFP_KERNEL); @@ -387594,10 +499489,40 @@ index f5f588b1f6e9d..58358bf92e1b8 100644 static void bictcp_state(struct sock *sk, u8 new_state) diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c -index 5f4d6f45d87f7..2c597a4e429ab 100644 +index 5f4d6f45d87f7..7f34c455651db 100644 --- a/net/ipv4/tcp_bpf.c +++ b/net/ipv4/tcp_bpf.c -@@ -138,10 +138,9 @@ int tcp_bpf_sendmsg_redir(struct sock *sk, struct sk_msg *msg, +@@ -6,6 +6,7 @@ + #include <linux/bpf.h> + #include <linux/init.h> + #include <linux/wait.h> ++#include <linux/util_macros.h> + + #include <net/inet_common.h> + #include <net/tls.h> +@@ -45,8 +46,11 @@ static int bpf_tcp_ingress(struct sock *sk, struct sk_psock *psock, + tmp->sg.end = i; + if (apply) { + apply_bytes -= size; +- if (!apply_bytes) ++ if (!apply_bytes) { ++ if (sge->length) ++ sk_msg_iter_var_prev(i); + break; ++ } + } + } while (i != msg->sg.end); + +@@ -131,17 +135,15 @@ static int tcp_bpf_push_locked(struct sock *sk, struct sk_msg *msg, + return ret; + } + +-int tcp_bpf_sendmsg_redir(struct sock *sk, struct sk_msg *msg, +- u32 bytes, int flags) ++int tcp_bpf_sendmsg_redir(struct sock *sk, bool ingress, ++ struct sk_msg *msg, u32 bytes, int flags) + { +- bool ingress = sk_msg_to_ingress(msg); struct sk_psock *psock = sk_psock_get(sk); int ret; @@ -387611,7 +499536,7 @@ index 5f4d6f45d87f7..2c597a4e429ab 100644 ret = ingress ? 
bpf_tcp_ingress(sk, psock, msg, bytes, flags) : tcp_bpf_push_locked(sk, msg, bytes, flags, false); sk_psock_put(sk, psock); -@@ -172,6 +171,68 @@ static int tcp_msg_wait_data(struct sock *sk, struct sk_psock *psock, +@@ -172,6 +174,68 @@ static int tcp_msg_wait_data(struct sock *sk, struct sk_psock *psock, return ret; } @@ -387680,29 +499605,62 @@ index 5f4d6f45d87f7..2c597a4e429ab 100644 static int tcp_bpf_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, int flags, int *addr_len) { -@@ -273,7 +334,7 @@ more_data: - cork = true; - psock->cork = NULL; - } -- sk_msg_return(sk, msg, tosend); -+ sk_msg_return(sk, msg, msg->sg.size); +@@ -216,10 +280,10 @@ msg_bytes_ready: + static int tcp_bpf_send_verdict(struct sock *sk, struct sk_psock *psock, + struct sk_msg *msg, int *copied, int flags) + { +- bool cork = false, enospc = sk_msg_full(msg); ++ bool cork = false, enospc = sk_msg_full(msg), redir_ingress; + struct sock *sk_redir; +- u32 tosend, delta = 0; +- u32 eval = __SK_NONE; ++ u32 tosend, origsize, sent, delta = 0; ++ u32 eval; + int ret; + + more_data: +@@ -250,6 +314,7 @@ more_data: + tosend = msg->sg.size; + if (psock->apply_bytes && psock->apply_bytes < tosend) + tosend = psock->apply_bytes; ++ eval = __SK_NONE; + + switch (psock->eval) { + case __SK_PASS: +@@ -261,6 +326,7 @@ more_data: + sk_msg_apply_bytes(psock, tosend); + break; + case __SK_REDIRECT: ++ redir_ingress = psock->redir_ingress; + sk_redir = psock->sk_redir; + sk_msg_apply_bytes(psock, tosend); + if (!psock->apply_bytes) { +@@ -276,7 +342,10 @@ more_data: + sk_msg_return(sk, msg, tosend); release_sock(sk); - ret = tcp_bpf_sendmsg_redir(sk_redir, msg, tosend, flags); -@@ -313,8 +374,11 @@ more_data: +- ret = tcp_bpf_sendmsg_redir(sk_redir, msg, tosend, flags); ++ origsize = msg->sg.size; ++ ret = tcp_bpf_sendmsg_redir(sk_redir, redir_ingress, ++ msg, tosend, flags); ++ sent = origsize - msg->sg.size; + + if (eval == __SK_REDIRECT) + sock_put(sk_redir); +@@ -313,8 +382,11 @@ more_data: } if (msg && msg->sg.data[msg->sg.start].page_link && - msg->sg.data[msg->sg.start].length) + msg->sg.data[msg->sg.start].length) { + if (eval == __SK_REDIRECT) -+ sk_mem_charge(sk, msg->sg.size); ++ sk_mem_charge(sk, tosend - sent); goto more_data; + } } return ret; } -@@ -464,6 +528,8 @@ enum { +@@ -464,6 +536,8 @@ enum { enum { TCP_BPF_BASE, TCP_BPF_TX, @@ -387711,15 +499669,16 @@ index 5f4d6f45d87f7..2c597a4e429ab 100644 TCP_BPF_NUM_CFGS, }; -@@ -475,7 +541,6 @@ static void tcp_bpf_rebuild_protos(struct proto prot[TCP_BPF_NUM_CFGS], +@@ -475,7 +549,7 @@ static void tcp_bpf_rebuild_protos(struct proto prot[TCP_BPF_NUM_CFGS], struct proto *base) { prot[TCP_BPF_BASE] = *base; - prot[TCP_BPF_BASE].unhash = sock_map_unhash; ++ prot[TCP_BPF_BASE].destroy = sock_map_destroy; prot[TCP_BPF_BASE].close = sock_map_close; prot[TCP_BPF_BASE].recvmsg = tcp_bpf_recvmsg; prot[TCP_BPF_BASE].sock_is_readable = sk_msg_is_readable; -@@ -483,6 +548,12 @@ static void tcp_bpf_rebuild_protos(struct proto prot[TCP_BPF_NUM_CFGS], +@@ -483,6 +557,12 @@ static void tcp_bpf_rebuild_protos(struct proto prot[TCP_BPF_NUM_CFGS], prot[TCP_BPF_TX] = prot[TCP_BPF_BASE]; prot[TCP_BPF_TX].sendmsg = tcp_bpf_sendmsg; prot[TCP_BPF_TX].sendpage = tcp_bpf_sendpage; @@ -387732,7 +499691,7 @@ index 5f4d6f45d87f7..2c597a4e429ab 100644 } static void tcp_bpf_check_v6_needs_rebuild(struct proto *ops) -@@ -520,6 +591,10 @@ int tcp_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore) +@@ -520,6 +600,10 @@ int tcp_bpf_update_proto(struct sock 
*sk, struct sk_psock *psock, bool restore) int family = sk->sk_family == AF_INET6 ? TCP_BPF_IPV6 : TCP_BPF_IPV4; int config = psock->progs.msg_parser ? TCP_BPF_TX : TCP_BPF_BASE; @@ -387743,7 +499702,7 @@ index 5f4d6f45d87f7..2c597a4e429ab 100644 if (restore) { if (inet_csk_has_ulp(sk)) { /* TLS does not have an unhash proto in SW cases, -@@ -537,9 +612,6 @@ int tcp_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore) +@@ -537,9 +621,6 @@ int tcp_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore) return 0; } @@ -387753,8 +499712,20 @@ index 5f4d6f45d87f7..2c597a4e429ab 100644 if (sk->sk_family == AF_INET6) { if (tcp_bpf_assert_proto_ops(psock->sk_proto)) return -EINVAL; +@@ -560,10 +641,9 @@ EXPORT_SYMBOL_GPL(tcp_bpf_update_proto); + */ + void tcp_bpf_clone(const struct sock *sk, struct sock *newsk) + { +- int family = sk->sk_family == AF_INET6 ? TCP_BPF_IPV6 : TCP_BPF_IPV4; + struct proto *prot = newsk->sk_prot; + +- if (prot == &tcp_bpf_prots[family][TCP_BPF_BASE]) ++ if (is_insidevar(prot, tcp_bpf_prots)) + newsk->sk_prot = sk->sk_prot_creator; + } + #endif /* CONFIG_BPF_SYSCALL */ diff --git a/net/ipv4/tcp_cdg.c b/net/ipv4/tcp_cdg.c -index 709d238018239..ddc7ba0554bdd 100644 +index 709d238018239..112f28f936934 100644 --- a/net/ipv4/tcp_cdg.c +++ b/net/ipv4/tcp_cdg.c @@ -161,8 +161,8 @@ static void tcp_cdg_hystart_update(struct sock *sk) @@ -387836,7 +499807,13 @@ index 709d238018239..ddc7ba0554bdd 100644 break; case CA_EVENT_COMPLETE_CWR: ca->state = CDG_UNKNOWN; -@@ -380,7 +380,7 @@ static void tcp_cdg_init(struct sock *sk) +@@ -375,12 +375,13 @@ static void tcp_cdg_init(struct sock *sk) + struct cdg *ca = inet_csk_ca(sk); + struct tcp_sock *tp = tcp_sk(sk); + ++ ca->gradients = NULL; + /* We silently fall back to window = 1 if allocation fails. */ + if (window > 1) ca->gradients = kcalloc(window, sizeof(ca->gradients[0]), GFP_NOWAIT | __GFP_NOWARN); ca->rtt_seq = tp->snd_nxt; @@ -387845,6 +499822,14 @@ index 709d238018239..ddc7ba0554bdd 100644 } static void tcp_cdg_release(struct sock *sk) +@@ -388,6 +389,7 @@ static void tcp_cdg_release(struct sock *sk) + struct cdg *ca = inet_csk_ca(sk); + + kfree(ca->gradients); ++ ca->gradients = NULL; + } + + static struct tcp_congestion_ops tcp_cdg __read_mostly = { diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c index db5831e6c136a..f43db30a7195d 100644 --- a/net/ipv4/tcp_cong.c @@ -388253,7 +500238,7 @@ index 00e54873213e8..c0c81a2c77fae 100644 /* Extract info for Tcp socket info provided via netlink. 
*/ diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c -index 141e85e6422b1..686e210d89c21 100644 +index 141e85e6422b1..102a0436eb291 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -414,7 +414,7 @@ static void tcp_sndbuf_expand(struct sock *sk) @@ -388431,7 +500416,17 @@ index 141e85e6422b1..686e210d89c21 100644 (new_recovery || icsk->icsk_retransmits) && !inet_csk(sk)->icsk_mtup.probe_size; } -@@ -2448,7 +2455,7 @@ static void DBGUNDO(struct sock *sk, const char *msg) +@@ -2178,7 +2185,8 @@ void tcp_enter_loss(struct sock *sk) + */ + static bool tcp_check_sack_reneging(struct sock *sk, int flag) + { +- if (flag & FLAG_SACK_RENEGING) { ++ if (flag & FLAG_SACK_RENEGING && ++ flag & FLAG_SND_UNA_ADVANCED) { + struct tcp_sock *tp = tcp_sk(sk); + unsigned long delay = max(usecs_to_jiffies(tp->srtt_us >> 4), + msecs_to_jiffies(10)); +@@ -2448,7 +2456,7 @@ static void DBGUNDO(struct sock *sk, const char *msg) pr_debug("Undo %s %pI4/%u c%u l%u ss%u/%u p%u\n", msg, &inet->inet_daddr, ntohs(inet->inet_dport), @@ -388440,7 +500435,7 @@ index 141e85e6422b1..686e210d89c21 100644 tp->snd_ssthresh, tp->prior_ssthresh, tp->packets_out); } -@@ -2457,7 +2464,7 @@ static void DBGUNDO(struct sock *sk, const char *msg) +@@ -2457,7 +2465,7 @@ static void DBGUNDO(struct sock *sk, const char *msg) pr_debug("Undo %s %pI6/%u c%u l%u ss%u/%u p%u\n", msg, &sk->sk_v6_daddr, ntohs(inet->inet_dport), @@ -388449,7 +500444,7 @@ index 141e85e6422b1..686e210d89c21 100644 tp->snd_ssthresh, tp->prior_ssthresh, tp->packets_out); } -@@ -2482,7 +2489,7 @@ static void tcp_undo_cwnd_reduction(struct sock *sk, bool unmark_loss) +@@ -2482,7 +2490,7 @@ static void tcp_undo_cwnd_reduction(struct sock *sk, bool unmark_loss) if (tp->prior_ssthresh) { const struct inet_connection_sock *icsk = inet_csk(sk); @@ -388458,7 +500453,7 @@ index 141e85e6422b1..686e210d89c21 100644 if (tp->prior_ssthresh > tp->snd_ssthresh) { tp->snd_ssthresh = tp->prior_ssthresh; -@@ -2499,6 +2506,21 @@ static inline bool tcp_may_undo(const struct tcp_sock *tp) +@@ -2499,6 +2507,21 @@ static inline bool tcp_may_undo(const struct tcp_sock *tp) return tp->undo_marker && (!tp->undo_retrans || tcp_packet_delayed(tp)); } @@ -388480,7 +500475,7 @@ index 141e85e6422b1..686e210d89c21 100644 /* People celebrate: "We love our President!" 
*/ static bool tcp_try_undo_recovery(struct sock *sk) { -@@ -2521,14 +2543,8 @@ static bool tcp_try_undo_recovery(struct sock *sk) +@@ -2521,14 +2544,8 @@ static bool tcp_try_undo_recovery(struct sock *sk) } else if (tp->rack.reo_wnd_persist) { tp->rack.reo_wnd_persist--; } @@ -388496,7 +500491,7 @@ index 141e85e6422b1..686e210d89c21 100644 tcp_set_ca_state(sk, TCP_CA_Open); tp->is_sack_reneg = 0; return false; -@@ -2564,6 +2580,8 @@ static bool tcp_try_undo_loss(struct sock *sk, bool frto_undo) +@@ -2564,6 +2581,8 @@ static bool tcp_try_undo_loss(struct sock *sk, bool frto_undo) NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSPURIOUSRTOS); inet_csk(sk)->icsk_retransmits = 0; @@ -388505,7 +500500,7 @@ index 141e85e6422b1..686e210d89c21 100644 if (frto_undo || tcp_is_sack(tp)) { tcp_set_ca_state(sk, TCP_CA_Open); tp->is_sack_reneg = 0; -@@ -2589,7 +2607,7 @@ static void tcp_init_cwnd_reduction(struct sock *sk) +@@ -2589,7 +2608,7 @@ static void tcp_init_cwnd_reduction(struct sock *sk) tp->high_seq = tp->snd_nxt; tp->tlp_high_seq = 0; tp->snd_cwnd_cnt = 0; @@ -388514,7 +500509,7 @@ index 141e85e6422b1..686e210d89c21 100644 tp->prr_delivered = 0; tp->prr_out = 0; tp->snd_ssthresh = inet_csk(sk)->icsk_ca_ops->ssthresh(sk); -@@ -2619,7 +2637,7 @@ void tcp_cwnd_reduction(struct sock *sk, int newly_acked_sacked, int newly_lost, +@@ -2619,7 +2638,7 @@ void tcp_cwnd_reduction(struct sock *sk, int newly_acked_sacked, int newly_lost, } /* Force a fast retransmit upon entering fast recovery */ sndcnt = max(sndcnt, (tp->prr_out ? 0 : 1)); @@ -388523,7 +500518,7 @@ index 141e85e6422b1..686e210d89c21 100644 } static inline void tcp_end_cwnd_reduction(struct sock *sk) -@@ -2632,7 +2650,7 @@ static inline void tcp_end_cwnd_reduction(struct sock *sk) +@@ -2632,7 +2651,7 @@ static inline void tcp_end_cwnd_reduction(struct sock *sk) /* Reset cwnd to ssthresh in CWR or Recovery (unless it's undone) */ if (tp->snd_ssthresh < TCP_INFINITE_SSTHRESH && (inet_csk(sk)->icsk_ca_state == TCP_CA_CWR || tp->undo_marker)) { @@ -388532,7 +500527,7 @@ index 141e85e6422b1..686e210d89c21 100644 tp->snd_cwnd_stamp = tcp_jiffies32; } tcp_ca_event(sk, CA_EVENT_COMPLETE_CWR); -@@ -2696,12 +2714,15 @@ static void tcp_mtup_probe_success(struct sock *sk) +@@ -2696,12 +2715,15 @@ static void tcp_mtup_probe_success(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); struct inet_connection_sock *icsk = inet_csk(sk); @@ -388552,7 +500547,7 @@ index 141e85e6422b1..686e210d89c21 100644 tp->snd_cwnd_cnt = 0; tp->snd_cwnd_stamp = tcp_jiffies32; tp->snd_ssthresh = tcp_current_ssthresh(sk); -@@ -3024,7 +3045,7 @@ static void tcp_fastretrans_alert(struct sock *sk, const u32 prior_snd_una, +@@ -3024,7 +3046,7 @@ static void tcp_fastretrans_alert(struct sock *sk, const u32 prior_snd_una, tp->snd_una == tp->mtu_probe.probe_seq_start) { tcp_mtup_probe_failed(sk); /* Restores the reduction we did in tcp_mtup_probe() */ @@ -388561,7 +500556,7 @@ index 141e85e6422b1..686e210d89c21 100644 tcp_simple_retransmit(sk); return; } -@@ -3041,7 +3062,7 @@ static void tcp_fastretrans_alert(struct sock *sk, const u32 prior_snd_una, +@@ -3041,7 +3063,7 @@ static void tcp_fastretrans_alert(struct sock *sk, const u32 prior_snd_una, static void tcp_update_rtt_min(struct sock *sk, u32 rtt_us, const int flag) { @@ -388570,7 +500565,7 @@ index 141e85e6422b1..686e210d89c21 100644 struct tcp_sock *tp = tcp_sk(sk); if ((flag & FLAG_ACK_MAYBE_DELAYED) && rtt_us > tcp_min_rtt(tp)) { -@@ -3452,7 +3473,8 @@ static inline bool tcp_may_raise_cwnd(const struct sock *sk, const int 
flag) +@@ -3452,7 +3474,8 @@ static inline bool tcp_may_raise_cwnd(const struct sock *sk, const int flag) * new SACK or ECE mark may first advance cwnd here and later reduce * cwnd in tcp_fastretrans_alert() based on more states. */ @@ -388580,7 +500575,7 @@ index 141e85e6422b1..686e210d89c21 100644 return flag & FLAG_FORWARD_PROGRESS; return flag & FLAG_DATA_ACKED; -@@ -3564,7 +3586,8 @@ static bool __tcp_oow_rate_limited(struct net *net, int mib_idx, +@@ -3564,7 +3587,8 @@ static bool __tcp_oow_rate_limited(struct net *net, int mib_idx, if (*last_oow_ack_time) { s32 elapsed = (s32)(tcp_jiffies32 - *last_oow_ack_time); @@ -388590,7 +500585,7 @@ index 141e85e6422b1..686e210d89c21 100644 NET_INC_STATS(net, mib_idx); return true; /* rate-limited: don't send yet! */ } -@@ -3611,11 +3634,11 @@ static void tcp_send_challenge_ack(struct sock *sk, const struct sk_buff *skb) +@@ -3611,11 +3635,11 @@ static void tcp_send_challenge_ack(struct sock *sk, const struct sk_buff *skb) /* Then check host-wide RFC 5961 rate limit. */ now = jiffies / HZ; @@ -388605,7 +500600,7 @@ index 141e85e6422b1..686e210d89c21 100644 WRITE_ONCE(challenge_count, half + prandom_u32_max(ack_limit)); } count = READ_ONCE(challenge_count); -@@ -3858,7 +3881,8 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) +@@ -3858,7 +3882,8 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) tcp_process_tlp_ack(sk, ack, flag); if (tcp_ack_is_dubious(sk, flag)) { @@ -388615,7 +500610,7 @@ index 141e85e6422b1..686e210d89c21 100644 num_dupack = 1; /* Consider if pure acks were aggregated in tcp_add_backlog() */ if (!(flag & FLAG_DATA)) -@@ -4043,7 +4067,7 @@ void tcp_parse_options(const struct net *net, +@@ -4043,7 +4068,7 @@ void tcp_parse_options(const struct net *net, break; case TCPOPT_WINDOW: if (opsize == TCPOLEN_WINDOW && th->syn && @@ -388624,7 +500619,7 @@ index 141e85e6422b1..686e210d89c21 100644 __u8 snd_wscale = *(__u8 *)ptr; opt_rx->wscale_ok = 1; if (snd_wscale > TCP_MAX_WSCALE) { -@@ -4059,7 +4083,7 @@ void tcp_parse_options(const struct net *net, +@@ -4059,7 +4084,7 @@ void tcp_parse_options(const struct net *net, case TCPOPT_TIMESTAMP: if ((opsize == TCPOLEN_TIMESTAMP) && ((estab && opt_rx->tstamp_ok) || @@ -388633,7 +500628,7 @@ index 141e85e6422b1..686e210d89c21 100644 opt_rx->saw_tstamp = 1; opt_rx->rcv_tsval = get_unaligned_be32(ptr); opt_rx->rcv_tsecr = get_unaligned_be32(ptr + 4); -@@ -4067,7 +4091,7 @@ void tcp_parse_options(const struct net *net, +@@ -4067,7 +4092,7 @@ void tcp_parse_options(const struct net *net, break; case TCPOPT_SACK_PERM: if (opsize == TCPOLEN_SACK_PERM && th->syn && @@ -388642,7 +500637,7 @@ index 141e85e6422b1..686e210d89c21 100644 opt_rx->sack_ok = TCP_SACK_SEEN; tcp_sack_reset(opt_rx); } -@@ -4408,7 +4432,7 @@ static void tcp_dsack_set(struct sock *sk, u32 seq, u32 end_seq) +@@ -4408,7 +4433,7 @@ static void tcp_dsack_set(struct sock *sk, u32 seq, u32 end_seq) { struct tcp_sock *tp = tcp_sk(sk); @@ -388651,7 +500646,7 @@ index 141e85e6422b1..686e210d89c21 100644 int mib_idx; if (before(seq, tp->rcv_nxt)) -@@ -4455,7 +4479,7 @@ static void tcp_send_dupack(struct sock *sk, const struct sk_buff *skb) +@@ -4455,7 +4480,7 @@ static void tcp_send_dupack(struct sock *sk, const struct sk_buff *skb) NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKLOST); tcp_enter_quickack_mode(sk, TCP_MAX_QUICKACKS); @@ -388660,7 +500655,7 @@ index 141e85e6422b1..686e210d89c21 100644 u32 end_seq = TCP_SKB_CB(skb)->end_seq; tcp_rcv_spurious_retrans(sk, skb); -@@ -5400,7 
+5424,7 @@ static bool tcp_should_expand_sndbuf(const struct sock *sk) +@@ -5400,7 +5425,7 @@ static bool tcp_should_expand_sndbuf(const struct sock *sk) return false; /* If we filled the congestion window, do not expand. */ @@ -388669,7 +500664,7 @@ index 141e85e6422b1..686e210d89c21 100644 return false; return true; -@@ -5418,7 +5442,17 @@ static void tcp_new_space(struct sock *sk) +@@ -5418,7 +5443,17 @@ static void tcp_new_space(struct sock *sk) INDIRECT_CALL_1(sk->sk_write_space, sk_stream_write_space, sk); } @@ -388688,7 +500683,7 @@ index 141e85e6422b1..686e210d89c21 100644 { /* pairs with tcp_poll() */ smp_mb(); -@@ -5468,7 +5502,7 @@ send_now: +@@ -5468,7 +5503,7 @@ send_now: } if (!tcp_is_sack(tp) || @@ -388697,7 +500692,7 @@ index 141e85e6422b1..686e210d89c21 100644 goto send_now; if (tp->compressed_ack_rcv_nxt != tp->rcv_nxt) { -@@ -5489,11 +5523,12 @@ send_now: +@@ -5489,11 +5524,12 @@ send_now: if (tp->srtt_us && tp->srtt_us < rtt) rtt = tp->srtt_us; @@ -388712,7 +500707,7 @@ index 141e85e6422b1..686e210d89c21 100644 HRTIMER_MODE_REL_PINNED_SOFT); } -@@ -5521,7 +5556,7 @@ static void tcp_check_urg(struct sock *sk, const struct tcphdr *th) +@@ -5521,7 +5557,7 @@ static void tcp_check_urg(struct sock *sk, const struct tcphdr *th) struct tcp_sock *tp = tcp_sk(sk); u32 ptr = ntohs(th->urg_ptr); @@ -388721,7 +500716,7 @@ index 141e85e6422b1..686e210d89c21 100644 ptr--; ptr += ntohl(th->seq); -@@ -5770,7 +5805,7 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb) +@@ -5770,7 +5806,7 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb) trace_tcp_probe(sk, skb); tcp_mstamp_refresh(tp); @@ -388730,7 +500725,7 @@ index 141e85e6422b1..686e210d89c21 100644 inet_csk(sk)->icsk_af_ops->sk_rx_dst_set(sk, skb); /* * Header prediction. -@@ -5957,9 +5992,9 @@ void tcp_init_transfer(struct sock *sk, int bpf_op, struct sk_buff *skb) +@@ -5957,9 +5993,9 @@ void tcp_init_transfer(struct sock *sk, int bpf_op, struct sk_buff *skb) * retransmission has occurred. 
*/ if (tp->total_retrans > 1 && tp->undo_marker) @@ -388742,7 +500737,7 @@ index 141e85e6422b1..686e210d89c21 100644 tp->snd_cwnd_stamp = tcp_jiffies32; bpf_skops_established(sk, bpf_op, skb); -@@ -6653,7 +6688,7 @@ static void tcp_ecn_create_request(struct request_sock *req, +@@ -6653,7 +6689,7 @@ static void tcp_ecn_create_request(struct request_sock *req, ect = !INET_ECN_is_not_ect(TCP_SKB_CB(skb)->ip_dsfield); ecn_ok_dst = dst_feature(dst, DST_FEATURE_ECN_MASK); @@ -388751,7 +500746,7 @@ index 141e85e6422b1..686e210d89c21 100644 if (((!ect || th->res1) && ecn_ok) || tcp_ca_needs_ecn(listen_sk) || (ecn_ok_dst & DST_FEATURE_ECN_CA) || -@@ -6719,11 +6754,14 @@ static bool tcp_syn_flood_action(const struct sock *sk, const char *proto) +@@ -6719,11 +6755,14 @@ static bool tcp_syn_flood_action(const struct sock *sk, const char *proto) { struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue; const char *msg = "Dropping request"; @@ -388768,7 +500763,7 @@ index 141e85e6422b1..686e210d89c21 100644 msg = "Sending cookies"; want_cookie = true; __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPREQQFULLDOCOOKIES); -@@ -6731,8 +6769,7 @@ static bool tcp_syn_flood_action(const struct sock *sk, const char *proto) +@@ -6731,8 +6770,7 @@ static bool tcp_syn_flood_action(const struct sock *sk, const char *proto) #endif __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPREQQFULLDROP); @@ -388778,7 +500773,7 @@ index 141e85e6422b1..686e210d89c21 100644 xchg(&queue->synflood_warned, 1) == 0) net_info_ratelimited("%s: Possible SYN flooding on port %d. %s. Check SNMP counters.\n", proto, sk->sk_num, msg); -@@ -6781,7 +6818,7 @@ u16 tcp_get_syncookie_mss(struct request_sock_ops *rsk_ops, +@@ -6781,7 +6819,7 @@ u16 tcp_get_syncookie_mss(struct request_sock_ops *rsk_ops, struct tcp_sock *tp = tcp_sk(sk); u16 mss; @@ -388787,7 +500782,7 @@ index 141e85e6422b1..686e210d89c21 100644 !inet_csk_reqsk_queue_is_full(sk)) return 0; -@@ -6815,13 +6852,15 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, +@@ -6815,13 +6853,15 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, bool want_cookie = false; struct dst_entry *dst; struct flowi fl; @@ -388805,7 +500800,7 @@ index 141e85e6422b1..686e210d89c21 100644 want_cookie = tcp_syn_flood_action(sk, rsk_ops->slab_name); if (!want_cookie) goto drop; -@@ -6870,10 +6909,12 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, +@@ -6870,10 +6910,12 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, tcp_rsk(req)->ts_off = af_ops->init_ts_off(net, skb); if (!want_cookie && !isn) { @@ -388822,7 +500817,7 @@ index 141e85e6422b1..686e210d89c21 100644 /* Without syncookies last quarter of * backlog is filled with destinations, diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c -index 5b8ce65dfc067..88a45d5650da4 100644 +index 5b8ce65dfc067..0e1fbad17dbe3 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -91,6 +91,8 @@ static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key, @@ -388846,7 +500841,16 @@ index 5b8ce65dfc067..88a45d5650da4 100644 if (reuse == 2) { /* Still does not detect *everything* that goes through -@@ -807,13 +809,15 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb) +@@ -322,6 +324,8 @@ failure: + * if necessary. 
+ */ + tcp_set_state(sk, TCP_CLOSE); ++ if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK)) ++ inet_reset_saddr(sk); + ip_rt_put(rt); + sk->sk_route_caps = 0; + inet->inet_dport = 0; +@@ -807,13 +811,15 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb) arg.tos = ip_hdr(skb)->tos; arg.uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL); local_bh_disable(); @@ -388863,7 +500867,7 @@ index 5b8ce65dfc067..88a45d5650da4 100644 } ip_send_unicast_reply(ctl_sk, skb, &TCP_SKB_CB(skb)->header.h4.opt, -@@ -822,6 +826,8 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb) +@@ -822,6 +828,8 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb) transmit_time); ctl_sk->sk_mark = 0; @@ -388872,7 +500876,7 @@ index 5b8ce65dfc067..88a45d5650da4 100644 __TCP_INC_STATS(net, TCP_MIB_OUTSEGS); __TCP_INC_STATS(net, TCP_MIB_OUTRSTS); local_bh_enable(); -@@ -905,7 +911,8 @@ static void tcp_v4_send_ack(const struct sock *sk, +@@ -905,7 +913,8 @@ static void tcp_v4_send_ack(const struct sock *sk, arg.tos = tos; arg.uid = sock_net_uid(net, sk_fullsock(sk) ? sk : NULL); local_bh_disable(); @@ -388882,7 +500886,7 @@ index 5b8ce65dfc067..88a45d5650da4 100644 ctl_sk->sk_mark = (sk->sk_state == TCP_TIME_WAIT) ? inet_twsk(sk)->tw_mark : sk->sk_mark; ctl_sk->sk_priority = (sk->sk_state == TCP_TIME_WAIT) ? -@@ -918,6 +925,7 @@ static void tcp_v4_send_ack(const struct sock *sk, +@@ -918,6 +927,7 @@ static void tcp_v4_send_ack(const struct sock *sk, transmit_time); ctl_sk->sk_mark = 0; @@ -388890,7 +500894,7 @@ index 5b8ce65dfc067..88a45d5650da4 100644 __TCP_INC_STATS(net, TCP_MIB_OUTSEGS); local_bh_enable(); } -@@ -998,7 +1006,7 @@ static int tcp_v4_send_synack(const struct sock *sk, struct dst_entry *dst, +@@ -998,7 +1008,7 @@ static int tcp_v4_send_synack(const struct sock *sk, struct dst_entry *dst, if (skb) { __tcp_v4_send_check(skb, ireq->ir_loc_addr, ireq->ir_rmt_addr); @@ -388899,7 +500903,7 @@ index 5b8ce65dfc067..88a45d5650da4 100644 (tcp_rsk(req)->syn_tos & ~INET_ECN_MASK) | (inet_sk(sk)->tos & INET_ECN_MASK) : inet_sk(sk)->tos; -@@ -1584,7 +1592,7 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb, +@@ -1584,7 +1594,7 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb, /* Set ToS of the new socket based upon the value of incoming SYN. * ECT bits are set later in tcp_init_transfer(). 
*/ @@ -388908,7 +500912,7 @@ index 5b8ce65dfc067..88a45d5650da4 100644 newinet->tos = tcp_rsk(req)->syn_tos & ~INET_ECN_MASK; if (!dst) { -@@ -1698,16 +1706,19 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) +@@ -1698,16 +1708,19 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) struct sock *rsk; if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */ @@ -388931,7 +500935,7 @@ index 5b8ce65dfc067..88a45d5650da4 100644 } } tcp_rcv_established(sk, skb); -@@ -1783,12 +1794,12 @@ int tcp_v4_early_demux(struct sk_buff *skb) +@@ -1783,12 +1796,12 @@ int tcp_v4_early_demux(struct sk_buff *skb) skb->sk = sk; skb->destructor = sock_edemux; if (sk_fullsock(sk)) { @@ -388946,7 +500950,32 @@ index 5b8ce65dfc067..88a45d5650da4 100644 skb_dst_set_noref(skb, dst); } } -@@ -1967,8 +1978,10 @@ int tcp_v4_rcv(struct sk_buff *skb) +@@ -1797,8 +1810,7 @@ int tcp_v4_early_demux(struct sk_buff *skb) + + bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb) + { +- u32 limit = READ_ONCE(sk->sk_rcvbuf) + READ_ONCE(sk->sk_sndbuf); +- u32 tail_gso_size, tail_gso_segs; ++ u32 limit, tail_gso_size, tail_gso_segs; + struct skb_shared_info *shinfo; + const struct tcphdr *th; + struct tcphdr *thtail; +@@ -1902,11 +1914,13 @@ bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb) + __skb_push(skb, hdrlen); + + no_coalesce: ++ limit = (u32)READ_ONCE(sk->sk_rcvbuf) + (u32)(READ_ONCE(sk->sk_sndbuf) >> 1); ++ + /* Only socket owner can try to collapse/prune rx queues + * to reduce memory overhead, so add a little headroom here. + * Few sockets backlog are possibly concurrently non empty. + */ +- limit += 64*1024; ++ limit += 64 * 1024; + + if (unlikely(sk_add_backlog(sk, skb, limit))) { + bh_unlock_sock(sk); +@@ -1967,8 +1981,10 @@ int tcp_v4_rcv(struct sk_buff *skb) const struct tcphdr *th; bool refcounted; struct sock *sk; @@ -388957,7 +500986,7 @@ index 5b8ce65dfc067..88a45d5650da4 100644 if (skb->pkt_type != PACKET_HOST) goto discard_it; -@@ -1980,8 +1993,10 @@ int tcp_v4_rcv(struct sk_buff *skb) +@@ -1980,8 +1996,10 @@ int tcp_v4_rcv(struct sk_buff *skb) th = (const struct tcphdr *)skb->data; @@ -388969,7 +500998,7 @@ index 5b8ce65dfc067..88a45d5650da4 100644 if (!pskb_may_pull(skb, th->doff * 4)) goto discard_it; -@@ -2011,7 +2026,8 @@ process: +@@ -2011,7 +2029,8 @@ process: struct sock *nsk; sk = req->rsk_listener; @@ -388979,7 +501008,7 @@ index 5b8ce65dfc067..88a45d5650da4 100644 sk_drops_add(sk, skb); reqsk_put(req); goto discard_it; -@@ -2058,6 +2074,7 @@ process: +@@ -2058,6 +2077,7 @@ process: } goto discard_and_relse; } @@ -388987,7 +501016,7 @@ index 5b8ce65dfc067..88a45d5650da4 100644 if (nsk == sk) { reqsk_put(req); tcp_v4_restore_cb(skb); -@@ -2082,8 +2099,10 @@ process: +@@ -2082,8 +2102,10 @@ process: nf_reset_ct(skb); @@ -388999,7 +501028,7 @@ index 5b8ce65dfc067..88a45d5650da4 100644 th = (const struct tcphdr *)skb->data; iph = ip_hdr(skb); tcp_v4_fill_cb(skb, iph, th); -@@ -2120,6 +2139,7 @@ put_and_return: +@@ -2120,6 +2142,7 @@ put_and_return: return ret; no_tcp_socket: @@ -389007,7 +501036,7 @@ index 5b8ce65dfc067..88a45d5650da4 100644 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) goto discard_it; -@@ -2127,6 +2147,7 @@ no_tcp_socket: +@@ -2127,6 +2150,7 @@ no_tcp_socket: if (tcp_checksum_complete(skb)) { csum_error: @@ -389015,7 +501044,7 @@ index 5b8ce65dfc067..88a45d5650da4 100644 trace_tcp_bad_csum(skb); __TCP_INC_STATS(net, TCP_MIB_CSUMERRORS); bad_packet: -@@ -2137,7 +2158,7 @@ bad_packet: +@@ -2137,7 +2161,7 @@ bad_packet: discard_it: /* Discard frame. 
*/ @@ -389024,7 +501053,7 @@ index 5b8ce65dfc067..88a45d5650da4 100644 return 0; discard_and_relse: -@@ -2200,8 +2221,8 @@ void inet_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb) +@@ -2200,8 +2224,8 @@ void inet_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb) struct dst_entry *dst = skb_dst(skb); if (dst && dst_hold_safe(dst)) { @@ -389035,7 +501064,7 @@ index 5b8ce65dfc067..88a45d5650da4 100644 } } EXPORT_SYMBOL(inet_sk_rx_dst_set); -@@ -2653,7 +2674,7 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i) +@@ -2653,7 +2677,7 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i) jiffies_to_clock_t(icsk->icsk_rto), jiffies_to_clock_t(icsk->icsk_ack.ato), (icsk->icsk_ack.quick << 1) | inet_csk_in_pingpong_mode(sk), @@ -389044,7 +501073,7 @@ index 5b8ce65dfc067..88a45d5650da4 100644 state == TCP_LISTEN ? fastopenq->max_qlen : (tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh)); -@@ -3098,41 +3119,14 @@ EXPORT_SYMBOL(tcp_prot); +@@ -3098,41 +3122,14 @@ EXPORT_SYMBOL(tcp_prot); static void __net_exit tcp_sk_exit(struct net *net) { @@ -389087,7 +501116,7 @@ index 5b8ce65dfc067..88a45d5650da4 100644 net->ipv4.sysctl_tcp_ecn = 2; net->ipv4.sysctl_tcp_ecn_fallback = 1; -@@ -3216,10 +3210,6 @@ static int __net_init tcp_sk_init(struct net *net) +@@ -3216,10 +3213,6 @@ static int __net_init tcp_sk_init(struct net *net) net->ipv4.tcp_congestion_control = &tcp_reno; return 0; @@ -389098,7 +501127,7 @@ index 5b8ce65dfc067..88a45d5650da4 100644 } static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list) -@@ -3313,6 +3303,24 @@ static void __init bpf_iter_register(void) +@@ -3313,6 +3306,24 @@ static void __init bpf_iter_register(void) void __init tcp_v4_init(void) { @@ -390014,6 +502043,21 @@ index 20cf4a98c69d8..50bba370486e8 100644 __sk_dst_reset(sk); out:; +diff --git a/net/ipv4/tcp_ulp.c b/net/ipv4/tcp_ulp.c +index 7c27aa629af19..8e135af0d4f70 100644 +--- a/net/ipv4/tcp_ulp.c ++++ b/net/ipv4/tcp_ulp.c +@@ -136,6 +136,10 @@ static int __tcp_set_ulp(struct sock *sk, const struct tcp_ulp_ops *ulp_ops) + if (icsk->icsk_ulp_ops) + goto out_err; + ++ err = -ENOTCONN; ++ if (!ulp_ops->clone && sk->sk_state == TCP_LISTEN) ++ goto out_err; ++ + err = ulp_ops->init(sk); + if (err) + goto out_err; diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c index c8003c8aad2c0..786848ad37ea8 100644 --- a/net/ipv4/tcp_vegas.c @@ -390395,7 +502439,7 @@ index 2fffcf2b54f3f..79d5425bed07c 100644 "inode ref pointer drops"); else { diff --git a/net/ipv4/udp_tunnel_core.c b/net/ipv4/udp_tunnel_core.c -index b97e3635acf50..46101fd67a477 100644 +index b97e3635acf50..1ff5b8e30bb92 100644 --- a/net/ipv4/udp_tunnel_core.c +++ b/net/ipv4/udp_tunnel_core.c @@ -75,6 +75,7 @@ void setup_udp_tunnel_sock(struct net *net, struct socket *sock, @@ -390406,6 +502450,14 @@ index b97e3635acf50..46101fd67a477 100644 udp_sk(sk)->encap_err_lookup = cfg->encap_err_lookup; udp_sk(sk)->encap_destroy = cfg->encap_destroy; udp_sk(sk)->gro_receive = cfg->gro_receive; +@@ -178,6 +179,7 @@ EXPORT_SYMBOL_GPL(udp_tunnel_xmit_skb); + void udp_tunnel_sock_release(struct socket *sock) + { + rcu_assign_sk_user_data(sock->sk, NULL); ++ synchronize_rcu(); + kernel_sock_shutdown(sock, SHUT_RDWR); + sock_release(sock); + } diff --git a/net/ipv4/udp_tunnel_nic.c b/net/ipv4/udp_tunnel_nic.c index b91003538d87a..bc3a043a5d5c7 100644 --- a/net/ipv4/udp_tunnel_nic.c @@ -390429,7 +502481,7 @@ index 2fe5860c21d6e..b146ce88c5d0c 100644 } -EXPORT_SYMBOL(xfrm4_protocol_init); diff --git 
a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c -index c6a90b7bbb70e..8800987fdb402 100644 +index c6a90b7bbb70e..6ba34f51c411f 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -552,7 +552,7 @@ static int inet6_netconf_fill_devconf(struct sk_buff *skb, int ifindex, @@ -390544,17 +502596,89 @@ index c6a90b7bbb70e..8800987fdb402 100644 ifp->valid_lft = valid_lft; ifp->prefered_lft = prefered_lft; ifp->tstamp = now; -@@ -3110,6 +3130,9 @@ static void add_v4_addrs(struct inet6_dev *idev) +@@ -3109,14 +3129,17 @@ static void add_v4_addrs(struct inet6_dev *idev) + offset = sizeof(struct in6_addr) - 4; memcpy(&addr.s6_addr32[3], idev->dev->dev_addr + offset, 4); - if (idev->dev->flags&IFF_POINTOPOINT) { +- if (idev->dev->flags&IFF_POINTOPOINT) { +- addr.s6_addr32[0] = htonl(0xfe800000); +- scope = IFA_LINK; +- plen = 64; +- } else { ++ if (!(idev->dev->flags & IFF_POINTOPOINT) && idev->dev->type == ARPHRD_SIT) { + scope = IPV6_ADDR_COMPATv4; + plen = 96; + pflags |= RTF_NONEXTHOP; ++ } else { + if (idev->cnf.addr_gen_mode == IN6_ADDR_GEN_MODE_NONE) + return; + - addr.s6_addr32[0] = htonl(0xfe800000); - scope = IFA_LINK; - plen = 64; -@@ -3712,8 +3735,10 @@ static int addrconf_ifdown(struct net_device *dev, bool unregister) ++ addr.s6_addr32[0] = htonl(0xfe800000); ++ scope = IFA_LINK; ++ plen = 64; + } + + if (addr.s6_addr32[3]) { +@@ -3424,6 +3447,30 @@ static void addrconf_gre_config(struct net_device *dev) + } + #endif + ++static void addrconf_init_auto_addrs(struct net_device *dev) ++{ ++ switch (dev->type) { ++#if IS_ENABLED(CONFIG_IPV6_SIT) ++ case ARPHRD_SIT: ++ addrconf_sit_config(dev); ++ break; ++#endif ++#if IS_ENABLED(CONFIG_NET_IPGRE) || IS_ENABLED(CONFIG_IPV6_GRE) ++ case ARPHRD_IP6GRE: ++ case ARPHRD_IPGRE: ++ addrconf_gre_config(dev); ++ break; ++#endif ++ case ARPHRD_LOOPBACK: ++ init_loopback(dev); ++ break; ++ ++ default: ++ addrconf_dev_config(dev); ++ break; ++ } ++} ++ + static int fixup_permanent_addr(struct net *net, + struct inet6_dev *idev, + struct inet6_ifaddr *ifp) +@@ -3588,26 +3635,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event, + run_pending = 1; + } + +- switch (dev->type) { +-#if IS_ENABLED(CONFIG_IPV6_SIT) +- case ARPHRD_SIT: +- addrconf_sit_config(dev); +- break; +-#endif +-#if IS_ENABLED(CONFIG_NET_IPGRE) || IS_ENABLED(CONFIG_IPV6_GRE) +- case ARPHRD_IP6GRE: +- case ARPHRD_IPGRE: +- addrconf_gre_config(dev); +- break; +-#endif +- case ARPHRD_LOOPBACK: +- init_loopback(dev); +- break; +- +- default: +- addrconf_dev_config(dev); +- break; +- } ++ addrconf_init_auto_addrs(dev); + + if (!IS_ERR_OR_NULL(idev)) { + if (run_pending) +@@ -3712,8 +3740,10 @@ static int addrconf_ifdown(struct net_device *dev, bool unregister) unsigned long event = unregister ? 
NETDEV_UNREGISTER : NETDEV_DOWN; struct net *net = dev_net(dev); struct inet6_dev *idev; @@ -390566,7 +502690,7 @@ index c6a90b7bbb70e..8800987fdb402 100644 int state, i; ASSERT_RTNL(); -@@ -3779,7 +3804,10 @@ restart: +@@ -3779,7 +3809,10 @@ restart: addrconf_del_rs_timer(idev); @@ -390578,7 +502702,7 @@ index c6a90b7bbb70e..8800987fdb402 100644 if (!unregister) idev->if_flags &= ~(IF_RS_SENT|IF_RA_RCVD|IF_READY); -@@ -3800,16 +3828,23 @@ restart: +@@ -3800,16 +3833,23 @@ restart: write_lock_bh(&idev->lock); } @@ -390604,7 +502728,7 @@ index c6a90b7bbb70e..8800987fdb402 100644 spin_lock_bh(&ifa->lock); if (keep) { -@@ -3840,20 +3875,19 @@ restart: +@@ -3840,20 +3880,19 @@ restart: addrconf_leave_solict(ifa->idev, &ifa->addr); } @@ -390628,7 +502752,7 @@ index c6a90b7bbb70e..8800987fdb402 100644 ipv6_mc_down(idev); } -@@ -4181,7 +4215,8 @@ static void addrconf_dad_completed(struct inet6_ifaddr *ifp, bool bump_id, +@@ -4181,7 +4220,8 @@ static void addrconf_dad_completed(struct inet6_ifaddr *ifp, bool bump_id, send_rs = send_mld && ipv6_accept_ra(ifp->idev) && ifp->idev->cnf.rtr_solicits != 0 && @@ -390638,7 +502762,7 @@ index c6a90b7bbb70e..8800987fdb402 100644 read_unlock_bh(&ifp->idev->lock); /* While dad is in progress mld report's source address is in6_addrany. -@@ -4980,6 +5015,7 @@ static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa, +@@ -4980,6 +5020,7 @@ static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa, nla_put_s32(skb, IFA_TARGET_NETNSID, args->netnsid)) goto error; @@ -390646,7 +502770,7 @@ index c6a90b7bbb70e..8800987fdb402 100644 if (!((ifa->flags&IFA_F_PERMANENT) && (ifa->prefered_lft == INFINITY_LIFE_TIME))) { preferred = ifa->prefered_lft; -@@ -5001,6 +5037,7 @@ static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa, +@@ -5001,6 +5042,7 @@ static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa, preferred = INFINITY_LIFE_TIME; valid = INFINITY_LIFE_TIME; } @@ -390654,7 +502778,7 @@ index c6a90b7bbb70e..8800987fdb402 100644 if (!ipv6_addr_any(&ifa->peer_addr)) { if (nla_put_in6_addr(skb, IFA_LOCAL, &ifa->addr) < 0 || -@@ -5129,9 +5166,9 @@ next: +@@ -5129,9 +5171,9 @@ next: fillargs->event = RTM_GETMULTICAST; /* multicast address */ @@ -390666,7 +502790,7 @@ index c6a90b7bbb70e..8800987fdb402 100644 if (ip_idx < s_ip_idx) continue; err = inet6_fill_ifmcaddr(skb, ifmca, fillargs); -@@ -5515,7 +5552,7 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf, +@@ -5515,7 +5557,7 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf, array[DEVCONF_USE_OPTIMISTIC] = cnf->use_optimistic; #endif #ifdef CONFIG_IPV6_MROUTE @@ -390675,7 +502799,25 @@ index c6a90b7bbb70e..8800987fdb402 100644 #endif array[DEVCONF_DISABLE_IPV6] = cnf->disable_ipv6; array[DEVCONF_ACCEPT_DAD] = cnf->accept_dad; -@@ -7091,9 +7128,8 @@ static int __net_init addrconf_init_net(struct net *net) +@@ -6348,7 +6390,7 @@ static int addrconf_sysctl_addr_gen_mode(struct ctl_table *ctl, int write, + + if (idev->cnf.addr_gen_mode != new_val) { + idev->cnf.addr_gen_mode = new_val; +- addrconf_dev_config(idev->dev); ++ addrconf_init_auto_addrs(idev->dev); + } + } else if (&net->ipv6.devconf_all->addr_gen_mode == ctl->data) { + struct net_device *dev; +@@ -6359,7 +6401,7 @@ static int addrconf_sysctl_addr_gen_mode(struct ctl_table *ctl, int write, + if (idev && + idev->cnf.addr_gen_mode != new_val) { + idev->cnf.addr_gen_mode = new_val; +- addrconf_dev_config(idev->dev); ++ addrconf_init_auto_addrs(idev->dev); + } 
+ } + } +@@ -7091,9 +7133,8 @@ static int __net_init addrconf_init_net(struct net *net) if (!dflt) goto err_alloc_dflt; @@ -390687,6 +502829,18 @@ index c6a90b7bbb70e..8800987fdb402 100644 case 1: /* copy from init_net */ memcpy(all, init_net.ipv6.devconf_all, sizeof(ipv6_devconf)); +diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c +index 8a22486cf2702..17ac45aa7194c 100644 +--- a/net/ipv6/addrlabel.c ++++ b/net/ipv6/addrlabel.c +@@ -437,6 +437,7 @@ static void ip6addrlbl_putmsg(struct nlmsghdr *nlh, + { + struct ifaddrlblmsg *ifal = nlmsg_data(nlh); + ifal->ifal_family = AF_INET6; ++ ifal->__ifal_reserved = 0; + ifal->ifal_prefixlen = prefixlen; + ifal->ifal_flags = 0; + ifal->ifal_index = ifindex; diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index b5878bb8e419d..3a91d0d40aecc 100644 --- a/net/ipv6/af_inet6.c @@ -390752,9 +502906,18 @@ index b5878bb8e419d..3a91d0d40aecc 100644 .fib6_rt_update = fib6_rt_update, .ip6_del_rt = ip6_del_rt, diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c -index 206f66310a88d..f4559e5bc84bf 100644 +index 206f66310a88d..a30ff5d6808aa 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c +@@ -51,7 +51,7 @@ static void ip6_datagram_flow_key_init(struct flowi6 *fl6, struct sock *sk) + fl6->flowi6_mark = sk->sk_mark; + fl6->fl6_dport = inet->inet_dport; + fl6->fl6_sport = inet->inet_sport; +- fl6->flowlabel = np->flow_label; ++ fl6->flowlabel = ip6_make_flowinfo(np->tclass, np->flow_label); + fl6->flowi6_uid = sk->sk_uid; + + if (!fl6->flowi6_oif) @@ -256,7 +256,7 @@ ipv4_connected: goto out; } @@ -390801,7 +502964,7 @@ index ed2f061b87685..6219d97cac7a3 100644 th = (void *)(skb->data + offset); hdr_len += offset; diff --git a/net/ipv6/esp6_offload.c b/net/ipv6/esp6_offload.c -index a349d47980776..302170882382a 100644 +index a349d47980776..4cc19acfc369e 100644 --- a/net/ipv6/esp6_offload.c +++ b/net/ipv6/esp6_offload.c @@ -198,6 +198,9 @@ static struct sk_buff *xfrm6_beet_gso_segment(struct xfrm_state *x, @@ -390814,6 +502977,16 @@ index a349d47980776..302170882382a 100644 __skb_pull(skb, skb_transport_offset(skb)); ops = rcu_dereference(inet6_offloads[proto]); if (likely(ops && ops->callbacks.gso_segment)) +@@ -340,6 +343,9 @@ static int esp6_xmit(struct xfrm_state *x, struct sk_buff *skb, netdev_features + xo->seq.low += skb_shinfo(skb)->gso_segs; + } + ++ if (xo->seq.low < seq) ++ xo->seq.hi++; ++ + esp.seqno = cpu_to_be64(xo->seq.low + ((u64)xo->seq.hi << 32)); + + len = skb->len - sizeof(struct ipv6hdr); diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c index 40f3e4f9f33a2..dcedfe29d9d93 100644 --- a/net/ipv6/fib6_rules.c @@ -391031,7 +503204,7 @@ index aa673a6a7e432..ceb85c67ce395 100644 done: diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c -index 3ad201d372d88..70ef4d4ebff48 100644 +index 3ad201d372d88..13b1748b8b465 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -724,6 +724,7 @@ static netdev_tx_t __gre6_xmit(struct sk_buff *skb, @@ -391154,7 +503327,29 @@ index 3ad201d372d88..70ef4d4ebff48 100644 if (!(p->flags&IP6_TNL_F_USE_ORIG_TCLASS)) fl6->flowlabel |= IPV6_TCLASS_MASK & p->flowinfo; -@@ -1544,7 +1556,7 @@ static void ip6gre_fb_tunnel_init(struct net_device *dev) +@@ -1141,14 +1153,16 @@ static void ip6gre_tnl_link_config_route(struct ip6_tnl *t, int set_mtu, + dev->needed_headroom = dst_len; + + if (set_mtu) { +- dev->mtu = rt->dst.dev->mtu - t_hlen; ++ int mtu = rt->dst.dev->mtu - t_hlen; ++ + if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) +- dev->mtu -= 8; ++ mtu -= 8; + if (dev->type == 
ARPHRD_ETHER) +- dev->mtu -= ETH_HLEN; ++ mtu -= ETH_HLEN; + +- if (dev->mtu < IPV6_MIN_MTU) +- dev->mtu = IPV6_MIN_MTU; ++ if (mtu < IPV6_MIN_MTU) ++ mtu = IPV6_MIN_MTU; ++ WRITE_ONCE(dev->mtu, mtu); + } + } + ip6_rt_put(rt); +@@ -1544,7 +1558,7 @@ static void ip6gre_fb_tunnel_init(struct net_device *dev) static struct inet6_protocol ip6gre_protocol __read_mostly = { .handler = gre_rcv, .err_handler = ip6gre_err, @@ -391164,10 +503359,44 @@ index 3ad201d372d88..70ef4d4ebff48 100644 static void ip6gre_destroy_tunnels(struct net *net, struct list_head *head) diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c -index 80256717868e6..d4b1e2c5aa76d 100644 +index 80256717868e6..32071529bfd98 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c -@@ -508,7 +508,7 @@ int ip6_mc_input(struct sk_buff *skb) +@@ -45,20 +45,23 @@ + #include <net/inet_ecn.h> + #include <net/dst_metadata.h> + +-INDIRECT_CALLABLE_DECLARE(void tcp_v6_early_demux(struct sk_buff *)); + static void ip6_rcv_finish_core(struct net *net, struct sock *sk, + struct sk_buff *skb) + { +- void (*edemux)(struct sk_buff *skb); +- +- if (net->ipv4.sysctl_ip_early_demux && !skb_dst(skb) && skb->sk == NULL) { +- const struct inet6_protocol *ipprot; +- +- ipprot = rcu_dereference(inet6_protos[ipv6_hdr(skb)->nexthdr]); +- if (ipprot && (edemux = READ_ONCE(ipprot->early_demux))) +- INDIRECT_CALL_2(edemux, tcp_v6_early_demux, +- udp_v6_early_demux, skb); ++ if (READ_ONCE(net->ipv4.sysctl_ip_early_demux) && ++ !skb_dst(skb) && !skb->sk) { ++ switch (ipv6_hdr(skb)->nexthdr) { ++ case IPPROTO_TCP: ++ if (READ_ONCE(net->ipv4.sysctl_tcp_early_demux)) ++ tcp_v6_early_demux(skb); ++ break; ++ case IPPROTO_UDP: ++ if (READ_ONCE(net->ipv4.sysctl_udp_early_demux)) ++ udp_v6_early_demux(skb); ++ break; ++ } + } ++ + if (!skb_valid_dst(skb)) + ip6_route_input(skb); + } +@@ -508,7 +511,7 @@ int ip6_mc_input(struct sk_buff *skb) /* * IPv6 multicast router mode is now supported ;) */ @@ -391190,7 +503419,7 @@ index 1b9827ff8ccf4..172565d125704 100644 if (IS_ERR_OR_NULL(segs)) diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c -index 2f044a49afa8c..7951ade74d142 100644 +index 2f044a49afa8c..383442ded9542 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -174,7 +174,7 @@ static int __ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff @@ -391211,7 +503440,55 @@ index 2f044a49afa8c..7951ade74d142 100644 !xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) { __IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS); goto drop; -@@ -1289,8 +1289,7 @@ struct dst_entry *ip6_dst_lookup_tunnel(struct sk_buff *skb, +@@ -527,7 +527,20 @@ int ip6_forward(struct sk_buff *skb) + pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) { + int proxied = ip6_forward_proxy_check(skb); + if (proxied > 0) { +- hdr->hop_limit--; ++ /* It's tempting to decrease the hop limit ++ * here by 1, as we do at the end of the ++ * function too. ++ * ++ * But that would be incorrect, as proxying is ++ * not forwarding. The ip6_input function ++ * will handle this packet locally, and it ++ * depends on the hop limit being unchanged. ++ * ++ * One example is the NDP hop limit, that ++ * always has to stay 255, but other would be ++ * similar checks around RA packets, where the ++ * user can even change the desired limit. 
++ */ + return ip6_input(skb); + } else if (proxied < 0) { + __IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS); +@@ -897,6 +910,9 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, + if (err < 0) + goto fail; + ++ /* We prevent @rt from being freed. */ ++ rcu_read_lock(); ++ + for (;;) { + /* Prepare header of the next frame, + * before previous one went down. */ +@@ -920,6 +936,7 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, + if (err == 0) { + IP6_INC_STATS(net, ip6_dst_idev(&rt->dst), + IPSTATS_MIB_FRAGOKS); ++ rcu_read_unlock(); + return 0; + } + +@@ -927,6 +944,7 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, + + IP6_INC_STATS(net, ip6_dst_idev(&rt->dst), + IPSTATS_MIB_FRAGFAILS); ++ rcu_read_unlock(); + return err; + + slow_path_clean: +@@ -1289,8 +1307,7 @@ struct dst_entry *ip6_dst_lookup_tunnel(struct sk_buff *skb, fl6.daddr = info->key.u.ipv6.dst; fl6.saddr = info->key.u.ipv6.src; prio = info->key.tos; @@ -391221,7 +503498,7 @@ index 2f044a49afa8c..7951ade74d142 100644 dst = ipv6_stub->ipv6_dst_lookup_flow(net, sock->sk, &fl6, NULL); -@@ -1408,8 +1407,6 @@ static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork, +@@ -1408,8 +1425,6 @@ static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork, if (np->frag_size) mtu = np->frag_size; } @@ -391230,7 +503507,7 @@ index 2f044a49afa8c..7951ade74d142 100644 cork->base.fragsize = mtu; cork->base.gso_size = ipc6->gso_size; cork->base.tx_flags = 0; -@@ -1465,14 +1462,12 @@ static int __ip6_append_data(struct sock *sk, +@@ -1465,14 +1480,12 @@ static int __ip6_append_data(struct sock *sk, if (cork->tx_flags & SKBTX_ANY_SW_TSTAMP && sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID) @@ -391246,7 +503523,7 @@ index 2f044a49afa8c..7951ade74d142 100644 headersize = sizeof(struct ipv6hdr) + (opt ? 
opt->opt_flen + opt->opt_nflen : 0) + -@@ -1480,6 +1475,13 @@ static int __ip6_append_data(struct sock *sk, +@@ -1480,6 +1493,13 @@ static int __ip6_append_data(struct sock *sk, sizeof(struct frag_hdr) : 0) + rt->rt6i_nfheader_len; @@ -391261,7 +503538,7 @@ index 2f044a49afa8c..7951ade74d142 100644 * the first fragment */ diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c -index 20a67efda47f5..fa8da8ff35b42 100644 +index 20a67efda47f5..ea50779428711 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -1036,14 +1036,14 @@ int ip6_tnl_xmit_ctl(struct ip6_tnl *t, @@ -391283,6 +503560,34 @@ index 20a67efda47f5..fa8da8ff35b42 100644 else ret = 1; rcu_read_unlock(); +@@ -1446,8 +1446,8 @@ static void ip6_tnl_link_config(struct ip6_tnl *t) + struct net_device *tdev = NULL; + struct __ip6_tnl_parm *p = &t->parms; + struct flowi6 *fl6 = &t->fl.u.ip6; +- unsigned int mtu; + int t_hlen; ++ int mtu; + + memcpy(dev->dev_addr, &p->laddr, sizeof(struct in6_addr)); + memcpy(dev->broadcast, &p->raddr, sizeof(struct in6_addr)); +@@ -1494,12 +1494,13 @@ static void ip6_tnl_link_config(struct ip6_tnl *t) + dev->hard_header_len = tdev->hard_header_len + t_hlen; + mtu = min_t(unsigned int, tdev->mtu, IP6_MAX_MTU); + +- dev->mtu = mtu - t_hlen; ++ mtu = mtu - t_hlen; + if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) +- dev->mtu -= 8; ++ mtu -= 8; + +- if (dev->mtu < IPV6_MIN_MTU) +- dev->mtu = IPV6_MIN_MTU; ++ if (mtu < IPV6_MIN_MTU) ++ mtu = IPV6_MIN_MTU; ++ WRITE_ONCE(dev->mtu, mtu); + } + } + } diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index 1d8e3ffa225d8..42c37ec832f15 100644 --- a/net/ipv6/ip6_vti.c @@ -391347,7 +503652,7 @@ index 36ed9efb88254..91f1c5f56d5fa 100644 inet6_netconf_notify_devconf(net, RTM_NEWNETCONF, NETCONFA_MC_FORWARDING, diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c -index e4bdb09c55867..8a1c78f385084 100644 +index e4bdb09c55867..b24e0e5d55f9e 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -208,7 +208,7 @@ static int ipv6_set_mcast_msfilter(struct sock *sk, sockptr_t optval, @@ -391368,6 +503673,27 @@ index e4bdb09c55867..8a1c78f385084 100644 return -ENOBUFS; p = kmalloc(optlen + 4, GFP_KERNEL); +@@ -417,6 +417,12 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, + rtnl_lock(); + lock_sock(sk); + ++ /* Another thread has converted the socket into IPv4 with ++ * IPV6_ADDRFORM concurrently. 
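
/*
 * Aside: several hunks above stop updating dev->mtu in steps
 * ("dev->mtu = ...; dev->mtu -= 8;") and instead compute the value in a
 * local variable, then publish it once with WRITE_ONCE(). A concurrent
 * reader using READ_ONCE() then never observes a half-updated MTU.
 * A compact sketch with simplified stand-ins for the kernel macros
 * (the real ones live in <linux/compiler.h>; these cut-down versions
 * are for illustration only):
 */
#include <stdio.h>

#define WRITE_ONCE(x, val) (*(volatile __typeof__(x) *)&(x) = (val))
#define READ_ONCE(x)       (*(volatile __typeof__(x) *)&(x))

static int dev_mtu = 1500;

static void update_mtu(int link_mtu, int overhead, int min_mtu)
{
	int mtu = link_mtu - overhead;	/* all arithmetic on a local */

	if (mtu < min_mtu)
		mtu = min_mtu;
	WRITE_ONCE(dev_mtu, mtu);	/* single, tear-free publish */
}

int main(void)
{
	update_mtu(1500, 40 + 8, 1280);
	printf("mtu=%d\n", READ_ONCE(dev_mtu));
	return 0;
}
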
++ */ ++ if (unlikely(sk->sk_family != AF_INET6)) ++ goto unlock; ++ + switch (optname) { + + case IPV6_ADDRFORM: +@@ -976,6 +982,7 @@ done: + break; + } + ++unlock: + release_sock(sk); + if (needs_rtnl) + rtnl_unlock(); diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index bed8155508c85..87c699d57b366 100644 --- a/net/ipv6/mcast.c @@ -391612,10 +503938,28 @@ index 6ac88fe24a8e0..135e3a060caa8 100644 .disconnect = __udp_disconnect, .setsockopt = ipv6_setsockopt, diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c -index 60f1e4f5be5aa..c51d5ce3711c2 100644 +index 60f1e4f5be5aa..c68020b8de89e 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c -@@ -1020,6 +1020,9 @@ static int do_rawv6_setsockopt(struct sock *sk, int level, int optname, +@@ -539,6 +539,7 @@ csum_copy_err: + static int rawv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6, + struct raw6_sock *rp) + { ++ struct ipv6_txoptions *opt; + struct sk_buff *skb; + int err = 0; + int offset; +@@ -556,6 +557,9 @@ static int rawv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6, + + offset = rp->offset; + total_len = inet_sk(sk)->cork.base.length; ++ opt = inet6_sk(sk)->cork.opt; ++ total_len -= opt ? opt->opt_flen : 0; ++ + if (offset >= total_len - 1) { + err = -EINVAL; + ip6_flush_pending_frames(sk); +@@ -1020,6 +1024,9 @@ static int do_rawv6_setsockopt(struct sock *sk, int level, int optname, struct raw6_sock *rp = raw6_sk(sk); int val; @@ -391626,7 +503970,7 @@ index 60f1e4f5be5aa..c51d5ce3711c2 100644 return -EFAULT; diff --git a/net/ipv6/route.c b/net/ipv6/route.c -index 9b9ef09382ab9..27274fc3619ab 100644 +index 9b9ef09382ab9..0655fd8c67e93 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2802,7 +2802,7 @@ static void ip6_link_failure(struct sk_buff *skb) @@ -391839,6 +504183,27 @@ index 9b9ef09382ab9..27274fc3619ab 100644 ret = 0; out: +@@ -6514,10 +6570,16 @@ static void __net_exit ip6_route_net_exit(struct net *net) + static int __net_init ip6_route_net_init_late(struct net *net) + { + #ifdef CONFIG_PROC_FS +- proc_create_net("ipv6_route", 0, net->proc_net, &ipv6_route_seq_ops, +- sizeof(struct ipv6_route_iter)); +- proc_create_net_single("rt6_stats", 0444, net->proc_net, +- rt6_stats_seq_show, NULL); ++ if (!proc_create_net("ipv6_route", 0, net->proc_net, ++ &ipv6_route_seq_ops, ++ sizeof(struct ipv6_route_iter))) ++ return -ENOMEM; ++ ++ if (!proc_create_net_single("rt6_stats", 0444, net->proc_net, ++ rt6_stats_seq_show, NULL)) { ++ remove_proc_entry("ipv6_route", net->proc_net); ++ return -ENOMEM; ++ } + #endif + return 0; + } diff --git a/net/ipv6/seg6.c b/net/ipv6/seg6.c index e412817fba2f3..0c7c6fc16c3c3 100644 --- a/net/ipv6/seg6.c @@ -392058,7 +504423,7 @@ index 2dc40b3f373ef..59454285d5c58 100644 seg6_lookup_nexthop(skb, NULL, 0); diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c -index ef0c7a7c18e23..637cd99bd7a64 100644 +index ef0c7a7c18e23..d4cdc2b1b4689 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -323,8 +323,6 @@ static int ipip6_tunnel_get_prl(struct net_device *dev, struct ip_tunnel_prl __u @@ -392097,7 +504462,114 @@ index ef0c7a7c18e23..637cd99bd7a64 100644 return ret; } -@@ -1933,7 +1931,6 @@ static int __net_init sit_init_net(struct net *net) +@@ -698,7 +696,7 @@ static int ipip6_rcv(struct sk_buff *skb) + skb->dev = tunnel->dev; + + if (packet_is_spoofed(skb, iph, tunnel)) { +- tunnel->dev->stats.rx_errors++; ++ DEV_STATS_INC(tunnel->dev, rx_errors); + goto out; + } + +@@ -718,8 +716,8 @@ static int ipip6_rcv(struct sk_buff *skb) + net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n", + 
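
/*
 * Aside: the ip6_route_net_init_late() hunk above checks both
 * proc_create_net() calls and removes the first proc entry when the
 * second one fails, instead of ignoring the errors. A sketch of that
 * unwind pattern with invented resource names; acquire()/release()
 * stand in for proc_create_net()/remove_proc_entry():
 */
#include <stdlib.h>

static void *entry_a, *entry_b;

static void *acquire(void) { return malloc(1); }
static void release(void *p) { free(p); }

static int subsys_init(void)
{
	entry_a = acquire();
	if (!entry_a)
		return -1;

	entry_b = acquire();
	if (!entry_b) {
		/* undo the step that already succeeded */
		release(entry_a);
		entry_a = NULL;
		return -1;
	}
	return 0;
}

static void subsys_exit(void)
{
	release(entry_b);
	release(entry_a);
}

int main(void)
{
	if (subsys_init())
		return 1;
	subsys_exit();
	return 0;
}
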
&iph->saddr, iph->tos); + if (err > 1) { +- ++tunnel->dev->stats.rx_frame_errors; +- ++tunnel->dev->stats.rx_errors; ++ DEV_STATS_INC(tunnel->dev, rx_frame_errors); ++ DEV_STATS_INC(tunnel->dev, rx_errors); + goto out; + } + } +@@ -950,7 +948,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, + if (!rt) { + rt = ip_route_output_flow(tunnel->net, &fl4, NULL); + if (IS_ERR(rt)) { +- dev->stats.tx_carrier_errors++; ++ DEV_STATS_INC(dev, tx_carrier_errors); + goto tx_error_icmp; + } + dst_cache_set_ip4(&tunnel->dst_cache, &rt->dst, fl4.saddr); +@@ -958,14 +956,14 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, + + if (rt->rt_type != RTN_UNICAST) { + ip_rt_put(rt); +- dev->stats.tx_carrier_errors++; ++ DEV_STATS_INC(dev, tx_carrier_errors); + goto tx_error_icmp; + } + tdev = rt->dst.dev; + + if (tdev == dev) { + ip_rt_put(rt); +- dev->stats.collisions++; ++ DEV_STATS_INC(dev, collisions); + goto tx_error; + } + +@@ -978,7 +976,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, + mtu = dst_mtu(&rt->dst) - t_hlen; + + if (mtu < IPV4_MIN_MTU) { +- dev->stats.collisions++; ++ DEV_STATS_INC(dev, collisions); + ip_rt_put(rt); + goto tx_error; + } +@@ -1017,7 +1015,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, + struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom); + if (!new_skb) { + ip_rt_put(rt); +- dev->stats.tx_dropped++; ++ DEV_STATS_INC(dev, tx_dropped); + kfree_skb(skb); + return NETDEV_TX_OK; + } +@@ -1047,7 +1045,7 @@ tx_error_icmp: + dst_link_failure(skb); + tx_error: + kfree_skb(skb); +- dev->stats.tx_errors++; ++ DEV_STATS_INC(dev, tx_errors); + return NETDEV_TX_OK; + } + +@@ -1066,7 +1064,7 @@ static netdev_tx_t sit_tunnel_xmit__(struct sk_buff *skb, + return NETDEV_TX_OK; + tx_error: + kfree_skb(skb); +- dev->stats.tx_errors++; ++ DEV_STATS_INC(dev, tx_errors); + return NETDEV_TX_OK; + } + +@@ -1095,7 +1093,7 @@ static netdev_tx_t sit_tunnel_xmit(struct sk_buff *skb, + return NETDEV_TX_OK; + + tx_err: +- dev->stats.tx_errors++; ++ DEV_STATS_INC(dev, tx_errors); + kfree_skb(skb); + return NETDEV_TX_OK; + +@@ -1132,10 +1130,12 @@ static void ipip6_tunnel_bind_dev(struct net_device *dev) + + if (tdev && !netif_is_l3_master(tdev)) { + int t_hlen = tunnel->hlen + sizeof(struct iphdr); ++ int mtu; + +- dev->mtu = tdev->mtu - t_hlen; +- if (dev->mtu < IPV6_MIN_MTU) +- dev->mtu = IPV6_MIN_MTU; ++ mtu = tdev->mtu - t_hlen; ++ if (mtu < IPV6_MIN_MTU) ++ mtu = IPV6_MIN_MTU; ++ WRITE_ONCE(dev->mtu, mtu); + } + } + +@@ -1933,7 +1933,6 @@ static int __net_init sit_init_net(struct net *net) return 0; err_reg_dev: @@ -392130,7 +504602,7 @@ index e8cfb9e997bf0..12ae817aaf2ec 100644 goto out; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c -index b03dd02c9f13c..66d00368db828 100644 +index b03dd02c9f13c..3f331455f0202 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -107,9 +107,9 @@ static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb) @@ -392146,7 +504618,24 @@ index b03dd02c9f13c..66d00368db828 100644 } } -@@ -542,7 +542,7 @@ static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst, +@@ -269,6 +269,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, + fl6.flowi6_proto = IPPROTO_TCP; + fl6.daddr = sk->sk_v6_daddr; + fl6.saddr = saddr ? 
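
/*
 * Aside: the sit.c hunks above convert "dev->stats.x++" into
 * DEV_STATS_INC(dev, x). The open-coded ++ is a non-atomic
 * read-modify-write, so two CPUs updating the same counter can lose
 * increments; the macro routes the update through atomic accessors.
 * A userspace analogue with C11 atomics; the macro name below mimics
 * the kernel one but is defined locally for illustration:
 */
#include <stdatomic.h>
#include <stdio.h>

struct dev_stats {
	atomic_long tx_errors;
	atomic_long collisions;
};

#define DEV_STATS_INC(s, field) \
	atomic_fetch_add_explicit(&(s)->field, 1, memory_order_relaxed)

int main(void)
{
	struct dev_stats st = { 0 };

	DEV_STATS_INC(&st, tx_errors);
	DEV_STATS_INC(&st, collisions);
	printf("tx_errors=%ld collisions=%ld\n",
	       atomic_load_explicit(&st.tx_errors, memory_order_relaxed),
	       atomic_load_explicit(&st.collisions, memory_order_relaxed));
	return 0;
}
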
*saddr : np->saddr; ++ fl6.flowlabel = ip6_make_flowinfo(np->tclass, np->flow_label); + fl6.flowi6_oif = sk->sk_bound_dev_if; + fl6.flowi6_mark = sk->sk_mark; + fl6.fl6_dport = usin->sin6_port; +@@ -339,6 +340,8 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, + + late_failure: + tcp_set_state(sk, TCP_CLOSE); ++ if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK)) ++ inet_reset_saddr(sk); + failure: + inet->inet_dport = 0; + sk->sk_route_caps = 0; +@@ -542,7 +545,7 @@ static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst, if (np->repflow && ireq->pktopts) fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts)); @@ -392155,7 +504644,7 @@ index b03dd02c9f13c..66d00368db828 100644 (tcp_rsk(req)->syn_tos & ~INET_ECN_MASK) | (np->tclass & INET_ECN_MASK) : np->tclass; -@@ -1001,7 +1001,10 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 +@@ -1001,7 +1004,10 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 * Underlying function will use this to retrieve the network * namespace */ @@ -392167,7 +504656,7 @@ index b03dd02c9f13c..66d00368db828 100644 if (!IS_ERR(dst)) { skb_dst_set(buff, dst); ip6_xmit(ctl_sk, buff, &fl6, fl6.flowi6_mark, NULL, -@@ -1364,7 +1367,7 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff * +@@ -1364,7 +1370,7 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff * /* Set ToS of the new socket based upon the value of incoming SYN. * ECT bits are set later in tcp_init_transfer(). */ @@ -392176,8 +504665,29 @@ index b03dd02c9f13c..66d00368db828 100644 newnp->tclass = tcp_rsk(req)->syn_tos & ~INET_ECN_MASK; /* Clone native IPv6 options from listening socket (if any) -@@ -1504,16 +1507,19 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) - opt_skb = skb_clone(skb, sk_gfp_mask(sk, GFP_ATOMIC)); +@@ -1424,14 +1430,11 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff * + + /* Clone pktoptions received with SYN, if we own the req */ + if (ireq->pktopts) { +- newnp->pktoptions = skb_clone(ireq->pktopts, +- sk_gfp_mask(sk, GFP_ATOMIC)); ++ newnp->pktoptions = skb_clone_and_charge_r(ireq->pktopts, newsk); + consume_skb(ireq->pktopts); + ireq->pktopts = NULL; +- if (newnp->pktoptions) { ++ if (newnp->pktoptions) + tcp_v6_restore_cb(newnp->pktoptions); +- skb_set_owner_r(newnp->pktoptions, newsk); +- } + } + } else { + if (!req_unhash && found_dup_sk) { +@@ -1501,19 +1504,22 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) + --ANK (980728) + */ + if (np->rxopt.all) +- opt_skb = skb_clone(skb, sk_gfp_mask(sk, GFP_ATOMIC)); ++ opt_skb = skb_clone_and_charge_r(skb, sk); if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */ - struct dst_entry *dst = sk->sk_rx_dst; @@ -392200,7 +504710,24 @@ index b03dd02c9f13c..66d00368db828 100644 } } -@@ -1875,12 +1881,12 @@ INDIRECT_CALLABLE_SCOPE void tcp_v6_early_demux(struct sk_buff *skb) +@@ -1582,7 +1588,6 @@ ipv6_pktoptions: + if (np->repflow) + np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb)); + if (ipv6_opt_accepted(sk, opt_skb, &TCP_SKB_CB(opt_skb)->header.h6)) { +- skb_set_owner_r(opt_skb, sk); + tcp_v6_restore_cb(opt_skb); + opt_skb = xchg(&np->pktoptions, opt_skb); + } else { +@@ -1848,7 +1853,7 @@ do_time_wait: + goto discard_it; + } + +-INDIRECT_CALLABLE_SCOPE void tcp_v6_early_demux(struct sk_buff *skb) ++void tcp_v6_early_demux(struct sk_buff *skb) + { + const struct ipv6hdr *hdr; + const struct tcphdr *th; +@@ 
-1875,12 +1880,12 @@ INDIRECT_CALLABLE_SCOPE void tcp_v6_early_demux(struct sk_buff *skb) skb->sk = sk; skb->destructor = sock_edemux; if (sk_fullsock(sk)) { @@ -392216,7 +504743,7 @@ index b03dd02c9f13c..66d00368db828 100644 skb_dst_set_noref(skb, dst); } } -@@ -2072,7 +2078,7 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i) +@@ -2072,7 +2077,7 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i) jiffies_to_clock_t(icsk->icsk_rto), jiffies_to_clock_t(icsk->icsk_ack.ato), (icsk->icsk_ack.quick << 1) | inet_csk_in_pingpong_mode(sp), @@ -392225,8 +504752,22 @@ index b03dd02c9f13c..66d00368db828 100644 state == TCP_LISTEN ? fastopenq->max_qlen : (tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh) +@@ -2203,12 +2208,7 @@ struct proto tcpv6_prot = { + }; + EXPORT_SYMBOL_GPL(tcpv6_prot); + +-/* thinking of making this const? Don't. +- * early_demux can change based on sysctl. +- */ +-static struct inet6_protocol tcpv6_protocol = { +- .early_demux = tcp_v6_early_demux, +- .early_demux_handler = tcp_v6_early_demux, ++static const struct inet6_protocol tcpv6_protocol = { + .handler = tcp_v6_rcv, + .err_handler = tcp_v6_err, + .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL, diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c -index 8d785232b4796..19b6c4da0f42a 100644 +index 8d785232b4796..9dfb4bb54344b 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -40,6 +40,7 @@ @@ -392295,6 +504836,15 @@ index 8d785232b4796..19b6c4da0f42a 100644 return sk; /* Only check first socket in chain */ break; +@@ -1042,7 +1046,7 @@ static struct sock *__udp6_lib_demux_lookup(struct net *net, + return NULL; + } + +-INDIRECT_CALLABLE_SCOPE void udp_v6_early_demux(struct sk_buff *skb) ++void udp_v6_early_demux(struct sk_buff *skb) + { + struct net *net = dev_net(skb->dev); + const struct udphdr *uh; @@ -1070,10 +1074,10 @@ INDIRECT_CALLABLE_SCOPE void udp_v6_early_demux(struct sk_buff *skb) skb->sk = sk; @@ -392333,6 +504883,20 @@ index 8d785232b4796..19b6c4da0f42a 100644 fl6.daddr = *daddr; if (ipv6_addr_any(&fl6.saddr) && !ipv6_addr_any(&np->saddr)) fl6.saddr = np->saddr; +@@ -1655,12 +1659,7 @@ int udpv6_getsockopt(struct sock *sk, int level, int optname, + return ipv6_getsockopt(sk, level, optname, optval, optlen); + } + +-/* thinking of making this const? Don't. +- * early_demux can change based on sysctl. 
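
/*
 * Aside: with early_demux no longer stored in the protocol table, the
 * hunks above can mark tcpv6_protocol (and udpv6_protocol below) const,
 * so the structure lands in read-only memory and the handler pointers
 * can no longer be overwritten at run time, whether by a bug or by an
 * attacker. A sketch of the same hardening step with invented types:
 */
#include <stdio.h>

struct proto_ops {
	int (*handler)(int pkt);
};

static int tcp_handler(int pkt) { return pkt + 1; }

/* static + const: placed in .rodata, no post-init writes possible */
static const struct proto_ops tcp_proto = {
	.handler = tcp_handler,
};

int main(void)
{
	printf("%d\n", tcp_proto.handler(41));
	return 0;
}
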
+- */ +-static struct inet6_protocol udpv6_protocol = { +- .early_demux = udp_v6_early_demux, +- .early_demux_handler = udp_v6_early_demux, ++static const struct inet6_protocol udpv6_protocol = { + .handler = udpv6_rcv, + .err_handler = udpv6_err, + .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL, diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c index d0d280077721b..ad07904642cad 100644 --- a/net/ipv6/xfrm6_output.c @@ -392367,11 +504931,224 @@ index d0d280077721b..ad07904642cad 100644 } else if (!skb->ignore_df && toobig && skb->sk) { xfrm_local_error(skb, mtu); kfree_skb(skb); +diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c +index af7a4b8b1e9c4..247296e3294bd 100644 +--- a/net/ipv6/xfrm6_policy.c ++++ b/net/ipv6/xfrm6_policy.c +@@ -289,9 +289,13 @@ int __init xfrm6_init(void) + if (ret) + goto out_state; + +- register_pernet_subsys(&xfrm6_net_ops); ++ ret = register_pernet_subsys(&xfrm6_net_ops); ++ if (ret) ++ goto out_protocol; + out: + return ret; ++out_protocol: ++ xfrm6_protocol_fini(); + out_state: + xfrm6_state_fini(); + out_policy: diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c -index 11a715d76a4f1..f780fbe82e7dc 100644 +index 11a715d76a4f1..9c60c0c18b4dd 100644 --- a/net/kcm/kcmsock.c +++ b/net/kcm/kcmsock.c -@@ -1411,12 +1411,6 @@ static int kcm_attach(struct socket *sock, struct socket *csock, +@@ -161,7 +161,8 @@ static void kcm_rcv_ready(struct kcm_sock *kcm) + /* Buffer limit is okay now, add to ready list */ + list_add_tail(&kcm->wait_rx_list, + &kcm->mux->kcm_rx_waiters); +- kcm->rx_wait = true; ++ /* paired with lockless reads in kcm_rfree() */ ++ WRITE_ONCE(kcm->rx_wait, true); + } + + static void kcm_rfree(struct sk_buff *skb) +@@ -177,7 +178,7 @@ static void kcm_rfree(struct sk_buff *skb) + /* For reading rx_wait and rx_psock without holding lock */ + smp_mb__after_atomic(); + +- if (!kcm->rx_wait && !kcm->rx_psock && ++ if (!READ_ONCE(kcm->rx_wait) && !READ_ONCE(kcm->rx_psock) && + sk_rmem_alloc_get(sk) < sk->sk_rcvlowat) { + spin_lock_bh(&mux->rx_lock); + kcm_rcv_ready(kcm); +@@ -220,7 +221,7 @@ static void requeue_rx_msgs(struct kcm_mux *mux, struct sk_buff_head *head) + struct sk_buff *skb; + struct kcm_sock *kcm; + +- while ((skb = __skb_dequeue(head))) { ++ while ((skb = skb_dequeue(head))) { + /* Reset destructor to avoid calling kcm_rcv_ready */ + skb->destructor = sock_rfree; + skb_orphan(skb); +@@ -236,7 +237,8 @@ try_again: + if (kcm_queue_rcv_skb(&kcm->sk, skb)) { + /* Should mean socket buffer full */ + list_del(&kcm->wait_rx_list); +- kcm->rx_wait = false; ++ /* paired with lockless reads in kcm_rfree() */ ++ WRITE_ONCE(kcm->rx_wait, false); + + /* Commit rx_wait to read in kcm_free */ + smp_wmb(); +@@ -279,10 +281,12 @@ static struct kcm_sock *reserve_rx_kcm(struct kcm_psock *psock, + kcm = list_first_entry(&mux->kcm_rx_waiters, + struct kcm_sock, wait_rx_list); + list_del(&kcm->wait_rx_list); +- kcm->rx_wait = false; ++ /* paired with lockless reads in kcm_rfree() */ ++ WRITE_ONCE(kcm->rx_wait, false); + + psock->rx_kcm = kcm; +- kcm->rx_psock = psock; ++ /* paired with lockless reads in kcm_rfree() */ ++ WRITE_ONCE(kcm->rx_psock, psock); + + spin_unlock_bh(&mux->rx_lock); + +@@ -309,7 +313,8 @@ static void unreserve_rx_kcm(struct kcm_psock *psock, + spin_lock_bh(&mux->rx_lock); + + psock->rx_kcm = NULL; +- kcm->rx_psock = NULL; ++ /* paired with lockless reads in kcm_rfree() */ ++ WRITE_ONCE(kcm->rx_psock, NULL); + + /* Commit kcm->rx_psock before sk_rmem_alloc_get to sync with + * kcm_rfree +@@ -833,7 +838,7 @@ 
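
/*
 * Aside: the kcm hunk above switches requeue_rx_msgs() from
 * __skb_dequeue() to skb_dequeue(). By kernel convention the
 * double-underscore variant assumes the caller already holds the queue
 * lock, while the plain variant takes it itself; using the lockless
 * variant on a queue that other contexts also touch is a race. A
 * sketch of that naming convention, with invented types and names:
 */
#include <pthread.h>

struct queue {
	pthread_mutex_t lock;
	int items[16];
	int head, tail;
};

/* caller must hold q->lock */
static int __queue_pop(struct queue *q)
{
	if (q->head == q->tail)
		return -1;
	return q->items[q->head++ % 16];
}

/* takes the lock itself; safe from any context */
static int queue_pop(struct queue *q)
{
	int v;

	pthread_mutex_lock(&q->lock);
	v = __queue_pop(q);
	pthread_mutex_unlock(&q->lock);
	return v;
}

int main(void)
{
	struct queue q = { .lock = PTHREAD_MUTEX_INITIALIZER };

	return queue_pop(&q) == -1 ? 0 : 1;
}
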
static ssize_t kcm_sendpage(struct socket *sock, struct page *page, + } + + get_page(page); +- skb_fill_page_desc(skb, i, page, offset, size); ++ skb_fill_page_desc_noacc(skb, i, page, offset, size); + skb_shinfo(skb)->flags |= SKBFL_SHARED_FRAG; + + coalesced: +@@ -1079,53 +1084,18 @@ out_error: + return err; + } + +-static struct sk_buff *kcm_wait_data(struct sock *sk, int flags, +- long timeo, int *err) +-{ +- struct sk_buff *skb; +- +- while (!(skb = skb_peek(&sk->sk_receive_queue))) { +- if (sk->sk_err) { +- *err = sock_error(sk); +- return NULL; +- } +- +- if (sock_flag(sk, SOCK_DONE)) +- return NULL; +- +- if ((flags & MSG_DONTWAIT) || !timeo) { +- *err = -EAGAIN; +- return NULL; +- } +- +- sk_wait_data(sk, &timeo, NULL); +- +- /* Handle signals */ +- if (signal_pending(current)) { +- *err = sock_intr_errno(timeo); +- return NULL; +- } +- } +- +- return skb; +-} +- + static int kcm_recvmsg(struct socket *sock, struct msghdr *msg, + size_t len, int flags) + { ++ int noblock = flags & MSG_DONTWAIT; + struct sock *sk = sock->sk; + struct kcm_sock *kcm = kcm_sk(sk); + int err = 0; +- long timeo; + struct strp_msg *stm; + int copied = 0; + struct sk_buff *skb; + +- timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); +- +- lock_sock(sk); +- +- skb = kcm_wait_data(sk, flags, timeo, &err); ++ skb = skb_recv_datagram(sk, flags, noblock, &err); + if (!skb) + goto out; + +@@ -1156,14 +1126,11 @@ msg_finished: + /* Finished with message */ + msg->msg_flags |= MSG_EOR; + KCM_STATS_INCR(kcm->stats.rx_msgs); +- skb_unlink(skb, &sk->sk_receive_queue); +- kfree_skb(skb); + } + } + + out: +- release_sock(sk); +- ++ skb_free_datagram(sk, skb); + return copied ? : err; + } + +@@ -1171,9 +1138,9 @@ static ssize_t kcm_splice_read(struct socket *sock, loff_t *ppos, + struct pipe_inode_info *pipe, size_t len, + unsigned int flags) + { ++ int noblock = flags & MSG_DONTWAIT; + struct sock *sk = sock->sk; + struct kcm_sock *kcm = kcm_sk(sk); +- long timeo; + struct strp_msg *stm; + int err = 0; + ssize_t copied; +@@ -1181,11 +1148,7 @@ static ssize_t kcm_splice_read(struct socket *sock, loff_t *ppos, + + /* Only support splice for SOCKSEQPACKET */ + +- timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); +- +- lock_sock(sk); +- +- skb = kcm_wait_data(sk, flags, timeo, &err); ++ skb = skb_recv_datagram(sk, flags, noblock, &err); + if (!skb) + goto err_out; + +@@ -1213,13 +1176,11 @@ static ssize_t kcm_splice_read(struct socket *sock, loff_t *ppos, + * finish reading the message. + */ + +- release_sock(sk); +- ++ skb_free_datagram(sk, skb); + return copied; + + err_out: +- release_sock(sk); +- ++ skb_free_datagram(sk, skb); + return err; + } + +@@ -1239,7 +1200,8 @@ static void kcm_recv_disable(struct kcm_sock *kcm) + if (!kcm->rx_psock) { + if (kcm->rx_wait) { + list_del(&kcm->wait_rx_list); +- kcm->rx_wait = false; ++ /* paired with lockless reads in kcm_rfree() */ ++ WRITE_ONCE(kcm->rx_wait, false); + } + + requeue_rx_msgs(mux, &kcm->sk.sk_receive_queue); +@@ -1411,12 +1373,6 @@ static int kcm_attach(struct socket *sock, struct socket *csock, psock->sk = csk; psock->bpf_prog = prog; @@ -392384,7 +505161,7 @@ index 11a715d76a4f1..f780fbe82e7dc 100644 write_lock_bh(&csk->sk_callback_lock); /* Check if sk_user_data is already by KCM or someone else. 
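
/*
 * Aside: the kcm_recvmsg()/kcm_splice_read() rewrite above deletes the
 * hand-rolled kcm_wait_data() loop and calls skb_recv_datagram(), the
 * stock helper that already handles blocking, MSG_DONTWAIT and pending
 * errors, so all callers share one wait path. A userspace analogue of
 * converging on a single blocking helper, using a condition variable
 * instead of socket wait queues; all names are invented:
 */
#include <pthread.h>

struct mailbox {
	pthread_mutex_t lock;
	pthread_cond_t cond;
	int has_msg, msg;
};

/* one blocking receive helper, instead of per-caller wait loops */
static int mailbox_recv(struct mailbox *mb, int nonblock)
{
	int v;

	pthread_mutex_lock(&mb->lock);
	while (!mb->has_msg) {
		if (nonblock) {
			pthread_mutex_unlock(&mb->lock);
			return -1;	/* would block: -EAGAIN analogue */
		}
		pthread_cond_wait(&mb->cond, &mb->lock);
	}
	mb->has_msg = 0;
	v = mb->msg;
	pthread_mutex_unlock(&mb->lock);
	return v;
}

int main(void)
{
	struct mailbox mb = {
		.lock = PTHREAD_MUTEX_INITIALIZER,
		.cond = PTHREAD_COND_INITIALIZER,
		.has_msg = 1, .msg = 7,
	};

	return mailbox_recv(&mb, 1) == 7 ? 0 : 1;
}
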
-@@ -1424,13 +1418,18 @@ static int kcm_attach(struct socket *sock, struct socket *csock, +@@ -1424,13 +1380,18 @@ static int kcm_attach(struct socket *sock, struct socket *csock, */ if (csk->sk_user_data) { write_unlock_bh(&csk->sk_callback_lock); @@ -392405,8 +505182,30 @@ index 11a715d76a4f1..f780fbe82e7dc 100644 psock->save_data_ready = csk->sk_data_ready; psock->save_write_space = csk->sk_write_space; psock->save_state_change = csk->sk_state_change; +@@ -1793,7 +1754,8 @@ static void kcm_done(struct kcm_sock *kcm) + + if (kcm->rx_wait) { + list_del(&kcm->wait_rx_list); +- kcm->rx_wait = false; ++ /* paired with lockless reads in kcm_rfree() */ ++ WRITE_ONCE(kcm->rx_wait, false); + } + /* Move any pending receive messages to other kcm sockets */ + requeue_rx_msgs(mux, &sk->sk_receive_queue); +@@ -1838,10 +1800,10 @@ static int kcm_release(struct socket *sock) + kcm = kcm_sk(sk); + mux = kcm->mux; + ++ lock_sock(sk); + sock_orphan(sk); + kfree_skb(kcm->seq_skb); + +- lock_sock(sk); + /* Purge queue under lock to avoid race condition with tx_work trying + * to act when queue is nonempty. If tx_work runs after this point + * it will just return. diff --git a/net/key/af_key.c b/net/key/af_key.c -index de24a7d474dfd..53cca90191586 100644 +index de24a7d474dfd..1d6ae1df3886b 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -1697,9 +1697,12 @@ static int pfkey_register(struct sock *sk, struct sk_buff *skb, const struct sad @@ -392443,33 +505242,360 @@ index de24a7d474dfd..53cca90191586 100644 pfkey_broadcast(skb_clone(skb, GFP_KERNEL), GFP_KERNEL, BROADCAST_PROMISC_ONLY, NULL, sock_net(sk)); -@@ -2898,7 +2905,7 @@ static int count_ah_combs(const struct xfrm_tmpl *t) - break; - if (!aalg->pfkey_supported) - continue; -- if (aalg_tmpl_set(t, aalg)) -+ if (aalg_tmpl_set(t, aalg) && aalg->available) - sz += sizeof(struct sadb_comb); - } +@@ -2934,9 +2941,10 @@ static int count_esp_combs(const struct xfrm_tmpl *t) return sz + sizeof(struct sadb_prop); -@@ -2916,7 +2923,7 @@ static int count_esp_combs(const struct xfrm_tmpl *t) - if (!ealg->pfkey_supported) - continue; + } -- if (!(ealg_tmpl_set(t, ealg))) -+ if (!(ealg_tmpl_set(t, ealg) && ealg->available)) - continue; +-static void dump_ah_combs(struct sk_buff *skb, const struct xfrm_tmpl *t) ++static int dump_ah_combs(struct sk_buff *skb, const struct xfrm_tmpl *t) + { + struct sadb_prop *p; ++ int sz = 0; + int i; - for (k = 1; ; k++) { -@@ -2927,7 +2934,7 @@ static int count_esp_combs(const struct xfrm_tmpl *t) - if (!aalg->pfkey_supported) - continue; + p = skb_put(skb, sizeof(struct sadb_prop)); +@@ -2964,13 +2972,17 @@ static void dump_ah_combs(struct sk_buff *skb, const struct xfrm_tmpl *t) + c->sadb_comb_soft_addtime = 20*60*60; + c->sadb_comb_hard_usetime = 8*60*60; + c->sadb_comb_soft_usetime = 7*60*60; ++ sz += sizeof(*c); + } + } ++ ++ return sz + sizeof(*p); + } -- if (aalg_tmpl_set(t, aalg)) -+ if (aalg_tmpl_set(t, aalg) && aalg->available) - sz += sizeof(struct sadb_comb); +-static void dump_esp_combs(struct sk_buff *skb, const struct xfrm_tmpl *t) ++static int dump_esp_combs(struct sk_buff *skb, const struct xfrm_tmpl *t) + { + struct sadb_prop *p; ++ int sz = 0; + int i, k; + + p = skb_put(skb, sizeof(struct sadb_prop)); +@@ -3012,8 +3024,11 @@ static void dump_esp_combs(struct sk_buff *skb, const struct xfrm_tmpl *t) + c->sadb_comb_soft_addtime = 20*60*60; + c->sadb_comb_hard_usetime = 8*60*60; + c->sadb_comb_soft_usetime = 7*60*60; ++ sz += sizeof(*c); } } ++ ++ return sz + sizeof(*p); + } + + static int 
key_notify_policy_expire(struct xfrm_policy *xp, const struct km_event *c) +@@ -3143,6 +3158,7 @@ static int pfkey_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *t, struct + struct sadb_x_sec_ctx *sec_ctx; + struct xfrm_sec_ctx *xfrm_ctx; + int ctx_size = 0; ++ int alg_size = 0; + + sockaddr_size = pfkey_sockaddr_size(x->props.family); + if (!sockaddr_size) +@@ -3154,16 +3170,16 @@ static int pfkey_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *t, struct + sizeof(struct sadb_x_policy); + + if (x->id.proto == IPPROTO_AH) +- size += count_ah_combs(t); ++ alg_size = count_ah_combs(t); + else if (x->id.proto == IPPROTO_ESP) +- size += count_esp_combs(t); ++ alg_size = count_esp_combs(t); + + if ((xfrm_ctx = x->security)) { + ctx_size = PFKEY_ALIGN8(xfrm_ctx->ctx_len); + size += sizeof(struct sadb_x_sec_ctx) + ctx_size; + } + +- skb = alloc_skb(size + 16, GFP_ATOMIC); ++ skb = alloc_skb(size + alg_size + 16, GFP_ATOMIC); + if (skb == NULL) + return -ENOMEM; + +@@ -3217,10 +3233,13 @@ static int pfkey_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *t, struct + pol->sadb_x_policy_priority = xp->priority; + + /* Set sadb_comb's. */ ++ alg_size = 0; + if (x->id.proto == IPPROTO_AH) +- dump_ah_combs(skb, t); ++ alg_size = dump_ah_combs(skb, t); + else if (x->id.proto == IPPROTO_ESP) +- dump_esp_combs(skb, t); ++ alg_size = dump_esp_combs(skb, t); ++ ++ hdr->sadb_msg_len += alg_size / 8; + + /* security context */ + if (xfrm_ctx) { +@@ -3375,7 +3394,7 @@ static int pfkey_send_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, + hdr->sadb_msg_len = size / sizeof(uint64_t); + hdr->sadb_msg_errno = 0; + hdr->sadb_msg_reserved = 0; +- hdr->sadb_msg_seq = x->km.seq = get_acqseq(); ++ hdr->sadb_msg_seq = x->km.seq; + hdr->sadb_msg_pid = 0; + + /* SA */ +diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c +index 93271a2632b8e..a2b13e213e06f 100644 +--- a/net/l2tp/l2tp_core.c ++++ b/net/l2tp/l2tp_core.c +@@ -104,9 +104,9 @@ static struct workqueue_struct *l2tp_wq; + /* per-net private data for this module */ + static unsigned int l2tp_net_id; + struct l2tp_net { +- struct list_head l2tp_tunnel_list; +- /* Lock for write access to l2tp_tunnel_list */ +- spinlock_t l2tp_tunnel_list_lock; ++ /* Lock for write access to l2tp_tunnel_idr */ ++ spinlock_t l2tp_tunnel_idr_lock; ++ struct idr l2tp_tunnel_idr; + struct hlist_head l2tp_session_hlist[L2TP_HASH_SIZE_2]; + /* Lock for write access to l2tp_session_hlist */ + spinlock_t l2tp_session_hlist_lock; +@@ -208,13 +208,10 @@ struct l2tp_tunnel *l2tp_tunnel_get(const struct net *net, u32 tunnel_id) + struct l2tp_tunnel *tunnel; + + rcu_read_lock_bh(); +- list_for_each_entry_rcu(tunnel, &pn->l2tp_tunnel_list, list) { +- if (tunnel->tunnel_id == tunnel_id && +- refcount_inc_not_zero(&tunnel->ref_count)) { +- rcu_read_unlock_bh(); +- +- return tunnel; +- } ++ tunnel = idr_find(&pn->l2tp_tunnel_idr, tunnel_id); ++ if (tunnel && refcount_inc_not_zero(&tunnel->ref_count)) { ++ rcu_read_unlock_bh(); ++ return tunnel; + } + rcu_read_unlock_bh(); + +@@ -224,13 +221,14 @@ EXPORT_SYMBOL_GPL(l2tp_tunnel_get); + + struct l2tp_tunnel *l2tp_tunnel_get_nth(const struct net *net, int nth) + { +- const struct l2tp_net *pn = l2tp_pernet(net); ++ struct l2tp_net *pn = l2tp_pernet(net); ++ unsigned long tunnel_id, tmp; + struct l2tp_tunnel *tunnel; + int count = 0; + + rcu_read_lock_bh(); +- list_for_each_entry_rcu(tunnel, &pn->l2tp_tunnel_list, list) { +- if (++count > nth && ++ idr_for_each_entry_ul(&pn->l2tp_tunnel_idr, tunnel, tmp, tunnel_id) { ++ if 
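
/*
 * Aside: the pfkey_send_acquire() hunks above make dump_ah_combs() and
 * dump_esp_combs() report how many bytes they appended, and add that
 * amount to sadb_msg_len afterwards: PF_KEY expresses message length in
 * 64-bit words, and the old code sized the buffer for the algorithm
 * combs without reflecting them in the header. A sketch of the unit
 * bookkeeping with a simplified, invented header layout (not the real
 * sadb_msg):
 */
#include <assert.h>
#include <stdint.h>

struct msg_hdr {
	uint16_t len64;		/* total message length, in 8-byte units */
	uint8_t  pad[6];	/* keep the header itself 8 bytes long */
};

/* pretend to append two 16-byte algorithm descriptors */
static int append_combs(uint8_t *buf, int off)
{
	(void)buf; (void)off;
	return 32;		/* bytes appended, always 8-aligned here */
}

int main(void)
{
	uint8_t buf[64] = { 0 };
	struct msg_hdr *hdr = (struct msg_hdr *)buf;
	int appended;

	hdr->len64 = sizeof(*hdr) / 8;
	appended = append_combs(buf, sizeof(*hdr));
	hdr->len64 += appended / 8;	/* keep the header in sync */

	assert(hdr->len64 * 8 == (int)sizeof(*hdr) + appended);
	return 0;
}
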
(tunnel && ++count > nth && + refcount_inc_not_zero(&tunnel->ref_count)) { + rcu_read_unlock_bh(); + return tunnel; +@@ -1043,7 +1041,7 @@ static int l2tp_xmit_core(struct l2tp_session *session, struct sk_buff *skb, uns + IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED | IPSKB_REROUTED); + nf_reset_ct(skb); + +- bh_lock_sock(sk); ++ bh_lock_sock_nested(sk); + if (sock_owned_by_user(sk)) { + kfree_skb(skb); + ret = NET_XMIT_DROP; +@@ -1150,8 +1148,10 @@ static void l2tp_tunnel_destruct(struct sock *sk) + } + + /* Remove hooks into tunnel socket */ ++ write_lock_bh(&sk->sk_callback_lock); + sk->sk_destruct = tunnel->old_sk_destruct; + sk->sk_user_data = NULL; ++ write_unlock_bh(&sk->sk_callback_lock); + + /* Call the original destructor */ + if (sk->sk_destruct) +@@ -1227,6 +1227,15 @@ static void l2tp_udp_encap_destroy(struct sock *sk) + l2tp_tunnel_delete(tunnel); + } + ++static void l2tp_tunnel_remove(struct net *net, struct l2tp_tunnel *tunnel) ++{ ++ struct l2tp_net *pn = l2tp_pernet(net); ++ ++ spin_lock_bh(&pn->l2tp_tunnel_idr_lock); ++ idr_remove(&pn->l2tp_tunnel_idr, tunnel->tunnel_id); ++ spin_unlock_bh(&pn->l2tp_tunnel_idr_lock); ++} ++ + /* Workqueue tunnel deletion function */ + static void l2tp_tunnel_del_work(struct work_struct *work) + { +@@ -1234,7 +1243,6 @@ static void l2tp_tunnel_del_work(struct work_struct *work) + del_work); + struct sock *sk = tunnel->sock; + struct socket *sock = sk->sk_socket; +- struct l2tp_net *pn; + + l2tp_tunnel_closeall(tunnel); + +@@ -1248,12 +1256,7 @@ static void l2tp_tunnel_del_work(struct work_struct *work) + } + } + +- /* Remove the tunnel struct from the tunnel list */ +- pn = l2tp_pernet(tunnel->l2tp_net); +- spin_lock_bh(&pn->l2tp_tunnel_list_lock); +- list_del_rcu(&tunnel->list); +- spin_unlock_bh(&pn->l2tp_tunnel_list_lock); +- ++ l2tp_tunnel_remove(tunnel->l2tp_net, tunnel); + /* drop initial ref */ + l2tp_tunnel_dec_refcount(tunnel); + +@@ -1384,8 +1387,6 @@ out: + return err; + } + +-static struct lock_class_key l2tp_socket_class; +- + int l2tp_tunnel_create(int fd, int version, u32 tunnel_id, u32 peer_tunnel_id, + struct l2tp_tunnel_cfg *cfg, struct l2tp_tunnel **tunnelp) + { +@@ -1455,12 +1456,19 @@ static int l2tp_validate_socket(const struct sock *sk, const struct net *net, + int l2tp_tunnel_register(struct l2tp_tunnel *tunnel, struct net *net, + struct l2tp_tunnel_cfg *cfg) + { +- struct l2tp_tunnel *tunnel_walk; +- struct l2tp_net *pn; ++ struct l2tp_net *pn = l2tp_pernet(net); ++ u32 tunnel_id = tunnel->tunnel_id; + struct socket *sock; + struct sock *sk; + int ret; + ++ spin_lock_bh(&pn->l2tp_tunnel_idr_lock); ++ ret = idr_alloc_u32(&pn->l2tp_tunnel_idr, NULL, &tunnel_id, tunnel_id, ++ GFP_ATOMIC); ++ spin_unlock_bh(&pn->l2tp_tunnel_idr_lock); ++ if (ret) ++ return ret == -ENOSPC ? 
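
/*
 * Aside: the l2tp_tunnel_register() rework above reserves the tunnel ID
 * first (idr_alloc_u32() with a NULL pointer), finishes setting the
 * tunnel up, and only later publishes the real pointer (idr_replace(),
 * in the following hunk), so a second register call for the same ID
 * fails immediately instead of racing. A userspace sketch of the
 * two-phase pattern, with a fixed-size table standing in for the IDR:
 */
#include <pthread.h>
#include <stdio.h>

#define MAX_ID 8
#define RESERVED ((void *)-1)	/* claimed, but not yet usable */

static void *registry[MAX_ID];
static pthread_mutex_t registry_lock = PTHREAD_MUTEX_INITIALIZER;

static int reserve_id(unsigned int id)
{
	int ret = -1;

	pthread_mutex_lock(&registry_lock);
	if (id < MAX_ID && !registry[id]) {
		registry[id] = RESERVED;	/* claim now, publish later */
		ret = 0;
	}
	pthread_mutex_unlock(&registry_lock);
	return ret;		/* -1: ID already taken */
}

/* lookups elsewhere must treat RESERVED as "not there yet" */
static void publish_id(unsigned int id, void *obj)
{
	pthread_mutex_lock(&registry_lock);
	registry[id] = obj;	/* obj is fully initialised by now */
	pthread_mutex_unlock(&registry_lock);
}

int main(void)
{
	int tunnel = 42;

	if (reserve_id(3))
		return 1;
	/* ... long, possibly sleeping initialisation goes here ... */
	publish_id(3, &tunnel);
	printf("reserved twice? %s\n", reserve_id(3) ? "no" : "yes");
	return 0;
}
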
-EEXIST : ret; ++ + if (tunnel->fd < 0) { + ret = l2tp_tunnel_sock_create(net, tunnel->tunnel_id, + tunnel->peer_tunnel_id, cfg, +@@ -1471,30 +1479,16 @@ int l2tp_tunnel_register(struct l2tp_tunnel *tunnel, struct net *net, + sock = sockfd_lookup(tunnel->fd, &ret); + if (!sock) + goto err; +- +- ret = l2tp_validate_socket(sock->sk, net, tunnel->encap); +- if (ret < 0) +- goto err_sock; + } + +- tunnel->l2tp_net = net; +- pn = l2tp_pernet(net); +- + sk = sock->sk; +- sock_hold(sk); +- tunnel->sock = sk; +- +- spin_lock_bh(&pn->l2tp_tunnel_list_lock); +- list_for_each_entry(tunnel_walk, &pn->l2tp_tunnel_list, list) { +- if (tunnel_walk->tunnel_id == tunnel->tunnel_id) { +- spin_unlock_bh(&pn->l2tp_tunnel_list_lock); +- sock_put(sk); +- ret = -EEXIST; +- goto err_sock; +- } +- } +- list_add_rcu(&tunnel->list, &pn->l2tp_tunnel_list); +- spin_unlock_bh(&pn->l2tp_tunnel_list_lock); ++ lock_sock(sk); ++ write_lock_bh(&sk->sk_callback_lock); ++ ret = l2tp_validate_socket(sk, net, tunnel->encap); ++ if (ret < 0) ++ goto err_inval_sock; ++ rcu_assign_sk_user_data(sk, tunnel); ++ write_unlock_bh(&sk->sk_callback_lock); + + if (tunnel->encap == L2TP_ENCAPTYPE_UDP) { + struct udp_tunnel_sock_cfg udp_cfg = { +@@ -1505,15 +1499,20 @@ int l2tp_tunnel_register(struct l2tp_tunnel *tunnel, struct net *net, + }; + + setup_udp_tunnel_sock(net, sock, &udp_cfg); +- } else { +- sk->sk_user_data = tunnel; + } + + tunnel->old_sk_destruct = sk->sk_destruct; + sk->sk_destruct = &l2tp_tunnel_destruct; +- lockdep_set_class_and_name(&sk->sk_lock.slock, &l2tp_socket_class, +- "l2tp_sock"); + sk->sk_allocation = GFP_ATOMIC; ++ release_sock(sk); ++ ++ sock_hold(sk); ++ tunnel->sock = sk; ++ tunnel->l2tp_net = net; ++ ++ spin_lock_bh(&pn->l2tp_tunnel_idr_lock); ++ idr_replace(&pn->l2tp_tunnel_idr, tunnel, tunnel->tunnel_id); ++ spin_unlock_bh(&pn->l2tp_tunnel_idr_lock); + + trace_register_tunnel(tunnel); + +@@ -1522,12 +1521,16 @@ int l2tp_tunnel_register(struct l2tp_tunnel *tunnel, struct net *net, + + return 0; + +-err_sock: ++err_inval_sock: ++ write_unlock_bh(&sk->sk_callback_lock); ++ release_sock(sk); ++ + if (tunnel->fd < 0) + sock_release(sock); + else + sockfd_put(sock); + err: ++ l2tp_tunnel_remove(net, tunnel); + return ret; + } + EXPORT_SYMBOL_GPL(l2tp_tunnel_register); +@@ -1641,8 +1644,8 @@ static __net_init int l2tp_init_net(struct net *net) + struct l2tp_net *pn = net_generic(net, l2tp_net_id); + int hash; + +- INIT_LIST_HEAD(&pn->l2tp_tunnel_list); +- spin_lock_init(&pn->l2tp_tunnel_list_lock); ++ idr_init(&pn->l2tp_tunnel_idr); ++ spin_lock_init(&pn->l2tp_tunnel_idr_lock); + + for (hash = 0; hash < L2TP_HASH_SIZE_2; hash++) + INIT_HLIST_HEAD(&pn->l2tp_session_hlist[hash]); +@@ -1656,11 +1659,13 @@ static __net_exit void l2tp_exit_net(struct net *net) + { + struct l2tp_net *pn = l2tp_pernet(net); + struct l2tp_tunnel *tunnel = NULL; ++ unsigned long tunnel_id, tmp; + int hash; + + rcu_read_lock_bh(); +- list_for_each_entry_rcu(tunnel, &pn->l2tp_tunnel_list, list) { +- l2tp_tunnel_delete(tunnel); ++ idr_for_each_entry_ul(&pn->l2tp_tunnel_idr, tunnel, tmp, tunnel_id) { ++ if (tunnel) ++ l2tp_tunnel_delete(tunnel); + } + rcu_read_unlock_bh(); + +@@ -1670,6 +1675,7 @@ static __net_exit void l2tp_exit_net(struct net *net) + + for (hash = 0; hash < L2TP_HASH_SIZE_2; hash++) + WARN_ON_ONCE(!hlist_empty(&pn->l2tp_session_hlist[hash])); ++ idr_destroy(&pn->l2tp_tunnel_idr); + } + + static struct pernet_operations l2tp_net_ops = { diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index 
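
/*
 * Aside: in the hunk above, l2tp publishes the tunnel through
 * rcu_assign_sk_user_data() only after validating the socket under
 * sk_callback_lock. The assign primitive has release semantics: every
 * store made before it is visible to any reader that loads the pointer
 * with a matching acquire (or RCU dereference). A C11 analogue of that
 * ordering guarantee, with invented names:
 */
#include <stdatomic.h>
#include <stdio.h>

struct tunnel { int id; };

static struct tunnel tun;
static _Atomic(struct tunnel *) sk_user_data;

static void publish(void)
{
	tun.id = 42;		/* ordinary initialisation ... */
	/* ... made visible by the release store below */
	atomic_store_explicit(&sk_user_data, &tun, memory_order_release);
}

static int reader(void)
{
	struct tunnel *t =
		atomic_load_explicit(&sk_user_data, memory_order_acquire);

	return t ? t->id : -1;	/* never sees a half-built tunnel */
}

int main(void)
{
	publish();
	printf("%d\n", reader());
	return 0;
}
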
96f975777438f..d54dbd01d86f1 100644 --- a/net/l2tp/l2tp_ip6.c @@ -392690,7 +505816,7 @@ index cce28e3b22323..0d2bab9d351c6 100644 void ieee80211_manage_rx_ba_offl(struct ieee80211_vif *vif, diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c -index 430a585875388..1deb3d874a4b9 100644 +index 430a585875388..a4d3fa14f76b7 100644 --- a/net/mac80211/agg-tx.c +++ b/net/mac80211/agg-tx.c @@ -9,7 +9,7 @@ @@ -392738,15 +505864,37 @@ index 430a585875388..1deb3d874a4b9 100644 buf_size, tid_tx->timeout); WARN_ON(test_and_set_bit(HT_AGG_STATE_SENT_ADDBA, &tid_tx->state)); -@@ -523,6 +523,7 @@ void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid) +@@ -491,7 +491,7 @@ void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid) + { + struct tid_ampdu_tx *tid_tx; + struct ieee80211_local *local = sta->local; +- struct ieee80211_sub_if_data *sdata = sta->sdata; ++ struct ieee80211_sub_if_data *sdata; + struct ieee80211_ampdu_params params = { + .sta = &sta->sta, + .action = IEEE80211_AMPDU_TX_START, +@@ -521,8 +521,10 @@ void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid) + */ + synchronize_net(); ++ sdata = sta->sdata; params.ssn = sta->tid_seq[tid] >> 4; ret = drv_ampdu_action(local, sdata, ¶ms); + tid_tx->ssn = params.ssn; if (ret == IEEE80211_AMPDU_TX_START_DELAY_ADDBA) { return; } else if (ret == IEEE80211_AMPDU_TX_START_IMMEDIATE) { -@@ -625,6 +626,14 @@ int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid, +@@ -533,6 +535,9 @@ void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid) + */ + set_bit(HT_AGG_STATE_DRV_READY, &tid_tx->state); + } else if (ret) { ++ if (!sdata) ++ return; ++ + ht_dbg(sdata, + "BA request denied - HW unavailable for %pM tid %d\n", + sta->sta.addr, tid); +@@ -625,6 +630,14 @@ int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid, return -EINVAL; } @@ -392761,7 +505909,7 @@ index 430a585875388..1deb3d874a4b9 100644 /* * 802.11n-2009 11.5.1.1: If the initiating STA is an HT STA, is a * member of an IBSS, and has no other existing Block Ack agreement -@@ -889,6 +898,7 @@ void ieee80211_stop_tx_ba_cb(struct sta_info *sta, int tid, +@@ -889,6 +902,7 @@ void ieee80211_stop_tx_ba_cb(struct sta_info *sta, int tid, { struct ieee80211_sub_if_data *sdata = sta->sdata; bool send_delba = false; @@ -392769,7 +505917,7 @@ index 430a585875388..1deb3d874a4b9 100644 ht_dbg(sdata, "Stopping Tx BA session for %pM tid %d\n", sta->sta.addr, tid); -@@ -906,10 +916,14 @@ void ieee80211_stop_tx_ba_cb(struct sta_info *sta, int tid, +@@ -906,10 +920,14 @@ void ieee80211_stop_tx_ba_cb(struct sta_info *sta, int tid, send_delba = true; ieee80211_remove_tid_tx(sta, tid); @@ -392784,6 +505932,20 @@ index 430a585875388..1deb3d874a4b9 100644 if (send_delba) ieee80211_send_delba(sdata, sta->sta.addr, tid, WLAN_BACK_INITIATOR, WLAN_REASON_QSTA_NOT_USE); +diff --git a/net/mac80211/airtime.c b/net/mac80211/airtime.c +index 26d2f8ba70297..758ef63669e7b 100644 +--- a/net/mac80211/airtime.c ++++ b/net/mac80211/airtime.c +@@ -457,6 +457,9 @@ static u32 ieee80211_get_rate_duration(struct ieee80211_hw *hw, + (status->encoding == RX_ENC_HE && streams > 8))) + return 0; + ++ if (idx >= MCS_GROUP_RATES) ++ return 0; ++ + duration = airtime_mcs_groups[group].duration[idx]; + duration <<= airtime_mcs_groups[group].shift; + *overhead = 36 + (streams << 2); diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index d69b31c20fe28..4fa216a108ae8 100644 --- a/net/mac80211/cfg.c @@ -392891,6 +506053,20 @@ index 
76fc36a68750e..63e15f583e0a6 100644 } /* +diff --git a/net/mac80211/driver-ops.c b/net/mac80211/driver-ops.c +index 48322e45e7ddb..120bd9cdf7dfa 100644 +--- a/net/mac80211/driver-ops.c ++++ b/net/mac80211/driver-ops.c +@@ -331,6 +331,9 @@ int drv_ampdu_action(struct ieee80211_local *local, + + might_sleep(); + ++ if (!sdata) ++ return -EIO; ++ + sdata = get_bss_sdata(sdata); + if (!check_sdata_in_driver(sdata)) + return -EIO; diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index cd3731cbf6c68..c336267f4599c 100644 --- a/net/mac80211/driver-ops.h @@ -393213,7 +506389,7 @@ index fb3aaa3c56069..b71a1428d883c 100644 #endif } diff --git a/net/mac80211/main.c b/net/mac80211/main.c -index 45fb517591ee9..5311c3cd3050d 100644 +index 45fb517591ee9..9617ff8e27147 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -1131,17 +1131,14 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) @@ -393239,6 +506415,30 @@ index 45fb517591ee9..5311c3cd3050d 100644 if (!local->ops->hw_scan) { /* For hw_scan, driver needs to set these up. */ +@@ -1360,8 +1357,10 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) + ieee80211_led_exit(local); + destroy_workqueue(local->workqueue); + fail_workqueue: +- if (local->wiphy_ciphers_allocated) ++ if (local->wiphy_ciphers_allocated) { + kfree(local->hw.wiphy->cipher_suites); ++ local->wiphy_ciphers_allocated = false; ++ } + kfree(local->int_scan_req); + return result; + } +@@ -1429,8 +1428,10 @@ void ieee80211_free_hw(struct ieee80211_hw *hw) + mutex_destroy(&local->iflist_mtx); + mutex_destroy(&local->mtx); + +- if (local->wiphy_ciphers_allocated) ++ if (local->wiphy_ciphers_allocated) { + kfree(local->hw.wiphy->cipher_suites); ++ local->wiphy_ciphers_allocated = false; ++ } + + idr_for_each(&local->ack_status_frames, + ieee80211_free_ack_frame, NULL); diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index 5dcfd53a4ab6c..6847fdf934392 100644 --- a/net/mac80211/mesh.c @@ -393556,7 +506756,7 @@ index a05b615deb517..44a6fdb6efbd4 100644 /** diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c -index 7cab1cf09bf1a..acc1c299f1ae5 100644 +index 7cab1cf09bf1a..69d5e1ec6edef 100644 --- a/net/mac80211/mesh_pathtbl.c +++ b/net/mac80211/mesh_pathtbl.c @@ -47,32 +47,24 @@ static void mesh_path_rht_free(void *ptr, void *tblptr) @@ -393719,6 +506919,15 @@ index 7cab1cf09bf1a..acc1c299f1ae5 100644 rcu_read_lock(); hlist_for_each_entry_rcu(gate, &tbl->known_gates, gate_list) { +@@ -718,7 +710,7 @@ int mesh_path_send_to_gates(struct mesh_path *mpath) + void mesh_path_discard_frame(struct ieee80211_sub_if_data *sdata, + struct sk_buff *skb) + { +- kfree_skb(skb); ++ ieee80211_free_txskb(&sdata->local->hw, skb); + sdata->u.mesh.mshstats.dropped_frames_no_route++; + } + @@ -762,29 +754,10 @@ void mesh_path_fix_nexthop(struct mesh_path *mpath, struct sta_info *next_hop) mesh_path_tx_pending(mpath); } @@ -394689,10 +507898,10 @@ index c4071b015c188..175ead6b19cb4 100644 if (status->flag & RX_FLAG_8023) __ieee80211_rx_handle_8023(hw, pubsta, skb, list); diff --git a/net/mac80211/s1g.c b/net/mac80211/s1g.c -index 7e35ab5b61664..4141bc80cdfd6 100644 +index 7e35ab5b61664..10b34bc4b67d4 100644 --- a/net/mac80211/s1g.c +++ b/net/mac80211/s1g.c -@@ -104,9 +104,11 @@ ieee80211_s1g_rx_twt_setup(struct ieee80211_sub_if_data *sdata, +@@ -104,12 +104,17 @@ ieee80211_s1g_rx_twt_setup(struct ieee80211_sub_if_data *sdata, /* broadcast TWT not supported yet */ if (twt->control & IEEE80211_TWT_CONTROL_NEG_TYPE_BROADCAST) { @@ -394707,6 +507916,12 @@ index 
7e35ab5b61664..4141bc80cdfd6 100644 goto out; } ++ /* TWT Information not supported yet */ ++ twt->control |= IEEE80211_TWT_CONTROL_RX_DISABLED; ++ + drv_add_twt_setup(sdata->local, sdata, &sta->sta, twt); + out: + ieee80211_s1g_send_twt_setup(sdata, mgmt->sa, sdata->vif.addr, twt); diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index 6b50cb5e0e3cc..e692a2487eb5d 100644 --- a/net/mac80211/scan.c @@ -395483,8 +508698,20 @@ index 9ea6004abe1be..d50480b317505 100644 skb->priority = 0; /* required for correct WPA/11i MIC */ return IEEE80211_AC_BE; } +diff --git a/net/mac802154/iface.c b/net/mac802154/iface.c +index 323d3d2d986f8..3e510664fc891 100644 +--- a/net/mac802154/iface.c ++++ b/net/mac802154/iface.c +@@ -661,6 +661,7 @@ ieee802154_if_add(struct ieee802154_local *local, const char *name, + sdata->dev = ndev; + sdata->wpan_dev.wpan_phy = local->hw.phy; + sdata->local = local; ++ INIT_LIST_HEAD(&sdata->wpan_dev.list); + + /* setup type-dependent data */ + ret = ieee802154_setup_sdata(sdata, type); diff --git a/net/mac802154/rx.c b/net/mac802154/rx.c -index b8ce84618a55b..c439125ef2b91 100644 +index b8ce84618a55b..726b47a4611b5 100644 --- a/net/mac802154/rx.c +++ b/net/mac802154/rx.c @@ -44,7 +44,7 @@ ieee802154_subif_frame(struct ieee802154_sub_if_data *sdata, @@ -395496,8 +508723,28 @@ index b8ce84618a55b..c439125ef2b91 100644 /* FIXME: check if we are PAN coordinator */ skb->pkt_type = PACKET_OTHERHOST; else +@@ -132,7 +132,7 @@ static int + ieee802154_parse_frame_start(struct sk_buff *skb, struct ieee802154_hdr *hdr) + { + int hlen; +- struct ieee802154_mac_cb *cb = mac_cb_init(skb); ++ struct ieee802154_mac_cb *cb = mac_cb(skb); + + skb_reset_mac_header(skb); + +@@ -294,8 +294,9 @@ void + ieee802154_rx_irqsafe(struct ieee802154_hw *hw, struct sk_buff *skb, u8 lqi) + { + struct ieee802154_local *local = hw_to_local(hw); ++ struct ieee802154_mac_cb *cb = mac_cb_init(skb); + +- mac_cb(skb)->lqi = lqi; ++ cb->lqi = lqi; + skb->pkt_type = IEEE802154_RX_MSG; + skb_queue_tail(&local->skb_queue, skb); + tasklet_schedule(&local->tasklet); diff --git a/net/mctp/af_mctp.c b/net/mctp/af_mctp.c -index a9526ac29dffe..85cc1a28cbe9f 100644 +index a9526ac29dffe..77137a8627d06 100644 --- a/net/mctp/af_mctp.c +++ b/net/mctp/af_mctp.c @@ -30,6 +30,12 @@ static int mctp_release(struct socket *sock) @@ -395546,11 +508793,73 @@ index a9526ac29dffe..85cc1a28cbe9f 100644 msg->msg_namelen = sizeof(*addr); } +@@ -275,11 +288,17 @@ static void mctp_sk_unhash(struct sock *sk) + + kfree_rcu(key, rcu); + } ++ sock_set_flag(sk, SOCK_DEAD); + spin_unlock_irqrestore(&net->mctp.keys_lock, flags); + + synchronize_rcu(); + } + ++static void mctp_sk_destruct(struct sock *sk) ++{ ++ skb_queue_purge(&sk->sk_receive_queue); ++} ++ + static struct proto mctp_proto = { + .name = "MCTP", + .owner = THIS_MODULE, +@@ -316,6 +335,7 @@ static int mctp_pf_create(struct net *net, struct socket *sock, + return -ENOMEM; + + sock_init_data(sock, sk); ++ sk->sk_destruct = mctp_sk_destruct; + + rc = 0; + if (sk->sk_prot->init) +@@ -362,12 +382,14 @@ static __init int mctp_init(void) + + rc = mctp_neigh_init(); + if (rc) +- goto err_unreg_proto; ++ goto err_unreg_routes; + + mctp_device_init(); + + return 0; + ++err_unreg_routes: ++ mctp_routes_exit(); + err_unreg_proto: + proto_unregister(&mctp_proto); + err_unreg_sock: diff --git a/net/mctp/route.c b/net/mctp/route.c -index 5ca186d53cb0f..bbb13dbc9227d 100644 +index 5ca186d53cb0f..89e67399249b4 100644 --- a/net/mctp/route.c +++ b/net/mctp/route.c -@@ -396,7 +396,7 @@ static int 
mctp_route_output(struct mctp_route *route, struct sk_buff *skb) +@@ -135,6 +135,11 @@ static int mctp_key_add(struct mctp_sk_key *key, struct mctp_sock *msk) + + spin_lock_irqsave(&net->mctp.keys_lock, flags); + ++ if (sock_flag(&msk->sk, SOCK_DEAD)) { ++ rc = -EINVAL; ++ goto out_unlock; ++ } ++ + hlist_for_each_entry(tmp, &net->mctp.keys, hlist) { + if (mctp_key_match(tmp, key->local_addr, key->peer_addr, + key->tag)) { +@@ -148,6 +153,7 @@ static int mctp_key_add(struct mctp_sk_key *key, struct mctp_sock *msk) + hlist_add_head(&key->sklist, &msk->keys); + } + ++out_unlock: + spin_unlock_irqrestore(&net->mctp.keys_lock, flags); + + return rc; +@@ -396,7 +402,7 @@ static int mctp_route_output(struct mctp_route *route, struct sk_buff *skb) rc = dev_hard_header(skb, skb->dev, ntohs(skb->protocol), daddr, skb->dev->dev_addr, skb->len); @@ -395559,7 +508868,7 @@ index 5ca186d53cb0f..bbb13dbc9227d 100644 kfree_skb(skb); return -EHOSTUNREACH; } -@@ -760,7 +760,7 @@ static int mctp_route_add(struct mctp_dev *mdev, mctp_eid_t daddr_start, +@@ -760,7 +766,7 @@ static int mctp_route_add(struct mctp_dev *mdev, mctp_eid_t daddr_start, } static int mctp_route_remove(struct mctp_dev *mdev, mctp_eid_t daddr_start, @@ -395568,7 +508877,7 @@ index 5ca186d53cb0f..bbb13dbc9227d 100644 { struct net *net = dev_net(mdev->dev); struct mctp_route *rt, *tmp; -@@ -777,7 +777,8 @@ static int mctp_route_remove(struct mctp_dev *mdev, mctp_eid_t daddr_start, +@@ -777,7 +783,8 @@ static int mctp_route_remove(struct mctp_dev *mdev, mctp_eid_t daddr_start, list_for_each_entry_safe(rt, tmp, &net->mctp.routes, list) { if (rt->dev == mdev && @@ -395578,7 +508887,7 @@ index 5ca186d53cb0f..bbb13dbc9227d 100644 list_del_rcu(&rt->list); /* TODO: immediate RTM_DELROUTE */ mctp_route_release(rt); -@@ -795,7 +796,7 @@ int mctp_route_add_local(struct mctp_dev *mdev, mctp_eid_t addr) +@@ -795,7 +802,7 @@ int mctp_route_add_local(struct mctp_dev *mdev, mctp_eid_t addr) int mctp_route_remove_local(struct mctp_dev *mdev, mctp_eid_t addr) { @@ -395587,7 +508896,7 @@ index 5ca186d53cb0f..bbb13dbc9227d 100644 } /* removes all entries for a given device */ -@@ -975,7 +976,7 @@ static int mctp_delroute(struct sk_buff *skb, struct nlmsghdr *nlh, +@@ -975,7 +982,7 @@ static int mctp_delroute(struct sk_buff *skb, struct nlmsghdr *nlh, if (rtm->rtm_type != RTN_UNICAST) return -EINVAL; @@ -395596,8 +508905,17 @@ index 5ca186d53cb0f..bbb13dbc9227d 100644 return rc; } +@@ -1108,7 +1115,7 @@ int __init mctp_routes_init(void) + return register_pernet_subsys(&mctp_net_ops); + } + +-void __exit mctp_routes_exit(void) ++void mctp_routes_exit(void) + { + unregister_pernet_subsys(&mctp_net_ops); + rtnl_unregister(PF_MCTP, RTM_DELROUTE); diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c -index ffeb2df8be7ae..58a7075084d17 100644 +index ffeb2df8be7ae..e69bed96811b5 100644 --- a/net/mpls/af_mpls.c +++ b/net/mpls/af_mpls.c @@ -1079,9 +1079,9 @@ static void mpls_get_stats(struct mpls_dev *mdev, @@ -395612,7 +508930,25 @@ index ffeb2df8be7ae..58a7075084d17 100644 stats->rx_packets += local.rx_packets; stats->rx_bytes += local.rx_bytes; -@@ -1491,22 +1491,52 @@ static void mpls_dev_destroy_rcu(struct rcu_head *head) +@@ -1428,6 +1428,7 @@ static int mpls_dev_sysctl_register(struct net_device *dev, + free: + kfree(table); + out: ++ mdev->sysctl = NULL; + return -ENOBUFS; + } + +@@ -1437,6 +1438,9 @@ static void mpls_dev_sysctl_unregister(struct net_device *dev, + struct net *net = dev_net(dev); + struct ctl_table *table; + ++ if (!mdev->sysctl) ++ return; ++ 
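
/*
 * Aside: mpls_get_stats() above folds per-CPU counters into one
 * aggregate: each CPU increments only its own copy (no shared cache
 * line, no lock), and a reader sums the copies on demand, accepting a
 * slightly stale snapshot. A userspace sketch with per-"CPU" slots;
 * the names are invented:
 */
#include <stdio.h>

#define NCPUS 4
static unsigned long rx_packets[NCPUS];	/* one counter per "CPU" */

static void rx_on_cpu(int cpu)
{
	rx_packets[cpu]++;	/* writer touches only its own slot */
}

static unsigned long rx_total(void)
{
	unsigned long sum = 0;
	int cpu;

	for (cpu = 0; cpu < NCPUS; cpu++)
		sum += rx_packets[cpu];	/* reader folds all slots */
	return sum;
}

int main(void)
{
	rx_on_cpu(0);
	rx_on_cpu(2);
	rx_on_cpu(2);
	printf("rx=%lu\n", rx_total());
	return 0;
}
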
+ table = mdev->sysctl->ctl_table_arg; + unregister_net_sysctl_table(mdev->sysctl); + kfree(table); +@@ -1491,22 +1495,52 @@ static void mpls_dev_destroy_rcu(struct rcu_head *head) kfree(mdev); } @@ -395669,7 +509005,7 @@ index ffeb2df8be7ae..58a7075084d17 100644 change_nexthops(rt) { unsigned int nh_flags = nh->nh_flags; -@@ -1530,16 +1560,15 @@ static void mpls_ifdown(struct net_device *dev, int event) +@@ -1530,16 +1564,15 @@ static void mpls_ifdown(struct net_device *dev, int event) next: if (!(nh_flags & (RTNH_F_DEAD | RTNH_F_LINKDOWN))) alive++; @@ -395690,7 +509026,7 @@ index ffeb2df8be7ae..58a7075084d17 100644 } static void mpls_ifup(struct net_device *dev, unsigned int flags) -@@ -1597,8 +1626,12 @@ static int mpls_dev_notify(struct notifier_block *this, unsigned long event, +@@ -1597,8 +1630,12 @@ static int mpls_dev_notify(struct notifier_block *this, unsigned long event, return NOTIFY_OK; switch (event) { @@ -395704,7 +509040,7 @@ index ffeb2df8be7ae..58a7075084d17 100644 break; case NETDEV_UP: flags = dev_get_flags(dev); -@@ -1609,13 +1642,18 @@ static int mpls_dev_notify(struct notifier_block *this, unsigned long event, +@@ -1609,13 +1646,18 @@ static int mpls_dev_notify(struct notifier_block *this, unsigned long event, break; case NETDEV_CHANGE: flags = dev_get_flags(dev); @@ -395727,7 +509063,7 @@ index ffeb2df8be7ae..58a7075084d17 100644 mdev = mpls_dev_get(dev); if (mdev) { mpls_dev_sysctl_unregister(dev, mdev); -@@ -1626,8 +1664,6 @@ static int mpls_dev_notify(struct notifier_block *this, unsigned long event, +@@ -1626,8 +1668,6 @@ static int mpls_dev_notify(struct notifier_block *this, unsigned long event, case NETDEV_CHANGENAME: mdev = mpls_dev_get(dev); if (mdev) { @@ -395974,7 +509310,7 @@ index 6ab386ff32944..d9790d6fbce9c 100644 } diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c -index 050eea231528b..2b1b40199c617 100644 +index 050eea231528b..3a1e8f2388665 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -459,6 +459,18 @@ static unsigned int fill_remote_addresses_vec(struct mptcp_sock *msk, bool fullm @@ -396096,15 +509432,37 @@ index 050eea231528b..2b1b40199c617 100644 } } -@@ -857,6 +889,7 @@ out: +@@ -857,9 +889,10 @@ out: static int mptcp_pm_nl_create_listen_socket(struct sock *sk, struct mptcp_pm_addr_entry *entry) { + int addrlen = sizeof(struct sockaddr_in); struct sockaddr_storage addr; - struct mptcp_sock *msk; +- struct mptcp_sock *msk; struct socket *ssock; -@@ -881,8 +914,11 @@ static int mptcp_pm_nl_create_listen_socket(struct sock *sk, ++ struct sock *newsk; + int backlog = 1024; + int err; + +@@ -868,21 +901,26 @@ static int mptcp_pm_nl_create_listen_socket(struct sock *sk, + if (err) + return err; + +- msk = mptcp_sk(entry->lsk->sk); +- if (!msk) { ++ newsk = entry->lsk->sk; ++ if (!newsk) { + err = -EINVAL; + goto out; + } + +- ssock = __mptcp_nmpc_socket(msk); ++ lock_sock(newsk); ++ ssock = __mptcp_nmpc_socket(mptcp_sk(newsk)); ++ release_sock(newsk); + if (!ssock) { + err = -EINVAL; + goto out; } mptcp_info2sockaddr(&entry->addr, &addr, entry->addr.family); @@ -396118,7 +509476,7 @@ index 050eea231528b..2b1b40199c617 100644 if (err) { pr_warn("kernel_bind error, err=%d", err); goto out; -@@ -1716,17 +1752,21 @@ static int mptcp_nl_cmd_set_flags(struct sk_buff *skb, struct genl_info *info) +@@ -1716,17 +1754,21 @@ static int mptcp_nl_cmd_set_flags(struct sk_buff *skb, struct genl_info *info) if (addr.flags & MPTCP_PM_ADDR_FLAG_BACKUP) bkup = 1; @@ -396150,7 +509508,7 @@ index 050eea231528b..2b1b40199c617 100644 } 
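
/*
 * Aside: the pm_netlink hunk above passes kernel_bind() an address
 * length that matches the family, sizeof(struct sockaddr_in) for IPv4
 * and sizeof(struct sockaddr_in6) for IPv6, rather than the size of the
 * whole sockaddr_storage, since some protocols reject over-long
 * lengths. The same idiom in plain userspace sockets code:
 */
#include <arpa/inet.h>
#include <netinet/in.h>
#include <string.h>
#include <sys/socket.h>
#include <unistd.h>

static int bind_any(int family, unsigned short port)
{
	struct sockaddr_storage ss;
	socklen_t addrlen = sizeof(struct sockaddr_in);
	int fd = socket(family, SOCK_STREAM, 0);

	if (fd < 0)
		return -1;
	memset(&ss, 0, sizeof(ss));
	ss.ss_family = family;
	if (family == AF_INET6) {
		((struct sockaddr_in6 *)&ss)->sin6_port = htons(port);
		addrlen = sizeof(struct sockaddr_in6);	/* family-sized */
	} else {
		((struct sockaddr_in *)&ss)->sin_port = htons(port);
	}
	if (bind(fd, (struct sockaddr *)&ss, addrlen) < 0) {
		close(fd);
		return -1;
	}
	return fd;
}

int main(void)
{
	int fd = bind_any(AF_INET, 0);	/* port 0: kernel picks one */

	if (fd >= 0)
		close(fd);
	return fd >= 0 ? 0 : 1;
}
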
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c -index d073b21113828..47f359dac247b 100644 +index d073b21113828..5d05d85242bc6 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -406,9 +406,12 @@ static bool mptcp_pending_data_fin(struct sock *sk, u64 *seq) @@ -396520,7 +509878,35 @@ index d073b21113828..47f359dac247b 100644 return 0; } -@@ -2881,7 +2922,7 @@ static struct sock *mptcp_accept(struct sock *sk, int flags, int *err, +@@ -2685,6 +2726,7 @@ static void mptcp_close(struct sock *sk, long timeout) + { + struct mptcp_subflow_context *subflow; + bool do_cancel_work = false; ++ int subflows_alive = 0; + + lock_sock(sk); + sk->sk_shutdown = SHUTDOWN_MASK; +@@ -2706,11 +2748,19 @@ cleanup: + struct sock *ssk = mptcp_subflow_tcp_sock(subflow); + bool slow = lock_sock_fast_nested(ssk); + ++ subflows_alive += ssk->sk_state != TCP_CLOSE; ++ + sock_orphan(ssk); + unlock_sock_fast(ssk, slow); + } + sock_orphan(sk); + ++ /* all the subflows are closed, only timeout can change the msk ++ * state, let's not keep resources busy for no reasons ++ */ ++ if (subflows_alive == 0) ++ inet_sk_state_store(sk, TCP_CLOSE); ++ + sock_hold(sk); + pr_debug("msk=%p state=%d", sk, sk->sk_state); + if (sk->sk_state == TCP_CLOSE) { +@@ -2881,7 +2931,7 @@ static struct sock *mptcp_accept(struct sock *sk, int flags, int *err, */ if (WARN_ON_ONCE(!new_mptcp_sock)) { tcp_sk(newsk)->is_mptcp = 0; @@ -396529,7 +509915,7 @@ index d073b21113828..47f359dac247b 100644 } /* acquire the 2nd reference for the owning socket */ -@@ -2893,6 +2934,8 @@ static struct sock *mptcp_accept(struct sock *sk, int flags, int *err, +@@ -2893,6 +2943,8 @@ static struct sock *mptcp_accept(struct sock *sk, int flags, int *err, MPTCP_MIB_MPCAPABLEPASSIVEFALLBACK); } @@ -396538,7 +509924,7 @@ index d073b21113828..47f359dac247b 100644 return newsk; } -@@ -2940,7 +2983,7 @@ void __mptcp_check_push(struct sock *sk, struct sock *ssk) +@@ -2940,7 +2992,7 @@ void __mptcp_check_push(struct sock *sk, struct sock *ssk) if (xmit_ssk == ssk) __mptcp_subflow_push_pending(sk, ssk); else if (xmit_ssk) @@ -396547,7 +509933,7 @@ index d073b21113828..47f359dac247b 100644 } else { set_bit(MPTCP_PUSH_PENDING, &mptcp_sk(sk)->flags); } -@@ -2994,18 +3037,50 @@ static void mptcp_release_cb(struct sock *sk) +@@ -2994,18 +3046,50 @@ static void mptcp_release_cb(struct sock *sk) __mptcp_update_rmem(sk); } @@ -396697,10 +510083,107 @@ index 8c03afac5ca03..4bb305342fcc7 100644 case TCP_KEEPIDLE: case TCP_KEEPINTVL: diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c -index 6172f380dfb76..5ef9013b94c74 100644 +index 6172f380dfb76..b0e9548f00bf1 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c -@@ -845,9 +845,8 @@ static enum mapping_status validate_data_csum(struct sock *ssk, struct sk_buff * +@@ -45,7 +45,6 @@ static void subflow_req_destructor(struct request_sock *req) + sock_put((struct sock *)subflow_req->msk); + + mptcp_token_destroy_request(req); +- tcp_request_sock_ops.destructor(req); + } + + static void subflow_generate_hmac(u64 key1, u64 key2, u32 nonce1, u32 nonce2, +@@ -483,9 +482,8 @@ do_reset: + mptcp_subflow_reset(sk); + } + +-struct request_sock_ops mptcp_subflow_request_sock_ops; +-EXPORT_SYMBOL_GPL(mptcp_subflow_request_sock_ops); +-static struct tcp_request_sock_ops subflow_request_sock_ipv4_ops; ++static struct request_sock_ops mptcp_subflow_v4_request_sock_ops __ro_after_init; ++static struct tcp_request_sock_ops subflow_request_sock_ipv4_ops __ro_after_init; + + static int subflow_v4_conn_request(struct sock *sk, 
struct sk_buff *skb) + { +@@ -497,7 +495,7 @@ static int subflow_v4_conn_request(struct sock *sk, struct sk_buff *skb) + if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST)) + goto drop; + +- return tcp_conn_request(&mptcp_subflow_request_sock_ops, ++ return tcp_conn_request(&mptcp_subflow_v4_request_sock_ops, + &subflow_request_sock_ipv4_ops, + sk, skb); + drop: +@@ -505,10 +503,17 @@ drop: + return 0; + } + ++static void subflow_v4_req_destructor(struct request_sock *req) ++{ ++ subflow_req_destructor(req); ++ tcp_request_sock_ops.destructor(req); ++} ++ + #if IS_ENABLED(CONFIG_MPTCP_IPV6) +-static struct tcp_request_sock_ops subflow_request_sock_ipv6_ops; +-static struct inet_connection_sock_af_ops subflow_v6_specific; +-static struct inet_connection_sock_af_ops subflow_v6m_specific; ++static struct request_sock_ops mptcp_subflow_v6_request_sock_ops __ro_after_init; ++static struct tcp_request_sock_ops subflow_request_sock_ipv6_ops __ro_after_init; ++static struct inet_connection_sock_af_ops subflow_v6_specific __ro_after_init; ++static struct inet_connection_sock_af_ops subflow_v6m_specific __ro_after_init; + static struct proto tcpv6_prot_override; + + static int subflow_v6_conn_request(struct sock *sk, struct sk_buff *skb) +@@ -528,15 +533,36 @@ static int subflow_v6_conn_request(struct sock *sk, struct sk_buff *skb) + return 0; + } + +- return tcp_conn_request(&mptcp_subflow_request_sock_ops, ++ return tcp_conn_request(&mptcp_subflow_v6_request_sock_ops, + &subflow_request_sock_ipv6_ops, sk, skb); + + drop: + tcp_listendrop(sk); + return 0; /* don't send reset */ + } ++ ++static void subflow_v6_req_destructor(struct request_sock *req) ++{ ++ subflow_req_destructor(req); ++ tcp6_request_sock_ops.destructor(req); ++} + #endif + ++struct request_sock *mptcp_subflow_reqsk_alloc(const struct request_sock_ops *ops, ++ struct sock *sk_listener, ++ bool attach_listener) ++{ ++ if (ops->family == AF_INET) ++ ops = &mptcp_subflow_v4_request_sock_ops; ++#if IS_ENABLED(CONFIG_MPTCP_IPV6) ++ else if (ops->family == AF_INET6) ++ ops = &mptcp_subflow_v6_request_sock_ops; ++#endif ++ ++ return inet_reqsk_alloc(ops, sk_listener, attach_listener); ++} ++EXPORT_SYMBOL(mptcp_subflow_reqsk_alloc); ++ + /* validate hmac received in third ACK */ + static bool subflow_hmac_valid(const struct request_sock *req, + const struct mptcp_options_received *mp_opt) +@@ -790,7 +816,7 @@ dispose_child: + return child; + } + +-static struct inet_connection_sock_af_ops subflow_specific; ++static struct inet_connection_sock_af_ops subflow_specific __ro_after_init; + static struct proto tcp_prot_override; + + enum mapping_status { +@@ -845,9 +871,8 @@ static enum mapping_status validate_data_csum(struct sock *ssk, struct sk_buff * bool csum_reqd) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); @@ -396711,7 +510194,7 @@ index 6172f380dfb76..5ef9013b94c74 100644 int len; if (!csum_reqd) -@@ -908,19 +907,20 @@ static enum mapping_status validate_data_csum(struct sock *ssk, struct sk_buff * +@@ -908,19 +933,20 @@ static enum mapping_status validate_data_csum(struct sock *ssk, struct sk_buff * * while the pseudo header requires the original DSS data len, * including that */ @@ -396741,7 +510224,7 @@ index 6172f380dfb76..5ef9013b94c74 100644 return MAPPING_OK; } -@@ -1102,6 +1102,18 @@ static void subflow_sched_work_if_closed(struct mptcp_sock *msk, struct sock *ss +@@ -1102,6 +1128,18 @@ static void subflow_sched_work_if_closed(struct mptcp_sock *msk, struct sock *ss } } @@ -396760,7 +510243,7 
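
/*
 * Aside: validate_data_csum() above feeds the DSS header and the mapped
 * payload through csum_partial() and rejects the mapping when
 * csum_fold() of the running sum is non-zero. That is the standard
 * RFC 1071 one's-complement checksum; below is a compact, portable
 * userspace version for reference, not the kernel's optimised
 * per-architecture implementation:
 */
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

static uint16_t csum_fold16(const uint8_t *data, size_t len, uint32_t sum)
{
	size_t i;

	for (i = 0; i + 1 < len; i += 2)
		sum += (uint32_t)(data[i] << 8 | data[i + 1]);
	if (len & 1)
		sum += (uint32_t)(data[len - 1] << 8);	/* pad odd byte */
	while (sum >> 16)
		sum = (sum & 0xffff) + (sum >> 16);	/* fold carries */
	return (uint16_t)~sum;
}

int main(void)
{
	uint8_t pkt[] = { 0x45, 0x00, 0x00, 0x1c };
	uint16_t c = csum_fold16(pkt, sizeof(pkt), 0);

	/* a receiver summing data plus this checksum gets 0xffff,
	 * so folding and complementing its own sum yields zero */
	printf("checksum=0x%04x\n", c);
	return 0;
}
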
@@ index 6172f380dfb76..5ef9013b94c74 100644 static bool subflow_check_data_avail(struct sock *ssk) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); -@@ -1179,7 +1191,7 @@ fallback: +@@ -1179,7 +1217,7 @@ fallback: return true; } @@ -396769,6 +510252,109 @@ index 6172f380dfb76..5ef9013b94c74 100644 /* fatal protocol error, close the socket. * subflow_error_report() will introduce the appropriate barriers */ +@@ -1246,6 +1284,7 @@ void __mptcp_error_report(struct sock *sk) + mptcp_for_each_subflow(msk, subflow) { + struct sock *ssk = mptcp_subflow_tcp_sock(subflow); + int err = sock_error(ssk); ++ int ssk_state; + + if (!err) + continue; +@@ -1256,7 +1295,14 @@ void __mptcp_error_report(struct sock *sk) + if (sk->sk_state != TCP_SYN_SENT && !__mptcp_check_fallback(msk)) + continue; + +- inet_sk_state_store(sk, inet_sk_state_load(ssk)); ++ /* We need to propagate only transition to CLOSE state. ++ * Orphaned socket will see such state change via ++ * subflow_sched_work_if_closed() and that path will properly ++ * destroy the msk as needed. ++ */ ++ ssk_state = inet_sk_state_load(ssk); ++ if (ssk_state == TCP_CLOSE && !sock_flag(sk, SOCK_DEAD)) ++ inet_sk_state_store(sk, ssk_state); + sk->sk_err = -err; + + /* This barrier is coupled with smp_rmb() in mptcp_poll() */ +@@ -1315,7 +1361,7 @@ static void subflow_write_space(struct sock *ssk) + mptcp_write_space(sk); + } + +-static struct inet_connection_sock_af_ops * ++static const struct inet_connection_sock_af_ops * + subflow_default_af_ops(struct sock *sk) + { + #if IS_ENABLED(CONFIG_MPTCP_IPV6) +@@ -1330,7 +1376,7 @@ void mptcpv6_handle_mapped(struct sock *sk, bool mapped) + { + struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); + struct inet_connection_sock *icsk = inet_csk(sk); +- struct inet_connection_sock_af_ops *target; ++ const struct inet_connection_sock_af_ops *target; + + target = mapped ? 
&subflow_v6m_specific : subflow_default_af_ops(sk); + +@@ -1524,7 +1570,7 @@ int mptcp_subflow_create_socket(struct sock *sk, struct socket **new_sock) + if (err) + return err; + +- lock_sock(sf->sk); ++ lock_sock_nested(sf->sk, SINGLE_DEPTH_NESTING); + + /* the newly created socket has to be in the same cgroup as its parent */ + mptcp_attach_cgroup(sk, sf->sk); +@@ -1770,7 +1816,6 @@ static struct tcp_ulp_ops subflow_ulp_ops __read_mostly = { + static int subflow_ops_init(struct request_sock_ops *subflow_ops) + { + subflow_ops->obj_size = sizeof(struct mptcp_subflow_request_sock); +- subflow_ops->slab_name = "request_sock_subflow"; + + subflow_ops->slab = kmem_cache_create(subflow_ops->slab_name, + subflow_ops->obj_size, 0, +@@ -1780,16 +1825,17 @@ static int subflow_ops_init(struct request_sock_ops *subflow_ops) + if (!subflow_ops->slab) + return -ENOMEM; + +- subflow_ops->destructor = subflow_req_destructor; +- + return 0; + } + + void __init mptcp_subflow_init(void) + { +- mptcp_subflow_request_sock_ops = tcp_request_sock_ops; +- if (subflow_ops_init(&mptcp_subflow_request_sock_ops) != 0) +- panic("MPTCP: failed to init subflow request sock ops\n"); ++ mptcp_subflow_v4_request_sock_ops = tcp_request_sock_ops; ++ mptcp_subflow_v4_request_sock_ops.slab_name = "request_sock_subflow_v4"; ++ mptcp_subflow_v4_request_sock_ops.destructor = subflow_v4_req_destructor; ++ ++ if (subflow_ops_init(&mptcp_subflow_v4_request_sock_ops) != 0) ++ panic("MPTCP: failed to init subflow v4 request sock ops\n"); + + subflow_request_sock_ipv4_ops = tcp_request_sock_ipv4_ops; + subflow_request_sock_ipv4_ops.route_req = subflow_v4_route_req; +@@ -1803,6 +1849,20 @@ void __init mptcp_subflow_init(void) + tcp_prot_override.release_cb = tcp_release_cb_override; + + #if IS_ENABLED(CONFIG_MPTCP_IPV6) ++ /* In struct mptcp_subflow_request_sock, we assume the TCP request sock ++ * structures for v4 and v6 have the same size. It should not change in ++ * the future but better to make sure to be warned if it is no longer ++ * the case.
++ */ ++ BUILD_BUG_ON(sizeof(struct tcp_request_sock) != sizeof(struct tcp6_request_sock)); ++ ++ mptcp_subflow_v6_request_sock_ops = tcp6_request_sock_ops; ++ mptcp_subflow_v6_request_sock_ops.slab_name = "request_sock_subflow_v6"; ++ mptcp_subflow_v6_request_sock_ops.destructor = subflow_v6_req_destructor; ++ ++ if (subflow_ops_init(&mptcp_subflow_v6_request_sock_ops) != 0) ++ panic("MPTCP: failed to init subflow v6 request sock ops\n"); ++ + subflow_request_sock_ipv6_ops = tcp_request_sock_ipv6_ops; + subflow_request_sock_ipv6_ops.route_req = subflow_v6_route_req; + diff --git a/net/ncsi/ncsi-cmd.c b/net/ncsi/ncsi-cmd.c index ba9ae482141b0..dda8b76b77988 100644 --- a/net/ncsi/ncsi-cmd.c @@ -396892,11 +510478,644 @@ index 63d032191e626..cca0762a90102 100644 ret = NF_DROP_GETERR(verdict); if (ret == 0) ret = -EPERM; +diff --git a/net/netfilter/ipset/ip_set_bitmap_ip.c b/net/netfilter/ipset/ip_set_bitmap_ip.c +index a8ce04a4bb72a..e4fa00abde6a2 100644 +--- a/net/netfilter/ipset/ip_set_bitmap_ip.c ++++ b/net/netfilter/ipset/ip_set_bitmap_ip.c +@@ -308,8 +308,8 @@ bitmap_ip_create(struct net *net, struct ip_set *set, struct nlattr *tb[], + return -IPSET_ERR_BITMAP_RANGE; + + pr_debug("mask_bits %u, netmask %u\n", mask_bits, netmask); +- hosts = 2 << (32 - netmask - 1); +- elements = 2 << (netmask - mask_bits - 1); ++ hosts = 2U << (32 - netmask - 1); ++ elements = 2UL << (netmask - mask_bits - 1); + } + if (elements > IPSET_BITMAP_MAX_RANGE + 1) + return -IPSET_ERR_BITMAP_RANGE_SIZE; +diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c +index 16ae92054baa8..ae061b27e4465 100644 +--- a/net/netfilter/ipset/ip_set_core.c ++++ b/net/netfilter/ipset/ip_set_core.c +@@ -1698,9 +1698,10 @@ call_ad(struct net *net, struct sock *ctnl, struct sk_buff *skb, + ret = set->variant->uadt(set, tb, adt, &lineno, flags, retried); + ip_set_unlock(set); + retried = true; +- } while (ret == -EAGAIN && +- set->variant->resize && +- (ret = set->variant->resize(set, retried)) == 0); ++ } while (ret == -ERANGE || ++ (ret == -EAGAIN && ++ set->variant->resize && ++ (ret = set->variant->resize(set, retried)) == 0)); + + if (!ret || (ret == -IPSET_ERR_EXIST && eexist)) + return 0; +diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h +index 6e391308431da..7499192af5866 100644 +--- a/net/netfilter/ipset/ip_set_hash_gen.h ++++ b/net/netfilter/ipset/ip_set_hash_gen.h +@@ -42,31 +42,8 @@ + #define AHASH_MAX_SIZE (6 * AHASH_INIT_SIZE) + /* Max muber of elements in the array block when tuned */ + #define AHASH_MAX_TUNED 64 +- + #define AHASH_MAX(h) ((h)->bucketsize) + +-/* Max number of elements can be tuned */ +-#ifdef IP_SET_HASH_WITH_MULTI +-static u8 +-tune_bucketsize(u8 curr, u32 multi) +-{ +- u32 n; +- +- if (multi < curr) +- return curr; +- +- n = curr + AHASH_INIT_SIZE; +- /* Currently, at listing one hash bucket must fit into a message. +- * Therefore we have a hard limit here. +- */ +- return n > curr && n <= AHASH_MAX_TUNED ? 
n : curr; +-} +-#define TUNE_BUCKETSIZE(h, multi) \ +- ((h)->bucketsize = tune_bucketsize((h)->bucketsize, multi)) +-#else +-#define TUNE_BUCKETSIZE(h, multi) +-#endif +- + /* A hash bucket */ + struct hbucket { + struct rcu_head rcu; /* for call_rcu */ +@@ -936,7 +913,12 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext, + goto set_full; + /* Create a new slot */ + if (n->pos >= n->size) { +- TUNE_BUCKETSIZE(h, multi); ++#ifdef IP_SET_HASH_WITH_MULTI ++ if (h->bucketsize >= AHASH_MAX_TUNED) ++ goto set_full; ++ else if (h->bucketsize <= multi) ++ h->bucketsize += AHASH_INIT_SIZE; ++#endif + if (n->size >= AHASH_MAX(h)) { + /* Trigger rehashing */ + mtype_data_next(&h->next, d); +diff --git a/net/netfilter/ipset/ip_set_hash_ip.c b/net/netfilter/ipset/ip_set_hash_ip.c +index dd30c03d5a23f..24adcdd7a0b16 100644 +--- a/net/netfilter/ipset/ip_set_hash_ip.c ++++ b/net/netfilter/ipset/ip_set_hash_ip.c +@@ -98,11 +98,11 @@ static int + hash_ip4_uadt(struct ip_set *set, struct nlattr *tb[], + enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) + { +- const struct hash_ip4 *h = set->data; ++ struct hash_ip4 *h = set->data; + ipset_adtfn adtfn = set->variant->adt[adt]; + struct hash_ip4_elem e = { 0 }; + struct ip_set_ext ext = IP_SET_INIT_UEXT(set); +- u32 ip = 0, ip_to = 0, hosts; ++ u32 ip = 0, ip_to = 0, hosts, i = 0; + int ret = 0; + + if (tb[IPSET_ATTR_LINENO]) +@@ -147,22 +147,20 @@ hash_ip4_uadt(struct ip_set *set, struct nlattr *tb[], + + hosts = h->netmask == 32 ? 1 : 2 << (32 - h->netmask - 1); + +- /* 64bit division is not allowed on 32bit */ +- if (((u64)ip_to - ip + 1) >> (32 - h->netmask) > IPSET_MAX_RANGE) +- return -ERANGE; +- +- if (retried) { ++ if (retried) + ip = ntohl(h->next.ip); ++ for (; ip <= ip_to; i++) { + e.ip = htonl(ip); +- } +- for (; ip <= ip_to;) { ++ if (i > IPSET_MAX_RANGE) { ++ hash_ip4_data_next(&h->next, &e); ++ return -ERANGE; ++ } + ret = adtfn(set, &e, &ext, &ext, flags); + if (ret && !ip_set_eexist(ret, flags)) + return ret; + + ip += hosts; +- e.ip = htonl(ip); +- if (e.ip == 0) ++ if (ip == 0) + return 0; + + ret = 0; +diff --git a/net/netfilter/ipset/ip_set_hash_ipmark.c b/net/netfilter/ipset/ip_set_hash_ipmark.c +index 153de3457423e..a22ec1a6f6ec8 100644 +--- a/net/netfilter/ipset/ip_set_hash_ipmark.c ++++ b/net/netfilter/ipset/ip_set_hash_ipmark.c +@@ -97,11 +97,11 @@ static int + hash_ipmark4_uadt(struct ip_set *set, struct nlattr *tb[], + enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) + { +- const struct hash_ipmark4 *h = set->data; ++ struct hash_ipmark4 *h = set->data; + ipset_adtfn adtfn = set->variant->adt[adt]; + struct hash_ipmark4_elem e = { }; + struct ip_set_ext ext = IP_SET_INIT_UEXT(set); +- u32 ip, ip_to = 0; ++ u32 ip, ip_to = 0, i = 0; + int ret; + + if (tb[IPSET_ATTR_LINENO]) +@@ -148,13 +148,14 @@ hash_ipmark4_uadt(struct ip_set *set, struct nlattr *tb[], + ip_set_mask_from_to(ip, ip_to, cidr); + } + +- if (((u64)ip_to - ip + 1) > IPSET_MAX_RANGE) +- return -ERANGE; +- + if (retried) + ip = ntohl(h->next.ip); +- for (; ip <= ip_to; ip++) { ++ for (; ip <= ip_to; ip++, i++) { + e.ip = htonl(ip); ++ if (i > IPSET_MAX_RANGE) { ++ hash_ipmark4_data_next(&h->next, &e); ++ return -ERANGE; ++ } + ret = adtfn(set, &e, &ext, &ext, flags); + + if (ret && !ip_set_eexist(ret, flags)) +diff --git a/net/netfilter/ipset/ip_set_hash_ipport.c b/net/netfilter/ipset/ip_set_hash_ipport.c +index 7303138e46be1..10481760a9b25 100644 +--- a/net/netfilter/ipset/ip_set_hash_ipport.c ++++ 
b/net/netfilter/ipset/ip_set_hash_ipport.c +@@ -105,11 +105,11 @@ static int + hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[], + enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) + { +- const struct hash_ipport4 *h = set->data; ++ struct hash_ipport4 *h = set->data; + ipset_adtfn adtfn = set->variant->adt[adt]; + struct hash_ipport4_elem e = { .ip = 0 }; + struct ip_set_ext ext = IP_SET_INIT_UEXT(set); +- u32 ip, ip_to = 0, p = 0, port, port_to; ++ u32 ip, ip_to = 0, p = 0, port, port_to, i = 0; + bool with_ports = false; + int ret; + +@@ -173,17 +173,18 @@ hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[], + swap(port, port_to); + } + +- if (((u64)ip_to - ip + 1)*(port_to - port + 1) > IPSET_MAX_RANGE) +- return -ERANGE; +- + if (retried) + ip = ntohl(h->next.ip); + for (; ip <= ip_to; ip++) { + p = retried && ip == ntohl(h->next.ip) ? ntohs(h->next.port) + : port; +- for (; p <= port_to; p++) { ++ for (; p <= port_to; p++, i++) { + e.ip = htonl(ip); + e.port = htons(p); ++ if (i > IPSET_MAX_RANGE) { ++ hash_ipport4_data_next(&h->next, &e); ++ return -ERANGE; ++ } + ret = adtfn(set, &e, &ext, &ext, flags); + + if (ret && !ip_set_eexist(ret, flags)) +diff --git a/net/netfilter/ipset/ip_set_hash_ipportip.c b/net/netfilter/ipset/ip_set_hash_ipportip.c +index 334fb1ad0e86c..39a01934b1536 100644 +--- a/net/netfilter/ipset/ip_set_hash_ipportip.c ++++ b/net/netfilter/ipset/ip_set_hash_ipportip.c +@@ -108,11 +108,11 @@ static int + hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[], + enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) + { +- const struct hash_ipportip4 *h = set->data; ++ struct hash_ipportip4 *h = set->data; + ipset_adtfn adtfn = set->variant->adt[adt]; + struct hash_ipportip4_elem e = { .ip = 0 }; + struct ip_set_ext ext = IP_SET_INIT_UEXT(set); +- u32 ip, ip_to = 0, p = 0, port, port_to; ++ u32 ip, ip_to = 0, p = 0, port, port_to, i = 0; + bool with_ports = false; + int ret; + +@@ -180,17 +180,18 @@ hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[], + swap(port, port_to); + } + +- if (((u64)ip_to - ip + 1)*(port_to - port + 1) > IPSET_MAX_RANGE) +- return -ERANGE; +- + if (retried) + ip = ntohl(h->next.ip); + for (; ip <= ip_to; ip++) { + p = retried && ip == ntohl(h->next.ip) ? 
ntohs(h->next.port) + : port; +- for (; p <= port_to; p++) { ++ for (; p <= port_to; p++, i++) { + e.ip = htonl(ip); + e.port = htons(p); ++ if (i > IPSET_MAX_RANGE) { ++ hash_ipportip4_data_next(&h->next, &e); ++ return -ERANGE; ++ } + ret = adtfn(set, &e, &ext, &ext, flags); + + if (ret && !ip_set_eexist(ret, flags)) +diff --git a/net/netfilter/ipset/ip_set_hash_ipportnet.c b/net/netfilter/ipset/ip_set_hash_ipportnet.c +index 7df94f437f600..5c6de605a9fb7 100644 +--- a/net/netfilter/ipset/ip_set_hash_ipportnet.c ++++ b/net/netfilter/ipset/ip_set_hash_ipportnet.c +@@ -160,12 +160,12 @@ static int + hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[], + enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) + { +- const struct hash_ipportnet4 *h = set->data; ++ struct hash_ipportnet4 *h = set->data; + ipset_adtfn adtfn = set->variant->adt[adt]; + struct hash_ipportnet4_elem e = { .cidr = HOST_MASK - 1 }; + struct ip_set_ext ext = IP_SET_INIT_UEXT(set); + u32 ip = 0, ip_to = 0, p = 0, port, port_to; +- u32 ip2_from = 0, ip2_to = 0, ip2; ++ u32 ip2_from = 0, ip2_to = 0, ip2, i = 0; + bool with_ports = false; + u8 cidr; + int ret; +@@ -253,9 +253,6 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[], + swap(port, port_to); + } + +- if (((u64)ip_to - ip + 1)*(port_to - port + 1) > IPSET_MAX_RANGE) +- return -ERANGE; +- + ip2_to = ip2_from; + if (tb[IPSET_ATTR_IP2_TO]) { + ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP2_TO], &ip2_to); +@@ -282,9 +279,15 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[], + for (; p <= port_to; p++) { + e.port = htons(p); + do { ++ i++; + e.ip2 = htonl(ip2); + ip2 = ip_set_range_to_cidr(ip2, ip2_to, &cidr); + e.cidr = cidr - 1; ++ if (i > IPSET_MAX_RANGE) { ++ hash_ipportnet4_data_next(&h->next, ++ &e); ++ return -ERANGE; ++ } + ret = adtfn(set, &e, &ext, &ext, flags); + + if (ret && !ip_set_eexist(ret, flags)) +diff --git a/net/netfilter/ipset/ip_set_hash_net.c b/net/netfilter/ipset/ip_set_hash_net.c +index 1422739d9aa25..ce0a9ce5a91f1 100644 +--- a/net/netfilter/ipset/ip_set_hash_net.c ++++ b/net/netfilter/ipset/ip_set_hash_net.c +@@ -136,11 +136,11 @@ static int + hash_net4_uadt(struct ip_set *set, struct nlattr *tb[], + enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) + { +- const struct hash_net4 *h = set->data; ++ struct hash_net4 *h = set->data; + ipset_adtfn adtfn = set->variant->adt[adt]; + struct hash_net4_elem e = { .cidr = HOST_MASK }; + struct ip_set_ext ext = IP_SET_INIT_UEXT(set); +- u32 ip = 0, ip_to = 0, ipn, n = 0; ++ u32 ip = 0, ip_to = 0, i = 0; + int ret; + + if (tb[IPSET_ATTR_LINENO]) +@@ -188,19 +188,16 @@ hash_net4_uadt(struct ip_set *set, struct nlattr *tb[], + if (ip + UINT_MAX == ip_to) + return -IPSET_ERR_HASH_RANGE; + } +- ipn = ip; +- do { +- ipn = ip_set_range_to_cidr(ipn, ip_to, &e.cidr); +- n++; +- } while (ipn++ < ip_to); +- +- if (n > IPSET_MAX_RANGE) +- return -ERANGE; + + if (retried) + ip = ntohl(h->next.ip); + do { ++ i++; + e.ip = htonl(ip); ++ if (i > IPSET_MAX_RANGE) { ++ hash_net4_data_next(&h->next, &e); ++ return -ERANGE; ++ } + ip = ip_set_range_to_cidr(ip, ip_to, &e.cidr); + ret = adtfn(set, &e, &ext, &ext, flags); + if (ret && !ip_set_eexist(ret, flags)) +diff --git a/net/netfilter/ipset/ip_set_hash_netiface.c b/net/netfilter/ipset/ip_set_hash_netiface.c +index 9810f5bf63f5e..0310732862362 100644 +--- a/net/netfilter/ipset/ip_set_hash_netiface.c ++++ b/net/netfilter/ipset/ip_set_hash_netiface.c +@@ -202,7 +202,7 @@ hash_netiface4_uadt(struct ip_set *set, struct nlattr 
*tb[], + ipset_adtfn adtfn = set->variant->adt[adt]; + struct hash_netiface4_elem e = { .cidr = HOST_MASK, .elem = 1 }; + struct ip_set_ext ext = IP_SET_INIT_UEXT(set); +- u32 ip = 0, ip_to = 0, ipn, n = 0; ++ u32 ip = 0, ip_to = 0, i = 0; + int ret; + + if (tb[IPSET_ATTR_LINENO]) +@@ -256,19 +256,16 @@ hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[], + } else { + ip_set_mask_from_to(ip, ip_to, e.cidr); + } +- ipn = ip; +- do { +- ipn = ip_set_range_to_cidr(ipn, ip_to, &e.cidr); +- n++; +- } while (ipn++ < ip_to); +- +- if (n > IPSET_MAX_RANGE) +- return -ERANGE; + + if (retried) + ip = ntohl(h->next.ip); + do { ++ i++; + e.ip = htonl(ip); ++ if (i > IPSET_MAX_RANGE) { ++ hash_netiface4_data_next(&h->next, &e); ++ return -ERANGE; ++ } + ip = ip_set_range_to_cidr(ip, ip_to, &e.cidr); + ret = adtfn(set, &e, &ext, &ext, flags); + +diff --git a/net/netfilter/ipset/ip_set_hash_netnet.c b/net/netfilter/ipset/ip_set_hash_netnet.c +index 3d09eefe998a7..c07b70bf32db4 100644 +--- a/net/netfilter/ipset/ip_set_hash_netnet.c ++++ b/net/netfilter/ipset/ip_set_hash_netnet.c +@@ -163,13 +163,12 @@ static int + hash_netnet4_uadt(struct ip_set *set, struct nlattr *tb[], + enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) + { +- const struct hash_netnet4 *h = set->data; ++ struct hash_netnet4 *h = set->data; + ipset_adtfn adtfn = set->variant->adt[adt]; + struct hash_netnet4_elem e = { }; + struct ip_set_ext ext = IP_SET_INIT_UEXT(set); + u32 ip = 0, ip_to = 0; +- u32 ip2 = 0, ip2_from = 0, ip2_to = 0, ipn; +- u64 n = 0, m = 0; ++ u32 ip2 = 0, ip2_from = 0, ip2_to = 0, i = 0; + int ret; + + if (tb[IPSET_ATTR_LINENO]) +@@ -245,19 +244,6 @@ hash_netnet4_uadt(struct ip_set *set, struct nlattr *tb[], + } else { + ip_set_mask_from_to(ip2_from, ip2_to, e.cidr[1]); + } +- ipn = ip; +- do { +- ipn = ip_set_range_to_cidr(ipn, ip_to, &e.cidr[0]); +- n++; +- } while (ipn++ < ip_to); +- ipn = ip2_from; +- do { +- ipn = ip_set_range_to_cidr(ipn, ip2_to, &e.cidr[1]); +- m++; +- } while (ipn++ < ip2_to); +- +- if (n*m > IPSET_MAX_RANGE) +- return -ERANGE; + + if (retried) { + ip = ntohl(h->next.ip[0]); +@@ -270,7 +256,12 @@ hash_netnet4_uadt(struct ip_set *set, struct nlattr *tb[], + e.ip[0] = htonl(ip); + ip = ip_set_range_to_cidr(ip, ip_to, &e.cidr[0]); + do { ++ i++; + e.ip[1] = htonl(ip2); ++ if (i > IPSET_MAX_RANGE) { ++ hash_netnet4_data_next(&h->next, &e); ++ return -ERANGE; ++ } + ip2 = ip_set_range_to_cidr(ip2, ip2_to, &e.cidr[1]); + ret = adtfn(set, &e, &ext, &ext, flags); + if (ret && !ip_set_eexist(ret, flags)) +diff --git a/net/netfilter/ipset/ip_set_hash_netport.c b/net/netfilter/ipset/ip_set_hash_netport.c +index 09cf72eb37f8d..d1a0628df4ef3 100644 +--- a/net/netfilter/ipset/ip_set_hash_netport.c ++++ b/net/netfilter/ipset/ip_set_hash_netport.c +@@ -154,12 +154,11 @@ static int + hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[], + enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) + { +- const struct hash_netport4 *h = set->data; ++ struct hash_netport4 *h = set->data; + ipset_adtfn adtfn = set->variant->adt[adt]; + struct hash_netport4_elem e = { .cidr = HOST_MASK - 1 }; + struct ip_set_ext ext = IP_SET_INIT_UEXT(set); +- u32 port, port_to, p = 0, ip = 0, ip_to = 0, ipn; +- u64 n = 0; ++ u32 port, port_to, p = 0, ip = 0, ip_to = 0, i = 0; + bool with_ports = false; + u8 cidr; + int ret; +@@ -236,14 +235,6 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[], + } else { + ip_set_mask_from_to(ip, ip_to, e.cidr + 1); + } +- ipn = ip; +- do { +- ipn = 
ip_set_range_to_cidr(ipn, ip_to, &cidr); +- n++; +- } while (ipn++ < ip_to); +- +- if (n*(port_to - port + 1) > IPSET_MAX_RANGE) +- return -ERANGE; + + if (retried) { + ip = ntohl(h->next.ip); +@@ -255,8 +246,12 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[], + e.ip = htonl(ip); + ip = ip_set_range_to_cidr(ip, ip_to, &cidr); + e.cidr = cidr - 1; +- for (; p <= port_to; p++) { ++ for (; p <= port_to; p++, i++) { + e.port = htons(p); ++ if (i > IPSET_MAX_RANGE) { ++ hash_netport4_data_next(&h->next, &e); ++ return -ERANGE; ++ } + ret = adtfn(set, &e, &ext, &ext, flags); + if (ret && !ip_set_eexist(ret, flags)) + return ret; +diff --git a/net/netfilter/ipset/ip_set_hash_netportnet.c b/net/netfilter/ipset/ip_set_hash_netportnet.c +index 19bcdb3141f6e..005a7ce87217e 100644 +--- a/net/netfilter/ipset/ip_set_hash_netportnet.c ++++ b/net/netfilter/ipset/ip_set_hash_netportnet.c +@@ -173,17 +173,26 @@ hash_netportnet4_kadt(struct ip_set *set, const struct sk_buff *skb, + return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags); + } + ++static u32 ++hash_netportnet4_range_to_cidr(u32 from, u32 to, u8 *cidr) ++{ ++ if (from == 0 && to == UINT_MAX) { ++ *cidr = 0; ++ return to; ++ } ++ return ip_set_range_to_cidr(from, to, cidr); ++} ++ + static int + hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[], + enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) + { +- const struct hash_netportnet4 *h = set->data; ++ struct hash_netportnet4 *h = set->data; + ipset_adtfn adtfn = set->variant->adt[adt]; + struct hash_netportnet4_elem e = { }; + struct ip_set_ext ext = IP_SET_INIT_UEXT(set); + u32 ip = 0, ip_to = 0, p = 0, port, port_to; +- u32 ip2_from = 0, ip2_to = 0, ip2, ipn; +- u64 n = 0, m = 0; ++ u32 ip2_from = 0, ip2_to = 0, ip2, i = 0; + bool with_ports = false; + int ret; + +@@ -285,19 +294,6 @@ hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[], + } else { + ip_set_mask_from_to(ip2_from, ip2_to, e.cidr[1]); + } +- ipn = ip; +- do { +- ipn = ip_set_range_to_cidr(ipn, ip_to, &e.cidr[0]); +- n++; +- } while (ipn++ < ip_to); +- ipn = ip2_from; +- do { +- ipn = ip_set_range_to_cidr(ipn, ip2_to, &e.cidr[1]); +- m++; +- } while (ipn++ < ip2_to); +- +- if (n*m*(port_to - port + 1) > IPSET_MAX_RANGE) +- return -ERANGE; + + if (retried) { + ip = ntohl(h->next.ip[0]); +@@ -310,13 +306,19 @@ hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[], + + do { + e.ip[0] = htonl(ip); +- ip = ip_set_range_to_cidr(ip, ip_to, &e.cidr[0]); ++ ip = hash_netportnet4_range_to_cidr(ip, ip_to, &e.cidr[0]); + for (; p <= port_to; p++) { + e.port = htons(p); + do { ++ i++; + e.ip[1] = htonl(ip2); +- ip2 = ip_set_range_to_cidr(ip2, ip2_to, +- &e.cidr[1]); ++ if (i > IPSET_MAX_RANGE) { ++ hash_netportnet4_data_next(&h->next, ++ &e); ++ return -ERANGE; ++ } ++ ip2 = hash_netportnet4_range_to_cidr(ip2, ++ ip2_to, &e.cidr[1]); + ret = adtfn(set, &e, &ext, &ext, flags); + if (ret && !ip_set_eexist(ret, flags)) + return ret; +diff --git a/net/netfilter/ipvs/ip_vs_app.c b/net/netfilter/ipvs/ip_vs_app.c +index f9b16f2b22191..fdacbc3c15bef 100644 +--- a/net/netfilter/ipvs/ip_vs_app.c ++++ b/net/netfilter/ipvs/ip_vs_app.c +@@ -599,13 +599,19 @@ static const struct seq_operations ip_vs_app_seq_ops = { + int __net_init ip_vs_app_net_init(struct netns_ipvs *ipvs) + { + INIT_LIST_HEAD(&ipvs->app_list); +- proc_create_net("ip_vs_app", 0, ipvs->net->proc_net, &ip_vs_app_seq_ops, +- sizeof(struct seq_net_private)); ++#ifdef CONFIG_PROC_FS ++ if (!proc_create_net("ip_vs_app", 0, ipvs->net->proc_net, 
++ &ip_vs_app_seq_ops, ++ sizeof(struct seq_net_private))) ++ return -ENOMEM; ++#endif + return 0; + } + + void __net_exit ip_vs_app_net_cleanup(struct netns_ipvs *ipvs) + { + unregister_ip_vs_app(ipvs, NULL /* all */); ++#ifdef CONFIG_PROC_FS + remove_proc_entry("ip_vs_app", ipvs->net->proc_net); ++#endif + } diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c -index 2c467c422dc63..fb67f1ca2495b 100644 +index 2c467c422dc63..cb6d68220c265 100644 --- a/net/netfilter/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c -@@ -1495,7 +1495,7 @@ int __init ip_vs_conn_init(void) +@@ -1265,8 +1265,8 @@ static inline int todrop_entry(struct ip_vs_conn *cp) + * The drop rate array needs tuning for real environments. + * Called from timer bh only => no locking + */ +- static const char todrop_rate[9] = {0, 1, 2, 3, 4, 5, 6, 7, 8}; +- static char todrop_counter[9] = {0}; ++ static const signed char todrop_rate[9] = {0, 1, 2, 3, 4, 5, 6, 7, 8}; ++ static signed char todrop_counter[9] = {0}; + int i; + + /* if the conn entry hasn't lasted for 60 seconds, don't drop it. +@@ -1447,20 +1447,36 @@ int __net_init ip_vs_conn_net_init(struct netns_ipvs *ipvs) + { + atomic_set(&ipvs->conn_count, 0); + +- proc_create_net("ip_vs_conn", 0, ipvs->net->proc_net, +- &ip_vs_conn_seq_ops, sizeof(struct ip_vs_iter_state)); +- proc_create_net("ip_vs_conn_sync", 0, ipvs->net->proc_net, +- &ip_vs_conn_sync_seq_ops, +- sizeof(struct ip_vs_iter_state)); ++#ifdef CONFIG_PROC_FS ++ if (!proc_create_net("ip_vs_conn", 0, ipvs->net->proc_net, ++ &ip_vs_conn_seq_ops, ++ sizeof(struct ip_vs_iter_state))) ++ goto err_conn; ++ ++ if (!proc_create_net("ip_vs_conn_sync", 0, ipvs->net->proc_net, ++ &ip_vs_conn_sync_seq_ops, ++ sizeof(struct ip_vs_iter_state))) ++ goto err_conn_sync; ++#endif ++ + return 0; ++ ++#ifdef CONFIG_PROC_FS ++err_conn_sync: ++ remove_proc_entry("ip_vs_conn", ipvs->net->proc_net); ++err_conn: ++ return -ENOMEM; ++#endif + } + + void __net_exit ip_vs_conn_net_cleanup(struct netns_ipvs *ipvs) + { + /* flush all the connection entries first */ + ip_vs_conn_flush(ipvs); ++#ifdef CONFIG_PROC_FS + remove_proc_entry("ip_vs_conn", ipvs->net->proc_net); + remove_proc_entry("ip_vs_conn_sync", ipvs->net->proc_net); ++#endif + } + + int __init ip_vs_conn_init(void) +@@ -1495,7 +1511,7 @@ int __init ip_vs_conn_init(void) pr_info("Connection hash table configured " "(size=%d, memory=%ldKbytes)\n", ip_vs_conn_tab_size, @@ -396957,7 +511176,7 @@ index 9d43277b8b4fe..a56fd0b5a430a 100644 sk->sk_userlocks |= SOCK_RCVBUF_LOCK; } diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c -index 770a63103c7a4..9da5ee6c50cdd 100644 +index 770a63103c7a4..43ea8cfd374bb 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -66,6 +66,9 @@ EXPORT_SYMBOL_GPL(nf_conntrack_hash); @@ -397286,6 +511505,15 @@ index 770a63103c7a4..9da5ee6c50cdd 100644 nf_ct_ext_destroy(ct); kmem_cache_free(nf_conntrack_cachep, ct); +@@ -1676,7 +1735,7 @@ init_conntrack(struct net *net, struct nf_conn *tmpl, + } + + #ifdef CONFIG_NF_CONNTRACK_MARK +- ct->mark = exp->master->mark; ++ ct->mark = READ_ONCE(exp->master->mark); + #endif + #ifdef CONFIG_NF_CONNTRACK_SECMARK + ct->secmark = exp->master->secmark; @@ -1688,8 +1747,8 @@ init_conntrack(struct net *net, struct nf_conn *tmpl, if (!exp) __nf_ct_try_assign_helper(ct, tmpl, GFP_ATOMIC); @@ -397444,10 +511672,24 @@ index 08ee4e760a3d2..159e1e4441a43 100644 &tuple->src.u3.ip, &dcc_ip, dcc_port); diff --git 
a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c -index f1e5443fe7c74..ef0a78aa9ba9e 100644 +index f1e5443fe7c74..2cc6092b4f865 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c -@@ -508,7 +508,7 @@ nla_put_failure: +@@ -324,7 +324,12 @@ nla_put_failure: + #ifdef CONFIG_NF_CONNTRACK_MARK + static int ctnetlink_dump_mark(struct sk_buff *skb, const struct nf_conn *ct) + { +- if (nla_put_be32(skb, CTA_MARK, htonl(ct->mark))) ++ u32 mark = READ_ONCE(ct->mark); ++ ++ if (!mark) ++ return 0; ++ ++ if (nla_put_be32(skb, CTA_MARK, htonl(mark))) + goto nla_put_failure; + return 0; + +@@ -508,7 +513,7 @@ nla_put_failure: static int ctnetlink_dump_use(struct sk_buff *skb, const struct nf_conn *ct) { @@ -397456,7 +511698,18 @@ index f1e5443fe7c74..ef0a78aa9ba9e 100644 goto nla_put_failure; return 0; -@@ -1011,11 +1011,9 @@ ctnetlink_alloc_filter(const struct nlattr * const cda[], u8 family) +@@ -820,8 +825,8 @@ ctnetlink_conntrack_event(unsigned int events, const struct nf_ct_event *item) + } + + #ifdef CONFIG_NF_CONNTRACK_MARK +- if ((events & (1 << IPCT_MARK) || ct->mark) +- && ctnetlink_dump_mark(skb, ct) < 0) ++ if (events & (1 << IPCT_MARK) && ++ ctnetlink_dump_mark(skb, ct) < 0) + goto nla_put_failure; + #endif + nlmsg_end(skb, nlh); +@@ -1011,11 +1016,9 @@ ctnetlink_alloc_filter(const struct nlattr * const cda[], u8 family) CTA_TUPLE_REPLY, filter->family, &filter->zone, @@ -397470,7 +511723,16 @@ index f1e5443fe7c74..ef0a78aa9ba9e 100644 } return filter; -@@ -1202,7 +1200,7 @@ restart: +@@ -1150,7 +1153,7 @@ static int ctnetlink_filter_match(struct nf_conn *ct, void *data) + } + + #ifdef CONFIG_NF_CONNTRACK_MARK +- if ((ct->mark & filter->mark.mask) != filter->mark.val) ++ if ((READ_ONCE(ct->mark) & filter->mark.mask) != filter->mark.val) + goto ignore_entry; + #endif + status = (u32)READ_ONCE(ct->status); +@@ -1202,7 +1205,7 @@ restart: ct = nf_ct_tuplehash_to_ctrack(h); if (nf_ct_is_expired(ct)) { if (i < ARRAY_SIZE(nf_ct_evict) && @@ -397479,7 +511741,7 @@ index f1e5443fe7c74..ef0a78aa9ba9e 100644 nf_ct_evict[i++] = ct; continue; } -@@ -1750,7 +1748,7 @@ restart: +@@ -1750,7 +1753,7 @@ restart: NFNL_MSG_TYPE(cb->nlh->nlmsg_type), ct, dying ? 
true : false, 0); if (res < 0) { @@ -397488,7 +511750,7 @@ index f1e5443fe7c74..ef0a78aa9ba9e 100644 continue; cb->args[0] = cpu; cb->args[1] = (unsigned long)ct; -@@ -2000,7 +1998,7 @@ static int ctnetlink_change_timeout(struct nf_conn *ct, +@@ -2000,7 +2003,7 @@ static int ctnetlink_change_timeout(struct nf_conn *ct, if (timeout > INT_MAX) timeout = INT_MAX; @@ -397497,7 +511759,20 @@ index f1e5443fe7c74..ef0a78aa9ba9e 100644 if (test_bit(IPS_DYING_BIT, &ct->status)) return -ETIME; -@@ -2312,7 +2310,8 @@ ctnetlink_create_conntrack(struct net *net, +@@ -2018,9 +2021,9 @@ static void ctnetlink_change_mark(struct nf_conn *ct, + mask = ~ntohl(nla_get_be32(cda[CTA_MARK_MASK])); + + mark = ntohl(nla_get_be32(cda[CTA_MARK])); +- newmark = (ct->mark & mask) ^ mark; +- if (newmark != ct->mark) +- ct->mark = newmark; ++ newmark = (READ_ONCE(ct->mark) & mask) ^ mark; ++ if (newmark != READ_ONCE(ct->mark)) ++ WRITE_ONCE(ct->mark, newmark); + } + #endif + +@@ -2312,7 +2315,8 @@ ctnetlink_create_conntrack(struct net *net, if (helper->from_nlattr) helper->from_nlattr(helpinfo, ct); @@ -397507,11 +511782,252 @@ index f1e5443fe7c74..ef0a78aa9ba9e 100644 RCU_INIT_POINTER(help->helper, helper); } } else { +@@ -2752,7 +2756,7 @@ static int __ctnetlink_glue_build(struct sk_buff *skb, struct nf_conn *ct) + goto nla_put_failure; + + #ifdef CONFIG_NF_CONNTRACK_MARK +- if (ct->mark && ctnetlink_dump_mark(skb, ct) < 0) ++ if (ctnetlink_dump_mark(skb, ct) < 0) + goto nla_put_failure; + #endif + if (ctnetlink_dump_labels(skb, ct) < 0) +diff --git a/net/netfilter/nf_conntrack_proto_icmpv6.c b/net/netfilter/nf_conntrack_proto_icmpv6.c +index 61e3b05cf02c3..1020d67600a95 100644 +--- a/net/netfilter/nf_conntrack_proto_icmpv6.c ++++ b/net/netfilter/nf_conntrack_proto_icmpv6.c +@@ -129,6 +129,56 @@ static void icmpv6_error_log(const struct sk_buff *skb, + nf_l4proto_log_invalid(skb, state, IPPROTO_ICMPV6, "%s", msg); + } + ++static noinline_for_stack int ++nf_conntrack_icmpv6_redirect(struct nf_conn *tmpl, struct sk_buff *skb, ++ unsigned int dataoff, ++ const struct nf_hook_state *state) ++{ ++ u8 hl = ipv6_hdr(skb)->hop_limit; ++ union nf_inet_addr outer_daddr; ++ union { ++ struct nd_opt_hdr nd_opt; ++ struct rd_msg rd_msg; ++ } tmp; ++ const struct nd_opt_hdr *nd_opt; ++ const struct rd_msg *rd_msg; ++ ++ rd_msg = skb_header_pointer(skb, dataoff, sizeof(*rd_msg), &tmp.rd_msg); ++ if (!rd_msg) { ++ icmpv6_error_log(skb, state, "short redirect"); ++ return -NF_ACCEPT; ++ } ++ ++ if (rd_msg->icmph.icmp6_code != 0) ++ return NF_ACCEPT; ++ ++ if (hl != 255 || !(ipv6_addr_type(&ipv6_hdr(skb)->saddr) & IPV6_ADDR_LINKLOCAL)) { ++ icmpv6_error_log(skb, state, "invalid saddr or hoplimit for redirect"); ++ return -NF_ACCEPT; ++ } ++ ++ dataoff += sizeof(*rd_msg); ++ ++ /* warning: rd_msg no longer usable after this call */ ++ nd_opt = skb_header_pointer(skb, dataoff, sizeof(*nd_opt), &tmp.nd_opt); ++ if (!nd_opt || nd_opt->nd_opt_len == 0) { ++ icmpv6_error_log(skb, state, "redirect without options"); ++ return -NF_ACCEPT; ++ } ++ ++ /* We could call ndisc_parse_options(), but it would need ++ * skb_linearize() and a bit more work. 
++ */ ++ if (nd_opt->nd_opt_type != ND_OPT_REDIRECT_HDR) ++ return NF_ACCEPT; ++ ++ memcpy(&outer_daddr.ip6, &ipv6_hdr(skb)->daddr, ++ sizeof(outer_daddr.ip6)); ++ dataoff += 8; ++ return nf_conntrack_inet_error(tmpl, skb, dataoff, state, ++ IPPROTO_ICMPV6, &outer_daddr); ++} ++ + int nf_conntrack_icmpv6_error(struct nf_conn *tmpl, + struct sk_buff *skb, + unsigned int dataoff, +@@ -159,6 +209,9 @@ int nf_conntrack_icmpv6_error(struct nf_conn *tmpl, + return NF_ACCEPT; + } + ++ if (icmp6h->icmp6_type == NDISC_REDIRECT) ++ return nf_conntrack_icmpv6_redirect(tmpl, skb, dataoff, state); ++ + /* is not error message ? */ + if (icmp6h->icmp6_type >= 128) + return NF_ACCEPT; diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c -index 2394238d01c91..5a936334b517a 100644 +index 2394238d01c91..895e0ca542994 100644 --- a/net/netfilter/nf_conntrack_proto_sctp.c +++ b/net/netfilter/nf_conntrack_proto_sctp.c -@@ -489,6 +489,15 @@ int nf_conntrack_sctp_packet(struct nf_conn *ct, +@@ -27,22 +27,16 @@ + #include <net/netfilter/nf_conntrack_ecache.h> + #include <net/netfilter/nf_conntrack_timeout.h> + +-/* FIXME: Examine ipfilter's timeouts and conntrack transitions more +- closely. They're more complex. --RR +- +- And so for me for SCTP :D -Kiran */ +- + static const char *const sctp_conntrack_names[] = { +- "NONE", +- "CLOSED", +- "COOKIE_WAIT", +- "COOKIE_ECHOED", +- "ESTABLISHED", +- "SHUTDOWN_SENT", +- "SHUTDOWN_RECD", +- "SHUTDOWN_ACK_SENT", +- "HEARTBEAT_SENT", +- "HEARTBEAT_ACKED", ++ [SCTP_CONNTRACK_NONE] = "NONE", ++ [SCTP_CONNTRACK_CLOSED] = "CLOSED", ++ [SCTP_CONNTRACK_COOKIE_WAIT] = "COOKIE_WAIT", ++ [SCTP_CONNTRACK_COOKIE_ECHOED] = "COOKIE_ECHOED", ++ [SCTP_CONNTRACK_ESTABLISHED] = "ESTABLISHED", ++ [SCTP_CONNTRACK_SHUTDOWN_SENT] = "SHUTDOWN_SENT", ++ [SCTP_CONNTRACK_SHUTDOWN_RECD] = "SHUTDOWN_RECD", ++ [SCTP_CONNTRACK_SHUTDOWN_ACK_SENT] = "SHUTDOWN_ACK_SENT", ++ [SCTP_CONNTRACK_HEARTBEAT_SENT] = "HEARTBEAT_SENT", + }; + + #define SECS * HZ +@@ -54,12 +48,11 @@ static const unsigned int sctp_timeouts[SCTP_CONNTRACK_MAX] = { + [SCTP_CONNTRACK_CLOSED] = 10 SECS, + [SCTP_CONNTRACK_COOKIE_WAIT] = 3 SECS, + [SCTP_CONNTRACK_COOKIE_ECHOED] = 3 SECS, +- [SCTP_CONNTRACK_ESTABLISHED] = 5 DAYS, ++ [SCTP_CONNTRACK_ESTABLISHED] = 210 SECS, + [SCTP_CONNTRACK_SHUTDOWN_SENT] = 300 SECS / 1000, + [SCTP_CONNTRACK_SHUTDOWN_RECD] = 300 SECS / 1000, + [SCTP_CONNTRACK_SHUTDOWN_ACK_SENT] = 3 SECS, + [SCTP_CONNTRACK_HEARTBEAT_SENT] = 30 SECS, +- [SCTP_CONNTRACK_HEARTBEAT_ACKED] = 210 SECS, + }; + + #define SCTP_FLAG_HEARTBEAT_VTAG_FAILED 1 +@@ -73,7 +66,6 @@ static const unsigned int sctp_timeouts[SCTP_CONNTRACK_MAX] = { + #define sSR SCTP_CONNTRACK_SHUTDOWN_RECD + #define sSA SCTP_CONNTRACK_SHUTDOWN_ACK_SENT + #define sHS SCTP_CONNTRACK_HEARTBEAT_SENT +-#define sHA SCTP_CONNTRACK_HEARTBEAT_ACKED + #define sIV SCTP_CONNTRACK_MAX + + /* +@@ -96,9 +88,6 @@ SHUTDOWN_ACK_SENT - We have seen a SHUTDOWN_ACK chunk in the direction opposite + CLOSED - We have seen a SHUTDOWN_COMPLETE chunk in the direction of + the SHUTDOWN chunk. Connection is closed. + HEARTBEAT_SENT - We have seen a HEARTBEAT in a new flow. +-HEARTBEAT_ACKED - We have seen a HEARTBEAT-ACK in the direction opposite to +- that of the HEARTBEAT chunk. Secondary connection is +- established. + */ + + /* TODO +@@ -115,33 +104,33 @@ cookie echoed to closed. 
+ static const u8 sctp_conntracks[2][11][SCTP_CONNTRACK_MAX] = { + { + /* ORIGINAL */ +-/* sNO, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS, sHA */ +-/* init */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sCW, sHA}, +-/* init_ack */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sCL, sHA}, +-/* abort */ {sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL}, +-/* shutdown */ {sCL, sCL, sCW, sCE, sSS, sSS, sSR, sSA, sCL, sSS}, +-/* shutdown_ack */ {sSA, sCL, sCW, sCE, sES, sSA, sSA, sSA, sSA, sHA}, +-/* error */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sCL, sHA},/* Can't have Stale cookie*/ +-/* cookie_echo */ {sCL, sCL, sCE, sCE, sES, sSS, sSR, sSA, sCL, sHA},/* 5.2.4 - Big TODO */ +-/* cookie_ack */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sCL, sHA},/* Can't come in orig dir */ +-/* shutdown_comp*/ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sCL, sCL, sHA}, +-/* heartbeat */ {sHS, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS, sHA}, +-/* heartbeat_ack*/ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS, sHA} ++/* sNO, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS */ ++/* init */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sCW}, ++/* init_ack */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sCL}, ++/* abort */ {sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL}, ++/* shutdown */ {sCL, sCL, sCW, sCE, sSS, sSS, sSR, sSA, sCL}, ++/* shutdown_ack */ {sSA, sCL, sCW, sCE, sES, sSA, sSA, sSA, sSA}, ++/* error */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sCL},/* Can't have Stale cookie*/ ++/* cookie_echo */ {sCL, sCL, sCE, sCE, sES, sSS, sSR, sSA, sCL},/* 5.2.4 - Big TODO */ ++/* cookie_ack */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sCL},/* Can't come in orig dir */ ++/* shutdown_comp*/ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sCL, sCL}, ++/* heartbeat */ {sHS, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS}, ++/* heartbeat_ack*/ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS}, + }, + { + /* REPLY */ +-/* sNO, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS, sHA */ +-/* init */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA, sIV, sHA},/* INIT in sCL Big TODO */ +-/* init_ack */ {sIV, sCW, sCW, sCE, sES, sSS, sSR, sSA, sIV, sHA}, +-/* abort */ {sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sIV, sCL}, +-/* shutdown */ {sIV, sCL, sCW, sCE, sSR, sSS, sSR, sSA, sIV, sSR}, +-/* shutdown_ack */ {sIV, sCL, sCW, sCE, sES, sSA, sSA, sSA, sIV, sHA}, +-/* error */ {sIV, sCL, sCW, sCL, sES, sSS, sSR, sSA, sIV, sHA}, +-/* cookie_echo */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA, sIV, sHA},/* Can't come in reply dir */ +-/* cookie_ack */ {sIV, sCL, sCW, sES, sES, sSS, sSR, sSA, sIV, sHA}, +-/* shutdown_comp*/ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sCL, sIV, sHA}, +-/* heartbeat */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS, sHA}, +-/* heartbeat_ack*/ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHA, sHA} ++/* sNO, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS */ ++/* init */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA, sIV},/* INIT in sCL Big TODO */ ++/* init_ack */ {sIV, sCW, sCW, sCE, sES, sSS, sSR, sSA, sIV}, ++/* abort */ {sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sIV}, ++/* shutdown */ {sIV, sCL, sCW, sCE, sSR, sSS, sSR, sSA, sIV}, ++/* shutdown_ack */ {sIV, sCL, sCW, sCE, sES, sSA, sSA, sSA, sIV}, ++/* error */ {sIV, sCL, sCW, sCL, sES, sSS, sSR, sSA, sIV}, ++/* cookie_echo */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA, sIV},/* Can't come in reply dir */ ++/* cookie_ack */ {sIV, sCL, sCW, sES, sES, sSS, sSR, sSA, sIV}, ++/* shutdown_comp*/ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sCL, sIV}, ++/* heartbeat */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS}, ++/* heartbeat_ack*/ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA, sES}, + } + 
}; + +@@ -412,22 +401,29 @@ int nf_conntrack_sctp_packet(struct nf_conn *ct, + for_each_sctp_chunk (skb, sch, _sch, offset, dataoff, count) { + /* Special cases of Verification tag check (Sec 8.5.1) */ + if (sch->type == SCTP_CID_INIT) { +- /* Sec 8.5.1 (A) */ ++ /* (A) vtag MUST be zero */ + if (sh->vtag != 0) + goto out_unlock; + } else if (sch->type == SCTP_CID_ABORT) { +- /* Sec 8.5.1 (B) */ +- if (sh->vtag != ct->proto.sctp.vtag[dir] && +- sh->vtag != ct->proto.sctp.vtag[!dir]) ++ /* (B) vtag MUST match own vtag if T flag is unset OR ++ * MUST match peer's vtag if T flag is set ++ */ ++ if ((!(sch->flags & SCTP_CHUNK_FLAG_T) && ++ sh->vtag != ct->proto.sctp.vtag[dir]) || ++ ((sch->flags & SCTP_CHUNK_FLAG_T) && ++ sh->vtag != ct->proto.sctp.vtag[!dir])) + goto out_unlock; + } else if (sch->type == SCTP_CID_SHUTDOWN_COMPLETE) { +- /* Sec 8.5.1 (C) */ +- if (sh->vtag != ct->proto.sctp.vtag[dir] && +- sh->vtag != ct->proto.sctp.vtag[!dir] && +- sch->flags & SCTP_CHUNK_FLAG_T) ++ /* (C) vtag MUST match own vtag if T flag is unset OR ++ * MUST match peer's vtag if T flag is set ++ */ ++ if ((!(sch->flags & SCTP_CHUNK_FLAG_T) && ++ sh->vtag != ct->proto.sctp.vtag[dir]) || ++ ((sch->flags & SCTP_CHUNK_FLAG_T) && ++ sh->vtag != ct->proto.sctp.vtag[!dir])) + goto out_unlock; + } else if (sch->type == SCTP_CID_COOKIE_ECHO) { +- /* Sec 8.5.1 (D) */ ++ /* (D) vtag must be same as init_vtag as found in INIT_ACK */ + if (sh->vtag != ct->proto.sctp.vtag[dir]) + goto out_unlock; + } else if (sch->type == SCTP_CID_HEARTBEAT) { +@@ -489,11 +485,24 @@ int nf_conntrack_sctp_packet(struct nf_conn *ct, pr_debug("Setting vtag %x for dir %d\n", ih->init_tag, !dir); ct->proto.sctp.vtag[!dir] = ih->init_tag; @@ -397527,6 +512043,31 @@ index 2394238d01c91..5a936334b517a 100644 } ct->proto.sctp.state = new_state; +- if (old_state != new_state) ++ if (old_state != new_state) { + nf_conntrack_event_cache(IPCT_PROTOINFO, ct); ++ if (new_state == SCTP_CONNTRACK_ESTABLISHED && ++ !test_and_set_bit(IPS_ASSURED_BIT, &ct->status)) ++ nf_conntrack_event_cache(IPCT_ASSURED, ct); ++ } + } + spin_unlock_bh(&ct->lock); + +@@ -507,14 +516,6 @@ int nf_conntrack_sctp_packet(struct nf_conn *ct, + + nf_ct_refresh_acct(ct, ctinfo, skb, timeouts[new_state]); + +- if (old_state == SCTP_CONNTRACK_COOKIE_ECHOED && +- dir == IP_CT_DIR_REPLY && +- new_state == SCTP_CONNTRACK_ESTABLISHED) { +- pr_debug("Setting assured bit\n"); +- set_bit(IPS_ASSURED_BIT, &ct->status); +- nf_conntrack_event_cache(IPCT_ASSURED, ct); +- } +- + return NF_ACCEPT; + + out_unlock: diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index af5115e127cfd..1ecfdc4f23be8 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c @@ -397658,7 +512199,7 @@ index b83dc9bf0a5dd..78fd9122b70c7 100644 if (in_header) diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c -index 80f675d884b26..55aa55b252b20 100644 +index 80f675d884b26..338f02a12076b 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -303,7 +303,7 @@ static int ct_seq_show(struct seq_file *s, void *v) @@ -397670,7 +512211,15 @@ index 80f675d884b26..55aa55b252b20 100644 return 0; if (nf_ct_should_gc(ct)) { -@@ -370,7 +370,7 @@ static int ct_seq_show(struct seq_file *s, void *v) +@@ -363,14 +363,14 @@ static int ct_seq_show(struct seq_file *s, void *v) + goto release; + + #if defined(CONFIG_NF_CONNTRACK_MARK) +- seq_printf(s, "mark=%u ", ct->mark); ++ seq_printf(s, "mark=%u ", 
READ_ONCE(ct->mark)); + #endif + + ct_show_secctx(s, ct); ct_show_zone(s, ct, NF_CT_DEFAULT_ZONE_DIR); ct_show_delta_time(s, ct); @@ -397679,7 +512228,15 @@ index 80f675d884b26..55aa55b252b20 100644 if (seq_has_overflowed(s)) goto release; -@@ -823,7 +823,7 @@ static struct ctl_table nf_ct_sysctl_table[] = { +@@ -599,7 +599,6 @@ enum nf_ct_sysctl_index { + NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_SHUTDOWN_RECD, + NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_SHUTDOWN_ACK_SENT, + NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_HEARTBEAT_SENT, +- NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_HEARTBEAT_ACKED, + #endif + #ifdef CONFIG_NF_CT_PROTO_DCCP + NF_SYSCTL_CT_PROTO_TIMEOUT_DCCP_REQUEST, +@@ -823,7 +822,7 @@ static struct ctl_table nf_ct_sysctl_table[] = { .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, @@ -397688,6 +512245,27 @@ index 80f675d884b26..55aa55b252b20 100644 [NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_OFFLOAD] = { .procname = "nf_flowtable_udp_timeout", .maxlen = sizeof(unsigned int), +@@ -892,12 +891,6 @@ static struct ctl_table nf_ct_sysctl_table[] = { + .mode = 0644, + .proc_handler = proc_dointvec_jiffies, + }, +- [NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_HEARTBEAT_ACKED] = { +- .procname = "nf_conntrack_sctp_timeout_heartbeat_acked", +- .maxlen = sizeof(unsigned int), +- .mode = 0644, +- .proc_handler = proc_dointvec_jiffies, +- }, + #endif + #ifdef CONFIG_NF_CT_PROTO_DCCP + [NF_SYSCTL_CT_PROTO_TIMEOUT_DCCP_REQUEST] = { +@@ -1041,7 +1034,6 @@ static void nf_conntrack_standalone_init_sctp_sysctl(struct net *net, + XASSIGN(SHUTDOWN_RECD, sn); + XASSIGN(SHUTDOWN_ACK_SENT, sn); + XASSIGN(HEARTBEAT_SENT, sn); +- XASSIGN(HEARTBEAT_ACKED, sn); + #undef XASSIGN + #endif + } diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c index 87a7388b6c894..4f61eb1282834 100644 --- a/net/netfilter/nf_flow_table_core.c @@ -398001,7 +512579,7 @@ index 889cf88d3dba6..28026467b54cd 100644 return NF_DROP; diff --git a/net/netfilter/nf_flow_table_offload.c b/net/netfilter/nf_flow_table_offload.c -index d6bf1b2cd541b..c4559fae8acd5 100644 +index d6bf1b2cd541b..336f282a221fd 100644 --- a/net/netfilter/nf_flow_table_offload.c +++ b/net/netfilter/nf_flow_table_offload.c @@ -65,11 +65,11 @@ static void nf_flow_rule_lwt_match(struct nf_flow_match *match, @@ -398018,6 +512596,22 @@ index d6bf1b2cd541b..c4559fae8acd5 100644 sizeof(struct in6_addr)); enc_keys |= BIT(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS); key->enc_control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS; +@@ -372,12 +372,12 @@ static void flow_offload_ipv6_mangle(struct nf_flow_rule *flow_rule, + const __be32 *addr, const __be32 *mask) + { + struct flow_action_entry *entry; +- int i, j; ++ int i; + +- for (i = 0, j = 0; i < sizeof(struct in6_addr) / sizeof(u32); i += sizeof(u32), j++) { ++ for (i = 0; i < sizeof(struct in6_addr) / sizeof(u32); i++) { + entry = flow_action_entry_next(flow_rule); + flow_offload_mangle(entry, FLOW_ACT_MANGLE_HDR_TYPE_IP6, +- offset + i, &addr[j], mask); ++ offset + i * sizeof(u32), &addr[i], mask); + } + } + @@ -1050,6 +1050,14 @@ void nf_flow_offload_stats(struct nf_flowtable *flowtable, flow_offload_queue_work(offload); } @@ -398033,6 +512627,32 @@ index d6bf1b2cd541b..c4559fae8acd5 100644 void nf_flow_table_offload_flush(struct nf_flowtable *flowtable) { if (nf_flowtable_hw_offload(flowtable)) { +@@ -1066,6 +1074,7 @@ static int nf_flow_table_block_setup(struct nf_flowtable *flowtable, + struct flow_block_cb *block_cb, *next; + int err = 0; + ++ down_write(&flowtable->flow_block_lock); + switch (cmd) { + case FLOW_BLOCK_BIND: + 
list_splice(&bo->cb_list, &flowtable->flow_block.cb_list); +@@ -1080,6 +1089,7 @@ static int nf_flow_table_block_setup(struct nf_flowtable *flowtable, + WARN_ON_ONCE(1); + err = -EOPNOTSUPP; + } ++ up_write(&flowtable->flow_block_lock); + + return err; + } +@@ -1136,7 +1146,9 @@ static int nf_flow_table_offload_cmd(struct flow_block_offload *bo, + + nf_flow_table_block_offload_init(bo, dev_net(dev), cmd, flowtable, + extack); ++ down_write(&flowtable->flow_block_lock); + err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_FT, bo); ++ up_write(&flowtable->flow_block_lock); + if (err < 0) + return err; + diff --git a/net/netfilter/nf_log_syslog.c b/net/netfilter/nf_log_syslog.c index 13234641cdb34..7000e069bc076 100644 --- a/net/netfilter/nf_log_syslog.c @@ -398180,7 +512800,7 @@ index 3d6d49420db8b..049a88f038011 100644 iph->check = 0; iph->saddr = saddr; diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c -index c0851fec11d46..f7a5b8414423d 100644 +index c0851fec11d46..81bd13b3d8fd4 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -32,7 +32,6 @@ static LIST_HEAD(nf_tables_objects); @@ -398256,7 +512876,49 @@ index c0851fec11d46..f7a5b8414423d 100644 static void nft_trans_commit_list_add_tail(struct net *net, struct nft_trans *trans) { struct nftables_pernet *nft_net = nft_pernet(net); -@@ -529,6 +544,7 @@ static int nft_trans_flowtable_add(struct nft_ctx *ctx, int msg_type, +@@ -450,8 +465,9 @@ static int nft_delrule_by_chain(struct nft_ctx *ctx) + return 0; + } + +-static int nft_trans_set_add(const struct nft_ctx *ctx, int msg_type, +- struct nft_set *set) ++static int __nft_trans_set_add(const struct nft_ctx *ctx, int msg_type, ++ struct nft_set *set, ++ const struct nft_set_desc *desc) + { + struct nft_trans *trans; + +@@ -459,17 +475,28 @@ static int nft_trans_set_add(const struct nft_ctx *ctx, int msg_type, + if (trans == NULL) + return -ENOMEM; + +- if (msg_type == NFT_MSG_NEWSET && ctx->nla[NFTA_SET_ID] != NULL) { ++ if (msg_type == NFT_MSG_NEWSET && ctx->nla[NFTA_SET_ID] && !desc) { + nft_trans_set_id(trans) = + ntohl(nla_get_be32(ctx->nla[NFTA_SET_ID])); + nft_activate_next(ctx->net, set); + } + nft_trans_set(trans) = set; ++ if (desc) { ++ nft_trans_set_update(trans) = true; ++ nft_trans_set_gc_int(trans) = desc->gc_int; ++ nft_trans_set_timeout(trans) = desc->timeout; ++ } + nft_trans_commit_list_add_tail(ctx->net, trans); + + return 0; + } + ++static int nft_trans_set_add(const struct nft_ctx *ctx, int msg_type, ++ struct nft_set *set) ++{ ++ return __nft_trans_set_add(ctx, msg_type, set, NULL); ++} ++ + static int nft_delset(const struct nft_ctx *ctx, struct nft_set *set) + { + int err; +@@ -529,6 +556,7 @@ static int nft_trans_flowtable_add(struct nft_ctx *ctx, int msg_type, if (msg_type == NFT_MSG_NEWFLOWTABLE) nft_activate_next(ctx->net, flowtable); @@ -398264,7 +512926,7 @@ index c0851fec11d46..f7a5b8414423d 100644 nft_trans_flowtable(trans) = flowtable; nft_trans_commit_list_add_tail(ctx->net, trans); -@@ -820,7 +836,7 @@ static int nf_tables_dump_tables(struct sk_buff *skb, +@@ -820,7 +848,7 @@ static int nf_tables_dump_tables(struct sk_buff *skb, rcu_read_lock(); nft_net = nft_pernet(net); @@ -398273,7 +512935,7 @@ index c0851fec11d46..f7a5b8414423d 100644 list_for_each_entry_rcu(table, &nft_net->tables, list) { if (family != NFPROTO_UNSPEC && family != table->family) -@@ -1139,7 +1155,7 @@ static int nf_tables_newtable(struct sk_buff *skb, const struct nfnl_info *info, +@@ -1139,7 +1167,7 @@ static int 
nf_tables_newtable(struct sk_buff *skb, const struct nfnl_info *info, INIT_LIST_HEAD(&table->flowtables); table->family = family; table->flags = flags; @@ -398282,7 +512944,7 @@ index c0851fec11d46..f7a5b8414423d 100644 if (table->flags & NFT_TABLE_F_OWNER) table->nlpid = NETLINK_CB(skb).portid; -@@ -1609,7 +1625,7 @@ static int nf_tables_dump_chains(struct sk_buff *skb, +@@ -1609,7 +1637,7 @@ static int nf_tables_dump_chains(struct sk_buff *skb, rcu_read_lock(); nft_net = nft_pernet(net); @@ -398291,7 +512953,7 @@ index c0851fec11d46..f7a5b8414423d 100644 list_for_each_entry_rcu(table, &nft_net->tables, list) { if (family != NFPROTO_UNSPEC && family != table->family) -@@ -1820,7 +1836,6 @@ static struct nft_hook *nft_netdev_hook_alloc(struct net *net, +@@ -1820,7 +1848,6 @@ static struct nft_hook *nft_netdev_hook_alloc(struct net *net, goto err_hook_dev; } hook->ops.dev = dev; @@ -398299,7 +512961,7 @@ index c0851fec11d46..f7a5b8414423d 100644 return hook; -@@ -2057,8 +2072,10 @@ static int nft_basechain_init(struct nft_base_chain *basechain, u8 family, +@@ -2057,8 +2084,10 @@ static int nft_basechain_init(struct nft_base_chain *basechain, u8 family, chain->flags |= NFT_CHAIN_BASE | flags; basechain->policy = NF_ACCEPT; if (chain->flags & NFT_CHAIN_HW_OFFLOAD && @@ -398311,7 +512973,7 @@ index c0851fec11d46..f7a5b8414423d 100644 flow_block_init(&basechain->flow_block); -@@ -2088,7 +2105,6 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask, +@@ -2088,7 +2117,6 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask, const struct nlattr * const *nla = ctx->nla; struct nft_table *table = ctx->table; struct nft_base_chain *basechain; @@ -398319,7 +512981,7 @@ index c0851fec11d46..f7a5b8414423d 100644 struct net *net = ctx->net; char name[NFT_NAME_MAXLEN]; struct nft_trans *trans; -@@ -2100,6 +2116,7 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask, +@@ -2100,6 +2128,7 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask, return -EOVERFLOW; if (nla[NFTA_CHAIN_HOOK]) { @@ -398327,7 +512989,7 @@ index c0851fec11d46..f7a5b8414423d 100644 struct nft_chain_hook hook; if (flags & NFT_CHAIN_BINDING) -@@ -2125,15 +2142,17 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask, +@@ -2125,15 +2154,17 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask, return PTR_ERR(stats); } rcu_assign_pointer(basechain->stats, stats); @@ -398346,7 +513008,7 @@ index c0851fec11d46..f7a5b8414423d 100644 } else { if (flags & NFT_CHAIN_BASE) return -EINVAL; -@@ -2362,6 +2381,7 @@ err: +@@ -2362,6 +2393,7 @@ err: } static struct nft_chain *nft_chain_lookup_byid(const struct net *net, @@ -398354,7 +513016,7 @@ index c0851fec11d46..f7a5b8414423d 100644 const struct nlattr *nla) { struct nftables_pernet *nft_net = nft_pernet(net); -@@ -2372,6 +2392,7 @@ static struct nft_chain *nft_chain_lookup_byid(const struct net *net, +@@ -2372,6 +2404,7 @@ static struct nft_chain *nft_chain_lookup_byid(const struct net *net, struct nft_chain *chain = trans->ctx.chain; if (trans->msg_type == NFT_MSG_NEWCHAIN && @@ -398362,7 +513024,7 @@ index c0851fec11d46..f7a5b8414423d 100644 id == nft_trans_chain_id(trans)) return chain; } -@@ -2461,6 +2482,9 @@ static int nf_tables_newchain(struct sk_buff *skb, const struct nfnl_info *info, +@@ -2461,6 +2494,9 @@ static int nf_tables_newchain(struct sk_buff *skb, const struct nfnl_info *info, nft_ctx_init(&ctx, net, skb, info->nlh, family, table, chain, nla); if 
(chain != NULL) { @@ -398372,7 +513034,7 @@ index c0851fec11d46..f7a5b8414423d 100644 if (info->nlh->nlmsg_flags & NLM_F_EXCL) { NL_SET_BAD_ATTR(extack, attr); return -EEXIST; -@@ -2778,27 +2802,31 @@ static struct nft_expr *nft_expr_init(const struct nft_ctx *ctx, +@@ -2778,27 +2814,31 @@ static struct nft_expr *nft_expr_init(const struct nft_ctx *ctx, err = nf_tables_expr_parse(ctx, nla, &expr_info); if (err < 0) @@ -398410,7 +513072,7 @@ index c0851fec11d46..f7a5b8414423d 100644 return ERR_PTR(err); } -@@ -3032,7 +3060,7 @@ static int nf_tables_dump_rules(struct sk_buff *skb, +@@ -3032,7 +3072,7 @@ static int nf_tables_dump_rules(struct sk_buff *skb, rcu_read_lock(); nft_net = nft_pernet(net); @@ -398419,7 +513081,7 @@ index c0851fec11d46..f7a5b8414423d 100644 list_for_each_entry_rcu(table, &nft_net->tables, list) { if (family != NFPROTO_UNSPEC && family != table->family) -@@ -3255,6 +3283,7 @@ static int nft_table_validate(struct net *net, const struct nft_table *table) +@@ -3255,6 +3295,7 @@ static int nft_table_validate(struct net *net, const struct nft_table *table) } static struct nft_rule *nft_rule_lookup_byid(const struct net *net, @@ -398427,7 +513089,7 @@ index c0851fec11d46..f7a5b8414423d 100644 const struct nlattr *nla); #define NFT_RULE_MAXEXPRS 128 -@@ -3301,7 +3330,7 @@ static int nf_tables_newrule(struct sk_buff *skb, const struct nfnl_info *info, +@@ -3301,7 +3342,7 @@ static int nf_tables_newrule(struct sk_buff *skb, const struct nfnl_info *info, return -EOPNOTSUPP; } else if (nla[NFTA_RULE_CHAIN_ID]) { @@ -398436,7 +513098,7 @@ index c0851fec11d46..f7a5b8414423d 100644 if (IS_ERR(chain)) { NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_CHAIN_ID]); return PTR_ERR(chain); -@@ -3343,7 +3372,7 @@ static int nf_tables_newrule(struct sk_buff *skb, const struct nfnl_info *info, +@@ -3343,7 +3384,7 @@ static int nf_tables_newrule(struct sk_buff *skb, const struct nfnl_info *info, return PTR_ERR(old_rule); } } else if (nla[NFTA_RULE_POSITION_ID]) { @@ -398445,7 +513107,7 @@ index c0851fec11d46..f7a5b8414423d 100644 if (IS_ERR(old_rule)) { NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_POSITION_ID]); return PTR_ERR(old_rule); -@@ -3488,6 +3517,7 @@ err_release_expr: +@@ -3488,6 +3529,7 @@ err_release_expr: } static struct nft_rule *nft_rule_lookup_byid(const struct net *net, @@ -398453,7 +513115,7 @@ index c0851fec11d46..f7a5b8414423d 100644 const struct nlattr *nla) { struct nftables_pernet *nft_net = nft_pernet(net); -@@ -3498,6 +3528,7 @@ static struct nft_rule *nft_rule_lookup_byid(const struct net *net, +@@ -3498,6 +3540,7 @@ static struct nft_rule *nft_rule_lookup_byid(const struct net *net, struct nft_rule *rule = nft_trans_rule(trans); if (trans->msg_type == NFT_MSG_NEWRULE && @@ -398461,7 +513123,7 @@ index c0851fec11d46..f7a5b8414423d 100644 id == nft_trans_rule_id(trans)) return rule; } -@@ -3547,7 +3578,7 @@ static int nf_tables_delrule(struct sk_buff *skb, const struct nfnl_info *info, +@@ -3547,7 +3590,7 @@ static int nf_tables_delrule(struct sk_buff *skb, const struct nfnl_info *info, err = nft_delrule(&ctx, rule); } else if (nla[NFTA_RULE_ID]) { @@ -398470,7 +513132,26 @@ index c0851fec11d46..f7a5b8414423d 100644 if (IS_ERR(rule)) { NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_ID]); return PTR_ERR(rule); -@@ -3726,6 +3757,7 @@ static struct nft_set *nft_set_lookup_byhandle(const struct nft_table *table, +@@ -3604,8 +3647,7 @@ static bool nft_set_ops_candidate(const struct nft_set_type *type, u32 flags) + static const struct nft_set_ops * + nft_select_set_ops(const struct nft_ctx *ctx, + 
const struct nlattr * const nla[], +- const struct nft_set_desc *desc, +- enum nft_set_policies policy) ++ const struct nft_set_desc *desc) + { + struct nftables_pernet *nft_net = nft_pernet(ctx->net); + const struct nft_set_ops *ops, *bops; +@@ -3634,7 +3676,7 @@ nft_select_set_ops(const struct nft_ctx *ctx, + if (!ops->estimate(desc, flags, &est)) + continue; + +- switch (policy) { ++ switch (desc->policy) { + case NFT_SET_POL_PERFORMANCE: + if (est.lookup < best.lookup) + break; +@@ -3726,6 +3768,7 @@ static struct nft_set *nft_set_lookup_byhandle(const struct nft_table *table, } static struct nft_set *nft_set_lookup_byid(const struct net *net, @@ -398478,7 +513159,7 @@ index c0851fec11d46..f7a5b8414423d 100644 const struct nlattr *nla, u8 genmask) { struct nftables_pernet *nft_net = nft_pernet(net); -@@ -3737,6 +3769,7 @@ static struct nft_set *nft_set_lookup_byid(const struct net *net, +@@ -3737,6 +3780,7 @@ static struct nft_set *nft_set_lookup_byid(const struct net *net, struct nft_set *set = nft_trans_set(trans); if (id == nft_trans_set_id(trans) && @@ -398486,7 +513167,7 @@ index c0851fec11d46..f7a5b8414423d 100644 nft_active_genmask(set, genmask)) return set; } -@@ -3757,7 +3790,7 @@ struct nft_set *nft_set_lookup_global(const struct net *net, +@@ -3757,7 +3801,7 @@ struct nft_set *nft_set_lookup_global(const struct net *net, if (!nla_set_id) return set; @@ -398495,7 +513176,7 @@ index c0851fec11d46..f7a5b8414423d 100644 } return set; } -@@ -3783,7 +3816,7 @@ cont: +@@ -3783,7 +3827,7 @@ cont: list_for_each_entry(i, &ctx->table->sets, list) { int tmp; @@ -398504,7 +513185,37 @@ index c0851fec11d46..f7a5b8414423d 100644 continue; if (!sscanf(i->name, name, &tmp)) continue; -@@ -4009,7 +4042,7 @@ static int nf_tables_dump_sets(struct sk_buff *skb, struct netlink_callback *cb) +@@ -3867,8 +3911,10 @@ static int nf_tables_fill_set_concat(struct sk_buff *skb, + static int nf_tables_fill_set(struct sk_buff *skb, const struct nft_ctx *ctx, + const struct nft_set *set, u16 event, u16 flags) + { +- struct nlmsghdr *nlh; ++ u64 timeout = READ_ONCE(set->timeout); ++ u32 gc_int = READ_ONCE(set->gc_int); + u32 portid = ctx->portid; ++ struct nlmsghdr *nlh; + struct nlattr *nest; + u32 seq = ctx->seq; + int i; +@@ -3904,13 +3950,13 @@ static int nf_tables_fill_set(struct sk_buff *skb, const struct nft_ctx *ctx, + nla_put_be32(skb, NFTA_SET_OBJ_TYPE, htonl(set->objtype))) + goto nla_put_failure; + +- if (set->timeout && ++ if (timeout && + nla_put_be64(skb, NFTA_SET_TIMEOUT, +- nf_jiffies64_to_msecs(set->timeout), ++ nf_jiffies64_to_msecs(timeout), + NFTA_SET_PAD)) + goto nla_put_failure; +- if (set->gc_int && +- nla_put_be32(skb, NFTA_SET_GC_INTERVAL, htonl(set->gc_int))) ++ if (gc_int && ++ nla_put_be32(skb, NFTA_SET_GC_INTERVAL, htonl(gc_int))) + goto nla_put_failure; + + if (set->policy != NFT_SET_POL_PERFORMANCE) { +@@ -4009,7 +4055,7 @@ static int nf_tables_dump_sets(struct sk_buff *skb, struct netlink_callback *cb) rcu_read_lock(); nft_net = nft_pernet(net); @@ -398513,7 +513224,7 @@ index c0851fec11d46..f7a5b8414423d 100644 list_for_each_entry_rcu(table, &nft_net->tables, list) { if (ctx->family != NFPROTO_UNSPEC && -@@ -4147,6 +4180,9 @@ static int nft_set_desc_concat_parse(const struct nlattr *attr, +@@ -4147,6 +4193,9 @@ static int nft_set_desc_concat_parse(const struct nlattr *attr, u32 len; int err; @@ -398523,7 +513234,7 @@ index c0851fec11d46..f7a5b8414423d 100644 err = nla_parse_nested_deprecated(tb, NFTA_SET_FIELD_MAX, attr, nft_concat_policy, NULL); if (err < 0) -@@ -4156,9 
+4192,8 @@ static int nft_set_desc_concat_parse(const struct nlattr *attr,
+@@ -4156,9 
+4192,8 @@ static int nft_set_desc_concat_parse(const struct nlattr *attr, +@@ -4156,9 +4205,8 @@ static int nft_set_desc_concat_parse(const struct nlattr *attr, return -EINVAL; len = ntohl(nla_get_be32(tb[NFTA_SET_FIELD_LEN])); @@ -398535,7 +513246,7 @@ index c0851fec11d46..f7a5b8414423d 100644 desc->field_len[desc->field_count++] = len; -@@ -4169,7 +4204,8 @@ static int nft_set_desc_concat(struct nft_set_desc *desc, +@@ -4169,7 +4217,8 @@ static int nft_set_desc_concat(struct nft_set_desc *desc, const struct nlattr *nla) { struct nlattr *attr; @@ -398545,7 +513256,7 @@ index c0851fec11d46..f7a5b8414423d 100644 nla_for_each_nested(attr, nla, rem) { if (nla_type(attr) != NFTA_LIST_ELEM) -@@ -4180,6 +4216,12 @@ static int nft_set_desc_concat(struct nft_set_desc *desc, +@@ -4180,6 +4229,12 @@ static int nft_set_desc_concat(struct nft_set_desc *desc, return err; } @@ -398558,7 +513269,197 @@ index c0851fec11d46..f7a5b8414423d 100644 return 0; } -@@ -4318,6 +4360,11 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info, +@@ -4202,15 +4257,94 @@ static int nf_tables_set_desc_parse(struct nft_set_desc *desc, + return err; + } + ++static int nft_set_expr_alloc(struct nft_ctx *ctx, struct nft_set *set, ++ const struct nlattr * const *nla, ++ struct nft_expr **exprs, int *num_exprs, ++ u32 flags) ++{ ++ struct nft_expr *expr; ++ int err, i; ++ ++ if (nla[NFTA_SET_EXPR]) { ++ expr = nft_set_elem_expr_alloc(ctx, set, nla[NFTA_SET_EXPR]); ++ if (IS_ERR(expr)) { ++ err = PTR_ERR(expr); ++ goto err_set_expr_alloc; ++ } ++ exprs[0] = expr; ++ (*num_exprs)++; ++ } else if (nla[NFTA_SET_EXPRESSIONS]) { ++ struct nlattr *tmp; ++ int left; ++ ++ if (!(flags & NFT_SET_EXPR)) { ++ err = -EINVAL; ++ goto err_set_expr_alloc; ++ } ++ i = 0; ++ nla_for_each_nested(tmp, nla[NFTA_SET_EXPRESSIONS], left) { ++ if (i == NFT_SET_EXPR_MAX) { ++ err = -E2BIG; ++ goto err_set_expr_alloc; ++ } ++ if (nla_type(tmp) != NFTA_LIST_ELEM) { ++ err = -EINVAL; ++ goto err_set_expr_alloc; ++ } ++ expr = nft_set_elem_expr_alloc(ctx, set, tmp); ++ if (IS_ERR(expr)) { ++ err = PTR_ERR(expr); ++ goto err_set_expr_alloc; ++ } ++ exprs[i++] = expr; ++ (*num_exprs)++; ++ } ++ } ++ ++ return 0; ++ ++err_set_expr_alloc: ++ for (i = 0; i < *num_exprs; i++) ++ nft_expr_destroy(ctx, exprs[i]); ++ ++ return err; ++} ++ ++static bool nft_set_is_same(const struct nft_set *set, ++ const struct nft_set_desc *desc, ++ struct nft_expr *exprs[], u32 num_exprs, u32 flags) ++{ ++ int i; ++ ++ if (set->ktype != desc->ktype || ++ set->dtype != desc->dtype || ++ set->flags != flags || ++ set->klen != desc->klen || ++ set->dlen != desc->dlen || ++ set->field_count != desc->field_count || ++ set->num_exprs != num_exprs) ++ return false; ++ ++ for (i = 0; i < desc->field_count; i++) { ++ if (set->field_len[i] != desc->field_len[i]) ++ return false; ++ } ++ ++ for (i = 0; i < num_exprs; i++) { ++ if (set->exprs[i]->ops != exprs[i]->ops) ++ return false; ++ } ++ ++ return true; ++} ++ + static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info, + const struct nlattr * const nla[]) + { +- u32 ktype, dtype, flags, policy, gc_int, objtype; + struct netlink_ext_ack *extack = info->extack; + u8 genmask = nft_genmask_next(info->net); + u8 family = info->nfmsg->nfgen_family; + const struct nft_set_ops *ops; +- struct nft_expr *expr = NULL; + struct net *net = info->net; + struct nft_set_desc desc; + struct nft_table *table; +@@ -4218,10 +4352,11 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info 
*info, + struct nft_set *set; + struct nft_ctx ctx; + size_t alloc_size; +- u64 timeout; ++ int num_exprs = 0; + char *name; + int err, i; + u16 udlen; ++ u32 flags; + u64 size; + + if (nla[NFTA_SET_TABLE] == NULL || +@@ -4232,10 +4367,10 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info, + + memset(&desc, 0, sizeof(desc)); + +- ktype = NFT_DATA_VALUE; ++ desc.ktype = NFT_DATA_VALUE; + if (nla[NFTA_SET_KEY_TYPE] != NULL) { +- ktype = ntohl(nla_get_be32(nla[NFTA_SET_KEY_TYPE])); +- if ((ktype & NFT_DATA_RESERVED_MASK) == NFT_DATA_RESERVED_MASK) ++ desc.ktype = ntohl(nla_get_be32(nla[NFTA_SET_KEY_TYPE])); ++ if ((desc.ktype & NFT_DATA_RESERVED_MASK) == NFT_DATA_RESERVED_MASK) + return -EINVAL; + } + +@@ -4260,17 +4395,17 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info, + return -EOPNOTSUPP; + } + +- dtype = 0; ++ desc.dtype = 0; + if (nla[NFTA_SET_DATA_TYPE] != NULL) { + if (!(flags & NFT_SET_MAP)) + return -EINVAL; + +- dtype = ntohl(nla_get_be32(nla[NFTA_SET_DATA_TYPE])); +- if ((dtype & NFT_DATA_RESERVED_MASK) == NFT_DATA_RESERVED_MASK && +- dtype != NFT_DATA_VERDICT) ++ desc.dtype = ntohl(nla_get_be32(nla[NFTA_SET_DATA_TYPE])); ++ if ((desc.dtype & NFT_DATA_RESERVED_MASK) == NFT_DATA_RESERVED_MASK && ++ desc.dtype != NFT_DATA_VERDICT) + return -EINVAL; + +- if (dtype != NFT_DATA_VERDICT) { ++ if (desc.dtype != NFT_DATA_VERDICT) { + if (nla[NFTA_SET_DATA_LEN] == NULL) + return -EINVAL; + desc.dlen = ntohl(nla_get_be32(nla[NFTA_SET_DATA_LEN])); +@@ -4285,39 +4420,44 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info, + if (!(flags & NFT_SET_OBJECT)) + return -EINVAL; + +- objtype = ntohl(nla_get_be32(nla[NFTA_SET_OBJ_TYPE])); +- if (objtype == NFT_OBJECT_UNSPEC || +- objtype > NFT_OBJECT_MAX) ++ desc.objtype = ntohl(nla_get_be32(nla[NFTA_SET_OBJ_TYPE])); ++ if (desc.objtype == NFT_OBJECT_UNSPEC || ++ desc.objtype > NFT_OBJECT_MAX) + return -EOPNOTSUPP; + } else if (flags & NFT_SET_OBJECT) + return -EINVAL; + else +- objtype = NFT_OBJECT_UNSPEC; ++ desc.objtype = NFT_OBJECT_UNSPEC; + +- timeout = 0; ++ desc.timeout = 0; + if (nla[NFTA_SET_TIMEOUT] != NULL) { + if (!(flags & NFT_SET_TIMEOUT)) + return -EINVAL; + +- err = nf_msecs_to_jiffies64(nla[NFTA_SET_TIMEOUT], &timeout); ++ err = nf_msecs_to_jiffies64(nla[NFTA_SET_TIMEOUT], &desc.timeout); + if (err) + return err; + } +- gc_int = 0; ++ desc.gc_int = 0; + if (nla[NFTA_SET_GC_INTERVAL] != NULL) { + if (!(flags & NFT_SET_TIMEOUT)) + return -EINVAL; +- gc_int = ntohl(nla_get_be32(nla[NFTA_SET_GC_INTERVAL])); ++ desc.gc_int = ntohl(nla_get_be32(nla[NFTA_SET_GC_INTERVAL])); + } + +- policy = NFT_SET_POL_PERFORMANCE; ++ desc.policy = NFT_SET_POL_PERFORMANCE; + if (nla[NFTA_SET_POLICY] != NULL) +- policy = ntohl(nla_get_be32(nla[NFTA_SET_POLICY])); ++ desc.policy = ntohl(nla_get_be32(nla[NFTA_SET_POLICY])); + + if (nla[NFTA_SET_DESC] != NULL) { err = nf_tables_set_desc_parse(&desc, nla[NFTA_SET_DESC]); if (err < 0) return err; @@ -398570,7 +513471,130 @@ index c0851fec11d46..f7a5b8414423d 100644 } if (nla[NFTA_SET_EXPR] || nla[NFTA_SET_EXPRESSIONS]) -@@ -4481,12 +4528,12 @@ struct nft_set_elem_catchall { +@@ -4339,6 +4479,8 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info, + return PTR_ERR(set); + } + } else { ++ struct nft_expr *exprs[NFT_SET_EXPR_MAX] = {}; ++ + if (info->nlh->nlmsg_flags & NLM_F_EXCL) { + NL_SET_BAD_ATTR(extack, nla[NFTA_SET_NAME]); + return -EEXIST; +@@ -4346,13 +4488,29 @@ static int 
nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info, + if (info->nlh->nlmsg_flags & NLM_F_REPLACE) + return -EOPNOTSUPP; + +- return 0; ++ err = nft_set_expr_alloc(&ctx, set, nla, exprs, &num_exprs, flags); ++ if (err < 0) ++ return err; ++ ++ err = 0; ++ if (!nft_set_is_same(set, &desc, exprs, num_exprs, flags)) { ++ NL_SET_BAD_ATTR(extack, nla[NFTA_SET_NAME]); ++ err = -EEXIST; ++ } ++ ++ for (i = 0; i < num_exprs; i++) ++ nft_expr_destroy(&ctx, exprs[i]); ++ ++ if (err < 0) ++ return err; ++ ++ return __nft_trans_set_add(&ctx, NFT_MSG_NEWSET, set, &desc); + } + + if (!(info->nlh->nlmsg_flags & NLM_F_CREATE)) + return -ENOENT; + +- ops = nft_select_set_ops(&ctx, nla, &desc, policy); ++ ops = nft_select_set_ops(&ctx, nla, &desc); + if (IS_ERR(ops)) + return PTR_ERR(ops); + +@@ -4392,18 +4550,18 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info, + set->table = table; + write_pnet(&set->net, net); + set->ops = ops; +- set->ktype = ktype; ++ set->ktype = desc.ktype; + set->klen = desc.klen; +- set->dtype = dtype; +- set->objtype = objtype; ++ set->dtype = desc.dtype; ++ set->objtype = desc.objtype; + set->dlen = desc.dlen; + set->flags = flags; + set->size = desc.size; +- set->policy = policy; ++ set->policy = desc.policy; + set->udlen = udlen; + set->udata = udata; +- set->timeout = timeout; +- set->gc_int = gc_int; ++ set->timeout = desc.timeout; ++ set->gc_int = desc.gc_int; + + set->field_count = desc.field_count; + for (i = 0; i < desc.field_count; i++) +@@ -4413,43 +4571,11 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info, + if (err < 0) + goto err_set_init; + +- if (nla[NFTA_SET_EXPR]) { +- expr = nft_set_elem_expr_alloc(&ctx, set, nla[NFTA_SET_EXPR]); +- if (IS_ERR(expr)) { +- err = PTR_ERR(expr); +- goto err_set_expr_alloc; +- } +- set->exprs[0] = expr; +- set->num_exprs++; +- } else if (nla[NFTA_SET_EXPRESSIONS]) { +- struct nft_expr *expr; +- struct nlattr *tmp; +- int left; +- +- if (!(flags & NFT_SET_EXPR)) { +- err = -EINVAL; +- goto err_set_expr_alloc; +- } +- i = 0; +- nla_for_each_nested(tmp, nla[NFTA_SET_EXPRESSIONS], left) { +- if (i == NFT_SET_EXPR_MAX) { +- err = -E2BIG; +- goto err_set_expr_alloc; +- } +- if (nla_type(tmp) != NFTA_LIST_ELEM) { +- err = -EINVAL; +- goto err_set_expr_alloc; +- } +- expr = nft_set_elem_expr_alloc(&ctx, set, tmp); +- if (IS_ERR(expr)) { +- err = PTR_ERR(expr); +- goto err_set_expr_alloc; +- } +- set->exprs[i++] = expr; +- set->num_exprs++; +- } +- } ++ err = nft_set_expr_alloc(&ctx, set, nla, set->exprs, &num_exprs, flags); ++ if (err < 0) ++ goto err_set_destroy; + ++ set->num_exprs = num_exprs; + set->handle = nf_tables_alloc_handle(table); + + err = nft_trans_set_add(&ctx, NFT_MSG_NEWSET, set); +@@ -4463,7 +4589,7 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info, + err_set_expr_alloc: + for (i = 0; i < set->num_exprs; i++) + nft_expr_destroy(&ctx, set->exprs[i]); +- ++err_set_destroy: + ops->destroy(set); + err_set_init: + kfree(set->name); +@@ -4481,12 +4607,12 @@ struct nft_set_elem_catchall { static void nft_set_catchall_destroy(const struct nft_ctx *ctx, struct nft_set *set) { @@ -398586,7 +513610,7 @@ index c0851fec11d46..f7a5b8414423d 100644 } } -@@ -4928,6 +4975,8 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb) +@@ -4928,6 +5054,8 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb) rcu_read_lock(); nft_net = nft_pernet(net); @@ -398595,7 +513619,7 @@ index 
c0851fec11d46..f7a5b8414423d 100644 list_for_each_entry_rcu(table, &nft_net->tables, list) { if (dump_ctx->ctx.family != NFPROTO_UNSPEC && dump_ctx->ctx.family != table->family) -@@ -5063,6 +5112,9 @@ static int nft_setelem_parse_flags(const struct nft_set *set, +@@ -5063,6 +5191,9 @@ static int nft_setelem_parse_flags(const struct nft_set *set, if (!(set->flags & NFT_SET_INTERVAL) && *flags & NFT_SET_ELEM_INTERVAL_END) return -EINVAL; @@ -398605,7 +513629,7 @@ index c0851fec11d46..f7a5b8414423d 100644 return 0; } -@@ -5070,19 +5122,13 @@ static int nft_setelem_parse_flags(const struct nft_set *set, +@@ -5070,19 +5201,13 @@ static int nft_setelem_parse_flags(const struct nft_set *set, static int nft_setelem_parse_key(struct nft_ctx *ctx, struct nft_set *set, struct nft_data *key, struct nlattr *attr) { @@ -398631,7 +513655,7 @@ index c0851fec11d46..f7a5b8414423d 100644 } static int nft_setelem_parse_data(struct nft_ctx *ctx, struct nft_set *set, -@@ -5090,18 +5136,19 @@ static int nft_setelem_parse_data(struct nft_ctx *ctx, struct nft_set *set, +@@ -5090,18 +5215,19 @@ static int nft_setelem_parse_data(struct nft_ctx *ctx, struct nft_set *set, struct nft_data *data, struct nlattr *attr) { @@ -398660,7 +513684,7 @@ index c0851fec11d46..f7a5b8414423d 100644 } static void *nft_setelem_catchall_get(const struct net *net, -@@ -5318,9 +5365,6 @@ struct nft_expr *nft_set_elem_expr_alloc(const struct nft_ctx *ctx, +@@ -5318,9 +5444,6 @@ struct nft_expr *nft_set_elem_expr_alloc(const struct nft_ctx *ctx, return expr; err = -EOPNOTSUPP; @@ -398670,7 +513694,7 @@ index c0851fec11d46..f7a5b8414423d 100644 if (expr->ops->type->flags & NFT_EXPR_GC) { if (set->flags & NFT_SET_TIMEOUT) goto err_set_elem_expr; -@@ -5437,7 +5481,7 @@ int nft_set_elem_expr_clone(const struct nft_ctx *ctx, struct nft_set *set, +@@ -5437,7 +5560,7 @@ int nft_set_elem_expr_clone(const struct nft_ctx *ctx, struct nft_set *set, err = nft_expr_clone(expr, set->exprs[i]); if (err < 0) { @@ -398679,7 +513703,7 @@ index c0851fec11d46..f7a5b8414423d 100644 goto err_expr; } expr_array[i] = expr; -@@ -5653,7 +5697,7 @@ static void nft_setelem_catchall_remove(const struct net *net, +@@ -5653,7 +5776,7 @@ static void nft_setelem_catchall_remove(const struct net *net, list_for_each_entry_safe(catchall, next, &set->catchall_list, list) { if (catchall->elem == elem->priv) { list_del_rcu(&catchall->list); @@ -398688,7 +513712,7 @@ index c0851fec11d46..f7a5b8414423d 100644 break; } } -@@ -5669,6 +5713,25 @@ static void nft_setelem_remove(const struct net *net, +@@ -5669,6 +5792,25 @@ static void nft_setelem_remove(const struct net *net, set->ops->remove(net, set, elem); } @@ -398714,7 +513738,7 @@ index c0851fec11d46..f7a5b8414423d 100644 static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, const struct nlattr *attr, u32 nlmsg_flags) { -@@ -5704,8 +5767,11 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, +@@ -5704,8 +5846,11 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, if (!nla[NFTA_SET_ELEM_KEY] && !(flags & NFT_SET_ELEM_CATCHALL)) return -EINVAL; @@ -398728,7 +513752,7 @@ index c0851fec11d46..f7a5b8414423d 100644 if (set->flags & NFT_SET_MAP) { if (nla[NFTA_SET_ELEM_DATA] == NULL && -@@ -5716,6 +5782,18 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, +@@ -5716,6 +5861,18 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, return -EINVAL; } @@ -398747,7 +513771,7 @@ index c0851fec11d46..f7a5b8414423d 100644 if ((flags & 
NFT_SET_ELEM_INTERVAL_END) && (nla[NFTA_SET_ELEM_DATA] || nla[NFTA_SET_ELEM_OBJREF] || -@@ -5723,6 +5801,7 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, +@@ -5723,6 +5880,7 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, nla[NFTA_SET_ELEM_EXPIRATION] || nla[NFTA_SET_ELEM_USERDATA] || nla[NFTA_SET_ELEM_EXPR] || @@ -398755,7 +513779,29 @@ index c0851fec11d46..f7a5b8414423d 100644 nla[NFTA_SET_ELEM_EXPRESSIONS])) return -EINVAL; -@@ -5814,7 +5893,9 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, +@@ -5734,8 +5892,9 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, + &timeout); + if (err) + return err; +- } else if (set->flags & NFT_SET_TIMEOUT) { +- timeout = set->timeout; ++ } else if (set->flags & NFT_SET_TIMEOUT && ++ !(flags & NFT_SET_ELEM_INTERVAL_END)) { ++ timeout = READ_ONCE(set->timeout); + } + + expiration = 0; +@@ -5800,7 +5959,8 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, + err = -EOPNOTSUPP; + goto err_set_elem_expr; + } +- } else if (set->num_exprs > 0) { ++ } else if (set->num_exprs > 0 && ++ !(flags & NFT_SET_ELEM_INTERVAL_END)) { + err = nft_set_elem_expr_clone(ctx, set, expr_array); + if (err < 0) + goto err_set_elem_expr_clone; +@@ -5814,7 +5974,9 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, if (err < 0) goto err_set_elem_expr; @@ -398766,7 +513812,7 @@ index c0851fec11d46..f7a5b8414423d 100644 } if (nla[NFTA_SET_ELEM_KEY_END]) { -@@ -5823,29 +5904,34 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, +@@ -5823,29 +5985,34 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, if (err < 0) goto err_parse_key; @@ -398784,7 +513830,7 @@ index c0851fec11d46..f7a5b8414423d 100644 + if (err < 0) + goto err_parse_key_end; + -+ if (timeout != set->timeout) { ++ if (timeout != READ_ONCE(set->timeout)) { + err = nft_set_ext_add(&tmpl, NFT_SET_EXT_TIMEOUT); + if (err < 0) + goto err_parse_key_end; @@ -398812,7 +513858,7 @@ index c0851fec11d46..f7a5b8414423d 100644 obj = nft_obj_lookup(ctx->net, ctx->table, nla[NFTA_SET_ELEM_OBJREF], set->objtype, genmask); -@@ -5853,7 +5939,9 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, +@@ -5853,7 +6020,9 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, err = PTR_ERR(obj); goto err_parse_key_end; } @@ -398823,7 +513869,7 @@ index c0851fec11d46..f7a5b8414423d 100644 } if (nla[NFTA_SET_ELEM_DATA] != NULL) { -@@ -5887,7 +5975,9 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, +@@ -5887,7 +6056,9 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, NFT_VALIDATE_NEED); } @@ -398834,7 +513880,7 @@ index c0851fec11d46..f7a5b8414423d 100644 } /* The full maximum length of userdata can exceed the maximum -@@ -5897,9 +5987,12 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, +@@ -5897,9 +6068,12 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, ulen = 0; if (nla[NFTA_SET_ELEM_USERDATA] != NULL) { ulen = nla_len(nla[NFTA_SET_ELEM_USERDATA]); @@ -398850,7 +513896,7 @@ index c0851fec11d46..f7a5b8414423d 100644 } err = -ENOMEM; -@@ -6123,10 +6216,16 @@ static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set, +@@ -6123,10 +6297,16 @@ static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set, if (!nla[NFTA_SET_ELEM_KEY] && !(flags & NFT_SET_ELEM_CATCHALL)) return -EINVAL; @@ -398869,7 +513915,7 @@ index 
c0851fec11d46..f7a5b8414423d 100644 if (nla[NFTA_SET_ELEM_KEY]) { err = nft_setelem_parse_key(ctx, set, &elem.key.val, -@@ -6134,16 +6233,20 @@ static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set, +@@ -6134,16 +6314,20 @@ static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set, if (err < 0) return err; @@ -398893,7 +513939,7 @@ index c0851fec11d46..f7a5b8414423d 100644 } err = -ENOMEM; -@@ -6151,7 +6254,7 @@ static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set, +@@ -6151,7 +6335,7 @@ static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set, elem.key_end.val.data, NULL, 0, 0, GFP_KERNEL); if (elem.priv == NULL) @@ -398902,7 +513948,7 @@ index c0851fec11d46..f7a5b8414423d 100644 ext = nft_set_elem_ext(set, elem.priv); if (flags) -@@ -6175,6 +6278,8 @@ fail_ops: +@@ -6175,6 +6359,8 @@ fail_ops: kfree(trans); fail_trans: kfree(elem.priv); @@ -398911,7 +513957,7 @@ index c0851fec11d46..f7a5b8414423d 100644 fail_elem: nft_data_release(&elem.key.val, NFT_DATA_VALUE); return err; -@@ -6535,12 +6640,15 @@ static int nf_tables_updobj(const struct nft_ctx *ctx, +@@ -6535,12 +6721,15 @@ static int nf_tables_updobj(const struct nft_ctx *ctx, { struct nft_object *newobj; struct nft_trans *trans; @@ -398929,7 +513975,7 @@ index c0851fec11d46..f7a5b8414423d 100644 newobj = nft_obj_init(ctx, type, attr); if (IS_ERR(newobj)) { -@@ -6557,6 +6665,8 @@ static int nf_tables_updobj(const struct nft_ctx *ctx, +@@ -6557,6 +6746,8 @@ static int nf_tables_updobj(const struct nft_ctx *ctx, err_free_trans: kfree(trans); @@ -398938,7 +513984,7 @@ index c0851fec11d46..f7a5b8414423d 100644 return err; } -@@ -6721,7 +6831,7 @@ static int nf_tables_dump_obj(struct sk_buff *skb, struct netlink_callback *cb) +@@ -6721,7 +6912,7 @@ static int nf_tables_dump_obj(struct sk_buff *skb, struct netlink_callback *cb) rcu_read_lock(); nft_net = nft_pernet(net); @@ -398947,7 +513993,7 @@ index c0851fec11d46..f7a5b8414423d 100644 list_for_each_entry_rcu(table, &nft_net->tables, list) { if (family != NFPROTO_UNSPEC && family != table->family) -@@ -7191,13 +7301,25 @@ static void nft_unregister_flowtable_hook(struct net *net, +@@ -7191,13 +7382,25 @@ static void nft_unregister_flowtable_hook(struct net *net, FLOW_BLOCK_UNBIND); } @@ -398977,7 +514023,7 @@ index c0851fec11d46..f7a5b8414423d 100644 } static int nft_register_flowtable_net_hooks(struct net *net, -@@ -7290,11 +7412,15 @@ static int nft_flowtable_update(struct nft_ctx *ctx, const struct nlmsghdr *nlh, +@@ -7290,11 +7493,15 @@ static int nft_flowtable_update(struct nft_ctx *ctx, const struct nlmsghdr *nlh, if (nla[NFTA_FLOWTABLE_FLAGS]) { flags = ntohl(nla_get_be32(nla[NFTA_FLOWTABLE_FLAGS])); @@ -398997,7 +514043,7 @@ index c0851fec11d46..f7a5b8414423d 100644 } else { flags = flowtable->data.flags; } -@@ -7475,6 +7601,7 @@ static int nft_delflowtable_hook(struct nft_ctx *ctx, +@@ -7475,6 +7682,7 @@ static int nft_delflowtable_hook(struct nft_ctx *ctx, { const struct nlattr * const *nla = ctx->nla; struct nft_flowtable_hook flowtable_hook; @@ -399005,7 +514051,7 @@ index c0851fec11d46..f7a5b8414423d 100644 struct nft_hook *this, *hook; struct nft_trans *trans; int err; -@@ -7490,7 +7617,7 @@ static int nft_delflowtable_hook(struct nft_ctx *ctx, +@@ -7490,7 +7698,7 @@ static int nft_delflowtable_hook(struct nft_ctx *ctx, err = -ENOENT; goto err_flowtable_del_hook; } @@ -399014,7 +514060,7 @@ index c0851fec11d46..f7a5b8414423d 100644 } trans = nft_trans_alloc(ctx, NFT_MSG_DELFLOWTABLE, -@@ -7503,6 +7630,7 @@ static int 
nft_delflowtable_hook(struct nft_ctx *ctx, +@@ -7503,6 +7711,7 @@ static int nft_delflowtable_hook(struct nft_ctx *ctx, nft_trans_flowtable(trans) = flowtable; nft_trans_flowtable_update(trans) = true; INIT_LIST_HEAD(&nft_trans_flowtable_hooks(trans)); @@ -399022,7 +514068,7 @@ index c0851fec11d46..f7a5b8414423d 100644 nft_flowtable_hook_release(&flowtable_hook); nft_trans_commit_list_add_tail(ctx->net, trans); -@@ -7510,13 +7638,7 @@ static int nft_delflowtable_hook(struct nft_ctx *ctx, +@@ -7510,13 +7719,7 @@ static int nft_delflowtable_hook(struct nft_ctx *ctx, return 0; err_flowtable_del_hook: @@ -399037,7 +514083,7 @@ index c0851fec11d46..f7a5b8414423d 100644 nft_flowtable_hook_release(&flowtable_hook); return err; -@@ -7641,7 +7763,7 @@ static int nf_tables_dump_flowtable(struct sk_buff *skb, +@@ -7641,7 +7844,7 @@ static int nf_tables_dump_flowtable(struct sk_buff *skb, rcu_read_lock(); nft_net = nft_pernet(net); @@ -399046,7 +514092,7 @@ index c0851fec11d46..f7a5b8414423d 100644 list_for_each_entry_rcu(table, &nft_net->tables, list) { if (family != NFPROTO_UNSPEC && family != table->family) -@@ -8169,7 +8291,7 @@ static void nft_obj_commit_update(struct nft_trans *trans) +@@ -8169,7 +8372,7 @@ static void nft_obj_commit_update(struct nft_trans *trans) if (obj->ops->update) obj->ops->update(obj, newobj); @@ -399055,17 +514101,7 @@ index c0851fec11d46..f7a5b8414423d 100644 } static void nft_commit_release(struct nft_trans *trans) -@@ -8186,6 +8308,9 @@ static void nft_commit_release(struct nft_trans *trans) - nf_tables_chain_destroy(&trans->ctx); - break; - case NFT_MSG_DELRULE: -+ if (trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD) -+ nft_flow_rule_destroy(nft_trans_flow_rule(trans)); -+ - nf_tables_rule_destroy(&trans->ctx, nft_trans_rule(trans)); - break; - case NFT_MSG_DELSET: -@@ -8371,17 +8496,6 @@ void nft_chain_del(struct nft_chain *chain) +@@ -8371,17 +8574,6 @@ void nft_chain_del(struct nft_chain *chain) list_del_rcu(&chain->list); } @@ -399083,7 +514119,7 @@ index c0851fec11d46..f7a5b8414423d 100644 static void nf_tables_module_autoload_cleanup(struct net *net) { struct nftables_pernet *nft_net = nft_pernet(net); -@@ -8533,6 +8647,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) +@@ -8533,6 +8725,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) struct nft_trans_elem *te; struct nft_chain *chain; struct nft_table *table; @@ -399091,7 +514127,7 @@ index c0851fec11d46..f7a5b8414423d 100644 LIST_HEAD(adl); int err; -@@ -8582,9 +8697,12 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) +@@ -8582,9 +8775,12 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) * Bump generation counter, invalidate any dump in progress. * Cannot fail after this point. */ @@ -399105,7 +514141,7 @@ index c0851fec11d46..f7a5b8414423d 100644 /* step 3. Start new generation, rules_gen_X now in use. 
*/ net->nft.gencursor = nft_gencursor_next(net); -@@ -8636,6 +8754,9 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) +@@ -8636,6 +8832,9 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) nf_tables_rule_notify(&trans->ctx, nft_trans_rule(trans), NFT_MSG_NEWRULE); @@ -399115,7 +514151,40 @@ index c0851fec11d46..f7a5b8414423d 100644 nft_trans_destroy(trans); break; case NFT_MSG_DELRULE: -@@ -8726,8 +8847,6 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) +@@ -8646,16 +8845,25 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) + nft_rule_expr_deactivate(&trans->ctx, + nft_trans_rule(trans), + NFT_TRANS_COMMIT); ++ ++ if (trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD) ++ nft_flow_rule_destroy(nft_trans_flow_rule(trans)); + break; + case NFT_MSG_NEWSET: +- nft_clear(net, nft_trans_set(trans)); +- /* This avoids hitting -EBUSY when deleting the table +- * from the transaction. +- */ +- if (nft_set_is_anonymous(nft_trans_set(trans)) && +- !list_empty(&nft_trans_set(trans)->bindings)) +- trans->ctx.table->use--; ++ if (nft_trans_set_update(trans)) { ++ struct nft_set *set = nft_trans_set(trans); + ++ WRITE_ONCE(set->timeout, nft_trans_set_timeout(trans)); ++ WRITE_ONCE(set->gc_int, nft_trans_set_gc_int(trans)); ++ } else { ++ nft_clear(net, nft_trans_set(trans)); ++ /* This avoids hitting -EBUSY when deleting the table ++ * from the transaction. ++ */ ++ if (nft_set_is_anonymous(nft_trans_set(trans)) && ++ !list_empty(&nft_trans_set(trans)->bindings)) ++ trans->ctx.table->use--; ++ } + nf_tables_set_notify(&trans->ctx, nft_trans_set(trans), + NFT_MSG_NEWSET, GFP_KERNEL); + nft_trans_destroy(trans); +@@ -8726,8 +8934,6 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) break; case NFT_MSG_DELFLOWTABLE: if (nft_trans_flowtable_update(trans)) { @@ -399124,7 +514193,7 @@ index c0851fec11d46..f7a5b8414423d 100644 nf_tables_flowtable_notify(&trans->ctx, nft_trans_flowtable(trans), &nft_trans_flowtable_hooks(trans), -@@ -8808,7 +8927,6 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) +@@ -8808,7 +9014,6 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) struct nftables_pernet *nft_net = nft_pernet(net); struct nft_trans *trans, *next; struct nft_trans_elem *te; @@ -399132,7 +514201,18 @@ index c0851fec11d46..f7a5b8414423d 100644 if (action == NFNL_ABORT_VALIDATE && nf_tables_validate(net) < 0) -@@ -8914,7 +9032,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) +@@ -8880,6 +9085,10 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) + nft_trans_destroy(trans); + break; + case NFT_MSG_NEWSET: ++ if (nft_trans_set_update(trans)) { ++ nft_trans_destroy(trans); ++ break; ++ } + trans->ctx.table->use--; + if (nft_trans_set_bound(trans)) { + nft_trans_destroy(trans); +@@ -8914,7 +9123,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) break; case NFT_MSG_NEWOBJ: if (nft_trans_obj_update(trans)) { @@ -399141,7 +514221,7 @@ index c0851fec11d46..f7a5b8414423d 100644 nft_trans_destroy(trans); } else { trans->ctx.table->use--; -@@ -8939,8 +9057,8 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) +@@ -8939,8 +9148,8 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) break; case NFT_MSG_DELFLOWTABLE: if (nft_trans_flowtable_update(trans)) { @@ -399152,7 +514232,7 @@ index c0851fec11d46..f7a5b8414423d 100644 } else { 
trans->ctx.table->use++; nft_clear(trans->ctx.net, nft_trans_flowtable(trans)); -@@ -9203,17 +9321,23 @@ int nft_parse_u32_check(const struct nlattr *attr, int max, u32 *dest) +@@ -9203,17 +9412,23 @@ int nft_parse_u32_check(const struct nlattr *attr, int max, u32 *dest) } EXPORT_SYMBOL_GPL(nft_parse_u32_check); @@ -399179,7 +514259,7 @@ index c0851fec11d46..f7a5b8414423d 100644 } /** -@@ -9255,7 +9379,10 @@ int nft_parse_register_load(const struct nlattr *attr, u8 *sreg, u32 len) +@@ -9255,7 +9470,10 @@ int nft_parse_register_load(const struct nlattr *attr, u8 *sreg, u32 len) u32 reg; int err; @@ -399191,7 +514271,7 @@ index c0851fec11d46..f7a5b8414423d 100644 err = nft_validate_register_load(reg, len); if (err < 0) return err; -@@ -9310,7 +9437,10 @@ int nft_parse_register_store(const struct nft_ctx *ctx, +@@ -9310,7 +9528,10 @@ int nft_parse_register_store(const struct nft_ctx *ctx, int err; u32 reg; @@ -399203,7 +514283,7 @@ index c0851fec11d46..f7a5b8414423d 100644 err = nft_validate_register_store(ctx, reg, data, type, len); if (err < 0) return err; -@@ -9366,7 +9496,7 @@ static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data, +@@ -9366,7 +9587,7 @@ static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data, tb[NFTA_VERDICT_CHAIN], genmask); } else if (tb[NFTA_VERDICT_CHAIN_ID]) { @@ -399212,7 +514292,7 @@ index c0851fec11d46..f7a5b8414423d 100644 tb[NFTA_VERDICT_CHAIN_ID]); if (IS_ERR(chain)) return PTR_ERR(chain); -@@ -9378,6 +9508,11 @@ static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data, +@@ -9378,6 +9599,11 @@ static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data, return PTR_ERR(chain); if (nft_is_base_chain(chain)) return -EOPNOTSUPP; @@ -399224,7 +514304,7 @@ index c0851fec11d46..f7a5b8414423d 100644 chain->use++; data->verdict.chain = chain; -@@ -9385,7 +9520,7 @@ static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data, +@@ -9385,7 +9611,7 @@ static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data, } desc->len = sizeof(data->verdict); @@ -399233,7 +514313,7 @@ index c0851fec11d46..f7a5b8414423d 100644 return 0; } -@@ -9438,20 +9573,25 @@ nla_put_failure: +@@ -9438,20 +9664,25 @@ nla_put_failure: } static int nft_value_init(const struct nft_ctx *ctx, @@ -399264,7 +514344,7 @@ index c0851fec11d46..f7a5b8414423d 100644 return 0; } -@@ -9471,7 +9611,6 @@ static const struct nla_policy nft_data_policy[NFTA_DATA_MAX + 1] = { +@@ -9471,7 +9702,6 @@ static const struct nla_policy nft_data_policy[NFTA_DATA_MAX + 1] = { * * @ctx: context of the expression using the data * @data: destination struct nft_data @@ -399272,7 +514352,7 @@ index c0851fec11d46..f7a5b8414423d 100644 * @desc: data description * @nla: netlink attribute containing data * -@@ -9481,24 +9620,35 @@ static const struct nla_policy nft_data_policy[NFTA_DATA_MAX + 1] = { +@@ -9481,24 +9711,35 @@ static const struct nla_policy nft_data_policy[NFTA_DATA_MAX + 1] = { * The caller can indicate that it only wants to accept data of type * NFT_DATA_VALUE by passing NULL for the ctx argument. 
*/ @@ -399316,7 +514396,7 @@ index c0851fec11d46..f7a5b8414423d 100644 } EXPORT_SYMBOL_GPL(nft_data_init); -@@ -9574,10 +9724,14 @@ EXPORT_SYMBOL_GPL(__nft_release_basechain); +@@ -9574,10 +9815,14 @@ EXPORT_SYMBOL_GPL(__nft_release_basechain); static void __nft_release_hook(struct net *net, struct nft_table *table) { @@ -399332,7 +514412,16 @@ index c0851fec11d46..f7a5b8414423d 100644 } static void __nft_release_hooks(struct net *net) -@@ -9716,7 +9870,11 @@ static int __net_init nf_tables_init_net(struct net *net) +@@ -9670,6 +9915,8 @@ static int nft_rcv_nl_event(struct notifier_block *this, unsigned long event, + nft_net = nft_pernet(net); + deleted = 0; + mutex_lock(&nft_net->commit_mutex); ++ if (!list_empty(&nf_tables_destroy_list)) ++ rcu_barrier(); + again: + list_for_each_entry(table, &nft_net->tables, list) { + if (nft_table_has_owner(table) && +@@ -9716,7 +9963,11 @@ static int __net_init nf_tables_init_net(struct net *net) static void __net_exit nf_tables_pre_exit_net(struct net *net) { @@ -399344,6 +514433,16 @@ index c0851fec11d46..f7a5b8414423d 100644 } static void __net_exit nf_tables_exit_net(struct net *net) +@@ -9724,7 +9975,8 @@ static void __net_exit nf_tables_exit_net(struct net *net) + struct nftables_pernet *nft_net = nft_pernet(net); + + mutex_lock(&nft_net->commit_mutex); +- if (!list_empty(&nft_net->commit_list)) ++ if (!list_empty(&nft_net->commit_list) || ++ !list_empty(&nft_net->module_list)) + __nf_tables_abort(net, NFNL_ABORT_NONE); + __nft_release_tables(net); + mutex_unlock(&nft_net->commit_mutex); diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c index 866cfba04d6c0..2ab4216d2a903 100644 --- a/net/netfilter/nf_tables_core.c @@ -399529,6 +514628,18 @@ index e4fe2f0780eb6..84a7dea46efae 100644 len = min_t(unsigned int, skb->len - nft_thoff(pkt), NFT_TRACETYPE_TRANSPORT_HSIZE); if (trace_fill_header(nlskb, NFTA_TRACE_TRANSPORT_HEADER, skb, +diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c +index 7e2c8dd01408f..2cce4033a70a6 100644 +--- a/net/netfilter/nfnetlink.c ++++ b/net/netfilter/nfnetlink.c +@@ -290,6 +290,7 @@ replay: + nfnl_lock(subsys_id); + if (nfnl_dereference_protected(subsys_id) != ss || + nfnetlink_find_client(type, ss) != nc) { ++ nfnl_unlock(subsys_id); + err = -EAGAIN; + break; + } diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index 691ef4cffdd90..7f83f9697fc14 100644 --- a/net/netfilter/nfnetlink_log.c @@ -399930,9 +515041,18 @@ index 47b6d05f1ae69..461763a571f20 100644 struct nft_expr_type nft_cmp_type __read_mostly = { diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c -index 99b1de14ff7ee..9c7472af9e4a1 100644 +index 99b1de14ff7ee..bd468e955a212 100644 --- a/net/netfilter/nft_ct.c +++ b/net/netfilter/nft_ct.c +@@ -97,7 +97,7 @@ static void nft_ct_get_eval(const struct nft_expr *expr, + return; + #ifdef CONFIG_NF_CONNTRACK_MARK + case NFT_CT_MARK: +- *dest = ct->mark; ++ *dest = READ_ONCE(ct->mark); + return; + #endif + #ifdef CONFIG_NF_CONNTRACK_SECMARK @@ -259,10 +259,13 @@ static void nft_ct_set_zone_eval(const struct nft_expr *expr, ct = this_cpu_read(nft_ct_pcpu_template); @@ -399957,6 +515077,17 @@ index 99b1de14ff7ee..9c7472af9e4a1 100644 nf_ct_set(skb, ct, IP_CT_NEW); } #endif +@@ -294,8 +296,8 @@ static void nft_ct_set_eval(const struct nft_expr *expr, + switch (priv->key) { + #ifdef CONFIG_NF_CONNTRACK_MARK + case NFT_CT_MARK: +- if (ct->mark != value) { +- ct->mark = value; ++ if (READ_ONCE(ct->mark) != value) { ++ WRITE_ONCE(ct->mark, value); + 
nf_conntrack_event_cache(IPCT_MARK, ct); + } + break; @@ -375,7 +377,6 @@ static bool nft_ct_tmpl_alloc_pcpu(void) return false; } @@ -400389,7 +515520,7 @@ index d82677e83400b..720dc9fba6d4f 100644 static struct nft_expr_type nft_osf_type; diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c -index a44b14f6c0dc0..da652c21368e1 100644 +index a44b14f6c0dc0..208a6f59281db 100644 --- a/net/netfilter/nft_payload.c +++ b/net/netfilter/nft_payload.c @@ -22,6 +22,7 @@ @@ -400400,6 +515531,15 @@ index a44b14f6c0dc0..da652c21368e1 100644 #include <net/sctp/checksum.h> static bool nft_payload_rebuild_vlan_hdr(const struct sk_buff *skb, int mac_off, +@@ -62,7 +63,7 @@ nft_payload_copy_vlan(u32 *d, const struct sk_buff *skb, u8 offset, u8 len) + return false; + + if (offset + len > VLAN_ETH_HLEN + vlan_hlen) +- ethlen -= offset + len - VLAN_ETH_HLEN + vlan_hlen; ++ ethlen -= offset + len - VLAN_ETH_HLEN - vlan_hlen; + + memcpy(dst_u8, vlanh + offset - vlan_hlen, ethlen); + @@ -79,6 +80,45 @@ nft_payload_copy_vlan(u32 *d, const struct sk_buff *skb, u8 offset, u8 len) return skb_copy_bits(skb, offset + mac_off, dst_u8, len) == 0; } @@ -400664,10 +515804,30 @@ index df40314de21f5..76de6c8d98655 100644 return false; } diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c -index dce866d93feed..4f9299b9dcddc 100644 +index dce866d93feed..06d46d1826347 100644 --- a/net/netfilter/nft_set_pipapo.c +++ b/net/netfilter/nft_set_pipapo.c -@@ -1290,6 +1290,11 @@ static struct nft_pipapo_match *pipapo_clone(struct nft_pipapo_match *old) +@@ -1162,6 +1162,7 @@ static int nft_pipapo_insert(const struct net *net, const struct nft_set *set, + struct nft_pipapo_match *m = priv->clone; + u8 genmask = nft_genmask_next(net); + struct nft_pipapo_field *f; ++ const u8 *start_p, *end_p; + int i, bsize_max, err = 0; + + if (nft_set_ext_exists(ext, NFT_SET_EXT_KEY_END)) +@@ -1202,9 +1203,9 @@ static int nft_pipapo_insert(const struct net *net, const struct nft_set *set, + } + + /* Validate */ ++ start_p = start; ++ end_p = end; + nft_pipapo_for_each_field(f, i, m) { +- const u8 *start_p = start, *end_p = end; +- + if (f->rules >= (unsigned long)NFT_PIPAPO_RULE0_MAX) + return -ENOSPC; + +@@ -1290,6 +1291,11 @@ static struct nft_pipapo_match *pipapo_clone(struct nft_pipapo_match *old) if (!new->scratch_aligned) goto out_scratch; #endif @@ -400679,7 +515839,7 @@ index dce866d93feed..4f9299b9dcddc 100644 rcu_head_init(&new->rcu); -@@ -1334,6 +1339,9 @@ out_lt: +@@ -1334,6 +1340,9 @@ out_lt: kvfree(dst->lt); dst--; } @@ -400689,7 +515849,7 @@ index dce866d93feed..4f9299b9dcddc 100644 #ifdef NFT_PIPAPO_ALIGN free_percpu(new->scratch_aligned); #endif -@@ -2116,6 +2124,32 @@ out_scratch: +@@ -2116,6 +2125,32 @@ out_scratch: return err; } @@ -400722,7 +515882,7 @@ index dce866d93feed..4f9299b9dcddc 100644 /** * nft_pipapo_destroy() - Free private data for set and all committed elements * @set: nftables API set representation -@@ -2124,26 +2158,13 @@ static void nft_pipapo_destroy(const struct nft_set *set) +@@ -2124,26 +2159,13 @@ static void nft_pipapo_destroy(const struct nft_set *set) { struct nft_pipapo *priv = nft_set_priv(set); struct nft_pipapo_match *m; @@ -400751,7 +515911,7 @@ index dce866d93feed..4f9299b9dcddc 100644 #ifdef NFT_PIPAPO_ALIGN free_percpu(m->scratch_aligned); -@@ -2157,6 +2178,11 @@ static void nft_pipapo_destroy(const struct nft_set *set) +@@ -2157,6 +2179,11 @@ static void nft_pipapo_destroy(const struct nft_set *set) } if (priv->clone) { @@ -400777,22 +515937,420 @@ 
index e517663e0cd17..6f4116e729581 100644 /* Stall */ diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c -index d600a566da324..7325bee7d1442 100644 +index d600a566da324..19ea4d3c35535 100644 --- a/net/netfilter/nft_set_rbtree.c +++ b/net/netfilter/nft_set_rbtree.c -@@ -349,7 +349,11 @@ static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set, - *ext = &rbe->ext; - return -EEXIST; - } else { +@@ -38,10 +38,12 @@ static bool nft_rbtree_interval_start(const struct nft_rbtree_elem *rbe) + return !nft_rbtree_interval_end(rbe); + } + +-static bool nft_rbtree_equal(const struct nft_set *set, const void *this, +- const struct nft_rbtree_elem *interval) ++static int nft_rbtree_cmp(const struct nft_set *set, ++ const struct nft_rbtree_elem *e1, ++ const struct nft_rbtree_elem *e2) + { +- return memcmp(this, nft_set_ext_key(&interval->ext), set->klen) == 0; ++ return memcmp(nft_set_ext_key(&e1->ext), nft_set_ext_key(&e2->ext), ++ set->klen); + } + + static bool __nft_rbtree_lookup(const struct net *net, const struct nft_set *set, +@@ -52,7 +54,6 @@ static bool __nft_rbtree_lookup(const struct net *net, const struct nft_set *set + const struct nft_rbtree_elem *rbe, *interval = NULL; + u8 genmask = nft_genmask_cur(net); + const struct rb_node *parent; +- const void *this; + int d; + + parent = rcu_dereference_raw(priv->root.rb_node); +@@ -62,12 +63,11 @@ static bool __nft_rbtree_lookup(const struct net *net, const struct nft_set *set + + rbe = rb_entry(parent, struct nft_rbtree_elem, node); + +- this = nft_set_ext_key(&rbe->ext); +- d = memcmp(this, key, set->klen); ++ d = memcmp(nft_set_ext_key(&rbe->ext), key, set->klen); + if (d < 0) { + parent = rcu_dereference_raw(parent->rb_left); + if (interval && +- nft_rbtree_equal(set, this, interval) && ++ !nft_rbtree_cmp(set, rbe, interval) && + nft_rbtree_interval_end(rbe) && + nft_rbtree_interval_start(interval)) + continue; +@@ -215,150 +215,216 @@ static void *nft_rbtree_get(const struct net *net, const struct nft_set *set, + return rbe; + } + ++static int nft_rbtree_gc_elem(const struct nft_set *__set, ++ struct nft_rbtree *priv, ++ struct nft_rbtree_elem *rbe) ++{ ++ struct nft_set *set = (struct nft_set *)__set; ++ struct rb_node *prev = rb_prev(&rbe->node); ++ struct nft_rbtree_elem *rbe_prev; ++ struct nft_set_gc_batch *gcb; ++ ++ gcb = nft_set_gc_batch_check(set, NULL, GFP_ATOMIC); ++ if (!gcb) ++ return -ENOMEM; ++ ++ /* search for expired end interval coming before this element. */ ++ do { ++ rbe_prev = rb_entry(prev, struct nft_rbtree_elem, node); ++ if (nft_rbtree_interval_end(rbe_prev)) ++ break; ++ ++ prev = rb_prev(prev); ++ } while (prev != NULL); ++ ++ rb_erase(&rbe_prev->node, &priv->root); ++ rb_erase(&rbe->node, &priv->root); ++ atomic_sub(2, &set->nelems); ++ ++ nft_set_gc_batch_add(gcb, rbe); ++ nft_set_gc_batch_complete(gcb); ++ ++ return 0; ++} ++ ++static bool nft_rbtree_update_first(const struct nft_set *set, ++ struct nft_rbtree_elem *rbe, ++ struct rb_node *first) ++{ ++ struct nft_rbtree_elem *first_elem; ++ ++ first_elem = rb_entry(first, struct nft_rbtree_elem, node); ++ /* this element is closest to where the new element is to be inserted: ++ * update the first element for the node list path. 
++ */ ++ if (nft_rbtree_cmp(set, rbe, first_elem) < 0) ++ return true; ++ ++ return false; ++} ++ + static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set, + struct nft_rbtree_elem *new, + struct nft_set_ext **ext) + { +- bool overlap = false, dup_end_left = false, dup_end_right = false; ++ struct nft_rbtree_elem *rbe, *rbe_le = NULL, *rbe_ge = NULL; ++ struct rb_node *node, *parent, **p, *first = NULL; + struct nft_rbtree *priv = nft_set_priv(set); + u8 genmask = nft_genmask_next(net); +- struct nft_rbtree_elem *rbe; +- struct rb_node *parent, **p; +- int d; ++ int d, err; + +- /* Detect overlaps as we descend the tree. Set the flag in these cases: +- * +- * a1. _ _ __>| ?_ _ __| (insert end before existing end) +- * a2. _ _ ___| ?_ _ _>| (insert end after existing end) +- * a3. _ _ ___? >|_ _ __| (insert start before existing end) +- * +- * and clear it later on, as we eventually reach the points indicated by +- * '?' above, in the cases described below. We'll always meet these +- * later, locally, due to tree ordering, and overlaps for the intervals +- * that are the closest together are always evaluated last. +- * +- * b1. _ _ __>| !_ _ __| (insert end before existing start) +- * b2. _ _ ___| !_ _ _>| (insert end after existing start) +- * b3. _ _ ___! >|_ _ __| (insert start after existing end, as a leaf) +- * '--' no nodes falling in this range +- * b4. >|_ _ ! (insert start before existing start) +- * +- * Case a3. resolves to b3.: +- * - if the inserted start element is the leftmost, because the '0' +- * element in the tree serves as end element +- * - otherwise, if an existing end is found immediately to the left. If +- * there are existing nodes in between, we need to further descend the +- * tree before we can conclude the new start isn't causing an overlap +- * +- * or to b4., which, preceded by a3., means we already traversed one or +- * more existing intervals entirely, from the right. +- * +- * For a new, rightmost pair of elements, we'll hit cases b3. and b2., +- * in that order. +- * +- * The flag is also cleared in two special cases: +- * +- * b5. |__ _ _!|<_ _ _ (insert start right before existing end) +- * b6. |__ _ >|!__ _ _ (insert end right after existing start) +- * +- * which always happen as last step and imply that no further +- * overlapping is possible. +- * +- * Another special case comes from the fact that start elements matching +- * an already existing start element are allowed: insertion is not +- * performed but we return -EEXIST in that case, and the error will be +- * cleared by the caller if NLM_F_EXCL is not present in the request. +- * This way, request for insertion of an exact overlap isn't reported as +- * error to userspace if not desired. +- * +- * However, if the existing start matches a pre-existing start, but the +- * end element doesn't match the corresponding pre-existing end element, +- * we need to report a partial overlap. This is a local condition that +- * can be noticed without need for a tracking flag, by checking for a +- * local duplicated end for a corresponding start, from left and right, +- * separately. ++ /* Descend the tree to search for an existing element greater than the ++ * key value to insert that is greater than the new element. This is the ++ * first element to walk the ordered elements to find possible overlap. 
+ */ +- + parent = NULL; + p = &priv->root.rb_node; + while (*p != NULL) { + parent = *p; + rbe = rb_entry(parent, struct nft_rbtree_elem, node); +- d = memcmp(nft_set_ext_key(&rbe->ext), +- nft_set_ext_key(&new->ext), +- set->klen); ++ d = nft_rbtree_cmp(set, rbe, new); ++ + if (d < 0) { + p = &parent->rb_left; +- +- if (nft_rbtree_interval_start(new)) { +- if (nft_rbtree_interval_end(rbe) && +- nft_set_elem_active(&rbe->ext, genmask) && +- !nft_set_elem_expired(&rbe->ext) && !*p) +- overlap = false; +- } else { +- if (dup_end_left && !*p) +- return -ENOTEMPTY; +- +- overlap = nft_rbtree_interval_end(rbe) && +- nft_set_elem_active(&rbe->ext, +- genmask) && +- !nft_set_elem_expired(&rbe->ext); +- +- if (overlap) { +- dup_end_right = true; +- continue; +- } +- } + } else if (d > 0) { +- p = &parent->rb_right; ++ if (!first || ++ nft_rbtree_update_first(set, rbe, first)) ++ first = &rbe->node; + +- if (nft_rbtree_interval_end(new)) { +- if (dup_end_right && !*p) +- return -ENOTEMPTY; +- +- overlap = nft_rbtree_interval_end(rbe) && +- nft_set_elem_active(&rbe->ext, +- genmask) && +- !nft_set_elem_expired(&rbe->ext); +- +- if (overlap) { +- dup_end_left = true; +- continue; +- } +- } else if (nft_set_elem_active(&rbe->ext, genmask) && +- !nft_set_elem_expired(&rbe->ext)) { +- overlap = nft_rbtree_interval_end(rbe); +- } ++ p = &parent->rb_right; + } else { +- if (nft_rbtree_interval_end(rbe) && +- nft_rbtree_interval_start(new)) { ++ if (nft_rbtree_interval_end(rbe)) + p = &parent->rb_left; +- +- if (nft_set_elem_active(&rbe->ext, genmask) && +- !nft_set_elem_expired(&rbe->ext)) +- overlap = false; +- } else if (nft_rbtree_interval_start(rbe) && +- nft_rbtree_interval_end(new)) { ++ else + p = &parent->rb_right; ++ } ++ } + +- if (nft_set_elem_active(&rbe->ext, genmask) && +- !nft_set_elem_expired(&rbe->ext)) +- overlap = false; +- } else if (nft_set_elem_active(&rbe->ext, genmask) && +- !nft_set_elem_expired(&rbe->ext)) { +- *ext = &rbe->ext; +- return -EEXIST; +- } else { - p = &parent->rb_left; -+ overlap = false; -+ if (nft_rbtree_interval_end(rbe)) -+ p = &parent->rb_left; -+ else -+ p = &parent->rb_right; ++ if (!first) ++ first = rb_first(&priv->root); ++ ++ /* Detect overlap by going through the list of valid tree nodes. ++ * Values stored in the tree are in reversed order, starting from ++ * highest to lowest value. ++ */ ++ for (node = first; node != NULL; node = rb_next(node)) { ++ rbe = rb_entry(node, struct nft_rbtree_elem, node); ++ ++ if (!nft_set_elem_active(&rbe->ext, genmask)) ++ continue; ++ ++ /* perform garbage collection to avoid bogus overlap reports. */ ++ if (nft_set_elem_expired(&rbe->ext)) { ++ err = nft_rbtree_gc_elem(set, priv, rbe); ++ if (err < 0) ++ return err; ++ ++ continue; ++ } ++ ++ d = nft_rbtree_cmp(set, rbe, new); ++ if (d == 0) { ++ /* Matching end element: no need to look for an ++ * overlapping greater or equal element. ++ */ ++ if (nft_rbtree_interval_end(rbe)) { ++ rbe_le = rbe; ++ break; ++ } ++ ++ /* first element that is greater or equal to key value. */ ++ if (!rbe_ge) { ++ rbe_ge = rbe; ++ continue; ++ } ++ ++ /* this is a closer more or equal element, update it. */ ++ if (nft_rbtree_cmp(set, rbe_ge, new) != 0) { ++ rbe_ge = rbe; ++ continue; ++ } ++ ++ /* element is equal to key value, make sure flags are ++ * the same, an existing more or equal start element ++ * must not be replaced by more or equal end element. 
++ */ ++ if ((nft_rbtree_interval_start(new) && ++ nft_rbtree_interval_start(rbe_ge)) || ++ (nft_rbtree_interval_end(new) && ++ nft_rbtree_interval_end(rbe_ge))) { ++ rbe_ge = rbe; ++ continue; } ++ } else if (d > 0) { ++ /* annotate element greater than the new element. */ ++ rbe_ge = rbe; ++ continue; ++ } else if (d < 0) { ++ /* annotate element less than the new element. */ ++ rbe_le = rbe; ++ break; } ++ } + +- dup_end_left = dup_end_right = false; ++ /* - new start element matching existing start element: full overlap ++ * reported as -EEXIST, cleared by caller if NLM_F_EXCL is not given. ++ */ ++ if (rbe_ge && !nft_rbtree_cmp(set, new, rbe_ge) && ++ nft_rbtree_interval_start(rbe_ge) == nft_rbtree_interval_start(new)) { ++ *ext = &rbe_ge->ext; ++ return -EEXIST; + } + +- if (overlap) ++ /* - new end element matching existing end element: full overlap ++ * reported as -EEXIST, cleared by caller if NLM_F_EXCL is not given. ++ */ ++ if (rbe_le && !nft_rbtree_cmp(set, new, rbe_le) && ++ nft_rbtree_interval_end(rbe_le) == nft_rbtree_interval_end(new)) { ++ *ext = &rbe_le->ext; ++ return -EEXIST; ++ } ++ ++ /* - new start element with existing closest, less or equal key value ++ * being a start element: partial overlap, reported as -ENOTEMPTY. ++ * Anonymous sets allow for two consecutive start element since they ++ * are constant, skip them to avoid bogus overlap reports. ++ */ ++ if (!nft_set_is_anonymous(set) && rbe_le && ++ nft_rbtree_interval_start(rbe_le) && nft_rbtree_interval_start(new)) ++ return -ENOTEMPTY; ++ ++ /* - new end element with existing closest, less or equal key value ++ * being a end element: partial overlap, reported as -ENOTEMPTY. ++ */ ++ if (rbe_le && ++ nft_rbtree_interval_end(rbe_le) && nft_rbtree_interval_end(new)) ++ return -ENOTEMPTY; ++ ++ /* - new end element with existing closest, greater or equal key value ++ * being an end element: partial overlap, reported as -ENOTEMPTY ++ */ ++ if (rbe_ge && ++ nft_rbtree_interval_end(rbe_ge) && nft_rbtree_interval_end(new)) + return -ENOTEMPTY; + ++ /* Accepted element: pick insertion point depending on key value */ ++ parent = NULL; ++ p = &priv->root.rb_node; ++ while (*p != NULL) { ++ parent = *p; ++ rbe = rb_entry(parent, struct nft_rbtree_elem, node); ++ d = nft_rbtree_cmp(set, rbe, new); ++ ++ if (d < 0) ++ p = &parent->rb_left; ++ else if (d > 0) ++ p = &parent->rb_right; ++ else if (nft_rbtree_interval_end(rbe)) ++ p = &parent->rb_left; ++ else ++ p = &parent->rb_right; ++ } ++ + rb_link_node_rcu(&new->node, parent, p); + rb_insert_color(&new->node, &priv->root); + return 0; +@@ -497,23 +563,37 @@ static void nft_rbtree_gc(struct work_struct *work) + struct nft_rbtree *priv; + struct rb_node *node; + struct nft_set *set; ++ struct net *net; ++ u8 genmask; + + priv = container_of(work, struct nft_rbtree, gc_work.work); + set = nft_set_container_of(priv); ++ net = read_pnet(&set->net); ++ genmask = nft_genmask_cur(net); + write_lock_bh(&priv->lock); + write_seqcount_begin(&priv->count); + for (node = rb_first(&priv->root); node != NULL; node = rb_next(node)) { + rbe = rb_entry(node, struct nft_rbtree_elem, node); + ++ if (!nft_set_elem_active(&rbe->ext, genmask)) ++ continue; ++ ++ /* elements are reversed in the rbtree for historical reasons, ++ * from highest to lowest value, that is why end element is ++ * always visited before the start element. 
++ */ + if (nft_rbtree_interval_end(rbe)) { + rbe_end = rbe; + continue; + } + if (!nft_set_elem_expired(&rbe->ext)) + continue; +- if (nft_set_elem_mark_busy(&rbe->ext)) ++ ++ if (nft_set_elem_mark_busy(&rbe->ext)) { ++ rbe_end = NULL; + continue; ++ } + + if (rbe_prev) { + rb_erase(&rbe_prev->node, &priv->root); diff --git a/net/netfilter/nft_socket.c b/net/netfilter/nft_socket.c index d601974c9d2e0..9ad9cc0d1d27c 100644 --- a/net/netfilter/nft_socket.c @@ -400917,6 +516475,32 @@ index a0109fa1e92d0..1133e06f3c40e 100644 break; } +diff --git a/net/netfilter/nft_tproxy.c b/net/netfilter/nft_tproxy.c +index b5b09a902c7ac..9fea90ed79d44 100644 +--- a/net/netfilter/nft_tproxy.c ++++ b/net/netfilter/nft_tproxy.c +@@ -312,6 +312,13 @@ static int nft_tproxy_dump(struct sk_buff *skb, + return 0; + } + ++static int nft_tproxy_validate(const struct nft_ctx *ctx, ++ const struct nft_expr *expr, ++ const struct nft_data **data) ++{ ++ return nft_chain_validate_hooks(ctx->chain, 1 << NF_INET_PRE_ROUTING); ++} ++ + static struct nft_expr_type nft_tproxy_type; + static const struct nft_expr_ops nft_tproxy_ops = { + .type = &nft_tproxy_type, +@@ -320,6 +327,7 @@ static const struct nft_expr_ops nft_tproxy_ops = { + .init = nft_tproxy_init, + .destroy = nft_tproxy_destroy, + .dump = nft_tproxy_dump, ++ .validate = nft_tproxy_validate, + }; + + static struct nft_expr_type nft_tproxy_type __read_mostly = { diff --git a/net/netfilter/nft_tunnel.c b/net/netfilter/nft_tunnel.c index 3b27926d5382c..2ee50996da8cc 100644 --- a/net/netfilter/nft_tunnel.c @@ -400950,6 +516534,66 @@ index 0a913ce07425a..267757b0392a6 100644 out: info->ct = ct; return 0; +diff --git a/net/netfilter/xt_connmark.c b/net/netfilter/xt_connmark.c +index e5ebc0810675a..ad3c033db64e7 100644 +--- a/net/netfilter/xt_connmark.c ++++ b/net/netfilter/xt_connmark.c +@@ -30,6 +30,7 @@ connmark_tg_shift(struct sk_buff *skb, const struct xt_connmark_tginfo2 *info) + u_int32_t new_targetmark; + struct nf_conn *ct; + u_int32_t newmark; ++ u_int32_t oldmark; + + ct = nf_ct_get(skb, &ctinfo); + if (ct == NULL) +@@ -37,14 +38,15 @@ connmark_tg_shift(struct sk_buff *skb, const struct xt_connmark_tginfo2 *info) + + switch (info->mode) { + case XT_CONNMARK_SET: +- newmark = (ct->mark & ~info->ctmask) ^ info->ctmark; ++ oldmark = READ_ONCE(ct->mark); ++ newmark = (oldmark & ~info->ctmask) ^ info->ctmark; + if (info->shift_dir == D_SHIFT_RIGHT) + newmark >>= info->shift_bits; + else + newmark <<= info->shift_bits; + +- if (ct->mark != newmark) { +- ct->mark = newmark; ++ if (READ_ONCE(ct->mark) != newmark) { ++ WRITE_ONCE(ct->mark, newmark); + nf_conntrack_event_cache(IPCT_MARK, ct); + } + break; +@@ -55,15 +57,15 @@ connmark_tg_shift(struct sk_buff *skb, const struct xt_connmark_tginfo2 *info) + else + new_targetmark <<= info->shift_bits; + +- newmark = (ct->mark & ~info->ctmask) ^ ++ newmark = (READ_ONCE(ct->mark) & ~info->ctmask) ^ + new_targetmark; +- if (ct->mark != newmark) { +- ct->mark = newmark; ++ if (READ_ONCE(ct->mark) != newmark) { ++ WRITE_ONCE(ct->mark, newmark); + nf_conntrack_event_cache(IPCT_MARK, ct); + } + break; + case XT_CONNMARK_RESTORE: +- new_targetmark = (ct->mark & info->ctmask); ++ new_targetmark = (READ_ONCE(ct->mark) & info->ctmask); + if (info->shift_dir == D_SHIFT_RIGHT) + new_targetmark >>= info->shift_bits; + else +@@ -126,7 +128,7 @@ connmark_mt(const struct sk_buff *skb, struct xt_action_param *par) + if (ct == NULL) + return false; + +- return ((ct->mark & info->mask) == info->mark) ^ info->invert; ++ return 
((READ_ONCE(ct->mark) & info->mask) == info->mark) ^ info->invert; + } + + static int connmark_mt_check(const struct xt_mtchk_param *par) diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c index 5e6459e116055..7013f55f05d1e 100644 --- a/net/netfilter/xt_socket.c @@ -400980,7 +516624,7 @@ index beb0e573266d0..54c0830039470 100644 byte = bitmap[byte_offset]; bit_spot = offset; diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c -index ada47e59647a0..974d32632ef41 100644 +index ada47e59647a0..011ec7d9a719e 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -157,6 +157,8 @@ EXPORT_SYMBOL(do_trace_netlink_extack); @@ -400992,7 +516636,76 @@ index ada47e59647a0..974d32632ef41 100644 return group ? 1 << (group - 1) : 0; } -@@ -1871,6 +1873,11 @@ static int netlink_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) +@@ -576,7 +578,9 @@ static int netlink_insert(struct sock *sk, u32 portid) + if (nlk_sk(sk)->bound) + goto err; + +- nlk_sk(sk)->portid = portid; ++ /* portid can be read locklessly from netlink_getname(). */ ++ WRITE_ONCE(nlk_sk(sk)->portid, portid); ++ + sock_hold(sk); + + err = __netlink_insert(table, sk); +@@ -1085,9 +1089,11 @@ static int netlink_connect(struct socket *sock, struct sockaddr *addr, + return -EINVAL; + + if (addr->sa_family == AF_UNSPEC) { +- sk->sk_state = NETLINK_UNCONNECTED; +- nlk->dst_portid = 0; +- nlk->dst_group = 0; ++ /* paired with READ_ONCE() in netlink_getsockbyportid() */ ++ WRITE_ONCE(sk->sk_state, NETLINK_UNCONNECTED); ++ /* dst_portid and dst_group can be read locklessly */ ++ WRITE_ONCE(nlk->dst_portid, 0); ++ WRITE_ONCE(nlk->dst_group, 0); + return 0; + } + if (addr->sa_family != AF_NETLINK) +@@ -1108,9 +1114,11 @@ static int netlink_connect(struct socket *sock, struct sockaddr *addr, + err = netlink_autobind(sock); + + if (err == 0) { +- sk->sk_state = NETLINK_CONNECTED; +- nlk->dst_portid = nladdr->nl_pid; +- nlk->dst_group = ffs(nladdr->nl_groups); ++ /* paired with READ_ONCE() in netlink_getsockbyportid() */ ++ WRITE_ONCE(sk->sk_state, NETLINK_CONNECTED); ++ /* dst_portid and dst_group can be read locklessly */ ++ WRITE_ONCE(nlk->dst_portid, nladdr->nl_pid); ++ WRITE_ONCE(nlk->dst_group, ffs(nladdr->nl_groups)); + } + + return err; +@@ -1127,10 +1135,12 @@ static int netlink_getname(struct socket *sock, struct sockaddr *addr, + nladdr->nl_pad = 0; + + if (peer) { +- nladdr->nl_pid = nlk->dst_portid; +- nladdr->nl_groups = netlink_group_mask(nlk->dst_group); ++ /* Paired with WRITE_ONCE() in netlink_connect() */ ++ nladdr->nl_pid = READ_ONCE(nlk->dst_portid); ++ nladdr->nl_groups = netlink_group_mask(READ_ONCE(nlk->dst_group)); + } else { +- nladdr->nl_pid = nlk->portid; ++ /* Paired with WRITE_ONCE() in netlink_insert() */ ++ nladdr->nl_pid = READ_ONCE(nlk->portid); + netlink_lock_table(); + nladdr->nl_groups = nlk->groups ? 
nlk->groups[0] : 0; + netlink_unlock_table(); +@@ -1157,8 +1167,9 @@ static struct sock *netlink_getsockbyportid(struct sock *ssk, u32 portid) + + /* Don't bother queuing skb if kernel socket has no input function */ + nlk = nlk_sk(sock); +- if (sock->sk_state == NETLINK_CONNECTED && +- nlk->dst_portid != nlk_sk(ssk)->portid) { ++ /* dst_portid and sk_state can be changed in netlink_connect() */ ++ if (READ_ONCE(sock->sk_state) == NETLINK_CONNECTED && ++ READ_ONCE(nlk->dst_portid) != nlk_sk(ssk)->portid) { + sock_put(sock); + return ERR_PTR(-ECONNREFUSED); + } +@@ -1871,6 +1882,11 @@ static int netlink_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) if (msg->msg_flags & MSG_OOB) return -EOPNOTSUPP; @@ -401004,7 +516717,19 @@ index ada47e59647a0..974d32632ef41 100644 err = scm_send(sock, msg, &scm, true); if (err < 0) return err; -@@ -1989,7 +1996,6 @@ static int netlink_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, +@@ -1889,8 +1905,9 @@ static int netlink_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) + goto out; + netlink_skb_flags |= NETLINK_SKB_DST; + } else { +- dst_portid = nlk->dst_portid; +- dst_group = nlk->dst_group; ++ /* Paired with WRITE_ONCE() in netlink_connect() */ ++ dst_portid = READ_ONCE(nlk->dst_portid); ++ dst_group = READ_ONCE(nlk->dst_group); + } + + /* Paired with WRITE_ONCE() in netlink_insert() */ +@@ -1989,7 +2006,6 @@ static int netlink_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, copied = len; } @@ -401012,7 +516737,7 @@ index ada47e59647a0..974d32632ef41 100644 err = skb_copy_datagram_msg(data_skb, 0, msg, copied); if (msg->msg_name) { -@@ -2277,6 +2283,13 @@ static int netlink_dump(struct sock *sk) +@@ -2277,6 +2293,13 @@ static int netlink_dump(struct sock *sk) * single netdev. The outcome is MSG_TRUNC error. */ skb_reserve(skb, skb_tailroom(skb) - alloc_size); @@ -401089,7 +516814,7 @@ index 8d7c900e27f4c..87e3de0fde896 100644 static bool diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c -index 6d16e1ab1a8ab..e5c8a295e6406 100644 +index 6d16e1ab1a8ab..5c04da4cfbad0 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -298,7 +298,7 @@ static int nr_setsockopt(struct socket *sock, int level, int optname, @@ -401139,6 +516864,30 @@ index 6d16e1ab1a8ab..e5c8a295e6406 100644 return -EINVAL; nr->idle = opt * 60 * HZ; return 0; +@@ -400,6 +400,11 @@ static int nr_listen(struct socket *sock, int backlog) + struct sock *sk = sock->sk; + + lock_sock(sk); ++ if (sock->state != SS_UNCONNECTED) { ++ release_sock(sk); ++ return -EINVAL; ++ } ++ + if (sk->sk_state != TCP_LISTEN) { + memset(&nr_sk(sk)->user_addr, 0, AX25_ADDR_LEN); + sk->sk_max_ack_backlog = backlog; +diff --git a/net/netrom/nr_timer.c b/net/netrom/nr_timer.c +index a8da88db7893f..4e7c968cde2dc 100644 +--- a/net/netrom/nr_timer.c ++++ b/net/netrom/nr_timer.c +@@ -121,6 +121,7 @@ static void nr_heartbeat_expiry(struct timer_list *t) + is accepted() it isn't 'dead' so doesn't get removed. 
*/ + if (sock_flag(sk, SOCK_DESTROY) || + (sk->sk_state == TCP_LISTEN && sock_flag(sk, SOCK_DEAD))) { ++ sock_hold(sk); + bh_unlock_sock(sk); + nr_destroy_socket(sk); + goto out; diff --git a/net/nfc/core.c b/net/nfc/core.c index 3c645c1d99c9b..6ff3e10ff8e35 100644 --- a/net/nfc/core.c @@ -401321,6 +517070,18 @@ index 3c645c1d99c9b..6ff3e10ff8e35 100644 nfc_llcp_unregister_device(dev); mutex_lock(&nfc_devlist_mutex); +diff --git a/net/nfc/llcp_core.c b/net/nfc/llcp_core.c +index eaeb2b1cfa6ac..fd43e75abd948 100644 +--- a/net/nfc/llcp_core.c ++++ b/net/nfc/llcp_core.c +@@ -159,6 +159,7 @@ static void local_cleanup(struct nfc_llcp_local *local) + cancel_work_sync(&local->rx_work); + cancel_work_sync(&local->timeout_work); + kfree_skb(local->rx_pending); ++ local->rx_pending = NULL; + del_timer_sync(&local->sdreq_timer); + cancel_work_sync(&local->sdreq_timeout_work); + nfc_llcp_free_sdp_tlv_list(&local->pending_sdreqs); diff --git a/net/nfc/llcp_sock.c b/net/nfc/llcp_sock.c index 6cfd30fc07985..0b93a17b9f11f 100644 --- a/net/nfc/llcp_sock.c @@ -401338,7 +517099,7 @@ index 6cfd30fc07985..0b93a17b9f11f 100644 DECLARE_SOCKADDR(struct sockaddr_nfc_llcp *, addr, msg->msg_name); diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c -index 82ab39d80726e..189c9f428a3c2 100644 +index 82ab39d80726e..7b6cf9a44aea7 100644 --- a/net/nfc/nci/core.c +++ b/net/nfc/nci/core.c @@ -144,12 +144,15 @@ inline int nci_request(struct nci_dev *ndev, @@ -401373,6 +517134,15 @@ index 82ab39d80726e..189c9f428a3c2 100644 if (test_bit(NCI_UP, &ndev->flags)) { rc = -EALREADY; goto done; +@@ -534,7 +542,7 @@ static int nci_open_device(struct nci_dev *ndev) + skb_queue_purge(&ndev->tx_q); + + ndev->ops->close(ndev); +- ndev->flags = 0; ++ ndev->flags &= BIT(NCI_UNREG); + } + + done: @@ -545,9 +553,17 @@ done: static int nci_close_device(struct nci_dev *ndev) { @@ -401416,7 +517186,7 @@ index 82ab39d80726e..189c9f428a3c2 100644 destroy_workqueue(ndev->cmd_wq); diff --git a/net/nfc/nci/data.c b/net/nfc/nci/data.c -index 6055dc9a82aa0..aa5e712adf078 100644 +index 6055dc9a82aa0..3d36ea5701f02 100644 --- a/net/nfc/nci/data.c +++ b/net/nfc/nci/data.c @@ -118,7 +118,7 @@ static int nci_queue_tx_data_frags(struct nci_dev *ndev, @@ -401428,6 +517198,18 @@ index 6055dc9a82aa0..aa5e712adf078 100644 if (skb_frag == NULL) { rc = -ENOMEM; goto free_exit; +@@ -279,8 +279,10 @@ void nci_rx_data_packet(struct nci_dev *ndev, struct sk_buff *skb) + nci_plen(skb->data)); + + conn_info = nci_get_conn_info_by_conn_id(ndev, nci_conn_id(skb->data)); +- if (!conn_info) ++ if (!conn_info) { ++ kfree_skb(skb); + return; ++ } + + /* strip the nci data header */ + skb_pull(skb, NCI_DATA_HDR_SIZE); diff --git a/net/nfc/nci/hci.c b/net/nfc/nci/hci.c index e199912ee1e59..85b808fdcbc3a 100644 --- a/net/nfc/nci/hci.c @@ -401450,8 +517232,39 @@ index e199912ee1e59..85b808fdcbc3a 100644 if (!skb) return -ENOMEM; +diff --git a/net/nfc/nci/ntf.c b/net/nfc/nci/ntf.c +index c5eacaac41aea..8f48b10619448 100644 +--- a/net/nfc/nci/ntf.c ++++ b/net/nfc/nci/ntf.c +@@ -240,6 +240,8 @@ static int nci_add_new_protocol(struct nci_dev *ndev, + target->sens_res = nfca_poll->sens_res; + target->sel_res = nfca_poll->sel_res; + target->nfcid1_len = nfca_poll->nfcid1_len; ++ if (target->nfcid1_len > ARRAY_SIZE(target->nfcid1)) ++ return -EPROTO; + if (target->nfcid1_len > 0) { + memcpy(target->nfcid1, nfca_poll->nfcid1, + target->nfcid1_len); +@@ -248,6 +250,8 @@ static int nci_add_new_protocol(struct nci_dev *ndev, + nfcb_poll = (struct rf_tech_specific_params_nfcb_poll 
*)params; + + target->sensb_res_len = nfcb_poll->sensb_res_len; ++ if (target->sensb_res_len > ARRAY_SIZE(target->sensb_res)) ++ return -EPROTO; + if (target->sensb_res_len > 0) { + memcpy(target->sensb_res, nfcb_poll->sensb_res, + target->sensb_res_len); +@@ -256,6 +260,8 @@ static int nci_add_new_protocol(struct nci_dev *ndev, + nfcf_poll = (struct rf_tech_specific_params_nfcf_poll *)params; + + target->sensf_res_len = nfcf_poll->sensf_res_len; ++ if (target->sensf_res_len > ARRAY_SIZE(target->sensf_res)) ++ return -EPROTO; + if (target->sensf_res_len > 0) { + memcpy(target->sensf_res, nfcf_poll->sensf_res, + target->sensf_res_len); diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c -index 49089c50872e6..a207f0b8137b0 100644 +index 49089c50872e6..d928d5a24bbc1 100644 --- a/net/nfc/netlink.c +++ b/net/nfc/netlink.c @@ -636,8 +636,10 @@ static int nfc_genl_dump_devices_done(struct netlink_callback *cb) @@ -401498,6 +517311,103 @@ index 49089c50872e6..a207f0b8137b0 100644 return 0; } +@@ -1493,6 +1497,7 @@ static int nfc_genl_se_io(struct sk_buff *skb, struct genl_info *info) + u32 dev_idx, se_idx; + u8 *apdu; + size_t apdu_len; ++ int rc; + + if (!info->attrs[NFC_ATTR_DEVICE_INDEX] || + !info->attrs[NFC_ATTR_SE_INDEX] || +@@ -1506,25 +1511,37 @@ static int nfc_genl_se_io(struct sk_buff *skb, struct genl_info *info) + if (!dev) + return -ENODEV; + +- if (!dev->ops || !dev->ops->se_io) +- return -ENOTSUPP; ++ if (!dev->ops || !dev->ops->se_io) { ++ rc = -EOPNOTSUPP; ++ goto put_dev; ++ } + + apdu_len = nla_len(info->attrs[NFC_ATTR_SE_APDU]); +- if (apdu_len == 0) +- return -EINVAL; ++ if (apdu_len == 0) { ++ rc = -EINVAL; ++ goto put_dev; ++ } + + apdu = nla_data(info->attrs[NFC_ATTR_SE_APDU]); +- if (!apdu) +- return -EINVAL; ++ if (!apdu) { ++ rc = -EINVAL; ++ goto put_dev; ++ } + + ctx = kzalloc(sizeof(struct se_io_ctx), GFP_KERNEL); +- if (!ctx) +- return -ENOMEM; ++ if (!ctx) { ++ rc = -ENOMEM; ++ goto put_dev; ++ } + + ctx->dev_idx = dev_idx; + ctx->se_idx = se_idx; + +- return nfc_se_io(dev, se_idx, apdu, apdu_len, se_io_cb, ctx); ++ rc = nfc_se_io(dev, se_idx, apdu, apdu_len, se_io_cb, ctx); ++ ++put_dev: ++ nfc_put_device(dev); ++ return rc; + } + + static int nfc_genl_vendor_cmd(struct sk_buff *skb, +@@ -1547,14 +1564,21 @@ static int nfc_genl_vendor_cmd(struct sk_buff *skb, + subcmd = nla_get_u32(info->attrs[NFC_ATTR_VENDOR_SUBCMD]); + + dev = nfc_get_device(dev_idx); +- if (!dev || !dev->vendor_cmds || !dev->n_vendor_cmds) ++ if (!dev) + return -ENODEV; + ++ if (!dev->vendor_cmds || !dev->n_vendor_cmds) { ++ err = -ENODEV; ++ goto put_dev; ++ } ++ + if (info->attrs[NFC_ATTR_VENDOR_DATA]) { + data = nla_data(info->attrs[NFC_ATTR_VENDOR_DATA]); + data_len = nla_len(info->attrs[NFC_ATTR_VENDOR_DATA]); +- if (data_len == 0) +- return -EINVAL; ++ if (data_len == 0) { ++ err = -EINVAL; ++ goto put_dev; ++ } + } else { + data = NULL; + data_len = 0; +@@ -1569,10 +1593,14 @@ static int nfc_genl_vendor_cmd(struct sk_buff *skb, + dev->cur_cmd_info = info; + err = cmd->doit(dev, data, data_len); + dev->cur_cmd_info = NULL; +- return err; ++ goto put_dev; + } + +- return -EOPNOTSUPP; ++ err = -EOPNOTSUPP; ++ ++put_dev: ++ nfc_put_device(dev); ++ return err; + } + + /* message building helper */ diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index 076774034bb96..aca6e2b599c86 100644 --- a/net/openvswitch/actions.c @@ -401619,9 +517529,31 @@ index 076774034bb96..aca6e2b599c86 100644 dont_clone_flow_key = nla_get_u32(clone_arg); actions = nla_next(clone_arg, &rem); diff 
--git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c -index 1b5eae57bc900..dc86f03309c10 100644 +index 1b5eae57bc900..7106ce231a2dd 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c +@@ -150,7 +150,7 @@ static u8 ovs_ct_get_state(enum ip_conntrack_info ctinfo) + static u32 ovs_ct_get_mark(const struct nf_conn *ct) + { + #if IS_ENABLED(CONFIG_NF_CONNTRACK_MARK) +- return ct ? ct->mark : 0; ++ return ct ? READ_ONCE(ct->mark) : 0; + #else + return 0; + #endif +@@ -338,9 +338,9 @@ static int ovs_ct_set_mark(struct nf_conn *ct, struct sw_flow_key *key, + #if IS_ENABLED(CONFIG_NF_CONNTRACK_MARK) + u32 new_mark; + +- new_mark = ct_mark | (ct->mark & ~(mask)); +- if (ct->mark != new_mark) { +- ct->mark = new_mark; ++ new_mark = ct_mark | (READ_ONCE(ct->mark) & ~(mask)); ++ if (READ_ONCE(ct->mark) != new_mark) { ++ WRITE_ONCE(ct->mark, new_mark); + if (nf_ct_is_confirmed(ct)) + nf_conntrack_event_cache(IPCT_MARK, ct); + key->ct.mark = new_mark; @@ -574,7 +574,7 @@ ovs_ct_expect_find(struct net *net, const struct nf_conntrack_zone *zone, struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h); @@ -401837,7 +517769,7 @@ index 1b5eae57bc900..dc86f03309c10 100644 err_free_ct: __ovs_ct_free_action(&ct_info); diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c -index 67ad08320886b..46ef1525b2e5e 100644 +index 67ad08320886b..0fc98e89a1149 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -251,10 +251,17 @@ void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key) @@ -401873,7 +517805,96 @@ index 67ad08320886b..46ef1525b2e5e 100644 return err; } -@@ -1801,7 +1809,7 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info) +@@ -938,6 +946,7 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info) + struct sw_flow_mask mask; + struct sk_buff *reply; + struct datapath *dp; ++ struct sw_flow_key *key; + struct sw_flow_actions *acts; + struct sw_flow_match match; + u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]); +@@ -965,30 +974,32 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info) + } + + /* Extract key. */ +- ovs_match_init(&match, &new_flow->key, false, &mask); ++ key = kzalloc(sizeof(*key), GFP_KERNEL); ++ if (!key) { ++ error = -ENOMEM; ++ goto err_kfree_flow; ++ } ++ ++ ovs_match_init(&match, key, false, &mask); + error = ovs_nla_get_match(net, &match, a[OVS_FLOW_ATTR_KEY], + a[OVS_FLOW_ATTR_MASK], log); + if (error) +- goto err_kfree_flow; ++ goto err_kfree_key; ++ ++ ovs_flow_mask_key(&new_flow->key, key, true, &mask); + + /* Extract flow identifier. */ + error = ovs_nla_get_identifier(&new_flow->id, a[OVS_FLOW_ATTR_UFID], +- &new_flow->key, log); ++ key, log); + if (error) +- goto err_kfree_flow; +- +- /* unmasked key is needed to match when ufid is not used. */ +- if (ovs_identifier_is_key(&new_flow->id)) +- match.key = new_flow->id.unmasked_key; +- +- ovs_flow_mask_key(&new_flow->key, &new_flow->key, true, &mask); ++ goto err_kfree_key; + + /* Validate actions. 
*/ + error = ovs_nla_copy_actions(net, a[OVS_FLOW_ATTR_ACTIONS], + &new_flow->key, &acts, log); + if (error) { + OVS_NLERR(log, "Flow actions may not be safe on all matching packets."); +- goto err_kfree_flow; ++ goto err_kfree_key; + } + + reply = ovs_flow_cmd_alloc_info(acts, &new_flow->id, info, false, +@@ -1009,7 +1020,7 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info) + if (ovs_identifier_is_ufid(&new_flow->id)) + flow = ovs_flow_tbl_lookup_ufid(&dp->table, &new_flow->id); + if (!flow) +- flow = ovs_flow_tbl_lookup(&dp->table, &new_flow->key); ++ flow = ovs_flow_tbl_lookup(&dp->table, key); + if (likely(!flow)) { + rcu_assign_pointer(new_flow->sf_acts, acts); + +@@ -1079,6 +1090,8 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info) + + if (reply) + ovs_notify(&dp_flow_genl_family, reply, info); ++ ++ kfree(key); + return 0; + + err_unlock_ovs: +@@ -1086,6 +1099,8 @@ err_unlock_ovs: + kfree_skb(reply); + err_kfree_acts: + ovs_nla_free_flow_actions(acts); ++err_kfree_key: ++ kfree(key); + err_kfree_flow: + ovs_flow_free(new_flow, false); + error: +@@ -1597,7 +1612,8 @@ static void ovs_dp_reset_user_features(struct sk_buff *skb, + if (IS_ERR(dp)) + return; + +- WARN(dp->user_features, "Dropping previously announced user features\n"); ++ pr_warn("%s: Dropping previously announced user features\n", ++ ovs_dp_name(dp)); + dp->user_features = 0; + } + +@@ -1801,7 +1817,7 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info) ovs_dp_reset_user_features(skb, info); } @@ -401882,7 +517903,7 @@ index 67ad08320886b..46ef1525b2e5e 100644 } err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid, -@@ -1816,6 +1824,8 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info) +@@ -1816,6 +1832,8 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info) ovs_notify(&dp_datapath_genl_family, reply, info); return 0; @@ -402112,8 +518133,30 @@ index fd1f809e9bc1b..d77c21ff066c9 100644 if (err) nla_nest_cancel(skb, start); +diff --git a/net/openvswitch/meter.c b/net/openvswitch/meter.c +index 896b8f5bc8853..67b471c666c7e 100644 +--- a/net/openvswitch/meter.c ++++ b/net/openvswitch/meter.c +@@ -450,7 +450,7 @@ static int ovs_meter_cmd_set(struct sk_buff *skb, struct genl_info *info) + + err = attach_meter(meter_tbl, meter); + if (err) +- goto exit_unlock; ++ goto exit_free_old_meter; + + ovs_unlock(); + +@@ -473,6 +473,8 @@ static int ovs_meter_cmd_set(struct sk_buff *skb, struct genl_info *info) + genlmsg_end(reply, ovs_reply_header); + return genlmsg_reply(reply, info); + ++exit_free_old_meter: ++ ovs_meter_free(old_meter); + exit_unlock: + ovs_unlock(); + nlmsg_free(reply); diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c -index 2a2bc64f75cfd..968dac3fcf58a 100644 +index 2a2bc64f75cfd..7f9f2d0ef0e62 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -1738,6 +1738,7 @@ static int fanout_add(struct sock *sk, struct fanout_args *args) @@ -402136,7 +518179,40 @@ index 2a2bc64f75cfd..968dac3fcf58a 100644 po->rollover = rollover; rollover = NULL; refcount_set(&match->sk_ref, refcount_read(&match->sk_ref) + 1); -@@ -2277,8 +2281,11 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, +@@ -1884,12 +1888,22 @@ oom: + + static void packet_parse_headers(struct sk_buff *skb, struct socket *sock) + { ++ int depth; ++ + if ((!skb->protocol || skb->protocol == htons(ETH_P_ALL)) && + sock->type == SOCK_RAW) { + skb_reset_mac_header(skb); + skb->protocol = 
dev_parse_header_protocol(skb); + } + ++ /* Move network header to the right position for VLAN tagged packets */ ++ if (likely(skb->dev->type == ARPHRD_ETHER) && ++ eth_type_vlan(skb->protocol) && ++ __vlan_get_protocol(skb, skb->protocol, &depth) != 0) { ++ if (pskb_may_pull(skb, depth)) ++ skb_set_network_header(skb, depth); ++ } ++ + skb_probe_transport_header(skb); + } + +@@ -2242,8 +2256,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, + if (skb->ip_summed == CHECKSUM_PARTIAL) + status |= TP_STATUS_CSUMNOTREADY; + else if (skb->pkt_type != PACKET_OUTGOING && +- (skb->ip_summed == CHECKSUM_COMPLETE || +- skb_csum_unnecessary(skb))) ++ skb_csum_unnecessary(skb)) + status |= TP_STATUS_CSUM_VALID; + + if (snaplen > res) +@@ -2277,8 +2290,11 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, copy_skb = skb_get(skb); skb_head = skb->data; } @@ -402149,7 +518225,7 @@ index 2a2bc64f75cfd..968dac3fcf58a 100644 } snaplen = po->rx_ring.frame_size - macoff; if ((int)snaplen < 0) { -@@ -2813,8 +2820,9 @@ tpacket_error: +@@ -2813,8 +2829,9 @@ tpacket_error: status = TP_STATUS_SEND_REQUEST; err = po->xmit(skb); @@ -402161,7 +518237,7 @@ index 2a2bc64f75cfd..968dac3fcf58a 100644 if (err && __packet_get_status(po, ph) == TP_STATUS_AVAILABLE) { /* skb was destructed already */ -@@ -2981,8 +2989,8 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len) +@@ -2981,8 +2998,8 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len) if (err) goto out_free; @@ -402172,9 +518248,27 @@ index 2a2bc64f75cfd..968dac3fcf58a 100644 err = -EINVAL; goto out_free; } -@@ -3015,8 +3023,12 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len) - skb->no_fcs = 1; +@@ -3001,6 +3018,11 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len) + skb->mark = sockc.mark; + skb->tstamp = sockc.transmit_time; + ++ if (unlikely(extra_len == 4)) ++ skb->no_fcs = 1; ++ ++ packet_parse_headers(skb, sock); ++ + if (has_vnet_hdr) { + err = virtio_net_hdr_to_skb(skb, &vnet_hdr, vio_le()); + if (err) +@@ -3009,14 +3031,13 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len) + virtio_net_hdr_set_proto(skb, &vnet_hdr); + } +- packet_parse_headers(skb, sock); +- +- if (unlikely(extra_len == 4)) +- skb->no_fcs = 1; +- err = po->xmit(skb); - if (err > 0 && (err = net_xmit_errno(err)) != 0) - goto out_unlock; @@ -402187,7 +518281,7 @@ index 2a2bc64f75cfd..968dac3fcf58a 100644 dev_put(dev); -@@ -3323,6 +3335,7 @@ static int packet_create(struct net *net, struct socket *sock, int protocol, +@@ -3323,6 +3344,7 @@ static int packet_create(struct net *net, struct socket *sock, int protocol, po->prot_hook.func = packet_rcv_spkt; po->prot_hook.af_packet_priv = sk; @@ -402195,7 +518289,7 @@ index 2a2bc64f75cfd..968dac3fcf58a 100644 if (proto) { po->prot_hook.type = proto; -@@ -3429,6 +3442,8 @@ static int packet_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, +@@ -3429,6 +3451,8 @@ static int packet_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, sock_recv_ts_and_drops(msg, sk, skb); if (msg->msg_name) { @@ -402204,7 +518298,7 @@ index 2a2bc64f75cfd..968dac3fcf58a 100644 int copy_len; /* If the address length field is there to be filled -@@ -3451,6 +3466,10 @@ static int packet_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, +@@ -3451,6 +3475,10 @@ static int packet_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, msg->msg_namelen = sizeof(struct 
sockaddr_ll); } } @@ -402215,7 +518309,17 @@ index 2a2bc64f75cfd..968dac3fcf58a 100644 memcpy(msg->msg_name, &PACKET_SKB_CB(skb)->sa, copy_len); } -@@ -3904,7 +3923,8 @@ packet_setsockopt(struct socket *sock, int level, int optname, sockptr_t optval, +@@ -3461,8 +3489,7 @@ static int packet_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, + if (skb->ip_summed == CHECKSUM_PARTIAL) + aux.tp_status |= TP_STATUS_CSUMNOTREADY; + else if (skb->pkt_type != PACKET_OUTGOING && +- (skb->ip_summed == CHECKSUM_COMPLETE || +- skb_csum_unnecessary(skb))) ++ skb_csum_unnecessary(skb)) + aux.tp_status |= TP_STATUS_CSUM_VALID; + + aux.tp_len = origlen; +@@ -3904,7 +3931,8 @@ packet_setsockopt(struct socket *sock, int level, int optname, sockptr_t optval, } case PACKET_FANOUT_DATA: { @@ -402225,7 +518329,7 @@ index 2a2bc64f75cfd..968dac3fcf58a 100644 return -EINVAL; return fanout_set_data(po, optval, optlen); -@@ -4457,9 +4477,10 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u, +@@ -4457,9 +4485,10 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u, } out_free_pg_vec: @@ -402289,6 +518393,22 @@ index fa611678af052..49e7cab43d24c 100644 dev_dbg(qdev->dev, "Qualcomm MHI QRTR driver probed\n"); return 0; +diff --git a/net/qrtr/ns.c b/net/qrtr/ns.c +index 1990d496fcfc0..e595079c2cafe 100644 +--- a/net/qrtr/ns.c ++++ b/net/qrtr/ns.c +@@ -83,7 +83,10 @@ static struct qrtr_node *node_get(unsigned int node_id) + + node->id = node_id; + +- radix_tree_insert(&nodes, node_id, node); ++ if (radix_tree_insert(&nodes, node_id, node)) { ++ kfree(node); ++ return NULL; ++ } + + return node; + } diff --git a/net/rds/connection.c b/net/rds/connection.c index a3bc4b54d4910..b4cc699c5fad3 100644 --- a/net/rds/connection.c @@ -402313,6 +518433,23 @@ index 6fdedd9dbbc28..cfbf0e129cba5 100644 /* We don't use wait_on_bit()/wake_up_bit() because our waking is in a * hot path and finding waiters is very rare. 
We don't want to walk +diff --git a/net/rds/message.c b/net/rds/message.c +index 799034e0f513d..b363ef13c75ef 100644 +--- a/net/rds/message.c ++++ b/net/rds/message.c +@@ -104,9 +104,9 @@ static void rds_rm_zerocopy_callback(struct rds_sock *rs, + spin_lock_irqsave(&q->lock, flags); + head = &q->zcookie_head; + if (!list_empty(head)) { +- info = list_entry(head, struct rds_msg_zcopy_info, +- rs_zcookie_next); +- if (info && rds_zcookie_add(info, cookie)) { ++ info = list_first_entry(head, struct rds_msg_zcopy_info, ++ rs_zcookie_next); ++ if (rds_zcookie_add(info, cookie)) { + spin_unlock_irqrestore(&q->lock, flags); + kfree(rds_info_from_znotifier(znotif)); + /* caller invokes rds_wake_sk_sleep() */ diff --git a/net/rds/tcp.c b/net/rds/tcp.c index abf19c0e3ba0b..b560d06e6d96d 100644 --- a/net/rds/tcp.c @@ -402455,7 +518592,7 @@ index ac15a944573f7..068c7bcd30c94 100644 }; diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c -index cf7d974e0f619..29a208ed8fb88 100644 +index cf7d974e0f619..86c93cf1744b0 100644 --- a/net/rose/af_rose.c +++ b/net/rose/af_rose.c @@ -191,6 +191,7 @@ static void rose_kill_by_device(struct net_device *dev) @@ -402466,7 +518603,31 @@ index cf7d974e0f619..29a208ed8fb88 100644 rose->device = NULL; } } -@@ -591,6 +592,8 @@ static struct sock *rose_make_new(struct sock *osk) +@@ -486,6 +487,12 @@ static int rose_listen(struct socket *sock, int backlog) + { + struct sock *sk = sock->sk; + ++ lock_sock(sk); ++ if (sock->state != SS_UNCONNECTED) { ++ release_sock(sk); ++ return -EINVAL; ++ } ++ + if (sk->sk_state != TCP_LISTEN) { + struct rose_sock *rose = rose_sk(sk); + +@@ -495,8 +502,10 @@ static int rose_listen(struct socket *sock, int backlog) + memset(rose->dest_digis, 0, AX25_ADDR_LEN * ROSE_MAX_DIGIS); + sk->sk_max_ack_backlog = backlog; + sk->sk_state = TCP_LISTEN; ++ release_sock(sk); + return 0; + } ++ release_sock(sk); + + return -EOPNOTSUPP; + } +@@ -591,6 +600,8 @@ static struct sock *rose_make_new(struct sock *osk) rose->idle = orose->idle; rose->defer = orose->defer; rose->device = orose->device; @@ -402475,7 +518636,7 @@ index cf7d974e0f619..29a208ed8fb88 100644 rose->qbitincl = orose->qbitincl; return sk; -@@ -644,6 +647,7 @@ static int rose_release(struct socket *sock) +@@ -644,6 +655,7 @@ static int rose_release(struct socket *sock) break; } @@ -402483,7 +518644,7 @@ index cf7d974e0f619..29a208ed8fb88 100644 sock->sk = NULL; release_sock(sk); sock_put(sk); -@@ -720,7 +724,6 @@ static int rose_connect(struct socket *sock, struct sockaddr *uaddr, int addr_le +@@ -720,7 +732,6 @@ static int rose_connect(struct socket *sock, struct sockaddr *uaddr, int addr_le struct rose_sock *rose = rose_sk(sk); struct sockaddr_rose *addr = (struct sockaddr_rose *)uaddr; unsigned char cause, diagnostic; @@ -402491,7 +518652,7 @@ index cf7d974e0f619..29a208ed8fb88 100644 ax25_uid_assoc *user; int n, err = 0; -@@ -777,9 +780,12 @@ static int rose_connect(struct socket *sock, struct sockaddr *uaddr, int addr_le +@@ -777,9 +788,12 @@ static int rose_connect(struct socket *sock, struct sockaddr *uaddr, int addr_le } if (sock_flag(sk, SOCK_ZAPPED)) { /* Must bind first - autobinding in this may or may not work */ @@ -402505,7 +518666,7 @@ index cf7d974e0f619..29a208ed8fb88 100644 err = -ENETUNREACH; goto out_release; } -@@ -787,6 +793,7 @@ static int rose_connect(struct socket *sock, struct sockaddr *uaddr, int addr_le +@@ -787,6 +801,7 @@ static int rose_connect(struct socket *sock, struct sockaddr *uaddr, int addr_le user = ax25_findbyuid(current_euid()); if (!user) { err = 
-EINVAL; @@ -402513,6 +518674,20 @@ index cf7d974e0f619..29a208ed8fb88 100644 goto out_release; } +diff --git a/net/rose/rose_link.c b/net/rose/rose_link.c +index f6102e6f51617..730d2205f1976 100644 +--- a/net/rose/rose_link.c ++++ b/net/rose/rose_link.c +@@ -236,6 +236,9 @@ void rose_transmit_clear_request(struct rose_neigh *neigh, unsigned int lci, uns + unsigned char *dptr; + int len; + ++ if (!neigh->dev) ++ return; ++ + len = AX25_BPQ_HEADER_LEN + AX25_MAX_HEADER_LEN + ROSE_MIN_LEN + 3; + + if ((skb = alloc_skb(len, GFP_ATOMIC)) == NULL) diff --git a/net/rose/rose_loopback.c b/net/rose/rose_loopback.c index 11c45c8c6c164..036d92c0ad794 100644 --- a/net/rose/rose_loopback.c @@ -402690,11 +518865,39 @@ index b3138fc2e552e..f06ddbed3fed6 100644 bh_unlock_sock(sk); + sock_put(sk); } +diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c +index 2b5f89713e365..ceba28e9dce62 100644 +--- a/net/rxrpc/af_rxrpc.c ++++ b/net/rxrpc/af_rxrpc.c +@@ -351,7 +351,7 @@ static void rxrpc_dummy_notify_rx(struct sock *sk, struct rxrpc_call *rxcall, + */ + void rxrpc_kernel_end_call(struct socket *sock, struct rxrpc_call *call) + { +- _enter("%d{%d}", call->debug_id, atomic_read(&call->usage)); ++ _enter("%d{%d}", call->debug_id, refcount_read(&call->ref)); + + mutex_lock(&call->user_mutex); + rxrpc_release_call(rxrpc_sk(sock->sk), call); diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h -index 7bd6f8a66a3ef..f2e3fb77a02d3 100644 +index 7bd6f8a66a3ef..e0123efa2a623 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h -@@ -68,7 +68,7 @@ struct rxrpc_net { +@@ -15,14 +15,6 @@ + #include <keys/rxrpc-type.h> + #include "protocol.h" + +-#if 0 +-#define CHECK_SLAB_OKAY(X) \ +- BUG_ON(atomic_read((X)) >> (sizeof(atomic_t) - 2) == \ +- (POISON_FREE << 8 | POISON_FREE)) +-#else +-#define CHECK_SLAB_OKAY(X) do {} while (0) +-#endif +- + #define FCRYPT_BSIZE 8 + struct rxrpc_crypt { + union { +@@ -68,7 +60,7 @@ struct rxrpc_net { struct proc_dir_entry *proc_net; /* Subdir in /proc/net */ u32 epoch; /* Local epoch for detecting local-end reset */ struct list_head calls; /* List of calls active in this namespace */ @@ -402703,7 +518906,65 @@ index 7bd6f8a66a3ef..f2e3fb77a02d3 100644 atomic_t nr_calls; /* Count of allocated calls */ atomic_t nr_conns; -@@ -676,13 +676,12 @@ struct rxrpc_call { +@@ -88,7 +80,7 @@ struct rxrpc_net { + struct work_struct client_conn_reaper; + struct timer_list client_conn_reap_timer; + +- struct list_head local_endpoints; ++ struct hlist_head local_endpoints; + struct mutex local_mutex; /* Lock for ->local_endpoints */ + + DECLARE_HASHTABLE (peer_hash, 10); +@@ -279,9 +271,9 @@ struct rxrpc_security { + struct rxrpc_local { + struct rcu_head rcu; + atomic_t active_users; /* Number of users of the local endpoint */ +- atomic_t usage; /* Number of references to the structure */ ++ refcount_t ref; /* Number of references to the structure */ + struct rxrpc_net *rxnet; /* The network ns in which this resides */ +- struct list_head link; ++ struct hlist_node link; + struct socket *socket; /* my UDP socket */ + struct work_struct processor; + struct rxrpc_sock __rcu *service; /* Service(s) listening on this endpoint */ +@@ -304,7 +296,7 @@ struct rxrpc_local { + */ + struct rxrpc_peer { + struct rcu_head rcu; /* This must be first */ +- atomic_t usage; ++ refcount_t ref; + unsigned long hash_key; + struct hlist_node hash_link; + struct rxrpc_local *local; +@@ -406,7 +398,8 @@ enum rxrpc_conn_proto_state { + */ + struct rxrpc_bundle { + struct rxrpc_conn_parameters 
params; +- atomic_t usage; ++ refcount_t ref; ++ atomic_t active; /* Number of active users */ + unsigned int debug_id; + bool try_upgrade; /* True if the bundle is attempting upgrade */ + bool alloc_conn; /* True if someone's getting a conn */ +@@ -427,7 +420,7 @@ struct rxrpc_connection { + struct rxrpc_conn_proto proto; + struct rxrpc_conn_parameters params; + +- atomic_t usage; ++ refcount_t ref; + struct rcu_head rcu; + struct list_head cache_link; + +@@ -609,7 +602,7 @@ struct rxrpc_call { + int error; /* Local error incurred */ + enum rxrpc_call_state state; /* current state of call */ + enum rxrpc_call_completion completion; /* Call completion condition */ +- atomic_t usage; ++ refcount_t ref; + u16 service_id; /* service ID */ + u8 security_ix; /* Security type */ + enum rxrpc_interruptibility interruptibility; /* At what point call may be interrupted */ +@@ -676,13 +669,12 @@ struct rxrpc_call { spinlock_t input_lock; /* Lock for packet input to this call */ @@ -402721,7 +518982,7 @@ index 7bd6f8a66a3ef..f2e3fb77a02d3 100644 /* RTT management */ rxrpc_serial_t rtt_serial[4]; /* Serial number of DATA or PING sent */ -@@ -692,8 +691,10 @@ struct rxrpc_call { +@@ -692,8 +684,10 @@ struct rxrpc_call { #define RXRPC_CALL_RTT_AVAIL_MASK 0xf #define RXRPC_CALL_RTT_PEND_SHIFT 8 @@ -402733,7 +518994,7 @@ index 7bd6f8a66a3ef..f2e3fb77a02d3 100644 rxrpc_seq_t acks_lowest_nak; /* Lowest NACK in the buffer (or ==tx_hard_ack) */ rxrpc_seq_t acks_lost_top; /* tx_top at the time lost-ack ping sent */ rxrpc_serial_t acks_lost_ping; /* Serial number of probe ACK */ -@@ -777,14 +778,12 @@ void rxrpc_propose_ACK(struct rxrpc_call *, u8, u32, bool, bool, +@@ -777,14 +771,12 @@ void rxrpc_propose_ACK(struct rxrpc_call *, u8, u32, bool, bool, enum rxrpc_propose_ack_trace); void rxrpc_process_call(struct work_struct *); @@ -402754,7 +519015,7 @@ index 7bd6f8a66a3ef..f2e3fb77a02d3 100644 /* * call_object.c -@@ -808,6 +807,7 @@ void rxrpc_release_calls_on_socket(struct rxrpc_sock *); +@@ -808,6 +800,7 @@ void rxrpc_release_calls_on_socket(struct rxrpc_sock *); bool __rxrpc_queue_call(struct rxrpc_call *); bool rxrpc_queue_call(struct rxrpc_call *); void rxrpc_see_call(struct rxrpc_call *); @@ -402762,7 +519023,7 @@ index 7bd6f8a66a3ef..f2e3fb77a02d3 100644 void rxrpc_get_call(struct rxrpc_call *, enum rxrpc_call_trace); void rxrpc_put_call(struct rxrpc_call *, enum rxrpc_call_trace); void rxrpc_cleanup_call(struct rxrpc_call *); -@@ -990,6 +990,7 @@ void rxrpc_send_keepalive(struct rxrpc_peer *); +@@ -990,6 +983,7 @@ void rxrpc_send_keepalive(struct rxrpc_peer *); /* * peer_event.c */ @@ -402770,10 +519031,36 @@ index 7bd6f8a66a3ef..f2e3fb77a02d3 100644 void rxrpc_error_report(struct sock *); void rxrpc_peer_keepalive_worker(struct work_struct *); +@@ -1015,6 +1009,7 @@ void rxrpc_put_peer_locked(struct rxrpc_peer *); + extern const struct seq_operations rxrpc_call_seq_ops; + extern const struct seq_operations rxrpc_connection_seq_ops; + extern const struct seq_operations rxrpc_peer_seq_ops; ++extern const struct seq_operations rxrpc_local_seq_ops; + + /* + * recvmsg.c diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c -index 1ae90fb979362..8b24ffbc72efb 100644 +index 1ae90fb979362..99e10eea37321 100644 --- a/net/rxrpc/call_accept.c +++ b/net/rxrpc/call_accept.c +@@ -91,7 +91,7 @@ static int rxrpc_service_prealloc_one(struct rxrpc_sock *rx, + (head + 1) & (size - 1)); + + trace_rxrpc_conn(conn->debug_id, rxrpc_conn_new_service, +- atomic_read(&conn->usage), here); ++ refcount_read(&conn->ref), 
here); + } + + /* Now it gets complicated, because calls get registered with the +@@ -104,7 +104,7 @@ static int rxrpc_service_prealloc_one(struct rxrpc_sock *rx, + call->state = RXRPC_CALL_SERVER_PREALLOC; + + trace_rxrpc_call(call->debug_id, rxrpc_call_new_service, +- atomic_read(&call->usage), ++ refcount_read(&call->ref), + here, (const void *)user_call_ID); + + write_lock(&rx->call_lock); @@ -140,9 +140,9 @@ static int rxrpc_service_prealloc_one(struct rxrpc_sock *rx, write_unlock(&rx->call_lock); @@ -402853,7 +519140,7 @@ index 6be2672a65eab..2a93e7b5fbd05 100644 goto recheck_state; } diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c -index 4eb91d958a48d..d674d90e70313 100644 +index 4eb91d958a48d..6401cdf7a6246 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -53,10 +53,30 @@ static void rxrpc_call_timer_expired(struct timer_list *t) @@ -402864,9 +519151,9 @@ index 4eb91d958a48d..d674d90e70313 100644 + __rxrpc_queue_call(call); + } else { + rxrpc_put_call(call, rxrpc_call_put); -+ } -+} -+ + } + } + +void rxrpc_reduce_call_timer(struct rxrpc_call *call, + unsigned long expire_at, + unsigned long now, @@ -402876,9 +519163,9 @@ index 4eb91d958a48d..d674d90e70313 100644 + trace_rxrpc_timer(call, why, now); + if (timer_reduce(&call->timer, expire_at)) + rxrpc_put_call(call, rxrpc_call_put_notimer); - } - } - ++ } ++} ++ +void rxrpc_delete_call_timer(struct rxrpc_call *call) +{ + if (del_timer_sync(&call->timer)) @@ -402888,6 +519175,24 @@ index 4eb91d958a48d..d674d90e70313 100644 static struct lock_class_key rxrpc_call_user_mutex_lock_class_key; /* +@@ -92,7 +112,7 @@ struct rxrpc_call *rxrpc_find_call_by_user_ID(struct rxrpc_sock *rx, + found_extant_call: + rxrpc_get_call(call, rxrpc_call_got); + read_unlock(&rx->call_lock); +- _leave(" = %p [%d]", call, atomic_read(&call->usage)); ++ _leave(" = %p [%d]", call, refcount_read(&call->ref)); + return call; + } + +@@ -140,7 +160,7 @@ struct rxrpc_call *rxrpc_alloc_call(struct rxrpc_sock *rx, gfp_t gfp, + spin_lock_init(&call->notify_lock); + spin_lock_init(&call->input_lock); + rwlock_init(&call->state_lock); +- atomic_set(&call->usage, 1); ++ refcount_set(&call->ref, 1); + call->debug_id = debug_id; + call->tx_total_len = -1; + call->next_rx_timo = 20 * HZ; @@ -265,8 +285,10 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx, _enter("%p,%lx", rx, p->user_call_ID); @@ -402900,6 +519205,15 @@ index 4eb91d958a48d..d674d90e70313 100644 call = rxrpc_alloc_client_call(rx, srx, gfp, debug_id); if (IS_ERR(call)) { +@@ -279,7 +301,7 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx, + call->interruptibility = p->interruptibility; + call->tx_total_len = p->tx_total_len; + trace_rxrpc_call(call->debug_id, rxrpc_call_new_client, +- atomic_read(&call->usage), ++ refcount_read(&call->ref), + here, (const void *)p->user_call_ID); + if (p->kernel) + __set_bit(RXRPC_CALL_KERNEL, &call->flags); @@ -317,9 +339,9 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx, write_unlock(&rx->call_lock); @@ -402913,25 +519227,106 @@ index 4eb91d958a48d..d674d90e70313 100644 /* From this point on, the call is protected by its own lock. 
*/ release_sock(&rx->sk); -@@ -463,6 +485,17 @@ void rxrpc_see_call(struct rxrpc_call *call) +@@ -332,7 +354,7 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx, + goto error_attached_to_socket; + + trace_rxrpc_call(call->debug_id, rxrpc_call_connected, +- atomic_read(&call->usage), here, NULL); ++ refcount_read(&call->ref), here, NULL); + + rxrpc_start_call_timer(call); + +@@ -352,7 +374,7 @@ error_dup_user_ID: + __rxrpc_set_call_completion(call, RXRPC_CALL_LOCAL_ERROR, + RX_CALL_DEAD, -EEXIST); + trace_rxrpc_call(call->debug_id, rxrpc_call_error, +- atomic_read(&call->usage), here, ERR_PTR(-EEXIST)); ++ refcount_read(&call->ref), here, ERR_PTR(-EEXIST)); + rxrpc_release_call(rx, call); + mutex_unlock(&call->user_mutex); + rxrpc_put_call(call, rxrpc_call_put); +@@ -366,7 +388,7 @@ error_dup_user_ID: + */ + error_attached_to_socket: + trace_rxrpc_call(call->debug_id, rxrpc_call_error, +- atomic_read(&call->usage), here, ERR_PTR(ret)); ++ refcount_read(&call->ref), here, ERR_PTR(ret)); + set_bit(RXRPC_CALL_DISCONNECTED, &call->flags); + __rxrpc_set_call_completion(call, RXRPC_CALL_LOCAL_ERROR, + RX_CALL_DEAD, ret); +@@ -422,8 +444,9 @@ void rxrpc_incoming_call(struct rxrpc_sock *rx, + bool rxrpc_queue_call(struct rxrpc_call *call) + { + const void *here = __builtin_return_address(0); +- int n = atomic_fetch_add_unless(&call->usage, 1, 0); +- if (n == 0) ++ int n; ++ ++ if (!__refcount_inc_not_zero(&call->ref, &n)) + return false; + if (rxrpc_queue_work(&call->processor)) + trace_rxrpc_call(call->debug_id, rxrpc_call_queued, n + 1, +@@ -439,7 +462,7 @@ bool rxrpc_queue_call(struct rxrpc_call *call) + bool __rxrpc_queue_call(struct rxrpc_call *call) + { + const void *here = __builtin_return_address(0); +- int n = atomic_read(&call->usage); ++ int n = refcount_read(&call->ref); + ASSERTCMP(n, >=, 1); + if (rxrpc_queue_work(&call->processor)) + trace_rxrpc_call(call->debug_id, rxrpc_call_queued_ref, n, +@@ -456,22 +479,34 @@ void rxrpc_see_call(struct rxrpc_call *call) + { + const void *here = __builtin_return_address(0); + if (call) { +- int n = atomic_read(&call->usage); ++ int n = refcount_read(&call->ref); + + trace_rxrpc_call(call->debug_id, rxrpc_call_seen, n, + here, NULL); } } +bool rxrpc_try_get_call(struct rxrpc_call *call, enum rxrpc_call_trace op) +{ + const void *here = __builtin_return_address(0); -+ int n = atomic_fetch_add_unless(&call->usage, 1, 0); ++ int n; + -+ if (n == 0) ++ if (!__refcount_inc_not_zero(&call->ref, &n)) + return false; -+ trace_rxrpc_call(call->debug_id, op, n, here, NULL); ++ trace_rxrpc_call(call->debug_id, op, n + 1, here, NULL); + return true; +} + /* * Note the addition of a ref on a call. 
*/ -@@ -510,8 +543,7 @@ void rxrpc_release_call(struct rxrpc_sock *rx, struct rxrpc_call *call) + void rxrpc_get_call(struct rxrpc_call *call, enum rxrpc_call_trace op) + { + const void *here = __builtin_return_address(0); +- int n = atomic_inc_return(&call->usage); ++ int n; + +- trace_rxrpc_call(call->debug_id, op, n, here, NULL); ++ __refcount_inc(&call->ref, &n); ++ trace_rxrpc_call(call->debug_id, op, n + 1, here, NULL); + } + + /* +@@ -496,10 +531,10 @@ void rxrpc_release_call(struct rxrpc_sock *rx, struct rxrpc_call *call) + struct rxrpc_connection *conn = call->conn; + bool put = false; + +- _enter("{%d,%d}", call->debug_id, atomic_read(&call->usage)); ++ _enter("{%d,%d}", call->debug_id, refcount_read(&call->ref)); + + trace_rxrpc_call(call->debug_id, rxrpc_call_release, +- atomic_read(&call->usage), ++ refcount_read(&call->ref), + here, (const void *)call->flags); + + ASSERTCMP(call->state, ==, RXRPC_CALL_COMPLETE); +@@ -510,8 +545,7 @@ void rxrpc_release_call(struct rxrpc_sock *rx, struct rxrpc_call *call) spin_unlock_bh(&call->lock); rxrpc_put_call_slot(call); @@ -402941,7 +519336,22 @@ index 4eb91d958a48d..d674d90e70313 100644 /* Make sure we don't get any more notifications */ write_lock_bh(&rx->recvmsg_lock); -@@ -601,9 +633,9 @@ void rxrpc_put_call(struct rxrpc_call *call, enum rxrpc_call_trace op) +@@ -589,21 +623,21 @@ void rxrpc_put_call(struct rxrpc_call *call, enum rxrpc_call_trace op) + struct rxrpc_net *rxnet = call->rxnet; + const void *here = __builtin_return_address(0); + unsigned int debug_id = call->debug_id; ++ bool dead; + int n; + + ASSERT(call != NULL); + +- n = atomic_dec_return(&call->usage); ++ dead = __refcount_dec_and_test(&call->ref, &n); + trace_rxrpc_call(debug_id, op, n, here, NULL); +- ASSERTCMP(n, >=, 0); +- if (n == 0) { ++ if (dead) { + _debug("call %d dead", call->debug_id); ASSERTCMP(call->state, ==, RXRPC_CALL_COMPLETE); if (!list_empty(&call->link)) { @@ -402953,7 +519363,7 @@ index 4eb91d958a48d..d674d90e70313 100644 } rxrpc_cleanup_call(call); -@@ -618,6 +650,8 @@ static void rxrpc_destroy_call(struct work_struct *work) +@@ -618,6 +652,8 @@ static void rxrpc_destroy_call(struct work_struct *work) struct rxrpc_call *call = container_of(work, struct rxrpc_call, processor); struct rxrpc_net *rxnet = call->rxnet; @@ -402962,7 +519372,7 @@ index 4eb91d958a48d..d674d90e70313 100644 rxrpc_put_connection(call->conn); rxrpc_put_peer(call->peer); kfree(call->rxtx_buffer); -@@ -652,8 +686,6 @@ void rxrpc_cleanup_call(struct rxrpc_call *call) +@@ -652,8 +688,6 @@ void rxrpc_cleanup_call(struct rxrpc_call *call) memset(&call->sock_node, 0xcd, sizeof(call->sock_node)); @@ -402971,7 +519381,7 @@ index 4eb91d958a48d..d674d90e70313 100644 ASSERTCMP(call->state, ==, RXRPC_CALL_COMPLETE); ASSERT(test_bit(RXRPC_CALL_RELEASED, &call->flags)); -@@ -675,7 +707,7 @@ void rxrpc_destroy_all_calls(struct rxrpc_net *rxnet) +@@ -675,7 +709,7 @@ void rxrpc_destroy_all_calls(struct rxrpc_net *rxnet) _enter(""); if (!list_empty(&rxnet->calls)) { @@ -402980,7 +519390,12 @@ index 4eb91d958a48d..d674d90e70313 100644 while (!list_empty(&rxnet->calls)) { call = list_entry(rxnet->calls.next, -@@ -690,12 +722,12 @@ void rxrpc_destroy_all_calls(struct rxrpc_net *rxnet) +@@ -686,16 +720,16 @@ void rxrpc_destroy_all_calls(struct rxrpc_net *rxnet) + list_del_init(&call->link); + + pr_err("Call %p still in use (%d,%s,%lx,%lx)!\n", +- call, atomic_read(&call->usage), ++ call, refcount_read(&call->ref), rxrpc_call_states[call->state], call->flags, call->events); @@ -402997,10 
+519412,43 @@ index 4eb91d958a48d..d674d90e70313 100644 atomic_dec(&rxnet->nr_calls); diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c -index dbea0bfee48e9..8120138dac018 100644 +index dbea0bfee48e9..bdb335cb2d057 100644 --- a/net/rxrpc/conn_client.c +++ b/net/rxrpc/conn_client.c -@@ -135,16 +135,20 @@ struct rxrpc_bundle *rxrpc_get_bundle(struct rxrpc_bundle *bundle) +@@ -40,6 +40,8 @@ __read_mostly unsigned long rxrpc_conn_idle_client_fast_expiry = 2 * HZ; + DEFINE_IDR(rxrpc_client_conn_ids); + static DEFINE_SPINLOCK(rxrpc_conn_id_lock); + ++static void rxrpc_deactivate_bundle(struct rxrpc_bundle *bundle); ++ + /* + * Get a connection ID and epoch for a client connection from the global pool. + * The connection struct pointer is then recorded in the idr radix tree. The +@@ -102,7 +104,7 @@ void rxrpc_destroy_client_conn_ids(void) + if (!idr_is_empty(&rxrpc_client_conn_ids)) { + idr_for_each_entry(&rxrpc_client_conn_ids, conn, id) { + pr_err("AF_RXRPC: Leaked client conn %p {%d}\n", +- conn, atomic_read(&conn->usage)); ++ conn, refcount_read(&conn->ref)); + } + BUG(); + } +@@ -122,7 +124,8 @@ static struct rxrpc_bundle *rxrpc_alloc_bundle(struct rxrpc_conn_parameters *cp, + if (bundle) { + bundle->params = *cp; + rxrpc_get_peer(bundle->params.peer); +- atomic_set(&bundle->usage, 1); ++ refcount_set(&bundle->ref, 1); ++ atomic_set(&bundle->active, 1); + spin_lock_init(&bundle->channel_lock); + INIT_LIST_HEAD(&bundle->waiting_calls); + } +@@ -131,20 +134,27 @@ static struct rxrpc_bundle *rxrpc_alloc_bundle(struct rxrpc_conn_parameters *cp, + + struct rxrpc_bundle *rxrpc_get_bundle(struct rxrpc_bundle *bundle) + { +- atomic_inc(&bundle->usage); ++ refcount_inc(&bundle->ref); return bundle; } @@ -403013,19 +519461,42 @@ index dbea0bfee48e9..8120138dac018 100644 void rxrpc_put_bundle(struct rxrpc_bundle *bundle) { unsigned int d = bundle->debug_id; - unsigned int u = atomic_dec_return(&bundle->usage); +- unsigned int u = atomic_dec_return(&bundle->usage); ++ bool dead; ++ int r; - _debug("PUT B=%x %u", d, u); +- _debug("PUT B=%x %u", d, u); - if (u == 0) { - rxrpc_put_peer(bundle->params.peer); - kfree(bundle); - } -+ if (u == 0) ++ dead = __refcount_dec_and_test(&bundle->ref, &r); ++ ++ _debug("PUT B=%x %d", d, r - 1); ++ if (dead) + rxrpc_free_bundle(bundle); } /* -@@ -328,7 +332,7 @@ static struct rxrpc_bundle *rxrpc_look_up_bundle(struct rxrpc_conn_parameters *c +@@ -165,7 +175,7 @@ rxrpc_alloc_client_connection(struct rxrpc_bundle *bundle, gfp_t gfp) + return ERR_PTR(-ENOMEM); + } + +- atomic_set(&conn->usage, 1); ++ refcount_set(&conn->ref, 1); + conn->bundle = bundle; + conn->params = bundle->params; + conn->out_clientflag = RXRPC_CLIENT_INITIATED; +@@ -191,7 +201,7 @@ rxrpc_alloc_client_connection(struct rxrpc_bundle *bundle, gfp_t gfp) + key_get(conn->params.key); + + trace_rxrpc_conn(conn->debug_id, rxrpc_conn_new_client, +- atomic_read(&conn->usage), ++ refcount_read(&conn->ref), + __builtin_return_address(0)); + + atomic_inc(&rxnet->nr_client_conns); +@@ -328,9 +338,10 @@ static struct rxrpc_bundle *rxrpc_look_up_bundle(struct rxrpc_conn_parameters *c return candidate; found_bundle_free: @@ -403033,11 +519504,121 @@ index dbea0bfee48e9..8120138dac018 100644 + rxrpc_free_bundle(candidate); found_bundle: rxrpc_get_bundle(bundle); ++ atomic_inc(&bundle->active); spin_unlock(&local->client_bundles_lock); + _leave(" = %u [found]", bundle->debug_id); + return bundle; +@@ -428,6 +439,7 @@ static void rxrpc_add_conn_to_bundle(struct rxrpc_bundle *bundle, gfp_t gfp) + if (old) 
+ trace_rxrpc_client(old, -1, rxrpc_client_replace); + candidate->bundle_shift = shift; ++ atomic_inc(&bundle->active); + bundle->conns[i] = candidate; + for (j = 0; j < RXRPC_MAXCALLS; j++) + set_bit(shift + j, &bundle->avail_chans); +@@ -718,6 +730,7 @@ granted_channel: + smp_rmb(); + + out_put_bundle: ++ rxrpc_deactivate_bundle(bundle); + rxrpc_put_bundle(bundle); + out: + _leave(" = %d", ret); +@@ -893,9 +906,8 @@ out: + static void rxrpc_unbundle_conn(struct rxrpc_connection *conn) + { + struct rxrpc_bundle *bundle = conn->bundle; +- struct rxrpc_local *local = bundle->params.local; + unsigned int bindex; +- bool need_drop = false, need_put = false; ++ bool need_drop = false; + int i; + + _enter("C=%x", conn->debug_id); +@@ -914,15 +926,22 @@ static void rxrpc_unbundle_conn(struct rxrpc_connection *conn) + } + spin_unlock(&bundle->channel_lock); + +- /* If there are no more connections, remove the bundle */ +- if (!bundle->avail_chans) { +- _debug("maybe unbundle"); +- spin_lock(&local->client_bundles_lock); ++ if (need_drop) { ++ rxrpc_deactivate_bundle(bundle); ++ rxrpc_put_connection(conn); ++ } ++} + +- for (i = 0; i < ARRAY_SIZE(bundle->conns); i++) +- if (bundle->conns[i]) +- break; +- if (i == ARRAY_SIZE(bundle->conns) && !bundle->params.exclusive) { ++/* ++ * Drop the active count on a bundle. ++ */ ++static void rxrpc_deactivate_bundle(struct rxrpc_bundle *bundle) ++{ ++ struct rxrpc_local *local = bundle->params.local; ++ bool need_put = false; ++ ++ if (atomic_dec_and_lock(&bundle->active, &local->client_bundles_lock)) { ++ if (!bundle->params.exclusive) { + _debug("erase bundle"); + rb_erase(&bundle->local_node, &local->client_bundles); + need_put = true; +@@ -932,10 +951,6 @@ static void rxrpc_unbundle_conn(struct rxrpc_connection *conn) + if (need_put) + rxrpc_put_bundle(bundle); + } +- +- if (need_drop) +- rxrpc_put_connection(conn); +- _leave(""); + } + + /* +@@ -962,14 +977,13 @@ void rxrpc_put_client_conn(struct rxrpc_connection *conn) + { + const void *here = __builtin_return_address(0); + unsigned int debug_id = conn->debug_id; +- int n; ++ bool dead; ++ int r; + +- n = atomic_dec_return(&conn->usage); +- trace_rxrpc_conn(debug_id, rxrpc_conn_put_client, n, here); +- if (n <= 0) { +- ASSERTCMP(n, >=, 0); ++ dead = __refcount_dec_and_test(&conn->ref, &r); ++ trace_rxrpc_conn(debug_id, rxrpc_conn_put_client, r - 1, here); ++ if (dead) + rxrpc_kill_client_conn(conn); +- } + } + + /* diff --git a/net/rxrpc/conn_object.c b/net/rxrpc/conn_object.c -index b2159dbf5412c..660cd9b1a4658 100644 +index b2159dbf5412c..22089e37e97f0 100644 --- a/net/rxrpc/conn_object.c +++ b/net/rxrpc/conn_object.c +@@ -104,7 +104,7 @@ struct rxrpc_connection *rxrpc_find_connection_rcu(struct rxrpc_local *local, + goto not_found; + *_peer = peer; + conn = rxrpc_find_service_conn_rcu(peer, skb); +- if (!conn || atomic_read(&conn->usage) == 0) ++ if (!conn || refcount_read(&conn->ref) == 0) + goto not_found; + _leave(" = %p", conn); + return conn; +@@ -114,7 +114,7 @@ struct rxrpc_connection *rxrpc_find_connection_rcu(struct rxrpc_local *local, + */ + conn = idr_find(&rxrpc_client_conn_ids, + sp->hdr.cid >> RXRPC_CIDSHIFT); +- if (!conn || atomic_read(&conn->usage) == 0) { ++ if (!conn || refcount_read(&conn->ref) == 0) { + _debug("no conn"); + goto not_found; + } @@ -183,7 +183,7 @@ void __rxrpc_disconnect_call(struct rxrpc_connection *conn, chan->last_type = RXRPC_PACKET_TYPE_ABORT; break; @@ -403047,8 +519628,177 @@ index b2159dbf5412c..660cd9b1a4658 100644 chan->last_type = 
RXRPC_PACKET_TYPE_ABORT; break; } +@@ -263,11 +263,12 @@ void rxrpc_kill_connection(struct rxrpc_connection *conn) + bool rxrpc_queue_conn(struct rxrpc_connection *conn) + { + const void *here = __builtin_return_address(0); +- int n = atomic_fetch_add_unless(&conn->usage, 1, 0); +- if (n == 0) ++ int r; ++ ++ if (!__refcount_inc_not_zero(&conn->ref, &r)) + return false; + if (rxrpc_queue_work(&conn->processor)) +- trace_rxrpc_conn(conn->debug_id, rxrpc_conn_queued, n + 1, here); ++ trace_rxrpc_conn(conn->debug_id, rxrpc_conn_queued, r + 1, here); + else + rxrpc_put_connection(conn); + return true; +@@ -280,7 +281,7 @@ void rxrpc_see_connection(struct rxrpc_connection *conn) + { + const void *here = __builtin_return_address(0); + if (conn) { +- int n = atomic_read(&conn->usage); ++ int n = refcount_read(&conn->ref); + + trace_rxrpc_conn(conn->debug_id, rxrpc_conn_seen, n, here); + } +@@ -292,9 +293,10 @@ void rxrpc_see_connection(struct rxrpc_connection *conn) + struct rxrpc_connection *rxrpc_get_connection(struct rxrpc_connection *conn) + { + const void *here = __builtin_return_address(0); +- int n = atomic_inc_return(&conn->usage); ++ int r; + +- trace_rxrpc_conn(conn->debug_id, rxrpc_conn_got, n, here); ++ __refcount_inc(&conn->ref, &r); ++ trace_rxrpc_conn(conn->debug_id, rxrpc_conn_got, r, here); + return conn; + } + +@@ -305,11 +307,11 @@ struct rxrpc_connection * + rxrpc_get_connection_maybe(struct rxrpc_connection *conn) + { + const void *here = __builtin_return_address(0); ++ int r; + + if (conn) { +- int n = atomic_fetch_add_unless(&conn->usage, 1, 0); +- if (n > 0) +- trace_rxrpc_conn(conn->debug_id, rxrpc_conn_got, n + 1, here); ++ if (__refcount_inc_not_zero(&conn->ref, &r)) ++ trace_rxrpc_conn(conn->debug_id, rxrpc_conn_got, r + 1, here); + else + conn = NULL; + } +@@ -333,12 +335,11 @@ void rxrpc_put_service_conn(struct rxrpc_connection *conn) + { + const void *here = __builtin_return_address(0); + unsigned int debug_id = conn->debug_id; +- int n; ++ int r; + +- n = atomic_dec_return(&conn->usage); +- trace_rxrpc_conn(debug_id, rxrpc_conn_put_service, n, here); +- ASSERTCMP(n, >=, 0); +- if (n == 1) ++ __refcount_dec(&conn->ref, &r); ++ trace_rxrpc_conn(debug_id, rxrpc_conn_put_service, r - 1, here); ++ if (r - 1 == 1) + rxrpc_set_service_reap_timer(conn->params.local->rxnet, + jiffies + rxrpc_connection_expiry); + } +@@ -351,9 +352,9 @@ static void rxrpc_destroy_connection(struct rcu_head *rcu) + struct rxrpc_connection *conn = + container_of(rcu, struct rxrpc_connection, rcu); + +- _enter("{%d,u=%d}", conn->debug_id, atomic_read(&conn->usage)); ++ _enter("{%d,u=%d}", conn->debug_id, refcount_read(&conn->ref)); + +- ASSERTCMP(atomic_read(&conn->usage), ==, 0); ++ ASSERTCMP(refcount_read(&conn->ref), ==, 0); + + _net("DESTROY CONN %d", conn->debug_id); + +@@ -392,8 +393,8 @@ void rxrpc_service_connection_reaper(struct work_struct *work) + + write_lock(&rxnet->conn_lock); + list_for_each_entry_safe(conn, _p, &rxnet->service_conns, link) { +- ASSERTCMP(atomic_read(&conn->usage), >, 0); +- if (likely(atomic_read(&conn->usage) > 1)) ++ ASSERTCMP(refcount_read(&conn->ref), >, 0); ++ if (likely(refcount_read(&conn->ref) > 1)) + continue; + if (conn->state == RXRPC_CONN_SERVICE_PREALLOC) + continue; +@@ -405,7 +406,7 @@ void rxrpc_service_connection_reaper(struct work_struct *work) + expire_at = idle_timestamp + rxrpc_closed_conn_expiry * HZ; + + _debug("reap CONN %d { u=%d,t=%ld }", +- conn->debug_id, atomic_read(&conn->usage), ++ conn->debug_id, refcount_read(&conn->ref), + 
(long)expire_at - (long)now); + + if (time_before(now, expire_at)) { +@@ -418,7 +419,7 @@ void rxrpc_service_connection_reaper(struct work_struct *work) + /* The usage count sits at 1 whilst the object is unused on the + * list; we reduce that to 0 to make the object unavailable. + */ +- if (atomic_cmpxchg(&conn->usage, 1, 0) != 1) ++ if (!refcount_dec_if_one(&conn->ref)) + continue; + trace_rxrpc_conn(conn->debug_id, rxrpc_conn_reap_service, 0, NULL); + +@@ -442,7 +443,7 @@ void rxrpc_service_connection_reaper(struct work_struct *work) + link); + list_del_init(&conn->link); + +- ASSERTCMP(atomic_read(&conn->usage), ==, 0); ++ ASSERTCMP(refcount_read(&conn->ref), ==, 0); + rxrpc_kill_connection(conn); + } + +@@ -470,7 +471,7 @@ void rxrpc_destroy_all_connections(struct rxrpc_net *rxnet) + write_lock(&rxnet->conn_lock); + list_for_each_entry_safe(conn, _p, &rxnet->service_conns, link) { + pr_err("AF_RXRPC: Leaked conn %p {%d}\n", +- conn, atomic_read(&conn->usage)); ++ conn, refcount_read(&conn->ref)); + leak = true; + } + write_unlock(&rxnet->conn_lock); +diff --git a/net/rxrpc/conn_service.c b/net/rxrpc/conn_service.c +index e1966dfc91527..6e6aa02c6f9e8 100644 +--- a/net/rxrpc/conn_service.c ++++ b/net/rxrpc/conn_service.c +@@ -9,7 +9,7 @@ + #include "ar-internal.h" + + static struct rxrpc_bundle rxrpc_service_dummy_bundle = { +- .usage = ATOMIC_INIT(1), ++ .ref = REFCOUNT_INIT(1), + .debug_id = UINT_MAX, + .channel_lock = __SPIN_LOCK_UNLOCKED(&rxrpc_service_dummy_bundle.channel_lock), + }; +@@ -99,7 +99,7 @@ conn_published: + return; + + found_extant_conn: +- if (atomic_read(&cursor->usage) == 0) ++ if (refcount_read(&cursor->ref) == 0) + goto replace_old_connection; + write_sequnlock_bh(&peer->service_conn_lock); + /* We should not be able to get here. rxrpc_incoming_connection() is +@@ -132,7 +132,7 @@ struct rxrpc_connection *rxrpc_prealloc_service_connection(struct rxrpc_net *rxn + * the rxrpc_connections list. + */ + conn->state = RXRPC_CONN_SERVICE_PREALLOC; +- atomic_set(&conn->usage, 2); ++ refcount_set(&conn->ref, 2); + conn->bundle = rxrpc_get_bundle(&rxrpc_service_dummy_bundle); + + atomic_inc(&rxnet->nr_conns); +@@ -142,7 +142,7 @@ struct rxrpc_connection *rxrpc_prealloc_service_connection(struct rxrpc_net *rxn + write_unlock(&rxnet->conn_lock); + + trace_rxrpc_conn(conn->debug_id, rxrpc_conn_new_service, +- atomic_read(&conn->usage), ++ refcount_read(&conn->ref), + __builtin_return_address(0)); + } + diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c -index dc201363f2c48..3521ebd0ee41c 100644 +index dc201363f2c48..721d847ba92bb 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -412,8 +412,8 @@ static void rxrpc_input_data(struct rxrpc_call *call, struct sk_buff *skb) @@ -403182,10 +519932,41 @@ index dc201363f2c48..3521ebd0ee41c 100644 /* Parse rwind and mtu sizes if provided. 
*/ if (buf.info.rxMTU) +@@ -1154,8 +1190,6 @@ static void rxrpc_post_packet_to_local(struct rxrpc_local *local, + */ + static void rxrpc_reject_packet(struct rxrpc_local *local, struct sk_buff *skb) + { +- CHECK_SLAB_OKAY(&local->usage); +- + if (rxrpc_get_local_maybe(local)) { + skb_queue_tail(&local->reject_queue, skb); + rxrpc_queue_local(local); +@@ -1413,7 +1447,7 @@ int rxrpc_input_packet(struct sock *udp_sk, struct sk_buff *skb) + } + } + +- if (!call || atomic_read(&call->usage) == 0) { ++ if (!call || refcount_read(&call->ref) == 0) { + if (rxrpc_to_client(sp) || + sp->hdr.type != RXRPC_PACKET_TYPE_DATA) + goto bad_message; diff --git a/net/rxrpc/local_object.c b/net/rxrpc/local_object.c -index a4111408ffd0c..1d15940f61d7e 100644 +index a4111408ffd0c..38ea98ff426bd 100644 --- a/net/rxrpc/local_object.c +++ b/net/rxrpc/local_object.c +@@ -79,10 +79,10 @@ static struct rxrpc_local *rxrpc_alloc_local(struct rxrpc_net *rxnet, + + local = kzalloc(sizeof(struct rxrpc_local), GFP_KERNEL); + if (local) { +- atomic_set(&local->usage, 1); ++ refcount_set(&local->ref, 1); + atomic_set(&local->active_users, 1); + local->rxnet = rxnet; +- INIT_LIST_HEAD(&local->link); ++ INIT_HLIST_NODE(&local->link); + INIT_WORK(&local->processor, rxrpc_local_processor); + init_rwsem(&local->defrag_sem); + skb_queue_head_init(&local->reject_queue); @@ -117,6 +117,7 @@ static int rxrpc_open_socket(struct rxrpc_local *local, struct net *net) local, srx->transport_type, srx->transport.family); @@ -403211,7 +519992,140 @@ index a4111408ffd0c..1d15940f61d7e 100644 tuncfg.sk_user_data = local; setup_udp_tunnel_sock(net, local->socket, &tuncfg); -@@ -402,6 +406,9 @@ static void rxrpc_local_processor(struct work_struct *work) +@@ -177,7 +181,7 @@ struct rxrpc_local *rxrpc_lookup_local(struct net *net, + { + struct rxrpc_local *local; + struct rxrpc_net *rxnet = rxrpc_net(net); +- struct list_head *cursor; ++ struct hlist_node *cursor; + const char *age; + long diff; + int ret; +@@ -187,16 +191,12 @@ struct rxrpc_local *rxrpc_lookup_local(struct net *net, + + mutex_lock(&rxnet->local_mutex); + +- for (cursor = rxnet->local_endpoints.next; +- cursor != &rxnet->local_endpoints; +- cursor = cursor->next) { +- local = list_entry(cursor, struct rxrpc_local, link); ++ hlist_for_each(cursor, &rxnet->local_endpoints) { ++ local = hlist_entry(cursor, struct rxrpc_local, link); + + diff = rxrpc_local_cmp_key(local, srx); +- if (diff < 0) ++ if (diff != 0) + continue; +- if (diff > 0) +- break; + + /* Services aren't allowed to share transport sockets, so + * reject that here. It is possible that the object is dying - +@@ -208,9 +208,10 @@ struct rxrpc_local *rxrpc_lookup_local(struct net *net, + goto addr_in_use; + } + +- /* Found a match. We replace a dying object. Attempting to +- * bind the transport socket may still fail if we're attempting +- * to use a local address that the dying object is still using. ++ /* Found a match. We want to replace a dying object. ++ * Attempting to bind the transport socket may still fail if ++ * we're attempting to use a local address that the dying ++ * object is still using. 
+ */ + if (!rxrpc_use_local(local)) + break; +@@ -227,10 +228,12 @@ struct rxrpc_local *rxrpc_lookup_local(struct net *net, + if (ret < 0) + goto sock_error; + +- if (cursor != &rxnet->local_endpoints) +- list_replace_init(cursor, &local->link); +- else +- list_add_tail(&local->link, cursor); ++ if (cursor) { ++ hlist_replace_rcu(cursor, &local->link); ++ cursor->pprev = NULL; ++ } else { ++ hlist_add_head_rcu(&local->link, &rxnet->local_endpoints); ++ } + age = "new"; + + found: +@@ -263,10 +266,10 @@ addr_in_use: + struct rxrpc_local *rxrpc_get_local(struct rxrpc_local *local) + { + const void *here = __builtin_return_address(0); +- int n; ++ int r; + +- n = atomic_inc_return(&local->usage); +- trace_rxrpc_local(local->debug_id, rxrpc_local_got, n, here); ++ __refcount_inc(&local->ref, &r); ++ trace_rxrpc_local(local->debug_id, rxrpc_local_got, r + 1, here); + return local; + } + +@@ -276,12 +279,12 @@ struct rxrpc_local *rxrpc_get_local(struct rxrpc_local *local) + struct rxrpc_local *rxrpc_get_local_maybe(struct rxrpc_local *local) + { + const void *here = __builtin_return_address(0); ++ int r; + + if (local) { +- int n = atomic_fetch_add_unless(&local->usage, 1, 0); +- if (n > 0) ++ if (__refcount_inc_not_zero(&local->ref, &r)) + trace_rxrpc_local(local->debug_id, rxrpc_local_got, +- n + 1, here); ++ r + 1, here); + else + local = NULL; + } +@@ -295,10 +298,10 @@ void rxrpc_queue_local(struct rxrpc_local *local) + { + const void *here = __builtin_return_address(0); + unsigned int debug_id = local->debug_id; +- int n = atomic_read(&local->usage); ++ int r = refcount_read(&local->ref); + + if (rxrpc_queue_work(&local->processor)) +- trace_rxrpc_local(debug_id, rxrpc_local_queued, n, here); ++ trace_rxrpc_local(debug_id, rxrpc_local_queued, r + 1, here); + else + rxrpc_put_local(local); + } +@@ -310,15 +313,16 @@ void rxrpc_put_local(struct rxrpc_local *local) + { + const void *here = __builtin_return_address(0); + unsigned int debug_id; +- int n; ++ bool dead; ++ int r; + + if (local) { + debug_id = local->debug_id; + +- n = atomic_dec_return(&local->usage); +- trace_rxrpc_local(debug_id, rxrpc_local_put, n, here); ++ dead = __refcount_dec_and_test(&local->ref, &r); ++ trace_rxrpc_local(debug_id, rxrpc_local_put, r, here); + +- if (n == 0) ++ if (dead) + call_rcu(&local->rcu, rxrpc_local_rcu); + } + } +@@ -371,7 +375,7 @@ static void rxrpc_local_destroyer(struct rxrpc_local *local) + local->dead = true; + + mutex_lock(&rxnet->local_mutex); +- list_del_init(&local->link); ++ hlist_del_init_rcu(&local->link); + mutex_unlock(&rxnet->local_mutex); + + rxrpc_clean_up_local_conns(local); +@@ -402,8 +406,11 @@ static void rxrpc_local_processor(struct work_struct *work) container_of(work, struct rxrpc_local, processor); bool again; @@ -403219,10 +520133,28 @@ index a4111408ffd0c..1d15940f61d7e 100644 + return; + trace_rxrpc_local(local->debug_id, rxrpc_local_processing, - atomic_read(&local->usage), NULL); +- atomic_read(&local->usage), NULL); ++ refcount_read(&local->ref), NULL); + + do { + again = false; +@@ -455,11 +462,11 @@ void rxrpc_destroy_all_locals(struct rxrpc_net *rxnet) + flush_workqueue(rxrpc_workqueue); + +- if (!list_empty(&rxnet->local_endpoints)) { ++ if (!hlist_empty(&rxnet->local_endpoints)) { + mutex_lock(&rxnet->local_mutex); +- list_for_each_entry(local, &rxnet->local_endpoints, link) { ++ hlist_for_each_entry(local, &rxnet->local_endpoints, link) { + pr_err("AF_RXRPC: Leaked local %p {%d}\n", +- local, atomic_read(&local->usage)); ++ local, refcount_read(&local->ref)); + 
} + mutex_unlock(&rxnet->local_mutex); + BUG(); diff --git a/net/rxrpc/net_ns.c b/net/rxrpc/net_ns.c -index 25bbc4cc8b135..e4d6d432515bc 100644 +index 25bbc4cc8b135..bb4c25d6df64c 100644 --- a/net/rxrpc/net_ns.c +++ b/net/rxrpc/net_ns.c @@ -50,7 +50,7 @@ static __net_init int rxrpc_init_net(struct net *net) @@ -403234,7 +520166,26 @@ index 25bbc4cc8b135..e4d6d432515bc 100644 atomic_set(&rxnet->nr_calls, 1); atomic_set(&rxnet->nr_conns, 1); -@@ -115,6 +115,8 @@ static __net_exit void rxrpc_exit_net(struct net *net) +@@ -72,7 +72,7 @@ static __net_init int rxrpc_init_net(struct net *net) + timer_setup(&rxnet->client_conn_reap_timer, + rxrpc_client_conn_reap_timeout, 0); + +- INIT_LIST_HEAD(&rxnet->local_endpoints); ++ INIT_HLIST_HEAD(&rxnet->local_endpoints); + mutex_init(&rxnet->local_mutex); + + hash_init(rxnet->peer_hash); +@@ -98,6 +98,9 @@ static __net_init int rxrpc_init_net(struct net *net) + proc_create_net("peers", 0444, rxnet->proc_net, + &rxrpc_peer_seq_ops, + sizeof(struct seq_net_private)); ++ proc_create_net("locals", 0444, rxnet->proc_net, ++ &rxrpc_local_seq_ops, ++ sizeof(struct seq_net_private)); + return 0; + + err_proc: +@@ -115,6 +118,8 @@ static __net_exit void rxrpc_exit_net(struct net *net) rxnet->live = false; del_timer_sync(&rxnet->peer_keepalive_timer); cancel_work_sync(&rxnet->peer_keepalive_work); @@ -403244,7 +520195,7 @@ index 25bbc4cc8b135..e4d6d432515bc 100644 rxrpc_destroy_all_connections(rxnet); rxrpc_destroy_all_peers(rxnet); diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c -index 10f2bf2e9068a..9683617db7049 100644 +index 10f2bf2e9068a..08c117bc083ec 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -74,11 +74,18 @@ static size_t rxrpc_fill_out_ack(struct rxrpc_connection *conn, @@ -403266,8 +520217,12 @@ index 10f2bf2e9068a..9683617db7049 100644 /* Barrier against rxrpc_input_data(). 
*/ serial = call->ackr_serial; hard_ack = READ_ONCE(call->rx_hard_ack); -@@ -89,7 +96,7 @@ static size_t rxrpc_fill_out_ack(struct rxrpc_connection *conn, - pkt->ack.bufferSpace = htons(8); +@@ -86,10 +93,10 @@ static size_t rxrpc_fill_out_ack(struct rxrpc_connection *conn, + *_hard_ack = hard_ack; + *_top = top; + +- pkt->ack.bufferSpace = htons(8); ++ pkt->ack.bufferSpace = htons(0); pkt->ack.maxSkew = htons(0); pkt->ack.firstPacket = htonl(hard_ack + 1); - pkt->ack.previousPacket = htonl(call->ackr_prev_seq); @@ -403661,9 +520616,36 @@ index be032850ae8ca..32561e9567fe3 100644 } diff --git a/net/rxrpc/peer_object.c b/net/rxrpc/peer_object.c -index 68396d0520525..0298fe2ad6d32 100644 +index 68396d0520525..26d2ae9baaf2c 100644 --- a/net/rxrpc/peer_object.c +++ b/net/rxrpc/peer_object.c +@@ -121,7 +121,7 @@ static struct rxrpc_peer *__rxrpc_lookup_peer_rcu( + + hash_for_each_possible_rcu(rxnet->peer_hash, peer, hash_link, hash_key) { + if (rxrpc_peer_cmp_key(peer, local, srx, hash_key) == 0 && +- atomic_read(&peer->usage) > 0) ++ refcount_read(&peer->ref) > 0) + return peer; + } + +@@ -140,7 +140,7 @@ struct rxrpc_peer *rxrpc_lookup_peer_rcu(struct rxrpc_local *local, + peer = __rxrpc_lookup_peer_rcu(local, srx, hash_key); + if (peer) { + _net("PEER %d {%pISp}", peer->debug_id, &peer->srx.transport); +- _leave(" = %p {u=%d}", peer, atomic_read(&peer->usage)); ++ _leave(" = %p {u=%d}", peer, refcount_read(&peer->ref)); + } + return peer; + } +@@ -216,7 +216,7 @@ struct rxrpc_peer *rxrpc_alloc_peer(struct rxrpc_local *local, gfp_t gfp) + + peer = kzalloc(sizeof(struct rxrpc_peer), gfp); + if (peer) { +- atomic_set(&peer->usage, 1); ++ refcount_set(&peer->ref, 1); + peer->local = rxrpc_get_local(local); + INIT_HLIST_HEAD(&peer->error_targets); + peer->service_conns = RB_ROOT; @@ -299,6 +299,12 @@ static struct rxrpc_peer *rxrpc_create_peer(struct rxrpc_sock *rx, return peer; } @@ -403677,7 +520659,7 @@ index 68396d0520525..0298fe2ad6d32 100644 /* * Set up a new incoming peer. 
There shouldn't be any other matching peers * since we've already done a search in the list from the non-reentrant context -@@ -365,7 +371,7 @@ struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_sock *rx, +@@ -365,14 +371,14 @@ struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_sock *rx, spin_unlock_bh(&rxnet->peer_hash_lock); if (peer) @@ -403686,6 +520668,43 @@ index 68396d0520525..0298fe2ad6d32 100644 else peer = candidate; } + + _net("PEER %d {%pISp}", peer->debug_id, &peer->srx.transport); + +- _leave(" = %p {u=%d}", peer, atomic_read(&peer->usage)); ++ _leave(" = %p {u=%d}", peer, refcount_read(&peer->ref)); + return peer; + } + +@@ -382,10 +388,10 @@ struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_sock *rx, + struct rxrpc_peer *rxrpc_get_peer(struct rxrpc_peer *peer) + { + const void *here = __builtin_return_address(0); +- int n; ++ int r; + +- n = atomic_inc_return(&peer->usage); +- trace_rxrpc_peer(peer->debug_id, rxrpc_peer_got, n, here); ++ __refcount_inc(&peer->ref, &r); ++ trace_rxrpc_peer(peer->debug_id, rxrpc_peer_got, r + 1, here); + return peer; + } + +@@ -395,11 +401,11 @@ struct rxrpc_peer *rxrpc_get_peer(struct rxrpc_peer *peer) + struct rxrpc_peer *rxrpc_get_peer_maybe(struct rxrpc_peer *peer) + { + const void *here = __builtin_return_address(0); ++ int r; + + if (peer) { +- int n = atomic_fetch_add_unless(&peer->usage, 1, 0); +- if (n > 0) +- trace_rxrpc_peer(peer->debug_id, rxrpc_peer_got, n + 1, here); ++ if (__refcount_inc_not_zero(&peer->ref, &r)) ++ trace_rxrpc_peer(peer->debug_id, rxrpc_peer_got, r + 1, here); + else + peer = NULL; + } @@ -420,8 +426,7 @@ static void __rxrpc_put_peer(struct rxrpc_peer *peer) list_del_init(&peer->keepalive_link); spin_unlock_bh(&rxnet->peer_hash_lock); @@ -403696,8 +520715,39 @@ index 68396d0520525..0298fe2ad6d32 100644 } /* -@@ -457,8 +462,7 @@ void rxrpc_put_peer_locked(struct rxrpc_peer *peer) - if (n == 0) { +@@ -431,13 +436,14 @@ void rxrpc_put_peer(struct rxrpc_peer *peer) + { + const void *here = __builtin_return_address(0); + unsigned int debug_id; +- int n; ++ bool dead; ++ int r; + + if (peer) { + debug_id = peer->debug_id; +- n = atomic_dec_return(&peer->usage); +- trace_rxrpc_peer(debug_id, rxrpc_peer_put, n, here); +- if (n == 0) ++ dead = __refcount_dec_and_test(&peer->ref, &r); ++ trace_rxrpc_peer(debug_id, rxrpc_peer_put, r - 1, here); ++ if (dead) + __rxrpc_put_peer(peer); + } + } +@@ -450,15 +456,15 @@ void rxrpc_put_peer_locked(struct rxrpc_peer *peer) + { + const void *here = __builtin_return_address(0); + unsigned int debug_id = peer->debug_id; +- int n; ++ bool dead; ++ int r; + +- n = atomic_dec_return(&peer->usage); +- trace_rxrpc_peer(debug_id, rxrpc_peer_put, n, here); +- if (n == 0) { ++ dead = __refcount_dec_and_test(&peer->ref, &r); ++ trace_rxrpc_peer(debug_id, rxrpc_peer_put, r - 1, here); ++ if (dead) { hash_del_rcu(&peer->hash_link); list_del_init(&peer->keepalive_link); - rxrpc_put_local(peer->local); @@ -403706,8 +520756,17 @@ index 68396d0520525..0298fe2ad6d32 100644 } } +@@ -477,7 +483,7 @@ void rxrpc_destroy_all_peers(struct rxrpc_net *rxnet) + hlist_for_each_entry(peer, &rxnet->peer_hash[i], hash_link) { + pr_err("Leaked peer %u {%u} %pISp\n", + peer->debug_id, +- atomic_read(&peer->usage), ++ refcount_read(&peer->ref), + &peer->srx.transport); + } + } diff --git a/net/rxrpc/proc.c b/net/rxrpc/proc.c -index e2f990754f882..5a67955cc00f6 100644 +index e2f990754f882..245418943e01c 100644 --- a/net/rxrpc/proc.c +++ b/net/rxrpc/proc.c @@ -26,29 +26,23 @@ static const char *const 
rxrpc_conn_states[RXRPC_CONN__NR_STATES] = { @@ -403742,6 +520801,106 @@ index e2f990754f882..5a67955cc00f6 100644 rcu_read_unlock(); } +@@ -107,7 +101,7 @@ static int rxrpc_call_seq_show(struct seq_file *seq, void *v) + call->cid, + call->call_id, + rxrpc_is_service_call(call) ? "Svc" : "Clt", +- atomic_read(&call->usage), ++ refcount_read(&call->ref), + rxrpc_call_states[call->state], + call->abort_code, + call->debug_id, +@@ -189,7 +183,7 @@ print: + conn->service_id, + conn->proto.cid, + rxrpc_conn_is_service(conn) ? "Svc" : "Clt", +- atomic_read(&conn->usage), ++ refcount_read(&conn->ref), + rxrpc_conn_states[conn->state], + key_serial(conn->params.key), + atomic_read(&conn->serial), +@@ -239,7 +233,7 @@ static int rxrpc_peer_seq_show(struct seq_file *seq, void *v) + " %3u %5u %6llus %8u %8u\n", + lbuff, + rbuff, +- atomic_read(&peer->usage), ++ refcount_read(&peer->ref), + peer->cong_cwnd, + peer->mtu, + now - peer->last_tx_at, +@@ -334,3 +328,72 @@ const struct seq_operations rxrpc_peer_seq_ops = { + .stop = rxrpc_peer_seq_stop, + .show = rxrpc_peer_seq_show, + }; ++ ++/* ++ * Generate a list of extant virtual local endpoints in /proc/net/rxrpc/locals ++ */ ++static int rxrpc_local_seq_show(struct seq_file *seq, void *v) ++{ ++ struct rxrpc_local *local; ++ char lbuff[50]; ++ ++ if (v == SEQ_START_TOKEN) { ++ seq_puts(seq, ++ "Proto Local " ++ " Use Act\n"); ++ return 0; ++ } ++ ++ local = hlist_entry(v, struct rxrpc_local, link); ++ ++ sprintf(lbuff, "%pISpc", &local->srx.transport); ++ ++ seq_printf(seq, ++ "UDP %-47.47s %3u %3u\n", ++ lbuff, ++ refcount_read(&local->ref), ++ atomic_read(&local->active_users)); ++ ++ return 0; ++} ++ ++static void *rxrpc_local_seq_start(struct seq_file *seq, loff_t *_pos) ++ __acquires(rcu) ++{ ++ struct rxrpc_net *rxnet = rxrpc_net(seq_file_net(seq)); ++ unsigned int n; ++ ++ rcu_read_lock(); ++ ++ if (*_pos >= UINT_MAX) ++ return NULL; ++ ++ n = *_pos; ++ if (n == 0) ++ return SEQ_START_TOKEN; ++ ++ return seq_hlist_start_rcu(&rxnet->local_endpoints, n - 1); ++} ++ ++static void *rxrpc_local_seq_next(struct seq_file *seq, void *v, loff_t *_pos) ++{ ++ struct rxrpc_net *rxnet = rxrpc_net(seq_file_net(seq)); ++ ++ if (*_pos >= UINT_MAX) ++ return NULL; ++ ++ return seq_hlist_next_rcu(v, &rxnet->local_endpoints, _pos); ++} ++ ++static void rxrpc_local_seq_stop(struct seq_file *seq, void *v) ++ __releases(rcu) ++{ ++ rcu_read_unlock(); ++} ++ ++const struct seq_operations rxrpc_local_seq_ops = { ++ .start = rxrpc_local_seq_start, ++ .next = rxrpc_local_seq_next, ++ .stop = rxrpc_local_seq_stop, ++ .show = rxrpc_local_seq_show, ++}; diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c index eca6dda26c77e..250f23bc1c076 100644 --- a/net/rxrpc/recvmsg.c @@ -403788,7 +520947,7 @@ index 08aab5c01437d..db47844f4ac99 100644 nsg = 4; } else { diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c -index af8ad6c30b9fb..3c3a626459deb 100644 +index af8ad6c30b9fb..d4e4e94f4f987 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -51,10 +51,7 @@ static int rxrpc_wait_for_tx_window_intr(struct rxrpc_sock *rx, @@ -403931,6 +521090,15 @@ index af8ad6c30b9fb..3c3a626459deb 100644 int ret; struct rxrpc_send_params p = { +@@ -688,7 +716,7 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len) + if (call->tx_total_len != -1 || + call->tx_pending || + call->tx_top != 0) +- goto error_put; ++ goto out_put_unlock; + call->tx_total_len = p.call.tx_total_len; + } + } @@ -731,21 +759,13 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct 
msghdr *msg, size_t len) ret = rxrpc_send_abort_packet(call); } else if (p.command != RXRPC_CMD_SEND_DATA) { @@ -404015,6 +521183,18 @@ index ead3471307ee5..ee269e0e6ee87 100644 sec->destroy_server_key(key); } +diff --git a/net/rxrpc/skbuff.c b/net/rxrpc/skbuff.c +index 0348d2bf6f7d8..580a5acffee71 100644 +--- a/net/rxrpc/skbuff.c ++++ b/net/rxrpc/skbuff.c +@@ -71,7 +71,6 @@ void rxrpc_free_skb(struct sk_buff *skb, enum rxrpc_skb_trace op) + const void *here = __builtin_return_address(0); + if (skb) { + int n; +- CHECK_SLAB_OKAY(&skb->users); + n = atomic_dec_return(select_skb_count(skb)); + trace_rxrpc_skb(skb, op, refcount_read(&skb->users), n, + rxrpc_skb(skb)->rx_flags, here); diff --git a/net/rxrpc/sysctl.c b/net/rxrpc/sysctl.c index 540351d6a5f47..555e0910786bc 100644 --- a/net/rxrpc/sysctl.c @@ -404037,6 +521217,19 @@ index 540351d6a5f47..555e0910786bc 100644 }, { .procname = "rx_window_size", +diff --git a/net/sched/Kconfig b/net/sched/Kconfig +index 1e8ab4749c6c3..4662a6ce8a7e7 100644 +--- a/net/sched/Kconfig ++++ b/net/sched/Kconfig +@@ -976,7 +976,7 @@ config NET_ACT_TUNNEL_KEY + + config NET_ACT_CT + tristate "connection tracking tc action" +- depends on NET_CLS_ACT && NF_CONNTRACK && NF_NAT && NF_FLOW_TABLE ++ depends on NET_CLS_ACT && NF_CONNTRACK && (!NF_NAT || NF_NAT) && NF_FLOW_TABLE + help + Say Y here to allow sending the packets to conntrack module. + diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 7dd3a2dc5fa40..d775676956bf9 100644 --- a/net/sched/act_api.c @@ -404119,10 +521312,63 @@ index 7dd3a2dc5fa40..d775676956bf9 100644 if (TC_ACT_EXT_CMP(ret, TC_ACT_JUMP)) { jmp_prgcnt = ret & TCA_ACT_MAX_PRIO_MASK; +diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c +index 5c36013339e11..2a05bad56ef3e 100644 +--- a/net/sched/act_bpf.c ++++ b/net/sched/act_bpf.c +@@ -305,7 +305,7 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla, + ret = tcf_idr_check_alloc(tn, &index, act, bind); + if (!ret) { + ret = tcf_idr_create(tn, index, est, act, +- &act_bpf_ops, bind, true, 0); ++ &act_bpf_ops, bind, true, flags); + if (ret < 0) { + tcf_idr_cleanup(tn, index); + return ret; +diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c +index 94e78ac7a7487..0deb4e96a6c2e 100644 +--- a/net/sched/act_connmark.c ++++ b/net/sched/act_connmark.c +@@ -62,7 +62,7 @@ static int tcf_connmark_act(struct sk_buff *skb, const struct tc_action *a, + + c = nf_ct_get(skb, &ctinfo); + if (c) { +- skb->mark = c->mark; ++ skb->mark = READ_ONCE(c->mark); + /* using overlimits stats to count how many packets marked */ + ca->tcf_qstats.overlimits++; + goto out; +@@ -82,7 +82,7 @@ static int tcf_connmark_act(struct sk_buff *skb, const struct tc_action *a, + c = nf_ct_tuplehash_to_ctrack(thash); + /* using overlimits stats to count how many packets marked */ + ca->tcf_qstats.overlimits++; +- skb->mark = c->mark; ++ skb->mark = READ_ONCE(c->mark); + nf_ct_put(c); + + out: +@@ -124,7 +124,7 @@ static int tcf_connmark_init(struct net *net, struct nlattr *nla, + ret = tcf_idr_check_alloc(tn, &index, a, bind); + if (!ret) { + ret = tcf_idr_create(tn, index, est, a, +- &act_connmark_ops, bind, false, 0); ++ &act_connmark_ops, bind, false, flags); + if (ret) { + tcf_idr_cleanup(tn, index); + return ret; diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c -index 90866ae45573a..d85fdefe5730d 100644 +index 90866ae45573a..81a2d6cbfb441 100644 --- a/net/sched/act_ct.c +++ b/net/sched/act_ct.c +@@ -177,7 +177,7 @@ static void tcf_ct_flow_table_add_action_meta(struct nf_conn *ct, + entry = 
tcf_ct_flow_table_flow_action_get_next(action); + entry->id = FLOW_ACTION_CT_METADATA; + #if IS_ENABLED(CONFIG_NF_CONNTRACK_MARK) +- entry->ct_metadata.mark = ct->mark; ++ entry->ct_metadata.mark = READ_ONCE(ct->mark); + #endif + ctinfo = dir == IP_CT_DIR_ORIGINAL ? IP_CT_ESTABLISHED : + IP_CT_ESTABLISHED_REPLY; @@ -516,11 +516,6 @@ static bool tcf_ct_flow_table_lookup(struct tcf_ct_params *p, struct nf_conn *ct; u8 dir; @@ -404237,6 +521483,19 @@ index 90866ae45573a..d85fdefe5730d 100644 out: return err; } +@@ -852,9 +856,9 @@ static void tcf_ct_act_set_mark(struct nf_conn *ct, u32 mark, u32 mask) + if (!mask) + return; + +- new_mark = mark | (ct->mark & ~(mask)); +- if (ct->mark != new_mark) { +- ct->mark = new_mark; ++ new_mark = mark | (READ_ONCE(ct->mark) & ~(mask)); ++ if (READ_ONCE(ct->mark) != new_mark) { ++ WRITE_ONCE(ct->mark, new_mark); + if (nf_ct_is_confirmed(ct)) + nf_conntrack_event_cache(IPCT_MARK, ct); + } @@ -963,10 +967,10 @@ static int tcf_ct_act(struct sk_buff *skb, const struct tc_action *a, tcf_action_update_bstats(&c->common, skb); @@ -404287,6 +521546,96 @@ index 90866ae45573a..d85fdefe5730d 100644 cleanup: if (goto_ch) tcf_chain_put_by_act(goto_ch); +diff --git a/net/sched/act_ctinfo.c b/net/sched/act_ctinfo.c +index 549374a2d0086..56e0a5eb64942 100644 +--- a/net/sched/act_ctinfo.c ++++ b/net/sched/act_ctinfo.c +@@ -33,7 +33,7 @@ static void tcf_ctinfo_dscp_set(struct nf_conn *ct, struct tcf_ctinfo *ca, + { + u8 dscp, newdscp; + +- newdscp = (((ct->mark & cp->dscpmask) >> cp->dscpmaskshift) << 2) & ++ newdscp = (((READ_ONCE(ct->mark) & cp->dscpmask) >> cp->dscpmaskshift) << 2) & + ~INET_ECN_MASK; + + switch (proto) { +@@ -73,7 +73,7 @@ static void tcf_ctinfo_cpmark_set(struct nf_conn *ct, struct tcf_ctinfo *ca, + struct sk_buff *skb) + { + ca->stats_cpmark_set++; +- skb->mark = ct->mark & cp->cpmarkmask; ++ skb->mark = READ_ONCE(ct->mark) & cp->cpmarkmask; + } + + static int tcf_ctinfo_act(struct sk_buff *skb, const struct tc_action *a, +@@ -92,7 +92,7 @@ static int tcf_ctinfo_act(struct sk_buff *skb, const struct tc_action *a, + cp = rcu_dereference_bh(ca->params); + + tcf_lastuse_update(&ca->tcf_tm); +- bstats_update(&ca->tcf_bstats, skb); ++ tcf_action_update_bstats(&ca->common, skb); + action = READ_ONCE(ca->tcf_action); + + wlen = skb_network_offset(skb); +@@ -131,7 +131,7 @@ static int tcf_ctinfo_act(struct sk_buff *skb, const struct tc_action *a, + } + + if (cp->mode & CTINFO_MODE_DSCP) +- if (!cp->dscpstatemask || (ct->mark & cp->dscpstatemask)) ++ if (!cp->dscpstatemask || (READ_ONCE(ct->mark) & cp->dscpstatemask)) + tcf_ctinfo_dscp_set(ct, ca, cp, skb, wlen, proto); + + if (cp->mode & CTINFO_MODE_CPMARK) +@@ -211,8 +211,8 @@ static int tcf_ctinfo_init(struct net *net, struct nlattr *nla, + index = actparm->index; + err = tcf_idr_check_alloc(tn, &index, a, bind); + if (!err) { +- ret = tcf_idr_create(tn, index, est, a, +- &act_ctinfo_ops, bind, false, 0); ++ ret = tcf_idr_create_from_flags(tn, index, est, a, ++ &act_ctinfo_ops, bind, flags); + if (ret) { + tcf_idr_cleanup(tn, index); + return ret; +diff --git a/net/sched/act_gate.c b/net/sched/act_gate.c +index 7df72a4197a3f..ac985c53ebafe 100644 +--- a/net/sched/act_gate.c ++++ b/net/sched/act_gate.c +@@ -357,7 +357,7 @@ static int tcf_gate_init(struct net *net, struct nlattr *nla, + + if (!err) { + ret = tcf_idr_create(tn, index, est, a, +- &act_gate_ops, bind, false, 0); ++ &act_gate_ops, bind, false, flags); + if (ret) { + tcf_idr_cleanup(tn, index); + return ret; +diff --git a/net/sched/act_ife.c 
b/net/sched/act_ife.c +index 7064a365a1a98..ec987ec758070 100644 +--- a/net/sched/act_ife.c ++++ b/net/sched/act_ife.c +@@ -553,7 +553,7 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla, + + if (!exists) { + ret = tcf_idr_create(tn, index, est, a, &act_ife_ops, +- bind, true, 0); ++ bind, true, flags); + if (ret) { + tcf_idr_cleanup(tn, index); + kfree(p); +diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c +index 265b1443e252f..2f3d507c24a1f 100644 +--- a/net/sched/act_ipt.c ++++ b/net/sched/act_ipt.c +@@ -145,7 +145,7 @@ static int __tcf_ipt_init(struct net *net, unsigned int id, struct nlattr *nla, + + if (!exists) { + ret = tcf_idr_create(tn, index, est, a, ops, bind, +- false, 0); ++ false, flags); + if (ret) { + tcf_idr_cleanup(tn, index); + return ret; diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index d64b0eeccbe4d..efc963ab995a3 100644 --- a/net/sched/act_mirred.c @@ -404332,8 +521681,56 @@ index d64b0eeccbe4d..efc963ab995a3 100644 expects_nh = want_ingress || !m_mac_header_xmit; at_nh = skb->data == skb_network_header(skb); +diff --git a/net/sched/act_mpls.c b/net/sched/act_mpls.c +index e4529b428cf44..980ad795727e9 100644 +--- a/net/sched/act_mpls.c ++++ b/net/sched/act_mpls.c +@@ -133,6 +133,11 @@ static int valid_label(const struct nlattr *attr, + { + const u32 *label = nla_data(attr); + ++ if (nla_len(attr) != sizeof(*label)) { ++ NL_SET_ERR_MSG_MOD(extack, "Invalid MPLS label length"); ++ return -EINVAL; ++ } ++ + if (*label & ~MPLS_LABEL_MASK || *label == MPLS_LABEL_IMPLNULL) { + NL_SET_ERR_MSG_MOD(extack, "MPLS label out of range"); + return -EINVAL; +@@ -144,7 +149,8 @@ static int valid_label(const struct nlattr *attr, + static const struct nla_policy mpls_policy[TCA_MPLS_MAX + 1] = { + [TCA_MPLS_PARMS] = NLA_POLICY_EXACT_LEN(sizeof(struct tc_mpls)), + [TCA_MPLS_PROTO] = { .type = NLA_U16 }, +- [TCA_MPLS_LABEL] = NLA_POLICY_VALIDATE_FN(NLA_U32, valid_label), ++ [TCA_MPLS_LABEL] = NLA_POLICY_VALIDATE_FN(NLA_BINARY, ++ valid_label), + [TCA_MPLS_TC] = NLA_POLICY_RANGE(NLA_U8, 0, 7), + [TCA_MPLS_TTL] = NLA_POLICY_MIN(NLA_U8, 1), + [TCA_MPLS_BOS] = NLA_POLICY_RANGE(NLA_U8, 0, 1), +@@ -248,7 +254,7 @@ static int tcf_mpls_init(struct net *net, struct nlattr *nla, + + if (!exists) { + ret = tcf_idr_create(tn, index, est, a, +- &act_mpls_ops, bind, true, 0); ++ &act_mpls_ops, bind, true, flags); + if (ret) { + tcf_idr_cleanup(tn, index); + return ret; +diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c +index 7dd6b586ba7f6..2a39b3729e844 100644 +--- a/net/sched/act_nat.c ++++ b/net/sched/act_nat.c +@@ -61,7 +61,7 @@ static int tcf_nat_init(struct net *net, struct nlattr *nla, struct nlattr *est, + err = tcf_idr_check_alloc(tn, &index, a, bind); + if (!err) { + ret = tcf_idr_create(tn, index, est, a, +- &act_nat_ops, bind, false, 0); ++ &act_nat_ops, bind, false, flags); + if (ret) { + tcf_idr_cleanup(tn, index); + return ret; diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c -index c6c862c459cc3..1262a84b725fc 100644 +index c6c862c459cc3..4f72e6e7dbda5 100644 --- a/net/sched/act_pedit.c +++ b/net/sched/act_pedit.c @@ -149,7 +149,7 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla, @@ -404345,6 +521742,15 @@ index c6c862c459cc3..1262a84b725fc 100644 u32 index; if (!nla) { +@@ -189,7 +189,7 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla, + err = tcf_idr_check_alloc(tn, &index, a, bind); + if (!err) { + ret = tcf_idr_create(tn, index, est, a, +- &act_pedit_ops, bind, false, 0); ++ &act_pedit_ops, bind, 
false, flags); + if (ret) { + tcf_idr_cleanup(tn, index); + goto out_free; @@ -228,6 +228,22 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla, p->tcfp_nkeys = parm->nkeys; } @@ -404399,9 +521805,18 @@ index c6c862c459cc3..1262a84b725fc 100644 return p->tcf_action; } diff --git a/net/sched/act_police.c b/net/sched/act_police.c -index 832157a840fc3..5c0a3ea9fe120 100644 +index 832157a840fc3..d44b933b821d7 100644 --- a/net/sched/act_police.c +++ b/net/sched/act_police.c +@@ -90,7 +90,7 @@ static int tcf_police_init(struct net *net, struct nlattr *nla, + + if (!exists) { + ret = tcf_idr_create(tn, index, NULL, a, +- &act_police_ops, bind, true, 0); ++ &act_police_ops, bind, true, flags); + if (ret) { + tcf_idr_cleanup(tn, index); + return ret; @@ -239,6 +239,20 @@ release_idr: return err; } @@ -404432,6 +521847,58 @@ index 832157a840fc3..5c0a3ea9fe120 100644 if (!p->rate_present && !p->pps_present) { ret = p->tcfp_result; goto end; +diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c +index 230501eb9e069..ab4ae24ab886f 100644 +--- a/net/sched/act_sample.c ++++ b/net/sched/act_sample.c +@@ -70,7 +70,7 @@ static int tcf_sample_init(struct net *net, struct nlattr *nla, + + if (!exists) { + ret = tcf_idr_create(tn, index, est, a, +- &act_sample_ops, bind, true, 0); ++ &act_sample_ops, bind, true, flags); + if (ret) { + tcf_idr_cleanup(tn, index); + return ret; +diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c +index cbbe1861d3a20..7885271540259 100644 +--- a/net/sched/act_simple.c ++++ b/net/sched/act_simple.c +@@ -128,7 +128,7 @@ static int tcf_simp_init(struct net *net, struct nlattr *nla, + + if (!exists) { + ret = tcf_idr_create(tn, index, est, a, +- &act_simp_ops, bind, false, 0); ++ &act_simp_ops, bind, false, flags); + if (ret) { + tcf_idr_cleanup(tn, index); + return ret; +diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c +index 6054185383474..6088ceaf582e8 100644 +--- a/net/sched/act_skbedit.c ++++ b/net/sched/act_skbedit.c +@@ -176,7 +176,7 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla, + + if (!exists) { + ret = tcf_idr_create(tn, index, est, a, +- &act_skbedit_ops, bind, true, 0); ++ &act_skbedit_ops, bind, true, act_flags); + if (ret) { + tcf_idr_cleanup(tn, index); + return ret; +diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c +index ecb9ee6660954..ee9cc0abf9e10 100644 +--- a/net/sched/act_skbmod.c ++++ b/net/sched/act_skbmod.c +@@ -168,7 +168,7 @@ static int tcf_skbmod_init(struct net *net, struct nlattr *nla, + + if (!exists) { + ret = tcf_idr_create(tn, index, est, a, +- &act_skbmod_ops, bind, true, 0); ++ &act_skbmod_ops, bind, true, flags); + if (ret) { + tcf_idr_cleanup(tn, index); + return ret; diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 2ef8f5a6205a9..62ce6981942b7 100644 --- a/net/sched/cls_api.c @@ -404676,6 +522143,103 @@ index a35ab8c27866e..48712bc51bda7 100644 th = to_hash(fold->handle); h = from_hash(fold->handle >> 16); b = rtnl_dereference(head->table[th]); +diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c +index 742c7d49a9581..54c5ff207fb1b 100644 +--- a/net/sched/cls_tcindex.c ++++ b/net/sched/cls_tcindex.c +@@ -12,6 +12,7 @@ + #include <linux/errno.h> + #include <linux/slab.h> + #include <linux/refcount.h> ++#include <linux/rcupdate.h> + #include <net/act_api.h> + #include <net/netlink.h> + #include <net/pkt_cls.h> +@@ -332,12 +333,13 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, + struct tcindex_filter_result *r, 
struct nlattr **tb, + struct nlattr *est, u32 flags, struct netlink_ext_ack *extack) + { +- struct tcindex_filter_result new_filter_result, *old_r = r; ++ struct tcindex_filter_result new_filter_result; + struct tcindex_data *cp = NULL, *oldp; + struct tcindex_filter *f = NULL; /* make gcc behave */ + struct tcf_result cr = {}; + int err, balloc = 0; + struct tcf_exts e; ++ bool update_h = false; + + err = tcf_exts_init(&e, net, TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE); + if (err < 0) +@@ -401,7 +403,7 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, + err = tcindex_filter_result_init(&new_filter_result, cp, net); + if (err < 0) + goto errout_alloc; +- if (old_r) ++ if (r) + cr = r->res; + + err = -EBUSY; +@@ -455,10 +457,13 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, + } + } + +- if (cp->perfect) ++ if (cp->perfect) { + r = cp->perfect + handle; +- else +- r = tcindex_lookup(cp, handle) ? : &new_filter_result; ++ } else { ++ /* imperfect area is updated in-place using rcu */ ++ update_h = !!tcindex_lookup(cp, handle); ++ r = &new_filter_result; ++ } + + if (r == &new_filter_result) { + f = kzalloc(sizeof(*f), GFP_KERNEL); +@@ -478,21 +483,34 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, + tcf_bind_filter(tp, &cr, base); + } + +- if (old_r && old_r != r) { +- err = tcindex_filter_result_init(old_r, cp, net); +- if (err < 0) { +- kfree(f); +- goto errout_alloc; +- } +- } +- + oldp = p; + r->res = cr; + tcf_exts_change(&r->exts, &e); + + rcu_assign_pointer(tp->root, cp); + +- if (r == &new_filter_result) { ++ if (update_h) { ++ struct tcindex_filter __rcu **fp; ++ struct tcindex_filter *cf; ++ ++ f->result.res = r->res; ++ tcf_exts_change(&f->result.exts, &r->exts); ++ ++ /* imperfect area bucket */ ++ fp = cp->h + (handle % cp->hash); ++ ++ /* lookup the filter, guaranteed to exist */ ++ for (cf = rcu_dereference_bh_rtnl(*fp); cf; ++ fp = &cf->next, cf = rcu_dereference_bh_rtnl(*fp)) ++ if (cf->key == (u16)handle) ++ break; ++ ++ f->next = cf->next; ++ ++ cf = rcu_replace_pointer(*fp, f, 1); ++ tcf_exts_get_net(&cf->result.exts); ++ tcf_queue_work(&cf->rwork, tcindex_destroy_fexts_work); ++ } else if (r == &new_filter_result) { + struct tcindex_filter *nfp; + struct tcindex_filter __rcu **fp; + diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index 4272814487f09..5d30db0d71574 100644 --- a/net/sched/cls_u32.c @@ -404750,8 +522314,21 @@ index 4272814487f09..5d30db0d71574 100644 return err; } +diff --git a/net/sched/ematch.c b/net/sched/ematch.c +index 4ce6813618515..5c1235e6076ae 100644 +--- a/net/sched/ematch.c ++++ b/net/sched/ematch.c +@@ -255,6 +255,8 @@ static int tcf_em_validate(struct tcf_proto *tp, + * the value carried. 
+ */ + if (em_hdr->flags & TCF_EM_SIMPLE) { ++ if (em->ops->datalen > 0) ++ goto errout; + if (data_len < sizeof(u32)) + goto errout; + em->data = *(u32 *) data; diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c -index 12f39a2dffd47..5ab20c764aa5b 100644 +index 12f39a2dffd47..02f62008e468f 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -301,7 +301,7 @@ struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle) @@ -404797,7 +522374,19 @@ index 12f39a2dffd47..5ab20c764aa5b 100644 if (new && new->ops->attach) new->ops->attach(new); -@@ -1204,7 +1205,7 @@ static struct Qdisc *qdisc_create(struct net_device *dev, +@@ -1113,6 +1114,11 @@ skip: + return -ENOENT; + } + ++ if (new && new->ops == &noqueue_qdisc_ops) { ++ NL_SET_ERR_MSG(extack, "Cannot assign noqueue to a class"); ++ return -EINVAL; ++ } ++ + err = cops->graft(parent, cl, new, &old, extack); + if (err) + return err; +@@ -1204,7 +1210,7 @@ static struct Qdisc *qdisc_create(struct net_device *dev, err = -ENOENT; if (!ops) { @@ -404806,7 +522395,7 @@ index 12f39a2dffd47..5ab20c764aa5b 100644 goto err_out; } -@@ -1460,7 +1461,7 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, +@@ -1460,7 +1466,7 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, q = dev_ingress_queue(dev)->qdisc_sleeping; } } else { @@ -404815,7 +522404,7 @@ index 12f39a2dffd47..5ab20c764aa5b 100644 } if (!q) { NL_SET_ERR_MSG(extack, "Cannot find specified qdisc on specified device"); -@@ -1549,7 +1550,7 @@ replay: +@@ -1549,7 +1555,7 @@ replay: q = dev_ingress_queue(dev)->qdisc_sleeping; } } else { @@ -404824,7 +522413,7 @@ index 12f39a2dffd47..5ab20c764aa5b 100644 } /* It may be default qdisc, ignore it */ -@@ -1771,7 +1772,8 @@ static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb) +@@ -1771,7 +1777,8 @@ static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb) s_q_idx = 0; q_idx = 0; @@ -404834,7 +522423,7 @@ index 12f39a2dffd47..5ab20c764aa5b 100644 true, tca[TCA_DUMP_INVISIBLE]) < 0) goto done; -@@ -2042,7 +2044,7 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, +@@ -2042,7 +2049,7 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, } else if (qid1) { qid = qid1; } else if (qid == 0) @@ -404843,7 +522432,7 @@ index 12f39a2dffd47..5ab20c764aa5b 100644 /* Now qid is genuine qdisc handle consistent * both with parent and child. -@@ -2053,7 +2055,7 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, +@@ -2053,7 +2060,7 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, portid = TC_H_MAKE(qid, portid); } else { if (qid == 0) @@ -404852,7 +522441,7 @@ index 12f39a2dffd47..5ab20c764aa5b 100644 } /* OK. 
Locate qdisc */ -@@ -2214,7 +2216,8 @@ static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb) +@@ -2214,7 +2221,8 @@ static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb) s_t = cb->args[0]; t = 0; @@ -404863,10 +522452,25 @@ index 12f39a2dffd47..5ab20c764aa5b 100644 dev_queue = dev_ingress_queue(dev); diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c -index 7d8518176b45a..70fe1c5e44ad8 100644 +index 7d8518176b45a..33737169cc2d3 100644 --- a/net/sched/sch_atm.c +++ b/net/sched/sch_atm.c -@@ -576,7 +576,6 @@ static void atm_tc_reset(struct Qdisc *sch) +@@ -397,10 +397,13 @@ static int atm_tc_enqueue(struct sk_buff *skb, struct Qdisc *sch, + result = tcf_classify(skb, NULL, fl, &res, true); + if (result < 0) + continue; ++ if (result == TC_ACT_SHOT) ++ goto done; ++ + flow = (struct atm_flow_data *)res.class; + if (!flow) + flow = lookup_flow(sch, res.classid); +- goto done; ++ goto drop; + } + } + flow = NULL; +@@ -576,7 +579,6 @@ static void atm_tc_reset(struct Qdisc *sch) pr_debug("atm_tc_reset(sch %p,[qdisc %p])\n", sch, p); list_for_each_entry(flow, &p->flows, list) qdisc_reset(flow->q); @@ -404912,9 +522516,27 @@ index 3c2300d144681..6f6e74ce927f4 100644 static int cake_dump(struct Qdisc *sch, struct sk_buff *skb) diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c -index e0da15530f0e9..fd7e10567371c 100644 +index e0da15530f0e9..46b3dd71777d1 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c +@@ -231,6 +231,8 @@ cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr) + result = tcf_classify(skb, NULL, fl, &res, true); + if (!fl || result < 0) + goto fallback; ++ if (result == TC_ACT_SHOT) ++ return NULL; + + cl = (void *)res.class; + if (!cl) { +@@ -251,8 +253,6 @@ cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr) + case TC_ACT_TRAP: + *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN; + fallthrough; +- case TC_ACT_SHOT: +- return NULL; + case TC_ACT_RECLASSIFY: + return cbq_reclassify(skb, cl); + } @@ -1053,7 +1053,6 @@ cbq_reset(struct Qdisc *sch) cl->cpriority = cl->priority; } @@ -405228,10 +522850,22 @@ index b7ac30cca035d..c802a027b4f31 100644 static void diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c -index 5067a6e5d4fde..caabdaa2f30f6 100644 +index 5067a6e5d4fde..8ce999e4ca323 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c -@@ -1008,8 +1008,6 @@ static void htb_reset(struct Qdisc *sch) +@@ -427,7 +427,10 @@ static void htb_activate_prios(struct htb_sched *q, struct htb_class *cl) + while (cl->cmode == HTB_MAY_BORROW && p && mask) { + m = mask; + while (m) { +- int prio = ffz(~m); ++ unsigned int prio = ffz(~m); ++ ++ if (WARN_ON_ONCE(prio >= ARRAY_SIZE(p->inner.clprio))) ++ break; + m &= ~(1 << prio); + + if (p->inner.clprio[prio].feed.rb_node) +@@ -1008,8 +1011,6 @@ static void htb_reset(struct Qdisc *sch) } qdisc_watchdog_cancel(&q->watchdog); __qdisc_reset_queue(&q->direct_queue); @@ -405240,7 +522874,57 @@ index 5067a6e5d4fde..caabdaa2f30f6 100644 memset(q->hlevel, 0, sizeof(q->hlevel)); memset(q->row_mask, 0, sizeof(q->row_mask)); } -@@ -1803,6 +1801,26 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, +@@ -1560,7 +1561,7 @@ static int htb_destroy_class_offload(struct Qdisc *sch, struct htb_class *cl, + struct tc_htb_qopt_offload offload_opt; + struct netdev_queue *dev_queue; + struct Qdisc *q = cl->leaf.q; +- struct Qdisc *old = NULL; ++ struct Qdisc *old; + int err; + + if (cl->level) +@@ -1568,14 +1569,17 @@ static int htb_destroy_class_offload(struct Qdisc *sch, 
struct htb_class *cl, + + WARN_ON(!q); + dev_queue = htb_offload_get_queue(cl); +- old = htb_graft_helper(dev_queue, NULL); +- if (destroying) +- /* Before HTB is destroyed, the kernel grafts noop_qdisc to +- * all queues. ++ /* When destroying, caller qdisc_graft grafts the new qdisc and invokes ++ * qdisc_put for the qdisc being destroyed. htb_destroy_class_offload ++ * does not need to graft or qdisc_put the qdisc being destroyed. ++ */ ++ if (!destroying) { ++ old = htb_graft_helper(dev_queue, NULL); ++ /* Last qdisc grafted should be the same as cl->leaf.q when ++ * calling htb_delete. + */ +- WARN_ON(!(old->flags & TCQ_F_BUILTIN)); +- else + WARN_ON(old != q); ++ } + + if (cl->parent) { + cl->parent->bstats_bias.bytes += q->bstats.bytes; +@@ -1591,10 +1595,12 @@ static int htb_destroy_class_offload(struct Qdisc *sch, struct htb_class *cl, + }; + err = htb_offload(qdisc_dev(sch), &offload_opt); + +- if (!err || destroying) +- qdisc_put(old); +- else +- htb_graft_helper(dev_queue, old); ++ if (!destroying) { ++ if (!err) ++ qdisc_put(old); ++ else ++ htb_graft_helper(dev_queue, old); ++ } + + if (last_child) + return err; +@@ -1803,6 +1809,26 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, if (!hopt->rate.rate || !hopt->ceil.rate) goto failure; @@ -405418,10 +523102,30 @@ index 58a9d42b52b8f..50e51c1322fc1 100644 static void qfq_destroy_qdisc(struct Qdisc *sch) diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c -index 40adf1f07a82d..f1e013e3f04a9 100644 +index 40adf1f07a82d..935d90874b1b7 100644 --- a/net/sched/sch_red.c +++ b/net/sched/sch_red.c -@@ -176,8 +176,6 @@ static void red_reset(struct Qdisc *sch) +@@ -72,6 +72,7 @@ static int red_enqueue(struct sk_buff *skb, struct Qdisc *sch, + { + struct red_sched_data *q = qdisc_priv(sch); + struct Qdisc *child = q->qdisc; ++ unsigned int len; + int ret; + + q->vars.qavg = red_calc_qavg(&q->parms, +@@ -126,9 +127,10 @@ static int red_enqueue(struct sk_buff *skb, struct Qdisc *sch, + break; + } + ++ len = qdisc_pkt_len(skb); + ret = qdisc_enqueue(skb, child, to_free); + if (likely(ret == NET_XMIT_SUCCESS)) { +- qdisc_qstats_backlog_inc(sch, skb); ++ sch->qstats.backlog += len; + sch->q.qlen++; + } else if (net_xmit_drop_count(ret)) { + q->stats.pdrop++; +@@ -176,8 +178,6 @@ static void red_reset(struct Qdisc *sch) struct red_sched_data *q = qdisc_priv(sch); qdisc_reset(q->qdisc); @@ -405506,7 +523210,7 @@ index 7a5e4c4547156..df72fb83d9c7d 100644 __skb_queue_purge(&q->qdiscs[prio]); diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c -index b9fd18d986464..bd10a8eeb82db 100644 +index b9fd18d986464..e203deacc9533 100644 --- a/net/sched/sch_taprio.c +++ b/net/sched/sch_taprio.c @@ -66,6 +66,7 @@ struct taprio_sched { @@ -405633,7 +523337,13 @@ index b9fd18d986464..bd10a8eeb82db 100644 q->clockid = clockid; } else { -@@ -1627,8 +1637,6 @@ static void taprio_reset(struct Qdisc *sch) +@@ -1622,13 +1632,12 @@ static void taprio_reset(struct Qdisc *sch) + int i; + + hrtimer_cancel(&q->advance_timer); ++ + if (q->qdiscs) { + for (i = 0; i < dev->num_tx_queues; i++) if (q->qdiscs[i]) qdisc_reset(q->qdiscs[i]); } @@ -405642,24 +523352,14 @@ index b9fd18d986464..bd10a8eeb82db 100644 } static void taprio_destroy(struct Qdisc *sch) -@@ -1940,12 +1948,14 @@ start_error: - - static struct Qdisc *taprio_leaf(struct Qdisc *sch, unsigned long cl) - { -- struct netdev_queue *dev_queue = taprio_queue_get(sch, cl); -+ struct taprio_sched *q = qdisc_priv(sch); -+ struct net_device *dev = qdisc_dev(sch); -+ unsigned int ntx = cl - 1; - 
-- if (!dev_queue) -+ if (ntx >= dev->num_tx_queues) - return NULL; +@@ -1645,6 +1654,7 @@ static void taprio_destroy(struct Qdisc *sch) + * happens in qdisc_create(), after taprio_init() has been called. + */ + hrtimer_cancel(&q->advance_timer); ++ qdisc_synchronize(sch); -- return dev_queue->qdisc_sleeping; -+ return q->qdiscs[ntx]; - } + taprio_disable_offload(dev, q, NULL); - static unsigned long taprio_find(struct Qdisc *sch, u32 classid) diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c index 78e79029dc631..7461e5c67d50a 100644 --- a/net/sched/sch_tbf.c @@ -405766,8 +523466,25 @@ index db6b7373d16c3..34964145514e6 100644 } else ep->active_key_id = key_id; +diff --git a/net/sctp/bind_addr.c b/net/sctp/bind_addr.c +index 59e653b528b1f..6b95d3ba8fe1c 100644 +--- a/net/sctp/bind_addr.c ++++ b/net/sctp/bind_addr.c +@@ -73,6 +73,12 @@ int sctp_bind_addr_copy(struct net *net, struct sctp_bind_addr *dest, + } + } + ++ /* If somehow no addresses were found that can be used with this ++ * scope, it's an error. ++ */ ++ if (list_empty(&dest->address_list)) ++ error = -ENETUNREACH; ++ + out: + if (error) + sctp_bind_addr_clean(dest); diff --git a/net/sctp/diag.c b/net/sctp/diag.c -index 760b367644c12..d9c6d8f30f093 100644 +index 760b367644c12..b0ce1080842d4 100644 --- a/net/sctp/diag.c +++ b/net/sctp/diag.c @@ -61,10 +61,6 @@ static void inet_diag_msg_sctpasoc_fill(struct inet_diag_msg *r, @@ -405879,7 +523596,7 @@ index 760b367644c12..d9c6d8f30f093 100644 list_for_each_entry(assoc, &ep->asocs, asocs) { if (cb->args[4] < cb->args[1]) goto next; -@@ -344,9 +338,8 @@ release: +@@ -344,17 +338,14 @@ release: return err; } @@ -405890,7 +523607,16 @@ index 760b367644c12..d9c6d8f30f093 100644 struct sctp_comm_param *commp = p; struct sock *sk = ep->base.sk; const struct inet_diag_req_v2 *r = commp->r; -@@ -429,15 +422,15 @@ static void sctp_diag_get_info(struct sock *sk, struct inet_diag_msg *r, +- struct sctp_association *assoc = +- list_entry(ep->asocs.next, struct sctp_association, asocs); + + /* find the ep only once through the transports by this condition */ +- if (tsp->asoc != assoc) ++ if (!list_is_first(&tsp->asoc->asocs, &ep->asocs)) + return 0; + + if (r->sdiag_family != AF_UNSPEC && sk->sk_family != r->sdiag_family) +@@ -429,15 +420,15 @@ static void sctp_diag_get_info(struct sock *sk, struct inet_diag_msg *r, static int sctp_diag_dump_one(struct netlink_callback *cb, const struct inet_diag_req_v2 *req) { @@ -405910,7 +523636,7 @@ index 760b367644c12..d9c6d8f30f093 100644 }; if (req->sdiag_family == AF_INET) { -@@ -460,7 +453,7 @@ static int sctp_diag_dump_one(struct netlink_callback *cb, +@@ -460,7 +451,7 @@ static int sctp_diag_dump_one(struct netlink_callback *cb, paddr.v6.sin6_family = AF_INET6; } @@ -405919,7 +523645,7 @@ index 760b367644c12..d9c6d8f30f093 100644 net, &laddr, &paddr, &commp); } -@@ -505,8 +498,8 @@ skip: +@@ -505,8 +496,8 @@ skip: if (!(idiag_states & ~(TCPF_LISTEN | TCPF_CLOSE))) goto done; @@ -406024,10 +523750,38 @@ index 4dfb5ea82b05b..cdfdbd353c678 100644 /* alloc head skb */ diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c -index ff47091c385e7..b3950963fc8f0 100644 +index ff47091c385e7..dc29ac0f8d3f8 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c -@@ -911,6 +911,7 @@ static void sctp_outq_flush_ctrl(struct sctp_flush_ctx *ctx) +@@ -384,6 +384,7 @@ static int sctp_prsctp_prune_unsent(struct sctp_association *asoc, + { + struct sctp_outq *q = &asoc->outqueue; + struct sctp_chunk *chk, *temp; ++ struct sctp_stream_out *sout; + + 
q->sched->unsched_all(&asoc->stream); + +@@ -398,12 +399,14 @@ static int sctp_prsctp_prune_unsent(struct sctp_association *asoc, + sctp_sched_dequeue_common(q, chk); + asoc->sent_cnt_removable--; + asoc->abandoned_unsent[SCTP_PR_INDEX(PRIO)]++; +- if (chk->sinfo.sinfo_stream < asoc->stream.outcnt) { +- struct sctp_stream_out *streamout = +- SCTP_SO(&asoc->stream, chk->sinfo.sinfo_stream); + +- streamout->ext->abandoned_unsent[SCTP_PR_INDEX(PRIO)]++; +- } ++ sout = SCTP_SO(&asoc->stream, chk->sinfo.sinfo_stream); ++ sout->ext->abandoned_unsent[SCTP_PR_INDEX(PRIO)]++; ++ ++ /* clear out_curr if all frag chunks are pruned */ ++ if (asoc->stream.out_curr == sout && ++ list_is_last(&chk->frag_list, &chk->msg->chunks)) ++ asoc->stream.out_curr = NULL; + + msg_len -= chk->skb->truesize + sizeof(struct sctp_chunk); + sctp_chunk_free(chk); +@@ -911,6 +914,7 @@ static void sctp_outq_flush_ctrl(struct sctp_flush_ctx *ctx) ctx->asoc->base.sk->sk_err = -error; return; } @@ -406035,7 +523789,7 @@ index ff47091c385e7..b3950963fc8f0 100644 break; case SCTP_CID_ABORT: -@@ -935,7 +936,10 @@ static void sctp_outq_flush_ctrl(struct sctp_flush_ctx *ctx) +@@ -935,7 +939,10 @@ static void sctp_outq_flush_ctrl(struct sctp_flush_ctx *ctx) case SCTP_CID_HEARTBEAT: if (chunk->pmtu_probe) { @@ -406179,10 +523933,50 @@ index 6b937bfd47515..5f6e6a6e91b3d 100644 /* Populate the fields of the newsk from the oldsk and migrate the diff --git a/net/sctp/stream.c b/net/sctp/stream.c -index 6dc95dcc0ff4f..ef9fceadef8d5 100644 +index 6dc95dcc0ff4f..ee6514af830f7 100644 --- a/net/sctp/stream.c +++ b/net/sctp/stream.c -@@ -137,7 +137,7 @@ int sctp_stream_init(struct sctp_stream *stream, __u16 outcnt, __u16 incnt, +@@ -52,6 +52,19 @@ static void sctp_stream_shrink_out(struct sctp_stream *stream, __u16 outcnt) + } + } + ++static void sctp_stream_free_ext(struct sctp_stream *stream, __u16 sid) ++{ ++ struct sctp_sched_ops *sched; ++ ++ if (!SCTP_SO(stream, sid)->ext) ++ return; ++ ++ sched = sctp_sched_ops_from_stream(stream); ++ sched->free_sid(stream, sid); ++ kfree(SCTP_SO(stream, sid)->ext); ++ SCTP_SO(stream, sid)->ext = NULL; ++} ++ + /* Migrates chunks from stream queues to new stream queues if needed, + * but not across associations. Also, removes those chunks to streams + * higher than the new max. +@@ -70,16 +83,14 @@ static void sctp_stream_outq_migrate(struct sctp_stream *stream, + * sctp_stream_update will swap ->out pointers. 
+ */ + for (i = 0; i < outcnt; i++) { +- kfree(SCTP_SO(new, i)->ext); ++ sctp_stream_free_ext(new, i); + SCTP_SO(new, i)->ext = SCTP_SO(stream, i)->ext; + SCTP_SO(stream, i)->ext = NULL; + } + } + +- for (i = outcnt; i < stream->outcnt; i++) { +- kfree(SCTP_SO(stream, i)->ext); +- SCTP_SO(stream, i)->ext = NULL; +- } ++ for (i = outcnt; i < stream->outcnt; i++) ++ sctp_stream_free_ext(stream, i); + } + + static int sctp_stream_alloc_out(struct sctp_stream *stream, __u16 outcnt, +@@ -137,7 +148,7 @@ int sctp_stream_init(struct sctp_stream *stream, __u16 outcnt, __u16 incnt, ret = sctp_stream_alloc_out(stream, outcnt, gfp); if (ret) @@ -406191,19 +523985,19 @@ index 6dc95dcc0ff4f..ef9fceadef8d5 100644 for (i = 0; i < stream->outcnt; i++) SCTP_SO(stream, i)->state = SCTP_STREAM_OPEN; -@@ -145,22 +145,9 @@ int sctp_stream_init(struct sctp_stream *stream, __u16 outcnt, __u16 incnt, +@@ -145,22 +156,9 @@ int sctp_stream_init(struct sctp_stream *stream, __u16 outcnt, __u16 incnt, handle_in: sctp_stream_interleave_init(stream); if (!incnt) - goto out; -- ++ return 0; + - ret = sctp_stream_alloc_in(stream, incnt, gfp); - if (ret) - goto in_err; - - goto out; -+ return 0; - +- -in_err: - sched->free(stream); - genradix_free(&stream->in); @@ -406216,11 +524010,42 @@ index 6dc95dcc0ff4f..ef9fceadef8d5 100644 } int sctp_stream_init_ext(struct sctp_stream *stream, __u16 sid) +@@ -187,9 +185,9 @@ void sctp_stream_free(struct sctp_stream *stream) + struct sctp_sched_ops *sched = sctp_sched_ops_from_stream(stream); + int i; + +- sched->free(stream); ++ sched->unsched_all(stream); + for (i = 0; i < stream->outcnt; i++) +- kfree(SCTP_SO(stream, i)->ext); ++ sctp_stream_free_ext(stream, i); + genradix_free(&stream->out); + genradix_free(&stream->in); + } diff --git a/net/sctp/stream_sched.c b/net/sctp/stream_sched.c -index 99e5f69fbb742..a2e1d34f52c5b 100644 +index 99e5f69fbb742..33c2630c2496b 100644 --- a/net/sctp/stream_sched.c +++ b/net/sctp/stream_sched.c -@@ -163,7 +163,7 @@ int sctp_sched_set_sched(struct sctp_association *asoc, +@@ -46,6 +46,10 @@ static int sctp_sched_fcfs_init_sid(struct sctp_stream *stream, __u16 sid, + return 0; + } + ++static void sctp_sched_fcfs_free_sid(struct sctp_stream *stream, __u16 sid) ++{ ++} ++ + static void sctp_sched_fcfs_free(struct sctp_stream *stream) + { + } +@@ -96,6 +100,7 @@ static struct sctp_sched_ops sctp_sched_fcfs = { + .get = sctp_sched_fcfs_get, + .init = sctp_sched_fcfs_init, + .init_sid = sctp_sched_fcfs_init_sid, ++ .free_sid = sctp_sched_fcfs_free_sid, + .free = sctp_sched_fcfs_free, + .enqueue = sctp_sched_fcfs_enqueue, + .dequeue = sctp_sched_fcfs_dequeue, +@@ -163,7 +168,7 @@ int sctp_sched_set_sched(struct sctp_association *asoc, if (!SCTP_SO(&asoc->stream, i)->ext) continue; @@ -406229,6 +524054,179 @@ index 99e5f69fbb742..a2e1d34f52c5b 100644 if (ret) goto err; } +diff --git a/net/sctp/stream_sched_prio.c b/net/sctp/stream_sched_prio.c +index 80b5a2c4cbc7b..4fc9f2923ed11 100644 +--- a/net/sctp/stream_sched_prio.c ++++ b/net/sctp/stream_sched_prio.c +@@ -204,6 +204,24 @@ static int sctp_sched_prio_init_sid(struct sctp_stream *stream, __u16 sid, + return sctp_sched_prio_set(stream, sid, 0, gfp); + } + ++static void sctp_sched_prio_free_sid(struct sctp_stream *stream, __u16 sid) ++{ ++ struct sctp_stream_priorities *prio = SCTP_SO(stream, sid)->ext->prio_head; ++ int i; ++ ++ if (!prio) ++ return; ++ ++ SCTP_SO(stream, sid)->ext->prio_head = NULL; ++ for (i = 0; i < stream->outcnt; i++) { ++ if (SCTP_SO(stream, i)->ext && ++ SCTP_SO(stream, 
i)->ext->prio_head == prio) ++ return; ++ } ++ ++ kfree(prio); ++} ++ + static void sctp_sched_prio_free(struct sctp_stream *stream) + { + struct sctp_stream_priorities *prio, *n; +@@ -323,6 +341,7 @@ static struct sctp_sched_ops sctp_sched_prio = { + .get = sctp_sched_prio_get, + .init = sctp_sched_prio_init, + .init_sid = sctp_sched_prio_init_sid, ++ .free_sid = sctp_sched_prio_free_sid, + .free = sctp_sched_prio_free, + .enqueue = sctp_sched_prio_enqueue, + .dequeue = sctp_sched_prio_dequeue, +diff --git a/net/sctp/stream_sched_rr.c b/net/sctp/stream_sched_rr.c +index ff425aed62c7f..cc444fe0d67c2 100644 +--- a/net/sctp/stream_sched_rr.c ++++ b/net/sctp/stream_sched_rr.c +@@ -90,6 +90,10 @@ static int sctp_sched_rr_init_sid(struct sctp_stream *stream, __u16 sid, + return 0; + } + ++static void sctp_sched_rr_free_sid(struct sctp_stream *stream, __u16 sid) ++{ ++} ++ + static void sctp_sched_rr_free(struct sctp_stream *stream) + { + sctp_sched_rr_unsched_all(stream); +@@ -177,6 +181,7 @@ static struct sctp_sched_ops sctp_sched_rr = { + .get = sctp_sched_rr_get, + .init = sctp_sched_rr_init, + .init_sid = sctp_sched_rr_init_sid, ++ .free_sid = sctp_sched_rr_free_sid, + .free = sctp_sched_rr_free, + .enqueue = sctp_sched_rr_enqueue, + .dequeue = sctp_sched_rr_dequeue, +diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c +index b46a416787ec3..43ebf090029d7 100644 +--- a/net/sctp/sysctl.c ++++ b/net/sctp/sysctl.c +@@ -84,17 +84,18 @@ static struct ctl_table sctp_table[] = { + { /* sentinel */ } + }; + ++/* The following index defines are used in sctp_sysctl_net_register(). ++ * If you add new items to the sctp_net_table, please ensure that ++ * the index values of these defines hold the same meaning indicated by ++ * their macro names when they appear in sctp_net_table. 
++ */ ++#define SCTP_RTO_MIN_IDX 0 ++#define SCTP_RTO_MAX_IDX 1 ++#define SCTP_PF_RETRANS_IDX 2 ++#define SCTP_PS_RETRANS_IDX 3 ++ + static struct ctl_table sctp_net_table[] = { +- { +- .procname = "rto_initial", +- .data = &init_net.sctp.rto_initial, +- .maxlen = sizeof(unsigned int), +- .mode = 0644, +- .proc_handler = proc_dointvec_minmax, +- .extra1 = SYSCTL_ONE, +- .extra2 = &timer_max +- }, +- { ++ [SCTP_RTO_MIN_IDX] = { + .procname = "rto_min", + .data = &init_net.sctp.rto_min, + .maxlen = sizeof(unsigned int), +@@ -103,7 +104,7 @@ static struct ctl_table sctp_net_table[] = { + .extra1 = SYSCTL_ONE, + .extra2 = &init_net.sctp.rto_max + }, +- { ++ [SCTP_RTO_MAX_IDX] = { + .procname = "rto_max", + .data = &init_net.sctp.rto_max, + .maxlen = sizeof(unsigned int), +@@ -112,6 +113,33 @@ static struct ctl_table sctp_net_table[] = { + .extra1 = &init_net.sctp.rto_min, + .extra2 = &timer_max + }, ++ [SCTP_PF_RETRANS_IDX] = { ++ .procname = "pf_retrans", ++ .data = &init_net.sctp.pf_retrans, ++ .maxlen = sizeof(int), ++ .mode = 0644, ++ .proc_handler = proc_dointvec_minmax, ++ .extra1 = SYSCTL_ZERO, ++ .extra2 = &init_net.sctp.ps_retrans, ++ }, ++ [SCTP_PS_RETRANS_IDX] = { ++ .procname = "ps_retrans", ++ .data = &init_net.sctp.ps_retrans, ++ .maxlen = sizeof(int), ++ .mode = 0644, ++ .proc_handler = proc_dointvec_minmax, ++ .extra1 = &init_net.sctp.pf_retrans, ++ .extra2 = &ps_retrans_max, ++ }, ++ { ++ .procname = "rto_initial", ++ .data = &init_net.sctp.rto_initial, ++ .maxlen = sizeof(unsigned int), ++ .mode = 0644, ++ .proc_handler = proc_dointvec_minmax, ++ .extra1 = SYSCTL_ONE, ++ .extra2 = &timer_max ++ }, + { + .procname = "rto_alpha_exp_divisor", + .data = &init_net.sctp.rto_alpha, +@@ -207,24 +235,6 @@ static struct ctl_table sctp_net_table[] = { + .extra1 = SYSCTL_ONE, + .extra2 = SYSCTL_INT_MAX, + }, +- { +- .procname = "pf_retrans", +- .data = &init_net.sctp.pf_retrans, +- .maxlen = sizeof(int), +- .mode = 0644, +- .proc_handler = proc_dointvec_minmax, +- .extra1 = SYSCTL_ZERO, +- .extra2 = &init_net.sctp.ps_retrans, +- }, +- { +- .procname = "ps_retrans", +- .data = &init_net.sctp.ps_retrans, +- .maxlen = sizeof(int), +- .mode = 0644, +- .proc_handler = proc_dointvec_minmax, +- .extra1 = &init_net.sctp.pf_retrans, +- .extra2 = &ps_retrans_max, +- }, + { + .procname = "sndbuf_policy", + .data = &init_net.sctp.sndbuf_policy, +@@ -586,6 +596,11 @@ int sctp_sysctl_net_register(struct net *net) + for (i = 0; table[i].data; i++) + table[i].data += (char *)(&net->sctp) - (char *)&init_net.sctp; + ++ table[SCTP_RTO_MIN_IDX].extra2 = &net->sctp.rto_max; ++ table[SCTP_RTO_MAX_IDX].extra1 = &net->sctp.rto_min; ++ table[SCTP_PF_RETRANS_IDX].extra2 = &net->sctp.ps_retrans; ++ table[SCTP_PS_RETRANS_IDX].extra1 = &net->sctp.pf_retrans; ++ + net->sctp.sysctl_header = register_net_sysctl(net, "net/sctp", table); + if (net->sctp.sysctl_header == NULL) { + kfree(table); diff --git a/net/sctp/transport.c b/net/sctp/transport.c index a3d3ca6dd63dd..133f1719bf1b7 100644 --- a/net/sctp/transport.c @@ -406280,7 +524278,7 @@ index a3d3ca6dd63dd..133f1719bf1b7 100644 bool sctp_transport_update_pmtu(struct sctp_transport *t, u32 pmtu) diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c -index 78b663dbfa1f9..26f81e2e1dfba 100644 +index 78b663dbfa1f9..d5ddf283ed8e2 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -148,14 +148,18 @@ static int __smc_release(struct smc_sock *smc) @@ -406738,6 +524736,32 @@ index 78b663dbfa1f9..26f81e2e1dfba 100644 } static int smc_ioctl(struct socket *sock, unsigned int 
cmd, +@@ -2559,14 +2744,14 @@ static int __init smc_init(void) + + rc = register_pernet_subsys(&smc_net_stat_ops); + if (rc) +- return rc; ++ goto out_pernet_subsys; + + smc_ism_init(); + smc_clc_init(); + + rc = smc_nl_init(); + if (rc) +- goto out_pernet_subsys; ++ goto out_pernet_subsys_stat; + + rc = smc_pnet_init(); + if (rc) +@@ -2644,6 +2829,8 @@ out_pnet: + smc_pnet_exit(); + out_nl: + smc_nl_exit(); ++out_pernet_subsys_stat: ++ unregister_pernet_subsys(&smc_net_stat_ops); + out_pernet_subsys: + unregister_pernet_subsys(&smc_net_ops); + diff --git a/net/smc/smc.h b/net/smc/smc.h index d65e15f0c944c..930544f7b2e2c 100644 --- a/net/smc/smc.h @@ -407817,6 +525841,50 @@ index a9f0d17fdb0d6..1bae32c482846 100644 continue; if (!rpcauth_unhash_cred(cred)) continue; +diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c +index 5f42aa5fc6128..2ff66a6a7e54c 100644 +--- a/net/sunrpc/auth_gss/auth_gss.c ++++ b/net/sunrpc/auth_gss/auth_gss.c +@@ -301,7 +301,7 @@ __gss_find_upcall(struct rpc_pipe *pipe, kuid_t uid, const struct gss_auth *auth + list_for_each_entry(pos, &pipe->in_downcall, list) { + if (!uid_eq(pos->uid, uid)) + continue; +- if (auth && pos->auth->service != auth->service) ++ if (pos->auth->service != auth->service) + continue; + refcount_inc(&pos->count); + return pos; +@@ -685,6 +685,21 @@ out: + return err; + } + ++static struct gss_upcall_msg * ++gss_find_downcall(struct rpc_pipe *pipe, kuid_t uid) ++{ ++ struct gss_upcall_msg *pos; ++ list_for_each_entry(pos, &pipe->in_downcall, list) { ++ if (!uid_eq(pos->uid, uid)) ++ continue; ++ if (!rpc_msg_is_inflight(&pos->msg)) ++ continue; ++ refcount_inc(&pos->count); ++ return pos; ++ } ++ return NULL; ++} ++ + #define MSG_BUF_MAXSIZE 1024 + + static ssize_t +@@ -731,7 +746,7 @@ gss_pipe_downcall(struct file *filp, const char __user *src, size_t mlen) + err = -ENOENT; + /* Find a matching upcall */ + spin_lock(&pipe->lock); +- gss_msg = __gss_find_upcall(pipe, uid, NULL); ++ gss_msg = gss_find_downcall(pipe, uid); + if (gss_msg == NULL) { + spin_unlock(&pipe->lock); + goto err_put_ctx; diff --git a/net/sunrpc/auth_gss/gss_rpc_upcall.c b/net/sunrpc/auth_gss/gss_rpc_upcall.c index 61c276bddaf25..f549e4c05defc 100644 --- a/net/sunrpc/auth_gss/gss_rpc_upcall.c @@ -407829,6 +525897,36 @@ index 61c276bddaf25..f549e4c05defc 100644 RPC_CLNT_CREATE_NO_IDLE_TIMEOUT }; struct rpc_clnt *clnt; +diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c +index 1f2817195549b..48b608cb5f5ec 100644 +--- a/net/sunrpc/auth_gss/svcauth_gss.c ++++ b/net/sunrpc/auth_gss/svcauth_gss.c +@@ -1162,18 +1162,23 @@ static int gss_read_proxy_verf(struct svc_rqst *rqstp, + return res; + + inlen = svc_getnl(argv); +- if (inlen > (argv->iov_len + rqstp->rq_arg.page_len)) ++ if (inlen > (argv->iov_len + rqstp->rq_arg.page_len)) { ++ kfree(in_handle->data); + return SVC_DENIED; ++ } + + pages = DIV_ROUND_UP(inlen, PAGE_SIZE); + in_token->pages = kcalloc(pages, sizeof(struct page *), GFP_KERNEL); +- if (!in_token->pages) ++ if (!in_token->pages) { ++ kfree(in_handle->data); + return SVC_DENIED; ++ } + in_token->page_base = 0; + in_token->page_len = inlen; + for (i = 0; i < pages; i++) { + in_token->pages[i] = alloc_page(GFP_KERNEL); + if (!in_token->pages[i]) { ++ kfree(in_handle->data); + gss_free_in_token_pages(in_token); + return SVC_DENIED; + } diff --git a/net/sunrpc/backchannel_rqst.c b/net/sunrpc/backchannel_rqst.c index 22a2c235abf1b..77e347a45344c 100644 --- a/net/sunrpc/backchannel_rqst.c @@ -407862,7 
+525960,7 @@ index 22a2c235abf1b..77e347a45344c 100644 xprt->bc_alloc_count++; atomic_inc(&xprt->bc_slot_count); diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c -index f056ff9314442..ca2a494d727b2 100644 +index f056ff9314442..bbeb80e1133df 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -76,6 +76,7 @@ static int rpc_encode_header(struct rpc_task *task, @@ -407910,6 +526008,15 @@ index f056ff9314442..ca2a494d727b2 100644 if (task->tk_flags & RPC_TASK_NO_ROUND_ROBIN) task->tk_xprt = rpc_task_get_first_xprt(clnt); else +@@ -1362,7 +1375,7 @@ static int rpc_sockname(struct net *net, struct sockaddr *sap, size_t salen, + break; + default: + err = -EAFNOSUPPORT; +- goto out; ++ goto out_release; + } + if (err < 0) { + dprintk("RPC: can't bind UDP socket (%d)\n", err); @@ -1868,7 +1881,7 @@ call_encode(struct rpc_task *task) break; case -EKEYEXPIRED: @@ -408144,7 +526251,7 @@ index 478f857cdaed4..6ea3d87e11475 100644 ret = kernel_sendmsg(sock, &msg, &rm, 1, rm.iov_len); if (ret < 0) diff --git a/net/sunrpc/sysfs.c b/net/sunrpc/sysfs.c -index 9a6f17e18f73b..a7020b1f3ec72 100644 +index 9a6f17e18f73b..55da1b627a7db 100644 --- a/net/sunrpc/sysfs.c +++ b/net/sunrpc/sysfs.c @@ -282,8 +282,10 @@ static ssize_t rpc_sysfs_xprt_state_change(struct kobject *kobj, @@ -408218,6 +526325,44 @@ index 9a6f17e18f73b..a7020b1f3ec72 100644 } } else { count = -EINVAL; +@@ -518,13 +525,16 @@ void rpc_sysfs_client_setup(struct rpc_clnt *clnt, + struct net *net) + { + struct rpc_sysfs_client *rpc_client; ++ struct rpc_sysfs_xprt_switch *xswitch = ++ (struct rpc_sysfs_xprt_switch *)xprt_switch->xps_sysfs; ++ ++ if (!xswitch) ++ return; + + rpc_client = rpc_sysfs_client_alloc(rpc_sunrpc_client_kobj, + net, clnt->cl_clid); + if (rpc_client) { + char name[] = "switch"; +- struct rpc_sysfs_xprt_switch *xswitch = +- (struct rpc_sysfs_xprt_switch *)xprt_switch->xps_sysfs; + int ret; + + clnt->cl_sysfs = rpc_client; +@@ -558,6 +568,8 @@ void rpc_sysfs_xprt_switch_setup(struct rpc_xprt_switch *xprt_switch, + rpc_xprt_switch->xprt_switch = xprt_switch; + rpc_xprt_switch->xprt = xprt; + kobject_uevent(&rpc_xprt_switch->kobject, KOBJ_ADD); ++ } else { ++ xprt_switch->xps_sysfs = NULL; + } + } + +@@ -569,6 +581,9 @@ void rpc_sysfs_xprt_setup(struct rpc_xprt_switch *xprt_switch, + struct rpc_sysfs_xprt_switch *switch_obj = + (struct rpc_sysfs_xprt_switch *)xprt_switch->xps_sysfs; + ++ if (!switch_obj) ++ return; ++ + rpc_xprt = rpc_sysfs_xprt_alloc(&switch_obj->kobject, xprt, gfp_flags); + if (rpc_xprt) { + xprt->xprt_sysfs = rpc_xprt; diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c index ca10ba2626f27..f0a0a4ad6d525 100644 --- a/net/sunrpc/xdr.c @@ -408437,7 +526582,7 @@ index 16e5696314a4f..32df237967472 100644 if (!rpcrdma_check_regbuf(r_xprt, req->rl_sendbuf, rqst->rq_callsize, flags)) diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c -index aaec3c9be8db6..1295f9ab839fd 100644 +index aaec3c9be8db6..507ba8b799920 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -438,6 +438,7 @@ static int rpcrdma_ep_create(struct rpcrdma_xprt *r_xprt) @@ -408464,6 +526609,15 @@ index aaec3c9be8db6..1295f9ab839fd 100644 goto out_destroy; } +@@ -855,7 +858,7 @@ struct rpcrdma_req *rpcrdma_req_create(struct rpcrdma_xprt *r_xprt, size_t size, + return req; + + out3: +- kfree(req->rl_sendbuf); ++ rpcrdma_regbuf_free(req->rl_sendbuf); + out2: + kfree(req); + out1: diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 04f1b78bcbca3..897dfce7dd271 100644 --- a/net/sunrpc/xprtsock.c 
@@ -408719,7 +526873,7 @@ index 3f4542e0f0650..434e70eabe081 100644 tipc_nametbl_stop(net); tipc_sk_rht_destroy(net); diff --git a/net/tipc/crypto.c b/net/tipc/crypto.c -index dc60c32bb70df..b5074957e8812 100644 +index dc60c32bb70df..4243d2ab8adfb 100644 --- a/net/tipc/crypto.c +++ b/net/tipc/crypto.c @@ -597,6 +597,10 @@ static int tipc_aead_init(struct tipc_aead **aead, struct tipc_aead_key *ukey, @@ -408733,7 +526887,17 @@ index dc60c32bb70df..b5074957e8812 100644 memcpy(&tmp->salt, ukey->key + keylen, TIPC_AES_GCM_SALT_SIZE); atomic_set(&tmp->users, 0); atomic64_set(&tmp->seqno, 0); -@@ -2283,7 +2287,7 @@ static bool tipc_crypto_key_rcv(struct tipc_crypto *rx, struct tipc_msg *hdr) +@@ -1978,6 +1982,9 @@ rcv: + /* Ok, everything's fine, try to synch own keys according to peers' */ + tipc_crypto_key_synch(rx, *skb); + ++ /* Re-fetch skb cb as skb might be changed in tipc_msg_validate */ ++ skb_cb = TIPC_SKB_CB(*skb); ++ + /* Mark skb decrypted */ + skb_cb->decrypted = 1; + +@@ -2283,7 +2290,7 @@ static bool tipc_crypto_key_rcv(struct tipc_crypto *rx, struct tipc_msg *hdr) struct tipc_crypto *tx = tipc_net(rx->net)->crypto_tx; struct tipc_aead_key *skey = NULL; u16 key_gen = msg_key_gen(hdr); @@ -408743,7 +526907,7 @@ index dc60c32bb70df..b5074957e8812 100644 unsigned int keylen; diff --git a/net/tipc/discover.c b/net/tipc/discover.c -index da69e1abf68ff..e8630707901e3 100644 +index da69e1abf68ff..e8dcdf267c0c3 100644 --- a/net/tipc/discover.c +++ b/net/tipc/discover.c @@ -148,8 +148,8 @@ static bool tipc_disc_addr_trial_msg(struct tipc_discoverer *d, @@ -408756,8 +526920,20 @@ index da69e1abf68ff..e8630707901e3 100644 if (mtyp == DSC_TRIAL_FAIL_MSG) { if (!trial) +@@ -211,7 +211,10 @@ void tipc_disc_rcv(struct net *net, struct sk_buff *skb, + u32 self; + int err; + +- skb_linearize(skb); ++ if (skb_linearize(skb)) { ++ kfree_skb(skb); ++ return; ++ } + hdr = buf_msg(skb); + + if (caps & TIPC_NODE_ID128) diff --git a/net/tipc/link.c b/net/tipc/link.c -index 1b7a487c88419..115a4a7950f50 100644 +index 1b7a487c88419..8fdd3b23bd123 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -1298,8 +1298,11 @@ static bool tipc_data_input(struct tipc_link *l, struct sk_buff *skb, @@ -408783,7 +526959,7 @@ index 1b7a487c88419..115a4a7950f50 100644 u16 peers_snd_nxt = msg_next_sent(hdr); u16 peers_tol = msg_link_tolerance(hdr); u16 peers_prio = msg_linkprio(hdr); -@@ -2210,6 +2213,10 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb, +@@ -2210,13 +2213,19 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb, void *data; trace_tipc_proto_rcv(skb, false, l->name); @@ -408794,7 +526970,17 @@ index 1b7a487c88419..115a4a7950f50 100644 if (tipc_link_is_blocked(l) || !xmitq) goto exit; -@@ -2278,6 +2285,11 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb, + if (tipc_own_addr(l->net) > msg_prevnode(hdr)) + l->net_plane = msg_net_plane(hdr); + +- skb_linearize(skb); ++ if (skb_linearize(skb)) ++ goto exit; ++ + hdr = buf_msg(skb); + data = msg_data(hdr); + +@@ -2278,6 +2287,11 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb, break; case STATE_MSG: @@ -408806,7 +526992,7 @@ index 1b7a487c88419..115a4a7950f50 100644 l->rcv_nxt_state = msg_seqno(hdr) + 1; /* Update own tolerance if peer indicates a non-zero value */ -@@ -2303,9 +2315,6 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb, +@@ -2303,9 +2317,6 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb, break; } 
@@ -408864,8 +527050,21 @@ index 01396dd1c899b..1d8ba233d0474 100644 return -EPIPE; } else { p = list_first_entry(&sr->all_publ, +diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c +index 0749df80454d4..ce00f271ca6b2 100644 +--- a/net/tipc/netlink_compat.c ++++ b/net/tipc/netlink_compat.c +@@ -880,7 +880,7 @@ static int tipc_nl_compat_name_table_dump_header(struct tipc_nl_compat_msg *msg) + }; + + ntq = (struct tipc_name_table_query *)TLV_DATA(msg->req); +- if (TLV_GET_DATA_LEN(msg->req) < sizeof(struct tipc_name_table_query)) ++ if (TLV_GET_DATA_LEN(msg->req) < (int)sizeof(struct tipc_name_table_query)) + return -EINVAL; + + depth = ntohl(ntq->depth); diff --git a/net/tipc/node.c b/net/tipc/node.c -index 9947b7dfe1d2d..b48d97cbbe29c 100644 +index 9947b7dfe1d2d..5e000fde80676 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -403,7 +403,7 @@ static void tipc_node_write_unlock(struct tipc_node *n) @@ -408954,7 +527153,7 @@ index 9947b7dfe1d2d..b48d97cbbe29c 100644 tipc_node_get(n); timer_setup(&n->timer, tipc_node_timeout, 0); /* Start a slow timer anyway, crypto needs it */ -@@ -1154,7 +1174,7 @@ void tipc_node_check_dest(struct net *net, u32 addr, +@@ -1154,13 +1174,14 @@ void tipc_node_check_dest(struct net *net, u32 addr, bool *respond, bool *dupl_addr) { struct tipc_node *n; @@ -408963,7 +527162,15 @@ index 9947b7dfe1d2d..b48d97cbbe29c 100644 struct tipc_link_entry *le; bool addr_match = false; bool sign_match = false; -@@ -1174,22 +1194,6 @@ void tipc_node_check_dest(struct net *net, u32 addr, + bool link_up = false; ++ bool link_is_reset = false; + bool accept_addr = false; +- bool reset = true; ++ bool reset = false; + char *if_name; + unsigned long intv; + u16 session; +@@ -1174,36 +1195,20 @@ void tipc_node_check_dest(struct net *net, u32 addr, return; tipc_node_write_lock(n); @@ -408986,6 +527193,90 @@ index 9947b7dfe1d2d..b48d97cbbe29c 100644 le = &n->links[b->identity]; + /* Prepare to validate requesting node's signature and media address */ + l = le->link; + link_up = l && tipc_link_is_up(l); ++ link_is_reset = l && tipc_link_is_reset(l); + addr_match = l && !memcmp(&le->maddr, maddr, sizeof(*maddr)); + sign_match = (signature == n->signature); + + /* These three flags give us eight permutations: */ + + if (sign_match && addr_match && link_up) { +- /* All is fine. Do nothing. */ +- reset = false; ++ /* All is fine. Ignore requests. */ + /* Peer node is not a container/local namespace */ + if (!n->peer_hash_mix) + n->peer_hash_mix = hash_mixes; +@@ -1228,6 +1233,7 @@ void tipc_node_check_dest(struct net *net, u32 addr, + */ + accept_addr = true; + *respond = true; ++ reset = true; + } else if (!sign_match && addr_match && link_up) { + /* Peer node rebooted. 
Two possibilities: + * - Delayed re-discovery; this link endpoint has already +@@ -1259,6 +1265,7 @@ void tipc_node_check_dest(struct net *net, u32 addr, + n->signature = signature; + accept_addr = true; + *respond = true; ++ reset = true; + } + + if (!accept_addr) +@@ -1287,6 +1294,7 @@ void tipc_node_check_dest(struct net *net, u32 addr, + tipc_link_fsm_evt(l, LINK_RESET_EVT); + if (n->state == NODE_FAILINGOVER) + tipc_link_fsm_evt(l, LINK_FAILOVER_BEGIN_EVT); ++ link_is_reset = tipc_link_is_reset(l); + le->link = l; + n->link_cnt++; + tipc_node_calculate_timer(n, l); +@@ -1299,7 +1307,7 @@ void tipc_node_check_dest(struct net *net, u32 addr, + memcpy(&le->maddr, maddr, sizeof(*maddr)); + exit: + tipc_node_write_unlock(n); +- if (reset && l && !tipc_link_is_reset(l)) ++ if (reset && !link_is_reset) + tipc_node_link_down(n, b->identity, false); + tipc_node_put(n); + } +@@ -1685,6 +1693,7 @@ int tipc_node_xmit(struct net *net, struct sk_buff_head *list, + struct tipc_node *n; + struct sk_buff_head xmitq; + bool node_up = false; ++ struct net *peer_net; + int bearer_id; + int rc; + +@@ -1701,18 +1710,23 @@ int tipc_node_xmit(struct net *net, struct sk_buff_head *list, + return -EHOSTUNREACH; + } + ++ rcu_read_lock(); + tipc_node_read_lock(n); + node_up = node_is_up(n); +- if (node_up && n->peer_net && check_net(n->peer_net)) { ++ peer_net = n->peer_net; ++ tipc_node_read_unlock(n); ++ if (node_up && peer_net && check_net(peer_net)) { + /* xmit inner linux container */ +- tipc_lxc_xmit(n->peer_net, list); ++ tipc_lxc_xmit(peer_net, list); + if (likely(skb_queue_empty(list))) { +- tipc_node_read_unlock(n); ++ rcu_read_unlock(); + tipc_node_put(n); + return 0; + } + } ++ rcu_read_unlock(); + ++ tipc_node_read_lock(n); + bearer_id = n->active_links[selector & 1]; + if (unlikely(bearer_id == INVALID_BEARER_ID)) { + tipc_node_read_unlock(n); diff --git a/net/tipc/socket.c b/net/tipc/socket.c index ad570c2450be8..f1c3b8eb4b3d3 100644 --- a/net/tipc/socket.c @@ -409036,18 +527327,119 @@ index ad570c2450be8..f1c3b8eb4b3d3 100644 * this means that setting prev_seq here will cause the * consistence check to fail in the netlink callback diff --git a/net/tipc/topsrv.c b/net/tipc/topsrv.c -index 5522865deae95..14fd05fd6107d 100644 +index 5522865deae95..e3b427a703980 100644 --- a/net/tipc/topsrv.c +++ b/net/tipc/topsrv.c -@@ -568,7 +568,7 @@ bool tipc_topsrv_kern_subscr(struct net *net, u32 port, u32 type, u32 lower, +@@ -176,7 +176,7 @@ static void tipc_conn_close(struct tipc_conn *con) + conn_put(con); + } + +-static struct tipc_conn *tipc_conn_alloc(struct tipc_topsrv *s) ++static struct tipc_conn *tipc_conn_alloc(struct tipc_topsrv *s, struct socket *sock) + { + struct tipc_conn *con; + int ret; +@@ -202,10 +202,12 @@ static struct tipc_conn *tipc_conn_alloc(struct tipc_topsrv *s) + } + con->conid = ret; + s->idr_in_use++; +- spin_unlock_bh(&s->idr_lock); + + set_bit(CF_CONNECTED, &con->flags); + con->server = s; ++ con->sock = sock; ++ conn_get(con); ++ spin_unlock_bh(&s->idr_lock); + + return con; + } +@@ -450,17 +452,24 @@ static void tipc_conn_data_ready(struct sock *sk) + static void tipc_topsrv_accept(struct work_struct *work) + { + struct tipc_topsrv *srv = container_of(work, struct tipc_topsrv, awork); +- struct socket *lsock = srv->listener; +- struct socket *newsock; ++ struct socket *newsock, *lsock; + struct tipc_conn *con; + struct sock *newsk; + int ret; + ++ spin_lock_bh(&srv->idr_lock); ++ if (!srv->listener) { ++ spin_unlock_bh(&srv->idr_lock); ++ return; ++ } ++ lsock = 
srv->listener; ++ spin_unlock_bh(&srv->idr_lock); ++ + while (1) { + ret = kernel_accept(lsock, &newsock, O_NONBLOCK); + if (ret < 0) + return; +- con = tipc_conn_alloc(srv); ++ con = tipc_conn_alloc(srv, newsock); + if (IS_ERR(con)) { + ret = PTR_ERR(con); + sock_release(newsock); +@@ -472,11 +481,11 @@ static void tipc_topsrv_accept(struct work_struct *work) + newsk->sk_data_ready = tipc_conn_data_ready; + newsk->sk_write_space = tipc_conn_write_space; + newsk->sk_user_data = con; +- con->sock = newsock; + write_unlock_bh(&newsk->sk_callback_lock); + + /* Wake up receive process in case of 'SYN+' message */ + newsk->sk_data_ready(newsk); ++ conn_put(con); + } + } + +@@ -489,7 +498,7 @@ static void tipc_topsrv_listener_data_ready(struct sock *sk) + + read_lock_bh(&sk->sk_callback_lock); + srv = sk->sk_user_data; +- if (srv->listener) ++ if (srv) + queue_work(srv->rcv_wq, &srv->awork); + read_unlock_bh(&sk->sk_callback_lock); + } +@@ -568,19 +577,19 @@ bool tipc_topsrv_kern_subscr(struct net *net, u32 port, u32 type, u32 lower, sub.seq.upper = upper; sub.timeout = TIPC_WAIT_FOREVER; sub.filter = filter; - *(u32 *)&sub.usr_handle = port; + *(u64 *)&sub.usr_handle = (u64)port; - con = tipc_conn_alloc(tipc_topsrv(net)); +- con = tipc_conn_alloc(tipc_topsrv(net)); ++ con = tipc_conn_alloc(tipc_topsrv(net), NULL); if (IS_ERR(con)) + return false; + + *conid = con->conid; +- con->sock = NULL; + rc = tipc_conn_rcv_sub(tipc_topsrv(net), con, &sub); +- if (rc >= 0) +- return true; ++ if (rc) ++ conn_put(con); ++ + conn_put(con); +- return false; ++ return !rc; + } + + void tipc_topsrv_kern_unsubscr(struct net *net, int conid) +@@ -699,8 +708,9 @@ static void tipc_topsrv_stop(struct net *net) + __module_get(lsock->sk->sk_prot_creator->owner); + srv->listener = NULL; + spin_unlock_bh(&srv->idr_lock); +- sock_release(lsock); ++ + tipc_topsrv_work_stop(srv); ++ sock_release(lsock); + idr_destroy(&srv->conn_idr); + kfree(srv); + } diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c index b932469ee69cc..cf75969375cfa 100644 --- a/net/tls/tls_device.c @@ -409243,7 +527635,7 @@ index 9ab81db8a6545..a947cfb100bda 100644 return 0; diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c -index 1b08b877a8900..794ef3b3d7d4b 100644 +index 1b08b877a8900..c0fea678abb1c 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -515,7 +515,7 @@ static int tls_do_encryption(struct sock *sk, @@ -409255,7 +527647,34 @@ index 1b08b877a8900..794ef3b3d7d4b 100644 sge->offset += prot->prepend_size; sge->length -= prot->prepend_size; -@@ -1483,11 +1483,11 @@ static int decrypt_internal(struct sock *sk, struct sk_buff *skb, +@@ -801,7 +801,7 @@ static int bpf_exec_tx_verdict(struct sk_msg *msg, struct sock *sk, + struct sk_psock *psock; + struct sock *sk_redir; + struct tls_rec *rec; +- bool enospc, policy; ++ bool enospc, policy, redir_ingress; + int err = 0, send; + u32 delta = 0; + +@@ -846,6 +846,7 @@ more_data: + } + break; + case __SK_REDIRECT: ++ redir_ingress = psock->redir_ingress; + sk_redir = psock->sk_redir; + memcpy(&msg_redir, msg, sizeof(*msg)); + if (msg->apply_bytes < send) +@@ -855,7 +856,8 @@ more_data: + sk_msg_return_zero(sk, msg, send); + msg->sg.size -= send; + release_sock(sk); +- err = tcp_bpf_sendmsg_redir(sk_redir, &msg_redir, send, flags); ++ err = tcp_bpf_sendmsg_redir(sk_redir, redir_ingress, ++ &msg_redir, send, flags); + lock_sock(sk); + if (err < 0) { + *copied -= sk_msg_free_nocharge(sk, &msg_redir); +@@ -1483,11 +1485,11 @@ static int decrypt_internal(struct sock *sk, struct sk_buff *skb, if 
(prot->version == TLS_1_3_VERSION || prot->cipher_type == TLS_CIPHER_CHACHA20_POLY1305) memcpy(iv + iv_offset, tls_ctx->rx.iv, @@ -409269,7 +527688,7 @@ index 1b08b877a8900..794ef3b3d7d4b 100644 /* Prepare AAD */ tls_make_aad(aad, rxm->full_len - prot->overhead_size + -@@ -1993,6 +1993,7 @@ ssize_t tls_sw_splice_read(struct socket *sock, loff_t *ppos, +@@ -1993,6 +1995,7 @@ ssize_t tls_sw_splice_read(struct socket *sock, loff_t *ppos, struct sock *sk = sock->sk; struct sk_buff *skb; ssize_t copied = 0; @@ -409277,7 +527696,7 @@ index 1b08b877a8900..794ef3b3d7d4b 100644 int err = 0; long timeo; int chunk; -@@ -2002,25 +2003,28 @@ ssize_t tls_sw_splice_read(struct socket *sock, loff_t *ppos, +@@ -2002,25 +2005,28 @@ ssize_t tls_sw_splice_read(struct socket *sock, loff_t *ppos, timeo = sock_rcvtimeo(sk, flags & SPLICE_F_NONBLOCK); @@ -409318,7 +527737,7 @@ index 1b08b877a8900..794ef3b3d7d4b 100644 rxm = strp_msg(skb); chunk = min_t(unsigned int, rxm->full_len, len); -@@ -2028,7 +2032,17 @@ ssize_t tls_sw_splice_read(struct socket *sock, loff_t *ppos, +@@ -2028,7 +2034,17 @@ ssize_t tls_sw_splice_read(struct socket *sock, loff_t *ppos, if (copied < 0) goto splice_read_end; @@ -409338,7 +527757,7 @@ index 1b08b877a8900..794ef3b3d7d4b 100644 splice_read_end: release_sock(sk); diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c -index 78e08e82c08c4..b7be8d066753c 100644 +index 78e08e82c08c4..0a59a00cb5815 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -446,7 +446,7 @@ static int unix_dgram_peer_wake_me(struct sock *sk, struct sock *other) @@ -409350,7 +527769,57 @@ index 78e08e82c08c4..b7be8d066753c 100644 return 1; if (connected) -@@ -1996,7 +1996,7 @@ static int queue_oob(struct socket *sock, struct msghdr *msg, struct sock *other +@@ -504,12 +504,6 @@ static void unix_sock_destructor(struct sock *sk) + + skb_queue_purge(&sk->sk_receive_queue); + +-#if IS_ENABLED(CONFIG_AF_UNIX_OOB) +- if (u->oob_skb) { +- kfree_skb(u->oob_skb); +- u->oob_skb = NULL; +- } +-#endif + WARN_ON(refcount_read(&sk->sk_wmem_alloc)); + WARN_ON(!sk_unhashed(sk)); + WARN_ON(sk->sk_socket); +@@ -556,6 +550,13 @@ static void unix_release_sock(struct sock *sk, int embrion) + + unix_state_unlock(sk); + ++#if IS_ENABLED(CONFIG_AF_UNIX_OOB) ++ if (u->oob_skb) { ++ kfree_skb(u->oob_skb); ++ u->oob_skb = NULL; ++ } ++#endif ++ + wake_up_interruptible_all(&u->peer_wait); + + if (skpair != NULL) { +@@ -1864,13 +1865,20 @@ restart_locked: + unix_state_lock(sk); + + err = 0; +- if (unix_peer(sk) == other) { ++ if (sk->sk_type == SOCK_SEQPACKET) { ++ /* We are here only when racing with unix_release_sock() ++ * is clearing @other. Never change state to TCP_CLOSE ++ * unlike SOCK_DGRAM wants. 
++ */ ++ unix_state_unlock(sk); ++ err = -EPIPE; ++ } else if (unix_peer(sk) == other) { + unix_peer(sk) = NULL; + unix_dgram_peer_wake_disconnect_wakeup(sk, other); + ++ sk->sk_state = TCP_CLOSE; + unix_state_unlock(sk); + +- sk->sk_state = TCP_CLOSE; + unix_dgram_disconnected(sk, other); + sock_put(other); + err = -ECONNREFUSED; +@@ -1996,7 +2004,7 @@ static int queue_oob(struct socket *sock, struct msghdr *msg, struct sock *other if (ousk->oob_skb) consume_skb(ousk->oob_skb); @@ -409359,7 +527828,7 @@ index 78e08e82c08c4..b7be8d066753c 100644 scm_stat_add(other, skb); skb_queue_tail(&other->sk_receive_queue, skb); -@@ -2514,9 +2514,8 @@ static int unix_stream_recv_urg(struct unix_stream_read_state *state) +@@ -2514,9 +2522,8 @@ static int unix_stream_recv_urg(struct unix_stream_read_state *state) oob_skb = u->oob_skb; @@ -409371,7 +527840,7 @@ index 78e08e82c08c4..b7be8d066753c 100644 unix_state_unlock(sk); -@@ -2551,7 +2550,7 @@ static struct sk_buff *manage_oob(struct sk_buff *skb, struct sock *sk, +@@ -2551,7 +2558,7 @@ static struct sk_buff *manage_oob(struct sk_buff *skb, struct sock *sk, skb = NULL; } else if (sock_flag(sk, SOCK_URGINLINE)) { if (!(flags & MSG_PEEK)) { @@ -409380,7 +527849,7 @@ index 78e08e82c08c4..b7be8d066753c 100644 consume_skb(skb); } } else if (!(flags & MSG_PEEK)) { -@@ -2882,9 +2881,6 @@ static int unix_shutdown(struct socket *sock, int mode) +@@ -2882,9 +2889,6 @@ static int unix_shutdown(struct socket *sock, int mode) unix_state_lock(sk); sk->sk_shutdown |= mode; @@ -409390,7 +527859,7 @@ index 78e08e82c08c4..b7be8d066753c 100644 other = unix_peer(sk); if (other) sock_hold(other); -@@ -3009,11 +3005,10 @@ static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) +@@ -3009,11 +3013,10 @@ static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) case SIOCATMARK: { struct sk_buff *skb; @@ -409403,7 +527872,7 @@ index 78e08e82c08c4..b7be8d066753c 100644 answ = 1; err = put_user(answ, (int __user *)arg); } -@@ -3054,6 +3049,10 @@ static __poll_t unix_poll(struct file *file, struct socket *sock, poll_table *wa +@@ -3054,6 +3057,10 @@ static __poll_t unix_poll(struct file *file, struct socket *sock, poll_table *wa mask |= EPOLLIN | EPOLLRDNORM; if (sk_is_readable(sk)) mask |= EPOLLIN | EPOLLRDNORM; @@ -409414,6 +527883,85 @@ index 78e08e82c08c4..b7be8d066753c 100644 /* Connection-based need to check for termination and startup */ if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) && +@@ -3401,6 +3408,7 @@ static int __init af_unix_init(void) + rc = proto_register(&unix_stream_proto, 1); + if (rc != 0) { + pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__); ++ proto_unregister(&unix_dgram_proto); + goto out; + } + +diff --git a/net/unix/diag.c b/net/unix/diag.c +index 7e7d7f45685af..e534e327a6a5a 100644 +--- a/net/unix/diag.c ++++ b/net/unix/diag.c +@@ -113,14 +113,16 @@ static int sk_diag_show_rqlen(struct sock *sk, struct sk_buff *nlskb) + return nla_put(nlskb, UNIX_DIAG_RQLEN, sizeof(rql), &rql); + } + +-static int sk_diag_dump_uid(struct sock *sk, struct sk_buff *nlskb) ++static int sk_diag_dump_uid(struct sock *sk, struct sk_buff *nlskb, ++ struct user_namespace *user_ns) + { +- uid_t uid = from_kuid_munged(sk_user_ns(nlskb->sk), sock_i_uid(sk)); ++ uid_t uid = from_kuid_munged(user_ns, sock_i_uid(sk)); + return nla_put(nlskb, UNIX_DIAG_UID, sizeof(uid_t), &uid); + } + + static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, struct unix_diag_req *req, +- u32 portid, u32 seq, u32 
flags, int sk_ino) ++ struct user_namespace *user_ns, ++ u32 portid, u32 seq, u32 flags, int sk_ino) + { + struct nlmsghdr *nlh; + struct unix_diag_msg *rep; +@@ -166,7 +168,7 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, struct unix_diag_r + goto out_nlmsg_trim; + + if ((req->udiag_show & UDIAG_SHOW_UID) && +- sk_diag_dump_uid(sk, skb)) ++ sk_diag_dump_uid(sk, skb, user_ns)) + goto out_nlmsg_trim; + + nlmsg_end(skb, nlh); +@@ -178,7 +180,8 @@ out_nlmsg_trim: + } + + static int sk_diag_dump(struct sock *sk, struct sk_buff *skb, struct unix_diag_req *req, +- u32 portid, u32 seq, u32 flags) ++ struct user_namespace *user_ns, ++ u32 portid, u32 seq, u32 flags) + { + int sk_ino; + +@@ -189,7 +192,7 @@ static int sk_diag_dump(struct sock *sk, struct sk_buff *skb, struct unix_diag_r + if (!sk_ino) + return 0; + +- return sk_diag_fill(sk, skb, req, portid, seq, flags, sk_ino); ++ return sk_diag_fill(sk, skb, req, user_ns, portid, seq, flags, sk_ino); + } + + static int unix_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) +@@ -217,7 +220,7 @@ static int unix_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) + goto next; + if (!(req->udiag_states & (1 << sk->sk_state))) + goto next; +- if (sk_diag_dump(sk, skb, req, ++ if (sk_diag_dump(sk, skb, req, sk_user_ns(skb->sk), + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, + NLM_F_MULTI) < 0) +@@ -285,7 +288,8 @@ again: + if (!rep) + goto out; + +- err = sk_diag_fill(sk, rep, req, NETLINK_CB(in_skb).portid, ++ err = sk_diag_fill(sk, rep, req, sk_user_ns(NETLINK_CB(in_skb).sk), ++ NETLINK_CB(in_skb).portid, + nlh->nlmsg_seq, 0, req->udiag_ino); + if (err < 0) { + nlmsg_free(rep); diff --git a/net/unix/garbage.c b/net/unix/garbage.c index 12e2ddaf887f2..dc27635403932 100644 --- a/net/unix/garbage.c @@ -409513,7 +528061,7 @@ index 052ae709ce289..aa27a02478dc1 100644 user->unix_inflight--; spin_unlock(&unix_gc_lock); diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c -index e2c0cfb334d20..5d46036f3ad74 100644 +index e2c0cfb334d20..dc36a46ce0e75 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -333,7 +333,8 @@ void vsock_remove_sock(struct vsock_sock *vsk) @@ -409581,6 +528129,19 @@ index e2c0cfb334d20..5d46036f3ad74 100644 goto out_wait; } else if (timeout == 0) { err = -ETIMEDOUT; +@@ -1881,8 +1897,11 @@ static int vsock_connectible_wait_data(struct sock *sk, + err = 0; + transport = vsk->transport; + +- while ((data = vsock_connectible_has_data(vsk)) == 0) { ++ while (1) { + prepare_to_wait(sk_sleep(sk), wait, TASK_INTERRUPTIBLE); ++ data = vsock_connectible_has_data(vsk); ++ if (data != 0) ++ break; + + if (sk->sk_err != 0 || + (sk->sk_shutdown & RCV_SHUTDOWN) || diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c index 4f7c99dfd16cf..c5f936fbf876d 100644 --- a/net/vmw_vsock/virtio_transport.c @@ -409662,7 +528223,7 @@ index 59ee1be5a6dd3..3a12aee33e92f 100644 } EXPORT_SYMBOL_GPL(virtio_transport_free_pkt); diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c -index 7aef34e32bdf8..b17dc9745188e 100644 +index 7aef34e32bdf8..94c1112f1c8c3 100644 --- a/net/vmw_vsock/vmci_transport.c +++ b/net/vmw_vsock/vmci_transport.c @@ -75,6 +75,8 @@ static u32 vmci_transport_qp_resumed_sub_id = VMCI_INVALID_ID; @@ -409684,6 +528245,19 @@ index 7aef34e32bdf8..b17dc9745188e 100644 } static void vmci_transport_recv_pkt_work(struct work_struct *work) +@@ -1708,7 +1711,11 @@ static int vmci_transport_dgram_enqueue( + if (!dg) + return -ENOMEM; 
+ +- memcpy_from_msg(VMCI_DG_PAYLOAD(dg), msg, len); ++ err = memcpy_from_msg(VMCI_DG_PAYLOAD(dg), msg, len); ++ if (err) { ++ kfree(dg); ++ return err; ++ } + + dg->dst = vmci_make_handle(remote_addr->svm_cid, + remote_addr->svm_port); diff --git a/net/wireless/core.c b/net/wireless/core.c index aaba847d79eb2..441136646f89a 100644 --- a/net/wireless/core.c @@ -409991,7 +528565,7 @@ index a3f387770f1bf..d642e3be4ee78 100644 struct genl_info *info, struct cfg80211_chan_def *chandef); diff --git a/net/wireless/reg.c b/net/wireless/reg.c -index df87c7f3a0492..54c13ea7d977e 100644 +index df87c7f3a0492..d0fbe822e7934 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -806,6 +806,8 @@ static int __init load_builtin_regdb_keys(void) @@ -410003,7 +528577,7 @@ index df87c7f3a0492..54c13ea7d977e 100644 static bool regdb_has_valid_signature(const u8 *data, unsigned int size) { const struct firmware *sig; -@@ -1077,6 +1079,8 @@ static void regdb_fw_cb(const struct firmware *fw, void *context) +@@ -1077,8 +1079,12 @@ static void regdb_fw_cb(const struct firmware *fw, void *context) release_firmware(fw); } @@ -410011,8 +528585,29 @@ index df87c7f3a0492..54c13ea7d977e 100644 + static int query_regdb_file(const char *alpha2) { ++ int err; ++ ASSERT_RTNL(); -@@ -2338,6 +2342,7 @@ static bool reg_wdev_chan_valid(struct wiphy *wiphy, struct wireless_dev *wdev) + + if (regdb) +@@ -1088,9 +1094,13 @@ static int query_regdb_file(const char *alpha2) + if (!alpha2) + return -ENOMEM; + +- return request_firmware_nowait(THIS_MODULE, true, "regulatory.db", +- ®_pdev->dev, GFP_KERNEL, +- (void *)alpha2, regdb_fw_cb); ++ err = request_firmware_nowait(THIS_MODULE, true, "regulatory.db", ++ ®_pdev->dev, GFP_KERNEL, ++ (void *)alpha2, regdb_fw_cb); ++ if (err) ++ kfree(alpha2); ++ ++ return err; + } + + int reg_reload_regdb(void) +@@ -2338,6 +2348,7 @@ static bool reg_wdev_chan_valid(struct wiphy *wiphy, struct wireless_dev *wdev) struct cfg80211_chan_def chandef = {}; struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); enum nl80211_iftype iftype; @@ -410020,7 +528615,7 @@ index df87c7f3a0492..54c13ea7d977e 100644 wdev_lock(wdev); iftype = wdev->iftype; -@@ -2387,7 +2392,11 @@ static bool reg_wdev_chan_valid(struct wiphy *wiphy, struct wireless_dev *wdev) +@@ -2387,7 +2398,11 @@ static bool reg_wdev_chan_valid(struct wiphy *wiphy, struct wireless_dev *wdev) case NL80211_IFTYPE_AP: case NL80211_IFTYPE_P2P_GO: case NL80211_IFTYPE_ADHOC: @@ -410033,8 +528628,20 @@ index df87c7f3a0492..54c13ea7d977e 100644 case NL80211_IFTYPE_STATION: case NL80211_IFTYPE_P2P_CLIENT: return cfg80211_chandef_usable(wiphy, &chandef, +@@ -4232,8 +4247,10 @@ static int __init regulatory_init_db(void) + return -EINVAL; + + err = load_builtin_regdb_keys(); +- if (err) ++ if (err) { ++ platform_device_unregister(reg_pdev); + return err; ++ } + + /* We always try to get an update for the static regdomain */ + err = regulatory_hint_core(cfg80211_world_regdom->alpha2); diff --git a/net/wireless/scan.c b/net/wireless/scan.c -index adc0d14cfd860..f0de22a6caf72 100644 +index adc0d14cfd860..ef31e401d7914 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -143,18 +143,12 @@ static inline void bss_ref_get(struct cfg80211_registered_device *rdev, @@ -410072,7 +528679,17 @@ index adc0d14cfd860..f0de22a6caf72 100644 if (tmp_old[0] == 0) { tmp_old++; continue; -@@ -364,7 +359,8 @@ static size_t cfg80211_gen_new_ie(const u8 *ie, size_t ielen, +@@ -335,7 +330,8 @@ static size_t cfg80211_gen_new_ie(const u8 *ie, size_t ielen, + * 
determine if they are the same ie. + */ + if (tmp_old[0] == WLAN_EID_VENDOR_SPECIFIC) { +- if (!memcmp(tmp_old + 2, tmp + 2, 5)) { ++ if (tmp_old[1] >= 5 && tmp[1] >= 5 && ++ !memcmp(tmp_old + 2, tmp + 2, 5)) { + /* same vendor ie, copy from + * subelement + */ +@@ -364,7 +360,8 @@ static size_t cfg80211_gen_new_ie(const u8 *ie, size_t ielen, * copied to new ie, skip ssid, capability, bssid-index ie */ tmp_new = sub_copy; @@ -410082,7 +528699,7 @@ index adc0d14cfd860..f0de22a6caf72 100644 if (!(tmp_new[0] == WLAN_EID_NON_TX_BSSID_CAP || tmp_new[0] == WLAN_EID_SSID)) { memcpy(pos, tmp_new, tmp_new[1] + 2); -@@ -429,6 +425,15 @@ cfg80211_add_nontrans_list(struct cfg80211_bss *trans_bss, +@@ -429,6 +426,15 @@ cfg80211_add_nontrans_list(struct cfg80211_bss *trans_bss, rcu_read_unlock(); @@ -410098,7 +528715,7 @@ index adc0d14cfd860..f0de22a6caf72 100644 /* add to the list */ list_add_tail(&nontrans_bss->nontrans_list, &trans_bss->nontrans_list); return 0; -@@ -702,8 +707,12 @@ static bool cfg80211_find_ssid_match(struct cfg80211_colocated_ap *ap, +@@ -702,8 +708,12 @@ static bool cfg80211_find_ssid_match(struct cfg80211_colocated_ap *ap, for (i = 0; i < request->n_ssids; i++) { /* wildcard ssid in the scan request */ @@ -410112,7 +528729,7 @@ index adc0d14cfd860..f0de22a6caf72 100644 if (ap->ssid_len && ap->ssid_len == request->ssids[i].ssid_len) { -@@ -829,6 +838,9 @@ static int cfg80211_scan_6ghz(struct cfg80211_registered_device *rdev) +@@ -829,6 +839,9 @@ static int cfg80211_scan_6ghz(struct cfg80211_registered_device *rdev) !cfg80211_find_ssid_match(ap, request)) continue; @@ -410122,7 +528739,7 @@ index adc0d14cfd860..f0de22a6caf72 100644 cfg80211_scan_req_add_chan(request, chan, true); memcpy(scan_6ghz_params->bssid, ap->bssid, ETH_ALEN); scan_6ghz_params->short_ssid = ap->short_ssid; -@@ -1597,6 +1609,23 @@ struct cfg80211_non_tx_bss { +@@ -1597,6 +1610,23 @@ struct cfg80211_non_tx_bss { u8 bssid_index; }; @@ -410146,7 +528763,7 @@ index adc0d14cfd860..f0de22a6caf72 100644 static bool cfg80211_update_known_bss(struct cfg80211_registered_device *rdev, struct cfg80211_internal_bss *known, -@@ -1620,7 +1649,6 @@ cfg80211_update_known_bss(struct cfg80211_registered_device *rdev, +@@ -1620,7 +1650,6 @@ cfg80211_update_known_bss(struct cfg80211_registered_device *rdev, kfree_rcu((struct cfg80211_bss_ies *)old, rcu_head); } else if (rcu_access_pointer(new->pub.beacon_ies)) { const struct cfg80211_bss_ies *old; @@ -410154,7 +528771,7 @@ index adc0d14cfd860..f0de22a6caf72 100644 if (known->pub.hidden_beacon_bss && !list_empty(&known->hidden_list)) { -@@ -1648,16 +1676,7 @@ cfg80211_update_known_bss(struct cfg80211_registered_device *rdev, +@@ -1648,16 +1677,9 @@ cfg80211_update_known_bss(struct cfg80211_registered_device *rdev, if (old == rcu_access_pointer(known->pub.ies)) rcu_assign_pointer(known->pub.ies, new->pub.beacon_ies); @@ -410168,11 +528785,13 @@ index adc0d14cfd860..f0de22a6caf72 100644 - rcu_assign_pointer(bss->pub.beacon_ies, - new->pub.beacon_ies); - } -+ cfg80211_update_hidden_bsses(known, new->pub.beacon_ies, old); ++ cfg80211_update_hidden_bsses(known, ++ rcu_access_pointer(new->pub.beacon_ies), ++ old); if (old) kfree_rcu((struct cfg80211_bss_ies *)old, rcu_head); -@@ -1734,6 +1753,8 @@ cfg80211_bss_update(struct cfg80211_registered_device *rdev, +@@ -1734,6 +1756,8 @@ cfg80211_bss_update(struct cfg80211_registered_device *rdev, new->refcount = 1; INIT_LIST_HEAD(&new->hidden_list); INIT_LIST_HEAD(&new->pub.nontrans_list); @@ -410181,7 +528800,7 @@ index 
adc0d14cfd860..f0de22a6caf72 100644 if (rcu_access_pointer(tmp->pub.proberesp_ies)) { hidden = rb_find_bss(rdev, tmp, BSS_CMP_HIDE_ZLEN); -@@ -1971,11 +1992,18 @@ cfg80211_inform_single_bss_data(struct wiphy *wiphy, +@@ -1971,11 +1995,18 @@ cfg80211_inform_single_bss_data(struct wiphy *wiphy, /* this is a nontransmitting bss, we need to add it to * transmitting bss' list if it is not there */ @@ -410201,7 +528820,7 @@ index adc0d14cfd860..f0de22a6caf72 100644 } trace_cfg80211_return_bss(&res->pub); -@@ -2094,6 +2122,8 @@ static void cfg80211_parse_mbssid_data(struct wiphy *wiphy, +@@ -2094,6 +2125,8 @@ static void cfg80211_parse_mbssid_data(struct wiphy *wiphy, for_each_element_id(elem, WLAN_EID_MULTIPLE_BSSID, ie, ielen) { if (elem->datalen < 4) continue; @@ -410210,7 +528829,7 @@ index adc0d14cfd860..f0de22a6caf72 100644 for_each_element(sub, elem->data + 1, elem->datalen - 1) { u8 profile_len; -@@ -2229,7 +2259,7 @@ cfg80211_update_notlisted_nontrans(struct wiphy *wiphy, +@@ -2229,7 +2262,7 @@ cfg80211_update_notlisted_nontrans(struct wiphy *wiphy, size_t new_ie_len; struct cfg80211_bss_ies *new_ies; const struct cfg80211_bss_ies *old; @@ -410219,7 +528838,7 @@ index adc0d14cfd860..f0de22a6caf72 100644 lockdep_assert_held(&wiphy_to_rdev(wiphy)->bss_lock); -@@ -2296,6 +2326,8 @@ cfg80211_update_notlisted_nontrans(struct wiphy *wiphy, +@@ -2296,6 +2329,8 @@ cfg80211_update_notlisted_nontrans(struct wiphy *wiphy, } else { old = rcu_access_pointer(nontrans_bss->beacon_ies); rcu_assign_pointer(nontrans_bss->beacon_ies, new_ies); @@ -410228,6 +528847,23 @@ index adc0d14cfd860..f0de22a6caf72 100644 rcu_assign_pointer(nontrans_bss->ies, new_ies); if (old) kfree_rcu((struct cfg80211_bss_ies *)old, rcu_head); +@@ -2442,10 +2477,15 @@ cfg80211_inform_bss_frame_data(struct wiphy *wiphy, + const struct cfg80211_bss_ies *ies1, *ies2; + size_t ielen = len - offsetof(struct ieee80211_mgmt, + u.probe_resp.variable); +- struct cfg80211_non_tx_bss non_tx_data; ++ struct cfg80211_non_tx_bss non_tx_data = {}; + + res = cfg80211_inform_single_bss_frame_data(wiphy, data, mgmt, + len, gfp); ++ ++ /* don't do any further MBSSID handling for S1G */ ++ if (ieee80211_is_s1g_beacon(mgmt->frame_control)) ++ return res; ++ + if (!res || !wiphy->support_mbssid || + !cfg80211_find_ie(WLAN_EID_MULTIPLE_BSSID, ie, ielen)) + return res; diff --git a/net/wireless/util.c b/net/wireless/util.c index a1a99a5749844..cb15d7f4eb05a 100644 --- a/net/wireless/util.c @@ -410249,11 +528885,69 @@ index a1a99a5749844..cb15d7f4eb05a 100644 10239, /* 1.666666... */ 8532, /* 1.388888... */ 7680, /* 1.250000... 
*/ +diff --git a/net/wireless/wext-core.c b/net/wireless/wext-core.c +index 76a80a41615be..fe8765c4075d3 100644 +--- a/net/wireless/wext-core.c ++++ b/net/wireless/wext-core.c +@@ -468,6 +468,7 @@ void wireless_send_event(struct net_device * dev, + struct __compat_iw_event *compat_event; + struct compat_iw_point compat_wrqu; + struct sk_buff *compskb; ++ int ptr_len; + #endif + + /* +@@ -582,6 +583,9 @@ void wireless_send_event(struct net_device * dev, + nlmsg_end(skb, nlh); + #ifdef CONFIG_COMPAT + hdr_len = compat_event_type_size[descr->header_type]; ++ ++ /* ptr_len is remaining size in event header apart from LCP */ ++ ptr_len = hdr_len - IW_EV_COMPAT_LCP_LEN; + event_len = hdr_len + extra_len; + + compskb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC); +@@ -612,16 +616,15 @@ void wireless_send_event(struct net_device * dev, + if (descr->header_type == IW_HEADER_TYPE_POINT) { + compat_wrqu.length = wrqu->data.length; + compat_wrqu.flags = wrqu->data.flags; +- memcpy(&compat_event->pointer, +- ((char *) &compat_wrqu) + IW_EV_COMPAT_POINT_OFF, +- hdr_len - IW_EV_COMPAT_LCP_LEN); ++ memcpy(compat_event->ptr_bytes, ++ ((char *)&compat_wrqu) + IW_EV_COMPAT_POINT_OFF, ++ ptr_len); + if (extra_len) +- memcpy(((char *) compat_event) + hdr_len, +- extra, extra_len); ++ memcpy(&compat_event->ptr_bytes[ptr_len], ++ extra, extra_len); + } else { + /* extra_len must be zero, so no if (extra) needed */ +- memcpy(&compat_event->pointer, wrqu, +- hdr_len - IW_EV_COMPAT_LCP_LEN); ++ memcpy(compat_event->ptr_bytes, wrqu, ptr_len); + } + + nlmsg_end(compskb, nlh); diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c -index 3583354a7d7fe..3a171828638b1 100644 +index 3583354a7d7fe..07f6206e7cb47 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c -@@ -1765,10 +1765,15 @@ void x25_kill_by_neigh(struct x25_neigh *nb) +@@ -482,6 +482,12 @@ static int x25_listen(struct socket *sock, int backlog) + int rc = -EOPNOTSUPP; + + lock_sock(sk); ++ if (sock->state != SS_UNCONNECTED) { ++ rc = -EINVAL; ++ release_sock(sk); ++ return rc; ++ } ++ + if (sk->sk_state != TCP_LISTEN) { + memset(&x25_sk(sk)->dest_addr, 0, X25_ADDR_LEN); + sk->sk_max_ack_backlog = backlog; +@@ -1765,10 +1771,15 @@ void x25_kill_by_neigh(struct x25_neigh *nb) write_lock_bh(&x25_list_lock); @@ -410272,6 +528966,19 @@ index 3583354a7d7fe..3a171828638b1 100644 write_unlock_bh(&x25_list_lock); /* Remove any related forwards */ +diff --git a/net/x25/x25_dev.c b/net/x25/x25_dev.c +index 5259ef8f5242f..748d8630ab58b 100644 +--- a/net/x25/x25_dev.c ++++ b/net/x25/x25_dev.c +@@ -117,7 +117,7 @@ int x25_lapb_receive_frame(struct sk_buff *skb, struct net_device *dev, + + if (!pskb_may_pull(skb, 1)) { + x25_neigh_put(nb); +- return 0; ++ goto drop; + } + + switch (skb->data[0]) { diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index d6b500dc42084..330dd498fc61d 100644 --- a/net/xdp/xsk.c @@ -410730,10 +529437,18 @@ index 1f08ebf7d80c5..24ca49ecebea3 100644 __skb_queue_tail(&ctx->out_queue, skb); diff --git a/net/xfrm/xfrm_compat.c b/net/xfrm/xfrm_compat.c -index 2bf2693901631..a0f62fa02e06e 100644 +index 2bf2693901631..8cbf45a8bcdc2 100644 --- a/net/xfrm/xfrm_compat.c +++ b/net/xfrm/xfrm_compat.c -@@ -127,6 +127,7 @@ static const struct nla_policy compat_policy[XFRMA_MAX+1] = { +@@ -5,6 +5,7 @@ + * Based on code and translator idea by: Florian Westphal <fw@strlen.de> + */ + #include <linux/compat.h> ++#include <linux/nospec.h> + #include <linux/xfrm.h> + #include <net/xfrm.h> + +@@ -127,6 +128,7 @@ static const struct nla_policy compat_policy[XFRMA_MAX+1] = { 
[XFRMA_SET_MARK] = { .type = NLA_U32 }, [XFRMA_SET_MARK_MASK] = { .type = NLA_U32 }, [XFRMA_IF_ID] = { .type = NLA_U32 }, @@ -410741,7 +529456,7 @@ index 2bf2693901631..a0f62fa02e06e 100644 }; static struct nlmsghdr *xfrm_nlmsg_put_compat(struct sk_buff *skb, -@@ -274,9 +275,10 @@ static int xfrm_xlate64_attr(struct sk_buff *dst, const struct nlattr *src) +@@ -274,9 +276,10 @@ static int xfrm_xlate64_attr(struct sk_buff *dst, const struct nlattr *src) case XFRMA_SET_MARK: case XFRMA_SET_MARK_MASK: case XFRMA_IF_ID: @@ -410753,7 +529468,16 @@ index 2bf2693901631..a0f62fa02e06e 100644 pr_warn_once("unsupported nla_type %d\n", src->nla_type); return -EOPNOTSUPP; } -@@ -431,7 +433,7 @@ static int xfrm_xlate32_attr(void *dst, const struct nlattr *nla, +@@ -300,7 +303,7 @@ static int xfrm_xlate64(struct sk_buff *dst, const struct nlmsghdr *nlh_src) + nla_for_each_attr(nla, attrs, len, remaining) { + int err; + +- switch (type) { ++ switch (nlh_src->nlmsg_type) { + case XFRM_MSG_NEWSPDINFO: + err = xfrm_nla_cpy(dst, nla, nla_len(nla)); + break; +@@ -431,10 +434,11 @@ static int xfrm_xlate32_attr(void *dst, const struct nlattr *nla, int err; if (type > XFRMA_MAX) { @@ -410762,11 +529486,44 @@ index 2bf2693901631..a0f62fa02e06e 100644 NL_SET_ERR_MSG(extack, "Bad attribute"); return -EOPNOTSUPP; } ++ type = array_index_nospec(type, XFRMA_MAX + 1); + if (nla_len(nla) < compat_policy[type].len) { + NL_SET_ERR_MSG(extack, "Attribute bad length"); + return -EOPNOTSUPP; diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c -index e843b0d9e2a61..c255aac6b816b 100644 +index e843b0d9e2a61..8b8e957a69c36 100644 --- a/net/xfrm/xfrm_device.c +++ b/net/xfrm/xfrm_device.c -@@ -223,6 +223,9 @@ int xfrm_dev_state_add(struct net *net, struct xfrm_state *x, +@@ -97,6 +97,18 @@ static void xfrm_outer_mode_prep(struct xfrm_state *x, struct sk_buff *skb) + } + } + ++static inline bool xmit_xfrm_check_overflow(struct sk_buff *skb) ++{ ++ struct xfrm_offload *xo = xfrm_offload(skb); ++ __u32 seq = xo->seq.low; ++ ++ seq += skb_shinfo(skb)->gso_segs; ++ if (unlikely(seq < xo->seq.low)) ++ return true; ++ ++ return false; ++} ++ + struct sk_buff *validate_xmit_xfrm(struct sk_buff *skb, netdev_features_t features, bool *again) + { + int err; +@@ -134,7 +146,8 @@ struct sk_buff *validate_xmit_xfrm(struct sk_buff *skb, netdev_features_t featur + return skb; + } + +- if (skb_is_gso(skb) && unlikely(x->xso.dev != dev)) { ++ if (skb_is_gso(skb) && (unlikely(x->xso.dev != dev) || ++ unlikely(xmit_xfrm_check_overflow(skb)))) { + struct sk_buff *segs; + + /* Packet got rerouted, fixup features and segment it. 
*/ +@@ -223,6 +236,9 @@ int xfrm_dev_state_add(struct net *net, struct xfrm_state *x, if (x->encap || x->tfcpad) return -EINVAL; @@ -410776,7 +529533,7 @@ index e843b0d9e2a61..c255aac6b816b 100644 dev = dev_get_by_index(net, xuo->ifindex); if (!dev) { if (!(xuo->flags & XFRM_OFFLOAD_INBOUND)) { -@@ -261,7 +264,8 @@ int xfrm_dev_state_add(struct net *net, struct xfrm_state *x, +@@ -261,7 +277,8 @@ int xfrm_dev_state_add(struct net *net, struct xfrm_state *x, xso->dev = dev; xso->real_dev = dev; xso->num_exthdrs = 1; @@ -410787,7 +529544,7 @@ index e843b0d9e2a61..c255aac6b816b 100644 err = dev->xfrmdev_ops->xdo_dev_state_add(x); if (err) { diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c -index 3df0861d4390f..3d8668d62e639 100644 +index 3df0861d4390f..7c5958a2eed46 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c @@ -24,7 +24,8 @@ @@ -410800,7 +529557,17 @@ index 3df0861d4390f..3d8668d62e639 100644 struct sk_buff_head queue; }; -@@ -760,18 +761,22 @@ int xfrm_input_resume(struct sk_buff *skb, int nexthdr) +@@ -277,8 +278,7 @@ static int xfrm6_remove_tunnel_encap(struct xfrm_state *x, struct sk_buff *skb) + goto out; + + if (x->props.flags & XFRM_STATE_DECAP_DSCP) +- ipv6_copy_dscp(ipv6_get_dsfield(ipv6_hdr(skb)), +- ipipv6_hdr(skb)); ++ ipv6_copy_dscp(XFRM_MODE_SKB_CB(skb)->tos, ipipv6_hdr(skb)); + if (!(x->props.flags & XFRM_STATE_NOECN)) + ipip6_ecn_decapsulate(skb); + +@@ -760,18 +760,22 @@ int xfrm_input_resume(struct sk_buff *skb, int nexthdr) } EXPORT_SYMBOL(xfrm_input_resume); @@ -410825,7 +529592,7 @@ index 3df0861d4390f..3d8668d62e639 100644 } int xfrm_trans_queue_net(struct net *net, struct sk_buff *skb, -@@ -782,15 +787,17 @@ int xfrm_trans_queue_net(struct net *net, struct sk_buff *skb, +@@ -782,15 +786,17 @@ int xfrm_trans_queue_net(struct net *net, struct sk_buff *skb, trans = this_cpu_ptr(&xfrm_trans_tasklet); @@ -410845,7 +529612,7 @@ index 3df0861d4390f..3d8668d62e639 100644 return 0; } EXPORT_SYMBOL(xfrm_trans_queue_net); -@@ -817,7 +824,8 @@ void __init xfrm_input_init(void) +@@ -817,7 +823,8 @@ void __init xfrm_input_init(void) struct xfrm_trans_tasklet *trans; trans = &per_cpu(xfrm_trans_tasklet, i); @@ -410856,10 +529623,63 @@ index 3df0861d4390f..3d8668d62e639 100644 } } diff --git a/net/xfrm/xfrm_interface.c b/net/xfrm/xfrm_interface.c -index 41de46b5ffa94..1e8b26eecb3f8 100644 +index 41de46b5ffa94..694eec6ca147e 100644 --- a/net/xfrm/xfrm_interface.c +++ b/net/xfrm/xfrm_interface.c -@@ -304,7 +304,10 @@ xfrmi_xmit2(struct sk_buff *skb, struct net_device *dev, struct flowi *fl) +@@ -207,6 +207,52 @@ static void xfrmi_scrub_packet(struct sk_buff *skb, bool xnet) + skb->mark = 0; + } + ++static int xfrmi_input(struct sk_buff *skb, int nexthdr, __be32 spi, ++ int encap_type, unsigned short family) ++{ ++ struct sec_path *sp; ++ ++ sp = skb_sec_path(skb); ++ if (sp && (sp->len || sp->olen) && ++ !xfrm_policy_check(NULL, XFRM_POLICY_IN, skb, family)) ++ goto discard; ++ ++ XFRM_SPI_SKB_CB(skb)->family = family; ++ if (family == AF_INET) { ++ XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct iphdr, daddr); ++ XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4 = NULL; ++ } else { ++ XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct ipv6hdr, daddr); ++ XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6 = NULL; ++ } ++ ++ return xfrm_input(skb, nexthdr, spi, encap_type); ++discard: ++ kfree_skb(skb); ++ return 0; ++} ++ ++static int xfrmi4_rcv(struct sk_buff *skb) ++{ ++ return xfrmi_input(skb, ip_hdr(skb)->protocol, 0, 0, AF_INET); ++} ++ ++static int xfrmi6_rcv(struct sk_buff *skb) 
++{ ++ return xfrmi_input(skb, skb_network_header(skb)[IP6CB(skb)->nhoff], ++ 0, 0, AF_INET6); ++} ++ ++static int xfrmi4_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) ++{ ++ return xfrmi_input(skb, nexthdr, spi, encap_type, AF_INET); ++} ++ ++static int xfrmi6_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) ++{ ++ return xfrmi_input(skb, nexthdr, spi, encap_type, AF_INET6); ++} ++ + static int xfrmi_rcv_cb(struct sk_buff *skb, int err) + { + const struct xfrm_mode *inner_mode; +@@ -304,7 +350,10 @@ xfrmi_xmit2(struct sk_buff *skb, struct net_device *dev, struct flowi *fl) if (mtu < IPV6_MIN_MTU) mtu = IPV6_MIN_MTU; @@ -410871,7 +529691,7 @@ index 41de46b5ffa94..1e8b26eecb3f8 100644 } else { if (!(ip_hdr(skb)->frag_off & htons(IP_DF))) goto xmit; -@@ -637,11 +640,16 @@ static int xfrmi_newlink(struct net *src_net, struct net_device *dev, +@@ -637,11 +686,16 @@ static int xfrmi_newlink(struct net *src_net, struct net_device *dev, struct netlink_ext_ack *extack) { struct net *net = dev_net(dev); @@ -410889,7 +529709,7 @@ index 41de46b5ffa94..1e8b26eecb3f8 100644 xi = xfrmi_locate(net, &p); if (xi) return -EEXIST; -@@ -666,9 +674,14 @@ static int xfrmi_changelink(struct net_device *dev, struct nlattr *tb[], +@@ -666,9 +720,14 @@ static int xfrmi_changelink(struct net_device *dev, struct nlattr *tb[], { struct xfrm_if *xi = netdev_priv(dev); struct net *net = xi->net; @@ -410905,6 +529725,28 @@ index 41de46b5ffa94..1e8b26eecb3f8 100644 xi = xfrmi_locate(net, &p); if (!xi) { xi = netdev_priv(dev); +@@ -761,8 +820,8 @@ static struct pernet_operations xfrmi_net_ops = { + }; + + static struct xfrm6_protocol xfrmi_esp6_protocol __read_mostly = { +- .handler = xfrm6_rcv, +- .input_handler = xfrm_input, ++ .handler = xfrmi6_rcv, ++ .input_handler = xfrmi6_input, + .cb_handler = xfrmi_rcv_cb, + .err_handler = xfrmi6_err, + .priority = 10, +@@ -812,8 +871,8 @@ static struct xfrm6_tunnel xfrmi_ip6ip_handler __read_mostly = { + #endif + + static struct xfrm4_protocol xfrmi_esp4_protocol __read_mostly = { +- .handler = xfrm4_rcv, +- .input_handler = xfrm_input, ++ .handler = xfrmi4_rcv, ++ .input_handler = xfrmi4_input, + .cb_handler = xfrmi_rcv_cb, + .err_handler = xfrmi4_err, + .priority = 10, diff --git a/net/xfrm/xfrm_ipcomp.c b/net/xfrm/xfrm_ipcomp.c index cb40ff0ff28da..92ad336a83ab5 100644 --- a/net/xfrm/xfrm_ipcomp.c @@ -410974,7 +529816,7 @@ index 229544bc70c21..4dc4a7bbe51cf 100644 if (skb_is_gso(skb)) { diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c -index 37d17a79617c9..ba58b963f4827 100644 +index 37d17a79617c9..0540e9f72b2fe 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -31,8 +31,10 @@ @@ -411082,7 +529924,17 @@ index 37d17a79617c9..ba58b963f4827 100644 XFRM_INC_STATS(net, LINUX_MIB_XFRMINNOSTATES); goto reject; } -@@ -4099,6 +4126,9 @@ static int __net_init xfrm_net_init(struct net *net) +@@ -3642,6 +3669,9 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, + goto reject; + } + ++ if (if_id) ++ secpath_reset(skb); ++ + xfrm_pols_put(pols, npols); + return 1; + } +@@ -4099,6 +4129,9 @@ static int __net_init xfrm_net_init(struct net *net) spin_lock_init(&net->xfrm.xfrm_policy_lock); seqcount_spinlock_init(&net->xfrm.xfrm_policy_hash_generation, &net->xfrm.xfrm_policy_lock); mutex_init(&net->xfrm.xfrm_cfg_mutex); @@ -411092,7 +529944,7 @@ index 37d17a79617c9..ba58b963f4827 100644 rv = xfrm_statistics_init(net); if (rv < 0) -@@ -4237,7 +4267,7 @@ static bool xfrm_migrate_selector_match(const 
struct xfrm_selector *sel_cmp, +@@ -4237,7 +4270,7 @@ static bool xfrm_migrate_selector_match(const struct xfrm_selector *sel_cmp, } static struct xfrm_policy *xfrm_migrate_policy_find(const struct xfrm_selector *sel, @@ -411101,7 +529953,7 @@ index 37d17a79617c9..ba58b963f4827 100644 { struct xfrm_policy *pol, *ret = NULL; struct hlist_head *chain; -@@ -4246,7 +4276,8 @@ static struct xfrm_policy *xfrm_migrate_policy_find(const struct xfrm_selector * +@@ -4246,7 +4279,8 @@ static struct xfrm_policy *xfrm_migrate_policy_find(const struct xfrm_selector * spin_lock_bh(&net->xfrm.xfrm_policy_lock); chain = policy_hash_direct(net, &sel->daddr, &sel->saddr, sel->family, dir); hlist_for_each_entry(pol, chain, bydst) { @@ -411111,7 +529963,7 @@ index 37d17a79617c9..ba58b963f4827 100644 pol->type == type) { ret = pol; priority = ret->priority; -@@ -4258,7 +4289,8 @@ static struct xfrm_policy *xfrm_migrate_policy_find(const struct xfrm_selector * +@@ -4258,7 +4292,8 @@ static struct xfrm_policy *xfrm_migrate_policy_find(const struct xfrm_selector * if ((pol->priority >= priority) && ret) break; @@ -411121,7 +529973,7 @@ index 37d17a79617c9..ba58b963f4827 100644 pol->type == type) { ret = pol; break; -@@ -4374,7 +4406,7 @@ static int xfrm_migrate_check(const struct xfrm_migrate *m, int num_migrate) +@@ -4374,7 +4409,7 @@ static int xfrm_migrate_check(const struct xfrm_migrate *m, int num_migrate) int xfrm_migrate(const struct xfrm_selector *sel, u8 dir, u8 type, struct xfrm_migrate *m, int num_migrate, struct xfrm_kmaddress *k, struct net *net, @@ -411130,7 +529982,7 @@ index 37d17a79617c9..ba58b963f4827 100644 { int i, err, nx_cur = 0, nx_new = 0; struct xfrm_policy *pol = NULL; -@@ -4393,14 +4425,14 @@ int xfrm_migrate(const struct xfrm_selector *sel, u8 dir, u8 type, +@@ -4393,14 +4428,14 @@ int xfrm_migrate(const struct xfrm_selector *sel, u8 dir, u8 type, } /* Stage 1 - find policy */ @@ -411147,6 +529999,19 @@ index 37d17a79617c9..ba58b963f4827 100644 x_cur[nx_cur] = x; nx_cur++; xc = xfrm_state_migrate(x, mp, encap); +diff --git a/net/xfrm/xfrm_replay.c b/net/xfrm/xfrm_replay.c +index 9277d81b344cb..49dd788859d8b 100644 +--- a/net/xfrm/xfrm_replay.c ++++ b/net/xfrm/xfrm_replay.c +@@ -714,7 +714,7 @@ static int xfrm_replay_overflow_offload_esn(struct xfrm_state *x, struct sk_buff + oseq += skb_shinfo(skb)->gso_segs; + } + +- if (unlikely(oseq < replay_esn->oseq)) { ++ if (unlikely(xo->seq.low < replay_esn->oseq)) { + XFRM_SKB_CB(skb)->seq.output.hi = ++oseq_hi; + xo->seq.hi = oseq_hi; + replay_esn->oseq_hi = oseq_hi; diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index a2f4001221d16..15132b080614c 100644 --- a/net/xfrm/xfrm_state.c @@ -412118,6 +530983,40 @@ index 7a15910d21718..c089e9cdaf328 100644 fprintf(stderr, "Hint: access to the binary, the interpreter or " "shared libraries may be denied.\n"); return 1; +diff --git a/samples/vfio-mdev/mdpy-fb.c b/samples/vfio-mdev/mdpy-fb.c +index 9ec93d90e8a5a..4eb7aa11cfbb2 100644 +--- a/samples/vfio-mdev/mdpy-fb.c ++++ b/samples/vfio-mdev/mdpy-fb.c +@@ -109,7 +109,7 @@ static int mdpy_fb_probe(struct pci_dev *pdev, + + ret = pci_request_regions(pdev, "mdpy-fb"); + if (ret < 0) +- return ret; ++ goto err_disable_dev; + + pci_read_config_dword(pdev, MDPY_FORMAT_OFFSET, &format); + pci_read_config_dword(pdev, MDPY_WIDTH_OFFSET, &width); +@@ -191,6 +191,9 @@ err_release_fb: + err_release_regions: + pci_release_regions(pdev); + ++err_disable_dev: ++ pci_disable_device(pdev); ++ + return ret; + } + +@@ -199,7 +202,10 @@ static void 
mdpy_fb_remove(struct pci_dev *pdev) + struct fb_info *info = pci_get_drvdata(pdev); + + unregister_framebuffer(info); ++ iounmap(info->screen_base); + framebuffer_release(info); ++ pci_release_regions(pdev); ++ pci_disable_device(pdev); + } + + static struct pci_device_id mdpy_fb_pci_table[] = { diff --git a/scripts/Kbuild.include b/scripts/Kbuild.include index cdec22088423e..914ea5eb16a8c 100644 --- a/scripts/Kbuild.include @@ -412153,6 +531052,21 @@ index cdec22088423e..914ea5eb16a8c 100644 ### # if_changed - execute command if any prerequisite is newer than +diff --git a/scripts/Makefile b/scripts/Makefile +index 9adb6d247818f..e2a239829556c 100644 +--- a/scripts/Makefile ++++ b/scripts/Makefile +@@ -3,8 +3,8 @@ + # scripts contains sources for various helper programs used throughout + # the kernel for the build process. + +-CRYPTO_LIBS = $(shell pkg-config --libs libcrypto 2> /dev/null || echo -lcrypto) +-CRYPTO_CFLAGS = $(shell pkg-config --cflags libcrypto 2> /dev/null) ++CRYPTO_LIBS = $(shell $(HOSTPKG_CONFIG) --libs libcrypto 2> /dev/null || echo -lcrypto) ++CRYPTO_CFLAGS = $(shell $(HOSTPKG_CONFIG) --cflags libcrypto 2> /dev/null) + + hostprogs-always-$(CONFIG_BUILD_BIN2C) += bin2c + hostprogs-always-$(CONFIG_KALLSYMS) += kallsyms diff --git a/scripts/Makefile.build b/scripts/Makefile.build index 3efc984d4c690..17aa8ef2d52a7 100644 --- a/scripts/Makefile.build @@ -412316,15 +531230,27 @@ index c27d2312cfc30..88cb294dc4472 100755 our $Modifier; our $Inline = qr{inline|__always_inline|noinline|__inline|__inline__}; diff --git a/scripts/dtc/Makefile b/scripts/dtc/Makefile -index 95aaf7431bffa..1cba78e1dce68 100644 +index 95aaf7431bffa..2d5f274d6efde 100644 --- a/scripts/dtc/Makefile +++ b/scripts/dtc/Makefile -@@ -29,7 +29,7 @@ dtc-objs += yamltree.o +@@ -18,7 +18,7 @@ fdtoverlay-objs := $(libfdt) fdtoverlay.o util.o + # Source files need to get at the userspace version of libfdt_env.h to compile + HOST_EXTRACFLAGS += -I $(srctree)/$(src)/libfdt + +-ifeq ($(shell pkg-config --exists yaml-0.1 2>/dev/null && echo yes),) ++ifeq ($(shell $(HOSTPKG_CONFIG) --exists yaml-0.1 2>/dev/null && echo yes),) + ifneq ($(CHECK_DT_BINDING)$(CHECK_DTBS),) + $(error dtc needs libyaml for DT schema validation support. \ + Install the necessary libyaml development package.) +@@ -27,9 +27,9 @@ HOST_EXTRACFLAGS += -DNO_YAML + else + dtc-objs += yamltree.o # To include <yaml.h> installed in a non-default path - HOSTCFLAGS_yamltree.o := $(shell pkg-config --cflags yaml-0.1) +-HOSTCFLAGS_yamltree.o := $(shell pkg-config --cflags yaml-0.1) ++HOSTCFLAGS_yamltree.o := $(shell $(HOSTPKG_CONFIG) --cflags yaml-0.1) # To link libyaml installed in a non-default path -HOSTLDLIBS_dtc := $(shell pkg-config yaml-0.1 --libs) -+HOSTLDLIBS_dtc := $(shell pkg-config --libs yaml-0.1) ++HOSTLDLIBS_dtc := $(shell $(HOSTPKG_CONFIG) --libs yaml-0.1) endif # Generated files need one more search path to include headers in source tree @@ -412369,8 +531295,26 @@ index b2483149bbe55..7db8258434355 100755 exit 0 fi +diff --git a/scripts/extract-cert.c b/scripts/extract-cert.c +index 3bc48c726c41c..79ecbbfe37cd7 100644 +--- a/scripts/extract-cert.c ++++ b/scripts/extract-cert.c +@@ -23,6 +23,13 @@ + #include <openssl/err.h> + #include <openssl/engine.h> + ++/* ++ * OpenSSL 3.0 deprecates the OpenSSL's ENGINE API. 
++ * ++ * Remove this if/when that API is no longer used ++ */ ++#pragma GCC diagnostic ignored "-Wdeprecated-declarations" ++ + #define PKEY_ID_PKCS7 2 + + static __attribute__((noreturn)) diff --git a/scripts/faddr2line b/scripts/faddr2line -index 6c6439f69a725..57099687e5e1d 100755 +index 6c6439f69a725..9e730b805e87c 100755 --- a/scripts/faddr2line +++ b/scripts/faddr2line @@ -44,17 +44,6 @@ @@ -412406,16 +531350,17 @@ index 6c6439f69a725..57099687e5e1d 100755 # Try to figure out the source directory prefix so we can remove it from the # addr2line output. HACK ALERT: This assumes that start_kernel() is in # init/main.c! This only works for vmlinux. Otherwise it falls back to -@@ -76,7 +73,7 @@ die() { +@@ -76,7 +73,8 @@ die() { find_dir_prefix() { local objfile=$1 - local start_kernel_addr=$(${READELF} -sW $objfile | awk '$8 == "start_kernel" {printf "0x%s", $2}') -+ local start_kernel_addr=$(${READELF} --symbols --wide $objfile | ${AWK} '$8 == "start_kernel" {printf "0x%s", $2}') ++ local start_kernel_addr=$(${READELF} --symbols --wide $objfile | sed 's/\[.*\]//' | ++ ${AWK} '$8 == "start_kernel" {printf "0x%s", $2}') [[ -z $start_kernel_addr ]] && return local file_line=$(${ADDR2LINE} -e $objfile $start_kernel_addr) -@@ -97,86 +94,158 @@ __faddr2line() { +@@ -97,86 +95,158 @@ __faddr2line() { local dir_prefix=$3 local print_warnings=$4 @@ -412518,7 +531463,7 @@ index 6c6439f69a725..57099687e5e1d 100755 + found=2 + break + fi -+ done < <(${READELF} --symbols --wide $objfile | ${AWK} -v sec=$sym_sec '$7 == sec' | sort --key=2) ++ done < <(${READELF} --symbols --wide $objfile | sed 's/\[.*\]//' | ${AWK} -v sec=$sym_sec '$7 == sec' | sort --key=2) + + if [[ $found = 0 ]]; then + warn "can't find symbol: sym_name: $sym_name sym_sec: $sym_sec sym_addr: $sym_addr sym_elf_size: $sym_elf_size" @@ -412616,7 +531561,7 @@ index 6c6439f69a725..57099687e5e1d 100755 do echo echo $line -@@ -184,12 +253,12 @@ __faddr2line() { +@@ -184,12 +254,12 @@ __faddr2line() { n1=$[$n-5] n2=$[$n+5] f=$(echo $line | sed 's/.*at \(.\+\):.*/\1/g') @@ -412627,7 +531572,7 @@ index 6c6439f69a725..57099687e5e1d 100755 DONE=1 - done < <(${NM} -n $objfile | awk -v fn=$func -v end=$file_end '$3 == fn { found=1; line=$0; start=$1; next } found == 1 { found=0; print line, "0x"$1 } END {if (found == 1) print line, end; }') -+ done < <(${READELF} --symbols --wide $objfile | ${AWK} -v fn=$sym_name '$4 == "FUNC" && $8 == fn') ++ done < <(${READELF} --symbols --wide $objfile | sed 's/\[.*\]//' | ${AWK} -v fn=$sym_name '$4 == "FUNC" && $8 == fn') } [[ $# -lt 2 ]] && usage @@ -412944,6 +531889,118 @@ index cf72680cd7692..4a828bca071e8 100644 conf_read_simple(name, S_DEF_AUTO); sym_calc_value(modules_sym); +diff --git a/scripts/kconfig/gconf-cfg.sh b/scripts/kconfig/gconf-cfg.sh +index 480ecd8b9f415..cbd90c28c05f2 100755 +--- a/scripts/kconfig/gconf-cfg.sh ++++ b/scripts/kconfig/gconf-cfg.sh +@@ -3,14 +3,14 @@ + + PKG="gtk+-2.0 gmodule-2.0 libglade-2.0" + +-if [ -z "$(command -v pkg-config)" ]; then ++if [ -z "$(command -v ${HOSTPKG_CONFIG})" ]; then + echo >&2 "*" +- echo >&2 "* 'make gconfig' requires 'pkg-config'. Please install it." ++ echo >&2 "* 'make gconfig' requires '${HOSTPKG_CONFIG}'. Please install it." + echo >&2 "*" + exit 1 + fi + +-if ! pkg-config --exists $PKG; then ++if ! ${HOSTPKG_CONFIG} --exists $PKG; then + echo >&2 "*" + echo >&2 "* Unable to find the GTK+ installation. Please make sure that" + echo >&2 "* the GTK+ 2.0 development package is correctly installed." +@@ -19,12 +19,12 @@ if ! 
pkg-config --exists $PKG; then + exit 1 + fi + +-if ! pkg-config --atleast-version=2.0.0 gtk+-2.0; then ++if ! ${HOSTPKG_CONFIG} --atleast-version=2.0.0 gtk+-2.0; then + echo >&2 "*" + echo >&2 "* GTK+ is present but version >= 2.0.0 is required." + echo >&2 "*" + exit 1 + fi + +-echo cflags=\"$(pkg-config --cflags $PKG)\" +-echo libs=\"$(pkg-config --libs $PKG)\" ++echo cflags=\"$(${HOSTPKG_CONFIG} --cflags $PKG)\" ++echo libs=\"$(${HOSTPKG_CONFIG} --libs $PKG)\" +diff --git a/scripts/kconfig/mconf-cfg.sh b/scripts/kconfig/mconf-cfg.sh +index b520e407a8ebb..025b565e0b7cd 100755 +--- a/scripts/kconfig/mconf-cfg.sh ++++ b/scripts/kconfig/mconf-cfg.sh +@@ -4,16 +4,16 @@ + PKG="ncursesw" + PKG2="ncurses" + +-if [ -n "$(command -v pkg-config)" ]; then +- if pkg-config --exists $PKG; then +- echo cflags=\"$(pkg-config --cflags $PKG)\" +- echo libs=\"$(pkg-config --libs $PKG)\" ++if [ -n "$(command -v ${HOSTPKG_CONFIG})" ]; then ++ if ${HOSTPKG_CONFIG} --exists $PKG; then ++ echo cflags=\"$(${HOSTPKG_CONFIG} --cflags $PKG)\" ++ echo libs=\"$(${HOSTPKG_CONFIG} --libs $PKG)\" + exit 0 + fi + +- if pkg-config --exists $PKG2; then +- echo cflags=\"$(pkg-config --cflags $PKG2)\" +- echo libs=\"$(pkg-config --libs $PKG2)\" ++ if ${HOSTPKG_CONFIG} --exists $PKG2; then ++ echo cflags=\"$(${HOSTPKG_CONFIG} --cflags $PKG2)\" ++ echo libs=\"$(${HOSTPKG_CONFIG} --libs $PKG2)\" + exit 0 + fi + fi +@@ -46,7 +46,7 @@ echo >&2 "* Unable to find the ncurses package." + echo >&2 "* Install ncurses (ncurses-devel or libncurses-dev" + echo >&2 "* depending on your distribution)." + echo >&2 "*" +-echo >&2 "* You may also need to install pkg-config to find the" ++echo >&2 "* You may also need to install ${HOSTPKG_CONFIG} to find the" + echo >&2 "* ncurses installed in a non-default location." + echo >&2 "*" + exit 1 +diff --git a/scripts/kconfig/nconf-cfg.sh b/scripts/kconfig/nconf-cfg.sh +index c212255070c0c..3a10bac2adb3a 100755 +--- a/scripts/kconfig/nconf-cfg.sh ++++ b/scripts/kconfig/nconf-cfg.sh +@@ -4,16 +4,16 @@ + PKG="ncursesw menuw panelw" + PKG2="ncurses menu panel" + +-if [ -n "$(command -v pkg-config)" ]; then +- if pkg-config --exists $PKG; then +- echo cflags=\"$(pkg-config --cflags $PKG)\" +- echo libs=\"$(pkg-config --libs $PKG)\" ++if [ -n "$(command -v ${HOSTPKG_CONFIG})" ]; then ++ if ${HOSTPKG_CONFIG} --exists $PKG; then ++ echo cflags=\"$(${HOSTPKG_CONFIG} --cflags $PKG)\" ++ echo libs=\"$(${HOSTPKG_CONFIG} --libs $PKG)\" + exit 0 + fi + +- if pkg-config --exists $PKG2; then +- echo cflags=\"$(pkg-config --cflags $PKG2)\" +- echo libs=\"$(pkg-config --libs $PKG2)\" ++ if ${HOSTPKG_CONFIG} --exists $PKG2; then ++ echo cflags=\"$(${HOSTPKG_CONFIG} --cflags $PKG2)\" ++ echo libs=\"$(${HOSTPKG_CONFIG} --libs $PKG2)\" + exit 0 + fi + fi +@@ -44,7 +44,7 @@ echo >&2 "* Unable to find the ncurses package." + echo >&2 "* Install ncurses (ncurses-devel or libncurses-dev" + echo >&2 "* depending on your distribution)." + echo >&2 "*" +-echo >&2 "* You may also need to install pkg-config to find the" ++echo >&2 "* You may also need to install ${HOSTPKG_CONFIG} to find the" + echo >&2 "* ncurses installed in a non-default location." 
+ echo >&2 "*" + exit 1 diff --git a/scripts/kconfig/preprocess.c b/scripts/kconfig/preprocess.c index 0590f86df6e40..748da578b418c 100644 --- a/scripts/kconfig/preprocess.c @@ -412957,6 +532014,40 @@ index 0590f86df6e40..748da578b418c 100644 char *cmd; size_t nread; int i; +diff --git a/scripts/kconfig/qconf-cfg.sh b/scripts/kconfig/qconf-cfg.sh +index fa564cd795b7c..9b695e5cd9b37 100755 +--- a/scripts/kconfig/qconf-cfg.sh ++++ b/scripts/kconfig/qconf-cfg.sh +@@ -3,22 +3,22 @@ + + PKG="Qt5Core Qt5Gui Qt5Widgets" + +-if [ -z "$(command -v pkg-config)" ]; then ++if [ -z "$(command -v ${HOSTPKG_CONFIG})" ]; then + echo >&2 "*" +- echo >&2 "* 'make xconfig' requires 'pkg-config'. Please install it." ++ echo >&2 "* 'make xconfig' requires '${HOSTPKG_CONFIG}'. Please install it." + echo >&2 "*" + exit 1 + fi + +-if pkg-config --exists $PKG; then +- echo cflags=\"-std=c++11 -fPIC $(pkg-config --cflags $PKG)\" +- echo libs=\"$(pkg-config --libs $PKG)\" +- echo moc=\"$(pkg-config --variable=host_bins Qt5Core)/moc\" ++if ${HOSTPKG_CONFIG} --exists $PKG; then ++ echo cflags=\"-std=c++11 -fPIC $(${HOSTPKG_CONFIG} --cflags $PKG)\" ++ echo libs=\"$(${HOSTPKG_CONFIG} --libs $PKG)\" ++ echo moc=\"$(${HOSTPKG_CONFIG} --variable=host_bins Qt5Core)/moc\" + exit 0 + fi + + echo >&2 "*" +-echo >&2 "* Could not find Qt5 via pkg-config." ++echo >&2 "* Could not find Qt5 via ${HOSTPKG_CONFIG}." + echo >&2 "* Please install Qt5 and make sure it's in PKG_CONFIG_PATH" + echo >&2 "*" + exit 1 diff --git a/scripts/kernel-doc b/scripts/kernel-doc index cfcb607379577..5d54b57ff90cc 100755 --- a/scripts/kernel-doc @@ -413132,7 +532223,7 @@ index 7c477ca7dc982..951cc60e5a903 100755 $MAKE %{?_smp_mflags} INSTALL_HDR_PATH=%{buildroot}/usr headers_install diff --git a/scripts/pahole-flags.sh b/scripts/pahole-flags.sh new file mode 100755 -index 0000000000000..7acee326aa6c9 +index 0000000000000..d38fa6d84d62a --- /dev/null +++ b/scripts/pahole-flags.sh @@ -0,0 +1,24 @@ @@ -413145,7 +532236,7 @@ index 0000000000000..7acee326aa6c9 + exit 0 +fi + -+pahole_ver=$(${PAHOLE} --version | sed -E 's/v([0-9]+)\.([0-9]+)/\1\2/') ++pahole_ver=$($(dirname $0)/pahole-version.sh ${PAHOLE}) + +if [ "${pahole_ver}" -ge "118" ] && [ "${pahole_ver}" -le "121" ]; then + # pahole 1.18 through 1.21 can't handle zero-sized per-CPU vars @@ -413160,6 +532251,25 @@ index 0000000000000..7acee326aa6c9 +fi + +echo ${extra_paholeopt} +diff --git a/scripts/pahole-version.sh b/scripts/pahole-version.sh +new file mode 100755 +index 0000000000000..f8a32ab93ad12 +--- /dev/null ++++ b/scripts/pahole-version.sh +@@ -0,0 +1,13 @@ ++#!/bin/sh ++# SPDX-License-Identifier: GPL-2.0 ++# ++# Usage: $ ./pahole-version.sh pahole ++# ++# Prints pahole's version in a 3-digit form, such as 119 for v1.19. ++ ++if [ ! -x "$(command -v "$@")" ]; then ++ echo 0 ++ exit 1 ++fi ++ ++"$@" --version | sed -E 's/v([0-9]+)\.([0-9]+)/\1\2/' diff --git a/scripts/recordmcount.pl b/scripts/recordmcount.pl index 7d631aaa0ae11..3ccb2c70add4d 100755 --- a/scripts/recordmcount.pl @@ -413186,6 +532296,24 @@ index 2dccf141241d7..20af56ce245c5 100755 awk '{ print $2 '}` $SF -F file_contexts $mounts +diff --git a/scripts/sign-file.c b/scripts/sign-file.c +index fbd34b8e8f578..7434e9ea926e2 100644 +--- a/scripts/sign-file.c ++++ b/scripts/sign-file.c +@@ -29,6 +29,13 @@ + #include <openssl/err.h> + #include <openssl/engine.h> + ++/* ++ * OpenSSL 3.0 deprecates the OpenSSL's ENGINE API. 
++ * ++ * Remove this if/when that API is no longer used ++ */ ++#pragma GCC diagnostic ignored "-Wdeprecated-declarations" ++ + /* + * Use CMS if we have openssl-1.0.0 or newer available - otherwise we have to + * assume that it's not available and its header file is missing and that we diff --git a/scripts/sorttable.c b/scripts/sorttable.c index 6ee4fa882919c..278bb53b325c1 100644 --- a/scripts/sorttable.c @@ -413230,6 +532358,89 @@ index 288e86a9d1e58..f126ecbb0494d 100755 check_missing_file(["/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf"], "fonts-dejavu", 2); +diff --git a/scripts/tags.sh b/scripts/tags.sh +index db8ba411860a5..91413d45f0fa1 100755 +--- a/scripts/tags.sh ++++ b/scripts/tags.sh +@@ -95,10 +95,13 @@ all_sources() + + all_compiled_sources() + { +- realpath -es $([ -z "$KBUILD_ABS_SRCTREE" ] && echo --relative-to=.) \ +- include/generated/autoconf.h $(find $ignore -name "*.cmd" -exec \ +- grep -Poh '(?(?=^source_.* \K).*|(?=^ \K\S).*(?= \\))' {} \+ | +- awk '!a[$0]++') | sort -u ++ { ++ echo include/generated/autoconf.h ++ find $ignore -name "*.cmd" -exec \ ++ sed -n -E 's/^source_.* (.*)/\1/p; s/^ (\S.*) \\/\1/p' {} \+ | ++ awk '!a[$0]++' ++ } | xargs realpath -es $([ -z "$KBUILD_ABS_SRCTREE" ] && echo --relative-to=.) | ++ sort -u + } + + all_target_sources() +diff --git a/scripts/tracing/ftrace-bisect.sh b/scripts/tracing/ftrace-bisect.sh +index 926701162bc83..bb4f59262bbe9 100755 +--- a/scripts/tracing/ftrace-bisect.sh ++++ b/scripts/tracing/ftrace-bisect.sh +@@ -12,7 +12,7 @@ + # (note, if this is a problem with function_graph tracing, then simply + # replace "function" with "function_graph" in the following steps). + # +-# # cd /sys/kernel/debug/tracing ++# # cd /sys/kernel/tracing + # # echo schedule > set_ftrace_filter + # # echo function > current_tracer + # +@@ -20,22 +20,40 @@ + # + # # echo nop > current_tracer + # +-# # cat available_filter_functions > ~/full-file ++# Starting with v5.1 this can be done with numbers, making it much faster: ++# ++# The old (slow) way, for kernels before v5.1. ++# ++# [old-way] # cat available_filter_functions > ~/full-file ++# ++# [old-way] *** Note *** this process will take several minutes to update the ++# [old-way] filters. Setting multiple functions is an O(n^2) operation, and we ++# [old-way] are dealing with thousands of functions. So go have coffee, talk ++# [old-way] with your coworkers, read facebook. And eventually, this operation ++# [old-way] will end. ++# ++# The new way (using numbers) is an O(n) operation, and usually takes less than a second. ++# ++# seq `wc -l available_filter_functions | cut -d' ' -f1` > ~/full-file ++# ++# This will create a sequence of numbers that match the functions in ++# available_filter_functions, and when echoing in a number into the ++# set_ftrace_filter file, it will enable the corresponding function in ++# O(1) time. Making enabling all functions O(n) where n is the number of ++# functions to enable. ++# ++# For either the new or old way, the rest of the operations remain the same. ++# + # # ftrace-bisect ~/full-file ~/test-file ~/non-test-file + # # cat ~/test-file > set_ftrace_filter + # +-# *** Note *** this will take several minutes. Setting multiple functions is +-# an O(n^2) operation, and we are dealing with thousands of functions. So go +-# have coffee, talk with your coworkers, read facebook. And eventually, this +-# operation will end. +-# + # # echo function > current_tracer + # + # If it crashes, we know that ~/test-file has a bad function. 
+ # + # Reboot back to test kernel. + # +-# # cd /sys/kernel/debug/tracing ++# # cd /sys/kernel/tracing + # # mv ~/test-file ~/full-file + # + # If it didn't crash. diff --git a/security/Kconfig b/security/Kconfig index 0ced7fd33e4d0..5d412b3ddc496 100644 --- a/security/Kconfig @@ -413263,7 +532474,7 @@ index 0ced7fd33e4d0..5d412b3ddc496 100644 Detect overflows of buffers in common string and memory functions where the compiler can determine and validate the buffer sizes. diff --git a/security/Kconfig.hardening b/security/Kconfig.hardening -index 90cbaff86e13a..942ed8de36d35 100644 +index 90cbaff86e13a..2e509e32cf75a 100644 --- a/security/Kconfig.hardening +++ b/security/Kconfig.hardening @@ -22,14 +22,23 @@ menu "Memory initialization" @@ -413292,8 +532503,18 @@ index 90cbaff86e13a..942ed8de36d35 100644 default INIT_STACK_NONE help This option enables initialization of stack variables at +@@ -231,6 +240,9 @@ config INIT_ON_FREE_DEFAULT_ON + + config CC_HAS_ZERO_CALL_USED_REGS + def_bool $(cc-option,-fzero-call-used-regs=used-gpr) ++ # https://github.com/ClangBuiltLinux/linux/issues/1766 ++ # https://github.com/llvm/llvm-project/issues/59242 ++ depends on !CC_IS_CLANG || CLANG_VERSION > 150006 + + config ZERO_CALL_USED_REGS + bool "Enable register zeroing on function exit" diff --git a/security/apparmor/apparmorfs.c b/security/apparmor/apparmorfs.c -index 2ee3b3d29f10b..a891705b1d577 100644 +index 2ee3b3d29f10b..8c7719108d7f7 100644 --- a/security/apparmor/apparmorfs.c +++ b/security/apparmor/apparmorfs.c @@ -401,7 +401,7 @@ static struct aa_loaddata *aa_simple_write_to_buffer(const char __user *userbuf, @@ -413305,6 +532526,18 @@ index 2ee3b3d29f10b..a891705b1d577 100644 return ERR_PTR(-EFAULT); } +@@ -867,8 +867,10 @@ static struct multi_transaction *multi_transaction_new(struct file *file, + if (!t) + return ERR_PTR(-ENOMEM); + kref_init(&t->count); +- if (copy_from_user(t->data, buf, size)) ++ if (copy_from_user(t->data, buf, size)) { ++ put_multi_transaction(t); + return ERR_PTR(-EFAULT); ++ } + + return t; + } diff --git a/security/apparmor/audit.c b/security/apparmor/audit.c index f7e97c7e80f3d..704b0c895605a 100644 --- a/security/apparmor/audit.c @@ -413431,6 +532664,23 @@ index e68bcedca976b..66bc4704f8044 100644 return ERR_PTR(-EINVAL); len = label_count_strn_entries(str, end - str); +diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c +index f72406fe1bf27..10274eb90fa37 100644 +--- a/security/apparmor/lsm.c ++++ b/security/apparmor/lsm.c +@@ -1170,10 +1170,10 @@ static int apparmor_inet_conn_request(const struct sock *sk, struct sk_buff *skb + #endif + + /* +- * The cred blob is a pointer to, not an instance of, an aa_task_ctx. ++ * The cred blob is a pointer to, not an instance of, an aa_label. 
+ */ + struct lsm_blob_sizes apparmor_blob_sizes __lsm_ro_after_init = { +- .lbs_cred = sizeof(struct aa_task_ctx *), ++ .lbs_cred = sizeof(struct aa_label *), + .lbs_file = sizeof(struct aa_file_ctx), + .lbs_task = sizeof(struct aa_task_ctx), + }; diff --git a/security/apparmor/mount.c b/security/apparmor/mount.c index aa6fcfde30514..f7bb47daf2ad6 100644 --- a/security/apparmor/mount.c @@ -413464,8 +532714,34 @@ index aa6fcfde30514..f7bb47daf2ad6 100644 } else /* already audited error */ error = PTR_ERR(target); +diff --git a/security/apparmor/policy.c b/security/apparmor/policy.c +index 4c010c9a6af1d..fcf22577f606c 100644 +--- a/security/apparmor/policy.c ++++ b/security/apparmor/policy.c +@@ -1125,7 +1125,7 @@ ssize_t aa_remove_profiles(struct aa_ns *policy_ns, struct aa_label *subj, + + if (!name) { + /* remove namespace - can only happen if fqname[0] == ':' */ +- mutex_lock_nested(&ns->parent->lock, ns->level); ++ mutex_lock_nested(&ns->parent->lock, ns->parent->level); + __aa_bump_ns_revision(ns); + __aa_remove_ns(ns); + mutex_unlock(&ns->parent->lock); +diff --git a/security/apparmor/policy_ns.c b/security/apparmor/policy_ns.c +index 70921d95fb406..53d24cf638936 100644 +--- a/security/apparmor/policy_ns.c ++++ b/security/apparmor/policy_ns.c +@@ -121,7 +121,7 @@ static struct aa_ns *alloc_ns(const char *prefix, const char *name) + return ns; + + fail_unconfined: +- kfree_sensitive(ns->base.hname); ++ aa_policy_destroy(&ns->base); + fail_ns: + kfree_sensitive(ns); + return NULL; diff --git a/security/apparmor/policy_unpack.c b/security/apparmor/policy_unpack.c -index 4e1f96b216a8b..03c9609ca407b 100644 +index 4e1f96b216a8b..d5b3a062d1d18 100644 --- a/security/apparmor/policy_unpack.c +++ b/security/apparmor/policy_unpack.c @@ -746,16 +746,18 @@ static struct aa_profile *unpack_profile(struct aa_ext *e, char **ns_name) @@ -413492,8 +532768,17 @@ index 4e1f96b216a8b..03c9609ca407b 100644 if (!unpack_u32(e, &tmp, NULL)) goto fail; if (tmp) +@@ -962,7 +964,7 @@ static int verify_header(struct aa_ext *e, int required, const char **ns) + * if not specified use previous version + * Mask off everything that is not kernel abi version + */ +- if (VERSION_LT(e->version, v5) || VERSION_GT(e->version, v7)) { ++ if (VERSION_LT(e->version, v5) || VERSION_GT(e->version, v8)) { + audit_iface(NULL, NULL, NULL, "unsupported interface version", + e, error); + return error; diff --git a/security/commoncap.c b/security/commoncap.c -index 3f810d37b71bd..5fc8986c3c77c 100644 +index 3f810d37b71bd..bc751fa5adad7 100644 --- a/security/commoncap.c +++ b/security/commoncap.c @@ -24,6 +24,7 @@ @@ -413504,7 +532789,20 @@ index 3f810d37b71bd..5fc8986c3c77c 100644 /* * If a non-root user executes a setuid-root binary in -@@ -418,7 +419,7 @@ int cap_inode_getsecurity(struct user_namespace *mnt_userns, +@@ -400,8 +401,10 @@ int cap_inode_getsecurity(struct user_namespace *mnt_userns, + &tmpbuf, size, GFP_NOFS); + dput(dentry); + +- if (ret < 0 || !tmpbuf) +- return ret; ++ if (ret < 0 || !tmpbuf) { ++ size = ret; ++ goto out_free; ++ } + + fs_ns = inode->i_sb->s_user_ns; + cap = (struct vfs_cap_data *) tmpbuf; +@@ -418,7 +421,7 @@ int cap_inode_getsecurity(struct user_namespace *mnt_userns, kroot = make_kuid(fs_ns, root); /* If this is an idmapped mount shift the kuid. */ @@ -413513,7 +532811,7 @@ index 3f810d37b71bd..5fc8986c3c77c 100644 /* If the root kuid maps to a valid uid in current ns, then return * this as a nscap. 
*/ -@@ -488,6 +489,7 @@ out_free: +@@ -488,6 +491,7 @@ out_free: * @size: size of @ivalue * @task_ns: user namespace of the caller * @mnt_userns: user namespace of the mount the inode was found from @@ -413521,7 +532819,7 @@ index 3f810d37b71bd..5fc8986c3c77c 100644 * * If the inode has been found through an idmapped mount the user namespace of * the vfsmount must be passed through @mnt_userns. This function will then -@@ -497,7 +499,8 @@ out_free: +@@ -497,7 +501,8 @@ out_free: */ static kuid_t rootid_from_xattr(const void *value, size_t size, struct user_namespace *task_ns, @@ -413531,7 +532829,7 @@ index 3f810d37b71bd..5fc8986c3c77c 100644 { const struct vfs_ns_cap_data *nscap = value; kuid_t rootkid; -@@ -507,7 +510,7 @@ static kuid_t rootid_from_xattr(const void *value, size_t size, +@@ -507,7 +512,7 @@ static kuid_t rootid_from_xattr(const void *value, size_t size, rootid = le32_to_cpu(nscap->rootid); rootkid = make_kuid(task_ns, rootid); @@ -413540,7 +532838,7 @@ index 3f810d37b71bd..5fc8986c3c77c 100644 } static bool validheader(size_t size, const struct vfs_cap_data *cap) -@@ -553,12 +556,12 @@ int cap_convert_nscap(struct user_namespace *mnt_userns, struct dentry *dentry, +@@ -553,12 +558,12 @@ int cap_convert_nscap(struct user_namespace *mnt_userns, struct dentry *dentry, return -EINVAL; if (!capable_wrt_inode_uidgid(mnt_userns, inode, CAP_SETFCAP)) return -EPERM; @@ -413555,7 +532853,7 @@ index 3f810d37b71bd..5fc8986c3c77c 100644 if (!uid_valid(rootid)) return -EINVAL; -@@ -699,7 +702,7 @@ int get_vfs_caps_from_disk(struct user_namespace *mnt_userns, +@@ -699,7 +704,7 @@ int get_vfs_caps_from_disk(struct user_namespace *mnt_userns, /* Limit the caps to the mounter of the filesystem * or the more limited uid specified in the xattr. */ @@ -413564,6 +532862,98 @@ index 3f810d37b71bd..5fc8986c3c77c 100644 if (!rootid_owns_currentns(rootkuid)) return -ENODATA; +diff --git a/security/device_cgroup.c b/security/device_cgroup.c +index 04375df52fc9a..fe5cb7696993d 100644 +--- a/security/device_cgroup.c ++++ b/security/device_cgroup.c +@@ -81,6 +81,17 @@ free_and_exit: + return -ENOMEM; + } + ++static void dev_exceptions_move(struct list_head *dest, struct list_head *orig) ++{ ++ struct dev_exception_item *ex, *tmp; ++ ++ lockdep_assert_held(&devcgroup_mutex); ++ ++ list_for_each_entry_safe(ex, tmp, orig, list) { ++ list_move_tail(&ex->list, dest); ++ } ++} ++ + /* + * called under devcgroup_mutex + */ +@@ -603,11 +614,13 @@ static int devcgroup_update_access(struct dev_cgroup *devcgroup, + int count, rc = 0; + struct dev_exception_item ex; + struct dev_cgroup *parent = css_to_devcgroup(devcgroup->css.parent); ++ struct dev_cgroup tmp_devcgrp; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + memset(&ex, 0, sizeof(ex)); ++ memset(&tmp_devcgrp, 0, sizeof(tmp_devcgrp)); + b = buffer; + + switch (*b) { +@@ -619,15 +632,27 @@ static int devcgroup_update_access(struct dev_cgroup *devcgroup, + + if (!may_allow_all(parent)) + return -EPERM; +- dev_exception_clean(devcgroup); +- devcgroup->behavior = DEVCG_DEFAULT_ALLOW; +- if (!parent) ++ if (!parent) { ++ devcgroup->behavior = DEVCG_DEFAULT_ALLOW; ++ dev_exception_clean(devcgroup); + break; ++ } + ++ INIT_LIST_HEAD(&tmp_devcgrp.exceptions); ++ rc = dev_exceptions_copy(&tmp_devcgrp.exceptions, ++ &devcgroup->exceptions); ++ if (rc) ++ return rc; ++ dev_exception_clean(devcgroup); + rc = dev_exceptions_copy(&devcgroup->exceptions, + &parent->exceptions); +- if (rc) ++ if (rc) { ++ dev_exceptions_move(&devcgroup->exceptions, ++ 
&tmp_devcgrp.exceptions); + return rc; ++ } ++ devcgroup->behavior = DEVCG_DEFAULT_ALLOW; ++ dev_exception_clean(&tmp_devcgrp); + break; + case DEVCG_DENY: + if (css_has_online_children(&devcgroup->css)) +diff --git a/security/integrity/digsig.c b/security/integrity/digsig.c +index 3b06a01bd0fdd..aa93b750a9f32 100644 +--- a/security/integrity/digsig.c ++++ b/security/integrity/digsig.c +@@ -122,6 +122,7 @@ int __init integrity_init_keyring(const unsigned int id) + { + struct key_restriction *restriction; + key_perm_t perm; ++ int ret; + + perm = (KEY_POS_ALL & ~KEY_POS_SETATTR) | KEY_USR_VIEW + | KEY_USR_READ | KEY_USR_SEARCH; +@@ -142,7 +143,10 @@ int __init integrity_init_keyring(const unsigned int id) + perm |= KEY_USR_WRITE; + + out: +- return __integrity_init_keyring(id, perm, restriction); ++ ret = __integrity_init_keyring(id, perm, restriction); ++ if (ret) ++ kfree(restriction); ++ return ret; + } + + static int __init integrity_add_key(const unsigned int id, const void *data, diff --git a/security/integrity/digsig_asymmetric.c b/security/integrity/digsig_asymmetric.c index 23240d793b074..895f4b9ce8c6b 100644 --- a/security/integrity/digsig_asymmetric.c @@ -413791,7 +533181,7 @@ index 3d8e9d5db5aa5..3ad8f7734208b 100644 return -1; } diff --git a/security/integrity/ima/ima_policy.c b/security/integrity/ima/ima_policy.c -index 87b9b71cb8201..748b97a2582a4 100644 +index 87b9b71cb8201..ed43d30682ff8 100644 --- a/security/integrity/ima/ima_policy.c +++ b/security/integrity/ima/ima_policy.c @@ -228,7 +228,7 @@ static struct ima_rule_entry *arch_policy_entry __ro_after_init; @@ -413803,7 +533193,113 @@ index 87b9b71cb8201..748b97a2582a4 100644 static int ima_policy __initdata; -@@ -675,12 +675,14 @@ int ima_match_policy(struct user_namespace *mnt_userns, struct inode *inode, +@@ -391,12 +391,6 @@ static struct ima_rule_entry *ima_lsm_copy_rule(struct ima_rule_entry *entry) + + nentry->lsm[i].type = entry->lsm[i].type; + nentry->lsm[i].args_p = entry->lsm[i].args_p; +- /* +- * Remove the reference from entry so that the associated +- * memory will not be freed during a later call to +- * ima_lsm_free_rule(entry). +- */ +- entry->lsm[i].args_p = NULL; + + ima_filter_rule_init(nentry->lsm[i].type, Audit_equal, + nentry->lsm[i].args_p, +@@ -410,6 +404,7 @@ static struct ima_rule_entry *ima_lsm_copy_rule(struct ima_rule_entry *entry) + + static int ima_lsm_update_rule(struct ima_rule_entry *entry) + { ++ int i; + struct ima_rule_entry *nentry; + + nentry = ima_lsm_copy_rule(entry); +@@ -424,7 +419,8 @@ static int ima_lsm_update_rule(struct ima_rule_entry *entry) + * references and the entry itself. All other memory refrences will now + * be owned by nentry. 
+ */ +- ima_lsm_free_rule(entry); ++ for (i = 0; i < MAX_LSM_RULES; i++) ++ ima_filter_rule_free(entry->lsm[i].rule); + kfree(entry); + + return 0; +@@ -542,6 +538,9 @@ static bool ima_match_rules(struct ima_rule_entry *rule, + const char *func_data) + { + int i; ++ bool result = false; ++ struct ima_rule_entry *lsm_rule = rule; ++ bool rule_reinitialized = false; + + if ((rule->flags & IMA_FUNC) && + (rule->func != func && func != POST_SETATTR)) +@@ -590,35 +589,55 @@ static bool ima_match_rules(struct ima_rule_entry *rule, + int rc = 0; + u32 osid; + +- if (!rule->lsm[i].rule) { +- if (!rule->lsm[i].args_p) ++ if (!lsm_rule->lsm[i].rule) { ++ if (!lsm_rule->lsm[i].args_p) + continue; + else + return false; + } ++ ++retry: + switch (i) { + case LSM_OBJ_USER: + case LSM_OBJ_ROLE: + case LSM_OBJ_TYPE: + security_inode_getsecid(inode, &osid); +- rc = ima_filter_rule_match(osid, rule->lsm[i].type, ++ rc = ima_filter_rule_match(osid, lsm_rule->lsm[i].type, + Audit_equal, +- rule->lsm[i].rule); ++ lsm_rule->lsm[i].rule); + break; + case LSM_SUBJ_USER: + case LSM_SUBJ_ROLE: + case LSM_SUBJ_TYPE: +- rc = ima_filter_rule_match(secid, rule->lsm[i].type, ++ rc = ima_filter_rule_match(secid, lsm_rule->lsm[i].type, + Audit_equal, +- rule->lsm[i].rule); ++ lsm_rule->lsm[i].rule); + break; + default: + break; + } +- if (!rc) +- return false; ++ ++ if (rc == -ESTALE && !rule_reinitialized) { ++ lsm_rule = ima_lsm_copy_rule(rule); ++ if (lsm_rule) { ++ rule_reinitialized = true; ++ goto retry; ++ } ++ } ++ if (!rc) { ++ result = false; ++ goto out; ++ } + } +- return true; ++ result = true; ++ ++out: ++ if (rule_reinitialized) { ++ for (i = 0; i < MAX_LSM_RULES; i++) ++ ima_filter_rule_free(lsm_rule->lsm[i].rule); ++ kfree(lsm_rule); ++ } ++ return result; + } + + /* +@@ -675,12 +694,14 @@ int ima_match_policy(struct user_namespace *mnt_userns, struct inode *inode, { struct ima_rule_entry *entry; int action = 0, actmask = flags | (flags << 1); @@ -413819,7 +533315,7 @@ index 87b9b71cb8201..748b97a2582a4 100644 if (!(entry->action & actmask)) continue; -@@ -741,9 +743,11 @@ void ima_update_policy_flags(void) +@@ -741,9 +762,11 @@ void ima_update_policy_flags(void) { struct ima_rule_entry *entry; int new_policy_flag = 0; @@ -413832,7 +533328,7 @@ index 87b9b71cb8201..748b97a2582a4 100644 /* * SETXATTR_CHECK rules do not implement a full policy check * because rule checking would probably have an important -@@ -968,10 +972,10 @@ void ima_update_policy(void) +@@ -968,10 +991,10 @@ void ima_update_policy(void) list_splice_tail_init_rcu(&ima_temp_rules, policy, synchronize_rcu); @@ -413845,7 +533341,7 @@ index 87b9b71cb8201..748b97a2582a4 100644 /* * IMA architecture specific policy rules are specified * as strings and converted to an array of ima_entry_rules -@@ -1061,7 +1065,7 @@ static int ima_lsm_rule_init(struct ima_rule_entry *entry, +@@ -1061,7 +1084,7 @@ static int ima_lsm_rule_init(struct ima_rule_entry *entry, pr_warn("rule for LSM \'%s\' is undefined\n", entry->lsm[lsm_rule].args_p); @@ -413854,7 +533350,7 @@ index 87b9b71cb8201..748b97a2582a4 100644 kfree(entry->lsm[lsm_rule].args_p); entry->lsm[lsm_rule].args_p = NULL; result = -EINVAL; -@@ -1768,9 +1772,11 @@ void *ima_policy_start(struct seq_file *m, loff_t *pos) +@@ -1768,9 +1791,11 @@ void *ima_policy_start(struct seq_file *m, loff_t *pos) { loff_t l = *pos; struct ima_rule_entry *entry; @@ -413867,7 +533363,7 @@ index 87b9b71cb8201..748b97a2582a4 100644 if (!l--) { rcu_read_unlock(); return entry; -@@ -1789,7 +1795,8 @@ void 
*ima_policy_next(struct seq_file *m, void *v, loff_t *pos) +@@ -1789,7 +1814,8 @@ void *ima_policy_next(struct seq_file *m, void *v, loff_t *pos) rcu_read_unlock(); (*pos)++; @@ -413877,7 +533373,7 @@ index 87b9b71cb8201..748b97a2582a4 100644 } void ima_policy_stop(struct seq_file *m, void *v) -@@ -1845,6 +1852,14 @@ int ima_policy_show(struct seq_file *m, void *v) +@@ -1845,6 +1871,14 @@ int ima_policy_show(struct seq_file *m, void *v) rcu_read_lock(); @@ -413892,7 +533388,7 @@ index 87b9b71cb8201..748b97a2582a4 100644 if (entry->action & MEASURE) seq_puts(m, pt(Opt_measure)); if (entry->action & DONT_MEASURE) -@@ -2014,14 +2029,20 @@ bool ima_appraise_signature(enum kernel_read_file_id id) +@@ -2014,14 +2048,20 @@ bool ima_appraise_signature(enum kernel_read_file_id id) struct ima_rule_entry *entry; bool found = false; enum ima_hooks func; @@ -413915,7 +533411,7 @@ index 87b9b71cb8201..748b97a2582a4 100644 continue; diff --git a/security/integrity/ima/ima_template.c b/security/integrity/ima/ima_template.c -index 694560396be05..db1ad6d7a57fb 100644 +index 694560396be05..31a8388e3dfae 100644 --- a/security/integrity/ima/ima_template.c +++ b/security/integrity/ima/ima_template.c @@ -29,6 +29,7 @@ static struct ima_template_desc builtin_templates[] = { @@ -413965,6 +533461,33 @@ index 694560396be05..db1ad6d7a57fb 100644 return 1; } +@@ -237,11 +241,11 @@ int template_desc_init_fields(const char *template_fmt, + } + + if (fields && num_fields) { +- *fields = kmalloc_array(i, sizeof(*fields), GFP_KERNEL); ++ *fields = kmalloc_array(i, sizeof(**fields), GFP_KERNEL); + if (*fields == NULL) + return -ENOMEM; + +- memcpy(*fields, found_fields, i * sizeof(*fields)); ++ memcpy(*fields, found_fields, i * sizeof(**fields)); + *num_fields = i; + } + +@@ -332,8 +336,11 @@ static struct ima_template_desc *restore_template_fmt(char *template_name) + + template_desc->name = ""; + template_desc->fmt = kstrdup(template_name, GFP_KERNEL); +- if (!template_desc->fmt) ++ if (!template_desc->fmt) { ++ kfree(template_desc); ++ template_desc = NULL; + goto out; ++ } + + spin_lock(&template_list); + list_add_tail_rcu(&template_desc->list, &defined_templates); diff --git a/security/integrity/integrity_audit.c b/security/integrity/integrity_audit.c index 29220056207f4..0ec5e4c22cb2a 100644 --- a/security/integrity/integrity_audit.c @@ -413995,7 +533518,7 @@ index 2462bfa08fe34..cd06bd6072be2 100644 + }, +#endif diff --git a/security/integrity/platform_certs/load_uefi.c b/security/integrity/platform_certs/load_uefi.c -index f290f78c3f301..185c609c6e380 100644 +index f290f78c3f301..d2f2c3936277a 100644 --- a/security/integrity/platform_certs/load_uefi.c +++ b/security/integrity/platform_certs/load_uefi.c @@ -3,6 +3,7 @@ @@ -414006,7 +533529,7 @@ index f290f78c3f301..185c609c6e380 100644 #include <linux/err.h> #include <linux/efi.h> #include <linux/slab.h> -@@ -11,6 +12,31 @@ +@@ -11,6 +12,32 @@ #include "../integrity.h" #include "keyring_handler.h" @@ -414032,13 +533555,14 @@ index f290f78c3f301..185c609c6e380 100644 + { UEFI_QUIRK_SKIP_CERT("Apple Inc.", "MacPro7,1") }, + { UEFI_QUIRK_SKIP_CERT("Apple Inc.", "iMac20,1") }, + { UEFI_QUIRK_SKIP_CERT("Apple Inc.", "iMac20,2") }, ++ { UEFI_QUIRK_SKIP_CERT("Apple Inc.", "iMacPro1,1") }, + { } +}; + /* * Look to see if a UEFI variable called MokIgnoreDB exists and return true if * it does. 
-@@ -137,6 +163,13 @@ static int __init load_uefi_certs(void) +@@ -137,6 +164,13 @@ static int __init load_uefi_certs(void) unsigned long dbsize = 0, dbxsize = 0, mokxsize = 0; efi_status_t status; int rc = 0; @@ -415249,6 +534773,60 @@ index 32396962f04d6..507d43827afed 100644 /* Gets and checks the ruleset. */ ruleset = get_ruleset_from_fd(ruleset_fd, FMODE_CAN_READ); if (IS_ERR(ruleset)) +diff --git a/security/loadpin/loadpin.c b/security/loadpin/loadpin.c +index b12f7d986b1e3..5fce105a372d3 100644 +--- a/security/loadpin/loadpin.c ++++ b/security/loadpin/loadpin.c +@@ -118,21 +118,11 @@ static void loadpin_sb_free_security(struct super_block *mnt_sb) + } + } + +-static int loadpin_read_file(struct file *file, enum kernel_read_file_id id, +- bool contents) ++static int loadpin_check(struct file *file, enum kernel_read_file_id id) + { + struct super_block *load_root; + const char *origin = kernel_read_file_id_str(id); + +- /* +- * If we will not know that we'll be seeing the full contents +- * then we cannot trust a load will be complete and unchanged +- * off disk. Treat all contents=false hooks as if there were +- * no associated file struct. +- */ +- if (!contents) +- file = NULL; +- + /* If the file id is excluded, ignore the pinning. */ + if ((unsigned int)id < ARRAY_SIZE(ignore_read_file_id) && + ignore_read_file_id[id]) { +@@ -187,9 +177,25 @@ static int loadpin_read_file(struct file *file, enum kernel_read_file_id id, + return 0; + } + ++static int loadpin_read_file(struct file *file, enum kernel_read_file_id id, ++ bool contents) ++{ ++ /* ++ * LoadPin only cares about the _origin_ of a file, not its ++ * contents, so we can ignore the "are full contents available" ++ * argument here. ++ */ ++ return loadpin_check(file, id); ++} ++ + static int loadpin_load_data(enum kernel_load_data_id id, bool contents) + { +- return loadpin_read_file(NULL, (enum kernel_read_file_id) id, contents); ++ /* ++ * LoadPin only cares about the _origin_ of a file, not its ++ * contents, so a NULL file is passed, and we can ignore the ++ * state of "contents". ++ */ ++ return loadpin_check(NULL, (enum kernel_read_file_id) id); + } + + static struct security_hook_list loadpin_hooks[] __lsm_ro_after_init = { diff --git a/security/security.c b/security/security.c index 9ffa9e9c5c554..7b9f9d3fffe52 100644 --- a/security/security.c @@ -416148,6 +535726,19 @@ index 3a75d2a8f5178..658eab05599e6 100644 rc = count; /* * This mapping may have been cached, so clear the cache. 
+diff --git a/security/tomoyo/Makefile b/security/tomoyo/Makefile +index cca5a3012fee2..221eaadffb09c 100644 +--- a/security/tomoyo/Makefile ++++ b/security/tomoyo/Makefile +@@ -10,7 +10,7 @@ endef + quiet_cmd_policy = POLICY $@ + cmd_policy = ($(call do_policy,profile); $(call do_policy,exception_policy); $(call do_policy,domain_policy); $(call do_policy,manager); $(call do_policy,stat)) >$@ + +-$(obj)/builtin-policy.h: $(wildcard $(obj)/policy/*.conf $(src)/policy/*.conf.default) FORCE ++$(obj)/builtin-policy.h: $(wildcard $(obj)/policy/*.conf $(srctree)/$(src)/policy/*.conf.default) FORCE + $(call if_changed,policy) + + $(obj)/common.o: $(obj)/builtin-policy.h diff --git a/security/tomoyo/load_policy.c b/security/tomoyo/load_policy.c index 3445ae6fd4794..363b65be87ab7 100644 --- a/security/tomoyo/load_policy.c @@ -416245,6 +535836,38 @@ index 1da2e3722b126..6799b1122c9d8 100644 - } return false; } +diff --git a/sound/aoa/soundbus/i2sbus/core.c b/sound/aoa/soundbus/i2sbus/core.c +index faf6b03131ee4..51ed2f34b276d 100644 +--- a/sound/aoa/soundbus/i2sbus/core.c ++++ b/sound/aoa/soundbus/i2sbus/core.c +@@ -147,6 +147,7 @@ static int i2sbus_get_and_fixup_rsrc(struct device_node *np, int index, + return rc; + } + ++/* Returns 1 if added, 0 for otherwise; don't return a negative value! */ + /* FIXME: look at device node refcounting */ + static int i2sbus_add_dev(struct macio_dev *macio, + struct i2sbus_control *control, +@@ -213,7 +214,7 @@ static int i2sbus_add_dev(struct macio_dev *macio, + * either as the second one in that case is just a modem. */ + if (!ok) { + kfree(dev); +- return -ENODEV; ++ return 0; + } + + mutex_init(&dev->lock); +@@ -302,6 +303,10 @@ static int i2sbus_add_dev(struct macio_dev *macio, + + if (soundbus_add_one(&dev->sound)) { + printk(KERN_DEBUG "i2sbus: device registration error!\n"); ++ if (dev->sound.ofdev.dev.kobj.state_initialized) { ++ soundbus_dev_put(&dev->sound); ++ return 0; ++ } + goto err; + } + diff --git a/sound/core/Makefile b/sound/core/Makefile index d774792850f31..79e1407cd0de7 100644 --- a/sound/core/Makefile @@ -416260,7 +535883,7 @@ index d774792850f31..79e1407cd0de7 100644 snd-$(CONFIG_SND_VMASTER) += vmaster.o snd-$(CONFIG_SND_JACK) += ctljack.o jack.o diff --git a/sound/core/control.c b/sound/core/control.c -index a25c0d64d104f..f66fe4be30d35 100644 +index a25c0d64d104f..b83ec284d6114 100644 --- a/sound/core/control.c +++ b/sound/core/control.c @@ -127,6 +127,7 @@ static int snd_ctl_release(struct inode *inode, struct file *file) @@ -416280,7 +535903,72 @@ index a25c0d64d104f..f66fe4be30d35 100644 } read_unlock_irqrestore(&card->ctl_files_rwlock, flags); } -@@ -2002,7 +2003,7 @@ static int snd_ctl_fasync(int fd, struct file * file, int on) +@@ -1066,14 +1067,19 @@ static int snd_ctl_elem_read(struct snd_card *card, + const u32 pattern = 0xdeadbeef; + int ret; + ++ down_read(&card->controls_rwsem); + kctl = snd_ctl_find_id(card, &control->id); +- if (kctl == NULL) +- return -ENOENT; ++ if (kctl == NULL) { ++ ret = -ENOENT; ++ goto unlock; ++ } + + index_offset = snd_ctl_get_ioff(kctl, &control->id); + vd = &kctl->vd[index_offset]; +- if (!(vd->access & SNDRV_CTL_ELEM_ACCESS_READ) || kctl->get == NULL) +- return -EPERM; ++ if (!(vd->access & SNDRV_CTL_ELEM_ACCESS_READ) || kctl->get == NULL) { ++ ret = -EPERM; ++ goto unlock; ++ } + + snd_ctl_build_ioff(&control->id, kctl, index_offset); + +@@ -1083,7 +1089,7 @@ static int snd_ctl_elem_read(struct snd_card *card, + info.id = control->id; + ret = __snd_ctl_elem_info(card, kctl, &info, NULL); + if 
(ret < 0) +- return ret; ++ goto unlock; + #endif + + if (!snd_ctl_skip_validation(&info)) +@@ -1093,7 +1099,7 @@ static int snd_ctl_elem_read(struct snd_card *card, + ret = kctl->get(kctl, control); + snd_power_unref(card); + if (ret < 0) +- return ret; ++ goto unlock; + if (!snd_ctl_skip_validation(&info) && + sanity_check_elem_value(card, control, &info, pattern) < 0) { + dev_err(card->dev, +@@ -1101,8 +1107,11 @@ static int snd_ctl_elem_read(struct snd_card *card, + control->id.iface, control->id.device, + control->id.subdevice, control->id.name, + control->id.index); +- return -EINVAL; ++ ret = -EINVAL; ++ goto unlock; + } ++unlock: ++ up_read(&card->controls_rwsem); + return ret; + } + +@@ -1116,9 +1125,7 @@ static int snd_ctl_elem_read_user(struct snd_card *card, + if (IS_ERR(control)) + return PTR_ERR(control); + +- down_read(&card->controls_rwsem); + result = snd_ctl_elem_read(card, control); +- up_read(&card->controls_rwsem); + if (result < 0) + goto error; + +@@ -2002,7 +2009,7 @@ static int snd_ctl_fasync(int fd, struct file * file, int on) struct snd_ctl_file *ctl; ctl = file->private_data; @@ -416289,7 +535977,7 @@ index a25c0d64d104f..f66fe4be30d35 100644 } /* return the preferred subdevice number if already assigned; -@@ -2170,7 +2171,7 @@ static int snd_ctl_dev_disconnect(struct snd_device *device) +@@ -2170,7 +2177,7 @@ static int snd_ctl_dev_disconnect(struct snd_device *device) read_lock_irqsave(&card->ctl_files_rwlock, flags); list_for_each_entry(ctl, &card->ctl_files, list) { wake_up(&ctl->change_sleep); @@ -416319,6 +536007,25 @@ index 470dabc60aa0e..edff063e088d2 100644 return 0; } +diff --git a/sound/core/control_led.c b/sound/core/control_led.c +index a95332b2b90b0..3eb1c5af82ad1 100644 +--- a/sound/core/control_led.c ++++ b/sound/core/control_led.c +@@ -530,12 +530,11 @@ static ssize_t set_led_id(struct snd_ctl_led_card *led_card, const char *buf, si + bool attach) + { + char buf2[256], *s, *os; +- size_t len = max(sizeof(s) - 1, count); + struct snd_ctl_elem_id id; + int err; + +- strncpy(buf2, buf, len); +- buf2[len] = '\0'; ++ if (strscpy(buf2, buf, sizeof(buf2)) < 0) ++ return -E2BIG; + memset(&id, 0, sizeof(id)); + id.iface = SNDRV_CTL_ELEM_IFACE_MIXER; + s = buf2; diff --git a/sound/core/info.c b/sound/core/info.c index a451b24199c3e..9f6714e29bbc3 100644 --- a/sound/core/info.c @@ -417162,7 +536869,7 @@ index 4866aed97aacc..5588b6a1ee8bd 100644 /* signed or 1 byte data */ if (pcm_formats[(INT)format].signd == 1 || width <= 8) { diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c -index d233cb3b41d8b..f38c2e5e9a297 100644 +index d233cb3b41d8b..44e06ef4ff0b4 100644 --- a/sound/core/pcm_native.c +++ b/sound/core/pcm_native.c @@ -672,6 +672,30 @@ static int snd_pcm_hw_params_choose(struct snd_pcm_substream *pcm, @@ -417366,7 +537073,19 @@ index d233cb3b41d8b..f38c2e5e9a297 100644 up_read(&snd_pcm_link_rwsem); return res; } -@@ -1830,11 +1876,13 @@ static int snd_pcm_do_reset(struct snd_pcm_substream *substream, +@@ -1373,8 +1419,10 @@ static int snd_pcm_do_start(struct snd_pcm_substream *substream, + static void snd_pcm_undo_start(struct snd_pcm_substream *substream, + snd_pcm_state_t state) + { +- if (substream->runtime->trigger_master == substream) ++ if (substream->runtime->trigger_master == substream) { + substream->ops->trigger(substream, SNDRV_PCM_TRIGGER_STOP); ++ substream->runtime->stop_operating = true; ++ } + } + + static void snd_pcm_post_start(struct snd_pcm_substream *substream, +@@ -1830,11 +1878,13 @@ static int 
snd_pcm_do_reset(struct snd_pcm_substream *substream, int err = snd_pcm_ops_ioctl(substream, SNDRV_PCM_IOCTL1_RESET, NULL); if (err < 0) return err; @@ -417380,7 +537099,7 @@ index d233cb3b41d8b..f38c2e5e9a297 100644 return 0; } -@@ -1842,10 +1890,12 @@ static void snd_pcm_post_reset(struct snd_pcm_substream *substream, +@@ -1842,10 +1892,12 @@ static void snd_pcm_post_reset(struct snd_pcm_substream *substream, snd_pcm_state_t state) { struct snd_pcm_runtime *runtime = substream->runtime; @@ -417464,6 +537183,42 @@ index 2e9d695d336c9..2d707afa1ef1c 100644 snd_seq_device_load_drivers(); } } +diff --git a/sound/core/seq/seq_memory.c b/sound/core/seq/seq_memory.c +index b7aee23fc3876..47ef6bc30c0ee 100644 +--- a/sound/core/seq/seq_memory.c ++++ b/sound/core/seq/seq_memory.c +@@ -113,15 +113,19 @@ EXPORT_SYMBOL(snd_seq_dump_var_event); + * expand the variable length event to linear buffer space. + */ + +-static int seq_copy_in_kernel(char **bufptr, const void *src, int size) ++static int seq_copy_in_kernel(void *ptr, void *src, int size) + { ++ char **bufptr = ptr; ++ + memcpy(*bufptr, src, size); + *bufptr += size; + return 0; + } + +-static int seq_copy_in_user(char __user **bufptr, const void *src, int size) ++static int seq_copy_in_user(void *ptr, void *src, int size) + { ++ char __user **bufptr = ptr; ++ + if (copy_to_user(*bufptr, src, size)) + return -EFAULT; + *bufptr += size; +@@ -151,8 +155,7 @@ int snd_seq_expand_var_event(const struct snd_seq_event *event, int count, char + return newlen; + } + err = snd_seq_dump_var_event(event, +- in_kernel ? (snd_seq_dump_func_t)seq_copy_in_kernel : +- (snd_seq_dump_func_t)seq_copy_in_user, ++ in_kernel ? seq_copy_in_kernel : seq_copy_in_user, + &buf); + return err < 0 ? err : newlen; + } diff --git a/sound/core/seq/seq_queue.c b/sound/core/seq/seq_queue.c index d6c02dea976c8..bc933104c3eea 100644 --- a/sound/core/seq/seq_queue.c @@ -417700,6 +537455,20 @@ index 11235baaf6fa5..f212f233ea618 100644 platform_set_drvdata(dev, card); printk(KERN_INFO "Motu MidiTimePiece on parallel port irq: %d ioport: 0x%lx\n", irq, port); return 0; +diff --git a/sound/drivers/mts64.c b/sound/drivers/mts64.c +index d3bc9e8c407dc..f0d34cf70c3e0 100644 +--- a/sound/drivers/mts64.c ++++ b/sound/drivers/mts64.c +@@ -815,6 +815,9 @@ static void snd_mts64_interrupt(void *private) + u8 status, data; + struct snd_rawmidi_substream *substream; + ++ if (!mts) ++ return; ++ + spin_lock(&mts->lock); + ret = mts64_read(mts->pardev->port); + data = ret & 0x00ff; diff --git a/sound/drivers/opl3/opl3_midi.c b/sound/drivers/opl3/opl3_midi.c index e1b69c65c3c88..e2b7be67f0e30 100644 --- a/sound/drivers/opl3/opl3_midi.c @@ -417713,6 +537482,41 @@ index e1b69c65c3c88..e2b7be67f0e30 100644 opl3_reg = reg_side | (OPL3_REG_KEYON_BLOCK + voice_offset + 3); reg_val = vp->keyon_reg & ~OPL3_KEYON_BIT; +diff --git a/sound/firewire/dice/dice-stream.c b/sound/firewire/dice/dice-stream.c +index f99e00083141e..4c677c8546c71 100644 +--- a/sound/firewire/dice/dice-stream.c ++++ b/sound/firewire/dice/dice-stream.c +@@ -59,7 +59,7 @@ int snd_dice_stream_get_rate_mode(struct snd_dice *dice, unsigned int rate, + + static int select_clock(struct snd_dice *dice, unsigned int rate) + { +- __be32 reg; ++ __be32 reg, new; + u32 data; + int i; + int err; +@@ -83,15 +83,17 @@ static int select_clock(struct snd_dice *dice, unsigned int rate) + if (completion_done(&dice->clock_accepted)) + reinit_completion(&dice->clock_accepted); + +- reg = cpu_to_be32(data); ++ new = cpu_to_be32(data); + err = 
snd_dice_transaction_write_global(dice, GLOBAL_CLOCK_SELECT, +- ®, sizeof(reg)); ++ &new, sizeof(new)); + if (err < 0) + return err; + + if (wait_for_completion_timeout(&dice->clock_accepted, +- msecs_to_jiffies(NOTIFICATION_TIMEOUT_MS)) == 0) +- return -ETIMEDOUT; ++ msecs_to_jiffies(NOTIFICATION_TIMEOUT_MS)) == 0) { ++ if (reg != new) ++ return -ETIMEDOUT; ++ } + + return 0; + } diff --git a/sound/firewire/fcp.c b/sound/firewire/fcp.c index bbfbebf4affbc..df44dd5dc4b22 100644 --- a/sound/firewire/fcp.c @@ -417808,7 +537612,7 @@ index c13034f6c2ca5..d728e451a25c6 100644 /* This is an arbitrary number for convinience. */ diff --git a/sound/hda/ext/hdac_ext_stream.c b/sound/hda/ext/hdac_ext_stream.c -index 0c005d67fa891..37154ed43bd53 100644 +index 0c005d67fa891..c09652da43ffd 100644 --- a/sound/hda/ext/hdac_ext_stream.c +++ b/sound/hda/ext/hdac_ext_stream.c @@ -106,20 +106,14 @@ void snd_hdac_stream_free_all(struct hdac_bus *bus) @@ -417934,6 +537738,30 @@ index 0c005d67fa891..37154ed43bd53 100644 stream->link_locked = 0; stream->link_substream = NULL; spin_unlock_irq(&bus->reg_lock); +@@ -465,23 +475,6 @@ int snd_hdac_ext_stream_get_spbmaxfifo(struct hdac_bus *bus, + } + EXPORT_SYMBOL_GPL(snd_hdac_ext_stream_get_spbmaxfifo); + +- +-/** +- * snd_hdac_ext_stop_streams - stop all stream if running +- * @bus: HD-audio core bus +- */ +-void snd_hdac_ext_stop_streams(struct hdac_bus *bus) +-{ +- struct hdac_stream *stream; +- +- if (bus->chip_init) { +- list_for_each_entry(stream, &bus->stream_list, list) +- snd_hdac_stream_stop(stream); +- snd_hdac_bus_stop_chip(bus); +- } +-} +-EXPORT_SYMBOL_GPL(snd_hdac_ext_stop_streams); +- + /** + * snd_hdac_ext_stream_drsm_enable - enable DMA resume for a stream + * @bus: HD-audio core bus diff --git a/sound/hda/hdac_device.c b/sound/hda/hdac_device.c index 3e9e9ac804f62..b7e5032b61c97 100644 --- a/sound/hda/hdac_device.c @@ -417947,10 +537775,44 @@ index 3e9e9ac804f62..b7e5032b61c97 100644 { 0x1af4, "QEMU" }, { 0x434d, "C-Media" }, diff --git a/sound/hda/hdac_stream.c b/sound/hda/hdac_stream.c -index 1eb8563db2dff..aa7955fdf68a0 100644 +index 1eb8563db2dff..eea22cf72aefd 100644 --- a/sound/hda/hdac_stream.c +++ b/sound/hda/hdac_stream.c -@@ -296,6 +296,7 @@ struct hdac_stream *snd_hdac_stream_assign(struct hdac_bus *bus, +@@ -142,6 +142,33 @@ void snd_hdac_stream_stop(struct hdac_stream *azx_dev) + } + EXPORT_SYMBOL_GPL(snd_hdac_stream_stop); + ++/** ++ * snd_hdac_stop_streams - stop all streams ++ * @bus: HD-audio core bus ++ */ ++void snd_hdac_stop_streams(struct hdac_bus *bus) ++{ ++ struct hdac_stream *stream; ++ ++ list_for_each_entry(stream, &bus->stream_list, list) ++ snd_hdac_stream_stop(stream); ++} ++EXPORT_SYMBOL_GPL(snd_hdac_stop_streams); ++ ++/** ++ * snd_hdac_stop_streams_and_chip - stop all streams and chip if running ++ * @bus: HD-audio core bus ++ */ ++void snd_hdac_stop_streams_and_chip(struct hdac_bus *bus) ++{ ++ ++ if (bus->chip_init) { ++ snd_hdac_stop_streams(bus); ++ snd_hdac_bus_stop_chip(bus); ++ } ++} ++EXPORT_SYMBOL_GPL(snd_hdac_stop_streams_and_chip); ++ + /** + * snd_hdac_stream_reset - reset a stream + * @azx_dev: HD-audio core stream to reset +@@ -296,6 +323,7 @@ struct hdac_stream *snd_hdac_stream_assign(struct hdac_bus *bus, int key = (substream->pcm->device << 16) | (substream->number << 2) | (substream->stream + 1); @@ -417958,7 +537820,7 @@ index 1eb8563db2dff..aa7955fdf68a0 100644 list_for_each_entry(azx_dev, &bus->stream_list, list) { if (azx_dev->direction != substream->stream) continue; -@@ -309,13 +310,12 @@ 
struct hdac_stream *snd_hdac_stream_assign(struct hdac_bus *bus, +@@ -309,13 +337,12 @@ struct hdac_stream *snd_hdac_stream_assign(struct hdac_bus *bus, res = azx_dev; } if (res) { @@ -417973,7 +537835,7 @@ index 1eb8563db2dff..aa7955fdf68a0 100644 return res; } EXPORT_SYMBOL_GPL(snd_hdac_stream_assign); -@@ -534,17 +534,11 @@ static void azx_timecounter_init(struct hdac_stream *azx_dev, +@@ -534,17 +561,11 @@ static void azx_timecounter_init(struct hdac_stream *azx_dev, cc->mask = CLOCKSOURCE_MASK(32); /* @@ -417995,6 +537857,22 @@ index 1eb8563db2dff..aa7955fdf68a0 100644 nsec = 0; /* audio time is elapsed time since trigger */ timecounter_init(tc, cc, nsec); +diff --git a/sound/hda/hdac_sysfs.c b/sound/hda/hdac_sysfs.c +index 0d7771fca9f06..6b8d156537490 100644 +--- a/sound/hda/hdac_sysfs.c ++++ b/sound/hda/hdac_sysfs.c +@@ -346,8 +346,10 @@ static int add_widget_node(struct kobject *parent, hda_nid_t nid, + return -ENOMEM; + kobject_init(kobj, &widget_ktype); + err = kobject_add(kobj, parent, "%02x", nid); +- if (err < 0) ++ if (err < 0) { ++ kobject_put(kobj); + return err; ++ } + err = sysfs_create_group(kobj, group); + if (err < 0) { + kobject_put(kobj); diff --git a/sound/hda/intel-dsp-config.c b/sound/hda/intel-dsp-config.c index c9d0ba353463b..4208fa8a4db5b 100644 --- a/sound/hda/intel-dsp-config.c @@ -418337,7 +538215,7 @@ index 93bc9bef7641f..41ce125971777 100644 help Say Y here to include support for audio on Cyrix/NatSemi CS5530 chips. diff --git a/sound/pci/ac97/ac97_codec.c b/sound/pci/ac97/ac97_codec.c -index 01f296d524ce6..cb60a07d39a8e 100644 +index 01f296d524ce6..ceead55f13ab1 100644 --- a/sound/pci/ac97/ac97_codec.c +++ b/sound/pci/ac97/ac97_codec.c @@ -938,8 +938,8 @@ static int snd_ac97_ad18xx_pcm_get_volume(struct snd_kcontrol *kcontrol, struct @@ -418351,6 +538229,14 @@ index 01f296d524ce6..cb60a07d39a8e 100644 mutex_unlock(&ac97->page_mutex); return 0; } +@@ -2009,6 +2009,7 @@ static int snd_ac97_dev_register(struct snd_device *device) + err = device_register(&ac97->dev); + if (err < 0) { + ac97_err(ac97, "Can't register ac97 bus\n"); ++ put_device(&ac97->dev); + ac97->dev.bus = NULL; + return err; + } diff --git a/sound/pci/ad1889.c b/sound/pci/ad1889.c index bba4dae8dcc70..50e30704bf6f9 100644 --- a/sound/pci/ad1889.c @@ -418465,6 +538351,19 @@ index 535eccd124bee..f33aeb692a112 100644 #ifdef CONFIG_PM_SLEEP static int snd_als4000_suspend(struct device *dev) { +diff --git a/sound/pci/asihpi/hpioctl.c b/sound/pci/asihpi/hpioctl.c +index bb31b7fe867d6..477a5b4b50bcb 100644 +--- a/sound/pci/asihpi/hpioctl.c ++++ b/sound/pci/asihpi/hpioctl.c +@@ -361,7 +361,7 @@ int asihpi_adapter_probe(struct pci_dev *pci_dev, + pci_dev->device, pci_dev->subsystem_vendor, + pci_dev->subsystem_device, pci_dev->devfn); + +- if (pci_enable_device(pci_dev) < 0) { ++ if (pcim_enable_device(pci_dev) < 0) { + dev_err(&pci_dev->dev, + "pci_enable_device failed, disabling device\n"); + return -EIO; diff --git a/sound/pci/atiixp.c b/sound/pci/atiixp.c index b8e035d5930d2..43d01f1847ed7 100644 --- a/sound/pci/atiixp.c @@ -418547,6 +538446,43 @@ index 342ef2a6655e3..eb234153691bc 100644 // pci_driver definition static struct pci_driver vortex_driver = { .name = KBUILD_MODNAME, +diff --git a/sound/pci/au88x0/au88x0.h b/sound/pci/au88x0/au88x0.h +index 0aa7af049b1b9..6cbb2bc4a0483 100644 +--- a/sound/pci/au88x0/au88x0.h ++++ b/sound/pci/au88x0/au88x0.h +@@ -141,7 +141,7 @@ struct snd_vortex { + #ifndef CHIP_AU8810 + stream_t dma_wt[NR_WT]; + wt_voice_t wt_voice[NR_WT]; /* WT register cache. 
*/ +- char mixwt[(NR_WT / NR_WTPB) * 6]; /* WT mixin objects */ ++ s8 mixwt[(NR_WT / NR_WTPB) * 6]; /* WT mixin objects */ + #endif + + /* Global resources */ +@@ -235,8 +235,8 @@ static int vortex_alsafmt_aspfmt(snd_pcm_format_t alsafmt, vortex_t *v); + static void vortex_connect_default(vortex_t * vortex, int en); + static int vortex_adb_allocroute(vortex_t * vortex, int dma, int nr_ch, + int dir, int type, int subdev); +-static char vortex_adb_checkinout(vortex_t * vortex, int resmap[], int out, +- int restype); ++static int vortex_adb_checkinout(vortex_t * vortex, int resmap[], int out, ++ int restype); + #ifndef CHIP_AU8810 + static int vortex_wt_allocroute(vortex_t * vortex, int dma, int nr_ch); + static void vortex_wt_connect(vortex_t * vortex, int en); +diff --git a/sound/pci/au88x0/au88x0_core.c b/sound/pci/au88x0/au88x0_core.c +index 2ed5100b8caea..f217c02dfdfa4 100644 +--- a/sound/pci/au88x0/au88x0_core.c ++++ b/sound/pci/au88x0/au88x0_core.c +@@ -1998,7 +1998,7 @@ static const int resnum[VORTEX_RESOURCE_LAST] = + out: Mean checkout if != 0. Else mean Checkin resource. + restype: Indicates type of resource to be checked in or out. + */ +-static char ++static int + vortex_adb_checkinout(vortex_t * vortex, int resmap[], int out, int restype) + { + int i, qty = resnum[restype], resinuse = 0; diff --git a/sound/pci/aw2/aw2-alsa.c b/sound/pci/aw2/aw2-alsa.c index d56f126d6fdd9..29a4bcdec237a 100644 --- a/sound/pci/aw2/aw2-alsa.c @@ -419303,7 +539239,7 @@ index a25358a4807ab..db76e3ddba654 100644 struct mutex mutex; void (*power_hook)(struct hda_beep *beep, bool on); diff --git a/sound/pci/hda/hda_bind.c b/sound/pci/hda/hda_bind.c -index 1c8bffc3eec6e..7af2515735957 100644 +index 1c8bffc3eec6e..8e35009ec25cb 100644 --- a/sound/pci/hda/hda_bind.c +++ b/sound/pci/hda/hda_bind.c @@ -14,6 +14,7 @@ @@ -419314,7 +539250,15 @@ index 1c8bffc3eec6e..7af2515735957 100644 /* * find a matching codec id -@@ -156,6 +157,12 @@ static int hda_codec_driver_remove(struct device *dev) +@@ -143,6 +144,7 @@ static int hda_codec_driver_probe(struct device *dev) + + error: + snd_hda_codec_cleanup_for_unbind(codec); ++ codec->preset = NULL; + return err; + } + +@@ -156,9 +158,16 @@ static int hda_codec_driver_remove(struct device *dev) return codec->bus->core.ext_ops->hdev_detach(&codec->core); } @@ -419327,8 +539271,12 @@ index 1c8bffc3eec6e..7af2515735957 100644 if (codec->patch_ops.free) codec->patch_ops.free(codec); snd_hda_codec_cleanup_for_unbind(codec); ++ codec->preset = NULL; + module_put(dev->driver->owner); + return 0; + } diff --git a/sound/pci/hda/hda_codec.c b/sound/pci/hda/hda_codec.c -index 0c4a337c9fc0d..f552785d301e0 100644 +index 0c4a337c9fc0d..19be60bb57810 100644 --- a/sound/pci/hda/hda_codec.c +++ b/sound/pci/hda/hda_codec.c @@ -703,20 +703,10 @@ get_hda_cvt_setup(struct hda_codec *codec, hda_nid_t nid) @@ -419413,7 +539361,15 @@ index 0c4a337c9fc0d..f552785d301e0 100644 cancel_delayed_work_sync(&codec->jackpoll_work); if (!codec->in_freeing) snd_hda_ctls_clear(codec); -@@ -792,6 +799,7 @@ void snd_hda_codec_cleanup_for_unbind(struct hda_codec *codec) +@@ -784,7 +791,6 @@ void snd_hda_codec_cleanup_for_unbind(struct hda_codec *codec) + snd_array_free(&codec->cvt_setups); + snd_array_free(&codec->spdif_out); + snd_array_free(&codec->verbs); +- codec->preset = NULL; + codec->follower_dig_outs = NULL; + codec->spdif_status_reset = 0; + snd_array_free(&codec->mixers); +@@ -792,6 +798,7 @@ void snd_hda_codec_cleanup_for_unbind(struct hda_codec *codec) remove_conn_list(codec); 
snd_hdac_regmap_exit(&codec->core); codec->configured = 0; @@ -419421,7 +539377,7 @@ index 0c4a337c9fc0d..f552785d301e0 100644 } EXPORT_SYMBOL_GPL(snd_hda_codec_cleanup_for_unbind); -@@ -958,6 +966,8 @@ int snd_hda_codec_device_new(struct hda_bus *bus, struct snd_card *card, +@@ -958,6 +965,8 @@ int snd_hda_codec_device_new(struct hda_bus *bus, struct snd_card *card, snd_array_init(&codec->verbs, sizeof(struct hda_verb *), 8); INIT_LIST_HEAD(&codec->conn_list); INIT_LIST_HEAD(&codec->pcm_list_head); @@ -419430,7 +539386,7 @@ index 0c4a337c9fc0d..f552785d301e0 100644 INIT_DELAYED_WORK(&codec->jackpoll_work, hda_jackpoll_work); codec->depop_delay = -1; -@@ -1727,8 +1737,11 @@ void snd_hda_ctls_clear(struct hda_codec *codec) +@@ -1727,8 +1736,11 @@ void snd_hda_ctls_clear(struct hda_codec *codec) { int i; struct hda_nid_item *items = codec->mixers.list; @@ -419442,7 +539398,7 @@ index 0c4a337c9fc0d..f552785d301e0 100644 snd_array_free(&codec->mixers); snd_array_free(&codec->nids); } -@@ -2987,6 +3000,10 @@ void snd_hda_codec_shutdown(struct hda_codec *codec) +@@ -2987,6 +2999,10 @@ void snd_hda_codec_shutdown(struct hda_codec *codec) { struct hda_pcm *cpcm; @@ -419454,7 +539410,7 @@ index 0c4a337c9fc0d..f552785d301e0 100644 snd_pcm_suspend_all(cpcm->pcm); diff --git a/sound/pci/hda/hda_controller.c b/sound/pci/hda/hda_controller.c -index 930ae4002a818..75dcb14ff20ad 100644 +index 930ae4002a818..0ff286b7b66be 100644 --- a/sound/pci/hda/hda_controller.c +++ b/sound/pci/hda/hda_controller.c @@ -504,7 +504,6 @@ static int azx_get_time_info(struct snd_pcm_substream *substream, @@ -419465,6 +539421,18 @@ index 930ae4002a818..75dcb14ff20ad 100644 if (audio_tstamp_config->report_delay) nsec = azx_adjust_codec_delay(substream, nsec); +@@ -1034,10 +1033,8 @@ EXPORT_SYMBOL_GPL(azx_init_chip); + void azx_stop_all_streams(struct azx *chip) + { + struct hdac_bus *bus = azx_bus(chip); +- struct hdac_stream *s; + +- list_for_each_entry(s, &bus->stream_list, list) +- snd_hdac_stream_stop(s); ++ snd_hdac_stop_streams(bus); + } + EXPORT_SYMBOL_GPL(azx_stop_all_streams); + diff --git a/sound/pci/hda/hda_generic.c b/sound/pci/hda/hda_generic.c index 3bf5e34107038..fc114e5224806 100644 --- a/sound/pci/hda/hda_generic.c @@ -419525,7 +539493,7 @@ index c43bd0f0338ea..362ddcaea15b3 100644 /* values for add_stereo_mix_input flag */ diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c -index 4d22e7adeee8e..b5b71a2851190 100644 +index 4d22e7adeee8e..c8042eb703c34 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -335,7 +335,10 @@ enum { @@ -419688,6 +539656,16 @@ index 4d22e7adeee8e..b5b71a2851190 100644 /* Oaktrail */ { PCI_DEVICE(0x8086, 0x080a), .driver_data = AZX_DRIVER_SCH | AZX_DCAPS_INTEL_PCH_BASE }, +@@ -2675,6 +2687,9 @@ static const struct pci_device_id azx_ids[] = { + { PCI_DEVICE(0x1002, 0xab28), + .driver_data = AZX_DRIVER_ATIHDMI_NS | AZX_DCAPS_PRESET_ATI_HDMI_NS | + AZX_DCAPS_PM_RUNTIME }, ++ { PCI_DEVICE(0x1002, 0xab30), ++ .driver_data = AZX_DRIVER_ATIHDMI_NS | AZX_DCAPS_PRESET_ATI_HDMI_NS | ++ AZX_DCAPS_PM_RUNTIME }, + { PCI_DEVICE(0x1002, 0xab38), + .driver_data = AZX_DRIVER_ATIHDMI_NS | AZX_DCAPS_PRESET_ATI_HDMI_NS | + AZX_DCAPS_PM_RUNTIME }, diff --git a/sound/pci/hda/hda_jack.c b/sound/pci/hda/hda_jack.c index f29975e3e98df..7d7786df60ea7 100644 --- a/sound/pci/hda/hda_jack.c @@ -419866,6 +539844,18 @@ index ea700395bef40..f0e556f2ccf69 100644 hda->clocks[hda->nclocks++].id = "hda"; hda->clocks[hda->nclocks++].id = "hda2hdmi"; +diff --git 
a/sound/pci/hda/patch_ca0132.c b/sound/pci/hda/patch_ca0132.c +index 208933792787d..801dd8d44953b 100644 +--- a/sound/pci/hda/patch_ca0132.c ++++ b/sound/pci/hda/patch_ca0132.c +@@ -1306,6 +1306,7 @@ static const struct snd_pci_quirk ca0132_quirks[] = { + SND_PCI_QUIRK(0x1458, 0xA026, "Gigabyte G1.Sniper Z97", QUIRK_R3DI), + SND_PCI_QUIRK(0x1458, 0xA036, "Gigabyte GA-Z170X-Gaming 7", QUIRK_R3DI), + SND_PCI_QUIRK(0x3842, 0x1038, "EVGA X99 Classified", QUIRK_R3DI), ++ SND_PCI_QUIRK(0x3842, 0x1055, "EVGA Z390 DARK", QUIRK_R3DI), + SND_PCI_QUIRK(0x1102, 0x0013, "Recon3D", QUIRK_R3D), + SND_PCI_QUIRK(0x1102, 0x0018, "Recon3D", QUIRK_R3D), + SND_PCI_QUIRK(0x1102, 0x0051, "Sound Blaster AE-5", QUIRK_AE5), diff --git a/sound/pci/hda/patch_cirrus.c b/sound/pci/hda/patch_cirrus.c index 678fbcaf2a3bc..6807b4708a176 100644 --- a/sound/pci/hda/patch_cirrus.c @@ -419879,7 +539869,7 @@ index 678fbcaf2a3bc..6807b4708a176 100644 SND_PCI_QUIRK(0x106b, 0x2000, "iMac 12,2", CS420X_IMAC27_122), SND_PCI_QUIRK(0x106b, 0x2800, "MacBookPro 10,1", CS420X_MBP101), diff --git a/sound/pci/hda/patch_conexant.c b/sound/pci/hda/patch_conexant.c -index 0515137a75b0f..2bc9274e0960b 100644 +index 0515137a75b0f..9d6464ded63e5 100644 --- a/sound/pci/hda/patch_conexant.c +++ b/sound/pci/hda/patch_conexant.c @@ -222,6 +222,7 @@ enum { @@ -419950,6 +539940,14 @@ index 0515137a75b0f..2bc9274e0960b 100644 if (err < 0) goto error; +@@ -1107,6 +1124,7 @@ static const struct hda_device_id snd_hda_id_conexant[] = { + HDA_CODEC_ENTRY(0x14f11f86, "CX8070", patch_conexant_auto), + HDA_CODEC_ENTRY(0x14f12008, "CX8200", patch_conexant_auto), + HDA_CODEC_ENTRY(0x14f120d0, "CX11970", patch_conexant_auto), ++ HDA_CODEC_ENTRY(0x14f120d1, "SN6180", patch_conexant_auto), + HDA_CODEC_ENTRY(0x14f15045, "CX20549 (Venice)", patch_conexant_auto), + HDA_CODEC_ENTRY(0x14f15047, "CX20551 (Waikiki)", patch_conexant_auto), + HDA_CODEC_ENTRY(0x14f15051, "CX20561 (Hermosa)", patch_conexant_auto), diff --git a/sound/pci/hda/patch_cs8409-tables.c b/sound/pci/hda/patch_cs8409-tables.c index 0fb0a428428b4..df0b4522babf7 100644 --- a/sound/pci/hda/patch_cs8409-tables.c @@ -420036,7 +540034,7 @@ index ade2b838590cf..d0b725c7285b6 100644 unsigned int paged:1; unsigned int last_page; diff --git a/sound/pci/hda/patch_hdmi.c b/sound/pci/hda/patch_hdmi.c -index 65d2c55399195..ba1289abd45f8 100644 +index 65d2c55399195..8ed66c416c0e7 100644 --- a/sound/pci/hda/patch_hdmi.c +++ b/sound/pci/hda/patch_hdmi.c @@ -162,6 +162,8 @@ struct hdmi_spec { @@ -420117,7 +540115,16 @@ index 65d2c55399195..ba1289abd45f8 100644 goto out; present = snd_hda_jack_pin_sense(codec, pin_nid, dev_id); -@@ -2257,7 +2275,9 @@ static int generic_hdmi_build_pcms(struct hda_codec *codec) +@@ -1944,6 +1962,8 @@ static int hdmi_add_cvt(struct hda_codec *codec, hda_nid_t cvt_nid) + static const struct snd_pci_quirk force_connect_list[] = { + SND_PCI_QUIRK(0x103c, 0x870f, "HP", 1), + SND_PCI_QUIRK(0x103c, 0x871a, "HP", 1), ++ SND_PCI_QUIRK(0x103c, 0x8711, "HP", 1), ++ SND_PCI_QUIRK(0x103c, 0x8715, "HP", 1), + SND_PCI_QUIRK(0x1462, 0xec94, "MS-7C94", 1), + SND_PCI_QUIRK(0x8086, 0x2081, "Intel NUC 10", 1), + {} +@@ -2257,7 +2277,9 @@ static int generic_hdmi_build_pcms(struct hda_codec *codec) * dev_num is the device entry number in a pin */ @@ -420128,7 +540135,7 @@ index 65d2c55399195..ba1289abd45f8 100644 pcm_num = spec->num_nids; else pcm_num = spec->num_nids + spec->dev_num - 1; -@@ -2665,9 +2685,6 @@ static void generic_acomp_pin_eld_notify(void *audio_ptr, int port, int dev_id) +@@ -2665,9 +2687,6 
@@ static void generic_acomp_pin_eld_notify(void *audio_ptr, int port, int dev_id) */ if (codec->core.dev.power.power_state.event == PM_EVENT_SUSPEND) return; @@ -420138,7 +540145,7 @@ index 65d2c55399195..ba1289abd45f8 100644 check_presence_and_report(codec, pin_nid, dev_id); } -@@ -2851,9 +2868,6 @@ static void intel_pin_eld_notify(void *audio_ptr, int port, int pipe) +@@ -2851,9 +2870,6 @@ static void intel_pin_eld_notify(void *audio_ptr, int port, int pipe) */ if (codec->core.dev.power.power_state.event == PM_EVENT_SUSPEND) return; @@ -420148,7 +540155,7 @@ index 65d2c55399195..ba1289abd45f8 100644 snd_hdac_i915_set_bclk(&codec->bus->core); check_presence_and_report(codec, pin_nid, dev_id); -@@ -2947,7 +2961,8 @@ static int parse_intel_hdmi(struct hda_codec *codec) +@@ -2947,7 +2963,8 @@ static int parse_intel_hdmi(struct hda_codec *codec) /* Intel Haswell and onwards; audio component with eld notifier */ static int intel_hsw_common_init(struct hda_codec *codec, hda_nid_t vendor_nid, @@ -420158,7 +540165,7 @@ index 65d2c55399195..ba1289abd45f8 100644 { struct hdmi_spec *spec; int err; -@@ -2980,7 +2995,7 @@ static int intel_hsw_common_init(struct hda_codec *codec, hda_nid_t vendor_nid, +@@ -2980,7 +2997,7 @@ static int intel_hsw_common_init(struct hda_codec *codec, hda_nid_t vendor_nid, * Enable silent stream feature, if it is enabled via * module param or Kconfig option */ @@ -420167,7 +540174,7 @@ index 65d2c55399195..ba1289abd45f8 100644 spec->send_silent_stream = true; return parse_intel_hdmi(codec); -@@ -2988,12 +3003,18 @@ static int intel_hsw_common_init(struct hda_codec *codec, hda_nid_t vendor_nid, +@@ -2988,12 +3005,18 @@ static int intel_hsw_common_init(struct hda_codec *codec, hda_nid_t vendor_nid, static int patch_i915_hsw_hdmi(struct hda_codec *codec) { @@ -420188,7 +540195,7 @@ index 65d2c55399195..ba1289abd45f8 100644 } static int patch_i915_icl_hdmi(struct hda_codec *codec) -@@ -3004,7 +3025,8 @@ static int patch_i915_icl_hdmi(struct hda_codec *codec) +@@ -3004,7 +3027,8 @@ static int patch_i915_icl_hdmi(struct hda_codec *codec) */ static const int map[] = {0x0, 0x4, 0x6, 0x8, 0xa, 0xb}; @@ -420198,7 +540205,7 @@ index 65d2c55399195..ba1289abd45f8 100644 } static int patch_i915_tgl_hdmi(struct hda_codec *codec) -@@ -3016,7 +3038,8 @@ static int patch_i915_tgl_hdmi(struct hda_codec *codec) +@@ -3016,7 +3040,8 @@ static int patch_i915_tgl_hdmi(struct hda_codec *codec) static const int map[] = {0x4, 0x6, 0x8, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf}; int ret; @@ -420208,7 +540215,7 @@ index 65d2c55399195..ba1289abd45f8 100644 if (!ret) { struct hdmi_spec *spec = codec->spec; -@@ -3522,6 +3545,7 @@ static int patch_nvhdmi_2ch(struct hda_codec *codec) +@@ -3522,6 +3547,7 @@ static int patch_nvhdmi_2ch(struct hda_codec *codec) spec->pcm_playback.rates = SUPPORTED_RATES; spec->pcm_playback.maxbps = SUPPORTED_MAXBPS; spec->pcm_playback.formats = SUPPORTED_FORMATS; @@ -420216,7 +540223,7 @@ index 65d2c55399195..ba1289abd45f8 100644 return 0; } -@@ -3661,6 +3685,7 @@ static int patch_nvhdmi(struct hda_codec *codec) +@@ -3661,6 +3687,7 @@ static int patch_nvhdmi(struct hda_codec *codec) spec->chmap.ops.chmap_cea_alloc_validate_get_type = nvhdmi_chmap_cea_alloc_validate_get_type; spec->chmap.ops.chmap_validate = nvhdmi_chmap_validate; @@ -420224,7 +540231,7 @@ index 65d2c55399195..ba1289abd45f8 100644 codec->link_down_at_suspend = 1; -@@ -3684,6 +3709,7 @@ static int patch_nvhdmi_legacy(struct hda_codec *codec) +@@ -3684,6 +3711,7 @@ static int patch_nvhdmi_legacy(struct hda_codec *codec) 
spec->chmap.ops.chmap_cea_alloc_validate_get_type = nvhdmi_chmap_cea_alloc_validate_get_type; spec->chmap.ops.chmap_validate = nvhdmi_chmap_validate; @@ -420232,7 +540239,7 @@ index 65d2c55399195..ba1289abd45f8 100644 codec->link_down_at_suspend = 1; -@@ -3851,11 +3877,13 @@ static int patch_tegra_hdmi(struct hda_codec *codec) +@@ -3851,11 +3879,13 @@ static int patch_tegra_hdmi(struct hda_codec *codec) if (err) return err; @@ -420246,7 +540253,7 @@ index 65d2c55399195..ba1289abd45f8 100644 return 0; } -@@ -4380,10 +4408,11 @@ HDA_CODEC_ENTRY(0x8086280f, "Icelake HDMI", patch_i915_icl_hdmi), +@@ -4380,10 +4410,11 @@ HDA_CODEC_ENTRY(0x8086280f, "Icelake HDMI", patch_i915_icl_hdmi), HDA_CODEC_ENTRY(0x80862812, "Tigerlake HDMI", patch_i915_tgl_hdmi), HDA_CODEC_ENTRY(0x80862814, "DG1 HDMI", patch_i915_tgl_hdmi), HDA_CODEC_ENTRY(0x80862815, "Alderlake HDMI", patch_i915_tgl_hdmi), @@ -420260,7 +540267,7 @@ index 65d2c55399195..ba1289abd45f8 100644 HDA_CODEC_ENTRY(0x80862882, "Valleyview2 HDMI", patch_i915_byt_hdmi), HDA_CODEC_ENTRY(0x80862883, "Braswell HDMI", patch_i915_byt_hdmi), diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c -index 965b096f416f6..0de1dcd3b9465 100644 +index 965b096f416f6..dddb6f842ff29 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -97,6 +97,7 @@ struct alc_spec { @@ -420397,6 +540404,15 @@ index 965b096f416f6..0de1dcd3b9465 100644 case 0x10ec0283: case 0x10ec0286: case 0x10ec0288: +@@ -777,7 +826,7 @@ do_sku: + alc_setup_gpio(codec, 0x02); + break; + case 7: +- alc_setup_gpio(codec, 0x03); ++ alc_setup_gpio(codec, 0x04); + break; + case 5: + default: @@ -885,6 +934,9 @@ static int alc_init(struct hda_codec *codec) return 0; } @@ -420579,7 +540595,38 @@ index 965b096f416f6..0de1dcd3b9465 100644 alc_write_coef_idx(codec, 0x48, 0xd011); alc_update_coef_idx(codec, 0x49, 0x007f, 0x0045); break; -@@ -3532,8 +3628,8 @@ static void alc256_shutup(struct hda_codec *codec) +@@ -3462,6 +3558,15 @@ static void alc256_init(struct hda_codec *codec) + hda_nid_t hp_pin = alc_get_hp_pin(spec); + bool hp_pin_sense; + ++ if (spec->ultra_low_power) { ++ alc_update_coef_idx(codec, 0x03, 1<<1, 1<<1); ++ alc_update_coef_idx(codec, 0x08, 3<<2, 3<<2); ++ alc_update_coef_idx(codec, 0x08, 7<<4, 0); ++ alc_update_coef_idx(codec, 0x3b, 1<<15, 0); ++ alc_update_coef_idx(codec, 0x0e, 7<<6, 7<<6); ++ msleep(30); ++ } ++ + if (!hp_pin) + hp_pin = 0x21; + +@@ -3473,14 +3578,6 @@ static void alc256_init(struct hda_codec *codec) + msleep(2); + + alc_update_coefex_idx(codec, 0x57, 0x04, 0x0007, 0x1); /* Low power */ +- if (spec->ultra_low_power) { +- alc_update_coef_idx(codec, 0x03, 1<<1, 1<<1); +- alc_update_coef_idx(codec, 0x08, 3<<2, 3<<2); +- alc_update_coef_idx(codec, 0x08, 7<<4, 0); +- alc_update_coef_idx(codec, 0x3b, 1<<15, 0); +- alc_update_coef_idx(codec, 0x0e, 7<<6, 7<<6); +- msleep(30); +- } + + snd_hda_codec_write(codec, hp_pin, 0, + AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_MUTE); +@@ -3532,8 +3629,8 @@ static void alc256_shutup(struct hda_codec *codec) /* If disable 3k pulldown control for alc257, the Mic detection will not work correctly * when booting with headset plugged. 
So skip setting it for the codec alc257 */ @@ -420590,17 +540637,37 @@ index 965b096f416f6..0de1dcd3b9465 100644 alc_update_coef_idx(codec, 0x46, 0, 3 << 12); if (!spec->no_shutup_pins) -@@ -3611,7 +3707,8 @@ static void alc225_init(struct hda_codec *codec) +@@ -3611,7 +3708,15 @@ static void alc225_init(struct hda_codec *codec) hda_nid_t hp_pin = alc_get_hp_pin(spec); bool hp1_pin_sense, hp2_pin_sense; - if (spec->codec_variant != ALC269_TYPE_ALC287) ++ if (spec->ultra_low_power) { ++ alc_update_coef_idx(codec, 0x08, 0x0f << 2, 3<<2); ++ alc_update_coef_idx(codec, 0x0e, 7<<6, 7<<6); ++ alc_update_coef_idx(codec, 0x33, 1<<11, 0); ++ msleep(30); ++ } ++ + if (spec->codec_variant != ALC269_TYPE_ALC287 && + spec->codec_variant != ALC269_TYPE_ALC245) /* required only at boot or S3 and S4 resume time */ if (!spec->done_hp_init || is_s3_resume(codec) || -@@ -4355,6 +4452,16 @@ static void alc287_fixup_hp_gpio_led(struct hda_codec *codec, +@@ -3631,12 +3736,6 @@ static void alc225_init(struct hda_codec *codec) + msleep(2); + + alc_update_coefex_idx(codec, 0x57, 0x04, 0x0007, 0x1); /* Low power */ +- if (spec->ultra_low_power) { +- alc_update_coef_idx(codec, 0x08, 0x0f << 2, 3<<2); +- alc_update_coef_idx(codec, 0x0e, 7<<6, 7<<6); +- alc_update_coef_idx(codec, 0x33, 1<<11, 0); +- msleep(30); +- } + + if (hp1_pin_sense || spec->ultra_low_power) + snd_hda_codec_write(codec, hp_pin, 0, +@@ -4355,6 +4454,16 @@ static void alc287_fixup_hp_gpio_led(struct hda_codec *codec, alc_fixup_hp_gpio_led(codec, action, 0x10, 0); } @@ -420617,7 +540684,38 @@ index 965b096f416f6..0de1dcd3b9465 100644 /* turn on/off mic-mute LED per capture hook via VREF change */ static int vref_micmute_led_set(struct led_classdev *led_cdev, enum led_brightness brightness) -@@ -4577,6 +4684,48 @@ static void alc236_fixup_hp_mute_led_micmute_vref(struct hda_codec *codec, +@@ -4530,6 +4639,16 @@ static void alc285_fixup_hp_coef_micmute_led(struct hda_codec *codec, + } + } + ++static void alc285_fixup_hp_gpio_micmute_led(struct hda_codec *codec, ++ const struct hda_fixup *fix, int action) ++{ ++ struct alc_spec *spec = codec->spec; ++ ++ if (action == HDA_FIXUP_ACT_PRE_PROBE) ++ spec->micmute_led_polarity = 1; ++ alc_fixup_hp_gpio_led(codec, action, 0, 0x04); ++} ++ + static void alc236_fixup_hp_coef_micmute_led(struct hda_codec *codec, + const struct hda_fixup *fix, int action) + { +@@ -4551,6 +4670,13 @@ static void alc285_fixup_hp_mute_led(struct hda_codec *codec, + alc285_fixup_hp_coef_micmute_led(codec, fix, action); + } + ++static void alc285_fixup_hp_spectre_x360_mute_led(struct hda_codec *codec, ++ const struct hda_fixup *fix, int action) ++{ ++ alc285_fixup_hp_mute_led_coefbit(codec, fix, action); ++ alc285_fixup_hp_gpio_micmute_led(codec, fix, action); ++} ++ + static void alc236_fixup_hp_mute_led(struct hda_codec *codec, + const struct hda_fixup *fix, int action) + { +@@ -4577,6 +4703,48 @@ static void alc236_fixup_hp_mute_led_micmute_vref(struct hda_codec *codec, alc236_fixup_hp_micmute_led_vref(codec, fix, action); } @@ -420666,7 +540764,7 @@ index 965b096f416f6..0de1dcd3b9465 100644 #if IS_REACHABLE(CONFIG_INPUT) static void gpio2_mic_hotkey_event(struct hda_codec *codec, struct hda_jack_callback *event) -@@ -4803,6 +4952,7 @@ static void alc_headset_mode_unplugged(struct hda_codec *codec) +@@ -4803,6 +4971,7 @@ static void alc_headset_mode_unplugged(struct hda_codec *codec) case 0x10ec0230: case 0x10ec0236: case 0x10ec0256: @@ -420674,7 +540772,7 @@ index 965b096f416f6..0de1dcd3b9465 100644 alc_process_coef_fw(codec, 
coef0256); break; case 0x10ec0234: -@@ -4918,6 +5068,7 @@ static void alc_headset_mode_mic_in(struct hda_codec *codec, hda_nid_t hp_pin, +@@ -4918,6 +5087,7 @@ static void alc_headset_mode_mic_in(struct hda_codec *codec, hda_nid_t hp_pin, case 0x10ec0230: case 0x10ec0236: case 0x10ec0256: @@ -420682,7 +540780,7 @@ index 965b096f416f6..0de1dcd3b9465 100644 alc_write_coef_idx(codec, 0x45, 0xc489); snd_hda_set_pin_ctl_cache(codec, hp_pin, 0); alc_process_coef_fw(codec, coef0256); -@@ -5068,6 +5219,7 @@ static void alc_headset_mode_default(struct hda_codec *codec) +@@ -5068,6 +5238,7 @@ static void alc_headset_mode_default(struct hda_codec *codec) case 0x10ec0230: case 0x10ec0236: case 0x10ec0256: @@ -420690,7 +540788,7 @@ index 965b096f416f6..0de1dcd3b9465 100644 alc_write_coef_idx(codec, 0x1b, 0x0e4b); alc_write_coef_idx(codec, 0x45, 0xc089); msleep(50); -@@ -5167,6 +5319,7 @@ static void alc_headset_mode_ctia(struct hda_codec *codec) +@@ -5167,6 +5338,7 @@ static void alc_headset_mode_ctia(struct hda_codec *codec) case 0x10ec0230: case 0x10ec0236: case 0x10ec0256: @@ -420698,7 +540796,7 @@ index 965b096f416f6..0de1dcd3b9465 100644 alc_process_coef_fw(codec, coef0256); break; case 0x10ec0234: -@@ -5281,6 +5434,7 @@ static void alc_headset_mode_omtp(struct hda_codec *codec) +@@ -5281,6 +5453,7 @@ static void alc_headset_mode_omtp(struct hda_codec *codec) case 0x10ec0230: case 0x10ec0236: case 0x10ec0256: @@ -420706,7 +540804,7 @@ index 965b096f416f6..0de1dcd3b9465 100644 alc_process_coef_fw(codec, coef0256); break; case 0x10ec0234: -@@ -5382,6 +5536,7 @@ static void alc_determine_headset_type(struct hda_codec *codec) +@@ -5382,6 +5555,7 @@ static void alc_determine_headset_type(struct hda_codec *codec) case 0x10ec0230: case 0x10ec0236: case 0x10ec0256: @@ -420714,7 +540812,7 @@ index 965b096f416f6..0de1dcd3b9465 100644 alc_write_coef_idx(codec, 0x1b, 0x0e4b); alc_write_coef_idx(codec, 0x06, 0x6104); alc_write_coefex_idx(codec, 0x57, 0x3, 0x09a3); -@@ -5676,6 +5831,7 @@ static void alc255_set_default_jack_type(struct hda_codec *codec) +@@ -5676,6 +5850,7 @@ static void alc255_set_default_jack_type(struct hda_codec *codec) case 0x10ec0230: case 0x10ec0236: case 0x10ec0256: @@ -420722,7 +540820,7 @@ index 965b096f416f6..0de1dcd3b9465 100644 alc_process_coef_fw(codec, alc256fw); break; } -@@ -6278,6 +6434,7 @@ static void alc_combo_jack_hp_jd_restart(struct hda_codec *codec) +@@ -6278,6 +6453,7 @@ static void alc_combo_jack_hp_jd_restart(struct hda_codec *codec) case 0x10ec0236: case 0x10ec0255: case 0x10ec0256: @@ -420730,7 +540828,7 @@ index 965b096f416f6..0de1dcd3b9465 100644 alc_update_coef_idx(codec, 0x1b, 0x8000, 1 << 15); /* Reset HP JD */ alc_update_coef_idx(codec, 0x1b, 0x8000, 0 << 15); break; -@@ -6492,22 +6649,64 @@ static void alc287_fixup_legion_15imhg05_speakers(struct hda_codec *codec, +@@ -6492,22 +6668,92 @@ static void alc287_fixup_legion_15imhg05_speakers(struct hda_codec *codec, /* for alc285_fixup_ideapad_s740_coef() */ #include "ideapad_s740_helper.c" @@ -420747,8 +540845,19 @@ index 965b096f416f6..0de1dcd3b9465 100644 +static void alc256_fixup_set_coef_defaults(struct hda_codec *codec, + const struct hda_fixup *fix, + int action) -+{ -+ /* + { + /* +- * A certain other OS sets these coeffs to different values. On at least one TongFang +- * barebone these settings might survive even a cold reboot. So to restore a clean slate the +- * values are explicitly reset to default here. 
Without this, the external microphone is +- * always in a plugged-in state, while the internal microphone is always in an unplugged +- * state, breaking the ability to use the internal microphone. +- */ +- alc_write_coef_idx(codec, 0x24, 0x0000); +- alc_write_coef_idx(codec, 0x26, 0x0000); +- alc_write_coef_idx(codec, 0x29, 0x3000); +- alc_write_coef_idx(codec, 0x37, 0xfe05); +- alc_write_coef_idx(codec, 0x45, 0x5089); + * A certain other OS sets these coeffs to different values. On at least + * one TongFang barebone these settings might survive even a cold + * reboot. So to restore a clean slate the values are explicitly reset @@ -420769,19 +540878,8 @@ index 965b096f416f6..0de1dcd3b9465 100644 +static void alc233_fixup_no_audio_jack(struct hda_codec *codec, + const struct hda_fixup *fix, + int action) - { - /* -- * A certain other OS sets these coeffs to different values. On at least one TongFang -- * barebone these settings might survive even a cold reboot. So to restore a clean slate the -- * values are explicitly reset to default here. Without this, the external microphone is -- * always in a plugged-in state, while the internal microphone is always in an unplugged -- * state, breaking the ability to use the internal microphone. -- */ -- alc_write_coef_idx(codec, 0x24, 0x0000); -- alc_write_coef_idx(codec, 0x26, 0x0000); -- alc_write_coef_idx(codec, 0x29, 0x3000); -- alc_write_coef_idx(codec, 0x37, 0xfe05); -- alc_write_coef_idx(codec, 0x45, 0x5089); ++{ ++ /* + * The audio jack input and output is not detected on the ASRock NUC Box + * 1100 series when cold booting without this fix. Warm rebooting from a + * certain other OS makes the audio functional, as COEF settings are @@ -420805,11 +540903,39 @@ index 965b096f416f6..0de1dcd3b9465 100644 + snd_hda_codec_set_pincfg(codec, 0x19, 0x04a11120); + } else { + snd_hda_codec_set_pincfg(codec, 0x1a, 0x04a1113c); ++ } ++} ++ ++static void alc295_fixup_dell_inspiron_top_speakers(struct hda_codec *codec, ++ const struct hda_fixup *fix, int action) ++{ ++ static const struct hda_pintbl pincfgs[] = { ++ { 0x14, 0x90170151 }, ++ { 0x17, 0x90170150 }, ++ { } ++ }; ++ static const hda_nid_t conn[] = { 0x02, 0x03 }; ++ static const hda_nid_t preferred_pairs[] = { ++ 0x14, 0x02, ++ 0x17, 0x03, ++ 0x21, 0x02, ++ 0 ++ }; ++ struct alc_spec *spec = codec->spec; ++ ++ alc_fixup_no_shutup(codec, fix, action); ++ ++ switch (action) { ++ case HDA_FIXUP_ACT_PRE_PROBE: ++ snd_hda_apply_pincfgs(codec, pincfgs); ++ snd_hda_override_conn_list(codec, 0x17, ARRAY_SIZE(conn), conn); ++ spec->gen.preferred_dacs = preferred_pairs; ++ break; + } } enum { -@@ -6564,6 +6763,7 @@ enum { +@@ -6564,6 +6810,7 @@ enum { ALC269_FIXUP_LIMIT_INT_MIC_BOOST, ALC269VB_FIXUP_ASUS_ZENBOOK, ALC269VB_FIXUP_ASUS_ZENBOOK_UX31A, @@ -420817,7 +540943,7 @@ index 965b096f416f6..0de1dcd3b9465 100644 ALC269_FIXUP_LIMIT_INT_MIC_BOOST_MUTE_LED, ALC269VB_FIXUP_ORDISSIMO_EVE2, ALC283_FIXUP_CHROME_BOOK, -@@ -6623,6 +6823,7 @@ enum { +@@ -6623,6 +6870,7 @@ enum { ALC298_FIXUP_LENOVO_SPK_VOLUME, ALC256_FIXUP_DELL_INSPIRON_7559_SUBWOOFER, ALC269_FIXUP_ATIV_BOOK_8, @@ -420825,7 +540951,7 @@ index 965b096f416f6..0de1dcd3b9465 100644 ALC221_FIXUP_HP_MIC_NO_PRESENCE, ALC256_FIXUP_ASUS_HEADSET_MODE, ALC256_FIXUP_ASUS_MIC, -@@ -6678,12 +6879,16 @@ enum { +@@ -6678,12 +6926,17 @@ enum { ALC294_FIXUP_ASUS_GU502_HP, ALC294_FIXUP_ASUS_GU502_PINS, ALC294_FIXUP_ASUS_GU502_VERBS, @@ -420833,6 +540959,7 @@ index 965b096f416f6..0de1dcd3b9465 100644 + ALC285_FIXUP_ASUS_G533Z_PINS, ALC285_FIXUP_HP_GPIO_LED, 
ALC285_FIXUP_HP_MUTE_LED, ++ ALC285_FIXUP_HP_SPECTRE_X360_MUTE_LED, ALC236_FIXUP_HP_GPIO_LED, ALC236_FIXUP_HP_MUTE_LED, ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF, @@ -420842,7 +540969,7 @@ index 965b096f416f6..0de1dcd3b9465 100644 ALC295_FIXUP_ASUS_MIC_NO_PRESENCE, ALC269VC_FIXUP_ACER_VCOPPERBOX_PINS, ALC269VC_FIXUP_ACER_HEADSET_MIC, -@@ -6709,6 +6914,7 @@ enum { +@@ -6709,6 +6962,7 @@ enum { ALC285_FIXUP_THINKPAD_NO_BASS_SPK_HEADSET_JACK, ALC287_FIXUP_HP_GPIO_LED, ALC256_FIXUP_HP_HEADSET_MIC, @@ -420850,7 +540977,7 @@ index 965b096f416f6..0de1dcd3b9465 100644 ALC236_FIXUP_DELL_AIO_HEADSET_MIC, ALC282_FIXUP_ACER_DISABLE_LINEOUT, ALC255_FIXUP_ACER_LIMIT_INT_MIC_BOOST, -@@ -6725,10 +6931,34 @@ enum { +@@ -6725,10 +6979,36 @@ enum { ALC287_FIXUP_LEGION_15IMHG05_SPEAKERS, ALC287_FIXUP_LEGION_15IMHG05_AUTOMUTE, ALC287_FIXUP_YOGA7_14ITL_SPEAKERS, @@ -420864,6 +540991,8 @@ index 965b096f416f6..0de1dcd3b9465 100644 + ALC285_FIXUP_LEGION_Y9000X_SPEAKERS, + ALC285_FIXUP_LEGION_Y9000X_AUTOMUTE, + ALC285_FIXUP_HP_SPEAKERS_MICMUTE_LED, ++ ALC295_FIXUP_DELL_INSPIRON_TOP_SPEAKERS, ++ ALC236_FIXUP_DELL_DUAL_CODECS, }; +/* A special fixup for Lenovo C940 and Yoga Duet 7; @@ -420886,7 +541015,7 @@ index 965b096f416f6..0de1dcd3b9465 100644 static const struct hda_fixup alc269_fixups[] = { [ALC269_FIXUP_GPIO2] = { .type = HDA_FIXUP_FUNC, -@@ -7114,6 +7344,15 @@ static const struct hda_fixup alc269_fixups[] = { +@@ -7114,6 +7394,15 @@ static const struct hda_fixup alc269_fixups[] = { .chained = true, .chain_id = ALC269VB_FIXUP_ASUS_ZENBOOK, }, @@ -420902,7 +541031,7 @@ index 965b096f416f6..0de1dcd3b9465 100644 [ALC269_FIXUP_LIMIT_INT_MIC_BOOST_MUTE_LED] = { .type = HDA_FIXUP_FUNC, .v.func = alc269_fixup_limit_int_mic_boost, -@@ -7333,6 +7572,8 @@ static const struct hda_fixup alc269_fixups[] = { +@@ -7333,6 +7622,8 @@ static const struct hda_fixup alc269_fixups[] = { [ALC245_FIXUP_HP_X360_AMP] = { .type = HDA_FIXUP_FUNC, .v.func = alc245_fixup_hp_x360_amp, @@ -420911,7 +541040,7 @@ index 965b096f416f6..0de1dcd3b9465 100644 }, [ALC288_FIXUP_DELL_HEADSET_MODE] = { .type = HDA_FIXUP_FUNC, -@@ -7523,6 +7764,16 @@ static const struct hda_fixup alc269_fixups[] = { +@@ -7523,6 +7814,16 @@ static const struct hda_fixup alc269_fixups[] = { .chained = true, .chain_id = ALC269_FIXUP_NO_SHUTUP }, @@ -420928,7 +541057,7 @@ index 965b096f416f6..0de1dcd3b9465 100644 [ALC221_FIXUP_HP_MIC_NO_PRESENCE] = { .type = HDA_FIXUP_PINS, .v.pins = (const struct hda_pintbl[]) { -@@ -7957,6 +8208,26 @@ static const struct hda_fixup alc269_fixups[] = { +@@ -7957,6 +8258,26 @@ static const struct hda_fixup alc269_fixups[] = { [ALC294_FIXUP_ASUS_GU502_HP] = { .type = HDA_FIXUP_FUNC, .v.func = alc294_fixup_gu502_hp, @@ -420955,7 +541084,18 @@ index 965b096f416f6..0de1dcd3b9465 100644 }, [ALC294_FIXUP_ASUS_COEF_1B] = { .type = HDA_FIXUP_VERBS, -@@ -7991,6 +8262,12 @@ static const struct hda_fixup alc269_fixups[] = { +@@ -7979,6 +8300,10 @@ static const struct hda_fixup alc269_fixups[] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc285_fixup_hp_mute_led, + }, ++ [ALC285_FIXUP_HP_SPECTRE_X360_MUTE_LED] = { ++ .type = HDA_FIXUP_FUNC, ++ .v.func = alc285_fixup_hp_spectre_x360_mute_led, ++ }, + [ALC236_FIXUP_HP_GPIO_LED] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc236_fixup_hp_gpio_led, +@@ -7991,6 +8316,12 @@ static const struct hda_fixup alc269_fixups[] = { .type = HDA_FIXUP_FUNC, .v.func = alc236_fixup_hp_mute_led_micmute_vref, }, @@ -420968,7 +541108,7 @@ index 965b096f416f6..0de1dcd3b9465 100644 [ALC298_FIXUP_SAMSUNG_HEADPHONE_VERY_QUIET] = { 
.type = HDA_FIXUP_VERBS, .v.verbs = (const struct hda_verb[]) { -@@ -7998,6 +8275,14 @@ static const struct hda_fixup alc269_fixups[] = { +@@ -7998,6 +8329,14 @@ static const struct hda_fixup alc269_fixups[] = { { } }, }, @@ -420983,7 +541123,7 @@ index 965b096f416f6..0de1dcd3b9465 100644 [ALC295_FIXUP_ASUS_MIC_NO_PRESENCE] = { .type = HDA_FIXUP_PINS, .v.pins = (const struct hda_pintbl[]) { -@@ -8321,6 +8606,18 @@ static const struct hda_fixup alc269_fixups[] = { +@@ -8321,6 +8660,18 @@ static const struct hda_fixup alc269_fixups[] = { .chained = true, .chain_id = ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF, }, @@ -421002,7 +541142,7 @@ index 965b096f416f6..0de1dcd3b9465 100644 [ALC287_FIXUP_LEGION_15IMHG05_SPEAKERS] = { .type = HDA_FIXUP_VERBS, //.v.verbs = legion_15imhg05_coefs, -@@ -8406,6 +8703,10 @@ static const struct hda_fixup alc269_fixups[] = { +@@ -8406,6 +8757,10 @@ static const struct hda_fixup alc269_fixups[] = { .chained = true, .chain_id = ALC269_FIXUP_HEADSET_MODE, }, @@ -421013,7 +541153,7 @@ index 965b096f416f6..0de1dcd3b9465 100644 [ALC287_FIXUP_13S_GEN2_SPEAKERS] = { .type = HDA_FIXUP_VERBS, .v.verbs = (const struct hda_verb[]) { -@@ -8428,9 +8729,42 @@ static const struct hda_fixup alc269_fixups[] = { +@@ -8428,9 +8783,54 @@ static const struct hda_fixup alc269_fixups[] = { .chained = true, .chain_id = ALC269_FIXUP_HEADSET_MODE, }, @@ -421055,10 +541195,22 @@ index 965b096f416f6..0de1dcd3b9465 100644 + }, + .chained = true, + .chain_id = ALC285_FIXUP_HP_MUTE_LED, ++ }, ++ [ALC295_FIXUP_DELL_INSPIRON_TOP_SPEAKERS] = { ++ .type = HDA_FIXUP_FUNC, ++ .v.func = alc295_fixup_dell_inspiron_top_speakers, ++ .chained = true, ++ .chain_id = ALC269_FIXUP_DELL4_MIC_NO_PRESENCE, ++ }, ++ [ALC236_FIXUP_DELL_DUAL_CODECS] = { ++ .type = HDA_FIXUP_PINS, ++ .v.func = alc1220_fixup_gb_dual_codecs, ++ .chained = true, ++ .chain_id = ALC255_FIXUP_DELL1_MIC_NO_PRESENCE, }, }; -@@ -8464,14 +8798,17 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { +@@ -8464,14 +8864,18 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1025, 0x1290, "Acer Veriton Z4860G", ALC286_FIXUP_ACER_AIO_HEADSET_MIC), SND_PCI_QUIRK(0x1025, 0x1291, "Acer Veriton Z4660G", ALC286_FIXUP_ACER_AIO_HEADSET_MIC), SND_PCI_QUIRK(0x1025, 0x129c, "Acer SWIFT SF314-55", ALC256_FIXUP_ACER_HEADSET_MIC), @@ -421071,21 +541223,31 @@ index 965b096f416f6..0de1dcd3b9465 100644 SND_PCI_QUIRK(0x1025, 0x142b, "Acer Swift SF314-42", ALC255_FIXUP_ACER_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1025, 0x1430, "Acer TravelMate B311R-31", ALC256_FIXUP_ACER_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1025, 0x1466, "Acer Aspire A515-56", ALC255_FIXUP_ACER_HEADPHONE_AND_MIC), ++ SND_PCI_QUIRK(0x1025, 0x1534, "Acer Predator PH315-54", ALC255_FIXUP_ACER_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x0470, "Dell M101z", ALC269_FIXUP_DELL_M101Z), + SND_PCI_QUIRK(0x1028, 0x053c, "Dell Latitude E5430", ALC292_FIXUP_DELL_E7X), SND_PCI_QUIRK(0x1028, 0x054b, "Dell XPS one 2710", ALC275_FIXUP_DELL_XPS), SND_PCI_QUIRK(0x1028, 0x05bd, "Dell Latitude E6440", ALC292_FIXUP_DELL_E7X), SND_PCI_QUIRK(0x1028, 0x05be, "Dell Latitude E6540", ALC292_FIXUP_DELL_E7X), -@@ -8526,6 +8863,8 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { +@@ -8526,6 +8930,17 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1028, 0x0a62, "Dell Precision 5560", ALC289_FIXUP_DUAL_SPK), SND_PCI_QUIRK(0x1028, 0x0a9d, "Dell Latitude 5430", ALC269_FIXUP_DELL4_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x0a9e, "Dell Latitude 5430", 
ALC269_FIXUP_DELL4_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1028, 0x0b19, "Dell XPS 15 9520", ALC289_FIXUP_DUAL_SPK), + SND_PCI_QUIRK(0x1028, 0x0b1a, "Dell Precision 5570", ALC289_FIXUP_DUAL_SPK), ++ SND_PCI_QUIRK(0x1028, 0x0b37, "Dell Inspiron 16 Plus 7620 2-in-1", ALC295_FIXUP_DELL_INSPIRON_TOP_SPEAKERS), ++ SND_PCI_QUIRK(0x1028, 0x0b71, "Dell Inspiron 16 Plus 7620", ALC295_FIXUP_DELL_INSPIRON_TOP_SPEAKERS), ++ SND_PCI_QUIRK(0x1028, 0x0c03, "Dell Precision 5340", ALC269_FIXUP_DELL4_MIC_NO_PRESENCE), ++ SND_PCI_QUIRK(0x1028, 0x0c19, "Dell Precision 3340", ALC236_FIXUP_DELL_DUAL_CODECS), ++ SND_PCI_QUIRK(0x1028, 0x0c1a, "Dell Precision 3340", ALC236_FIXUP_DELL_DUAL_CODECS), ++ SND_PCI_QUIRK(0x1028, 0x0c1b, "Dell Precision 3440", ALC236_FIXUP_DELL_DUAL_CODECS), ++ SND_PCI_QUIRK(0x1028, 0x0c1c, "Dell Precision 3540", ALC236_FIXUP_DELL_DUAL_CODECS), ++ SND_PCI_QUIRK(0x1028, 0x0c1d, "Dell Precision 3440", ALC236_FIXUP_DELL_DUAL_CODECS), ++ SND_PCI_QUIRK(0x1028, 0x0c1e, "Dell Precision 3540", ALC236_FIXUP_DELL_DUAL_CODECS), SND_PCI_QUIRK(0x1028, 0x164a, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x164b, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x103c, 0x1586, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC2), -@@ -8585,6 +8924,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { +@@ -8585,6 +9000,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x2335, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1), SND_PCI_QUIRK(0x103c, 0x2336, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1), SND_PCI_QUIRK(0x103c, 0x2337, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1), @@ -421093,7 +541255,7 @@ index 965b096f416f6..0de1dcd3b9465 100644 SND_PCI_QUIRK(0x103c, 0x802e, "HP Z240 SFF", ALC221_FIXUP_HP_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x103c, 0x802f, "HP Z240", ALC221_FIXUP_HP_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x103c, 0x8077, "HP", ALC256_FIXUP_HP_HEADSET_MIC), -@@ -8601,14 +8941,19 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { +@@ -8601,14 +9017,20 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x84da, "HP OMEN dc0019-ur", ALC295_FIXUP_HP_OMEN), SND_PCI_QUIRK(0x103c, 0x84e7, "HP Pavilion 15", ALC269_FIXUP_HP_MUTE_LED_MIC3), SND_PCI_QUIRK(0x103c, 0x8519, "HP Spectre x360 15-df0xxx", ALC285_FIXUP_HP_SPECTRE_X360), @@ -421103,6 +541265,7 @@ index 965b096f416f6..0de1dcd3b9465 100644 SND_PCI_QUIRK(0x103c, 0x86c7, "HP Envy AiO 32", ALC274_FIXUP_HP_ENVY_GPIO), + SND_PCI_QUIRK(0x103c, 0x86e7, "HP Spectre x360 15-eb0xxx", ALC285_FIXUP_HP_SPECTRE_X360_EB1), + SND_PCI_QUIRK(0x103c, 0x86e8, "HP Spectre x360 15-eb0xxx", ALC285_FIXUP_HP_SPECTRE_X360_EB1), ++ SND_PCI_QUIRK(0x103c, 0x86f9, "HP Spectre x360 13-aw0xxx", ALC285_FIXUP_HP_SPECTRE_X360_MUTE_LED), SND_PCI_QUIRK(0x103c, 0x8716, "HP Elite Dragonfly G2 Notebook PC", ALC285_FIXUP_HP_GPIO_AMP_INIT), SND_PCI_QUIRK(0x103c, 0x8720, "HP EliteBook x360 1040 G8 Notebook PC", ALC285_FIXUP_HP_GPIO_AMP_INIT), SND_PCI_QUIRK(0x103c, 0x8724, "HP EliteBook 850 G7", ALC285_FIXUP_HP_GPIO_LED), @@ -421113,7 +541276,7 @@ index 965b096f416f6..0de1dcd3b9465 100644 SND_PCI_QUIRK(0x103c, 0x8736, "HP", ALC285_FIXUP_HP_GPIO_AMP_INIT), SND_PCI_QUIRK(0x103c, 0x8760, "HP", ALC285_FIXUP_HP_MUTE_LED), SND_PCI_QUIRK(0x103c, 0x877a, "HP", ALC285_FIXUP_HP_MUTE_LED), -@@ -8617,6 +8962,9 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { +@@ -8617,6 +9039,9 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { ALC285_FIXUP_HP_GPIO_AMP_INIT), SND_PCI_QUIRK(0x103c, 0x8783, "HP ZBook Fury 15 G7 Mobile Workstation", 
ALC285_FIXUP_HP_GPIO_AMP_INIT), @@ -421123,7 +541286,7 @@ index 965b096f416f6..0de1dcd3b9465 100644 SND_PCI_QUIRK(0x103c, 0x87c8, "HP", ALC287_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x87e5, "HP ProBook 440 G8 Notebook PC", ALC236_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x87e7, "HP ProBook 450 G8 Notebook PC", ALC236_FIXUP_HP_GPIO_LED), -@@ -8640,9 +8988,18 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { +@@ -8640,9 +9065,22 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x8870, "HP ZBook Fury 15.6 Inch G8 Mobile Workstation PC", ALC285_FIXUP_HP_GPIO_AMP_INIT), SND_PCI_QUIRK(0x103c, 0x8873, "HP ZBook Studio 15.6 Inch G8 Mobile Workstation PC", ALC285_FIXUP_HP_GPIO_AMP_INIT), SND_PCI_QUIRK(0x103c, 0x888d, "HP ZBook Power 15.6 inch G8 Mobile Workstation PC", ALC236_FIXUP_HP_GPIO_LED), @@ -421134,15 +541297,19 @@ index 965b096f416f6..0de1dcd3b9465 100644 + SND_PCI_QUIRK(0x103c, 0x89aa, "HP EliteBook 630 G9", ALC236_FIXUP_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x89c3, "HP", ALC285_FIXUP_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x89ca, "HP", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF), ++ SND_PCI_QUIRK(0x103c, 0x89d3, "HP EliteBook 645 G9 (MB 89D2)", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF), + SND_PCI_QUIRK(0x103c, 0x8a78, "HP Dev One", ALC285_FIXUP_HP_LIMIT_INT_MIC_BOOST), + SND_PCI_QUIRK(0x103c, 0x8aa0, "HP ProBook 440 G9 (MB 8A9E)", ALC236_FIXUP_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x8aa3, "HP ProBook 450 G9 (MB 8AA1)", ALC236_FIXUP_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x8aa8, "HP EliteBook 640 G9 (MB 8AA6)", ALC236_FIXUP_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x8aab, "HP EliteBook 650 G9 (MB 8AA9)", ALC236_FIXUP_HP_GPIO_LED), ++ SND_PCI_QUIRK(0x103c, 0x8b5d, "HP", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF), ++ SND_PCI_QUIRK(0x103c, 0x8b5e, "HP", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF), ++ SND_PCI_QUIRK(0x103c, 0x8bf0, "HP", ALC236_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x1043, 0x103e, "ASUS X540SA", ALC256_FIXUP_ASUS_MIC), SND_PCI_QUIRK(0x1043, 0x103f, "ASUS TX300", ALC282_FIXUP_ASUS_TX300), SND_PCI_QUIRK(0x1043, 0x106d, "Asus K53BE", ALC269_FIXUP_LIMIT_INT_MIC_BOOST), -@@ -8657,17 +9014,20 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { +@@ -8657,17 +9095,20 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1043, 0x12a0, "ASUS X441UV", ALC233_FIXUP_EAPD_COEF_AND_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1043, 0x12e0, "ASUS X541SA", ALC256_FIXUP_ASUS_MIC), SND_PCI_QUIRK(0x1043, 0x12f0, "ASUS X541UV", ALC256_FIXUP_ASUS_MIC), @@ -421164,7 +541331,7 @@ index 965b096f416f6..0de1dcd3b9465 100644 SND_PCI_QUIRK(0x1043, 0x1982, "ASUS B1400CEPE", ALC256_FIXUP_ASUS_HPE), SND_PCI_QUIRK(0x1043, 0x19ce, "ASUS B9450FA", ALC294_FIXUP_ASUS_HPE), SND_PCI_QUIRK(0x1043, 0x19e1, "ASUS UX581LV", ALC295_FIXUP_ASUS_MIC_NO_PRESENCE), -@@ -8677,12 +9037,17 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { +@@ -8677,12 +9118,17 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1043, 0x1b13, "Asus U41SV", ALC269_FIXUP_INV_DMIC), SND_PCI_QUIRK(0x1043, 0x1bbd, "ASUS Z550MA", ALC255_FIXUP_ASUS_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1043, 0x1c23, "Asus X55U", ALC269_FIXUP_LIMIT_INT_MIC_BOOST), @@ -421182,7 +541349,7 @@ index 965b096f416f6..0de1dcd3b9465 100644 SND_PCI_QUIRK(0x1043, 0x3030, "ASUS ZN270IE", ALC256_FIXUP_ASUS_AIO_GPIO2), SND_PCI_QUIRK(0x1043, 0x831a, "ASUS P901", ALC269_FIXUP_STEREO_DMIC), SND_PCI_QUIRK(0x1043, 0x834a, "ASUS S101", ALC269_FIXUP_STEREO_DMIC), -@@ -8704,17 +9069,19 @@ static const struct snd_pci_quirk 
alc269_fixup_tbl[] = { +@@ -8704,17 +9150,22 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x10ec, 0x10f2, "Intel Reference board", ALC700_FIXUP_INTEL_REFERENCE), SND_PCI_QUIRK(0x10ec, 0x118c, "Medion EE4254 MD62100", ALC256_FIXUP_MEDION_HEADSET_NO_PRESENCE), SND_PCI_QUIRK(0x10ec, 0x1230, "Intel Reference board", ALC295_FIXUP_CHROME_BOOK), @@ -421199,16 +541366,19 @@ index 965b096f416f6..0de1dcd3b9465 100644 + SND_PCI_QUIRK(0x144d, 0xc176, "Samsung Notebook 9 Pro (NP930MBE-K04US)", ALC298_FIXUP_SAMSUNG_AMP), + SND_PCI_QUIRK(0x144d, 0xc189, "Samsung Galaxy Flex Book (NT950QCG-X716)", ALC298_FIXUP_SAMSUNG_AMP), + SND_PCI_QUIRK(0x144d, 0xc18a, "Samsung Galaxy Book Ion (NP930XCJ-K01US)", ALC298_FIXUP_SAMSUNG_AMP), ++ SND_PCI_QUIRK(0x144d, 0xc1a3, "Samsung Galaxy Book Pro (NP935XDB-KC1SE)", ALC298_FIXUP_SAMSUNG_AMP), ++ SND_PCI_QUIRK(0x144d, 0xc1a6, "Samsung Galaxy Book Pro 360 (NP930QBD)", ALC298_FIXUP_SAMSUNG_AMP), SND_PCI_QUIRK(0x144d, 0xc740, "Samsung Ativ book 8 (NP870Z5G)", ALC269_FIXUP_ATIV_BOOK_8), - SND_PCI_QUIRK(0x144d, 0xc812, "Samsung Notebook Pen S (NT950SBE-X58)", ALC298_FIXUP_SAMSUNG_HEADPHONE_VERY_QUIET), - SND_PCI_QUIRK(0x144d, 0xc830, "Samsung Galaxy Book Ion (NT950XCJ-X716A)", ALC298_FIXUP_SAMSUNG_HEADPHONE_VERY_QUIET), + SND_PCI_QUIRK(0x144d, 0xc812, "Samsung Notebook Pen S (NT950SBE-X58)", ALC298_FIXUP_SAMSUNG_AMP), + SND_PCI_QUIRK(0x144d, 0xc830, "Samsung Galaxy Book Ion (NT950XCJ-X716A)", ALC298_FIXUP_SAMSUNG_AMP), + SND_PCI_QUIRK(0x144d, 0xc832, "Samsung Galaxy Book Flex Alpha (NP730QCJ)", ALC256_FIXUP_SAMSUNG_HEADPHONE_VERY_QUIET), ++ SND_PCI_QUIRK(0x144d, 0xca03, "Samsung Galaxy Book2 Pro 360 (NP930QED)", ALC298_FIXUP_SAMSUNG_AMP), SND_PCI_QUIRK(0x1458, 0xfa53, "Gigabyte BXBT-2807", ALC283_FIXUP_HEADSET_MIC), SND_PCI_QUIRK(0x1462, 0xb120, "MSI Cubi MS-B120", ALC283_FIXUP_HEADSET_MIC), SND_PCI_QUIRK(0x1462, 0xb171, "Cubi N 8GL (MS-B171)", ALC283_FIXUP_HEADSET_MIC), -@@ -8728,14 +9095,19 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { +@@ -8728,14 +9179,19 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1558, 0x4018, "Clevo NV40M[BE]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0x4019, "Clevo NV40MZ", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0x4020, "Clevo NV40MB", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), @@ -421228,7 +541398,7 @@ index 965b096f416f6..0de1dcd3b9465 100644 SND_PCI_QUIRK(0x1558, 0x50f0, "Clevo NH50A[CDF]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0x50f2, "Clevo NH50E[PR]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0x50f3, "Clevo NH58DPQ", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), -@@ -8750,6 +9122,9 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { +@@ -8750,6 +9206,9 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1558, 0x70f3, "Clevo NH77DPQ", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0x70f4, "Clevo NH77EPY", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0x70f6, "Clevo NH77DPQ-Y", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), @@ -421238,7 +541408,7 @@ index 965b096f416f6..0de1dcd3b9465 100644 SND_PCI_QUIRK(0x1558, 0x8228, "Clevo NR40BU", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0x8520, "Clevo NH50D[CD]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0x8521, "Clevo NH77D[CD]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), -@@ -8761,8 +9136,11 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { +@@ -8761,8 
+9220,11 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1558, 0x8561, "Clevo NH[57][0-9][ER][ACDH]Q", ALC269_FIXUP_HEADSET_MIC), SND_PCI_QUIRK(0x1558, 0x8562, "Clevo NH[57][0-9]RZ[Q]", ALC269_FIXUP_DMIC), SND_PCI_QUIRK(0x1558, 0x8668, "Clevo NP50B[BE]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), @@ -421251,7 +541421,7 @@ index 965b096f416f6..0de1dcd3b9465 100644 SND_PCI_QUIRK(0x1558, 0x8a20, "Clevo NH55DCQ-Y", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0x8a51, "Clevo NH70RCQ-Y", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0x8d50, "Clevo NH55RCQ-M", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), -@@ -8828,13 +9206,19 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { +@@ -8828,13 +9290,19 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x17aa, 0x3176, "ThinkCentre Station", ALC283_FIXUP_HEADSET_MIC), SND_PCI_QUIRK(0x17aa, 0x3178, "ThinkCentre Station", ALC283_FIXUP_HEADSET_MIC), SND_PCI_QUIRK(0x17aa, 0x31af, "ThinkCentre Station", ALC623_FIXUP_LENOVO_THINKSTATION_P340), @@ -421274,7 +541444,7 @@ index 965b096f416f6..0de1dcd3b9465 100644 SND_PCI_QUIRK(0x17aa, 0x3902, "Lenovo E50-80", ALC269_FIXUP_DMIC_THINKPAD_ACPI), SND_PCI_QUIRK(0x17aa, 0x3977, "IdeaPad S210", ALC283_FIXUP_INT_MIC), SND_PCI_QUIRK(0x17aa, 0x3978, "Lenovo B50-70", ALC269_FIXUP_DMIC_THINKPAD_ACPI), -@@ -8854,21 +9238,33 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { +@@ -8854,21 +9322,35 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x17aa, 0x505d, "Thinkpad", ALC298_FIXUP_TPT470_DOCK), SND_PCI_QUIRK(0x17aa, 0x505f, "Thinkpad", ALC298_FIXUP_TPT470_DOCK), SND_PCI_QUIRK(0x17aa, 0x5062, "Thinkpad", ALC298_FIXUP_TPT470_DOCK), @@ -421284,6 +541454,7 @@ index 965b096f416f6..0de1dcd3b9465 100644 SND_PCI_QUIRK(0x17aa, 0x511f, "Thinkpad", ALC298_FIXUP_TPT470_DOCK), SND_PCI_QUIRK(0x17aa, 0x9e54, "LENOVO NB", ALC269_FIXUP_LENOVO_EAPD), + SND_PCI_QUIRK(0x1849, 0x1233, "ASRock NUC Box 1100", ALC233_FIXUP_NO_AUDIO_JACK), ++ SND_PCI_QUIRK(0x1849, 0xa233, "Positivo Master C6300", ALC269_FIXUP_HEADSET_MIC), SND_PCI_QUIRK(0x19e5, 0x3204, "Huawei MACH-WX9", ALC256_FIXUP_HUAWEI_MACH_WX9_PINS), + SND_PCI_QUIRK(0x19e5, 0x320f, "Huawei WRT-WX9 ", ALC256_FIXUP_ASUS_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1b35, 0x1235, "CZC B20", ALC269_FIXUP_CZC_B20), @@ -421293,6 +541464,7 @@ index 965b096f416f6..0de1dcd3b9465 100644 SND_PCI_QUIRK(0x1c06, 0x2013, "Lemote A1802", ALC269_FIXUP_LEMOTE_A1802), SND_PCI_QUIRK(0x1c06, 0x2015, "Lemote A190X", ALC269_FIXUP_LEMOTE_A190X), - SND_PCI_QUIRK(0x1d05, 0x1132, "TongFang PHxTxX1", ALC256_FIXUP_TONGFANG_RESET_PERSISTENT_SETTINGS), ++ SND_PCI_QUIRK(0x1c6c, 0x1251, "Positivo N14KP6-TG", ALC288_FIXUP_DELL1_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1d05, 0x1132, "TongFang PHxTxX1", ALC256_FIXUP_SET_COEF_DEFAULTS), + SND_PCI_QUIRK(0x1d05, 0x1096, "TongFang GMxMRxx", ALC269_FIXUP_NO_SHUTUP), + SND_PCI_QUIRK(0x1d05, 0x1100, "TongFang GKxNRxx", ALC269_FIXUP_NO_SHUTUP), @@ -421309,7 +541481,7 @@ index 965b096f416f6..0de1dcd3b9465 100644 SND_PCI_QUIRK(0x1d72, 0x1947, "RedmiBook Air", ALC255_FIXUP_XIAOMI_HEADSET_MIC), SND_PCI_QUIRK(0x8086, 0x2074, "Intel NUC 8", ALC233_FIXUP_INTEL_NUC8_DMIC), SND_PCI_QUIRK(0x8086, 0x2080, "Intel NUC 8 Rugged", ALC256_FIXUP_INTEL_NUC8_RUGGED), -@@ -9045,7 +9441,8 @@ static const struct hda_model_fixup alc269_fixup_models[] = { +@@ -9045,7 +9527,8 @@ static const struct hda_model_fixup alc269_fixup_models[] = { {.id = ALC299_FIXUP_PREDATOR_SPK, .name = "predator-spk"}, {.id = 
ALC298_FIXUP_HUAWEI_MBX_STEREO, .name = "huawei-mbx-stereo"}, {.id = ALC256_FIXUP_MEDION_HEADSET_NO_PRESENCE, .name = "alc256-medion-headset"}, @@ -421319,7 +541491,7 @@ index 965b096f416f6..0de1dcd3b9465 100644 {.id = ALC255_FIXUP_XIAOMI_HEADSET_MIC, .name = "alc255-xiaomi-headset"}, {.id = ALC274_FIXUP_HP_MIC, .name = "alc274-hp-mic-detect"}, {.id = ALC245_FIXUP_HP_X360_AMP, .name = "alc245-hp-x360-amp"}, -@@ -9055,6 +9452,7 @@ static const struct hda_model_fixup alc269_fixup_models[] = { +@@ -9055,6 +9538,7 @@ static const struct hda_model_fixup alc269_fixup_models[] = { {.id = ALC287_FIXUP_IDEAPAD_BASS_SPK_AMP, .name = "alc287-ideapad-bass-spk-amp"}, {.id = ALC623_FIXUP_LENOVO_THINKSTATION_P340, .name = "alc623-lenovo-thinkstation-p340"}, {.id = ALC255_FIXUP_ACER_HEADPHONE_AND_MIC, .name = "alc255-acer-headphone-and-mic"}, @@ -421327,7 +541499,7 @@ index 965b096f416f6..0de1dcd3b9465 100644 {} }; #define ALC225_STANDARD_PINS \ -@@ -9646,6 +10044,7 @@ static int patch_alc269(struct hda_codec *codec) +@@ -9646,6 +10130,7 @@ static int patch_alc269(struct hda_codec *codec) case 0x10ec0230: case 0x10ec0236: case 0x10ec0256: @@ -421335,7 +541507,7 @@ index 965b096f416f6..0de1dcd3b9465 100644 spec->codec_variant = ALC269_TYPE_ALC256; spec->shutup = alc256_shutup; spec->init_hook = alc256_init; -@@ -9661,7 +10060,10 @@ static int patch_alc269(struct hda_codec *codec) +@@ -9661,7 +10146,10 @@ static int patch_alc269(struct hda_codec *codec) case 0x10ec0245: case 0x10ec0285: case 0x10ec0289: @@ -421347,7 +541519,7 @@ index 965b096f416f6..0de1dcd3b9465 100644 spec->shutup = alc225_shutup; spec->init_hook = alc225_init; spec->gen.mixer_nid = 0; -@@ -10167,6 +10569,27 @@ static void alc671_fixup_hp_headset_mic2(struct hda_codec *codec, +@@ -10167,6 +10655,38 @@ static void alc671_fixup_hp_headset_mic2(struct hda_codec *codec, } } @@ -421371,21 +541543,34 @@ index 965b096f416f6..0de1dcd3b9465 100644 + spec->gen.hp_automute_hook = alc897_hp_automute_hook; + } +} ++ ++static void alc897_fixup_lenovo_headset_mode(struct hda_codec *codec, ++ const struct hda_fixup *fix, int action) ++{ ++ struct alc_spec *spec = codec->spec; ++ ++ if (action == HDA_FIXUP_ACT_PRE_PROBE) { ++ spec->parse_flags |= HDA_PINCFG_HEADSET_MIC; ++ spec->gen.hp_automute_hook = alc897_hp_automute_hook; ++ } ++} + static const struct coef_fw alc668_coefs[] = { WRITE_COEF(0x01, 0xbebe), WRITE_COEF(0x02, 0xaaaa), WRITE_COEF(0x03, 0x0), WRITE_COEF(0x04, 0x0180), WRITE_COEF(0x06, 0x0), WRITE_COEF(0x07, 0x0f80), -@@ -10247,6 +10670,9 @@ enum { +@@ -10247,6 +10767,11 @@ enum { ALC668_FIXUP_ASUS_NO_HEADSET_MIC, ALC668_FIXUP_HEADSET_MIC, ALC668_FIXUP_MIC_DET_COEF, + ALC897_FIXUP_LENOVO_HEADSET_MIC, + ALC897_FIXUP_HEADSET_MIC_PIN, + ALC897_FIXUP_HP_HSMIC_VERB, ++ ALC897_FIXUP_LENOVO_HEADSET_MODE, ++ ALC897_FIXUP_HEADSET_MIC_PIN2, }; static const struct hda_fixup alc662_fixups[] = { -@@ -10653,6 +11079,26 @@ static const struct hda_fixup alc662_fixups[] = { +@@ -10653,6 +11178,39 @@ static const struct hda_fixup alc662_fixups[] = { {} }, }, @@ -421408,11 +541593,24 @@ index 965b096f416f6..0de1dcd3b9465 100644 + { 0x19, 0x01a1913c }, /* use as headset mic, without its own jack detect */ + { } + }, ++ }, ++ [ALC897_FIXUP_LENOVO_HEADSET_MODE] = { ++ .type = HDA_FIXUP_FUNC, ++ .v.func = alc897_fixup_lenovo_headset_mode, ++ }, ++ [ALC897_FIXUP_HEADSET_MIC_PIN2] = { ++ .type = HDA_FIXUP_PINS, ++ .v.pins = (const struct hda_pintbl[]) { ++ { 0x1a, 0x01a11140 }, /* use as headset mic, without its own jack detect */ ++ { } ++ }, ++ .chained = true, ++ 
.chain_id = ALC897_FIXUP_LENOVO_HEADSET_MODE + }, }; static const struct snd_pci_quirk alc662_fixup_tbl[] = { -@@ -10678,7 +11124,10 @@ static const struct snd_pci_quirk alc662_fixup_tbl[] = { +@@ -10678,7 +11236,10 @@ static const struct snd_pci_quirk alc662_fixup_tbl[] = { SND_PCI_QUIRK(0x1028, 0x0698, "Dell", ALC668_FIXUP_DELL_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x069f, "Dell", ALC668_FIXUP_DELL_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x103c, 0x1632, "HP RP5800", ALC662_FIXUP_HP_RP5800), @@ -421423,7 +541621,7 @@ index 965b096f416f6..0de1dcd3b9465 100644 SND_PCI_QUIRK(0x1043, 0x1080, "Asus UX501VW", ALC668_FIXUP_HEADSET_MODE), SND_PCI_QUIRK(0x1043, 0x11cd, "Asus N550", ALC662_FIXUP_ASUS_Nx50), SND_PCI_QUIRK(0x1043, 0x129d, "Asus N750", ALC662_FIXUP_ASUS_Nx50), -@@ -10697,6 +11146,11 @@ static const struct snd_pci_quirk alc662_fixup_tbl[] = { +@@ -10697,6 +11258,12 @@ static const struct snd_pci_quirk alc662_fixup_tbl[] = { SND_PCI_QUIRK(0x144d, 0xc051, "Samsung R720", ALC662_FIXUP_IDEAPAD), SND_PCI_QUIRK(0x14cd, 0x5003, "USI", ALC662_FIXUP_USI_HEADSET_MODE), SND_PCI_QUIRK(0x17aa, 0x1036, "Lenovo P520", ALC662_FIXUP_LENOVO_MULTI_CODECS), @@ -421432,10 +541630,11 @@ index 965b096f416f6..0de1dcd3b9465 100644 + SND_PCI_QUIRK(0x17aa, 0x32cb, "Lenovo ThinkCentre M70", ALC897_FIXUP_HEADSET_MIC_PIN), + SND_PCI_QUIRK(0x17aa, 0x32cf, "Lenovo ThinkCentre M950", ALC897_FIXUP_HEADSET_MIC_PIN), + SND_PCI_QUIRK(0x17aa, 0x32f7, "Lenovo ThinkCentre M90", ALC897_FIXUP_HEADSET_MIC_PIN), ++ SND_PCI_QUIRK(0x17aa, 0x3742, "Lenovo TianYi510Pro-14IOB", ALC897_FIXUP_HEADSET_MIC_PIN2), SND_PCI_QUIRK(0x17aa, 0x38af, "Lenovo Ideapad Y550P", ALC662_FIXUP_IDEAPAD), SND_PCI_QUIRK(0x17aa, 0x3a0d, "Lenovo Ideapad Y550", ALC662_FIXUP_IDEAPAD), SND_PCI_QUIRK(0x1849, 0x5892, "ASRock B150M", ALC892_FIXUP_ASROCK_MOBO), -@@ -11051,6 +11505,7 @@ static const struct hda_device_id snd_hda_id_realtek[] = { +@@ -11051,6 +11618,7 @@ static const struct hda_device_id snd_hda_id_realtek[] = { HDA_CODEC_ENTRY(0x10ec0b00, "ALCS1200A", patch_alc882), HDA_CODEC_ENTRY(0x10ec1168, "ALC1220", patch_alc882), HDA_CODEC_ENTRY(0x10ec1220, "ALC1220", patch_alc882), @@ -421465,7 +541664,7 @@ index 61df4d33c48ff..a794a01a68ca6 100644 caps = query_amp_caps(codec, nid, HDA_OUTPUT); if (!(caps & AC_AMPCAP_MUTE)) { diff --git a/sound/pci/hda/patch_via.c b/sound/pci/hda/patch_via.c -index 773a136161f11..a188901a83bbe 100644 +index 773a136161f11..29abc96dc146c 100644 --- a/sound/pci/hda/patch_via.c +++ b/sound/pci/hda/patch_via.c @@ -520,11 +520,11 @@ static int via_parse_auto_config(struct hda_codec *codec) @@ -421482,6 +541681,16 @@ index 773a136161f11..a188901a83bbe 100644 if (err < 0) return err; +@@ -821,6 +821,9 @@ static int add_secret_dac_path(struct hda_codec *codec) + return 0; + nums = snd_hda_get_connections(codec, spec->gen.mixer_nid, conn, + ARRAY_SIZE(conn) - 1); ++ if (nums < 0) ++ return nums; ++ + for (i = 0; i < nums; i++) { + if (get_wcaps_type(get_wcaps(codec, conn[i])) == AC_WID_AUD_OUT) + return 0; diff --git a/sound/pci/ice1712/ice1724.c b/sound/pci/ice1712/ice1724.c index f6275868877a7..6fab2ad85bbec 100644 --- a/sound/pci/ice1712/ice1724.c @@ -421654,6 +541863,28 @@ index 168a1084f7303..bd9b6148dd6fb 100644 } static struct pci_driver lx6464es_driver = { +diff --git a/sound/pci/lx6464es/lx_core.c b/sound/pci/lx6464es/lx_core.c +index d3f58a3d17fbc..b5b0d43bb8dcd 100644 +--- a/sound/pci/lx6464es/lx_core.c ++++ b/sound/pci/lx6464es/lx_core.c +@@ -493,12 +493,11 @@ int lx_buffer_ask(struct lx6464es *chip, u32 pipe, int is_capture, + 
dev_dbg(chip->card->dev, + "CMD_08_ASK_BUFFERS: needed %d, freed %d\n", + *r_needed, *r_freed); +- for (i = 0; i < MAX_STREAM_BUFFER; ++i) { +- for (i = 0; i != chip->rmh.stat_len; ++i) +- dev_dbg(chip->card->dev, +- " stat[%d]: %x, %x\n", i, +- chip->rmh.stat[i], +- chip->rmh.stat[i] & MASK_DATA_SIZE); ++ for (i = 0; i < MAX_STREAM_BUFFER && i < chip->rmh.stat_len; ++ ++i) { ++ dev_dbg(chip->card->dev, " stat[%d]: %x, %x\n", i, ++ chip->rmh.stat[i], ++ chip->rmh.stat[i] & MASK_DATA_SIZE); + } + } + diff --git a/sound/pci/maestro3.c b/sound/pci/maestro3.c index 056838ead21d6..261850775c807 100644 --- a/sound/pci/maestro3.c @@ -421811,9 +542042,18 @@ index 8fc8115049203..bccb7e0d3d116 100644 .name = KBUILD_MODNAME, .id_table = snd_rme96_ids, diff --git a/sound/pci/rme9652/hdsp.c b/sound/pci/rme9652/hdsp.c -index 75aa2ea733a59..3db641318d3ae 100644 +index 75aa2ea733a59..82c72e6c13754 100644 --- a/sound/pci/rme9652/hdsp.c +++ b/sound/pci/rme9652/hdsp.c +@@ -433,7 +433,7 @@ struct hdsp_midi { + struct snd_rawmidi *rmidi; + struct snd_rawmidi_substream *input; + struct snd_rawmidi_substream *output; +- char istimer; /* timer in use */ ++ signed char istimer; /* timer in use */ + struct timer_list timer; + spinlock_t lock; + int pending; @@ -468,8 +468,11 @@ struct hdsp { unsigned char ss_out_channels; u32 io_loopback; /* output loopback channel states*/ @@ -421828,6 +542068,60 @@ index 75aa2ea733a59..3db641318d3ae 100644 unsigned char *capture_buffer; /* suitably aligned address */ unsigned char *playback_buffer; /* suitably aligned address */ +@@ -477,7 +480,7 @@ struct hdsp { + pid_t playback_pid; + int running; + int system_sample_rate; +- const char *channel_map; ++ const signed char *channel_map; + int dev; + int irq; + unsigned long port; +@@ -499,7 +502,7 @@ struct hdsp { + where the data for that channel can be read/written from/to. 
+ */ + +-static const char channel_map_df_ss[HDSP_MAX_CHANNELS] = { ++static const signed char channel_map_df_ss[HDSP_MAX_CHANNELS] = { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, + 18, 19, 20, 21, 22, 23, 24, 25 + }; +@@ -514,7 +517,7 @@ static const char channel_map_mf_ss[HDSP_MAX_CHANNELS] = { /* Multiface */ + -1, -1, -1, -1, -1, -1, -1, -1 + }; + +-static const char channel_map_ds[HDSP_MAX_CHANNELS] = { ++static const signed char channel_map_ds[HDSP_MAX_CHANNELS] = { + /* ADAT channels are remapped */ + 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, + /* channels 12 and 13 are S/PDIF */ +@@ -523,7 +526,7 @@ static const char channel_map_ds[HDSP_MAX_CHANNELS] = { + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 + }; + +-static const char channel_map_H9632_ss[HDSP_MAX_CHANNELS] = { ++static const signed char channel_map_H9632_ss[HDSP_MAX_CHANNELS] = { + /* ADAT channels */ + 0, 1, 2, 3, 4, 5, 6, 7, + /* SPDIF */ +@@ -537,7 +540,7 @@ static const char channel_map_H9632_ss[HDSP_MAX_CHANNELS] = { + -1, -1 + }; + +-static const char channel_map_H9632_ds[HDSP_MAX_CHANNELS] = { ++static const signed char channel_map_H9632_ds[HDSP_MAX_CHANNELS] = { + /* ADAT */ + 1, 3, 5, 7, + /* SPDIF */ +@@ -551,7 +554,7 @@ static const char channel_map_H9632_ds[HDSP_MAX_CHANNELS] = { + -1, -1, -1, -1, -1, -1 + }; + +-static const char channel_map_H9632_qs[HDSP_MAX_CHANNELS] = { ++static const signed char channel_map_H9632_qs[HDSP_MAX_CHANNELS] = { + /* ADAT is disabled in this mode */ + /* SPDIF */ + 8, 9, @@ -3764,30 +3767,32 @@ static void snd_hdsp_proc_init(struct hdsp *hdsp) static int snd_hdsp_initialize_memory(struct hdsp *hdsp) @@ -421875,6 +542169,60 @@ index 75aa2ea733a59..3db641318d3ae 100644 return 0; } +@@ -3934,7 +3939,7 @@ static snd_pcm_uframes_t snd_hdsp_hw_pointer(struct snd_pcm_substream *substream + return hdsp_hw_pointer(hdsp); + } + +-static char *hdsp_channel_buffer_location(struct hdsp *hdsp, ++static signed char *hdsp_channel_buffer_location(struct hdsp *hdsp, + int stream, + int channel) + +@@ -3959,7 +3964,7 @@ static int snd_hdsp_playback_copy(struct snd_pcm_substream *substream, + void __user *src, unsigned long count) + { + struct hdsp *hdsp = snd_pcm_substream_chip(substream); +- char *channel_buf; ++ signed char *channel_buf; + + if (snd_BUG_ON(pos + count > HDSP_CHANNEL_BUFFER_BYTES)) + return -EINVAL; +@@ -3977,7 +3982,7 @@ static int snd_hdsp_playback_copy_kernel(struct snd_pcm_substream *substream, + void *src, unsigned long count) + { + struct hdsp *hdsp = snd_pcm_substream_chip(substream); +- char *channel_buf; ++ signed char *channel_buf; + + channel_buf = hdsp_channel_buffer_location(hdsp, substream->pstr->stream, channel); + if (snd_BUG_ON(!channel_buf)) +@@ -3991,7 +3996,7 @@ static int snd_hdsp_capture_copy(struct snd_pcm_substream *substream, + void __user *dst, unsigned long count) + { + struct hdsp *hdsp = snd_pcm_substream_chip(substream); +- char *channel_buf; ++ signed char *channel_buf; + + if (snd_BUG_ON(pos + count > HDSP_CHANNEL_BUFFER_BYTES)) + return -EINVAL; +@@ -4009,7 +4014,7 @@ static int snd_hdsp_capture_copy_kernel(struct snd_pcm_substream *substream, + void *dst, unsigned long count) + { + struct hdsp *hdsp = snd_pcm_substream_chip(substream); +- char *channel_buf; ++ signed char *channel_buf; + + channel_buf = hdsp_channel_buffer_location(hdsp, substream->pstr->stream, channel); + if (snd_BUG_ON(!channel_buf)) +@@ -4023,7 +4028,7 @@ static int snd_hdsp_hw_silence(struct snd_pcm_substream *substream, + unsigned long count) + { + 
struct hdsp *hdsp = snd_pcm_substream_chip(substream); +- char *channel_buf; ++ signed char *channel_buf; + + channel_buf = hdsp_channel_buffer_location (hdsp, substream->pstr->stream, channel); + if (snd_BUG_ON(!channel_buf)) @@ -4507,7 +4512,7 @@ static int snd_hdsp_playback_open(struct snd_pcm_substream *substream) snd_pcm_set_sync(substream); @@ -421949,7 +542297,7 @@ index ff06ee82607cf..fa1812e7a49dc 100644 static struct pci_driver hdspm_driver = { diff --git a/sound/pci/rme9652/rme9652.c b/sound/pci/rme9652/rme9652.c -index e76f737ac9e8e..1d614fe89a6ae 100644 +index e76f737ac9e8e..e7c320afefe86 100644 --- a/sound/pci/rme9652/rme9652.c +++ b/sound/pci/rme9652/rme9652.c @@ -208,8 +208,11 @@ struct snd_rme9652 { @@ -421966,6 +542314,48 @@ index e76f737ac9e8e..1d614fe89a6ae 100644 unsigned char *capture_buffer; /* suitably aligned address */ unsigned char *playback_buffer; /* suitably aligned address */ +@@ -227,7 +230,7 @@ struct snd_rme9652 { + int last_spdif_sample_rate; /* so that we can catch externally ... */ + int last_adat_sample_rate; /* ... induced rate changes */ + +- const char *channel_map; ++ const signed char *channel_map; + + struct snd_card *card; + struct snd_pcm *pcm; +@@ -244,12 +247,12 @@ struct snd_rme9652 { + where the data for that channel can be read/written from/to. + */ + +-static const char channel_map_9652_ss[26] = { ++static const signed char channel_map_9652_ss[26] = { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, + 18, 19, 20, 21, 22, 23, 24, 25 + }; + +-static const char channel_map_9636_ss[26] = { ++static const signed char channel_map_9636_ss[26] = { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + /* channels 16 and 17 are S/PDIF */ + 24, 25, +@@ -257,7 +260,7 @@ static const char channel_map_9636_ss[26] = { + -1, -1, -1, -1, -1, -1, -1, -1 + }; + +-static const char channel_map_9652_ds[26] = { ++static const signed char channel_map_9652_ds[26] = { + /* ADAT channels are remapped */ + 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, + /* channels 12 and 13 are S/PDIF */ +@@ -266,7 +269,7 @@ static const char channel_map_9652_ds[26] = { + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 + }; + +-static const char channel_map_9636_ds[26] = { ++static const signed char channel_map_9636_ds[26] = { + /* ADAT channels are remapped */ + 1, 3, 5, 7, 9, 11, 13, 15, + /* channels 8 and 9 are S/PDIF */ @@ -1719,30 +1722,32 @@ static void snd_rme9652_card_free(struct snd_card *card) static int snd_rme9652_initialize_memory(struct snd_rme9652 *rme9652) @@ -422013,6 +542403,60 @@ index e76f737ac9e8e..1d614fe89a6ae 100644 return 0; } +@@ -1814,7 +1819,7 @@ static snd_pcm_uframes_t snd_rme9652_hw_pointer(struct snd_pcm_substream *substr + return rme9652_hw_pointer(rme9652); + } + +-static char *rme9652_channel_buffer_location(struct snd_rme9652 *rme9652, ++static signed char *rme9652_channel_buffer_location(struct snd_rme9652 *rme9652, + int stream, + int channel) + +@@ -1842,7 +1847,7 @@ static int snd_rme9652_playback_copy(struct snd_pcm_substream *substream, + void __user *src, unsigned long count) + { + struct snd_rme9652 *rme9652 = snd_pcm_substream_chip(substream); +- char *channel_buf; ++ signed char *channel_buf; + + if (snd_BUG_ON(pos + count > RME9652_CHANNEL_BUFFER_BYTES)) + return -EINVAL; +@@ -1862,7 +1867,7 @@ static int snd_rme9652_playback_copy_kernel(struct snd_pcm_substream *substream, + void *src, unsigned long count) + { + struct snd_rme9652 *rme9652 = snd_pcm_substream_chip(substream); +- char *channel_buf; ++ signed char 
*channel_buf; + + channel_buf = rme9652_channel_buffer_location(rme9652, + substream->pstr->stream, +@@ -1878,7 +1883,7 @@ static int snd_rme9652_capture_copy(struct snd_pcm_substream *substream, + void __user *dst, unsigned long count) + { + struct snd_rme9652 *rme9652 = snd_pcm_substream_chip(substream); +- char *channel_buf; ++ signed char *channel_buf; + + if (snd_BUG_ON(pos + count > RME9652_CHANNEL_BUFFER_BYTES)) + return -EINVAL; +@@ -1898,7 +1903,7 @@ static int snd_rme9652_capture_copy_kernel(struct snd_pcm_substream *substream, + void *dst, unsigned long count) + { + struct snd_rme9652 *rme9652 = snd_pcm_substream_chip(substream); +- char *channel_buf; ++ signed char *channel_buf; + + channel_buf = rme9652_channel_buffer_location(rme9652, + substream->pstr->stream, +@@ -1914,7 +1919,7 @@ static int snd_rme9652_hw_silence(struct snd_pcm_substream *substream, + unsigned long count) + { + struct snd_rme9652 *rme9652 = snd_pcm_substream_chip(substream); +- char *channel_buf; ++ signed char *channel_buf; + + channel_buf = rme9652_channel_buffer_location (rme9652, + substream->pstr->stream, @@ -2259,7 +2264,7 @@ static int snd_rme9652_playback_open(struct snd_pcm_substream *substream) snd_pcm_set_sync(substream); @@ -422790,19 +543234,6 @@ index 9a463ab54bddc..762d9de73dbc2 100644 dev_err(&i2c_client->dev, "Failed to request IRQ: %d\n", ret); -diff --git a/sound/soc/codecs/cs42l51.c b/sound/soc/codecs/cs42l51.c -index c61b17dc2af87..fc6a2bc311b4f 100644 ---- a/sound/soc/codecs/cs42l51.c -+++ b/sound/soc/codecs/cs42l51.c -@@ -146,7 +146,7 @@ static const struct snd_kcontrol_new cs42l51_snd_controls[] = { - 0, 0xA0, 96, adc_att_tlv), - SOC_DOUBLE_R_SX_TLV("PGA Volume", - CS42L51_ALC_PGA_CTL, CS42L51_ALC_PGB_CTL, -- 0, 0x1A, 30, pga_tlv), -+ 0, 0x19, 30, pga_tlv), - SOC_SINGLE("Playback Deemphasis Switch", CS42L51_DAC_CTL, 3, 1, 0), - SOC_SINGLE("Auto-Mute Switch", CS42L51_DAC_CTL, 2, 1, 0), - SOC_SINGLE("Soft Ramp Switch", CS42L51_DAC_CTL, 1, 1, 0), diff --git a/sound/soc/codecs/cs42l52.c b/sound/soc/codecs/cs42l52.c index 80161151b3f2c..c19ad3c247026 100644 --- a/sound/soc/codecs/cs42l52.c @@ -422837,7 +543268,7 @@ index 80161151b3f2c..c19ad3c247026 100644 SOC_DOUBLE("ADC Switch", CS42L52_ADC_MISC_CTL, 0, 1, 1, 0), diff --git a/sound/soc/codecs/cs42l56.c b/sound/soc/codecs/cs42l56.c -index 3cf8a0b4478cd..b39c25409c239 100644 +index 3cf8a0b4478cd..f0af8c18e5efa 100644 --- a/sound/soc/codecs/cs42l56.c +++ b/sound/soc/codecs/cs42l56.c @@ -391,9 +391,9 @@ static const struct snd_kcontrol_new cs42l56_snd_controls[] = { @@ -422852,6 +543283,25 @@ index 3cf8a0b4478cd..b39c25409c239 100644 SOC_SINGLE_TLV("Bass Shelving Volume", CS42L56_TONE_CTL, 0, 0x00, 1, tone_tlv), +@@ -1193,18 +1193,12 @@ static int cs42l56_i2c_probe(struct i2c_client *i2c_client, + if (pdata) { + cs42l56->pdata = *pdata; + } else { +- pdata = devm_kzalloc(&i2c_client->dev, sizeof(*pdata), +- GFP_KERNEL); +- if (!pdata) +- return -ENOMEM; +- + if (i2c_client->dev.of_node) { + ret = cs42l56_handle_of_data(i2c_client, + &cs42l56->pdata); + if (ret != 0) + return ret; + } +- cs42l56->pdata = *pdata; + } + + if (cs42l56->pdata.gpio_nreset) { diff --git a/sound/soc/codecs/cs47l15.c b/sound/soc/codecs/cs47l15.c index 1ee83160b83fb..ac9ccdea15b58 100644 --- a/sound/soc/codecs/cs47l15.c @@ -423023,6 +543473,83 @@ index 9632afc2d4d64..ca3b1c00fa787 100644 } +diff --git a/sound/soc/codecs/hdac_hda.c b/sound/soc/codecs/hdac_hda.c +index 390dd6c7f6a50..de5955db0a5f0 100644 +--- a/sound/soc/codecs/hdac_hda.c ++++ 
b/sound/soc/codecs/hdac_hda.c +@@ -46,9 +46,8 @@ static int hdac_hda_dai_hw_params(struct snd_pcm_substream *substream, + struct snd_soc_dai *dai); + static int hdac_hda_dai_hw_free(struct snd_pcm_substream *substream, + struct snd_soc_dai *dai); +-static int hdac_hda_dai_set_tdm_slot(struct snd_soc_dai *dai, +- unsigned int tx_mask, unsigned int rx_mask, +- int slots, int slot_width); ++static int hdac_hda_dai_set_stream(struct snd_soc_dai *dai, void *stream, ++ int direction); + static struct hda_pcm *snd_soc_find_pcm_from_dai(struct hdac_hda_priv *hda_pvt, + struct snd_soc_dai *dai); + +@@ -58,7 +57,7 @@ static const struct snd_soc_dai_ops hdac_hda_dai_ops = { + .prepare = hdac_hda_dai_prepare, + .hw_params = hdac_hda_dai_hw_params, + .hw_free = hdac_hda_dai_hw_free, +- .set_tdm_slot = hdac_hda_dai_set_tdm_slot, ++ .set_stream = hdac_hda_dai_set_stream, + }; + + static struct snd_soc_dai_driver hdac_hda_dais[] = { +@@ -180,21 +179,22 @@ static struct snd_soc_dai_driver hdac_hda_dais[] = { + + }; + +-static int hdac_hda_dai_set_tdm_slot(struct snd_soc_dai *dai, +- unsigned int tx_mask, unsigned int rx_mask, +- int slots, int slot_width) ++static int hdac_hda_dai_set_stream(struct snd_soc_dai *dai, ++ void *stream, int direction) + { + struct snd_soc_component *component = dai->component; + struct hdac_hda_priv *hda_pvt; + struct hdac_hda_pcm *pcm; ++ struct hdac_stream *hstream; ++ ++ if (!stream) ++ return -EINVAL; + + hda_pvt = snd_soc_component_get_drvdata(component); + pcm = &hda_pvt->pcm[dai->id]; ++ hstream = (struct hdac_stream *)stream; + +- if (tx_mask) +- pcm->stream_tag[SNDRV_PCM_STREAM_PLAYBACK] = tx_mask; +- else +- pcm->stream_tag[SNDRV_PCM_STREAM_CAPTURE] = rx_mask; ++ pcm->stream_tag[direction] = hstream->stream_tag; + + return 0; + } +diff --git a/sound/soc/codecs/hdac_hda.h b/sound/soc/codecs/hdac_hda.h +index d0efc5e254ae9..da0ed74758b05 100644 +--- a/sound/soc/codecs/hdac_hda.h ++++ b/sound/soc/codecs/hdac_hda.h +@@ -14,7 +14,7 @@ enum { + HDAC_HDMI_1_DAI_ID, + HDAC_HDMI_2_DAI_ID, + HDAC_HDMI_3_DAI_ID, +- HDAC_LAST_DAI_ID = HDAC_HDMI_3_DAI_ID, ++ HDAC_DAI_ID_NUM + }; + + struct hdac_hda_pcm { +@@ -24,7 +24,7 @@ struct hdac_hda_pcm { + + struct hdac_hda_priv { + struct hda_codec codec; +- struct hdac_hda_pcm pcm[HDAC_LAST_DAI_ID]; ++ struct hdac_hda_pcm pcm[HDAC_DAI_ID_NUM]; + bool need_display_power; + }; + diff --git a/sound/soc/codecs/hdmi-codec.c b/sound/soc/codecs/hdmi-codec.c index b61f980cabdc0..b07607a9ecea4 100644 --- a/sound/soc/codecs/hdmi-codec.c @@ -423036,6 +543563,92 @@ index b61f980cabdc0..b07607a9ecea4 100644 }; static const struct snd_soc_dapm_widget hdmi_widgets[] = { +diff --git a/sound/soc/codecs/jz4725b.c b/sound/soc/codecs/jz4725b.c +index 5201a8f6d7b63..71ea576f7e67a 100644 +--- a/sound/soc/codecs/jz4725b.c ++++ b/sound/soc/codecs/jz4725b.c +@@ -136,14 +136,17 @@ enum { + #define REG_CGR3_GO1L_OFFSET 0 + #define REG_CGR3_GO1L_MASK (0x1f << REG_CGR3_GO1L_OFFSET) + ++#define REG_CGR10_GIL_OFFSET 0 ++#define REG_CGR10_GIR_OFFSET 4 ++ + struct jz_icdc { + struct regmap *regmap; + void __iomem *base; + struct clk *clk; + }; + +-static const SNDRV_CTL_TLVD_DECLARE_DB_LINEAR(jz4725b_dac_tlv, -2250, 0); +-static const SNDRV_CTL_TLVD_DECLARE_DB_LINEAR(jz4725b_line_tlv, -1500, 600); ++static const SNDRV_CTL_TLVD_DECLARE_DB_SCALE(jz4725b_adc_tlv, 0, 150, 0); ++static const SNDRV_CTL_TLVD_DECLARE_DB_SCALE(jz4725b_dac_tlv, -2250, 150, 0); + + static const struct snd_kcontrol_new jz4725b_codec_controls[] = { + SOC_DOUBLE_TLV("Master Playback Volume", +@@ 
-151,11 +154,11 @@ static const struct snd_kcontrol_new jz4725b_codec_controls[] = { + REG_CGR1_GODL_OFFSET, + REG_CGR1_GODR_OFFSET, + 0xf, 1, jz4725b_dac_tlv), +- SOC_DOUBLE_R_TLV("Master Capture Volume", +- JZ4725B_CODEC_REG_CGR3, +- JZ4725B_CODEC_REG_CGR2, +- REG_CGR2_GO1R_OFFSET, +- 0x1f, 1, jz4725b_line_tlv), ++ SOC_DOUBLE_TLV("Master Capture Volume", ++ JZ4725B_CODEC_REG_CGR10, ++ REG_CGR10_GIL_OFFSET, ++ REG_CGR10_GIR_OFFSET, ++ 0xf, 0, jz4725b_adc_tlv), + + SOC_SINGLE("Master Playback Switch", JZ4725B_CODEC_REG_CR1, + REG_CR1_DAC_MUTE_OFFSET, 1, 1), +@@ -180,7 +183,7 @@ static SOC_VALUE_ENUM_SINGLE_DECL(jz4725b_codec_adc_src_enum, + jz4725b_codec_adc_src_texts, + jz4725b_codec_adc_src_values); + static const struct snd_kcontrol_new jz4725b_codec_adc_src_ctrl = +- SOC_DAPM_ENUM("Route", jz4725b_codec_adc_src_enum); ++ SOC_DAPM_ENUM("ADC Source Capture Route", jz4725b_codec_adc_src_enum); + + static const struct snd_kcontrol_new jz4725b_codec_mixer_controls[] = { + SOC_DAPM_SINGLE("Line In Bypass", JZ4725B_CODEC_REG_CR1, +@@ -225,7 +228,7 @@ static const struct snd_soc_dapm_widget jz4725b_codec_dapm_widgets[] = { + SND_SOC_DAPM_ADC("ADC", "Capture", + JZ4725B_CODEC_REG_PMR1, REG_PMR1_SB_ADC_OFFSET, 1), + +- SND_SOC_DAPM_MUX("ADC Source", SND_SOC_NOPM, 0, 0, ++ SND_SOC_DAPM_MUX("ADC Source Capture Route", SND_SOC_NOPM, 0, 0, + &jz4725b_codec_adc_src_ctrl), + + /* Mixer */ +@@ -236,7 +239,8 @@ static const struct snd_soc_dapm_widget jz4725b_codec_dapm_widgets[] = { + SND_SOC_DAPM_MIXER("DAC to Mixer", JZ4725B_CODEC_REG_CR1, + REG_CR1_DACSEL_OFFSET, 0, NULL, 0), + +- SND_SOC_DAPM_MIXER("Line In", SND_SOC_NOPM, 0, 0, NULL, 0), ++ SND_SOC_DAPM_MIXER("Line In", JZ4725B_CODEC_REG_PMR1, ++ REG_PMR1_SB_LIN_OFFSET, 1, NULL, 0), + SND_SOC_DAPM_MIXER("HP Out", JZ4725B_CODEC_REG_CR1, + REG_CR1_HP_DIS_OFFSET, 1, NULL, 0), + +@@ -283,11 +287,11 @@ static const struct snd_soc_dapm_route jz4725b_codec_dapm_routes[] = { + {"Mixer", NULL, "DAC to Mixer"}, + + {"Mixer to ADC", NULL, "Mixer"}, +- {"ADC Source", "Mixer", "Mixer to ADC"}, +- {"ADC Source", "Line In", "Line In"}, +- {"ADC Source", "Mic 1", "Mic 1"}, +- {"ADC Source", "Mic 2", "Mic 2"}, +- {"ADC", NULL, "ADC Source"}, ++ {"ADC Source Capture Route", "Mixer", "Mixer to ADC"}, ++ {"ADC Source Capture Route", "Line In", "Line In"}, ++ {"ADC Source Capture Route", "Mic 1", "Mic 1"}, ++ {"ADC Source Capture Route", "Mic 2", "Mic 2"}, ++ {"ADC", NULL, "ADC Source Capture Route"}, + + {"Out Stage", NULL, "Mixer"}, + {"HP Out", NULL, "Out Stage"}, diff --git a/sound/soc/codecs/lpass-rx-macro.c b/sound/soc/codecs/lpass-rx-macro.c index 196b06898eeb2..23452900b9ae1 100644 --- a/sound/soc/codecs/lpass-rx-macro.c @@ -423310,10 +543923,34 @@ index b45ec35cd63c3..5513acd360b8f 100644 } static const char *max98090_perf_pwr_text[] = +diff --git a/sound/soc/codecs/max98373-i2c.c b/sound/soc/codecs/max98373-i2c.c +index ddb6436835d73..68497a4521dd2 100644 +--- a/sound/soc/codecs/max98373-i2c.c ++++ b/sound/soc/codecs/max98373-i2c.c +@@ -551,6 +551,10 @@ static int max98373_i2c_probe(struct i2c_client *i2c, + max98373->cache = devm_kcalloc(&i2c->dev, max98373->cache_num, + sizeof(*max98373->cache), + GFP_KERNEL); ++ if (!max98373->cache) { ++ ret = -ENOMEM; ++ return ret; ++ } + + for (i = 0; i < max98373->cache_num; i++) + max98373->cache[i].reg = max98373_i2c_cache_reg[i]; diff --git a/sound/soc/codecs/max98373-sdw.c b/sound/soc/codecs/max98373-sdw.c -index dc520effc61cb..12323d4b5bfaa 100644 +index dc520effc61cb..97b64477dde67 100644 --- 
a/sound/soc/codecs/max98373-sdw.c +++ b/sound/soc/codecs/max98373-sdw.c +@@ -741,7 +741,7 @@ static int max98373_sdw_set_tdm_slot(struct snd_soc_dai *dai, + static const struct snd_soc_dai_ops max98373_dai_sdw_ops = { + .hw_params = max98373_sdw_dai_hw_params, + .hw_free = max98373_pcm_hw_free, +- .set_sdw_stream = max98373_set_sdw_stream, ++ .set_stream = max98373_set_sdw_stream, + .shutdown = max98373_shutdown, + .set_tdm_slot = max98373_sdw_set_tdm_slot, + }; @@ -862,6 +862,16 @@ static int max98373_sdw_probe(struct sdw_slave *slave, return max98373_init(slave, regmap); } @@ -423584,23 +544221,15 @@ index 2d6a4a29b8507..cf1febe54bcd9 100644 dev_info(priv->dev, "%s() failed to read mic-type-2, use default (%d)\n", diff --git a/sound/soc/codecs/mt6660.c b/sound/soc/codecs/mt6660.c -index 358c500377dff..a0a3fd60e93ad 100644 +index 358c500377dff..7d7f97b8c7c4f 100644 --- a/sound/soc/codecs/mt6660.c +++ b/sound/soc/codecs/mt6660.c -@@ -504,13 +504,17 @@ static int mt6660_i2c_probe(struct i2c_client *client, - dev_err(chip->dev, "read chip revision fail\n"); - goto probe_fail; - } -- pm_runtime_set_active(chip->dev); -- pm_runtime_enable(chip->dev); - +@@ -510,7 +510,11 @@ static int mt6660_i2c_probe(struct i2c_client *client, ret = devm_snd_soc_register_component(chip->dev, &mt6660_component_driver, &mt6660_codec_dai, 1); -+ if (!ret) { -+ pm_runtime_set_active(chip->dev); -+ pm_runtime_enable(chip->dev); -+ } ++ if (ret) ++ pm_runtime_disable(chip->dev); + return ret; + @@ -423788,6 +544417,43 @@ index f946ef65a4c19..27589900f4fbf 100644 nau8824_print_device_properties(nau8824); ret = regmap_read(nau8824->regmap, NAU8824_REG_I2C_DEVICE_ID, &value); +diff --git a/sound/soc/codecs/pcm512x.c b/sound/soc/codecs/pcm512x.c +index 60dee41816dc2..1c26577f08ee0 100644 +--- a/sound/soc/codecs/pcm512x.c ++++ b/sound/soc/codecs/pcm512x.c +@@ -1635,7 +1635,7 @@ int pcm512x_probe(struct device *dev, struct regmap *regmap) + if (val > 6) { + dev_err(dev, "Invalid pll-in\n"); + ret = -EINVAL; +- goto err_clk; ++ goto err_pm; + } + pcm512x->pll_in = val; + } +@@ -1644,7 +1644,7 @@ int pcm512x_probe(struct device *dev, struct regmap *regmap) + if (val > 6) { + dev_err(dev, "Invalid pll-out\n"); + ret = -EINVAL; +- goto err_clk; ++ goto err_pm; + } + pcm512x->pll_out = val; + } +@@ -1653,12 +1653,12 @@ int pcm512x_probe(struct device *dev, struct regmap *regmap) + dev_err(dev, + "Error: both pll-in and pll-out, or none\n"); + ret = -EINVAL; +- goto err_clk; ++ goto err_pm; + } + if (pcm512x->pll_in && pcm512x->pll_in == pcm512x->pll_out) { + dev_err(dev, "Error: pll-in == pll-out\n"); + ret = -EINVAL; +- goto err_clk; ++ goto err_pm; + } + } + #endif diff --git a/sound/soc/codecs/rk3328_codec.c b/sound/soc/codecs/rk3328_codec.c index 758d439e8c7a5..86b679cf7aef9 100644 --- a/sound/soc/codecs/rk3328_codec.c @@ -423836,10 +544502,96 @@ index 943d7d933e81b..cce6f4e7992f5 100644 MODULE_AUTHOR("binyuan <kevan.lan@rock-chips.com>"); MODULE_LICENSE("GPL v2"); +MODULE_ALIAS("platform:rk817-codec"); +diff --git a/sound/soc/codecs/rt1019.c b/sound/soc/codecs/rt1019.c +index 8c0b00242bb87..56e7c7ee98fd0 100644 +--- a/sound/soc/codecs/rt1019.c ++++ b/sound/soc/codecs/rt1019.c +@@ -391,18 +391,18 @@ static int rt1019_set_tdm_slot(struct snd_soc_dai *dai, unsigned int tx_mask, + unsigned int rx_mask, int slots, int slot_width) + { + struct snd_soc_component *component = dai->component; +- unsigned int val = 0, rx_slotnum; ++ unsigned int cn = 0, cl = 0, rx_slotnum; + int ret = 0, first_bit; + + switch (slots) { + case 4: 
+- val |= RT1019_I2S_TX_4CH; ++ cn = RT1019_I2S_TX_4CH; + break; + case 6: +- val |= RT1019_I2S_TX_6CH; ++ cn = RT1019_I2S_TX_6CH; + break; + case 8: +- val |= RT1019_I2S_TX_8CH; ++ cn = RT1019_I2S_TX_8CH; + break; + case 2: + break; +@@ -412,16 +412,16 @@ static int rt1019_set_tdm_slot(struct snd_soc_dai *dai, unsigned int tx_mask, + + switch (slot_width) { + case 20: +- val |= RT1019_I2S_DL_20; ++ cl = RT1019_TDM_CL_20; + break; + case 24: +- val |= RT1019_I2S_DL_24; ++ cl = RT1019_TDM_CL_24; + break; + case 32: +- val |= RT1019_I2S_DL_32; ++ cl = RT1019_TDM_CL_32; + break; + case 8: +- val |= RT1019_I2S_DL_8; ++ cl = RT1019_TDM_CL_8; + break; + case 16: + break; +@@ -470,8 +470,10 @@ static int rt1019_set_tdm_slot(struct snd_soc_dai *dai, unsigned int tx_mask, + goto _set_tdm_err_; + } + ++ snd_soc_component_update_bits(component, RT1019_TDM_1, ++ RT1019_TDM_CL_MASK, cl); + snd_soc_component_update_bits(component, RT1019_TDM_2, +- RT1019_I2S_CH_TX_MASK | RT1019_I2S_DF_MASK, val); ++ RT1019_I2S_CH_TX_MASK, cn); + + _set_tdm_err_: + return ret; +diff --git a/sound/soc/codecs/rt1019.h b/sound/soc/codecs/rt1019.h +index 64df831eeb720..48ba15efb48dd 100644 +--- a/sound/soc/codecs/rt1019.h ++++ b/sound/soc/codecs/rt1019.h +@@ -95,6 +95,12 @@ + #define RT1019_TDM_BCLK_MASK (0x1 << 6) + #define RT1019_TDM_BCLK_NORM (0x0 << 6) + #define RT1019_TDM_BCLK_INV (0x1 << 6) ++#define RT1019_TDM_CL_MASK (0x7) ++#define RT1019_TDM_CL_8 (0x4) ++#define RT1019_TDM_CL_32 (0x3) ++#define RT1019_TDM_CL_24 (0x2) ++#define RT1019_TDM_CL_20 (0x1) ++#define RT1019_TDM_CL_16 (0x0) + + /* 0x0401 TDM Control-2 */ + #define RT1019_I2S_CH_TX_MASK (0x3 << 6) diff --git a/sound/soc/codecs/rt1308-sdw.c b/sound/soc/codecs/rt1308-sdw.c -index f716668de6400..8472d855c325e 100644 +index f716668de6400..03adf3324b81d 100644 --- a/sound/soc/codecs/rt1308-sdw.c +++ b/sound/soc/codecs/rt1308-sdw.c +@@ -613,7 +613,7 @@ static const struct snd_soc_component_driver soc_component_sdw_rt1308 = { + static const struct snd_soc_dai_ops rt1308_aif_dai_ops = { + .hw_params = rt1308_sdw_hw_params, + .hw_free = rt1308_sdw_pcm_hw_free, +- .set_sdw_stream = rt1308_set_sdw_stream, ++ .set_stream = rt1308_set_sdw_stream, + .shutdown = rt1308_sdw_shutdown, + .set_tdm_slot = rt1308_sdw_set_tdm_slot, + }; @@ -683,6 +683,16 @@ static int rt1308_sdw_probe(struct sdw_slave *slave, return 0; } @@ -423865,10 +544617,35 @@ index f716668de6400..8472d855c325e 100644 .ops = &rt1308_slave_ops, .id_table = rt1308_id, }; +diff --git a/sound/soc/codecs/rt1308-sdw.h b/sound/soc/codecs/rt1308-sdw.h +index c5ce75666dcc8..98293d73ebabc 100644 +--- a/sound/soc/codecs/rt1308-sdw.h ++++ b/sound/soc/codecs/rt1308-sdw.h +@@ -139,9 +139,11 @@ static const struct reg_default rt1308_reg_defaults[] = { + { 0x3005, 0x23 }, + { 0x3008, 0x02 }, + { 0x300a, 0x00 }, ++ { 0xc000 | (RT1308_DATA_PATH << 4), 0x00 }, + { 0xc003 | (RT1308_DAC_SET << 4), 0x00 }, + { 0xc001 | (RT1308_POWER << 4), 0x00 }, + { 0xc002 | (RT1308_POWER << 4), 0x00 }, ++ { 0xc000 | (RT1308_POWER_STATUS << 4), 0x00 }, + }; + + #define RT1308_SDW_OFFSET 0xc000 diff --git a/sound/soc/codecs/rt1316-sdw.c b/sound/soc/codecs/rt1316-sdw.c -index 09b4914bba1bf..09cf3ca86fa4a 100644 +index 09b4914bba1bf..1e04aa8ab1666 100644 --- a/sound/soc/codecs/rt1316-sdw.c +++ b/sound/soc/codecs/rt1316-sdw.c +@@ -602,7 +602,7 @@ static const struct snd_soc_component_driver soc_component_sdw_rt1316 = { + static const struct snd_soc_dai_ops rt1316_aif_dai_ops = { + .hw_params = rt1316_sdw_hw_params, + .hw_free = 
rt1316_sdw_pcm_hw_free, +- .set_sdw_stream = rt1316_set_sdw_stream, ++ .set_stream = rt1316_set_sdw_stream, + .shutdown = rt1316_sdw_shutdown, + }; + @@ -675,6 +675,16 @@ static int rt1316_sdw_probe(struct sdw_slave *slave, return rt1316_sdw_init(&slave->dev, regmap, slave); } @@ -423894,6 +544671,24 @@ index 09b4914bba1bf..09cf3ca86fa4a 100644 .ops = &rt1316_slave_ops, .id_table = rt1316_id, }; +diff --git a/sound/soc/codecs/rt298.c b/sound/soc/codecs/rt298.c +index c592c40a7ab35..604754e4b29ff 100644 +--- a/sound/soc/codecs/rt298.c ++++ b/sound/soc/codecs/rt298.c +@@ -1173,6 +1173,13 @@ static const struct dmi_system_id force_combo_jack_table[] = { + DMI_MATCH(DMI_PRODUCT_NAME, "Geminilake") + } + }, ++ { ++ .ident = "Intel Kabylake R RVP", ++ .matches = { ++ DMI_MATCH(DMI_SYS_VENDOR, "Intel Corporation"), ++ DMI_MATCH(DMI_PRODUCT_NAME, "Kabylake Client platform") ++ } ++ }, + { } + }; + diff --git a/sound/soc/codecs/rt5514.c b/sound/soc/codecs/rt5514.c index 4b1ad5054e8d1..c011bd04153ae 100644 --- a/sound/soc/codecs/rt5514.c @@ -424009,6 +544804,19 @@ index 6ab1a8bc37352..1186ceb5a978e 100644 mutex_lock(&rt5668->calibrate_mutex); +diff --git a/sound/soc/codecs/rt5670.c b/sound/soc/codecs/rt5670.c +index ecbaf129a6e3e..51b385575a5cc 100644 +--- a/sound/soc/codecs/rt5670.c ++++ b/sound/soc/codecs/rt5670.c +@@ -3313,8 +3313,6 @@ static int rt5670_i2c_probe(struct i2c_client *i2c, + if (ret < 0) + goto err; + +- pm_runtime_put(&i2c->dev); +- + return 0; + err: + pm_runtime_disable(&i2c->dev); diff --git a/sound/soc/codecs/rt5682-i2c.c b/sound/soc/codecs/rt5682-i2c.c index b9d5d7a0975b3..3d2d7c9ce66df 100644 --- a/sound/soc/codecs/rt5682-i2c.c @@ -424073,9 +544881,18 @@ index b9d5d7a0975b3..3d2d7c9ce66df 100644 &rt5682_soc_component_dev, rt5682_dai, ARRAY_SIZE(rt5682_dai)); diff --git a/sound/soc/codecs/rt5682-sdw.c b/sound/soc/codecs/rt5682-sdw.c -index 31a4f286043e4..a030c9987b920 100644 +index 31a4f286043e4..f04e18c32489d 100644 --- a/sound/soc/codecs/rt5682-sdw.c +++ b/sound/soc/codecs/rt5682-sdw.c +@@ -272,7 +272,7 @@ static int rt5682_sdw_hw_free(struct snd_pcm_substream *substream, + static const struct snd_soc_dai_ops rt5682_sdw_ops = { + .hw_params = rt5682_sdw_hw_params, + .hw_free = rt5682_sdw_hw_free, +- .set_sdw_stream = rt5682_set_sdw_stream, ++ .set_stream = rt5682_set_sdw_stream, + .shutdown = rt5682_sdw_shutdown, + }; + @@ -719,9 +719,12 @@ static int rt5682_sdw_remove(struct sdw_slave *slave) { struct rt5682_priv *rt5682 = dev_get_drvdata(&slave->dev); @@ -424673,7 +545490,7 @@ index bda5948996642..f7439e40ca8b5 100644 } diff --git a/sound/soc/codecs/rt700.c b/sound/soc/codecs/rt700.c -index 921382724f9cd..e049d672ccfd0 100644 +index 921382724f9cd..3de3406d653e4 100644 --- a/sound/soc/codecs/rt700.c +++ b/sound/soc/codecs/rt700.c @@ -162,7 +162,7 @@ static void rt700_jack_detect_handler(struct work_struct *work) @@ -424716,6 +545533,15 @@ index 921382724f9cd..e049d672ccfd0 100644 return 0; } +@@ -1005,7 +1015,7 @@ static int rt700_pcm_hw_free(struct snd_pcm_substream *substream, + static const struct snd_soc_dai_ops rt700_ops = { + .hw_params = rt700_pcm_hw_params, + .hw_free = rt700_pcm_hw_free, +- .set_sdw_stream = rt700_set_sdw_stream, ++ .set_stream = rt700_set_sdw_stream, + .shutdown = rt700_shutdown, + }; + @@ -1114,6 +1124,11 @@ int rt700_init(struct device *dev, struct regmap *sdw_regmap, mutex_init(&rt700->disable_irq_lock); @@ -424743,7 +545569,7 @@ index 921382724f9cd..e049d672ccfd0 100644 * if set_jack callback occurred early than io_init, * we set up the jack 
detection function now diff --git a/sound/soc/codecs/rt711-sdca-sdw.c b/sound/soc/codecs/rt711-sdca-sdw.c -index aaf5af153d3fe..a085b2f530aa1 100644 +index aaf5af153d3fe..31e77d462ef34 100644 --- a/sound/soc/codecs/rt711-sdca-sdw.c +++ b/sound/soc/codecs/rt711-sdca-sdw.c @@ -11,6 +11,7 @@ @@ -424754,6 +545580,15 @@ index aaf5af153d3fe..a085b2f530aa1 100644 #include "rt711-sdca.h" #include "rt711-sdca-sdw.h" +@@ -229,7 +230,7 @@ static int rt711_sdca_read_prop(struct sdw_slave *slave) + } + + /* set the timeout values */ +- prop->clk_stop_timeout = 20; ++ prop->clk_stop_timeout = 700; + + /* wake-up event */ + prop->wake_capable = 1; @@ -364,11 +365,17 @@ static int rt711_sdca_sdw_remove(struct sdw_slave *slave) { struct rt711_sdca_priv *rt711 = dev_get_drvdata(&slave->dev); @@ -424774,7 +545609,7 @@ index aaf5af153d3fe..a085b2f530aa1 100644 } diff --git a/sound/soc/codecs/rt711-sdca.c b/sound/soc/codecs/rt711-sdca.c -index 2e992589f1e42..3b5df3ea2f602 100644 +index 2e992589f1e42..5ad53bbc85284 100644 --- a/sound/soc/codecs/rt711-sdca.c +++ b/sound/soc/codecs/rt711-sdca.c @@ -34,7 +34,7 @@ static int rt711_sdca_index_write(struct rt711_sdca_priv *rt711, @@ -424859,6 +545694,15 @@ index 2e992589f1e42..3b5df3ea2f602 100644 }; static int rt711_sdca_set_sdw_stream(struct snd_soc_dai *dai, void *sdw_stream, +@@ -1358,7 +1361,7 @@ static int rt711_sdca_pcm_hw_free(struct snd_pcm_substream *substream, + static const struct snd_soc_dai_ops rt711_sdca_ops = { + .hw_params = rt711_sdca_pcm_hw_params, + .hw_free = rt711_sdca_pcm_hw_free, +- .set_sdw_stream = rt711_sdca_set_sdw_stream, ++ .set_stream = rt711_sdca_set_sdw_stream, + .shutdown = rt711_sdca_shutdown, + }; + @@ -1411,8 +1414,12 @@ int rt711_sdca_init(struct device *dev, struct regmap *regmap, rt711->regmap = regmap; rt711->mbq_regmap = mbq_regmap; @@ -424920,7 +545764,7 @@ index bda2cc9439c98..4fe68bcf2a7c2 100644 } diff --git a/sound/soc/codecs/rt711.c b/sound/soc/codecs/rt711.c -index a7c5608a0ef87..51a98e730fc8f 100644 +index a7c5608a0ef87..286d882636e00 100644 --- a/sound/soc/codecs/rt711.c +++ b/sound/soc/codecs/rt711.c @@ -242,7 +242,7 @@ static void rt711_jack_detect_handler(struct work_struct *work) @@ -424986,6 +545830,15 @@ index a7c5608a0ef87..51a98e730fc8f 100644 }; static int rt711_set_sdw_stream(struct snd_soc_dai *dai, void *sdw_stream, +@@ -1089,7 +1092,7 @@ static int rt711_pcm_hw_free(struct snd_pcm_substream *substream, + static const struct snd_soc_dai_ops rt711_ops = { + .hw_params = rt711_pcm_hw_params, + .hw_free = rt711_pcm_hw_free, +- .set_sdw_stream = rt711_set_sdw_stream, ++ .set_stream = rt711_set_sdw_stream, + .shutdown = rt711_shutdown, + }; + @@ -1196,8 +1199,13 @@ int rt711_init(struct device *dev, struct regmap *sdw_regmap, rt711->sdw_regmap = sdw_regmap; rt711->regmap = regmap; @@ -425018,7 +545871,7 @@ index a7c5608a0ef87..51a98e730fc8f 100644 /* * if set_jack callback occurred early than io_init, diff --git a/sound/soc/codecs/rt715-sdca-sdw.c b/sound/soc/codecs/rt715-sdca-sdw.c -index a5c673f43d824..0f4354eafef25 100644 +index a5c673f43d824..85abf8073c278 100644 --- a/sound/soc/codecs/rt715-sdca-sdw.c +++ b/sound/soc/codecs/rt715-sdca-sdw.c @@ -13,6 +13,7 @@ @@ -425029,6 +545882,15 @@ index a5c673f43d824..0f4354eafef25 100644 #include <linux/regmap.h> #include <sound/soc.h> #include "rt715-sdca.h" +@@ -166,7 +167,7 @@ static int rt715_sdca_read_prop(struct sdw_slave *slave) + } + + /* set the timeout values */ +- prop->clk_stop_timeout = 20; ++ prop->clk_stop_timeout = 200; + + return 0; + } @@ -195,6 
+196,16 @@ static int rt715_sdca_sdw_probe(struct sdw_slave *slave, return rt715_sdca_init(&slave->dev, mbq_regmap, regmap, slave); } @@ -425054,6 +545916,19 @@ index a5c673f43d824..0f4354eafef25 100644 .ops = &rt715_sdca_slave_ops, .id_table = rt715_sdca_id, }; +diff --git a/sound/soc/codecs/rt715-sdca.c b/sound/soc/codecs/rt715-sdca.c +index 66e166568c508..bfa536bd71960 100644 +--- a/sound/soc/codecs/rt715-sdca.c ++++ b/sound/soc/codecs/rt715-sdca.c +@@ -938,7 +938,7 @@ static int rt715_sdca_pcm_hw_free(struct snd_pcm_substream *substream, + static const struct snd_soc_dai_ops rt715_sdca_ops = { + .hw_params = rt715_sdca_pcm_hw_params, + .hw_free = rt715_sdca_pcm_hw_free, +- .set_sdw_stream = rt715_sdca_set_sdw_stream, ++ .set_stream = rt715_sdca_set_sdw_stream, + .shutdown = rt715_sdca_shutdown, + }; + diff --git a/sound/soc/codecs/rt715-sdw.c b/sound/soc/codecs/rt715-sdw.c index a7b21b03c08bb..b047bf87a100c 100644 --- a/sound/soc/codecs/rt715-sdw.c @@ -425091,21 +545966,48 @@ index a7b21b03c08bb..b047bf87a100c 100644 .ops = &rt715_slave_ops, .id_table = rt715_id, }; +diff --git a/sound/soc/codecs/rt715.c b/sound/soc/codecs/rt715.c +index 1352869cc0867..a64d11a747513 100644 +--- a/sound/soc/codecs/rt715.c ++++ b/sound/soc/codecs/rt715.c +@@ -909,7 +909,7 @@ static int rt715_pcm_hw_free(struct snd_pcm_substream *substream, + static const struct snd_soc_dai_ops rt715_ops = { + .hw_params = rt715_pcm_hw_params, + .hw_free = rt715_pcm_hw_free, +- .set_sdw_stream = rt715_set_sdw_stream, ++ .set_stream = rt715_set_sdw_stream, + .shutdown = rt715_shutdown, + }; + +diff --git a/sound/soc/codecs/sdw-mockup.c b/sound/soc/codecs/sdw-mockup.c +index 8ea13cfa9f8ed..7c612aaf31c75 100644 +--- a/sound/soc/codecs/sdw-mockup.c ++++ b/sound/soc/codecs/sdw-mockup.c +@@ -138,7 +138,7 @@ static int sdw_mockup_pcm_hw_free(struct snd_pcm_substream *substream, + static const struct snd_soc_dai_ops sdw_mockup_ops = { + .hw_params = sdw_mockup_pcm_hw_params, + .hw_free = sdw_mockup_pcm_hw_free, +- .set_sdw_stream = sdw_mockup_set_sdw_stream, ++ .set_stream = sdw_mockup_set_sdw_stream, + .shutdown = sdw_mockup_shutdown, + }; + diff --git a/sound/soc/codecs/sgtl5000.c b/sound/soc/codecs/sgtl5000.c -index 97bf1f222805e..dc56e6c6b6685 100644 +index 97bf1f222805e..3c5a4fe2fad63 100644 --- a/sound/soc/codecs/sgtl5000.c +++ b/sound/soc/codecs/sgtl5000.c -@@ -1797,6 +1797,9 @@ static int sgtl5000_i2c_remove(struct i2c_client *client) +@@ -1797,6 +1797,10 @@ static int sgtl5000_i2c_remove(struct i2c_client *client) { struct sgtl5000_priv *sgtl5000 = i2c_get_clientdata(client); ++ regmap_write(sgtl5000->regmap, SGTL5000_CHIP_CLK_CTRL, SGTL5000_CHIP_CLK_CTRL_DEFAULT); + regmap_write(sgtl5000->regmap, SGTL5000_CHIP_DIG_POWER, SGTL5000_DIG_POWER_DEFAULT); + regmap_write(sgtl5000->regmap, SGTL5000_CHIP_ANA_POWER, SGTL5000_ANA_POWER_DEFAULT); + clk_disable_unprepare(sgtl5000->mclk); regulator_bulk_disable(sgtl5000->num_supplies, sgtl5000->supplies); regulator_bulk_free(sgtl5000->num_supplies, sgtl5000->supplies); -@@ -1804,6 +1807,11 @@ static int sgtl5000_i2c_remove(struct i2c_client *client) +@@ -1804,6 +1808,11 @@ static int sgtl5000_i2c_remove(struct i2c_client *client) return 0; } @@ -425117,7 +546019,7 @@ index 97bf1f222805e..dc56e6c6b6685 100644 static const struct i2c_device_id sgtl5000_id[] = { {"sgtl5000", 0}, {}, -@@ -1824,6 +1832,7 @@ static struct i2c_driver sgtl5000_i2c_driver = { +@@ -1824,6 +1833,7 @@ static struct i2c_driver sgtl5000_i2c_driver = { }, .probe = sgtl5000_i2c_probe, .remove = sgtl5000_i2c_remove, 
@@ -425138,7 +546040,7 @@ index 56ec5863f2507..3a808c762299e 100644 #define SGTL5000_DAC_EN 0x0020 #define SGTL5000_DAP_POWERUP 0x0010 diff --git a/sound/soc/codecs/tas2764.c b/sound/soc/codecs/tas2764.c -index 9265af41c235d..afb4c0d7e7144 100644 +index 9265af41c235d..1951bae95b315 100644 --- a/sound/soc/codecs/tas2764.c +++ b/sound/soc/codecs/tas2764.c @@ -34,6 +34,9 @@ struct tas2764_priv { @@ -425259,16 +546161,16 @@ index 9265af41c235d..afb4c0d7e7144 100644 { - struct snd_soc_component *component = dai->component; - int ret; -+ struct tas2764_priv *tas2764 = -+ snd_soc_component_get_drvdata(dai->component); - +- - ret = snd_soc_component_update_bits(component, TAS2764_PWR_CTRL, - TAS2764_PWR_CTRL_MASK, - mute ? TAS2764_PWR_CTRL_MUTE : 0); - - if (ret < 0) - return ret; -- ++ struct tas2764_priv *tas2764 = ++ snd_soc_component_get_drvdata(dai->component); + - return 0; + tas2764->unmuted = !mute; + return tas2764_update_pwr_ctrl(tas2764); @@ -425340,7 +546242,34 @@ index 9265af41c235d..afb4c0d7e7144 100644 if (ret < 0) return ret; -@@ -477,7 +466,7 @@ static struct snd_soc_dai_driver tas2764_dai_driver[] = { +@@ -397,20 +386,13 @@ static int tas2764_set_dai_tdm_slot(struct snd_soc_dai *dai, + if (tx_mask == 0 || rx_mask != 0) + return -EINVAL; + +- if (slots == 1) { +- if (tx_mask != 1) +- return -EINVAL; +- left_slot = 0; +- right_slot = 0; ++ left_slot = __ffs(tx_mask); ++ tx_mask &= ~(1 << left_slot); ++ if (tx_mask == 0) { ++ right_slot = left_slot; + } else { +- left_slot = __ffs(tx_mask); +- tx_mask &= ~(1 << left_slot); +- if (tx_mask == 0) { +- right_slot = left_slot; +- } else { +- right_slot = __ffs(tx_mask); +- tx_mask &= ~(1 << right_slot); +- } ++ right_slot = __ffs(tx_mask); ++ tx_mask &= ~(1 << right_slot); + } + + if (tx_mask != 0 || left_slot >= slots || right_slot >= slots) +@@ -477,7 +459,7 @@ static struct snd_soc_dai_driver tas2764_dai_driver[] = { .id = 0, .playback = { .stream_name = "ASI1 Playback", @@ -425349,7 +546278,7 @@ index 9265af41c235d..afb4c0d7e7144 100644 .channels_max = 2, .rates = TAS2764_RATES, .formats = TAS2764_FORMATS, -@@ -501,8 +490,10 @@ static int tas2764_codec_probe(struct snd_soc_component *component) +@@ -501,8 +483,10 @@ static int tas2764_codec_probe(struct snd_soc_component *component) tas2764->component = component; @@ -425361,7 +546290,7 @@ index 9265af41c235d..afb4c0d7e7144 100644 tas2764_reset(tas2764); -@@ -516,22 +507,16 @@ static int tas2764_codec_probe(struct snd_soc_component *component) +@@ -516,22 +500,16 @@ static int tas2764_codec_probe(struct snd_soc_component *component) if (ret < 0) return ret; @@ -425386,7 +546315,7 @@ index 9265af41c235d..afb4c0d7e7144 100644 tas2764_digital_tlv), }; -@@ -539,7 +524,6 @@ static const struct snd_soc_component_driver soc_component_driver_tas2764 = { +@@ -539,7 +517,6 @@ static const struct snd_soc_component_driver soc_component_driver_tas2764 = { .probe = tas2764_codec_probe, .suspend = tas2764_codec_suspend, .resume = tas2764_codec_resume, @@ -425394,7 +546323,7 @@ index 9265af41c235d..afb4c0d7e7144 100644 .controls = tas2764_snd_controls, .num_controls = ARRAY_SIZE(tas2764_snd_controls), .dapm_widgets = tas2764_dapm_widgets, -@@ -556,7 +540,7 @@ static const struct reg_default tas2764_reg_defaults[] = { +@@ -556,7 +533,7 @@ static const struct reg_default tas2764_reg_defaults[] = { { TAS2764_SW_RST, 0x00 }, { TAS2764_PWR_CTRL, 0x1a }, { TAS2764_DVC, 0x00 }, @@ -425428,7 +546357,7 @@ index 67d6fd903c42c..f015f22a083b5 100644 /* TDM Configuration Reg3 */ #define TAS2764_TDM_CFG3 
TAS2764_REG(0X0, 0x0c) diff --git a/sound/soc/codecs/tas2770.c b/sound/soc/codecs/tas2770.c -index 172e79cbe0daf..a13b086a072be 100644 +index 172e79cbe0daf..ec0df3b1ef615 100644 --- a/sound/soc/codecs/tas2770.c +++ b/sound/soc/codecs/tas2770.c @@ -38,40 +38,30 @@ static void tas2770_reset(struct tas2770_priv *tas2770) @@ -425630,7 +546559,35 @@ index 172e79cbe0daf..a13b086a072be 100644 return 0; } -@@ -486,7 +478,7 @@ static struct snd_soc_dai_driver tas2770_dai_driver[] = { +@@ -403,21 +395,13 @@ static int tas2770_set_dai_tdm_slot(struct snd_soc_dai *dai, + if (tx_mask == 0 || rx_mask != 0) + return -EINVAL; + +- if (slots == 1) { +- if (tx_mask != 1) +- return -EINVAL; +- +- left_slot = 0; +- right_slot = 0; ++ left_slot = __ffs(tx_mask); ++ tx_mask &= ~(1 << left_slot); ++ if (tx_mask == 0) { ++ right_slot = left_slot; + } else { +- left_slot = __ffs(tx_mask); +- tx_mask &= ~(1 << left_slot); +- if (tx_mask == 0) { +- right_slot = left_slot; +- } else { +- right_slot = __ffs(tx_mask); +- tx_mask &= ~(1 << right_slot); +- } ++ right_slot = __ffs(tx_mask); ++ tx_mask &= ~(1 << right_slot); + } + + if (tx_mask != 0 || left_slot >= slots || right_slot >= slots) +@@ -486,7 +470,7 @@ static struct snd_soc_dai_driver tas2770_dai_driver[] = { .id = 0, .playback = { .stream_name = "ASI1 Playback", @@ -425639,7 +546596,7 @@ index 172e79cbe0daf..a13b086a072be 100644 .channels_max = 2, .rates = TAS2770_RATES, .formats = TAS2770_FORMATS, -@@ -503,6 +495,8 @@ static struct snd_soc_dai_driver tas2770_dai_driver[] = { +@@ -503,6 +487,8 @@ static struct snd_soc_dai_driver tas2770_dai_driver[] = { }, }; @@ -425648,7 +546605,7 @@ index 172e79cbe0daf..a13b086a072be 100644 static int tas2770_codec_probe(struct snd_soc_component *component) { struct tas2770_priv *tas2770 = -@@ -510,10 +504,13 @@ static int tas2770_codec_probe(struct snd_soc_component *component) +@@ -510,10 +496,13 @@ static int tas2770_codec_probe(struct snd_soc_component *component) tas2770->component = component; @@ -425663,7 +546620,7 @@ index 172e79cbe0daf..a13b086a072be 100644 return 0; } -@@ -532,7 +529,6 @@ static const struct snd_soc_component_driver soc_component_driver_tas2770 = { +@@ -532,7 +521,6 @@ static const struct snd_soc_component_driver soc_component_driver_tas2770 = { .probe = tas2770_codec_probe, .suspend = tas2770_codec_suspend, .resume = tas2770_codec_resume, @@ -426179,7 +547136,7 @@ index c496b359f2f40..4ca52d9db23a5 100644 dev_err(dev, "Unable to get SLIM Interface device\n"); return -EINVAL; diff --git a/sound/soc/codecs/wcd938x.c b/sound/soc/codecs/wcd938x.c -index 52de7d14b1398..8cdc45e669f2d 100644 +index 52de7d14b1398..b95cbae586414 100644 --- a/sound/soc/codecs/wcd938x.c +++ b/sound/soc/codecs/wcd938x.c @@ -1174,6 +1174,9 @@ static bool wcd938x_readonly_register(struct device *dev, unsigned int reg) @@ -426361,6 +547318,15 @@ index 52de7d14b1398..8cdc45e669f2d 100644 wcd938x_hph_impedance_get, NULL), }; +@@ -4284,7 +4302,7 @@ static int wcd938x_codec_set_sdw_stream(struct snd_soc_dai *dai, + static const struct snd_soc_dai_ops wcd938x_sdw_dai_ops = { + .hw_params = wcd938x_codec_hw_params, + .hw_free = wcd938x_codec_free, +- .set_sdw_stream = wcd938x_codec_set_sdw_stream, ++ .set_stream = wcd938x_codec_set_sdw_stream, + }; + + static struct snd_soc_dai_driver wcd938x_dais[] = { diff --git a/sound/soc/codecs/wm2000.c b/sound/soc/codecs/wm2000.c index 72e165cc64439..97ece3114b3dc 100644 --- a/sound/soc/codecs/wm2000.c @@ -426390,32 +547356,8 @@ index 72e165cc64439..97ece3114b3dc 100644 } static int 
wm2000_anc_set_mode(struct wm2000_priv *wm2000) -diff --git a/sound/soc/codecs/wm5102.c b/sound/soc/codecs/wm5102.c -index 621598608bf0b..c8adce8936bcd 100644 ---- a/sound/soc/codecs/wm5102.c -+++ b/sound/soc/codecs/wm5102.c -@@ -2087,9 +2087,6 @@ static int wm5102_probe(struct platform_device *pdev) - regmap_update_bits(arizona->regmap, wm5102_digital_vu[i], - WM5102_DIG_VU, WM5102_DIG_VU); - -- pm_runtime_enable(&pdev->dev); -- pm_runtime_idle(&pdev->dev); -- - ret = arizona_request_irq(arizona, ARIZONA_IRQ_DSP_IRQ1, - "ADSP2 Compressed IRQ", wm5102_adsp2_irq, - wm5102); -@@ -2122,6 +2119,9 @@ static int wm5102_probe(struct platform_device *pdev) - goto err_spk_irqs; - } - -+ pm_runtime_enable(&pdev->dev); -+ pm_runtime_idle(&pdev->dev); -+ - return ret; - - err_spk_irqs: diff --git a/sound/soc/codecs/wm5110.c b/sound/soc/codecs/wm5110.c -index 5c2d45d05c975..66a4827c16bd6 100644 +index 5c2d45d05c975..7c6e01720d651 100644 --- a/sound/soc/codecs/wm5110.c +++ b/sound/soc/codecs/wm5110.c @@ -413,6 +413,7 @@ static int wm5110_put_dre(struct snd_kcontrol *kcontrol, @@ -426447,26 +547389,6 @@ index 5c2d45d05c975..66a4827c16bd6 100644 err: snd_soc_dapm_mutex_unlock(dapm); -@@ -2454,9 +2458,6 @@ static int wm5110_probe(struct platform_device *pdev) - regmap_update_bits(arizona->regmap, wm5110_digital_vu[i], - WM5110_DIG_VU, WM5110_DIG_VU); - -- pm_runtime_enable(&pdev->dev); -- pm_runtime_idle(&pdev->dev); -- - ret = arizona_request_irq(arizona, ARIZONA_IRQ_DSP_IRQ1, - "ADSP2 Compressed IRQ", wm5110_adsp2_irq, - wm5110); -@@ -2489,6 +2490,9 @@ static int wm5110_probe(struct platform_device *pdev) - goto err_spk_irqs; - } - -+ pm_runtime_enable(&pdev->dev); -+ pm_runtime_idle(&pdev->dev); -+ - return ret; - - err_spk_irqs: diff --git a/sound/soc/codecs/wm8350.c b/sound/soc/codecs/wm8350.c index 15d42ce3b21d6..41504ce2a682f 100644 --- a/sound/soc/codecs/wm8350.c @@ -426570,6 +547492,45 @@ index dcee7b2bd3d79..859ebcec83838 100644 } static int wm8731_i2c_remove(struct i2c_client *client) +diff --git a/sound/soc/codecs/wm8904.c b/sound/soc/codecs/wm8904.c +index a02a77fef360b..6759ce7e09ff4 100644 +--- a/sound/soc/codecs/wm8904.c ++++ b/sound/soc/codecs/wm8904.c +@@ -697,6 +697,7 @@ static int out_pga_event(struct snd_soc_dapm_widget *w, + int dcs_mask; + int dcs_l, dcs_r; + int dcs_l_reg, dcs_r_reg; ++ int an_out_reg; + int timeout; + int pwr_reg; + +@@ -712,6 +713,7 @@ static int out_pga_event(struct snd_soc_dapm_widget *w, + dcs_mask = WM8904_DCS_ENA_CHAN_0 | WM8904_DCS_ENA_CHAN_1; + dcs_r_reg = WM8904_DC_SERVO_8; + dcs_l_reg = WM8904_DC_SERVO_9; ++ an_out_reg = WM8904_ANALOGUE_OUT1_LEFT; + dcs_l = 0; + dcs_r = 1; + break; +@@ -720,6 +722,7 @@ static int out_pga_event(struct snd_soc_dapm_widget *w, + dcs_mask = WM8904_DCS_ENA_CHAN_2 | WM8904_DCS_ENA_CHAN_3; + dcs_r_reg = WM8904_DC_SERVO_6; + dcs_l_reg = WM8904_DC_SERVO_7; ++ an_out_reg = WM8904_ANALOGUE_OUT2_LEFT; + dcs_l = 2; + dcs_r = 3; + break; +@@ -792,6 +795,10 @@ static int out_pga_event(struct snd_soc_dapm_widget *w, + snd_soc_component_update_bits(component, reg, + WM8904_HPL_ENA_OUTP | WM8904_HPR_ENA_OUTP, + WM8904_HPL_ENA_OUTP | WM8904_HPR_ENA_OUTP); ++ ++ /* Update volume, requires PGA to be powered */ ++ val = snd_soc_component_read(component, an_out_reg); ++ snd_soc_component_write(component, an_out_reg, val); + break; + + case SND_SOC_DAPM_POST_PMU: diff --git a/sound/soc/codecs/wm8958-dsp2.c b/sound/soc/codecs/wm8958-dsp2.c index e4018ba3b19a2..7878c7a58ff10 100644 --- a/sound/soc/codecs/wm8958-dsp2.c @@ -426611,10 +547572,100 @@ 
index e4018ba3b19a2..7878c7a58ff10 100644 #define WM8958_ENH_EQ_SWITCH(xname, xval) {\ diff --git a/sound/soc/codecs/wm8962.c b/sound/soc/codecs/wm8962.c -index ba16bdf9e478c..a5a4ae4440cc5 100644 +index ba16bdf9e478c..779f7097d336c 100644 --- a/sound/soc/codecs/wm8962.c +++ b/sound/soc/codecs/wm8962.c -@@ -3867,6 +3867,7 @@ static int wm8962_runtime_suspend(struct device *dev) +@@ -1840,6 +1840,49 @@ SOC_SINGLE_TLV("SPKOUTR Mixer DACR Volume", WM8962_SPEAKER_MIXER_5, + 4, 1, 0, inmix_tlv), + }; + ++static int tp_event(struct snd_soc_dapm_widget *w, ++ struct snd_kcontrol *kcontrol, int event) ++{ ++ int ret, reg, val, mask; ++ struct snd_soc_component *component = snd_soc_dapm_to_component(w->dapm); ++ ++ ret = pm_runtime_resume_and_get(component->dev); ++ if (ret < 0) { ++ dev_err(component->dev, "Failed to resume device: %d\n", ret); ++ return ret; ++ } ++ ++ reg = WM8962_ADDITIONAL_CONTROL_4; ++ ++ if (!strcmp(w->name, "TEMP_HP")) { ++ mask = WM8962_TEMP_ENA_HP_MASK; ++ val = WM8962_TEMP_ENA_HP; ++ } else if (!strcmp(w->name, "TEMP_SPK")) { ++ mask = WM8962_TEMP_ENA_SPK_MASK; ++ val = WM8962_TEMP_ENA_SPK; ++ } else { ++ pm_runtime_put(component->dev); ++ return -EINVAL; ++ } ++ ++ switch (event) { ++ case SND_SOC_DAPM_POST_PMD: ++ val = 0; ++ fallthrough; ++ case SND_SOC_DAPM_POST_PMU: ++ ret = snd_soc_component_update_bits(component, reg, mask, val); ++ break; ++ default: ++ WARN(1, "Invalid event %d\n", event); ++ pm_runtime_put(component->dev); ++ return -EINVAL; ++ } ++ ++ pm_runtime_put(component->dev); ++ ++ return 0; ++} ++ + static int cp_event(struct snd_soc_dapm_widget *w, + struct snd_kcontrol *kcontrol, int event) + { +@@ -2133,8 +2176,10 @@ SND_SOC_DAPM_SUPPLY("TOCLK", WM8962_ADDITIONAL_CONTROL_1, 0, 0, NULL, 0), + SND_SOC_DAPM_SUPPLY_S("DSP2", 1, WM8962_DSP2_POWER_MANAGEMENT, + WM8962_DSP2_ENA_SHIFT, 0, dsp2_event, + SND_SOC_DAPM_POST_PMU | SND_SOC_DAPM_PRE_PMD), +-SND_SOC_DAPM_SUPPLY("TEMP_HP", WM8962_ADDITIONAL_CONTROL_4, 2, 0, NULL, 0), +-SND_SOC_DAPM_SUPPLY("TEMP_SPK", WM8962_ADDITIONAL_CONTROL_4, 1, 0, NULL, 0), ++SND_SOC_DAPM_SUPPLY("TEMP_HP", SND_SOC_NOPM, 0, 0, tp_event, ++ SND_SOC_DAPM_POST_PMU|SND_SOC_DAPM_POST_PMD), ++SND_SOC_DAPM_SUPPLY("TEMP_SPK", SND_SOC_NOPM, 0, 0, tp_event, ++ SND_SOC_DAPM_POST_PMU|SND_SOC_DAPM_POST_PMD), + + SND_SOC_DAPM_MIXER("INPGAL", WM8962_LEFT_INPUT_PGA_CONTROL, 4, 0, + inpgal, ARRAY_SIZE(inpgal)), +@@ -2445,6 +2490,14 @@ static void wm8962_configure_bclk(struct snd_soc_component *component) + snd_soc_component_update_bits(component, WM8962_CLOCKING2, + WM8962_SYSCLK_ENA_MASK, WM8962_SYSCLK_ENA); + ++ /* DSPCLK_DIV field in WM8962_CLOCKING1 register is used to generate ++ * correct frequency of LRCLK and BCLK. Sometimes the read-only value ++ * can't be updated timely after enabling SYSCLK. This results in wrong ++ * calculation values. Delay is introduced here to wait for newest ++ * value from register. The time of the delay should be at least ++ * 500~1000us according to test. 
++ */ ++ usleep_range(500, 1000); + dspclk = snd_soc_component_read(component, WM8962_CLOCKING1); + + if (snd_soc_component_get_bias_level(component) != SND_SOC_BIAS_ON) +@@ -3763,6 +3816,11 @@ static int wm8962_i2c_probe(struct i2c_client *i2c, + if (ret < 0) + goto err_pm_runtime; + ++ regmap_update_bits(wm8962->regmap, WM8962_ADDITIONAL_CONTROL_4, ++ WM8962_TEMP_ENA_HP_MASK, 0); ++ regmap_update_bits(wm8962->regmap, WM8962_ADDITIONAL_CONTROL_4, ++ WM8962_TEMP_ENA_SPK_MASK, 0); ++ + regcache_cache_only(wm8962->regmap, true); + + /* The drivers should power up as needed */ +@@ -3867,6 +3925,7 @@ static int wm8962_runtime_suspend(struct device *dev) #endif static const struct dev_pm_ops wm8962_pm = { @@ -426622,30 +547673,23 @@ index ba16bdf9e478c..a5a4ae4440cc5 100644 SET_RUNTIME_PM_OPS(wm8962_runtime_suspend, wm8962_runtime_resume, NULL) }; -diff --git a/sound/soc/codecs/wm8997.c b/sound/soc/codecs/wm8997.c -index 38ef631d1a1ff..c8c711e555c0e 100644 ---- a/sound/soc/codecs/wm8997.c -+++ b/sound/soc/codecs/wm8997.c -@@ -1162,9 +1162,6 @@ static int wm8997_probe(struct platform_device *pdev) - regmap_update_bits(arizona->regmap, wm8997_digital_vu[i], - WM8997_DIG_VU, WM8997_DIG_VU); - -- pm_runtime_enable(&pdev->dev); -- pm_runtime_idle(&pdev->dev); -- - arizona_init_common(arizona); - - ret = arizona_init_vol_limit(arizona); -@@ -1183,6 +1180,9 @@ static int wm8997_probe(struct platform_device *pdev) - goto err_spk_irqs; - } +diff --git a/sound/soc/codecs/wm8994.c b/sound/soc/codecs/wm8994.c +index f117ec0c489f0..6759db92f6c46 100644 +--- a/sound/soc/codecs/wm8994.c ++++ b/sound/soc/codecs/wm8994.c +@@ -3853,7 +3853,12 @@ static irqreturn_t wm1811_jackdet_irq(int irq, void *data) + } else { + dev_dbg(component->dev, "Jack not detected\n"); -+ pm_runtime_enable(&pdev->dev); -+ pm_runtime_idle(&pdev->dev); -+ - return ret; ++ /* Release wm8994->accdet_lock to avoid deadlock: ++ * cancel_delayed_work_sync() takes wm8994->mic_work internal ++ * lock and wm1811_mic_work takes wm8994->accdet_lock */ ++ mutex_unlock(&wm8994->accdet_lock); + cancel_delayed_work_sync(&wm8994->mic_work); ++ mutex_lock(&wm8994->accdet_lock); - err_spk_irqs: + snd_soc_component_update_bits(component, WM8958_MICBIAS2, + WM8958_MICB2_DISCH, WM8958_MICB2_DISCH); diff --git a/sound/soc/codecs/wm_adsp.c b/sound/soc/codecs/wm_adsp.c index f7c800927cb2f..08fc1a025b1a9 100644 --- a/sound/soc/codecs/wm_adsp.c @@ -426660,7 +547704,7 @@ index f7c800927cb2f..08fc1a025b1a9 100644 if (ucontrol->value.enumerated.item[0] == dsp[e->shift_l].fw) return 0; diff --git a/sound/soc/codecs/wsa881x.c b/sound/soc/codecs/wsa881x.c -index 2da4a5fa7a18d..564b78f3cdd0a 100644 +index 2da4a5fa7a18d..0222370ff95de 100644 --- a/sound/soc/codecs/wsa881x.c +++ b/sound/soc/codecs/wsa881x.c @@ -772,7 +772,8 @@ static int wsa881x_put_pa_gain(struct snd_kcontrol *kc, @@ -426699,6 +547743,15 @@ index 2da4a5fa7a18d..564b78f3cdd0a 100644 } static const char * const smart_boost_lvl_text[] = { +@@ -1018,7 +1026,7 @@ static const struct snd_soc_dai_ops wsa881x_dai_ops = { + .hw_params = wsa881x_hw_params, + .hw_free = wsa881x_hw_free, + .mute_stream = wsa881x_digital_mute, +- .set_sdw_stream = wsa881x_set_sdw_stream, ++ .set_stream = wsa881x_set_sdw_stream, + }; + + static struct snd_soc_dai_driver wsa881x_dais[] = { diff --git a/sound/soc/dwc/dwc-i2s.c b/sound/soc/dwc/dwc-i2s.c index 33ce257ae1986..315ca5c4b057a 100644 --- a/sound/soc/dwc/dwc-i2s.c @@ -426780,9 +547833,25 @@ index e13271ea84ded..29cf9234984d9 100644 if (np) { /* The eukrea,asoc-tlv320 driver 
was explicitly diff --git a/sound/soc/fsl/fsl-asoc-card.c b/sound/soc/fsl/fsl-asoc-card.c -index 06107ae46e20b..c72a156737e61 100644 +index 06107ae46e20b..5000d779aade2 100644 --- a/sound/soc/fsl/fsl-asoc-card.c +++ b/sound/soc/fsl/fsl-asoc-card.c +@@ -120,11 +120,11 @@ static const struct snd_soc_dapm_route audio_map[] = { + + static const struct snd_soc_dapm_route audio_map_ac97[] = { + /* 1st half -- Normal DAPM routes */ +- {"Playback", NULL, "AC97 Playback"}, +- {"AC97 Capture", NULL, "Capture"}, ++ {"AC97 Playback", NULL, "CPU AC97 Playback"}, ++ {"CPU AC97 Capture", NULL, "AC97 Capture"}, + /* 2nd half -- ASRC DAPM routes */ +- {"AC97 Playback", NULL, "ASRC-Playback"}, +- {"ASRC-Capture", NULL, "AC97 Capture"}, ++ {"CPU AC97 Playback", NULL, "ASRC-Playback"}, ++ {"ASRC-Capture", NULL, "CPU AC97 Capture"}, + }; + + static const struct snd_soc_dapm_route audio_map_tx[] = { @@ -540,6 +540,7 @@ static int fsl_asoc_card_probe(struct platform_device *pdev) struct device *codec_dev = NULL; const char *codec_dai_name; @@ -426813,7 +547882,7 @@ index 06107ae46e20b..c72a156737e61 100644 } diff --git a/sound/soc/fsl/fsl_asrc.c b/sound/soc/fsl/fsl_asrc.c -index 24b41881a68f8..44dcbf49456cb 100644 +index 24b41881a68f8..08ca410ef551b 100644 --- a/sound/soc/fsl/fsl_asrc.c +++ b/sound/soc/fsl/fsl_asrc.c @@ -19,6 +19,7 @@ @@ -426968,6 +548037,15 @@ index 24b41881a68f8..44dcbf49456cb 100644 dev_warn(&pdev->dev, "unsupported width, use default S24_LE\n"); asrc->asrc_format = SNDRV_PCM_FORMAT_S24_LE; } +@@ -1177,7 +1226,7 @@ static int fsl_asrc_probe(struct platform_device *pdev) + } + + ret = pm_runtime_put_sync(&pdev->dev); +- if (ret < 0) ++ if (ret < 0 && ret != -ENOSYS) + goto err_pm_get_sync; + + ret = devm_snd_soc_register_component(&pdev->dev, &fsl_asrc_component, diff --git a/sound/soc/fsl/fsl_easrc.c b/sound/soc/fsl/fsl_easrc.c index be14f84796cb4..cf0e10d17dbe3 100644 --- a/sound/soc/fsl/fsl_easrc.c @@ -427020,6 +548098,79 @@ index 30620d56252cc..5b8469757c122 100644 unsigned int norm_rate; }; +diff --git a/sound/soc/fsl/fsl_esai.c b/sound/soc/fsl/fsl_esai.c +index bda66b30e063c..763f5f0592af1 100644 +--- a/sound/soc/fsl/fsl_esai.c ++++ b/sound/soc/fsl/fsl_esai.c +@@ -1070,7 +1070,7 @@ static int fsl_esai_probe(struct platform_device *pdev) + regmap_write(esai_priv->regmap, REG_ESAI_RSMB, 0); + + ret = pm_runtime_put_sync(&pdev->dev); +- if (ret < 0) ++ if (ret < 0 && ret != -ENOSYS) + goto err_pm_get_sync; + + /* +diff --git a/sound/soc/fsl/fsl_micfil.c b/sound/soc/fsl/fsl_micfil.c +index 9f90989ac59a6..38d4d1b7cfe39 100644 +--- a/sound/soc/fsl/fsl_micfil.c ++++ b/sound/soc/fsl/fsl_micfil.c +@@ -88,21 +88,21 @@ static DECLARE_TLV_DB_SCALE(gain_tlv, 0, 100, 0); + + static const struct snd_kcontrol_new fsl_micfil_snd_controls[] = { + SOC_SINGLE_SX_TLV("CH0 Volume", REG_MICFIL_OUT_CTRL, +- MICFIL_OUTGAIN_CHX_SHIFT(0), 0xF, 0x7, gain_tlv), ++ MICFIL_OUTGAIN_CHX_SHIFT(0), 0x8, 0xF, gain_tlv), + SOC_SINGLE_SX_TLV("CH1 Volume", REG_MICFIL_OUT_CTRL, +- MICFIL_OUTGAIN_CHX_SHIFT(1), 0xF, 0x7, gain_tlv), ++ MICFIL_OUTGAIN_CHX_SHIFT(1), 0x8, 0xF, gain_tlv), + SOC_SINGLE_SX_TLV("CH2 Volume", REG_MICFIL_OUT_CTRL, +- MICFIL_OUTGAIN_CHX_SHIFT(2), 0xF, 0x7, gain_tlv), ++ MICFIL_OUTGAIN_CHX_SHIFT(2), 0x8, 0xF, gain_tlv), + SOC_SINGLE_SX_TLV("CH3 Volume", REG_MICFIL_OUT_CTRL, +- MICFIL_OUTGAIN_CHX_SHIFT(3), 0xF, 0x7, gain_tlv), ++ MICFIL_OUTGAIN_CHX_SHIFT(3), 0x8, 0xF, gain_tlv), + SOC_SINGLE_SX_TLV("CH4 Volume", REG_MICFIL_OUT_CTRL, +- MICFIL_OUTGAIN_CHX_SHIFT(4), 0xF, 0x7, gain_tlv), ++ 
MICFIL_OUTGAIN_CHX_SHIFT(4), 0x8, 0xF, gain_tlv), + SOC_SINGLE_SX_TLV("CH5 Volume", REG_MICFIL_OUT_CTRL, +- MICFIL_OUTGAIN_CHX_SHIFT(5), 0xF, 0x7, gain_tlv), ++ MICFIL_OUTGAIN_CHX_SHIFT(5), 0x8, 0xF, gain_tlv), + SOC_SINGLE_SX_TLV("CH6 Volume", REG_MICFIL_OUT_CTRL, +- MICFIL_OUTGAIN_CHX_SHIFT(6), 0xF, 0x7, gain_tlv), ++ MICFIL_OUTGAIN_CHX_SHIFT(6), 0x8, 0xF, gain_tlv), + SOC_SINGLE_SX_TLV("CH7 Volume", REG_MICFIL_OUT_CTRL, +- MICFIL_OUTGAIN_CHX_SHIFT(7), 0xF, 0x7, gain_tlv), ++ MICFIL_OUTGAIN_CHX_SHIFT(7), 0x8, 0xF, gain_tlv), + SOC_ENUM_EXT("MICFIL Quality Select", + fsl_micfil_quality_enum, + snd_soc_get_enum_double, snd_soc_put_enum_double), +@@ -191,6 +191,25 @@ static int fsl_micfil_reset(struct device *dev) + return ret; + } + ++ /* ++ * SRES is self-cleared bit, but REG_MICFIL_CTRL1 is defined ++ * as non-volatile register, so SRES still remain in regmap ++ * cache after set, that every update of REG_MICFIL_CTRL1, ++ * software reset happens. so clear it explicitly. ++ */ ++ ret = regmap_clear_bits(micfil->regmap, REG_MICFIL_CTRL1, ++ MICFIL_CTRL1_SRES); ++ if (ret) ++ return ret; ++ ++ /* ++ * Set SRES should clear CHnF flags, But even add delay here ++ * the CHnF may not be cleared sometimes, so clear CHnF explicitly. ++ */ ++ ret = regmap_write_bits(micfil->regmap, REG_MICFIL_STAT, 0xFF, 0xFF); ++ if (ret) ++ return ret; ++ + return 0; + } + diff --git a/sound/soc/fsl/fsl_mqs.c b/sound/soc/fsl/fsl_mqs.c index 69aeb0e71844d..0d4efbed41dab 100644 --- a/sound/soc/fsl/fsl_mqs.c @@ -427030,6 +548181,171 @@ index 69aeb0e71844d..0d4efbed41dab 100644 MODULE_LICENSE("GPL v2"); -MODULE_ALIAS("platform: fsl-mqs"); +MODULE_ALIAS("platform:fsl-mqs"); +diff --git a/sound/soc/fsl/fsl_sai.c b/sound/soc/fsl/fsl_sai.c +index 38f6362099d58..5ba06df2ace51 100644 +--- a/sound/soc/fsl/fsl_sai.c ++++ b/sound/soc/fsl/fsl_sai.c +@@ -1000,6 +1000,7 @@ static int fsl_sai_runtime_resume(struct device *dev); + static int fsl_sai_probe(struct platform_device *pdev) + { + struct device_node *np = pdev->dev.of_node; ++ struct device *dev = &pdev->dev; + struct fsl_sai *sai; + struct regmap *gpr; + struct resource *res; +@@ -1008,12 +1009,12 @@ static int fsl_sai_probe(struct platform_device *pdev) + int irq, ret, i; + int index; + +- sai = devm_kzalloc(&pdev->dev, sizeof(*sai), GFP_KERNEL); ++ sai = devm_kzalloc(dev, sizeof(*sai), GFP_KERNEL); + if (!sai) + return -ENOMEM; + + sai->pdev = pdev; +- sai->soc_data = of_device_get_match_data(&pdev->dev); ++ sai->soc_data = of_device_get_match_data(dev); + + sai->is_lsb_first = of_property_read_bool(np, "lsb-first"); + +@@ -1028,18 +1029,18 @@ static int fsl_sai_probe(struct platform_device *pdev) + ARRAY_SIZE(fsl_sai_reg_defaults_ofs8); + } + +- sai->regmap = devm_regmap_init_mmio(&pdev->dev, base, &fsl_sai_regmap_config); ++ sai->regmap = devm_regmap_init_mmio(dev, base, &fsl_sai_regmap_config); + if (IS_ERR(sai->regmap)) { +- dev_err(&pdev->dev, "regmap init failed\n"); ++ dev_err(dev, "regmap init failed\n"); + return PTR_ERR(sai->regmap); + } + +- sai->bus_clk = devm_clk_get(&pdev->dev, "bus"); ++ sai->bus_clk = devm_clk_get(dev, "bus"); + /* Compatible with old DTB cases */ + if (IS_ERR(sai->bus_clk) && PTR_ERR(sai->bus_clk) != -EPROBE_DEFER) +- sai->bus_clk = devm_clk_get(&pdev->dev, "sai"); ++ sai->bus_clk = devm_clk_get(dev, "sai"); + if (IS_ERR(sai->bus_clk)) { +- dev_err(&pdev->dev, "failed to get bus clock: %ld\n", ++ dev_err(dev, "failed to get bus clock: %ld\n", + PTR_ERR(sai->bus_clk)); + /* -EPROBE_DEFER */ + return PTR_ERR(sai->bus_clk); +@@ -1047,9 
+1048,9 @@ static int fsl_sai_probe(struct platform_device *pdev) + + for (i = 1; i < FSL_SAI_MCLK_MAX; i++) { + sprintf(tmp, "mclk%d", i); +- sai->mclk_clk[i] = devm_clk_get(&pdev->dev, tmp); ++ sai->mclk_clk[i] = devm_clk_get(dev, tmp); + if (IS_ERR(sai->mclk_clk[i])) { +- dev_err(&pdev->dev, "failed to get mclk%d clock: %ld\n", ++ dev_err(dev, "failed to get mclk%d clock: %ld\n", + i + 1, PTR_ERR(sai->mclk_clk[i])); + sai->mclk_clk[i] = NULL; + } +@@ -1064,10 +1065,10 @@ static int fsl_sai_probe(struct platform_device *pdev) + if (irq < 0) + return irq; + +- ret = devm_request_irq(&pdev->dev, irq, fsl_sai_isr, IRQF_SHARED, ++ ret = devm_request_irq(dev, irq, fsl_sai_isr, IRQF_SHARED, + np->name, sai); + if (ret) { +- dev_err(&pdev->dev, "failed to claim irq %u\n", irq); ++ dev_err(dev, "failed to claim irq %u\n", irq); + return ret; + } + +@@ -1084,7 +1085,7 @@ static int fsl_sai_probe(struct platform_device *pdev) + if (of_find_property(np, "fsl,sai-synchronous-rx", NULL) && + of_find_property(np, "fsl,sai-asynchronous", NULL)) { + /* error out if both synchronous and asynchronous are present */ +- dev_err(&pdev->dev, "invalid binding for synchronous mode\n"); ++ dev_err(dev, "invalid binding for synchronous mode\n"); + return -EINVAL; + } + +@@ -1105,7 +1106,7 @@ static int fsl_sai_probe(struct platform_device *pdev) + of_device_is_compatible(np, "fsl,imx6ul-sai")) { + gpr = syscon_regmap_lookup_by_compatible("fsl,imx6ul-iomuxc-gpr"); + if (IS_ERR(gpr)) { +- dev_err(&pdev->dev, "cannot find iomuxc registers\n"); ++ dev_err(dev, "cannot find iomuxc registers\n"); + return PTR_ERR(gpr); + } + +@@ -1123,23 +1124,23 @@ static int fsl_sai_probe(struct platform_device *pdev) + sai->dma_params_tx.maxburst = FSL_SAI_MAXBURST_TX; + + platform_set_drvdata(pdev, sai); +- pm_runtime_enable(&pdev->dev); +- if (!pm_runtime_enabled(&pdev->dev)) { +- ret = fsl_sai_runtime_resume(&pdev->dev); ++ pm_runtime_enable(dev); ++ if (!pm_runtime_enabled(dev)) { ++ ret = fsl_sai_runtime_resume(dev); + if (ret) + goto err_pm_disable; + } + +- ret = pm_runtime_get_sync(&pdev->dev); ++ ret = pm_runtime_get_sync(dev); + if (ret < 0) { +- pm_runtime_put_noidle(&pdev->dev); ++ pm_runtime_put_noidle(dev); + goto err_pm_get_sync; + } + + /* Get sai version */ +- ret = fsl_sai_check_version(&pdev->dev); ++ ret = fsl_sai_check_version(dev); + if (ret < 0) +- dev_warn(&pdev->dev, "Error reading SAI version: %d\n", ret); ++ dev_warn(dev, "Error reading SAI version: %d\n", ret); + + /* Select MCLK direction */ + if (of_find_property(np, "fsl,sai-mclk-direction-output", NULL) && +@@ -1148,8 +1149,8 @@ static int fsl_sai_probe(struct platform_device *pdev) + FSL_SAI_MCTL_MCLK_EN, FSL_SAI_MCTL_MCLK_EN); + } + +- ret = pm_runtime_put_sync(&pdev->dev); +- if (ret < 0) ++ ret = pm_runtime_put_sync(dev); ++ if (ret < 0 && ret != -ENOSYS) + goto err_pm_get_sync; + + /* +@@ -1161,12 +1162,12 @@ static int fsl_sai_probe(struct platform_device *pdev) + if (ret) + goto err_pm_get_sync; + } else { +- ret = devm_snd_dmaengine_pcm_register(&pdev->dev, NULL, 0); ++ ret = devm_snd_dmaengine_pcm_register(dev, NULL, 0); + if (ret) + goto err_pm_get_sync; + } + +- ret = devm_snd_soc_register_component(&pdev->dev, &fsl_component, ++ ret = devm_snd_soc_register_component(dev, &fsl_component, + &sai->cpu_dai_drv, 1); + if (ret) + goto err_pm_get_sync; +@@ -1174,10 +1175,10 @@ static int fsl_sai_probe(struct platform_device *pdev) + return ret; + + err_pm_get_sync: +- if (!pm_runtime_status_suspended(&pdev->dev)) +- 
fsl_sai_runtime_suspend(&pdev->dev); ++ if (!pm_runtime_status_suspended(dev)) ++ fsl_sai_runtime_suspend(dev); + err_pm_disable: +- pm_runtime_disable(&pdev->dev); ++ pm_runtime_disable(dev); + + return ret; + } diff --git a/sound/soc/fsl/fsl_sai.h b/sound/soc/fsl/fsl_sai.h index bc60030967dd8..f471467dfb3e4 100644 --- a/sound/soc/fsl/fsl_sai.h @@ -427058,6 +548374,27 @@ index 1c53719bb61e2..8b5c3ba48516c 100644 } else { scr = SCR_RXFIFO_OFF | SCR_RXFIFO_CTL_ZERO; mask = SCR_RXFIFO_FSEL_MASK | SCR_RXFIFO_AUTOSYNC_MASK| +diff --git a/sound/soc/fsl/fsl_ssi.c b/sound/soc/fsl/fsl_ssi.c +index ecbc1c365d5b1..0c73c2e9dce0c 100644 +--- a/sound/soc/fsl/fsl_ssi.c ++++ b/sound/soc/fsl/fsl_ssi.c +@@ -1160,14 +1160,14 @@ static struct snd_soc_dai_driver fsl_ssi_ac97_dai = { + .symmetric_channels = 1, + .probe = fsl_ssi_dai_probe, + .playback = { +- .stream_name = "AC97 Playback", ++ .stream_name = "CPU AC97 Playback", + .channels_min = 2, + .channels_max = 2, + .rates = SNDRV_PCM_RATE_8000_48000, + .formats = SNDRV_PCM_FMTBIT_S16 | SNDRV_PCM_FMTBIT_S20, + }, + .capture = { +- .stream_name = "AC97 Capture", ++ .stream_name = "CPU AC97 Capture", + .channels_min = 2, + .channels_max = 2, + .rates = SNDRV_PCM_RATE_48000, diff --git a/sound/soc/fsl/imx-audmux.c b/sound/soc/fsl/imx-audmux.c index dfa05d40b2764..a8e5e0f57faf9 100644 --- a/sound/soc/fsl/imx-audmux.c @@ -427400,7 +548737,7 @@ index af3c3b90c0aca..83b4a22bf15ac 100644 static int pcm030_fabric_remove(struct platform_device *op) diff --git a/sound/soc/generic/audio-graph-card.c b/sound/soc/generic/audio-graph-card.c -index 546f6fd0609e1..28cbcdb56857f 100644 +index 546f6fd0609e1..89814f68ff563 100644 --- a/sound/soc/generic/audio-graph-card.c +++ b/sound/soc/generic/audio-graph-card.c @@ -158,8 +158,10 @@ static int asoc_simple_parse_dai(struct device_node *ep, @@ -427415,6 +548752,18 @@ index 546f6fd0609e1..28cbcdb56857f 100644 dlc->of_node = node; +@@ -481,8 +483,10 @@ static int __graph_for_each_link(struct asoc_simple_priv *priv, + of_node_put(codec_ep); + of_node_put(codec_port); + +- if (ret < 0) ++ if (ret < 0) { ++ of_node_put(cpu_ep); + return ret; ++ } + + codec_port_old = codec_port; + } diff --git a/sound/soc/generic/simple-card-utils.c b/sound/soc/generic/simple-card-utils.c index 10c63b73900c6..ffda8a38de3ed 100644 --- a/sound/soc/generic/simple-card-utils.c @@ -427502,11 +548851,200 @@ index a3a7990b5cb66..bc3e24c6a28a8 100644 if (ret < 0) goto dai_link_of_err; +diff --git a/sound/soc/intel/boards/bdw-rt5650.c b/sound/soc/intel/boards/bdw-rt5650.c +index c5122d3b0e6c4..7c8c2557d6850 100644 +--- a/sound/soc/intel/boards/bdw-rt5650.c ++++ b/sound/soc/intel/boards/bdw-rt5650.c +@@ -299,7 +299,7 @@ static int bdw_rt5650_probe(struct platform_device *pdev) + if (!bdw_rt5650) + return -ENOMEM; + +- /* override plaform name, if required */ ++ /* override platform name, if required */ + mach = pdev->dev.platform_data; + ret = snd_soc_fixup_dai_links_platform_name(&bdw_rt5650_card, + mach->mach_params.platform); +diff --git a/sound/soc/intel/boards/bdw-rt5677.c b/sound/soc/intel/boards/bdw-rt5677.c +index e01b7a90ca6c7..e990940179095 100644 +--- a/sound/soc/intel/boards/bdw-rt5677.c ++++ b/sound/soc/intel/boards/bdw-rt5677.c +@@ -426,7 +426,7 @@ static int bdw_rt5677_probe(struct platform_device *pdev) + if (!bdw_rt5677) + return -ENOMEM; + +- /* override plaform name, if required */ ++ /* override platform name, if required */ + mach = pdev->dev.platform_data; + ret = snd_soc_fixup_dai_links_platform_name(&bdw_rt5677_card, + 
mach->mach_params.platform); +diff --git a/sound/soc/intel/boards/broadwell.c b/sound/soc/intel/boards/broadwell.c +index 3c3aff9c61cc6..f18dcda23e74b 100644 +--- a/sound/soc/intel/boards/broadwell.c ++++ b/sound/soc/intel/boards/broadwell.c +@@ -292,7 +292,7 @@ static int broadwell_audio_probe(struct platform_device *pdev) + + broadwell_rt286.dev = &pdev->dev; + +- /* override plaform name, if required */ ++ /* override platform name, if required */ + mach = pdev->dev.platform_data; + ret = snd_soc_fixup_dai_links_platform_name(&broadwell_rt286, + mach->mach_params.platform); +diff --git a/sound/soc/intel/boards/bxt_da7219_max98357a.c b/sound/soc/intel/boards/bxt_da7219_max98357a.c +index e67ddfb8e4690..e49c64f54a12c 100644 +--- a/sound/soc/intel/boards/bxt_da7219_max98357a.c ++++ b/sound/soc/intel/boards/bxt_da7219_max98357a.c +@@ -825,7 +825,7 @@ static int broxton_audio_probe(struct platform_device *pdev) + } + } + +- /* override plaform name, if required */ ++ /* override platform name, if required */ + mach = pdev->dev.platform_data; + platform_name = mach->mach_params.platform; + +diff --git a/sound/soc/intel/boards/bxt_rt298.c b/sound/soc/intel/boards/bxt_rt298.c +index 47f6b1523ae6b..0d1df37ecea0b 100644 +--- a/sound/soc/intel/boards/bxt_rt298.c ++++ b/sound/soc/intel/boards/bxt_rt298.c +@@ -628,7 +628,7 @@ static int broxton_audio_probe(struct platform_device *pdev) + card->dev = &pdev->dev; + snd_soc_card_set_drvdata(card, ctx); + +- /* override plaform name, if required */ ++ /* override platform name, if required */ + mach = pdev->dev.platform_data; + platform_name = mach->mach_params.platform; + +diff --git a/sound/soc/intel/boards/bytcht_cx2072x.c b/sound/soc/intel/boards/bytcht_cx2072x.c +index a9e51bbf018c3..0fc57db6e92cb 100644 +--- a/sound/soc/intel/boards/bytcht_cx2072x.c ++++ b/sound/soc/intel/boards/bytcht_cx2072x.c +@@ -257,7 +257,7 @@ static int snd_byt_cht_cx2072x_probe(struct platform_device *pdev) + byt_cht_cx2072x_dais[dai_index].codecs->name = codec_name; + } + +- /* override plaform name, if required */ ++ /* override platform name, if required */ + ret = snd_soc_fixup_dai_links_platform_name(&byt_cht_cx2072x_card, + mach->mach_params.platform); + if (ret) +diff --git a/sound/soc/intel/boards/bytcht_da7213.c b/sound/soc/intel/boards/bytcht_da7213.c +index a28773fb7892b..21b6bebc9a26a 100644 +--- a/sound/soc/intel/boards/bytcht_da7213.c ++++ b/sound/soc/intel/boards/bytcht_da7213.c +@@ -260,7 +260,7 @@ static int bytcht_da7213_probe(struct platform_device *pdev) + dailink[dai_index].codecs->name = codec_name; + } + +- /* override plaform name, if required */ ++ /* override platform name, if required */ + platform_name = mach->mach_params.platform; + + ret_val = snd_soc_fixup_dai_links_platform_name(card, platform_name); +diff --git a/sound/soc/intel/boards/bytcht_es8316.c b/sound/soc/intel/boards/bytcht_es8316.c +index 4d313d0d0f23e..b5c97d35864a6 100644 +--- a/sound/soc/intel/boards/bytcht_es8316.c ++++ b/sound/soc/intel/boards/bytcht_es8316.c +@@ -443,6 +443,13 @@ static const struct dmi_system_id byt_cht_es8316_quirk_table[] = { + | BYT_CHT_ES8316_INTMIC_IN2_MAP + | BYT_CHT_ES8316_JD_INVERTED), + }, ++ { /* Nanote UMPC-01 */ ++ .matches = { ++ DMI_MATCH(DMI_SYS_VENDOR, "RWC CO.,LTD"), ++ DMI_MATCH(DMI_PRODUCT_NAME, "UMPC-01"), ++ }, ++ .driver_data = (void *)BYT_CHT_ES8316_INTMIC_IN1_MAP, ++ }, + { /* Teclast X98 Plus II */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "TECLAST"), +@@ -490,21 +497,28 @@ static int snd_byt_cht_es8316_mc_probe(struct 
platform_device *pdev) + if (adev) { + snprintf(codec_name, sizeof(codec_name), + "i2c-%s", acpi_dev_name(adev)); +- put_device(&adev->dev); + byt_cht_es8316_dais[dai_index].codecs->name = codec_name; + } else { + dev_err(dev, "Error cannot find '%s' dev\n", mach->id); + return -ENXIO; + } + +- /* override plaform name, if required */ ++ codec_dev = acpi_get_first_physical_node(adev); ++ acpi_dev_put(adev); ++ if (!codec_dev) ++ return -EPROBE_DEFER; ++ priv->codec_dev = get_device(codec_dev); ++ ++ /* override platform name, if required */ + byt_cht_es8316_card.dev = dev; + platform_name = mach->mach_params.platform; + + ret = snd_soc_fixup_dai_links_platform_name(&byt_cht_es8316_card, + platform_name); +- if (ret) ++ if (ret) { ++ put_device(codec_dev); + return ret; ++ } + + /* Check for BYTCR or other platform and setup quirks */ + dmi_id = dmi_first_match(byt_cht_es8316_quirk_table); +@@ -532,14 +546,10 @@ static int snd_byt_cht_es8316_mc_probe(struct platform_device *pdev) + + /* get the clock */ + priv->mclk = devm_clk_get(dev, "pmc_plt_clk_3"); +- if (IS_ERR(priv->mclk)) ++ if (IS_ERR(priv->mclk)) { ++ put_device(codec_dev); + return dev_err_probe(dev, PTR_ERR(priv->mclk), "clk_get pmc_plt_clk_3 failed\n"); +- +- /* get speaker enable GPIO */ +- codec_dev = acpi_get_first_physical_node(adev); +- if (!codec_dev) +- return -EPROBE_DEFER; +- priv->codec_dev = get_device(codec_dev); ++ } + + if (quirk & BYT_CHT_ES8316_JD_INVERTED) + props[cnt++] = PROPERTY_ENTRY_BOOL("everest,jack-detect-inverted"); +@@ -561,6 +571,7 @@ static int snd_byt_cht_es8316_mc_probe(struct platform_device *pdev) + } + } + ++ /* get speaker enable GPIO */ + devm_acpi_dev_add_driver_gpios(codec_dev, byt_cht_es8316_gpios); + priv->speaker_en_gpio = + gpiod_get_optional(codec_dev, "speaker-enable", diff --git a/sound/soc/intel/boards/bytcr_rt5640.c b/sound/soc/intel/boards/bytcr_rt5640.c -index a6e837290c7dc..f9c82ebc552cf 100644 +index a6e837290c7dc..5f6e2bb324406 100644 --- a/sound/soc/intel/boards/bytcr_rt5640.c +++ b/sound/soc/intel/boards/bytcr_rt5640.c -@@ -759,6 +759,18 @@ static const struct dmi_system_id byt_rt5640_quirk_table[] = { +@@ -570,6 +570,21 @@ static const struct dmi_system_id byt_rt5640_quirk_table[] = { + BYT_RT5640_SSP0_AIF1 | + BYT_RT5640_MCLK_EN), + }, ++ { ++ /* Advantech MICA-071 */ ++ .matches = { ++ DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Advantech"), ++ DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "MICA-071"), ++ }, ++ /* OVCD Th = 1500uA to reliable detect head-phones vs -set */ ++ .driver_data = (void *)(BYT_RT5640_IN3_MAP | ++ BYT_RT5640_JD_SRC_JD2_IN4N | ++ BYT_RT5640_OVCD_TH_1500UA | ++ BYT_RT5640_OVCD_SF_0P75 | ++ BYT_RT5640_MONO_SPEAKER | ++ BYT_RT5640_DIFF_MIC | ++ BYT_RT5640_MCLK_EN), ++ }, + { + .matches = { + DMI_EXACT_MATCH(DMI_SYS_VENDOR, "ARCHOS"), +@@ -759,6 +774,18 @@ static const struct dmi_system_id byt_rt5640_quirk_table[] = { BYT_RT5640_OVCD_SF_0P75 | BYT_RT5640_MCLK_EN), }, @@ -427525,10 +549063,91 @@ index a6e837290c7dc..f9c82ebc552cf 100644 { /* HP Stream 7 */ .matches = { DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"), +@@ -1534,13 +1561,18 @@ static int snd_byt_rt5640_mc_probe(struct platform_device *pdev) + if (adev) { + snprintf(byt_rt5640_codec_name, sizeof(byt_rt5640_codec_name), + "i2c-%s", acpi_dev_name(adev)); +- put_device(&adev->dev); + byt_rt5640_dais[dai_index].codecs->name = byt_rt5640_codec_name; + } else { + dev_err(&pdev->dev, "Error cannot find '%s' dev\n", mach->id); + return -ENXIO; + } + ++ codec_dev = acpi_get_first_physical_node(adev); ++ 
acpi_dev_put(adev); ++ if (!codec_dev) ++ return -EPROBE_DEFER; ++ priv->codec_dev = get_device(codec_dev); ++ + /* + * swap SSP0 if bytcr is detected + * (will be overridden if DMI quirk is detected) +@@ -1615,11 +1647,6 @@ static int snd_byt_rt5640_mc_probe(struct platform_device *pdev) + byt_rt5640_quirk = quirk_override; + } + +- codec_dev = acpi_get_first_physical_node(adev); +- if (!codec_dev) +- return -EPROBE_DEFER; +- priv->codec_dev = get_device(codec_dev); +- + if (byt_rt5640_quirk & BYT_RT5640_JD_HP_ELITEP_1000G2) { + acpi_dev_add_driver_gpios(ACPI_COMPANION(priv->codec_dev), + byt_rt5640_hp_elitepad_1000g2_gpios); +@@ -1706,7 +1733,7 @@ static int snd_byt_rt5640_mc_probe(struct platform_device *pdev) + byt_rt5640_card.long_name = byt_rt5640_long_name; + #endif + +- /* override plaform name, if required */ ++ /* override platform name, if required */ + platform_name = mach->mach_params.platform; + + ret_val = snd_soc_fixup_dai_links_platform_name(&byt_rt5640_card, +diff --git a/sound/soc/intel/boards/bytcr_rt5651.c b/sound/soc/intel/boards/bytcr_rt5651.c +index e94c9124d4f41..93cec4d916273 100644 +--- a/sound/soc/intel/boards/bytcr_rt5651.c ++++ b/sound/soc/intel/boards/bytcr_rt5651.c +@@ -930,7 +930,6 @@ static int snd_byt_rt5651_mc_probe(struct platform_device *pdev) + if (adev) { + snprintf(byt_rt5651_codec_name, sizeof(byt_rt5651_codec_name), + "i2c-%s", acpi_dev_name(adev)); +- put_device(&adev->dev); + byt_rt5651_dais[dai_index].codecs->name = byt_rt5651_codec_name; + } else { + dev_err(&pdev->dev, "Error cannot find '%s' dev\n", mach->id); +@@ -938,6 +937,7 @@ static int snd_byt_rt5651_mc_probe(struct platform_device *pdev) + } + + codec_dev = acpi_get_first_physical_node(adev); ++ acpi_dev_put(adev); + if (!codec_dev) + return -EPROBE_DEFER; + priv->codec_dev = get_device(codec_dev); +@@ -1104,7 +1104,7 @@ static int snd_byt_rt5651_mc_probe(struct platform_device *pdev) + byt_rt5651_card.long_name = byt_rt5651_long_name; + #endif + +- /* override plaform name, if required */ ++ /* override platform name, if required */ + platform_name = mach->mach_params.platform; + + ret_val = snd_soc_fixup_dai_links_platform_name(&byt_rt5651_card, diff --git a/sound/soc/intel/boards/bytcr_wm5102.c b/sound/soc/intel/boards/bytcr_wm5102.c -index 580d5fddae5ad..bb669d58eb8b3 100644 +index 580d5fddae5ad..9a4126f19d5f7 100644 --- a/sound/soc/intel/boards/bytcr_wm5102.c +++ b/sound/soc/intel/boards/bytcr_wm5102.c +@@ -411,9 +411,9 @@ static int snd_byt_wm5102_mc_probe(struct platform_device *pdev) + return -ENOENT; + } + snprintf(codec_name, sizeof(codec_name), "spi-%s", acpi_dev_name(adev)); +- put_device(&adev->dev); + + codec_dev = bus_find_device_by_name(&spi_bus_type, NULL, codec_name); ++ acpi_dev_put(adev); + if (!codec_dev) + return -EPROBE_DEFER; + @@ -421,8 +421,17 @@ static int snd_byt_wm5102_mc_probe(struct platform_device *pdev) priv->spkvdd_en_gpio = gpiod_get(codec_dev, "wlf,spkvdd-ena", GPIOD_OUT_LOW); put_device(codec_dev); @@ -427549,8 +549168,141 @@ index 580d5fddae5ad..bb669d58eb8b3 100644 /* override platform name, if required */ byt_wm5102_card.dev = dev; +diff --git a/sound/soc/intel/boards/cht_bsw_max98090_ti.c b/sound/soc/intel/boards/cht_bsw_max98090_ti.c +index 131882378a594..ba6de1e389cd8 100644 +--- a/sound/soc/intel/boards/cht_bsw_max98090_ti.c ++++ b/sound/soc/intel/boards/cht_bsw_max98090_ti.c +@@ -296,7 +296,7 @@ static int cht_max98090_headset_init(struct snd_soc_component *component) + int ret; + + /* +- * TI supports 4 butons headset detection ++ * TI 
supports 4 buttons headset detection + * KEY_MEDIA + * KEY_VOICECOMMAND + * KEY_VOLUMEUP +@@ -558,7 +558,7 @@ static int snd_cht_mc_probe(struct platform_device *pdev) + dev_dbg(dev, "Unable to add GPIO mapping table\n"); + } + +- /* override plaform name, if required */ ++ /* override platform name, if required */ + snd_soc_card_cht.dev = &pdev->dev; + mach = pdev->dev.platform_data; + platform_name = mach->mach_params.platform; +diff --git a/sound/soc/intel/boards/cht_bsw_nau8824.c b/sound/soc/intel/boards/cht_bsw_nau8824.c +index da5a5cbc87590..779b388db85d3 100644 +--- a/sound/soc/intel/boards/cht_bsw_nau8824.c ++++ b/sound/soc/intel/boards/cht_bsw_nau8824.c +@@ -100,7 +100,7 @@ static int cht_codec_init(struct snd_soc_pcm_runtime *runtime) + struct snd_soc_component *component = codec_dai->component; + int ret, jack_type; + +- /* NAU88L24 supports 4 butons headset detection ++ /* NAU88L24 supports 4 buttons headset detection + * KEY_PLAYPAUSE + * KEY_VOICECOMMAND + * KEY_VOLUMEUP +@@ -257,7 +257,7 @@ static int snd_cht_mc_probe(struct platform_device *pdev) + return -ENOMEM; + snd_soc_card_set_drvdata(&snd_soc_card_cht, drv); + +- /* override plaform name, if required */ ++ /* override platform name, if required */ + snd_soc_card_cht.dev = &pdev->dev; + mach = pdev->dev.platform_data; + platform_name = mach->mach_params.platform; +diff --git a/sound/soc/intel/boards/cht_bsw_rt5645.c b/sound/soc/intel/boards/cht_bsw_rt5645.c +index 804dbc7911d50..381bf6054047f 100644 +--- a/sound/soc/intel/boards/cht_bsw_rt5645.c ++++ b/sound/soc/intel/boards/cht_bsw_rt5645.c +@@ -653,7 +653,7 @@ static int snd_cht_mc_probe(struct platform_device *pdev) + (cht_rt5645_quirk & CHT_RT5645_SSP0_AIF2)) + cht_dailink[dai_index].cpus->dai_name = "ssp0-port"; + +- /* override plaform name, if required */ ++ /* override platform name, if required */ + platform_name = mach->mach_params.platform; + + ret_val = snd_soc_fixup_dai_links_platform_name(card, +diff --git a/sound/soc/intel/boards/cht_bsw_rt5672.c b/sound/soc/intel/boards/cht_bsw_rt5672.c +index 9509b6e161b89..ba96741c7771b 100644 +--- a/sound/soc/intel/boards/cht_bsw_rt5672.c ++++ b/sound/soc/intel/boards/cht_bsw_rt5672.c +@@ -483,7 +483,7 @@ static int snd_cht_mc_probe(struct platform_device *pdev) + drv->use_ssp0 = true; + } + +- /* override plaform name, if required */ ++ /* override platform name, if required */ + snd_soc_card_cht.dev = &pdev->dev; + platform_name = mach->mach_params.platform; + +diff --git a/sound/soc/intel/boards/glk_rt5682_max98357a.c b/sound/soc/intel/boards/glk_rt5682_max98357a.c +index 71fe26a1b7011..99b3d7642cb77 100644 +--- a/sound/soc/intel/boards/glk_rt5682_max98357a.c ++++ b/sound/soc/intel/boards/glk_rt5682_max98357a.c +@@ -604,7 +604,7 @@ static int geminilake_audio_probe(struct platform_device *pdev) + card->dev = &pdev->dev; + snd_soc_card_set_drvdata(card, ctx); + +- /* override plaform name, if required */ ++ /* override platform name, if required */ + mach = pdev->dev.platform_data; + platform_name = mach->mach_params.platform; + +diff --git a/sound/soc/intel/boards/haswell.c b/sound/soc/intel/boards/haswell.c +index c763bfeb1f38f..b5ca3177be6a3 100644 +--- a/sound/soc/intel/boards/haswell.c ++++ b/sound/soc/intel/boards/haswell.c +@@ -175,7 +175,7 @@ static int haswell_audio_probe(struct platform_device *pdev) + + haswell_rt5640.dev = &pdev->dev; + +- /* override plaform name, if required */ ++ /* override platform name, if required */ + mach = pdev->dev.platform_data; + ret = 
snd_soc_fixup_dai_links_platform_name(&haswell_rt5640, + mach->mach_params.platform); +diff --git a/sound/soc/intel/boards/sof_cs42l42.c b/sound/soc/intel/boards/sof_cs42l42.c +index ce78c18798876..8061082d9fbf3 100644 +--- a/sound/soc/intel/boards/sof_cs42l42.c ++++ b/sound/soc/intel/boards/sof_cs42l42.c +@@ -311,6 +311,9 @@ static int create_spk_amp_dai_links(struct device *dev, + links[*id].platforms = platform_component; + links[*id].num_platforms = ARRAY_SIZE(platform_component); + links[*id].dpcm_playback = 1; ++ /* firmware-generated echo reference */ ++ links[*id].dpcm_capture = 1; ++ + links[*id].no_pcm = 1; + links[*id].cpus = &cpus[*id]; + links[*id].num_cpus = 1; +diff --git a/sound/soc/intel/boards/sof_rt5682.c b/sound/soc/intel/boards/sof_rt5682.c +index f096bd6d69be7..d0ce2f06b30c6 100644 +--- a/sound/soc/intel/boards/sof_rt5682.c ++++ b/sound/soc/intel/boards/sof_rt5682.c +@@ -737,8 +737,6 @@ static struct snd_soc_dai_link *sof_card_dai_links_create(struct device *dev, + links[id].num_codecs = ARRAY_SIZE(max_98373_components); + links[id].init = max_98373_spk_codec_init; + links[id].ops = &max_98373_ops; +- /* feedback stream */ +- links[id].dpcm_capture = 1; + } else if (sof_rt5682_quirk & + SOF_MAX98360A_SPEAKER_AMP_PRESENT) { + max_98360a_dai_link(&links[id]); +@@ -751,6 +749,9 @@ static struct snd_soc_dai_link *sof_card_dai_links_create(struct device *dev, + links[id].platforms = platform_component; + links[id].num_platforms = ARRAY_SIZE(platform_component); + links[id].dpcm_playback = 1; ++ /* feedback stream or firmware-generated echo reference */ ++ links[id].dpcm_capture = 1; ++ + links[id].no_pcm = 1; + links[id].cpus = &cpus[id]; + links[id].num_cpus = 1; diff --git a/sound/soc/intel/boards/sof_sdw.c b/sound/soc/intel/boards/sof_sdw.c -index 6b06248a9327a..abe39a0ef14b0 100644 +index 6b06248a9327a..2d53a707aff9c 100644 --- a/sound/soc/intel/boards/sof_sdw.c +++ b/sound/soc/intel/boards/sof_sdw.c @@ -184,11 +184,11 @@ static const struct dmi_system_id sof_sdw_quirk_table[] = { @@ -427567,7 +549319,25 @@ index 6b06248a9327a..abe39a0ef14b0 100644 }, { /* NUC15 'Bishop County' LAPBC510 and LAPBC710 skews */ -@@ -213,6 +213,16 @@ static const struct dmi_system_id sof_sdw_quirk_table[] = { +@@ -201,6 +201,17 @@ static const struct dmi_system_id sof_sdw_quirk_table[] = { + SOF_SDW_PCH_DMIC | + RT711_JD1), + }, ++ { ++ /* NUC15 LAPBC710 skews */ ++ .callback = sof_sdw_quirk_cb, ++ .matches = { ++ DMI_MATCH(DMI_BOARD_VENDOR, "Intel Corporation"), ++ DMI_MATCH(DMI_BOARD_NAME, "LAPBC710"), ++ }, ++ .driver_data = (void *)(SOF_SDW_TGL_HDMI | ++ SOF_SDW_PCH_DMIC | ++ RT711_JD1), ++ }, + /* TigerLake-SDCA devices */ + { + .callback = sof_sdw_quirk_cb, +@@ -213,6 +224,16 @@ static const struct dmi_system_id sof_sdw_quirk_table[] = { SOF_RT715_DAI_ID_FIX | SOF_SDW_FOUR_SPK), }, @@ -427584,7 +549354,34 @@ index 6b06248a9327a..abe39a0ef14b0 100644 /* AlderLake devices */ { .callback = sof_sdw_quirk_cb, -@@ -1313,6 +1323,33 @@ static struct snd_soc_card card_sof_sdw = { +@@ -270,7 +291,7 @@ int sdw_prepare(struct snd_pcm_substream *substream) + /* Find stream from first CPU DAI */ + dai = asoc_rtd_to_cpu(rtd, 0); + +- sdw_stream = snd_soc_dai_get_sdw_stream(dai, substream->stream); ++ sdw_stream = snd_soc_dai_get_stream(dai, substream->stream); + + if (IS_ERR(sdw_stream)) { + dev_err(rtd->dev, "no stream found for DAI %s", dai->name); +@@ -290,7 +311,7 @@ int sdw_trigger(struct snd_pcm_substream *substream, int cmd) + /* Find stream from first CPU DAI */ + dai = asoc_rtd_to_cpu(rtd, 
0); + +- sdw_stream = snd_soc_dai_get_sdw_stream(dai, substream->stream); ++ sdw_stream = snd_soc_dai_get_stream(dai, substream->stream); + + if (IS_ERR(sdw_stream)) { + dev_err(rtd->dev, "no stream found for DAI %s", dai->name); +@@ -329,7 +350,7 @@ int sdw_hw_free(struct snd_pcm_substream *substream) + /* Find stream from first CPU DAI */ + dai = asoc_rtd_to_cpu(rtd, 0); + +- sdw_stream = snd_soc_dai_get_sdw_stream(dai, substream->stream); ++ sdw_stream = snd_soc_dai_get_stream(dai, substream->stream); + + if (IS_ERR(sdw_stream)) { + dev_err(rtd->dev, "no stream found for DAI %s", dai->name); +@@ -1313,6 +1334,33 @@ static struct snd_soc_card card_sof_sdw = { .late_probe = sof_sdw_card_late_probe, }; @@ -427618,7 +549415,7 @@ index 6b06248a9327a..abe39a0ef14b0 100644 static int mc_probe(struct platform_device *pdev) { struct snd_soc_card *card = &card_sof_sdw; -@@ -1377,6 +1414,7 @@ static int mc_probe(struct platform_device *pdev) +@@ -1377,6 +1425,7 @@ static int mc_probe(struct platform_device *pdev) ret = devm_snd_soc_register_card(&pdev->dev, card); if (ret) { dev_err(card->dev, "snd_soc_register_card failed %d\n", ret); @@ -427626,7 +549423,7 @@ index 6b06248a9327a..abe39a0ef14b0 100644 return ret; } -@@ -1388,29 +1426,8 @@ static int mc_probe(struct platform_device *pdev) +@@ -1388,29 +1437,8 @@ static int mc_probe(struct platform_device *pdev) static int mc_remove(struct platform_device *pdev) { struct snd_soc_card *card = platform_get_drvdata(pdev); @@ -427887,10 +549684,24 @@ index 64226072f0ee2..74f60f5dfaefd 100644 } diff --git a/sound/soc/intel/skylake/skl-pcm.c b/sound/soc/intel/skylake/skl-pcm.c -index 9ecaf6a1e8475..e4aa366d356eb 100644 +index 9ecaf6a1e8475..db41bd7170650 100644 --- a/sound/soc/intel/skylake/skl-pcm.c +++ b/sound/soc/intel/skylake/skl-pcm.c -@@ -1251,7 +1251,6 @@ static int skl_platform_soc_get_time_info( +@@ -562,11 +562,8 @@ static int skl_link_hw_params(struct snd_pcm_substream *substream, + + stream_tag = hdac_stream(link_dev)->stream_tag; + +- /* set the stream tag in the codec dai dma params */ +- if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) +- snd_soc_dai_set_tdm_slot(codec_dai, stream_tag, 0, 0, 0); +- else +- snd_soc_dai_set_tdm_slot(codec_dai, 0, stream_tag, 0, 0); ++ /* set the hdac_stream in the codec dai */ ++ snd_soc_dai_set_stream(codec_dai, hdac_stream(link_dev), substream->stream); + + p_params.s_fmt = snd_pcm_format_width(params_format(params)); + p_params.ch = params_channels(params); +@@ -1251,7 +1248,6 @@ static int skl_platform_soc_get_time_info( snd_pcm_gettime(substream->runtime, system_ts); nsec = timecounter_read(&hstr->tc); @@ -427898,6 +549709,129 @@ index 9ecaf6a1e8475..e4aa366d356eb 100644 if (audio_tstamp_config->report_delay) nsec = skl_adjust_codec_delay(substream, nsec); +diff --git a/sound/soc/intel/skylake/skl.c b/sound/soc/intel/skylake/skl.c +index 5b1a15e399123..46bb3b8bd5afe 100644 +--- a/sound/soc/intel/skylake/skl.c ++++ b/sound/soc/intel/skylake/skl.c +@@ -439,7 +439,7 @@ static int skl_free(struct hdac_bus *bus) + + skl->init_done = 0; /* to be sure */ + +- snd_hdac_ext_stop_streams(bus); ++ snd_hdac_stop_streams_and_chip(bus); + + if (bus->irq >= 0) + free_irq(bus->irq, (void *)bus); +@@ -1096,7 +1096,10 @@ static void skl_shutdown(struct pci_dev *pci) + if (!skl->init_done) + return; + +- snd_hdac_ext_stop_streams(bus); ++ snd_hdac_stop_streams(bus); ++ snd_hdac_ext_bus_link_power_down_all(bus); ++ skl_dsp_sleep(skl->dsp); ++ + list_for_each_entry(s, &bus->stream_list, list) { + stream = 
stream_to_hdac_ext_stream(s); + snd_hdac_ext_stream_decouple(bus, stream, false); +diff --git a/sound/soc/jz4740/jz4740-i2s.c b/sound/soc/jz4740/jz4740-i2s.c +index 7ad5d9a924d80..4e1fc4ba5150a 100644 +--- a/sound/soc/jz4740/jz4740-i2s.c ++++ b/sound/soc/jz4740/jz4740-i2s.c +@@ -56,7 +56,8 @@ + #define JZ_AIC_CTRL_MONO_TO_STEREO BIT(11) + #define JZ_AIC_CTRL_SWITCH_ENDIANNESS BIT(10) + #define JZ_AIC_CTRL_SIGNED_TO_UNSIGNED BIT(9) +-#define JZ_AIC_CTRL_FLUSH BIT(8) ++#define JZ_AIC_CTRL_TFLUSH BIT(8) ++#define JZ_AIC_CTRL_RFLUSH BIT(7) + #define JZ_AIC_CTRL_ENABLE_ROR_INT BIT(6) + #define JZ_AIC_CTRL_ENABLE_TUR_INT BIT(5) + #define JZ_AIC_CTRL_ENABLE_RFS_INT BIT(4) +@@ -91,6 +92,8 @@ enum jz47xx_i2s_version { + struct i2s_soc_info { + enum jz47xx_i2s_version version; + struct snd_soc_dai_driver *dai; ++ ++ bool shared_fifo_flush; + }; + + struct jz4740_i2s { +@@ -119,19 +122,44 @@ static inline void jz4740_i2s_write(const struct jz4740_i2s *i2s, + writel(value, i2s->base + reg); + } + ++static inline void jz4740_i2s_set_bits(const struct jz4740_i2s *i2s, ++ unsigned int reg, uint32_t bits) ++{ ++ uint32_t value = jz4740_i2s_read(i2s, reg); ++ value |= bits; ++ jz4740_i2s_write(i2s, reg, value); ++} ++ + static int jz4740_i2s_startup(struct snd_pcm_substream *substream, + struct snd_soc_dai *dai) + { + struct jz4740_i2s *i2s = snd_soc_dai_get_drvdata(dai); +- uint32_t conf, ctrl; ++ uint32_t conf; + int ret; + ++ /* ++ * When we can flush FIFOs independently, only flush the FIFO ++ * that is starting up. We can do this when the DAI is active ++ * because it does not disturb other active substreams. ++ */ ++ if (!i2s->soc_info->shared_fifo_flush) { ++ if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) ++ jz4740_i2s_set_bits(i2s, JZ_REG_AIC_CTRL, JZ_AIC_CTRL_TFLUSH); ++ else ++ jz4740_i2s_set_bits(i2s, JZ_REG_AIC_CTRL, JZ_AIC_CTRL_RFLUSH); ++ } ++ + if (snd_soc_dai_active(dai)) + return 0; + +- ctrl = jz4740_i2s_read(i2s, JZ_REG_AIC_CTRL); +- ctrl |= JZ_AIC_CTRL_FLUSH; +- jz4740_i2s_write(i2s, JZ_REG_AIC_CTRL, ctrl); ++ /* ++ * When there is a shared flush bit for both FIFOs, the TFLUSH ++ * bit flushes both FIFOs. Flushing while the DAI is active would ++ * cause FIFO underruns in other active substreams so we have to ++ * guard this behind the snd_soc_dai_active() check. 
++ */ ++ if (i2s->soc_info->shared_fifo_flush) ++ jz4740_i2s_set_bits(i2s, JZ_REG_AIC_CTRL, JZ_AIC_CTRL_TFLUSH); + + ret = clk_prepare_enable(i2s->clk_i2s); + if (ret) +@@ -462,6 +490,7 @@ static struct snd_soc_dai_driver jz4740_i2s_dai = { + static const struct i2s_soc_info jz4740_i2s_soc_info = { + .version = JZ_I2S_JZ4740, + .dai = &jz4740_i2s_dai, ++ .shared_fifo_flush = true, + }; + + static const struct i2s_soc_info jz4760_i2s_soc_info = { +diff --git a/sound/soc/mediatek/common/mtk-btcvsd.c b/sound/soc/mediatek/common/mtk-btcvsd.c +index d884bb7c0fc74..1c28b41e43112 100644 +--- a/sound/soc/mediatek/common/mtk-btcvsd.c ++++ b/sound/soc/mediatek/common/mtk-btcvsd.c +@@ -1038,11 +1038,9 @@ static int mtk_pcm_btcvsd_copy(struct snd_soc_component *component, + struct mtk_btcvsd_snd *bt = snd_soc_component_get_drvdata(component); + + if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) +- mtk_btcvsd_snd_write(bt, buf, count); ++ return mtk_btcvsd_snd_write(bt, buf, count); + else +- mtk_btcvsd_snd_read(bt, buf, count); +- +- return 0; ++ return mtk_btcvsd_snd_read(bt, buf, count); + } + + /* kcontrol */ diff --git a/sound/soc/mediatek/mt2701/mt2701-wm8960.c b/sound/soc/mediatek/mt2701/mt2701-wm8960.c index 414e422c0eba0..70e494fb3da87 100644 --- a/sound/soc/mediatek/mt2701/mt2701-wm8960.c @@ -427956,6 +549890,118 @@ index 496f32bcfb5e3..d2f6213a6bfcc 100644 return ret; } +diff --git a/sound/soc/mediatek/mt8173/mt8173-afe-pcm.c b/sound/soc/mediatek/mt8173/mt8173-afe-pcm.c +index 6350390414d4a..8092506facbd9 100644 +--- a/sound/soc/mediatek/mt8173/mt8173-afe-pcm.c ++++ b/sound/soc/mediatek/mt8173/mt8173-afe-pcm.c +@@ -1054,6 +1054,7 @@ static int mt8173_afe_pcm_dev_probe(struct platform_device *pdev) + int irq_id; + struct mtk_base_afe *afe; + struct mt8173_afe_private *afe_priv; ++ struct snd_soc_component *comp_pcm, *comp_hdmi; + + ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(33)); + if (ret) +@@ -1071,16 +1072,6 @@ static int mt8173_afe_pcm_dev_probe(struct platform_device *pdev) + + afe->dev = &pdev->dev; + +- irq_id = platform_get_irq(pdev, 0); +- if (irq_id <= 0) +- return irq_id < 0 ? 
irq_id : -ENXIO; +- ret = devm_request_irq(afe->dev, irq_id, mt8173_afe_irq_handler, +- 0, "Afe_ISR_Handle", (void *)afe); +- if (ret) { +- dev_err(afe->dev, "could not request_irq\n"); +- return ret; +- } +- + afe->base_addr = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(afe->base_addr)) + return PTR_ERR(afe->base_addr); +@@ -1142,23 +1133,65 @@ static int mt8173_afe_pcm_dev_probe(struct platform_device *pdev) + if (ret) + goto err_pm_disable; + +- ret = devm_snd_soc_register_component(&pdev->dev, +- &mt8173_afe_pcm_dai_component, +- mt8173_afe_pcm_dais, +- ARRAY_SIZE(mt8173_afe_pcm_dais)); ++ comp_pcm = devm_kzalloc(&pdev->dev, sizeof(*comp_pcm), GFP_KERNEL); ++ if (!comp_pcm) { ++ ret = -ENOMEM; ++ goto err_pm_disable; ++ } ++ ++ ret = snd_soc_component_initialize(comp_pcm, ++ &mt8173_afe_pcm_dai_component, ++ &pdev->dev); + if (ret) + goto err_pm_disable; + +- ret = devm_snd_soc_register_component(&pdev->dev, +- &mt8173_afe_hdmi_dai_component, +- mt8173_afe_hdmi_dais, +- ARRAY_SIZE(mt8173_afe_hdmi_dais)); ++#ifdef CONFIG_DEBUG_FS ++ comp_pcm->debugfs_prefix = "pcm"; ++#endif ++ ++ ret = snd_soc_add_component(comp_pcm, ++ mt8173_afe_pcm_dais, ++ ARRAY_SIZE(mt8173_afe_pcm_dais)); ++ if (ret) ++ goto err_pm_disable; ++ ++ comp_hdmi = devm_kzalloc(&pdev->dev, sizeof(*comp_hdmi), GFP_KERNEL); ++ if (!comp_hdmi) { ++ ret = -ENOMEM; ++ goto err_pm_disable; ++ } ++ ++ ret = snd_soc_component_initialize(comp_hdmi, ++ &mt8173_afe_hdmi_dai_component, ++ &pdev->dev); + if (ret) + goto err_pm_disable; + ++#ifdef CONFIG_DEBUG_FS ++ comp_hdmi->debugfs_prefix = "hdmi"; ++#endif ++ ++ ret = snd_soc_add_component(comp_hdmi, ++ mt8173_afe_hdmi_dais, ++ ARRAY_SIZE(mt8173_afe_hdmi_dais)); ++ if (ret) ++ goto err_cleanup_components; ++ ++ irq_id = platform_get_irq(pdev, 0); ++ if (irq_id <= 0) ++ return irq_id < 0 ? 
irq_id : -ENXIO; ++ ret = devm_request_irq(afe->dev, irq_id, mt8173_afe_irq_handler, ++ 0, "Afe_ISR_Handle", (void *)afe); ++ if (ret) { ++ dev_err(afe->dev, "could not request_irq\n"); ++ goto err_pm_disable; ++ } ++ + dev_info(&pdev->dev, "MT8173 AFE driver initialized.\n"); + return 0; + ++err_cleanup_components: ++ snd_soc_unregister_component(&pdev->dev); + err_pm_disable: + pm_runtime_disable(&pdev->dev); + return ret; +@@ -1166,6 +1199,8 @@ err_pm_disable: + + static int mt8173_afe_pcm_dev_remove(struct platform_device *pdev) + { ++ snd_soc_unregister_component(&pdev->dev); ++ + pm_runtime_disable(&pdev->dev); + if (!pm_runtime_status_suspended(&pdev->dev)) + mt8173_afe_runtime_suspend(&pdev->dev); diff --git a/sound/soc/mediatek/mt8173/mt8173-max98090.c b/sound/soc/mediatek/mt8173/mt8173-max98090.c index fc94314bfc02f..5f39e810e27ae 100644 --- a/sound/soc/mediatek/mt8173/mt8173-max98090.c @@ -427983,14 +550029,34 @@ index fc94314bfc02f..5f39e810e27ae 100644 } diff --git a/sound/soc/mediatek/mt8173/mt8173-rt5650-rt5514.c b/sound/soc/mediatek/mt8173/mt8173-rt5650-rt5514.c -index 0f28dc2217c09..390da5bf727eb 100644 +index 0f28dc2217c09..9421b919d4627 100644 --- a/sound/soc/mediatek/mt8173/mt8173-rt5650-rt5514.c +++ b/sound/soc/mediatek/mt8173/mt8173-rt5650-rt5514.c -@@ -218,6 +218,8 @@ static int mt8173_rt5650_rt5514_dev_probe(struct platform_device *pdev) +@@ -200,14 +200,16 @@ static int mt8173_rt5650_rt5514_dev_probe(struct platform_device *pdev) + if (!mt8173_rt5650_rt5514_dais[DAI_LINK_CODEC_I2S].codecs[0].of_node) { + dev_err(&pdev->dev, + "Property 'audio-codec' missing or invalid\n"); +- return -EINVAL; ++ ret = -EINVAL; ++ goto out; + } + mt8173_rt5650_rt5514_dais[DAI_LINK_CODEC_I2S].codecs[1].of_node = + of_parse_phandle(pdev->dev.of_node, "mediatek,audio-codec", 1); + if (!mt8173_rt5650_rt5514_dais[DAI_LINK_CODEC_I2S].codecs[1].of_node) { + dev_err(&pdev->dev, + "Property 'audio-codec' missing or invalid\n"); +- return -EINVAL; ++ ret = -EINVAL; ++ goto out; + } + mt8173_rt5650_rt5514_codec_conf[0].dlc.of_node = + mt8173_rt5650_rt5514_dais[DAI_LINK_CODEC_I2S].codecs[1].of_node; +@@ -218,6 +220,9 @@ static int mt8173_rt5650_rt5514_dev_probe(struct platform_device *pdev) if (ret) dev_err(&pdev->dev, "%s snd_soc_register_card fail %d\n", __func__, ret); + ++out: + of_node_put(platform_node); return ret; } @@ -428121,7 +550187,7 @@ index a4d26a6fc8492..0ab8b050b305f 100644 #ifdef CONFIG_OF diff --git a/sound/soc/mediatek/mt8183/mt8183-mt6358-ts3a227-max98357.c b/sound/soc/mediatek/mt8183/mt8183-mt6358-ts3a227-max98357.c -index 94dcbd36c8697..a56c1e87d5642 100644 +index 94dcbd36c8697..4dab1ee69ec07 100644 --- a/sound/soc/mediatek/mt8183/mt8183-mt6358-ts3a227-max98357.c +++ b/sound/soc/mediatek/mt8183/mt8183-mt6358-ts3a227-max98357.c @@ -637,7 +637,6 @@ mt8183_mt6358_ts3a227_max98357_dev_probe(struct platform_device *pdev) @@ -428132,27 +550198,53 @@ index 94dcbd36c8697..a56c1e87d5642 100644 int ret, i; platform_node = of_parse_phandle(pdev->dev.of_node, -@@ -647,11 +646,9 @@ mt8183_mt6358_ts3a227_max98357_dev_probe(struct platform_device *pdev) +@@ -647,11 +646,11 @@ mt8183_mt6358_ts3a227_max98357_dev_probe(struct platform_device *pdev) return -EINVAL; } - match = of_match_device(pdev->dev.driver->of_match_table, &pdev->dev); - if (!match || !match->data) + card = (struct snd_soc_card *)of_device_get_match_data(&pdev->dev); -+ if (!card) ++ if (!card) { ++ of_node_put(platform_node); return -EINVAL; - - card = (struct snd_soc_card *)match->data; ++ } card->dev = &pdev->dev; 
ec_codec = of_parse_phandle(pdev->dev.of_node, "mediatek,ec-codec", 0); -@@ -780,7 +777,12 @@ mt8183_mt6358_ts3a227_max98357_dev_probe(struct platform_device *pdev) +@@ -740,8 +739,10 @@ mt8183_mt6358_ts3a227_max98357_dev_probe(struct platform_device *pdev) + } + + priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL); +- if (!priv) +- return -ENOMEM; ++ if (!priv) { ++ ret = -ENOMEM; ++ goto out; ++ } + + snd_soc_card_set_drvdata(card, priv); + +@@ -749,7 +750,8 @@ mt8183_mt6358_ts3a227_max98357_dev_probe(struct platform_device *pdev) + if (IS_ERR(priv->pinctrl)) { + dev_err(&pdev->dev, "%s devm_pinctrl_get failed\n", + __func__); +- return PTR_ERR(priv->pinctrl); ++ ret = PTR_ERR(priv->pinctrl); ++ goto out; + } + + for (i = 0; i < PIN_STATE_MAX; i++) { +@@ -780,7 +782,13 @@ mt8183_mt6358_ts3a227_max98357_dev_probe(struct platform_device *pdev) __func__, ret); } - return devm_snd_soc_register_card(&pdev->dev, card); + ret = devm_snd_soc_register_card(&pdev->dev, card); + ++out: + of_node_put(platform_node); + of_node_put(ec_codec); + of_node_put(hdmi_codec); @@ -428593,11 +550685,59 @@ index a6407f4388de7..fb721bc499496 100644 return -EINVAL; } +diff --git a/sound/soc/pxa/mmp-pcm.c b/sound/soc/pxa/mmp-pcm.c +index 5d520e18e512f..99b245e3079a2 100644 +--- a/sound/soc/pxa/mmp-pcm.c ++++ b/sound/soc/pxa/mmp-pcm.c +@@ -98,7 +98,7 @@ static bool filter(struct dma_chan *chan, void *param) + + devname = kasprintf(GFP_KERNEL, "%s.%d", dma_data->dma_res->name, + dma_data->ssp_id); +- if ((strcmp(dev_name(chan->device->dev), devname) == 0) && ++ if (devname && (strcmp(dev_name(chan->device->dev), devname) == 0) && + (chan->chan_id == dma_data->dma_res->start)) { + found = true; + } diff --git a/sound/soc/qcom/lpass-cpu.c b/sound/soc/qcom/lpass-cpu.c -index 3bd9eb3cc688b..5e89d280e3550 100644 +index 3bd9eb3cc688b..9f5e3e1dfd947 100644 --- a/sound/soc/qcom/lpass-cpu.c +++ b/sound/soc/qcom/lpass-cpu.c -@@ -880,6 +880,7 @@ int asoc_qcom_lpass_cpu_platform_probe(struct platform_device *pdev) +@@ -779,10 +779,20 @@ static bool lpass_hdmi_regmap_volatile(struct device *dev, unsigned int reg) + return true; + if (reg == LPASS_HDMI_TX_LEGACY_ADDR(v)) + return true; ++ if (reg == LPASS_HDMI_TX_VBIT_CTL_ADDR(v)) ++ return true; ++ if (reg == LPASS_HDMI_TX_PARITY_ADDR(v)) ++ return true; + + for (i = 0; i < v->hdmi_rdma_channels; ++i) { + if (reg == LPAIF_HDMI_RDMACURR_REG(v, i)) + return true; ++ if (reg == LPASS_HDMI_TX_DMA_ADDR(v, i)) ++ return true; ++ if (reg == LPASS_HDMI_TX_CH_LSB_ADDR(v, i)) ++ return true; ++ if (reg == LPASS_HDMI_TX_CH_MSB_ADDR(v, i)) ++ return true; + } + return false; + } +@@ -841,10 +851,11 @@ static void of_lpass_cpu_parse_dai_data(struct device *dev, + struct lpass_data *data) + { + struct device_node *node; +- int ret, id; ++ int ret, i, id; + + /* Allow all channels by default for backwards compatibility */ +- for (id = 0; id < data->variant->num_dai; id++) { ++ for (i = 0; i < data->variant->num_dai; i++) { ++ id = data->variant->dai_driver[i].id; + data->mi2s_playback_sd_mode[id] = LPAIF_I2SCTL_MODE_8CH; + data->mi2s_capture_sd_mode[id] = LPAIF_I2SCTL_MODE_8CH; + } +@@ -880,6 +891,7 @@ int asoc_qcom_lpass_cpu_platform_probe(struct platform_device *pdev) dsp_of_node = of_parse_phandle(pdev->dev.of_node, "qcom,adsp", 0); if (dsp_of_node) { dev_err(dev, "DSP exists and holds audio resources\n"); @@ -428645,6 +550785,20 @@ index a59e9d20cb46b..4b1773c1fb95f 100644 if (rv) { dev_err(soc_runtime->dev, "error writing to irqclear reg: %d\n", rv); +diff --git 
a/sound/soc/qcom/lpass-sc7180.c b/sound/soc/qcom/lpass-sc7180.c +index 77a556b27cf09..24a1c121cb2e9 100644 +--- a/sound/soc/qcom/lpass-sc7180.c ++++ b/sound/soc/qcom/lpass-sc7180.c +@@ -131,6 +131,9 @@ static int sc7180_lpass_init(struct platform_device *pdev) + + drvdata->clks = devm_kcalloc(dev, variant->num_clks, + sizeof(*drvdata->clks), GFP_KERNEL); ++ if (!drvdata->clks) ++ return -ENOMEM; ++ + drvdata->num_clks = variant->num_clks; + + for (i = 0; i < drvdata->num_clks; i++) diff --git a/sound/soc/qcom/qdsp6/q6adm.c b/sound/soc/qcom/qdsp6/q6adm.c index 3d831b635524f..4ae97afe90624 100644 --- a/sound/soc/qcom/qdsp6/q6adm.c @@ -428724,10 +550878,36 @@ index 3390ebef9549d..18c90bb4922be 100644 session->port_id = -1; snd_soc_dapm_mixer_update_power(dapm, kcontrol, 0, update); } +diff --git a/sound/soc/qcom/sdm845.c b/sound/soc/qcom/sdm845.c +index 0adfc57089492..4da5ad609fcea 100644 +--- a/sound/soc/qcom/sdm845.c ++++ b/sound/soc/qcom/sdm845.c +@@ -56,8 +56,8 @@ static int sdm845_slim_snd_hw_params(struct snd_pcm_substream *substream, + int ret = 0, i; + + for_each_rtd_codec_dais(rtd, i, codec_dai) { +- sruntime = snd_soc_dai_get_sdw_stream(codec_dai, +- substream->stream); ++ sruntime = snd_soc_dai_get_stream(codec_dai, ++ substream->stream); + if (sruntime != ERR_PTR(-ENOTSUPP)) + pdata->sruntime[cpu_dai->id] = sruntime; + diff --git a/sound/soc/qcom/sm8250.c b/sound/soc/qcom/sm8250.c -index fe8fd7367e21b..e5190aa588c63 100644 +index fe8fd7367e21b..feb6589171ca7 100644 --- a/sound/soc/qcom/sm8250.c +++ b/sound/soc/qcom/sm8250.c +@@ -70,8 +70,8 @@ static int sm8250_snd_hw_params(struct snd_pcm_substream *substream, + switch (cpu_dai->id) { + case WSA_CODEC_DMA_RX_0: + for_each_rtd_codec_dais(rtd, i, codec_dai) { +- sruntime = snd_soc_dai_get_sdw_stream(codec_dai, +- substream->stream); ++ sruntime = snd_soc_dai_get_stream(codec_dai, ++ substream->stream); + if (sruntime != ERR_PTR(-ENOTSUPP)) + pdata->sruntime[cpu_dai->id] = sruntime; + } @@ -191,6 +191,7 @@ static int sm8250_platform_probe(struct platform_device *pdev) if (!card) return -ENOMEM; @@ -428778,6 +550958,30 @@ index 7e89f5b0c237f..2880a05376469 100644 return ret; } +diff --git a/sound/soc/rockchip/rockchip_pdm.c b/sound/soc/rockchip/rockchip_pdm.c +index 38bd603eeb454..7c0b0fe326c22 100644 +--- a/sound/soc/rockchip/rockchip_pdm.c ++++ b/sound/soc/rockchip/rockchip_pdm.c +@@ -368,6 +368,7 @@ static int rockchip_pdm_runtime_resume(struct device *dev) + + ret = clk_prepare_enable(pdm->hclk); + if (ret) { ++ clk_disable_unprepare(pdm->clk); + dev_err(pdm->dev, "hclock enable failed %d\n", ret); + return ret; + } +diff --git a/sound/soc/rockchip/rockchip_spdif.c b/sound/soc/rockchip/rockchip_spdif.c +index d027ca4b17964..09a25d84fee6f 100644 +--- a/sound/soc/rockchip/rockchip_spdif.c ++++ b/sound/soc/rockchip/rockchip_spdif.c +@@ -88,6 +88,7 @@ static int __maybe_unused rk_spdif_runtime_resume(struct device *dev) + + ret = clk_prepare_enable(spdif->hclk); + if (ret) { ++ clk_disable_unprepare(spdif->mclk); + dev_err(spdif->dev, "hclk clock enable failed %d\n", ret); + return ret; + } diff --git a/sound/soc/samsung/aries_wm8994.c b/sound/soc/samsung/aries_wm8994.c index 313ab650f8d9f..a0825da9fff97 100644 --- a/sound/soc/samsung/aries_wm8994.c @@ -429559,7 +551763,7 @@ index 36060800e9bd9..2050728063a15 100644 if (snd_soc_dai_stream_valid(codec_dai, SNDRV_PCM_STREAM_PLAYBACK) && snd_soc_dai_stream_valid(cpu_dai, SNDRV_PCM_STREAM_PLAYBACK)) diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c -index 
80ca260595fda..5da762807824d 100644 +index 80ca260595fda..1b1749b920f45 100644 --- a/sound/soc/soc-core.c +++ b/sound/soc/soc-core.c @@ -3208,7 +3208,7 @@ int snd_soc_get_dai_name(const struct of_phandle_args *args, @@ -429571,6 +551775,32 @@ index 80ca260595fda..5da762807824d 100644 continue; ret = snd_soc_component_of_xlate_dai_name(pos, args, dai_name); +@@ -3366,10 +3366,23 @@ EXPORT_SYMBOL_GPL(snd_soc_of_get_dai_link_codecs); + + static int __init snd_soc_init(void) + { ++ int ret; ++ + snd_soc_debugfs_init(); +- snd_soc_util_init(); ++ ret = snd_soc_util_init(); ++ if (ret) ++ goto err_util_init; + +- return platform_driver_register(&soc_driver); ++ ret = platform_driver_register(&soc_driver); ++ if (ret) ++ goto err_register; ++ return 0; ++ ++err_register: ++ snd_soc_util_exit(); ++err_util_init: ++ snd_soc_debugfs_exit(); ++ return ret; + } + module_init(snd_soc_init); + diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c index 59d07648a7e7f..b957049bae337 100644 --- a/sound/soc/soc-dapm.c @@ -429681,7 +551911,7 @@ index 59d07648a7e7f..b957049bae337 100644 snd_soc_dapm_sync(&card->dapm); return ret; diff --git a/sound/soc/soc-ops.c b/sound/soc/soc-ops.c -index 08eaa9ddf191e..e73360e9de8f9 100644 +index 08eaa9ddf191e..12effaa59fdb7 100644 --- a/sound/soc/soc-ops.c +++ b/sound/soc/soc-ops.c @@ -308,7 +308,7 @@ int snd_soc_put_volsw(struct snd_kcontrol *kcontrol, @@ -429745,7 +551975,7 @@ index 08eaa9ddf191e..e73360e9de8f9 100644 } EXPORT_SYMBOL_GPL(snd_soc_put_volsw); -@@ -407,15 +427,24 @@ int snd_soc_put_volsw_sx(struct snd_kcontrol *kcontrol, +@@ -407,27 +427,46 @@ int snd_soc_put_volsw_sx(struct snd_kcontrol *kcontrol, int min = mc->min; unsigned int mask = (1U << (fls(min + max) - 1)) - 1; int err = 0; @@ -429755,7 +551985,7 @@ index 08eaa9ddf191e..e73360e9de8f9 100644 + val = ucontrol->value.integer.value[0]; + if (mc->platform_max && val > mc->platform_max) + return -EINVAL; -+ if (val > max - min) ++ if (val > max) + return -EINVAL; + if (val < 0) + return -EINVAL; @@ -429770,8 +552000,18 @@ index 08eaa9ddf191e..e73360e9de8f9 100644 + ret = err; if (snd_soc_volsw_is_stereo(mc)) { - unsigned int val2; -@@ -426,8 +455,13 @@ int snd_soc_put_volsw_sx(struct snd_kcontrol *kcontrol, +- unsigned int val2; ++ unsigned int val2 = ucontrol->value.integer.value[1]; ++ ++ if (mc->platform_max && val2 > mc->platform_max) ++ return -EINVAL; ++ if (val2 > max) ++ return -EINVAL; + + val_mask = mask << rshift; +- val2 = (ucontrol->value.integer.value[1] + min) & mask; ++ val2 = (val2 + min) & mask; + val2 = val2 << rshift; err = snd_soc_component_update_bits(component, reg2, val_mask, val2); @@ -429786,7 +552026,7 @@ index 08eaa9ddf191e..e73360e9de8f9 100644 } EXPORT_SYMBOL_GPL(snd_soc_put_volsw_sx); -@@ -485,7 +519,15 @@ int snd_soc_put_volsw_range(struct snd_kcontrol *kcontrol, +@@ -485,7 +524,15 @@ int snd_soc_put_volsw_range(struct snd_kcontrol *kcontrol, unsigned int mask = (1 << fls(max)) - 1; unsigned int invert = mc->invert; unsigned int val, val_mask; @@ -429803,7 +552043,7 @@ index 08eaa9ddf191e..e73360e9de8f9 100644 if (invert) val = (max - ucontrol->value.integer.value[0]) & mask; -@@ -494,11 +536,20 @@ int snd_soc_put_volsw_range(struct snd_kcontrol *kcontrol, +@@ -494,11 +541,20 @@ int snd_soc_put_volsw_range(struct snd_kcontrol *kcontrol, val_mask = mask << shift; val = val << shift; @@ -429827,7 +552067,7 @@ index 08eaa9ddf191e..e73360e9de8f9 100644 if (invert) val = (max - ucontrol->value.integer.value[1]) & mask; else -@@ -506,8 +557,12 @@ int 
snd_soc_put_volsw_range(struct snd_kcontrol *kcontrol, +@@ -506,8 +562,12 @@ int snd_soc_put_volsw_range(struct snd_kcontrol *kcontrol, val_mask = mask << shift; val = val << shift; @@ -429841,7 +552081,7 @@ index 08eaa9ddf191e..e73360e9de8f9 100644 } return ret; -@@ -856,8 +911,11 @@ int snd_soc_put_xr_sx(struct snd_kcontrol *kcontrol, +@@ -856,8 +916,11 @@ int snd_soc_put_xr_sx(struct snd_kcontrol *kcontrol, unsigned long mask = (1UL<<mc->nbits)-1; long max = mc->max; long val = ucontrol->value.integer.value[0]; @@ -429853,7 +552093,7 @@ index 08eaa9ddf191e..e73360e9de8f9 100644 if (invert) val = max - val; val &= mask; -@@ -868,9 +926,11 @@ int snd_soc_put_xr_sx(struct snd_kcontrol *kcontrol, +@@ -868,9 +931,11 @@ int snd_soc_put_xr_sx(struct snd_kcontrol *kcontrol, regmask, regval); if (err < 0) return err; @@ -429866,8 +552106,33 @@ index 08eaa9ddf191e..e73360e9de8f9 100644 } EXPORT_SYMBOL_GPL(snd_soc_put_xr_sx); +diff --git a/sound/soc/soc-pcm.c b/sound/soc/soc-pcm.c +index 48f71bb81a2f2..3b673477f6215 100644 +--- a/sound/soc/soc-pcm.c ++++ b/sound/soc/soc-pcm.c +@@ -759,11 +759,6 @@ static int soc_pcm_open(struct snd_pcm_substream *substream) + ret = snd_soc_dai_startup(dai, substream); + if (ret < 0) + goto err; +- +- if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) +- dai->tx_mask = 0; +- else +- dai->rx_mask = 0; + } + + /* Dynamic PCM DAI links compat checks use dynamic capabilities */ +@@ -1171,6 +1166,8 @@ static void dpcm_be_reparent(struct snd_soc_pcm_runtime *fe, + return; + + be_substream = snd_soc_dpcm_get_substream(be, stream); ++ if (!be_substream) ++ return; + + for_each_dpcm_fe(be, stream, dpcm) { + if (dpcm->fe == fe) diff --git a/sound/soc/soc-topology.c b/sound/soc/soc-topology.c -index f6e5ac3e03140..eff8d4f715611 100644 +index f6e5ac3e03140..55b69e3c67186 100644 --- a/sound/soc/soc-topology.c +++ b/sound/soc/soc-topology.c @@ -510,7 +510,8 @@ static int soc_tplg_kcontrol_bind_io(struct snd_soc_tplg_ctl_hdr *hdr, @@ -429880,22 +552145,29 @@ index f6e5ac3e03140..eff8d4f715611 100644 && k->access & SNDRV_CTL_ELEM_ACCESS_TLV_CALLBACK) { struct soc_bytes_ext *sbe; struct snd_soc_tplg_bytes_control *be; -@@ -1480,12 +1481,12 @@ static int soc_tplg_dapm_widget_create(struct soc_tplg *tplg, +@@ -1479,13 +1480,17 @@ static int soc_tplg_dapm_widget_create(struct soc_tplg *tplg, + template.num_kcontrols = le32_to_cpu(w->num_kcontrols); kc = devm_kcalloc(tplg->dev, le32_to_cpu(w->num_kcontrols), sizeof(*kc), GFP_KERNEL); - if (!kc) +- if (!kc) - goto err; ++ if (!kc) { ++ ret = -ENOMEM; + goto hdr_err; ++ } kcontrol_type = devm_kcalloc(tplg->dev, le32_to_cpu(w->num_kcontrols), sizeof(unsigned int), GFP_KERNEL); - if (!kcontrol_type) +- if (!kcontrol_type) - goto err; ++ if (!kcontrol_type) { ++ ret = -ENOMEM; + goto hdr_err; ++ } for (i = 0; i < w->num_kcontrols; i++) { control_hdr = (struct snd_soc_tplg_ctl_hdr *)tplg->pos; -@@ -2674,6 +2675,7 @@ EXPORT_SYMBOL_GPL(snd_soc_tplg_component_load); +@@ -2674,6 +2679,7 @@ EXPORT_SYMBOL_GPL(snd_soc_tplg_component_load); /* remove dynamic controls from the component driver */ int snd_soc_tplg_component_remove(struct snd_soc_component *comp) { @@ -429903,7 +552175,7 @@ index f6e5ac3e03140..eff8d4f715611 100644 struct snd_soc_dobj *dobj, *next_dobj; int pass = SOC_TPLG_PASS_END; -@@ -2681,6 +2683,7 @@ int snd_soc_tplg_component_remove(struct snd_soc_component *comp) +@@ -2681,6 +2687,7 @@ int snd_soc_tplg_component_remove(struct snd_soc_component *comp) while (pass >= SOC_TPLG_PASS_START) { /* remove mixer controls */ @@ -429911,7 
+552183,7 @@ index f6e5ac3e03140..eff8d4f715611 100644 list_for_each_entry_safe(dobj, next_dobj, &comp->dobj_list, list) { -@@ -2719,6 +2722,7 @@ int snd_soc_tplg_component_remove(struct snd_soc_component *comp) +@@ -2719,6 +2726,7 @@ int snd_soc_tplg_component_remove(struct snd_soc_component *comp) break; } } @@ -429919,6 +552191,19 @@ index f6e5ac3e03140..eff8d4f715611 100644 pass--; } +diff --git a/sound/soc/soc-utils.c b/sound/soc/soc-utils.c +index 299b5d6ebfd13..f2c9d97c19c74 100644 +--- a/sound/soc/soc-utils.c ++++ b/sound/soc/soc-utils.c +@@ -206,7 +206,7 @@ int __init snd_soc_util_init(void) + return ret; + } + +-void __exit snd_soc_util_exit(void) ++void snd_soc_util_exit(void) + { + platform_driver_unregister(&soc_dummy_driver); + platform_device_unregister(soc_dummy_dev); diff --git a/sound/soc/sof/debug.c b/sound/soc/sof/debug.c index a51a928ea40a7..5f780ef9581a9 100644 --- a/sound/soc/sof/debug.c @@ -430035,7 +552320,7 @@ index 30025d3c16b6e..0862ff8b66273 100644 static const struct hdac_bus_ops bus_core_ops = { diff --git a/sound/soc/sof/intel/hda-dai.c b/sound/soc/sof/intel/hda-dai.c -index c1f9f0f584647..6704dbcd101cd 100644 +index c1f9f0f584647..56653d78d2200 100644 --- a/sound/soc/sof/intel/hda-dai.c +++ b/sound/soc/sof/intel/hda-dai.c @@ -68,6 +68,7 @@ static struct hdac_ext_stream * @@ -430062,6 +552347,35 @@ index c1f9f0f584647..6704dbcd101cd 100644 return res; } +@@ -211,6 +212,10 @@ static int hda_link_hw_params(struct snd_pcm_substream *substream, + int stream_tag; + int ret; + ++ link = snd_hdac_ext_bus_get_link(bus, codec_dai->component->name); ++ if (!link) ++ return -EINVAL; ++ + /* get stored dma data if resuming from system suspend */ + link_dev = snd_soc_dai_get_dma_data(dai, substream); + if (!link_dev) { +@@ -231,15 +236,8 @@ static int hda_link_hw_params(struct snd_pcm_substream *substream, + if (ret < 0) + return ret; + +- link = snd_hdac_ext_bus_get_link(bus, codec_dai->component->name); +- if (!link) +- return -EINVAL; +- +- /* set the stream tag in the codec dai dma params */ +- if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) +- snd_soc_dai_set_tdm_slot(codec_dai, stream_tag, 0, 0, 0); +- else +- snd_soc_dai_set_tdm_slot(codec_dai, 0, stream_tag, 0, 0); ++ /* set the hdac_stream in the codec dai */ ++ snd_soc_dai_set_stream(codec_dai, hdac_stream(link_dev), substream->stream); + + p_params.s_fmt = snd_pcm_format_width(params_format(params)); + p_params.ch = params_channels(params); diff --git a/sound/soc/sof/intel/hda-dsp.c b/sound/soc/sof/intel/hda-dsp.c index 623cf291e2074..262a70791a8f8 100644 --- a/sound/soc/sof/intel/hda-dsp.c @@ -430484,6 +552798,35 @@ index 136059331211d..065c5f0d1f5f0 100644 ret = IRQ_HANDLED; } +diff --git a/sound/soc/stm/stm32_adfsdm.c b/sound/soc/stm/stm32_adfsdm.c +index e6078f50e508e..1e9b4b1df69e3 100644 +--- a/sound/soc/stm/stm32_adfsdm.c ++++ b/sound/soc/stm/stm32_adfsdm.c +@@ -303,6 +303,11 @@ static int stm32_adfsdm_dummy_cb(const void *data, void *private) + return 0; + } + ++static void stm32_adfsdm_cleanup(void *data) ++{ ++ iio_channel_release_all_cb(data); ++} ++ + static struct snd_soc_component_driver stm32_adfsdm_soc_platform = { + .open = stm32_adfsdm_pcm_open, + .close = stm32_adfsdm_pcm_close, +@@ -349,6 +354,12 @@ static int stm32_adfsdm_probe(struct platform_device *pdev) + if (IS_ERR(priv->iio_cb)) + return PTR_ERR(priv->iio_cb); + ++ ret = devm_add_action_or_reset(&pdev->dev, stm32_adfsdm_cleanup, priv->iio_cb); ++ if (ret < 0) { ++ dev_err(&pdev->dev, "Unable to add action\n"); ++ return ret; ++ } 
++ + component = devm_kzalloc(&pdev->dev, sizeof(*component), GFP_KERNEL); + if (!component) + return -ENOMEM; diff --git a/sound/soc/stm/stm32_i2s.c b/sound/soc/stm/stm32_i2s.c index 6254bacad6eb7..717f45a83445c 100644 --- a/sound/soc/stm/stm32_i2s.c @@ -431963,7 +554306,7 @@ index 76c0e37a838cf..8a2da6b1012eb 100644 return 0; diff --git a/sound/synth/emux/emux.c b/sound/synth/emux/emux.c -index 49d1976a132c0..5ed8e36d2e043 100644 +index 49d1976a132c0..a870759d179ed 100644 --- a/sound/synth/emux/emux.c +++ b/sound/synth/emux/emux.c @@ -88,7 +88,7 @@ int snd_emux_register(struct snd_emux *emu, struct snd_card *card, int index, ch @@ -431975,6 +554318,37 @@ index 49d1976a132c0..5ed8e36d2e043 100644 return -ENOMEM; /* create soundfont list */ +@@ -126,15 +126,10 @@ EXPORT_SYMBOL(snd_emux_register); + */ + int snd_emux_free(struct snd_emux *emu) + { +- unsigned long flags; +- + if (! emu) + return -EINVAL; + +- spin_lock_irqsave(&emu->voice_lock, flags); +- if (emu->timer_active) +- del_timer(&emu->tlist); +- spin_unlock_irqrestore(&emu->voice_lock, flags); ++ del_timer_sync(&emu->tlist); + + snd_emux_proc_free(emu); + snd_emux_delete_virmidi(emu); +diff --git a/sound/synth/emux/emux_nrpn.c b/sound/synth/emux/emux_nrpn.c +index 8056422ed7c51..0d6b82ae29558 100644 +--- a/sound/synth/emux/emux_nrpn.c ++++ b/sound/synth/emux/emux_nrpn.c +@@ -349,6 +349,9 @@ int + snd_emux_xg_control(struct snd_emux_port *port, struct snd_midi_channel *chan, + int param) + { ++ if (param >= ARRAY_SIZE(chan->control)) ++ return -EINVAL; ++ + return send_converted_effect(xg_effects, ARRAY_SIZE(xg_effects), + port, chan, param, + chan->control[param], diff --git a/sound/usb/6fire/comm.c b/sound/usb/6fire/comm.c index 43a2a62d66f7e..49629d4bb327a 100644 --- a/sound/usb/6fire/comm.c @@ -432034,7 +554408,7 @@ index cd4a0bc6d278f..7aec0a95c609a 100644 usb_free_urb(bcd2k->midi_out_urb); usb_free_urb(bcd2k->midi_in_urb); diff --git a/sound/usb/card.c b/sound/usb/card.c -index 1764b9302d467..4526f1d1fd6ee 100644 +index 1764b9302d467..550c6a72fb5bc 100644 --- a/sound/usb/card.c +++ b/sound/usb/card.c @@ -387,6 +387,14 @@ static const struct usb_audio_device_name usb_audio_names[] = { @@ -432095,7 +554469,26 @@ index 1764b9302d467..4526f1d1fd6ee 100644 /* look for the corresponding quirk */ static const struct snd_usb_audio_quirk * get_alias_quirk(struct usb_device *dev, unsigned int id) -@@ -804,6 +829,7 @@ static int usb_audio_probe(struct usb_interface *intf, +@@ -716,6 +741,18 @@ get_alias_quirk(struct usb_device *dev, unsigned int id) + return NULL; + } + ++/* register card if we reach to the last interface or to the specified ++ * one given via option ++ */ ++static int try_to_register_card(struct snd_usb_audio *chip, int ifnum) ++{ ++ if (check_delayed_register_option(chip) == ifnum || ++ chip->last_iface == ifnum || ++ usb_interface_claimed(usb_ifnum_to_if(chip->dev, chip->last_iface))) ++ return snd_card_register(chip->card); ++ return 0; ++} ++ + /* + * probe the active usb device + * +@@ -804,6 +841,7 @@ static int usb_audio_probe(struct usb_interface *intf, err = -ENODEV; goto __error; } @@ -432103,22 +554496,37 @@ index 1764b9302d467..4526f1d1fd6ee 100644 } if (chip->num_interfaces >= MAX_CARD_INTERFACES) { -@@ -853,11 +879,11 @@ static int usb_audio_probe(struct usb_interface *intf, +@@ -853,15 +891,9 @@ static int usb_audio_probe(struct usb_interface *intf, chip->need_delayed_register = false; /* clear again */ } - /* we are allowed to call snd_card_register() many times, but first - * check to see if a device 
needs to skip it or do anything special -+ /* register card if we reach to the last interface or to the specified -+ * one given via option - */ +- */ - if (!snd_usb_registration_quirk(chip, ifnum) && - !check_delayed_register_option(chip, ifnum)) { -+ if (check_delayed_register_option(chip) == ifnum || -+ usb_interface_claimed(usb_ifnum_to_if(dev, chip->last_iface))) { - err = snd_card_register(chip->card); - if (err < 0) - goto __error; +- err = snd_card_register(chip->card); +- if (err < 0) +- goto __error; +- } ++ err = try_to_register_card(chip, ifnum); ++ if (err < 0) ++ goto __error_no_register; + + if (chip->quirk_flags & QUIRK_FLAG_SHARE_MEDIA_DEVICE) { + /* don't want to fail when snd_media_device_create() fails */ +@@ -880,6 +912,11 @@ static int usb_audio_probe(struct usb_interface *intf, + return 0; + + __error: ++ /* in the case of error in secondary interface, still try to register */ ++ if (chip) ++ try_to_register_card(chip, ifnum); ++ ++ __error_no_register: + if (chip) { + /* chip->active is inside the chip->card object, + * decrement before memory is possibly returned. diff --git a/sound/usb/card.h b/sound/usb/card.h index 5b19901f305a3..87f042d06ce08 100644 --- a/sound/usb/card.h @@ -432205,7 +554613,7 @@ index 81d5ce07d548b..ccca9efa7d33f 100644 /* validate clock after rate change */ if (!uac_clock_source_is_valid(chip, fmt, clock)) diff --git a/sound/usb/endpoint.c b/sound/usb/endpoint.c -index 533919a28856f..3bbc227769d01 100644 +index 533919a28856f..092350eb5f4e3 100644 --- a/sound/usb/endpoint.c +++ b/sound/usb/endpoint.c @@ -85,12 +85,13 @@ static inline unsigned get_usb_high_speed_rate(unsigned int rate) @@ -432545,7 +554953,17 @@ index 533919a28856f..3bbc227769d01 100644 WRITE_ONCE(ep->data_subs, data_subs); } -@@ -833,6 +895,7 @@ void snd_usb_endpoint_close(struct snd_usb_audio *chip, +@@ -825,7 +887,8 @@ void snd_usb_endpoint_close(struct snd_usb_audio *chip, + usb_audio_dbg(chip, "Closing EP 0x%x (count %d)\n", + ep->ep_num, ep->opened); + +- if (!--ep->iface_ref->opened) ++ if (!--ep->iface_ref->opened && ++ !(chip->quirk_flags & QUIRK_FLAG_IFACE_SKIP_CLOSE)) + endpoint_set_interface(chip, ep, false); + + if (!--ep->opened) { +@@ -833,6 +896,7 @@ void snd_usb_endpoint_close(struct snd_usb_audio *chip, ep->altsetting = 0; ep->cur_audiofmt = NULL; ep->cur_rate = 0; @@ -432553,7 +554971,7 @@ index 533919a28856f..3bbc227769d01 100644 ep->iface_ref = NULL; usb_audio_dbg(chip, "EP 0x%x closed\n", ep->ep_num); } -@@ -859,7 +922,7 @@ static int wait_clear_urbs(struct snd_usb_endpoint *ep) +@@ -859,7 +923,7 @@ static int wait_clear_urbs(struct snd_usb_endpoint *ep) return 0; do { @@ -432562,7 +554980,7 @@ index 533919a28856f..3bbc227769d01 100644 if (!alive) break; -@@ -893,9 +956,10 @@ void snd_usb_endpoint_sync_pending_stop(struct snd_usb_endpoint *ep) +@@ -893,9 +957,10 @@ void snd_usb_endpoint_sync_pending_stop(struct snd_usb_endpoint *ep) * * This function moves the EP to STOPPING state if it's being RUNNING. 
*/ @@ -432574,7 +554992,7 @@ index 533919a28856f..3bbc227769d01 100644 if (!force && atomic_read(&ep->running)) return -EBUSY; -@@ -903,9 +967,14 @@ static int stop_urbs(struct snd_usb_endpoint *ep, bool force) +@@ -903,9 +968,14 @@ static int stop_urbs(struct snd_usb_endpoint *ep, bool force) if (!ep_state_update(ep, EP_STATE_RUNNING, EP_STATE_STOPPING)) return 0; @@ -432589,7 +555007,7 @@ index 533919a28856f..3bbc227769d01 100644 for (i = 0; i < ep->nurbs; i++) { if (test_bit(i, &ep->active_mask)) { -@@ -930,7 +999,7 @@ static int release_urbs(struct snd_usb_endpoint *ep, bool force) +@@ -930,7 +1000,7 @@ static int release_urbs(struct snd_usb_endpoint *ep, bool force) snd_usb_endpoint_set_callback(ep, NULL, NULL, NULL); /* stop and unlink urbs */ @@ -432598,7 +555016,7 @@ index 533919a28856f..3bbc227769d01 100644 if (err) return err; -@@ -1132,10 +1201,6 @@ static int data_ep_set_params(struct snd_usb_endpoint *ep) +@@ -1132,10 +1202,6 @@ static int data_ep_set_params(struct snd_usb_endpoint *ep) INIT_LIST_HEAD(&u->ready_list); } @@ -432609,7 +555027,7 @@ index 533919a28856f..3bbc227769d01 100644 return 0; out_of_memory: -@@ -1159,6 +1224,7 @@ static int sync_ep_set_params(struct snd_usb_endpoint *ep) +@@ -1159,6 +1225,7 @@ static int sync_ep_set_params(struct snd_usb_endpoint *ep) if (!ep->syncbuf) return -ENOMEM; @@ -432617,7 +555035,7 @@ index 533919a28856f..3bbc227769d01 100644 for (i = 0; i < SYNC_URBS; i++) { struct snd_urb_ctx *u = &ep->urb[i]; u->index = i; -@@ -1178,8 +1244,6 @@ static int sync_ep_set_params(struct snd_usb_endpoint *ep) +@@ -1178,8 +1245,6 @@ static int sync_ep_set_params(struct snd_usb_endpoint *ep) u->urb->complete = snd_complete_urb; } @@ -432626,7 +555044,7 @@ index 533919a28856f..3bbc227769d01 100644 return 0; out_of_memory: -@@ -1340,6 +1404,25 @@ unlock: +@@ -1340,6 +1405,25 @@ unlock: return err; } @@ -432652,7 +555070,7 @@ index 533919a28856f..3bbc227769d01 100644 /** * snd_usb_endpoint_start: start an snd_usb_endpoint * -@@ -1355,6 +1438,7 @@ unlock: +@@ -1355,6 +1439,7 @@ unlock: */ int snd_usb_endpoint_start(struct snd_usb_endpoint *ep) { @@ -432660,7 +555078,7 @@ index 533919a28856f..3bbc227769d01 100644 int err; unsigned int i; -@@ -1391,13 +1475,9 @@ int snd_usb_endpoint_start(struct snd_usb_endpoint *ep) +@@ -1391,13 +1476,9 @@ int snd_usb_endpoint_start(struct snd_usb_endpoint *ep) if (snd_usb_endpoint_implicit_feedback_sink(ep) && !(ep->chip->quirk_flags & QUIRK_FLAG_PLAYBACK_FIRST)) { @@ -432676,7 +555094,7 @@ index 533919a28856f..3bbc227769d01 100644 } for (i = 0; i < ep->nurbs; i++) { -@@ -1406,10 +1486,18 @@ int snd_usb_endpoint_start(struct snd_usb_endpoint *ep) +@@ -1406,10 +1487,18 @@ int snd_usb_endpoint_start(struct snd_usb_endpoint *ep) if (snd_BUG_ON(!urb)) goto __error; @@ -432699,7 +555117,7 @@ index 533919a28856f..3bbc227769d01 100644 } err = usb_submit_urb(urb, GFP_ATOMIC); -@@ -1420,14 +1508,29 @@ int snd_usb_endpoint_start(struct snd_usb_endpoint *ep) +@@ -1420,14 +1509,29 @@ int snd_usb_endpoint_start(struct snd_usb_endpoint *ep) goto __error; } set_bit(i, &ep->active_mask); @@ -432731,7 +555149,7 @@ index 533919a28856f..3bbc227769d01 100644 return -EPIPE; } -@@ -1435,6 +1538,7 @@ __error: +@@ -1435,6 +1539,7 @@ __error: * snd_usb_endpoint_stop: stop an snd_usb_endpoint * * @ep: the endpoint to stop (may be NULL) @@ -432739,7 +555157,7 @@ index 533919a28856f..3bbc227769d01 100644 * * A call to this function will decrement the running count of the endpoint. 
* In case the last user has requested the endpoint stop, the URBs will -@@ -1445,7 +1549,7 @@ __error: +@@ -1445,7 +1550,7 @@ __error: * The caller needs to synchronize the pending stop operation via * snd_usb_endpoint_sync_pending_stop(). */ @@ -432748,7 +555166,7 @@ index 533919a28856f..3bbc227769d01 100644 { if (!ep) return; -@@ -1460,7 +1564,7 @@ void snd_usb_endpoint_stop(struct snd_usb_endpoint *ep) +@@ -1460,7 +1565,7 @@ void snd_usb_endpoint_stop(struct snd_usb_endpoint *ep) if (!atomic_dec_return(&ep->running)) { if (ep->sync_source) WRITE_ONCE(ep->sync_source->sync_sink, NULL); @@ -432757,7 +555175,7 @@ index 533919a28856f..3bbc227769d01 100644 } } -@@ -1575,7 +1679,7 @@ static void snd_usb_handle_sync_urb(struct snd_usb_endpoint *ep, +@@ -1575,7 +1680,7 @@ static void snd_usb_handle_sync_urb(struct snd_usb_endpoint *ep, } spin_unlock_irqrestore(&ep->lock, flags); @@ -432830,10 +555248,10 @@ index 50efccbffb8a7..405dc0bf6678c 100644 case USB_ID(0x0e41, 0x4248): /* Line6 Helix >= fw 2.82 */ case USB_ID(0x0e41, 0x4249): /* Line6 Helix Rack >= fw 2.82 */ diff --git a/sound/usb/implicit.c b/sound/usb/implicit.c -index 23767a14d1266..e1bf1b5da423c 100644 +index 23767a14d1266..f3e8484b3d9cb 100644 --- a/sound/usb/implicit.c +++ b/sound/usb/implicit.c -@@ -45,17 +45,10 @@ struct snd_usb_implicit_fb_match { +@@ -45,17 +45,12 @@ struct snd_usb_implicit_fb_match { /* Implicit feedback quirk table for playback */ static const struct snd_usb_implicit_fb_match playback_implicit_fb_quirks[] = { @@ -432848,12 +555266,14 @@ index 23767a14d1266..e1bf1b5da423c 100644 /* FIXME: check the availability of generic matching */ - IMPLICIT_FB_FIXED_DEV(0x1397, 0x0001, 0x81, 1), /* Behringer UFX1604 */ - IMPLICIT_FB_FIXED_DEV(0x1397, 0x0002, 0x81, 1), /* Behringer UFX1204 */ ++ IMPLICIT_FB_FIXED_DEV(0x0763, 0x2030, 0x81, 3), /* M-Audio Fast Track C400 */ ++ IMPLICIT_FB_FIXED_DEV(0x0763, 0x2031, 0x81, 3), /* M-Audio Fast Track C600 */ + IMPLICIT_FB_FIXED_DEV(0x0763, 0x2080, 0x81, 2), /* M-Audio FastTrack Ultra */ + IMPLICIT_FB_FIXED_DEV(0x0763, 0x2081, 0x81, 2), /* M-Audio FastTrack Ultra */ IMPLICIT_FB_FIXED_DEV(0x2466, 0x8010, 0x81, 2), /* Fractal Audio Axe-Fx III */ IMPLICIT_FB_FIXED_DEV(0x31e9, 0x0001, 0x81, 2), /* Solid State Logic SSL2 */ IMPLICIT_FB_FIXED_DEV(0x31e9, 0x0002, 0x81, 2), /* Solid State Logic SSL2+ */ -@@ -352,7 +345,8 @@ static int audioformat_implicit_fb_quirk(struct snd_usb_audio *chip, +@@ -352,7 +347,8 @@ static int audioformat_implicit_fb_quirk(struct snd_usb_audio *chip, } /* Try the generic implicit fb if available */ @@ -432863,7 +555283,7 @@ index 23767a14d1266..e1bf1b5da423c 100644 return add_generic_implicit_fb(chip, fmt, alts); /* No quirk */ -@@ -389,6 +383,8 @@ int snd_usb_parse_implicit_fb_quirk(struct snd_usb_audio *chip, +@@ -389,6 +385,8 @@ int snd_usb_parse_implicit_fb_quirk(struct snd_usb_audio *chip, struct audioformat *fmt, struct usb_host_interface *alts) { @@ -432873,7 +555293,7 @@ index 23767a14d1266..e1bf1b5da423c 100644 return audioformat_capture_quirk(chip, fmt, alts); else diff --git a/sound/usb/line6/driver.c b/sound/usb/line6/driver.c -index 9602929b7de90..59faa5a9a7141 100644 +index 9602929b7de90..b67617b68e509 100644 --- a/sound/usb/line6/driver.c +++ b/sound/usb/line6/driver.c @@ -113,12 +113,12 @@ int line6_send_raw_message(struct usb_line6 *line6, const char *buffer, @@ -432891,7 +555311,17 @@ index 9602929b7de90..59faa5a9a7141 100644 } if (retval) { -@@ -347,7 +347,7 @@ int line6_read_data(struct usb_line6 *line6, unsigned address, void *data, +@@ 
-304,7 +304,8 @@ static void line6_data_received(struct urb *urb) + for (;;) { + done = + line6_midibuf_read(mb, line6->buffer_message, +- LINE6_MIDI_MESSAGE_MAXLEN); ++ LINE6_MIDI_MESSAGE_MAXLEN, ++ LINE6_MIDIBUF_READ_RX); + + if (done <= 0) + break; +@@ -347,7 +348,7 @@ int line6_read_data(struct usb_line6 *line6, unsigned address, void *data, ret = usb_control_msg_send(usbdev, 0, 0x67, USB_TYPE_VENDOR | USB_RECIP_DEVICE | USB_DIR_OUT, (datalen << 8) | 0x21, address, NULL, 0, @@ -432900,7 +555330,7 @@ index 9602929b7de90..59faa5a9a7141 100644 if (ret) { dev_err(line6->ifcdev, "read request failed (error %d)\n", ret); goto exit; -@@ -360,7 +360,7 @@ int line6_read_data(struct usb_line6 *line6, unsigned address, void *data, +@@ -360,7 +361,7 @@ int line6_read_data(struct usb_line6 *line6, unsigned address, void *data, ret = usb_control_msg_recv(usbdev, 0, 0x67, USB_TYPE_VENDOR | USB_RECIP_DEVICE | USB_DIR_IN, 0x0012, 0x0000, &len, 1, @@ -432909,7 +555339,7 @@ index 9602929b7de90..59faa5a9a7141 100644 if (ret) { dev_err(line6->ifcdev, "receive length failed (error %d)\n", ret); -@@ -387,7 +387,7 @@ int line6_read_data(struct usb_line6 *line6, unsigned address, void *data, +@@ -387,7 +388,7 @@ int line6_read_data(struct usb_line6 *line6, unsigned address, void *data, /* receive the result: */ ret = usb_control_msg_recv(usbdev, 0, 0x67, USB_TYPE_VENDOR | USB_RECIP_DEVICE | USB_DIR_IN, @@ -432918,7 +555348,7 @@ index 9602929b7de90..59faa5a9a7141 100644 GFP_KERNEL); if (ret) dev_err(line6->ifcdev, "read failed (error %d)\n", ret); -@@ -417,7 +417,7 @@ int line6_write_data(struct usb_line6 *line6, unsigned address, void *data, +@@ -417,7 +418,7 @@ int line6_write_data(struct usb_line6 *line6, unsigned address, void *data, ret = usb_control_msg_send(usbdev, 0, 0x67, USB_TYPE_VENDOR | USB_RECIP_DEVICE | USB_DIR_OUT, @@ -432927,7 +555357,7 @@ index 9602929b7de90..59faa5a9a7141 100644 GFP_KERNEL); if (ret) { dev_err(line6->ifcdev, -@@ -430,7 +430,7 @@ int line6_write_data(struct usb_line6 *line6, unsigned address, void *data, +@@ -430,7 +431,7 @@ int line6_write_data(struct usb_line6 *line6, unsigned address, void *data, ret = usb_control_msg_recv(usbdev, 0, 0x67, USB_TYPE_VENDOR | USB_RECIP_DEVICE | USB_DIR_IN, @@ -432949,6 +555379,127 @@ index 71d3da1db8c81..ecf3a2b39c7eb 100644 #define LINE6_BUFSIZE_LISTEN 64 #define LINE6_MIDI_MESSAGE_MAXLEN 256 +diff --git a/sound/usb/line6/midi.c b/sound/usb/line6/midi.c +index ba0e2b7e8fe19..0838632c788e4 100644 +--- a/sound/usb/line6/midi.c ++++ b/sound/usb/line6/midi.c +@@ -44,7 +44,8 @@ static void line6_midi_transmit(struct snd_rawmidi_substream *substream) + int req, done; + + for (;;) { +- req = min(line6_midibuf_bytes_free(mb), line6->max_packet_size); ++ req = min3(line6_midibuf_bytes_free(mb), line6->max_packet_size, ++ LINE6_FALLBACK_MAXPACKETSIZE); + done = snd_rawmidi_transmit_peek(substream, chunk, req); + + if (done == 0) +@@ -56,7 +57,8 @@ static void line6_midi_transmit(struct snd_rawmidi_substream *substream) + + for (;;) { + done = line6_midibuf_read(mb, chunk, +- LINE6_FALLBACK_MAXPACKETSIZE); ++ LINE6_FALLBACK_MAXPACKETSIZE, ++ LINE6_MIDIBUF_READ_TX); + + if (done == 0) + break; +diff --git a/sound/usb/line6/midibuf.c b/sound/usb/line6/midibuf.c +index 6a70463f82c4e..e7f830f7526c9 100644 +--- a/sound/usb/line6/midibuf.c ++++ b/sound/usb/line6/midibuf.c +@@ -9,6 +9,7 @@ + + #include "midibuf.h" + ++ + static int midibuf_message_length(unsigned char code) + { + int message_length; +@@ -20,12 +21,7 @@ static int 
midibuf_message_length(unsigned char code) + + message_length = length[(code >> 4) - 8]; + } else { +- /* +- Note that according to the MIDI specification 0xf2 is +- the "Song Position Pointer", but this is used by Line 6 +- to send sysex messages to the host. +- */ +- static const int length[] = { -1, 2, -1, 2, -1, -1, 1, 1, 1, 1, ++ static const int length[] = { -1, 2, 2, 2, -1, -1, 1, 1, 1, -1, + 1, 1, 1, -1, 1, 1 + }; + message_length = length[code & 0x0f]; +@@ -125,7 +121,7 @@ int line6_midibuf_write(struct midi_buffer *this, unsigned char *data, + } + + int line6_midibuf_read(struct midi_buffer *this, unsigned char *data, +- int length) ++ int length, int read_type) + { + int bytes_used; + int length1, length2; +@@ -148,9 +144,22 @@ int line6_midibuf_read(struct midi_buffer *this, unsigned char *data, + + length1 = this->size - this->pos_read; + +- /* check MIDI command length */ + command = this->buf[this->pos_read]; ++ /* ++ PODxt always has status byte lower nibble set to 0010, ++ when it means to send 0000, so we correct if here so ++ that control/program changes come on channel 1 and ++ sysex message status byte is correct ++ */ ++ if (read_type == LINE6_MIDIBUF_READ_RX) { ++ if (command == 0xb2 || command == 0xc2 || command == 0xf2) { ++ unsigned char fixed = command & 0xf0; ++ this->buf[this->pos_read] = fixed; ++ command = fixed; ++ } ++ } + ++ /* check MIDI command length */ + if (command & 0x80) { + midi_length = midibuf_message_length(command); + this->command_prev = command; +diff --git a/sound/usb/line6/midibuf.h b/sound/usb/line6/midibuf.h +index 124a8f9f7e96c..542e8d836f87d 100644 +--- a/sound/usb/line6/midibuf.h ++++ b/sound/usb/line6/midibuf.h +@@ -8,6 +8,9 @@ + #ifndef MIDIBUF_H + #define MIDIBUF_H + ++#define LINE6_MIDIBUF_READ_TX 0 ++#define LINE6_MIDIBUF_READ_RX 1 ++ + struct midi_buffer { + unsigned char *buf; + int size; +@@ -23,7 +26,7 @@ extern void line6_midibuf_destroy(struct midi_buffer *mb); + extern int line6_midibuf_ignore(struct midi_buffer *mb, int length); + extern int line6_midibuf_init(struct midi_buffer *mb, int size, int split); + extern int line6_midibuf_read(struct midi_buffer *mb, unsigned char *data, +- int length); ++ int length, int read_type); + extern void line6_midibuf_reset(struct midi_buffer *mb); + extern int line6_midibuf_write(struct midi_buffer *mb, unsigned char *data, + int length); +diff --git a/sound/usb/line6/pod.c b/sound/usb/line6/pod.c +index 16e644330c4d6..54be5ac919bfb 100644 +--- a/sound/usb/line6/pod.c ++++ b/sound/usb/line6/pod.c +@@ -159,8 +159,9 @@ static struct line6_pcm_properties pod_pcm_properties = { + .bytes_per_channel = 3 /* SNDRV_PCM_FMTBIT_S24_3LE */ + }; + ++ + static const char pod_version_header[] = { +- 0xf2, 0x7e, 0x7f, 0x06, 0x02 ++ 0xf0, 0x7e, 0x7f, 0x06, 0x02 + }; + + static char *pod_alloc_sysex_buffer(struct usb_line6_pod *pod, int code, diff --git a/sound/usb/line6/podhd.c b/sound/usb/line6/podhd.c index 28794a35949d4..b24bc82f89e37 100644 --- a/sound/usb/line6/podhd.c @@ -432994,10 +555545,22 @@ index 4e5693c97aa42..e33df58740a91 100644 if (ret) { diff --git a/sound/usb/midi.c b/sound/usb/midi.c -index 2c01649c70f61..344fbeadf161b 100644 +index 2c01649c70f61..9a361b202a09d 100644 --- a/sound/usb/midi.c +++ b/sound/usb/midi.c -@@ -1145,6 +1145,9 @@ static int snd_usbmidi_output_open(struct snd_rawmidi_substream *substream) +@@ -1133,10 +1133,8 @@ static int snd_usbmidi_output_open(struct snd_rawmidi_substream *substream) + port = &umidi->endpoints[i].out->ports[j]; + break; + } +- if (!port) { +- 
snd_BUG(); ++ if (!port) + return -ENXIO; +- } + + substream->runtime->private_data = port; + port->state = STATE_UNKNOWN; +@@ -1145,6 +1143,9 @@ static int snd_usbmidi_output_open(struct snd_rawmidi_substream *substream) static int snd_usbmidi_output_close(struct snd_rawmidi_substream *substream) { @@ -433007,7 +555570,7 @@ index 2c01649c70f61..344fbeadf161b 100644 return substream_open(substream, 0, 0); } -@@ -1194,6 +1197,7 @@ static void snd_usbmidi_output_drain(struct snd_rawmidi_substream *substream) +@@ -1194,6 +1195,7 @@ static void snd_usbmidi_output_drain(struct snd_rawmidi_substream *substream) } while (drain_urbs && timeout); finish_wait(&ep->drain_wait, &wait); } @@ -433245,7 +555808,7 @@ index 46082dc57be09..d12b87e52d22a 100644 cval->min_mute = 1; break; diff --git a/sound/usb/pcm.c b/sound/usb/pcm.c -index 5dc9266180e37..b6cd43c5ea3e6 100644 +index 5dc9266180e37..87a30be643242 100644 --- a/sound/usb/pcm.c +++ b/sound/usb/pcm.c @@ -219,16 +219,16 @@ int snd_usb_init_pitch(struct snd_usb_audio *chip, @@ -433327,7 +555890,16 @@ index 5dc9266180e37..b6cd43c5ea3e6 100644 } return 0; -@@ -572,7 +580,7 @@ static int snd_usb_hw_free(struct snd_pcm_substream *substream) +@@ -517,6 +525,8 @@ static int snd_usb_hw_params(struct snd_pcm_substream *substream, + if (snd_usb_endpoint_compatible(chip, subs->data_endpoint, + fmt, hw_params)) + goto unlock; ++ if (stop_endpoints(subs, false)) ++ sync_pending_stops(subs); + close_endpoints(chip, subs); + } + +@@ -572,7 +582,7 @@ static int snd_usb_hw_free(struct snd_pcm_substream *substream) subs->cur_audiofmt = NULL; mutex_unlock(&chip->mutex); if (!snd_usb_lock_shutdown(chip)) { @@ -433336,7 +555908,7 @@ index 5dc9266180e37..b6cd43c5ea3e6 100644 sync_pending_stops(subs); close_endpoints(chip, subs); snd_usb_unlock_shutdown(chip); -@@ -581,6 +589,31 @@ static int snd_usb_hw_free(struct snd_pcm_substream *substream) +@@ -581,6 +591,31 @@ static int snd_usb_hw_free(struct snd_pcm_substream *substream) return 0; } @@ -433368,7 +555940,7 @@ index 5dc9266180e37..b6cd43c5ea3e6 100644 /* * prepare callback * -@@ -614,13 +647,9 @@ static int snd_usb_pcm_prepare(struct snd_pcm_substream *substream) +@@ -614,13 +649,9 @@ static int snd_usb_pcm_prepare(struct snd_pcm_substream *substream) subs->period_elapsed_pending = 0; runtime->delay = 0; @@ -433385,7 +555957,7 @@ index 5dc9266180e37..b6cd43c5ea3e6 100644 ret = start_endpoints(subs); unlock: -@@ -648,9 +677,9 @@ static const struct snd_pcm_hardware snd_usb_hardware = +@@ -648,9 +679,9 @@ static const struct snd_pcm_hardware snd_usb_hardware = SNDRV_PCM_INFO_PAUSE, .channels_min = 1, .channels_max = 256, @@ -433397,7 +555969,7 @@ index 5dc9266180e37..b6cd43c5ea3e6 100644 .periods_min = 2, .periods_max = 1024, }; -@@ -734,6 +763,7 @@ static int hw_rule_rate(struct snd_pcm_hw_params *params, +@@ -734,6 +765,7 @@ static int hw_rule_rate(struct snd_pcm_hw_params *params, struct snd_pcm_hw_rule *rule) { struct snd_usb_substream *subs = rule->private; @@ -433405,7 +555977,7 @@ index 5dc9266180e37..b6cd43c5ea3e6 100644 const struct audioformat *fp; struct snd_interval *it = hw_param_interval(params, SNDRV_PCM_HW_PARAM_RATE); unsigned int rmin, rmax, r; -@@ -745,6 +775,14 @@ static int hw_rule_rate(struct snd_pcm_hw_params *params, +@@ -745,6 +777,14 @@ static int hw_rule_rate(struct snd_pcm_hw_params *params, list_for_each_entry(fp, &subs->fmt_list, list) { if (!hw_check_valid_format(subs, params, fp)) continue; @@ -433420,7 +555992,23 @@ index 5dc9266180e37..b6cd43c5ea3e6 100644 if (fp->rate_table && 
fp->nr_rates) { for (i = 0; i < fp->nr_rates; i++) { r = fp->rate_table[i]; -@@ -1034,6 +1072,18 @@ static int setup_hw_info(struct snd_pcm_runtime *runtime, struct snd_usb_substre +@@ -869,8 +909,13 @@ get_sync_ep_from_substream(struct snd_usb_substream *subs) + continue; + /* for the implicit fb, check the sync ep as well */ + ep = snd_usb_get_endpoint(chip, fp->sync_ep); +- if (ep && ep->cur_audiofmt) +- return ep; ++ if (ep && ep->cur_audiofmt) { ++ /* ditto, if the sync (data) ep is used by others, ++ * this stream is restricted by the sync ep ++ */ ++ if (ep != subs->sync_endpoint || ep->opened > 1) ++ return ep; ++ } + } + return NULL; + } +@@ -1034,6 +1079,18 @@ static int setup_hw_info(struct snd_pcm_runtime *runtime, struct snd_usb_substre return err; } @@ -433439,7 +556027,7 @@ index 5dc9266180e37..b6cd43c5ea3e6 100644 /* additional hw constraints for implicit fb */ err = snd_pcm_hw_rule_add(runtime, 0, SNDRV_PCM_HW_PARAM_FORMAT, hw_rule_format_implicit_fb, subs, -@@ -1068,6 +1118,10 @@ static int snd_usb_pcm_open(struct snd_pcm_substream *substream) +@@ -1068,6 +1125,10 @@ static int snd_usb_pcm_open(struct snd_pcm_substream *substream) int ret; runtime->hw = snd_usb_hardware; @@ -433450,7 +556038,7 @@ index 5dc9266180e37..b6cd43c5ea3e6 100644 runtime->private_data = subs; subs->pcm_substream = substream; /* runtime PM is also done there */ -@@ -1320,44 +1374,66 @@ static unsigned int copy_to_urb_quirk(struct snd_usb_substream *subs, +@@ -1320,44 +1381,66 @@ static unsigned int copy_to_urb_quirk(struct snd_usb_substream *subs, return bytes; } @@ -433530,7 +556118,7 @@ index 5dc9266180e37..b6cd43c5ea3e6 100644 } i++; if (i < ctx->packets) { -@@ -1371,13 +1447,19 @@ static void prepare_playback_urb(struct snd_usb_substream *subs, +@@ -1371,13 +1454,19 @@ static void prepare_playback_urb(struct snd_usb_substream *subs, } } /* finish at the period boundary or after enough frames */ @@ -433553,7 +556141,7 @@ index 5dc9266180e37..b6cd43c5ea3e6 100644 if (unlikely(ep->cur_format == SNDRV_PCM_FORMAT_DSD_U16_LE && subs->cur_audiofmt->dsd_dop)) { fill_playback_urb_dsd_dop(subs, urb, bytes); -@@ -1403,14 +1485,23 @@ static void prepare_playback_urb(struct snd_usb_substream *subs, +@@ -1403,14 +1492,23 @@ static void prepare_playback_urb(struct snd_usb_substream *subs, subs->trigger_tstamp_pending_update = false; } @@ -433580,7 +556168,7 @@ index 5dc9266180e37..b6cd43c5ea3e6 100644 } /* -@@ -1442,6 +1533,27 @@ static void retire_playback_urb(struct snd_usb_substream *subs, +@@ -1442,6 +1540,27 @@ static void retire_playback_urb(struct snd_usb_substream *subs, snd_pcm_period_elapsed(subs->pcm_substream); } @@ -433608,7 +556196,7 @@ index 5dc9266180e37..b6cd43c5ea3e6 100644 static int snd_usb_substream_playback_trigger(struct snd_pcm_substream *substream, int cmd) { -@@ -1457,8 +1569,10 @@ static int snd_usb_substream_playback_trigger(struct snd_pcm_substream *substrea +@@ -1457,8 +1576,10 @@ static int snd_usb_substream_playback_trigger(struct snd_pcm_substream *substrea prepare_playback_urb, retire_playback_urb, subs); @@ -433620,7 +556208,7 @@ index 5dc9266180e37..b6cd43c5ea3e6 100644 err = start_endpoints(subs); if (err < 0) { snd_usb_endpoint_set_callback(subs->data_endpoint, -@@ -1473,7 +1587,7 @@ static int snd_usb_substream_playback_trigger(struct snd_pcm_substream *substrea +@@ -1473,7 +1594,7 @@ static int snd_usb_substream_playback_trigger(struct snd_pcm_substream *substrea return 0; case SNDRV_PCM_TRIGGER_SUSPEND: case SNDRV_PCM_TRIGGER_STOP: @@ -433629,7 +556217,7 @@ index 
5dc9266180e37..b6cd43c5ea3e6 100644 snd_usb_endpoint_set_callback(subs->data_endpoint, NULL, NULL, NULL); subs->running = 0; -@@ -1521,7 +1635,7 @@ static int snd_usb_substream_capture_trigger(struct snd_pcm_substream *substream +@@ -1521,7 +1642,7 @@ static int snd_usb_substream_capture_trigger(struct snd_pcm_substream *substream return 0; case SNDRV_PCM_TRIGGER_SUSPEND: case SNDRV_PCM_TRIGGER_STOP: @@ -433638,7 +556226,7 @@ index 5dc9266180e37..b6cd43c5ea3e6 100644 fallthrough; case SNDRV_PCM_TRIGGER_PAUSE_PUSH: snd_usb_endpoint_set_callback(subs->data_endpoint, -@@ -1545,6 +1659,7 @@ static const struct snd_pcm_ops snd_usb_playback_ops = { +@@ -1545,6 +1666,7 @@ static const struct snd_pcm_ops snd_usb_playback_ops = { .trigger = snd_usb_substream_playback_trigger, .sync_stop = snd_usb_pcm_sync_stop, .pointer = snd_usb_pcm_pointer, @@ -433647,10 +556235,19 @@ index 5dc9266180e37..b6cd43c5ea3e6 100644 static const struct snd_pcm_ops snd_usb_capture_ops = { diff --git a/sound/usb/quirks-table.h b/sound/usb/quirks-table.h -index 2af8c68fac275..f93201a830b5a 100644 +index 2af8c68fac275..b2f896e863532 100644 --- a/sound/usb/quirks-table.h +++ b/sound/usb/quirks-table.h -@@ -84,7 +84,7 @@ +@@ -76,6 +76,8 @@ + { USB_DEVICE_VENDOR_SPEC(0x041e, 0x3f0a) }, + /* E-Mu 0204 USB */ + { USB_DEVICE_VENDOR_SPEC(0x041e, 0x3f19) }, ++/* Ktmicro Usb_audio device */ ++{ USB_DEVICE_VENDOR_SPEC(0x31b2, 0x0011) }, + + /* + * Creative Technology, Ltd Live! Cam Sync HD [VF0770] +@@ -84,7 +86,7 @@ * combination. */ { @@ -433659,7 +556256,18 @@ index 2af8c68fac275..f93201a830b5a 100644 .driver_info = (unsigned long) &(const struct snd_usb_audio_quirk) { .ifnum = QUIRK_ANY_INTERFACE, .type = QUIRK_COMPOSITE, -@@ -2658,7 +2658,12 @@ YAMAHA_DEVICE(0x7010, "UB99"), +@@ -2049,6 +2051,10 @@ YAMAHA_DEVICE(0x7010, "UB99"), + } + } + }, ++{ ++ /* M-Audio Micro */ ++ USB_DEVICE_VENDOR_SPEC(0x0763, 0x201a), ++}, + { + USB_DEVICE_VENDOR_SPEC(0x0763, 0x2030), + .driver_info = (unsigned long) &(const struct snd_usb_audio_quirk) { +@@ -2658,7 +2664,12 @@ YAMAHA_DEVICE(0x7010, "UB99"), .nr_rates = 2, .rate_table = (unsigned int[]) { 44100, 48000 @@ -433673,7 +556281,7 @@ index 2af8c68fac275..f93201a830b5a 100644 } }, { -@@ -2672,6 +2677,7 @@ YAMAHA_DEVICE(0x7010, "UB99"), +@@ -2672,6 +2683,7 @@ YAMAHA_DEVICE(0x7010, "UB99"), .altset_idx = 1, .attributes = 0, .endpoint = 0x82, @@ -433681,7 +556289,7 @@ index 2af8c68fac275..f93201a830b5a 100644 .ep_attr = USB_ENDPOINT_XFER_ISOC, .datainterval = 1, .maxpacksize = 0x0126, -@@ -2875,6 +2881,7 @@ YAMAHA_DEVICE(0x7010, "UB99"), +@@ -2875,6 +2887,7 @@ YAMAHA_DEVICE(0x7010, "UB99"), .altset_idx = 1, .attributes = 0x4, .endpoint = 0x81, @@ -433689,7 +556297,7 @@ index 2af8c68fac275..f93201a830b5a 100644 .ep_attr = USB_ENDPOINT_XFER_ISOC | USB_ENDPOINT_SYNC_ASYNC, .maxpacksize = 0x130, -@@ -3235,6 +3242,15 @@ YAMAHA_DEVICE(0x7010, "UB99"), +@@ -3235,6 +3248,15 @@ YAMAHA_DEVICE(0x7010, "UB99"), } }, @@ -433705,7 +556313,7 @@ index 2af8c68fac275..f93201a830b5a 100644 /* disabled due to regression for other devices; * see https://bugzilla.kernel.org/show_bug.cgi?id=199905 */ -@@ -3382,6 +3398,7 @@ YAMAHA_DEVICE(0x7010, "UB99"), +@@ -3382,6 +3404,7 @@ YAMAHA_DEVICE(0x7010, "UB99"), .altset_idx = 1, .attributes = 0, .endpoint = 0x03, @@ -433713,7 +556321,7 @@ index 2af8c68fac275..f93201a830b5a 100644 .rates = SNDRV_PCM_RATE_96000, .ep_attr = USB_ENDPOINT_XFER_ISOC | USB_ENDPOINT_SYNC_ASYNC, -@@ -3785,6 +3802,54 @@ YAMAHA_DEVICE(0x7010, "UB99"), +@@ -3785,6 +3808,54 @@ YAMAHA_DEVICE(0x7010, 
"UB99"), } }, @@ -433768,7 +556376,7 @@ index 2af8c68fac275..f93201a830b5a 100644 /* * MacroSilicon MS2109 based HDMI capture cards * -@@ -3892,6 +3957,64 @@ YAMAHA_DEVICE(0x7010, "UB99"), +@@ -3892,6 +3963,64 @@ YAMAHA_DEVICE(0x7010, "UB99"), } } }, @@ -433833,7 +556441,7 @@ index 2af8c68fac275..f93201a830b5a 100644 { /* * Pioneer DJ DJM-850 -@@ -4044,6 +4167,206 @@ YAMAHA_DEVICE(0x7010, "UB99"), +@@ -4044,6 +4173,206 @@ YAMAHA_DEVICE(0x7010, "UB99"), } } }, @@ -434041,7 +556649,7 @@ index 2af8c68fac275..f93201a830b5a 100644 #undef USB_DEVICE_VENDOR_SPEC #undef USB_AUDIO_DEVICE diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c -index 8929d9abe8aa8..8c3b0be909eb0 100644 +index 8929d9abe8aa8..2ae9ad993ff47 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -1290,7 +1290,7 @@ int snd_usb_apply_interface_quirk(struct snd_usb_audio *chip, @@ -434061,7 +556669,15 @@ index 8929d9abe8aa8..8c3b0be909eb0 100644 case USB_ID(0x534d, 0x2109): /* MacroSilicon MS2109 */ subs->stream_offset_adj = 2; break; -@@ -1727,47 +1728,6 @@ void snd_usb_audioformat_attributes_quirk(struct snd_usb_audio *chip, +@@ -1610,6 +1611,7 @@ u64 snd_usb_interface_dsd_format_quirks(struct snd_usb_audio *chip, + /* XMOS based USB DACs */ + switch (chip->usb_id) { + case USB_ID(0x1511, 0x0037): /* AURALiC VEGA */ ++ case USB_ID(0x21ed, 0xd75a): /* Accuphase DAC-60 option card */ + case USB_ID(0x2522, 0x0012): /* LH Labs VI DAC Infinity */ + case USB_ID(0x2772, 0x0230): /* Pro-Ject Pre Box S2 Digital */ + if (fp->altsetting == 2) +@@ -1727,47 +1729,6 @@ void snd_usb_audioformat_attributes_quirk(struct snd_usb_audio *chip, } } @@ -434109,7 +556725,7 @@ index 8929d9abe8aa8..8c3b0be909eb0 100644 /* * driver behavior quirk flags */ -@@ -1792,6 +1752,8 @@ static const struct usb_audio_quirk_flags_table quirk_flags_table[] = { +@@ -1792,6 +1753,8 @@ static const struct usb_audio_quirk_flags_table quirk_flags_table[] = { QUIRK_FLAG_CTL_MSG_DELAY_1M | QUIRK_FLAG_IGNORE_CTL_ERROR), DEVICE_FLG(0x046d, 0x09a4, /* Logitech QuickCam E 3500 */ QUIRK_FLAG_CTL_MSG_DELAY_1M | QUIRK_FLAG_IGNORE_CTL_ERROR), @@ -434118,7 +556734,7 @@ index 8929d9abe8aa8..8c3b0be909eb0 100644 DEVICE_FLG(0x04d8, 0xfeea, /* Benchmark DAC1 Pre */ QUIRK_FLAG_GET_SAMPLE_RATE), DEVICE_FLG(0x04e8, 0xa051, /* Samsung USBC Headset (AKG) */ -@@ -1821,8 +1783,14 @@ static const struct usb_audio_quirk_flags_table quirk_flags_table[] = { +@@ -1821,8 +1784,14 @@ static const struct usb_audio_quirk_flags_table quirk_flags_table[] = { QUIRK_FLAG_IGNORE_CTL_ERROR), DEVICE_FLG(0x06f8, 0xd002, /* Hercules DJ Console (Macintosh Edition) */ QUIRK_FLAG_IGNORE_CTL_ERROR), @@ -434133,7 +556749,7 @@ index 8929d9abe8aa8..8c3b0be909eb0 100644 DEVICE_FLG(0x08bb, 0x2702, /* LineX FM Transmitter */ QUIRK_FLAG_IGNORE_CTL_ERROR), DEVICE_FLG(0x0951, 0x16ad, /* Kingston HyperX */ -@@ -1833,6 +1801,12 @@ static const struct usb_audio_quirk_flags_table quirk_flags_table[] = { +@@ -1833,6 +1802,12 @@ static const struct usb_audio_quirk_flags_table quirk_flags_table[] = { QUIRK_FLAG_SHARE_MEDIA_DEVICE | QUIRK_FLAG_ALIGN_TRANSFER), DEVICE_FLG(0x1395, 0x740a, /* Sennheiser DECT */ QUIRK_FLAG_GET_SAMPLE_RATE), @@ -434146,7 +556762,7 @@ index 8929d9abe8aa8..8c3b0be909eb0 100644 DEVICE_FLG(0x13e5, 0x0001, /* Serato Phono */ QUIRK_FLAG_IGNORE_CTL_ERROR), DEVICE_FLG(0x154e, 0x1002, /* Denon DCD-1500RE */ -@@ -1887,16 +1861,28 @@ static const struct usb_audio_quirk_flags_table quirk_flags_table[] = { +@@ -1887,16 +1862,30 @@ static const struct usb_audio_quirk_flags_table quirk_flags_table[] = { 
QUIRK_FLAG_SHARE_MEDIA_DEVICE | QUIRK_FLAG_ALIGN_TRANSFER), DEVICE_FLG(0x21b4, 0x0081, /* AudioQuest DragonFly */ QUIRK_FLAG_GET_SAMPLE_RATE), @@ -434172,6 +556788,8 @@ index 8929d9abe8aa8..8c3b0be909eb0 100644 + QUIRK_FLAG_GENERIC_IMPLICIT_FB), + DEVICE_FLG(0x2b53, 0x0031, /* Fiero SC-01 (firmware v1.1.0) */ + QUIRK_FLAG_GENERIC_IMPLICIT_FB), ++ DEVICE_FLG(0x0525, 0xa4ad, /* Hamedal C20 usb camero */ ++ QUIRK_FLAG_IFACE_SKIP_CLOSE), /* Vendor matches */ VENDOR_FLG(0x045e, /* MS Lifecam */ @@ -434223,7 +556841,7 @@ index ceb93d798182c..f10f4e6d3fb85 100644 for (i = 0; i < num; i++) { diff --git a/sound/usb/usbaudio.h b/sound/usb/usbaudio.h -index 167834133b9bc..39c3c61a7e491 100644 +index 167834133b9bc..ec06f441e890f 100644 --- a/sound/usb/usbaudio.h +++ b/sound/usb/usbaudio.h @@ -8,7 +8,7 @@ @@ -434243,7 +556861,7 @@ index 167834133b9bc..39c3c61a7e491 100644 int num_suspended_intf; int sample_rate_read_error; -@@ -164,6 +165,10 @@ extern bool snd_usb_skip_validation; +@@ -164,6 +165,12 @@ extern bool snd_usb_skip_validation; * Support generic DSD raw U32_BE format * QUIRK_FLAG_SET_IFACE_FIRST: * Set up the interface at first like UAC1 @@ -434251,15 +556869,18 @@ index 167834133b9bc..39c3c61a7e491 100644 + * Apply the generic implicit feedback sync mode (same as implicit_fb=1 option) + * QUIRK_FLAG_SKIP_IMPLICIT_FB + * Don't apply implicit feedback sync mode ++ * QUIRK_FLAG_IFACE_SKIP_CLOSE ++ * Don't closed interface during setting sample rate */ #define QUIRK_FLAG_GET_SAMPLE_RATE (1U << 0) -@@ -183,5 +188,7 @@ extern bool snd_usb_skip_validation; +@@ -183,5 +190,8 @@ extern bool snd_usb_skip_validation; #define QUIRK_FLAG_IGNORE_CTL_ERROR (1U << 14) #define QUIRK_FLAG_DSD_RAW (1U << 15) #define QUIRK_FLAG_SET_IFACE_FIRST (1U << 16) +#define QUIRK_FLAG_GENERIC_IMPLICIT_FB (1U << 17) +#define QUIRK_FLAG_SKIP_IMPLICIT_FB (1U << 18) ++#define QUIRK_FLAG_IFACE_SKIP_CLOSE (1U << 19) #endif /* __USBAUDIO_H */ diff --git a/sound/x86/intel_hdmi_audio.c b/sound/x86/intel_hdmi_audio.c @@ -434296,6 +556917,37 @@ index 378826312abe6..42add5df37fda 100644 static const struct dev_pm_ops hdmi_lpe_audio_pm = { SET_SYSTEM_SLEEP_PM_OPS(hdmi_lpe_audio_suspend, hdmi_lpe_audio_resume) }; +diff --git a/tools/arch/parisc/include/uapi/asm/mman.h b/tools/arch/parisc/include/uapi/asm/mman.h +index 506c06a6536fb..4cc88a642e106 100644 +--- a/tools/arch/parisc/include/uapi/asm/mman.h ++++ b/tools/arch/parisc/include/uapi/asm/mman.h +@@ -1,20 +1,20 @@ + /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + #ifndef TOOLS_ARCH_PARISC_UAPI_ASM_MMAN_FIX_H + #define TOOLS_ARCH_PARISC_UAPI_ASM_MMAN_FIX_H +-#define MADV_DODUMP 70 ++#define MADV_DODUMP 17 + #define MADV_DOFORK 11 +-#define MADV_DONTDUMP 69 ++#define MADV_DONTDUMP 16 + #define MADV_DONTFORK 10 + #define MADV_DONTNEED 4 + #define MADV_FREE 8 +-#define MADV_HUGEPAGE 67 +-#define MADV_MERGEABLE 65 +-#define MADV_NOHUGEPAGE 68 ++#define MADV_HUGEPAGE 14 ++#define MADV_MERGEABLE 12 ++#define MADV_NOHUGEPAGE 15 + #define MADV_NORMAL 0 + #define MADV_RANDOM 1 + #define MADV_REMOVE 9 + #define MADV_SEQUENTIAL 2 +-#define MADV_UNMERGEABLE 66 ++#define MADV_UNMERGEABLE 13 + #define MADV_WILLNEED 3 + #define MAP_ANONYMOUS 0x10 + #define MAP_DENYWRITE 0x0800 diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h index d0ce5cfd3ac14..bcaedfe60572f 100644 --- a/tools/arch/x86/include/asm/cpufeatures.h @@ -434381,7 +557033,7 @@ index 8f28fafa98b32..834a3b6d81e12 100644 #define DISABLED_MASK13 0 #define DISABLED_MASK14 0 
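A note on the two tools/arch/x86 header syncs above and below: perf and objtool build against the copies under tools/arch/x86, and the build prints a warning whenever those copies drift from the kernel's originals, which is why a backport touching cpufeatures.h and msr-index.h carries this sync along. The DISABLED_MASK scheme in disabled-features.h is worth a quick sketch: x86 feature bits are indexed into 32-bit words, each word has a compile-time mask of config-disabled features, and a test against a constant bit therefore folds to 0 or 1 at compile time. The following is a minimal standalone illustration of that idea only; the helper name my_feature_disabled and the sample bit numbers are invented for the example and are not the kernel's API.

/* Standalone sketch of the DISABLED_MASK idea from disabled-features.h;
 * all names below are illustrative, not kernel interfaces. */
#include <stdio.h>

#define WORD(bit)     ((bit) / 32)           /* which 32-bit feature word */
#define BITMASK(bit)  (1u << ((bit) % 32))   /* bit within that word */

/* One mask per feature word; a set bit means "compiled out". */
#define DISABLED_MASK0  0u
#define DISABLED_MASK1  BITMASK(34)  /* pretend feature 34 is config-disabled */

static inline int my_feature_disabled(unsigned int bit)
{
	/* With a constant bit, the switch and the AND fold away entirely. */
	switch (WORD(bit)) {
	case 0: return !!(DISABLED_MASK0 & BITMASK(bit));
	case 1: return !!(DISABLED_MASK1 & BITMASK(bit));
	default: return 0;
	}
}

int main(void)
{
	printf("feature 34 disabled: %d\n", my_feature_disabled(34)); /* 1 */
	printf("feature 3  disabled: %d\n", my_feature_disabled(3));  /* 0 */
	return 0;
}

Because both the masks and the queried bit are compile-time constants, the compiler can prove my_feature_disabled(34) is 1 and eliminate any code guarded by it, which is how config-disabled CPU features cost nothing at run time and why the per-word DISABLED_MASKn constants in the hunk above must stay in lockstep with the feature word count.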
diff --git a/tools/arch/x86/include/asm/msr-index.h b/tools/arch/x86/include/asm/msr-index.h -index a7c413432b33d..8f38265bc81dc 100644 +index a7c413432b33d..2c0838ee3eaca 100644 --- a/tools/arch/x86/include/asm/msr-index.h +++ b/tools/arch/x86/include/asm/msr-index.h @@ -51,6 +51,8 @@ @@ -434456,7 +557108,19 @@ index a7c413432b33d..8f38265bc81dc 100644 #define MSR_IA32_SYSENTER_CS 0x00000174 #define MSR_IA32_SYSENTER_ESP 0x00000175 -@@ -489,6 +528,9 @@ +@@ -456,6 +495,11 @@ + #define MSR_AMD64_CPUID_FN_1 0xc0011004 + #define MSR_AMD64_LS_CFG 0xc0011020 + #define MSR_AMD64_DC_CFG 0xc0011022 ++ ++#define MSR_AMD64_DE_CFG 0xc0011029 ++#define MSR_AMD64_DE_CFG_LFENCE_SERIALIZE_BIT 1 ++#define MSR_AMD64_DE_CFG_LFENCE_SERIALIZE BIT_ULL(MSR_AMD64_DE_CFG_LFENCE_SERIALIZE_BIT) ++ + #define MSR_AMD64_BU_CFG2 0xc001102a + #define MSR_AMD64_IBSFETCHCTL 0xc0011030 + #define MSR_AMD64_IBSFETCHLINAD 0xc0011031 +@@ -489,6 +533,9 @@ /* Fam 17h MSRs */ #define MSR_F17H_IRPERF 0xc00000e9 @@ -434466,6 +557130,16 @@ index a7c413432b33d..8f38265bc81dc 100644 /* Fam 16h MSRs */ #define MSR_F16H_L2I_PERF_CTL 0xc0010230 #define MSR_F16H_L2I_PERF_CTR 0xc0010231 +@@ -530,9 +577,6 @@ + #define FAM10H_MMIO_CONF_BASE_MASK 0xfffffffULL + #define FAM10H_MMIO_CONF_BASE_SHIFT 20 + #define MSR_FAM10H_NODE_ID 0xc001100c +-#define MSR_F10H_DECFG 0xc0011029 +-#define MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT 1 +-#define MSR_F10H_DECFG_LFENCE_SERIALIZE BIT_ULL(MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT) + + /* K8 MSRs */ + #define MSR_K8_TOP_MEM1 0xc001001a diff --git a/tools/arch/x86/lib/insn.c b/tools/arch/x86/lib/insn.c index 797699462cd8e..8fd63a067308a 100644 --- a/tools/arch/x86/lib/insn.c @@ -434729,6 +557403,20 @@ index 9c25286a5c737..70fb26a3dfa8d 100644 break; default: /* shouldn't happen */ +diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c +index d42d930a3ec4d..e4c65d34fe74f 100644 +--- a/tools/bpf/bpftool/common.c ++++ b/tools/bpf/bpftool/common.c +@@ -278,6 +278,9 @@ int do_pin_any(int argc, char **argv, int (*get_fd)(int *, char ***)) + int err; + int fd; + ++ if (!REQ_ARGS(3)) ++ return -EINVAL; ++ + fd = get_fd(&argc, &argv); + if (fd < 0) + return fd; diff --git a/tools/bpf/bpftool/gen.c b/tools/bpf/bpftool/gen.c index d40d92bbf0e48..07fa502a4ac15 100644 --- a/tools/bpf/bpftool/gen.c @@ -435092,6 +557780,52 @@ index 47714b942d4d3..0000000000000 -{ - return 0; -} +diff --git a/tools/gpio/gpio-event-mon.c b/tools/gpio/gpio-event-mon.c +index a2b233fdb572e..6670199909822 100644 +--- a/tools/gpio/gpio-event-mon.c ++++ b/tools/gpio/gpio-event-mon.c +@@ -86,6 +86,7 @@ int monitor_device(const char *device_name, + gpiotools_test_bit(values.bits, i)); + } + ++ i = 0; + while (1) { + struct gpio_v2_line_event event; + +diff --git a/tools/iio/iio_generic_buffer.c b/tools/iio/iio_generic_buffer.c +index 2491c54a5e4fb..f8deae4e26a15 100644 +--- a/tools/iio/iio_generic_buffer.c ++++ b/tools/iio/iio_generic_buffer.c +@@ -715,12 +715,12 @@ int main(int argc, char **argv) + continue; + } + +- toread = buf_len; + } else { + usleep(timedelay); +- toread = 64; + } + ++ toread = buf_len; ++ + read_size = read(buf_fd, data, toread * scan_size); + if (read_size < 0) { + if (errno == EAGAIN) { +diff --git a/tools/iio/iio_utils.c b/tools/iio/iio_utils.c +index aadee6d34c74c..8d35893b2fa85 100644 +--- a/tools/iio/iio_utils.c ++++ b/tools/iio/iio_utils.c +@@ -547,6 +547,10 @@ static int calc_digits(int num) + { + int count = 0; + ++ /* It takes a digit to represent zero */ ++ if (!num) ++ return 1; ++ + while (num != 0) { + num 
/= 10; + count++; diff --git a/tools/include/asm-generic/unaligned.h b/tools/include/asm-generic/unaligned.h new file mode 100644 index 0000000000000..47387c607035e @@ -435121,6 +557855,40 @@ index 0000000000000..47387c607035e + +#endif /* __TOOLS_LINUX_ASM_GENERIC_UNALIGNED_H */ + +diff --git a/tools/include/linux/kernel.h b/tools/include/linux/kernel.h +index a7e54a08fb54c..5a79572f8b2d7 100644 +--- a/tools/include/linux/kernel.h ++++ b/tools/include/linux/kernel.h +@@ -14,6 +14,8 @@ + #define UINT_MAX (~0U) + #endif + ++#define _RET_IP_ ((unsigned long)__builtin_return_address(0)) ++ + #define DIV_ROUND_UP(n,d) (((n) + (d) - 1) / (d)) + + #define PERF_ALIGN(x, a) __PERF_ALIGN_MASK(x, (typeof(x))(a)-1) +@@ -52,6 +54,10 @@ + _min1 < _min2 ? _min1 : _min2; }) + #endif + ++#define max_t(type, x, y) max((type)x, (type)y) ++#define min_t(type, x, y) min((type)x, (type)y) ++#define clamp(val, lo, hi) min((typeof(val))max(val, lo), hi) ++ + #ifndef roundup + #define roundup(x, y) ( \ + { \ +@@ -102,7 +108,9 @@ int vscnprintf(char *buf, size_t size, const char *fmt, va_list args); + int scnprintf(char * buf, size_t size, const char * fmt, ...); + int scnprintf_pad(char * buf, size_t size, const char * fmt, ...); + ++#ifndef ARRAY_SIZE + #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]) + __must_be_array(arr)) ++#endif + + /* + * This looks more complex than it should be. But we need to diff --git a/tools/include/linux/objtool.h b/tools/include/linux/objtool.h index 7e72d975cb761..a2042c4186864 100644 --- a/tools/include/linux/objtool.h @@ -435160,102 +557928,2866 @@ index 7e72d975cb761..a2042c4186864 100644 .endm .macro STACK_FRAME_NON_STANDARD func:req .endm -diff --git a/tools/include/nolibc/nolibc.h b/tools/include/nolibc/nolibc.h -index 3430667b0d241..3e2c6f2ed587f 100644 ---- a/tools/include/nolibc/nolibc.h -+++ b/tools/include/nolibc/nolibc.h -@@ -399,16 +399,22 @@ struct stat { - }) - - /* startup code */ +diff --git a/tools/include/nolibc/arch-aarch64.h b/tools/include/nolibc/arch-aarch64.h +new file mode 100644 +index 0000000000000..2dbd80d633cbb +--- /dev/null ++++ b/tools/include/nolibc/arch-aarch64.h +@@ -0,0 +1,199 @@ ++/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* -+ * x86-64 System V ABI mandates: -+ * 1) %rsp must be 16-byte aligned right before the function call. -+ * 2) The deepest stack frame should be zero (the %rbp). ++ * AARCH64 specific definitions for NOLIBC ++ * Copyright (C) 2017-2022 Willy Tarreau <w@1wt.eu> ++ */ ++ ++#ifndef _NOLIBC_ARCH_AARCH64_H ++#define _NOLIBC_ARCH_AARCH64_H ++ ++/* O_* macros for fcntl/open are architecture-specific */ ++#define O_RDONLY 0 ++#define O_WRONLY 1 ++#define O_RDWR 2 ++#define O_CREAT 0x40 ++#define O_EXCL 0x80 ++#define O_NOCTTY 0x100 ++#define O_TRUNC 0x200 ++#define O_APPEND 0x400 ++#define O_NONBLOCK 0x800 ++#define O_DIRECTORY 0x4000 ++ ++/* The struct returned by the newfstatat() syscall. Differs slightly from the ++ * x86_64's stat one by field ordering, so be careful. 
++ */ ++struct sys_stat_struct { ++ unsigned long st_dev; ++ unsigned long st_ino; ++ unsigned int st_mode; ++ unsigned int st_nlink; ++ unsigned int st_uid; ++ unsigned int st_gid; ++ ++ unsigned long st_rdev; ++ unsigned long __pad1; ++ long st_size; ++ int st_blksize; ++ int __pad2; ++ ++ long st_blocks; ++ long st_atime; ++ unsigned long st_atime_nsec; ++ long st_mtime; ++ ++ unsigned long st_mtime_nsec; ++ long st_ctime; ++ unsigned long st_ctime_nsec; ++ unsigned int __unused[2]; ++}; ++ ++/* Syscalls for AARCH64 : ++ * - registers are 64-bit ++ * - stack is 16-byte aligned ++ * - syscall number is passed in x8 ++ * - arguments are in x0, x1, x2, x3, x4, x5 ++ * - the system call is performed by calling svc 0 ++ * - syscall return comes in x0. ++ * - the arguments are cast to long and assigned into the target registers ++ * which are then simply passed as registers to the asm code, so that we ++ * don't have to experience issues with register constraints. + * ++ * On aarch64, select() is not implemented so we have to use pselect6(). + */ - asm(".section .text\n" - ".global _start\n" - "_start:\n" - "pop %rdi\n" // argc (first arg, %rdi) - "mov %rsp, %rsi\n" // argv[] (second arg, %rsi) - "lea 8(%rsi,%rdi,8),%rdx\n" // then a NULL then envp (third arg, %rdx) -- "and $-16, %rsp\n" // x86 ABI : esp must be 16-byte aligned when -- "sub $8, %rsp\n" // entering the callee -+ "xor %ebp, %ebp\n" // zero the stack frame -+ "and $-16, %rsp\n" // x86 ABI : esp must be 16-byte aligned before call - "call main\n" // main() returns the status code, we'll exit with it. -- "movzb %al, %rdi\n" // retrieve exit code from 8 lower bits -+ "mov %eax, %edi\n" // retrieve exit code (32 bit) - "mov $60, %rax\n" // NR_exit == 60 - "syscall\n" // really exit - "hlt\n" // ensure it does not return -@@ -577,20 +583,28 @@ struct sys_stat_struct { - }) - - /* startup code */ ++#define __ARCH_WANT_SYS_PSELECT6 ++ ++#define my_syscall0(num) \ ++({ \ ++ register long _num asm("x8") = (num); \ ++ register long _arg1 asm("x0"); \ ++ \ ++ asm volatile ( \ ++ "svc #0\n" \ ++ : "=r"(_arg1) \ ++ : "r"(_num) \ ++ : "memory", "cc" \ ++ ); \ ++ _arg1; \ ++}) ++ ++#define my_syscall1(num, arg1) \ ++({ \ ++ register long _num asm("x8") = (num); \ ++ register long _arg1 asm("x0") = (long)(arg1); \ ++ \ ++ asm volatile ( \ ++ "svc #0\n" \ ++ : "=r"(_arg1) \ ++ : "r"(_arg1), \ ++ "r"(_num) \ ++ : "memory", "cc" \ ++ ); \ ++ _arg1; \ ++}) ++ ++#define my_syscall2(num, arg1, arg2) \ ++({ \ ++ register long _num asm("x8") = (num); \ ++ register long _arg1 asm("x0") = (long)(arg1); \ ++ register long _arg2 asm("x1") = (long)(arg2); \ ++ \ ++ asm volatile ( \ ++ "svc #0\n" \ ++ : "=r"(_arg1) \ ++ : "r"(_arg1), "r"(_arg2), \ ++ "r"(_num) \ ++ : "memory", "cc" \ ++ ); \ ++ _arg1; \ ++}) ++ ++#define my_syscall3(num, arg1, arg2, arg3) \ ++({ \ ++ register long _num asm("x8") = (num); \ ++ register long _arg1 asm("x0") = (long)(arg1); \ ++ register long _arg2 asm("x1") = (long)(arg2); \ ++ register long _arg3 asm("x2") = (long)(arg3); \ ++ \ ++ asm volatile ( \ ++ "svc #0\n" \ ++ : "=r"(_arg1) \ ++ : "r"(_arg1), "r"(_arg2), "r"(_arg3), \ ++ "r"(_num) \ ++ : "memory", "cc" \ ++ ); \ ++ _arg1; \ ++}) ++ ++#define my_syscall4(num, arg1, arg2, arg3, arg4) \ ++({ \ ++ register long _num asm("x8") = (num); \ ++ register long _arg1 asm("x0") = (long)(arg1); \ ++ register long _arg2 asm("x1") = (long)(arg2); \ ++ register long _arg3 asm("x2") = (long)(arg3); \ ++ register long _arg4 asm("x3") = (long)(arg4); \ ++ \ ++ asm volatile ( \ ++ "svc 
#0\n" \ ++ : "=r"(_arg1) \ ++ : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), \ ++ "r"(_num) \ ++ : "memory", "cc" \ ++ ); \ ++ _arg1; \ ++}) ++ ++#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5) \ ++({ \ ++ register long _num asm("x8") = (num); \ ++ register long _arg1 asm("x0") = (long)(arg1); \ ++ register long _arg2 asm("x1") = (long)(arg2); \ ++ register long _arg3 asm("x2") = (long)(arg3); \ ++ register long _arg4 asm("x3") = (long)(arg4); \ ++ register long _arg5 asm("x4") = (long)(arg5); \ ++ \ ++ asm volatile ( \ ++ "svc #0\n" \ ++ : "=r" (_arg1) \ ++ : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \ ++ "r"(_num) \ ++ : "memory", "cc" \ ++ ); \ ++ _arg1; \ ++}) ++ ++#define my_syscall6(num, arg1, arg2, arg3, arg4, arg5, arg6) \ ++({ \ ++ register long _num asm("x8") = (num); \ ++ register long _arg1 asm("x0") = (long)(arg1); \ ++ register long _arg2 asm("x1") = (long)(arg2); \ ++ register long _arg3 asm("x2") = (long)(arg3); \ ++ register long _arg4 asm("x3") = (long)(arg4); \ ++ register long _arg5 asm("x4") = (long)(arg5); \ ++ register long _arg6 asm("x5") = (long)(arg6); \ ++ \ ++ asm volatile ( \ ++ "svc #0\n" \ ++ : "=r" (_arg1) \ ++ : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \ ++ "r"(_arg6), "r"(_num) \ ++ : "memory", "cc" \ ++ ); \ ++ _arg1; \ ++}) ++ ++/* startup code */ ++asm(".section .text\n" ++ ".weak _start\n" ++ "_start:\n" ++ "ldr x0, [sp]\n" // argc (x0) was in the stack ++ "add x1, sp, 8\n" // argv (x1) = sp ++ "lsl x2, x0, 3\n" // envp (x2) = 8*argc ... ++ "add x2, x2, 8\n" // + 8 (skip null) ++ "add x2, x2, x1\n" // + argv ++ "and sp, x1, -16\n" // sp must be 16-byte aligned in the callee ++ "bl main\n" // main() returns the status code, we'll exit with it. ++ "mov x8, 93\n" // NR_exit == 93 ++ "svc #0\n" ++ ""); ++ ++#endif // _NOLIBC_ARCH_AARCH64_H +diff --git a/tools/include/nolibc/arch-arm.h b/tools/include/nolibc/arch-arm.h +new file mode 100644 +index 0000000000000..1191395b5acd9 +--- /dev/null ++++ b/tools/include/nolibc/arch-arm.h +@@ -0,0 +1,204 @@ ++/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ ++/* ++ * ARM specific definitions for NOLIBC ++ * Copyright (C) 2017-2022 Willy Tarreau <w@1wt.eu> ++ */ ++ ++#ifndef _NOLIBC_ARCH_ARM_H ++#define _NOLIBC_ARCH_ARM_H ++ ++/* O_* macros for fcntl/open are architecture-specific */ ++#define O_RDONLY 0 ++#define O_WRONLY 1 ++#define O_RDWR 2 ++#define O_CREAT 0x40 ++#define O_EXCL 0x80 ++#define O_NOCTTY 0x100 ++#define O_TRUNC 0x200 ++#define O_APPEND 0x400 ++#define O_NONBLOCK 0x800 ++#define O_DIRECTORY 0x4000 ++ ++/* The struct returned by the stat() syscall, 32-bit only, the syscall returns ++ * exactly 56 bytes (stops before the unused array). In big endian, the format ++ * differs as devices are returned as short only. 
++ */ ++struct sys_stat_struct { ++#if defined(__ARMEB__) ++ unsigned short st_dev; ++ unsigned short __pad1; ++#else ++ unsigned long st_dev; ++#endif ++ unsigned long st_ino; ++ unsigned short st_mode; ++ unsigned short st_nlink; ++ unsigned short st_uid; ++ unsigned short st_gid; ++ ++#if defined(__ARMEB__) ++ unsigned short st_rdev; ++ unsigned short __pad2; ++#else ++ unsigned long st_rdev; ++#endif ++ unsigned long st_size; ++ unsigned long st_blksize; ++ unsigned long st_blocks; ++ ++ unsigned long st_atime; ++ unsigned long st_atime_nsec; ++ unsigned long st_mtime; ++ unsigned long st_mtime_nsec; ++ ++ unsigned long st_ctime; ++ unsigned long st_ctime_nsec; ++ unsigned long __unused[2]; ++}; ++ ++/* Syscalls for ARM in ARM or Thumb modes : ++ * - registers are 32-bit ++ * - stack is 8-byte aligned ++ * ( http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.faqs/ka4127.html) ++ * - syscall number is passed in r7 ++ * - arguments are in r0, r1, r2, r3, r4, r5 ++ * - the system call is performed by calling svc #0 ++ * - syscall return comes in r0. ++ * - only lr is clobbered. ++ * - the arguments are cast to long and assigned into the target registers ++ * which are then simply passed as registers to the asm code, so that we ++ * don't have to experience issues with register constraints. ++ * - the syscall number is always specified last in order to allow to force ++ * some registers before (gcc refuses a %-register at the last position). ++ * ++ * Also, ARM supports the old_select syscall if newselect is not available ++ */ ++#define __ARCH_WANT_SYS_OLD_SELECT ++ ++#define my_syscall0(num) \ ++({ \ ++ register long _num asm("r7") = (num); \ ++ register long _arg1 asm("r0"); \ ++ \ ++ asm volatile ( \ ++ "svc #0\n" \ ++ : "=r"(_arg1) \ ++ : "r"(_num) \ ++ : "memory", "cc", "lr" \ ++ ); \ ++ _arg1; \ ++}) ++ ++#define my_syscall1(num, arg1) \ ++({ \ ++ register long _num asm("r7") = (num); \ ++ register long _arg1 asm("r0") = (long)(arg1); \ ++ \ ++ asm volatile ( \ ++ "svc #0\n" \ ++ : "=r"(_arg1) \ ++ : "r"(_arg1), \ ++ "r"(_num) \ ++ : "memory", "cc", "lr" \ ++ ); \ ++ _arg1; \ ++}) ++ ++#define my_syscall2(num, arg1, arg2) \ ++({ \ ++ register long _num asm("r7") = (num); \ ++ register long _arg1 asm("r0") = (long)(arg1); \ ++ register long _arg2 asm("r1") = (long)(arg2); \ ++ \ ++ asm volatile ( \ ++ "svc #0\n" \ ++ : "=r"(_arg1) \ ++ : "r"(_arg1), "r"(_arg2), \ ++ "r"(_num) \ ++ : "memory", "cc", "lr" \ ++ ); \ ++ _arg1; \ ++}) ++ ++#define my_syscall3(num, arg1, arg2, arg3) \ ++({ \ ++ register long _num asm("r7") = (num); \ ++ register long _arg1 asm("r0") = (long)(arg1); \ ++ register long _arg2 asm("r1") = (long)(arg2); \ ++ register long _arg3 asm("r2") = (long)(arg3); \ ++ \ ++ asm volatile ( \ ++ "svc #0\n" \ ++ : "=r"(_arg1) \ ++ : "r"(_arg1), "r"(_arg2), "r"(_arg3), \ ++ "r"(_num) \ ++ : "memory", "cc", "lr" \ ++ ); \ ++ _arg1; \ ++}) ++ ++#define my_syscall4(num, arg1, arg2, arg3, arg4) \ ++({ \ ++ register long _num asm("r7") = (num); \ ++ register long _arg1 asm("r0") = (long)(arg1); \ ++ register long _arg2 asm("r1") = (long)(arg2); \ ++ register long _arg3 asm("r2") = (long)(arg3); \ ++ register long _arg4 asm("r3") = (long)(arg4); \ ++ \ ++ asm volatile ( \ ++ "svc #0\n" \ ++ : "=r"(_arg1) \ ++ : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), \ ++ "r"(_num) \ ++ : "memory", "cc", "lr" \ ++ ); \ ++ _arg1; \ ++}) ++ ++#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5) \ ++({ \ ++ register long _num asm("r7") = (num); \ ++ register long _arg1 asm("r0") = 
(long)(arg1); \ ++ register long _arg2 asm("r1") = (long)(arg2); \ ++ register long _arg3 asm("r2") = (long)(arg3); \ ++ register long _arg4 asm("r3") = (long)(arg4); \ ++ register long _arg5 asm("r4") = (long)(arg5); \ ++ \ ++ asm volatile ( \ ++ "svc #0\n" \ ++ : "=r" (_arg1) \ ++ : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \ ++ "r"(_num) \ ++ : "memory", "cc", "lr" \ ++ ); \ ++ _arg1; \ ++}) ++ ++/* startup code */ ++asm(".section .text\n" ++ ".weak _start\n" ++ "_start:\n" ++#if defined(__THUMBEB__) || defined(__THUMBEL__) ++ /* We enter here in 32-bit mode but if some previous functions were in ++ * 16-bit mode, the assembler cannot know, so we need to tell it we're in ++ * 32-bit now, then switch to 16-bit (is there a better way to do it than ++ * adding 1 by hand ?) and tell the asm we're now in 16-bit mode so that ++ * it generates correct instructions. Note that we do not support thumb1. ++ */ ++ ".code 32\n" ++ "add r0, pc, #1\n" ++ "bx r0\n" ++ ".code 16\n" ++#endif ++ "pop {%r0}\n" // argc was in the stack ++ "mov %r1, %sp\n" // argv = sp ++ "add %r2, %r1, %r0, lsl #2\n" // envp = argv + 4*argc ... ++ "add %r2, %r2, $4\n" // ... + 4 ++ "and %r3, %r1, $-8\n" // AAPCS : sp must be 8-byte aligned in the ++ "mov %sp, %r3\n" // callee, an bl doesn't push (lr=pc) ++ "bl main\n" // main() returns the status code, we'll exit with it. ++ "movs r7, $1\n" // NR_exit == 1 ++ "svc $0x00\n" ++ ""); ++ ++#endif // _NOLIBC_ARCH_ARM_H +diff --git a/tools/include/nolibc/arch-i386.h b/tools/include/nolibc/arch-i386.h +new file mode 100644 +index 0000000000000..125a691fc631e +--- /dev/null ++++ b/tools/include/nolibc/arch-i386.h +@@ -0,0 +1,196 @@ ++/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ ++/* ++ * i386 specific definitions for NOLIBC ++ * Copyright (C) 2017-2022 Willy Tarreau <w@1wt.eu> ++ */ ++ ++#ifndef _NOLIBC_ARCH_I386_H ++#define _NOLIBC_ARCH_I386_H ++ ++/* O_* macros for fcntl/open are architecture-specific */ ++#define O_RDONLY 0 ++#define O_WRONLY 1 ++#define O_RDWR 2 ++#define O_CREAT 0x40 ++#define O_EXCL 0x80 ++#define O_NOCTTY 0x100 ++#define O_TRUNC 0x200 ++#define O_APPEND 0x400 ++#define O_NONBLOCK 0x800 ++#define O_DIRECTORY 0x10000 ++ ++/* The struct returned by the stat() syscall, 32-bit only, the syscall returns ++ * exactly 56 bytes (stops before the unused array). ++ */ ++struct sys_stat_struct { ++ unsigned long st_dev; ++ unsigned long st_ino; ++ unsigned short st_mode; ++ unsigned short st_nlink; ++ unsigned short st_uid; ++ unsigned short st_gid; ++ ++ unsigned long st_rdev; ++ unsigned long st_size; ++ unsigned long st_blksize; ++ unsigned long st_blocks; ++ ++ unsigned long st_atime; ++ unsigned long st_atime_nsec; ++ unsigned long st_mtime; ++ unsigned long st_mtime_nsec; ++ ++ unsigned long st_ctime; ++ unsigned long st_ctime_nsec; ++ unsigned long __unused[2]; ++}; ++ ++/* Syscalls for i386 : ++ * - mostly similar to x86_64 ++ * - registers are 32-bit ++ * - syscall number is passed in eax ++ * - arguments are in ebx, ecx, edx, esi, edi, ebp respectively ++ * - all registers are preserved (except eax of course) ++ * - the system call is performed by calling int $0x80 ++ * - syscall return comes in eax ++ * - the arguments are cast to long and assigned into the target registers ++ * which are then simply passed as registers to the asm code, so that we ++ * don't have to experience issues with register constraints. 
++ * - the syscall number is always specified last in order to allow to force ++ * some registers before (gcc refuses a %-register at the last position). ++ * ++ * Also, i386 supports the old_select syscall if newselect is not available ++ */ ++#define __ARCH_WANT_SYS_OLD_SELECT ++ ++#define my_syscall0(num) \ ++({ \ ++ long _ret; \ ++ register long _num asm("eax") = (num); \ ++ \ ++ asm volatile ( \ ++ "int $0x80\n" \ ++ : "=a" (_ret) \ ++ : "0"(_num) \ ++ : "memory", "cc" \ ++ ); \ ++ _ret; \ ++}) ++ ++#define my_syscall1(num, arg1) \ ++({ \ ++ long _ret; \ ++ register long _num asm("eax") = (num); \ ++ register long _arg1 asm("ebx") = (long)(arg1); \ ++ \ ++ asm volatile ( \ ++ "int $0x80\n" \ ++ : "=a" (_ret) \ ++ : "r"(_arg1), \ ++ "0"(_num) \ ++ : "memory", "cc" \ ++ ); \ ++ _ret; \ ++}) ++ ++#define my_syscall2(num, arg1, arg2) \ ++({ \ ++ long _ret; \ ++ register long _num asm("eax") = (num); \ ++ register long _arg1 asm("ebx") = (long)(arg1); \ ++ register long _arg2 asm("ecx") = (long)(arg2); \ ++ \ ++ asm volatile ( \ ++ "int $0x80\n" \ ++ : "=a" (_ret) \ ++ : "r"(_arg1), "r"(_arg2), \ ++ "0"(_num) \ ++ : "memory", "cc" \ ++ ); \ ++ _ret; \ ++}) ++ ++#define my_syscall3(num, arg1, arg2, arg3) \ ++({ \ ++ long _ret; \ ++ register long _num asm("eax") = (num); \ ++ register long _arg1 asm("ebx") = (long)(arg1); \ ++ register long _arg2 asm("ecx") = (long)(arg2); \ ++ register long _arg3 asm("edx") = (long)(arg3); \ ++ \ ++ asm volatile ( \ ++ "int $0x80\n" \ ++ : "=a" (_ret) \ ++ : "r"(_arg1), "r"(_arg2), "r"(_arg3), \ ++ "0"(_num) \ ++ : "memory", "cc" \ ++ ); \ ++ _ret; \ ++}) ++ ++#define my_syscall4(num, arg1, arg2, arg3, arg4) \ ++({ \ ++ long _ret; \ ++ register long _num asm("eax") = (num); \ ++ register long _arg1 asm("ebx") = (long)(arg1); \ ++ register long _arg2 asm("ecx") = (long)(arg2); \ ++ register long _arg3 asm("edx") = (long)(arg3); \ ++ register long _arg4 asm("esi") = (long)(arg4); \ ++ \ ++ asm volatile ( \ ++ "int $0x80\n" \ ++ : "=a" (_ret) \ ++ : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), \ ++ "0"(_num) \ ++ : "memory", "cc" \ ++ ); \ ++ _ret; \ ++}) ++ ++#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5) \ ++({ \ ++ long _ret; \ ++ register long _num asm("eax") = (num); \ ++ register long _arg1 asm("ebx") = (long)(arg1); \ ++ register long _arg2 asm("ecx") = (long)(arg2); \ ++ register long _arg3 asm("edx") = (long)(arg3); \ ++ register long _arg4 asm("esi") = (long)(arg4); \ ++ register long _arg5 asm("edi") = (long)(arg5); \ ++ \ ++ asm volatile ( \ ++ "int $0x80\n" \ ++ : "=a" (_ret) \ ++ : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \ ++ "0"(_num) \ ++ : "memory", "cc" \ ++ ); \ ++ _ret; \ ++}) ++ ++/* startup code */ +/* + * i386 System V ABI mandates: + * 1) last pushed argument must be 16-byte aligned. 
+ * 2) The deepest stack frame should be set to zero + * + */ - asm(".section .text\n" - ".global _start\n" - "_start:\n" - "pop %eax\n" // argc (first arg, %eax) - "mov %esp, %ebx\n" // argv[] (second arg, %ebx) - "lea 4(%ebx,%eax,4),%ecx\n" // then a NULL then envp (third arg, %ecx) -- "and $-16, %esp\n" // x86 ABI : esp must be 16-byte aligned when ++asm(".section .text\n" ++ ".weak _start\n" ++ "_start:\n" ++ "pop %eax\n" // argc (first arg, %eax) ++ "mov %esp, %ebx\n" // argv[] (second arg, %ebx) ++ "lea 4(%ebx,%eax,4),%ecx\n" // then a NULL then envp (third arg, %ecx) + "xor %ebp, %ebp\n" // zero the stack frame + "and $-16, %esp\n" // x86 ABI : esp must be 16-byte aligned before + "sub $4, %esp\n" // the call instruction (args are aligned) - "push %ecx\n" // push all registers on the stack so that we - "push %ebx\n" // support both regparm and plain stack modes - "push %eax\n" - "call main\n" // main() returns the status code in %eax -- "movzbl %al, %ebx\n" // retrieve exit code from lower 8 bits -- "movl $1, %eax\n" // NR_exit == 1 -- "int $0x80\n" // exit now ++ "push %ecx\n" // push all registers on the stack so that we ++ "push %ebx\n" // support both regparm and plain stack modes ++ "push %eax\n" ++ "call main\n" // main() returns the status code in %eax + "mov %eax, %ebx\n" // retrieve exit code (32-bit int) + "movl $1, %eax\n" // NR_exit == 1 + "int $0x80\n" // exit now - "hlt\n" // ensure it does not - ""); ++ "hlt\n" // ensure it does not ++ ""); ++ ++#endif // _NOLIBC_ARCH_I386_H +diff --git a/tools/include/nolibc/arch-mips.h b/tools/include/nolibc/arch-mips.h +new file mode 100644 +index 0000000000000..5d647afa42e68 +--- /dev/null ++++ b/tools/include/nolibc/arch-mips.h +@@ -0,0 +1,217 @@ ++/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ ++/* ++ * MIPS specific definitions for NOLIBC ++ * Copyright (C) 2017-2022 Willy Tarreau <w@1wt.eu> ++ */ ++ ++#ifndef _NOLIBC_ARCH_MIPS_H ++#define _NOLIBC_ARCH_MIPS_H ++ ++/* O_* macros for fcntl/open are architecture-specific */ ++#define O_RDONLY 0 ++#define O_WRONLY 1 ++#define O_RDWR 2 ++#define O_APPEND 0x0008 ++#define O_NONBLOCK 0x0080 ++#define O_CREAT 0x0100 ++#define O_TRUNC 0x0200 ++#define O_EXCL 0x0400 ++#define O_NOCTTY 0x0800 ++#define O_DIRECTORY 0x10000 ++ ++/* The struct returned by the stat() syscall. 88 bytes are returned by the ++ * syscall. ++ */ ++struct sys_stat_struct { ++ unsigned int st_dev; ++ long st_pad1[3]; ++ unsigned long st_ino; ++ unsigned int st_mode; ++ unsigned int st_nlink; ++ unsigned int st_uid; ++ unsigned int st_gid; ++ unsigned int st_rdev; ++ long st_pad2[2]; ++ long st_size; ++ long st_pad3; ++ ++ long st_atime; ++ long st_atime_nsec; ++ long st_mtime; ++ long st_mtime_nsec; ++ ++ long st_ctime; ++ long st_ctime_nsec; ++ long st_blksize; ++ long st_blocks; ++ long st_pad4[14]; ++}; ++ ++/* Syscalls for MIPS ABI O32 : ++ * - WARNING! there's always a delayed slot! ++ * - WARNING again, the syntax is different, registers take a '$' and numbers ++ * do not. ++ * - registers are 32-bit ++ * - stack is 8-byte aligned ++ * - syscall number is passed in v0 (starts at 0xfa0). ++ * - arguments are in a0, a1, a2, a3, then the stack. The caller needs to ++ * leave some room in the stack for the callee to save a0..a3 if needed. ++ * - Many registers are clobbered, in fact only a0..a2 and s0..s8 are ++ * preserved. See: https://www.linux-mips.org/wiki/Syscall as well as ++ * scall32-o32.S in the kernel sources. 
++ * - the system call is performed by calling "syscall" ++ * - syscall return comes in v0, and register a3 needs to be checked to know ++ * if an error occurred, in which case errno is in v0. ++ * - the arguments are cast to long and assigned into the target registers ++ * which are then simply passed as registers to the asm code, so that we ++ * don't have to experience issues with register constraints. ++ */ ++ ++#define my_syscall0(num) \ ++({ \ ++ register long _num asm("v0") = (num); \ ++ register long _arg4 asm("a3"); \ ++ \ ++ asm volatile ( \ ++ "addiu $sp, $sp, -32\n" \ ++ "syscall\n" \ ++ "addiu $sp, $sp, 32\n" \ ++ : "=r"(_num), "=r"(_arg4) \ ++ : "r"(_num) \ ++ : "memory", "cc", "at", "v1", "hi", "lo", \ ++ "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9" \ ++ ); \ ++ _arg4 ? -_num : _num; \ ++}) ++ ++#define my_syscall1(num, arg1) \ ++({ \ ++ register long _num asm("v0") = (num); \ ++ register long _arg1 asm("a0") = (long)(arg1); \ ++ register long _arg4 asm("a3"); \ ++ \ ++ asm volatile ( \ ++ "addiu $sp, $sp, -32\n" \ ++ "syscall\n" \ ++ "addiu $sp, $sp, 32\n" \ ++ : "=r"(_num), "=r"(_arg4) \ ++ : "0"(_num), \ ++ "r"(_arg1) \ ++ : "memory", "cc", "at", "v1", "hi", "lo", \ ++ "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9" \ ++ ); \ ++ _arg4 ? -_num : _num; \ ++}) ++ ++#define my_syscall2(num, arg1, arg2) \ ++({ \ ++ register long _num asm("v0") = (num); \ ++ register long _arg1 asm("a0") = (long)(arg1); \ ++ register long _arg2 asm("a1") = (long)(arg2); \ ++ register long _arg4 asm("a3"); \ ++ \ ++ asm volatile ( \ ++ "addiu $sp, $sp, -32\n" \ ++ "syscall\n" \ ++ "addiu $sp, $sp, 32\n" \ ++ : "=r"(_num), "=r"(_arg4) \ ++ : "0"(_num), \ ++ "r"(_arg1), "r"(_arg2) \ ++ : "memory", "cc", "at", "v1", "hi", "lo", \ ++ "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9" \ ++ ); \ ++ _arg4 ? -_num : _num; \ ++}) ++ ++#define my_syscall3(num, arg1, arg2, arg3) \ ++({ \ ++ register long _num asm("v0") = (num); \ ++ register long _arg1 asm("a0") = (long)(arg1); \ ++ register long _arg2 asm("a1") = (long)(arg2); \ ++ register long _arg3 asm("a2") = (long)(arg3); \ ++ register long _arg4 asm("a3"); \ ++ \ ++ asm volatile ( \ ++ "addiu $sp, $sp, -32\n" \ ++ "syscall\n" \ ++ "addiu $sp, $sp, 32\n" \ ++ : "=r"(_num), "=r"(_arg4) \ ++ : "0"(_num), \ ++ "r"(_arg1), "r"(_arg2), "r"(_arg3) \ ++ : "memory", "cc", "at", "v1", "hi", "lo", \ ++ "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9" \ ++ ); \ ++ _arg4 ? -_num : _num; \ ++}) ++ ++#define my_syscall4(num, arg1, arg2, arg3, arg4) \ ++({ \ ++ register long _num asm("v0") = (num); \ ++ register long _arg1 asm("a0") = (long)(arg1); \ ++ register long _arg2 asm("a1") = (long)(arg2); \ ++ register long _arg3 asm("a2") = (long)(arg3); \ ++ register long _arg4 asm("a3") = (long)(arg4); \ ++ \ ++ asm volatile ( \ ++ "addiu $sp, $sp, -32\n" \ ++ "syscall\n" \ ++ "addiu $sp, $sp, 32\n" \ ++ : "=r" (_num), "=r"(_arg4) \ ++ : "0"(_num), \ ++ "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4) \ ++ : "memory", "cc", "at", "v1", "hi", "lo", \ ++ "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9" \ ++ ); \ ++ _arg4 ? 
-_num : _num; \ ++}) ++ ++#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5) \ ++({ \ ++ register long _num asm("v0") = (num); \ ++ register long _arg1 asm("a0") = (long)(arg1); \ ++ register long _arg2 asm("a1") = (long)(arg2); \ ++ register long _arg3 asm("a2") = (long)(arg3); \ ++ register long _arg4 asm("a3") = (long)(arg4); \ ++ register long _arg5 = (long)(arg5); \ ++ \ ++ asm volatile ( \ ++ "addiu $sp, $sp, -32\n" \ ++ "sw %7, 16($sp)\n" \ ++ "syscall\n " \ ++ "addiu $sp, $sp, 32\n" \ ++ : "=r" (_num), "=r"(_arg4) \ ++ : "0"(_num), \ ++ "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5) \ ++ : "memory", "cc", "at", "v1", "hi", "lo", \ ++ "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9" \ ++ ); \ ++ _arg4 ? -_num : _num; \ ++}) ++ ++/* startup code, note that it's called __start on MIPS */ ++asm(".section .text\n" ++ ".weak __start\n" ++ ".set nomips16\n" ++ ".set push\n" ++ ".set noreorder\n" ++ ".option pic0\n" ++ ".ent __start\n" ++ "__start:\n" ++ "lw $a0,($sp)\n" // argc was in the stack ++ "addiu $a1, $sp, 4\n" // argv = sp + 4 ++ "sll $a2, $a0, 2\n" // a2 = argc * 4 ++ "add $a2, $a2, $a1\n" // envp = argv + 4*argc ... ++ "addiu $a2, $a2, 4\n" // ... + 4 ++ "li $t0, -8\n" ++ "and $sp, $sp, $t0\n" // sp must be 8-byte aligned ++ "addiu $sp,$sp,-16\n" // the callee expects to save a0..a3 there! ++ "jal main\n" // main() returns the status code, we'll exit with it. ++ "nop\n" // delayed slot ++ "move $a0, $v0\n" // retrieve 32-bit exit code from v0 ++ "li $v0, 4001\n" // NR_exit == 4001 ++ "syscall\n" ++ ".end __start\n" ++ ".set pop\n" ++ ""); ++ ++#endif // _NOLIBC_ARCH_MIPS_H +diff --git a/tools/include/nolibc/arch-riscv.h b/tools/include/nolibc/arch-riscv.h +new file mode 100644 +index 0000000000000..8c0cb1abb29f7 +--- /dev/null ++++ b/tools/include/nolibc/arch-riscv.h +@@ -0,0 +1,204 @@ ++/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ ++/* ++ * RISCV (32 and 64) specific definitions for NOLIBC ++ * Copyright (C) 2017-2022 Willy Tarreau <w@1wt.eu> ++ */ ++ ++#ifndef _NOLIBC_ARCH_RISCV_H ++#define _NOLIBC_ARCH_RISCV_H ++ ++/* O_* macros for fcntl/open are architecture-specific */ ++#define O_RDONLY 0 ++#define O_WRONLY 1 ++#define O_RDWR 2 ++#define O_CREAT 0x40 ++#define O_EXCL 0x80 ++#define O_NOCTTY 0x100 ++#define O_TRUNC 0x200 ++#define O_APPEND 0x400 ++#define O_NONBLOCK 0x800 ++#define O_DIRECTORY 0x10000 ++ ++struct sys_stat_struct { ++ unsigned long st_dev; /* Device. */ ++ unsigned long st_ino; /* File serial number. */ ++ unsigned int st_mode; /* File mode. */ ++ unsigned int st_nlink; /* Link count. */ ++ unsigned int st_uid; /* User ID of the file's owner. */ ++ unsigned int st_gid; /* Group ID of the file's group. */ ++ unsigned long st_rdev; /* Device number, if device. */ ++ unsigned long __pad1; ++ long st_size; /* Size of file, in bytes. */ ++ int st_blksize; /* Optimal block size for I/O. */ ++ int __pad2; ++ long st_blocks; /* Number 512-byte blocks allocated. */ ++ long st_atime; /* Time of last access. */ ++ unsigned long st_atime_nsec; ++ long st_mtime; /* Time of last modification. */ ++ unsigned long st_mtime_nsec; ++ long st_ctime; /* Time of last status change. 
*/ ++ unsigned long st_ctime_nsec; ++ unsigned int __unused4; ++ unsigned int __unused5; ++}; ++ ++#if __riscv_xlen == 64 ++#define PTRLOG "3" ++#define SZREG "8" ++#elif __riscv_xlen == 32 ++#define PTRLOG "2" ++#define SZREG "4" ++#endif ++ ++/* Syscalls for RISCV : ++ * - stack is 16-byte aligned ++ * - syscall number is passed in a7 ++ * - arguments are in a0, a1, a2, a3, a4, a5 ++ * - the system call is performed by calling ecall ++ * - syscall return comes in a0 ++ * - the arguments are cast to long and assigned into the target ++ * registers which are then simply passed as registers to the asm code, ++ * so that we don't have to experience issues with register constraints. ++ * ++ * On riscv, select() is not implemented so we have to use pselect6(). ++ */ ++#define __ARCH_WANT_SYS_PSELECT6 ++ ++#define my_syscall0(num) \ ++({ \ ++ register long _num asm("a7") = (num); \ ++ register long _arg1 asm("a0"); \ ++ \ ++ asm volatile ( \ ++ "ecall\n\t" \ ++ : "=r"(_arg1) \ ++ : "r"(_num) \ ++ : "memory", "cc" \ ++ ); \ ++ _arg1; \ ++}) ++ ++#define my_syscall1(num, arg1) \ ++({ \ ++ register long _num asm("a7") = (num); \ ++ register long _arg1 asm("a0") = (long)(arg1); \ ++ \ ++ asm volatile ( \ ++ "ecall\n" \ ++ : "+r"(_arg1) \ ++ : "r"(_num) \ ++ : "memory", "cc" \ ++ ); \ ++ _arg1; \ ++}) ++ ++#define my_syscall2(num, arg1, arg2) \ ++({ \ ++ register long _num asm("a7") = (num); \ ++ register long _arg1 asm("a0") = (long)(arg1); \ ++ register long _arg2 asm("a1") = (long)(arg2); \ ++ \ ++ asm volatile ( \ ++ "ecall\n" \ ++ : "+r"(_arg1) \ ++ : "r"(_arg2), \ ++ "r"(_num) \ ++ : "memory", "cc" \ ++ ); \ ++ _arg1; \ ++}) ++ ++#define my_syscall3(num, arg1, arg2, arg3) \ ++({ \ ++ register long _num asm("a7") = (num); \ ++ register long _arg1 asm("a0") = (long)(arg1); \ ++ register long _arg2 asm("a1") = (long)(arg2); \ ++ register long _arg3 asm("a2") = (long)(arg3); \ ++ \ ++ asm volatile ( \ ++ "ecall\n\t" \ ++ : "+r"(_arg1) \ ++ : "r"(_arg2), "r"(_arg3), \ ++ "r"(_num) \ ++ : "memory", "cc" \ ++ ); \ ++ _arg1; \ ++}) ++ ++#define my_syscall4(num, arg1, arg2, arg3, arg4) \ ++({ \ ++ register long _num asm("a7") = (num); \ ++ register long _arg1 asm("a0") = (long)(arg1); \ ++ register long _arg2 asm("a1") = (long)(arg2); \ ++ register long _arg3 asm("a2") = (long)(arg3); \ ++ register long _arg4 asm("a3") = (long)(arg4); \ ++ \ ++ asm volatile ( \ ++ "ecall\n" \ ++ : "+r"(_arg1) \ ++ : "r"(_arg2), "r"(_arg3), "r"(_arg4), \ ++ "r"(_num) \ ++ : "memory", "cc" \ ++ ); \ ++ _arg1; \ ++}) ++ ++#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5) \ ++({ \ ++ register long _num asm("a7") = (num); \ ++ register long _arg1 asm("a0") = (long)(arg1); \ ++ register long _arg2 asm("a1") = (long)(arg2); \ ++ register long _arg3 asm("a2") = (long)(arg3); \ ++ register long _arg4 asm("a3") = (long)(arg4); \ ++ register long _arg5 asm("a4") = (long)(arg5); \ ++ \ ++ asm volatile ( \ ++ "ecall\n" \ ++ : "+r"(_arg1) \ ++ : "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \ ++ "r"(_num) \ ++ : "memory", "cc" \ ++ ); \ ++ _arg1; \ ++}) ++ ++#define my_syscall6(num, arg1, arg2, arg3, arg4, arg5, arg6) \ ++({ \ ++ register long _num asm("a7") = (num); \ ++ register long _arg1 asm("a0") = (long)(arg1); \ ++ register long _arg2 asm("a1") = (long)(arg2); \ ++ register long _arg3 asm("a2") = (long)(arg3); \ ++ register long _arg4 asm("a3") = (long)(arg4); \ ++ register long _arg5 asm("a4") = (long)(arg5); \ ++ register long _arg6 asm("a5") = (long)(arg6); \ ++ \ ++ asm volatile ( \ ++ "ecall\n" \ ++ : "+r"(_arg1) 
\ ++ : "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), "r"(_arg6), \ ++ "r"(_num) \ ++ : "memory", "cc" \ ++ ); \ ++ _arg1; \ ++}) ++ ++/* startup code */ ++asm(".section .text\n" ++ ".weak _start\n" ++ "_start:\n" ++ ".option push\n" ++ ".option norelax\n" ++ "lla gp, __global_pointer$\n" ++ ".option pop\n" ++ "ld a0, 0(sp)\n" // argc (a0) was in the stack ++ "add a1, sp, "SZREG"\n" // argv (a1) = sp ++ "slli a2, a0, "PTRLOG"\n" // envp (a2) = SZREG*argc ... ++ "add a2, a2, "SZREG"\n" // + SZREG (skip null) ++ "add a2,a2,a1\n" // + argv ++ "andi sp,a1,-16\n" // sp must be 16-byte aligned ++ "call main\n" // main() returns the status code, we'll exit with it. ++ "li a7, 93\n" // NR_exit == 93 ++ "ecall\n" ++ ""); ++ ++#endif // _NOLIBC_ARCH_RISCV_H +diff --git a/tools/include/nolibc/arch-x86_64.h b/tools/include/nolibc/arch-x86_64.h +new file mode 100644 +index 0000000000000..b1af63ce1cb0b +--- /dev/null ++++ b/tools/include/nolibc/arch-x86_64.h +@@ -0,0 +1,215 @@ ++/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ ++/* ++ * x86_64 specific definitions for NOLIBC ++ * Copyright (C) 2017-2022 Willy Tarreau <w@1wt.eu> ++ */ ++ ++#ifndef _NOLIBC_ARCH_X86_64_H ++#define _NOLIBC_ARCH_X86_64_H ++ ++/* O_* macros for fcntl/open are architecture-specific */ ++#define O_RDONLY 0 ++#define O_WRONLY 1 ++#define O_RDWR 2 ++#define O_CREAT 0x40 ++#define O_EXCL 0x80 ++#define O_NOCTTY 0x100 ++#define O_TRUNC 0x200 ++#define O_APPEND 0x400 ++#define O_NONBLOCK 0x800 ++#define O_DIRECTORY 0x10000 ++ ++/* The struct returned by the stat() syscall, equivalent to stat64(). The ++ * syscall returns 116 bytes and stops in the middle of __unused. ++ */ ++struct sys_stat_struct { ++ unsigned long st_dev; ++ unsigned long st_ino; ++ unsigned long st_nlink; ++ unsigned int st_mode; ++ unsigned int st_uid; ++ ++ unsigned int st_gid; ++ unsigned int __pad0; ++ unsigned long st_rdev; ++ long st_size; ++ long st_blksize; ++ ++ long st_blocks; ++ unsigned long st_atime; ++ unsigned long st_atime_nsec; ++ unsigned long st_mtime; ++ ++ unsigned long st_mtime_nsec; ++ unsigned long st_ctime; ++ unsigned long st_ctime_nsec; ++ long __unused[3]; ++}; ++ ++/* Syscalls for x86_64 : ++ * - registers are 64-bit ++ * - syscall number is passed in rax ++ * - arguments are in rdi, rsi, rdx, r10, r8, r9 respectively ++ * - the system call is performed by calling the syscall instruction ++ * - syscall return comes in rax ++ * - rcx and r11 are clobbered, others are preserved. ++ * - the arguments are cast to long and assigned into the target registers ++ * which are then simply passed as registers to the asm code, so that we ++ * don't have to experience issues with register constraints. ++ * - the syscall number is always specified last in order to allow to force ++ * some registers before (gcc refuses a %-register at the last position). ++ * - see also x86-64 ABI section A.2 AMD64 Linux Kernel Conventions, A.2.1 ++ * Calling Conventions. 
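++ * - hypothetical usage example, not part of this file: with these
++ *   macros a bare write(1, "hi\n", 3) reduces to
++ *       my_syscall3(1, 1, "hi\n", 3);  // __NR_write == 1 on x86_64
++ *   which pins rax=1, rdi=1, rsi=buf, rdx=3, runs "syscall", and
++ *   yields the byte count (or -errno) back in rax.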
++ * ++ * Link x86-64 ABI: https://gitlab.com/x86-psABIs/x86-64-ABI/-/wikis/x86-64-psABI ++ * ++ */ ++ ++#define my_syscall0(num) \ ++({ \ ++ long _ret; \ ++ register long _num asm("rax") = (num); \ ++ \ ++ asm volatile ( \ ++ "syscall\n" \ ++ : "=a"(_ret) \ ++ : "0"(_num) \ ++ : "rcx", "r11", "memory", "cc" \ ++ ); \ ++ _ret; \ ++}) ++ ++#define my_syscall1(num, arg1) \ ++({ \ ++ long _ret; \ ++ register long _num asm("rax") = (num); \ ++ register long _arg1 asm("rdi") = (long)(arg1); \ ++ \ ++ asm volatile ( \ ++ "syscall\n" \ ++ : "=a"(_ret) \ ++ : "r"(_arg1), \ ++ "0"(_num) \ ++ : "rcx", "r11", "memory", "cc" \ ++ ); \ ++ _ret; \ ++}) ++ ++#define my_syscall2(num, arg1, arg2) \ ++({ \ ++ long _ret; \ ++ register long _num asm("rax") = (num); \ ++ register long _arg1 asm("rdi") = (long)(arg1); \ ++ register long _arg2 asm("rsi") = (long)(arg2); \ ++ \ ++ asm volatile ( \ ++ "syscall\n" \ ++ : "=a"(_ret) \ ++ : "r"(_arg1), "r"(_arg2), \ ++ "0"(_num) \ ++ : "rcx", "r11", "memory", "cc" \ ++ ); \ ++ _ret; \ ++}) ++ ++#define my_syscall3(num, arg1, arg2, arg3) \ ++({ \ ++ long _ret; \ ++ register long _num asm("rax") = (num); \ ++ register long _arg1 asm("rdi") = (long)(arg1); \ ++ register long _arg2 asm("rsi") = (long)(arg2); \ ++ register long _arg3 asm("rdx") = (long)(arg3); \ ++ \ ++ asm volatile ( \ ++ "syscall\n" \ ++ : "=a"(_ret) \ ++ : "r"(_arg1), "r"(_arg2), "r"(_arg3), \ ++ "0"(_num) \ ++ : "rcx", "r11", "memory", "cc" \ ++ ); \ ++ _ret; \ ++}) ++ ++#define my_syscall4(num, arg1, arg2, arg3, arg4) \ ++({ \ ++ long _ret; \ ++ register long _num asm("rax") = (num); \ ++ register long _arg1 asm("rdi") = (long)(arg1); \ ++ register long _arg2 asm("rsi") = (long)(arg2); \ ++ register long _arg3 asm("rdx") = (long)(arg3); \ ++ register long _arg4 asm("r10") = (long)(arg4); \ ++ \ ++ asm volatile ( \ ++ "syscall\n" \ ++ : "=a"(_ret) \ ++ : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), \ ++ "0"(_num) \ ++ : "rcx", "r11", "memory", "cc" \ ++ ); \ ++ _ret; \ ++}) ++ ++#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5) \ ++({ \ ++ long _ret; \ ++ register long _num asm("rax") = (num); \ ++ register long _arg1 asm("rdi") = (long)(arg1); \ ++ register long _arg2 asm("rsi") = (long)(arg2); \ ++ register long _arg3 asm("rdx") = (long)(arg3); \ ++ register long _arg4 asm("r10") = (long)(arg4); \ ++ register long _arg5 asm("r8") = (long)(arg5); \ ++ \ ++ asm volatile ( \ ++ "syscall\n" \ ++ : "=a"(_ret) \ ++ : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \ ++ "0"(_num) \ ++ : "rcx", "r11", "memory", "cc" \ ++ ); \ ++ _ret; \ ++}) ++ ++#define my_syscall6(num, arg1, arg2, arg3, arg4, arg5, arg6) \ ++({ \ ++ long _ret; \ ++ register long _num asm("rax") = (num); \ ++ register long _arg1 asm("rdi") = (long)(arg1); \ ++ register long _arg2 asm("rsi") = (long)(arg2); \ ++ register long _arg3 asm("rdx") = (long)(arg3); \ ++ register long _arg4 asm("r10") = (long)(arg4); \ ++ register long _arg5 asm("r8") = (long)(arg5); \ ++ register long _arg6 asm("r9") = (long)(arg6); \ ++ \ ++ asm volatile ( \ ++ "syscall\n" \ ++ : "=a"(_ret) \ ++ : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \ ++ "r"(_arg6), "0"(_num) \ ++ : "rcx", "r11", "memory", "cc" \ ++ ); \ ++ _ret; \ ++}) ++ ++/* startup code */ ++/* ++ * x86-64 System V ABI mandates: ++ * 1) %rsp must be 16-byte aligned right before the function call. ++ * 2) The deepest stack frame should be zero (the %rbp). 
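++ *    (assumption made explicit here, not stated in the original: a
++ *     zeroed %rbp marks the outermost frame so debuggers and stack
++ *     unwinders know where to stop; the "xor %ebp, %ebp" below is
++ *     what satisfies rule 2)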
++ * ++ */ ++asm(".section .text\n" ++ ".weak _start\n" ++ "_start:\n" ++ "pop %rdi\n" // argc (first arg, %rdi) ++ "mov %rsp, %rsi\n" // argv[] (second arg, %rsi) ++ "lea 8(%rsi,%rdi,8),%rdx\n" // then a NULL then envp (third arg, %rdx) ++ "xor %ebp, %ebp\n" // zero the stack frame ++ "and $-16, %rsp\n" // x86 ABI : esp must be 16-byte aligned before call ++ "call main\n" // main() returns the status code, we'll exit with it. ++ "mov %eax, %edi\n" // retrieve exit code (32 bit) ++ "mov $60, %eax\n" // NR_exit == 60 ++ "syscall\n" // really exit ++ "hlt\n" // ensure it does not return ++ ""); ++ ++#endif // _NOLIBC_ARCH_X86_64_H +diff --git a/tools/include/nolibc/arch.h b/tools/include/nolibc/arch.h +new file mode 100644 +index 0000000000000..4c6992321b0d6 +--- /dev/null ++++ b/tools/include/nolibc/arch.h +@@ -0,0 +1,32 @@ ++/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ ++/* ++ * Copyright (C) 2017-2022 Willy Tarreau <w@1wt.eu> ++ */ ++ ++/* Below comes the architecture-specific code. For each architecture, we have ++ * the syscall declarations and the _start code definition. This is the only ++ * global part. On all architectures the kernel puts everything in the stack ++ * before jumping to _start just above us, without any return address (_start ++ * is not a function but an entry pint). So at the stack pointer we find argc. ++ * Then argv[] begins, and ends at the first NULL. Then we have envp which ++ * starts and ends with a NULL as well. So envp=argv+argc+1. ++ */ ++ ++#ifndef _NOLIBC_ARCH_H ++#define _NOLIBC_ARCH_H ++ ++#if defined(__x86_64__) ++#include "arch-x86_64.h" ++#elif defined(__i386__) || defined(__i486__) || defined(__i586__) || defined(__i686__) ++#include "arch-i386.h" ++#elif defined(__ARM_EABI__) ++#include "arch-arm.h" ++#elif defined(__aarch64__) ++#include "arch-aarch64.h" ++#elif defined(__mips__) && defined(_ABIO32) ++#include "arch-mips.h" ++#elif defined(__riscv) ++#include "arch-riscv.h" ++#endif ++ ++#endif /* _NOLIBC_ARCH_H */ +diff --git a/tools/include/nolibc/nolibc.h b/tools/include/nolibc/nolibc.h +index 3430667b0d241..d272b721dc519 100644 +--- a/tools/include/nolibc/nolibc.h ++++ b/tools/include/nolibc/nolibc.h +@@ -81,13 +81,21 @@ + * + */ + ++/* standard type definitions */ ++#include "std.h" ++ ++/* system includes */ + #include <asm/unistd.h> ++#include <asm/signal.h> // for SIGCHLD + #include <asm/ioctls.h> + #include <asm/errno.h> + #include <linux/fs.h> + #include <linux/loop.h> + #include <linux/time.h> ++#include "arch.h" ++#include "types.h" + ++/* Used by programs to avoid std includes */ + #define NOLIBC + + /* this way it will be removed if unused */ +@@ -104,1318 +112,6 @@ static int errno; + */ + #define MAX_ERRNO 4095 -@@ -774,7 +788,6 @@ asm(".section .text\n" - "and %r3, %r1, $-8\n" // AAPCS : sp must be 8-byte aligned in the - "mov %sp, %r3\n" // callee, an bl doesn't push (lr=pc) - "bl main\n" // main() returns the status code, we'll exit with it. +-/* Declare a few quite common macros and types that usually are in stdlib.h, +- * stdint.h, ctype.h, unistd.h and a few other common locations. 
+- */ +- +-#define NULL ((void *)0) +- +-/* stdint types */ +-typedef unsigned char uint8_t; +-typedef signed char int8_t; +-typedef unsigned short uint16_t; +-typedef signed short int16_t; +-typedef unsigned int uint32_t; +-typedef signed int int32_t; +-typedef unsigned long long uint64_t; +-typedef signed long long int64_t; +-typedef unsigned long size_t; +-typedef signed long ssize_t; +-typedef unsigned long uintptr_t; +-typedef signed long intptr_t; +-typedef signed long ptrdiff_t; +- +-/* for stat() */ +-typedef unsigned int dev_t; +-typedef unsigned long ino_t; +-typedef unsigned int mode_t; +-typedef signed int pid_t; +-typedef unsigned int uid_t; +-typedef unsigned int gid_t; +-typedef unsigned long nlink_t; +-typedef signed long off_t; +-typedef signed long blksize_t; +-typedef signed long blkcnt_t; +-typedef signed long time_t; +- +-/* for poll() */ +-struct pollfd { +- int fd; +- short int events; +- short int revents; +-}; +- +-/* for getdents64() */ +-struct linux_dirent64 { +- uint64_t d_ino; +- int64_t d_off; +- unsigned short d_reclen; +- unsigned char d_type; +- char d_name[]; +-}; +- +-/* commonly an fd_set represents 256 FDs */ +-#define FD_SETSIZE 256 +-typedef struct { uint32_t fd32[FD_SETSIZE/32]; } fd_set; +- +-/* needed by wait4() */ +-struct rusage { +- struct timeval ru_utime; +- struct timeval ru_stime; +- long ru_maxrss; +- long ru_ixrss; +- long ru_idrss; +- long ru_isrss; +- long ru_minflt; +- long ru_majflt; +- long ru_nswap; +- long ru_inblock; +- long ru_oublock; +- long ru_msgsnd; +- long ru_msgrcv; +- long ru_nsignals; +- long ru_nvcsw; +- long ru_nivcsw; +-}; +- +-/* stat flags (WARNING, octal here) */ +-#define S_IFDIR 0040000 +-#define S_IFCHR 0020000 +-#define S_IFBLK 0060000 +-#define S_IFREG 0100000 +-#define S_IFIFO 0010000 +-#define S_IFLNK 0120000 +-#define S_IFSOCK 0140000 +-#define S_IFMT 0170000 +- +-#define S_ISDIR(mode) (((mode) & S_IFDIR) == S_IFDIR) +-#define S_ISCHR(mode) (((mode) & S_IFCHR) == S_IFCHR) +-#define S_ISBLK(mode) (((mode) & S_IFBLK) == S_IFBLK) +-#define S_ISREG(mode) (((mode) & S_IFREG) == S_IFREG) +-#define S_ISFIFO(mode) (((mode) & S_IFIFO) == S_IFIFO) +-#define S_ISLNK(mode) (((mode) & S_IFLNK) == S_IFLNK) +-#define S_ISSOCK(mode) (((mode) & S_IFSOCK) == S_IFSOCK) +- +-#define DT_UNKNOWN 0 +-#define DT_FIFO 1 +-#define DT_CHR 2 +-#define DT_DIR 4 +-#define DT_BLK 6 +-#define DT_REG 8 +-#define DT_LNK 10 +-#define DT_SOCK 12 +- +-/* all the *at functions */ +-#ifndef AT_FDCWD +-#define AT_FDCWD -100 +-#endif +- +-/* lseek */ +-#define SEEK_SET 0 +-#define SEEK_CUR 1 +-#define SEEK_END 2 +- +-/* reboot */ +-#define LINUX_REBOOT_MAGIC1 0xfee1dead +-#define LINUX_REBOOT_MAGIC2 0x28121969 +-#define LINUX_REBOOT_CMD_HALT 0xcdef0123 +-#define LINUX_REBOOT_CMD_POWER_OFF 0x4321fedc +-#define LINUX_REBOOT_CMD_RESTART 0x01234567 +-#define LINUX_REBOOT_CMD_SW_SUSPEND 0xd000fce2 +- +- +-/* The format of the struct as returned by the libc to the application, which +- * significantly differs from the format returned by the stat() syscall flavours. 
+- */ +-struct stat { +- dev_t st_dev; /* ID of device containing file */ +- ino_t st_ino; /* inode number */ +- mode_t st_mode; /* protection */ +- nlink_t st_nlink; /* number of hard links */ +- uid_t st_uid; /* user ID of owner */ +- gid_t st_gid; /* group ID of owner */ +- dev_t st_rdev; /* device ID (if special file) */ +- off_t st_size; /* total size, in bytes */ +- blksize_t st_blksize; /* blocksize for file system I/O */ +- blkcnt_t st_blocks; /* number of 512B blocks allocated */ +- time_t st_atime; /* time of last access */ +- time_t st_mtime; /* time of last modification */ +- time_t st_ctime; /* time of last status change */ +-}; +- +-#define WEXITSTATUS(status) (((status) & 0xff00) >> 8) +-#define WIFEXITED(status) (((status) & 0x7f) == 0) +- +-/* for SIGCHLD */ +-#include <asm/signal.h> +- +-/* Below comes the architecture-specific code. For each architecture, we have +- * the syscall declarations and the _start code definition. This is the only +- * global part. On all architectures the kernel puts everything in the stack +- * before jumping to _start just above us, without any return address (_start +- * is not a function but an entry pint). So at the stack pointer we find argc. +- * Then argv[] begins, and ends at the first NULL. Then we have envp which +- * starts and ends with a NULL as well. So envp=argv+argc+1. +- */ +- +-#if defined(__x86_64__) +-/* Syscalls for x86_64 : +- * - registers are 64-bit +- * - syscall number is passed in rax +- * - arguments are in rdi, rsi, rdx, r10, r8, r9 respectively +- * - the system call is performed by calling the syscall instruction +- * - syscall return comes in rax +- * - rcx and r8..r11 may be clobbered, others are preserved. +- * - the arguments are cast to long and assigned into the target registers +- * which are then simply passed as registers to the asm code, so that we +- * don't have to experience issues with register constraints. +- * - the syscall number is always specified last in order to allow to force +- * some registers before (gcc refuses a %-register at the last position). 
+- */ +- +-#define my_syscall0(num) \ +-({ \ +- long _ret; \ +- register long _num asm("rax") = (num); \ +- \ +- asm volatile ( \ +- "syscall\n" \ +- : "=a" (_ret) \ +- : "0"(_num) \ +- : "rcx", "r8", "r9", "r10", "r11", "memory", "cc" \ +- ); \ +- _ret; \ +-}) +- +-#define my_syscall1(num, arg1) \ +-({ \ +- long _ret; \ +- register long _num asm("rax") = (num); \ +- register long _arg1 asm("rdi") = (long)(arg1); \ +- \ +- asm volatile ( \ +- "syscall\n" \ +- : "=a" (_ret) \ +- : "r"(_arg1), \ +- "0"(_num) \ +- : "rcx", "r8", "r9", "r10", "r11", "memory", "cc" \ +- ); \ +- _ret; \ +-}) +- +-#define my_syscall2(num, arg1, arg2) \ +-({ \ +- long _ret; \ +- register long _num asm("rax") = (num); \ +- register long _arg1 asm("rdi") = (long)(arg1); \ +- register long _arg2 asm("rsi") = (long)(arg2); \ +- \ +- asm volatile ( \ +- "syscall\n" \ +- : "=a" (_ret) \ +- : "r"(_arg1), "r"(_arg2), \ +- "0"(_num) \ +- : "rcx", "r8", "r9", "r10", "r11", "memory", "cc" \ +- ); \ +- _ret; \ +-}) +- +-#define my_syscall3(num, arg1, arg2, arg3) \ +-({ \ +- long _ret; \ +- register long _num asm("rax") = (num); \ +- register long _arg1 asm("rdi") = (long)(arg1); \ +- register long _arg2 asm("rsi") = (long)(arg2); \ +- register long _arg3 asm("rdx") = (long)(arg3); \ +- \ +- asm volatile ( \ +- "syscall\n" \ +- : "=a" (_ret) \ +- : "r"(_arg1), "r"(_arg2), "r"(_arg3), \ +- "0"(_num) \ +- : "rcx", "r8", "r9", "r10", "r11", "memory", "cc" \ +- ); \ +- _ret; \ +-}) +- +-#define my_syscall4(num, arg1, arg2, arg3, arg4) \ +-({ \ +- long _ret; \ +- register long _num asm("rax") = (num); \ +- register long _arg1 asm("rdi") = (long)(arg1); \ +- register long _arg2 asm("rsi") = (long)(arg2); \ +- register long _arg3 asm("rdx") = (long)(arg3); \ +- register long _arg4 asm("r10") = (long)(arg4); \ +- \ +- asm volatile ( \ +- "syscall\n" \ +- : "=a" (_ret), "=r"(_arg4) \ +- : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), \ +- "0"(_num) \ +- : "rcx", "r8", "r9", "r11", "memory", "cc" \ +- ); \ +- _ret; \ +-}) +- +-#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5) \ +-({ \ +- long _ret; \ +- register long _num asm("rax") = (num); \ +- register long _arg1 asm("rdi") = (long)(arg1); \ +- register long _arg2 asm("rsi") = (long)(arg2); \ +- register long _arg3 asm("rdx") = (long)(arg3); \ +- register long _arg4 asm("r10") = (long)(arg4); \ +- register long _arg5 asm("r8") = (long)(arg5); \ +- \ +- asm volatile ( \ +- "syscall\n" \ +- : "=a" (_ret), "=r"(_arg4), "=r"(_arg5) \ +- : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \ +- "0"(_num) \ +- : "rcx", "r9", "r11", "memory", "cc" \ +- ); \ +- _ret; \ +-}) +- +-#define my_syscall6(num, arg1, arg2, arg3, arg4, arg5, arg6) \ +-({ \ +- long _ret; \ +- register long _num asm("rax") = (num); \ +- register long _arg1 asm("rdi") = (long)(arg1); \ +- register long _arg2 asm("rsi") = (long)(arg2); \ +- register long _arg3 asm("rdx") = (long)(arg3); \ +- register long _arg4 asm("r10") = (long)(arg4); \ +- register long _arg5 asm("r8") = (long)(arg5); \ +- register long _arg6 asm("r9") = (long)(arg6); \ +- \ +- asm volatile ( \ +- "syscall\n" \ +- : "=a" (_ret), "=r"(_arg4), "=r"(_arg5) \ +- : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \ +- "r"(_arg6), "0"(_num) \ +- : "rcx", "r11", "memory", "cc" \ +- ); \ +- _ret; \ +-}) +- +-/* startup code */ +-asm(".section .text\n" +- ".global _start\n" +- "_start:\n" +- "pop %rdi\n" // argc (first arg, %rdi) +- "mov %rsp, %rsi\n" // argv[] (second arg, %rsi) +- "lea 8(%rsi,%rdi,8),%rdx\n" // then a NULL then 
envp (third arg, %rdx) +- "and $-16, %rsp\n" // x86 ABI : esp must be 16-byte aligned when +- "sub $8, %rsp\n" // entering the callee +- "call main\n" // main() returns the status code, we'll exit with it. +- "movzb %al, %rdi\n" // retrieve exit code from 8 lower bits +- "mov $60, %rax\n" // NR_exit == 60 +- "syscall\n" // really exit +- "hlt\n" // ensure it does not return +- ""); +- +-/* fcntl / open */ +-#define O_RDONLY 0 +-#define O_WRONLY 1 +-#define O_RDWR 2 +-#define O_CREAT 0x40 +-#define O_EXCL 0x80 +-#define O_NOCTTY 0x100 +-#define O_TRUNC 0x200 +-#define O_APPEND 0x400 +-#define O_NONBLOCK 0x800 +-#define O_DIRECTORY 0x10000 +- +-/* The struct returned by the stat() syscall, equivalent to stat64(). The +- * syscall returns 116 bytes and stops in the middle of __unused. +- */ +-struct sys_stat_struct { +- unsigned long st_dev; +- unsigned long st_ino; +- unsigned long st_nlink; +- unsigned int st_mode; +- unsigned int st_uid; +- +- unsigned int st_gid; +- unsigned int __pad0; +- unsigned long st_rdev; +- long st_size; +- long st_blksize; +- +- long st_blocks; +- unsigned long st_atime; +- unsigned long st_atime_nsec; +- unsigned long st_mtime; +- +- unsigned long st_mtime_nsec; +- unsigned long st_ctime; +- unsigned long st_ctime_nsec; +- long __unused[3]; +-}; +- +-#elif defined(__i386__) || defined(__i486__) || defined(__i586__) || defined(__i686__) +-/* Syscalls for i386 : +- * - mostly similar to x86_64 +- * - registers are 32-bit +- * - syscall number is passed in eax +- * - arguments are in ebx, ecx, edx, esi, edi, ebp respectively +- * - all registers are preserved (except eax of course) +- * - the system call is performed by calling int $0x80 +- * - syscall return comes in eax +- * - the arguments are cast to long and assigned into the target registers +- * which are then simply passed as registers to the asm code, so that we +- * don't have to experience issues with register constraints. +- * - the syscall number is always specified last in order to allow to force +- * some registers before (gcc refuses a %-register at the last position). 
+- * +- * Also, i386 supports the old_select syscall if newselect is not available +- */ +-#define __ARCH_WANT_SYS_OLD_SELECT +- +-#define my_syscall0(num) \ +-({ \ +- long _ret; \ +- register long _num asm("eax") = (num); \ +- \ +- asm volatile ( \ +- "int $0x80\n" \ +- : "=a" (_ret) \ +- : "0"(_num) \ +- : "memory", "cc" \ +- ); \ +- _ret; \ +-}) +- +-#define my_syscall1(num, arg1) \ +-({ \ +- long _ret; \ +- register long _num asm("eax") = (num); \ +- register long _arg1 asm("ebx") = (long)(arg1); \ +- \ +- asm volatile ( \ +- "int $0x80\n" \ +- : "=a" (_ret) \ +- : "r"(_arg1), \ +- "0"(_num) \ +- : "memory", "cc" \ +- ); \ +- _ret; \ +-}) +- +-#define my_syscall2(num, arg1, arg2) \ +-({ \ +- long _ret; \ +- register long _num asm("eax") = (num); \ +- register long _arg1 asm("ebx") = (long)(arg1); \ +- register long _arg2 asm("ecx") = (long)(arg2); \ +- \ +- asm volatile ( \ +- "int $0x80\n" \ +- : "=a" (_ret) \ +- : "r"(_arg1), "r"(_arg2), \ +- "0"(_num) \ +- : "memory", "cc" \ +- ); \ +- _ret; \ +-}) +- +-#define my_syscall3(num, arg1, arg2, arg3) \ +-({ \ +- long _ret; \ +- register long _num asm("eax") = (num); \ +- register long _arg1 asm("ebx") = (long)(arg1); \ +- register long _arg2 asm("ecx") = (long)(arg2); \ +- register long _arg3 asm("edx") = (long)(arg3); \ +- \ +- asm volatile ( \ +- "int $0x80\n" \ +- : "=a" (_ret) \ +- : "r"(_arg1), "r"(_arg2), "r"(_arg3), \ +- "0"(_num) \ +- : "memory", "cc" \ +- ); \ +- _ret; \ +-}) +- +-#define my_syscall4(num, arg1, arg2, arg3, arg4) \ +-({ \ +- long _ret; \ +- register long _num asm("eax") = (num); \ +- register long _arg1 asm("ebx") = (long)(arg1); \ +- register long _arg2 asm("ecx") = (long)(arg2); \ +- register long _arg3 asm("edx") = (long)(arg3); \ +- register long _arg4 asm("esi") = (long)(arg4); \ +- \ +- asm volatile ( \ +- "int $0x80\n" \ +- : "=a" (_ret) \ +- : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), \ +- "0"(_num) \ +- : "memory", "cc" \ +- ); \ +- _ret; \ +-}) +- +-#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5) \ +-({ \ +- long _ret; \ +- register long _num asm("eax") = (num); \ +- register long _arg1 asm("ebx") = (long)(arg1); \ +- register long _arg2 asm("ecx") = (long)(arg2); \ +- register long _arg3 asm("edx") = (long)(arg3); \ +- register long _arg4 asm("esi") = (long)(arg4); \ +- register long _arg5 asm("edi") = (long)(arg5); \ +- \ +- asm volatile ( \ +- "int $0x80\n" \ +- : "=a" (_ret) \ +- : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \ +- "0"(_num) \ +- : "memory", "cc" \ +- ); \ +- _ret; \ +-}) +- +-/* startup code */ +-asm(".section .text\n" +- ".global _start\n" +- "_start:\n" +- "pop %eax\n" // argc (first arg, %eax) +- "mov %esp, %ebx\n" // argv[] (second arg, %ebx) +- "lea 4(%ebx,%eax,4),%ecx\n" // then a NULL then envp (third arg, %ecx) +- "and $-16, %esp\n" // x86 ABI : esp must be 16-byte aligned when +- "push %ecx\n" // push all registers on the stack so that we +- "push %ebx\n" // support both regparm and plain stack modes +- "push %eax\n" +- "call main\n" // main() returns the status code in %eax +- "movzbl %al, %ebx\n" // retrieve exit code from lower 8 bits +- "movl $1, %eax\n" // NR_exit == 1 +- "int $0x80\n" // exit now +- "hlt\n" // ensure it does not +- ""); +- +-/* fcntl / open */ +-#define O_RDONLY 0 +-#define O_WRONLY 1 +-#define O_RDWR 2 +-#define O_CREAT 0x40 +-#define O_EXCL 0x80 +-#define O_NOCTTY 0x100 +-#define O_TRUNC 0x200 +-#define O_APPEND 0x400 +-#define O_NONBLOCK 0x800 +-#define O_DIRECTORY 0x10000 +- +-/* The struct returned by the stat() 
syscall, 32-bit only, the syscall returns +- * exactly 56 bytes (stops before the unused array). +- */ +-struct sys_stat_struct { +- unsigned long st_dev; +- unsigned long st_ino; +- unsigned short st_mode; +- unsigned short st_nlink; +- unsigned short st_uid; +- unsigned short st_gid; +- +- unsigned long st_rdev; +- unsigned long st_size; +- unsigned long st_blksize; +- unsigned long st_blocks; +- +- unsigned long st_atime; +- unsigned long st_atime_nsec; +- unsigned long st_mtime; +- unsigned long st_mtime_nsec; +- +- unsigned long st_ctime; +- unsigned long st_ctime_nsec; +- unsigned long __unused[2]; +-}; +- +-#elif defined(__ARM_EABI__) +-/* Syscalls for ARM in ARM or Thumb modes : +- * - registers are 32-bit +- * - stack is 8-byte aligned +- * ( http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.faqs/ka4127.html) +- * - syscall number is passed in r7 +- * - arguments are in r0, r1, r2, r3, r4, r5 +- * - the system call is performed by calling svc #0 +- * - syscall return comes in r0. +- * - only lr is clobbered. +- * - the arguments are cast to long and assigned into the target registers +- * which are then simply passed as registers to the asm code, so that we +- * don't have to experience issues with register constraints. +- * - the syscall number is always specified last in order to allow to force +- * some registers before (gcc refuses a %-register at the last position). +- * +- * Also, ARM supports the old_select syscall if newselect is not available +- */ +-#define __ARCH_WANT_SYS_OLD_SELECT +- +-#define my_syscall0(num) \ +-({ \ +- register long _num asm("r7") = (num); \ +- register long _arg1 asm("r0"); \ +- \ +- asm volatile ( \ +- "svc #0\n" \ +- : "=r"(_arg1) \ +- : "r"(_num) \ +- : "memory", "cc", "lr" \ +- ); \ +- _arg1; \ +-}) +- +-#define my_syscall1(num, arg1) \ +-({ \ +- register long _num asm("r7") = (num); \ +- register long _arg1 asm("r0") = (long)(arg1); \ +- \ +- asm volatile ( \ +- "svc #0\n" \ +- : "=r"(_arg1) \ +- : "r"(_arg1), \ +- "r"(_num) \ +- : "memory", "cc", "lr" \ +- ); \ +- _arg1; \ +-}) +- +-#define my_syscall2(num, arg1, arg2) \ +-({ \ +- register long _num asm("r7") = (num); \ +- register long _arg1 asm("r0") = (long)(arg1); \ +- register long _arg2 asm("r1") = (long)(arg2); \ +- \ +- asm volatile ( \ +- "svc #0\n" \ +- : "=r"(_arg1) \ +- : "r"(_arg1), "r"(_arg2), \ +- "r"(_num) \ +- : "memory", "cc", "lr" \ +- ); \ +- _arg1; \ +-}) +- +-#define my_syscall3(num, arg1, arg2, arg3) \ +-({ \ +- register long _num asm("r7") = (num); \ +- register long _arg1 asm("r0") = (long)(arg1); \ +- register long _arg2 asm("r1") = (long)(arg2); \ +- register long _arg3 asm("r2") = (long)(arg3); \ +- \ +- asm volatile ( \ +- "svc #0\n" \ +- : "=r"(_arg1) \ +- : "r"(_arg1), "r"(_arg2), "r"(_arg3), \ +- "r"(_num) \ +- : "memory", "cc", "lr" \ +- ); \ +- _arg1; \ +-}) +- +-#define my_syscall4(num, arg1, arg2, arg3, arg4) \ +-({ \ +- register long _num asm("r7") = (num); \ +- register long _arg1 asm("r0") = (long)(arg1); \ +- register long _arg2 asm("r1") = (long)(arg2); \ +- register long _arg3 asm("r2") = (long)(arg3); \ +- register long _arg4 asm("r3") = (long)(arg4); \ +- \ +- asm volatile ( \ +- "svc #0\n" \ +- : "=r"(_arg1) \ +- : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), \ +- "r"(_num) \ +- : "memory", "cc", "lr" \ +- ); \ +- _arg1; \ +-}) +- +-#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5) \ +-({ \ +- register long _num asm("r7") = (num); \ +- register long _arg1 asm("r0") = (long)(arg1); \ +- register long _arg2 asm("r1") = 
(long)(arg2); \ +- register long _arg3 asm("r2") = (long)(arg3); \ +- register long _arg4 asm("r3") = (long)(arg4); \ +- register long _arg5 asm("r4") = (long)(arg5); \ +- \ +- asm volatile ( \ +- "svc #0\n" \ +- : "=r" (_arg1) \ +- : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \ +- "r"(_num) \ +- : "memory", "cc", "lr" \ +- ); \ +- _arg1; \ +-}) +- +-/* startup code */ +-asm(".section .text\n" +- ".global _start\n" +- "_start:\n" +-#if defined(__THUMBEB__) || defined(__THUMBEL__) +- /* We enter here in 32-bit mode but if some previous functions were in +- * 16-bit mode, the assembler cannot know, so we need to tell it we're in +- * 32-bit now, then switch to 16-bit (is there a better way to do it than +- * adding 1 by hand ?) and tell the asm we're now in 16-bit mode so that +- * it generates correct instructions. Note that we do not support thumb1. +- */ +- ".code 32\n" +- "add r0, pc, #1\n" +- "bx r0\n" +- ".code 16\n" +-#endif +- "pop {%r0}\n" // argc was in the stack +- "mov %r1, %sp\n" // argv = sp +- "add %r2, %r1, %r0, lsl #2\n" // envp = argv + 4*argc ... +- "add %r2, %r2, $4\n" // ... + 4 +- "and %r3, %r1, $-8\n" // AAPCS : sp must be 8-byte aligned in the +- "mov %sp, %r3\n" // callee, an bl doesn't push (lr=pc) +- "bl main\n" // main() returns the status code, we'll exit with it. - "and %r0, %r0, $0xff\n" // limit exit code to 8 bits - "movs r7, $1\n" // NR_exit == 1 - "svc $0x00\n" - ""); -@@ -971,7 +984,6 @@ asm(".section .text\n" - "add x2, x2, x1\n" // + argv - "and sp, x1, -16\n" // sp must be 16-byte aligned in the callee - "bl main\n" // main() returns the status code, we'll exit with it. +- "movs r7, $1\n" // NR_exit == 1 +- "svc $0x00\n" +- ""); +- +-/* fcntl / open */ +-#define O_RDONLY 0 +-#define O_WRONLY 1 +-#define O_RDWR 2 +-#define O_CREAT 0x40 +-#define O_EXCL 0x80 +-#define O_NOCTTY 0x100 +-#define O_TRUNC 0x200 +-#define O_APPEND 0x400 +-#define O_NONBLOCK 0x800 +-#define O_DIRECTORY 0x4000 +- +-/* The struct returned by the stat() syscall, 32-bit only, the syscall returns +- * exactly 56 bytes (stops before the unused array). In big endian, the format +- * differs as devices are returned as short only. +- */ +-struct sys_stat_struct { +-#if defined(__ARMEB__) +- unsigned short st_dev; +- unsigned short __pad1; +-#else +- unsigned long st_dev; +-#endif +- unsigned long st_ino; +- unsigned short st_mode; +- unsigned short st_nlink; +- unsigned short st_uid; +- unsigned short st_gid; +-#if defined(__ARMEB__) +- unsigned short st_rdev; +- unsigned short __pad2; +-#else +- unsigned long st_rdev; +-#endif +- unsigned long st_size; +- unsigned long st_blksize; +- unsigned long st_blocks; +- unsigned long st_atime; +- unsigned long st_atime_nsec; +- unsigned long st_mtime; +- unsigned long st_mtime_nsec; +- unsigned long st_ctime; +- unsigned long st_ctime_nsec; +- unsigned long __unused[2]; +-}; +- +-#elif defined(__aarch64__) +-/* Syscalls for AARCH64 : +- * - registers are 64-bit +- * - stack is 16-byte aligned +- * - syscall number is passed in x8 +- * - arguments are in x0, x1, x2, x3, x4, x5 +- * - the system call is performed by calling svc 0 +- * - syscall return comes in x0. +- * - the arguments are cast to long and assigned into the target registers +- * which are then simply passed as registers to the asm code, so that we +- * don't have to experience issues with register constraints. +- * +- * On aarch64, select() is not implemented so we have to use pselect6(). 
+- */ +-#define __ARCH_WANT_SYS_PSELECT6 +- +-#define my_syscall0(num) \ +-({ \ +- register long _num asm("x8") = (num); \ +- register long _arg1 asm("x0"); \ +- \ +- asm volatile ( \ +- "svc #0\n" \ +- : "=r"(_arg1) \ +- : "r"(_num) \ +- : "memory", "cc" \ +- ); \ +- _arg1; \ +-}) +- +-#define my_syscall1(num, arg1) \ +-({ \ +- register long _num asm("x8") = (num); \ +- register long _arg1 asm("x0") = (long)(arg1); \ +- \ +- asm volatile ( \ +- "svc #0\n" \ +- : "=r"(_arg1) \ +- : "r"(_arg1), \ +- "r"(_num) \ +- : "memory", "cc" \ +- ); \ +- _arg1; \ +-}) +- +-#define my_syscall2(num, arg1, arg2) \ +-({ \ +- register long _num asm("x8") = (num); \ +- register long _arg1 asm("x0") = (long)(arg1); \ +- register long _arg2 asm("x1") = (long)(arg2); \ +- \ +- asm volatile ( \ +- "svc #0\n" \ +- : "=r"(_arg1) \ +- : "r"(_arg1), "r"(_arg2), \ +- "r"(_num) \ +- : "memory", "cc" \ +- ); \ +- _arg1; \ +-}) +- +-#define my_syscall3(num, arg1, arg2, arg3) \ +-({ \ +- register long _num asm("x8") = (num); \ +- register long _arg1 asm("x0") = (long)(arg1); \ +- register long _arg2 asm("x1") = (long)(arg2); \ +- register long _arg3 asm("x2") = (long)(arg3); \ +- \ +- asm volatile ( \ +- "svc #0\n" \ +- : "=r"(_arg1) \ +- : "r"(_arg1), "r"(_arg2), "r"(_arg3), \ +- "r"(_num) \ +- : "memory", "cc" \ +- ); \ +- _arg1; \ +-}) +- +-#define my_syscall4(num, arg1, arg2, arg3, arg4) \ +-({ \ +- register long _num asm("x8") = (num); \ +- register long _arg1 asm("x0") = (long)(arg1); \ +- register long _arg2 asm("x1") = (long)(arg2); \ +- register long _arg3 asm("x2") = (long)(arg3); \ +- register long _arg4 asm("x3") = (long)(arg4); \ +- \ +- asm volatile ( \ +- "svc #0\n" \ +- : "=r"(_arg1) \ +- : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), \ +- "r"(_num) \ +- : "memory", "cc" \ +- ); \ +- _arg1; \ +-}) +- +-#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5) \ +-({ \ +- register long _num asm("x8") = (num); \ +- register long _arg1 asm("x0") = (long)(arg1); \ +- register long _arg2 asm("x1") = (long)(arg2); \ +- register long _arg3 asm("x2") = (long)(arg3); \ +- register long _arg4 asm("x3") = (long)(arg4); \ +- register long _arg5 asm("x4") = (long)(arg5); \ +- \ +- asm volatile ( \ +- "svc #0\n" \ +- : "=r" (_arg1) \ +- : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \ +- "r"(_num) \ +- : "memory", "cc" \ +- ); \ +- _arg1; \ +-}) +- +-#define my_syscall6(num, arg1, arg2, arg3, arg4, arg5, arg6) \ +-({ \ +- register long _num asm("x8") = (num); \ +- register long _arg1 asm("x0") = (long)(arg1); \ +- register long _arg2 asm("x1") = (long)(arg2); \ +- register long _arg3 asm("x2") = (long)(arg3); \ +- register long _arg4 asm("x3") = (long)(arg4); \ +- register long _arg5 asm("x4") = (long)(arg5); \ +- register long _arg6 asm("x5") = (long)(arg6); \ +- \ +- asm volatile ( \ +- "svc #0\n" \ +- : "=r" (_arg1) \ +- : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \ +- "r"(_arg6), "r"(_num) \ +- : "memory", "cc" \ +- ); \ +- _arg1; \ +-}) +- +-/* startup code */ +-asm(".section .text\n" +- ".global _start\n" +- "_start:\n" +- "ldr x0, [sp]\n" // argc (x0) was in the stack +- "add x1, sp, 8\n" // argv (x1) = sp +- "lsl x2, x0, 3\n" // envp (x2) = 8*argc ... +- "add x2, x2, 8\n" // + 8 (skip null) +- "add x2, x2, x1\n" // + argv +- "and sp, x1, -16\n" // sp must be 16-byte aligned in the callee +- "bl main\n" // main() returns the status code, we'll exit with it. 
- "and x0, x0, 0xff\n" // limit exit code to 8 bits - "mov x8, 93\n" // NR_exit == 93 - "svc #0\n" - ""); -@@ -1176,7 +1188,7 @@ asm(".section .text\n" - "addiu $sp,$sp,-16\n" // the callee expects to save a0..a3 there! - "jal main\n" // main() returns the status code, we'll exit with it. - "nop\n" // delayed slot +- "mov x8, 93\n" // NR_exit == 93 +- "svc #0\n" +- ""); +- +-/* fcntl / open */ +-#define O_RDONLY 0 +-#define O_WRONLY 1 +-#define O_RDWR 2 +-#define O_CREAT 0x40 +-#define O_EXCL 0x80 +-#define O_NOCTTY 0x100 +-#define O_TRUNC 0x200 +-#define O_APPEND 0x400 +-#define O_NONBLOCK 0x800 +-#define O_DIRECTORY 0x4000 +- +-/* The struct returned by the newfstatat() syscall. Differs slightly from the +- * x86_64's stat one by field ordering, so be careful. +- */ +-struct sys_stat_struct { +- unsigned long st_dev; +- unsigned long st_ino; +- unsigned int st_mode; +- unsigned int st_nlink; +- unsigned int st_uid; +- unsigned int st_gid; +- +- unsigned long st_rdev; +- unsigned long __pad1; +- long st_size; +- int st_blksize; +- int __pad2; +- +- long st_blocks; +- long st_atime; +- unsigned long st_atime_nsec; +- long st_mtime; +- +- unsigned long st_mtime_nsec; +- long st_ctime; +- unsigned long st_ctime_nsec; +- unsigned int __unused[2]; +-}; +- +-#elif defined(__mips__) && defined(_ABIO32) +-/* Syscalls for MIPS ABI O32 : +- * - WARNING! there's always a delayed slot! +- * - WARNING again, the syntax is different, registers take a '$' and numbers +- * do not. +- * - registers are 32-bit +- * - stack is 8-byte aligned +- * - syscall number is passed in v0 (starts at 0xfa0). +- * - arguments are in a0, a1, a2, a3, then the stack. The caller needs to +- * leave some room in the stack for the callee to save a0..a3 if needed. +- * - Many registers are clobbered, in fact only a0..a2 and s0..s8 are +- * preserved. See: https://www.linux-mips.org/wiki/Syscall as well as +- * scall32-o32.S in the kernel sources. +- * - the system call is performed by calling "syscall" +- * - syscall return comes in v0, and register a3 needs to be checked to know +- * if an error occurred, in which case errno is in v0. +- * - the arguments are cast to long and assigned into the target registers +- * which are then simply passed as registers to the asm code, so that we +- * don't have to experience issues with register constraints. +- */ +- +-#define my_syscall0(num) \ +-({ \ +- register long _num asm("v0") = (num); \ +- register long _arg4 asm("a3"); \ +- \ +- asm volatile ( \ +- "addiu $sp, $sp, -32\n" \ +- "syscall\n" \ +- "addiu $sp, $sp, 32\n" \ +- : "=r"(_num), "=r"(_arg4) \ +- : "r"(_num) \ +- : "memory", "cc", "at", "v1", "hi", "lo", \ +- "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9" \ +- ); \ +- _arg4 ? -_num : _num; \ +-}) +- +-#define my_syscall1(num, arg1) \ +-({ \ +- register long _num asm("v0") = (num); \ +- register long _arg1 asm("a0") = (long)(arg1); \ +- register long _arg4 asm("a3"); \ +- \ +- asm volatile ( \ +- "addiu $sp, $sp, -32\n" \ +- "syscall\n" \ +- "addiu $sp, $sp, 32\n" \ +- : "=r"(_num), "=r"(_arg4) \ +- : "0"(_num), \ +- "r"(_arg1) \ +- : "memory", "cc", "at", "v1", "hi", "lo", \ +- "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9" \ +- ); \ +- _arg4 ? 
-_num : _num; \ +-}) +- +-#define my_syscall2(num, arg1, arg2) \ +-({ \ +- register long _num asm("v0") = (num); \ +- register long _arg1 asm("a0") = (long)(arg1); \ +- register long _arg2 asm("a1") = (long)(arg2); \ +- register long _arg4 asm("a3"); \ +- \ +- asm volatile ( \ +- "addiu $sp, $sp, -32\n" \ +- "syscall\n" \ +- "addiu $sp, $sp, 32\n" \ +- : "=r"(_num), "=r"(_arg4) \ +- : "0"(_num), \ +- "r"(_arg1), "r"(_arg2) \ +- : "memory", "cc", "at", "v1", "hi", "lo", \ +- "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9" \ +- ); \ +- _arg4 ? -_num : _num; \ +-}) +- +-#define my_syscall3(num, arg1, arg2, arg3) \ +-({ \ +- register long _num asm("v0") = (num); \ +- register long _arg1 asm("a0") = (long)(arg1); \ +- register long _arg2 asm("a1") = (long)(arg2); \ +- register long _arg3 asm("a2") = (long)(arg3); \ +- register long _arg4 asm("a3"); \ +- \ +- asm volatile ( \ +- "addiu $sp, $sp, -32\n" \ +- "syscall\n" \ +- "addiu $sp, $sp, 32\n" \ +- : "=r"(_num), "=r"(_arg4) \ +- : "0"(_num), \ +- "r"(_arg1), "r"(_arg2), "r"(_arg3) \ +- : "memory", "cc", "at", "v1", "hi", "lo", \ +- "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9" \ +- ); \ +- _arg4 ? -_num : _num; \ +-}) +- +-#define my_syscall4(num, arg1, arg2, arg3, arg4) \ +-({ \ +- register long _num asm("v0") = (num); \ +- register long _arg1 asm("a0") = (long)(arg1); \ +- register long _arg2 asm("a1") = (long)(arg2); \ +- register long _arg3 asm("a2") = (long)(arg3); \ +- register long _arg4 asm("a3") = (long)(arg4); \ +- \ +- asm volatile ( \ +- "addiu $sp, $sp, -32\n" \ +- "syscall\n" \ +- "addiu $sp, $sp, 32\n" \ +- : "=r" (_num), "=r"(_arg4) \ +- : "0"(_num), \ +- "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4) \ +- : "memory", "cc", "at", "v1", "hi", "lo", \ +- "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9" \ +- ); \ +- _arg4 ? -_num : _num; \ +-}) +- +-#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5) \ +-({ \ +- register long _num asm("v0") = (num); \ +- register long _arg1 asm("a0") = (long)(arg1); \ +- register long _arg2 asm("a1") = (long)(arg2); \ +- register long _arg3 asm("a2") = (long)(arg3); \ +- register long _arg4 asm("a3") = (long)(arg4); \ +- register long _arg5 = (long)(arg5); \ +- \ +- asm volatile ( \ +- "addiu $sp, $sp, -32\n" \ +- "sw %7, 16($sp)\n" \ +- "syscall\n " \ +- "addiu $sp, $sp, 32\n" \ +- : "=r" (_num), "=r"(_arg4) \ +- : "0"(_num), \ +- "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5) \ +- : "memory", "cc", "at", "v1", "hi", "lo", \ +- "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9" \ +- ); \ +- _arg4 ? -_num : _num; \ +-}) +- +-/* startup code, note that it's called __start on MIPS */ +-asm(".section .text\n" +- ".set nomips16\n" +- ".global __start\n" +- ".set noreorder\n" +- ".option pic0\n" +- ".ent __start\n" +- "__start:\n" +- "lw $a0,($sp)\n" // argc was in the stack +- "addiu $a1, $sp, 4\n" // argv = sp + 4 +- "sll $a2, $a0, 2\n" // a2 = argc * 4 +- "add $a2, $a2, $a1\n" // envp = argv + 4*argc ... +- "addiu $a2, $a2, 4\n" // ... + 4 +- "li $t0, -8\n" +- "and $sp, $sp, $t0\n" // sp must be 8-byte aligned +- "addiu $sp,$sp,-16\n" // the callee expects to save a0..a3 there! +- "jal main\n" // main() returns the status code, we'll exit with it. 
+- "nop\n" // delayed slot - "and $a0, $v0, 0xff\n" // limit exit code to 8 bits -+ "move $a0, $v0\n" // retrieve 32-bit exit code from v0 - "li $v0, 4001\n" // NR_exit == 4001 - "syscall\n" - ".end __start\n" -@@ -1374,7 +1386,6 @@ asm(".section .text\n" - "add a2,a2,a1\n" // + argv - "andi sp,a1,-16\n" // sp must be 16-byte aligned - "call main\n" // main() returns the status code, we'll exit with it. +- "li $v0, 4001\n" // NR_exit == 4001 +- "syscall\n" +- ".end __start\n" +- ""); +- +-/* fcntl / open */ +-#define O_RDONLY 0 +-#define O_WRONLY 1 +-#define O_RDWR 2 +-#define O_APPEND 0x0008 +-#define O_NONBLOCK 0x0080 +-#define O_CREAT 0x0100 +-#define O_TRUNC 0x0200 +-#define O_EXCL 0x0400 +-#define O_NOCTTY 0x0800 +-#define O_DIRECTORY 0x10000 +- +-/* The struct returned by the stat() syscall. 88 bytes are returned by the +- * syscall. +- */ +-struct sys_stat_struct { +- unsigned int st_dev; +- long st_pad1[3]; +- unsigned long st_ino; +- unsigned int st_mode; +- unsigned int st_nlink; +- unsigned int st_uid; +- unsigned int st_gid; +- unsigned int st_rdev; +- long st_pad2[2]; +- long st_size; +- long st_pad3; +- long st_atime; +- long st_atime_nsec; +- long st_mtime; +- long st_mtime_nsec; +- long st_ctime; +- long st_ctime_nsec; +- long st_blksize; +- long st_blocks; +- long st_pad4[14]; +-}; +- +-#elif defined(__riscv) +- +-#if __riscv_xlen == 64 +-#define PTRLOG "3" +-#define SZREG "8" +-#elif __riscv_xlen == 32 +-#define PTRLOG "2" +-#define SZREG "4" +-#endif +- +-/* Syscalls for RISCV : +- * - stack is 16-byte aligned +- * - syscall number is passed in a7 +- * - arguments are in a0, a1, a2, a3, a4, a5 +- * - the system call is performed by calling ecall +- * - syscall return comes in a0 +- * - the arguments are cast to long and assigned into the target +- * registers which are then simply passed as registers to the asm code, +- * so that we don't have to experience issues with register constraints. 
+- */ +- +-#define my_syscall0(num) \ +-({ \ +- register long _num asm("a7") = (num); \ +- register long _arg1 asm("a0"); \ +- \ +- asm volatile ( \ +- "ecall\n\t" \ +- : "=r"(_arg1) \ +- : "r"(_num) \ +- : "memory", "cc" \ +- ); \ +- _arg1; \ +-}) +- +-#define my_syscall1(num, arg1) \ +-({ \ +- register long _num asm("a7") = (num); \ +- register long _arg1 asm("a0") = (long)(arg1); \ +- \ +- asm volatile ( \ +- "ecall\n" \ +- : "+r"(_arg1) \ +- : "r"(_num) \ +- : "memory", "cc" \ +- ); \ +- _arg1; \ +-}) +- +-#define my_syscall2(num, arg1, arg2) \ +-({ \ +- register long _num asm("a7") = (num); \ +- register long _arg1 asm("a0") = (long)(arg1); \ +- register long _arg2 asm("a1") = (long)(arg2); \ +- \ +- asm volatile ( \ +- "ecall\n" \ +- : "+r"(_arg1) \ +- : "r"(_arg2), \ +- "r"(_num) \ +- : "memory", "cc" \ +- ); \ +- _arg1; \ +-}) +- +-#define my_syscall3(num, arg1, arg2, arg3) \ +-({ \ +- register long _num asm("a7") = (num); \ +- register long _arg1 asm("a0") = (long)(arg1); \ +- register long _arg2 asm("a1") = (long)(arg2); \ +- register long _arg3 asm("a2") = (long)(arg3); \ +- \ +- asm volatile ( \ +- "ecall\n\t" \ +- : "+r"(_arg1) \ +- : "r"(_arg2), "r"(_arg3), \ +- "r"(_num) \ +- : "memory", "cc" \ +- ); \ +- _arg1; \ +-}) +- +-#define my_syscall4(num, arg1, arg2, arg3, arg4) \ +-({ \ +- register long _num asm("a7") = (num); \ +- register long _arg1 asm("a0") = (long)(arg1); \ +- register long _arg2 asm("a1") = (long)(arg2); \ +- register long _arg3 asm("a2") = (long)(arg3); \ +- register long _arg4 asm("a3") = (long)(arg4); \ +- \ +- asm volatile ( \ +- "ecall\n" \ +- : "+r"(_arg1) \ +- : "r"(_arg2), "r"(_arg3), "r"(_arg4), \ +- "r"(_num) \ +- : "memory", "cc" \ +- ); \ +- _arg1; \ +-}) +- +-#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5) \ +-({ \ +- register long _num asm("a7") = (num); \ +- register long _arg1 asm("a0") = (long)(arg1); \ +- register long _arg2 asm("a1") = (long)(arg2); \ +- register long _arg3 asm("a2") = (long)(arg3); \ +- register long _arg4 asm("a3") = (long)(arg4); \ +- register long _arg5 asm("a4") = (long)(arg5); \ +- \ +- asm volatile ( \ +- "ecall\n" \ +- : "+r"(_arg1) \ +- : "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \ +- "r"(_num) \ +- : "memory", "cc" \ +- ); \ +- _arg1; \ +-}) +- +-#define my_syscall6(num, arg1, arg2, arg3, arg4, arg5, arg6) \ +-({ \ +- register long _num asm("a7") = (num); \ +- register long _arg1 asm("a0") = (long)(arg1); \ +- register long _arg2 asm("a1") = (long)(arg2); \ +- register long _arg3 asm("a2") = (long)(arg3); \ +- register long _arg4 asm("a3") = (long)(arg4); \ +- register long _arg5 asm("a4") = (long)(arg5); \ +- register long _arg6 asm("a5") = (long)(arg6); \ +- \ +- asm volatile ( \ +- "ecall\n" \ +- : "+r"(_arg1) \ +- : "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), "r"(_arg6), \ +- "r"(_num) \ +- : "memory", "cc" \ +- ); \ +- _arg1; \ +-}) +- +-/* startup code */ +-asm(".section .text\n" +- ".global _start\n" +- "_start:\n" +- ".option push\n" +- ".option norelax\n" +- "lla gp, __global_pointer$\n" +- ".option pop\n" +- "ld a0, 0(sp)\n" // argc (a0) was in the stack +- "add a1, sp, "SZREG"\n" // argv (a1) = sp +- "slli a2, a0, "PTRLOG"\n" // envp (a2) = SZREG*argc ... +- "add a2, a2, "SZREG"\n" // + SZREG (skip null) +- "add a2,a2,a1\n" // + argv +- "andi sp,a1,-16\n" // sp must be 16-byte aligned +- "call main\n" // main() returns the status code, we'll exit with it. 
- "andi a0, a0, 0xff\n" // limit exit code to 8 bits - "li a7, 93\n" // NR_exit == 93 - "ecall\n" - ""); +- "li a7, 93\n" // NR_exit == 93 +- "ecall\n" +- ""); +- +-/* fcntl / open */ +-#define O_RDONLY 0 +-#define O_WRONLY 1 +-#define O_RDWR 2 +-#define O_CREAT 0x100 +-#define O_EXCL 0x200 +-#define O_NOCTTY 0x400 +-#define O_TRUNC 0x1000 +-#define O_APPEND 0x2000 +-#define O_NONBLOCK 0x4000 +-#define O_DIRECTORY 0x200000 +- +-struct sys_stat_struct { +- unsigned long st_dev; /* Device. */ +- unsigned long st_ino; /* File serial number. */ +- unsigned int st_mode; /* File mode. */ +- unsigned int st_nlink; /* Link count. */ +- unsigned int st_uid; /* User ID of the file's owner. */ +- unsigned int st_gid; /* Group ID of the file's group. */ +- unsigned long st_rdev; /* Device number, if device. */ +- unsigned long __pad1; +- long st_size; /* Size of file, in bytes. */ +- int st_blksize; /* Optimal block size for I/O. */ +- int __pad2; +- long st_blocks; /* Number 512-byte blocks allocated. */ +- long st_atime; /* Time of last access. */ +- unsigned long st_atime_nsec; +- long st_mtime; /* Time of last modification. */ +- unsigned long st_mtime_nsec; +- long st_ctime; /* Time of last status change. */ +- unsigned long st_ctime_nsec; +- unsigned int __unused4; +- unsigned int __unused5; +-}; +- +-#endif +- + + /* Below are the C functions used to declare the raw syscalls. They try to be + * architecture-agnostic, and return either a success or -errno. Declaring them +@@ -2397,9 +1093,9 @@ static __attribute__((unused)) + int memcmp(const void *s1, const void *s2, size_t n) + { + size_t ofs = 0; +- char c1 = 0; ++ int c1 = 0; + +- while (ofs < n && !(c1 = ((char *)s1)[ofs] - ((char *)s2)[ofs])) { ++ while (ofs < n && !(c1 = ((unsigned char *)s1)[ofs] - ((unsigned char *)s2)[ofs])) { + ofs++; + } + return c1; +diff --git a/tools/include/nolibc/std.h b/tools/include/nolibc/std.h +new file mode 100644 +index 0000000000000..1747ae1253920 +--- /dev/null ++++ b/tools/include/nolibc/std.h +@@ -0,0 +1,49 @@ ++/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ ++/* ++ * Standard definitions and types for NOLIBC ++ * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu> ++ */ ++ ++#ifndef _NOLIBC_STD_H ++#define _NOLIBC_STD_H ++ ++/* Declare a few quite common macros and types that usually are in stdlib.h, ++ * stdint.h, ctype.h, unistd.h and a few other common locations. Please place ++ * integer type definitions and generic macros here, but avoid OS-specific and ++ * syscall-specific stuff, as this file is expected to be included very early. 
++ */ ++ ++/* note: may already be defined */ ++#ifndef NULL ++#define NULL ((void *)0) ++#endif ++ ++/* stdint types */ ++typedef unsigned char uint8_t; ++typedef signed char int8_t; ++typedef unsigned short uint16_t; ++typedef signed short int16_t; ++typedef unsigned int uint32_t; ++typedef signed int int32_t; ++typedef unsigned long long uint64_t; ++typedef signed long long int64_t; ++typedef unsigned long size_t; ++typedef signed long ssize_t; ++typedef unsigned long uintptr_t; ++typedef signed long intptr_t; ++typedef signed long ptrdiff_t; ++ ++/* those are commonly provided by sys/types.h */ ++typedef unsigned int dev_t; ++typedef unsigned long ino_t; ++typedef unsigned int mode_t; ++typedef signed int pid_t; ++typedef unsigned int uid_t; ++typedef unsigned int gid_t; ++typedef unsigned long nlink_t; ++typedef signed long off_t; ++typedef signed long blksize_t; ++typedef signed long blkcnt_t; ++typedef signed long time_t; ++ ++#endif /* _NOLIBC_STD_H */ +diff --git a/tools/include/nolibc/types.h b/tools/include/nolibc/types.h +new file mode 100644 +index 0000000000000..2f09abaf95f19 +--- /dev/null ++++ b/tools/include/nolibc/types.h +@@ -0,0 +1,133 @@ ++/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ ++/* ++ * Special types used by various syscalls for NOLIBC ++ * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu> ++ */ ++ ++#ifndef _NOLIBC_TYPES_H ++#define _NOLIBC_TYPES_H ++ ++#include "std.h" ++#include <linux/time.h> ++ ++ ++/* Only the generic macros and types may be defined here. The arch-specific ++ * ones such as the O_RDONLY and related macros used by fcntl() and open(), or ++ * the layout of sys_stat_struct must not be defined here. ++ */ ++ ++/* stat flags (WARNING, octal here) */ ++#define S_IFDIR 0040000 ++#define S_IFCHR 0020000 ++#define S_IFBLK 0060000 ++#define S_IFREG 0100000 ++#define S_IFIFO 0010000 ++#define S_IFLNK 0120000 ++#define S_IFSOCK 0140000 ++#define S_IFMT 0170000 ++ ++#define S_ISDIR(mode) (((mode) & S_IFDIR) == S_IFDIR) ++#define S_ISCHR(mode) (((mode) & S_IFCHR) == S_IFCHR) ++#define S_ISBLK(mode) (((mode) & S_IFBLK) == S_IFBLK) ++#define S_ISREG(mode) (((mode) & S_IFREG) == S_IFREG) ++#define S_ISFIFO(mode) (((mode) & S_IFIFO) == S_IFIFO) ++#define S_ISLNK(mode) (((mode) & S_IFLNK) == S_IFLNK) ++#define S_ISSOCK(mode) (((mode) & S_IFSOCK) == S_IFSOCK) ++ ++/* dirent types */ ++#define DT_UNKNOWN 0x0 ++#define DT_FIFO 0x1 ++#define DT_CHR 0x2 ++#define DT_DIR 0x4 ++#define DT_BLK 0x6 ++#define DT_REG 0x8 ++#define DT_LNK 0xa ++#define DT_SOCK 0xc ++ ++/* commonly an fd_set represents 256 FDs */ ++#define FD_SETSIZE 256 ++ ++/* Special FD used by all the *at functions */ ++#ifndef AT_FDCWD ++#define AT_FDCWD (-100) ++#endif ++ ++/* whence values for lseek() */ ++#define SEEK_SET 0 ++#define SEEK_CUR 1 ++#define SEEK_END 2 ++ ++/* cmd for reboot() */ ++#define LINUX_REBOOT_MAGIC1 0xfee1dead ++#define LINUX_REBOOT_MAGIC2 0x28121969 ++#define LINUX_REBOOT_CMD_HALT 0xcdef0123 ++#define LINUX_REBOOT_CMD_POWER_OFF 0x4321fedc ++#define LINUX_REBOOT_CMD_RESTART 0x01234567 ++#define LINUX_REBOOT_CMD_SW_SUSPEND 0xd000fce2 ++ ++/* Macros used on waitpid()'s return status */ ++#define WEXITSTATUS(status) (((status) & 0xff00) >> 8) ++#define WIFEXITED(status) (((status) & 0x7f) == 0) ++ ++ ++/* for select() */ ++typedef struct { ++ uint32_t fd32[FD_SETSIZE / 32]; ++} fd_set; ++ ++/* for poll() */ ++struct pollfd { ++ int fd; ++ short int events; ++ short int revents; ++}; ++ ++/* for getdents64() */ ++struct linux_dirent64 { ++ uint64_t d_ino; ++ int64_t d_off; ++ 
unsigned short d_reclen; ++ unsigned char d_type; ++ char d_name[]; ++}; ++ ++/* needed by wait4() */ ++struct rusage { ++ struct timeval ru_utime; ++ struct timeval ru_stime; ++ long ru_maxrss; ++ long ru_ixrss; ++ long ru_idrss; ++ long ru_isrss; ++ long ru_minflt; ++ long ru_majflt; ++ long ru_nswap; ++ long ru_inblock; ++ long ru_oublock; ++ long ru_msgsnd; ++ long ru_msgrcv; ++ long ru_nsignals; ++ long ru_nvcsw; ++ long ru_nivcsw; ++}; ++ ++/* The format of the struct as returned by the libc to the application, which ++ * significantly differs from the format returned by the stat() syscall flavours. ++ */ ++struct stat { ++ dev_t st_dev; /* ID of device containing file */ ++ ino_t st_ino; /* inode number */ ++ mode_t st_mode; /* protection */ ++ nlink_t st_nlink; /* number of hard links */ ++ uid_t st_uid; /* user ID of owner */ ++ gid_t st_gid; /* group ID of owner */ ++ dev_t st_rdev; /* device ID (if special file) */ ++ off_t st_size; /* total size, in bytes */ ++ blksize_t st_blksize; /* blocksize for file system I/O */ ++ blkcnt_t st_blocks; /* number of 512B blocks allocated */ ++ time_t st_atime; /* time of last access */ ++ time_t st_mtime; /* time of last modification */ ++ time_t st_ctime; /* time of last status change */ ++}; ++ ++#endif /* _NOLIBC_TYPES_H */ diff --git a/tools/include/uapi/asm/errno.h b/tools/include/uapi/asm/errno.h index d30439b4b8ab4..869379f91fe48 100644 --- a/tools/include/uapi/asm/errno.h @@ -435370,6 +560902,26 @@ index 2401fad090c52..bfd1ce9fe2110 100644 } int bpf_map_delete_elem(int fd, const void *key) +diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h +index 6fffb3cdf39b9..49bd43b998c8a 100644 +--- a/tools/lib/bpf/bpf.h ++++ b/tools/lib/bpf/bpf.h +@@ -249,8 +249,15 @@ LIBBPF_API int bpf_task_fd_query(int pid, int fd, __u32 flags, char *buf, + __u32 *buf_len, __u32 *prog_id, __u32 *fd_type, + __u64 *probe_offset, __u64 *probe_addr); + ++#ifdef __cplusplus ++/* forward-declaring enums in C++ isn't compatible with pure C enums, so ++ * instead define bpf_enable_stats() as accepting int as an input ++ */ ++LIBBPF_API int bpf_enable_stats(int type); ++#else + enum bpf_stats_type; /* defined in up-to-date linux/bpf.h */ + LIBBPF_API int bpf_enable_stats(enum bpf_stats_type type); ++#endif + + struct bpf_prog_bind_opts { + size_t sz; /* size of this struct for forward/backward compatibility */ diff --git a/tools/lib/bpf/bpf_core_read.h b/tools/lib/bpf/bpf_core_read.h index 09ebe3db5f2f8..e4aa9996a5501 100644 --- a/tools/lib/bpf/bpf_core_read.h @@ -435384,7 +560936,7 @@ index 09ebe3db5f2f8..e4aa9996a5501 100644 bpf_probe_read_kernel( \ (void *)dst, \ diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c -index 77dc24d58302d..5f3d20ae66d56 100644 +index 77dc24d58302d..3ed759f53e7c2 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -231,17 +231,23 @@ static int btf_parse_hdr(struct btf *btf) @@ -435468,7 +561020,21 @@ index 77dc24d58302d..5f3d20ae66d56 100644 * referenced types equivalence is checked separately. 
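 * For example, two definitions that differ only in a referenced type, say
 *
 *	struct s { struct a *p; };	and	struct s { struct b *p; };
 *
 * (hypothetical names) compare equal at this shallow stage, since member
 * names and offsets match; whether 'a' and 'b' themselves dedup to one
 * type is decided by the separate equivalence walk.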
*/ static bool btf_shallow_equal_struct(struct btf_type *t1, struct btf_type *t2) -@@ -3722,6 +3730,31 @@ static int btf_dedup_identical_arrays(struct btf_dedup *d, __u32 id1, __u32 id2) +@@ -3710,18 +3718,45 @@ static inline __u16 btf_fwd_kind(struct btf_type *t) + } + + /* Check if given two types are identical ARRAY definitions */ +-static int btf_dedup_identical_arrays(struct btf_dedup *d, __u32 id1, __u32 id2) ++static bool btf_dedup_identical_arrays(struct btf_dedup *d, __u32 id1, __u32 id2) + { + struct btf_type *t1, *t2; + + t1 = btf_type_by_id(d->btf, id1); + t2 = btf_type_by_id(d->btf, id2); + if (!btf_is_array(t1) || !btf_is_array(t2)) +- return 0; ++ return false; + return btf_equal_array(t1, t2); } @@ -435491,7 +561057,9 @@ index 77dc24d58302d..5f3d20ae66d56 100644 + m1 = btf_members(t1); + m2 = btf_members(t2); + for (i = 0, n = btf_vlen(t1); i < n; i++, m1++, m2++) { -+ if (m1->type != m2->type) ++ if (m1->type != m2->type && ++ !btf_dedup_identical_arrays(d, m1->type, m2->type) && ++ !btf_dedup_identical_structs(d, m1->type, m2->type)) + return false; + } + return true; @@ -435500,7 +561068,7 @@ index 77dc24d58302d..5f3d20ae66d56 100644 /* * Check equivalence of BTF type graph formed by candidate struct/union (we'll * call it "candidate graph" in this description for brevity) to a type graph -@@ -3833,6 +3866,8 @@ static int btf_dedup_is_equiv(struct btf_dedup *d, __u32 cand_id, +@@ -3833,6 +3868,8 @@ static int btf_dedup_is_equiv(struct btf_dedup *d, __u32 cand_id, hypot_type_id = d->hypot_map[canon_id]; if (hypot_type_id <= BTF_MAX_NR_TYPES) { @@ -435509,7 +561077,7 @@ index 77dc24d58302d..5f3d20ae66d56 100644 /* In some cases compiler will generate different DWARF types * for *identical* array type definitions and use them for * different fields within the *same* struct. This breaks type -@@ -3841,8 +3876,18 @@ static int btf_dedup_is_equiv(struct btf_dedup *d, __u32 cand_id, +@@ -3841,8 +3878,18 @@ static int btf_dedup_is_equiv(struct btf_dedup *d, __u32 cand_id, * types within a single CU. So work around that by explicitly * allowing identical array types here. 
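 * (The same tolerance is applied to duplicated STRUCT/UNION definitions
 * via btf_dedup_identical_structs() above: member-wise identical copies
 * are treated as equivalent rather than failing dedup.)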
*/ @@ -435544,10 +561112,64 @@ index 4a711f990904b..b0ee338a0cc87 100644 LIBBPF_API const void *btf_ext__get_raw_data(const struct btf_ext *btf_ext, __u32 *size); diff --git a/tools/lib/bpf/btf_dump.c b/tools/lib/bpf/btf_dump.c -index e4b483f15fb99..841cc68e3f427 100644 +index e4b483f15fb99..f620911ad3bb5 100644 --- a/tools/lib/bpf/btf_dump.c +++ b/tools/lib/bpf/btf_dump.c -@@ -1481,6 +1481,11 @@ static const char *btf_dump_resolve_name(struct btf_dump *d, __u32 id, +@@ -215,6 +215,17 @@ static int btf_dump_resize(struct btf_dump *d) + return 0; + } + ++static void btf_dump_free_names(struct hashmap *map) ++{ ++ size_t bkt; ++ struct hashmap_entry *cur; ++ ++ hashmap__for_each_entry(map, cur, bkt) ++ free((void *)cur->key); ++ ++ hashmap__free(map); ++} ++ + void btf_dump__free(struct btf_dump *d) + { + int i; +@@ -233,8 +244,8 @@ void btf_dump__free(struct btf_dump *d) + free(d->cached_names); + free(d->emit_queue); + free(d->decl_stack); +- hashmap__free(d->type_names); +- hashmap__free(d->ident_names); ++ btf_dump_free_names(d->type_names); ++ btf_dump_free_names(d->ident_names); + + free(d); + } +@@ -1457,11 +1468,23 @@ static void btf_dump_emit_type_cast(struct btf_dump *d, __u32 id, + static size_t btf_dump_name_dups(struct btf_dump *d, struct hashmap *name_map, + const char *orig_name) + { ++ char *old_name, *new_name; + size_t dup_cnt = 0; ++ int err; ++ ++ new_name = strdup(orig_name); ++ if (!new_name) ++ return 1; + + hashmap__find(name_map, orig_name, (void **)&dup_cnt); + dup_cnt++; +- hashmap__set(name_map, orig_name, (void *)dup_cnt, NULL, NULL); ++ ++ err = hashmap__set(name_map, new_name, (void *)dup_cnt, ++ (const void **)&old_name, NULL); ++ if (err) ++ free(new_name); ++ ++ free(old_name); + + return dup_cnt; + } +@@ -1481,6 +1504,11 @@ static const char *btf_dump_resolve_name(struct btf_dump *d, __u32 id, if (s->name_resolved) return *cached_name ? *cached_name : orig_name; @@ -435559,7 +561181,7 @@ index e4b483f15fb99..841cc68e3f427 100644 dup_cnt = btf_dump_name_dups(d, name_map, orig_name); if (dup_cnt > 1) { const size_t max_len = 256; -@@ -1829,14 +1834,16 @@ static int btf_dump_array_data(struct btf_dump *d, +@@ -1829,14 +1857,16 @@ static int btf_dump_array_data(struct btf_dump *d, { const struct btf_array *array = btf_array(t); const struct btf_type *elem_type; @@ -435578,7 +561200,16 @@ index e4b483f15fb99..841cc68e3f427 100644 return -EINVAL; } -@@ -2186,7 +2193,7 @@ static int btf_dump_dump_type_data(struct btf_dump *d, +@@ -1885,7 +1915,7 @@ static int btf_dump_struct_data(struct btf_dump *d, + { + const struct btf_member *m = btf_members(t); + __u16 n = btf_vlen(t); +- int i, err; ++ int i, err = 0; + + /* note that we increment depth before calling btf_dump_print() below; + * this is intentional. 
btf_dump_data_newline() will not print a +@@ -2186,7 +2216,7 @@ static int btf_dump_dump_type_data(struct btf_dump *d, __u8 bits_offset, __u8 bit_sz) { @@ -435614,7 +561245,7 @@ index 8df718a6b142d..4435c09fe132f 100644 /* remember prog_fd in the stack, if successful */ emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_7, diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c -index e4f83c304ec92..050622649797c 100644 +index e4f83c304ec92..f87a15bbf53b3 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -2993,6 +2993,12 @@ static int bpf_object__elf_collect(struct bpf_object *obj) @@ -435630,7 +561261,17 @@ index e4f83c304ec92..050622649797c 100644 scn = NULL; while ((scn = elf_nextscn(elf, scn)) != NULL) { idx++; -@@ -3936,7 +3942,7 @@ static int bpf_get_map_info_from_fdinfo(int fd, struct bpf_map_info *info) +@@ -3757,6 +3763,9 @@ static struct bpf_program *find_prog_by_sec_insn(const struct bpf_object *obj, + int l = 0, r = obj->nr_programs - 1, m; + struct bpf_program *prog; + ++ if (!obj->nr_programs) ++ return NULL; ++ + while (l < r) { + m = l + (r - l + 1) / 2; + prog = &obj->programs[m]; +@@ -3936,7 +3945,7 @@ static int bpf_get_map_info_from_fdinfo(int fd, struct bpf_map_info *info) int bpf_map__reuse_fd(struct bpf_map *map, int fd) { struct bpf_map_info info = {}; @@ -435639,7 +561280,7 @@ index e4f83c304ec92..050622649797c 100644 int new_fd, err; char *new_name; -@@ -3946,7 +3952,12 @@ int bpf_map__reuse_fd(struct bpf_map *map, int fd) +@@ -3946,7 +3955,12 @@ int bpf_map__reuse_fd(struct bpf_map *map, int fd) if (err) return libbpf_err(err); @@ -435653,7 +561294,7 @@ index e4f83c304ec92..050622649797c 100644 if (!new_name) return libbpf_err(-errno); -@@ -5132,7 +5143,7 @@ static int bpf_core_apply_relo(struct bpf_program *prog, +@@ -5132,7 +5146,7 @@ static int bpf_core_apply_relo(struct bpf_program *prog, * relocated, so it's enough to just subtract in-section offset */ insn_idx = insn_idx - prog->sec_insn_off; @@ -435662,7 +561303,7 @@ index e4f83c304ec92..050622649797c 100644 return -EINVAL; insn = &prog->insns[insn_idx]; -@@ -5215,9 +5226,10 @@ bpf_object__relocate_core(struct bpf_object *obj, const char *targ_btf_path) +@@ -5215,9 +5229,10 @@ bpf_object__relocate_core(struct bpf_object *obj, const char *targ_btf_path) */ prog = NULL; for (i = 0; i < obj->nr_programs; i++) { @@ -435675,7 +561316,7 @@ index e4f83c304ec92..050622649797c 100644 } if (!prog) { pr_warn("sec '%s': failed to find a BPF program\n", sec_name); -@@ -5232,10 +5244,17 @@ bpf_object__relocate_core(struct bpf_object *obj, const char *targ_btf_path) +@@ -5232,10 +5247,17 @@ bpf_object__relocate_core(struct bpf_object *obj, const char *targ_btf_path) insn_idx = rec->insn_off / BPF_INSN_SZ; prog = find_prog_by_sec_insn(obj, sec_idx, insn_idx); if (!prog) { @@ -435697,7 +561338,7 @@ index e4f83c304ec92..050622649797c 100644 } /* no need to apply CO-RE relocation if the program is * not going to be loaded -@@ -8670,7 +8689,10 @@ int bpf_map__set_inner_map_fd(struct bpf_map *map, int fd) +@@ -8670,7 +8692,10 @@ int bpf_map__set_inner_map_fd(struct bpf_map *map, int fd) pr_warn("error: inner_map_fd already specified\n"); return libbpf_err(-EINVAL); } @@ -435709,7 +561350,7 @@ index e4f83c304ec92..050622649797c 100644 map->inner_map_fd = fd; return 0; } -@@ -10800,6 +10822,9 @@ void bpf_object__detach_skeleton(struct bpf_object_skeleton *s) +@@ -10800,6 +10825,9 @@ void bpf_object__detach_skeleton(struct bpf_object_skeleton *s) void bpf_object__destroy_skeleton(struct bpf_object_skeleton 
*s) { @@ -435719,6 +561360,19 @@ index e4f83c304ec92..050622649797c 100644 if (s->progs) bpf_object__detach_skeleton(s); if (s->obj) +diff --git a/tools/lib/bpf/libbpf_probes.c b/tools/lib/bpf/libbpf_probes.c +index cd8c703dde718..8f425473ccaa8 100644 +--- a/tools/lib/bpf/libbpf_probes.c ++++ b/tools/lib/bpf/libbpf_probes.c +@@ -245,7 +245,7 @@ bool bpf_probe_map_type(enum bpf_map_type map_type, __u32 ifindex) + case BPF_MAP_TYPE_RINGBUF: + key_size = 0; + value_size = 0; +- max_entries = 4096; ++ max_entries = sysconf(_SC_PAGE_SIZE); + break; + case BPF_MAP_TYPE_UNSPEC: + case BPF_MAP_TYPE_HASH: diff --git a/tools/lib/bpf/linker.c b/tools/lib/bpf/linker.c index 2df880cefdaee..6b2f59ddb6918 100644 --- a/tools/lib/bpf/linker.c @@ -435864,6 +561518,43 @@ index 39f25e09b51e2..fadde7d80a51c 100644 return ret; } +diff --git a/tools/lib/bpf/ringbuf.c b/tools/lib/bpf/ringbuf.c +index 8bc117bcc7bcd..c42ba9358d8ce 100644 +--- a/tools/lib/bpf/ringbuf.c ++++ b/tools/lib/bpf/ringbuf.c +@@ -59,6 +59,7 @@ int ring_buffer__add(struct ring_buffer *rb, int map_fd, + __u32 len = sizeof(info); + struct epoll_event *e; + struct ring *r; ++ __u64 mmap_sz; + void *tmp; + int err; + +@@ -97,8 +98,7 @@ int ring_buffer__add(struct ring_buffer *rb, int map_fd, + r->mask = info.max_entries - 1; + + /* Map writable consumer page */ +- tmp = mmap(NULL, rb->page_size, PROT_READ | PROT_WRITE, MAP_SHARED, +- map_fd, 0); ++ tmp = mmap(NULL, rb->page_size, PROT_READ | PROT_WRITE, MAP_SHARED, map_fd, 0); + if (tmp == MAP_FAILED) { + err = -errno; + pr_warn("ringbuf: failed to mmap consumer page for map fd=%d: %d\n", +@@ -111,8 +111,12 @@ int ring_buffer__add(struct ring_buffer *rb, int map_fd, + * data size to allow simple reading of samples that wrap around the + * end of a ring buffer. See kernel implementation for details. 
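 * Roughly, as an illustrative sketch only (not the libbpf code itself;
 * sample/out/cons_pos are assumed names), the double mapping lets a
 * sample that starts near the end of the ring be consumed with one
 * flat access, with no explicit wrap handling:
 *
 *	sample = data + (cons_pos & r->mask);
 *	memcpy(out, sample, len);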
+ * */ +- tmp = mmap(NULL, rb->page_size + 2 * info.max_entries, PROT_READ, +- MAP_SHARED, map_fd, rb->page_size); ++ mmap_sz = rb->page_size + 2 * (__u64)info.max_entries; ++ if (mmap_sz != (__u64)(size_t)mmap_sz) { ++ pr_warn("ringbuf: ring buffer size (%u) is too big\n", info.max_entries); ++ return libbpf_err(-E2BIG); ++ } ++ tmp = mmap(NULL, (size_t)mmap_sz, PROT_READ, MAP_SHARED, map_fd, rb->page_size); + if (tmp == MAP_FAILED) { + err = -errno; + ringbuf_unmap_ring(rb, r); diff --git a/tools/lib/bpf/skel_internal.h b/tools/lib/bpf/skel_internal.h index b22b50c1b173e..9cf66702fa8dd 100644 --- a/tools/lib/bpf/skel_internal.h @@ -435990,6 +561681,21 @@ index 794a375dad360..b2aec04fce8f6 100644 return ret; } +diff --git a/tools/objtool/Makefile b/tools/objtool/Makefile +index 92ce4fce7bc73..549acc5859e9e 100644 +--- a/tools/objtool/Makefile ++++ b/tools/objtool/Makefile +@@ -19,8 +19,8 @@ LIBSUBCMD = $(LIBSUBCMD_OUTPUT)libsubcmd.a + OBJTOOL := $(OUTPUT)objtool + OBJTOOL_IN := $(OBJTOOL)-in.o + +-LIBELF_FLAGS := $(shell pkg-config libelf --cflags 2>/dev/null) +-LIBELF_LIBS := $(shell pkg-config libelf --libs 2>/dev/null || echo -lelf) ++LIBELF_FLAGS := $(shell $(HOSTPKG_CONFIG) libelf --cflags 2>/dev/null) ++LIBELF_LIBS := $(shell $(HOSTPKG_CONFIG) libelf --libs 2>/dev/null || echo -lelf) + + all: $(OBJTOOL) + diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c index 0893436cc09f8..f62db0e006e9c 100644 --- a/tools/objtool/arch/x86/decode.c @@ -436223,7 +561929,7 @@ index 8b38b5d6fec7b..35081fe373203 100644 }; diff --git a/tools/objtool/check.c b/tools/objtool/check.c -index 06b5c164ae931..72e5d23f1ad88 100644 +index 06b5c164ae931..758c0ba8de350 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -5,6 +5,8 @@ @@ -436248,15 +561954,36 @@ index 06b5c164ae931..72e5d23f1ad88 100644 struct instruction *find_insn(struct objtool_file *file, struct section *sec, unsigned long offset) -@@ -173,6 +179,7 @@ static bool __dead_end_function(struct objtool_file *file, struct symbol *func, - "rewind_stack_do_exit", +@@ -163,6 +169,7 @@ static bool __dead_end_function(struct objtool_file *file, struct symbol *func, + "panic", + "do_exit", + "do_task_dead", ++ "make_task_dead", + "__module_put_and_exit", + "complete_and_exit", + "__reiserfs_panic", +@@ -170,9 +177,10 @@ static bool __dead_end_function(struct objtool_file *file, struct symbol *func, + "fortify_panic", + "usercopy_abort", + "machine_real_restart", +- "rewind_stack_do_exit", ++ "rewind_stack_and_make_dead", "kunit_try_catch_throw", "xen_start_kernel", + "cpu_bringup_and_idle", }; if (!func) -@@ -265,6 +272,78 @@ static void init_insn_state(struct insn_state *state, struct section *sec) +@@ -190,7 +198,7 @@ static bool __dead_end_function(struct objtool_file *file, struct symbol *func, + return false; + + insn = find_insn(file, func->sec, func->offset); +- if (!insn->func) ++ if (!insn || !insn->func) + return false; + + func_for_each_insn(file, func, insn) { +@@ -265,6 +273,78 @@ static void init_insn_state(struct insn_state *state, struct section *sec) state->noinstr = sec->noinstr; } @@ -436335,7 +562062,7 @@ index 06b5c164ae931..72e5d23f1ad88 100644 /* * Call the arch-specific instruction decoder for all the instructions and add * them to the global instruction list. 
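 * (Each decoded instruction records its section, offset, length, type
 * and stack ops; later objtool passes walk this list rather than
 * re-decoding the ELF text.)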
-@@ -275,7 +354,6 @@ static int decode_instructions(struct objtool_file *file) +@@ -275,7 +355,6 @@ static int decode_instructions(struct objtool_file *file) struct symbol *func; unsigned long offset; struct instruction *insn; @@ -436343,7 +562070,7 @@ index 06b5c164ae931..72e5d23f1ad88 100644 int ret; for_each_sec(file, sec) { -@@ -289,7 +367,8 @@ static int decode_instructions(struct objtool_file *file) +@@ -289,7 +368,8 @@ static int decode_instructions(struct objtool_file *file) sec->text = true; if (!strcmp(sec->name, ".noinstr.text") || @@ -436353,7 +562080,7 @@ index 06b5c164ae931..72e5d23f1ad88 100644 sec->noinstr = true; for (offset = 0; offset < sec->sh.sh_size; offset += insn->len) { -@@ -301,7 +380,6 @@ static int decode_instructions(struct objtool_file *file) +@@ -301,7 +381,6 @@ static int decode_instructions(struct objtool_file *file) memset(insn, 0, sizeof(*insn)); INIT_LIST_HEAD(&insn->alts); INIT_LIST_HEAD(&insn->stack_ops); @@ -436361,7 +562088,7 @@ index 06b5c164ae931..72e5d23f1ad88 100644 insn->sec = sec; insn->offset = offset; -@@ -392,12 +470,12 @@ static int add_dead_ends(struct objtool_file *file) +@@ -392,12 +471,12 @@ static int add_dead_ends(struct objtool_file *file) else if (reloc->addend == reloc->sym->sec->sh.sh_size) { insn = find_last_insn(file, reloc->sym->sec); if (!insn) { @@ -436376,7 +562103,7 @@ index 06b5c164ae931..72e5d23f1ad88 100644 reloc->sym->sec->name, reloc->addend); return -1; } -@@ -427,12 +505,12 @@ reachable: +@@ -427,12 +506,12 @@ reachable: else if (reloc->addend == reloc->sym->sec->sh.sh_size) { insn = find_last_insn(file, reloc->sym->sec); if (!insn) { @@ -436391,7 +562118,7 @@ index 06b5c164ae931..72e5d23f1ad88 100644 reloc->sym->sec->name, reloc->addend); return -1; } -@@ -531,6 +609,98 @@ static int create_static_call_sections(struct objtool_file *file) +@@ -531,6 +610,98 @@ static int create_static_call_sections(struct objtool_file *file) return 0; } @@ -436490,7 +562217,7 @@ index 06b5c164ae931..72e5d23f1ad88 100644 static int create_mcount_loc_sections(struct objtool_file *file) { struct section *sec; -@@ -549,7 +719,7 @@ static int create_mcount_loc_sections(struct objtool_file *file) +@@ -549,7 +720,7 @@ static int create_mcount_loc_sections(struct objtool_file *file) return 0; idx = 0; @@ -436499,7 +562226,7 @@ index 06b5c164ae931..72e5d23f1ad88 100644 idx++; sec = elf_create_section(file->elf, "__mcount_loc", 0, sizeof(unsigned long), idx); -@@ -557,7 +727,7 @@ static int create_mcount_loc_sections(struct objtool_file *file) +@@ -557,7 +728,7 @@ static int create_mcount_loc_sections(struct objtool_file *file) return -1; idx = 0; @@ -436508,7 +562235,24 @@ index 06b5c164ae931..72e5d23f1ad88 100644 loc = (unsigned long *)sec->data->d_buf + idx; memset(loc, 0, sizeof(unsigned long)); -@@ -809,6 +979,11 @@ __weak bool arch_is_retpoline(struct symbol *sym) +@@ -676,6 +847,16 @@ static const char *uaccess_safe_builtin[] = { + "__tsan_read_write4", + "__tsan_read_write8", + "__tsan_read_write16", ++ "__tsan_volatile_read1", ++ "__tsan_volatile_read2", ++ "__tsan_volatile_read4", ++ "__tsan_volatile_read8", ++ "__tsan_volatile_read16", ++ "__tsan_volatile_write1", ++ "__tsan_volatile_write2", ++ "__tsan_volatile_write4", ++ "__tsan_volatile_write8", ++ "__tsan_volatile_write16", + "__tsan_atomic8_load", + "__tsan_atomic16_load", + "__tsan_atomic32_load", +@@ -809,6 +990,11 @@ __weak bool arch_is_retpoline(struct symbol *sym) return false; } @@ -436520,7 +562264,7 @@ index 06b5c164ae931..72e5d23f1ad88 100644 #define NEGATIVE_RELOC 
((void *)-1L) static struct reloc *insn_reloc(struct objtool_file *file, struct instruction *insn) -@@ -828,6 +1003,162 @@ static struct reloc *insn_reloc(struct objtool_file *file, struct instruction *i +@@ -828,6 +1014,162 @@ static struct reloc *insn_reloc(struct objtool_file *file, struct instruction *i return insn->reloc; } @@ -436683,7 +562427,7 @@ index 06b5c164ae931..72e5d23f1ad88 100644 /* * Find the destination instructions for all jumps. */ -@@ -849,28 +1180,15 @@ static int add_jump_destinations(struct objtool_file *file) +@@ -849,28 +1191,15 @@ static int add_jump_destinations(struct objtool_file *file) } else if (reloc->sym->type == STT_SECTION) { dest_sec = reloc->sym->sec; dest_off = arch_dest_reloc_offset(reloc->addend); @@ -436718,7 +562462,7 @@ index 06b5c164ae931..72e5d23f1ad88 100644 continue; } else if (reloc->sym->sec->idx) { dest_sec = reloc->sym->sec; -@@ -883,6 +1201,7 @@ static int add_jump_destinations(struct objtool_file *file) +@@ -883,6 +1212,7 @@ static int add_jump_destinations(struct objtool_file *file) insn->jump_dest = find_insn(file, dest_sec, dest_off); if (!insn->jump_dest) { @@ -436726,7 +562470,7 @@ index 06b5c164ae931..72e5d23f1ad88 100644 /* * This is a special case where an alt instruction -@@ -892,6 +1211,19 @@ static int add_jump_destinations(struct objtool_file *file) +@@ -892,6 +1222,19 @@ static int add_jump_destinations(struct objtool_file *file) if (!strcmp(insn->sec->name, ".altinstr_replacement")) continue; @@ -436746,7 +562490,7 @@ index 06b5c164ae931..72e5d23f1ad88 100644 WARN_FUNC("can't find jump dest instruction at %s+0x%lx", insn->sec, insn->offset, dest_sec->name, dest_off); -@@ -926,13 +1258,8 @@ static int add_jump_destinations(struct objtool_file *file) +@@ -926,13 +1269,8 @@ static int add_jump_destinations(struct objtool_file *file) } else if (insn->jump_dest->func->pfunc != insn->func->pfunc && insn->jump_dest->offset == insn->jump_dest->func->offset) { @@ -436761,7 +562505,7 @@ index 06b5c164ae931..72e5d23f1ad88 100644 } } } -@@ -940,16 +1267,6 @@ static int add_jump_destinations(struct objtool_file *file) +@@ -940,16 +1278,6 @@ static int add_jump_destinations(struct objtool_file *file) return 0; } @@ -436778,7 +562522,7 @@ index 06b5c164ae931..72e5d23f1ad88 100644 static struct symbol *find_call_destination(struct section *sec, unsigned long offset) { struct symbol *call_dest; -@@ -968,6 +1285,7 @@ static int add_call_destinations(struct objtool_file *file) +@@ -968,6 +1296,7 @@ static int add_call_destinations(struct objtool_file *file) { struct instruction *insn; unsigned long dest_off; @@ -436786,7 +562530,7 @@ index 06b5c164ae931..72e5d23f1ad88 100644 struct reloc *reloc; for_each_insn(file, insn) { -@@ -977,7 +1295,9 @@ static int add_call_destinations(struct objtool_file *file) +@@ -977,7 +1306,9 @@ static int add_call_destinations(struct objtool_file *file) reloc = insn_reloc(file, insn); if (!reloc) { dest_off = arch_jump_destination(insn); @@ -436797,7 +562541,7 @@ index 06b5c164ae931..72e5d23f1ad88 100644 if (insn->ignore) continue; -@@ -995,9 +1315,8 @@ static int add_call_destinations(struct objtool_file *file) +@@ -995,9 +1326,8 @@ static int add_call_destinations(struct objtool_file *file) } else if (reloc->sym->type == STT_SECTION) { dest_off = arch_dest_reloc_offset(reloc->addend); @@ -436809,7 +562553,7 @@ index 06b5c164ae931..72e5d23f1ad88 100644 WARN_FUNC("can't find call dest symbol at %s+0x%lx", insn->sec, insn->offset, reloc->sym->sec->name, -@@ -1005,70 +1324,13 @@ static int 
add_call_destinations(struct objtool_file *file) +@@ -1005,70 +1335,13 @@ static int add_call_destinations(struct objtool_file *file) return -1; } @@ -436884,7 +562628,7 @@ index 06b5c164ae931..72e5d23f1ad88 100644 } return 0; -@@ -1136,7 +1398,6 @@ static int handle_group_alt(struct objtool_file *file, +@@ -1136,7 +1409,6 @@ static int handle_group_alt(struct objtool_file *file, memset(nop, 0, sizeof(*nop)); INIT_LIST_HEAD(&nop->alts); INIT_LIST_HEAD(&nop->stack_ops); @@ -436892,7 +562636,7 @@ index 06b5c164ae931..72e5d23f1ad88 100644 nop->sec = special_alt->new_sec; nop->offset = special_alt->new_off + special_alt->new_len; -@@ -1545,10 +1806,11 @@ static void set_func_state(struct cfi_state *state) +@@ -1545,10 +1817,11 @@ static void set_func_state(struct cfi_state *state) static int read_unwind_hints(struct objtool_file *file) { @@ -436905,7 +562649,7 @@ index 06b5c164ae931..72e5d23f1ad88 100644 int i; sec = find_section_by_name(file->elf, ".discard.unwind_hints"); -@@ -1585,20 +1847,49 @@ static int read_unwind_hints(struct objtool_file *file) +@@ -1585,20 +1858,49 @@ static int read_unwind_hints(struct objtool_file *file) insn->hint = true; @@ -436960,7 +562704,7 @@ index 06b5c164ae931..72e5d23f1ad88 100644 } return 0; -@@ -1627,8 +1918,10 @@ static int read_retpoline_hints(struct objtool_file *file) +@@ -1627,8 +1929,10 @@ static int read_retpoline_hints(struct objtool_file *file) } if (insn->type != INSN_JUMP_DYNAMIC && @@ -436973,7 +562717,7 @@ index 06b5c164ae931..72e5d23f1ad88 100644 insn->sec, insn->offset); return -1; } -@@ -1737,17 +2030,31 @@ static int read_intra_function_calls(struct objtool_file *file) +@@ -1737,17 +2041,31 @@ static int read_intra_function_calls(struct objtool_file *file) return 0; } @@ -437008,7 +562752,7 @@ index 06b5c164ae931..72e5d23f1ad88 100644 } } -@@ -1780,11 +2087,6 @@ static void mark_rodata(struct objtool_file *file) +@@ -1780,11 +2098,6 @@ static void mark_rodata(struct objtool_file *file) file->rodata = found; } @@ -437020,7 +562764,7 @@ index 06b5c164ae931..72e5d23f1ad88 100644 static int decode_sections(struct objtool_file *file) { int ret; -@@ -1809,7 +2111,7 @@ static int decode_sections(struct objtool_file *file) +@@ -1809,7 +2122,7 @@ static int decode_sections(struct objtool_file *file) /* * Must be before add_{jump_call}_destination. 
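 * (add_jump_destinations() and add_call_destinations() consume the
 * information collected by this step, hence the ordering constraint.)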
*/ @@ -437029,7 +562773,7 @@ index 06b5c164ae931..72e5d23f1ad88 100644 if (ret) return ret; -@@ -1853,23 +2155,14 @@ static int decode_sections(struct objtool_file *file) +@@ -1853,23 +2166,14 @@ static int decode_sections(struct objtool_file *file) if (ret) return ret; @@ -437056,7 +562800,7 @@ index 06b5c164ae931..72e5d23f1ad88 100644 return true; return false; -@@ -2452,13 +2745,18 @@ static int propagate_alt_cfi(struct objtool_file *file, struct instruction *insn +@@ -2452,13 +2756,18 @@ static int propagate_alt_cfi(struct objtool_file *file, struct instruction *insn if (!insn->alt_group) return 0; @@ -437077,7 +562821,7 @@ index 06b5c164ae931..72e5d23f1ad88 100644 WARN_FUNC("stack layout conflict in alternatives", insn->sec, insn->offset); return -1; -@@ -2509,9 +2807,14 @@ static int handle_insn_ops(struct instruction *insn, +@@ -2509,9 +2818,14 @@ static int handle_insn_ops(struct instruction *insn, static bool insn_cfi_match(struct instruction *insn, struct cfi_state *cfi2) { @@ -437093,7 +562837,7 @@ index 06b5c164ae931..72e5d23f1ad88 100644 if (memcmp(&cfi1->cfa, &cfi2->cfa, sizeof(cfi1->cfa))) { WARN_FUNC("stack state mismatch: cfa1=%d%+d cfa2=%d%+d", -@@ -2696,7 +2999,7 @@ static int validate_branch(struct objtool_file *file, struct symbol *func, +@@ -2696,7 +3010,7 @@ static int validate_branch(struct objtool_file *file, struct symbol *func, struct instruction *insn, struct insn_state state) { struct alternative *alt; @@ -437102,7 +562846,7 @@ index 06b5c164ae931..72e5d23f1ad88 100644 struct section *sec; u8 visited; int ret; -@@ -2718,22 +3021,61 @@ static int validate_branch(struct objtool_file *file, struct symbol *func, +@@ -2718,22 +3032,61 @@ static int validate_branch(struct objtool_file *file, struct symbol *func, return 1; } @@ -437170,7 +562914,7 @@ index 06b5c164ae931..72e5d23f1ad88 100644 insn->visited |= visited; -@@ -2765,6 +3107,11 @@ static int validate_branch(struct objtool_file *file, struct symbol *func, +@@ -2765,6 +3118,11 @@ static int validate_branch(struct objtool_file *file, struct symbol *func, switch (insn->type) { case INSN_RETURN: @@ -437182,7 +562926,7 @@ index 06b5c164ae931..72e5d23f1ad88 100644 return validate_return(func, insn, &state); case INSN_CALL: -@@ -2808,6 +3155,13 @@ static int validate_branch(struct objtool_file *file, struct symbol *func, +@@ -2808,6 +3166,13 @@ static int validate_branch(struct objtool_file *file, struct symbol *func, break; case INSN_JUMP_DYNAMIC: @@ -437196,7 +562940,7 @@ index 06b5c164ae931..72e5d23f1ad88 100644 case INSN_JUMP_DYNAMIC_CONDITIONAL: if (is_sibling_call(insn)) { ret = validate_sibling_call(insn, &state); -@@ -2883,6 +3237,7 @@ static int validate_branch(struct objtool_file *file, struct symbol *func, +@@ -2883,6 +3248,7 @@ static int validate_branch(struct objtool_file *file, struct symbol *func, return 1; } @@ -437204,7 +562948,7 @@ index 06b5c164ae931..72e5d23f1ad88 100644 insn = next_insn; } -@@ -2922,6 +3277,145 @@ static int validate_unwind_hints(struct objtool_file *file, struct section *sec) +@@ -2922,6 +3288,145 @@ static int validate_unwind_hints(struct objtool_file *file, struct section *sec) return warnings; } @@ -437350,7 +563094,7 @@ index 06b5c164ae931..72e5d23f1ad88 100644 static int validate_retpoline(struct objtool_file *file) { struct instruction *insn; -@@ -2929,7 +3423,8 @@ static int validate_retpoline(struct objtool_file *file) +@@ -2929,7 +3434,8 @@ static int validate_retpoline(struct objtool_file *file) for_each_insn(file, insn) { if (insn->type != INSN_JUMP_DYNAMIC && @@ 
-437360,7 +563104,7 @@ index 06b5c164ae931..72e5d23f1ad88 100644 continue; if (insn->retpoline_safe) -@@ -2944,9 +3439,17 @@ static int validate_retpoline(struct objtool_file *file) +@@ -2944,9 +3450,17 @@ static int validate_retpoline(struct objtool_file *file) if (!strcmp(insn->sec->name, ".init.text") && !module) continue; @@ -437381,7 +563125,7 @@ index 06b5c164ae931..72e5d23f1ad88 100644 warnings++; } -@@ -2972,7 +3475,7 @@ static bool ignore_unreachable_insn(struct objtool_file *file, struct instructio +@@ -2972,7 +3486,7 @@ static bool ignore_unreachable_insn(struct objtool_file *file, struct instructio int i; struct instruction *prev_insn; @@ -437390,7 +563134,7 @@ index 06b5c164ae931..72e5d23f1ad88 100644 return true; /* -@@ -3138,10 +3641,20 @@ int check(struct objtool_file *file) +@@ -3138,10 +3652,20 @@ int check(struct objtool_file *file) int ret, warnings = 0; arch_initial_func_cfi_state(&initial_func_cfi); @@ -437411,7 +563155,7 @@ index 06b5c164ae931..72e5d23f1ad88 100644 warnings += ret; if (list_empty(&file->insn_list)) -@@ -3173,6 +3686,17 @@ int check(struct objtool_file *file) +@@ -3173,6 +3697,17 @@ int check(struct objtool_file *file) goto out; warnings += ret; @@ -437429,7 +563173,7 @@ index 06b5c164ae931..72e5d23f1ad88 100644 if (!warnings) { ret = validate_reachable_instructions(file); if (ret < 0) -@@ -3185,6 +3709,20 @@ int check(struct objtool_file *file) +@@ -3185,6 +3720,20 @@ int check(struct objtool_file *file) goto out; warnings += ret; @@ -437450,7 +563194,7 @@ index 06b5c164ae931..72e5d23f1ad88 100644 if (mcount) { ret = create_mcount_loc_sections(file); if (ret < 0) -@@ -3192,6 +3730,13 @@ int check(struct objtool_file *file) +@@ -3192,6 +3741,13 @@ int check(struct objtool_file *file) warnings += ret; } @@ -438082,6 +563826,18 @@ index 06c3eacab3d53..e2223dd91c379 100644 reloc_to_sec_off(new_reloc, &alt->new_sec, &alt->new_off); /* _ASM_EXTABLE_EX hack */ +diff --git a/tools/perf/.gitignore b/tools/perf/.gitignore +index 8e0163b7ef016..cdb7a347ceb5e 100644 +--- a/tools/perf/.gitignore ++++ b/tools/perf/.gitignore +@@ -4,6 +4,7 @@ PERF-GUI-VARS + PERF-VERSION-FILE + FEATURE-DUMP + perf ++!include/perf/ + perf-read-vdso32 + perf-read-vdsox32 + perf-help diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index 14e3e8d702a02..35e1f2a52435e 100644 --- a/tools/perf/Makefile.config @@ -438349,6 +564105,36 @@ index 207c56805c551..0ed177991ad05 100644 event_attr_init(&attr); fd = sys_perf_event_open(&attr, 0, -1, -1, 0); +diff --git a/tools/perf/bench/bench.h b/tools/perf/bench/bench.h +index b3480bc33fe84..baa7c63014004 100644 +--- a/tools/perf/bench/bench.h ++++ b/tools/perf/bench/bench.h +@@ -10,25 +10,13 @@ extern struct timeval bench__start, bench__end, bench__runtime; + * The madvise transparent hugepage constants were added in glibc + * 2.13. For compatibility with older versions of glibc, define these + * tokens if they are not already defined. +- * +- * PA-RISC uses different madvise values from other architectures and +- * needs to be special-cased. 
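 * (The #ifndef fallbacks kept below simply pin the generic Linux values,
 * MADV_HUGEPAGE == 14 and MADV_NOHUGEPAGE == 15, for builds against a
 * pre-2.13 glibc.)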
+ */ +-#ifdef __hppa__ +-# ifndef MADV_HUGEPAGE +-# define MADV_HUGEPAGE 67 +-# endif +-# ifndef MADV_NOHUGEPAGE +-# define MADV_NOHUGEPAGE 68 +-# endif +-#else + # ifndef MADV_HUGEPAGE + # define MADV_HUGEPAGE 14 + # endif + # ifndef MADV_NOHUGEPAGE + # define MADV_NOHUGEPAGE 15 + # endif +-#endif + + int bench_numa(int argc, const char **argv); + int bench_sched_messaging(int argc, const char **argv); diff --git a/tools/perf/bench/futex-lock-pi.c b/tools/perf/bench/futex-lock-pi.c index 5d1fe9c35807a..137890f78e17a 100644 --- a/tools/perf/bench/futex-lock-pi.c @@ -438593,10 +564379,38 @@ index c32c2eb16d7df..c6c40191933d4 100644 if (!script->show_switch_events) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c -index f0ecfda34eceb..abf88a1ad455c 100644 +index f0ecfda34eceb..aad65c95c3711 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c -@@ -807,6 +807,7 @@ static int __run_perf_stat(int argc, const char **argv, int run_idx) +@@ -558,26 +558,14 @@ static int enable_counters(void) + return err; + } + +- if (stat_config.initial_delay < 0) { +- pr_info(EVLIST_DISABLED_MSG); +- return 0; +- } +- +- if (stat_config.initial_delay > 0) { +- pr_info(EVLIST_DISABLED_MSG); +- usleep(stat_config.initial_delay * USEC_PER_MSEC); +- } +- + /* + * We need to enable counters only if: + * - we don't have tracee (attaching to task or cpu) + * - we have initial delay configured + */ +- if (!target__none(&target) || stat_config.initial_delay) { ++ if (!target__none(&target)) { + if (!all_counters_use_bpf) + evlist__enable(evsel_list); +- if (stat_config.initial_delay > 0) +- pr_info(EVLIST_ENABLED_MSG); + } + return 0; + } +@@ -807,6 +795,7 @@ static int __run_perf_stat(int argc, const char **argv, int run_idx) return -1; evlist__for_each_entry(evsel_list, counter) { @@ -438604,18 +564418,158 @@ index f0ecfda34eceb..abf88a1ad455c 100644 if (bpf_counter__load(counter, &target)) return -1; if (!evsel__is_bpf(counter)) -@@ -956,10 +957,10 @@ try_again_reset: - * Enable counters and exec the command: - */ - if (forks) { -- evlist__start_workload(evsel_list); +@@ -952,18 +941,31 @@ try_again_reset: + return err; + } + +- /* +- * Enable counters and exec the command: +- */ +- if (forks) { ++ if (stat_config.initial_delay) { ++ pr_info(EVLIST_DISABLED_MSG); ++ } else { ++ err = enable_counters(); ++ if (err) ++ return -1; ++ } ++ ++ /* Exec the command, if any */ ++ if (forks) + evlist__start_workload(evsel_list); ++ ++ if (stat_config.initial_delay > 0) { ++ usleep(stat_config.initial_delay * USEC_PER_MSEC); err = enable_counters(); if (err) return -1; -+ evlist__start_workload(evsel_list); - t0 = rdclock(); - clock_gettime(CLOCK_MONOTONIC, &ref_time); +- t0 = rdclock(); +- clock_gettime(CLOCK_MONOTONIC, &ref_time); ++ pr_info(EVLIST_ENABLED_MSG); ++ } + ++ t0 = rdclock(); ++ clock_gettime(CLOCK_MONOTONIC, &ref_time); ++ ++ if (forks) { + if (interval || timeout || evlist__ctlfd_initialized(evsel_list)) + status = dispatch_events(forks, timeout, interval, ×); + if (child_pid != -1) { +@@ -981,13 +983,6 @@ try_again_reset: + if (WIFSIGNALED(status)) + psignal(WTERMSIG(status), argv[0]); + } else { +- err = enable_counters(); +- if (err) +- return -1; +- +- t0 = rdclock(); +- clock_gettime(CLOCK_MONOTONIC, &ref_time); +- + status = dispatch_events(forks, timeout, interval, ×); + } + +diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c +index 2bf21194c7b39..d9ea546850cd6 100644 +--- a/tools/perf/builtin-trace.c ++++ b/tools/perf/builtin-trace.c +@@ -17,7 +17,9 
@@ + #include "util/record.h" + #include <traceevent/event-parse.h> + #include <api/fs/tracing_path.h> ++#ifdef HAVE_LIBBPF_SUPPORT + #include <bpf/bpf.h> ++#endif + #include "util/bpf_map.h" + #include "util/rlimit.h" + #include "builtin.h" +@@ -87,6 +89,8 @@ + # define F_LINUX_SPECIFIC_BASE 1024 + #endif + ++#define RAW_SYSCALL_ARGS_NUM 6 ++ + /* + * strtoul: Go from a string to a value, i.e. for msr: MSR_FS_BASE to 0xc0000100 + */ +@@ -107,7 +111,7 @@ struct syscall_fmt { + const char *sys_enter, + *sys_exit; + } bpf_prog_name; +- struct syscall_arg_fmt arg[6]; ++ struct syscall_arg_fmt arg[RAW_SYSCALL_ARGS_NUM]; + u8 nr_args; + bool errpid; + bool timeout; +@@ -1224,7 +1228,7 @@ struct syscall { + */ + struct bpf_map_syscall_entry { + bool enabled; +- u16 string_args_len[6]; ++ u16 string_args_len[RAW_SYSCALL_ARGS_NUM]; + }; + + /* +@@ -1649,7 +1653,7 @@ static int syscall__alloc_arg_fmts(struct syscall *sc, int nr_args) + { + int idx; + +- if (nr_args == 6 && sc->fmt && sc->fmt->nr_args != 0) ++ if (nr_args == RAW_SYSCALL_ARGS_NUM && sc->fmt && sc->fmt->nr_args != 0) + nr_args = sc->fmt->nr_args; + + sc->arg_fmt = calloc(nr_args, sizeof(*sc->arg_fmt)); +@@ -1782,11 +1786,11 @@ static int trace__read_syscall_info(struct trace *trace, int id) + #endif + sc = trace->syscalls.table + id; + if (sc->nonexistent) +- return 0; ++ return -EEXIST; + + if (name == NULL) { + sc->nonexistent = true; +- return 0; ++ return -EEXIST; + } + + sc->name = name; +@@ -1800,11 +1804,18 @@ static int trace__read_syscall_info(struct trace *trace, int id) + sc->tp_format = trace_event__tp_format("syscalls", tp_name); + } + +- if (syscall__alloc_arg_fmts(sc, IS_ERR(sc->tp_format) ? 6 : sc->tp_format->format.nr_fields)) +- return -ENOMEM; +- +- if (IS_ERR(sc->tp_format)) ++ /* ++ * Fails to read trace point format via sysfs node, so the trace point ++ * doesn't exist. Set the 'nonexistent' flag as true. ++ */ ++ if (IS_ERR(sc->tp_format)) { ++ sc->nonexistent = true; + return PTR_ERR(sc->tp_format); ++ } ++ ++ if (syscall__alloc_arg_fmts(sc, IS_ERR(sc->tp_format) ? 
++ RAW_SYSCALL_ARGS_NUM : sc->tp_format->format.nr_fields)) ++ return -ENOMEM; + + sc->args = sc->tp_format->format.fields; + /* +@@ -2122,11 +2133,8 @@ static struct syscall *trace__syscall_info(struct trace *trace, + (err = trace__read_syscall_info(trace, id)) != 0) + goto out_cant_read; + +- if (trace->syscalls.table[id].name == NULL) { +- if (trace->syscalls.table[id].nonexistent) +- return NULL; ++ if (trace->syscalls.table && trace->syscalls.table[id].nonexistent) + goto out_cant_read; +- } + + return &trace->syscalls.table[id]; + diff --git a/tools/perf/perf.c b/tools/perf/perf.c index 2f6b67189b426..6aae7b6c376b4 100644 --- a/tools/perf/perf.c @@ -438629,6 +564583,297 @@ index 2f6b67189b426..6aae7b6c376b4 100644 } int main(int argc, const char **argv) +diff --git a/tools/perf/pmu-events/arch/arm64/hisilicon/hip08/metrics.json b/tools/perf/pmu-events/arch/arm64/hisilicon/hip08/metrics.json +index dda8e59149d22..be23d3c89a791 100644 +--- a/tools/perf/pmu-events/arch/arm64/hisilicon/hip08/metrics.json ++++ b/tools/perf/pmu-events/arch/arm64/hisilicon/hip08/metrics.json +@@ -112,21 +112,21 @@ + "MetricName": "indirect_branch" + }, + { +- "MetricExpr": "(armv8_pmuv3_0@event\\=0x1014@ + armv8_pmuv3_0@event\\=0x1018@) / BR_MIS_PRED", ++ "MetricExpr": "(armv8_pmuv3_0@event\\=0x1013@ + armv8_pmuv3_0@event\\=0x1016@) / BR_MIS_PRED", + "PublicDescription": "Push branch L3 topdown metric", + "BriefDescription": "Push branch L3 topdown metric", + "MetricGroup": "TopDownL3", + "MetricName": "push_branch" + }, + { +- "MetricExpr": "armv8_pmuv3_0@event\\=0x100c@ / BR_MIS_PRED", ++ "MetricExpr": "armv8_pmuv3_0@event\\=0x100d@ / BR_MIS_PRED", + "PublicDescription": "Pop branch L3 topdown metric", + "BriefDescription": "Pop branch L3 topdown metric", + "MetricGroup": "TopDownL3", + "MetricName": "pop_branch" + }, + { +- "MetricExpr": "(BR_MIS_PRED - armv8_pmuv3_0@event\\=0x1010@ - armv8_pmuv3_0@event\\=0x1014@ - armv8_pmuv3_0@event\\=0x1018@ - armv8_pmuv3_0@event\\=0x100c@) / BR_MIS_PRED", ++ "MetricExpr": "(BR_MIS_PRED - armv8_pmuv3_0@event\\=0x1010@ - armv8_pmuv3_0@event\\=0x1013@ - armv8_pmuv3_0@event\\=0x1016@ - armv8_pmuv3_0@event\\=0x100d@) / BR_MIS_PRED", + "PublicDescription": "Other branch L3 topdown metric", + "BriefDescription": "Other branch L3 topdown metric", + "MetricGroup": "TopDownL3", +diff --git a/tools/perf/pmu-events/arch/powerpc/power10/nest_metrics.json b/tools/perf/pmu-events/arch/powerpc/power10/nest_metrics.json +index 8ba3e81c9808b..fe050d44374ba 100644 +--- a/tools/perf/pmu-events/arch/powerpc/power10/nest_metrics.json ++++ b/tools/perf/pmu-events/arch/powerpc/power10/nest_metrics.json +@@ -1,13 +1,13 @@ + [ + { + "MetricName": "VEC_GROUP_PUMP_RETRY_RATIO_P01", +- "MetricExpr": "(hv_24x7@PM_PB_RTY_VG_PUMP01\\,chip\\=?@ / hv_24x7@PM_PB_VG_PUMP01\\,chip\\=?@) * 100", ++ "MetricExpr": "(hv_24x7@PM_PB_RTY_VG_PUMP01\\,chip\\=?@ / (1 + hv_24x7@PM_PB_VG_PUMP01\\,chip\\=?@)) * 100", + "ScaleUnit": "1%", + "AggregationMode": "PerChip" + }, + { + "MetricName": "VEC_GROUP_PUMP_RETRY_RATIO_P23", +- "MetricExpr": "(hv_24x7@PM_PB_RTY_VG_PUMP23\\,chip\\=?@ / hv_24x7@PM_PB_VG_PUMP23\\,chip\\=?@) * 100", ++ "MetricExpr": "(hv_24x7@PM_PB_RTY_VG_PUMP23\\,chip\\=?@ / (1 + hv_24x7@PM_PB_VG_PUMP23\\,chip\\=?@)) * 100", + "ScaleUnit": "1%", + "AggregationMode": "PerChip" + }, +@@ -61,13 +61,13 @@ + }, + { + "MetricName": "REMOTE_NODE_PUMPS_RETRIES_RATIO_P01", +- "MetricExpr": "(hv_24x7@PM_PB_RTY_RNS_PUMP01\\,chip\\=?@ / hv_24x7@PM_PB_RNS_PUMP01\\,chip\\=?@) * 100", ++ "MetricExpr": 
"(hv_24x7@PM_PB_RTY_RNS_PUMP01\\,chip\\=?@ / (1 + hv_24x7@PM_PB_RNS_PUMP01\\,chip\\=?@)) * 100", + "ScaleUnit": "1%", + "AggregationMode": "PerChip" + }, + { + "MetricName": "REMOTE_NODE_PUMPS_RETRIES_RATIO_P23", +- "MetricExpr": "(hv_24x7@PM_PB_RTY_RNS_PUMP23\\,chip\\=?@ / hv_24x7@PM_PB_RNS_PUMP23\\,chip\\=?@) * 100", ++ "MetricExpr": "(hv_24x7@PM_PB_RTY_RNS_PUMP23\\,chip\\=?@ / (1 + hv_24x7@PM_PB_RNS_PUMP23\\,chip\\=?@)) * 100", + "ScaleUnit": "1%", + "AggregationMode": "PerChip" + }, +@@ -151,193 +151,193 @@ + }, + { + "MetricName": "XLINK0_OUT_TOTAL_UTILIZATION", +- "MetricExpr": "((hv_24x7@PM_XLINK0_OUT_ODD_TOTAL_UTIL\\,chip\\=?@ + hv_24x7@PM_XLINK0_OUT_EVEN_TOTAL_UTIL\\,chip\\=?@) / (hv_24x7@PM_XLINK0_OUT_ODD_AVLBL_CYCLES\\,chip\\=?@ + hv_24x7@PM_XLINK0_OUT_EVEN_AVLBL_CYCLES\\,chip\\=?@)) * 100", ++ "MetricExpr": "((hv_24x7@PM_XLINK0_OUT_ODD_TOTAL_UTIL\\,chip\\=?@ + hv_24x7@PM_XLINK0_OUT_EVEN_TOTAL_UTIL\\,chip\\=?@) / (1 + hv_24x7@PM_XLINK0_OUT_ODD_AVLBL_CYCLES\\,chip\\=?@ + hv_24x7@PM_XLINK0_OUT_EVEN_AVLBL_CYCLES\\,chip\\=?@)) * 100", + "ScaleUnit": "1%", + "AggregationMode": "PerChip" + }, + { + "MetricName": "XLINK1_OUT_TOTAL_UTILIZATION", +- "MetricExpr": "((hv_24x7@PM_XLINK1_OUT_ODD_TOTAL_UTIL\\,chip\\=?@ + hv_24x7@PM_XLINK1_OUT_EVEN_TOTAL_UTIL\\,chip\\=?@) / (hv_24x7@PM_XLINK1_OUT_ODD_AVLBL_CYCLES\\,chip\\=?@ + hv_24x7@PM_XLINK1_OUT_EVEN_AVLBL_CYCLES\\,chip\\=?@)) * 100", ++ "MetricExpr": "((hv_24x7@PM_XLINK1_OUT_ODD_TOTAL_UTIL\\,chip\\=?@ + hv_24x7@PM_XLINK1_OUT_EVEN_TOTAL_UTIL\\,chip\\=?@) / (1 + hv_24x7@PM_XLINK1_OUT_ODD_AVLBL_CYCLES\\,chip\\=?@ + hv_24x7@PM_XLINK1_OUT_EVEN_AVLBL_CYCLES\\,chip\\=?@)) * 100", + "ScaleUnit": "1%", + "AggregationMode": "PerChip" + }, + { + "MetricName": "XLINK2_OUT_TOTAL_UTILIZATION", +- "MetricExpr": "((hv_24x7@PM_XLINK2_OUT_ODD_TOTAL_UTIL\\,chip\\=?@ + hv_24x7@PM_XLINK2_OUT_EVEN_TOTAL_UTIL\\,chip\\=?@) / (hv_24x7@PM_XLINK2_OUT_ODD_AVLBL_CYCLES\\,chip\\=?@ + hv_24x7@PM_XLINK2_OUT_EVEN_AVLBL_CYCLES\\,chip\\=?@)) * 100", ++ "MetricExpr": "((hv_24x7@PM_XLINK2_OUT_ODD_TOTAL_UTIL\\,chip\\=?@ + hv_24x7@PM_XLINK2_OUT_EVEN_TOTAL_UTIL\\,chip\\=?@) / (1 + hv_24x7@PM_XLINK2_OUT_ODD_AVLBL_CYCLES\\,chip\\=?@ + hv_24x7@PM_XLINK2_OUT_EVEN_AVLBL_CYCLES\\,chip\\=?@)) * 100", + "ScaleUnit": "1%", + "AggregationMode": "PerChip" + }, + { + "MetricName": "XLINK3_OUT_TOTAL_UTILIZATION", +- "MetricExpr": "((hv_24x7@PM_XLINK3_OUT_ODD_TOTAL_UTIL\\,chip\\=?@ + hv_24x7@PM_XLINK3_OUT_EVEN_TOTAL_UTIL\\,chip\\=?@) / (hv_24x7@PM_XLINK3_OUT_ODD_AVLBL_CYCLES\\,chip\\=?@ + hv_24x7@PM_XLINK3_OUT_EVEN_AVLBL_CYCLES\\,chip\\=?@)) * 100", ++ "MetricExpr": "((hv_24x7@PM_XLINK3_OUT_ODD_TOTAL_UTIL\\,chip\\=?@ + hv_24x7@PM_XLINK3_OUT_EVEN_TOTAL_UTIL\\,chip\\=?@) / (1 + hv_24x7@PM_XLINK3_OUT_ODD_AVLBL_CYCLES\\,chip\\=?@ + hv_24x7@PM_XLINK3_OUT_EVEN_AVLBL_CYCLES\\,chip\\=?@)) * 100", + "ScaleUnit": "1%", + "AggregationMode": "PerChip" + }, + { + "MetricName": "XLINK4_OUT_TOTAL_UTILIZATION", +- "MetricExpr": "((hv_24x7@PM_XLINK4_OUT_ODD_TOTAL_UTIL\\,chip\\=?@ + hv_24x7@PM_XLINK4_OUT_EVEN_TOTAL_UTIL\\,chip\\=?@) / (hv_24x7@PM_XLINK4_OUT_ODD_AVLBL_CYCLES\\,chip\\=?@ + hv_24x7@PM_XLINK4_OUT_EVEN_AVLBL_CYCLES\\,chip\\=?@)) * 100", ++ "MetricExpr": "((hv_24x7@PM_XLINK4_OUT_ODD_TOTAL_UTIL\\,chip\\=?@ + hv_24x7@PM_XLINK4_OUT_EVEN_TOTAL_UTIL\\,chip\\=?@) / (1 + hv_24x7@PM_XLINK4_OUT_ODD_AVLBL_CYCLES\\,chip\\=?@ + hv_24x7@PM_XLINK4_OUT_EVEN_AVLBL_CYCLES\\,chip\\=?@)) * 100", + "ScaleUnit": "1%", + "AggregationMode": "PerChip" + }, + { + "MetricName": "XLINK5_OUT_TOTAL_UTILIZATION", +- 
"MetricExpr": "((hv_24x7@PM_XLINK5_OUT_ODD_TOTAL_UTIL\\,chip\\=?@ + hv_24x7@PM_XLINK5_OUT_EVEN_TOTAL_UTIL\\,chip\\=?@) / (hv_24x7@PM_XLINK5_OUT_ODD_AVLBL_CYCLES\\,chip\\=?@ + hv_24x7@PM_XLINK5_OUT_EVEN_AVLBL_CYCLES\\,chip\\=?@)) * 100", ++ "MetricExpr": "((hv_24x7@PM_XLINK5_OUT_ODD_TOTAL_UTIL\\,chip\\=?@ + hv_24x7@PM_XLINK5_OUT_EVEN_TOTAL_UTIL\\,chip\\=?@) / (1 + hv_24x7@PM_XLINK5_OUT_ODD_AVLBL_CYCLES\\,chip\\=?@ + hv_24x7@PM_XLINK5_OUT_EVEN_AVLBL_CYCLES\\,chip\\=?@)) * 100", + "ScaleUnit": "1%", + "AggregationMode": "PerChip" + }, + { + "MetricName": "XLINK6_OUT_TOTAL_UTILIZATION", +- "MetricExpr": "((hv_24x7@PM_XLINK6_OUT_ODD_TOTAL_UTIL\\,chip\\=?@ + hv_24x7@PM_XLINK6_OUT_EVEN_TOTAL_UTIL\\,chip\\=?@) / (hv_24x7@PM_XLINK6_OUT_ODD_AVLBL_CYCLES\\,chip\\=?@ + hv_24x7@PM_XLINK6_OUT_EVEN_AVLBL_CYCLES\\,chip\\=?@)) * 100", ++ "MetricExpr": "((hv_24x7@PM_XLINK6_OUT_ODD_TOTAL_UTIL\\,chip\\=?@ + hv_24x7@PM_XLINK6_OUT_EVEN_TOTAL_UTIL\\,chip\\=?@) / (1 + hv_24x7@PM_XLINK6_OUT_ODD_AVLBL_CYCLES\\,chip\\=?@ + hv_24x7@PM_XLINK6_OUT_EVEN_AVLBL_CYCLES\\,chip\\=?@)) * 100", + "ScaleUnit": "1%", + "AggregationMode": "PerChip" + }, + { + "MetricName": "XLINK7_OUT_TOTAL_UTILIZATION", +- "MetricExpr": "((hv_24x7@PM_XLINK7_OUT_ODD_TOTAL_UTIL\\,chip\\=?@ + hv_24x7@PM_XLINK7_OUT_EVEN_TOTAL_UTIL\\,chip\\=?@) / (hv_24x7@PM_XLINK7_OUT_ODD_AVLBL_CYCLES\\,chip\\=?@ + hv_24x7@PM_XLINK7_OUT_EVEN_AVLBL_CYCLES\\,chip\\=?@)) * 100", ++ "MetricExpr": "((hv_24x7@PM_XLINK7_OUT_ODD_TOTAL_UTIL\\,chip\\=?@ + hv_24x7@PM_XLINK7_OUT_EVEN_TOTAL_UTIL\\,chip\\=?@) / (1 + hv_24x7@PM_XLINK7_OUT_ODD_AVLBL_CYCLES\\,chip\\=?@ + hv_24x7@PM_XLINK7_OUT_EVEN_AVLBL_CYCLES\\,chip\\=?@)) * 100", + "ScaleUnit": "1%", + "AggregationMode": "PerChip" + }, + { + "MetricName": "XLINK0_OUT_DATA_UTILIZATION", +- "MetricExpr": "((hv_24x7@PM_XLINK0_OUT_ODD_DATA\\,chip\\=?@ + hv_24x7@PM_XLINK0_OUT_EVEN_DATA\\,chip\\=?@) / (hv_24x7@PM_XLINK0_OUT_ODD_AVLBL_CYCLES\\,chip\\=?@ + hv_24x7@PM_XLINK0_OUT_EVEN_AVLBL_CYCLES\\,chip\\=?@)) * 100", ++ "MetricExpr": "((hv_24x7@PM_XLINK0_OUT_ODD_DATA\\,chip\\=?@ + hv_24x7@PM_XLINK0_OUT_EVEN_DATA\\,chip\\=?@) / (1 + hv_24x7@PM_XLINK0_OUT_ODD_AVLBL_CYCLES\\,chip\\=?@ + hv_24x7@PM_XLINK0_OUT_EVEN_AVLBL_CYCLES\\,chip\\=?@)) * 100", + "ScaleUnit": "1.063%", + "AggregationMode": "PerChip" + }, + { + "MetricName": "XLINK1_OUT_DATA_UTILIZATION", +- "MetricExpr": "((hv_24x7@PM_XLINK1_OUT_ODD_DATA\\,chip\\=?@ + hv_24x7@PM_XLINK1_OUT_EVEN_DATA\\,chip\\=?@) / (hv_24x7@PM_XLINK1_OUT_ODD_AVLBL_CYCLES\\,chip\\=?@ + hv_24x7@PM_XLINK1_OUT_EVEN_AVLBL_CYCLES\\,chip\\=?@)) * 100", ++ "MetricExpr": "((hv_24x7@PM_XLINK1_OUT_ODD_DATA\\,chip\\=?@ + hv_24x7@PM_XLINK1_OUT_EVEN_DATA\\,chip\\=?@) / (1 + hv_24x7@PM_XLINK1_OUT_ODD_AVLBL_CYCLES\\,chip\\=?@ + hv_24x7@PM_XLINK1_OUT_EVEN_AVLBL_CYCLES\\,chip\\=?@)) * 100", + "ScaleUnit": "1.063%", + "AggregationMode": "PerChip" + }, + { + "MetricName": "XLINK2_OUT_DATA_UTILIZATION", +- "MetricExpr": "((hv_24x7@PM_XLINK2_OUT_ODD_DATA\\,chip\\=?@ + hv_24x7@PM_XLINK2_OUT_EVEN_DATA\\,chip\\=?@) / (hv_24x7@PM_XLINK2_OUT_ODD_AVLBL_CYCLES\\,chip\\=?@ + hv_24x7@PM_XLINK2_OUT_EVEN_AVLBL_CYCLES\\,chip\\=?@)) * 100", ++ "MetricExpr": "((hv_24x7@PM_XLINK2_OUT_ODD_DATA\\,chip\\=?@ + hv_24x7@PM_XLINK2_OUT_EVEN_DATA\\,chip\\=?@) / (1 + hv_24x7@PM_XLINK2_OUT_ODD_AVLBL_CYCLES\\,chip\\=?@ + hv_24x7@PM_XLINK2_OUT_EVEN_AVLBL_CYCLES\\,chip\\=?@)) * 100", + "ScaleUnit": "1.063%", + "AggregationMode": "PerChip" + }, + { + "MetricName": "XLINK3_OUT_DATA_UTILIZATION", +- "MetricExpr": "((hv_24x7@PM_XLINK3_OUT_ODD_DATA\\,chip\\=?@ 
+ hv_24x7@PM_XLINK3_OUT_EVEN_DATA\\,chip\\=?@) / (hv_24x7@PM_XLINK3_OUT_ODD_AVLBL_CYCLES\\,chip\\=?@ + hv_24x7@PM_XLINK3_OUT_EVEN_AVLBL_CYCLES\\,chip\\=?@)) * 100", ++ "MetricExpr": "((hv_24x7@PM_XLINK3_OUT_ODD_DATA\\,chip\\=?@ + hv_24x7@PM_XLINK3_OUT_EVEN_DATA\\,chip\\=?@) / (1 + hv_24x7@PM_XLINK3_OUT_ODD_AVLBL_CYCLES\\,chip\\=?@ + hv_24x7@PM_XLINK3_OUT_EVEN_AVLBL_CYCLES\\,chip\\=?@)) * 100", + "ScaleUnit": "1.063%", + "AggregationMode": "PerChip" + }, + { + "MetricName": "XLINK4_OUT_DATA_UTILIZATION", +- "MetricExpr": "((hv_24x7@PM_XLINK4_OUT_ODD_DATA\\,chip\\=?@ + hv_24x7@PM_XLINK4_OUT_EVEN_DATA\\,chip\\=?@) / (hv_24x7@PM_XLINK4_OUT_ODD_AVLBL_CYCLES\\,chip\\=?@ + hv_24x7@PM_XLINK4_OUT_EVEN_AVLBL_CYCLES\\,chip\\=?@)) * 100", ++ "MetricExpr": "((hv_24x7@PM_XLINK4_OUT_ODD_DATA\\,chip\\=?@ + hv_24x7@PM_XLINK4_OUT_EVEN_DATA\\,chip\\=?@) / (1 + hv_24x7@PM_XLINK4_OUT_ODD_AVLBL_CYCLES\\,chip\\=?@ + hv_24x7@PM_XLINK4_OUT_EVEN_AVLBL_CYCLES\\,chip\\=?@)) * 100", + "ScaleUnit": "1.063%", + "AggregationMode": "PerChip" + }, + { + "MetricName": "XLINK5_OUT_DATA_UTILIZATION", +- "MetricExpr": "((hv_24x7@PM_XLINK5_OUT_ODD_DATA\\,chip\\=?@ + hv_24x7@PM_XLINK5_OUT_EVEN_DATA\\,chip\\=?@) / (hv_24x7@PM_XLINK5_OUT_ODD_AVLBL_CYCLES\\,chip\\=?@ + hv_24x7@PM_XLINK5_OUT_EVEN_AVLBL_CYCLES\\,chip\\=?@)) * 100", ++ "MetricExpr": "((hv_24x7@PM_XLINK5_OUT_ODD_DATA\\,chip\\=?@ + hv_24x7@PM_XLINK5_OUT_EVEN_DATA\\,chip\\=?@) / (1 + hv_24x7@PM_XLINK5_OUT_ODD_AVLBL_CYCLES\\,chip\\=?@ + hv_24x7@PM_XLINK5_OUT_EVEN_AVLBL_CYCLES\\,chip\\=?@)) * 100", + "ScaleUnit": "1.063%", + "AggregationMode": "PerChip" + }, + { + "MetricName": "XLINK6_OUT_DATA_UTILIZATION", +- "MetricExpr": "((hv_24x7@PM_XLINK6_OUT_ODD_DATA\\,chip\\=?@ + hv_24x7@PM_XLINK6_OUT_EVEN_DATA\\,chip\\=?@) / (hv_24x7@PM_XLINK6_OUT_ODD_AVLBL_CYCLES\\,chip\\=?@ + hv_24x7@PM_XLINK6_OUT_EVEN_AVLBL_CYCLES\\,chip\\=?@)) * 100", ++ "MetricExpr": "((hv_24x7@PM_XLINK6_OUT_ODD_DATA\\,chip\\=?@ + hv_24x7@PM_XLINK6_OUT_EVEN_DATA\\,chip\\=?@) / (1 + hv_24x7@PM_XLINK6_OUT_ODD_AVLBL_CYCLES\\,chip\\=?@ + hv_24x7@PM_XLINK6_OUT_EVEN_AVLBL_CYCLES\\,chip\\=?@)) * 100", + "ScaleUnit": "1.063%", + "AggregationMode": "PerChip" + }, + { + "MetricName": "XLINK7_OUT_DATA_UTILIZATION", +- "MetricExpr": "((hv_24x7@PM_XLINK7_OUT_ODD_DATA\\,chip\\=?@ + hv_24x7@PM_XLINK7_OUT_EVEN_DATA\\,chip\\=?@) / (hv_24x7@PM_XLINK7_OUT_ODD_AVLBL_CYCLES\\,chip\\=?@ + hv_24x7@PM_XLINK7_OUT_EVEN_AVLBL_CYCLES\\,chip\\=?@)) * 100", ++ "MetricExpr": "((hv_24x7@PM_XLINK7_OUT_ODD_DATA\\,chip\\=?@ + hv_24x7@PM_XLINK7_OUT_EVEN_DATA\\,chip\\=?@) / (1 + hv_24x7@PM_XLINK7_OUT_ODD_AVLBL_CYCLES\\,chip\\=?@ + hv_24x7@PM_XLINK7_OUT_EVEN_AVLBL_CYCLES\\,chip\\=?@)) * 100", + "ScaleUnit": "1.063%", + "AggregationMode": "PerChip" + }, + { + "MetricName": "ALINK0_OUT_TOTAL_UTILIZATION", +- "MetricExpr": "((hv_24x7@PM_ALINK0_OUT_ODD_TOTAL_UTIL\\,chip\\=?@ + hv_24x7@PM_ALINK0_OUT_EVEN_TOTAL_UTIL\\,chip\\=?@) / (hv_24x7@PM_ALINK0_OUT_ODD_AVLBL_CYCLES\\,chip\\=?@ + hv_24x7@PM_ALINK0_OUT_EVEN_AVLBL_CYCLES\\,chip\\=?@)) * 100", ++ "MetricExpr": "((hv_24x7@PM_ALINK0_OUT_ODD_TOTAL_UTIL\\,chip\\=?@ + hv_24x7@PM_ALINK0_OUT_EVEN_TOTAL_UTIL\\,chip\\=?@) / (1 + hv_24x7@PM_ALINK0_OUT_ODD_AVLBL_CYCLES\\,chip\\=?@ + hv_24x7@PM_ALINK0_OUT_EVEN_AVLBL_CYCLES\\,chip\\=?@)) * 100", + "ScaleUnit": "1%", + "AggregationMode": "PerChip" + }, + { + "MetricName": "ALINK1_OUT_TOTAL_UTILIZATION", +- "MetricExpr": "((hv_24x7@PM_ALINK1_OUT_ODD_TOTAL_UTIL\\,chip\\=?@ + hv_24x7@PM_ALINK1_OUT_EVEN_TOTAL_UTIL\\,chip\\=?@) / 
(hv_24x7@PM_ALINK1_OUT_ODD_AVLBL_CYCLES\\,chip\\=?@ + hv_24x7@PM_ALINK1_OUT_EVEN_AVLBL_CYCLES\\,chip\\=?@)) * 100", ++ "MetricExpr": "((hv_24x7@PM_ALINK1_OUT_ODD_TOTAL_UTIL\\,chip\\=?@ + hv_24x7@PM_ALINK1_OUT_EVEN_TOTAL_UTIL\\,chip\\=?@) / (1 + hv_24x7@PM_ALINK1_OUT_ODD_AVLBL_CYCLES\\,chip\\=?@ + hv_24x7@PM_ALINK1_OUT_EVEN_AVLBL_CYCLES\\,chip\\=?@)) * 100", + "ScaleUnit": "1%", + "AggregationMode": "PerChip" + }, + { + "MetricName": "ALINK2_OUT_TOTAL_UTILIZATION", +- "MetricExpr": "((hv_24x7@PM_ALINK2_OUT_ODD_TOTAL_UTIL\\,chip\\=?@ + hv_24x7@PM_ALINK2_OUT_EVEN_TOTAL_UTIL\\,chip\\=?@) / (hv_24x7@PM_ALINK2_OUT_ODD_AVLBL_CYCLES\\,chip\\=?@ + hv_24x7@PM_ALINK2_OUT_EVEN_AVLBL_CYCLES\\,chip\\=?@)) * 100", ++ "MetricExpr": "((hv_24x7@PM_ALINK2_OUT_ODD_TOTAL_UTIL\\,chip\\=?@ + hv_24x7@PM_ALINK2_OUT_EVEN_TOTAL_UTIL\\,chip\\=?@) / (1 + hv_24x7@PM_ALINK2_OUT_ODD_AVLBL_CYCLES\\,chip\\=?@ + hv_24x7@PM_ALINK2_OUT_EVEN_AVLBL_CYCLES\\,chip\\=?@)) * 100", + "ScaleUnit": "1%", + "AggregationMode": "PerChip" + }, + { + "MetricName": "ALINK3_OUT_TOTAL_UTILIZATION", +- "MetricExpr": "((hv_24x7@PM_ALINK3_OUT_ODD_TOTAL_UTIL\\,chip\\=?@ + hv_24x7@PM_ALINK3_OUT_EVEN_TOTAL_UTIL\\,chip\\=?@) / (hv_24x7@PM_ALINK3_OUT_ODD_AVLBL_CYCLES\\,chip\\=?@ + hv_24x7@PM_ALINK3_OUT_EVEN_AVLBL_CYCLES\\,chip\\=?@)) * 100", ++ "MetricExpr": "((hv_24x7@PM_ALINK3_OUT_ODD_TOTAL_UTIL\\,chip\\=?@ + hv_24x7@PM_ALINK3_OUT_EVEN_TOTAL_UTIL\\,chip\\=?@) / (1 + hv_24x7@PM_ALINK3_OUT_ODD_AVLBL_CYCLES\\,chip\\=?@ + hv_24x7@PM_ALINK3_OUT_EVEN_AVLBL_CYCLES\\,chip\\=?@)) * 100", + "ScaleUnit": "1%", + "AggregationMode": "PerChip" + }, + { + "MetricName": "ALINK4_OUT_TOTAL_UTILIZATION", +- "MetricExpr": "((hv_24x7@PM_ALINK4_OUT_ODD_TOTAL_UTIL\\,chip\\=?@ + hv_24x7@PM_ALINK4_OUT_EVEN_TOTAL_UTIL\\,chip\\=?@) / (hv_24x7@PM_ALINK4_OUT_ODD_AVLBL_CYCLES\\,chip\\=?@ + hv_24x7@PM_ALINK4_OUT_EVEN_AVLBL_CYCLES\\,chip\\=?@)) * 100", ++ "MetricExpr": "((hv_24x7@PM_ALINK4_OUT_ODD_TOTAL_UTIL\\,chip\\=?@ + hv_24x7@PM_ALINK4_OUT_EVEN_TOTAL_UTIL\\,chip\\=?@) / (1 + hv_24x7@PM_ALINK4_OUT_ODD_AVLBL_CYCLES\\,chip\\=?@ + hv_24x7@PM_ALINK4_OUT_EVEN_AVLBL_CYCLES\\,chip\\=?@)) * 100", + "ScaleUnit": "1%", + "AggregationMode": "PerChip" + }, + { + "MetricName": "ALINK5_OUT_TOTAL_UTILIZATION", +- "MetricExpr": "((hv_24x7@PM_ALINK5_OUT_ODD_TOTAL_UTIL\\,chip\\=?@ + hv_24x7@PM_ALINK5_OUT_EVEN_TOTAL_UTIL\\,chip\\=?@) / (hv_24x7@PM_ALINK5_OUT_ODD_AVLBL_CYCLES\\,chip\\=?@ + hv_24x7@PM_ALINK5_OUT_EVEN_AVLBL_CYCLES\\,chip\\=?@)) * 100", ++ "MetricExpr": "((hv_24x7@PM_ALINK5_OUT_ODD_TOTAL_UTIL\\,chip\\=?@ + hv_24x7@PM_ALINK5_OUT_EVEN_TOTAL_UTIL\\,chip\\=?@) / (1 + hv_24x7@PM_ALINK5_OUT_ODD_AVLBL_CYCLES\\,chip\\=?@ + hv_24x7@PM_ALINK5_OUT_EVEN_AVLBL_CYCLES\\,chip\\=?@)) * 100", + "ScaleUnit": "1%", + "AggregationMode": "PerChip" + }, + { + "MetricName": "ALINK6_OUT_TOTAL_UTILIZATION", +- "MetricExpr": "((hv_24x7@PM_ALINK6_OUT_ODD_TOTAL_UTIL\\,chip\\=?@ + hv_24x7@PM_ALINK6_OUT_EVEN_TOTAL_UTIL\\,chip\\=?@) / (hv_24x7@PM_ALINK6_OUT_ODD_AVLBL_CYCLES\\,chip\\=?@ + hv_24x7@PM_ALINK6_OUT_EVEN_AVLBL_CYCLES\\,chip\\=?@)) * 100", ++ "MetricExpr": "((hv_24x7@PM_ALINK6_OUT_ODD_TOTAL_UTIL\\,chip\\=?@ + hv_24x7@PM_ALINK6_OUT_EVEN_TOTAL_UTIL\\,chip\\=?@) / (1 + hv_24x7@PM_ALINK6_OUT_ODD_AVLBL_CYCLES\\,chip\\=?@ + hv_24x7@PM_ALINK6_OUT_EVEN_AVLBL_CYCLES\\,chip\\=?@)) * 100", + "ScaleUnit": "1%", + "AggregationMode": "PerChip" + }, + { + "MetricName": "ALINK7_OUT_TOTAL_UTILIZATION", +- "MetricExpr": "((hv_24x7@PM_ALINK7_OUT_ODD_TOTAL_UTIL\\,chip\\=?@ + 
hv_24x7@PM_ALINK7_OUT_EVEN_TOTAL_UTIL\\,chip\\=?@) / (hv_24x7@PM_ALINK7_OUT_ODD_AVLBL_CYCLES\\,chip\\=?@ + hv_24x7@PM_ALINK7_OUT_EVEN_AVLBL_CYCLES\\,chip\\=?@)) * 100", ++ "MetricExpr": "((hv_24x7@PM_ALINK7_OUT_ODD_TOTAL_UTIL\\,chip\\=?@ + hv_24x7@PM_ALINK7_OUT_EVEN_TOTAL_UTIL\\,chip\\=?@) / (1 + hv_24x7@PM_ALINK7_OUT_ODD_AVLBL_CYCLES\\,chip\\=?@ + hv_24x7@PM_ALINK7_OUT_EVEN_AVLBL_CYCLES\\,chip\\=?@)) * 100", + "ScaleUnit": "1%", + "AggregationMode": "PerChip" + }, + { + "MetricName": "ALINK0_OUT_DATA_UTILIZATION", +- "MetricExpr": "((hv_24x7@PM_ALINK0_OUT_ODD_DATA\\,chip\\=?@ + hv_24x7@PM_ALINK0_OUT_EVEN_DATA\\,chip\\=?@) / (hv_24x7@PM_ALINK0_OUT_ODD_AVLBL_CYCLES\\,chip\\=?@ + hv_24x7@PM_ALINK0_OUT_EVEN_AVLBL_CYCLES\\,chip\\=?@)) * 100", ++ "MetricExpr": "((hv_24x7@PM_ALINK0_OUT_ODD_DATA\\,chip\\=?@ + hv_24x7@PM_ALINK0_OUT_EVEN_DATA\\,chip\\=?@) / (1 + hv_24x7@PM_ALINK0_OUT_ODD_AVLBL_CYCLES\\,chip\\=?@ + hv_24x7@PM_ALINK0_OUT_EVEN_AVLBL_CYCLES\\,chip\\=?@)) * 100", + "ScaleUnit": "1.063%", + "AggregationMode": "PerChip" + }, + { + "MetricName": "ALINK1_OUT_DATA_UTILIZATION", +- "MetricExpr": "((hv_24x7@PM_ALINK1_OUT_ODD_DATA\\,chip\\=?@ + hv_24x7@PM_ALINK1_OUT_EVEN_DATA\\,chip\\=?@) / (hv_24x7@PM_ALINK1_OUT_ODD_AVLBL_CYCLES\\,chip\\=?@ + hv_24x7@PM_ALINK1_OUT_EVEN_AVLBL_CYCLES\\,chip\\=?@)) * 100", ++ "MetricExpr": "((hv_24x7@PM_ALINK1_OUT_ODD_DATA\\,chip\\=?@ + hv_24x7@PM_ALINK1_OUT_EVEN_DATA\\,chip\\=?@) / (1 + hv_24x7@PM_ALINK1_OUT_ODD_AVLBL_CYCLES\\,chip\\=?@ + hv_24x7@PM_ALINK1_OUT_EVEN_AVLBL_CYCLES\\,chip\\=?@)) * 100", + "ScaleUnit": "1.063%", + "AggregationMode": "PerChip" + }, + { + "MetricName": "ALINK2_OUT_DATA_UTILIZATION", +- "MetricExpr": "((hv_24x7@PM_ALINK2_OUT_ODD_DATA\\,chip\\=?@ + hv_24x7@PM_ALINK2_OUT_EVEN_DATA\\,chip\\=?@) / (hv_24x7@PM_ALINK2_OUT_ODD_AVLBL_CYCLES\\,chip\\=?@ + hv_24x7@PM_ALINK2_OUT_EVEN_AVLBL_CYCLES\\,chip\\=?@)) * 100", ++ "MetricExpr": "((hv_24x7@PM_ALINK2_OUT_ODD_DATA\\,chip\\=?@ + hv_24x7@PM_ALINK2_OUT_EVEN_DATA\\,chip\\=?@) / (1 + hv_24x7@PM_ALINK2_OUT_ODD_AVLBL_CYCLES\\,chip\\=?@ + hv_24x7@PM_ALINK2_OUT_EVEN_AVLBL_CYCLES\\,chip\\=?@)) * 100", + "ScaleUnit": "1.063%", + "AggregationMode": "PerChip" + }, + { + "MetricName": "ALINK3_OUT_DATA_UTILIZATION", +- "MetricExpr": "((hv_24x7@PM_ALINK3_OUT_ODD_DATA\\,chip\\=?@ + hv_24x7@PM_ALINK3_OUT_EVEN_DATA\\,chip\\=?@) / (hv_24x7@PM_ALINK3_OUT_ODD_AVLBL_CYCLES\\,chip\\=?@ + hv_24x7@PM_ALINK3_OUT_EVEN_AVLBL_CYCLES\\,chip\\=?@)) * 100", ++ "MetricExpr": "((hv_24x7@PM_ALINK3_OUT_ODD_DATA\\,chip\\=?@ + hv_24x7@PM_ALINK3_OUT_EVEN_DATA\\,chip\\=?@) / (1 + hv_24x7@PM_ALINK3_OUT_ODD_AVLBL_CYCLES\\,chip\\=?@ + hv_24x7@PM_ALINK3_OUT_EVEN_AVLBL_CYCLES\\,chip\\=?@)) * 100", + "ScaleUnit": "1.063%", + "AggregationMode": "PerChip" + }, + { + "MetricName": "ALINK4_OUT_DATA_UTILIZATION", +- "MetricExpr": "((hv_24x7@PM_ALINK4_OUT_ODD_DATA\\,chip\\=?@ + hv_24x7@PM_ALINK4_OUT_EVEN_DATA\\,chip\\=?@) / (hv_24x7@PM_ALINK4_OUT_ODD_AVLBL_CYCLES\\,chip\\=?@ + hv_24x7@PM_ALINK4_OUT_EVEN_AVLBL_CYCLES\\,chip\\=?@)) * 100", ++ "MetricExpr": "((hv_24x7@PM_ALINK4_OUT_ODD_DATA\\,chip\\=?@ + hv_24x7@PM_ALINK4_OUT_EVEN_DATA\\,chip\\=?@) / (1 + hv_24x7@PM_ALINK4_OUT_ODD_AVLBL_CYCLES\\,chip\\=?@ + hv_24x7@PM_ALINK4_OUT_EVEN_AVLBL_CYCLES\\,chip\\=?@)) * 100", + "ScaleUnit": "1.063%", + "AggregationMode": "PerChip" + }, + { + "MetricName": "ALINK5_OUT_DATA_UTILIZATION", +- "MetricExpr": "((hv_24x7@PM_ALINK5_OUT_ODD_DATA\\,chip\\=?@ + hv_24x7@PM_ALINK5_OUT_EVEN_DATA\\,chip\\=?@) / (hv_24x7@PM_ALINK5_OUT_ODD_AVLBL_CYCLES\\,chip\\=?@ + 
hv_24x7@PM_ALINK5_OUT_EVEN_AVLBL_CYCLES\\,chip\\=?@)) * 100", ++ "MetricExpr": "((hv_24x7@PM_ALINK5_OUT_ODD_DATA\\,chip\\=?@ + hv_24x7@PM_ALINK5_OUT_EVEN_DATA\\,chip\\=?@) / (1 + hv_24x7@PM_ALINK5_OUT_ODD_AVLBL_CYCLES\\,chip\\=?@ + hv_24x7@PM_ALINK5_OUT_EVEN_AVLBL_CYCLES\\,chip\\=?@)) * 100", + "ScaleUnit": "1.063%", + "AggregationMode": "PerChip" + }, + { + "MetricName": "ALINK6_OUT_DATA_UTILIZATION", +- "MetricExpr": "((hv_24x7@PM_ALINK6_OUT_ODD_DATA\\,chip\\=?@ + hv_24x7@PM_ALINK6_OUT_EVEN_DATA\\,chip\\=?@) / (hv_24x7@PM_ALINK6_OUT_ODD_AVLBL_CYCLES\\,chip\\=?@ + hv_24x7@PM_ALINK6_OUT_EVEN_AVLBL_CYCLES\\,chip\\=?@)) * 100", ++ "MetricExpr": "((hv_24x7@PM_ALINK6_OUT_ODD_DATA\\,chip\\=?@ + hv_24x7@PM_ALINK6_OUT_EVEN_DATA\\,chip\\=?@) / (1 + hv_24x7@PM_ALINK6_OUT_ODD_AVLBL_CYCLES\\,chip\\=?@ + hv_24x7@PM_ALINK6_OUT_EVEN_AVLBL_CYCLES\\,chip\\=?@)) * 100", + "ScaleUnit": "1.063%", + "AggregationMode": "PerChip" + }, + { + "MetricName": "ALINK7_OUT_DATA_UTILIZATION", +- "MetricExpr": "((hv_24x7@PM_ALINK7_OUT_ODD_DATA\\,chip\\=?@ + hv_24x7@PM_ALINK7_OUT_EVEN_DATA\\,chip\\=?@) / (hv_24x7@PM_ALINK7_OUT_ODD_AVLBL_CYCLES\\,chip\\=?@ + hv_24x7@PM_ALINK7_OUT_EVEN_AVLBL_CYCLES\\,chip\\=?@)) * 100", ++ "MetricExpr": "((hv_24x7@PM_ALINK7_OUT_ODD_DATA\\,chip\\=?@ + hv_24x7@PM_ALINK7_OUT_EVEN_DATA\\,chip\\=?@) / (1 + hv_24x7@PM_ALINK7_OUT_ODD_AVLBL_CYCLES\\,chip\\=?@ + hv_24x7@PM_ALINK7_OUT_EVEN_AVLBL_CYCLES\\,chip\\=?@)) * 100", + "ScaleUnit": "1.063%", + "AggregationMode": "PerChip" + }, diff --git a/tools/perf/pmu-events/arch/x86/skylakex/cache.json b/tools/perf/pmu-events/arch/x86/skylakex/cache.json index 9ff67206ade4e..821d2f2a8f251 100644 --- a/tools/perf/pmu-events/arch/x86/skylakex/cache.json @@ -440767,6 +567012,40 @@ index 58b7069c5a5f8..569e1b8ad0abc 100644 attr.exclude_user = evsel->core.attr.exclude_user; attr.exclude_kernel = evsel->core.attr.exclude_kernel; attr.exclude_hv = evsel->core.attr.exclude_hv; +diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c +index 8d2865b9ade20..0ef4cbf21e627 100644 +--- a/tools/perf/util/auxtrace.c ++++ b/tools/perf/util/auxtrace.c +@@ -2260,11 +2260,19 @@ struct sym_args { + bool near; + }; + ++static bool kern_sym_name_match(const char *kname, const char *name) ++{ ++ size_t n = strlen(name); ++ ++ return !strcmp(kname, name) || ++ (!strncmp(kname, name, n) && kname[n] == '\t'); ++} ++ + static bool kern_sym_match(struct sym_args *args, const char *name, char type) + { + /* A function with the same name, and global or the n'th found or any */ + return kallsyms__is_function(type) && +- !strcmp(name, args->name) && ++ kern_sym_name_match(name, args->name) && + ((args->global && isupper(type)) || + (args->selected && ++(args->cnt) == args->idx) || + (!args->global && !args->selected)); +@@ -2537,7 +2545,7 @@ static int find_dso_sym(struct dso *dso, const char *sym_name, u64 *start, + *size = sym->start - *start; + if (idx > 0) { + if (*size) +- return 1; ++ return 0; + } else if (dso_sym_match(sym, sym_name, &cnt, idx)) { + print_duplicate_syms(dso, sym_name); + return -EINVAL; diff --git a/tools/perf/util/bpf-event.c b/tools/perf/util/bpf-event.c index 1a7112a87736a..cf1b9f6ec0dbe 100644 --- a/tools/perf/util/bpf-event.c @@ -440834,6 +567113,39 @@ index fbb3c4057c302..71710a1da4472 100644 return err; } +diff --git a/tools/perf/util/bpf_counter.h b/tools/perf/util/bpf_counter.h +index 65ebaa6694fbd..4b5dda7530c4d 100644 +--- a/tools/perf/util/bpf_counter.h ++++ b/tools/perf/util/bpf_counter.h +@@ -4,9 +4,12 @@ + + #include <linux/list.h> + 
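
/*
 * The bpf_counter.h hunk below wraps the libbpf includes in
 * HAVE_LIBBPF_SUPPORT so the header still parses when perf is built
 * without libbpf. A minimal sketch of that guard idiom, assuming a
 * hypothetical HAVE_FOO feature macro and <foo/foo.h> dependency
 * (not perf's actual names):
 */
#ifdef HAVE_FOO
#include <foo/foo.h>
#else
/* stub keeps callers compiling and linking in no-libfoo builds */
static inline int foo_init(void) { return -1; }
#endif
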
#include <sys/resource.h> ++ ++#ifdef HAVE_LIBBPF_SUPPORT + #include <bpf/bpf.h> + #include <bpf/btf.h> + #include <bpf/libbpf.h> ++#endif + + struct evsel; + struct target; +@@ -87,6 +90,8 @@ static inline void set_max_rlimit(void) + setrlimit(RLIMIT_MEMLOCK, &rinf); + } + ++#ifdef HAVE_BPF_SKEL ++ + static inline __u32 bpf_link_get_id(int fd) + { + struct bpf_link_info link_info = { .id = 0, }; +@@ -127,5 +132,6 @@ static inline int bperf_trigger_reading(int prog_fd, int cpu) + + return bpf_prog_test_run_opts(prog_fd, &opts); + } ++#endif /* HAVE_BPF_SKEL */ + + #endif /* __PERF_BPF_COUNTER_H */ diff --git a/tools/perf/util/bpf_skel/bperf.h b/tools/perf/util/bpf_skel/bperf.h deleted file mode 100644 index 186a5551ddb9d..0000000000000 @@ -440976,8 +567288,59 @@ index e32e8f2ff3bd7..1d7c53873dd2d 100644 return build_id_cache__add_b(&dso->bid, name, dso->nsinfo, is_kallsyms, is_vdso); } +diff --git a/tools/perf/util/cgroup.c b/tools/perf/util/cgroup.c +index e99b41f9be45a..cd978c240e0dd 100644 +--- a/tools/perf/util/cgroup.c ++++ b/tools/perf/util/cgroup.c +@@ -224,6 +224,19 @@ static int add_cgroup_name(const char *fpath, const struct stat *sb __maybe_unus + return 0; + } + ++static int check_and_add_cgroup_name(const char *fpath) ++{ ++ struct cgroup_name *cn; ++ ++ list_for_each_entry(cn, &cgroup_list, list) { ++ if (!strcmp(cn->name, fpath)) ++ return 0; ++ } ++ ++ /* pretend if it's added by ftw() */ ++ return add_cgroup_name(fpath, NULL, FTW_D, NULL); ++} ++ + static void release_cgroup_list(void) + { + struct cgroup_name *cn; +@@ -242,7 +255,7 @@ static int list_cgroups(const char *str) + struct cgroup_name *cn; + char *s; + +- /* use given name as is - for testing purpose */ ++ /* use given name as is when no regex is given */ + for (;;) { + p = strchr(str, ','); + e = p ? 
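
/*
 * check_and_add_cgroup_name(), added above, makes repeated names in the
 * comma-separated cgroup list idempotent: walk the existing list first,
 * append only on a miss. In sketch form, where lookup()/append() are
 * hypothetical stand-ins for the list_for_each_entry() walk and the
 * ftw()-style callback:
 *   if (!lookup(&cgroup_list, name))
 *           append(&cgroup_list, name);
 */
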
p : eos; +@@ -253,13 +266,13 @@ static int list_cgroups(const char *str) + s = strndup(str, e - str); + if (!s) + return -1; +- /* pretend if it's added by ftw() */ +- ret = add_cgroup_name(s, NULL, FTW_D, NULL); ++ ++ ret = check_and_add_cgroup_name(s); + free(s); +- if (ret) ++ if (ret < 0) + return -1; + } else { +- if (add_cgroup_name("", NULL, FTW_D, NULL) < 0) ++ if (check_and_add_cgroup_name("/") < 0) + return -1; + } + diff --git a/tools/perf/util/data.c b/tools/perf/util/data.c -index f5d260b1df4d1..15a4547d608ec 100644 +index f5d260b1df4d1..090a76be522bb 100644 --- a/tools/perf/util/data.c +++ b/tools/perf/util/data.c @@ -44,10 +44,6 @@ int perf_data__create_dir(struct perf_data *data, int nr) @@ -441001,6 +567364,22 @@ index f5d260b1df4d1..15a4547d608ec 100644 return 0; out_err: +@@ -128,6 +127,7 @@ int perf_data__open_dir(struct perf_data *data) + file->size = st.st_size; + } + ++ closedir(dir); + if (!files) + return -EINVAL; + +@@ -136,6 +136,7 @@ int perf_data__open_dir(struct perf_data *data) + return 0; + + out_err: ++ closedir(dir); + close_dir(files, nr); + return ret; + } diff --git a/tools/perf/util/data.h b/tools/perf/util/data.h index c9de82af5584e..1402d9657ef27 100644 --- a/tools/perf/util/data.h @@ -441014,7 +567393,7 @@ index c9de82af5584e..1402d9657ef27 100644 enum perf_data_mode { PERF_DATA_MODE_WRITE, diff --git a/tools/perf/util/debug.c b/tools/perf/util/debug.c -index 2c06abf6dcd26..65e6c22f38e4f 100644 +index 2c06abf6dcd26..190e818a07176 100644 --- a/tools/perf/util/debug.c +++ b/tools/perf/util/debug.c @@ -179,7 +179,7 @@ static int trace_event_printer(enum binary_printer_ops op, @@ -441026,6 +567405,17 @@ index 2c06abf6dcd26..65e6c22f38e4f 100644 break; case BINARY_PRINT_CHAR_PAD: printed += color_fprintf(fp, color, " "); +@@ -241,6 +241,10 @@ int perf_quiet_option(void) + opt++; + } + ++ /* For debug variables that are used as bool types, set to 0. 
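	 * redirect_to_stderr and debug_peo_args are plain ints used as
	 * booleans, so quiet mode has to clear them explicitly or an earlier
	 * verbose option would survive -q. A table-driven sketch of the same
	 * reset, assuming a hypothetical array of pointers dbg_bool_vars[]:
	 *   for (i = 0; i < ARRAY_SIZE(dbg_bool_vars); i++)
	 *           *dbg_bool_vars[i] = 0;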
*/ ++ redirect_to_stderr = 0; ++ debug_peo_args = 0; ++ + return 0; + } + diff --git a/tools/perf/util/dsos.c b/tools/perf/util/dsos.c index 183a81d5b2f92..2db91121bdafe 100644 --- a/tools/perf/util/dsos.c @@ -441052,6 +567442,62 @@ index 183a81d5b2f92..2db91121bdafe 100644 return 0; } +diff --git a/tools/perf/util/dwarf-aux.c b/tools/perf/util/dwarf-aux.c +index 609ca16715018..623527edeac1e 100644 +--- a/tools/perf/util/dwarf-aux.c ++++ b/tools/perf/util/dwarf-aux.c +@@ -308,26 +308,13 @@ static int die_get_attr_udata(Dwarf_Die *tp_die, unsigned int attr_name, + { + Dwarf_Attribute attr; + +- if (dwarf_attr(tp_die, attr_name, &attr) == NULL || ++ if (dwarf_attr_integrate(tp_die, attr_name, &attr) == NULL || + dwarf_formudata(&attr, result) != 0) + return -ENOENT; + + return 0; + } + +-/* Get attribute and translate it as a sdata */ +-static int die_get_attr_sdata(Dwarf_Die *tp_die, unsigned int attr_name, +- Dwarf_Sword *result) +-{ +- Dwarf_Attribute attr; +- +- if (dwarf_attr(tp_die, attr_name, &attr) == NULL || +- dwarf_formsdata(&attr, result) != 0) +- return -ENOENT; +- +- return 0; +-} +- + /** + * die_is_signed_type - Check whether a type DIE is signed or not + * @tp_die: a DIE of a type +@@ -467,9 +454,9 @@ int die_get_data_member_location(Dwarf_Die *mb_die, Dwarf_Word *offs) + /* Get the call file index number in CU DIE */ + static int die_get_call_fileno(Dwarf_Die *in_die) + { +- Dwarf_Sword idx; ++ Dwarf_Word idx; + +- if (die_get_attr_sdata(in_die, DW_AT_call_file, &idx) == 0) ++ if (die_get_attr_udata(in_die, DW_AT_call_file, &idx) == 0) + return (int)idx; + else + return -ENOENT; +@@ -478,9 +465,9 @@ static int die_get_call_fileno(Dwarf_Die *in_die) + /* Get the declared file index number in CU DIE */ + static int die_get_decl_fileno(Dwarf_Die *pdie) + { +- Dwarf_Sword idx; ++ Dwarf_Word idx; + +- if (die_get_attr_sdata(pdie, DW_AT_decl_file, &idx) == 0) ++ if (die_get_attr_udata(pdie, DW_AT_decl_file, &idx) == 0) + return (int)idx; + else + return -ENOENT; diff --git a/tools/perf/util/env.c b/tools/perf/util/env.c index cf773f0dec384..5b24eb010336c 100644 --- a/tools/perf/util/env.c @@ -442059,9 +568505,18 @@ index b67c469aba795..7b7145501933f 100644 u8 depth; diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c -index 588601000f3f9..db00ca6a67deb 100644 +index 588601000f3f9..24e50fabb6c33 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c +@@ -207,7 +207,7 @@ static void new_line_csv(struct perf_stat_config *config, void *ctx) + + fputc('\n', os->fh); + if (os->prefix) +- fprintf(os->fh, "%s%s", os->prefix, config->csv_sep); ++ fprintf(os->fh, "%s", os->prefix); + aggr_printout(config, os->evsel, os->id, os->nr); + for (i = 0; i < os->nfields; i++) + fputs(config->csv_sep, os->fh); @@ -584,15 +584,16 @@ static void collect_all_aliases(struct perf_stat_config *config, struct evsel *c alias = list_prepare_entry(counter, &(evlist->core.entries), core.node); @@ -442089,7 +568544,7 @@ index 588601000f3f9..db00ca6a67deb 100644 } diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c -index 31cd59a2b66e6..6c183df191aaa 100644 +index 31cd59a2b66e6..fd42f768e5848 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -233,6 +233,33 @@ Elf_Scn *elf_section_by_name(Elf *elf, GElf_Ehdr *ep, @@ -442145,7 +568600,7 @@ index 31cd59a2b66e6..6c183df191aaa 100644 - sym.st_value -= shdr.sh_addr - shdr.sh_offset; + GElf_Phdr phdr; + -+ if (elf_read_program_header(syms_ss->elf, ++ if 
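
/*
 * elf_read_program_header() is the helper this same patch adds earlier
 * in symbol-elf.c: it walks the program headers looking for the loadable
 * segment that covers st_value. It is queried on runtime_ss rather than
 * syms_ss because only the runtime image's headers describe the mapping
 * the kernel actually used; the usual address-to-offset conversion such
 * a segment enables is
 *   sym.st_value -= phdr.p_vaddr - phdr.p_offset;
 */
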
(elf_read_program_header(runtime_ss->elf, + (u64)sym.st_value, &phdr)) { + pr_debug4("%s: failed to find program header for " + "symbol: %s st_value: %#" PRIx64 "\n", @@ -442401,6 +568856,96 @@ index 47d3ba895d6d9..4f176bbf29f42 100644 return (rapl_dram_energy_units = 15.3 / 1000000); default: return (rapl_energy_units); +diff --git a/tools/testing/ktest/ktest.pl b/tools/testing/ktest/ktest.pl +index 09d1578f9d66f..1737c59e4ff67 100755 +--- a/tools/testing/ktest/ktest.pl ++++ b/tools/testing/ktest/ktest.pl +@@ -1963,7 +1963,7 @@ sub run_scp_mod { + + sub _get_grub_index { + +- my ($command, $target, $skip) = @_; ++ my ($command, $target, $skip, $submenu) = @_; + + return if (defined($grub_number) && defined($last_grub_menu) && + $last_grub_menu eq $grub_menu && defined($last_machine) && +@@ -1980,11 +1980,16 @@ sub _get_grub_index { + + my $found = 0; + ++ my $submenu_number = 0; ++ + while (<IN>) { + if (/$target/) { + $grub_number++; + $found = 1; + last; ++ } elsif (defined($submenu) && /$submenu/) { ++ $submenu_number++; ++ $grub_number = -1; + } elsif (/$skip/) { + $grub_number++; + } +@@ -1993,6 +1998,9 @@ sub _get_grub_index { + + dodie "Could not find '$grub_menu' through $command on $machine" + if (!$found); ++ if ($submenu_number > 0) { ++ $grub_number = "$submenu_number>$grub_number"; ++ } + doprint "$grub_number\n"; + $last_grub_menu = $grub_menu; + $last_machine = $machine; +@@ -2003,6 +2011,7 @@ sub get_grub_index { + my $command; + my $target; + my $skip; ++ my $submenu; + my $grub_menu_qt; + + if ($reboot_type !~ /^grub/) { +@@ -2017,8 +2026,9 @@ sub get_grub_index { + $skip = '^\s*title\s'; + } elsif ($reboot_type eq "grub2") { + $command = "cat $grub_file"; +- $target = '^menuentry.*' . $grub_menu_qt; +- $skip = '^menuentry\s|^submenu\s'; ++ $target = '^\s*menuentry.*' . $grub_menu_qt; ++ $skip = '^\s*menuentry'; ++ $submenu = '^\s*submenu\s'; + } elsif ($reboot_type eq "grub2bls") { + $command = $grub_bls_get; + $target = '^title=.*' . 
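
# With GRUB2 submenus, a boot entry is addressed as "<submenu>><entry>",
# which is why _get_grub_index() now returns "$submenu_number>$grub_number"
# and reboot_to quotes the value it hands to grub-reboot, e.g.:
#   grub-reboot '1>2'    # second entry inside the first submenu
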
$grub_menu_qt; +@@ -2027,7 +2037,7 @@ sub get_grub_index { + return; + } + +- _get_grub_index($command, $target, $skip); ++ _get_grub_index($command, $target, $skip, $submenu); + } + + sub wait_for_input { +@@ -2090,7 +2100,7 @@ sub reboot_to { + if ($reboot_type eq "grub") { + run_ssh "'(echo \"savedefault --default=$grub_number --once\" | grub --batch)'"; + } elsif (($reboot_type eq "grub2") or ($reboot_type eq "grub2bls")) { +- run_ssh "$grub_reboot $grub_number"; ++ run_ssh "$grub_reboot \"'$grub_number'\""; + } elsif ($reboot_type eq "syslinux") { + run_ssh "$syslinux --once \\\"$syslinux_label\\\" $syslinux_path"; + } elsif (defined $reboot_script) { +@@ -3768,9 +3778,10 @@ sub test_this_config { + # .config to make sure it is missing the config that + # we had before + my %configs = %min_configs; +- delete $configs{$config}; ++ $configs{$config} = "# $config is not set"; + make_new_config ((values %configs), (values %keep_configs)); + make_oldconfig; ++ delete $configs{$config}; + undef %configs; + assign_configs \%configs, $output_config; + diff --git a/tools/testing/kunit/kunit_kernel.py b/tools/testing/kunit/kunit_kernel.py index 2c6f916ccbafa..0874e512d109b 100644 --- a/tools/testing/kunit/kunit_kernel.py @@ -442479,7 +569024,7 @@ index ed563bdd88f39..b752ce47ead3c 100644 error = devm_add_action_or_reset(dev, nfit_test_kill, pgmap); if (error) diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile -index c852eb40c4f7d..14206d1d1efeb 100644 +index c852eb40c4f7d..56a4873a343cf 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -8,6 +8,7 @@ TARGETS += clone3 @@ -442490,6 +569035,43 @@ index c852eb40c4f7d..14206d1d1efeb 100644 TARGETS += drivers/dma-buf TARGETS += efivarfs TARGETS += exec +@@ -113,19 +114,27 @@ ifdef building_out_of_srctree + override LDFLAGS = + endif + +-ifneq ($(O),) +- BUILD := $(O)/kselftest ++top_srcdir ?= ../../.. ++ ++ifeq ("$(origin O)", "command line") ++ KBUILD_OUTPUT := $(O) ++endif ++ ++ifneq ($(KBUILD_OUTPUT),) ++ # Make's built-in functions such as $(abspath ...), $(realpath ...) cannot ++ # expand a shell special character '~'. We use a somewhat tedious way here. ++ abs_objtree := $(shell cd $(top_srcdir) && mkdir -p $(KBUILD_OUTPUT) && cd $(KBUILD_OUTPUT) && pwd) ++ $(if $(abs_objtree),, \ ++ $(error failed to create output directory "$(KBUILD_OUTPUT)")) ++ # $(realpath ...) resolves symlinks ++ abs_objtree := $(realpath $(abs_objtree)) ++ BUILD := $(abs_objtree)/kselftest + else +- ifneq ($(KBUILD_OUTPUT),) +- BUILD := $(KBUILD_OUTPUT)/kselftest +- else +- BUILD := $(shell pwd) +- DEFAULT_INSTALL_HDR_PATH := 1 +- endif ++ BUILD := $(CURDIR) ++ DEFAULT_INSTALL_HDR_PATH := 1 + endif + + # Prepare for headers install +-top_srcdir ?= ../../.. 
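
# The Makefile hunk above canonicalizes the output directory in two steps
# because make's $(abspath)/$(realpath) cannot expand a shell '~'. The
# same idiom in isolation, with OUTDIR standing in for KBUILD_OUTPUT:
#   abs_out := $(shell cd $(top_srcdir) && mkdir -p $(OUTDIR) && cd $(OUTDIR) && pwd)
#   abs_out := $(realpath $(abs_out))    # then resolve symlinks
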
+ include $(top_srcdir)/scripts/subarch.include + ARCH ?= $(SUBARCH) + export KSFT_KHDR_INSTALL_DONE := 1 diff --git a/tools/testing/selftests/arm64/bti/Makefile b/tools/testing/selftests/arm64/bti/Makefile index 73e013c082a65..dafa1c2aa5c47 100644 --- a/tools/testing/selftests/arm64/bti/Makefile @@ -444067,6 +570649,94 @@ index 336a749673d19..2e701e7f69680 100644 { "calls: overlapping caller/callee", .insns = { +diff --git a/tools/testing/selftests/bpf/verifier/ref_tracking.c b/tools/testing/selftests/bpf/verifier/ref_tracking.c +index 3b6ee009c00b6..4a768b130d61c 100644 +--- a/tools/testing/selftests/bpf/verifier/ref_tracking.c ++++ b/tools/testing/selftests/bpf/verifier/ref_tracking.c +@@ -905,3 +905,39 @@ + .result_unpriv = REJECT, + .errstr_unpriv = "unknown func", + }, ++{ ++ "reference tracking: try to leak released ptr reg", ++ .insns = { ++ BPF_MOV64_IMM(BPF_REG_0, 0), ++ BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -4), ++ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), ++ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), ++ BPF_LD_MAP_FD(BPF_REG_1, 0), ++ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), ++ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), ++ BPF_EXIT_INSN(), ++ BPF_MOV64_REG(BPF_REG_9, BPF_REG_0), ++ ++ BPF_MOV64_IMM(BPF_REG_0, 0), ++ BPF_LD_MAP_FD(BPF_REG_1, 0), ++ BPF_MOV64_IMM(BPF_REG_2, 8), ++ BPF_MOV64_IMM(BPF_REG_3, 0), ++ BPF_EMIT_CALL(BPF_FUNC_ringbuf_reserve), ++ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), ++ BPF_EXIT_INSN(), ++ BPF_MOV64_REG(BPF_REG_8, BPF_REG_0), ++ ++ BPF_MOV64_REG(BPF_REG_1, BPF_REG_8), ++ BPF_MOV64_IMM(BPF_REG_2, 0), ++ BPF_EMIT_CALL(BPF_FUNC_ringbuf_discard), ++ BPF_MOV64_IMM(BPF_REG_0, 0), ++ ++ BPF_STX_MEM(BPF_DW, BPF_REG_9, BPF_REG_8, 0), ++ BPF_EXIT_INSN() ++ }, ++ .fixup_map_array_48b = { 4 }, ++ .fixup_map_ringbuf = { 11 }, ++ .result = ACCEPT, ++ .result_unpriv = REJECT, ++ .errstr_unpriv = "R8 !read_ok" ++}, +diff --git a/tools/testing/selftests/bpf/verifier/search_pruning.c b/tools/testing/selftests/bpf/verifier/search_pruning.c +index 7e50cb80873a5..7e36078f8f482 100644 +--- a/tools/testing/selftests/bpf/verifier/search_pruning.c ++++ b/tools/testing/selftests/bpf/verifier/search_pruning.c +@@ -154,3 +154,39 @@ + .result_unpriv = ACCEPT, + .insn_processed = 15, + }, ++/* The test performs a conditional 64-bit write to a stack location ++ * fp[-8], this is followed by an unconditional 8-bit write to fp[-8], ++ * then data is read from fp[-8]. This sequence is unsafe. ++ * ++ * The test would be mistakenly marked as safe w/o dst register parent ++ * preservation in verifier.c:copy_register_state() function. ++ * ++ * Note the usage of BPF_F_TEST_STATE_FREQ to force creation of the ++ * checkpoint state after conditional 64-bit assignment. 
++ */ ++{ ++ "write tracking and register parent chain bug", ++ .insns = { ++ /* r6 = ktime_get_ns() */ ++ BPF_EMIT_CALL(BPF_FUNC_ktime_get_ns), ++ BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), ++ /* r0 = ktime_get_ns() */ ++ BPF_EMIT_CALL(BPF_FUNC_ktime_get_ns), ++ /* if r0 > r6 goto +1 */ ++ BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_6, 1), ++ /* *(u64 *)(r10 - 8) = 0xdeadbeef */ ++ BPF_ST_MEM(BPF_DW, BPF_REG_FP, -8, 0xdeadbeef), ++ /* r1 = 42 */ ++ BPF_MOV64_IMM(BPF_REG_1, 42), ++ /* *(u8 *)(r10 - 8) = r1 */ ++ BPF_STX_MEM(BPF_B, BPF_REG_FP, BPF_REG_1, -8), ++ /* r2 = *(u64 *)(r10 - 8) */ ++ BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_FP, -8), ++ /* exit(0) */ ++ BPF_MOV64_IMM(BPF_REG_0, 0), ++ BPF_EXIT_INSN(), ++ }, ++ .flags = BPF_F_TEST_STATE_FREQ, ++ .errstr = "invalid read from stack off -8+1 size 8", ++ .result = REJECT, ++}, diff --git a/tools/testing/selftests/bpf/verifier/sock.c b/tools/testing/selftests/bpf/verifier/sock.c index ce13ece08d51c..8c224eac93df7 100644 --- a/tools/testing/selftests/bpf/verifier/sock.c @@ -445518,6 +572188,21 @@ index fedcb7b35af9f..af5ea50ed5c0e 100755 } flooding_check_packets() +diff --git a/tools/testing/selftests/drivers/net/netdevsim/devlink.sh b/tools/testing/selftests/drivers/net/netdevsim/devlink.sh +index 9de1d123f4f5d..a08c02abde121 100755 +--- a/tools/testing/selftests/drivers/net/netdevsim/devlink.sh ++++ b/tools/testing/selftests/drivers/net/netdevsim/devlink.sh +@@ -496,8 +496,8 @@ dummy_reporter_test() + + check_reporter_info dummy healthy 3 3 10 true + +- echo 8192> $DEBUGFS_DIR/health/binary_len +- check_fail $? "Failed set dummy reporter binary len to 8192" ++ echo 8192 > $DEBUGFS_DIR/health/binary_len ++ check_err $? "Failed set dummy reporter binary len to 8192" + + local dump=$(devlink health dump show $DL_HANDLE reporter dummy -j) + check_err $? "Failed show dump of dummy reporter" diff --git a/tools/testing/selftests/drivers/net/ocelot/tc_flower_chains.sh b/tools/testing/selftests/drivers/net/ocelot/tc_flower_chains.sh index f7d84549cc3e3..79f751259098d 100755 --- a/tools/testing/selftests/drivers/net/ocelot/tc_flower_chains.sh @@ -445531,6 +572216,22 @@ index f7d84549cc3e3..79f751259098d 100755 action goto chain $(IS2 1 0) } +diff --git a/tools/testing/selftests/efivarfs/efivarfs.sh b/tools/testing/selftests/efivarfs/efivarfs.sh +index a90f394f9aa90..d374878cc0ba9 100755 +--- a/tools/testing/selftests/efivarfs/efivarfs.sh ++++ b/tools/testing/selftests/efivarfs/efivarfs.sh +@@ -87,6 +87,11 @@ test_create_read() + { + local file=$efivarfs_mount/$FUNCNAME-$test_guid + ./create-read $file ++ if [ $? 
-ne 0 ]; then ++ echo "create and read $file failed" ++ file_cleanup $file ++ exit 1 ++ fi + file_cleanup $file + } + diff --git a/tools/testing/selftests/exec/Makefile b/tools/testing/selftests/exec/Makefile index dd61118df66ed..2d7fca446c7f7 100644 --- a/tools/testing/selftests/exec/Makefile @@ -445548,6 +572249,33 @@ index dd61118df66ed..2d7fca446c7f7 100644 # Makefile is a run-time dependency, since it's accessed by the execveat test TEST_FILES := Makefile +diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func_event_triggers.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func_event_triggers.tc +index 3145b0f1835c3..27a68bbe778be 100644 +--- a/tools/testing/selftests/ftrace/test.d/ftrace/func_event_triggers.tc ++++ b/tools/testing/selftests/ftrace/test.d/ftrace/func_event_triggers.tc +@@ -38,11 +38,18 @@ cnt_trace() { + + test_event_enabled() { + val=$1 ++ check_times=10 # wait for 10 * SLEEP_TIME at most + +- e=`cat $EVENT_ENABLE` +- if [ "$e" != $val ]; then +- fail "Expected $val but found $e" +- fi ++ while [ $check_times -ne 0 ]; do ++ e=`cat $EVENT_ENABLE` ++ if [ "$e" == $val ]; then ++ return 0 ++ fi ++ sleep $SLEEP_TIME ++ check_times=$((check_times - 1)) ++ done ++ ++ fail "Expected $val but found $e" + } + + run_enable_disable() { diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func_set_ftrace_file.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func_set_ftrace_file.tc index e96e279e0533a..25432b8cd5bd2 100644 --- a/tools/testing/selftests/ftrace/test.d/ftrace/func_set_ftrace_file.tc @@ -445606,6 +572334,30 @@ index 12631f0076a10..11e157d7533b8 100644 + $(MAKE) OUTPUT=$$BUILD_TARGET -C $$DIR $@;\ done endef +diff --git a/tools/testing/selftests/futex/functional/Makefile b/tools/testing/selftests/futex/functional/Makefile +index bd1fec59e010d..ece2e38fbb0be 100644 +--- a/tools/testing/selftests/futex/functional/Makefile ++++ b/tools/testing/selftests/futex/functional/Makefile +@@ -4,11 +4,11 @@ INCLUDES := -I../include -I../../ -I../../../../../usr/include/ \ + CFLAGS := $(CFLAGS) -g -O2 -Wall -D_GNU_SOURCE -pthread $(INCLUDES) + LDLIBS := -lpthread -lrt + +-HEADERS := \ ++LOCAL_HDRS := \ + ../include/futextest.h \ + ../include/atomic.h \ + ../include/logging.h +-TEST_GEN_FILES := \ ++TEST_GEN_PROGS := \ + futex_wait_timeout \ + futex_wait_wouldblock \ + futex_requeue_pi \ +@@ -24,5 +24,3 @@ TEST_PROGS := run.sh + top_srcdir = ../../../../.. 
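
# Switching from a private HEADERS list to LOCAL_HDRS works because
# lib.mk's generic build rules already name $(LOCAL_HDRS) as
# prerequisites of every generated binary, roughly:
#   $(OUTPUT)/%: %.c $(LOCAL_HDRS)
#	$(LINK.c) $(filter-out $(LOCAL_HDRS),$^) $(LDLIBS) -o $@
# (a sketch of lib.mk's rule, not a verbatim copy), so the explicit
# "$(TEST_GEN_FILES): $(HEADERS)" dependency below can be dropped.
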
+ KSFT_KHDR_INSTALL := 1 + include ../../lib.mk +- +-$(TEST_GEN_FILES): $(HEADERS) diff --git a/tools/testing/selftests/gpio/Makefile b/tools/testing/selftests/gpio/Makefile index 39f2bbe8dd3df..42ea7d2aa8440 100644 --- a/tools/testing/selftests/gpio/Makefile @@ -445617,11 +572369,69 @@ index 39f2bbe8dd3df..42ea7d2aa8440 100644 +CFLAGS += -I../../../../usr/include include ../lib.mk +diff --git a/tools/testing/selftests/intel_pstate/Makefile b/tools/testing/selftests/intel_pstate/Makefile +index 39f0fa2a8fd63..05d66ef50c977 100644 +--- a/tools/testing/selftests/intel_pstate/Makefile ++++ b/tools/testing/selftests/intel_pstate/Makefile +@@ -2,10 +2,10 @@ + CFLAGS := $(CFLAGS) -Wall -D_GNU_SOURCE + LDLIBS += -lm + +-uname_M := $(shell uname -m 2>/dev/null || echo not) +-ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/) ++ARCH ?= $(shell uname -m 2>/dev/null || echo not) ++ARCH_PROCESSED := $(shell echo $(ARCH) | sed -e s/i.86/x86/ -e s/x86_64/x86/) + +-ifeq (x86,$(ARCH)) ++ifeq (x86,$(ARCH_PROCESSED)) + TEST_GEN_FILES := msr aperf + endif + +diff --git a/tools/testing/selftests/kselftest.h b/tools/testing/selftests/kselftest.h +index 8d50483fe204a..898d7b2fac6cc 100644 +--- a/tools/testing/selftests/kselftest.h ++++ b/tools/testing/selftests/kselftest.h +@@ -48,6 +48,25 @@ + #include <stdarg.h> + #include <stdio.h> + ++#ifndef ARRAY_SIZE ++#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0])) ++#endif ++ ++/* ++ * gcc cpuid.h provides __cpuid_count() since v4.4. ++ * Clang/LLVM cpuid.h provides __cpuid_count() since v3.4.0. ++ * ++ * Provide local define for tests needing __cpuid_count() because ++ * selftests need to work in older environments that do not yet ++ * have __cpuid_count(). ++ */ ++#ifndef __cpuid_count ++#define __cpuid_count(level, count, a, b, c, d) \ ++ __asm__ __volatile__ ("cpuid\n\t" \ ++ : "=a" (a), "=b" (b), "=c" (c), "=d" (d) \ ++ : "0" (level), "2" (count)) ++#endif ++ + /* define kselftest exit codes */ + #define KSFT_PASS 0 + #define KSFT_FAIL 1 diff --git a/tools/testing/selftests/kselftest_harness.h b/tools/testing/selftests/kselftest_harness.h -index ae0f0f33b2a6e..78e59620d28de 100644 +index ae0f0f33b2a6e..11779405dc804 100644 --- a/tools/testing/selftests/kselftest_harness.h +++ b/tools/testing/selftests/kselftest_harness.h -@@ -875,7 +875,8 @@ static void __timeout_handler(int sig, siginfo_t *info, void *ucontext) +@@ -671,7 +671,9 @@ + #define EXPECT_STRNE(expected, seen) \ + __EXPECT_STR(expected, seen, !=, 0) + ++#ifndef ARRAY_SIZE + #define ARRAY_SIZE(a) (sizeof(a) / sizeof(a[0])) ++#endif + + /* Support an optional handler after and ASSERT_* or EXPECT_*. The approach is + * not thread-safe, but it should be fine in most sane test scenarios. 
+@@ -875,7 +877,8 @@ static void __timeout_handler(int sig, siginfo_t *info, void *ucontext) } t->timed_out = true; @@ -445631,7 +572441,7 @@ index ae0f0f33b2a6e..78e59620d28de 100644 } void __wait_for_test(struct __test_metadata *t) -@@ -969,7 +970,7 @@ void __run_test(struct __fixture_metadata *f, +@@ -969,7 +972,7 @@ void __run_test(struct __fixture_metadata *f, t->passed = 1; t->skip = 0; t->trigger = 0; @@ -445640,7 +572450,7 @@ index ae0f0f33b2a6e..78e59620d28de 100644 t->no_print = 0; memset(t->results->reason, 0, sizeof(t->results->reason)); -@@ -985,6 +986,7 @@ void __run_test(struct __fixture_metadata *f, +@@ -985,6 +988,7 @@ void __run_test(struct __fixture_metadata *f, ksft_print_msg("ERROR SPAWNING TEST CHILD\n"); t->passed = 0; } else if (t->pid == 0) { @@ -446266,8 +573076,21 @@ index 2ac98d70d02bd..161eba7cd1289 100644 asm volatile ("vmsave %0\n\t" : : "a" (vmcb_gpa) : "memory"); vmcb_set_seg(&save->es, get_es(), 0, -1U, data_seg_attr); vmcb_set_seg(&save->cs, get_cs(), 0, -1U, code_seg_attr); +diff --git a/tools/testing/selftests/kvm/memslot_modification_stress_test.c b/tools/testing/selftests/kvm/memslot_modification_stress_test.c +index 4cfcafea9f5a6..766c1790df664 100644 +--- a/tools/testing/selftests/kvm/memslot_modification_stress_test.c ++++ b/tools/testing/selftests/kvm/memslot_modification_stress_test.c +@@ -72,7 +72,7 @@ struct memslot_antagonist_args { + static void add_remove_memslot(struct kvm_vm *vm, useconds_t delay, + uint64_t nr_modifications) + { +- const uint64_t pages = 1; ++ uint64_t pages = max_t(int, vm_get_page_size(vm), getpagesize()) / vm_get_page_size(vm); + uint64_t gpa; + int i; + diff --git a/tools/testing/selftests/kvm/rseq_test.c b/tools/testing/selftests/kvm/rseq_test.c -index 4158da0da2bba..2237d1aac8014 100644 +index 4158da0da2bba..d7a7e760adc80 100644 --- a/tools/testing/selftests/kvm/rseq_test.c +++ b/tools/testing/selftests/kvm/rseq_test.c @@ -82,8 +82,9 @@ static int next_cpu(int cpu) @@ -446296,7 +573119,7 @@ index 4158da0da2bba..2237d1aac8014 100644 - pthread_create(&migration_thread, NULL, migration_worker, 0); + pthread_create(&migration_thread, NULL, migration_worker, -+ (void *)(unsigned long)gettid()); ++ (void *)(unsigned long)syscall(SYS_gettid)); for (i = 0; !done; i++) { vcpu_run(vm, VCPU_ID); @@ -448772,6 +575595,22 @@ index 15fbef9cc8496..c28ef98ff3ac1 100644 _metadata->passed = 0; } +diff --git a/tools/testing/selftests/lib.mk b/tools/testing/selftests/lib.mk +index fe7ee2b0f29c2..a3df78b7702c1 100644 +--- a/tools/testing/selftests/lib.mk ++++ b/tools/testing/selftests/lib.mk +@@ -129,6 +129,11 @@ endef + clean: + $(CLEAN) + ++# Enables to extend CFLAGS and LDFLAGS from command line, e.g. ++# make USERCFLAGS=-Werror USERLDFLAGS=-static ++CFLAGS += $(USERCFLAGS) ++LDFLAGS += $(USERLDFLAGS) ++ + # When make O= with kselftest target from main level + # the following aren't defined. 
+ # diff --git a/tools/testing/selftests/lkdtm/config b/tools/testing/selftests/lkdtm/config index 38edea25631bc..b642411ceb6c3 100644 --- a/tools/testing/selftests/lkdtm/config @@ -448783,6 +575622,39 @@ index 38edea25631bc..b642411ceb6c3 100644 +CONFIG_UBSAN=y CONFIG_UBSAN_BOUNDS=y CONFIG_UBSAN_TRAP=y +diff --git a/tools/testing/selftests/lkdtm/stack-entropy.sh b/tools/testing/selftests/lkdtm/stack-entropy.sh +index 1b4d95d575f85..14fedeef762ed 100755 +--- a/tools/testing/selftests/lkdtm/stack-entropy.sh ++++ b/tools/testing/selftests/lkdtm/stack-entropy.sh +@@ -4,13 +4,27 @@ + # Measure kernel stack entropy by sampling via LKDTM's REPORT_STACK test. + set -e + samples="${1:-1000}" ++TRIGGER=/sys/kernel/debug/provoke-crash/DIRECT ++KSELFTEST_SKIP_TEST=4 ++ ++# Verify we have LKDTM available in the kernel. ++if [ ! -r $TRIGGER ] ; then ++ /sbin/modprobe -q lkdtm || true ++ if [ ! -r $TRIGGER ] ; then ++ echo "Cannot find $TRIGGER (missing CONFIG_LKDTM?)" ++ else ++ echo "Cannot write $TRIGGER (need to run as root?)" ++ fi ++ # Skip this test ++ exit $KSELFTEST_SKIP_TEST ++fi + + # Capture dmesg continuously since it may fill up depending on sample size. + log=$(mktemp -t stack-entropy-XXXXXX) + dmesg --follow >"$log" & pid=$! + report=-1 + for i in $(seq 1 $samples); do +- echo "REPORT_STACK" >/sys/kernel/debug/provoke-crash/DIRECT ++ echo "REPORT_STACK" > $TRIGGER + if [ -t 1 ]; then + percent=$(( 100 * $i / $samples )) + if [ "$percent" -ne "$report" ]; then diff --git a/tools/testing/selftests/memfd/memfd_test.c b/tools/testing/selftests/memfd/memfd_test.c index 192a2899bae8f..94df2692e6e4a 100644 --- a/tools/testing/selftests/memfd/memfd_test.c @@ -448961,9 +575833,18 @@ index 492b273743b4e..6a953ec793ced 100644 TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy reuseport_addr_any TEST_GEN_FILES += tcp_mmap tcp_inq psock_snd txring_overwrite diff --git a/tools/testing/selftests/net/af_unix/test_unix_oob.c b/tools/testing/selftests/net/af_unix/test_unix_oob.c -index 3dece8b292536..b57e91e1c3f28 100644 +index 3dece8b292536..532459a15067c 100644 --- a/tools/testing/selftests/net/af_unix/test_unix_oob.c +++ b/tools/testing/selftests/net/af_unix/test_unix_oob.c +@@ -124,7 +124,7 @@ void producer(struct sockaddr_un *consumer_addr) + + wait_for_signal(pipefd[0]); + if (connect(cfd, (struct sockaddr *)consumer_addr, +- sizeof(struct sockaddr)) != 0) { ++ sizeof(*consumer_addr)) != 0) { + perror("Connect failed"); + kill(0, SIGTERM); + exit(1); @@ -218,10 +218,10 @@ main(int argc, char **argv) /* Test 1: @@ -448979,7 +575860,7 @@ index 3dece8b292536..b57e91e1c3f28 100644 len = read_data(pfd, buf, 1024); if (!signal_recvd || len != 63 || oob != '@') { diff --git a/tools/testing/selftests/net/fcnal-test.sh b/tools/testing/selftests/net/fcnal-test.sh -index 3313566ce9062..91f54112167f1 100755 +index 3313566ce9062..364c82b797c19 100755 --- a/tools/testing/selftests/net/fcnal-test.sh +++ b/tools/testing/selftests/net/fcnal-test.sh @@ -455,6 +455,22 @@ cleanup() @@ -449137,11 +576018,66 @@ index 3313566ce9062..91f54112167f1 100755 TESTS_OTHER="use_cases" PAUSE_ON_FAIL=no +@@ -4037,10 +4072,13 @@ elif [ "$TESTS" = "ipv6" ]; then + TESTS="$TESTS_IPV6" + fi + +-which nettest >/dev/null +-if [ $? -ne 0 ]; then +- echo "'nettest' command not found; skipping tests" +- exit $ksft_skip ++# nettest can be run from PATH or from same directory as this selftest ++if ! which nettest >/dev/null; then ++ PATH=$PWD:$PATH ++ if ! 
which nettest >/dev/null; then ++ echo "'nettest' command not found; skipping tests" ++ exit $ksft_skip ++ fi + fi + + declare -i nfail=0 diff --git a/tools/testing/selftests/net/fib_nexthops.sh b/tools/testing/selftests/net/fib_nexthops.sh -index 0d293391e9a44..b5a69ad191b07 100755 +index 0d293391e9a44..0c066ba579d45 100755 --- a/tools/testing/selftests/net/fib_nexthops.sh +++ b/tools/testing/selftests/net/fib_nexthops.sh -@@ -2078,6 +2078,7 @@ basic_res() +@@ -1145,6 +1145,36 @@ ipv4_fcnal() + set +e + check_nexthop "dev veth1" "" + log_test $? 0 "Nexthops removed on admin down" ++ ++ # nexthop route delete warning: route add with nhid and delete ++ # using device ++ run_cmd "$IP li set dev veth1 up" ++ run_cmd "$IP nexthop add id 12 via 172.16.1.3 dev veth1" ++ out1=`dmesg | grep "WARNING:.*fib_nh_match.*" | wc -l` ++ run_cmd "$IP route add 172.16.101.1/32 nhid 12" ++ run_cmd "$IP route delete 172.16.101.1/32 dev veth1" ++ out2=`dmesg | grep "WARNING:.*fib_nh_match.*" | wc -l` ++ [ $out1 -eq $out2 ] ++ rc=$? ++ log_test $rc 0 "Delete nexthop route warning" ++ run_cmd "$IP route delete 172.16.101.1/32 nhid 12" ++ run_cmd "$IP nexthop del id 12" ++ ++ run_cmd "$IP nexthop add id 21 via 172.16.1.6 dev veth1" ++ run_cmd "$IP ro add 172.16.101.0/24 nhid 21" ++ run_cmd "$IP ro del 172.16.101.0/24 nexthop via 172.16.1.7 dev veth1 nexthop via 172.16.1.8 dev veth1" ++ log_test $? 2 "Delete multipath route with only nh id based entry" ++ ++ run_cmd "$IP nexthop add id 22 via 172.16.1.6 dev veth1" ++ run_cmd "$IP ro add 172.16.102.0/24 nhid 22" ++ run_cmd "$IP ro del 172.16.102.0/24 dev veth1" ++ log_test $? 2 "Delete route when specifying only nexthop device" ++ ++ run_cmd "$IP ro del 172.16.102.0/24 via 172.16.1.6" ++ log_test $? 2 "Delete route when specifying only gateway" ++ ++ run_cmd "$IP ro del 172.16.102.0/24" ++ log_test $? 0 "Delete route when not specifying nexthop attributes" + } + + ipv4_grp_fcnal() +@@ -2078,6 +2108,7 @@ basic_res() "id 101 index 0 nhid 2 id 101 index 1 nhid 2 id 101 index 2 nhid 1 id 101 index 3 nhid 1" log_test $? 0 "Dump all nexthop buckets in a group" @@ -449150,7 +576086,7 @@ index 0d293391e9a44..b5a69ad191b07 100755 jq '[.[] | select(.bucket.idle_time > 0 and .bucket.idle_time < 2)] | length') == 4 )) diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh -index 5abe92d55b696..996af1ae3d3dd 100755 +index 5abe92d55b696..7df066bf74b87 100755 --- a/tools/testing/selftests/net/fib_tests.sh +++ b/tools/testing/selftests/net/fib_tests.sh @@ -444,24 +444,63 @@ fib_rp_filter_test() @@ -449226,6 +576162,64 @@ index 5abe92d55b696..996af1ae3d3dd 100755 + run_cmd "ip netns exec ns2 ping -w1 -c1 127.0.0.1" log_test $? 
0 "rp_filter passes loopback packets" + cleanup +@@ -1583,13 +1622,21 @@ ipv4_del_addr_test() + + $IP addr add dev dummy1 172.16.104.1/24 + $IP addr add dev dummy1 172.16.104.11/24 ++ $IP addr add dev dummy1 172.16.104.12/24 ++ $IP addr add dev dummy1 172.16.104.13/24 + $IP addr add dev dummy2 172.16.104.1/24 + $IP addr add dev dummy2 172.16.104.11/24 ++ $IP addr add dev dummy2 172.16.104.12/24 + $IP route add 172.16.105.0/24 via 172.16.104.2 src 172.16.104.11 ++ $IP route add 172.16.106.0/24 dev lo src 172.16.104.12 ++ $IP route add table 0 172.16.107.0/24 via 172.16.104.2 src 172.16.104.13 + $IP route add vrf red 172.16.105.0/24 via 172.16.104.2 src 172.16.104.11 ++ $IP route add vrf red 172.16.106.0/24 dev lo src 172.16.104.12 + set +e + + # removing address from device in vrf should only remove route from vrf table ++ echo " Regular FIB info" ++ + $IP addr del dev dummy2 172.16.104.11/24 + $IP ro ls vrf red | grep -q 172.16.105.0/24 + log_test $? 1 "Route removed from VRF when source address deleted" +@@ -1607,6 +1654,35 @@ ipv4_del_addr_test() + $IP ro ls vrf red | grep -q 172.16.105.0/24 + log_test $? 0 "Route in VRF is not removed by address delete" + ++ # removing address from device in vrf should only remove route from vrf ++ # table even when the associated fib info only differs in table ID ++ echo " Identical FIB info with different table ID" ++ ++ $IP addr del dev dummy2 172.16.104.12/24 ++ $IP ro ls vrf red | grep -q 172.16.106.0/24 ++ log_test $? 1 "Route removed from VRF when source address deleted" ++ ++ $IP ro ls | grep -q 172.16.106.0/24 ++ log_test $? 0 "Route in default VRF not removed" ++ ++ $IP addr add dev dummy2 172.16.104.12/24 ++ $IP route add vrf red 172.16.106.0/24 dev lo src 172.16.104.12 ++ ++ $IP addr del dev dummy1 172.16.104.12/24 ++ $IP ro ls | grep -q 172.16.106.0/24 ++ log_test $? 1 "Route removed in default VRF when source address deleted" ++ ++ $IP ro ls vrf red | grep -q 172.16.106.0/24 ++ log_test $? 0 "Route in VRF is not removed by address delete" ++ ++ # removing address from device in default vrf should remove route from ++ # the default vrf even when route was inserted with a table ID of 0. ++ echo " Table ID 0" ++ ++ $IP addr del dev dummy1 172.16.104.13/24 ++ $IP ro ls | grep -q 172.16.107.0/24 ++ log_test $? 1 "Route removed in default VRF when source address deleted" ++ + $IP li del dummy1 + $IP li del dummy2 cleanup diff --git a/tools/testing/selftests/net/forwarding/bridge_igmp.sh b/tools/testing/selftests/net/forwarding/bridge_igmp.sh index 675eff45b0371..1162836f8f329 100755 @@ -449521,9 +576515,26 @@ index 8fea2c2e0b25d..d40183b4eccc8 100755 } diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh -index 92087d423bcf1..c9507df9c05bc 100644 +index 92087d423bcf1..b7d946cf14eb5 100644 --- a/tools/testing/selftests/net/forwarding/lib.sh +++ b/tools/testing/selftests/net/forwarding/lib.sh +@@ -817,14 +817,14 @@ sysctl_set() + local value=$1; shift + + SYSCTL_ORIG[$key]=$(sysctl -n $key) +- sysctl -qw $key=$value ++ sysctl -qw $key="$value" + } + + sysctl_restore() + { + local key=$1; shift + +- sysctl -qw $key=${SYSCTL_ORIG["$key"]} ++ sysctl -qw $key="${SYSCTL_ORIG[$key]}" + } + + forwarding_enable() @@ -1149,6 +1149,7 @@ learning_test() # FDB entry was installed. 
bridge link set dev $br_port1 flood off @@ -449873,6 +576884,195 @@ index 2674ba20d5249..49dfabded1d44 100755 flush_pids exit $ret +diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.c b/tools/testing/selftests/net/mptcp/mptcp_connect.c +index 89c4753c2760c..95e81d557b088 100644 +--- a/tools/testing/selftests/net/mptcp/mptcp_connect.c ++++ b/tools/testing/selftests/net/mptcp/mptcp_connect.c +@@ -14,6 +14,7 @@ + #include <strings.h> + #include <signal.h> + #include <unistd.h> ++#include <time.h> + + #include <sys/poll.h> + #include <sys/sendfile.h> +@@ -64,6 +65,7 @@ static int cfg_sndbuf; + static int cfg_rcvbuf; + static bool cfg_join; + static bool cfg_remove; ++static unsigned int cfg_time; + static unsigned int cfg_do_w; + static int cfg_wait; + static uint32_t cfg_mark; +@@ -78,9 +80,10 @@ static struct cfg_cmsg_types cfg_cmsg_types; + static void die_usage(void) + { + fprintf(stderr, "Usage: mptcp_connect [-6] [-u] [-s MPTCP|TCP] [-p port] [-m mode]" +- "[-l] [-w sec] connect_address\n"); ++ "[-l] [-w sec] [-t num] [-T num] connect_address\n"); + fprintf(stderr, "\t-6 use ipv6\n"); + fprintf(stderr, "\t-t num -- set poll timeout to num\n"); ++ fprintf(stderr, "\t-T num -- set expected runtime to num ms\n"); + fprintf(stderr, "\t-S num -- set SO_SNDBUF to num\n"); + fprintf(stderr, "\t-R num -- set SO_RCVBUF to num\n"); + fprintf(stderr, "\t-p num -- use port num\n"); +@@ -448,7 +451,7 @@ static void set_nonblock(int fd) + fcntl(fd, F_SETFL, flags | O_NONBLOCK); + } + +-static int copyfd_io_poll(int infd, int peerfd, int outfd) ++static int copyfd_io_poll(int infd, int peerfd, int outfd, bool *in_closed_after_out) + { + struct pollfd fds = { + .fd = peerfd, +@@ -487,9 +490,11 @@ static int copyfd_io_poll(int infd, int peerfd, int outfd) + */ + fds.events &= ~POLLIN; + +- if ((fds.events & POLLOUT) == 0) ++ if ((fds.events & POLLOUT) == 0) { ++ *in_closed_after_out = true; + /* and nothing more to send */ + break; ++ } + + /* Else, still have data to transmit */ + } else if (len < 0) { +@@ -547,7 +552,7 @@ static int copyfd_io_poll(int infd, int peerfd, int outfd) + } + + /* leave some time for late join/announce */ +- if (cfg_join || cfg_remove) ++ if (cfg_remove) + usleep(cfg_wait); + + close(peerfd); +@@ -646,7 +651,7 @@ static int do_sendfile(int infd, int outfd, unsigned int count) + } + + static int copyfd_io_mmap(int infd, int peerfd, int outfd, +- unsigned int size) ++ unsigned int size, bool *in_closed_after_out) + { + int err; + +@@ -664,13 +669,14 @@ static int copyfd_io_mmap(int infd, int peerfd, int outfd, + shutdown(peerfd, SHUT_WR); + + err = do_recvfile(peerfd, outfd); ++ *in_closed_after_out = true; + } + + return err; + } + + static int copyfd_io_sendfile(int infd, int peerfd, int outfd, +- unsigned int size) ++ unsigned int size, bool *in_closed_after_out) + { + int err; + +@@ -685,6 +691,7 @@ static int copyfd_io_sendfile(int infd, int peerfd, int outfd, + if (err) + return err; + err = do_recvfile(peerfd, outfd); ++ *in_closed_after_out = true; + } + + return err; +@@ -692,27 +699,62 @@ static int copyfd_io_sendfile(int infd, int peerfd, int outfd, + + static int copyfd_io(int infd, int peerfd, int outfd) + { ++ bool in_closed_after_out = false; ++ struct timespec start, end; + int file_size; ++ int ret; ++ ++ if (cfg_time && (clock_gettime(CLOCK_MONOTONIC, &start) < 0)) ++ xerror("can not fetch start time %d", errno); + + switch (cfg_mode) { + case CFG_MODE_POLL: +- return copyfd_io_poll(infd, peerfd, outfd); ++ ret = copyfd_io_poll(infd, peerfd, outfd, 
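
/*
 * in_closed_after_out records which peer saw EOF last, so only the
 * longer-running side prints its runtime under -T; the elapsed-time
 * check below is the standard CLOCK_MONOTONIC pattern:
 *   delta_ms = (end.tv_sec - start.tv_sec) * 1000
 *            + (end.tv_nsec - start.tv_nsec) / 1000000;
 */
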
&in_closed_after_out); ++ break; ++ + case CFG_MODE_MMAP: + file_size = get_infd_size(infd); + if (file_size < 0) + return file_size; +- return copyfd_io_mmap(infd, peerfd, outfd, file_size); ++ ret = copyfd_io_mmap(infd, peerfd, outfd, file_size, &in_closed_after_out); ++ break; ++ + case CFG_MODE_SENDFILE: + file_size = get_infd_size(infd); + if (file_size < 0) + return file_size; +- return copyfd_io_sendfile(infd, peerfd, outfd, file_size); ++ ret = copyfd_io_sendfile(infd, peerfd, outfd, file_size, &in_closed_after_out); ++ break; ++ ++ default: ++ fprintf(stderr, "Invalid mode %d\n", cfg_mode); ++ ++ die_usage(); ++ return 1; + } + +- fprintf(stderr, "Invalid mode %d\n", cfg_mode); ++ if (ret) ++ return ret; + +- die_usage(); +- return 1; ++ if (cfg_time) { ++ unsigned int delta_ms; ++ ++ if (clock_gettime(CLOCK_MONOTONIC, &end) < 0) ++ xerror("can not fetch end time %d", errno); ++ delta_ms = (end.tv_sec - start.tv_sec) * 1000 + (end.tv_nsec - start.tv_nsec) / 1000000; ++ if (delta_ms > cfg_time) { ++ xerror("transfer slower than expected! runtime %d ms, expected %d ms", ++ delta_ms, cfg_time); ++ } ++ ++ /* show the runtime only if this end shutdown(wr) before receiving the EOF, ++ * (that is, if this end got the longer runtime) ++ */ ++ if (in_closed_after_out) ++ fprintf(stderr, "%d", delta_ms); ++ } ++ ++ return 0; + } + + static void check_sockaddr(int pf, struct sockaddr_storage *ss, +@@ -1005,12 +1047,11 @@ static void parse_opts(int argc, char **argv) + { + int c; + +- while ((c = getopt(argc, argv, "6jr:lp:s:hut:m:S:R:w:M:P:c:")) != -1) { ++ while ((c = getopt(argc, argv, "6jr:lp:s:hut:T:m:S:R:w:M:P:c:")) != -1) { + switch (c) { + case 'j': + cfg_join = true; + cfg_mode = CFG_MODE_POLL; +- cfg_wait = 400000; + break; + case 'r': + cfg_remove = true; +@@ -1043,6 +1084,9 @@ static void parse_opts(int argc, char **argv) + if (poll_timeout <= 0) + poll_timeout = -1; + break; ++ case 'T': ++ cfg_time = atoi(optarg); ++ break; + case 'm': + cfg_mode = parse_mode(optarg); + break; diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh index 559173a8e387b..d75fa97609c15 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh @@ -449981,8 +577181,105 @@ index 255793c5ac4ff..3be615ab1588b 100755 run_tests $ns1 $ns2 dead:beef:1::1 0 -1 -1 slow chk_join_nr "remove subflow and signal IPv6" 2 2 2 chk_add_nr 1 1 +diff --git a/tools/testing/selftests/net/mptcp/simult_flows.sh b/tools/testing/selftests/net/mptcp/simult_flows.sh +index 910d8126af8f2..7df4900dfaf73 100755 +--- a/tools/testing/selftests/net/mptcp/simult_flows.sh ++++ b/tools/testing/selftests/net/mptcp/simult_flows.sh +@@ -51,7 +51,7 @@ setup() + sout=$(mktemp) + cout=$(mktemp) + capout=$(mktemp) +- size=$((2048 * 4096)) ++ size=$((2 * 2048 * 4096)) + dd if=/dev/zero of=$small bs=4096 count=20 >/dev/null 2>&1 + dd if=/dev/zero of=$large bs=4096 count=$((size / 4096)) >/dev/null 2>&1 + +@@ -161,17 +161,15 @@ do_transfer() + + timeout ${timeout_test} \ + ip netns exec ${ns3} \ +- ./mptcp_connect -jt ${timeout_poll} -l -p $port \ ++ ./mptcp_connect -jt ${timeout_poll} -l -p $port -T $time \ + 0.0.0.0 < "$sin" > "$sout" & + local spid=$! + + wait_local_port_listen "${ns3}" "${port}" + +- local start +- start=$(date +%s%3N) + timeout ${timeout_test} \ + ip netns exec ${ns1} \ +- ./mptcp_connect -jt ${timeout_poll} -p $port \ ++ ./mptcp_connect -jt ${timeout_poll} -p $port -T $time \ + 10.0.3.3 < "$cin" > "$cout" & + local cpid=$! 
+ +@@ -180,27 +178,20 @@ do_transfer() + wait $spid + local rets=$? + +- local stop +- stop=$(date +%s%3N) +- + if $capture; then + sleep 1 + kill ${cappid_listener} + kill ${cappid_connector} + fi + +- local duration +- duration=$((stop-start)) +- + cmp $sin $cout > /dev/null 2>&1 + local cmps=$? + cmp $cin $sout > /dev/null 2>&1 + local cmpc=$? + +- printf "%16s" "$duration max $max_time " ++ printf "%-16s" " max $max_time " + if [ $retc -eq 0 ] && [ $rets -eq 0 ] && \ +- [ $cmpc -eq 0 ] && [ $cmps -eq 0 ] && \ +- [ $duration -lt $max_time ]; then ++ [ $cmpc -eq 0 ] && [ $cmps -eq 0 ]; then + echo "[ OK ]" + cat "$capout" + return 0 +@@ -244,23 +235,25 @@ run_test() + tc -n $ns2 qdisc add dev ns2eth1 root netem rate ${rate1}mbit $delay1 + tc -n $ns2 qdisc add dev ns2eth2 root netem rate ${rate2}mbit $delay2 + +- # time is measure in ms +- local time=$((size * 8 * 1000 / (( $rate1 + $rate2) * 1024 *1024) )) ++ # time is measured in ms, account for transfer size, aggregated link speed ++ # and header overhead (10%) ++ # ms byte -> bit 10% mbit -> kbit -> bit 10% ++ local time=$((1000 * size * 8 * 10 / ((rate1 + rate2) * 1000 * 1000 * 9) )) + + # mptcp_connect will do some sleeps to allow the mp_join handshake +- # completion +- time=$((time + 1350)) ++ # completion (see mptcp_connect): 200ms on each side, add some slack ++ time=$((time + 450)) + +- printf "%-50s" "$msg" +- do_transfer $small $large $((time * 11 / 10)) ++ printf "%-60s" "$msg" ++ do_transfer $small $large $time + lret=$? + if [ $lret -ne 0 ]; then + ret=$lret + [ $bail -eq 0 ] || exit $ret + fi + +- printf "%-50s" "$msg - reverse direction" +- do_transfer $large $small $((time * 11 / 10)) ++ printf "%-60s" "$msg - reverse direction" ++ do_transfer $large $small $time + lret=$? + if [ $lret -ne 0 ]; then + ret=$lret diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh -index 543ad7513a8e9..694732e4b3448 100755 +index 543ad7513a8e9..da6ab300207c0 100755 --- a/tools/testing/selftests/net/pmtu.sh +++ b/tools/testing/selftests/net/pmtu.sh @@ -374,6 +374,16 @@ run_cmd() { @@ -450002,7 +577299,22 @@ index 543ad7513a8e9..694732e4b3448 100755 # Find the auto-generated name for this namespace nsname() { eval echo \$NS_$1 -@@ -670,10 +680,10 @@ setup_nettest_xfrm() { +@@ -661,19 +671,21 @@ setup_xfrm() { + } + + setup_nettest_xfrm() { +- which nettest >/dev/null +- if [ $? -ne 0 ]; then +- echo "'nettest' command not found; skipping tests" +- return 1 ++ if ! which nettest >/dev/null; then ++ PATH=$PWD:$PATH ++ if ! which nettest >/dev/null; then ++ echo "'nettest' command not found; skipping tests" ++ return 1 ++ fi + fi + [ ${1} -eq 6 ] && proto="-6" || proto="" port=${2} @@ -450015,7 +577327,7 @@ index 543ad7513a8e9..694732e4b3448 100755 nettest_pids="${nettest_pids} $!" } -@@ -865,7 +875,6 @@ setup_ovs_bridge() { +@@ -865,7 +877,6 @@ setup_ovs_bridge() { setup() { [ "$(id -u)" -ne 0 ] && echo " need to run as root" && return $ksft_skip @@ -450023,7 +577335,7 @@ index 543ad7513a8e9..694732e4b3448 100755 for arg do eval setup_${arg} || { echo " ${arg} not supported"; return 1; } done -@@ -876,7 +885,7 @@ trace() { +@@ -876,7 +887,7 @@ trace() { for arg do [ "${ns_cmd}" = "" ] && ns_cmd="${arg}" && continue @@ -450032,7 +577344,7 @@ index 543ad7513a8e9..694732e4b3448 100755 tcpdump_pids="${tcpdump_pids} $!" 
ns_cmd= done -@@ -1836,6 +1845,10 @@ run_test() { +@@ -1836,6 +1847,10 @@ run_test() { unset IFS @@ -450056,6 +577368,19 @@ index b5277106df1fd..b0cc082fbb84f 100644 error(1, errno, "bind socket 2 should fail with EADDRINUSE"); free(addr); +diff --git a/tools/testing/selftests/net/rtnetlink.sh b/tools/testing/selftests/net/rtnetlink.sh +index c9ce3dfa42ee7..c3a905923ef29 100755 +--- a/tools/testing/selftests/net/rtnetlink.sh ++++ b/tools/testing/selftests/net/rtnetlink.sh +@@ -782,7 +782,7 @@ kci_test_ipsec_offload() + tmpl proto esp src $srcip dst $dstip spi 9 \ + mode transport reqid 42 + check_err $? +- ip x p add dir out src $dstip/24 dst $srcip/24 \ ++ ip x p add dir in src $dstip/24 dst $srcip/24 \ + tmpl proto esp src $dstip dst $srcip spi 9 \ + mode transport reqid 42 + check_err $? diff --git a/tools/testing/selftests/net/so_txtime.c b/tools/testing/selftests/net/so_txtime.c index 59067f64b7753..2672ac0b6d1f3 100644 --- a/tools/testing/selftests/net/so_txtime.c @@ -450114,10 +577439,61 @@ index aee631c5284eb..044bc0e9ed81a 100644 struct ip_mreq imr; struct in_addr iaddr; diff --git a/tools/testing/selftests/net/toeplitz.c b/tools/testing/selftests/net/toeplitz.c -index 710ac956bdb33..c5489341cfb80 100644 +index 710ac956bdb33..8ce96028341d5 100644 --- a/tools/testing/selftests/net/toeplitz.c +++ b/tools/testing/selftests/net/toeplitz.c -@@ -498,7 +498,7 @@ static void parse_opts(int argc, char **argv) +@@ -213,7 +213,7 @@ static char *recv_frame(const struct ring_state *ring, char *frame) + } + + /* A single TPACKET_V3 block can hold multiple frames */ +-static void recv_block(struct ring_state *ring) ++static bool recv_block(struct ring_state *ring) + { + struct tpacket_block_desc *block; + char *frame; +@@ -221,7 +221,7 @@ static void recv_block(struct ring_state *ring) + + block = (void *)(ring->mmap + ring->idx * ring_block_sz); + if (!(block->hdr.bh1.block_status & TP_STATUS_USER)) +- return; ++ return false; + + frame = (char *)block; + frame += block->hdr.bh1.offset_to_first_pkt; +@@ -233,6 +233,8 @@ static void recv_block(struct ring_state *ring) + + block->hdr.bh1.block_status = TP_STATUS_KERNEL; + ring->idx = (ring->idx + 1) % ring_block_nr; ++ ++ return true; + } + + /* simple test: sleep once unconditionally and then process all rings */ +@@ -243,7 +245,7 @@ static void process_rings(void) + usleep(1000 * cfg_timeout_msec); + + for (i = 0; i < num_cpus; i++) +- recv_block(&rings[i]); ++ do {} while (recv_block(&rings[i])); + + fprintf(stderr, "count: pass=%u nohash=%u fail=%u\n", + frames_received - frames_nohash - frames_error, +@@ -255,12 +257,12 @@ static char *setup_ring(int fd) + struct tpacket_req3 req3 = {0}; + void *ring; + +- req3.tp_retire_blk_tov = cfg_timeout_msec; ++ req3.tp_retire_blk_tov = cfg_timeout_msec / 8; + req3.tp_feature_req_word = TP_FT_REQ_FILL_RXHASH; + + req3.tp_frame_size = 2048; + req3.tp_frame_nr = 1 << 10; +- req3.tp_block_nr = 2; ++ req3.tp_block_nr = 16; + + req3.tp_block_size = req3.tp_frame_size * req3.tp_frame_nr; + req3.tp_block_size /= req3.tp_block_nr; +@@ -498,7 +500,7 @@ static void parse_opts(int argc, char **argv) bool have_toeplitz = false; int index, c; @@ -450126,6 +577502,19 @@ index 710ac956bdb33..c5489341cfb80 100644 switch (c) { case '4': cfg_family = AF_INET; +diff --git a/tools/testing/selftests/net/toeplitz.sh b/tools/testing/selftests/net/toeplitz.sh +index 0a49907cd4fef..da5bfd834effe 100755 +--- a/tools/testing/selftests/net/toeplitz.sh ++++ b/tools/testing/selftests/net/toeplitz.sh +@@ -32,7 +32,7 @@ DEV="eth0" + 
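
# The toeplitz.sh hunk below replaces the deprecated egrep alias with its
# POSIX spelling; the two invocations are equivalent:
#   egrep 'PATTERN' file    ->    grep -E 'PATTERN' file
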
# This is determined by reading the RSS indirection table using ethtool. + get_rss_cfg_num_rxqs() { + echo $(ethtool -x "${DEV}" | +- egrep [[:space:]]+[0-9]+:[[:space:]]+ | ++ grep -E [[:space:]]+[0-9]+:[[:space:]]+ | + cut -d: -f2- | + awk '{$1=$1};1' | + tr ' ' '\n' | diff --git a/tools/testing/selftests/net/udpgro.sh b/tools/testing/selftests/net/udpgro.sh index f8a19f548ae9d..ebbd0b2824327 100755 --- a/tools/testing/selftests/net/udpgro.sh @@ -450239,10 +577628,49 @@ index c66da6ffd6d8d..7badaf215de28 100644 .tfail = true, }, diff --git a/tools/testing/selftests/net/udpgso_bench.sh b/tools/testing/selftests/net/udpgso_bench.sh -index 80b5d352702e5..dc932fd653634 100755 +index 80b5d352702e5..640bc43452faa 100755 --- a/tools/testing/selftests/net/udpgso_bench.sh +++ b/tools/testing/selftests/net/udpgso_bench.sh -@@ -120,7 +120,7 @@ run_all() { +@@ -7,6 +7,7 @@ readonly GREEN='\033[0;92m' + readonly YELLOW='\033[0;33m' + readonly RED='\033[0;31m' + readonly NC='\033[0m' # No Color ++readonly TESTPORT=8000 + + readonly KSFT_PASS=0 + readonly KSFT_FAIL=1 +@@ -56,11 +57,26 @@ trap wake_children EXIT + + run_one() { + local -r args=$@ ++ local nr_socks=0 ++ local i=0 ++ local -r timeout=10 ++ ++ ./udpgso_bench_rx -p "$TESTPORT" & ++ ./udpgso_bench_rx -p "$TESTPORT" -t & ++ ++ # Wait for the above test program to get ready to receive connections. ++ while [ "$i" -lt "$timeout" ]; do ++ nr_socks="$(ss -lnHi | grep -c "\*:${TESTPORT}")" ++ [ "$nr_socks" -eq 2 ] && break ++ i=$((i + 1)) ++ sleep 1 ++ done ++ if [ "$nr_socks" -ne 2 ]; then ++ echo "timed out while waiting for udpgso_bench_rx" ++ exit 1 ++ fi + +- ./udpgso_bench_rx & +- ./udpgso_bench_rx -t & +- +- ./udpgso_bench_tx ${args} ++ ./udpgso_bench_tx -p "$TESTPORT" ${args} + } + + run_in_netns() { +@@ -120,7 +136,7 @@ run_all() { run_udp "${ipv4_args}" echo "ipv6" @@ -450252,9 +577680,18 @@ index 80b5d352702e5..dc932fd653634 100755 } diff --git a/tools/testing/selftests/net/udpgso_bench_rx.c b/tools/testing/selftests/net/udpgso_bench_rx.c -index 76a24052f4b47..6a193425c367f 100644 +index 76a24052f4b47..4058c7451e70d 100644 --- a/tools/testing/selftests/net/udpgso_bench_rx.c +++ b/tools/testing/selftests/net/udpgso_bench_rx.c +@@ -250,7 +250,7 @@ static int recv_msg(int fd, char *buf, int len, int *gso_size) + static void do_flush_udp(int fd) + { + static char rbuf[ETH_MAX_MTU]; +- int ret, len, gso_size, budget = 256; ++ int ret, len, gso_size = 0, budget = 256; + + len = cfg_read_all ? 
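
/*
 * gso_size is now zero-initialized above because recv_msg() only writes
 * it when a GRO cmsg is attached, so non-GRO datagrams were printing an
 * indeterminate value. The ternary here reads the payload only when
 * cfg_read_all needs it; on Linux a zero-length read still consumes the
 * datagram:
 *   recv(fd, rbuf, 0, 0);    // pops one message, copies nothing
 */
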
sizeof(rbuf) : 0; + while (budget--) { @@ -293,19 +293,17 @@ static void usage(const char *filepath) static void parse_opts(int argc, char **argv) @@ -450277,7 +577714,12 @@ index 76a24052f4b47..6a193425c367f 100644 break; case 'C': cfg_connect_timeout_ms = strtoul(optarg, NULL, 0); -@@ -341,6 +339,11 @@ static void parse_opts(int argc, char **argv) +@@ -338,9 +336,16 @@ static void parse_opts(int argc, char **argv) + cfg_verify = true; + cfg_read_all = true; + break; ++ default: ++ exit(1); } } @@ -450290,10 +577732,68 @@ index 76a24052f4b47..6a193425c367f 100644 usage(argv[0]); diff --git a/tools/testing/selftests/net/udpgso_bench_tx.c b/tools/testing/selftests/net/udpgso_bench_tx.c -index 17512a43885e7..f1fdaa2702913 100644 +index 17512a43885e7..477392715a9ad 100644 --- a/tools/testing/selftests/net/udpgso_bench_tx.c +++ b/tools/testing/selftests/net/udpgso_bench_tx.c -@@ -419,6 +419,7 @@ static void usage(const char *filepath) +@@ -62,6 +62,7 @@ static int cfg_payload_len = (1472 * 42); + static int cfg_port = 8000; + static int cfg_runtime_ms = -1; + static bool cfg_poll; ++static int cfg_poll_loop_timeout_ms = 2000; + static bool cfg_segment; + static bool cfg_sendmmsg; + static bool cfg_tcp; +@@ -235,16 +236,17 @@ static void flush_errqueue_recv(int fd) + } + } + +-static void flush_errqueue(int fd, const bool do_poll) ++static void flush_errqueue(int fd, const bool do_poll, ++ unsigned long poll_timeout, const bool poll_err) + { + if (do_poll) { + struct pollfd fds = {0}; + int ret; + + fds.fd = fd; +- ret = poll(&fds, 1, 500); ++ ret = poll(&fds, 1, poll_timeout); + if (ret == 0) { +- if (cfg_verbose) ++ if ((cfg_verbose) && (poll_err)) + fprintf(stderr, "poll timeout\n"); + } else if (ret < 0) { + error(1, errno, "poll"); +@@ -254,6 +256,20 @@ static void flush_errqueue(int fd, const bool do_poll) + flush_errqueue_recv(fd); + } + ++static void flush_errqueue_retry(int fd, unsigned long num_sends) ++{ ++ unsigned long tnow, tstop; ++ bool first_try = true; ++ ++ tnow = gettimeofday_ms(); ++ tstop = tnow + cfg_poll_loop_timeout_ms; ++ do { ++ flush_errqueue(fd, true, tstop - tnow, first_try); ++ first_try = false; ++ tnow = gettimeofday_ms(); ++ } while ((stat_zcopies != num_sends) && (tnow < tstop)); ++} ++ + static int send_tcp(int fd, char *data) + { + int ret, done = 0, count = 0; +@@ -413,16 +429,18 @@ static int send_udp_segment(int fd, char *data) + + static void usage(const char *filepath) + { +- error(1, 0, "Usage: %s [-46acmHPtTuvz] [-C cpu] [-D dst ip] [-l secs] [-M messagenr] [-p port] [-s sendsize] [-S gsosize]", ++ error(1, 0, "Usage: %s [-46acmHPtTuvz] [-C cpu] [-D dst ip] [-l secs] " ++ "[-L secs] [-M messagenr] [-p port] [-s sendsize] [-S gsosize]", + filepath); + } static void parse_opts(int argc, char **argv) { @@ -450301,7 +577801,12 @@ index 17512a43885e7..f1fdaa2702913 100644 int max_len, hdrlen; int c; -@@ -446,7 +447,7 @@ static void parse_opts(int argc, char **argv) +- while ((c = getopt(argc, argv, "46acC:D:Hl:mM:p:s:PS:tTuvz")) != -1) { ++ while ((c = getopt(argc, argv, "46acC:D:Hl:L:mM:p:s:PS:tTuvz")) != -1) { + switch (c) { + case '4': + if (cfg_family != PF_UNSPEC) +@@ -446,11 +464,14 @@ static void parse_opts(int argc, char **argv) cfg_cpu = strtol(optarg, NULL, 0); break; case 'D': @@ -450310,7 +577815,19 @@ index 17512a43885e7..f1fdaa2702913 100644 break; case 'l': cfg_runtime_ms = strtoul(optarg, NULL, 10) * 1000; -@@ -492,6 +493,11 @@ static void parse_opts(int argc, char **argv) + break; ++ case 'L': ++ cfg_poll_loop_timeout_ms = strtoul(optarg, 
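
/*
 * -L sizes the deadline for flush_errqueue_retry(), added above, which
 * re-polls the MSG_ERRQUEUE until every zerocopy completion is reaped
 * or the deadline passes, instead of one fixed 500 ms poll:
 *   tstop = now_ms + cfg_poll_loop_timeout_ms;
 *   do { flush_errqueue(fd, true, tstop - now_ms, first_try); }
 *   while (stat_zcopies != num_sends && now_ms < tstop);
 * (now_ms stands in for successive gettimeofday_ms() calls).
 */
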
NULL, 10) * 1000; ++ break; + case 'm': + cfg_sendmmsg = true; + break; +@@ -489,9 +510,16 @@ static void parse_opts(int argc, char **argv) + case 'z': + cfg_zerocopy = true; + break; ++ default: ++ exit(1); } } @@ -450322,6 +577839,24 @@ index 17512a43885e7..f1fdaa2702913 100644 if (optind != argc) usage(argv[0]); +@@ -671,7 +699,7 @@ int main(int argc, char **argv) + num_sends += send_udp(fd, buf[i]); + num_msgs++; + if ((cfg_zerocopy && ((num_msgs & 0xF) == 0)) || cfg_tx_tstamp) +- flush_errqueue(fd, cfg_poll); ++ flush_errqueue(fd, cfg_poll, 500, true); + + if (cfg_msg_nr && num_msgs >= cfg_msg_nr) + break; +@@ -690,7 +718,7 @@ int main(int argc, char **argv) + } while (!interrupted && (cfg_runtime_ms == -1 || tnow < tstop)); + + if (cfg_zerocopy || cfg_tx_tstamp) +- flush_errqueue(fd, true); ++ flush_errqueue_retry(fd, num_sends); + + if (close(fd)) + error(1, errno, "close"); diff --git a/tools/testing/selftests/net/veth.sh b/tools/testing/selftests/net/veth.sh index 19eac3e44c065..430895d1a2b63 100755 --- a/tools/testing/selftests/net/veth.sh @@ -450366,6 +577901,69 @@ index 8748199ac1098..ffca314897c4c 100644 LDLIBS = -lmnl TEST_GEN_FILES = nf-queue +diff --git a/tools/testing/selftests/netfilter/conntrack_icmp_related.sh b/tools/testing/selftests/netfilter/conntrack_icmp_related.sh +index b48e1833bc896..76645aaf2b58f 100755 +--- a/tools/testing/selftests/netfilter/conntrack_icmp_related.sh ++++ b/tools/testing/selftests/netfilter/conntrack_icmp_related.sh +@@ -35,6 +35,8 @@ cleanup() { + for i in 1 2;do ip netns del nsrouter$i;done + } + ++trap cleanup EXIT ++ + ipv4() { + echo -n 192.168.$1.2 + } +@@ -146,11 +148,17 @@ ip netns exec nsclient1 nft -f - <<EOF + table inet filter { + counter unknown { } + counter related { } ++ counter redir4 { } ++ counter redir6 { } + chain input { + type filter hook input priority 0; policy accept; +- meta l4proto { icmp, icmpv6 } ct state established,untracked accept + ++ icmp type "redirect" ct state "related" counter name "redir4" accept ++ icmpv6 type "nd-redirect" ct state "related" counter name "redir6" accept ++ ++ meta l4proto { icmp, icmpv6 } ct state established,untracked accept + meta l4proto { icmp, icmpv6 } ct state "related" counter name "related" accept ++ + counter name "unknown" drop + } + } +@@ -279,5 +287,29 @@ else + echo "ERROR: icmp error RELATED state test has failed" + fi + +-cleanup ++# add 'bad' route, expect icmp REDIRECT to be generated ++ip netns exec nsclient1 ip route add 192.168.1.42 via 192.168.1.1 ++ip netns exec nsclient1 ip route add dead:1::42 via dead:1::1 ++ ++ip netns exec "nsclient1" ping -q -c 2 192.168.1.42 > /dev/null ++ ++expect="packets 1 bytes 112" ++check_counter nsclient1 "redir4" "$expect" ++if [ $? -ne 0 ];then ++ ret=1 ++fi ++ ++ip netns exec "nsclient1" ping -c 1 dead:1::42 > /dev/null ++expect="packets 1 bytes 192" ++check_counter nsclient1 "redir6" "$expect" ++if [ $? 
-ne 0 ];then ++ ret=1 ++fi ++ ++if [ $ret -eq 0 ];then ++ echo "PASS: icmp redirects had RELATED state" ++else ++ echo "ERROR: icmp redirect RELATED state test has failed" ++fi ++ + exit $ret diff --git a/tools/testing/selftests/netfilter/conntrack_vrf.sh b/tools/testing/selftests/netfilter/conntrack_vrf.sh new file mode 100755 index 0000000000000..8b5ea92345882 @@ -451601,6 +579199,30 @@ index be2943f072f60..17999e082aa71 100644 pid_t parent_tid = -1; struct clone_args args = { .parent_tid = ptr_to_u64(&parent_tid), +diff --git a/tools/testing/selftests/powerpc/dscr/dscr_sysfs_test.c b/tools/testing/selftests/powerpc/dscr/dscr_sysfs_test.c +index fbbdffdb2e5d2..f20d1c166d1e4 100644 +--- a/tools/testing/selftests/powerpc/dscr/dscr_sysfs_test.c ++++ b/tools/testing/selftests/powerpc/dscr/dscr_sysfs_test.c +@@ -24,6 +24,7 @@ static int check_cpu_dscr_default(char *file, unsigned long val) + rc = read(fd, buf, sizeof(buf)); + if (rc == -1) { + perror("read() failed"); ++ close(fd); + return 1; + } + close(fd); +@@ -65,8 +66,10 @@ static int check_all_cpu_dscr_defaults(unsigned long val) + if (access(file, F_OK)) + continue; + +- if (check_cpu_dscr_default(file, val)) ++ if (check_cpu_dscr_default(file, val)) { ++ closedir(sysfs); + return 1; ++ } + } + closedir(sysfs); + return 0; diff --git a/tools/testing/selftests/powerpc/security/spectre_v2.c b/tools/testing/selftests/powerpc/security/spectre_v2.c index adc2b7294e5fd..83647b8277e7d 100644 --- a/tools/testing/selftests/powerpc/security/spectre_v2.c @@ -451772,6 +579394,27 @@ index 0000000000000..0a1b6e591eeed +{ + return test_harness(test_sigreturn_kernel, "sigreturn_kernel"); +} +diff --git a/tools/testing/selftests/proc/proc-uptime-002.c b/tools/testing/selftests/proc/proc-uptime-002.c +index e7ceabed7f51f..7d0aa22bdc12b 100644 +--- a/tools/testing/selftests/proc/proc-uptime-002.c ++++ b/tools/testing/selftests/proc/proc-uptime-002.c +@@ -17,6 +17,7 @@ + // while shifting across CPUs. + #undef NDEBUG + #include <assert.h> ++#include <errno.h> + #include <unistd.h> + #include <sys/syscall.h> + #include <stdlib.h> +@@ -54,7 +55,7 @@ int main(void) + len += sizeof(unsigned long); + free(m); + m = malloc(len); +- } while (sys_sched_getaffinity(0, len, m) == -EINVAL); ++ } while (sys_sched_getaffinity(0, len, m) == -1 && errno == EINVAL); + + fd = open("/proc/uptime", O_RDONLY); + assert(fd >= 0); diff --git a/tools/testing/selftests/rcutorture/bin/torture.sh b/tools/testing/selftests/rcutorture/bin/torture.sh index 363f56081eff3..66f0f724a1a6d 100755 --- a/tools/testing/selftests/rcutorture/bin/torture.sh @@ -455638,7 +583281,7 @@ index d91bde5112686..eed44322d1a63 100644 if (munmap((void *)addr, size) != 0) { dump_maps(); diff --git a/tools/testing/selftests/vm/mremap_test.c b/tools/testing/selftests/vm/mremap_test.c -index 0624d1bd71b53..e3ce33a9954ea 100644 +index 0624d1bd71b53..58775dab3cc6c 100644 --- a/tools/testing/selftests/vm/mremap_test.c +++ b/tools/testing/selftests/vm/mremap_test.c @@ -6,9 +6,11 @@ @@ -455653,7 +583296,15 @@ index 0624d1bd71b53..e3ce33a9954ea 100644 #include "../kselftest.h" -@@ -64,6 +66,59 @@ enum { +@@ -20,7 +22,6 @@ + #define VALIDATION_DEFAULT_THRESHOLD 4 /* 4MB */ + #define VALIDATION_NO_THRESHOLD 0 /* Verify the entire region */ + +-#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) + #define MIN(X, Y) ((X) < (Y) ? 
(X) : (Y)) + + struct config { +@@ -64,6 +65,59 @@ enum { .expect_failure = should_fail \ } @@ -455713,7 +583364,7 @@ index 0624d1bd71b53..e3ce33a9954ea 100644 /* * Returns the start address of the mapping on success, else returns * NULL on failure. -@@ -72,11 +127,18 @@ static void *get_source_mapping(struct config c) +@@ -72,11 +126,18 @@ static void *get_source_mapping(struct config c) { unsigned long long addr = 0ULL; void *src_addr = NULL; @@ -455734,7 +583385,7 @@ index 0624d1bd71b53..e3ce33a9954ea 100644 if (src_addr == MAP_FAILED) { if (errno == EPERM || errno == EEXIST) goto retry; -@@ -91,8 +153,10 @@ retry: +@@ -91,8 +152,10 @@ retry: * alignment in the tests. */ if (((unsigned long long) src_addr & (c.src_alignment - 1)) || @@ -455746,7 +583397,7 @@ index 0624d1bd71b53..e3ce33a9954ea 100644 if (!src_addr) goto error; -@@ -141,9 +205,20 @@ static long long remap_region(struct config c, unsigned int threshold_mb, +@@ -141,9 +204,20 @@ static long long remap_region(struct config c, unsigned int threshold_mb, if (!((unsigned long long) addr & c.dest_alignment)) addr = (void *) ((unsigned long long) addr | c.dest_alignment); @@ -455768,6 +583419,210 @@ index 0624d1bd71b53..e3ce33a9954ea 100644 clock_gettime(CLOCK_MONOTONIC, &t_end); if (dest_addr == MAP_FAILED) { +diff --git a/tools/testing/selftests/vm/pkey-helpers.h b/tools/testing/selftests/vm/pkey-helpers.h +index 622a85848f61b..92f3be3dd8e59 100644 +--- a/tools/testing/selftests/vm/pkey-helpers.h ++++ b/tools/testing/selftests/vm/pkey-helpers.h +@@ -13,6 +13,8 @@ + #include <ucontext.h> + #include <sys/mman.h> + ++#include "../kselftest.h" ++ + /* Define some kernel-like types */ + #define u8 __u8 + #define u16 __u16 +@@ -175,7 +177,6 @@ static inline void __pkey_write_allow(int pkey, int do_allow_write) + dprintf4("pkey_reg now: %016llx\n", read_pkey_reg()); + } + +-#define ARRAY_SIZE(x) (sizeof(x) / sizeof(*(x))) + #define ALIGN_UP(x, align_to) (((x) + ((align_to)-1)) & ~((align_to)-1)) + #define ALIGN_DOWN(x, align_to) ((x) & ~((align_to)-1)) + #define ALIGN_PTR_UP(p, ptr_align_to) \ +diff --git a/tools/testing/selftests/vm/pkey-x86.h b/tools/testing/selftests/vm/pkey-x86.h +index e4a4ce2b826d2..ea8c8afbcdbb3 100644 +--- a/tools/testing/selftests/vm/pkey-x86.h ++++ b/tools/testing/selftests/vm/pkey-x86.h +@@ -119,6 +119,18 @@ static inline int cpu_has_pkeys(void) + return 1; + } + ++static inline int cpu_max_xsave_size(void) ++{ ++ unsigned long XSTATE_CPUID = 0xd; ++ unsigned int eax; ++ unsigned int ebx; ++ unsigned int ecx; ++ unsigned int edx; ++ ++ __cpuid_count(XSTATE_CPUID, 0, eax, ebx, ecx, edx); ++ return ecx; ++} ++ + static inline u32 pkey_bit_position(int pkey) + { + return pkey * PKEY_BITS_PER_PKEY; +diff --git a/tools/testing/selftests/vm/protection_keys.c b/tools/testing/selftests/vm/protection_keys.c +index 2d0ae88665db0..2d48272b2463e 100644 +--- a/tools/testing/selftests/vm/protection_keys.c ++++ b/tools/testing/selftests/vm/protection_keys.c +@@ -18,12 +18,13 @@ + * do a plain mprotect() to a mprotect_pkey() area and make sure the pkey sticks + * + * Compile like this: +- * gcc -o protection_keys -O2 -g -std=gnu99 -pthread -Wall protection_keys.c -lrt -ldl -lm +- * gcc -m32 -o protection_keys_32 -O2 -g -std=gnu99 -pthread -Wall protection_keys.c -lrt -ldl -lm ++ * gcc -mxsave -o protection_keys -O2 -g -std=gnu99 -pthread -Wall protection_keys.c -lrt -ldl -lm ++ * gcc -mxsave -m32 -o protection_keys_32 -O2 -g -std=gnu99 -pthread -Wall protection_keys.c -lrt -ldl -lm + */ + #define _GNU_SOURCE + #define 
__SANE_USERSPACE_TYPES__ + #include <errno.h> ++#include <linux/elf.h> + #include <linux/futex.h> + #include <time.h> + #include <sys/time.h> +@@ -1550,6 +1551,129 @@ void test_implicit_mprotect_exec_only_memory(int *ptr, u16 pkey) + do_not_expect_pkey_fault("plain read on recently PROT_EXEC area"); + } + ++#if defined(__i386__) || defined(__x86_64__) ++void test_ptrace_modifies_pkru(int *ptr, u16 pkey) ++{ ++ u32 new_pkru; ++ pid_t child; ++ int status, ret; ++ int pkey_offset = pkey_reg_xstate_offset(); ++ size_t xsave_size = cpu_max_xsave_size(); ++ void *xsave; ++ u32 *pkey_register; ++ u64 *xstate_bv; ++ struct iovec iov; ++ ++ new_pkru = ~read_pkey_reg(); ++ /* Don't make PROT_EXEC mappings inaccessible */ ++ new_pkru &= ~3; ++ ++ child = fork(); ++ pkey_assert(child >= 0); ++ dprintf3("[%d] fork() ret: %d\n", getpid(), child); ++ if (!child) { ++ ptrace(PTRACE_TRACEME, 0, 0, 0); ++ /* Stop and allow the tracer to modify PKRU directly */ ++ raise(SIGSTOP); ++ ++ /* ++ * need __read_pkey_reg() version so we do not do shadow_pkey_reg ++ * checking ++ */ ++ if (__read_pkey_reg() != new_pkru) ++ exit(1); ++ ++ /* Stop and allow the tracer to clear XSTATE_BV for PKRU */ ++ raise(SIGSTOP); ++ ++ if (__read_pkey_reg() != 0) ++ exit(1); ++ ++ /* Stop and allow the tracer to examine PKRU */ ++ raise(SIGSTOP); ++ ++ exit(0); ++ } ++ ++ pkey_assert(child == waitpid(child, &status, 0)); ++ dprintf3("[%d] waitpid(%d) status: %x\n", getpid(), child, status); ++ pkey_assert(WIFSTOPPED(status) && WSTOPSIG(status) == SIGSTOP); ++ ++ xsave = (void *)malloc(xsave_size); ++ pkey_assert(xsave > 0); ++ ++ /* Modify the PKRU register directly */ ++ iov.iov_base = xsave; ++ iov.iov_len = xsave_size; ++ ret = ptrace(PTRACE_GETREGSET, child, (void *)NT_X86_XSTATE, &iov); ++ pkey_assert(ret == 0); ++ ++ pkey_register = (u32 *)(xsave + pkey_offset); ++ pkey_assert(*pkey_register == read_pkey_reg()); ++ ++ *pkey_register = new_pkru; ++ ++ ret = ptrace(PTRACE_SETREGSET, child, (void *)NT_X86_XSTATE, &iov); ++ pkey_assert(ret == 0); ++ ++ /* Test that the modification is visible in ptrace before any execution */ ++ memset(xsave, 0xCC, xsave_size); ++ ret = ptrace(PTRACE_GETREGSET, child, (void *)NT_X86_XSTATE, &iov); ++ pkey_assert(ret == 0); ++ pkey_assert(*pkey_register == new_pkru); ++ ++ /* Execute the tracee */ ++ ret = ptrace(PTRACE_CONT, child, 0, 0); ++ pkey_assert(ret == 0); ++ ++ /* Test that the tracee saw the PKRU value change */ ++ pkey_assert(child == waitpid(child, &status, 0)); ++ dprintf3("[%d] waitpid(%d) status: %x\n", getpid(), child, status); ++ pkey_assert(WIFSTOPPED(status) && WSTOPSIG(status) == SIGSTOP); ++ ++ /* Test that the modification is visible in ptrace after execution */ ++ memset(xsave, 0xCC, xsave_size); ++ ret = ptrace(PTRACE_GETREGSET, child, (void *)NT_X86_XSTATE, &iov); ++ pkey_assert(ret == 0); ++ pkey_assert(*pkey_register == new_pkru); ++ ++ /* Clear the PKRU bit from XSTATE_BV */ ++ xstate_bv = (u64 *)(xsave + 512); ++ *xstate_bv &= ~(1 << 9); ++ ++ ret = ptrace(PTRACE_SETREGSET, child, (void *)NT_X86_XSTATE, &iov); ++ pkey_assert(ret == 0); ++ ++ /* Test that the modification is visible in ptrace before any execution */ ++ memset(xsave, 0xCC, xsave_size); ++ ret = ptrace(PTRACE_GETREGSET, child, (void *)NT_X86_XSTATE, &iov); ++ pkey_assert(ret == 0); ++ pkey_assert(*pkey_register == 0); ++ ++ ret = ptrace(PTRACE_CONT, child, 0, 0); ++ pkey_assert(ret == 0); ++ ++ /* Test that the tracee saw the PKRU value go to 0 */ ++ pkey_assert(child == waitpid(child, &status, 0)); ++ 
dprintf3("[%d] waitpid(%d) status: %x\n", getpid(), child, status); ++ pkey_assert(WIFSTOPPED(status) && WSTOPSIG(status) == SIGSTOP); ++ ++ /* Test that the modification is visible in ptrace after execution */ ++ memset(xsave, 0xCC, xsave_size); ++ ret = ptrace(PTRACE_GETREGSET, child, (void *)NT_X86_XSTATE, &iov); ++ pkey_assert(ret == 0); ++ pkey_assert(*pkey_register == 0); ++ ++ ret = ptrace(PTRACE_CONT, child, 0, 0); ++ pkey_assert(ret == 0); ++ pkey_assert(child == waitpid(child, &status, 0)); ++ dprintf3("[%d] waitpid(%d) status: %x\n", getpid(), child, status); ++ pkey_assert(WIFEXITED(status)); ++ pkey_assert(WEXITSTATUS(status) == 0); ++ free(xsave); ++} ++#endif ++ + void test_mprotect_pkey_on_unsupported_cpu(int *ptr, u16 pkey) + { + int size = PAGE_SIZE; +@@ -1585,6 +1709,9 @@ void (*pkey_tests[])(int *ptr, u16 pkey) = { + test_pkey_syscalls_bad_args, + test_pkey_alloc_exhaust, + test_pkey_alloc_free_attach_pkey0, ++#if defined(__i386__) || defined(__x86_64__) ++ test_ptrace_modifies_pkru, ++#endif + }; + + void run_tests_once(void) diff --git a/tools/testing/selftests/vm/userfaultfd.c b/tools/testing/selftests/vm/userfaultfd.c index 60aa1a4fc69b6..138b011c667e2 100644 --- a/tools/testing/selftests/vm/userfaultfd.c @@ -455822,6 +583677,19 @@ index 60aa1a4fc69b6..138b011c667e2 100644 huge_fd, *alloc_area == area_src ? 0 : nr_pages * page_size); if (area_alias == MAP_FAILED) +diff --git a/tools/testing/selftests/vm/va_128TBswitch.c b/tools/testing/selftests/vm/va_128TBswitch.c +index 83acdff26a135..da6ec3b53ea8d 100644 +--- a/tools/testing/selftests/vm/va_128TBswitch.c ++++ b/tools/testing/selftests/vm/va_128TBswitch.c +@@ -9,7 +9,7 @@ + #include <sys/mman.h> + #include <string.h> + +-#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0])) ++#include "../kselftest.h" + + #ifdef __powerpc64__ + #define PAGE_SIZE (64 << 10) diff --git a/tools/testing/selftests/vm/write_hugetlb_memory.sh b/tools/testing/selftests/vm/write_hugetlb_memory.sh index d3d0d108924d4..70a02301f4c27 100644 --- a/tools/testing/selftests/vm/write_hugetlb_memory.sh @@ -456572,6 +584440,121 @@ index 0d7bbe49359d8..1b25cc7c64bbd 100644 vpath %.c ../../drivers/virtio ../../drivers/vhost mod: ${MAKE} -C `pwd`/../.. 
M=`pwd`/vhost_test V=${V} +diff --git a/tools/virtio/linux/bug.h b/tools/virtio/linux/bug.h +index 813baf13f62a2..51a919083d9b8 100644 +--- a/tools/virtio/linux/bug.h ++++ b/tools/virtio/linux/bug.h +@@ -1,13 +1,11 @@ + /* SPDX-License-Identifier: GPL-2.0 */ +-#ifndef BUG_H +-#define BUG_H ++#ifndef _LINUX_BUG_H ++#define _LINUX_BUG_H + + #include <asm/bug.h> + + #define BUG_ON(__BUG_ON_cond) assert(!(__BUG_ON_cond)) + +-#define BUILD_BUG_ON(x) +- + #define BUG() abort() + +-#endif /* BUG_H */ ++#endif /* _LINUX_BUG_H */ +diff --git a/tools/virtio/linux/build_bug.h b/tools/virtio/linux/build_bug.h +new file mode 100644 +index 0000000000000..cdbb75e28a604 +--- /dev/null ++++ b/tools/virtio/linux/build_bug.h +@@ -0,0 +1,7 @@ ++/* SPDX-License-Identifier: GPL-2.0 */ ++#ifndef _LINUX_BUILD_BUG_H ++#define _LINUX_BUILD_BUG_H ++ ++#define BUILD_BUG_ON(x) ++ ++#endif /* _LINUX_BUILD_BUG_H */ +diff --git a/tools/virtio/linux/cpumask.h b/tools/virtio/linux/cpumask.h +new file mode 100644 +index 0000000000000..307da69d6b26c +--- /dev/null ++++ b/tools/virtio/linux/cpumask.h +@@ -0,0 +1,7 @@ ++/* SPDX-License-Identifier: GPL-2.0 */ ++#ifndef _LINUX_CPUMASK_H ++#define _LINUX_CPUMASK_H ++ ++#include <linux/kernel.h> ++ ++#endif /* _LINUX_CPUMASK_H */ +diff --git a/tools/virtio/linux/gfp.h b/tools/virtio/linux/gfp.h +new file mode 100644 +index 0000000000000..43d146f236f14 +--- /dev/null ++++ b/tools/virtio/linux/gfp.h +@@ -0,0 +1,7 @@ ++/* SPDX-License-Identifier: GPL-2.0 */ ++#ifndef __LINUX_GFP_H ++#define __LINUX_GFP_H ++ ++#include <linux/topology.h> ++ ++#endif +diff --git a/tools/virtio/linux/kernel.h b/tools/virtio/linux/kernel.h +index 0b493542e61a6..a4beb719d2174 100644 +--- a/tools/virtio/linux/kernel.h ++++ b/tools/virtio/linux/kernel.h +@@ -10,6 +10,7 @@ + #include <stdarg.h> + + #include <linux/compiler.h> ++#include <linux/log2.h> + #include <linux/types.h> + #include <linux/overflow.h> + #include <linux/list.h> +diff --git a/tools/virtio/linux/kmsan.h b/tools/virtio/linux/kmsan.h +new file mode 100644 +index 0000000000000..272b5aa285d5a +--- /dev/null ++++ b/tools/virtio/linux/kmsan.h +@@ -0,0 +1,12 @@ ++/* SPDX-License-Identifier: GPL-2.0 */ ++#ifndef _LINUX_KMSAN_H ++#define _LINUX_KMSAN_H ++ ++#include <linux/gfp.h> ++ ++inline void kmsan_handle_dma(struct page *page, size_t offset, size_t size, ++ enum dma_data_direction dir) ++{ ++} ++ ++#endif /* _LINUX_KMSAN_H */ +diff --git a/tools/virtio/linux/scatterlist.h b/tools/virtio/linux/scatterlist.h +index 369ee308b6686..74d9e1825748e 100644 +--- a/tools/virtio/linux/scatterlist.h ++++ b/tools/virtio/linux/scatterlist.h +@@ -2,6 +2,7 @@ + #ifndef SCATTERLIST_H + #define SCATTERLIST_H + #include <linux/kernel.h> ++#include <linux/bug.h> + + struct scatterlist { + unsigned long page_link; +diff --git a/tools/virtio/linux/topology.h b/tools/virtio/linux/topology.h +new file mode 100644 +index 0000000000000..910794afb993a +--- /dev/null ++++ b/tools/virtio/linux/topology.h +@@ -0,0 +1,7 @@ ++/* SPDX-License-Identifier: GPL-2.0 */ ++#ifndef _LINUX_TOPOLOGY_H ++#define _LINUX_TOPOLOGY_H ++ ++#include <linux/cpumask.h> ++ ++#endif /* _LINUX_TOPOLOGY_H */ diff --git a/tools/virtio/virtio_test.c b/tools/virtio/virtio_test.c index cb3f29c09aff3..23f142af544ad 100644 --- a/tools/virtio/virtio_test.c @@ -456584,6 +584567,48 @@ index cb3f29c09aff3..23f142af544ad 100644 dev->buf_size = 1024; dev->buf = malloc(dev->buf_size); assert(dev->buf); +diff --git a/tools/virtio/vringh_test.c b/tools/virtio/vringh_test.c +index fa87b58bd5fa5..98ff808d6f0c2 
100644 +--- a/tools/virtio/vringh_test.c ++++ b/tools/virtio/vringh_test.c +@@ -308,6 +308,7 @@ static int parallel_test(u64 features, + + gvdev.vdev.features = features; + INIT_LIST_HEAD(&gvdev.vdev.vqs); ++ spin_lock_init(&gvdev.vdev.vqs_list_lock); + gvdev.to_host_fd = to_host[1]; + gvdev.notifies = 0; + +@@ -455,6 +456,7 @@ int main(int argc, char *argv[]) + getrange = getrange_iov; + vdev.features = 0; + INIT_LIST_HEAD(&vdev.vqs); ++ spin_lock_init(&vdev.vqs_list_lock); + + while (argv[1]) { + if (strcmp(argv[1], "--indirect") == 0) +diff --git a/tools/vm/slabinfo-gnuplot.sh b/tools/vm/slabinfo-gnuplot.sh +index 26e193ffd2a2f..873a892147e57 100644 +--- a/tools/vm/slabinfo-gnuplot.sh ++++ b/tools/vm/slabinfo-gnuplot.sh +@@ -150,7 +150,7 @@ do_preprocess() + let lines=3 + out=`basename "$in"`"-slabs-by-loss" + `cat "$in" | grep -A "$lines" 'Slabs sorted by loss' |\ +- egrep -iv '\-\-|Name|Slabs'\ ++ grep -E -iv '\-\-|Name|Slabs'\ + | awk '{print $1" "$4+$2*$3" "$4}' > "$out"` + if [ $? -eq 0 ]; then + do_slabs_plotting "$out" +@@ -159,7 +159,7 @@ do_preprocess() + let lines=3 + out=`basename "$in"`"-slabs-by-size" + `cat "$in" | grep -A "$lines" 'Slabs sorted by size' |\ +- egrep -iv '\-\-|Name|Slabs'\ ++ grep -E -iv '\-\-|Name|Slabs'\ + | awk '{print $1" "$4" "$4-$2*$3}' > "$out"` + if [ $? -eq 0 ]; then + do_slabs_plotting "$out" diff --git a/tools/vm/slabinfo.c b/tools/vm/slabinfo.c index 9b68658b6bb85..0fffaeedee767 100644 --- a/tools/vm/slabinfo.c diff --git a/system/easy-kernel/0500-print-fw-info.patch b/system/easy-kernel/0500-print-fw-info.patch index ba8c60930d..340eb957cf 100644 --- a/system/easy-kernel/0500-print-fw-info.patch +++ b/system/easy-kernel/0500-print-fw-info.patch @@ -1,10 +1,11 @@ ---- a/drivers/base/firmware_loader/main.c 2021-08-24 15:42:07.025482085 -0400 -+++ b/drivers/base/firmware_loader/main.c 2021-08-24 15:44:40.782975313 -0400 -@@ -809,6 +809,9 @@ _request_firmware(const struct firmware +diff -Naur linux-5.15.98/drivers/base/firmware_loader/main.c linux-5.15-mc4/drivers/base/firmware_loader/main.c +--- linux-5.15.98/drivers/base/firmware_loader/main.c 2023-03-09 13:40:26.380005346 +1100 ++++ linux-5.15-mc4/drivers/base/firmware_loader/main.c 2023-03-09 13:48:55.620003403 +1100 +@@ -810,6 +810,9 @@ ret = _request_firmware_prepare(&fw, name, device, buf, size, offset, opt_flags); -+ ++ + printk(KERN_NOTICE "Loading firmware: %s\n", name); + if (ret <= 0) /* error or already assigned */ diff --git a/system/easy-kernel/1000-version.patch b/system/easy-kernel/1000-version.patch index 0e4210b605..e077ef48b7 100644 --- a/system/easy-kernel/1000-version.patch +++ b/system/easy-kernel/1000-version.patch @@ -4,10 +4,10 @@ diff -Naur linux-5.15/Makefile linux-5.15-branded/Makefile @@ -2,8 +2,8 @@ VERSION = 5 PATCHLEVEL = 15 - SUBLEVEL = 76 + SUBLEVEL = 98 -EXTRAVERSION = -NAME = Trick or Treat -+EXTRAVERSION = -mc3 ++EXTRAVERSION = -mc4 +NAME = Ponder the Icosahedron # *DOCUMENTATION* diff --git a/system/easy-kernel/APKBUILD b/system/easy-kernel/APKBUILD index a3fe0aad0d..58c594e63d 100644 --- a/system/easy-kernel/APKBUILD +++ b/system/easy-kernel/APKBUILD @@ -2,9 +2,9 @@ # Maintainer: Adelie Platform Group <adelie-devel@lists.adelielinux.org> # KEEP THIS IN SYNC with the other easy-kernel packages. 
_kflavour="" -_patchver=3 # must match 1000-version.patch +_patchver=4 # must match 1000-version.patch _pkgname=easy-kernel$_kflavour -pkgver=5.15.76 +pkgver=5.15.98 pkgrel=0 pkgname=$_pkgname-$pkgver-mc$_patchver pkgdesc="The Linux kernel, packaged for your convenience" @@ -32,7 +32,7 @@ source="https://cdn.kernel.org/pub/linux/kernel/v${_pkgmajver}.x/linux-${_pkgmin config-x86_64 kernel.h - 0100-linux-5.15.76.patch + 0100-linux-5.15.98.patch 0120-XATTR_USER_PREFIX.patch 0122-link-security-restrictions.patch 0124-bluetooth-keysize-check.patch @@ -154,16 +154,16 @@ src() { } sha512sums="d25ad40b5bcd6a4c6042fd0fd84e196e7a58024734c3e9a484fd0d5d54a0c1d87db8a3c784eff55e43b6f021709dc685eb0efa18d2aec327e4f88a79f405705a linux-5.15.tar.xz -025d721c9ef36ca62b18ff37d9a4ae34aeb420cbe5fb1d9a32081e0cac2693e0cd9cf8b87175920166bfa07011e66a7db802acd563d2153ebb1c21e5e4e99e41 config-aarch64 -9f5279d20fc6eaad78ab27b7fb86553e310369c8a68a2ff60c7cd9895febd3002cae748ad3a8b4fddbb62c6e829104138fc2bbca939e1c88c0bfcf7aa42809bf config-armv7 -d87b8052b5180e5a2ebfe248fae9917afad3aec4c156836106238368e7f990f9ac5e5f6fa4251cd240c3726bfb8bdab91467902d1ddf65305049a2e74ce2ba02 config-m68k -4f585e36cc0f4a8ec47a131f15fc25c2e36e42a2ec00ddbb8b2460ba136c63d70bd1d435b418ac23b5df75a51cb3d05a5e3e174c85aad5261f0d0d73f6353b30 config-pmmx -9a0010921060d791df9f492c37f6e607da27a060587393b308cc3da1acc3ca516917f023e5b649b3fa90792963a0edad1e703f880721cafe37d718b90ccd1cd0 config-ppc -c6b7f7cb0c6a155465b2db849187a95ff1a31dd157699019a3f3f44e522c62c6e6ec29532b1b1b0c508142e32200a5611e3be3d44d6ea750883808ac6578401c config-ppc64 -e0c859b385140a390ef696a6c26ff28843282d0a0d57cabb50b0d8da66bbb17c37ee5733010d57bd58c6eac5f4e1eb4accf33216d8638115d376f0a431803f78 config-sparc64 -d02dad2531c13ccb8dc9b0bdb31943ba12b374f559b6c0ffaac7cf7d3b809e35b83df705758ce1ea223611ffe6f399295a06286e6ad9033ae8b861ad4933f049 config-x86_64 +72eb26a8c0ce4ddfeae9c37425280b79ee1669d6434b445525da251247c683af64151ce3b46e7c9c7fbb31abfb3038c14d42864bbe17c797dc44ddf506d43c2b config-aarch64 +109cd698c92429c311fda768f8cde491dbde44457172d822b9a9096618e25fde61c46c3f34f3847a82ac9506d56b7222f93580ecd5dc0d77950e649c49c968f1 config-armv7 +da6b532f9e4faa1a7406eecf47fab7e1ddc5a0414ad3cf4d2acd44ff19cebb63ffab0b463e57edde1901088f0e3d8a088a7e9337dc4a2072e70172d129f71d17 config-m68k +3e337aa5e468103b782ff201570970576b8f4a105824867295365c7c6378439e8cbc8ca146b3ac4d1e38e2c3a157c8d4481891dd13c93a81e7b66d36cb26fa34 config-pmmx +0bf9faf114a337acf172656efb2c52a0f618c59a76c20b30a133840f62aabd8b3cfc4dc905f07b118cf50c29439d8e710ebd04ea8b16cd4176596530d42835c0 config-ppc +5f4ce1e31b7c1ad9bc1a2188a3c0fc6a55a2afe206f33b426bbea377e9287aa2ca43fee456e17448564bb60d7ff9a452d402ab804db60d3004dfc3cbff5764b9 config-ppc64 +d1cd7a398dc65d814c8b4b9fb01e87dcf14f42eae237369be54520c133cf7b84415f6959feb56b8cd98dc6eadd7904977241cf12aadd257ecae4c3acd3d2cb7f config-sparc64 +9a1aa926172b86c37666127c4c06947d7c28c98d8e4d95b86fcb724c9846935881093ada10f5c68ee17706bc642d5a9731cc33e7cad303a9d8a6a422a12fc038 config-x86_64 1ca1bc094a8106b33176ab6cfb55c9514e7d69f1ed30c580eee419a3c422a2e6625d71c4581702e911f36c4bbac175010f46b7123bb81fda3303d4decdc57287 kernel.h -9e2e5c0f45c532f3782c575d2d1b1dd34c94ecb21ae19798284c118e56dedfd08b3956892639e54550da94e808ba4e3c73b8e665e44993d6c9d2ec2fdf193946 0100-linux-5.15.76.patch +0222e2313f17f869d356d54813e0868f6003c4d2f2a63972c073afbe2b40355057cbe878bdc18425fee66f6ff873a9086730abf9de47e3be55b22df87f1b9910 0100-linux-5.15.98.patch 
3ed100909f9aed72836a3c712e45e0116cd3c4331961a76a27b867a7098d0df9458387b656c9ea01385c3c37585436e48168ac35666b0e46dca7da05e5e38a61 0120-XATTR_USER_PREFIX.patch c97a3799a2d5e4da9c9dfe129756da629fba8183479b02ca82f9b6d9993f17a165a96bd35ac50eb25fb293785b9b529a95165b1a2eb79c05134bee8ccf22a5d3 0122-link-security-restrictions.patch dc47b18749d95a456f8bc47fd6a0618c286b646b38466c3d950dfbeb25adf3fc1a794e95552e4da1abb58e49f0bd841f7222e71c4d04cb0264ca23476ca9caef 0124-bluetooth-keysize-check.patch @@ -172,9 +172,9 @@ dc47b18749d95a456f8bc47fd6a0618c286b646b38466c3d950dfbeb25adf3fc1a794e95552e4da1 ed92b74c9ea406eb65a304b3c5b93de69167569c70d4d5027ae0a83c59159596ce88bd433573832e36bc1d7fb66b31d20921aa2bc583c7fbd9bf5563075c4c41 0255-ultra-ksm.patch 5f74e6a72876d3cf3b3188a43b999b981b6ea0ca401ad72b3c7d5cc65bf505f50e7ee17d435ec95b7a012dc92e6540aea1bdb501f48690c242705c47d2403513 0260-reduce-swappiness.patch 4e637935c2f37cc18f347293e3c94b18f90e2caccca726304a95c4891257a5b2bb3093aee7a97571038b29c0c987cc60a9a80aefd0d4c9a063b33d102f03579e 0300-tmp513-regression-fix.patch -8ddac562bd16fd96aea4b5165cf4a93eaee49011805c5d648913cea6865a1415c61aed5a34bfc319e4cd394dbaebad1360a6e07dab6e02b6b425a0e414107984 0500-print-fw-info.patch +dfd940af1d14e8a2cc1fcffbfa0fa8db52accf67cad3a7f435fc84047c945ba64dc3c45eb8dbbcc82715d8b8f2fbafa1e22b123158029016c5506cc134a7842c 0500-print-fw-info.patch f0e532539e93d19fc65b417b4a0663e3757823340b968f63bd3a2665f99524feebb843ecf88ccf6909f93a8e7e9290721677c8f43bc3a2a37d99a51c1281a469 0502-gcc9-kcflags.patch -8e5b1357056c31a865bf00ec5999e8df6836674ebe99a6a6f3cc460d91b45075a9436e891948f04f8b3732c52b6f9272ff4c4bb02a5b4b0bc92fc77ea53a3081 1000-version.patch +407b7ee1cd1a7273c914be3ff5388d55d407b95aadd7032a6dd228438c61cc50233c061e006538f9c7a2b989602003bdd5c189e67afe8e5cbc2c2c1196263f31 1000-version.patch 03a73db9eda84a52315499cb511f730946939d2de1b3aa52c60f9bd3a364377a65ddf2b62f505689a84d3e2f0fc7da5ca90429629d93d9909360ee0c3c599bbe no-require-gnu-tar.patch aadf8a3cc46a08e3a396ebd45656aee235103db7a2155cc6980df20b750151a9938b8b73c9319c6cd1b5f8aba6ce707f857a47dabf69df8d91dd93d440cffcb5 no-require-lilo.patch 7bb07eb22002cc48caf0cd55d17ce4097aa583e0ca4048c11c92e1519761b2ae982ffe98311543d4b0dfc991c8bc411b2e1c7be9488b6c6f19ffaa08e69e2f47 no-autoload-fb.conf diff --git a/system/easy-kernel/config-aarch64 b/system/easy-kernel/config-aarch64 index b13f8d7b72..c695d508b5 100644 --- a/system/easy-kernel/config-aarch64 +++ b/system/easy-kernel/config-aarch64 @@ -1,6 +1,6 @@ # # Automatically generated file; DO NOT EDIT. 
-# Linux/arm64 5.15.76-mc3 Kernel Configuration +# Linux/arm64 5.15.98-mc4 Kernel Configuration # CONFIG_CC_VERSION_TEXT="gcc (Adelie 8.5.0) 8.5.0" CONFIG_CC_IS_GCC=y @@ -16,6 +16,7 @@ CONFIG_CC_CAN_LINK_STATIC=y CONFIG_CC_HAS_ASM_GOTO=y CONFIG_CC_HAS_ASM_INLINE=y CONFIG_CC_HAS_NO_PROFILE_FN_ATTR=y +CONFIG_PAHOLE_VERSION=0 CONFIG_IRQ_WORK=y CONFIG_BUILDTIME_TABLE_SORT=y CONFIG_THREAD_INFO_IN_TASK=y @@ -289,6 +290,7 @@ CONFIG_FIX_EARLYCON_MEM=y CONFIG_PGTABLE_LEVELS=4 CONFIG_ARCH_SUPPORTS_UPROBES=y CONFIG_ARCH_PROC_KCORE_TEXT=y +CONFIG_BROKEN_GAS_INST=y # # Platform selection @@ -350,7 +352,6 @@ CONFIG_ARM64_ERRATUM_834220=y CONFIG_ARM64_ERRATUM_1742098=y CONFIG_ARM64_ERRATUM_845719=y CONFIG_ARM64_ERRATUM_843419=y -CONFIG_ARM64_LD_HAS_FIX_ERRATUM_843419=y CONFIG_ARM64_ERRATUM_1024718=y CONFIG_ARM64_ERRATUM_1418040=y CONFIG_ARM64_WORKAROUND_SPECULATIVE_AT=y @@ -435,9 +436,6 @@ CONFIG_CP15_BARRIER_EMULATION=y # CONFIG_ARM64_HW_AFDBM=y CONFIG_ARM64_PAN=y -CONFIG_AS_HAS_LDAPR=y -CONFIG_AS_HAS_LSE_ATOMICS=y -CONFIG_ARM64_LSE_ATOMICS=y CONFIG_ARM64_USE_LSE_ATOMICS=y # end of ARMv8.1 architectural features @@ -453,22 +451,17 @@ CONFIG_ARM64_CNP=y # ARMv8.3 architectural features # # CONFIG_ARM64_PTR_AUTH is not set -CONFIG_CC_HAS_SIGN_RETURN_ADDRESS=y -CONFIG_AS_HAS_PAC=y # end of ARMv8.3 architectural features # # ARMv8.4 architectural features # CONFIG_ARM64_AMU_EXTN=y -CONFIG_AS_HAS_ARMV8_4=y -CONFIG_ARM64_TLB_RANGE=y # end of ARMv8.4 architectural features # # ARMv8.5 architectural features # -CONFIG_AS_HAS_ARMV8_5=y CONFIG_ARM64_BTI=y CONFIG_ARM64_E0PD=y CONFIG_ARCH_RANDOM=y @@ -1086,6 +1079,7 @@ CONFIG_INET_ESP=m CONFIG_INET_ESP_OFFLOAD=m # CONFIG_INET_ESPINTCP is not set CONFIG_INET_IPCOMP=m +CONFIG_INET_TABLE_PERTURB_ORDER=16 CONFIG_INET_XFRM_TUNNEL=m CONFIG_INET_TUNNEL=m CONFIG_INET_DIAG=m @@ -7410,7 +7404,6 @@ CONFIG_EXTCON=y # CONFIG_EXTCON_SM5502 is not set CONFIG_EXTCON_USB_GPIO=y # CONFIG_EXTCON_USBC_CROS_EC is not set -# CONFIG_EXTCON_USBC_TUSB320 is not set CONFIG_MEMORY=y # CONFIG_ARM_PL172_MPMC is not set CONFIG_MTK_SMI=m diff --git a/system/easy-kernel/config-armv7 b/system/easy-kernel/config-armv7 index 24bba8d1f9..19a97fceb2 100644 --- a/system/easy-kernel/config-armv7 +++ b/system/easy-kernel/config-armv7 @@ -1,25 +1,29 @@ # # Automatically generated file; DO NOT EDIT. 
-# Linux/arm 5.4.66-mc1 Kernel Configuration -# - -# -# Compiler: gcc (Gentoo Hardened 10.2.0-r1 p2) 10.2.0 +# Linux/arm 5.15.98-mc4 Kernel Configuration # +CONFIG_CC_VERSION_TEXT="gcc (Adelie 8.5.0) 8.5.0" CONFIG_CC_IS_GCC=y -CONFIG_GCC_VERSION=100200 +CONFIG_GCC_VERSION=80500 CONFIG_CLANG_VERSION=0 -CONFIG_CC_CAN_LINK=y +CONFIG_AS_IS_GNU=y +CONFIG_AS_VERSION=23200 +CONFIG_LD_IS_BFD=y +CONFIG_LD_VERSION=23200 +CONFIG_LLD_VERSION=0 CONFIG_CC_HAS_ASM_GOTO=y CONFIG_CC_HAS_ASM_INLINE=y +CONFIG_CC_HAS_NO_PROFILE_FN_ATTR=y +CONFIG_PAHOLE_VERSION=0 CONFIG_IRQ_WORK=y -CONFIG_BUILDTIME_EXTABLE_SORT=y +CONFIG_BUILDTIME_TABLE_SORT=y # # General setup # CONFIG_INIT_ENV_ARG_LIMIT=32 # CONFIG_COMPILE_TEST is not set +# CONFIG_WERROR is not set CONFIG_LOCALVERSION="-easy" # CONFIG_LOCALVERSION_AUTO is not set CONFIG_BUILD_SALT="" @@ -33,12 +37,14 @@ CONFIG_HAVE_KERNEL_LZ4=y # CONFIG_KERNEL_XZ is not set CONFIG_KERNEL_LZO=y # CONFIG_KERNEL_LZ4 is not set +CONFIG_DEFAULT_INIT="/sbin/init" CONFIG_DEFAULT_HOSTNAME="adelie" CONFIG_SWAP=y CONFIG_SYSVIPC=y CONFIG_SYSVIPC_SYSCTL=y CONFIG_POSIX_MQUEUE=y CONFIG_POSIX_MQUEUE_SYSCTL=y +CONFIG_WATCH_QUEUE=y CONFIG_CROSS_MEMORY_ATTACH=y # CONFIG_USELIB is not set # CONFIG_AUDIT is not set @@ -56,6 +62,8 @@ CONFIG_HARDIRQS_SW_RESEND=y CONFIG_GENERIC_IRQ_CHIP=y CONFIG_IRQ_DOMAIN=y CONFIG_IRQ_DOMAIN_HIERARCHY=y +CONFIG_IRQ_FASTEOI_HIERARCHY_HANDLERS=y +CONFIG_GENERIC_IRQ_IPI=y CONFIG_GENERIC_MSI_IRQ=y CONFIG_GENERIC_MSI_IRQ_DOMAIN=y CONFIG_HANDLE_DOMAIN_IRQ=y @@ -65,7 +73,6 @@ CONFIG_SPARSE_IRQ=y # end of IRQ subsystem CONFIG_GENERIC_IRQ_MULTI_HANDLER=y -CONFIG_ARCH_CLOCKSOURCE_DATA=y CONFIG_GENERIC_TIME_VSYSCALL=y CONFIG_GENERIC_CLOCKEVENTS=y CONFIG_ARCH_HAS_TICK_BROADCAST=y @@ -83,9 +90,25 @@ CONFIG_NO_HZ_IDLE=y CONFIG_HIGH_RES_TIMERS=y # end of Timers subsystem +CONFIG_BPF=y +CONFIG_HAVE_EBPF_JIT=y + +# +# BPF subsystem +# +CONFIG_BPF_SYSCALL=y +CONFIG_BPF_JIT=y +CONFIG_BPF_JIT_ALWAYS_ON=y +CONFIG_BPF_JIT_DEFAULT_ON=y +# CONFIG_BPF_UNPRIV_DEFAULT_OFF is not set +# CONFIG_BPF_PRELOAD is not set +# CONFIG_BPF_LSM is not set +# end of BPF subsystem + # CONFIG_PREEMPT_NONE is not set CONFIG_PREEMPT_VOLUNTARY=y # CONFIG_PREEMPT is not set +CONFIG_SCHED_CORE=y # # CPU/Task time and stats accounting @@ -94,6 +117,7 @@ CONFIG_TICK_CPU_ACCOUNTING=y # CONFIG_VIRT_CPU_ACCOUNTING_GEN is not set # CONFIG_IRQ_TIME_ACCOUNTING is not set CONFIG_HAVE_SCHED_AVG_IRQ=y +CONFIG_SCHED_THERMAL_PRESSURE=y CONFIG_BSD_PROCESS_ACCT=y CONFIG_BSD_PROCESS_ACCT_V3=y CONFIG_TASKSTATS=y @@ -113,6 +137,8 @@ CONFIG_TREE_RCU=y # CONFIG_RCU_EXPERT is not set CONFIG_SRCU=y CONFIG_TREE_SRCU=y +CONFIG_TASKS_RCU_GENERIC=y +CONFIG_TASKS_TRACE_RCU=y CONFIG_RCU_STALL_COMMON=y CONFIG_RCU_NEED_SEGCBLIST=y # end of RCU Subsystem @@ -123,6 +149,7 @@ CONFIG_IKCONFIG_PROC=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_LOG_CPU_MAX_BUF_SHIFT=12 CONFIG_PRINTK_SAFE_LOG_BUF_SHIFT=12 +# CONFIG_PRINTK_INDEX is not set CONFIG_GENERIC_SCHED_CLOCK=y # @@ -135,7 +162,6 @@ CONFIG_CGROUPS=y CONFIG_PAGE_COUNTER=y CONFIG_MEMCG=y CONFIG_MEMCG_SWAP=y -CONFIG_MEMCG_SWAP_ENABLED=y CONFIG_MEMCG_KMEM=y CONFIG_BLK_CGROUP=y CONFIG_CGROUP_WRITEBACK=y @@ -153,6 +179,7 @@ CONFIG_CGROUP_DEVICE=y CONFIG_CGROUP_CPUACCT=y CONFIG_CGROUP_PERF=y CONFIG_CGROUP_BPF=y +# CONFIG_CGROUP_MISC is not set # CONFIG_CGROUP_DEBUG is not set CONFIG_SOCK_CGROUP_DATA=y CONFIG_NAMESPACES=y @@ -173,26 +200,27 @@ CONFIG_RD_LZMA=y CONFIG_RD_XZ=y CONFIG_RD_LZO=y CONFIG_RD_LZ4=y +CONFIG_RD_ZSTD=y +CONFIG_BOOT_CONFIG=y CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE=y # 
CONFIG_CC_OPTIMIZE_FOR_SIZE is not set +CONFIG_LD_ORPHAN_WARN=y CONFIG_SYSCTL=y CONFIG_HAVE_UID16=y -CONFIG_BPF=y CONFIG_EXPERT=y # CONFIG_UID16 is not set CONFIG_MULTIUSER=y # CONFIG_SGETMASK_SYSCALL is not set # CONFIG_SYSFS_SYSCALL is not set -# CONFIG_SYSCTL_SYSCALL is not set CONFIG_FHANDLE=y CONFIG_POSIX_TIMERS=y CONFIG_PRINTK=y -CONFIG_PRINTK_NMI=y CONFIG_BUG=y CONFIG_ELF_CORE=y CONFIG_BASE_FULL=y CONFIG_FUTEX=y CONFIG_FUTEX_PI=y +CONFIG_HAVE_FUTEX_CMPXCHG=y CONFIG_EPOLL=y CONFIG_SIGNALFD=y CONFIG_TIMERFD=y @@ -205,10 +233,9 @@ CONFIG_MEMBARRIER=y CONFIG_KALLSYMS=y # CONFIG_KALLSYMS_ALL is not set CONFIG_KALLSYMS_BASE_RELATIVE=y -CONFIG_BPF_SYSCALL=y -CONFIG_BPF_JIT_ALWAYS_ON=y CONFIG_USERFAULTFD=y CONFIG_ARCH_HAS_MEMBARRIER_SYNC_CORE=y +CONFIG_KCMP=y CONFIG_RSEQ=y # CONFIG_DEBUG_RSEQ is not set # CONFIG_EMBEDDED is not set @@ -225,7 +252,6 @@ CONFIG_PERF_EVENTS=y CONFIG_VM_EVENT_COUNTERS=y # CONFIG_SLUB_DEBUG is not set -# CONFIG_SLUB_MEMCG_SYSFS_ON is not set # CONFIG_COMPAT_BRK is not set # CONFIG_SLAB is not set CONFIG_SLUB=y @@ -249,12 +275,10 @@ CONFIG_HAVE_PROC_CPU=y CONFIG_NO_IOPORT_MAP=y CONFIG_STACKTRACE_SUPPORT=y CONFIG_LOCKDEP_SUPPORT=y -CONFIG_TRACE_IRQFLAGS_SUPPORT=y CONFIG_ARCH_HAS_BANDGAP=y CONFIG_FIX_EARLYCON_MEM=y CONFIG_GENERIC_HWEIGHT=y CONFIG_GENERIC_CALIBRATE_DELAY=y -CONFIG_ZONE_DMA=y CONFIG_ARCH_SUPPORTS_UPROBES=y CONFIG_ARM_PATCH_PHYS_VIRT=y CONFIG_GENERIC_BUG=y @@ -267,7 +291,6 @@ CONFIG_MMU=y CONFIG_ARCH_MMAP_RND_BITS_MIN=8 CONFIG_ARCH_MMAP_RND_BITS_MAX=16 CONFIG_ARCH_MULTIPLATFORM=y -# CONFIG_ARCH_EBSA110 is not set # CONFIG_ARCH_EP93XX is not set # CONFIG_ARCH_FOOTBRIDGE is not set # CONFIG_ARCH_IOP32X is not set @@ -331,7 +354,7 @@ CONFIG_ARCH_EXYNOS4=y CONFIG_ARCH_EXYNOS5=y # -# EXYNOS SoCs +# Exynos SoCs # CONFIG_SOC_EXYNOS3250=y CONFIG_CPU_EXYNOS4210=y @@ -343,21 +366,6 @@ CONFIG_SOC_EXYNOS5420=y CONFIG_SOC_EXYNOS5800=y CONFIG_EXYNOS_MCPM=y CONFIG_EXYNOS_CPU_SUSPEND=y -CONFIG_PLAT_SAMSUNG=y - -# -# Samsung Common options -# - -# -# Boot options -# - -# -# Power management -# -# end of Samsung Common options - # CONFIG_ARCH_HIGHBANK is not set # CONFIG_ARCH_HISI is not set CONFIG_ARCH_MXC=y @@ -366,10 +374,6 @@ CONFIG_HAVE_IMX_GPC=y CONFIG_HAVE_IMX_MMDC=y CONFIG_HAVE_IMX_SRC=y -# -# Device tree only -# - # # Cortex-A platforms # @@ -395,6 +399,7 @@ CONFIG_SOC_IMX6UL=y # CONFIG_ARCH_MESON is not set # CONFIG_ARCH_MILBEAUT is not set # CONFIG_ARCH_MMP is not set +# CONFIG_ARCH_MSTARV7 is not set # CONFIG_ARCH_MVEBU is not set # CONFIG_ARCH_NPCM is not set CONFIG_ARCH_OMAP=y @@ -417,6 +422,7 @@ CONFIG_MACH_OMAP_GENERIC=y # # TI OMAP/AM/DM/DRA Family # +CONFIG_OMAP_HWMOD=y CONFIG_ARCH_OMAP3=y CONFIG_ARCH_OMAP4=y CONFIG_SOC_OMAP5=y @@ -434,27 +440,24 @@ CONFIG_SOC_HAS_OMAP2_SDRC=y CONFIG_SOC_HAS_REALTIME_COUNTER=y CONFIG_SOC_OMAP3430=y CONFIG_SOC_TI81XX=y -CONFIG_OMAP_PACKAGE_CBB=y # # OMAP Legacy Platform Data Board Type # -CONFIG_MACH_OMAP3517EVM=y -CONFIG_MACH_OMAP3_PANDORA=y # CONFIG_OMAP3_SDRC_AC_TIMING is not set # end of TI OMAP2/3/4 Specific Features # CONFIG_OMAP5_ERRATA_801819 is not set # end of TI OMAP/AM/DM/DRA Family -# CONFIG_ARCH_SIRF is not set # CONFIG_ARCH_QCOM is not set # CONFIG_ARCH_RDA is not set +# CONFIG_ARCH_REALTEK is not set # CONFIG_ARCH_REALVIEW is not set CONFIG_ARCH_ROCKCHIP=y # CONFIG_ARCH_S5PV210 is not set # CONFIG_ARCH_RENESAS is not set -# CONFIG_ARCH_SOCFPGA is not set +# CONFIG_ARCH_INTEL_SOCFPGA is not set # CONFIG_PLAT_SPEAR is not set # CONFIG_ARCH_STI is not set # CONFIG_ARCH_STM32 is not set @@ -466,7 +469,6 @@ 
CONFIG_MACH_SUN7I=y CONFIG_MACH_SUN8I=y CONFIG_MACH_SUN9I=y CONFIG_ARCH_SUNXI_MC_SMP=y -# CONFIG_ARCH_TANGO is not set # CONFIG_ARCH_TEGRA is not set # CONFIG_ARCH_UNIPHIER is not set # CONFIG_ARCH_U8500 is not set @@ -476,7 +478,6 @@ CONFIG_ARCH_VEXPRESS_CORTEX_A5_A9_ERRATA=y CONFIG_ARCH_VEXPRESS_SPC=y CONFIG_ARCH_VEXPRESS_TC2_PM=y # CONFIG_ARCH_WM8850 is not set -# CONFIG_ARCH_ZX is not set # CONFIG_ARCH_ZYNQ is not set CONFIG_PLAT_VERSATILE=y @@ -511,6 +512,7 @@ CONFIG_SWP_EMULATE=y # CONFIG_CPU_BPREDICT_DISABLE is not set CONFIG_CPU_SPECTRE=y CONFIG_HARDEN_BRANCH_PREDICTOR=y +CONFIG_HARDEN_BRANCH_HISTORY=y CONFIG_KUSER_HELPERS=y CONFIG_VDSO=y CONFIG_OUTER_CACHE=y @@ -584,22 +586,19 @@ CONFIG_HZ_100=y CONFIG_HZ=100 CONFIG_SCHED_HRTICK=y CONFIG_THUMB2_KERNEL=y -CONFIG_THUMB2_AVOID_R_ARM_THM_JUMP11=y CONFIG_ARM_PATCH_IDIV=y CONFIG_AEABI=y -CONFIG_ARCH_HAS_HOLES_MEMORYMODEL=y -CONFIG_HAVE_ARCH_PFN_VALID=y +CONFIG_ARCH_SELECT_MEMORY_MODEL=y +CONFIG_ARCH_FLATMEM_ENABLE=y +CONFIG_ARCH_SPARSEMEM_ENABLE=y CONFIG_HIGHMEM=y CONFIG_HIGHPTE=y CONFIG_HW_PERF_EVENTS=y -CONFIG_SYS_SUPPORTS_HUGETLBFS=y -CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE=y CONFIG_ARCH_WANT_GENERAL_HUGETLB=y CONFIG_ARM_MODULE_PLTS=y CONFIG_FORCE_MAX_ZONEORDER=12 CONFIG_ALIGNMENT_TRAP=y # CONFIG_UACCESS_WITH_MEMCPY is not set -CONFIG_SECCOMP=y CONFIG_PARAVIRT=y CONFIG_PARAVIRT_TIME_ACCOUNTING=y CONFIG_XEN_DOM0=y @@ -658,13 +657,12 @@ CONFIG_CPU_FREQ_GOV_SCHEDUTIL=y CONFIG_CPUFREQ_DT=y CONFIG_CPUFREQ_DT_PLATDEV=y # CONFIG_ARM_ALLWINNER_SUN50I_CPUFREQ_NVMEM is not set -# CONFIG_ARM_BIG_LITTLE_CPUFREQ is not set +# CONFIG_ARM_VEXPRESS_SPC_CPUFREQ is not set CONFIG_ARM_IMX6Q_CPUFREQ=y # CONFIG_ARM_IMX_CPUFREQ_DT is not set # CONFIG_ARM_OMAP2PLUS_CPUFREQ is not set CONFIG_ARM_RASPBERRYPI_CPUFREQ=y # CONFIG_ARM_TI_CPUFREQ is not set -CONFIG_QORIQ_CPUFREQ=y # end of CPU Frequency scaling # @@ -712,6 +710,7 @@ CONFIG_SUSPEND_FREEZER=y # CONFIG_SUSPEND_SKIP_SYNC is not set CONFIG_HIBERNATE_CALLBACKS=y CONFIG_HIBERNATION=y +CONFIG_HIBERNATION_SNAPSHOT_DEV=y CONFIG_PM_STD_PARTITION="" CONFIG_PM_SLEEP=y CONFIG_PM_SLEEP_SMP=y @@ -736,42 +735,6 @@ CONFIG_ARM_CPU_SUSPEND=y CONFIG_ARCH_HIBERNATION_POSSIBLE=y # end of Power management options -# -# Firmware Drivers -# -# CONFIG_ARM_SCMI_PROTOCOL is not set -CONFIG_ARM_SCPI_PROTOCOL=m -CONFIG_ARM_SCPI_POWER_DOMAIN=m -CONFIG_FIRMWARE_MEMMAP=y -CONFIG_DMIID=y -CONFIG_DMI_SYSFS=y -CONFIG_RASPBERRYPI_FIRMWARE=y -CONFIG_TRUSTED_FOUNDATIONS=y -CONFIG_HAVE_ARM_SMCCC=y -CONFIG_ARM_PSCI_FW=y -# CONFIG_ARM_PSCI_CHECKER is not set -# CONFIG_GOOGLE_FIRMWARE is not set - -# -# EFI (Extensible Firmware Interface) Support -# -# CONFIG_EFI_VARS is not set -CONFIG_EFI_ESRT=y -CONFIG_EFI_PARAMS_FROM_FDT=y -CONFIG_EFI_RUNTIME_WRAPPERS=y -CONFIG_EFI_ARMSTUB=y -CONFIG_EFI_ARMSTUB_DTB_LOADER=y -# CONFIG_EFI_CAPSULE_LOADER is not set -# CONFIG_EFI_TEST is not set -# CONFIG_RESET_ATTACK_MITIGATION is not set -# end of EFI (Extensible Firmware Interface) Support - -# -# Tegra firmware driver -# -# end of Tegra firmware driver -# end of Firmware Drivers - CONFIG_ARM_CRYPTO=y CONFIG_CRYPTO_SHA1_ARM=m CONFIG_CRYPTO_SHA1_ARM_NEON=m @@ -779,6 +742,8 @@ CONFIG_CRYPTO_SHA1_ARM_CE=m CONFIG_CRYPTO_SHA2_ARM_CE=m CONFIG_CRYPTO_SHA256_ARM=m CONFIG_CRYPTO_SHA512_ARM=m +CONFIG_CRYPTO_BLAKE2S_ARM=y +# CONFIG_CRYPTO_BLAKE2B_NEON is not set CONFIG_CRYPTO_AES_ARM=m CONFIG_CRYPTO_AES_ARM_BS=m CONFIG_CRYPTO_AES_ARM_CE=m @@ -786,33 +751,15 @@ CONFIG_CRYPTO_GHASH_ARM_CE=m # CONFIG_CRYPTO_CRCT10DIF_ARM_CE is not set CONFIG_CRYPTO_CRC32_ARM_CE=m 
CONFIG_CRYPTO_CHACHA20_NEON=m +CONFIG_CRYPTO_POLY1305_ARM=m # CONFIG_CRYPTO_NHPOLY1305_NEON is not set -CONFIG_HAVE_KVM_IRQCHIP=y -CONFIG_HAVE_KVM_IRQFD=y -CONFIG_HAVE_KVM_IRQ_ROUTING=y -CONFIG_HAVE_KVM_EVENTFD=y -CONFIG_KVM_MMIO=y -CONFIG_HAVE_KVM_MSI=y -CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT=y -CONFIG_KVM_VFIO=y -CONFIG_HAVE_KVM_ARCH_TLB_FLUSH_ALL=y -CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT=y -CONFIG_HAVE_KVM_IRQ_BYPASS=y -CONFIG_IRQ_BYPASS_MANAGER=y -CONFIG_VIRTUALIZATION=y -CONFIG_KVM=y -CONFIG_KVM_ARM_HOST=y -CONFIG_VHOST_NET=m -CONFIG_VHOST=m -# CONFIG_VHOST_CROSS_ENDIAN_LEGACY is not set +CONFIG_CRYPTO_CURVE25519_NEON=m # # General architecture-dependent options # CONFIG_CRASH_CORE=y CONFIG_KEXEC_CORE=y -CONFIG_OPROFILE=m -CONFIG_HAVE_OPROFILE=y CONFIG_KPROBES=y CONFIG_JUMP_LABEL=y # CONFIG_STATIC_KEYS_SELFTEST is not set @@ -823,6 +770,7 @@ CONFIG_KRETPROBES=y CONFIG_HAVE_KPROBES=y CONFIG_HAVE_KRETPROBES=y CONFIG_HAVE_NMI=y +CONFIG_TRACE_IRQFLAGS_SUPPORT=y CONFIG_HAVE_ARCH_TRACEHOOK=y CONFIG_HAVE_DMA_CONTIGUOUS=y CONFIG_GENERIC_SMP_IDLE_THREAD=y @@ -834,22 +782,27 @@ CONFIG_HAVE_ARCH_THREAD_STRUCT_WHITELIST=y CONFIG_ARCH_32BIT_OFF_T=y CONFIG_HAVE_REGS_AND_STACK_ACCESS_API=y CONFIG_HAVE_RSEQ=y -CONFIG_HAVE_CLK=y CONFIG_HAVE_HW_BREAKPOINT=y CONFIG_HAVE_PERF_REGS=y CONFIG_HAVE_PERF_USER_STACK_DUMP=y CONFIG_HAVE_ARCH_JUMP_LABEL=y -CONFIG_HAVE_RCU_TABLE_FREE=y +CONFIG_MMU_GATHER_TABLE_FREE=y +CONFIG_MMU_GATHER_RCU_TABLE_FREE=y +CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG=y CONFIG_ARCH_WANT_IPC_PARSE_VERSION=y +CONFIG_HAVE_ARCH_SECCOMP=y CONFIG_HAVE_ARCH_SECCOMP_FILTER=y +CONFIG_SECCOMP=y CONFIG_SECCOMP_FILTER=y +# CONFIG_SECCOMP_CACHE_DEBUG is not set CONFIG_HAVE_STACKPROTECTOR=y -CONFIG_CC_HAS_STACKPROTECTOR_NONE=y CONFIG_STACKPROTECTOR=y CONFIG_STACKPROTECTOR_STRONG=y +CONFIG_LTO_NONE=y CONFIG_HAVE_CONTEXT_TRACKING=y CONFIG_HAVE_VIRT_CPU_ACCOUNTING_GEN=y CONFIG_HAVE_IRQ_TIME_ACCOUNTING=y +CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE=y CONFIG_HAVE_MOD_ARCH_SPECIFIC=y CONFIG_MODULES_USE_ELF_REL=y CONFIG_ARCH_HAS_ELF_RANDOMIZE=y @@ -857,7 +810,6 @@ CONFIG_HAVE_ARCH_MMAP_RND_BITS=y CONFIG_HAVE_EXIT_THREAD=y CONFIG_ARCH_MMAP_RND_BITS=8 CONFIG_ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT=y -CONFIG_HAVE_COPY_THREAD_TLS=y CONFIG_CLONE_BACKWARDS=y CONFIG_OLD_SIGSUSPEND3=y CONFIG_OLD_SIGACTION=y @@ -869,8 +821,9 @@ CONFIG_STRICT_KERNEL_RWX=y CONFIG_ARCH_HAS_STRICT_MODULE_RWX=y CONFIG_STRICT_MODULE_RWX=y CONFIG_ARCH_HAS_PHYS_TO_DMA=y -CONFIG_REFCOUNT_FULL=y # CONFIG_LOCK_EVENT_COUNTS is not set +CONFIG_ARCH_WANT_LD_ORPHAN_WARN=y +CONFIG_HAVE_ARCH_PFN_VALID=y # # GCOV-based kernel profiling @@ -879,7 +832,6 @@ CONFIG_REFCOUNT_FULL=y CONFIG_ARCH_HAS_GCOV_PROFILE_ALL=y # end of GCOV-based kernel profiling -CONFIG_PLUGIN_HOSTCC="g++" CONFIG_HAVE_GCC_PLUGINS=y # CONFIG_GCC_PLUGINS is not set # end of General architecture-dependent options @@ -902,29 +854,32 @@ CONFIG_MODULE_SIG_ALL=y # CONFIG_MODULE_SIG_SHA384 is not set CONFIG_MODULE_SIG_SHA512=y CONFIG_MODULE_SIG_HASH="sha512" -CONFIG_MODULE_COMPRESS=y +# CONFIG_MODULE_COMPRESS_NONE is not set # CONFIG_MODULE_COMPRESS_GZIP is not set CONFIG_MODULE_COMPRESS_XZ=y +# CONFIG_MODULE_COMPRESS_ZSTD is not set # CONFIG_MODULE_ALLOW_MISSING_NAMESPACE_IMPORTS is not set -# CONFIG_UNUSED_SYMBOLS is not set +CONFIG_MODPROBE_PATH="/sbin/modprobe" # CONFIG_TRIM_UNUSED_KSYMS is not set CONFIG_MODULES_TREE_LOOKUP=y CONFIG_BLOCK=y CONFIG_BLK_RQ_ALLOC_TIME=y -CONFIG_BLK_SCSI_REQUEST=y -CONFIG_BLK_DEV_BSG=y +CONFIG_BLK_CGROUP_RWSTAT=y +CONFIG_BLK_DEV_BSG_COMMON=y CONFIG_BLK_DEV_BSGLIB=y 
CONFIG_BLK_DEV_INTEGRITY=y +CONFIG_BLK_DEV_INTEGRITY_T10=y # CONFIG_BLK_DEV_ZONED is not set CONFIG_BLK_DEV_THROTTLING=y # CONFIG_BLK_DEV_THROTTLING_LOW is not set -# CONFIG_BLK_CMDLINE_PARSER is not set CONFIG_BLK_WBT=y +CONFIG_BLK_WBT_MQ=y # CONFIG_BLK_CGROUP_IOLATENCY is not set CONFIG_BLK_CGROUP_IOCOST=y -CONFIG_BLK_WBT_MQ=y +# CONFIG_BLK_CGROUP_IOPRIO is not set CONFIG_BLK_DEBUG_FS=y CONFIG_BLK_SED_OPAL=y +# CONFIG_BLK_INLINE_ENCRYPTION is not set # # Partition Types @@ -955,6 +910,7 @@ CONFIG_EFI_PARTITION=y CONFIG_BLK_MQ_PCI=y CONFIG_BLK_MQ_VIRTIO=y CONFIG_BLK_PM=y +CONFIG_BLOCK_HOLDER_DEPRECATED=y # # IO Schedulers @@ -966,7 +922,6 @@ CONFIG_BFQ_GROUP_IOSCHED=y CONFIG_BFQ_CGROUP_DEBUG=y # end of IO Schedulers -CONFIG_PREEMPT_NOTIFIERS=y CONFIG_ASN1=y CONFIG_INLINE_SPIN_UNLOCK_IRQ=y CONFIG_INLINE_READ_UNLOCK=y @@ -977,6 +932,7 @@ CONFIG_ARCH_SUPPORTS_ATOMIC_RMW=y CONFIG_MUTEX_SPIN_ON_OWNER=y CONFIG_RWSEM_SPIN_ON_OWNER=y CONFIG_LOCK_SPIN_ON_OWNER=y +CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE=y CONFIG_FREEZER=y # @@ -997,8 +953,10 @@ CONFIG_COREDUMP=y # # Memory Management options # +CONFIG_SELECT_MEMORY_MODEL=y +CONFIG_FLATMEM_MANUAL=y +# CONFIG_SPARSEMEM_MANUAL is not set CONFIG_FLATMEM=y -CONFIG_FLAT_NODE_MEM_MAP=y CONFIG_HAVE_FAST_GUP=y CONFIG_ARCH_KEEP_MEMBLOCK=y CONFIG_MEMORY_ISOLATION=y @@ -1006,6 +964,7 @@ CONFIG_SPLIT_PTLOCK_CPUS=4 CONFIG_MEMORY_BALLOON=y CONFIG_BALLOON_COMPACTION=y CONFIG_COMPACTION=y +CONFIG_PAGE_REPORTING=y CONFIG_MIGRATION=y CONFIG_CONTIG_ALLOC=y CONFIG_PHYS_ADDR_T_64BIT=y @@ -1018,25 +977,45 @@ CONFIG_DEFAULT_MMAP_MIN_ADDR=4096 CONFIG_TRANSPARENT_HUGEPAGE=y # CONFIG_TRANSPARENT_HUGEPAGE_ALWAYS is not set CONFIG_TRANSPARENT_HUGEPAGE_MADVISE=y -CONFIG_TRANSPARENT_HUGE_PAGECACHE=y # CONFIG_CLEANCACHE is not set CONFIG_FRONTSWAP=y CONFIG_CMA=y # CONFIG_CMA_DEBUG is not set CONFIG_CMA_DEBUGFS=y +# CONFIG_CMA_SYSFS is not set CONFIG_CMA_AREAS=7 CONFIG_ZSWAP=y +# CONFIG_ZSWAP_COMPRESSOR_DEFAULT_DEFLATE is not set +CONFIG_ZSWAP_COMPRESSOR_DEFAULT_LZO=y +# CONFIG_ZSWAP_COMPRESSOR_DEFAULT_842 is not set +# CONFIG_ZSWAP_COMPRESSOR_DEFAULT_LZ4 is not set +# CONFIG_ZSWAP_COMPRESSOR_DEFAULT_LZ4HC is not set +# CONFIG_ZSWAP_COMPRESSOR_DEFAULT_ZSTD is not set +CONFIG_ZSWAP_COMPRESSOR_DEFAULT="lzo" +CONFIG_ZSWAP_ZPOOL_DEFAULT_ZBUD=y +# CONFIG_ZSWAP_ZPOOL_DEFAULT_Z3FOLD is not set +# CONFIG_ZSWAP_ZPOOL_DEFAULT_ZSMALLOC is not set +CONFIG_ZSWAP_ZPOOL_DEFAULT="zbud" +# CONFIG_ZSWAP_DEFAULT_ON is not set CONFIG_ZPOOL=y CONFIG_ZBUD=y CONFIG_Z3FOLD=m # CONFIG_ZSMALLOC is not set CONFIG_GENERIC_EARLY_IOREMAP=y # CONFIG_IDLE_PAGE_TRACKING is not set -CONFIG_FRAME_VECTOR=y +CONFIG_ZONE_DMA=y # CONFIG_PERCPU_STATS is not set -# CONFIG_GUP_BENCHMARK is not set +# CONFIG_GUP_TEST is not set # CONFIG_READ_ONLY_THP_FOR_FS is not set CONFIG_ARCH_HAS_PTE_SPECIAL=y +CONFIG_KMAP_LOCAL=y +CONFIG_KMAP_LOCAL_NON_LINEAR_PTE_ARRAY=y + +# +# Data Access Monitoring +# +# CONFIG_DAMON is not set +# end of Data Access Monitoring # end of Memory Management options CONFIG_NET=y @@ -1051,6 +1030,7 @@ CONFIG_PACKET=y CONFIG_PACKET_DIAG=m CONFIG_UNIX=y CONFIG_UNIX_SCM=y +CONFIG_AF_UNIX_OOB=y CONFIG_UNIX_DIAG=m # CONFIG_TLS is not set CONFIG_XFRM=y @@ -1061,6 +1041,8 @@ CONFIG_XFRM_USER=m CONFIG_XFRM_SUB_POLICY=y CONFIG_XFRM_MIGRATE=y # CONFIG_XFRM_STATISTICS is not set +CONFIG_XFRM_AH=m +CONFIG_XFRM_ESP=m CONFIG_XFRM_IPCOMP=m CONFIG_NET_KEY=m CONFIG_NET_KEY_MIGRATE=y @@ -1076,7 +1058,7 @@ CONFIG_IP_ROUTE_CLASSID=y # CONFIG_IP_PNP is not set CONFIG_NET_IPIP=m CONFIG_NET_IPGRE_DEMUX=m 
-CONFIG_NET_IP_TUNNEL=m +CONFIG_NET_IP_TUNNEL=y CONFIG_NET_IPGRE=m CONFIG_NET_IPGRE_BROADCAST=y CONFIG_IP_MROUTE_COMMON=y @@ -1086,13 +1068,15 @@ CONFIG_IP_PIMSM_V1=y CONFIG_IP_PIMSM_V2=y CONFIG_SYN_COOKIES=y CONFIG_NET_IPVTI=m -CONFIG_NET_UDP_TUNNEL=m +CONFIG_NET_UDP_TUNNEL=y CONFIG_NET_FOU=m CONFIG_NET_FOU_IP_TUNNELS=y CONFIG_INET_AH=m CONFIG_INET_ESP=m CONFIG_INET_ESP_OFFLOAD=m +# CONFIG_INET_ESPINTCP is not set CONFIG_INET_IPCOMP=m +CONFIG_INET_TABLE_PERTURB_ORDER=16 CONFIG_INET_XFRM_TUNNEL=m CONFIG_INET_TUNNEL=m CONFIG_INET_DIAG=m @@ -1128,6 +1112,7 @@ CONFIG_IPV6_OPTIMISTIC_DAD=y CONFIG_INET6_AH=m CONFIG_INET6_ESP=m CONFIG_INET6_ESP_OFFLOAD=m +# CONFIG_INET6_ESPINTCP is not set CONFIG_INET6_IPCOMP=m CONFIG_IPV6_MIP6=m # CONFIG_IPV6_ILA is not set @@ -1148,7 +1133,10 @@ CONFIG_IPV6_MROUTE_MULTIPLE_TABLES=y CONFIG_IPV6_PIMSM_V2=y # CONFIG_IPV6_SEG6_LWTUNNEL is not set # CONFIG_IPV6_SEG6_HMAC is not set +# CONFIG_IPV6_RPL_LWTUNNEL is not set +# CONFIG_IPV6_IOAM6_LWTUNNEL is not set # CONFIG_NETLABEL is not set +# CONFIG_MPTCP is not set # CONFIG_NETWORK_SECMARK is not set # CONFIG_NETWORK_PHY_TIMESTAMPING is not set CONFIG_NETFILTER=y @@ -1162,13 +1150,13 @@ CONFIG_NETFILTER_INGRESS=y CONFIG_NETFILTER_NETLINK=m CONFIG_NETFILTER_FAMILY_BRIDGE=y CONFIG_NETFILTER_FAMILY_ARP=y +# CONFIG_NETFILTER_NETLINK_HOOK is not set CONFIG_NETFILTER_NETLINK_ACCT=m CONFIG_NETFILTER_NETLINK_QUEUE=m CONFIG_NETFILTER_NETLINK_LOG=m CONFIG_NETFILTER_NETLINK_OSF=m CONFIG_NF_CONNTRACK=m -CONFIG_NF_LOG_COMMON=m -CONFIG_NF_LOG_NETDEV=m +CONFIG_NF_LOG_SYSLOG=m CONFIG_NETFILTER_CONNCOUNT=m CONFIG_NF_CONNTRACK_MARK=y CONFIG_NF_CONNTRACK_ZONES=y @@ -1206,7 +1194,6 @@ CONFIG_NF_NAT_REDIRECT=y CONFIG_NF_NAT_MASQUERADE=y CONFIG_NETFILTER_SYNPROXY=m CONFIG_NF_TABLES=m -CONFIG_NF_TABLES_SET=m CONFIG_NF_TABLES_INET=y CONFIG_NF_TABLES_NETDEV=y CONFIG_NFT_NUMGEN=m @@ -1238,6 +1225,7 @@ CONFIG_NF_DUP_NETDEV=m CONFIG_NFT_DUP_NETDEV=m CONFIG_NFT_FWD_NETDEV=m # CONFIG_NFT_FIB_NETDEV is not set +# CONFIG_NFT_REJECT_NETDEV is not set CONFIG_NF_FLOW_TABLE_INET=m CONFIG_NF_FLOW_TABLE=m CONFIG_NETFILTER_XTABLES=m @@ -1429,7 +1417,6 @@ CONFIG_NF_DEFRAG_IPV6=m CONFIG_NF_TABLES_BRIDGE=m CONFIG_NFT_BRIDGE_META=m CONFIG_NFT_BRIDGE_REJECT=m -CONFIG_NF_LOG_BRIDGE=m CONFIG_NF_CONNTRACK_BRIDGE=m CONFIG_BRIDGE_NF_EBTABLES=m CONFIG_BRIDGE_EBT_BROUTE=m @@ -1476,21 +1463,28 @@ CONFIG_MRP=m CONFIG_BRIDGE=m CONFIG_BRIDGE_IGMP_SNOOPING=y CONFIG_BRIDGE_VLAN_FILTERING=y -CONFIG_HAVE_NET_DSA=y +# CONFIG_BRIDGE_MRP is not set +# CONFIG_BRIDGE_CFM is not set CONFIG_NET_DSA=m -# CONFIG_NET_DSA_TAG_8021Q is not set +# CONFIG_NET_DSA_TAG_AR9331 is not set CONFIG_NET_DSA_TAG_BRCM_COMMON=m CONFIG_NET_DSA_TAG_BRCM=m +CONFIG_NET_DSA_TAG_BRCM_LEGACY=m CONFIG_NET_DSA_TAG_BRCM_PREPEND=m +# CONFIG_NET_DSA_TAG_HELLCREEK is not set # CONFIG_NET_DSA_TAG_GSWIP is not set # CONFIG_NET_DSA_TAG_DSA is not set # CONFIG_NET_DSA_TAG_EDSA is not set # CONFIG_NET_DSA_TAG_MTK is not set # CONFIG_NET_DSA_TAG_KSZ is not set +# CONFIG_NET_DSA_TAG_RTL4_A is not set +# CONFIG_NET_DSA_TAG_OCELOT is not set +# CONFIG_NET_DSA_TAG_OCELOT_8021Q is not set # CONFIG_NET_DSA_TAG_QCA is not set # CONFIG_NET_DSA_TAG_LAN9303 is not set # CONFIG_NET_DSA_TAG_SJA1105 is not set # CONFIG_NET_DSA_TAG_TRAILER is not set +# CONFIG_NET_DSA_TAG_XRS700X is not set CONFIG_VLAN_8021Q=m CONFIG_VLAN_8021Q_GVRP=y CONFIG_VLAN_8021Q_MVRP=y @@ -1535,8 +1529,10 @@ CONFIG_NET_SCH_CAKE=m CONFIG_NET_SCH_FQ=m CONFIG_NET_SCH_HHF=m CONFIG_NET_SCH_PIE=m +# CONFIG_NET_SCH_FQ_PIE is not set CONFIG_NET_SCH_INGRESS=m 
CONFIG_NET_SCH_PLUG=m +# CONFIG_NET_SCH_ETS is not set CONFIG_NET_SCH_DEFAULT=y # CONFIG_DEFAULT_FQ is not set # CONFIG_DEFAULT_CODEL is not set @@ -1585,6 +1581,7 @@ CONFIG_NET_ACT_SKBMOD=m # CONFIG_NET_ACT_IFE is not set CONFIG_NET_ACT_TUNNEL_KEY=m CONFIG_NET_ACT_CT=m +# CONFIG_NET_ACT_GATE is not set # CONFIG_NET_TC_SKB_EXT is not set CONFIG_NET_SCH_FIFO=y # CONFIG_DCB is not set @@ -1603,15 +1600,17 @@ CONFIG_NET_NSH=m # CONFIG_HSR is not set CONFIG_NET_SWITCHDEV=y CONFIG_NET_L3_MASTER_DEV=y +# CONFIG_QRTR is not set # CONFIG_NET_NCSI is not set +CONFIG_PCPU_DEV_REFCNT=y CONFIG_RPS=y CONFIG_RFS_ACCEL=y +CONFIG_SOCK_RX_QUEUE_MAPPING=y CONFIG_XPS=y CONFIG_CGROUP_NET_PRIO=y CONFIG_CGROUP_NET_CLASSID=y CONFIG_NET_RX_BUSY_POLL=y CONFIG_BQL=y -CONFIG_BPF_JIT=y # CONFIG_BPF_STREAM_PARSER is not set CONFIG_NET_FLOW_LIMIT=y @@ -1636,8 +1635,11 @@ CONFIG_BT_HIDP=m CONFIG_BT_HS=y CONFIG_BT_LE=y CONFIG_BT_LEDS=y -# CONFIG_BT_SELFTEST is not set +# CONFIG_BT_MSFTEXT is not set +# CONFIG_BT_AOSPEXT is not set # CONFIG_BT_DEBUGFS is not set +# CONFIG_BT_SELFTEST is not set +# CONFIG_BT_FEATURE_DEBUG is not set # # Bluetooth device drivers @@ -1662,6 +1664,7 @@ CONFIG_BT_HCIUART_LL=y CONFIG_BT_HCIUART_3WIRE=y CONFIG_BT_HCIUART_INTEL=y CONFIG_BT_HCIUART_BCM=y +# CONFIG_BT_HCIUART_RTL is not set CONFIG_BT_HCIUART_QCA=y CONFIG_BT_HCIUART_AG6XX=y CONFIG_BT_HCIUART_MRVL=y @@ -1674,10 +1677,12 @@ CONFIG_BT_MRVL=m CONFIG_BT_ATH3K=m # CONFIG_BT_MTKSDIO is not set CONFIG_BT_MTKUART=m +# CONFIG_BT_VIRTIO is not set # end of Bluetooth device drivers # CONFIG_AF_RXRPC is not set # CONFIG_AF_KCM is not set +# CONFIG_MCTP is not set CONFIG_FIB_RULES=y CONFIG_WIRELESS=y CONFIG_WEXT_CORE=y @@ -1703,7 +1708,6 @@ CONFIG_MAC80211_LEDS=y # CONFIG_MAC80211_MESSAGE_TRACING is not set # CONFIG_MAC80211_DEBUG_MENU is not set CONFIG_MAC80211_STA_HASH_MAX_SIZE=0 -# CONFIG_WIMAX is not set CONFIG_RFKILL=m CONFIG_RFKILL_LEDS=y CONFIG_RFKILL_INPUT=y @@ -1721,10 +1725,12 @@ CONFIG_LWTUNNEL=y # CONFIG_LWTUNNEL_BPF is not set CONFIG_DST_CACHE=y CONFIG_GRO_CELLS=y +CONFIG_NET_SELFTESTS=y +CONFIG_NET_SOCK_MSG=y CONFIG_NET_DEVLINK=y CONFIG_PAGE_POOL=y CONFIG_FAILOVER=y -CONFIG_HAVE_EBPF_JIT=y +CONFIG_ETHTOOL_NETLINK=y # # Device Drivers @@ -1741,7 +1747,6 @@ CONFIG_PCIEAER=y # CONFIG_PCIEAER_INJECT is not set # CONFIG_PCIE_ECRC is not set CONFIG_PCIEASPM=y -CONFIG_PCIEASPM_DEBUG=y CONFIG_PCIEASPM_DEFAULT=y # CONFIG_PCIEASPM_POWERSAVE is not set # CONFIG_PCIEASPM_POWER_SUPERSAVE is not set @@ -1749,7 +1754,6 @@ CONFIG_PCIEASPM_DEFAULT=y CONFIG_PCIE_PME=y # CONFIG_PCIE_DPC is not set # CONFIG_PCIE_PTM is not set -# CONFIG_PCIE_BW is not set CONFIG_PCI_MSI=y CONFIG_PCI_MSI_IRQ_DOMAIN=y CONFIG_PCI_QUIRKS=y @@ -1763,6 +1767,11 @@ CONFIG_PCI_IOV=y # CONFIG_PCI_PRI is not set # CONFIG_PCI_PASID is not set CONFIG_PCI_LABEL=y +# CONFIG_PCIE_BUS_TUNE_OFF is not set +CONFIG_PCIE_BUS_DEFAULT=y +# CONFIG_PCIE_BUS_SAFE is not set +# CONFIG_PCIE_BUS_PERFORMANCE is not set +# CONFIG_PCIE_BUS_PEER2PEER is not set CONFIG_HOTPLUG_PCI=y # CONFIG_HOTPLUG_PCI_CPCI is not set CONFIG_HOTPLUG_PCI_SHPC=y @@ -1770,13 +1779,6 @@ CONFIG_HOTPLUG_PCI_SHPC=y # # PCI controller drivers # - -# -# Cadence PCIe controllers support -# -# CONFIG_PCIE_CADENCE_HOST is not set -# end of Cadence PCIe controllers support - # CONFIG_PCI_FTPCI100 is not set CONFIG_PCI_HOST_COMMON=y CONFIG_PCI_HOST_GENERIC=y @@ -1784,6 +1786,8 @@ CONFIG_PCI_HOST_GENERIC=y # CONFIG_PCI_V3_SEMI is not set # CONFIG_PCIE_ALTERA is not set # CONFIG_PCIE_ROCKCHIP_HOST is not set +# CONFIG_PCIE_BRCMSTB 
is not set +# CONFIG_PCIE_MICROCHIP_HOST is not set # # DesignWare PCI Core Support @@ -1793,10 +1797,24 @@ CONFIG_PCIE_DW_HOST=y CONFIG_PCI_DRA7XX=y CONFIG_PCI_DRA7XX_HOST=y # CONFIG_PCIE_DW_PLAT_HOST is not set +# CONFIG_PCI_EXYNOS is not set # CONFIG_PCI_IMX6 is not set # CONFIG_PCI_LAYERSCAPE is not set +# CONFIG_PCIE_ROCKCHIP_DW_HOST is not set # CONFIG_PCI_MESON is not set # end of DesignWare PCI Core Support + +# +# Mobiveil PCIe Core Support +# +# end of Mobiveil PCIe Core Support + +# +# Cadence PCIe controllers support +# +# CONFIG_PCIE_CADENCE_PLAT_HOST is not set +# CONFIG_PCI_J721E_HOST is not set +# end of Cadence PCIe controllers support # end of PCI controller drivers # @@ -1811,6 +1829,7 @@ CONFIG_PCI_DRA7XX_HOST=y # CONFIG_PCI_SW_SWITCHTEC is not set # end of PCI switch controller drivers +# CONFIG_CXL_BUS is not set # CONFIG_PCCARD is not set # CONFIG_RAPIDIO is not set @@ -1831,6 +1850,7 @@ CONFIG_FW_LOADER_PAGED_BUF=y CONFIG_EXTRA_FIRMWARE="" # CONFIG_FW_LOADER_USER_HELPER is not set CONFIG_FW_LOADER_COMPRESS=y +CONFIG_FW_CACHE=y # end of Firmware loader CONFIG_WANT_DEV_COREDUMP=y @@ -1841,6 +1861,7 @@ CONFIG_WANT_DEV_COREDUMP=y # CONFIG_TEST_ASYNC_DRIVER_PROBE is not set CONFIG_SYS_HYPERVISOR=y CONFIG_GENERIC_CPU_AUTOPROBE=y +CONFIG_GENERIC_CPU_VULNERABILITIES=y CONFIG_SOC_BUS=y CONFIG_REGMAP=y CONFIG_REGMAP_I2C=y @@ -1863,14 +1884,66 @@ CONFIG_ARM_CCI400_PORT_CTRL=y # CONFIG_IMX_WEIM is not set CONFIG_OMAP_INTERCONNECT=y CONFIG_OMAP_OCP2SCP=y -CONFIG_SIMPLE_PM_BUS=y # CONFIG_SUN50I_DE2_BUS is not set CONFIG_SUNXI_RSB=y CONFIG_TI_SYSC=y CONFIG_VEXPRESS_CONFIG=y +# CONFIG_MHI_BUS is not set # end of Bus devices CONFIG_CONNECTOR=m + +# +# Firmware Drivers +# + +# +# ARM System Control and Management Interface Protocol +# +# CONFIG_ARM_SCMI_PROTOCOL is not set +# end of ARM System Control and Management Interface Protocol + +CONFIG_ARM_SCPI_PROTOCOL=m +CONFIG_ARM_SCPI_POWER_DOMAIN=m +CONFIG_FIRMWARE_MEMMAP=y +CONFIG_DMIID=y +CONFIG_DMI_SYSFS=y +CONFIG_RASPBERRYPI_FIRMWARE=y +CONFIG_SYSFB=y +# CONFIG_SYSFB_SIMPLEFB is not set +CONFIG_TRUSTED_FOUNDATIONS=y +# CONFIG_GOOGLE_FIRMWARE is not set + +# +# EFI (Extensible Firmware Interface) Support +# +CONFIG_EFI_ESRT=y +CONFIG_EFI_VARS_PSTORE=y +# CONFIG_EFI_VARS_PSTORE_DEFAULT_DISABLE is not set +CONFIG_EFI_PARAMS_FROM_FDT=y +CONFIG_EFI_RUNTIME_WRAPPERS=y +CONFIG_EFI_GENERIC_STUB=y +CONFIG_EFI_ARMSTUB_DTB_LOADER=y +# CONFIG_EFI_GENERIC_STUB_INITRD_CMDLINE_LOADER is not set +# CONFIG_EFI_BOOTLOADER_CONTROL is not set +# CONFIG_EFI_CAPSULE_LOADER is not set +# CONFIG_EFI_TEST is not set +# CONFIG_RESET_ATTACK_MITIGATION is not set +# CONFIG_EFI_DISABLE_PCI_DMA is not set +# end of EFI (Extensible Firmware Interface) Support + +CONFIG_ARM_PSCI_FW=y +# CONFIG_ARM_PSCI_CHECKER is not set +CONFIG_HAVE_ARM_SMCCC=y +CONFIG_HAVE_ARM_SMCCC_DISCOVERY=y +CONFIG_ARM_SMCCC_SOC_ID=y + +# +# Tegra firmware driver +# +# end of Tegra firmware driver +# end of Firmware Drivers + # CONFIG_GNSS is not set CONFIG_MTD=m # CONFIG_MTD_TESTS is not set @@ -1891,6 +1964,10 @@ CONFIG_MTD_OF_PARTS=m CONFIG_MTD_BLKDEVS=m CONFIG_MTD_BLOCK=m # CONFIG_MTD_BLOCK_RO is not set + +# +# Note that in some cases UBI block is preferred. See MTD_UBI_BLOCK. 
+#
# CONFIG_FTL is not set
# CONFIG_NFTL is not set
# CONFIG_INFTL is not set
@@ -1939,6 +2016,7 @@ CONFIG_MTD_COMPLEX_MAPPINGS=y
# CONFIG_MTD_PMC551 is not set
# CONFIG_MTD_DATAFLASH is not set
# CONFIG_MTD_MCHP23K256 is not set
+# CONFIG_MTD_MCHP48L640 is not set
# CONFIG_MTD_SST25L is not set
# CONFIG_MTD_BCM47XXSFLASH is not set
# CONFIG_MTD_SLRAM is not set
@@ -1952,10 +2030,21 @@ CONFIG_MTD_BLOCK2MTD=m
# CONFIG_MTD_DOCG3 is not set
# end of Self-contained MTD device drivers
+
+#
+# NAND
+#
# CONFIG_MTD_ONENAND is not set
# CONFIG_MTD_RAW_NAND is not set
# CONFIG_MTD_SPI_NAND is not set
+
+#
+# ECC engine support
+#
+# CONFIG_MTD_NAND_ECC_SW_HAMMING is not set
+# CONFIG_MTD_NAND_ECC_SW_BCH is not set
+# end of ECC engine support
+# end of NAND
+
#
# LPDDR & LPDDR2 PCM memory drivers
#
@@ -1965,8 +2054,9 @@
CONFIG_MTD_SPI_NOR=m
CONFIG_MTD_SPI_NOR_USE_4K_SECTORS=y
-# CONFIG_SPI_CADENCE_QUADSPI is not set
-# CONFIG_SPI_MTK_QUADSPI is not set
+# CONFIG_MTD_SPI_NOR_SWP_DISABLE is not set
+CONFIG_MTD_SPI_NOR_SWP_DISABLE_ON_VOLATILE=y
+# CONFIG_MTD_SPI_NOR_SWP_KEEP is not set
CONFIG_MTD_UBI=m
CONFIG_MTD_UBI_WL_THRESHOLD=4096
CONFIG_MTD_UBI_BEB_LIMIT=20
@@ -1983,8 +2073,6 @@ CONFIG_OF_KOBJ=y
CONFIG_OF_DYNAMIC=y
CONFIG_OF_ADDRESS=y
CONFIG_OF_IRQ=y
-CONFIG_OF_NET=y
-CONFIG_OF_MDIO=y
CONFIG_OF_RESERVED_MEM=y
CONFIG_OF_RESOLVE=y
CONFIG_OF_OVERLAY=y
@@ -1994,7 +2082,6 @@ CONFIG_BLK_DEV=y
# CONFIG_BLK_DEV_NULL_BLK is not set
CONFIG_CDROM=y
# CONFIG_BLK_DEV_PCIESSD_MTIP32XX is not set
-# CONFIG_BLK_DEV_UMEM is not set
CONFIG_BLK_DEV_LOOP=y
CONFIG_BLK_DEV_LOOP_MIN_COUNT=8
# CONFIG_BLK_DEV_CRYPTOLOOP is not set
@@ -2009,7 +2096,6 @@ CONFIG_ATA_OVER_ETH=m
CONFIG_XEN_BLKDEV_FRONTEND=y
CONFIG_XEN_BLKDEV_BACKEND=m
CONFIG_VIRTIO_BLK=y
-# CONFIG_VIRTIO_BLK_SCSI is not set
# CONFIG_BLK_DEV_RBD is not set
# CONFIG_BLK_DEV_RSXX is not set
@@ -2019,6 +2105,7 @@ CONFIG_VIRTIO_BLK=y
CONFIG_NVME_CORE=y
CONFIG_BLK_DEV_NVME=y
# CONFIG_NVME_MULTIPATH is not set
+# CONFIG_NVME_HWMON is not set
# CONFIG_NVME_FC is not set
# CONFIG_NVME_TCP is not set
# CONFIG_NVME_TARGET is not set
@@ -2045,10 +2132,10 @@
# CONFIG_LATTICE_ECP3_CONFIG is not set
CONFIG_SRAM=y
CONFIG_SRAM_EXEC=y
-CONFIG_VEXPRESS_SYSCFG=y
+# CONFIG_DW_XDATA_PCIE is not set
# CONFIG_PCI_ENDPOINT_TEST is not set
# CONFIG_XILINX_SDFEC is not set
-# CONFIG_PVPANIC is not set
+# CONFIG_HISI_HIKEY_USB is not set
# CONFIG_C2PORT is not set
#
@@ -2075,60 +2162,22 @@ CONFIG_EEPROM_EE1004=m
# CONFIG_SENSORS_LIS3_SPI is not set
# CONFIG_SENSORS_LIS3_I2C is not set
# CONFIG_ALTERA_STAPL is not set
-
-#
-# Intel MIC & related support
-#
-
-#
-# Intel MIC Bus Driver
-#
-
-#
-# SCIF Bus Driver
-#
-
-#
-# VOP Bus Driver
-#
-# CONFIG_VOP_BUS is not set
-
-#
-# Intel MIC Host Driver
-#
-
-#
-# Intel MIC Card Driver
-#
-
-#
-# SCIF Driver
-#
-
-#
-# Intel MIC Coprocessor State Management (COSM) Drivers
-#
-
-#
-# VOP Driver
-#
-# end of Intel MIC & related support
-
# CONFIG_ECHO is not set
+# CONFIG_BCM_VK is not set
# CONFIG_MISC_ALCOR_PCI is not set
# CONFIG_MISC_RTSX_PCI is not set
# CONFIG_MISC_RTSX_USB is not set
# CONFIG_HABANA_AI is not set
+# CONFIG_UACCE is not set
+# CONFIG_PVPANIC is not set
# end of Misc devices
-CONFIG_HAVE_IDE=y
-# CONFIG_IDE is not set
-
#
# SCSI device support
#
CONFIG_SCSI_MOD=y
CONFIG_RAID_ATTRS=m
+CONFIG_SCSI_COMMON=y
CONFIG_SCSI=y
CONFIG_SCSI_DMA=y
CONFIG_SCSI_PROC_FS=y
@@ -2140,6 +2189,7 @@ CONFIG_BLK_DEV_SD=y
CONFIG_CHR_DEV_ST=m
CONFIG_BLK_DEV_SR=y
CONFIG_CHR_DEV_SG=y
+CONFIG_BLK_DEV_BSG=y
CONFIG_CHR_DEV_SCH=m
CONFIG_SCSI_CONSTANTS=y
# CONFIG_SCSI_LOGGING is not set
@@ -2186,6 +2236,7 @@ CONFIG_SCSI_MPT3SAS=m
CONFIG_SCSI_MPT2SAS_MAX_SGE=128
CONFIG_SCSI_MPT3SAS_MAX_SGE=128
# CONFIG_SCSI_MPT2SAS is not set
+# CONFIG_SCSI_MPI3MR is not set
# CONFIG_SCSI_SMARTPQI is not set
# CONFIG_SCSI_UFSHCD is not set
# CONFIG_SCSI_HPTIOP is not set
@@ -2195,7 +2246,6 @@ CONFIG_XEN_SCSI_FRONTEND=y
# CONFIG_SCSI_SNIC is not set
# CONFIG_SCSI_DMX3191D is not set
# CONFIG_SCSI_FDOMAIN_PCI is not set
-# CONFIG_SCSI_GDTH is not set
# CONFIG_SCSI_IPS is not set
# CONFIG_SCSI_INITIO is not set
# CONFIG_SCSI_INIA100 is not set
@@ -2217,7 +2267,9 @@ CONFIG_SCSI_VIRTIO=y
CONFIG_HAVE_PATA_PLATFORM=y
CONFIG_ATA=y
+CONFIG_SATA_HOST=y
CONFIG_ATA_VERBOSE_ERROR=y
+CONFIG_ATA_FORCE=y
CONFIG_SATA_PMP=y
#
@@ -2247,6 +2299,7 @@ CONFIG_MD_RAID456=m
CONFIG_BCACHE=m
# CONFIG_BCACHE_DEBUG is not set
# CONFIG_BCACHE_CLOSURES_DEBUG is not set
+# CONFIG_BCACHE_ASYNC_REGISTRATION is not set
CONFIG_BLK_DEV_DM_BUILTIN=y
CONFIG_BLK_DEV_DM=m
# CONFIG_DM_DEBUG is not set
@@ -2293,6 +2346,8 @@ CONFIG_MII=y
CONFIG_NET_CORE=y
CONFIG_BONDING=m
CONFIG_DUMMY=m
+CONFIG_WIREGUARD=m
+# CONFIG_WIREGUARD_DEBUG is not set
# CONFIG_EQUALIZER is not set
# CONFIG_NET_FC is not set
# CONFIG_IFB is not set
@@ -2309,6 +2364,7 @@ CONFIG_IPVLAN=m
CONFIG_IPVTAP=m
CONFIG_VXLAN=m
# CONFIG_GENEVE is not set
+# CONFIG_BAREUDP is not set
# CONFIG_GTP is not set
CONFIG_MACSEC=m
CONFIG_NETCONSOLE=m
@@ -2324,10 +2380,6 @@ CONFIG_VIRTIO_NET=m
# CONFIG_NET_VRF is not set
# CONFIG_ARCNET is not set
-
-#
-# CAIF transport drivers
-#
-
#
# Distributed Switch Architecture drivers
#
@@ -2345,7 +2397,12 @@ CONFIG_NET_DSA_BCM_SF2=m
# CONFIG_NET_DSA_MICROCHIP_KSZ9477 is not set
# CONFIG_NET_DSA_MICROCHIP_KSZ8795 is not set
# CONFIG_NET_DSA_MV88E6XXX is not set
+# CONFIG_NET_DSA_MSCC_FELIX is not set
+# CONFIG_NET_DSA_MSCC_SEVILLE is not set
+# CONFIG_NET_DSA_AR9331 is not set
# CONFIG_NET_DSA_SJA1105 is not set
+# CONFIG_NET_DSA_XRS700X_I2C is not set
+# CONFIG_NET_DSA_XRS700X_MDIO is not set
# CONFIG_NET_DSA_QCA8K is not set
# CONFIG_NET_DSA_REALTEK_SMI is not set
# CONFIG_NET_DSA_SMSC_LAN9303_I2C is not set
@@ -2374,6 +2431,7 @@ CONFIG_NET_VENDOR_AMD=y
# CONFIG_AMD8111_ETH is not set
# CONFIG_PCNET32 is not set
CONFIG_NET_VENDOR_AQUANTIA=y
+# CONFIG_AQTION is not set
CONFIG_NET_VENDOR_ARC=y
# CONFIG_EMAC_ROCKCHIP is not set
CONFIG_NET_VENDOR_ATHEROS=y
@@ -2382,8 +2440,6 @@ CONFIG_NET_VENDOR_ATHEROS=y
# CONFIG_ATL1E is not set
# CONFIG_ATL1C is not set
# CONFIG_ALX is not set
-CONFIG_NET_VENDOR_AURORA=y
-# CONFIG_AURORA_NB8800 is not set
CONFIG_NET_VENDOR_BROADCOM=y
# CONFIG_B44 is not set
CONFIG_BCMGENET=m
@@ -2393,11 +2449,8 @@ CONFIG_BCMGENET=m
# CONFIG_BNX2X is not set
CONFIG_SYSTEMPORT=m
# CONFIG_BNXT is not set
-CONFIG_NET_VENDOR_BROCADE=y
-# CONFIG_BNA is not set
CONFIG_NET_VENDOR_CADENCE=y
CONFIG_MACB=y
-CONFIG_MACB_USE_HWSTAMP=y
# CONFIG_MACB_PCI is not set
CONFIG_NET_VENDOR_CAVIUM=y
CONFIG_NET_VENDOR_CHELSIO=y
@@ -2406,7 +2459,7 @@ CONFIG_NET_VENDOR_CHELSIO=y
# CONFIG_CHELSIO_T4 is not set
# CONFIG_CHELSIO_T4VF is not set
CONFIG_NET_VENDOR_CIRRUS=y
-# CONFIG_CS89x0 is not set
+# CONFIG_CS89x0_PLATFORM is not set
CONFIG_NET_VENDOR_CISCO=y
# CONFIG_ENIC is not set
CONFIG_NET_VENDOR_CORTINA=y
@@ -2430,18 +2483,19 @@ CONFIG_FEC=y
CONFIG_FSL_PQ_MDIO=y
# CONFIG_FSL_XGMAC_MDIO is not set
CONFIG_GIANFAR=y
+# CONFIG_FSL_DPAA2_SWITCH is not set
+# CONFIG_FSL_ENETC is not set
+# CONFIG_FSL_ENETC_VF is not set
+# CONFIG_FSL_ENETC_IERB is not set
+# CONFIG_FSL_ENETC_MDIO is not set
CONFIG_NET_VENDOR_GOOGLE=y
-# CONFIG_GVE is not set
CONFIG_NET_VENDOR_HISILICON=y
CONFIG_HIX5HD2_GMAC=y
# CONFIG_HISI_FEMAC is not set
# CONFIG_HIP04_ETH is not set
-# CONFIG_HNS is not set
# CONFIG_HNS_DSAF is not set
# CONFIG_HNS_ENET is not set
# CONFIG_HNS3 is not set
-CONFIG_NET_VENDOR_HP=y
-# CONFIG_HP100 is not set
CONFIG_NET_VENDOR_HUAWEI=y
CONFIG_NET_VENDOR_I825XX=y
CONFIG_NET_VENDOR_INTEL=y
@@ -2460,10 +2514,13 @@ CONFIG_IGB_HWMON=y
# CONFIG_FM10K is not set
# CONFIG_IGC is not set
# CONFIG_JME is not set
+CONFIG_NET_VENDOR_LITEX=y
+# CONFIG_LITEX_LITEETH is not set
CONFIG_NET_VENDOR_MARVELL=y
CONFIG_MVMDIO=y
# CONFIG_SKGE is not set
# CONFIG_SKY2 is not set
+# CONFIG_PRESTERA is not set
CONFIG_NET_VENDOR_MELLANOX=y
# CONFIG_MLX4_EN is not set
# CONFIG_MLX5_CORE is not set
@@ -2480,9 +2537,12 @@ CONFIG_NET_VENDOR_MICROCHIP=y
# CONFIG_LAN743X is not set
CONFIG_NET_VENDOR_MICROSEMI=y
# CONFIG_MSCC_OCELOT_SWITCH is not set
+CONFIG_NET_VENDOR_MICROSOFT=y
CONFIG_NET_VENDOR_MYRI=y
# CONFIG_MYRI10GE is not set
# CONFIG_FEALNX is not set
+CONFIG_NET_VENDOR_NI=y
+# CONFIG_NI_XGE_MANAGEMENT_ENET is not set
CONFIG_NET_VENDOR_NATSEMI=y
# CONFIG_NATSEMI is not set
# CONFIG_NS83820 is not set
@@ -2491,8 +2551,6 @@ CONFIG_NET_VENDOR_NETERION=y
# CONFIG_VXGE is not set
CONFIG_NET_VENDOR_NETRONOME=y
# CONFIG_NFP is not set
-CONFIG_NET_VENDOR_NI=y
-# CONFIG_NI_XGE_MANAGEMENT_ENET is not set
CONFIG_NET_VENDOR_8390=y
# CONFIG_AX88796 is not set
# CONFIG_NE2K_PCI is not set
@@ -2509,6 +2567,8 @@ CONFIG_NET_VENDOR_QLOGIC=y
# CONFIG_QLCNIC is not set
# CONFIG_NETXEN_NIC is not set
# CONFIG_QED is not set
+CONFIG_NET_VENDOR_BROCADE=y
+# CONFIG_BNA is not set
CONFIG_NET_VENDOR_QUALCOMM=y
# CONFIG_QCA7000_SPI is not set
# CONFIG_QCA7000_UART is not set
@@ -2526,14 +2586,14 @@ CONFIG_NET_VENDOR_ROCKER=y
CONFIG_NET_VENDOR_SAMSUNG=y
# CONFIG_SXGBE_ETH is not set
CONFIG_NET_VENDOR_SEEQ=y
-CONFIG_NET_VENDOR_SOLARFLARE=y
-# CONFIG_SFC is not set
-# CONFIG_SFC_FALCON is not set
CONFIG_NET_VENDOR_SILAN=y
# CONFIG_SC92031 is not set
CONFIG_NET_VENDOR_SIS=y
# CONFIG_SIS900 is not set
# CONFIG_SIS190 is not set
+CONFIG_NET_VENDOR_SOLARFLARE=y
+# CONFIG_SFC is not set
+# CONFIG_SFC_FALCON is not set
CONFIG_NET_VENDOR_SMSC=y
# CONFIG_SMC91X is not set
# CONFIG_EPIC100 is not set
@@ -2550,6 +2610,9 @@ CONFIG_DWMAC_GENERIC=y
CONFIG_DWMAC_ROCKCHIP=y
CONFIG_DWMAC_SUNXI=y
CONFIG_DWMAC_SUN8I=y
+CONFIG_DWMAC_IMX8=y
+# CONFIG_DWMAC_INTEL_PLAT is not set
+# CONFIG_DWMAC_LOONGSON is not set
# CONFIG_STMMAC_PCI is not set
CONFIG_NET_VENDOR_SUN=y
# CONFIG_HAPPYMEAL is not set
@@ -2565,7 +2628,7 @@ CONFIG_NET_VENDOR_TI=y
CONFIG_TI_DAVINCI_MDIO=y
# CONFIG_TI_CPSW_PHY_SEL is not set
CONFIG_TI_CPSW=y
-# CONFIG_TI_CPTS is not set
+# CONFIG_TI_CPSW_SWITCHDEV is not set
# CONFIG_TLAN is not set
CONFIG_NET_VENDOR_VIA=y
# CONFIG_VIA_RHINE is not set
@@ -2574,59 +2637,52 @@ CONFIG_NET_VENDOR_WIZNET=y
# CONFIG_WIZNET_W5100 is not set
# CONFIG_WIZNET_W5300 is not set
CONFIG_NET_VENDOR_XILINX=y
+# CONFIG_XILINX_EMACLITE is not set
# CONFIG_XILINX_AXI_EMAC is not set
+# CONFIG_XILINX_LL_TEMAC is not set
# CONFIG_FDDI is not set
# CONFIG_HIPPI is not set
-CONFIG_MDIO_DEVICE=y
-CONFIG_MDIO_BUS=y
-CONFIG_MDIO_BCM_UNIMAC=m
-CONFIG_MDIO_BITBANG=y
-CONFIG_MDIO_BUS_MUX=y
-# CONFIG_MDIO_BUS_MUX_GPIO is not set
-# CONFIG_MDIO_BUS_MUX_MMIOREG is not set
-# CONFIG_MDIO_BUS_MUX_MULTIPLEXER is not set
-# CONFIG_MDIO_GPIO is not set
-# CONFIG_MDIO_HISI_FEMAC is not set
-# CONFIG_MDIO_MSCC_MIIM is not set
-CONFIG_MDIO_SUN4I=y
CONFIG_PHYLINK=y
CONFIG_PHYLIB=y
CONFIG_SWPHY=y
# CONFIG_LED_TRIGGER_PHY is not set
+CONFIG_FIXED_PHY=y
+# CONFIG_SFP is not set
#
# MII PHY device drivers
#
-# CONFIG_SFP is not set
-# CONFIG_ADIN_PHY is not set
# CONFIG_AMD_PHY is not set
+# CONFIG_ADIN_PHY is not set
# CONFIG_AQUANTIA_PHY is not set
-# CONFIG_AX88796B_PHY is not set
-CONFIG_AT803X_PHY=y
+CONFIG_AX88796B_PHY=m
+CONFIG_BROADCOM_PHY=y
+# CONFIG_BCM54140_PHY is not set
CONFIG_BCM7XXX_PHY=m
+# CONFIG_BCM84881_PHY is not set
# CONFIG_BCM87XX_PHY is not set
CONFIG_BCM_NET_PHYLIB=y
-CONFIG_BROADCOM_PHY=y
# CONFIG_CICADA_PHY is not set
# CONFIG_CORTINA_PHY is not set
# CONFIG_DAVICOM_PHY is not set
-# CONFIG_DP83822_PHY is not set
-# CONFIG_DP83TC811_PHY is not set
-# CONFIG_DP83848_PHY is not set
-# CONFIG_DP83867_PHY is not set
-CONFIG_FIXED_PHY=y
CONFIG_ICPLUS_PHY=y
+# CONFIG_LXT_PHY is not set
# CONFIG_INTEL_XWAY_PHY is not set
# CONFIG_LSI_ET1011C_PHY is not set
-# CONFIG_LXT_PHY is not set
CONFIG_MARVELL_PHY=y
# CONFIG_MARVELL_10G_PHY is not set
+# CONFIG_MARVELL_88X2222_PHY is not set
+# CONFIG_MAXLINEAR_GPHY is not set
+# CONFIG_MEDIATEK_GE_PHY is not set
CONFIG_MICREL_PHY=y
CONFIG_MICROCHIP_PHY=m
# CONFIG_MICROCHIP_T1_PHY is not set
# CONFIG_MICROSEMI_PHY is not set
+# CONFIG_MOTORCOMM_PHY is not set
# CONFIG_NATIONAL_PHY is not set
+# CONFIG_NXP_C45_TJA11XX_PHY is not set
# CONFIG_NXP_TJA11XX_PHY is not set
+CONFIG_AT803X_PHY=y
# CONFIG_QSEMI_PHY is not set
CONFIG_REALTEK_PHY=y
# CONFIG_RENESAS_PHY is not set
@@ -2634,9 +2690,43 @@ CONFIG_ROCKCHIP_PHY=y
CONFIG_SMSC_PHY=y
# CONFIG_STE10XP is not set
# CONFIG_TERANETICS_PHY is not set
+# CONFIG_DP83822_PHY is not set
+# CONFIG_DP83TC811_PHY is not set
+# CONFIG_DP83848_PHY is not set
+# CONFIG_DP83867_PHY is not set
+# CONFIG_DP83869_PHY is not set
# CONFIG_VITESSE_PHY is not set
# CONFIG_XILINX_GMII2RGMII is not set
# CONFIG_MICREL_KS8995MA is not set
+CONFIG_MDIO_DEVICE=y
+CONFIG_MDIO_BUS=y
+CONFIG_FWNODE_MDIO=y
+CONFIG_OF_MDIO=y
+CONFIG_MDIO_DEVRES=y
+CONFIG_MDIO_SUN4I=y
+CONFIG_MDIO_BITBANG=y
+CONFIG_MDIO_BCM_UNIMAC=m
+# CONFIG_MDIO_GPIO is not set
+# CONFIG_MDIO_HISI_FEMAC is not set
+# CONFIG_MDIO_MVUSB is not set
+# CONFIG_MDIO_MSCC_MIIM is not set
+# CONFIG_MDIO_IPQ4019 is not set
+# CONFIG_MDIO_IPQ8064 is not set
+
+#
+# MDIO Multiplexers
+#
+CONFIG_MDIO_BUS_MUX=y
+# CONFIG_MDIO_BUS_MUX_GPIO is not set
+# CONFIG_MDIO_BUS_MUX_MULTIPLEXER is not set
+# CONFIG_MDIO_BUS_MUX_MMIOREG is not set
+
+#
+# PCS device drivers
+#
+CONFIG_PCS_XPCS=y
+# end of PCS device drivers
+
CONFIG_PPP=m
# CONFIG_PPP_BSDCOMP is not set
CONFIG_PPP_DEFLATE=m
@@ -2694,8 +2784,8 @@ CONFIG_USB_NET_ZAURUS=m
# CONFIG_USB_VL600 is not set
# CONFIG_USB_NET_CH9200 is not set
# CONFIG_USB_NET_AQC111 is not set
+CONFIG_USB_RTL8153_ECM=m
CONFIG_WLAN=y
-# CONFIG_WIRELESS_WDS is not set
CONFIG_WLAN_VENDOR_ADMTEK=y
# CONFIG_ADM8211 is not set
CONFIG_WLAN_VENDOR_ATH=y
@@ -2736,7 +2826,6 @@ CONFIG_WLAN_VENDOR_INTERSIL=y
# CONFIG_HOSTAP is not set
# CONFIG_HERMES is not set
# CONFIG_P54_COMMON is not set
-# CONFIG_PRISM54 is not set
CONFIG_WLAN_VENDOR_MARVELL=y
# CONFIG_LIBERTAS is not set
# CONFIG_LIBERTAS_THINFIRM is not set
@@ -2753,6 +2842,13 @@ CONFIG_WLAN_VENDOR_MEDIATEK=y
# CONFIG_MT76x2U is not set
# CONFIG_MT7603E is not set
# CONFIG_MT7615E is not set
+# CONFIG_MT7663U is not set
+# CONFIG_MT7663S is not set
+# CONFIG_MT7915E is not set
+# CONFIG_MT7921E is not set
+CONFIG_WLAN_VENDOR_MICROCHIP=y
+# CONFIG_WILC1000_SDIO is not set
+# CONFIG_WILC1000_SPI is not set
CONFIG_WLAN_VENDOR_RALINK=y
CONFIG_RT2X00=m
# CONFIG_RT2400PCI is not set
@@ -2807,18 +2903,20 @@ CONFIG_WLAN_VENDOR_QUANTENNA=y
# CONFIG_MAC80211_HWSIM is not set
# CONFIG_USB_NET_RNDIS_WLAN is not set
# CONFIG_VIRT_WIFI is not set
+# CONFIG_WAN is not set
#
-# Enable WiMAX (Networking options) to see the WiMAX drivers
+# Wireless WAN
#
-# CONFIG_WAN is not set
+# CONFIG_WWAN is not set
+# end of Wireless WAN
+
CONFIG_XEN_NETDEV_FRONTEND=m
CONFIG_XEN_NETDEV_BACKEND=m
CONFIG_VMXNET3=m
# CONFIG_NETDEVSIM is not set
CONFIG_NET_FAILOVER=m
# CONFIG_ISDN is not set
-# CONFIG_NVM is not set
#
# Input device support
#
CONFIG_INPUT=y
CONFIG_INPUT_LEDS=y
CONFIG_INPUT_FF_MEMLESS=m
-CONFIG_INPUT_POLLDEV=y
# CONFIG_INPUT_SPARSEKMAP is not set
CONFIG_INPUT_MATRIXKMAP=y
@@ -2882,7 +2979,6 @@ CONFIG_KEYBOARD_BCM=y
# CONFIG_INPUT_JOYSTICK is not set
# CONFIG_INPUT_TABLET is not set
CONFIG_INPUT_TOUCHSCREEN=y
-CONFIG_TOUCHSCREEN_PROPERTIES=y
# CONFIG_TOUCHSCREEN_ADS7846 is not set
# CONFIG_TOUCHSCREEN_AD7877 is not set
# CONFIG_TOUCHSCREEN_AD7879 is not set
@@ -2893,6 +2989,7 @@ CONFIG_TOUCHSCREEN_ADC=m
# CONFIG_TOUCHSCREEN_BU21013 is not set
# CONFIG_TOUCHSCREEN_BU21029 is not set
# CONFIG_TOUCHSCREEN_CHIPONE_ICN8318 is not set
+# CONFIG_TOUCHSCREEN_CY8CTMA140 is not set
# CONFIG_TOUCHSCREEN_CY8CTMG110 is not set
# CONFIG_TOUCHSCREEN_CYTTSP_CORE is not set
# CONFIG_TOUCHSCREEN_CYTTSP4_CORE is not set
@@ -2905,7 +3002,9 @@
# CONFIG_TOUCHSCREEN_FUJITSU is not set
CONFIG_TOUCHSCREEN_GOODIX=m
# CONFIG_TOUCHSCREEN_HIDEEP is not set
+# CONFIG_TOUCHSCREEN_HYCON_HY46XX is not set
# CONFIG_TOUCHSCREEN_ILI210X is not set
+# CONFIG_TOUCHSCREEN_ILITEK is not set
# CONFIG_TOUCHSCREEN_S6SY761 is not set
# CONFIG_TOUCHSCREEN_GUNZE is not set
# CONFIG_TOUCHSCREEN_EKTF2127 is not set
@@ -2917,6 +3016,7 @@ CONFIG_TOUCHSCREEN_GOODIX=m
# CONFIG_TOUCHSCREEN_MCS5000 is not set
# CONFIG_TOUCHSCREEN_MMS114 is not set
# CONFIG_TOUCHSCREEN_MELFAS_MIP4 is not set
+# CONFIG_TOUCHSCREEN_MSG2638 is not set
# CONFIG_TOUCHSCREEN_MTOUCH is not set
# CONFIG_TOUCHSCREEN_IMX6UL_TSC is not set
# CONFIG_TOUCHSCREEN_INEXIO is not set
@@ -2950,18 +3050,17 @@ CONFIG_TOUCHSCREEN_SUN4I=m
# CONFIG_TOUCHSCREEN_COLIBRI_VF50 is not set
# CONFIG_TOUCHSCREEN_ROHM_BU21023 is not set
# CONFIG_TOUCHSCREEN_IQS5XX is not set
+# CONFIG_TOUCHSCREEN_ZINITIX is not set
CONFIG_INPUT_MISC=y
# CONFIG_INPUT_AD714X is not set
# CONFIG_INPUT_ATMEL_CAPTOUCH is not set
# CONFIG_INPUT_BMA150 is not set
# CONFIG_INPUT_E3X0_BUTTON is not set
-# CONFIG_INPUT_MSM_VIBRATOR is not set
# CONFIG_INPUT_PM8XXX_VIBRATOR is not set
# CONFIG_INPUT_PMIC8XXX_PWRKEY is not set
# CONFIG_INPUT_MAX77693_HAPTIC is not set
# CONFIG_INPUT_MAX8997_HAPTIC is not set
# CONFIG_INPUT_MMA8450 is not set
-# CONFIG_INPUT_GP2A is not set
# CONFIG_INPUT_GPIO_BEEPER is not set
# CONFIG_INPUT_GPIO_DECODER is not set
CONFIG_INPUT_GPIO_VIBRA=m
@@ -2984,9 +3083,12 @@ CONFIG_INPUT_AXP20X_PEK=m
CONFIG_INPUT_PWM_VIBRA=m
# CONFIG_INPUT_RK805_PWRKEY is not set
# CONFIG_INPUT_GPIO_ROTARY_ENCODER is not set
+# CONFIG_INPUT_DA7280_HAPTICS is not set
# CONFIG_INPUT_DA9063_ONKEY is not set
# CONFIG_INPUT_ADXL34X is not set
# CONFIG_INPUT_IMS_PCU is not set
+# CONFIG_INPUT_IQS269A is not set
+# CONFIG_INPUT_IQS626A is not set
# CONFIG_INPUT_CMA3000 is not set
CONFIG_INPUT_XEN_KBDDEV_FRONTEND=y
# CONFIG_INPUT_DRV260X_HAPTICS is not set
@@ -3028,14 +3130,7 @@ CONFIG_VT_HW_CONSOLE_BINDING=y
CONFIG_UNIX98_PTYS=y
CONFIG_LEGACY_PTYS=y
CONFIG_LEGACY_PTY_COUNT=256
-# CONFIG_SERIAL_NONSTANDARD is not set
-# CONFIG_NOZOMI is not set
-# CONFIG_N_GSM is not set
-# CONFIG_TRACE_SINK is not set
-# CONFIG_NULL_TTY is not set
CONFIG_LDISC_AUTOLOAD=y
-CONFIG_DEVMEM=y
-# CONFIG_DEVKMEM is not set
#
# Serial drivers
#
@@ -3043,6 +3138,7 @@ CONFIG_DEVMEM=y
CONFIG_SERIAL_EARLYCON=y
CONFIG_SERIAL_8250=y
CONFIG_SERIAL_8250_DEPRECATED_OPTIONS=y
+CONFIG_SERIAL_8250_16550A_VARIANTS=y
# CONFIG_SERIAL_8250_FINTEK is not set
CONFIG_SERIAL_8250_CONSOLE=y
CONFIG_SERIAL_8250_DMA=y
@@ -3081,12 +3177,11 @@ CONFIG_SERIAL_SAMSUNG_CONSOLE=y
# CONFIG_SERIAL_MAX310X is not set
CONFIG_SERIAL_IMX=y
CONFIG_SERIAL_IMX_CONSOLE=y
+# CONFIG_SERIAL_IMX_EARLYCON is not set
# CONFIG_SERIAL_UARTLITE is not set
CONFIG_SERIAL_CORE=y
CONFIG_SERIAL_CORE_CONSOLE=y
# CONFIG_SERIAL_JSM is not set
-CONFIG_SERIAL_OMAP=y
-CONFIG_SERIAL_OMAP_CONSOLE=y
# CONFIG_SERIAL_SIFIVE is not set
# CONFIG_SERIAL_SCCNXP is not set
# CONFIG_SERIAL_SC16IS7XX is not set
@@ -3094,7 +3189,6 @@ CONFIG_SERIAL_BCM63XX=y
CONFIG_SERIAL_BCM63XX_CONSOLE=y
# CONFIG_SERIAL_ALTERA_JTAGUART is not set
# CONFIG_SERIAL_ALTERA_UART is not set
-# CONFIG_SERIAL_IFX6X60 is not set
CONFIG_SERIAL_XILINX_PS_UART=y
CONFIG_SERIAL_XILINX_PS_UART_CONSOLE=y
# CONFIG_SERIAL_ARC is not set
@@ -3106,34 +3200,46 @@ CONFIG_SERIAL_CONEXANT_DIGICOLOR=y
CONFIG_SERIAL_CONEXANT_DIGICOLOR_CONSOLE=y
CONFIG_SERIAL_ST_ASC=y
CONFIG_SERIAL_ST_ASC_CONSOLE=y
+# CONFIG_SERIAL_SPRD is not set
# end of Serial drivers
CONFIG_SERIAL_MCTRL_GPIO=y
-CONFIG_SERIAL_DEV_BUS=y
-CONFIG_SERIAL_DEV_CTRL_TTYPORT=y
-# CONFIG_TTY_PRINTK is not set
+# CONFIG_SERIAL_NONSTANDARD is not set
+# CONFIG_N_GSM is not set
+# CONFIG_NOZOMI is not set
+# CONFIG_NULL_TTY is not set
CONFIG_HVC_DRIVER=y
CONFIG_HVC_IRQ=y
CONFIG_HVC_XEN=y
CONFIG_HVC_XEN_FRONTEND=y
# CONFIG_HVC_DCC is not set
+CONFIG_SERIAL_DEV_BUS=y
+CONFIG_SERIAL_DEV_CTRL_TTYPORT=y
+# CONFIG_TTY_PRINTK is not set
CONFIG_VIRTIO_CONSOLE=y
# CONFIG_IPMI_HANDLER is not set
# CONFIG_IPMB_DEVICE_INTERFACE is not set
CONFIG_HW_RANDOM=y
# CONFIG_HW_RANDOM_TIMERIOMEM is not set
+# CONFIG_HW_RANDOM_BA431 is not set
CONFIG_HW_RANDOM_BCM2835=y
+CONFIG_HW_RANDOM_IPROC_RNG200=y
CONFIG_HW_RANDOM_OMAP=y
CONFIG_HW_RANDOM_OMAP3_ROM=y
# CONFIG_HW_RANDOM_VIRTIO is not set
CONFIG_HW_RANDOM_IMX_RNGC=y
CONFIG_HW_RANDOM_EXYNOS=y
+# CONFIG_HW_RANDOM_CCTRNG is not set
+# CONFIG_HW_RANDOM_XIPHERA is not set
+CONFIG_HW_RANDOM_ARM_SMCCC_TRNG=y
# CONFIG_APPLICOM is not set
-# CONFIG_RAW_DRIVER is not set
+CONFIG_DEVMEM=y
+CONFIG_DEVPORT=y
CONFIG_TCG_TPM=m
CONFIG_HW_RANDOM_TPM=y
# CONFIG_TCG_TIS is not set
# CONFIG_TCG_TIS_SPI is not set
+# CONFIG_TCG_TIS_I2C_CR50 is not set
# CONFIG_TCG_TIS_I2C_ATMEL is not set
CONFIG_TCG_TIS_I2C_INFINEON=m
# CONFIG_TCG_TIS_I2C_NUVOTON is not set
@@ -3141,11 +3247,10 @@ CONFIG_TCG_TIS_I2C_INFINEON=m
# CONFIG_TCG_VTPM_PROXY is not set
# CONFIG_TCG_TIS_ST33ZP24_I2C is not set
# CONFIG_TCG_TIS_ST33ZP24_SPI is not set
-CONFIG_DEVPORT=y
# CONFIG_XILLYBUS is not set
-# end of Character devices
-
+# CONFIG_XILLYUSB is not set
# CONFIG_RANDOM_TRUST_BOOTLOADER is not set
+# end of Character devices
#
# I2C support
#
@@ -3201,10 +3306,11 @@ CONFIG_I2C_ALGOBIT=y
# I2C system bus drivers (mostly embedded / system-on-chip)
#
CONFIG_I2C_BCM2835=y
+CONFIG_I2C_BRCMSTB=y
# CONFIG_I2C_CBUS_GPIO is not set
CONFIG_I2C_DESIGNWARE_CORE=y
-CONFIG_I2C_DESIGNWARE_PLATFORM=y
# CONFIG_I2C_DESIGNWARE_SLAVE is not set
+CONFIG_I2C_DESIGNWARE_PLATFORM=y
# CONFIG_I2C_DESIGNWARE_PCI is not set
# CONFIG_I2C_EMEV2 is not set
CONFIG_I2C_EXYNOS5=y
@@ -3229,7 +3335,7 @@ CONFIG_I2C_SUN6I_P2WI=y
#
# External I2C/SMBus adapter drivers
#
# CONFIG_I2C_DIOLAN_U2C is not set
-# CONFIG_I2C_PARPORT_LIGHT is not set
+# CONFIG_I2C_CP2615 is not set
# CONFIG_I2C_ROBOTFUZZ_OSIF is not set
# CONFIG_I2C_TAOS_EVM is not set
# CONFIG_I2C_TINY_USB is not set
@@ -3238,11 +3344,13 @@ CONFIG_I2C_SUN6I_P2WI=y
#
# Other I2C/SMBus bus drivers
#
CONFIG_I2C_CROS_EC_TUNNEL=m
+# CONFIG_I2C_VIRTIO is not set
# end of I2C Hardware Bus support
# CONFIG_I2C_STUB is not set
CONFIG_I2C_SLAVE=y
CONFIG_I2C_SLAVE_EEPROM=y
+# CONFIG_I2C_SLAVE_TESTUNIT is not set
# CONFIG_I2C_DEBUG_CORE is not set
# CONFIG_I2C_DEBUG_ALGO is not set
# CONFIG_I2C_DEBUG_BUS is not set
@@ -3264,6 +3372,7 @@ CONFIG_SPI_BCM2835AUX=y
CONFIG_SPI_BCM_QSPI=y
CONFIG_SPI_BITBANG=y
# CONFIG_SPI_CADENCE is not set
+# CONFIG_SPI_CADENCE_QUADSPI is not set
# CONFIG_SPI_DESIGNWARE is not set
# CONFIG_SPI_FSL_LPSPI is not set
CONFIG_SPI_FSL_QUADSPI=m
@@ -3277,6 +3386,7 @@ CONFIG_SPI_OMAP24XX=y
CONFIG_SPI_PL022=y
# CONFIG_SPI_PXA2XX is not set
CONFIG_SPI_ROCKCHIP=m
+# CONFIG_SPI_ROCKCHIP_SFC is not set
CONFIG_SPI_S3C64XX=m
# CONFIG_SPI_SC18IS602 is not set
# CONFIG_SPI_SIFIVE is not set
@@ -3286,6 +3396,12 @@ CONFIG_SPI_SUN6I=y
# CONFIG_SPI_XCOMM is not set
# CONFIG_SPI_XILINX is not set
# CONFIG_SPI_ZYNQMP_GQSPI is not set
+# CONFIG_SPI_AMD is not set
+
+#
+# SPI Multiplexer support
+#
+# CONFIG_SPI_MUX is not set
#
# SPI Protocol Masters
@@ -3303,6 +3419,7 @@ CONFIG_SPI_DYNAMIC=y
#
# PTP clock support
#
# CONFIG_PTP_1588_CLOCK is not set
+CONFIG_PTP_1588_CLOCK_OPTIONAL=y
#
# Enable PHYLIB and NETWORK_PHY_TIMESTAMPING to see the additional clocks.
@@ -3318,7 +3435,6 @@ CONFIG_GENERIC_PINCONF=y
# CONFIG_DEBUG_PINCTRL is not set
CONFIG_PINCTRL_AS3722=m
CONFIG_PINCTRL_AXP209=m
-# CONFIG_PINCTRL_AMD is not set
# CONFIG_PINCTRL_MCP23S08 is not set
CONFIG_PINCTRL_ROCKCHIP=y
CONFIG_PINCTRL_SINGLE=y
@@ -3327,6 +3443,7 @@ CONFIG_PINCTRL_SINGLE=y
CONFIG_PINCTRL_PALMAS=m
CONFIG_PINCTRL_RK805=m
# CONFIG_PINCTRL_OCELOT is not set
+# CONFIG_PINCTRL_MICROCHIP_SGPIO is not set
CONFIG_PINCTRL_BCM2835=y
CONFIG_PINCTRL_IMX=y
CONFIG_PINCTRL_IMX6Q=y
@@ -3334,6 +3451,17 @@ CONFIG_PINCTRL_IMX6SL=y
CONFIG_PINCTRL_IMX6SLL=y
CONFIG_PINCTRL_IMX6SX=y
CONFIG_PINCTRL_IMX6UL=y
+# CONFIG_PINCTRL_IMX8MM is not set
+# CONFIG_PINCTRL_IMX8MN is not set
+# CONFIG_PINCTRL_IMX8MP is not set
+# CONFIG_PINCTRL_IMX8MQ is not set
+# CONFIG_PINCTRL_IMX8ULP is not set
+
+#
+# Renesas pinctrl drivers
+#
+# end of Renesas pinctrl drivers
+
CONFIG_PINCTRL_SAMSUNG=y
CONFIG_PINCTRL_EXYNOS=y
CONFIG_PINCTRL_EXYNOS_ARM=y
@@ -3354,9 +3482,13 @@ CONFIG_PINCTRL_SUN9I_A80=y
CONFIG_PINCTRL_SUN9I_A80_R=y
# CONFIG_PINCTRL_SUN50I_A64 is not set
# CONFIG_PINCTRL_SUN50I_A64_R is not set
+# CONFIG_PINCTRL_SUN50I_A100 is not set
+# CONFIG_PINCTRL_SUN50I_A100_R is not set
# CONFIG_PINCTRL_SUN50I_H5 is not set
# CONFIG_PINCTRL_SUN50I_H6 is not set
# CONFIG_PINCTRL_SUN50I_H6_R is not set
+# CONFIG_PINCTRL_SUN50I_H616 is not set
+# CONFIG_PINCTRL_SUN50I_H616_R is not set
CONFIG_PINCTRL_TI_IODELAY=y
CONFIG_ARCH_HAVE_CUSTOM_GPIO_H=y
CONFIG_GPIOLIB=y
@@ -3365,6 +3497,8 @@ CONFIG_OF_GPIO=y
CONFIG_GPIOLIB_IRQCHIP=y
# CONFIG_DEBUG_GPIO is not set
CONFIG_GPIO_SYSFS=y
+CONFIG_GPIO_CDEV=y
+CONFIG_GPIO_CDEV_V1=y
CONFIG_GPIO_GENERIC=y
#
@@ -3380,12 +3514,15 @@ CONFIG_GPIO_DWAPB=y
CONFIG_GPIO_GENERIC_PLATFORM=y
# CONFIG_GPIO_GRGPIO is not set
# CONFIG_GPIO_HLWD is not set
+# CONFIG_GPIO_LOGICVC is not set
# CONFIG_GPIO_MB86S7X is not set
# CONFIG_GPIO_MPC8XXX is not set
CONFIG_GPIO_MXC=y
CONFIG_GPIO_OMAP=y
CONFIG_GPIO_PL061=y
+CONFIG_GPIO_ROCKCHIP=y
# CONFIG_GPIO_SAMA5D2_PIOBU is not set
+# CONFIG_GPIO_SIFIVE is not set
CONFIG_GPIO_SYSCON=y
# CONFIG_GPIO_XILINX is not set
# CONFIG_GPIO_ZEVIO is not set
@@ -3402,6 +3539,7 @@ CONFIG_GPIO_SYSCON=y
# CONFIG_GPIO_MAX732X is not set
CONFIG_GPIO_PCA953X=y
CONFIG_GPIO_PCA953X_IRQ=y
+# CONFIG_GPIO_PCA9570 is not set
CONFIG_GPIO_PCF857X=y
# CONFIG_GPIO_TPIC2810 is not set
# CONFIG_GPIO_TS4900 is not set
@@ -3445,10 +3583,15 @@ CONFIG_GPIO_TWL4030=y
#
# end of USB GPIO expanders
+
+#
+# Virtual GPIO drivers
+#
+# CONFIG_GPIO_AGGREGATOR is not set
# CONFIG_GPIO_MOCKUP is not set
+# CONFIG_GPIO_VIRTIO is not set
+# end of Virtual GPIO drivers
+
# CONFIG_W1 is not set
-CONFIG_POWER_AVS=y
-CONFIG_ROCKCHIP_IODOMAIN=y
CONFIG_POWER_RESET=y
CONFIG_POWER_RESET_AS3722=y
CONFIG_POWER_RESET_BRCMKONA=y
@@ -3456,6 +3599,7 @@ CONFIG_POWER_RESET_BRCMSTB=y
CONFIG_POWER_RESET_GPIO=y
CONFIG_POWER_RESET_GPIO_RESTART=y
# CONFIG_POWER_RESET_LTC2952 is not set
+# CONFIG_POWER_RESET_REGULATOR is not set
# CONFIG_POWER_RESET_RESTART is not set
# CONFIG_POWER_RESET_VERSATILE is not set
CONFIG_POWER_RESET_VEXPRESS=y
@@ -3473,10 +3617,10 @@ CONFIG_POWER_SUPPLY_HWMON=y
# CONFIG_CHARGER_ADP5061 is not set
CONFIG_BATTERY_ACT8945A=y
CONFIG_BATTERY_CPCAP=m
+# CONFIG_BATTERY_CW2015 is not set
# CONFIG_BATTERY_DS2780 is not set
# CONFIG_BATTERY_DS2781 is not set
# CONFIG_BATTERY_DS2782 is not set
-# CONFIG_BATTERY_LEGO_EV3 is not set
# CONFIG_BATTERY_SBS is not set
# CONFIG_CHARGER_SBS is not set
# CONFIG_MANAGER_SBS is not set
@@ -3484,7 +3628,6 @@ CONFIG_BATTERY_BQ27XXX=m
CONFIG_BATTERY_BQ27XXX_I2C=m
# CONFIG_BATTERY_BQ27XXX_DT_UPDATES_NVM is not set
CONFIG_AXP20X_POWER=m
-# CONFIG_AXP288_FUEL_GAUGE is not set
CONFIG_BATTERY_MAX17040=m
CONFIG_BATTERY_MAX17042=m
CONFIG_CHARGER_CPCAP=m
@@ -3495,6 +3638,7 @@ CONFIG_CHARGER_CPCAP=m
CONFIG_CHARGER_GPIO=m
# CONFIG_CHARGER_MANAGER is not set
# CONFIG_CHARGER_LT3651 is not set
+# CONFIG_CHARGER_LTC4162L is not set
CONFIG_CHARGER_MAX14577=m
# CONFIG_CHARGER_DETECTOR_MAX14656 is not set
CONFIG_CHARGER_MAX77693=m
@@ -3504,14 +3648,21 @@ CONFIG_CHARGER_MAX8998=m
# CONFIG_CHARGER_BQ24190 is not set
# CONFIG_CHARGER_BQ24257 is not set
# CONFIG_CHARGER_BQ24735 is not set
+# CONFIG_CHARGER_BQ2515X is not set
# CONFIG_CHARGER_BQ25890 is not set
+# CONFIG_CHARGER_BQ25980 is not set
+# CONFIG_CHARGER_BQ256XX is not set
# CONFIG_CHARGER_SMB347 is not set
CONFIG_CHARGER_TPS65090=y
# CONFIG_CHARGER_TPS65217 is not set
# CONFIG_BATTERY_GAUGE_LTC2941 is not set
+# CONFIG_BATTERY_GOLDFISH is not set
+# CONFIG_BATTERY_RT5033 is not set
# CONFIG_CHARGER_RT9455 is not set
# CONFIG_CHARGER_CROS_USBPD is not set
+CONFIG_CHARGER_CROS_PCHG=m
# CONFIG_CHARGER_UCS1002 is not set
+# CONFIG_CHARGER_BD99954 is not set
CONFIG_HWMON=y
# CONFIG_HWMON_DEBUG_CHIP is not set
@@ -3526,6 +3677,7 @@ CONFIG_HWMON=y
# CONFIG_SENSORS_ADM1026 is not set
# CONFIG_SENSORS_ADM1029 is not set
# CONFIG_SENSORS_ADM1031 is not set
+# CONFIG_SENSORS_ADM1177 is not set
# CONFIG_SENSORS_ADM9240 is not set
# CONFIG_SENSORS_ADT7310 is not set
# CONFIG_SENSORS_ADT7410 is not set
@@ -3533,11 +3685,17 @@ CONFIG_HWMON=y
# CONFIG_SENSORS_ADT7462 is not set
# CONFIG_SENSORS_ADT7470 is not set
# CONFIG_SENSORS_ADT7475 is not set
+# CONFIG_SENSORS_AHT10 is not set
+# CONFIG_SENSORS_AQUACOMPUTER_D5NEXT is not set
# CONFIG_SENSORS_AS370 is not set
# CONFIG_SENSORS_ASC7621 is not set
+# CONFIG_SENSORS_AXI_FAN_CONTROL is not set
# CONFIG_SENSORS_ARM_SCPI is not set
# CONFIG_SENSORS_ASPEED is not set
# CONFIG_SENSORS_ATXP1 is not set
+# CONFIG_SENSORS_CORSAIR_CPRO is not set
+# CONFIG_SENSORS_CORSAIR_PSU is not set
+# CONFIG_SENSORS_DRIVETEMP is not set
# CONFIG_SENSORS_DS620 is not set
# CONFIG_SENSORS_DS1621 is not set
# CONFIG_SENSORS_I5K_AMB is not set
@@ -3557,7 +3715,10 @@ CONFIG_SENSORS_IIO_HWMON=y
# CONFIG_SENSORS_POWR1220 is not set
# CONFIG_SENSORS_LINEAGE is not set
# CONFIG_SENSORS_LTC2945 is not set
+# CONFIG_SENSORS_LTC2947_I2C is not set
+# CONFIG_SENSORS_LTC2947_SPI is not set
# CONFIG_SENSORS_LTC2990 is not set
+# CONFIG_SENSORS_LTC2992 is not set
# CONFIG_SENSORS_LTC4151 is not set
# CONFIG_SENSORS_LTC4215 is not set
# CONFIG_SENSORS_LTC4222 is not set
@@ -3565,11 +3726,13 @@ CONFIG_SENSORS_IIO_HWMON=y
# CONFIG_SENSORS_LTC4260 is not set
# CONFIG_SENSORS_LTC4261 is not set
# CONFIG_SENSORS_MAX1111 is not set
+# CONFIG_SENSORS_MAX127 is not set
# CONFIG_SENSORS_MAX16065 is not set
# CONFIG_SENSORS_MAX1619 is not set
# CONFIG_SENSORS_MAX1668 is not set
# CONFIG_SENSORS_MAX197 is not set
# CONFIG_SENSORS_MAX31722 is not set
+# CONFIG_SENSORS_MAX31730 is not set
# CONFIG_SENSORS_MAX6621 is not set
# CONFIG_SENSORS_MAX6639 is not set
# CONFIG_SENSORS_MAX6642 is not set
@@ -3578,6 +3741,8 @@ CONFIG_SENSORS_IIO_HWMON=y
# CONFIG_SENSORS_MAX31790 is not set
# CONFIG_SENSORS_MCP3021 is not set
# CONFIG_SENSORS_TC654 is not set
+# CONFIG_SENSORS_TPS23861 is not set
+# CONFIG_SENSORS_MR75203 is not set
# CONFIG_SENSORS_ADCXX is not set
# CONFIG_SENSORS_LM63 is not set
# CONFIG_SENSORS_LM70 is not set
@@ -3603,14 +3768,18 @@ CONFIG_SENSORS_NTC_THERMISTOR=m
# CONFIG_SENSORS_NCT7802 is not set
# CONFIG_SENSORS_NCT7904 is not set
# CONFIG_SENSORS_NPCM7XX is not set
+# CONFIG_SENSORS_NZXT_KRAKEN2 is not set
# CONFIG_SENSORS_OCC_P8_I2C is not set
# CONFIG_SENSORS_PCF8591 is not set
# CONFIG_PMBUS is not set
CONFIG_SENSORS_PWM_FAN=m
CONFIG_SENSORS_RASPBERRYPI_HWMON=m
+# CONFIG_SENSORS_SBTSI is not set
+# CONFIG_SENSORS_SBRMI is not set
# CONFIG_SENSORS_SHT15 is not set
# CONFIG_SENSORS_SHT21 is not set
# CONFIG_SENSORS_SHT3x is not set
+# CONFIG_SENSORS_SHT4x is not set
# CONFIG_SENSORS_SHTC1 is not set
# CONFIG_SENSORS_SIS5595 is not set
# CONFIG_SENSORS_DME1737 is not set
@@ -3638,6 +3807,7 @@ CONFIG_SENSORS_INA2XX=m
# CONFIG_SENSORS_TMP108 is not set
# CONFIG_SENSORS_TMP401 is not set
# CONFIG_SENSORS_TMP421 is not set
+# CONFIG_SENSORS_TMP513 is not set
# CONFIG_SENSORS_VEXPRESS is not set
# CONFIG_SENSORS_VIA686A is not set
# CONFIG_SENSORS_VT1211 is not set
@@ -3653,6 +3823,7 @@ CONFIG_SENSORS_INA2XX=m
# CONFIG_SENSORS_W83627HF is not set
# CONFIG_SENSORS_W83627EHF is not set
CONFIG_THERMAL=y
+# CONFIG_THERMAL_NETLINK is not set
# CONFIG_THERMAL_STATISTICS is not set
CONFIG_THERMAL_EMERGENCY_POWEROFF_DELAY_MS=0
CONFIG_THERMAL_HWMON=y
@@ -3661,24 +3832,25 @@ CONFIG_THERMAL_OF=y
CONFIG_THERMAL_DEFAULT_GOV_STEP_WISE=y
# CONFIG_THERMAL_DEFAULT_GOV_FAIR_SHARE is not set
# CONFIG_THERMAL_DEFAULT_GOV_USER_SPACE is not set
-# CONFIG_THERMAL_DEFAULT_GOV_POWER_ALLOCATOR is not set
# CONFIG_THERMAL_GOV_FAIR_SHARE is not set
CONFIG_THERMAL_GOV_STEP_WISE=y
# CONFIG_THERMAL_GOV_BANG_BANG is not set
# CONFIG_THERMAL_GOV_USER_SPACE is not set
# CONFIG_THERMAL_GOV_POWER_ALLOCATOR is not set
CONFIG_CPU_THERMAL=y
-# CONFIG_CLOCK_THERMAL is not set
+CONFIG_CPU_FREQ_THERMAL=y
# CONFIG_DEVFREQ_THERMAL is not set
# CONFIG_THERMAL_EMULATION is not set
# CONFIG_THERMAL_MMIO is not set
CONFIG_IMX_THERMAL=y
-# CONFIG_QORIQ_THERMAL is not set
+# CONFIG_IMX8MM_THERMAL is not set
+# CONFIG_SUN8I_THERMAL is not set
CONFIG_ROCKCHIP_THERMAL=y
#
# Broadcom thermal drivers
#
+# CONFIG_BCM2711_THERMAL is not set
CONFIG_BCM2835_THERMAL=m
# end of Broadcom thermal drivers
@@ -3706,6 +3878,7 @@ CONFIG_WATCHDOG_CORE=y
CONFIG_WATCHDOG_HANDLE_BOOT_ENABLED=y
CONFIG_WATCHDOG_OPEN_TIMEOUT=0
# CONFIG_WATCHDOG_SYSFS is not set
+# CONFIG_WATCHDOG_HRTIMER_PRETIMEOUT is not set
#
# Watchdog Pretimeout Governors
#
@@ -3723,7 +3896,6 @@ CONFIG_XILINX_WATCHDOG=y
CONFIG_ARM_SP805_WATCHDOG=y
# CONFIG_CADENCE_WATCHDOG is not set
# CONFIG_FTWDT010_WATCHDOG is not set
-CONFIG_HAVE_S3C2410_WATCHDOG=y
# CONFIG_S3C2410_WATCHDOG is not set
CONFIG_DW_WATCHDOG=y
# CONFIG_OMAP_WATCHDOG is not set
@@ -3733,6 +3905,7 @@ CONFIG_SUNXI_WATCHDOG=y
# CONFIG_MAX63XX_WATCHDOG is not set
CONFIG_IMX2_WDT=y
# CONFIG_IMX7ULP_WDT is not set
+# CONFIG_ARM_SMC_WATCHDOG is not set
CONFIG_STPMIC1_WATCHDOG=y
# CONFIG_ALIM7101_WDT is not set
# CONFIG_I6300ESB_WDT is not set
@@ -3793,13 +3966,17 @@ CONFIG_MFD_DA9063=m
# CONFIG_MFD_DA9150 is not set
# CONFIG_MFD_DLN2 is not set
# CONFIG_MFD_EXYNOS_LPASS is not set
+# CONFIG_MFD_GATEWORKS_GSC is not set
# CONFIG_MFD_MC13XXX_SPI is not set
# CONFIG_MFD_MC13XXX_I2C is not set
+# CONFIG_MFD_MP2629 is not set
# CONFIG_MFD_HI6421_PMIC is not set
# CONFIG_HTC_PASIC3 is not set
# CONFIG_HTC_I2CPLD is not set
# CONFIG_LPC_ICH is not set
# CONFIG_LPC_SCH is not set
+# CONFIG_MFD_INTEL_PMT is not set
+# CONFIG_MFD_IQS62X is not set
# CONFIG_MFD_JANZ_CMODIO is not set
# CONFIG_MFD_KEMPLD is not set
# CONFIG_MFD_88PM800 is not set
@@ -3815,15 +3992,18 @@ CONFIG_MFD_MAX8907=y
# CONFIG_MFD_MAX8925 is not set
CONFIG_MFD_MAX8997=y
CONFIG_MFD_MAX8998=y
+# CONFIG_MFD_MT6360 is not set
# CONFIG_MFD_MT6397 is not set
# CONFIG_MFD_MENF21BMC is not set
# CONFIG_EZX_PCAP is not set
CONFIG_MFD_CPCAP=y
# CONFIG_MFD_VIPERBOARD is not set
+# CONFIG_MFD_NTXEC is not set
# CONFIG_MFD_RETU is not set
# CONFIG_MFD_PCF50633 is not set
CONFIG_MFD_PM8XXX=y
# CONFIG_MFD_RDC321X is not set
+# CONFIG_MFD_RT4831 is not set
# CONFIG_MFD_RT5033 is not set
# CONFIG_MFD_RC5T583 is not set
CONFIG_MFD_RK808=y
@@ -3832,9 +4012,6 @@ CONFIG_MFD_SEC_CORE=y
# CONFIG_MFD_SI476X_CORE is not set
# CONFIG_MFD_SM501 is not set
# CONFIG_MFD_SKY81452 is not set
-# CONFIG_MFD_SMSC is not set
-CONFIG_ABX500_CORE=y
-# CONFIG_AB3100_CORE is not set
CONFIG_MFD_STMPE=y
#
@@ -3888,10 +4065,18 @@ CONFIG_TWL4030_POWER=y
CONFIG_MFD_WM8994=m
# CONFIG_MFD_ROHM_BD718XX is not set
# CONFIG_MFD_ROHM_BD70528 is not set
+# CONFIG_MFD_ROHM_BD71828 is not set
+# CONFIG_MFD_ROHM_BD957XMUF is not set
CONFIG_MFD_STPMIC1=y
CONFIG_MFD_STMFX=y
+# CONFIG_MFD_ATC260X_I2C is not set
+# CONFIG_MFD_KHADAS_MCU is not set
+# CONFIG_MFD_QCOM_PM8008 is not set
CONFIG_MFD_VEXPRESS_SYSREG=y
# CONFIG_RAVE_SP_CORE is not set
+# CONFIG_MFD_INTEL_M10_BMC is not set
+# CONFIG_MFD_RSMU_I2C is not set
+# CONFIG_MFD_RSMU_SPI is not set
# end of Multifunction device drivers
CONFIG_REGULATOR=y
@@ -3909,10 +4094,13 @@ CONFIG_REGULATOR_AS3722=y
CONFIG_REGULATOR_AXP20X=y
CONFIG_REGULATOR_BCM590XX=y
CONFIG_REGULATOR_CPCAP=y
+# CONFIG_REGULATOR_CROS_EC is not set
# CONFIG_REGULATOR_DA9063 is not set
+# CONFIG_REGULATOR_DA9121 is not set
CONFIG_REGULATOR_DA9210=y
# CONFIG_REGULATOR_DA9211 is not set
CONFIG_REGULATOR_FAN53555=y
+# CONFIG_REGULATOR_FAN53880 is not set
CONFIG_REGULATOR_GPIO=y
# CONFIG_REGULATOR_ISL9305 is not set
# CONFIG_REGULATOR_ISL6271A is not set
@@ -3926,6 +4114,7 @@ CONFIG_REGULATOR_MAX14577=m
# CONFIG_REGULATOR_MAX1586 is not set
# CONFIG_REGULATOR_MAX8649 is not set
# CONFIG_REGULATOR_MAX8660 is not set
CONFIG_DVB_DEMUX_SECTION_LOSS_LOG is not set +# CONFIG_DVB_ULE_DEBUG is not set +# end of Digital TV options # # Media drivers @@ -4060,20 +4308,75 @@ CONFIG_USB_GSPCA=m # CONFIG_USB_S2255 is not set # CONFIG_VIDEO_USBTV is not set +# +# Analog TV USB devices +# +CONFIG_VIDEO_PVRUSB2=m +CONFIG_VIDEO_PVRUSB2_SYSFS=y +CONFIG_VIDEO_PVRUSB2_DVB=y +# CONFIG_VIDEO_PVRUSB2_DEBUGIFC is not set +CONFIG_VIDEO_HDPVR=m +# CONFIG_VIDEO_STK1160_COMMON is not set +# CONFIG_VIDEO_GO7007 is not set + +# +# Analog/digital TV USB devices +# +# CONFIG_VIDEO_AU0828 is not set +# CONFIG_VIDEO_CX231XX is not set + +# +# Digital TV USB devices +# +# CONFIG_DVB_USB_V2 is not set +# CONFIG_DVB_TTUSB_BUDGET is not set +# CONFIG_DVB_TTUSB_DEC is not set +# CONFIG_SMS_USB_DRV is not set +# CONFIG_DVB_B2C2_FLEXCOP_USB is not set +# CONFIG_DVB_AS102 is not set + # # Webcam, TV (analog/digital) USB devices # # CONFIG_VIDEO_EM28XX is not set # -# USB HDMI CEC adapters +# Software defined radio USB devices # +# CONFIG_USB_AIRSPY is not set +# CONFIG_USB_HACKRF is not set +# CONFIG_USB_MSI2500 is not set # CONFIG_MEDIA_PCI_SUPPORT is not set +CONFIG_RADIO_ADAPTERS=y +# CONFIG_RADIO_SI470X is not set +# CONFIG_RADIO_SI4713 is not set +# CONFIG_USB_MR800 is not set +# CONFIG_USB_DSBR is not set +# CONFIG_RADIO_MAXIRADIO is not set +# CONFIG_RADIO_SHARK is not set +# CONFIG_RADIO_SHARK2 is not set +# CONFIG_USB_KEENE is not set +# CONFIG_USB_RAREMONO is not set +# CONFIG_USB_MA901 is not set +# CONFIG_RADIO_TEA5764 is not set +# CONFIG_RADIO_SAA7706H is not set +# CONFIG_RADIO_TEF6862 is not set +# CONFIG_RADIO_WL1273 is not set +CONFIG_VIDEO_CX2341X=m +CONFIG_VIDEO_TVEEPROM=m +CONFIG_VIDEOBUF2_CORE=m +CONFIG_VIDEOBUF2_V4L2=m +CONFIG_VIDEOBUF2_MEMOPS=m +CONFIG_VIDEOBUF2_DMA_CONTIG=m +CONFIG_VIDEOBUF2_VMALLOC=m +CONFIG_VIDEOBUF2_DMA_SG=m +CONFIG_VIDEO_V4L2_TPG=m CONFIG_V4L_PLATFORM_DRIVERS=y # CONFIG_VIDEO_CAFE_CCIC is not set # CONFIG_VIDEO_CADENCE is not set CONFIG_VIDEO_ASPEED=m # CONFIG_VIDEO_MUX is not set +# CONFIG_VIDEO_ROCKCHIP_ISP1 is not set CONFIG_VIDEO_SAMSUNG_EXYNOS4_IS=m CONFIG_VIDEO_EXYNOS4_IS_COMMON=m CONFIG_VIDEO_S5P_FIMC=m @@ -4089,14 +4392,23 @@ CONFIG_VIDEO_EXYNOS4_ISP_DMA_CAPTURE=y CONFIG_V4L_MEM2MEM_DRIVERS=y # CONFIG_VIDEO_CODA is not set # CONFIG_VIDEO_IMX_PXP is not set +# CONFIG_VIDEO_IMX8_JPEG is not set # CONFIG_VIDEO_MEM2MEM_DEINTERLACE is not set # CONFIG_VIDEO_SAMSUNG_S5P_G2D is not set CONFIG_VIDEO_SAMSUNG_S5P_JPEG=m CONFIG_VIDEO_SAMSUNG_S5P_MFC=m CONFIG_VIDEO_SAMSUNG_EXYNOS_GSC=m -# CONFIG_VIDEO_SH_VEU is not set # CONFIG_VIDEO_ROCKCHIP_RGA is not set # CONFIG_VIDEO_TI_VPE is not set +# CONFIG_VIDEO_SUN8I_DEINTERLACE is not set +# CONFIG_VIDEO_SUN8I_ROTATE is not set +# CONFIG_DVB_PLATFORM_DRIVERS is not set +# CONFIG_SDR_PLATFORM_DRIVERS is not set + +# +# MMC/SDIO DVB adapters +# +# CONFIG_SMS_SDIO_DRV is not set CONFIG_V4L_TEST_DRIVERS=y # CONFIG_VIDEO_VIMC is not set CONFIG_VIDEO_VIVID=m @@ -4104,29 +4416,13 @@ CONFIG_VIDEO_VIVID=m CONFIG_VIDEO_VIVID_MAX_DEVS=64 # CONFIG_VIDEO_VIM2M is not set # CONFIG_VIDEO_VICODEC is not set -CONFIG_CEC_PLATFORM_DRIVERS=y -# CONFIG_VIDEO_CROS_EC_CEC is not set -CONFIG_VIDEO_SAMSUNG_S5P_CEC=m - -# -# Supported MMC/SDIO adapters -# -# CONFIG_CYPRESS_FIRMWARE is not set -CONFIG_VIDEOBUF2_CORE=m -CONFIG_VIDEOBUF2_V4L2=m -CONFIG_VIDEOBUF2_MEMOPS=m -CONFIG_VIDEOBUF2_DMA_CONTIG=m -CONFIG_VIDEOBUF2_VMALLOC=m -CONFIG_VIDEO_V4L2_TPG=m - -# -# Media ancillary drivers (tuners, sensors, i2c, spi, frontends) -# -# CONFIG_MEDIA_SUBDRV_AUTOSELECT is not set +# 
CONFIG_DVB_TEST_DRIVERS is not set +# end of Media drivers # -# I2C Encoders, decoders, sensors and other helper chips +# Media ancillary drivers # +CONFIG_MEDIA_ATTACH=y # # Audio decoders, processors and mixers @@ -4137,21 +4433,23 @@ CONFIG_VIDEO_V4L2_TPG=m # CONFIG_VIDEO_TDA1997X is not set # CONFIG_VIDEO_TEA6415C is not set # CONFIG_VIDEO_TEA6420 is not set -# CONFIG_VIDEO_MSP3400 is not set +CONFIG_VIDEO_MSP3400=m # CONFIG_VIDEO_CS3308 is not set # CONFIG_VIDEO_CS5345 is not set -# CONFIG_VIDEO_CS53L32A is not set +CONFIG_VIDEO_CS53L32A=m # CONFIG_VIDEO_TLV320AIC23B is not set # CONFIG_VIDEO_UDA1342 is not set -# CONFIG_VIDEO_WM8775 is not set +CONFIG_VIDEO_WM8775=m # CONFIG_VIDEO_WM8739 is not set # CONFIG_VIDEO_VP27SMPX is not set # CONFIG_VIDEO_SONY_BTF_MPX is not set +# end of Audio decoders, processors and mixers # # RDS decoders # # CONFIG_VIDEO_SAA6588 is not set +# end of RDS decoders # # Video decoders @@ -4167,7 +4465,7 @@ CONFIG_VIDEO_ADV7180=m # CONFIG_VIDEO_KS0127 is not set CONFIG_VIDEO_ML86V7667=m # CONFIG_VIDEO_SAA7110 is not set -# CONFIG_VIDEO_SAA711X is not set +CONFIG_VIDEO_SAA711X=m # CONFIG_VIDEO_TC358743 is not set # CONFIG_VIDEO_TVP514X is not set # CONFIG_VIDEO_TVP5150 is not set @@ -4177,12 +4475,14 @@ CONFIG_VIDEO_ML86V7667=m # CONFIG_VIDEO_TW9906 is not set # CONFIG_VIDEO_TW9910 is not set # CONFIG_VIDEO_VPX3220 is not set +# CONFIG_VIDEO_MAX9286 is not set # # Video and audio decoders # # CONFIG_VIDEO_SAA717X is not set -# CONFIG_VIDEO_CX25840 is not set +CONFIG_VIDEO_CX25840=m +# end of Video decoders # # Video encoders @@ -4196,15 +4496,52 @@ CONFIG_VIDEO_ML86V7667=m # CONFIG_VIDEO_AD9389B is not set # CONFIG_VIDEO_AK881X is not set # CONFIG_VIDEO_THS8200 is not set +# end of Video encoders + +# +# Video improvement chips +# +# CONFIG_VIDEO_UPD64031A is not set +# CONFIG_VIDEO_UPD64083 is not set +# end of Video improvement chips + +# +# Audio/Video compression chips +# +# CONFIG_VIDEO_SAA6752HS is not set +# end of Audio/Video compression chips + +# +# SDR tuner chips +# +# CONFIG_SDR_MAX2175 is not set +# end of SDR tuner chips + +# +# Miscellaneous helper chips +# +# CONFIG_VIDEO_THS7303 is not set +# CONFIG_VIDEO_M52790 is not set +# CONFIG_VIDEO_I2C is not set +# CONFIG_VIDEO_ST_MIPID02 is not set +# end of Miscellaneous helper chips # # Camera sensor devices # +# CONFIG_VIDEO_HI556 is not set +# CONFIG_VIDEO_IMX208 is not set # CONFIG_VIDEO_IMX214 is not set +# CONFIG_VIDEO_IMX219 is not set # CONFIG_VIDEO_IMX258 is not set # CONFIG_VIDEO_IMX274 is not set +# CONFIG_VIDEO_IMX290 is not set # CONFIG_VIDEO_IMX319 is not set +# CONFIG_VIDEO_IMX334 is not set +# CONFIG_VIDEO_IMX335 is not set # CONFIG_VIDEO_IMX355 is not set +# CONFIG_VIDEO_IMX412 is not set +# CONFIG_VIDEO_OV02A10 is not set # CONFIG_VIDEO_OV2640 is not set # CONFIG_VIDEO_OV2659 is not set # CONFIG_VIDEO_OV2680 is not set @@ -4212,6 +4549,7 @@ CONFIG_VIDEO_ML86V7667=m # CONFIG_VIDEO_OV5640 is not set # CONFIG_VIDEO_OV5645 is not set # CONFIG_VIDEO_OV5647 is not set +# CONFIG_VIDEO_OV5648 is not set # CONFIG_VIDEO_OV6650 is not set # CONFIG_VIDEO_OV5670 is not set # CONFIG_VIDEO_OV5675 is not set @@ -4222,6 +4560,8 @@ CONFIG_VIDEO_ML86V7667=m # CONFIG_VIDEO_OV7670 is not set # CONFIG_VIDEO_OV7740 is not set # CONFIG_VIDEO_OV8856 is not set +# CONFIG_VIDEO_OV8865 is not set +# CONFIG_VIDEO_OV9282 is not set # CONFIG_VIDEO_OV9640 is not set # CONFIG_VIDEO_OV9650 is not set # CONFIG_VIDEO_OV13858 is not set @@ -4238,14 +4578,17 @@ CONFIG_VIDEO_ML86V7667=m # CONFIG_VIDEO_SR030PC30 is not 
set # CONFIG_VIDEO_NOON010PC30 is not set # CONFIG_VIDEO_M5MOLS is not set +# CONFIG_VIDEO_RDACM20 is not set +# CONFIG_VIDEO_RDACM21 is not set # CONFIG_VIDEO_RJ54N1 is not set # CONFIG_VIDEO_S5K6AA is not set # CONFIG_VIDEO_S5K6A3 is not set # CONFIG_VIDEO_S5K4ECGX is not set # CONFIG_VIDEO_S5K5BAF is not set -# CONFIG_VIDEO_SMIAPP is not set +# CONFIG_VIDEO_CCS is not set # CONFIG_VIDEO_ET8EK8 is not set # CONFIG_VIDEO_S5C73M3 is not set +# end of Camera sensor devices # # Lens drivers @@ -4253,7 +4596,9 @@ CONFIG_VIDEO_ML86V7667=m # CONFIG_VIDEO_AD5820 is not set # CONFIG_VIDEO_AK7375 is not set # CONFIG_VIDEO_DW9714 is not set +# CONFIG_VIDEO_DW9768 is not set # CONFIG_VIDEO_DW9807_VCM is not set +# end of Lens drivers # # Flash devices @@ -4261,30 +4606,7 @@ CONFIG_VIDEO_ML86V7667=m # CONFIG_VIDEO_ADP1653 is not set # CONFIG_VIDEO_LM3560 is not set # CONFIG_VIDEO_LM3646 is not set - -# -# Video improvement chips -# -# CONFIG_VIDEO_UPD64031A is not set -# CONFIG_VIDEO_UPD64083 is not set - -# -# Audio/Video compression chips -# -# CONFIG_VIDEO_SAA6752HS is not set - -# -# SDR tuner chips -# - -# -# Miscellaneous helper chips -# -# CONFIG_VIDEO_THS7303 is not set -# CONFIG_VIDEO_M52790 is not set -# CONFIG_VIDEO_I2C is not set -# CONFIG_VIDEO_ST_MIPID02 is not set -# end of I2C Encoders, decoders, sensors and other helper chips +# end of Flash devices # # SPI helper chips @@ -4295,16 +4617,221 @@ CONFIG_VIDEO_ML86V7667=m # # Media SPI Adapters # +CONFIG_CXD2880_SPI_DRV=m # end of Media SPI Adapters +CONFIG_MEDIA_TUNER=m + +# +# Customize TV tuners +# +CONFIG_MEDIA_TUNER_SIMPLE=m +CONFIG_MEDIA_TUNER_TDA18250=m +CONFIG_MEDIA_TUNER_TDA8290=m +CONFIG_MEDIA_TUNER_TDA827X=m +CONFIG_MEDIA_TUNER_TDA18271=m +CONFIG_MEDIA_TUNER_TDA9887=m +CONFIG_MEDIA_TUNER_TEA5761=m +CONFIG_MEDIA_TUNER_TEA5767=m +CONFIG_MEDIA_TUNER_MSI001=m +CONFIG_MEDIA_TUNER_MT20XX=m +CONFIG_MEDIA_TUNER_MT2060=m +CONFIG_MEDIA_TUNER_MT2063=m +CONFIG_MEDIA_TUNER_MT2266=m +CONFIG_MEDIA_TUNER_MT2131=m +CONFIG_MEDIA_TUNER_QT1010=m +CONFIG_MEDIA_TUNER_XC2028=m +CONFIG_MEDIA_TUNER_XC5000=m +CONFIG_MEDIA_TUNER_XC4000=m +CONFIG_MEDIA_TUNER_MXL5005S=m +CONFIG_MEDIA_TUNER_MXL5007T=m +CONFIG_MEDIA_TUNER_MC44S803=m +CONFIG_MEDIA_TUNER_MAX2165=m +CONFIG_MEDIA_TUNER_TDA18218=m +CONFIG_MEDIA_TUNER_FC0011=m +CONFIG_MEDIA_TUNER_FC0012=m +CONFIG_MEDIA_TUNER_FC0013=m +CONFIG_MEDIA_TUNER_TDA18212=m +CONFIG_MEDIA_TUNER_E4000=m +CONFIG_MEDIA_TUNER_FC2580=m +CONFIG_MEDIA_TUNER_M88RS6000T=m +CONFIG_MEDIA_TUNER_TUA9001=m +CONFIG_MEDIA_TUNER_SI2157=m +CONFIG_MEDIA_TUNER_IT913X=m +CONFIG_MEDIA_TUNER_R820T=m +CONFIG_MEDIA_TUNER_MXL301RF=m +CONFIG_MEDIA_TUNER_QM1D1C0042=m +CONFIG_MEDIA_TUNER_QM1D1B0004=m +# end of Customize TV tuners + # # Customise DVB Frontends # +# +# Multistandard (satellite) frontends +# +CONFIG_DVB_STB0899=m +CONFIG_DVB_STB6100=m +CONFIG_DVB_STV090x=m +CONFIG_DVB_STV0910=m +CONFIG_DVB_STV6110x=m +CONFIG_DVB_STV6111=m +CONFIG_DVB_MXL5XX=m +CONFIG_DVB_M88DS3103=m + +# +# Multistandard (cable + terrestrial) frontends +# +CONFIG_DVB_DRXK=m +CONFIG_DVB_TDA18271C2DD=m +CONFIG_DVB_SI2165=m +CONFIG_DVB_MN88472=m +CONFIG_DVB_MN88473=m + +# +# DVB-S (satellite) frontends +# +CONFIG_DVB_CX24110=m +CONFIG_DVB_CX24123=m +CONFIG_DVB_MT312=m +CONFIG_DVB_ZL10036=m +CONFIG_DVB_ZL10039=m +CONFIG_DVB_S5H1420=m +CONFIG_DVB_STV0288=m +CONFIG_DVB_STB6000=m +CONFIG_DVB_STV0299=m +CONFIG_DVB_STV6110=m +CONFIG_DVB_STV0900=m +CONFIG_DVB_TDA8083=m +CONFIG_DVB_TDA10086=m +CONFIG_DVB_TDA8261=m +CONFIG_DVB_VES1X93=m +CONFIG_DVB_TUNER_ITD1000=m 
+CONFIG_DVB_TUNER_CX24113=m +CONFIG_DVB_TDA826X=m +CONFIG_DVB_TUA6100=m +CONFIG_DVB_CX24116=m +CONFIG_DVB_CX24117=m +CONFIG_DVB_CX24120=m +CONFIG_DVB_SI21XX=m +CONFIG_DVB_TS2020=m +CONFIG_DVB_DS3000=m +CONFIG_DVB_MB86A16=m +CONFIG_DVB_TDA10071=m + +# +# DVB-T (terrestrial) frontends +# +CONFIG_DVB_SP887X=m +CONFIG_DVB_CX22700=m +CONFIG_DVB_CX22702=m +CONFIG_DVB_S5H1432=m +CONFIG_DVB_DRXD=m +CONFIG_DVB_L64781=m +CONFIG_DVB_TDA1004X=m +CONFIG_DVB_NXT6000=m +CONFIG_DVB_MT352=m +CONFIG_DVB_ZL10353=m +CONFIG_DVB_DIB3000MB=m +CONFIG_DVB_DIB3000MC=m +CONFIG_DVB_DIB7000M=m +CONFIG_DVB_DIB7000P=m +CONFIG_DVB_DIB9000=m +CONFIG_DVB_TDA10048=m +CONFIG_DVB_AF9013=m +CONFIG_DVB_EC100=m +CONFIG_DVB_STV0367=m +CONFIG_DVB_CXD2820R=m +CONFIG_DVB_CXD2841ER=m +CONFIG_DVB_RTL2830=m +CONFIG_DVB_RTL2832=m +CONFIG_DVB_RTL2832_SDR=m +CONFIG_DVB_SI2168=m +CONFIG_DVB_ZD1301_DEMOD=m +CONFIG_DVB_CXD2880=m + +# +# DVB-C (cable) frontends +# +CONFIG_DVB_VES1820=m +CONFIG_DVB_TDA10021=m +CONFIG_DVB_TDA10023=m +CONFIG_DVB_STV0297=m + +# +# ATSC (North American/Korean Terrestrial/Cable DTV) frontends +# +CONFIG_DVB_NXT200X=m +CONFIG_DVB_OR51211=m +CONFIG_DVB_OR51132=m +CONFIG_DVB_BCM3510=m +CONFIG_DVB_LGDT330X=m +CONFIG_DVB_LGDT3305=m +CONFIG_DVB_LGDT3306A=m +CONFIG_DVB_LG2160=m +CONFIG_DVB_S5H1409=m +CONFIG_DVB_AU8522=m +CONFIG_DVB_AU8522_DTV=m +CONFIG_DVB_AU8522_V4L=m +CONFIG_DVB_S5H1411=m +CONFIG_DVB_MXL692=m + +# +# ISDB-T (terrestrial) frontends +# +CONFIG_DVB_S921=m +CONFIG_DVB_DIB8000=m +CONFIG_DVB_MB86A20S=m + +# +# ISDB-S (satellite) & ISDB-T (terrestrial) frontends +# +CONFIG_DVB_TC90522=m +CONFIG_DVB_MN88443X=m + +# +# Digital terrestrial only tuners/PLL +# +CONFIG_DVB_PLL=m +CONFIG_DVB_TUNER_DIB0070=m +CONFIG_DVB_TUNER_DIB0090=m + +# +# SEC control devices for DVB-S +# +CONFIG_DVB_DRX39XYJ=m +CONFIG_DVB_LNBH25=m +CONFIG_DVB_LNBH29=m +CONFIG_DVB_LNBP21=m +CONFIG_DVB_LNBP22=m +CONFIG_DVB_ISL6405=m +CONFIG_DVB_ISL6421=m +CONFIG_DVB_ISL6423=m +CONFIG_DVB_A8293=m +CONFIG_DVB_LGS8GL5=m +CONFIG_DVB_LGS8GXX=m +CONFIG_DVB_ATBM8830=m +CONFIG_DVB_TDA665x=m +CONFIG_DVB_IX2505V=m +CONFIG_DVB_M88RS2000=m +CONFIG_DVB_AF9033=m +CONFIG_DVB_HORUS3A=m +CONFIG_DVB_ASCOT2E=m +CONFIG_DVB_HELENE=m + +# +# Common Interface (EN50221) controller drivers +# +CONFIG_DVB_CXD2099=m +CONFIG_DVB_SP2=m +# end of Customise DVB Frontends + # # Tools to develop new frontends # -# end of Customise DVB Frontends +# CONFIG_DVB_DUMMY_FE is not set +# end of Media ancillary drivers # # Graphics support @@ -4314,21 +4841,22 @@ CONFIG_VGA_ARB_MAX_GPUS=16 # CONFIG_IMX_IPUV3_CORE is not set CONFIG_DRM=y CONFIG_DRM_MIPI_DSI=y +CONFIG_DRM_DP_AUX_BUS=y # CONFIG_DRM_DP_AUX_CHARDEV is not set # CONFIG_DRM_DEBUG_MM is not set # CONFIG_DRM_DEBUG_SELFTEST is not set CONFIG_DRM_KMS_HELPER=y -CONFIG_DRM_KMS_FB_HELPER=y +# CONFIG_DRM_DEBUG_DP_MST_TOPOLOGY_REFS is not set CONFIG_DRM_FBDEV_EMULATION=y CONFIG_DRM_FBDEV_OVERALLOC=100 # CONFIG_DRM_FBDEV_LEAK_PHYS_SMEM is not set # CONFIG_DRM_LOAD_EDID_FIRMWARE is not set # CONFIG_DRM_DP_CEC is not set CONFIG_DRM_TTM=m +CONFIG_DRM_TTM_HELPER=m CONFIG_DRM_GEM_CMA_HELPER=y CONFIG_DRM_KMS_CMA_HELPER=y CONFIG_DRM_GEM_SHMEM_HELPER=y -CONFIG_DRM_VM=y CONFIG_DRM_SCHED=m # @@ -4350,21 +4878,15 @@ CONFIG_DRM_SCHED=m # CONFIG_DRM_RADEON is not set # CONFIG_DRM_AMDGPU is not set - -# -# ACP (Audio CoProcessor) Configuration -# -# end of ACP (Audio CoProcessor) Configuration - CONFIG_DRM_NOUVEAU=m # CONFIG_NOUVEAU_LEGACY_CTX_SUPPORT is not set CONFIG_NOUVEAU_DEBUG=5 CONFIG_NOUVEAU_DEBUG_DEFAULT=3 # CONFIG_NOUVEAU_DEBUG_MMU is not set 
+# CONFIG_NOUVEAU_DEBUG_PUSH is not set CONFIG_DRM_NOUVEAU_BACKLIGHT=y # CONFIG_DRM_VGEM is not set # CONFIG_DRM_VKMS is not set -CONFIG_DRM_ATI_PCIGART=y CONFIG_DRM_EXYNOS=m # @@ -4403,7 +4925,6 @@ CONFIG_ROCKCHIP_INNO_HDMI=y # CONFIG_DRM_UDL is not set # CONFIG_DRM_AST is not set # CONFIG_DRM_MGAG200 is not set -# CONFIG_DRM_CIRRUS_QEMU is not set # CONFIG_DRM_ARMADA is not set CONFIG_DRM_ATMEL_HLCDC=m # CONFIG_DRM_RCAR_DW_HDMI is not set @@ -4419,7 +4940,6 @@ CONFIG_DRM_SUN8I_TCON_TOP=m # CONFIG_DRM_OMAP is not set # CONFIG_DRM_TILCDC is not set # CONFIG_DRM_QXL is not set -# CONFIG_DRM_BOCHS is not set CONFIG_DRM_VIRTIO_GPU=m CONFIG_DRM_FSL_DCU=m CONFIG_DRM_STM=m @@ -4429,20 +4949,35 @@ CONFIG_DRM_PANEL=y # # Display Panels # +# CONFIG_DRM_PANEL_ABT_Y030XX067A is not set # CONFIG_DRM_PANEL_ARM_VERSATILE is not set +# CONFIG_DRM_PANEL_ASUS_Z00T_TM5P5_NT35596 is not set +# CONFIG_DRM_PANEL_BOE_HIMAX8279D is not set +# CONFIG_DRM_PANEL_BOE_TV101WUM_NL6 is not set +# CONFIG_DRM_PANEL_DSI_CM is not set # CONFIG_DRM_PANEL_LVDS is not set CONFIG_DRM_PANEL_SIMPLE=y +# CONFIG_DRM_PANEL_ELIDA_KD35T133 is not set +# CONFIG_DRM_PANEL_FEIXIN_K101_IM2BA02 is not set # CONFIG_DRM_PANEL_FEIYANG_FY07024DI26A30D is not set # CONFIG_DRM_PANEL_ILITEK_IL9322 is not set +# CONFIG_DRM_PANEL_ILITEK_ILI9341 is not set # CONFIG_DRM_PANEL_ILITEK_ILI9881C is not set +# CONFIG_DRM_PANEL_INNOLUX_EJ030NA is not set # CONFIG_DRM_PANEL_INNOLUX_P079ZCA is not set # CONFIG_DRM_PANEL_JDI_LT070ME05000 is not set +# CONFIG_DRM_PANEL_KHADAS_TS050 is not set # CONFIG_DRM_PANEL_KINGDISPLAY_KD097D04 is not set +# CONFIG_DRM_PANEL_LEADTEK_LTK050H3146W is not set +# CONFIG_DRM_PANEL_LEADTEK_LTK500HD1829 is not set CONFIG_DRM_PANEL_SAMSUNG_LD9040=m # CONFIG_DRM_PANEL_LG_LB035Q02 is not set # CONFIG_DRM_PANEL_LG_LG4573 is not set # CONFIG_DRM_PANEL_NEC_NL8048HL11 is not set +# CONFIG_DRM_PANEL_NOVATEK_NT35510 is not set +# CONFIG_DRM_PANEL_NOVATEK_NT36672A is not set # CONFIG_DRM_PANEL_NOVATEK_NT39016 is not set +# CONFIG_DRM_PANEL_MANTIX_MLAF057WE51 is not set # CONFIG_DRM_PANEL_OLIMEX_LCD_OLINUXINO is not set CONFIG_DRM_PANEL_ORISETECH_OTM8009A=m # CONFIG_DRM_PANEL_OSD_OSD101T2587_53TS is not set @@ -4450,24 +4985,33 @@ CONFIG_DRM_PANEL_ORISETECH_OTM8009A=m # CONFIG_DRM_PANEL_RASPBERRYPI_TOUCHSCREEN is not set # CONFIG_DRM_PANEL_RAYDIUM_RM67191 is not set CONFIG_DRM_PANEL_RAYDIUM_RM68200=m -# CONFIG_DRM_PANEL_ROCKTECH_JH057N00900 is not set # CONFIG_DRM_PANEL_RONBO_RB070D30 is not set +# CONFIG_DRM_PANEL_SAMSUNG_ATNA33XC20 is not set +# CONFIG_DRM_PANEL_SAMSUNG_DB7430 is not set # CONFIG_DRM_PANEL_SAMSUNG_S6D16D0 is not set # CONFIG_DRM_PANEL_SAMSUNG_S6E3HA2 is not set CONFIG_DRM_PANEL_SAMSUNG_S6E63J0X03=m # CONFIG_DRM_PANEL_SAMSUNG_S6E63M0 is not set +# CONFIG_DRM_PANEL_SAMSUNG_S6E88A0_AMS452EF01 is not set CONFIG_DRM_PANEL_SAMSUNG_S6E8AA0=m +# CONFIG_DRM_PANEL_SAMSUNG_SOFEF00 is not set # CONFIG_DRM_PANEL_SEIKO_43WVF1G is not set # CONFIG_DRM_PANEL_SHARP_LQ101R1SX01 is not set # CONFIG_DRM_PANEL_SHARP_LS037V7DW01 is not set # CONFIG_DRM_PANEL_SHARP_LS043T1LE01 is not set # CONFIG_DRM_PANEL_SITRONIX_ST7701 is not set +# CONFIG_DRM_PANEL_SITRONIX_ST7703 is not set # CONFIG_DRM_PANEL_SITRONIX_ST7789V is not set +# CONFIG_DRM_PANEL_SONY_ACX424AKP is not set # CONFIG_DRM_PANEL_SONY_ACX565AKM is not set +# CONFIG_DRM_PANEL_TDO_TL070WSH30 is not set # CONFIG_DRM_PANEL_TPO_TD028TTEC1 is not set # CONFIG_DRM_PANEL_TPO_TD043MTEA1 is not set # CONFIG_DRM_PANEL_TPO_TPG110 is not set # CONFIG_DRM_PANEL_TRULY_NT35597_WQXGA is not set +# 
CONFIG_DRM_PANEL_VISIONOX_RM69299 is not set +# CONFIG_DRM_PANEL_WIDECHIPS_WS2401 is not set +# CONFIG_DRM_PANEL_XINPENG_XPP055C272 is not set # end of Display Panels CONFIG_DRM_BRIDGE=y @@ -4476,26 +5020,43 @@ CONFIG_DRM_PANEL_BRIDGE=y # # Display Interface Bridges # -# CONFIG_DRM_ANALOGIX_ANX78XX is not set # CONFIG_DRM_CDNS_DSI is not set -CONFIG_DRM_DUMB_VGA_DAC=m -# CONFIG_DRM_LVDS_ENCODER is not set +# CONFIG_DRM_CHIPONE_ICN6211 is not set +# CONFIG_DRM_CHRONTEL_CH7033 is not set +# CONFIG_DRM_CROS_EC_ANX7688 is not set +# CONFIG_DRM_DISPLAY_CONNECTOR is not set +# CONFIG_DRM_LONTIUM_LT8912B is not set +# CONFIG_DRM_LONTIUM_LT9611 is not set +# CONFIG_DRM_LONTIUM_LT9611UXC is not set +# CONFIG_DRM_ITE_IT66121 is not set +# CONFIG_DRM_LVDS_CODEC is not set # CONFIG_DRM_MEGACHIPS_STDPXXXX_GE_B850V3_FW is not set +# CONFIG_DRM_NWL_MIPI_DSI is not set CONFIG_DRM_NXP_PTN3460=m CONFIG_DRM_PARADE_PS8622=m +# CONFIG_DRM_PARADE_PS8640 is not set # CONFIG_DRM_SIL_SII8620 is not set CONFIG_DRM_SII902X=m CONFIG_DRM_SII9234=m +# CONFIG_DRM_SIMPLE_BRIDGE is not set # CONFIG_DRM_THINE_THC63LVD1024 is not set +# CONFIG_DRM_TOSHIBA_TC358762 is not set CONFIG_DRM_TOSHIBA_TC358764=m # CONFIG_DRM_TOSHIBA_TC358767 is not set +# CONFIG_DRM_TOSHIBA_TC358768 is not set +# CONFIG_DRM_TOSHIBA_TC358775 is not set # CONFIG_DRM_TI_TFP410 is not set +# CONFIG_DRM_TI_SN65DSI83 is not set # CONFIG_DRM_TI_SN65DSI86 is not set +# CONFIG_DRM_TI_TPD12S015 is not set +# CONFIG_DRM_ANALOGIX_ANX6345 is not set +# CONFIG_DRM_ANALOGIX_ANX78XX is not set CONFIG_DRM_ANALOGIX_DP=m +# CONFIG_DRM_ANALOGIX_ANX7625 is not set CONFIG_DRM_I2C_ADV7511=m CONFIG_DRM_I2C_ADV7511_AUDIO=y -CONFIG_DRM_I2C_ADV7533=y CONFIG_DRM_I2C_ADV7511_CEC=y +# CONFIG_DRM_CDNS_MHDP8546 is not set CONFIG_DRM_DW_HDMI=m # CONFIG_DRM_DW_HDMI_AHB_AUDIO is not set # CONFIG_DRM_DW_HDMI_I2S_AUDIO is not set @@ -4509,23 +5070,29 @@ CONFIG_DRM_VC4=m # CONFIG_DRM_VC4_HDMI_CEC is not set CONFIG_DRM_ETNAVIV=m CONFIG_DRM_ETNAVIV_THERMAL=y -# CONFIG_DRM_ARCPGU is not set CONFIG_DRM_MXS=y CONFIG_DRM_MXSFB=m +# CONFIG_DRM_ARCPGU is not set +# CONFIG_DRM_BOCHS is not set +# CONFIG_DRM_CIRRUS_QEMU is not set # CONFIG_DRM_GM12U320 is not set +# CONFIG_DRM_SIMPLEDRM is not set # CONFIG_TINYDRM_HX8357D is not set # CONFIG_TINYDRM_ILI9225 is not set # CONFIG_TINYDRM_ILI9341 is not set +# CONFIG_TINYDRM_ILI9486 is not set # CONFIG_TINYDRM_MI0283QT is not set # CONFIG_TINYDRM_REPAPER is not set # CONFIG_TINYDRM_ST7586 is not set # CONFIG_TINYDRM_ST7735R is not set CONFIG_DRM_PL111=m # CONFIG_DRM_TVE200 is not set -# CONFIG_DRM_XEN is not set +# CONFIG_DRM_XEN_FRONTEND is not set CONFIG_DRM_LIMA=m CONFIG_DRM_PANFROST=m # CONFIG_DRM_MCDE is not set +# CONFIG_DRM_TIDSS is not set +# CONFIG_DRM_GUD is not set CONFIG_DRM_LEGACY=y # CONFIG_DRM_TDFX is not set # CONFIG_DRM_R128 is not set @@ -4607,9 +5174,9 @@ CONFIG_FB_SIMPLE=y # # CONFIG_LCD_CLASS_DEVICE is not set CONFIG_BACKLIGHT_CLASS_DEVICE=y -CONFIG_BACKLIGHT_GENERIC=y +# CONFIG_BACKLIGHT_KTD253 is not set CONFIG_BACKLIGHT_PWM=y -# CONFIG_BACKLIGHT_PM8941_WLED is not set +# CONFIG_BACKLIGHT_QCOM_WLED is not set # CONFIG_BACKLIGHT_ADP8860 is not set # CONFIG_BACKLIGHT_ADP8870 is not set # CONFIG_BACKLIGHT_LM3630A is not set @@ -4622,6 +5189,7 @@ CONFIG_BACKLIGHT_GPIO=y # CONFIG_BACKLIGHT_LV5207LP is not set # CONFIG_BACKLIGHT_BD6107 is not set # CONFIG_BACKLIGHT_ARCXCNN is not set +# CONFIG_BACKLIGHT_LED is not set # end of Backlight & LCD device support CONFIG_VIDEOMODE_HELPERS=y @@ -4632,6 +5200,7 @@ CONFIG_HDMI=y # 
CONFIG_DUMMY_CONSOLE=y CONFIG_FRAMEBUFFER_CONSOLE=y +# CONFIG_FRAMEBUFFER_CONSOLE_LEGACY_ACCELERATION is not set CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY=y CONFIG_FRAMEBUFFER_CONSOLE_ROTATION=y # CONFIG_FRAMEBUFFER_CONSOLE_DEFERRED_TAKEOVER is not set @@ -4756,10 +5325,14 @@ CONFIG_SND_USB_AUDIO_USE_MEDIA_CONTROLLER=y # CONFIG_SND_USB_VARIAX is not set CONFIG_SND_SOC=m CONFIG_SND_SOC_GENERIC_DMAENGINE_PCM=y +CONFIG_SND_SOC_ADI=m +# CONFIG_SND_SOC_ADI_AXI_I2S is not set +# CONFIG_SND_SOC_ADI_AXI_SPDIF is not set # CONFIG_SND_SOC_AMD_ACP is not set CONFIG_SND_ATMEL_SOC=m # CONFIG_SND_SOC_MIKROE_PROTO is not set CONFIG_SND_BCM2835_SOC_I2S=m +# CONFIG_SND_BCM63XX_I2S_WHISTLER is not set # CONFIG_SND_DESIGNWARE_I2S is not set # @@ -4771,11 +5344,15 @@ CONFIG_SND_BCM2835_SOC_I2S=m # # CONFIG_SND_SOC_FSL_ASRC is not set CONFIG_SND_SOC_FSL_SAI=m +# CONFIG_SND_SOC_FSL_MQS is not set # CONFIG_SND_SOC_FSL_AUDMIX is not set # CONFIG_SND_SOC_FSL_SSI is not set # CONFIG_SND_SOC_FSL_SPDIF is not set # CONFIG_SND_SOC_FSL_ESAI is not set # CONFIG_SND_SOC_FSL_MICFIL is not set +# CONFIG_SND_SOC_FSL_XCVR is not set +# CONFIG_SND_SOC_FSL_AUD2HTX is not set +# CONFIG_SND_SOC_FSL_RPMSG is not set # CONFIG_SND_SOC_IMX_AUDMUX is not set # CONFIG_SND_IMX_SOC is not set # end of SoC Audio for Freescale CPUs @@ -4800,7 +5377,9 @@ CONFIG_SND_SOC_SAMSUNG_SMDK_WM8994=m CONFIG_SND_SOC_SMDK_WM8994_PCM=m CONFIG_SND_SOC_SNOW=m CONFIG_SND_SOC_ODROID=m -# CONFIG_SND_SOC_ARNDALE_RT5631_ALC5631 is not set +# CONFIG_SND_SOC_ARNDALE is not set +# CONFIG_SND_SOC_SAMSUNG_ARIES_WM8994 is not set +# CONFIG_SND_SOC_SAMSUNG_MIDAS_WM1811 is not set # CONFIG_SND_SOC_SOF_TOPLEVEL is not set # @@ -4843,7 +5422,6 @@ CONFIG_SND_SUN8I_ADDA_PR_REGMAP=m # CONFIG_SND_SOC_XILINX_AUDIO_FORMATTER is not set # CONFIG_SND_SOC_XILINX_SPDIF is not set # CONFIG_SND_SOC_XTFPGA_I2S is not set -# CONFIG_ZX_TDM is not set CONFIG_SND_SOC_I2C_AND_SPI=m # @@ -4851,10 +5429,14 @@ CONFIG_SND_SOC_I2C_AND_SPI=m # CONFIG_SND_SOC_WM_HUBS=m # CONFIG_SND_SOC_AC97_CODEC is not set +# CONFIG_SND_SOC_ADAU1372_I2C is not set +# CONFIG_SND_SOC_ADAU1372_SPI is not set # CONFIG_SND_SOC_ADAU1701 is not set # CONFIG_SND_SOC_ADAU1761_I2C is not set # CONFIG_SND_SOC_ADAU1761_SPI is not set # CONFIG_SND_SOC_ADAU7002 is not set +# CONFIG_SND_SOC_ADAU7118_HW is not set +# CONFIG_SND_SOC_ADAU7118_I2C is not set # CONFIG_SND_SOC_AK4104 is not set # CONFIG_SND_SOC_AK4118 is not set # CONFIG_SND_SOC_AK4458 is not set @@ -4878,6 +5460,7 @@ CONFIG_SND_SOC_CPCAP=m # CONFIG_SND_SOC_CS42L52 is not set # CONFIG_SND_SOC_CS42L56 is not set # CONFIG_SND_SOC_CS42L73 is not set +# CONFIG_SND_SOC_CS4234 is not set # CONFIG_SND_SOC_CS4265 is not set # CONFIG_SND_SOC_CS4270 is not set # CONFIG_SND_SOC_CS4271_I2C is not set @@ -4888,6 +5471,7 @@ CONFIG_SND_SOC_CPCAP=m # CONFIG_SND_SOC_CS4349 is not set # CONFIG_SND_SOC_CS53L30 is not set # CONFIG_SND_SOC_CX2072X is not set +# CONFIG_SND_SOC_DA7213 is not set # CONFIG_SND_SOC_DMIC is not set CONFIG_SND_SOC_HDMI_CODEC=m # CONFIG_SND_SOC_ES7134 is not set @@ -4896,6 +5480,7 @@ CONFIG_SND_SOC_HDMI_CODEC=m # CONFIG_SND_SOC_ES8328_I2C is not set # CONFIG_SND_SOC_ES8328_SPI is not set # CONFIG_SND_SOC_GTM601 is not set +# CONFIG_SND_SOC_ICS43432 is not set # CONFIG_SND_SOC_INNO_RK3036 is not set # CONFIG_SND_SOC_MAX98088 is not set CONFIG_SND_SOC_MAX98090=m @@ -4904,7 +5489,8 @@ CONFIG_SND_SOC_MAX98095=m # CONFIG_SND_SOC_MAX98504 is not set # CONFIG_SND_SOC_MAX9867 is not set # CONFIG_SND_SOC_MAX98927 is not set -# CONFIG_SND_SOC_MAX98373 is not set +# 
CONFIG_SND_SOC_MAX98373_I2C is not set +# CONFIG_SND_SOC_MAX98390 is not set # CONFIG_SND_SOC_MAX9860 is not set # CONFIG_SND_SOC_MSM8916_WCD_DIGITAL is not set # CONFIG_SND_SOC_PCM1681 is not set @@ -4917,18 +5503,23 @@ CONFIG_SND_SOC_MAX98095=m # CONFIG_SND_SOC_PCM3060_SPI is not set # CONFIG_SND_SOC_PCM3168A_I2C is not set # CONFIG_SND_SOC_PCM3168A_SPI is not set +# CONFIG_SND_SOC_PCM5102A is not set # CONFIG_SND_SOC_PCM512x_I2C is not set # CONFIG_SND_SOC_PCM512x_SPI is not set # CONFIG_SND_SOC_RK3328 is not set +# CONFIG_SND_SOC_RK817 is not set CONFIG_SND_SOC_RL6231=m # CONFIG_SND_SOC_RT5616 is not set # CONFIG_SND_SOC_RT5631 is not set +# CONFIG_SND_SOC_RT5640 is not set CONFIG_SND_SOC_RT5645=m +# CONFIG_SND_SOC_RT5659 is not set CONFIG_SND_SOC_SGTL5000=m # CONFIG_SND_SOC_SIMPLE_AMPLIFIER is not set -# CONFIG_SND_SOC_SIRF_AUDIO_CODEC is not set +# CONFIG_SND_SOC_SIMPLE_MUX is not set CONFIG_SND_SOC_SPDIF=m # CONFIG_SND_SOC_SSM2305 is not set +# CONFIG_SND_SOC_SSM2518 is not set # CONFIG_SND_SOC_SSM2602_SPI is not set # CONFIG_SND_SOC_SSM2602_I2C is not set # CONFIG_SND_SOC_SSM4567 is not set @@ -4936,19 +5527,25 @@ CONFIG_SND_SOC_SPDIF=m # CONFIG_SND_SOC_STA350 is not set CONFIG_SND_SOC_STI_SAS=m # CONFIG_SND_SOC_TAS2552 is not set +# CONFIG_SND_SOC_TAS2562 is not set +# CONFIG_SND_SOC_TAS2764 is not set +# CONFIG_SND_SOC_TAS2770 is not set # CONFIG_SND_SOC_TAS5086 is not set # CONFIG_SND_SOC_TAS571X is not set # CONFIG_SND_SOC_TAS5720 is not set # CONFIG_SND_SOC_TAS6424 is not set # CONFIG_SND_SOC_TDA7419 is not set # CONFIG_SND_SOC_TFA9879 is not set +# CONFIG_SND_SOC_TFA989X is not set CONFIG_SND_SOC_TLV320AIC23=m CONFIG_SND_SOC_TLV320AIC23_I2C=m # CONFIG_SND_SOC_TLV320AIC23_SPI is not set # CONFIG_SND_SOC_TLV320AIC31XX is not set # CONFIG_SND_SOC_TLV320AIC32X4_I2C is not set # CONFIG_SND_SOC_TLV320AIC32X4_SPI is not set -# CONFIG_SND_SOC_TLV320AIC3X is not set +# CONFIG_SND_SOC_TLV320AIC3X_I2C is not set +# CONFIG_SND_SOC_TLV320AIC3X_SPI is not set +# CONFIG_SND_SOC_TLV320ADCX140 is not set CONFIG_SND_SOC_TS3A227E=m # CONFIG_SND_SOC_TSCS42XX is not set # CONFIG_SND_SOC_TSCS454 is not set @@ -4977,21 +5574,28 @@ CONFIG_SND_SOC_WM8904=m CONFIG_SND_SOC_WM8978=m # CONFIG_SND_SOC_WM8985 is not set CONFIG_SND_SOC_WM8994=m -# CONFIG_SND_SOC_ZX_AUD96P22 is not set +# CONFIG_SND_SOC_ZL38060 is not set # CONFIG_SND_SOC_MAX9759 is not set # CONFIG_SND_SOC_MT6351 is not set # CONFIG_SND_SOC_MT6358 is not set +# CONFIG_SND_SOC_MT6660 is not set +# CONFIG_SND_SOC_NAU8315 is not set # CONFIG_SND_SOC_NAU8540 is not set # CONFIG_SND_SOC_NAU8810 is not set # CONFIG_SND_SOC_NAU8822 is not set # CONFIG_SND_SOC_NAU8824 is not set # CONFIG_SND_SOC_TPA6130A2 is not set +# CONFIG_SND_SOC_LPASS_WSA_MACRO is not set +# CONFIG_SND_SOC_LPASS_VA_MACRO is not set +# CONFIG_SND_SOC_LPASS_RX_MACRO is not set +# CONFIG_SND_SOC_LPASS_TX_MACRO is not set # end of CODEC drivers CONFIG_SND_SIMPLE_CARD_UTILS=m CONFIG_SND_SIMPLE_CARD=m # CONFIG_SND_AUDIO_GRAPH_CARD is not set CONFIG_SND_XEN_FRONTEND=m +# CONFIG_SND_VIRTIO is not set # # HID support @@ -5032,8 +5636,10 @@ CONFIG_HID_GENERIC=y # CONFIG_HID_EZKEY is not set # CONFIG_HID_GEMBIRD is not set # CONFIG_HID_GFRM is not set +# CONFIG_HID_GLORIOUS is not set # CONFIG_HID_HOLTEK is not set # CONFIG_HID_GOOGLE_HAMMER is not set +# CONFIG_HID_VIVALDI is not set # CONFIG_HID_GT683R is not set # CONFIG_HID_KEYTOUCH is not set # CONFIG_HID_KYE is not set @@ -5065,11 +5671,13 @@ CONFIG_HID_GENERIC=y # CONFIG_HID_PETALYNX is not set # CONFIG_HID_PICOLCD is not set # 
CONFIG_HID_PLANTRONICS is not set +# CONFIG_HID_PLAYSTATION is not set # CONFIG_HID_PRIMAX is not set # CONFIG_HID_RETRODE is not set # CONFIG_HID_ROCCAT is not set # CONFIG_HID_SAITEK is not set # CONFIG_HID_SAMSUNG is not set +# CONFIG_HID_SEMITEK is not set # CONFIG_HID_SONY is not set # CONFIG_HID_SPEEDLINK is not set # CONFIG_HID_STEAM is not set @@ -5091,6 +5699,7 @@ CONFIG_HID_GENERIC=y # CONFIG_HID_ZYDACRON is not set # CONFIG_HID_SENSOR_HUB is not set # CONFIG_HID_ALPS is not set +# CONFIG_HID_MCP2221 is not set # end of Special HID drivers # @@ -5104,7 +5713,8 @@ CONFIG_USB_HID=y # # I2C HID support # -# CONFIG_I2C_HID is not set +# CONFIG_I2C_HID_OF is not set +# CONFIG_I2C_HID_OF_GOODIX is not set # end of I2C HID support # end of HID support @@ -5123,10 +5733,11 @@ CONFIG_USB_PCI=y # Miscellaneous USB options # CONFIG_USB_DEFAULT_PERSIST=y +# CONFIG_USB_FEW_INIT_RETRIES is not set # CONFIG_USB_DYNAMIC_MINORS is not set CONFIG_USB_OTG=y -# CONFIG_USB_OTG_WHITELIST is not set -# CONFIG_USB_OTG_BLACKLIST_HUB is not set +# CONFIG_USB_OTG_PRODUCTLIST is not set +# CONFIG_USB_OTG_DISABLE_EXTERNAL_HUB is not set # CONFIG_USB_OTG_FSM is not set # CONFIG_USB_LEDS_TRIGGER_USBPORT is not set CONFIG_USB_AUTOSUSPEND_DELAY=2 @@ -5139,13 +5750,13 @@ CONFIG_USB_AUTOSUSPEND_DELAY=2 CONFIG_USB_XHCI_HCD=y # CONFIG_USB_XHCI_DBGCAP is not set CONFIG_USB_XHCI_PCI=y +# CONFIG_USB_XHCI_PCI_RENESAS is not set CONFIG_USB_XHCI_PLATFORM=y CONFIG_USB_EHCI_HCD=y CONFIG_USB_EHCI_ROOT_HUB_TT=y CONFIG_USB_EHCI_TT_NEWSCHED=y CONFIG_USB_EHCI_PCI=y # CONFIG_USB_EHCI_FSL is not set -# CONFIG_USB_EHCI_MXC is not set CONFIG_USB_EHCI_HCD_OMAP=m CONFIG_USB_EHCI_EXYNOS=y CONFIG_USB_EHCI_HCD_PLATFORM=y @@ -5161,7 +5772,6 @@ CONFIG_USB_OHCI_HCD_PLATFORM=y # CONFIG_USB_UHCI_HCD is not set # CONFIG_USB_SL811_HCD is not set CONFIG_USB_R8A66597_HCD=m -# CONFIG_USB_IMX21_HCD is not set # CONFIG_USB_HCD_BCMA is not set # CONFIG_USB_HCD_TEST_MODE is not set @@ -5203,7 +5813,7 @@ CONFIG_USB_UAS=m # CONFIG_USB_MDC800 is not set # CONFIG_USB_MICROTEK is not set # CONFIG_USBIP_CORE is not set -# CONFIG_USB_CDNS3 is not set +# CONFIG_USB_CDNS_SUPPORT is not set CONFIG_USB_MUSB_HDRC=m # CONFIG_USB_MUSB_HOST is not set # CONFIG_USB_MUSB_GADGET is not set @@ -5217,7 +5827,6 @@ CONFIG_USB_MUSB_TUSB6010=m CONFIG_USB_MUSB_OMAP2PLUS=m CONFIG_USB_MUSB_AM35X=m CONFIG_USB_MUSB_DSPS=m -CONFIG_USB_MUSB_AM335X_CHILD=m # # MUSB DMA mode @@ -5251,10 +5860,13 @@ CONFIG_USB_DWC2_DUAL_ROLE=y # CONFIG_USB_DWC2_DEBUG is not set # CONFIG_USB_DWC2_TRACK_MISSED_SOFS is not set CONFIG_USB_CHIPIDEA=y -CONFIG_USB_CHIPIDEA_OF=y -CONFIG_USB_CHIPIDEA_PCI=m CONFIG_USB_CHIPIDEA_UDC=y CONFIG_USB_CHIPIDEA_HOST=y +CONFIG_USB_CHIPIDEA_PCI=m +CONFIG_USB_CHIPIDEA_MSM=y +CONFIG_USB_CHIPIDEA_IMX=y +CONFIG_USB_CHIPIDEA_GENERIC=y +CONFIG_USB_CHIPIDEA_TEGRA=y CONFIG_USB_ISP1760=y CONFIG_USB_ISP1760_HCD=y CONFIG_USB_ISP1761_UDC=y @@ -5281,6 +5893,7 @@ CONFIG_USB_ISP1760_DUAL_ROLE=y # CONFIG_USB_IDMOUSE is not set # CONFIG_USB_FTDI_ELAN is not set # CONFIG_USB_APPLEDISPLAY is not set +CONFIG_APPLE_MFI_FASTCHARGE=m # CONFIG_USB_SISUSBVGA is not set # CONFIG_USB_LD is not set # CONFIG_USB_TRANCEVIBRATOR is not set @@ -5322,7 +5935,6 @@ CONFIG_USB_GADGET_STORAGE_NUM_BUFFERS=2 # # USB Peripheral Controller # -CONFIG_USB_FSL_USB2=y # CONFIG_USB_FOTG210_UDC is not set # CONFIG_USB_GR_UDC is not set # CONFIG_USB_R8A66597 is not set @@ -5333,17 +5945,13 @@ CONFIG_USB_SNP_CORE=y CONFIG_USB_SNP_UDC_PLAT=y # CONFIG_USB_M66592 is not set CONFIG_USB_BDC_UDC=y - -# -# Platform Support -# 
-CONFIG_USB_BDC_PCI=y # CONFIG_USB_AMD5536UDC is not set # CONFIG_USB_NET2272 is not set # CONFIG_USB_NET2280 is not set # CONFIG_USB_GOKU is not set # CONFIG_USB_EG20T is not set # CONFIG_USB_GADGET_XILINX is not set +# CONFIG_USB_MAX3420_UDC is not set # CONFIG_USB_DUMMY_HCD is not set # end of USB Peripheral Controller @@ -5388,6 +5996,10 @@ CONFIG_USB_CONFIGFS_F_MIDI=y CONFIG_USB_CONFIGFS_F_HID=y CONFIG_USB_CONFIGFS_F_UVC=y CONFIG_USB_CONFIGFS_F_PRINTER=y + +# +# USB Gadget precomposed configurations +# # CONFIG_USB_ZERO is not set # CONFIG_USB_AUDIO is not set CONFIG_USB_ETH=m @@ -5406,6 +6018,9 @@ CONFIG_USB_ETH_RNDIS=y # CONFIG_USB_G_HID is not set # CONFIG_USB_G_DBGP is not set # CONFIG_USB_G_WEBCAM is not set +# CONFIG_USB_RAW_GADGET is not set +# end of USB Gadget precomposed configurations + # CONFIG_TYPEC is not set CONFIG_USB_ROLE_SWITCH=y CONFIG_MMC=y @@ -5437,6 +6052,7 @@ CONFIG_MMC_SDHCI_ESDHC_IMX=y CONFIG_MMC_SDHCI_S3C=y CONFIG_MMC_SDHCI_S3C_DMA=y # CONFIG_MMC_SDHCI_F_SDH30 is not set +# CONFIG_MMC_SDHCI_MILBEAUT is not set CONFIG_MMC_SDHCI_IPROC=y CONFIG_MMC_OMAP=y CONFIG_MMC_OMAP_HS=y @@ -5458,41 +6074,41 @@ CONFIG_MMC_DW_ROCKCHIP=y # CONFIG_MMC_USDHI6ROL0 is not set CONFIG_MMC_SUNXI=y CONFIG_MMC_CQHCI=y +# CONFIG_MMC_HSQ is not set # CONFIG_MMC_TOSHIBA_PCI is not set CONFIG_MMC_BCM2835=y # CONFIG_MMC_MTK is not set # CONFIG_MMC_SDHCI_XENON is not set CONFIG_MMC_SDHCI_OMAP=y # CONFIG_MMC_SDHCI_AM654 is not set +CONFIG_MMC_SDHCI_EXTERNAL_DMA=y # CONFIG_MEMSTICK is not set CONFIG_NEW_LEDS=y CONFIG_LEDS_CLASS=y CONFIG_LEDS_CLASS_FLASH=m +# CONFIG_LEDS_CLASS_MULTICOLOR is not set # CONFIG_LEDS_BRIGHTNESS_HW_CHANGED is not set # # LED drivers # -# CONFIG_LEDS_AAT1290 is not set # CONFIG_LEDS_AN30259A is not set -# CONFIG_LEDS_AS3645A is not set +# CONFIG_LEDS_AW2013 is not set # CONFIG_LEDS_BCM6328 is not set # CONFIG_LEDS_BCM6358 is not set CONFIG_LEDS_CPCAP=m # CONFIG_LEDS_CR0014114 is not set +# CONFIG_LEDS_EL15203000 is not set # CONFIG_LEDS_LM3530 is not set # CONFIG_LEDS_LM3532 is not set # CONFIG_LEDS_LM3642 is not set # CONFIG_LEDS_LM3692X is not set -# CONFIG_LEDS_LM3601X is not set # CONFIG_LEDS_PCA9532 is not set CONFIG_LEDS_GPIO=y # CONFIG_LEDS_LP3944 is not set # CONFIG_LEDS_LP3952 is not set -# CONFIG_LEDS_LP5521 is not set -# CONFIG_LEDS_LP5523 is not set -# CONFIG_LEDS_LP5562 is not set -# CONFIG_LEDS_LP8501 is not set +# CONFIG_LEDS_LP50XX is not set +# CONFIG_LEDS_LP55XX_COMMON is not set # CONFIG_LEDS_LP8860 is not set # CONFIG_LEDS_PCA955X is not set # CONFIG_LEDS_PCA963X is not set @@ -5503,10 +6119,8 @@ CONFIG_LEDS_PWM=y # CONFIG_LEDS_LT3593 is not set # CONFIG_LEDS_TCA6507 is not set # CONFIG_LEDS_TLC591XX is not set -CONFIG_LEDS_MAX77693=m CONFIG_LEDS_MAX8997=m # CONFIG_LEDS_LM355x is not set -# CONFIG_LEDS_KTD2692 is not set # CONFIG_LEDS_IS31FL319X is not set # CONFIG_LEDS_IS31FL32XX is not set @@ -5521,6 +6135,18 @@ CONFIG_LEDS_MAX8997=m # CONFIG_LEDS_SPI_BYTE is not set # CONFIG_LEDS_TI_LMU_COMMON is not set +# +# Flash and Torch LED drivers +# +# CONFIG_LEDS_AAT1290 is not set +# CONFIG_LEDS_AS3645A is not set +# CONFIG_LEDS_KTD2692 is not set +# CONFIG_LEDS_LM3601X is not set +CONFIG_LEDS_MAX77693=m +# CONFIG_LEDS_RT4505 is not set +# CONFIG_LEDS_RT8515 is not set +# CONFIG_LEDS_SGM3140 is not set + # # LED Triggers # @@ -5545,6 +6171,7 @@ CONFIG_LEDS_TRIGGER_CAMERA=y # CONFIG_LEDS_TRIGGER_NETDEV is not set # CONFIG_LEDS_TRIGGER_PATTERN is not set # CONFIG_LEDS_TRIGGER_AUDIO is not set +# CONFIG_LEDS_TRIGGER_TTY is not set # CONFIG_ACCESSIBILITY 
is not set # CONFIG_INFINIBAND is not set CONFIG_EDAC_ATOMIC_SCRUB=y @@ -5606,6 +6233,7 @@ CONFIG_RTC_DRV_TWL4030=y CONFIG_RTC_DRV_PALMAS=y CONFIG_RTC_DRV_TPS6586X=y CONFIG_RTC_DRV_TPS65910=y +# CONFIG_RTC_DRV_RC5T619 is not set CONFIG_RTC_DRV_S35390A=m # CONFIG_RTC_DRV_FM3130 is not set # CONFIG_RTC_DRV_RX8010 is not set @@ -5613,6 +6241,7 @@ CONFIG_RTC_DRV_RX8581=m # CONFIG_RTC_DRV_RX8025 is not set CONFIG_RTC_DRV_EM3027=y # CONFIG_RTC_DRV_RV3028 is not set +# CONFIG_RTC_DRV_RV3032 is not set # CONFIG_RTC_DRV_RV8803 is not set CONFIG_RTC_DRV_S5M=m # CONFIG_RTC_DRV_SD3078 is not set @@ -5630,7 +6259,6 @@ CONFIG_RTC_DRV_S5M=m # CONFIG_RTC_DRV_MAX6916 is not set # CONFIG_RTC_DRV_R9701 is not set # CONFIG_RTC_DRV_RX4581 is not set -# CONFIG_RTC_DRV_RX6110 is not set # CONFIG_RTC_DRV_RS5C348 is not set # CONFIG_RTC_DRV_MAX6902 is not set # CONFIG_RTC_DRV_PCF2123 is not set @@ -5643,6 +6271,7 @@ CONFIG_RTC_I2C_AND_SPI=y # CONFIG_RTC_DRV_DS3232 is not set # CONFIG_RTC_DRV_PCF2127 is not set # CONFIG_RTC_DRV_RV3029C2 is not set +# CONFIG_RTC_DRV_RX6110 is not set # # Platform RTC drivers @@ -5690,6 +6319,7 @@ CONFIG_RTC_DRV_CPCAP=m # # HID Sensor RTC drivers # +# CONFIG_RTC_DRV_GOLDFISH is not set CONFIG_DMADEVICES=y # CONFIG_DMADEVICES_DEBUG is not set @@ -5715,6 +6345,8 @@ CONFIG_MX3_IPU=y CONFIG_MX3_IPU_IRQS=4 # CONFIG_NBPFAXI_DMA is not set CONFIG_PL330_DMA=y +# CONFIG_PLX_DMA is not set +# CONFIG_XILINX_ZYNQMP_DPDMA is not set # CONFIG_QCOM_HIDMA_MGMT is not set # CONFIG_QCOM_HIDMA is not set CONFIG_DW_DMAC_CORE=y @@ -5722,6 +6354,7 @@ CONFIG_DW_DMAC=y # CONFIG_DW_DMAC_PCI is not set # CONFIG_DW_EDMA is not set # CONFIG_DW_EDMA_PCIE is not set +# CONFIG_SF_PDMA is not set CONFIG_TI_CPPI41=m CONFIG_TI_EDMA=y CONFIG_DMA_OMAP=y @@ -5739,7 +6372,11 @@ CONFIG_TI_DMA_CROSSBAR=y CONFIG_SYNC_FILE=y # CONFIG_SW_SYNC is not set # CONFIG_UDMABUF is not set +# CONFIG_DMABUF_MOVE_NOTIFY is not set +# CONFIG_DMABUF_DEBUG is not set # CONFIG_DMABUF_SELFTESTS is not set +# CONFIG_DMABUF_HEAPS is not set +# CONFIG_DMABUF_SYSFS_STATS is not set # end of DMABUF options # CONFIG_AUXDISPLAY is not set @@ -5747,6 +6384,7 @@ CONFIG_SYNC_FILE=y # CONFIG_VFIO is not set # CONFIG_VIRT_DRIVERS is not set CONFIG_VIRTIO=y +CONFIG_VIRTIO_PCI_LIB=y CONFIG_VIRTIO_MENU=y CONFIG_VIRTIO_PCI=y CONFIG_VIRTIO_PCI_LEGACY=y @@ -5754,6 +6392,13 @@ CONFIG_VIRTIO_BALLOON=m # CONFIG_VIRTIO_INPUT is not set CONFIG_VIRTIO_MMIO=y # CONFIG_VIRTIO_MMIO_CMDLINE_DEVICES is not set +CONFIG_VIRTIO_DMA_SHARED_BUFFER=m +# CONFIG_VDPA is not set +CONFIG_VHOST_IOTLB=m +CONFIG_VHOST=m +CONFIG_VHOST_MENU=y +CONFIG_VHOST_NET=m +# CONFIG_VHOST_CROSS_ENDIAN_LEGACY is not set # # Microsoft Hyper-V guest support @@ -5784,9 +6429,9 @@ CONFIG_XEN_FRONT_PGDIR_SHBUF=m # end of Xen driver support # CONFIG_GREYBUS is not set +# CONFIG_COMEDI is not set CONFIG_STAGING=y # CONFIG_PRISM2_USB is not set -# CONFIG_COMEDI is not set # CONFIG_RTL8192U is not set # CONFIG_RTLLIB is not set # CONFIG_RTL8723BS is not set @@ -5811,7 +6456,6 @@ CONFIG_STAGING=y # Analog to digital converters # # CONFIG_AD7816 is not set -# CONFIG_AD7192 is not set # CONFIG_AD7280 is not set # end of Analog to digital converters @@ -5824,7 +6468,6 @@ CONFIG_STAGING=y # # Capacitance to digital converters # -# CONFIG_AD7150 is not set # CONFIG_AD7746 is not set # end of Capacitance to digital converters @@ -5855,13 +6498,6 @@ CONFIG_STAGING=y # end of IIO staging drivers # CONFIG_FB_SM750 is not set - -# -# Speakup console speech -# -# CONFIG_SPEAKUP is not set -# end of Speakup console 
speech - # CONFIG_STAGING_MEDIA is not set # @@ -5875,29 +6511,17 @@ CONFIG_STAGING_BOARD=y # CONFIG_UNISYSSPAR is not set # CONFIG_COMMON_CLK_XLNX_CLKWZRD is not set # CONFIG_FB_TFT is not set -# CONFIG_WILC1000_SDIO is not set -# CONFIG_WILC1000_SPI is not set -# CONFIG_MOST is not set # CONFIG_KS7010 is not set CONFIG_BCM_VIDEOCORE=y # CONFIG_BCM2835_VCHIQ is not set # CONFIG_SND_BCM2835 is not set # CONFIG_VIDEO_BCM2835 is not set # CONFIG_PI433 is not set - -# -# Gasket devices -# -# end of Gasket devices - # CONFIG_XIL_AXIS_FIFO is not set # CONFIG_FIELDBUS_DEV is not set -# CONFIG_USB_WUSB_CBAF is not set -# CONFIG_UWB is not set -# CONFIG_EXFAT_FS is not set # CONFIG_QLGE is not set +# CONFIG_WFX is not set # CONFIG_GOLDFISH is not set -CONFIG_MFD_CROS_EC=m CONFIG_CHROME_PLATFORMS=y CONFIG_CROS_EC=m CONFIG_CROS_EC_I2C=m @@ -5908,20 +6532,23 @@ CONFIG_CROS_EC_CHARDEV=m CONFIG_CROS_EC_LIGHTBAR=m CONFIG_CROS_EC_VBC=m CONFIG_CROS_EC_DEBUGFS=m +CONFIG_CROS_EC_SENSORHUB=m CONFIG_CROS_EC_SYSFS=m +CONFIG_CROS_USBPD_NOTIFY=m # CONFIG_MELLANOX_PLATFORM is not set -CONFIG_CLKDEV_LOOKUP=y +CONFIG_HAVE_CLK=y CONFIG_HAVE_CLK_PREPARE=y CONFIG_COMMON_CLK=y # -# Common Clock Framework +# Clock driver for ARM Reference designs # CONFIG_ICST=y -CONFIG_COMMON_CLK_VERSATILE=y CONFIG_CLK_SP810=y CONFIG_CLK_VEXPRESS_OSC=y -# CONFIG_CLK_HSDK is not set +# end of Clock driver for ARM Reference designs + +# CONFIG_LMK04832 is not set CONFIG_COMMON_CLK_MAX77686=y # CONFIG_COMMON_CLK_MAX9485 is not set CONFIG_COMMON_CLK_RK808=m @@ -5935,16 +6562,40 @@ CONFIG_COMMON_CLK_RK808=m # CONFIG_COMMON_CLK_CDCE925 is not set # CONFIG_COMMON_CLK_CS2000_CP is not set CONFIG_COMMON_CLK_S2MPS11=m -CONFIG_CLK_QORIQ=y +# CONFIG_COMMON_CLK_AXI_CLKGEN is not set # CONFIG_COMMON_CLK_PALMAS is not set # CONFIG_COMMON_CLK_PWM is not set # CONFIG_COMMON_CLK_VC5 is not set # CONFIG_COMMON_CLK_FIXED_MMIO is not set +CONFIG_CLK_BCM2711_DVP=y CONFIG_CLK_BCM2835=y CONFIG_CLK_RASPBERRYPI=y CONFIG_MXC_CLK=y +CONFIG_CLK_IMX6Q=y +CONFIG_CLK_IMX6SL=y +CONFIG_CLK_IMX6SLL=y +CONFIG_CLK_IMX6SX=y +CONFIG_CLK_IMX6UL=y +# CONFIG_CLK_IMX8MM is not set +# CONFIG_CLK_IMX8MN is not set +# CONFIG_CLK_IMX8MP is not set +# CONFIG_CLK_IMX8MQ is not set +CONFIG_COMMON_CLK_ROCKCHIP=y +CONFIG_CLK_RV110X=y +CONFIG_CLK_RK3036=y +CONFIG_CLK_RK312X=y +CONFIG_CLK_RK3188=y +CONFIG_CLK_RK322X=y +CONFIG_CLK_RK3288=y CONFIG_COMMON_CLK_SAMSUNG=y +CONFIG_EXYNOS_3250_COMMON_CLK=y +CONFIG_EXYNOS_4_COMMON_CLK=y +CONFIG_EXYNOS_5250_COMMON_CLK=y +CONFIG_EXYNOS_5260_COMMON_CLK=y +CONFIG_EXYNOS_5410_COMMON_CLK=y +CONFIG_EXYNOS_5420_COMMON_CLK=y CONFIG_EXYNOS_AUDSS_CLK_CON=y +CONFIG_EXYNOS_CLKOUT=y CONFIG_CLK_SUNXI=y CONFIG_CLK_SUNXI_CLOCKS=y CONFIG_CLK_SUNXI_PRCM_SUN6I=y @@ -5964,8 +6615,7 @@ CONFIG_SUN8I_R40_CCU=y CONFIG_SUN9I_A80_CCU=y CONFIG_SUN8I_R_CCU=y CONFIG_COMMON_CLK_TI_ADPLL=y -# end of Common Clock Framework - +# CONFIG_XILINX_VCU is not set # CONFIG_HWSPINLOCK is not set # @@ -5985,16 +6635,19 @@ CONFIG_CLKSRC_TI_32K=y CONFIG_ARM_ARCH_TIMER=y CONFIG_ARM_ARCH_TIMER_EVTSTREAM=y CONFIG_ARM_GLOBAL_TIMER=y +CONFIG_ARM_GT_INITIAL_PRESCALER_VAL=1 CONFIG_ARM_TIMER_SP804=y CONFIG_CLKSRC_ARM_GLOBAL_TIMER_SCHED_CLOCK=y CONFIG_CLKSRC_EXYNOS_MCT=y CONFIG_CLKSRC_SAMSUNG_PWM=y CONFIG_CLKSRC_VERSATILE=y CONFIG_CLKSRC_IMX_GPT=y +# CONFIG_MICROCHIP_PIT64B is not set # end of Clock Source drivers CONFIG_MAILBOX=y # CONFIG_ARM_MHU is not set +# CONFIG_ARM_MHU_V2 is not set # CONFIG_IMX_MBOX is not set # CONFIG_PLATFORM_MHU is not set CONFIG_PL320_MBOX=y @@ -6003,6 +6656,7 @@ 
CONFIG_PL320_MBOX=y # CONFIG_ALTERA_MBOX is not set CONFIG_BCM2835_MBOX=y # CONFIG_MAILBOX_TEST is not set +CONFIG_SUN6I_MSGBOX=y CONFIG_IOMMU_API=y CONFIG_IOMMU_SUPPORT=y @@ -6016,10 +6670,13 @@ CONFIG_IOMMU_IO_PGTABLE_LPAE=y # end of Generic IOMMU Pagetable Support # CONFIG_IOMMU_DEBUGFS is not set +CONFIG_IOMMU_DEFAULT_DMA_STRICT=y +# CONFIG_IOMMU_DEFAULT_DMA_LAZY is not set # CONFIG_IOMMU_DEFAULT_PASSTHROUGH is not set CONFIG_OF_IOMMU=y # CONFIG_OMAP_IOMMU is not set CONFIG_ROCKCHIP_IOMMU=y +# CONFIG_SUN50I_IOMMU is not set # CONFIG_EXYNOS_IOMMU is not set # CONFIG_ARM_SMMU is not set @@ -6027,6 +6684,7 @@ CONFIG_ROCKCHIP_IOMMU=y # Remoteproc drivers # CONFIG_REMOTEPROC=y +# CONFIG_REMOTEPROC_CDEV is not set # CONFIG_IMX_REMOTEPROC is not set # CONFIG_WKUP_M3_RPROC is not set # end of Remoteproc drivers @@ -6036,6 +6694,7 @@ CONFIG_REMOTEPROC=y # CONFIG_RPMSG=m # CONFIG_RPMSG_CHAR is not set +CONFIG_RPMSG_NS=m # CONFIG_RPMSG_QCOM_GLINK_RPM is not set CONFIG_RPMSG_VIRTIO=m # end of Rpmsg drivers @@ -6051,11 +6710,6 @@ CONFIG_RPMSG_VIRTIO=m # # end of Amlogic SoC drivers -# -# Aspeed SoC drivers -# -# end of Aspeed SoC drivers - # # Broadcom SoC drivers # @@ -6067,34 +6721,46 @@ CONFIG_SOC_BRCMSTB=y # # NXP/Freescale QorIQ SoC drivers # +# CONFIG_QUICC_ENGINE is not set CONFIG_FSL_GUTS=y +# CONFIG_FSL_RCPM is not set # end of NXP/Freescale QorIQ SoC drivers # # i.MX SoC drivers # CONFIG_IMX_GPCV2_PM_DOMAINS=y +# CONFIG_SOC_IMX8M is not set # end of i.MX SoC drivers +# +# Enable LiteX SoC Builder specific drivers +# +# CONFIG_LITEX_SOC_CONTROLLER is not set +# end of Enable LiteX SoC Builder specific drivers + # # Qualcomm SoC drivers # # end of Qualcomm SoC drivers CONFIG_ROCKCHIP_GRF=y +CONFIG_ROCKCHIP_IODOMAIN=y CONFIG_ROCKCHIP_PM_DOMAINS=y CONFIG_SOC_SAMSUNG=y +CONFIG_EXYNOS_ASV_ARM=y CONFIG_EXYNOS_CHIPID=y CONFIG_EXYNOS_PMU=y CONFIG_EXYNOS_PMU_ARM_DRIVERS=y CONFIG_EXYNOS_PM_DOMAINS=y +CONFIG_EXYNOS_REGULATOR_COUPLER=y +CONFIG_SUNXI_MBUS=y CONFIG_SUNXI_SRAM=y # CONFIG_SOC_TI is not set # # Xilinx SoC drivers # -# CONFIG_XILINX_VCU is not set # end of Xilinx SoC drivers # end of SOC (System On Chip) specific Drivers @@ -6113,6 +6779,8 @@ CONFIG_DEVFREQ_GOV_SIMPLE_ONDEMAND=m # DEVFREQ Drivers # # CONFIG_ARM_EXYNOS_BUS_DEVFREQ is not set +# CONFIG_ARM_IMX_BUS_DEVFREQ is not set +# CONFIG_ARM_IMX8M_DDRC_DEVFREQ is not set # CONFIG_ARM_RK3399_DMC_DEVFREQ is not set # CONFIG_PM_DEVFREQ_EVENT is not set CONFIG_EXTCON=y @@ -6145,6 +6813,8 @@ CONFIG_EXYNOS_SROM=y CONFIG_IIO=y CONFIG_IIO_BUFFER=y CONFIG_IIO_BUFFER_CB=m +# CONFIG_IIO_BUFFER_DMA is not set +# CONFIG_IIO_BUFFER_DMAENGINE is not set CONFIG_IIO_BUFFER_HW_CONSUMER=m CONFIG_IIO_KFIFO_BUF=y CONFIG_IIO_TRIGGERED_BUFFER=y @@ -6153,6 +6823,7 @@ CONFIG_IIO_TRIGGER=y CONFIG_IIO_CONSUMERS_PER_TRIGGER=2 # CONFIG_IIO_SW_DEVICE is not set CONFIG_IIO_SW_TRIGGER=y +# CONFIG_IIO_TRIGGERED_EVENT is not set # # Accelerometers @@ -6165,12 +6836,16 @@ CONFIG_IIO_SW_TRIGGER=y # CONFIG_ADXL372_I2C is not set # CONFIG_BMA180 is not set # CONFIG_BMA220 is not set +# CONFIG_BMA400 is not set # CONFIG_BMC150_ACCEL is not set +# CONFIG_BMI088_ACCEL is not set # CONFIG_DA280 is not set # CONFIG_DA311 is not set # CONFIG_DMARD06 is not set # CONFIG_DMARD09 is not set # CONFIG_DMARD10 is not set +# CONFIG_FXLS8962AF_I2C is not set +# CONFIG_FXLS8962AF_SPI is not set # CONFIG_IIO_CROS_EC_ACCEL_LEGACY is not set # CONFIG_IIO_ST_ACCEL_3AXIS is not set # CONFIG_KXSD9 is not set @@ -6185,6 +6860,7 @@ CONFIG_IIO_SW_TRIGGER=y # CONFIG_MXC4005 is not set # 
CONFIG_MXC6255 is not set # CONFIG_SCA3000 is not set +# CONFIG_SCA3300 is not set # CONFIG_STK8312 is not set # CONFIG_STK8BA50 is not set # end of Accelerometers @@ -6192,9 +6868,12 @@ CONFIG_IIO_SW_TRIGGER=y # # Analog to digital converters # +# CONFIG_AD7091R5 is not set # CONFIG_AD7124 is not set +# CONFIG_AD7192 is not set # CONFIG_AD7266 is not set # CONFIG_AD7291 is not set +# CONFIG_AD7292 is not set # CONFIG_AD7298 is not set # CONFIG_AD7476 is not set # CONFIG_AD7606_IFACE_PARALLEL is not set @@ -6208,6 +6887,7 @@ CONFIG_IIO_SW_TRIGGER=y # CONFIG_AD7923 is not set # CONFIG_AD7949 is not set # CONFIG_AD799X is not set +# CONFIG_ADI_AXI_ADC is not set # CONFIG_AXP20X_ADC is not set # CONFIG_AXP288_ADC is not set # CONFIG_CC10001_ADC is not set @@ -6220,10 +6900,12 @@ CONFIG_EXYNOS_ADC=m # CONFIG_IMX7D_ADC is not set # CONFIG_LTC2471 is not set # CONFIG_LTC2485 is not set +# CONFIG_LTC2496 is not set # CONFIG_LTC2497 is not set # CONFIG_MAX1027 is not set # CONFIG_MAX11100 is not set # CONFIG_MAX1118 is not set +# CONFIG_MAX1241 is not set # CONFIG_MAX1363 is not set # CONFIG_MAX9611 is not set # CONFIG_MCP320X is not set @@ -6232,6 +6914,7 @@ CONFIG_EXYNOS_ADC=m # CONFIG_NAU7802 is not set # CONFIG_PALMAS_GPADC is not set # CONFIG_QCOM_PM8XXX_XOADC is not set +# CONFIG_RN5T618_ADC is not set CONFIG_ROCKCHIP_SARADC=m # CONFIG_SD_ADC_MODULATOR is not set # CONFIG_STMPE_ADC is not set @@ -6248,7 +6931,9 @@ CONFIG_ROCKCHIP_SARADC=m # CONFIG_TI_ADS8344 is not set # CONFIG_TI_ADS8688 is not set # CONFIG_TI_ADS124S08 is not set +# CONFIG_TI_ADS131E08 is not set # CONFIG_TI_TLC4541 is not set +# CONFIG_TI_TSC2046 is not set # CONFIG_TWL4030_MADC is not set # CONFIG_TWL6030_GPADC is not set CONFIG_VF610_ADC=m @@ -6265,18 +6950,29 @@ CONFIG_XILINX_XADC=y # Amplifiers # # CONFIG_AD8366 is not set +# CONFIG_HMC425 is not set # end of Amplifiers +# +# Capacitance to digital converters +# +# CONFIG_AD7150 is not set +# end of Capacitance to digital converters + # # Chemical Sensors # # CONFIG_ATLAS_PH_SENSOR is not set +# CONFIG_ATLAS_EZO_SENSOR is not set # CONFIG_BME680 is not set # CONFIG_CCS811 is not set # CONFIG_IAQCORE is not set # CONFIG_PMS7003 is not set +# CONFIG_SCD30_CORE is not set # CONFIG_SENSIRION_SGP30 is not set -# CONFIG_SPS30 is not set +# CONFIG_SENSIRION_SGP40 is not set +# CONFIG_SPS30_I2C is not set +# CONFIG_SPS30_SERIAL is not set # CONFIG_VZ89X is not set # end of Chemical Sensors @@ -6289,6 +6985,11 @@ CONFIG_IIO_CROS_EC_SENSORS=m # # end of Hid Sensor IIO Common +# +# IIO SCMI Sensors +# +# end of IIO SCMI Sensors + # # SSP Sensor Common # @@ -6308,19 +7009,21 @@ CONFIG_IIO_CROS_EC_SENSORS=m # CONFIG_AD5593R is not set # CONFIG_AD5504 is not set # CONFIG_AD5624R_SPI is not set -# CONFIG_LTC1660 is not set -# CONFIG_LTC2632 is not set # CONFIG_AD5686_SPI is not set # CONFIG_AD5696_I2C is not set # CONFIG_AD5755 is not set # CONFIG_AD5758 is not set # CONFIG_AD5761 is not set # CONFIG_AD5764 is not set +# CONFIG_AD5766 is not set +# CONFIG_AD5770R is not set # CONFIG_AD5791 is not set # CONFIG_AD7303 is not set # CONFIG_AD8801 is not set # CONFIG_DPOT_DAC is not set # CONFIG_DS4424 is not set +# CONFIG_LTC1660 is not set +# CONFIG_LTC2632 is not set # CONFIG_M62332 is not set # CONFIG_MAX517 is not set # CONFIG_MAX5821 is not set @@ -6363,6 +7066,7 @@ CONFIG_IIO_CROS_EC_SENSORS=m # CONFIG_ADIS16130 is not set # CONFIG_ADIS16136 is not set # CONFIG_ADIS16260 is not set +# CONFIG_ADXRS290 is not set # CONFIG_ADXRS450 is not set # CONFIG_BMG160 is not set # 
CONFIG_FXAS21002C is not set @@ -6392,6 +7096,7 @@ CONFIG_MPU3050_I2C=y # CONFIG_AM2315 is not set # CONFIG_DHT11 is not set # CONFIG_HDC100X is not set +# CONFIG_HDC2010 is not set # CONFIG_HTS221 is not set # CONFIG_HTU21 is not set # CONFIG_SI7005 is not set @@ -6403,22 +7108,31 @@ CONFIG_MPU3050_I2C=y # # CONFIG_ADIS16400 is not set # CONFIG_ADIS16460 is not set +# CONFIG_ADIS16475 is not set # CONFIG_ADIS16480 is not set # CONFIG_BMI160_I2C is not set # CONFIG_BMI160_SPI is not set +# CONFIG_FXOS8700_I2C is not set +# CONFIG_FXOS8700_SPI is not set # CONFIG_KMX61 is not set +# CONFIG_INV_ICM42600_I2C is not set +# CONFIG_INV_ICM42600_SPI is not set # CONFIG_INV_MPU6050_I2C is not set # CONFIG_INV_MPU6050_SPI is not set # CONFIG_IIO_ST_LSM6DSX is not set +# CONFIG_IIO_ST_LSM9DS0 is not set # end of Inertial measurement units # # Light sensors # # CONFIG_ADJD_S311 is not set +# CONFIG_ADUX1020 is not set +# CONFIG_AL3010 is not set # CONFIG_AL3320A is not set # CONFIG_APDS9300 is not set # CONFIG_APDS9960 is not set +# CONFIG_AS73211 is not set # CONFIG_BH1750 is not set # CONFIG_BH1780 is not set # CONFIG_CM32181 is not set @@ -6427,6 +7141,7 @@ CONFIG_MPU3050_I2C=y # CONFIG_CM3605 is not set CONFIG_CM36651=m CONFIG_IIO_CROS_EC_LIGHT_PROX=m +# CONFIG_GP2AP002 is not set # CONFIG_GP2AP020A00F is not set CONFIG_SENSORS_ISL29018=y CONFIG_SENSORS_ISL29028=y @@ -6448,11 +7163,13 @@ CONFIG_SENSORS_ISL29028=y # CONFIG_TCS3472 is not set # CONFIG_SENSORS_TSL2563 is not set # CONFIG_TSL2583 is not set +# CONFIG_TSL2591 is not set # CONFIG_TSL2772 is not set # CONFIG_TSL4531 is not set # CONFIG_US5182D is not set # CONFIG_VCNL4000 is not set # CONFIG_VCNL4035 is not set +# CONFIG_VEML6030 is not set # CONFIG_VEML6070 is not set # CONFIG_VL6180 is not set # CONFIG_ZOPT2201 is not set @@ -6473,6 +7190,7 @@ CONFIG_AK8975=y # CONFIG_SENSORS_HMC5843_SPI is not set # CONFIG_SENSORS_RM3100_I2C is not set # CONFIG_SENSORS_RM3100_SPI is not set +# CONFIG_YAMAHA_YAS530 is not set # end of Magnetometer sensors # @@ -6495,9 +7213,15 @@ CONFIG_IIO_HRTIMER_TRIGGER=y # CONFIG_IIO_SYSFS_TRIGGER is not set # end of Triggers - standalone +# +# Linear and angular position sensors +# +# end of Linear and angular position sensors + # # Digital potentiometers # +# CONFIG_AD5110 is not set # CONFIG_AD5272 is not set # CONFIG_DS1803 is not set # CONFIG_MAX5432 is not set @@ -6522,8 +7246,10 @@ CONFIG_IIO_HRTIMER_TRIGGER=y # CONFIG_ABP060MG is not set # CONFIG_BMP280 is not set # CONFIG_IIO_CROS_EC_BARO is not set +# CONFIG_DLHL60D is not set # CONFIG_DPS310 is not set # CONFIG_HP03 is not set +# CONFIG_ICP10100 is not set # CONFIG_MPL115_I2C is not set # CONFIG_MPL115_SPI is not set # CONFIG_MPL3115 is not set @@ -6544,13 +7270,17 @@ CONFIG_IIO_HRTIMER_TRIGGER=y # # Proximity and distance sensors # +# CONFIG_CROS_EC_MKBP_PROXIMITY is not set # CONFIG_ISL29501 is not set # CONFIG_LIDAR_LITE_V2 is not set # CONFIG_MB1232 is not set +# CONFIG_PING is not set # CONFIG_RFD77402 is not set # CONFIG_SRF04 is not set +# CONFIG_SX9310 is not set # CONFIG_SX9500 is not set # CONFIG_SRF08 is not set +# CONFIG_VCNL3020 is not set # CONFIG_VL53L0X_I2C is not set # end of Proximity and distance sensors @@ -6564,11 +7294,13 @@ CONFIG_IIO_HRTIMER_TRIGGER=y # # Temperature sensors # +# CONFIG_LTC2983 is not set # CONFIG_MAXIM_THERMOCOUPLE is not set # CONFIG_MLX90614 is not set # CONFIG_MLX90632 is not set # CONFIG_TMP006 is not set # CONFIG_TMP007 is not set +# CONFIG_TMP117 is not set # CONFIG_TSYS01 is not set # CONFIG_TSYS02D is 
not set # CONFIG_MAX31856 is not set @@ -6578,15 +7310,19 @@ CONFIG_IIO_HRTIMER_TRIGGER=y # CONFIG_VME_BUS is not set CONFIG_PWM=y CONFIG_PWM_SYSFS=y +# CONFIG_PWM_DEBUG is not set CONFIG_PWM_ATMEL_HLCDC_PWM=m +# CONFIG_PWM_ATMEL_TCB is not set CONFIG_PWM_BCM2835=y # CONFIG_PWM_CROS_EC is not set +# CONFIG_PWM_DWC is not set CONFIG_PWM_FSL_FTM=m # CONFIG_PWM_IMX1 is not set # CONFIG_PWM_IMX27 is not set # CONFIG_PWM_IMX_TPM is not set # CONFIG_PWM_OMAP_DMTIMER is not set # CONFIG_PWM_PCA9685 is not set +# CONFIG_PWM_RASPBERRYPI_POE is not set CONFIG_PWM_ROCKCHIP=m CONFIG_PWM_SAMSUNG=m # CONFIG_PWM_STMPE is not set @@ -6608,15 +7344,19 @@ CONFIG_ARM_GIC_V3=y CONFIG_ARM_GIC_V3_ITS=y CONFIG_ARM_GIC_V3_ITS_PCI=y # CONFIG_AL_FIC is not set +CONFIG_BRCMSTB_L2_IRQ=y CONFIG_OMAP_IRQCHIP=y CONFIG_IRQ_CROSSBAR=y CONFIG_PARTITION_PERCPU=y CONFIG_IMX_IRQSTEER=y +CONFIG_IMX_INTMUX=y +CONFIG_EXYNOS_IRQ_COMBINER=y # end of IRQ chip support # CONFIG_IPACK_BUS is not set CONFIG_ARCH_HAS_RESET_CONTROLLER=y CONFIG_RESET_CONTROLLER=y +CONFIG_RESET_RASPBERRYPI=y CONFIG_RESET_SIMPLE=y CONFIG_RESET_SUNXI=y # CONFIG_RESET_TI_SYSCON is not set @@ -6626,13 +7366,16 @@ CONFIG_RESET_SUNXI=y # CONFIG_GENERIC_PHY=y CONFIG_GENERIC_PHY_MIPI_DPHY=y +# CONFIG_PHY_CAN_TRANSCEIVER is not set CONFIG_PHY_SUN4I_USB=y CONFIG_PHY_SUN6I_MIPI_DPHY=m CONFIG_PHY_SUN9I_USB=y +# CONFIG_PHY_SUN50I_USB3 is not set # CONFIG_BCM_KONA_USB2_PHY is not set -# CONFIG_PHY_CADENCE_DP is not set +# CONFIG_PHY_CADENCE_TORRENT is not set # CONFIG_PHY_CADENCE_DPHY is not set # CONFIG_PHY_CADENCE_SIERRA is not set +# CONFIG_PHY_CADENCE_SALVO is not set # CONFIG_PHY_FSL_IMX8MQ_USB is not set # CONFIG_PHY_MIXEL_MIPI_DPHY is not set # CONFIG_PHY_PXA_28NM_HSIC is not set @@ -6643,15 +7386,19 @@ CONFIG_PHY_CPCAP_USB=m # CONFIG_PHY_QCOM_USB_HS is not set # CONFIG_PHY_QCOM_USB_HSIC is not set CONFIG_PHY_ROCKCHIP_DP=m +# CONFIG_PHY_ROCKCHIP_DPHY_RX0 is not set # CONFIG_PHY_ROCKCHIP_EMMC is not set # CONFIG_PHY_ROCKCHIP_INNO_HDMI is not set # CONFIG_PHY_ROCKCHIP_INNO_USB2 is not set +# CONFIG_PHY_ROCKCHIP_INNO_CSIDPHY is not set +# CONFIG_PHY_ROCKCHIP_INNO_DSIDPHY is not set # CONFIG_PHY_ROCKCHIP_PCIE is not set # CONFIG_PHY_ROCKCHIP_TYPEC is not set CONFIG_PHY_ROCKCHIP_USB=y CONFIG_PHY_EXYNOS_DP_VIDEO=y CONFIG_PHY_EXYNOS_MIPI_VIDEO=y # CONFIG_PHY_EXYNOS_PCIE is not set +# CONFIG_PHY_SAMSUNG_UFS is not set CONFIG_PHY_SAMSUNG_USB2=m CONFIG_PHY_EXYNOS4210_USB2=y CONFIG_PHY_EXYNOS4X12_USB2=y @@ -6680,6 +7427,7 @@ CONFIG_ARM_PMU=y # end of Performance monitor support CONFIG_RAS=y +# CONFIG_USB4 is not set # # Android @@ -6694,8 +7442,10 @@ CONFIG_NVMEM_SYSFS=y # CONFIG_NVMEM_IMX_IIM is not set CONFIG_NVMEM_IMX_OCOTP=y CONFIG_ROCKCHIP_EFUSE=m +# CONFIG_ROCKCHIP_OTP is not set CONFIG_NVMEM_SUNXI_SID=y # CONFIG_NVMEM_SNVS_LPGPR is not set +# CONFIG_NVMEM_RMEM is not set # # HW tracing support @@ -6712,6 +7462,7 @@ CONFIG_PM_OPP=y # CONFIG_SLIMBUS is not set # CONFIG_INTERCONNECT is not set # CONFIG_COUNTER is not set +# CONFIG_MOST is not set # end of Device Drivers # @@ -6740,6 +7491,7 @@ CONFIG_JFS_FS=m # CONFIG_JFS_DEBUG is not set # CONFIG_JFS_STATISTICS is not set CONFIG_XFS_FS=y +CONFIG_XFS_SUPPORT_V4=y # CONFIG_XFS_QUOTA is not set # CONFIG_XFS_POSIX_ACL is not set CONFIG_XFS_RT=y @@ -6763,11 +7515,12 @@ CONFIG_F2FS_FS_POSIX_ACL=y # CONFIG_F2FS_FS_SECURITY is not set # CONFIG_F2FS_CHECK_FS is not set # CONFIG_F2FS_FAULT_INJECTION is not set +# CONFIG_F2FS_FS_COMPRESSION is not set +CONFIG_F2FS_IOSTAT=y CONFIG_FS_POSIX_ACL=y CONFIG_EXPORTFS=y # 
CONFIG_EXPORTFS_BLOCK_OPS is not set CONFIG_FILE_LOCKING=y -# CONFIG_MANDATORY_FILE_LOCKING is not set # CONFIG_FS_ENCRYPTION is not set # CONFIG_FS_VERITY is not set CONFIG_FSNOTIFY=y @@ -6794,7 +7547,7 @@ CONFIG_AUTOFS_FS=y # end of CD-ROM/DVD Filesystems # -# DOS/FAT/NT Filesystems +# DOS/FAT/EXFAT/NT Filesystems # CONFIG_FAT_FS=y CONFIG_MSDOS_FS=y @@ -6802,10 +7555,14 @@ CONFIG_VFAT_FS=y CONFIG_FAT_DEFAULT_CODEPAGE=437 CONFIG_FAT_DEFAULT_IOCHARSET="iso8859-1" # CONFIG_FAT_DEFAULT_UTF8 is not set +# CONFIG_EXFAT_FS is not set CONFIG_NTFS_FS=y # CONFIG_NTFS_DEBUG is not set # CONFIG_NTFS_RW is not set -# end of DOS/FAT/NT Filesystems +CONFIG_NTFS3_FS=m +# CONFIG_NTFS3_LZX_XPRESS is not set +# CONFIG_NTFS3_FS_POSIX_ACL is not set +# end of DOS/FAT/EXFAT/NT Filesystems # # Pseudo filesystems @@ -6819,6 +7576,7 @@ CONFIG_SYSFS=y CONFIG_TMPFS=y CONFIG_TMPFS_POSIX_ACL=y CONFIG_TMPFS_XATTR=y +CONFIG_ARCH_SUPPORTS_HUGETLBFS=y CONFIG_HUGETLBFS=y CONFIG_HUGETLB_PAGE=y CONFIG_MEMFD_CREATE=y @@ -6870,6 +7628,7 @@ CONFIG_SQUASHFS_FRAGMENT_CACHE_SIZE=3 # CONFIG_QNX6FS_FS is not set # CONFIG_ROMFS_FS is not set CONFIG_PSTORE=y +CONFIG_PSTORE_DEFAULT_KMSG_BYTES=10240 CONFIG_PSTORE_DEFLATE_COMPRESS=y # CONFIG_PSTORE_LZO_COMPRESS is not set # CONFIG_PSTORE_LZ4_COMPRESS is not set @@ -6882,6 +7641,7 @@ CONFIG_PSTORE_COMPRESS_DEFAULT="deflate" CONFIG_PSTORE_CONSOLE=y CONFIG_PSTORE_PMSG=y CONFIG_PSTORE_RAM=y +# CONFIG_PSTORE_BLK is not set # CONFIG_SYSV_FS is not set # CONFIG_UFS_FS is not set # CONFIG_EROFS_FS is not set @@ -6895,6 +7655,7 @@ CONFIG_NFS_V4=y # CONFIG_NFS_V4_1 is not set # CONFIG_NFS_USE_LEGACY_DNS is not set CONFIG_NFS_USE_KERNEL_DNS=y +CONFIG_NFS_DISABLE_UDP_SUPPORT=y # CONFIG_NFSD is not set CONFIG_GRACE_PERIOD=y CONFIG_LOCKD=y @@ -6906,6 +7667,7 @@ CONFIG_SUNRPC_GSS=y # CONFIG_SUNRPC_DEBUG is not set # CONFIG_CEPH_FS is not set # CONFIG_CIFS is not set +# CONFIG_SMB_SERVER is not set # CONFIG_CODA_FS is not set # CONFIG_AFS_FS is not set # CONFIG_9P_FS is not set @@ -6962,6 +7724,7 @@ CONFIG_NLS_ISO8859_1=y CONFIG_NLS_UTF8=y # CONFIG_DLM is not set # CONFIG_UNICODE is not set +CONFIG_IO_WQ=y # end of File systems # @@ -6970,10 +7733,10 @@ CONFIG_NLS_UTF8=y CONFIG_KEYS=y # CONFIG_KEYS_REQUEST_CACHE is not set # CONFIG_PERSISTENT_KEYRINGS is not set -# CONFIG_BIG_KEYS is not set # CONFIG_TRUSTED_KEYS is not set # CONFIG_ENCRYPTED_KEYS is not set # CONFIG_KEY_DH_OPERATIONS is not set +# CONFIG_KEY_NOTIFICATIONS is not set # CONFIG_SECURITY_DMESG_RESTRICT is not set CONFIG_SECURITY=y # CONFIG_SECURITYFS is not set @@ -6992,6 +7755,7 @@ CONFIG_FORTIFY_SOURCE=y CONFIG_SECURITY_YAMA=y # CONFIG_SECURITY_SAFESETID is not set # CONFIG_SECURITY_LOCKDOWN_LSM is not set +# CONFIG_SECURITY_LANDLOCK is not set # CONFIG_INTEGRITY is not set CONFIG_DEFAULT_SECURITY_DAC=y CONFIG_LSM="lockdown,yama,loadpin,safesetid,integrity" @@ -7025,8 +7789,8 @@ CONFIG_CRYPTO_ALGAPI=y CONFIG_CRYPTO_ALGAPI2=y CONFIG_CRYPTO_AEAD=m CONFIG_CRYPTO_AEAD2=y -CONFIG_CRYPTO_BLKCIPHER=m -CONFIG_CRYPTO_BLKCIPHER2=y +CONFIG_CRYPTO_SKCIPHER=m +CONFIG_CRYPTO_SKCIPHER2=y CONFIG_CRYPTO_HASH=y CONFIG_CRYPTO_HASH2=y CONFIG_CRYPTO_RNG=m @@ -7058,7 +7822,10 @@ CONFIG_CRYPTO_RSA=y # CONFIG_CRYPTO_DH is not set CONFIG_CRYPTO_ECC=m CONFIG_CRYPTO_ECDH=m +# CONFIG_CRYPTO_ECDSA is not set # CONFIG_CRYPTO_ECRDSA is not set +# CONFIG_CRYPTO_SM2 is not set +# CONFIG_CRYPTO_CURVE25519 is not set # # Authenticated Encryption with Associated Data @@ -7099,45 +7866,37 @@ CONFIG_CRYPTO_HMAC=y # CONFIG_CRYPTO_CRC32C=y CONFIG_CRYPTO_CRC32=m -# 
CONFIG_CRYPTO_XXHASH is not set +CONFIG_CRYPTO_XXHASH=m +CONFIG_CRYPTO_BLAKE2B=m CONFIG_CRYPTO_CRCT10DIF=y CONFIG_CRYPTO_GHASH=m # CONFIG_CRYPTO_POLY1305 is not set # CONFIG_CRYPTO_MD4 is not set CONFIG_CRYPTO_MD5=y # CONFIG_CRYPTO_MICHAEL_MIC is not set -# CONFIG_CRYPTO_RMD128 is not set # CONFIG_CRYPTO_RMD160 is not set -# CONFIG_CRYPTO_RMD256 is not set -# CONFIG_CRYPTO_RMD320 is not set CONFIG_CRYPTO_SHA1=y -CONFIG_CRYPTO_LIB_SHA256=y CONFIG_CRYPTO_SHA256=y CONFIG_CRYPTO_SHA512=y # CONFIG_CRYPTO_SHA3 is not set # CONFIG_CRYPTO_SM3 is not set # CONFIG_CRYPTO_STREEBOG is not set -# CONFIG_CRYPTO_TGR192 is not set # CONFIG_CRYPTO_WP512 is not set # # Ciphers # -CONFIG_CRYPTO_LIB_AES=m CONFIG_CRYPTO_AES=m # CONFIG_CRYPTO_AES_TI is not set # CONFIG_CRYPTO_ANUBIS is not set -CONFIG_CRYPTO_LIB_ARC4=m # CONFIG_CRYPTO_ARC4 is not set # CONFIG_CRYPTO_BLOWFISH is not set # CONFIG_CRYPTO_CAMELLIA is not set # CONFIG_CRYPTO_CAST5 is not set # CONFIG_CRYPTO_CAST6 is not set -CONFIG_CRYPTO_LIB_DES=m CONFIG_CRYPTO_DES=m # CONFIG_CRYPTO_FCRYPT is not set # CONFIG_CRYPTO_KHAZAD is not set -# CONFIG_CRYPTO_SALSA20 is not set CONFIG_CRYPTO_CHACHA20=m # CONFIG_CRYPTO_SEED is not set # CONFIG_CRYPTO_SERPENT is not set @@ -7169,10 +7928,18 @@ CONFIG_CRYPTO_USER_API=m CONFIG_CRYPTO_USER_API_HASH=m CONFIG_CRYPTO_USER_API_SKCIPHER=m CONFIG_CRYPTO_USER_API_RNG=m +# CONFIG_CRYPTO_USER_API_RNG_CAVP is not set CONFIG_CRYPTO_USER_API_AEAD=m +CONFIG_CRYPTO_USER_API_ENABLE_OBSOLETE=y # CONFIG_CRYPTO_STATS is not set CONFIG_CRYPTO_HASH_INFO=y CONFIG_CRYPTO_HW=y +CONFIG_CRYPTO_DEV_ALLWINNER=y +CONFIG_CRYPTO_DEV_SUN4I_SS=m +# CONFIG_CRYPTO_DEV_SUN4I_SS_PRNG is not set +# CONFIG_CRYPTO_DEV_SUN4I_SS_DEBUG is not set +# CONFIG_CRYPTO_DEV_SUN8I_CE is not set +# CONFIG_CRYPTO_DEV_SUN8I_SS is not set # CONFIG_CRYPTO_DEV_FSL_CAAM is not set # CONFIG_CRYPTO_DEV_OMAP is not set # CONFIG_CRYPTO_DEV_SAHARA is not set @@ -7181,12 +7948,11 @@ CONFIG_CRYPTO_DEV_S5P=m # CONFIG_CRYPTO_DEV_ATMEL_ECC is not set # CONFIG_CRYPTO_DEV_ATMEL_SHA204A is not set # CONFIG_CRYPTO_DEV_MXS_DCP is not set -CONFIG_CRYPTO_DEV_SUN4I_SS=m -# CONFIG_CRYPTO_DEV_SUN4I_SS_PRNG is not set CONFIG_CRYPTO_DEV_ROCKCHIP=m CONFIG_CRYPTO_DEV_VIRTIO=m # CONFIG_CRYPTO_DEV_SAFEXCEL is not set # CONFIG_CRYPTO_DEV_CCREE is not set +# CONFIG_CRYPTO_DEV_AMLOGIC_GXL is not set CONFIG_ASYMMETRIC_KEY_TYPE=y CONFIG_ASYMMETRIC_PUBLIC_KEY_SUBTYPE=y CONFIG_X509_CERTIFICATE_PARSER=y @@ -7199,6 +7965,8 @@ CONFIG_PKCS7_MESSAGE_PARSER=y # Certificates for signature checking # CONFIG_MODULE_SIG_KEY="certs/signing_key.pem" +CONFIG_MODULE_SIG_KEY_TYPE_RSA=y +# CONFIG_MODULE_SIG_KEY_TYPE_ECDSA is not set CONFIG_SYSTEM_TRUSTED_KEYRING=y CONFIG_SYSTEM_TRUSTED_KEYS="" # CONFIG_SYSTEM_EXTRA_CERTIFICATE is not set @@ -7213,6 +7981,7 @@ CONFIG_BINARY_PRINTF=y # CONFIG_RAID6_PQ=m CONFIG_RAID6_PQ_BENCHMARK=y +CONFIG_LINEAR_RANGES=y # CONFIG_PACKING is not set CONFIG_BITREVERSE=y CONFIG_HAVE_ARCH_BITREVERSE=y @@ -7220,10 +7989,33 @@ CONFIG_GENERIC_STRNCPY_FROM_USER=y CONFIG_GENERIC_STRNLEN_USER=y CONFIG_GENERIC_NET_UTILS=y # CONFIG_CORDIC is not set +# CONFIG_PRIME_NUMBERS is not set CONFIG_RATIONAL=y CONFIG_GENERIC_PCI_IOMAP=y CONFIG_STMP_DEVICE=y CONFIG_ARCH_USE_CMPXCHG_LOCKREF=y + +# +# Crypto library routines +# +CONFIG_CRYPTO_LIB_AES=m +CONFIG_CRYPTO_LIB_ARC4=m +CONFIG_CRYPTO_ARCH_HAVE_LIB_BLAKE2S=y +CONFIG_CRYPTO_ARCH_HAVE_LIB_CHACHA=m +CONFIG_CRYPTO_LIB_CHACHA_GENERIC=m +CONFIG_CRYPTO_LIB_CHACHA=m +CONFIG_CRYPTO_ARCH_HAVE_LIB_CURVE25519=m +CONFIG_CRYPTO_LIB_CURVE25519_GENERIC=m 
+CONFIG_CRYPTO_LIB_CURVE25519=m +CONFIG_CRYPTO_LIB_DES=m +CONFIG_CRYPTO_LIB_POLY1305_RSIZE=9 +CONFIG_CRYPTO_ARCH_HAVE_LIB_POLY1305=m +CONFIG_CRYPTO_LIB_POLY1305=m +CONFIG_CRYPTO_LIB_CHACHA20POLY1305=m +CONFIG_CRYPTO_LIB_SHA256=y +# end of Crypto library routines + +CONFIG_LIB_MEMNEQ=y CONFIG_CRC_CCITT=m CONFIG_CRC16=y CONFIG_CRC_T10DIF=y @@ -7263,6 +8055,7 @@ CONFIG_DECOMPRESS_LZMA=y CONFIG_DECOMPRESS_XZ=y CONFIG_DECOMPRESS_LZO=y CONFIG_DECOMPRESS_LZ4=y +CONFIG_DECOMPRESS_ZSTD=y CONFIG_GENERIC_ALLOCATOR=y CONFIG_REED_SOLOMON=y CONFIG_REED_SOLOMON_ENC8=y @@ -7271,10 +8064,12 @@ CONFIG_TEXTSEARCH=y CONFIG_TEXTSEARCH_KMP=m CONFIG_TEXTSEARCH_BM=m CONFIG_TEXTSEARCH_FSM=m +CONFIG_INTERVAL_TREE=y CONFIG_XARRAY_MULTI=y CONFIG_ASSOCIATIVE_ARRAY=y CONFIG_HAS_IOMEM=y CONFIG_HAS_DMA=y +CONFIG_DMA_OPS=y CONFIG_NEED_SG_DMA_LENGTH=y CONFIG_NEED_DMA_MAP_STATE=y CONFIG_ARCH_DMA_ADDR_T_64BIT=y @@ -7283,10 +8078,12 @@ CONFIG_ARCH_HAS_SETUP_DMA_OPS=y CONFIG_ARCH_HAS_TEARDOWN_DMA_OPS=y CONFIG_ARCH_HAS_SYNC_DMA_FOR_DEVICE=y CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU=y -CONFIG_ARCH_HAS_DMA_COHERENT_TO_PFN=y CONFIG_SWIOTLB=y +# CONFIG_DMA_RESTRICTED_POOL is not set +CONFIG_DMA_NONCOHERENT_MMAP=y CONFIG_DMA_REMAP=y CONFIG_DMA_CMA=y +# CONFIG_DMA_PERNUMA_CMA is not set # # Default contiguous memory area size: @@ -7298,6 +8095,7 @@ CONFIG_CMA_SIZE_SEL_MBYTES=y # CONFIG_CMA_SIZE_SEL_MAX is not set CONFIG_CMA_ALIGNMENT=8 # CONFIG_DMA_API_DEBUG is not set +# CONFIG_DMA_MAP_BENCHMARK is not set CONFIG_SGL_ALLOC=y CONFIG_CPU_RMAP=y CONFIG_DQL=y @@ -7321,9 +8119,10 @@ CONFIG_FONT_8x16=y CONFIG_SG_SPLIT=y CONFIG_SG_POOL=y CONFIG_SBITMAP=y -# CONFIG_STRING_SELFTEST is not set # end of Library routines +CONFIG_GENERIC_LIB_DEVMEM_IS_ALLOWED=y + # # Kernel hacking # @@ -7333,32 +8132,48 @@ CONFIG_SBITMAP=y # CONFIG_PRINTK_TIME=y # CONFIG_PRINTK_CALLER is not set +# CONFIG_STACKTRACE_BUILD_ID is not set CONFIG_CONSOLE_LOGLEVEL_DEFAULT=7 CONFIG_CONSOLE_LOGLEVEL_QUIET=4 CONFIG_MESSAGE_LOGLEVEL_DEFAULT=4 CONFIG_BOOT_PRINTK_DELAY=y # CONFIG_DYNAMIC_DEBUG is not set +# CONFIG_DYNAMIC_DEBUG_CORE is not set +CONFIG_SYMBOLIC_ERRNAME=y +CONFIG_DEBUG_BUGVERBOSE=y # end of printk and dmesg options +CONFIG_AS_HAS_NON_CONST_LEB128=y + # # Compile-time checks and compiler options # # CONFIG_DEBUG_INFO is not set -CONFIG_ENABLE_MUST_CHECK=y CONFIG_FRAME_WARN=1024 # CONFIG_STRIP_ASM_SYMS is not set # CONFIG_READABLE_ASM is not set -CONFIG_DEBUG_FS=y # CONFIG_HEADERS_INSTALL is not set -CONFIG_OPTIMIZE_INLINING=y # CONFIG_DEBUG_SECTION_MISMATCH is not set CONFIG_SECTION_MISMATCH_WARN_ONLY=y +# CONFIG_VMLINUX_MAP is not set # CONFIG_DEBUG_FORCE_WEAK_PER_CPU is not set # end of Compile-time checks and compiler options +# +# Generic Kernel Debugging Instruments +# CONFIG_MAGIC_SYSRQ=y CONFIG_MAGIC_SYSRQ_DEFAULT_ENABLE=0x1 # CONFIG_MAGIC_SYSRQ_SERIAL is not set +CONFIG_DEBUG_FS=y +CONFIG_DEBUG_FS_ALLOW_ALL=y +# CONFIG_DEBUG_FS_DISALLOW_MOUNT is not set +# CONFIG_DEBUG_FS_ALLOW_NONE is not set +CONFIG_HAVE_ARCH_KGDB=y +# CONFIG_KGDB is not set +# CONFIG_UBSAN is not set +# end of Generic Kernel Debugging Instruments + CONFIG_DEBUG_KERNEL=y CONFIG_DEBUG_MISC=y @@ -7370,41 +8185,48 @@ CONFIG_DEBUG_MISC=y # CONFIG_PAGE_POISONING is not set # CONFIG_DEBUG_PAGE_REF is not set # CONFIG_DEBUG_RODATA_TEST is not set +# CONFIG_DEBUG_WX is not set # CONFIG_DEBUG_OBJECTS is not set # CONFIG_SLUB_STATS is not set CONFIG_HAVE_DEBUG_KMEMLEAK=y # CONFIG_DEBUG_KMEMLEAK is not set # CONFIG_DEBUG_STACK_USAGE is not set +CONFIG_SCHED_STACK_END_CHECK=y # CONFIG_DEBUG_VM is not 
set CONFIG_ARCH_HAS_DEBUG_VIRTUAL=y # CONFIG_DEBUG_VIRTUAL is not set CONFIG_DEBUG_MEMORY_INIT=y # CONFIG_DEBUG_PER_CPU_MAPS is not set +# CONFIG_DEBUG_KMAP_LOCAL is not set # CONFIG_DEBUG_HIGHMEM is not set +CONFIG_HAVE_ARCH_KASAN=y CONFIG_CC_HAS_KASAN_GENERIC=y -CONFIG_KASAN_STACK=1 +CONFIG_CC_HAS_WORKING_NOSANITIZE_ADDRESS=y +# CONFIG_KASAN is not set # end of Memory Debugging -CONFIG_ARCH_HAS_KCOV=y -CONFIG_CC_HAS_SANCOV_TRACE_PC=y -# CONFIG_KCOV is not set # CONFIG_DEBUG_SHIRQ is not set # -# Debug Lockups and Hangs +# Debug Oops, Lockups and Hangs # +# CONFIG_PANIC_ON_OOPS is not set +CONFIG_PANIC_ON_OOPS_VALUE=0 +CONFIG_PANIC_TIMEOUT=120 # CONFIG_SOFTLOCKUP_DETECTOR is not set # CONFIG_DETECT_HUNG_TASK is not set # CONFIG_WQ_WATCHDOG is not set -# end of Debug Lockups and Hangs +# CONFIG_TEST_LOCKUP is not set +# end of Debug Oops, Lockups and Hangs -# CONFIG_PANIC_ON_OOPS is not set -CONFIG_PANIC_ON_OOPS_VALUE=0 -CONFIG_PANIC_TIMEOUT=120 +# +# Scheduler Debugging +# # CONFIG_SCHED_DEBUG is not set CONFIG_SCHED_INFO=y # CONFIG_SCHEDSTATS is not set -CONFIG_SCHED_STACK_END_CHECK=y +# end of Scheduler Debugging + # CONFIG_DEBUG_TIMEKEEPING is not set # @@ -7423,33 +8245,39 @@ CONFIG_LOCK_DEBUGGING_SUPPORT=y # CONFIG_DEBUG_LOCKING_API_SELFTESTS is not set # CONFIG_LOCK_TORTURE_TEST is not set # CONFIG_WW_MUTEX_SELFTEST is not set +# CONFIG_SCF_TORTURE_TEST is not set # end of Lock Debugging (spinlocks, mutexes, etc...) +# CONFIG_DEBUG_IRQFLAGS is not set CONFIG_STACKTRACE=y # CONFIG_WARN_ALL_UNSEEDED_RANDOM is not set # CONFIG_DEBUG_KOBJECT is not set -CONFIG_DEBUG_BUGVERBOSE=y + +# +# Debug kernel data structures +# # CONFIG_DEBUG_LIST is not set # CONFIG_DEBUG_PLIST is not set # CONFIG_DEBUG_SG is not set # CONFIG_DEBUG_NOTIFIERS is not set +# CONFIG_BUG_ON_DATA_CORRUPTION is not set +# end of Debug kernel data structures + # CONFIG_DEBUG_CREDENTIALS is not set # # RCU Debugging # -# CONFIG_RCU_PERF_TEST is not set +# CONFIG_RCU_SCALE_TEST is not set # CONFIG_RCU_TORTURE_TEST is not set +# CONFIG_RCU_REF_SCALE_TEST is not set CONFIG_RCU_CPU_STALL_TIMEOUT=21 CONFIG_RCU_TRACE=y # CONFIG_RCU_EQS_DEBUG is not set # end of RCU Debugging # CONFIG_DEBUG_WQ_FORCE_RR_CPU is not set -# CONFIG_DEBUG_BLOCK_EXT_DEVT is not set # CONFIG_CPU_HOTPLUG_STATE_CONTROL is not set -# CONFIG_NOTIFIER_ERROR_INJECTION is not set -# CONFIG_FAULT_INJECTION is not set # CONFIG_LATENCYTOP is not set CONFIG_NOP_TRACER=y CONFIG_HAVE_FUNCTION_TRACER=y @@ -7462,36 +8290,70 @@ CONFIG_TRACE_CLOCK=y CONFIG_RING_BUFFER=y CONFIG_EVENT_TRACING=y CONFIG_CONTEXT_SWITCH_TRACER=y -CONFIG_RING_BUFFER_ALLOW_SWAP=y CONFIG_TRACING=y CONFIG_TRACING_SUPPORT=y CONFIG_FTRACE=y +# CONFIG_BOOTTIME_TRACING is not set # CONFIG_FUNCTION_TRACER is not set -# CONFIG_PREEMPTIRQ_EVENTS is not set +# CONFIG_STACK_TRACER is not set # CONFIG_IRQSOFF_TRACER is not set # CONFIG_SCHED_TRACER is not set # CONFIG_HWLAT_TRACER is not set +# CONFIG_OSNOISE_TRACER is not set +# CONFIG_TIMERLAT_TRACER is not set # CONFIG_ENABLE_DEFAULT_TRACERS is not set # CONFIG_FTRACE_SYSCALLS is not set # CONFIG_TRACER_SNAPSHOT is not set CONFIG_BRANCH_PROFILE_NONE=y # CONFIG_PROFILE_ANNOTATED_BRANCHES is not set -# CONFIG_STACK_TRACER is not set # CONFIG_BLK_DEV_IO_TRACE is not set CONFIG_KPROBE_EVENTS=y CONFIG_UPROBE_EVENTS=y CONFIG_BPF_EVENTS=y CONFIG_DYNAMIC_EVENTS=y CONFIG_PROBE_EVENTS=y +# CONFIG_SYNTH_EVENTS is not set +# CONFIG_HIST_TRIGGERS is not set +# CONFIG_TRACE_EVENT_INJECT is not set # CONFIG_TRACEPOINT_BENCHMARK is not set # 
CONFIG_RING_BUFFER_BENCHMARK is not set +# CONFIG_TRACE_EVAL_MAP_FILE is not set # CONFIG_RING_BUFFER_STARTUP_TEST is not set +# CONFIG_RING_BUFFER_VALIDATE_TIME_DELTAS is not set # CONFIG_PREEMPTIRQ_DELAY_TEST is not set -# CONFIG_TRACE_EVAL_MAP_FILE is not set +# CONFIG_KPROBE_EVENT_GEN_TEST is not set +# CONFIG_SAMPLES is not set +CONFIG_STRICT_DEVMEM=y +CONFIG_IO_STRICT_DEVMEM=y + +# +# arm Debugging +# +# CONFIG_ARM_PTDUMP_DEBUGFS is not set +CONFIG_UNWINDER_ARM=y +CONFIG_ARM_UNWIND=y +# CONFIG_DEBUG_USER is not set +# CONFIG_DEBUG_LL is not set +CONFIG_DEBUG_LL_INCLUDE="mach/debug-macro.S" +CONFIG_UNCOMPRESS_INCLUDE="debug/uncompress.h" +# CONFIG_ARM_KPROBES_TEST is not set +# CONFIG_PID_IN_CONTEXTIDR is not set +# CONFIG_CORESIGHT is not set +# end of arm Debugging + +# +# Kernel Testing and Coverage +# +# CONFIG_KUNIT is not set +# CONFIG_NOTIFIER_ERROR_INJECTION is not set +# CONFIG_FAULT_INJECTION is not set +CONFIG_ARCH_HAS_KCOV=y +CONFIG_CC_HAS_SANCOV_TRACE_PC=y +# CONFIG_KCOV is not set CONFIG_RUNTIME_TESTING_MENU=y # CONFIG_LKDTM is not set -# CONFIG_TEST_LIST_SORT is not set -# CONFIG_TEST_SORT is not set +# CONFIG_TEST_MIN_HEAP is not set +# CONFIG_TEST_DIV64 is not set # CONFIG_KPROBES_SANITY_TEST is not set # CONFIG_BACKTRACE_SELF_TEST is not set # CONFIG_RBTREE_TEST is not set @@ -7501,12 +8363,13 @@ CONFIG_RUNTIME_TESTING_MENU=y # CONFIG_ATOMIC64_SELFTEST is not set # CONFIG_ASYNC_RAID6_TEST is not set # CONFIG_TEST_HEXDUMP is not set +# CONFIG_STRING_SELFTEST is not set # CONFIG_TEST_STRING_HELPERS is not set # CONFIG_TEST_STRSCPY is not set # CONFIG_TEST_KSTRTOX is not set # CONFIG_TEST_PRINTF is not set +# CONFIG_TEST_SCANF is not set # CONFIG_TEST_BITMAP is not set -# CONFIG_TEST_BITFIELD is not set # CONFIG_TEST_UUID is not set # CONFIG_TEST_XARRAY is not set # CONFIG_TEST_OVERFLOW is not set @@ -7514,6 +8377,7 @@ CONFIG_RUNTIME_TESTING_MENU=y # CONFIG_TEST_HASH is not set # CONFIG_TEST_IDA is not set # CONFIG_TEST_LKM is not set +# CONFIG_TEST_BITOPS is not set # CONFIG_TEST_VMALLOC is not set # CONFIG_TEST_USER_COPY is not set # CONFIG_TEST_BPF is not set @@ -7527,25 +8391,8 @@ CONFIG_RUNTIME_TESTING_MENU=y # CONFIG_TEST_MEMCAT_P is not set # CONFIG_TEST_STACKINIT is not set # CONFIG_TEST_MEMINIT is not set +# CONFIG_TEST_FREE_PAGES is not set +CONFIG_ARCH_USE_MEMTEST=y # CONFIG_MEMTEST is not set -# CONFIG_BUG_ON_DATA_CORRUPTION is not set -# CONFIG_SAMPLES is not set -CONFIG_HAVE_ARCH_KGDB=y -# CONFIG_KGDB is not set -# CONFIG_UBSAN is not set -CONFIG_UBSAN_ALIGNMENT=y -CONFIG_ARCH_HAS_DEVMEM_IS_ALLOWED=y -CONFIG_STRICT_DEVMEM=y -CONFIG_IO_STRICT_DEVMEM=y -# CONFIG_ARM_PTDUMP_DEBUGFS is not set -# CONFIG_DEBUG_WX is not set -CONFIG_UNWINDER_ARM=y -CONFIG_ARM_UNWIND=y -# CONFIG_DEBUG_USER is not set -# CONFIG_DEBUG_LL is not set -CONFIG_DEBUG_LL_INCLUDE="mach/debug-macro.S" -CONFIG_UNCOMPRESS_INCLUDE="debug/uncompress.h" -# CONFIG_ARM_KPROBES_TEST is not set -# CONFIG_PID_IN_CONTEXTIDR is not set -# CONFIG_CORESIGHT is not set +# end of Kernel Testing and Coverage # end of Kernel hacking diff --git a/system/easy-kernel/config-m68k b/system/easy-kernel/config-m68k index 3b167e8de7..be69ff9011 100644 --- a/system/easy-kernel/config-m68k +++ b/system/easy-kernel/config-m68k @@ -1,21 +1,20 @@ # # Automatically generated file; DO NOT EDIT. 
-# Linux/m68k 5.15.28-mc1 Kernel Configuration +# Linux/m68k 5.15.98-mc4 Kernel Configuration # -CONFIG_CC_VERSION_TEXT="gcc (Adelie 8.3.0) 8.3.0" +CONFIG_CC_VERSION_TEXT="gcc (Adelie 8.5.0) 8.5.0" CONFIG_CC_IS_GCC=y -CONFIG_GCC_VERSION=80300 +CONFIG_GCC_VERSION=80500 CONFIG_CLANG_VERSION=0 CONFIG_AS_IS_GNU=y CONFIG_AS_VERSION=23200 CONFIG_LD_IS_BFD=y CONFIG_LD_VERSION=23200 CONFIG_LLD_VERSION=0 -CONFIG_CC_CAN_LINK=y -CONFIG_CC_CAN_LINK_STATIC=y CONFIG_CC_HAS_ASM_GOTO=y CONFIG_CC_HAS_ASM_INLINE=y CONFIG_CC_HAS_NO_PROFILE_FN_ATTR=y +CONFIG_PAHOLE_VERSION=0 CONFIG_IRQ_WORK=y # @@ -96,7 +95,6 @@ CONFIG_PRINTK_SAFE_LOG_BUF_SHIFT=12 # # Scheduler features # -# CONFIG_SCHED_ALT is not set # end of Scheduler features CONFIG_CGROUPS=y @@ -536,6 +534,7 @@ CONFIG_INET_ESP=m CONFIG_INET_ESP_OFFLOAD=m # CONFIG_INET_ESPINTCP is not set CONFIG_INET_IPCOMP=m +CONFIG_INET_TABLE_PERTURB_ORDER=16 CONFIG_INET_XFRM_TUNNEL=m CONFIG_INET_TUNNEL=m CONFIG_INET_DIAG=m @@ -1634,7 +1633,6 @@ CONFIG_NET_VENDOR_I825XX=y CONFIG_BVME6000_NET=y CONFIG_MVME16x_NET=y CONFIG_NET_VENDOR_INTEL=y -CONFIG_NET_VENDOR_MICROSOFT=y CONFIG_NET_VENDOR_LITEX=y # CONFIG_NET_VENDOR_MARVELL is not set CONFIG_NET_VENDOR_MELLANOX=y @@ -1643,11 +1641,12 @@ CONFIG_NET_VENDOR_MELLANOX=y # CONFIG_NET_VENDOR_MICREL is not set CONFIG_NET_VENDOR_MICROCHIP=y CONFIG_NET_VENDOR_MICROSEMI=y +CONFIG_NET_VENDOR_MICROSOFT=y +CONFIG_NET_VENDOR_NI=y +# CONFIG_NI_XGE_MANAGEMENT_ENET is not set CONFIG_NET_VENDOR_NATSEMI=y CONFIG_MACSONIC=y # CONFIG_NET_VENDOR_NETRONOME is not set -CONFIG_NET_VENDOR_NI=y -# CONFIG_NI_XGE_MANAGEMENT_ENET is not set CONFIG_NET_VENDOR_8390=y # CONFIG_PCMCIA_AXNET is not set # CONFIG_XSURF100 is not set @@ -2015,57 +2014,6 @@ CONFIG_NVRAM=y CONFIG_DEVPORT=y # CONFIG_TCG_TPM is not set # CONFIG_RANDOM_TRUST_BOOTLOADER is not set -CONFIG_LRNG=y - -# -# Specific DRNG seeding strategies -# -# CONFIG_LRNG_OVERSAMPLE_ENTROPY_SOURCES is not set -CONFIG_LRNG_OVERSAMPLE_ES_BITS=0 -CONFIG_LRNG_SEED_BUFFER_INIT_ADD_BITS=0 -# end of Specific DRNG seeding strategies - -# -# Entropy Source Configuration -# - -# -# Interrupt Entropy Source -# -CONFIG_LRNG_IRQ=y -CONFIG_LRNG_CONTINUOUS_COMPRESSION_ENABLED=y -# CONFIG_LRNG_CONTINUOUS_COMPRESSION_DISABLED is not set -CONFIG_LRNG_ENABLE_CONTINUOUS_COMPRESSION=y -# CONFIG_LRNG_SWITCHABLE_CONTINUOUS_COMPRESSION is not set -# CONFIG_LRNG_COLLECTION_SIZE_32 is not set -# CONFIG_LRNG_COLLECTION_SIZE_256 is not set -# CONFIG_LRNG_COLLECTION_SIZE_512 is not set -CONFIG_LRNG_COLLECTION_SIZE_1024=y -# CONFIG_LRNG_COLLECTION_SIZE_2048 is not set -# CONFIG_LRNG_COLLECTION_SIZE_4096 is not set -# CONFIG_LRNG_COLLECTION_SIZE_8192 is not set -CONFIG_LRNG_COLLECTION_SIZE=1024 -# CONFIG_LRNG_HEALTH_TESTS is not set -CONFIG_LRNG_RCT_CUTOFF=31 -CONFIG_LRNG_APT_CUTOFF=325 -CONFIG_LRNG_IRQ_ENTROPY_RATE=256 - -# -# Jitter RNG Entropy Source -# -# CONFIG_LRNG_JENT is not set - -# -# CPU Entropy Source -# -CONFIG_LRNG_CPU=y -CONFIG_LRNG_CPU_FULL_ENT_MULTIPLIER=1 -CONFIG_LRNG_CPU_ENTROPY_RATE=8 -# end of Entropy Source Configuration - -# CONFIG_LRNG_DRNG_SWITCH is not set -# CONFIG_LRNG_TESTING_MENU is not set -# CONFIG_LRNG_SELFTEST is not set # end of Character devices # @@ -3108,7 +3056,6 @@ CONFIG_CRYPTO_CRC32C=y CONFIG_CRYPTO_CRC32=m CONFIG_CRYPTO_XXHASH=m CONFIG_CRYPTO_BLAKE2B=m -# CONFIG_CRYPTO_BLAKE2S is not set CONFIG_CRYPTO_CRCT10DIF=y CONFIG_CRYPTO_GHASH=m CONFIG_CRYPTO_POLY1305=m @@ -3177,24 +3124,6 @@ CONFIG_CRYPTO_USER_API_AEAD=m CONFIG_CRYPTO_USER_API_ENABLE_OBSOLETE=y # CONFIG_CRYPTO_STATS is not set 
CONFIG_CRYPTO_HASH_INFO=y - -# -# Crypto library routines -# -CONFIG_CRYPTO_LIB_AES=y -CONFIG_CRYPTO_LIB_ARC4=m -CONFIG_CRYPTO_LIB_BLAKE2S_GENERIC=m -CONFIG_CRYPTO_LIB_BLAKE2S=m -CONFIG_CRYPTO_LIB_CHACHA_GENERIC=m -CONFIG_CRYPTO_LIB_CHACHA=m -CONFIG_CRYPTO_LIB_CURVE25519_GENERIC=m -CONFIG_CRYPTO_LIB_CURVE25519=m -CONFIG_CRYPTO_LIB_DES=m -CONFIG_CRYPTO_LIB_POLY1305_RSIZE=1 -CONFIG_CRYPTO_LIB_POLY1305_GENERIC=m -CONFIG_CRYPTO_LIB_POLY1305=m -CONFIG_CRYPTO_LIB_CHACHA20POLY1305=m -CONFIG_CRYPTO_LIB_SHA256=y # CONFIG_CRYPTO_HW is not set CONFIG_ASYMMETRIC_KEY_TYPE=y CONFIG_ASYMMETRIC_PUBLIC_KEY_SUBTYPE=y @@ -3233,6 +3162,26 @@ CONFIG_GENERIC_NET_UTILS=y # CONFIG_PRIME_NUMBERS is not set CONFIG_GENERIC_PCI_IOMAP=y CONFIG_GENERIC_IOMAP=y + +# +# Crypto library routines +# +CONFIG_CRYPTO_LIB_AES=y +CONFIG_CRYPTO_LIB_ARC4=m +CONFIG_CRYPTO_LIB_BLAKE2S_GENERIC=y +CONFIG_CRYPTO_LIB_CHACHA_GENERIC=m +CONFIG_CRYPTO_LIB_CHACHA=m +CONFIG_CRYPTO_LIB_CURVE25519_GENERIC=m +CONFIG_CRYPTO_LIB_CURVE25519=m +CONFIG_CRYPTO_LIB_DES=m +CONFIG_CRYPTO_LIB_POLY1305_RSIZE=1 +CONFIG_CRYPTO_LIB_POLY1305_GENERIC=m +CONFIG_CRYPTO_LIB_POLY1305=m +CONFIG_CRYPTO_LIB_CHACHA20POLY1305=m +CONFIG_CRYPTO_LIB_SHA256=y +# end of Crypto library routines + +CONFIG_LIB_MEMNEQ=y CONFIG_CRC_CCITT=m CONFIG_CRC16=y CONFIG_CRC_T10DIF=y @@ -3333,6 +3282,8 @@ CONFIG_SYMBOLIC_ERRNAME=y CONFIG_DEBUG_BUGVERBOSE=y # end of printk and dmesg options +CONFIG_AS_HAS_NON_CONST_LEB128=y + # # Compile-time checks and compiler options # @@ -3343,7 +3294,6 @@ CONFIG_FRAME_WARN=1024 # CONFIG_HEADERS_INSTALL is not set # CONFIG_DEBUG_SECTION_MISMATCH is not set CONFIG_SECTION_MISMATCH_WARN_ONLY=y -# CONFIG_DEBUG_FORCE_FUNCTION_ALIGN_64B is not set # CONFIG_FRAME_POINTER is not set # CONFIG_VMLINUX_MAP is not set # CONFIG_DEBUG_FORCE_WEAK_PER_CPU is not set diff --git a/system/easy-kernel/config-pmmx b/system/easy-kernel/config-pmmx index 1123ce9e33..5e9377d9a0 100644 --- a/system/easy-kernel/config-pmmx +++ b/system/easy-kernel/config-pmmx @@ -1,21 +1,20 @@ # # Automatically generated file; DO NOT EDIT. 
-# Linux/x86 5.15.28-mc1 Kernel Configuration +# Linux/x86 5.15.98-mc4 Kernel Configuration # -CONFIG_CC_VERSION_TEXT="gcc (Adelie 8.3.0) 8.3.0" +CONFIG_CC_VERSION_TEXT="gcc (Adelie 8.5.0) 8.5.0" CONFIG_CC_IS_GCC=y -CONFIG_GCC_VERSION=80300 +CONFIG_GCC_VERSION=80500 CONFIG_CLANG_VERSION=0 CONFIG_AS_IS_GNU=y CONFIG_AS_VERSION=23200 CONFIG_LD_IS_BFD=y CONFIG_LD_VERSION=23200 CONFIG_LLD_VERSION=0 -CONFIG_CC_CAN_LINK=y -CONFIG_CC_CAN_LINK_STATIC=y CONFIG_CC_HAS_ASM_GOTO=y CONFIG_CC_HAS_ASM_INLINE=y CONFIG_CC_HAS_NO_PROFILE_FN_ATTR=y +CONFIG_PAHOLE_VERSION=0 CONFIG_IRQ_WORK=y CONFIG_BUILDTIME_TABLE_SORT=y CONFIG_THREAD_INFO_IN_TASK=y @@ -162,7 +161,6 @@ CONFIG_HAVE_UNSTABLE_SCHED_CLOCK=y # Scheduler features # # CONFIG_UCLAMP_TASK is not set -# CONFIG_SCHED_ALT is not set # end of Scheduler features CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH=y @@ -313,7 +311,6 @@ CONFIG_SMP=y CONFIG_X86_FEATURE_NAMES=y CONFIG_X86_MPPARSE=y # CONFIG_GOLDFISH is not set -CONFIG_RETPOLINE=y # CONFIG_X86_CPU_RESCTRL is not set CONFIG_X86_BIGSMP=y CONFIG_X86_EXTENDED_PLATFORM=y @@ -508,6 +505,8 @@ CONFIG_HOTPLUG_CPU=y CONFIG_MODIFY_LDT_SYSCALL=y # end of Processor type and features +CONFIG_SPECULATION_MITIGATIONS=y +CONFIG_RETPOLINE=y CONFIG_ARCH_MHP_MEMMAP_ON_MEMORY_ENABLE=y # @@ -737,6 +736,7 @@ CONFIG_HAVE_KPROBES_ON_FTRACE=y CONFIG_HAVE_FUNCTION_ERROR_INJECTION=y CONFIG_HAVE_NMI=y CONFIG_TRACE_IRQFLAGS_SUPPORT=y +CONFIG_TRACE_IRQFLAGS_NMI_SUPPORT=y CONFIG_HAVE_ARCH_TRACEHOOK=y CONFIG_HAVE_DMA_CONTIGUOUS=y CONFIG_GENERIC_SMP_IDLE_THREAD=y @@ -1072,6 +1072,7 @@ CONFIG_INET_ESP=m CONFIG_INET_ESP_OFFLOAD=m # CONFIG_INET_ESPINTCP is not set CONFIG_INET_IPCOMP=m +CONFIG_INET_TABLE_PERTURB_ORDER=16 CONFIG_INET_XFRM_TUNNEL=m CONFIG_INET_TUNNEL=m CONFIG_INET_DIAG=m @@ -2078,6 +2079,7 @@ CONFIG_PNPACPI=y CONFIG_BLK_DEV=y # CONFIG_BLK_DEV_NULL_BLK is not set CONFIG_BLK_DEV_FD=y +# CONFIG_BLK_DEV_FD_RAWCMD is not set CONFIG_CDROM=y CONFIG_PARIDE=m @@ -2594,6 +2596,7 @@ CONFIG_ATL1=m CONFIG_ATL1E=m CONFIG_ATL1C=m CONFIG_ALX=m +CONFIG_CX_ECAT=m CONFIG_NET_VENDOR_BROADCOM=y CONFIG_B44=m CONFIG_B44_PCI_AUTOSELECT=y @@ -2611,8 +2614,6 @@ CONFIG_BNXT=m CONFIG_BNXT_SRIOV=y CONFIG_BNXT_FLOWER_OFFLOAD=y CONFIG_BNXT_HWMON=y -CONFIG_NET_VENDOR_BROCADE=y -CONFIG_BNA=m CONFIG_NET_VENDOR_CADENCE=y CONFIG_MACB=m CONFIG_MACB_USE_HWSTAMP=y @@ -2633,7 +2634,6 @@ CONFIG_NET_VENDOR_CISCO=y CONFIG_ENIC=m CONFIG_NET_VENDOR_CORTINA=y # CONFIG_GEMINI_ETHERNET is not set -CONFIG_CX_ECAT=m CONFIG_DNET=m CONFIG_NET_VENDOR_DEC=y CONFIG_NET_TULIP=y @@ -2688,7 +2688,6 @@ CONFIG_I40EVF=m # CONFIG_ICE is not set CONFIG_FM10K=m # CONFIG_IGC is not set -CONFIG_NET_VENDOR_MICROSOFT=y CONFIG_JME=m CONFIG_NET_VENDOR_LITEX=y # CONFIG_LITEX_LITEETH is not set @@ -2720,9 +2719,12 @@ CONFIG_KSZ884X_PCI=m CONFIG_NET_VENDOR_MICROCHIP=y # CONFIG_LAN743X is not set CONFIG_NET_VENDOR_MICROSEMI=y +CONFIG_NET_VENDOR_MICROSOFT=y CONFIG_NET_VENDOR_MYRI=y CONFIG_MYRI10GE=m CONFIG_FEALNX=m +CONFIG_NET_VENDOR_NI=y +# CONFIG_NI_XGE_MANAGEMENT_ENET is not set CONFIG_NET_VENDOR_NATSEMI=y CONFIG_NATSEMI=m CONFIG_NS83820=m @@ -2733,8 +2735,6 @@ CONFIG_VXGE=m CONFIG_NET_VENDOR_NETRONOME=y CONFIG_NFP=m # CONFIG_NFP_DEBUG is not set -CONFIG_NET_VENDOR_NI=y -# CONFIG_NI_XGE_MANAGEMENT_ENET is not set CONFIG_NET_VENDOR_8390=y CONFIG_PCMCIA_AXNET=m CONFIG_NE2000=m @@ -2763,6 +2763,8 @@ CONFIG_QED_SRIOV=y CONFIG_QEDE=m CONFIG_QED_ISCSI=y CONFIG_QED_OOO=y +CONFIG_NET_VENDOR_BROCADE=y +CONFIG_BNA=m CONFIG_NET_VENDOR_QUALCOMM=y # CONFIG_QCA7000_UART is not set CONFIG_QCOM_EMAC=m @@ -2783,6 
+2785,11 @@ CONFIG_NET_VENDOR_ROCKER=y CONFIG_NET_VENDOR_SAMSUNG=y CONFIG_SXGBE_ETH=m CONFIG_NET_VENDOR_SEEQ=y +CONFIG_NET_VENDOR_SILAN=y +CONFIG_SC92031=m +CONFIG_NET_VENDOR_SIS=y +CONFIG_SIS900=m +CONFIG_SIS190=m CONFIG_NET_VENDOR_SOLARFLARE=y CONFIG_SFC=m CONFIG_SFC_MTD=y @@ -2791,11 +2798,6 @@ CONFIG_SFC_SRIOV=y CONFIG_SFC_MCDI_LOGGING=y CONFIG_SFC_FALCON=m CONFIG_SFC_FALCON_MTD=y -CONFIG_NET_VENDOR_SILAN=y -CONFIG_SC92031=m -CONFIG_NET_VENDOR_SIS=y -CONFIG_SIS900=m -CONFIG_SIS190=m CONFIG_NET_VENDOR_SMSC=y # CONFIG_SMC9194 is not set CONFIG_PCMCIA_SMC91C92=m @@ -2985,7 +2987,7 @@ CONFIG_USB_SIERRA_NET=m CONFIG_USB_VL600=m CONFIG_USB_NET_CH9200=m # CONFIG_USB_NET_AQC111 is not set -# CONFIG_USB_RTL8153_ECM is not set +CONFIG_USB_RTL8153_ECM=m CONFIG_WLAN=y CONFIG_WLAN_VENDOR_ADMTEK=y CONFIG_ADM8211=m @@ -3700,57 +3702,6 @@ CONFIG_TCG_TIS_ST33ZP24_I2C=m # CONFIG_XILLYUSB is not set # CONFIG_RANDOM_TRUST_CPU is not set # CONFIG_RANDOM_TRUST_BOOTLOADER is not set -CONFIG_LRNG=y - -# -# Specific DRNG seeding strategies -# -# CONFIG_LRNG_OVERSAMPLE_ENTROPY_SOURCES is not set -CONFIG_LRNG_OVERSAMPLE_ES_BITS=0 -CONFIG_LRNG_SEED_BUFFER_INIT_ADD_BITS=0 -# end of Specific DRNG seeding strategies - -# -# Entropy Source Configuration -# - -# -# Interrupt Entropy Source -# -CONFIG_LRNG_IRQ=y -CONFIG_LRNG_CONTINUOUS_COMPRESSION_ENABLED=y -# CONFIG_LRNG_CONTINUOUS_COMPRESSION_DISABLED is not set -CONFIG_LRNG_ENABLE_CONTINUOUS_COMPRESSION=y -# CONFIG_LRNG_SWITCHABLE_CONTINUOUS_COMPRESSION is not set -# CONFIG_LRNG_COLLECTION_SIZE_32 is not set -# CONFIG_LRNG_COLLECTION_SIZE_256 is not set -# CONFIG_LRNG_COLLECTION_SIZE_512 is not set -CONFIG_LRNG_COLLECTION_SIZE_1024=y -# CONFIG_LRNG_COLLECTION_SIZE_2048 is not set -# CONFIG_LRNG_COLLECTION_SIZE_4096 is not set -# CONFIG_LRNG_COLLECTION_SIZE_8192 is not set -CONFIG_LRNG_COLLECTION_SIZE=1024 -# CONFIG_LRNG_HEALTH_TESTS is not set -CONFIG_LRNG_RCT_CUTOFF=31 -CONFIG_LRNG_APT_CUTOFF=325 -CONFIG_LRNG_IRQ_ENTROPY_RATE=256 - -# -# Jitter RNG Entropy Source -# -# CONFIG_LRNG_JENT is not set - -# -# CPU Entropy Source -# -CONFIG_LRNG_CPU=y -CONFIG_LRNG_CPU_FULL_ENT_MULTIPLIER=1 -CONFIG_LRNG_CPU_ENTROPY_RATE=8 -# end of Entropy Source Configuration - -# CONFIG_LRNG_DRNG_SWITCH is not set -# CONFIG_LRNG_TESTING_MENU is not set -# CONFIG_LRNG_SELFTEST is not set # end of Character devices # @@ -7417,7 +7368,6 @@ CONFIG_CRYPTO_CRC32=m # CONFIG_CRYPTO_CRC32_PCLMUL is not set CONFIG_CRYPTO_XXHASH=m CONFIG_CRYPTO_BLAKE2B=m -# CONFIG_CRYPTO_BLAKE2S is not set CONFIG_CRYPTO_CRCT10DIF=y CONFIG_CRYPTO_GHASH=m CONFIG_CRYPTO_POLY1305=m @@ -7485,24 +7435,6 @@ CONFIG_CRYPTO_USER_API_RNG=m CONFIG_CRYPTO_USER_API_AEAD=m CONFIG_CRYPTO_USER_API_ENABLE_OBSOLETE=y CONFIG_CRYPTO_HASH_INFO=y - -# -# Crypto library routines -# -CONFIG_CRYPTO_LIB_AES=y -CONFIG_CRYPTO_LIB_ARC4=m -CONFIG_CRYPTO_LIB_BLAKE2S_GENERIC=m -CONFIG_CRYPTO_LIB_BLAKE2S=m -CONFIG_CRYPTO_LIB_CHACHA_GENERIC=m -CONFIG_CRYPTO_LIB_CHACHA=m -CONFIG_CRYPTO_LIB_CURVE25519_GENERIC=m -CONFIG_CRYPTO_LIB_CURVE25519=m -CONFIG_CRYPTO_LIB_DES=m -CONFIG_CRYPTO_LIB_POLY1305_RSIZE=1 -CONFIG_CRYPTO_LIB_POLY1305_GENERIC=m -CONFIG_CRYPTO_LIB_POLY1305=m -CONFIG_CRYPTO_LIB_CHACHA20POLY1305=m -CONFIG_CRYPTO_LIB_SHA256=y # CONFIG_CRYPTO_HW is not set CONFIG_ASYMMETRIC_KEY_TYPE=y CONFIG_ASYMMETRIC_PUBLIC_KEY_SUBTYPE=y @@ -7546,6 +7478,26 @@ CONFIG_GENERIC_PCI_IOMAP=y CONFIG_GENERIC_IOMAP=y CONFIG_ARCH_HAS_FAST_MULTIPLIER=y CONFIG_ARCH_USE_SYM_ANNOTATIONS=y + +# +# Crypto library routines +# +CONFIG_CRYPTO_LIB_AES=y 
+CONFIG_CRYPTO_LIB_ARC4=m +CONFIG_CRYPTO_LIB_BLAKE2S_GENERIC=y +CONFIG_CRYPTO_LIB_CHACHA_GENERIC=m +CONFIG_CRYPTO_LIB_CHACHA=m +CONFIG_CRYPTO_LIB_CURVE25519_GENERIC=m +CONFIG_CRYPTO_LIB_CURVE25519=m +CONFIG_CRYPTO_LIB_DES=m +CONFIG_CRYPTO_LIB_POLY1305_RSIZE=1 +CONFIG_CRYPTO_LIB_POLY1305_GENERIC=m +CONFIG_CRYPTO_LIB_POLY1305=m +CONFIG_CRYPTO_LIB_CHACHA20POLY1305=m +CONFIG_CRYPTO_LIB_SHA256=y +# end of Crypto library routines + +CONFIG_LIB_MEMNEQ=y CONFIG_CRC_CCITT=m CONFIG_CRC16=y CONFIG_CRC_T10DIF=y @@ -7661,6 +7613,8 @@ CONFIG_SYMBOLIC_ERRNAME=y CONFIG_DEBUG_BUGVERBOSE=y # end of printk and dmesg options +CONFIG_AS_HAS_NON_CONST_LEB128=y + # # Compile-time checks and compiler options # @@ -7671,7 +7625,6 @@ CONFIG_FRAME_WARN=2048 # CONFIG_HEADERS_INSTALL is not set # CONFIG_DEBUG_SECTION_MISMATCH is not set CONFIG_SECTION_MISMATCH_WARN_ONLY=y -# CONFIG_DEBUG_FORCE_FUNCTION_ALIGN_64B is not set CONFIG_FRAME_POINTER=y # CONFIG_VMLINUX_MAP is not set # CONFIG_DEBUG_FORCE_WEAK_PER_CPU is not set @@ -7830,7 +7783,6 @@ CONFIG_IO_STRICT_DEVMEM=y # # x86 Debugging # -CONFIG_TRACE_IRQFLAGS_NMI_SUPPORT=y # CONFIG_X86_VERBOSE_BOOTUP is not set CONFIG_EARLY_PRINTK=y # CONFIG_EARLY_PRINTK_DBGP is not set diff --git a/system/easy-kernel/config-ppc b/system/easy-kernel/config-ppc index c28ccd73fc..2daaaf79c3 100644 --- a/system/easy-kernel/config-ppc +++ b/system/easy-kernel/config-ppc @@ -1,6 +1,6 @@ # # Automatically generated file; DO NOT EDIT. -# Linux/powerpc 5.15.76-mc3 Kernel Configuration +# Linux/powerpc 5.15.98-mc4 Kernel Configuration # CONFIG_CC_VERSION_TEXT="gcc (Adelie 8.5.0) 8.5.0" CONFIG_CC_IS_GCC=y @@ -11,11 +11,10 @@ CONFIG_AS_VERSION=23200 CONFIG_LD_IS_BFD=y CONFIG_LD_VERSION=23200 CONFIG_LLD_VERSION=0 -CONFIG_CC_CAN_LINK=y -CONFIG_CC_CAN_LINK_STATIC=y CONFIG_CC_HAS_ASM_GOTO=y CONFIG_CC_HAS_ASM_INLINE=y CONFIG_CC_HAS_NO_PROFILE_FN_ATTR=y +CONFIG_PAHOLE_VERSION=0 CONFIG_IRQ_WORK=y CONFIG_BUILDTIME_TABLE_SORT=y CONFIG_THREAD_INFO_IN_TASK=y @@ -850,6 +849,7 @@ CONFIG_INET_ESP=m CONFIG_INET_ESP_OFFLOAD=m # CONFIG_INET_ESPINTCP is not set CONFIG_INET_IPCOMP=m +CONFIG_INET_TABLE_PERTURB_ORDER=16 CONFIG_INET_XFRM_TUNNEL=m CONFIG_INET_TUNNEL=m CONFIG_INET_DIAG=m @@ -5228,6 +5228,7 @@ CONFIG_MMC_TOSHIBA_PCI=m # CONFIG_MMC_MTK is not set # CONFIG_MMC_SDHCI_XENON is not set # CONFIG_MMC_SDHCI_OMAP is not set +# CONFIG_MMC_SDHCI_AM654 is not set # CONFIG_MEMSTICK is not set CONFIG_NEW_LEDS=y CONFIG_LEDS_CLASS=y @@ -5588,7 +5589,6 @@ CONFIG_EXTCON=m # CONFIG_EXTCON_RT8973A is not set # CONFIG_EXTCON_SM5502 is not set # CONFIG_EXTCON_USB_GPIO is not set -# CONFIG_EXTCON_USBC_TUSB320 is not set # CONFIG_MEMORY is not set # CONFIG_IIO is not set # CONFIG_NTB is not set diff --git a/system/easy-kernel/config-ppc64 b/system/easy-kernel/config-ppc64 index 4a867ab81f..3715c23a00 100644 --- a/system/easy-kernel/config-ppc64 +++ b/system/easy-kernel/config-ppc64 @@ -1,10 +1,10 @@ # # Automatically generated file; DO NOT EDIT. 
-# Linux/powerpc 5.15.28-mc1 Kernel Configuration +# Linux/powerpc 5.15.98-mc4 Kernel Configuration # -CONFIG_CC_VERSION_TEXT="gcc (Adelie 8.3.0) 8.3.0" +CONFIG_CC_VERSION_TEXT="gcc (Adelie 8.5.0) 8.5.0" CONFIG_CC_IS_GCC=y -CONFIG_GCC_VERSION=80300 +CONFIG_GCC_VERSION=80500 CONFIG_CLANG_VERSION=0 CONFIG_AS_IS_GNU=y CONFIG_AS_VERSION=23200 @@ -16,6 +16,7 @@ CONFIG_CC_CAN_LINK_STATIC=y CONFIG_CC_HAS_ASM_GOTO=y CONFIG_CC_HAS_ASM_INLINE=y CONFIG_CC_HAS_NO_PROFILE_FN_ATTR=y +CONFIG_PAHOLE_VERSION=0 CONFIG_IRQ_WORK=y CONFIG_BUILDTIME_TABLE_SORT=y CONFIG_THREAD_INFO_IN_TASK=y @@ -146,7 +147,6 @@ CONFIG_PRINTK_SAFE_LOG_BUF_SHIFT=12 # # Scheduler features # -# CONFIG_SCHED_ALT is not set # CONFIG_UCLAMP_TASK is not set # end of Scheduler features @@ -334,6 +334,7 @@ CONFIG_EPAPR_BOOT=y CONFIG_ARCH_HIBERNATION_POSSIBLE=y CONFIG_ARCH_SUSPEND_POSSIBLE=y CONFIG_ARCH_SUSPEND_NONZERO_CPU=y +CONFIG_ARCH_HAS_ADD_PAGES=y CONFIG_PPC_DCR_MMIO=y CONFIG_PPC_DCR=y CONFIG_PPC_OF_PLATFORM_PCI=y @@ -666,7 +667,6 @@ CONFIG_HAVE_ARCH_HUGE_VMALLOC=y CONFIG_HAVE_ARCH_SOFT_DIRTY=y CONFIG_HAVE_MOD_ARCH_SPECIFIC=y CONFIG_MODULES_USE_ELF_RELA=y -CONFIG_HAVE_IRQ_EXIT_ON_IRQ_STACK=y CONFIG_HAVE_SOFTIRQ_ON_OWN_STACK=y CONFIG_ARCH_HAS_ELF_RANDOMIZE=y CONFIG_HAVE_ARCH_MMAP_RND_BITS=y @@ -965,6 +965,7 @@ CONFIG_INET_ESP=m CONFIG_INET_ESP_OFFLOAD=m # CONFIG_INET_ESPINTCP is not set CONFIG_INET_IPCOMP=m +CONFIG_INET_TABLE_PERTURB_ORDER=16 CONFIG_INET_XFRM_TUNNEL=m CONFIG_INET_TUNNEL=m CONFIG_INET_DIAG=m @@ -2234,6 +2235,7 @@ CONFIG_DM_INTEGRITY=m CONFIG_MACINTOSH_DRIVERS=y CONFIG_ADB_PMU=y +CONFIG_ADB_PMU_EVENT=y CONFIG_ADB_PMU_LED=y # CONFIG_ADB_PMU_LED_DISK is not set CONFIG_PMAC_SMU=y @@ -2323,8 +2325,6 @@ CONFIG_TIGON3_HWMON=y # CONFIG_BNX2X is not set # CONFIG_SYSTEMPORT is not set # CONFIG_BNXT is not set -CONFIG_NET_VENDOR_BROCADE=y -# CONFIG_BNA is not set CONFIG_NET_VENDOR_CADENCE=y CONFIG_NET_VENDOR_CAVIUM=y # CONFIG_THUNDER_NIC_PF is not set @@ -2385,7 +2385,6 @@ CONFIG_IXGB=m # CONFIG_ICE is not set # CONFIG_FM10K is not set # CONFIG_IGC is not set -CONFIG_NET_VENDOR_MICROSOFT=y # CONFIG_JME is not set CONFIG_NET_VENDOR_LITEX=y CONFIG_LITEX_LITEETH=m @@ -2404,9 +2403,12 @@ CONFIG_NET_VENDOR_MICREL=y CONFIG_NET_VENDOR_MICROCHIP=y # CONFIG_LAN743X is not set CONFIG_NET_VENDOR_MICROSEMI=y +CONFIG_NET_VENDOR_MICROSOFT=y CONFIG_NET_VENDOR_MYRI=y # CONFIG_MYRI10GE is not set # CONFIG_FEALNX is not set +CONFIG_NET_VENDOR_NI=y +# CONFIG_NI_XGE_MANAGEMENT_ENET is not set CONFIG_NET_VENDOR_NATSEMI=y # CONFIG_NATSEMI is not set # CONFIG_NS83820 is not set @@ -2415,8 +2417,6 @@ CONFIG_NET_VENDOR_NETERION=y # CONFIG_VXGE is not set CONFIG_NET_VENDOR_NETRONOME=y # CONFIG_NFP is not set -CONFIG_NET_VENDOR_NI=y -# CONFIG_NI_XGE_MANAGEMENT_ENET is not set CONFIG_NET_VENDOR_8390=y # CONFIG_NE2K_PCI is not set CONFIG_NET_VENDOR_NVIDIA=y @@ -2433,6 +2433,8 @@ CONFIG_NET_VENDOR_QLOGIC=y # CONFIG_QLCNIC is not set # CONFIG_NETXEN_NIC is not set # CONFIG_QED is not set +CONFIG_NET_VENDOR_BROCADE=y +# CONFIG_BNA is not set CONFIG_NET_VENDOR_QUALCOMM=y # CONFIG_QCOM_EMAC is not set # CONFIG_RMNET is not set @@ -2447,14 +2449,14 @@ CONFIG_NET_VENDOR_ROCKER=y CONFIG_NET_VENDOR_SAMSUNG=y # CONFIG_SXGBE_ETH is not set CONFIG_NET_VENDOR_SEEQ=y -CONFIG_NET_VENDOR_SOLARFLARE=y -# CONFIG_SFC is not set -# CONFIG_SFC_FALCON is not set CONFIG_NET_VENDOR_SILAN=y # CONFIG_SC92031 is not set CONFIG_NET_VENDOR_SIS=y # CONFIG_SIS900 is not set # CONFIG_SIS190 is not set +CONFIG_NET_VENDOR_SOLARFLARE=y +# CONFIG_SFC is not set +# CONFIG_SFC_FALCON is not set 
CONFIG_NET_VENDOR_SMSC=y # CONFIG_EPIC100 is not set # CONFIG_SMSC911X is not set @@ -3103,56 +3105,6 @@ CONFIG_DEVPORT=y # CONFIG_XILLYUSB is not set # CONFIG_RANDOM_TRUST_CPU is not set # CONFIG_RANDOM_TRUST_BOOTLOADER is not set -CONFIG_LRNG=y - -# -# Specific DRNG seeding strategies -# -# CONFIG_LRNG_OVERSAMPLE_ENTROPY_SOURCES is not set -CONFIG_LRNG_OVERSAMPLE_ES_BITS=0 -CONFIG_LRNG_SEED_BUFFER_INIT_ADD_BITS=0 -# end of Specific DRNG seeding strategies - -# -# Entropy Source Configuration -# - -# -# Interrupt Entropy Source -# -CONFIG_LRNG_IRQ=y -CONFIG_LRNG_CONTINUOUS_COMPRESSION_ENABLED=y -# CONFIG_LRNG_CONTINUOUS_COMPRESSION_DISABLED is not set -CONFIG_LRNG_ENABLE_CONTINUOUS_COMPRESSION=y -CONFIG_LRNG_SWITCHABLE_CONTINUOUS_COMPRESSION=y -# CONFIG_LRNG_COLLECTION_SIZE_256 is not set -# CONFIG_LRNG_COLLECTION_SIZE_512 is not set -CONFIG_LRNG_COLLECTION_SIZE_1024=y -# CONFIG_LRNG_COLLECTION_SIZE_2048 is not set -# CONFIG_LRNG_COLLECTION_SIZE_4096 is not set -# CONFIG_LRNG_COLLECTION_SIZE_8192 is not set -CONFIG_LRNG_COLLECTION_SIZE=1024 -# CONFIG_LRNG_HEALTH_TESTS is not set -CONFIG_LRNG_RCT_CUTOFF=31 -CONFIG_LRNG_APT_CUTOFF=325 -CONFIG_LRNG_IRQ_ENTROPY_RATE=256 - -# -# Jitter RNG Entropy Source -# -# CONFIG_LRNG_JENT is not set - -# -# CPU Entropy Source -# -CONFIG_LRNG_CPU=y -CONFIG_LRNG_CPU_FULL_ENT_MULTIPLIER=1 -CONFIG_LRNG_CPU_ENTROPY_RATE=8 -# end of Entropy Source Configuration - -# CONFIG_LRNG_DRNG_SWITCH is not set -# CONFIG_LRNG_TESTING_MENU is not set -# CONFIG_LRNG_SELFTEST is not set # end of Character devices # @@ -4948,6 +4900,7 @@ CONFIG_MMC_SDHCI_OF_HLWD=m # CONFIG_MMC_TOSHIBA_PCI is not set # CONFIG_MMC_SDHCI_XENON is not set # CONFIG_MMC_SDHCI_OMAP is not set +# CONFIG_MMC_SDHCI_AM654 is not set CONFIG_MEMSTICK=m # CONFIG_MEMSTICK_DEBUG is not set @@ -5807,7 +5760,6 @@ CONFIG_CRYPTO_CRC32C=y CONFIG_CRYPTO_CRC32=m CONFIG_CRYPTO_XXHASH=m CONFIG_CRYPTO_BLAKE2B=m -# CONFIG_CRYPTO_BLAKE2S is not set CONFIG_CRYPTO_CRCT10DIF=y # CONFIG_CRYPTO_CRCT10DIF_VPMSUM is not set CONFIG_CRYPTO_GHASH=m @@ -5875,24 +5827,6 @@ CONFIG_CRYPTO_USER_API_RNG=m CONFIG_CRYPTO_USER_API_AEAD=m CONFIG_CRYPTO_USER_API_ENABLE_OBSOLETE=y CONFIG_CRYPTO_HASH_INFO=y - -# -# Crypto library routines -# -CONFIG_CRYPTO_LIB_AES=y -CONFIG_CRYPTO_LIB_ARC4=m -CONFIG_CRYPTO_LIB_BLAKE2S_GENERIC=m -CONFIG_CRYPTO_LIB_BLAKE2S=m -CONFIG_CRYPTO_LIB_CHACHA_GENERIC=m -CONFIG_CRYPTO_LIB_CHACHA=m -CONFIG_CRYPTO_LIB_CURVE25519_GENERIC=m -CONFIG_CRYPTO_LIB_CURVE25519=m -CONFIG_CRYPTO_LIB_DES=m -CONFIG_CRYPTO_LIB_POLY1305_RSIZE=1 -CONFIG_CRYPTO_LIB_POLY1305_GENERIC=m -CONFIG_CRYPTO_LIB_POLY1305=m -CONFIG_CRYPTO_LIB_CHACHA20POLY1305=m -CONFIG_CRYPTO_LIB_SHA256=y # CONFIG_CRYPTO_HW is not set CONFIG_ASYMMETRIC_KEY_TYPE=y CONFIG_ASYMMETRIC_PUBLIC_KEY_SUBTYPE=y @@ -5933,6 +5867,26 @@ CONFIG_RATIONAL=m CONFIG_GENERIC_PCI_IOMAP=y CONFIG_GENERIC_IOMAP=y CONFIG_ARCH_USE_CMPXCHG_LOCKREF=y + +# +# Crypto library routines +# +CONFIG_CRYPTO_LIB_AES=y +CONFIG_CRYPTO_LIB_ARC4=m +CONFIG_CRYPTO_LIB_BLAKE2S_GENERIC=y +CONFIG_CRYPTO_LIB_CHACHA_GENERIC=m +CONFIG_CRYPTO_LIB_CHACHA=m +CONFIG_CRYPTO_LIB_CURVE25519_GENERIC=m +CONFIG_CRYPTO_LIB_CURVE25519=m +CONFIG_CRYPTO_LIB_DES=m +CONFIG_CRYPTO_LIB_POLY1305_RSIZE=1 +CONFIG_CRYPTO_LIB_POLY1305_GENERIC=m +CONFIG_CRYPTO_LIB_POLY1305=m +CONFIG_CRYPTO_LIB_CHACHA20POLY1305=m +CONFIG_CRYPTO_LIB_SHA256=y +# end of Crypto library routines + +CONFIG_LIB_MEMNEQ=y CONFIG_CRC_CCITT=m CONFIG_CRC16=y CONFIG_CRC_T10DIF=y @@ -6043,6 +5997,8 @@ CONFIG_SYMBOLIC_ERRNAME=y CONFIG_DEBUG_BUGVERBOSE=y # end of printk 
and dmesg options +CONFIG_AS_HAS_NON_CONST_LEB128=y + # # Compile-time checks and compiler options # diff --git a/system/easy-kernel/config-sparc64 b/system/easy-kernel/config-sparc64 index bd19d076a8..fde6dde2ac 100644 --- a/system/easy-kernel/config-sparc64 +++ b/system/easy-kernel/config-sparc64 @@ -1,10 +1,10 @@ # # Automatically generated file; DO NOT EDIT. -# Linux/sparc64 5.15.28-mc1 Kernel Configuration +# Linux/sparc64 5.15.98-mc4 Kernel Configuration # -CONFIG_CC_VERSION_TEXT="gcc (Adelie 8.3.0) 8.3.0" +CONFIG_CC_VERSION_TEXT="gcc (Adelie 8.5.0) 8.5.0" CONFIG_CC_IS_GCC=y -CONFIG_GCC_VERSION=80300 +CONFIG_GCC_VERSION=80500 CONFIG_CLANG_VERSION=0 CONFIG_AS_IS_GNU=y CONFIG_AS_VERSION=23200 @@ -16,6 +16,7 @@ CONFIG_CC_CAN_LINK_STATIC=y CONFIG_CC_HAS_ASM_GOTO=y CONFIG_CC_HAS_ASM_INLINE=y CONFIG_CC_HAS_NO_PROFILE_FN_ATTR=y +CONFIG_PAHOLE_VERSION=0 CONFIG_IRQ_WORK=y # @@ -76,9 +77,8 @@ CONFIG_HAVE_EBPF_JIT=y # CONFIG_BPF_SYSCALL=y CONFIG_BPF_JIT=y -CONFIG_BPF_JIT_ALWAYS_ON=y -CONFIG_BPF_JIT_DEFAULT_ON=y -# CONFIG_BPF_UNPRIV_DEFAULT_OFF is not set +# CONFIG_BPF_JIT_ALWAYS_ON is not set +CONFIG_BPF_UNPRIV_DEFAULT_OFF=y # CONFIG_BPF_PRELOAD is not set # CONFIG_BPF_LSM is not set # end of BPF subsystem @@ -86,7 +86,7 @@ CONFIG_BPF_JIT_DEFAULT_ON=y # CONFIG_PREEMPT_NONE is not set CONFIG_PREEMPT_VOLUNTARY=y # CONFIG_PREEMPT is not set -# CONFIG_SCHED_CORE is not set +CONFIG_SCHED_CORE=y # # CPU/Task time and stats accounting @@ -121,7 +121,7 @@ CONFIG_RCU_NEED_SEGCBLIST=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y # CONFIG_IKHEADERS is not set -CONFIG_LOG_BUF_SHIFT=16 +CONFIG_LOG_BUF_SHIFT=14 CONFIG_LOG_CPU_MAX_BUF_SHIFT=12 CONFIG_PRINTK_SAFE_LOG_BUF_SHIFT=12 # CONFIG_PRINTK_INDEX is not set @@ -130,7 +130,6 @@ CONFIG_PRINTK_SAFE_LOG_BUF_SHIFT=12 # Scheduler features # # CONFIG_UCLAMP_TASK is not set -# CONFIG_SCHED_ALT is not set # end of Scheduler features CONFIG_CC_HAS_INT128=y @@ -148,7 +147,6 @@ CONFIG_FAIR_GROUP_SCHED=y CONFIG_CGROUP_PIDS=y CONFIG_CGROUP_RDMA=y CONFIG_CGROUP_FREEZER=y -CONFIG_CGROUP_HUGETLB=y CONFIG_CPUSETS=y # CONFIG_PROC_PID_CPUSET is not set CONFIG_CGROUP_DEVICE=y @@ -177,14 +175,14 @@ CONFIG_RD_XZ=y CONFIG_RD_LZO=y CONFIG_RD_LZ4=y CONFIG_RD_ZSTD=y -# CONFIG_BOOT_CONFIG is not set +CONFIG_BOOT_CONFIG=y CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE=y # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set CONFIG_SYSCTL=y CONFIG_HAVE_UID16=y CONFIG_SYSCTL_EXCEPTION_TRACE=y CONFIG_EXPERT=y -# CONFIG_UID16 is not set +CONFIG_UID16=y CONFIG_MULTIUSER=y # CONFIG_SGETMASK_SYSCALL is not set # CONFIG_SYSFS_SYSCALL is not set @@ -259,7 +257,7 @@ CONFIG_ARCH_SUPPORTS_UPROBES=y # Processor type and features # CONFIG_SMP=y -CONFIG_NR_CPUS=256 +CONFIG_NR_CPUS=4 CONFIG_HZ_100=y # CONFIG_HZ_250 is not set # CONFIG_HZ_300 is not set @@ -296,16 +294,16 @@ CONFIG_CPU_FREQ_GOV_SCHEDUTIL=y # # CPU frequency scaling drivers # -# CONFIG_SPARC_US3_CPUFREQ is not set -# CONFIG_SPARC_US2E_CPUFREQ is not set +# CONFIG_CPUFREQ_DT is not set +CONFIG_SPARC_US3_CPUFREQ=m +CONFIG_SPARC_US2E_CPUFREQ=m # end of CPU Frequency scaling CONFIG_US3_MC=y -CONFIG_NUMA=y -CONFIG_NODES_SHIFT=4 +# CONFIG_NUMA is not set CONFIG_ARCH_SPARSEMEM_ENABLE=y CONFIG_ARCH_SPARSEMEM_DEFAULT=y -CONFIG_FORCE_MAX_ZONEORDER=13 +CONFIG_FORCE_MAX_ZONEORDER=11 CONFIG_HIBERNATE_CALLBACKS=y CONFIG_HIBERNATION=y CONFIG_HIBERNATION_SNAPSHOT_DEV=y @@ -318,6 +316,7 @@ CONFIG_PM=y CONFIG_PM_DEBUG=y CONFIG_PM_ADVANCED_DEBUG=y CONFIG_PM_SLEEP_DEBUG=y +CONFIG_PM_CLK=y CONFIG_WQ_POWER_EFFICIENT_DEFAULT=y CONFIG_ENERGY_MODEL=y CONFIG_SCHED_SMT=y @@ -331,7 +330,7 @@ 
CONFIG_SCHED_MC=y CONFIG_SBUS=y CONFIG_SBUSCHAR=y CONFIG_SUN_LDOMS=y -# CONFIG_SUN_OPENPROMFS is not set +CONFIG_SUN_OPENPROMFS=m CONFIG_SPARC64_PCI=y CONFIG_SPARC64_PCI_MSI=y # end of Bus options (PCI etc.) @@ -342,12 +341,12 @@ CONFIG_SYSVIPC_COMPAT=y # # Misc Linux/SPARC drivers # -CONFIG_SUN_OPENPROMIO=y -CONFIG_OBP_FLASH=y +CONFIG_SUN_OPENPROMIO=m +CONFIG_OBP_FLASH=m CONFIG_TADPOLE_TS102_UCTRL=m CONFIG_BBC_I2C=m CONFIG_ENVCTRL=m -CONFIG_DISPLAY7SEG=m +# CONFIG_DISPLAY7SEG is not set CONFIG_ORACLE_DAX=m # end of Misc Linux/SPARC drivers @@ -442,6 +441,7 @@ CONFIG_BLK_DEV_THROTTLING=y CONFIG_BLK_WBT=y CONFIG_BLK_WBT_MQ=y # CONFIG_BLK_CGROUP_IOLATENCY is not set +# CONFIG_BLK_CGROUP_FC_APPID is not set CONFIG_BLK_CGROUP_IOCOST=y # CONFIG_BLK_CGROUP_IOPRIO is not set CONFIG_BLK_DEBUG_FS=y @@ -453,7 +453,7 @@ CONFIG_BLK_SED_OPAL=y # CONFIG_PARTITION_ADVANCED=y # CONFIG_ACORN_PARTITION is not set -# CONFIG_AIX_PARTITION is not set +CONFIG_AIX_PARTITION=y # CONFIG_OSF_PARTITION is not set CONFIG_AMIGA_PARTITION=y # CONFIG_ATARI_PARTITION is not set @@ -490,7 +490,6 @@ CONFIG_BFQ_GROUP_IOSCHED=y CONFIG_BFQ_CGROUP_DEBUG=y # end of IO Schedulers -CONFIG_PADATA=y CONFIG_ASN1=y CONFIG_INLINE_SPIN_UNLOCK_IRQ=y CONFIG_INLINE_READ_UNLOCK=y @@ -535,14 +534,11 @@ CONFIG_PAGE_REPORTING=y CONFIG_MIGRATION=y CONFIG_CONTIG_ALLOC=y CONFIG_PHYS_ADDR_T_64BIT=y -CONFIG_MMU_NOTIFIER=y CONFIG_KSM=y CONFIG_UKSM=y # CONFIG_KSM_LEGACY is not set -CONFIG_DEFAULT_MMAP_MIN_ADDR=65536 -CONFIG_TRANSPARENT_HUGEPAGE=y -# CONFIG_TRANSPARENT_HUGEPAGE_ALWAYS is not set -CONFIG_TRANSPARENT_HUGEPAGE_MADVISE=y +CONFIG_DEFAULT_MMAP_MIN_ADDR=4096 +# CONFIG_TRANSPARENT_HUGEPAGE is not set # CONFIG_CLEANCACHE is not set CONFIG_FRONTSWAP=y CONFIG_CMA=y @@ -567,12 +563,10 @@ CONFIG_ZPOOL=y CONFIG_ZBUD=y CONFIG_Z3FOLD=m # CONFIG_ZSMALLOC is not set -CONFIG_DEFERRED_STRUCT_PAGE_INIT=y +# CONFIG_DEFERRED_STRUCT_PAGE_INIT is not set # CONFIG_IDLE_PAGE_TRACKING is not set -CONFIG_HMM_MIRROR=y # CONFIG_PERCPU_STATS is not set # CONFIG_GUP_TEST is not set -# CONFIG_READ_ONLY_THP_FOR_FS is not set CONFIG_ARCH_HAS_PTE_SPECIAL=y # @@ -641,6 +635,7 @@ CONFIG_INET_ESP=m CONFIG_INET_ESP_OFFLOAD=m # CONFIG_INET_ESPINTCP is not set CONFIG_INET_IPCOMP=m +CONFIG_INET_TABLE_PERTURB_ORDER=16 CONFIG_INET_XFRM_TUNNEL=m CONFIG_INET_TUNNEL=m CONFIG_INET_DIAG=m @@ -702,7 +697,6 @@ CONFIG_IPV6_PIMSM_V2=y # CONFIG_NETLABEL is not set # CONFIG_MPTCP is not set # CONFIG_NETWORK_SECMARK is not set -CONFIG_NET_PTP_CLASSIFY=y # CONFIG_NETWORK_PHY_TIMESTAMPING is not set CONFIG_NETFILTER=y CONFIG_NETFILTER_ADVANCED=y @@ -1132,7 +1126,7 @@ CONFIG_NET_ACT_CT=m # CONFIG_NET_TC_SKB_EXT is not set CONFIG_NET_SCH_FIFO=y # CONFIG_DCB is not set -CONFIG_DNS_RESOLVER=y +CONFIG_DNS_RESOLVER=m # CONFIG_BATMAN_ADV is not set CONFIG_OPENVSWITCH=m CONFIG_OPENVSWITCH_GRE=m @@ -1147,7 +1141,9 @@ CONFIG_NET_NSH=m # CONFIG_HSR is not set # CONFIG_NET_SWITCHDEV is not set CONFIG_NET_L3_MASTER_DEV=y -# CONFIG_QRTR is not set +CONFIG_QRTR=m +# CONFIG_QRTR_TUN is not set +CONFIG_QRTR_MHI=m # CONFIG_NET_NCSI is not set CONFIG_PCPU_DEV_REFCNT=y CONFIG_RPS=y @@ -1182,7 +1178,7 @@ CONFIG_BT_HIDP=m CONFIG_BT_HS=y CONFIG_BT_LE=y CONFIG_BT_LEDS=y -# CONFIG_BT_MSFTEXT is not set +CONFIG_BT_MSFTEXT=y # CONFIG_BT_AOSPEXT is not set # CONFIG_BT_DEBUGFS is not set # CONFIG_BT_SELFTEST is not set @@ -1204,6 +1200,7 @@ CONFIG_BT_HCIUART=m CONFIG_BT_HCIUART_H4=y CONFIG_BT_HCIUART_BCSP=y CONFIG_BT_HCIUART_ATH3K=y +CONFIG_BT_HCIUART_INTEL=y CONFIG_BT_HCIUART_AG6XX=y CONFIG_BT_HCIBCM203X=m 
CONFIG_BT_HCIBPA10X=m @@ -1216,7 +1213,7 @@ CONFIG_BT_MRVL=m # CONFIG_BT_MRVL_SDIO is not set CONFIG_BT_ATH3K=m # CONFIG_BT_MTKSDIO is not set -# CONFIG_BT_VIRTIO is not set +CONFIG_BT_VIRTIO=m # end of Bluetooth device drivers # CONFIG_AF_RXRPC is not set @@ -1224,8 +1221,11 @@ CONFIG_BT_ATH3K=m # CONFIG_MCTP is not set CONFIG_FIB_RULES=y CONFIG_WIRELESS=y +CONFIG_WIRELESS_EXT=y CONFIG_WEXT_CORE=y CONFIG_WEXT_PROC=y +CONFIG_WEXT_SPY=y +CONFIG_WEXT_PRIV=y CONFIG_CFG80211=m # CONFIG_NL80211_TESTMODE is not set # CONFIG_CFG80211_DEVELOPER_WARNINGS is not set @@ -1236,6 +1236,12 @@ CONFIG_CFG80211_DEFAULT_PS=y # CONFIG_CFG80211_DEBUGFS is not set # CONFIG_CFG80211_CRDA_SUPPORT is not set CONFIG_CFG80211_WEXT=y +CONFIG_CFG80211_WEXT_EXPORT=y +CONFIG_LIB80211=m +CONFIG_LIB80211_CRYPT_WEP=m +CONFIG_LIB80211_CRYPT_CCMP=m +CONFIG_LIB80211_CRYPT_TKIP=m +# CONFIG_LIB80211_DEBUG is not set CONFIG_MAC80211=m CONFIG_MAC80211_HAS_RC=y CONFIG_MAC80211_RC_MINSTREL=y @@ -1250,6 +1256,7 @@ CONFIG_MAC80211_STA_HASH_MAX_SIZE=0 CONFIG_RFKILL=m CONFIG_RFKILL_LEDS=y CONFIG_RFKILL_INPUT=y +# CONFIG_RFKILL_GPIO is not set CONFIG_NET_9P=m CONFIG_NET_9P_VIRTIO=m # CONFIG_NET_9P_DEBUG is not set @@ -1262,8 +1269,9 @@ CONFIG_LWTUNNEL=y # CONFIG_LWTUNNEL_BPF is not set CONFIG_DST_CACHE=y CONFIG_GRO_CELLS=y -CONFIG_NET_SELFTESTS=m +CONFIG_NET_SELFTESTS=y CONFIG_NET_SOCK_MSG=y +CONFIG_NET_DEVLINK=y CONFIG_FAILOVER=m CONFIG_ETHTOOL_NETLINK=y @@ -1349,28 +1357,29 @@ CONFIG_HOTPLUG_PCI_SHPC=y # end of PCI switch controller drivers # CONFIG_CXL_BUS is not set -CONFIG_PCCARD=m -CONFIG_PCMCIA=m +CONFIG_PCCARD=y +CONFIG_PCMCIA=y CONFIG_PCMCIA_LOAD_CIS=y CONFIG_CARDBUS=y # # PC-card bridges # -CONFIG_YENTA=m +CONFIG_YENTA=y CONFIG_YENTA_O2=y CONFIG_YENTA_RICOH=y CONFIG_YENTA_TI=y CONFIG_YENTA_ENE_TUNE=y CONFIG_YENTA_TOSHIBA=y -CONFIG_PD6729=m -CONFIG_I82092=m +# CONFIG_PD6729 is not set +# CONFIG_I82092 is not set CONFIG_PCCARD_NONSTATIC=y # CONFIG_RAPIDIO is not set # # Generic Driver Options # +CONFIG_AUXILIARY_BUS=y # CONFIG_UEVENT_HELPER is not set CONFIG_DEVTMPFS=y CONFIG_DEVTMPFS_MOUNT=y @@ -1388,6 +1397,7 @@ CONFIG_FW_LOADER_COMPRESS=y CONFIG_FW_CACHE=y # end of Firmware loader +CONFIG_WANT_DEV_COREDUMP=y # CONFIG_ALLOW_DEV_COREDUMP is not set # CONFIG_DEBUG_DRIVER is not set # CONFIG_DEBUG_DEVRES is not set @@ -1402,10 +1412,13 @@ CONFIG_DMA_SHARED_BUFFER=y # # Bus devices # -# CONFIG_MHI_BUS is not set +CONFIG_MHI_BUS=m +# CONFIG_MHI_BUS_DEBUG is not set +# CONFIG_MHI_BUS_PCI_GENERIC is not set # end of Bus devices -CONFIG_CONNECTOR=m +CONFIG_CONNECTOR=y +CONFIG_PROC_EVENTS=y # # Firmware Drivers @@ -1539,27 +1552,38 @@ CONFIG_OF_PROMTREE=y CONFIG_OF_KOBJ=y # CONFIG_OF_OVERLAY is not set CONFIG_ARCH_MIGHT_HAVE_PC_PARPORT=y -# CONFIG_PARPORT is not set +CONFIG_PARPORT=m +CONFIG_PARPORT_PC=m +CONFIG_PARPORT_SERIAL=m +CONFIG_PARPORT_PC_FIFO=y +# CONFIG_PARPORT_PC_SUPERIO is not set +CONFIG_PARPORT_PC_PCMCIA=m +CONFIG_PARPORT_SUNBPP=m +CONFIG_PARPORT_AX88796=m +CONFIG_PARPORT_1284=y +CONFIG_PARPORT_NOT_PC=y CONFIG_BLK_DEV=y # CONFIG_BLK_DEV_NULL_BLK is not set CONFIG_BLK_DEV_FD=y +# CONFIG_BLK_DEV_FD_RAWCMD is not set CONFIG_CDROM=y -# CONFIG_BLK_DEV_PCIESSD_MTIP32XX is not set +# CONFIG_PARIDE is not set +CONFIG_BLK_DEV_PCIESSD_MTIP32XX=y CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_LOOP_MIN_COUNT=8 # CONFIG_BLK_DEV_CRYPTOLOOP is not set # CONFIG_BLK_DEV_DRBD is not set CONFIG_BLK_DEV_NBD=m -# CONFIG_BLK_DEV_SX8 is not set +CONFIG_BLK_DEV_SX8=y CONFIG_BLK_DEV_RAM=m -CONFIG_BLK_DEV_RAM_COUNT=16 -CONFIG_BLK_DEV_RAM_SIZE=65536 
+CONFIG_BLK_DEV_RAM_COUNT=4 +CONFIG_BLK_DEV_RAM_SIZE=4096 # CONFIG_CDROM_PKTCDVD is not set CONFIG_ATA_OVER_ETH=m -CONFIG_SUNVDC=y +CONFIG_SUNVDC=m CONFIG_VIRTIO_BLK=y # CONFIG_BLK_DEV_RBD is not set -# CONFIG_BLK_DEV_RSXX is not set +CONFIG_BLK_DEV_RSXX=y # # NVME Support @@ -1567,9 +1591,14 @@ CONFIG_VIRTIO_BLK=y CONFIG_NVME_CORE=y CONFIG_BLK_DEV_NVME=y # CONFIG_NVME_MULTIPATH is not set -# CONFIG_NVME_HWMON is not set -# CONFIG_NVME_FC is not set -# CONFIG_NVME_TCP is not set +CONFIG_NVME_FABRICS=m +CONFIG_NVME_FC=m +CONFIG_NVME_TCP=m +CONFIG_NVME_TARGET=m +# CONFIG_NVME_TARGET_PASSTHRU is not set +# CONFIG_NVME_TARGET_LOOP is not set +# CONFIG_NVME_TARGET_FC is not set +# CONFIG_NVME_TARGET_TCP is not set # end of NVME Support # @@ -1608,15 +1637,18 @@ CONFIG_EEPROM_93CX6=m CONFIG_EEPROM_EE1004=m # end of EEPROM support -# CONFIG_CB710_CORE is not set +CONFIG_CB710_CORE=m +# CONFIG_CB710_DEBUG is not set +CONFIG_CB710_DEBUG_ASSUMPTIONS=y # # Texas Instruments shared transport line discipline # +# CONFIG_TI_ST is not set # end of Texas Instruments shared transport line discipline # CONFIG_SENSORS_LIS3_I2C is not set -# CONFIG_ALTERA_STAPL is not set +CONFIG_ALTERA_STAPL=m # CONFIG_GENWQE is not set # CONFIG_ECHO is not set # CONFIG_BCM_VK is not set @@ -1632,7 +1664,7 @@ CONFIG_EEPROM_EE1004=m # SCSI device support # CONFIG_SCSI_MOD=y -CONFIG_RAID_ATTRS=y +CONFIG_RAID_ATTRS=m CONFIG_SCSI_COMMON=y CONFIG_SCSI=y CONFIG_SCSI_DMA=y @@ -1656,21 +1688,21 @@ CONFIG_SCSI_SCAN_ASYNC=y # CONFIG_SCSI_SPI_ATTRS=y # CONFIG_SCSI_FC_ATTRS is not set -CONFIG_SCSI_ISCSI_ATTRS=y +CONFIG_SCSI_ISCSI_ATTRS=m CONFIG_SCSI_SAS_ATTRS=y CONFIG_SCSI_SAS_LIBSAS=y -CONFIG_SCSI_SAS_ATA=y +# CONFIG_SCSI_SAS_ATA is not set CONFIG_SCSI_SAS_HOST_SMP=y -CONFIG_SCSI_SRP_ATTRS=m +# CONFIG_SCSI_SRP_ATTRS is not set # end of SCSI Transports CONFIG_SCSI_LOWLEVEL=y CONFIG_ISCSI_TCP=m -CONFIG_ISCSI_BOOT_SYSFS=y -# CONFIG_SCSI_CXGB3_ISCSI is not set -# CONFIG_SCSI_CXGB4_ISCSI is not set -# CONFIG_SCSI_BNX2_ISCSI is not set -# CONFIG_BE2ISCSI is not set +CONFIG_ISCSI_BOOT_SYSFS=m +CONFIG_SCSI_CXGB3_ISCSI=m +CONFIG_SCSI_CXGB4_ISCSI=m +CONFIG_SCSI_BNX2_ISCSI=m +CONFIG_BE2ISCSI=m CONFIG_BLK_DEV_3W_XXXX_RAID=m CONFIG_SCSI_HPSA=m CONFIG_SCSI_3W_9XXX=m @@ -1678,8 +1710,8 @@ CONFIG_SCSI_3W_SAS=m CONFIG_SCSI_ACARD=m CONFIG_SCSI_AACRAID=m CONFIG_SCSI_AIC7XXX=y -CONFIG_AIC7XXX_CMDS_PER_DEVICE=32 -CONFIG_AIC7XXX_RESET_DELAY_MS=5000 +CONFIG_AIC7XXX_CMDS_PER_DEVICE=253 +CONFIG_AIC7XXX_RESET_DELAY_MS=15000 CONFIG_AIC7XXX_DEBUG_ENABLE=y CONFIG_AIC7XXX_DEBUG_MASK=0 CONFIG_AIC7XXX_REG_PRETTY_PRINT=y @@ -1695,11 +1727,13 @@ CONFIG_SCSI_MVSAS=m CONFIG_SCSI_MVSAS_DEBUG=y # CONFIG_SCSI_MVSAS_TASKLET is not set CONFIG_SCSI_MVUMI=m -CONFIG_SCSI_ADVANSYS=m +CONFIG_SCSI_ADVANSYS=y CONFIG_SCSI_ARCMSR=m CONFIG_SCSI_ESAS2R=m -# CONFIG_MEGARAID_NEWGEN is not set -CONFIG_MEGARAID_LEGACY=y +CONFIG_MEGARAID_NEWGEN=y +CONFIG_MEGARAID_MM=y +CONFIG_MEGARAID_MAILBOX=y +CONFIG_MEGARAID_LEGACY=m CONFIG_MEGARAID_SAS=m CONFIG_SCSI_MPT3SAS=m CONFIG_SCSI_MPT2SAS_MAX_SGE=128 @@ -1707,46 +1741,52 @@ CONFIG_SCSI_MPT3SAS_MAX_SGE=128 CONFIG_SCSI_MPT2SAS=m CONFIG_SCSI_MPI3MR=m CONFIG_SCSI_SMARTPQI=m -CONFIG_SCSI_UFSHCD=m -# CONFIG_SCSI_UFSHCD_PCI is not set -# CONFIG_SCSI_UFSHCD_PLATFORM is not set -# CONFIG_SCSI_UFS_BSG is not set -# CONFIG_SCSI_UFS_HPB is not set +# CONFIG_SCSI_UFSHCD is not set CONFIG_SCSI_HPTIOP=m # CONFIG_SCSI_MYRB is not set CONFIG_SCSI_SNIC=m # CONFIG_SCSI_SNIC_DEBUG_FS is not set CONFIG_SCSI_DMX3191D=m -# CONFIG_SCSI_FDOMAIN_PCI is not set 
+CONFIG_SCSI_FDOMAIN=m +CONFIG_SCSI_FDOMAIN_PCI=m CONFIG_SCSI_IPS=m CONFIG_SCSI_INITIO=m CONFIG_SCSI_INIA100=m +CONFIG_SCSI_PPA=m +CONFIG_SCSI_IMM=m +# CONFIG_SCSI_IZIP_EPP16 is not set +# CONFIG_SCSI_IZIP_SLOW_CTR is not set CONFIG_SCSI_STEX=m CONFIG_SCSI_SYM53C8XX_2=y -CONFIG_SCSI_SYM53C8XX_DMA_ADDRESSING_MODE=1 +CONFIG_SCSI_SYM53C8XX_DMA_ADDRESSING_MODE=0 CONFIG_SCSI_SYM53C8XX_DEFAULT_TAGS=16 CONFIG_SCSI_SYM53C8XX_MAX_TAGS=64 CONFIG_SCSI_SYM53C8XX_MMIO=y -CONFIG_SCSI_IPR=m +CONFIG_SCSI_IPR=y CONFIG_SCSI_IPR_TRACE=y CONFIG_SCSI_IPR_DUMP=y CONFIG_SCSI_QLOGIC_1280=m # CONFIG_SCSI_QLOGICPTI is not set -CONFIG_SCSI_QLA_ISCSI=y -CONFIG_SCSI_DC395x=m -CONFIG_SCSI_AM53C974=m +CONFIG_SCSI_QLA_ISCSI=m +CONFIG_SCSI_DC395x=y +CONFIG_SCSI_AM53C974=y CONFIG_SCSI_WD719X=m # CONFIG_SCSI_DEBUG is not set -CONFIG_SCSI_SUNESP=y +CONFIG_SCSI_SUNESP=m CONFIG_SCSI_PMCRAID=m CONFIG_SCSI_PM8001=m CONFIG_SCSI_VIRTIO=y -# CONFIG_SCSI_LOWLEVEL_PCMCIA is not set +CONFIG_SCSI_LOWLEVEL_PCMCIA=y +CONFIG_PCMCIA_AHA152X=m +CONFIG_PCMCIA_FDOMAIN=m +CONFIG_PCMCIA_QLOGIC=m +CONFIG_PCMCIA_SYM53C500=m # CONFIG_SCSI_DH is not set # end of SCSI device support CONFIG_ATA=y CONFIG_SATA_HOST=y +CONFIG_PATA_TIMINGS=y CONFIG_ATA_VERBOSE_ERROR=y CONFIG_ATA_FORCE=y CONFIG_SATA_PMP=y @@ -1757,10 +1797,10 @@ CONFIG_SATA_PMP=y CONFIG_SATA_AHCI=y CONFIG_SATA_MOBILE_LPM_POLICY=0 CONFIG_SATA_AHCI_PLATFORM=y -# CONFIG_AHCI_CEVA is not set -# CONFIG_AHCI_QORIQ is not set +CONFIG_AHCI_CEVA=m +CONFIG_AHCI_QORIQ=y # CONFIG_SATA_INIC162X is not set -# CONFIG_SATA_ACARD_AHCI is not set +CONFIG_SATA_ACARD_AHCI=y CONFIG_SATA_SIL24=y CONFIG_ATA_SFF=y @@ -1776,13 +1816,13 @@ CONFIG_ATA_BMDMA=y # SATA SFF controllers with BMDMA # # CONFIG_ATA_PIIX is not set -# CONFIG_SATA_MV is not set +CONFIG_SATA_MV=y # CONFIG_SATA_NV is not set # CONFIG_SATA_PROMISE is not set CONFIG_SATA_SIL=y # CONFIG_SATA_SIS is not set -# CONFIG_SATA_SVW is not set -# CONFIG_SATA_ULI is not set +CONFIG_SATA_SVW=y +CONFIG_SATA_ULI=y # CONFIG_SATA_VIA is not set # CONFIG_SATA_VITESSE is not set @@ -1794,7 +1834,7 @@ CONFIG_SATA_SIL=y # CONFIG_PATA_ARTOP is not set # CONFIG_PATA_ATIIXP is not set # CONFIG_PATA_ATP867X is not set -# CONFIG_PATA_CMD64X is not set +CONFIG_PATA_CMD64X=m # CONFIG_PATA_CYPRESS is not set # CONFIG_PATA_EFAR is not set # CONFIG_PATA_HPT366 is not set @@ -1803,35 +1843,36 @@ CONFIG_SATA_SIL=y # CONFIG_PATA_HPT3X3 is not set # CONFIG_PATA_IT8213 is not set # CONFIG_PATA_IT821X is not set -# CONFIG_PATA_JMICRON is not set +CONFIG_PATA_JMICRON=y # CONFIG_PATA_MARVELL is not set # CONFIG_PATA_NETCELL is not set # CONFIG_PATA_NINJA32 is not set # CONFIG_PATA_NS87415 is not set # CONFIG_PATA_OLDPIIX is not set # CONFIG_PATA_OPTIDMA is not set -# CONFIG_PATA_PDC2027X is not set +CONFIG_PATA_PDC2027X=y # CONFIG_PATA_PDC_OLD is not set # CONFIG_PATA_RADISYS is not set # CONFIG_PATA_RDC is not set # CONFIG_PATA_SCH is not set -# CONFIG_PATA_SERVERWORKS is not set -# CONFIG_PATA_SIL680 is not set +CONFIG_PATA_SERVERWORKS=y +CONFIG_PATA_SIL680=y # CONFIG_PATA_SIS is not set # CONFIG_PATA_TOSHIBA is not set # CONFIG_PATA_TRIFLEX is not set # CONFIG_PATA_VIA is not set -# CONFIG_PATA_WINBOND is not set +CONFIG_PATA_WINBOND=y # # PIO-only SFF controllers # -# CONFIG_PATA_CMD640_PCI is not set +CONFIG_PATA_CMD640_PCI=m # CONFIG_PATA_MPIIX is not set # CONFIG_PATA_NS87410 is not set # CONFIG_PATA_OPTI is not set -# CONFIG_PATA_PCMCIA is not set -# CONFIG_PATA_PLATFORM is not set +CONFIG_PATA_PCMCIA=y +CONFIG_PATA_PLATFORM=m +CONFIG_PATA_OF_PLATFORM=m # CONFIG_PATA_RZ1000 
is not set # @@ -1924,6 +1965,7 @@ CONFIG_VXLAN=m # CONFIG_GTP is not set CONFIG_MACSEC=m CONFIG_NETCONSOLE=m +CONFIG_NETCONSOLE_DYNAMIC=y CONFIG_NETPOLL=y CONFIG_NET_POLL_CONTROLLER=y CONFIG_TUN=m @@ -1933,69 +1975,65 @@ CONFIG_VETH=m CONFIG_VIRTIO_NET=m # CONFIG_NLMON is not set # CONFIG_NET_VRF is not set +CONFIG_MHI_NET=m CONFIG_SUNGEM_PHY=y # CONFIG_ARCNET is not set CONFIG_ETHERNET=y +CONFIG_MDIO=m CONFIG_NET_VENDOR_3COM=y -# CONFIG_PCMCIA_3C574 is not set +CONFIG_PCMCIA_3C574=m CONFIG_PCMCIA_3C589=m -# CONFIG_VORTEX is not set -# CONFIG_TYPHOON is not set +CONFIG_VORTEX=m +CONFIG_TYPHOON=m CONFIG_NET_VENDOR_ADAPTEC=y -# CONFIG_ADAPTEC_STARFIRE is not set +CONFIG_ADAPTEC_STARFIRE=m # CONFIG_GRETH is not set -CONFIG_NET_VENDOR_AGERE=y -# CONFIG_ET131X is not set -CONFIG_NET_VENDOR_ALACRITECH=y -# CONFIG_SLICOSS is not set +# CONFIG_NET_VENDOR_AGERE is not set +# CONFIG_NET_VENDOR_ALACRITECH is not set CONFIG_NET_VENDOR_ALTEON=y CONFIG_ACENIC=m -CONFIG_ACENIC_OMIT_TIGON_I=y +# CONFIG_ACENIC_OMIT_TIGON_I is not set # CONFIG_ALTERA_TSE is not set -CONFIG_NET_VENDOR_AMAZON=y +# CONFIG_NET_VENDOR_AMAZON is not set CONFIG_NET_VENDOR_AMD=y CONFIG_AMD8111_ETH=m CONFIG_PCNET32=m # CONFIG_PCMCIA_NMCLAN is not set -CONFIG_SUNLANCE=y -CONFIG_NET_VENDOR_AQUANTIA=y -# CONFIG_AQTION is not set -CONFIG_NET_VENDOR_ARC=y +# CONFIG_SUNLANCE is not set +# CONFIG_NET_VENDOR_AQUANTIA is not set +# CONFIG_NET_VENDOR_ARC is not set CONFIG_NET_VENDOR_ATHEROS=y -# CONFIG_ATL2 is not set -# CONFIG_ATL1 is not set -# CONFIG_ATL1E is not set -# CONFIG_ATL1C is not set -# CONFIG_ALX is not set +CONFIG_ATL2=m +CONFIG_ATL1=m +CONFIG_ATL1E=m +CONFIG_ATL1C=m +CONFIG_ALX=m CONFIG_NET_VENDOR_BROADCOM=y -CONFIG_B44=m -CONFIG_B44_PCI_AUTOSELECT=y -CONFIG_B44_PCICORE_AUTOSELECT=y -CONFIG_B44_PCI=y +# CONFIG_B44 is not set # CONFIG_BCMGENET is not set -# CONFIG_BNX2 is not set -# CONFIG_CNIC is not set -CONFIG_TIGON3=m -CONFIG_TIGON3_HWMON=y +CONFIG_BNX2=m +CONFIG_CNIC=m +# CONFIG_TIGON3 is not set # CONFIG_BNX2X is not set # CONFIG_SYSTEMPORT is not set # CONFIG_BNXT is not set -CONFIG_NET_VENDOR_BROCADE=y -# CONFIG_BNA is not set CONFIG_NET_VENDOR_CADENCE=y +# CONFIG_MACB is not set CONFIG_NET_VENDOR_CAVIUM=y # CONFIG_THUNDER_NIC_PF is not set # CONFIG_THUNDER_NIC_VF is not set # CONFIG_THUNDER_NIC_BGX is not set # CONFIG_THUNDER_NIC_RGX is not set -# CONFIG_CAVIUM_PTP is not set # CONFIG_LIQUIDIO is not set # CONFIG_LIQUIDIO_VF is not set CONFIG_NET_VENDOR_CHELSIO=y # CONFIG_CHELSIO_T1 is not set -# CONFIG_CHELSIO_T3 is not set -# CONFIG_CHELSIO_T4 is not set +CONFIG_CHELSIO_T3=m +CONFIG_CHELSIO_T4=m # CONFIG_CHELSIO_T4VF is not set +CONFIG_CHELSIO_LIB=m +CONFIG_CHELSIO_INLINE_CRYPTO=y +# CONFIG_CHELSIO_IPSEC_INLINE is not set CONFIG_NET_VENDOR_CISCO=y # CONFIG_ENIC is not set CONFIG_NET_VENDOR_CORTINA=y @@ -2003,163 +2041,146 @@ CONFIG_NET_VENDOR_CORTINA=y # CONFIG_DNET is not set CONFIG_NET_VENDOR_DEC=y CONFIG_NET_TULIP=y -# CONFIG_DE2104X is not set -CONFIG_TULIP=y +CONFIG_DE2104X=m +CONFIG_DE2104X_DSL=4 +CONFIG_TULIP=m # CONFIG_TULIP_MWI is not set # CONFIG_TULIP_MMIO is not set # CONFIG_TULIP_NAPI is not set CONFIG_TULIP_DM910X=y -# CONFIG_DE4X5 is not set +CONFIG_DE4X5=m # CONFIG_WINBOND_840 is not set # CONFIG_DM9102 is not set # CONFIG_ULI526X is not set -# CONFIG_PCMCIA_XIRCOM is not set -CONFIG_NET_VENDOR_DLINK=y -# CONFIG_DL2K is not set -# CONFIG_SUNDANCE is not set -CONFIG_NET_VENDOR_EMULEX=y -# CONFIG_BE2NET is not set -CONFIG_NET_VENDOR_EZCHIP=y -CONFIG_NET_VENDOR_FUJITSU=y -# CONFIG_PCMCIA_FMVJ18X is not 
set -CONFIG_NET_VENDOR_GOOGLE=y -CONFIG_NET_VENDOR_HUAWEI=y +CONFIG_PCMCIA_XIRCOM=m +# CONFIG_NET_VENDOR_DLINK is not set +# CONFIG_NET_VENDOR_EMULEX is not set +# CONFIG_NET_VENDOR_EZCHIP is not set +# CONFIG_NET_VENDOR_FUJITSU is not set +# CONFIG_NET_VENDOR_GOOGLE is not set +# CONFIG_NET_VENDOR_HUAWEI is not set CONFIG_NET_VENDOR_I825XX=y CONFIG_NET_VENDOR_INTEL=y -# CONFIG_E100 is not set +CONFIG_E100=m CONFIG_E1000=m CONFIG_E1000E=m CONFIG_IGB=m CONFIG_IGB_HWMON=y -# CONFIG_IGBVF is not set +CONFIG_IGBVF=m CONFIG_IXGB=m -# CONFIG_IXGBE is not set -# CONFIG_IXGBEVF is not set -# CONFIG_I40E is not set -# CONFIG_I40EVF is not set -# CONFIG_ICE is not set -# CONFIG_FM10K is not set -# CONFIG_IGC is not set -CONFIG_NET_VENDOR_MICROSOFT=y -# CONFIG_JME is not set +CONFIG_IXGBE=m +CONFIG_IXGBE_HWMON=y +CONFIG_IXGBE_IPSEC=y +CONFIG_IXGBEVF=m +CONFIG_IXGBEVF_IPSEC=y +CONFIG_I40E=m +CONFIG_IAVF=m +CONFIG_I40EVF=m +CONFIG_ICE=m +CONFIG_FM10K=m +CONFIG_IGC=m +CONFIG_JME=m CONFIG_NET_VENDOR_LITEX=y -# CONFIG_LITEX_LITEETH is not set -CONFIG_NET_VENDOR_MARVELL=y -# CONFIG_MVMDIO is not set -# CONFIG_SKGE is not set -# CONFIG_SKY2 is not set -CONFIG_NET_VENDOR_MELLANOX=y -# CONFIG_MLX4_EN is not set -# CONFIG_MLX5_CORE is not set -# CONFIG_MLXSW_CORE is not set -# CONFIG_MLXFW is not set -CONFIG_NET_VENDOR_MICREL=y -# CONFIG_KS8851_MLL is not set -# CONFIG_KSZ884X_PCI is not set +CONFIG_LITEX_LITEETH=m +# CONFIG_NET_VENDOR_MARVELL is not set +# CONFIG_NET_VENDOR_MELLANOX is not set +# CONFIG_NET_VENDOR_MICREL is not set CONFIG_NET_VENDOR_MICROCHIP=y # CONFIG_LAN743X is not set CONFIG_NET_VENDOR_MICROSEMI=y -CONFIG_NET_VENDOR_MYRI=y -# CONFIG_MYRI10GE is not set +# CONFIG_NET_VENDOR_MICROSOFT is not set +# CONFIG_NET_VENDOR_MYRI is not set # CONFIG_FEALNX is not set +CONFIG_NET_VENDOR_NI=y +# CONFIG_NI_XGE_MANAGEMENT_ENET is not set CONFIG_NET_VENDOR_NATSEMI=y CONFIG_NATSEMI=m CONFIG_NS83820=m CONFIG_NET_VENDOR_NETERION=y # CONFIG_S2IO is not set # CONFIG_VXGE is not set -CONFIG_NET_VENDOR_NETRONOME=y -# CONFIG_NFP is not set -CONFIG_NET_VENDOR_NI=y -# CONFIG_NI_XGE_MANAGEMENT_ENET is not set +# CONFIG_NET_VENDOR_NETRONOME is not set CONFIG_NET_VENDOR_8390=y # CONFIG_PCMCIA_AXNET is not set -# CONFIG_NE2K_PCI is not set -# CONFIG_PCMCIA_PCNET is not set -CONFIG_NET_VENDOR_NVIDIA=y -# CONFIG_FORCEDETH is not set -CONFIG_NET_VENDOR_OKI=y +CONFIG_NE2K_PCI=m +CONFIG_PCMCIA_PCNET=m +# CONFIG_NET_VENDOR_NVIDIA is not set +# CONFIG_NET_VENDOR_OKI is not set # CONFIG_ETHOC is not set CONFIG_NET_VENDOR_PACKET_ENGINES=y # CONFIG_HAMACHI is not set # CONFIG_YELLOWFIN is not set CONFIG_NET_VENDOR_PENSANDO=y # CONFIG_IONIC is not set -CONFIG_NET_VENDOR_QLOGIC=y -# CONFIG_QLA3XXX is not set -# CONFIG_QLCNIC is not set -# CONFIG_NETXEN_NIC is not set -# CONFIG_QED is not set -CONFIG_NET_VENDOR_QUALCOMM=y -# CONFIG_QCOM_EMAC is not set -# CONFIG_RMNET is not set -CONFIG_NET_VENDOR_RDC=y -# CONFIG_R6040 is not set +# CONFIG_NET_VENDOR_QLOGIC is not set +CONFIG_NET_VENDOR_BROCADE=y +# CONFIG_BNA is not set +# CONFIG_NET_VENDOR_QUALCOMM is not set +# CONFIG_NET_VENDOR_RDC is not set CONFIG_NET_VENDOR_REALTEK=y CONFIG_8139CP=m CONFIG_8139TOO=m -CONFIG_8139TOO_PIO=y +# CONFIG_8139TOO_PIO is not set # CONFIG_8139TOO_TUNE_TWISTER is not set -# CONFIG_8139TOO_8129 is not set +CONFIG_8139TOO_8129=y # CONFIG_8139_OLD_RX_RESET is not set CONFIG_R8169=m -CONFIG_NET_VENDOR_RENESAS=y -CONFIG_NET_VENDOR_ROCKER=y -CONFIG_NET_VENDOR_SAMSUNG=y -# CONFIG_SXGBE_ETH is not set -CONFIG_NET_VENDOR_SEEQ=y -CONFIG_NET_VENDOR_SOLARFLARE=y 
-# CONFIG_SFC is not set -# CONFIG_SFC_FALCON is not set -CONFIG_NET_VENDOR_SILAN=y -# CONFIG_SC92031 is not set -CONFIG_NET_VENDOR_SIS=y -# CONFIG_SIS900 is not set -# CONFIG_SIS190 is not set +# CONFIG_NET_VENDOR_RENESAS is not set +# CONFIG_NET_VENDOR_ROCKER is not set +# CONFIG_NET_VENDOR_SAMSUNG is not set +# CONFIG_NET_VENDOR_SEEQ is not set +# CONFIG_NET_VENDOR_SILAN is not set +# CONFIG_NET_VENDOR_SIS is not set +# CONFIG_NET_VENDOR_SOLARFLARE is not set CONFIG_NET_VENDOR_SMSC=y -# CONFIG_PCMCIA_SMC91C92 is not set -# CONFIG_EPIC100 is not set +CONFIG_PCMCIA_SMC91C92=m +CONFIG_EPIC100=m # CONFIG_SMSC911X is not set -# CONFIG_SMSC9420 is not set +CONFIG_SMSC9420=m CONFIG_NET_VENDOR_SOCIONEXT=y -CONFIG_NET_VENDOR_STMICRO=y -# CONFIG_STMMAC_ETH is not set +# CONFIG_NET_VENDOR_STMICRO is not set CONFIG_NET_VENDOR_SUN=y CONFIG_HAPPYMEAL=y -CONFIG_SUNBMAC=y +CONFIG_SUNBMAC=m CONFIG_SUNQE=m CONFIG_SUNGEM=y -CONFIG_CASSINI=y -CONFIG_SUNVNET_COMMON=y +CONFIG_CASSINI=m +CONFIG_SUNVNET_COMMON=m CONFIG_SUNVNET=m -CONFIG_LDMVSW=y +CONFIG_LDMVSW=m CONFIG_NIU=m CONFIG_NET_VENDOR_SYNOPSYS=y -# CONFIG_DWC_XLGMAC is not set +CONFIG_DWC_XLGMAC=m +CONFIG_DWC_XLGMAC_PCI=m CONFIG_NET_VENDOR_TEHUTI=y -# CONFIG_TEHUTI is not set +CONFIG_TEHUTI=m CONFIG_NET_VENDOR_TI=y # CONFIG_TI_CPSW_PHY_SEL is not set -# CONFIG_TLAN is not set +CONFIG_TLAN=m CONFIG_NET_VENDOR_VIA=y CONFIG_VIA_RHINE=m # CONFIG_VIA_RHINE_MMIO is not set -# CONFIG_VIA_VELOCITY is not set +CONFIG_VIA_VELOCITY=m CONFIG_NET_VENDOR_WIZNET=y -# CONFIG_WIZNET_W5100 is not set -# CONFIG_WIZNET_W5300 is not set +CONFIG_WIZNET_W5100=m +CONFIG_WIZNET_W5300=m +# CONFIG_WIZNET_BUS_DIRECT is not set +# CONFIG_WIZNET_BUS_INDIRECT is not set +CONFIG_WIZNET_BUS_ANY=y CONFIG_NET_VENDOR_XILINX=y -# CONFIG_XILINX_EMACLITE is not set -# CONFIG_XILINX_AXI_EMAC is not set -# CONFIG_XILINX_LL_TEMAC is not set +CONFIG_XILINX_EMACLITE=m +CONFIG_XILINX_AXI_EMAC=m +CONFIG_XILINX_LL_TEMAC=m CONFIG_NET_VENDOR_XIRCOM=y -# CONFIG_PCMCIA_XIRC2PS is not set +CONFIG_PCMCIA_XIRC2PS=m # CONFIG_FDDI is not set # CONFIG_HIPPI is not set -CONFIG_PHYLIB=m +CONFIG_PHYLINK=m +CONFIG_PHYLIB=y CONFIG_SWPHY=y -# CONFIG_LED_TRIGGER_PHY is not set -CONFIG_FIXED_PHY=m +CONFIG_LED_TRIGGER_PHY=y +CONFIG_FIXED_PHY=y +# CONFIG_SFP is not set # # MII PHY device drivers @@ -2167,12 +2188,13 @@ CONFIG_FIXED_PHY=m # CONFIG_AMD_PHY is not set # CONFIG_ADIN_PHY is not set # CONFIG_AQUANTIA_PHY is not set -# CONFIG_AX88796B_PHY is not set +CONFIG_AX88796B_PHY=m # CONFIG_BROADCOM_PHY is not set -# CONFIG_BCM54140_PHY is not set +CONFIG_BCM54140_PHY=m # CONFIG_BCM7XXX_PHY is not set -# CONFIG_BCM84881_PHY is not set +CONFIG_BCM84881_PHY=m # CONFIG_BCM87XX_PHY is not set +CONFIG_BCM_NET_PHYLIB=m # CONFIG_CICADA_PHY is not set # CONFIG_CORTINA_PHY is not set # CONFIG_DAVICOM_PHY is not set @@ -2182,78 +2204,83 @@ CONFIG_FIXED_PHY=m # CONFIG_LSI_ET1011C_PHY is not set # CONFIG_MARVELL_PHY is not set # CONFIG_MARVELL_10G_PHY is not set -# CONFIG_MARVELL_88X2222_PHY is not set -# CONFIG_MAXLINEAR_GPHY is not set -# CONFIG_MEDIATEK_GE_PHY is not set +CONFIG_MARVELL_88X2222_PHY=m +CONFIG_MAXLINEAR_GPHY=m +CONFIG_MEDIATEK_GE_PHY=m # CONFIG_MICREL_PHY is not set # CONFIG_MICROCHIP_PHY is not set # CONFIG_MICROCHIP_T1_PHY is not set # CONFIG_MICROSEMI_PHY is not set -# CONFIG_MOTORCOMM_PHY is not set +CONFIG_MOTORCOMM_PHY=m # CONFIG_NATIONAL_PHY is not set -# CONFIG_NXP_C45_TJA11XX_PHY is not set +CONFIG_NXP_C45_TJA11XX_PHY=m # CONFIG_NXP_TJA11XX_PHY is not set +CONFIG_AT803X_PHY=m # CONFIG_QSEMI_PHY is not 
set CONFIG_REALTEK_PHY=m # CONFIG_RENESAS_PHY is not set # CONFIG_ROCKCHIP_PHY is not set -# CONFIG_SMSC_PHY is not set +CONFIG_SMSC_PHY=m # CONFIG_STE10XP is not set # CONFIG_TERANETICS_PHY is not set # CONFIG_DP83822_PHY is not set # CONFIG_DP83TC811_PHY is not set # CONFIG_DP83848_PHY is not set # CONFIG_DP83867_PHY is not set -# CONFIG_DP83869_PHY is not set +CONFIG_DP83869_PHY=m # CONFIG_VITESSE_PHY is not set # CONFIG_XILINX_GMII2RGMII is not set -CONFIG_MDIO_DEVICE=m -CONFIG_MDIO_BUS=m -CONFIG_FWNODE_MDIO=m -CONFIG_OF_MDIO=m -CONFIG_MDIO_DEVRES=m -# CONFIG_MDIO_BITBANG is not set +CONFIG_MDIO_DEVICE=y +CONFIG_MDIO_BUS=y +CONFIG_FWNODE_MDIO=y +CONFIG_OF_MDIO=y +CONFIG_MDIO_DEVRES=y +CONFIG_MDIO_BITBANG=y # CONFIG_MDIO_BCM_UNIMAC is not set +# CONFIG_MDIO_GPIO is not set # CONFIG_MDIO_HISI_FEMAC is not set -# CONFIG_MDIO_MVUSB is not set +CONFIG_MDIO_MVUSB=m # CONFIG_MDIO_MSCC_MIIM is not set # CONFIG_MDIO_OCTEON is not set +# CONFIG_MDIO_IPQ4019 is not set # CONFIG_MDIO_THUNDER is not set # # MDIO Multiplexers # +# CONFIG_MDIO_BUS_MUX_GPIO is not set # CONFIG_MDIO_BUS_MUX_MULTIPLEXER is not set # CONFIG_MDIO_BUS_MUX_MMIOREG is not set # # PCS device drivers # -# CONFIG_PCS_XPCS is not set +CONFIG_PCS_XPCS=m # end of PCS device drivers +# CONFIG_PLIP is not set CONFIG_PPP=m CONFIG_PPP_BSDCOMP=m CONFIG_PPP_DEFLATE=m # CONFIG_PPP_FILTER is not set -# CONFIG_PPP_MPPE is not set -# CONFIG_PPP_MULTILINK is not set +CONFIG_PPP_MPPE=m +CONFIG_PPP_MULTILINK=y CONFIG_PPPOE=m # CONFIG_PPTP is not set -# CONFIG_PPPOL2TP is not set +CONFIG_PPPOL2TP=m CONFIG_PPP_ASYNC=m CONFIG_PPP_SYNC_TTY=m # CONFIG_SLIP is not set CONFIG_SLHC=m CONFIG_USB_NET_DRIVERS=m -CONFIG_USB_CATC=m -CONFIG_USB_KAWETH=m -CONFIG_USB_PEGASUS=m -CONFIG_USB_RTL8150=m -# CONFIG_USB_RTL8152 is not set +# CONFIG_USB_CATC is not set +# CONFIG_USB_KAWETH is not set +# CONFIG_USB_PEGASUS is not set +# CONFIG_USB_RTL8150 is not set +CONFIG_USB_RTL8152=m # CONFIG_USB_LAN78XX is not set CONFIG_USB_USBNET=m -# CONFIG_USB_NET_AX8817X is not set +CONFIG_USB_NET_AX8817X=m CONFIG_USB_NET_AX88179_178A=m CONFIG_USB_NET_CDCETHER=m # CONFIG_USB_NET_CDC_EEM is not set @@ -2266,26 +2293,26 @@ CONFIG_USB_NET_CDC_NCM=m # CONFIG_USB_NET_SMSC75XX is not set # CONFIG_USB_NET_SMSC95XX is not set # CONFIG_USB_NET_GL620A is not set -# CONFIG_USB_NET_NET1080 is not set +CONFIG_USB_NET_NET1080=m # CONFIG_USB_NET_PLUSB is not set # CONFIG_USB_NET_MCS7830 is not set -# CONFIG_USB_NET_RNDIS_HOST is not set +CONFIG_USB_NET_RNDIS_HOST=m # CONFIG_USB_NET_CDC_SUBSET is not set -# CONFIG_USB_NET_ZAURUS is not set +CONFIG_USB_NET_ZAURUS=m # CONFIG_USB_NET_CX82310_ETH is not set # CONFIG_USB_NET_KALMIA is not set # CONFIG_USB_NET_QMI_WWAN is not set # CONFIG_USB_HSO is not set # CONFIG_USB_NET_INT51X1 is not set -# CONFIG_USB_IPHETH is not set +CONFIG_USB_IPHETH=m # CONFIG_USB_SIERRA_NET is not set # CONFIG_USB_VL600 is not set # CONFIG_USB_NET_CH9200 is not set # CONFIG_USB_NET_AQC111 is not set -# CONFIG_USB_RTL8153_ECM is not set +CONFIG_USB_RTL8153_ECM=m CONFIG_WLAN=y CONFIG_WLAN_VENDOR_ADMTEK=y -# CONFIG_ADM8211 is not set +CONFIG_ADM8211=m CONFIG_ATH_COMMON=m CONFIG_WLAN_VENDOR_ATH=y # CONFIG_ATH_DEBUG is not set @@ -2308,25 +2335,39 @@ CONFIG_ATH9K_PCOEM=y # CONFIG_ATH9K_PCI_NO_EEPROM is not set CONFIG_ATH9K_HTC=m # CONFIG_ATH9K_HTC_DEBUGFS is not set +# CONFIG_ATH9K_HWRNG is not set CONFIG_CARL9170=m CONFIG_CARL9170_LEDS=y CONFIG_CARL9170_WPC=y +# CONFIG_CARL9170_HWRNG is not set CONFIG_ATH6KL=m -# CONFIG_ATH6KL_SDIO is not set +CONFIG_ATH6KL_SDIO=m 
CONFIG_ATH6KL_USB=m # CONFIG_ATH6KL_DEBUG is not set # CONFIG_ATH6KL_TRACING is not set CONFIG_AR5523=m -# CONFIG_WIL6210 is not set -# CONFIG_ATH10K is not set +CONFIG_WIL6210=m +CONFIG_WIL6210_ISR_COR=y +CONFIG_WIL6210_TRACING=y +CONFIG_WIL6210_DEBUGFS=y +CONFIG_ATH10K=m +CONFIG_ATH10K_CE=y +CONFIG_ATH10K_PCI=m +CONFIG_ATH10K_SDIO=m +CONFIG_ATH10K_USB=m +# CONFIG_ATH10K_DEBUG is not set +# CONFIG_ATH10K_DEBUGFS is not set +# CONFIG_ATH10K_TRACING is not set # CONFIG_WCN36XX is not set CONFIG_ATH11K=m -# CONFIG_ATH11K_PCI is not set +CONFIG_ATH11K_PCI=m # CONFIG_ATH11K_DEBUG is not set # CONFIG_ATH11K_TRACING is not set CONFIG_WLAN_VENDOR_ATMEL=y -# CONFIG_ATMEL is not set -# CONFIG_AT76C50X_USB is not set +CONFIG_ATMEL=m +CONFIG_PCI_ATMEL=m +CONFIG_PCMCIA_ATMEL=m +CONFIG_AT76C50X_USB=m CONFIG_WLAN_VENDOR_BROADCOM=y CONFIG_B43=m CONFIG_B43_BCMA=y @@ -2336,7 +2377,7 @@ CONFIG_B43_BUSES_BCMA_AND_SSB=y # CONFIG_B43_BUSES_SSB is not set CONFIG_B43_PCI_AUTOSELECT=y CONFIG_B43_PCICORE_AUTOSELECT=y -# CONFIG_B43_SDIO is not set +CONFIG_B43_SDIO=y CONFIG_B43_BCMA_PIO=y CONFIG_B43_PIO=y CONFIG_B43_PHY_G=y @@ -2344,43 +2385,153 @@ CONFIG_B43_PHY_N=y CONFIG_B43_PHY_LP=y CONFIG_B43_PHY_HT=y CONFIG_B43_LEDS=y +CONFIG_B43_HWRNG=y # CONFIG_B43_DEBUG is not set -# CONFIG_B43LEGACY is not set -# CONFIG_BRCMSMAC is not set -# CONFIG_BRCMFMAC is not set +CONFIG_B43LEGACY=m +CONFIG_B43LEGACY_PCI_AUTOSELECT=y +CONFIG_B43LEGACY_PCICORE_AUTOSELECT=y +CONFIG_B43LEGACY_LEDS=y +CONFIG_B43LEGACY_HWRNG=y +CONFIG_B43LEGACY_DEBUG=y +CONFIG_B43LEGACY_DMA=y +CONFIG_B43LEGACY_PIO=y +CONFIG_B43LEGACY_DMA_AND_PIO_MODE=y +# CONFIG_B43LEGACY_DMA_MODE is not set +# CONFIG_B43LEGACY_PIO_MODE is not set +CONFIG_BRCMUTIL=m +CONFIG_BRCMSMAC=m +CONFIG_BRCMFMAC=m +CONFIG_BRCMFMAC_PROTO_BCDC=y +CONFIG_BRCMFMAC_SDIO=y +CONFIG_BRCMFMAC_USB=y +# CONFIG_BRCMFMAC_PCIE is not set +# CONFIG_BRCM_TRACING is not set +# CONFIG_BRCMDBG is not set CONFIG_WLAN_VENDOR_CISCO=y -# CONFIG_AIRO_CS is not set +CONFIG_AIRO_CS=m CONFIG_WLAN_VENDOR_INTEL=y -# CONFIG_IPW2100 is not set -# CONFIG_IPW2200 is not set -# CONFIG_IWL4965 is not set -# CONFIG_IWL3945 is not set -# CONFIG_IWLWIFI is not set +CONFIG_IPW2100=m +CONFIG_IPW2100_MONITOR=y +# CONFIG_IPW2100_DEBUG is not set +CONFIG_IPW2200=m +CONFIG_IPW2200_MONITOR=y +# CONFIG_IPW2200_RADIOTAP is not set +# CONFIG_IPW2200_PROMISCUOUS is not set +CONFIG_IPW2200_QOS=y +# CONFIG_IPW2200_DEBUG is not set +CONFIG_LIBIPW=m +# CONFIG_LIBIPW_DEBUG is not set +CONFIG_IWLEGACY=m +CONFIG_IWL4965=m +CONFIG_IWL3945=m + +# +# iwl3945 / iwl4965 Debugging Options +# +# CONFIG_IWLEGACY_DEBUG is not set +# end of iwl3945 / iwl4965 Debugging Options + +CONFIG_IWLWIFI=m +CONFIG_IWLWIFI_LEDS=y +CONFIG_IWLDVM=m +CONFIG_IWLMVM=m +CONFIG_IWLWIFI_OPMODE_MODULAR=y +# CONFIG_IWLWIFI_BCAST_FILTERING is not set + +# +# Debugging Options +# +# CONFIG_IWLWIFI_DEBUG is not set +CONFIG_IWLWIFI_DEVICE_TRACING=y +# end of Debugging Options + CONFIG_WLAN_VENDOR_INTERSIL=y -# CONFIG_HOSTAP is not set -# CONFIG_HERMES is not set -# CONFIG_P54_COMMON is not set +CONFIG_HOSTAP=m +CONFIG_HOSTAP_FIRMWARE=y +CONFIG_HOSTAP_FIRMWARE_NVRAM=y +CONFIG_HOSTAP_PLX=m +CONFIG_HOSTAP_PCI=m +CONFIG_HOSTAP_CS=m +CONFIG_HERMES=m +# CONFIG_HERMES_PRISM is not set +CONFIG_HERMES_CACHE_FW_ON_INIT=y +CONFIG_PLX_HERMES=m +CONFIG_TMD_HERMES=m +CONFIG_NORTEL_HERMES=m +CONFIG_PCMCIA_HERMES=m +CONFIG_PCMCIA_SPECTRUM=m +CONFIG_ORINOCO_USB=m +CONFIG_P54_COMMON=m +CONFIG_P54_USB=m +CONFIG_P54_PCI=m +CONFIG_P54_LEDS=y CONFIG_WLAN_VENDOR_MARVELL=y -# CONFIG_LIBERTAS is 
not set -# CONFIG_LIBERTAS_THINFIRM is not set -# CONFIG_MWIFIEX is not set +CONFIG_LIBERTAS=m +CONFIG_LIBERTAS_USB=m +CONFIG_LIBERTAS_CS=m +CONFIG_LIBERTAS_SDIO=m +# CONFIG_LIBERTAS_DEBUG is not set +# CONFIG_LIBERTAS_MESH is not set +CONFIG_LIBERTAS_THINFIRM=m +# CONFIG_LIBERTAS_THINFIRM_DEBUG is not set +CONFIG_LIBERTAS_THINFIRM_USB=m +CONFIG_MWIFIEX=m +CONFIG_MWIFIEX_SDIO=m +CONFIG_MWIFIEX_PCIE=m +CONFIG_MWIFIEX_USB=m CONFIG_MWL8K=m CONFIG_WLAN_VENDOR_MEDIATEK=y -# CONFIG_MT7601U is not set +CONFIG_MT7601U=m +CONFIG_MT76_CORE=m +CONFIG_MT76_LEDS=y +CONFIG_MT76_USB=m +CONFIG_MT76_SDIO=m +CONFIG_MT76_CONNAC_LIB=m # CONFIG_MT76x0U is not set # CONFIG_MT76x0E is not set # CONFIG_MT76x2E is not set # CONFIG_MT76x2U is not set # CONFIG_MT7603E is not set +CONFIG_MT7615_COMMON=m # CONFIG_MT7615E is not set -# CONFIG_MT7663U is not set -# CONFIG_MT7663S is not set -# CONFIG_MT7915E is not set -# CONFIG_MT7921E is not set +CONFIG_MT7663_USB_SDIO_COMMON=m +CONFIG_MT7663U=m +CONFIG_MT7663S=m +CONFIG_MT7915E=m +CONFIG_MT7921E=m CONFIG_WLAN_VENDOR_MICROCHIP=y -# CONFIG_WILC1000_SDIO is not set +CONFIG_WILC1000=m +CONFIG_WILC1000_SDIO=m +# CONFIG_WILC1000_HW_OOB_INTR is not set CONFIG_WLAN_VENDOR_RALINK=y -# CONFIG_RT2X00 is not set +CONFIG_RT2X00=m +CONFIG_RT2400PCI=m +CONFIG_RT2500PCI=m +CONFIG_RT61PCI=m +CONFIG_RT2800PCI=m +CONFIG_RT2800PCI_RT33XX=y +CONFIG_RT2800PCI_RT35XX=y +CONFIG_RT2800PCI_RT53XX=y +CONFIG_RT2800PCI_RT3290=y +CONFIG_RT2500USB=m +CONFIG_RT73USB=m +CONFIG_RT2800USB=m +CONFIG_RT2800USB_RT33XX=y +CONFIG_RT2800USB_RT35XX=y +CONFIG_RT2800USB_RT3573=y +CONFIG_RT2800USB_RT53XX=y +CONFIG_RT2800USB_RT55XX=y +CONFIG_RT2800USB_UNKNOWN=y +CONFIG_RT2800_LIB=m +CONFIG_RT2800_LIB_MMIO=m +CONFIG_RT2X00_LIB_MMIO=m +CONFIG_RT2X00_LIB_PCI=m +CONFIG_RT2X00_LIB_USB=m +CONFIG_RT2X00_LIB=m +CONFIG_RT2X00_LIB_FIRMWARE=y +CONFIG_RT2X00_LIB_CRYPTO=y +CONFIG_RT2X00_LIB_LEDS=y +# CONFIG_RT2X00_DEBUG is not set CONFIG_WLAN_VENDOR_REALTEK=y CONFIG_RTL8180=m CONFIG_RTL8187=m @@ -2403,26 +2554,17 @@ CONFIG_RTL8192C_COMMON=m CONFIG_RTL8723_COMMON=m CONFIG_RTLBTCOEXIST=m CONFIG_RTL8XXXU=m -# CONFIG_RTL8XXXU_UNTESTED is not set +CONFIG_RTL8XXXU_UNTESTED=y # CONFIG_RTW88 is not set -CONFIG_WLAN_VENDOR_RSI=y -# CONFIG_RSI_91X is not set -CONFIG_WLAN_VENDOR_ST=y -# CONFIG_CW1200 is not set -CONFIG_WLAN_VENDOR_TI=y -# CONFIG_WL1251 is not set -# CONFIG_WL12XX is not set -# CONFIG_WL18XX is not set -# CONFIG_WLCORE is not set -CONFIG_WLAN_VENDOR_ZYDAS=y -# CONFIG_USB_ZD1201 is not set -# CONFIG_ZD1211RW is not set -CONFIG_WLAN_VENDOR_QUANTENNA=y -# CONFIG_QTNFMAC_PCIE is not set -# CONFIG_PCMCIA_RAYCS is not set -# CONFIG_PCMCIA_WL3501 is not set +# CONFIG_WLAN_VENDOR_RSI is not set +# CONFIG_WLAN_VENDOR_ST is not set +# CONFIG_WLAN_VENDOR_TI is not set +# CONFIG_WLAN_VENDOR_ZYDAS is not set +# CONFIG_WLAN_VENDOR_QUANTENNA is not set +CONFIG_PCMCIA_RAYCS=m +CONFIG_PCMCIA_WL3501=m # CONFIG_MAC80211_HWSIM is not set -# CONFIG_USB_NET_RNDIS_WLAN is not set +CONFIG_USB_NET_RNDIS_WLAN=m # CONFIG_VIRT_WIFI is not set # CONFIG_WAN is not set @@ -2442,15 +2584,18 @@ CONFIG_NET_FAILOVER=m # CONFIG_INPUT=y CONFIG_INPUT_LEDS=y -CONFIG_INPUT_FF_MEMLESS=y +# CONFIG_INPUT_FF_MEMLESS is not set # CONFIG_INPUT_SPARSEKMAP is not set # CONFIG_INPUT_MATRIXKMAP is not set # # Userland interfaces # -# CONFIG_INPUT_MOUSEDEV is not set -CONFIG_INPUT_JOYDEV=m +CONFIG_INPUT_MOUSEDEV=y +CONFIG_INPUT_MOUSEDEV_PSAUX=y +CONFIG_INPUT_MOUSEDEV_SCREEN_X=1024 +CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768 +CONFIG_INPUT_JOYDEV=y CONFIG_INPUT_EVDEV=y # 
CONFIG_INPUT_EVBUG is not set @@ -2466,29 +2611,35 @@ CONFIG_INPUT_KEYBOARD=y # CONFIG_KEYBOARD_QT2160 is not set # CONFIG_KEYBOARD_DLINK_DIR685 is not set # CONFIG_KEYBOARD_LKKBD is not set +# CONFIG_KEYBOARD_GPIO is not set +# CONFIG_KEYBOARD_GPIO_POLLED is not set # CONFIG_KEYBOARD_TCA6416 is not set # CONFIG_KEYBOARD_TCA8418 is not set +# CONFIG_KEYBOARD_MATRIX is not set # CONFIG_KEYBOARD_LM8323 is not set # CONFIG_KEYBOARD_LM8333 is not set # CONFIG_KEYBOARD_MAX7359 is not set # CONFIG_KEYBOARD_MCS is not set # CONFIG_KEYBOARD_MPR121 is not set -# CONFIG_KEYBOARD_NEWTON is not set +CONFIG_KEYBOARD_NEWTON=m # CONFIG_KEYBOARD_OPENCORES is not set +# CONFIG_KEYBOARD_SAMSUNG is not set # CONFIG_KEYBOARD_STOWAWAY is not set -CONFIG_KEYBOARD_SUNKBD=y +CONFIG_KEYBOARD_SUNKBD=m # CONFIG_KEYBOARD_OMAP4 is not set # CONFIG_KEYBOARD_TM2_TOUCHKEY is not set # CONFIG_KEYBOARD_XTKBD is not set # CONFIG_KEYBOARD_CAP11XX is not set +# CONFIG_KEYBOARD_BCM is not set CONFIG_INPUT_MOUSE=y # CONFIG_MOUSE_PS2 is not set -CONFIG_MOUSE_SERIAL=y -# CONFIG_MOUSE_APPLETOUCH is not set +# CONFIG_MOUSE_SERIAL is not set +CONFIG_MOUSE_APPLETOUCH=y # CONFIG_MOUSE_BCM5974 is not set # CONFIG_MOUSE_CYAPA is not set # CONFIG_MOUSE_ELAN_I2C is not set # CONFIG_MOUSE_VSXXXAA is not set +# CONFIG_MOUSE_GPIO is not set # CONFIG_MOUSE_SYNAPTICS_I2C is not set # CONFIG_MOUSE_SYNAPTICS_USB is not set # CONFIG_INPUT_JOYSTICK is not set @@ -2499,22 +2650,28 @@ CONFIG_INPUT_MISC=y # CONFIG_INPUT_ATMEL_CAPTOUCH is not set # CONFIG_INPUT_BMA150 is not set # CONFIG_INPUT_E3X0_BUTTON is not set -# CONFIG_INPUT_SPARCSPKR is not set +CONFIG_INPUT_SPARCSPKR=m # CONFIG_INPUT_MMA8450 is not set -# CONFIG_INPUT_ATI_REMOTE2 is not set +# CONFIG_INPUT_GPIO_BEEPER is not set +# CONFIG_INPUT_GPIO_DECODER is not set +# CONFIG_INPUT_GPIO_VIBRA is not set +CONFIG_INPUT_ATI_REMOTE2=m # CONFIG_INPUT_KEYSPAN_REMOTE is not set # CONFIG_INPUT_KXTJ9 is not set # CONFIG_INPUT_POWERMATE is not set # CONFIG_INPUT_YEALINK is not set # CONFIG_INPUT_CM109 is not set +# CONFIG_INPUT_REGULATOR_HAPTIC is not set CONFIG_INPUT_UINPUT=y # CONFIG_INPUT_PCF8574 is not set +# CONFIG_INPUT_GPIO_ROTARY_ENCODER is not set # CONFIG_INPUT_DA7280_HAPTICS is not set # CONFIG_INPUT_ADXL34X is not set # CONFIG_INPUT_IMS_PCU is not set # CONFIG_INPUT_IQS269A is not set # CONFIG_INPUT_IQS626A is not set # CONFIG_INPUT_CMA3000 is not set +# CONFIG_INPUT_DRV260X_HAPTICS is not set # CONFIG_INPUT_DRV2665_HAPTICS is not set # CONFIG_INPUT_DRV2667_HAPTICS is not set # CONFIG_RMI4_CORE is not set @@ -2525,7 +2682,8 @@ CONFIG_INPUT_UINPUT=y CONFIG_SERIO=y CONFIG_ARCH_MIGHT_HAVE_PC_SERIO=y # CONFIG_SERIO_I8042 is not set -CONFIG_SERIO_SERPORT=m +# CONFIG_SERIO_SERPORT is not set +# CONFIG_SERIO_PARKBD is not set # CONFIG_SERIO_PCIPS2 is not set # CONFIG_SERIO_LIBPS2 is not set # CONFIG_SERIO_RAW is not set @@ -2533,6 +2691,7 @@ CONFIG_SERIO_SERPORT=m # CONFIG_SERIO_PS2MULT is not set # CONFIG_SERIO_ARC_PS2 is not set # CONFIG_SERIO_APBPS2 is not set +# CONFIG_SERIO_GPIO_PS2 is not set # CONFIG_USERIO is not set # CONFIG_GAMEPORT is not set # end of Hardware I/O ports @@ -2549,14 +2708,27 @@ CONFIG_VT_CONSOLE_SLEEP=y CONFIG_HW_CONSOLE=y CONFIG_VT_HW_CONSOLE_BINDING=y CONFIG_UNIX98_PTYS=y -CONFIG_LEGACY_PTYS=y -CONFIG_LEGACY_PTY_COUNT=256 +# CONFIG_LEGACY_PTYS is not set CONFIG_LDISC_AUTOLOAD=y # # Serial drivers # -# CONFIG_SERIAL_8250 is not set +CONFIG_SERIAL_EARLYCON=y +CONFIG_SERIAL_8250=y +# CONFIG_SERIAL_8250_DEPRECATED_OPTIONS is not set +CONFIG_SERIAL_8250_16550A_VARIANTS=y 
+# CONFIG_SERIAL_8250_FINTEK is not set +CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_SERIAL_8250_PCI=m +CONFIG_SERIAL_8250_EXAR=m +CONFIG_SERIAL_8250_CS=m +CONFIG_SERIAL_8250_NR_UARTS=4 +CONFIG_SERIAL_8250_RUNTIME_UARTS=4 +# CONFIG_SERIAL_8250_EXTENDED is not set +# CONFIG_SERIAL_8250_DW is not set +# CONFIG_SERIAL_8250_RT288X is not set +# CONFIG_SERIAL_OF_PLATFORM is not set # # Non-8250 serial port support @@ -2566,13 +2738,14 @@ CONFIG_SERIAL_SUNCORE=y # CONFIG_SERIAL_SUNZILOG is not set # CONFIG_SERIAL_SUNSU is not set # CONFIG_SERIAL_SUNSAB is not set -# CONFIG_SERIAL_SUNHV is not set +CONFIG_SERIAL_SUNHV=y CONFIG_SERIAL_CORE=y CONFIG_SERIAL_CORE_CONSOLE=y # CONFIG_SERIAL_JSM is not set # CONFIG_SERIAL_SIFIVE is not set # CONFIG_SERIAL_SCCNXP is not set # CONFIG_SERIAL_SC16IS7XX is not set +# CONFIG_SERIAL_BCM63XX is not set # CONFIG_SERIAL_GRLIB_GAISLER_APBUART is not set # CONFIG_SERIAL_ALTERA_JTAGUART is not set # CONFIG_SERIAL_ALTERA_UART is not set @@ -2582,25 +2755,36 @@ CONFIG_SERIAL_CORE_CONSOLE=y # CONFIG_SERIAL_FSL_LPUART is not set # CONFIG_SERIAL_FSL_LINFLEXUART is not set # CONFIG_SERIAL_CONEXANT_DIGICOLOR is not set +# CONFIG_SERIAL_SPRD is not set # end of Serial drivers +CONFIG_SERIAL_MCTRL_GPIO=y # CONFIG_SERIAL_NONSTANDARD is not set # CONFIG_N_GSM is not set # CONFIG_NOZOMI is not set # CONFIG_NULL_TTY is not set -# CONFIG_VCC is not set +CONFIG_VCC=m CONFIG_HVC_DRIVER=y # CONFIG_SERIAL_DEV_BUS is not set # CONFIG_TTY_PRINTK is not set +CONFIG_PRINTER=m +# CONFIG_LP_CONSOLE is not set +CONFIG_PPDEV=m CONFIG_VIRTIO_CONSOLE=y -CONFIG_IPMI_HANDLER=m +CONFIG_IPMI_HANDLER=y # CONFIG_IPMI_PANIC_EVENT is not set -# CONFIG_IPMI_DEVICE_INTERFACE is not set +CONFIG_IPMI_DEVICE_INTERFACE=m # CONFIG_IPMI_SI is not set # CONFIG_IPMI_SSIF is not set -# CONFIG_IPMI_WATCHDOG is not set -# CONFIG_IPMI_POWEROFF is not set -# CONFIG_HW_RANDOM is not set +CONFIG_IPMI_WATCHDOG=m +CONFIG_IPMI_POWEROFF=m +CONFIG_HW_RANDOM=y +CONFIG_HW_RANDOM_TIMERIOMEM=y +# CONFIG_HW_RANDOM_BA431 is not set +CONFIG_HW_RANDOM_N2RNG=y +# CONFIG_HW_RANDOM_VIRTIO is not set +# CONFIG_HW_RANDOM_CCTRNG is not set +# CONFIG_HW_RANDOM_XIPHERA is not set # CONFIG_APPLICOM is not set # @@ -2620,57 +2804,6 @@ CONFIG_DEVPORT=y # CONFIG_XILLYUSB is not set CONFIG_ADI=m # CONFIG_RANDOM_TRUST_BOOTLOADER is not set -CONFIG_LRNG=y - -# -# Specific DRNG seeding strategies -# -# CONFIG_LRNG_OVERSAMPLE_ENTROPY_SOURCES is not set -CONFIG_LRNG_OVERSAMPLE_ES_BITS=0 -CONFIG_LRNG_SEED_BUFFER_INIT_ADD_BITS=0 -# end of Specific DRNG seeding strategies - -# -# Entropy Source Configuration -# - -# -# Interrupt Entropy Source -# -CONFIG_LRNG_IRQ=y -CONFIG_LRNG_CONTINUOUS_COMPRESSION_ENABLED=y -# CONFIG_LRNG_CONTINUOUS_COMPRESSION_DISABLED is not set -CONFIG_LRNG_ENABLE_CONTINUOUS_COMPRESSION=y -# CONFIG_LRNG_SWITCHABLE_CONTINUOUS_COMPRESSION is not set -# CONFIG_LRNG_COLLECTION_SIZE_32 is not set -# CONFIG_LRNG_COLLECTION_SIZE_256 is not set -# CONFIG_LRNG_COLLECTION_SIZE_512 is not set -CONFIG_LRNG_COLLECTION_SIZE_1024=y -# CONFIG_LRNG_COLLECTION_SIZE_2048 is not set -# CONFIG_LRNG_COLLECTION_SIZE_4096 is not set -# CONFIG_LRNG_COLLECTION_SIZE_8192 is not set -CONFIG_LRNG_COLLECTION_SIZE=1024 -# CONFIG_LRNG_HEALTH_TESTS is not set -CONFIG_LRNG_RCT_CUTOFF=31 -CONFIG_LRNG_APT_CUTOFF=325 -CONFIG_LRNG_IRQ_ENTROPY_RATE=256 - -# -# Jitter RNG Entropy Source -# -# CONFIG_LRNG_JENT is not set - -# -# CPU Entropy Source -# -CONFIG_LRNG_CPU=y -CONFIG_LRNG_CPU_FULL_ENT_MULTIPLIER=1 -CONFIG_LRNG_CPU_ENTROPY_RATE=8 -# end of Entropy Source 
Configuration - -# CONFIG_LRNG_DRNG_SWITCH is not set -# CONFIG_LRNG_TESTING_MENU is not set -# CONFIG_LRNG_SELFTEST is not set # end of Character devices # @@ -2679,16 +2812,19 @@ CONFIG_LRNG_CPU_ENTROPY_RATE=8 CONFIG_I2C=y CONFIG_I2C_BOARDINFO=y CONFIG_I2C_COMPAT=y -CONFIG_I2C_CHARDEV=y -CONFIG_I2C_MUX=m +CONFIG_I2C_CHARDEV=m +CONFIG_I2C_MUX=y # # Multiplexer I2C Chip support # -# CONFIG_I2C_MUX_GPMUX is not set -# CONFIG_I2C_MUX_LTC4306 is not set -# CONFIG_I2C_MUX_PCA9541 is not set -# CONFIG_I2C_MUX_REG is not set +CONFIG_I2C_ARB_GPIO_CHALLENGE=m +CONFIG_I2C_MUX_GPIO=m +CONFIG_I2C_MUX_GPMUX=m +CONFIG_I2C_MUX_LTC4306=m +CONFIG_I2C_MUX_PCA9541=m +# CONFIG_I2C_MUX_PCA954x is not set +CONFIG_I2C_MUX_REG=m # CONFIG_I2C_MUX_MLXCPLD is not set # end of Multiplexer I2C Chip support @@ -2721,10 +2857,14 @@ CONFIG_I2C_ALGOBIT=m # # I2C system bus drivers (mostly embedded / system-on-chip) # +# CONFIG_I2C_CBUS_GPIO is not set # CONFIG_I2C_DESIGNWARE_PLATFORM is not set # CONFIG_I2C_DESIGNWARE_PCI is not set +# CONFIG_I2C_EMEV2 is not set +# CONFIG_I2C_GPIO is not set # CONFIG_I2C_OCORES is not set # CONFIG_I2C_PCA_PLATFORM is not set +# CONFIG_I2C_RK3X is not set # CONFIG_I2C_SIMTEC is not set # CONFIG_I2C_XILINX is not set @@ -2733,6 +2873,7 @@ CONFIG_I2C_ALGOBIT=m # # CONFIG_I2C_DIOLAN_U2C is not set # CONFIG_I2C_CP2615 is not set +# CONFIG_I2C_PARPORT is not set # CONFIG_I2C_ROBOTFUZZ_OSIF is not set # CONFIG_I2C_TAOS_EVM is not set # CONFIG_I2C_TINY_USB is not set @@ -2754,40 +2895,90 @@ CONFIG_I2C_ALGOBIT=m # CONFIG_SPI is not set # CONFIG_SPMI is not set # CONFIG_HSI is not set -CONFIG_PPS=y -# CONFIG_PPS_DEBUG is not set +# CONFIG_PPS is not set # -# PPS clients support +# PTP clock support # -# CONFIG_PPS_CLIENT_KTIMER is not set -# CONFIG_PPS_CLIENT_LDISC is not set -# CONFIG_PPS_CLIENT_GPIO is not set +# CONFIG_PTP_1588_CLOCK is not set +CONFIG_PTP_1588_CLOCK_OPTIONAL=y # -# PPS generators support +# Enable PHYLIB and NETWORK_PHY_TIMESTAMPING to see the additional clocks. # +# end of PTP clock support + +# CONFIG_PINCTRL is not set +CONFIG_GPIOLIB=y +CONFIG_GPIOLIB_FASTPATH_LIMIT=512 +CONFIG_OF_GPIO=y +# CONFIG_DEBUG_GPIO is not set +# CONFIG_GPIO_SYSFS is not set +CONFIG_GPIO_CDEV=y +CONFIG_GPIO_CDEV_V1=y # -# PTP clock support +# Memory mapped GPIO drivers # -CONFIG_PTP_1588_CLOCK=y -CONFIG_PTP_1588_CLOCK_OPTIONAL=y +# CONFIG_GPIO_74XX_MMIO is not set +# CONFIG_GPIO_ALTERA is not set +# CONFIG_GPIO_CADENCE is not set +# CONFIG_GPIO_DWAPB is not set +# CONFIG_GPIO_EXAR is not set +# CONFIG_GPIO_FTGPIO010 is not set +# CONFIG_GPIO_GENERIC_PLATFORM is not set +# CONFIG_GPIO_GRGPIO is not set +# CONFIG_GPIO_HLWD is not set +# CONFIG_GPIO_MB86S7X is not set +# CONFIG_GPIO_SIFIVE is not set +# CONFIG_GPIO_XILINX is not set +# CONFIG_GPIO_AMD_FCH is not set +# end of Memory mapped GPIO drivers # -# Enable PHYLIB and NETWORK_PHY_TIMESTAMPING to see the additional clocks. 
+# I2C GPIO expanders # -# CONFIG_PTP_1588_CLOCK_IDT82P33 is not set -# CONFIG_PTP_1588_CLOCK_IDTCM is not set -# end of PTP clock support +# CONFIG_GPIO_ADP5588 is not set +# CONFIG_GPIO_ADNP is not set +# CONFIG_GPIO_GW_PLD is not set +# CONFIG_GPIO_MAX7300 is not set +# CONFIG_GPIO_MAX732X is not set +# CONFIG_GPIO_PCA953X is not set +# CONFIG_GPIO_PCA9570 is not set +# CONFIG_GPIO_PCF857X is not set +# CONFIG_GPIO_TPIC2810 is not set +# end of I2C GPIO expanders + +# +# MFD GPIO expanders +# +# end of MFD GPIO expanders + +# +# PCI GPIO expanders +# +# CONFIG_GPIO_PCI_IDIO_16 is not set +# CONFIG_GPIO_PCIE_IDIO_24 is not set +# CONFIG_GPIO_RDC321X is not set +# end of PCI GPIO expanders + +# +# USB GPIO expanders +# +# end of USB GPIO expanders + +# +# Virtual GPIO drivers +# +# CONFIG_GPIO_AGGREGATOR is not set +# CONFIG_GPIO_MOCKUP is not set +# CONFIG_GPIO_VIRTIO is not set +# end of Virtual GPIO drivers -# CONFIG_PINCTRL is not set -# CONFIG_GPIOLIB is not set # CONFIG_W1 is not set # CONFIG_POWER_RESET is not set CONFIG_POWER_SUPPLY=y # CONFIG_POWER_SUPPLY_DEBUG is not set -CONFIG_POWER_SUPPLY_HWMON=y # CONFIG_PDA_POWER is not set # CONFIG_TEST_POWER is not set # CONFIG_CHARGER_ADP5061 is not set @@ -2797,19 +2988,33 @@ CONFIG_POWER_SUPPLY_HWMON=y # CONFIG_BATTERY_DS2782 is not set # CONFIG_BATTERY_SBS is not set # CONFIG_CHARGER_SBS is not set +# CONFIG_MANAGER_SBS is not set # CONFIG_BATTERY_BQ27XXX is not set # CONFIG_BATTERY_MAX17040 is not set # CONFIG_BATTERY_MAX17042 is not set # CONFIG_CHARGER_MAX8903 is not set # CONFIG_CHARGER_LP8727 is not set +# CONFIG_CHARGER_GPIO is not set +# CONFIG_CHARGER_MANAGER is not set +# CONFIG_CHARGER_LT3651 is not set # CONFIG_CHARGER_LTC4162L is not set # CONFIG_CHARGER_DETECTOR_MAX14656 is not set # CONFIG_CHARGER_BQ2415X is not set +# CONFIG_CHARGER_BQ24190 is not set +# CONFIG_CHARGER_BQ24257 is not set +# CONFIG_CHARGER_BQ24735 is not set +# CONFIG_CHARGER_BQ2515X is not set +# CONFIG_CHARGER_BQ25890 is not set +# CONFIG_CHARGER_BQ25980 is not set +# CONFIG_CHARGER_BQ256XX is not set +# CONFIG_CHARGER_SMB347 is not set # CONFIG_BATTERY_GAUGE_LTC2941 is not set # CONFIG_BATTERY_GOLDFISH is not set # CONFIG_BATTERY_RT5033 is not set +# CONFIG_CHARGER_RT9455 is not set +# CONFIG_CHARGER_UCS1002 is not set # CONFIG_CHARGER_BD99954 is not set -CONFIG_HWMON=y +CONFIG_HWMON=m # CONFIG_HWMON_DEBUG_CHIP is not set # @@ -2849,6 +3054,7 @@ CONFIG_HWMON=y # CONFIG_SENSORS_GL520SM is not set # CONFIG_SENSORS_G760A is not set # CONFIG_SENSORS_G762 is not set +# CONFIG_SENSORS_GPIO_FAN is not set # CONFIG_SENSORS_HIH6130 is not set # CONFIG_SENSORS_IBMAEM is not set # CONFIG_SENSORS_IBMPEX is not set @@ -2859,6 +3065,7 @@ CONFIG_HWMON=y # CONFIG_SENSORS_LTC2945 is not set # CONFIG_SENSORS_LTC2947_I2C is not set # CONFIG_SENSORS_LTC2990 is not set +# CONFIG_SENSORS_LTC2992 is not set # CONFIG_SENSORS_LTC4151 is not set # CONFIG_SENSORS_LTC4215 is not set # CONFIG_SENSORS_LTC4222 is not set @@ -2905,9 +3112,48 @@ CONFIG_HWMON=y # CONFIG_SENSORS_NPCM7XX is not set # CONFIG_SENSORS_NZXT_KRAKEN2 is not set # CONFIG_SENSORS_PCF8591 is not set -# CONFIG_PMBUS is not set +CONFIG_PMBUS=m +CONFIG_SENSORS_PMBUS=m +# CONFIG_SENSORS_ADM1266 is not set +# CONFIG_SENSORS_ADM1275 is not set +# CONFIG_SENSORS_BEL_PFE is not set +# CONFIG_SENSORS_BPA_RS600 is not set +# CONFIG_SENSORS_FSP_3Y is not set +# CONFIG_SENSORS_IBM_CFFPS is not set +# CONFIG_SENSORS_DPS920AB is not set +# CONFIG_SENSORS_INSPUR_IPSPS is not set +# CONFIG_SENSORS_IR35221 is not set +# 
CONFIG_SENSORS_IR36021 is not set +# CONFIG_SENSORS_IR38064 is not set +# CONFIG_SENSORS_IRPS5401 is not set +# CONFIG_SENSORS_ISL68137 is not set +# CONFIG_SENSORS_LM25066 is not set +# CONFIG_SENSORS_LTC2978 is not set +# CONFIG_SENSORS_LTC3815 is not set +# CONFIG_SENSORS_MAX15301 is not set +# CONFIG_SENSORS_MAX16064 is not set +# CONFIG_SENSORS_MAX16601 is not set +# CONFIG_SENSORS_MAX20730 is not set +# CONFIG_SENSORS_MAX20751 is not set +# CONFIG_SENSORS_MAX31785 is not set +# CONFIG_SENSORS_MAX34440 is not set +# CONFIG_SENSORS_MAX8688 is not set +# CONFIG_SENSORS_MP2888 is not set +# CONFIG_SENSORS_MP2975 is not set +# CONFIG_SENSORS_PIM4328 is not set +# CONFIG_SENSORS_PM6764TR is not set +# CONFIG_SENSORS_PXE1610 is not set +# CONFIG_SENSORS_Q54SJ108A2 is not set +# CONFIG_SENSORS_STPDDC60 is not set +# CONFIG_SENSORS_TPS40422 is not set +# CONFIG_SENSORS_TPS53679 is not set +# CONFIG_SENSORS_UCD9000 is not set +# CONFIG_SENSORS_UCD9200 is not set +# CONFIG_SENSORS_XDPE122 is not set +# CONFIG_SENSORS_ZL6100 is not set # CONFIG_SENSORS_SBTSI is not set # CONFIG_SENSORS_SBRMI is not set +# CONFIG_SENSORS_SHT15 is not set # CONFIG_SENSORS_SHT21 is not set # CONFIG_SENSORS_SHT3x is not set # CONFIG_SENSORS_SHT4x is not set @@ -2949,7 +3195,7 @@ CONFIG_HWMON=y # CONFIG_SENSORS_W83L786NG is not set # CONFIG_SENSORS_W83627HF is not set # CONFIG_SENSORS_W83627EHF is not set -CONFIG_SENSORS_ULTRA45=m +# CONFIG_SENSORS_ULTRA45 is not set # CONFIG_THERMAL is not set # CONFIG_WATCHDOG is not set CONFIG_SSB_POSSIBLE=y @@ -2962,9 +3208,10 @@ CONFIG_SSB_B43_PCI_BRIDGE=y CONFIG_SSB_PCMCIAHOST_POSSIBLE=y # CONFIG_SSB_PCMCIAHOST is not set CONFIG_SSB_SDIOHOST_POSSIBLE=y -# CONFIG_SSB_SDIOHOST is not set +CONFIG_SSB_SDIOHOST=y CONFIG_SSB_DRIVER_PCICORE_POSSIBLE=y CONFIG_SSB_DRIVER_PCICORE=y +# CONFIG_SSB_DRIVER_GPIO is not set CONFIG_BCMA_POSSIBLE=y CONFIG_BCMA=m CONFIG_BCMA_BLOCKIO=y @@ -2972,7 +3219,8 @@ CONFIG_BCMA_HOST_PCI_POSSIBLE=y CONFIG_BCMA_HOST_PCI=y # CONFIG_BCMA_HOST_SOC is not set CONFIG_BCMA_DRIVER_PCI=y -# CONFIG_BCMA_DRIVER_GMAC_CMN is not set +CONFIG_BCMA_DRIVER_GMAC_CMN=y +# CONFIG_BCMA_DRIVER_GPIO is not set # CONFIG_BCMA_DEBUG is not set # @@ -2982,6 +3230,7 @@ CONFIG_BCMA_DRIVER_PCI=y # CONFIG_MFD_AS3711 is not set # CONFIG_MFD_AS3722 is not set # CONFIG_PMIC_ADP5520 is not set +# CONFIG_MFD_AAT2870_CORE is not set # CONFIG_MFD_ATMEL_FLEXCOM is not set # CONFIG_MFD_ATMEL_HLCDC is not set # CONFIG_MFD_BCM590XX is not set @@ -3000,6 +3249,7 @@ CONFIG_BCMA_DRIVER_PCI=y # CONFIG_MFD_MP2629 is not set # CONFIG_MFD_HI6421_PMIC is not set # CONFIG_HTC_PASIC3 is not set +# CONFIG_HTC_I2CPLD is not set # CONFIG_LPC_ICH is not set # CONFIG_LPC_SCH is not set # CONFIG_MFD_INTEL_PMT is not set @@ -3026,6 +3276,7 @@ CONFIG_BCMA_DRIVER_PCI=y # CONFIG_MFD_NTXEC is not set # CONFIG_MFD_RETU is not set # CONFIG_MFD_PCF50633 is not set +# CONFIG_UCB1400_CORE is not set # CONFIG_MFD_RDC321X is not set # CONFIG_MFD_RT4831 is not set # CONFIG_MFD_RT5033 is not set @@ -3044,6 +3295,7 @@ CONFIG_BCMA_DRIVER_PCI=y # CONFIG_MFD_TI_LMU is not set # CONFIG_MFD_PALMAS is not set # CONFIG_TPS6105X is not set +# CONFIG_TPS65010 is not set # CONFIG_TPS6507X is not set # CONFIG_MFD_TPS65086 is not set # CONFIG_MFD_TPS65090 is not set @@ -3052,6 +3304,7 @@ CONFIG_BCMA_DRIVER_PCI=y # CONFIG_MFD_TI_LP87565 is not set # CONFIG_MFD_TPS65218 is not set # CONFIG_MFD_TPS6586X is not set +# CONFIG_MFD_TPS65910 is not set # CONFIG_MFD_TPS65912_I2C is not set # CONFIG_MFD_TPS80031 is not set # CONFIG_TWL4030_CORE is 
not set @@ -3078,7 +3331,63 @@ CONFIG_BCMA_DRIVER_PCI=y # CONFIG_MFD_RSMU_I2C is not set # end of Multifunction device drivers -# CONFIG_REGULATOR is not set +CONFIG_REGULATOR=y +# CONFIG_REGULATOR_DEBUG is not set +# CONFIG_REGULATOR_FIXED_VOLTAGE is not set +# CONFIG_REGULATOR_VIRTUAL_CONSUMER is not set +# CONFIG_REGULATOR_USERSPACE_CONSUMER is not set +# CONFIG_REGULATOR_88PG86X is not set +# CONFIG_REGULATOR_ACT8865 is not set +# CONFIG_REGULATOR_AD5398 is not set +# CONFIG_REGULATOR_DA9121 is not set +# CONFIG_REGULATOR_DA9210 is not set +# CONFIG_REGULATOR_DA9211 is not set +# CONFIG_REGULATOR_FAN53555 is not set +# CONFIG_REGULATOR_FAN53880 is not set +# CONFIG_REGULATOR_GPIO is not set +# CONFIG_REGULATOR_ISL9305 is not set +# CONFIG_REGULATOR_ISL6271A is not set +# CONFIG_REGULATOR_LP3971 is not set +# CONFIG_REGULATOR_LP3972 is not set +# CONFIG_REGULATOR_LP872X is not set +# CONFIG_REGULATOR_LP8755 is not set +# CONFIG_REGULATOR_LTC3589 is not set +# CONFIG_REGULATOR_LTC3676 is not set +# CONFIG_REGULATOR_MAX1586 is not set +# CONFIG_REGULATOR_MAX8649 is not set +# CONFIG_REGULATOR_MAX8660 is not set +# CONFIG_REGULATOR_MAX8893 is not set +# CONFIG_REGULATOR_MAX8952 is not set +# CONFIG_REGULATOR_MAX77826 is not set +# CONFIG_REGULATOR_MCP16502 is not set +# CONFIG_REGULATOR_MP5416 is not set +# CONFIG_REGULATOR_MP8859 is not set +# CONFIG_REGULATOR_MP886X is not set +# CONFIG_REGULATOR_MPQ7920 is not set +# CONFIG_REGULATOR_MT6311 is not set +# CONFIG_REGULATOR_PCA9450 is not set +# CONFIG_REGULATOR_PF8X00 is not set +# CONFIG_REGULATOR_PFUZE100 is not set +# CONFIG_REGULATOR_PV88060 is not set +# CONFIG_REGULATOR_PV88080 is not set +# CONFIG_REGULATOR_PV88090 is not set +# CONFIG_REGULATOR_RASPBERRYPI_TOUCHSCREEN_ATTINY is not set +# CONFIG_REGULATOR_RT4801 is not set +# CONFIG_REGULATOR_RT6160 is not set +# CONFIG_REGULATOR_RT6245 is not set +# CONFIG_REGULATOR_RTQ2134 is not set +# CONFIG_REGULATOR_RTMV20 is not set +# CONFIG_REGULATOR_RTQ6752 is not set +# CONFIG_REGULATOR_SLG51000 is not set +# CONFIG_REGULATOR_SY8106A is not set +# CONFIG_REGULATOR_SY8824X is not set +# CONFIG_REGULATOR_SY8827N is not set +# CONFIG_REGULATOR_TPS51632 is not set +# CONFIG_REGULATOR_TPS62360 is not set +# CONFIG_REGULATOR_TPS65023 is not set +# CONFIG_REGULATOR_TPS6507X is not set +# CONFIG_REGULATOR_TPS65132 is not set +# CONFIG_REGULATOR_VCTRL is not set CONFIG_RC_CORE=y CONFIG_RC_MAP=y # CONFIG_LIRC is not set @@ -3095,8 +3404,10 @@ CONFIG_IR_XMP_DECODER=y # CONFIG_IR_IMON_DECODER is not set # CONFIG_IR_RCMM_DECODER is not set # CONFIG_RC_DEVICES is not set +CONFIG_CEC_CORE=m +# CONFIG_MEDIA_CEC_RC is not set # CONFIG_MEDIA_CEC_SUPPORT is not set -CONFIG_MEDIA_SUPPORT=m +CONFIG_MEDIA_SUPPORT=y # CONFIG_MEDIA_SUPPORT_FILTER is not set CONFIG_MEDIA_SUBDRV_AUTOSELECT=y @@ -3115,19 +3426,21 @@ CONFIG_MEDIA_TEST_SUPPORT=y # # Media core support # -CONFIG_VIDEO_DEV=m +CONFIG_VIDEO_DEV=y # CONFIG_MEDIA_CONTROLLER is not set -CONFIG_DVB_CORE=m +CONFIG_DVB_CORE=y # end of Media core support # # Video4Linux options # -CONFIG_VIDEO_V4L2=m +CONFIG_VIDEO_V4L2=y CONFIG_VIDEO_V4L2_I2C=y # CONFIG_VIDEO_ADV_DEBUG is not set # CONFIG_VIDEO_FIXED_MINOR_RANGES is not set CONFIG_VIDEO_TUNER=m +CONFIG_V4L2_FWNODE=m +CONFIG_V4L2_ASYNC=m CONFIG_VIDEOBUF_GEN=m CONFIG_VIDEOBUF_DMA_SG=m CONFIG_VIDEOBUF_VMALLOC=m @@ -3152,24 +3465,72 @@ CONFIG_MEDIA_USB_SUPPORT=y # # Webcam devices # -CONFIG_USB_VIDEO_CLASS=m +CONFIG_USB_VIDEO_CLASS=y CONFIG_USB_VIDEO_CLASS_INPUT_EVDEV=y -# CONFIG_USB_GSPCA is not set -# 
CONFIG_USB_PWC is not set +CONFIG_USB_GSPCA=m +CONFIG_USB_M5602=m +CONFIG_USB_STV06XX=m +CONFIG_USB_GL860=m +CONFIG_USB_GSPCA_BENQ=m +CONFIG_USB_GSPCA_CONEX=m +CONFIG_USB_GSPCA_CPIA1=m +CONFIG_USB_GSPCA_DTCS033=m +CONFIG_USB_GSPCA_ETOMS=m +CONFIG_USB_GSPCA_FINEPIX=m +CONFIG_USB_GSPCA_JEILINJ=m +CONFIG_USB_GSPCA_JL2005BCD=m +CONFIG_USB_GSPCA_KINECT=m +CONFIG_USB_GSPCA_KONICA=m +CONFIG_USB_GSPCA_MARS=m +CONFIG_USB_GSPCA_MR97310A=m +CONFIG_USB_GSPCA_NW80X=m +CONFIG_USB_GSPCA_OV519=m +CONFIG_USB_GSPCA_OV534=m +CONFIG_USB_GSPCA_OV534_9=m +CONFIG_USB_GSPCA_PAC207=m +CONFIG_USB_GSPCA_PAC7302=m +CONFIG_USB_GSPCA_PAC7311=m +CONFIG_USB_GSPCA_SE401=m +CONFIG_USB_GSPCA_SN9C2028=m +CONFIG_USB_GSPCA_SN9C20X=m +CONFIG_USB_GSPCA_SONIXB=m +CONFIG_USB_GSPCA_SONIXJ=m +CONFIG_USB_GSPCA_SPCA500=m +CONFIG_USB_GSPCA_SPCA501=m +CONFIG_USB_GSPCA_SPCA505=m +CONFIG_USB_GSPCA_SPCA506=m +CONFIG_USB_GSPCA_SPCA508=m +CONFIG_USB_GSPCA_SPCA561=m +CONFIG_USB_GSPCA_SPCA1528=m +CONFIG_USB_GSPCA_SQ905=m +CONFIG_USB_GSPCA_SQ905C=m +CONFIG_USB_GSPCA_SQ930X=m +CONFIG_USB_GSPCA_STK014=m +CONFIG_USB_GSPCA_STK1135=m +CONFIG_USB_GSPCA_STV0680=m +CONFIG_USB_GSPCA_SUNPLUS=m +CONFIG_USB_GSPCA_T613=m +CONFIG_USB_GSPCA_TOPRO=m +CONFIG_USB_GSPCA_TOUPTEK=m +CONFIG_USB_GSPCA_TV8532=m +CONFIG_USB_GSPCA_VC032X=m +CONFIG_USB_GSPCA_VICAM=m +CONFIG_USB_GSPCA_XIRLINK_CIT=m +CONFIG_USB_GSPCA_ZC3XX=m +CONFIG_USB_PWC=m +# CONFIG_USB_PWC_DEBUG is not set +CONFIG_USB_PWC_INPUT_EVDEV=y # CONFIG_VIDEO_CPIA2 is not set -# CONFIG_USB_ZR364XX is not set -# CONFIG_USB_STKWEBCAM is not set -# CONFIG_USB_S2255 is not set -# CONFIG_VIDEO_USBTV is not set +CONFIG_USB_ZR364XX=m +CONFIG_USB_STKWEBCAM=m +CONFIG_USB_S2255=m +CONFIG_VIDEO_USBTV=m # # Analog TV USB devices # -CONFIG_VIDEO_PVRUSB2=m -CONFIG_VIDEO_PVRUSB2_SYSFS=y -CONFIG_VIDEO_PVRUSB2_DVB=y -# CONFIG_VIDEO_PVRUSB2_DEBUGIFC is not set -CONFIG_VIDEO_HDPVR=m +# CONFIG_VIDEO_PVRUSB2 is not set +# CONFIG_VIDEO_HDPVR is not set # CONFIG_VIDEO_STK1160_COMMON is not set # CONFIG_VIDEO_GO7007 is not set @@ -3177,10 +3538,7 @@ CONFIG_VIDEO_HDPVR=m # Analog/digital TV USB devices # # CONFIG_VIDEO_AU0828 is not set -CONFIG_VIDEO_CX231XX=m -CONFIG_VIDEO_CX231XX_RC=y -CONFIG_VIDEO_CX231XX_ALSA=m -CONFIG_VIDEO_CX231XX_DVB=m +# CONFIG_VIDEO_CX231XX is not set # CONFIG_VIDEO_TM6000 is not set # @@ -3197,7 +3555,11 @@ CONFIG_VIDEO_CX231XX_DVB=m # # Webcam, TV (analog/digital) USB devices # -# CONFIG_VIDEO_EM28XX is not set +CONFIG_VIDEO_EM28XX=m +CONFIG_VIDEO_EM28XX_V4L2=m +CONFIG_VIDEO_EM28XX_ALSA=m +# CONFIG_VIDEO_EM28XX_DVB is not set +CONFIG_VIDEO_EM28XX_RC=m # # Software defined radio USB devices @@ -3209,17 +3571,17 @@ CONFIG_MEDIA_PCI_SUPPORT=y # # Media capture support # -# CONFIG_VIDEO_SOLO6X10 is not set -# CONFIG_VIDEO_TW5864 is not set -# CONFIG_VIDEO_TW68 is not set -# CONFIG_VIDEO_TW686X is not set +CONFIG_VIDEO_SOLO6X10=m +CONFIG_VIDEO_TW5864=m +CONFIG_VIDEO_TW68=m +CONFIG_VIDEO_TW686X=m # # Media capture/analog TV support # CONFIG_VIDEO_IVTV=m CONFIG_VIDEO_IVTV_ALSA=m -# CONFIG_VIDEO_FB_IVTV is not set +CONFIG_VIDEO_FB_IVTV=m # CONFIG_VIDEO_HEXIUM_GEMINI is not set # CONFIG_VIDEO_HEXIUM_ORION is not set # CONFIG_VIDEO_MXB is not set @@ -3231,7 +3593,7 @@ CONFIG_VIDEO_IVTV_ALSA=m CONFIG_VIDEO_CX18=m CONFIG_VIDEO_CX18_ALSA=m CONFIG_VIDEO_CX23885=m -# CONFIG_MEDIA_ALTERA_CI is not set +CONFIG_MEDIA_ALTERA_CI=m CONFIG_VIDEO_CX25821=m CONFIG_VIDEO_CX25821_ALSA=m CONFIG_VIDEO_CX88=m @@ -3261,7 +3623,9 @@ CONFIG_DVB_BT8XX=m # CONFIG_DVB_SMIPCIE is not set CONFIG_RADIO_ADAPTERS=y CONFIG_RADIO_TEA575X=m -# 
CONFIG_RADIO_SI470X is not set +CONFIG_RADIO_SI470X=m +CONFIG_USB_SI470X=m +CONFIG_I2C_SI470X=m # CONFIG_RADIO_SI4713 is not set # CONFIG_USB_MR800 is not set # CONFIG_USB_DSBR is not set @@ -3275,30 +3639,44 @@ CONFIG_RADIO_TEA575X=m # CONFIG_RADIO_SAA7706H is not set # CONFIG_RADIO_TEF6862 is not set # CONFIG_RADIO_WL1273 is not set +CONFIG_MEDIA_COMMON_OPTIONS=y + +# +# common driver options +# CONFIG_VIDEO_CX2341X=m CONFIG_VIDEO_TVEEPROM=m -CONFIG_VIDEOBUF2_CORE=m -CONFIG_VIDEOBUF2_V4L2=m -CONFIG_VIDEOBUF2_MEMOPS=m -CONFIG_VIDEOBUF2_VMALLOC=m +CONFIG_VIDEOBUF2_CORE=y +CONFIG_VIDEOBUF2_V4L2=y +CONFIG_VIDEOBUF2_MEMOPS=y +CONFIG_VIDEOBUF2_DMA_CONTIG=m +CONFIG_VIDEOBUF2_VMALLOC=y CONFIG_VIDEOBUF2_DMA_SG=m CONFIG_VIDEOBUF2_DVB=m -# CONFIG_V4L_PLATFORM_DRIVERS is not set +CONFIG_SMS_SIANO_MDTV=m +CONFIG_SMS_SIANO_RC=y +CONFIG_V4L_PLATFORM_DRIVERS=y +# CONFIG_VIDEO_CAFE_CCIC is not set +# CONFIG_VIDEO_CADENCE is not set +# CONFIG_VIDEO_ASPEED is not set +# CONFIG_VIDEO_MUX is not set +# CONFIG_VIDEO_XILINX is not set # CONFIG_V4L_MEM2MEM_DRIVERS is not set -# CONFIG_DVB_PLATFORM_DRIVERS is not set -# CONFIG_SDR_PLATFORM_DRIVERS is not set +CONFIG_DVB_PLATFORM_DRIVERS=y +CONFIG_SDR_PLATFORM_DRIVERS=y # # MMC/SDIO DVB adapters # -# CONFIG_SMS_SDIO_DRV is not set +CONFIG_SMS_SDIO_DRV=m # CONFIG_V4L_TEST_DRIVERS is not set # CONFIG_DVB_TEST_DRIVERS is not set # # FireWire (IEEE 1394) Adapters # -# CONFIG_DVB_FIREDTV is not set +CONFIG_DVB_FIREDTV=m +CONFIG_DVB_FIREDTV_INPUT=y # end of Media drivers # @@ -3309,7 +3687,7 @@ CONFIG_MEDIA_ATTACH=y # # IR I2C driver auto-selected by 'Autoselect ancillary drivers' # -CONFIG_VIDEO_IR_I2C=m +CONFIG_VIDEO_IR_I2C=y # # Audio decoders, processors and mixers @@ -3340,25 +3718,28 @@ CONFIG_VIDEO_SAA6588=m # # Video decoders # +# CONFIG_VIDEO_ADV7180 is not set # CONFIG_VIDEO_ADV7183 is not set # CONFIG_VIDEO_ADV748X is not set +# CONFIG_VIDEO_ADV7604 is not set # CONFIG_VIDEO_ADV7842 is not set -# CONFIG_VIDEO_BT819 is not set -# CONFIG_VIDEO_BT856 is not set -# CONFIG_VIDEO_BT866 is not set -# CONFIG_VIDEO_KS0127 is not set +CONFIG_VIDEO_BT819=m +CONFIG_VIDEO_BT856=m +CONFIG_VIDEO_BT866=m +CONFIG_VIDEO_KS0127=m # CONFIG_VIDEO_ML86V7667 is not set -# CONFIG_VIDEO_SAA7110 is not set +CONFIG_VIDEO_SAA7110=m CONFIG_VIDEO_SAA711X=m # CONFIG_VIDEO_TC358743 is not set # CONFIG_VIDEO_TVP514X is not set -# CONFIG_VIDEO_TVP5150 is not set +CONFIG_VIDEO_TVP5150=m # CONFIG_VIDEO_TVP7002 is not set # CONFIG_VIDEO_TW2804 is not set # CONFIG_VIDEO_TW9903 is not set # CONFIG_VIDEO_TW9906 is not set # CONFIG_VIDEO_TW9910 is not set -# CONFIG_VIDEO_VPX3220 is not set +CONFIG_VIDEO_VPX3220=m +# CONFIG_VIDEO_MAX9286 is not set # # Video and audio decoders @@ -3371,12 +3752,11 @@ CONFIG_VIDEO_CX25840=m # Video encoders # CONFIG_VIDEO_SAA7127=m -# CONFIG_VIDEO_SAA7185 is not set -# CONFIG_VIDEO_ADV7170 is not set -# CONFIG_VIDEO_ADV7175 is not set +CONFIG_VIDEO_SAA7185=m +CONFIG_VIDEO_ADV7170=m +CONFIG_VIDEO_ADV7175=m # CONFIG_VIDEO_ADV7343 is not set # CONFIG_VIDEO_ADV7393 is not set -# CONFIG_VIDEO_ADV7511 is not set # CONFIG_VIDEO_AD9389B is not set # CONFIG_VIDEO_AK881X is not set # CONFIG_VIDEO_THS8200 is not set @@ -3414,16 +3794,22 @@ CONFIG_VIDEO_M52790=m # Camera sensor devices # # CONFIG_VIDEO_HI556 is not set +# CONFIG_VIDEO_IMX214 is not set # CONFIG_VIDEO_IMX219 is not set # CONFIG_VIDEO_IMX258 is not set # CONFIG_VIDEO_IMX274 is not set # CONFIG_VIDEO_IMX290 is not set # CONFIG_VIDEO_IMX319 is not set +# CONFIG_VIDEO_IMX334 is not set +# CONFIG_VIDEO_IMX335 is not 
set # CONFIG_VIDEO_IMX355 is not set +# CONFIG_VIDEO_IMX412 is not set # CONFIG_VIDEO_OV02A10 is not set -# CONFIG_VIDEO_OV2640 is not set +CONFIG_VIDEO_OV2640=m +# CONFIG_VIDEO_OV2659 is not set # CONFIG_VIDEO_OV2680 is not set # CONFIG_VIDEO_OV2685 is not set +# CONFIG_VIDEO_OV5640 is not set # CONFIG_VIDEO_OV5645 is not set # CONFIG_VIDEO_OV5647 is not set # CONFIG_VIDEO_OV5648 is not set @@ -3438,17 +3824,18 @@ CONFIG_VIDEO_M52790=m # CONFIG_VIDEO_OV7740 is not set # CONFIG_VIDEO_OV8856 is not set # CONFIG_VIDEO_OV8865 is not set +# CONFIG_VIDEO_OV9282 is not set # CONFIG_VIDEO_OV9640 is not set # CONFIG_VIDEO_OV9650 is not set # CONFIG_VIDEO_OV13858 is not set # CONFIG_VIDEO_VS6624 is not set # CONFIG_VIDEO_MT9M001 is not set # CONFIG_VIDEO_MT9M032 is not set -# CONFIG_VIDEO_MT9M111 is not set +CONFIG_VIDEO_MT9M111=m # CONFIG_VIDEO_MT9P031 is not set # CONFIG_VIDEO_MT9T001 is not set # CONFIG_VIDEO_MT9T112 is not set -# CONFIG_VIDEO_MT9V011 is not set +CONFIG_VIDEO_MT9V011=m # CONFIG_VIDEO_MT9V032 is not set # CONFIG_VIDEO_MT9V111 is not set # CONFIG_VIDEO_SR030PC30 is not set @@ -3461,12 +3848,14 @@ CONFIG_VIDEO_M52790=m # CONFIG_VIDEO_S5K6A3 is not set # CONFIG_VIDEO_S5K4ECGX is not set # CONFIG_VIDEO_S5K5BAF is not set +# CONFIG_VIDEO_CCS is not set # CONFIG_VIDEO_ET8EK8 is not set # end of Camera sensor devices # # Lens drivers # +# CONFIG_VIDEO_AD5820 is not set # CONFIG_VIDEO_AK7375 is not set # CONFIG_VIDEO_DW9714 is not set # CONFIG_VIDEO_DW9768 is not set @@ -3486,31 +3875,31 @@ CONFIG_VIDEO_M52790=m # # end of SPI helper chips -CONFIG_MEDIA_TUNER=m +CONFIG_MEDIA_TUNER=y # # Customize TV tuners # -CONFIG_MEDIA_TUNER_SIMPLE=m +CONFIG_MEDIA_TUNER_SIMPLE=y # CONFIG_MEDIA_TUNER_TDA18250 is not set -CONFIG_MEDIA_TUNER_TDA8290=m -CONFIG_MEDIA_TUNER_TDA827X=m -CONFIG_MEDIA_TUNER_TDA18271=m -CONFIG_MEDIA_TUNER_TDA9887=m -CONFIG_MEDIA_TUNER_TEA5761=m -CONFIG_MEDIA_TUNER_TEA5767=m -CONFIG_MEDIA_TUNER_MT20XX=m +CONFIG_MEDIA_TUNER_TDA8290=y +CONFIG_MEDIA_TUNER_TDA827X=y +CONFIG_MEDIA_TUNER_TDA18271=y +CONFIG_MEDIA_TUNER_TDA9887=y +CONFIG_MEDIA_TUNER_TEA5761=y +CONFIG_MEDIA_TUNER_TEA5767=y +CONFIG_MEDIA_TUNER_MT20XX=y # CONFIG_MEDIA_TUNER_MT2060 is not set CONFIG_MEDIA_TUNER_MT2063=m # CONFIG_MEDIA_TUNER_MT2266 is not set CONFIG_MEDIA_TUNER_MT2131=m # CONFIG_MEDIA_TUNER_QT1010 is not set -CONFIG_MEDIA_TUNER_XC2028=m -CONFIG_MEDIA_TUNER_XC5000=m -CONFIG_MEDIA_TUNER_XC4000=m +CONFIG_MEDIA_TUNER_XC2028=y +CONFIG_MEDIA_TUNER_XC5000=y +CONFIG_MEDIA_TUNER_XC4000=y CONFIG_MEDIA_TUNER_MXL5005S=m # CONFIG_MEDIA_TUNER_MXL5007T is not set -CONFIG_MEDIA_TUNER_MC44S803=m +CONFIG_MEDIA_TUNER_MC44S803=y # CONFIG_MEDIA_TUNER_MAX2165 is not set # CONFIG_MEDIA_TUNER_TDA18218 is not set # CONFIG_MEDIA_TUNER_FC0011 is not set @@ -3523,7 +3912,7 @@ CONFIG_MEDIA_TUNER_M88RS6000T=m # CONFIG_MEDIA_TUNER_TUA9001 is not set CONFIG_MEDIA_TUNER_SI2157=m # CONFIG_MEDIA_TUNER_IT913X is not set -CONFIG_MEDIA_TUNER_R820T=m +# CONFIG_MEDIA_TUNER_R820T is not set # CONFIG_MEDIA_TUNER_MXL301RF is not set # CONFIG_MEDIA_TUNER_QM1D1C0042 is not set # CONFIG_MEDIA_TUNER_QM1D1B0004 is not set @@ -3549,10 +3938,10 @@ CONFIG_DVB_M88DS3103=m # Multistandard (cable + terrestrial) frontends # CONFIG_DVB_DRXK=m -CONFIG_DVB_TDA18271C2DD=m +# CONFIG_DVB_TDA18271C2DD is not set CONFIG_DVB_SI2165=m # CONFIG_DVB_MN88472 is not set -CONFIG_DVB_MN88473=m +# CONFIG_DVB_MN88473 is not set # # DVB-S (satellite) frontends @@ -3631,8 +4020,8 @@ CONFIG_DVB_OR51211=m CONFIG_DVB_OR51132=m # CONFIG_DVB_BCM3510 is not set CONFIG_DVB_LGDT330X=m 
-CONFIG_DVB_LGDT3305=m -CONFIG_DVB_LGDT3306A=m +# CONFIG_DVB_LGDT3305 is not set +# CONFIG_DVB_LGDT3306A is not set # CONFIG_DVB_LG2160 is not set CONFIG_DVB_S5H1409=m # CONFIG_DVB_AU8522_DTV is not set @@ -3700,8 +4089,10 @@ CONFIG_DVB_A8293=m # Graphics support # CONFIG_VGA_ARB=y -CONFIG_VGA_ARB_MAX_GPUS=16 +CONFIG_VGA_ARB_MAX_GPUS=4 CONFIG_DRM=m +CONFIG_DRM_MIPI_DSI=y +CONFIG_DRM_DP_AUX_BUS=m # CONFIG_DRM_DP_AUX_CHARDEV is not set # CONFIG_DRM_DEBUG_SELFTEST is not set CONFIG_DRM_KMS_HELPER=m @@ -3714,6 +4105,8 @@ CONFIG_DRM_FBDEV_OVERALLOC=100 CONFIG_DRM_TTM=m CONFIG_DRM_VRAM_HELPER=m CONFIG_DRM_TTM_HELPER=m +CONFIG_DRM_GEM_CMA_HELPER=y +CONFIG_DRM_KMS_CMA_HELPER=y CONFIG_DRM_GEM_SHMEM_HELPER=y CONFIG_DRM_SCHED=m @@ -3722,21 +4115,22 @@ CONFIG_DRM_SCHED=m # CONFIG_DRM_I2C_CH7006=m CONFIG_DRM_I2C_SIL164=m -# CONFIG_DRM_I2C_NXP_TDA998X is not set +CONFIG_DRM_I2C_NXP_TDA998X=m # CONFIG_DRM_I2C_NXP_TDA9950 is not set # end of I2C encoder or helper chips # # ARM devices # +# CONFIG_DRM_KOMEDA is not set # end of ARM devices CONFIG_DRM_RADEON=m -CONFIG_DRM_RADEON_USERPTR=y +# CONFIG_DRM_RADEON_USERPTR is not set CONFIG_DRM_AMDGPU=m CONFIG_DRM_AMDGPU_SI=y CONFIG_DRM_AMDGPU_CIK=y -CONFIG_DRM_AMDGPU_USERPTR=y +# CONFIG_DRM_AMDGPU_USERPTR is not set # # ACP (Audio CoProcessor) Configuration @@ -3759,11 +4153,11 @@ CONFIG_NOUVEAU_DEBUG_DEFAULT=3 # CONFIG_NOUVEAU_DEBUG_MMU is not set # CONFIG_NOUVEAU_DEBUG_PUSH is not set CONFIG_DRM_NOUVEAU_BACKLIGHT=y -# CONFIG_DRM_VGEM is not set +CONFIG_DRM_VGEM=m # CONFIG_DRM_VKMS is not set # CONFIG_DRM_UDL is not set CONFIG_DRM_AST=m -CONFIG_DRM_MGAG200=m +# CONFIG_DRM_MGAG200 is not set # CONFIG_DRM_RCAR_DW_HDMI is not set # CONFIG_DRM_RCAR_LVDS is not set CONFIG_DRM_QXL=m @@ -3773,14 +4167,52 @@ CONFIG_DRM_PANEL=y # # Display Panels # -# CONFIG_DRM_PANEL_LVDS is not set -# CONFIG_DRM_PANEL_SIMPLE is not set +# CONFIG_DRM_PANEL_ASUS_Z00T_TM5P5_NT35596 is not set +# CONFIG_DRM_PANEL_BOE_HIMAX8279D is not set +# CONFIG_DRM_PANEL_BOE_TV101WUM_NL6 is not set +# CONFIG_DRM_PANEL_DSI_CM is not set +CONFIG_DRM_PANEL_LVDS=m +CONFIG_DRM_PANEL_SIMPLE=m +# CONFIG_DRM_PANEL_ELIDA_KD35T133 is not set +# CONFIG_DRM_PANEL_FEIXIN_K101_IM2BA02 is not set +# CONFIG_DRM_PANEL_FEIYANG_FY07024DI26A30D is not set +# CONFIG_DRM_PANEL_ILITEK_ILI9881C is not set +CONFIG_DRM_PANEL_INNOLUX_P079ZCA=m +CONFIG_DRM_PANEL_JDI_LT070ME05000=m +# CONFIG_DRM_PANEL_KHADAS_TS050 is not set +# CONFIG_DRM_PANEL_KINGDISPLAY_KD097D04 is not set +# CONFIG_DRM_PANEL_LEADTEK_LTK050H3146W is not set +# CONFIG_DRM_PANEL_LEADTEK_LTK500HD1829 is not set +# CONFIG_DRM_PANEL_NOVATEK_NT35510 is not set +# CONFIG_DRM_PANEL_NOVATEK_NT36672A is not set +# CONFIG_DRM_PANEL_MANTIX_MLAF057WE51 is not set # CONFIG_DRM_PANEL_OLIMEX_LCD_OLINUXINO is not set +# CONFIG_DRM_PANEL_ORISETECH_OTM8009A is not set +# CONFIG_DRM_PANEL_OSD_OSD101T2587_53TS is not set +CONFIG_DRM_PANEL_PANASONIC_VVX10F034N00=m +# CONFIG_DRM_PANEL_RASPBERRYPI_TOUCHSCREEN is not set +# CONFIG_DRM_PANEL_RAYDIUM_RM67191 is not set +# CONFIG_DRM_PANEL_RAYDIUM_RM68200 is not set +# CONFIG_DRM_PANEL_RONBO_RB070D30 is not set # CONFIG_DRM_PANEL_SAMSUNG_ATNA33XC20 is not set +# CONFIG_DRM_PANEL_SAMSUNG_S6D16D0 is not set +CONFIG_DRM_PANEL_SAMSUNG_S6E3HA2=m +# CONFIG_DRM_PANEL_SAMSUNG_S6E63J0X03 is not set # CONFIG_DRM_PANEL_SAMSUNG_S6E63M0 is not set # CONFIG_DRM_PANEL_SAMSUNG_S6E88A0_AMS452EF01 is not set -# CONFIG_DRM_PANEL_SAMSUNG_S6E8AA0 is not set +CONFIG_DRM_PANEL_SAMSUNG_S6E8AA0=m +# CONFIG_DRM_PANEL_SAMSUNG_SOFEF00 is not set # 
CONFIG_DRM_PANEL_SEIKO_43WVF1G is not set +CONFIG_DRM_PANEL_SHARP_LQ101R1SX01=m +# CONFIG_DRM_PANEL_SHARP_LS037V7DW01 is not set +CONFIG_DRM_PANEL_SHARP_LS043T1LE01=m +# CONFIG_DRM_PANEL_SITRONIX_ST7701 is not set +# CONFIG_DRM_PANEL_SITRONIX_ST7703 is not set +# CONFIG_DRM_PANEL_SONY_ACX424AKP is not set +# CONFIG_DRM_PANEL_TDO_TL070WSH30 is not set +# CONFIG_DRM_PANEL_TRULY_NT35597_WQXGA is not set +# CONFIG_DRM_PANEL_VISIONOX_RM69299 is not set +# CONFIG_DRM_PANEL_XINPENG_XPP055C272 is not set # end of Display Panels CONFIG_DRM_BRIDGE=y @@ -3798,39 +4230,48 @@ CONFIG_DRM_PANEL_BRIDGE=y # CONFIG_DRM_LONTIUM_LT9611UXC is not set # CONFIG_DRM_ITE_IT66121 is not set # CONFIG_DRM_LVDS_CODEC is not set -# CONFIG_DRM_MEGACHIPS_STDPXXXX_GE_B850V3_FW is not set -# CONFIG_DRM_NXP_PTN3460 is not set -# CONFIG_DRM_PARADE_PS8622 is not set +CONFIG_DRM_MEGACHIPS_STDPXXXX_GE_B850V3_FW=m +# CONFIG_DRM_NWL_MIPI_DSI is not set +CONFIG_DRM_NXP_PTN3460=m +CONFIG_DRM_PARADE_PS8622=m # CONFIG_DRM_PARADE_PS8640 is not set -# CONFIG_DRM_SIL_SII8620 is not set -# CONFIG_DRM_SII902X is not set +CONFIG_DRM_SIL_SII8620=m +CONFIG_DRM_SII902X=m # CONFIG_DRM_SII9234 is not set # CONFIG_DRM_SIMPLE_BRIDGE is not set # CONFIG_DRM_THINE_THC63LVD1024 is not set # CONFIG_DRM_TOSHIBA_TC358762 is not set # CONFIG_DRM_TOSHIBA_TC358764 is not set -# CONFIG_DRM_TOSHIBA_TC358767 is not set +CONFIG_DRM_TOSHIBA_TC358767=m # CONFIG_DRM_TOSHIBA_TC358768 is not set # CONFIG_DRM_TOSHIBA_TC358775 is not set -# CONFIG_DRM_TI_TFP410 is not set +CONFIG_DRM_TI_TFP410=m # CONFIG_DRM_TI_SN65DSI83 is not set # CONFIG_DRM_TI_SN65DSI86 is not set # CONFIG_DRM_TI_TPD12S015 is not set # CONFIG_DRM_ANALOGIX_ANX6345 is not set -# CONFIG_DRM_ANALOGIX_ANX78XX is not set +CONFIG_DRM_ANALOGIX_ANX78XX=m +CONFIG_DRM_ANALOGIX_DP=m # CONFIG_DRM_ANALOGIX_ANX7625 is not set -# CONFIG_DRM_I2C_ADV7511 is not set +CONFIG_DRM_I2C_ADV7511=m +CONFIG_DRM_I2C_ADV7511_CEC=y # CONFIG_DRM_CDNS_MHDP8546 is not set # end of Display Interface Bridges # CONFIG_DRM_ETNAVIV is not set -# CONFIG_DRM_ARCPGU is not set +# CONFIG_DRM_MXSFB is not set +CONFIG_DRM_ARCPGU=m CONFIG_DRM_BOCHS=m CONFIG_DRM_CIRRUS_QEMU=m # CONFIG_DRM_GM12U320 is not set CONFIG_DRM_SIMPLEDRM=m # CONFIG_DRM_GUD is not set -# CONFIG_DRM_LEGACY is not set +CONFIG_DRM_LEGACY=y +CONFIG_DRM_TDFX=m +CONFIG_DRM_R128=m +CONFIG_DRM_MGA=m +CONFIG_DRM_VIA=m +CONFIG_DRM_SAVAGE=m CONFIG_DRM_PANEL_ORIENTATION_QUIRKS=m # @@ -3839,7 +4280,7 @@ CONFIG_DRM_PANEL_ORIENTATION_QUIRKS=m CONFIG_FB_CMDLINE=y CONFIG_FB_NOTIFY=y CONFIG_FB=y -CONFIG_FIRMWARE_EDID=y +# CONFIG_FIRMWARE_EDID is not set CONFIG_FB_DDC=m CONFIG_FB_CFB_FILLRECT=y CONFIG_FB_CFB_COPYAREA=y @@ -3853,6 +4294,7 @@ CONFIG_FB_BOTH_ENDIAN=y # CONFIG_FB_LITTLE_ENDIAN is not set CONFIG_FB_SYS_FOPS=m CONFIG_FB_DEFERRED_IO=y +CONFIG_FB_SVGALIB=m CONFIG_FB_BACKLIGHT=m CONFIG_FB_MODE_HELPERS=y CONFIG_FB_TILEBLITTING=y @@ -3861,29 +4303,39 @@ CONFIG_FB_TILEBLITTING=y # Frame buffer hardware drivers # # CONFIG_FB_GRVGA is not set -# CONFIG_FB_CIRRUS is not set +CONFIG_FB_CIRRUS=m # CONFIG_FB_PM2 is not set # CONFIG_FB_ASILIANT is not set -# CONFIG_FB_IMSTT is not set -# CONFIG_FB_UVESA is not set +CONFIG_FB_IMSTT=y +CONFIG_FB_UVESA=m CONFIG_FB_SBUS=y -CONFIG_FB_BW2=y -CONFIG_FB_CG3=y -CONFIG_FB_CG6=y +# CONFIG_FB_BW2 is not set +# CONFIG_FB_CG3 is not set +# CONFIG_FB_CG6 is not set CONFIG_FB_FFB=y -CONFIG_FB_TCX=y -CONFIG_FB_CG14=y -CONFIG_FB_P9100=y -CONFIG_FB_LEO=y +# CONFIG_FB_TCX is not set +# CONFIG_FB_CG14 is not set +# CONFIG_FB_P9100 is not set +# CONFIG_FB_LEO 
is not set CONFIG_FB_XVR500=y CONFIG_FB_XVR2500=y CONFIG_FB_XVR1000=y # CONFIG_FB_OPENCORES is not set # CONFIG_FB_S1D13XXX is not set -# CONFIG_FB_NVIDIA is not set -# CONFIG_FB_RIVA is not set +CONFIG_FB_NVIDIA=m +CONFIG_FB_NVIDIA_I2C=y +# CONFIG_FB_NVIDIA_DEBUG is not set +CONFIG_FB_NVIDIA_BACKLIGHT=y +CONFIG_FB_RIVA=m +# CONFIG_FB_RIVA_I2C is not set +# CONFIG_FB_RIVA_DEBUG is not set +CONFIG_FB_RIVA_BACKLIGHT=y # CONFIG_FB_I740 is not set -# CONFIG_FB_MATROX is not set +CONFIG_FB_MATROX=m +CONFIG_FB_MATROX_MILLENIUM=y +CONFIG_FB_MATROX_MYSTIQUE=y +CONFIG_FB_MATROX_G=y +# CONFIG_FB_MATROX_I2C is not set CONFIG_FB_RADEON=m CONFIG_FB_RADEON_I2C=y CONFIG_FB_RADEON_BACKLIGHT=y @@ -3895,12 +4347,15 @@ CONFIG_FB_ATY_CT=y # CONFIG_FB_ATY_GENERIC_LCD is not set CONFIG_FB_ATY_GX=y CONFIG_FB_ATY_BACKLIGHT=y -# CONFIG_FB_S3 is not set +CONFIG_FB_S3=m +CONFIG_FB_S3_DDC=y # CONFIG_FB_SAVAGE is not set # CONFIG_FB_SIS is not set # CONFIG_FB_NEOMAGIC is not set # CONFIG_FB_KYRO is not set -# CONFIG_FB_3DFX is not set +CONFIG_FB_3DFX=m +# CONFIG_FB_3DFX_ACCEL is not set +CONFIG_FB_3DFX_I2C=y # CONFIG_FB_VOODOO1 is not set # CONFIG_FB_VT8623 is not set # CONFIG_FB_TRIDENT is not set @@ -3914,6 +4369,7 @@ CONFIG_FB_ATY_BACKLIGHT=y # CONFIG_FB_METRONOME is not set # CONFIG_FB_MB862XX is not set CONFIG_FB_SIMPLE=m +# CONFIG_FB_SSD1307 is not set # CONFIG_FB_SM712 is not set # end of Frame buffer Devices @@ -3923,16 +4379,20 @@ CONFIG_FB_SIMPLE=m CONFIG_LCD_CLASS_DEVICE=m # CONFIG_LCD_PLATFORM is not set CONFIG_BACKLIGHT_CLASS_DEVICE=y +# CONFIG_BACKLIGHT_KTD253 is not set # CONFIG_BACKLIGHT_QCOM_WLED is not set -# CONFIG_BACKLIGHT_ADP8860 is not set -# CONFIG_BACKLIGHT_ADP8870 is not set -# CONFIG_BACKLIGHT_LM3639 is not set -# CONFIG_BACKLIGHT_LV5207LP is not set -# CONFIG_BACKLIGHT_BD6107 is not set -# CONFIG_BACKLIGHT_ARCXCNN is not set +CONFIG_BACKLIGHT_ADP8860=m +CONFIG_BACKLIGHT_ADP8870=m +CONFIG_BACKLIGHT_LM3639=m +CONFIG_BACKLIGHT_GPIO=m +CONFIG_BACKLIGHT_LV5207LP=m +CONFIG_BACKLIGHT_BD6107=m +CONFIG_BACKLIGHT_ARCXCNN=m # CONFIG_BACKLIGHT_LED is not set # end of Backlight & LCD device support +CONFIG_VGASTATE=m +CONFIG_VIDEOMODE_HELPERS=y CONFIG_HDMI=y # @@ -3950,15 +4410,15 @@ CONFIG_FRAMEBUFFER_CONSOLE_ROTATION=y CONFIG_LOGO=y CONFIG_LOGO_LINUX_MONO=y -CONFIG_LOGO_LINUX_VGA16=y +# CONFIG_LOGO_LINUX_VGA16 is not set CONFIG_LOGO_LINUX_CLUT224=y CONFIG_LOGO_SUN_CLUT224=y # end of Graphics support -CONFIG_SOUND=m +CONFIG_SOUND=y CONFIG_SOUND_OSS_CORE=y CONFIG_SOUND_OSS_CORE_PRECLAIM=y -CONFIG_SND=m +CONFIG_SND=y CONFIG_SND_TIMER=m CONFIG_SND_PCM=m CONFIG_SND_HWDEP=m @@ -3982,38 +4442,39 @@ CONFIG_SND_VERBOSE_PROCFS=y CONFIG_SND_VMASTER=y CONFIG_SND_CTL_LED=m CONFIG_SND_SEQUENCER=m -# CONFIG_SND_SEQ_DUMMY is not set +CONFIG_SND_SEQ_DUMMY=m CONFIG_SND_SEQUENCER_OSS=m CONFIG_SND_SEQ_MIDI_EVENT=m CONFIG_SND_SEQ_MIDI=m CONFIG_SND_SEQ_MIDI_EMUL=m +CONFIG_SND_SEQ_VIRMIDI=m CONFIG_SND_MPU401_UART=m CONFIG_SND_OPL3_LIB=m CONFIG_SND_OPL3_LIB_SEQ=m CONFIG_SND_VX_LIB=m CONFIG_SND_AC97_CODEC=m CONFIG_SND_DRIVERS=y -# CONFIG_SND_DUMMY is not set +CONFIG_SND_DUMMY=m # CONFIG_SND_ALOOP is not set -# CONFIG_SND_VIRMIDI is not set -# CONFIG_SND_MTPAV is not set +CONFIG_SND_VIRMIDI=m +CONFIG_SND_MTPAV=m +CONFIG_SND_MTS64=m # CONFIG_SND_SERIAL_U16550 is not set CONFIG_SND_MPU401=m +CONFIG_SND_PORTMAN2X4=m # CONFIG_SND_AC97_POWER_SAVE is not set CONFIG_SND_PCI=y -CONFIG_SND_AD1889=m -CONFIG_SND_ATIIXP=m -CONFIG_SND_ATIIXP_MODEM=m -CONFIG_SND_AU8810=m -CONFIG_SND_AU8820=m -CONFIG_SND_AU8830=m -CONFIG_SND_AW2=m 
-CONFIG_SND_BT87X=m -# CONFIG_SND_BT87X_OVERCLOCK is not set +# CONFIG_SND_AD1889 is not set +# CONFIG_SND_ATIIXP is not set +# CONFIG_SND_ATIIXP_MODEM is not set +# CONFIG_SND_AU8810 is not set +# CONFIG_SND_AU8820 is not set +# CONFIG_SND_AU8830 is not set +# CONFIG_SND_AW2 is not set +# CONFIG_SND_BT87X is not set CONFIG_SND_CA0106=m -CONFIG_SND_CMIPCI=m -CONFIG_SND_OXYGEN_LIB=m -CONFIG_SND_OXYGEN=m +# CONFIG_SND_CMIPCI is not set +# CONFIG_SND_OXYGEN is not set CONFIG_SND_CS4281=m CONFIG_SND_CS46XX=m CONFIG_SND_CS46XX_NEW_DSP=y @@ -4035,25 +4496,26 @@ CONFIG_SND_INDIGODJX=m CONFIG_SND_ENS1370=m CONFIG_SND_ENS1371=m CONFIG_SND_FM801=m -CONFIG_SND_FM801_TEA575X_BOOL=y -CONFIG_SND_HDSP=m -CONFIG_SND_HDSPM=m -CONFIG_SND_ICE1724=m -CONFIG_SND_INTEL8X0=m -CONFIG_SND_INTEL8X0M=m +# CONFIG_SND_FM801_TEA575X_BOOL is not set +# CONFIG_SND_HDSP is not set +# CONFIG_SND_HDSPM is not set +# CONFIG_SND_ICE1724 is not set +# CONFIG_SND_INTEL8X0 is not set +# CONFIG_SND_INTEL8X0M is not set CONFIG_SND_KORG1212=m CONFIG_SND_LOLA=m CONFIG_SND_LX6464ES=m -CONFIG_SND_MIXART=m -CONFIG_SND_NM256=m -CONFIG_SND_PCXHR=m -CONFIG_SND_RIPTIDE=m -CONFIG_SND_RME32=m -CONFIG_SND_RME96=m -CONFIG_SND_RME9652=m -CONFIG_SND_VIA82XX=m -CONFIG_SND_VIA82XX_MODEM=m -CONFIG_SND_VIRTUOSO=m +# CONFIG_SND_MIXART is not set +# CONFIG_SND_NM256 is not set +# CONFIG_SND_PCXHR is not set +# CONFIG_SND_RIPTIDE is not set +# CONFIG_SND_RME32 is not set +# CONFIG_SND_RME96 is not set +# CONFIG_SND_RME9652 is not set +# CONFIG_SND_SE6X is not set +# CONFIG_SND_VIA82XX is not set +# CONFIG_SND_VIA82XX_MODEM is not set +# CONFIG_SND_VIRTUOSO is not set CONFIG_SND_VX222=m CONFIG_SND_YMFPCI=m @@ -4065,8 +4527,7 @@ CONFIG_SND_HDA_GENERIC_LEDS=y CONFIG_SND_HDA_INTEL=m # CONFIG_SND_HDA_HWDEP is not set # CONFIG_SND_HDA_RECONFIG is not set -CONFIG_SND_HDA_INPUT_BEEP=y -CONFIG_SND_HDA_INPUT_BEEP_MODE=1 +# CONFIG_SND_HDA_INPUT_BEEP is not set # CONFIG_SND_HDA_PATCH_LOADER is not set CONFIG_SND_HDA_CODEC_REALTEK=m CONFIG_SND_HDA_CODEC_ANALOG=m @@ -4078,7 +4539,7 @@ CONFIG_SND_HDA_CODEC_CS8409=m CONFIG_SND_HDA_CODEC_CONEXANT=m CONFIG_SND_HDA_CODEC_CA0110=m CONFIG_SND_HDA_CODEC_CA0132=m -# CONFIG_SND_HDA_CODEC_CA0132_DSP is not set +CONFIG_SND_HDA_CODEC_CA0132_DSP=y CONFIG_SND_HDA_CODEC_CMEDIA=m CONFIG_SND_HDA_CODEC_SI3054=m CONFIG_SND_HDA_GENERIC=m @@ -4087,6 +4548,7 @@ CONFIG_SND_HDA_POWER_SAVE_DEFAULT=0 # end of HD-Audio CONFIG_SND_HDA_CORE=m +CONFIG_SND_HDA_DSP_LOADER=y CONFIG_SND_HDA_COMPONENT=y CONFIG_SND_HDA_PREALLOC_SIZE=64 CONFIG_SND_INTEL_DSP_CONFIG=m @@ -4102,24 +4564,25 @@ CONFIG_SND_USB_AUDIO=m # CONFIG_SND_USB_TONEPORT is not set # CONFIG_SND_USB_VARIAX is not set CONFIG_SND_FIREWIRE=y -# CONFIG_SND_DICE is not set -# CONFIG_SND_OXFW is not set -# CONFIG_SND_ISIGHT is not set -# CONFIG_SND_FIREWORKS is not set -# CONFIG_SND_BEBOB is not set -# CONFIG_SND_FIREWIRE_DIGI00X is not set -# CONFIG_SND_FIREWIRE_TASCAM is not set -# CONFIG_SND_FIREWIRE_MOTU is not set -# CONFIG_SND_FIREFACE is not set +CONFIG_SND_FIREWIRE_LIB=m +CONFIG_SND_DICE=m +CONFIG_SND_OXFW=m +CONFIG_SND_ISIGHT=m +CONFIG_SND_FIREWORKS=m +CONFIG_SND_BEBOB=m +CONFIG_SND_FIREWIRE_DIGI00X=m +CONFIG_SND_FIREWIRE_TASCAM=m +CONFIG_SND_FIREWIRE_MOTU=m +CONFIG_SND_FIREFACE=m CONFIG_SND_PCMCIA=y -# CONFIG_SND_VXPOCKET is not set -# CONFIG_SND_PDAUDIOCF is not set +CONFIG_SND_VXPOCKET=m +CONFIG_SND_PDAUDIOCF=m CONFIG_SND_SPARC=y CONFIG_SND_SUN_AMD7930=m CONFIG_SND_SUN_CS4231=m CONFIG_SND_SUN_DBRI=m # CONFIG_SND_SOC is not set -# CONFIG_SND_VIRTIO is not set +CONFIG_SND_VIRTIO=m 
CONFIG_AC97_BUS=m # @@ -4134,31 +4597,31 @@ CONFIG_HID_GENERIC=y # # Special HID drivers # -CONFIG_HID_A4TECH=m +# CONFIG_HID_A4TECH is not set # CONFIG_HID_ACCUTOUCH is not set # CONFIG_HID_ACRUX is not set -# CONFIG_HID_APPLE is not set -# CONFIG_HID_APPLEIR is not set +CONFIG_HID_APPLE=y +CONFIG_HID_APPLEIR=y # CONFIG_HID_ASUS is not set # CONFIG_HID_AUREAL is not set -CONFIG_HID_BELKIN=m +# CONFIG_HID_BELKIN is not set # CONFIG_HID_BETOP_FF is not set # CONFIG_HID_BIGBEN_FF is not set -CONFIG_HID_CHERRY=m -CONFIG_HID_CHICONY=m +# CONFIG_HID_CHERRY is not set +# CONFIG_HID_CHICONY is not set # CONFIG_HID_CORSAIR is not set # CONFIG_HID_COUGAR is not set # CONFIG_HID_MACALLY is not set # CONFIG_HID_PRODIKEYS is not set # CONFIG_HID_CMEDIA is not set # CONFIG_HID_CREATIVE_SB0540 is not set -CONFIG_HID_CYPRESS=m +# CONFIG_HID_CYPRESS is not set # CONFIG_HID_DRAGONRISE is not set # CONFIG_HID_EMS_FF is not set # CONFIG_HID_ELAN is not set # CONFIG_HID_ELECOM is not set # CONFIG_HID_ELO is not set -CONFIG_HID_EZKEY=m +# CONFIG_HID_EZKEY is not set # CONFIG_HID_GEMBIRD is not set # CONFIG_HID_GFRM is not set # CONFIG_HID_GLORIOUS is not set @@ -4170,35 +4633,29 @@ CONFIG_HID_EZKEY=m # CONFIG_HID_UCLOGIC is not set # CONFIG_HID_WALTOP is not set # CONFIG_HID_VIEWSONIC is not set -CONFIG_HID_GYRATION=m +# CONFIG_HID_GYRATION is not set # CONFIG_HID_ICADE is not set -CONFIG_HID_ITE=m +# CONFIG_HID_ITE is not set # CONFIG_HID_JABRA is not set # CONFIG_HID_TWINHAN is not set -CONFIG_HID_KENSINGTON=m +# CONFIG_HID_KENSINGTON is not set # CONFIG_HID_LCPOWER is not set # CONFIG_HID_LED is not set # CONFIG_HID_LENOVO is not set -CONFIG_HID_LOGITECH=m -# CONFIG_HID_LOGITECH_HIDPP is not set -CONFIG_LOGITECH_FF=y -# CONFIG_LOGIRUMBLEPAD2_FF is not set -# CONFIG_LOGIG940_FF is not set -CONFIG_LOGIWHEELS_FF=y -CONFIG_HID_MAGICMOUSE=m +# CONFIG_HID_LOGITECH is not set +CONFIG_HID_MAGICMOUSE=y # CONFIG_HID_MALTRON is not set # CONFIG_HID_MAYFLASH is not set # CONFIG_HID_REDRAGON is not set -CONFIG_HID_MICROSOFT=m -CONFIG_HID_MONTEREY=m +# CONFIG_HID_MICROSOFT is not set +# CONFIG_HID_MONTEREY is not set # CONFIG_HID_MULTITOUCH is not set # CONFIG_HID_NTI is not set # CONFIG_HID_NTRIG is not set # CONFIG_HID_ORTEK is not set -CONFIG_HID_PANTHERLORD=m -# CONFIG_PANTHERLORD_FF is not set +# CONFIG_HID_PANTHERLORD is not set # CONFIG_HID_PENMOUNT is not set -CONFIG_HID_PETALYNX=m +# CONFIG_HID_PETALYNX is not set # CONFIG_HID_PICOLCD is not set # CONFIG_HID_PLANTRONICS is not set # CONFIG_HID_PLAYSTATION is not set @@ -4206,14 +4663,13 @@ CONFIG_HID_PETALYNX=m # CONFIG_HID_RETRODE is not set # CONFIG_HID_ROCCAT is not set # CONFIG_HID_SAITEK is not set -CONFIG_HID_SAMSUNG=m +# CONFIG_HID_SAMSUNG is not set # CONFIG_HID_SEMITEK is not set -CONFIG_HID_SONY=m -CONFIG_SONY_FF=y +# CONFIG_HID_SONY is not set # CONFIG_HID_SPEEDLINK is not set # CONFIG_HID_STEAM is not set # CONFIG_HID_STEELSERIES is not set -CONFIG_HID_SUNPLUS=m +# CONFIG_HID_SUNPLUS is not set # CONFIG_HID_RMI is not set # CONFIG_HID_GREENASIA is not set # CONFIG_HID_SMARTJOYPLUS is not set @@ -4222,21 +4678,23 @@ CONFIG_HID_SUNPLUS=m # CONFIG_HID_THINGM is not set # CONFIG_HID_THRUSTMASTER is not set # CONFIG_HID_UDRAW_PS3 is not set -# CONFIG_HID_WACOM is not set +# CONFIG_HID_U2FZERO is not set +CONFIG_HID_WACOM=m # CONFIG_HID_WIIMOTE is not set # CONFIG_HID_XINMO is not set # CONFIG_HID_ZEROPLUS is not set # CONFIG_HID_ZYDACRON is not set # CONFIG_HID_SENSOR_HUB is not set # CONFIG_HID_ALPS is not set +# CONFIG_HID_MCP2221 is not set # end of 
Special HID drivers # # USB HID support # CONFIG_USB_HID=y -CONFIG_HID_PID=y -CONFIG_USB_HIDDEV=y +# CONFIG_HID_PID is not set +# CONFIG_USB_HIDDEV is not set # end of USB HID support # @@ -4252,6 +4710,7 @@ CONFIG_USB_SUPPORT=y CONFIG_USB_COMMON=y # CONFIG_USB_LED_TRIG is not set # CONFIG_USB_ULPI_BUS is not set +# CONFIG_USB_CONN_GPIO is not set CONFIG_USB_ARCH_HAS_HCD=y CONFIG_USB=y CONFIG_USB_PCI=y @@ -4262,25 +4721,21 @@ CONFIG_USB_PCI=y # CONFIG_USB_DEFAULT_PERSIST=y # CONFIG_USB_FEW_INIT_RETRIES is not set -# CONFIG_USB_DYNAMIC_MINORS is not set +CONFIG_USB_DYNAMIC_MINORS=y # CONFIG_USB_OTG is not set # CONFIG_USB_OTG_PRODUCTLIST is not set # CONFIG_USB_OTG_DISABLE_EXTERNAL_HUB is not set # CONFIG_USB_LEDS_TRIGGER_USBPORT is not set CONFIG_USB_AUTOSUSPEND_DELAY=2 -CONFIG_USB_MON=y +CONFIG_USB_MON=m # # USB Host Controller Drivers # # CONFIG_USB_C67X00_HCD is not set -CONFIG_USB_XHCI_HCD=y -# CONFIG_USB_XHCI_DBGCAP is not set -CONFIG_USB_XHCI_PCI=y -# CONFIG_USB_XHCI_PCI_RENESAS is not set -CONFIG_USB_XHCI_PLATFORM=m +# CONFIG_USB_XHCI_HCD is not set CONFIG_USB_EHCI_HCD=y -# CONFIG_USB_EHCI_ROOT_HUB_TT is not set +CONFIG_USB_EHCI_ROOT_HUB_TT=y CONFIG_USB_EHCI_TT_NEWSCHED=y CONFIG_USB_EHCI_PCI=y # CONFIG_USB_EHCI_FSL is not set @@ -4315,26 +4770,27 @@ CONFIG_USB_PRINTER=m # CONFIG_USB_STORAGE=y # CONFIG_USB_STORAGE_DEBUG is not set -# CONFIG_USB_STORAGE_REALTEK is not set +CONFIG_USB_STORAGE_REALTEK=m +CONFIG_REALTEK_AUTOPM=y CONFIG_USB_STORAGE_DATAFAB=m CONFIG_USB_STORAGE_FREECOM=m CONFIG_USB_STORAGE_ISD200=m -# CONFIG_USB_STORAGE_USBAT is not set +CONFIG_USB_STORAGE_USBAT=m CONFIG_USB_STORAGE_SDDR09=m CONFIG_USB_STORAGE_SDDR55=m CONFIG_USB_STORAGE_JUMPSHOT=m -# CONFIG_USB_STORAGE_ALAUDA is not set -# CONFIG_USB_STORAGE_ONETOUCH is not set -# CONFIG_USB_STORAGE_KARMA is not set -# CONFIG_USB_STORAGE_CYPRESS_ATACB is not set -# CONFIG_USB_STORAGE_ENE_UB6250 is not set +CONFIG_USB_STORAGE_ALAUDA=m +CONFIG_USB_STORAGE_ONETOUCH=m +CONFIG_USB_STORAGE_KARMA=m +CONFIG_USB_STORAGE_CYPRESS_ATACB=m +CONFIG_USB_STORAGE_ENE_UB6250=m CONFIG_USB_UAS=m # # USB Imaging devices # -# CONFIG_USB_MDC800 is not set -# CONFIG_USB_MICROTEK is not set +CONFIG_USB_MDC800=m +CONFIG_USB_MICROTEK=m # CONFIG_USBIP_CORE is not set # CONFIG_USB_CDNS_SUPPORT is not set # CONFIG_USB_MUSB_HDRC is not set @@ -4346,6 +4802,7 @@ CONFIG_USB_UAS=m # # USB port drivers # +# CONFIG_USB_USS720 is not set CONFIG_USB_SERIAL=m CONFIG_USB_SERIAL_GENERIC=y CONFIG_USB_SERIAL_SIMPLE=m @@ -4376,6 +4833,7 @@ CONFIG_USB_SERIAL_KOBIL_SCT=m CONFIG_USB_SERIAL_MCT_U232=m CONFIG_USB_SERIAL_METRO=m CONFIG_USB_SERIAL_MOS7720=m +# CONFIG_USB_SERIAL_MOS7715_PARPORT is not set CONFIG_USB_SERIAL_MOS7840=m CONFIG_USB_SERIAL_MXUPORT=m CONFIG_USB_SERIAL_NAVMAN=m @@ -4415,7 +4873,7 @@ CONFIG_USB_SERIAL_DEBUG=m # CONFIG_USB_CYTHERM is not set # CONFIG_USB_IDMOUSE is not set # CONFIG_USB_FTDI_ELAN is not set -# CONFIG_USB_APPLEDISPLAY is not set +CONFIG_USB_APPLEDISPLAY=m # CONFIG_APPLE_MFI_FASTCHARGE is not set # CONFIG_USB_SISUSBVGA is not set # CONFIG_USB_LD is not set @@ -4423,18 +4881,20 @@ CONFIG_USB_SERIAL_DEBUG=m # CONFIG_USB_IOWARRIOR is not set # CONFIG_USB_TEST is not set # CONFIG_USB_EHSET_TEST_FIXTURE is not set -# CONFIG_USB_ISIGHTFW is not set +CONFIG_USB_ISIGHTFW=m # CONFIG_USB_YUREX is not set CONFIG_USB_EZUSB_FX2=m # CONFIG_USB_HUB_USB251XB is not set # CONFIG_USB_HSIC_USB3503 is not set # CONFIG_USB_HSIC_USB4604 is not set # CONFIG_USB_LINK_LAYER_TEST is not set +# CONFIG_USB_CHAOSKEY is not set # # USB Physical Layer drivers # # 
CONFIG_NOP_USB_XCEIV is not set +# CONFIG_USB_GPIO_VBUS is not set # CONFIG_USB_ISP1301 is not set # end of USB Physical Layer drivers @@ -4443,6 +4903,7 @@ CONFIG_USB_EZUSB_FX2=m # CONFIG_USB_ROLE_SWITCH is not set CONFIG_MMC=m CONFIG_PWRSEQ_EMMC=m +# CONFIG_PWRSEQ_SD8787 is not set CONFIG_PWRSEQ_SIMPLE=m CONFIG_MMC_BLOCK=m CONFIG_MMC_BLOCK_MINORS=8 @@ -4456,40 +4917,30 @@ CONFIG_MMC_BLOCK_MINORS=8 CONFIG_MMC_SDHCI=m # CONFIG_MMC_SDHCI_PCI is not set CONFIG_MMC_SDHCI_PLTFM=m -# CONFIG_MMC_SDHCI_CADENCE is not set -# CONFIG_MMC_SDHCI_F_SDH30 is not set +# CONFIG_MMC_SDHCI_OF_ARASAN is not set +# CONFIG_MMC_SDHCI_OF_AT91 is not set +# CONFIG_MMC_SDHCI_OF_DWCMSHC is not set +CONFIG_MMC_SDHCI_CADENCE=m +CONFIG_MMC_SDHCI_F_SDH30=m # CONFIG_MMC_SDHCI_MILBEAUT is not set -# CONFIG_MMC_TIFM_SD is not set -# CONFIG_MMC_SDRICOH_CS is not set -# CONFIG_MMC_CB710 is not set -# CONFIG_MMC_VIA_SDMMC is not set -# CONFIG_MMC_VUB300 is not set -# CONFIG_MMC_USHC is not set -# CONFIG_MMC_USDHI6ROL0 is not set +CONFIG_MMC_TIFM_SD=m +CONFIG_MMC_SDRICOH_CS=m +CONFIG_MMC_CB710=m +CONFIG_MMC_VIA_SDMMC=m +CONFIG_MMC_VUB300=m +CONFIG_MMC_USHC=m +CONFIG_MMC_USDHI6ROL0=m # CONFIG_MMC_CQHCI is not set # CONFIG_MMC_HSQ is not set -# CONFIG_MMC_TOSHIBA_PCI is not set +CONFIG_MMC_TOSHIBA_PCI=m +# CONFIG_MMC_MTK is not set # CONFIG_MMC_SDHCI_XENON is not set # CONFIG_MMC_SDHCI_OMAP is not set -CONFIG_MEMSTICK=m -# CONFIG_MEMSTICK_DEBUG is not set - -# -# MemoryStick drivers -# -# CONFIG_MEMSTICK_UNSAFE_RESUME is not set -CONFIG_MSPRO_BLOCK=m -CONFIG_MS_BLOCK=m - -# -# MemoryStick Host Controller Drivers -# -CONFIG_MEMSTICK_TIFM_MS=m -CONFIG_MEMSTICK_JMICRON_38X=m -CONFIG_MEMSTICK_R592=m +# CONFIG_MMC_SDHCI_AM654 is not set +# CONFIG_MEMSTICK is not set CONFIG_NEW_LEDS=y CONFIG_LEDS_CLASS=y -# CONFIG_LEDS_CLASS_FLASH is not set +CONFIG_LEDS_CLASS_FLASH=m # CONFIG_LEDS_CLASS_MULTICOLOR is not set # CONFIG_LEDS_BRIGHTNESS_HW_CHANGED is not set @@ -4504,15 +4955,19 @@ CONFIG_LEDS_CLASS=y # CONFIG_LEDS_LM3532 is not set # CONFIG_LEDS_LM3642 is not set # CONFIG_LEDS_LM3692X is not set -# CONFIG_LEDS_SUNFIRE is not set +CONFIG_LEDS_SUNFIRE=m # CONFIG_LEDS_PCA9532 is not set +# CONFIG_LEDS_GPIO is not set # CONFIG_LEDS_LP3944 is not set +# CONFIG_LEDS_LP3952 is not set # CONFIG_LEDS_LP50XX is not set # CONFIG_LEDS_LP55XX_COMMON is not set # CONFIG_LEDS_LP8860 is not set # CONFIG_LEDS_PCA955X is not set # CONFIG_LEDS_PCA963X is not set +# CONFIG_LEDS_REGULATOR is not set # CONFIG_LEDS_BD2802 is not set +# CONFIG_LEDS_LT3593 is not set # CONFIG_LEDS_TCA6507 is not set # CONFIG_LEDS_TLC591XX is not set # CONFIG_LEDS_LM355x is not set @@ -4524,32 +4979,39 @@ CONFIG_LEDS_CLASS=y # # CONFIG_LEDS_BLINKM is not set # CONFIG_LEDS_MLXREG is not set -# CONFIG_LEDS_USER is not set +CONFIG_LEDS_USER=m # CONFIG_LEDS_TI_LMU_COMMON is not set # # Flash and Torch LED drivers # +# CONFIG_LEDS_AS3645A is not set +# CONFIG_LEDS_KTD2692 is not set +# CONFIG_LEDS_LM3601X is not set +# CONFIG_LEDS_RT4505 is not set +# CONFIG_LEDS_RT8515 is not set +# CONFIG_LEDS_SGM3140 is not set # # LED Triggers # CONFIG_LEDS_TRIGGERS=y -# CONFIG_LEDS_TRIGGER_TIMER is not set +CONFIG_LEDS_TRIGGER_TIMER=m # CONFIG_LEDS_TRIGGER_ONESHOT is not set -# CONFIG_LEDS_TRIGGER_DISK is not set +CONFIG_LEDS_TRIGGER_DISK=y # CONFIG_LEDS_TRIGGER_MTD is not set -# CONFIG_LEDS_TRIGGER_HEARTBEAT is not set +CONFIG_LEDS_TRIGGER_HEARTBEAT=m # CONFIG_LEDS_TRIGGER_BACKLIGHT is not set # CONFIG_LEDS_TRIGGER_CPU is not set # CONFIG_LEDS_TRIGGER_ACTIVITY is not set -# 
CONFIG_LEDS_TRIGGER_DEFAULT_ON is not set +CONFIG_LEDS_TRIGGER_GPIO=m +CONFIG_LEDS_TRIGGER_DEFAULT_ON=y # # iptables trigger is under Netfilter config (LED target) # -# CONFIG_LEDS_TRIGGER_TRANSIENT is not set -# CONFIG_LEDS_TRIGGER_CAMERA is not set +CONFIG_LEDS_TRIGGER_TRANSIENT=m +CONFIG_LEDS_TRIGGER_CAMERA=m # CONFIG_LEDS_TRIGGER_PANIC is not set # CONFIG_LEDS_TRIGGER_NETDEV is not set # CONFIG_LEDS_TRIGGER_PATTERN is not set @@ -4672,22 +5134,34 @@ CONFIG_SYNC_FILE=y # end of DMABUF options # CONFIG_AUXDISPLAY is not set -# CONFIG_UIO is not set +# CONFIG_PANEL is not set +CONFIG_UIO=m +# CONFIG_UIO_CIF is not set +# CONFIG_UIO_PDRV_GENIRQ is not set +# CONFIG_UIO_DMEM_GENIRQ is not set +# CONFIG_UIO_AEC is not set +# CONFIG_UIO_SERCOS3 is not set +# CONFIG_UIO_PCI_GENERIC is not set +# CONFIG_UIO_NETX is not set +# CONFIG_UIO_PRUSS is not set +# CONFIG_UIO_MF624 is not set # CONFIG_VFIO is not set -CONFIG_VIRT_DRIVERS=y +# CONFIG_VIRT_DRIVERS is not set CONFIG_VIRTIO=y CONFIG_VIRTIO_PCI_LIB=y CONFIG_VIRTIO_MENU=y CONFIG_VIRTIO_PCI=y -# CONFIG_VIRTIO_PCI_LEGACY is not set +CONFIG_VIRTIO_PCI_LEGACY=y CONFIG_VIRTIO_BALLOON=m CONFIG_VIRTIO_INPUT=m CONFIG_VIRTIO_MMIO=y # CONFIG_VIRTIO_MMIO_CMDLINE_DEVICES is not set CONFIG_VIRTIO_DMA_SHARED_BUFFER=m # CONFIG_VDPA is not set +CONFIG_VHOST_IOTLB=m +CONFIG_VHOST=m CONFIG_VHOST_MENU=y -# CONFIG_VHOST_NET is not set +CONFIG_VHOST_NET=m # CONFIG_VHOST_CROSS_ENDIAN_LEGACY is not set # @@ -4699,7 +5173,30 @@ CONFIG_VHOST_MENU=y # CONFIG_COMEDI is not set # CONFIG_STAGING is not set # CONFIG_GOLDFISH is not set -# CONFIG_COMMON_CLK is not set +CONFIG_HAVE_CLK=y +CONFIG_HAVE_CLK_PREPARE=y +CONFIG_COMMON_CLK=y + +# +# Clock driver for ARM Reference designs +# +# CONFIG_ICST is not set +# CONFIG_CLK_SP810 is not set +# end of Clock driver for ARM Reference designs + +# CONFIG_COMMON_CLK_MAX9485 is not set +# CONFIG_COMMON_CLK_SI5341 is not set +# CONFIG_COMMON_CLK_SI5351 is not set +# CONFIG_COMMON_CLK_SI514 is not set +# CONFIG_COMMON_CLK_SI544 is not set +# CONFIG_COMMON_CLK_SI570 is not set +# CONFIG_COMMON_CLK_CDCE706 is not set +# CONFIG_COMMON_CLK_CDCE925 is not set +# CONFIG_COMMON_CLK_CS2000_CP is not set +# CONFIG_COMMON_CLK_AXI_CLKGEN is not set +# CONFIG_COMMON_CLK_VC5 is not set +# CONFIG_COMMON_CLK_FIXED_MMIO is not set +# CONFIG_XILINX_VCU is not set # CONFIG_HWSPINLOCK is not set # @@ -4781,22 +5278,19 @@ CONFIG_QCOM_QMI_HELPERS=m # end of Xilinx SoC drivers # end of SOC (System On Chip) specific Drivers -CONFIG_PM_DEVFREQ=y - -# -# DEVFREQ Governors -# -CONFIG_DEVFREQ_GOV_SIMPLE_ONDEMAND=m -# CONFIG_DEVFREQ_GOV_PERFORMANCE is not set -# CONFIG_DEVFREQ_GOV_POWERSAVE is not set -# CONFIG_DEVFREQ_GOV_USERSPACE is not set -# CONFIG_DEVFREQ_GOV_PASSIVE is not set +# CONFIG_PM_DEVFREQ is not set +CONFIG_EXTCON=m # -# DEVFREQ Drivers +# Extcon Device Drivers # -# CONFIG_PM_DEVFREQ_EVENT is not set -# CONFIG_EXTCON is not set +# CONFIG_EXTCON_FSA9480 is not set +# CONFIG_EXTCON_GPIO is not set +# CONFIG_EXTCON_MAX3355 is not set +# CONFIG_EXTCON_PTN5150 is not set +# CONFIG_EXTCON_RT8973A is not set +# CONFIG_EXTCON_SM5502 is not set +# CONFIG_EXTCON_USB_GPIO is not set # CONFIG_MEMORY is not set # CONFIG_IIO is not set # CONFIG_NTB is not set @@ -4815,15 +5309,17 @@ CONFIG_DEVFREQ_GOV_SIMPLE_ONDEMAND=m # # PHY Subsystem # -# CONFIG_GENERIC_PHY is not set +CONFIG_GENERIC_PHY=y # CONFIG_PHY_CAN_TRANSCEIVER is not set # CONFIG_BCM_KONA_USB2_PHY is not set +# CONFIG_PHY_CADENCE_TORRENT is not set # CONFIG_PHY_CADENCE_DPHY is not set # 
CONFIG_PHY_CADENCE_SALVO is not set # CONFIG_PHY_FSL_IMX8MQ_USB is not set # CONFIG_PHY_MIXEL_MIPI_DPHY is not set # CONFIG_PHY_PXA_28NM_HSIC is not set # CONFIG_PHY_PXA_28NM_USB2 is not set +# CONFIG_PHY_MAPPHONE_MDM6600 is not set # end of PHY Subsystem # CONFIG_POWERCAP is not set @@ -4845,7 +5341,6 @@ CONFIG_RAS=y # CONFIG_LIBNVDIMM is not set CONFIG_DAX=y -# CONFIG_DEV_DAX is not set CONFIG_NVMEM=y CONFIG_NVMEM_SYSFS=y # CONFIG_NVMEM_RMEM is not set @@ -4859,11 +5354,21 @@ CONFIG_NVMEM_SYSFS=y # CONFIG_FPGA is not set # CONFIG_FSI is not set -CONFIG_PM_OPP=y +CONFIG_MULTIPLEXER=m + +# +# Multiplexer drivers +# +# CONFIG_MUX_ADG792A is not set +# CONFIG_MUX_GPIO is not set +# CONFIG_MUX_MMIO is not set +# end of Multiplexer drivers + # CONFIG_SIOX is not set # CONFIG_SLIMBUS is not set # CONFIG_INTERCONNECT is not set # CONFIG_COUNTER is not set +# CONFIG_MOST is not set # end of Device Drivers # @@ -4889,7 +5394,7 @@ CONFIG_REISERFS_FS_POSIX_ACL=y CONFIG_REISERFS_FS_SECURITY=y CONFIG_JFS_FS=y CONFIG_JFS_POSIX_ACL=y -# CONFIG_JFS_SECURITY is not set +CONFIG_JFS_SECURITY=y # CONFIG_JFS_DEBUG is not set # CONFIG_JFS_STATISTICS is not set CONFIG_XFS_FS=y @@ -4901,6 +5406,11 @@ CONFIG_XFS_RT=y # CONFIG_XFS_WARN is not set # CONFIG_XFS_DEBUG is not set CONFIG_GFS2_FS=m +CONFIG_OCFS2_FS=m +CONFIG_OCFS2_FS_O2CB=m +# CONFIG_OCFS2_FS_STATS is not set +# CONFIG_OCFS2_DEBUG_MASKLOG is not set +# CONFIG_OCFS2_DEBUG_FS is not set CONFIG_BTRFS_FS=m CONFIG_BTRFS_FS_POSIX_ACL=y # CONFIG_BTRFS_FS_CHECK_INTEGRITY is not set @@ -4913,7 +5423,7 @@ CONFIG_F2FS_FS=m CONFIG_F2FS_STAT_FS=y CONFIG_F2FS_FS_XATTR=y CONFIG_F2FS_FS_POSIX_ACL=y -# CONFIG_F2FS_FS_SECURITY is not set +CONFIG_F2FS_FS_SECURITY=y # CONFIG_F2FS_CHECK_FS is not set # CONFIG_F2FS_FAULT_INJECTION is not set # CONFIG_F2FS_FS_COMPRESSION is not set @@ -4927,9 +5437,15 @@ CONFIG_FILE_LOCKING=y CONFIG_FSNOTIFY=y CONFIG_DNOTIFY=y CONFIG_INOTIFY_USER=y -# CONFIG_FANOTIFY is not set -# CONFIG_QUOTA is not set -# CONFIG_QUOTA_NETLINK_INTERFACE is not set +CONFIG_FANOTIFY=y +# CONFIG_FANOTIFY_ACCESS_PERMISSIONS is not set +CONFIG_QUOTA=y +CONFIG_QUOTA_NETLINK_INTERFACE=y +# CONFIG_PRINT_QUOTA_WARNING is not set +# CONFIG_QUOTA_DEBUG is not set +CONFIG_QUOTA_TREE=m +# CONFIG_QFMT_V1 is not set +# CONFIG_QFMT_V2 is not set CONFIG_QUOTACTL=y # CONFIG_AUTOFS4_FS is not set # CONFIG_AUTOFS_FS is not set @@ -4966,7 +5482,7 @@ CONFIG_MSDOS_FS=m CONFIG_VFAT_FS=y CONFIG_FAT_DEFAULT_CODEPAGE=437 CONFIG_FAT_DEFAULT_IOCHARSET="iso8859-1" -# CONFIG_FAT_DEFAULT_UTF8 is not set +CONFIG_FAT_DEFAULT_UTF8=y CONFIG_EXFAT_FS=m CONFIG_EXFAT_DEFAULT_IOCHARSET="utf8" CONFIG_NTFS_FS=m @@ -4992,11 +5508,10 @@ CONFIG_TMPFS=y CONFIG_TMPFS_POSIX_ACL=y CONFIG_TMPFS_XATTR=y # CONFIG_TMPFS_INODE64 is not set -CONFIG_HUGETLBFS=y -CONFIG_HUGETLB_PAGE=y +# CONFIG_HUGETLBFS is not set CONFIG_MEMFD_CREATE=y CONFIG_ARCH_HAS_GIGANTIC_PAGE=y -# CONFIG_CONFIGFS_FS is not set +CONFIG_CONFIGFS_FS=m # end of Pseudo filesystems CONFIG_MISC_FILESYSTEMS=y @@ -5006,21 +5521,20 @@ CONFIG_AFFS_FS=m # CONFIG_ECRYPT_FS is not set CONFIG_HFS_FS=m CONFIG_HFSPLUS_FS=m -# CONFIG_BEFS_FS is not set +CONFIG_BEFS_FS=m +# CONFIG_BEFS_DEBUG is not set # CONFIG_BFS_FS is not set # CONFIG_EFS_FS is not set # CONFIG_JFFS2_FS is not set # CONFIG_UBIFS_FS is not set -CONFIG_CRAMFS=m -CONFIG_CRAMFS_BLOCKDEV=y -# CONFIG_CRAMFS_MTD is not set +# CONFIG_CRAMFS is not set CONFIG_SQUASHFS=y CONFIG_SQUASHFS_FILE_CACHE=y # CONFIG_SQUASHFS_FILE_DIRECT is not set CONFIG_SQUASHFS_DECOMP_SINGLE=y # 
CONFIG_SQUASHFS_DECOMP_MULTI is not set # CONFIG_SQUASHFS_DECOMP_MULTI_PERCPU is not set -# CONFIG_SQUASHFS_XATTR is not set +CONFIG_SQUASHFS_XATTR=y CONFIG_SQUASHFS_ZLIB=y CONFIG_SQUASHFS_LZ4=y CONFIG_SQUASHFS_LZO=y @@ -5030,26 +5544,33 @@ CONFIG_SQUASHFS_XZ=y # CONFIG_SQUASHFS_EMBEDDED is not set CONFIG_SQUASHFS_FRAGMENT_CACHE_SIZE=3 # CONFIG_VXFS_FS is not set -# CONFIG_MINIX_FS is not set +CONFIG_MINIX_FS=m +CONFIG_MINIX_FS_NATIVE_ENDIAN=y # CONFIG_OMFS_FS is not set -# CONFIG_HPFS_FS is not set +CONFIG_HPFS_FS=m # CONFIG_QNX4FS_FS is not set # CONFIG_QNX6FS_FS is not set # CONFIG_ROMFS_FS is not set # CONFIG_PSTORE is not set -# CONFIG_SYSV_FS is not set +CONFIG_SYSV_FS=m CONFIG_UFS_FS=m # CONFIG_UFS_FS_WRITE is not set # CONFIG_UFS_DEBUG is not set # CONFIG_EROFS_FS is not set CONFIG_NETWORK_FILESYSTEMS=y -CONFIG_NFS_FS=y +CONFIG_NFS_FS=m CONFIG_NFS_V2=m -CONFIG_NFS_V3=y +CONFIG_NFS_V3=m CONFIG_NFS_V3_ACL=y -CONFIG_NFS_V4=y +CONFIG_NFS_V4=m # CONFIG_NFS_SWAP is not set -# CONFIG_NFS_V4_1 is not set +CONFIG_NFS_V4_1=y +# CONFIG_NFS_V4_2 is not set +CONFIG_PNFS_FILE_LAYOUT=m +CONFIG_PNFS_BLOCK=m +CONFIG_PNFS_FLEXFILE_LAYOUT=m +CONFIG_NFS_V4_1_IMPLEMENTATION_ID_DOMAIN="kernel.org" +# CONFIG_NFS_V4_1_MIGRATION is not set # CONFIG_NFS_USE_LEGACY_DNS is not set CONFIG_NFS_USE_KERNEL_DNS=y CONFIG_NFS_DISABLE_UDP_SUPPORT=y @@ -5062,20 +5583,22 @@ CONFIG_NFSD_V4=y # CONFIG_NFSD_SCSILAYOUT is not set # CONFIG_NFSD_FLEXFILELAYOUT is not set # CONFIG_NFSD_V4_SECURITY_LABEL is not set -CONFIG_GRACE_PERIOD=y -CONFIG_LOCKD=y +CONFIG_GRACE_PERIOD=m +CONFIG_LOCKD=m CONFIG_LOCKD_V4=y -CONFIG_NFS_ACL_SUPPORT=y +CONFIG_NFS_ACL_SUPPORT=m CONFIG_NFS_COMMON=y -CONFIG_SUNRPC=y -CONFIG_SUNRPC_GSS=y +CONFIG_SUNRPC=m +CONFIG_SUNRPC_GSS=m +CONFIG_SUNRPC_BACKCHANNEL=y # CONFIG_SUNRPC_DEBUG is not set # CONFIG_CEPH_FS is not set CONFIG_CIFS=m # CONFIG_CIFS_STATS2 is not set CONFIG_CIFS_ALLOW_INSECURE_LEGACY=y # CONFIG_CIFS_UPCALL is not set -# CONFIG_CIFS_XATTR is not set +CONFIG_CIFS_XATTR=y +# CONFIG_CIFS_POSIX is not set CONFIG_CIFS_DEBUG=y # CONFIG_CIFS_DEBUG2 is not set # CONFIG_CIFS_DEBUG_DUMP_KEYS is not set @@ -5091,54 +5614,55 @@ CONFIG_9P_FS_SECURITY=y CONFIG_NLS=y CONFIG_NLS_DEFAULT="iso8859-1" CONFIG_NLS_CODEPAGE_437=y -# CONFIG_NLS_CODEPAGE_737 is not set -# CONFIG_NLS_CODEPAGE_775 is not set -# CONFIG_NLS_CODEPAGE_850 is not set -# CONFIG_NLS_CODEPAGE_852 is not set -# CONFIG_NLS_CODEPAGE_855 is not set -# CONFIG_NLS_CODEPAGE_857 is not set -# CONFIG_NLS_CODEPAGE_860 is not set -# CONFIG_NLS_CODEPAGE_861 is not set -# CONFIG_NLS_CODEPAGE_862 is not set -# CONFIG_NLS_CODEPAGE_863 is not set -# CONFIG_NLS_CODEPAGE_864 is not set -# CONFIG_NLS_CODEPAGE_865 is not set -# CONFIG_NLS_CODEPAGE_866 is not set -# CONFIG_NLS_CODEPAGE_869 is not set -# CONFIG_NLS_CODEPAGE_936 is not set -# CONFIG_NLS_CODEPAGE_950 is not set -# CONFIG_NLS_CODEPAGE_932 is not set -# CONFIG_NLS_CODEPAGE_949 is not set -# CONFIG_NLS_CODEPAGE_874 is not set -# CONFIG_NLS_ISO8859_8 is not set -CONFIG_NLS_CODEPAGE_1250=y -CONFIG_NLS_CODEPAGE_1251=y -CONFIG_NLS_ASCII=y +CONFIG_NLS_CODEPAGE_737=m +CONFIG_NLS_CODEPAGE_775=m +CONFIG_NLS_CODEPAGE_850=m +CONFIG_NLS_CODEPAGE_852=m +CONFIG_NLS_CODEPAGE_855=m +CONFIG_NLS_CODEPAGE_857=m +CONFIG_NLS_CODEPAGE_860=m +CONFIG_NLS_CODEPAGE_861=m +CONFIG_NLS_CODEPAGE_862=m +CONFIG_NLS_CODEPAGE_863=m +CONFIG_NLS_CODEPAGE_864=m +CONFIG_NLS_CODEPAGE_865=m +CONFIG_NLS_CODEPAGE_866=m +CONFIG_NLS_CODEPAGE_869=m +CONFIG_NLS_CODEPAGE_936=m +CONFIG_NLS_CODEPAGE_950=m +CONFIG_NLS_CODEPAGE_932=m 
+CONFIG_NLS_CODEPAGE_949=m +CONFIG_NLS_CODEPAGE_874=m +CONFIG_NLS_ISO8859_8=m +CONFIG_NLS_CODEPAGE_1250=m +CONFIG_NLS_CODEPAGE_1251=m +CONFIG_NLS_ASCII=m CONFIG_NLS_ISO8859_1=y -# CONFIG_NLS_ISO8859_2 is not set -# CONFIG_NLS_ISO8859_3 is not set -# CONFIG_NLS_ISO8859_4 is not set -# CONFIG_NLS_ISO8859_5 is not set -# CONFIG_NLS_ISO8859_6 is not set -# CONFIG_NLS_ISO8859_7 is not set -# CONFIG_NLS_ISO8859_9 is not set -# CONFIG_NLS_ISO8859_13 is not set -# CONFIG_NLS_ISO8859_14 is not set -CONFIG_NLS_ISO8859_15=y -# CONFIG_NLS_KOI8_R is not set -# CONFIG_NLS_KOI8_U is not set -# CONFIG_NLS_MAC_ROMAN is not set -# CONFIG_NLS_MAC_CELTIC is not set -# CONFIG_NLS_MAC_CENTEURO is not set -# CONFIG_NLS_MAC_CROATIAN is not set -# CONFIG_NLS_MAC_CYRILLIC is not set -# CONFIG_NLS_MAC_GAELIC is not set -# CONFIG_NLS_MAC_GREEK is not set -# CONFIG_NLS_MAC_ICELAND is not set -# CONFIG_NLS_MAC_INUIT is not set -# CONFIG_NLS_MAC_ROMANIAN is not set -# CONFIG_NLS_MAC_TURKISH is not set +CONFIG_NLS_ISO8859_2=m +CONFIG_NLS_ISO8859_3=m +CONFIG_NLS_ISO8859_4=m +CONFIG_NLS_ISO8859_5=m +CONFIG_NLS_ISO8859_6=m +CONFIG_NLS_ISO8859_7=m +CONFIG_NLS_ISO8859_9=m +CONFIG_NLS_ISO8859_13=m +CONFIG_NLS_ISO8859_14=m +CONFIG_NLS_ISO8859_15=m +CONFIG_NLS_KOI8_R=m +CONFIG_NLS_KOI8_U=m +CONFIG_NLS_MAC_ROMAN=m +CONFIG_NLS_MAC_CELTIC=m +CONFIG_NLS_MAC_CENTEURO=m +CONFIG_NLS_MAC_CROATIAN=m +CONFIG_NLS_MAC_CYRILLIC=m +CONFIG_NLS_MAC_GAELIC=m +CONFIG_NLS_MAC_GREEK=m +CONFIG_NLS_MAC_ICELAND=m +CONFIG_NLS_MAC_INUIT=m +CONFIG_NLS_MAC_ROMANIAN=m +CONFIG_NLS_MAC_TURKISH=m CONFIG_NLS_UTF8=y +# CONFIG_DLM is not set # CONFIG_UNICODE is not set CONFIG_IO_WQ=y # end of File systems @@ -5224,7 +5748,7 @@ CONFIG_CRYPTO_NULL2=y # CONFIG_CRYPTO_PCRYPT is not set # CONFIG_CRYPTO_CRYPTD is not set CONFIG_CRYPTO_AUTHENC=y -CONFIG_CRYPTO_TEST=m +# CONFIG_CRYPTO_TEST is not set # # Public-key cryptography @@ -5243,7 +5767,7 @@ CONFIG_CRYPTO_ECDH=m # CONFIG_CRYPTO_CCM=m CONFIG_CRYPTO_GCM=m -# CONFIG_CRYPTO_CHACHA20POLY1305 is not set +CONFIG_CRYPTO_CHACHA20POLY1305=m # CONFIG_CRYPTO_AEGIS128 is not set CONFIG_CRYPTO_SEQIV=m CONFIG_CRYPTO_ECHAINIV=m @@ -5258,7 +5782,7 @@ CONFIG_CRYPTO_CTR=m CONFIG_CRYPTO_ECB=m # CONFIG_CRYPTO_LRW is not set # CONFIG_CRYPTO_OFB is not set -CONFIG_CRYPTO_PCBC=m +# CONFIG_CRYPTO_PCBC is not set CONFIG_CRYPTO_XTS=m # CONFIG_CRYPTO_KEYWRAP is not set # CONFIG_CRYPTO_ADIANTUM is not set @@ -5280,10 +5804,9 @@ CONFIG_CRYPTO_CRC32C=y CONFIG_CRYPTO_CRC32=m CONFIG_CRYPTO_XXHASH=m CONFIG_CRYPTO_BLAKE2B=m -# CONFIG_CRYPTO_BLAKE2S is not set CONFIG_CRYPTO_CRCT10DIF=y CONFIG_CRYPTO_GHASH=m -# CONFIG_CRYPTO_POLY1305 is not set +CONFIG_CRYPTO_POLY1305=m CONFIG_CRYPTO_MD4=m CONFIG_CRYPTO_MD5=y # CONFIG_CRYPTO_MD5_SPARC64 is not set @@ -5298,7 +5821,7 @@ CONFIG_CRYPTO_SHA512=y # CONFIG_CRYPTO_SHA3 is not set # CONFIG_CRYPTO_SM3 is not set # CONFIG_CRYPTO_STREEBOG is not set -CONFIG_CRYPTO_WP512=m +# CONFIG_CRYPTO_WP512 is not set # # Ciphers @@ -5306,26 +5829,23 @@ CONFIG_CRYPTO_WP512=m CONFIG_CRYPTO_AES=y # CONFIG_CRYPTO_AES_TI is not set # CONFIG_CRYPTO_AES_SPARC64 is not set -CONFIG_CRYPTO_ANUBIS=m +# CONFIG_CRYPTO_ANUBIS is not set CONFIG_CRYPTO_ARC4=m -CONFIG_CRYPTO_BLOWFISH=m -CONFIG_CRYPTO_BLOWFISH_COMMON=m +# CONFIG_CRYPTO_BLOWFISH is not set # CONFIG_CRYPTO_CAMELLIA is not set # CONFIG_CRYPTO_CAMELLIA_SPARC64 is not set -CONFIG_CRYPTO_CAST_COMMON=m -CONFIG_CRYPTO_CAST5=m -CONFIG_CRYPTO_CAST6=m +# CONFIG_CRYPTO_CAST5 is not set +# CONFIG_CRYPTO_CAST6 is not set CONFIG_CRYPTO_DES=m # CONFIG_CRYPTO_DES_SPARC64 is not set # 
CONFIG_CRYPTO_FCRYPT is not set -CONFIG_CRYPTO_KHAZAD=m -# CONFIG_CRYPTO_CHACHA20 is not set +# CONFIG_CRYPTO_KHAZAD is not set +CONFIG_CRYPTO_CHACHA20=m # CONFIG_CRYPTO_SEED is not set -CONFIG_CRYPTO_SERPENT=m +# CONFIG_CRYPTO_SERPENT is not set # CONFIG_CRYPTO_SM4 is not set -CONFIG_CRYPTO_TEA=m -CONFIG_CRYPTO_TWOFISH=m -CONFIG_CRYPTO_TWOFISH_COMMON=m +# CONFIG_CRYPTO_TEA is not set +# CONFIG_CRYPTO_TWOFISH is not set # # Compression @@ -5355,24 +5875,6 @@ CONFIG_CRYPTO_USER_API_RNG=m CONFIG_CRYPTO_USER_API_AEAD=m CONFIG_CRYPTO_USER_API_ENABLE_OBSOLETE=y CONFIG_CRYPTO_HASH_INFO=y - -# -# Crypto library routines -# -CONFIG_CRYPTO_LIB_AES=y -CONFIG_CRYPTO_LIB_ARC4=m -CONFIG_CRYPTO_LIB_BLAKE2S_GENERIC=m -CONFIG_CRYPTO_LIB_BLAKE2S=m -CONFIG_CRYPTO_LIB_CHACHA_GENERIC=m -CONFIG_CRYPTO_LIB_CHACHA=m -CONFIG_CRYPTO_LIB_CURVE25519_GENERIC=m -CONFIG_CRYPTO_LIB_CURVE25519=m -CONFIG_CRYPTO_LIB_DES=m -CONFIG_CRYPTO_LIB_POLY1305_RSIZE=1 -CONFIG_CRYPTO_LIB_POLY1305_GENERIC=m -CONFIG_CRYPTO_LIB_POLY1305=m -CONFIG_CRYPTO_LIB_CHACHA20POLY1305=m -CONFIG_CRYPTO_LIB_SHA256=y # CONFIG_CRYPTO_HW is not set CONFIG_ASYMMETRIC_KEY_TYPE=y CONFIG_ASYMMETRIC_PUBLIC_KEY_SUBTYPE=y @@ -5402,6 +5904,7 @@ CONFIG_BINARY_PRINTF=y # CONFIG_RAID6_PQ=m CONFIG_RAID6_PQ_BENCHMARK=y +CONFIG_LINEAR_RANGES=y # CONFIG_PACKING is not set CONFIG_BITREVERSE=y CONFIG_GENERIC_STRNCPY_FROM_USER=y @@ -5409,8 +5912,28 @@ CONFIG_GENERIC_STRNLEN_USER=y CONFIG_GENERIC_NET_UTILS=y CONFIG_CORDIC=m # CONFIG_PRIME_NUMBERS is not set -CONFIG_RATIONAL=m +CONFIG_RATIONAL=y CONFIG_GENERIC_PCI_IOMAP=y + +# +# Crypto library routines +# +CONFIG_CRYPTO_LIB_AES=y +CONFIG_CRYPTO_LIB_ARC4=m +CONFIG_CRYPTO_LIB_BLAKE2S_GENERIC=y +CONFIG_CRYPTO_LIB_CHACHA_GENERIC=m +CONFIG_CRYPTO_LIB_CHACHA=m +CONFIG_CRYPTO_LIB_CURVE25519_GENERIC=m +CONFIG_CRYPTO_LIB_CURVE25519=m +CONFIG_CRYPTO_LIB_DES=m +CONFIG_CRYPTO_LIB_POLY1305_RSIZE=1 +CONFIG_CRYPTO_LIB_POLY1305_GENERIC=m +CONFIG_CRYPTO_LIB_POLY1305=m +CONFIG_CRYPTO_LIB_CHACHA20POLY1305=m +CONFIG_CRYPTO_LIB_SHA256=y +# end of Crypto library routines + +CONFIG_LIB_MEMNEQ=y CONFIG_CRC_CCITT=m CONFIG_CRC16=y CONFIG_CRC_T10DIF=y @@ -5457,7 +5980,6 @@ CONFIG_TEXTSEARCH_KMP=m CONFIG_TEXTSEARCH_BM=m CONFIG_TEXTSEARCH_FSM=m CONFIG_INTERVAL_TREE=y -CONFIG_XARRAY_MULTI=y CONFIG_ASSOCIATIVE_ARRAY=y CONFIG_HAS_IOMEM=y CONFIG_HAS_IOPORT_MAP=y @@ -5479,24 +6001,27 @@ CONFIG_NLATTR=y CONFIG_CLZ_TAB=y CONFIG_IRQ_POLL=y CONFIG_MPILIB=y +CONFIG_DIMLIB=y CONFIG_OID_REGISTRY=y CONFIG_FONT_SUPPORT=y CONFIG_FONTS=y -CONFIG_FONT_8x8=y +# CONFIG_FONT_8x8 is not set CONFIG_FONT_8x16=y CONFIG_FONT_6x11=y # CONFIG_FONT_7x14 is not set # CONFIG_FONT_PEARL_8x8 is not set # CONFIG_FONT_ACORN_8x8 is not set # CONFIG_FONT_10x18 is not set -CONFIG_FONT_SUN8x16=y -CONFIG_FONT_SUN12x22=y +# CONFIG_FONT_SUN8x16 is not set +# CONFIG_FONT_SUN12x22 is not set # CONFIG_FONT_TER16x32 is not set # CONFIG_FONT_6x8 is not set CONFIG_SG_POOL=y CONFIG_SBITMAP=y # end of Library routines +CONFIG_PLDMFW=y + # # Kernel hacking # @@ -5510,24 +6035,25 @@ CONFIG_PRINTK_TIME=y CONFIG_CONSOLE_LOGLEVEL_DEFAULT=7 CONFIG_CONSOLE_LOGLEVEL_QUIET=4 CONFIG_MESSAGE_LOGLEVEL_DEFAULT=4 -CONFIG_BOOT_PRINTK_DELAY=y +# CONFIG_BOOT_PRINTK_DELAY is not set # CONFIG_DYNAMIC_DEBUG is not set # CONFIG_DYNAMIC_DEBUG_CORE is not set CONFIG_SYMBOLIC_ERRNAME=y CONFIG_DEBUG_BUGVERBOSE=y # end of printk and dmesg options +CONFIG_AS_HAS_NON_CONST_LEB128=y + # # Compile-time checks and compiler options # # CONFIG_DEBUG_INFO is not set -CONFIG_FRAME_WARN=2048 +CONFIG_FRAME_WARN=1024 # 
CONFIG_STRIP_ASM_SYMS is not set # CONFIG_READABLE_ASM is not set # CONFIG_HEADERS_INSTALL is not set # CONFIG_DEBUG_SECTION_MISMATCH is not set CONFIG_SECTION_MISMATCH_WARN_ONLY=y -# CONFIG_DEBUG_FORCE_FUNCTION_ALIGN_64B is not set # CONFIG_VMLINUX_MAP is not set # CONFIG_DEBUG_FORCE_WEAK_PER_CPU is not set # end of Compile-time checks and compiler options @@ -5579,7 +6105,10 @@ CONFIG_CC_HAS_WORKING_NOSANITIZE_ADDRESS=y CONFIG_PANIC_ON_OOPS_VALUE=0 CONFIG_PANIC_TIMEOUT=120 # CONFIG_SOFTLOCKUP_DETECTOR is not set -# CONFIG_DETECT_HUNG_TASK is not set +CONFIG_DETECT_HUNG_TASK=y +CONFIG_DEFAULT_HUNG_TASK_TIMEOUT=120 +# CONFIG_BOOTPARAM_HUNG_TASK_PANIC is not set +CONFIG_BOOTPARAM_HUNG_TASK_PANIC_VALUE=0 # CONFIG_WQ_WATCHDOG is not set # CONFIG_TEST_LOCKUP is not set # end of Debug Oops, Lockups and Hangs @@ -5639,7 +6168,7 @@ CONFIG_HAVE_DEBUG_BUGVERBOSE=y # CONFIG_RCU_TORTURE_TEST is not set # CONFIG_RCU_REF_SCALE_TEST is not set CONFIG_RCU_CPU_STALL_TIMEOUT=21 -CONFIG_RCU_TRACE=y +# CONFIG_RCU_TRACE is not set # CONFIG_RCU_EQS_DEBUG is not set # end of RCU Debugging diff --git a/system/easy-kernel/config-x86_64 b/system/easy-kernel/config-x86_64 index 1a300499fc..f0c42e7c6d 100644 --- a/system/easy-kernel/config-x86_64 +++ b/system/easy-kernel/config-x86_64 @@ -1,10 +1,10 @@ # # Automatically generated file; DO NOT EDIT. -# Linux/x86 5.15.28-mc1 Kernel Configuration +# Linux/x86_64 5.15.98-mc4 Kernel Configuration # -CONFIG_CC_VERSION_TEXT="gcc (Adelie 8.3.0) 8.3.0" +CONFIG_CC_VERSION_TEXT="gcc (Adelie 8.5.0) 8.5.0" CONFIG_CC_IS_GCC=y -CONFIG_GCC_VERSION=80300 +CONFIG_GCC_VERSION=80500 CONFIG_CLANG_VERSION=0 CONFIG_AS_IS_GNU=y CONFIG_AS_VERSION=23200 @@ -16,6 +16,7 @@ CONFIG_CC_CAN_LINK_STATIC=y CONFIG_CC_HAS_ASM_GOTO=y CONFIG_CC_HAS_ASM_INLINE=y CONFIG_CC_HAS_NO_PROFILE_FN_ATTR=y +CONFIG_PAHOLE_VERSION=0 CONFIG_IRQ_WORK=y CONFIG_BUILDTIME_TABLE_SORT=y CONFIG_THREAD_INFO_IN_TASK=y @@ -165,7 +166,6 @@ CONFIG_HAVE_UNSTABLE_SCHED_CLOCK=y # # Scheduler features # -# CONFIG_SCHED_ALT is not set # CONFIG_UCLAMP_TASK is not set # end of Scheduler features @@ -318,7 +318,6 @@ CONFIG_X86_64_SMP=y CONFIG_ARCH_SUPPORTS_UPROBES=y CONFIG_FIX_EARLYCON_MEM=y CONFIG_PGTABLE_LEVELS=4 -CONFIG_CC_HAS_SANE_STACKPROTECTOR=y # # Processor type and features @@ -328,7 +327,6 @@ CONFIG_X86_FEATURE_NAMES=y CONFIG_X86_X2APIC=y CONFIG_X86_MPPARSE=y # CONFIG_GOLDFISH is not set -CONFIG_RETPOLINE=y # CONFIG_X86_CPU_RESCTRL is not set CONFIG_X86_EXTENDED_PLATFORM=y # CONFIG_X86_VSMP is not set @@ -522,6 +520,11 @@ CONFIG_MODIFY_LDT_SYSCALL=y CONFIG_HAVE_LIVEPATCH=y # end of Processor type and features +CONFIG_SPECULATION_MITIGATIONS=y +CONFIG_PAGE_TABLE_ISOLATION=y +CONFIG_RETPOLINE=y +CONFIG_CPU_IBPB_ENTRY=y +CONFIG_CPU_IBRS_ENTRY=y CONFIG_ARCH_HAS_ADD_PAGES=y CONFIG_ARCH_MHP_MEMMAP_ON_MEMORY_ENABLE=y CONFIG_USE_PERCPU_NUMA_NODE_ID=y @@ -712,10 +715,6 @@ CONFIG_KVM=y CONFIG_KVM_INTEL=m CONFIG_KVM_AMD=m # CONFIG_KVM_XEN is not set -CONFIG_AS_AVX512=y -CONFIG_AS_SHA1_NI=y -CONFIG_AS_SHA256_NI=y -CONFIG_AS_TPAUSE=y # # General architecture-dependent options @@ -741,6 +740,7 @@ CONFIG_HAVE_KPROBES_ON_FTRACE=y CONFIG_HAVE_FUNCTION_ERROR_INJECTION=y CONFIG_HAVE_NMI=y CONFIG_TRACE_IRQFLAGS_SUPPORT=y +CONFIG_TRACE_IRQFLAGS_NMI_SUPPORT=y CONFIG_HAVE_ARCH_TRACEHOOK=y CONFIG_HAVE_DMA_CONTIGUOUS=y CONFIG_GENERIC_SMP_IDLE_THREAD=y @@ -777,9 +777,6 @@ CONFIG_SECCOMP=y CONFIG_SECCOMP_FILTER=y # CONFIG_SECCOMP_CACHE_DEBUG is not set CONFIG_HAVE_ARCH_STACKLEAK=y -CONFIG_HAVE_STACKPROTECTOR=y -CONFIG_STACKPROTECTOR=y 
-CONFIG_STACKPROTECTOR_STRONG=y CONFIG_ARCH_SUPPORTS_LTO_CLANG=y CONFIG_ARCH_SUPPORTS_LTO_CLANG_THIN=y CONFIG_LTO_NONE=y @@ -1120,6 +1117,7 @@ CONFIG_INET_ESP=m CONFIG_INET_ESP_OFFLOAD=m # CONFIG_INET_ESPINTCP is not set CONFIG_INET_IPCOMP=m +CONFIG_INET_TABLE_PERTURB_ORDER=16 CONFIG_INET_XFRM_TUNNEL=m CONFIG_INET_TUNNEL=m CONFIG_INET_DIAG=m @@ -2150,6 +2148,7 @@ CONFIG_PNPACPI=y CONFIG_BLK_DEV=y # CONFIG_BLK_DEV_NULL_BLK is not set CONFIG_BLK_DEV_FD=y +# CONFIG_BLK_DEV_FD_RAWCMD is not set CONFIG_CDROM=y CONFIG_PARIDE=m @@ -2670,6 +2669,7 @@ CONFIG_ATL1=m CONFIG_ATL1E=m CONFIG_ATL1C=m CONFIG_ALX=m +CONFIG_CX_ECAT=m CONFIG_NET_VENDOR_BROADCOM=y CONFIG_B44=m CONFIG_B44_PCI_AUTOSELECT=y @@ -2687,8 +2687,6 @@ CONFIG_BNXT=m CONFIG_BNXT_SRIOV=y CONFIG_BNXT_FLOWER_OFFLOAD=y CONFIG_BNXT_HWMON=y -CONFIG_NET_VENDOR_BROCADE=y -CONFIG_BNA=m CONFIG_NET_VENDOR_CADENCE=y CONFIG_MACB=m CONFIG_MACB_USE_HWSTAMP=y @@ -2715,7 +2713,6 @@ CONFIG_NET_VENDOR_CISCO=y CONFIG_ENIC=m CONFIG_NET_VENDOR_CORTINA=y # CONFIG_GEMINI_ETHERNET is not set -CONFIG_CX_ECAT=m CONFIG_DNET=m CONFIG_NET_VENDOR_DEC=y CONFIG_NET_TULIP=y @@ -2772,7 +2769,6 @@ CONFIG_I40EVF=m # CONFIG_ICE is not set CONFIG_FM10K=m # CONFIG_IGC is not set -CONFIG_NET_VENDOR_MICROSOFT=y CONFIG_JME=m CONFIG_NET_VENDOR_LITEX=y # CONFIG_LITEX_LITEETH is not set @@ -2808,10 +2804,13 @@ CONFIG_NET_VENDOR_MICROCHIP=y # CONFIG_ENCX24J600 is not set # CONFIG_LAN743X is not set CONFIG_NET_VENDOR_MICROSEMI=y +CONFIG_NET_VENDOR_MICROSOFT=y CONFIG_NET_VENDOR_MYRI=y CONFIG_MYRI10GE=m CONFIG_MYRI10GE_DCA=y CONFIG_FEALNX=m +CONFIG_NET_VENDOR_NI=y +# CONFIG_NI_XGE_MANAGEMENT_ENET is not set CONFIG_NET_VENDOR_NATSEMI=y CONFIG_NATSEMI=m CONFIG_NS83820=m @@ -2822,8 +2821,6 @@ CONFIG_VXGE=m CONFIG_NET_VENDOR_NETRONOME=y CONFIG_NFP=m # CONFIG_NFP_DEBUG is not set -CONFIG_NET_VENDOR_NI=y -# CONFIG_NI_XGE_MANAGEMENT_ENET is not set CONFIG_NET_VENDOR_8390=y CONFIG_PCMCIA_AXNET=m CONFIG_NE2K_PCI=m @@ -2849,6 +2846,8 @@ CONFIG_QED_SRIOV=y CONFIG_QEDE=m CONFIG_QED_ISCSI=y CONFIG_QED_OOO=y +CONFIG_NET_VENDOR_BROCADE=y +CONFIG_BNA=m CONFIG_NET_VENDOR_QUALCOMM=y # CONFIG_QCA7000_SPI is not set # CONFIG_QCA7000_UART is not set @@ -2870,6 +2869,11 @@ CONFIG_NET_VENDOR_ROCKER=y CONFIG_NET_VENDOR_SAMSUNG=y CONFIG_SXGBE_ETH=m CONFIG_NET_VENDOR_SEEQ=y +CONFIG_NET_VENDOR_SILAN=y +CONFIG_SC92031=m +CONFIG_NET_VENDOR_SIS=y +CONFIG_SIS900=m +CONFIG_SIS190=m CONFIG_NET_VENDOR_SOLARFLARE=y CONFIG_SFC=m CONFIG_SFC_MTD=y @@ -2878,11 +2882,6 @@ CONFIG_SFC_SRIOV=y CONFIG_SFC_MCDI_LOGGING=y CONFIG_SFC_FALCON=m CONFIG_SFC_FALCON_MTD=y -CONFIG_NET_VENDOR_SILAN=y -CONFIG_SC92031=m -CONFIG_NET_VENDOR_SIS=y -CONFIG_SIS900=m -CONFIG_SIS190=m CONFIG_NET_VENDOR_SMSC=y CONFIG_PCMCIA_SMC91C92=m CONFIG_EPIC100=m @@ -3832,60 +3831,6 @@ CONFIG_TCG_TIS_ST33ZP24_I2C=m # CONFIG_XILLYUSB is not set # CONFIG_RANDOM_TRUST_CPU is not set # CONFIG_RANDOM_TRUST_BOOTLOADER is not set -CONFIG_LRNG=y - -# -# Specific DRNG seeding strategies -# -CONFIG_LRNG_OVERSAMPLE_ENTROPY_SOURCES=y -CONFIG_LRNG_OVERSAMPLE_ES_BITS=64 -CONFIG_LRNG_SEED_BUFFER_INIT_ADD_BITS=128 -# end of Specific DRNG seeding strategies - -# -# Entropy Source Configuration -# - -# -# Interrupt Entropy Source -# -CONFIG_LRNG_IRQ=y -CONFIG_LRNG_CONTINUOUS_COMPRESSION_ENABLED=y -# CONFIG_LRNG_CONTINUOUS_COMPRESSION_DISABLED is not set -CONFIG_LRNG_ENABLE_CONTINUOUS_COMPRESSION=y -# CONFIG_LRNG_SWITCHABLE_CONTINUOUS_COMPRESSION is not set -CONFIG_LRNG_COLLECTION_SIZE_512=y -# CONFIG_LRNG_COLLECTION_SIZE_1024 is not set -# CONFIG_LRNG_COLLECTION_SIZE_2048 is not 
set -# CONFIG_LRNG_COLLECTION_SIZE_4096 is not set -# CONFIG_LRNG_COLLECTION_SIZE_8192 is not set -CONFIG_LRNG_COLLECTION_SIZE=512 -# CONFIG_LRNG_HEALTH_TESTS is not set -CONFIG_LRNG_RCT_CUTOFF=31 -CONFIG_LRNG_APT_CUTOFF=325 -CONFIG_LRNG_IRQ_ENTROPY_RATE=256 - -# -# Jitter RNG Entropy Source -# -CONFIG_LRNG_JENT=y -CONFIG_LRNG_JENT_ENTROPY_RATE=16 - -# -# CPU Entropy Source -# -CONFIG_LRNG_CPU=y -CONFIG_LRNG_CPU_FULL_ENT_MULTIPLIER=1 -CONFIG_LRNG_CPU_ENTROPY_RATE=8 -# end of Entropy Source Configuration - -CONFIG_LRNG_DRNG_SWITCH=y -CONFIG_LRNG_KCAPI_HASH=y -CONFIG_LRNG_DRBG=m -# CONFIG_LRNG_KCAPI is not set -# CONFIG_LRNG_TESTING_MENU is not set -CONFIG_LRNG_SELFTEST=y -# CONFIG_LRNG_SELFTEST_PANIC is not set # end of Character devices # @@ -7738,7 +7683,6 @@ CONFIG_KEYS=y CONFIG_SECURITY=y # CONFIG_SECURITYFS is not set # CONFIG_SECURITY_NETWORK is not set -CONFIG_PAGE_TABLE_ISOLATION=y # CONFIG_SECURITY_PATH is not set # CONFIG_INTEL_TXT is not set CONFIG_HAVE_HARDENED_USERCOPY_ALLOCATOR=y @@ -7877,7 +7821,6 @@ CONFIG_CRYPTO_CRC32=m # CONFIG_CRYPTO_CRC32_PCLMUL is not set CONFIG_CRYPTO_XXHASH=y CONFIG_CRYPTO_BLAKE2B=y -# CONFIG_CRYPTO_BLAKE2S is not set # CONFIG_CRYPTO_BLAKE2S_X86 is not set CONFIG_CRYPTO_CRCT10DIF=y # CONFIG_CRYPTO_CRCT10DIF_PCLMUL is not set @@ -7966,24 +7909,6 @@ CONFIG_CRYPTO_USER_API_RNG=m CONFIG_CRYPTO_USER_API_AEAD=m CONFIG_CRYPTO_USER_API_ENABLE_OBSOLETE=y CONFIG_CRYPTO_HASH_INFO=y - -# -# Crypto library routines -# -CONFIG_CRYPTO_LIB_AES=y -CONFIG_CRYPTO_LIB_ARC4=m -# CONFIG_CRYPTO_LIB_BLAKE2S is not set -CONFIG_CRYPTO_ARCH_HAVE_LIB_CHACHA=m -CONFIG_CRYPTO_LIB_CHACHA_GENERIC=m -# CONFIG_CRYPTO_LIB_CHACHA is not set -# CONFIG_CRYPTO_LIB_CURVE25519 is not set -CONFIG_CRYPTO_LIB_DES=m -CONFIG_CRYPTO_LIB_POLY1305_RSIZE=11 -CONFIG_CRYPTO_ARCH_HAVE_LIB_POLY1305=m -CONFIG_CRYPTO_LIB_POLY1305_GENERIC=m -# CONFIG_CRYPTO_LIB_POLY1305 is not set -# CONFIG_CRYPTO_LIB_CHACHA20POLY1305 is not set -CONFIG_CRYPTO_LIB_SHA256=y # CONFIG_CRYPTO_HW is not set CONFIG_ASYMMETRIC_KEY_TYPE=y CONFIG_ASYMMETRIC_PUBLIC_KEY_SUBTYPE=y @@ -8028,6 +7953,27 @@ CONFIG_GENERIC_IOMAP=y CONFIG_ARCH_USE_CMPXCHG_LOCKREF=y CONFIG_ARCH_HAS_FAST_MULTIPLIER=y CONFIG_ARCH_USE_SYM_ANNOTATIONS=y + +# +# Crypto library routines +# +CONFIG_CRYPTO_LIB_AES=y +CONFIG_CRYPTO_LIB_ARC4=m +CONFIG_CRYPTO_LIB_BLAKE2S_GENERIC=y +CONFIG_CRYPTO_ARCH_HAVE_LIB_CHACHA=m +CONFIG_CRYPTO_LIB_CHACHA_GENERIC=m +# CONFIG_CRYPTO_LIB_CHACHA is not set +# CONFIG_CRYPTO_LIB_CURVE25519 is not set +CONFIG_CRYPTO_LIB_DES=m +CONFIG_CRYPTO_LIB_POLY1305_RSIZE=11 +CONFIG_CRYPTO_ARCH_HAVE_LIB_POLY1305=m +CONFIG_CRYPTO_LIB_POLY1305_GENERIC=m +# CONFIG_CRYPTO_LIB_POLY1305 is not set +# CONFIG_CRYPTO_LIB_CHACHA20POLY1305 is not set +CONFIG_CRYPTO_LIB_SHA256=y +# end of Crypto library routines + +CONFIG_LIB_MEMNEQ=y CONFIG_CRC_CCITT=m CONFIG_CRC16=y CONFIG_CRC_T10DIF=y @@ -8150,6 +8096,8 @@ CONFIG_SYMBOLIC_ERRNAME=y CONFIG_DEBUG_BUGVERBOSE=y # end of printk and dmesg options +CONFIG_AS_HAS_NON_CONST_LEB128=y + # # Compile-time checks and compiler options # @@ -8322,7 +8270,6 @@ CONFIG_IO_STRICT_DEVMEM=y # # x86 Debugging # -CONFIG_TRACE_IRQFLAGS_NMI_SUPPORT=y # CONFIG_X86_VERBOSE_BOOTUP is not set CONFIG_EARLY_PRINTK=y # CONFIG_EARLY_PRINTK_DBGP is not set -- GitLab From 338a4133b263d1aab585ea1657134f806f819c87 Mon Sep 17 00:00:00 2001 From: "A. 
Wilcox" <AWilcox@Wilcox-Tech.com> Date: Sun, 2 Apr 2023 17:11:07 -0500 Subject: [PATCH 47/47] system/openssh: Update to 9.3_p1 --- system/openssh/APKBUILD | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/system/openssh/APKBUILD b/system/openssh/APKBUILD index b649cc2bd4..9686385ceb 100644 --- a/system/openssh/APKBUILD +++ b/system/openssh/APKBUILD @@ -2,7 +2,7 @@ # Contributor: Valery Kartel <valery.kartel@gmail.com> # Maintainer: Horst Burkhardt <horst@adelielinux.org> pkgname=openssh -pkgver=9.0_p1 +pkgver=9.3_p1 _myver=${pkgver%_*}${pkgver#*_} pkgrel=0 pkgdesc="Remote login tool using encrypted SSH protocol" @@ -147,7 +147,7 @@ openrc() { install_if="openssh-server=$pkgver-r$pkgrel openrc" } -sha512sums="613ae95317e734868c6a60d9cc5af47a889baa3124bbdd2b31bb51dd6b57b136f4cfcb5604cca78a03bd500baab9b9b45eaf77e038b1ed776c86dce0437449a9 openssh-9.0p1.tar.gz +sha512sums="087ff6fe5f6caab4c6c3001d906399e02beffad7277280f11187420c2939fd4befdcb14643862a657ce4cad2f115b82a0a1a2c99df6ee54dcd76b53647637c19 openssh-9.3p1.tar.gz f3d5960572ddf49635d4edbdff45835df1b538a81840db169c36b39862e6fa8b0393ca90626000b758f59567ff6810b2537304098652483b3b31fb438a061de6 disable-forwarding-by-default.patch 70bffa6c061a02dd790dbaa68cd0b488395aa2312039b037e1a707e8cf7465754bf376d943d351914b64044c074af7504e845de865dec45ea00d992c2bbb8925 fix-utmpx.patch 34c0673f550e7afcd47eda4fe1da48fb42e5344c95ba8064c9c3c137fda9c43635b0f7b8145d0300f59c79f75a396ebd467afb54cdaa42aa251d624d0752dc84 sftp-interactive.patch -- GitLab