|
| 1 | +From git@z Thu Jan 1 00:00:00 1970 |
| 2 | +Subject: [PATCH v3 1/2] include: Move typedefs in nls.h to their own header |
| 3 | +From: Nathan Chancellor <nathan@kernel.org> |
| 4 | +Date: Fri, 28 Mar 2025 12:26:31 -0700 |
| 5 | +Message-Id: <20250328-string-add-wcslen-for-llvm-opt-v3-1-a180b4c0c1c4@kernel.org> |
| 6 | +MIME-Version: 1.0 |
| 7 | +Content-Type: text/plain; charset="utf-8" |
| 8 | +Content-Transfer-Encoding: 7bit |
| 9 | + |
| 10 | +In order to allow commonly included headers such as string.h to access |
| 11 | +typedefs such as wchar_t without running into issues with the rest of |
| 12 | +the NLS library, refactor the typedefs out into their own header that |
| 13 | +can be included in a much safer manner. |
| 14 | + |
| 15 | +Cc: stable@vger.kernel.org |
| 16 | +Reviewed-by: Andy Shevchenko <andy@kernel.org> |
| 17 | +Signed-off-by: Nathan Chancellor <nathan@kernel.org> |
| 18 | +--- |
| 19 | + include/linux/nls.h | 19 +------------------ |
| 20 | + include/linux/nls_types.h | 26 ++++++++++++++++++++++++++ |
| 21 | + 2 files changed, 27 insertions(+), 18 deletions(-) |
| 22 | + |
| 23 | +diff --git a/include/linux/nls.h b/include/linux/nls.h |
| 24 | +index e0bf8367b274..3d416d1f60b6 100644 |
| 25 | +--- a/include/linux/nls.h |
| 26 | ++++ b/include/linux/nls.h |
| 27 | +@@ -3,24 +3,7 @@ |
| 28 | + #define _LINUX_NLS_H |
| 29 | + |
| 30 | + #include <linux/init.h> |
| 31 | +- |
| 32 | +-/* Unicode has changed over the years. Unicode code points no longer |
| 33 | +- * fit into 16 bits; as of Unicode 5 valid code points range from 0 |
| 34 | +- * to 0x10ffff (17 planes, where each plane holds 65536 code points). |
| 35 | +- * |
| 36 | +- * The original decision to represent Unicode characters as 16-bit |
| 37 | +- * wchar_t values is now outdated. But plane 0 still includes the |
| 38 | +- * most commonly used characters, so we will retain it. The newer |
| 39 | +- * 32-bit unicode_t type can be used when it is necessary to |
| 40 | +- * represent the full Unicode character set. |
| 41 | +- */ |
| 42 | +- |
| 43 | +-/* Plane-0 Unicode character */ |
| 44 | +-typedef u16 wchar_t; |
| 45 | +-#define MAX_WCHAR_T 0xffff |
| 46 | +- |
| 47 | +-/* Arbitrary Unicode character */ |
| 48 | +-typedef u32 unicode_t; |
| 49 | ++#include <linux/nls_types.h> |
| 50 | + |
| 51 | + struct nls_table { |
| 52 | + const char *charset; |
| 53 | +diff --git a/include/linux/nls_types.h b/include/linux/nls_types.h |
| 54 | +new file mode 100644 |
| 55 | +index 000000000000..9479df1016da |
| 56 | +--- /dev/null |
| 57 | ++++ b/include/linux/nls_types.h |
| 58 | +@@ -0,0 +1,26 @@ |
| 59 | ++/* SPDX-License-Identifier: GPL-2.0 */ |
| 60 | ++#ifndef _LINUX_NLS_TYPES_H |
| 61 | ++#define _LINUX_NLS_TYPES_H |
| 62 | ++ |
| 63 | ++#include <linux/types.h> |
| 64 | ++ |
| 65 | ++/* |
| 66 | ++ * Unicode has changed over the years. Unicode code points no longer |
| 67 | ++ * fit into 16 bits; as of Unicode 5 valid code points range from 0 |
| 68 | ++ * to 0x10ffff (17 planes, where each plane holds 65536 code points). |
| 69 | ++ * |
| 70 | ++ * The original decision to represent Unicode characters as 16-bit |
| 71 | ++ * wchar_t values is now outdated. But plane 0 still includes the |
| 72 | ++ * most commonly used characters, so we will retain it. The newer |
| 73 | ++ * 32-bit unicode_t type can be used when it is necessary to |
| 74 | ++ * represent the full Unicode character set. |
| 75 | ++ */ |
| 76 | ++ |
| 77 | ++/* Plane-0 Unicode character */ |
| 78 | ++typedef u16 wchar_t; |
| 79 | ++#define MAX_WCHAR_T 0xffff |
| 80 | ++ |
| 81 | ++/* Arbitrary Unicode character */ |
| 82 | ++typedef u32 unicode_t; |
| 83 | ++ |
| 84 | ++#endif /* _LINUX_NLS_TYPES_H */ |
| 85 | +-- |
| 86 | +2.49.0 |
| 87 | + |
| 88 | +From git@z Thu Jan 1 00:00:00 1970 |
| 89 | +Subject: [PATCH v3 2/2] lib/string.c: Add wcslen() |
| 90 | +From: Nathan Chancellor <nathan@kernel.org> |
| 91 | +Date: Fri, 28 Mar 2025 12:26:32 -0700 |
| 92 | +Message-Id: <20250328-string-add-wcslen-for-llvm-opt-v3-2-a180b4c0c1c4@kernel.org> |
| 93 | +MIME-Version: 1.0 |
| 94 | +Content-Type: text/plain; charset="utf-8" |
| 95 | +Content-Transfer-Encoding: 7bit |
| 96 | + |
| 97 | +A recent optimization change in LLVM [1] aims to transform certain loop |
| 98 | +idioms into calls to strlen() or wcslen(). This change transforms the |
| 99 | +first while loop in UniStrcat() into a call to wcslen(), breaking the |
| 100 | +build when UniStrcat() gets inlined into alloc_path_with_tree_prefix(): |
| 101 | + |
| 102 | + ld.lld: error: undefined symbol: wcslen |
| 103 | + >>> referenced by nls_ucs2_utils.h:54 (fs/smb/client/../../nls/nls_ucs2_utils.h:54) |
| 104 | + >>> vmlinux.o:(alloc_path_with_tree_prefix) |
| 105 | + >>> referenced by nls_ucs2_utils.h:54 (fs/smb/client/../../nls/nls_ucs2_utils.h:54) |
| 106 | + >>> vmlinux.o:(alloc_path_with_tree_prefix) |
| 107 | + |
| 108 | +The kernel does not build with '-ffreestanding' (which would avoid this |
| 109 | +transformation) because it does want libcall optimizations in general |
| 110 | +and turning on '-ffreestanding' disables the majority of them. While |
| 111 | +'-fno-builtin-wcslen' would be more targeted at the problem, it does not |
| 112 | +work with LTO. |
| 113 | + |
| 114 | +Add a basic wcslen() to avoid this linkage failure. While no |
| 115 | +architecture or FORTIFY_SOURCE overrides this, add it to string.c |
| 116 | +instead of string_helpers.c so that it is built with '-ffreestanding', |
| 117 | +otherwise the compiler might transform it into a call to itself. |
| 118 | + |
| 119 | +Cc: stable@vger.kernel.org |
| 120 | +Link: https://github.com/llvm/llvm-project/commit/9694844d7e36fd5e01011ab56b64f27b867aa72d [1] |
| 121 | +Signed-off-by: Nathan Chancellor <nathan@kernel.org> |
| 122 | +--- |
| 123 | + include/linux/string.h | 2 ++ |
| 124 | + lib/string.c | 11 +++++++++++ |
| 125 | + 2 files changed, 13 insertions(+) |
| 126 | + |
| 127 | +diff --git a/include/linux/string.h b/include/linux/string.h |
| 128 | +index 0403a4ca4c11..b000f445a2c7 100644 |
| 129 | +--- a/include/linux/string.h |
| 130 | ++++ b/include/linux/string.h |
| 131 | +@@ -10,6 +10,7 @@ |
| 132 | + #include <linux/stddef.h> /* for NULL */ |
| 133 | + #include <linux/err.h> /* for ERR_PTR() */ |
| 134 | + #include <linux/errno.h> /* for E2BIG */ |
| 135 | ++#include <linux/nls_types.h> /* for wchar_t */ |
| 136 | + #include <linux/overflow.h> /* for check_mul_overflow() */ |
| 137 | + #include <linux/stdarg.h> |
| 138 | + #include <uapi/linux/string.h> |
| 139 | +@@ -203,6 +204,7 @@ extern __kernel_size_t strlen(const char *); |
| 140 | + #ifndef __HAVE_ARCH_STRNLEN |
| 141 | + extern __kernel_size_t strnlen(const char *,__kernel_size_t); |
| 142 | + #endif |
| 143 | ++__kernel_size_t wcslen(const wchar_t *s); |
| 144 | + #ifndef __HAVE_ARCH_STRPBRK |
| 145 | + extern char * strpbrk(const char *,const char *); |
| 146 | + #endif |
| 147 | +diff --git a/lib/string.c b/lib/string.c |
| 148 | +index eb4486ed40d2..2c6f8c8f4159 100644 |
| 149 | +--- a/lib/string.c |
| 150 | ++++ b/lib/string.c |
| 151 | +@@ -21,6 +21,7 @@ |
| 152 | + #include <linux/errno.h> |
| 153 | + #include <linux/limits.h> |
| 154 | + #include <linux/linkage.h> |
| 155 | ++#include <linux/nls_types.h> |
| 156 | + #include <linux/stddef.h> |
| 157 | + #include <linux/string.h> |
| 158 | + #include <linux/types.h> |
| 159 | +@@ -429,6 +430,16 @@ size_t strnlen(const char *s, size_t count) |
| 160 | + EXPORT_SYMBOL(strnlen); |
| 161 | + #endif |
| 162 | + |
| 163 | ++size_t wcslen(const wchar_t *s) |
| 164 | ++{ |
| 165 | ++ const wchar_t *sc; |
| 166 | ++ |
| 167 | ++ for (sc = s; *sc != '\0'; ++sc) |
| 168 | ++ /* nothing */; |
| 169 | ++ return sc - s; |
| 170 | ++} |
| 171 | ++EXPORT_SYMBOL(wcslen); |
| 172 | ++ |
| 173 | + #ifndef __HAVE_ARCH_STRSPN |
| 174 | + /** |
| 175 | + * strspn - Calculate the length of the initial substring of @s which only contain letters in @accept |
| 176 | +-- |
| 177 | +2.49.0 |
| 178 | + |
0 commit comments