Skip to content

Commit dfe6a5e

Browse files
committed
src: Add patch for wcslen() error
Signed-off-by: Nathan Chancellor <nathan@kernel.org>
1 parent 40a38ee commit dfe6a5e

File tree

1 file changed

178 additions, 0 deletions

src/wcslen.patch

Lines changed: 178 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,178 @@
1+
From git@z Thu Jan 1 00:00:00 1970
2+
Subject: [PATCH v3 1/2] include: Move typedefs in nls.h to their own header
3+
From: Nathan Chancellor <nathan@kernel.org>
4+
Date: Fri, 28 Mar 2025 12:26:31 -0700
5+
Message-Id: <20250328-string-add-wcslen-for-llvm-opt-v3-1-a180b4c0c1c4@kernel.org>
6+
MIME-Version: 1.0
7+
Content-Type: text/plain; charset="utf-8"
8+
Content-Transfer-Encoding: 7bit
9+
10+
In order to allow commonly included headers such as string.h to access
11+
typedefs such as wchar_t without running into issues with the rest of
12+
the NLS library, refactor the typedefs out into their own header that
13+
can be included in a much safer manner.
14+
15+
Cc: stable@vger.kernel.org
16+
Reviewed-by: Andy Shevchenko <andy@kernel.org>
17+
Signed-off-by: Nathan Chancellor <nathan@kernel.org>
18+
---
19+
include/linux/nls.h | 19 +------------------
20+
include/linux/nls_types.h | 26 ++++++++++++++++++++++++++
21+
2 files changed, 27 insertions(+), 18 deletions(-)
22+
23+
diff --git a/include/linux/nls.h b/include/linux/nls.h
24+
index e0bf8367b274..3d416d1f60b6 100644
25+
--- a/include/linux/nls.h
26+
+++ b/include/linux/nls.h
27+
@@ -3,24 +3,7 @@
28+
#define _LINUX_NLS_H
29+
30+
#include <linux/init.h>
31+
-
32+
-/* Unicode has changed over the years. Unicode code points no longer
33+
- * fit into 16 bits; as of Unicode 5 valid code points range from 0
34+
- * to 0x10ffff (17 planes, where each plane holds 65536 code points).
35+
- *
36+
- * The original decision to represent Unicode characters as 16-bit
37+
- * wchar_t values is now outdated. But plane 0 still includes the
38+
- * most commonly used characters, so we will retain it. The newer
39+
- * 32-bit unicode_t type can be used when it is necessary to
40+
- * represent the full Unicode character set.
41+
- */
42+
-
43+
-/* Plane-0 Unicode character */
44+
-typedef u16 wchar_t;
45+
-#define MAX_WCHAR_T 0xffff
46+
-
47+
-/* Arbitrary Unicode character */
48+
-typedef u32 unicode_t;
49+
+#include <linux/nls_types.h>
50+
51+
struct nls_table {
52+
const char *charset;
53+
diff --git a/include/linux/nls_types.h b/include/linux/nls_types.h
54+
new file mode 100644
55+
index 000000000000..9479df1016da
56+
--- /dev/null
57+
+++ b/include/linux/nls_types.h
58+
@@ -0,0 +1,26 @@
59+
+/* SPDX-License-Identifier: GPL-2.0 */
60+
+#ifndef _LINUX_NLS_TYPES_H
61+
+#define _LINUX_NLS_TYPES_H
62+
+
63+
+#include <linux/types.h>
64+
+
65+
+/*
66+
+ * Unicode has changed over the years. Unicode code points no longer
67+
+ * fit into 16 bits; as of Unicode 5 valid code points range from 0
68+
+ * to 0x10ffff (17 planes, where each plane holds 65536 code points).
69+
+ *
70+
+ * The original decision to represent Unicode characters as 16-bit
71+
+ * wchar_t values is now outdated. But plane 0 still includes the
72+
+ * most commonly used characters, so we will retain it. The newer
73+
+ * 32-bit unicode_t type can be used when it is necessary to
74+
+ * represent the full Unicode character set.
75+
+ */
76+
+
77+
+/* Plane-0 Unicode character */
78+
+typedef u16 wchar_t;
79+
+#define MAX_WCHAR_T 0xffff
80+
+
81+
+/* Arbitrary Unicode character */
82+
+typedef u32 unicode_t;
83+
+
84+
+#endif /* _LINUX_NLS_TYPES_H */
85+
--
86+
2.49.0
87+
88+
From git@z Thu Jan 1 00:00:00 1970
89+
Subject: [PATCH v3 2/2] lib/string.c: Add wcslen()
90+
From: Nathan Chancellor <nathan@kernel.org>
91+
Date: Fri, 28 Mar 2025 12:26:32 -0700
92+
Message-Id: <20250328-string-add-wcslen-for-llvm-opt-v3-2-a180b4c0c1c4@kernel.org>
93+
MIME-Version: 1.0
94+
Content-Type: text/plain; charset="utf-8"
95+
Content-Transfer-Encoding: 7bit
96+
97+
A recent optimization change in LLVM [1] aims to transform certain loop
98+
idioms into calls to strlen() or wcslen(). This change transforms the
99+
first while loop in UniStrcat() into a call to wcslen(), breaking the
100+
build when UniStrcat() gets inlined into alloc_path_with_tree_prefix():
101+
102+
ld.lld: error: undefined symbol: wcslen
103+
>>> referenced by nls_ucs2_utils.h:54 (fs/smb/client/../../nls/nls_ucs2_utils.h:54)
104+
>>> vmlinux.o:(alloc_path_with_tree_prefix)
105+
>>> referenced by nls_ucs2_utils.h:54 (fs/smb/client/../../nls/nls_ucs2_utils.h:54)
106+
>>> vmlinux.o:(alloc_path_with_tree_prefix)
107+
108+
The kernel does not build with '-ffreestanding' (which would avoid this
109+
transformation) because it does want libcall optimizations in general
110+
and turning on '-ffreestanding' disables the majority of them. While
111+
'-fno-builtin-wcslen' would be more targeted at the problem, it does not
112+
work with LTO.
113+
114+
Add a basic wcslen() to avoid this linkage failure. While no
115+
architecture or FORTIFY_SOURCE overrides this, add it to string.c
116+
instead of string_helpers.c so that it is built with '-ffreestanding',
117+
otherwise the compiler might transform it into a call to itself.
118+
119+
Cc: stable@vger.kernel.org
120+
Link: https://github.com/llvm/llvm-project/commit/9694844d7e36fd5e01011ab56b64f27b867aa72d [1]
121+
Signed-off-by: Nathan Chancellor <nathan@kernel.org>
122+
---
123+
include/linux/string.h | 2 ++
124+
lib/string.c | 11 +++++++++++
125+
2 files changed, 13 insertions(+)
126+
127+
diff --git a/include/linux/string.h b/include/linux/string.h
128+
index 0403a4ca4c11..b000f445a2c7 100644
129+
--- a/include/linux/string.h
130+
+++ b/include/linux/string.h
131+
@@ -10,6 +10,7 @@
132+
#include <linux/stddef.h> /* for NULL */
133+
#include <linux/err.h> /* for ERR_PTR() */
134+
#include <linux/errno.h> /* for E2BIG */
135+
+#include <linux/nls_types.h> /* for wchar_t */
136+
#include <linux/overflow.h> /* for check_mul_overflow() */
137+
#include <linux/stdarg.h>
138+
#include <uapi/linux/string.h>
139+
@@ -203,6 +204,7 @@ extern __kernel_size_t strlen(const char *);
140+
#ifndef __HAVE_ARCH_STRNLEN
141+
extern __kernel_size_t strnlen(const char *,__kernel_size_t);
142+
#endif
143+
+__kernel_size_t wcslen(const wchar_t *s);
144+
#ifndef __HAVE_ARCH_STRPBRK
145+
extern char * strpbrk(const char *,const char *);
146+
#endif
147+
diff --git a/lib/string.c b/lib/string.c
148+
index eb4486ed40d2..2c6f8c8f4159 100644
149+
--- a/lib/string.c
150+
+++ b/lib/string.c
151+
@@ -21,6 +21,7 @@
152+
#include <linux/errno.h>
153+
#include <linux/limits.h>
154+
#include <linux/linkage.h>
155+
+#include <linux/nls_types.h>
156+
#include <linux/stddef.h>
157+
#include <linux/string.h>
158+
#include <linux/types.h>
159+
@@ -429,6 +430,16 @@ size_t strnlen(const char *s, size_t count)
160+
EXPORT_SYMBOL(strnlen);
161+
#endif
162+
163+
+size_t wcslen(const wchar_t *s)
164+
+{
165+
+ const wchar_t *sc;
166+
+
167+
+ for (sc = s; *sc != '\0'; ++sc)
168+
+ /* nothing */;
169+
+ return sc - s;
170+
+}
171+
+EXPORT_SYMBOL(wcslen);
172+
+
173+
#ifndef __HAVE_ARCH_STRSPN
174+
/**
175+
* strspn - Calculate the length of the initial substring of @s which only contain letters in @accept
176+
--
177+
2.49.0
178+

0 commit comments

Comments
 (0)