11/*
2- * Copyright (C) 2016 Michael Brown <mbrown@fensystems.co.uk>.
3- * Copyright (c) 2023, Xiaotian Wu <wuxiaotian@loongson.cn>
2+ * Copyright (C) 2026 Michael Brown <mbrown@fensystems.co.uk>.
43 *
54 * This program is free software; you can redistribute it and/or
65 * modify it under the terms of the GNU General Public License as
2928 */
3029
3130FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL );
31+ FILE_SECBOOT ( PERMITTED );
3232
3333#include <string.h>
3434
@@ -41,68 +41,65 @@ FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL );
4141 * @ret dest Destination address
4242 */
4343void loong64_memcpy ( void * dest , const void * src , size_t len ) {
	/* Copy len bytes from src to dest.
	 *
	 * This listing is a unified diff.  Lines marked with a minus
	 * sign are the implementation being removed (a bytewise loop
	 * for lengths below 16, otherwise 16 bytes per iteration).
	 * Lines marked with a plus sign are its replacement, which
	 * runs three ascending copy loops that share one running
	 * byte offset.
	 */
44- void * discard_dest ;
45- void * discard_end ;
46- const void * discard_src ;
47- size_t discard_offset ;
44+ size_t len_pre ;
45+ size_t len_mid ;
46+ size_t len_post ;
47+ size_t offset ;
4848 unsigned long discard_data ;
49- unsigned long discard_low ;
50- unsigned long discard_high ;
5149
52- /* If length is too short, then just copy individual bytes.
50+ /* Calculate pre-aligned, aligned, and post-aligned lengths.
51+ * (Align on the destination address, on the assumption that
52+ * misaligned stores are likely to be more expensive than
53+ * misaligned loads.)
5354 */
54- if ( len < 16 ) {
55- __asm__ __volatile__ ( "beqz %0, 2f\n\t"
56- "\n1:\n\t"
57- "addi.d %0, %0, -1\n\t"
58- "ldx.b %1, %3, %0\n\t"
59- "stx.b %1, %2, %0\n\t"
60- "bnez %0, 1b\n\t"
61- "\n2:\n\t"
62- : "=&r" ( discard_offset ),
63- "=&r" ( discard_data )
64- : "r" ( dest ), "r" ( src ), "0" ( len )
65- : "memory" , "t0" );
66- return ;
67- }
55+ len_pre = ( ( sizeof ( unsigned long ) - ( ( intptr_t ) dest ) ) &
56+ ( sizeof ( unsigned long ) - 1 ) );
	/* Clamp the unaligned prefix to the total length.  When the
	 * clamp applies, the subtractions below leave both len_mid
	 * and len_post equal to zero, so only the first loop copies
	 * anything.
	 */
57+ if ( len_pre > len )
58+ len_pre = len ;
59+ len -= len_pre ;
60+ len_mid = ( len & ~( sizeof ( unsigned long ) - 1 ) );
61+ len -= len_mid ;
62+ len_post = len ;
63+
64+ /* Copy pre-aligned section */
	/* Operands: %0 is the byte offset (seeded with zero through
	 * the matching constraint), %1 is a scratch register holding
	 * the byte in flight, %2 is dest, %3 is src and %4 is len_pre.
	 * Execution jumps straight to the comparison at label 2, so
	 * nothing is copied when len_pre is zero.
	 */
65+ __asm__ __volatile__ ( "b 2f\n\t"
66+ "\n1:\n\t"
67+ "ldx.b %1, %3, %0\n\t"
68+ "stx.b %1, %2, %0\n\t"
69+ "addi.d %0, %0, 1\n\t"
70+ "\n2:\n\t"
71+ "bne %0, %4, 1b\n\t"
72+ : "=&r" ( offset ), "=&r" ( discard_data )
73+ : "r" ( dest ), "r" ( src ), "r" ( len_pre ),
74+ "0" ( 0 )
75+ : "memory" );
6876
69- /* Copy 16 bytes at a time: one initial
70- * potentially unaligned access, multiple destination-aligned
71- * accesses, one final potentially unaligned access.
72- */
73- __asm__ __volatile__ ( "ld.d %3, %1, 0\n\t"
74- "ld.d %4, %1, 8\n\t"
75- "addi.d %1, %1, 16\n\t"
76- "st.d %3, %0, 0\n\t"
77- "st.d %4, %0, 8\n\t"
78- "addi.d %0, %0, 16\n\t"
79- "andi %3, %0, 15\n\t"
80- "sub.d %0, %0, %3\n\t"
81- "sub.d %1, %1, %3\n\t"
82- "addi.d $t0, $zero, 0xf\n\t"
83- "andn %2, %5, $t0\n\t"
84- "b 2f\n\t"
77+ /* Copy aligned section */
	/* Same loop shape as the pre-aligned section, but using the
	 * .d forms of the indexed load and store and stepping the
	 * offset by the immediate %5, which is
	 * sizeof ( unsigned long ).  The offset carries on from where
	 * the previous loop stopped, and the loop ends when it
	 * reaches offset + len_mid (%4).
	 */
78+ __asm__ __volatile__ ( "b 2f\n\t"
8579 "\n1:\n\t"
86- "ld.d %3, %1, 0\n\t"
87- "ld.d %4, %1, 8\n\t"
88- "addi.d %1, %1, 16\n\t"
89- "st.d %3, %0, 0\n\t"
90- "st.d %4, %0, 8\n\t"
91- "addi.d %0, %0, 16\n\t"
80+ "ldx.d %1, %3, %0\n\t"
81+ "stx.d %1, %2, %0\n\t"
82+ "addi.d %0, %0, %5\n\t"
9283 "\n2:\n\t"
93- "bne %0, %2, 1b\n\t"
94- "ld.d %3, %6, -16\n\t"
95- "ld.d %4, %6, -8\n\t"
96- "st.d %3, %5, -16\n\t"
97- "st.d %4, %5, -8\n\t"
98- : "=&r" ( discard_dest ),
99- "=&r" ( discard_src ),
100- "=&r" ( discard_end ),
101- "=&r" ( discard_low ),
102- "=&r" ( discard_high )
103- : "r" ( dest + len ), "r" ( src + len ),
104- "0" ( dest ), "1" ( src )
105- : "memory" , "t0" );
84+ "bne %0, %4, 1b\n\t"
85+ : "+r" ( offset ), "=&r" ( discard_data )
86+ : "r" ( dest ), "r" ( src ),
87+ "r" ( offset + len_mid ),
88+ "i" ( sizeof ( unsigned long ) )
89+ : "memory" );
90+
91+ /* Copy post-aligned section */
	/* Bytewise loop for whatever remains after the aligned
	 * section.  It continues from the current offset and ends at
	 * offset + len_post (%4).
	 */
92+ __asm__ __volatile__ ( "b 2f\n\t"
93+ "\n1:\n\t"
94+ "ldx.b %1, %3, %0\n\t"
95+ "stx.b %1, %2, %0\n\t"
96+ "addi.d %0, %0, 1\n\t"
97+ "\n2:\n\t"
98+ "bne %0, %4, 1b\n\t"
99+ : "+r" ( offset ), "=&r" ( discard_data )
100+ : "r" ( dest ), "r" ( src ),
101+ "r" ( offset + len_post )
102+ : "memory" );
106103}
107104
108105/**
@@ -112,50 +109,54 @@ void loong64_memcpy ( void *dest, const void *src, size_t len ) {
112109 * @v len Length
113110 */
114111void loong64_bzero ( void * dest , size_t len ) {
	/* Zero len bytes starting at dest.
	 *
	 * This listing is a unified diff.  Lines marked with a minus
	 * sign are the implementation being removed (a bytewise loop
	 * for lengths below 16, otherwise 16 bytes per iteration).
	 * Lines marked with a plus sign are its replacement, which
	 * runs three ascending store loops that share one running
	 * byte offset.
	 */
115- size_t discard_offset ;
116- void * discard_dest ;
117- void * discard_end ;
118-
119- /* If length is too short, then just zero individual bytes.
120- */
121- if ( len < 16 ) {
122- __asm__ __volatile__ ( "beqz %0, 2f\n\t"
123- "\n1:\n\t"
124- "addi.d %0, %0, -1\n\t"
125- "stx.b $zero, %1, %0\n\t"
126- "bnez %0, 1b\n\t"
127- "\n2:\n\t"
128- : "=&r" ( discard_offset )
129- : "r" ( dest ), "0" ( len )
130- : "memory" );
131- return ;
132- }
112+ size_t len_pre ;
113+ size_t len_mid ;
114+ size_t len_post ;
115+ size_t offset ;
116+
117+ /* Calculate pre-aligned, aligned, and post-aligned lengths */
118+ len_pre = ( ( sizeof ( unsigned long ) - ( ( intptr_t ) dest ) ) &
119+ ( sizeof ( unsigned long ) - 1 ) );
	/* Clamp the unaligned prefix to the total length.  When the
	 * clamp applies, the subtractions below leave both len_mid
	 * and len_post equal to zero, so only the first loop stores
	 * anything.
	 */
120+ if ( len_pre > len )
121+ len_pre = len ;
122+ len -= len_pre ;
123+ len_mid = ( len & ~( sizeof ( unsigned long ) - 1 ) );
124+ len -= len_mid ;
125+ len_post = len ;
126+
127+ /* Zero pre-aligned section */
	/* Operands: %0 is the byte offset (seeded with zero through
	 * the matching constraint), %1 is dest and %2 is len_pre.
	 * Each iteration stores $zero as a single byte at dest plus
	 * offset.  Execution jumps straight to the comparison at
	 * label 2, so nothing is written when len_pre is zero.
	 */
128+ __asm__ __volatile__ ( "b 2f\n\t"
129+ "\n1:\n\t"
130+ "stx.b $zero, %1, %0\n\t"
131+ "addi.d %0, %0, 1\n\t"
132+ "\n2:\n\t"
133+ "bne %0, %2, 1b\n\t"
134+ : "=&r" ( offset )
135+ : "r" ( dest ), "r" ( len_pre ), "0" ( 0 )
136+ : "memory" );
133137
134- /* To zero 16 bytes at a time: one initial
135- * potentially unaligned access, multiple aligned accesses,
136- * one final potentially unaligned access.
137- */
138+ /* Zero aligned section */
	/* Same loop shape as the pre-aligned section, but using the
	 * .d form of the indexed store and stepping the offset by the
	 * immediate %3, which is sizeof ( unsigned long ).  The
	 * offset carries on from where the previous loop stopped, and
	 * the loop ends when it reaches offset + len_mid (%2).
	 */
139+ __asm__ __volatile__ ( "b 2f\n\t"
140+ "\n1:\n\t"
141+ "stx.d $zero, %1, %0\n\t"
142+ "addi.d %0, %0, %3\n\t"
143+ "\n2:\n\t"
144+ "bne %0, %2, 1b\n\t"
145+ : "+r" ( offset )
146+ : "r" ( dest ), "r" ( offset + len_mid ),
147+ "i" ( sizeof ( unsigned long ) )
148+ : "memory" );
138149
139- __asm__ __volatile__ ( "st.d $zero, %0, 0\n\t"
140- "st.d $zero, %0, 8\n\t"
141- "addi.d %0, %0, 16\n\t"
142- "addi.w $t0, $zero, 15\n\t"
143- "andn %0, %0, $t0\n\t"
144- "addi.w $t0, $zero, 15\n\t"
145- "andn %1, %2, $t0\n\t"
146- "b 2f\n\t"
150+ /* Zero post-aligned section */
	/* Bytewise loop for whatever remains after the aligned
	 * section.  It continues from the current offset and ends at
	 * offset + len_post (%2).
	 */
151+ __asm__ __volatile__ ( "b 2f\n\t"
147152 "\n1:\n\t"
148- "st.d $zero, %0, 0\n\t"
149- "st.d $zero, %0, 8\n\t"
150- "addi.d %0, %0, 16\n\t"
153+ "stx.b $zero, %1, %0\n\t"
154+ "addi.d %0, %0, 1\n\t"
151155 "\n2:\n\t"
152- "bne %0, %1, 1b\n\t"
153- "st.d $zero, %2, -16\n\t"
154- "st.d $zero, %2, -8\n\t"
155- : "=&r" ( discard_dest ),
156- "=&r" ( discard_end )
157- : "r" ( dest + len ), "0" ( dest )
158- : "memory" , "t0" );
156+ "bne %0, %2, 1b\n\t"
157+ : "+r" ( offset )
158+ : "r" ( dest ), "r" ( offset + len_post )
159+ : "memory" );
159160}
160161
161162/**
@@ -166,10 +167,14 @@ void loong64_bzero ( void *dest, size_t len ) {
166167 * @v character Fill character
167168 *
168169 * The unusual parameter order is to allow for more efficient
169- * tail-calling to loong64_memset () when zeroing a region.
170+ * tail-calling to loong64_bzero () when zeroing a region.
170171 */
171172void loong64_memset ( void * dest , size_t len , int character ) {
	/* Fill len bytes starting at dest with character.
	 *
	 * This listing is a unified diff.  Lines marked with a minus
	 * sign are being removed and lines marked with a plus sign
	 * are being added.  The added code returns early for a zero
	 * length and replaces the descending fill loop with an
	 * ascending one.
	 */
172- size_t discard_offset ;
173+ size_t offset ;
174+
175+ /* Do nothing if length is zero */
176+ if ( ! len )
177+ return ;
173178
174179 /* Use optimised zeroing code if applicable */
175180 if ( character == 0 ) {
	/* NOTE(review): the hunk header on the next line hides the
	 * body of this branch and the start of the comment that
	 * follows it.  Presumably the branch hands off to
	 * loong64_bzero() and returns - confirm against the full
	 * file.
	 */
@@ -181,86 +186,49 @@ void loong64_memset ( void *dest, size_t len, int character ) {
181186 * value is relatively rare and unlikely to be
182187 * performance-critical.
183188 */
184- __asm__ __volatile__ ( "beqz %0, 2f\n\t"
185- "\n1:\n\t"
186- "addi.d %0, %0, -1\n\t"
	/* Added fill loop.  Operands: %0 is the byte offset (seeded
	 * with zero through the matching constraint), %1 is dest, %2
	 * is character and %3 is len.  The loop performs its byte
	 * store before the first comparison, so it relies on the
	 * zero-length early return above.  Label 2 is kept from the
	 * context lines but nothing in the added code branches to it.
	 */
189+ __asm__ __volatile__ ( "\n1:\n\t"
187190 "stx.b %2, %1, %0\n\t"
188- "bnez %0, 1b\n\t"
189- "\n2:\n\t"
190- : "=&r" ( discard_offset )
191- : "r" ( dest ), "r" ( character ), "0" ( len )
192- : "memory" );
193- }
194-
	/* The lines marked with a minus sign from here to the end of
	 * the asm statement belong to the removed helper
	 * loong64_memmove_forwards().  Its last three instruction
	 * lines are unchanged context that the added fill loop
	 * reuses.
	 */
195- /**
196- * Copy (possibly overlapping) memory region forwards
197- *
198- * @v dest Destination region
199- * @v src Source region
200- * @v len Length
201- */
202- void loong64_memmove_forwards ( void * dest , const void * src , size_t len ) {
203- void * discard_dest ;
204- const void * discard_src ;
205- unsigned long discard_data ;
206-
207- /* Assume memmove() is not performance-critical, and perform a
208- * bytewise copy for simplicity.
209- */
210- __asm__ __volatile__ ( "b 2f\n\t"
211- "\n1:\n\t"
212- "ld.b %2, %1, 0\n\t"
213- "addi.d %1, %1, 1\n\t"
214- "st.b %2, %0, 0\n\t"
215191 "addi.d %0, %0, 1\n\t"
216192 "\n2:\n\t"
217193 "bne %0, %3, 1b\n\t"
218- : "=&r" ( discard_dest ),
219- "=&r" ( discard_src ),
220- "=&r" ( discard_data )
221- : "r" ( dest + len ), "0" ( dest ), "1" ( src )
194+ : "=&r" ( offset )
195+ : "r" ( dest ), "r" ( character ), "r" ( len ),
196+ "0" ( 0 )
222197 : "memory" );
223198}
224199
225200/**
226- * Copy (possibly overlapping) memory region backwards
201+ * Copy (possibly overlapping) memory region
227202 *
228203 * @v dest Destination region
229204 * @v src Source region
230205 * @v len Length
231206 */
232- void loong64_memmove_backwards ( void * dest , const void * src , size_t len ) {
233- size_t discard_offset ;
207+ void loong64_memmove ( void * dest , const void * src , size_t len ) {
	/* Copy len bytes from src to dest where the regions may
	 * overlap.
	 *
	 * This listing is a unified diff.  The removed
	 * loong64_memmove_backwards() helper (lines marked with a
	 * minus sign) is turned into loong64_memmove() itself (lines
	 * marked with a plus sign), which now handles the zero-length
	 * and forwards cases before falling through to the backwards
	 * byte loop.
	 */
208+ size_t offset ;
234209 unsigned long discard_data ;
235210
211+ /* Do nothing if length is zero */
212+ if ( ! len )
213+ return ;
214+
215+ /* Use memcpy() if copy direction is forwards */
	/* NOTE(review): this depends on memcpy() copying in ascending
	 * address order when dest lies below an overlapping src.  The
	 * loong64_memcpy() loops in this listing do ascend, but
	 * confirm that memcpy() resolves to that routine here.
	 */
216+ if ( dest <= src ) {
217+ memcpy ( dest , src , len );
218+ return ;
219+ }
220+
236221 /* Assume memmove() is not performance-critical, and perform a
237- * bytewise copy for simplicity.
222+ * bytewise copy backwards for simplicity.
238223 */
	/* Operands: %0 is the byte offset (seeded with len through
	 * the matching constraint), %1 is a scratch register holding
	 * the byte in flight, %2 is dest and %3 is src.  The offset
	 * is decremented before each load and store pair, so the byte
	 * at index len - 1 moves first and the byte at index 0 moves
	 * last.  The added loop has no initial zero test, so it
	 * relies on the zero-length early return above.
	 */
239- __asm__ __volatile__ ( "beqz %0, 2f\n\t"
240- "\n1:\n\t"
224+ __asm__ __volatile__ ( "\n1:\n\t"
241225 "addi.d %0, %0, -1\n\t"
242226 "ldx.b %1, %3, %0\n\t"
243227 "stx.b %1, %2, %0\n\t"
244- "bnez %0, 1b\n\t"
245228 "\n2:\n\t"
229+ "bnez %0, 1b\n\t"
246- : "=&r" ( discard_offset ),
247- "=&r" ( discard_data )
248- : "r" ( dest ), "r" ( src ), "0" ( len )
230+ : "=&r" ( offset ), "=&r" ( discard_data )
231+ : "r" ( dest ), "r" ( src ),
232+ "0" ( len )
249233 : "memory" );
250234}
251-
	/* Everything below is the removed dispatcher.  It applied the
	 * same dest <= src test to choose between the forwards and
	 * backwards helpers.
	 */
252- /**
253- * Copy (possibly overlapping) memory region
254- *
255- * @v dest Destination region
256- * @v src Source region
257- * @v len Length
258- */
259- void loong64_memmove ( void * dest , const void * src , size_t len ) {
260-
261- if ( dest <= src ) {
262- loong64_memmove_forwards ( dest , src , len );
263- } else {
264- loong64_memmove_backwards ( dest , src , len );
265- }
266- }
0 commit comments