Skip to content

Commit ecf9a0d

Browse files
committed
Bug 1940795: Don't tear aligned subwords in AtomicMemcpy. r=spidermonkey-reviewers,jandem
Reads/writes to integer typed arrays mustn't tear, so they can't be split into single byte operations. Replace single byte copy loops with `AtomicCopy{Down,Up}NoTearIfAlignedUnsynchronized`, which performs additional checks when aligned word or dword copying is needed. Test262 tests: <tc39/test262#4369> Differential Revision: https://phabricator.services.mozilla.com/D233710
1 parent 243bf79 commit ecf9a0d

File tree

2 files changed

+145
-19
lines changed

2 files changed

+145
-19
lines changed

js/src/jit/GenerateAtomicOperations.py

+15-1
Original file line numberDiff line numberDiff line change
@@ -664,6 +664,9 @@ def gen_copy(fun_name, cpp_type, size, unroll, direction):
664664
if size == 1:
665665
insns += fmt_insn("movb OFFSET(%[src]), %[scratch]")
666666
insns += fmt_insn("movb %[scratch], OFFSET(%[dst])")
667+
elif size == 2:
668+
insns += fmt_insn("movw OFFSET(%[src]), %[scratch]")
669+
insns += fmt_insn("movw %[scratch], OFFSET(%[dst])")
667670
elif size == 4:
668671
insns += fmt_insn("movl OFFSET(%[src]), %[scratch]")
669672
insns += fmt_insn("movl %[scratch], OFFSET(%[dst])")
@@ -675,6 +678,12 @@ def gen_copy(fun_name, cpp_type, size, unroll, direction):
675678
if size == 1:
676679
insns += fmt_insn("ldrb %w[scratch], [%x[src], OFFSET]")
677680
insns += fmt_insn("strb %w[scratch], [%x[dst], OFFSET]")
681+
elif size == 2:
682+
insns += fmt_insn("ldrh %w[scratch], [%x[src], OFFSET]")
683+
insns += fmt_insn("strh %w[scratch], [%x[dst], OFFSET]")
684+
elif size == 4:
685+
insns += fmt_insn("ldr %w[scratch], [%x[src], OFFSET]")
686+
insns += fmt_insn("str %w[scratch], [%x[dst], OFFSET]")
678687
else:
679688
assert size == 8
680689
insns += fmt_insn("ldr %x[scratch], [%x[src], OFFSET]")
@@ -683,6 +692,9 @@ def gen_copy(fun_name, cpp_type, size, unroll, direction):
683692
if size == 1:
684693
insns += fmt_insn("ldrb %[scratch], [%[src], #OFFSET]")
685694
insns += fmt_insn("strb %[scratch], [%[dst], #OFFSET]")
695+
elif size == 2:
696+
insns += fmt_insn("ldrh %[scratch], [%[src], #OFFSET]")
697+
insns += fmt_insn("strh %[scratch], [%[dst], #OFFSET]")
686698
else:
687699
assert size == 4
688700
insns += fmt_insn("ldr %[scratch], [%[src], #OFFSET]")
@@ -864,7 +876,9 @@ def generate_atomics_header(c_out):
864876
contents += gen_copy(
865877
"AtomicCopyWordUnsynchronized", "uintptr_t", wordsize, 1, "down"
866878
)
867-
contents += gen_copy("AtomicCopyByteUnsynchronized", "uint8_t", 1, 1, "down")
879+
contents += gen_copy("AtomicCopy32Unsynchronized", "uint32_t", 4, 1, "down")
880+
contents += gen_copy("AtomicCopy16Unsynchronized", "uint16_t", 2, 1, "down")
881+
contents += gen_copy("AtomicCopy8Unsynchronized", "uint8_t", 1, 1, "down")
868882

869883
contents += "\n"
870884
contents += (

js/src/jit/shared/AtomicOperations-shared-jit.cpp

+130-18
Original file line numberDiff line numberDiff line change
@@ -4,18 +4,26 @@
44
* License, v. 2.0. If a copy of the MPL was not distributed with this
55
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
66

7+
#include "mozilla/Assertions.h"
8+
#include "mozilla/Attributes.h"
9+
#include "mozilla/MathAlgorithms.h"
10+
11+
#include <atomic>
12+
#include <stddef.h>
13+
#include <stdint.h>
14+
#include <stdlib.h>
15+
#include <tuple>
16+
#include <utility>
17+
718
#include "jit/AtomicOperations.h"
19+
#include "js/GCAPI.h"
820

921
#if defined(__arm__)
1022
# include "jit/arm/Architecture-arm.h"
1123
#endif
1224

1325
#ifdef JS_HAVE_GENERATED_ATOMIC_OPS
1426

15-
# include <atomic>
16-
17-
# include "js/GCAPI.h"
18-
1927
using namespace js;
2028
using namespace js::jit;
2129

@@ -70,6 +78,64 @@ void AtomicCompilerFence() {
7078
}
7179
# endif
7280

81+
/**
82+
* Return `true` if all pointers are aligned to `Alignment`.
83+
*/
84+
template <size_t Alignment>
85+
static inline bool CanCopyAligned(const uint8_t* dest, const uint8_t* src,
86+
const uint8_t* lim) {
87+
static_assert(mozilla::IsPowerOfTwo(Alignment));
88+
return ((uintptr_t(dest) | uintptr_t(src) | uintptr_t(lim)) &
89+
(Alignment - 1)) == 0;
90+
}
91+
92+
/**
93+
* Return `true` if both pointers have the same alignment and can be aligned to
94+
* `Alignment`.
95+
*/
96+
template <size_t Alignment>
97+
static inline bool CanAlignTo(const uint8_t* dest, const uint8_t* src) {
98+
static_assert(mozilla::IsPowerOfTwo(Alignment));
99+
return ((uintptr_t(dest) ^ uintptr_t(src)) & (Alignment - 1)) == 0;
100+
}
101+
102+
/**
103+
* Copy a datum smaller than `WORDSIZE`. Prevents tearing when `dest` and `src`
104+
* are both aligned.
105+
*
106+
* No tearing is a requirement for integer TypedArrays.
107+
*
108+
* https://tc39.es/ecma262/#sec-isnotearconfiguration
109+
* https://tc39.es/ecma262/#sec-tear-free-aligned-reads
110+
* https://tc39.es/ecma262/#sec-valid-executions
111+
*/
112+
static MOZ_ALWAYS_INLINE auto AtomicCopyDownNoTearIfAlignedUnsynchronized(
113+
uint8_t* dest, const uint8_t* src, const uint8_t* srcEnd) {
114+
MOZ_ASSERT(src <= srcEnd);
115+
MOZ_ASSERT(size_t(srcEnd - src) < WORDSIZE);
116+
117+
if (WORDSIZE > 4 && CanCopyAligned<4>(dest, src, srcEnd)) {
118+
static_assert(WORDSIZE <= 8, "copies 32-bits at most once");
119+
120+
if (src < srcEnd) {
121+
AtomicCopy32Unsynchronized(dest, src);
122+
dest += 4;
123+
src += 4;
124+
}
125+
} else if (CanCopyAligned<2>(dest, src, srcEnd)) {
126+
while (src < srcEnd) {
127+
AtomicCopy16Unsynchronized(dest, src);
128+
dest += 2;
129+
src += 2;
130+
}
131+
} else {
132+
while (src < srcEnd) {
133+
AtomicCopy8Unsynchronized(dest++, src++);
134+
}
135+
}
136+
return std::pair{dest, src};
137+
}
138+
73139
void AtomicMemcpyDownUnsynchronized(uint8_t* dest, const uint8_t* src,
74140
size_t nbytes) {
75141
JS::AutoSuppressGCAnalysis nogc;
@@ -85,12 +151,14 @@ void AtomicMemcpyDownUnsynchronized(uint8_t* dest, const uint8_t* src,
85151
void (*copyBlock)(uint8_t* dest, const uint8_t* src);
86152
void (*copyWord)(uint8_t* dest, const uint8_t* src);
87153

88-
if (((uintptr_t(dest) ^ uintptr_t(src)) & WORDMASK) == 0) {
154+
if (CanAlignTo<WORDSIZE>(dest, src)) {
89155
const uint8_t* cutoff = (const uint8_t*)RoundUp(uintptr_t(src), WORDSIZE);
90156
MOZ_ASSERT(cutoff <= lim); // because nbytes >= WORDSIZE
91-
while (src < cutoff) {
92-
AtomicCopyByteUnsynchronized(dest++, src++);
93-
}
157+
158+
// Copy initial bytes to align to word size.
159+
std::tie(dest, src) =
160+
AtomicCopyDownNoTearIfAlignedUnsynchronized(dest, src, cutoff);
161+
94162
copyBlock = AtomicCopyBlockDownUnsynchronized;
95163
copyWord = AtomicCopyWordUnsynchronized;
96164
} else if (UnalignedAccessesAreOK()) {
@@ -118,11 +186,46 @@ void AtomicMemcpyDownUnsynchronized(uint8_t* dest, const uint8_t* src,
118186
}
119187
}
120188

121-
// Byte copy any remaining tail.
189+
// Copy any remaining tail.
122190

123-
while (src < lim) {
124-
AtomicCopyByteUnsynchronized(dest++, src++);
191+
AtomicCopyDownNoTearIfAlignedUnsynchronized(dest, src, lim);
192+
}
193+
194+
/**
195+
* Copy a datum smaller than `WORDSIZE`. Prevents tearing when `dest` and `src`
196+
* are both aligned.
197+
*
198+
* No tearing is a requirement for integer TypedArrays.
199+
*
200+
* https://tc39.es/ecma262/#sec-isnotearconfiguration
201+
* https://tc39.es/ecma262/#sec-tear-free-aligned-reads
202+
* https://tc39.es/ecma262/#sec-valid-executions
203+
*/
204+
static MOZ_ALWAYS_INLINE auto AtomicCopyUpNoTearIfAlignedUnsynchronized(
205+
uint8_t* dest, const uint8_t* src, const uint8_t* srcBegin) {
206+
MOZ_ASSERT(src >= srcBegin);
207+
MOZ_ASSERT(size_t(src - srcBegin) < WORDSIZE);
208+
209+
if (WORDSIZE > 4 && CanCopyAligned<4>(dest, src, srcBegin)) {
210+
static_assert(WORDSIZE <= 8, "copies 32-bits at most once");
211+
212+
if (src > srcBegin) {
213+
dest -= 4;
214+
src -= 4;
215+
AtomicCopy32Unsynchronized(dest, src);
216+
}
217+
} else if (CanCopyAligned<2>(dest, src, srcBegin)) {
218+
while (src > srcBegin) {
219+
dest -= 2;
220+
src -= 2;
221+
AtomicCopy16Unsynchronized(dest, src);
222+
}
223+
} else {
224+
while (src > srcBegin) {
225+
AtomicCopy8Unsynchronized(--dest, --src);
226+
}
125227
}
228+
return std::pair{dest, src};
126229
}
127230

128231
void AtomicMemcpyUpUnsynchronized(uint8_t* dest, const uint8_t* src,
@@ -134,16 +237,23 @@ void AtomicMemcpyUpUnsynchronized(uint8_t* dest, const uint8_t* src,
134237
src += nbytes;
135238
dest += nbytes;
136239

240+
// Set up bulk copying. The cases are ordered the way they are on the
241+
// assumption that if we can achieve aligned copies even with a little
242+
// preprocessing then that is better than unaligned copying on a platform
243+
// that supports it.
244+
137245
if (nbytes >= WORDSIZE) {
138246
void (*copyBlock)(uint8_t* dest, const uint8_t* src);
139247
void (*copyWord)(uint8_t* dest, const uint8_t* src);
140248

141-
if (((uintptr_t(dest) ^ uintptr_t(src)) & WORDMASK) == 0) {
249+
if (CanAlignTo<WORDSIZE>(dest, src)) {
142250
const uint8_t* cutoff = (const uint8_t*)(uintptr_t(src) & ~WORDMASK);
143251
MOZ_ASSERT(cutoff >= lim); // Because nbytes >= WORDSIZE
144-
while (src > cutoff) {
145-
AtomicCopyByteUnsynchronized(--dest, --src);
146-
}
252+
253+
// Copy initial bytes to align to word size.
254+
std::tie(dest, src) =
255+
AtomicCopyUpNoTearIfAlignedUnsynchronized(dest, src, cutoff);
256+
147257
copyBlock = AtomicCopyBlockUpUnsynchronized;
148258
copyWord = AtomicCopyWordUnsynchronized;
149259
} else if (UnalignedAccessesAreOK()) {
@@ -154,6 +264,8 @@ void AtomicMemcpyUpUnsynchronized(uint8_t* dest, const uint8_t* src,
154264
copyWord = AtomicCopyUnalignedWordUpUnsynchronized;
155265
}
156266

267+
// Bulk copy, first larger blocks and then individual words.
268+
157269
const uint8_t* blocklim = src - ((src - lim) & ~BLOCKMASK);
158270
while (src > blocklim) {
159271
dest -= BLOCKSIZE;
@@ -169,9 +281,9 @@ void AtomicMemcpyUpUnsynchronized(uint8_t* dest, const uint8_t* src,
169281
}
170282
}
171283

172-
while (src > lim) {
173-
AtomicCopyByteUnsynchronized(--dest, --src);
174-
}
284+
// Copy any remaining tail.
285+
286+
AtomicCopyUpNoTearIfAlignedUnsynchronized(dest, src, lim);
175287
}
176288

177289
} // namespace jit

0 commit comments

Comments
 (0)