Description
Slicing a Memory<T> does not modify the underlying object it points to; it only changes the index and length. However, slicing does create a new Memory<T> value, and hence the object field has to be set again.
This makes memory slicing slower, since the JIT injects a call to the write barrier for that store. It also means that methods like the one below are no longer leaf methods, and hence a stack frame gets added as well.
/// <summary>
/// Adds <paramref name="count"/> to the running <c>_buffered</c> total and
/// re-slices <c>_buffer</c> so that it starts <paramref name="count"/> elements later.
/// </summary>
/// <param name="count">
/// Number of elements to advance by. Debug builds assert that it is non-negative
/// and that adding it to <c>_buffered</c> cannot exceed <see cref="int.MaxValue"/>.
/// </param>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private void Advance(int count)
{
    Debug.Assert(count >= 0 && _buffered <= int.MaxValue - count);

    _buffered += count;

    // Storing the sliced value back into the field is the assignment under
    // discussion: it rewrites the object field of _buffer even though only the
    // index/length actually change.
    //
    // Unsafe alternative, shown for comparison only -- do not do this:
    //     UnsafeMemory<byte> temp = Unsafe.As<Memory<byte>, UnsafeMemory<byte>>(ref _buffer).Slice(count);
    //     _buffer = Unsafe.As<UnsafeMemory<byte>, Memory<byte>>(ref temp);
    _buffer = _buffer.Slice(count);
}
Can the JIT elide the call to the write barrier here? Are there concerns with the GC moving the object?
From @AndyAyersMS:
Though perhaps it doesn't matter as the pointer the jit has and the pointer the struct has should have the same value whether or not GC happens in between
Disassembly:
https://www.diffchecker.com/zbiwhYXM
Note the call here:
call coreclr!coreclr_shutdown_2+0xc910
I believe this is CoreCLR!JIT_WriteBarrier
00007ff9`65e76820 System.Text.Json.Perf_MemSlice.MemSlice()
00007ff9`65e76820 57 push rdi
00007ff9`65e76821 56 push rsi
00007ff9`65e76822 53 push rbx
00007ff9`65e76823 4883ec20 sub rsp,20h
00007ff9`65e76827 488bf2 mov rsi,rdx
00007ff9`65e7682a 3909 cmp dword ptr [rcx],ecx
00007ff9`65e7682c 4883c110 add rcx,10h
00007ff9`65e76830 8b790c mov edi,dword ptr [rcx+0Ch]
00007ff9`65e76833 83ff01 cmp edi,1
00007ff9`65e76836 7262 jb 00007ff9`65e7689a
00007ff9`65e76838 488b11 mov rdx,qword ptr [rcx]
00007ff9`65e7683b 8b5908 mov ebx,dword ptr [rcx+8]
00007ff9`65e7683e ffc3 inc ebx
00007ff9`65e76840 ffcf dec edi
00007ff9`65e76842 83ff01 cmp edi,1
00007ff9`65e76845 725e jb 00007ff9`65e768a5
00007ff9`65e76847 ffc3 inc ebx
00007ff9`65e76849 ffcf dec edi
00007ff9`65e7684b 83ff01 cmp edi,1
00007ff9`65e7684e 7260 jb 00007ff9`65e768b0
00007ff9`65e76850 ffc3 inc ebx
00007ff9`65e76852 ffcf dec edi
00007ff9`65e76854 83ff01 cmp edi,1
00007ff9`65e76857 7262 jb 00007ff9`65e768bb
00007ff9`65e76859 ffc3 inc ebx
00007ff9`65e7685b ffcf dec edi
00007ff9`65e7685d 83ff01 cmp edi,1
00007ff9`65e76860 7264 jb 00007ff9`65e768c6
00007ff9`65e76862 ffc3 inc ebx
00007ff9`65e76864 ffcf dec edi
00007ff9`65e76866 83ff01 cmp edi,1
00007ff9`65e76869 7266 jb 00007ff9`65e768d1
00007ff9`65e7686b ffc3 inc ebx
00007ff9`65e7686d ffcf dec edi
00007ff9`65e7686f 83ff01 cmp edi,1
00007ff9`65e76872 7268 jb 00007ff9`65e768dc
00007ff9`65e76874 ffc3 inc ebx
00007ff9`65e76876 ffcf dec edi
00007ff9`65e76878 83ff01 cmp edi,1
00007ff9`65e7687b 726a jb 00007ff9`65e768e7
00007ff9`65e7687d ffc3 inc ebx
00007ff9`65e7687f ffcf dec edi
00007ff9`65e76881 488bce mov rcx,rsi
00007ff9`65e76884 e887dd8e5f call coreclr!coreclr_shutdown_2+0xc910 (00007ff9`c5764610)
00007ff9`65e76889 895e08 mov dword ptr [rsi+8],ebx
00007ff9`65e7688c 897e0c mov dword ptr [rsi+0Ch],edi
00007ff9`65e7688f 488bc6 mov rax,rsi
00007ff9`65e76892 4883c420 add rsp,20h
00007ff9`65e76896 5b pop rbx
00007ff9`65e76897 5e pop rsi
00007ff9`65e76898 5f pop rdi
00007ff9`65e76899 c3 ret
00007ff9`65e56af0 System.Text.Json.Perf_MemSlice.MemUnsafeSlice()
00007ff9`65e56af0 4883ec28 sub rsp,28h
00007ff9`65e56af4 90 nop
00007ff9`65e56af5 3909 cmp dword ptr [rcx],ecx
00007ff9`65e56af7 4883c110 add rcx,10h
00007ff9`65e56afb 8b410c mov eax,dword ptr [rcx+0Ch]
00007ff9`65e56afe 83f801 cmp eax,1
00007ff9`65e56b01 725a jb 00007ff9`65e56b5d
00007ff9`65e56b03 4c8b01 mov r8,qword ptr [rcx]
00007ff9`65e56b06 8b4908 mov ecx,dword ptr [rcx+8]
00007ff9`65e56b09 ffc1 inc ecx
00007ff9`65e56b0b ffc8 dec eax
00007ff9`65e56b0d 83f801 cmp eax,1
00007ff9`65e56b10 7251 jb 00007ff9`65e56b63
00007ff9`65e56b12 ffc1 inc ecx
00007ff9`65e56b14 ffc8 dec eax
00007ff9`65e56b16 83f801 cmp eax,1
00007ff9`65e56b19 724e jb 00007ff9`65e56b69
00007ff9`65e56b1b ffc1 inc ecx
00007ff9`65e56b1d ffc8 dec eax
00007ff9`65e56b1f 83f801 cmp eax,1
00007ff9`65e56b22 724b jb 00007ff9`65e56b6f
00007ff9`65e56b24 ffc1 inc ecx
00007ff9`65e56b26 ffc8 dec eax
00007ff9`65e56b28 83f801 cmp eax,1
00007ff9`65e56b2b 7248 jb 00007ff9`65e56b75
00007ff9`65e56b2d ffc1 inc ecx
00007ff9`65e56b2f ffc8 dec eax
00007ff9`65e56b31 83f801 cmp eax,1
00007ff9`65e56b34 7245 jb 00007ff9`65e56b7b
00007ff9`65e56b36 ffc1 inc ecx
00007ff9`65e56b38 ffc8 dec eax
00007ff9`65e56b3a 83f801 cmp eax,1
00007ff9`65e56b3d 7242 jb 00007ff9`65e56b81
00007ff9`65e56b3f ffc1 inc ecx
00007ff9`65e56b41 ffc8 dec eax
00007ff9`65e56b43 83f801 cmp eax,1
00007ff9`65e56b46 723f jb 00007ff9`65e56b87
00007ff9`65e56b48 ffc1 inc ecx
00007ff9`65e56b4a ffc8 dec eax
00007ff9`65e56b4c 4c8902 mov qword ptr [rdx],r8
00007ff9`65e56b4f 894a08 mov dword ptr [rdx+8],ecx
00007ff9`65e56b52 89420c mov dword ptr [rdx+0Ch],eax
00007ff9`65e56b55 488bc2 mov rax,rdx
00007ff9`65e56b58 4883c428 add rsp,28h
00007ff9`65e56b5c c3 ret
00007ff9`65e46af0 System.Text.Json.Perf_MemSlice.SpanSlice()
00007ff9`65e46af0 4883ec28 sub rsp,28h
00007ff9`65e46af4 90 nop
00007ff9`65e46af5 488b4108 mov rax,qword ptr [rcx+8]
00007ff9`65e46af9 4885c0 test rax,rax
00007ff9`65e46afc 7465 je 00007ff9`65e46b63
00007ff9`65e46afe 8b4808 mov ecx,dword ptr [rax+8]
00007ff9`65e46b01 83f901 cmp ecx,1
00007ff9`65e46b04 7263 jb 00007ff9`65e46b69
00007ff9`65e46b06 4883c010 add rax,10h
00007ff9`65e46b0a ffc9 dec ecx
00007ff9`65e46b0c 48ffc0 inc rax
00007ff9`65e46b0f 83f901 cmp ecx,1
00007ff9`65e46b12 725b jb 00007ff9`65e46b6f
00007ff9`65e46b14 ffc9 dec ecx
00007ff9`65e46b16 48ffc0 inc rax
00007ff9`65e46b19 83f901 cmp ecx,1
00007ff9`65e46b1c 7257 jb 00007ff9`65e46b75
00007ff9`65e46b1e ffc9 dec ecx
00007ff9`65e46b20 48ffc0 inc rax
00007ff9`65e46b23 83f901 cmp ecx,1
00007ff9`65e46b26 7253 jb 00007ff9`65e46b7b
00007ff9`65e46b28 ffc9 dec ecx
00007ff9`65e46b2a 48ffc0 inc rax
00007ff9`65e46b2d 83f901 cmp ecx,1
00007ff9`65e46b30 724f jb 00007ff9`65e46b81
00007ff9`65e46b32 ffc9 dec ecx
00007ff9`65e46b34 48ffc0 inc rax
00007ff9`65e46b37 83f901 cmp ecx,1
00007ff9`65e46b3a 724b jb 00007ff9`65e46b87
00007ff9`65e46b3c ffc9 dec ecx
00007ff9`65e46b3e 48ffc0 inc rax
00007ff9`65e46b41 83f901 cmp ecx,1
00007ff9`65e46b44 7247 jb 00007ff9`65e46b8d
00007ff9`65e46b46 ffc9 dec ecx
00007ff9`65e46b48 48ffc0 inc rax
00007ff9`65e46b4b 83f901 cmp ecx,1
00007ff9`65e46b4e 7243 jb 00007ff9`65e46b93
00007ff9`65e46b50 ffc9 dec ecx
00007ff9`65e46b52 48ffc0 inc rax
00007ff9`65e46b55 488902 mov qword ptr [rdx],rax
00007ff9`65e46b58 894a08 mov dword ptr [rdx+8],ecx
00007ff9`65e46b5b 488bc2 mov rax,rdx
00007ff9`65e46b5e 4883c428 add rsp,28h
00007ff9`65e46b62 c3 ret
Benchmark:
/// <summary>
/// Micro-benchmarks that each apply eight consecutive one-element slices and
/// return the result, using three representations of the same 128-byte array:
/// <see cref="Memory{T}"/>, the <c>UnsafeMemory&lt;T&gt;</c> reinterpretation of
/// that memory, and <see cref="Span{T}"/>.
/// </summary>
public class Perf_MemSlice
{
    // Backing storage shared by all three benchmarks.
    private byte[] _array;

    // Memory view over _array; also the value reinterpreted by MemUnsafeSlice.
    private Memory<byte> _memory;

    /// <summary>Allocates the 128-byte array and wraps it in a <see cref="Memory{T}"/>.</summary>
    [GlobalSetup]
    public void Setup()
    {
        _array = new byte[128];
        _memory = new Memory<byte>(_array);
    }

    /// <summary>Slices the <see cref="Memory{T}"/> field by one element, eight times.</summary>
    /// <returns>The memory remaining after the eight slices.</returns>
    [Benchmark]
    public Memory<byte> MemSlice()
    {
        // Deliberately unrolled: one Slice call per statement.
        Memory<byte> window = _memory.Slice(1);
        window = window.Slice(1);
        window = window.Slice(1);
        window = window.Slice(1);
        window = window.Slice(1);
        window = window.Slice(1);
        window = window.Slice(1);
        window = window.Slice(1);
        return window;
    }

    /// <summary>
    /// Same eight slices as <see cref="MemSlice"/>, but performed on the field
    /// reinterpreted as <c>UnsafeMemory&lt;byte&gt;</c> and reinterpreted back on return.
    /// </summary>
    /// <returns>The sliced value viewed as a <see cref="Memory{T}"/> again.</returns>
    [Benchmark]
    public Memory<byte> MemUnsafeSlice()
    {
        // Deliberately unrolled: one Slice call per statement.
        UnsafeMemory<byte> window = Unsafe.As<Memory<byte>, UnsafeMemory<byte>>(ref _memory).Slice(1);
        window = window.Slice(1);
        window = window.Slice(1);
        window = window.Slice(1);
        window = window.Slice(1);
        window = window.Slice(1);
        window = window.Slice(1);
        window = window.Slice(1);
        return Unsafe.As<UnsafeMemory<byte>, Memory<byte>>(ref window);
    }

    /// <summary>Creates a span over the array starting at index 1, then slices it by one element seven more times.</summary>
    /// <returns>The span remaining after the eight one-element steps.</returns>
    [Benchmark]
    public Span<byte> SpanSlice()
    {
        // Deliberately unrolled: one Slice call per statement.
        Span<byte> window = _array.AsSpan(1);
        window = window.Slice(1);
        window = window.Slice(1);
        window = window.Slice(1);
        window = window.Slice(1);
        window = window.Slice(1);
        window = window.Slice(1);
        window = window.Slice(1);
        return window;
    }
}
// Unsafe hack to measure what perf we can get if we don't have the write barrier.
// Courtesy of Levi
/// <summary>
/// Measurement-only stand-in with three fields (an <see cref="IntPtr"/>, an index and
/// a length), used by reinterpreting a <c>Memory&lt;T&gt;</c> via <c>Unsafe.As</c>.
/// The first field is an <see cref="IntPtr"/> rather than an object reference.
/// </summary>
/// <typeparam name="T">Element type of the memory being mimicked.</typeparam>
public struct UnsafeMemory<T>
{
    // Field order is significant: instances are reinterpreted from/to another struct.
    private readonly IntPtr _object;
    private readonly int _index;
    private readonly int _length;

    /// <summary>Stores the three values as given.</summary>
    /// <param name="obj">Value for the object slot.</param>
    /// <param name="start">Value for the index slot.</param>
    /// <param name="length">Value for the length slot.</param>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    internal UnsafeMemory(IntPtr obj, int start, int length)
    {
        // No validation performed in release builds; caller must provide any necessary validation.
        _object = obj;
        _index = start;
        _length = length;
    }

    /// <summary>
    /// Returns a copy whose index is moved forward by <paramref name="start"/> and
    /// whose length is reduced by the same amount; the object slot is carried over unchanged.
    /// </summary>
    /// <param name="start">Offset to slice at; must be between 0 and the current length, inclusive.</param>
    /// <returns>The sliced value.</returns>
    /// <exception cref="ArgumentOutOfRangeException">
    /// <paramref name="start"/> is negative or greater than the current length.
    /// </exception>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public UnsafeMemory<T> Slice(int start)
    {
        // A single unsigned comparison rejects both negative values and values past the end.
        if ((uint)start > (uint)_length)
        {
            ThrowArgumentOutOfRangeException();
        }

        // It is expected for _index + start to be negative if the memory is already pre-pinned.
        int shiftedIndex = _index + start;
        int remainingLength = _length - start;
        return new UnsafeMemory<T>(_object, shiftedIndex, remainingLength);
    }

    /// <summary>Throw helper: always throws <see cref="ArgumentOutOfRangeException"/>.</summary>
    internal static void ThrowArgumentOutOfRangeException() => throw new ArgumentOutOfRangeException();
}
cc @AndyAyersMS, @CarolEidt, @davidfowl, @jkotas
category:cq
theme:barriers
skill-level:expert
cost:large