Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 33 additions & 1 deletion include/glaze/json/write.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -495,8 +495,39 @@ namespace glz
data += 2;
++c;
}*/
#define ENABLE_GENERIC_VECTOR 1

#ifdef ENABLE_GENERIC_VECTOR
if (n > 63) {
for (const auto end_m7 = e - 63; c < end_m7;) {
std::memcpy(data, c, 64);
uint64x8_t swar;
std::memcpy(&swar, c, 64);

static constexpr uint64x8_t lo7_mask = repeat_byte64(0b01111111);
const uint64x8_t lo7 = swar & lo7_mask;
const uint64x8_t quote = (lo7 ^ repeat_byte64('"')) + lo7_mask;
const uint64x8_t backslash = (lo7 ^ repeat_byte64('\\')) + lo7_mask;
const uint64x8_t less_32 = (swar & repeat_byte64(0b01100000)) + lo7_mask;
uint64x8_t next = ~((quote & backslash & less_32) | swar);

uint64_t mask = movemask_64(next);
if (mask == 0) {
data += 64;
c += 64;
continue;
}

uint32_t length = countr_zero(mask);
c += length;
data += length;

#if defined(__APPLE__)
std::memcpy(data, &char_escape_table[uint8_t(*c)], 2);
data += 2;
++c;
}
}
#elif defined(__APPLE__)
// This approach is faster when strings contain few escapes,
// but it is not faster in the general case.
/*if (n > 15) {
Expand Down Expand Up @@ -2081,3 +2112,4 @@ namespace glz
return {buffer_to_file(buffer, file_name)};
}
}

36 changes: 32 additions & 4 deletions include/glaze/util/parse.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@

#pragma once

#include <immintrin.h>

#include <algorithm>
#include <bit>
#include <charconv>
Expand Down Expand Up @@ -116,10 +118,37 @@ namespace glz::detail
return t;
}();

// Replicates the low 8 bits of `repeat` into all four bytes of a 32-bit word.
consteval uint32_t repeat_byte4(const auto repeat)
{
   const auto byte = uint8_t(repeat);
   return uint32_t(0x01010101u) * byte;
}
// Compile-time broadcast: returns a 32-bit word whose four bytes all equal
// the low 8 bits of `repeat`.
[[nodiscard, gnu::always_inline, gnu::const]]
consteval uint32_t repeat_byte4(const auto repeat)
{
   // Multiplying by 0x01010101 copies the byte into every byte position.
   const auto narrowed = uint8_t(repeat);
   const auto every_byte_one = uint32_t(0x01010101u);
   return every_byte_one * narrowed;
}

// Compile-time broadcast: returns a 64-bit word whose eight bytes all equal
// `repeat`.
[[nodiscard, gnu::always_inline, gnu::const]]
consteval uint64_t repeat_byte8(const uint8_t repeat)
{
   // Multiplying by 0x0101...01 copies the byte into every byte position.
   constexpr unsigned long long every_byte_one = 0x0101010101010101ull;
   return every_byte_one * repeat;
}
// 32-byte vector of uint64_t (four 64-bit lanes), declared with the
// GCC/Clang `vector_size` extension.
using uint64x4_t = uint64_t __attribute__((__vector_size__(32)));
// 64-byte vector of uint64_t (eight 64-bit lanes), declared with the
// GCC/Clang `vector_size` extension.
using uint64x8_t = uint64_t __attribute__((__vector_size__(64)));

// Replicates `repeat` into all eight bytes of a 64-bit word.
consteval uint64_t repeat_byte8(const uint8_t repeat)
{
   const auto broadcast = 0x0101010101010101ull * repeat;
   return broadcast;
}
// Compile-time broadcast of `repeat` into every byte of a 32-byte vector
// (four 64-bit lanes, each produced by repeat_byte8).
[[nodiscard, gnu::always_inline, gnu::const]]
consteval uint64x4_t repeat_byte32(const uint8_t repeat)
{
   const uint64_t lane = repeat_byte8(repeat);
   return uint64x4_t{lane, lane, lane, lane};
}
// Compile-time broadcast of `repeat` into every byte of a 64-byte vector
// (eight 64-bit lanes, each produced by repeat_byte8).
[[nodiscard, gnu::always_inline, gnu::const]]
consteval uint64x8_t repeat_byte64(const uint8_t repeat)
{
   const uint64_t lane = repeat_byte8(repeat);
   return uint64x8_t{lane, lane, lane, lane, lane, lane, lane, lane};
}

[[nodiscard, gnu::always_inline, gnu::const]]
auto movemask_64(const uint64x8_t v) noexcept -> uint64_t
{
return _mm512_movepi8_mask(v);
}
#if defined(__SIZEOF_INT128__)
consteval __uint128_t repeat_byte16(const uint8_t repeat)
{
Expand Down Expand Up @@ -479,8 +508,7 @@ namespace glz::detail
ctx.error = error_code::unexpected_end;
}
else if (*it == '/') {
while (++it != end && *it != '\n')
;
while (++it != end && *it != '\n');
}
else if (*it == '*') {
while (++it != end) {
Expand Down
Loading