Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

impl(spanner): add uuid type #15023

Merged
merged 10 commits into from
Mar 28, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions google/cloud/spanner/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -211,6 +211,8 @@ add_library(
transaction.cc
transaction.h
update_instance_request_builder.h
uuid.cc
uuid.h
value.cc
value.h
version.cc
Expand Down Expand Up @@ -502,6 +504,7 @@ function (spanner_client_define_tests)
timestamp_test.cc
transaction_test.cc
update_instance_request_builder_test.cc
uuid_test.cc
value_test.cc)

# Export the list of unit tests to a .bzl file so we do not need to maintain
Expand Down
2 changes: 2 additions & 0 deletions google/cloud/spanner/google_cloud_cpp_spanner.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,7 @@ google_cloud_cpp_spanner_hdrs = [
"tracing_options.h",
"transaction.h",
"update_instance_request_builder.h",
"uuid.h",
"value.h",
"version.h",
"version_info.h",
Expand Down Expand Up @@ -198,6 +199,7 @@ google_cloud_cpp_spanner_srcs = [
"sql_statement.cc",
"timestamp.cc",
"transaction.cc",
"uuid.cc",
"value.cc",
"version.cc",
]
1 change: 1 addition & 0 deletions google/cloud/spanner/spanner_client_unit_tests.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -69,5 +69,6 @@ spanner_client_unit_tests = [
"timestamp_test.cc",
"transaction_test.cc",
"update_instance_request_builder_test.cc",
"uuid_test.cc",
"value_test.cc",
]
150 changes: 150 additions & 0 deletions google/cloud/spanner/uuid.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,150 @@
// Copyright 2025 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "google/cloud/spanner/uuid.h"
#include "google/cloud/internal/make_status.h"
#include "absl/strings/str_format.h"
#include "absl/strings/strip.h"
#include <unordered_map>

namespace google {
namespace cloud {
namespace spanner {
GOOGLE_CLOUD_CPP_INLINE_NAMESPACE_BEGIN
namespace {

// Helper function to parse a single hexadecimal block of a UUID.
// A hexadecimal block is a 16-digit hexadecimal number, which is represented
// as 8 bytes.
StatusOr<std::uint64_t> ParseHexBlock(absl::string_view& str,
absl::string_view original_str) {
constexpr int kUuidNumberOfHexDigits = 32;
constexpr int kMaxUuidBlockLength = 16;
static auto const* char_to_hex = new std::unordered_map<char, std::uint8_t>(
{{'0', 0x00}, {'1', 0x01}, {'2', 0x02}, {'3', 0x03}, {'4', 0x04},
{'5', 0x05}, {'6', 0x06}, {'7', 0x07}, {'8', 0x08}, {'9', 0x09},
{'a', 0x0a}, {'b', 0x0b}, {'c', 0x0c}, {'d', 0x0d}, {'e', 0x0e},
{'f', 0x0f}, {'A', 0x0a}, {'B', 0x0b}, {'C', 0x0c}, {'D', 0x0d},
{'E', 0x0e}, {'F', 0x0f}});
std::uint64_t block = 0;
for (int j = 0; j < kMaxUuidBlockLength; ++j) {
absl::ConsumePrefix(&str, "-");
if (str.empty()) {
return internal::InvalidArgumentError(
absl::StrFormat("UUID must contain %d hexadecimal digits: %s",
kUuidNumberOfHexDigits, original_str),
GCP_ERROR_INFO());
}
auto it = char_to_hex->find(str[0]);
if (it == char_to_hex->end()) {
if (str[0] == '-') {
return internal::InvalidArgumentError(
absl::StrFormat("UUID cannot contain consecutive hyphens: %s",
original_str),
GCP_ERROR_INFO());
}

return internal::InvalidArgumentError(
absl::StrFormat("UUID contains invalid character (%c): %s", str[0],
original_str),
GCP_ERROR_INFO());
}
block = (block << 4) + it->second;
str.remove_prefix(1);
}
return block;
}
} // namespace

Uuid::Uuid(absl::uint128 value) : uuid_(value) {}

Uuid::Uuid(std::uint64_t high_bits, std::uint64_t low_bits)
: Uuid(absl::MakeUint128(high_bits, low_bits)) {}

std::pair<std::uint64_t, std::uint64_t> Uuid::As64BitPair() const {
return std::make_pair(Uint128High64(uuid_), Uint128Low64(uuid_));
}

// TODO(#15043): Refactor to handle all 128 bits at once instead of splitting
// into a pair of unsigned 64-bit integers.
Uuid::operator std::string() const {
constexpr int kUuidStringLen = 36;
constexpr int kChunkLength[] = {8, 4, 4, 4, 12};
auto to_hex = [](std::uint64_t v, int start_index, int end_index, char* out) {
static constexpr char kHexChar[] = {'0', '1', '2', '3', '4', '5', '6', '7',
'8', '9', 'a', 'b', 'c', 'd', 'e', 'f'};
for (int i = start_index; i >= end_index; --i) {
*out++ = kHexChar[(v >> (i * 4)) & 0xf];
}
return start_index - end_index + 1;
};

std::string output;
output.resize(kUuidStringLen);
char* target = const_cast<char*>(output.data());
char* const last = &((output)[output.size()]);
auto bits = Uint128High64(uuid_);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why split uuid_ into two 64-bit values when it could be more clearly treated as the single 128-bit value that it is?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

absl::uint128 isn't a drop in replacement in the existing code (I tried). When we get a std::uint128_t I'm happy to revisit.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't know what you tried, but if absl::uint128 didn't work as an integral type then that would be an Abseil bug.

In any case, I'll assert that it does work.

constexpr char kHexDigits[] = "0123456789abcdef";

Uuid::operator std::string() const {
  constexpr char kTemplate[] = "00000000-0000-0000-0000-000000000000";
  char buf[sizeof kTemplate];
  auto uuid = uuid_;
  for (auto j = sizeof buf; j-- != 0;) {
    if (kTemplate[j] != '0') {
      buf[j] = kTemplate[j];
    } else {
      buf[j] = kHexDigits[static_cast<int>(uuid & 0xf)];
      uuid >>= 4;
    }
  }
  return buf;
}

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I didn't try much. I changed to_hex first parameter to an absl::uint128 and got compile errors trying to index into kHexChar
Created #15043 for future improvements as I'm out of time budget to work on this.

int start = 16;
for (auto length : kChunkLength) {
int end = start - length;
target += to_hex(bits, start - 1, end, target);
// Only hyphens write to valid addresses.
if (target < last) *(target++) = '-';
if (end == 0) {
start = 16;
bits = Uint128Low64(uuid_);
} else {
start = end;
}
}
return output;
}

StatusOr<Uuid> MakeUuid(absl::string_view str) {
absl::string_view original_str = str;
// Check and remove optional braces
if (absl::ConsumePrefix(&str, "{")) {
if (!absl::ConsumeSuffix(&str, "}")) {
return internal::InvalidArgumentError(
absl::StrFormat("UUID missing closing '}': %s", original_str),
GCP_ERROR_INFO());
}
}

// Check for leading hyphen after stripping any surrounding braces.
if (absl::StartsWith(str, "-")) {
return internal::InvalidArgumentError(
absl::StrFormat("UUID cannot begin with '-': %s", original_str),
GCP_ERROR_INFO());
}

// TODO(#15043): Refactor to parse all the bits at once.
auto high_bits = ParseHexBlock(str, original_str);
if (!high_bits.ok()) return std::move(high_bits).status();
auto low_bits = ParseHexBlock(str, original_str);
if (!low_bits.ok()) return std::move(low_bits).status();
Comment on lines +133 to +136
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Similarly, I would suggest not introducing two 64-bit values here. (Otherwise, why not four 32-bit values, for example?) A single 128-bit value should suffice.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Lacking a std::uint128_t and absl::uint128 not behaving exactly like a std::uint128_t, std::uint64_t is the largest integer currently available.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Again, I don't know what absl::uint128 behavior you think is lacking. For example,

#include <cctype>
#include <cstring>

constexpr char kHexDigits[] = "0123456789abcdef";

StatusOr<Uuid> MakeUuid(absl::string_view str) {
  absl::uint128 uuid = 0;
  auto const original_str = str;

  if (absl::ConsumePrefix(&str, "{")) {
    if (!absl::ConsumeSuffix(&str, "}")) {
      return internal::InvalidArgumentError(
          absl::StrFormat("UUID missing closing '}': %s", original_str),
          GCP_ERROR_INFO());
    }
  }
  if (absl::StartsWith(str, "-")) {
    return internal::InvalidArgumentError(
        absl::StrFormat("UUID cannot begin with '-': %s", original_str),
        GCP_ERROR_INFO());
  }

  constexpr int kUuidNumberOfHexDigits = 32;
  for (int j = 0; j != kUuidNumberOfHexDigits; ++j) {
    absl::ConsumePrefix(&str, "-");
    if (str.empty()) {
      return internal::InvalidArgumentError(
          absl::StrFormat("UUID must contain %d hexadecimal digits: %s",
                          kUuidNumberOfHexDigits, original_str),
          GCP_ERROR_INFO());
    }
    auto const* dp = std::strchr(
        kHexDigits, std::tolower(static_cast<unsigned char>(str[0])));
    if (dp == nullptr) {
      if (str[0] == '-') {
        return internal::InvalidArgumentError(
            absl::StrFormat("UUID cannot contain consecutive hyphens: %s",
                            original_str),
            GCP_ERROR_INFO());
      }
      return internal::InvalidArgumentError(
          absl::StrFormat("UUID contains invalid character (%c): %s", str[0],
                          original_str),
          GCP_ERROR_INFO());
    }
    str.remove_prefix(1);
    uuid <<= 4;
    uuid += dp - kHexDigits;
  }

  if (!str.empty()) {
    return internal::InvalidArgumentError(
        absl::StrFormat("Extra characters found after parsing UUID: %s", str),
        GCP_ERROR_INFO());
  }
  return Uuid{uuid};
}

No "blocks" (or hash maps). Just a simple per-nibble loop.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Created #15043 for future improvements as I'm out of time budget to work on this.


if (!str.empty()) {
return internal::InvalidArgumentError(
absl::StrFormat("Extra characters found after parsing UUID: %s", str),
GCP_ERROR_INFO());
}

return Uuid(absl::MakeUint128(*high_bits, *low_bits));
}

GOOGLE_CLOUD_CPP_INLINE_NAMESPACE_END
} // namespace spanner
} // namespace cloud
} // namespace google
120 changes: 120 additions & 0 deletions google/cloud/spanner/uuid.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
// Copyright 2025 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#ifndef GOOGLE_CLOUD_CPP_GOOGLE_CLOUD_SPANNER_UUID_H
#define GOOGLE_CLOUD_CPP_GOOGLE_CLOUD_SPANNER_UUID_H

#include "google/cloud/spanner/version.h"
#include "google/cloud/status_or.h"
#include "absl/numeric/int128.h"
#include "absl/strings/string_view.h"
#include <cstdint>
#include <iosfwd>
#include <string>
#include <utility>

namespace google {
namespace cloud {
namespace spanner {
GOOGLE_CLOUD_CPP_INLINE_NAMESPACE_BEGIN

/**
* A representation of the Spanner UUID type: A fixed size 16 byte value
* that can be represented as a 32-digit hexadecimal string.
*
* @see https://cloud.google.com/spanner/docs/data-types#uuid_type
*/
class Uuid {
public:
/// Default construction yields a zero value UUID.
Uuid() = default;

/// Construct a UUID from one unsigned 128-bit integer.
explicit Uuid(absl::uint128 value);

/// Construct a UUID from two unsigned 64-bit pieces.
Uuid(std::uint64_t high_bits, std::uint64_t low_bits);

/// @name Regular value type, supporting copy, assign, move.
///@{
Uuid(Uuid&&) = default;
Uuid& operator=(Uuid&&) = default;
Uuid(Uuid const&) = default;
Uuid& operator=(Uuid const&) = default;
///@}

/// @name Relational operators
///
///@{
friend bool operator==(Uuid const& lhs, Uuid const& rhs) {
return lhs.uuid_ == rhs.uuid_;
}
friend bool operator!=(Uuid const& lhs, Uuid const& rhs) {
return !(lhs == rhs);
}
friend bool operator<(Uuid const& lhs, Uuid const& rhs) {
return lhs.uuid_ < rhs.uuid_;
}
friend bool operator<=(Uuid const& lhs, Uuid const& rhs) {
return !(rhs < lhs);
}
friend bool operator>=(Uuid const& lhs, Uuid const& rhs) {
return !(lhs < rhs);
}
friend bool operator>(Uuid const& lhs, Uuid const& rhs) { return rhs < lhs; }
///@}

/// @name Returns a pair of unsigned 64-bit integers representing the UUID.
std::pair<std::uint64_t, std::uint64_t> As64BitPair() const;

/// @name Conversion to unsigned 128-bit integer representation.
explicit operator absl::uint128() const { return uuid_; }

/// @name Conversion to a lower case string formatted as:
/// [8 hex-digits]-[4 hex-digits]-[4 hex-digits]-[4 hex-digits]-[12
/// hex-digits]
/// Example: 0b6ed04c-a16d-fc46-5281-7f9978c13738
explicit operator std::string() const;

/// @name Output streaming
friend std::ostream& operator<<(std::ostream& os, Uuid uuid) {
return os << std::string(uuid);
}

private:
absl::uint128 uuid_ = 0;
};

/**
* Parses a textual representation a `Uuid` from a string of hexadecimal digits.
* Returns an error if unable to parse the given input.
*
* Acceptable input strings must consist of 32 hexadecimal digits: [0-9a-fA-F].
* Optional curly braces are allowed around the entire sequence of digits as are
* hyphens between any pair of hexadecimal digits.
*
* Example acceptable inputs:
* - {0b6ed04c-a16d-fc46-5281-7f9978c13738}
* - 0b6ed04ca16dfc4652817f9978c13738
* - 7Bf8-A7b8-1917-1919-2625-F208-c582-4254
* - {DECAFBAD-DEAD-FADE-CAFE-FEEDFACEBEEF}
*/
StatusOr<Uuid> MakeUuid(absl::string_view s);

GOOGLE_CLOUD_CPP_INLINE_NAMESPACE_END
} // namespace spanner
} // namespace cloud
} // namespace google

#endif // GOOGLE_CLOUD_CPP_GOOGLE_CLOUD_SPANNER_UUID_H
Loading
Loading