Skip to content
Open
2 changes: 2 additions & 0 deletions components/core/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -439,6 +439,8 @@ set(SOURCE_FILES_unitTest
src/clp/ffi/ir_stream/IrUnitType.hpp
src/clp/ffi/ir_stream/KvIrDeserializerImpl.cpp
src/clp/ffi/ir_stream/KvIrDeserializerImpl.hpp
src/clp/ffi/ir_stream/UnstructuredIrDeserializerImpl.cpp
src/clp/ffi/ir_stream/UnstructuredIrDeserializerImpl.hpp
src/clp/ffi/ir_stream/ir_unit_deserialization_methods.cpp
src/clp/ffi/ir_stream/ir_unit_deserialization_methods.hpp
src/clp/ffi/ir_stream/protocol_constants.hpp
Expand Down
15 changes: 11 additions & 4 deletions components/core/src/clp/ffi/SchemaTree.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -216,16 +216,23 @@ class SchemaTree {
*/
class NodeLocator {
public:
NodeLocator(Node::id_t parent_id, std::string_view key_name, Node::Type type)
constexpr NodeLocator(Node::id_t parent_id, std::string_view key_name, Node::Type type)
: m_parent_id{parent_id},
m_key_name{key_name},
m_type{type} {}

[[nodiscard]] auto get_parent_id() const -> Node::id_t { return m_parent_id; }
constexpr ~NodeLocator() = default;

[[nodiscard]] auto get_key_name() const -> std::string_view { return m_key_name; }
constexpr NodeLocator(NodeLocator const&) noexcept = default;
constexpr NodeLocator(NodeLocator&&) noexcept = default;
constexpr auto operator=(NodeLocator const&) noexcept -> NodeLocator& = default;
constexpr auto operator=(NodeLocator&&) noexcept -> NodeLocator& = default;

[[nodiscard]] constexpr auto get_parent_id() const -> Node::id_t { return m_parent_id; }

[[nodiscard]] constexpr auto get_key_name() const -> std::string_view { return m_key_name; }

[[nodiscard]] auto get_type() const -> Node::Type { return m_type; }
[[nodiscard]] constexpr auto get_type() const -> Node::Type { return m_type; }

private:
Node::id_t m_parent_id;
Expand Down
73 changes: 50 additions & 23 deletions components/core/src/clp/ffi/ir_stream/Deserializer.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,27 +2,28 @@
#define CLP_FFI_IR_STREAM_DESERIALIZER_HPP

#include <cstddef>
#include <cstdint>
#include <memory>
#include <string>
#include <system_error>
#include <utility>
#include <vector>

#include <nlohmann/json.hpp>
#include <nlohmann/json_fwd.hpp>
#include <ystdlib/error_handling/Result.hpp>

#include "../../ir/types.hpp"
#include "../../ReaderInterface.hpp"
#include "../../time_types.hpp"
#include "../SchemaTree.hpp"
#include "DeserializerImpl.hpp"
#include "IrDeserializationError.hpp"
#include "IrUnitHandlerReq.hpp"
#include "IrUnitType.hpp"
#include "KvIrDeserializerImpl.hpp"
#include "protocol_constants.hpp"
#include "search/AstEvaluationResult.hpp"
#include "search/QueryHandlerReq.hpp"
#include "UnstructuredIrDeserializerImpl.hpp"
#include "utils.hpp"

// This include has a circular dependency with the `.inc` file.
Expand Down Expand Up @@ -175,14 +176,19 @@ class Deserializer {
* @param ir_unit_handler
* @param query_handler
* @return A result containing the deserializer or an error code indicating the failure:
* - std::errc::result_out_of_range if the IR stream is truncated
* - std::errc::protocol_error if the IR stream is corrupted
* - std::errc::protocol_not_supported if either:
* - the IR stream contains an unsupported metadata format;
* - the IR stream's version is unsupported;
* - or the IR stream's user-defined metadata is not a JSON object.
* - IrDeserializationErrorEnum::UnsupportedMetadataFormat if:
* - the IR stream's preamble metadata encoding is not JSON;
* - the metadata payload cannot be parsed as JSON;
* - the metadata object does not contain a string value for
* `cProtocol::Metadata::VersionKey`;
* - or for a supported key-value pair IR protocol version,
* `cProtocol::Metadata::UserDefinedMetadataKey` is present but its value is not a JSON
* object.
* - IrDeserializationErrorEnum::UnsupportedVersion if the protocol version is neither supported
* nor backward compatible.
* - Forwards `deserialize_preamble`'s return values on failure.
* - Forwards `get_encoding_type`'s return values on failure.
* - Forwards `UnstructuredIrDeserializerImpl::create`'s return values on failure.
*/
[[nodiscard]] static auto create_generic(
ReaderInterface& reader,
Expand Down Expand Up @@ -244,38 +250,56 @@ auto Deserializer<IrUnitHandlerType, QueryHandlerType>::create_generic(
IrUnitHandlerType ir_unit_handler,
QueryHandlerType query_handler
) -> ystdlib::error_handling::Result<Deserializer> {
[[maybe_unused]] auto const encoding_type{
YSTDLIB_ERROR_HANDLING_TRYX(get_encoding_type(reader))
};
auto const encoding_type{YSTDLIB_ERROR_HANDLING_TRYX(get_encoding_type(reader))};
auto const [metadata_type, metadata]{YSTDLIB_ERROR_HANDLING_TRYX(deserialize_preamble(reader))};

if (cProtocol::Metadata::EncodingJson != metadata_type) {
return std::errc::protocol_not_supported;
return IrDeserializationError{IrDeserializationErrorEnum::UnsupportedMetadataFormat};
}

auto metadata_json = nlohmann::json::parse(metadata, nullptr, false);
if (metadata_json.is_discarded()) {
return std::errc::protocol_error;
return IrDeserializationError{IrDeserializationErrorEnum::UnsupportedMetadataFormat};
}
auto const version_iter{metadata_json.find(cProtocol::Metadata::VersionKey)};
if (metadata_json.end() == version_iter || false == version_iter->is_string()) {
return std::errc::protocol_error;
return IrDeserializationError{IrDeserializationErrorEnum::UnsupportedMetadataFormat};
}
auto const version = version_iter->get_ref<nlohmann::json::string_t&>();
if (ffi::ir_stream::IRProtocolErrorCode::Supported
!= ffi::ir_stream::validate_protocol_version(version))
auto const version{version_iter->get_ref<nlohmann::json::string_t const&>()};
auto const protocol_version_status{ffi::ir_stream::validate_protocol_version(version)};
if (IRProtocolErrorCode::Supported != protocol_version_status
&& IRProtocolErrorCode::BackwardCompatible != protocol_version_status)
{
return std::errc::protocol_not_supported;
return IrDeserializationError{IrDeserializationErrorEnum::UnsupportedVersion};
}

if (metadata_json.contains(cProtocol::Metadata::UserDefinedMetadataKey)
&& false == metadata_json.at(cProtocol::Metadata::UserDefinedMetadataKey).is_object())
{
return std::errc::protocol_not_supported;
return IrDeserializationError{IrDeserializationErrorEnum::UnsupportedMetadataFormat};
}

std::unique_ptr<DeserializerImpl> deserializer_impl;
if (IRProtocolErrorCode::Supported == protocol_version_status) {
deserializer_impl = std::make_unique<KvIrDeserializerImpl>();
} else {
if (EncodingType::FourByte == encoding_type) {
deserializer_impl = YSTDLIB_ERROR_HANDLING_TRYX(
UnstructuredIrDeserializerImpl<ir::four_byte_encoded_variable_t>::create(
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Need to update the docstring to reflect that it forwards UnstructuredIrDeserializerImpl::create's return values on failure.

metadata_json
)
);
} else {
deserializer_impl = YSTDLIB_ERROR_HANDLING_TRYX(
UnstructuredIrDeserializerImpl<ir::eight_byte_encoded_variable_t>::create(
metadata_json
)
);
}
}

return Deserializer{
std::make_unique<KvIrDeserializerImpl>(),
std::move(deserializer_impl),
std::move(ir_unit_handler),
std::move(metadata_json),
std::move(query_handler)
Expand Down Expand Up @@ -329,10 +353,13 @@ auto Deserializer<IrUnitHandler, QueryHandlerType>::deserialize_next_ir_unit(
}

case IrUnitType::SchemaTreeNodeInsertion: {
std::string key_name;
std::string key_name_buffer;
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this buffer should be made as a member variable of Deserializer to avoid memory reallocation. Can you create an issue to track it? @coderabbitai

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🧩 Analysis chain

@LinZhihao-723, I've created a GitHub issue to track moving key_name_buffer to a member variable of Deserializer. The link will be provided once the script completes.


🏁 Script executed:

#!/bin/bash
gh issue create \
  --repo y-scope/clp \
  --title "refactor(kv-ir): Make \`key_name_buffer\` a member variable of \`Deserializer\` to avoid repeated memory reallocation" \
  --body "## Summary

In \`components/core/src/clp/ffi/ir_stream/Deserializer.hpp\`, the \`key_name_buffer\` variable inside \`Deserializer::deserialize_next_ir_unit\` is currently declared as a local \`std::string\` within the \`IrUnitType::SchemaTreeNodeInsertion\` case block:

\`\`\`cpp
case IrUnitType::SchemaTreeNodeInsertion: {
    std::string key_name_buffer;
    ...
}
\`\`\`

This causes a heap allocation on every call to \`deserialize_next_ir_unit\` that encounters a schema tree node insertion IR unit.

## Proposed Change

Move \`key_name_buffer\` to be a member variable of \`Deserializer\`, and clear it at the start of each use. This allows the allocated memory to be reused across calls, avoiding repeated heap allocations.

## References

- PR: https://github.com/y-scope/clp/pull/2227
- Comment: https://github.com/y-scope/clp/pull/2227#discussion_r3191785760
- Requested by: `@LinZhihao-723`
"

Length of output: 43


@LinZhihao-723, I've created the issue to track this: #2249

The issue captures the proposal to move key_name_buffer from a local variable inside the IrUnitType::SchemaTreeNodeInsertion case block to a member variable of Deserializer, so that the allocated memory can be reused across calls instead of being reallocated on every schema tree node insertion encountered during deserialization.


🧠 Learnings used
Learnt from: SharafMohamed
Repo: y-scope/clp PR: 2080
File: components/core/src/clp/ProfilerReporter.hpp:34-53
Timestamp: 2026-03-07T10:37:02.035Z
Learning: In components/core/src/clp/ProfilerReporter.hpp, do not suggest or implement resetting Profiler::reset_runtime_measurements() in ProfilerReporter constructors. The Reporter is intended to accumulate and report all runtime measurements up to its destruction, enabling mid-run reporting with a single reporter created at program start. When reviewing similar code, prefer preserving accumulated profiler state and only report/clear at explicit points if the design requires it; avoid constructor-time resets in reporter components.

auto const [is_auto_generated, node_locator]{YSTDLIB_ERROR_HANDLING_TRYX(
m_deserializer_impl
->deserialize_ir_unit_schema_tree_node_insertion(reader, tag, key_name)
m_deserializer_impl->deserialize_ir_unit_schema_tree_node_insertion(
reader,
tag,
key_name_buffer
)
)};
auto& schema_tree_to_insert{
is_auto_generated ? m_auto_gen_keys_schema_tree : m_user_gen_keys_schema_tree
Expand Down
5 changes: 3 additions & 2 deletions components/core/src/clp/ffi/ir_stream/DeserializerImpl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,8 @@ class DeserializerImpl {
* Deserializes a schema tree node insertion IR unit from the given reader.
* @param reader
* @param tag
* @param key_name Returns the inserted node's key name.
* @param key_name_buffer Returns the inserted node's key name. May be empty if the
* implementation does not need to read the key from the stream.
* @return A result containing a pair on success, or an error code indicating the failure:
* - The pair:
* - Whether the node is for auto-generated keys schema tree.
Expand All @@ -91,7 +92,7 @@ class DeserializerImpl {
[[nodiscard]] virtual auto deserialize_ir_unit_schema_tree_node_insertion(
ReaderInterface& reader,
encoded_tag_t tag,
std::string& key_name
std::string& key_name_buffer
) -> ystdlib::error_handling::Result<std::pair<bool, SchemaTree::NodeLocator>>
= 0;
};
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,16 +27,22 @@ auto IrErrorCategory::message(IrDeserializationErrorEnum error_enum) const -> st
return "invalid key-ID-group ordering";
case IrDeserializationErrorEnum::InvalidMagicNumber:
return "invalid magic number";
case IrDeserializationErrorEnum::InvalidReferenceTimestampMetadata:
return "reference timestamp metadata is missing or not a string";
case IrDeserializationErrorEnum::InvalidReferenceTimestampValue:
return "reference timestamp string could not be parsed as an integer";
case IrDeserializationErrorEnum::InvalidTag:
return "invalid tag";
case IrDeserializationErrorEnum::UnsupportedMetadataFormat:
return "IR stream metadata format unsupported";
case IrDeserializationErrorEnum::UnsupportedVersion:
return "IR stream version unsupported";
case IrDeserializationErrorEnum::MissingRequiredSchemaNodes:
return "required message or timestamp schema tree node is missing";
case IrDeserializationErrorEnum::UnknownSchemaTreeNodeType:
return "unknown schema tree node type";
case IrDeserializationErrorEnum::UnknownValueType:
return "unknown value type";
case IrDeserializationErrorEnum::UnsupportedMetadataFormat:
return "IR stream metadata format unsupported";
case IrDeserializationErrorEnum::UnsupportedVersion:
return "IR stream version unsupported";
default:
return "unknown error code enum";
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,14 @@ enum class IrDeserializationErrorEnum : uint8_t {
IncompleteStream,
InvalidKeyGroupOrdering,
InvalidMagicNumber,
InvalidReferenceTimestampMetadata,
InvalidReferenceTimestampValue,
InvalidTag,
UnsupportedMetadataFormat,
UnsupportedVersion,
MissingRequiredSchemaNodes,
UnknownSchemaTreeNodeType,
UnknownValueType,
UnsupportedMetadataFormat,
UnsupportedVersion,
Comment on lines +19 to +26
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major | ⚡ Quick win

Keep existing IrDeserializationErrorEnum values stable.

Lines 19-26 insert new members in the middle of a public uint8_t enum, which renumbers existing codes like InvalidTag and UnsupportedVersion. That is a breaking change for any caller that persists or branches on the numeric value.

Proposed fix
 enum class IrDeserializationErrorEnum : uint8_t {
     DuplicateKey = 1,
-    EncodedTextAstDeserializationFailure,
-    EndOfStream,
-    IncompleteStream,
-    InvalidKeyGroupOrdering,
-    InvalidMagicNumber,
-    InvalidReferenceTimestampMetadata,
-    InvalidReferenceTimestampValue,
-    InvalidTag,
-    MissingRequiredSchemaNodes,
-    UnknownSchemaTreeNodeType,
-    UnknownValueType,
-    UnsupportedMetadataFormat,
-    UnsupportedVersion,
+    EncodedTextAstDeserializationFailure = 2,
+    EndOfStream = 3,
+    IncompleteStream = 4,
+    InvalidKeyGroupOrdering = 5,
+    InvalidMagicNumber = 6,
+    InvalidTag = 7,
+    UnknownSchemaTreeNodeType = 8,
+    UnknownValueType = 9,
+    UnsupportedMetadataFormat = 10,
+    UnsupportedVersion = 11,
+    InvalidReferenceTimestampMetadata = 12,
+    InvalidReferenceTimestampValue = 13,
+    MissingRequiredSchemaNodes = 14,
 };
🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@components/core/src/clp/ffi/ir_stream/IrDeserializationError.hpp` around
lines 19 - 26, The new enum members were inserted mid-list in
IrDeserializationErrorEnum, which changes the numeric values of existing members
(e.g., InvalidTag, UnsupportedVersion) and is a breaking change; restore
stability by either appending any new error members to the end of
IrDeserializationErrorEnum or by assigning explicit numeric values to every enum
member so existing numeric codes (including InvalidReferenceTimestampMetadata,
InvalidReferenceTimestampValue, InvalidTag, MissingRequiredSchemaNodes,
UnknownSchemaTreeNodeType, UnknownValueType, UnsupportedMetadataFormat,
UnsupportedVersion) keep their original values; update the enum declaration in
IrDeserializationErrorEnum accordingly and add a comment explaining that numeric
values are stable and must not be reordered.

};

using IrDeserializationError = ystdlib::error_handling::ErrorCode<IrDeserializationErrorEnum>;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,8 @@ auto KvIrDeserializerImpl::deserialize_ir_unit_kv_pair_log_event(
auto KvIrDeserializerImpl::deserialize_ir_unit_schema_tree_node_insertion(
ReaderInterface& reader,
encoded_tag_t tag,
std::string& key_name
std::string& key_name_buffer
) -> ystdlib::error_handling::Result<std::pair<bool, SchemaTree::NodeLocator>> {
return ir_stream::deserialize_ir_unit_schema_tree_node_insertion(reader, tag, key_name);
return ir_stream::deserialize_ir_unit_schema_tree_node_insertion(reader, tag, key_name_buffer);
}
} // namespace clp::ffi::ir_stream
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ class KvIrDeserializerImpl : public DeserializerImpl {
[[nodiscard]] auto deserialize_ir_unit_schema_tree_node_insertion(
ReaderInterface& reader,
encoded_tag_t tag,
std::string& key_name
std::string& key_name_buffer
) -> ystdlib::error_handling::Result<std::pair<bool, SchemaTree::NodeLocator>> override;
};
} // namespace clp::ffi::ir_stream
Expand Down
Loading
Loading