Skip to content
Merged
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions components/core/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -439,6 +439,8 @@ set(SOURCE_FILES_unitTest
src/clp/ffi/ir_stream/IrUnitType.hpp
src/clp/ffi/ir_stream/KvIrDeserializerImpl.cpp
src/clp/ffi/ir_stream/KvIrDeserializerImpl.hpp
src/clp/ffi/ir_stream/UnstructuredIrDeserializerImpl.cpp
src/clp/ffi/ir_stream/UnstructuredIrDeserializerImpl.hpp
src/clp/ffi/ir_stream/ir_unit_deserialization_methods.cpp
src/clp/ffi/ir_stream/ir_unit_deserialization_methods.hpp
src/clp/ffi/ir_stream/protocol_constants.hpp
Expand Down
15 changes: 11 additions & 4 deletions components/core/src/clp/ffi/SchemaTree.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -216,16 +216,23 @@ class SchemaTree {
*/
class NodeLocator {
public:
NodeLocator(Node::id_t parent_id, std::string_view key_name, Node::Type type)
constexpr NodeLocator(Node::id_t parent_id, std::string_view key_name, Node::Type type)
: m_parent_id{parent_id},
m_key_name{key_name},
m_type{type} {}

[[nodiscard]] auto get_parent_id() const -> Node::id_t { return m_parent_id; }
constexpr ~NodeLocator() = default;

[[nodiscard]] auto get_key_name() const -> std::string_view { return m_key_name; }
constexpr NodeLocator(NodeLocator const&) noexcept = default;
constexpr NodeLocator(NodeLocator&&) noexcept = default;
constexpr auto operator=(NodeLocator const&) noexcept -> NodeLocator& = default;
constexpr auto operator=(NodeLocator&&) noexcept -> NodeLocator& = default;

[[nodiscard]] constexpr auto get_parent_id() const -> Node::id_t { return m_parent_id; }

[[nodiscard]] constexpr auto get_key_name() const -> std::string_view { return m_key_name; }

[[nodiscard]] auto get_type() const -> Node::Type { return m_type; }
[[nodiscard]] constexpr auto get_type() const -> Node::Type { return m_type; }

private:
Node::id_t m_parent_id;
Expand Down
52 changes: 36 additions & 16 deletions components/core/src/clp/ffi/ir_stream/Deserializer.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,17 +2,16 @@
#define CLP_FFI_IR_STREAM_DESERIALIZER_HPP

#include <cstddef>
#include <cstdint>
#include <memory>
#include <string>
#include <system_error>
#include <utility>
#include <vector>

#include <nlohmann/json.hpp>
#include <nlohmann/json_fwd.hpp>
#include <ystdlib/error_handling/Result.hpp>

#include "../../ir/types.hpp"
#include "../../ReaderInterface.hpp"
#include "../../time_types.hpp"
#include "../SchemaTree.hpp"
Expand All @@ -23,6 +22,7 @@
#include "protocol_constants.hpp"
#include "search/AstEvaluationResult.hpp"
#include "search/QueryHandlerReq.hpp"
#include "UnstructuredIrDeserializerImpl.hpp"
#include "utils.hpp"

// This include has a circular dependency with the `.inc` file.
Expand Down Expand Up @@ -244,9 +244,7 @@ auto Deserializer<IrUnitHandlerType, QueryHandlerType>::create_generic(
IrUnitHandlerType ir_unit_handler,
QueryHandlerType query_handler
) -> ystdlib::error_handling::Result<Deserializer> {
[[maybe_unused]] auto const encoding_type{
YSTDLIB_ERROR_HANDLING_TRYX(get_encoding_type(reader))
};
auto const encoding_type{YSTDLIB_ERROR_HANDLING_TRYX(get_encoding_type(reader))};
auto const [metadata_type, metadata]{YSTDLIB_ERROR_HANDLING_TRYX(deserialize_preamble(reader))};

if (cProtocol::Metadata::EncodingJson != metadata_type) {
Expand All @@ -261,21 +259,40 @@ auto Deserializer<IrUnitHandlerType, QueryHandlerType>::create_generic(
if (metadata_json.end() == version_iter || false == version_iter->is_string()) {
return std::errc::protocol_error;
}
auto const version = version_iter->get_ref<nlohmann::json::string_t&>();
if (ffi::ir_stream::IRProtocolErrorCode::Supported
!= ffi::ir_stream::validate_protocol_version(version))
auto const version{version_iter->get_ref<nlohmann::json::string_t const&>()};
auto const protocol_version_status{ffi::ir_stream::validate_protocol_version(version)};
if (ffi::ir_stream::IRProtocolErrorCode::Supported != protocol_version_status
&& ffi::ir_stream::IRProtocolErrorCode::BackwardCompatible != protocol_version_status)
Comment thread
jonathan-imanu marked this conversation as resolved.
Outdated
{
return std::errc::protocol_not_supported;
}

if (metadata_json.contains(cProtocol::Metadata::UserDefinedMetadataKey)
&& false == metadata_json.at(cProtocol::Metadata::UserDefinedMetadataKey).is_object())
{
return std::errc::protocol_not_supported;
std::unique_ptr<DeserializerImpl> deserializer_impl;
if (ffi::ir_stream::IRProtocolErrorCode::Supported == protocol_version_status) {
Comment thread
jonathan-imanu marked this conversation as resolved.
Outdated
if (metadata_json.contains(cProtocol::Metadata::UserDefinedMetadataKey)
&& false == metadata_json.at(cProtocol::Metadata::UserDefinedMetadataKey).is_object())
{
return std::errc::protocol_not_supported;
}
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is the metadata check. It should be supported for both kv-ir and the old IR format. Let's move it outside of this if-else cluster.

deserializer_impl = std::make_unique<KvIrDeserializerImpl>();
} else {
if (EncodingType::FourByte == encoding_type) {
deserializer_impl = YSTDLIB_ERROR_HANDLING_TRYX(
UnstructuredIrDeserializerImpl<ir::four_byte_encoded_variable_t>::create(
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Need to update the docstring to reflect that it forwards UnstructuredIrDeserializerImpl::create's return values on failure.

metadata_json
)
);
} else {
deserializer_impl = YSTDLIB_ERROR_HANDLING_TRYX(
UnstructuredIrDeserializerImpl<ir::eight_byte_encoded_variable_t>::create(
metadata_json
)
);
}
}

return Deserializer{
std::make_unique<KvIrDeserializerImpl>(),
std::move(deserializer_impl),
std::move(ir_unit_handler),
std::move(metadata_json),
std::move(query_handler)
Expand Down Expand Up @@ -329,10 +346,13 @@ auto Deserializer<IrUnitHandler, QueryHandlerType>::deserialize_next_ir_unit(
}

case IrUnitType::SchemaTreeNodeInsertion: {
std::string key_name;
std::string key_name_buffer;
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this buffer should be made as a member variable of Deserializer to avoid memory reallocation. Can you create an issue to track it? @coderabbitai

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🧩 Analysis chain

@LinZhihao-723, I've created a GitHub issue to track moving key_name_buffer to a member variable of Deserializer. The link will be provided once the script completes.


🏁 Script executed:

#!/bin/bash
gh issue create \
  --repo y-scope/clp \
  --title "refactor(kv-ir): Make \`key_name_buffer\` a member variable of \`Deserializer\` to avoid repeated memory reallocation" \
  --body "## Summary

In \`components/core/src/clp/ffi/ir_stream/Deserializer.hpp\`, the \`key_name_buffer\` variable inside \`Deserializer::deserialize_next_ir_unit\` is currently declared as a local \`std::string\` within the \`IrUnitType::SchemaTreeNodeInsertion\` case block:

\`\`\`cpp
case IrUnitType::SchemaTreeNodeInsertion: {
    std::string key_name_buffer;
    ...
}
\`\`\`

This causes a heap allocation on every call to \`deserialize_next_ir_unit\` that encounters a schema tree node insertion IR unit.

## Proposed Change

Move \`key_name_buffer\` to be a member variable of \`Deserializer\`, and clear it at the start of each use. This allows the allocated memory to be reused across calls, avoiding repeated heap allocations.

## References

- PR: https://github.com/y-scope/clp/pull/2227
- Comment: https://github.com/y-scope/clp/pull/2227#discussion_r3191785760
- Requested by: `@LinZhihao-723`
"

Length of output: 43


@LinZhihao-723, I've created the issue to track this: #2249

The issue captures the proposal to move key_name_buffer from a local variable inside the IrUnitType::SchemaTreeNodeInsertion case block to a member variable of Deserializer, so that the allocated memory can be reused across calls instead of being reallocated on every schema tree node insertion encountered during deserialization.


🧠 Learnings used
Learnt from: SharafMohamed
Repo: y-scope/clp PR: 2080
File: components/core/src/clp/ProfilerReporter.hpp:34-53
Timestamp: 2026-03-07T10:37:02.035Z
Learning: In components/core/src/clp/ProfilerReporter.hpp, do not suggest or implement resetting Profiler::reset_runtime_measurements() in ProfilerReporter constructors. The Reporter is intended to accumulate and report all runtime measurements up to its destruction, enabling mid-run reporting with a single reporter created at program start. When reviewing similar code, prefer preserving accumulated profiler state and only report/clear at explicit points if the design requires it; avoid constructor-time resets in reporter components.

auto const [is_auto_generated, node_locator]{YSTDLIB_ERROR_HANDLING_TRYX(
m_deserializer_impl
->deserialize_ir_unit_schema_tree_node_insertion(reader, tag, key_name)
m_deserializer_impl->deserialize_ir_unit_schema_tree_node_insertion(
reader,
tag,
key_name_buffer
)
)};
auto& schema_tree_to_insert{
is_auto_generated ? m_auto_gen_keys_schema_tree : m_user_gen_keys_schema_tree
Expand Down
23 changes: 13 additions & 10 deletions components/core/src/clp/ffi/ir_stream/DeserializerImpl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@

namespace clp::ffi::ir_stream {
/**
* Virtual base class defining methods for deserializing IR units and log events called by
* `Deserializer`.
* Virtual base class defining methods for deserializing IR units and log events
* called by `Deserializer`.
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This change should be reverted (might be introduced by IDE auto-formatter).
Please check the diff everytime you make a commit to make sure all changes are intentional.

*/
class DeserializerImpl {
public:
Expand All @@ -38,8 +38,8 @@ class DeserializerImpl {
/**
* Deserializes a UTC offset change IR unit from the given reader.
* @param reader
* @return A result containing the deserialized UTC offset on success, or an error code
* indicating the failure:
* @return A result containing the deserialized UTC offset on success, or an
* error code indicating the failure:
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Revert.

* - Forwards `deserialize_int`'s return values on failure.
*/
[[nodiscard]] static auto deserialize_ir_unit_utc_offset_change(ReaderInterface& reader)
Expand All @@ -48,7 +48,8 @@ class DeserializerImpl {
/**
* Deserializes the type of the next IR unit from the given reader.
* @param reader
* @return A result containing a pair on success, or an error code indicating the failure:
* @return A result containing a pair on success, or an error code indicating
* the failure:
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Revert.

* - The pair:
* - The type of the deserialized IR unit.
* - The tag of the deserialized IR unit.
Expand All @@ -65,8 +66,8 @@ class DeserializerImpl {
* @param auto_gen_keys_schema_tree
* @param user_gen_keys_schema_tree
* @param utc_offset
* @return A result containing the deserialized KV pair log event on success, or an error code
* indicating the failure defined by the derived class.
* @return A result containing the deserialized KV pair log event on success,
* or an error code indicating the failure defined by the derived class.
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Revert.

*/
[[nodiscard]] virtual auto deserialize_ir_unit_kv_pair_log_event(
ReaderInterface& reader,
Expand All @@ -81,8 +82,10 @@ class DeserializerImpl {
* Deserializes a schema tree node insertion IR unit from the given reader.
* @param reader
* @param tag
* @param key_name Returns the inserted node's key name.
* @return A result containing a pair on success, or an error code indicating the failure:
* @param key_name_buffer Returns the inserted node's key name. May be empty
* if the implementation does not read a key from the stream.
* @return A result containing a pair on success, or an error code indicating
* the failure:
Comment thread
jonathan-imanu marked this conversation as resolved.
Outdated
* - The pair:
* - Whether the node is for auto-generated keys schema tree.
* - The locator of the inserted schema tree node.
Expand All @@ -91,7 +94,7 @@ class DeserializerImpl {
[[nodiscard]] virtual auto deserialize_ir_unit_schema_tree_node_insertion(
ReaderInterface& reader,
encoded_tag_t tag,
std::string& key_name
std::string& key_name_buffer
) -> ystdlib::error_handling::Result<std::pair<bool, SchemaTree::NodeLocator>>
= 0;
};
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,16 +27,22 @@ auto IrErrorCategory::message(IrDeserializationErrorEnum error_enum) const -> st
return "invalid key-ID-group ordering";
case IrDeserializationErrorEnum::InvalidMagicNumber:
return "invalid magic number";
case IrDeserializationErrorEnum::InvalidReferenceTimestampMetadata:
return "reference timestamp metadata is missing or not a string";
case IrDeserializationErrorEnum::InvalidReferenceTimestampValue:
return "reference timestamp string could not be parsed as an integer";
case IrDeserializationErrorEnum::InvalidTag:
return "invalid tag";
case IrDeserializationErrorEnum::UnsupportedMetadataFormat:
return "IR stream metadata format unsupported";
case IrDeserializationErrorEnum::UnsupportedVersion:
return "IR stream version unsupported";
case IrDeserializationErrorEnum::MissingRequiredSchemaNodes:
return "required message or timestamp schema tree node is missing";
case IrDeserializationErrorEnum::UnknownSchemaTreeNodeType:
return "unknown schema tree node type";
case IrDeserializationErrorEnum::UnknownValueType:
return "unknown value type";
case IrDeserializationErrorEnum::UnsupportedMetadataFormat:
return "IR stream metadata format unsupported";
case IrDeserializationErrorEnum::UnsupportedVersion:
return "IR stream version unsupported";
default:
return "unknown error code enum";
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,14 @@ enum class IrDeserializationErrorEnum : uint8_t {
IncompleteStream,
InvalidKeyGroupOrdering,
InvalidMagicNumber,
InvalidReferenceTimestampMetadata,
InvalidReferenceTimestampValue,
InvalidTag,
UnsupportedMetadataFormat,
UnsupportedVersion,
MissingRequiredSchemaNodes,
UnknownSchemaTreeNodeType,
UnknownValueType,
UnsupportedMetadataFormat,
UnsupportedVersion,
Comment on lines +19 to +26
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major | ⚡ Quick win

Keep existing IrDeserializationErrorEnum values stable.

Lines 19-26 insert new members in the middle of a public uint8_t enum, which renumbers existing codes like InvalidTag and UnsupportedVersion. That is a breaking change for any caller that persists or branches on the numeric value.

Proposed fix
 enum class IrDeserializationErrorEnum : uint8_t {
     DuplicateKey = 1,
-    EncodedTextAstDeserializationFailure,
-    EndOfStream,
-    IncompleteStream,
-    InvalidKeyGroupOrdering,
-    InvalidMagicNumber,
-    InvalidReferenceTimestampMetadata,
-    InvalidReferenceTimestampValue,
-    InvalidTag,
-    MissingRequiredSchemaNodes,
-    UnknownSchemaTreeNodeType,
-    UnknownValueType,
-    UnsupportedMetadataFormat,
-    UnsupportedVersion,
+    EncodedTextAstDeserializationFailure = 2,
+    EndOfStream = 3,
+    IncompleteStream = 4,
+    InvalidKeyGroupOrdering = 5,
+    InvalidMagicNumber = 6,
+    InvalidTag = 7,
+    UnknownSchemaTreeNodeType = 8,
+    UnknownValueType = 9,
+    UnsupportedMetadataFormat = 10,
+    UnsupportedVersion = 11,
+    InvalidReferenceTimestampMetadata = 12,
+    InvalidReferenceTimestampValue = 13,
+    MissingRequiredSchemaNodes = 14,
 };
🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@components/core/src/clp/ffi/ir_stream/IrDeserializationError.hpp` around
lines 19 - 26, The new enum members were inserted mid-list in
IrDeserializationErrorEnum, which changes the numeric values of existing members
(e.g., InvalidTag, UnsupportedVersion) and is a breaking change; restore
stability by either appending any new error members to the end of
IrDeserializationErrorEnum or by assigning explicit numeric values to every enum
member so existing numeric codes (including InvalidReferenceTimestampMetadata,
InvalidReferenceTimestampValue, InvalidTag, MissingRequiredSchemaNodes,
UnknownSchemaTreeNodeType, UnknownValueType, UnsupportedMetadataFormat,
UnsupportedVersion) keep their original values; update the enum declaration in
IrDeserializationErrorEnum accordingly and add a comment explaining that numeric
values are stable and must not be reordered.

};

using IrDeserializationError = ystdlib::error_handling::ErrorCode<IrDeserializationErrorEnum>;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,8 @@ auto KvIrDeserializerImpl::deserialize_ir_unit_kv_pair_log_event(
auto KvIrDeserializerImpl::deserialize_ir_unit_schema_tree_node_insertion(
ReaderInterface& reader,
encoded_tag_t tag,
std::string& key_name
std::string& key_name_buffer
) -> ystdlib::error_handling::Result<std::pair<bool, SchemaTree::NodeLocator>> {
return ir_stream::deserialize_ir_unit_schema_tree_node_insertion(reader, tag, key_name);
return ir_stream::deserialize_ir_unit_schema_tree_node_insertion(reader, tag, key_name_buffer);
}
} // namespace clp::ffi::ir_stream
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ class KvIrDeserializerImpl : public DeserializerImpl {
[[nodiscard]] auto deserialize_ir_unit_schema_tree_node_insertion(
ReaderInterface& reader,
encoded_tag_t tag,
std::string& key_name
std::string& key_name_buffer
) -> ystdlib::error_handling::Result<std::pair<bool, SchemaTree::NodeLocator>> override;
};
} // namespace clp::ffi::ir_stream
Expand Down
Loading
Loading