forked from facebookincubator/velox
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathBaseClpCursor.h
More file actions
174 lines (152 loc) · 5.26 KB
/
BaseClpCursor.h
File metadata and controls
174 lines (152 loc) · 5.26 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
/*
* Copyright (c) Facebook, Inc. and its affiliates.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#include <cstddef>
#include <cstdint>
#include <memory>
#include <string>
#include <string_view>
#include <vector>

#include "clp_s/InputConfig.hpp"
#include "velox/connectors/clp/ClpConnectorSplit.h"
// Forward declarations of CLP-S types so that this header does not need their
// full definitions. `Expression` is only held through a std::shared_ptr below.
// NOTE(review): `BaseColumnReader` is not referenced in this header itself;
// presumably it is declared for the benefit of including files - verify before
// removing.
namespace clp_s {
class BaseColumnReader;

namespace search::ast {
class Expression;
} // namespace search::ast
} // namespace clp_s
namespace facebook::velox::connector::clp::search_lib {
/// Status codes used by the cursor to report the outcome of preparing a query
/// and loading a split.
///
/// The numeric values are spelled out explicitly; they are the same values the
/// enumerators receive from plain declaration-order numbering.
enum class ErrorCode {
  DictionaryNotFound = 0,
  InternalError = 1,
  InvalidQuerySyntax = 2,
  InvalidTimestampRange = 3,
  LogicalError = 4,
  // State a cursor starts in when it is constructed.
  QueryNotInitialized = 5,
  SchemaNotFound = 6,
  Success = 7,
};
/// The kind of data a column (see `Field`) holds.
///
/// `Unknown` is a negative sentinel; every other enumerator is numbered from
/// zero upwards.
enum class ColumnType {
  Unknown = -1,
  Array = 0,
  Boolean = 1,
  Float = 2,
  Integer = 3,
  String = 4,
  Timestamp = 5,
};
/// Describes one column by its type and name, e.g. an output column requested
/// from a cursor.
///
/// The struct remains an aggregate, so `Field{ColumnType::String, "name"}`
/// keeps working.
struct Field {
  /// Type of the column. Defaults to `ColumnType::Unknown` so that a
  /// default-constructed `Field` never carries an indeterminate value (and a
  /// value-initialized one does not silently become `ColumnType::Array`).
  ColumnType type{ColumnType::Unknown};
  /// Name of the column.
  std::string name;
};
/// Unit in which an epoch timestamp is expressed, ordered from coarsest to
/// finest. Stored in a single byte.
enum class TimestampPrecision : uint8_t {
  Seconds = 0,
  Milliseconds = 1,
  Microseconds = 2,
  Nanoseconds = 3,
};
/// Guesses whether an epoch timestamp is expressed in seconds, milliseconds,
/// microseconds, or nanoseconds.
///
/// The guess is a magnitude heuristic: one year of epoch nanoseconds spans
/// roughly the same numeric range as 1000 years of epoch microseconds, and the
/// same ratio holds between each pair of neighbouring precisions. Timestamps
/// that lie sufficiently close to the epoch can therefore be misclassified;
/// otherwise the estimate should stay accurate for the next 1000 years.
///
/// Note: a future version of the clp-s archive format is expected to store
/// timestamps as nanosecond-precision integers only (the current format
/// permits other precisions). Once that happens this heuristic can be removed.
///
/// @tparam T Numeric type of the timestamp value.
/// @param timestamp The epoch timestamp whose precision is to be estimated.
/// @return The estimated timestamp precision.
template <typename T>
TimestampPrecision estimatePrecision(T timestamp);
/// Builds a Velox `Timestamp` from a floating-point timestamp value.
///
/// @param timestamp The timestamp to convert, given as a double.
/// @return The equivalent Velox timestamp.
Timestamp convertToVeloxTimestamp(double timestamp);
/// Builds a Velox `Timestamp` from an integer timestamp value.
///
/// @param timestamp The timestamp to convert, given as a 64-bit signed integer.
/// @return The equivalent Velox timestamp.
Timestamp convertToVeloxTimestamp(int64_t timestamp);
/// Base interface for running a query against a single CLP-S split (an archive
/// or an IR stream). It covers the whole query lifecycle: parsing and
/// validating the query, loading the relevant split, applying filters, and
/// iterating over the results. Callers are shielded from the low-level details
/// of split access, and get column projection plus batch-wise retrieval of the
/// rows that pass the filter.
class BaseClpCursor {
 public:
  /// Creates a cursor for one split. The cursor keeps its own copy of the
  /// split path; it starts with no split loaded and with the error code
  /// `ErrorCode::QueryNotInitialized`.
  ///
  /// @param inputSource The source the split is read from.
  /// @param splitPath Path of the split.
  explicit BaseClpCursor(
      clp_s::InputSource inputSource,
      std::string_view splitPath)
      : inputSource_(inputSource), splitPath_(splitPath) {}

  virtual ~BaseClpCursor() = default;

  /// Runs a query: the query text is parsed, validated, and prepared for
  /// execution.
  ///
  /// @param query The KQL query to execute.
  /// @param outputColumns The columns that the query result should contain.
  void executeQuery(
      const std::string& query,
      const std::vector<Field>& outputColumns);

  /// Retrieves the next batch of rows. The split is loaded first if that has
  /// not happened yet.
  ///
  /// @param numRows Upper bound on the number of rows to fetch.
  /// @return The number of rows scanned.
  virtual uint64_t fetchNext(uint64_t numRows) = 0;

  /// Reports how many rows satisfy the query; callers use this to size the
  /// result vector.
  ///
  /// @return Count of rows matching the query.
  virtual size_t getNumFilteredRows() const = 0;

  /// Builds a Vector with the requested type and size.
  ///
  /// Complex types such as ROW are handled by recursively creating the child
  /// vectors. Primitive types get a LazyVector, which loads its data from the
  /// underlying data source only when it is accessed.
  ///
  /// @param pool The memory pool used by ClpDataSource to create the vector.
  /// @param vectorType The type of the vector to create.
  /// @param vectorSize The size of the vector to create.
  /// @return A Vector of the specified type and size.
  virtual VectorPtr createVector(
      memory::MemoryPool* pool,
      const TypePtr& vectorType,
      size_t vectorSize) = 0;

 protected:
  /// Loads the split, either from an archive or from an IR stream.
  ///
  /// @return The error code.
  virtual ErrorCode loadSplit() = 0;

  // Starts out false; presumably set once the split has been loaded.
  bool currentSplitLoaded_{false};
  // Every cursor begins in the "query not initialized" state.
  ErrorCode errorCode_{ErrorCode::QueryNotInitialized};
  // Query expression; presumably produced while preprocessing the query.
  std::shared_ptr<clp_s::search::ast::Expression> expr_;
  // Input source handed to the constructor.
  clp_s::InputSource inputSource_;
  std::vector<Field> outputColumns_;
  std::string query_;
  // Owned copy of the split path handed to the constructor.
  std::string splitPath_;

 private:
  /// Prepares the query for execution by parsing, validating, and optimizing
  /// it.
  ///
  /// @return The error code.
  ErrorCode preprocessQuery();
};
} // namespace facebook::velox::connector::clp::search_lib