Skip to content

Commit 87dfa36

Browse files
authored
vector-store: split table.rs (scylladb#476)
The `table` module keeps getting bigger, and more refactoring of it is planned, so let's split it into several submodules. This PR divides the module without any business logic change. The first commit is a git move to preserve git history. The remaining commits are simple splitting commits. Fixes: VECTOR-688
2 parents f496aa8 + 6ed01f3 commit 87dfa36

5 files changed

Lines changed: 588 additions & 554 deletions

File tree

Lines changed: 324 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,324 @@
1+
/*
2+
* Copyright 2026-present ScyllaDB
3+
* SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0
4+
*/
5+
6+
use crate::PrimaryKey;
7+
use crate::Timestamp;
8+
use crate::table::ColumnVec;
9+
use crate::table::PrimaryId;
10+
use crate::table::TValue;
11+
use anyhow::bail;
12+
use scylla::cluster::metadata::NativeType;
13+
use scylla::value::CqlDate;
14+
use scylla::value::CqlDecimal;
15+
use scylla::value::CqlTime;
16+
use scylla::value::CqlTimestamp;
17+
use scylla::value::CqlTimeuuid;
18+
use scylla::value::CqlValue;
19+
use scylla::value::CqlVarint;
20+
use std::net::IpAddr;
21+
use uuid::Uuid;
22+
23+
/// A newtype for defining the offset of the key column.
///
/// Wraps a plain `usize`; the derived `From`/`Into` impls convert to and from the wrapped value.
24+
#[derive(Clone, Copy, Debug, derive_more::From, derive_more::Into)]
25+
pub(super) struct KeyOffset(usize);
26+
27+
/// An enum that represents a column in the table. It can be a column with values of a specific
28+
/// type or a primary key column (as an offset in the primary key columns).
///
/// Every value variant wraps a `ColumnVec` indexed by `PrimaryId` whose cells are `TValue`s of the
/// Rust type that backs the variant.
29+
#[derive(Debug)]
30+
pub(super) enum Column {
31+
/// Backed by `String`, the same Rust type as `Text`.
Ascii(ColumnVec<PrimaryId, TValue<String>>),
32+
BigInt(ColumnVec<PrimaryId, TValue<i64>>),
33+
Blob(ColumnVec<PrimaryId, TValue<Vec<u8>>>),
34+
Boolean(ColumnVec<PrimaryId, TValue<bool>>),
35+
Date(ColumnVec<PrimaryId, TValue<CqlDate>>),
36+
Decimal(ColumnVec<PrimaryId, TValue<CqlDecimal>>),
37+
Double(ColumnVec<PrimaryId, TValue<f64>>),
38+
Float(ColumnVec<PrimaryId, TValue<f32>>),
39+
Inet(ColumnVec<PrimaryId, TValue<IpAddr>>),
40+
Int(ColumnVec<PrimaryId, TValue<i32>>),
41+
SmallInt(ColumnVec<PrimaryId, TValue<i16>>),
42+
/// Backed by `String`, the same Rust type as `Ascii`.
Text(ColumnVec<PrimaryId, TValue<String>>),
43+
Time(ColumnVec<PrimaryId, TValue<CqlTime>>),
44+
Timestamp(ColumnVec<PrimaryId, TValue<CqlTimestamp>>),
45+
Timeuuid(ColumnVec<PrimaryId, TValue<CqlTimeuuid>>),
46+
TinyInt(ColumnVec<PrimaryId, TValue<i8>>),
47+
Uuid(ColumnVec<PrimaryId, TValue<Uuid>>),
48+
Varint(ColumnVec<PrimaryId, TValue<CqlVarint>>),
49+
/// A primary key column: it holds no cells of its own, only the offset of this column within
/// the primary key columns.
PrimaryKey(KeyOffset),
50+
}
51+
52+
impl Column {
53+
pub(super) fn new(native_type: &NativeType) -> anyhow::Result<Self> {
54+
Ok(match native_type {
55+
NativeType::Ascii => Self::Ascii(ColumnVec::new()),
56+
NativeType::BigInt => Self::BigInt(ColumnVec::new()),
57+
NativeType::Blob => Self::Blob(ColumnVec::new()),
58+
NativeType::Boolean => Self::Boolean(ColumnVec::new()),
59+
NativeType::Date => Self::Date(ColumnVec::new()),
60+
NativeType::Decimal => Self::Decimal(ColumnVec::new()),
61+
NativeType::Double => Self::Double(ColumnVec::new()),
62+
NativeType::Float => Self::Float(ColumnVec::new()),
63+
NativeType::Inet => Self::Inet(ColumnVec::new()),
64+
NativeType::Int => Self::Int(ColumnVec::new()),
65+
NativeType::SmallInt => Self::SmallInt(ColumnVec::new()),
66+
NativeType::Text => Self::Text(ColumnVec::new()),
67+
NativeType::Time => Self::Time(ColumnVec::new()),
68+
NativeType::Timestamp => Self::Timestamp(ColumnVec::new()),
69+
NativeType::Timeuuid => Self::Timeuuid(ColumnVec::new()),
70+
NativeType::TinyInt => Self::TinyInt(ColumnVec::new()),
71+
NativeType::Uuid => Self::Uuid(ColumnVec::new()),
72+
NativeType::Varint => Self::Varint(ColumnVec::new()),
73+
_ => bail!("Unsupported native type: {native_type:?}"),
74+
})
75+
}
76+
77+
pub(super) fn resize_with(&mut self, size: usize) {
78+
let timestamp = Timestamp::UNIX_EPOCH;
79+
match self {
80+
Self::Ascii(vec) => vec.resize_with(size, || TValue::None(timestamp)),
81+
Self::BigInt(vec) => vec.resize_with(size, || TValue::None(timestamp)),
82+
Self::Blob(vec) => vec.resize_with(size, || TValue::None(timestamp)),
83+
Self::Boolean(vec) => vec.resize_with(size, || TValue::None(timestamp)),
84+
Self::Date(vec) => vec.resize_with(size, || TValue::None(timestamp)),
85+
Self::Decimal(vec) => vec.resize_with(size, || TValue::None(timestamp)),
86+
Self::Double(vec) => vec.resize_with(size, || TValue::None(timestamp)),
87+
Self::Float(vec) => vec.resize_with(size, || TValue::None(timestamp)),
88+
Self::Inet(vec) => vec.resize_with(size, || TValue::None(timestamp)),
89+
Self::Int(vec) => vec.resize_with(size, || TValue::None(timestamp)),
90+
Self::SmallInt(vec) => vec.resize_with(size, || TValue::None(timestamp)),
91+
Self::Text(vec) => vec.resize_with(size, || TValue::None(timestamp)),
92+
Self::Time(vec) => vec.resize_with(size, || TValue::None(timestamp)),
93+
Self::Timestamp(vec) => vec.resize_with(size, || TValue::None(timestamp)),
94+
Self::Timeuuid(vec) => vec.resize_with(size, || TValue::None(timestamp)),
95+
Self::TinyInt(vec) => vec.resize_with(size, || TValue::None(timestamp)),
96+
Self::Uuid(vec) => vec.resize_with(size, || TValue::None(timestamp)),
97+
Self::Varint(vec) => vec.resize_with(size, || TValue::None(timestamp)),
98+
Self::PrimaryKey(_) => {}
99+
}
100+
}
101+
102+
#[allow(dead_code)]
103+
fn insert_cqlvalue(
104+
&mut self,
105+
primary_id: PrimaryId,
106+
timestamp: Timestamp,
107+
value: CqlValue,
108+
) -> anyhow::Result<()> {
109+
match self {
110+
Self::Ascii(vec) => {
111+
let CqlValue::Ascii(value) = value else {
112+
bail!("Failed to convert value to Ascii");
113+
};
114+
vec.update(primary_id, TValue::Some(timestamp, value))
115+
}
116+
Self::BigInt(vec) => {
117+
let CqlValue::BigInt(value) = value else {
118+
bail!("Failed to convert value to BigInt");
119+
};
120+
vec.update(primary_id, TValue::Some(timestamp, value))
121+
}
122+
Self::Blob(vec) => {
123+
let CqlValue::Blob(value) = value else {
124+
bail!("Failed to convert value to Blob");
125+
};
126+
vec.update(primary_id, TValue::Some(timestamp, value))
127+
}
128+
Self::Boolean(vec) => {
129+
let CqlValue::Boolean(value) = value else {
130+
bail!("Failed to convert value to Boolean");
131+
};
132+
vec.update(primary_id, TValue::Some(timestamp, value))
133+
}
134+
Self::Date(vec) => {
135+
let CqlValue::Date(value) = value else {
136+
bail!("Failed to convert value to Date");
137+
};
138+
vec.update(primary_id, TValue::Some(timestamp, value))
139+
}
140+
Self::Decimal(vec) => {
141+
let CqlValue::Decimal(value) = value else {
142+
bail!("Failed to convert value to Decimal");
143+
};
144+
vec.update(primary_id, TValue::Some(timestamp, value))
145+
}
146+
Self::Double(vec) => {
147+
let CqlValue::Double(value) = value else {
148+
bail!("Failed to convert value to Double");
149+
};
150+
vec.update(primary_id, TValue::Some(timestamp, value))
151+
}
152+
Self::Float(vec) => {
153+
let CqlValue::Float(value) = value else {
154+
bail!("Failed to convert value to Float");
155+
};
156+
vec.update(primary_id, TValue::Some(timestamp, value))
157+
}
158+
Self::Inet(vec) => {
159+
let CqlValue::Inet(value) = value else {
160+
bail!("Failed to convert value to Inet");
161+
};
162+
vec.update(primary_id, TValue::Some(timestamp, value))
163+
}
164+
Self::Int(vec) => {
165+
let CqlValue::Int(value) = value else {
166+
bail!("Failed to convert value to Int");
167+
};
168+
vec.update(primary_id, TValue::Some(timestamp, value))
169+
}
170+
Self::SmallInt(vec) => {
171+
let CqlValue::SmallInt(value) = value else {
172+
bail!("Failed to convert value to SmallInt");
173+
};
174+
vec.update(primary_id, TValue::Some(timestamp, value))
175+
}
176+
Self::Text(vec) => {
177+
let CqlValue::Text(value) = value else {
178+
bail!("Failed to convert value to Text");
179+
};
180+
vec.update(primary_id, TValue::Some(timestamp, value))
181+
}
182+
Self::Time(vec) => {
183+
let CqlValue::Time(value) = value else {
184+
bail!("Failed to convert value to Time");
185+
};
186+
vec.update(primary_id, TValue::Some(timestamp, value))
187+
}
188+
Self::Timestamp(vec) => {
189+
let CqlValue::Timestamp(value) = value else {
190+
bail!("Failed to convert value to Timestamp");
191+
};
192+
vec.update(primary_id, TValue::Some(timestamp, value))
193+
}
194+
Self::Timeuuid(vec) => {
195+
let CqlValue::Timeuuid(value) = value else {
196+
bail!("Failed to convert value to Timeuuid");
197+
};
198+
vec.update(primary_id, TValue::Some(timestamp, value))
199+
}
200+
Self::TinyInt(vec) => {
201+
let CqlValue::TinyInt(value) = value else {
202+
bail!("Failed to convert value to TinyInt");
203+
};
204+
vec.update(primary_id, TValue::Some(timestamp, value))
205+
}
206+
Self::Uuid(vec) => {
207+
let CqlValue::Uuid(value) = value else {
208+
bail!("Failed to convert value to Uuid");
209+
};
210+
vec.update(primary_id, TValue::Some(timestamp, value))
211+
}
212+
Self::Varint(vec) => {
213+
let CqlValue::Varint(value) = value else {
214+
bail!("Failed to convert value to Varint");
215+
};
216+
vec.update(primary_id, TValue::Some(timestamp, value))
217+
}
218+
Self::PrimaryKey(_) => bail!("Cannot insert value into PrimaryKey column"),
219+
}
220+
}
221+
222+
pub(super) fn get(
223+
&self,
224+
primary_id: PrimaryId,
225+
primary_keys: &ColumnVec<PrimaryId, Option<PrimaryKey>>,
226+
) -> Option<CqlValue> {
227+
match self {
228+
Self::Ascii(vec) => vec
229+
.get(primary_id)
230+
.and_then(|val| val.get())
231+
.cloned()
232+
.map(CqlValue::Ascii),
233+
Self::BigInt(vec) => vec
234+
.get(primary_id)
235+
.and_then(|val| val.get())
236+
.cloned()
237+
.map(CqlValue::BigInt),
238+
Self::Blob(vec) => vec
239+
.get(primary_id)
240+
.and_then(|val| val.get())
241+
.cloned()
242+
.map(CqlValue::Blob),
243+
Self::Boolean(vec) => vec
244+
.get(primary_id)
245+
.and_then(|val| val.get())
246+
.cloned()
247+
.map(CqlValue::Boolean),
248+
Self::Date(vec) => vec
249+
.get(primary_id)
250+
.and_then(|val| val.get())
251+
.cloned()
252+
.map(CqlValue::Date),
253+
Self::Decimal(vec) => vec
254+
.get(primary_id)
255+
.and_then(|val| val.get())
256+
.cloned()
257+
.map(CqlValue::Decimal),
258+
Self::Double(vec) => vec
259+
.get(primary_id)
260+
.and_then(|val| val.get())
261+
.cloned()
262+
.map(CqlValue::Double),
263+
Self::Float(vec) => vec
264+
.get(primary_id)
265+
.and_then(|val| val.get())
266+
.cloned()
267+
.map(CqlValue::Float),
268+
Self::Inet(vec) => vec
269+
.get(primary_id)
270+
.and_then(|val| val.get())
271+
.cloned()
272+
.map(CqlValue::Inet),
273+
Self::Int(vec) => vec
274+
.get(primary_id)
275+
.and_then(|val| val.get())
276+
.cloned()
277+
.map(CqlValue::Int),
278+
Self::SmallInt(vec) => vec
279+
.get(primary_id)
280+
.and_then(|val| val.get())
281+
.cloned()
282+
.map(CqlValue::SmallInt),
283+
Self::Text(vec) => vec
284+
.get(primary_id)
285+
.and_then(|val| val.get())
286+
.cloned()
287+
.map(CqlValue::Text),
288+
Self::Time(vec) => vec
289+
.get(primary_id)
290+
.and_then(|val| val.get())
291+
.cloned()
292+
.map(CqlValue::Time),
293+
Self::Timestamp(vec) => vec
294+
.get(primary_id)
295+
.and_then(|val| val.get())
296+
.cloned()
297+
.map(CqlValue::Timestamp),
298+
Self::Timeuuid(vec) => vec
299+
.get(primary_id)
300+
.and_then(|val| val.get())
301+
.cloned()
302+
.map(CqlValue::Timeuuid),
303+
Self::TinyInt(vec) => vec
304+
.get(primary_id)
305+
.and_then(|val| val.get())
306+
.cloned()
307+
.map(CqlValue::TinyInt),
308+
Self::Uuid(vec) => vec
309+
.get(primary_id)
310+
.and_then(|val| val.get())
311+
.cloned()
312+
.map(CqlValue::Uuid),
313+
Self::Varint(vec) => vec
314+
.get(primary_id)
315+
.and_then(|val| val.get())
316+
.cloned()
317+
.map(CqlValue::Varint),
318+
Self::PrimaryKey(key_offset) => primary_keys
319+
.get(primary_id)
320+
.and_then(|opt_key| opt_key.as_ref())
321+
.and_then(|key| key.get((*key_offset).into())),
322+
}
323+
}
324+
}
Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
/*
2+
* Copyright 2026-present ScyllaDB
3+
* SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0
4+
*/
5+
6+
use crate::Timestamp;
7+
use crate::table::ETValue;
8+
use crate::table::Idx;
9+
use crate::table::PrimaryId;
10+
use anyhow::anyhow;
11+
12+
/// ColumnVec is a wrapper around Vec and a generic index type. It is used to safely access
/// columns by specific index types.
///
/// `I` is the index type accepted by the accessors; `T` is the type of the stored cells.
13+
#[derive(Debug)]
14+
pub(super) struct ColumnVec<I, T> {
15+
// The stored cells.
vec: Vec<T>,
16+
// Marker that records the index type `I` without storing a value of it.
_index: std::marker::PhantomData<I>,
17+
}
18+
19+
impl<I: Idx, T> ColumnVec<I, T> {
20+
pub(super) fn new() -> Self {
21+
Self {
22+
vec: Vec::new(),
23+
_index: std::marker::PhantomData,
24+
}
25+
}
26+
27+
pub(super) fn resize_with(&mut self, size: usize, f: impl FnMut() -> T) {
28+
self.vec.resize_with(size, f);
29+
}
30+
31+
pub(super) fn get(&self, idx: I) -> Option<&T> {
32+
self.vec.get(idx.idx())
33+
}
34+
35+
pub(super) fn get_mut(&mut self, idx: I) -> Option<&mut T> {
36+
self.vec.get_mut(idx.idx())
37+
}
38+
39+
pub(super) fn update(&mut self, idx: I, value: T) -> anyhow::Result<()> {
40+
*self
41+
.get_mut(idx)
42+
.ok_or_else(|| anyhow!("Index out of ColumnVec bounds"))? = value;
43+
Ok(())
44+
}
45+
}
46+
47+
impl<T> ColumnVec<PrimaryId, ETValue<T>> {
48+
pub(super) fn update_epoch_timestamp(
49+
&mut self,
50+
primary_id: PrimaryId,
51+
timestamp: Timestamp,
52+
) -> anyhow::Result<()> {
53+
self.get_mut(primary_id)
54+
.map(|value| {
55+
value.update_epoch_timestamp(primary_id.epoch(), timestamp);
56+
})
57+
.ok_or_else(|| anyhow!("Index out of ColumnVec bounds"))
58+
}
59+
}

0 commit comments

Comments
 (0)