Skip to content

Commit afef093

Browse files
authored
chore: new mode for enterprise edition (#1215)
Added index mode for Parseable server
1 parent 887a63f commit afef093

16 files changed

+280
-16
lines changed

src/catalog/mod.rs

+7
Original file line numberDiff line numberDiff line change
@@ -340,6 +340,12 @@ pub async fn remove_manifest_from_snapshot(
340340
Ok(get_first_event(storage.clone(), stream_name, Vec::new()).await?)
341341
}
342342
Mode::Query => Ok(get_first_event(storage, stream_name, dates).await?),
343+
Mode::Index => Err(ObjectStorageError::UnhandledError(Box::new(
344+
std::io::Error::new(
345+
std::io::ErrorKind::Unsupported,
346+
"Can't remove manifest from within Index server",
347+
),
348+
))),
343349
}
344350
}
345351

@@ -350,6 +356,7 @@ pub async fn get_first_event(
350356
) -> Result<Option<String>, ObjectStorageError> {
351357
let mut first_event_at: String = String::default();
352358
match PARSEABLE.options.mode {
359+
Mode::Index => unimplemented!(),
353360
Mode::All | Mode::Ingest => {
354361
// get current snapshot
355362
let stream_first_event = PARSEABLE.get_stream(stream_name)?.get_first_event();

src/enterprise/mod.rs

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
pub mod utils;

src/enterprise/utils.rs

+159
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,159 @@
1+
use std::{collections::HashMap, path::PathBuf, sync::Arc};
2+
3+
use datafusion::{common::Column, prelude::Expr};
4+
use itertools::Itertools;
5+
use relative_path::RelativePathBuf;
6+
7+
use crate::query::stream_schema_provider::extract_primary_filter;
8+
use crate::{
9+
catalog::{
10+
manifest::{File, Manifest},
11+
snapshot, Snapshot,
12+
},
13+
event,
14+
parseable::PARSEABLE,
15+
query::{stream_schema_provider::ManifestExt, PartialTimeFilter},
16+
storage::{ObjectStorage, ObjectStorageError, ObjectStoreFormat, STREAM_ROOT_DIRECTORY},
17+
utils::time::TimeRange,
18+
};
19+
20+
pub fn create_time_filter(
21+
time_range: &TimeRange,
22+
time_partition: Option<String>,
23+
table_name: &str,
24+
) -> Vec<Expr> {
25+
let mut new_filters = vec![];
26+
let start_time = time_range.start.naive_utc();
27+
let end_time = time_range.end.naive_utc();
28+
let mut _start_time_filter: Expr;
29+
let mut _end_time_filter: Expr;
30+
31+
match time_partition {
32+
Some(time_partition) => {
33+
_start_time_filter = PartialTimeFilter::Low(std::ops::Bound::Included(start_time))
34+
.binary_expr(Expr::Column(Column::new(
35+
Some(table_name.to_owned()),
36+
time_partition.clone(),
37+
)));
38+
_end_time_filter =
39+
PartialTimeFilter::High(std::ops::Bound::Excluded(end_time)).binary_expr(
40+
Expr::Column(Column::new(Some(table_name.to_owned()), time_partition)),
41+
);
42+
}
43+
None => {
44+
_start_time_filter = PartialTimeFilter::Low(std::ops::Bound::Included(start_time))
45+
.binary_expr(Expr::Column(Column::new(
46+
Some(table_name.to_owned()),
47+
event::DEFAULT_TIMESTAMP_KEY,
48+
)));
49+
_end_time_filter = PartialTimeFilter::High(std::ops::Bound::Excluded(end_time))
50+
.binary_expr(Expr::Column(Column::new(
51+
Some(table_name.to_owned()),
52+
event::DEFAULT_TIMESTAMP_KEY,
53+
)));
54+
}
55+
}
56+
57+
new_filters.push(_start_time_filter);
58+
new_filters.push(_end_time_filter);
59+
60+
new_filters
61+
}
62+
63+
pub async fn fetch_parquet_file_paths(
64+
stream: &str,
65+
time_range: &TimeRange,
66+
) -> Result<HashMap<RelativePathBuf, Vec<File>>, ObjectStorageError> {
67+
let glob_storage = PARSEABLE.storage.get_object_store();
68+
69+
let object_store_format = glob_storage.get_object_store_format(stream).await?;
70+
71+
let time_partition = object_store_format.time_partition;
72+
73+
let time_filter_expr = create_time_filter(time_range, time_partition.clone(), stream);
74+
75+
let time_filters = extract_primary_filter(&time_filter_expr, &time_partition);
76+
77+
let mut merged_snapshot: snapshot::Snapshot = snapshot::Snapshot::default();
78+
79+
let path = RelativePathBuf::from_iter([stream, STREAM_ROOT_DIRECTORY]);
80+
let obs = glob_storage
81+
.get_objects(
82+
Some(&path),
83+
Box::new(|file_name| file_name.ends_with("stream.json")),
84+
)
85+
.await;
86+
if let Ok(obs) = obs {
87+
for ob in obs {
88+
if let Ok(object_store_format) = serde_json::from_slice::<ObjectStoreFormat>(&ob) {
89+
let snapshot = object_store_format.snapshot;
90+
for manifest in snapshot.manifest_list {
91+
merged_snapshot.manifest_list.push(manifest);
92+
}
93+
}
94+
}
95+
}
96+
97+
let manifest_files = collect_manifest_files(
98+
glob_storage,
99+
merged_snapshot
100+
.manifests(&time_filters)
101+
.into_iter()
102+
.sorted_by_key(|file| file.time_lower_bound)
103+
.map(|item| item.manifest_path)
104+
.collect(),
105+
)
106+
.await?;
107+
108+
let mut parquet_files: HashMap<RelativePathBuf, Vec<File>> = HashMap::new();
109+
110+
let mut selected_files = manifest_files
111+
.into_iter()
112+
.flat_map(|file| file.files)
113+
.rev()
114+
.collect_vec();
115+
116+
for filter in time_filter_expr {
117+
selected_files.retain(|file| !file.can_be_pruned(&filter))
118+
}
119+
120+
selected_files
121+
.into_iter()
122+
.map(|file| {
123+
let date = file.file_path.split("/").collect_vec();
124+
125+
let date = date.as_slice()[1..4].iter().map(|s| s.to_string());
126+
127+
let date = RelativePathBuf::from_iter(date);
128+
129+
parquet_files.entry(date).or_default().push(file);
130+
})
131+
.for_each(|_| {});
132+
133+
Ok(parquet_files)
134+
}
135+
136+
async fn collect_manifest_files(
137+
storage: Arc<dyn ObjectStorage>,
138+
manifest_urls: Vec<String>,
139+
) -> Result<Vec<Manifest>, ObjectStorageError> {
140+
let mut tasks = Vec::new();
141+
manifest_urls.into_iter().for_each(|path| {
142+
let path = RelativePathBuf::from_path(PathBuf::from(path)).expect("Invalid path");
143+
let storage = Arc::clone(&storage);
144+
tasks.push(tokio::task::spawn(async move {
145+
storage.get_object(&path).await
146+
}));
147+
});
148+
149+
let mut op = Vec::new();
150+
for task in tasks {
151+
let file = task.await??;
152+
op.push(file);
153+
}
154+
155+
Ok(op
156+
.into_iter()
157+
.map(|res| serde_json::from_slice(&res).expect("Data is invalid for Manifest"))
158+
.collect())
159+
}

src/handlers/http/middleware.rs

+19
Original file line numberDiff line numberDiff line change
@@ -357,6 +357,25 @@ where
357357
Ok(res)
358358
})
359359
}
360+
361+
Mode::Index => {
362+
let accessable_endpoints = ["create", "delete"];
363+
let cond = path.split('/').any(|x| accessable_endpoints.contains(&x));
364+
if !cond {
365+
Box::pin(async {
366+
Err(actix_web::error::ErrorUnauthorized(
367+
"Only Index API can be accessed in Index Mode",
368+
))
369+
})
370+
} else {
371+
let fut = self.service.call(req);
372+
373+
Box::pin(async move {
374+
let res = fut.await?;
375+
Ok(res)
376+
})
377+
}
378+
}
360379
}
361380
}
362381
}

src/lib.rs

+4-3
Original file line numberDiff line numberDiff line change
@@ -21,11 +21,12 @@ pub mod alerts;
2121
pub mod analytics;
2222
pub mod audit;
2323
pub mod banner;
24-
mod catalog;
24+
pub mod catalog;
2525
mod cli;
2626
#[cfg(feature = "kafka")]
2727
pub mod connectors;
2828
pub mod correlation;
29+
pub mod enterprise;
2930
mod event;
3031
pub mod handlers;
3132
pub mod hottier;
@@ -37,15 +38,15 @@ mod oidc;
3738
pub mod option;
3839
pub mod otel;
3940
pub mod parseable;
40-
mod query;
41+
pub mod query;
4142
pub mod rbac;
4243
mod response;
4344
mod static_schema;
4445
mod stats;
4546
pub mod storage;
4647
pub mod sync;
4748
pub mod users;
48-
mod utils;
49+
pub mod utils;
4950
mod validator;
5051

5152
use std::time::Duration;

src/main.rs

+6
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
use std::process::exit;
2+
13
/*
24
* Parseable Server (C) 2022 - 2024 Parseable, Inc.
35
*
@@ -37,6 +39,10 @@ async fn main() -> anyhow::Result<()> {
3739
let server: Box<dyn ParseableServer> = match &PARSEABLE.options.mode {
3840
Mode::Query => Box::new(QueryServer),
3941
Mode::Ingest => Box::new(IngestServer),
42+
Mode::Index => {
43+
println!("Indexing is an enterprise feature. Check out https://www.parseable.com/pricing to know more!");
44+
exit(0)
45+
}
4046
Mode::All => Box::new(Server),
4147
};
4248

src/option.rs

+2
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ use serde::{Deserialize, Serialize};
2222
pub enum Mode {
2323
Query,
2424
Ingest,
25+
Index,
2526
#[default]
2627
All,
2728
}
@@ -128,6 +129,7 @@ pub mod validation {
128129
"query" => Ok(Mode::Query),
129130
"ingest" => Ok(Mode::Ingest),
130131
"all" => Ok(Mode::All),
132+
"index" => Ok(Mode::Index),
131133
_ => Err("Invalid MODE provided".to_string()),
132134
}
133135
}

src/parseable/mod.rs

+1
Original file line numberDiff line numberDiff line change
@@ -243,6 +243,7 @@ impl Parseable {
243243
match self.options.mode {
244244
Mode::Query => "Distributed (Query)",
245245
Mode::Ingest => "Distributed (Ingest)",
246+
Mode::Index => "Distributed (Index)",
246247
Mode::All => "Standalone",
247248
}
248249
}

src/parseable/streams.rs

+1-4
Original file line numberDiff line numberDiff line change
@@ -513,10 +513,7 @@ impl Stream {
513513
let file_size = match file.metadata() {
514514
Ok(meta) => meta.len(),
515515
Err(err) => {
516-
warn!(
517-
"File ({}) not found; Error = {err}",
518-
file.display()
519-
);
516+
warn!("File ({}) not found; Error = {err}", file.display());
520517
continue;
521518
}
522519
};

src/query/stream_schema_provider.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -894,7 +894,7 @@ pub fn extract_primary_filter(
894894
.collect()
895895
}
896896

897-
trait ManifestExt: ManifestFile {
897+
pub trait ManifestExt: ManifestFile {
898898
fn find_matching_column(&self, partial_filter: &Expr) -> Option<&Column> {
899899
let name = match partial_filter {
900900
Expr::BinaryExpr(binary_expr) => {

src/storage/azure_blob.rs

+22-1
Original file line numberDiff line numberDiff line change
@@ -35,9 +35,10 @@ use datafusion::{
3535
use futures::{stream::FuturesUnordered, StreamExt, TryStreamExt};
3636
use object_store::{
3737
azure::{MicrosoftAzure, MicrosoftAzureBuilder},
38+
buffered::BufReader,
3839
limit::LimitStore,
3940
path::Path as StorePath,
40-
BackoffConfig, ClientOptions, ObjectStore, PutPayload, RetryConfig,
41+
BackoffConfig, ClientOptions, ObjectMeta, ObjectStore, PutPayload, RetryConfig,
4142
};
4243
use relative_path::{RelativePath, RelativePathBuf};
4344
use tracing::{error, info};
@@ -423,6 +424,26 @@ impl BlobStore {
423424

424425
#[async_trait]
425426
impl ObjectStorage for BlobStore {
427+
async fn get_buffered_reader(
428+
&self,
429+
_path: &RelativePath,
430+
) -> Result<BufReader, ObjectStorageError> {
431+
Err(ObjectStorageError::UnhandledError(Box::new(
432+
std::io::Error::new(
433+
std::io::ErrorKind::Unsupported,
434+
"Buffered reader not implemented for Blob Storage yet",
435+
),
436+
)))
437+
}
438+
async fn head(&self, _path: &RelativePath) -> Result<ObjectMeta, ObjectStorageError> {
439+
Err(ObjectStorageError::UnhandledError(Box::new(
440+
std::io::Error::new(
441+
std::io::ErrorKind::Unsupported,
442+
"Head operation not implemented for Blob Storage yet",
443+
),
444+
)))
445+
}
446+
426447
async fn get_object(&self, path: &RelativePath) -> Result<Bytes, ObjectStorageError> {
427448
Ok(self._get_object(path).await?)
428449
}

src/storage/localfs.rs

+20
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ use bytes::Bytes;
2828
use datafusion::{datasource::listing::ListingTableUrl, execution::runtime_env::RuntimeEnvBuilder};
2929
use fs_extra::file::CopyOptions;
3030
use futures::{stream::FuturesUnordered, TryStreamExt};
31+
use object_store::{buffered::BufReader, ObjectMeta};
3132
use relative_path::{RelativePath, RelativePathBuf};
3233
use tokio::fs::{self, DirEntry};
3334
use tokio_stream::wrappers::ReadDirStream;
@@ -103,6 +104,25 @@ impl LocalFS {
103104

104105
#[async_trait]
105106
impl ObjectStorage for LocalFS {
107+
async fn get_buffered_reader(
108+
&self,
109+
_path: &RelativePath,
110+
) -> Result<BufReader, ObjectStorageError> {
111+
Err(ObjectStorageError::UnhandledError(Box::new(
112+
std::io::Error::new(
113+
std::io::ErrorKind::Unsupported,
114+
"Buffered reader not implemented for LocalFS yet",
115+
),
116+
)))
117+
}
118+
async fn head(&self, _path: &RelativePath) -> Result<ObjectMeta, ObjectStorageError> {
119+
Err(ObjectStorageError::UnhandledError(Box::new(
120+
std::io::Error::new(
121+
std::io::ErrorKind::Unsupported,
122+
"Head operation not implemented for LocalFS yet",
123+
),
124+
)))
125+
}
106126
async fn get_object(&self, path: &RelativePath) -> Result<Bytes, ObjectStorageError> {
107127
let time = Instant::now();
108128
let file_path = self.path_in_root(path);

src/storage/mod.rs

+4
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ use chrono::Local;
2020
use object_store::path::Path;
2121
use relative_path::RelativePath;
2222
use serde::{Deserialize, Serialize};
23+
use tokio::task::JoinError;
2324

2425
use crate::{
2526
catalog::snapshot::Snapshot,
@@ -254,6 +255,9 @@ pub enum ObjectStorageError {
254255

255256
#[error("{0}")]
256257
StandaloneWithDistributed(#[from] StandaloneWithDistributed),
258+
259+
#[error("JoinError: {0}")]
260+
JoinError(#[from] JoinError),
257261
}
258262

259263
pub fn to_object_store_path(path: &RelativePath) -> Path {

0 commit comments

Comments
 (0)