@@ -22,7 +22,9 @@ use actix_web::http::header::ContentType;
22
22
use arrow_schema:: Schema ;
23
23
use chrono:: Utc ;
24
24
use http:: StatusCode ;
25
- use serde:: Serialize ;
25
+ use serde:: { Deserialize , Serialize } ;
26
+ use serde_json:: { json, Value } ;
27
+ use tracing:: { debug, warn} ;
26
28
27
29
use crate :: {
28
30
handlers:: http:: {
@@ -31,11 +33,18 @@ use crate::{
31
33
utils:: { merge_quried_stats, IngestionStats , QueriedStats , StorageStats } ,
32
34
} ,
33
35
logstream:: error:: StreamError ,
34
- query:: update_schema_when_distributed,
36
+ query:: { into_query , update_schema_when_distributed, Query , QueryError } ,
35
37
} ,
38
+ hottier:: { HotTierError , HotTierManager , StreamHotTier } ,
36
39
parseable:: { StreamNotFound , PARSEABLE } ,
40
+ query:: { error:: ExecuteError , execute, CountsRequest , CountsResponse , QUERY_SESSION } ,
41
+ rbac:: { map:: SessionKey , role:: Action , Users } ,
37
42
stats,
38
43
storage:: { retention:: Retention , StreamInfo , StreamType } ,
44
+ utils:: {
45
+ arrow:: record_batches_to_json,
46
+ time:: { TimeParseError , TimeRange } ,
47
+ } ,
39
48
LOCK_EXPECT ,
40
49
} ;
41
50
@@ -185,6 +194,168 @@ async fn get_stream_info_helper(stream_name: &str) -> Result<StreamInfo, StreamE
185
194
Ok ( stream_info)
186
195
}
187
196
197
+ /// Response structure for Prism dataset queries.
198
+ /// Contains information about a stream, its statistics, retention policy,
199
+ /// and query results.
200
+ #[ derive( Serialize ) ]
201
+ pub struct PrismDatasetResponse {
202
+ /// Name of the stream
203
+ stream : String ,
204
+ /// Basic information about the stream
205
+ info : StreamInfo ,
206
+ /// Statistics for the queried timeframe
207
+ stats : QueriedStats ,
208
+ /// Retention policy details
209
+ retention : Retention ,
210
+ /// Hot tier information if available
211
+ hottier : Option < StreamHotTier > ,
212
+ /// Count of records in the specified time range
213
+ counts : CountsResponse ,
214
+ /// Collection of distinct values for source identifiers
215
+ distinct_sources : Value ,
216
+ }
217
+
218
+ /// Request parameters for retrieving Prism dataset information.
219
+ /// Defines which streams to query
220
+ #[ derive( Deserialize , Default ) ]
221
+ #[ serde( rename_all = "camelCase" ) ]
222
+ pub struct PrismDatasetRequest {
223
+ /// List of stream names to query
224
+ #[ serde( default ) ]
225
+ streams : Vec < String > ,
226
+ }
227
+
228
+ impl PrismDatasetRequest {
229
+ /// Retrieves dataset information for all specified streams.
230
+ ///
231
+ /// Processes each stream in the request and compiles their information.
232
+ /// Streams that don't exist or can't be accessed are skipped.
233
+ ///
234
+ /// # Returns
235
+ /// - `Ok(Vec<PrismDatasetResponse>)`: List of responses for successfully processed streams
236
+ /// - `Err(PrismLogstreamError)`: If a critical error occurs during processing
237
+ ///
238
+ /// # Note
239
+ /// 1. This method won't fail if individual streams fail - it will only include
240
+ /// successfully processed streams in the result.
241
+ /// 2. On receiving an empty stream list, we return for all streams the user is able to query for
242
+ pub async fn get_datasets (
243
+ mut self ,
244
+ key : SessionKey ,
245
+ ) -> Result < Vec < PrismDatasetResponse > , PrismLogstreamError > {
246
+ let is_empty = self . streams . is_empty ( ) ;
247
+ if is_empty {
248
+ self . streams = PARSEABLE . streams . list ( ) ;
249
+ }
250
+
251
+ let mut responses = vec ! [ ] ;
252
+ for stream in self . streams . iter ( ) {
253
+ if Users . authorize ( key. clone ( ) , Action :: ListStream , Some ( stream) , None )
254
+ != crate :: rbac:: Response :: Authorized
255
+ {
256
+ // Don't warn if listed from Parseable
257
+ if !is_empty {
258
+ warn ! ( "Unauthorized access requested for stream: {stream}" ) ;
259
+ }
260
+ continue ;
261
+ }
262
+
263
+ if PARSEABLE . check_or_load_stream ( stream) . await {
264
+ debug ! ( "Stream not found: {stream}" ) ;
265
+ continue ;
266
+ }
267
+
268
+ let PrismLogstreamInfo {
269
+ info,
270
+ stats,
271
+ retention,
272
+ ..
273
+ } = get_prism_logstream_info ( stream) . await ?;
274
+
275
+ let hottier = match HotTierManager :: global ( ) {
276
+ Some ( hot_tier_manager) => {
277
+ let stats = hot_tier_manager. get_hot_tier ( stream) . await ?;
278
+ Some ( stats)
279
+ }
280
+ _ => None ,
281
+ } ;
282
+ let records = CountsRequest {
283
+ stream : stream. clone ( ) ,
284
+ start_time : "1h" . to_owned ( ) ,
285
+ end_time : "now" . to_owned ( ) ,
286
+ num_bins : 10 ,
287
+ }
288
+ . get_bin_density ( )
289
+ . await ?;
290
+ let counts = CountsResponse {
291
+ fields : vec ! [ "start_time" . into( ) , "end_time" . into( ) , "count" . into( ) ] ,
292
+ records,
293
+ } ;
294
+
295
+ // Retrieve distinct values for source identifiers
296
+ // Returns None if fields aren't present or if query fails
297
+ let ips = self . get_distinct_entries ( stream, "p_src_ip" ) . await . ok ( ) ;
298
+ let user_agents = self . get_distinct_entries ( stream, "p_user_agent" ) . await . ok ( ) ;
299
+
300
+ responses. push ( PrismDatasetResponse {
301
+ stream : stream. clone ( ) ,
302
+ info,
303
+ stats,
304
+ retention,
305
+ hottier,
306
+ counts,
307
+ distinct_sources : json ! ( {
308
+ "ips" : ips,
309
+ "user_agents" : user_agents
310
+ } ) ,
311
+ } )
312
+ }
313
+
314
+ Ok ( responses)
315
+ }
316
+
317
+ /// Retrieves distinct values for a specific field in a stream.
318
+ ///
319
+ /// # Parameters
320
+ /// - `stream_name`: Name of the stream to query
321
+ /// - `field`: Field name to get distinct values for
322
+ ///
323
+ /// # Returns
324
+ /// - `Ok(Vec<String>)`: List of distinct values found for the field
325
+ /// - `Err(QueryError)`: If the query fails or field doesn't exist
326
+ async fn get_distinct_entries (
327
+ & self ,
328
+ stream_name : & str ,
329
+ field : & str ,
330
+ ) -> Result < Vec < String > , QueryError > {
331
+ let query = Query {
332
+ query : format ! ( "SELECT DISTINCT({field}) FOR {stream_name}" ) ,
333
+ start_time : "1h" . to_owned ( ) ,
334
+ end_time : "now" . to_owned ( ) ,
335
+ send_null : false ,
336
+ filter_tags : None ,
337
+ fields : true ,
338
+ } ;
339
+ let time_range = TimeRange :: parse_human_time ( "1h" , "now" ) ?;
340
+
341
+ let session_state = QUERY_SESSION . state ( ) ;
342
+ let query = into_query ( & query, & session_state, time_range) . await ?;
343
+ let ( records, _) = execute ( query, stream_name) . await ?;
344
+ let response = record_batches_to_json ( & records) ?;
345
+ // Extract field values from the JSON response
346
+ let values = response
347
+ . iter ( )
348
+ . flat_map ( |row| {
349
+ row. get ( field)
350
+ . and_then ( |s| s. as_str ( ) )
351
+ . map ( |s| s. to_string ( ) )
352
+ } )
353
+ . collect ( ) ;
354
+
355
+ Ok ( values)
356
+ }
357
+ }
358
+
188
359
#[ derive( Debug , thiserror:: Error ) ]
189
360
pub enum PrismLogstreamError {
190
361
#[ error( "Error: {0}" ) ]
@@ -193,6 +364,16 @@ pub enum PrismLogstreamError {
193
364
StreamError ( #[ from] StreamError ) ,
194
365
#[ error( "StreamNotFound: {0}" ) ]
195
366
StreamNotFound ( #[ from] StreamNotFound ) ,
367
+ #[ error( "Hottier: {0}" ) ]
368
+ Hottier ( #[ from] HotTierError ) ,
369
+ #[ error( "Query: {0}" ) ]
370
+ Query ( #[ from] QueryError ) ,
371
+ #[ error( "TimeParse: {0}" ) ]
372
+ TimeParse ( #[ from] TimeParseError ) ,
373
+ #[ error( "Execute: {0}" ) ]
374
+ Execute ( #[ from] ExecuteError ) ,
375
+ #[ error( "Auth: {0}" ) ]
376
+ Auth ( #[ from] actix_web:: Error ) ,
196
377
}
197
378
198
379
impl actix_web:: ResponseError for PrismLogstreamError {
@@ -201,6 +382,11 @@ impl actix_web::ResponseError for PrismLogstreamError {
201
382
PrismLogstreamError :: Anyhow ( _) => StatusCode :: INTERNAL_SERVER_ERROR ,
202
383
PrismLogstreamError :: StreamError ( e) => e. status_code ( ) ,
203
384
PrismLogstreamError :: StreamNotFound ( _) => StatusCode :: NOT_FOUND ,
385
+ PrismLogstreamError :: Hottier ( _) => StatusCode :: INTERNAL_SERVER_ERROR ,
386
+ PrismLogstreamError :: Query ( _) => StatusCode :: INTERNAL_SERVER_ERROR ,
387
+ PrismLogstreamError :: TimeParse ( _) => StatusCode :: NOT_FOUND ,
388
+ PrismLogstreamError :: Execute ( _) => StatusCode :: INTERNAL_SERVER_ERROR ,
389
+ PrismLogstreamError :: Auth ( _) => StatusCode :: UNAUTHORIZED ,
204
390
}
205
391
}
206
392
0 commit comments