@@ -10,14 +10,16 @@ use bytes::Bytes;
10
10
use chroma_cache:: CacheConfig ;
11
11
use chroma_config:: Configurable ;
12
12
use chroma_error:: ChromaError ;
13
+ use chroma_log:: { config:: GrpcLogConfig , grpc_log:: GrpcLog } ;
13
14
use chroma_storage:: config:: StorageConfig ;
14
15
use chroma_storage:: Storage ;
15
16
use chroma_types:: chroma_proto:: {
16
- log_service_server:: LogService , CollectionInfo , GetAllCollectionInfoToCompactRequest ,
17
- GetAllCollectionInfoToCompactResponse , InspectDirtyLogRequest , InspectDirtyLogResponse ,
18
- LogRecord , OperationRecord , PullLogsRequest , PullLogsResponse , PurgeDirtyForCollectionRequest ,
19
- PurgeDirtyForCollectionResponse , PushLogsRequest , PushLogsResponse , ScoutLogsRequest ,
20
- ScoutLogsResponse , UpdateCollectionLogOffsetRequest , UpdateCollectionLogOffsetResponse ,
17
+ log_service_client:: LogServiceClient , log_service_server:: LogService , CollectionInfo ,
18
+ GetAllCollectionInfoToCompactRequest , GetAllCollectionInfoToCompactResponse ,
19
+ InspectDirtyLogRequest , InspectDirtyLogResponse , LogRecord , OperationRecord , PullLogsRequest ,
20
+ PullLogsResponse , PurgeDirtyForCollectionRequest , PurgeDirtyForCollectionResponse ,
21
+ PushLogsRequest , PushLogsResponse , ScoutLogsRequest , ScoutLogsResponse ,
22
+ UpdateCollectionLogOffsetRequest , UpdateCollectionLogOffsetResponse ,
21
23
} ;
22
24
use chroma_types:: chroma_proto:: { ForkLogsRequest , ForkLogsResponse } ;
23
25
use chroma_types:: CollectionUuid ;
@@ -201,7 +203,7 @@ async fn get_log_from_handle<'a>(
201
203
} ) ;
202
204
}
203
205
tracing:: info!( "Opening log at {}" , prefix) ;
204
- let opened = LogWriter :: open_or_initialize (
206
+ let opened = LogWriter :: open (
205
207
options. clone ( ) ,
206
208
Arc :: clone ( storage) ,
207
209
prefix,
@@ -630,11 +632,70 @@ pub struct LogServer {
630
632
storage : Arc < Storage > ,
631
633
open_logs : Arc < StateHashTable < LogKey , LogStub > > ,
632
634
dirty_log : Arc < LogWriter > ,
635
+ #[ allow( clippy:: type_complexity) ]
636
+ proxy : Option < LogServiceClient < chroma_tracing:: GrpcTraceService < tonic:: transport:: Channel > > > ,
633
637
compacting : tokio:: sync:: Mutex < ( ) > ,
634
638
cache : Option < Box < dyn chroma_cache:: PersistentCache < String , CachedParquetFragment > > > ,
635
639
metrics : Metrics ,
636
640
}
637
641
642
+ impl LogServer {
643
+ fn should_initialize_log ( & self , collection : CollectionUuid ) -> bool {
644
+ todo ! ( ) ;
645
+ }
646
+
647
+ async fn forward_push_logs (
648
+ & self ,
649
+ request : Request < PushLogsRequest > ,
650
+ ) -> Result < Response < PushLogsResponse > , Status > {
651
+ todo ! ( ) ;
652
+ }
653
+
654
+ async fn forward_scout_logs (
655
+ & self ,
656
+ request : Request < ScoutLogsRequest > ,
657
+ ) -> Result < Response < ScoutLogsResponse > , Status > {
658
+ if let Some ( proxy) = self . proxy . as_ref ( ) {
659
+ proxy. clone ( ) . scout_logs ( request) . await
660
+ } else {
661
+ Err ( Status :: failed_precondition ( "proxy not initialized" ) )
662
+ }
663
+ }
664
+
665
+ async fn forward_pull_logs (
666
+ & self ,
667
+ request : Request < PullLogsRequest > ,
668
+ ) -> Result < Response < PullLogsResponse > , Status > {
669
+ if let Some ( proxy) = self . proxy . as_ref ( ) {
670
+ proxy. clone ( ) . pull_logs ( request) . await
671
+ } else {
672
+ Err ( Status :: failed_precondition ( "proxy not initialized" ) )
673
+ }
674
+ }
675
+
676
+ async fn forward_update_collection_log_offset (
677
+ & self ,
678
+ request : Request < UpdateCollectionLogOffsetRequest > ,
679
+ ) -> Result < Response < UpdateCollectionLogOffsetResponse > , Status > {
680
+ if let Some ( proxy) = self . proxy . as_ref ( ) {
681
+ proxy. clone ( ) . update_collection_log_offset ( request) . await
682
+ } else {
683
+ Err ( Status :: failed_precondition ( "proxy not initialized" ) )
684
+ }
685
+ }
686
+
687
+ async fn forward_fork_logs (
688
+ & self ,
689
+ request : Request < ForkLogsRequest > ,
690
+ ) -> Result < Response < ForkLogsResponse > , Status > {
691
+ if let Some ( proxy) = self . proxy . as_ref ( ) {
692
+ proxy. clone ( ) . fork_logs ( request) . await
693
+ } else {
694
+ Err ( Status :: failed_precondition ( "proxy not initialized" ) )
695
+ }
696
+ }
697
+ }
698
+
638
699
#[ async_trait:: async_trait]
639
700
impl LogService for LogServer {
640
701
async fn push_logs (
@@ -662,16 +723,36 @@ impl LogService for LogServer {
662
723
collection_id,
663
724
dirty_log : Arc :: clone ( & self . dirty_log ) ,
664
725
} ;
665
- let log = get_log_from_handle (
726
+ let log = match get_log_from_handle (
666
727
& handle,
667
728
& self . config . writer ,
668
729
& self . storage ,
669
730
& prefix,
670
731
mark_dirty,
671
732
)
672
733
. await
673
- // TODO(rescrv): better error handling.
674
- . map_err ( |err| Status :: unknown ( err. to_string ( ) ) ) ?;
734
+ {
735
+ Ok ( log) => log,
736
+ Err ( wal3:: Error :: UninitializedLog ) => {
737
+ if self . should_initialize_log ( collection_id) {
738
+ if let Err ( err) = LogWriter :: initialize (
739
+ & self . config . writer ,
740
+ & self . storage ,
741
+ & prefix,
742
+ "push_logs initializer" ,
743
+ )
744
+ . await
745
+ {
746
+ return Err ( Status :: unknown ( err. to_string ( ) ) ) ;
747
+ }
748
+ return Box :: pin ( self . push_logs ( Request :: new ( push_logs) ) ) . await ;
749
+ }
750
+ return self . forward_push_logs ( Request :: new ( push_logs) ) . await ;
751
+ }
752
+ Err ( err) => {
753
+ return Err ( Status :: unknown ( err. to_string ( ) ) ) ;
754
+ }
755
+ } ;
675
756
let mut messages = Vec :: with_capacity ( push_logs. records . len ( ) ) ;
676
757
for record in push_logs. records {
677
758
let mut buf = vec ! [ ] ;
@@ -718,6 +799,17 @@ impl LogService for LogServer {
718
799
) ;
719
800
let limit_position = match log_reader. maximum_log_position ( ) . await {
720
801
Ok ( limit_position) => limit_position,
802
+ Err ( wal3:: Error :: UninitializedLog ) => {
803
+ // NOTE(rescrv): In this case, what we have is a guarantee that we know the
804
+ // log is ours. This only comes from the by-tenant or by-collection alt-log
805
+ // shunts. The log is uninitialized, so we know it's not because
806
+ // has_manifest().
807
+ if self . should_initialize_log ( collection_id) {
808
+ LogPosition :: from_offset ( 1 )
809
+ } else {
810
+ return self . forward_scout_logs ( Request :: new ( scout_logs) ) . await ;
811
+ }
812
+ }
721
813
Err ( err) => {
722
814
if err. code ( ) == chroma_error:: ErrorCodes :: FailedPrecondition {
723
815
LogPosition :: from_offset ( 1 )
@@ -768,14 +860,18 @@ impl LogService for LogServer {
768
860
. await
769
861
{
770
862
Ok ( fragments) => fragments,
771
- Err ( err) => {
772
- if let wal3:: Error :: UninitializedLog = err {
863
+ Err ( wal3:: Error :: UninitializedLog ) => {
864
+ // NOTE(rescrv): Same as with ScoutLogs.
865
+ if self . should_initialize_log ( collection_id) {
773
866
tracing:: info!( "Uninitialized log for collection {}" , collection_id) ;
774
867
return Ok ( Response :: new ( PullLogsResponse { records : vec ! [ ] } ) ) ;
775
868
} else {
776
- return Err ( Status :: new ( err . code ( ) . into ( ) , err . to_string ( ) ) ) ;
869
+ return self . forward_pull_logs ( Request :: new ( pull_logs ) ) . await ;
777
870
}
778
871
}
872
+ Err ( err) => {
873
+ return Err ( Status :: new ( err. code ( ) . into ( ) , err. to_string ( ) ) ) ;
874
+ }
779
875
} ;
780
876
let futures = fragments
781
877
. iter ( )
@@ -854,6 +950,37 @@ impl LogService for LogServer {
854
950
Arc :: clone ( & storage) ,
855
951
source_prefix. clone ( ) ,
856
952
) ;
953
+ if let Err ( err) = log_reader. maximum_log_position ( ) . await {
954
+ match err {
955
+ wal3:: Error :: UninitializedLog => {
956
+ // NOTE(rescrv): Same as with ScoutLogs.
957
+ if self . should_initialize_log ( source_collection_id) {
958
+ LogWriter :: initialize (
959
+ & self . config . writer ,
960
+ & storage,
961
+ & source_prefix,
962
+ "fork logs initializer" ,
963
+ )
964
+ . await
965
+ . map_err ( |err| {
966
+ Status :: new (
967
+ err. code ( ) . into ( ) ,
968
+ format ! ( "Failed to initialize log for fork: {err:?}" ) ,
969
+ )
970
+ } ) ?;
971
+ return Box :: pin ( self . fork_logs ( Request :: new ( request) ) ) . await ;
972
+ } else {
973
+ return self . forward_fork_logs ( Request :: new ( request) ) . await ;
974
+ }
975
+ }
976
+ _ => {
977
+ return Err ( Status :: new (
978
+ err. code ( ) . into ( ) ,
979
+ format ! ( "Failed to load log: {}" , err) ,
980
+ ) ) ;
981
+ }
982
+ }
983
+ }
857
984
let cursors = CursorStore :: new (
858
985
CursorStoreOptions :: default ( ) ,
859
986
Arc :: clone ( & storage) ,
@@ -862,7 +989,6 @@ impl LogService for LogServer {
862
989
) ;
863
990
let cursor_name = & COMPACTION ;
864
991
let witness = cursors. load ( cursor_name) . await . map_err ( |err| {
865
- tracing:: info!( "FINDME" ) ;
866
992
Status :: new ( err. code ( ) . into ( ) , format ! ( "Failed to load cursor: {}" , err) )
867
993
} ) ?;
868
994
// This is the existing compaction_offset, which is the last record that was compacted.
@@ -879,7 +1005,6 @@ impl LogService for LogServer {
879
1005
)
880
1006
. await
881
1007
. map_err ( |err| {
882
- tracing:: info!( "FINDME" ) ;
883
1008
Status :: new ( err. code ( ) . into ( ) , format ! ( "Failed to copy log: {}" , err) )
884
1009
} ) ?;
885
1010
let log_reader = LogReader :: new (
@@ -889,11 +1014,9 @@ impl LogService for LogServer {
889
1014
) ;
890
1015
// This is the next record to insert, so we'll have to adjust downwards.
891
1016
let max_offset = log_reader. maximum_log_position ( ) . await . map_err ( |err| {
892
- tracing:: info!( "FINDME" ) ;
893
1017
Status :: new ( err. code ( ) . into ( ) , format ! ( "Failed to copy log: {}" , err) )
894
1018
} ) ?;
895
1019
if max_offset < offset {
896
- tracing:: info!( "FINDME" ) ;
897
1020
return Err ( Status :: new (
898
1021
chroma_error:: ErrorCodes :: Internal . into ( ) ,
899
1022
format ! ( "max_offset={:?} < offset={:?}" , max_offset, offset) ,
@@ -1074,6 +1197,26 @@ impl LogService for LogServer {
1074
1197
request. log_offset
1075
1198
) ;
1076
1199
let storage_prefix = storage_prefix_for_log ( collection_id) ;
1200
+ let log_reader = LogReader :: new (
1201
+ self . config . reader . clone ( ) ,
1202
+ Arc :: clone ( & self . storage ) ,
1203
+ storage_prefix. clone ( ) ,
1204
+ ) ;
1205
+
1206
+ let res = log_reader. maximum_log_position ( ) . await ;
1207
+ if let Err ( wal3:: Error :: UninitializedLog ) = res {
1208
+ if self . should_initialize_log ( collection_id) {
1209
+ return Err ( Status :: failed_precondition (
1210
+ "uninitialized log has its cursor updated" ,
1211
+ ) ) ;
1212
+ } else {
1213
+ return self
1214
+ . forward_update_collection_log_offset ( Request :: new ( request) )
1215
+ . await ;
1216
+ }
1217
+ }
1218
+ res. map_err ( |err| Status :: unknown ( err. to_string ( ) ) ) ?;
1219
+
1077
1220
let cursor_name = & COMPACTION ;
1078
1221
let cursor_store = CursorStore :: new (
1079
1222
CursorStoreOptions :: default ( ) ,
@@ -1416,6 +1559,8 @@ pub struct LogServerConfig {
1416
1559
pub reinsert_threshold : u64 ,
1417
1560
#[ serde( default = "LogServerConfig::default_timeout_us" ) ]
1418
1561
pub timeout_us : u64 ,
1562
+ #[ serde( default ) ]
1563
+ pub proxy_to : Option < GrpcLogConfig > ,
1419
1564
}
1420
1565
1421
1566
impl LogServerConfig {
@@ -1447,6 +1592,7 @@ impl Default for LogServerConfig {
1447
1592
record_count_threshold : Self :: default_record_count_threshold ( ) ,
1448
1593
reinsert_threshold : Self :: default_reinsert_threshold ( ) ,
1449
1594
timeout_us : Self :: default_timeout_us ( ) ,
1595
+ proxy_to : None ,
1450
1596
}
1451
1597
}
1452
1598
}
@@ -1484,13 +1630,24 @@ impl Configurable<LogServerConfig> for LogServer {
1484
1630
. await
1485
1631
. map_err ( |err| -> Box < dyn ChromaError > { Box :: new ( err) as _ } ) ?;
1486
1632
let dirty_log = Arc :: new ( dirty_log) ;
1633
+ let proxy = if let Some ( proxy_to) = config. proxy_to . as_ref ( ) {
1634
+ match GrpcLog :: primary_client_from_config ( proxy_to) . await {
1635
+ Ok ( log) => Some ( log) ,
1636
+ Err ( err) => {
1637
+ return Err ( err) ;
1638
+ }
1639
+ }
1640
+ } else {
1641
+ None
1642
+ } ;
1487
1643
let compacting = tokio:: sync:: Mutex :: new ( ( ) ) ;
1488
1644
let metrics = Metrics :: new ( opentelemetry:: global:: meter ( "chroma" ) ) ;
1489
1645
Ok ( Self {
1490
1646
config : config. clone ( ) ,
1491
1647
open_logs : Arc :: new ( StateHashTable :: default ( ) ) ,
1492
1648
storage,
1493
1649
dirty_log,
1650
+ proxy,
1494
1651
compacting,
1495
1652
cache,
1496
1653
metrics,
0 commit comments