Skip to content

Commit b719b6e

Browse files
authored
Mute chatty xet in migration service (#594)
The repo scanner migration services calls xet-core with tracing subscriber set to logging at `INFO` level. As a result, xet-core has been too chatty for repo scanner because we log at `INFO` level at many places as we wanted to log more info in hf-xet to help diagnose problems. This PR employs a build feature `elevated_information_level` which is enabled only for hf-xet, such that the information we want to log in hf-xet are still emitted at `INFO` level, but for others emitted at `DEBUG` level, so not to clutter the repo scanner log.
1 parent 74d7c59 commit b719b6e

File tree

6 files changed

+80
-27
lines changed

6 files changed

+80
-27
lines changed

cas_client/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@ statrs = { workspace = true }
5252
[features]
5353
strict = []
5454
default = ["rustls-tls"]
55+
elevated_information_level = []
5556

5657
# Three options for compliation here.
5758
rustls-tls = [

cas_client/src/lib.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ pub use local_client::LocalClient;
66
#[cfg(not(target_family = "wasm"))]
77
pub use output_provider::*;
88
pub use remote_client::RemoteClient;
9+
use tracing::Level;
910

1011
pub use crate::error::CasClientError;
1112

@@ -23,3 +24,8 @@ mod output_provider;
2324
pub mod remote_client;
2425
pub mod retry_wrapper;
2526
pub mod upload_progress_stream;
27+
28+
#[cfg(not(feature = "elevated_information_level"))]
29+
pub const INFORMATION_LOG_LEVEL: Level = Level::DEBUG;
30+
#[cfg(feature = "elevated_information_level")]
31+
pub const INFORMATION_LOG_LEVEL: Level = Level::INFO;

cas_client/src/remote_client.rs

Lines changed: 63 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ use reqwest_middleware::ClientWithMiddleware;
2323
use tokio::io::AsyncWriteExt;
2424
use tokio::sync::{OwnedSemaphorePermit, mpsc};
2525
use tokio::task::{JoinHandle, JoinSet};
26-
use tracing::{debug, info, instrument};
26+
use tracing::{debug, event, instrument};
2727
use utils::auth::AuthConfig;
2828
#[cfg(not(target_family = "wasm"))]
2929
use utils::singleflight::Group;
@@ -37,7 +37,7 @@ use crate::http_client::{Api, ResponseErrorLogger, RetryConfig};
3737
#[cfg(not(target_family = "wasm"))]
3838
use crate::output_provider::{SeekingOutputProvider, SequentialOutput};
3939
use crate::retry_wrapper::RetryWrapper;
40-
use crate::{Client, http_client};
40+
use crate::{Client, INFORMATION_LOG_LEVEL, http_client};
4141

4242
pub const CAS_ENDPOINT: &str = "http://localhost:8080";
4343
pub const PREFIX_DEFAULT: &str = "default";
@@ -71,7 +71,8 @@ pub(crate) async fn get_reconstruction_with_endpoint_and_client(
7171
) -> Result<Option<QueryReconstructionResponse>> {
7272
let call_id = FN_CALL_ID.fetch_add(1, Ordering::Relaxed);
7373
let url = Url::parse(&format!("{endpoint}/v1/reconstructions/{}", file_hash.hex()))?;
74-
info!(
74+
event!(
75+
INFORMATION_LOG_LEVEL,
7576
call_id,
7677
%file_hash,
7778
?byte_range,
@@ -99,14 +100,15 @@ pub(crate) async fn get_reconstruction_with_endpoint_and_client(
99100
};
100101

101102
let len = response.content_length();
102-
info!(%file_hash, len, "query_reconstruction");
103+
event!(INFORMATION_LOG_LEVEL, %file_hash, len, "query_reconstruction");
103104

104105
let query_reconstruction_response: QueryReconstructionResponse = response
105106
.json()
106107
.await
107108
.info_error_fn(|| format!("JSON parsing failed in get_reconstruction, call_id={}", call_id))?;
108109

109-
info!(
110+
event!(
111+
INFORMATION_LOG_LEVEL,
110112
call_id,
111113
%file_hash,
112114
?byte_range,
@@ -196,10 +198,10 @@ impl RemoteClient {
196198
// use disk cache if cache_config provided.
197199
let chunk_cache = if let Some(cache_config) = cache_config {
198200
if cache_config.cache_size == 0 {
199-
info!("Chunk cache size set to 0, disabling chunk cache");
201+
event!(INFORMATION_LOG_LEVEL, "Chunk cache size set to 0, disabling chunk cache");
200202
None
201203
} else {
202-
info!(cache.dir=?cache_config.cache_directory, cache.size=cache_config.cache_size,"Using disk cache");
204+
event!(INFORMATION_LOG_LEVEL, cache.dir=?cache_config.cache_directory, cache.size=cache_config.cache_size,"Using disk cache");
203205
chunk_cache::get_cache(cache_config)
204206
.log_error("failed to initialize cache, not using cache")
205207
.ok()
@@ -236,7 +238,8 @@ impl RemoteClient {
236238

237239
let call_id = FN_CALL_ID.fetch_add(1, Ordering::Relaxed);
238240
let url = Url::parse(&format!("{}/v1/chunks/{key}", self.endpoint))?;
239-
info!(
241+
event!(
242+
INFORMATION_LOG_LEVEL,
240243
call_id,
241244
prefix,
242245
%chunk_hash,
@@ -253,7 +256,8 @@ impl RemoteClient {
253256
.await;
254257

255258
if result.as_ref().is_err_and(|e| e.status().is_some()) {
256-
info!(
259+
event!(
260+
INFORMATION_LOG_LEVEL,
257261
call_id,
258262
prefix,
259263
%chunk_hash,
@@ -263,7 +267,8 @@ impl RemoteClient {
263267
return Ok(None);
264268
}
265269

266-
info!(
270+
event!(
271+
INFORMATION_LOG_LEVEL,
267272
call_id,
268273
prefix,
269274
%chunk_hash,
@@ -298,7 +303,7 @@ impl RemoteClient {
298303
let url: Url = url_str.parse()?;
299304

300305
let call_id = FN_CALL_ID.fetch_add(1, Ordering::Relaxed);
301-
info!(call_id, file_ids=?file_id_list, "Starting batch_get_reconstruction API call");
306+
event!(INFORMATION_LOG_LEVEL, call_id, file_ids=?file_id_list, "Starting batch_get_reconstruction API call");
302307

303308
let api_tag = "cas::batch_get_reconstruction";
304309
let client = self.authenticated_http_client.clone();
@@ -307,7 +312,8 @@ impl RemoteClient {
307312
.run_and_extract_json(move |_partial_report_fn| client.get(url.clone()).with_extension(Api(api_tag)).send())
308313
.await?;
309314

310-
info!(
315+
event!(
316+
INFORMATION_LOG_LEVEL,
311317
call_id,
312318
file_ids=?file_id_list,
313319
response_count=response.files.len(),
@@ -331,7 +337,8 @@ impl RemoteClient {
331337
progress_updater: Option<Arc<SingleItemProgressUpdater>>,
332338
) -> Result<u64> {
333339
let call_id = FN_CALL_ID.fetch_add(1, Ordering::Relaxed);
334-
info!(
340+
event!(
341+
INFORMATION_LOG_LEVEL,
335342
call_id,
336343
%file_hash,
337344
?byte_range,
@@ -371,7 +378,11 @@ impl RemoteClient {
371378
let download_concurrency_limiter =
372379
XetRuntime::current().global_semaphore(*DOWNLOAD_CHUNK_RANGE_CONCURRENCY_LIMITER);
373380

374-
info!(concurrency_limit = xet_config().client.num_concurrent_range_gets, "Starting segmented download");
381+
event!(
382+
INFORMATION_LOG_LEVEL,
383+
concurrency_limit = xet_config().client.num_concurrent_range_gets,
384+
"Starting segmented download"
385+
);
375386

376387
let queue_dispatcher: JoinHandle<Result<()>> = tokio::spawn(async move {
377388
let mut remaining_total_len = total_len;
@@ -484,7 +495,8 @@ impl RemoteClient {
484495

485496
queue_dispatcher.await??;
486497

487-
info!(
498+
event!(
499+
INFORMATION_LOG_LEVEL,
488500
call_id,
489501
%file_hash,
490502
?byte_range,
@@ -508,7 +520,8 @@ impl RemoteClient {
508520
progress_updater: Option<Arc<SingleItemProgressUpdater>>,
509521
) -> Result<u64> {
510522
let call_id = FN_CALL_ID.fetch_add(1, Ordering::Relaxed);
511-
info!(
523+
event!(
524+
INFORMATION_LOG_LEVEL,
512525
call_id,
513526
%file_hash,
514527
?byte_range,
@@ -634,7 +647,8 @@ impl RemoteClient {
634647
}
635648
}
636649

637-
info!(
650+
event!(
651+
INFORMATION_LOG_LEVEL,
638652
call_id,
639653
%file_hash,
640654
?byte_range,
@@ -695,7 +709,7 @@ impl Client for RemoteClient {
695709
) -> Result<Option<(MDBFileInfo, Option<MerkleHash>)>> {
696710
let call_id = FN_CALL_ID.fetch_add(1, Ordering::Relaxed);
697711
let url = Url::parse(&format!("{}/v1/reconstructions/{}", self.endpoint, file_hash.hex()))?;
698-
info!(call_id, %file_hash, "Starting get_file_reconstruction_info API call");
712+
event!(INFORMATION_LOG_LEVEL, call_id, %file_hash, "Starting get_file_reconstruction_info API call");
699713

700714
let api_tag = "cas::get_reconstruction_info";
701715
let client = self.authenticated_http_client.clone();
@@ -721,7 +735,7 @@ impl Client for RemoteClient {
721735
None,
722736
));
723737

724-
info!(call_id, %file_hash, terms_count, "Completed get_file_reconstruction_info API call");
738+
event!(INFORMATION_LOG_LEVEL, call_id, %file_hash, terms_count, "Completed get_file_reconstruction_info API call");
725739

726740
Ok(result)
727741
}
@@ -744,6 +758,10 @@ impl Client for RemoteClient {
744758
return Ok(true);
745759
}
746760

761+
let call_id = FN_CALL_ID.fetch_add(1, Ordering::Relaxed);
762+
let n_upload_bytes = shard_data.len();
763+
event!(INFORMATION_LOG_LEVEL, call_id, size = n_upload_bytes, "Starting upload_shard API call",);
764+
747765
let api_tag = "cas::upload_shard";
748766
let client = self.authenticated_http_client.clone();
749767

@@ -761,8 +779,26 @@ impl Client for RemoteClient {
761779
.await?;
762780

763781
match response.result {
764-
UploadShardResponseType::Exists => Ok(false),
765-
UploadShardResponseType::SyncPerformed => Ok(true),
782+
UploadShardResponseType::Exists => {
783+
event!(
784+
INFORMATION_LOG_LEVEL,
785+
call_id,
786+
size = n_upload_bytes,
787+
result = "exists",
788+
"Completed upload_shard API call",
789+
);
790+
Ok(false)
791+
},
792+
UploadShardResponseType::SyncPerformed => {
793+
event!(
794+
INFORMATION_LOG_LEVEL,
795+
call_id,
796+
size = n_upload_bytes,
797+
result = "sync performed",
798+
"Completed upload_shard API call",
799+
);
800+
Ok(true)
801+
},
766802
}
767803
}
768804

@@ -786,7 +822,8 @@ impl Client for RemoteClient {
786822
let url = Url::parse(&format!("{}/v1/xorbs/{key}", self.endpoint))?;
787823

788824
let n_upload_bytes = serialized_cas_object.serialized_data.len() as u64;
789-
info!(
825+
event!(
826+
INFORMATION_LOG_LEVEL,
790827
call_id,
791828
prefix,
792829
hash=%serialized_cas_object.hash,
@@ -856,15 +893,17 @@ impl Client for RemoteClient {
856893
};
857894

858895
if !xorb_uploaded {
859-
info!(
896+
event!(
897+
INFORMATION_LOG_LEVEL,
860898
call_id,
861899
prefix,
862900
hash=%serialized_cas_object.hash,
863901
result="not_inserted",
864902
"Completed upload_xorb API call",
865903
);
866904
} else {
867-
info!(
905+
event!(
906+
INFORMATION_LOG_LEVEL,
868907
call_id,
869908
prefix,
870909
hash=%serialized_cas_object.hash,

hf_xet/Cargo.toml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,12 +48,13 @@ signal-hook = "0.3"
4848
ctrlc = "3.4"
4949

5050
[features]
51-
default = ["no-default-cache"] # By default, hf_xet disables the disk cache.
51+
default = ["no-default-cache", "elevated_information_level"] # By default, hf_xet disables the disk cache and uses aggressive logging level
5252
native-tls = ["cas_client/native-tls-vendored"]
5353
native-tls-vendored = ["cas_client/native-tls-vendored"]
5454
no-default-cache = ["xet_config/no-default-cache"]
5555
profiling = ["pprof"]
5656
tokio-console = ["xet_logging/tokio-console"]
57+
elevated_information_level = ["cas_client/elevated_information_level", "utils/elevated_information_level"]
5758

5859
[profile.release]
5960
split-debuginfo = "packed"

utils/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,3 +47,4 @@ futures-util = { workspace = true }
4747

4848
[features]
4949
strict = []
50+
elevated_information_level = []

utils/src/configuration_utils.rs

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,14 @@
11
use std::str::FromStr;
22

3-
use tracing::{info, warn};
3+
use tracing::{Level, event, info, warn};
44

55
use crate::ByteSize;
66

7+
#[cfg(not(feature = "elevated_information_level"))]
8+
pub const INFORMATION_LOG_LEVEL: Level = Level::DEBUG;
9+
#[cfg(feature = "elevated_information_level")]
10+
pub const INFORMATION_LOG_LEVEL: Level = Level::INFO;
11+
712
/// A trait to control how a value is parsed from an environment string or other config source
813
/// if it's present.
914
///
@@ -29,7 +34,7 @@ pub trait ParsableConfigValue: std::fmt::Debug + Sized {
2934
},
3035
},
3136
None => {
32-
info!("Config: {variable_name} = {default:?} (default)");
37+
event!(INFORMATION_LOG_LEVEL, "Config: {variable_name} = {default:?} (default)");
3338
default
3439
},
3540
}

0 commit comments

Comments
 (0)