Skip to content
66 changes: 45 additions & 21 deletions datadog-tracer-flare/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,12 @@
pub mod error;
pub mod zip;

use std::sync::atomic::{AtomicBool, Ordering};
use std::{fmt::Display, sync::Mutex};

use datadog_remote_config::{
config::agent_task::AgentTaskFile, file_storage::RawFile, RemoteConfigData,
};
use ddcommon::MutexExt;

#[cfg(feature = "listener")]
use {
Expand Down Expand Up @@ -42,7 +43,9 @@ use crate::error::FlareError;
///
/// - `agent_url`: The agent endpoint URL for flare transmission
/// - `language`: The tracer language identifier
/// - `collecting`: Current collection state (true when actively collecting)
/// - `current_log_level`: Log level at which we are collecting if we are collecting
/// - `original_log_level`: Log level of the tracers we need to restore once the Flare ends. Managed
/// inside the integrations, not here.
/// - `listener`: Optional remote config listener (requires "listener" feature)
///
/// # Typical usage flow
Expand All @@ -53,20 +56,22 @@ use crate::error::FlareError;
/// 3. Handle returned [`ReturnAction`]: `Send(agent_task)`, `Set(log_level)`, `Unset`, or `None`
/// 4. Use the `collecting` field to track current flare collection state
pub struct TracerFlareManager {
pub agent_url: String,
pub language: String,
pub collecting: AtomicBool,
agent_url: String,
language: String,
current_log_level: Mutex<Option<LogLevel>>,
pub original_log_level: Mutex<Option<LogLevel>>,
/// As a featured option so we can use the component with no Listener
#[cfg(feature = "listener")]
pub listener: Option<Listener>,
listener: Option<Listener>,
}

impl Default for TracerFlareManager {
fn default() -> Self {
TracerFlareManager {
agent_url: hyper::Uri::default().to_string(),
language: "rust".to_string(),
collecting: AtomicBool::new(false),
current_log_level: Mutex::new(None),
original_log_level: Mutex::new(None),
#[cfg(feature = "listener")]
listener: None,
}
Expand Down Expand Up @@ -182,14 +187,15 @@ impl TracerFlareManager {
data: &RemoteConfigData,
) -> Result<ReturnAction, FlareError> {
let action = data.try_into();
if let Ok(ReturnAction::Set(_)) = action {
if self.collecting.load(Ordering::Relaxed) {
if let Ok(ReturnAction::Set(log_level)) = action {
// If we are already collecting
if self.current_log_level.lock_or_panic().is_some() {
return Ok(ReturnAction::None);
}
self.collecting.store(true, Ordering::Relaxed);
*self.current_log_level.lock_or_panic() = Some(log_level);
} else if Ok(ReturnAction::None) != action {
// If action is Send, Unset or an error, we need to stop collecting
self.collecting.store(false, Ordering::Relaxed);
*self.current_log_level.lock_or_panic() = None;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If you remove the current_log_level is the case of Send, you'll fall into the "Log level is not set" every time the zip_and_send function is called. With collecting only it was not a problem but with the log level you should split this case in two.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see what you mean, then maybe just putting getter/setters for the integration would work

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Okay I think the best way is indeed to just have both log_levels fields public, and have the collecting bool as before. That way we don't change anything to the ReturnAction we return, and the integrater can manage these to make it work out.

}
action
}
Expand All @@ -215,7 +221,7 @@ impl TracerFlareManager {
Ok(data) => self.handle_remote_config_data(data),
Err(e) => {
// If encounter an error we need to stop collecting
self.collecting.store(false, Ordering::Relaxed);
*self.current_log_level.lock_or_panic() = None;
Err(FlareError::ParsingError(e.to_string()))
}
}
Expand All @@ -224,7 +230,7 @@ impl TracerFlareManager {

/// Enum that holds the different log levels possible
/// Do not change the order of the variants because we rely on Ord
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone)]
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone, Copy)]
pub enum LogLevel {
Trace,
Debug,
Expand Down Expand Up @@ -285,6 +291,24 @@ impl ReturnAction {
}
}

impl Display for LogLevel {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(
f,
"{}",
match self {
LogLevel::Trace => String::from("trace"),
LogLevel::Debug => String::from("debug"),
LogLevel::Info => String::from("info"),
LogLevel::Warn => String::from("warn"),
LogLevel::Error => String::from("error"),
LogLevel::Critical => String::from("todo"),
LogLevel::Off => String::from("off"),
}
)
}
}

impl TryFrom<&str> for LogLevel {
type Error = FlareError;

Expand Down Expand Up @@ -456,10 +480,10 @@ pub async fn run_remote_config_listener(
}
}

if let ReturnAction::Set(_) = state {
tracer_flare.collecting.store(true, Ordering::Relaxed);
if let ReturnAction::Set(log_level) = state {
*tracer_flare.current_log_level.lock_or_panic() = Some(log_level);
} else if let ReturnAction::Send(_) = state {
tracer_flare.collecting.store(false, Ordering::Relaxed);
*tracer_flare.current_log_level.lock_or_panic() = None;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same here you should not remove the current_log_level. It should be kept until the Send is done and then you can put it back as None.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yup good point that's my bad

}

Ok(state)
Expand All @@ -477,7 +501,7 @@ mod tests {
file_storage::ParsedFileStorage,
RemoteConfigPath, RemoteConfigProduct, RemoteConfigSource,
};
use std::sync::atomic::Ordering;
use ddcommon::MutexExt;
use std::{num::NonZeroU64, sync::Arc};

#[test]
Expand Down Expand Up @@ -663,19 +687,19 @@ mod tests {
.unwrap();

// First AGENT_CONFIG
assert!(!tracer_flare.collecting.load(Ordering::Relaxed));
assert!(tracer_flare.current_log_level.lock_or_panic().is_none());
let result = tracer_flare
.handle_remote_config_file(agent_config_file.clone())
.unwrap();
assert_eq!(result, ReturnAction::Set(LogLevel::Info));
assert!(tracer_flare.collecting.load(Ordering::Relaxed));
assert!(*tracer_flare.current_log_level.lock_or_panic() == Some(LogLevel::Info));

// Second AGENT_CONFIG
let result = tracer_flare
.handle_remote_config_file(agent_config_file)
.unwrap();
assert_eq!(result, ReturnAction::None);
assert!(tracer_flare.collecting.load(Ordering::Relaxed));
assert!(*tracer_flare.current_log_level.lock_or_panic() == Some(LogLevel::Info));

// Non-None actions stop collecting
let error_file = storage
Expand All @@ -692,7 +716,7 @@ mod tests {
.unwrap();

let _ = tracer_flare.handle_remote_config_file(error_file);
assert!(!tracer_flare.collecting.load(Ordering::Relaxed));
assert!(tracer_flare.current_log_level.lock_or_panic().is_none());
}

#[test]
Expand Down
18 changes: 12 additions & 6 deletions datadog-tracer-flare/src/zip.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
// SPDX-License-Identifier: Apache-2.0

use datadog_remote_config::config::agent_task::AgentTaskFile;
use ddcommon::{hyper_migration, Endpoint};
use ddcommon::{hyper_migration, Endpoint, MutexExt};
use hyper::{body::Bytes, Method};
use std::{
collections::HashMap,
Expand All @@ -17,7 +17,7 @@ use tempfile::tempfile;
use walkdir::WalkDir;
use zip::{write::FileOptions, ZipWriter};

use crate::{error::FlareError, ReturnAction, TracerFlareManager};
use crate::{error::FlareError, LogLevel, ReturnAction, TracerFlareManager};

/// Adds a single file to the zip archive with the specified options and relative path
fn add_file_to_zip(
Expand Down Expand Up @@ -151,7 +151,7 @@ const BOUNDARY: &str = "83CAD6AA-8A24-462C-8B3D-FF9CC683B51B";
fn generate_payload(
mut zip: File,
language: &String,
log_level: &String,
log_level: &LogLevel,
case_id: &NonZeroU64,
hostname: &String,
user_handle: &String,
Expand Down Expand Up @@ -231,7 +231,7 @@ fn generate_payload(
/// - The agent returns a non-success HTTP status code
async fn send(
zip: File,
log_level: String,
log_level: LogLevel,
agent_task: AgentTaskFile,
tracer_flare: &TracerFlareManager,
) -> Result<(), FlareError> {
Expand Down Expand Up @@ -354,7 +354,7 @@ async fn send(
/// "/path/to/config.txt".to_string(),
/// ];
///
/// match zip_and_send(files, "debug".to_string(), &tracer_flare, send_action).await {
/// match zip_and_send(files, &tracer_flare, send_action).await {
/// Ok(_) => println!("Flare sent successfully"),
/// Err(e) => eprintln!("Failed to send flare: {}", e),
/// }
Expand All @@ -363,7 +363,6 @@ async fn send(
/// ```
pub async fn zip_and_send(
files: Vec<String>,
log_level: String,
tracer_flare: &TracerFlareManager,
send_action: ReturnAction,
) -> Result<(), FlareError> {
Expand All @@ -380,6 +379,13 @@ pub async fn zip_and_send(

// APMSP-2118 - TODO: Implement obfuscation of sensitive data

let log_level = tracer_flare
.current_log_level
.lock_or_panic()
.ok_or(FlareError::ZipError(String::from(
"Trying to send the flare when log_level is not set",
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The case when you receive only an AGENT_TASK and thus only have to do a Send action is possible. In that case we still want to send the flare. So we either have the choice to use the tracer actual log_level or we do as Python and Java and putting by default the log_level which is Debug. I think putting Debug would be the best option but it's also more complicated to handle in a design way.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think it complicates anything. Maybe we can make the field public so that the integrater can set it if it is not set correctly, but it should make things automagic most of the time

)))?;

send(zip, log_level, agent_task, tracer_flare).await
}

Expand Down
Loading