Skip to content

Commit 2f579c8

Browse files
lwshangclaude
andcommitted
chore: restore machine_id in telemetry
Re-add the persistent machine_id field that was removed in the previous commit. A per-installation UUID is standard practice for CLI telemetry (Next.js, .NET CLI, AWS CDK all do this) and enables cross-batch analysis like version upgrade tracking and unique installation counts. The batch/sequence documentation additions from the previous commit are preserved. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent a826fe3 commit 2f579c8

File tree

3 files changed

+76
-2
lines changed

3 files changed

+76
-2
lines changed

crates/icp-cli/src/telemetry.rs

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ use time::OffsetDateTime;
2020
use crate::version::icp_cli_version_str;
2121

2222
const EVENTS_FILE: &str = "events.jsonl";
23+
const MACHINE_ID_FILE: &str = "machine-id";
2324
const NOTICE_SHOWN_FILE: &str = "notice-shown";
2425
const NEXT_SEND_TIME_FILE: &str = "next-send-time";
2526

@@ -71,6 +72,7 @@ pub(crate) struct Argument {
7172
#[derive(Debug, Serialize, Deserialize)]
7273
pub(crate) struct TelemetryRecord {
7374
// --- Metadata that is constant across all events on the same machine
75+
pub machine_id: String,
7476
pub platform: String,
7577
pub arch: &'static str,
7678

@@ -130,11 +132,13 @@ impl TelemetrySession {
130132

131133
/// Finish the session, record the event, and trigger a send if needed.
132134
pub(crate) fn finish(self, success: bool, telemetry_data: &TelemetryData) {
135+
let machine_id = get_or_create_machine_id(&self.telemetry_dir);
133136
let duration_ms = self.start.elapsed().as_millis() as u64;
134137

135138
let date = OffsetDateTime::now_utc().date().to_string();
136139

137140
let record = TelemetryRecord {
141+
machine_id,
138142
platform: if cfg!(target_os = "linux") && std::env::var_os("WSL_DISTRO_NAME").is_some()
139143
{
140144
"wsl".to_string()
@@ -233,6 +237,20 @@ fn show_notice_if_needed(telemetry_dir: &Path) {
233237
let _ = std::fs::write(&marker, "");
234238
}
235239

240+
/// Returns the machine identifier stored in `telemetry_dir`, creating one if needed.
///
/// The identifier is read from the `MACHINE_ID_FILE` entry of `telemetry_dir`.
/// When that file can be read and contains something other than whitespace,
/// its trimmed contents are returned unchanged. Otherwise a new random
/// (version 4) UUID is generated, written to the file, and returned.
///
/// All filesystem errors are ignored: a failed read falls through to
/// generating a new id, and a failed directory creation or write still
/// returns the freshly generated id to the caller.
fn get_or_create_machine_id(telemetry_dir: &Path) -> String {
    let id_file = telemetry_dir.join(MACHINE_ID_FILE);

    // Reuse whatever is on disk as long as it is not blank.
    let stored = std::fs::read_to_string(&id_file)
        .ok()
        .map(|raw| raw.trim().to_string())
        .filter(|trimmed| !trimmed.is_empty());
    if let Some(existing) = stored {
        return existing;
    }

    // Nothing usable on disk: mint a new id and try to persist it.
    let fresh = uuid::Uuid::new_v4().to_string();
    // Best-effort persistence; the results are intentionally discarded.
    let _ = std::fs::create_dir_all(telemetry_dir);
    let _ = std::fs::write(&id_file, &fresh);
    fresh
}
253+
236254
fn append_record(telemetry_dir: &Path, record: &TelemetryRecord) {
237255
let Ok(line) = serde_json::to_string(record) else {
238256
return;

crates/icp-cli/tests/telemetry_tests.rs

Lines changed: 53 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
//! 5. **No rotation when triggers not met** — events.jsonl kept intact
1111
//! 6. **Batch send** — `__telemetry-send-batch`: payload shape, silent failure, file cleanup
1212
//! 7. **Stale batch cleanup** — old/excess batch files pruned when a send is triggered
13+
//! 8. **Machine-id persistence** — same UUID is reused across invocations
1314
//!
1415
//! Full-pipeline tests run `icp settings telemetry` (a fast, network-free
1516
//! command) with `ICP_HOME` set to a known temp path and all opt-out env vars
@@ -28,7 +29,7 @@ mod common;
2829
use common::TestContext;
2930

3031
/// A minimal, syntactically-valid NDJSON telemetry record.
31-
const FAKE_RECORD: &str = r#"{"platform":"test","arch":"x86_64","version":"0.0.0","command":"version","arguments":[],"success":true,"duration_ms":42}"#;
32+
const FAKE_RECORD: &str = r#"{"machine_id":"test-machine","platform":"test","arch":"x86_64","version":"0.0.0","command":"version","arguments":[],"success":true,"duration_ms":42}"#;
3233

3334
/// A timestamp guaranteed to be far in the future (~year 2286).
3435
/// Written to `next-send-time` to prevent the time-based send trigger from
@@ -174,6 +175,13 @@ fn telemetry_record_appended_to_events_file() {
174175
let record: Value = serde_json::from_str(first_line).expect("record must be valid JSON");
175176

176177
// Required fields
178+
assert!(
179+
record["machine_id"]
180+
.as_str()
181+
.map(|s| !s.is_empty())
182+
.unwrap_or(false),
183+
"machine_id must be a non-empty string"
184+
);
177185
assert!(
178186
!record["platform"].as_str().unwrap_or("").is_empty(),
179187
"platform must be present"
@@ -373,6 +381,7 @@ fn telemetry_send_batch_delivers_data() {
373381
request::body(matches("\"batch\"")),
374382
request::body(matches("\"sequence\"")),
375383
// Original fields must be preserved.
384+
request::body(matches("\"machine_id\":\"test-machine\"")),
376385
request::body(matches("\"command\":\"version\"")),
377386
])
378387
.times(1)
@@ -471,3 +480,46 @@ fn telemetry_excess_batches_pruned_on_trigger() {
471480
"batch count must be pruned to ≤10; found {remaining}"
472481
);
473482
}
483+
484+
/// The same `machine_id` UUID must appear in all records produced by
/// consecutive command invocations.
///
/// Runs `icp settings telemetry` twice against the same `ICP_HOME` with the
/// `CI`, `DO_NOT_TRACK` and `ICP_TELEMETRY_DISABLED` variables removed, then
/// compares the `machine_id` values of the records found in `events.jsonl`.
#[test]
fn telemetry_machine_id_persists_across_invocations() {
    let ctx = TestContext::new();
    let icp_home = ctx.home_path().join("icp-home");
    let telemetry_dir = icp_home.join("telemetry");

    // Keep next-send-time in the future so events.jsonl is never rotated
    // and both records land in the same file.
    init_telemetry_dir(&telemetry_dir, Some(FAR_FUTURE_SECS));

    for _ in 0..2 {
        ctx.icp()
            .env("ICP_HOME", icp_home.as_str())
            .env_remove("CI")
            .env_remove("DO_NOT_TRACK")
            .env_remove("ICP_TELEMETRY_DISABLED")
            .args(["settings", "telemetry"])
            .assert()
            .success();
    }

    let contents = std::fs::read_to_string(telemetry_dir.join("events.jsonl"))
        .expect("events.jsonl must exist after two invocations");

    // Collect owned strings: each parsed `Value` is dropped at the end of its
    // closure, so borrowing from it is impossible, and leaking the strings
    // just to obtain `&str` is unnecessary.
    let ids: Vec<String> = contents
        .lines()
        .filter_map(|line| serde_json::from_str::<Value>(line).ok())
        .filter_map(|record| record["machine_id"].as_str().map(str::to_owned))
        .collect();

    assert_eq!(ids.len(), 2, "expected 2 records");
    assert_eq!(
        ids[0], ids[1],
        "machine_id must be identical across invocations: got {:?} and {:?}",
        ids[0], ids[1]
    );
}

docs/telemetry.md

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ Each command invocation produces a single telemetry record with the following fi
1010
|---|---|---|
1111
| `batch` | `a1b2c3d4-...` | Group records from the same transmission; server-side deduplication |
1212
| `sequence` | `0`, `1`, `2` | Ordering of records within a batch |
13+
| `machine_id` | `a1b2c3d4-...` | Count unique installations |
1314
| `platform` | `macos`, `linux`, `windows`, `wsl` | Platform distribution |
1415
| `arch` | `aarch64`, `x86_64` | Architecture distribution |
1516
| `version` | `0.1.0` | Identify version adoption |
@@ -41,7 +42,9 @@ For example, `icp deploy --mode install --environment production` records:
4142
]
4243
```
4344

44-
The `batch` UUID is generated fresh each time records are transmitted and is not persisted across sends. Records within the same batch can be grouped, but there is no long-lived identifier that links activity across different transmissions.
45+
The `batch` UUID is generated fresh each time records are transmitted and is not persisted across sends. Records within the same batch can be grouped for server-side deduplication.
46+
47+
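The `machine_id` is a random UUID generated on first run and stored locally. It is used to count unique installations and to relate records from the same installation across transmissions (for example, to track version upgrades). It is not linked to any user identity.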
4548

4649
Additional fields may be introduced in future versions. This page will be updated accordingly. The same privacy principles apply: no personally identifiable information, no project data.
4750

@@ -106,6 +109,7 @@ Runtime state and event data live in the `telemetry/` data directory. Each piece
106109

107110
```
108111
telemetry/
112+
machine-id # plain text UUID, generated on first run
109113
notice-shown # empty marker file, presence = notice was shown
110114
next-send-time # plain text UTC timestamp
111115
events.jsonl # active event log

0 commit comments

Comments
 (0)