Skip to content

Commit a826fe3

Browse files
lwshang and claude committed
chore: remove machine_id from telemetry, document batch/sequence fields
Replace the persistent machine_id (which enabled cross-session tracking) with the already-existing batch UUID that only groups records within a single transmission. This is a better privacy/utility balance. Also add batch and sequence to the "What is collected" table so it serves as a complete schema reference. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 463b69e commit a826fe3

File tree

3 files changed

+4
-74
lines changed

3 files changed

+4
-74
lines changed

crates/icp-cli/src/telemetry.rs

Lines changed: 0 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,6 @@ use time::OffsetDateTime;
2020
use crate::version::icp_cli_version_str;
2121

2222
const EVENTS_FILE: &str = "events.jsonl";
23-
const MACHINE_ID_FILE: &str = "machine-id";
2423
const NOTICE_SHOWN_FILE: &str = "notice-shown";
2524
const NEXT_SEND_TIME_FILE: &str = "next-send-time";
2625

@@ -72,7 +71,6 @@ pub(crate) struct Argument {
7271
#[derive(Debug, Serialize, Deserialize)]
7372
pub(crate) struct TelemetryRecord {
7473
// --- Metadata that is constant across all events on the same machine
75-
pub machine_id: String,
7674
pub platform: String,
7775
pub arch: &'static str,
7876

@@ -132,13 +130,11 @@ impl TelemetrySession {
132130

133131
/// Finish the session, record the event, and trigger a send if needed.
134132
pub(crate) fn finish(self, success: bool, telemetry_data: &TelemetryData) {
135-
let machine_id = get_or_create_machine_id(&self.telemetry_dir);
136133
let duration_ms = self.start.elapsed().as_millis() as u64;
137134

138135
let date = OffsetDateTime::now_utc().date().to_string();
139136

140137
let record = TelemetryRecord {
141-
machine_id,
142138
platform: if cfg!(target_os = "linux") && std::env::var_os("WSL_DISTRO_NAME").is_some()
143139
{
144140
"wsl".to_string()
@@ -237,20 +233,6 @@ fn show_notice_if_needed(telemetry_dir: &Path) {
237233
let _ = std::fs::write(&marker, "");
238234
}
239235

240-
fn get_or_create_machine_id(telemetry_dir: &Path) -> String {
241-
let path = telemetry_dir.join(MACHINE_ID_FILE);
242-
if let Ok(id) = std::fs::read_to_string(&path) {
243-
let id = id.trim().to_string();
244-
if !id.is_empty() {
245-
return id;
246-
}
247-
}
248-
let id = uuid::Uuid::new_v4().to_string();
249-
let _ = std::fs::create_dir_all(telemetry_dir);
250-
let _ = std::fs::write(&path, &id);
251-
id
252-
}
253-
254236
fn append_record(telemetry_dir: &Path, record: &TelemetryRecord) {
255237
let Ok(line) = serde_json::to_string(record) else {
256238
return;

crates/icp-cli/tests/telemetry_tests.rs

Lines changed: 1 addition & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@
1010
//! 5. **No rotation when triggers not met** — events.jsonl kept intact
1111
//! 6. **Batch send** — `__telemetry-send-batch`: payload shape, silent failure, file cleanup
1212
//! 7. **Stale batch cleanup** — old/excess batch files pruned when a send is triggered
13-
//! 8. **Machine-id persistence** — same UUID is reused across invocations
1413
//!
1514
//! Full-pipeline tests run `icp settings telemetry` (a fast, network-free
1615
//! command) with `ICP_HOME` set to a known temp path and all opt-out env vars
@@ -29,7 +28,7 @@ mod common;
2928
use common::TestContext;
3029

3130
/// A minimal, syntactically-valid NDJSON telemetry record.
32-
const FAKE_RECORD: &str = r#"{"machine_id":"test-machine","platform":"test","arch":"x86_64","version":"0.0.0","command":"version","arguments":[],"success":true,"duration_ms":42}"#;
31+
const FAKE_RECORD: &str = r#"{"platform":"test","arch":"x86_64","version":"0.0.0","command":"version","arguments":[],"success":true,"duration_ms":42}"#;
3332

3433
/// A timestamp guaranteed to be far in the future (~year 2286).
3534
/// Written to `next-send-time` to prevent the time-based send trigger from
@@ -175,13 +174,6 @@ fn telemetry_record_appended_to_events_file() {
175174
let record: Value = serde_json::from_str(first_line).expect("record must be valid JSON");
176175

177176
// Required fields
178-
assert!(
179-
record["machine_id"]
180-
.as_str()
181-
.map(|s| !s.is_empty())
182-
.unwrap_or(false),
183-
"machine_id must be a non-empty string"
184-
);
185177
assert!(
186178
!record["platform"].as_str().unwrap_or("").is_empty(),
187179
"platform must be present"
@@ -381,7 +373,6 @@ fn telemetry_send_batch_delivers_data() {
381373
request::body(matches("\"batch\"")),
382374
request::body(matches("\"sequence\"")),
383375
// Original fields must be preserved.
384-
request::body(matches("\"machine_id\":\"test-machine\"")),
385376
request::body(matches("\"command\":\"version\"")),
386377
])
387378
.times(1)
@@ -480,46 +471,3 @@ fn telemetry_excess_batches_pruned_on_trigger() {
480471
"batch count must be pruned to ≤10; found {remaining}"
481472
);
482473
}
483-
484-
/// The same `machine_id` UUID must appear in all records produced by
485-
/// consecutive command invocations.
486-
#[test]
487-
fn telemetry_machine_id_persists_across_invocations() {
488-
let ctx = TestContext::new();
489-
let icp_home = ctx.home_path().join("icp-home");
490-
let telemetry_dir = icp_home.join("telemetry");
491-
492-
// Keep next-send-time in the future so events.jsonl is never rotated
493-
// and both records land in the same file.
494-
init_telemetry_dir(&telemetry_dir, Some(FAR_FUTURE_SECS));
495-
496-
for _ in 0..2 {
497-
ctx.icp()
498-
.env("ICP_HOME", icp_home.as_str())
499-
.env_remove("CI")
500-
.env_remove("DO_NOT_TRACK")
501-
.env_remove("ICP_TELEMETRY_DISABLED")
502-
.args(["settings", "telemetry"])
503-
.assert()
504-
.success();
505-
}
506-
507-
let contents = std::fs::read_to_string(telemetry_dir.join("events.jsonl")).unwrap();
508-
let ids: Vec<&str> = contents
509-
.lines()
510-
.filter_map(|l| serde_json::from_str::<Value>(l).ok())
511-
.filter_map(|v| {
512-
v["machine_id"]
513-
.as_str()
514-
.map(str::to_owned)
515-
.map(|s| Box::leak(s.into_boxed_str()) as &str)
516-
})
517-
.collect();
518-
519-
assert_eq!(ids.len(), 2, "expected 2 records");
520-
assert_eq!(
521-
ids[0], ids[1],
522-
"machine_id must be identical across invocations: got {:?} and {:?}",
523-
ids[0], ids[1]
524-
);
525-
}

docs/telemetry.md

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,8 @@ Each command invocation produces a single telemetry record with the following fi
88

99
| Field | Example | Purpose |
1010
|---|---|---|
11-
| `machine_id` | `a1b2c3d4-...` | Count unique installations |
11+
| `batch` | `a1b2c3d4-...` | Group records from the same transmission; server-side deduplication |
12+
| `sequence` | `0`, `1`, `2` | Ordering of records within a batch |
1213
| `platform` | `macos`, `linux`, `windows`, `wsl` | Platform distribution |
1314
| `arch` | `aarch64`, `x86_64` | Architecture distribution |
1415
| `version` | `0.1.0` | Identify version adoption |
@@ -40,7 +41,7 @@ For example, `icp deploy --mode install --environment production` records:
4041
]
4142
```
4243

43-
The `machine_id` is a random UUID generated on first run and stored locally. It is used solely to count unique installations and is not linked to any user identity.
44+
The `batch` UUID is generated fresh each time records are transmitted and is not persisted across sends. Records within the same batch can be grouped, but there is no long-lived identifier that links activity across different transmissions.
4445

4546
Additional fields may be introduced in future versions. This page will be updated accordingly. The same privacy principles apply: no personally identifiable information, no project data.
4647

@@ -105,7 +106,6 @@ Runtime state and event data live in the `telemetry/` data directory. Each piece
105106

106107
```
107108
telemetry/
108-
machine-id # plain text UUID, generated on first run
109109
notice-shown # empty marker file, presence = notice was shown
110110
next-send-time # plain text UTC timestamp
111111
events.jsonl # active event log

0 commit comments

Comments
 (0)