-
Notifications
You must be signed in to change notification settings - Fork 1.2k
Expand file tree
/
Copy pathtelemetry.py
More file actions
285 lines (224 loc) · 8.67 KB
/
telemetry.py
File metadata and controls
285 lines (224 loc) · 8.67 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
# Copyright 2025 Flower Labs GmbH. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Flower telemetry."""
import datetime
import json
import logging
import os
import platform
import urllib.request
import uuid
from concurrent.futures import Future, ThreadPoolExecutor
from enum import Enum, auto
from pathlib import Path
from typing import Any, Optional, Union, cast
from flwr.common.constant import FLWR_DIR
from flwr.common.version import package_name, package_version
FLWR_TELEMETRY_ENABLED = os.getenv("FLWR_TELEMETRY_ENABLED", "1")
FLWR_TELEMETRY_LOGGING = os.getenv("FLWR_TELEMETRY_LOGGING", "0")
TELEMETRY_EVENTS_URL = "https://telemetry.flower.ai/api/v1/event"
LOGGER_NAME = "flwr-telemetry"
LOGGER_LEVEL = logging.DEBUG
def _configure_logger(log_level: int) -> None:
console_handler = logging.StreamHandler()
console_handler.setLevel(log_level)
console_handler.setFormatter(
logging.Formatter(
"%(levelname)s %(name)s %(asctime)s | %(filename)s:%(lineno)d | %(message)s"
)
)
logger = logging.getLogger(LOGGER_NAME)
logger.setLevel(log_level)
logger.addHandler(console_handler)
_configure_logger(LOGGER_LEVEL)
def log(msg: Union[str, Exception]) -> None:
"""Log message using logger at DEBUG level."""
logging.getLogger(LOGGER_NAME).log(LOGGER_LEVEL, msg)
def _get_home() -> Path:
return Path().home()
def _get_partner_id() -> str:
"""Get partner ID."""
partner_id = os.getenv("FLWR_TELEMETRY_PARTNER_ID")
if not partner_id:
return "unavailable"
try:
uuid.UUID(partner_id)
except ValueError:
partner_id = "invalid"
return partner_id
def _get_source_id() -> str:
"""Get existing or new source ID."""
source_id = "unavailable"
# Check if .flwr in home exists
try:
home = _get_home()
except RuntimeError:
# If the home directory can’t be resolved, RuntimeError is raised.
return source_id
flwr_dir = home.joinpath(FLWR_DIR)
# Create .flwr directory if it does not exist yet.
try:
flwr_dir.mkdir(parents=True, exist_ok=True)
except PermissionError:
return source_id
source_file = flwr_dir.joinpath("source")
# If no source_file exists create one and write it
if not source_file.exists():
try:
source_file.touch(exist_ok=True)
source_file.write_text(str(uuid.uuid4()), encoding="utf-8")
except PermissionError:
return source_id
source_id = source_file.read_text(encoding="utf-8").strip()
try:
uuid.UUID(source_id)
except ValueError:
source_id = "invalid"
return source_id
# Using str as first base type to make it JSON serializable as
# otherwise the following exception will be thrown when serializing
# the event dict:
# TypeError: Object of type EventType is not JSON serializable
class EventType(str, Enum):
"""Types of telemetry events."""
# This method combined with auto() will set the property value to
# the property name e.g.
# `START_CLIENT = auto()` becomes `START_CLIENT = "START_CLIENT"`
# The type signature is not compatible with mypy and pylint
# so each of those needs to be disabled for this line.
# pylint: disable-next=no-self-argument,arguments-differ,line-too-long
def _generate_next_value_(name: str, start: int, count: int, last_values: list[Any]) -> Any: # type: ignore # noqa: E501
return name
# Ping
PING = auto()
# --- LEGACY FUNCTIONS -------------------------------------------------------------
# Legacy: `start_client` function
START_CLIENT_ENTER = auto()
START_CLIENT_LEAVE = auto()
# Legacy: `start_server` function
START_SERVER_ENTER = auto()
START_SERVER_LEAVE = auto()
# Legacy: `start_simulation` function
START_SIMULATION_ENTER = auto()
START_SIMULATION_LEAVE = auto()
# --- `flwr` CLI -------------------------------------------------------------------
# Not yet implemented
# --- `flwr-*` commands ------------------------------------------------------------
# CLI: flwr-simulation
FLWR_SIMULATION_RUN_ENTER = auto()
FLWR_SIMULATION_RUN_LEAVE = auto()
# CLI: flwr-serverapp
FLWR_SERVERAPP_RUN_ENTER = auto()
FLWR_SERVERAPP_RUN_LEAVE = auto()
# CLI: flwr-app-scheduler
FLWR_APP_SCHEDULER_RUN_ENTER = auto()
FLWR_APP_SCHEDULER_RUN_LEAVE = auto()
# --- Simulation Engine ------------------------------------------------------------
# CLI: flower-simulation
CLI_FLOWER_SIMULATION_ENTER = auto()
CLI_FLOWER_SIMULATION_LEAVE = auto()
# Python API: `run_simulation`
PYTHON_API_RUN_SIMULATION_ENTER = auto()
PYTHON_API_RUN_SIMULATION_LEAVE = auto()
# --- Deployment Engine ------------------------------------------------------------
# CLI: `flower-superlink`
RUN_SUPERLINK_ENTER = auto()
RUN_SUPERLINK_LEAVE = auto()
# CLI: `flower-supernode`
RUN_SUPERNODE_ENTER = auto()
RUN_SUPERNODE_LEAVE = auto()
# Use the ThreadPoolExecutor with max_workers=1 to have a queue
# and also ensure that telemetry calls are not blocking.
state: dict[str, Union[Optional[str], Optional[ThreadPoolExecutor]]] = {
# Will be assigned ThreadPoolExecutor(max_workers=1)
# in event() the first time it's required
"executor": None,
"source": None,
"cluster": None,
"partner": None,
}
def event(
event_type: EventType,
event_details: Optional[dict[str, Any]] = None,
) -> Future: # type: ignore
"""Submit create_event to ThreadPoolExecutor to avoid blocking."""
if state["executor"] is None:
state["executor"] = ThreadPoolExecutor(max_workers=1)
executor: ThreadPoolExecutor = cast(ThreadPoolExecutor, state["executor"])
result = executor.submit(create_event, event_type, event_details)
return result
def create_event(event_type: EventType, event_details: Optional[dict[str, Any]]) -> str:
"""Create telemetry event."""
if state["source"] is None:
state["source"] = _get_source_id()
if state["cluster"] is None:
state["cluster"] = str(uuid.uuid4())
if state["partner"] is None:
state["partner"] = _get_partner_id()
if event_details is None:
event_details = {}
date = datetime.datetime.now(tz=datetime.timezone.utc).isoformat()
context = {
"partner": state["partner"],
"source": state["source"],
"cluster": state["cluster"],
"date": date,
"flower": {
"package_name": package_name,
"package_version": package_version,
},
"hw": {
"cpu_count": os.cpu_count(),
},
"platform": {
"system": platform.system(),
"release": platform.release(),
"platform": platform.platform(),
"python_implementation": platform.python_implementation(),
"python_version": platform.python_version(),
"machine": platform.machine(),
"architecture": platform.architecture(),
"version": platform.uname().version,
},
}
payload = {
"event_type": event_type,
"event_details": event_details,
"context": context,
}
payload_json = json.dumps(payload)
if FLWR_TELEMETRY_LOGGING == "1":
log(" - ".join([date, "POST", payload_json]))
# If telemetry is not disabled with setting FLWR_TELEMETRY_ENABLED=0
# create a request and send it to the telemetry backend
if FLWR_TELEMETRY_ENABLED == "1":
request = urllib.request.Request(
url=TELEMETRY_EVENTS_URL,
data=payload_json.encode("utf-8"),
headers={
"User-Agent": f"{package_name}/{package_version}",
"Content-Type": "application/json",
},
method="POST",
)
try:
with urllib.request.urlopen(request, timeout=60) as response:
result = response.read()
response_json: str = result.decode("utf-8")
return response_json
except urllib.error.URLError as ex:
if FLWR_TELEMETRY_LOGGING == "1":
log(ex)
return "disabled"