Skip to content

Commit a483e07

Browse files
committed
feat: ops[tracing]
1 parent f57b2bb commit a483e07

22 files changed

+967
-17
lines changed

HACKING.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -496,7 +496,7 @@ The main improvements in this release are ...
496496
Read more in the [full release notes on GitHub](link to the GitHub release).
497497
```
498498

499-
In the post, outline the key improvements both in `ops` and `ops-scenario` -
499+
In the post, outline the key improvements both in `ops` and `ops-scenario` -
500500
the point here is to encourage people to check out the full notes and to upgrade
501501
promptly, so ensure that you entice them with the best that the new versions
502502
have to offer.

docs/requirements.txt

+12
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,8 @@ click==8.1.8
2929
# via uvicorn
3030
colorama==0.4.6
3131
# via sphinx-autobuild
32+
deprecated==1.2.15
33+
# via opentelemetry-api
3234
docutils==0.21.2
3335
# via
3436
# canonical-sphinx-extensions
@@ -51,6 +53,10 @@ idna==3.10
5153
# requests
5254
imagesize==1.4.1
5355
# via sphinx
56+
importlib-metadata==8.5.0
57+
# via
58+
# opentelemetry-api
59+
# ops (pyproject.toml)
5460
jinja2==3.1.5
5561
# via
5662
# myst-parser
@@ -73,6 +79,8 @@ mdurl==0.1.2
7379
# via markdown-it-py
7480
myst-parser==4.0.0
7581
# via ops (pyproject.toml)
82+
opentelemetry-api==1.29.0
83+
# via ops (pyproject.toml)
7684
packaging==24.2
7785
# via sphinx
7886
pygments==2.19.1
@@ -165,4 +173,8 @@ websocket-client==1.8.0
165173
# via ops (pyproject.toml)
166174
websockets==14.1
167175
# via sphinx-autobuild
176+
wrapt==1.17.2
177+
# via deprecated
178+
zipp==3.21.0
179+
# via importlib-metadata
168180
./testing/

dont-merge/fake-charm.py

+61
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
#!/usr/bin/env python
2+
# Copyright 2025 Canonical Ltd.
3+
#
4+
# Licensed under the Apache License, Version 2.0 (the "License");
5+
# you may not use this file except in compliance with the License.
6+
# You may obtain a copy of the License at
7+
#
8+
# http://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing, software
11+
# distributed under the License is distributed on an "AS IS" BASIS,
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
# See the License for the specific language governing permissions and
14+
# limitations under the License.
15+
"""FIXME dummy_load docstring."""
16+
17+
from __future__ import annotations
18+
19+
import time
20+
21+
import opentelemetry.trace
22+
23+
import ops
24+
25+
tracer = opentelemetry.trace.get_tracer(__name__)
26+
27+
28+
class FakeCharm(ops.CharmBase):
29+
"""Dummy docstring."""
30+
31+
def __init__(self, framework: ops.Framework):
32+
"""Dummy docstring."""
33+
super().__init__(framework)
34+
self.framework.observe(self.on.start, self._on_start)
35+
self.framework.observe(self.on.collect_app_status, self._on_collect_app_status)
36+
self.framework.observe(self.on.collect_unit_status, self._on_collect_unit_status)
37+
38+
def _on_start(self, event: ops.StartEvent) -> None:
39+
"""Dummy docstring."""
40+
ops.configure_tracing_destination('http://localhost:4318/v1/traces')
41+
self.dummy_load(event, 0.0025)
42+
43+
def _on_collect_app_status(self, event: ops.CollectStatusEvent) -> None:
44+
"""Dummy docstring."""
45+
self.dummy_load(event)
46+
event.add_status(ops.ActiveStatus('app seems ready'))
47+
48+
def _on_collect_unit_status(self, event: ops.CollectStatusEvent) -> None:
49+
"""Dummy docstring."""
50+
self.dummy_load(event)
51+
event.add_status(ops.ActiveStatus('unit ready'))
52+
53+
@tracer.start_as_current_span('FakeCharm.dummy_load') # type: ignore
54+
def dummy_load(self, event: ops.EventBase, duration: float = 0.001) -> None:
55+
"""Dummy docstring."""
56+
print(event)
57+
time.sleep(duration)
58+
59+
60+
if __name__ == '__main__':
61+
ops.main(FakeCharm)

dont-merge/metadata.yaml

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
name: testmetest

dont-merge/otel-collector-config.yaml

+25
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
receivers:
2+
otlp:
3+
protocols:
4+
grpc:
5+
endpoint: "[::]:4317"
6+
http:
7+
endpoint: "[::]:4318"
8+
9+
processors:
10+
batch:
11+
12+
exporters:
13+
debug:
14+
verbosity: detailed
15+
jaeger:
16+
endpoint: jaeger:14250
17+
tls:
18+
insecure: true
19+
20+
service:
21+
pipelines:
22+
traces:
23+
receivers: [otlp]
24+
processors: [batch]
25+
exporters: [debug]

dont-merge/readme.md

+114
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,114 @@
1+
### Usage
2+
3+
Recommended for traces of moderate and high complexity:
4+
5+
```command
6+
dima@colima-ahh /c/operator (feat-otel)> docker run --rm --name jaeger \
7+
-p 16686:16686 \
8+
-p 4317:4317 \
9+
-p 4318:4318 \
10+
-p 5778:5778 \
11+
-p 9411:9411 \
12+
jaegertracing/jaeger:2.2.0
13+
```
14+
15+
After which, you should be able to:
16+
- open http://192.168.107.4:16686/ in your browser (substitute the address of your own Docker host, e.g. `localhost`)
17+
- select the correct **Service** (`testapp-charm` at current branch state)
18+
- click Search at the bottom of the form
19+
20+
Note: the `jaeger` container keeps traces in memory, and your Service can't be selected
21+
until it has sent some data to `jaeger`.
22+
23+
Alternatively, for text-based output, run the OpenTelemetry collector with its `debug` exporter:
24+
25+
```command
26+
dima@colima-ahh /c/operator (feat-otel)> docker run -it --rm \
27+
-v (pwd)/dont-merge/otel-collector-config.yaml:/etc/otel-collector-config.yaml \
28+
-p 4317:4317 \
29+
-p 4318:4318 \
30+
otel/opentelemetry-collector:latest \
31+
--config=/etc/otel-collector-config.yaml
32+
```
33+
34+
Then create a virtual environment, install `ops` with the `tracing` extra, and send a sample trace:
35+
36+
```command
37+
dima@colima-ahh /c/operator (feat-otel)> uv venv --seed .ahh-venv
38+
Using CPython 3.13.0
39+
Creating virtual environment with seed packages at: .ahh-venv
40+
41+
dima@colima-ahh /c/operator (feat-otel)> . .ahh-venv/bin/activate.fish
42+
(.ahh-venv) dima@colima-ahh /c/operator (feat-otel)>
43+
44+
(.ahh-venv) dima@colima-ahh /c/operator (feat-otel)> uv pip install -e .[tracing] -U
45+
Using Python 3.13.0 environment at .ahh-venv
46+
Resolved 21 packages in 907ms
47+
Prepared 18 packages in 72ms
48+
...
49+
50+
(.ahh-venv) dima@colima-ahh /c/operator (feat-otel)> python dont-merge/send-traces.py
51+
INFO:__main__:Span created and will be exported to the collector soon!
52+
```
53+
54+
### Hacking
55+
56+
Or: how to run the code outside of a charm.
57+
58+
At first, no tracing data is produced, because the `juju-log` hook tool is missing.
59+
60+
Let's fix that.
61+
62+
```command
63+
> ln -s (which echo) juju-log
64+
```
65+
66+
Generate some tracing data:
67+
68+
```command
69+
(venv) > JUJU_UNIT_NAME=testapp/42 JUJU_CHARM_DIR=dont-merge/ PATH=$PATH:. JUJU_VERSION=3.5.4 ./dont-merge/start
70+
```
71+
72+
The OTEL collector's debug output will look like this:
73+
74+
```
75+
2025-01-15T08:46:23.229Z info Traces {"kind": "exporter", "data_type": "traces", "name": "debug", "resource spans": 1, "spans": 1}
76+
2025-01-15T08:46:23.229Z info ResourceSpans #0
77+
Resource SchemaURL:
78+
Resource attributes:
79+
-> telemetry.sdk.language: Str(python)
80+
-> telemetry.sdk.name: Str(opentelemetry)
81+
-> telemetry.sdk.version: Str(1.29.0)
82+
-> service.name: Str(testapp-charm)
83+
-> compose_service: Str(testapp-charm)
84+
-> charm_type: Str(CharmBase)
85+
-> juju_unit: Str(testapp/42)
86+
-> juju_application: Str(testapp)
87+
-> juju_model: Str()
88+
-> juju_model_uuid: Str()
89+
ScopeSpans #0
90+
ScopeSpans SchemaURL:
91+
InstrumentationScope ops
92+
Span #0
93+
Trace ID : 8c3f292c89f29c59f1b37fe59ba0abbc
94+
Parent ID :
95+
ID : e0253a03ef694a4f
96+
Name : ops.main
97+
Kind : Internal
98+
Start time : 2025-01-15 08:46:23.175916835 +0000 UTC
99+
End time : 2025-01-15 08:46:23.182329655 +0000 UTC
100+
Status code : Error
101+
Status message : RuntimeError: command not found: is-leader
102+
Events:
103+
SpanEvent #0
104+
-> Name: exception
105+
-> Timestamp: 2025-01-15 08:46:23.182316071 +0000 UTC
106+
-> DroppedAttributesCount: 0
107+
-> Attributes::
108+
-> exception.type: Str(RuntimeError)
109+
-> exception.message: Str(command not found: is-leader)
110+
-> exception.stacktrace: Str(Traceback (most recent call last):
111+
...
112+
-> exception.escaped: Str(False)
113+
{"kind": "exporter", "data_type": "traces", "name": "debug"}
114+
```

dont-merge/send-traces.py

+63
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
# Copyright 2025 Canonical Ltd.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
"""FIXME dummy docstring."""
15+
16+
from __future__ import annotations
17+
18+
import logging
19+
20+
import opentelemetry.trace
21+
from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter
22+
from opentelemetry.sdk.resources import Resource
23+
from opentelemetry.sdk.trace import TracerProvider
24+
from opentelemetry.sdk.trace.export import BatchSpanProcessor
25+
26+
# The default ProxyTracer allows tracers to be declared ahead of time like loggers
27+
logger = logging.getLogger(__name__)
28+
tracer = opentelemetry.trace.get_tracer(__name__)
29+
30+
# 1. Create a tracer provider with a "service.name" resource attribute
31+
opentelemetry.trace.set_tracer_provider(
32+
TracerProvider(resource=Resource.create({'service.name': 'example-service'}))
33+
)
34+
35+
# 2. Configure the OTLP HTTP exporter (defaults to protobuf format)
36+
otlp_exporter = OTLPSpanExporter(
37+
endpoint='http://localhost:4318/v1/traces'
38+
# If you needed headers or auth, you could add them like:
39+
# headers={"Authorization": "Bearer <TOKEN>"},
40+
)
41+
42+
# 3. Create a span processor (BatchSpanProcessor recommended for production)
43+
span_processor = BatchSpanProcessor(otlp_exporter)
44+
opentelemetry.trace.get_tracer_provider().add_span_processor(span_processor) # type: ignore
45+
46+
47+
@tracer.start_as_current_span('some label') # type: ignore
48+
def main(foo: int = 42):
49+
"""Do something."""
50+
# can't add attributes to a decorator, if needed use the below instead
51+
#
52+
# with tracer.start_as_current_span("some label") as span:
53+
# span.set_attribute('foo', 'bar')
54+
# span.add_event('sample_event', {'event_attr': 123})
55+
56+
logger.info('Span created and will be exported to the collector soon!')
57+
58+
59+
if __name__ == '__main__':
60+
logging.basicConfig(level='INFO')
61+
main()
62+
# from typing_extensions import reveal_type
63+
# reveal_type(main)

ops/__init__.py

+7
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,8 @@
5454
# that those symbols are part of the public API, so we have to add __all__.
5555
__all__ = [ # noqa: RUF022 `__all__` is not sorted
5656
'__version__',
57+
'configure_tracing_buffer',
58+
'configure_tracing_destination',
5759
'main',
5860
'pebble',
5961
# From charm.py
@@ -333,6 +335,11 @@
333335
# NOTE: don't import testing or Harness here, as that's a test-time concern
334336
# rather than a runtime concern.
335337

338+
from .tracing import (
339+
configure_tracing_buffer,
340+
configure_tracing_destination,
341+
)
342+
336343
from .version import version as __version__
337344

338345

ops/_main.py

+18-3
Original file line numberDiff line numberDiff line change
@@ -23,10 +23,13 @@
2323
from pathlib import Path
2424
from typing import Any, Dict, List, Optional, Tuple, Type, Union, cast
2525

26+
import opentelemetry.trace
27+
2628
import ops.charm
2729
import ops.framework
2830
import ops.model
2931
import ops.storage
32+
import ops.tracing
3033
from ops.charm import CharmMeta
3134
from ops.jujucontext import _JujuContext
3235
from ops.log import setup_root_logging
@@ -35,6 +38,7 @@
3538

3639

3740
logger = logging.getLogger()
41+
tracer = opentelemetry.trace.get_tracer(__name__)
3842

3943

4044
def _exe_path(path: Path) -> Optional[Path]:
@@ -212,6 +216,8 @@ class _Dispatcher:
212216
213217
"""
214218

219+
event_name: str
220+
215221
def __init__(self, charm_dir: Path, juju_context: _JujuContext):
216222
self._juju_context = juju_context
217223
self._charm_dir = charm_dir
@@ -268,7 +274,9 @@ def run_any_legacy_hook(self):
268274
argv[0] = str(dispatch_path)
269275
logger.info('Running legacy %s.', self._dispatch_path)
270276
try:
271-
subprocess.run(argv, check=True)
277+
with tracer.start_as_current_span('ops.run_legacy_hook') as span: # type: ignore
278+
span.set_attribute('argv', ' '.join(argv)) # type: ignore
279+
subprocess.run(argv, check=True)
272280
except subprocess.CalledProcessError as e:
273281
logger.warning('Legacy %s exited with status %d.', self._dispatch_path, e.returncode)
274282
raise _Abort(e.returncode) from e
@@ -552,9 +560,16 @@ def main(charm_class: Type[ops.charm.CharmBase], use_juju_for_storage: Optional[
552560
553561
See `ops.main() <#ops-main-entry-point>`_ for details.
554562
"""
563+
ops.tracing.setup_tracing(charm_class.__name__)
564+
565+
# opentelemetry-api types are broken
566+
# https://github.com/open-telemetry/opentelemetry-python/issues/3836
555567
try:
556-
manager = _Manager(charm_class, use_juju_for_storage=use_juju_for_storage)
568+
with tracer.start_as_current_span('ops.main'): # type: ignore
569+
manager = _Manager(charm_class, use_juju_for_storage=use_juju_for_storage)
557570

558-
manager.run()
571+
manager.run()
559572
except _Abort as e:
560573
sys.exit(e.exit_code)
574+
finally:
575+
ops.tracing.shutdown_tracing()

0 commit comments

Comments
 (0)