-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathtelemetry.py
More file actions
188 lines (164 loc) · 6.2 KB
/
telemetry.py
File metadata and controls
188 lines (164 loc) · 6.2 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
from typing import Tuple
import time
import os
from opentelemetry import trace
from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter
from opentelemetry.instrumentation.fastapi import FastAPIInstrumentor
from opentelemetry.instrumentation.logging import LoggingInstrumentor
from opentelemetry.sdk.resources import Resource
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.trace.export import BatchSpanProcessor
from prometheus_client import REGISTRY, Counter, Gauge, Histogram
from prometheus_client.openmetrics.exposition import (
CONTENT_TYPE_LATEST,
generate_latest,
)
from starlette.middleware.base import BaseHTTPMiddleware, RequestResponseEndpoint
from starlette.requests import Request
from starlette.responses import Response
from starlette.routing import Match
from starlette.status import HTTP_500_INTERNAL_SERVER_ERROR
from starlette.types import ASGIApp
# --- Runtime configuration ---------------------------------------------------
# Application name read from the APP_NAME environment variable; None when the
# variable is unset (setting_app_name() can fill it in later).
APP_NAME = os.environ.get("APP_NAME")
# Telemetry is enabled only when ENABLE_TELEMETRY is exactly the string "true".
ENABLE_TELEMETRY = os.environ.get("ENABLE_TELEMETRY", "false") == "true"

# --- HTTP metrics updated by PrometheusMiddleware ----------------------------
INFO = Gauge(
    name="fastapi_app_info",
    documentation="FastAPI application information.",
    labelnames=["app_name"],
)
REQUESTS = Counter(
    name="fastapi_requests_total",
    documentation="Total count of requests by method and path.",
    labelnames=["method", "path", "app_name"],
)
RESPONSES = Counter(
    name="fastapi_responses_total",
    documentation="Total count of responses by method, path and status codes.",
    labelnames=["method", "path", "status_code", "app_name"],
)
REQUESTS_PROCESSING_TIME = Histogram(
    name="fastapi_requests_duration_seconds",
    documentation="Histogram of requests processing time by path (in seconds)",
    labelnames=["method", "path", "app_name"],
)
EXCEPTIONS = Counter(
    name="fastapi_exceptions_total",
    documentation="Total count of exceptions raised by path and exception type",
    labelnames=["method", "path", "exception_type", "app_name"],
)
REQUESTS_IN_PROGRESS = Gauge(
    name="fastapi_requests_in_progress",
    documentation="Gauge of requests by method and path currently being processed",
    labelnames=["method", "path", "app_name"],
)

# --- Background task metrics (not updated anywhere in this module) -----------
TASKS_IN_PROGRESS = Gauge(
    name="cognition_tasks_in_progress",
    documentation="Indicates if the task master thread is running (1) or not (0)",
    labelnames=["task_name", "app_name"],
)
TASKS_PROCESSED = Counter(
    name="cognition_task_processed_total",
    documentation="Total items processed by the task",
    labelnames=["task_name", "app_name"],
)
TASKS_ERRORS = Counter(
    name="cognition_task_errors_total",
    documentation="Total errors encountered by the task",
    labelnames=["task_name", "app_name"],
)

# --- Websocket connection metrics (not updated anywhere in this module) ------
WEBSOCKET_EXTERNAL_SUCCESS = Counter(
    name="cognition_websocket_external_success_total",
    documentation="Total successful external websocket connections",
    labelnames=["app_name", "org_id", "project_id"],
)
WEBSOCKET_EXTERNAL_FAILURE = Counter(
    name="cognition_websocket_external_failure_total",
    documentation="Total failed external websocket connections",
    labelnames=["app_name", "org_id", "project_id"],
)
WEBSOCKET_INTERNAL_SUCCESS = Counter(
    name="cognition_websocket_internal_success_total",
    documentation="Total successful internal websocket connections",
    labelnames=["app_name", "org_id", "project_id"],
)
WEBSOCKET_INTERNAL_FAILURE = Counter(
    name="cognition_websocket_internal_failure_total",
    documentation="Total failed internal websocket connections",
    labelnames=["app_name", "org_id", "project_id"],
)
class PrometheusMiddleware(BaseHTTPMiddleware):
    """HTTP middleware that records Prometheus metrics for routed requests.

    For every request whose scope fully matches one of the application's
    routes, the middleware updates the request/response counters, the
    in-progress gauge, the duration histogram and (on failure) the exception
    counter. Requests that match no route are forwarded without being
    measured.
    """

    def __init__(self, app: ASGIApp, app_name: str = "fastapi-app") -> None:
        """Wrap *app* and bump the info gauge for *app_name*.

        Args:
            app: The ASGI application to wrap.
            app_name: Value used for the ``app_name`` label on every metric
                this middleware updates.
        """
        super().__init__(app)
        self.app_name = app_name
        # Incremented once per middleware instance.
        INFO.labels(app_name=self.app_name).inc()

    async def dispatch(
        self, request: Request, call_next: RequestResponseEndpoint
    ) -> Response:
        """Forward *request* downstream while recording metrics around it.

        Args:
            request: The incoming HTTP request.
            call_next: Callable that passes the request to the next handler.

        Returns:
            The response produced by the downstream handler.

        Raises:
            BaseException: Whatever the downstream handler raised, re-raised
                unchanged after the exception has been counted.
        """
        method = request.method
        path, is_handled_path = self.get_path(request)

        # Requests that match no route are passed through unmeasured.
        if not is_handled_path:
            return await call_next(request)

        REQUESTS_IN_PROGRESS.labels(
            method=method, path=path, app_name=self.app_name
        ).inc()
        REQUESTS.labels(method=method, path=path, app_name=self.app_name).inc()
        before_time = time.perf_counter()
        try:
            response = await call_next(request)
        except BaseException as e:
            # A failing handler is recorded as a 500 response.
            status_code = HTTP_500_INTERNAL_SERVER_ERROR
            EXCEPTIONS.labels(
                method=method,
                path=path,
                exception_type=type(e).__name__,
                app_name=self.app_name,
            ).inc()
            # Bare ``raise`` keeps the original traceback and the exception's
            # own ``__cause__``/``__context__``; ``raise e from None`` would
            # erase the chaining set up by the application code.
            raise
        else:
            status_code = response.status_code
            after_time = time.perf_counter()
            # Retrieve the current trace id and attach it as an exemplar.
            span = trace.get_current_span()
            trace_id = trace.format_trace_id(span.get_span_context().trace_id)
            REQUESTS_PROCESSING_TIME.labels(
                method=method, path=path, app_name=self.app_name
            ).observe(after_time - before_time, exemplar={"TraceID": trace_id})
        finally:
            # Runs on both success and failure so the in-progress gauge is
            # always decremented and every request yields a response count.
            RESPONSES.labels(
                method=method,
                path=path,
                status_code=status_code,
                app_name=self.app_name,
            ).inc()
            REQUESTS_IN_PROGRESS.labels(
                method=method, path=path, app_name=self.app_name
            ).dec()

        return response

    @staticmethod
    def get_path(request: Request) -> Tuple[str, bool]:
        """Resolve the path label for *request*.

        Args:
            request: The incoming HTTP request.

        Returns:
            ``(route.path, True)`` for the first application route that fully
            matches the request scope, otherwise ``(request.url.path, False)``.
        """
        for route in request.app.routes:
            match, _ = route.matches(request.scope)
            if match == Match.FULL:
                return route.path, True

        return request.url.path, False
def metrics(request: Request) -> Response:
    """Serve the current contents of the default Prometheus registry.

    Args:
        request: The incoming request; not used.

    Returns:
        A response whose body is ``generate_latest(REGISTRY)`` and whose
        ``Content-Type`` header is ``CONTENT_TYPE_LATEST``.
    """
    payload = generate_latest(REGISTRY)
    response_headers = {"Content-Type": CONTENT_TYPE_LATEST}
    return Response(content=payload, headers=response_headers)
def setting_app_name(app_name: str) -> None:
    """Set the module-level ``APP_NAME`` unless it already has a value.

    Args:
        app_name: Name to store when ``APP_NAME`` is currently ``None``.
    """
    global APP_NAME
    # An existing value (e.g. one read from the environment) wins.
    if APP_NAME is not None:
        return
    APP_NAME = app_name
def setting_otlp(
    app: ASGIApp,
    app_name: str,
    endpoint: str,
    log_correlation: bool = True,
    *,
    insecure: bool = True,
) -> None:
    """Configure OpenTelemetry tracing for *app* and export spans over OTLP/gRPC.

    Installs a global ``TracerProvider`` tagged with *app_name*, attaches a
    batching OTLP span exporter pointed at *endpoint*, optionally instruments
    logging, and instruments the application itself.

    Args:
        app: The application passed to ``FastAPIInstrumentor.instrument_app``.
        app_name: Value used for both the ``service.name`` and
            ``compose_service`` resource attributes.
        endpoint: Address of the OTLP collector handed to the span exporter.
        log_correlation: When true, ``LoggingInstrumentor`` is enabled with
            ``set_logging_format=True``.
        insecure: Passed through to ``OTLPSpanExporter``. Defaults to ``True``
            (the previously hard-coded value); pass ``False`` for a collector
            that requires a secure channel.
    """
    # Set the service name to show in traces.
    resource = Resource.create(
        attributes={"service.name": app_name, "compose_service": app_name}
    )

    # Register the tracer provider globally, then attach the exporter to it.
    tracer = TracerProvider(resource=resource)
    trace.set_tracer_provider(tracer)
    tracer.add_span_processor(
        BatchSpanProcessor(OTLPSpanExporter(endpoint=endpoint, insecure=insecure))
    )

    if log_correlation:
        LoggingInstrumentor().instrument(set_logging_format=True)

    FastAPIInstrumentor.instrument_app(app, tracer_provider=tracer)