Skip to content

Commit 81d04db

Browse files
rkuo-danswerRichard Kuo (Onyx)
and
Richard Kuo (Onyx)
authored
Feature/request id middleware 2 (#4427)
* stubbing out request id * passthru or create request id's in api and model server * add onyx request id * get request id logging into uvicorn * no logs * change prefixes * fix comment * docker image needs specific shared files --------- Co-authored-by: Richard Kuo (Onyx) <[email protected]>
1 parent d50a17d commit 81d04db

File tree

8 files changed

+149
-10
lines changed

8 files changed

+149
-10
lines changed

backend/Dockerfile.model_server

+1
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ WORKDIR /app
4646

4747
# Utils used by model server
4848
COPY ./onyx/utils/logger.py /app/onyx/utils/logger.py
49+
COPY ./onyx/utils/middleware.py /app/onyx/utils/middleware.py
4950

5051
# Place to fetch version information
5152
COPY ./onyx/__init__.py /app/onyx/__init__.py

backend/model_server/main.py

+15
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import logging
12
import os
23
import shutil
34
from collections.abc import AsyncGenerator
@@ -20,6 +21,8 @@
2021
from model_server.utils import get_gpu_type
2122
from onyx import __version__
2223
from onyx.utils.logger import setup_logger
24+
from onyx.utils.logger import setup_uvicorn_logger
25+
from onyx.utils.middleware import add_onyx_request_id_middleware
2326
from shared_configs.configs import INDEXING_ONLY
2427
from shared_configs.configs import MIN_THREADS_ML_MODELS
2528
from shared_configs.configs import MODEL_SERVER_ALLOWED_HOST
@@ -36,6 +39,12 @@
3639

3740
logger = setup_logger()
3841

42+
file_handlers = [
43+
h for h in logger.logger.handlers if isinstance(h, logging.FileHandler)
44+
]
45+
46+
setup_uvicorn_logger(shared_file_handlers=file_handlers)
47+
3948

4049
def _move_files_recursively(source: Path, dest: Path, overwrite: bool = False) -> None:
4150
"""
@@ -112,6 +121,12 @@ def get_model_app() -> FastAPI:
112121
application.include_router(encoders_router)
113122
application.include_router(custom_models_router)
114123

124+
request_id_prefix = "INF"
125+
if INDEXING_ONLY:
126+
request_id_prefix = "IDX"
127+
128+
add_onyx_request_id_middleware(application, request_id_prefix, logger)
129+
115130
return application
116131

117132

backend/onyx/main.py

+11
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import logging
12
import sys
23
import traceback
34
from collections.abc import AsyncGenerator
@@ -102,6 +103,8 @@
102103
from onyx.setup import setup_multitenant_onyx
103104
from onyx.setup import setup_onyx
104105
from onyx.utils.logger import setup_logger
106+
from onyx.utils.logger import setup_uvicorn_logger
107+
from onyx.utils.middleware import add_onyx_request_id_middleware
105108
from onyx.utils.telemetry import get_or_generate_uuid
106109
from onyx.utils.telemetry import optional_telemetry
107110
from onyx.utils.telemetry import RecordType
@@ -116,6 +119,12 @@
116119

117120
logger = setup_logger()
118121

122+
file_handlers = [
123+
h for h in logger.logger.handlers if isinstance(h, logging.FileHandler)
124+
]
125+
126+
setup_uvicorn_logger(shared_file_handlers=file_handlers)
127+
119128

120129
def validation_exception_handler(request: Request, exc: Exception) -> JSONResponse:
121130
if not isinstance(exc, RequestValidationError):
@@ -421,6 +430,8 @@ def get_application() -> FastAPI:
421430
if LOG_ENDPOINT_LATENCY:
422431
add_latency_logging_middleware(application, logger)
423432

433+
add_onyx_request_id_middleware(application, "API", logger)
434+
424435
# Ensure all routes have auth enabled or are explicitly marked as public
425436
check_router_auth(application)
426437

backend/onyx/utils/logger.py

+49-9
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
from shared_configs.configs import SLACK_CHANNEL_ID
1414
from shared_configs.configs import TENANT_ID_PREFIX
1515
from shared_configs.contextvars import CURRENT_TENANT_ID_CONTEXTVAR
16+
from shared_configs.contextvars import ONYX_REQUEST_ID_CONTEXTVAR
1617

1718

1819
logging.addLevelName(logging.INFO + 5, "NOTICE")
@@ -71,6 +72,14 @@ def get_log_level_from_str(log_level_str: str = LOG_LEVEL) -> int:
7172
return log_level_dict.get(log_level_str.upper(), logging.getLevelName("NOTICE"))
7273

7374

75+
class OnyxRequestIDFilter(logging.Filter):
76+
def filter(self, record: logging.LogRecord) -> bool:
77+
from shared_configs.contextvars import ONYX_REQUEST_ID_CONTEXTVAR
78+
79+
record.request_id = ONYX_REQUEST_ID_CONTEXTVAR.get() or "-"
80+
return True
81+
82+
7483
class OnyxLoggingAdapter(logging.LoggerAdapter):
7584
def process(
7685
self, msg: str, kwargs: MutableMapping[str, Any]
@@ -103,6 +112,7 @@ def process(
103112
msg = f"[CC Pair: {cc_pair_id}] {msg}"
104113

105114
break
115+
106116
# Add tenant information if it differs from default
107117
# This will always be the case for authenticated API requests
108118
if MULTI_TENANT:
@@ -115,6 +125,11 @@ def process(
115125
)
116126
msg = f"[t:{short_tenant}] {msg}"
117127

128+
# request id within a fastapi route
129+
fastapi_request_id = ONYX_REQUEST_ID_CONTEXTVAR.get()
130+
if fastapi_request_id:
131+
msg = f"[{fastapi_request_id}] {msg}"
132+
118133
# For Slack Bot, logs the channel relevant to the request
119134
channel_id = self.extra.get(SLACK_CHANNEL_ID) if self.extra else None
120135
if channel_id:
@@ -165,6 +180,14 @@ def format(self, record: logging.LogRecord) -> str:
165180
return super().format(record)
166181

167182

183+
def get_uvicorn_standard_formatter() -> ColoredFormatter:
184+
"""Returns a standard colored logging formatter."""
185+
return ColoredFormatter(
186+
"%(asctime)s %(filename)30s %(lineno)4s: [%(request_id)s] %(message)s",
187+
datefmt="%m/%d/%Y %I:%M:%S %p",
188+
)
189+
190+
168191
def get_standard_formatter() -> ColoredFormatter:
169192
"""Returns a standard colored logging formatter."""
170193
return ColoredFormatter(
@@ -201,12 +224,6 @@ def setup_logger(
201224

202225
logger.addHandler(handler)
203226

204-
uvicorn_logger = logging.getLogger("uvicorn.access")
205-
if uvicorn_logger:
206-
uvicorn_logger.handlers = []
207-
uvicorn_logger.addHandler(handler)
208-
uvicorn_logger.setLevel(log_level)
209-
210227
is_containerized = is_running_in_container()
211228
if LOG_FILE_NAME and (is_containerized or DEV_LOGGING_ENABLED):
212229
log_levels = ["debug", "info", "notice"]
@@ -225,14 +242,37 @@ def setup_logger(
225242
file_handler.setFormatter(formatter)
226243
logger.addHandler(file_handler)
227244

228-
if uvicorn_logger:
229-
uvicorn_logger.addHandler(file_handler)
230-
231245
logger.notice = lambda msg, *args, **kwargs: logger.log(logging.getLevelName("NOTICE"), msg, *args, **kwargs) # type: ignore
232246

233247
return OnyxLoggingAdapter(logger, extra=extra)
234248

235249

250+
def setup_uvicorn_logger(
251+
log_level: int = get_log_level_from_str(),
252+
shared_file_handlers: list[logging.FileHandler] | None = None,
253+
) -> None:
254+
uvicorn_logger = logging.getLogger("uvicorn.access")
255+
if not uvicorn_logger:
256+
return
257+
258+
formatter = get_uvicorn_standard_formatter()
259+
260+
handler = logging.StreamHandler()
261+
handler.setLevel(log_level)
262+
handler.setFormatter(formatter)
263+
264+
uvicorn_logger.handlers = []
265+
uvicorn_logger.addHandler(handler)
266+
uvicorn_logger.setLevel(log_level)
267+
uvicorn_logger.addFilter(OnyxRequestIDFilter())
268+
269+
if shared_file_handlers:
270+
for fh in shared_file_handlers:
271+
uvicorn_logger.addHandler(fh)
272+
273+
return
274+
275+
236276
def print_loggers() -> None:
237277
"""Print information about all loggers. Use to debug logging issues."""
238278
root_logger = logging.getLogger()

backend/onyx/utils/middleware.py

+62
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
import base64
2+
import hashlib
3+
import logging
4+
import uuid
5+
from collections.abc import Awaitable
6+
from collections.abc import Callable
7+
from datetime import datetime
8+
from datetime import timezone
9+
10+
from fastapi import FastAPI
11+
from fastapi import Request
12+
from fastapi import Response
13+
14+
from shared_configs.contextvars import ONYX_REQUEST_ID_CONTEXTVAR
15+
16+
17+
def add_onyx_request_id_middleware(
18+
app: FastAPI, prefix: str, logger: logging.LoggerAdapter
19+
) -> None:
20+
@app.middleware("http")
21+
async def set_request_id(
22+
request: Request, call_next: Callable[[Request], Awaitable[Response]]
23+
) -> Response:
24+
"""Generate a request hash that can be used to track the lifecycle
25+
of a request. The hash is prefixed to help indicated where the request id
26+
originated.
27+
28+
Format is f"{PREFIX}:{ID}" where PREFIX is 3 chars and ID is 8 chars.
29+
Total length is 12 chars.
30+
"""
31+
32+
onyx_request_id = request.headers.get("X-Onyx-Request-ID")
33+
if not onyx_request_id:
34+
onyx_request_id = make_randomized_onyx_request_id(prefix)
35+
36+
ONYX_REQUEST_ID_CONTEXTVAR.set(onyx_request_id)
37+
return await call_next(request)
38+
39+
40+
def make_randomized_onyx_request_id(prefix: str) -> str:
41+
"""generates a randomized request id"""
42+
43+
hash_input = str(uuid.uuid4())
44+
return _make_onyx_request_id(prefix, hash_input)
45+
46+
47+
def make_structured_onyx_request_id(prefix: str, request_url: str) -> str:
48+
"""Not used yet, but could be in the future!"""
49+
hash_input = f"{request_url}:{datetime.now(timezone.utc)}"
50+
return _make_onyx_request_id(prefix, hash_input)
51+
52+
53+
def _make_onyx_request_id(prefix: str, hash_input: str) -> str:
54+
"""helper function to return an id given a string input"""
55+
hash_obj = hashlib.md5(hash_input.encode("utf-8"))
56+
hash_bytes = hash_obj.digest()[:6] # Truncate to 6 bytes
57+
58+
# 6 bytes becomes 8 bytes. we shouldn't need to strip but just in case
59+
# NOTE: possible we'll want more input bytes if id's aren't unique enough
60+
hash_str = base64.urlsafe_b64encode(hash_bytes).decode("utf-8").rstrip("=")
61+
onyx_request_id = f"{prefix}:{hash_str}"
62+
return onyx_request_id

backend/requirements/default.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -95,4 +95,4 @@ urllib3==2.2.3
9595
mistune==0.8.4
9696
sentry-sdk==2.14.0
9797
prometheus_client==0.21.0
98-
fastapi-limiter==0.1.6
98+
fastapi-limiter==0.1.6

backend/shared_configs/configs.py

+1
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@
5858

5959
# The process needs to have this for the log file to write to
6060
# otherwise, it will not create additional log files
61+
# This should just be the filename base without extension or path.
6162
LOG_FILE_NAME = os.environ.get("LOG_FILE_NAME") or "onyx"
6263

6364
# Enable generating persistent log files for local dev environments

backend/shared_configs/contextvars.py

+9
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,15 @@
1111
"current_tenant_id", default=None if MULTI_TENANT else POSTGRES_DEFAULT_SCHEMA
1212
)
1313

14+
# set by every route in the API server
15+
INDEXING_REQUEST_ID_CONTEXTVAR: contextvars.ContextVar[
16+
str | None
17+
] = contextvars.ContextVar("indexing_request_id", default=None)
18+
19+
# set by every route in the API server
20+
ONYX_REQUEST_ID_CONTEXTVAR: contextvars.ContextVar[str | None] = contextvars.ContextVar(
21+
"onyx_request_id", default=None
22+
)
1423

1524
"""Utils related to contextvars"""
1625

0 commit comments

Comments
 (0)