-
Notifications
You must be signed in to change notification settings - Fork 0
Ramp‐up load test 구 아키텍처(제한적 비동기) 신 아키텍처(동기) 신 아키텍처(완전 비동기) 비교 실험
OhJin-Soo edited this page Feb 11, 2026
·
1 revision
{
"annotations": { "list": [] },
"editable": true,
"fiscalYearStartMonth": 0,
"graphTooltip": 0,
"id": null,
"links": [],
"liveNow": false,
"panels": [
{
"collapsed": false,
"gridPos": { "h": 1, "w": 24, "x": 0, "y": 0 },
"id": 100,
"panels": [],
"title": "1. User-perceived",
"type": "row"
},
{
"datasource": { "type": "prometheus", "uid": "prometheus" },
"description": "Time to First User Response P95 (LLM 첫 토큰까지 시간)",
"fieldConfig": {
"defaults": {
"color": { "mode": "palette-classic" },
"custom": { "axisCenteredZero": false, "axisColorMode": "text", "axisPlacement": "auto", "drawStyle": "line", "fillOpacity": 10, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } },
"mappings": [],
"thresholds": { "mode": "absolute", "steps": [{"color": "green", "value": null}] },
"unit": "s"
},
"overrides": []
},
"gridPos": { "h": 6, "w": 12, "x": 0, "y": 1 },
"id": 1,
"options": { "legend": { "calcs": ["mean"], "displayMode": "list", "placement": "bottom", "showLegend": true }, "tooltip": { "mode": "single", "sort": "none" } },
"targets": [
{ "datasource": { "type": "prometheus", "uid": "prometheus" }, "expr": "histogram_quantile(0.95, sum by (le, pipeline, analysis_type) (rate(llm_ttft_seconds_bucket{job=\"fastapi\"}[5m])))", "legendFormat": "{{pipeline}} - {{analysis_type}}", "refId": "A" }
],
"title": "TTFUR P95",
"type": "timeseries"
},
{
"datasource": { "type": "prometheus", "uid": "prometheus" },
"description": "분석 요청 중 성공 비율 (success / total)",
"fieldConfig": {
"defaults": {
"color": { "mode": "palette-classic" },
"custom": { "axisCenteredZero": false, "axisColorMode": "text", "axisPlacement": "auto", "drawStyle": "line", "fillOpacity": 10, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } },
"mappings": [],
"max": 1,
"min": 0,
"thresholds": { "mode": "absolute", "steps": [{"color": "green", "value": null}] },
"unit": "percentunit"
},
"overrides": []
},
"gridPos": { "h": 6, "w": 12, "x": 12, "y": 1 },
"id": 2,
"options": { "legend": { "calcs": ["mean"], "displayMode": "list", "placement": "bottom", "showLegend": true }, "tooltip": { "mode": "single", "sort": "none" } },
"targets": [
{ "datasource": { "type": "prometheus", "uid": "prometheus" }, "expr": "sum by (pipeline) (rate(analysis_requests_total{job=\"fastapi\",status=\"success\"}[5m])) / (sum by (pipeline) (rate(analysis_requests_total{job=\"fastapi\"}[5m])) + 1e-9)", "legendFormat": "{{pipeline}}", "refId": "A" }
],
"title": "Analysis Success Rate",
"type": "timeseries"
},
{
"collapsed": false,
"gridPos": { "h": 1, "w": 24, "x": 0, "y": 7 },
"id": 101,
"panels": [],
"title": "2. E2E pipeline",
"type": "row"
},
{
"datasource": { "type": "prometheus", "uid": "prometheus" },
"description": "분석 파이프라인 완료 시간 P95",
"fieldConfig": {
"defaults": {
"color": { "mode": "palette-classic" },
"custom": { "axisCenteredZero": false, "axisColorMode": "text", "axisPlacement": "auto", "drawStyle": "line", "fillOpacity": 10, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } },
"mappings": [],
"thresholds": { "mode": "absolute", "steps": [{"color": "green", "value": null}] },
"unit": "s"
},
"overrides": []
},
"gridPos": { "h": 6, "w": 12, "x": 0, "y": 8 },
"id": 3,
"options": { "legend": { "calcs": ["mean"], "displayMode": "list", "placement": "bottom", "showLegend": true }, "tooltip": { "mode": "single", "sort": "none" } },
"targets": [
{ "datasource": { "type": "prometheus", "uid": "prometheus" }, "expr": "histogram_quantile(0.95, sum by (le, pipeline, analysis_type) (rate(analysis_processing_time_seconds_bucket{job=\"fastapi\"}[5m])))", "legendFormat": "{{pipeline}} - {{analysis_type}}", "refId": "A" }
],
"title": "Completion time P95",
"type": "timeseries"
},
{
"datasource": { "type": "prometheus", "uid": "prometheus" },
"description": "초당 완료된 분석 작업 수 (성공 기준)",
"fieldConfig": {
"defaults": {
"color": { "mode": "palette-classic" },
"custom": { "axisCenteredZero": false, "axisColorMode": "text", "axisPlacement": "auto", "drawStyle": "line", "fillOpacity": 10, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } },
"mappings": [],
"thresholds": { "mode": "absolute", "steps": [{"color": "green", "value": null}] },
"unit": "reqps"
},
"overrides": []
},
"gridPos": { "h": 6, "w": 12, "x": 12, "y": 8 },
"id": 4,
"options": { "legend": { "calcs": ["mean"], "displayMode": "list", "placement": "bottom", "showLegend": true }, "tooltip": { "mode": "single", "sort": "none" } },
"targets": [
{ "datasource": { "type": "prometheus", "uid": "prometheus" }, "expr": "sum by (pipeline, analysis_type) (rate(analysis_requests_total{job=\"fastapi\",status=\"success\"}[5m]))", "legendFormat": "{{pipeline}} - {{analysis_type}}", "refId": "A" }
],
"title": "Completed jobs/s",
"type": "timeseries"
},
{
"datasource": { "type": "prometheus", "uid": "prometheus" },
"description": "대기 중인 작업 수 (queue_depth 메트릭이 있을 때 표시)",
"fieldConfig": {
"defaults": {
"color": { "mode": "palette-classic" },
"custom": { "axisCenteredZero": false, "axisColorMode": "text", "axisPlacement": "auto", "drawStyle": "line", "fillOpacity": 10, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } },
"mappings": [],
"thresholds": { "mode": "absolute", "steps": [{"color": "green", "value": null}] },
"unit": "short"
},
"overrides": []
},
"gridPos": { "h": 6, "w": 12, "x": 0, "y": 14 },
"id": 5,
"options": { "legend": { "calcs": ["mean"], "displayMode": "list", "placement": "bottom", "showLegend": true }, "tooltip": { "mode": "single", "sort": "none" } },
"targets": [
{ "datasource": { "type": "prometheus", "uid": "prometheus" }, "expr": "app_queue_depth{job=\"fastapi\"} or vector(0)", "legendFormat": "{{pipeline}} ({{instance}})", "refId": "A" }
],
"title": "Backlog / Lag (queue depth)",
"type": "timeseries"
},
{
"collapsed": false,
"gridPos": { "h": 1, "w": 24, "x": 0, "y": 20 },
"id": 102,
"panels": [],
"title": "3. Resource & saturation",
"type": "row"
},
{
"datasource": { "type": "prometheus", "uid": "prometheus" },
"description": "API 프로세스 CPU 사용률 (fastapi)",
"fieldConfig": {
"defaults": {
"color": { "mode": "palette-classic" },
"custom": { "axisCenteredZero": false, "axisColorMode": "text", "axisPlacement": "auto", "drawStyle": "line", "fillOpacity": 10, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } },
"mappings": [],
"max": 100,
"min": 0,
"thresholds": { "mode": "absolute", "steps": [{"color": "green", "value": null}] },
"unit": "percent"
},
"overrides": []
},
"gridPos": { "h": 6, "w": 8, "x": 0, "y": 21 },
"id": 6,
"options": { "legend": { "calcs": ["mean"], "displayMode": "list", "placement": "bottom", "showLegend": true }, "tooltip": { "mode": "single", "sort": "none" } },
"targets": [
{ "datasource": { "type": "prometheus", "uid": "prometheus" }, "expr": "rate(process_cpu_seconds_total{job=\"fastapi\"}[5m]) * 100", "legendFormat": "{{pipeline}} ({{instance}})", "refId": "A" }
],
"title": "CPU (API process)",
"type": "timeseries"
},
{
"datasource": { "type": "prometheus", "uid": "prometheus" },
"description": "API 프로세스 메모리 (RSS)",
"fieldConfig": {
"defaults": {
"color": { "mode": "palette-classic" },
"custom": { "axisCenteredZero": false, "axisColorMode": "text", "axisPlacement": "auto", "drawStyle": "line", "fillOpacity": 10, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } },
"mappings": [],
"thresholds": { "mode": "absolute", "steps": [{"color": "green", "value": null}] },
"unit": "bytes"
},
"overrides": []
},
"gridPos": { "h": 6, "w": 8, "x": 8, "y": 21 },
"id": 7,
"options": { "legend": { "calcs": ["mean"], "displayMode": "list", "placement": "bottom", "showLegend": true }, "tooltip": { "mode": "single", "sort": "none" } },
"targets": [
{ "datasource": { "type": "prometheus", "uid": "prometheus" }, "expr": "process_resident_memory_bytes{job=\"fastapi\"}", "legendFormat": "{{pipeline}} ({{instance}})", "refId": "A" }
],
"title": "Mem (API process)",
"type": "timeseries"
},
{
"datasource": { "type": "prometheus", "uid": "prometheus" },
"description": "이벤트 루프 지연 (노드/API에서 수집 시 표시)",
"fieldConfig": {
"defaults": {
"color": { "mode": "palette-classic" },
"custom": { "axisCenteredZero": false, "axisColorMode": "text", "axisPlacement": "auto", "drawStyle": "line", "fillOpacity": 10, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } },
"mappings": [],
"thresholds": { "mode": "absolute", "steps": [{"color": "green", "value": null}] },
"unit": "s"
},
"overrides": []
},
"gridPos": { "h": 6, "w": 8, "x": 16, "y": 21 },
"id": 8,
"options": { "legend": { "calcs": ["mean"], "displayMode": "list", "placement": "bottom", "showLegend": true }, "tooltip": { "mode": "single", "sort": "none" } },
"targets": [
{ "datasource": { "type": "prometheus", "uid": "prometheus" }, "expr": "event_loop_lag_seconds{job=\"fastapi\"} or vector(0)", "legendFormat": "{{pipeline}}", "refId": "A" }
],
"title": "Event loop lag",
"type": "timeseries"
},
{
"datasource": { "type": "prometheus", "uid": "prometheus" },
"description": "바쁜 워커 비율 (처리 중인 워커 수 / 전체 워커 수). sync/멀티워커 해석용.",
"fieldConfig": {
"defaults": {
"color": { "mode": "palette-classic" },
"custom": { "axisCenteredZero": false, "axisColorMode": "text", "axisPlacement": "auto", "drawStyle": "line", "fillOpacity": 10, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } },
"mappings": [],
"max": 1,
"min": 0,
"thresholds": { "mode": "absolute", "steps": [{"color": "green", "value": null}] },
"unit": "percentunit"
},
"overrides": []
},
"gridPos": { "h": 6, "w": 12, "x": 0, "y": 27 },
"id": 9,
"options": { "legend": { "calcs": ["mean"], "displayMode": "list", "placement": "bottom", "showLegend": true }, "tooltip": { "mode": "single", "sort": "none" } },
"targets": [
{ "datasource": { "type": "prometheus", "uid": "prometheus" }, "expr": "sum by (pipeline) (app_worker_busy{job=\"fastapi\"}) / (count by (pipeline) (app_worker_busy{job=\"fastapi\"}) + 1e-9)", "legendFormat": "{{pipeline}}", "refId": "A" }
],
"title": "Worker utilization",
"type": "timeseries"
}
],
"refresh": "10s",
"schemaVersion": 38,
"style": "dark",
"tags": ["prometheus", "performance", "user-perceived", "e2e", "resource"],
"templating": { "list": [] },
"time": { "from": "now-1h", "to": "now" },
"timepicker": {},
"timezone": "browser",
"title": "Architecture Performance Verification",
"uid": "prometheus-overview",
"version": 2,
"weekStart": ""
}
2026-02-08 15:33:56,714 - src.vector_search - WARNING - Point 5b7925a214ff6359ca4be3951f66fab6의 sentiment 업데이트 실패: '5b7925a214ff6359ca4be3951f66fab6'
2026-02-08 15:33:56,714 - src.vector_search - WARNING - Point 7f0257dfa7a42bb69f7a2672a04687c6의 sentiment 업데이트 실패: '7f0257dfa7a42bb69f7a2672a04687c6'
2026-02-08 15:33:56,714 - src.vector_search - WARNING - Point c6899e9a0a645170167c98fc89d275ae의 sentiment 업데이트 실패: 'c6899e9a0a645170167c98fc89d275ae'
2026-02-08 15:33:56,714 - src.vector_search - WARNING - Point 394a7d4c1ae06753ec83f97ea33e7fa0의 sentiment 업데이트 실패: '394a7d4c1ae06753ec83f97ea33e7fa0'
2026-02-08 15:33:56,714 - src.vector_search - WARNING - Point 37651d5c2705e7348ff5d4b6bd570c33의 sentiment 업데이트 실패: '37651d5c2705e7348ff5d4b6bd570c33'
2026-02-08 15:33:56,714 - src.vector_search - WARNING - Point 4099e91cfd53fcfbf9d98a7e81d9afd1의 sentiment 업데이트 실패: '4099e91cfd53fcfbf9d98a7e81d9afd1'
2026-02-08 15:33:56,714 - src.vector_search - WARNING - Point 4837bc8d138a92077e3dca48c69fb5b7의 sentiment 업데이트 실패: '4837bc8d138a92077e3dca48c69fb5b7'
2026-02-08 15:33:56,714 - src.vector_search - WARNING - Point 7fd03a93d9363c95a6b38f06490d57a2의 sentiment 업데이트 실패: '7fd03a93d9363c95a6b38f06490d57a2'
2026-02-08 15:33:56,714 - src.vector_search - WARNING - Point c6ccb4ab6b698996d4620c34cbf6ae3a의 sentiment 업데이트 실패: 'c6ccb4ab6b698996d4620c34cbf6ae3a'
2026-02-08 15:33:56,714 - src.vector_search - INFO - Qdrant sentiment 라벨 업데이트 완료: 0개 리뷰 (restaurant_id: 4)
2026-02-08 15:33:56,714 - src.sentiment_analysis - INFO - Qdrant에 0개 리뷰의 sentiment 라벨 저장 완료 (restaurant_id: 4)
2026-02-08 15:33:56,715 - src.sentiment_analysis - INFO - 총 100개의 리뷰를 sentiment 모델로 분류합니다 (restaurant_id: 5).
2026-02-08 15:24:29,602 - httpx - INFO - HTTP Request: HEAD https://huggingface.co/Xenova/paraphrase-multilingual-mpnet-base-v2/resolve/e5d116277351513fd260955ece953ecddde7046e/tokenizer_config.json "HTTP/1.1 307 Temporary Redirect"
2026-02-08 15:24:29,630 - src.api.main - ERROR - Unhandled exception: [ONNXRuntimeError] : 3 : NO_SUCHFILE : Load model from /tmp/fastembed_cache/models--xenova--paraphrase-multilingual-mpnet-base-v2/snapshots/e5d116277351513fd260955ece953ecddde7046e/onnx/model.onnx failed:Load model /tmp/fastembed_cache/models--xenova--paraphrase-multilingual-mpnet-base-v2/snapshots/e5d116277351513fd260955ece953ecddde7046e/onnx/model.onnx failed. File doesn't exist
+ Exception Group Traceback (most recent call last):
| File "/usr/local/lib/python3.11/site-packages/starlette/_utils.py", line 81, in collapse_excgroups
| yield
| File "/usr/local/lib/python3.11/site-packages/starlette/middleware/base.py", line 192, in __call__
| async with anyio.create_task_group() as task_group:
| File "/usr/local/lib/python3.11/site-packages/anyio/_backends/_asyncio.py", line 783, in __aexit__
| raise BaseExceptionGroup(
| ExceptionGroup: unhandled errors in a TaskGroup (1 sub-exception)
+-+---------------- 1 ----------------
| Traceback (most recent call last):
| File "/usr/local/lib/python3.11/site-packages/starlette/middleware/errors.py", line 164, in __call__
| await self.app(scope, receive, _send)
| File "/usr/local/lib/python3.11/site-packages/prometheus_fastapi_instrumentator/middleware.py", line 177, in __call__
| raise exc
| File "/usr/local/lib/python3.11/site-packages/prometheus_fastapi_instrumentator/middleware.py", line 175, in __call__
| await self.app(scope, receive, send_wrapper)
| File "/usr/local/lib/python3.11/site-packages/starlette/middleware/cors.py", line 87, in __call__
| await self.app(scope, receive, send)
| File "/usr/local/lib/python3.11/site-packages/starlette/middleware/base.py", line 191, in __call__
| with recv_stream, send_stream, collapse_excgroups():
| File "/usr/local/lib/python3.11/contextlib.py", line 158, in __exit__
| self.gen.throw(typ, value, traceback)
| File "/usr/local/lib/python3.11/site-packages/starlette/_utils.py", line 87, in collapse_excgroups
| raise exc
| File "/usr/local/lib/python3.11/site-packages/starlette/middleware/base.py", line 193, in __call__
| response = await self.dispatch_func(request, call_next)
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
| File "/app/src/api/main.py", line 89, in track_queue_depth
| response = await call_next(request)
| ^^^^^^^^^^^^^^^^^^^^^^^^
| File "/usr/local/lib/python3.11/site-packages/starlette/middleware/base.py", line 168, in call_next
| raise app_exc from app_exc.__cause__ or app_exc.__context__
| File "/usr/local/lib/python3.11/site-packages/starlette/middleware/base.py", line 144, in coro
| await self.app(scope, receive_or_disconnect, send_no_error)
| File "/usr/local/lib/python3.11/site-packages/starlette/middleware/base.py", line 191, in __call__
| with recv_stream, send_stream, collapse_excgroups():
| File "/usr/local/lib/python3.11/contextlib.py", line 158, in __exit__
| self.gen.throw(typ, value, traceback)
| File "/usr/local/lib/python3.11/site-packages/starlette/_utils.py", line 87, in collapse_excgroups
| raise exc
| File "/usr/local/lib/python3.11/site-packages/starlette/middleware/base.py", line 193, in __call__
| response = await self.dispatch_func(request, call_next)
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
| File "/app/src/api/main.py", line 79, in add_request_id
| response = await call_next(request)
| ^^^^^^^^^^^^^^^^^^^^^^^^
| File "/usr/local/lib/python3.11/site-packages/starlette/middleware/base.py", line 168, in call_next
| raise app_exc from app_exc.__cause__ or app_exc.__context__
| File "/usr/local/lib/python3.11/site-packages/starlette/middleware/base.py", line 144, in coro
| await self.app(scope, receive_or_disconnect, send_no_error)
| File "/usr/local/lib/python3.11/site-packages/starlette/middleware/exceptions.py", line 63, in __call__
| await wrap_app_handling_exceptions(self.app, conn)(scope, receive, send)
| File "/usr/local/lib/python3.11/site-packages/starlette/_exception_handler.py", line 53, in wrapped_app
| raise exc
| File "/usr/local/lib/python3.11/site-packages/starlette/_exception_handler.py", line 42, in wrapped_app
| await app(scope, receive, sender)
| File "/usr/local/lib/python3.11/site-packages/fastapi/middleware/asyncexitstack.py", line 18, in __call__
| await self.app(scope, receive, send)
| File "/usr/local/lib/python3.11/site-packages/starlette/routing.py", line 716, in __call__
| await self.middleware_stack(scope, receive, send)
| File "/usr/local/lib/python3.11/site-packages/starlette/routing.py", line 736, in app
| await route.handle(scope, receive, send)
| File "/usr/local/lib/python3.11/site-packages/starlette/routing.py", line 290, in handle
| await self.app(scope, receive, send)
| File "/usr/local/lib/python3.11/site-packages/fastapi/routing.py", line 121, in app
| await wrap_app_handling_exceptions(app, request)(scope, receive, send)
| File "/usr/local/lib/python3.11/site-packages/starlette/_exception_handler.py", line 53, in wrapped_app
| raise exc
| File "/usr/local/lib/python3.11/site-packages/starlette/_exception_handler.py", line 42, in wrapped_app
| await app(scope, receive, sender)
| File "/usr/local/lib/python3.11/site-packages/fastapi/routing.py", line 107, in app
| response = await f(request)
| ^^^^^^^^^^^^^^^^
| File "/usr/local/lib/python3.11/site-packages/fastapi/routing.py", line 416, in app
| solved_result = await solve_dependencies(
| ^^^^^^^^^^^^^^^^^^^^^^^^^
| File "/usr/local/lib/python3.11/site-packages/fastapi/dependencies/utils.py", line 618, in solve_dependencies
| solved_result = await solve_dependencies(
| ^^^^^^^^^^^^^^^^^^^^^^^^^
| File "/usr/local/lib/python3.11/site-packages/fastapi/dependencies/utils.py", line 649, in solve_dependencies
| solved = await run_in_threadpool(call, **solved_result.values)
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
| File "/usr/local/lib/python3.11/site-packages/starlette/concurrency.py", line 32, in run_in_threadpool
| return await anyio.to_thread.run_sync(func)
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
| File "/usr/local/lib/python3.11/site-packages/anyio/to_thread.py", line 63, in run_sync
| return await get_async_backend().run_sync_in_worker_thread(
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
| File "/usr/local/lib/python3.11/site-packages/anyio/_backends/_asyncio.py", line 2502, in run_sync_in_worker_thread
| return await future
| ^^^^^^^^^^^^
| File "/usr/local/lib/python3.11/site-packages/anyio/_backends/_asyncio.py", line 986, in run
| result = context.run(func, *args)
| ^^^^^^^^^^^^^^^^^^^^^^^^
| File "/app/src/api/dependencies.py", line 154, in get_vector_search
| _vector_search_singleton = VectorSearch(
| ^^^^^^^^^^^^^
| File "/app/src/vector_search.py", line 63, in __init__
| self._dense_model = TextEmbedding(Config.EMBEDDING_MODEL)
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
| File "/usr/local/lib/python3.11/site-packages/fastembed/text/text_embedding.py", line 114, in __init__
| self.model = EMBEDDING_MODEL_TYPE(
| ^^^^^^^^^^^^^^^^^^^^^
| File "/usr/local/lib/python3.11/site-packages/fastembed/text/onnx_embedding.py", line 259, in __init__
| self.load_onnx_model()
| File "/usr/local/lib/python3.11/site-packages/fastembed/text/onnx_embedding.py", line 324, in load_onnx_model
| self._load_onnx_model(
| File "/usr/local/lib/python3.11/site-packages/fastembed/text/onnx_text_model.py", line 59, in _load_onnx_model
| super()._load_onnx_model(
| File "/usr/local/lib/python3.11/site-packages/fastembed/common/onnx_model.py", line 108, in _load_onnx_model
| self.model = ort.InferenceSession(
| ^^^^^^^^^^^^^^^^^^^^^
| File "/usr/local/lib/python3.11/site-packages/onnxruntime/capi/onnxruntime_inference_collection.py", line 504, in __init__
| self._create_inference_session(providers, provider_options, disabled_optimizers)
| File "/usr/local/lib/python3.11/site-packages/onnxruntime/capi/onnxruntime_inference_collection.py", line 599, in _create_inference_session
| sess = C.InferenceSession(session_options, self._model_path, True, self._read_config_from_model)
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
| onnxruntime.capi.onnxruntime_pybind11_state.NoSuchFile: [ONNXRuntimeError] : 3 : NO_SUCHFILE : Load model from /tmp/fastembed_cache/models--xenova--paraphrase-multilingual-mpnet-base-v2/snapshots/e5d116277351513fd260955ece953ecddde7046e/onnx/model.onnx failed:Load model /tmp/fastembed_cache/models--xenova--paraphrase-multilingual-mpnet-base-v2/snapshots/e5d116277351513fd260955ece953ecddde7046e/onnx/model.onnx failed. File doesn't exist
+------------------------------------
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "/usr/local/lib/python3.11/site-packages/starlette/middleware/errors.py", line 164, in __call__
await self.app(scope, receive, _send)
File "/usr/local/lib/python3.11/site-packages/prometheus_fastapi_instrumentator/middleware.py", line 177, in __call__
raise exc
File "/usr/local/lib/python3.11/site-packages/prometheus_fastapi_instrumentator/middleware.py", line 175, in __call__
await self.app(scope, receive, send_wrapper)
File "/usr/local/lib/python3.11/site-packages/starlette/middleware/cors.py", line 87, in __call__
await self.app(scope, receive, send)
File "/usr/local/lib/python3.11/site-packages/starlette/middleware/base.py", line 191, in __call__
with recv_stream, send_stream, collapse_excgroups():
File "/usr/local/lib/python3.11/contextlib.py", line 158, in __exit__
self.gen.throw(typ, value, traceback)
File "/usr/local/lib/python3.11/site-packages/starlette/_utils.py", line 87, in collapse_excgroups
raise exc
File "/usr/local/lib/python3.11/site-packages/starlette/middleware/base.py", line 193, in __call__
response = await self.dispatch_func(request, call_next)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/app/src/api/main.py", line 89, in track_queue_depth
response = await call_next(request)
^^^^^^^^^^^^^^^^^^^^^^^^
2026-02-08 15:24:31,668 - httpx - INFO - HTTP Request: HEAD https://huggingface.co/Xenova/paraphrase-multilingual-mpnet-base-v2/resolve/e5d116277351513fd260955ece953ecddde7046e/onnx/model.onnx "HTTP/1.1 302 Found"
2026-02-08 15:24:31,669 - httpx - INFO - HTTP Request: HEAD https://huggingface.co/Xenova/paraphrase-multilingual-mpnet-base-v2/resolve/e5d116277351513fd260955ece953ecddde7046e/tokenizer.json "HTTP/1.1 302 Found"
2026-02-08 15:24:31,702 - httpx - INFO - HTTP Request: HEAD https://huggingface.co/Xenova/paraphrase-multilingual-mpnet-base-v2/resolve/e5d116277351513fd260955ece953ecddde7046e/onnx/model.onnx "HTTP/1.1 302 Found"
2026-02-08 15:24:31,707 - httpx - INFO - HTTP Request: HEAD https://huggingface.co/Xenova/paraphrase-multilingual-mpnet-base-v2/resolve/e5d116277351513fd260955ece953ecddde7046e/tokenizer.json "HTTP/1.1 302 Found"
INFO: 172.18.0.8:45734 - "GET /metrics HTTP/1.1" 200 OK
INFO: 172.18.0.8:50606 - "GET /metrics HTTP/1.1" 200 OK
INFO: 172.18.0.8:33816 - "GET /metrics HTTP/1.1" 200 OK
2026-02-08 15:25:24,359 - src.vector_search - INFO - Dense 벡터 모델 로드 완료: sentence-transformers/paraphrase-multilingual-mpnet-base-v2
2026-02-08 15:25:24,399 - src.vector_search - INFO - 하이브리드 검색 지원 컬렉션 생성 완료: reviews_collection (dense + sparse, on_disk=False)
2026-02-08 15:25:48,919 - src.vector_search - INFO - Dense 벡터 모델 로드 완료: sentence-transformers/paraphrase-multilingual-mpnet-base-v2
2026-02-08 15:26:04,980 - src.vector_search - INFO - Dense 벡터 모델 로드 완료: sentence-transformers/paraphrase-multilingual-mpnet-base-v2

| 파이프라인 | 죽음 타입 | 원인 |
|---|---|---|
| old_pipe | soft-death | CPU 바운드 작업으로 이벤트 루프 고착 |
| new_pipe | hard-death | ONNX 모델 파일 missing → unhandled exception |
| new_async | hard-death | async 동시 로딩 → 캐시 race + 동일 에러 |