Skip to content

Commit 3cbfe26

Browse files
authored
Add structured logging for query outputs and update service (#54)
This PR adds logging for the query outputs, so we know which documents were fed into the context, which model was used, and which answer was returned to the user. Structured logging allows us to parse the logs later and extract the information we are interested in.
1 parent 772a8c4 commit 3cbfe26

File tree

4 files changed

+49
-15
lines changed

4 files changed

+49
-15
lines changed

rag/generate.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -76,9 +76,10 @@ def get_sources_and_context(query, embedding_model, num_chunks):
7676
(embedding, num_chunks),
7777
)
7878
rows = cur.fetchall()
79+
document_ids = [row[0] for row in rows]
7980
context = [{"text": row[1]} for row in rows]
8081
sources = [row[2] for row in rows]
81-
return sources, context
82+
return document_ids, sources, context
8283

8384

8485
class QueryAgent:
@@ -107,7 +108,7 @@ def __init__(
107108

108109
def __call__(self, query, num_chunks=5, stream=True):
109110
# Get sources and context
110-
sources, context = get_sources_and_context(
111+
document_ids, sources, context = get_sources_and_context(
111112
query=query, embedding_model=self.embedding_model, num_chunks=num_chunks
112113
)
113114

@@ -126,6 +127,7 @@ def __call__(self, query, num_chunks=5, stream=True):
126127
result = {
127128
"question": query,
128129
"sources": sources,
130+
"document_ids": document_ids,
129131
"answer": answer,
130132
"llm": self.llm,
131133
}

rag/serve.py

+40-12
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,12 @@
11
# You can run the whole script locally with
2-
# serve run rag.serve:deployment
2+
# serve run rag.serve:deployment --runtime-env-json='{"env_vars": {"RAY_ASSISTANT_LOGS": "/mnt/shared_storage/ray-assistant-logs/info.log", "RAY_ASSISTANT_SECRET": "ray-assistant-prod"}}'
33

44
import json
5+
import logging
56
import os
67
import pickle
78
from pathlib import Path
8-
from typing import List
9+
from typing import Any, Dict, List
910

1011
import openai
1112
import ray
@@ -17,6 +18,7 @@
1718
from slack_bolt import App
1819
from slack_bolt.adapter.socket_mode import SocketModeHandler
1920
from starlette.responses import StreamingResponse
21+
import structlog
2022

2123
from rag.config import MAX_CONTEXT_LENGTHS, ROOT_DIR
2224
from rag.generate import QueryAgent
@@ -37,7 +39,7 @@ def get_secret(secret_name):
3739
import boto3
3840

3941
client = boto3.client("secretsmanager", region_name="us-west-2")
40-
response = client.get_secret_value(SecretId="ray-assistant")
42+
response = client.get_secret_value(SecretId=os.environ["RAY_ASSISTANT_SECRET"])
4143
return json.loads(response["SecretString"])[secret_name]
4244

4345

@@ -78,6 +80,17 @@ class Answer(BaseModel):
7880
@serve.ingress(app)
7981
class RayAssistantDeployment:
8082
def __init__(self, num_chunks, embedding_model_name, llm, run_slack=False):
83+
# Configure logging
84+
logging.basicConfig(filename=os.environ["RAY_ASSISTANT_LOGS"], level=logging.INFO, encoding='utf-8')
85+
structlog.configure(
86+
processors=[
87+
structlog.processors.TimeStamper(fmt="iso"),
88+
structlog.processors.JSONRenderer(),
89+
],
90+
logger_factory=structlog.stdlib.LoggerFactory(),
91+
)
92+
self.logger = structlog.get_logger()
93+
8194
# Set credentials
8295
os.environ["ANYSCALE_API_BASE"] = "https://api.endpoints.anyscale.com/v1"
8396
os.environ["ANYSCALE_API_KEY"] = get_secret("ANYSCALE_API_KEY")
@@ -111,33 +124,48 @@ def __init__(self, num_chunks, embedding_model_name, llm, run_slack=False):
111124
self.slack_app = SlackApp.remote()
112125
self.runner = self.slack_app.run.remote()
113126

114-
@app.post("/query")
115-
def query(self, query: Query) -> Answer:
127+
def predict(self, query: Query, stream: bool) -> Dict[str, Any]:
116128
use_oss_agent = self.router.predict([query.query])[0]
117129
agent = self.oss_agent if use_oss_agent else self.gpt_agent
118-
result = agent(query=query.query, num_chunks=self.num_chunks, stream=False)
130+
result = agent(query=query.query, num_chunks=self.num_chunks, stream=stream)
131+
return result
132+
133+
@app.post("/query")
134+
def query(self, query: Query) -> Answer:
135+
result = self.predict(query, stream=False)
119136
return Answer.parse_obj(result)
120137

121-
def produce_streaming_answer(self, result):
138+
def produce_streaming_answer(self, query, result):
139+
answer = []
122140
for answer_piece in result["answer"]:
141+
answer.append(answer_piece)
123142
yield answer_piece
143+
124144
if result["sources"]:
125145
yield "\n\n**Sources:**\n"
126146
for source in result["sources"]:
127147
yield "* " + source + "\n"
128148

149+
self.logger.info(
150+
"finished streaming query",
151+
query=query,
152+
document_ids=result["document_ids"],
153+
llm=result["llm"],
154+
answer="".join(answer)
155+
)
156+
129157
@app.post("/stream")
130158
def stream(self, query: Query) -> StreamingResponse:
131-
use_oss_agent = self.router.predict([query.query])[0]
132-
agent = self.oss_agent if use_oss_agent else self.gpt_agent
133-
result = agent(query=query.query, num_chunks=self.num_chunks, stream=True)
159+
result = self.predict(query, stream=True)
134160
return StreamingResponse(
135-
self.produce_streaming_answer(result), media_type="text/plain")
161+
self.produce_streaming_answer(query.query, result),
162+
media_type="text/plain"
163+
)
136164

137165

138166
# Deploy the Ray Serve app
139167
deployment = RayAssistantDeployment.bind(
140-
num_chunks=7,
168+
num_chunks=5,
141169
embedding_model_name="thenlper/gte-large",
142170
llm="meta-llama/Llama-2-70b-chat-hf",
143171
)

rag/service.yaml

+4-1
Original file line numberDiff line numberDiff line change
@@ -3,4 +3,7 @@ cluster_env: ray-assistant
33
ray_serve_config:
44
import_path: app.serve:deployment
55
runtime_env:
6-
working_dir: "https://github.com/ray-project/llm-applications/archive/refs/tags/v0.0.8.zip"
6+
working_dir: "https://github.com/ray-project/llm-applications/archive/refs/tags/v0.0.9.zip"
7+
env_vars:
8+
RAY_ASSISTANT_SECRET: "ray-assistant-prod"
9+
RAY_ASSISTANT_LOGS: "/mnt/shared_storage/ray-assistant-logs/info.log"

requirements.txt

+1
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ ray
1010
sentence_transformers
1111
slack_bolt
1212
streamlit
13+
structlog
1314
typer
1415
tiktoken
1516

0 commit comments

Comments (0)