Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
55 changes: 45 additions & 10 deletions llm-monitoring-main.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -48,12 +48,24 @@
"After creating the project, you enable model monitoring."
]
},
{
"metadata": {},
"cell_type": "code",
"outputs": [],
"execution_count": null,
"source": [
"import mlrun\n",
"from datasets import Dataset, load_dataset\n",
"import pandas as pd\n",
"from src.llm_as_a_judge import OpenAIJudge"
],
"id": "2186eac3109c41eb"
},
{
"cell_type": "code",
"execution_count": null,
"id": "6e99f06d",
"metadata": {},
"outputs": [],
"source": [
"# Create the project:\n",
"project = mlrun.get_or_create_project(\n",
Expand All @@ -65,14 +77,26 @@
" context=\"./src\",\n",
")\n",
"\n",
"secrets = mlrun.set_env_from_file('env.env', return_dict=True)\n",
"project.set_secrets(secrets)"
],
"outputs": []
},
{
"metadata": {},
"cell_type": "code",
"outputs": [],
"execution_count": null,
"source": [
"# Enable model monitoring\n",
"from src.model_monitoring_utils import enable_model_monitoring\n",
"\n",
"# If this project was running with MM enabled pre-1.8.0, disable the old model monitoring to update configurations\n",
"project.disable_model_monitoring(delete_stream_function=True)\n",
"\n",
"enable_model_monitoring(project=project, base_period=2)\n"
]
"enable_model_monitoring(project=project, base_period=2)"
],
"id": "78a9e4fe68462400"
},
{
"cell_type": "markdown",
Expand Down Expand Up @@ -277,6 +301,7 @@
"metadata": {},
"outputs": [],
"source": [
"OPENAI_MODEL = mlrun.get_secret_or_env(\"OPENAI_MODEL\")\n",
"# Creating the OpenAI Judge\n",
"judge = OpenAIJudge(\n",
" judge_type=\"custom-grading\",\n",
Expand Down Expand Up @@ -437,14 +462,11 @@
"source": [
"# Define application requirements\n",
"requirements = ['openai==1.108.0',\n",
"'transformers==4.56.1',\n",
"'optimum==1.27.0',\n",
"'deepeval==2.5.5',\n",
"'llama-index==0.14.2',\n",
"'llama-index-core==0.14.2',\n",
"]\n",
"if sys.version_info.major == 3 and sys.version_info.minor == 9:\n",
" requirements += ['protobuf==3.20.3']"
"'langchain==0.2.17',\n",
"]"
]
},
{
Expand All @@ -456,6 +478,12 @@
"\n"
]
},
{
"metadata": {},
"cell_type": "markdown",
"source": "",
"id": "e31576a009f2ff93"
},
{
"cell_type": "code",
"execution_count": null,
Expand Down Expand Up @@ -560,6 +588,8 @@
},
"outputs": [],
"source": [
"from mlrun.features import Feature\n",
"\n",
"# Log the model to the project:\n",
"base_model = \"google-gemma-2b\"\n",
"project.log_model(\n",
Expand Down Expand Up @@ -696,6 +726,7 @@
},
"outputs": [],
"source": [
"from mlrun.model_monitoring.helpers import get_result_instance_fqn\n",
"prj_alert_obj = get_result_instance_fqn(\n",
" ep_id, app_name=app_name, result_name=result_name\n",
")\n",
Expand All @@ -718,6 +749,7 @@
"metadata": {},
"outputs": [],
"source": [
"import mlrun.common.schemas.alert as alert_constants\n",
"import mlrun.common.schemas.alert as alert_objects"
]
},
Expand Down Expand Up @@ -904,7 +936,10 @@
"from deepeval.metrics import (\n",
" AnswerRelevancyMetric,\n",
" HallucinationMetric,\n",
")"
")\n",
"import os\n",
"os.environ[\"OPENAI_API_KEY\"] = mlrun.get_secret_or_env(\"OPENAI_API_KEY\")\n",
"os.environ[\"OPENAI_BASE_URL\"]= mlrun.get_secret_or_env(\"OPENAI_API_BASE\")"
]
},
{
Expand Down Expand Up @@ -1072,7 +1107,7 @@
"ret = project.run_function(\n",
" function=\"generate-ds\",\n",
" handler=\"generate_ds\",\n",
" params={\"input_ds\": input_ds,\"hf_repo_id\":None},\n",
" params={\"input_ds\": input_ds},\n",
" outputs=[\"new-train-ds\", \"dataset\"],\n",
")"
]
Expand Down
6 changes: 2 additions & 4 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,7 @@ openai==1.108.0
transformers==4.56.1
datasets==4.1.1
sentencepiece==0.2.0
deepeval==2.5.5
deepeval==3.7.0
pyarrow>=21.0.0
pydantic>=2.0
langchain==0.2.17


langchain==0.2.17
16 changes: 7 additions & 9 deletions src/deepeval_as_a_judge.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,38 +42,36 @@ def __init__(
self.name = "deepeval-as-a-judge"
self.metric_name = kwargs.pop("metric_name")
os.environ["OPENAI_API_KEY"] = mlrun.get_secret_or_env("OPENAI_API_KEY")
os.environ["OPENAI_BASE_URL"]= mlrun.get_secret_or_env("OPENAI_API_BASE")

os.environ["OPENAI_BASE_URL"] = mlrun.get_secret_or_env("OPENAI_API_BASE")

def judge(self, sample_df: pd.DataFrame) -> pd.DataFrame:
    """Grade each question/answer pair in ``sample_df`` with a G-Eval metric.

    :param sample_df: Frame with a ``question`` and an ``answer`` column,
        one row per sample to evaluate.
    :returns: Frame with the columns ``question``, ``answer``, ``score`` and
        ``explanation``, one row per input row, indexed ``0..n-1``.
    """
    columns = ["question", "answer", "score", "explanation"]

    # A single metric object is reused for every sample; its ``score`` and
    # ``reason`` attributes are read back after each ``measure`` call.
    correctness_metric = GEval(
        name="Correctness",
        criteria="Correctness - determine if the actual output is related to banking.",
        evaluation_params=[LLMTestCaseParams.ACTUAL_OUTPUT],
    )

    rows = []
    # Walk the column values positionally instead of looking up ``.loc[i]``
    # labels: label lookup fails on frames whose index is not 0..n-1
    # (e.g. after filtering or sampling).
    for question, answer in zip(sample_df["question"], sample_df["answer"]):
        test_case = LLMTestCase(input=question, actual_output=answer)
        correctness_metric.measure(test_case)
        rows.append(
            [
                question,
                answer,
                correctness_metric.score,
                correctness_metric.reason,
            ]
        )

    # Build the result once rather than enlarging a frame row by row.
    return pd.DataFrame(rows, columns=columns)

def do_tracking(
self,
monitoring_context,
Expand Down
14 changes: 1 addition & 13 deletions src/generate_ds.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import openai
from datasets import Dataset
from huggingface_hub import create_repo, login

import mlrun
Expand Down Expand Up @@ -87,15 +86,4 @@ def generate_ds(context: MLClientCtx, input_ds: str, hf_repo_id:str =None):
df.drop(inplace=True, columns=["answer", "explanation"])
context.log_dataset("new-train-ds", df)
context.logger.info("Dataframe logged")
if hf_repo_id:
# Upload the dataset to HuggingFace
hf_dataset = Dataset.from_pandas(df)
login(token=hf_token)

# Create a new repository on the Hugging Face Hub
create_repo(hf_repo_id, repo_type="dataset", exist_ok=True)

# Push the dataset to the Hub
hf_dataset.push_to_hub(hf_repo_id)
context.log_result("dataset", hf_repo_id)
context.logger.info("Dataset uploaded to HF")

Loading