Skip to content

Commit 835cc92

Browse files
Merge pull request #32 from mlrun/main
Update branch to latest version; Python 3.11 support only
2 parents b7a9ed9 + 221b464 commit 835cc92

File tree

7 files changed

+161
-136
lines changed

7 files changed

+161
-136
lines changed

llm-monitoring-main.ipynb

Lines changed: 45 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -48,12 +48,24 @@
4848
"After creating the project, you enable model monitoring."
4949
]
5050
},
51+
{
52+
"metadata": {},
53+
"cell_type": "code",
54+
"outputs": [],
55+
"execution_count": null,
56+
"source": [
57+
"import mlrun\n",
58+
"from datasets import Dataset, load_dataset\n",
59+
"import pandas as pd\n",
60+
"from src.llm_as_a_judge import OpenAIJudge"
61+
],
62+
"id": "2186eac3109c41eb"
63+
},
5164
{
5265
"cell_type": "code",
5366
"execution_count": null,
5467
"id": "6e99f06d",
5568
"metadata": {},
56-
"outputs": [],
5769
"source": [
5870
"# Create the project:\n",
5971
"project = mlrun.get_or_create_project(\n",
@@ -65,14 +77,26 @@
6577
" context=\"./src\",\n",
6678
")\n",
6779
"\n",
80+
"secrets = mlrun.set_env_from_file('env.env', return_dict=True)\n",
81+
"project.set_secrets(secrets)"
82+
],
83+
"outputs": []
84+
},
85+
{
86+
"metadata": {},
87+
"cell_type": "code",
88+
"outputs": [],
89+
"execution_count": null,
90+
"source": [
6891
"# Enable model monitoring\n",
6992
"from src.model_monitoring_utils import enable_model_monitoring\n",
7093
"\n",
7194
"# If this project was running with MM enabled pre-1.8.0, disable the old model monitoring to update configurations\n",
7295
"project.disable_model_monitoring(delete_stream_function=True)\n",
7396
"\n",
74-
"enable_model_monitoring(project=project, base_period=2)\n"
75-
]
97+
"enable_model_monitoring(project=project, base_period=2)"
98+
],
99+
"id": "78a9e4fe68462400"
76100
},
77101
{
78102
"cell_type": "markdown",
@@ -277,6 +301,7 @@
277301
"metadata": {},
278302
"outputs": [],
279303
"source": [
304+
"OPENAI_MODEL = mlrun.get_secret_or_env(\"OPENAI_MODEL\")\n",
280305
"# Creating the OpenAI Judge\n",
281306
"judge = OpenAIJudge(\n",
282307
" judge_type=\"custom-grading\",\n",
@@ -437,14 +462,11 @@
437462
"source": [
438463
"# Define application requirements\n",
439464
"requirements = ['openai==1.108.0',\n",
440-
"'transformers==4.56.1',\n",
441-
"'optimum==1.27.0',\n",
442465
"'deepeval==2.5.5',\n",
443466
"'llama-index==0.14.2',\n",
444467
"'llama-index-core==0.14.2',\n",
445-
"]\n",
446-
"if sys.version_info.major == 3 and sys.version_info.minor == 9:\n",
447-
" requirements += ['protobuf==3.20.3']"
468+
"'langchain==0.2.17',\n",
469+
"]"
448470
]
449471
},
450472
{
@@ -456,6 +478,12 @@
456478
"\n"
457479
]
458480
},
481+
{
482+
"metadata": {},
483+
"cell_type": "markdown",
484+
"source": "",
485+
"id": "e31576a009f2ff93"
486+
},
459487
{
460488
"cell_type": "code",
461489
"execution_count": null,
@@ -560,6 +588,8 @@
560588
},
561589
"outputs": [],
562590
"source": [
591+
"from mlrun.features import Feature\n",
592+
"\n",
563593
"# Log the model to the project:\n",
564594
"base_model = \"google-gemma-2b\"\n",
565595
"project.log_model(\n",
@@ -696,6 +726,7 @@
696726
},
697727
"outputs": [],
698728
"source": [
729+
"from mlrun.model_monitoring.helpers import get_result_instance_fqn\n",
699730
"prj_alert_obj = get_result_instance_fqn(\n",
700731
" ep_id, app_name=app_name, result_name=result_name\n",
701732
")\n",
@@ -718,6 +749,7 @@
718749
"metadata": {},
719750
"outputs": [],
720751
"source": [
752+
"import mlrun.common.schemas.alert as alert_constants\n",
721753
"import mlrun.common.schemas.alert as alert_objects"
722754
]
723755
},
@@ -904,7 +936,10 @@
904936
"from deepeval.metrics import (\n",
905937
" AnswerRelevancyMetric,\n",
906938
" HallucinationMetric,\n",
907-
")"
939+
")\n",
940+
"import os\n",
941+
"os.environ[\"OPENAI_API_KEY\"] = mlrun.get_secret_or_env(\"OPENAI_API_KEY\")\n",
942+
"os.environ[\"OPENAI_BASE_URL\"]= mlrun.get_secret_or_env(\"OPENAI_API_BASE\")"
908943
]
909944
},
910945
{
@@ -1072,7 +1107,7 @@
10721107
"ret = project.run_function(\n",
10731108
" function=\"generate-ds\",\n",
10741109
" handler=\"generate_ds\",\n",
1075-
" params={\"input_ds\": input_ds,\"hf_repo_id\":None},\n",
1110+
" params={\"input_ds\": input_ds},\n",
10761111
" outputs=[\"new-train-ds\", \"dataset\"],\n",
10771112
")"
10781113
]

requirements.txt

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,7 @@ openai==1.108.0
55
transformers==4.56.1
66
datasets==4.1.1
77
sentencepiece==0.2.0
8-
deepeval==2.5.5
8+
deepeval==3.7.0
99
pyarrow>=21.0.0
1010
pydantic>=2.0
11-
langchain==0.2.17
12-
13-
11+
langchain==0.2.17

src/deepeval_as_a_judge.py

Lines changed: 7 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -42,38 +42,36 @@ def __init__(
4242
self.name = "deepeval-as-a-judge"
4343
self.metric_name = kwargs.pop("metric_name")
4444
os.environ["OPENAI_API_KEY"] = mlrun.get_secret_or_env("OPENAI_API_KEY")
45-
os.environ["OPENAI_BASE_URL"]= mlrun.get_secret_or_env("OPENAI_API_BASE")
46-
45+
os.environ["OPENAI_BASE_URL"] = mlrun.get_secret_or_env("OPENAI_API_BASE")
4746

4847
def judge(self, sample_df: pd.DataFrame) -> pd.DataFrame:
49-
5048
result_df = pd.DataFrame(columns=["question", "answer", "score", "explanation"])
5149

5250
correctness_metric = GEval(
5351
name="Correctness",
5452
criteria="Correctness - determine if the actual output is related to banking.",
5553
evaluation_params=[LLMTestCaseParams.ACTUAL_OUTPUT]
5654
)
57-
55+
5856
for i in range(len(sample_df)):
59-
question, answer = sample_df.loc[i, "question"], sample_df.loc[i, "answer"]
57+
question, answer = sample_df.loc[i, "question"], sample_df.loc[i, "answer"]
6058
test_case = LLMTestCase(
6159
input=question,
6260
actual_output=answer
6361
)
6462
correctness_metric.measure(test_case)
65-
63+
6664
correctness_metric.score, correctness_metric.reason
67-
65+
6866
result_df.loc[i] = [
6967
question,
7068
answer,
7169
correctness_metric.score,
7270
correctness_metric.reason,
7371
]
74-
72+
7573
return result_df
76-
74+
7775
def do_tracking(
7876
self,
7977
monitoring_context,

src/generate_ds.py

Lines changed: 1 addition & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
import openai
2-
from datasets import Dataset
32
from huggingface_hub import create_repo, login
43

54
import mlrun
@@ -87,15 +86,4 @@ def generate_ds(context: MLClientCtx, input_ds: str, hf_repo_id:str =None):
8786
df.drop(inplace=True, columns=["answer", "explanation"])
8887
context.log_dataset("new-train-ds", df)
8988
context.logger.info("Dataframe logged")
90-
if hf_repo_id:
91-
# Upload the dataset to HuggingFace
92-
hf_dataset = Dataset.from_pandas(df)
93-
login(token=hf_token)
94-
95-
# Create a new repository on the Hugging Face Hub
96-
create_repo(hf_repo_id, repo_type="dataset", exist_ok=True)
97-
98-
# Push the dataset to the Hub
99-
hf_dataset.push_to_hub(hf_repo_id)
100-
context.log_result("dataset", hf_repo_id)
101-
context.logger.info("Dataset uploaded to HF")
89+

0 commit comments

Comments (0)