1414
# 1. Configuration
# One language model is built here and registered globally with DSPy.

### Scaleway API
SCALEWAY_MODEL = "mistral/mistral-small-3.2-24b-instruct-2506"
SCALEWAY_API_BASE = "https://api.scaleway.ai/a2dc0d31-c47f-47f1-b0b9-9877dd4eb2b5/v1"

lm = dspy.LM(
    model=SCALEWAY_MODEL,
    api_key=os.getenv('SCALEWAY_API_KEY'),  # read from the environment
    api_base=SCALEWAY_API_BASE,
)

### OpenAI API
# Alternative backend, kept for reference; enable it in place of the
# Scaleway block above.
# lm = dspy.LM("openai/gpt-4o-mini", api_key=os.getenv('OPENAI_API_KEY'))

dspy.configure(lm=lm)

# Model identifier with "/" replaced by "_".
model_used = lm.model.replace("/", "_")
# 2. Data Loading
GOLD_DATA_PATH = 'dspy_policies_and_taxonomy_extraction/model_training_data/gold_policy.jsonl'
SYNTHETIC_DATA_PATH = 'dspy_policies_and_taxonomy_extraction/model_training_data/synthetic_policy.jsonl'

# Number of gold examples held out (from the start of the file) for validation.
DEV_SIZE = 40


def _load_examples(path):
    """Read a JSONL file into a list of DSPy examples.

    Each non-blank line must be a JSON object with 'question' and 'response'
    keys. 'question' is declared as the input field of every example.

    Args:
        path: Location of the JSONL file.

    Returns:
        A list of ``dspy.Example`` objects, in file order.

    Raises:
        SystemExit: If *path* does not exist. The message names the exact
            path that was looked up.
    """
    try:
        handle = open(path, 'r', encoding='utf-8')
    except FileNotFoundError:
        raise SystemExit(f"Data file '{path}' not found.") from None

    with handle:
        # Blank lines (e.g. a trailing newline at end of file) are skipped
        # instead of being fed to json.loads, which would reject them.
        records = [json.loads(line) for line in handle if line.strip()]

    return [
        dspy.Example(question=record['question'], response=record['response']).with_inputs('question')
        for record in records
    ]


golden_dataset = _load_examples(GOLD_DATA_PATH)
syntetic_dataset = _load_examples(SYNTHETIC_DATA_PATH)

# NOTE(review): an earlier comment here said the best score was obtained with
# the small synthetic dataset, yet the split below uses only the gold data.
# Confirm which setup is intended.
devset = golden_dataset[:DEV_SIZE]
trainset = golden_dataset[DEV_SIZE:]

# With DEV_SIZE or fewer gold examples the slice above leaves nothing to
# train on; stop with a clear message rather than optimizing on an empty set.
if not trainset:
    raise SystemExit(
        f"Gold dataset has only {len(golden_dataset)} examples; "
        f"more than {DEV_SIZE} are needed to leave a training split."
    )

print(f"Training examples: {len(trainset)}, Validation examples: {len(devset)}")
5757
@@ -106,7 +106,7 @@ def __call__(self,example, pred, trace=None):
print("Starting optimization...")

# MIPROv2 scored by metric_fn, using the "heavy" auto preset.
optimizer = MIPROv2(metric=metric_fn, auto="heavy")

# Optimize a fresh MonProgramme instance against the training examples.
student_program = MonProgramme()
compiled_program = optimizer.compile(student_program, trainset=trainset)
@@ -121,7 +121,7 @@ def __call__(self,example, pred, trace=None):
# Timestamp used to give each evaluation report a distinct file name.
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")

# Make sure the report directory exists before evaluating, so the results of
# the optimization run are not lost to a missing folder.
results_dir = "dspy_policies_and_taxonomy_extraction/saved_dspy_model/policy"
os.makedirs(results_dir, exist_ok=True)

optimized_score = optimized_evaluator(
    compiled_program,
    save_as_json=f"{results_dir}/{timestamp}.json",
)
print(optimized_score)

# Score rounded to two decimals, with "." replaced by "_".
score_str = f"{round(optimized_score.score, 2)}".replace(".", "_")
@@ -134,6 +134,6 @@ def __call__(self,example, pred, trace=None):
# Report the numeric score, not the whole evaluation result object, so the
# trailing "%" follows a number.
print(f"Final Score on Validation Set (optimized): {optimized_score.score}%")

# --- Saving the optimized model ---
# Constant location: successive runs target the same directory.
model_path = "saved_dspy_model/policy_model/"
compiled_program.save(model_path, save_program=True)
print(f"Optimized model saved to {model_path}")
0 commit comments