Skip to content

Commit 2e18ed6

Browse files
EdouardCallet123 authored and fraboniface committed
add classifying model
1 parent a03e8e1 commit 2e18ed6

31 files changed

+3628
-2497
lines changed

policy_analysis/dspy_policies_and_taxonomy_extraction/model_training_data/gold_policy.jsonl

Lines changed: 58 additions & 0 deletions
Large diffs are not rendered by default.

policy_analysis/dspy_policies_and_taxonomy_extraction/model_training_data/gold_taxonomy.jsonl

Lines changed: 82 additions & 32 deletions
Large diffs are not rendered by default.
Binary file not shown.

policy_analysis/dspy_policies_and_taxonomy_extraction/model_training_data/synthetic_taxonomy.jsonl

Whitespace-only changes.

policy_analysis/dspy_policies_and_taxonomy_extraction/model_training_data/sythetic_taxonomy.jsonl

Lines changed: 0 additions & 50 deletions
This file was deleted.

policy_analysis/dspy_policies_and_taxonomy_extraction/pipeline_policy_and_taxonomy_extraction.py

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ def suppress_output():
5151
with open("saved_dspy_model/best_policy_extraction_model/program.pkl", "rb") as f:
5252
policy_program = pickle.load(f)
5353

54-
with open("saved_dspy_model/best_geo_impact_extraction_model/program.pkl", "rb") as f:
54+
with open("saved_dspy_model/best_geo_extraction_model/program.pkl", "rb") as f:
5555
geo_program = pickle.load(f)
5656

5757
# --------------------------------------------------
@@ -123,12 +123,7 @@ def suppress_output():
123123
"policy_list": policy_list,
124124
"regional_group": geo_dict.get("regional_group"),
125125
"geographical_scopes": geo_dict.get("geographical_scopes", []),
126-
"main_country_focus": geo_dict.get("main_country_focus", []),
127-
"human_needs": geo_dict.get("human_needs", []),
128-
"natural_resources": geo_dict.get("natural_ressource", []),
129-
"wellbeing": geo_dict.get("wellbeing", []),
130-
"justice": geo_dict.get("justice_consideration", []),
131-
"planetary_boundaries": geo_dict.get("planetary_boundaries", []),
126+
"main_country_focus": geo_dict.get("main_country_focus", [])
132127
}
133128

134129
buffered_records.append(record)

policy_analysis/dspy_policies_and_taxonomy_extraction/policy_extraction/policy_dspy_model_creation.py

Lines changed: 16 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -14,44 +14,44 @@
1414

1515
# 1. Configuration
1616
### Scaleway API
17-
"""
17+
1818
lm = dspy.LM(
19-
model="mistral/mistral-small-3.2-24b-instruct-2506:fp8",
19+
model="mistral/mistral-small-3.2-24b-instruct-2506",
2020
api_key=os.getenv('SCALEWAY_API_KEY'),
21-
api_base="https://c1b66caa-347e-448c-a54c-d3fb43889a62.ifr.fr-par.scaleway.com"
21+
api_base="https://api.scaleway.ai/a2dc0d31-c47f-47f1-b0b9-9877dd4eb2b5/v1"
2222
)
23-
"""
23+
2424
### OpenAI API
25-
lm = dspy.LM("openai/gpt-4o-mini", api_key=os.getenv('OPENAI_API_KEY'))
25+
#lm = dspy.LM("openai/gpt-4o-mini", api_key=os.getenv('OPENAI_API_KEY'))
2626

2727

2828
dspy.configure(lm=lm)
2929
model_used = lm.model.replace("/","_")
3030
# 2. Data Loading
3131
golden_dataset = []
3232

33-
if os.path.exists('model_training_data/conclusions&pollitiques_gold.jsonl'):
34-
with open('model_training_data/conclusions&pollitiques_gold.jsonl', 'r', encoding='utf-8') as f:
33+
if os.path.exists('dspy_policies_and_taxonomy_extraction/model_training_data/gold_policy.jsonl'):
34+
with open('dspy_policies_and_taxonomy_extraction/model_training_data/gold_policy.jsonl', 'r', encoding='utf-8') as f:
3535
for line in f:
3636
data = json.loads(line)
3737
example = dspy.Example(question=data['question'], response=data['response'])
3838
golden_dataset.append(example.with_inputs('question'))
3939
else:
40-
exit ("Data file 'model_training_data/conclusions&pollitiques_gold.jsonl' not found.")
40+
exit ("Data file 'model_training_data/gold_policy.jsonl' not found.")
4141

4242
syntetic_dataset = []
43-
if os.path.exists('model_training_data/conclusions&pollitiques_synthetiques_diversifies.jsonl'):
44-
with open('model_training_data/conclusions&pollitiques_synthetiques_diversifies.jsonl', 'r', encoding='utf-8') as f:
43+
if os.path.exists('dspy_policies_and_taxonomy_extraction/model_training_data/synthetic_policy.jsonl'):
44+
with open('dspy_policies_and_taxonomy_extraction/model_training_data/synthetic_policy.jsonl', 'r', encoding='utf-8') as f:
4545
for line in f:
4646
data = json.loads(line)
4747
example = dspy.Example(question=data['question'], response=data['response'])
4848
syntetic_dataset.append(example.with_inputs('question'))
4949
else:
50-
exit ("Data file 'model_training_data/conclusions&pollitiques_synthetiques_diversifies.jsonl' not found.")
50+
exit ("Data file 'dspy_policies_and_taxonomy_extraction/model_training_data/synthetic_policy.jsonl' not found.")
5151

5252
# Meilleur score avec dataset synthetique petit
53-
trainset = syntetic_dataset
54-
devset = golden_dataset
53+
trainset = golden_dataset[40:]
54+
devset = golden_dataset[:40]
5555

5656
print(f"Training examples: {len(trainset)}, Validation examples: {len(devset)}")
5757

@@ -106,7 +106,7 @@ def __call__(self,example, pred, trace=None):
106106
print("Starting optimization...")
107107

108108
optimizer = MIPROv2(metric=metric_fn
109-
#,auto="heavy"
109+
,auto="heavy"
110110
)
111111

112112
compiled_program = optimizer.compile(MonProgramme(), trainset=trainset)
@@ -121,7 +121,7 @@ def __call__(self,example, pred, trace=None):
121121
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
122122

123123

124-
optimized_score = optimized_evaluator(compiled_program,save_as_json=f"saved_dspy_model/policy/{model_used}{timestamp}.json")
124+
optimized_score = optimized_evaluator(compiled_program,save_as_json=f"dspy_policies_and_taxonomy_extraction/saved_dspy_model/policy/{timestamp}.json")
125125
print(optimized_score)
126126

127127
score_str = f"{round(optimized_score.score,2)}".replace(".", "_")
@@ -134,6 +134,6 @@ def __call__(self,example, pred, trace=None):
134134
print(f"Final Score on Validation Set (optimized): {optimized_score}%")
135135

136136
# --- Saving the optimized model ---
137-
model_path = f"saved_dspy_model/policy/{score_str}"
137+
model_path = f"saved_dspy_model/policy_model/"
138138
compiled_program.save(model_path,save_program=True)
139139
print(f"Optimized model saved to {model_path}")

0 commit comments

Comments
 (0)