-
Notifications
You must be signed in to change notification settings - Fork 14
/
Copy pathgeometer.py
92 lines (85 loc) · 3.11 KB
/
geometer.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
import os
from typing import Any
from eureka_ml_insights.core import EvalReporting, Inference, PromptProcessing
from eureka_ml_insights.data_utils import (
ColumnRename,
DataReader,
HFDataReader,
MMDataLoader,
SequenceTransform,
)
from eureka_ml_insights.metrics import CountAggregator, GeoMCQMetric
from eureka_ml_insights.configs import(
AggregatorConfig,
DataSetConfig,
EvalReportingConfig,
InferenceConfig,
MetricConfig,
ModelConfig,
PipelineConfig,
PromptProcessingConfig,
)
from eureka_ml_insights.configs import ExperimentConfig
"""This file contains user defined configuration classes for the geometric reasoning task on geometer dataset.
"""
class GEOMETER_PIPELINE(ExperimentConfig):
    """Experiment configuration for the geometric reasoning task on the GeoMeter dataset.

    The resulting pipeline lists three components, in this order: prompt
    processing, inference, and evaluation/reporting.
    """

    def configure_pipeline(
        self, model_config: ModelConfig, resume_from: str = None, **kwargs: dict[str, Any]
    ) -> PipelineConfig:
        """Assemble the pipeline configuration for the GeoMeter experiment.

        Args:
            model_config: Model configuration handed to the inference component.
            resume_from: Forwarded unchanged to the inference component.
            **kwargs: Accepted but not used by this method.

        Returns:
            A PipelineConfig built from the three component configurations
            and ``self.log_dir``.
        """
        # --- Data processing -------------------------------------------------
        # The "train" split of "GeoMeterData/GeoMeter" is read via HFDataReader;
        # the query/target columns are renamed to "prompt"/"ground_truth".
        column_renames = {"query_text": "prompt", "target_text": "ground_truth"}
        hf_reader_args = {
            "path": "GeoMeterData/GeoMeter",
            "split": "train",
            "transform": SequenceTransform([ColumnRename(name_mapping=column_renames)]),
        }
        prompt_stage = PromptProcessingConfig(
            component_type=PromptProcessing,
            data_reader_config=DataSetConfig(HFDataReader, hf_reader_args),
            output_dir=os.path.join(self.log_dir, "data_processing_output"),
        )

        # --- Inference -------------------------------------------------------
        # The loader path is derived from the output_dir stored on the data
        # processing config, not from the local path expression above.
        transformed_path = os.path.join(prompt_stage.output_dir, "transformed_data.jsonl")
        inference_stage = InferenceConfig(
            component_type=Inference,
            model_config=model_config,
            data_loader_config=DataSetConfig(MMDataLoader, {"path": transformed_path}),
            output_dir=os.path.join(self.log_dir, "inference_result"),
            resume_from=resume_from,
        )

        # --- Evaluation and reporting ----------------------------------------
        inference_reader = DataSetConfig(
            DataReader,
            {
                "path": os.path.join(inference_stage.output_dir, "inference_result.jsonl"),
                "format": ".jsonl",
            },
        )
        metric = MetricConfig(GeoMCQMetric)

        # Two CountAggregator passes over the same metric column: one with
        # "normalize" set, one grouped by "category" (no "normalize" key).
        result_column = "GeoMCQMetric_result"
        overall_counts = AggregatorConfig(
            CountAggregator,
            {"column_names": [result_column], "normalize": True},
        )
        per_category_counts = AggregatorConfig(
            CountAggregator,
            {"column_names": [result_column], "group_by": "category"},
        )
        eval_stage = EvalReportingConfig(
            component_type=EvalReporting,
            data_reader_config=inference_reader,
            metric_config=metric,
            aggregator_configs=[overall_counts, per_category_counts],
            output_dir=os.path.join(self.log_dir, "eval_report"),
        )

        # --- Pipeline --------------------------------------------------------
        stages = [prompt_stage, inference_stage, eval_stage]
        return PipelineConfig(stages, self.log_dir)