-
Notifications
You must be signed in to change notification settings - Fork 17
Expand file tree
/
Copy pathMakefile
More file actions
29 lines (22 loc) · 885 Bytes
/
Makefile
File metadata and controls
29 lines (22 loc) · 885 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
# Command-style targets: declared phony so they always run, even if a file
# with the same name exists in the working directory.
.PHONY: test test-all lint \
        run_evals run_benchmark \
        pipeline setup-consultation
# Run only the three listed test modules through pytest (via `uv run`),
# with -v (verbose) and -s (output capture disabled).
test:
	uv run pytest \
		tests/test_tasks.py \
		tests/test_models.py \
		tests/test_llm_batch_processor.py \
		-v -s
# Run everything pytest collects under tests/ (via `uv run`),
# with -v (verbose) and -s (output capture disabled).
test-all:
	uv run pytest tests/ -v -s
# Run the pre-commit hooks against every file, not just staged changes.
lint:
	uv run pre-commit run --all-files
# Quick benchmark: print a banner, then run benchmark.py with --quick.
# The `cd` and the command share one recipe line because each recipe line
# runs in its own shell; benchmark.py is invoked from inside evals/.
run_evals:
	@echo "Running quick benchmark (1 run, gpt-4.1-mini)..."
	cd evals && uv run python benchmark.py --quick
# Full benchmark: print a banner, then run benchmark.py from inside evals/
# with the fixed dataset, run count, provider and judge-model flags below.
run_benchmark:
	@echo "Running full benchmark..."
	cd evals && uv run python benchmark.py \
		--dataset housing_S \
		--runs 5 \
		--provider all \
		--judge-model gpt-4.1
# Arguments handed to run_pipeline.sh, in order. The defaults reproduce the
# previously hard-coded invocation; override them on the command line, e.g.
#   make pipeline PIPELINE_DATASET=<first-arg> PIPELINE_MODEL=<second-arg>
# NOTE(review): the first argument looks like a dataset name and the second
# like a model/deployment name -- confirm against run_pipeline.sh.
PIPELINE_DATASET ?= synthetic_gambling_XS
PIPELINE_MODEL ?= gpt-4.1-sweden-2025-03

# Requires the Consult Pipeline containers to be replicated first;
# this target will not work without them.
pipeline:
	./run_pipeline.sh $(PIPELINE_DATASET) $(PIPELINE_MODEL)
# Run setup_consultation.py, passing NAME as its positional argument.
# Optional make variables are forwarded as flags only when they are non-empty:
#   DIR -> --dir    RESPONSES -> --responses    QU -> --qu    UNTIL -> --until
# Example: make setup-consultation NAME=<name> DIR=<dir>
setup-consultation:
	uv run python setup_consultation.py $(NAME) \
		$(if $(DIR),--dir $(DIR)) $(if $(RESPONSES),--responses $(RESPONSES)) \
		$(if $(QU),--qu $(QU)) $(if $(UNTIL),--until $(UNTIL))