File tree Expand file tree Collapse file tree 8 files changed +49
-5
lines changed
Expand file tree Collapse file tree 8 files changed +49
-5
lines changed Original file line number Diff line number Diff line change 1+ """
2+ Example script for generating dataset from a Lean 4 GitHub repository.
3+ The data is saved at <RAID_DIR>/<DATA_DIR>/<repo.name>_<repo.commit>.
4+ e.g. LeanDojo-v2/raid/data/lean4-example_005de00d03f1aaa32cb2923d5e3cbaf0b954a192
5+
6+ Usage: python examples/data_generation/trace_github.py
7+ """
8+
9+ from lean_dojo_v2 .database import DynamicDatabase
10+
11+ url = "https://github.com/durant42040/lean4-example"
12+ commit = "005de00d03f1aaa32cb2923d5e3cbaf0b954a192"
13+
14+ database = DynamicDatabase ()
15+
16+ database .trace_repository (
17+ url = url ,
18+ commit = commit ,
19+ build_deps = False ,
20+ )
Original file line number Diff line number Diff line change 1+ """
2+ Example script for generating dataset from a local Lean 4 repository.
3+ The data is saved at <RAID_DIR>/<DATA_DIR>/<repo.name>_<repo.commit>.
4+ e.g. LeanDojo-v2/raid/data/lean4-example_005de00d03f1aaa32cb2923d5e3cbaf0b954a192
5+
6+ Usage: python examples/data_generation/trace_local.py
7+ """
8+
9+ from lean_dojo_v2 .database import DynamicDatabase
10+
11+ # path = "path/to/lean4-example"
12+ path = "/Users/electron/Code/lean4-example"
13+ commit = "005de00d03f1aaa32cb2923d5e3cbaf0b954a192"
14+
15+ database = DynamicDatabase ()
16+
17+ database .trace_repository (
18+ url = path ,
19+ commit = commit ,
20+ build_deps = False ,
21+ )
Original file line number Diff line number Diff line change 55from loguru import logger
66from pantograph import Server
77
8- from lean_dojo_v2 .database . dynamic_database import DynamicDatabase
8+ from lean_dojo_v2 .database import DynamicDatabase
99from lean_dojo_v2 .lean_dojo .data_extraction .lean import LeanGitRepo
1010from lean_dojo_v2 .lean_dojo .data_extraction .trace import get_traced_repo_path
1111from lean_dojo_v2 .utils .constants import DATA_DIR , RAID_DIR
Original file line number Diff line number Diff line change @@ -624,7 +624,10 @@ def exists(self) -> bool:
624624 repo = self .repo # git repo
625625 try :
626626 repo .commit (self .commit )
627- return repo .head .commit .hexsha == self .commit
627+ if repo .head .commit .hexsha != self .commit :
628+ self .repo .git .checkout (self .commit )
629+ print (self .repo .working_dir , self .commit )
630+ return True
628631 except BadName :
629632 logger .warning (
630633 f"Commit { self .commit } does not exist in this repository."
Original file line number Diff line number Diff line change 1212from transformers import AutoModelForCausalLM , AutoTokenizer
1313from trl import GRPOConfig
1414
15- from lean_dojo_v2 .database . dynamic_database import DynamicDatabase
15+ from lean_dojo_v2 .database import DynamicDatabase
1616from lean_dojo_v2 .lean_dojo .data_extraction .lean import LeanGitRepo
1717from lean_dojo_v2 .utils import remove_marks
1818from lean_dojo_v2 .utils .constants import DATA_DIR , RAID_DIR
Original file line number Diff line number Diff line change 99from pytorch_lightning import seed_everything
1010from tqdm import tqdm
1111
12- from lean_dojo_v2 .database . dynamic_database import DynamicDatabase
12+ from lean_dojo_v2 .database import DynamicDatabase
1313from lean_dojo_v2 .database .models .repository import Repository
1414from lean_dojo_v2 .lean_agent .config import TrainingConfig
1515from lean_dojo_v2 .lean_agent .retrieval .datamodule import RetrievalDataModule
Original file line number Diff line number Diff line change 1313from transformers import AutoModelForCausalLM , AutoTokenizer
1414from trl import SFTConfig
1515
16- from lean_dojo_v2 .database . dynamic_database import DynamicDatabase
16+ from lean_dojo_v2 .database import DynamicDatabase
1717from lean_dojo_v2 .lean_dojo .data_extraction .lean import LeanGitRepo
1818from lean_dojo_v2 .utils import remove_marks
1919from lean_dojo_v2 .utils .constants import DATA_DIR , RAID_DIR
You can’t perform that action at this time.
0 commit comments