Skip to content

Commit 7f4364d

Browse files
committed
add example
1 parent e77ae00 commit 7f4364d

File tree

5 files changed

+24
-4
lines changed

5 files changed

+24
-4
lines changed
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
"""
2+
Example script for discovering popular Lean 4 repositories on Github.
3+
The repositories are saved at <RAID_DIR>/<DATA_DIR>/repo_info_compatible.json
4+
5+
Usage: python examples/data_generation/discover_github.py
6+
"""
7+
8+
from lean_dojo_v2.database import DynamicDatabase
9+
10+
database = DynamicDatabase()
11+
12+
database.discover_repositories(
13+
num_repos=5,
14+
curriculum_learning=True,
15+
build_deps=False,
16+
)

lean_dojo_v2/database/dynamic_database.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -516,6 +516,7 @@ def discover_repositories(
516516
self,
517517
num_repos: int,
518518
curriculum_learning: bool,
519+
build_deps: bool = True,
519520
) -> List[LeanGitRepo]:
520521
"""
521522
Initialize the database and discover repositories.
@@ -536,7 +537,9 @@ def discover_repositories(
536537

537538
for repo in lean_git_repos:
538539
logger.info(f"Processing {repo.url}")
539-
traced_repo = self.trace_repository(repo.url, repo.commit)
540+
traced_repo = self.trace_repository(
541+
repo.url, repo.commit, build_deps=build_deps
542+
)
540543
if traced_repo:
541544
self.add_repository(traced_repo)
542545
logger.info(f"Successfully added repo {traced_repo.url}")

lean_dojo_v2/lean_dojo/data_extraction/lean.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -626,7 +626,6 @@ def exists(self) -> bool:
626626
repo.commit(self.commit)
627627
if repo.head.commit.hexsha != self.commit:
628628
self.repo.git.checkout(self.commit)
629-
print(self.repo.working_dir, self.commit)
630629
return True
631630
except BadName:
632631
logger.warning(

lean_dojo_v2/utils/git.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,8 @@ def clone_repo(repo_url: str) -> Tuple[str, str]:
3535
"""
3636
repo_name = "/".join(repo_url.split("/")[-2:]).replace(".git", "")
3737
RAID_DIR = os.environ.get("RAID_DIR")
38+
if not RAID_DIR:
39+
RAID_DIR = os.path.join(os.getcwd(), "raid")
3840

3941
logger.info(f"Cloning {repo_url}")
4042
logger.info(f"Repo name: {repo_name}")

lean_dojo_v2/utils/lean.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,13 +22,13 @@ def get_lean4_version_from_config(toolchain: str) -> str:
2222

2323
def is_supported_version(v: str) -> bool:
2424
"""
25-
Check if ``v`` is at least `v4.3.0` and at most `v4.24.0`.
25+
Check if ``v`` is at least `v4.3.0` and at most `v4.30.0`.
2626
Note: Lean versions are generally not backwards-compatible. Also, the Lean FRO
2727
keeps bumping the default versions of repos to the latest version, which is
2828
not necessarily the latest stable version. So, we need to be careful about
2929
what we choose to support.
3030
"""
31-
max_version = 24
31+
max_version = 30
3232
min_version = 3
3333
if not v.startswith("v"):
3434
return False

0 commit comments

Comments
 (0)