-
Notifications
You must be signed in to change notification settings - Fork 16
Expand file tree
/
Copy pathconfig.py
More file actions
51 lines (44 loc) · 3.5 KB
/
Copy pathconfig.py
File metadata and controls
51 lines (44 loc) · 3.5 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
"""
config.py - Central configuration for OSCP Repo Collector
"""
import os
# ──────────────────────────────────────────────────
# Credentials
# Both values are read from the process environment at import time and
# fall back to an empty string when the variable is not set.
# ──────────────────────────────────────────────────
GITHUB_TOKEN = os.getenv("GITHUB_TOKEN", "")
ANTHROPIC_API_KEY = os.getenv("ANTHROPIC_API_KEY", "")

# ──────────────────────────────────────────────────
# GitHub repository search
# ──────────────────────────────────────────────────
GITHUB_API_BASE = "https://api.github.com"

# Search qualifiers, one per fragment; adjacent literals are concatenated
# into a single space-separated query string.
GITHUB_QUERY = (
    "OSCP in:name,description "
    "created:>2025-01-01 "
    "pushed:>2025-10-01 "
    "fork:false"
)

# Page size requested from GitHub (the maximum it allows per page).
PER_PAGE = 100
# Upper bound on pages fetched: PER_PAGE * MAX_PAGES = 1000 repos at most.
MAX_PAGES = 10

# ──────────────────────────────────────────────────
# Result filtering
# ──────────────────────────────────────────────────
# Repositories smaller than this many KB are dropped.
MIN_SIZE_KB = 4

# ──────────────────────────────────────────────────
# Filesystem locations (all anchored at this file's directory)
# ──────────────────────────────────────────────────
BASE_DIR = os.path.split(os.path.abspath(__file__))[0]
REPOS_DIR = os.path.join(BASE_DIR, "cloned_repos")
DB_PATH = os.path.join(BASE_DIR, "oscp_repos.db")
EXPORT_JSON_PATH = os.path.join(BASE_DIR, "visualize", "data.json")

# ──────────────────────────────────────────────────
# Cloning
# ──────────────────────────────────────────────────
# Shallow clone: only the latest snapshot is needed.
CLONE_DEPTH = 1
# Seconds allowed per repository before giving up.
CLONE_TIMEOUT = 120
# Size ceiling in KB; larger repos are not cloned (their tree is fetched
# via the API instead).
MAX_CLONE_SIZE = 500 * 1000

# ──────────────────────────────────────────────────
# Metadata extraction
# ──────────────────────────────────────────────────
# Deepest directory level visited when walking the file tree.
MAX_DIR_DEPTH = 8
# README content is capped at 30 KB before Base64 encoding.
README_MAX_BYTES = 30 * 1000

# ──────────────────────────────────────────────────
# Categorization (Claude)
# ──────────────────────────────────────────────────
CLAUDE_MODEL = "claude-sonnet-4-6"
CAT_MAX_RETRIES = 3
# Seconds to wait between retries.
CAT_RETRY_DELAY = 5