---
# OCR System Configuration
# Environment: absolute path to the .env file.
# NOTE(review): presumably the file that supplies the variables named under
# `api_keys` - confirm against the loader.
env_file_path: "/home/jupyter/Program/.env"
# Directories: absolute host paths used by the OCR tooling.
# The child keys must be indented under `directories`; left flush they become
# top-level keys, and `credentials` would collide with the top-level
# `credentials` section.
directories:
  credentials: "/home/jupyter/Program/credentials"
  docker_build: "/home/jupyter/Program/OCR/src/ocr"
# Google Cloud Storage settings.
gcs:
  bucket_name: "eju-ocr-results"
  # Centralized in v3.0_initial; defaults match the previously hardcoded
  # values.
  # NOTE(review): the prefixes look like per-stage object prefixes inside the
  # bucket, and validate_max_results like a listing cap - confirm.
  stage1_prefix: "stage_1"
  stage2_prefix: "stage_2"
  validate_max_results: 5
# API keys (loaded from the environment).
# Each value is the NAME of an environment variable, not the secret itself;
# never put real keys in this file.
api_keys:
  openai: "OPENAI_API_KEY"
  gemini_api_key: "GEMINI_API_KEY"
  mathpix_app_id: "MATHPIX_APP_ID"
  mathpix_app_key: "MATHPIX_APP_KEY"
# Docker configuration.
# NOTE(review): the nesting below was reconstructed from the section comments
# ("for Docker build", "Container paths", "Volume mounts"). Confirm that
# source_files, container_paths and volumes are read from under `docker`
# rather than from the top level.
docker:
  image_name: "cantaloupe"
  gpu_enabled: false
  # NOTE(review): runtime and nvidia_settings presumably only take effect when
  # gpu_enabled is true - confirm.
  runtime: "nvidia"
  nvidia_settings:
    visible_devices: "all"
    driver_capabilities: "compute,utility"

  # Source file paths (for the Docker build); all values are relative paths.
  source_files:
    dockerfile: "src/ocr/Dockerfile"
    advanced_ocr: "src/ocr/advanced_ocr.py"
    custom_doclayout_yolo: "src/ocr/custom_doclayout_yolo.py"
    config: "config.yaml"
    prompts: "prompts/"

  # Paths inside the container.
  container_paths:
    input: "/app/input"
    output: "/app/output"
    credentials: "/app/credentials"
    config: "/app/config"
    python_path: "/app:/app/doclayout-yolo"
    main_script: "/app/advanced_ocr.py"

  # Volume mounts; the entries match the input/output/credentials/config
  # container paths above.
  volumes:
    - "/app/input"
    - "/app/output"
    - "/app/credentials"
    - "/app/config"
# Credentials: credential file names.
# NOTE(review): presumably resolved relative to `directories.credentials` -
# confirm against the loader.
credentials:
  google_vision_account: "Google_Vision_S.Account.json"
# Logging.
logging:
  level: "INFO"
  # printf-style format string using %(asctime)s / %(name)s / %(levelname)s /
  # %(message)s placeholders.
  format: "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
  stage2_log_file: "ocr_stage2.log"
# OCR processing.
ocr:
  file_extensions: [".pdf"]
  max_display_files: 20
  confidence_threshold: 0.5
  use_cache: true
  cache_dir: "cache"
  # Centralized in v3.0_initial; defaults match the previously hardcoded
  # values.
  language_hints: ["ja", "en", "ko"]
  pdf_dpi: 200
  iou_threshold: 0.5
  # NOTE(review): image_processing is assumed to belong under `ocr` because it
  # has no section header of its own - confirm against the consumer.
  image_processing:
    vision_max_dim: 1600
    gemini_max_dim: 1024
    jpeg_quality: 85
# DocLayout-YOLO configuration.
# NOTE(review): kept as a top-level section because it has its own header;
# confirm it is not expected under `ocr`.
doclayout_yolo:
  model_path: null  # Local model file path (optional)
  device: "auto"  # "auto", "cuda:0", "cpu", etc.
  huggingface_repo_id: "juliozhao/DocLayout-YOLO-DocStructBench"
  huggingface_filename: "doclayout_yolo_docstructbench_imgsz1024.pt"
  fallback_model: "yolov8n.pt"  # Fallback model for ultralytics YOLO
  default_imgsz: 1024
  default_conf: 0.25
# Gemini API (centralized in v3.0_initial).
# `model` must stay nested here; left flush it collides with the `stage2`
# model key.
gemini:
  model: "gemini-2.0-flash"
  # Prompt file paths, relative.
  figure_prompt_path: "prompts/gemini_figure.txt"
  table_prompt_path: "prompts/gemini_table.txt"
# Stage 2 processing (ChatGPT correction).
# `model` must stay nested here; left flush it collides with the `gemini`
# model key.
stage2:
  model: "gpt-5-nano"
  max_tokens: 100000
  # Centralized in v3.0_initial.
  system_prompt_path: "prompts/chatgpt_stage2.md"