Skip to content

Commit 386ea4e

Browse files
committed
Update text_recognition model to 2025_08_04
1 parent aca4d4c commit 386ea4e

File tree

9 files changed

+17
-15
lines changed

9 files changed

+17
-15
lines changed

.vscode/launch.json

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@
3535
"XDG_CACHE_HOME": "${workspaceFolder}/cache",
3636
"HF_HUB_OFFLINE": "true",
3737
"DETECTOR_MODEL_CHECKPOINT": "${workspaceFolder}/cache/datalab/models/text_detection/2025_05_07",
38-
"RECOGNITION_MODEL_CHECKPOINT": "${workspaceFolder}/cache/datalab/models/text_recognition/2025_05_16",
38+
"FOUNDATION_MODEL_CHECKPOINT": "${workspaceFolder}/cache/datalab/models/text_recognition/2025_08_04",
3939
"LAYOUT_MODEL_CHECKPOINT": "${workspaceFolder}/cache/datalab/models/layout/2025_02_18",
4040
"OCR_ERROR_MODEL_CHECKPOINT": "${workspaceFolder}/cache/datalab/models/ocr_error_detection/2025_02_18",
4141
"TABLE_REC_MODEL_CHECKPOINT": "${workspaceFolder}/cache/datalab/models/table_recognition/2025_02_18",
@@ -74,7 +74,7 @@
7474
"XDG_CACHE_HOME": "${workspaceFolder}/cache",
7575
"HF_HUB_OFFLINE": "true",
7676
"DETECTOR_MODEL_CHECKPOINT": "${workspaceFolder}/cache/datalab/models/text_detection/2025_05_07",
77-
"RECOGNITION_MODEL_CHECKPOINT": "${workspaceFolder}/cache/datalab/models/text_recognition/2025_05_16",
77+
"FOUNDATION_MODEL_CHECKPOINT": "${workspaceFolder}/cache/datalab/models/text_recognition/2025_08_04",
7878
"LAYOUT_MODEL_CHECKPOINT": "${workspaceFolder}/cache/datalab/models/layout/2025_02_18",
7979
"OCR_ERROR_MODEL_CHECKPOINT": "${workspaceFolder}/cache/datalab/models/ocr_error_detection/2025_02_18",
8080
"TABLE_REC_MODEL_CHECKPOINT": "${workspaceFolder}/cache/datalab/models/table_recognition/2025_02_18",
@@ -122,7 +122,7 @@
122122
"XDG_CACHE_HOME": "${workspaceFolder}/cache",
123123
"HF_HUB_OFFLINE": "true",
124124
"DETECTOR_MODEL_CHECKPOINT": "${workspaceFolder}/cache/datalab/models/text_detection/2025_05_07",
125-
"RECOGNITION_MODEL_CHECKPOINT": "${workspaceFolder}/cache/datalab/models/text_recognition/2025_05_16",
125+
"FOUNDATION_MODEL_CHECKPOINT": "${workspaceFolder}/cache/datalab/models/text_recognition/2025_08_04",
126126
"LAYOUT_MODEL_CHECKPOINT": "${workspaceFolder}/cache/datalab/models/layout/2025_02_18",
127127
"OCR_ERROR_MODEL_CHECKPOINT": "${workspaceFolder}/cache/datalab/models/ocr_error_detection/2025_02_18",
128128
"TABLE_REC_MODEL_CHECKPOINT": "${workspaceFolder}/cache/datalab/models/table_recognition/2025_02_18",
@@ -155,7 +155,7 @@
155155
"XDG_CACHE_HOME": "${workspaceFolder}/cache",
156156
"HF_HUB_OFFLINE": "true",
157157
"DETECTOR_MODEL_CHECKPOINT": "${workspaceFolder}/cache/datalab/models/text_detection/2025_05_07",
158-
"RECOGNITION_MODEL_CHECKPOINT": "${workspaceFolder}/cache/datalab/models/text_recognition/2025_05_16",
158+
"FOUNDATION_MODEL_CHECKPOINT": "${workspaceFolder}/cache/datalab/models/text_recognition/2025_08_04",
159159
"LAYOUT_MODEL_CHECKPOINT": "${workspaceFolder}/cache/datalab/models/layout/2025_02_18",
160160
"OCR_ERROR_MODEL_CHECKPOINT": "${workspaceFolder}/cache/datalab/models/ocr_error_detection/2025_02_18",
161161
"TABLE_REC_MODEL_CHECKPOINT": "${workspaceFolder}/cache/datalab/models/table_recognition/2025_02_18",
@@ -200,7 +200,7 @@
200200
"XDG_CACHE_HOME": "${workspaceFolder}/cache",
201201
"HF_HUB_OFFLINE": "true",
202202
"DETECTOR_MODEL_CHECKPOINT": "${workspaceFolder}/cache/datalab/models/text_detection/2025_05_07",
203-
"RECOGNITION_MODEL_CHECKPOINT": "${workspaceFolder}/cache/datalab/models/text_recognition/2025_05_16",
203+
"FOUNDATION_MODEL_CHECKPOINT": "${workspaceFolder}/cache/datalab/models/text_recognition/2025_08_04",
204204
"LAYOUT_MODEL_CHECKPOINT": "${workspaceFolder}/cache/datalab/models/layout/2025_02_18",
205205
"OCR_ERROR_MODEL_CHECKPOINT": "${workspaceFolder}/cache/datalab/models/ocr_error_detection/2025_02_18",
206206
"TABLE_REC_MODEL_CHECKPOINT": "${workspaceFolder}/cache/datalab/models/table_recognition/2025_02_18",
@@ -241,7 +241,7 @@
241241
"XDG_CACHE_HOME": "${workspaceFolder}/cache",
242242
"HF_HUB_OFFLINE": "true",
243243
"DETECTOR_MODEL_CHECKPOINT": "${workspaceFolder}/cache/datalab/models/text_detection/2025_05_07",
244-
"RECOGNITION_MODEL_CHECKPOINT": "${workspaceFolder}/cache/datalab/models/text_recognition/2025_05_16",
244+
"FOUNDATION_MODEL_CHECKPOINT": "${workspaceFolder}/cache/datalab/models/text_recognition/2025_08_04",
245245
"LAYOUT_MODEL_CHECKPOINT": "${workspaceFolder}/cache/datalab/models/layout/2025_02_18",
246246
"OCR_ERROR_MODEL_CHECKPOINT": "${workspaceFolder}/cache/datalab/models/ocr_error_detection/2025_02_18",
247247
"TABLE_REC_MODEL_CHECKPOINT": "${workspaceFolder}/cache/datalab/models/table_recognition/2025_02_18",
@@ -283,7 +283,7 @@
283283
"XDG_CACHE_HOME": "${workspaceFolder}/cache",
284284
"HF_HUB_OFFLINE": "true",
285285
"DETECTOR_MODEL_CHECKPOINT": "${workspaceFolder}/cache/datalab/models/text_detection/2025_05_07",
286-
"RECOGNITION_MODEL_CHECKPOINT": "${workspaceFolder}/cache/datalab/models/text_recognition/2025_05_16",
286+
"FOUNDATION_MODEL_CHECKPOINT": "${workspaceFolder}/cache/datalab/models/text_recognition/2025_08_04",
287287
"LAYOUT_MODEL_CHECKPOINT": "${workspaceFolder}/cache/datalab/models/layout/2025_02_18",
288288
"OCR_ERROR_MODEL_CHECKPOINT": "${workspaceFolder}/cache/datalab/models/ocr_error_detection/2025_02_18",
289289
"TABLE_REC_MODEL_CHECKPOINT": "${workspaceFolder}/cache/datalab/models/table_recognition/2025_02_18",

.vscode/tasks.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@
4242
},
4343
"command": [
4444
"huggingface-cli download xiaoyao9184/surya_text_detection --repo-type model --revision 2025_05_07 --local-dir ./cache/datalab/models/text_detection/2025_05_07",
45-
"&& huggingface-cli download xiaoyao9184/surya_text_recognition --repo-type model --revision 2025_05_16 --local-dir ./cache/datalab/models/text_recognition/2025_05_16",
45+
"&& huggingface-cli download xiaoyao9184/surya_text_recognition --repo-type model --revision 2025_08_04 --local-dir ./cache/datalab/models/text_recognition/2025_08_04",
4646
"&& huggingface-cli download xiaoyao9184/surya_table_recognition --repo-type model --revision 2025_02_18 --local-dir ./cache/datalab/models/table_recognition/2025_02_18",
4747
"&& huggingface-cli download xiaoyao9184/surya_layout --repo-type model --revision 2025_02_18 --local-dir ./cache/datalab/models/layout/2025_02_18",
4848
"&& huggingface-cli download xiaoyao9184/surya_ocr_error_detection --repo-type model --revision 2025_02_18 --local-dir ./cache/datalab/models/ocr_error_detection/2025_02_18",

cache/README.md

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -336,6 +336,8 @@ diff -qr ./cache/huggingface/hub/models--datalab-to--inline_math_det0/snapshots/
336336
| --- | --- | --- | --- |
337337
| xiaoyao9184/surya_text_detection | 2025_05_07 | text_detection | 2025_05_07 |
338338
| xiaoyao9184/surya_text_recognition | 2025_05_16 | text_recognition | 2025_05_16 |
339+
| xiaoyao9184/surya_text_recognition | 2025_08_01 | text_recognition | 2025_08_01 |
340+
| xiaoyao9184/surya_text_recognition | 2025_08_04 | text_recognition | 2025_08_04 |
339341
| xiaoyao9184/surya_table_recognition | 2025_02_18 | table_recognition | 2025_02_18 |
340342
| xiaoyao9184/surya_texify | 2025_02_18 | texify | 2025_02_18 |
341343
| xiaoyao9184/surya_layout | 2025_02_18 | layout | 2025_02_18 |
@@ -363,7 +365,7 @@ download model
363365

364366
```bash
365367
huggingface-cli download xiaoyao9184/surya_text_detection --repo-type model --revision 2025_05_07 --local-dir ./cache/datalab/models/text_detection/2025_05_07
366-
huggingface-cli download xiaoyao9184/surya_text_recognition --repo-type model --revision 2025_05_16 --local-dir ./cache/datalab/models/text_recognition/2025_05_16
368+
huggingface-cli download xiaoyao9184/surya_text_recognition --repo-type model --revision 2025_08_04 --local-dir ./cache/datalab/models/text_recognition/2025_08_04
367369
huggingface-cli download xiaoyao9184/surya_table_recognition --repo-type model --revision 2025_02_18 --local-dir ./cache/datalab/models/table_recognition/2025_02_18
368370
huggingface-cli download xiaoyao9184/surya_layout --repo-type model --revision 2025_02_18 --local-dir ./cache/datalab/models/layout/2025_02_18
369371
huggingface-cli download xiaoyao9184/surya_ocr_error_detection --repo-type model --revision 2025_02_18 --local-dir ./cache/datalab/models/ocr_error_detection/2025_02_18

cache/hf-s3-model-upload.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88

99
map_remote_path_env_name = {
1010
settings.DETECTOR_MODEL_CHECKPOINT: "DETECTOR_MODEL_REPO_ID",
11-
settings.RECOGNITION_MODEL_CHECKPOINT: "RECOGNITION_MODEL_REPO_ID",
11+
settings.FOUNDATION_MODEL_CHECKPOINT: "RECOGNITION_MODEL_REPO_ID",
1212
settings.LAYOUT_MODEL_CHECKPOINT: "LAYOUT_MODEL_REPO_ID",
1313
settings.TABLE_REC_MODEL_CHECKPOINT: "TABLE_REC_MODEL_REPO_ID",
1414
settings.OCR_ERROR_MODEL_CHECKPOINT: "OCR_ERROR_MODEL_REPO_ID",

cache/surya-s3-model-download.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66

77
for remote_path in [
88
settings.DETECTOR_MODEL_CHECKPOINT,
9-
settings.RECOGNITION_MODEL_CHECKPOINT,
9+
settings.FOUNDATION_MODEL_CHECKPOINT,
1010
settings.LAYOUT_MODEL_CHECKPOINT,
1111
settings.TABLE_REC_MODEL_CHECKPOINT,
1212
settings.OCR_ERROR_MODEL_CHECKPOINT

docker/up.gradio@cpu-offline/docker-compose.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ services:
1010
- TORCH_DEVICE=cpu
1111
- HF_HUB_OFFLINE=true
1212
- DETECTOR_MODEL_CHECKPOINT=/root/.cache/datalab/models/text_detection/2025_05_07
13-
- RECOGNITION_MODEL_CHECKPOINT=/root/.cache/datalab/models/text_recognition/2025_05_16
13+
- FOUNDATION_MODEL_CHECKPOINT=/root/.cache/datalab/models/text_recognition/2025_08_04
1414
- LAYOUT_MODEL_CHECKPOINT=/root/.cache/datalab/models/layout/2025_02_18
1515
- OCR_ERROR_MODEL_CHECKPOINT=/root/.cache/datalab/models/ocr_error_detection/2025_02_18
1616
- TABLE_REC_MODEL_CHECKPOINT=/root/.cache/datalab/models/table_recognition/2025_02_18

docker/up.gradio@gpu-offline/docker-compose.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ services:
1010
- TORCH_DEVICE=cuda
1111
- HF_HUB_OFFLINE=true
1212
- DETECTOR_MODEL_CHECKPOINT=/root/.cache/datalab/models/text_detection/2025_05_07
13-
- RECOGNITION_MODEL_CHECKPOINT=/root/.cache/datalab/models/text_recognition/2025_05_16
13+
- FOUNDATION_MODEL_CHECKPOINT=/root/.cache/datalab/models/text_recognition/2025_08_04
1414
- LAYOUT_MODEL_CHECKPOINT=/root/.cache/datalab/models/layout/2025_02_18
1515
- OCR_ERROR_MODEL_CHECKPOINT=/root/.cache/datalab/models/ocr_error_detection/2025_02_18
1616
- TABLE_REC_MODEL_CHECKPOINT=/root/.cache/datalab/models/table_recognition/2025_02_18

docker/up@cpu-offline/docker-compose.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ services:
88
- TORCH_DEVICE=cpu
99
- HF_HUB_OFFLINE=true
1010
- DETECTOR_MODEL_CHECKPOINT=/root/.cache/datalab/models/text_detection/2025_05_07
11-
- RECOGNITION_MODEL_CHECKPOINT=/root/.cache/datalab/models/text_recognition/2025_05_16
11+
- FOUNDATION_MODEL_CHECKPOINT=/root/.cache/datalab/models/text_recognition/2025_08_04
1212
- LAYOUT_MODEL_CHECKPOINT=/root/.cache/datalab/models/layout/2025_02_18
1313
- OCR_ERROR_MODEL_CHECKPOINT=/root/.cache/datalab/models/ocr_error_detection/2025_02_18
1414
- TABLE_REC_MODEL_CHECKPOINT=/root/.cache/datalab/models/table_recognition/2025_02_18

docker/up@gpu-offline/docker-compose.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ services:
88
- TORCH_DEVICE=cuda
99
- HF_HUB_OFFLINE=true
1010
- DETECTOR_MODEL_CHECKPOINT=/root/.cache/datalab/models/text_detection/2025_05_07
11-
- RECOGNITION_MODEL_CHECKPOINT=/root/.cache/datalab/models/text_recognition/2025_05_16
11+
- FOUNDATION_MODEL_CHECKPOINT=/root/.cache/datalab/models/text_recognition/2025_08_04
1212
- LAYOUT_MODEL_CHECKPOINT=/root/.cache/datalab/models/layout/2025_02_18
1313
- OCR_ERROR_MODEL_CHECKPOINT=/root/.cache/datalab/models/ocr_error_detection/2025_02_18
1414
- TABLE_REC_MODEL_CHECKPOINT=/root/.cache/datalab/models/table_recognition/2025_02_18

0 commit comments

Comments
 (0)