-
Notifications
You must be signed in to change notification settings - Fork 10
Expand file tree
/
Copy pathbenchmark_batch.py
More file actions
46 lines (39 loc) · 1.46 KB
/
benchmark_batch.py
File metadata and controls
46 lines (39 loc) · 1.46 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
import time
import os
import gc
import torch
from IndicPhotoOCR.ocr import OCR
# Benchmark script: compares per-image wall-clock time of OCR.ocr() when
# called with batch_size=0 (labelled "sequential" below) against
# batch_size=32 (labelled "batched") on a small, fixed set of images.

# Directory scanned for benchmark images (machine-specific path).
image_dir = '/DATA1/ocrteam/anik/splitonBSTD/4/12C_images'
# File-name suffixes treated as images (case-sensitive match).
IMAGE_EXTENSIONS = ('.jpg', '.png')
# Number of images taken from the directory for the benchmark.
NUM_TEST_IMAGES = 5
# batch_size value used for the batched phase.
BATCH_SIZE = 32


def list_images(directory, limit=None, extensions=IMAGE_EXTENSIONS):
    """Return paths of image files in *directory*, sorted by file name.

    ``os.listdir`` returns entries in arbitrary order, so the names are
    sorted to make the selected benchmark set reproducible between runs.

    Args:
        directory: Directory to scan (not recursive).
        limit: If not ``None``, return at most this many paths.
        extensions: Tuple of suffixes a file name must end with.

    Returns:
        List of ``os.path.join(directory, name)`` paths.

    Raises:
        OSError: If *directory* cannot be listed.
    """
    names = sorted(name for name in os.listdir(directory) if name.endswith(extensions))
    paths = [os.path.join(directory, name) for name in names]
    return paths if limit is None else paths[:limit]


def _sync_device():
    """Wait for queued CUDA work to finish so wall-clock timings cover it."""
    if torch.cuda.is_available():
        torch.cuda.synchronize()


def _release_gpu_memory():
    """Free unreachable Python objects, then return cached CUDA blocks."""
    # Collect first: memory freed by the garbage collector only becomes
    # releasable by empty_cache() after the owning objects are gone.
    gc.collect()
    torch.cuda.empty_cache()


def time_pass(run_one, images):
    """Call ``run_one(image)`` for every image and return elapsed seconds.

    The device is synchronized before starting and before stopping the
    clock so that asynchronous GPU work is attributed to the right pass.

    Args:
        run_one: Callable taking a single image path.
        images: Iterable of image paths.

    Returns:
        Elapsed wall-clock time in seconds (float).
    """
    _sync_device()
    # perf_counter() is monotonic and high-resolution, unlike time.time().
    start = time.perf_counter()
    for image in images:
        run_one(image)
    _sync_device()
    return time.perf_counter() - start


def compute_speedup(baseline_seconds, candidate_seconds):
    """Return ``baseline / candidate``, or ``None`` if candidate is not positive."""
    if candidate_seconds <= 0:
        return None
    return baseline_seconds / candidate_seconds


def main():
    """Run warmup, the sequential pass, the batched pass, and print a summary.

    Raises:
        SystemExit: If no matching images are found in ``image_dir``.
    """
    test_images = list_images(image_dir, limit=NUM_TEST_IMAGES)
    if not test_images:
        raise SystemExit(f"No {'/'.join(IMAGE_EXTENSIONS)} images found in {image_dir}")

    print(f"Benchmarking on {len(test_images)} images:")
    for img in test_images:
        print(f" - {img}")

    print("\n--- PHASE 1: Initialization & Warmup (Downloads models and loads them to GPU) ---")
    ocr = OCR(device="cuda:0", identifier_lang='auto', verbose=False)
    for img in test_images:
        ocr.ocr(img, batch_size=0)
    # Warm up the batched path as well so that any first-call cost it may
    # have is not charged to the timed batched phase.
    # NOTE(review): assumes the batched path can have its own one-time
    # setup inside OCR.ocr — harmless if it does not.
    for img in test_images:
        ocr.ocr(img, batch_size=BATCH_SIZE)
    _release_gpu_memory()

    print("\n--- PHASE 2: Sequential Benchmarking (batch_size=0) ---")
    seq_time = time_pass(lambda img: ocr.ocr(img, batch_size=0), test_images)
    print(f"Sequential took {seq_time:.2f}s")
    _release_gpu_memory()

    print(f"\n--- PHASE 3: Batched Benchmarking (batch_size={BATCH_SIZE}) ---")
    bat_time = time_pass(lambda img: ocr.ocr(img, batch_size=BATCH_SIZE), test_images)
    print(f"Batched took {bat_time:.2f}s")
    _release_gpu_memory()

    speedup = compute_speedup(seq_time, bat_time)
    speedup_text = "n/a" if speedup is None else f"{speedup:.2f}x"

    print("\n========================================")
    print("BENCHMARK RESULTS")
    print("========================================")
    print(f"Images tested: {len(test_images)}")
    print(f"Sequential Time: {seq_time:.2f} seconds")
    print(f"Batched Time: {bat_time:.2f} seconds")
    print(f"Speedup: {speedup_text}")
    print("========================================\n")


if __name__ == "__main__":
    main()