forked from achimrabus/polyscriptor
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathlighton_models.py
More file actions
133 lines (104 loc) · 3.38 KB
/
lighton_models.py
File metadata and controls
133 lines (104 loc) · 3.38 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
"""
LightOnOCR Model Registry

Extensible registry for LightOnOCR fine-tuned model variants.
Easy for users/community to add new models.

Usage:
    from lighton_models import LIGHTON_MODELS, get_available_models, add_custom_model

    # Get all available models
    models = get_available_models()

    # Add a custom model at runtime
    add_custom_model("My Custom Model", "username/model-id", "Description")
"""
from typing import Dict, Any, List, Optional
# Central table of the LightOnOCR presets known to this module.
# Each key is the display name; each value is an info dict with the
# fields "id", "description", "language", "vram" and "type".
LIGHTON_MODELS: Dict[str, Dict[str, Any]] = {
    "Base Model (1B)": {
        "id": "lightonai/LightOnOCR-2-1B-base",
        "description": "Base LightOnOCR model (1B params). General multilingual OCR.",
        "language": "multilingual",
        "vram": "~4GB",
        "type": "line",
    },
    "German Shorthand": {
        "id": "wjbmattingly/LightOnOCR-2-1B-german-shorthand-line",
        "description": "Fine-tuned for German Kurrent/Shorthand scripts.",
        "language": "de",
        "vram": "~4GB",
        "type": "line",
    },
    # Template for contributing another preset -- copy, uncomment, and fill in:
    # "Church Slavonic": {
    #     "id": "username/LightOnOCR-2-1B-church-slavonic",
    #     "description": "Fine-tuned for Church Slavonic manuscripts",
    #     "language": "cu",
    #     "vram": "~4GB",
    #     "type": "line",
    # },
}
def get_available_models() -> Dict[str, Dict[str, Any]]:
    """
    Return a snapshot of all preset models, e.g. for filling a dropdown.

    Both the outer mapping and every per-model info dict are copied, so
    editing the returned data does not alter the LIGHTON_MODELS registry.

    Returns:
        Dict mapping display names to model info dicts
    """
    # LIGHTON_MODELS.copy() alone is shallow: the inner info dicts would
    # still be the registry's own objects, so copy one level deeper.
    return {name: dict(info) for name, info in LIGHTON_MODELS.items()}
def get_model_info(display_name: str) -> Optional[Dict[str, Any]]:
    """
    Look up the info dict registered under a display name.

    The returned dict is the registry's own entry, not a copy.

    Args:
        display_name: The display name of the model

    Returns:
        The model's info dict, or None when the name is not registered
    """
    try:
        return LIGHTON_MODELS[display_name]
    except KeyError:
        # Unknown names are reported as None rather than raised.
        return None
def get_model_id(display_name: str) -> Optional[str]:
    """
    Resolve a display name to the value stored under its "id" key.

    Args:
        display_name: The display name of the model

    Returns:
        The HuggingFace model ID, or None when the name is not registered,
        its entry is empty, or the entry has no "id" key
    """
    entry = LIGHTON_MODELS.get(display_name)
    if not entry:
        # Missing (or empty) entry: nothing to resolve.
        return None
    return entry.get("id")
def add_custom_model(
    name: str,
    model_id: str,
    description: str = "",
    language: str = "unknown",
    vram: str = "~4GB",
    model_type: str = "line",
) -> None:
    """
    Add a custom model to the registry at runtime.

    Useful for user-defined presets or community models discovered during
    a session. The entry is written into the module-level LIGHTON_MODELS
    dict; an existing entry with the same name is replaced.

    Args:
        name: Display name for the model (used as the registry key)
        model_id: HuggingFace model ID (e.g., "username/model-name")
        description: Brief description of the model
        language: Language code(s) the model supports
        vram: Estimated VRAM requirement
        model_type: Value stored under the entry's "type" key; defaults to
            "line", the value every built-in preset uses
    """
    # Same field layout and key order as the built-in presets.
    LIGHTON_MODELS[name] = {
        "id": model_id,
        "description": description,
        "language": language,
        "vram": vram,
        "type": model_type,
    }
def get_model_names() -> List[str]:
    """
    List the display names of every registered model.

    Returns:
        A new list of model names, in registry order, for UI dropdowns
    """
    # Iterating a dict yields its keys, so this matches list(d.keys()).
    return list(LIGHTON_MODELS)
def is_valid_model(display_name: str) -> bool:
    """
    Tell whether a display name is a key in the model registry.

    Args:
        display_name: The display name to check

    Returns:
        True if the name is registered in LIGHTON_MODELS, False otherwise
    """
    registered_names = LIGHTON_MODELS.keys()
    return display_name in registered_names