Skip to content

Commit a19db08

Browse files
committed
CLI improvement
1 parent b2e1c69 commit a19db08

File tree

6 files changed

+57
-22
lines changed

6 files changed

+57
-22
lines changed

kraken/configs/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,3 @@
11
from .base import * # NOQA
22
from .vgsl import * # NOQA
3+
from .pretrain import * # NOQA

kraken/configs/base.py

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -98,10 +98,6 @@ class SegmentationTrainingDataConfig(TrainingDataConfig):
9898
9999
format_type (Literal['alto', 'page', 'xml'] defaults to 'xml'):
100100
Format of the training data.
101-
suppress_regions (bool, defaults to False):
102-
Suppresses all regions in the dataset.
103-
suppress_baselines (bool, defaults to False)
104-
Suppresses all baselines in the dataset.
105101
line_class_mapping (dict[str, int], defaults to defaultdict):
106102
Mapping between line class identifiers and integer labels.
107103
region_class_mapping (dict[str, int], defaults to None):

kraken/configs/pretrain.py

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,26 @@
1-
from kraken.models.vgsl import VGSLRecognitionTrainingConfig, VGSLRecognitionTrainingDataConfig
1+
#
2+
# Copyright 2025 Benjamin Kiessling
3+
#
4+
# Licensed under the Apache License, Version 2.0 (the "License");
5+
# you may not use this file except in compliance with the License.
6+
# You may obtain a copy of the License at
7+
#
8+
# http://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing, software
11+
# distributed under the License is distributed on an "AS IS" BASIS,
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
13+
# or implied. See the License for the specific language governing
14+
# permissions and limitations under the License.
15+
"""
16+
kraken.configs.pretrain
17+
~~~~~~~~~~~~~~~~~~~~~~~
218
19+
Configurations for semi-supervised model pretraining.
20+
"""
21+
from kraken.configs.vgsl import VGSLRecognitionTrainingConfig
22+
23+
__all__ = ['VGSLPreTrainingConfig']
324

425

526
class VGSLPreTrainingConfig(VGSLRecognitionTrainingConfig):

kraken/ketos/__init__.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@
4848

4949
from kraken.configs import (Config,
5050
TrainingDataConfig,
51+
VGSLPreTrainingConfig,
5152
VGSLRecognitionTrainingConfig,
5253
VGSLRecognitionTrainingDataConfig,
5354
BLLASegmentationTrainingConfig,
@@ -59,7 +60,8 @@
5960
'train': {**VGSLRecognitionTrainingConfig().__dict__, **VGSLRecognitionTrainingDataConfig().__dict__},
6061
'test': VGSLRecognitionTrainingDataConfig().__dict__,
6162
'segtrain': {**BLLASegmentationTrainingConfig().__dict__, **BLLASegmentationTrainingDataConfig().__dict__},
62-
'segtest': {**BLLASegmentationTrainingConfig().__dict__, **BLLASegmentationTrainingDataConfig().__dict__}}))
63+
'segtest': {**BLLASegmentationTrainingConfig().__dict__, **BLLASegmentationTrainingDataConfig().__dict__},
64+
'pretrain': {**VGSLRecognitionTrainingDataConfig().__dict__, **VGSLPreTrainingConfig().__dict__}}))
6365

6466
@click.version_option()
6567
@click.pass_context

kraken/ketos/pretrain.py

Lines changed: 21 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -36,11 +36,24 @@
3636

3737
@click.command('pretrain')
3838
@click.pass_context
39-
@click.option('-B', '--batch-size', type=int, help='batch sample size')
40-
@click.option('--pad', 'padding', type=int, help='Left and right padding around lines')
41-
@click.option('-o', '--output', 'checkpoint_path', type=click.Path(), help='Output checkpoint path')
42-
@click.option('-s', '--spec', help='VGSL spec of the network to train.')
43-
@click.option('-i', '--load', type=click.Path(exists=True, readable=True),
39+
@click.option('-B',
40+
'--batch-size',
41+
type=int,
42+
help='batch sample size')
43+
@click.option('--pad',
44+
'padding',
45+
type=int,
46+
help='Left and right padding around lines')
47+
@click.option('-o',
48+
'--output',
49+
'checkpoint_path',
50+
type=click.Path(), help='Output checkpoint path')
51+
@click.option('-s',
52+
'--spec',
53+
help='VGSL spec of the network to train.')
54+
@click.option('-i',
55+
'--load',
56+
type=click.Path(exists=True, readable=True),
4457
help='Load existing file to continue training')
4558
@click.option('-F',
4659
'--freq',
@@ -98,7 +111,7 @@
98111
type=int,
99112
help='Minimal number of validation runs between LR reduction for reduceonplateau LR schedule.')
100113
@click.option('--cos-max',
101-
'cos_max_t',
114+
'cos_t_max',
102115
type=int,
103116
help='Epoch of minimal learning rate for cosine LR scheduler.')
104117
@click.option('--cos-min-lr',
@@ -108,16 +121,12 @@
108121
'--partition',
109122
type=float,
110123
help='Ground truth data partition ratio between train/validation set')
111-
@click.option('--fixed-splits/--ignore-fixed-splits', default=False,
112-
help='Whether to honor fixed splits in binary datasets.')
113-
@click.option('-t', '--training-files', default=None, multiple=True,
124+
@click.option('-t', '--training-files', 'training_data', multiple=True,
114125
callback=_validate_manifests, type=click.File(mode='r', lazy=True),
115126
help='File(s) with additional paths to training data')
116-
@click.option('-e', '--evaluation-files', default=None, multiple=True,
127+
@click.option('-e', '--evaluation-files', 'evaluation_data', multiple=True,
117128
callback=_validate_manifests, type=click.File(mode='r', lazy=True),
118129
help='File(s) with paths to evaluation data. Overrides the `-p` parameter')
119-
@click.option('--load-hyper-parameters/--no-load-hyper-parameters', default=False,
120-
help='When loading an existing model, retrieve hyperparameters from the model')
121130
@click.option('-f', '--format-type', type=click.Choice(['path', 'xml', 'alto', 'page', 'binary']),
122131
help='Sets the training data format. In ALTO and PageXML mode all '
123132
'data is extracted from xml files containing both line definitions and a '

kraken/ketos/segmentation.py

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@
3838

3939
@click.command('segtrain')
4040
@click.pass_context
41-
@click.option('-o', '--output', 'checkpoint_path', type=click.Path(), default='model', help='Output model file')
41+
@click.option('-o', '--output', 'checkpoint_path', type=click.Path(), default='model', help='Output checkpoint path')
4242
@click.option('--weights-format', default='safetensors', help='Output weights format.')
4343
@click.option('-s', '--spec', help='VGSL spec of the baseline labeling network')
4444
@click.option('--line-width', type=int, help='The height of each baseline in the target after scaling')
@@ -256,10 +256,16 @@ def segtrain(ctx, **kwargs):
256256
@click.pass_context
257257
@click.option('-m', '--model', type=click.Path(exists=True, readable=True),
258258
multiple=False, help='Model(s) to evaluate')
259-
@click.option('-e', '--test-data', default=None, multiple=True,
260-
callback=_validate_manifests, type=click.File(mode='r', lazy=True),
259+
@click.option('-e',
260+
'--test-files',
261+
'test_data',
262+
multiple=True,
263+
callback=_validate_manifests,
264+
type=click.File(mode='r', lazy=True),
261265
help='File(s) with paths to evaluation data.')
262-
@click.option('-f', '--format-type', type=click.Choice(['xml', 'alto', 'page']), default='xml',
266+
@click.option('-f',
267+
'--format-type',
268+
type=click.Choice(['xml', 'alto', 'page']),
263269
help='Sets the training data format. In ALTO and PageXML mode all '
264270
'data is extracted from xml files containing both baselines and a '
265271
'link to source images.')

0 commit comments

Comments
 (0)