Skip to content

Commit 13f4696

Browse files
authored
Merge pull request #1319 from OCR-D/1294-impl-rm-server-revert-revert
Continuation of #1309: Implementation of the resource manager server (issue #1294)
2 parents 8b7a6b7 + 3df4560 commit 13f4696

24 files changed

+775
-368
lines changed

CHANGELOG.md

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,28 @@ Versioned according to [Semantic Versioning](http://semver.org/).
55

66
## Unreleased
77

8+
Added:
9+
10+
* Resource Manager Server as the `ocrd_network` analogue of `ocrd.cli.resmgr`, #1309
11+
* `ocrd network resmgr-server` for triggering Resource Manager Server (RMS) in the background
12+
* Processing Server also deploys RMS on each processing host
13+
* ...
14+
15+
Fixed:
16+
17+
* `ocrd.resource_manager`: ensure necessary + reduce unnecessary updates of user database
18+
* `ocrd.resource_manager`: deduplicate entries (newer wins) before updating user database
19+
* `ocrd resmgr download`: extract archives independent of whether they are URLs or local paths
20+
* `ocrd resmgr download`: if `--overwrite` is given, ensure the old resource gets removed
21+
* `ocrd resmgr download`: default to the `data` location instead of the first in the list of allowed locations
22+
* `ocrd_utils.list_all_resources`: filter module non-resource files w/ more anti-patterns
23+
* `ocrd_utils.list_all_resources`: no subpaths except for `cwd` location, OCR-D/spec#263, #1315
24+
* `ocrd_utils.list_all_resources`: filter resources via media (MIME) type, if specified, #1315
25+
26+
Removed:
27+
28+
* `ocrd resmgr download`: `*` (asterisk) as a wildcard for all processors has been removed; the processor must now be provided explicitly, #1319
29+
830
## [3.7.0] - 2025-11-02
931

1032
Changed:

requirements.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,12 +3,13 @@ beanie~=1.7
33
click >=7
44
cryptography < 43.0.0
55
Deprecated == 1.2.0
6-
docker
6+
docker>=7.1.0
77
elementpath
88
fastapi>=0.78.0
99
filetype
1010
Flask
1111
frozendict>=2.4.0
12+
gitpython
1213
gdown
1314
httpx>=0.22.0
1415
importlib_metadata ; python_version < '3.8'

src/ocrd/cli/network.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
client_cli,
1313
processing_server_cli,
1414
processing_worker_cli,
15+
resource_manager_server_cli
1516
)
1617

1718

@@ -26,3 +27,4 @@ def network_cli():
2627
network_cli.add_command(client_cli)
2728
network_cli.add_command(processing_server_cli)
2829
network_cli.add_command(processing_worker_cli)
30+
network_cli.add_command(resource_manager_server_cli)

src/ocrd/cli/resmgr.py

Lines changed: 29 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
get_ocrd_tool_json,
2121
initLogging,
2222
RESOURCE_LOCATIONS,
23+
RESOURCE_TYPES
2324
)
2425
from ocrd.constants import RESOURCE_USER_LIST_COMMENT
2526

@@ -70,16 +71,16 @@ def list_installed(executable=None):
7071
@resmgr_cli.command('download')
7172
@click.option('-n', '--any-url', default='', help='URL of unregistered resource to download/copy from')
7273
@click.option('-D', '--no-dynamic', default=False, is_flag=True,
73-
help="Whether to skip looking into each processor's --dump-{json,module-dir} for module-level resources")
74-
@click.option('-t', '--resource-type', type=click.Choice(['file', 'directory', 'archive']), default='file',
75-
help='Type of resource',)
76-
@click.option('-P', '--path-in-archive', default='.', help='Path to extract in case of archive type')
74+
help="Skip looking into each processor's --dump-{json,module-dir} module-registered resources")
75+
@click.option('-t', '--resource-type', type=click.Choice(RESOURCE_TYPES), default='file',
76+
help='Type of resource (when unregistered or incomplete)',)
77+
@click.option('-P', '--path-in-archive', default='.', help='Path to extract in case of archive type (when unregistered or incomplete)')
7778
@click.option('-a', '--allow-uninstalled', is_flag=True,
78-
help="Allow installing resources for uninstalled processors",)
79+
help="Allow installing resources for not installed processors",)
7980
@click.option('-o', '--overwrite', help='Overwrite existing resources', is_flag=True)
80-
@click.option('-l', '--location', type=click.Choice(RESOURCE_LOCATIONS),
81+
@click.option('-l', '--location', type=click.Choice(RESOURCE_LOCATIONS),
8182
help="Where to store resources - defaults to first location in processor's 'resource_locations' "
82-
"list or finally 'data'")
83+
"list, i.e. usually 'data'")
8384
@click.argument('executable', required=True)
8485
@click.argument('name', required=False)
8586
def download(any_url, no_dynamic, resource_type, path_in_archive, allow_uninstalled, overwrite, location, executable,
@@ -106,8 +107,6 @@ def download(any_url, no_dynamic, resource_type, path_in_archive, allow_uninstal
106107
executable = None
107108
if name == '*':
108109
name = None
109-
is_url = (any_url.startswith('https://') or any_url.startswith('http://')) if any_url else False
110-
is_filename = Path(any_url).exists() if any_url else False
111110
if executable and not which(executable):
112111
if not allow_uninstalled:
113112
log.error(f"Executable '{executable}' is not installed. "
@@ -126,65 +125,30 @@ def download(any_url, no_dynamic, resource_type, path_in_archive, allow_uninstal
126125
'path_in_archive': path_in_archive}]
127126
)]
128127
for this_executable, this_reslist in reslist:
129-
for resdict in this_reslist:
130-
if 'size' in resdict:
131-
registered = "registered"
132-
else:
133-
registered = "unregistered"
134-
if any_url:
135-
resdict['url'] = any_url
136-
if resdict['url'] == '???':
137-
log.warning(f"Cannot download user resource {resdict['name']}")
138-
continue
139-
if resdict['url'].startswith('https://') or resdict['url'].startswith('http://'):
140-
log.info(f"Downloading {registered} resource '{resdict['name']}' ({resdict['url']})")
141-
if 'size' not in resdict:
142-
with requests.head(resdict['url']) as r:
143-
resdict['size'] = int(r.headers.get('content-length', 0))
144-
else:
145-
log.info(f"Copying {registered} resource '{resdict['name']}' ({resdict['url']})")
146-
urlpath = Path(resdict['url'])
147-
resdict['url'] = str(urlpath.resolve())
148-
if Path(urlpath).is_dir():
149-
resdict['size'] = directory_size(urlpath)
150-
else:
151-
resdict['size'] = urlpath.stat().st_size
152-
if not location:
153-
location = get_ocrd_tool_json(this_executable)['resource_locations'][0]
154-
elif location not in get_ocrd_tool_json(this_executable)['resource_locations']:
155-
log.error(f"The selected --location {location} is not in the {this_executable}'s resource search path, "
156-
f"refusing to install to invalid location")
157-
sys.exit(1)
158-
if location != 'module':
159-
basedir = resmgr.location_to_resource_dir(location)
160-
else:
161-
basedir = get_moduledir(this_executable)
162-
if not basedir:
163-
basedir = resmgr.location_to_resource_dir('data')
164-
128+
resource_locations = get_ocrd_tool_json(this_executable)['resource_locations']
129+
if not location:
130+
location = resource_locations[0]
131+
elif location not in resource_locations:
132+
log.warning(f"The selected --location {location} is not in the {this_executable}'s resource search path, "
133+
f"refusing to install to invalid location. Instead installing to: {resource_locations[0]}")
134+
res_dest_dir = resmgr.build_resource_dest_dir(location=location, executable=this_executable)
135+
for res_dict in this_reslist:
165136
try:
166-
with click.progressbar(length=resdict['size']) as bar:
167-
fpath = resmgr.download(
168-
this_executable,
169-
resdict['url'],
170-
basedir,
171-
name=resdict['name'],
172-
resource_type=resdict.get('type', resource_type),
173-
path_in_archive=resdict.get('path_in_archive', path_in_archive),
174-
overwrite=overwrite,
175-
no_subdir=location in ['cwd', 'module'],
176-
progress_cb=lambda delta: bar.update(delta)
177-
)
178-
if registered == 'unregistered':
179-
log.info(f"{this_executable} resource '{name}' ({any_url}) not a known resource, creating stub "
180-
f"in {resmgr.user_list}'")
181-
resmgr.add_to_user_database(this_executable, fpath, url=any_url)
182-
resmgr.save_user_list()
183-
log.info(f"Installed resource {resdict['url']} under {fpath}")
137+
fpath = resmgr.handle_resource(
138+
res_dict=res_dict,
139+
executable=this_executable,
140+
dest_dir=res_dest_dir,
141+
any_url=any_url,
142+
overwrite=overwrite,
143+
resource_type=resource_type,
144+
path_in_archive=path_in_archive
145+
)
146+
if not fpath:
147+
continue
184148
except FileExistsError as exc:
185149
log.info(str(exc))
186-
log.info(f"Use in parameters as "
187-
f"'{resmgr.parameter_usage(resdict['name'], usage=resdict.get('parameter_usage', 'as-is'))}'")
150+
usage = res_dict.get('parameter_usage', 'as-is')
151+
log.info(f"Use in parameters as '{resmgr.parameter_usage(res_dict['name'], usage)}'")
188152

189153

190154
@resmgr_cli.command('migrate')

src/ocrd/constants.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,6 @@
99
'DOWNLOAD_DIR',
1010
'DEFAULT_REPOSITORY_URL',
1111
'BASHLIB_FILENAME',
12-
'RESOURCE_LIST_FILENAME',
1312
'BACKUP_DIR',
1413
'RESOURCE_USER_LIST_COMMENT',
1514
]
@@ -19,6 +18,5 @@
1918
DOWNLOAD_DIR = '/tmp/ocrd-core-downloads'
2019
DEFAULT_REPOSITORY_URL = 'http://localhost:5000/'
2120
BASHLIB_FILENAME = resource_filename(__package__, 'lib.bash')
22-
RESOURCE_LIST_FILENAME = resource_filename(__package__, 'resource_list.yml')
2321
RESOURCE_USER_LIST_COMMENT = "# OCR-D private resource list (consider sending a PR with your own resources to OCR-D/core)"
2422
BACKUP_DIR = '.backup'

src/ocrd/processor/base.py

Lines changed: 5 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -42,15 +42,14 @@
4242
from ocrd_utils import (
4343
VERSION as OCRD_VERSION,
4444
MIMETYPE_PAGE,
45-
MIME_TO_EXT,
4645
config,
4746
getLogger,
4847
list_resource_candidates,
49-
pushd_popd,
5048
list_all_resources,
5149
get_processor_resource_types,
5250
resource_filename,
5351
parse_json_file_with_comments,
52+
pushd_popd,
5453
make_file_id,
5554
deprecation_warning
5655
)
@@ -935,9 +934,8 @@ def resolve_resource(self, val):
935934
cwd = self.old_pwd
936935
else:
937936
cwd = getcwd()
938-
ret = [cand for cand in list_resource_candidates(executable, val,
939-
cwd=cwd, moduled=self.moduledir)
940-
if exists(cand)]
937+
ret = list(filter(exists, list_resource_candidates(executable, val,
938+
cwd=cwd, moduled=self.moduledir)))
941939
if ret:
942940
self._base_logger.debug("Resolved %s to absolute path %s" % (val, ret[0]))
943941
return ret[0]
@@ -968,17 +966,9 @@ def list_all_resources(self):
968966
"""
969967
List all resources found in the filesystem and matching content-type by filename suffix
970968
"""
971-
mimetypes = get_processor_resource_types(None, self.ocrd_tool)
972-
for res in list_all_resources(self.ocrd_tool['executable'], moduled=self.moduledir):
969+
for res in list_all_resources(self.executable, ocrd_tool=self.ocrd_tool, moduled=self.moduledir):
973970
res = Path(res)
974-
if '*/*' not in mimetypes:
975-
if res.is_dir() and 'text/directory' not in mimetypes:
976-
continue
977-
# if we do not know all MIME types, then keep the file, otherwise require suffix match
978-
if res.is_file() and not any(res.suffix == MIME_TO_EXT.get(mime, res.suffix)
979-
for mime in mimetypes):
980-
continue
981-
yield res
971+
yield res.name
982972

983973
@property
984974
def module(self):

src/ocrd/resource_list.yml

Lines changed: 0 additions & 61 deletions
This file was deleted.

0 commit comments

Comments
 (0)