Skip to content

Commit 20918d6

Browse files
committed
Added support for multiple, read-only, caching directories.
1 parent 83b161b commit 20918d6

File tree

3 files changed

+80
-25
lines changed

3 files changed

+80
-25
lines changed

cached-translated-groovy3-parser.py

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -360,11 +360,16 @@ def analyze_nf_source(
360360
jsonfile: "str",
361361
resultfile: "str",
362362
cache_directory: "Optional[str]" = None,
363+
ro_cache_directories: "Sequence[str]" = [],
363364
) -> "Union[RuleNode, LeafNode, EmptyNode]":
364365
with open(filename, mode="r", encoding="utf-8") as wfH:
365366
content = wfH.read()
366367

367-
t_tree = parse_and_digest_groovy_content(content, cache_directory=cache_directory)
368+
t_tree = parse_and_digest_groovy_content(
369+
content,
370+
cache_directory=cache_directory,
371+
ro_cache_directories=ro_cache_directories,
372+
)
368373

369374
# These are for debugging purposes
370375
# logging.debug(tree.pretty())
@@ -409,6 +414,16 @@ def analyze_nf_source(
409414
print(
410415
"[WARNING] No caching is done. If you want to cache parsed content declare variable GROOVY_CACHEDIR"
411416
)
417+
418+
ro_cache_directories = []
419+
cache_directory_ro = os.environ.get("GROOVY_CACHEDIRS_RO")
420+
if cache_directory_ro is not None:
421+
print(f"* Using as read-only caching directories {cache_directory_ro}")
422+
ro_cache_directories = cache_directory_ro.split(":")
423+
else:
424+
print(
425+
"[WARNING] No read-only caching is used. If you want to use cached parsed contents declare variable GROOVY_CACHEDIRS_RO, separating more than one path by colons"
426+
)
412427
for filename in sys.argv[1:]:
413428
print(f"* Parsing {filename}")
414429
logfile = filename + ".lark"
@@ -420,7 +435,11 @@ def analyze_nf_source(
420435
log.addHandler(fH) # set the new handler
421436
try:
422437
analyze_nf_source(
423-
filename, jsonfile, resultfile, cache_directory=cache_directory
438+
filename,
439+
jsonfile,
440+
resultfile,
441+
cache_directory=cache_directory,
442+
ro_cache_directories=ro_cache_directories,
424443
)
425444
except Exception as e:
426445
print(f"\tParse failed, see {logfile}")

groovy_parser/__init__.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
# -*- coding: utf-8 -*-
33

44
# SPDX-License-Identifier: Apache-2.0
5-
# Copyright (C) 2024 Barcelona Supercomputing Center, José M. Fernández
5+
# Copyright (C) 2025 Barcelona Supercomputing Center, José M. Fernández
66
#
77
# Licensed under the Apache License, Version 2.0 (the "License");
88
# you may not use this file except in compliance with the License.
@@ -17,8 +17,8 @@
1717
# limitations under the License.
1818

1919
__author__ = "José M. Fernández <https://orcid.org/0000-0002-4806-5140>"
20-
__copyright__ = "2024 Barcelona Supercomputing Center (BSC), ES"
20+
__copyright__ = "2025 Barcelona Supercomputing Center (BSC), ES"
2121
__license__ = "Apache-2.0"
2222

2323
# https://www.python.org/dev/peps/pep-0396/
24-
__version__ = "0.1.2"
24+
__version__ = "0.2.0"

groovy_parser/parser.py

Lines changed: 56 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
# -*- coding: utf-8 -*-
33

44
# SPDX-License-Identifier: Apache-2.0
5-
# Copyright (C) 2024 Barcelona Supercomputing Center, José M. Fernández
5+
# Copyright (C) 2025 Barcelona Supercomputing Center, José M. Fernández
66
#
77
# Licensed under the Apache License, Version 2.0 (the "License");
88
# you may not use this file except in compliance with the License.
@@ -24,6 +24,7 @@
2424
import json
2525
import os
2626
import os.path
27+
import pathlib
2728
from typing import (
2829
cast,
2930
TYPE_CHECKING,
@@ -157,6 +158,7 @@ def create_groovy_parser() -> "Lark":
157158
# parser='lalr',
158159
# debug=True,
159160
start="compilation_unit",
161+
# ambiguity='explicit',
160162
# lexer_callbacks={
161163
# 'square_bracket_block': jarlmethod
162164
# }
@@ -217,13 +219,21 @@ def digest_lark_tree(
217219

218220
def parse_and_digest_groovy_content(
219221
content: "str",
220-
cache_directory: "Optional[str]" = None,
222+
ro_cache_directories: "Optional[Sequence[Union[str, os.PathLike[str]]]]" = None,
223+
cache_directory: "Optional[Union[str, os.PathLike[str]]]" = None,
221224
prune: "Sequence[str]" = ["sep", "nls"],
222225
noflat: "Sequence[str]" = ["script_statement"],
223226
) -> "Union[RuleNode, LeafNode, EmptyNode]":
224227
t_tree: "Optional[Union[RuleNode, LeafNode, EmptyNode]]" = None
225-
hashfile: "Optional[str]" = None
226-
if cache_directory is not None and os.path.isdir(cache_directory):
228+
hashpath: "Optional[pathlib.Path]" = None
229+
cache_path: "Optional[pathlib.Path]" = None
230+
if cache_directory is not None:
231+
if isinstance(cache_directory, pathlib.Path):
232+
cache_path = cache_directory
233+
else:
234+
cache_path = pathlib.Path(cache_directory)
235+
236+
if cache_path is not None and cache_path.is_dir():
227237
h = hashlib.sha256()
228238
buff = bytearray(BLOCK_SIZE)
229239

@@ -246,31 +256,57 @@ def parse_and_digest_groovy_content(
246256
# Now we can obtain the relative directory, unique to this
247257
# version of the software and its dependencies
248258
hreldir = h.copy().hexdigest()
249-
this_cache_directory = os.path.join(cache_directory, hreldir)
250-
os.makedirs(this_cache_directory, exist_ok=True)
251259

252-
# Now, let's go for the content signature
253-
h.update(content.encode("utf-8"))
254-
hashfile = os.path.join(this_cache_directory, h.hexdigest() + ".json.gz")
260+
ro_cache_paths: "MutableSequence[pathlib.Path]" = []
261+
if ro_cache_directories is not None:
262+
for ro_cache_directory in ro_cache_directories:
263+
if isinstance(ro_cache_directory, pathlib.Path):
264+
ro_cache_path = ro_cache_directory
265+
else:
266+
ro_cache_path = pathlib.Path(ro_cache_directory)
267+
268+
# Include only existing cache paths
269+
this_ro_cache_path = ro_cache_path / hreldir
270+
if this_ro_cache_path.is_dir():
271+
ro_cache_paths.append(this_ro_cache_path)
255272

256-
if os.path.isfile(hashfile):
257-
try:
258-
with gzip.open(hashfile, mode="rt", encoding="utf-8") as jH:
259-
t_tree = json.load(jH)
260-
except:
261-
# If it is unreadable, re-create
262-
pass
273+
this_cache_path = cache_path / hreldir
274+
this_cache_path.mkdir(parents=True, exist_ok=True)
263275

264-
if t_tree is None:
276+
ro_cache_paths.append(this_cache_path)
277+
278+
# Now, let's go for the content signature
279+
h.update(content.encode("utf-8"))
280+
rel_hashpath = h.hexdigest() + ".json.gz"
281+
282+
# This is needed in case nothing was available
283+
hashpath = this_cache_path / rel_hashpath
284+
for ro_cache_path in ro_cache_paths:
285+
ro_hashpath = ro_cache_path / rel_hashpath
286+
if ro_hashpath.is_file():
287+
try:
288+
with gzip.open(
289+
ro_hashpath.as_posix(), mode="rt", encoding="utf-8"
290+
) as jH:
291+
t_tree = json.load(jH)
292+
hashpath = None
293+
break
294+
except:
295+
# If it is unreadable, re-create
296+
pass
297+
298+
if t_tree is None and (hashpath is not None or cache_path is None):
265299
tree = parse_groovy_content(content)
266300
t_tree = LarkFilteringTreeEncoder().default(
267301
tree,
268302
prune=prune,
269303
noflat=noflat,
270304
)
271305

272-
if hashfile is not None:
273-
with gzip.open(hashfile, mode="wt", encoding="utf-8") as jH:
274-
json.dump(t_tree, jH, sort_keys=True)
306+
assert t_tree is not None
307+
308+
if hashpath is not None:
309+
with gzip.open(hashpath.as_posix(), mode="wt", encoding="utf-8") as jH:
310+
json.dump(t_tree, jH, sort_keys=True)
275311

276312
return t_tree

0 commit comments

Comments
 (0)