66 :nested: full
77
88"""
9- from __future__ import print_function
10- import sys
11- import click
129
10+ # WARNING: bashlib processors have been deprecated as of v3 of the OCR-D/core API
11+ # and will be removed in v3.7.0. We retain the `ocrd bashlib` CLI solely
12+ # so that the `ocrd bashlib filename` command keeps working; it is used in CD
13+ # scripts to get the `share` directory of the core installation.
14+
15+ import click
1316from ocrd .constants import BASHLIB_FILENAME
14- import ocrd .constants
15- import ocrd_utils .constants
16- from ocrd_utils .constants import DEFAULT_METS_BASENAME
17- import ocrd_models .constants
18- import ocrd_validators .constants
19- from ocrd .decorators import (
20- parameter_option ,
21- parameter_override_option ,
22- ocrd_loglevel ,
23- ocrd_cli_wrap_processor
24- )
25- from ocrd_utils import make_file_id
26- from ocrd .processor import Processor
2717
2818# ----------------------------------------------------------------------
2919# ocrd bashlib
@@ -50,104 +40,3 @@ def bashlib_filename():
5040 """
5141 print (BASHLIB_FILENAME )
5242
53-
54- @bashlib_cli .command ('constants' )
55- @click .argument ('name' )
56- def bashlib_constants (name ):
57- """
58- Query constants from ocrd_utils and ocrd_models
59- """
60- all_constants = {}
61- for src in [ocrd .constants , ocrd_utils .constants , ocrd_models .constants , ocrd_validators .constants ]:
62- for k in src .__all__ :
63- all_constants [k ] = src .__dict__ [k ]
64- if name in ['*' , 'KEYS' , '__all__' ]:
65- print (sorted (all_constants .keys ()))
66- sys .exit (0 )
67- if name not in all_constants :
68- print ("ERROR: name '%s' is not a known constant" % name , file = sys .stderr )
69- sys .exit (1 )
70- val = all_constants [name ]
71- if isinstance (val , dict ):
72- # make this bash-friendly (show initialization for associative array)
73- for key in val :
74- print ("[%s]=%s" % (key , val [key ]), end = ' ' )
75- else :
76- print (val )
77-
78-
79- @bashlib_cli .command ('input-files' )
80- @click .option ('--ocrd-tool' , help = "path to ocrd-tool.json of processor to feed" , default = None )
81- @click .option ('--executable' , help = "name of processor executable in ocrd-tool.json" , default = None )
82- @click .option ('-m' , '--mets' , help = "METS to process" , default = DEFAULT_METS_BASENAME )
83- @click .option ('-U' , '--mets-server-url' , help = 'TCP host URI or UDS path of METS server' , default = None )
84- @click .option ('-d' , '--working-dir' , help = "Working Directory" )
85- @click .option ('-I' , '--input-file-grp' , help = 'File group(s) used as input.' , default = None )
86- @click .option ('-O' , '--output-file-grp' , help = 'File group(s) used as output.' , default = None )
87- @click .option ('-g' , '--page-id' , help = "ID(s) of the pages to process" )
88- @click .option ('--overwrite' , is_flag = True , default = False , help = "Remove output pages/images if they already exist\n "
89- "(with '--page-id', remove only those).\n "
90- "Short-hand for OCRD_EXISTING_OUTPUT=OVERWRITE" )
91- @click .option ('--debug' , is_flag = True , default = False , help = "Abort on any errors with full stack trace.\n "
92- "Short-hand for OCRD_MISSING_OUTPUT=ABORT" )
93- @parameter_option
94- @parameter_override_option
95- @ocrd_loglevel
96- def bashlib_input_files (ocrd_tool , executable , ** kwargs ):
97- """
98- List input files for processing
99-
100- Instantiate a processor and workspace from the given processing options.
101- Then loop through the input files of the input fileGrp, and for each one,
102- print its `url`, `ID`, `mimetype` and `pageId`, as well as its recommended
103- `outputFileId` (from ``make_file_id``).
104-
105- (The printing format is one associative array initializer per line.)
106- """
107- class BashlibProcessor (Processor ):
108- # go half way of the normal run_processor / process_workspace call tree
109- # by just delegating to process_workspace, overriding process_page_file
110- # to ensure all input files exist locally (without persisting them in the METS)
111- # and print what needs to be acted on in bash-friendly way
112- def process_page_file (self , * input_files ):
113- for field in ['url' , 'local_filename' , 'ID' , 'mimetype' , 'pageId' ]:
114- # make this bash-friendly (show initialization for associative array)
115- if len (input_files ) > 1 :
116- # single quotes allow us to preserve the list value inside the alist
117- value = ' ' .join (str (getattr (res , field )) for res in input_files )
118- else :
119- value = str (getattr (input_files [0 ], field ))
120- print (f"[{ field } ]='{ value } '" , end = ' ' )
121- output_file_id = make_file_id (input_files [0 ], kwargs ['output_file_grp' ])
122- print (f"[outputFileId]='{ output_file_id } '" )
123- if ocrd_tool and executable :
124- class FullBashlibProcessor (BashlibProcessor ):
125- @property
126- def metadata_location (self ):
127- # needed for metadata loading and validation mechanism
128- return ocrd_tool
129-
130- @property
131- def executable (self ):
132- # needed for ocrd_tool lookup
133- return executable
134- processor_class = FullBashlibProcessor
135- else :
136- # we have no true metadata file, so fill in just to make it work
137- class UnknownBashlibProcessor (BashlibProcessor ):
138- @property
139- def ocrd_tool (self ):
140- # needed to satisfy the validator
141- return {'executable' : '' ,
142- # required now
143- 'input_file_grp_cardinality' : 1 ,
144- 'output_file_grp_cardinality' : 1 ,
145- 'steps' : ['' ]}
146-
147- @property
148- def version (self ):
149- # needed to satisfy the validator and wrapper
150- return '1.0'
151- processor_class = UnknownBashlibProcessor
152-
153- ocrd_cli_wrap_processor (processor_class , ** kwargs )
0 commit comments