|
6 | 6 | <expand macro="requirements"/> |
7 | 7 | <expand macro="creators"/> |
8 | 8 | <command detect_errors="exit_code"><![CDATA[ |
9 | | - #if $format_type == '-h' |
| 9 | + #if $output_format_flag == '-h' |
10 | 10 | #set $out_ext = 'html' |
11 | | - #elif $format_type == '-n' |
| 11 | + #elif $output_format_flag == '-n' |
12 | 12 | #set $out_ext = 'json' |
13 | 13 | #else |
14 | 14 | #set $out_ext = 'xml' |
15 | 15 | #end if |
16 | 16 | @REJECT_MULTIPAGE_TIFF_BASED_ON_FILE_METADATA@ |
17 | 17 | @CREATE_SYMLINKS@ |
| 18 | + #if $segmenter.type == 'baseline' and $segmenter.model_source.source == "history" |
| 19 | + #set $model_name = str($segmenter.model_source.model.name) |
| 20 | + ln -s '$segmenter.model_source.model' '$model_name' && |
| 21 | + #end if |
18 | 22 | #if $segmenter.type == 'boxes' |
19 | 23 | #if $image.ext == 'pdf' |
20 | 24 | echo "The legacy box segmenter requires a binarized image input. PDF input seems to be not supported with the legacy box segmenter since its fails even for 1-bit bilevel PDFs. Please extract the PDF pages to binarized PNG/TIFF images first, or use the neural baseline segmenter." >&2; |
|
27 | 31 | fi && |
28 | 32 | #end if |
29 | 33 | #end if |
30 | | - @TORCH_TMP_DIR@ |
31 | 34 | @RUN_KRAKEN@ segment |
32 | 35 | #if $segmenter.type == 'baseline' |
33 | 36 | --baseline |
34 | | - #if $segmenter.model.fields.sort_key != '00-default' |
35 | | - --model '$segmenter.model.fields.path' |
| 37 | + #if $segmenter.model_source.source == "datatable" |
| 38 | + --model '${segmenter.model_source.model.fields.path}' |
| 39 | + #elif $segmenter.model_source.source == "history" |
| 40 | + --model '$model_name' |
36 | 41 | #end if |
37 | 42 | #else |
38 | 43 | --boxes |
|
47 | 52 | --text-direction '$text_direction' |
48 | 53 | --input-pad '$input_pad' |
49 | 54 | ]]></command> |
| 55 | + <expand macro="env_vars"/> |
50 | 56 | <inputs> |
51 | 57 | <expand macro="input_param"/> |
52 | | - <param name="format_type" type="select" label="Output format"> |
| 58 | + <param name="output_format_flag" type="select" label="Output format"> |
53 | 59 | <option value="-a" selected="true">ALTO XML</option> |
54 | 60 | <option value="-x">PageXML</option> |
55 | 61 | <option value="-h">hOCR</option> |
|
62 | 68 | <option value="boxes">Legacy box segmenter (requires binarized input image)</option> |
63 | 69 | </param> |
64 | 70 | <when value="baseline"> |
65 | | - <param name="model" type="select" label="Segmentation model" help="Select a segmentation model to use. The default is kraken's internal stock model"> |
66 | | - <options from_data_table="ml_models"> |
67 | | - <column name="value" index="0"/> |
68 | | - <column name="name" index="1"/> |
69 | | - <column name="version" index="2"/> |
70 | | - <column name="path" index="3"/> |
71 | | - <column name="sort_key" index="4"/> |
72 | | - <filter type="sort_by" column="4"/> |
73 | | - </options> |
74 | | - </param> |
| 71 | + <conditional name="model_source"> |
| 72 | + <param name="source" type="select" label="Model source"> |
| 73 | + <option value="default" selected="true">Kraken internal stock model (BLLA base)</option> |
| 74 | + <option value="datatable">Models served by Galaxy</option> |
| 75 | + <option value="history">Model from Galaxy history</option> |
| 76 | + </param> |
| 77 | + <when value="default"/> |
| 78 | + <when value="datatable"> |
| 79 | + <param name="model" type="select" label="Segmentation model" help="Select one of the segmentation models installed on this Galaxy server. To use kraken's internal stock model instead, choose it under 'Model source'"> |
| 80 | + <options from_data_table="ml_models"> |
| 81 | + <column name="value" index="0"/> |
| 82 | + <column name="name" index="1"/> |
| 83 | + <column name="version" index="2"/> |
| 84 | + <column name="path" index="3"/> |
| 85 | + <column name="compatibility" index="4"/> |
| 86 | + <filter type="sort_by" column="2"/> |
| 87 | + </options> |
| 88 | + </param> |
| 89 | + </when> |
| 90 | + <when value="history"> |
| 91 | + <param name="model" type="data" format="binary" label="Segmentation model from history"/> |
| 92 | + </when> |
| 93 | + </conditional> |
75 | 94 | </when> |
76 | 95 | <when value="boxes"> |
77 | 96 | <!-- in kraken unbounded - tested manually: <1; >100 lines is empty or the processing time is very long --> |
|
106 | 125 | <data name="output" format="json" label="${tool.name} on ${on_string}"> |
107 | 126 | <filter>image.ext != "pdf"</filter> |
108 | 127 | <actions> |
109 | | - <conditional name="format_type"> |
| 128 | + <conditional name="output_format_flag"> |
110 | 129 | <when value="-a"> |
111 | 130 | <action type="format" default="xml"/> |
112 | 131 | </when> |
|
133 | 152 | <tests> |
134 | 153 | <test expect_num_outputs="1"> |
135 | 154 | <param name="image" value="input.jpg" ftype="jpg"/> |
136 | | - <param name="format_type" value="-n"/> |
137 | | - <conditional name="segmenter"> |
138 | | - <param name="type" value="baseline"/> |
139 | | - </conditional> |
| 155 | + <param name="output_format_flag" value="-n"/> |
| 156 | + <param name="segmenter|type" value="baseline"/> |
140 | 157 | <output name="output" ftype="json"> |
141 | 158 | <assert_contents> |
142 | 159 | <has_text text="&quot;type&quot;: &quot;baselines&quot;"/> |
143 | 160 | <has_text text="&quot;imagename&quot;: &quot;input.jpg&quot;"/> |
144 | 161 | </assert_contents> |
145 | 162 | </output> |
146 | 163 | </test> |
147 | | - <!-- <test expect_num_outputs="1"> |
| 164 | + <test expect_num_outputs="1"> |
148 | 165 | <param name="image" value="input.jpg" ftype="jpg"/> |
149 | | - <param name="format_type" value="-a"/> |
150 | | - <conditional name="segmenter"> |
151 | | - <param name="type" value="baseline"/> |
152 | | - <param name="model" value="test_ml_model"/> |
153 | | - </conditional> |
| 166 | + <param name="output_format_flag" value="-a"/> |
| 167 | + <param name="segmenter|type" value="baseline"/> |
| 168 | + <param name="segmenter|model_source|source" value="history"/> |
| 169 | + <!-- The test model is too large (4.3 MB) to include in the repository, |
| 170 | + so it is pulled from a commit-pinned GitHub URL instead. Zenodo cannot be used here |
| 171 | + because the model is only available there inside a ZIP archive. --> |
| 172 | + <param name="segmenter|model_source|model" value="bdd-segmentation-regions-1.0.mlmodel" location="https://raw.githubusercontent.com/michaelscho/bdd-segmentation-regions/b818488376bc635f1a121427979b33dfc11638df/bdd-segmentation-regions-1.0.mlmodel" ftype="binary"/> |
154 | 173 | <output name="output" ftype="xml"> |
155 | 174 | <assert_contents> |
156 | 175 | <has_text text="alto"/> |
157 | 176 | <has_text text="bdd-segmentation-regions-1.0.mlmodel"/> |
158 | 177 | </assert_contents> |
159 | 178 | </output> |
160 | | - </test> --> |
| 179 | + </test> |
161 | 180 | <test expect_num_outputs="1"> |
162 | 181 | <param name="image" value="input_2pages.pdf" ftype="pdf"/> |
163 | | - <param name="format_type" value="-n"/> |
| 182 | + <param name="output_format_flag" value="-n"/> |
164 | 183 | <conditional name="segmenter"> |
165 | 184 | <param name="type" value="baseline"/> |
166 | 185 | </conditional> |
|
179 | 198 | </test> |
180 | 199 | <test expect_num_outputs="1"> |
181 | 200 | <param name="image" value="input_binarized.png" ftype="png"/> |
182 | | - <param name="format_type" value="-n"/> |
| 201 | + <param name="output_format_flag" value="-n"/> |
183 | 202 | <conditional name="segmenter"> |
184 | 203 | <param name="type" value="boxes"/> |
185 | 204 | <param name="scale" value="10"/> |
|
199 | 218 | </test> |
200 | 219 | <test expect_num_outputs="1"> |
201 | 220 | <param name="image" value="input_binarized.png" ftype="png"/> |
202 | | - <param name="format_type" value="-a"/> |
| 221 | + <param name="output_format_flag" value="-a"/> |
203 | 222 | <conditional name="segmenter"> |
204 | 223 | <param name="type" value="boxes"/> |
205 | 224 | </conditional> |
|
211 | 230 | </test> |
212 | 231 | <test expect_num_outputs="1"> |
213 | 232 | <param name="image" value="input_binarized.png" ftype="png"/> |
214 | | - <param name="format_type" value="-h"/> |
| 233 | + <param name="output_format_flag" value="-h"/> |
215 | 234 | <conditional name="segmenter"> |
216 | 235 | <param name="type" value="boxes"/> |
217 | 236 | </conditional> |
|
223 | 242 | </test> |
224 | 243 | <test expect_failure="true"> |
225 | 244 | <param name="image" value="input.jpg" ftype="jpg"/> |
226 | | - <param name="format_type" value="-n"/> |
| 245 | + <param name="output_format_flag" value="-n"/> |
227 | 246 | <conditional name="segmenter"> |
228 | 247 | <param name="type" value="boxes"/> |
229 | 248 | </conditional> |
|
235 | 254 | </test> |
236 | 255 | <test expect_failure="true"> |
237 | 256 | <param name="image" value="input_2pages.pdf" ftype="pdf"/> |
238 | | - <param name="format_type" value="-n"/> |
| 257 | + <param name="output_format_flag" value="-n"/> |
239 | 258 | <conditional name="segmenter"> |
240 | 259 | <param name="type" value="boxes"/> |
241 | 260 | </conditional> |
|
0 commit comments