You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
" ./build/bin/korapxmltool -t zip -T spacy app/src/test/resources/goe.zip",
97
+
"",
98
+
" Native Docker spaCy tagging and dependency parsing:",
99
+
" ./build/bin/korapxmltool -t zip -P spacy app/src/test/resources/goe.zip",
100
+
"",
101
+
" Use external spaCy annotation (legacy method):",
96
102
" ./build/bin/korapxmltool -j4 -A \"docker run -e SPACY_USE_DEPENDENCIES=False --rm -i korap/conllu2spacy:latest\" -t zip ./app/src/test/resources/goe.zip",
97
103
"",
98
104
" Generate Krill tar from wud24_sample with multiple annotation foundries:",
@@ -323,12 +329,14 @@ class KorapXmlTool : Callable<Int> {
323
329
324
330
data classDockerTaggerConfig(valimage:String, valdefaultModel:String, valdefaultArgs:String)
325
331
privateval dockerTaggers =mapOf(
326
-
"treetagger" to DockerTaggerConfig("korap/conllu-treetagger", "german", "-p")
332
+
"treetagger" to DockerTaggerConfig("korap/conllu-treetagger", "german", "-p"),
333
+
"spacy" to DockerTaggerConfig("korap/conllu-spacy", "de_core_news_lg", "")
327
334
)
328
335
329
336
privateval defaultParserModels =mapOf(
330
337
"malt" to "german.mco",
331
-
"corenlp" to "germanSR.ser.gz"
338
+
"corenlp" to "germanSR.ser.gz",
339
+
"spacy" to "de_core_news_lg"
332
340
)
333
341
334
342
// Calculate optimal thread count based on format, memory, and input characteristics
@@ -467,7 +475,7 @@ class KorapXmlTool : Callable<Int> {
467
475
names = ["-T", "--tag-with"],
468
476
paramLabel ="TAGGER[:MODEL]",
469
477
description = ["Specify a tagger and optionally a model: ${taggerFoundries}[:<path/to/model>].",
470
-
"If model is omitted, defaults are: marmot→de.marmot, opennlp→de-pos-maxent.bin, corenlp→german-fast.tagger"]
478
+
"If model is omitted, defaults are: marmot→de.marmot, opennlp→de-pos-maxent.bin, corenlp→german-fast.tagger, treetagger→german, spacy→de_core_news_lg"]
471
479
)
472
480
funsetTagWith(tagWith:String) {
473
481
// Pattern now makes the model part optional
@@ -518,7 +526,16 @@ class KorapXmlTool : Callable<Int> {
518
526
// The user request said: "docker run -v $KORAPXMLTOOL_MODELS_PATH:/local/models ..."
519
527
// AnnotationWorkerPool uses /bin/sh -c, so environment variables should be expanded by the shell.
520
528
521
-
annotateWith ="docker run -v \${KORAPXMLTOOL_MODELS_PATH:-.}:/local/models --rm -i ${config.image}$args -l $model"
529
+
// Handle different Docker command formats
530
+
if (taggerName =="spacy") {
531
+
// spaCy uses -m for model and -d to disable dependencies (tagging only)
0 commit comments