Skip to content

Commit f41cd9f

Browse files
authored
Fail when models are partially initialised and add a new more informative health api entry (#1373)
* feat: enhance service health checks and error handling for model initialization * test: ignore integration tests requiring JEP/DeLFT environment
1 parent e757b18 commit f41cd9f

File tree

16 files changed

+248
-992
lines changed

16 files changed

+248
-992
lines changed

doc/Grobid-service.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,8 @@ The directory `grobid-installation` should have the following structure:
4646
You can check whether the service is up and running by opening the following URL:
4747

4848
* <http://yourhost:8070/api/version> returns the current version and the GitHub revision (commit hash) of the running service
49-
* <http://yourhost:8070/api/isalive> will return `true`/`false` whether the service is up and running
49+
* <http://yourhost:8070/api/isalive> **(liveness)** returns `true`/`false` as plain text indicating whether the service completed initialization successfully. Returns HTTP 200 when alive, HTTP 503 when not initialized or initialization failed. Suitable for use as a liveness probe in container orchestrators (Docker, Kubernetes).
50+
* <http://yourhost:8070/api/health> **(readiness)** returns a JSON object with detailed status including initialization state, engine pool metrics (active/idle/max engines), and configuration checks. Returns HTTP 200 when ready to process requests, HTTP 503 otherwise. Suitable for use as a readiness probe.
5051

5152
The service also provides an admin console, reachable at <http://yourhost:8071>, where some additional checks like ping, metrics and heartbeat are available.
5253
In particular, we recommend having a look at the metrics (using the [Metrics library](https://metrics.dropwizard.io/3.1.0/getting-started/)), which provide the rate of execution as well as the throughput of each entry point.

grobid-core/src/main/java/org/grobid/core/engines/EngineParsers.java

Lines changed: 23 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -259,22 +259,31 @@ public FundingAcknowledgementParser getFundingAcknowledgementParser() {
259259
}
260260

261261
/**
262-
* Init all model, this will also load the model into memory
262+
* Init all models; this will also load the models into memory.
263+
* Each parser is initialized independently so that one failure doesn't prevent others from loading.
263264
*/
264265
public void initAll() {
265-
affiliationAddressParser = getAffiliationAddressParser();
266-
authorParser = getAuthorParser();
267-
headerParser = getHeaderParser();
268-
dateParser = getDateParser();
269-
citationParser = getCitationParser();
270-
fullTextParser = getFullTextParser();
271-
//referenceExtractor = getReferenceExtractor();
272-
segmentationParser = getSegmentationParser();
273-
referenceSegmenterParser = getReferenceSegmenterParser();
274-
figureParser = getFigureParser();
275-
tableParser = getTableParser();
276-
//MonographParser monographParser = getMonographParser();
277-
fundingAcknowledgementParser = getFundingAcknowledgementParser();
266+
tryInit(() -> affiliationAddressParser = getAffiliationAddressParser(), "affiliationAddress");
267+
tryInit(() -> authorParser = getAuthorParser(), "author");
268+
tryInit(() -> headerParser = getHeaderParser(), "header");
269+
tryInit(() -> dateParser = getDateParser(), "date");
270+
tryInit(() -> citationParser = getCitationParser(), "citation");
271+
tryInit(() -> fullTextParser = getFullTextParser(), "fullText");
272+
//tryInit(() -> referenceExtractor = getReferenceExtractor(), "referenceExtractor");
273+
tryInit(() -> segmentationParser = getSegmentationParser(), "segmentation");
274+
tryInit(() -> referenceSegmenterParser = getReferenceSegmenterParser(), "referenceSegmenter");
275+
tryInit(() -> figureParser = getFigureParser(), "figure");
276+
tryInit(() -> tableParser = getTableParser(), "table");
277+
//tryInit(() -> monographParser = getMonographParser(), "monograph");
278+
tryInit(() -> fundingAcknowledgementParser = getFundingAcknowledgementParser(), "fundingAcknowledgement");
279+
}
280+
281+
private void tryInit(Runnable init, String parserName) {
282+
try {
283+
init.run();
284+
} catch (Exception e) {
285+
LOGGER.error("Failed to initialize " + parserName + " parser", e);
286+
}
278287
}
279288

280289
@Override

grobid-core/src/main/java/org/grobid/core/engines/tagging/TaggerFactory.java

Lines changed: 62 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -3,24 +3,24 @@
33
import org.grobid.core.GrobidModel;
44
import org.grobid.core.GrobidModels;
55
import org.grobid.core.utilities.GrobidProperties;
6-
import org.grobid.core.main.LibraryLoader;
6+
import org.slf4j.Logger;
7+
import org.slf4j.LoggerFactory;
78

9+
import java.util.Collections;
810
import java.util.HashMap;
11+
import java.util.LinkedHashMap;
912
import java.util.Map;
10-
import java.io.File;
11-
12-
import org.slf4j.Logger;
13-
import org.slf4j.LoggerFactory;
1413

1514
/**
1615
* Factory for a sequence labelling, aka a tagger, instance.
17-
* Supported implementations are CRF (CRFPP, Wapiti) and Deep Learning (DeLFT)
16+
* Supported implementations are CRF (CRFPP, Wapiti) and Deep Learning (DeLFT)
1817
*
1918
*/
2019
public class TaggerFactory {
2120
public static final Logger LOGGER = LoggerFactory.getLogger(TaggerFactory.class);
2221

2322
private static Map<GrobidModel, GenericTagger> cache = new HashMap<>();
23+
private static Map<String, String> failedModels = new LinkedHashMap<>();
2424

2525
private TaggerFactory() {}
2626

@@ -40,24 +40,67 @@ public static synchronized GenericTagger getTagger(GrobidModel model, GrobidCRFE
4040
}
4141

4242
if(engine != null) {
43-
switch (engine) {
44-
case CRFPP:
45-
t = new CRFPPTagger(model);
46-
break;
47-
case WAPITI:
48-
t = new WapitiTagger(model);
49-
break;
50-
case DELFT:
51-
t = new DeLFTTagger(model, architecture);
52-
break;
53-
default:
54-
throw new IllegalStateException("Unsupported Grobid sequence labelling engine: " + engine.getExt());
43+
try {
44+
switch (engine) {
45+
case CRFPP:
46+
t = new CRFPPTagger(model);
47+
break;
48+
case WAPITI:
49+
t = new WapitiTagger(model);
50+
break;
51+
case DELFT:
52+
t = new DeLFTTagger(model, architecture);
53+
break;
54+
default:
55+
throw new IllegalStateException("Unsupported Grobid sequence labelling engine: " + engine.getExt());
56+
}
57+
cache.put(model, t);
58+
} catch (Exception e) {
59+
String modelName = model.getModelName();
60+
failedModels.put(modelName, e.getMessage() != null ? e.getMessage() : e.getClass().getName());
61+
LOGGER.error("Failed to create tagger for model " + modelName + " with engine " + engine, e);
62+
throw e;
5563
}
56-
cache.put(model, t);
5764
} else {
5865
throw new IllegalStateException("Unsupported or null Grobid sequence labelling engine: " + engine.getExt());
5966
}
6067
}
6168
return t;
6269
}
70+
71+
/**
72+
* Returns a map of successfully loaded models and their engine types.
73+
*/
74+
public static synchronized Map<String, String> getLoadedModels() {
75+
Map<String, String> loaded = new LinkedHashMap<>();
76+
for (Map.Entry<GrobidModel, GenericTagger> entry : cache.entrySet()) {
77+
String engineType;
78+
GenericTagger tagger = entry.getValue();
79+
if (tagger instanceof WapitiTagger) {
80+
engineType = "wapiti";
81+
} else if (tagger instanceof DeLFTTagger) {
82+
engineType = "delft";
83+
} else if (tagger instanceof CRFPPTagger) {
84+
engineType = "crfpp";
85+
} else {
86+
engineType = "unknown";
87+
}
88+
loaded.put(entry.getKey().getModelName(), engineType);
89+
}
90+
return loaded;
91+
}
92+
93+
/**
94+
* Returns a map of models that failed to load and their error messages.
95+
*/
96+
public static synchronized Map<String, String> getFailedModels() {
97+
return Collections.unmodifiableMap(new LinkedHashMap<>(failedModels));
98+
}
99+
100+
/**
101+
* Returns true if any model failed to load.
102+
*/
103+
public static synchronized boolean hasFailures() {
104+
return !failedModels.isEmpty();
105+
}
63106
}

grobid-core/src/main/java/org/grobid/core/factory/GrobidPoolingFactory.java

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -133,4 +133,35 @@ public boolean validateObject(Engine arg0) {
133133
return false;
134134
}
135135

136+
/**
137+
* Returns whether the engine pool has been initialized.
138+
*/
139+
public static boolean isPoolInitialized() {
140+
return grobidEnginePool != null;
141+
}
142+
143+
/**
144+
* Returns the number of currently borrowed (active) engines, or -1 if pool is not initialized.
145+
*/
146+
public static int getActiveEngineCount() {
147+
GenericObjectPool<Engine> pool = grobidEnginePool;
148+
return pool != null ? pool.getNumActive() : -1;
149+
}
150+
151+
/**
152+
* Returns the number of idle engines in the pool, or -1 if pool is not initialized.
153+
*/
154+
public static int getIdleEngineCount() {
155+
GenericObjectPool<Engine> pool = grobidEnginePool;
156+
return pool != null ? pool.getNumIdle() : -1;
157+
}
158+
159+
/**
160+
* Returns the configured maximum number of active engines, or -1 if pool is not initialized.
161+
*/
162+
public static int getMaxActiveEngineCount() {
163+
GenericObjectPool<Engine> pool = grobidEnginePool;
164+
return pool != null ? pool.getMaxActive() : -1;
165+
}
166+
136167
}

grobid-core/src/main/java/org/grobid/core/jni/DeLFTModel.java

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,8 +36,13 @@ public DeLFTModel(GrobidModel model, String architecture) {
3636
try {
3737
LOGGER.info("Loading DeLFT model for " + model.getModelName() + " with architecture " + architecture + "...");
3838
JEPThreadPool.getInstance().run(new InitModel(this.modelName, GrobidProperties.getInstance().getModelPath(), architecture));
39-
} catch (InterruptedException | RuntimeException e) {
39+
} catch (InterruptedException e) {
40+
LOGGER.error("DeLFT model " + this.modelName + " initialization was interrupted", e);
41+
Thread.currentThread().interrupt();
42+
throw new GrobidException("DeLFT model " + this.modelName + " initialization was interrupted", e);
43+
} catch (RuntimeException e) {
4044
LOGGER.error("DeLFT model " + this.modelName + " initialization failed", e);
45+
throw e;
4146
}
4247
}
4348

grobid-core/src/main/java/org/grobid/core/jni/JEPThreadPool.java

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -169,10 +169,15 @@ public synchronized Jep getJEPInstance() {
169169

170170
public void run(Runnable task) throws InterruptedException {
171171
LOGGER.debug("running thread: " + Thread.currentThread().getId());
172-
Future future = executor.submit(task);
173-
// wait until done (in ms)
174-
while (!future.isDone()) {
175-
Thread.sleep(1);
172+
Future<?> future = executor.submit(task);
173+
try {
174+
future.get(); // blocks until done, propagates exceptions
175+
} catch (ExecutionException e) {
176+
Throwable cause = e.getCause();
177+
if (cause instanceof RuntimeException) {
178+
throw (RuntimeException) cause;
179+
}
180+
throw new RuntimeException(cause);
176181
}
177182
}
178183

grobid-core/src/test/java/org/grobid/core/engines/tagging/DeLFTTaggerIntegrationTest.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,12 +7,14 @@
77
import org.grobid.core.engines.EngineParsers;
88
import org.grobid.core.jni.JEPThreadPool;
99
import org.grobid.core.main.LibraryLoader;
10+
import org.junit.Ignore;
1011
import org.junit.Test;
1112

1213
import java.util.List;
1314

1415
import static org.junit.Assert.*;
1516

17+
@Ignore("Requires JEP/DeLFT environment")
1618
public class DeLFTTaggerIntegrationTest {
1719

1820
DeLFTTagger target;

grobid-core/src/test/java/org/grobid/core/engines/tagging/TaggerFactoryTest.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
import org.grobid.core.main.LibraryLoader;
55
import org.junit.After;
66
import org.junit.Before;
7+
import org.junit.Ignore;
78
import org.junit.Test;
89
import org.powermock.reflect.Whitebox;
910

@@ -34,6 +35,7 @@ public void testGetTagger_shouldReturnDummyTagger() {
3435
assertThat(tagger instanceof DummyTagger, is(true));
3536
}
3637

38+
@Ignore("Requires JEP/DeLFT environment")
3739
@Test
3840
public void testGetDelftTagger_existingModel_shouldReturn() {
3941
GenericTagger tagger = TaggerFactory.getTagger(GrobidModels.HEADER, GrobidCRFEngine.DELFT);

0 commit comments

Comments
 (0)