Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion doc/Grobid-service.md
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,8 @@ The directory `grobid-installation` should have the following structure:
You can check whether the service is up and running by opening the following URLs:

* <http://yourhost:8070/api/version> returns the current version and the GitHub revision (commit hash) of the running service
* <http://yourhost:8070/api/isalive> will return `true`/`false` whether the service is up and running
* <http://yourhost:8070/api/isalive> **(liveness)** returns `true`/`false` as plain text indicating whether the service completed initialization successfully. Returns HTTP 200 when alive, HTTP 503 when not initialized or initialization failed. Suitable for use as a liveness probe in container orchestrators (Docker, Kubernetes).
* <http://yourhost:8070/api/health> **(readiness)** returns a JSON object with detailed status including initialization state, engine pool metrics (active/idle/max engines), and configuration checks. Returns HTTP 200 when ready to process requests, HTTP 503 otherwise. Suitable for use as a readiness probe.

The service also provides an admin console, reachable at <http://yourhost:8071>, where some additional checks such as ping, metrics and heartbeat are available.
In particular, we recommend having a look at the metrics (using the [Metric library](https://metrics.dropwizard.io/3.1.0/getting-started/)), which provide the rate of execution as well as the throughput of each entry point.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -259,22 +259,31 @@ public FundingAcknowledgementParser getFundingAcknowledgementParser() {
}

/**
 * Init all models; this also loads each model into memory.
 * Each parser is initialized independently through {@code tryInit}, so that an
 * exception raised while loading one parser does not prevent the others from loading.
 */
public void initAll() {
    // Every assignment goes through tryInit only: an unguarded direct call to a getter
    // here would let a single failing parser abort the whole initialization.
    tryInit(() -> affiliationAddressParser = getAffiliationAddressParser(), "affiliationAddress");
    tryInit(() -> authorParser = getAuthorParser(), "author");
    tryInit(() -> headerParser = getHeaderParser(), "header");
    tryInit(() -> dateParser = getDateParser(), "date");
    tryInit(() -> citationParser = getCitationParser(), "citation");
    tryInit(() -> fullTextParser = getFullTextParser(), "fullText");
    //tryInit(() -> referenceExtractor = getReferenceExtractor(), "referenceExtractor");
    tryInit(() -> segmentationParser = getSegmentationParser(), "segmentation");
    tryInit(() -> referenceSegmenterParser = getReferenceSegmenterParser(), "referenceSegmenter");
    tryInit(() -> figureParser = getFigureParser(), "figure");
    tryInit(() -> tableParser = getTableParser(), "table");
    //tryInit(() -> monographParser = getMonographParser(), "monograph");
    tryInit(() -> fundingAcknowledgementParser = getFundingAcknowledgementParser(), "fundingAcknowledgement");
}

/**
 * Runs one parser initialization step and logs any exception it raises
 * instead of propagating it to the caller.
 *
 * @param init       the initialization step to execute
 * @param parserName short name of the parser, used only in the error log message
 */
private void tryInit(Runnable init, String parserName) {
    try {
        init.run();
    } catch (Exception failure) {
        // Best effort on purpose: report the failure and return normally so the
        // caller can carry on with its next initialization step.
        final String message = "Failed to initialize " + parserName + " parser";
        LOGGER.error(message, failure);
    }
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,24 +3,24 @@
import org.grobid.core.GrobidModel;
import org.grobid.core.GrobidModels;
import org.grobid.core.utilities.GrobidProperties;
import org.grobid.core.main.LibraryLoader;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.Collections;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.Map;
import java.io.File;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
* Factory for a sequence labelling, aka a tagger, instance.
* Supported implementations are CRF (CRFPP, Wapiti) and Deep Learning (DeLFT)
* Supported implementations are CRF (CRFPP, Wapiti) and Deep Learning (DeLFT)
*
*/
public class TaggerFactory {
public static final Logger LOGGER = LoggerFactory.getLogger(TaggerFactory.class);

private static Map<GrobidModel, GenericTagger> cache = new HashMap<>();
private static Map<String, String> failedModels = new LinkedHashMap<>();

private TaggerFactory() {}

Expand All @@ -40,24 +40,67 @@ public static synchronized GenericTagger getTagger(GrobidModel model, GrobidCRFE
}

if(engine != null) {
switch (engine) {
case CRFPP:
t = new CRFPPTagger(model);
break;
case WAPITI:
t = new WapitiTagger(model);
break;
case DELFT:
t = new DeLFTTagger(model, architecture);
break;
default:
throw new IllegalStateException("Unsupported Grobid sequence labelling engine: " + engine.getExt());
try {
switch (engine) {
case CRFPP:
t = new CRFPPTagger(model);
break;
case WAPITI:
t = new WapitiTagger(model);
break;
case DELFT:
t = new DeLFTTagger(model, architecture);
break;
default:
throw new IllegalStateException("Unsupported Grobid sequence labelling engine: " + engine.getExt());
}
cache.put(model, t);
} catch (Exception e) {
String modelName = model.getModelName();
failedModels.put(modelName, e.getMessage() != null ? e.getMessage() : e.getClass().getName());
LOGGER.error("Failed to create tagger for model " + modelName + " with engine " + engine, e);
throw e;
}
cache.put(model, t);
} else {
throw new IllegalStateException("Unsupported or null Grobid sequence labelling engine: " + engine.getExt());
}
}
return t;
}

/**
 * Lists the models currently held in the tagger cache together with the engine
 * backing each of them.
 *
 * @return a new map from model name to engine label ({@code "wapiti"},
 *         {@code "delft"}, {@code "crfpp"}, or {@code "unknown"} for any other
 *         tagger implementation); entries follow the cache's iteration order
 */
public static synchronized Map<String, String> getLoadedModels() {
    final Map<String, String> enginesByModel = new LinkedHashMap<>();
    cache.forEach((model, tagger) -> {
        final String engineLabel;
        if (tagger instanceof WapitiTagger) {
            engineLabel = "wapiti";
        } else if (tagger instanceof DeLFTTagger) {
            engineLabel = "delft";
        } else if (tagger instanceof CRFPPTagger) {
            engineLabel = "crfpp";
        } else {
            engineLabel = "unknown";
        }
        enginesByModel.put(model.getModelName(), engineLabel);
    });
    return enginesByModel;
}

/**
 * Returns the models whose tagger could not be created, with the recorded error text.
 *
 * @return an unmodifiable snapshot copy, keyed by model name, of the recorded failures;
 *         later failures are not reflected in a previously returned map
 */
public static synchronized Map<String, String> getFailedModels() {
    final Map<String, String> snapshot = new LinkedHashMap<>(failedModels);
    return Collections.unmodifiableMap(snapshot);
}

/**
 * Tells whether at least one model failure has been recorded.
 *
 * @return {@code true} if the failed-model map is not empty, {@code false} otherwise
 */
public static synchronized boolean hasFailures() {
    final int failureCount = failedModels.size();
    return failureCount > 0;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -133,4 +133,35 @@ public boolean validateObject(Engine arg0) {
return false;
}

/**
 * Tells whether the engine pool has been initialized.
 *
 * @return {@code true} if the pool reference is set, {@code false} while it is still {@code null}
 */
public static boolean isPoolInitialized() {
    final boolean poolMissing = grobidEnginePool == null;
    return !poolMissing;
}

/**
 * Reports how many engines are currently borrowed (active) from the pool.
 *
 * @return the pool's active count, or {@code -1} if the pool is not initialized
 */
public static int getActiveEngineCount() {
    // Read the static reference once so the null check and the call use the same object.
    final GenericObjectPool<Engine> enginePool = grobidEnginePool;
    if (enginePool == null) {
        return -1;
    }
    return enginePool.getNumActive();
}

/**
 * Reports how many engines are currently idle in the pool.
 *
 * @return the pool's idle count, or {@code -1} if the pool is not initialized
 */
public static int getIdleEngineCount() {
    // Read the static reference once so the null check and the call use the same object.
    final GenericObjectPool<Engine> enginePool = grobidEnginePool;
    if (enginePool == null) {
        return -1;
    }
    return enginePool.getNumIdle();
}

/**
 * Reports the maximum number of active engines configured on the pool.
 *
 * @return the pool's max-active setting, or {@code -1} if the pool is not initialized
 */
public static int getMaxActiveEngineCount() {
    // Read the static reference once so the null check and the call use the same object.
    final GenericObjectPool<Engine> enginePool = grobidEnginePool;
    if (enginePool == null) {
        return -1;
    }
    return enginePool.getMaxActive();
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,13 @@ public DeLFTModel(GrobidModel model, String architecture) {
try {
LOGGER.info("Loading DeLFT model for " + model.getModelName() + " with architecture " + architecture + "...");
JEPThreadPool.getInstance().run(new InitModel(this.modelName, GrobidProperties.getInstance().getModelPath(), architecture));
} catch (InterruptedException | RuntimeException e) {
} catch (InterruptedException e) {
LOGGER.error("DeLFT model " + this.modelName + " initialization was interrupted", e);
Thread.currentThread().interrupt();
throw new GrobidException("DeLFT model " + this.modelName + " initialization was interrupted", e);
} catch (RuntimeException e) {
LOGGER.error("DeLFT model " + this.modelName + " initialization failed", e);
throw e;
}
}

Expand Down
13 changes: 9 additions & 4 deletions grobid-core/src/main/java/org/grobid/core/jni/JEPThreadPool.java
Original file line number Diff line number Diff line change
Expand Up @@ -169,10 +169,15 @@ public synchronized Jep getJEPInstance() {

public void run(Runnable task) throws InterruptedException {
LOGGER.debug("running thread: " + Thread.currentThread().getId());
Future future = executor.submit(task);
// wait until done (in ms)
while (!future.isDone()) {
Thread.sleep(1);
Future<?> future = executor.submit(task);
try {
future.get(); // blocks until done, propagates exceptions
} catch (ExecutionException e) {
Throwable cause = e.getCause();
if (cause instanceof RuntimeException) {
throw (RuntimeException) cause;
}
throw new RuntimeException(cause);
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,14 @@
import org.grobid.core.engines.EngineParsers;
import org.grobid.core.jni.JEPThreadPool;
import org.grobid.core.main.LibraryLoader;
import org.junit.Ignore;
import org.junit.Test;

import java.util.List;

import static org.junit.Assert.*;

@Ignore("Requires JEP/DeLFT environment")
public class DeLFTTaggerIntegrationTest {

DeLFTTagger target;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import org.grobid.core.main.LibraryLoader;
import org.junit.After;
import org.junit.Before;
import org.junit.Ignore;
import org.junit.Test;
import org.powermock.reflect.Whitebox;

Expand Down Expand Up @@ -34,6 +35,7 @@ public void testGetTagger_shouldReturnDummyTagger() {
assertThat(tagger instanceof DummyTagger, is(true));
}

@Ignore("Requires JEP/DeLFT environment")
@Test
public void testGetDelftTagger_existingModel_shouldReturn() {
GenericTagger tagger = TaggerFactory.getTagger(GrobidModels.HEADER, GrobidCRFEngine.DELFT);
Expand Down
Loading
Loading