Skip to content

Commit a07f913

Browse files
Merge pull request #148 from mastodon-sc/python-thread-death-workaround
Add a retry mechanism to workaround Python Runtime exceptions
2 parents 4f7d519 + 2decd65 commit a07f913

3 files changed

Lines changed: 63 additions & 33 deletions

File tree

src/main/java/org/mastodon/mamut/linking/trackastra/appose/computation/RegionPropsComputation.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -131,7 +131,7 @@ private SingleTimepointRegionProps computeSingleTimepointProps( final Source< ?
131131
inputs.put( IMAGE, imageNDArray );
132132
inputs.put( MASK, masksNDArray );
133133

134-
Service.Task result = runScript();
134+
Service.Task result = runScriptWithRetries( 1, 10 );
135135
ShmImg< IntType > labels = new ShmImg<>( ( NDArray ) result.outputs.get( LABELS ) );
136136
ShmImg< IntType > timepoints = new ShmImg<>( ( NDArray ) result.outputs.get( TIMEPOINTS ) );
137137
LoopBuilder.setImages( timepoints ).multiThreaded().forEachPixel( p -> p.set( timepoint ) ); // all timepoints are the same

src/main/java/org/mastodon/mamut/util/appose/ApposeProcess.java

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -185,6 +185,27 @@ protected Service.Task runScript() throws IOException
185185
return currentTask;
186186
}
187187

188+
protected Service.Task runScriptWithRetries( final int attempt, final int maxRetries ) throws IOException
189+
{
190+
try
191+
{
192+
return runScript();
193+
}
194+
catch ( PythonRuntimeException e )
195+
{
196+
if ( attempt <= maxRetries )
197+
{
198+
logger.warn( "Python runtime exception on attempt {}/{}. Retrying...", attempt, maxRetries );
199+
return runScriptWithRetries( attempt + 1, maxRetries );
200+
}
201+
else
202+
{
203+
logger.error( "Python runtime exception on final attempt {}/{}. No more retries.", attempt, maxRetries );
204+
throw e;
205+
}
206+
}
207+
}
208+
188209
public void cancel()
189210
{
190211
pythonWorker.kill();

src/test/java/org/mastodon/mamut/appose/ApposeReuseEnvironmentExample.java

Lines changed: 41 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -28,63 +28,72 @@
2828
*/
2929
package org.mastodon.mamut.appose;
3030

31-
import java.io.BufferedWriter;
32-
import java.io.File;
33-
import java.io.FileWriter;
3431
import java.io.IOException;
35-
import java.nio.file.Files;
32+
import java.util.function.Consumer;
3633

3734
import org.apache.commons.lang3.time.StopWatch;
3835
import org.apposed.appose.Appose;
3936
import org.apposed.appose.Environment;
4037
import org.apposed.appose.Service;
38+
import org.apposed.appose.TaskEvent;
39+
import org.mastodon.mamut.detection.cellpose.Cellpose3;
4140

4241
public class ApposeReuseEnvironmentExample
4342
{
4443
public static void main( String[] args ) throws IOException, InterruptedException
4544
{
46-
String content = "name: cellpose\n"
47-
+ "channels:\n"
48-
+ " - nvidia\n"
49-
+ " - pytorch\n"
50-
+ " - conda-forge\n"
51-
+ "dependencies:\n"
52-
+ " - python=3.10\n"
53-
+ " - pip\n"
54-
+ " - pip:\n"
55-
+ " - cellpose[gui]\n"
56-
+ " - appose\n"
57-
+ " - pytorch\n"
58-
+ " - pytorch-cuda=11.8\n"
59-
+ " - numpy=2.0.2\n";
60-
61-
Environment env = Appose.mamba().scheme( "environment.yml" ).content( content ).logDebug().build();
45+
Environment env = Appose.mamba().scheme( "environment.yml" ).content( Cellpose3.ENV_FILE_CONTENT ).logDebug().build();
6246
System.out.println( "Created environment" );
6347
StopWatch stopWatch = StopWatch.createStarted();
6448

65-
String script = "import numpy as np" + "\n"
66-
+ "from cellpose import models" + "\n"
67-
+ "import appose" + "\n\n"
68-
+ "5 + 6" + "\n";
49+
String importScript = "import networkx as nx\n"
50+
+ "import appose\n"
51+
+ "from pandas import DataFrame\n"
52+
+ "\n"
53+
+ "task.update(message='Hello from import script')\n"
54+
+ "task.export(nx=nx, appose=appose, DataFrame=DataFrame)\n";
55+
56+
String script = "task.update(message='Creating a graph...')\n"
57+
+ "G = nx.Graph()\n"
58+
+ "G.add_nodes_from(['A', 'B', 'C'])\n"
59+
+ "G.add_edges_from([('A', 'B'), ('B', 'C')])\n"
60+
+ "print('Nodes:', G.nodes())\n"
61+
+ "print('Edges:', G.edges())\n"
62+
+ "data = {'name': ['Alice', 'Bob'], 'age': [25, 30]}\n"
63+
+ "df = DataFrame(data)\n"
64+
+ "task.update(message='Graph created with ' + str(len(G.nodes())) + ' nodes and ' + str(len(G.edges())) + ' edges.')\n"
65+
+ "task.update(message='DataFrame created: ' + df.to_string())\n"
66+
+ "task.outputs['result']=len(G.nodes()) + len(G.edges())\n";
6967

70-
try (Service python = env.python())
68+
try (Service python = env.python().init( "import numpy" ))
7169
{
7270
stopWatch.split();
7371
System.out.println( "Python service started: " + stopWatch.formatSplitTime() );
74-
Service.Task task1 = python.task( script );
72+
Service.Task task1 = python.task( importScript, "main" );
73+
task1.listen( getTaskListener() );
7574
task1.waitFor();
76-
Object result1 = task1.outputs.get( "result" );
77-
System.out.println( "result1: " + result1 );
75+
System.out.println( "Import script ran successfully." );
7876
stopWatch.split();
79-
System.out.println( "Python task1 finished: " + stopWatch.formatSplitTime() );
77+
System.out.println( "Import task finished: " + stopWatch.formatSplitTime() );
8078

79+
// Now run the payload script that uses the imported modules
8180
Service.Task task2 = python.task( script );
81+
task2.listen( getTaskListener() );
8282
task2.waitFor();
83-
Object result2 = task2.outputs.get( "result" );
84-
System.out.println( "result2: " + result2 );
83+
Object result = task2.outputs.get( "result" );
84+
System.out.println( "Nodes + Edges (result): " + result );
8585

8686
stopWatch.split();
87-
System.out.println( "Python task2 finished: " + stopWatch.formatSplitTime() );
87+
System.out.println( "Payload task finished: " + stopWatch.formatSplitTime() );
8888
}
8989
}
90+
91+
private static Consumer< TaskEvent > getTaskListener()
92+
{
93+
return taskEvent -> {
94+
String message = taskEvent.responseType.toString();
95+
message += taskEvent.message == null ? "" : ": " + taskEvent.message;
96+
System.out.println( message );
97+
};
98+
}
9099
}

0 commit comments

Comments
 (0)