33
33
import java .util .LinkedHashMap ;
34
34
import java .util .List ;
35
35
import java .util .Map ;
36
- import java .util .Objects ;
37
36
import java .util .Optional ;
38
37
import java .util .Properties ;
39
38
import java .util .UUID ;
40
39
import java .util .function .Consumer ;
41
- import java .util .stream .Collectors ;
42
40
import org .apache .spark .ExceptionFailure ;
43
41
import org .apache .spark .SparkConf ;
44
42
import org .apache .spark .TaskFailedReason ;
54
52
import org .apache .spark .sql .streaming .StateOperatorProgress ;
55
53
import org .apache .spark .sql .streaming .StreamingQueryListener ;
56
54
import org .apache .spark .sql .streaming .StreamingQueryProgress ;
55
+ import org .apache .spark .util .Utils ;
57
56
import org .slf4j .Logger ;
58
57
import org .slf4j .LoggerFactory ;
59
58
import scala .Tuple2 ;
@@ -71,6 +70,9 @@ public abstract class AbstractDatadogSparkListener extends SparkListener {
71
70
private static final Logger log = LoggerFactory .getLogger (AbstractDatadogSparkListener .class );
72
71
private static final ObjectMapper objectMapper = new ObjectMapper ();
73
72
public static volatile AbstractDatadogSparkListener listener = null ;
73
+ public static volatile SparkListenerInterface openLineageSparkListener = null ;
74
+ public static volatile SparkConf openLineageSparkConf = null ;
75
+
74
76
public static volatile boolean finishTraceOnApplicationEnd = true ;
75
77
public static volatile boolean isPysparkShell = false ;
76
78
@@ -113,6 +115,7 @@ public abstract class AbstractDatadogSparkListener extends SparkListener {
113
115
private final Map <Long , SparkSQLUtils .AccumulatorWithStage > accumulators =
114
116
new RemoveEldestHashMap <>(MAX_ACCUMULATOR_SIZE );
115
117
118
+ private volatile boolean isStreamingJob = false ;
116
119
private final boolean isRunningOnDatabricks ;
117
120
private final String databricksClusterName ;
118
121
private final String databricksServiceName ;
@@ -127,7 +130,6 @@ public abstract class AbstractDatadogSparkListener extends SparkListener {
127
130
private long availableExecutorTime = 0 ;
128
131
129
132
private volatile boolean applicationEnded = false ;
130
- private SparkListener openLineageSparkListener = null ;
131
133
132
134
public AbstractDatadogSparkListener (SparkConf sparkConf , String appId , String sparkVersion ) {
133
135
tracer = AgentTracer .get ();
@@ -156,8 +158,6 @@ public AbstractDatadogSparkListener(SparkConf sparkConf, String appId, String sp
156
158
finishApplication (System .currentTimeMillis (), null , 0 , null );
157
159
}
158
160
}));
159
- initApplicationSpanIfNotInitialized ();
160
- loadOlSparkListener ();
161
161
}
162
162
163
163
static void setupSparkConf (SparkConf sparkConf ) {
@@ -167,34 +167,44 @@ static void setupSparkConf(SparkConf sparkConf) {
167
167
sparkConf .set ("spark.openlineage.transport.transports.agent.url" , getAgentHttpUrl ());
168
168
sparkConf .set ("spark.openlineage.transport.transports.agent.endpoint" , AGENT_OL_ENDPOINT );
169
169
sparkConf .set ("spark.openlineage.transport.transports.agent.compression" , "gzip" );
170
- }
171
-
172
- void setupTrace (SparkConf sc ) {
173
- sc .set (
170
+ sparkConf .set (
174
171
"spark.openlineage.run.tags" ,
175
172
"_dd.trace_id:"
176
- + applicationSpan .context ().getTraceId ().toString ()
177
- + ";_dd.intake .emit_spans:false" );
173
+ + listener . applicationSpan .context ().getTraceId ().toString ()
174
+ + ";_dd.ol_intake .emit_spans:false" );
178
175
}
179
176
180
- void loadOlSparkListener () {
177
+ public void setupOpenLineage () {
178
+ log .debug ("Setting up OpenLineage-Datadog integration" );
179
+ if (openLineageSparkListener != null ) {
180
+ setupSparkConf (openLineageSparkConf );
181
+ return ;
182
+ }
183
+
181
184
String className = "io.openlineage.spark.agent.OpenLineageSparkListener" ;
182
- Optional <Class > clazz = loadClass (className );
183
- if (!clazz .isPresent ()) {
185
+ Class clazz ;
186
+ try {
187
+ try {
188
+ clazz = Class .forName (className , true , Thread .currentThread ().getContextClassLoader ());
189
+ } catch (ClassNotFoundException e ) {
190
+ clazz = Class .forName (className , true , Utils .class .getClassLoader ());
191
+ }
192
+ } catch (ClassNotFoundException e ) {
184
193
log .info ("OpenLineage integration is not present on the classpath" );
185
194
return ;
186
195
}
187
- try {
188
- setupSparkConf (sparkConf );
189
- sparkConf .set (
190
- "spark.openlineage.run.tags" ,
191
- "_dd.trace_id:"
192
- + applicationSpan .context ().getTraceId ().toString ()
193
- + ";_dd.ol_intake.emit_spans:false" );
194
196
197
+ openLineageSparkConf = sparkConf ;
198
+ if (clazz == null ) {
199
+ log .info ("OpenLineage integration is not present on the classpath: class is null" );
200
+ return ;
201
+ }
202
+ try {
203
+ setupSparkConf (openLineageSparkConf );
195
204
openLineageSparkListener =
196
- (SparkListener )
197
- clazz .get ().getDeclaredConstructor (SparkConf .class ).newInstance (sparkConf );
205
+ (SparkListenerInterface )
206
+ clazz .getConstructor (SparkConf .class ).newInstance (openLineageSparkConf );
207
+
198
208
log .info (
199
209
"Created OL spark listener: {}" , openLineageSparkListener .getClass ().getSimpleName ());
200
210
} catch (Exception e ) {
@@ -223,8 +233,6 @@ void loadOlSparkListener() {
223
233
/**
 * Captures the Spark application-start event so later callbacks (e.g. the
 * application-span initialization path, which forwards this event to the
 * OpenLineage listener via {@code notifyOl}) can read it.
 *
 * <p>Synchronized to keep the write consistent with other listener callbacks
 * that read {@code this.applicationStart}.
 */
@Override
public synchronized void onApplicationStart(SparkListenerApplicationStart applicationStart) {
  this.applicationStart = applicationStart;
}
229
237
230
238
private void initApplicationSpanIfNotInitialized () {
@@ -245,6 +253,8 @@ private void initApplicationSpanIfNotInitialized() {
245
253
}
246
254
}
247
255
256
+ notifyOl (x -> this .openLineageSparkListener .onApplicationStart (x ), applicationStart );
257
+
248
258
captureApplicationParameters (builder );
249
259
captureOpenlineageContextIfPresent (builder );
250
260
@@ -455,6 +465,7 @@ public synchronized void onJobStart(SparkListenerJobStart jobStart) {
455
465
if (sqlSpan != null ) {
456
466
jobSpanBuilder .asChildOf (sqlSpan .context ());
457
467
} else if (batchKey != null ) {
468
+ isStreamingJob = true ;
458
469
AgentSpan batchSpan =
459
470
getOrCreateStreamingBatchSpan (batchKey , jobStart .time (), jobStart .properties ());
460
471
jobSpanBuilder .asChildOf (batchSpan .context ());
@@ -760,7 +771,11 @@ public void onOtherEvent(SparkListenerEvent event) {
760
771
}
761
772
762
773
private <T extends SparkListenerEvent > void notifyOl (Consumer <T > ol , T event ) {
763
- if (this .openLineageSparkListener != null ) {
774
+ if (isRunningOnDatabricks || isStreamingJob ) {
775
+ log .debug ("Not emitting event when running on databricks or on streaming jobs" );
776
+ return ;
777
+ }
778
+ if (openLineageSparkListener != null ) {
764
779
log .debug ("Notifying with event `{}`" , event .getClass ().getCanonicalName ());
765
780
ol .accept (event );
766
781
} else {
@@ -816,6 +831,7 @@ private synchronized void updateAdaptiveSQLPlan(SparkListenerEvent event) {
816
831
private synchronized void onSQLExecutionStart (SparkListenerSQLExecutionStart sqlStart ) {
817
832
sqlPlans .put (sqlStart .executionId (), sqlStart .sparkPlanInfo ());
818
833
sqlQueries .put (sqlStart .executionId (), sqlStart );
834
+ notifyOl (x -> openLineageSparkListener .onOtherEvent (x ), sqlStart );
819
835
}
820
836
821
837
private synchronized void onSQLExecutionEnd (SparkListenerSQLExecutionEnd sqlEnd ) {
@@ -1338,57 +1354,4 @@ private static String removeUuidFromEndOfString(String input) {
1338
1354
return input .replaceAll (
1339
1355
"_[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$" , "" );
1340
1356
}
1341
-
1342
- private Optional <Class > loadClass (String className ) {
1343
- Class clazz = null ;
1344
- List <ClassLoader > availableClassloaders =
1345
- Thread .getAllStackTraces ().keySet ().stream ()
1346
- .map (Thread ::getContextClassLoader )
1347
- .filter (Objects ::nonNull )
1348
- .collect (Collectors .toList ());
1349
- try {
1350
- clazz = Class .forName (className );
1351
- } catch (Exception e ) {
1352
- log .debug ("Failed to load {} via Class.forName: {}" , className , e .toString ());
1353
- for (ClassLoader classLoader : availableClassloaders ) {
1354
- try {
1355
- clazz = classLoader .loadClass (className );
1356
- log .debug ("Loaded {} via classLoader: {}" , className , classLoader );
1357
- break ;
1358
- } catch (Exception ex ) {
1359
- log .debug (
1360
- "Failed to load {} via loadClass via ClassLoader {} - {}" ,
1361
- className ,
1362
- classLoader ,
1363
- ex .toString ());
1364
- }
1365
- try {
1366
- clazz = classLoader .getParent ().loadClass (className );
1367
- log .debug (
1368
- "Loaded {} via parent classLoader: {} for CL {}" ,
1369
- className ,
1370
- classLoader .getParent (),
1371
- classLoader );
1372
- break ;
1373
- } catch (Exception ex ) {
1374
- log .debug (
1375
- "Failed to load {} via loadClass via parent ClassLoader {} - {}" ,
1376
- className ,
1377
- classLoader .getParent (),
1378
- ex .toString ());
1379
- }
1380
- }
1381
- }
1382
- if (clazz == null ) {
1383
- try {
1384
- clazz = ClassLoader .getSystemClassLoader ().loadClass (className );
1385
- log .debug (
1386
- "Loaded {} via system classLoader: {}" , className , ClassLoader .getSystemClassLoader ());
1387
- } catch (Exception ex ) {
1388
- log .debug (
1389
- "Failed to load {} via loadClass via SystemClassLoader {}" , className , ex .toString ());
1390
- }
1391
- }
1392
- return Optional .ofNullable (clazz );
1393
- }
1394
1357
}
0 commit comments